3510 files changed, 190606 insertions, 62424 deletions
diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd
index d431e2d00472..df4afbccf037 100644
--- a/Documentation/ABI/stable/sysfs-driver-dma-idxd
+++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd
@@ -128,6 +128,8 @@ Date:		Aug 28, 2020
 KernelVersion:	5.10.0
 Contact:	dmaengine@vger.kernel.org
 Description:	The last executed device administrative command's status/error.
+		Also last configuration error overloaded.
+		Writing to it will clear the status.
 
 What:		/sys/bus/dsa/devices/wq<m>.<n>/block_on_fault
 Date:		Oct 27, 2020
@@ -211,6 +213,13 @@ Contact:	dmaengine@vger.kernel.org
 Description:	Indicate whether ATS disable is turned on for the workqueue.
 		0 indicates ATS is on, and 1 indicates ATS is off for the workqueue.
 
+What:		/sys/bus/dsa/devices/wq<m>.<n>/occupancy
+Date		May 25, 2021
+KernelVersion:	5.14.0
+Contact:	dmaengine@vger.kernel.org
+Description:	Show the current number of entries in this WQ if WQ Occupancy
+		Support bit WQ capabilities is 1.
+
 What:           /sys/bus/dsa/devices/engine<m>.<n>/group_id
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs
index a5c28f606865..284e2dfa61cd 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -215,6 +215,17 @@ Description:    Sets the skip reset on timeout option for the device. Value of
                 "0" means device will be reset in case some CS has timed out,
                 otherwise it will not be reset.
 
+What:           /sys/kernel/debug/habanalabs/hl<n>/state_dump
+Date:           Oct 2021
+KernelVersion:  5.15
+Contact:        ynudelman@habana.ai
+Description:    Gets the state dump occurring on a CS timeout or failure.
+                State dump is used for debug and is created each time in case of
+                a problem in a CS execution, before reset.
+                Reading from the node returns the newest state dump available.
+                Writing an integer X discards X state dumps, so that the
+                next read would return X+1-st newest state dump.
+
 What:           /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
 Date:           Mar 2020
 KernelVersion:  5.6
@@ -230,6 +241,14 @@ Description:    Displays a list with information about the currently user
                 pointers (user virtual addresses) that are pinned and mapped
                 to DMA addresses
 
+What:           /sys/kernel/debug/habanalabs/hl<n>/userptr_lookup
+Date:           Aug 2021
+KernelVersion:  5.15
+Contact:        ogabbay@kernel.org
+Description:    Allows to search for specific user pointers (user virtual
+                addresses) that are pinned and mapped to DMA addresses, and see
+                their resolution to the specific dma address.
+
 What:           /sys/kernel/debug/habanalabs/hl<n>/vm
 Date:           Jan 2019
 KernelVersion:  5.1
diff --git a/Documentation/ABI/testing/dell-smbios-wmi b/Documentation/ABI/testing/dell-smbios-wmi
index 5f3a0dc67050..f58229084469 100644
--- a/Documentation/ABI/testing/dell-smbios-wmi
+++ b/Documentation/ABI/testing/dell-smbios-wmi
@@ -1,7 +1,7 @@
 What:		/dev/wmi/dell-smbios
 Date:		November 2017
 KernelVersion:	4.15
-Contact:	"Mario Limonciello" <mario.limonciello@dell.com>
+Contact:	Dell.Client.Kernel@dell.com
 Description:
 		Perform SMBIOS calls on supported Dell machines.
 		through the Dell ACPI-WMI interface.
diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy
index 070779e8d836..5c2798534950 100644
--- a/Documentation/ABI/testing/ima_policy
+++ b/Documentation/ABI/testing/ima_policy
@@ -27,12 +27,13 @@ Description:
 			lsm:	[[subj_user=] [subj_role=] [subj_type=]
 				 [obj_user=] [obj_role=] [obj_type=]]
 			option:	[[appraise_type=]] [template=] [permit_directio]
-				[appraise_flag=] [keyrings=]
+				[appraise_flag=] [appraise_algos=] [keyrings=]
 		  base:
 			func:= [BPRM_CHECK][MMAP_CHECK][CREDS_CHECK][FILE_CHECK][MODULE_CHECK]
-			        [FIRMWARE_CHECK]
+				[FIRMWARE_CHECK]
 				[KEXEC_KERNEL_CHECK] [KEXEC_INITRAMFS_CHECK]
 				[KEXEC_CMDLINE] [KEY_CHECK] [CRITICAL_DATA]
+				[SETXATTR_CHECK]
 			mask:= [[^]MAY_READ] [[^]MAY_WRITE] [[^]MAY_APPEND]
 			       [[^]MAY_EXEC]
 			fsmagic:= hex value
@@ -55,6 +56,10 @@ Description:
 			label:= [selinux]|[kernel_info]|[data_label]
 			data_label:= a unique string used for grouping and limiting critical data.
 			For example, "selinux" to measure critical data for SELinux.
+			appraise_algos:= comma-separated list of hash algorithms
+			For example, "sha256,sha512" to only accept to appraise
+			files where the security.ima xattr was hashed with one
+			of these two algorithms.
 
 		  default policy:
 			# PROC_SUPER_MAGIC
@@ -134,3 +139,9 @@ Description:
 		keys added to .builtin_trusted_keys or .ima keyring:
 
 			measure func=KEY_CHECK keyrings=.builtin_trusted_keys|.ima
+
+		Example of the special SETXATTR_CHECK appraise rule, that
+		restricts the hash algorithms allowed when writing to the
+		security.ima xattr of a file:
+
+			appraise func=SETXATTR_CHECK appraise_algos=sha256,sha384,sha512
diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index 793cbb76cd25..d4ae03296861 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -121,6 +121,23 @@ Description:
 		child buses, and re-discover devices removed earlier
 		from this part of the device tree.
 
+What:		/sys/bus/pci/devices/.../reset_method
+Date:		August 2021
+Contact:	Amey Narkhede <ameynarkhede03@gmail.com>
+Description:
+		Some devices allow an individual function to be reset
+		without affecting other functions in the same slot.
+
+		For devices that have this support, a file named
+		reset_method is present in sysfs.  Reading this file
+		gives names of the supported and enabled reset methods and
+		their ordering.  Writing a space-separated list of names of
+		reset methods sets the reset methods and ordering to be
+		used when resetting the device.  Writing an empty string
+		disables the ability to reset the device.  Writing
+		"default" enables all supported reset methods in the
+		default ordering.
+
 What:		/sys/bus/pci/devices/.../reset
 Date:		July 2009
 Contact:	Michael S. Tsirkin <mst@redhat.com>
diff --git a/Documentation/ABI/testing/sysfs-bus-thunderbolt b/Documentation/ABI/testing/sysfs-bus-thunderbolt
index 95c21d6c9a84..b7e87f6c7d47 100644
--- a/Documentation/ABI/testing/sysfs-bus-thunderbolt
+++ b/Documentation/ABI/testing/sysfs-bus-thunderbolt
@@ -232,7 +232,7 @@ Description:	When new NVM image is written to the non-active NVM
 What:		/sys/bus/thunderbolt/devices/.../nvm_authenticate_on_disconnect
 Date:		Oct 2020
 KernelVersion:	v5.9
-Contact:	Mario Limonciello <mario.limonciello@dell.com>
+Contact:	Mario Limonciello <mario.limonciello@outlook.com>
 Description:	For supported devices, automatically authenticate the new Thunderbolt
 		image when the device is disconnected from the host system.
 
diff --git a/Documentation/ABI/testing/sysfs-class-firmware-attributes b/Documentation/ABI/testing/sysfs-class-firmware-attributes
index 3348bf80a37c..90fdf935aa5e 100644
--- a/Documentation/ABI/testing/sysfs-class-firmware-attributes
+++ b/Documentation/ABI/testing/sysfs-class-firmware-attributes
@@ -2,8 +2,8 @@ What:		/sys/class/firmware-attributes/*/attributes/*/
 Date:		February 2021
 KernelVersion:	5.11
 Contact:	Divya Bharathi <Divya.Bharathi@Dell.com>,
-		Mario Limonciello <mario.limonciello@dell.com>,
 		Prasanth KSR <prasanth.ksr@dell.com>
+		Dell.Client.Kernel@dell.com
 Description:
 		A sysfs interface for systems management software to enable
 		configuration capability on supported systems.  This directory
@@ -130,8 +130,8 @@ What:		/sys/class/firmware-attributes/*/authentication/
 Date:		February 2021
 KernelVersion:	5.11
 Contact:	Divya Bharathi <Divya.Bharathi@Dell.com>,
-		Mario Limonciello <mario.limonciello@dell.com>,
 		Prasanth KSR <prasanth.ksr@dell.com>
+		Dell.Client.Kernel@dell.com
 Description:
 		Devices support various authentication mechanisms which can be exposed
 		as a separate configuration object.
@@ -220,8 +220,8 @@ What:		/sys/class/firmware-attributes/*/attributes/pending_reboot
 Date:		February 2021
 KernelVersion:	5.11
 Contact:	Divya Bharathi <Divya.Bharathi@Dell.com>,
-		Mario Limonciello <mario.limonciello@dell.com>,
 		Prasanth KSR <prasanth.ksr@dell.com>
+		Dell.Client.Kernel@dell.com
 Description:
 		A read-only attribute reads 1 if a reboot is necessary to apply
 		pending BIOS attribute changes. Also, an uevent_KOBJ_CHANGE is
@@ -249,8 +249,8 @@ What:		/sys/class/firmware-attributes/*/attributes/reset_bios
 Date:		February 2021
 KernelVersion:	5.11
 Contact:	Divya Bharathi <Divya.Bharathi@Dell.com>,
-		Mario Limonciello <mario.limonciello@dell.com>,
 		Prasanth KSR <prasanth.ksr@dell.com>
+		Dell.Client.Kernel@dell.com
 Description:
 		This attribute can be used to reset the BIOS Configuration.
 		Specifically, it tells which type of reset BIOS configuration is being
@@ -272,3 +272,14 @@ Description:
 
 		Note that any changes to this attribute requires a reboot
 		for changes to take effect.
+
+What:		/sys/class/firmware-attributes/*/attributes/debug_cmd
+Date:		July 2021
+KernelVersion:	5.14
+Contact:	Mark Pearson <markpearson@lenovo.com>
+Description:
+		This write only attribute can be used to send debug commands to the BIOS.
+		This should only be used when recommended by the BIOS vendor. Vendors may
+		use it to enable extra debug attributes or BIOS features for testing purposes.
+
+		Note that any changes to this attribute requires a reboot for changes to take effect.
diff --git a/Documentation/ABI/testing/sysfs-driver-intc_sar b/Documentation/ABI/testing/sysfs-driver-intc_sar
new file mode 100644
index 000000000000..ec334b0e5ed9
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-intc_sar
@@ -0,0 +1,54 @@
+What:		/sys/bus/platform/devices/INTC1092:00/intc_reg
+Date:		August 2021
+KernelVersion:	5.15
+Contact:	Shravan S <s.shravan@intel.com>,
+		An Sudhakar <sudhakar.an@intel.com>
+Description:
+		Specific Absorption Rate (SAR) regulatory mode is typically
+		derived based on information like mcc (Mobile Country Code) and
+		mnc (Mobile Network Code) that is available for the currently
+		attached LTE network. A userspace application is required to set
+		the current SAR regulatory mode on the Dynamic SAR driver using
+		this sysfs node. Such an application can also read back using
+		this sysfs node, the currently configured regulatory mode value
+		from the Dynamic SAR driver.
+
+		Acceptable regulatory modes are:
+			==	====
+			0	FCC
+			1	CE
+			2	ISED
+			==	====
+
+		- The regulatory mode value has one of the above values.
+		- The default regulatory mode used in the driver is 0.
+
+What:		/sys/bus/platform/devices/INTC1092:00/intc_data
+Date:		August 2021
+KernelVersion:	5.15
+Contact:	Shravan S <s.shravan@intel.com>,
+		An Sudhakar <sudhakar.an@intel.com>
+Description:
+		This sysfs entry is used to retrieve Dynamic SAR information
+		emitted/maintained by a BIOS that supports Dynamic SAR.
+
+		The retrieved information is in the order given below:
+		- device_mode
+		- bandtable_index
+		- antennatable_index
+		- sartable_index
+
+		The above information is sent as integer values separated
+		by a single space. This information can then be pushed to a
+		WWAN modem that uses this to control the transmit signal
+		level using the Band/Antenna/SAR table index information.
+		These parameters are derived/decided by aggregating
+		device-mode like laptop/tablet/clamshell etc. and the
+		proximity-sensor data available to the embedded controller on
+		given host. The regulatory mode configured on Dynamic SAR
+		driver also influences these values.
+
+		The userspace applications can poll for changes to this file
+		using POLLPRI event on file-descriptor (fd) obtained by opening
+		this sysfs entry. Application can then read this information from
+		the sysfs node and consume the given information.
diff --git a/Documentation/ABI/testing/sysfs-driver-ufs b/Documentation/ABI/testing/sysfs-driver-ufs
index b4a5d55fa19f..ec3a7149ced5 100644
--- a/Documentation/ABI/testing/sysfs-driver-ufs
+++ b/Documentation/ABI/testing/sysfs-driver-ufs
@@ -1298,3 +1298,239 @@ Description:	This node is used to set or display whether UFS WriteBooster is
 		(if the platform supports UFSHCD_CAP_CLK_SCALING). For a
 		platform that doesn't support UFSHCD_CAP_CLK_SCALING, we can
 		disable/enable WriteBooster through this sysfs node.
+
+What:		/sys/bus/platform/drivers/ufshcd/*/device_descriptor/hpb_version
+Date:		June 2021
+Contact:	Daejun Park <daejun7.park@samsung.com>
+Description:	This entry shows the HPB specification version.
+		The full information about the descriptor can be found in the UFS
+		HPB (Host Performance Booster) Extension specifications.
+		Example: version 1.2.3 = 0123h
+
+		The file is read only.
+
+What:		/sys/bus/platform/drivers/ufshcd/*/device_descriptor/hpb_control
+Date:		June 2021
+Contact:	Daejun Park <daejun7.park@samsung.com>
+Description:	This entry shows an indication of the HPB control mode.
+		00h: Host control mode
+		01h: Device control mode
+
+		The file is read only.
+
+What:		/sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/hpb_region_size
+Date:		June 2021
+Contact:	Daejun Park <daejun7.park@samsung.com>
+Description:	This entry shows the bHPBRegionSize which can be calculated
+		as in the following (in bytes):
+		HPB Region size = 512B * 2^bHPBRegionSize
+
+		The file is read only.
+
+What:		/sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/hpb_number_lu
+Date:		June 2021
+Contact:	Daejun Park <daejun7.park@samsung.com>
+Description:	This entry shows the maximum number of HPB LU supported	by
+		the device.
+		00h: HPB is not supported by the device.
+		01h ~ 20h: Maximum number of HPB LU supported by the device
+
+		The file is read only.
+
+What:		/sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/hpb_subregion_size
+Date:		June 2021
+Contact:	Daejun Park <daejun7.park@samsung.com>
+Description:	This entry shows the bHPBSubRegionSize, which can be
+		calculated as in the following (in bytes) and shall be a multiple of
+		logical block size:
+		HPB Sub-Region size = 512B x 2^bHPBSubRegionSize
+		bHPBSubRegionSize shall not exceed bHPBRegionSize.
+
+		The file is read only.
+
+What:		/sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/hpb_max_active_regions
+Date:		June 2021
+Contact:	Daejun Park <daejun7.park@samsung.com>
+Description:	This entry shows the maximum number of active HPB regions that
+		is supported by the device.
+
+		The file is read only.
+
+What:		/sys/class/scsi_device/*/device/unit_descriptor/hpb_lu_max_active_regions
+Date:		June 2021
+Contact:	Daejun Park <daejun7.park@samsung.com>
+Description:	This entry shows the maximum number of HPB regions assigned to
+		the HPB logical unit.
+
+		The file is read only.
+
+What:		/sys/class/scsi_device/*/device/unit_descriptor/hpb_pinned_region_start_offset
+Date:		June 2021
+Contact:	Daejun Park <daejun7.park@samsung.com>
+Description:	This entry shows the start offset of HPB pinned region.
+
+		The file is read only.
+
+What:		/sys/class/scsi_device/*/device/unit_descriptor/hpb_number_pinned_regions
+Date:		June 2021
+Contact:	Daejun Park <daejun7.park@samsung.com>
+Description:	This entry shows the number of HPB pinned regions assigned to
+		the HPB logical unit.
+
+		The file is read only.
+
+What:		/sys/class/scsi_device/*/device/hpb_stats/hit_cnt
+Date:		June 2021
+Contact:	Daejun Park <daejun7.park@samsung.com>
+Description:	This entry shows the number of reads that changed to HPB read.
+
+		The file is read only.
+
+What:		/sys/class/scsi_device/*/device/hpb_stats/miss_cnt
+Date:		June 2021
+Contact:	Daejun Park <daejun7.park@samsung.com>
+Description:	This entry shows the number of reads that cannot be changed to
+		HPB read.
+
+		The file is read only.
+
+What:		/sys/class/scsi_device/*/device/hpb_stats/rb_noti_cnt
+Date:		June 2021
+Contact:	Daejun Park <daejun7.park@samsung.com>
+Description:	This entry shows the number of response UPIUs that has
+		recommendations for activating sub-regions and/or inactivating region.
+
+		The file is read only.
+
+What:		/sys/class/scsi_device/*/device/hpb_stats/rb_active_cnt
+Date:		June 2021
+Contact:	Daejun Park <daejun7.park@samsung.com>
+Description:	This entry shows the number of active sub-regions recommended by
+		response UPIUs.
+
+		The file is read only.
+
+What:		/sys/class/scsi_device/*/device/hpb_stats/rb_inactive_cnt
+Date:		June 2021
+Contact:	Daejun Park <daejun7.park@samsung.com>
+Description:	This entry shows the number of inactive regions recommended by
+		response UPIUs.
+
+		The file is read only.
+
+What:		/sys/class/scsi_device/*/device/hpb_stats/map_req_cnt
+Date:		June 2021
+Contact:	Daejun Park <daejun7.park@samsung.com>
+Description:	This entry shows the number of read buffer commands for
+		activating sub-regions recommended by response UPIUs.
+
+		The file is read only.
+
+What:		/sys/class/scsi_device/*/device/hpb_params/requeue_timeout_ms
+Date:		June 2021
+Contact:	Daejun Park <daejun7.park@samsung.com>
+Description:	This entry shows the requeue timeout threshold for write buffer
+		command in ms. The value can be changed by writing an integer to
+		this entry.
+
+What:		/sys/bus/platform/drivers/ufshcd/*/attributes/max_data_size_hpb_single_cmd
+Date:		June 2021
+Contact:	Daejun Park <daejun7.park@samsung.com>
+Description:	This entry shows the maximum HPB data size for using a single HPB
+		command.
+
+		===  ========
+		00h  4KB
+		01h  8KB
+		02h  12KB
+		...
+		FFh  1024KB
+		===  ========
+
+		The file is read only.
+
+What:		/sys/bus/platform/drivers/ufshcd/*/flags/hpb_enable
+Date:		June 2021
+Contact:	Daejun Park <daejun7.park@samsung.com>
+Description:	This entry shows the status of HPB.
+
+		== ============================
+		0  HPB is not enabled.
+		1  HPB is enabled
+		== ============================
+
+		The file is read only.
+
+What:		/sys/class/scsi_device/*/device/hpb_param_sysfs/activation_thld
+Date:		February 2021
+Contact:	Avri Altman <avri.altman@wdc.com>
+Description:	In host control mode, reads are the major source of activation
+		trials.  Once this threshold hs met, the region is added to the
+		"to-be-activated" list.  Since we reset the read counter upon
+		write, this include sending a rb command updating the region
+		ppn as well.
+
+What:		/sys/class/scsi_device/*/device/hpb_param_sysfs/normalization_factor
+Date:		February 2021
+Contact:	Avri Altman <avri.altman@wdc.com>
+Description:	In host control mode, we think of the regions as "buckets".
+		Those buckets are being filled with reads, and emptied on write.
+		We use entries_per_srgn - the amount of blocks in a subregion as
+		our bucket size.  This applies because HPB1.0 only handles
+		single-block reads.  Once the bucket size is crossed, we trigger
+		a normalization work - not only to avoid overflow, but mainly
+		because we want to keep those counters normalized, as we are
+		using those reads as a comparative score, to make various decisions.
+		The normalization is dividing (shift right) the read counter by
+		the normalization_factor. If during consecutive normalizations
+		an active region has exhausted its reads - inactivate it.
+
+What:		/sys/class/scsi_device/*/device/hpb_param_sysfs/eviction_thld_enter
+Date:		February 2021
+Contact:	Avri Altman <avri.altman@wdc.com>
+Description:	Region deactivation is often due to the fact that eviction took
+		place: A region becomes active at the expense of another. This is
+		happening when the max-active-regions limit has been crossed.
+		In host mode, eviction is considered an extreme measure. We
+		want to verify that the entering region has enough reads, and
+		the exiting region has much fewer reads.  eviction_thld_enter is
+		the min reads that a region must have in order to be considered
+		a candidate for evicting another region.
+
+What:		/sys/class/scsi_device/*/device/hpb_param_sysfs/eviction_thld_exit
+Date:		February 2021
+Contact:	Avri Altman <avri.altman@wdc.com>
+Description:	Same as above for the exiting region. A region is considered to
+		be a candidate for eviction only if it has fewer reads than
+		eviction_thld_exit.
+
+What:		/sys/class/scsi_device/*/device/hpb_param_sysfs/read_timeout_ms
+Date:		February 2021
+Contact:	Avri Altman <avri.altman@wdc.com>
+Description:	In order not to hang on to "cold" regions, we inactivate
+		a region that has no READ access for a predefined amount of
+		time - read_timeout_ms. If read_timeout_ms has expired, and the
+		region is dirty, it is less likely that we can make any use of
+		HPB reading it so we inactivate it.  Still, deactivation has
+		its overhead, and we may still benefit from HPB reading this
+		region if it is clean - see read_timeout_expiries.
+
+What:		/sys/class/scsi_device/*/device/hpb_param_sysfs/read_timeout_expiries
+Date:		February 2021
+Contact:	Avri Altman <avri.altman@wdc.com>
+Description:	If the region read timeout has expired, but the region is clean,
+		just re-wind its timer for another spin.  Do that as long as it
+		is clean and did not exhaust its read_timeout_expiries threshold.
+
+What:		/sys/class/scsi_device/*/device/hpb_param_sysfs/timeout_polling_interval_ms
+Date:		February 2021
+Contact:	Avri Altman <avri.altman@wdc.com>
+Description:	The frequency with which the delayed worker that checks the
+		read_timeouts is awakened.
+
+What:		/sys/class/scsi_device/*/device/hpb_param_sysfs/inflight_map_req
+Date:		February 2021
+Contact:	Avri Altman <avri.altman@wdc.com>
+Description:	In host control mode the host is the originator of map requests.
+		To avoid flooding the device with map requests, use a simple throttling
+		mechanism that limits the number of inflight map requests.
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index ef4b9218ae1e..f627e705e663 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -41,8 +41,7 @@ Description:	This parameter controls the number of prefree segments to be
 What:		/sys/fs/f2fs/<disk>/main_blkaddr
 Date:		November 2019
 Contact:	"Ramon Pantin" <pantin@google.com>
-Description:
-		 Shows first block address of MAIN area.
+Description:	Shows first block address of MAIN area.
 
 What:		/sys/fs/f2fs/<disk>/ipu_policy
 Date:		November 2013
@@ -493,3 +492,23 @@ Contact:	"Chao Yu" <yuchao0@huawei.com>
 Description:	When ATGC is on, it controls age threshold to bypass GCing young
 		candidates whose age is not beyond the threshold, by default it was
 		initialized as 604800 seconds (equals to 7 days).
+
+What:		/sys/fs/f2fs/<disk>/gc_reclaimed_segments
+Date:		July 2021
+Contact:	"Daeho Jeong" <daehojeong@google.com>
+Description:	Show how many segments have been reclaimed by GC during a specific
+		GC mode (0: GC normal, 1: GC idle CB, 2: GC idle greedy,
+		3: GC idle AT, 4: GC urgent high, 5: GC urgent low)
+		You can re-initialize this value to "0".
+
+What:		/sys/fs/f2fs/<disk>/gc_segment_mode
+Date:		July 2021
+Contact:	"Daeho Jeong" <daehojeong@google.com>
+Description:	You can control for which gc mode the "gc_reclaimed_segments" node shows.
+		Refer to the description of the modes in "gc_reclaimed_segments".
+
+What:		/sys/fs/f2fs/<disk>/seq_file_ra_mul
+Date:		July 2021
+Contact:	"Daeho Jeong" <daehojeong@google.com>
+Description:	You can	control the multiplier value of	bdi device readahead window size
+		between 2 (default) and 256 for POSIX_FADV_SEQUENTIAL advise option.
diff --git a/Documentation/ABI/testing/sysfs-kernel-iommu_groups b/Documentation/ABI/testing/sysfs-kernel-iommu_groups
index eae2f1c1e11e..b15af6a5bc08 100644
--- a/Documentation/ABI/testing/sysfs-kernel-iommu_groups
+++ b/Documentation/ABI/testing/sysfs-kernel-iommu_groups
@@ -42,8 +42,12 @@ Description:	/sys/kernel/iommu_groups/<grp_id>/type shows the type of default
 		========  ======================================================
 		DMA       All the DMA transactions from the device in this group
 		          are translated by the iommu.
+		DMA-FQ    As above, but using batched invalidation to lazily
+		          remove translations after use. This may offer reduced
+			  overhead at the cost of reduced memory protection.
 		identity  All the DMA transactions from the device in this group
-		          are not translated by the iommu.
+		          are not translated by the iommu. Maximum performance
+			  but zero protection.
 		auto      Change to the type the device was booted with.
 		========  ======================================================
 
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-numa b/Documentation/ABI/testing/sysfs-kernel-mm-numa
new file mode 100644
index 000000000000..77e559d4ed80
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-numa
@@ -0,0 +1,24 @@
+What:		/sys/kernel/mm/numa/
+Date:		June 2021
+Contact:	Linux memory management mailing list <linux-mm@kvack.org>
+Description:	Interface for NUMA
+
+What:		/sys/kernel/mm/numa/demotion_enabled
+Date:		June 2021
+Contact:	Linux memory management mailing list <linux-mm@kvack.org>
+Description:	Enable/disable demoting pages during reclaim
+
+		Page migration during reclaim is intended for systems
+		with tiered memory configurations.  These systems have
+		multiple types of memory with varied performance
+		characteristics instead of plain NUMA systems where
+		the same kind of memory is found at varied distances.
+		Allowing page migration during reclaim enables these
+		systems to migrate pages from fast tiers to slow tiers
+		when the fast tier is under pressure.  This migration
+		is performed before swap.  It may move data to a NUMA
+		node that does not fall into the cpuset of the
+		allocating process which might be construed to violate
+		the guarantees of cpusets.  This should not be enabled
+		on systems which need strict cpuset location
+		guarantees.
diff --git a/Documentation/ABI/testing/sysfs-platform-dell-smbios b/Documentation/ABI/testing/sysfs-platform-dell-smbios
index e6e0f7f834a7..5583da581025 100644
--- a/Documentation/ABI/testing/sysfs-platform-dell-smbios
+++ b/Documentation/ABI/testing/sysfs-platform-dell-smbios
@@ -1,7 +1,7 @@
 What:		/sys/devices/platform/<platform>/tokens/*
 Date:		November 2017
 KernelVersion:	4.15
-Contact:	"Mario Limonciello" <mario.limonciello@dell.com>
+Contact:	Dell.Client.Kernel@dell.com
 Description:
 		A read-only description of Dell platform tokens
 		available on the machine.
diff --git a/Documentation/ABI/testing/sysfs-platform-intel-wmi-thunderbolt b/Documentation/ABI/testing/sysfs-platform-intel-wmi-thunderbolt
index e19144fd5d86..fd3a7ec79760 100644
--- a/Documentation/ABI/testing/sysfs-platform-intel-wmi-thunderbolt
+++ b/Documentation/ABI/testing/sysfs-platform-intel-wmi-thunderbolt
@@ -1,7 +1,7 @@
 What:		/sys/devices/platform/<platform>/force_power
 Date:		September 2017
 KernelVersion:	4.15
-Contact:	"Mario Limonciello" <mario.limonciello@dell.com>
+Contact:	"Mario Limonciello" <mario.limonciello@outlook.com>
 Description:
 		Modify the platform force power state, influencing
 		Thunderbolt controllers to turn on or off when no
diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power
index 51c0f578bfce..90ec4987074b 100644
--- a/Documentation/ABI/testing/sysfs-power
+++ b/Documentation/ABI/testing/sysfs-power
@@ -295,7 +295,7 @@ Description:
 
 What:		/sys/power/resume_offset
 Date:		April 2018
-Contact:	Mario Limonciello <mario.limonciello@dell.com>
+Contact:	Mario Limonciello <mario.limonciello@outlook.com>
 Description:
 		This file is used for telling the kernel an offset into a disk
 		to use when hibernating the system such as with a swap file.
diff --git a/Documentation/PCI/endpoint/pci-endpoint-cfs.rst b/Documentation/PCI/endpoint/pci-endpoint-cfs.rst
index db609b97ad58..fb73345cfb8a 100644
--- a/Documentation/PCI/endpoint/pci-endpoint-cfs.rst
+++ b/Documentation/PCI/endpoint/pci-endpoint-cfs.rst
@@ -43,6 +43,7 @@ entries corresponding to EPF driver will be created by the EPF core.
 		.. <EPF Driver1>/
 			... <EPF Device 11>/
 			... <EPF Device 21>/
+			... <EPF Device 31>/
 		.. <EPF Driver2>/
 			... <EPF Device 12>/
 			... <EPF Device 22>/
@@ -68,6 +69,7 @@ created)
 				... subsys_vendor_id
 				... subsys_id
 				... interrupt_pin
+			        ... <Symlink EPF Device 31>/
                                 ... primary/
 			                ... <Symlink EPC Device1>/
                                 ... secondary/
@@ -79,6 +81,13 @@ interface should be added in 'primary' directory and symlink of endpoint
 controller connected to secondary interface should be added in 'secondary'
 directory.
 
+The <EPF Device> directory can have a list of symbolic links
+(<Symlink EPF Device 31>) to other <EPF Device>. These symbolic links should
+be created by the user to represent the virtual functions that are bound to
+the physical function. In the above directory structure <EPF Device 11> is a
+physical function and <EPF Device 31> is a virtual function. An EPF device once
+it's linked to another EPF device, cannot be linked to a EPC device.
+
 EPC Device
 ==========
 
@@ -98,7 +107,8 @@ entries corresponding to EPC device will be created by the EPC core.
 
 The <EPC Device> directory will have a list of symbolic links to
 <EPF Device>. These symbolic links should be created by the user to
-represent the functions present in the endpoint device.
+represent the functions present in the endpoint device. Only <EPF Device>
+that represents a physical function can be linked to a EPC device.
 
 The <EPC Device> directory will also have a *start* field. Once
 "1" is written to this field, the endpoint device will be ready to
diff --git a/Documentation/PCI/pci.rst b/Documentation/PCI/pci.rst
index fa651e25d98c..87c6f4a6ca32 100644
--- a/Documentation/PCI/pci.rst
+++ b/Documentation/PCI/pci.rst
@@ -103,6 +103,7 @@ need pass only as many optional fields as necessary:
   - subvendor and subdevice fields default to PCI_ANY_ID (FFFFFFFF)
   - class and classmask fields default to 0
   - driver_data defaults to 0UL.
+  - override_only field defaults to 0.
 
 Note that driver_data must match the value used by any of the pci_device_id
 entries defined in the driver. This makes the driver_data field mandatory
diff --git a/Documentation/admin-guide/acpi/ssdt-overlays.rst b/Documentation/admin-guide/acpi/ssdt-overlays.rst
index 5d7e25988085..b5fbf54dca19 100644
--- a/Documentation/admin-guide/acpi/ssdt-overlays.rst
+++ b/Documentation/admin-guide/acpi/ssdt-overlays.rst
@@ -30,22 +30,21 @@ following ASL code can be used::
         {
             Device (STAC)
             {
-                Name (_ADR, Zero)
                 Name (_HID, "BMA222E")
+                Name (RBUF, ResourceTemplate ()
+                {
+                    I2cSerialBus (0x0018, ControllerInitiated, 0x00061A80,
+                                AddressingMode7Bit, "\\_SB.I2C6", 0x00,
+                                ResourceConsumer, ,)
+                    GpioInt (Edge, ActiveHigh, Exclusive, PullDown, 0x0000,
+                            "\\_SB.GPO2", 0x00, ResourceConsumer, , )
+                    { // Pin list
+                        0
+                    }
+                })
 
                 Method (_CRS, 0, Serialized)
                 {
-                    Name (RBUF, ResourceTemplate ()
-                    {
-                        I2cSerialBus (0x0018, ControllerInitiated, 0x00061A80,
-                                    AddressingMode7Bit, "\\_SB.I2C6", 0x00,
-                                    ResourceConsumer, ,)
-                        GpioInt (Edge, ActiveHigh, Exclusive, PullDown, 0x0000,
-                                "\\_SB.GPO2", 0x00, ResourceConsumer, , )
-                        { // Pin list
-                            0
-                        }
-                    })
                     Return (RBUF)
                 }
             }
@@ -75,7 +74,7 @@ This option allows loading of user defined SSDTs from initrd and it is useful
 when the system does not support EFI or when there is not enough EFI storage.
 
 It works in a similar way with initrd based ACPI tables override/upgrade: SSDT
-aml code must be placed in the first, uncompressed, initrd under the
+AML code must be placed in the first, uncompressed, initrd under the
 "kernel/firmware/acpi" path. Multiple files can be used and this will translate
 in loading multiple tables. Only SSDT and OEM tables are allowed. See
 initrd_table_override.txt for more details.
@@ -103,12 +102,14 @@ This is the preferred method, when EFI is supported on the platform, because it
 allows a persistent, OS independent way of storing the user defined SSDTs. There
 is also work underway to implement EFI support for loading user defined SSDTs
 and using this method will make it easier to convert to the EFI loading
-mechanism when that will arrive.
+mechanism when that will arrive. To enable it, the
+CONFIG_EFI_CUSTOM_SSDT_OVERLAYS shoyld be chosen to y.
 
-In order to load SSDTs from an EFI variable the efivar_ssdt kernel command line
-parameter can be used. The argument for the option is the variable name to
-use. If there are multiple variables with the same name but with different
-vendor GUIDs, all of them will be loaded.
+In order to load SSDTs from an EFI variable the ``"efivar_ssdt=..."`` kernel
+command line parameter can be used (the name has a limitation of 16 characters).
+The argument for the option is the variable name to use. If there are multiple
+variables with the same name but with different vendor GUIDs, all of them will
+be loaded.
 
 In order to store the AML code in an EFI variable the efivarfs filesystem can be
 used. It is enabled and mounted by default in /sys/firmware/efi/efivars in all
@@ -127,7 +128,7 @@ variable with the content from a given file::
 
     #!/bin/sh -e
 
-    while ! [ -z "$1" ]; do
+    while [ -n "$1" ]; do
             case "$1" in
             "-f") filename="$2"; shift;;
             "-g") guid="$2"; shift;;
@@ -167,14 +168,14 @@ variable with the content from a given file::
 Loading ACPI SSDTs from configfs
 ================================
 
-This option allows loading of user defined SSDTs from userspace via the configfs
+This option allows loading of user defined SSDTs from user space via the configfs
 interface. The CONFIG_ACPI_CONFIGFS option must be select and configfs must be
 mounted. In the following examples, we assume that configfs has been mounted in
-/config.
+/sys/kernel/config.
 
-New tables can be loading by creating new directories in /config/acpi/table/ and
-writing the SSDT aml code in the aml attribute::
+New tables can be loading by creating new directories in /sys/kernel/config/acpi/table
+and writing the SSDT AML code in the aml attribute::
 
-    cd /config/acpi/table
+    cd /sys/kernel/config/acpi/table
     mkdir my_ssdt
     cat ~/ssdt.aml > my_ssdt/aml
diff --git a/Documentation/admin-guide/bootconfig.rst b/Documentation/admin-guide/bootconfig.rst
index 6a79f2e59396..a1860fc0ca88 100644
--- a/Documentation/admin-guide/bootconfig.rst
+++ b/Documentation/admin-guide/bootconfig.rst
@@ -178,7 +178,7 @@ update the boot loader and the kernel image itself as long as the boot
 loader passes the correct initrd file size. If by any chance, the boot
 loader passes a longer size, the kernel fails to find the bootconfig data.
 
-To do this operation, Linux kernel provides "bootconfig" command under
+To do this operation, Linux kernel provides ``bootconfig`` command under
 tools/bootconfig, which allows admin to apply or delete the config file
 to/from initrd image. You can build it by the following command::
 
@@ -196,6 +196,43 @@ To remove the config from the image, you can use -d option as below::
 Then add "bootconfig" on the normal kernel command line to tell the
 kernel to look for the bootconfig at the end of the initrd file.
 
+
+Kernel parameters via Boot Config
+=================================
+
+In addition to the kernel command line, the boot config can be used for
+passing the kernel parameters. All the key-value pairs under ``kernel``
+key will be passed to kernel cmdline directly. Moreover, the key-value
+pairs under ``init`` will be passed to init process via the cmdline.
+The parameters are concatinated with user-given kernel cmdline string
+as the following order, so that the command line parameter can override
+bootconfig parameters (this depends on how the subsystem handles parameters
+but in general, earlier parameter will be overwritten by later one.)::
+
+ [bootconfig params][cmdline params] -- [bootconfig init params][cmdline init params]
+
+Here is an example of the bootconfig file for kernel/init parameters.::
+
+ kernel {
+   root = 01234567-89ab-cdef-0123-456789abcd
+ }
+ init {
+  splash
+ }
+
+This will be copied into the kernel cmdline string as the following::
+
+ root="01234567-89ab-cdef-0123-456789abcd" -- splash
+
+If user gives some other command line like,::
+
+ ro bootconfig -- quiet
+
+The final kernel cmdline will be the following::
+
+ root="01234567-89ab-cdef-0123-456789abcd" ro bootconfig -- splash quiet
+
+
 Config File Limitation
 ======================
 
diff --git a/Documentation/admin-guide/cputopology.rst b/Documentation/admin-guide/cputopology.rst
index 8632a1db36e4..b085dbac60a5 100644
--- a/Documentation/admin-guide/cputopology.rst
+++ b/Documentation/admin-guide/cputopology.rst
@@ -58,9 +58,9 @@ source for the output is in brackets ("[]").
 		[NR_CPUS-1]
 
     offline:	CPUs that are not online because they have been
-		HOTPLUGGED off (see cpu-hotplug.txt) or exceed the limit
-		of CPUs allowed by the kernel configuration (kernel_max
-		above). [~cpu_online_mask + cpus >= NR_CPUS]
+		HOTPLUGGED off or exceed the limit of CPUs allowed by the
+		kernel configuration (kernel_max above).
+		[~cpu_online_mask + cpus >= NR_CPUS]
 
     online:	CPUs that are online and being scheduled [cpu_online_mask]
 
@@ -96,5 +96,5 @@ online.)::
        possible: 0-127
         present: 0-3
 
-See cpu-hotplug.txt for the possible_cpus=NUM kernel start parameter
-as well as more information on the various cpumasks.
+See Documentation/core-api/cpu_hotplug.rst for the possible_cpus=NUM
+kernel start parameter as well as more information on the various cpumasks.
diff --git a/Documentation/admin-guide/devices.txt b/Documentation/admin-guide/devices.txt
index 9c2be821c225..922c23bb4372 100644
--- a/Documentation/admin-guide/devices.txt
+++ b/Documentation/admin-guide/devices.txt
@@ -2993,10 +2993,10 @@
 		65 = /dev/infiniband/issm1     Second InfiniBand IsSM device
 		  ...
 		127 = /dev/infiniband/issm63    63rd InfiniBand IsSM device
-		128 = /dev/infiniband/uverbs0   First InfiniBand verbs device
-		129 = /dev/infiniband/uverbs1   Second InfiniBand verbs device
+		192 = /dev/infiniband/uverbs0   First InfiniBand verbs device
+		193 = /dev/infiniband/uverbs1   Second InfiniBand verbs device
 		  ...
-		159 = /dev/infiniband/uverbs31  31st InfiniBand verbs device
+		223 = /dev/infiniband/uverbs31  31st InfiniBand verbs device
 
  232 char	Biometric Devices
 		0 = /dev/biometric/sensor0/fingerprint	first fingerprint sensor on first device
diff --git a/Documentation/admin-guide/hw-vuln/core-scheduling.rst b/Documentation/admin-guide/hw-vuln/core-scheduling.rst
index 7b410aef9c5c..0febe458597c 100644
--- a/Documentation/admin-guide/hw-vuln/core-scheduling.rst
+++ b/Documentation/admin-guide/hw-vuln/core-scheduling.rst
@@ -181,10 +181,12 @@ Open cross-HT issues that core scheduling does not solve
 --------------------------------------------------------
 1. For MDS
 ~~~~~~~~~~
-Core scheduling cannot protect against MDS attacks between an HT running in
-user mode and another running in kernel mode. Even though both HTs run tasks
-which trust each other, kernel memory is still considered untrusted. Such
-attacks are possible for any combination of sibling CPU modes (host or guest mode).
+Core scheduling cannot protect against MDS attacks between the siblings
+running in user mode and the others running in kernel mode. Even though all
+siblings run tasks which trust each other, when the kernel is executing
+code on behalf of a task, it cannot trust the code running in the
+sibling. Such attacks are possible for any combination of sibling CPU modes
+(host or guest mode).
 
 2. For L1TF
 ~~~~~~~~~~~
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 84dc5790741b..91ba391f9b32 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -301,10 +301,7 @@
 	amd_iommu=	[HW,X86-64]
 			Pass parameters to the AMD IOMMU driver in the system.
 			Possible values are:
-			fullflush - enable flushing of IO/TLB entries when
-				    they are unmapped. Otherwise they are
-				    flushed before they will be reused, which
-				    is a lot of faster
+			fullflush - Deprecated, equivalent to iommu.strict=1
 			off	  - do not initialize any AMD IOMMU found in
 				    the system
 			force_isolation - Force device isolation for all
@@ -1761,6 +1758,11 @@
 			support for the idxd driver. By default it is set to
 			true (1).
 
+	idxd.tc_override= [HW]
+			Format: <bool>
+			Allow override of default traffic class configuration
+			for the device. By default it is set to false (0).
+
 	ieee754=	[MIPS] Select IEEE Std 754 conformance mode
 			Format: { strict | legacy | 2008 | relaxed }
 			Default: strict
@@ -1958,18 +1960,17 @@
 			this case, gfx device will use physical address for
 			DMA.
 		strict [Default Off]
-			With this option on every unmap_single operation will
-			result in a hardware IOTLB flush operation as opposed
-			to batching them for performance.
+			Deprecated, equivalent to iommu.strict=1.
 		sp_off [Default Off]
 			By default, super page will be supported if Intel IOMMU
 			has the capability. With this option, super page will
 			not be supported.
-		sm_on [Default Off]
-			By default, scalable mode will be disabled even if the
-			hardware advertises that it has support for the scalable
-			mode translation. With this option set, scalable mode
-			will be used on hardware which claims to support it.
+		sm_on
+			Enable the Intel IOMMU scalable mode if the hardware
+			advertises that it has support for the scalable mode
+			translation.
+		sm_off
+			Disallow use of the Intel IOMMU scalable mode.
 		tboot_noforce [Default Off]
 			Do not force the Intel IOMMU enabled under tboot.
 			By default, tboot will force Intel IOMMU on, which
@@ -2061,13 +2062,12 @@
 			  throughput at the cost of reduced device isolation.
 			  Will fall back to strict mode if not supported by
 			  the relevant IOMMU driver.
-			1 - Strict mode (default).
+			1 - Strict mode.
 			  DMA unmap operations invalidate IOMMU hardware TLBs
 			  synchronously.
-			Note: on x86, the default behaviour depends on the
-			equivalent driver-specific parameters, but a strict
-			mode explicitly specified by either method takes
-			precedence.
+			unset - Use value of CONFIG_IOMMU_DEFAULT_DMA_{LAZY,STRICT}.
+			Note: on x86, strict mode specified via one of the
+			legacy driver-specific options takes precedence.
 
 	iommu.passthrough=
 			[ARM64, X86] Configure DMA to bypass the IOMMU by default.
diff --git a/Documentation/admin-guide/laptops/lg-laptop.rst b/Documentation/admin-guide/laptops/lg-laptop.rst
index ce9b14671cb9..6fbe165dcd27 100644
--- a/Documentation/admin-guide/laptops/lg-laptop.rst
+++ b/Documentation/admin-guide/laptops/lg-laptop.rst
@@ -13,10 +13,8 @@ Hotkeys
 The following FN keys are ignored by the kernel without this driver:
 
 - FN-F1 (LG control panel)   - Generates F15
-- FN-F5 (Touchpad toggle)    - Generates F13
+- FN-F5 (Touchpad toggle)    - Generates F21
 - FN-F6 (Airplane mode)      - Generates RFKILL
-- FN-F8 (Keyboard backlight) - Generates F16.
-  This key also changes keyboard backlight mode.
 - FN-F9 (Reader mode)        - Generates F14
 
 The rest of the FN keys work without a need for a special driver.
diff --git a/Documentation/admin-guide/mm/damon/index.rst b/Documentation/admin-guide/mm/damon/index.rst
new file mode 100644
index 000000000000..8c5dde3a5754
--- /dev/null
+++ b/Documentation/admin-guide/mm/damon/index.rst
@@ -0,0 +1,15 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========================
+Monitoring Data Accesses
+========================
+
+:doc:`DAMON </vm/damon/index>` allows light-weight data access monitoring.
+Using DAMON, users can analyze the memory access patterns of their systems and
+optimize those.
+
+.. toctree::
+   :maxdepth: 2
+
+   start
+   usage
diff --git a/Documentation/admin-guide/mm/damon/start.rst b/Documentation/admin-guide/mm/damon/start.rst
new file mode 100644
index 000000000000..d5eb89a8fc38
--- /dev/null
+++ b/Documentation/admin-guide/mm/damon/start.rst
@@ -0,0 +1,114 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+Getting Started
+===============
+
+This document briefly describes how you can use DAMON by demonstrating its
+default user space tool.  Please note that this document describes only a part
+of its features for brevity.  Please refer to :doc:`usage` for more details.
+
+
+TL; DR
+======
+
+Follow the commands below to monitor and visualize the memory access pattern of
+your workload. ::
+
+    # # build the kernel with CONFIG_DAMON_*=y, install it, and reboot
+    # mount -t debugfs none /sys/kernel/debug/
+    # git clone https://github.com/awslabs/damo
+    # ./damo/damo record $(pidof <your workload>)
+    # ./damo/damo report heat --plot_ascii
+
+The final command draws the access heatmap of ``<your workload>``.  The heatmap
+shows which memory region (x-axis) is accessed when (y-axis) and how frequently
+(number; the higher the more accesses have been observed). ::
+
+    111111111111111111111111111111111111111111111111111111110000
+    111121111111111111111111111111211111111111111111111111110000
+    000000000000000000000000000000000000000000000000001555552000
+    000000000000000000000000000000000000000000000222223555552000
+    000000000000000000000000000000000000000011111677775000000000
+    000000000000000000000000000000000000000488888000000000000000
+    000000000000000000000000000000000177888400000000000000000000
+    000000000000000000000000000046666522222100000000000000000000
+    000000000000000000000014444344444300000000000000000000000000
+    000000000000000002222245555510000000000000000000000000000000
+    # access_frequency:  0  1  2  3  4  5  6  7  8  9
+    # x-axis: space (140286319947776-140286426374096: 101.496 MiB)
+    # y-axis: time (605442256436361-605479951866441: 37.695430s)
+    # resolution: 60x10 (1.692 MiB and 3.770s for each character)
+
+
+Prerequisites
+=============
+
+Kernel
+------
+
+You should first ensure your system is running on a kernel built with
+``CONFIG_DAMON_*=y``.
+
+
+User Space Tool
+---------------
+
+For the demonstration, we will use the default user space tool for DAMON,
+called DAMON Operator (DAMO).  It is available at
+https://github.com/awslabs/damo.  The examples below assume that ``damo`` is on
+your ``$PATH``.  It's not mandatory, though.
+
+Because DAMO is using the debugfs interface (refer to :doc:`usage` for the
+detail) of DAMON, you should ensure debugfs is mounted.  Mount it manually as
+below::
+
+    # mount -t debugfs none /sys/kernel/debug/
+
+or append the following line to your ``/etc/fstab`` file so that your system
+can automatically mount debugfs upon booting::
+
+    debugfs /sys/kernel/debug debugfs defaults 0 0
+
+
+Recording Data Access Patterns
+==============================
+
+The commands below record the memory access patterns of a program and save the
+monitoring results to a file. ::
+
+    $ git clone https://github.com/sjp38/masim
+    $ cd masim; make; ./masim ./configs/zigzag.cfg &
+    $ sudo damo record -o damon.data $(pidof masim)
+
+The first two lines of the commands download an artificial memory access
+generator program and run it in the background.  The generator will repeatedly
+access two 100 MiB sized memory regions one by one.  You can substitute this
+with your real workload.  The last line asks ``damo`` to record the access
+pattern in the ``damon.data`` file.
+
+
+Visualizing Recorded Patterns
+=============================
+
+The following three commands visualize the recorded access patterns and save
+the results as separate image files. ::
+
+    $ damo report heats --heatmap access_pattern_heatmap.png
+    $ damo report wss --range 0 101 1 --plot wss_dist.png
+    $ damo report wss --range 0 101 1 --sortby time --plot wss_chron_change.png
+
+- ``access_pattern_heatmap.png`` will visualize the data access pattern in a
+  heatmap, showing which memory region (y-axis) got accessed when (x-axis)
+  and how frequently (color).
+- ``wss_dist.png`` will show the distribution of the working set size.
+- ``wss_chron_change.png`` will show how the working set size has
+  chronologically changed.
+
+You can view the visualizations of this example workload at [1]_.
+Visualizations of other realistic workloads are available at [2]_ [3]_ [4]_.
+
+.. [1] https://damonitor.github.io/doc/html/v17/admin-guide/mm/damon/start.html#visualizing-recorded-patterns
+.. [2] https://damonitor.github.io/test/result/visual/latest/rec.heatmap.1.png.html
+.. [3] https://damonitor.github.io/test/result/visual/latest/rec.wss_sz.png.html
+.. [4] https://damonitor.github.io/test/result/visual/latest/rec.wss_time.png.html
diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst
new file mode 100644
index 000000000000..a72cda374aba
--- /dev/null
+++ b/Documentation/admin-guide/mm/damon/usage.rst
@@ -0,0 +1,112 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+Detailed Usages
+===============
+
+DAMON provides below three interfaces for different users.
+
+- *DAMON user space tool.*
+  This is for privileged people such as system administrators who want a
+  just-working human-friendly interface.  Using this, users can use the DAMON’s
+  major features in a human-friendly way.  It may not be highly tuned for
+  special cases, though.  It supports only virtual address spaces monitoring.
+- *debugfs interface.*
+  This is for privileged user space programmers who want more optimized use of
+  DAMON.  Using this, users can use DAMON’s major features by reading
+  from and writing to special debugfs files.  Therefore, you can write and use
+  your personalized DAMON debugfs wrapper programs that reads/writes the
+  debugfs files instead of you.  The DAMON user space tool is also a reference
+  implementation of such programs.  It supports only virtual address spaces
+  monitoring.
+- *Kernel Space Programming Interface.*
+  This is for kernel space programmers.  Using this, users can utilize every
+  feature of DAMON most flexibly and efficiently by writing kernel space
+  DAMON application programs for you.  You can even extend DAMON for various
+  address spaces.
+
+Nevertheless, you could write your own user space tool using the debugfs
+interface.  A reference implementation is available at
+https://github.com/awslabs/damo.  If you are a kernel programmer, you could
+refer to :doc:`/vm/damon/api` for the kernel space programming interface.  For
+the reason, this document describes only the debugfs interface
+
+debugfs Interface
+=================
+
+DAMON exports three files, ``attrs``, ``target_ids``, and ``monitor_on`` under
+its debugfs directory, ``<debugfs>/damon/``.
+
+
+Attributes
+----------
+
+Users can get and set the ``sampling interval``, ``aggregation interval``,
+``regions update interval``, and min/max number of monitoring target regions by
+reading from and writing to the ``attrs`` file.  To know about the monitoring
+attributes in detail, please refer to the :doc:`/vm/damon/design`.  For
+example, below commands set those values to 5 ms, 100 ms, 1,000 ms, 10 and
+1000, and then check it again::
+
+    # cd <debugfs>/damon
+    # echo 5000 100000 1000000 10 1000 > attrs
+    # cat attrs
+    5000 100000 1000000 10 1000
+
+
+Target IDs
+----------
+
+Some types of address spaces supports multiple monitoring target.  For example,
+the virtual memory address spaces monitoring can have multiple processes as the
+monitoring targets.  Users can set the targets by writing relevant id values of
+the targets to, and get the ids of the current targets by reading from the
+``target_ids`` file.  In case of the virtual address spaces monitoring, the
+values should be pids of the monitoring target processes.  For example, below
+commands set processes having pids 42 and 4242 as the monitoring targets and
+check it again::
+
+    # cd <debugfs>/damon
+    # echo 42 4242 > target_ids
+    # cat target_ids
+    42 4242
+
+Note that setting the target ids doesn't start the monitoring.
+
+
+Turning On/Off
+--------------
+
+Setting the files as described above doesn't incur effect unless you explicitly
+start the monitoring.  You can start, stop, and check the current status of the
+monitoring by writing to and reading from the ``monitor_on`` file.  Writing
+``on`` to the file starts the monitoring of the targets with the attributes.
+Writing ``off`` to the file stops those.  DAMON also stops if every target
+process is terminated.  Below example commands turn on, off, and check the
+status of DAMON::
+
+    # cd <debugfs>/damon
+    # echo on > monitor_on
+    # echo off > monitor_on
+    # cat monitor_on
+    off
+
+Please note that you cannot write to the above-mentioned debugfs files while
+the monitoring is turned on.  If you write to the files while DAMON is running,
+an error code such as ``-EBUSY`` will be returned.
+
+
+Tracepoint for Monitoring Results
+=================================
+
+DAMON provides the monitoring results via a tracepoint,
+``damon:damon_aggregated``.  While the monitoring is turned on, you could
+record the tracepoint events and show results using tracepoint supporting tools
+like ``perf``.  For example::
+
+    # echo on > monitor_on
+    # perf record -e damon:damon_aggregated &
+    # sleep 5
+    # kill 9 $(pidof perf)
+    # echo off > monitor_on
+    # perf script
diff --git a/Documentation/admin-guide/mm/index.rst b/Documentation/admin-guide/mm/index.rst
index 4b14d8b50e9e..cbd19d5e625f 100644
--- a/Documentation/admin-guide/mm/index.rst
+++ b/Documentation/admin-guide/mm/index.rst
@@ -27,6 +27,7 @@ the Linux memory management.
 
    concepts
    cma_debugfs
+   damon/index
    hugetlbpage
    idle_page_tracking
    ksm
diff --git a/Documentation/admin-guide/mm/memory-hotplug.rst b/Documentation/admin-guide/mm/memory-hotplug.rst
index c6bae2d77160..03dfbc925252 100644
--- a/Documentation/admin-guide/mm/memory-hotplug.rst
+++ b/Documentation/admin-guide/mm/memory-hotplug.rst
@@ -1,466 +1,576 @@
 .. _admin_guide_memory_hotplug:
 
-==============
-Memory Hotplug
-==============
+==================
+Memory Hot(Un)Plug
+==================
 
-:Created:							Jul 28 2007
-:Updated: Add some details about locking internals:		Aug 20 2018
-
-This document is about memory hotplug including how-to-use and current status.
-Because Memory Hotplug is still under development, contents of this text will
-be changed often.
+This document describes generic Linux support for memory hot(un)plug with
+a focus on System RAM, including ZONE_MOVABLE support.
 
 .. contents:: :local:
 
-.. note::
+Introduction
+============
 
-    (1) x86_64's has special implementation for memory hotplug.
-        This text does not describe it.
-    (2) This text assumes that sysfs is mounted at ``/sys``.
+Memory hot(un)plug allows for increasing and decreasing the size of physical
+memory available to a machine at runtime. In the simplest case, it consists of
+physically plugging or unplugging a DIMM at runtime, coordinated with the
+operating system.
 
+Memory hot(un)plug is used for various purposes:
 
-Introduction
-============
+- The physical memory available to a machine can be adjusted at runtime, up- or
+  downgrading the memory capacity. This dynamic memory resizing, sometimes
+  referred to as "capacity on demand", is frequently used with virtual machines
+  and logical partitions.
+
+- Replacing hardware, such as DIMMs or whole NUMA nodes, without downtime. One
+  example is replacing failing memory modules.
 
-Purpose of memory hotplug
--------------------------
+- Reducing energy consumption either by physically unplugging memory modules or
+  by logically unplugging (parts of) memory modules from Linux.
 
-Memory Hotplug allows users to increase/decrease the amount of memory.
-Generally, there are two purposes.
+Further, the basic memory hot(un)plug infrastructure in Linux is nowadays also
+used to expose persistent memory, other performance-differentiated memory and
+reserved memory regions as ordinary system RAM to Linux.
 
-(A) For changing the amount of memory.
-    This is to allow a feature like capacity on demand.
-(B) For installing/removing DIMMs or NUMA-nodes physically.
-    This is to exchange DIMMs/NUMA-nodes, reduce power consumption, etc.
+Linux only supports memory hot(un)plug on selected 64 bit architectures, such as
+x86_64, arm64, ppc64, s390x and ia64.
 
-(A) is required by highly virtualized environments and (B) is required by
-hardware which supports memory power management.
+Memory Hot(Un)Plug Granularity
+------------------------------
 
-Linux memory hotplug is designed for both purpose.
+Memory hot(un)plug in Linux uses the SPARSEMEM memory model, which divides the
+physical memory address space into chunks of the same size: memory sections. The
+size of a memory section is architecture dependent. For example, x86_64 uses
+128 MiB and ppc64 uses 16 MiB.
 
-Phases of memory hotplug
+Memory sections are combined into chunks referred to as "memory blocks". The
+size of a memory block is architecture dependent and corresponds to the smallest
+granularity that can be hot(un)plugged. The default size of a memory block is
+the same as memory section size, unless an architecture specifies otherwise.
+
+All memory blocks have the same size.
+
+Phases of Memory Hotplug
 ------------------------
 
-There are 2 phases in Memory Hotplug:
+Memory hotplug consists of two phases:
 
-  1) Physical Memory Hotplug phase
-  2) Logical Memory Hotplug phase.
+(1) Adding the memory to Linux
+(2) Onlining memory blocks
 
-The First phase is to communicate hardware/firmware and make/erase
-environment for hotplugged memory. Basically, this phase is necessary
-for the purpose (B), but this is good phase for communication between
-highly virtualized environments too.
+In the first phase, metadata, such as the memory map ("memmap") and page tables
+for the direct mapping, is allocated and initialized, and memory blocks are
+created; the latter also creates sysfs files for managing newly created memory
+blocks.
 
-When memory is hotplugged, the kernel recognizes new memory, makes new memory
-management tables, and makes sysfs files for new memory's operation.
+In the second phase, added memory is exposed to the page allocator. After this
+phase, the memory is visible in memory statistics, such as free and total
+memory, of the system.
 
-If firmware supports notification of connection of new memory to OS,
-this phase is triggered automatically. ACPI can notify this event. If not,
-"probe" operation by system administration is used instead.
-(see :ref:`memory_hotplug_physical_mem`).
+Phases of Memory Hotunplug
+--------------------------
 
-Logical Memory Hotplug phase is to change memory state into
-available/unavailable for users. Amount of memory from user's view is
-changed by this phase. The kernel makes all memory in it as free pages
-when a memory range is available.
+Memory hotunplug consists of two phases:
 
-In this document, this phase is described as online/offline.
+(1) Offlining memory blocks
+(2) Removing the memory from Linux
 
-Logical Memory Hotplug phase is triggered by write of sysfs file by system
-administrator. For the hot-add case, it must be executed after Physical Hotplug
-phase by hand.
-(However, if you writes udev's hotplug scripts for memory hotplug, these
-phases can be execute in seamless way.)
+In the fist phase, memory is "hidden" from the page allocator again, for
+example, by migrating busy memory to other memory locations and removing all
+relevant free pages from the page allocator After this phase, the memory is no
+longer visible in memory statistics of the system.
 
-Unit of Memory online/offline operation
----------------------------------------
+In the second phase, the memory blocks are removed and metadata is freed.
 
-Memory hotplug uses SPARSEMEM memory model which allows memory to be divided
-into chunks of the same size. These chunks are called "sections". The size of
-a memory section is architecture dependent. For example, power uses 16MiB, ia64
-uses 1GiB.
+Memory Hotplug Notifications
+============================
 
-Memory sections are combined into chunks referred to as "memory blocks". The
-size of a memory block is architecture dependent and represents the logical
-unit upon which memory online/offline operations are to be performed. The
-default size of a memory block is the same as memory section size unless an
-architecture specifies otherwise. (see :ref:`memory_hotplug_sysfs_files`.)
+There are various ways how Linux is notified about memory hotplug events such
+that it can start adding hotplugged memory. This description is limited to
+systems that support ACPI; mechanisms specific to other firmware interfaces or
+virtual machines are not described.
 
-To determine the size (in bytes) of a memory block please read this file::
+ACPI Notifications
+------------------
 
-  /sys/devices/system/memory/block_size_bytes
+Platforms that support ACPI, such as x86_64, can support memory hotplug
+notifications via ACPI.
 
-Kernel Configuration
-====================
+In general, a firmware supporting memory hotplug defines a memory class object
+HID "PNP0C80". When notified about hotplug of a new memory device, the ACPI
+driver will hotplug the memory to Linux.
 
-To use memory hotplug feature, kernel must be compiled with following
-config options.
+If the firmware supports hotplug of NUMA nodes, it defines an object _HID
+"ACPI0004", "PNP0A05", or "PNP0A06". When notified about an hotplug event, all
+assigned memory devices are added to Linux by the ACPI driver.
 
-- For all memory hotplug:
-    - Memory model -> Sparse Memory  (``CONFIG_SPARSEMEM``)
-    - Allow for memory hot-add       (``CONFIG_MEMORY_HOTPLUG``)
+Similarly, Linux can be notified about requests to hotunplug a memory device or
+a NUMA node via ACPI. The ACPI driver will try offlining all relevant memory
+blocks, and, if successful, hotunplug the memory from Linux.
 
-- To enable memory removal, the following are also necessary:
-    - Allow for memory hot remove    (``CONFIG_MEMORY_HOTREMOVE``)
-    - Page Migration                 (``CONFIG_MIGRATION``)
+Manual Probing
+--------------
 
-- For ACPI memory hotplug, the following are also necessary:
-    - Memory hotplug (under ACPI Support menu) (``CONFIG_ACPI_HOTPLUG_MEMORY``)
-    - This option can be kernel module.
+On some architectures, the firmware may not be able to notify the operating
+system about a memory hotplug event. Instead, the memory has to be manually
+probed from user space.
 
-- As a related configuration, if your box has a feature of NUMA-node hotplug
-  via ACPI, then this option is necessary too.
+The probe interface is located at::
 
-    - ACPI0004,PNP0A05 and PNP0A06 Container Driver (under ACPI Support menu)
-      (``CONFIG_ACPI_CONTAINER``).
+	/sys/devices/system/memory/probe
 
-     This option can be kernel module too.
+Only complete memory blocks can be probed. Individual memory blocks are probed
+by providing the physical start address of the memory block::
 
+	% echo addr > /sys/devices/system/memory/probe
 
-.. _memory_hotplug_sysfs_files:
+Which results in a memory block for the range [addr, addr + memory_block_size)
+being created.
 
-sysfs files for memory hotplug
-==============================
+.. note::
 
-All memory blocks have their device information in sysfs.  Each memory block
-is described under ``/sys/devices/system/memory`` as::
+  Using the probe interface is discouraged as it is easy to crash the kernel,
+  because Linux cannot validate user input; this interface might be removed in
+  the future.
 
-	/sys/devices/system/memory/memoryXXX
+Onlining and Offlining Memory Blocks
+====================================
 
-where XXX is the memory block id.
+After a memory block has been created, Linux has to be instructed to actually
+make use of that memory: the memory block has to be "online".
 
-For the memory block covered by the sysfs directory.  It is expected that all
-memory sections in this range are present and no memory holes exist in the
-range. Currently there is no way to determine if there is a memory hole, but
-the existence of one should not affect the hotplug capabilities of the memory
-block.
+Before a memory block can be removed, Linux has to stop using any memory part of
+the memory block: the memory block has to be "offlined".
 
-For example, assume 1GiB memory block size. A device for a memory starting at
-0x100000000 is ``/sys/device/system/memory/memory4``::
+The Linux kernel can be configured to automatically online added memory blocks
+and drivers automatically trigger offlining of memory blocks when trying
+hotunplug of memory. Memory blocks can only be removed once offlining succeeded
+and drivers may trigger offlining of memory blocks when attempting hotunplug of
+memory.
 
-	(0x100000000 / 1Gib = 4)
+Onlining Memory Blocks Manually
+-------------------------------
 
-This device covers address range [0x100000000 ... 0x140000000)
+If auto-onlining of memory blocks isn't enabled, user-space has to manually
+trigger onlining of memory blocks. Often, udev rules are used to automate this
+task in user space.
 
-Under each memory block, you can see 5 files:
+Onlining of a memory block can be triggered via::
 
-- ``/sys/devices/system/memory/memoryXXX/phys_index``
-- ``/sys/devices/system/memory/memoryXXX/phys_device``
-- ``/sys/devices/system/memory/memoryXXX/state``
-- ``/sys/devices/system/memory/memoryXXX/removable``
-- ``/sys/devices/system/memory/memoryXXX/valid_zones``
+	% echo online > /sys/devices/system/memory/memoryXXX/state
 
-=================== ============================================================
-``phys_index``      read-only and contains memory block id, same as XXX.
-``state``           read-write
+Or alternatively::
 
-                    - at read:  contains online/offline state of memory.
-                    - at write: user can specify "online_kernel",
+	% echo 1 > /sys/devices/system/memory/memoryXXX/online
 
-                    "online_movable", "online", "offline" command
-                    which will be performed on all sections in the block.
-``phys_device``	    read-only: legacy interface only ever used on s390x to
-		    expose the covered storage increment.
-``removable``	    read-only: legacy interface that indicated whether a memory
-		    block was likely to be offlineable or not.  Newer kernel
-		    versions return "1" if and only if the kernel supports
-		    memory offlining.
-``valid_zones``     read-only: designed to show by which zone memory provided by
-		    a memory block is managed, and to show by which zone memory
-		    provided by an offline memory block could be managed when
-		    onlining.
-
-		    The first column shows it`s default zone.
-
-		    "memory6/valid_zones: Normal Movable" shows this memoryblock
-		    can be onlined to ZONE_NORMAL by default and to ZONE_MOVABLE
-		    by online_movable.
-
-		    "memory7/valid_zones: Movable Normal" shows this memoryblock
-		    can be onlined to ZONE_MOVABLE by default and to ZONE_NORMAL
-		    by online_kernel.
-=================== ============================================================
+The kernel will select the target zone automatically, usually defaulting to
+``ZONE_NORMAL`` unless ``movablecore=1`` has been specified on the kernel
+command line or if the memory block would intersect the ZONE_MOVABLE already.
 
-.. note::
+One can explicitly request to associate an offline memory block with
+ZONE_MOVABLE by::
 
-  These directories/files appear after physical memory hotplug phase.
+	% echo online_movable > /sys/devices/system/memory/memoryXXX/state
 
-If CONFIG_NUMA is enabled the memoryXXX/ directories can also be accessed
-via symbolic links located in the ``/sys/devices/system/node/node*`` directories.
+Or one can explicitly request a kernel zone (usually ZONE_NORMAL) by::
 
-For example::
+	% echo online_kernel > /sys/devices/system/memory/memoryXXX/state
 
-	/sys/devices/system/node/node0/memory9 -> ../../memory/memory9
+In any case, if onlining succeeds, the state of the memory block is changed to
+be "online". If it fails, the state of the memory block will remain unchanged
+and the above commands will fail.
 
-A backlink will also be created::
+Onlining Memory Blocks Automatically
+------------------------------------
 
-	/sys/devices/system/memory/memory9/node0 -> ../../node/node0
+The kernel can be configured to try auto-onlining of newly added memory blocks.
+If this feature is disabled, the memory blocks will stay offline until
+explicitly onlined from user space.
 
-.. _memory_hotplug_physical_mem:
+The configured auto-online behavior can be observed via::
 
-Physical memory hot-add phase
-=============================
+	% cat /sys/devices/system/memory/auto_online_blocks
 
-Hardware(Firmware) Support
---------------------------
+Auto-onlining can be enabled by writing ``online``, ``online_kernel`` or
+``online_movable`` to that file, like::
 
-On x86_64/ia64 platform, memory hotplug by ACPI is supported.
+	% echo online > /sys/devices/system/memory/auto_online_blocks
 
-In general, the firmware (ACPI) which supports memory hotplug defines
-memory class object of _HID "PNP0C80". When a notify is asserted to PNP0C80,
-Linux's ACPI handler does hot-add memory to the system and calls a hotplug udev
-script. This will be done automatically.
+Modifying the auto-online behavior will only affect all subsequently added
+memory blocks only.
 
-But scripts for memory hotplug are not contained in generic udev package(now).
-You may have to write it by yourself or online/offline memory by hand.
-Please see :ref:`memory_hotplug_how_to_online_memory` and
-:ref:`memory_hotplug_how_to_offline_memory`.
+.. note::
 
-If firmware supports NUMA-node hotplug, and defines an object _HID "ACPI0004",
-"PNP0A05", or "PNP0A06", notification is asserted to it, and ACPI handler
-calls hotplug code for all of objects which are defined in it.
-If memory device is found, memory hotplug code will be called.
+  In corner cases, auto-onlining can fail. The kernel won't retry. Note that
+  auto-onlining is not expected to fail in default configurations.
 
-Notify memory hot-add event by hand
------------------------------------
+.. note::
 
-On some architectures, the firmware may not notify the kernel of a memory
-hotplug event.  Therefore, the memory "probe" interface is supported to
-explicitly notify the kernel.  This interface depends on
-CONFIG_ARCH_MEMORY_PROBE and can be configured on powerpc, sh, and x86
-if hotplug is supported, although for x86 this should be handled by ACPI
-notification.
+  DLPAR on ppc64 ignores the ``offline`` setting and will still online added
+  memory blocks; if onlining fails, memory blocks are removed again.
 
-Probe interface is located at::
+Offlining Memory Blocks
+-----------------------
 
-	/sys/devices/system/memory/probe
+In the current implementation, Linux's memory offlining will try migrating all
+movable pages off the affected memory block. As most kernel allocations, such as
+page tables, are unmovable, page migration can fail and, therefore, inhibit
+memory offlining from succeeding.
 
-You can tell the physical address of new memory to the kernel by::
+Having the memory provided by memory block managed by ZONE_MOVABLE significantly
+increases memory offlining reliability; still, memory offlining can fail in
+some corner cases.
 
-	% echo start_address_of_new_memory > /sys/devices/system/memory/probe
+Further, memory offlining might retry for a long time (or even forever), until
+aborted by the user.
 
-Then, [start_address_of_new_memory, start_address_of_new_memory +
-memory_block_size] memory range is hot-added. In this case, hotplug script is
-not called (in current implementation). You'll have to online memory by
-yourself.  Please see :ref:`memory_hotplug_how_to_online_memory`.
+Offlining of a memory block can be triggered via::
 
-Logical Memory hot-add phase
-============================
+	% echo offline > /sys/devices/system/memory/memoryXXX/state
 
-State of memory
----------------
+Or alternatively::
 
-To see (online/offline) state of a memory block, read 'state' file::
+	% echo 0 > /sys/devices/system/memory/memoryXXX/online
 
-	% cat /sys/device/system/memory/memoryXXX/state
+If offlining succeeds, the state of the memory block is changed to be "offline".
+If it fails, the state of the memory block will remain unchanged and the above
+commands will fail, for example, via::
 
+	bash: echo: write error: Device or resource busy
 
-- If the memory block is online, you'll read "online".
-- If the memory block is offline, you'll read "offline".
+or via::
 
+	bash: echo: write error: Invalid argument
 
-.. _memory_hotplug_how_to_online_memory:
+Observing the State of Memory Blocks
+------------------------------------
 
-How to online memory
---------------------
+The state (online/offline/going-offline) of a memory block can be observed
+either via::
 
-When the memory is hot-added, the kernel decides whether or not to "online"
-it according to the policy which can be read from "auto_online_blocks" file::
+	% cat /sys/device/system/memory/memoryXXX/state
 
-	% cat /sys/devices/system/memory/auto_online_blocks
+Or alternatively (1/0) via::
 
-The default depends on the CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE kernel config
-option. If it is disabled the default is "offline" which means the newly added
-memory is not in a ready-to-use state and you have to "online" the newly added
-memory blocks manually. Automatic onlining can be requested by writing "online"
-to "auto_online_blocks" file::
+	% cat /sys/device/system/memory/memoryXXX/online
 
-	% echo online > /sys/devices/system/memory/auto_online_blocks
+For an online memory block, the managing zone can be observed via::
 
-This sets a global policy and impacts all memory blocks that will subsequently
-be hotplugged. Currently offline blocks keep their state. It is possible, under
-certain circumstances, that some memory blocks will be added but will fail to
-online. User space tools can check their "state" files
-(``/sys/devices/system/memory/memoryXXX/state``) and try to online them manually.
+	% cat /sys/device/system/memory/memoryXXX/valid_zones
 
-If the automatic onlining wasn't requested, failed, or some memory block was
-offlined it is possible to change the individual block's state by writing to the
-"state" file::
+Configuring Memory Hot(Un)Plug
+==============================
 
-	% echo online > /sys/devices/system/memory/memoryXXX/state
+There are various ways how system administrators can configure memory
+hot(un)plug and interact with memory blocks, especially, to online them.
 
-This onlining will not change the ZONE type of the target memory block,
-If the memory block doesn't belong to any zone an appropriate kernel zone
-(usually ZONE_NORMAL) will be used unless movable_node kernel command line
-option is specified when ZONE_MOVABLE will be used.
+Memory Hot(Un)Plug Configuration via Sysfs
+------------------------------------------
 
-You can explicitly request to associate it with ZONE_MOVABLE by::
+Some memory hot(un)plug properties can be configured or inspected via sysfs in::
 
-	% echo online_movable > /sys/devices/system/memory/memoryXXX/state
+	/sys/devices/system/memory/
 
-.. note:: current limit: this memory block must be adjacent to ZONE_MOVABLE
+The following files are currently defined:
 
-Or you can explicitly request a kernel zone (usually ZONE_NORMAL) by::
+====================== =========================================================
+``auto_online_blocks`` read-write: set or get the default state of new memory
+		       blocks; configure auto-onlining.
 
-	% echo online_kernel > /sys/devices/system/memory/memoryXXX/state
+		       The default value depends on the
+		       CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE kernel configuration
+		       option.
 
-.. note:: current limit: this memory block must be adjacent to ZONE_NORMAL
+		       See the ``state`` property of memory blocks for details.
+``block_size_bytes``   read-only: the size in bytes of a memory block.
+``probe``	       write-only: add (probe) selected memory blocks manually
+		       from user space by supplying the physical start address.
 
-An explicit zone onlining can fail (e.g. when the range is already within
-and existing and incompatible zone already).
+		       Availability depends on the CONFIG_ARCH_MEMORY_PROBE
+		       kernel configuration option.
+``uevent``	       read-write: generic udev file for device subsystems.
+====================== =========================================================
 
-After this, memory block XXX's state will be 'online' and the amount of
-available memory will be increased.
+.. note::
 
-This may be changed in future.
+  When the CONFIG_MEMORY_FAILURE kernel configuration option is enabled, two
+  additional files ``hard_offline_page`` and ``soft_offline_page`` are available
+  to trigger hwpoisoning of pages, for example, for testing purposes. Note that
+  this functionality is not really related to memory hot(un)plug or actual
+  offlining of memory blocks.
 
-Logical memory remove
-=====================
+Memory Block Configuration via Sysfs
+------------------------------------
 
-Memory offline and ZONE_MOVABLE
--------------------------------
+Each memory block is represented as a memory block device that can be
+onlined or offlined. All memory blocks have their device information located in
+sysfs. Each present memory block is listed under
+``/sys/devices/system/memory`` as::
 
-Memory offlining is more complicated than memory online. Because memory offline
-has to make the whole memory block be unused, memory offline can fail if
-the memory block includes memory which cannot be freed.
+	/sys/devices/system/memory/memoryXXX
 
-In general, memory offline can use 2 techniques.
+where XXX is the memory block id; the number of digits is variable.
 
-(1) reclaim and free all memory in the memory block.
-(2) migrate all pages in the memory block.
+A present memory block indicates that some memory in the range is present;
+however, a memory block might span memory holes. A memory block spanning memory
+holes cannot be offlined.
 
-In the current implementation, Linux's memory offline uses method (2), freeing
-all  pages in the memory block by page migration. But not all pages are
-migratable. Under current Linux, migratable pages are anonymous pages and
-page caches. For offlining a memory block by migration, the kernel has to
-guarantee that the memory block contains only migratable pages.
+For example, assume 1 GiB memory block size. A device for a memory starting at
+0x100000000 is ``/sys/device/system/memory/memory4``::
 
-Now, a boot option for making a memory block which consists of migratable pages
-is supported. By specifying "kernelcore=" or "movablecore=" boot option, you can
-create ZONE_MOVABLE...a zone which is just used for movable pages.
-(See also Documentation/admin-guide/kernel-parameters.rst)
+	(0x100000000 / 1Gib = 4)
 
-Assume the system has "TOTAL" amount of memory at boot time, this boot option
-creates ZONE_MOVABLE as following.
+This device covers address range [0x100000000 ... 0x140000000)
 
-1) When kernelcore=YYYY boot option is used,
-   Size of memory not for movable pages (not for offline) is YYYY.
-   Size of memory for movable pages (for offline) is TOTAL-YYYY.
+The following files are currently defined:
 
-2) When movablecore=ZZZZ boot option is used,
-   Size of memory not for movable pages (not for offline) is TOTAL - ZZZZ.
-   Size of memory for movable pages (for offline) is ZZZZ.
+=================== ============================================================
+``online``	    read-write: simplified interface to trigger onlining /
+		    offlining and to observe the state of a memory block.
+		    When onlining, the zone is selected automatically.
+``phys_device``	    read-only: legacy interface only ever used on s390x to
+		    expose the covered storage increment.
+``phys_index``	    read-only: the memory block id (XXX).
+``removable``	    read-only: legacy interface that indicated whether a memory
+		    block was likely to be offlineable or not. Nowadays, the
+		    kernel return ``1`` if and only if it supports memory
+		    offlining.
+``state``	    read-write: advanced interface to trigger onlining /
+		    offlining and to observe the state of a memory block.
+
+		    When writing, ``online``, ``offline``, ``online_kernel`` and
+		    ``online_movable`` are supported.
+
+		    ``online_movable`` specifies onlining to ZONE_MOVABLE.
+		    ``online_kernel`` specifies onlining to the default kernel
+		    zone for the memory block, such as ZONE_NORMAL.
+                    ``online`` let's the kernel select the zone automatically.
+
+		    When reading, ``online``, ``offline`` and ``going-offline``
+		    may be returned.
+``uevent``	    read-write: generic uevent file for devices.
+``valid_zones``     read-only: when a block is online, shows the zone it
+		    belongs to; when a block is offline, shows what zone will
+		    manage it when the block will be onlined.
+
+		    For online memory blocks, ``DMA``, ``DMA32``, ``Normal``,
+		    ``Movable`` and ``none`` may be returned. ``none`` indicates
+		    that memory provided by a memory block is managed by
+		    multiple zones or spans multiple nodes; such memory blocks
+		    cannot be offlined. ``Movable`` indicates ZONE_MOVABLE.
+		    Other values indicate a kernel zone.
+
+		    For offline memory blocks, the first column shows the
+		    zone the kernel would select when onlining the memory block
+		    right now without further specifying a zone.
+
+		    Availability depends on the CONFIG_MEMORY_HOTREMOVE
+		    kernel configuration option.
+=================== ============================================================
 
 .. note::
 
-   Unfortunately, there is no information to show which memory block belongs
-   to ZONE_MOVABLE. This is TBD.
+  If the CONFIG_NUMA kernel configuration option is enabled, the memoryXXX/
+  directories can also be accessed via symbolic links located in the
+  ``/sys/devices/system/node/node*`` directories.
+
+  For example::
+
+	/sys/devices/system/node/node0/memory9 -> ../../memory/memory9
+
+  A backlink will also be created::
+
+	/sys/devices/system/memory/memory9/node0 -> ../../node/node0
+
+Command Line Parameters
+-----------------------
+
+Some command line parameters affect memory hot(un)plug handling. The following
+command line parameters are relevant:
+
+======================== =======================================================
+``memhp_default_state``	 configure auto-onlining by essentially setting
+                         ``/sys/devices/system/memory/auto_online_blocks``.
+``movablecore``		 configure automatic zone selection of the kernel. When
+			 set, the kernel will default to ZONE_MOVABLE, unless
+			 other zones can be kept contiguous.
+======================== =======================================================
+
+Module Parameters
+------------------
 
-   Memory offlining can fail when dissolving a free huge page on ZONE_MOVABLE
-   and the feature of freeing unused vmemmap pages associated with each hugetlb
-   page is enabled.
+Instead of additional command line parameters or sysfs files, the
+``memory_hotplug`` subsystem now provides a dedicated namespace for module
+parameters. Module parameters can be set via the command line by predicating
+them with ``memory_hotplug.`` such as::
+
+	memory_hotplug.memmap_on_memory=1
+
+and they can be observed (and some even modified at runtime) via::
+
+	/sys/modules/memory_hotplug/parameters/
+
+The following module parameters are currently defined:
+
+======================== =======================================================
+``memmap_on_memory``	 read-write: Allocate memory for the memmap from the
+			 added memory block itself. Even if enabled, actual
+			 support depends on various other system properties and
+			 should only be regarded as a hint whether the behavior
+			 would be desired.
+
+			 While allocating the memmap from the memory block
+			 itself makes memory hotplug less likely to fail and
+			 keeps the memmap on the same NUMA node in any case, it
+			 can fragment physical memory in a way that huge pages
+			 in bigger granularity cannot be formed on hotplugged
+			 memory.
+======================== =======================================================
+
+ZONE_MOVABLE
+============
+
+ZONE_MOVABLE is an important mechanism for more reliable memory offlining.
+Further, having system RAM managed by ZONE_MOVABLE instead of one of the
+kernel zones can increase the number of possible transparent huge pages and
+dynamically allocated huge pages.
+
+Most kernel allocations are unmovable. Important examples include the memory
+map (usually 1/64ths of memory), page tables, and kmalloc(). Such allocations
+can only be served from the kernel zones.
+
+Most user space pages, such as anonymous memory, and page cache pages are
+movable. Such allocations can be served from ZONE_MOVABLE and the kernel zones.
+
+Only movable allocations are served from ZONE_MOVABLE, resulting in unmovable
+allocations being limited to the kernel zones. Without ZONE_MOVABLE, there is
+absolutely no guarantee whether a memory block can be offlined successfully.
+
+Zone Imbalances
+---------------
 
-   This can happen when we have plenty of ZONE_MOVABLE memory, but not enough
-   kernel memory to allocate vmemmmap pages.  We may even be able to migrate
-   huge page contents, but will not be able to dissolve the source huge page.
-   This will prevent an offline operation and is unfortunate as memory offlining
-   is expected to succeed on movable zones.  Users that depend on memory hotplug
-   to succeed for movable zones should carefully consider whether the memory
-   savings gained from this feature are worth the risk of possibly not being
-   able to offline memory in certain situations.
+Having too much system RAM managed by ZONE_MOVABLE is called a zone imbalance,
+which can harm the system or degrade performance. As one example, the kernel
+might crash because it runs out of free memory for unmovable allocations,
+although there is still plenty of free memory left in ZONE_MOVABLE.
+
+Usually, MOVABLE:KERNEL ratios of up to 3:1 or even 4:1 are fine. Ratios of 63:1
+are definitely impossible due to the overhead for the memory map.
+
+Actual safe zone ratios depend on the workload. Extreme cases, like excessive
+long-term pinning of pages, might not be able to deal with ZONE_MOVABLE at all.
 
 .. note::
-   Techniques that rely on long-term pinnings of memory (especially, RDMA and
-   vfio) are fundamentally problematic with ZONE_MOVABLE and, therefore, memory
-   hot remove. Pinned pages cannot reside on ZONE_MOVABLE, to guarantee that
-   memory can still get hot removed - be aware that pinning can fail even if
-   there is plenty of free memory in ZONE_MOVABLE. In addition, using
-   ZONE_MOVABLE might make page pinning more expensive, because pages have to be
-   migrated off that zone first.
 
-.. _memory_hotplug_how_to_offline_memory:
+  CMA memory part of a kernel zone essentially behaves like memory in
+  ZONE_MOVABLE and similar considerations apply, especially when combining
+  CMA with ZONE_MOVABLE.
 
-How to offline memory
----------------------
+ZONE_MOVABLE Sizing Considerations
+----------------------------------
 
-You can offline a memory block by using the same sysfs interface that was used
-in memory onlining::
+We usually expect that a large portion of available system RAM will actually
+be consumed by user space, either directly or indirectly via the page cache. In
+the normal case, ZONE_MOVABLE can be used when allocating such pages just fine.
 
-	% echo offline > /sys/devices/system/memory/memoryXXX/state
+With that in mind, it makes sense that we can have a big portion of system RAM
+managed by ZONE_MOVABLE. However, there are some things to consider when using
+ZONE_MOVABLE, especially when fine-tuning zone ratios:
+
+- Having a lot of offline memory blocks. Even offline memory blocks consume
+  memory for metadata and page tables in the direct map; having a lot of offline
+  memory blocks is not a typical case, though.
+
+- Memory ballooning without balloon compaction is incompatible with
+  ZONE_MOVABLE. Only some implementations, such as virtio-balloon and
+  pseries CMM, fully support balloon compaction.
+
+  Further, the CONFIG_BALLOON_COMPACTION kernel configuration option might be
+  disabled. In that case, balloon inflation will only perform unmovable
+  allocations and silently create a zone imbalance, usually triggered by
+  inflation requests from the hypervisor.
+
+- Gigantic pages are unmovable, resulting in user space consuming a
+  lot of unmovable memory.
+
+- Huge pages are unmovable when an architectures does not support huge
+  page migration, resulting in a similar issue as with gigantic pages.
+
+- Page tables are unmovable. Excessive swapping, mapping extremely large
+  files or ZONE_DEVICE memory can be problematic, although only really relevant
+  in corner cases. When we manage a lot of user space memory that has been
+  swapped out or is served from a file/persistent memory/... we still need a lot
+  of page tables to manage that memory once user space accessed that memory.
+
+- In certain DAX configurations the memory map for the device memory will be
+  allocated from the kernel zones.
+
+- KASAN can have a significant memory overhead, for example, consuming 1/8th of
+  the total system memory size as (unmovable) tracking metadata.
+
+- Long-term pinning of pages. Techniques that rely on long-term pinnings
+  (especially, RDMA and vfio/mdev) are fundamentally problematic with
+  ZONE_MOVABLE, and therefore, memory offlining. Pinned pages cannot reside
+  on ZONE_MOVABLE as that would turn these pages unmovable. Therefore, they
+  have to be migrated off that zone while pinning. Pinning a page can fail
+  even if there is plenty of free memory in ZONE_MOVABLE.
+
+  In addition, using ZONE_MOVABLE might make page pinning more expensive,
+  because of the page migration overhead.
+
+By default, all the memory configured at boot time is managed by the kernel
+zones and ZONE_MOVABLE is not used.
+
+To enable ZONE_MOVABLE to include the memory present at boot and to control the
+ratio between movable and kernel zones there are two command line options:
+``kernelcore=`` and ``movablecore=``. See
+Documentation/admin-guide/kernel-parameters.rst for their description.
+
+Memory Offlining and ZONE_MOVABLE
+---------------------------------
+
+Even with ZONE_MOVABLE, there are some corner cases where offlining a memory
+block might fail:
+
+- Memory blocks with memory holes; this applies to memory blocks present during
+  boot and can apply to memory blocks hotplugged via the XEN balloon and the
+  Hyper-V balloon.
+
+- Mixed NUMA nodes and mixed zones within a single memory block prevent memory
+  offlining; this applies to memory blocks present during boot only.
+
+- Special memory blocks prevented by the system from getting offlined. Examples
+  include any memory available during boot on arm64 or memory blocks spanning
+  the crashkernel area on s390x; this usually applies to memory blocks present
+  during boot only.
+
+- Memory blocks overlapping with CMA areas cannot be offlined, this applies to
+  memory blocks present during boot only.
+
+- Concurrent activity that operates on the same physical memory area, such as
+  allocating gigantic pages, can result in temporary offlining failures.
+
+- Out of memory when dissolving huge pages, especially when freeing unused
+  vmemmap pages associated with each hugetlb page is enabled.
+
+  Offlining code may be able to migrate huge page contents, but may not be able
+  to dissolve the source huge page because it fails allocating (unmovable) pages
+  for the vmemmap, because the system might not have free memory in the kernel
+  zones left.
+
+  Users that depend on memory offlining to succeed for movable zones should
+  carefully consider whether the memory savings gained from this feature are
+  worth the risk of possibly not being able to offline memory in certain
+  situations.
+
+Further, when running into out of memory situations while migrating pages, or
+when still encountering permanently unmovable pages within ZONE_MOVABLE
+(-> BUG), memory offlining will keep retrying until it eventually succeeds.
+
+When offlining is triggered from user space, the offlining context can be
+terminated by sending a fatal signal. A timeout based offlining can easily be
+implemented via::
 
-If offline succeeds, the state of the memory block is changed to be "offline".
-If it fails, some error core (like -EBUSY) will be returned by the kernel.
-Even if a memory block does not belong to ZONE_MOVABLE, you can try to offline
-it.  If it doesn't contain 'unmovable' memory, you'll get success.
-
-A memory block under ZONE_MOVABLE is considered to be able to be offlined
-easily.  But under some busy state, it may return -EBUSY. Even if a memory
-block cannot be offlined due to -EBUSY, you can retry offlining it and may be
-able to offline it (or not). (For example, a page is referred to by some kernel
-internal call and released soon.)
-
-Consideration:
-  Memory hotplug's design direction is to make the possibility of memory
-  offlining higher and to guarantee unplugging memory under any situation. But
-  it needs more work. Returning -EBUSY under some situation may be good because
-  the user can decide to retry more or not by himself. Currently, memory
-  offlining code does some amount of retry with 120 seconds timeout.
-
-Physical memory remove
-======================
-
-Need more implementation yet....
- - Notification completion of remove works by OS to firmware.
- - Guard from remove if not yet.
-
-
-Locking Internals
-=================
-
-When adding/removing memory that uses memory block devices (i.e. ordinary RAM),
-the device_hotplug_lock should be held to:
-
-- synchronize against online/offline requests (e.g. via sysfs). This way, memory
-  block devices can only be accessed (.online/.state attributes) by user
-  space once memory has been fully added. And when removing memory, we
-  know nobody is in critical sections.
-- synchronize against CPU hotplug and similar (e.g. relevant for ACPI and PPC)
-
-Especially, there is a possible lock inversion that is avoided using
-device_hotplug_lock when adding memory and user space tries to online that
-memory faster than expected:
-
-- device_online() will first take the device_lock(), followed by
-  mem_hotplug_lock
-- add_memory_resource() will first take the mem_hotplug_lock, followed by
-  the device_lock() (while creating the devices, during bus_add_device()).
-
-As the device is visible to user space before taking the device_lock(), this
-can result in a lock inversion.
-
-onlining/offlining of memory should be done via device_online()/
-device_offline() - to make sure it is properly synchronized to actions
-via sysfs. Holding device_hotplug_lock is advised (to e.g. protect online_type)
-
-When adding/removing/onlining/offlining memory or adding/removing
-heterogeneous/device memory, we should always hold the mem_hotplug_lock in
-write mode to serialise memory hotplug (e.g. access to global/zone
-variables).
-
-In addition, mem_hotplug_lock (in contrast to device_hotplug_lock) in read
-mode allows for a quite efficient get_online_mems/put_online_mems
-implementation, so code accessing memory can protect from that memory
-vanishing.
-
-
-Future Work
-===========
-
-  - allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like
-    sysctl or new control file.
-  - showing memory block and physical device relationship.
-  - test and make it better memory offlining.
-  - support HugeTLB page migration and offlining.
-  - memmap removing at memory offline.
-  - physical remove memory.
+	% timeout $TIMEOUT offline_block | failure_handling
diff --git a/Documentation/admin-guide/mm/numa_memory_policy.rst b/Documentation/admin-guide/mm/numa_memory_policy.rst
index 067a90a1499c..64fd0ba0d057 100644
--- a/Documentation/admin-guide/mm/numa_memory_policy.rst
+++ b/Documentation/admin-guide/mm/numa_memory_policy.rst
@@ -245,6 +245,13 @@ MPOL_INTERLEAVED
 	address range or file.  During system boot up, the temporary
 	interleaved system default policy works in this mode.
 
+MPOL_PREFERRED_MANY
+	This mode specifices that the allocation should be preferrably
+	satisfied from the nodemask specified in the policy. If there is
+	a memory pressure on all nodes in the nodemask, the allocation
+	can fall back to all existing numa nodes. This is effectively
+	MPOL_PREFERRED allowed for a mask rather than a single node.
+
 NUMA memory policy supports the following optional mode flags:
 
 MPOL_F_STATIC_NODES
@@ -253,10 +260,10 @@ MPOL_F_STATIC_NODES
 	nodes changes after the memory policy has been defined.
 
 	Without this flag, any time a mempolicy is rebound because of a
-	change in the set of allowed nodes, the node (Preferred) or
-	nodemask (Bind, Interleave) is remapped to the new set of
-	allowed nodes.  This may result in nodes being used that were
-	previously undesired.
+        change in the set of allowed nodes, the preferred nodemask (Preferred
+        Many), preferred node (Preferred) or nodemask (Bind, Interleave) is
+        remapped to the new set of allowed nodes.  This may result in nodes
+        being used that were previously undesired.
 
 	With this flag, if the user-specified nodes overlap with the
 	nodes allowed by the task's cpuset, then the memory policy is
diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
index 003d5cc3751b..5e795202111f 100644
--- a/Documentation/admin-guide/sysctl/vm.rst
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -118,7 +118,8 @@ compaction_proactiveness
 
 This tunable takes a value in the range [0, 100] with a default value of
 20. This tunable determines how aggressively compaction is done in the
-background. Setting it to 0 disables proactive compaction.
+background. Write of a non zero value to this tunable will immediately
+trigger the proactive compaction. Setting it to 0 disables proactive compaction.
 
 Note that compaction has a non-trivial system-wide impact as pages
 belonging to different processes are moved around, which could also lead
diff --git a/Documentation/admin-guide/sysrq.rst b/Documentation/admin-guide/sysrq.rst
index 60ce5f5ebab6..0a178ef0111d 100644
--- a/Documentation/admin-guide/sysrq.rst
+++ b/Documentation/admin-guide/sysrq.rst
@@ -72,7 +72,7 @@ On PowerPC
 
 On other
 	If you know of the key combos for other architectures, please
-        let me know so I can add them to this section.
+	submit a patch to be included in this section.
 
 On all
 	Write a character to /proc/sysrq-trigger.  e.g.::
@@ -205,10 +205,12 @@ frozen (probably root) filesystem via the FIFREEZE ioctl.
 Sometimes SysRq seems to get 'stuck' after using it, what can I do?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-That happens to me, also. I've found that tapping shift, alt, and control
-on both sides of the keyboard, and hitting an invalid sysrq sequence again
-will fix the problem. (i.e., something like :kbd:`alt-sysrq-z`). Switching to
-another virtual console (:kbd:`ALT+Fn`) and then back again should also help.
+When this happens, try tapping shift, alt and control on both sides of the
+keyboard, and hitting an invalid sysrq sequence again. (i.e., something like
+:kbd:`alt-sysrq-z`).
+
+Switching to another virtual console (:kbd:`ALT+Fn`) and then back again
+should also help.
 
 I hit SysRq, but nothing seems to happen, what's wrong?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/Documentation/arm/marvell.rst b/Documentation/arm/marvell.rst
index db2246493d18..56bb592dbd0c 100644
--- a/Documentation/arm/marvell.rst
+++ b/Documentation/arm/marvell.rst
@@ -58,11 +58,19 @@ Kirkwood family
                 - Product Brief  : https://web.archive.org/web/20120616201621/http://www.marvell.com/embedded-processors/kirkwood/assets/88F6180-003_ver1.pdf
                 - Hardware Spec  : https://web.archive.org/web/20130730091654/http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F6180_OpenSource.pdf
                 - Functional Spec: https://web.archive.org/web/20130730091033/http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
+        - 88F6280
+
+                - Product Brief  : https://web.archive.org/web/20130730091058/http://www.marvell.com/embedded-processors/kirkwood/assets/88F6280_SoC_PB-001.pdf
         - 88F6281
 
                 - Product Brief  : https://web.archive.org/web/20120131133709/http://www.marvell.com/embedded-processors/kirkwood/assets/88F6281-004_ver1.pdf
                 - Hardware Spec  : https://web.archive.org/web/20120620073511/http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F6281_OpenSource.pdf
                 - Functional Spec: https://web.archive.org/web/20130730091033/http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
+        - 88F6321
+        - 88F6322
+        - 88F6323
+
+                - Product Brief  : https://web.archive.org/web/20120616201639/http://www.marvell.com/embedded-processors/kirkwood/assets/88f632x_pb.pdf
   Homepage:
 	https://web.archive.org/web/20160513194943/http://www.marvell.com/embedded-processors/kirkwood/
   Core:
@@ -89,6 +97,10 @@ Discovery family
 
         - MV76100
 
+                - Product Brief  : https://web.archive.org/web/20140722064429/http://www.marvell.com/embedded-processors/discovery-innovation/assets/MV76100-002_WEB.pdf
+                - Hardware Spec  : https://web.archive.org/web/20140722064425/http://www.marvell.com/embedded-processors/discovery-innovation/assets/HW_MV76100_OpenSource.pdf
+                - Functional Spec: https://web.archive.org/web/20111110081125/http://www.marvell.com/embedded-processors/discovery-innovation/assets/FS_MV76100_78100_78200_OpenSource.pdf
+
                 Not supported by the Linux kernel.
 
   Core:
@@ -124,17 +136,24 @@ EBU Armada family
 
   Armada 38x Flavors:
 	- 88F6810	Armada 380
+	- 88F6811 Armada 381
+	- 88F6821 Armada 382
+	- 88F6W21 Armada 383
 	- 88F6820 Armada 385
+	- 88F6825
 	- 88F6828 Armada 388
 
     - Product infos:   https://web.archive.org/web/20181006144616/http://www.marvell.com/embedded-processors/armada-38x/
     - Functional Spec: https://web.archive.org/web/20200420191927/https://www.marvell.com/content/dam/marvell/en/public-collateral/embedded-processors/marvell-embedded-processors-armada-38x-functional-specifications-2015-11.pdf
+    - Hardware Spec:   https://web.archive.org/web/20180713105318/https://www.marvell.com/docs/embedded-processors/assets/marvell-embedded-processors-armada-38x-hardware-specifications-2017-03.pdf
+    - Design guide:    https://web.archive.org/web/20180712231737/https://www.marvell.com/docs/embedded-processors/assets/marvell-embedded-processors-armada-38x-hardware-design-guide-2017-08.pdf
 
   Core:
 	ARM Cortex-A9
 
   Armada 39x Flavors:
 	- 88F6920 Armada 390
+	- 88F6925 Armada 395
 	- 88F6928 Armada 398
 
     - Product infos: https://web.archive.org/web/20181020222559/http://www.marvell.com/embedded-processors/armada-39x/
diff --git a/Documentation/block/blk-mq.rst b/Documentation/block/blk-mq.rst
index d96118c73954..31f52f326971 100644
--- a/Documentation/block/blk-mq.rst
+++ b/Documentation/block/blk-mq.rst
@@ -54,7 +54,7 @@ layer or if we want to try to merge requests. In both cases, requests will be
 sent to the software queue.
 
 Then, after the requests are processed by software queues, they will be placed
-at the hardware queue, a second stage queue were the hardware has direct access
+at the hardware queue, a second stage queue where the hardware has direct access
 to process those requests. However, if the hardware does not have enough
 resources to accept more requests, blk-mq will places requests on a temporary
 queue, to be sent in the future, when the hardware is able.
diff --git a/Documentation/conf.py b/Documentation/conf.py
index 7d92ec3e5b6e..948a97d6387d 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -16,8 +16,6 @@ import sys
 import os
 import sphinx
 
-from subprocess import check_output
-
 # Get Sphinx version
 major, minor, patch = sphinx.version_info[:3]
 
@@ -343,6 +341,9 @@ latex_elements = {
         verbatimhintsturnover=false,
     ''',
 
+    # For CJK One-half spacing, need to be in front of hyperref
+    'extrapackages': r'\usepackage{setspace}',
+
     # Additional stuff for the LaTeX preamble.
     'preamble': '''
 	% Prevent column squeezing of tabulary.
@@ -355,29 +356,117 @@ latex_elements = {
      ''',
 }
 
-# At least one book (translations) may have Asian characters
-# with are only displayed if xeCJK is used
+# Translations have Asian (CJK) characters which are only displayed if
+# xeCJK is used
 
-cjk_cmd = check_output(['fc-list', '--format="%{family[0]}\n"']).decode('utf-8', 'ignore')
-if cjk_cmd.find("Noto Sans CJK SC") >= 0:
-    latex_elements['preamble']  += '''
+latex_elements['preamble']  += '''
+    \\IfFontExistsTF{Noto Sans CJK SC}{
 	% This is needed for translations
-        \\usepackage{xeCJK}
-        \\setCJKmainfont{Noto Sans CJK SC}
+	\\usepackage{xeCJK}
+	\\IfFontExistsTF{Noto Serif CJK SC}{
+	    \\setCJKmainfont{Noto Serif CJK SC}[AutoFakeSlant]
+	}{
+	    \\setCJKmainfont{Noto Sans CJK SC}[AutoFakeSlant]
+	}
+	\\setCJKsansfont{Noto Sans CJK SC}[AutoFakeSlant]
+	\\setCJKmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant]
+	% CJK Language-specific font choices
+	\\IfFontExistsTF{Noto Serif CJK SC}{
+	    \\newCJKfontfamily[SCmain]\\scmain{Noto Serif CJK SC}[AutoFakeSlant]
+	    \\newCJKfontfamily[SCserif]\\scserif{Noto Serif CJK SC}[AutoFakeSlant]
+	}{
+	    \\newCJKfontfamily[SCmain]\\scmain{Noto Sans CJK SC}[AutoFakeSlant]
+	    \\newCJKfontfamily[SCserif]\\scserif{Noto Sans CJK SC}[AutoFakeSlant]
+	}
+	\\newCJKfontfamily[SCsans]\\scsans{Noto Sans CJK SC}[AutoFakeSlant]
+	\\newCJKfontfamily[SCmono]\\scmono{Noto Sans Mono CJK SC}[AutoFakeSlant]
+	\\IfFontExistsTF{Noto Serif CJK TC}{
+	    \\newCJKfontfamily[TCmain]\\tcmain{Noto Serif CJK TC}[AutoFakeSlant]
+	    \\newCJKfontfamily[TCserif]\\tcserif{Noto Serif CJK TC}[AutoFakeSlant]
+	}{
+	    \\newCJKfontfamily[TCmain]\\tcmain{Noto Sans CJK TC}[AutoFakeSlant]
+	    \\newCJKfontfamily[TCserif]\\tcserif{Noto Sans CJK TC}[AutoFakeSlant]
+	}
+	\\newCJKfontfamily[TCsans]\\tcsans{Noto Sans CJK TC}[AutoFakeSlant]
+	\\newCJKfontfamily[TCmono]\\tcmono{Noto Sans Mono CJK TC}[AutoFakeSlant]
+	\\IfFontExistsTF{Noto Serif CJK KR}{
+	    \\newCJKfontfamily[KRmain]\\krmain{Noto Serif CJK KR}[AutoFakeSlant]
+	    \\newCJKfontfamily[KRserif]\\krserif{Noto Serif CJK KR}[AutoFakeSlant]
+	}{
+	    \\newCJKfontfamily[KRmain]\\krmain{Noto Sans CJK KR}[AutoFakeSlant]
+	    \\newCJKfontfamily[KRserif]\\krserif{Noto Sans CJK KR}[AutoFakeSlant]
+	}
+	\\newCJKfontfamily[KRsans]\\krsans{Noto Sans CJK KR}[AutoFakeSlant]
+	\\newCJKfontfamily[KRmono]\\krmono{Noto Sans Mono CJK KR}[AutoFakeSlant]
+	\\IfFontExistsTF{Noto Serif CJK JP}{
+	    \\newCJKfontfamily[JPmain]\\jpmain{Noto Serif CJK JP}[AutoFakeSlant]
+	    \\newCJKfontfamily[JPserif]\\jpserif{Noto Serif CJK JP}[AutoFakeSlant]
+	}{
+	    \\newCJKfontfamily[JPmain]\\jpmain{Noto Sans CJK JP}[AutoFakeSlant]
+	    \\newCJKfontfamily[JPserif]\\jpserif{Noto Sans CJK JP}[AutoFakeSlant]
+	}
+	\\newCJKfontfamily[JPsans]\\jpsans{Noto Sans CJK JP}[AutoFakeSlant]
+	\\newCJKfontfamily[JPmono]\\jpmono{Noto Sans Mono CJK JP}[AutoFakeSlant]
+	% Dummy commands for Sphinx < 2.3 (no 'extrapackages' support)
+	\\providecommand{\\onehalfspacing}{}
+	\\providecommand{\\singlespacing}{}
 	% Define custom macros to on/off CJK
-	\\newcommand{\\kerneldocCJKon}{\\makexeCJKactive}
-	\\newcommand{\\kerneldocCJKoff}{\\makexeCJKinactive}
-	% To customize \sphinxtableofcontents
+	\\newcommand{\\kerneldocCJKon}{\\makexeCJKactive\\onehalfspacing}
+	\\newcommand{\\kerneldocCJKoff}{\\makexeCJKinactive\\singlespacing}
+	\\newcommand{\\kerneldocBeginSC}{%
+	    \\begingroup%
+	    \\scmain%
+	}
+	\\newcommand{\\kerneldocEndSC}{\\endgroup}
+	\\newcommand{\\kerneldocBeginTC}{%
+	    \\begingroup%
+	    \\tcmain%
+	    \\renewcommand{\\CJKrmdefault}{TCserif}%
+	    \\renewcommand{\\CJKsfdefault}{TCsans}%
+	    \\renewcommand{\\CJKttdefault}{TCmono}%
+	}
+	\\newcommand{\\kerneldocEndTC}{\\endgroup}
+	\\newcommand{\\kerneldocBeginKR}{%
+	    \\begingroup%
+	    \\xeCJKDeclareCharClass{HalfLeft}{`“,`‘}%
+	    \\xeCJKDeclareCharClass{HalfRight}{`”,`’}%
+	    \\krmain%
+	    \\renewcommand{\\CJKrmdefault}{KRserif}%
+	    \\renewcommand{\\CJKsfdefault}{KRsans}%
+	    \\renewcommand{\\CJKttdefault}{KRmono}%
+	    \\xeCJKsetup{CJKspace = true} % For inter-phrase space
+	}
+	\\newcommand{\\kerneldocEndKR}{\\endgroup}
+	\\newcommand{\\kerneldocBeginJP}{%
+	    \\begingroup%
+	    \\xeCJKDeclareCharClass{HalfLeft}{`“,`‘}%
+	    \\xeCJKDeclareCharClass{HalfRight}{`”,`’}%
+	    \\jpmain%
+	    \\renewcommand{\\CJKrmdefault}{JPserif}%
+	    \\renewcommand{\\CJKsfdefault}{JPsans}%
+	    \\renewcommand{\\CJKttdefault}{JPmono}%
+	}
+	\\newcommand{\\kerneldocEndJP}{\\endgroup}
+	% Single spacing in literal blocks
+	\\fvset{baselinestretch=1}
+	% To customize \\sphinxtableofcontents
 	\\usepackage{etoolbox}
 	% Inactivate CJK after tableofcontents
 	\\apptocmd{\\sphinxtableofcontents}{\\kerneldocCJKoff}{}{}
-     '''
-else:
-    latex_elements['preamble']  += '''
+    }{ % No CJK font found
 	% Custom macros to on/off CJK (Dummy)
 	\\newcommand{\\kerneldocCJKon}{}
 	\\newcommand{\\kerneldocCJKoff}{}
-     '''
+	\\newcommand{\\kerneldocBeginSC}{}
+	\\newcommand{\\kerneldocEndSC}{}
+	\\newcommand{\\kerneldocBeginTC}{}
+	\\newcommand{\\kerneldocEndTC}{}
+	\\newcommand{\\kerneldocBeginKR}{}
+	\\newcommand{\\kerneldocEndKR}{}
+	\\newcommand{\\kerneldocBeginJP}{}
+	\\newcommand{\\kerneldocEndJP}{}
+    }
+'''
 
 # Fix reference escape troubles with Sphinx 1.4.x
 if major == 1:
diff --git a/Documentation/core-api/cachetlb.rst b/Documentation/core-api/cachetlb.rst
index fe4290e26729..8aed9103e48a 100644
--- a/Documentation/core-api/cachetlb.rst
+++ b/Documentation/core-api/cachetlb.rst
@@ -271,10 +271,15 @@ maps this page at its virtual address.
 
   ``void flush_dcache_page(struct page *page)``
 
-	Any time the kernel writes to a page cache page, _OR_
-	the kernel is about to read from a page cache page and
-	user space shared/writable mappings of this page potentially
-	exist, this routine is called.
+        This routines must be called when:
+
+	  a) the kernel did write to a page that is in the page cache page
+	     and / or in high memory
+	  b) the kernel is about to read from a page cache page and user space
+	     shared/writable mappings of this page potentially exist.  Note
+	     that {get,pin}_user_pages{_fast} already call flush_dcache_page
+	     on any page found in the user address space and thus driver
+	     code rarely needs to take this into account.
 
 	.. note::
 
@@ -284,38 +289,34 @@ maps this page at its virtual address.
 	      handling vfs symlinks in the page cache need not call
 	      this interface at all.
 
-	The phrase "kernel writes to a page cache page" means,
-	specifically, that the kernel executes store instructions
-	that dirty data in that page at the page->virtual mapping
-	of that page.  It is important to flush here to handle
-	D-cache aliasing, to make sure these kernel stores are
-	visible to user space mappings of that page.
-
-	The corollary case is just as important, if there are users
-	which have shared+writable mappings of this file, we must make
-	sure that kernel reads of these pages will see the most recent
-	stores done by the user.
-
-	If D-cache aliasing is not an issue, this routine may
-	simply be defined as a nop on that architecture.
-
-        There is a bit set aside in page->flags (PG_arch_1) as
-	"architecture private".  The kernel guarantees that,
-	for pagecache pages, it will clear this bit when such
-	a page first enters the pagecache.
-
-	This allows these interfaces to be implemented much more
-	efficiently.  It allows one to "defer" (perhaps indefinitely)
-	the actual flush if there are currently no user processes
-	mapping this page.  See sparc64's flush_dcache_page and
-	update_mmu_cache implementations for an example of how to go
-	about doing this.
-
-	The idea is, first at flush_dcache_page() time, if
-	page->mapping->i_mmap is an empty tree, just mark the architecture
-	private page flag bit.  Later, in update_mmu_cache(), a check is
-	made of this flag bit, and if set the flush is done and the flag
-	bit is cleared.
+	The phrase "kernel writes to a page cache page" means, specifically,
+	that the kernel executes store instructions that dirty data in that
+	page at the page->virtual mapping of that page.  It is important to
+	flush here to handle D-cache aliasing, to make sure these kernel stores
+	are visible to user space mappings of that page.
+
+	The corollary case is just as important, if there are users which have
+	shared+writable mappings of this file, we must make sure that kernel
+	reads of these pages will see the most recent stores done by the user.
+
+	If D-cache aliasing is not an issue, this routine may simply be defined
+	as a nop on that architecture.
+
+        There is a bit set aside in page->flags (PG_arch_1) as "architecture
+	private".  The kernel guarantees that, for pagecache pages, it will
+	clear this bit when such a page first enters the pagecache.
+
+	This allows these interfaces to be implemented much more efficiently.
+	It allows one to "defer" (perhaps indefinitely) the actual flush if
+	there are currently no user processes mapping this page.  See sparc64's
+	flush_dcache_page and update_mmu_cache implementations for an example
+	of how to go about doing this.
+
+	The idea is, first at flush_dcache_page() time, if page_file_mapping()
+	returns a mapping, and mapping_mapped on that mapping returns %false,
+	just mark the architecture private page flag bit.  Later, in
+	update_mmu_cache(), a check is made of this flag bit, and if set the
+	flush is done and the flag bit is cleared.
 
 	.. important::
 
@@ -351,19 +352,6 @@ maps this page at its virtual address.
 	architectures).  For incoherent architectures, it should flush
 	the cache of the page at vmaddr.
 
-  ``void flush_kernel_dcache_page(struct page *page)``
-
-	When the kernel needs to modify a user page is has obtained
-	with kmap, it calls this function after all modifications are
-	complete (but before kunmapping it) to bring the underlying
-	page up to date.  It is assumed here that the user has no
-	incoherent cached copies (i.e. the original page was obtained
-	from a mechanism like get_user_pages()).  The default
-	implementation is a nop and should remain so on all coherent
-	architectures.  On incoherent architectures, this should flush
-	the kernel cache for page (using page_address(page)).
-
-
   ``void flush_icache_range(unsigned long start, unsigned long end)``
 
   	When the kernel stores into addresses that it will execute
diff --git a/Documentation/core-api/cpu_hotplug.rst b/Documentation/core-api/cpu_hotplug.rst
index 1122cd3044c0..c6f4ba2fb32d 100644
--- a/Documentation/core-api/cpu_hotplug.rst
+++ b/Documentation/core-api/cpu_hotplug.rst
@@ -2,12 +2,13 @@
 CPU hotplug in the Kernel
 =========================
 
-:Date: December, 2016
+:Date: September, 2021
 :Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
-          Rusty Russell <rusty@rustcorp.com.au>,
-          Srivatsa Vaddagiri <vatsa@in.ibm.com>,
-          Ashok Raj <ashok.raj@intel.com>,
-          Joel Schopp <jschopp@austin.ibm.com>
+         Rusty Russell <rusty@rustcorp.com.au>,
+         Srivatsa Vaddagiri <vatsa@in.ibm.com>,
+         Ashok Raj <ashok.raj@intel.com>,
+         Joel Schopp <jschopp@austin.ibm.com>,
+	 Thomas Gleixner <tglx@linutronix.de>
 
 Introduction
 ============
@@ -91,9 +92,10 @@ Never use anything other than ``cpumask_t`` to represent bitmap of CPUs.
 
 Using CPU hotplug
 =================
+
 The kernel option *CONFIG_HOTPLUG_CPU* needs to be enabled. It is currently
 available on multiple architectures including ARM, MIPS, PowerPC and X86. The
-configuration is done via the sysfs interface: ::
+configuration is done via the sysfs interface::
 
  $ ls -lh /sys/devices/system/cpu
  total 0
@@ -113,14 +115,14 @@ configuration is done via the sysfs interface: ::
 
 The files *offline*, *online*, *possible*, *present* represent the CPU masks.
 Each CPU folder contains an *online* file which controls the logical on (1) and
-off (0) state. To logically shutdown CPU4: ::
+off (0) state. To logically shutdown CPU4::
 
  $ echo 0 > /sys/devices/system/cpu/cpu4/online
   smpboot: CPU 4 is now offline
 
 Once the CPU is shutdown, it will be removed from */proc/interrupts*,
 */proc/cpuinfo* and should also not be shown visible by the *top* command. To
-bring CPU4 back online: ::
+bring CPU4 back online::
 
  $ echo 1 > /sys/devices/system/cpu/cpu4/online
  smpboot: Booting Node 0 Processor 4 APIC 0x1
@@ -142,6 +144,7 @@ The CPU hotplug coordination
 
 The offline case
 ----------------
+
 Once a CPU has been logically shutdown the teardown callbacks of registered
 hotplug states will be invoked, starting with ``CPUHP_ONLINE`` and terminating
 at state ``CPUHP_OFFLINE``. This includes:
@@ -156,105 +159,491 @@ at state ``CPUHP_OFFLINE``. This includes:
 * Once all services are migrated, kernel calls an arch specific routine
   ``__cpu_disable()`` to perform arch specific cleanup.
 
-Using the hotplug API
----------------------
-It is possible to receive notifications once a CPU is offline or onlined. This
-might be important to certain drivers which need to perform some kind of setup
-or clean up functions based on the number of available CPUs: ::
-
-  #include <linux/cpuhotplug.h>
-
-  ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "X/Y:online",
-                          Y_online, Y_prepare_down);
-
-*X* is the subsystem and *Y* the particular driver. The *Y_online* callback
-will be invoked during registration on all online CPUs. If an error
-occurs during the online callback the *Y_prepare_down* callback will be
-invoked on all CPUs on which the online callback was previously invoked.
-After registration completed, the *Y_online* callback will be invoked
-once a CPU is brought online and *Y_prepare_down* will be invoked when a
-CPU is shutdown. All resources which were previously allocated in
-*Y_online* should be released in *Y_prepare_down*.
-The return value *ret* is negative if an error occurred during the
-registration process. Otherwise a positive value is returned which
-contains the allocated hotplug for dynamically allocated states
-(*CPUHP_AP_ONLINE_DYN*). It will return zero for predefined states.
-
-The callback can be remove by invoking ``cpuhp_remove_state()``. In case of a
-dynamically allocated state (*CPUHP_AP_ONLINE_DYN*) use the returned state.
-During the removal of a hotplug state the teardown callback will be invoked.
-
-Multiple instances
-~~~~~~~~~~~~~~~~~~
-If a driver has multiple instances and each instance needs to perform the
-callback independently then it is likely that a ''multi-state'' should be used.
-First a multi-state state needs to be registered: ::
-
-  ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "X/Y:online,
-                                Y_online, Y_prepare_down);
-  Y_hp_online = ret;
-
-The ``cpuhp_setup_state_multi()`` behaves similar to ``cpuhp_setup_state()``
-except it prepares the callbacks for a multi state and does not invoke
-the callbacks. This is a one time setup.
-Once a new instance is allocated, you need to register this new instance: ::
-
-  ret = cpuhp_state_add_instance(Y_hp_online, &d->node);
-
-This function will add this instance to your previously allocated
-*Y_hp_online* state and invoke the previously registered callback
-(*Y_online*) on all online CPUs. The *node* element is a ``struct
-hlist_node`` member of your per-instance data structure.
-
-On removal of the instance: ::
-  cpuhp_state_remove_instance(Y_hp_online, &d->node)
-
-should be invoked which will invoke the teardown callback on all online
-CPUs.
-
-Manual setup
-~~~~~~~~~~~~
-Usually it is handy to invoke setup and teardown callbacks on registration or
-removal of a state because usually the operation needs to performed once a CPU
-goes online (offline) and during initial setup (shutdown) of the driver. However
-each registration and removal function is also available with a ``_nocalls``
-suffix which does not invoke the provided callbacks if the invocation of the
-callbacks is not desired. During the manual setup (or teardown) the functions
-``cpus_read_lock()`` and ``cpus_read_unlock()`` should be used to inhibit CPU
-hotplug operations.
-
-
-The ordering of the events
---------------------------
-The hotplug states are defined in ``include/linux/cpuhotplug.h``:
-
-* The states *CPUHP_OFFLINE* … *CPUHP_AP_OFFLINE* are invoked before the
-  CPU is up.
-* The states *CPUHP_AP_OFFLINE* … *CPUHP_AP_ONLINE* are invoked
-  just the after the CPU has been brought up. The interrupts are off and
-  the scheduler is not yet active on this CPU. Starting with *CPUHP_AP_OFFLINE*
-  the callbacks are invoked on the target CPU.
-* The states between *CPUHP_AP_ONLINE_DYN* and *CPUHP_AP_ONLINE_DYN_END* are
-  reserved for the dynamic allocation.
-* The states are invoked in the reverse order on CPU shutdown starting with
-  *CPUHP_ONLINE* and stopping at *CPUHP_OFFLINE*. Here the callbacks are
-  invoked on the CPU that will be shutdown until *CPUHP_AP_OFFLINE*.
-
-A dynamically allocated state via *CPUHP_AP_ONLINE_DYN* is often enough.
-However if an earlier invocation during the bring up or shutdown is required
-then an explicit state should be acquired. An explicit state might also be
-required if the hotplug event requires specific ordering in respect to
-another hotplug event.
+
+The CPU hotplug API
+===================
+
+CPU hotplug state machine
+-------------------------
+
+CPU hotplug uses a trivial state machine with a linear state space from
+CPUHP_OFFLINE to CPUHP_ONLINE. Each state has a startup and a teardown
+callback.
+
+When a CPU is onlined, the startup callbacks are invoked sequentially until
+the state CPUHP_ONLINE is reached. They can also be invoked when the
+callbacks of a state are set up or an instance is added to a multi-instance
+state.
+
+When a CPU is offlined the teardown callbacks are invoked in the reverse
+order sequentially until the state CPUHP_OFFLINE is reached. They can also
+be invoked when the callbacks of a state are removed or an instance is
+removed from a multi-instance state.
+
+If a usage site requires only a callback in one direction of the hotplug
+operations (CPU online or CPU offline) then the other not-required callback
+can be set to NULL when the state is set up.
+
+The state space is divided into three sections:
+
+* The PREPARE section
+
+  The PREPARE section covers the state space from CPUHP_OFFLINE to
+  CPUHP_BRINGUP_CPU.
+
+  The startup callbacks in this section are invoked before the CPU is
+  started during a CPU online operation. The teardown callbacks are invoked
+  after the CPU has become dysfunctional during a CPU offline operation.
+
+  The callbacks are invoked on a control CPU as they can't obviously run on
+  the hotplugged CPU which is either not yet started or has become
+  dysfunctional already.
+
+  The startup callbacks are used to setup resources which are required to
+  bring a CPU successfully online. The teardown callbacks are used to free
+  resources or to move pending work to an online CPU after the hotplugged
+  CPU became dysfunctional.
+
+  The startup callbacks are allowed to fail. If a callback fails, the CPU
+  online operation is aborted and the CPU is brought down to the previous
+  state (usually CPUHP_OFFLINE) again.
+
+  The teardown callbacks in this section are not allowed to fail.
+
+* The STARTING section
+
+  The STARTING section covers the state space between CPUHP_BRINGUP_CPU + 1
+  and CPUHP_AP_ONLINE.
+
+  The startup callbacks in this section are invoked on the hotplugged CPU
+  with interrupts disabled during a CPU online operation in the early CPU
+  setup code. The teardown callbacks are invoked with interrupts disabled
+  on the hotplugged CPU during a CPU offline operation shortly before the
+  CPU is completely shut down.
+
+  The callbacks in this section are not allowed to fail.
+
+  The callbacks are used for low level hardware initialization/shutdown and
+  for core subsystems.
+
+* The ONLINE section
+
+  The ONLINE section covers the state space between CPUHP_AP_ONLINE + 1 and
+  CPUHP_ONLINE.
+
+  The startup callbacks in this section are invoked on the hotplugged CPU
+  during a CPU online operation. The teardown callbacks are invoked on the
+  hotplugged CPU during a CPU offline operation.
+
+  The callbacks are invoked in the context of the per CPU hotplug thread,
+  which is pinned on the hotplugged CPU. The callbacks are invoked with
+  interrupts and preemption enabled.
+
+  The callbacks are allowed to fail. When a callback fails the hotplug
+  operation is aborted and the CPU is brought back to the previous state.
+
+CPU online/offline operations
+-----------------------------
+
+A successful online operation looks like this::
+
+  [CPUHP_OFFLINE]
+  [CPUHP_OFFLINE + 1]->startup()       -> success
+  [CPUHP_OFFLINE + 2]->startup()       -> success
+  [CPUHP_OFFLINE + 3]                  -> skipped because startup == NULL
+  ...
+  [CPUHP_BRINGUP_CPU]->startup()       -> success
+  === End of PREPARE section
+  [CPUHP_BRINGUP_CPU + 1]->startup()   -> success
+  ...
+  [CPUHP_AP_ONLINE]->startup()         -> success
+  === End of STARTUP section
+  [CPUHP_AP_ONLINE + 1]->startup()     -> success
+  ...
+  [CPUHP_ONLINE - 1]->startup()        -> success
+  [CPUHP_ONLINE]
+
+A successful offline operation looks like this::
+
+  [CPUHP_ONLINE]
+  [CPUHP_ONLINE - 1]->teardown()       -> success
+  ...
+  [CPUHP_AP_ONLINE + 1]->teardown()    -> success
+  === Start of STARTUP section
+  [CPUHP_AP_ONLINE]->teardown()        -> success
+  ...
+  [CPUHP_BRINGUP_ONLINE - 1]->teardown()
+  ...
+  === Start of PREPARE section
+  [CPUHP_BRINGUP_CPU]->teardown()
+  [CPUHP_OFFLINE + 3]->teardown()
+  [CPUHP_OFFLINE + 2]                  -> skipped because teardown == NULL
+  [CPUHP_OFFLINE + 1]->teardown()
+  [CPUHP_OFFLINE]
+
+A failed online operation looks like this::
+
+  [CPUHP_OFFLINE]
+  [CPUHP_OFFLINE + 1]->startup()       -> success
+  [CPUHP_OFFLINE + 2]->startup()       -> success
+  [CPUHP_OFFLINE + 3]                  -> skipped because startup == NULL
+  ...
+  [CPUHP_BRINGUP_CPU]->startup()       -> success
+  === End of PREPARE section
+  [CPUHP_BRINGUP_CPU + 1]->startup()   -> success
+  ...
+  [CPUHP_AP_ONLINE]->startup()         -> success
+  === End of STARTUP section
+  [CPUHP_AP_ONLINE + 1]->startup()     -> success
+  ---
+  [CPUHP_AP_ONLINE + N]->startup()     -> fail
+  [CPUHP_AP_ONLINE + (N - 1)]->teardown()
+  ...
+  [CPUHP_AP_ONLINE + 1]->teardown()
+  === Start of STARTUP section
+  [CPUHP_AP_ONLINE]->teardown()
+  ...
+  [CPUHP_BRINGUP_ONLINE - 1]->teardown()
+  ...
+  === Start of PREPARE section
+  [CPUHP_BRINGUP_CPU]->teardown()
+  [CPUHP_OFFLINE + 3]->teardown()
+  [CPUHP_OFFLINE + 2]                  -> skipped because teardown == NULL
+  [CPUHP_OFFLINE + 1]->teardown()
+  [CPUHP_OFFLINE]
+
+A failed offline operation looks like this::
+
+  [CPUHP_ONLINE]
+  [CPUHP_ONLINE - 1]->teardown()       -> success
+  ...
+  [CPUHP_ONLINE - N]->teardown()       -> fail
+  [CPUHP_ONLINE - (N - 1)]->startup()
+  ...
+  [CPUHP_ONLINE - 1]->startup()
+  [CPUHP_ONLINE]
+
+Recursive failures cannot be handled sensibly. Look at the following
+example of a recursive fail due to a failed offline operation: ::
+
+  [CPUHP_ONLINE]
+  [CPUHP_ONLINE - 1]->teardown()       -> success
+  ...
+  [CPUHP_ONLINE - N]->teardown()       -> fail
+  [CPUHP_ONLINE - (N - 1)]->startup()  -> success
+  [CPUHP_ONLINE - (N - 2)]->startup()  -> fail
+
+The CPU hotplug state machine stops right here and does not try to go back
+down again because that would likely result in an endless loop::
+
+  [CPUHP_ONLINE - (N - 1)]->teardown() -> success
+  [CPUHP_ONLINE - N]->teardown()       -> fail
+  [CPUHP_ONLINE - (N - 1)]->startup()  -> success
+  [CPUHP_ONLINE - (N - 2)]->startup()  -> fail
+  [CPUHP_ONLINE - (N - 1)]->teardown() -> success
+  [CPUHP_ONLINE - N]->teardown()       -> fail
+
+Lather, rinse and repeat. In this case the CPU left in state::
+
+  [CPUHP_ONLINE - (N - 1)]
+
+which at least lets the system make progress and gives the user a chance to
+debug or even resolve the situation.
+
+Allocating a state
+------------------
+
+There are two ways to allocate a CPU hotplug state:
+
+* Static allocation
+
+  Static allocation has to be used when the subsystem or driver has
+  ordering requirements versus other CPU hotplug states. E.g. the PERF core
+  startup callback has to be invoked before the PERF driver startup
+  callbacks during a CPU online operation. During a CPU offline operation
+  the driver teardown callbacks have to be invoked before the core teardown
+  callback. The statically allocated states are described by constants in
+  the cpuhp_state enum which can be found in include/linux/cpuhotplug.h.
+
+  Insert the state into the enum at the proper place so the ordering
+  requirements are fulfilled. The state constant has to be used for state
+  setup and removal.
+
+  Static allocation is also required when the state callbacks are not set
+  up at runtime and are part of the initializer of the CPU hotplug state
+  array in kernel/cpu.c.
+
+* Dynamic allocation
+
+  When there are no ordering requirements for the state callbacks then
+  dynamic allocation is the preferred method. The state number is allocated
+  by the setup function and returned to the caller on success.
+
+  Only the PREPARE and ONLINE sections provide a dynamic allocation
+  range. The STARTING section does not as most of the callbacks in that
+  section have explicit ordering requirements.
+
+Setup of a CPU hotplug state
+----------------------------
+
+The core code provides the following functions to setup a state:
+
+* cpuhp_setup_state(state, name, startup, teardown)
+* cpuhp_setup_state_nocalls(state, name, startup, teardown)
+* cpuhp_setup_state_cpuslocked(state, name, startup, teardown)
+* cpuhp_setup_state_nocalls_cpuslocked(state, name, startup, teardown)
+
+For cases where a driver or a subsystem has multiple instances and the same
+CPU hotplug state callbacks need to be invoked for each instance, the CPU
+hotplug core provides multi-instance support. The advantage over driver
+specific instance lists is that the instance related functions are fully
+serialized against CPU hotplug operations and provide the automatic
+invocations of the state callbacks on add and removal. To set up such a
+multi-instance state the following function is available:
+
+* cpuhp_setup_state_multi(state, name, startup, teardown)
+
+The @state argument is either a statically allocated state or one of the
+constants for dynamically allocated states - CPUHP_PREPARE_DYN,
+CPUHP_ONLINE_DYN - depending on the state section (PREPARE, ONLINE) for
+which a dynamic state should be allocated.
+
+The @name argument is used for sysfs output and for instrumentation. The
+naming convention is "subsys:mode" or "subsys/driver:mode",
+e.g. "perf:mode" or "perf/x86:mode". The common mode names are:
+
+======== =======================================================
+prepare  For states in the PREPARE section
+
+dead     For states in the PREPARE section which do not provide
+         a startup callback
+
+starting For states in the STARTING section
+
+dying    For states in the STARTING section which do not provide
+         a startup callback
+
+online   For states in the ONLINE section
+
+offline  For states in the ONLINE section which do not provide
+         a startup callback
+======== =======================================================
+
+As the @name argument is only used for sysfs and instrumentation other mode
+descriptors can be used as well if they describe the nature of the state
+better than the common ones.
+
+Examples for @name arguments: "perf/online", "perf/x86:prepare",
+"RCU/tree:dying", "sched/waitempty"
+
+The @startup argument is a function pointer to the callback which should be
+invoked during a CPU online operation. If the usage site does not require a
+startup callback set the pointer to NULL.
+
+The @teardown argument is a function pointer to the callback which should
+be invoked during a CPU offline operation. If the usage site does not
+require a teardown callback set the pointer to NULL.
+
+The functions differ in the way how the installed callbacks are treated:
+
+  * cpuhp_setup_state_nocalls(), cpuhp_setup_state_nocalls_cpuslocked()
+    and cpuhp_setup_state_multi() only install the callbacks
+
+  * cpuhp_setup_state() and cpuhp_setup_state_cpuslocked() install the
+    callbacks and invoke the @startup callback (if not NULL) for all online
+    CPUs which have currently a state greater than the newly installed
+    state. Depending on the state section the callback is either invoked on
+    the current CPU (PREPARE section) or on each online CPU (ONLINE
+    section) in the context of the CPU's hotplug thread.
+
+    If a callback fails for CPU N then the teardown callback for CPU
+    0 .. N-1 is invoked to rollback the operation. The state setup fails,
+    the callbacks for the state are not installed and in case of dynamic
+    allocation the allocated state is freed.
+
+The state setup and the callback invocations are serialized against CPU
+hotplug operations. If the setup function has to be called from a CPU
+hotplug read locked region, then the _cpuslocked() variants have to be
+used. These functions cannot be used from within CPU hotplug callbacks.
+
+The function return values:
+  ======== ===================================================================
+  0        Statically allocated state was successfully set up
+
+  >0       Dynamically allocated state was successfully set up.
+
+           The returned number is the state number which was allocated. If
+           the state callbacks have to be removed later, e.g. module
+           removal, then this number has to be saved by the caller and used
+           as @state argument for the state remove function. For
+           multi-instance states the dynamically allocated state number is
+           also required as @state argument for the instance add/remove
+           operations.
+
+  <0	   Operation failed
+  ======== ===================================================================
+
+Removal of a CPU hotplug state
+------------------------------
+
+To remove a previously set up state, the following functions are provided:
+
+* cpuhp_remove_state(state)
+* cpuhp_remove_state_nocalls(state)
+* cpuhp_remove_state_nocalls_cpuslocked(state)
+* cpuhp_remove_multi_state(state)
+
+The @state argument is either a statically allocated state or the state
+number which was allocated in the dynamic range by cpuhp_setup_state*(). If
+the state is in the dynamic range, then the state number is freed and
+available for dynamic allocation again.
+
+The functions differ in the way how the installed callbacks are treated:
+
+  * cpuhp_remove_state_nocalls(), cpuhp_remove_state_nocalls_cpuslocked()
+    and cpuhp_remove_multi_state() only remove the callbacks.
+
+  * cpuhp_remove_state() removes the callbacks and invokes the teardown
+    callback (if not NULL) for all online CPUs which have currently a state
+    greater than the removed state. Depending on the state section the
+    callback is either invoked on the current CPU (PREPARE section) or on
+    each online CPU (ONLINE section) in the context of the CPU's hotplug
+    thread.
+
+    In order to complete the removal, the teardown callback should not fail.
+
+The state removal and the callback invocations are serialized against CPU
+hotplug operations. If the remove function has to be called from a CPU
+hotplug read locked region, then the _cpuslocked() variants have to be
+used. These functions cannot be used from within CPU hotplug callbacks.
+
+If a multi-instance state is removed then the caller has to remove all
+instances first.
+
+Multi-Instance state instance management
+----------------------------------------
+
+Once the multi-instance state is set up, instances can be added to the
+state:
+
+  * cpuhp_state_add_instance(state, node)
+  * cpuhp_state_add_instance_nocalls(state, node)
+
+The @state argument is either a statically allocated state or the state
+number which was allocated in the dynamic range by cpuhp_setup_state_multi().
+
+The @node argument is a pointer to an hlist_node which is embedded in the
+instance's data structure. The pointer is handed to the multi-instance
+state callbacks and can be used by the callback to retrieve the instance
+via container_of().
+
+The functions differ in the way how the installed callbacks are treated:
+
+  * cpuhp_state_add_instance_nocalls() and only adds the instance to the
+    multi-instance state's node list.
+
+  * cpuhp_state_add_instance() adds the instance and invokes the startup
+    callback (if not NULL) associated with @state for all online CPUs which
+    have currently a state greater than @state. The callback is only
+    invoked for the to be added instance. Depending on the state section
+    the callback is either invoked on the current CPU (PREPARE section) or
+    on each online CPU (ONLINE section) in the context of the CPU's hotplug
+    thread.
+
+    If a callback fails for CPU N then the teardown callback for CPU
+    0 .. N-1 is invoked to rollback the operation, the function fails and
+    the instance is not added to the node list of the multi-instance state.
+
+To remove an instance from the state's node list these functions are
+available:
+
+  * cpuhp_state_remove_instance(state, node)
+  * cpuhp_state_remove_instance_nocalls(state, node)
+
+The arguments are the same as for the the cpuhp_state_add_instance*()
+variants above.
+
+The functions differ in the way how the installed callbacks are treated:
+
+  * cpuhp_state_remove_instance_nocalls() only removes the instance from the
+    state's node list.
+
+  * cpuhp_state_remove_instance() removes the instance and invokes the
+    teardown callback (if not NULL) associated with @state for all online
+    CPUs which have currently a state greater than @state.  The callback is
+    only invoked for the to be removed instance.  Depending on the state
+    section the callback is either invoked on the current CPU (PREPARE
+    section) or on each online CPU (ONLINE section) in the context of the
+    CPU's hotplug thread.
+
+    In order to complete the removal, the teardown callback should not fail.
+
+The node list add/remove operations and the callback invocations are
+serialized against CPU hotplug operations. These functions cannot be used
+from within CPU hotplug callbacks and CPU hotplug read locked regions.
+
+Examples
+--------
+
+Setup and teardown a statically allocated state in the STARTING section for
+notifications on online and offline operations::
+
+   ret = cpuhp_setup_state(CPUHP_SUBSYS_STARTING, "subsys:starting", subsys_cpu_starting, subsys_cpu_dying);
+   if (ret < 0)
+        return ret;
+   ....
+   cpuhp_remove_state(CPUHP_SUBSYS_STARTING);
+
+Setup and teardown a dynamically allocated state in the ONLINE section
+for notifications on offline operations::
+
+   state = cpuhp_setup_state(CPUHP_ONLINE_DYN, "subsys:offline", NULL, subsys_cpu_offline);
+   if (state < 0)
+       return state;
+   ....
+   cpuhp_remove_state(state);
+
+Setup and teardown a dynamically allocated state in the ONLINE section
+for notifications on online operations without invoking the callbacks::
+
+   state = cpuhp_setup_state_nocalls(CPUHP_ONLINE_DYN, "subsys:online", subsys_cpu_online, NULL);
+   if (state < 0)
+       return state;
+   ....
+   cpuhp_remove_state_nocalls(state);
+
+Setup, use and teardown a dynamically allocated multi-instance state in the
+ONLINE section for notifications on online and offline operation::
+
+   state = cpuhp_setup_state_multi(CPUHP_ONLINE_DYN, "subsys:online", subsys_cpu_online, subsys_cpu_offline);
+   if (state < 0)
+       return state;
+   ....
+   ret = cpuhp_state_add_instance(state, &inst1->node);
+   if (ret)
+        return ret;
+   ....
+   ret = cpuhp_state_add_instance(state, &inst2->node);
+   if (ret)
+        return ret;
+   ....
+   cpuhp_remove_instance(state, &inst1->node);
+   ....
+   cpuhp_remove_instance(state, &inst2->node);
+   ....
+   remove_multi_state(state);
+
 
 Testing of hotplug states
 =========================
+
 One way to verify whether a custom state is working as expected or not is to
 shutdown a CPU and then put it online again. It is also possible to put the CPU
 to certain state (for instance *CPUHP_AP_ONLINE*) and then go back to
 *CPUHP_ONLINE*. This would simulate an error one state after *CPUHP_AP_ONLINE*
 which would lead to rollback to the online state.
 
-All registered states are enumerated in ``/sys/devices/system/cpu/hotplug/states``: ::
+All registered states are enumerated in ``/sys/devices/system/cpu/hotplug/states`` ::
 
  $ tail /sys/devices/system/cpu/hotplug/states
  138: mm/vmscan:online
@@ -268,7 +657,7 @@ All registered states are enumerated in ``/sys/devices/system/cpu/hotplug/states
  168: sched:active
  169: online
 
-To rollback CPU4 to ``lib/percpu_cnt:online`` and back online just issue: ::
+To rollback CPU4 to ``lib/percpu_cnt:online`` and back online just issue::
 
   $ cat /sys/devices/system/cpu/cpu4/hotplug/state
   169
@@ -276,14 +665,14 @@ To rollback CPU4 to ``lib/percpu_cnt:online`` and back online just issue: ::
   $ cat /sys/devices/system/cpu/cpu4/hotplug/state
   140
 
-It is important to note that the teardown callbac of state 140 have been
-invoked. And now get back online: ::
+It is important to note that the teardown callback of state 140 have been
+invoked. And now get back online::
 
   $ echo 169 > /sys/devices/system/cpu/cpu4/hotplug/target
   $ cat /sys/devices/system/cpu/cpu4/hotplug/state
   169
 
-With trace events enabled, the individual steps are visible, too: ::
+With trace events enabled, the individual steps are visible, too::
 
   #  TASK-PID   CPU#    TIMESTAMP  FUNCTION
   #     | |       |        |         |
@@ -318,6 +707,7 @@ trace.
 
 Architecture's requirements
 ===========================
+
 The following functions and configurations are required:
 
 ``CONFIG_HOTPLUG_CPU``
@@ -339,11 +729,12 @@ The following functions and configurations are required:
 
 User Space Notification
 =======================
-After CPU successfully onlined or offline udev events are sent. A udev rule like: ::
+
+After CPU successfully onlined or offline udev events are sent. A udev rule like::
 
   SUBSYSTEM=="cpu", DRIVERS=="processor", DEVPATH=="/devices/system/cpu/*", RUN+="the_hotplug_receiver.sh"
 
-will receive all events. A script like: ::
+will receive all events. A script like::
 
   #!/bin/sh
 
diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst
index 2a7444e3a4c2..2e7186805148 100644
--- a/Documentation/core-api/kernel-api.rst
+++ b/Documentation/core-api/kernel-api.rst
@@ -315,6 +315,9 @@ Block Devices
 .. kernel-doc:: block/genhd.c
    :export:
 
+.. kernel-doc:: block/bdev.c
+   :export:
+
 Char devices
 ============
 
diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst
index d941717a191b..5e89497ba314 100644
--- a/Documentation/core-api/printk-formats.rst
+++ b/Documentation/core-api/printk-formats.rst
@@ -130,6 +130,7 @@ printed after the symbol name with an extra ``b`` appended to the end of the
 specifier.
 
 ::
+
 	%pS	versatile_init+0x0/0x110 [module_name]
 	%pSb	versatile_init+0x0/0x110 [module_name ed5019fdf5e53be37cb1ba7899292d7e143b259e]
 	%pSRb	versatile_init+0x9/0x110 [module_name ed5019fdf5e53be37cb1ba7899292d7e143b259e]
@@ -579,7 +580,7 @@ Flags bitfields such as page flags, gfp_flags
 
 ::
 
-	%pGp	referenced|uptodate|lru|active|private|node=0|zone=2|lastcpupid=0x1fffff
+	%pGp	0x17ffffc0002036(referenced|uptodate|lru|active|private|node=0|zone=2|lastcpupid=0x1fffff)
 	%pGg	GFP_USER|GFP_DMA32|GFP_NOWARN
 	%pGv	read|exec|mayread|maywrite|mayexec|denywrite
 
diff --git a/Documentation/cpu-freq/cpu-drivers.rst b/Documentation/cpu-freq/cpu-drivers.rst
index d84ededb66f9..3b32336a7803 100644
--- a/Documentation/cpu-freq/cpu-drivers.rst
+++ b/Documentation/cpu-freq/cpu-drivers.rst
@@ -75,9 +75,6 @@ And optionally
  .resume - A pointer to a per-policy resume function which is called
  with interrupts disabled and _before_ the governor is started again.
 
- .ready - A pointer to a per-policy ready function which is called after
- the policy is fully initialized.
-
  .attr - A pointer to a NULL-terminated list of "struct freq_attr" which
  allow to export values to sysfs.
 
diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst
index 83ec4a556c19..21dc03bc10a4 100644
--- a/Documentation/dev-tools/kasan.rst
+++ b/Documentation/dev-tools/kasan.rst
@@ -181,9 +181,16 @@ By default, KASAN prints a bug report only for the first invalid memory access.
 With ``kasan_multi_shot``, KASAN prints a report on every invalid access. This
 effectively disables ``panic_on_warn`` for KASAN reports.
 
+Alternatively, independent of ``panic_on_warn`` the ``kasan.fault=`` boot
+parameter can be used to control panic and reporting behaviour:
+
+- ``kasan.fault=report`` or ``=panic`` controls whether to only print a KASAN
+  report or also panic the kernel (default: ``report``). The panic happens even
+  if ``kasan_multi_shot`` is enabled.
+
 Hardware tag-based KASAN mode (see the section about various modes below) is
 intended for use in production as a security mitigation. Therefore, it supports
-boot parameters that allow disabling KASAN or controlling its features.
+additional boot parameters that allow disabling KASAN or controlling features:
 
 - ``kasan=off`` or ``=on`` controls whether KASAN is enabled (default: ``on``).
 
@@ -199,10 +206,6 @@ boot parameters that allow disabling KASAN or controlling its features.
 - ``kasan.stacktrace=off`` or ``=on`` disables or enables alloc and free stack
   traces collection (default: ``on``).
 
-- ``kasan.fault=report`` or ``=panic`` controls whether to only print a KASAN
-  report or also panic the kernel (default: ``report``). The panic happens even
-  if ``kasan_multi_shot`` is enabled.
-
 Implementation details
 ----------------------
 
diff --git a/Documentation/dev-tools/kcsan.rst b/Documentation/dev-tools/kcsan.rst
index 6a600cf8430b..7db43c7c09b8 100644
--- a/Documentation/dev-tools/kcsan.rst
+++ b/Documentation/dev-tools/kcsan.rst
@@ -127,6 +127,18 @@ Kconfig options:
   causes KCSAN to not report data races due to conflicts where the only plain
   accesses are aligned writes up to word size.
 
+* ``CONFIG_KCSAN_PERMISSIVE``: Enable additional permissive rules to ignore
+  certain classes of common data races. Unlike the above, the rules are more
+  complex involving value-change patterns, access type, and address. This
+  option depends on ``CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=y``. For details
+  please see the ``kernel/kcsan/permissive.h``. Testers and maintainers that
+  only focus on reports from specific subsystems and not the whole kernel are
+  recommended to disable this option.
+
+To use the strictest possible rules, select ``CONFIG_KCSAN_STRICT=y``, which
+configures KCSAN to follow the Linux-kernel memory consistency model (LKMM) as
+closely as possible.
+
 DebugFS interface
 ~~~~~~~~~~~~~~~~~
 
diff --git a/Documentation/dev-tools/kfence.rst b/Documentation/dev-tools/kfence.rst
index fdf04e741ea5..0fbe3308bf37 100644
--- a/Documentation/dev-tools/kfence.rst
+++ b/Documentation/dev-tools/kfence.rst
@@ -65,25 +65,27 @@ Error reports
 A typical out-of-bounds access looks like this::
 
     ==================================================================
-    BUG: KFENCE: out-of-bounds read in test_out_of_bounds_read+0xa3/0x22b
+    BUG: KFENCE: out-of-bounds read in test_out_of_bounds_read+0xa6/0x234
 
-    Out-of-bounds read at 0xffffffffb672efff (1B left of kfence-#17):
-     test_out_of_bounds_read+0xa3/0x22b
-     kunit_try_run_case+0x51/0x85
+    Out-of-bounds read at 0xffff8c3f2e291fff (1B left of kfence-#72):
+     test_out_of_bounds_read+0xa6/0x234
+     kunit_try_run_case+0x61/0xa0
      kunit_generic_run_threadfn_adapter+0x16/0x30
-     kthread+0x137/0x160
+     kthread+0x176/0x1b0
      ret_from_fork+0x22/0x30
 
-    kfence-#17 [0xffffffffb672f000-0xffffffffb672f01f, size=32, cache=kmalloc-32] allocated by task 507:
-     test_alloc+0xf3/0x25b
-     test_out_of_bounds_read+0x98/0x22b
-     kunit_try_run_case+0x51/0x85
+    kfence-#72: 0xffff8c3f2e292000-0xffff8c3f2e29201f, size=32, cache=kmalloc-32
+
+    allocated by task 484 on cpu 0 at 32.919330s:
+     test_alloc+0xfe/0x738
+     test_out_of_bounds_read+0x9b/0x234
+     kunit_try_run_case+0x61/0xa0
      kunit_generic_run_threadfn_adapter+0x16/0x30
-     kthread+0x137/0x160
+     kthread+0x176/0x1b0
      ret_from_fork+0x22/0x30
 
-    CPU: 4 PID: 107 Comm: kunit_try_catch Not tainted 5.8.0-rc6+ #7
-    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
+    CPU: 0 PID: 484 Comm: kunit_try_catch Not tainted 5.13.0-rc3+ #7
+    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
     ==================================================================
 
 The header of the report provides a short summary of the function involved in
@@ -96,30 +98,32 @@ Use-after-free accesses are reported as::
     ==================================================================
     BUG: KFENCE: use-after-free read in test_use_after_free_read+0xb3/0x143
 
-    Use-after-free read at 0xffffffffb673dfe0 (in kfence-#24):
+    Use-after-free read at 0xffff8c3f2e2a0000 (in kfence-#79):
      test_use_after_free_read+0xb3/0x143
-     kunit_try_run_case+0x51/0x85
+     kunit_try_run_case+0x61/0xa0
      kunit_generic_run_threadfn_adapter+0x16/0x30
-     kthread+0x137/0x160
+     kthread+0x176/0x1b0
      ret_from_fork+0x22/0x30
 
-    kfence-#24 [0xffffffffb673dfe0-0xffffffffb673dfff, size=32, cache=kmalloc-32] allocated by task 507:
-     test_alloc+0xf3/0x25b
+    kfence-#79: 0xffff8c3f2e2a0000-0xffff8c3f2e2a001f, size=32, cache=kmalloc-32
+
+    allocated by task 488 on cpu 2 at 33.871326s:
+     test_alloc+0xfe/0x738
      test_use_after_free_read+0x76/0x143
-     kunit_try_run_case+0x51/0x85
+     kunit_try_run_case+0x61/0xa0
      kunit_generic_run_threadfn_adapter+0x16/0x30
-     kthread+0x137/0x160
+     kthread+0x176/0x1b0
      ret_from_fork+0x22/0x30
 
-    freed by task 507:
+    freed by task 488 on cpu 2 at 33.871358s:
      test_use_after_free_read+0xa8/0x143
-     kunit_try_run_case+0x51/0x85
+     kunit_try_run_case+0x61/0xa0
      kunit_generic_run_threadfn_adapter+0x16/0x30
-     kthread+0x137/0x160
+     kthread+0x176/0x1b0
      ret_from_fork+0x22/0x30
 
-    CPU: 4 PID: 109 Comm: kunit_try_catch Tainted: G        W         5.8.0-rc6+ #7
-    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
+    CPU: 2 PID: 488 Comm: kunit_try_catch Tainted: G    B             5.13.0-rc3+ #7
+    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
     ==================================================================
 
 KFENCE also reports on invalid frees, such as double-frees::
@@ -127,30 +131,32 @@ KFENCE also reports on invalid frees, such as double-frees::
     ==================================================================
     BUG: KFENCE: invalid free in test_double_free+0xdc/0x171
 
-    Invalid free of 0xffffffffb6741000:
+    Invalid free of 0xffff8c3f2e2a4000 (in kfence-#81):
      test_double_free+0xdc/0x171
-     kunit_try_run_case+0x51/0x85
+     kunit_try_run_case+0x61/0xa0
      kunit_generic_run_threadfn_adapter+0x16/0x30
-     kthread+0x137/0x160
+     kthread+0x176/0x1b0
      ret_from_fork+0x22/0x30
 
-    kfence-#26 [0xffffffffb6741000-0xffffffffb674101f, size=32, cache=kmalloc-32] allocated by task 507:
-     test_alloc+0xf3/0x25b
+    kfence-#81: 0xffff8c3f2e2a4000-0xffff8c3f2e2a401f, size=32, cache=kmalloc-32
+
+    allocated by task 490 on cpu 1 at 34.175321s:
+     test_alloc+0xfe/0x738
      test_double_free+0x76/0x171
-     kunit_try_run_case+0x51/0x85
+     kunit_try_run_case+0x61/0xa0
      kunit_generic_run_threadfn_adapter+0x16/0x30
-     kthread+0x137/0x160
+     kthread+0x176/0x1b0
      ret_from_fork+0x22/0x30
 
-    freed by task 507:
+    freed by task 490 on cpu 1 at 34.175348s:
      test_double_free+0xa8/0x171
-     kunit_try_run_case+0x51/0x85
+     kunit_try_run_case+0x61/0xa0
      kunit_generic_run_threadfn_adapter+0x16/0x30
-     kthread+0x137/0x160
+     kthread+0x176/0x1b0
      ret_from_fork+0x22/0x30
 
-    CPU: 4 PID: 111 Comm: kunit_try_catch Tainted: G        W         5.8.0-rc6+ #7
-    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
+    CPU: 1 PID: 490 Comm: kunit_try_catch Tainted: G    B             5.13.0-rc3+ #7
+    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
     ==================================================================
 
 KFENCE also uses pattern-based redzones on the other side of an object's guard
@@ -160,23 +166,25 @@ These are reported on frees::
     ==================================================================
     BUG: KFENCE: memory corruption in test_kmalloc_aligned_oob_write+0xef/0x184
 
-    Corrupted memory at 0xffffffffb6797ff9 [ 0xac . . . . . . ] (in kfence-#69):
+    Corrupted memory at 0xffff8c3f2e33aff9 [ 0xac . . . . . . ] (in kfence-#156):
      test_kmalloc_aligned_oob_write+0xef/0x184
-     kunit_try_run_case+0x51/0x85
+     kunit_try_run_case+0x61/0xa0
      kunit_generic_run_threadfn_adapter+0x16/0x30
-     kthread+0x137/0x160
+     kthread+0x176/0x1b0
      ret_from_fork+0x22/0x30
 
-    kfence-#69 [0xffffffffb6797fb0-0xffffffffb6797ff8, size=73, cache=kmalloc-96] allocated by task 507:
-     test_alloc+0xf3/0x25b
+    kfence-#156: 0xffff8c3f2e33afb0-0xffff8c3f2e33aff8, size=73, cache=kmalloc-96
+
+    allocated by task 502 on cpu 7 at 42.159302s:
+     test_alloc+0xfe/0x738
      test_kmalloc_aligned_oob_write+0x57/0x184
-     kunit_try_run_case+0x51/0x85
+     kunit_try_run_case+0x61/0xa0
      kunit_generic_run_threadfn_adapter+0x16/0x30
-     kthread+0x137/0x160
+     kthread+0x176/0x1b0
      ret_from_fork+0x22/0x30
 
-    CPU: 4 PID: 120 Comm: kunit_try_catch Tainted: G        W         5.8.0-rc6+ #7
-    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
+    CPU: 7 PID: 502 Comm: kunit_try_catch Tainted: G    B             5.13.0-rc3+ #7
+    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
     ==================================================================
 
 For such errors, the address where the corruption occurred as well as the
diff --git a/Documentation/dev-tools/kunit/kunit-tool.rst b/Documentation/dev-tools/kunit/kunit-tool.rst
index c7ff9afe407a..ae52e0f489f9 100644
--- a/Documentation/dev-tools/kunit/kunit-tool.rst
+++ b/Documentation/dev-tools/kunit/kunit-tool.rst
@@ -114,9 +114,12 @@ results in TAP format, you can pass the ``--raw_output`` argument.
 
 	./tools/testing/kunit/kunit.py run --raw_output
 
-.. note::
-	The raw output from test runs may contain other, non-KUnit kernel log
-	lines.
+The raw output from test runs may contain other, non-KUnit kernel log
+lines. You can see just KUnit output with ``--raw_output=kunit``:
+
+.. code-block:: bash
+
+	./tools/testing/kunit/kunit.py run --raw_output=kunit
 
 If you have KUnit results in their raw TAP format, you can parse them and print
 the human-readable summary with the ``parse`` command for kunit_tool. This
diff --git a/Documentation/dev-tools/kunit/running_tips.rst b/Documentation/dev-tools/kunit/running_tips.rst
index d1626d548fa5..30d2147eb5b5 100644
--- a/Documentation/dev-tools/kunit/running_tips.rst
+++ b/Documentation/dev-tools/kunit/running_tips.rst
@@ -80,6 +80,16 @@ file ``.kunitconfig``, you can just pass in the dir, e.g.
 	automagically, but tests could theoretically depend on incompatible
 	options, so handling that would be tricky.
 
+Setting kernel commandline parameters
+-------------------------------------
+
+You can use ``--kernel_args`` to pass arbitrary kernel arguments, e.g.
+
+.. code-block:: bash
+
+	$ ./tools/testing/kunit/kunit.py run --kernel_args=param=42 --kernel_args=param2=false
+
+
 Generating code coverage reports under UML
 ------------------------------------------
 
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,audsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,audsys.txt
index b32d374193c7..699776be1dd3 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,audsys.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,audsys.txt
@@ -13,6 +13,7 @@ Required Properties:
 	- "mediatek,mt7623-audsys", "mediatek,mt2701-audsys", "syscon"
 	- "mediatek,mt8167-audiosys", "syscon"
 	- "mediatek,mt8183-audiosys", "syscon"
+	- "mediatek,mt8192-audsys", "syscon"
 	- "mediatek,mt8516-audsys", "syscon"
 - #clock-cells: Must be 1
 
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml
index 2d4ff0ce387b..f9ffa5b703a5 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml
@@ -29,6 +29,7 @@ properties:
               - mediatek,mt8167-mmsys
               - mediatek,mt8173-mmsys
               - mediatek,mt8183-mmsys
+              - mediatek,mt8192-mmsys
               - mediatek,mt8365-mmsys
           - const: syscon
       - items:
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8192-clock.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8192-clock.yaml
new file mode 100644
index 000000000000..c8c67c033f8c
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8192-clock.yaml
@@ -0,0 +1,199 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/arm/mediatek/mediatek,mt8192-clock.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: MediaTek Functional Clock Controller for MT8192
+
+maintainers:
+  - Chun-Jie Chen <chun-jie.chen@mediatek.com>
+
+description:
+  The Mediatek functional clock controller provides various clocks on MT8192.
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - mediatek,mt8192-scp_adsp
+          - mediatek,mt8192-imp_iic_wrap_c
+          - mediatek,mt8192-imp_iic_wrap_e
+          - mediatek,mt8192-imp_iic_wrap_s
+          - mediatek,mt8192-imp_iic_wrap_ws
+          - mediatek,mt8192-imp_iic_wrap_w
+          - mediatek,mt8192-imp_iic_wrap_n
+          - mediatek,mt8192-msdc_top
+          - mediatek,mt8192-msdc
+          - mediatek,mt8192-mfgcfg
+          - mediatek,mt8192-imgsys
+          - mediatek,mt8192-imgsys2
+          - mediatek,mt8192-vdecsys_soc
+          - mediatek,mt8192-vdecsys
+          - mediatek,mt8192-vencsys
+          - mediatek,mt8192-camsys
+          - mediatek,mt8192-camsys_rawa
+          - mediatek,mt8192-camsys_rawb
+          - mediatek,mt8192-camsys_rawc
+          - mediatek,mt8192-ipesys
+          - mediatek,mt8192-mdpsys
+
+  reg:
+    maxItems: 1
+
+  '#clock-cells':
+    const: 1
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    scp_adsp: clock-controller@10720000 {
+        compatible = "mediatek,mt8192-scp_adsp";
+        reg = <0x10720000 0x1000>;
+        #clock-cells = <1>;
+    };
+
+  - |
+    imp_iic_wrap_c: clock-controller@11007000 {
+        compatible = "mediatek,mt8192-imp_iic_wrap_c";
+        reg = <0x11007000 0x1000>;
+        #clock-cells = <1>;
+    };
+
+  - |
+    imp_iic_wrap_e: clock-controller@11cb1000 {
+        compatible = "mediatek,mt8192-imp_iic_wrap_e";
+        reg = <0x11cb1000 0x1000>;
+        #clock-cells = <1>;
+    };
+
+  - |
+    imp_iic_wrap_s: clock-controller@11d03000 {
+        compatible = "mediatek,mt8192-imp_iic_wrap_s";
+        reg = <0x11d03000 0x1000>;
+        #clock-cells = <1>;
+    };
+
+  - |
+    imp_iic_wrap_ws: clock-controller@11d23000 {
+        compatible = "mediatek,mt8192-imp_iic_wrap_ws";
+        reg = <0x11d23000 0x1000>;
+        #clock-cells = <1>;
+    };
+
+  - |
+    imp_iic_wrap_w: clock-controller@11e01000 {
+        compatible = "mediatek,mt8192-imp_iic_wrap_w";
+        reg = <0x11e01000 0x1000>;
+        #clock-cells = <1>;
+    };
+
+  - |
+    imp_iic_wrap_n: clock-controller@11f02000 {
+        compatible = "mediatek,mt8192-imp_iic_wrap_n";
+        reg = <0x11f02000 0x1000>;
+        #clock-cells = <1>;
+    };
+
+  - |
+    msdc_top: clock-controller@11f10000 {
+        compatible = "mediatek,mt8192-msdc_top";
+        reg = <0x11f10000 0x1000>;
+        #clock-cells = <1>;
+    };
+
+  - |
+    msdc: clock-controller@11f60000 {
+        compatible = "mediatek,mt8192-msdc";
+        reg = <0x11f60000 0x1000>;
+        #clock-cells = <1>;
+    };
+
+  - |
+    mfgcfg: clock-controller@13fbf000 {
+        compatible = "mediatek,mt8192-mfgcfg";
+        reg = <0x13fbf000 0x1000>;
+        #clock-cells = <1>;
+    };
+
+  - |
+    imgsys: clock-controller@15020000 {
+        compatible = "mediatek,mt8192-imgsys";
+        reg = <0x15020000 0x1000>;
+        #clock-cells = <1>;
+    };
+
+  - |
+    imgsys2: clock-controller@15820000 {
+        compatible = "mediatek,mt8192-imgsys2";
+        reg = <0x15820000 0x1000>;
+        #clock-cells = <1>;
+    };
+
+  - |
+    vdecsys_soc: clock-controller@1600f000 {
+        compatible = "mediatek,mt8192-vdecsys_soc";
+        reg = <0x1600f000 0x1000>;
+        #clock-cells = <1>;
+    };
+
+  - |
+    vdecsys: clock-controller@1602f000 {
+        compatible = "mediatek,mt8192-vdecsys";
+        reg = <0x1602f000 0x1000>;
+        #clock-cells = <1>;
+    };
+
+  - |
+    vencsys: clock-controller@17000000 {
+        compatible = "mediatek,mt8192-vencsys";
+        reg = <0x17000000 0x1000>;
+        #clock-cells = <1>;
+    };
+
+  - |
+    camsys: clock-controller@1a000000 {
+        compatible = "mediatek,mt8192-camsys";
+        reg = <0x1a000000 0x1000>;
+        #clock-cells = <1>;
+    };
+
+  - |
+    camsys_rawa: clock-controller@1a04f000 {
+        compatible = "mediatek,mt8192-camsys_rawa";
+        reg = <0x1a04f000 0x1000>;
+        #clock-cells = <1>;
+    };
+
+  - |
+    camsys_rawb: clock-controller@1a06f000 {
+        compatible = "mediatek,mt8192-camsys_rawb";
+        reg = <0x1a06f000 0x1000>;
+        #clock-cells = <1>;
+    };
+
+  - |
+    camsys_rawc: clock-controller@1a08f000 {
+        compatible = "mediatek,mt8192-camsys_rawc";
+        reg = <0x1a08f000 0x1000>;
+        #clock-cells = <1>;
+    };
+
+  - |
+    ipesys: clock-controller@1b000000 {
+        compatible = "mediatek,mt8192-ipesys";
+        reg = <0x1b000000 0x1000>;
+        #clock-cells = <1>;
+    };
+
+  - |
+    mdpsys: clock-controller@1f000000 {
+        compatible = "mediatek,mt8192-mdpsys";
+        reg = <0x1f000000 0x1000>;
+        #clock-cells = <1>;
+    };
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8192-sys-clock.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8192-sys-clock.yaml
new file mode 100644
index 000000000000..5705bcf1fe47
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8192-sys-clock.yaml
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/arm/mediatek/mediatek,mt8192-sys-clock.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: MediaTek System Clock Controller for MT8192
+
+maintainers:
+  - Chun-Jie Chen <chun-jie.chen@mediatek.com>
+
+description:
+  The Mediatek system clock controller provides various clocks and system configuration
+  like reset and bus protection on MT8192.
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - mediatek,mt8192-topckgen
+          - mediatek,mt8192-infracfg
+          - mediatek,mt8192-pericfg
+          - mediatek,mt8192-apmixedsys
+      - const: syscon
+
+  reg:
+    maxItems: 1
+
+  '#clock-cells':
+    const: 1
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    topckgen: syscon@10000000 {
+        compatible = "mediatek,mt8192-topckgen", "syscon";
+        reg = <0x10000000 0x1000>;
+        #clock-cells = <1>;
+    };
+
+  - |
+    infracfg: syscon@10001000 {
+        compatible = "mediatek,mt8192-infracfg", "syscon";
+        reg = <0x10001000 0x1000>;
+        #clock-cells = <1>;
+    };
+
+  - |
+    pericfg: syscon@10003000 {
+        compatible = "mediatek,mt8192-pericfg", "syscon";
+        reg = <0x10003000 0x1000>;
+        #clock-cells = <1>;
+    };
+
+  - |
+    apmixedsys: syscon@1000c000 {
+        compatible = "mediatek,mt8192-apmixedsys", "syscon";
+        reg = <0x1000c000 0x1000>;
+        #clock-cells = <1>;
+    };
diff --git a/Documentation/devicetree/bindings/auxdisplay/hit,hd44780.yaml b/Documentation/devicetree/bindings/auxdisplay/hit,hd44780.yaml
index 9222b06e93a0..fde07e4b119d 100644
--- a/Documentation/devicetree/bindings/auxdisplay/hit,hd44780.yaml
+++ b/Documentation/devicetree/bindings/auxdisplay/hit,hd44780.yaml
@@ -12,7 +12,10 @@ maintainers:
 description:
   The Hitachi HD44780 Character LCD Controller is commonly used on character
   LCDs that can display one or more lines of text. It exposes an M6800 bus
-  interface, which can be used in either 4-bit or 8-bit mode.
+  interface, which can be used in either 4-bit or 8-bit mode. By using a
+  GPIO expander it is possible to use the driver with one of the popular I2C
+  expander boards based on the PCF8574 available for these displays. For
+  an example see below.
 
 properties:
   compatible:
@@ -94,3 +97,29 @@ examples:
             display-height-chars = <2>;
             display-width-chars = <16>;
     };
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    i2c {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            pcf8574: pcf8574@27 {
+                    compatible = "nxp,pcf8574";
+                    reg = <0x27>;
+                    gpio-controller;
+                    #gpio-cells = <2>;
+            };
+    };
+    hd44780 {
+            compatible = "hit,hd44780";
+            display-height-chars = <2>;
+            display-width-chars  = <16>;
+            data-gpios = <&pcf8574 4 0>,
+                         <&pcf8574 5 0>,
+                         <&pcf8574 6 0>,
+                         <&pcf8574 7 0>;
+            enable-gpios = <&pcf8574 2 0>;
+            rs-gpios = <&pcf8574 0 0>;
+            rw-gpios = <&pcf8574 1 0>;
+            backlight-gpios = <&pcf8574 3 0>;
+    };
diff --git a/Documentation/devicetree/bindings/clock/brcm,iproc-clocks.yaml b/Documentation/devicetree/bindings/clock/brcm,iproc-clocks.yaml
index 1174c9aa9934..5ad147d265e6 100644
--- a/Documentation/devicetree/bindings/clock/brcm,iproc-clocks.yaml
+++ b/Documentation/devicetree/bindings/clock/brcm,iproc-clocks.yaml
@@ -61,7 +61,7 @@ properties:
     maxItems: 1
 
   '#clock-cells':
-    const: 1
+    true
 
   clock-output-names:
     minItems: 1
@@ -74,6 +74,23 @@ allOf:
           contains:
             enum:
               - brcm,cygnus-armpll
+              - brcm,nsp-armpll
+    then:
+      properties:
+        '#clock-cells':
+          const: 0
+    else:
+      properties:
+        '#clock-cells':
+          const: 1
+      required:
+        - clock-output-names
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - brcm,cygnus-armpll
               - brcm,cygnus-genpll
               - brcm,cygnus-lcpll0
               - brcm,cygnus-mipipll
@@ -358,7 +375,6 @@ required:
   - reg
   - clocks
   - '#clock-cells'
-  - clock-output-names
 
 additionalProperties: false
 
@@ -392,3 +408,10 @@ examples:
         clocks = <&osc2>;
         clock-output-names = "keypad", "adc/touch", "pwm";
     };
+  - |
+    arm_clk@0 {
+        #clock-cells = <0>;
+        compatible = "brcm,nsp-armpll";
+        clocks = <&osc>;
+        reg = <0x0 0x1000>;
+    };
diff --git a/Documentation/devicetree/bindings/clock/clk-exynos-audss.txt b/Documentation/devicetree/bindings/clock/clk-exynos-audss.txt
deleted file mode 100644
index 6030afb10b5c..000000000000
--- a/Documentation/devicetree/bindings/clock/clk-exynos-audss.txt
+++ /dev/null
@@ -1,103 +0,0 @@
-* Samsung Audio Subsystem Clock Controller
-
-The Samsung Audio Subsystem clock controller generates and supplies clocks
-to Audio Subsystem block available in the S5PV210 and Exynos SoCs. The clock
-binding described here is applicable to all SoCs in Exynos family.
-
-Required Properties:
-
-- compatible: should be one of the following:
-  - "samsung,exynos4210-audss-clock" - controller compatible with all Exynos4 SoCs.
-  - "samsung,exynos5250-audss-clock" - controller compatible with Exynos5250
-    SoCs.
-  - "samsung,exynos5410-audss-clock" - controller compatible with Exynos5410
-    SoCs.
-  - "samsung,exynos5420-audss-clock" - controller compatible with Exynos5420
-    SoCs.
-- reg: physical base address and length of the controller's register set.
-
-- #clock-cells: should be 1.
-
-- clocks:
-  - pll_ref: Fixed rate PLL reference clock, parent of mout_audss. "fin_pll"
-    is used if not specified.
-  - pll_in: Input PLL to the AudioSS block, parent of mout_audss. "fout_epll"
-    is used if not specified.
-  - cdclk: External i2s clock, parent of mout_i2s. "cdclk0" is used if not
-    specified.
-  - sclk_audio: Audio bus clock, parent of mout_i2s. "sclk_audio0" is used if
-    not specified.
-  - sclk_pcm_in: PCM clock, parent of sclk_pcm.  "sclk_pcm0" is used if not
-    specified.
-
-- clock-names: Aliases for the above clocks. They should be "pll_ref",
-  "pll_in", "cdclk", "sclk_audio", and "sclk_pcm_in" respectively.
-
-Optional Properties:
-
-  - power-domains: a phandle to respective power domain node as described by
-    generic PM domain bindings (see power/power_domain.txt for more
-    information).
-
-The following is the list of clocks generated by the controller. Each clock is
-assigned an identifier and client nodes use this identifier to specify the
-clock which they consume. Some of the clocks are available only on a particular
-Exynos4 SoC and this is specified where applicable.
-
-Provided clocks:
-
-Clock           ID      SoC (if specific)
------------------------------------------------
-
-mout_audss      0
-mout_i2s        1
-dout_srp        2
-dout_aud_bus    3
-dout_i2s        4
-srp_clk         5
-i2s_bus         6
-sclk_i2s        7
-pcm_bus         8
-sclk_pcm        9
-adma            10      Exynos5420
-
-Example 1: An example of a clock controller node using the default input
-	   clock names is listed below.
-
-clock_audss: audss-clock-controller@3810000 {
-	compatible = "samsung,exynos5250-audss-clock";
-	reg = <0x03810000 0x0C>;
-	#clock-cells = <1>;
-};
-
-Example 2: An example of a clock controller node with the input clocks
-           specified.
-
-clock_audss: audss-clock-controller@3810000 {
-	compatible = "samsung,exynos5250-audss-clock";
-	reg = <0x03810000 0x0C>;
-	#clock-cells = <1>;
-	clocks = <&clock 1>, <&clock 7>, <&clock 138>, <&clock 160>,
-		<&ext_i2s_clk>;
-	clock-names = "pll_ref", "pll_in", "sclk_audio", "sclk_pcm_in", "cdclk";
-};
-
-Example 3: I2S controller node that consumes the clock generated by the clock
-           controller. Refer to the standard clock bindings for information
-           about 'clocks' and 'clock-names' property.
-
-i2s0: i2s@3830000 {
-	compatible = "samsung,i2s-v5";
-	reg = <0x03830000 0x100>;
-	dmas = <&pdma0 10
-		&pdma0 9
-		&pdma0 8>;
-	dma-names = "tx", "rx", "tx-sec";
-	clocks = <&clock_audss EXYNOS_I2S_BUS>,
-		<&clock_audss EXYNOS_I2S_BUS>,
-		<&clock_audss EXYNOS_SCLK_I2S>,
-		<&clock_audss EXYNOS_MOUT_AUDSS>,
-		<&clock_audss EXYNOS_MOUT_I2S>;
-	clock-names = "iis", "i2s_opclk0", "i2s_opclk1",
-		      "mout_audss", "mout_i2s";
-};
diff --git a/Documentation/devicetree/bindings/clock/clk-s5pv210-audss.txt b/Documentation/devicetree/bindings/clock/clk-s5pv210-audss.txt
deleted file mode 100644
index f6272dcd96f4..000000000000
--- a/Documentation/devicetree/bindings/clock/clk-s5pv210-audss.txt
+++ /dev/null
@@ -1,53 +0,0 @@
-* Samsung Audio Subsystem Clock Controller
-
-The Samsung Audio Subsystem clock controller generates and supplies clocks
-to Audio Subsystem block available in the S5PV210 and compatible SoCs.
-
-Required Properties:
-
-- compatible: should be "samsung,s5pv210-audss-clock".
-- reg: physical base address and length of the controller's register set.
-
-- #clock-cells: should be 1.
-
-- clocks:
-  - hclk: AHB bus clock of the Audio Subsystem.
-  - xxti: Optional fixed rate PLL reference clock, parent of mout_audss. If
-    not specified (i.e. xusbxti is used for PLL reference), it is fixed to
-    a clock named "xxti".
-  - fout_epll: Input PLL to the AudioSS block, parent of mout_audss.
-  - iiscdclk0: Optional external i2s clock, parent of mout_i2s. If not
-    specified, it is fixed to a clock named "iiscdclk0".
-  - sclk_audio0: Audio bus clock, parent of mout_i2s.
-
-- clock-names: Aliases for the above clocks. They should be "hclk",
-  "xxti", "fout_epll", "iiscdclk0", and "sclk_audio0" respectively.
-
-All available clocks are defined as preprocessor macros in
-dt-bindings/clock/s5pv210-audss-clk.h header and can be used in device
-tree sources.
-
-Example: Clock controller node.
-
-	clk_audss: clock-controller@c0900000 {
-		compatible = "samsung,s5pv210-audss-clock";
-		reg = <0xc0900000 0x1000>;
-		#clock-cells = <1>;
-		clock-names = "hclk", "xxti",
-				"fout_epll", "sclk_audio0";
-		clocks = <&clocks DOUT_HCLKP>, <&xxti>,
-				<&clocks FOUT_EPLL>, <&clocks SCLK_AUDIO0>;
-	};
-
-Example: I2S controller node that consumes the clock generated by the clock
-	 controller. Refer to the standard clock bindings for information
-         about 'clocks' and 'clock-names' property.
-
-	i2s0: i2s@3830000 {
-		/* ... */
-		clock-names = "iis", "i2s_opclk0",
-				"i2s_opclk1";
-		clocks = <&clk_audss CLK_I2S>, <&clk_audss CLK_I2S>,
-				<&clk_audss CLK_DOUT_AUD_BUS>;
-		/* ... */
-	};
diff --git a/Documentation/devicetree/bindings/clock/exynos3250-clock.txt b/Documentation/devicetree/bindings/clock/exynos3250-clock.txt
deleted file mode 100644
index 7441ed519f02..000000000000
--- a/Documentation/devicetree/bindings/clock/exynos3250-clock.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-* Samsung Exynos3250 Clock Controller
-
-The Exynos3250 clock controller generates and supplies clock to various
-controllers within the Exynos3250 SoC.
-
-Required Properties:
-
-- compatible: should be one of the following.
-  - "samsung,exynos3250-cmu" - controller compatible with Exynos3250 SoC.
-  - "samsung,exynos3250-cmu-dmc" - controller compatible with
-    Exynos3250 SoC for Dynamic Memory Controller domain.
-  - "samsung,exynos3250-cmu-isp" - ISP block clock controller compatible
-     with Exynos3250 SOC
-
-- reg: physical base address of the controller and length of memory mapped
-  region.
-
-- #clock-cells: should be 1.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume.
-
-All available clocks are defined as preprocessor macros in
-dt-bindings/clock/exynos3250.h header and can be used in device
-tree sources.
-
-Example 1: Examples of clock controller nodes are listed below.
-
-	cmu: clock-controller@10030000 {
-		compatible = "samsung,exynos3250-cmu";
-		reg = <0x10030000 0x20000>;
-		#clock-cells = <1>;
-	};
-
-	cmu_dmc: clock-controller@105c0000 {
-		compatible = "samsung,exynos3250-cmu-dmc";
-		reg = <0x105C0000 0x2000>;
-		#clock-cells = <1>;
-	};
-
-	cmu_isp: clock-controller@10048000 {
-		compatible = "samsung,exynos3250-cmu-isp";
-		reg = <0x10048000 0x1000>;
-		#clock-cells = <1>;
-	};
-
-Example 2: UART controller node that consumes the clock generated by the clock
-	   controller. Refer to the standard clock bindings for information
-	   about 'clocks' and 'clock-names' property.
-
-	serial@13800000 {
-		compatible = "samsung,exynos4210-uart";
-		reg = <0x13800000 0x100>;
-		interrupts = <0 109 0>;
-		clocks = <&cmu CLK_UART0>, <&cmu CLK_SCLK_UART0>;
-		clock-names = "uart", "clk_uart_baud0";
-	};
diff --git a/Documentation/devicetree/bindings/clock/exynos4-clock.txt b/Documentation/devicetree/bindings/clock/exynos4-clock.txt
deleted file mode 100644
index 17bb11365354..000000000000
--- a/Documentation/devicetree/bindings/clock/exynos4-clock.txt
+++ /dev/null
@@ -1,86 +0,0 @@
-* Samsung Exynos4 Clock Controller
-
-The Exynos4 clock controller generates and supplies clock to various controllers
-within the Exynos4 SoC. The clock binding described here is applicable to all
-SoC's in the Exynos4 family.
-
-Required Properties:
-
-- compatible: should be one of the following.
-  - "samsung,exynos4210-clock" - controller compatible with Exynos4210 SoC.
-  - "samsung,exynos4412-clock" - controller compatible with Exynos4412 SoC.
-
-- reg: physical base address of the controller and length of memory mapped
-  region.
-
-- #clock-cells: should be 1.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume.
-
-All available clocks are defined as preprocessor macros in
-dt-bindings/clock/exynos4.h header and can be used in device
-tree sources.
-
-Example 1: An example of a clock controller node is listed below.
-
-	clock: clock-controller@10030000 {
-		compatible = "samsung,exynos4210-clock";
-		reg = <0x10030000 0x20000>;
-		#clock-cells = <1>;
-	};
-
-Example 2: UART controller node that consumes the clock generated by the clock
-	   controller. Refer to the standard clock bindings for information
-	   about 'clocks' and 'clock-names' property.
-
-	serial@13820000 {
-		compatible = "samsung,exynos4210-uart";
-		reg = <0x13820000 0x100>;
-		interrupts = <0 54 0>;
-		clocks = <&clock CLK_UART2>, <&clock CLK_SCLK_UART2>;
-		clock-names = "uart", "clk_uart_baud0";
-	};
-
-Exynos4412 SoC contains some additional clocks for FIMC-ISP (Camera ISP)
-subsystem. Registers for those clocks are located in the ISP power domain.
-Because those registers are also located in a different memory region than
-the main clock controller, a separate clock controller has to be defined for
-handling them.
-
-Required Properties:
-
-- compatible: should be "samsung,exynos4412-isp-clock".
-
-- reg: physical base address of the ISP clock controller and length of memory
-  mapped region.
-
-- #clock-cells: should be 1.
-
-- clocks: list of the clock controller input clock identifiers,
-  from common clock bindings, should point to CLK_ACLK200 and
-  CLK_ACLK400_MCUISP clocks from the main clock controller.
-
-- clock-names: list of the clock controller input clock names,
-  as described in clock-bindings.txt, should be "aclk200" and
-  "aclk400_mcuisp".
-
-- power-domains: a phandle to ISP power domain node as described by
-  generic PM domain bindings.
-
-Example 3: The clock controllers bindings for Exynos4412 SoCs.
-
-	clock: clock-controller@10030000 {
-		compatible = "samsung,exynos4412-clock";
-		reg = <0x10030000 0x18000>;
-		#clock-cells = <1>;
-	};
-
-	isp_clock: clock-controller@10048000 {
-		compatible = "samsung,exynos4412-isp-clock";
-		reg = <0x10048000 0x1000>;
-		#clock-cells = <1>;
-		power-domains = <&pd_isp>;
-		clocks = <&clock CLK_ACLK200>, <&clock CLK_ACLK400_MCUISP>;
-		clock-names = "aclk200", "aclk400_mcuisp";
-	};
diff --git a/Documentation/devicetree/bindings/clock/exynos5250-clock.txt b/Documentation/devicetree/bindings/clock/exynos5250-clock.txt
deleted file mode 100644
index aff266a12eeb..000000000000
--- a/Documentation/devicetree/bindings/clock/exynos5250-clock.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-* Samsung Exynos5250 Clock Controller
-
-The Exynos5250 clock controller generates and supplies clock to various
-controllers within the Exynos5250 SoC.
-
-Required Properties:
-
-- compatible: should be one of the following.
-  - "samsung,exynos5250-clock" - controller compatible with Exynos5250 SoC.
-
-- reg: physical base address of the controller and length of memory mapped
-  region.
-
-- #clock-cells: should be 1.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume.
-
-All available clocks are defined as preprocessor macros in
-dt-bindings/clock/exynos5250.h header and can be used in device
-tree sources.
-
-Example 1: An example of a clock controller node is listed below.
-
-	clock: clock-controller@10010000 {
-		compatible = "samsung,exynos5250-clock";
-		reg = <0x10010000 0x30000>;
-		#clock-cells = <1>;
-	};
-
-Example 2: UART controller node that consumes the clock generated by the clock
-	   controller. Refer to the standard clock bindings for information
-	   about 'clocks' and 'clock-names' property.
-
-	serial@13820000 {
-		compatible = "samsung,exynos4210-uart";
-		reg = <0x13820000 0x100>;
-		interrupts = <0 54 0>;
-		clocks = <&clock CLK_UART2>, <&clock CLK_SCLK_UART2>;
-		clock-names = "uart", "clk_uart_baud0";
-	};
diff --git a/Documentation/devicetree/bindings/clock/exynos5420-clock.txt b/Documentation/devicetree/bindings/clock/exynos5420-clock.txt
deleted file mode 100644
index 717a7b1531c7..000000000000
--- a/Documentation/devicetree/bindings/clock/exynos5420-clock.txt
+++ /dev/null
@@ -1,42 +0,0 @@
-* Samsung Exynos5420 Clock Controller
-
-The Exynos5420 clock controller generates and supplies clock to various
-controllers within the Exynos5420 SoC and for the Exynos5800 SoC.
-
-Required Properties:
-
-- compatible: should be one of the following.
-  - "samsung,exynos5420-clock" - controller compatible with Exynos5420 SoC.
-  - "samsung,exynos5800-clock" - controller compatible with Exynos5800 SoC.
-
-- reg: physical base address of the controller and length of memory mapped
-  region.
-
-- #clock-cells: should be 1.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume.
-
-All available clocks are defined as preprocessor macros in
-dt-bindings/clock/exynos5420.h header and can be used in device
-tree sources.
-
-Example 1: An example of a clock controller node is listed below.
-
-	clock: clock-controller@10010000 {
-		compatible = "samsung,exynos5420-clock";
-		reg = <0x10010000 0x30000>;
-		#clock-cells = <1>;
-	};
-
-Example 2: UART controller node that consumes the clock generated by the clock
-	   controller. Refer to the standard clock bindings for information
-	   about 'clocks' and 'clock-names' property.
-
-	serial@13820000 {
-		compatible = "samsung,exynos4210-uart";
-		reg = <0x13820000 0x100>;
-		interrupts = <0 54 0>;
-		clocks = <&clock CLK_UART2>, <&clock CLK_SCLK_UART2>;
-		clock-names = "uart", "clk_uart_baud0";
-	};
diff --git a/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml b/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml
index 26ed040bc717..ffd6ae0eed64 100644
--- a/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml
+++ b/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml
@@ -30,6 +30,20 @@ description: |
     3 -- OUT3
     4 -- OUT4
 
+  The idt,shutdown and idt,output-enable-active properties control the
+  SH (en_global_shutdown) and SP bits of the Primary Source and Shutdown
+  Register, respectively. Their behavior is summarized by the following
+  table:
+
+  SH SP Output when the SD/OE pin is Low/High
+  == == =====================================
+   0  0 Active/Inactive
+   0  1 Inactive/Active
+   1  0 Active/Shutdown
+   1  1 Inactive/Shutdown
+
+  The case where SH and SP are both 1 is likely not very interesting.
+
 maintainers:
   - Luca Ceresoli <luca@lucaceresoli.net>
 
@@ -64,6 +78,26 @@ properties:
     maximum: 22760
     description: Optional load capacitor for XTAL1 and XTAL2
 
+  idt,shutdown:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [0, 1]
+    description: |
+      If 1, this enables the shutdown functionality: the chip will be
+      shut down if the SD/OE pin is driven high. If 0, this disables the
+      shutdown functionality: the chip will never be shut down based on
+      the value of the SD/OE pin. This property corresponds to the SH
+      bit of the Primary Source and Shutdown Register.
+
+  idt,output-enable-active:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [0, 1]
+    description: |
+      If 1, this enables output when the SD/OE pin is high, and disables
+      output when the SD/OE pin is low. If 0, this disables output when
+      the SD/OE pin is high, and enables output when the SD/OE pin is
+      low. This corresponds to the SP bit of the Primary Source and
+      Shutdown Register.
+
 patternProperties:
   "^OUT[1-4]$":
     type: object
@@ -90,6 +124,8 @@ required:
   - compatible
   - reg
   - '#clock-cells'
+  - idt,shutdown
+  - idt,output-enable-active
 
 allOf:
   - if:
@@ -139,6 +175,10 @@ examples:
             clocks = <&ref25m>;
             clock-names = "xin";
 
+            /* Set the SD/OE pin's settings */
+            idt,shutdown = <0>;
+            idt,output-enable-active = <0>;
+
             OUT1 {
                 idt,mode = <VC5_CMOSD>;
                 idt,voltage-microvolt = <1800000>;
diff --git a/Documentation/devicetree/bindings/clock/qcom,a53pll.yaml b/Documentation/devicetree/bindings/clock/qcom,a53pll.yaml
index db3d0ea6bc7a..fbd758470b88 100644
--- a/Documentation/devicetree/bindings/clock/qcom,a53pll.yaml
+++ b/Documentation/devicetree/bindings/clock/qcom,a53pll.yaml
@@ -18,6 +18,7 @@ properties:
     enum:
       - qcom,ipq6018-a53pll
       - qcom,msm8916-a53pll
+      - qcom,msm8939-a53pll
 
   reg:
     maxItems: 1
@@ -33,6 +34,8 @@ properties:
     items:
       - const: xo
 
+  operating-points-v2: true
+
 required:
   - compatible
   - reg
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sm6115.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6115.yaml
new file mode 100644
index 000000000000..26050da844d5
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6115.yaml
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-sm6115.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller Binding for SM6115 and SM4250
+
+maintainers:
+  - Iskren Chernev <iskren.chernev@gmail.com>
+
+description: |
+  Qualcomm global clock control module which supports the clocks, resets and
+  power domains on SM4250/6115.
+
+  See also:
+  - dt-bindings/clock/qcom,gcc-sm6115.h
+
+properties:
+  compatible:
+    const: qcom,gcc-sm6115
+
+  clocks:
+    items:
+      - description: Board XO source
+      - description: Sleep clock source
+
+  clock-names:
+    items:
+      - const: bi_tcxo
+      - const: sleep_clk
+
+  '#clock-cells':
+    const: 1
+
+  '#reset-cells':
+    const: 1
+
+  '#power-domain-cells':
+    const: 1
+
+  reg:
+    maxItems: 1
+
+  protected-clocks:
+    description:
+      Protected clock specifier list as per common clock binding.
+
+required:
+  - compatible
+  - clocks
+  - clock-names
+  - reg
+  - '#clock-cells'
+  - '#reset-cells'
+  - '#power-domain-cells'
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/qcom,rpmcc.h>
+    clock-controller@1400000 {
+        compatible = "qcom,gcc-sm6115";
+        reg = <0x01400000 0x1f0000>;
+        #clock-cells = <1>;
+        #reset-cells = <1>;
+        #power-domain-cells = <1>;
+        clock-names = "bi_tcxo", "sleep_clk";
+        clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>, <&sleep_clk>;
+    };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml
new file mode 100644
index 000000000000..20926cd8293e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-sm6350.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller Binding for SM6350
+
+maintainers:
+  - Konrad Dybcio <konrad.dybcio@somainline.org>
+
+description: |
+  Qualcomm global clock control module which supports the clocks, resets and
+  power domains on SM6350.
+
+  See also:
+  - dt-bindings/clock/qcom,gcc-sm6350.h
+
+properties:
+  compatible:
+    const: qcom,gcc-sm6350
+
+  clocks:
+    items:
+      - description: Board XO source
+      - description: Board active XO source
+      - description: Sleep clock source
+
+  clock-names:
+    items:
+      - const: bi_tcxo
+      - const: bi_tcxo_ao
+      - const: sleep_clk
+
+  '#clock-cells':
+    const: 1
+
+  '#reset-cells':
+    const: 1
+
+  '#power-domain-cells':
+    const: 1
+
+  reg:
+    maxItems: 1
+
+  protected-clocks:
+    description:
+      Protected clock specifier list as per common clock binding.
+
+required:
+  - compatible
+  - clocks
+  - clock-names
+  - reg
+  - '#clock-cells'
+  - '#reset-cells'
+  - '#power-domain-cells'
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/qcom,rpmh.h>
+    clock-controller@100000 {
+      compatible = "qcom,gcc-sm6350";
+      reg = <0x00100000 0x1f0000>;
+      clocks = <&rpmhcc RPMH_CXO_CLK>,
+               <&rpmhcc RPMH_CXO_CLK_A>,
+               <&sleep_clk>;
+      clock-names = "bi_tcxo", "bi_tcxo_ao", "sleep_clk";
+      #clock-cells = <1>;
+      #reset-cells = <1>;
+      #power-domain-cells = <1>;
+    };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc.yaml
index 8453eeddf30e..2f20f8aa932a 100644
--- a/Documentation/devicetree/bindings/clock/qcom,gcc.yaml
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc.yaml
@@ -23,6 +23,7 @@ description: |
   - dt-bindings/clock/qcom,gcc-ipq806x.h (qcom,gcc-ipq8064)
   - dt-bindings/reset/qcom,gcc-ipq806x.h (qcom,gcc-ipq8064)
   - dt-bindings/clock/qcom,gcc-msm8939.h
+  - dt-bindings/clock/qcom,gcc-msm8953.h
   - dt-bindings/reset/qcom,gcc-msm8939.h
   - dt-bindings/clock/qcom,gcc-msm8660.h
   - dt-bindings/reset/qcom,gcc-msm8660.h
@@ -46,6 +47,7 @@ properties:
       - qcom,gcc-msm8660
       - qcom,gcc-msm8916
       - qcom,gcc-msm8939
+      - qcom,gcc-msm8953
       - qcom,gcc-msm8960
       - qcom,gcc-msm8974
       - qcom,gcc-msm8974pro
diff --git a/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml
index df943c4c3234..46dff46d5760 100644
--- a/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml
+++ b/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0-only
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
 %YAML 1.2
 ---
 $id: http://devicetree.org/schemas/clock/qcom,gpucc.yaml#
@@ -11,11 +11,12 @@ maintainers:
 
 description: |
   Qualcomm graphics clock control module which supports the clocks, resets and
-  power domains on SDM845/SC7180/SM8150/SM8250.
+  power domains on Qualcomm SoCs.
 
   See also:
     dt-bindings/clock/qcom,gpucc-sdm845.h
     dt-bindings/clock/qcom,gpucc-sc7180.h
+    dt-bindings/clock/qcom,gpucc-sc7280.h
     dt-bindings/clock/qcom,gpucc-sm8150.h
     dt-bindings/clock/qcom,gpucc-sm8250.h
 
@@ -24,6 +25,8 @@ properties:
     enum:
       - qcom,sdm845-gpucc
       - qcom,sc7180-gpucc
+      - qcom,sc7280-gpucc
+      - qcom,sc8180x-gpucc
       - qcom,sm8150-gpucc
       - qcom,sm8250-gpucc
 
diff --git a/Documentation/devicetree/bindings/clock/qcom,mmcc.yaml b/Documentation/devicetree/bindings/clock/qcom,mmcc.yaml
index 8b0b1c56f354..68fdc3d4982a 100644
--- a/Documentation/devicetree/bindings/clock/qcom,mmcc.yaml
+++ b/Documentation/devicetree/bindings/clock/qcom,mmcc.yaml
@@ -22,6 +22,8 @@ properties:
       - qcom,mmcc-msm8660
       - qcom,mmcc-msm8960
       - qcom,mmcc-msm8974
+      - qcom,mmcc-msm8992
+      - qcom,mmcc-msm8994
       - qcom,mmcc-msm8996
       - qcom,mmcc-msm8998
       - qcom,mmcc-sdm630
diff --git a/Documentation/devicetree/bindings/clock/qcom,rpmcc.txt b/Documentation/devicetree/bindings/clock/qcom,rpmcc.txt
index 6cf5a7ec2b4c..a4877881f1d8 100644
--- a/Documentation/devicetree/bindings/clock/qcom,rpmcc.txt
+++ b/Documentation/devicetree/bindings/clock/qcom,rpmcc.txt
@@ -10,11 +10,13 @@ Required properties :
 - compatible : shall contain only one of the following. The generic
                compatible "qcom,rpmcc" should be also included.
 
+			"qcom,rpmcc-mdm9607", "qcom,rpmcc"
 			"qcom,rpmcc-msm8660", "qcom,rpmcc"
 			"qcom,rpmcc-apq8060", "qcom,rpmcc"
 			"qcom,rpmcc-msm8226", "qcom,rpmcc"
 			"qcom,rpmcc-msm8916", "qcom,rpmcc"
 			"qcom,rpmcc-msm8936", "qcom,rpmcc"
+			"qcom,rpmcc-msm8953", "qcom,rpmcc"
 			"qcom,rpmcc-msm8974", "qcom,rpmcc"
 			"qcom,rpmcc-msm8976", "qcom,rpmcc"
 			"qcom,rpmcc-apq8064", "qcom,rpmcc"
@@ -25,6 +27,8 @@ Required properties :
 			"qcom,rpmcc-msm8998", "qcom,rpmcc"
 			"qcom,rpmcc-qcs404", "qcom,rpmcc"
 			"qcom,rpmcc-sdm660", "qcom,rpmcc"
+			"qcom,rpmcc-sm6115", "qcom,rpmcc"
+			"qcom,rpmcc-sm6125", "qcom,rpmcc"
 
 - #clock-cells : shall contain 1
 
diff --git a/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml b/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml
index 9ea0b3f5a4f2..72212970e6f5 100644
--- a/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml
+++ b/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml
@@ -22,6 +22,7 @@ properties:
       - qcom,sc8180x-rpmh-clk
       - qcom,sdm845-rpmh-clk
       - qcom,sdx55-rpmh-clk
+      - qcom,sm6350-rpmh-clk
       - qcom,sm8150-rpmh-clk
       - qcom,sm8250-rpmh-clk
       - qcom,sm8350-rpmh-clk
diff --git a/Documentation/devicetree/bindings/clock/qcom,sc7280-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sc7280-dispcc.yaml
new file mode 100644
index 000000000000..2178666fb697
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sc7280-dispcc.yaml
@@ -0,0 +1,94 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sc7280-dispcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Display Clock & Reset Controller Binding for SC7280
+
+maintainers:
+  - Taniya Das <tdas@codeaurora.org>
+
+description: |
+  Qualcomm display clock control module which supports the clocks, resets and
+  power domains on SC7280.
+
+  See also dt-bindings/clock/qcom,dispcc-sc7280.h.
+
+properties:
+  compatible:
+    const: qcom,sc7280-dispcc
+
+  clocks:
+    items:
+      - description: Board XO source
+      - description: GPLL0 source from GCC
+      - description: Byte clock from DSI PHY
+      - description: Pixel clock from DSI PHY
+      - description: Link clock from DP PHY
+      - description: VCO DIV clock from DP PHY
+      - description: Link clock from EDP PHY
+      - description: VCO DIV clock from EDP PHY
+
+  clock-names:
+    items:
+      - const: bi_tcxo
+      - const: gcc_disp_gpll0_clk
+      - const: dsi0_phy_pll_out_byteclk
+      - const: dsi0_phy_pll_out_dsiclk
+      - const: dp_phy_pll_link_clk
+      - const: dp_phy_pll_vco_div_clk
+      - const: edp_phy_pll_link_clk
+      - const: edp_phy_pll_vco_div_clk
+
+  '#clock-cells':
+    const: 1
+
+  '#reset-cells':
+    const: 1
+
+  '#power-domain-cells':
+    const: 1
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - '#clock-cells'
+  - '#reset-cells'
+  - '#power-domain-cells'
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/qcom,gcc-sc7280.h>
+    #include <dt-bindings/clock/qcom,rpmh.h>
+    clock-controller@af00000 {
+      compatible = "qcom,sc7280-dispcc";
+      reg = <0x0af00000 0x200000>;
+      clocks = <&rpmhcc RPMH_CXO_CLK>,
+               <&gcc GCC_DISP_GPLL0_CLK_SRC>,
+               <&dsi_phy 0>,
+               <&dsi_phy 1>,
+               <&dp_phy 0>,
+               <&dp_phy 1>,
+               <&edp_phy 0>,
+               <&edp_phy 1>;
+      clock-names = "bi_tcxo",
+                    "gcc_disp_gpll0_clk",
+                    "dsi0_phy_pll_out_byteclk",
+                    "dsi0_phy_pll_out_dsiclk",
+                    "dp_phy_pll_link_clk",
+                    "dp_phy_pll_vco_div_clk",
+                    "edp_phy_pll_link_clk",
+                    "edp_phy_pll_vco_div_clk";
+      #clock-cells = <1>;
+      #reset-cells = <1>;
+      #power-domain-cells = <1>;
+    };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,videocc.yaml b/Documentation/devicetree/bindings/clock/qcom,videocc.yaml
index 567202942b88..0d224f114b5b 100644
--- a/Documentation/devicetree/bindings/clock/qcom,videocc.yaml
+++ b/Documentation/devicetree/bindings/clock/qcom,videocc.yaml
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0-only
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
 %YAML 1.2
 ---
 $id: http://devicetree.org/schemas/clock/qcom,videocc.yaml#
@@ -11,10 +11,11 @@ maintainers:
 
 description: |
   Qualcomm video clock control module which supports the clocks, resets and
-  power domains on SDM845/SC7180/SM8150/SM8250.
+  power domains on Qualcomm SoCs.
 
   See also:
     dt-bindings/clock/qcom,videocc-sc7180.h
+    dt-bindings/clock/qcom,videocc-sc7280.h
     dt-bindings/clock/qcom,videocc-sdm845.h
     dt-bindings/clock/qcom,videocc-sm8150.h
     dt-bindings/clock/qcom,videocc-sm8250.h
@@ -23,6 +24,7 @@ properties:
   compatible:
     enum:
       - qcom,sc7180-videocc
+      - qcom,sc7280-videocc
       - qcom,sdm845-videocc
       - qcom,sm8150-videocc
       - qcom,sm8250-videocc
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3399-cru.txt b/Documentation/devicetree/bindings/clock/rockchip,rk3399-cru.txt
deleted file mode 100644
index 3bc56fae90ac..000000000000
--- a/Documentation/devicetree/bindings/clock/rockchip,rk3399-cru.txt
+++ /dev/null
@@ -1,68 +0,0 @@
-* Rockchip RK3399 Clock and Reset Unit
-
-The RK3399 clock controller generates and supplies clock to various
-controllers within the SoC and also implements a reset controller for SoC
-peripherals.
-
-Required Properties:
-
-- compatible: PMU for CRU should be "rockchip,rk3399-pmucru"
-- compatible: CRU should be "rockchip,rk3399-cru"
-- reg: physical base address of the controller and length of memory mapped
-  region.
-- #clock-cells: should be 1.
-- #reset-cells: should be 1.
-
-Optional Properties:
-
-- rockchip,grf: phandle to the syscon managing the "general register files".
-  It is used for GRF muxes, if missing any muxes present in the GRF will not
-  be available.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume. All available clocks are defined as
-preprocessor macros in the dt-bindings/clock/rk3399-cru.h headers and can be
-used in device tree sources. Similar macros exist for the reset sources in
-these files.
-
-External clocks:
-
-There are several clocks that are generated outside the SoC. It is expected
-that they are defined using standard clock bindings with following
-clock-output-names:
- - "xin24m" - crystal input - required,
- - "xin32k" - rtc clock - optional,
- - "clkin_gmac" - external GMAC clock - optional,
- - "clkin_i2s" - external I2S clock - optional,
- - "pclkin_cif" - external ISP clock - optional,
- - "clk_usbphy0_480m" - output clock of the pll in the usbphy0
- - "clk_usbphy1_480m" - output clock of the pll in the usbphy1
-
-Example: Clock controller node:
-
-	pmucru: pmu-clock-controller@ff750000 {
-		compatible = "rockchip,rk3399-pmucru";
-		reg = <0x0 0xff750000 0x0 0x1000>;
-		#clock-cells = <1>;
-		#reset-cells = <1>;
-	};
-
-	cru: clock-controller@ff760000 {
-		compatible = "rockchip,rk3399-cru";
-		reg = <0x0 0xff760000 0x0 0x1000>;
-		#clock-cells = <1>;
-		#reset-cells = <1>;
-	};
-
-Example: UART controller node that consumes the clock generated by the clock
-  controller:
-
-	uart0: serial@ff1a0000 {
-		compatible = "rockchip,rk3399-uart", "snps,dw-apb-uart";
-		reg = <0x0 0xff180000 0x0 0x100>;
-		clocks = <&cru SCLK_UART0>, <&cru PCLK_UART0>;
-		clock-names = "baudclk", "apb_pclk";
-		interrupts = <GIC_SPI 99 IRQ_TYPE_LEVEL_HIGH>;
-		reg-shift = <2>;
-		reg-io-width = <4>;
-	};
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3399-cru.yaml b/Documentation/devicetree/bindings/clock/rockchip,rk3399-cru.yaml
new file mode 100644
index 000000000000..72b286a1beba
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/rockchip,rk3399-cru.yaml
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/rockchip,rk3399-cru.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip RK3399 Clock and Reset Unit
+
+maintainers:
+  - Xing Zheng <zhengxing@rock-chips.com>
+  - Heiko Stuebner <heiko@sntech.de>
+
+description: |
+  The RK3399 clock controller generates and supplies clock to various
+  controllers within the SoC and also implements a reset controller for SoC
+  peripherals.
+  Each clock is assigned an identifier and client nodes can use this identifier
+  to specify the clock which they consume. All available clocks are defined as
+  preprocessor macros in the dt-bindings/clock/rk3399-cru.h headers and can be
+  used in device tree sources. Similar macros exist for the reset sources in
+  these files.
+  There are several clocks that are generated outside the SoC. It is expected
+  that they are defined using standard clock bindings with following
+  clock-output-names:
+    - "xin24m" - crystal input - required,
+    - "xin32k" - rtc clock - optional,
+    - "clkin_gmac" - external GMAC clock - optional,
+    - "clkin_i2s" - external I2S clock - optional,
+    - "pclkin_cif" - external ISP clock - optional,
+    - "clk_usbphy0_480m" - output clock of the pll in the usbphy0
+    - "clk_usbphy1_480m" - output clock of the pll in the usbphy1
+
+properties:
+  compatible:
+    enum:
+      - rockchip,rk3399-pmucru
+      - rockchip,rk3399-cru
+
+  reg:
+    maxItems: 1
+
+  "#clock-cells":
+    const: 1
+
+  "#reset-cells":
+    const: 1
+
+  clocks:
+    minItems: 1
+
+  assigned-clocks:
+    minItems: 1
+    maxItems: 64
+
+  assigned-clock-parents:
+    minItems: 1
+    maxItems: 64
+
+  assigned-clock-rates:
+    minItems: 1
+    maxItems: 64
+
+  rockchip,grf:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description: >
+      phandle to the syscon managing the "general register files". It is used
+      for GRF muxes, if missing any muxes present in the GRF will not be
+      available.
+
+required:
+  - compatible
+  - reg
+  - "#clock-cells"
+  - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+    pmucru: pmu-clock-controller@ff750000 {
+      compatible = "rockchip,rk3399-pmucru";
+      reg = <0xff750000 0x1000>;
+      #clock-cells = <1>;
+      #reset-cells = <1>;
+    };
+  - |
+    cru: clock-controller@ff760000 {
+      compatible = "rockchip,rk3399-cru";
+      reg = <0xff760000 0x1000>;
+      #clock-cells = <1>;
+      #reset-cells = <1>;
+    };
diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos-audss-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos-audss-clock.yaml
new file mode 100644
index 000000000000..f14f1d39da36
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,exynos-audss-clock.yaml
@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/samsung,exynos-audss-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos SoC Audio SubSystem clock controller
+
+maintainers:
+  - Chanwoo Choi <cw00.choi@samsung.com>
+  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Sylwester Nawrocki <s.nawrocki@samsung.com>
+  - Tomasz Figa <tomasz.figa@gmail.com>
+
+description: |
+  All available clocks are defined as preprocessor macros in
+  include/dt-bindings/clock/exynos-audss-clk.h header.
+
+properties:
+  compatible:
+    enum:
+      - samsung,exynos4210-audss-clock
+      - samsung,exynos5250-audss-clock
+      - samsung,exynos5410-audss-clock
+      - samsung,exynos5420-audss-clock
+
+  clocks:
+    minItems: 2
+    items:
+      - description:
+          Fixed rate PLL reference clock, parent of mout_audss. "fin_pll" is
+          used if not specified.
+      - description:
+          Input PLL to the AudioSS block, parent of mout_audss. "fout_epll" is
+          used if not specified.
+      - description:
+          Audio bus clock, parent of mout_i2s. "sclk_audio0" is used if not
+          specified.
+      - description:
+          PCM clock, parent of sclk_pcm.  "sclk_pcm0" is used if not specified.
+      - description:
+          External i2s clock, parent of mout_i2s. "cdclk0" is used if not
+          specified.
+
+  clock-names:
+    minItems: 2
+    items:
+      - const: pll_ref
+      - const: pll_in
+      - const: sclk_audio
+      - const: sclk_pcm_in
+      - const: cdclk
+
+  "#clock-cells":
+    const: 1
+
+  power-domains:
+    maxItems: 1
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - clocks
+  - clock-names
+  - "#clock-cells"
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    clock-controller@3810000 {
+        compatible = "samsung,exynos5250-audss-clock";
+        reg = <0x03810000 0x0c>;
+        #clock-cells = <1>;
+        clocks = <&clock 1>, <&clock 7>, <&clock 138>, <&clock 160>, <&ext_i2s_clk>;
+        clock-names = "pll_ref", "pll_in", "sclk_audio", "sclk_pcm_in", "cdclk";
+    };
diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos-clock.yaml
new file mode 100644
index 000000000000..4e8062860986
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,exynos-clock.yaml
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/samsung,exynos-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos SoC clock controller
+
+maintainers:
+  - Chanwoo Choi <cw00.choi@samsung.com>
+  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Sylwester Nawrocki <s.nawrocki@samsung.com>
+  - Tomasz Figa <tomasz.figa@gmail.com>
+
+description: |
+  All available clocks are defined as preprocessor macros in
+  dt-bindings/clock/ headers.
+
+properties:
+  compatible:
+    oneOf:
+      - enum:
+          - samsung,exynos3250-cmu
+          - samsung,exynos3250-cmu-dmc
+          - samsung,exynos3250-cmu-isp
+          - samsung,exynos4210-clock
+          - samsung,exynos4412-clock
+          - samsung,exynos5250-clock
+      - items:
+          - enum:
+              - samsung,exynos5420-clock
+              - samsung,exynos5800-clock
+          - const: syscon
+
+  clocks:
+    minItems: 1
+    maxItems: 4
+
+  "#clock-cells":
+    const: 1
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - "#clock-cells"
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/exynos5250.h>
+    clock: clock-controller@10010000 {
+        compatible = "samsung,exynos5250-clock";
+        reg = <0x10010000 0x30000>;
+        #clock-cells = <1>;
+    };
diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos-ext-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos-ext-clock.yaml
new file mode 100644
index 000000000000..64d027dbe3b2
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,exynos-ext-clock.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/samsung,exynos-ext-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung SoC external/osc/XXTI/XusbXTI clock
+
+maintainers:
+  - Chanwoo Choi <cw00.choi@samsung.com>
+  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Sylwester Nawrocki <s.nawrocki@samsung.com>
+  - Tomasz Figa <tomasz.figa@gmail.com>
+
+description: |
+  Samsung SoCs require an external clock supplied through XXTI or XusbXTI pins.
+
+properties:
+  compatible:
+    enum:
+      - samsung,clock-xxti
+      - samsung,clock-xusbxti
+      - samsung,exynos5420-oscclk
+
+  "#clock-cells":
+    const: 0
+
+  clock-frequency: true
+
+  clock-output-names:
+    maxItems: 1
+
+required:
+  - compatible
+  - clock-frequency
+
+additionalProperties: false
+
+examples:
+  - |
+    fixed-rate-clocks {
+        clock {
+            compatible = "samsung,clock-xxti";
+            clock-frequency = <24000000>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos4412-isp-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos4412-isp-clock.yaml
new file mode 100644
index 000000000000..1ed64add4355
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,exynos4412-isp-clock.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/samsung,exynos4412-isp-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos4412 SoC ISP clock controller
+
+maintainers:
+  - Chanwoo Choi <cw00.choi@samsung.com>
+  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Sylwester Nawrocki <s.nawrocki@samsung.com>
+  - Tomasz Figa <tomasz.figa@gmail.com>
+
+description: |
+  Clock controller for Samsung Exynos4412 SoC FIMC-ISP (Camera ISP)
+  All available clocks are defined as preprocessor macros in
+  dt-bindings/clock/ headers.
+
+properties:
+  compatible:
+    const: samsung,exynos4412-isp-clock
+
+  clocks:
+    items:
+      - description: CLK_ACLK200 from the main clock controller
+      - description: CLK_ACLK400_MCUISP from the main clock controller
+
+  clock-names:
+    items:
+      - const: aclk200
+      - const: aclk400_mcuisp
+
+  "#clock-cells":
+    const: 1
+
+  power-domains:
+    maxItems: 1
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - "#clock-cells"
+  - clocks
+  - clock-names
+  - power-domains
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/exynos4.h>
+    clock-controller@10048000 {
+        compatible = "samsung,exynos4412-isp-clock";
+        reg = <0x10048000 0x1000>;
+        #clock-cells = <1>;
+        power-domains = <&pd_isp>;
+        clocks = <&clock CLK_ACLK200>, <&clock CLK_ACLK400_MCUISP>;
+        clock-names = "aclk200", "aclk400_mcuisp";
+    };
+
diff --git a/Documentation/devicetree/bindings/clock/samsung,s5pv210-audss-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,s5pv210-audss-clock.yaml
new file mode 100644
index 000000000000..ae8f8fc93233
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,s5pv210-audss-clock.yaml
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/samsung,s5pv210-audss-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung S5Pv210 SoC Audio SubSystem clock controller
+
+maintainers:
+  - Chanwoo Choi <cw00.choi@samsung.com>
+  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Sylwester Nawrocki <s.nawrocki@samsung.com>
+  - Tomasz Figa <tomasz.figa@gmail.com>
+
+description: |
+  All available clocks are defined as preprocessor macros in
+  include/dt-bindings/clock/s5pv210-audss.h header.
+
+properties:
+  compatible:
+    const: samsung,s5pv210-audss-clock
+
+  clocks:
+    minItems: 4
+    items:
+      - description:
+          AHB bus clock of the Audio Subsystem.
+      - description:
+          Optional fixed rate PLL reference clock, parent of mout_audss. If not
+          specified (i.e. xusbxti is used for PLL reference), it is fixed to a
+          clock named "xxti".
+      - description:
+          Input PLL to the AudioSS block, parent of mout_audss.
+      - description:
+          Audio bus clock, parent of mout_i2s.
+      - description:
+          Optional external i2s clock, parent of mout_i2s. If not specified, it
+          is fixed to a clock named "iiscdclk0".
+
+  clock-names:
+    minItems: 4
+    items:
+      - const: hclk
+      - const: xxti
+      - const: fout_epll
+      - const: sclk_audio0
+      - const: iiscdclk0
+
+  "#clock-cells":
+    const: 1
+
+  power-domains:
+    maxItems: 1
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - clocks
+  - clock-names
+  - "#clock-cells"
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/s5pv210.h>
+
+    clock-controller@c0900000 {
+        compatible = "samsung,s5pv210-audss-clock";
+        reg = <0xc0900000 0x1000>;
+        #clock-cells = <1>;
+        clock-names = "hclk", "xxti", "fout_epll", "sclk_audio0";
+        clocks = <&clocks DOUT_HCLKP>, <&xxti>, <&clocks FOUT_EPLL>,
+                 <&clocks SCLK_AUDIO0>;
+    };
diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt
index 56f442374383..1d7e49167666 100644
--- a/Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt
+++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt
@@ -11,7 +11,7 @@ Required properties:
 - None
 
 Optional properties:
-- operating-points: Refer to Documentation/devicetree/bindings/opp/opp.txt for
+- operating-points: Refer to Documentation/devicetree/bindings/opp/opp-v1.yaml for
   details. OPPs *must* be supplied either via DT, i.e. this property, or
   populated at runtime.
 - clock-latency: Specify the possible maximum transition latency for clock,
diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek-hw.yaml
new file mode 100644
index 000000000000..9cd42a64b13e
--- /dev/null
+++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek-hw.yaml
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cpufreq/cpufreq-mediatek-hw.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek's CPUFREQ Bindings
+
+maintainers:
+  - Hector Yuan <hector.yuan@mediatek.com>
+
+description:
+  CPUFREQ HW is a hardware engine used by MediaTek SoCs to
+  manage frequency in hardware. It is capable of controlling
+  frequency for multiple clusters.
+
+properties:
+  compatible:
+    const: mediatek,cpufreq-hw
+
+  reg:
+    minItems: 1
+    maxItems: 2
+    description:
+      Addresses and sizes for the memory of the HW bases in
+      each frequency domain. Each entry corresponds to
+      a register bank for each frequency domain present.
+
+  "#performance-domain-cells":
+    description:
+      Number of cells in a performance domain specifier.
+      Set const to 1 here for nodes providing multiple
+      performance domains.
+    const: 1
+
+required:
+  - compatible
+  - reg
+  - "#performance-domain-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+    cpus {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            cpu0: cpu@0 {
+                device_type = "cpu";
+                compatible = "arm,cortex-a55";
+                enable-method = "psci";
+                performance-domains = <&performance 0>;
+                reg = <0x000>;
+            };
+    };
+
+    /* ... */
+
+    soc {
+        #address-cells = <2>;
+        #size-cells = <2>;
+
+        performance: performance-controller@11bc00 {
+            compatible = "mediatek,cpufreq-hw";
+            reg = <0 0x0011bc10 0 0x120>, <0 0x0011bd30 0 0x120>;
+
+            #performance-domain-cells = <1>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek.txt
index ef68711716fb..b8233ec91d3d 100644
--- a/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek.txt
+++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek.txt
@@ -10,7 +10,7 @@ Required properties:
 			  transition and not stable yet.
 	Please refer to Documentation/devicetree/bindings/clock/clock-bindings.txt for
 	generic clock consumer properties.
-- operating-points-v2: Please refer to Documentation/devicetree/bindings/opp/opp.txt
+- operating-points-v2: Please refer to Documentation/devicetree/bindings/opp/opp-v2.yaml
 	for detail.
 - proc-supply: Regulator for Vproc of CPU cluster.
 
diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt
index d91a02a3b6b0..6b0b452acef0 100644
--- a/Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt
+++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt
@@ -6,8 +6,6 @@ from the SoC, then supplies the OPP framework with 'prop' and 'supported
 hardware' information respectively.  The framework is then able to read
 the DT and operate in the usual way.
 
-For more information about the expected DT format [See: ../opp/opp.txt].
-
 Frequency Scaling only
 ----------------------
 
@@ -15,7 +13,7 @@ No vendor specific driver required for this.
 
 Located in CPU's node:
 
-- operating-points		: [See: ../power/opp.txt]
+- operating-points		: [See: ../power/opp-v1.yaml]
 
 Example [safe]
 --------------
@@ -37,7 +35,7 @@ This requires the ST CPUFreq driver to supply 'process' and 'version' info.
 
 Located in CPU's node:
 
-- operating-points-v2		: [See ../power/opp.txt]
+- operating-points-v2		: [See ../power/opp-v2.yaml]
 
 Example [unsafe]
 ----------------
diff --git a/Documentation/devicetree/bindings/cpufreq/nvidia,tegra20-cpufreq.txt b/Documentation/devicetree/bindings/cpufreq/nvidia,tegra20-cpufreq.txt
index 52a24b82fd86..bdbfd7c36101 100644
--- a/Documentation/devicetree/bindings/cpufreq/nvidia,tegra20-cpufreq.txt
+++ b/Documentation/devicetree/bindings/cpufreq/nvidia,tegra20-cpufreq.txt
@@ -4,7 +4,7 @@ Binding for NVIDIA Tegra20 CPUFreq
 Required properties:
 - clocks: Must contain an entry for the CPU clock.
   See ../clocks/clock-bindings.txt for details.
-- operating-points-v2: See ../bindings/opp/opp.txt for details.
+- operating-points-v2: See ../bindings/opp/opp-v2.yaml for details.
 - #cooling-cells: Should be 2. See ../thermal/thermal-cooling-devices.yaml for details.
 
 For each opp entry in 'operating-points-v2' table:
diff --git a/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt b/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt
index ac189dd82b08..3fbeb3733c48 100644
--- a/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt
+++ b/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt
@@ -8,7 +8,7 @@ Required properties:
 - clocks:		 Phandles for clock specified in "clock-names" property
 - clock-names :		 The name of clock used by the DFI, must be
 			 "pclk_ddr_mon";
-- operating-points-v2:	 Refer to Documentation/devicetree/bindings/opp/opp.txt
+- operating-points-v2:	 Refer to Documentation/devicetree/bindings/opp/opp-v2.yaml
 			 for details.
 - center-supply:	 DMC supply node.
 - status:		 Marks the node enabled/disabled.
diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml
index 4265399bb154..c851770bbdf2 100644
--- a/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml
+++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml
@@ -14,10 +14,10 @@ allOf:
 
 properties:
   compatible:
-    oneOf:
-      - const: qcom,dsi-phy-7nm
-      - const: qcom,dsi-phy-7nm-8150
-      - const: qcom,sc7280-dsi-phy-7nm
+    enum:
+      - qcom,dsi-phy-7nm
+      - qcom,dsi-phy-7nm-8150
+      - qcom,sc7280-dsi-phy-7nm
 
   reg:
     items:
diff --git a/Documentation/devicetree/bindings/dma/altr,msgdma.yaml b/Documentation/devicetree/bindings/dma/altr,msgdma.yaml
index a4f9fe23dcd9..b193ee2db4a7 100644
--- a/Documentation/devicetree/bindings/dma/altr,msgdma.yaml
+++ b/Documentation/devicetree/bindings/dma/altr,msgdma.yaml
@@ -24,13 +24,15 @@ properties:
     items:
       - description: Control and Status Register Slave Port
       - description: Descriptor Slave Port
-      - description: Response Slave Port
+      - description: Response Slave Port (Optional)
+    minItems: 2
 
   reg-names:
     items:
       - const: csr
       - const: desc
       - const: resp
+    minItems: 2
 
   interrupts:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/dma/renesas,rz-dmac.yaml b/Documentation/devicetree/bindings/dma/renesas,rz-dmac.yaml
new file mode 100644
index 000000000000..7a4f415d74dc
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/renesas,rz-dmac.yaml
@@ -0,0 +1,130 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/dma/renesas,rz-dmac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas RZ/G2L DMA Controller
+
+maintainers:
+  - Biju Das <biju.das.jz@bp.renesas.com>
+
+allOf:
+  - $ref: "dma-controller.yaml#"
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - renesas,r9a07g044-dmac # RZ/G2{L,LC}
+      - const: renesas,rz-dmac
+
+  reg:
+    items:
+      - description: Control and channel register block
+      - description: DMA extended resource selector block
+
+  interrupts:
+    maxItems: 17
+
+  interrupt-names:
+    items:
+      - const: error
+      - const: ch0
+      - const: ch1
+      - const: ch2
+      - const: ch3
+      - const: ch4
+      - const: ch5
+      - const: ch6
+      - const: ch7
+      - const: ch8
+      - const: ch9
+      - const: ch10
+      - const: ch11
+      - const: ch12
+      - const: ch13
+      - const: ch14
+      - const: ch15
+
+  clocks:
+    items:
+      - description: DMA main clock
+      - description: DMA register access clock
+
+  '#dma-cells':
+    const: 1
+    description:
+      The cell specifies the encoded MID/RID values of the DMAC port
+      connected to the DMA client and the slave channel configuration
+      parameters.
+      bits[0:9] - Specifies MID/RID value
+      bit[10] - Specifies DMA request high enable (HIEN)
+      bit[11] - Specifies DMA request detection type (LVL)
+      bits[12:14] - Specifies DMAACK output mode (AM)
+      bit[15] - Specifies Transfer Mode (TM)
+
+  dma-channels:
+    const: 16
+
+  power-domains:
+    maxItems: 1
+
+  resets:
+    items:
+      - description: Reset for DMA ARESETN reset terminal
+      - description: Reset for DMA RST_ASYNC reset terminal
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - interrupt-names
+  - clocks
+  - '#dma-cells'
+  - dma-channels
+  - power-domains
+  - resets
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/r9a07g044-cpg.h>
+
+    dmac: dma-controller@11820000 {
+        compatible = "renesas,r9a07g044-dmac",
+                     "renesas,rz-dmac";
+        reg = <0x11820000 0x10000>,
+              <0x11830000 0x10000>;
+        interrupts = <GIC_SPI 141 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 125 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 126 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 127 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 128 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 129 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 130 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 131 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 132 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 133 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 134 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 135 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 136 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 137 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 138 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 139 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 140 IRQ_TYPE_EDGE_RISING>;
+        interrupt-names = "error",
+                          "ch0", "ch1", "ch2", "ch3",
+                          "ch4", "ch5", "ch6", "ch7",
+                          "ch8", "ch9", "ch10", "ch11",
+                          "ch12", "ch13", "ch14", "ch15";
+        clocks = <&cpg CPG_MOD R9A07G044_DMAC_ACLK>,
+                 <&cpg CPG_MOD R9A07G044_DMAC_PCLK>;
+        power-domains = <&cpg>;
+        resets = <&cpg R9A07G044_DMAC_ARESETN>,
+                 <&cpg R9A07G044_DMAC_RST_ASYNC>;
+        #dma-cells = <1>;
+        dma-channels = <16>;
+    };
diff --git a/Documentation/devicetree/bindings/dma/st,stm32-dma.yaml b/Documentation/devicetree/bindings/dma/st,stm32-dma.yaml
index 2a5325f480f6..4bf676fd25dc 100644
--- a/Documentation/devicetree/bindings/dma/st,stm32-dma.yaml
+++ b/Documentation/devicetree/bindings/dma/st,stm32-dma.yaml
@@ -40,6 +40,13 @@ description: |
          0x0: FIFO mode with threshold selectable with bit 0-1
          0x1: Direct mode: each DMA request immediately initiates a transfer
               from/to the memory, FIFO is bypassed.
+       -bit 4: alternative DMA request/acknowledge protocol
+         0x0: Use standard DMA ACK management, where ACK signal is maintained
+              up to the removal of request and transfer completion
+         0x1: Use alternative DMA ACK management, where ACK de-assertion does
+              not wait for the de-assertion of the REQuest, ACK is only managed
+              by transfer completion. This must only be used on channels
+              managing transfers for STM32 USART/UART.
 
 
 maintainers:
diff --git a/Documentation/devicetree/bindings/gpio/gpio-virtio.yaml b/Documentation/devicetree/bindings/gpio/gpio-virtio.yaml
new file mode 100644
index 000000000000..601d85754577
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-virtio.yaml
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpio/gpio-virtio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Virtio GPIO controller
+
+maintainers:
+  - Viresh Kumar <viresh.kumar@linaro.org>
+
+allOf:
+  - $ref: /schemas/virtio/virtio-device.yaml#
+
+description:
+  Virtio GPIO controller, see /schemas/virtio/virtio-device.yaml for more
+  details.
+
+properties:
+  $nodename:
+    const: gpio
+
+  compatible:
+    const: virtio,device29
+
+  gpio-controller: true
+
+  "#gpio-cells":
+    const: 2
+
+  interrupt-controller: true
+
+  "#interrupt-cells":
+    const: 2
+
+required:
+  - compatible
+  - gpio-controller
+  - "#gpio-cells"
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    virtio@3000 {
+        compatible = "virtio,mmio";
+        reg = <0x3000 0x100>;
+        interrupts = <41>;
+
+        gpio {
+            compatible = "virtio,device29";
+            gpio-controller;
+            #gpio-cells = <2>;
+            interrupt-controller;
+            #interrupt-cells = <2>;
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml b/Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml
index c5f6092a2855..6f98dd55fb4c 100644
--- a/Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml
+++ b/Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml
@@ -137,7 +137,7 @@ examples:
       resets = <&reset 0>, <&reset 1>;
     };
 
-    gpu_opp_table: opp_table0 {
+    gpu_opp_table: opp-table {
       compatible = "operating-points-v2";
 
       opp-533000000 {
diff --git a/Documentation/devicetree/bindings/gpu/arm,mali-midgard.yaml b/Documentation/devicetree/bindings/gpu/arm,mali-midgard.yaml
index 696c17aedbbe..d209f272625d 100644
--- a/Documentation/devicetree/bindings/gpu/arm,mali-midgard.yaml
+++ b/Documentation/devicetree/bindings/gpu/arm,mali-midgard.yaml
@@ -160,7 +160,7 @@ examples:
       #cooling-cells = <2>;
     };
 
-    gpu_opp_table: opp_table0 {
+    gpu_opp_table: opp-table {
       compatible = "operating-points-v2";
 
       opp-533000000 {
diff --git a/Documentation/devicetree/bindings/i2c/i2c-virtio.yaml b/Documentation/devicetree/bindings/i2c/i2c-virtio.yaml
new file mode 100644
index 000000000000..7d87ed855301
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-virtio.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/i2c/i2c-virtio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Virtio I2C Adapter
+
+maintainers:
+  - Viresh Kumar <viresh.kumar@linaro.org>
+
+allOf:
+  - $ref: /schemas/i2c/i2c-controller.yaml#
+  - $ref: /schemas/virtio/virtio-device.yaml#
+
+description:
+  Virtio I2C device, see /schemas/virtio/virtio-device.yaml for more details.
+
+properties:
+  $nodename:
+    const: i2c
+
+  compatible:
+    const: virtio,device22
+
+required:
+  - compatible
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    virtio@3000 {
+        compatible = "virtio,mmio";
+        reg = <0x3000 0x100>;
+        interrupts = <41>;
+
+        i2c {
+            compatible = "virtio,device22";
+
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            light-sensor@20 {
+                compatible = "dynaimage,al3320a";
+                reg = <0x20>;
+            };
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml b/Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml
index cffd02028d02..d74f2002409e 100644
--- a/Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml
+++ b/Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml
@@ -29,6 +29,8 @@ properties:
     description:
       Regulator for the LRADC reference voltage
 
+  wakeup-source: true
+
 patternProperties:
   "^button-[0-9]+$":
     type: object
diff --git a/Documentation/devicetree/bindings/input/qcom,pm8941-pwrkey.txt b/Documentation/devicetree/bindings/input/qcom,pm8941-pwrkey.txt
deleted file mode 100644
index 6cd08bca2c66..000000000000
--- a/Documentation/devicetree/bindings/input/qcom,pm8941-pwrkey.txt
+++ /dev/null
@@ -1,55 +0,0 @@
-Qualcomm PM8941 PMIC Power Key
-
-PROPERTIES
-
-- compatible:
-	Usage: required
-	Value type: <string>
-	Definition: must be one of:
-		    "qcom,pm8941-pwrkey"
-		    "qcom,pm8941-resin"
-		    "qcom,pmk8350-pwrkey"
-		    "qcom,pmk8350-resin"
-
-- reg:
-	Usage: required
-	Value type: <prop-encoded-array>
-	Definition: base address of registers for block
-
-- interrupts:
-	Usage: required
-	Value type: <prop-encoded-array>
-	Definition: key change interrupt; The format of the specifier is
-		    defined by the binding document describing the node's
-		    interrupt parent.
-
-- debounce:
-	Usage: optional
-	Value type: <u32>
-	Definition: time in microseconds that key must be pressed or released
-		    for state change interrupt to trigger.
-
-- bias-pull-up:
-	Usage: optional
-	Value type: <empty>
-	Definition: presence of this property indicates that the KPDPWR_N pin
-		    should be configured for pull up.
-
-- linux,code:
-	Usage: optional
-	Value type: <u32>
-	Definition: The input key-code associated with the power key.
-		    Use the linux event codes defined in
-		    include/dt-bindings/input/linux-event-codes.h
-		    When property is omitted KEY_POWER is assumed.
-
-EXAMPLE
-
-	pwrkey@800 {
-		compatible = "qcom,pm8941-pwrkey";
-		reg = <0x800>;
-		interrupts = <0x0 0x8 0 IRQ_TYPE_EDGE_BOTH>;
-		debounce = <15625>;
-		bias-pull-up;
-		linux,code = <KEY_POWER>;
-	};
diff --git a/Documentation/devicetree/bindings/input/qcom,pm8941-pwrkey.yaml b/Documentation/devicetree/bindings/input/qcom,pm8941-pwrkey.yaml
new file mode 100644
index 000000000000..62314a5fdce5
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/qcom,pm8941-pwrkey.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/qcom,pm8941-pwrkey.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm PM8941 PMIC Power Key
+
+maintainers:
+  - Courtney Cavin <courtney.cavin@sonymobile.com>
+  - Vinod Koul <vkoul@kernel.org>
+
+allOf:
+  - $ref: input.yaml#
+
+properties:
+  compatible:
+    enum:
+      - qcom,pm8941-pwrkey
+      - qcom,pm8941-resin
+      - qcom,pmk8350-pwrkey
+      - qcom,pmk8350-resin
+
+  interrupts:
+    maxItems: 1
+
+  debounce:
+    description: |
+          Time in microseconds that key must be pressed or
+          released for state change interrupt to trigger.
+    $ref: /schemas/types.yaml#/definitions/uint32
+
+  bias-pull-up:
+    description: |
+           Presence of this property indicates that the KPDPWR_N
+           pin should be configured for pull up.
+    $ref: /schemas/types.yaml#/definitions/flag
+
+  linux,code:
+    description: |
+           The input key-code associated with the power key.
+           Use the linux event codes defined in
+           include/dt-bindings/input/linux-event-codes.h
+           When property is omitted KEY_POWER is assumed.
+
+required:
+  - compatible
+  - interrupts
+
+unevaluatedProperties: false
+...
diff --git a/Documentation/devicetree/bindings/input/regulator-haptic.txt b/Documentation/devicetree/bindings/input/regulator-haptic.txt
deleted file mode 100644
index 3ed1c7eb2f97..000000000000
--- a/Documentation/devicetree/bindings/input/regulator-haptic.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-* Regulator Haptic Device Tree Bindings
-
-Required Properties:
- - compatible : Should be "regulator-haptic"
- - haptic-supply : Power supply to the haptic motor.
-	[*] refer Documentation/devicetree/bindings/regulator/regulator.txt
-
- - max-microvolt : The maximum voltage value supplied to the haptic motor.
-		[The unit of the voltage is a micro]
-
- - min-microvolt : The minimum voltage value supplied to the haptic motor.
-		[The unit of the voltage is a micro]
-
-Example:
-
-	haptics {
-		compatible = "regulator-haptic";
-		haptic-supply = <&motor_regulator>;
-		max-microvolt = <2700000>;
-		min-microvolt = <1100000>;
-	};
diff --git a/Documentation/devicetree/bindings/input/regulator-haptic.yaml b/Documentation/devicetree/bindings/input/regulator-haptic.yaml
new file mode 100644
index 000000000000..b1ae72f9cd2d
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/regulator-haptic.yaml
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/input/regulator-haptic.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Regulator Haptic Device Tree Bindings
+
+maintainers:
+  - Jaewon Kim <jaewon02.kim@samsung.com>
+
+properties:
+  compatible:
+    const: regulator-haptic
+
+  haptic-supply:
+    description: >
+      Power supply to the haptic motor
+
+  max-microvolt:
+    description: >
+      The maximum voltage value supplied to the haptic motor
+
+  min-microvolt:
+    description: >
+      The minimum voltage value supplied to the haptic motor
+
+required:
+  - compatible
+  - haptic-supply
+  - max-microvolt
+  - min-microvolt
+
+additionalProperties: false
+
+examples:
+  - |
+    haptics {
+        compatible = "regulator-haptic";
+        haptic-supply = <&motor_regulator>;
+        max-microvolt = <2700000>;
+        min-microvolt = <1100000>;
+    };
diff --git a/Documentation/devicetree/bindings/input/touchscreen/chipone,icn8318.yaml b/Documentation/devicetree/bindings/input/touchscreen/chipone,icn8318.yaml
new file mode 100644
index 000000000000..9df685bdc5db
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/chipone,icn8318.yaml
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/touchscreen/chipone,icn8318.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ChipOne ICN8318 Touchscreen Controller Device Tree Bindings
+
+maintainers:
+  - Dmitry Torokhov <dmitry.torokhov@gmail.com>
+
+allOf:
+  - $ref: touchscreen.yaml#
+
+properties:
+  compatible:
+    const: chipone,icn8318
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  wake-gpios:
+    maxItems: 1
+
+unevaluatedProperties: false
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - wake-gpios
+  - touchscreen-size-x
+  - touchscreen-size-y
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        touchscreen@40 {
+            compatible = "chipone,icn8318";
+            reg = <0x40>;
+            interrupt-parent = <&pio>;
+            interrupts = <9 IRQ_TYPE_EDGE_FALLING>; /* EINT9 (PG9) */
+            pinctrl-names = "default";
+            pinctrl-0 = <&ts_wake_pin_p66>;
+            wake-gpios = <&pio 1 3 GPIO_ACTIVE_HIGH>; /* PB3 */
+            touchscreen-size-x = <800>;
+            touchscreen-size-y = <480>;
+            touchscreen-inverted-x;
+            touchscreen-swapped-x-y;
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/input/touchscreen/chipone_icn8318.txt b/Documentation/devicetree/bindings/input/touchscreen/chipone_icn8318.txt
deleted file mode 100644
index 38b0603f65f3..000000000000
--- a/Documentation/devicetree/bindings/input/touchscreen/chipone_icn8318.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-* ChipOne icn8318 I2C touchscreen controller
-
-Required properties:
- - compatible		  : "chipone,icn8318"
- - reg			  : I2C slave address of the chip (0x40)
- - interrupts		  : interrupt specification for the icn8318 interrupt
- - wake-gpios		  : GPIO specification for the WAKE input
- - touchscreen-size-x	  : horizontal resolution of touchscreen (in pixels)
- - touchscreen-size-y	  : vertical resolution of touchscreen (in pixels)
-
-Optional properties:
- - pinctrl-names	  : should be "default"
- - pinctrl-0:		  : a phandle pointing to the pin settings for the
-			    control gpios
- - touchscreen-fuzz-x	  : horizontal noise value of the absolute input
-			    device (in pixels)
- - touchscreen-fuzz-y	  : vertical noise value of the absolute input
-			    device (in pixels)
- - touchscreen-inverted-x : X axis is inverted (boolean)
- - touchscreen-inverted-y : Y axis is inverted (boolean)
- - touchscreen-swapped-x-y	  : X and Y axis are swapped (boolean)
-			    Swapping is done after inverting the axis
-
-Example:
-
-i2c@00000000 {
-	/* ... */
-
-	chipone_icn8318@40 {
-		compatible = "chipone,icn8318";
-		reg = <0x40>;
-		interrupt-parent = <&pio>;
-		interrupts = <9 IRQ_TYPE_EDGE_FALLING>; /* EINT9 (PG9) */
-		pinctrl-names = "default";
-		pinctrl-0 = <&ts_wake_pin_p66>;
-		wake-gpios = <&pio 1 3 GPIO_ACTIVE_HIGH>; /* PB3 */
-		touchscreen-size-x = <800>;
-		touchscreen-size-y = <480>;
-		touchscreen-inverted-x;
-		touchscreen-swapped-x-y;
-	};
-
-	/* ... */
-};
diff --git a/Documentation/devicetree/bindings/input/touchscreen/pixcir,pixcir_ts.yaml b/Documentation/devicetree/bindings/input/touchscreen/pixcir,pixcir_ts.yaml
new file mode 100644
index 000000000000..f9998edbff70
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/pixcir,pixcir_ts.yaml
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/touchscreen/pixcir,pixcir_ts.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Pixcir Touchscreen Controller Device Tree Bindings
+
+maintainers:
+  - Dmitry Torokhov <dmitry.torokhov@gmail.com>
+
+allOf:
+  - $ref: touchscreen.yaml#
+
+properties:
+  compatible:
+    enum:
+      - pixcir,pixcir_ts
+      - pixcir,pixcir_tangoc
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  attb-gpio:
+    maxItems: 1
+
+  reset-gpios:
+    maxItems: 1
+
+  enable-gpios:
+    maxItems: 1
+
+  wake-gpios:
+    maxItems: 1
+
+unevaluatedProperties: false
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - attb-gpio
+  - touchscreen-size-x
+  - touchscreen-size-y
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        touchscreen@5c {
+            compatible = "pixcir,pixcir_ts";
+            reg = <0x5c>;
+            interrupts = <2 0>;
+            attb-gpio = <&gpf 2 0 2>;
+            touchscreen-size-x = <800>;
+            touchscreen-size-y = <600>;
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/input/touchscreen/pixcir_i2c_ts.txt b/Documentation/devicetree/bindings/input/touchscreen/pixcir_i2c_ts.txt
deleted file mode 100644
index 697a3e7831e7..000000000000
--- a/Documentation/devicetree/bindings/input/touchscreen/pixcir_i2c_ts.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-* Pixcir I2C touchscreen controllers
-
-Required properties:
-- compatible: must be "pixcir,pixcir_ts" or "pixcir,pixcir_tangoc"
-- reg: I2C address of the chip
-- interrupts: interrupt to which the chip is connected
-- attb-gpio: GPIO connected to the ATTB line of the chip
-- touchscreen-size-x: horizontal resolution of touchscreen (in pixels)
-- touchscreen-size-y: vertical resolution of touchscreen (in pixels)
-
-Optional properties:
-- reset-gpios: GPIO connected to the RESET line of the chip
-- enable-gpios: GPIO connected to the ENABLE line of the chip
-- wake-gpios: GPIO connected to the WAKE line of the chip
-
-Example:
-
-	i2c@00000000 {
-		/* ... */
-
-		pixcir_ts@5c {
-			compatible = "pixcir,pixcir_ts";
-			reg = <0x5c>;
-			interrupts = <2 0>;
-			attb-gpio = <&gpf 2 0 2>;
-			touchscreen-size-x = <800>;
-			touchscreen-size-y = <600>;
-		};
-
-		/* ... */
-	};
diff --git a/Documentation/devicetree/bindings/input/touchscreen/ti,tsc2005.yaml b/Documentation/devicetree/bindings/input/touchscreen/ti,tsc2005.yaml
new file mode 100644
index 000000000000..938aab016cc2
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/ti,tsc2005.yaml
@@ -0,0 +1,128 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/touchscreen/ti,tsc2005.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments TSC2004 and TSC2005 touchscreen controller bindings
+
+maintainers:
+  - Marek Vasut <marex@denx.de>
+  - Michael Welling <mwelling@ieee.org>
+
+properties:
+  $nodename:
+    pattern: "^touchscreen(@.*)?$"
+
+  compatible:
+    enum:
+      - ti,tsc2004
+      - ti,tsc2005
+
+  reg:
+    maxItems: 1
+    description: |
+      I2C address when used on the I2C bus, or the SPI chip select index
+      when used on the SPI bus
+
+  interrupts:
+    maxItems: 1
+
+  reset-gpios:
+    maxItems: 1
+    description: GPIO specifier for the controller reset line
+
+  spi-max-frequency:
+    description: TSC2005 SPI bus clock frequency.
+    maximum: 25000000
+
+  ti,x-plate-ohms:
+    description: resistance of the touchscreen's X plates in ohm (defaults to 280)
+
+  ti,esd-recovery-timeout-ms:
+    description: |
+        if the touchscreen does not respond after the configured time
+        (in milli seconds), the driver will reset it. This is disabled
+        by default.
+
+  vio-supply:
+    description: Regulator specifier
+
+  touchscreen-fuzz-pressure: true
+  touchscreen-fuzz-x: true
+  touchscreen-fuzz-y: true
+  touchscreen-max-pressure: true
+  touchscreen-size-x: true
+  touchscreen-size-y: true
+
+allOf:
+  - $ref: touchscreen.yaml#
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: ti,tsc2004
+    then:
+      properties:
+        spi-max-frequency: false
+
+additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - interrupts
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/gpio/gpio.h>
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+        touchscreen@48 {
+            compatible = "ti,tsc2004";
+            reg = <0x48>;
+            vio-supply = <&vio>;
+
+            reset-gpios = <&gpio4 8 GPIO_ACTIVE_HIGH>;
+            interrupts-extended = <&gpio1 27 IRQ_TYPE_EDGE_RISING>;
+
+            touchscreen-fuzz-x = <4>;
+            touchscreen-fuzz-y = <7>;
+            touchscreen-fuzz-pressure = <2>;
+            touchscreen-size-x = <4096>;
+            touchscreen-size-y = <4096>;
+            touchscreen-max-pressure = <2048>;
+
+            ti,x-plate-ohms = <280>;
+            ti,esd-recovery-timeout-ms = <8000>;
+        };
+    };
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/gpio/gpio.h>
+    spi {
+        #address-cells = <1>;
+        #size-cells = <0>;
+        touchscreen@0 {
+            compatible = "ti,tsc2005";
+            spi-max-frequency = <6000000>;
+            reg = <0>;
+
+            vio-supply = <&vio>;
+
+            reset-gpios = <&gpio4 8 GPIO_ACTIVE_HIGH>; /* 104 */
+            interrupts-extended = <&gpio4 4 IRQ_TYPE_EDGE_RISING>; /* 100 */
+
+            touchscreen-fuzz-x = <4>;
+            touchscreen-fuzz-y = <7>;
+            touchscreen-fuzz-pressure = <2>;
+            touchscreen-size-x = <4096>;
+            touchscreen-size-y = <4096>;
+            touchscreen-max-pressure = <2048>;
+
+            ti,x-plate-ohms = <280>;
+            ti,esd-recovery-timeout-ms = <8000>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/input/touchscreen/tsc2005.txt b/Documentation/devicetree/bindings/input/touchscreen/tsc2005.txt
deleted file mode 100644
index b80c04b0e5c0..000000000000
--- a/Documentation/devicetree/bindings/input/touchscreen/tsc2005.txt
+++ /dev/null
@@ -1,64 +0,0 @@
-* Texas Instruments tsc2004 and tsc2005 touchscreen controllers
-
-Required properties:
- - compatible		      : "ti,tsc2004" or "ti,tsc2005"
- - reg			      : Device address
- - interrupts		      : IRQ specifier
- - spi-max-frequency	      : Maximum SPI clocking speed of the device
-			        (for tsc2005)
-
-Optional properties:
- - vio-supply		      : Regulator specifier
- - reset-gpios		      : GPIO specifier for the controller reset line
- - ti,x-plate-ohms	      : integer, resistance of the touchscreen's X plates
-				in ohm (defaults to 280)
- - ti,esd-recovery-timeout-ms : integer, if the touchscreen does not respond after
-				the configured time (in milli seconds), the driver
-				will reset it. This is disabled by default.
- - properties defined in touchscreen.txt
-
-Example:
-
-&i2c3 {
-	tsc2004@48 {
-		compatible = "ti,tsc2004";
-		reg = <0x48>;
-		vio-supply = <&vio>;
-
-		reset-gpios = <&gpio4 8 GPIO_ACTIVE_HIGH>;
-		interrupts-extended = <&gpio1 27 IRQ_TYPE_EDGE_RISING>;
-
-		touchscreen-fuzz-x = <4>;
-		touchscreen-fuzz-y = <7>;
-		touchscreen-fuzz-pressure = <2>;
-		touchscreen-size-x = <4096>;
-		touchscreen-size-y = <4096>;
-		touchscreen-max-pressure = <2048>;
-
-		ti,x-plate-ohms = <280>;
-		ti,esd-recovery-timeout-ms = <8000>;
-	};
-}
-
-&mcspi1 {
-	tsc2005@0 {
-		compatible = "ti,tsc2005";
-		spi-max-frequency = <6000000>;
-		reg = <0>;
-
-		vio-supply = <&vio>;
-
-		reset-gpios = <&gpio4 8 GPIO_ACTIVE_HIGH>; /* 104 */
-		interrupts-extended = <&gpio4 4 IRQ_TYPE_EDGE_RISING>; /* 100 */
-
-		touchscreen-fuzz-x = <4>;
-		touchscreen-fuzz-y = <7>;
-		touchscreen-fuzz-pressure = <2>;
-		touchscreen-size-x = <4096>;
-		touchscreen-size-y = <4096>;
-		touchscreen-max-pressure = <2048>;
-
-		ti,x-plate-ohms = <280>;
-		ti,esd-recovery-timeout-ms = <8000>;
-	};
-}
diff --git a/Documentation/devicetree/bindings/interconnect/fsl,imx8m-noc.yaml b/Documentation/devicetree/bindings/interconnect/fsl,imx8m-noc.yaml
index a8873739d61a..b8204ed22dd5 100644
--- a/Documentation/devicetree/bindings/interconnect/fsl,imx8m-noc.yaml
+++ b/Documentation/devicetree/bindings/interconnect/fsl,imx8m-noc.yaml
@@ -81,10 +81,10 @@ examples:
         noc_opp_table: opp-table {
             compatible = "operating-points-v2";
 
-            opp-133M {
+            opp-133333333 {
                 opp-hz = /bits/ 64 <133333333>;
             };
-            opp-800M {
+            opp-800000000 {
                 opp-hz = /bits/ 64 <800000000>;
             };
         };
diff --git a/Documentation/devicetree/bindings/iommu/apple,dart.yaml b/Documentation/devicetree/bindings/iommu/apple,dart.yaml
new file mode 100644
index 000000000000..94aa9e9afa59
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/apple,dart.yaml
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iommu/apple,dart.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Apple DART IOMMU
+
+maintainers:
+  - Sven Peter <sven@svenpeter.dev>
+
+description: |+
+  Apple SoCs may contain an implementation of their Device Address
+  Resolution Table which provides a mandatory layer of address
+  translations for various masters.
+
+  Each DART instance is capable of handling up to 16 different streams
+  with individual pagetables and page-level read/write protection flags.
+
+  This DART IOMMU also raises interrupts in response to various
+  fault conditions.
+
+properties:
+  compatible:
+    const: apple,t8103-dart
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    description:
+      Reference to the gate clock phandle if required for this IOMMU.
+      Optional since not all IOMMUs are attached to a clock gate.
+
+  '#iommu-cells':
+    const: 1
+    description:
+      Has to be one. The single cell describes the stream id emitted by
+      a master to the IOMMU.
+
+required:
+  - compatible
+  - reg
+  - '#iommu-cells'
+  - interrupts
+
+additionalProperties: false
+
+examples:
+  - |+
+    dart1: iommu@82f80000 {
+      compatible = "apple,t8103-dart";
+      reg = <0x82f80000 0x4000>;
+      interrupts = <1 781 4>;
+      #iommu-cells = <1>;
+    };
+
+    master1 {
+      iommus = <&dart1 0>;
+    };
+
+  - |+
+    dart2a: iommu@82f00000 {
+      compatible = "apple,t8103-dart";
+      reg = <0x82f00000 0x4000>;
+      interrupts = <1 781 4>;
+      #iommu-cells = <1>;
+    };
+    dart2b: iommu@82f80000 {
+      compatible = "apple,t8103-dart";
+      reg = <0x82f80000 0x4000>;
+      interrupts = <1 781 4>;
+      #iommu-cells = <1>;
+    };
+
+    master2 {
+      iommus = <&dart2a 0>, <&dart2b 1>;
+    };
diff --git a/Documentation/devicetree/bindings/mailbox/mtk-gce.txt b/Documentation/devicetree/bindings/mailbox/mtk-gce.txt
index 2ef7ff67cb2b..89a59b9c81f9 100644
--- a/Documentation/devicetree/bindings/mailbox/mtk-gce.txt
+++ b/Documentation/devicetree/bindings/mailbox/mtk-gce.txt
@@ -9,8 +9,8 @@ CMDQ driver uses mailbox framework for communication. Please refer to
 mailbox.txt for generic information about mailbox device-tree bindings.
 
 Required properties:
-- compatible: can be "mediatek,mt8173-gce", "mediatek,mt8183-gce" or
-  "mediatek,mt6779-gce".
+- compatible: can be "mediatek,mt8173-gce", "mediatek,mt8183-gce",
+  "mediatek,mt8192-gce", "mediatek,mt8195-gce" or "mediatek,mt6779-gce".
 - reg: Address range of the GCE unit
 - interrupts: The interrupt signal from the GCE block
 - clock: Clocks according to the common clock binding
@@ -40,8 +40,9 @@ Optional properties for a client mutex node:
   defined in 'dt-bindings/gce/<chip>-gce.h'.
 
 Some vaules of properties are defined in 'dt-bindings/gce/mt8173-gce.h',
-'dt-binding/gce/mt8183-gce.h' or 'dt-bindings/gce/mt6779-gce.h'. Such as
-sub-system ids, thread priority, event ids.
+'dt-binding/gce/mt8183-gce.h', 'dt-binding/gce/mt8192-gce.h',
+'dt-binding/gce/mt8195-gce.h' or 'dt-bindings/gce/mt6779-gce.h'.
+Such as sub-system ids, thread priority, event ids.
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml b/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml
index 8878ec00820e..6395281b0cec 100644
--- a/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml
+++ b/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml
@@ -20,6 +20,7 @@ properties:
       - qcom,ipq8074-apcs-apps-global
       - qcom,msm8916-apcs-kpss-global
       - qcom,msm8939-apcs-kpss-global
+      - qcom,msm8953-apcs-kpss-global
       - qcom,msm8994-apcs-kpss-global
       - qcom,msm8996-apcs-hmss-global
       - qcom,msm8998-apcs-hmss-global
@@ -29,6 +30,7 @@ properties:
       - qcom,sdm660-apcs-hmss-global
       - qcom,sdm845-apss-shared
       - qcom,sm6125-apcs-hmss-global
+      - qcom,sm6115-apcs-hmss-global
       - qcom,sm8150-apss-shared
 
   reg:
diff --git a/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml b/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml
index b222f993b232..866efb278813 100644
--- a/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml
+++ b/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml
@@ -24,6 +24,7 @@ properties:
   compatible:
     items:
       - enum:
+          - qcom,sm6350-ipcc
           - qcom,sm8250-ipcc
           - qcom,sm8350-ipcc
           - qcom,sc7280-ipcc
diff --git a/Documentation/devicetree/bindings/mfd/axp20x.txt b/Documentation/devicetree/bindings/mfd/axp20x.txt
index 4991a6415796..2b53dcc0ea61 100644
--- a/Documentation/devicetree/bindings/mfd/axp20x.txt
+++ b/Documentation/devicetree/bindings/mfd/axp20x.txt
@@ -26,10 +26,10 @@ Required properties:
     * "x-powers,axp803"
     * "x-powers,axp806"
     * "x-powers,axp805", "x-powers,axp806"
+    * "x-powers,axp305", "x-powers,axp805", "x-powers,axp806"
     * "x-powers,axp809"
     * "x-powers,axp813"
 - reg: The I2C slave address or RSB hardware address for the AXP chip
-- interrupts: SoC NMI / GPIO interrupt connected to the PMIC's IRQ pin
 - interrupt-controller: The PMIC has its own internal IRQs
 - #interrupt-cells: Should be set to 1
 
@@ -43,6 +43,7 @@ more information:
 			AXP20x/LDO3: software-based implementation
 
 Optional properties:
+- interrupts: SoC NMI / GPIO interrupt connected to the PMIC's IRQ pin
 - x-powers,dcdc-freq: defines the work frequency of DC-DC in KHz
 		      AXP152/20X: range:  750-1875, Default: 1.5 MHz
 		      AXP22X/8XX: range: 1800-4050, Default: 3   MHz
diff --git a/Documentation/devicetree/bindings/mfd/brcm,cru.yaml b/Documentation/devicetree/bindings/mfd/brcm,cru.yaml
new file mode 100644
index 000000000000..fc1317ab3226
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/brcm,cru.yaml
@@ -0,0 +1,86 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mfd/brcm,cru.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom CRU
+
+maintainers:
+  - Rafał Miłecki <rafal@milecki.pl>
+
+description: |
+  Broadcom CRU ("Clock and Reset Unit" or "Central Resource Unit") is a hardware
+  block grouping smaller blocks. On Broadcom Northstar platform it contains e.g.
+  clocks, pinctrl, USB PHY and thermal.
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - brcm,ns-cru
+      - const: simple-mfd
+
+  reg:
+    description: CRU registers
+
+  ranges: true
+
+  "#address-cells":
+    const: 1
+
+  "#size-cells":
+    const: 1
+
+  pinctrl:
+    $ref: ../pinctrl/brcm,ns-pinmux.yaml
+
+patternProperties:
+  '^clock-controller@[a-f0-9]+$':
+    $ref: ../clock/brcm,iproc-clocks.yaml
+
+  '^thermal@[a-f0-9]+$':
+    $ref: ../thermal/brcm,ns-thermal.yaml
+
+additionalProperties: false
+
+required:
+  - reg
+
+examples:
+  - |
+    cru-bus@1800c100 {
+        compatible = "brcm,ns-cru", "simple-mfd";
+        reg = <0x1800c100 0x1d0>;
+        ranges;
+        #address-cells = <1>;
+        #size-cells = <1>;
+
+        clock-controller@100 {
+            #clock-cells = <1>;
+            compatible = "brcm,nsp-lcpll0";
+            reg = <0x100 0x14>;
+            clocks = <&osc>;
+            clock-output-names = "lcpll0", "pcie_phy", "sdio", "ddr_phy";
+        };
+
+        clock-controller@140 {
+            #clock-cells = <1>;
+            compatible = "brcm,nsp-genpll";
+            reg = <0x140 0x24>;
+            clocks = <&osc>;
+            clock-output-names = "genpll", "phy", "ethernetclk", "usbclk",
+                                 "iprocfast", "sata1", "sata2";
+        };
+
+        pinctrl {
+            compatible = "brcm,bcm4708-pinmux";
+            offset = <0x1c0>;
+        };
+
+        thermal@2c0 {
+            compatible = "brcm,ns-thermal";
+            reg = <0x2c0 0x10>;
+            #thermal-sensor-cells = <0>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/mfd/qcom,pm8008.yaml b/Documentation/devicetree/bindings/mfd/qcom,pm8008.yaml
index 779936850ee0..ec3138c1bbfc 100644
--- a/Documentation/devicetree/bindings/mfd/qcom,pm8008.yaml
+++ b/Documentation/devicetree/bindings/mfd/qcom,pm8008.yaml
@@ -53,7 +53,9 @@ patternProperties:
 
     properties:
       compatible:
-        const: qcom,pm8008-gpio
+        items:
+          - const: qcom,pm8008-gpio
+          - const: qcom,spmi-gpio
 
       reg:
         description: Peripheral address of one of the two GPIO peripherals.
@@ -61,6 +63,9 @@ patternProperties:
 
       gpio-controller: true
 
+      gpio-ranges:
+        maxItems: 1
+
       interrupt-controller: true
 
       "#interrupt-cells":
@@ -75,6 +80,7 @@ patternProperties:
       - gpio-controller
       - interrupt-controller
       - "#gpio-cells"
+      - gpio-ranges
       - "#interrupt-cells"
 
     additionalProperties: false
@@ -107,10 +113,11 @@ examples:
         interrupt-parent = <&tlmm>;
         interrupts = <32 IRQ_TYPE_EDGE_RISING>;
 
-        gpio@c000 {
-          compatible = "qcom,pm8008-gpio";
+        pm8008_gpios: gpio@c000 {
+          compatible = "qcom,pm8008-gpio", "qcom,spmi-gpio";
           reg = <0xc000>;
           gpio-controller;
+          gpio-ranges = <&pm8008_gpios 0 0 2>;
           #gpio-cells = <2>;
           interrupt-controller;
           #interrupt-cells = <2>;
diff --git a/Documentation/devicetree/bindings/mfd/syscon.yaml b/Documentation/devicetree/bindings/mfd/syscon.yaml
index f14ae6da0068..abe3fd817e0b 100644
--- a/Documentation/devicetree/bindings/mfd/syscon.yaml
+++ b/Documentation/devicetree/bindings/mfd/syscon.yaml
@@ -45,9 +45,12 @@ properties:
               - microchip,sparx5-cpu-syscon
               - mstar,msc313-pmsleep
               - rockchip,px30-qos
+              - rockchip,rk3036-qos
               - rockchip,rk3066-qos
+              - rockchip,rk3228-qos
               - rockchip,rk3288-qos
               - rockchip,rk3399-qos
+              - rockchip,rk3568-qos
               - samsung,exynos3-sysreg
               - samsung,exynos4-sysreg
               - samsung,exynos5-sysreg
diff --git a/Documentation/devicetree/bindings/mfd/ti,tps65086.yaml b/Documentation/devicetree/bindings/mfd/ti,tps65086.yaml
new file mode 100644
index 000000000000..6aeedda3be15
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/ti,tps65086.yaml
@@ -0,0 +1,124 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mfd/ti,tps65086.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TPS65086 Power Management Integrated Circuit (PMIC)
+
+maintainers:
+  - Emil Renner Berthing <kernel@esmil.dk>
+
+properties:
+  compatible:
+    const: ti,tps65086
+
+  reg:
+    const: 0x5e
+    description: I2C slave address
+
+  interrupts:
+    maxItems: 1
+
+  interrupt-controller: true
+
+  '#interrupt-cells':
+    const: 2
+    description: |
+      The first cell is the IRQ number. The second cell is the flags,
+      encoded as trigger masks from ../interrupt-controller/interrupts.txt.
+
+  gpio-controller: true
+
+  '#gpio-cells':
+    const: 2
+    description: |
+      The first cell is the pin number and the second cell is used to specify
+      flags.  See ../gpio/gpio.txt for more information.
+
+  regulators:
+    type: object
+    description: |
+      List of child nodes that specify the regulator initialization data.
+      Child nodes must be named after their hardware counterparts:
+      buck[1-6], ldoa[1-3], swa1, swb[1-2], and vtt.
+      Each child node is defined using the standard binding for regulators and
+      the optional regulator properties defined below.
+
+    patternProperties:
+      "^buck[1-6]$":
+        type: object
+        $ref: ../regulator/regulator.yaml
+
+        properties:
+          regulator-name: true
+          regulator-boot-on: true
+          regulator-always-on: true
+          regulator-min-microvolt: true
+          regulator-max-microvolt: true
+          ti,regulator-step-size-25mv:
+            type: boolean
+            description: |
+              Set this if the regulator is factory set with a 25mv step voltage
+              mapping.
+          ti,regulator-decay:
+            type: boolean
+            description: |
+              Set this if the output needs to decay, default is for the output
+              to slew down.
+
+        additionalProperties: false
+
+      "^(ldoa[1-3]|swa1|swb[1-2]|vtt)$":
+        type: object
+        $ref: ../regulator/regulator.yaml
+
+        properties:
+          regulator-name: true
+          regulator-boot-on: true
+          regulator-always-on: true
+          regulator-min-microvolt: true
+          regulator-max-microvolt: true
+
+        additionalProperties: false
+
+additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - gpio-controller
+  - '#gpio-cells'
+  - regulators
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    i2c0 {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        pmic: pmic@5e {
+            compatible = "ti,tps65086";
+            reg = <0x5e>;
+            interrupt-parent = <&gpio1>;
+            interrupts = <28 IRQ_TYPE_LEVEL_LOW>;
+            interrupt-controller;
+            #interrupt-cells = <2>;
+            gpio-controller;
+            #gpio-cells = <2>;
+
+            regulators {
+                buck1 {
+                    regulator-name = "vcc1";
+                    regulator-min-microvolt = <1600000>;
+                    regulator-max-microvolt = <1600000>;
+                    regulator-boot-on;
+                    ti,regulator-decay;
+                    ti,regulator-step-size-25mv;
+                };
+            };
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/mfd/tps65086.txt b/Documentation/devicetree/bindings/mfd/tps65086.txt
deleted file mode 100644
index 67eac0ed32df..000000000000
--- a/Documentation/devicetree/bindings/mfd/tps65086.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-* TPS65086 Power Management Integrated Circuit (PMIC) bindings
-
-Required properties:
- - compatible		: Should be "ti,tps65086".
- - reg			: I2C slave address.
- - interrupts		: The interrupt line the device is connected to.
- - interrupt-controller	: Marks the device node as an interrupt controller.
- - #interrupt-cells	: The number of cells to describe an IRQ, should be 2.
-			    The first cell is the IRQ number.
-			    The second cell is the flags, encoded as trigger
-			    masks from ../interrupt-controller/interrupts.txt.
- - gpio-controller      : Marks the device node as a GPIO Controller.
- - #gpio-cells          : Should be two.  The first cell is the pin number and
-                            the second cell is used to specify flags.
-                            See ../gpio/gpio.txt for more information.
- - regulators:          : List of child nodes that specify the regulator
-                            initialization data. Child nodes must be named
-                            after their hardware counterparts: buck[1-6],
-                            ldoa[1-3], swa1, swb[1-2], and vtt. Each child
-                            node is defined using the standard binding for
-                            regulators and the optional regulator properties
-                            defined below.
-
-Optional regulator properties:
- - ti,regulator-step-size-25mv	: This is applicable for buck[1-6], set this
-				    if the regulator is factory set with a 25mv
-				    step voltage mapping.
- - ti,regulator-decay		: This is applicable for buck[1-6], set this if
-				    the output needs to decay, default is for
-				    the output to slew down.
-
-Example:
-
-	pmic: tps65086@5e {
-		compatible = "ti,tps65086";
-		reg = <0x5e>;
-		interrupt-parent = <&gpio1>;
-		interrupts = <28 IRQ_TYPE_LEVEL_LOW>;
-		interrupt-controller;
-		#interrupt-cells = <2>;
-		gpio-controller;
-		#gpio-cells = <2>;
-
-		regulators {
-			buck1 {
-				regulator-name = "vcc1";
-				regulator-min-microvolt = <1600000>;
-				regulator-max-microvolt = <1600000>;
-				regulator-boot-on;
-				ti,regulator-decay;
-				ti,regulator-step-size-25mv;
-			};
-		};
-	};
diff --git a/Documentation/devicetree/bindings/mtd/gpmc-nand.txt b/Documentation/devicetree/bindings/mtd/gpmc-nand.txt
index 44919d48d241..c459f169a904 100644
--- a/Documentation/devicetree/bindings/mtd/gpmc-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/gpmc-nand.txt
@@ -122,7 +122,7 @@ on various other factors also like;
 	so the device should have enough free bytes available its OOB/Spare
 	area to accommodate ECC for entire page. In general following expression
 	helps in determining if given device can accommodate ECC syndrome:
-	"2 + (PAGESIZE / 512) * ECC_BYTES" >= OOBSIZE"
+	"2 + (PAGESIZE / 512) * ECC_BYTES" <= OOBSIZE"
 	where
 		OOBSIZE		number of bytes in OOB/spare area
 		PAGESIZE	number of bytes in main-area of device page
diff --git a/Documentation/devicetree/bindings/mtd/partitions/redboot-fis.txt b/Documentation/devicetree/bindings/mtd/partitions/redboot-fis.txt
deleted file mode 100644
index fd0ebe4e3415..000000000000
--- a/Documentation/devicetree/bindings/mtd/partitions/redboot-fis.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-RedBoot FLASH Image System (FIS) Partitions
-===========================================
-
-The FLASH Image System (FIS) directory is a flash description
-format closely associated with the RedBoot boot loader.
-
-It uses one single flash eraseblock in the flash to store an index of
-all images in the flash.
-
-This block size will vary depending on flash but is typically
-32 KB in size.
-
-Required properties:
-- compatible : (required) must be "redboot-fis"
-- fis-index-block : (required) a index to the eraseblock containing
-  the FIS directory on this device. On a flash memory with 32KB
-  eraseblocks, 0 means the first eraseblock at 0x00000000, 1 means the
-  second eraseblock at 0x00008000 and so on.
-
-Example:
-
-flash@0 {
-	partitions {
-		compatible = "redboot-fis";
-		fis-index-block = <0>;
-	};
-};
diff --git a/Documentation/devicetree/bindings/mtd/partitions/redboot-fis.yaml b/Documentation/devicetree/bindings/mtd/partitions/redboot-fis.yaml
new file mode 100644
index 000000000000..fee8d81b5276
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/partitions/redboot-fis.yaml
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mtd/partitions/redboot-fis.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: RedBoot FLASH Image System (FIS) Partitions
+
+description: The FLASH Image System (FIS) directory is a flash description
+    format closely associated with the RedBoot boot loader.
+    It uses one single flash eraseblock in the flash to store an index of
+    all images in the flash.
+    This block size will vary depending on flash but is typically
+    32 KB in size.
+
+maintainers:
+  - Linus Walleij <linus.walleij@linaro.org>
+
+properties:
+  compatible:
+    const: redboot-fis
+
+  fis-index-block:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: a index to the eraseblock containing the FIS directory on this
+      device. On a flash memory with 32KB eraseblocks, 0 means the first
+      eraseblock at 0x00000000, 1 means the second eraseblock at 0x00008000 and so on.
+
+required:
+  - compatible
+  - fis-index-block
+
+additionalProperties: false
+
+examples:
+  - |
+    flash {
+      partitions {
+        compatible = "redboot-fis";
+        fis-index-block = <0>;
+      };
+    };
diff --git a/Documentation/devicetree/bindings/opp/allwinner,sun50i-h6-operating-points.yaml b/Documentation/devicetree/bindings/opp/allwinner,sun50i-h6-operating-points.yaml
index aeff2bd774dd..729ae97b63d9 100644
--- a/Documentation/devicetree/bindings/opp/allwinner,sun50i-h6-operating-points.yaml
+++ b/Documentation/devicetree/bindings/opp/allwinner,sun50i-h6-operating-points.yaml
@@ -18,6 +18,9 @@ description: |
   sun50i-cpufreq-nvmem driver reads the efuse value from the SoC to
   provide the OPP framework with required information.
 
+allOf:
+  - $ref: opp-v2-base.yaml#
+
 properties:
   compatible:
     const: allwinner,sun50i-h6-operating-points
@@ -43,6 +46,7 @@ patternProperties:
 
     properties:
       opp-hz: true
+      clock-latency-ns: true
 
     patternProperties:
       "opp-microvolt-.*": true
diff --git a/Documentation/devicetree/bindings/opp/opp-v1.yaml b/Documentation/devicetree/bindings/opp/opp-v1.yaml
new file mode 100644
index 000000000000..d585d536a3fb
--- /dev/null
+++ b/Documentation/devicetree/bindings/opp/opp-v1.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/opp/opp-v1.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Generic OPP (Operating Performance Points) v1 Bindings
+
+maintainers:
+  - Viresh Kumar <viresh.kumar@linaro.org>
+
+description: |+
+  Devices work at voltage-current-frequency combinations and some implementations
+  have the liberty of choosing these. These combinations are called Operating
+  Performance Points aka OPPs. This document defines bindings for these OPPs
+  applicable across wide range of devices. For illustration purpose, this document
+  uses CPU as a device.
+
+  This binding only supports voltage-frequency pairs.
+
+select: true
+
+properties:
+  operating-points:
+    $ref: /schemas/types.yaml#/definitions/uint32-matrix
+    items:
+      items:
+        - description: Frequency in kHz
+        - description: Voltage for OPP in uV
+
+
+additionalProperties: true
+examples:
+  - |
+    cpus {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        cpu@0 {
+            compatible = "arm,cortex-a9";
+            device_type = "cpu";
+            reg = <0>;
+            next-level-cache = <&L2>;
+            operating-points =
+                /* kHz    uV */
+                <792000 1100000>,
+                <396000 950000>,
+                <198000 850000>;
+        };
+    };
+...
diff --git a/Documentation/devicetree/bindings/opp/opp-v2-base.yaml b/Documentation/devicetree/bindings/opp/opp-v2-base.yaml
new file mode 100644
index 000000000000..ae3ae4d39843
--- /dev/null
+++ b/Documentation/devicetree/bindings/opp/opp-v2-base.yaml
@@ -0,0 +1,214 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/opp/opp-v2-base.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Generic OPP (Operating Performance Points) Common Binding
+
+maintainers:
+  - Viresh Kumar <viresh.kumar@linaro.org>
+
+description: |
+  Devices work at voltage-current-frequency combinations and some implementations
+  have the liberty of choosing these. These combinations are called Operating
+  Performance Points aka OPPs. This document defines bindings for these OPPs
+  applicable across wide range of devices. For illustration purpose, this document
+  uses CPU as a device.
+
+  This describes the OPPs belonging to a device.
+
+select: false
+
+properties:
+  $nodename:
+    pattern: '^opp-table(-[a-z0-9]+)?$'
+
+  opp-shared:
+    description:
+      Indicates that device nodes using this OPP Table Node's phandle switch
+      their DVFS state together, i.e. they share clock/voltage/current lines.
+      Missing property means devices have independent clock/voltage/current
+      lines, but they share OPP tables.
+    type: boolean
+
+patternProperties:
+  '^opp-?[0-9]+$':
+    type: object
+    description:
+      One or more OPP nodes describing voltage-current-frequency combinations.
+      Their name isn't significant but their phandle can be used to reference an
+      OPP. These are mandatory except for the case where the OPP table is
+      present only to indicate dependency between devices using the opp-shared
+      property.
+
+    properties:
+      opp-hz:
+        description:
+          Frequency in Hz, expressed as a 64-bit big-endian integer. This is a
+          required property for all device nodes, unless another "required"
+          property to uniquely identify the OPP nodes exists. Devices like power
+          domains must have another (implementation dependent) property.
+
+      opp-microvolt:
+        description: |
+          Voltage for the OPP
+
+          A single regulator's voltage is specified with an array of size one or three.
+          Single entry is for target voltage and three entries are for <target min max>
+          voltages.
+
+          Entries for multiple regulators shall be provided in the same field separated
+          by angular brackets <>. The OPP binding doesn't provide any provisions to
+          relate the values to their power supplies or the order in which the supplies
+          need to be configured and that is left for the implementation specific
+          binding.
+
+          Entries for all regulators shall be of the same size, i.e. either all use a
+          single value or triplets.
+        minItems: 1
+        maxItems: 8   # Should be enough regulators
+        items:
+          minItems: 1
+          maxItems: 3
+
+      opp-microamp:
+        description: |
+          The maximum current drawn by the device in microamperes considering
+          system specific parameters (such as transients, process, aging,
+          maximum operating temperature range etc.) as necessary. This may be
+          used to set the most efficient regulator operating mode.
+
+          Should only be set if opp-microvolt or opp-microvolt-<name> is set for
+          the OPP.
+
+          Entries for multiple regulators shall be provided in the same field
+          separated by angular brackets <>. If current values aren't required
+          for a regulator, then it shall be filled with 0. If current values
+          aren't required for any of the regulators, then this field is not
+          required. The OPP binding doesn't provide any provisions to relate the
+          values to their power supplies or the order in which the supplies need
+          to be configured and that is left for the implementation specific
+          binding.
+        minItems: 1
+        maxItems: 8   # Should be enough regulators
+
+      opp-level:
+        description:
+          A value representing the performance level of the device.
+        $ref: /schemas/types.yaml#/definitions/uint32
+
+      opp-peak-kBps:
+        description:
+          Peak bandwidth in kilobytes per second, expressed as an array of
+          32-bit big-endian integers. Each element of the array represents the
+          peak bandwidth value of each interconnect path. The number of elements
+          should match the number of interconnect paths.
+        minItems: 1
+        maxItems: 32  # Should be enough
+
+      opp-avg-kBps:
+        description:
+          Average bandwidth in kilobytes per second, expressed as an array
+          of 32-bit big-endian integers. Each element of the array represents the
+          average bandwidth value of each interconnect path. The number of elements
+          should match the number of interconnect paths. This property is only
+          meaningful in OPP tables where opp-peak-kBps is present.
+        minItems: 1
+        maxItems: 32  # Should be enough
+
+      clock-latency-ns:
+        description:
+          Specifies the maximum possible transition latency (in nanoseconds) for
+          switching to this OPP from any other OPP.
+
+      turbo-mode:
+        description:
+          Marks the OPP to be used only for turbo modes. Turbo mode is available
+          on some platforms, where the device can run over its operating
+          frequency for a short duration of time limited by the device's power,
+          current and thermal limits.
+        type: boolean
+
+      opp-suspend:
+        description:
+          Marks the OPP to be used during device suspend. If multiple OPPs in
+          the table have this, the OPP with highest opp-hz will be used.
+        type: boolean
+
+      opp-supported-hw:
+        description: |
+          This property allows a platform to enable only a subset of the OPPs
+          from the larger set present in the OPP table, based on the current
+          version of the hardware (already known to the operating system).
+
+          Each block present in the array of blocks in this property, represents
+          a sub-group of hardware versions supported by the OPP. i.e. <sub-group
+          A>, <sub-group B>, etc. The OPP will be enabled if _any_ of these
+          sub-groups match the hardware's version.
+
+          Each sub-group is a platform defined array representing the hierarchy
+          of hardware versions supported by the platform. For a platform with
+          three hierarchical levels of version (X.Y.Z), this field shall look
+          like
+
+          opp-supported-hw = <X1 Y1 Z1>, <X2 Y2 Z2>, <X3 Y3 Z3>.
+
+          Each level (eg. X1) in version hierarchy is represented by a 32 bit
+          value, one bit per version and so there can be maximum 32 versions per
+          level. Logical AND (&) operation is performed for each level with the
+          hardware's level version and a non-zero output for _all_ the levels in
+          a sub-group means the OPP is supported by hardware. A value of
+          0xFFFFFFFF for each level in the sub-group will enable the OPP for all
+          versions for the hardware.
+        $ref: /schemas/types.yaml#/definitions/uint32-matrix
+        maxItems: 32
+        items:
+          minItems: 1
+          maxItems: 4
+
+      required-opps:
+        description:
+          This contains phandle to an OPP node in another device's OPP table. It
+          may contain an array of phandles, where each phandle points to an OPP
+          of a different device. It should not contain multiple phandles to the
+          OPP nodes in the same OPP table. This specifies the minimum required
+          OPP of the device(s), whose OPP's phandle is present in this property,
+          for the functioning of the current device at the current OPP (where
+          this property is present).
+        $ref: /schemas/types.yaml#/definitions/phandle-array
+
+    patternProperties:
+      '^opp-microvolt-':
+        description:
+          Named opp-microvolt property. This is exactly similar to the above
+          opp-microvolt property, but allows multiple voltage ranges to be
+          provided for the same OPP. At runtime, the platform can pick a <name>
+          and matching opp-microvolt-<name> property will be enabled for all
+          OPPs. If the platform doesn't pick a specific <name> or the <name>
+          doesn't match with any opp-microvolt-<name> properties, then
+          opp-microvolt property shall be used, if present.
+        $ref: /schemas/types.yaml#/definitions/uint32-matrix
+        minItems: 1
+        maxItems: 8   # Should be enough regulators
+        items:
+          minItems: 1
+          maxItems: 3
+
+      '^opp-microamp-':
+        description:
+          Named opp-microamp property. Similar to opp-microvolt-<name> property,
+          but for microamp instead.
+        $ref: /schemas/types.yaml#/definitions/uint32-array
+        minItems: 1
+        maxItems: 8   # Should be enough regulators
+
+    dependencies:
+      opp-avg-kBps: [ opp-peak-kBps ]
+
+required:
+  - compatible
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/opp/opp-v2.yaml b/Documentation/devicetree/bindings/opp/opp-v2.yaml
new file mode 100644
index 000000000000..eaf8fba2c691
--- /dev/null
+++ b/Documentation/devicetree/bindings/opp/opp-v2.yaml
@@ -0,0 +1,475 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/opp/opp-v2.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Generic OPP (Operating Performance Points) Bindings
+
+maintainers:
+  - Viresh Kumar <viresh.kumar@linaro.org>
+
+allOf:
+  - $ref: opp-v2-base.yaml#
+
+properties:
+  compatible:
+    const: operating-points-v2
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    /*
+     * Example 1: Single cluster Dual-core ARM cortex A9, switch DVFS states
+     * together.
+     */
+    cpus {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        cpu@0 {
+            compatible = "arm,cortex-a9";
+            device_type = "cpu";
+            reg = <0>;
+            next-level-cache = <&L2>;
+            clocks = <&clk_controller 0>;
+            clock-names = "cpu";
+            cpu-supply = <&cpu_supply0>;
+            operating-points-v2 = <&cpu0_opp_table0>;
+        };
+
+        cpu@1 {
+            compatible = "arm,cortex-a9";
+            device_type = "cpu";
+            reg = <1>;
+            next-level-cache = <&L2>;
+            clocks = <&clk_controller 0>;
+            clock-names = "cpu";
+            cpu-supply = <&cpu_supply0>;
+            operating-points-v2 = <&cpu0_opp_table0>;
+        };
+    };
+
+    cpu0_opp_table0: opp-table {
+        compatible = "operating-points-v2";
+        opp-shared;
+
+        opp-1000000000 {
+            opp-hz = /bits/ 64 <1000000000>;
+            opp-microvolt = <975000 970000 985000>;
+            opp-microamp = <70000>;
+            clock-latency-ns = <300000>;
+            opp-suspend;
+        };
+        opp-1100000000 {
+            opp-hz = /bits/ 64 <1100000000>;
+            opp-microvolt = <1000000 980000 1010000>;
+            opp-microamp = <80000>;
+            clock-latency-ns = <310000>;
+        };
+        opp-1200000000 {
+            opp-hz = /bits/ 64 <1200000000>;
+            opp-microvolt = <1025000>;
+            clock-latency-ns = <290000>;
+            turbo-mode;
+        };
+    };
+
+  - |
+    /*
+     * Example 2: Single cluster, Quad-core Qualcom-krait, switches DVFS states
+     * independently.
+     */
+    cpus {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        cpu@0 {
+            compatible = "qcom,krait";
+            device_type = "cpu";
+            reg = <0>;
+            next-level-cache = <&L2>;
+            clocks = <&clk_controller 0>;
+            clock-names = "cpu";
+            cpu-supply = <&cpu_supply0>;
+            operating-points-v2 = <&cpu_opp_table>;
+        };
+
+        cpu@1 {
+            compatible = "qcom,krait";
+            device_type = "cpu";
+            reg = <1>;
+            next-level-cache = <&L2>;
+            clocks = <&clk_controller 1>;
+            clock-names = "cpu";
+            cpu-supply = <&cpu_supply1>;
+            operating-points-v2 = <&cpu_opp_table>;
+        };
+
+        cpu@2 {
+            compatible = "qcom,krait";
+            device_type = "cpu";
+            reg = <2>;
+            next-level-cache = <&L2>;
+            clocks = <&clk_controller 2>;
+            clock-names = "cpu";
+            cpu-supply = <&cpu_supply2>;
+            operating-points-v2 = <&cpu_opp_table>;
+        };
+
+        cpu@3 {
+            compatible = "qcom,krait";
+            device_type = "cpu";
+            reg = <3>;
+            next-level-cache = <&L2>;
+            clocks = <&clk_controller 3>;
+            clock-names = "cpu";
+            cpu-supply = <&cpu_supply3>;
+            operating-points-v2 = <&cpu_opp_table>;
+        };
+    };
+
+    cpu_opp_table: opp-table {
+        compatible = "operating-points-v2";
+
+        /*
+         * Missing opp-shared property means CPUs switch DVFS states
+         * independently.
+         */
+
+        opp-1000000000 {
+            opp-hz = /bits/ 64 <1000000000>;
+            opp-microvolt = <975000 970000 985000>;
+            opp-microamp = <70000>;
+            clock-latency-ns = <300000>;
+            opp-suspend;
+        };
+        opp-1100000000 {
+            opp-hz = /bits/ 64 <1100000000>;
+            opp-microvolt = <1000000 980000 1010000>;
+            opp-microamp = <80000>;
+            clock-latency-ns = <310000>;
+        };
+        opp-1200000000 {
+            opp-hz = /bits/ 64 <1200000000>;
+            opp-microvolt = <1025000>;
+            opp-microamp = <90000>;
+            lock-latency-ns = <290000>;
+            turbo-mode;
+        };
+    };
+
+  - |
+    /*
+     * Example 3: Dual-cluster, Dual-core per cluster. CPUs within a cluster switch
+     * DVFS state together.
+     */
+    cpus {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        cpu@0 {
+            compatible = "arm,cortex-a7";
+            device_type = "cpu";
+            reg = <0>;
+            next-level-cache = <&L2>;
+            clocks = <&clk_controller 0>;
+            clock-names = "cpu";
+            cpu-supply = <&cpu_supply0>;
+            operating-points-v2 = <&cluster0_opp>;
+        };
+
+        cpu@1 {
+            compatible = "arm,cortex-a7";
+            device_type = "cpu";
+            reg = <1>;
+            next-level-cache = <&L2>;
+            clocks = <&clk_controller 0>;
+            clock-names = "cpu";
+            cpu-supply = <&cpu_supply0>;
+            operating-points-v2 = <&cluster0_opp>;
+        };
+
+        cpu@100 {
+            compatible = "arm,cortex-a15";
+            device_type = "cpu";
+            reg = <100>;
+            next-level-cache = <&L2>;
+            clocks = <&clk_controller 1>;
+            clock-names = "cpu";
+            cpu-supply = <&cpu_supply1>;
+            operating-points-v2 = <&cluster1_opp>;
+        };
+
+        cpu@101 {
+            compatible = "arm,cortex-a15";
+            device_type = "cpu";
+            reg = <101>;
+            next-level-cache = <&L2>;
+            clocks = <&clk_controller 1>;
+            clock-names = "cpu";
+            cpu-supply = <&cpu_supply1>;
+            operating-points-v2 = <&cluster1_opp>;
+        };
+    };
+
+    cluster0_opp: opp-table-0 {
+        compatible = "operating-points-v2";
+        opp-shared;
+
+        opp-1000000000 {
+            opp-hz = /bits/ 64 <1000000000>;
+            opp-microvolt = <975000 970000 985000>;
+            opp-microamp = <70000>;
+            clock-latency-ns = <300000>;
+            opp-suspend;
+        };
+        opp-1100000000 {
+            opp-hz = /bits/ 64 <1100000000>;
+            opp-microvolt = <1000000 980000 1010000>;
+            opp-microamp = <80000>;
+            clock-latency-ns = <310000>;
+        };
+        opp-1200000000 {
+            opp-hz = /bits/ 64 <1200000000>;
+            opp-microvolt = <1025000>;
+            opp-microamp = <90000>;
+            clock-latency-ns = <290000>;
+            turbo-mode;
+        };
+    };
+
+    cluster1_opp: opp-table-1 {
+        compatible = "operating-points-v2";
+        opp-shared;
+
+        opp-1300000000 {
+            opp-hz = /bits/ 64 <1300000000>;
+            opp-microvolt = <1050000 1045000 1055000>;
+            opp-microamp = <95000>;
+            clock-latency-ns = <400000>;
+            opp-suspend;
+        };
+        opp-1400000000 {
+            opp-hz = /bits/ 64 <1400000000>;
+            opp-microvolt = <1075000>;
+            opp-microamp = <100000>;
+            clock-latency-ns = <400000>;
+        };
+        opp-1500000000 {
+            opp-hz = /bits/ 64 <1500000000>;
+            opp-microvolt = <1100000 1010000 1110000>;
+            opp-microamp = <95000>;
+            clock-latency-ns = <400000>;
+            turbo-mode;
+        };
+    };
+
+  - |
+    /* Example 4: Handling multiple regulators */
+    cpus {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        cpu@0 {
+            compatible = "foo,cpu-type";
+            device_type = "cpu";
+            reg = <0>;
+
+            vcc0-supply = <&cpu_supply0>;
+            vcc1-supply = <&cpu_supply1>;
+            vcc2-supply = <&cpu_supply2>;
+            operating-points-v2 = <&cpu0_opp_table4>;
+        };
+    };
+
+    cpu0_opp_table4: opp-table-0 {
+        compatible = "operating-points-v2";
+        opp-shared;
+
+        opp-1000000000 {
+            opp-hz = /bits/ 64 <1000000000>;
+            opp-microvolt = <970000>, /* Supply 0 */
+                            <960000>, /* Supply 1 */
+                            <960000>; /* Supply 2 */
+            opp-microamp =  <70000>,  /* Supply 0 */
+                            <70000>,  /* Supply 1 */
+                            <70000>;  /* Supply 2 */
+            clock-latency-ns = <300000>;
+        };
+
+        /* OR */
+
+        opp-1000000001 {
+            opp-hz = /bits/ 64 <1000000001>;
+            opp-microvolt = <975000 970000 985000>, /* Supply 0 */
+                            <965000 960000 975000>, /* Supply 1 */
+                            <965000 960000 975000>; /* Supply 2 */
+            opp-microamp =  <70000>,    /* Supply 0 */
+                <70000>,    /* Supply 1 */
+                <70000>;    /* Supply 2 */
+            clock-latency-ns = <300000>;
+        };
+
+        /* OR */
+
+        opp-1000000002 {
+            opp-hz = /bits/ 64 <1000000002>;
+            opp-microvolt = <975000 970000 985000>, /* Supply 0 */
+                <965000 960000 975000>, /* Supply 1 */
+                <965000 960000 975000>; /* Supply 2 */
+            opp-microamp =  <70000>,    /* Supply 0 */
+                <0>,      /* Supply 1 doesn't need this */
+                <70000>;    /* Supply 2 */
+            clock-latency-ns = <300000>;
+        };
+    };
+
+  - |
+    /*
+     * Example 5: opp-supported-hw
+     * (example: three level hierarchy of versions: cuts, substrate and process)
+     */
+    cpus {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        cpu@0 {
+            compatible = "arm,cortex-a7";
+            device_type = "cpu";
+            reg = <0>;
+            cpu-supply = <&cpu_supply>;
+            operating-points-v2 = <&cpu0_opp_table_slow>;
+        };
+    };
+
+    cpu0_opp_table_slow: opp-table {
+        compatible = "operating-points-v2";
+        opp-shared;
+
+        opp-600000000 {
+            /*
+             * Supports all substrate and process versions for 0xF
+             * cuts, i.e. only first four cuts.
+             */
+            opp-supported-hw = <0xF 0xFFFFFFFF 0xFFFFFFFF>;
+            opp-hz = /bits/ 64 <600000000>;
+        };
+
+        opp-800000000 {
+            /*
+             * Supports:
+             * - cuts: only one, 6th cut (represented by 6th bit).
+             * - substrate: supports 16 different substrate versions
+             * - process: supports 9 different process versions
+             */
+            opp-supported-hw = <0x20 0xff0000ff 0x0000f4f0>;
+            opp-hz = /bits/ 64 <800000000>;
+        };
+
+        opp-900000000 {
+            /*
+             * Supports:
+             * - All cuts and substrate where process version is 0x2.
+             * - All cuts and process where substrate version is 0x2.
+             */
+            opp-supported-hw = <0xFFFFFFFF 0xFFFFFFFF 0x02>,
+                               <0xFFFFFFFF 0x01 0xFFFFFFFF>;
+            opp-hz = /bits/ 64 <900000000>;
+        };
+    };
+
+  - |
+    /*
+     * Example 6: opp-microvolt-<name>, opp-microamp-<name>:
+     * (example: device with two possible microvolt ranges: slow and fast)
+     */
+    cpus {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        cpu@0 {
+            compatible = "arm,cortex-a7";
+            device_type = "cpu";
+            reg = <0>;
+            operating-points-v2 = <&cpu0_opp_table6>;
+        };
+    };
+
+    cpu0_opp_table6: opp-table-0 {
+        compatible = "operating-points-v2";
+        opp-shared;
+
+        opp-1000000000 {
+            opp-hz = /bits/ 64 <1000000000>;
+            opp-microvolt-slow = <915000 900000 925000>;
+            opp-microvolt-fast = <975000 970000 985000>;
+            opp-microamp-slow =  <70000>;
+            opp-microamp-fast =  <71000>;
+        };
+
+        opp-1200000000 {
+            opp-hz = /bits/ 64 <1200000000>;
+            opp-microvolt-slow = <915000 900000 925000>, /* Supply vcc0 */
+                                 <925000 910000 935000>; /* Supply vcc1 */
+            opp-microvolt-fast = <975000 970000 985000>, /* Supply vcc0 */
+                                 <965000 960000 975000>; /* Supply vcc1 */
+            opp-microamp =  <70000>; /* Will be used for both slow/fast */
+        };
+    };
+
+  - |
+    /*
+     * Example 7: Single cluster Quad-core ARM cortex A53, OPP points from firmware,
+     * distinct clock controls but two sets of clock/voltage/current lines.
+     */
+    cpus {
+        #address-cells = <2>;
+        #size-cells = <0>;
+
+        cpu@0 {
+            compatible = "arm,cortex-a53";
+            device_type = "cpu";
+            reg = <0x0 0x100>;
+            next-level-cache = <&A53_L2>;
+            clocks = <&dvfs_controller 0>;
+            operating-points-v2 = <&cpu_opp0_table>;
+        };
+        cpu@1 {
+            compatible = "arm,cortex-a53";
+            device_type = "cpu";
+            reg = <0x0 0x101>;
+            next-level-cache = <&A53_L2>;
+            clocks = <&dvfs_controller 1>;
+            operating-points-v2 = <&cpu_opp0_table>;
+        };
+        cpu@2 {
+            compatible = "arm,cortex-a53";
+            device_type = "cpu";
+            reg = <0x0 0x102>;
+            next-level-cache = <&A53_L2>;
+            clocks = <&dvfs_controller 2>;
+            operating-points-v2 = <&cpu_opp1_table>;
+        };
+        cpu@3 {
+            compatible = "arm,cortex-a53";
+            device_type = "cpu";
+            reg = <0x0 0x103>;
+            next-level-cache = <&A53_L2>;
+            clocks = <&dvfs_controller 3>;
+            operating-points-v2 = <&cpu_opp1_table>;
+        };
+
+    };
+
+    cpu_opp0_table: opp-table-0 {
+        compatible = "operating-points-v2";
+        opp-shared;
+    };
+
+    cpu_opp1_table: opp-table-1 {
+        compatible = "operating-points-v2";
+        opp-shared;
+    };
+...
diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt
deleted file mode 100644
index 08b3da4736cf..000000000000
--- a/Documentation/devicetree/bindings/opp/opp.txt
+++ /dev/null
@@ -1,622 +0,0 @@
-Generic OPP (Operating Performance Points) Bindings
-----------------------------------------------------
-
-Devices work at voltage-current-frequency combinations and some implementations
-have the liberty of choosing these. These combinations are called Operating
-Performance Points aka OPPs. This document defines bindings for these OPPs
-applicable across wide range of devices. For illustration purpose, this document
-uses CPU as a device.
-
-This document contain multiple versions of OPP binding and only one of them
-should be used per device.
-
-Binding 1: operating-points
-============================
-
-This binding only supports voltage-frequency pairs.
-
-Properties:
-- operating-points: An array of 2-tuples items, and each item consists
-  of frequency and voltage like <freq-kHz vol-uV>.
-	freq: clock frequency in kHz
-	vol: voltage in microvolt
-
-Examples:
-
-cpu@0 {
-	compatible = "arm,cortex-a9";
-	reg = <0>;
-	next-level-cache = <&L2>;
-	operating-points = <
-		/* kHz    uV */
-		792000  1100000
-		396000  950000
-		198000  850000
-	>;
-};
-
-
-Binding 2: operating-points-v2
-============================
-
-* Property: operating-points-v2
-
-Devices supporting OPPs must set their "operating-points-v2" property with
-phandle to a OPP table in their DT node. The OPP core will use this phandle to
-find the operating points for the device.
-
-This can contain more than one phandle for power domain providers that provide
-multiple power domains. That is, one phandle for each power domain. If only one
-phandle is available, then the same OPP table will be used for all power domains
-provided by the power domain provider.
-
-If required, this can be extended for SoC vendor specific bindings. Such bindings
-should be documented as Documentation/devicetree/bindings/power/<vendor>-opp.txt
-and should have a compatible description like: "operating-points-v2-<vendor>".
-
-* OPP Table Node
-
-This describes the OPPs belonging to a device. This node can have following
-properties:
-
-Required properties:
-- compatible: Allow OPPs to express their compatibility. It should be:
-  "operating-points-v2".
-
-- OPP nodes: One or more OPP nodes describing voltage-current-frequency
-  combinations. Their name isn't significant but their phandle can be used to
-  reference an OPP. These are mandatory except for the case where the OPP table
-  is present only to indicate dependency between devices using the opp-shared
-  property.
-
-Optional properties:
-- opp-shared: Indicates that device nodes using this OPP Table Node's phandle
-  switch their DVFS state together, i.e. they share clock/voltage/current lines.
-  Missing property means devices have independent clock/voltage/current lines,
-  but they share OPP tables.
-
-- status: Marks the OPP table enabled/disabled.
-
-
-* OPP Node
-
-This defines voltage-current-frequency combinations along with other related
-properties.
-
-Required properties:
-- opp-hz: Frequency in Hz, expressed as a 64-bit big-endian integer. This is a
-  required property for all device nodes, unless another "required" property to
-  uniquely identify the OPP nodes exists. Devices like power domains must have
-  another (implementation dependent) property.
-
-- opp-peak-kBps: Peak bandwidth in kilobytes per second, expressed as an array
-  of 32-bit big-endian integers. Each element of the array represents the
-  peak bandwidth value of each interconnect path. The number of elements should
-  match the number of interconnect paths.
-
-Optional properties:
-- opp-microvolt: voltage in micro Volts.
-
-  A single regulator's voltage is specified with an array of size one or three.
-  Single entry is for target voltage and three entries are for <target min max>
-  voltages.
-
-  Entries for multiple regulators shall be provided in the same field separated
-  by angular brackets <>. The OPP binding doesn't provide any provisions to
-  relate the values to their power supplies or the order in which the supplies
-  need to be configured and that is left for the implementation specific
-  binding.
-
-  Entries for all regulators shall be of the same size, i.e. either all use a
-  single value or triplets.
-
-- opp-microvolt-<name>: Named opp-microvolt property. This is exactly similar to
-  the above opp-microvolt property, but allows multiple voltage ranges to be
-  provided for the same OPP. At runtime, the platform can pick a <name> and
-  matching opp-microvolt-<name> property will be enabled for all OPPs. If the
-  platform doesn't pick a specific <name> or the <name> doesn't match with any
-  opp-microvolt-<name> properties, then opp-microvolt property shall be used, if
-  present.
-
-- opp-microamp: The maximum current drawn by the device in microamperes
-  considering system specific parameters (such as transients, process, aging,
-  maximum operating temperature range etc.) as necessary. This may be used to
-  set the most efficient regulator operating mode.
-
-  Should only be set if opp-microvolt is set for the OPP.
-
-  Entries for multiple regulators shall be provided in the same field separated
-  by angular brackets <>. If current values aren't required for a regulator,
-  then it shall be filled with 0. If current values aren't required for any of
-  the regulators, then this field is not required. The OPP binding doesn't
-  provide any provisions to relate the values to their power supplies or the
-  order in which the supplies need to be configured and that is left for the
-  implementation specific binding.
-
-- opp-microamp-<name>: Named opp-microamp property. Similar to
-  opp-microvolt-<name> property, but for microamp instead.
-
-- opp-level: A value representing the performance level of the device,
-  expressed as a 32-bit integer.
-
-- opp-avg-kBps: Average bandwidth in kilobytes per second, expressed as an array
-  of 32-bit big-endian integers. Each element of the array represents the
-  average bandwidth value of each interconnect path. The number of elements
-  should match the number of interconnect paths. This property is only
-  meaningful in OPP tables where opp-peak-kBps is present.
-
-- clock-latency-ns: Specifies the maximum possible transition latency (in
-  nanoseconds) for switching to this OPP from any other OPP.
-
-- turbo-mode: Marks the OPP to be used only for turbo modes. Turbo mode is
-  available on some platforms, where the device can run over its operating
-  frequency for a short duration of time limited by the device's power, current
-  and thermal limits.
-
-- opp-suspend: Marks the OPP to be used during device suspend. If multiple OPPs
-  in the table have this, the OPP with highest opp-hz will be used.
-
-- opp-supported-hw: This property allows a platform to enable only a subset of
-  the OPPs from the larger set present in the OPP table, based on the current
-  version of the hardware (already known to the operating system).
-
-  Each block present in the array of blocks in this property, represents a
-  sub-group of hardware versions supported by the OPP. i.e. <sub-group A>,
-  <sub-group B>, etc. The OPP will be enabled if _any_ of these sub-groups match
-  the hardware's version.
-
-  Each sub-group is a platform defined array representing the hierarchy of
-  hardware versions supported by the platform. For a platform with three
-  hierarchical levels of version (X.Y.Z), this field shall look like
-
-  opp-supported-hw = <X1 Y1 Z1>, <X2 Y2 Z2>, <X3 Y3 Z3>.
-
-  Each level (eg. X1) in version hierarchy is represented by a 32 bit value, one
-  bit per version and so there can be maximum 32 versions per level. Logical AND
-  (&) operation is performed for each level with the hardware's level version
-  and a non-zero output for _all_ the levels in a sub-group means the OPP is
-  supported by hardware. A value of 0xFFFFFFFF for each level in the sub-group
-  will enable the OPP for all versions for the hardware.
-
-- status: Marks the node enabled/disabled.
-
-- required-opps: This contains phandle to an OPP node in another device's OPP
-  table. It may contain an array of phandles, where each phandle points to an
-  OPP of a different device. It should not contain multiple phandles to the OPP
-  nodes in the same OPP table. This specifies the minimum required OPP of the
-  device(s), whose OPP's phandle is present in this property, for the
-  functioning of the current device at the current OPP (where this property is
-  present).
-
-Example 1: Single cluster Dual-core ARM cortex A9, switch DVFS states together.
-
-/ {
-	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		cpu@0 {
-			compatible = "arm,cortex-a9";
-			reg = <0>;
-			next-level-cache = <&L2>;
-			clocks = <&clk_controller 0>;
-			clock-names = "cpu";
-			cpu-supply = <&cpu_supply0>;
-			operating-points-v2 = <&cpu0_opp_table>;
-		};
-
-		cpu@1 {
-			compatible = "arm,cortex-a9";
-			reg = <1>;
-			next-level-cache = <&L2>;
-			clocks = <&clk_controller 0>;
-			clock-names = "cpu";
-			cpu-supply = <&cpu_supply0>;
-			operating-points-v2 = <&cpu0_opp_table>;
-		};
-	};
-
-	cpu0_opp_table: opp_table0 {
-		compatible = "operating-points-v2";
-		opp-shared;
-
-		opp-1000000000 {
-			opp-hz = /bits/ 64 <1000000000>;
-			opp-microvolt = <975000 970000 985000>;
-			opp-microamp = <70000>;
-			clock-latency-ns = <300000>;
-			opp-suspend;
-		};
-		opp-1100000000 {
-			opp-hz = /bits/ 64 <1100000000>;
-			opp-microvolt = <1000000 980000 1010000>;
-			opp-microamp = <80000>;
-			clock-latency-ns = <310000>;
-		};
-		opp-1200000000 {
-			opp-hz = /bits/ 64 <1200000000>;
-			opp-microvolt = <1025000>;
-			clock-latency-ns = <290000>;
-			turbo-mode;
-		};
-	};
-};
-
-Example 2: Single cluster, Quad-core Qualcom-krait, switches DVFS states
-independently.
-
-/ {
-	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		cpu@0 {
-			compatible = "qcom,krait";
-			reg = <0>;
-			next-level-cache = <&L2>;
-			clocks = <&clk_controller 0>;
-			clock-names = "cpu";
-			cpu-supply = <&cpu_supply0>;
-			operating-points-v2 = <&cpu_opp_table>;
-		};
-
-		cpu@1 {
-			compatible = "qcom,krait";
-			reg = <1>;
-			next-level-cache = <&L2>;
-			clocks = <&clk_controller 1>;
-			clock-names = "cpu";
-			cpu-supply = <&cpu_supply1>;
-			operating-points-v2 = <&cpu_opp_table>;
-		};
-
-		cpu@2 {
-			compatible = "qcom,krait";
-			reg = <2>;
-			next-level-cache = <&L2>;
-			clocks = <&clk_controller 2>;
-			clock-names = "cpu";
-			cpu-supply = <&cpu_supply2>;
-			operating-points-v2 = <&cpu_opp_table>;
-		};
-
-		cpu@3 {
-			compatible = "qcom,krait";
-			reg = <3>;
-			next-level-cache = <&L2>;
-			clocks = <&clk_controller 3>;
-			clock-names = "cpu";
-			cpu-supply = <&cpu_supply3>;
-			operating-points-v2 = <&cpu_opp_table>;
-		};
-	};
-
-	cpu_opp_table: opp_table {
-		compatible = "operating-points-v2";
-
-		/*
-		 * Missing opp-shared property means CPUs switch DVFS states
-		 * independently.
-		 */
-
-		opp-1000000000 {
-			opp-hz = /bits/ 64 <1000000000>;
-			opp-microvolt = <975000 970000 985000>;
-			opp-microamp = <70000>;
-			clock-latency-ns = <300000>;
-			opp-suspend;
-		};
-		opp-1100000000 {
-			opp-hz = /bits/ 64 <1100000000>;
-			opp-microvolt = <1000000 980000 1010000>;
-			opp-microamp = <80000>;
-			clock-latency-ns = <310000>;
-		};
-		opp-1200000000 {
-			opp-hz = /bits/ 64 <1200000000>;
-			opp-microvolt = <1025000>;
-			opp-microamp = <90000;
-			lock-latency-ns = <290000>;
-			turbo-mode;
-		};
-	};
-};
-
-Example 3: Dual-cluster, Dual-core per cluster. CPUs within a cluster switch
-DVFS state together.
-
-/ {
-	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		cpu@0 {
-			compatible = "arm,cortex-a7";
-			reg = <0>;
-			next-level-cache = <&L2>;
-			clocks = <&clk_controller 0>;
-			clock-names = "cpu";
-			cpu-supply = <&cpu_supply0>;
-			operating-points-v2 = <&cluster0_opp>;
-		};
-
-		cpu@1 {
-			compatible = "arm,cortex-a7";
-			reg = <1>;
-			next-level-cache = <&L2>;
-			clocks = <&clk_controller 0>;
-			clock-names = "cpu";
-			cpu-supply = <&cpu_supply0>;
-			operating-points-v2 = <&cluster0_opp>;
-		};
-
-		cpu@100 {
-			compatible = "arm,cortex-a15";
-			reg = <100>;
-			next-level-cache = <&L2>;
-			clocks = <&clk_controller 1>;
-			clock-names = "cpu";
-			cpu-supply = <&cpu_supply1>;
-			operating-points-v2 = <&cluster1_opp>;
-		};
-
-		cpu@101 {
-			compatible = "arm,cortex-a15";
-			reg = <101>;
-			next-level-cache = <&L2>;
-			clocks = <&clk_controller 1>;
-			clock-names = "cpu";
-			cpu-supply = <&cpu_supply1>;
-			operating-points-v2 = <&cluster1_opp>;
-		};
-	};
-
-	cluster0_opp: opp_table0 {
-		compatible = "operating-points-v2";
-		opp-shared;
-
-		opp-1000000000 {
-			opp-hz = /bits/ 64 <1000000000>;
-			opp-microvolt = <975000 970000 985000>;
-			opp-microamp = <70000>;
-			clock-latency-ns = <300000>;
-			opp-suspend;
-		};
-		opp-1100000000 {
-			opp-hz = /bits/ 64 <1100000000>;
-			opp-microvolt = <1000000 980000 1010000>;
-			opp-microamp = <80000>;
-			clock-latency-ns = <310000>;
-		};
-		opp-1200000000 {
-			opp-hz = /bits/ 64 <1200000000>;
-			opp-microvolt = <1025000>;
-			opp-microamp = <90000>;
-			clock-latency-ns = <290000>;
-			turbo-mode;
-		};
-	};
-
-	cluster1_opp: opp_table1 {
-		compatible = "operating-points-v2";
-		opp-shared;
-
-		opp-1300000000 {
-			opp-hz = /bits/ 64 <1300000000>;
-			opp-microvolt = <1050000 1045000 1055000>;
-			opp-microamp = <95000>;
-			clock-latency-ns = <400000>;
-			opp-suspend;
-		};
-		opp-1400000000 {
-			opp-hz = /bits/ 64 <1400000000>;
-			opp-microvolt = <1075000>;
-			opp-microamp = <100000>;
-			clock-latency-ns = <400000>;
-		};
-		opp-1500000000 {
-			opp-hz = /bits/ 64 <1500000000>;
-			opp-microvolt = <1100000 1010000 1110000>;
-			opp-microamp = <95000>;
-			clock-latency-ns = <400000>;
-			turbo-mode;
-		};
-	};
-};
-
-Example 4: Handling multiple regulators
-
-/ {
-	cpus {
-		cpu@0 {
-			compatible = "vendor,cpu-type";
-			...
-
-			vcc0-supply = <&cpu_supply0>;
-			vcc1-supply = <&cpu_supply1>;
-			vcc2-supply = <&cpu_supply2>;
-			operating-points-v2 = <&cpu0_opp_table>;
-		};
-	};
-
-	cpu0_opp_table: opp_table0 {
-		compatible = "operating-points-v2";
-		opp-shared;
-
-		opp-1000000000 {
-			opp-hz = /bits/ 64 <1000000000>;
-			opp-microvolt = <970000>, /* Supply 0 */
-					<960000>, /* Supply 1 */
-					<960000>; /* Supply 2 */
-			opp-microamp =  <70000>,  /* Supply 0 */
-					<70000>,  /* Supply 1 */
-					<70000>;  /* Supply 2 */
-			clock-latency-ns = <300000>;
-		};
-
-		/* OR */
-
-		opp-1000000000 {
-			opp-hz = /bits/ 64 <1000000000>;
-			opp-microvolt = <975000 970000 985000>, /* Supply 0 */
-					<965000 960000 975000>, /* Supply 1 */
-					<965000 960000 975000>; /* Supply 2 */
-			opp-microamp =  <70000>,		/* Supply 0 */
-					<70000>,		/* Supply 1 */
-					<70000>;		/* Supply 2 */
-			clock-latency-ns = <300000>;
-		};
-
-		/* OR */
-
-		opp-1000000000 {
-			opp-hz = /bits/ 64 <1000000000>;
-			opp-microvolt = <975000 970000 985000>, /* Supply 0 */
-					<965000 960000 975000>, /* Supply 1 */
-					<965000 960000 975000>; /* Supply 2 */
-			opp-microamp =  <70000>,		/* Supply 0 */
-					<0>,			/* Supply 1 doesn't need this */
-					<70000>;		/* Supply 2 */
-			clock-latency-ns = <300000>;
-		};
-	};
-};
-
-Example 5: opp-supported-hw
-(example: three level hierarchy of versions: cuts, substrate and process)
-
-/ {
-	cpus {
-		cpu@0 {
-			compatible = "arm,cortex-a7";
-			...
-
-			cpu-supply = <&cpu_supply>
-			operating-points-v2 = <&cpu0_opp_table_slow>;
-		};
-	};
-
-	opp_table {
-		compatible = "operating-points-v2";
-		opp-shared;
-
-		opp-600000000 {
-			/*
-			 * Supports all substrate and process versions for 0xF
-			 * cuts, i.e. only first four cuts.
-			 */
-			opp-supported-hw = <0xF 0xFFFFFFFF 0xFFFFFFFF>
-			opp-hz = /bits/ 64 <600000000>;
-			...
-		};
-
-		opp-800000000 {
-			/*
-			 * Supports:
-			 * - cuts: only one, 6th cut (represented by 6th bit).
-			 * - substrate: supports 16 different substrate versions
-			 * - process: supports 9 different process versions
-			 */
-			opp-supported-hw = <0x20 0xff0000ff 0x0000f4f0>
-			opp-hz = /bits/ 64 <800000000>;
-			...
-		};
-
-		opp-900000000 {
-			/*
-			 * Supports:
-			 * - All cuts and substrate where process version is 0x2.
-			 * - All cuts and process where substrate version is 0x2.
-			 */
-			opp-supported-hw = <0xFFFFFFFF 0xFFFFFFFF 0x02>, <0xFFFFFFFF 0x01 0xFFFFFFFF>
-			opp-hz = /bits/ 64 <900000000>;
-			...
-		};
-	};
-};
-
-Example 6: opp-microvolt-<name>, opp-microamp-<name>:
-(example: device with two possible microvolt ranges: slow and fast)
-
-/ {
-	cpus {
-		cpu@0 {
-			compatible = "arm,cortex-a7";
-			...
-
-			operating-points-v2 = <&cpu0_opp_table>;
-		};
-	};
-
-	cpu0_opp_table: opp_table0 {
-		compatible = "operating-points-v2";
-		opp-shared;
-
-		opp-1000000000 {
-			opp-hz = /bits/ 64 <1000000000>;
-			opp-microvolt-slow = <915000 900000 925000>;
-			opp-microvolt-fast = <975000 970000 985000>;
-			opp-microamp-slow =  <70000>;
-			opp-microamp-fast =  <71000>;
-		};
-
-		opp-1200000000 {
-			opp-hz = /bits/ 64 <1200000000>;
-			opp-microvolt-slow = <915000 900000 925000>, /* Supply vcc0 */
-					      <925000 910000 935000>; /* Supply vcc1 */
-			opp-microvolt-fast = <975000 970000 985000>, /* Supply vcc0 */
-					     <965000 960000 975000>; /* Supply vcc1 */
-			opp-microamp =  <70000>; /* Will be used for both slow/fast */
-		};
-	};
-};
-
-Example 7: Single cluster Quad-core ARM cortex A53, OPP points from firmware,
-distinct clock controls but two sets of clock/voltage/current lines.
-
-/ {
-	cpus {
-		#address-cells = <2>;
-		#size-cells = <0>;
-
-		cpu@0 {
-			compatible = "arm,cortex-a53";
-			reg = <0x0 0x100>;
-			next-level-cache = <&A53_L2>;
-			clocks = <&dvfs_controller 0>;
-			operating-points-v2 = <&cpu_opp0_table>;
-		};
-		cpu@1 {
-			compatible = "arm,cortex-a53";
-			reg = <0x0 0x101>;
-			next-level-cache = <&A53_L2>;
-			clocks = <&dvfs_controller 1>;
-			operating-points-v2 = <&cpu_opp0_table>;
-		};
-		cpu@2 {
-			compatible = "arm,cortex-a53";
-			reg = <0x0 0x102>;
-			next-level-cache = <&A53_L2>;
-			clocks = <&dvfs_controller 2>;
-			operating-points-v2 = <&cpu_opp1_table>;
-		};
-		cpu@3 {
-			compatible = "arm,cortex-a53";
-			reg = <0x0 0x103>;
-			next-level-cache = <&A53_L2>;
-			clocks = <&dvfs_controller 3>;
-			operating-points-v2 = <&cpu_opp1_table>;
-		};
-
-	};
-
-	cpu_opp0_table: opp0_table {
-		compatible = "operating-points-v2";
-		opp-shared;
-	};
-
-	cpu_opp1_table: opp1_table {
-		compatible = "operating-points-v2";
-		opp-shared;
-	};
-};
diff --git a/Documentation/devicetree/bindings/opp/qcom-opp.txt b/Documentation/devicetree/bindings/opp/qcom-opp.txt
index 32eb0793c7e6..41d3e4ff2dc3 100644
--- a/Documentation/devicetree/bindings/opp/qcom-opp.txt
+++ b/Documentation/devicetree/bindings/opp/qcom-opp.txt
@@ -1,7 +1,7 @@
 Qualcomm OPP bindings to describe OPP nodes
 
 The bindings are based on top of the operating-points-v2 bindings
-described in Documentation/devicetree/bindings/opp/opp.txt
+described in Documentation/devicetree/bindings/opp/opp-v2-base.yaml
 Additional properties are described below.
 
 * OPP Table Node
diff --git a/Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt b/Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt
index 832346e489a3..b70d326117cd 100644
--- a/Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt
+++ b/Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt
@@ -13,7 +13,7 @@ regulators to the device that will undergo OPP transitions we can make use
 of the multi regulator binding that is part of the OPP core described here [1]
 to describe both regulators needed by the platform.
 
-[1] Documentation/devicetree/bindings/opp/opp.txt
+[1] Documentation/devicetree/bindings/opp/opp-v2.yaml
 
 Required Properties for Device Node:
 - vdd-supply: phandle to regulator controlling VDD supply
diff --git a/Documentation/devicetree/bindings/pci/intel,keembay-pcie-ep.yaml b/Documentation/devicetree/bindings/pci/intel,keembay-pcie-ep.yaml
new file mode 100644
index 000000000000..e87ff27526ff
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/intel,keembay-pcie-ep.yaml
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/pci/intel,keembay-pcie-ep.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Intel Keem Bay PCIe controller Endpoint mode
+
+maintainers:
+  - Wan Ahmad Zainie <wan.ahmad.zainie.wan.mohamad@intel.com>
+  - Srikanth Thokala <srikanth.thokala@intel.com>
+
+properties:
+  compatible:
+    const: intel,keembay-pcie-ep
+
+  reg:
+    maxItems: 5
+
+  reg-names:
+    items:
+      - const: dbi
+      - const: dbi2
+      - const: atu
+      - const: addr_space
+      - const: apb
+
+  interrupts:
+    maxItems: 4
+
+  interrupt-names:
+    items:
+      - const: pcie
+      - const: pcie_ev
+      - const: pcie_err
+      - const: pcie_mem_access
+
+  num-lanes:
+    description: Number of lanes to use.
+    enum: [ 1, 2 ]
+
+required:
+  - compatible
+  - reg
+  - reg-names
+  - interrupts
+  - interrupt-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    pcie-ep@37000000 {
+          compatible = "intel,keembay-pcie-ep";
+          reg = <0x37000000 0x00001000>,
+                <0x37100000 0x00001000>,
+                <0x37300000 0x00001000>,
+                <0x36000000 0x01000000>,
+                <0x37800000 0x00000200>;
+          reg-names = "dbi", "dbi2", "atu", "addr_space", "apb";
+          interrupts = <GIC_SPI 107 IRQ_TYPE_LEVEL_HIGH>,
+                       <GIC_SPI 108 IRQ_TYPE_EDGE_RISING>,
+                       <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>,
+                       <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>;
+          interrupt-names = "pcie", "pcie_ev", "pcie_err", "pcie_mem_access";
+          num-lanes = <2>;
+    };
diff --git a/Documentation/devicetree/bindings/pci/intel,keembay-pcie.yaml b/Documentation/devicetree/bindings/pci/intel,keembay-pcie.yaml
new file mode 100644
index 000000000000..ed4400c9ac09
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/intel,keembay-pcie.yaml
@@ -0,0 +1,97 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/pci/intel,keembay-pcie.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Intel Keem Bay PCIe controller Root Complex mode
+
+maintainers:
+  - Wan Ahmad Zainie <wan.ahmad.zainie.wan.mohamad@intel.com>
+  - Srikanth Thokala <srikanth.thokala@intel.com>
+
+allOf:
+  - $ref: /schemas/pci/pci-bus.yaml#
+
+properties:
+  compatible:
+    const: intel,keembay-pcie
+
+  ranges:
+    maxItems: 1
+
+  reset-gpios:
+    maxItems: 1
+
+  reg:
+    maxItems: 4
+
+  reg-names:
+    items:
+      - const: dbi
+      - const: atu
+      - const: config
+      - const: apb
+
+  clocks:
+    maxItems: 2
+
+  clock-names:
+    items:
+      - const: master
+      - const: aux
+
+  interrupts:
+    maxItems: 3
+
+  interrupt-names:
+    items:
+      - const: pcie
+      - const: pcie_ev
+      - const: pcie_err
+
+  num-lanes:
+    description: Number of lanes to use.
+    enum: [ 1, 2 ]
+
+required:
+  - compatible
+  - reg
+  - reg-names
+  - ranges
+  - clocks
+  - clock-names
+  - interrupts
+  - interrupt-names
+  - reset-gpios
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/gpio/gpio.h>
+    #define KEEM_BAY_A53_PCIE
+    #define KEEM_BAY_A53_AUX_PCIE
+    pcie@37000000 {
+          compatible = "intel,keembay-pcie";
+          reg = <0x37000000 0x00001000>,
+                <0x37300000 0x00001000>,
+                <0x36e00000 0x00200000>,
+                <0x37800000 0x00000200>;
+          reg-names = "dbi", "atu", "config", "apb";
+          #address-cells = <3>;
+          #size-cells = <2>;
+          device_type = "pci";
+          ranges = <0x02000000 0 0x36000000 0x36000000 0 0x00e00000>;
+          interrupts = <GIC_SPI 107 IRQ_TYPE_LEVEL_HIGH>,
+                       <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>,
+                       <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>;
+          interrupt-names = "pcie", "pcie_ev", "pcie_err";
+          clocks = <&scmi_clk KEEM_BAY_A53_PCIE>,
+                   <&scmi_clk KEEM_BAY_A53_AUX_PCIE>;
+          clock-names = "master", "aux";
+          reset-gpios = <&pca2 9 GPIO_ACTIVE_LOW>;
+          num-lanes = <2>;
+    };
diff --git a/Documentation/devicetree/bindings/pci/mediatek-pcie-cfg.yaml b/Documentation/devicetree/bindings/pci/mediatek-pcie-cfg.yaml
new file mode 100644
index 000000000000..841a3d284bbf
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/mediatek-pcie-cfg.yaml
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pci/mediatek-pcie-cfg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek PCIECFG controller
+
+maintainers:
+  - Chuanjia Liu <chuanjia.liu@mediatek.com>
+  - Jianjun Wang <jianjun.wang@mediatek.com>
+
+description: |
+  The MediaTek PCIECFG controller controls some feature about
+  LTSSM, ASPM and so on.
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - mediatek,generic-pciecfg
+      - const: syscon
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    pciecfg: pciecfg@1a140000 {
+        compatible = "mediatek,generic-pciecfg", "syscon";
+        reg = <0x1a140000 0x1000>;
+    };
+...
diff --git a/Documentation/devicetree/bindings/pci/mediatek-pcie.txt b/Documentation/devicetree/bindings/pci/mediatek-pcie.txt
index 7468d666763a..57ae73462272 100644
--- a/Documentation/devicetree/bindings/pci/mediatek-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/mediatek-pcie.txt
@@ -8,7 +8,7 @@ Required properties:
 	"mediatek,mt7623-pcie"
 	"mediatek,mt7629-pcie"
 - device_type: Must be "pci"
-- reg: Base addresses and lengths of the PCIe subsys and root ports.
+- reg: Base addresses and lengths of the root ports.
 - reg-names: Names of the above areas to use during resource lookup.
 - #address-cells: Address representation for root ports (must be 3)
 - #size-cells: Size representation for root ports (must be 2)
@@ -47,9 +47,12 @@ Required properties for MT7623/MT2701:
 - reset-names: Must be "pcie-rst0", "pcie-rst1", "pcie-rstN".. based on the
   number of root ports.
 
-Required properties for MT2712/MT7622:
+Required properties for MT2712/MT7622/MT7629:
 -interrupts: A list of interrupt outputs of the controller, must have one
 	     entry for each PCIe port
+- interrupt-names: Must include the following entries:
+	- "pcie_irq": The interrupt that is asserted when an MSI/INTX is received
+- linux,pci-domain: PCI domain ID. Should be unique for each host controller
 
 In addition, the device tree node must have sub-nodes describing each
 PCIe port interface, having the following mandatory properties:
@@ -143,130 +146,143 @@ Examples for MT7623:
 
 Examples for MT2712:
 
-	pcie: pcie@11700000 {
+	pcie1: pcie@112ff000 {
 		compatible = "mediatek,mt2712-pcie";
 		device_type = "pci";
-		reg = <0 0x11700000 0 0x1000>,
-		      <0 0x112ff000 0 0x1000>;
-		reg-names = "port0", "port1";
+		reg = <0 0x112ff000 0 0x1000>;
+		reg-names = "port1";
+		linux,pci-domain = <1>;
 		#address-cells = <3>;
 		#size-cells = <2>;
-		interrupts = <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>,
-			     <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&topckgen CLK_TOP_PE2_MAC_P0_SEL>,
-			 <&topckgen CLK_TOP_PE2_MAC_P1_SEL>,
-			 <&pericfg CLK_PERI_PCIE0>,
+		interrupts = <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-names = "pcie_irq";
+		clocks = <&topckgen CLK_TOP_PE2_MAC_P1_SEL>,
 			 <&pericfg CLK_PERI_PCIE1>;
-		clock-names = "sys_ck0", "sys_ck1", "ahb_ck0", "ahb_ck1";
-		phys = <&pcie0_phy PHY_TYPE_PCIE>, <&pcie1_phy PHY_TYPE_PCIE>;
-		phy-names = "pcie-phy0", "pcie-phy1";
+		clock-names = "sys_ck1", "ahb_ck1";
+		phys = <&u3port1 PHY_TYPE_PCIE>;
+		phy-names = "pcie-phy1";
 		bus-range = <0x00 0xff>;
-		ranges = <0x82000000 0 0x20000000  0x0 0x20000000  0 0x10000000>;
+		ranges = <0x82000000 0 0x11400000  0x0 0x11400000  0 0x300000>;
+		status = "disabled";
 
-		pcie0: pcie@0,0 {
-			reg = <0x0000 0 0 0 0>;
-			#address-cells = <3>;
-			#size-cells = <2>;
+		#interrupt-cells = <1>;
+		interrupt-map-mask = <0 0 0 7>;
+		interrupt-map = <0 0 0 1 &pcie_intc1 0>,
+				<0 0 0 2 &pcie_intc1 1>,
+				<0 0 0 3 &pcie_intc1 2>,
+				<0 0 0 4 &pcie_intc1 3>;
+		pcie_intc1: interrupt-controller {
+			interrupt-controller;
+			#address-cells = <0>;
 			#interrupt-cells = <1>;
-			ranges;
-			interrupt-map-mask = <0 0 0 7>;
-			interrupt-map = <0 0 0 1 &pcie_intc0 0>,
-					<0 0 0 2 &pcie_intc0 1>,
-					<0 0 0 3 &pcie_intc0 2>,
-					<0 0 0 4 &pcie_intc0 3>;
-			pcie_intc0: interrupt-controller {
-				interrupt-controller;
-				#address-cells = <0>;
-				#interrupt-cells = <1>;
-			};
 		};
+	};
 
-		pcie1: pcie@1,0 {
-			reg = <0x0800 0 0 0 0>;
-			#address-cells = <3>;
-			#size-cells = <2>;
+	pcie0: pcie@11700000 {
+		compatible = "mediatek,mt2712-pcie";
+		device_type = "pci";
+		reg = <0 0x11700000 0 0x1000>;
+		reg-names = "port0";
+		linux,pci-domain = <0>;
+		#address-cells = <3>;
+		#size-cells = <2>;
+		interrupts = <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-names = "pcie_irq";
+		clocks = <&topckgen CLK_TOP_PE2_MAC_P0_SEL>,
+			 <&pericfg CLK_PERI_PCIE0>;
+		clock-names = "sys_ck0", "ahb_ck0";
+		phys = <&u3port0 PHY_TYPE_PCIE>;
+		phy-names = "pcie-phy0";
+		bus-range = <0x00 0xff>;
+		ranges = <0x82000000 0 0x20000000 0x0 0x20000000 0 0x10000000>;
+		status = "disabled";
+
+		#interrupt-cells = <1>;
+		interrupt-map-mask = <0 0 0 7>;
+		interrupt-map = <0 0 0 1 &pcie_intc0 0>,
+				<0 0 0 2 &pcie_intc0 1>,
+				<0 0 0 3 &pcie_intc0 2>,
+				<0 0 0 4 &pcie_intc0 3>;
+		pcie_intc0: interrupt-controller {
+			interrupt-controller;
+			#address-cells = <0>;
 			#interrupt-cells = <1>;
-			ranges;
-			interrupt-map-mask = <0 0 0 7>;
-			interrupt-map = <0 0 0 1 &pcie_intc1 0>,
-					<0 0 0 2 &pcie_intc1 1>,
-					<0 0 0 3 &pcie_intc1 2>,
-					<0 0 0 4 &pcie_intc1 3>;
-			pcie_intc1: interrupt-controller {
-				interrupt-controller;
-				#address-cells = <0>;
-				#interrupt-cells = <1>;
-			};
 		};
 	};
 
 Examples for MT7622:
 
-	pcie: pcie@1a140000 {
+	pcie0: pcie@1a143000 {
 		compatible = "mediatek,mt7622-pcie";
 		device_type = "pci";
-		reg = <0 0x1a140000 0 0x1000>,
-		      <0 0x1a143000 0 0x1000>,
-		      <0 0x1a145000 0 0x1000>;
-		reg-names = "subsys", "port0", "port1";
+		reg = <0 0x1a143000 0 0x1000>;
+		reg-names = "port0";
+		linux,pci-domain = <0>;
 		#address-cells = <3>;
 		#size-cells = <2>;
-		interrupts = <GIC_SPI 228 IRQ_TYPE_LEVEL_LOW>,
-			     <GIC_SPI 229 IRQ_TYPE_LEVEL_LOW>;
+		interrupts = <GIC_SPI 228 IRQ_TYPE_LEVEL_LOW>;
+		interrupt-names = "pcie_irq";
 		clocks = <&pciesys CLK_PCIE_P0_MAC_EN>,
-			 <&pciesys CLK_PCIE_P1_MAC_EN>,
 			 <&pciesys CLK_PCIE_P0_AHB_EN>,
-			 <&pciesys CLK_PCIE_P1_AHB_EN>,
 			 <&pciesys CLK_PCIE_P0_AUX_EN>,
-			 <&pciesys CLK_PCIE_P1_AUX_EN>,
 			 <&pciesys CLK_PCIE_P0_AXI_EN>,
-			 <&pciesys CLK_PCIE_P1_AXI_EN>,
 			 <&pciesys CLK_PCIE_P0_OBFF_EN>,
-			 <&pciesys CLK_PCIE_P1_OBFF_EN>,
-			 <&pciesys CLK_PCIE_P0_PIPE_EN>,
-			 <&pciesys CLK_PCIE_P1_PIPE_EN>;
-		clock-names = "sys_ck0", "sys_ck1", "ahb_ck0", "ahb_ck1",
-			      "aux_ck0", "aux_ck1", "axi_ck0", "axi_ck1",
-			      "obff_ck0", "obff_ck1", "pipe_ck0", "pipe_ck1";
-		phys = <&pcie0_phy PHY_TYPE_PCIE>, <&pcie1_phy PHY_TYPE_PCIE>;
-		phy-names = "pcie-phy0", "pcie-phy1";
+			 <&pciesys CLK_PCIE_P0_PIPE_EN>;
+		clock-names = "sys_ck0", "ahb_ck0", "aux_ck0",
+			      "axi_ck0", "obff_ck0", "pipe_ck0";
+
 		power-domains = <&scpsys MT7622_POWER_DOMAIN_HIF0>;
 		bus-range = <0x00 0xff>;
-		ranges = <0x82000000 0 0x20000000  0x0 0x20000000  0 0x10000000>;
+		ranges = <0x82000000 0 0x20000000  0x0 0x20000000  0 0x8000000>;
+		status = "disabled";
 
-		pcie0: pcie@0,0 {
-			reg = <0x0000 0 0 0 0>;
-			#address-cells = <3>;
-			#size-cells = <2>;
+		#interrupt-cells = <1>;
+		interrupt-map-mask = <0 0 0 7>;
+		interrupt-map = <0 0 0 1 &pcie_intc0 0>,
+				<0 0 0 2 &pcie_intc0 1>,
+				<0 0 0 3 &pcie_intc0 2>,
+				<0 0 0 4 &pcie_intc0 3>;
+		pcie_intc0: interrupt-controller {
+			interrupt-controller;
+			#address-cells = <0>;
 			#interrupt-cells = <1>;
-			ranges;
-			interrupt-map-mask = <0 0 0 7>;
-			interrupt-map = <0 0 0 1 &pcie_intc0 0>,
-					<0 0 0 2 &pcie_intc0 1>,
-					<0 0 0 3 &pcie_intc0 2>,
-					<0 0 0 4 &pcie_intc0 3>;
-			pcie_intc0: interrupt-controller {
-				interrupt-controller;
-				#address-cells = <0>;
-				#interrupt-cells = <1>;
-			};
 		};
+	};
 
-		pcie1: pcie@1,0 {
-			reg = <0x0800 0 0 0 0>;
-			#address-cells = <3>;
-			#size-cells = <2>;
+	pcie1: pcie@1a145000 {
+		compatible = "mediatek,mt7622-pcie";
+		device_type = "pci";
+		reg = <0 0x1a145000 0 0x1000>;
+		reg-names = "port1";
+		linux,pci-domain = <1>;
+		#address-cells = <3>;
+		#size-cells = <2>;
+		interrupts = <GIC_SPI 229 IRQ_TYPE_LEVEL_LOW>;
+		interrupt-names = "pcie_irq";
+		clocks = <&pciesys CLK_PCIE_P1_MAC_EN>,
+			 /* designer has connect RC1 with p0_ahb clock */
+			 <&pciesys CLK_PCIE_P0_AHB_EN>,
+			 <&pciesys CLK_PCIE_P1_AUX_EN>,
+			 <&pciesys CLK_PCIE_P1_AXI_EN>,
+			 <&pciesys CLK_PCIE_P1_OBFF_EN>,
+			 <&pciesys CLK_PCIE_P1_PIPE_EN>;
+		clock-names = "sys_ck1", "ahb_ck1", "aux_ck1",
+			      "axi_ck1", "obff_ck1", "pipe_ck1";
+
+		power-domains = <&scpsys MT7622_POWER_DOMAIN_HIF0>;
+		bus-range = <0x00 0xff>;
+		ranges = <0x82000000 0 0x28000000  0x0 0x28000000  0 0x8000000>;
+		status = "disabled";
+
+		#interrupt-cells = <1>;
+		interrupt-map-mask = <0 0 0 7>;
+		interrupt-map = <0 0 0 1 &pcie_intc1 0>,
+				<0 0 0 2 &pcie_intc1 1>,
+				<0 0 0 3 &pcie_intc1 2>,
+				<0 0 0 4 &pcie_intc1 3>;
+		pcie_intc1: interrupt-controller {
+			interrupt-controller;
+			#address-cells = <0>;
 			#interrupt-cells = <1>;
-			ranges;
-			interrupt-map-mask = <0 0 0 7>;
-			interrupt-map = <0 0 0 1 &pcie_intc1 0>,
-					<0 0 0 2 &pcie_intc1 1>,
-					<0 0 0 3 &pcie_intc1 2>,
-					<0 0 0 4 &pcie_intc1 3>;
-			pcie_intc1: interrupt-controller {
-				interrupt-controller;
-				#address-cells = <0>;
-				#interrupt-cells = <1>;
-			};
 		};
 	};
diff --git a/Documentation/devicetree/bindings/pci/pci-ep.yaml b/Documentation/devicetree/bindings/pci/pci-ep.yaml
index 7847bbcd4a03..ccec51ab5247 100644
--- a/Documentation/devicetree/bindings/pci/pci-ep.yaml
+++ b/Documentation/devicetree/bindings/pci/pci-ep.yaml
@@ -23,6 +23,13 @@ properties:
     default: 1
     maximum: 255
 
+  max-virtual-functions:
+    description: Array representing the number of virtual functions corresponding to each physical
+      function
+    $ref: /schemas/types.yaml#/definitions/uint8-array
+    minItems: 1
+    maxItems: 255
+
   max-link-speed:
     $ref: /schemas/types.yaml#/definitions/uint32
     enum: [ 1, 2, 3, 4 ]
diff --git a/Documentation/devicetree/bindings/pci/xilinx-nwl-pcie.txt b/Documentation/devicetree/bindings/pci/xilinx-nwl-pcie.txt
index 2d677e90a7e2..f56f8c58c5d9 100644
--- a/Documentation/devicetree/bindings/pci/xilinx-nwl-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/xilinx-nwl-pcie.txt
@@ -35,6 +35,7 @@ Required properties:
 
 Optional properties:
 - dma-coherent: present if DMA operations are coherent
+- clocks: Input clock specifier. Refer to common clock bindings
 
 Example:
 ++++++++
diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx8ulp-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/fsl,imx8ulp-pinctrl.yaml
new file mode 100644
index 000000000000..86622c4f374b
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/fsl,imx8ulp-pinctrl.yaml
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pinctrl/fsl,imx8ulp-pinctrl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale IMX8ULP IOMUX Controller
+
+maintainers:
+  - Jacky Bai <ping.bai@nxp.com>
+
+description:
+  Please refer to fsl,imx-pinctrl.txt and pinctrl-bindings.txt in this directory
+  for common binding part and usage.
+
+properties:
+  compatible:
+    const: fsl,imx8ulp-iomuxc1
+
+  reg:
+    maxItems: 1
+
+# Client device subnode's properties
+patternProperties:
+  'grp$':
+    type: object
+    description:
+      Pinctrl node's client devices use subnodes for desired pin configuration.
+      Client device subnodes use below standard properties.
+
+    properties:
+      fsl,pins:
+        description:
+          each entry consists of 5 integers and represents the mux and config
+          setting for one pin. The first 4 integers <mux_config_reg input_reg
+          mux_mode input_val> are specified using a PIN_FUNC_ID macro, which can
+          be found in <arch/arm64/boot/dts/freescale/imx8ulp-pinfunc.h>. The last
+          integer CONFIG is the pad setting value like pull-up on this pin. Please
+          refer to i.MX8ULP Reference Manual for detailed CONFIG settings.
+        $ref: /schemas/types.yaml#/definitions/uint32-matrix
+        items:
+          items:
+            - description: |
+                "mux_config_reg" indicates the offset of mux register.
+            - description: |
+                "input_reg" indicates the offset of select input register.
+            - description: |
+                "mux_mode" indicates the mux value to be applied.
+            - description: |
+                "input_val" indicates the select input value to be applied.
+            - description: |
+                "pad_setting" indicates the pad configuration value to be applied.
+
+    required:
+      - fsl,pins
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  # Pinmux controller node
+  - |
+    iomuxc: pinctrl@298c0000 {
+        compatible = "fsl,imx8ulp-iomuxc1";
+        reg = <0x298c0000 0x10000>;
+
+        pinctrl_lpuart5: lpuart5grp {
+            fsl,pins =
+                <0x0138 0x08F0 0x4 0x3	0x3>,
+                <0x013C 0x08EC 0x4 0x3	0x3>;
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/pinctrl/ingenic,pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/ingenic,pinctrl.yaml
index a4846d78111c..a12d0ceb7637 100644
--- a/Documentation/devicetree/bindings/pinctrl/ingenic,pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/ingenic,pinctrl.yaml
@@ -19,10 +19,10 @@ description: >
   pin within that GPIO port. For example PA0 is the first pin in GPIO port A,
   and PB31 is the last pin in GPIO port B. The JZ4730, the JZ4740, the JZ4725B,
   the X1000 and the X1830 contains 4 GPIO ports, PA to PD, for a total of 128
-  pins. The X2000 contains 5 GPIO ports, PA to PE, for a total of 160 pins.
-  The JZ4750, the JZ4755 the JZ4760, the JZ4770 and the JZ4780 contains 6 GPIO
-  ports, PA to PF, for a total of 192 pins. The JZ4775 contains 7 GPIO ports,
-  PA to PG, for a total of 224 pins.
+  pins. The X2000 and the X2100 contains 5 GPIO ports, PA to PE, for a total of
+  160 pins. The JZ4750, the JZ4755 the JZ4760, the JZ4770 and the JZ4780 contains
+  6 GPIO ports, PA to PF, for a total of 192 pins. The JZ4775 contains 7 GPIO
+  ports, PA to PG, for a total of 224 pins.
 
 maintainers:
   - Paul Cercueil <paul@crapouillou.net>
@@ -47,6 +47,7 @@ properties:
           - ingenic,x1500-pinctrl
           - ingenic,x1830-pinctrl
           - ingenic,x2000-pinctrl
+          - ingenic,x2100-pinctrl
       - items:
           - const: ingenic,jz4760b-pinctrl
           - const: ingenic,jz4760-pinctrl
@@ -85,6 +86,7 @@ patternProperties:
           - ingenic,x1500-gpio
           - ingenic,x1830-gpio
           - ingenic,x2000-gpio
+          - ingenic,x2100-gpio
 
       reg:
         items:
diff --git a/Documentation/devicetree/bindings/pinctrl/intel,pinctrl-keembay.yaml b/Documentation/devicetree/bindings/pinctrl/intel,pinctrl-keembay.yaml
new file mode 100644
index 000000000000..5e99d79499b4
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/intel,pinctrl-keembay.yaml
@@ -0,0 +1,135 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pinctrl/intel,pinctrl-keembay.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel Keem Bay pin controller Device Tree Bindings
+
+maintainers:
+  - Lakshmi Sowjanya D <lakshmi.sowjanya.d@intel.com>
+
+description: |
+  Intel Keem Bay SoC integrates a pin controller which enables control
+  of pin directions, input/output values and configuration
+  for a total of 80 pins.
+
+properties:
+  compatible:
+    const: intel,keembay-pinctrl
+
+  reg:
+    maxItems: 2
+
+  gpio-controller: true
+
+  '#gpio-cells':
+    const: 2
+
+  ngpios:
+    description: The number of GPIOs exposed.
+    const: 80
+
+  interrupts:
+    description:
+      Specifies the interrupt lines to be used by the controller.
+      Each interrupt line is shared by upto 4 GPIO lines.
+    maxItems: 8
+
+  interrupt-controller: true
+
+  '#interrupt-cells':
+    const: 2
+
+patternProperties:
+  '^gpio@[0-9a-f]*$':
+    type: object
+
+    description:
+      Child nodes can be specified to contain pin configuration information,
+      which can then be utilized by pinctrl client devices.
+      The following properties are supported.
+
+    properties:
+      pins:
+        description: |
+          The name(s) of the pins to be configured in the child node.
+          Supported pin names are "GPIO0" up to "GPIO79".
+
+      bias-disable: true
+
+      bias-pull-down: true
+
+      bias-pull-up: true
+
+      drive-strength:
+        description: IO pads drive strength in milli Ampere.
+        enum: [2, 4, 8, 12]
+
+      bias-bus-hold:
+        type: boolean
+
+      input-schmitt-enable:
+        type: boolean
+
+      slew-rate:
+        description: GPIO slew rate control.
+                      0 - Fast(~100MHz)
+                      1 - Slow(~50MHz)
+        enum: [0, 1]
+
+additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - gpio-controller
+  - ngpios
+  - '#gpio-cells'
+  - interrupts
+  - interrupt-controller
+  - '#interrupt-cells'
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    // Example 1
+    gpio@0 {
+        compatible = "intel,keembay-pinctrl";
+        reg = <0x600b0000 0x88>,
+              <0x600b0190 0x1ac>;
+        gpio-controller;
+        ngpios = <0x50>;
+        #gpio-cells = <0x2>;
+        interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 95 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 99 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>;
+        interrupt-controller;
+        #interrupt-cells = <2>;
+    };
+
+    // Example 2
+    gpio@1 {
+        compatible = "intel,keembay-pinctrl";
+        reg = <0x600c0000 0x88>,
+              <0x600c0190 0x1ac>;
+        gpio-controller;
+        ngpios = <0x50>;
+        #gpio-cells = <0x2>;
+        interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 95 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 99 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>;
+        interrupt-controller;
+        #interrupt-cells = <2>;
+    };
diff --git a/Documentation/devicetree/bindings/pinctrl/marvell,armada-37xx-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/marvell,armada-37xx-pinctrl.txt
index 38dc56a57760..ecec514b3155 100644
--- a/Documentation/devicetree/bindings/pinctrl/marvell,armada-37xx-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/marvell,armada-37xx-pinctrl.txt
@@ -43,19 +43,19 @@ group emmc_nb
 
 group pwm0
  - pin 11 (GPIO1-11)
- - functions pwm, gpio
+ - functions pwm, led, gpio
 
 group pwm1
  - pin 12
- - functions pwm, gpio
+ - functions pwm, led, gpio
 
 group pwm2
  - pin 13
- - functions pwm, gpio
+ - functions pwm, led, gpio
 
 group pwm3
  - pin 14
- - functions pwm, gpio
+ - functions pwm, led, gpio
 
 group pmic1
  - pin 7
diff --git a/Documentation/devicetree/bindings/pinctrl/mediatek,mt65xx-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/mediatek,mt65xx-pinctrl.yaml
new file mode 100644
index 000000000000..f8e6e138dc13
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/mediatek,mt65xx-pinctrl.yaml
@@ -0,0 +1,206 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pinctrl/mediatek,mt65xx-pinctrl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek MT65xx Pin Controller Device Tree Bindings
+
+maintainers:
+  - Sean Wang <sean.wang@kernel.org>
+
+description: |+
+  The Mediatek's Pin controller is used to control SoC pins.
+
+properties:
+  compatible:
+    enum:
+      - mediatek,mt2701-pinctrl
+      - mediatek,mt2712-pinctrl
+      - mediatek,mt6397-pinctrl
+      - mediatek,mt7623-pinctrl
+      - mediatek,mt8127-pinctrl
+      - mediatek,mt8135-pinctrl
+      - mediatek,mt8167-pinctrl
+      - mediatek,mt8173-pinctrl
+      - mediatek,mt8516-pinctrl
+
+  reg:
+    maxItems: 1
+
+  pins-are-numbered:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: |
+      Specify the subnodes are using numbered pinmux to specify pins.
+
+  gpio-controller: true
+
+  "#gpio-cells":
+    const: 2
+    description: |
+      Number of cells in GPIO specifier. Since the generic GPIO
+      binding is used, the amount of cells must be specified as 2. See the below
+      mentioned gpio binding representation for description of particular cells.
+
+  mediatek,pctl-regmap:
+    $ref: /schemas/types.yaml#/definitions/phandle-array
+    minItems: 1
+    maxItems: 2
+    description: |
+      Should be phandles of the syscfg node.
+
+  interrupt-controller: true
+
+  interrupts:
+    minItems: 1
+    maxItems: 3
+
+  "#interrupt-cells":
+    const: 2
+
+required:
+  - compatible
+  - pins-are-numbered
+  - gpio-controller
+  - "#gpio-cells"
+
+patternProperties:
+  '-[0-9]+$':
+    type: object
+    additionalProperties: false
+    patternProperties:
+      'pins':
+        type: object
+        additionalProperties: false
+        description: |
+          A pinctrl node should contain at least one subnodes representing the
+          pinctrl groups available on the machine. Each subnode will list the
+          pins it needs, and how they should be configured, with regard to muxer
+          configuration, pullups, drive strength, input enable/disable and input
+          schmitt.
+        $ref: "/schemas/pinctrl/pincfg-node.yaml"
+
+        properties:
+          pinmux:
+            description:
+              integer array, represents gpio pin number and mux setting.
+              Supported pin number and mux varies for different SoCs, and are
+              defined as macros in <soc>-pinfunc.h directly.
+
+          bias-disable: true
+
+          bias-pull-up:
+            description: |
+              Besides generic pinconfig options, it can be used as the pull up
+              settings for 2 pull resistors, R0 and R1. User can configure those
+              special pins. Some macros have been defined for this usage, such
+              as MTK_PUPD_SET_R1R0_00. See dt-bindings/pinctrl/mt65xx.h for
+              valid arguments.
+
+          bias-pull-down: true
+
+          input-enable: true
+
+          input-disable: true
+
+          output-low: true
+
+          output-high: true
+
+          input-schmitt-enable: true
+
+          input-schmitt-disable: true
+
+          drive-strength:
+            description: |
+              Can support some arguments, such as MTK_DRIVE_4mA, MTK_DRIVE_6mA,
+              etc. See dt-bindings/pinctrl/mt65xx.h for valid arguments.
+
+        required:
+          - pinmux
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/pinctrl/mt8135-pinfunc.h>
+
+    soc {
+        #address-cells = <2>;
+        #size-cells = <2>;
+
+        syscfg_pctl_a: syscfg-pctl-a@10005000 {
+          compatible = "mediatek,mt8135-pctl-a-syscfg", "syscon";
+          reg = <0 0x10005000 0 0x1000>;
+        };
+
+        syscfg_pctl_b: syscfg-pctl-b@1020c020 {
+          compatible = "mediatek,mt8135-pctl-b-syscfg", "syscon";
+          reg = <0 0x1020C020 0 0x1000>;
+        };
+
+        pinctrl@1c20800 {
+          compatible = "mediatek,mt8135-pinctrl";
+          reg = <0 0x1000B000 0 0x1000>;
+          mediatek,pctl-regmap = <&syscfg_pctl_a>, <&syscfg_pctl_b>;
+          pins-are-numbered;
+          gpio-controller;
+          #gpio-cells = <2>;
+          interrupt-controller;
+          #interrupt-cells = <2>;
+          interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>,
+              <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>,
+              <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>;
+
+          i2c0_pins_a: i2c0-0 {
+            pins1 {
+              pinmux = <MT8135_PIN_100_SDA0__FUNC_SDA0>,
+                <MT8135_PIN_101_SCL0__FUNC_SCL0>;
+              bias-disable;
+            };
+          };
+
+          i2c1_pins_a: i2c1-0 {
+            pins {
+              pinmux = <MT8135_PIN_195_SDA1__FUNC_SDA1>,
+                <MT8135_PIN_196_SCL1__FUNC_SCL1>;
+              bias-pull-up = <MTK_PUPD_SET_R1R0_01>;
+            };
+          };
+
+          i2c2_pins_a: i2c2-0 {
+            pins1 {
+              pinmux = <MT8135_PIN_193_SDA2__FUNC_SDA2>;
+              bias-pull-down;
+            };
+
+            pins2 {
+              pinmux = <MT8135_PIN_49_WATCHDOG__FUNC_GPIO49>;
+              bias-pull-up;
+            };
+          };
+
+          i2c3_pins_a: i2c3-0 {
+            pins1 {
+              pinmux = <MT8135_PIN_40_DAC_CLK__FUNC_GPIO40>,
+                <MT8135_PIN_41_DAC_WS__FUNC_GPIO41>;
+              bias-pull-up = <MTK_PUPD_SET_R1R0_01>;
+            };
+
+            pins2 {
+              pinmux = <MT8135_PIN_35_SCL3__FUNC_SCL3>,
+                <MT8135_PIN_36_SDA3__FUNC_SDA3>;
+              output-low;
+              bias-pull-up = <MTK_PUPD_SET_R1R0_01>;
+            };
+
+            pins3 {
+              pinmux = <MT8135_PIN_57_JTCK__FUNC_GPIO57>,
+                <MT8135_PIN_60_JTDI__FUNC_JTDI>;
+              drive-strength = <32>;
+            };
+          };
+        };
+    };
diff --git a/Documentation/devicetree/bindings/pinctrl/mediatek,mt6797-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/mediatek,mt6797-pinctrl.yaml
new file mode 100644
index 000000000000..76a6df75ed9c
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/mediatek,mt6797-pinctrl.yaml
@@ -0,0 +1,173 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pinctrl/mediatek,mt6797-pinctrl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek MT6797 Pin Controller Device Tree Bindings
+
+maintainers:
+  - Sean Wang <sean.wang@kernel.org>
+
+description: |+
+  The MediaTek's MT6797 Pin controller is used to control SoC pins.
+
+properties:
+  compatible:
+    const: mediatek,mt6797-pinctrl
+
+  reg:
+    minItems: 5
+    maxItems: 5
+
+  reg-names:
+    items:
+      - const: gpio
+      - const: iocfgl
+      - const: iocfgb
+      - const: iocfgr
+      - const: iocfgt
+
+  gpio-controller: true
+
+  "#gpio-cells":
+    const: 2
+    description: |
+      Number of cells in GPIO specifier. Since the generic GPIO
+      binding is used, the amount of cells must be specified as 2. See the below
+      mentioned gpio binding representation for description of particular cells.
+
+  interrupt-controller: true
+
+  interrupts:
+    maxItems: 1
+
+  "#interrupt-cells":
+    const: 2
+
+required:
+  - compatible
+  - reg
+  - reg-names
+  - gpio-controller
+  - "#gpio-cells"
+
+patternProperties:
+  '-[0-9]+$':
+    type: object
+    additionalProperties: false
+    patternProperties:
+      'pins':
+        type: object
+        additionalProperties: false
+        description: |
+          A pinctrl node should contain at least one subnodes representing the
+          pinctrl groups available on the machine. Each subnode will list the
+          pins it needs, and how they should be configured, with regard to muxer
+          configuration, pullups, drive strength, input enable/disable and input
+          schmitt.
+        $ref: "/schemas/pinctrl/pincfg-node.yaml"
+
+        properties:
+          pinmux:
+            description:
+              integer array, represents gpio pin number and mux setting.
+              Supported pin number and mux varies for different SoCs, and are
+              defined as macros in <soc>-pinfunc.h directly.
+
+          bias-disable: true
+
+          bias-pull-up: true
+
+          bias-pull-down: true
+
+          input-enable: true
+
+          input-disable: true
+
+          output-enable: true
+
+          output-low: true
+
+          output-high: true
+
+          input-schmitt-enable: true
+
+          input-schmitt-disable: true
+
+          drive-strength:
+            enum: [2, 4, 8, 12, 16]
+
+          slew-rate:
+            enum: [0, 1]
+
+          mediatek,pull-up-adv:
+            description: |
+              Pull up setings for 2 pull resistors, R0 and R1. User can
+              configure those special pins. Valid arguments are described as below:
+              0: (R1, R0) = (0, 0) which means R1 disabled and R0 disabled.
+              1: (R1, R0) = (0, 1) which means R1 disabled and R0 enabled.
+              2: (R1, R0) = (1, 0) which means R1 enabled and R0 disabled.
+              3: (R1, R0) = (1, 1) which means R1 enabled and R0 enabled.
+            $ref: /schemas/types.yaml#/definitions/uint32
+            enum: [0, 1, 2, 3]
+
+          mediatek,pull-down-adv:
+            description: |
+              Pull down settings for 2 pull resistors, R0 and R1. User can
+              configure those special pins. Valid arguments are described as below:
+              0: (R1, R0) = (0, 0) which means R1 disabled and R0 disabled.
+              1: (R1, R0) = (0, 1) which means R1 disabled and R0 enabled.
+              2: (R1, R0) = (1, 0) which means R1 enabled and R0 disabled.
+              3: (R1, R0) = (1, 1) which means R1 enabled and R0 enabled.
+            $ref: /schemas/types.yaml#/definitions/uint32
+            enum: [0, 1, 2, 3]
+
+          mediatek,tdsel:
+            description: |
+              An integer describing the steps for output level shifter duty
+              cycle when asserted (high pulse width adjustment). Valid arguments
+              are from 0 to 15.
+            $ref: /schemas/types.yaml#/definitions/uint32
+
+          mediatek,rdsel:
+            description: |
+              An integer describing the steps for input level shifter duty cycle
+              when asserted (high pulse width adjustment). Valid arguments are
+              from 0 to 63.
+            $ref: /schemas/types.yaml#/definitions/uint32
+
+        required:
+          - pinmux
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/pinctrl/mt6797-pinfunc.h>
+
+    soc {
+        #address-cells = <2>;
+        #size-cells = <2>;
+
+        pio: pinctrl@10005000 {
+            compatible = "mediatek,mt6797-pinctrl";
+            reg = <0 0x10005000 0 0x1000>,
+                  <0 0x10002000 0 0x400>,
+                  <0 0x10002400 0 0x400>,
+                  <0 0x10002800 0 0x400>,
+                  <0 0x10002C00 0 0x400>;
+            reg-names = "gpio", "iocfgl", "iocfgb", "iocfgr", "iocfgt";
+            gpio-controller;
+            #gpio-cells = <2>;
+
+            uart_pins_a: uart-0 {
+                pins1 {
+                    pinmux = <MT6797_GPIO232__FUNC_URXD1>,
+                            <MT6797_GPIO233__FUNC_UTXD1>;
+                };
+            };
+        };
+    };
diff --git a/Documentation/devicetree/bindings/pinctrl/mediatek,mt7622-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/mediatek,mt7622-pinctrl.yaml
new file mode 100644
index 000000000000..0feecd376c69
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/mediatek,mt7622-pinctrl.yaml
@@ -0,0 +1,373 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pinctrl/mediatek,mt7622-pinctrl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek MT7622 Pin Controller Device Tree Bindings
+
+maintainers:
+  - Sean Wang <sean.wang@kernel.org>
+
+description: |+
+  The MediaTek's MT7622 Pin controller is used to control SoC pins.
+
+properties:
+  compatible:
+    enum:
+      - mediatek,mt7622-pinctrl
+      - mediatek,mt7629-pinctrl
+
+  reg:
+    maxItems: 1
+
+  reg-names:
+    items:
+      - const: eint
+
+  gpio-controller: true
+
+  "#gpio-cells":
+    const: 2
+    description: |
+      Number of cells in GPIO specifier. Since the generic GPIO
+      binding is used, the amount of cells must be specified as 2. See the below
+      mentioned gpio binding representation for description of particular cells.
+
+  interrupt-controller: true
+
+  interrupts:
+    maxItems: 1
+
+  "#interrupt-cells":
+    const: 2
+
+required:
+  - compatible
+  - reg
+  - gpio-controller
+  - "#gpio-cells"
+
+if:
+  required:
+    - interrupt-controller
+then:
+  required:
+    - reg-names
+    - interrupts
+    - "#interrupt-cells"
+
+patternProperties:
+  '-[0-9]+$':
+    type: object
+    additionalProperties: false
+    patternProperties:
+      'mux':
+        type: object
+        additionalProperties: false
+        description: |
+          pinmux configuration nodes.
+        $ref: "/schemas/pinctrl/pinmux-node.yaml"
+        properties:
+          function:
+            description: |
+              A string containing the name of the function to mux to the group.
+            enum: [emmc, eth, i2c, i2s, ir, led, flash, pcie, pmic, pwm, sd,
+                   spi, tdm, uart, watchdog, wifi]
+
+          groups:
+            description: |
+              An array of strings. Each string contains the name of a group.
+
+          drive-strength:
+            enum: [4, 8, 12, 16]
+
+        required:
+          - groups
+          - function
+
+        allOf:
+          - if:
+              properties:
+                function:
+                  const: emmc
+            then:
+              properties:
+                groups:
+                  enum: [emmc, emmc_rst]
+          - if:
+              properties:
+                function:
+                  const: eth
+            then:
+              properties:
+                groups:
+                  enum: [esw, esw_p0_p1, esw_p2_p3_p4, rgmii_via_esw,
+                         rgmii_via_gmac1, rgmii_via_gmac2, mdc_mdio]
+          - if:
+              properties:
+                function:
+                  const: i2c
+            then:
+              properties:
+                groups:
+                  enum: [i2c0, i2c_0, i2c_1, i2c1_0, i2c1_1, i2c1_2, i2c2_0,
+                         i2c2_1, i2c2_2]
+          - if:
+              properties:
+                function:
+                  const: i2s
+            then:
+              properties:
+                groups:
+                  enum: [i2s_in_mclk_bclk_ws, i2s1_in_data, i2s2_in_data,
+                         i2s3_in_data, i2s4_in_data, i2s_out_mclk_bclk_ws,
+                         i2s1_out_data, i2s2_out_data, i2s3_out_data,
+                         i2s4_out_data]
+          - if:
+              properties:
+                function:
+                  const: ir
+            then:
+              properties:
+                groups:
+                  enum: [ir_0_tx, ir_1_tx, ir_2_tx, ir_0_rx, ir_1_rx, ir_2_rx]
+          - if:
+              properties:
+                function:
+                  const: led
+            then:
+              properties:
+                groups:
+                  enum: [ephy_leds, ephy0_led, ephy1_led, ephy2_led, ephy3_led,
+                         ephy4_led, wled, wf2g_led, wf5g_led]
+          - if:
+              properties:
+                function:
+                  const: flash
+            then:
+              properties:
+                groups:
+                  enum: [par_nand, snfi, spi_nor]
+          - if:
+              properties:
+                function:
+                  const: pcie
+            then:
+              properties:
+                groups:
+                  enum: [pcie0_0_waken, pcie0_1_waken, pcie1_0_waken,
+                         pcie0_0_clkreq, pcie0_1_clkreq, pcie1_0_clkreq,
+                         pcie0_pad_perst, pcie1_pad_perst, pcie_pereset,
+                         pcie_wake, pcie_clkreq]
+          - if:
+              properties:
+                function:
+                  const: pmic
+            then:
+              properties:
+                groups:
+                  enum: [pmic_bus]
+          - if:
+              properties:
+                function:
+                  const: pwm
+            then:
+              properties:
+                groups:
+                  enum: [pwm_ch1_0, pwm_ch1_1, pwm_ch1_2, pwm_ch2_0, pwm_ch2_1,
+                         pwm_ch2_2, pwm_ch3_0, pwm_ch3_1, pwm_ch3_2, pwm_ch4_0,
+                         pwm_ch4_1, pwm_ch4_2, pwm_ch4_3, pwm_ch5_0, pwm_ch5_1,
+                         pwm_ch5_2, pwm_ch6_0, pwm_ch6_1, pwm_ch6_2, pwm_ch6_3,
+                         pwm_ch7_0, pwm_0, pwm_1]
+          - if:
+              properties:
+                function:
+                  const: sd
+            then:
+              properties:
+                groups:
+                  enum: [sd_0, sd_1]
+          - if:
+              properties:
+                function:
+                  const: spi
+            then:
+              properties:
+                groups:
+                  enum: [spic0_0, spic0_1, spic1_0, spic1_1, spic2_0_wp_hold,
+                         spic2_0, spi_0, spi_1, spi_wp, spi_hold]
+          - if:
+              properties:
+                function:
+                  const: tdm
+            then:
+              properties:
+                groups:
+                  enum: [tdm_0_out_mclk_bclk_ws, tdm_0_in_mclk_bclk_ws,
+                         tdm_0_out_data, tdm_0_in_data, tdm_1_out_mclk_bclk_ws,
+                         tdm_1_in_mclk_bclk_ws, tdm_1_out_data, tdm_1_in_data]
+          - if:
+              properties:
+                function:
+                  const: uart
+            then:
+              properties:
+                groups:
+                  enum: [uart0_0_tx_rx, uart1_0_tx_rx, uart1_0_rts_cts,
+                         uart1_1_tx_rx, uart1_1_rts_cts, uart2_0_tx_rx,
+                         uart2_0_rts_cts, uart2_1_tx_rx, uart2_1_rts_cts,
+                         uart2_2_tx_rx, uart2_2_rts_cts, uart2_3_tx_rx,
+                         uart3_0_tx_rx, uart3_1_tx_rx, uart3_1_rts_cts,
+                         uart4_0_tx_rx, uart4_1_tx_rx, uart4_1_rts_cts,
+                         uart4_2_tx_rx, uart4_2_rts_cts, uart0_txd_rxd,
+                         uart1_0_txd_rxd, uart1_0_cts_rts, uart1_1_txd_rxd,
+                         uart1_1_cts_rts, uart2_0_txd_rxd, uart2_0_cts_rts,
+                         uart2_1_txd_rxd, uart2_1_cts_rts]
+          - if:
+              properties:
+                function:
+                  const: watchdog
+            then:
+              properties:
+                groups:
+                  enum: [watchdog]
+          - if:
+              properties:
+                function:
+                  const: wifi
+            then:
+              properties:
+                groups:
+                  enum: [wf0_2g, wf0_5g]
+
+      'conf':
+        type: object
+        additionalProperties: false
+        description: |
+          pinconf configuration nodes.
+        $ref: "/schemas/pinctrl/pincfg-node.yaml"
+
+        properties:
+          groups:
+            description: |
+              An array of strings. Each string contains the name of a group.
+              Valid values are the same as the pinmux node.
+
+          pins:
+            description: |
+              An array of strings. Each string contains the name of a pin.
+            enum: [GPIO_A, I2S1_IN, I2S1_OUT, I2S_BCLK, I2S_WS, I2S_MCLK, TXD0,
+                   RXD0, SPI_WP, SPI_HOLD, SPI_CLK, SPI_MOSI, SPI_MISO, SPI_CS,
+                   I2C_SDA, I2C_SCL, I2S2_IN, I2S3_IN, I2S4_IN, I2S2_OUT,
+                   I2S3_OUT, I2S4_OUT, GPIO_B, MDC, MDIO, G2_TXD0, G2_TXD1,
+                   G2_TXD2, G2_TXD3, G2_TXEN, G2_TXC, G2_RXD0, G2_RXD1, G2_RXD2,
+                   G2_RXD3, G2_RXDV, G2_RXC, NCEB, NWEB, NREB, NDL4, NDL5, NDL6,
+                   NDL7, NRB, NCLE, NALE, NDL0, NDL1, NDL2, NDL3, MDI_TP_P0,
+                   MDI_TN_P0, MDI_RP_P0, MDI_RN_P0, MDI_TP_P1, MDI_TN_P1,
+                   MDI_RP_P1, MDI_RN_P1, MDI_RP_P2, MDI_RN_P2, MDI_TP_P2,
+                   MDI_TN_P2, MDI_TP_P3, MDI_TN_P3, MDI_RP_P3, MDI_RN_P3,
+                   MDI_RP_P4, MDI_RN_P4, MDI_TP_P4, MDI_TN_P4, PMIC_SCL,
+                   PMIC_SDA, SPIC1_CLK, SPIC1_MOSI, SPIC1_MISO, SPIC1_CS,
+                   GPIO_D, WATCHDOG, RTS3_N, CTS3_N, TXD3, RXD3, PERST0_N,
+                   PERST1_N, WLED_N, EPHY_LED0_N, AUXIN0, AUXIN1, AUXIN2,
+                   AUXIN3, TXD4, RXD4, RTS4_N, CST4_N, PWM1, PWM2, PWM3, PWM4,
+                   PWM5, PWM6, PWM7, GPIO_E, TOP_5G_CLK, TOP_5G_DATA,
+                   WF0_5G_HB0, WF0_5G_HB1, WF0_5G_HB2, WF0_5G_HB3, WF0_5G_HB4,
+                   WF0_5G_HB5, WF0_5G_HB6, XO_REQ, TOP_RST_N, SYS_WATCHDOG,
+                   EPHY_LED0_N_JTDO, EPHY_LED1_N_JTDI, EPHY_LED2_N_JTMS,
+                   EPHY_LED3_N_JTCLK, EPHY_LED4_N_JTRST_N, WF2G_LED_N,
+                   WF5G_LED_N, GPIO_9, GPIO_10, GPIO_11, GPIO_12, UART1_TXD,
+                   UART1_RXD, UART1_CTS, UART1_RTS, UART2_TXD, UART2_RXD,
+                   UART2_CTS, UART2_RTS, SMI_MDC, SMI_MDIO, PCIE_PERESET_N,
+                   PWM_0, GPIO_0, GPIO_1, GPIO_2, GPIO_3, GPIO_4, GPIO_5,
+                   GPIO_6, GPIO_7, GPIO_8, UART0_TXD, UART0_RXD, TOP_2G_CLK,
+                   TOP_2G_DATA, WF0_2G_HB0, WF0_2G_HB1, WF0_2G_HB2, WF0_2G_HB3,
+                   WF0_2G_HB4, WF0_2G_HB5, WF0_2G_HB6]
+
+          bias-disable: true
+
+          bias-pull-up: true
+
+          bias-pull-down: true
+
+          input-enable: true
+
+          input-disable: true
+
+          output-enable: true
+
+          output-low: true
+
+          output-high: true
+
+          input-schmitt-enable: true
+
+          input-schmitt-disable: true
+
+          drive-strength:
+            enum: [4, 8, 12, 16]
+
+          slew-rate:
+            enum: [0, 1]
+
+          mediatek,tdsel:
+            description: |
+              An integer describing the steps for output level shifter duty
+              cycle when asserted (high pulse width adjustment). Valid arguments
+              are from 0 to 15.
+            $ref: /schemas/types.yaml#/definitions/uint32
+
+          mediatek,rdsel:
+            description: |
+              An integer describing the steps for input level shifter duty cycle
+              when asserted (high pulse width adjustment). Valid arguments are
+              from 0 to 63.
+            $ref: /schemas/types.yaml#/definitions/uint32
+
+        required:
+          - pins
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    soc {
+        #address-cells = <2>;
+        #size-cells = <2>;
+
+        pio: pinctrl@10211000 {
+          compatible = "mediatek,mt7622-pinctrl";
+          reg = <0 0x10211000 0 0x1000>;
+          gpio-controller;
+          #gpio-cells = <2>;
+
+          pinctrl_eth_default: eth-0 {
+            mux-mdio {
+              groups = "mdc_mdio";
+              function = "eth";
+              drive-strength = <12>;
+            };
+
+            mux-gmac2 {
+              groups = "rgmii_via_gmac2";
+              function = "eth";
+              drive-strength = <12>;
+            };
+
+            mux-esw {
+              groups = "esw";
+              function = "eth";
+              drive-strength = <8>;
+            };
+
+            conf-mdio {
+              pins = "MDC";
+              bias-pull-up;
+            };
+          };
+        };
+    };
diff --git a/Documentation/devicetree/bindings/pinctrl/mediatek,mt8183-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/mediatek,mt8183-pinctrl.yaml
new file mode 100644
index 000000000000..cc1509e9b981
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/mediatek,mt8183-pinctrl.yaml
@@ -0,0 +1,228 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pinctrl/mediatek,mt8183-pinctrl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek MT8183 Pin Controller Device Tree Bindings
+
+maintainers:
+  - Sean Wang <sean.wang@kernel.org>
+
+description: |+
+  The MediaTek's MT8183 Pin controller is used to control SoC pins.
+
+properties:
+  compatible:
+    const: mediatek,mt8183-pinctrl
+
+  reg:
+    minItems: 10
+    maxItems: 10
+
+  reg-names:
+    items:
+      - const: iocfg0
+      - const: iocfg1
+      - const: iocfg2
+      - const: iocfg3
+      - const: iocfg4
+      - const: iocfg5
+      - const: iocfg6
+      - const: iocfg7
+      - const: iocfg8
+      - const: eint
+
+  gpio-controller: true
+
+  "#gpio-cells":
+    const: 2
+    description: |
+      Number of cells in GPIO specifier. Since the generic GPIO
+      binding is used, the amount of cells must be specified as 2. See the below
+      mentioned gpio binding representation for description of particular cells.
+
+  gpio-ranges:
+    minItems: 1
+    maxItems: 5
+    description: |
+      GPIO valid number range.
+
+  interrupt-controller: true
+
+  interrupts:
+    maxItems: 1
+
+  "#interrupt-cells":
+    const: 2
+
+required:
+  - compatible
+  - reg
+  - gpio-controller
+  - "#gpio-cells"
+  - gpio-ranges
+
+patternProperties:
+  '-[0-9]+$':
+    type: object
+    additionalProperties: false
+    patternProperties:
+      'pins':
+        type: object
+        additionalProperties: false
+        description: |
+          A pinctrl node should contain at least one subnodes representing the
+          pinctrl groups available on the machine. Each subnode will list the
+          pins it needs, and how they should be configured, with regard to muxer
+          configuration, pullups, drive strength, input enable/disable and input
+          schmitt.
+        $ref: "/schemas/pinctrl/pincfg-node.yaml"
+
+        properties:
+          pinmux:
+            description:
+              integer array, represents gpio pin number and mux setting.
+              Supported pin number and mux varies for different SoCs, and are
+              defined as macros in <soc>-pinfunc.h directly.
+
+          bias-disable: true
+
+          bias-pull-up: true
+
+          bias-pull-down: true
+
+          input-enable: true
+
+          input-disable: true
+
+          output-low: true
+
+          output-high: true
+
+          input-schmitt-enable: true
+
+          input-schmitt-disable: true
+
+          drive-strength:
+            enum: [2, 4, 6, 8, 10, 12, 14, 16]
+
+          mediatek,drive-strength-adv:
+            description: |
+              Describe the specific driving setup property.
+              For I2C pins, the existing generic driving setup can only support
+              2/4/6/8/10/12/14/16mA driving. But in specific driving setup, they
+              can support 0.125/0.25/0.5/1mA adjustment. If we enable specific
+              driving setup, the existing generic setup will be disabled.
+              The specific driving setup is controlled by E1E0EN.
+              When E1=0/E0=0, the strength is 0.125mA.
+              When E1=0/E0=1, the strength is 0.25mA.
+              When E1=1/E0=0, the strength is 0.5mA.
+              When E1=1/E0=1, the strength is 1mA.
+              EN is used to enable or disable the specific driving setup.
+              Valid arguments are described as below:
+              0: (E1, E0, EN) = (0, 0, 0)
+              1: (E1, E0, EN) = (0, 0, 1)
+              2: (E1, E0, EN) = (0, 1, 0)
+              3: (E1, E0, EN) = (0, 1, 1)
+              4: (E1, E0, EN) = (1, 0, 0)
+              5: (E1, E0, EN) = (1, 0, 1)
+              6: (E1, E0, EN) = (1, 1, 0)
+              7: (E1, E0, EN) = (1, 1, 1)
+              So the valid arguments are from 0 to 7.
+            $ref: /schemas/types.yaml#/definitions/uint32
+            enum: [0, 1, 2, 3, 4, 5, 6, 7]
+
+          mediatek,pull-up-adv:
+            description: |
+              Pull up setings for 2 pull resistors, R0 and R1. User can
+              configure those special pins. Valid arguments are described as below:
+              0: (R1, R0) = (0, 0) which means R1 disabled and R0 disabled.
+              1: (R1, R0) = (0, 1) which means R1 disabled and R0 enabled.
+              2: (R1, R0) = (1, 0) which means R1 enabled and R0 disabled.
+              3: (R1, R0) = (1, 1) which means R1 enabled and R0 enabled.
+            $ref: /schemas/types.yaml#/definitions/uint32
+            enum: [0, 1, 2, 3]
+
+          mediatek,pull-down-adv:
+            description: |
+              Pull down settings for 2 pull resistors, R0 and R1. User can
+              configure those special pins. Valid arguments are described as below:
+              0: (R1, R0) = (0, 0) which means R1 disabled and R0 disabled.
+              1: (R1, R0) = (0, 1) which means R1 disabled and R0 enabled.
+              2: (R1, R0) = (1, 0) which means R1 enabled and R0 disabled.
+              3: (R1, R0) = (1, 1) which means R1 enabled and R0 enabled.
+            $ref: /schemas/types.yaml#/definitions/uint32
+            enum: [0, 1, 2, 3]
+
+          mediatek,tdsel:
+            description: |
+              An integer describing the steps for output level shifter duty
+              cycle when asserted (high pulse width adjustment). Valid arguments
+              are from 0 to 15.
+            $ref: /schemas/types.yaml#/definitions/uint32
+
+          mediatek,rdsel:
+            description: |
+              An integer describing the steps for input level shifter duty cycle
+              when asserted (high pulse width adjustment). Valid arguments are
+              from 0 to 63.
+            $ref: /schemas/types.yaml#/definitions/uint32
+
+        required:
+          - pinmux
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/pinctrl/mt8183-pinfunc.h>
+
+    soc {
+        #address-cells = <2>;
+        #size-cells = <2>;
+
+        pio: pinctrl@10005000 {
+          compatible = "mediatek,mt8183-pinctrl";
+          reg = <0 0x10005000 0 0x1000>,
+                <0 0x11f20000 0 0x1000>,
+                <0 0x11e80000 0 0x1000>,
+                <0 0x11e70000 0 0x1000>,
+                <0 0x11e90000 0 0x1000>,
+                <0 0x11d30000 0 0x1000>,
+                <0 0x11d20000 0 0x1000>,
+                <0 0x11c50000 0 0x1000>,
+                <0 0x11f30000 0 0x1000>,
+                <0 0x1000b000 0 0x1000>;
+          reg-names = "iocfg0", "iocfg1", "iocfg2",
+                "iocfg3", "iocfg4", "iocfg5",
+                "iocfg6", "iocfg7", "iocfg8",
+                "eint";
+          gpio-controller;
+          #gpio-cells = <2>;
+          gpio-ranges = <&pio 0 0 192>;
+          interrupt-controller;
+          interrupts = <GIC_SPI 177 IRQ_TYPE_LEVEL_HIGH>;
+          #interrupt-cells = <2>;
+
+          i2c0_pins_a: i2c-0 {
+            pins1 {
+              pinmux = <PINMUX_GPIO48__FUNC_SCL5>,
+                <PINMUX_GPIO49__FUNC_SDA5>;
+              mediatek,pull-up-adv = <3>;
+              mediatek,drive-strength-adv = <7>;
+            };
+          };
+
+          i2c1_pins_a: i2c-1 {
+            pins {
+              pinmux = <PINMUX_GPIO50__FUNC_SCL3>,
+                <PINMUX_GPIO51__FUNC_SDA3>;
+              mediatek,pull-down-adv = <2>;
+              mediatek,drive-strength-adv = <4>;
+            };
+          };
+        };
+    };
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-mt65xx.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-mt65xx.txt
deleted file mode 100644
index 5fe2c26c28bf..000000000000
--- a/Documentation/devicetree/bindings/pinctrl/pinctrl-mt65xx.txt
+++ /dev/null
@@ -1,156 +0,0 @@
-* Mediatek MT65XX Pin Controller
-
-The Mediatek's Pin controller is used to control SoC pins.
-
-Required properties:
-- compatible: value should be one of the following.
-	"mediatek,mt2701-pinctrl", compatible with mt2701 pinctrl.
-	"mediatek,mt2712-pinctrl", compatible with mt2712 pinctrl.
-	"mediatek,mt6397-pinctrl", compatible with mt6397 pinctrl.
-	"mediatek,mt7623-pinctrl", compatible with mt7623 pinctrl.
-	"mediatek,mt8127-pinctrl", compatible with mt8127 pinctrl.
-	"mediatek,mt8135-pinctrl", compatible with mt8135 pinctrl.
-	"mediatek,mt8167-pinctrl", compatible with mt8167 pinctrl.
-	"mediatek,mt8173-pinctrl", compatible with mt8173 pinctrl.
-	"mediatek,mt8365-pinctrl", compatible with mt8365 pinctrl.
-	"mediatek,mt8516-pinctrl", compatible with mt8516 pinctrl.
-- pins-are-numbered: Specify the subnodes are using numbered pinmux to
-  specify pins.
-- gpio-controller : Marks the device node as a gpio controller.
-- #gpio-cells: number of cells in GPIO specifier. Since the generic GPIO
-  binding is used, the amount of cells must be specified as 2. See the below
-  mentioned gpio binding representation for description of particular cells.
-
-	Eg: <&pio 6 0>
-	<[phandle of the gpio controller node]
-	[line number within the gpio controller]
-	[flags]>
-
-	Values for gpio specifier:
-	- Line number: is a value between 0 to 202.
-	- Flags:  bit field of flags, as defined in <dt-bindings/gpio/gpio.h>.
-            Only the following flags are supported:
-            0 - GPIO_ACTIVE_HIGH
-            1 - GPIO_ACTIVE_LOW
-
-Optional properties:
-- mediatek,pctl-regmap: Should be a phandle of the syscfg node.
-- reg: physicall address base for EINT registers
-- interrupt-controller: Marks the device node as an interrupt controller
-- #interrupt-cells: Should be two.
-- interrupts : The interrupt outputs from the controller.
-
-Please refer to pinctrl-bindings.txt in this directory for details of the
-common pinctrl bindings used by client devices.
-
-Subnode format
-A pinctrl node should contain at least one subnodes representing the
-pinctrl groups available on the machine. Each subnode will list the
-pins it needs, and how they should be configured, with regard to muxer
-configuration, pullups, drive strength, input enable/disable and input schmitt.
-
-    node {
-	pinmux = <PIN_NUMBER_PINMUX>;
-	GENERIC_PINCONFIG;
-    };
-
-Required properties:
-- pinmux: integer array, represents gpio pin number and mux setting.
-    Supported pin number and mux varies for different SoCs, and are defined
-    as macros in boot/dts/<soc>-pinfunc.h directly.
-
-Optional properties:
-- GENERIC_PINCONFIG: is the generic pinconfig options to use, bias-disable,
-    bias-pull-down, bias-pull-up, input-enable, input-disable, output-low, output-high,
-    input-schmitt-enable, input-schmitt-disable and drive-strength are valid.
-
-    Some special pins have extra pull up strength, there are R0 and R1 pull-up
-    resistors available, but for user, it's only need to set R1R0 as 00, 01, 10 or 11.
-    So when config bias-pull-up, it support arguments for those special pins.
-    Some macros have been defined for this usage, such as MTK_PUPD_SET_R1R0_00.
-    See dt-bindings/pinctrl/mt65xx.h.
-
-    When config drive-strength, it can support some arguments, such as
-    MTK_DRIVE_4mA, MTK_DRIVE_6mA, etc. See dt-bindings/pinctrl/mt65xx.h.
-
-Examples:
-
-#include "mt8135-pinfunc.h"
-
-...
-{
-	syscfg_pctl_a: syscfg-pctl-a@10005000 {
-		compatible = "mediatek,mt8135-pctl-a-syscfg", "syscon";
-		reg = <0 0x10005000 0 0x1000>;
-	};
-
-	syscfg_pctl_b: syscfg-pctl-b@1020c020 {
-		compatible = "mediatek,mt8135-pctl-b-syscfg", "syscon";
-		reg = <0 0x1020C020 0 0x1000>;
-	};
-
-	pinctrl@1c20800 {
-		compatible = "mediatek,mt8135-pinctrl";
-		reg = <0 0x1000B000 0 0x1000>;
-		mediatek,pctl-regmap = <&syscfg_pctl_a>, <&syscfg_pctl_b>;
-		pins-are-numbered;
-		gpio-controller;
-		#gpio-cells = <2>;
-		interrupt-controller;
-		#interrupt-cells = <2>;
-		interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>,
-				<GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>,
-				<GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>;
-
-		i2c0_pins_a: i2c0@0 {
-			pins1 {
-				pinmux = <MT8135_PIN_100_SDA0__FUNC_SDA0>,
-					 <MT8135_PIN_101_SCL0__FUNC_SCL0>;
-				bias-disable;
-			};
-		};
-
-		i2c1_pins_a: i2c1@0 {
-			pins {
-				pinmux = <MT8135_PIN_195_SDA1__FUNC_SDA1>,
-					 <MT8135_PIN_196_SCL1__FUNC_SCL1>;
-				bias-pull-up = <55>;
-			};
-		};
-
-		i2c2_pins_a: i2c2@0 {
-			pins1 {
-				pinmux = <MT8135_PIN_193_SDA2__FUNC_SDA2>;
-				bias-pull-down;
-			};
-
-			pins2 {
-				pinmux = <MT8135_PIN_49_WATCHDOG__FUNC_GPIO49>;
-				bias-pull-up;
-			};
-		};
-
-		i2c3_pins_a: i2c3@0 {
-			pins1 {
-				pinmux = <MT8135_PIN_40_DAC_CLK__FUNC_GPIO40>,
-					 <MT8135_PIN_41_DAC_WS__FUNC_GPIO41>;
-				bias-pull-up = <55>;
-			};
-
-			pins2 {
-				pinmux = <MT8135_PIN_35_SCL3__FUNC_SCL3>,
-					 <MT8135_PIN_36_SDA3__FUNC_SDA3>;
-				output-low;
-				bias-pull-up = <55>;
-			};
-
-			pins3 {
-				pinmux = <MT8135_PIN_57_JTCK__FUNC_GPIO57>,
-					 <MT8135_PIN_60_JTDI__FUNC_JTDI>;
-				drive-strength = <32>;
-			};
-		};
-
-		...
-	}
-};
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-mt6797.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-mt6797.txt
deleted file mode 100644
index bd83401e6179..000000000000
--- a/Documentation/devicetree/bindings/pinctrl/pinctrl-mt6797.txt
+++ /dev/null
@@ -1,83 +0,0 @@
-* MediaTek MT6797 Pin Controller
-
-The MediaTek's MT6797 Pin controller is used to control SoC pins.
-
-Required properties:
-- compatible: Value should be one of the following.
-              "mediatek,mt6797-pinctrl", compatible with mt6797 pinctrl.
-- reg:        Should contain address and size for gpio, iocfgl, iocfgb,
-              iocfgr and iocfgt register bases.
-- reg-names:  An array of strings describing the "reg" entries. Must
-              contain "gpio", "iocfgl", "iocfgb", "iocfgr", "iocfgt".
-- gpio-controller: Marks the device node as a gpio controller.
-- #gpio-cells: Should be two. The first cell is the gpio pin number
-               and the second cell is used for optional parameters.
-
-Optional properties:
-- interrupt-controller: Marks the device node as an interrupt controller.
-- #interrupt-cells: Should be two.
-- interrupts : The interrupt outputs from the controller.
-
-Please refer to pinctrl-bindings.txt in this directory for details of the
-common pinctrl bindings used by client devices.
-
-Subnode format
-A pinctrl node should contain at least one subnodes representing the
-pinctrl groups available on the machine. Each subnode will list the
-pins it needs, and how they should be configured, with regard to muxer
-configuration, pullups, drive strength, input enable/disable and input schmitt.
-
-    node {
-        pinmux = <PIN_NUMBER_PINMUX>;
-        GENERIC_PINCONFIG;
-    };
-
-Required properties:
-- pinmux: Integer array, represents gpio pin number and mux setting.
-    Supported pin number and mux varies for different SoCs, and are defined
-    as macros in dt-bindings/pinctrl/<soc>-pinfunc.h directly.
-
-Optional properties:
-- GENERIC_PINCONFIG: is the generic pinconfig options to use, bias-disable,
-    bias-pull, bias-pull-down, input-enable, input-schmitt-enable,
-    input-schmitt-disable, output-enable output-low, output-high,
-    drive-strength, and slew-rate are valid.
-
-    Valid arguments for 'slew-rate' are '0' for no slew rate controlled and
-    '1' for slower slew rate respectively. Valid arguments for 'drive-strength'
-    is limited, such as 2, 4, 8, 12, or 16 in mA.
-
-    Some optional vendor properties as defined are valid to specify in a
-    pinconf subnode:
-    - mediatek,tdsel: An integer describing the steps for output level shifter
-      duty cycle when asserted (high pulse width adjustment). Valid arguments
-      are from 0 to 15.
-    - mediatek,rdsel: An integer describing the steps for input level shifter
-      duty cycle when asserted (high pulse width adjustment). Valid arguments
-      are from 0 to 63.
-    - mediatek,pull-up-adv: An integer describing the code R1R0 as 0, 1, 2
-      or 3 for the advanced pull-up resistors.
-    - mediatek,pull-down-adv: An integer describing the code R1R0 as 0, 1, 2,
-      or 3 for the advanced pull-down resistors.
-
-Examples:
-
-        pio: pinctrl@10005000 {
-                compatible = "mediatek,mt6797-pinctrl";
-                reg = <0 0x10005000 0 0x1000>,
-                      <0 0x10002000 0 0x400>,
-                      <0 0x10002400 0 0x400>,
-                      <0 0x10002800 0 0x400>,
-                      <0 0x10002C00 0 0x400>;
-                reg-names = "gpio", "iocfgl", "iocfgb",
-                            "iocfgr", "iocfgt";
-                gpio-controller;
-                #gpio-cells = <2>;
-
-                uart1_pins_a: uart1 {
-                        pins1 {
-                                pinmux = <MT6797_GPIO232__FUNC_URXD1>,
-                                         <MT6797_GPIO233__FUNC_UTXD1>;
-                        };
-                };
-        };
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-mt7622.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-mt7622.txt
deleted file mode 100644
index 7a7aca1ed705..000000000000
--- a/Documentation/devicetree/bindings/pinctrl/pinctrl-mt7622.txt
+++ /dev/null
@@ -1,490 +0,0 @@
-== MediaTek MT7622 pinctrl controller ==
-
-Required properties for the root node:
- - compatible: Should be one of the following
-	       "mediatek,mt7622-pinctrl" for MT7622 SoC
-	       "mediatek,mt7629-pinctrl" for MT7629 SoC
- - reg: offset and length of the pinctrl space
-
- - gpio-controller: Marks the device node as a GPIO controller.
- - #gpio-cells: Should be two. The first cell is the pin number and the
-   second is the GPIO flags.
-
-Optional properties:
-- interrupt-controller  : Marks the device node as an interrupt controller
-
-If the property interrupt-controller is defined, following property is required
-- reg-names: A string describing the "reg" entries. Must contain "eint".
-- interrupts : The interrupt output from the controller.
-- #interrupt-cells: Should be two.
-
-Please refer to pinctrl-bindings.txt in this directory for details of the
-common pinctrl bindings used by client devices, including the meaning of the
-phrase "pin configuration node".
-
-MT7622 pin configuration nodes act as a container for an arbitrary number of
-subnodes. Each of these subnodes represents some desired configuration for a
-pin, a group, or a list of pins or groups. This configuration can include the
-mux function to select on those pin(s)/group(s), and various pin configuration
-parameters, such as pull-up, slew rate, etc.
-
-We support 2 types of configuration nodes. Those nodes can be either pinmux
-nodes or pinconf nodes. Each configuration node can consist of multiple nodes
-describing the pinmux and pinconf options.
-
-The name of each subnode doesn't matter as long as it is unique; all subnodes
-should be enumerated and processed purely based on their content.
-
-== pinmux nodes content ==
-
-The following generic properties as defined in pinctrl-bindings.txt are valid
-to specify in a pinmux subnode:
-
-Required properties are:
- - groups: An array of strings. Each string contains the name of a group.
-  Valid values for these names are listed below.
- - function: A string containing the name of the function to mux to the
-  group. Valid values for function names are listed below.
-
-== pinconf nodes content ==
-
-The following generic properties as defined in pinctrl-bindings.txt are valid
-to specify in a pinconf subnode:
-
-Required properties are:
- - pins: An array of strings. Each string contains the name of a pin.
-  Valid values for these names are listed below.
- - groups: An array of strings. Each string contains the name of a group.
-  Valid values for these names are listed below.
-
-Optional properies are:
- bias-disable, bias-pull, bias-pull-down, input-enable,
- input-schmitt-enable, input-schmitt-disable, output-enable
- output-low, output-high, drive-strength, slew-rate
-
- Valid arguments for 'slew-rate' are '0' for no slew rate controlled and '1' for
- slower slew rate respectively.
- Valid arguments for 'drive-strength', 4, 8, 12, or 16 in mA.
-
-The following specific properties as defined are valid to specify in a pinconf
-subnode:
-
-Optional properties are:
- - mediatek,tdsel: An integer describing the steps for output level shifter duty
-   cycle when asserted (high pulse width adjustment). Valid arguments are from 0
-   to 15.
- - mediatek,rdsel: An integer describing the steps for input level shifter duty
-   cycle when asserted (high pulse width adjustment). Valid arguments are from 0
-   to 63.
-
-== Valid values for pins, function and groups on MT7622 ==
-
-Valid values for pins are:
-pins can be referenced via the pin names as the below table shown and the
-related physical number is also put ahead of those names which helps cross
-references to pins between groups to know whether pins assignment conflict
-happens among devices try to acquire those available pins.
-
-	Pin #:  Valid values for pins
-	-----------------------------
-	PIN 0: "GPIO_A"
-	PIN 1: "I2S1_IN"
-	PIN 2: "I2S1_OUT"
-	PIN 3: "I2S_BCLK"
-	PIN 4: "I2S_WS"
-	PIN 5: "I2S_MCLK"
-	PIN 6: "TXD0"
-	PIN 7: "RXD0"
-	PIN 8: "SPI_WP"
-	PIN 9: "SPI_HOLD"
-	PIN 10: "SPI_CLK"
-	PIN 11: "SPI_MOSI"
-	PIN 12: "SPI_MISO"
-	PIN 13: "SPI_CS"
-	PIN 14: "I2C_SDA"
-	PIN 15: "I2C_SCL"
-	PIN 16: "I2S2_IN"
-	PIN 17: "I2S3_IN"
-	PIN 18: "I2S4_IN"
-	PIN 19: "I2S2_OUT"
-	PIN 20: "I2S3_OUT"
-	PIN 21: "I2S4_OUT"
-	PIN 22: "GPIO_B"
-	PIN 23: "MDC"
-	PIN 24: "MDIO"
-	PIN 25: "G2_TXD0"
-	PIN 26: "G2_TXD1"
-	PIN 27: "G2_TXD2"
-	PIN 28: "G2_TXD3"
-	PIN 29: "G2_TXEN"
-	PIN 30: "G2_TXC"
-	PIN 31: "G2_RXD0"
-	PIN 32: "G2_RXD1"
-	PIN 33: "G2_RXD2"
-	PIN 34: "G2_RXD3"
-	PIN 35: "G2_RXDV"
-	PIN 36: "G2_RXC"
-	PIN 37: "NCEB"
-	PIN 38: "NWEB"
-	PIN 39: "NREB"
-	PIN 40: "NDL4"
-	PIN 41: "NDL5"
-	PIN 42: "NDL6"
-	PIN 43: "NDL7"
-	PIN 44: "NRB"
-	PIN 45: "NCLE"
-	PIN 46: "NALE"
-	PIN 47: "NDL0"
-	PIN 48: "NDL1"
-	PIN 49: "NDL2"
-	PIN 50: "NDL3"
-	PIN 51: "MDI_TP_P0"
-	PIN 52: "MDI_TN_P0"
-	PIN 53: "MDI_RP_P0"
-	PIN 54: "MDI_RN_P0"
-	PIN 55: "MDI_TP_P1"
-	PIN 56: "MDI_TN_P1"
-	PIN 57: "MDI_RP_P1"
-	PIN 58: "MDI_RN_P1"
-	PIN 59: "MDI_RP_P2"
-	PIN 60: "MDI_RN_P2"
-	PIN 61: "MDI_TP_P2"
-	PIN 62: "MDI_TN_P2"
-	PIN 63: "MDI_TP_P3"
-	PIN 64: "MDI_TN_P3"
-	PIN 65: "MDI_RP_P3"
-	PIN 66: "MDI_RN_P3"
-	PIN 67: "MDI_RP_P4"
-	PIN 68: "MDI_RN_P4"
-	PIN 69: "MDI_TP_P4"
-	PIN 70: "MDI_TN_P4"
-	PIN 71: "PMIC_SCL"
-	PIN 72: "PMIC_SDA"
-	PIN 73: "SPIC1_CLK"
-	PIN 74: "SPIC1_MOSI"
-	PIN 75: "SPIC1_MISO"
-	PIN 76: "SPIC1_CS"
-	PIN 77: "GPIO_D"
-	PIN 78: "WATCHDOG"
-	PIN 79: "RTS3_N"
-	PIN 80: "CTS3_N"
-	PIN 81: "TXD3"
-	PIN 82: "RXD3"
-	PIN 83: "PERST0_N"
-	PIN 84: "PERST1_N"
-	PIN 85: "WLED_N"
-	PIN 86: "EPHY_LED0_N"
-	PIN 87: "AUXIN0"
-	PIN 88: "AUXIN1"
-	PIN 89: "AUXIN2"
-	PIN 90: "AUXIN3"
-	PIN 91: "TXD4"
-	PIN 92: "RXD4"
-	PIN 93: "RTS4_N"
-	PIN 94: "CST4_N"
-	PIN 95: "PWM1"
-	PIN 96: "PWM2"
-	PIN 97: "PWM3"
-	PIN 98: "PWM4"
-	PIN 99: "PWM5"
-	PIN 100: "PWM6"
-	PIN 101: "PWM7"
-	PIN 102: "GPIO_E"
-
-Valid values for function are:
-	"emmc", "eth", "i2c", "i2s", "ir", "led", "flash", "pcie",
-	"pmic", "pwm", "sd", "spi", "tdm", "uart", "watchdog"
-
-Valid values for groups are:
-additional data is put followingly with valid value allowing us to know which
-applicable function and which relevant pins (in pin#) are able applied for that
-group.
-
-	Valid value			function	pins (in pin#)
-	-------------------------------------------------------------------------
-	"emmc"				"emmc"		40, 41, 42, 43, 44, 45,
-							47, 48, 49, 50
-	"emmc_rst"			"emmc"		37
-	"esw"				"eth"		51, 52, 53, 54, 55, 56,
-							57, 58, 59, 60, 61, 62,
-							63, 64, 65, 66, 67, 68,
-							69, 70
-	"esw_p0_p1"			"eth"		51, 52, 53, 54, 55, 56,
-							57, 58
-	"esw_p2_p3_p4"			"eth"		59, 60, 61, 62, 63, 64,
-							65, 66, 67, 68, 69, 70
-	"rgmii_via_esw"			"eth"		59, 60, 61, 62, 63, 64,
-							65, 66, 67, 68, 69, 70
-	"rgmii_via_gmac1"		"eth"		59, 60, 61, 62, 63, 64,
-							65, 66, 67, 68, 69, 70
-	"rgmii_via_gmac2"		"eth"		25, 26, 27, 28, 29, 30,
-							31, 32, 33, 34, 35, 36
-	"mdc_mdio"			"eth"		23, 24
-	"i2c0"				"i2c"		14, 15
-	"i2c1_0"			"i2c"		55, 56
-	"i2c1_1"			"i2c"		73, 74
-	"i2c1_2"			"i2c"		87, 88
-	"i2c2_0"			"i2c"		57, 58
-	"i2c2_1"			"i2c"		75, 76
-	"i2c2_2"			"i2c"		89, 90
-	"i2s_in_mclk_bclk_ws"		"i2s"		3, 4, 5
-	"i2s1_in_data"			"i2s"		1
-	"i2s2_in_data"			"i2s"		16
-	"i2s3_in_data"			"i2s"		17
-	"i2s4_in_data"			"i2s"		18
-	"i2s_out_mclk_bclk_ws"		"i2s"		3, 4, 5
-	"i2s1_out_data"			"i2s"		2
-	"i2s2_out_data"			"i2s"		19
-	"i2s3_out_data"			"i2s"		20
-	"i2s4_out_data"			"i2s"		21
-	"ir_0_tx"			"ir"		16
-	"ir_1_tx"			"ir"		59
-	"ir_2_tx"			"ir"		99
-	"ir_0_rx"			"ir"		17
-	"ir_1_rx"			"ir"		60
-	"ir_2_rx"			"ir"		100
-	"ephy_leds"			"led"		86, 91, 92, 93, 94
-	"ephy0_led"			"led"		86
-	"ephy1_led"			"led"		91
-	"ephy2_led"			"led"		92
-	"ephy3_led"			"led"		93
-	"ephy4_led"			"led"		94
-	"wled"				"led"		85
-	"par_nand"			"flash"		37, 38, 39, 40, 41, 42,
-							43, 44, 45, 46, 47, 48,
-							49, 50
-	"snfi"				"flash"		8, 9, 10, 11, 12, 13
-	"spi_nor"			"flash"		8, 9, 10, 11, 12, 13
-	"pcie0_0_waken"			"pcie"		14
-	"pcie0_1_waken"			"pcie"		79
-	"pcie1_0_waken"			"pcie"		14
-	"pcie0_0_clkreq"		"pcie"		15
-	"pcie0_1_clkreq"		"pcie"		80
-	"pcie1_0_clkreq"		"pcie"		15
-	"pcie0_pad_perst"		"pcie"		83
-	"pcie1_pad_perst"		"pcie"		84
-	"pmic_bus"			"pmic"		71, 72
-	"pwm_ch1_0"			"pwm"		51
-	"pwm_ch1_1"			"pwm"		73
-	"pwm_ch1_2"			"pwm"		95
-	"pwm_ch2_0"			"pwm"		52
-	"pwm_ch2_1"			"pwm"		74
-	"pwm_ch2_2"			"pwm"		96
-	"pwm_ch3_0"			"pwm"		53
-	"pwm_ch3_1"			"pwm"		75
-	"pwm_ch3_2"			"pwm"		97
-	"pwm_ch4_0"			"pwm"		54
-	"pwm_ch4_1"			"pwm"		67
-	"pwm_ch4_2"			"pwm"		76
-	"pwm_ch4_3"			"pwm"		98
-	"pwm_ch5_0"			"pwm"		68
-	"pwm_ch5_1"			"pwm"		77
-	"pwm_ch5_2"			"pwm"		99
-	"pwm_ch6_0"			"pwm"		69
-	"pwm_ch6_1"			"pwm"		78
-	"pwm_ch6_2"			"pwm"		81
-	"pwm_ch6_3"			"pwm"		100
-	"pwm_ch7_0"			"pwm"		70
-	"pwm_ch7_1"			"pwm"		82
-	"pwm_ch7_2"			"pwm"		101
-	"sd_0"				"sd"		16, 17, 18, 19, 20, 21
-	"sd_1"				"sd"		25, 26, 27, 28, 29, 30
-	"spic0_0"			"spi"		63, 64, 65, 66
-	"spic0_1"			"spi"		79, 80, 81, 82
-	"spic1_0"			"spi"		67, 68, 69, 70
-	"spic1_1"			"spi"		73, 74, 75, 76
-	"spic2_0_wp_hold"		"spi"		8, 9
-	"spic2_0"			"spi"		10, 11, 12, 13
-	"tdm_0_out_mclk_bclk_ws"	"tdm"		8, 9, 10
-	"tdm_0_in_mclk_bclk_ws"		"tdm"		11, 12, 13
-	"tdm_0_out_data"		"tdm"		20
-	"tdm_0_in_data"			"tdm"		21
-	"tdm_1_out_mclk_bclk_ws"	"tdm"		57, 58, 59
-	"tdm_1_in_mclk_bclk_ws"		"tdm"		60, 61, 62
-	"tdm_1_out_data"		"tdm"		55
-	"tdm_1_in_data"			"tdm"		56
-	"uart0_0_tx_rx"			"uart"		6, 7
-	"uart1_0_tx_rx"			"uart"		55, 56
-	"uart1_0_rts_cts"		"uart"		57, 58
-	"uart1_1_tx_rx"			"uart"		73, 74
-	"uart1_1_rts_cts"		"uart"		75, 76
-	"uart2_0_tx_rx"			"uart"		3, 4
-	"uart2_0_rts_cts"		"uart"		1, 2
-	"uart2_1_tx_rx"			"uart"		51, 52
-	"uart2_1_rts_cts"		"uart"		53, 54
-	"uart2_2_tx_rx"			"uart"		59, 60
-	"uart2_2_rts_cts"		"uart"		61, 62
-	"uart2_3_tx_rx"			"uart"		95, 96
-	"uart3_0_tx_rx"			"uart"		57, 58
-	"uart3_1_tx_rx"			"uart"		81, 82
-	"uart3_1_rts_cts"		"uart"		79, 80
-	"uart4_0_tx_rx"			"uart"		61, 62
-	"uart4_1_tx_rx"			"uart"		91, 92
-	"uart4_1_rts_cts"		"uart"		93, 94
-	"uart4_2_tx_rx"			"uart"		97, 98
-	"uart4_2_rts_cts"		"uart"		95, 96
-	"watchdog"			"watchdog"	78
-
-
-== Valid values for pins, function and groups on MT7629 ==
-
-	Pin #:  Valid values for pins
-	-----------------------------
-	PIN 0: "TOP_5G_CLK"
-	PIN 1: "TOP_5G_DATA"
-	PIN 2: "WF0_5G_HB0"
-	PIN 3: "WF0_5G_HB1"
-	PIN 4: "WF0_5G_HB2"
-	PIN 5: "WF0_5G_HB3"
-	PIN 6: "WF0_5G_HB4"
-	PIN 7: "WF0_5G_HB5"
-	PIN 8: "WF0_5G_HB6"
-	PIN 9: "XO_REQ"
-	PIN 10: "TOP_RST_N"
-	PIN 11: "SYS_WATCHDOG"
-	PIN 12: "EPHY_LED0_N_JTDO"
-	PIN 13: "EPHY_LED1_N_JTDI"
-	PIN 14: "EPHY_LED2_N_JTMS"
-	PIN 15: "EPHY_LED3_N_JTCLK"
-	PIN 16: "EPHY_LED4_N_JTRST_N"
-	PIN 17: "WF2G_LED_N"
-	PIN 18: "WF5G_LED_N"
-	PIN 19: "I2C_SDA"
-	PIN 20: "I2C_SCL"
-	PIN 21: "GPIO_9"
-	PIN 22: "GPIO_10"
-	PIN 23: "GPIO_11"
-	PIN 24: "GPIO_12"
-	PIN 25: "UART1_TXD"
-	PIN 26: "UART1_RXD"
-	PIN 27: "UART1_CTS"
-	PIN 28: "UART1_RTS"
-	PIN 29: "UART2_TXD"
-	PIN 30: "UART2_RXD"
-	PIN 31: "UART2_CTS"
-	PIN 32: "UART2_RTS"
-	PIN 33: "MDI_TP_P1"
-	PIN 34: "MDI_TN_P1"
-	PIN 35: "MDI_RP_P1"
-	PIN 36: "MDI_RN_P1"
-	PIN 37: "MDI_RP_P2"
-	PIN 38: "MDI_RN_P2"
-	PIN 39: "MDI_TP_P2"
-	PIN 40: "MDI_TN_P2"
-	PIN 41: "MDI_TP_P3"
-	PIN 42: "MDI_TN_P3"
-	PIN 43: "MDI_RP_P3"
-	PIN 44: "MDI_RN_P3"
-	PIN 45: "MDI_RP_P4"
-	PIN 46: "MDI_RN_P4"
-	PIN 47: "MDI_TP_P4"
-	PIN 48: "MDI_TN_P4"
-	PIN 49: "SMI_MDC"
-	PIN 50: "SMI_MDIO"
-	PIN 51: "PCIE_PERESET_N"
-	PIN 52: "PWM_0"
-	PIN 53: "GPIO_0"
-	PIN 54: "GPIO_1"
-	PIN 55: "GPIO_2"
-	PIN 56: "GPIO_3"
-	PIN 57: "GPIO_4"
-	PIN 58: "GPIO_5"
-	PIN 59: "GPIO_6"
-	PIN 60: "GPIO_7"
-	PIN 61: "GPIO_8"
-	PIN 62: "SPI_CLK"
-	PIN 63: "SPI_CS"
-	PIN 64: "SPI_MOSI"
-	PIN 65: "SPI_MISO"
-	PIN 66: "SPI_WP"
-	PIN 67: "SPI_HOLD"
-	PIN 68: "UART0_TXD"
-	PIN 69: "UART0_RXD"
-	PIN 70: "TOP_2G_CLK"
-	PIN 71: "TOP_2G_DATA"
-	PIN 72: "WF0_2G_HB0"
-	PIN 73: "WF0_2G_HB1"
-	PIN 74: "WF0_2G_HB2"
-	PIN 75: "WF0_2G_HB3"
-	PIN 76: "WF0_2G_HB4"
-	PIN 77: "WF0_2G_HB5"
-	PIN 78: "WF0_2G_HB6"
-
-Valid values for function are:
-	"eth", "i2c", "led", "flash", "pcie", "pwm", "spi", "uart",
-	"watchdog", "wifi"
-
-Valid values for groups are:
-	Valid value			function	pins (in pin#)
-	----------------------------------------------------------------
-	"mdc_mdio"			"eth"		23, 24
-	"i2c_0"				"i2c"		19, 20
-	"i2c_1"				"i2c"		53, 54
-	"ephy_leds"			"led"		12, 13, 14, 15, 16,
-							17, 18
-	"ephy0_led"			"led"		12
-	"ephy1_led"			"led"		13
-	"ephy2_led"			"led"		14
-	"ephy3_led"			"led"		15
-	"ephy4_led"			"led"		16
-	"wf2g_led"			"led"		17
-	"wf5g_led"			"led"		18
-	"snfi"				"flash"		62, 63, 64, 65, 66, 67
-	"spi_nor"			"flash"		62, 63, 64, 65, 66, 67
-	"pcie_pereset"			"pcie"		51
-	"pcie_wake"			"pcie"		55
-	"pcie_clkreq"			"pcie"		56
-	"pwm_0"				"pwm"		52
-	"pwm_1"				"pwm"		61
-	"spi_0"				"spi"		21, 22, 23, 24
-	"spi_1"				"spi"		62, 63, 64, 65
-	"spi_wp"			"spi"		66
-	"spi_hold"			"spi"		67
-	"uart0_txd_rxd"			"uart"		68, 69
-	"uart1_0_txd_rxd"		"uart"		25, 26
-	"uart1_0_cts_rts"		"uart"		27, 28
-	"uart1_1_txd_rxd"		"uart"		53, 54
-	"uart1_1_cts_rts"		"uart"		55, 56
-	"uart2_0_txd_rxd"		"uart"		29, 30
-	"uart2_0_cts_rts"		"uart"		31, 32
-	"uart2_1_txd_rxd"		"uart"		57, 58
-	"uart2_1_cts_rts"		"uart"		59, 60
-	"watchdog"			"watchdog"	11
-	"wf0_2g"			"wifi"		70, 71, 72, 73, 74,
-							75, 76, 77, 78
-	"wf0_5g"			"wifi"		0, 1, 2, 3, 4, 5, 6,
-							7, 8, 9, 10
-
-Example:
-
-	pio: pinctrl@10211000 {
-		compatible = "mediatek,mt7622-pinctrl";
-		reg = <0 0x10211000 0 0x1000>;
-		gpio-controller;
-		#gpio-cells = <2>;
-
-		pinctrl_eth_default: eth-default {
-			mux-mdio {
-				groups = "mdc_mdio";
-				function = "eth";
-				drive-strength = <12>;
-			};
-
-			mux-gmac2 {
-				groups = "gmac2";
-				function = "eth";
-				drive-strength = <12>;
-			};
-
-			mux-esw {
-				groups = "esw";
-				function = "eth";
-				drive-strength = <8>;
-			};
-
-			conf-mdio {
-				pins = "MDC";
-				bias-pull-up;
-			};
-		};
-	};
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-mt8183.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-mt8183.txt
deleted file mode 100644
index eccbe3f55d3f..000000000000
--- a/Documentation/devicetree/bindings/pinctrl/pinctrl-mt8183.txt
+++ /dev/null
@@ -1,132 +0,0 @@
-* Mediatek MT8183 Pin Controller
-
-The Mediatek's Pin controller is used to control SoC pins.
-
-Required properties:
-- compatible: value should be one of the following.
-	"mediatek,mt8183-pinctrl", compatible with mt8183 pinctrl.
-- gpio-controller : Marks the device node as a gpio controller.
-- #gpio-cells: number of cells in GPIO specifier. Since the generic GPIO
-  binding is used, the amount of cells must be specified as 2. See the below
-  mentioned gpio binding representation for description of particular cells.
-- gpio-ranges : gpio valid number range.
-- reg: physical address base for gpio base registers. There are 10 GPIO
-  physical address base in mt8183.
-
-Optional properties:
-- reg-names: gpio base register names. There are 10 gpio base register
-  names in mt8183. They are "iocfg0", "iocfg1", "iocfg2", "iocfg3", "iocfg4",
-  "iocfg5", "iocfg6", "iocfg7", "iocfg8", "eint".
-- interrupt-controller: Marks the device node as an interrupt controller
-- #interrupt-cells: Should be two.
-- interrupts : The interrupt outputs to sysirq.
-
-Please refer to pinctrl-bindings.txt in this directory for details of the
-common pinctrl bindings used by client devices.
-
-Subnode format
-A pinctrl node should contain at least one subnodes representing the
-pinctrl groups available on the machine. Each subnode will list the
-pins it needs, and how they should be configured, with regard to muxer
-configuration, pullups, drive strength, input enable/disable and input schmitt.
-
-    node {
-	pinmux = <PIN_NUMBER_PINMUX>;
-	GENERIC_PINCONFIG;
-    };
-
-Required properties:
-- pinmux: integer array, represents gpio pin number and mux setting.
-    Supported pin number and mux varies for different SoCs, and are defined
-    as macros in boot/dts/<soc>-pinfunc.h directly.
-
-Optional properties:
-- GENERIC_PINCONFIG: is the generic pinconfig options to use, bias-disable,
-    bias-pull-down, bias-pull-up, input-enable, input-disable, output-low,
-    output-high, input-schmitt-enable, input-schmitt-disable
-    and drive-strength are valid.
-
-    Some special pins have extra pull up strength, there are R0 and R1 pull-up
-    resistors available, but for user, it's only need to set R1R0 as 00, 01,
-    10 or 11. So It needs config "mediatek,pull-up-adv" or
-    "mediatek,pull-down-adv" to support arguments for those special pins.
-    Valid arguments are from 0 to 3.
-
-    mediatek,tdsel: An integer describing the steps for output level shifter
-      duty cycle when asserted (high pulse width adjustment). Valid arguments
-      are from 0 to 15.
-    mediatek,rdsel: An integer describing the steps for input level shifter
-      duty cycle when asserted (high pulse width adjustment). Valid arguments
-      are from 0 to 63.
-
-    When config drive-strength, it can support some arguments, such as
-    MTK_DRIVE_4mA, MTK_DRIVE_6mA, etc. See dt-bindings/pinctrl/mt65xx.h.
-    It can only support 2/4/6/8/10/12/14/16mA in mt8183.
-    For I2C pins, there are existing generic driving setup and the specific
-    driving setup. I2C pins can only support 2/4/6/8/10/12/14/16mA driving
-    adjustment in generic driving setup. But in specific driving setup,
-    they can support 0.125/0.25/0.5/1mA adjustment. If we enable specific
-    driving setup for I2C pins, the existing generic driving setup will be
-    disabled. For some special features, we need the I2C pins specific
-    driving setup. The specific driving setup is controlled by E1E0EN.
-    So we need add extra vendor driving preperty instead of
-    the generic driving property.
-    We can add "mediatek,drive-strength-adv = <XXX>;" to describe the specific
-    driving setup property. "XXX" means the value of E1E0EN. EN is 0 or 1.
-    It is used to enable or disable the specific driving setup.
-    E1E0 is used to describe the detail strength specification of the I2C pin.
-    When E1=0/E0=0, the strength is 0.125mA.
-    When E1=0/E0=1, the strength is 0.25mA.
-    When E1=1/E0=0, the strength is 0.5mA.
-    When E1=1/E0=1, the strength is 1mA.
-    So the valid arguments of "mediatek,drive-strength-adv" are from 0 to 7.
-
-Examples:
-
-#include "mt8183-pinfunc.h"
-
-...
-{
-	pio: pinctrl@10005000 {
-		compatible = "mediatek,mt8183-pinctrl";
-		reg = <0 0x10005000 0 0x1000>,
-		      <0 0x11f20000 0 0x1000>,
-		      <0 0x11e80000 0 0x1000>,
-		      <0 0x11e70000 0 0x1000>,
-		      <0 0x11e90000 0 0x1000>,
-		      <0 0x11d30000 0 0x1000>,
-		      <0 0x11d20000 0 0x1000>,
-		      <0 0x11c50000 0 0x1000>,
-		      <0 0x11f30000 0 0x1000>,
-		      <0 0x1000b000 0 0x1000>;
-		reg-names = "iocfg0", "iocfg1", "iocfg2",
-			    "iocfg3", "iocfg4", "iocfg5",
-			    "iocfg6", "iocfg7", "iocfg8",
-			    "eint";
-		gpio-controller;
-		#gpio-cells = <2>;
-		gpio-ranges = <&pio 0 0 192>;
-		interrupt-controller;
-		interrupts = <GIC_SPI 177 IRQ_TYPE_LEVEL_HIGH>;
-		#interrupt-cells = <2>;
-
-		i2c0_pins_a: i2c0 {
-			pins1 {
-				pinmux = <PINMUX_GPIO48__FUNC_SCL5>,
-					 <PINMUX_GPIO49__FUNC_SDA5>;
-				mediatek,pull-up-adv = <3>;
-				mediatek,drive-strength-adv = <7>;
-			};
-		};
-
-		i2c1_pins_a: i2c1 {
-			pins {
-				pinmux = <PINMUX_GPIO50__FUNC_SCL3>,
-					 <PINMUX_GPIO51__FUNC_SDA3>;
-				mediatek,pull-down-adv = <2>;
-				mediatek,drive-strength-adv = <4>;
-			};
-		};
-		...
-	};
-};
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-mt8195.yaml b/Documentation/devicetree/bindings/pinctrl/pinctrl-mt8195.yaml
index 2f12ec59eee5..e17a399e0904 100644
--- a/Documentation/devicetree/bindings/pinctrl/pinctrl-mt8195.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-mt8195.yaml
@@ -80,10 +80,7 @@ patternProperties:
           as macros in dt-bindings/pinctrl/<soc>-pinfunc.h directly.
 
       drive-strength:
-        description: |
-          It can support some arguments which is from 0 to 7. It can only support
-          2/4/6/8/10/12/14/16mA in mt8195.
-        enum: [0, 1, 2, 3, 4, 5, 6, 7]
+        enum: [2, 4, 6, 8, 10, 12, 14, 16]
 
       bias-pull-down: true
 
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-pinctrl.yaml
new file mode 100644
index 000000000000..3b02dc6626ed
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-pinctrl.yaml
@@ -0,0 +1,133 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pinctrl/qcom,mdm9607-pinctrl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Technologies, Inc. MDM9607 TLMM block
+
+maintainers:
+  - Konrad Dybcio <konrad.dybcio@somainline.org>
+
+description: |
+  This binding describes the Top Level Mode Multiplexer block found in the
+  MDM9607 platform.
+
+allOf:
+  - $ref: /schemas/pinctrl/qcom,tlmm-common.yaml#
+
+properties:
+  compatible:
+    const: qcom,mdm9607-tlmm
+
+  reg:
+    maxItems: 1
+
+  interrupts: true
+  interrupt-controller: true
+  '#interrupt-cells': true
+  gpio-controller: true
+  gpio-reserved-ranges: true
+  '#gpio-cells': true
+  gpio-ranges: true
+  wakeup-parent: true
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+patternProperties:
+  '-state$':
+    oneOf:
+      - $ref: "#/$defs/qcom-mdm9607-tlmm-state"
+      - patternProperties:
+          ".*":
+            $ref: "#/$defs/qcom-mdm9607-tlmm-state"
+
+'$defs':
+  qcom-mdm9607-tlmm-state:
+    type: object
+    description:
+      Pinctrl node's client devices use subnodes for desired pin configuration.
+      Client device subnodes use below standard properties.
+    $ref: "qcom,tlmm-common.yaml#/$defs/qcom-tlmm-state"
+
+    properties:
+      pins:
+        description:
+          List of gpio pins affected by the properties specified in this
+          subnode.
+        items:
+          oneOf:
+            - pattern: "^gpio([1-9]|[1-7][0-9]|80)$"
+            - enum: [ sdc1_clk, sdc1_cmd, sdc1_data, sdc2_clk, sdc2_cmd,
+                      sdc2_data, qdsd_cmd, qdsd_data0, qdsd_data1, qdsd_data2,
+                      qdsd_data3 ]
+        minItems: 1
+        maxItems: 16
+
+      function:
+        description:
+          Specify the alternative function to be configured for the specified
+          pins.
+
+        enum: [ adsp_ext, atest_bbrx0, atest_bbrx1, atest_char, atest_char0,
+                atest_char1, atest_char2, atest_char3,
+                atest_combodac_to_gpio_native, atest_gpsadc_dtest0_native,
+                atest_gpsadc_dtest1_native, atest_tsens, backlight_en_b,
+                bimc_dte0, bimc_dte1, blsp1_spi, blsp2_spi, blsp3_spi,
+                blsp_i2c1, blsp_i2c2, blsp_i2c3, blsp_i2c4, blsp_i2c5,
+                blsp_i2c6, blsp_spi1, blsp_spi2, blsp_spi3, blsp_spi4,
+                blsp_spi5, blsp_spi6, blsp_uart1, blsp_uart2, blsp_uart3,
+                blsp_uart4, blsp_uart5, blsp_uart6, blsp_uim1, blsp_uim2,
+                codec_int, codec_rst, coex_uart, cri_trng, cri_trng0,
+                cri_trng1, dbg_out, ebi0_wrcdc, ebi2_a, ebi2_a_d_8_b,
+                ebi2_lcd, ebi2_lcd_cs_n_b, ebi2_lcd_te_b, eth_irq, eth_rst,
+                gcc_gp1_clk_a, gcc_gp1_clk_b, gcc_gp2_clk_a, gcc_gp2_clk_b,
+                gcc_gp3_clk_a, gcc_gp3_clk_b, gcc_plltest, gcc_tlmm, gmac_mdio,
+                gpio, gsm0_tx, lcd_rst, ldo_en, ldo_update, m_voc, modem_tsync,
+                nav_ptp_pps_in_a, nav_ptp_pps_in_b, nav_tsync_out_a,
+                nav_tsync_out_b, pa_indicator, pbs0, pbs1, pbs2,
+                pri_mi2s_data0_a, pri_mi2s_data1_a, pri_mi2s_mclk_a,
+                pri_mi2s_sck_a, pri_mi2s_ws_a, prng_rosc, ptp_pps_out_a,
+                ptp_pps_out_b, pwr_crypto_enabled_a, pwr_crypto_enabled_b,
+                pwr_modem_enabled_a, pwr_modem_enabled_b, pwr_nav_enabled_a,
+                pwr_nav_enabled_b, qdss_cti_trig_in_a0, qdss_cti_trig_in_a1,
+                qdss_cti_trig_in_b0, qdss_cti_trig_in_b1, qdss_cti_trig_out_a0,
+                qdss_cti_trig_out_a1, qdss_cti_trig_out_b0, qdss_cti_trig_out_b1,
+                qdss_traceclk_a, qdss_traceclk_b, qdss_tracectl_a,
+                qdss_tracectl_b, qdss_tracedata_a, qdss_tracedata_b, rcm_marker1,
+                rcm_marker2, sd_write, sec_mi2s, sensor_en, sensor_int2,
+                sensor_int3, sensor_rst, ssbi1, ssbi2, touch_rst, ts_int,
+                uim1_clk, uim1_data, uim1_present, uim1_reset, uim2_clk,
+                uim2_data, uim2_present, uim2_reset, uim_batt, wlan_en1, ]
+
+      bias-disable: true
+      bias-pull-down: true
+      bias-pull-up: true
+      drive-strength: true
+      input-enable: true
+      output-high: true
+      output-low: true
+
+    required:
+      - pins
+      - function
+
+    additionalProperties: false
+
+examples:
+  - |
+        #include <dt-bindings/interrupt-controller/arm-gic.h>
+        tlmm: pinctrl@1000000 {
+          compatible = "qcom,mdm9607-tlmm";
+          reg = <0x01000000 0x300000>;
+          interrupts = <GIC_SPI 208 IRQ_TYPE_LEVEL_HIGH>;
+          gpio-controller;
+          gpio-ranges = <&msmgpio 0 0 80>;
+          #gpio-cells = <2>;
+          interrupt-controller;
+          #interrupt-cells = <2>;
+        };
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.txt b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.txt
deleted file mode 100644
index 161216daf463..000000000000
--- a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.txt
+++ /dev/null
@@ -1,288 +0,0 @@
-Qualcomm PMIC GPIO block
-
-This binding describes the GPIO block(s) found in the 8xxx series of
-PMIC's from Qualcomm.
-
-- compatible:
-	Usage: required
-	Value type: <string>
-	Definition: must be one of:
-		    "qcom,pm8005-gpio"
-		    "qcom,pm8018-gpio"
-		    "qcom,pm8038-gpio"
-		    "qcom,pm8058-gpio"
-		    "qcom,pm8916-gpio"
-		    "qcom,pm8917-gpio"
-		    "qcom,pm8921-gpio"
-		    "qcom,pm8941-gpio"
-		    "qcom,pm8950-gpio"
-		    "qcom,pm8994-gpio"
-		    "qcom,pm8998-gpio"
-		    "qcom,pma8084-gpio"
-		    "qcom,pmi8950-gpio"
-		    "qcom,pmi8994-gpio"
-		    "qcom,pmi8998-gpio"
-		    "qcom,pms405-gpio"
-		    "qcom,pm660-gpio"
-		    "qcom,pm660l-gpio"
-		    "qcom,pm8150-gpio"
-		    "qcom,pm8150b-gpio"
-		    "qcom,pm8350-gpio"
-		    "qcom,pm8350b-gpio"
-		    "qcom,pm8350c-gpio"
-		    "qcom,pmk8350-gpio"
-		    "qcom,pm7325-gpio"
-		    "qcom,pmr735a-gpio"
-		    "qcom,pmr735b-gpio"
-		    "qcom,pm6150-gpio"
-		    "qcom,pm6150l-gpio"
-		    "qcom,pm8008-gpio"
-		    "qcom,pmx55-gpio"
-
-		    And must contain either "qcom,spmi-gpio" or "qcom,ssbi-gpio"
-		    if the device is on an spmi bus or an ssbi bus respectively
-
-- reg:
-	Usage: required
-	Value type: <prop-encoded-array>
-	Definition: Register base of the GPIO block and length.
-
-- interrupts:
-	Usage: required
-	Value type: <prop-encoded-array>
-	Definition: Must contain an array of encoded interrupt specifiers for
-		    each available GPIO
-
-- gpio-controller:
-	Usage: required
-	Value type: <none>
-	Definition: Mark the device node as a GPIO controller
-
-- #gpio-cells:
-	Usage: required
-	Value type: <u32>
-	Definition: Must be 2;
-		    the first cell will be used to define gpio number and the
-		    second denotes the flags for this gpio
-
-Please refer to ../gpio/gpio.txt and ../interrupt-controller/interrupts.txt for
-a general description of GPIO and interrupt bindings.
-
-Please refer to pinctrl-bindings.txt in this directory for details of the
-common pinctrl bindings used by client devices, including the meaning of the
-phrase "pin configuration node".
-
-The pin configuration nodes act as a container for an arbitrary number of
-subnodes. Each of these subnodes represents some desired configuration for a
-pin or a list of pins. This configuration can include the
-mux function to select on those pin(s), and various pin configuration
-parameters, as listed below.
-
-
-SUBNODES:
-
-The name of each subnode is not important; all subnodes should be enumerated
-and processed purely based on their content.
-
-Each subnode only affects those parameters that are explicitly listed. In
-other words, a subnode that lists a mux function but no pin configuration
-parameters implies no information about any pin configuration parameters.
-Similarly, a pin subnode that describes a pullup parameter implies no
-information about e.g. the mux function.
-
-The following generic properties as defined in pinctrl-bindings.txt are valid
-to specify in a pin configuration subnode:
-
-- pins:
-	Usage: required
-	Value type: <string-array>
-	Definition: List of gpio pins affected by the properties specified in
-		    this subnode.  Valid pins are:
-		    gpio1-gpio4 for pm8005
-		    gpio1-gpio6 for pm8018
-		    gpio1-gpio12 for pm8038
-		    gpio1-gpio40 for pm8058
-		    gpio1-gpio4 for pm8916
-		    gpio1-gpio38 for pm8917
-		    gpio1-gpio44 for pm8921
-		    gpio1-gpio36 for pm8941
-		    gpio1-gpio8 for pm8950 (hole on gpio3)
-		    gpio1-gpio22 for pm8994
-		    gpio1-gpio26 for pm8998
-		    gpio1-gpio22 for pma8084
-		    gpio1-gpio2 for pmi8950
-		    gpio1-gpio10 for pmi8994
-		    gpio1-gpio12 for pms405 (holes on gpio1, gpio9 and gpio10)
-		    gpio1-gpio10 for pm8150 (holes on gpio2, gpio5, gpio7
-					     and gpio8)
-		    gpio1-gpio12 for pm8150b (holes on gpio3, gpio4, gpio7)
-		    gpio1-gpio12 for pm8150l (hole on gpio7)
-		    gpio1-gpio10 for pm8350
-		    gpio1-gpio8 for pm8350b
-		    gpio1-gpio9 for pm8350c
-		    gpio1-gpio4 for pmk8350
-		    gpio1-gpio10 for pm7325
-		    gpio1-gpio4 for pmr735a
-		    gpio1-gpio4 for pmr735b
-		    gpio1-gpio10 for pm6150
-		    gpio1-gpio12 for pm6150l
-		    gpio1-gpio2 for pm8008
-		    gpio1-gpio11 for pmx55 (holes on gpio3, gpio7, gpio10
-					    and gpio11)
-
-- function:
-	Usage: required
-	Value type: <string>
-	Definition: Specify the alternative function to be configured for the
-		    specified pins.  Valid values are:
-		    "normal",
-		    "paired",
-		    "func1",
-		    "func2",
-		    "dtest1",
-		    "dtest2",
-		    "dtest3",
-		    "dtest4",
-		    And following values are supported by LV/MV GPIO subtypes:
-		    "func3",
-		    "func4"
-
-- bias-disable:
-	Usage: optional
-	Value type: <none>
-	Definition: The specified pins should be configured as no pull.
-
-- bias-pull-down:
-	Usage: optional
-	Value type: <none>
-	Definition: The specified pins should be configured as pull down.
-
-- bias-pull-up:
-	Usage: optional
-	Value type: <empty>
-	Definition: The specified pins should be configured as pull up.
-
-- qcom,pull-up-strength:
-	Usage: optional
-	Value type: <u32>
-	Definition: Specifies the strength to use for pull up, if selected.
-		    Valid values are; as defined in
-		    <dt-bindings/pinctrl/qcom,pmic-gpio.h>:
-		    1: 30uA                     (PMIC_GPIO_PULL_UP_30)
-		    2: 1.5uA                    (PMIC_GPIO_PULL_UP_1P5)
-		    3: 31.5uA                   (PMIC_GPIO_PULL_UP_31P5)
-		    4: 1.5uA + 30uA boost       (PMIC_GPIO_PULL_UP_1P5_30)
-		    If this property is omitted 30uA strength will be used if
-		    pull up is selected
-
-- bias-high-impedance:
-	Usage: optional
-	Value type: <none>
-	Definition: The specified pins will put in high-Z mode and disabled.
-
-- input-enable:
-	Usage: optional
-	Value type: <none>
-	Definition: The specified pins are put in input mode.
-
-- output-high:
-	Usage: optional
-	Value type: <none>
-	Definition: The specified pins are configured in output mode, driven
-		    high.
-
-- output-low:
-	Usage: optional
-	Value type: <none>
-	Definition: The specified pins are configured in output mode, driven
-		    low.
-
-- power-source:
-	Usage: optional
-	Value type: <u32>
-	Definition: Selects the power source for the specified pins. Valid
-		    power sources are defined per chip in
-		    <dt-bindings/pinctrl/qcom,pmic-gpio.h>
-
-- qcom,drive-strength:
-	Usage: optional
-	Value type: <u32>
-	Definition: Selects the drive strength for the specified pins. Value
-		    drive strengths are:
-		    0: no (PMIC_GPIO_STRENGTH_NO)
-		    1: high (PMIC_GPIO_STRENGTH_HIGH) 0.9mA @ 1.8V - 1.9mA @ 2.6V
-		    2: medium (PMIC_GPIO_STRENGTH_MED) 0.6mA @ 1.8V - 1.25mA @ 2.6V
-		    3: low (PMIC_GPIO_STRENGTH_LOW) 0.15mA @ 1.8V - 0.3mA @ 2.6V
-		    as defined in <dt-bindings/pinctrl/qcom,pmic-gpio.h>
-
-- drive-push-pull:
-	Usage: optional
-	Value type: <none>
-	Definition: The specified pins are configured in push-pull mode.
-
-- drive-open-drain:
-	Usage: optional
-	Value type: <none>
-	Definition: The specified pins are configured in open-drain mode.
-
-- drive-open-source:
-	Usage: optional
-	Value type: <none>
-	Definition: The specified pins are configured in open-source mode.
-
-- qcom,analog-pass:
-	Usage: optional
-	Value type: <none>
-	Definition: The specified pins are configured in analog-pass-through mode.
-
-- qcom,atest:
-	Usage: optional
-	Value type: <u32>
-	Definition: Selects ATEST rail to route to GPIO when it's configured
-		    in analog-pass-through mode.
-		    Valid values are 1-4 corresponding to ATEST1 to ATEST4.
-
-- qcom,dtest-buffer:
-	Usage: optional
-	Value type: <u32>
-	Definition: Selects DTEST rail to route to GPIO when it's configured
-		    as digital input.
-		    Valid values are 1-4 corresponding to DTEST1 to DTEST4.
-
-Example:
-
-	pm8921_gpio: gpio@150 {
-		compatible = "qcom,pm8921-gpio", "qcom,ssbi-gpio";
-		reg = <0x150 0x160>;
-		interrupts = <192 1>, <193 1>, <194 1>,
-			     <195 1>, <196 1>, <197 1>,
-			     <198 1>, <199 1>, <200 1>,
-			     <201 1>, <202 1>, <203 1>,
-			     <204 1>, <205 1>, <206 1>,
-			     <207 1>, <208 1>, <209 1>,
-			     <210 1>, <211 1>, <212 1>,
-			     <213 1>, <214 1>, <215 1>,
-			     <216 1>, <217 1>, <218 1>,
-			     <219 1>, <220 1>, <221 1>,
-			     <222 1>, <223 1>, <224 1>,
-			     <225 1>, <226 1>, <227 1>,
-			     <228 1>, <229 1>, <230 1>,
-			     <231 1>, <232 1>, <233 1>,
-			     <234 1>, <235 1>;
-
-		gpio-controller;
-		#gpio-cells = <2>;
-
-		pm8921_gpio_keys: gpio-keys {
-			volume-keys {
-				pins = "gpio20", "gpio21";
-				function = "normal";
-
-				input-enable;
-				bias-pull-up;
-				drive-push-pull;
-				qcom,drive-strength = <PMIC_GPIO_STRENGTH_NO>;
-				power-source = <PM8921_GPIO_S4>;
-			};
-		};
-	};
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml
new file mode 100644
index 000000000000..9bd01db37dcd
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml
@@ -0,0 +1,239 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pinctrl/qcom,pmic-gpio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm PMIC GPIO block
+
+maintainers:
+  - Bjorn Andersson <bjorn.andersson@linaro.org>
+
+description:
+  This binding describes the GPIO block(s) found in the 8xxx series of
+  PMIC's from Qualcomm.
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - qcom,pm660-gpio
+          - qcom,pm660l-gpio
+          - qcom,pm6150-gpio
+          - qcom,pm6150l-gpio
+          - qcom,pm7325-gpio
+          - qcom,pm8005-gpio
+          - qcom,pm8008-gpio
+          - qcom,pm8018-gpio
+          - qcom,pm8038-gpio
+          - qcom,pm8058-gpio
+          - qcom,pm8150-gpio
+          - qcom,pm8150b-gpio
+          - qcom,pm8350-gpio
+          - qcom,pm8350b-gpio
+          - qcom,pm8350c-gpio
+          - qcom,pm8916-gpio
+          - qcom,pm8917-gpio
+          - qcom,pm8921-gpio
+          - qcom,pm8941-gpio
+          - qcom,pm8950-gpio
+          - qcom,pm8994-gpio
+          - qcom,pm8998-gpio
+          - qcom,pma8084-gpio
+          - qcom,pmi8950-gpio
+          - qcom,pmi8994-gpio
+          - qcom,pmi8998-gpio
+          - qcom,pmk8350-gpio
+          - qcom,pmr735a-gpio
+          - qcom,pmr735b-gpio
+          - qcom,pms405-gpio
+          - qcom,pmx55-gpio
+
+      - enum:
+          - qcom,spmi-gpio
+          - qcom,ssbi-gpio
+
+  reg:
+    maxItems: 1
+
+  interrupt-controller: true
+
+  '#interrupt-cells':
+    const: 2
+
+  gpio-controller: true
+
+  gpio-ranges:
+    maxItems: 1
+
+  '#gpio-cells':
+    const: 2
+    description:
+      The first cell will be used to define gpio number and the
+      second denotes the flags for this gpio
+
+additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - gpio-controller
+  - '#gpio-cells'
+  - gpio-ranges
+  - interrupt-controller
+
+patternProperties:
+  '-state$':
+    oneOf:
+      - $ref: "#/$defs/qcom-pmic-gpio-state"
+      - patternProperties:
+          ".*":
+            $ref: "#/$defs/qcom-pmic-gpio-state"
+
+$defs:
+  qcom-pmic-gpio-state:
+    type: object
+    allOf:
+      - $ref: "pinmux-node.yaml"
+      - $ref: "pincfg-node.yaml"
+    properties:
+      pins:
+        description:
+          List of gpio pins affected by the properties specified in
+          this subnode.  Valid pins are
+                 - gpio1-gpio10 for pm6150
+                 - gpio1-gpio12 for pm6150l
+                 - gpio1-gpio10 for pm7325
+                 - gpio1-gpio4 for pm8005
+                 - gpio1-gpio2 for pm8008
+                 - gpio1-gpio6 for pm8018
+                 - gpio1-gpio12 for pm8038
+                 - gpio1-gpio40 for pm8058
+                 - gpio1-gpio10 for pm8150 (holes on gpio2, gpio5,
+                                            gpio7 and gpio8)
+                 - gpio1-gpio12 for pm8150b (holes on gpio3, gpio4
+                                             and gpio7)
+                 - gpio1-gpio12 for pm8150l (hole on gpio7)
+                 - gpio1-gpio4 for pm8916
+                 - gpio1-gpio10 for pm8350
+                 - gpio1-gpio8 for pm8350b
+                 - gpio1-gpio9 for pm8350c
+                 - gpio1-gpio38 for pm8917
+                 - gpio1-gpio44 for pm8921
+                 - gpio1-gpio36 for pm8941
+                 - gpio1-gpio8 for pm8950 (hole on gpio3)
+                 - gpio1-gpio22 for pm8994
+                 - gpio1-gpio26 for pm8998
+                 - gpio1-gpio22 for pma8084
+                 - gpio1-gpio2 for pmi8950
+                 - gpio1-gpio10 for pmi8994
+                 - gpio1-gpio4 for pmk8350
+                 - gpio1-gpio4 for pmr735a
+                 - gpio1-gpio4 for pmr735b
+                 - gpio1-gpio12 for pms405 (holes on gpio1, gpio9
+                                            and gpio10)
+                 - gpio1-gpio11 for pmx55 (holes on gpio3, gpio7, gpio10
+                                            and gpio11)
+
+        items:
+          pattern: "^gpio([0-9]+)$"
+
+      function:
+        items:
+          - enum:
+              - normal
+              - paired
+              - func1
+              - func2
+              - dtest1
+              - dtest2
+              - dtest3
+              - dtest4
+              - func3  # supported by LV/MV GPIO subtypes
+              - func4  # supported by LV/MV GPIO subtypes
+
+      bias-disable: true
+      bias-pull-down: true
+      bias-pull-up: true
+
+      qcom,pull-up-strength:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        description:
+          Specifies the strength to use for pull up, if selected.
+          Valid values are defined in
+          <dt-bindings/pinctrl/qcom,pmic-gpio.h>
+          If this property is omitted 30uA strength will be used
+          if pull up is selected
+        enum: [0, 1, 2, 3]
+
+      bias-high-impedance: true
+      input-enable: true
+      output-high: true
+      output-low: true
+      power-source: true
+
+      qcom,drive-strength:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        description:
+          Selects the drive strength for the specified pins
+          Valid drive strength values are defined in
+          <dt-bindings/pinctrl/qcom,pmic-gpio.h>
+        enum: [0, 1, 2, 3]
+
+      drive-push-pull: true
+      drive-open-drain: true
+      drive-open-source: true
+
+      qcom,analog-pass:
+        $ref: /schemas/types.yaml#/definitions/flag
+        description:
+          The specified pins are configured in
+          analog-pass-through mode.
+
+      qcom,atest:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        description:
+          Selects ATEST rail to route to GPIO when it's
+          configured in analog-pass-through mode.
+        enum: [1, 2, 3, 4]
+
+      qcom,dtest-buffer:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        description:
+          Selects DTEST rail to route to GPIO when it's
+          configured as digital input.
+        enum: [1, 2, 3, 4]
+
+    required:
+      - pins
+      - function
+
+    additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/pinctrl/qcom,pmic-gpio.h>
+
+    pm8921_gpio: gpio@150 {
+      compatible = "qcom,pm8921-gpio", "qcom,ssbi-gpio";
+      reg = <0x150 0x160>;
+      interrupt-controller;
+      #interrupt-cells = <2>;
+      gpio-controller;
+      gpio-ranges = <&pm8921_gpio 0 0 44>;
+      #gpio-cells = <2>;
+
+      pm8921_gpio_keys: gpio-keys-state {
+        volume-keys {
+          pins = "gpio20", "gpio21";
+          function = "normal";
+
+          input-enable;
+          bias-pull-up;
+          drive-push-pull;
+          qcom,drive-strength = <PMIC_GPIO_STRENGTH_NO>;
+          power-source = <PM8921_GPIO_S4>;
+        };
+      };
+    };
+...
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,sm6115-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,sm6115-pinctrl.yaml
new file mode 100644
index 000000000000..8fc06f6a3ef4
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,sm6115-pinctrl.yaml
@@ -0,0 +1,179 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pinctrl/qcom,sm6115-pinctrl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Technologies, Inc. SM6115, SM4250 TLMM block
+
+maintainers:
+  - Iskren Chernev <iskren.chernev@gmail.com>
+
+description:
+  This binding describes the Top Level Mode Multiplexer block found in the
+  SM4250/6115 platforms.
+
+properties:
+  compatible:
+    const: qcom,sm6115-tlmm
+
+  reg:
+    minItems: 3
+    maxItems: 3
+
+  reg-names:
+    items:
+      - const: west
+      - const: south
+      - const: east
+
+  interrupts:
+    description: Specifies the TLMM summary IRQ
+    maxItems: 1
+
+  interrupt-controller: true
+
+  '#interrupt-cells':
+    description:
+      Specifies the PIN numbers and Flags, as defined in defined in
+      include/dt-bindings/interrupt-controller/irq.h
+    const: 2
+
+  gpio-controller: true
+
+  '#gpio-cells':
+    description: Specifying the pin number and flags, as defined in
+      include/dt-bindings/gpio/gpio.h
+    const: 2
+
+  gpio-ranges:
+    maxItems: 1
+
+  wakeup-parent:
+    maxItems: 1
+
+#PIN CONFIGURATION NODES
+patternProperties:
+  '-state$':
+    oneOf:
+      - $ref: "#/$defs/qcom-sm6115-tlmm-state"
+      - patternProperties:
+          ".*":
+            $ref: "#/$defs/qcom-sm6115-tlmm-state"
+
+'$defs':
+  qcom-sm6115-tlmm-state:
+    type: object
+    description:
+      Pinctrl node's client devices use subnodes for desired pin configuration.
+      Client device subnodes use below standard properties.
+    $ref: "qcom,tlmm-common.yaml#/$defs/qcom-tlmm-state"
+
+    properties:
+      pins:
+        description:
+          List of gpio pins affected by the properties specified in this
+          subnode.
+        items:
+          oneOf:
+            - pattern: "^gpio([0-9]|[1-9][0-9]|10[0-9]|11[0-2])$"
+            - enum: [ sdc1_rclk, sdc1_clk, sdc1_cmd, sdc1_data,
+                      sdc2_clk, sdc2_cmd, sdc2_data, ufs_reset ]
+        minItems: 1
+        maxItems: 36
+
+      function:
+        description:
+          Specify the alternative function to be configured for the specified
+          pins.
+
+        enum: [ adsp_ext, agera_pll, atest, cam_mclk, cci_async, cci_i2c,
+                cci_timer, cri_trng, dac_calib, dbg_out, ddr_bist, ddr_pxi0,
+                ddr_pxi1, ddr_pxi2, ddr_pxi3, gcc_gp1, gcc_gp2, gcc_gp3, gpio,
+                gp_pdm0, gp_pdm1, gp_pdm2, gsm0_tx, gsm1_tx, jitter_bist,
+                mdp_vsync, mdp_vsync_out_0, mdp_vsync_out_1, mpm_pwr, mss_lte,
+                m_voc, nav_gpio, pa_indicator, pbs, pbs_out, phase_flag,
+                pll_bist, pll_bypassnl, pll_reset, prng_rosc, qdss_cti,
+                qdss_gpio, qup0, qup1, qup2, qup3, qup4, qup5, sdc1_tb,
+                sdc2_tb, sd_write, ssbi_wtr1, tgu, tsense_pwm, uim1_clk,
+                uim1_data, uim1_present, uim1_reset, uim2_clk, uim2_data,
+                uim2_present, uim2_reset, usb_phy, vfr_1, vsense_trigger,
+                wlan1_adc0, elan1_adc1 ]
+
+      drive-strength:
+        enum: [2, 4, 6, 8, 10, 12, 14, 16]
+        default: 2
+        description:
+          Selects the drive strength for the specified pins, in mA.
+
+      bias-pull-down: true
+
+      bias-pull-up: true
+
+      bias-disable: true
+
+      output-high: true
+
+      output-low: true
+
+    required:
+      - pins
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - reg-names
+  - interrupts
+  - interrupt-controller
+  - '#interrupt-cells'
+  - gpio-controller
+  - '#gpio-cells'
+  - gpio-ranges
+
+additionalProperties: false
+
+examples:
+  - |
+        #include <dt-bindings/interrupt-controller/arm-gic.h>
+        tlmm: pinctrl@500000 {
+                compatible = "qcom,sm6115-tlmm";
+                reg = <0x500000 0x400000>,
+                        <0x900000 0x400000>,
+                        <0xd00000 0x400000>;
+                reg-names = "west", "south", "east";
+                interrupts = <GIC_SPI 227 IRQ_TYPE_LEVEL_HIGH>;
+                gpio-controller;
+                #gpio-cells = <2>;
+                interrupt-controller;
+                #interrupt-cells = <2>;
+                gpio-ranges = <&tlmm 0 0 114>;
+
+                sdc2_on_state: sdc2-on-state {
+                        clk {
+                                pins = "sdc2_clk";
+                                bias-disable;
+                                drive-strength = <16>;
+                        };
+
+                        cmd {
+                                pins = "sdc2_cmd";
+                                bias-pull-up;
+                                drive-strength = <10>;
+                        };
+
+                        data {
+                                pins = "sdc2_data";
+                                bias-pull-up;
+                                drive-strength = <10>;
+                        };
+
+                        sd-cd {
+                                pins = "gpio88";
+                                function = "gpio";
+                                bias-pull-up;
+                                drive-strength = <2>;
+                        };
+                };
+        };
diff --git a/Documentation/devicetree/bindings/pinctrl/renesas,rzg2l-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/renesas,rzg2l-pinctrl.yaml
new file mode 100644
index 000000000000..ef68dabcf4dc
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/renesas,rzg2l-pinctrl.yaml
@@ -0,0 +1,155 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pinctrl/renesas,rzg2l-pinctrl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas RZ/G2L combined Pin and GPIO controller
+
+maintainers:
+  - Geert Uytterhoeven <geert+renesas@glider.be>
+  - Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
+
+description:
+  The Renesas SoCs of the RZ/G2L series feature a combined Pin and GPIO
+  controller.
+  Pin multiplexing and GPIO configuration is performed on a per-pin basis.
+  Each port features up to 8 pins, each of them configurable for GPIO function
+  (port mode) or in alternate function mode.
+  Up to 8 different alternate function modes exist for each single pin.
+
+properties:
+  compatible:
+    enum:
+      - renesas,r9a07g044-pinctrl # RZ/G2{L,LC}
+
+  reg:
+    maxItems: 1
+
+  gpio-controller: true
+
+  '#gpio-cells':
+    const: 2
+    description:
+      The first cell contains the global GPIO port index, constructed using the
+      RZG2L_GPIO() helper macro in <dt-bindings/pinctrl/rzg2l-pinctrl.h> and the
+      second cell represents consumer flag as mentioned in ../gpio/gpio.txt
+      E.g. "RZG2L_GPIO(39, 1)" for P39_1.
+
+  gpio-ranges:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  power-domains:
+    maxItems: 1
+
+  resets:
+    items:
+      - description: GPIO_RSTN signal
+      - description: GPIO_PORT_RESETN signal
+      - description: GPIO_SPARE_RESETN signal
+
+additionalProperties:
+  anyOf:
+    - type: object
+      allOf:
+        - $ref: pincfg-node.yaml#
+        - $ref: pinmux-node.yaml#
+
+      description:
+        Pin controller client devices use pin configuration subnodes (children
+        and grandchildren) for desired pin configuration.
+        Client device subnodes use below standard properties.
+
+      properties:
+        phandle: true
+        pinmux:
+          description:
+            Values are constructed from GPIO port number, pin number, and
+            alternate function configuration number using the RZG2L_PORT_PINMUX()
+            helper macro in <dt-bindings/pinctrl/rzg2l-pinctrl.h>.
+        pins: true
+        drive-strength:
+          enum: [ 2, 4, 8, 12 ]
+        power-source:
+          enum: [ 1800, 2500, 3300 ]
+        slew-rate: true
+        gpio-hog: true
+        gpios: true
+        input-enable: true
+        output-high: true
+        output-low: true
+        line-name: true
+
+    - type: object
+      properties:
+        phandle: true
+
+      additionalProperties:
+        $ref: "#/additionalProperties/anyOf/0"
+
+required:
+  - compatible
+  - reg
+  - gpio-controller
+  - '#gpio-cells'
+  - gpio-ranges
+  - clocks
+  - power-domains
+  - resets
+
+examples:
+  - |
+    #include <dt-bindings/pinctrl/rzg2l-pinctrl.h>
+    #include <dt-bindings/clock/r9a07g044-cpg.h>
+
+    pinctrl: pinctrl@11030000 {
+            compatible = "renesas,r9a07g044-pinctrl";
+            reg = <0x11030000 0x10000>;
+
+            gpio-controller;
+            #gpio-cells = <2>;
+            gpio-ranges = <&pinctrl 0 0 392>;
+            clocks = <&cpg CPG_MOD R9A07G044_GPIO_HCLK>;
+            resets = <&cpg R9A07G044_GPIO_RSTN>,
+                     <&cpg R9A07G044_GPIO_PORT_RESETN>,
+                     <&cpg R9A07G044_GPIO_SPARE_RESETN>;
+            power-domains = <&cpg>;
+
+            scif0_pins: serial0 {
+                    pinmux = <RZG2L_PORT_PINMUX(38, 0, 1)>, /* Tx */
+                             <RZG2L_PORT_PINMUX(38, 1, 1)>; /* Rx */
+            };
+
+            i2c1_pins: i2c1 {
+                    pins = "RIIC1_SDA", "RIIC1_SCL";
+                    input-enable;
+            };
+
+            sd1-pwr-en-hog {
+                    gpio-hog;
+                    gpios = <RZG2L_GPIO(39, 2) 0>;
+                    output-high;
+                    line-name = "sd1_pwr_en";
+            };
+
+            sdhi1_pins: sd1 {
+                    sd1_mux {
+                            pinmux = <RZG2L_PORT_PINMUX(19, 0, 1)>, /* CD */
+                                     <RZG2L_PORT_PINMUX(19, 1, 1)>; /* WP */
+                            power-source  = <3300>;
+                    };
+
+                    sd1_data {
+                            pins = "SD1_DATA0", "SD1_DATA1", "SD1_DATA2", "SD1_DATA3";
+                            power-source  = <3300>;
+                    };
+
+                    sd1_ctrl {
+                            pins = "SD1_CLK", "SD1_CMD";
+                            power-source  = <3300>;
+                    };
+            };
+    };
diff --git a/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt
index 38a1416fd2cd..e7a1b1880375 100644
--- a/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt
@@ -22,6 +22,7 @@ Required Properties:
   - "samsung,exynos5420-pinctrl": for Exynos5420 compatible pin-controller.
   - "samsung,exynos5433-pinctrl": for Exynos5433 compatible pin-controller.
   - "samsung,exynos7-pinctrl": for Exynos7 compatible pin-controller.
+  - "samsung,exynos850-pinctrl": for Exynos850 compatible pin-controller.
 
 - reg: Base address of the pin controller hardware module and length of
   the address space it occupies.
diff --git a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
index 72877544ca78..dfee6d38a701 100644
--- a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
@@ -24,6 +24,7 @@ properties:
       - st,stm32f746-pinctrl
       - st,stm32f769-pinctrl
       - st,stm32h743-pinctrl
+      - st,stm32mp135-pinctrl
       - st,stm32mp157-pinctrl
       - st,stm32mp157-z-pinctrl
 
diff --git a/Documentation/devicetree/bindings/pinctrl/xlnx,zynq-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/xlnx,zynq-pinctrl.txt
deleted file mode 100644
index f488b0f77406..000000000000
--- a/Documentation/devicetree/bindings/pinctrl/xlnx,zynq-pinctrl.txt
+++ /dev/null
@@ -1,105 +0,0 @@
-	Binding for Xilinx Zynq Pinctrl
-
-Required properties:
-- compatible: "xlnx,zynq-pinctrl"
-- syscon: phandle to SLCR
-- reg: Offset and length of pinctrl space in SLCR
-
-Please refer to pinctrl-bindings.txt in this directory for details of the
-common pinctrl bindings used by client devices, including the meaning of the
-phrase "pin configuration node".
-
-Zynq's pin configuration nodes act as a container for an arbitrary number of
-subnodes. Each of these subnodes represents some desired configuration for a
-pin, a group, or a list of pins or groups. This configuration can include the
-mux function to select on those pin(s)/group(s), and various pin configuration
-parameters, such as pull-up, slew rate, etc.
-
-Each configuration node can consist of multiple nodes describing the pinmux and
-pinconf options. Those nodes can be pinmux nodes or pinconf nodes.
-
-The name of each subnode is not important; all subnodes should be enumerated
-and processed purely based on their content.
-
-Required properties for pinmux nodes are:
- - groups: A list of pinmux groups.
- - function: The name of a pinmux function to activate for the specified set
-   of groups.
-
-Required properties for configuration nodes:
-One of:
- - pins: a list of pin names
- - groups: A list of pinmux groups.
-
-The following generic properties as defined in pinctrl-bindings.txt are valid
-to specify in a pinmux subnode:
- groups, function
-
-The following generic properties as defined in pinctrl-bindings.txt are valid
-to specify in a pinconf subnode:
- groups, pins, bias-disable, bias-high-impedance, bias-pull-up, slew-rate,
- low-power-disable, low-power-enable
-
- Valid arguments for 'slew-rate' are '0' and '1' to select between slow and fast
- respectively.
-
- Valid values for groups are:
-   ethernet0_0_grp, ethernet1_0_grp, mdio0_0_grp, mdio1_0_grp,
-   qspi0_0_grp, qspi1_0_grp, qspi_fbclk, qspi_cs1_grp, spi0_0_grp - spi0_2_grp,
-   spi0_X_ssY (X=0..2, Y=0..2), spi1_0_grp - spi1_3_grp,
-   spi1_X_ssY (X=0..3, Y=0..2), sdio0_0_grp - sdio0_2_grp,
-   sdio1_0_grp - sdio1_3_grp, sdio0_emio_wp, sdio0_emio_cd, sdio1_emio_wp,
-   sdio1_emio_cd, smc0_nor, smc0_nor_cs1_grp, smc0_nor_addr25_grp, smc0_nand,
-   can0_0_grp - can0_10_grp, can1_0_grp - can1_11_grp, uart0_0_grp - uart0_10_grp,
-   uart1_0_grp - uart1_11_grp, i2c0_0_grp - i2c0_10_grp, i2c1_0_grp - i2c1_10_grp,
-   ttc0_0_grp - ttc0_2_grp, ttc1_0_grp - ttc1_2_grp, swdt0_0_grp - swdt0_4_grp,
-   gpio0_0_grp - gpio0_53_grp, usb0_0_grp, usb1_0_grp
-
- Valid values for pins are:
-   MIO0 - MIO53
-
- Valid values for function are:
-   ethernet0, ethernet1, mdio0, mdio1, qspi0, qspi1, qspi_fbclk, qspi_cs1,
-   spi0, spi0_ss, spi1, spi1_ss, sdio0, sdio0_pc, sdio0_cd, sdio0_wp,
-   sdio1, sdio1_pc, sdio1_cd, sdio1_wp,
-   smc0_nor, smc0_nor_cs1, smc0_nor_addr25, smc0_nand, can0, can1, uart0, uart1,
-   i2c0, i2c1, ttc0, ttc1, swdt0, gpio0, usb0, usb1
-
-The following driver-specific properties as defined here are valid to specify in
-a pin configuration subnode:
- - io-standard: Configure the pin to use the selected IO standard according to
-   this mapping:
-    1: LVCMOS18
-    2: LVCMOS25
-    3: LVCMOS33
-    4: HSTL
-
-Example:
-	pinctrl0: pinctrl@700 {
-		compatible = "xlnx,pinctrl-zynq";
-		reg = <0x700 0x200>;
-		syscon = <&slcr>;
-
-		pinctrl_uart1_default: uart1-default {
-			mux {
-				groups = "uart1_10_grp";
-				function = "uart1";
-			};
-
-			conf {
-				groups = "uart1_10_grp";
-				slew-rate = <0>;
-				io-standard = <1>;
-			};
-
-			conf-rx {
-				pins = "MIO49";
-				bias-high-impedance;
-			};
-
-			conf-tx {
-				pins = "MIO48";
-				bias-disable;
-			};
-		};
-	};
diff --git a/Documentation/devicetree/bindings/pinctrl/xlnx,zynq-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/xlnx,zynq-pinctrl.yaml
new file mode 100644
index 000000000000..ac97dbf6998e
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/xlnx,zynq-pinctrl.yaml
@@ -0,0 +1,214 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pinctrl/xlnx,zynq-pinctrl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Xilinx Zynq Pinctrl
+
+maintainers:
+  - Sai Krishna Potthuri <lakshmi.sai.krishna.potthuri@xilinx.com>
+
+description: |
+  Please refer to pinctrl-bindings.txt in this directory for details of the
+  common pinctrl bindings used by client devices, including the meaning of the
+  phrase "pin configuration node".
+
+  Zynq's pin configuration nodes act as a container for an arbitrary number of
+  subnodes. Each of these subnodes represents some desired configuration for a
+  pin, a group, or a list of pins or groups. This configuration can include the
+  mux function to select on those pin(s)/group(s), and various pin configuration
+  parameters, such as pull-up, slew rate, etc.
+
+  Each configuration node can consist of multiple nodes describing the pinmux and
+  pinconf options. Those nodes can be pinmux nodes or pinconf nodes.
+
+  The name of each subnode is not important; all subnodes should be enumerated
+  and processed purely based on their content.
+
+properties:
+  compatible:
+    const: xlnx,zynq-pinctrl
+
+  reg:
+    description: Specifies the base address and size of the SLCR space.
+    maxItems: 1
+
+  syscon:
+    description:
+      phandle to the SLCR.
+
+patternProperties:
+  '^(.*-)?(default|gpio)$':
+    type: object
+    patternProperties:
+      '^mux':
+        type: object
+        description:
+          Pinctrl node's client devices use subnodes for pin muxes,
+          which in turn use below standard properties.
+        $ref: pinmux-node.yaml#
+
+        properties:
+          groups:
+            description:
+              List of groups to select (either this or "pins" must be
+              specified), available groups for this subnode.
+            items:
+              enum: [ethernet0_0_grp, ethernet1_0_grp, mdio0_0_grp,
+                     mdio1_0_grp, qspi0_0_grp, qspi1_0_grp, qspi_fbclk,
+                     qspi_cs1_grp, spi0_0_grp, spi0_1_grp, spi0_2_grp,
+                     spi0_0_ss0, spi0_0_ss1, spi0_0_ss2, spi0_1_ss0,
+                     spi0_1_ss1, spi0_1_ss2, spi0_2_ss0, spi0_2_ss1,
+                     spi0_2_ss2, spi1_0_grp, spi1_1_grp, spi1_2_grp,
+                     spi1_3_grp, spi1_0_ss0, spi1_0_ss1, spi1_0_ss2,
+                     spi1_1_ss0, spi1_1_ss1, spi1_1_ss2, spi1_2_ss0,
+                     spi1_2_ss1, spi1_2_ss2, spi1_3_ss0, spi1_3_ss1,
+                     spi1_3_ss2, sdio0_0_grp, sdio0_1_grp, sdio0_2_grp,
+                     sdio1_0_grp, sdio1_1_grp, sdio1_2_grp, sdio1_3_grp,
+                     sdio0_emio_wp, sdio0_emio_cd, sdio1_emio_wp,
+                     sdio1_emio_cd, smc0_nor, smc0_nor_cs1_grp,
+                     smc0_nor_addr25_grp, smc0_nand, can0_0_grp, can0_1_grp,
+                     can0_2_grp, can0_3_grp, can0_4_grp, can0_5_grp,
+                     can0_6_grp, can0_7_grp, can0_8_grp, can0_9_grp,
+                     can0_10_grp, can1_0_grp, can1_1_grp, can1_2_grp,
+                     can1_3_grp, can1_4_grp, can1_5_grp, can1_6_grp,
+                     can1_7_grp, can1_8_grp, can1_9_grp, can1_10_grp,
+                     can1_11_grp, uart0_0_grp, uart0_1_grp, uart0_2_grp,
+                     uart0_3_grp, uart0_4_grp, uart0_5_grp, uart0_6_grp,
+                     uart0_7_grp, uart0_8_grp, uart0_9_grp, uart0_10_grp,
+                     uart1_0_grp, uart1_1_grp, uart1_2_grp, uart1_3_grp,
+                     uart1_4_grp, uart1_5_grp, uart1_6_grp, uart1_7_grp,
+                     uart1_8_grp, uart1_9_grp, uart1_10_grp, uart1_11_grp,
+                     i2c0_0_grp, i2c0_1_grp, i2c0_2_grp, i2c0_3_grp,
+                     i2c0_4_grp, i2c0_5_grp, i2c0_6_grp, i2c0_7_grp,
+                     i2c0_8_grp, i2c0_9_grp, i2c0_10_grp, i2c1_0_grp,
+                     i2c1_1_grp, i2c1_2_grp, i2c1_3_grp, i2c1_4_grp,
+                     i2c1_5_grp, i2c1_6_grp, i2c1_7_grp, i2c1_8_grp,
+                     i2c1_9_grp, i2c1_10_grp, ttc0_0_grp, ttc0_1_grp,
+                     ttc0_2_grp, ttc1_0_grp, ttc1_1_grp, ttc1_2_grp,
+                     swdt0_0_grp, swdt0_1_grp, swdt0_2_grp, swdt0_3_grp,
+                     swdt0_4_grp, gpio0_0_grp, gpio0_1_grp, gpio0_2_grp,
+                     gpio0_3_grp, gpio0_4_grp, gpio0_5_grp, gpio0_6_grp,
+                     gpio0_7_grp, gpio0_8_grp, gpio0_9_grp, gpio0_10_grp,
+                     gpio0_11_grp, gpio0_12_grp, gpio0_13_grp, gpio0_14_grp,
+                     gpio0_15_grp, gpio0_16_grp, gpio0_17_grp, gpio0_18_grp,
+                     gpio0_19_grp, gpio0_20_grp, gpio0_21_grp, gpio0_22_grp,
+                     gpio0_23_grp, gpio0_24_grp, gpio0_25_grp, gpio0_26_grp,
+                     gpio0_27_grp, gpio0_28_grp, gpio0_29_grp, gpio0_30_grp,
+                     gpio0_31_grp, gpio0_32_grp, gpio0_33_grp, gpio0_34_grp,
+                     gpio0_35_grp, gpio0_36_grp, gpio0_37_grp, gpio0_38_grp,
+                     gpio0_39_grp, gpio0_40_grp, gpio0_41_grp, gpio0_42_grp,
+                     gpio0_43_grp, gpio0_44_grp, gpio0_45_grp, gpio0_46_grp,
+                     gpio0_47_grp, gpio0_48_grp, gpio0_49_grp, gpio0_50_grp,
+                     gpio0_51_grp, gpio0_52_grp, gpio0_53_grp, usb0_0_grp,
+                     usb1_0_grp]
+            maxItems: 54
+
+          function:
+            description:
+              Specify the alternative function to be configured for the
+              given pin groups.
+            enum: [ethernet0, ethernet1, mdio0, mdio1, qspi0, qspi1, qspi_fbclk,
+                   qspi_cs1, spi0, spi0_ss, spi1, spi1_ss, sdio0, sdio0_pc,
+                   sdio0_cd, sdio0_wp, sdio1, sdio1_pc, sdio1_cd, sdio1_wp,
+                   smc0_nor, smc0_nor_cs1, smc0_nor_addr25, smc0_nand, can0,
+                   can1, uart0, uart1, i2c0, i2c1, ttc0, ttc1, swdt0, gpio0,
+                   usb0, usb1]
+
+        required:
+          - groups
+          - function
+
+        additionalProperties: false
+
+      '^conf':
+        type: object
+        description:
+          Pinctrl node's client devices use subnodes for pin configurations,
+          which in turn use the standard properties below.
+        $ref: pincfg-node.yaml#
+
+        properties:
+          groups:
+            description:
+              List of pin groups as mentioned above.
+
+          pins:
+            description:
+              List of pin names to select in this subnode.
+            items:
+              pattern: '^MIO([0-9]|[1-4][0-9]|5[0-3])$'
+            maxItems: 54
+
+          bias-pull-up: true
+
+          bias-pull-down: true
+
+          bias-disable: true
+
+          bias-high-impedance: true
+
+          low-power-enable: true
+
+          low-power-disable: true
+
+          slew-rate:
+            enum: [0, 1]
+
+          power-source:
+            enum: [1, 2, 3, 4]
+
+        oneOf:
+          - required: [ groups ]
+          - required: [ pins ]
+
+        additionalProperties: false
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - syscon
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/pinctrl/pinctrl-zynq.h>
+    pinctrl0: pinctrl@700 {
+       compatible = "xlnx,zynq-pinctrl";
+       reg = <0x700 0x200>;
+       syscon = <&slcr>;
+
+       pinctrl_uart1_default: uart1-default {
+           mux {
+               groups = "uart1_10_grp";
+               function = "uart1";
+           };
+
+           conf {
+               groups = "uart1_10_grp";
+               slew-rate = <0>;
+               power-source = <IO_STANDARD_LVCMOS18>;
+           };
+
+           conf-rx {
+               pins = "MIO49";
+               bias-high-impedance;
+           };
+
+           conf-tx {
+               pins = "MIO48";
+               bias-disable;
+           };
+       };
+    };
+
+    uart1 {
+         pinctrl-names = "default";
+         pinctrl-0 = <&pinctrl_uart1_default>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/power/power-domain.yaml b/Documentation/devicetree/bindings/power/power-domain.yaml
index aed51e9dcb11..3143ed9a3313 100644
--- a/Documentation/devicetree/bindings/power/power-domain.yaml
+++ b/Documentation/devicetree/bindings/power/power-domain.yaml
@@ -46,7 +46,7 @@ properties:
       Phandles to the OPP tables of power domains provided by a power domain
       provider. If the provider provides a single power domain only or all
       the power domains provided by the provider have identical OPP tables,
-      then this shall contain a single phandle. Refer to ../opp/opp.txt
+      then this shall contain a single phandle. Refer to ../opp/opp-v2-base.yaml
       for more information.
 
   "#power-domain-cells":
diff --git a/Documentation/devicetree/bindings/power/reset/qcom,pon.txt b/Documentation/devicetree/bindings/power/reset/qcom,pon.txt
deleted file mode 100644
index 0c0dc3a1e693..000000000000
--- a/Documentation/devicetree/bindings/power/reset/qcom,pon.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-Qualcomm PON Device
-
-The Power On device for Qualcomm PM8xxx is MFD supporting pwrkey
-and resin along with the Android reboot-mode.
-
-This DT node has pwrkey and resin as sub nodes.
-
-Required Properties:
--compatible: Must be one of:
-	"qcom,pm8916-pon"
-	"qcom,pms405-pon"
-	"qcom,pm8998-pon"
-
--reg: Specifies the physical address of the pon register
-
-Optional subnode:
--pwrkey: Specifies the subnode pwrkey and should follow the
- qcom,pm8941-pwrkey.txt description.
--resin: Specifies the subnode resin and should follow the
- qcom,pm8xxx-pwrkey.txt description.
-
-The rest of the properties should follow the generic reboot-mode description
-found in reboot-mode.txt
-
-Example:
-
-	pon@800 {
-		compatible = "qcom,pm8916-pon";
-
-		reg = <0x800>;
-		mode-bootloader = <0x2>;
-		mode-recovery = <0x1>;
-
-		pwrkey {
-			compatible = "qcom,pm8941-pwrkey";
-			interrupts = <0x0 0x8 0 IRQ_TYPE_EDGE_BOTH>;
-			debounce = <15625>;
-			bias-pull-up;
-			linux,code = <KEY_POWER>;
-		};
-
-		resin {
-			compatible = "qcom,pm8941-resin";
-			interrupts = <0x0 0x8 1 IRQ_TYPE_EDGE_BOTH>;
-			debounce = <15625>;
-			bias-pull-up;
-			linux,code = <KEY_VOLUMEDOWN>;
-		};
-	};
diff --git a/Documentation/devicetree/bindings/power/reset/qcom,pon.yaml b/Documentation/devicetree/bindings/power/reset/qcom,pon.yaml
new file mode 100644
index 000000000000..353f155df0f4
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/reset/qcom,pon.yaml
@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/power/reset/qcom,pon.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm PON Device
+
+maintainers:
+  - Vinod Koul <vkoul@kernel.org>
+
+description: |
+  The Power On device for Qualcomm PM8xxx is MFD supporting pwrkey
+  and resin along with the Android reboot-mode.
+
+  This DT node has pwrkey and resin as sub nodes.
+
+allOf:
+  - $ref: reboot-mode.yaml#
+
+properties:
+  compatible:
+    enum:
+      - qcom,pm8916-pon
+      - qcom,pms405-pon
+      - qcom,pm8998-pon
+
+  reg:
+    maxItems: 1
+
+  pwrkey:
+    type: object
+    $ref: "../../input/qcom,pm8941-pwrkey.yaml#"
+
+  resin:
+    type: object
+    $ref: "../../input/qcom,pm8941-pwrkey.yaml#"
+
+required:
+  - compatible
+  - reg
+
+unevaluatedProperties: false
+
+examples:
+  - |
+   #include <dt-bindings/interrupt-controller/irq.h>
+   #include <dt-bindings/input/linux-event-codes.h>
+   #include <dt-bindings/spmi/spmi.h>
+   spmi_bus: spmi@c440000 {
+     reg = <0x0c440000 0x1100>;
+     #address-cells = <2>;
+     #size-cells = <0>;
+     pmk8350: pmic@0 {
+       reg = <0x0 SPMI_USID>;
+       #address-cells = <1>;
+       #size-cells = <0>;
+       pmk8350_pon: pon_hlos@1300 {
+         reg = <0x1300>;
+         compatible = "qcom,pm8998-pon";
+
+         pwrkey {
+            compatible = "qcom,pm8941-pwrkey";
+            interrupts = < 0x0 0x8 0 IRQ_TYPE_EDGE_BOTH >;
+            debounce = <15625>;
+            bias-pull-up;
+            linux,code = <KEY_POWER>;
+         };
+
+         resin {
+            compatible = "qcom,pm8941-resin";
+            interrupts = <0x0 0x8 1 IRQ_TYPE_EDGE_BOTH>;
+            debounce = <15625>;
+            bias-pull-up;
+            linux,code = <KEY_VOLUMEDOWN>;
+         };
+       };
+     };
+   };
+...
diff --git a/Documentation/devicetree/bindings/power/reset/reboot-mode.yaml b/Documentation/devicetree/bindings/power/reset/reboot-mode.yaml
index 9c6fda6b1dd9..ad0a0b95cec1 100644
--- a/Documentation/devicetree/bindings/power/reset/reboot-mode.yaml
+++ b/Documentation/devicetree/bindings/power/reset/reboot-mode.yaml
@@ -36,7 +36,7 @@ patternProperties:
   "^mode-.*$":
     $ref: /schemas/types.yaml#/definitions/uint32
 
-additionalProperties: false
+additionalProperties: true
 
 examples:
   - |
diff --git a/Documentation/devicetree/bindings/pwm/pwm-rockchip.yaml b/Documentation/devicetree/bindings/pwm/pwm-rockchip.yaml
index 5596bee70509..81a54a4e8e3e 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-rockchip.yaml
+++ b/Documentation/devicetree/bindings/pwm/pwm-rockchip.yaml
@@ -29,6 +29,7 @@ properties:
           - enum:
               - rockchip,px30-pwm
               - rockchip,rk3308-pwm
+              - rockchip,rk3568-pwm
           - const: rockchip,rk3328-pwm
 
   reg:
diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml
index c597ccced623..0c112f3264a9 100644
--- a/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml
+++ b/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml
@@ -28,6 +28,7 @@ properties:
       - qcom,sc8180x-adsp-pas
       - qcom,sc8180x-cdsp-pas
       - qcom,sc8180x-mpss-pas
+      - qcom,sdm660-adsp-pas
       - qcom,sdm845-adsp-pas
       - qcom,sdm845-cdsp-pas
       - qcom,sdx55-mpss-pas
diff --git a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
index e8d3096d922c..39b5f4c5a511 100644
--- a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
+++ b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
@@ -51,6 +51,23 @@ compatible (optional) - standard definition
           used as a shared pool of DMA buffers for a set of devices. It can
           be used by an operating system to instantiate the necessary pool
           management subsystem if necessary.
+        - restricted-dma-pool: This indicates a region of memory meant to be
+          used as a pool of restricted DMA buffers for a set of devices. The
+          memory region would be the only region accessible to those devices.
+          When using this, the no-map and reusable properties must not be set,
+          so the operating system can create a virtual mapping that will be used
+          for synchronization. The main purpose for restricted DMA is to
+          mitigate the lack of DMA access control on systems without an IOMMU,
+          which could result in the DMA accessing the system memory at
+          unexpected times and/or unexpected addresses, possibly leading to data
+          leakage or corruption. The feature on its own provides a basic level
+          of protection against the DMA overwriting buffer contents at
+          unexpected times. However, to protect against general data leakage and
+          system memory corruption, the system needs to provide way to lock down
+          the memory access, e.g., MPU. Note that since coherent allocation
+          needs remapping, one must set up another device coherent pool by
+          shared-dma-pool and use dma_alloc_from_dev_coherent instead for atomic
+          coherent allocation.
         - vendor specific string in the form <vendor>,[<device>-]<usage>
 no-map (optional) - empty property
     - Indicates the operating system must not create a virtual mapping
@@ -85,10 +102,11 @@ memory-region-names (optional) - a list of names, one for each corresponding
 
 Example
 -------
-This example defines 3 contiguous regions are defined for Linux kernel:
+This example defines 4 contiguous regions for Linux kernel:
 one default of all device drivers (named linux,cma@72000000 and 64MiB in size),
-one dedicated to the framebuffer device (named framebuffer@78000000, 8MiB), and
-one for multimedia processing (named multimedia-memory@77000000, 64MiB).
+one dedicated to the framebuffer device (named framebuffer@78000000, 8MiB),
+one for multimedia processing (named multimedia-memory@77000000, 64MiB), and
+one for restricted dma pool (named restricted_dma_reserved@0x50000000, 64MiB).
 
 / {
 	#address-cells = <1>;
@@ -120,6 +138,11 @@ one for multimedia processing (named multimedia-memory@77000000, 64MiB).
 			compatible = "acme,multimedia-memory";
 			reg = <0x77000000 0x4000000>;
 		};
+
+		restricted_dma_reserved: restricted_dma_reserved {
+			compatible = "restricted-dma-pool";
+			reg = <0x50000000 0x4000000>;
+		};
 	};
 
 	/* ... */
@@ -138,4 +161,11 @@ one for multimedia processing (named multimedia-memory@77000000, 64MiB).
 		memory-region = <&multimedia_reserved>;
 		/* ... */
 	};
+
+	pcie_device: pcie_device@0,0 {
+		reg = <0x83010000 0x0 0x00000000 0x0 0x00100000
+		       0x83010000 0x0 0x00100000 0x0 0x00100000>;
+		memory-region = <&restricted_dma_reserved>;
+		/* ... */
+	};
 };
diff --git a/Documentation/devicetree/bindings/riscv/starfive.yaml b/Documentation/devicetree/bindings/riscv/starfive.yaml
new file mode 100644
index 000000000000..5b36243fd674
--- /dev/null
+++ b/Documentation/devicetree/bindings/riscv/starfive.yaml
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/riscv/starfive.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: StarFive SoC-based boards
+
+maintainers:
+  - Michael Zhu <michael.zhu@starfivetech.com>
+  - Drew Fustini <drew@beagleboard.org>
+
+description:
+  StarFive SoC-based boards
+
+properties:
+  $nodename:
+    const: '/'
+  compatible:
+    oneOf:
+      - items:
+          - const: beagle,beaglev-starlight-jh7100-r0
+          - const: starfive,jh7100
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml b/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml
index 7548d8714871..13925bb78ec7 100644
--- a/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml
@@ -32,6 +32,9 @@ properties:
       - dallas,ds3232
       # I2C-BUS INTERFACE REAL TIME CLOCK MODULE
       - epson,rx8010
+      # I2C-BUS INTERFACE REAL TIME CLOCK MODULE
+      - epson,rx8025
+      - epson,rx8035
       # I2C-BUS INTERFACE REAL TIME CLOCK MODULE with Battery Backed RAM
       - epson,rx8571
       # I2C-BUS INTERFACE REAL TIME CLOCK MODULE
diff --git a/Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml b/Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml
index 61802a11baf4..d370c98a62c7 100644
--- a/Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml
+++ b/Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml
@@ -21,6 +21,7 @@ properties:
       - fsl,imx8mn-rpmsg-audio
       - fsl,imx8mm-rpmsg-audio
       - fsl,imx8mp-rpmsg-audio
+      - fsl,imx8ulp-rpmsg-audio
 
   model:
     $ref: /schemas/types.yaml#/definitions/string
diff --git a/Documentation/devicetree/bindings/sound/mt8195-afe-pcm.yaml b/Documentation/devicetree/bindings/sound/mt8195-afe-pcm.yaml
index 53e9434a6d9d..dcf790b053d2 100644
--- a/Documentation/devicetree/bindings/sound/mt8195-afe-pcm.yaml
+++ b/Documentation/devicetree/bindings/sound/mt8195-afe-pcm.yaml
@@ -130,36 +130,34 @@ additionalProperties: false
 
 examples:
   - |
-    #include <dt-bindings/clock/mt8195-clk.h>
     #include <dt-bindings/interrupt-controller/arm-gic.h>
     #include <dt-bindings/interrupt-controller/irq.h>
-    #include <dt-bindings/power/mt8195-power.h>
 
     afe: mt8195-afe-pcm@10890000 {
         compatible = "mediatek,mt8195-audio";
         reg = <0x10890000 0x10000>;
         interrupts = <GIC_SPI 822 IRQ_TYPE_LEVEL_HIGH 0>;
         mediatek,topckgen = <&topckgen>;
-        power-domains = <&spm MT8195_POWER_DOMAIN_AUDIO>;
+        power-domains = <&spm 7>; //MT8195_POWER_DOMAIN_AUDIO
         clocks = <&clk26m>,
-                 <&topckgen CLK_TOP_APLL1>,
-                 <&topckgen CLK_TOP_APLL2>,
-                 <&topckgen CLK_TOP_APLL12_DIV0>,
-                 <&topckgen CLK_TOP_APLL12_DIV1>,
-                 <&topckgen CLK_TOP_APLL12_DIV2>,
-                 <&topckgen CLK_TOP_APLL12_DIV3>,
-                 <&topckgen CLK_TOP_APLL12_DIV9>,
-                 <&topckgen CLK_TOP_A1SYS_HP_SEL>,
-                 <&topckgen CLK_TOP_AUD_INTBUS_SEL>,
-                 <&topckgen CLK_TOP_AUDIO_H_SEL>,
-                 <&topckgen CLK_TOP_AUDIO_LOCAL_BUS_SEL>,
-                 <&topckgen CLK_TOP_DPTX_M_SEL>,
-                 <&topckgen CLK_TOP_I2SO1_M_SEL>,
-                 <&topckgen CLK_TOP_I2SO2_M_SEL>,
-                 <&topckgen CLK_TOP_I2SI1_M_SEL>,
-                 <&topckgen CLK_TOP_I2SI2_M_SEL>,
-                 <&infracfg_ao CLK_INFRA_AO_AUDIO_26M_B>,
-                 <&scp_adsp CLK_SCP_ADSP_AUDIODSP>;
+                 <&topckgen 163>, //CLK_TOP_APLL1
+                 <&topckgen 166>, //CLK_TOP_APLL2
+                 <&topckgen 233>, //CLK_TOP_APLL12_DIV0
+                 <&topckgen 234>, //CLK_TOP_APLL12_DIV1
+                 <&topckgen 235>, //CLK_TOP_APLL12_DIV2
+                 <&topckgen 236>, //CLK_TOP_APLL12_DIV3
+                 <&topckgen 238>, //CLK_TOP_APLL12_DIV9
+                 <&topckgen 100>, //CLK_TOP_A1SYS_HP_SEL
+                 <&topckgen 33>, //CLK_TOP_AUD_INTBUS_SEL
+                 <&topckgen 34>, //CLK_TOP_AUDIO_H_SEL
+                 <&topckgen 107>, //CLK_TOP_AUDIO_LOCAL_BUS_SEL
+                 <&topckgen 98>, //CLK_TOP_DPTX_M_SEL
+                 <&topckgen 94>, //CLK_TOP_I2SO1_M_SEL
+                 <&topckgen 95>, //CLK_TOP_I2SO2_M_SEL
+                 <&topckgen 96>, //CLK_TOP_I2SI1_M_SEL
+                 <&topckgen 97>, //CLK_TOP_I2SI2_M_SEL
+                 <&infracfg_ao 50>, //CLK_INFRA_AO_AUDIO_26M_B
+                 <&scp_adsp 0>; //CLK_SCP_ADSP_AUDIODSP
         clock-names = "clk26m",
                       "apll1_ck",
                       "apll2_ck",
diff --git a/Documentation/devicetree/bindings/spi/omap-spi.yaml b/Documentation/devicetree/bindings/spi/omap-spi.yaml
index e55538186cf6..9952199cae11 100644
--- a/Documentation/devicetree/bindings/spi/omap-spi.yaml
+++ b/Documentation/devicetree/bindings/spi/omap-spi.yaml
@@ -84,9 +84,9 @@ unevaluatedProperties: false
 if:
   properties:
     compatible:
-      oneOf:
-        - const: ti,omap2-mcspi
-        - const: ti,omap4-mcspi
+      enum:
+        - ti,omap2-mcspi
+        - ti,omap4-mcspi
 
 then:
   properties:
diff --git a/Documentation/devicetree/bindings/spi/spi-xilinx.yaml b/Documentation/devicetree/bindings/spi/spi-xilinx.yaml
index 593f7693bace..03e5dca7e933 100644
--- a/Documentation/devicetree/bindings/spi/spi-xilinx.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-xilinx.yaml
@@ -27,13 +27,11 @@ properties:
 
   xlnx,num-ss-bits:
     description: Number of chip selects used.
-    $ref: /schemas/types.yaml#/definitions/uint32
     minimum: 1
     maximum: 32
 
   xlnx,num-transfer-bits:
     description: Number of bits per transfer. This will be 8 if not specified.
-    $ref: /schemas/types.yaml#/definitions/uint32
     enum: [8, 16, 32]
     default: 8
 
diff --git a/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml b/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml
new file mode 100644
index 000000000000..289e9a845600
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright 2021 Linaro Ltd.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/thermal/qcom-lmh.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Limits Management Hardware(LMh)
+
+maintainers:
+  - Thara Gopinath <thara.gopinath@linaro.org>
+
+description:
+  Limits Management Hardware(LMh) is a hardware infrastructure on some
+  Qualcomm SoCs that can enforce temperature and current limits as
+  programmed by software for certain IPs like CPU.
+
+properties:
+  compatible:
+    enum:
+      - qcom,sdm845-lmh
+
+  reg:
+    items:
+      - description: core registers
+
+  interrupts:
+    maxItems: 1
+
+  '#interrupt-cells':
+    const: 1
+
+  interrupt-controller: true
+
+  cpus:
+    description:
+      phandle of the first cpu in the LMh cluster
+    $ref: /schemas/types.yaml#/definitions/phandle
+
+  qcom,lmh-temp-arm-millicelsius:
+    description:
+      An integer expressing temperature threshold at which the LMh thermal
+      FSM is engaged.
+
+  qcom,lmh-temp-low-millicelsius:
+    description:
+      An integer expressing temperature threshold at which the state machine
+      will attempt to remove frequency throttling.
+
+  qcom,lmh-temp-high-millicelsius:
+    description:
+      An integer expressing temperature threshold at which the state machine
+      will attempt to throttle the frequency.
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - '#interrupt-cells'
+  - interrupt-controller
+  - cpus
+  - qcom,lmh-temp-arm-millicelsius
+  - qcom,lmh-temp-low-millicelsius
+  - qcom,lmh-temp-high-millicelsius
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    lmh@17d70800 {
+      compatible = "qcom,sdm845-lmh";
+      reg = <0x17d70800 0x400>;
+      interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;
+      cpus = <&CPU4>;
+      qcom,lmh-temp-arm-millicelsius = <65000>;
+      qcom,lmh-temp-low-millicelsius = <94500>;
+      qcom,lmh-temp-high-millicelsius = <95000>;
+      interrupt-controller;
+      #interrupt-cells = <1>;
+    };
diff --git a/Documentation/devicetree/bindings/thermal/thermal-zones.yaml b/Documentation/devicetree/bindings/thermal/thermal-zones.yaml
index 164f71598c59..a07de5ed0ca6 100644
--- a/Documentation/devicetree/bindings/thermal/thermal-zones.yaml
+++ b/Documentation/devicetree/bindings/thermal/thermal-zones.yaml
@@ -215,7 +215,7 @@ patternProperties:
       - polling-delay
       - polling-delay-passive
       - thermal-sensors
-      - trips
+
     additionalProperties: false
 
 additionalProperties: false
diff --git a/Documentation/devicetree/bindings/virtio/mmio.yaml b/Documentation/devicetree/bindings/virtio/mmio.yaml
index d46597028cf1..4b7a0273181c 100644
--- a/Documentation/devicetree/bindings/virtio/mmio.yaml
+++ b/Documentation/devicetree/bindings/virtio/mmio.yaml
@@ -36,7 +36,8 @@ required:
   - reg
   - interrupts
 
-additionalProperties: false
+additionalProperties:
+  type: object
 
 examples:
   - |
diff --git a/Documentation/devicetree/bindings/virtio/virtio-device.yaml b/Documentation/devicetree/bindings/virtio/virtio-device.yaml
new file mode 100644
index 000000000000..1778ea9b5aa5
--- /dev/null
+++ b/Documentation/devicetree/bindings/virtio/virtio-device.yaml
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/virtio/virtio-device.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Virtio device bindings
+
+maintainers:
+  - Viresh Kumar <viresh.kumar@linaro.org>
+
+description:
+  These bindings are applicable to virtio devices irrespective of the bus they
+  are bound to, like mmio or pci.
+
+# We need a select here so we don't match all nodes with 'virtio,mmio'
+properties:
+  compatible:
+    pattern: "^virtio,device[0-9a-f]{1,8}$"
+    description: Virtio device nodes.
+      "virtio,deviceID", where ID is the virtio device id. The textual
+      representation of ID shall be in lower case hexadecimal with leading
+      zeroes suppressed.
+
+required:
+  - compatible
+
+additionalProperties: true
+
+examples:
+  - |
+    virtio@3000 {
+        compatible = "virtio,mmio";
+        reg = <0x3000 0x100>;
+        interrupts = <43>;
+
+        i2c {
+            compatible = "virtio,device22";
+        };
+    };
+...
diff --git a/Documentation/devicetree/bindings/watchdog/maxim,max63xx.yaml b/Documentation/devicetree/bindings/watchdog/maxim,max63xx.yaml
new file mode 100644
index 000000000000..ab9641e845db
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/maxim,max63xx.yaml
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/watchdog/maxim,max63xx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Maxim 63xx Watchdog Timers
+
+allOf:
+  - $ref: "watchdog.yaml#"
+
+maintainers:
+  - Marc Zyngier <maz@kernel.org>
+  - Linus Walleij <linus.walleij@linaro.org>
+
+properties:
+  compatible:
+    enum:
+      - maxim,max6369
+      - maxim,max6370
+      - maxim,max6371
+      - maxim,max6372
+      - maxim,max6373
+      - maxim,max6374
+
+  reg:
+    description: This is a 1-byte memory-mapped address
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    wdt: watchdog@50000000 {
+        compatible = "maxim,max6369";
+        reg = <0x50000000 0x1>;
+        timeout-sec = <10>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt b/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt
index 416d716403f6..a4e31ce96e0e 100644
--- a/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt
@@ -13,6 +13,7 @@ Required properties:
 	"mediatek,mt7622-wdt", "mediatek,mt6589-wdt": for MT7622
 	"mediatek,mt7623-wdt", "mediatek,mt6589-wdt": for MT7623
 	"mediatek,mt7629-wdt", "mediatek,mt6589-wdt": for MT7629
+	"mediatek,mt7986-wdt", "mediatek,mt6589-wdt": for MT7986
 	"mediatek,mt8183-wdt": for MT8183
 	"mediatek,mt8516-wdt", "mediatek,mt6589-wdt": for MT8516
 	"mediatek,mt8192-wdt": for MT8192
diff --git a/Documentation/driver-api/cxl/memory-devices.rst b/Documentation/driver-api/cxl/memory-devices.rst
index 487ce4f41d77..50ebcda17ad0 100644
--- a/Documentation/driver-api/cxl/memory-devices.rst
+++ b/Documentation/driver-api/cxl/memory-devices.rst
@@ -36,9 +36,15 @@ CXL Core
 .. kernel-doc:: drivers/cxl/cxl.h
    :internal:
 
-.. kernel-doc:: drivers/cxl/core.c
+.. kernel-doc:: drivers/cxl/core/bus.c
    :doc: cxl core
 
+.. kernel-doc:: drivers/cxl/core/pmem.c
+   :doc: cxl pmem
+
+.. kernel-doc:: drivers/cxl/core/regs.c
+   :doc: cxl registers
+
 External Interfaces
 ===================
 
diff --git a/Documentation/driver-api/gpio/consumer.rst b/Documentation/driver-api/gpio/consumer.rst
index 3366a991b4aa..47869ca8ccf0 100644
--- a/Documentation/driver-api/gpio/consumer.rst
+++ b/Documentation/driver-api/gpio/consumer.rst
@@ -72,6 +72,10 @@ for the GPIO. Values can be:
 * GPIOD_OUT_HIGH_OPEN_DRAIN same as GPIOD_OUT_HIGH but also enforce the line
   to be electrically used with open drain.
 
+Note that the initial value is *logical* and the physical line level depends on
+whether the line is configured active high or active low (see
+:ref:`active_low_semantics`).
+
 The two last flags are used for use cases where open drain is mandatory, such
 as I2C: if the line is not already configured as open drain in the mappings
 (see board.txt), then open drain will be enforced anyway and a warning will be
@@ -252,6 +256,8 @@ that can't be accessed from hardIRQ handlers, these calls act the same as the
 spinlock-safe calls.
 
 
+.. _active_low_semantics:
+
 The active low and open drain semantics
 ---------------------------------------
 As a consumer should not have to care about the physical line level, all of the
@@ -309,9 +315,11 @@ work on the raw line value::
 	void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value)
 	int gpiod_direction_output_raw(struct gpio_desc *desc, int value)
 
-The active low state of a GPIO can also be queried using the following call::
+The active low state of a GPIO can also be queried and toggled using the
+following calls::
 
 	int gpiod_is_active_low(const struct gpio_desc *desc)
+	void gpiod_toggle_active_low(struct gpio_desc *desc)
 
 Note that these functions should only be used with great moderation; a driver
 should not have to care about the physical line level or open drain semantics.
diff --git a/Documentation/driver-api/gpio/driver.rst b/Documentation/driver-api/gpio/driver.rst
index d6b0d779859b..bbc53920d4dd 100644
--- a/Documentation/driver-api/gpio/driver.rst
+++ b/Documentation/driver-api/gpio/driver.rst
@@ -547,13 +547,10 @@ To use the helpers please keep the following in mind:
   the irqchip can initialize. E.g. .dev and .can_sleep shall be set up
   properly.
 
-- Nominally set all handlers to handle_bad_irq() in the setup call and pass
-  handle_bad_irq() as flow handler parameter in gpiochip_irqchip_add() if it is
-  expected for GPIO driver that irqchip .set_type() callback will be called
-  before using/enabling each GPIO IRQ. Then set the handler to
-  handle_level_irq() and/or handle_edge_irq() in the irqchip .set_type()
-  callback depending on what your controller supports and what is requested
-  by the consumer.
+- Nominally set gpio_irq_chip.handler to handle_bad_irq. Then, if your irqchip
+  is cascaded, set the handler to handle_level_irq() and/or handle_edge_irq()
+  in the irqchip .set_type() callback depending on what your controller
+  supports and what is requested by the consumer.
 
 
 Locking IRQ usage
diff --git a/Documentation/driver-api/vfio.rst b/Documentation/driver-api/vfio.rst
index 606eed8823ce..c663b6f97825 100644
--- a/Documentation/driver-api/vfio.rst
+++ b/Documentation/driver-api/vfio.rst
@@ -255,11 +255,13 @@ vfio_unregister_group_dev() respectively::
 	void vfio_init_group_dev(struct vfio_device *device,
 				struct device *dev,
 				const struct vfio_device_ops *ops);
+	void vfio_uninit_group_dev(struct vfio_device *device);
 	int vfio_register_group_dev(struct vfio_device *device);
 	void vfio_unregister_group_dev(struct vfio_device *device);
 
 The driver should embed the vfio_device in its own structure and call
-vfio_init_group_dev() to pre-configure it before going to registration.
+vfio_init_group_dev() to pre-configure it before going to registration
+and call vfio_uninit_group_dev() after completing the un-registration.
 vfio_register_group_dev() indicates to the core to begin tracking the
 iommu_group of the specified dev and register the dev as owned by a VFIO bus
 driver. Once vfio_register_group_dev() returns it is possible for userspace to
diff --git a/Documentation/features/vm/ELF-ASLR/arch-support.txt b/Documentation/features/vm/ELF-ASLR/arch-support.txt
index 99cb6d7f5005..2949c99fbb2f 100644
--- a/Documentation/features/vm/ELF-ASLR/arch-support.txt
+++ b/Documentation/features/vm/ELF-ASLR/arch-support.txt
@@ -22,7 +22,7 @@
     |    openrisc: | TODO |
     |      parisc: |  ok  |
     |     powerpc: |  ok  |
-    |       riscv: | TODO |
+    |       riscv: |  ok  |
     |        s390: |  ok  |
     |          sh: | TODO |
     |       sparc: | TODO |
diff --git a/Documentation/features/vm/THP/arch-support.txt b/Documentation/features/vm/THP/arch-support.txt
index e8238cb2a4da..7dbd6967b37e 100644
--- a/Documentation/features/vm/THP/arch-support.txt
+++ b/Documentation/features/vm/THP/arch-support.txt
@@ -22,7 +22,7 @@
     |    openrisc: |  ..  |
     |      parisc: | TODO |
     |     powerpc: |  ok  |
-    |       riscv: | TODO |
+    |       riscv: |  ok  |
     |        s390: |  ok  |
     |          sh: |  ..  |
     |       sparc: |  ok  |
diff --git a/Documentation/features/vm/huge-vmap/arch-support.txt b/Documentation/features/vm/huge-vmap/arch-support.txt
index 439fd9069b8b..bc53905a0306 100644
--- a/Documentation/features/vm/huge-vmap/arch-support.txt
+++ b/Documentation/features/vm/huge-vmap/arch-support.txt
@@ -1,7 +1,7 @@
 #
 # Feature name:          huge-vmap
 #         Kconfig:       HAVE_ARCH_HUGE_VMAP
-#         description:   arch supports the ioremap_pud_enabled() and ioremap_pmd_enabled() VM APIs
+#         description:   arch supports the arch_vmap_pud_supported() and arch_vmap_pmd_supported() VM APIs
 #
     -----------------------
     |         arch |status|
diff --git a/Documentation/filesystems/api-summary.rst b/Documentation/filesystems/api-summary.rst
index 7e5c04c98619..98db2ea5fa12 100644
--- a/Documentation/filesystems/api-summary.rst
+++ b/Documentation/filesystems/api-summary.rst
@@ -71,9 +71,6 @@ Other Functions
 .. kernel-doc:: fs/fs-writeback.c
    :export:
 
-.. kernel-doc:: fs/block_dev.c
-   :export:
-
 .. kernel-doc:: fs/anon_inodes.c
    :export:
 
diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst
index 832839fcf4c3..b97579b7d8fb 100644
--- a/Documentation/filesystems/erofs.rst
+++ b/Documentation/filesystems/erofs.rst
@@ -84,6 +84,9 @@ cache_strategy=%s      Select a strategy for cached decompression from now on:
                                    It still does in-place I/O decompression
                                    for the rest compressed physical clusters.
 		       ==========  =============================================
+dax={always,never}     Use direct access (no page cache).  See
+                       Documentation/filesystems/dax.rst.
+dax                    A legacy option which is an alias for ``dax=always``.
 ===================    =========================================================
 
 On-disk details
@@ -153,13 +156,14 @@ may not. All metadatas can be now observed in two different spaces (views):
 
     Xattrs, extents, data inline are followed by the corresponding inode with
     proper alignment, and they could be optional for different data mappings.
-    _currently_ total 4 valid data mappings are supported:
+    _currently_ total 5 data layouts are supported:
 
     ==  ====================================================================
      0  flat file data without data inline (no extent);
      1  fixed-sized output data compression (with non-compacted indexes);
      2  flat file data with tail packing data inline (no extent);
-     3  fixed-sized output data compression (with compacted indexes, v5.3+).
+     3  fixed-sized output data compression (with compacted indexes, v5.3+);
+     4  chunk-based file (v5.15+).
     ==  ====================================================================
 
     The size of the optional xattrs is indicated by i_xattr_count in inode
@@ -210,6 +214,17 @@ Note that apart from the offset of the first filename, nameoff0 also indicates
 the total number of directory entries in this block since it is no need to
 introduce another on-disk field at all.
 
+Chunk-based file
+----------------
+In order to support chunk-based data deduplication, a new inode data layout has
+been supported since Linux v5.15: Files are split in equal-sized data chunks
+with ``extents`` area of the inode metadata indicating how to get the chunk
+data: these can be simply as a 4-byte block address array or in the 8-byte
+chunk index form (see struct erofs_inode_chunk_index in erofs_fs.h for more
+details.)
+
+By the way, chunk-based files are all uncompressed for now.
+
 Data compression
 ----------------
 EROFS implements LZ4 fixed-sized output compression which generates fixed-sized
diff --git a/Documentation/filesystems/ext4/globals.rst b/Documentation/filesystems/ext4/globals.rst
index 368bf7662b96..b17418974fd3 100644
--- a/Documentation/filesystems/ext4/globals.rst
+++ b/Documentation/filesystems/ext4/globals.rst
@@ -11,3 +11,4 @@ have static metadata at fixed locations.
 .. include:: bitmaps.rst
 .. include:: mmp.rst
 .. include:: journal.rst
+.. include:: orphan.rst
diff --git a/Documentation/filesystems/ext4/inodes.rst b/Documentation/filesystems/ext4/inodes.rst
index a65baffb4ebf..6c5ce666e63f 100644
--- a/Documentation/filesystems/ext4/inodes.rst
+++ b/Documentation/filesystems/ext4/inodes.rst
@@ -498,11 +498,11 @@ structure -- inode change time (ctime), access time (atime), data
 modification time (mtime), and deletion time (dtime). The four fields
 are 32-bit signed integers that represent seconds since the Unix epoch
 (1970-01-01 00:00:00 GMT), which means that the fields will overflow in
-January 2038. For inodes that are not linked from any directory but are
-still open (orphan inodes), the dtime field is overloaded for use with
-the orphan list. The superblock field ``s_last_orphan`` points to the
-first inode in the orphan list; dtime is then the number of the next
-orphaned inode, or zero if there are no more orphans.
+January 2038. If the filesystem does not have orphan_file feature, inodes
+that are not linked from any directory but are still open (orphan inodes) have
+the dtime field overloaded for use with the orphan list. The superblock field
+``s_last_orphan`` points to the first inode in the orphan list; dtime is then
+the number of the next orphaned inode, or zero if there are no more orphans.
 
 If the inode structure size ``sb->s_inode_size`` is larger than 128
 bytes and the ``i_inode_extra`` field is large enough to encompass the
diff --git a/Documentation/filesystems/ext4/orphan.rst b/Documentation/filesystems/ext4/orphan.rst
new file mode 100644
index 000000000000..bb19ecd1b626
--- /dev/null
+++ b/Documentation/filesystems/ext4/orphan.rst
@@ -0,0 +1,52 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Orphan file
+-----------
+
+In unix there can inodes that are unlinked from directory hierarchy but that
+are still alive because they are open. In case of crash the filesystem has to
+clean up these inodes as otherwise they (and the blocks referenced from them)
+would leak. Similarly if we truncate or extend the file, we need not be able
+to perform the operation in a single journalling transaction. In such case we
+track the inode as orphan so that in case of crash extra blocks allocated to
+the file get truncated.
+
+Traditionally ext4 tracks orphan inodes in a form of single linked list where
+superblock contains the inode number of the last orphan inode (s\_last\_orphan
+field) and then each inode contains inode number of the previously orphaned
+inode (we overload i\_dtime inode field for this). However this filesystem
+global single linked list is a scalability bottleneck for workloads that result
+in heavy creation of orphan inodes. When orphan file feature
+(COMPAT\_ORPHAN\_FILE) is enabled, the filesystem has a special inode
+(referenced from the superblock through s\_orphan_file_inum) with several
+blocks. Each of these blocks has a structure:
+
+.. list-table::
+   :widths: 8 8 24 40
+   :header-rows: 1
+
+   * - Offset
+     - Type
+     - Name
+     - Description
+   * - 0x0
+     - Array of \_\_le32 entries
+     - Orphan inode entries
+     - Each \_\_le32 entry is either empty (0) or it contains inode number of
+       an orphan inode.
+   * - blocksize - 8
+     - \_\_le32
+     - ob\_magic
+     - Magic value stored in orphan block tail (0x0b10ca04)
+   * - blocksize - 4
+     - \_\_le32
+     - ob\_checksum
+     - Checksum of the orphan block.
+
+When a filesystem with orphan file feature is writeably mounted, we set
+RO\_COMPAT\_ORPHAN\_PRESENT feature in the superblock to indicate there may
+be valid orphan entries. In case we see this feature when mounting the
+filesystem, we read the whole orphan file and process all orphan inodes found
+there as usual. When cleanly unmounting the filesystem we remove the
+RO\_COMPAT\_ORPHAN\_PRESENT feature to avoid unnecessary scanning of the orphan
+file and also make the filesystem fully compatible with older kernels.
diff --git a/Documentation/filesystems/ext4/special_inodes.rst b/Documentation/filesystems/ext4/special_inodes.rst
index 9061aabba827..94f304e3a0a7 100644
--- a/Documentation/filesystems/ext4/special_inodes.rst
+++ b/Documentation/filesystems/ext4/special_inodes.rst
@@ -36,3 +36,20 @@ ext4 reserves some inode for special features, as follows:
    * - 11
      - Traditional first non-reserved inode. Usually this is the lost+found directory. See s\_first\_ino in the superblock.
 
+Note that there are also some inodes allocated from non-reserved inode numbers
+for other filesystem features which are not referenced from standard directory
+hierarchy. These are generally reference from the superblock. They are:
+
+.. list-table::
+   :widths: 20 50
+   :header-rows: 1
+
+   * - Superblock field
+     - Description
+
+   * - s\_lpf\_ino
+     - Inode number of lost+found directory.
+   * - s\_prj\_quota\_inum
+     - Inode number of quota file tracking project quotas
+   * - s\_orphan\_file\_inum
+     - Inode number of file tracking orphan inodes.
diff --git a/Documentation/filesystems/ext4/super.rst b/Documentation/filesystems/ext4/super.rst
index 2eb1ab20498d..f6a548e957bb 100644
--- a/Documentation/filesystems/ext4/super.rst
+++ b/Documentation/filesystems/ext4/super.rst
@@ -479,7 +479,11 @@ The ext4 superblock is laid out as follows in
      - Filename charset encoding flags.
    * - 0x280
      - \_\_le32
-     - s\_reserved[95]
+     - s\_orphan\_file\_inum
+     - Orphan file inode number.
+   * - 0x284
+     - \_\_le32
+     - s\_reserved[94]
      - Padding to the end of the block.
    * - 0x3FC
      - \_\_le32
@@ -603,6 +607,11 @@ following:
        the journal, JBD2 incompat feature
        (JBD2\_FEATURE\_INCOMPAT\_FAST\_COMMIT) gets
        set (COMPAT\_FAST\_COMMIT).
+   * - 0x1000
+     - Orphan file allocated. This is the special file for more efficient
+       tracking of unlinked but still open inodes. When there may be any
+       entries in the file, we additionally set proper rocompat feature
+       (RO\_COMPAT\_ORPHAN\_PRESENT).
 
 .. _super_incompat:
 
@@ -713,6 +722,10 @@ the following:
      - Filesystem tracks project quotas. (RO\_COMPAT\_PROJECT)
    * - 0x8000
      - Verity inodes may be present on the filesystem. (RO\_COMPAT\_VERITY)
+   * - 0x10000
+     - Indicates orphan file may have valid orphan entries and thus we need
+       to clean them up when mounting the filesystem
+       (RO\_COMPAT\_ORPHAN\_PRESENT).
 
 .. _super_def_hash:
 
diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index ff9e7cc97c65..09de6ebbbdfa 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -185,6 +185,7 @@ fault_type=%d		 Support configuring fault injection type, should be
 			 FAULT_KVMALLOC		  0x000000002
 			 FAULT_PAGE_ALLOC	  0x000000004
 			 FAULT_PAGE_GET		  0x000000008
+			 FAULT_ALLOC_BIO	  0x000000010 (obsolete)
 			 FAULT_ALLOC_NID	  0x000000020
 			 FAULT_ORPHAN		  0x000000040
 			 FAULT_BLOCK		  0x000000080
@@ -195,6 +196,7 @@ fault_type=%d		 Support configuring fault injection type, should be
 			 FAULT_CHECKPOINT	  0x000001000
 			 FAULT_DISCARD		  0x000002000
 			 FAULT_WRITE_IO		  0x000004000
+			 FAULT_SLAB_ALLOC	  0x000008000
 			 ===================	  ===========
 mode=%s			 Control block allocation mode which supports "adaptive"
 			 and "lfs". In "lfs" mode, there should be no random
@@ -312,6 +314,14 @@ inlinecrypt		 When possible, encrypt/decrypt the contents of encrypted
 			 Documentation/block/inline-encryption.rst.
 atgc			 Enable age-threshold garbage collection, it provides high
 			 effectiveness and efficiency on background GC.
+discard_unit=%s		 Control discard unit, the argument can be "block", "segment"
+			 and "section", issued discard command's offset/size will be
+			 aligned to the unit, by default, "discard_unit=block" is set,
+			 so that small discard functionality is enabled.
+			 For blkzoned device, "discard_unit=section" will be set by
+			 default, it is helpful for large sized SMR or ZNS devices to
+			 reduce memory cost by getting rid of fs metadata supports small
+			 discard.
 ======================== ============================================================
 
 Debugfs Entries
@@ -857,8 +867,11 @@ Compression implementation
   directly in order to guarantee potential data updates later to the space.
   Instead, the main goal is to reduce data writes to flash disk as much as
   possible, resulting in extending disk life time as well as relaxing IO
-  congestion. Alternatively, we've added ioctl interface to reclaim compressed
-  space and show it to user after putting the immutable bit.
+  congestion. Alternatively, we've added ioctl(F2FS_IOC_RELEASE_COMPRESS_BLOCKS)
+  interface to reclaim compressed space and show it to user after putting the
+  immutable bit. Immutable bit, after release, it doesn't allow writing/mmaping
+  on the file, until reserving compressed space via
+  ioctl(F2FS_IOC_RESERVE_COMPRESS_BLOCKS) or truncating filesize to zero.
 
 Compress metadata layout::
 
diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst
index 1a2dd4d35717..c0ad233963ae 100644
--- a/Documentation/filesystems/index.rst
+++ b/Documentation/filesystems/index.rst
@@ -101,6 +101,7 @@ Documentation for filesystem implementations.
    nilfs2
    nfs/index
    ntfs
+   ntfs3
    ocfs2
    ocfs2-online-filecheck
    omfs
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 2a75dd5da7b5..d36fe79167b3 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -70,7 +70,7 @@ prototypes::
 	const char *(*get_link) (struct dentry *, struct inode *, struct delayed_call *);
 	void (*truncate) (struct inode *);
 	int (*permission) (struct inode *, int, unsigned int);
-	int (*get_acl)(struct inode *, int);
+	struct posix_acl * (*get_acl)(struct inode *, int, bool);
 	int (*setattr) (struct dentry *, struct iattr *);
 	int (*getattr) (const struct path *, struct kstat *, u32, unsigned int);
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
diff --git a/Documentation/filesystems/ntfs3.rst b/Documentation/filesystems/ntfs3.rst
new file mode 100644
index 000000000000..ffe9ea0c1499
--- /dev/null
+++ b/Documentation/filesystems/ntfs3.rst
@@ -0,0 +1,106 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====
+NTFS3
+=====
+
+
+Summary and Features
+====================
+
+NTFS3 is fully functional NTFS Read-Write driver. The driver works with
+NTFS versions up to 3.1, normal/compressed/sparse files
+and journal replaying. File system type to use on mount is 'ntfs3'.
+
+- This driver implements NTFS read/write support for normal, sparse and
+  compressed files.
+- Supports native journal replaying;
+- Supports extended attributes
+	Predefined extended attributes:
+	- 'system.ntfs_security' gets/sets security
+			descriptor (SECURITY_DESCRIPTOR_RELATIVE)
+	- 'system.ntfs_attrib' gets/sets ntfs file/dir attributes.
+		Note: applied to empty files, this allows to switch type between
+		sparse(0x200), compressed(0x800) and normal;
+- Supports NFS export of mounted NTFS volumes.
+
+Mount Options
+=============
+
+The list below describes mount options supported by NTFS3 driver in addition to
+generic ones.
+
+===============================================================================
+
+nls=name		This option informs the driver how to interpret path
+			strings and translate them to Unicode and back. If
+			this option is not set, the default codepage will be
+			used (CONFIG_NLS_DEFAULT).
+			Examples:
+				'nls=utf8'
+
+uid=
+gid=
+umask=			Controls the default permissions for files/directories created
+			after the NTFS volume is mounted.
+
+fmask=
+dmask=			Instead of specifying umask which applies both to
+			files and directories, fmask applies only to files and
+			dmask only to directories.
+
+nohidden		Files with the Windows-specific HIDDEN (FILE_ATTRIBUTE_HIDDEN)
+			attribute will not be shown under Linux.
+
+sys_immutable		Files with the Windows-specific SYSTEM
+			(FILE_ATTRIBUTE_SYSTEM) attribute will be marked as system
+			immutable files.
+
+discard			Enable support of the TRIM command for improved performance
+			on delete operations, which is recommended for use with the
+			solid-state drives (SSD).
+
+force			Forces the driver to mount partitions even if 'dirty' flag
+			(volume dirty) is set. Not recommended for use.
+
+sparse			Create new files as "sparse".
+
+showmeta		Use this parameter to show all meta-files (System Files) on
+			a mounted NTFS partition.
+			By default, all meta-files are hidden.
+
+prealloc		Preallocate space for files excessively when file size is
+			increasing on writes. Decreases fragmentation in case of
+			parallel write operations to different files.
+
+no_acs_rules		"No access rules" mount option sets access rights for
+			files/folders to 777 and owner/group to root. This mount
+			option absorbs all other permissions:
+			- permissions change for files/folders will be reported
+				as successful, but they will remain 777;
+			- owner/group change will be reported as successful, but
+				they will stay as root
+
+acl			Support POSIX ACLs (Access Control Lists). Effective if
+			supported by Kernel. Not to be confused with NTFS ACLs.
+			The option specified as acl enables support for POSIX ACLs.
+
+noatime			All files and directories will not update their last access
+			time attribute if a partition is mounted with this parameter.
+			This option can speed up file system operation.
+
+===============================================================================
+
+ToDo list
+=========
+
+- Full journaling support (currently journal replaying is supported) over JBD.
+
+
+References
+==========
+https://www.paragon-software.com/home/ntfs-linux-professional/
+	- Commercial version of the NTFS driver for Linux.
+
+almaz.alexandrovich@paragon-software.com
+	- Direct e-mail address for feedback and requests on the NTFS3 implementation.
diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst
index 455ca86eb4fc..7da6c30ed596 100644
--- a/Documentation/filesystems/overlayfs.rst
+++ b/Documentation/filesystems/overlayfs.rst
@@ -427,6 +427,9 @@ b) If a file residing on a lower layer is opened for read-only and then
 memory mapped with MAP_SHARED, then subsequent changes to the file are not
 reflected in the memory mapping.
 
+c) If a file residing on a lower layer is being executed, then opening that
+file for write or truncating the file will not be denied with ETXTBSY.
+
 The following options allow overlayfs to act more like a standards
 compliant filesystem:
 
diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index 14c31eced416..bf5c48066fac 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -432,7 +432,7 @@ As of kernel 2.6.22, the following members are defined:
 		const char *(*get_link) (struct dentry *, struct inode *,
 					 struct delayed_call *);
 		int (*permission) (struct user_namespace *, struct inode *, int);
-		int (*get_acl)(struct inode *, int);
+		struct posix_acl * (*get_acl)(struct inode *, int, bool);
 		int (*setattr) (struct user_namespace *, struct dentry *, struct iattr *);
 		int (*getattr) (struct user_namespace *, const struct path *, struct kstat *, u32, unsigned int);
 		ssize_t (*listxattr) (struct dentry *, char *, size_t);
diff --git a/Documentation/firmware-guide/acpi/dsd/graph.rst b/Documentation/firmware-guide/acpi/dsd/graph.rst
index 4341299aa937..0ced07cb1be3 100644
--- a/Documentation/firmware-guide/acpi/dsd/graph.rst
+++ b/Documentation/firmware-guide/acpi/dsd/graph.rst
@@ -159,7 +159,7 @@ References
 
 [2] Devicetree. https://www.devicetree.org, referenced 2016-10-03.
 
-[3] Documentation/devicetree/bindings/graph.txt
+[3] Documentation/devicetree/bindings/graph.txt
 
 [4] Device Properties UUID For _DSD.
     https://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf,
diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst
index d5a73fa2c9ef..8126beadc7df 100644
--- a/Documentation/gpu/drm-mm.rst
+++ b/Documentation/gpu/drm-mm.rst
@@ -37,7 +37,7 @@ TTM initialization
     This section is outdated.
 
 Drivers wishing to support TTM must pass a filled :c:type:`ttm_bo_driver
-<ttm_bo_driver>` structure to ttm_bo_device_init, together with an
+<ttm_bo_driver>` structure to ttm_device_init, together with an
 initialized global reference to the memory manager.  The ttm_bo_driver
 structure contains several fields with function pointers for
 initializing the TTM, allocating and freeing memory, waiting for command
diff --git a/Documentation/kbuild/llvm.rst b/Documentation/kbuild/llvm.rst
index b18401d2ba82..d32616891dcf 100644
--- a/Documentation/kbuild/llvm.rst
+++ b/Documentation/kbuild/llvm.rst
@@ -38,7 +38,7 @@ Cross Compiling
 A single Clang compiler binary will typically contain all supported backends,
 which can help simplify cross compiling. ::
 
-	ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- make CC=clang
+	make ARCH=arm64 CC=clang CROSS_COMPILE=aarch64-linux-gnu-
 
 ``CROSS_COMPILE`` is not used to prefix the Clang compiler binary, instead
 ``CROSS_COMPILE`` is used to set a command line flag: ``--target=<triple>``. For
@@ -60,8 +60,27 @@ They can be enabled individually. The full list of the parameters: ::
 	  OBJCOPY=llvm-objcopy OBJDUMP=llvm-objdump READELF=llvm-readelf \
 	  HOSTCC=clang HOSTCXX=clang++ HOSTAR=llvm-ar HOSTLD=ld.lld
 
-Currently, the integrated assembler is disabled by default. You can pass
-``LLVM_IAS=1`` to enable it.
+The integrated assembler is enabled by default. You can pass ``LLVM_IAS=0`` to
+disable it.
+
+Omitting CROSS_COMPILE
+----------------------
+
+As explained above, ``CROSS_COMPILE`` is used to set ``--target=<triple>``.
+
+If ``CROSS_COMPILE`` is not specified, the ``--target=<triple>`` is inferred
+from ``ARCH``.
+
+That means if you use only LLVM tools, ``CROSS_COMPILE`` becomes unnecessary.
+
+For example, to cross-compile the arm64 kernel::
+
+	make ARCH=arm64 LLVM=1
+
+If ``LLVM_IAS=0`` is specified, ``CROSS_COMPILE`` is also used to derive
+``--prefix=<path>`` to search for the GNU assembler and linker. ::
+
+	make ARCH=arm64 LLVM=1 LLVM_IAS=0 CROSS_COMPILE=aarch64-linux-gnu-
 
 Supported Architectures
 -----------------------
@@ -111,9 +130,10 @@ Getting Help
 ------------
 
 - `Website <https://clangbuiltlinux.github.io/>`_
-- `Mailing List <https://groups.google.com/forum/#!forum/clang-built-linux>`_: <clang-built-linux@googlegroups.com>
+- `Mailing List <https://lore.kernel.org/llvm/>`_: <llvm@lists.linux.dev>
+- `Old Mailing List Archives <https://groups.google.com/g/clang-built-linux>`_
 - `Issue Tracker <https://github.com/ClangBuiltLinux/linux/issues>`_
-- IRC: #clangbuiltlinux on chat.freenode.net
+- IRC: #clangbuiltlinux on irc.libera.chat
 - `Telegram <https://t.me/ClangBuiltLinux>`_: @ClangBuiltLinux
 - `Wiki <https://github.com/ClangBuiltLinux/linux/wiki>`_
 - `Beginner Bugs <https://github.com/ClangBuiltLinux/linux/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22>`_
diff --git a/Documentation/kernel-hacking/hacking.rst b/Documentation/kernel-hacking/hacking.rst
index df65c19aa7df..55bd37a2efb0 100644
--- a/Documentation/kernel-hacking/hacking.rst
+++ b/Documentation/kernel-hacking/hacking.rst
@@ -76,8 +76,8 @@ handler is never re-entered: if the same interrupt arrives, it is queued
 fast: frequently it simply acknowledges the interrupt, marks a 'software
 interrupt' for execution and exits.
 
-You can tell you are in a hardware interrupt, because
-:c:func:`in_irq()` returns true.
+You can tell you are in a hardware interrupt, because in_hardirq() returns
+true.
 
 .. warning::
 
diff --git a/Documentation/kernel-hacking/locking.rst b/Documentation/kernel-hacking/locking.rst
index ed1284c6f078..90bc3f51eda9 100644
--- a/Documentation/kernel-hacking/locking.rst
+++ b/Documentation/kernel-hacking/locking.rst
@@ -94,16 +94,10 @@ primitives, but I'll pretend they don't exist.
 Locking in the Linux Kernel
 ===========================
 
-If I could give you one piece of advice: never sleep with anyone crazier
-than yourself. But if I had to give you advice on locking: **keep it
-simple**.
+If I could give you one piece of advice on locking: **keep it simple**.
 
 Be reluctant to introduce new locks.
 
-Strangely enough, this last one is the exact reverse of my advice when
-you **have** slept with someone crazier than yourself. And you should
-think about getting a big dog.
-
 Two Main Types of Kernel Locks: Spinlocks and Mutexes
 -----------------------------------------------------
 
@@ -1406,7 +1400,7 @@ bh
   half will be running at any time.
 
 Hardware Interrupt / Hardware IRQ
-  Hardware interrupt request. in_irq() returns true in a
+  Hardware interrupt request. in_hardirq() returns true in a
   hardware interrupt handler.
 
 Interrupt Context
@@ -1418,7 +1412,7 @@ SMP
   (``CONFIG_SMP=y``).
 
 Software Interrupt / softirq
-  Software interrupt handler. in_irq() returns false;
+  Software interrupt handler. in_hardirq() returns false;
   in_softirq() returns true. Tasklets and softirqs both
   fall into the category of 'software interrupts'.
 
diff --git a/Documentation/locking/futex-requeue-pi.rst b/Documentation/locking/futex-requeue-pi.rst
index 14ab5787b9a7..dd4ecf4528a4 100644
--- a/Documentation/locking/futex-requeue-pi.rst
+++ b/Documentation/locking/futex-requeue-pi.rst
@@ -5,7 +5,7 @@ Futex Requeue PI
 Requeueing of tasks from a non-PI futex to a PI futex requires
 special handling in order to ensure the underlying rt_mutex is never
 left without an owner if it has waiters; doing so would break the PI
-boosting logic [see rt-mutex-desgin.txt] For the purposes of
+boosting logic [see rt-mutex-design.rst] For the purposes of
 brevity, this action will be referred to as "requeue_pi" throughout
 this document.  Priority inheritance is abbreviated throughout as
 "PI".
diff --git a/Documentation/locking/ww-mutex-design.rst b/Documentation/locking/ww-mutex-design.rst
index 54d9c17bb66b..6a4d7319f8f0 100644
--- a/Documentation/locking/ww-mutex-design.rst
+++ b/Documentation/locking/ww-mutex-design.rst
@@ -2,7 +2,7 @@
 Wound/Wait Deadlock-Proof Mutex Design
 ======================================
 
-Please read mutex-design.txt first, as it applies to wait/wound mutexes too.
+Please read mutex-design.rst first, as it applies to wait/wound mutexes too.
 
 Motivation for WW-Mutexes
 -------------------------
diff --git a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/dpio-driver.rst b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/dpio-driver.rst
index c50fd46631e0..e4ebfe62a183 100644
--- a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/dpio-driver.rst
+++ b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/dpio-driver.rst
@@ -1,5 +1,6 @@
 .. include:: <isonum.txt>
 
+===================================
 DPAA2 DPIO (Data Path I/O) Overview
 ===================================
 
diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst
index 34ca762ea56f..311128abb768 100644
--- a/Documentation/networking/nf_conntrack-sysctl.rst
+++ b/Documentation/networking/nf_conntrack-sysctl.rst
@@ -17,9 +17,8 @@ nf_conntrack_acct - BOOLEAN
 nf_conntrack_buckets - INTEGER
 	Size of hash table. If not specified as parameter during module
 	loading, the default size is calculated by dividing total memory
-	by 16384 to determine the number of buckets but the hash table will
-	never have fewer than 32 and limited to 16384 buckets. For systems
-	with more than 4GB of memory it will be 65536 buckets.
+	by 16384 to determine the number of buckets. The hash table will
+	never have fewer than 1024 and never more than 262144 buckets.
 	This sysctl is only writeable in the initial net namespace.
 
 nf_conntrack_checksum - BOOLEAN
@@ -100,8 +99,12 @@ nf_conntrack_log_invalid - INTEGER
 	Log invalid packets of a type specified by value.
 
 nf_conntrack_max - INTEGER
-	Size of connection tracking table.  Default value is
-	nf_conntrack_buckets value * 4.
+        Maximum number of allowed connection tracking entries. This value is set
+        to nf_conntrack_buckets by default.
+        Note that connection tracking entries are added to the table twice -- once
+        for the original direction and once for the reply direction (i.e., with
+        the reversed address). This means that with default settings a maxed-out
+        table will have a average hash chain length of 2, not 1.
 
 nf_conntrack_tcp_be_liberal - BOOLEAN
 	- 0 - disabled (default)
diff --git a/Documentation/power/energy-model.rst b/Documentation/power/energy-model.rst
index 60ac091d3b0d..8a2788afe89b 100644
--- a/Documentation/power/energy-model.rst
+++ b/Documentation/power/energy-model.rst
@@ -101,8 +101,7 @@ subsystems which use EM might rely on this flag to check if all EM devices use
 the same scale. If there are different scales, these subsystems might decide
 to: return warning/error, stop working or panic.
 See Section 3. for an example of driver implementing this
-callback, and kernel/power/energy_model.c for further documentation on this
-API.
+callback, or Section 2.4 for further documentation on this API
 
 
 2.3 Accessing performance domains
@@ -123,7 +122,17 @@ em_cpu_energy() API. The estimation is performed assuming that the schedutil
 CPUfreq governor is in use in case of CPU device. Currently this calculation is
 not provided for other type of devices.
 
-More details about the above APIs can be found in include/linux/energy_model.h.
+More details about the above APIs can be found in ``<linux/energy_model.h>``
+or in Section 2.4
+
+
+2.4 Description details of this API
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+.. kernel-doc:: include/linux/energy_model.h
+   :internal:
+
+.. kernel-doc:: kernel/power/energy_model.c
+   :export:
 
 
 3. Example driver
diff --git a/Documentation/powerpc/associativity.rst b/Documentation/powerpc/associativity.rst
new file mode 100644
index 000000000000..4d01c7368561
--- /dev/null
+++ b/Documentation/powerpc/associativity.rst
@@ -0,0 +1,105 @@
+============================
+NUMA resource associativity
+============================
+
+Associativity represents the groupings of the various platform resources into
+domains of substantially similar mean performance relative to resources outside
+of that domain. Resources subsets of a given domain that exhibit better
+performance relative to each other than relative to other resources subsets
+are represented as being members of a sub-grouping domain. This performance
+characteristic is presented in terms of NUMA node distance within the Linux kernel.
+From the platform view, these groups are also referred to as domains.
+
+PAPR interface currently supports different ways of communicating these resource
+grouping details to the OS. These are referred to as Form 0, Form 1 and Form2
+associativity grouping. Form 0 is the oldest format and is now considered deprecated.
+
+Hypervisor indicates the type/form of associativity used via "ibm,architecture-vec-5 property".
+Bit 0 of byte 5 in the "ibm,architecture-vec-5" property indicates usage of Form 0 or Form 1.
+A value of 1 indicates the usage of Form 1 associativity. For Form 2 associativity
+bit 2 of byte 5 in the "ibm,architecture-vec-5" property is used.
+
+Form 0
+------
+Form 0 associativity supports only two NUMA distances (LOCAL and REMOTE).
+
+Form 1
+------
+With Form 1 a combination of ibm,associativity-reference-points, and ibm,associativity
+device tree properties are used to determine the NUMA distance between resource groups/domains.
+
+The “ibm,associativity” property contains a list of one or more numbers (domainID)
+representing the resource’s platform grouping domains.
+
+The “ibm,associativity-reference-points” property contains a list of one or more numbers
+(domainID index) that represents the 1 based ordinal in the associativity lists.
+The list of domainID indexes represents an increasing hierarchy of resource grouping.
+
+ex:
+{ primary domainID index, secondary domainID index, tertiary domainID index.. }
+
+Linux kernel uses the domainID at the primary domainID index as the NUMA node id.
+Linux kernel computes NUMA distance between two domains by recursively comparing
+if they belong to the same higher-level domains. For mismatch at every higher
+level of the resource group, the kernel doubles the NUMA distance between the
+comparing domains.
+
+Form 2
+-------
+Form 2 associativity format adds separate device tree properties representing NUMA node distance
+thereby making the node distance computation flexible. Form 2 also allows flexible primary
+domain numbering. With numa distance computation now detached from the index value in
+"ibm,associativity-reference-points" property, Form 2 allows a large number of primary domain
+ids at the same domainID index representing resource groups of different performance/latency
+characteristics.
+
+Hypervisor indicates the usage of FORM2 associativity using bit 2 of byte 5 in the
+"ibm,architecture-vec-5" property.
+
+"ibm,numa-lookup-index-table" property contains a list of one or more numbers representing
+the domainIDs present in the system. The offset of the domainID in this property is
+used as an index while computing numa distance information via "ibm,numa-distance-table".
+
+prop-encoded-array: The number N of the domainIDs encoded as with encode-int, followed by
+N domainID encoded as with encode-int
+
+For ex:
+"ibm,numa-lookup-index-table" =  {4, 0, 8, 250, 252}. The offset of domainID 8 (2) is used when
+computing the distance of domain 8 from other domains present in the system. For the rest of
+this document, this offset will be referred to as domain distance offset.
+
+"ibm,numa-distance-table" property contains a list of one or more numbers representing the NUMA
+distance between resource groups/domains present in the system.
+
+prop-encoded-array: The number N of the distance values encoded as with encode-int, followed by
+N distance values encoded as with encode-bytes. The max distance value we could encode is 255.
+The number N must be equal to the square of m where m is the number of domainIDs in the
+numa-lookup-index-table.
+
+For ex:
+ibm,numa-lookup-index-table = <3 0 8 40>;
+ibm,numa-distace-table = <9>, /bits/ 8 < 10  20  80 20  10 160 80 160  10>;
+
+::
+
+	  | 0    8   40
+	--|------------
+	  |
+	0 | 10   20  80
+	  |
+	8 | 20   10  160
+	  |
+	40| 80   160  10
+
+A possible "ibm,associativity" property for resources in node 0, 8 and 40
+
+{ 3, 6, 7, 0 }
+{ 3, 6, 9, 8 }
+{ 3, 6, 7, 40}
+
+With "ibm,associativity-reference-points"  { 0x3 }
+
+"ibm,lookup-index-table" helps in having a compact representation of distance matrix.
+Since domainID can be sparse, the matrix of distances can also be effectively sparse.
+With "ibm,lookup-index-table" we can achieve a compact representation of
+distance information.
diff --git a/Documentation/powerpc/index.rst b/Documentation/powerpc/index.rst
index bf5f1a2bdbdf..0f7d3c495693 100644
--- a/Documentation/powerpc/index.rst
+++ b/Documentation/powerpc/index.rst
@@ -7,6 +7,7 @@ powerpc
 .. toctree::
     :maxdepth: 1
 
+    associativity
     booting
     bootwrapper
     cpu_families
diff --git a/Documentation/process/applying-patches.rst b/Documentation/process/applying-patches.rst
index 2e7017bef4b8..c2121c1e55d7 100644
--- a/Documentation/process/applying-patches.rst
+++ b/Documentation/process/applying-patches.rst
@@ -389,7 +389,7 @@ The -mm patches are experimental patches released by Andrew Morton.
 
 In the past, -mm tree were used to also test subsystem patches, but this
 function is now done via the
-`linux-next <https://www.kernel.org/doc/man-pages/linux-next.html>`
+`linux-next` (https://www.kernel.org/doc/man-pages/linux-next.html)
 tree. The Subsystem maintainers push their patches first to linux-next,
 and, during the merge window, sends them directly to Linus.
 
diff --git a/Documentation/process/deprecated.rst b/Documentation/process/deprecated.rst
index 9d83b8db8874..8ced754a5a0f 100644
--- a/Documentation/process/deprecated.rst
+++ b/Documentation/process/deprecated.rst
@@ -164,7 +164,9 @@ Paraphrasing Linus's current `guidance <https://lore.kernel.org/lkml/CA+55aFwQEd
   up to Linus's scrutiny, maybe you can use "%px", along with making sure
   you have sensible permissions.
 
-And finally, know that a toggle for "%p" hashing will `not be accepted <https://lore.kernel.org/lkml/CA+55aFwieC1-nAs+NFq9RTwaR8ef9hWa4MjNBWL41F-8wM49eA@mail.gmail.com/>`_.
+If you are debugging something where "%p" hashing is causing problems,
+you can temporarily boot with the debug flag "`no_hash_pointers
+<https://git.kernel.org/linus/5ead723a20e0447bc7db33dc3070b420e5f80aa6>`_".
 
 Variable Length Arrays (VLAs)
 -----------------------------
diff --git a/Documentation/process/kernel-docs.rst b/Documentation/process/kernel-docs.rst
index 22d9ace5df2a..da9527502ef0 100644
--- a/Documentation/process/kernel-docs.rst
+++ b/Documentation/process/kernel-docs.rst
@@ -126,15 +126,17 @@ On-line docs
         describes how to write user-mode utilities for communicating with
         Card Services.
 
-    * Title: **Linux Kernel Module Programming Guide**
+    * Title: **The Linux Kernel Module Programming Guide**
 
-      :Author: Ori Pomerantz.
-      :URL: https://tldp.org/LDP/lkmpg/2.6/html/index.html
-      :Date: 2001
+      :Author: Peter Jay Salzman, Michael Burian, Ori Pomerantz, Bob Mottram,
+        Jim Huang.
+      :URL: https://sysprog21.github.io/lkmpg/
+      :Date: 2021
       :Keywords: modules, GPL book, /proc, ioctls, system calls,
         interrupt handlers .
-      :Description: Very nice 92 pages GPL book on the topic of modules
-        programming. Lots of examples.
+      :Description: A very nice GPL book on the topic of modules
+        programming. Lots of examples. Currently the new version is being
+        actively maintained at https://github.com/sysprog21/lkmpg.
 
     * Title: **Global spinlock list and usage**
 
diff --git a/Documentation/process/maintainer-pgp-guide.rst b/Documentation/process/maintainer-pgp-guide.rst
index 8f8f1fee92b8..29e7d7b1cd44 100644
--- a/Documentation/process/maintainer-pgp-guide.rst
+++ b/Documentation/process/maintainer-pgp-guide.rst
@@ -944,12 +944,11 @@ have on your keyring::
     uid           [ unknown] Linus Torvalds <torvalds@kernel.org>
     sub   rsa2048 2011-09-20 [E]
 
-Next, open the `PGP pathfinder`_. In the "From" field, paste the key
-fingerprint of Linus Torvalds from the output above. In the "To" field,
-paste the key-id you found via ``gpg --search`` of the unknown key, and
-check the results:
-
-- `Finding paths to Linus`_
+Next, find a trust path from Linus Torvalds to the key-id you found via ``gpg
+--search`` of the unknown key.  For this, you can use several tools including
+https://github.com/mricon/wotmate,
+https://git.kernel.org/pub/scm/docs/kernel/pgpkeys.git/tree/graphs, and
+https://the.earth.li/~noodles/pathfind.html.
 
 If you get a few decent trust paths, then it's a pretty good indication
 that it is a valid key. You can add it to your keyring from the
@@ -962,6 +961,3 @@ administrators of the PGP Pathfinder service to not be malicious (in
 fact, this goes against :ref:`devs_not_infra`). However, if you
 do not carefully maintain your own web of trust, then it is a marked
 improvement over blindly trusting keyservers.
-
-.. _`PGP pathfinder`: https://pgp.cs.uu.nl/
-.. _`Finding paths to Linus`: https://pgp.cs.uu.nl/paths/79BE3E4300411886/to/C94035C21B4F2AEB.html
diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst
index 0852bcf73630..8ad6b93f91e6 100644
--- a/Documentation/process/submitting-patches.rst
+++ b/Documentation/process/submitting-patches.rst
@@ -216,11 +216,11 @@ cannot find a maintainer for the subsystem you are working on, Andrew
 Morton (akpm@linux-foundation.org) serves as a maintainer of last resort.
 
 You should also normally choose at least one mailing list to receive a copy
-of your patch set.  linux-kernel@vger.kernel.org functions as a list of
-last resort, but the volume on that list has caused a number of developers
-to tune it out.  Look in the MAINTAINERS file for a subsystem-specific
-list; your patch will probably get more attention there.  Please do not
-spam unrelated lists, though.
+of your patch set.  linux-kernel@vger.kernel.org should be used by default
+for all patches, but the volume on that list has caused a number of
+developers to tune it out.  Look in the MAINTAINERS file for a
+subsystem-specific list; your patch will probably get more attention there.
+Please do not spam unrelated lists, though.
 
 Many kernel-related lists are hosted on vger.kernel.org; you can find a
 list of them at http://vger.kernel.org/vger-lists.html.  There are
diff --git a/Documentation/sound/kernel-api/writing-an-alsa-driver.rst b/Documentation/sound/kernel-api/writing-an-alsa-driver.rst
index 255b7d3bebd6..176b73583b7a 100644
--- a/Documentation/sound/kernel-api/writing-an-alsa-driver.rst
+++ b/Documentation/sound/kernel-api/writing-an-alsa-driver.rst
@@ -3368,7 +3368,7 @@ This ensures that the device can be closed and the driver unloaded
 without losing data.
 
 This callback is optional. If you do not set ``drain`` in the struct
-snd_rawmidi_ops structure, ALSA will simply wait for 50 milliseconds
+snd_rawmidi_ops structure, ALSA will simply wait for 50 milliseconds
 instead.
 
 Miscellaneous Devices
diff --git a/Documentation/sphinx/requirements.txt b/Documentation/sphinx/requirements.txt
index 489f6626de67..9a35f50798a6 100644
--- a/Documentation/sphinx/requirements.txt
+++ b/Documentation/sphinx/requirements.txt
@@ -1,3 +1,2 @@
-docutils
-Sphinx==2.4.4
 sphinx_rtd_theme
+Sphinx==2.4.4
diff --git a/Documentation/trace/boottime-trace.rst b/Documentation/trace/boottime-trace.rst
index 8053898cfeb4..6dcfbc64014d 100644
--- a/Documentation/trace/boottime-trace.rst
+++ b/Documentation/trace/boottime-trace.rst
@@ -125,6 +125,71 @@ Note that kprobe and synthetic event definitions can be written under
 instance node, but those are also visible from other instances. So please
 take care for event name conflict.
 
+Ftrace Histogram Options
+------------------------
+
+Since it is too long to write a histogram action as a string for per-event
+action option, there are tree-style options under per-event 'hist' subkey
+for the histogram actions. For the detail of the each parameter,
+please read the event histogram document [3]_.
+
+.. [3] See :ref:`Documentation/trace/histogram.rst <histogram>`
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]keys = KEY1[, KEY2[...]]
+  Set histogram key parameters. (Mandatory)
+  The 'N' is a digit string for the multiple histogram. You can omit it
+  if there is one histogram on the event.
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]values = VAL1[, VAL2[...]]
+  Set histogram value parameters.
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]sort = SORT1[, SORT2[...]]
+  Set histogram sort parameter options.
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]size = NR_ENTRIES
+  Set histogram size (number of entries).
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]name = NAME
+  Set histogram name.
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]var.VARIABLE = EXPR
+  Define a new VARIABLE by EXPR expression.
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]<pause|continue|clear>
+  Set histogram control parameter. You can set one of them.
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]onmatch.[M.]event = GROUP.EVENT
+  Set histogram 'onmatch' handler matching event parameter.
+  The 'M' is a digit string for the multiple 'onmatch' handler. You can omit it
+  if there is one 'onmatch' handler on this histogram.
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]onmatch.[M.]trace = EVENT[, ARG1[...]]
+  Set histogram 'trace' action for 'onmatch'.
+  EVENT must be a synthetic event name, and ARG1... are parameters
+  for that event. Mandatory if 'onmatch.event' option is set.
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]onmax.[M.]var = VAR
+  Set histogram 'onmax' handler variable parameter.
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]onchange.[M.]var = VAR
+  Set histogram 'onchange' handler variable parameter.
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]<onmax|onchange>.[M.]save = ARG1[, ARG2[...]]
+  Set histogram 'save' action parameters for 'onmax' or 'onchange' handler.
+  This option or below 'snapshot' option is mandatory if 'onmax.var' or
+  'onchange.var' option is set.
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]<onmax|onchange>.[M.]snapshot
+  Set histogram 'snapshot' action for 'onmax' or 'onchange' handler.
+  This option or above 'save' option is mandatory if 'onmax.var' or
+  'onchange.var' option is set.
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.filter = FILTER_EXPR
+  Set histogram filter expression. You don't need 'if' in the FILTER_EXPR.
+
+Note that this 'hist' option can conflict with the per-event 'actions'
+option if the 'actions' option has a histogram action.
+
 
 When to Start
 =============
@@ -159,13 +224,23 @@ below::
         }
         synthetic.initcall_latency {
                 fields = "unsigned long func", "u64 lat"
-                actions = "hist:keys=func.sym,lat:vals=lat:sort=lat"
+                hist {
+                        keys = func.sym, lat
+                        values = lat
+                        sort = lat
+                }
         }
-        initcall.initcall_start {
-                actions = "hist:keys=func:ts0=common_timestamp.usecs"
+        initcall.initcall_start.hist {
+                keys = func
+                var.ts0 = common_timestamp.usecs
         }
-        initcall.initcall_finish {
-                actions = "hist:keys=func:lat=common_timestamp.usecs-$ts0:onmatch(initcall.initcall_start).initcall_latency(func,$lat)"
+        initcall.initcall_finish.hist {
+                keys = func
+                var.lat = common_timestamp.usecs - $ts0
+                onmatch {
+                        event = initcall.initcall_start
+                        trace = initcall_latency, func, $lat
+                }
         }
   }
 
diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst
index f99be8062bc8..533415644c54 100644
--- a/Documentation/trace/histogram.rst
+++ b/Documentation/trace/histogram.rst
@@ -70,15 +70,16 @@ Documentation written by Tom Zanussi
   modified by appending any of the following modifiers to the field
   name:
 
-	=========== ==========================================
-        .hex        display a number as a hex value
-	.sym        display an address as a symbol
-	.sym-offset display an address as a symbol and offset
-	.syscall    display a syscall id as a system call name
-	.execname   display a common_pid as a program name
-	.log2       display log2 value rather than raw number
-	.usecs      display a common_timestamp in microseconds
-	=========== ==========================================
+	=============  =================================================
+        .hex           display a number as a hex value
+	.sym           display an address as a symbol
+	.sym-offset    display an address as a symbol and offset
+	.syscall       display a syscall id as a system call name
+	.execname      display a common_pid as a program name
+	.log2          display log2 value rather than raw number
+	.buckets=size  display grouping of values rather than raw number
+	.usecs         display a common_timestamp in microseconds
+	=============  =================================================
 
   Note that in general the semantics of a given field aren't
   interpreted when applying a modifier to it, but there are some
@@ -228,7 +229,7 @@ Extended error information
   that lists the total number of bytes requested for each function in
   the kernel that made one or more calls to kmalloc::
 
-    # echo 'hist:key=call_site:val=bytes_req' > \
+    # echo 'hist:key=call_site:val=bytes_req.buckets=32' > \
             /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
 
   This tells the tracing system to create a 'hist' trigger using the
@@ -1823,20 +1824,99 @@ and variables defined on other events (see Section 2.2.3 below on
 how that is done using hist trigger 'onmatch' action). Once that is
 done, the 'wakeup_latency' synthetic event instance is created.
 
-A histogram can now be defined for the new synthetic event::
-
-  # echo 'hist:keys=pid,prio,lat.log2:sort=pid,lat' >> \
-        /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger
-
 The new event is created under the tracing/events/synthetic/ directory
 and looks and behaves just like any other event::
 
   # ls /sys/kernel/debug/tracing/events/synthetic/wakeup_latency
         enable  filter  format  hist  id  trigger
 
+A histogram can now be defined for the new synthetic event::
+
+  # echo 'hist:keys=pid,prio,lat.log2:sort=lat' >> \
+        /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger
+
+The above shows the latency "lat" in a power of 2 grouping.
+
 Like any other event, once a histogram is enabled for the event, the
 output can be displayed by reading the event's 'hist' file.
 
+  # cat /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/hist
+
+  # event histogram
+  #
+  # trigger info: hist:keys=pid,prio,lat.log2:vals=hitcount:sort=lat.log2:size=2048 [active]
+  #
+
+  { pid:       2035, prio:          9, lat: ~ 2^2  } hitcount:         43
+  { pid:       2034, prio:          9, lat: ~ 2^2  } hitcount:         60
+  { pid:       2029, prio:          9, lat: ~ 2^2  } hitcount:        965
+  { pid:       2034, prio:        120, lat: ~ 2^2  } hitcount:          9
+  { pid:       2033, prio:        120, lat: ~ 2^2  } hitcount:          5
+  { pid:       2030, prio:          9, lat: ~ 2^2  } hitcount:        335
+  { pid:       2030, prio:        120, lat: ~ 2^2  } hitcount:         10
+  { pid:       2032, prio:        120, lat: ~ 2^2  } hitcount:          1
+  { pid:       2035, prio:        120, lat: ~ 2^2  } hitcount:          2
+  { pid:       2031, prio:          9, lat: ~ 2^2  } hitcount:        176
+  { pid:       2028, prio:        120, lat: ~ 2^2  } hitcount:         15
+  { pid:       2033, prio:          9, lat: ~ 2^2  } hitcount:         91
+  { pid:       2032, prio:          9, lat: ~ 2^2  } hitcount:        125
+  { pid:       2029, prio:        120, lat: ~ 2^2  } hitcount:          4
+  { pid:       2031, prio:        120, lat: ~ 2^2  } hitcount:          3
+  { pid:       2029, prio:        120, lat: ~ 2^3  } hitcount:          2
+  { pid:       2035, prio:          9, lat: ~ 2^3  } hitcount:         41
+  { pid:       2030, prio:        120, lat: ~ 2^3  } hitcount:          1
+  { pid:       2032, prio:          9, lat: ~ 2^3  } hitcount:         32
+  { pid:       2031, prio:          9, lat: ~ 2^3  } hitcount:         44
+  { pid:       2034, prio:          9, lat: ~ 2^3  } hitcount:         40
+  { pid:       2030, prio:          9, lat: ~ 2^3  } hitcount:         29
+  { pid:       2033, prio:          9, lat: ~ 2^3  } hitcount:         31
+  { pid:       2029, prio:          9, lat: ~ 2^3  } hitcount:         31
+  { pid:       2028, prio:        120, lat: ~ 2^3  } hitcount:         18
+  { pid:       2031, prio:        120, lat: ~ 2^3  } hitcount:          2
+  { pid:       2028, prio:        120, lat: ~ 2^4  } hitcount:          1
+  { pid:       2029, prio:          9, lat: ~ 2^4  } hitcount:          4
+  { pid:       2031, prio:        120, lat: ~ 2^7  } hitcount:          1
+  { pid:       2032, prio:        120, lat: ~ 2^7  } hitcount:          1
+
+  Totals:
+      Hits: 2122
+      Entries: 30
+      Dropped: 0
+
+
+The latency values can also be grouped linearly by a given size with
+the ".buckets" modifier and specify a size (in this case groups of 10).
+
+  # echo 'hist:keys=pid,prio,lat.buckets=10:sort=lat' >> \
+        /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger
+
+  # event histogram
+  #
+  # trigger info: hist:keys=pid,prio,lat.buckets=10:vals=hitcount:sort=lat.buckets=10:size=2048 [active]
+  #
+
+  { pid:       2067, prio:          9, lat: ~ 0-9 } hitcount:        220
+  { pid:       2068, prio:          9, lat: ~ 0-9 } hitcount:        157
+  { pid:       2070, prio:          9, lat: ~ 0-9 } hitcount:        100
+  { pid:       2067, prio:        120, lat: ~ 0-9 } hitcount:          6
+  { pid:       2065, prio:        120, lat: ~ 0-9 } hitcount:          2
+  { pid:       2066, prio:        120, lat: ~ 0-9 } hitcount:          2
+  { pid:       2069, prio:          9, lat: ~ 0-9 } hitcount:        122
+  { pid:       2069, prio:        120, lat: ~ 0-9 } hitcount:          8
+  { pid:       2070, prio:        120, lat: ~ 0-9 } hitcount:          1
+  { pid:       2068, prio:        120, lat: ~ 0-9 } hitcount:          7
+  { pid:       2066, prio:          9, lat: ~ 0-9 } hitcount:        365
+  { pid:       2064, prio:        120, lat: ~ 0-9 } hitcount:         35
+  { pid:       2065, prio:          9, lat: ~ 0-9 } hitcount:        998
+  { pid:       2071, prio:          9, lat: ~ 0-9 } hitcount:         85
+  { pid:       2065, prio:          9, lat: ~ 10-19 } hitcount:          2
+  { pid:       2064, prio:        120, lat: ~ 10-19 } hitcount:          2
+
+  Totals:
+      Hits: 2112
+      Entries: 16
+      Dropped: 0
+
 2.2.3 Hist trigger 'handlers' and 'actions'
 -------------------------------------------
 
diff --git a/Documentation/translations/conf.py b/Documentation/translations/conf.py
new file mode 100644
index 000000000000..92cdbba74229
--- /dev/null
+++ b/Documentation/translations/conf.py
@@ -0,0 +1,12 @@
+# -*- coding: utf-8 -*-
+# SPDX-License-Identifier: GPL-2.0
+
+# -- Additinal options for LaTeX output ----------------------------------
+# font config for ascii-art alignment
+
+latex_elements['preamble']  += '''
+    \\IfFontExistsTF{Noto Sans CJK SC}{
+	% For CJK ascii-art alignment
+	\\setmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant]
+    }{}
+'''
diff --git a/Documentation/translations/index.rst b/Documentation/translations/index.rst
index 556b050884fc..1175a47d07f0 100644
--- a/Documentation/translations/index.rst
+++ b/Documentation/translations/index.rst
@@ -8,6 +8,7 @@ Translations
    :maxdepth: 1
 
    zh_CN/index
+   zh_TW/index
    it_IT/index
    ko_KR/index
    ja_JP/index
diff --git a/Documentation/translations/it_IT/core-api/symbol-namespaces.rst b/Documentation/translations/it_IT/core-api/symbol-namespaces.rst
index aa851a57a4b0..42f5d04e38ec 100644
--- a/Documentation/translations/it_IT/core-api/symbol-namespaces.rst
+++ b/Documentation/translations/it_IT/core-api/symbol-namespaces.rst
@@ -42,15 +42,15 @@ nomi: EXPORT_SYMBOL_NS() ed EXPORT_SYMBOL_NS_GPL(). Queste macro richiedono un
 argomento aggiuntivo: lo spazio dei nomi.
 Tenete presente che per via dell'espansione delle macro questo argomento deve
 essere un simbolo di preprocessore. Per esempio per esportare il
-simbolo `usb_stor_suspend` nello spazio dei nomi `USB_STORAGE` usate::
+simbolo ``usb_stor_suspend`` nello spazio dei nomi ``USB_STORAGE`` usate::
 
 	EXPORT_SYMBOL_NS(usb_stor_suspend, USB_STORAGE);
 
 Di conseguenza, nella tabella dei simboli del kernel ci sarà una voce
-rappresentata dalla struttura `kernel_symbol` che avrà il campo
-`namespace` (spazio dei nomi) impostato. Un simbolo esportato senza uno spazio
-dei nomi avrà questo campo impostato a `NULL`. Non esiste uno spazio dei nomi
-di base. Il programma `modpost` e il codice in kernel/module.c usano lo spazio
+rappresentata dalla struttura ``kernel_symbol`` che avrà il campo
+``namespace`` (spazio dei nomi) impostato. Un simbolo esportato senza uno spazio
+dei nomi avrà questo campo impostato a ``NULL``. Non esiste uno spazio dei nomi
+di base. Il programma ``modpost`` e il codice in kernel/module.c usano lo spazio
 dei nomi, rispettivamente, durante la compilazione e durante il caricamento
 di un modulo.
 
@@ -65,7 +65,7 @@ ed EXPORT_SYMBOL_GPL() che non specificano esplicitamente uno spazio dei nomi.
 
 Ci sono molti modi per specificare questo simbolo di preprocessore e il loro
 uso dipende dalle preferenze del manutentore di un sottosistema. La prima
-possibilità è quella di definire il simbolo nel `Makefile` del sottosistema.
+possibilità è quella di definire il simbolo nel ``Makefile`` del sottosistema.
 Per esempio per esportare tutti i simboli definiti in usb-common nello spazio
 dei nomi USB_COMMON, si può aggiungere la seguente linea in
 drivers/usb/common/Makefile::
@@ -97,7 +97,7 @@ USB_STORAGE usando la seguente dichiarazione::
 
 	MODULE_IMPORT_NS(USB_STORAGE);
 
-Questo creerà un'etichetta `modinfo` per ogni spazio dei nomi
+Questo creerà un'etichetta ``modinfo`` per ogni spazio dei nomi
 importato. Un risvolto di questo fatto è che gli spazi dei
 nomi importati da un modulo possono essere ispezionati tramite
 modinfo::
@@ -116,7 +116,7 @@ mancanti.
 4. Caricare moduli che usano simboli provenienti da spazi dei nomi
 ==================================================================
 
-Quando un modulo viene caricato (per esempio usando `insmod`), il kernel
+Quando un modulo viene caricato (per esempio usando ``insmod``), il kernel
 verificherà la disponibilità di ogni simbolo usato e se lo spazio dei nomi
 che potrebbe contenerli è stato importato. Il comportamento di base del kernel
 è di rifiutarsi di caricare quei moduli che non importano tutti gli spazi dei
@@ -144,22 +144,22 @@ Lo scenario tipico di chi scrive un modulo potrebbe essere::
 
 	- scrivere codice che dipende da un simbolo appartenente ad uno spazio
 	  dei nomi non importato
-	- eseguire `make`
+	- eseguire ``make``
 	- aver notato un avviso da modpost che parla di un'importazione
 	  mancante
-	- eseguire `make nsdeps` per aggiungere import nel posto giusto
+	- eseguire ``make nsdeps`` per aggiungere import nel posto giusto
 
 Per i manutentori di sottosistemi che vogliono aggiungere uno spazio dei nomi,
-l'approccio è simile. Di nuovo, eseguendo `make nsdeps` aggiungerà le
+l'approccio è simile. Di nuovo, eseguendo ``make nsdeps`` aggiungerà le
 importazioni mancanti nei moduli inclusi nel kernel::
 
 	- spostare o aggiungere simboli ad uno spazio dei nomi (per esempio
 	  usando EXPORT_SYMBOL_NS())
-	- eseguire `make` (preferibilmente con allmodconfig per coprire tutti
+	- eseguire ``make`` (preferibilmente con allmodconfig per coprire tutti
 	  i moduli del kernel)
 	- aver notato un avviso da modpost che parla di un'importazione
 	  mancante
-	- eseguire `make nsdeps` per aggiungere import nel posto giusto
+	- eseguire ``make nsdeps`` per aggiungere import nel posto giusto
 
 Potete anche eseguire nsdeps per moduli esterni. Solitamente si usa così::
 
diff --git a/Documentation/translations/it_IT/kernel-hacking/hacking.rst b/Documentation/translations/it_IT/kernel-hacking/hacking.rst
index f6beb385b4ac..d5c521327f6a 100644
--- a/Documentation/translations/it_IT/kernel-hacking/hacking.rst
+++ b/Documentation/translations/it_IT/kernel-hacking/hacking.rst
@@ -90,7 +90,7 @@ i gestori d'interruzioni devono essere veloci: spesso si limitano
 esclusivamente a notificare la presa in carico dell'interruzione,
 programmare una 'interruzione software' per l'esecuzione e quindi terminare.
 
-Potete dire d'essere in una interruzione hardware perché :c:func:`in_irq()`
+Potete dire d'essere in una interruzione hardware perché in_hardirq()
 ritorna vero.
 
 .. warning::
@@ -634,7 +634,7 @@ Definita in ``include/linux/export.h``
 
 Questa è una variate di `EXPORT_SYMBOL()` che permette di specificare uno
 spazio dei nomi. Lo spazio dei nomi è documentato in
-:doc:`../core-api/symbol-namespaces`
+Documentation/translations/it_IT/core-api/symbol-namespaces.rst.
 
 :c:func:`EXPORT_SYMBOL_NS_GPL()`
 --------------------------------
@@ -643,7 +643,7 @@ Definita in ``include/linux/export.h``
 
 Questa è una variate di `EXPORT_SYMBOL_GPL()` che permette di specificare uno
 spazio dei nomi. Lo spazio dei nomi è documentato in
-:doc:`../core-api/symbol-namespaces`
+Documentation/translations/it_IT/core-api/symbol-namespaces.rst.
 
 Procedure e convenzioni
 =======================
diff --git a/Documentation/translations/it_IT/kernel-hacking/locking.rst b/Documentation/translations/it_IT/kernel-hacking/locking.rst
index 1e7c84def369..1efb8293bf1f 100644
--- a/Documentation/translations/it_IT/kernel-hacking/locking.rst
+++ b/Documentation/translations/it_IT/kernel-hacking/locking.rst
@@ -1459,11 +1459,11 @@ contesto utente
   che hardware.
 
 interruzione hardware
-  Richiesta di interruzione hardware. in_irq() ritorna vero in un
+  Richiesta di interruzione hardware. in_hardirq() ritorna vero in un
   gestore d'interruzioni hardware.
 
 interruzione software / softirq
-  Gestore di interruzioni software: in_irq() ritorna falso;
+  Gestore di interruzioni software: in_hardirq() ritorna falso;
   in_softirq() ritorna vero. I tasklet e le softirq sono entrambi
   considerati 'interruzioni software'.
 
diff --git a/Documentation/translations/it_IT/process/deprecated.rst b/Documentation/translations/it_IT/process/deprecated.rst
index 07c79d4bafca..987f45ee1804 100644
--- a/Documentation/translations/it_IT/process/deprecated.rst
+++ b/Documentation/translations/it_IT/process/deprecated.rst
@@ -183,9 +183,11 @@ di Linus:
   affrontare il giudizio di Linus, allora forse potrai usare "%px",
   assicurandosi anche di averne il permesso.
 
-Infine, sappi che un cambio in favore di "%p" con hash `non verrà
-accettato
-<https://lore.kernel.org/lkml/CA+55aFwieC1-nAs+NFq9RTwaR8ef9hWa4MjNBWL41F-8wM49eA@mail.gmail.com/>`_.
+Potete disabilitare temporaneamente l'hashing di "%p" nel caso in cui questa
+funzionalità vi sia d'ostacolo durante una sessione di debug. Per farlo
+aggiungete l'opzione di debug "`no_hash_pointers
+<https://git.kernel.org/linus/5ead723a20e0447bc7db33dc3070b420e5f80aa6>`_" alla
+riga di comando del kernel.
 
 Vettori a dimensione variabile (VLA)
 ------------------------------------
diff --git a/Documentation/translations/it_IT/process/stable-kernel-rules.rst b/Documentation/translations/it_IT/process/stable-kernel-rules.rst
index 283d62541c4f..83f48afe48b9 100644
--- a/Documentation/translations/it_IT/process/stable-kernel-rules.rst
+++ b/Documentation/translations/it_IT/process/stable-kernel-rules.rst
@@ -41,12 +41,6 @@ Regole sul tipo di patch che vengono o non vengono accettate nei sorgenti
 Procedura per sottomettere patch per i sorgenti -stable
 -------------------------------------------------------
 
- - Se la patch contiene modifiche a dei file nelle cartelle net/ o drivers/net,
-   allora seguite le linee guida descritte in
-   :ref:`Documentation/translations/it_IT/networking/netdev-FAQ.rst <it_netdev-FAQ>`;
-   ma solo dopo aver verificato al seguente indirizzo che la patch non sia
-   già in coda:
-   https://patchwork.kernel.org/bundle/netdev/stable/?state=*
  - Una patch di sicurezza non dovrebbero essere gestite (solamente) dal processo
    di revisione -stable, ma dovrebbe seguire le procedure descritte in
    :ref:`Documentation/translations/it_IT/admin-guide/security-bugs.rst <it_securitybugs>`.
diff --git a/Documentation/translations/it_IT/process/submitting-patches.rst b/Documentation/translations/it_IT/process/submitting-patches.rst
index ded95048b9a8..458d9d24b9c0 100644
--- a/Documentation/translations/it_IT/process/submitting-patches.rst
+++ b/Documentation/translations/it_IT/process/submitting-patches.rst
@@ -14,14 +14,15 @@ una certa familiarità col "sistema".  Questo testo è una raccolta di
 suggerimenti che aumenteranno significativamente le probabilità di vedere le
 vostre patch accettate.
 
-Questo documento contiene un vasto numero di suggerimenti concisi.  Per
-maggiori dettagli su come funziona il processo di sviluppo del kernel leggete
-:doc:`development-process`.
-Leggete anche :doc:`submit-checklist` per una lista di punti da
-verificare prima di inviare del codice.  Se state inviando un driver,
-allora leggete anche :doc:`submitting-drivers`; per delle patch
+Questo documento contiene un vasto numero di suggerimenti concisi. Per maggiori
+dettagli su come funziona il processo di sviluppo del kernel leggete
+Documentation/translations/it_IT/process/development-process.rst. Leggete anche
+Documentation/translations/it_IT/process/submit-checklist.rst per una lista di
+punti da verificare prima di inviare del codice. Se state inviando un driver,
+allora leggete anche
+Documentation/translations/it_IT/process/submitting-drivers.rst; per delle patch
 relative alle associazioni per Device Tree leggete
-:doc:`submitting-patches`.
+Documentation/translations/it_IT/process/submitting-patches.rst.
 
 Questa documentazione assume che sappiate usare ``git`` per preparare le patch.
 Se non siete pratici di ``git``, allora è bene che lo impariate;
@@ -193,7 +194,7 @@ ed integrate.
 ---------------------------------------------
 
 Controllate che la vostra patch non violi lo stile del codice, maggiori
-dettagli sono disponibili in :ref:`Documentation/translations/it_IT/process/coding-style.rst <it_codingstyle>`.
+dettagli sono disponibili in Documentation/translations/it_IT/process/coding-style.rst.
 Non farlo porta semplicemente a una perdita di tempo da parte dei revisori e
 voi vedrete la vostra patch rifiutata, probabilmente senza nemmeno essere stata
 letta.
@@ -230,13 +231,13 @@ scripts/get_maintainer.pl può esservi d'aiuto.  Se non riuscite a trovare un
 manutentore per il sottosistema su cui state lavorando, allora Andrew Morton
 (akpm@linux-foundation.org) sarà la vostra ultima possibilità.
 
-Normalmente, dovreste anche scegliere una lista di discussione a cui inviare
-la vostra serie di patch.  La lista di discussione linux-kernel@vger.kernel.org
-è proprio l'ultima spiaggia, il volume di email su questa lista fa si che
-diversi sviluppatori non la seguano.  Guardate nel file MAINTAINERS per trovare
-la lista di discussione dedicata ad un sottosistema; probabilmente lì la vostra
-patch riceverà molta più attenzione.  Tuttavia, per favore, non spammate le
-liste di discussione che non sono interessate al vostro lavoro.
+Normalmente, dovreste anche scegliere una lista di discussione a cui inviare la
+vostra serie di patch. La lista di discussione linux-kernel@vger.kernel.org
+dovrebbe essere usata per inviare tutte le patch, ma il traffico è tale per cui
+diversi sviluppatori la trascurano. Guardate nel file MAINTAINERS per trovare la
+lista di discussione dedicata ad un sottosistema; probabilmente lì la vostra
+patch riceverà molta più attenzione. Tuttavia, per favore, non spammate le liste
+di discussione che non sono interessate al vostro lavoro.
 
 Molte delle liste di discussione relative al kernel vengono ospitate su
 vger.kernel.org; potete trovare un loro elenco alla pagina
@@ -257,7 +258,7 @@ embargo potrebbe essere preso in considerazione per dare il tempo alle
 distribuzioni di prendere la patch e renderla disponibile ai loro utenti;
 in questo caso, ovviamente, la patch non dovrebbe essere inviata su alcuna
 lista di discussione pubblica. Leggete anche
-:doc:`/admin-guide/security-bugs`.
+Documentation/admin-guide/security-bugs.rst.
 
 Patch che correggono bachi importanti su un kernel già rilasciato, dovrebbero
 essere inviate ai manutentori dei kernel stabili aggiungendo la seguente riga::
@@ -266,12 +267,7 @@ essere inviate ai manutentori dei kernel stabili aggiungendo la seguente riga::
 
 nella vostra patch, nell'area dedicata alle firme (notate, NON come destinatario
 delle e-mail).  In aggiunta a questo file, dovreste leggere anche
-:ref:`Documentation/translations/it_IT/process/stable-kernel-rules.rst <it_stable_kernel_rules>`
-
-Tuttavia, notate, che alcuni manutentori di sottosistema preferiscono avere
-l'ultima parola su quali patch dovrebbero essere aggiunte ai kernel stabili.
-La rete di manutentori, in particolare, non vorrebbe vedere i singoli
-sviluppatori aggiungere alle loro patch delle righe come quella sopracitata.
+Documentation/translations/it_IT/process/stable-kernel-rules.rst.
 
 Se le modifiche hanno effetti sull'interfaccia con lo spazio utente, per favore
 inviate una patch per le pagine man ai manutentori di suddette pagine (elencati
@@ -330,7 +326,7 @@ così la possibilità che il vostro allegato-MIME venga accettato.
 Eccezione: se il vostro servizio di posta storpia le patch, allora qualcuno
 potrebbe chiedervi di rinviarle come allegato MIME.
 
-Leggete :doc:`/translations/it_IT/process/email-clients`
+Leggete Documentation/translations/it_IT/process/email-clients.rst
 per dei suggerimenti sulla configurazione del programmi di posta elettronica
 per l'invio di patch intatte.
 
@@ -351,7 +347,7 @@ richiede molto tempo, e a volte i revisori diventano burberi.  Tuttavia, anche
 in questo caso, rispondete con educazione e concentratevi sul problema che
 hanno evidenziato.
 
-Leggete :doc:`/translations/it_IT/process/email-clients` per
+Leggete Documentation/translations/it_IT/process/email-clients.rst per
 le raccomandazioni sui programmi di posta elettronica e l'etichetta da usare
 sulle liste di discussione.
 
@@ -369,6 +365,16 @@ aver inviato le patch correttamente.  Aspettate almeno una settimana prima di
 rinviare le modifiche o sollecitare i revisori - probabilmente anche di più
 durante la finestra d'integrazione.
 
+Potete anche rinviare la patch, o la serie di patch, dopo un paio di settimane
+aggiungendo la parola "RESEND" nel titolo::
+
+    [PATCH Vx RESEND] sub/sys: Condensed patch summary
+
+Ma non aggiungete "RESEND" quando state sottomettendo una versione modificata
+della vostra patch, o serie di patch - "RESEND" si applica solo alla
+sottomissione di patch, o serie di patch, che non hanno subito modifiche
+dall'ultima volta che sono state inviate.
+
 Aggiungete PATCH nell'oggetto
 -----------------------------
 
@@ -795,8 +801,7 @@ Greg Kroah-Hartman, "Come scocciare un manutentore di un sottosistema"
 No!!!! Basta gigantesche bombe patch alle persone sulla lista linux-kernel@vger.kernel.org!
   <https://lore.kernel.org/r/20050711.125305.08322243.davem@davemloft.net>
 
-Kernel Documentation/translations/it_IT/process/coding-style.rst:
-  :ref:`Documentation/translations/it_IT/process/coding-style.rst <it_codingstyle>`
+Kernel Documentation/translations/it_IT/process/coding-style.rst.
 
 E-mail di Linus Torvalds sul formato canonico di una patch:
   <https://lore.kernel.org/r/Pine.LNX.4.58.0504071023190.28951@ppc970.osdl.org>
diff --git a/Documentation/translations/ja_JP/howto.rst b/Documentation/translations/ja_JP/howto.rst
index 73ebdab4ced7..d667f9d8a02a 100644
--- a/Documentation/translations/ja_JP/howto.rst
+++ b/Documentation/translations/ja_JP/howto.rst
@@ -1,3 +1,7 @@
+.. raw:: latex
+
+	\kerneldocCJKoff
+
 NOTE:
 This is a version of Documentation/process/howto.rst translated into Japanese.
 This document is maintained by Tsugikazu Shibata <tshibata@ab.jp.nec.com>
@@ -11,6 +15,10 @@ try to update the original English file first.
 
 ----------------------------------
 
+.. raw:: latex
+
+	\kerneldocCJKon
+
 この文書は、
 Documentation/process/howto.rst
 の和訳です。
diff --git a/Documentation/translations/ja_JP/index.rst b/Documentation/translations/ja_JP/index.rst
index f94ba62d41c3..88d4d98eed15 100644
--- a/Documentation/translations/ja_JP/index.rst
+++ b/Documentation/translations/ja_JP/index.rst
@@ -3,6 +3,7 @@
 	\renewcommand\thesection*
 	\renewcommand\thesubsection*
 	\kerneldocCJKon
+	\kerneldocBeginJP
 
 Japanese translations
 =====================
@@ -11,3 +12,7 @@ Japanese translations
    :maxdepth: 1
 
    howto
+
+.. raw:: latex
+
+	\kerneldocEndJP
diff --git a/Documentation/translations/ko_KR/howto.rst b/Documentation/translations/ko_KR/howto.rst
index a2bdd564c907..e3cdf0c84892 100644
--- a/Documentation/translations/ko_KR/howto.rst
+++ b/Documentation/translations/ko_KR/howto.rst
@@ -1,3 +1,7 @@
+.. raw:: latex
+
+	\kerneldocCJKoff
+
 NOTE:
 This is a version of Documentation/process/howto.rst translated into korean
 This document is maintained by Minchan Kim <minchan@kernel.org>
@@ -11,6 +15,10 @@ try to update the original English file first.
 
 ----------------------------------
 
+.. raw:: latex
+
+	\kerneldocCJKon
+
 이 문서는
 Documentation/process/howto.rst
 의 한글 번역입니다.
diff --git a/Documentation/translations/ko_KR/index.rst b/Documentation/translations/ko_KR/index.rst
index 6ae258118bdf..f636b482fb4c 100644
--- a/Documentation/translations/ko_KR/index.rst
+++ b/Documentation/translations/ko_KR/index.rst
@@ -3,6 +3,7 @@
 	\renewcommand\thesection*
 	\renewcommand\thesubsection*
 	\kerneldocCJKon
+	\kerneldocBeginKR
 
 한국어 번역
 ===========
@@ -26,3 +27,4 @@
 .. raw:: latex
 
     \normalsize
+    \kerneldocEndKR
diff --git a/Documentation/translations/zh_CN/accounting/index.rst b/Documentation/translations/zh_CN/accounting/index.rst
new file mode 100644
index 000000000000..362e907b41f9
--- /dev/null
+++ b/Documentation/translations/zh_CN/accounting/index.rst
@@ -0,0 +1,25 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/accounting/index.rst
+:Translator: Yang Yang <yang.yang29@zte.com.cn>
+
+.. _cn_accounting_index.rst:
+
+
+====
+计数
+====
+
+.. toctree::
+   :maxdepth: 1
+
+   psi
+
+Todolist:
+
+   cgroupstats
+   delay-accounting
+   taskstats
+   taskstats-struct
diff --git a/Documentation/translations/zh_CN/accounting/psi.rst b/Documentation/translations/zh_CN/accounting/psi.rst
new file mode 100644
index 000000000000..a0ddb7bd257c
--- /dev/null
+++ b/Documentation/translations/zh_CN/accounting/psi.rst
@@ -0,0 +1,155 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/accounting/psi.rst
+:Translator: Yang Yang <yang.yang29@zte.com.cn>
+
+.. _cn_psi.rst:
+
+
+=================
+PSI——压力阻塞信息
+=================
+
+:日期: April, 2018
+:作者: Johannes Weiner <hannes@cmpxchg.org>
+
+当CPU、memory或IO设备处于竞争状态，业务负载会遭受时延毛刺、吞吐量降低，
+及面临OOM的风险。
+
+如果没有一种准确的方法度量系统竞争程度，则有两种后果：一种是用户过于节制，
+未充分利用系统资源；另一种是过度使用，经常性面临业务中断的风险。
+
+psi特性能够识别和量化资源竞争导致的业务中断，及其对复杂负载乃至整个系统在
+时间上的影响。
+
+准确度量因资源不足造成的生产力损失，有助于用户基于硬件调整业务负载，或基
+于业务负载配置硬件。
+
+psi能够实时的提供相关信息，因此系统可基于psi实现动态的负载管理。如实施
+卸载、迁移、策略性的停止或杀死低优先级或可重启的批处理任务。
+
+psi帮助用户实现硬件资源利用率的最大化。同时无需牺牲业务负载健康度，也无需
+面临OOM等造成业务中断的风险。
+
+压力接口
+========
+
+压力信息可通过/proc/pressure/ --cpu、memory、io文件分别获取。
+
+CPU相关信息格式如下：
+
+        some avg10=0.00 avg60=0.00 avg300=0.00 total=0
+
+内存和IO相关信息如下：
+
+        some avg10=0.00 avg60=0.00 avg300=0.00 total=0
+        full avg10=0.00 avg60=0.00 avg300=0.00 total=0
+
+some行代表至少有一个任务阻塞于特定资源的时间占比。
+
+full行代表所有非idle任务同时阻塞于特定资源的时间占比。在这种状态下CPU资源
+完全被浪费，相对于正常运行，业务负载由于耗费更多时间等待而受到严重影响。
+
+由于此情况严重影响系统性能，因此清楚的识别本情况并与some行所代表的情况区分开，
+将有助于分析及提升系统性能。这就是full独立于some行的原因。
+
+avg代表阻塞时间占比（百分比），为最近10秒、60秒、300秒内的均值。这样我们
+既可观察到短期事件的影响，也可看到中等及长时间内的趋势。total代表总阻塞
+时间（单位微秒），可用于观察时延毛刺，这种毛刺可能在均值中无法体现。
+
+监控压力门限
+============
+
+用户可注册触发器，通过poll()监控资源压力是否超过门限。
+
+触发器定义：指定时间窗口期内累积阻塞时间的最大值。比如可定义500ms内积累
+100ms阻塞，即触发一次唤醒事件。
+
+触发器注册方法：用户打开代表特定资源的psi接口文件，写入门限、时间窗口的值。
+所打开的文件描述符用于等待事件，可使用select()、poll()、epoll()。
+写入信息的格式如下：
+
+        <some|full> <stall amount in us> <time window in us>
+
+示例：向/proc/pressure/memory写入"some 150000 1000000"将新增触发器，将在
+1秒内至少一个任务阻塞于内存的总时间超过150ms时触发。向/proc/pressure/io写入
+"full 50000 1000000"将新增触发器，将在1秒内所有任务都阻塞于io的总时间超过50ms时触发。
+
+触发器可针对多个psi度量值设置，同一个psi度量值可设置多个触发器。每个触发器需要
+单独的文件描述符用于轮询，以区分于其他触发器。所以即使对于同一个psi接口文件，
+每个触发器也需要单独的调用open()。
+
+监控器在被监控资源进入阻塞状态时启动，在系统退出阻塞状态后停用。系统进入阻塞
+状态后，监控psi增长的频率为每监控窗口刷新10次。
+
+内核接受的窗口为500ms~10s，所以监控间隔为50ms~1s。设置窗口下限目的是为了
+防止过于频繁的轮询。设置窗口上限的目的是因为窗口过长则无意义，此时查看
+psi接口提供的均值即可。
+
+监控器在激活后，至少在跟踪窗口期间将保持活动状态。以避免随着系统进入和退出
+阻塞状态，监控器过于频繁的进入和退出活动状态。
+
+用户态通知在监控窗口内会受到速率限制。当对应的文件描述符关闭，触发器会自动注销。
+
+用户态监控器使用示例
+====================
+
+::
+
+  #include <errno.h>
+  #include <fcntl.h>
+  #include <stdio.h>
+  #include <poll.h>
+  #include <string.h>
+  #include <unistd.h>
+
+  /* 监控内存部分阻塞，监控时间窗口为1秒、阻塞门限为150毫秒。*/
+  int main() {
+        const char trig[] = "some 150000 1000000";
+        struct pollfd fds;
+        int n;
+
+        fds.fd = open("/proc/pressure/memory", O_RDWR | O_NONBLOCK);
+        if (fds.fd < 0) {
+                printf("/proc/pressure/memory open error: %s\n",
+                        strerror(errno));
+                return 1;
+        }
+        fds.events = POLLPRI;
+
+        if (write(fds.fd, trig, strlen(trig) + 1) < 0) {
+                printf("/proc/pressure/memory write error: %s\n",
+                        strerror(errno));
+                return 1;
+        }
+
+        printf("waiting for events...\n");
+        while (1) {
+                n = poll(&fds, 1, -1);
+                if (n < 0) {
+                        printf("poll error: %s\n", strerror(errno));
+                        return 1;
+                }
+                if (fds.revents & POLLERR) {
+                        printf("got POLLERR, event source is gone\n");
+                        return 0;
+                }
+                if (fds.revents & POLLPRI) {
+                        printf("event triggered!\n");
+                } else {
+                        printf("unknown event received: 0x%x\n", fds.revents);
+                        return 1;
+                }
+        }
+
+        return 0;
+  }
+
+Cgroup2接口
+===========
+
+对于CONFIG_CGROUP=y及挂载了cgroup2文件系统的系统，能够获取cgroups内任务的psi。
+此场景下cgroupfs挂载点的子目录包含cpu.pressure、memory.pressure、io.pressure文件，
+内容格式与/proc/pressure/下的文件相同。
+
+可设置基于cgroup的psi监控器，方法与系统级psi监控器相同。
diff --git a/Documentation/translations/zh_CN/core-api/cachetlb.rst b/Documentation/translations/zh_CN/core-api/cachetlb.rst
index 8376485a534d..6fee45fe5e80 100644
--- a/Documentation/translations/zh_CN/core-api/cachetlb.rst
+++ b/Documentation/translations/zh_CN/core-api/cachetlb.rst
@@ -80,7 +80,7 @@ cpu上对这个地址空间进行刷新。
 5) ``void update_mmu_cache(struct vm_area_struct *vma,
    unsigned long address, pte_t *ptep)``
 
-	在每个页面故障结束时，这个程序被调用，以告诉体系结构特定的代码，在
+	在每个缺页异常结束时，这个程序被调用，以告诉体系结构特定的代码，在
 	软件页表中，在地址空间“vma->vm_mm”的虚拟地址“地址”处，现在存在
 	一个翻译。
 
@@ -298,15 +298,6 @@ HyperSparc cpu就是这样一个具有这种属性的cpu。
 	用。默认的实现是nop（对于所有相干的架构应该保持这样）。对于不一致性
 	的架构，它应该刷新vmaddr处的页面缓存。
 
-  ``void flush_kernel_dcache_page(struct page *page)``
-
-	当内核需要修改一个用kmap获得的用户页时，它会在所有修改完成后（但在
-	kunmapping之前）调用这个函数，以使底层页面达到最新状态。这里假定用
-	户没有不一致性的缓存副本（即原始页面是从类似get_user_pages()的机制
-	中获得的）。默认的实现是一个nop，在所有相干的架构上都应该如此。在不
-	一致性的架构上，这应该刷新内核缓存中的页面（使用page_address(page)）。
-
-
   ``void flush_icache_range(unsigned long start, unsigned long end)``
 
 	当内核存储到它将执行的地址中时（例如在加载模块时），这个函数被调用。
diff --git a/Documentation/translations/zh_CN/core-api/cpu_hotplug.rst b/Documentation/translations/zh_CN/core-api/cpu_hotplug.rst
new file mode 100644
index 000000000000..85a264287426
--- /dev/null
+++ b/Documentation/translations/zh_CN/core-api/cpu_hotplug.rst
@@ -0,0 +1,348 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/core-api/cpu_hotplug.rst
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+ 吴想成 Wu XiangCheng <bobwxc@email.cn>
+
+.. _cn_core_api_cpu_hotplug:
+
+=================
+内核中的CPU热拔插
+=================
+
+:时间: 2016年12月
+:作者: Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
+          Rusty Russell <rusty@rustcorp.com.au>,
+          Srivatsa Vaddagiri <vatsa@in.ibm.com>,
+          Ashok Raj <ashok.raj@intel.com>,
+          Joel Schopp <jschopp@austin.ibm.com>
+
+简介
+====
+
+现代系统架构的演进已经在处理器中引入了先进的错误报告和纠正能力。有一些OEM也支
+持可热拔插的NUMA（Non Uniform Memory Access，非统一内存访问）硬件,其中物理
+节点的插入和移除需要支持CPU热插拔。
+
+这样的进步要求内核可用的CPU被移除，要么是出于配置的原因，要么是出于RAS的目的，
+以保持一个不需要的CPU不在系统执行路径。因此需要在Linux内核中支持CPU热拔插。
+
+CPU热拔插支持的一个更新颖的用途是它在SMP的暂停恢复支持中的应用。双核和超线程支
+持使得即使是笔记本电脑也能运行不支持这些方法的SMP内核。
+
+
+命令行开关
+==========
+
+``maxcpus=n``
+  限制启动时的CPU为 *n* 个。例如，如果你有四个CPU，使用 ``maxcpus=2`` 将只能启
+  动两个。你可以选择稍后让其他CPU上线。
+
+``nr_cpus=n``
+  限制内核将支持的CPU总量。如果这里提供的数量低于实际可用的CPU数量，那么其他CPU
+  以后就不能上线了。
+
+``additional_cpus=n``
+  使用它来限制可热插拔的CPU。该选项设置
+  ``cpu_possible_mask = cpu_present_mask + additional_cpus``
+
+  这个选项只限于IA64架构。
+
+``possible_cpus=n``
+  这个选项设置 ``cpu_possible_mask`` 中的 ``possible_cpus`` 位。
+
+  这个选项只限于X86和S390架构。
+
+``cpu0_hotplug``
+  允许关闭CPU0。
+
+  这个选项只限于X86架构。
+
+CPU位图
+=======
+
+``cpu_possible_mask``
+  系统中可能可用CPU的位图。这是用来为per_cpu变量分配一些启动时的内存，这些变量
+  不会随着CPU的可用或移除而增加/减少。一旦在启动时的发现阶段被设置，该映射就是静态
+  的，也就是说，任何时候都不会增加或删除任何位。根据你的系统需求提前准确地调整它
+  可以节省一些启动时的内存。
+
+``cpu_online_mask``
+  当前在线的所有CPU的位图。在一个CPU可用于内核调度并准备接收设备的中断后，它被
+  设置在 ``__cpu_up()`` 中。当使用 ``__cpu_disable()`` 关闭一个CPU时，它被清
+  空，在此之前，所有的操作系统服务包括中断都被迁移到另一个目标CPU。
+
+``cpu_present_mask``
+  系统中当前存在的CPU的位图。它们并非全部在线。当物理热拔插被相关的子系统
+  （如ACPI）处理时，可以改变和添加新的位或从位图中删除，这取决于事件是
+  hot-add/hot-remove。目前还没有定死规定。典型的用法是在启动时启动拓扑结构，这时
+  热插拔被禁用。
+
+你真的不需要操作任何系统的CPU映射。在大多数情况下，它们应该是只读的。当设置每个
+CPU资源时，几乎总是使用 ``cpu_possible_mask`` 或 ``for_each_possible_cpu()``
+来进行迭代。宏 ``for_each_cpu()`` 可以用来迭代一个自定义的CPU掩码。
+
+不要使用 ``cpumask_t`` 以外的任何东西来表示CPU的位图。
+
+
+使用CPU热拔插
+=============
+
+内核选项 *CONFIG_HOTPLUG_CPU* 需要被启用。它目前可用于多种架构，包括ARM、MIPS、
+PowerPC和X86。配置是通过sysfs接口完成的::
+
+ $ ls -lh /sys/devices/system/cpu
+ total 0
+ drwxr-xr-x  9 root root    0 Dec 21 16:33 cpu0
+ drwxr-xr-x  9 root root    0 Dec 21 16:33 cpu1
+ drwxr-xr-x  9 root root    0 Dec 21 16:33 cpu2
+ drwxr-xr-x  9 root root    0 Dec 21 16:33 cpu3
+ drwxr-xr-x  9 root root    0 Dec 21 16:33 cpu4
+ drwxr-xr-x  9 root root    0 Dec 21 16:33 cpu5
+ drwxr-xr-x  9 root root    0 Dec 21 16:33 cpu6
+ drwxr-xr-x  9 root root    0 Dec 21 16:33 cpu7
+ drwxr-xr-x  2 root root    0 Dec 21 16:33 hotplug
+ -r--r--r--  1 root root 4.0K Dec 21 16:33 offline
+ -r--r--r--  1 root root 4.0K Dec 21 16:33 online
+ -r--r--r--  1 root root 4.0K Dec 21 16:33 possible
+ -r--r--r--  1 root root 4.0K Dec 21 16:33 present
+
+文件 *offline* 、 *online* 、*possible* 、*present* 代表CPU掩码。每个CPU文件
+夹包含一个 *online* 文件，控制逻辑上的开（1）和关（0）状态。要在逻辑上关闭CPU4::
+
+ $ echo 0 > /sys/devices/system/cpu/cpu4/online
+  smpboot: CPU 4 is now offline
+
+一旦CPU被关闭，它将从 */proc/interrupts* 、*/proc/cpuinfo* 中被删除，也不应该
+被 *top* 命令显示出来。要让CPU4重新上线::
+
+ $ echo 1 > /sys/devices/system/cpu/cpu4/online
+ smpboot: Booting Node 0 Processor 4 APIC 0x1
+
+CPU又可以使用了。这应该对所有的CPU都有效。CPU0通常比较特殊，被排除在CPU热拔插之外。
+在X86上，内核选项 *CONFIG_BOOTPARAM_HOTPLUG_CPU0* 必须被启用，以便能够关闭CPU0。
+或者，可以使用内核命令选项 *cpu0_hotplug* 。CPU0的一些已知的依赖性:
+
+* 从休眠/暂停中恢复。如果CPU0处于离线状态，休眠/暂停将失败。
+* PIC中断。如果检测到PIC中断，CPU0就不能被移除。
+
+如果你发现CPU0上有任何依赖性，请告知Fenghua Yu <fenghua.yu@intel.com>。
+
+CPU的热拔插协作
+===============
+
+下线情况
+--------
+
+一旦CPU被逻辑关闭，注册的热插拔状态的清除回调将被调用，从 ``CPUHP_ONLINE`` 开始，在
+``CPUHP_OFFLINE`` 状态结束。这包括:
+
+* 如果任务因暂停操作而被冻结，那么 *cpuhp_tasks_frozen* 将被设置为true。
+
+* 所有进程都会从这个将要离线的CPU迁移到新的CPU上。新的CPU是从每个进程的当前cpuset中
+  选择的，它可能是所有在线CPU的一个子集。
+
+* 所有针对这个CPU的中断都被迁移到新的CPU上。
+
+* 计时器也会被迁移到新的CPU上。
+
+* 一旦所有的服务被迁移，内核会调用一个特定的例程 ``__cpu_disable()`` 来进行特定的清
+  理。
+
+使用热插拔API
+-------------
+
+一旦一个CPU下线或上线，就有可能收到通知。这对某些需要根据可用CPU数量执行某种设置或清
+理功能的驱动程序来说可能很重要::
+
+  #include <linux/cpuhotplug.h>
+
+  ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "X/Y:online",
+                          Y_online, Y_prepare_down);
+
+*X* 是子系统， *Y* 是特定的驱动程序。 *Y_online* 回调将在所有在线CPU的注册过程中被调用。
+如果在线回调期间发生错误， *Y_prepare_down*  回调将在所有之前调用过在线回调的CPU上调
+用。注册完成后，一旦有CPU上线， *Y_online* 回调将被调用，当CPU关闭时， *Y_prepare_down*
+将被调用。所有之前在 *Y_online* 中分配的资源都应该在 *Y_prepare_down* 中释放。如果在
+注册过程中发生错误，返回值 *ret* 为负值。否则会返回一个正值，其中包含动态分配状态
+（ *CPUHP_AP_ONLINE_DYN* ）的分配热拔插。对于预定义的状态，它将返回0。
+
+该回调可以通过调用 ``cpuhp_remove_state()`` 来删除。如果是动态分配的状态
+（ *CPUHP_AP_ONLINE_DYN* ），则使用返回的状态。在移除热插拔状态的过程中，将调用拆解回调。
+
+多个实例
+~~~~~~~~
+
+如果一个驱动程序有多个实例，并且每个实例都需要独立执行回调，那么很可能应该使用
+``multi-state`` 。首先需要注册一个多状态的状态::
+
+  ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "X/Y:online,
+                                Y_online, Y_prepare_down);
+  Y_hp_online = ret;
+
+``cpuhp_setup_state_multi()`` 的行为与 ``cpuhp_setup_state()`` 类似，只是它
+为多状态准备了回调，但不调用回调。这是一个一次性的设置。
+一旦分配了一个新的实例，你需要注册这个新实例::
+
+  ret = cpuhp_state_add_instance(Y_hp_online, &d->node);
+
+这个函数将把这个实例添加到你先前分配的 ``Y_hp_online`` 状态，并在所有在线的
+CPU上调用先前注册的回调（ ``Y_online`` ）。 *node* 元素是你的每个实例数据结构
+中的一个 ``struct hlist_node`` 成员。
+
+在移除该实例时::
+
+  cpuhp_state_remove_instance(Y_hp_online, &d->node)
+
+应该被调用，这将在所有在线CPU上调用拆分回调。
+
+手动设置
+~~~~~~~~
+
+通常情况下，在注册或移除状态时调用setup和teamdown回调是很方便的，因为通常在CPU上线
+（下线）和驱动的初始设置（关闭）时需要执行该操作。然而，每个注册和删除功能也有一个
+_nocalls的后缀，如果不希望调用回调，则不调用所提供的回调。在手动设置（或关闭）期间，
+应该使用 ``get_online_cpus()`` 和 ``put_online_cpus()`` 函数来抑制CPU热插拔操作。
+
+
+事件的顺序
+----------
+
+热插拔状态被定义在 ``include/linux/cpuhotplug.h``:
+
+* ``CPUHP_OFFLINE`` ... ``CPUHP_AP_OFFLINE`` 状态是在CPU启动前调用的。
+
+* ``CPUHP_AP_OFFLINE`` ... ``CPUHP_AP_ONLINE`` 状态是在CPU被启动后被调用的。
+  中断是关闭的，调度程序还没有在这个CPU上活动。从 ``CPUHP_AP_OFFLINE`` 开始，
+  回调被调用到目标CPU上。
+
+* ``CPUHP_AP_ONLINE_DYN`` 和 ``CPUHP_AP_ONLINE_DYN_END`` 之间的状态被保留
+  给动态分配。
+
+* 这些状态在CPU关闭时以相反的顺序调用，从 ``CPUHP_ONLINE`` 开始，在 ``CPUHP_OFFLINE``
+  停止。这里的回调是在将被关闭的CPU上调用的，直到 ``CPUHP_AP_OFFLINE`` 。
+
+通过 ``CPUHP_AP_ONLINE_DYN`` 动态分配的状态通常已经足够了。然而，如果在启动或关闭
+期间需要更早的调用，那么应该获得一个显式状态。如果热拔插事件需要相对于另一个热拔插事
+件的特定排序，也可能需要一个显式状态。
+
+测试热拔插状态
+==============
+
+验证自定义状态是否按预期工作的一个方法是关闭一个CPU，然后再把它上线。也可以把CPU放到某
+些状态（例如 ``CPUHP_AP_ONLINE`` ），然后再回到 ``CPUHP_ONLINE`` 。这将模拟在
+``CPUHP_AP_ONLINE`` 之后的一个状态出现错误，从而导致回滚到在线状态。
+
+所有注册的状态都被列举在 ``/sys/devices/system/cpu/hotplug/states`` ::
+
+ $ tail /sys/devices/system/cpu/hotplug/states
+ 138: mm/vmscan:online
+ 139: mm/vmstat:online
+ 140: lib/percpu_cnt:online
+ 141: acpi/cpu-drv:online
+ 142: base/cacheinfo:online
+ 143: virtio/net:online
+ 144: x86/mce:online
+ 145: printk:online
+ 168: sched:active
+ 169: online
+
+要将CPU4回滚到 ``lib/percpu_cnt:online`` ，再回到在线状态，只需发出::
+
+  $ cat /sys/devices/system/cpu/cpu4/hotplug/state
+  169
+  $ echo 140 > /sys/devices/system/cpu/cpu4/hotplug/target
+  $ cat /sys/devices/system/cpu/cpu4/hotplug/state
+  140
+
+需要注意的是，状态140的清除回调已经被调用。现在重新上线::
+
+  $ echo 169 > /sys/devices/system/cpu/cpu4/hotplug/target
+  $ cat /sys/devices/system/cpu/cpu4/hotplug/state
+  169
+
+启用追踪事件后，单个步骤也是可见的::
+
+  #  TASK-PID   CPU#    TIMESTAMP  FUNCTION
+  #     | |       |        |         |
+      bash-394  [001]  22.976: cpuhp_enter: cpu: 0004 target: 140 step: 169 (cpuhp_kick_ap_work)
+   cpuhp/4-31   [004]  22.977: cpuhp_enter: cpu: 0004 target: 140 step: 168 (sched_cpu_deactivate)
+   cpuhp/4-31   [004]  22.990: cpuhp_exit:  cpu: 0004  state: 168 step: 168 ret: 0
+   cpuhp/4-31   [004]  22.991: cpuhp_enter: cpu: 0004 target: 140 step: 144 (mce_cpu_pre_down)
+   cpuhp/4-31   [004]  22.992: cpuhp_exit:  cpu: 0004  state: 144 step: 144 ret: 0
+   cpuhp/4-31   [004]  22.993: cpuhp_multi_enter: cpu: 0004 target: 140 step: 143 (virtnet_cpu_down_prep)
+   cpuhp/4-31   [004]  22.994: cpuhp_exit:  cpu: 0004  state: 143 step: 143 ret: 0
+   cpuhp/4-31   [004]  22.995: cpuhp_enter: cpu: 0004 target: 140 step: 142 (cacheinfo_cpu_pre_down)
+   cpuhp/4-31   [004]  22.996: cpuhp_exit:  cpu: 0004  state: 142 step: 142 ret: 0
+      bash-394  [001]  22.997: cpuhp_exit:  cpu: 0004  state: 140 step: 169 ret: 0
+      bash-394  [005]  95.540: cpuhp_enter: cpu: 0004 target: 169 step: 140 (cpuhp_kick_ap_work)
+   cpuhp/4-31   [004]  95.541: cpuhp_enter: cpu: 0004 target: 169 step: 141 (acpi_soft_cpu_online)
+   cpuhp/4-31   [004]  95.542: cpuhp_exit:  cpu: 0004  state: 141 step: 141 ret: 0
+   cpuhp/4-31   [004]  95.543: cpuhp_enter: cpu: 0004 target: 169 step: 142 (cacheinfo_cpu_online)
+   cpuhp/4-31   [004]  95.544: cpuhp_exit:  cpu: 0004  state: 142 step: 142 ret: 0
+   cpuhp/4-31   [004]  95.545: cpuhp_multi_enter: cpu: 0004 target: 169 step: 143 (virtnet_cpu_online)
+   cpuhp/4-31   [004]  95.546: cpuhp_exit:  cpu: 0004  state: 143 step: 143 ret: 0
+   cpuhp/4-31   [004]  95.547: cpuhp_enter: cpu: 0004 target: 169 step: 144 (mce_cpu_online)
+   cpuhp/4-31   [004]  95.548: cpuhp_exit:  cpu: 0004  state: 144 step: 144 ret: 0
+   cpuhp/4-31   [004]  95.549: cpuhp_enter: cpu: 0004 target: 169 step: 145 (console_cpu_notify)
+   cpuhp/4-31   [004]  95.550: cpuhp_exit:  cpu: 0004  state: 145 step: 145 ret: 0
+   cpuhp/4-31   [004]  95.551: cpuhp_enter: cpu: 0004 target: 169 step: 168 (sched_cpu_activate)
+   cpuhp/4-31   [004]  95.552: cpuhp_exit:  cpu: 0004  state: 168 step: 168 ret: 0
+      bash-394  [005]  95.553: cpuhp_exit:  cpu: 0004  state: 169 step: 140 ret: 0
+
+可以看到，CPU4一直下降到时间戳22.996，然后又上升到95.552。所有被调用的回调，
+包括它们的返回代码都可以在跟踪中看到。
+
+架构的要求
+==========
+
+需要具备以下功能和配置：
+
+``CONFIG_HOTPLUG_CPU``
+  这个配置项需要在Kconfig中启用
+
+``__cpu_up()``
+  调出一个cpu的架构接口
+
+``__cpu_disable()``
+  关闭CPU的架构接口，在此程序返回后，内核不能再处理任何中断。这包括定时器的关闭。
+
+``__cpu_die()``
+  这实际上是为了确保CPU的死亡。实际上，看看其他架构中实现CPU热拔插的一些示例代
+  码。对于那个特定的架构，处理器被从 ``idle()`` 循环中拿下来。 ``__cpu_die()``
+  通常会等待一些per_cpu状态的设置，以确保处理器的死亡例程被调用来保持活跃。
+
+用户空间通知
+============
+
+在CPU成功上线或下线后，udev事件被发送。一个udev规则，比如::
+
+  SUBSYSTEM=="cpu", DRIVERS=="processor", DEVPATH=="/devices/system/cpu/*", RUN+="the_hotplug_receiver.sh"
+
+将接收所有事件。一个像这样的脚本::
+
+  #!/bin/sh
+
+  if [ "${ACTION}" = "offline" ]
+  then
+      echo "CPU ${DEVPATH##*/} offline"
+
+  elif [ "${ACTION}" = "online" ]
+  then
+      echo "CPU ${DEVPATH##*/} online"
+
+  fi
+
+可以进一步处理该事件。
+
+内核内联文档参考
+================
+
+该API在以下内核代码中:
+
+include/linux/cpuhotplug.h
diff --git a/Documentation/translations/zh_CN/core-api/genericirq.rst b/Documentation/translations/zh_CN/core-api/genericirq.rst
new file mode 100644
index 000000000000..05ccb954c18d
--- /dev/null
+++ b/Documentation/translations/zh_CN/core-api/genericirq.rst
@@ -0,0 +1,409 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/core-api/genericirq.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+ 吴想成 Wu XiangCheng <bobwxc@email.cn>
+
+.. include:: <isonum.txt>
+
+.. _cn_core-api_genericirq:
+
+================
+Linux通用IRQ处理
+================
+
+:版权: |copy| 2005-2010: Thomas Gleixner
+:版权: |copy| 2005-2006:  Ingo Molnar
+
+简介
+====
+
+通用中断处理层是为了给设备驱动程序提供一个完整的中断处理抽象（层）。它能够处
+理所有不同类型的中断控制器硬件。设备驱动程序使用通用API函数来请求、启用、禁
+用和释放中断。驱动程序不需要知道任何关于硬件处理中断的细节，所以它们可以在不同的
+平台上使用而不需要修改代码。
+
+本文档提供给那些希望在通用IRQ处理层的帮助下实现基于其架构的中断子系统的开发
+者。
+
+理论依据
+========
+
+Linux中中断处理的原始实现使用__do_IRQ()超级处理程序，它能够处理每种类型的
+中断逻辑。
+
+最初，Russell King确定了不同类型的处理程序，以便为Linux 2.5/2.6中的ARM中
+断处理程序实现建立一个相当通用的集合。他区分了以下几种类型:
+
+-  电平触发型
+
+-  边沿触发型
+
+-  简单型
+
+在实现过程中，我们发现了另一种类型:
+
+-  响应EOI（end of interrupt）型
+
+在SMP的__do_IRQ()超级处理程序中，还需定义一种类型：
+
+-  每cpu型（针对CPU SMP）
+
+这种高层IRQ处理程序的拆分实现使我们能够为每个特定的中断类型优化中断处理的流
+程。这减少了该特定代码路径的复杂性，并允许对特定类型进行优化处理。
+
+最初的通用IRQ实现使用hw_interrupt_type结构体及其 ``->ack`` ``->end`` 等回
+调来区分超级处理程序中的流控制。这导致了流逻辑和低级硬件逻辑的混合，也导致了
+不必要的代码重复：例如i386中的 ``ioapic_level_irq`` 和 ``ioapic_edge_irq`` ，
+这两个IRQ类型共享许多低级的细节，但有不同的流处理。
+
+一个更自然的抽象是“irq流”和“芯片细节”的干净分离。
+
+分析一些架构的IRQ子系统的实现可以发现，他们中的大多数可以使用一套通用的“irq
+流”方法，只需要添加芯片级的特定代码。这种分离对于那些需要IRQ流本身而不需要芯
+片细节的特定（子）架构也很有价值——以提供了一个更透明的IRQ子系统设计。
+
+每个中断描述符都被分配给它自己的高层流程处理程序，这通常是一个通用的实现。(这
+种高层次的流程处理程序的实现也使得提供解复用处理程序变得简单，这可以在各种架
+构的嵌入式平台上找到。)
+
+这种分离使得通用中断处理层更加灵活和可扩展。例如，一个（子）架构可以使用通用
+的IRQ流实现“电平触发型”中断，并添加一个（子）架构特定的“边沿型”实现。
+
+为了使向新模型的过渡更容易，并防止破坏现有实现，__do_IRQ()超级处理程序仍然
+可用。这导致了一种暂时的双重性。随着时间的推移，新的模型应该在越来越多的架构中
+被使用，因为它能使IRQ子系统更小更干净。它已经被废弃三年了，即将被删除。
+
+已知的缺陷和假设
+================
+
+没有（但愿如此）。
+
+抽象层
+======
+
+中断代码中主要有三个抽象层次:
+
+1. 高级别的驱动API
+
+2. 高级别的IRQ流处理器
+
+3. 芯片级的硬件封装
+
+中断控制流
+----------
+
+每个中断都由一个中断描述符结构体irq_desc来描述。中断是由一个“无符号整型”的数值来
+引用的，它在描述符结构体数组中选择相应的中断描述符结构体。描述符结构体包含状态
+信息和指向中断流方法和中断芯片结构的指针，这些都是分配给这个中断的。
+
+每当中断触发时，低级架构代码通过调用desc->handle_irq()调用到通用中断代码中。
+这个高层IRQ处理函数只使用由分配的芯片描述符结构体引用的desc->irq_data.chip
+基元。
+
+高级驱动程序API
+---------------
+
+高层驱动API由以下函数组成:
+
+-  request_irq()
+
+-  request_threaded_irq()
+
+-  free_irq()
+
+-  disable_irq()
+
+-  enable_irq()
+
+-  disable_irq_nosync() (SMP only)
+
+-  synchronize_irq() (SMP only)
+
+-  irq_set_irq_type()
+
+-  irq_set_irq_wake()
+
+-  irq_set_handler_data()
+
+-  irq_set_chip()
+
+-  irq_set_chip_data()
+
+详见自动生成的函数文档。
+
+.. note::
+
+    由于文档构建流程所限，中文文档中并没有引入自动生成的函数文档，所以请读者直接
+    阅读源码注释。
+
+电平触发型IRQ流处理程序
+-----------------------
+
+通用层提供了一套预定义的irq-flow方法:
+
+-  handle_level_irq()
+
+-  handle_edge_irq()
+
+-  handle_fasteoi_irq()
+
+-  handle_simple_irq()
+
+-  handle_percpu_irq()
+
+-  handle_edge_eoi_irq()
+
+-  handle_bad_irq()
+
+中断流处理程序（无论是预定义的还是架构特定的）由架构在启动期间或设备初始化期间分配给
+特定中断。
+
+默认流实现
+~~~~~~~~~~
+
+辅助函数
+^^^^^^^^
+
+辅助函数调用芯片基元，并被默认流实现所使用。以下是实现的辅助函数（简化摘录）::
+
+    default_enable(struct irq_data *data)
+    {
+        desc->irq_data.chip->irq_unmask(data);
+    }
+
+    default_disable(struct irq_data *data)
+    {
+        if (!delay_disable(data))
+            desc->irq_data.chip->irq_mask(data);
+    }
+
+    default_ack(struct irq_data *data)
+    {
+        chip->irq_ack(data);
+    }
+
+    default_mask_ack(struct irq_data *data)
+    {
+        if (chip->irq_mask_ack) {
+            chip->irq_mask_ack(data);
+        } else {
+            chip->irq_mask(data);
+            chip->irq_ack(data);
+        }
+    }
+
+    noop(struct irq_data *data))
+    {
+    }
+
+
+
+默认流处理程序的实现
+~~~~~~~~~~~~~~~~~~~~
+
+电平触发型IRQ流处理器
+^^^^^^^^^^^^^^^^^^^^^
+
+handle_level_irq为电平触发型的中断提供了一个通用实现。
+
+实现的控制流如下（简化摘录）::
+
+    desc->irq_data.chip->irq_mask_ack();
+    handle_irq_event(desc->action);
+    desc->irq_data.chip->irq_unmask();
+
+
+默认的需回应IRQ流处理器
+^^^^^^^^^^^^^^^^^^^^^^^
+
+handle_fasteoi_irq为中断提供了一个通用的实现，它只需要在处理程序的末端有一个EOI。
+
+实现的控制流如下（简化摘录）::
+
+    handle_irq_event(desc->action);
+    desc->irq_data.chip->irq_eoi();
+
+
+默认的边沿触发型IRQ流处理器
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+handle_edge_irq为边沿触发型的中断提供了一个通用的实现。
+
+实现的控制流如下（简化摘录）::
+
+    if (desc->status & running) {
+        desc->irq_data.chip->irq_mask_ack();
+        desc->status |= pending | masked;
+        return;
+    }
+    desc->irq_data.chip->irq_ack();
+    desc->status |= running;
+    do {
+        if (desc->status & masked)
+            desc->irq_data.chip->irq_unmask();
+        desc->status &= ~pending;
+        handle_irq_event(desc->action);
+    } while (status & pending);
+    desc->status &= ~running;
+
+
+默认的简单型IRQ流处理器
+^^^^^^^^^^^^^^^^^^^^^^^
+
+handle_simple_irq提供了一个简单型中断的通用实现。
+
+.. note::
+
+   简单型的流处理程序不调用任何处理程序/芯片基元。
+
+实现的控制流程如下（简化摘录）::
+
+    handle_irq_event(desc->action);
+
+
+默认的每CPU型流处理程序
+^^^^^^^^^^^^^^^^^^^^^^^
+
+handle_percpu_irq为每CPU型中断提供一个通用的实现。
+
+每个CPU中断只在SMP上可用，该处理程序提供了一个没有锁的简化版本。
+
+以下是控制流的实现（简化摘录）::
+
+    if (desc->irq_data.chip->irq_ack)
+        desc->irq_data.chip->irq_ack();
+    handle_irq_event(desc->action);
+    if (desc->irq_data.chip->irq_eoi)
+        desc->irq_data.chip->irq_eoi();
+
+
+EOI边沿型IRQ流处理器
+^^^^^^^^^^^^^^^^^^^^
+
+handle_edge_eoi_irq提供了一个异常的边沿触发型处理程序，它只用于拯救powerpc/cell
+上的一个严重失控的irq控制器。
+
+坏的IRQ流处理器
+^^^^^^^^^^^^^^^
+
+handle_bad_irq用于处理没有真正分配处理程序的假中断。
+
+特殊性和优化
+~~~~~~~~~~~~
+
+通用函数是为“干净”的架构和芯片设计的，它们没有平台特定的IRQ处理特殊性。如果一
+个架构需要在“流”的层面上实现特殊性，那么它可以通过覆盖高层的IRQ-流处理程序来实
+现。
+
+延迟中断禁用
+~~~~~~~~~~~~
+
+每个中断可选择的功能是由Russell King在ARM中断实现中引入的，当调用disable_irq()
+时，不会在硬件层面上屏蔽中断。中断保持启用状态，而在中断事件发生时在流处理器中被
+屏蔽。这可以防止在硬件上丢失边沿中断，因为硬件上不存储边沿中断事件，而中断在硬件
+级被禁用。当一个中断在IRQ_DISABLED标志被设置时到达，那么该中断在硬件层面被屏蔽，
+IRQ_PENDING位被设置。当中断被enable_irq()重新启用时，将检查挂起位，如果它被设置，
+中断将通过硬件或软件重发机制重新发送。(当你想使用延迟中断禁用功能，而你的硬件又不
+能重新触发中断时，有必要启用CONFIG_HARDIRQS_SW_RESEND。) 延迟中断禁止功能是不可
+配置的。
+
+芯片级硬件封装
+--------------
+
+芯片级硬件描述符结构体 :c:type:`irq_chip` 包含了所有与芯片直接相关的功能，这些功
+能可以被irq流实现所利用。
+
+-  ``irq_ack``
+
+-  ``irq_mask_ack`` - 可选的，建议使用的性能
+
+-  ``irq_mask``
+
+-  ``irq_unmask``
+
+-  ``irq_eoi`` - 可选的，EOI流处理程序需要
+
+-  ``irq_retrigger`` - 可选的
+
+-  ``irq_set_type`` - 可选的
+
+-  ``irq_set_wake`` - 可选的
+
+这些基元的意思是严格意义上的：ack是指ACK，masking是指对IRQ线的屏蔽，等等。这取决
+于流处理器如何使用这些基本的低级功能单元。
+
+__do_IRQ入口点
+==============
+
+最初的实现__do_IRQ()是所有类型中断的替代入口点。它已经不存在了。
+
+这个处理程序被证明不适合所有的中断硬件，因此被重新实现了边沿/级别/简单/超高速中断
+的拆分功能。这不仅是一个功能优化。它也缩短了中断的代码路径。
+
+在SMP上的锁
+===========
+
+芯片寄存器的锁定是由定义芯片基元的架构决定的。每个寄存器的结构通过desc->lock，由
+通用层保护。
+
+通用中断芯片
+============
+
+为了避免复制相同的IRQ芯片实现，核心提供了一个可配置的通用中断芯片实现。开发者在自
+己实现相同的功能之前，应该仔细检查通用芯片是否符合他们的需求，并以稍微不同的方式实
+现相同的功能。
+
+该API在以下内核代码中:
+
+kernel/irq/generic-chip.c
+
+结构体
+======
+
+本章包含自动生成的结构体文档，这些结构体在通用IRQ层中使用。
+
+该API在以下内核代码中:
+
+include/linux/irq.h
+
+include/linux/interrupt.h
+
+提供的通用函数
+==============
+
+这一章包含了自动生成的内核API函数的文档，这些函数被导出。
+
+该API在以下内核代码中:
+
+kernel/irq/manage.c
+
+kernel/irq/chip.c
+
+提供的内部函数
+==============
+
+本章包含自动生成的内部函数的文档。
+
+该API在以下内核代码中:
+
+kernel/irq/irqdesc.c
+
+kernel/irq/handle.c
+
+kernel/irq/chip.c
+
+鸣谢
+====
+
+感谢以下人士对本文档作出的贡献：
+
+1. Thomas Gleixner tglx@linutronix.de
+
+2. Ingo Molnar mingo@elte.hu
diff --git a/Documentation/translations/zh_CN/core-api/index.rst b/Documentation/translations/zh_CN/core-api/index.rst
index b4bde9396339..72f0a36daa1c 100644
--- a/Documentation/translations/zh_CN/core-api/index.rst
+++ b/Documentation/translations/zh_CN/core-api/index.rst
@@ -1,10 +1,12 @@
 .. include:: ../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../core-api/irq/index`
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+:Original: Documentation/core-api/index.rst
 
-.. _cn_core-api_index.rst:
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
+.. _cn_core-api_index.rst:
 
 ===========
 核心API文档
@@ -80,14 +82,17 @@ Todolist:
    :maxdepth: 1
 
    cachetlb
+   cpu_hotplug
+   genericirq
+   memory-hotplug
+   protection-keys
 
 Todolist:
 
 
-   cpu_hotplug
    memory-hotplug
+   cpu_hotplug
    genericirq
-   protection-keys
 
 
 内存管理
diff --git a/Documentation/translations/zh_CN/core-api/irq/concepts.rst b/Documentation/translations/zh_CN/core-api/irq/concepts.rst
index 41455bf0f783..9957f0453353 100644
--- a/Documentation/translations/zh_CN/core-api/irq/concepts.rst
+++ b/Documentation/translations/zh_CN/core-api/irq/concepts.rst
@@ -1,10 +1,12 @@
 .. include:: ../../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../../core-api/irq/concepts`
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+:Original: Documentation/core-api/irq/concepts.rst
 
-.. _cn_concepts.rst:
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
+.. _cn_concepts.rst:
 
 ===========
 什么是IRQ？
diff --git a/Documentation/translations/zh_CN/core-api/irq/index.rst b/Documentation/translations/zh_CN/core-api/irq/index.rst
index 910ccabf041f..ba6acc4b48e5 100644
--- a/Documentation/translations/zh_CN/core-api/irq/index.rst
+++ b/Documentation/translations/zh_CN/core-api/irq/index.rst
@@ -1,7 +1,10 @@
 .. include:: ../../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../../core-api/irq/index`
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+:Original: Documentation/core-api/irq/index.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
 .. _cn_irq_index.rst:
 
diff --git a/Documentation/translations/zh_CN/core-api/irq/irq-affinity.rst b/Documentation/translations/zh_CN/core-api/irq/irq-affinity.rst
index 82a4428f22fd..7addd5f27a88 100644
--- a/Documentation/translations/zh_CN/core-api/irq/irq-affinity.rst
+++ b/Documentation/translations/zh_CN/core-api/irq/irq-affinity.rst
@@ -1,10 +1,12 @@
 .. include:: ../../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../../core-api/irq/irq-affinity`
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+:Original: Documentation/core-api/irq/irq-affinity
 
-.. _cn_irq-affinity.rst:
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
+.. _cn_irq-affinity.rst:
 
 ==============
 SMP IRQ 亲和性
diff --git a/Documentation/translations/zh_CN/core-api/irq/irq-domain.rst b/Documentation/translations/zh_CN/core-api/irq/irq-domain.rst
index 3c82dd307a46..7d077742f758 100644
--- a/Documentation/translations/zh_CN/core-api/irq/irq-domain.rst
+++ b/Documentation/translations/zh_CN/core-api/irq/irq-domain.rst
@@ -1,10 +1,12 @@
 .. include:: ../../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../../core-api/irq/irq-domain`
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+:Original: Documentation/core-api/irq/irq-domain.rst
 
-.. _cn_irq-domain.rst:
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
+.. _cn_irq-domain.rst:
 
 =======================
 irq_domain 中断号映射库
diff --git a/Documentation/translations/zh_CN/core-api/irq/irqflags-tracing.rst b/Documentation/translations/zh_CN/core-api/irq/irqflags-tracing.rst
index c889bd0f65d9..9af50b4b8c2d 100644
--- a/Documentation/translations/zh_CN/core-api/irq/irqflags-tracing.rst
+++ b/Documentation/translations/zh_CN/core-api/irq/irqflags-tracing.rst
@@ -1,10 +1,12 @@
 .. include:: ../../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../../core-api/irq/irqflags-tracing`
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+:Original: Documentation/core-api/irq/irqflags-tracing.rst
 
-.. _cn_irqflags-tracing.rst:
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
+.. _cn_irqflags-tracing.rst:
 
 =================
 IRQ-flags状态追踪
diff --git a/Documentation/translations/zh_CN/core-api/kernel-api.rst b/Documentation/translations/zh_CN/core-api/kernel-api.rst
index d6f815ec265b..ab7d81889340 100644
--- a/Documentation/translations/zh_CN/core-api/kernel-api.rst
+++ b/Documentation/translations/zh_CN/core-api/kernel-api.rst
@@ -1,10 +1,12 @@
 .. include:: ../disclaimer-zh_CN.rst
 
 :Original: Documentation/core-api/kernel-api.rst
-:Translator: Yanteng Si <siyanteng@loongson.cn>
 
-.. _cn_kernel-api.rst:
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
+.. _cn_kernel-api.rst:
 
 ============
 Linux内核API
diff --git a/Documentation/translations/zh_CN/core-api/kobject.rst b/Documentation/translations/zh_CN/core-api/kobject.rst
index f0e6a4aeb372..b7c37794cc7f 100644
--- a/Documentation/translations/zh_CN/core-api/kobject.rst
+++ b/Documentation/translations/zh_CN/core-api/kobject.rst
@@ -1,7 +1,10 @@
 .. include:: ../disclaimer-zh_CN.rst
 
 :Original: Documentation/core-api/kobject.rst
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
 .. _cn_core_api_kobject.rst:
 
diff --git a/Documentation/translations/zh_CN/core-api/local_ops.rst b/Documentation/translations/zh_CN/core-api/local_ops.rst
index ee67379b6869..41e4525038e8 100644
--- a/Documentation/translations/zh_CN/core-api/local_ops.rst
+++ b/Documentation/translations/zh_CN/core-api/local_ops.rst
@@ -1,10 +1,12 @@
 .. include:: ../disclaimer-zh_CN.rst
 
 :Original: Documentation/core-api/local_ops.rst
-:Translator: Yanteng Si <siyanteng@loongson.cn>
 
-.. _cn_local_ops:
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
+.. _cn_local_ops:
 
 ========================
 本地原子操作的语义和行为
diff --git a/Documentation/translations/zh_CN/core-api/memory-hotplug.rst b/Documentation/translations/zh_CN/core-api/memory-hotplug.rst
new file mode 100644
index 000000000000..161f4d2c18cc
--- /dev/null
+++ b/Documentation/translations/zh_CN/core-api/memory-hotplug.rst
@@ -0,0 +1,126 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/core-api/memory_hotplug.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+ 吴想成 Wu XiangCheng <bobwxc@email.cn>
+
+.. _cn_core-api_memory-hotplug:
+
+==========
+内存热插拔
+==========
+
+内存热拔插事件通知器
+====================
+
+热插拔事件被发送到一个通知队列中。
+
+在 ``include/linux/memory.h`` 中定义了六种类型的通知：
+
+MEM_GOING_ONLINE
+  在新内存可用之前生成，以便能够为子系统处理内存做准备。页面分配器仍然无法从新
+  的内存中进行分配。
+
+MEM_CANCEL_ONLINE
+  如果MEM_GOING_ONLINE失败，则生成。
+
+MEM_ONLINE
+  当内存成功上线时产生。回调可以从新的内存中分配页面。
+
+MEM_GOING_OFFLINE
+  在开始对内存进行下线处理时生成。从内存中的分配不再可能，但是一些要下线的内存
+  仍然在使用。回调可以用来释放一个子系统在指定内存块中已知的内存。
+
+MEM_CANCEL_OFFLINE
+  如果MEM_GOING_OFFLINE失败，则生成。来自我们试图离线的内存块中的内存又可以使
+  用了。
+
+MEM_OFFLINE
+  在内存下线完成后生成。
+
+可以通过调用如下函数来注册一个回调程序:
+
+  hotplug_memory_notifier(callback_func, priority)
+
+优先级数值较高的回调函数在数值较低的回调函数之前被调用。
+
+一个回调函数必须有以下原型::
+
+  int callback_func(
+    struct notifier_block *self, unsigned long action, void *arg);
+
+回调函数的第一个参数（self）是指向回调函数本身的通知器链块的一个指针。第二个参
+数（action）是上述的事件类型之一。第三个参数（arg）传递一个指向
+memory_notify结构体的指针::
+
+	struct memory_notify {
+		unsigned long start_pfn;
+		unsigned long nr_pages;
+		int status_change_nid_normal;
+		int status_change_nid_high;
+		int status_change_nid;
+	}
+
+- start_pfn是在线/离线内存的start_pfn。
+
+- nr_pages是在线/离线内存的页数。
+
+- status_change_nid_normal是当nodemask的N_NORMAL_MEMORY被设置/清除时设置节
+  点id，如果是-1，则nodemask状态不改变。
+
+- status_change_nid_high是当nodemask的N_HIGH_MEMORY被设置/清除时设置的节点
+  id，如果这个值为-1，那么nodemask状态不会改变。
+
+- status_change_nid是当nodemask的N_MEMORY被（将）设置/清除时设置的节点id。这
+  意味着一个新的（没上线的）节点通过联机获得新的内存，而一个节点失去了所有的内
+  存。如果这个值为-1，那么nodemask的状态就不会改变。
+
+  如果 status_changed_nid* >= 0，回调应该在必要时为节点创建/丢弃结构体。
+
+回调程序应返回 ``include/linux/notifier.h`` 中定义的NOTIFY_DONE, NOTIFY_OK,
+NOTIFY_BAD, NOTIFY_STOP中的一个值。
+
+NOTIFY_DONE和NOTIFY_OK对进一步处理没有影响。
+
+NOTIFY_BAD是作为对MEM_GOING_ONLINE、MEM_GOING_OFFLINE、MEM_ONLINE或MEM_OFFLINE
+动作的回应，用于取消热插拔。它停止对通知队列的进一步处理。
+
+NOTIFY_STOP停止对通知队列的进一步处理。
+
+内部锁
+======
+
+当添加/删除使用内存块设备（即普通RAM）的内存时，device_hotplug_lock应该被保持
+为:
+
+- 针对在线/离线请求进行同步（例如，通过sysfs）。这样一来，内存块设备只有在内存
+  被完全添加后才能被用户空间访问（.online/.state属性）。而在删除内存时，我们知
+  道没有人在临界区。
+
+- 与CPU热拔插或类似操作同步（例如ACPI和PPC相关操作）
+
+特别是，在添加内存和用户空间试图以比预期更快的速度上线该内存时，有可能出现锁反转，
+使用device_hotplug_lock可以避免此情况:
+
+- device_online()将首先接受device_lock()，然后是mem_hotplug_lock。
+
+- add_memory_resource()将首先使用mem_hotplug_lock，然后是device_lock()（在创
+  建设备时，在bus_add_device()期间）。
+
+由于在使用device_lock()之前，设备对用户空间是可见的，这可能导致锁的反转。
+
+内存的上线/下线应该通过device_online()/device_offline()完成————确保它与通过
+sysfs进行的操作正确同步。建议持有device_hotplug_lock（例如，保护online_type）。
+
+当添加/删除/上线/下线内存或者添加/删除异构或设备内存时，我们应该始终持有写模式的
+mem_hotplug_lock，以序列化内存热插拔（例如访问全局/区域变量）。
+
+此外，mem_hotplug_lock（与device_hotplug_lock相反）在读取模式下允许一个相当
+有效的get_online_mems/put_online_mems实现，所以访问内存的代码可以防止该内存
+消失。
diff --git a/Documentation/translations/zh_CN/core-api/padata.rst b/Documentation/translations/zh_CN/core-api/padata.rst
index c627f8f131f9..781d30675afd 100644
--- a/Documentation/translations/zh_CN/core-api/padata.rst
+++ b/Documentation/translations/zh_CN/core-api/padata.rst
@@ -3,7 +3,10 @@
 .. include:: ../disclaimer-zh_CN.rst
 
 :Original: Documentation/core-api/padata.rst
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
 .. _cn_core_api_padata.rst:
 
diff --git a/Documentation/translations/zh_CN/core-api/printk-basics.rst b/Documentation/translations/zh_CN/core-api/printk-basics.rst
index 2b20f6303a82..d574de3167c8 100644
--- a/Documentation/translations/zh_CN/core-api/printk-basics.rst
+++ b/Documentation/translations/zh_CN/core-api/printk-basics.rst
@@ -2,10 +2,12 @@
 .. include:: ../disclaimer-zh_CN.rst
 
 :Original: Documentation/core-api/printk-basics.rst
-:Translator: Yanteng Si <siyanteng@loongson.cn>
 
-.. _cn_printk-basics.rst:
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
+.. _cn_printk-basics.rst:
 
 ==================
 使用printk记录消息
diff --git a/Documentation/translations/zh_CN/core-api/printk-formats.rst b/Documentation/translations/zh_CN/core-api/printk-formats.rst
index a680c8f164c3..ce39c788cf5a 100644
--- a/Documentation/translations/zh_CN/core-api/printk-formats.rst
+++ b/Documentation/translations/zh_CN/core-api/printk-formats.rst
@@ -1,10 +1,12 @@
 .. include:: ../disclaimer-zh_CN.rst
 
 :Original: Documentation/core-api/printk-formats.rst
-:Translator: Yanteng Si <siyanteng@loongson.cn>
 
-.. _cn_printk-formats.rst:
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
+.. _cn_printk-formats.rst:
 
 ==============================
 如何获得正确的printk格式占位符
diff --git a/Documentation/translations/zh_CN/core-api/protection-keys.rst b/Documentation/translations/zh_CN/core-api/protection-keys.rst
new file mode 100644
index 000000000000..d07830050153
--- /dev/null
+++ b/Documentation/translations/zh_CN/core-api/protection-keys.rst
@@ -0,0 +1,99 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/core-api/protection-keys.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+ 吴想成 Wu XiangCheng <bobwxc@email.cn>
+
+.. _cn_core-api_protection-keys:
+
+============
+内存保护密钥
+============
+
+用户空间的内存保护密钥（Memory Protection Keys for Userspace，PKU，亦
+即PKEYs）是英特尔Skylake（及以后）“可扩展处理器”服务器CPU上的一项功能。
+它将在未来的非服务器英特尔处理器和未来的AMD处理器中可用。
+
+对于任何希望测试或使用该功能的人来说，它在亚马逊的EC2 C5实例中是可用的，
+并且已知可以在那里使用Ubuntu 17.04镜像运行。
+
+内存保护密钥提供了一种机制来执行基于页面的保护，但在应用程序改变保护域
+时不需要修改页表。它的工作原理是在每个页表项中为“保护密钥”分配4个以
+前被忽略的位，从而提供16个可能的密钥。
+
+还有一个新的用户可访问寄存器（PKRU），为每个密钥提供两个单独的位（访
+问禁止和写入禁止）。作为一个CPU寄存器，PKRU在本质上是线程本地的，可能
+会给每个线程提供一套不同于其他线程的保护措施。
+
+有两条新指令（RDPKRU/WRPKRU）用于读取和写入新的寄存器。该功能仅在64位
+模式下可用，尽管物理地址扩展页表中理论上有空间。这些权限只在数据访问上
+强制执行，对指令获取没有影响。
+
+
+系统调用
+========
+
+有3个系统调用可以直接与pkeys进行交互::
+
+	int pkey_alloc(unsigned long flags, unsigned long init_access_rights)
+	int pkey_free(int pkey);
+	int pkey_mprotect(unsigned long start, size_t len,
+			  unsigned long prot, int pkey);
+
+在使用一个pkey之前，必须先用pkey_alloc()分配它。一个应用程序直接调用
+WRPKRU指令，以改变一个密钥覆盖的内存的访问权限。在这个例子中，WRPKRU
+被一个叫做pkey_set()的C函数所封装::
+
+	int real_prot = PROT_READ|PROT_WRITE;
+	pkey = pkey_alloc(0, PKEY_DISABLE_WRITE);
+	ptr = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+	ret = pkey_mprotect(ptr, PAGE_SIZE, real_prot, pkey);
+	... application runs here
+
+现在，如果应用程序需要更新'ptr'处的数据，它可以获得访问权，进行更新，
+然后取消其写访问权::
+
+	pkey_set(pkey, 0); // clear PKEY_DISABLE_WRITE
+	*ptr = foo; // assign something
+	pkey_set(pkey, PKEY_DISABLE_WRITE); // set PKEY_DISABLE_WRITE again
+
+现在，当它释放内存时，它也将释放pkey，因为它不再被使用了::
+
+	munmap(ptr, PAGE_SIZE);
+	pkey_free(pkey);
+
+.. note:: pkey_set()是RDPKRU和WRPKRU指令的一个封装器。在tools/testing/selftests/x86/protection_keys.c中可以找到一个实现实例。
+          tools/testing/selftests/x86/protection_keys.c.
+
+行为
+====
+
+内核试图使保护密钥与普通的mprotect()的行为一致。例如，如果你这样做::
+
+	mprotect(ptr, size, PROT_NONE);
+	something(ptr);
+
+这样做的时候，你可以期待保护密钥的相同效果::
+
+	pkey = pkey_alloc(0, PKEY_DISABLE_WRITE | PKEY_DISABLE_READ);
+	pkey_mprotect(ptr, size, PROT_READ|PROT_WRITE, pkey);
+	something(ptr);
+
+无论something()是否是对'ptr'的直接访问，这都应该为真。
+如::
+
+	*ptr = foo;
+
+或者当内核代表应用程序进行访问时，比如read()::
+
+	read(fd, ptr, 1);
+
+在这两种情况下，内核都会发送一个SIGSEGV，但当违反保护密钥时，si_code
+将被设置为SEGV_PKERR，而当违反普通的mprotect()权限时，则是SEGV_ACCERR。
diff --git a/Documentation/translations/zh_CN/core-api/refcount-vs-atomic.rst b/Documentation/translations/zh_CN/core-api/refcount-vs-atomic.rst
index ea834e38d2f6..e2467fd26fc0 100644
--- a/Documentation/translations/zh_CN/core-api/refcount-vs-atomic.rst
+++ b/Documentation/translations/zh_CN/core-api/refcount-vs-atomic.rst
@@ -1,10 +1,12 @@
 .. include:: ../disclaimer-zh_CN.rst
 
 :Original: Documentation/core-api/refcount-vs-atomic.rst
-:Translator: Yanteng Si <siyanteng@loongson.cn>
 
-.. _cn_refcount-vs-atomic:
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
+.. _cn_refcount-vs-atomic:
 
 =======================================
 与atomic_t相比，refcount_t的API是这样的
diff --git a/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst b/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst
index ce05c29c7697..6abf7ed534ca 100644
--- a/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst
+++ b/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst
@@ -1,10 +1,12 @@
 .. include:: ../disclaimer-zh_CN.rst
 
 :Original: Documentation/core-api/symbol-namespaces.rst
-:Translator: Yanteng Si <siyanteng@loongson.cn>
 
-.. _cn_symbol-namespaces.rst:
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
+.. _cn_symbol-namespaces.rst:
 
 =================================
 符号命名空间（Symbol Namespaces）
diff --git a/Documentation/translations/zh_CN/core-api/workqueue.rst b/Documentation/translations/zh_CN/core-api/workqueue.rst
index 0b8f730db6c0..e372fa5cf101 100644
--- a/Documentation/translations/zh_CN/core-api/workqueue.rst
+++ b/Documentation/translations/zh_CN/core-api/workqueue.rst
@@ -2,10 +2,12 @@
 .. include:: ../disclaimer-zh_CN.rst
 
 :Original: Documentation/core-api/workqueue.rst
-:Translator: Yanteng Si <siyanteng@loongson.cn>
 
-.. _cn_workqueue.rst:
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
+.. _cn_workqueue.rst:
 
 =========================
 并发管理的工作队列 (cmwq)
diff --git a/Documentation/translations/zh_CN/cpu-freq/core.rst b/Documentation/translations/zh_CN/cpu-freq/core.rst
index 19fb9c029cfe..0c6fd447ced6 100644
--- a/Documentation/translations/zh_CN/cpu-freq/core.rst
+++ b/Documentation/translations/zh_CN/cpu-freq/core.rst
@@ -1,11 +1,13 @@
 .. SPDX-License-Identifier: GPL-2.0
 .. include:: ../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../cpu-freq/core`
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+:Original: Documentation/cpu-freq/core.rst
 
-.. _cn_core.rst:
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
+.. _cn_core.rst:
 
 ====================================
 CPUFreq核心和CPUFreq通知器的通用说明
diff --git a/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst b/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst
index 5ae9cfa2ec55..0fc5d1495789 100644
--- a/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst
+++ b/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst
@@ -2,11 +2,13 @@
 
 .. include:: ../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../cpu-freq/cpu-drivers`
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+:Original: Documentation/cpu-freq/cpu-drivers.rst
 
-.. _cn_cpu-drivers.rst:
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
+.. _cn_cpu-drivers.rst:
 
 =======================================
 如何实现一个新的CPUFreq处理器驱动程序？
@@ -80,8 +82,6 @@ CPUfreq核心层注册一个cpufreq_driver结构体。
  .resume - 一个指向per-policy恢复函数的指针，该函数在关中断且在调节器再一次开始前被
  调用。
 
- .ready - 一个指向per-policy准备函数的指针，该函数在策略完全初始化之后被调用。
-
  .attr - 一个指向NULL结尾的"struct freq_attr"列表的指针，该函数允许导出值到
  sysfs。
 
diff --git a/Documentation/translations/zh_CN/cpu-freq/cpufreq-stats.rst b/Documentation/translations/zh_CN/cpu-freq/cpufreq-stats.rst
index c90d1d8353ed..f14423099d4b 100644
--- a/Documentation/translations/zh_CN/cpu-freq/cpufreq-stats.rst
+++ b/Documentation/translations/zh_CN/cpu-freq/cpufreq-stats.rst
@@ -2,11 +2,13 @@
 
 .. include:: ../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../cpu-freq/cpufreq-stats`
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+:Original: Documentation/cpu-freq/cpufreq-stats.rst
 
-.. _cn_cpufreq-stats.rst:
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
+.. _cn_cpufreq-stats.rst:
 
 ==========================================
 sysfs CPUFreq Stats的一般说明
diff --git a/Documentation/translations/zh_CN/cpu-freq/index.rst b/Documentation/translations/zh_CN/cpu-freq/index.rst
index 65074e211940..c6e50963cd33 100644
--- a/Documentation/translations/zh_CN/cpu-freq/index.rst
+++ b/Documentation/translations/zh_CN/cpu-freq/index.rst
@@ -2,11 +2,13 @@
 
 .. include:: ../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../cpu-freq/index`
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+:Original: Documentation/cpu-freq/index.rst
 
-.. _cn_index.rst:
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
+.. _cn_index.rst:
 
 =======================================================
 Linux CPUFreq - Linux(TM)内核中的CPU频率和电压升降代码
diff --git a/Documentation/translations/zh_CN/dev-tools/index.rst b/Documentation/translations/zh_CN/dev-tools/index.rst
index e6c99f2f543f..0f770b8664e9 100644
--- a/Documentation/translations/zh_CN/dev-tools/index.rst
+++ b/Documentation/translations/zh_CN/dev-tools/index.rst
@@ -11,6 +11,9 @@
 目前这些文档已经整理在一起，不需要再花费额外的精力。
 欢迎任何补丁。
 
+有关测试专用工具的简要概述，参见
+Documentation/translations/zh_CN/dev-tools/testing-overview.rst
+
 .. class:: toc-title
 
 	   目录
@@ -18,6 +21,7 @@
 .. toctree::
    :maxdepth: 2
 
+   testing-overview
    gcov
    kasan
 
@@ -29,6 +33,7 @@ Todolist:
  - ubsan
  - kmemleak
  - kcsan
+ - kfence
  - gdb-kernel-debugging
  - kgdb
  - kselftest
diff --git a/Documentation/translations/zh_CN/dev-tools/testing-overview.rst b/Documentation/translations/zh_CN/dev-tools/testing-overview.rst
new file mode 100644
index 000000000000..b7a1d13da6c6
--- /dev/null
+++ b/Documentation/translations/zh_CN/dev-tools/testing-overview.rst
@@ -0,0 +1,109 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/dev-tools/testing-overview.rst
+:Translator: 胡皓文 Hu Haowen <src.res@email.cn>
+
+============
+内核测试指南
+============
+
+有许多不同的工具可以用于测试Linux内核，因此了解什么时候使用它们可能
+很困难。本文档粗略概述了它们之间的区别，并阐释了它们是怎样糅合在一起
+的。
+
+编写和运行测试
+==============
+
+大多数内核测试都是用kselftest或KUnit框架之一编写的。它们都让运行测试
+更加简化，并为编写新测试提供帮助。
+
+如果你想验证内核的行为——尤其是内核的特定部分——那你就要使用kUnit或
+kselftest。
+
+KUnit和kselftest的区别
+----------------------
+
+.. note::
+     由于本文段中部分术语尚无较好的对应中文释义，可能导致与原文含义
+     存在些许差异，因此建议读者结合原文
+     （Documentation/dev-tools/testing-overview.rst）辅助阅读。
+     如对部分翻译有异议或有更好的翻译意见，欢迎联系译者进行修订。
+
+KUnit（Documentation/dev-tools/kunit/index.rst）是用于“白箱”测
+试的一个完整的内核内部系统：因为测试代码是内核的一部分，所以它能够访
+问用户空间不能访问到的内部结构和功能。
+
+因此，KUnit测试最好针对内核中较小的、自包含的部分，以便能够独立地测
+试。“单元”测试的概念亦是如此。
+
+比如，一个KUnit测试可能测试一个单独的内核功能（甚至通过一个函数测试
+一个单一的代码路径，例如一个错误处理案例），而不是整个地测试一个特性。
+
+这也使得KUnit测试构建和运行非常地快，从而能够作为开发流程的一部分被
+频繁地运行。
+
+有关更详细的介绍，请参阅KUnit测试代码风格指南
+Documentation/dev-tools/kunit/style.rst
+
+kselftest（Documentation/dev-tools/kselftest.rst），相对来说，大量用
+于用户空间，并且通常测试用户空间的脚本或程序。
+
+这使得编写复杂的测试，或者需要操作更多全局系统状态的测试更加容易（诸
+如生成进程之类）。然而，从kselftest直接调用内核函数是不行的。这也就
+意味着只有通过某种方式（如系统调用、驱动设备、文件系统等）导出到了用
+户空间的内核功能才能使用kselftest来测试。为此，有些测试包含了一个伴
+生的内核模块用于导出更多的信息和功能。不过，对于基本上或者完全在内核
+中运行的测试，KUnit可能是更佳工具。
+
+kselftest也因此非常适合于全部功能的测试，因为这些功能会将接口暴露到
+用户空间，从而能够被测试，而不是展现实现细节。“system”测试和
+“end-to-end”测试亦是如此。
+
+比如，一个新的系统调用应该伴随有新的kselftest测试。
+
+代码覆盖率工具
+==============
+
+支持两种不同代码之间的覆盖率测量工具。它们可以用来验证一项测试执行的
+确切函数或代码行。这有助于决定内核被测试了多少，或用来查找合适的测试
+中没有覆盖到的极端情况。
+
+Documentation/translations/zh_CN/dev-tools/gcov.rst 是GCC的覆盖率测试
+工具，能用于获取内核的全局或每个模块的覆盖率。与KCOV不同的是，这个工具
+不记录每个任务的覆盖率。覆盖率数据可以通过debugfs读取，并通过常规的
+gcov工具进行解释。
+
+Documentation/dev-tools/kcov.rst 是能够构建在内核之中，用于在每个任务
+的层面捕捉覆盖率的一个功能。因此，它对于模糊测试和关于代码执行期间信
+息的其它情况非常有用，比如在一个单一系统调用里使用它就很有用。
+
+动态分析工具
+============
+
+内核也支持许多动态分析工具，用以检测正在运行的内核中出现的多种类型的
+问题。这些工具通常每个去寻找一类不同的缺陷，比如非法内存访问，数据竞
+争等并发问题，或整型溢出等其他未定义行为。
+
+如下所示：
+
+* kmemleak检测可能的内存泄漏。参阅
+  Documentation/dev-tools/kmemleak.rst
+* KASAN检测非法内存访问，如数组越界和释放后重用（UAF）。参阅
+  Documentation/dev-tools/kasan.rst
+* UBSAN检测C标准中未定义的行为，如整型溢出。参阅
+  Documentation/dev-tools/ubsan.rst
+* KCSAN检测数据竞争。参阅 Documentation/dev-tools/kcsan.rst
+* KFENCE是一个低开销的内存问题检测器，比KASAN更快且能被用于批量构建。
+  参阅 Documentation/dev-tools/kfence.rst
+* lockdep是一个锁定正确性检测器。参阅
+  Documentation/locking/lockdep-design.rst
+* 除此以外，在内核中还有一些其它的调试工具，大多数能在
+  lib/Kconfig.debug 中找到。
+
+这些工具倾向于对内核进行整体测试，并且不像kselftest和KUnit一样“传递”。
+它们可以通过在启用这些工具时运行内核测试以与kselftest或KUnit结合起来：
+之后你就能确保这些错误在测试过程中都不会发生了。
+
+一些工具与KUnit和kselftest集成，并且在检测到问题时会自动打断测试。
diff --git a/Documentation/translations/zh_CN/filesystems/debugfs.rst b/Documentation/translations/zh_CN/filesystems/debugfs.rst
index 822c4d42fdf9..4981a82dd651 100644
--- a/Documentation/translations/zh_CN/filesystems/debugfs.rst
+++ b/Documentation/translations/zh_CN/filesystems/debugfs.rst
@@ -2,7 +2,7 @@
 
 .. include:: ../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../filesystems/debugfs`
+:Original: Documentation/filesystems/debugfs.rst
 
 =======
 Debugfs
diff --git a/Documentation/translations/zh_CN/iio/ep93xx_adc.rst b/Documentation/translations/zh_CN/iio/ep93xx_adc.rst
index 7e91d2197867..64f3f3508353 100644
--- a/Documentation/translations/zh_CN/iio/ep93xx_adc.rst
+++ b/Documentation/translations/zh_CN/iio/ep93xx_adc.rst
@@ -1,10 +1,12 @@
 .. include:: ../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../iio/ep93xx_adc`
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+:Original: Documentation/iio/ep93xx_adc.rst
 
-.. _cn_iio_ep93xx_adc:
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
+.. _cn_iio_ep93xx_adc:
 
 ==================================
 思睿逻辑 EP93xx 模拟数字转换器驱动
diff --git a/Documentation/translations/zh_CN/iio/iio_configfs.rst b/Documentation/translations/zh_CN/iio/iio_configfs.rst
index 274488e8dce4..d5460e951804 100644
--- a/Documentation/translations/zh_CN/iio/iio_configfs.rst
+++ b/Documentation/translations/zh_CN/iio/iio_configfs.rst
@@ -1,10 +1,12 @@
 .. include:: ../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../iio/iio_configfs`
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+:Original: Documentation/iio/iio_configfs.rst
 
-.. _cn_iio_configfs:
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
+.. _cn_iio_configfs:
 
 =====================
 工业 IIO configfs支持
diff --git a/Documentation/translations/zh_CN/iio/index.rst b/Documentation/translations/zh_CN/iio/index.rst
index 7087076a10f6..32d69047b16a 100644
--- a/Documentation/translations/zh_CN/iio/index.rst
+++ b/Documentation/translations/zh_CN/iio/index.rst
@@ -2,11 +2,13 @@
 
 .. include:: ../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../iio/index`
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+:Original: Documentation/iio/index.rst
 
-.. _cn_iio_index:
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
+.. _cn_iio_index:
 
 ========
 工业 I/O
diff --git a/Documentation/translations/zh_CN/index.rst b/Documentation/translations/zh_CN/index.rst
index 1f953d3439a5..a34e58733ac8 100644
--- a/Documentation/translations/zh_CN/index.rst
+++ b/Documentation/translations/zh_CN/index.rst
@@ -5,6 +5,7 @@
 	\renewcommand\thesection*
 	\renewcommand\thesubsection*
 	\kerneldocCJKon
+	\kerneldocBeginSC
 
 .. _linux_doc_zh:
 
@@ -17,6 +18,11 @@
    **翻译计划:**
    内核中文文档欢迎任何翻译投稿，特别是关于内核用户和管理员指南部分。
 
+这是中文内核文档树的顶级目录。内核文档，就像内核本身一样，在很大程度上是一
+项正在进行的工作；当我们努力将许多分散的文件整合成一个连贯的整体时尤其如此。
+另外，随时欢迎您对内核文档进行改进；如果您想提供帮助，请加入vger.kernel.org
+上的linux-doc邮件列表。
+
 许可证文档
 ----------
 
@@ -97,12 +103,14 @@ TODOList:
    iio/index
    sound/index
    filesystems/index
+   virt/index
+   infiniband/index
+   accounting/index
 
 TODOList:
 
 * driver-api/index
 * locking/index
-* accounting/index
 * block/index
 * cdrom/index
 * ide/index
@@ -111,7 +119,6 @@ TODOList:
 * hid/index
 * i2c/index
 * isdn/index
-* infiniband/index
 * leds/index
 * netlabel/index
 * networking/index
@@ -122,7 +129,6 @@ TODOList:
 * spi/index
 * w1/index
 * watchdog/index
-* virt/index
 * input/index
 * hwmon/index
 * gpu/index
@@ -184,3 +190,7 @@ TODOList:
 ----------
 
 * :ref:`genindex`
+
+.. raw:: latex
+
+	\kerneldocEndSC
diff --git a/Documentation/translations/zh_CN/infiniband/core_locking.rst b/Documentation/translations/zh_CN/infiniband/core_locking.rst
new file mode 100644
index 000000000000..42f08038d44b
--- /dev/null
+++ b/Documentation/translations/zh_CN/infiniband/core_locking.rst
@@ -0,0 +1,115 @@
+
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/infiniband/core_locking.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+ 王普宇 Puyu Wang <realpuyuwang@gmail.com>
+ 时奎亮 Alex Shi <alexs@kernel.org>
+
+.. _cn_infiniband_core_locking:
+
+==================
+infiniband中间层锁
+==================
+
+  本指南试图明确infiniband中间层的锁假设。它描述了对位于中间层以下的低
+  级驱动程序和使用中间层的上层协议的要求。
+
+睡眠和中断环境
+==============
+
+  除了以下异常情况，ib_device结构体中所有方法的低级驱动实现都可以睡眠。
+  这些异常情况是列表中的任意的方法:
+
+    - create_ah
+    - modify_ah
+    - query_ah
+    - destroy_ah
+    - post_send
+    - post_recv
+    - poll_cq
+    - req_notify_cq
+
+    他们可能不可以睡眠，而且必须可以从任何上下文中调用。
+
+    向上层协议使用者输出的相应函数:
+
+    - rdma_create_ah
+    - rdma_modify_ah
+    - rdma_query_ah
+    - rdma_destroy_ah
+    - ib_post_send
+    - ib_post_recv
+    - ib_req_notify_cq
+
+    因此，在任何情况下都可以安全调用（它们）。
+
+  此外，该函数
+
+    - ib_dispatch_event
+
+  被底层驱动用来通过中间层调度异步事件的“A”，也可以从任何上下文中安全调
+  用。
+
+可重入性
+--------
+
+  由低级驱动程序导出的ib_device结构体中的所有方法必须是完全可重入的。
+  即使使用同一对象的多个函数调用被同时运行，低级驱动程序也需要执行所有
+  必要的同步以保持一致性。
+
+  IB中间层不执行任何函数调用的序列化。
+
+  因为低级驱动程序是可重入的，所以不要求上层协议使用者任何顺序执行。然
+  而，为了得到合理的结果，可能需要一些顺序。例如，一个使用者可以在多个
+  CPU上同时安全地调用ib_poll_cq()。然而，不同的ib_poll_cq()调用之间
+  的工作完成信息的顺序没有被定义。
+
+回调
+----
+
+  低级驱动程序不得直接从与ib_device方法调用相同的调用链中执行回调。例
+  如，低级驱动程序不允许从post_send方法直接调用使用者的完成事件处理程
+  序。相反，低级驱动程序应该推迟这个回调，例如，调度一个tasklet来执行
+  这个回调。
+
+  低层驱动负责确保同一CQ的多个完成事件处理程序不被同时调用。驱动程序必
+  须保证一个给定的CQ的事件处理程序在同一时间只有一个在运行。换句话说，
+  以下情况是不允许的::
+
+          CPU1                                    CPU2
+
+    low-level driver ->
+      consumer CQ event callback:
+        /* ... */
+        ib_req_notify_cq(cq, ...);
+                                          low-level driver ->
+        /* ... */                           consumer CQ event callback:
+                                              /* ... */
+        return from CQ event handler
+
+  完成事件和异步事件回调的运行环境没有被定义。 根据低级别的驱动程序，它可能是
+  进程上下文、softirq上下文或中断上下文。上层协议使用者可能不会在回调中睡眠。
+
+热插拔
+------
+
+  当一个低级驱动程序调用ib_register_device()时，它宣布一个设备已经
+  准备好供使用者使用，所有的初始化必须在这个调用之前完成。设备必须保
+  持可用，直到驱动对ib_unregister_device()的调用返回。
+
+  低级驱动程序必须从进程上下文调用ib_register_device()和
+  ib_unregister_device()。如果使用者在这些调用中回调到驱动程序，它
+  不能持有任何可能导致死锁的semaphores。
+
+  一旦其结构体ib_client的add方法被调用，上层协议使用者就可以开始使用
+  一个IB设备。使用者必须在从移除方法返回之前完成所有的清理工作并释放
+  与设备相关的所有资源。
+
+  使用者被允许在其添加和删除方法中睡眠。
diff --git a/Documentation/translations/zh_CN/infiniband/index.rst b/Documentation/translations/zh_CN/infiniband/index.rst
new file mode 100644
index 000000000000..5634cc48379f
--- /dev/null
+++ b/Documentation/translations/zh_CN/infiniband/index.rst
@@ -0,0 +1,40 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/infiniband/index.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+ 王普宇 Puyu Wang <realpuyuwang@gmail.com>
+ 时奎亮 Alex Shi <alexs@kernel.org>
+
+.. _cn_infiniband_index:
+
+==========
+infiniband
+==========
+
+.. toctree::
+   :maxdepth: 1
+
+   core_locking
+   ipoib
+   opa_vnic
+   sysfs
+   tag_matching
+   user_mad
+   user_verbs
+
+
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/translations/zh_CN/infiniband/ipoib.rst b/Documentation/translations/zh_CN/infiniband/ipoib.rst
new file mode 100644
index 000000000000..56517ea5fe9d
--- /dev/null
+++ b/Documentation/translations/zh_CN/infiniband/ipoib.rst
@@ -0,0 +1,111 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/infiniband/ipoib.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+ 王普宇 Puyu Wang <realpuyuwang@gmail.com>
+ 时奎亮 Alex Shi <alexs@kernel.org>
+
+.. _cn_infiniband_ipoib:
+
+=========================
+infiniband上的IP（IPoIB）
+=========================
+
+  ib_ipoib驱动是IETF ipoib工作组发布的RFC 4391和4392所规定的
+  infiniband上IP协议的一个实现。它是一个“本地”实现，即把接口类型设置为
+  ARPHRD_INFINIBAND，硬件地址长度为20（早期的专有实现向内核伪装为以太网
+  接口）。
+
+分区和P_Keys
+============
+
+  当IPoIB驱动被加载时，它会使用索引为0的P_Key给每个端口创建一个接口。要用
+  不同的P_Key创建一个接口，将所需的P_Key写入主接口的
+  /sys/class/net/<intf name>/create_child文件里面。比如说::
+
+    echo 0x8001 > /sys/class/net/ib0/create_child
+
+  这将用P_Key 0x8001创建一个名为ib0.8001的接口。要删除一个子接口，使用
+  ``delete_child`` 文件::
+
+    echo 0x8001 > /sys/class/net/ib0/delete_child
+
+  任何接口的P_Key都由“pkey”文件给出，而子接口的主接口在“parent”中。
+
+  子接口的创建/删除也可以使用IPoIB的rtnl_link_ops来完成，使用两种
+  方式创建的子接口的行为是一样的。
+
+数据报与连接模式
+================
+
+  IPoIB驱动支持两种操作模式：数据报和连接。模式是通过接口的
+  /sys/class/net/<intf name>/mode文件设置和读取的。
+
+  在数据报模式下，使用IB UD（不可靠数据报）传输，因此接口MTU等于IB L2 MTU
+  减去IPoIB封装头（4字节）。例如，在一个典型的具有2K MTU的IB结构中，IPoIB
+  MTU将是2048 - 4 = 2044字节。
+
+  在连接模式下，使用IB RC（可靠的连接）传输。连接模式利用IB传输的连接特性，
+  允许MTU达到最大的IP包大小64K，这减少了处理大型UDP数据包、TCP段等所需的
+  IP包数量，提高了大型信息的性能。
+
+  在连接模式下，接口的UD QP仍被用于组播和与不支持连接模式的对等体的通信。
+  在这种情况下，ICMP PMTU数据包的RX仿真被用来使网络堆栈对这些邻居使用较
+  小的UD MTU。
+
+无状态卸载
+==========
+
+  如果IB HW支持IPoIB无状态卸载，IPoIB会向网络堆栈广播TCP/IP校验和/或大量
+  传送（LSO）负载转移能力。
+
+  大量传送（LSO）负载转移也已实现，可以使用ethtool调用打开/关闭。目前，LRO
+  只支持具有校验和卸载能力的设备。
+
+  无状态卸载只在数据报模式下支持。
+
+中断管理
+========
+
+  如果底层IB设备支持CQ事件管理，可以使用ethtool来设置中断缓解参数，从而减少
+  处理中断产生的开销。IPoIB的主要代码路径不使用TX完成信号的事件，所以只支持
+  RX管理。
+
+调试信息
+========
+
+  通过将CONFIG_INFINIBAND_IPOIB_DEBUG设置为“y”来编译IPoIB驱动，跟踪信
+  息被编译到驱动中。通过将模块参数debug_level和mcast_debug_level设置为1来
+  打开它们。这些参数可以在运行时通过/sys/module/ib_ipoib/的文件来控制。
+
+  CONFIG_INFINIBAND_IPOIB_DEBUG也启用debugfs虚拟文件系统中的文件。通过挂
+  载这个文件系统，例如用::
+
+    mount -t debugfs none /sys/kernel/debug
+
+  可以从/sys/kernel/debug/ipoib/ib0_mcg等文件中获得关于多播组的统计数据。
+
+  这个选项对性能的影响可以忽略不计，所以在正常运行时，在debug_level设置为
+  0的情况下启用这个选项是安全的。
+
+  CONFIG_INFINIBAND_IPOIB_DEBUG_DATA当data_debug_level设置为1时，可以
+  在数据路径中启用更多的调试输出。 然而，即使禁用输出，启用这个配置选项也
+  会影响性能，因为它在快速路径中增加了测试。
+
+引用
+====
+
+  在InfiniBand上传输IP（IPoIB）（RFC 4391）。
+    http://ietf.org/rfc/rfc4391.txt
+
+  infiniband上的IP:上的IP架构（RFC 4392）。
+    http://ietf.org/rfc/rfc4392.txt
+
+  infiniband上的IP: 连接模式 (RFC 4755)
+    http://ietf.org/rfc/rfc4755.txt
diff --git a/Documentation/translations/zh_CN/infiniband/opa_vnic.rst b/Documentation/translations/zh_CN/infiniband/opa_vnic.rst
new file mode 100644
index 000000000000..12b147fbf792
--- /dev/null
+++ b/Documentation/translations/zh_CN/infiniband/opa_vnic.rst
@@ -0,0 +1,156 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/infiniband/opa_vnic.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+ 王普宇 Puyu Wang <realpuyuwang@gmail.com>
+ 时奎亮 Alex Shi <alexs@kernel.org>
+
+.. _cn_infiniband_opa_vnic:
+
+=============================================
+英特尔全路径（OPA）虚拟网络接口控制器（VNIC）
+=============================================
+
+英特尔全路径（OPA）虚拟网络接口控制器（VNIC）功能通过封装HFI节点之间的以
+太网数据包，支持Omni-Path结构上的以太网功能。
+
+体系结构
+========
+
+Omni-Path封装的以太网数据包的交换模式涉及Omni-Path结构拓扑上覆盖的一个或
+多个虚拟以太网交换机。Omni-Path结构上的HFI节点的一个子集被允许在特定的虚
+拟以太网交换机上交换封装的以太网数据包。虚拟以太网交换机是通过配置结构上的
+HFI节点实现的逻辑抽象，用于生成和处理报头。在最简单的配置中，整个结构的所有
+HFI节点通过一个虚拟以太网交换机交换封装的以太网数据包。一个虚拟以太网交换机，
+实际上是一个独立的以太网网络。该配置由以太网管理器（EM）执行，它是可信的结
+构管理器（FM）应用程序的一部分。HFI节点可以有多个VNIC，每个连接到不同的虚
+拟以太网交换机。下图介绍了两个虚拟以太网交换机与两个HFI节点的情况::
+
+                               +-------------------+
+                               |      子网/        |
+                               |     以太网        |
+                               |      管理         |
+                               +-------------------+
+                                  /          /
+                                /           /
+                              /            /
+                            /             /
+  +-----------------------------+  +------------------------------+
+  |     虚拟以太网切换          |  |      虚拟以太网切换          |
+  |  +---------+    +---------+ |  | +---------+    +---------+   |
+  |  | VPORT   |    |  VPORT  | |  | |  VPORT  |    |  VPORT  |   |
+  +--+---------+----+---------+-+  +-+---------+----+---------+---+
+           |                 \        /                 |
+           |                   \    /                   |
+           |                     \/                     |
+           |                    /  \                    |
+           |                  /      \                  |
+       +-----------+------------+  +-----------+------------+
+       |   VNIC    |    VNIC    |  |    VNIC   |    VNIC    |
+       +-----------+------------+  +-----------+------------+
+       |          HFI           |  |          HFI           |
+       +------------------------+  +------------------------+
+
+
+Omni-Path封装的以太网数据包格式如下所述。
+
+==================== ================================
+位                   域
+==================== ================================
+Quad Word 0:
+0-19                 SLID (低20位)
+20-30                长度 (以四字为单位)
+31                   BECN 位
+32-51                DLID (低20位)
+52-56                SC (服务级别)
+57-59                RC (路由控制)
+60                   FECN 位
+61-62                L2 (=10, 16B 格式)
+63                   LT (=1, 链路传输头 Flit)
+
+Quad Word 1:
+0-7                  L4 type (=0x78 ETHERNET)
+8-11                 SLID[23:20]
+12-15                DLID[23:20]
+16-31                PKEY
+32-47                熵
+48-63                保留
+
+Quad Word 2:
+0-15                 保留
+16-31                L4 头
+32-63                以太网数据包
+
+Quad Words 3 to N-1:
+0-63                 以太网数据包 (pad拓展)
+
+Quad Word N (last):
+0-23                 以太网数据包 (pad拓展)
+24-55                ICRC
+56-61                尾
+62-63                LT (=01, 链路传输尾 Flit)
+==================== ================================
+
+以太网数据包在传输端被填充，以确保VNIC OPA数据包是四字对齐的。“尾”字段
+包含填充的字节数。在接收端，“尾”字段被读取，在将数据包向上传递到网络堆
+栈之前，填充物被移除（与ICRC、尾和OPA头一起）。
+
+L4头字段包含VNIC端口所属的虚拟以太网交换机ID。在接收端，该字段用于将收
+到的VNIC数据包去多路复用到不同的VNIC端口。
+
+驱动设计
+========
+
+英特尔OPA VNIC的软件设计如下图所示。OPA VNIC功能有一个依赖于硬件的部分
+和一个独立于硬件的部分。
+
+对IB设备分配和释放RDMA netdev设备的支持已经被加入。RDMA netdev支持与
+网络堆栈的对接，从而创建标准的网络接口。OPA_VNIC是一个RDMA netdev设备
+类型。
+
+依赖于HW的VNIC功能是HFI1驱动的一部分。它实现了分配和释放OPA_VNIC RDMA
+netdev的动作。它涉及VNIC功能的HW资源分配/管理。它与网络堆栈接口并实现所
+需的net_device_ops功能。它在传输路径中期待Omni-Path封装的以太网数据包，
+并提供对它们的HW访问。在将数据包向上传递到网络堆栈之前，它把Omni-Path头
+从接收的数据包中剥离。它还实现了RDMA netdev控制操作。
+
+OPA VNIC模块实现了独立于硬件的VNIC功能。它由两部分组成。VNIC以太网管理
+代理（VEMA）作为一个IB客户端向IB核心注册，并与IB MAD栈接口。它与以太网
+管理器（EM）和VNIC netdev交换管理信息。VNIC netdev部分分配和释放OPA_VNIC
+RDMA netdev设备。它在需要时覆盖由依赖HW的VNIC驱动设置的net_device_ops函数，
+以适应任何控制操作。它还处理以太网数据包的封装，在传输路径中使用Omni-Path头。
+对于每个VNIC接口，封装所需的信息是由EM通过VEMA MAD接口配置的。它还通过调用
+RDMA netdev控制操作将任何控制信息传递给依赖于HW的驱动程序::
+
+        +-------------------+ +----------------------+
+        |                   | |       Linux          |
+        |     IB MAD        | |       网络           |
+        |                   | |       栈             |
+        +-------------------+ +----------------------+
+                 |               |          |
+                 |               |          |
+        +----------------------------+      |
+        |                            |      |
+        |      OPA VNIC 模块         |      |
+        |  (OPA VNIC RDMA Netdev     |      |
+        |     & EMA 函数)            |      |
+        |                            |      |
+        +----------------------------+      |
+                    |                       |
+                    |                       |
+           +------------------+             |
+           |     IB 核心      |             |
+           +------------------+             |
+                    |                       |
+                    |                       |
+        +--------------------------------------------+
+        |                                            |
+        |      HFI1 驱动和 VNIC 支持                 |
+        |                                            |
+        +--------------------------------------------+
diff --git a/Documentation/translations/zh_CN/infiniband/sysfs.rst b/Documentation/translations/zh_CN/infiniband/sysfs.rst
new file mode 100644
index 000000000000..e9a48b0b2ba6
--- /dev/null
+++ b/Documentation/translations/zh_CN/infiniband/sysfs.rst
@@ -0,0 +1,21 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/infiniband/sysfs.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+ 王普宇 Puyu Wang <realpuyuwang@gmail.com>
+ 时奎亮 Alex Shi <alexs@kernel.org>
+
+.. _cn_infiniband_sysfs:
+
+=========
+Sysfs文件
+=========
+
+sysfs接口已移至
+Documentation/ABI/stable/sysfs-class-infiniband.
diff --git a/Documentation/translations/zh_CN/infiniband/tag_matching.rst b/Documentation/translations/zh_CN/infiniband/tag_matching.rst
new file mode 100644
index 000000000000..19b99587b862
--- /dev/null
+++ b/Documentation/translations/zh_CN/infiniband/tag_matching.rst
@@ -0,0 +1,63 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/infiniband/tag_matching.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+ 王普宇 Puyu Wang <realpuyuwang@gmail.com>
+ 时奎亮 Alex Shi <alexs@kernel.org>
+
+.. _cn_infiniband_tag_matching:
+
+============
+标签匹配逻辑
+============
+
+MPI标准定义了一套规则，称为标签匹配，用于将源发送操作与目的接收匹配。以下参数必
+须与以下源和目的参数相匹配:
+
+*	沟通者
+*	用户标签--通配符(wild card)可由接收方指定
+*	来源等级--通配符可由接收方指定
+*	目的地等级 – wild
+
+排序规则要求，当一对以上的发送和接收消息信封可能匹配时，包括最早发布-发送和最早
+发布-接收的一对是必须用来满足匹配操作的一对。然而，这并不意味着标签是按照它们被
+创建的顺序消耗的，例如，如果早期的标签不能用来满足匹配规则，那么后来生成的标签
+可能被消耗。
+
+当消息从发送方发送到接收方时，通信库可能试图在相应的匹配接收被发布之后或之前处
+理该操作。如果匹配的接收被发布，这就是一个预期的消息，否则就被称为一个意外的消
+息。实现时经常为这两种不同的匹配实例使用不同的匹配方案。
+
+为了减少MPI库的内存占用，MPI实现通常使用两种不同的协议来实现这一目的:
+
+1.	Eager协议--当发送方处理完发送时，完整的信息就会被发送。在send_cq中会收到
+一个完成发送的通知，通知缓冲区可以被重新使用。
+
+2.	Rendezvous协议--发送方在第一次通知接收方时发送标签匹配头，也许还有一部分
+数据。当相应的缓冲区被发布时，响应者将使用头中的信息，直接向匹配的缓冲区发起
+RDMA读取操作。为了使缓冲区得到重用，需要收到一个fin消息。
+
+标签匹配的实现
+==============
+
+使用的匹配对象有两种类型，即发布的接收列表和意外消息列表。应用程序通过调用发布
+的接收列表中的MPI接收例程发布接收缓冲区，并使用MPI发送例程发布发送消息。发布的
+接收列表的头部可以由硬件来维护，而软件则要对这个列表进行跟踪。
+
+当发送开始并到达接收端时，如果没有为这个到达的消息预先发布接收，它将被传递给软
+件并被放在意外（unexpect）消息列表中。否则，将对该匹配进行处理，包括交会处理，
+如果合适的话，将数据传送到指定的接收缓冲区。这允许接收方MPI标签匹配与计算重叠。
+
+当一个接收信息被发布时，通信库将首先检查软件的意外信息列表，以寻找一个匹配的接
+收信息。如果找到一个匹配的，数据就会被送到用户缓冲区，使用一个软件控制的协议。
+UCX的实现根据数据大小，使用急切或交会协议。如果没有找到匹配，整个预置的接收列
+表由硬件维护，并且有空间在这个列表中增加一个预置的接收，这个接收被传递给硬件。
+软件要对这个列表进行跟踪，以帮助处理MPI取消操作。此外，由于硬件和软件在标签匹
+配操作方面预计不会紧密同步，这个影子列表被用来检测预先发布的接收被传递到硬件的
+情况，因为匹配的意外消息正在从硬件传递到软件。
diff --git a/Documentation/translations/zh_CN/infiniband/user_mad.rst b/Documentation/translations/zh_CN/infiniband/user_mad.rst
new file mode 100644
index 000000000000..d9ab2edfb559
--- /dev/null
+++ b/Documentation/translations/zh_CN/infiniband/user_mad.rst
@@ -0,0 +1,164 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/infiniband/user_mad.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+ 王普宇 Puyu Wang <realpuyuwang@gmail.com>
+ 时奎亮 Alex Shi <alexs@kernel.org>
+
+.. _cn_infiniband_user_mad:
+
+===============
+用户空间MAD访问
+===============
+
+设备文件
+========
+
+  每个InfiniBand设备的每个端口都有一个“umad”设备和一个“issm”设备连接。
+  例如，一个双端口的HCA将有两个umad设备和两个issm设备，而一个交换机将
+  有每个类型的一个设备（对于交换机端口0）。
+
+创建MAD代理
+===========
+
+  一个MAD代理可以通过填写一个结构体ib_user_mad_reg_req来创建，然后在
+  适当的设备文件的文件描述符上调用IB_USER_MAD_REGISTER_AGENT ioctl。
+  如果注册请求成功，结构体中会返回一个32位的ID。比如说::
+
+	struct ib_user_mad_reg_req req = { /* ... */ };
+	ret = ioctl(fd, IB_USER_MAD_REGISTER_AGENT, (char *) &req);
+        if (!ret)
+		my_agent = req.id;
+	else
+		perror("agent register");
+
+  代理可以通过IB_USER_MAD_UNREGISTER_AGENT ioctl取消注册。另外，所有
+  通过文件描述符注册的代理在描述符关闭时将被取消注册。
+
+  2014
+       现在提供了一个新的注册IOctl，允许在注册时提供额外的字段。这个注册
+       调用的用户隐含了对pkey_index的使用（见下文）。现在提供了一个新的
+       注册IOctl，允许在注册时提供额外的字段。这个注册调用的用户隐含了对
+       pkey_index的使用（见下文）。
+
+接收MADs
+========
+
+  使用read()接收MAD。现在接收端支持RMPP。传给read()的缓冲区必须至少是
+  一个struct ib_user_mad + 256字节。比如说:
+
+  如果传递的缓冲区不足以容纳收到的MAD（RMPP），errno被设置为ENOSPC，需
+  要的缓冲区长度被设置在mad.length中。
+
+  正常MAD(非RMPP)的读取示例::
+
+	struct ib_user_mad *mad;
+	mad = malloc(sizeof *mad + 256);
+	ret = read(fd, mad, sizeof *mad + 256);
+	if (ret != sizeof mad + 256) {
+		perror("read");
+		free(mad);
+	}
+
+  RMPP读取示例::
+
+	struct ib_user_mad *mad;
+	mad = malloc(sizeof *mad + 256);
+	ret = read(fd, mad, sizeof *mad + 256);
+	if (ret == -ENOSPC)) {
+		length = mad.length;
+		free(mad);
+		mad = malloc(sizeof *mad + length);
+		ret = read(fd, mad, sizeof *mad + length);
+	}
+	if (ret < 0) {
+		perror("read");
+		free(mad);
+	}
+
+  除了实际的MAD内容外，其他结构体ib_user_mad字段将被填入收到的MAD的信
+  息。例如，远程LID将在mad.lid中。
+
+  如果发送超时，将产生一个接收，mad.status设置为ETIMEDOUT。否则，当一个
+  MAD被成功接收后，mad.status将是0。
+
+  poll()/select()可以用来等待一个MAD可以被读取。
+
+  poll()/select()可以用来等待，直到可以读取一个MAD。
+
+发送MADs
+========
+
+  MADs是用write()发送的。发送的代理ID应该填入MAD的id字段，目的地LID应该
+  填入lid字段，以此类推。发送端确实支持RMPP，所以可以发送任意长度的MAD。
+  比如说::
+
+	struct ib_user_mad *mad;
+
+	mad = malloc(sizeof *mad + mad_length);
+
+	/* fill in mad->data */
+
+	mad->hdr.id  = my_agent;	/* req.id from agent registration */
+	mad->hdr.lid = my_dest;		/* in network byte order... */
+	/* etc. */
+
+	ret = write(fd, &mad, sizeof *mad + mad_length);
+	if (ret != sizeof *mad + mad_length)
+		perror("write");
+
+交换IDs
+=======
+
+  umad设备的用户可以在发送的MAD中使用交换ID字段的低32位（也就是网络字节顺序中
+  最小有效的一半字段）来匹配请求/响应对。上面的32位是保留给内核使用的，在发送
+  MAD之前会被改写。
+
+P_Key索引处理
+=============
+
+  旧的ib_umad接口不允许为发送的MAD设置P_Key索引，也没有提供获取接收的MAD的
+  P_Key索引的方法。一个带有pkey_index成员的struct ib_user_mad_hdr的新布局已
+  经被定义；然而，为了保持与旧的应用程序的二进制兼容性，除非在文件描述符被用于
+  其他用途之前调用IB_USER_MAD_ENABLE_PKEY或IB_USER_MAD_REGISTER_AGENT2 ioctl
+  之一，否则不会使用这种新布局。
+
+  在2008年9月，IB_USER_MAD_ABI_VERSION将被增加到6，默认使用新的ib_user_mad_hdr
+  结构布局，并且IB_USER_MAD_ENABLE_PKEY ioctl将被删除。
+
+设置IsSM功能位
+==============
+
+  要为一个端口设置IsSM功能位，只需打开相应的issm设备文件。如果IsSM位已经被设置，那
+  么打开调用将阻塞，直到该位被清除（或者如果O_NONBLOCK标志被传递给open()，则立即返
+  回，errno设置为EAGAIN）。当issm文件被关闭时，IsSM位将被清除。在issm文件上不能进
+  行任何读、写或其他操作。
+
+/dev文件
+========
+
+为了用 udev自动创建相应的字符设备文件，一个类似::
+
+    KERNEL=="umad*", NAME="infiniband/%k"
+    KERNEL=="issm*", NAME="infiniband/%k"
+
+  的规则可以被使用。它将创建节点的名字::
+
+    /dev/infiniband/umad0
+    /dev/infiniband/issm0
+
+  为第一个端口，以此类推。与这些设备相关的infiniband设备和端口可以从以下文件中确定::
+
+    /sys/class/infiniband_mad/umad0/ibdev
+    /sys/class/infiniband_mad/umad0/port
+
+  和::
+
+    /sys/class/infiniband_mad/issm0/ibdev
+    /sys/class/infiniband_mad/issm0/port
diff --git a/Documentation/translations/zh_CN/infiniband/user_verbs.rst b/Documentation/translations/zh_CN/infiniband/user_verbs.rst
new file mode 100644
index 000000000000..970bc1a4e396
--- /dev/null
+++ b/Documentation/translations/zh_CN/infiniband/user_verbs.rst
@@ -0,0 +1,72 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/infiniband/user_verbs.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+ 王普宇 Puyu Wang <realpuyuwang@gmail.com>
+ 时奎亮 Alex Shi <alexs@kernel.org>
+
+.. _cn_infiniband_user_verbs:
+
+=================
+用户空间verbs访问
+=================
+
+  ib_uverbs模块，通过启用CONFIG_INFINIBAND_USER_VERBS构建，使用户空间
+  通过“verbs”直接访问IB硬件，如InfiniBand架构规范第11章所述。
+
+  要使用verbs，需要libibverbs库，可从https://github.com/linux-rdma/rdma-core。
+  libibverbs包含一个独立于设备的API，用于使用ib_uverbs接口。libibverbs
+  还需要为你的InfiniBand硬件提供适当的独立于设备的内核和用户空间驱动。例如，
+  要使用Mellanox HCA，你需要安装ib_mthca内核模块和libmthca用户空间驱动。
+
+用户-内核通信
+=============
+
+  用户空间通过/dev/infiniband/uverbsN字符设备与内核进行慢速路径、资源管理
+  操作的通信。快速路径操作通常是通过直接写入硬件寄存器mmap()到用户空间来完成
+  的，没有系统调用或上下文切换到内核。
+
+  命令是通过在这些设备文件上的write()s发送给内核的。ABI在
+  drivers/infiniband/include/ib_user_verbs.h中定义。需要内核响应的命令的结
+  构包含一个64位字段，用来传递一个指向输出缓冲区的指针。状态作为write()系统调
+  用的返回值被返回到用户空间。
+
+资源管理
+========
+
+  由于所有IB资源的创建和销毁都是通过文件描述符传递的命令完成的，所以内核可以跟
+  踪那些被附加到给定用户空间上下文的资源。ib_uverbs模块维护着idr表，用来在
+  内核指针和不透明的用户空间句柄之间进行转换，这样内核指针就不会暴露给用户空间，
+  而用户空间也无法欺骗内核去跟踪一个假的指针。
+
+  这也允许内核在一个进程退出时进行清理，并防止一个进程触及另一个进程的资源。
+
+内存固定
+========
+
+  直接的用户空间I/O要求与作为潜在I/O目标的内存区域保持在同一物理地址上。ib_uverbs
+  模块通过get_user_pages()和put_page()调用来管理内存区域的固定和解除固定。它还核
+  算进程的pinned_vm中被固定的内存量，并检查非特权进程是否超过其RLIMIT_MEMLOCK限制。
+
+  被多次固定的页面在每次被固定时都会被计数，所以pinned_vm的值可能会高估一个进程所
+  固定的页面数量。
+
+/dev文件
+========
+
+  要想用udev自动创建适当的字符设备文件，可以采用如下规则::
+
+    KERNEL=="uverbs*", NAME="infiniband/%k"
+
+  可以使用。 这将创建设备节点，名为::
+
+    /dev/infiniband/uverbs0
+
+  等等。由于InfiniBand的用户空间verbs对于非特权进程来说应该是安全的，因此在udev规
+  则中加入适当的MODE或GROUP可能是有用的。
diff --git a/Documentation/translations/zh_CN/kernel-hacking/hacking.rst b/Documentation/translations/zh_CN/kernel-hacking/hacking.rst
index ab974faddecf..f2bc154c5bcc 100644
--- a/Documentation/translations/zh_CN/kernel-hacking/hacking.rst
+++ b/Documentation/translations/zh_CN/kernel-hacking/hacking.rst
@@ -68,7 +68,7 @@
 它将被排队（或丢弃）。因为它会关闭中断，所以处理程序必须很快：通常它只是
 确认中断，标记一个“软件中断”以执行并退出。
 
-您可以通过 :c:func:`in_irq()` 返回真来判断您处于硬件中断状态。
+您可以通过 in_hardirq() 返回真来判断您处于硬件中断状态。
 
 .. warning::
 
diff --git a/Documentation/translations/zh_CN/mips/booting.rst b/Documentation/translations/zh_CN/mips/booting.rst
index 96453e1b962e..e0bbd3f20862 100644
--- a/Documentation/translations/zh_CN/mips/booting.rst
+++ b/Documentation/translations/zh_CN/mips/booting.rst
@@ -2,8 +2,11 @@
 
 .. include:: ../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../mips/booting`
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+:Original: Documentation/mips/booting.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
 .. _cn_booting:
 
diff --git a/Documentation/translations/zh_CN/mips/features.rst b/Documentation/translations/zh_CN/mips/features.rst
index 93d93d06b1b3..b61dab06ceaf 100644
--- a/Documentation/translations/zh_CN/mips/features.rst
+++ b/Documentation/translations/zh_CN/mips/features.rst
@@ -2,8 +2,11 @@
 
 .. include:: ../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../mips/features`
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+:Original: Documentation/mips/features.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
 .. _cn_features:
 
diff --git a/Documentation/translations/zh_CN/mips/index.rst b/Documentation/translations/zh_CN/mips/index.rst
index b85033f9d67c..192c6adbb72e 100644
--- a/Documentation/translations/zh_CN/mips/index.rst
+++ b/Documentation/translations/zh_CN/mips/index.rst
@@ -2,8 +2,11 @@
 
 .. include:: ../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../mips/index`
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+:Original: Documentation/mips/index.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
 ===========================
 MIPS特性文档
diff --git a/Documentation/translations/zh_CN/mips/ingenic-tcu.rst b/Documentation/translations/zh_CN/mips/ingenic-tcu.rst
index f04ba407384a..ddbe149c517b 100644
--- a/Documentation/translations/zh_CN/mips/ingenic-tcu.rst
+++ b/Documentation/translations/zh_CN/mips/ingenic-tcu.rst
@@ -2,8 +2,11 @@
 
 .. include:: ../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../mips/ingenic-tcu`
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+:Original: Documentation/mips/ingenic-tcu.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
 .. _cn_ingenic-tcu:
 
diff --git a/Documentation/translations/zh_CN/openrisc/index.rst b/Documentation/translations/zh_CN/openrisc/index.rst
index d722642796c8..9ad6cc600884 100644
--- a/Documentation/translations/zh_CN/openrisc/index.rst
+++ b/Documentation/translations/zh_CN/openrisc/index.rst
@@ -2,11 +2,13 @@
 
 .. include:: ../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../openrisc/index`
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+:Original: Documentation/openrisc/index.rst
 
-.. _cn_openrisc_index:
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
+.. _cn_openrisc_index:
 
 =================
 OpenRISC 体系架构
diff --git a/Documentation/translations/zh_CN/openrisc/openrisc_port.rst b/Documentation/translations/zh_CN/openrisc/openrisc_port.rst
index e87d0eec281d..b8a67670492d 100644
--- a/Documentation/translations/zh_CN/openrisc/openrisc_port.rst
+++ b/Documentation/translations/zh_CN/openrisc/openrisc_port.rst
@@ -1,7 +1,10 @@
 .. include:: ../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../openrisc/openrisc_port`
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+:Original: Documentation/openrisc/openrisc_port.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
 .. _cn_openrisc_port:
 
diff --git a/Documentation/translations/zh_CN/openrisc/todo.rst b/Documentation/translations/zh_CN/openrisc/todo.rst
index 9944ad05473b..63c38717edb1 100644
--- a/Documentation/translations/zh_CN/openrisc/todo.rst
+++ b/Documentation/translations/zh_CN/openrisc/todo.rst
@@ -1,7 +1,10 @@
 .. include:: ../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../openrisc/todo`
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+:Original: Documentation/openrisc/todo.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
 .. _cn_openrisc_todo.rst:
 
diff --git a/Documentation/translations/zh_CN/parisc/debugging.rst b/Documentation/translations/zh_CN/parisc/debugging.rst
index c21beb986e15..68b73eb57105 100644
--- a/Documentation/translations/zh_CN/parisc/debugging.rst
+++ b/Documentation/translations/zh_CN/parisc/debugging.rst
@@ -1,7 +1,10 @@
 .. include:: ../disclaimer-zh_CN.rst
 
 :Original: Documentation/parisc/debugging.rst
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
 .. _cn_parisc_debugging:
 
diff --git a/Documentation/translations/zh_CN/parisc/index.rst b/Documentation/translations/zh_CN/parisc/index.rst
index a47454ebe32e..0cc553fc8272 100644
--- a/Documentation/translations/zh_CN/parisc/index.rst
+++ b/Documentation/translations/zh_CN/parisc/index.rst
@@ -2,7 +2,10 @@
 .. include:: ../disclaimer-zh_CN.rst
 
 :Original: Documentation/parisc/index.rst
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
 .. _cn_parisc_index:
 
diff --git a/Documentation/translations/zh_CN/parisc/registers.rst b/Documentation/translations/zh_CN/parisc/registers.rst
index 71e2404cd103..d2ab1874a602 100644
--- a/Documentation/translations/zh_CN/parisc/registers.rst
+++ b/Documentation/translations/zh_CN/parisc/registers.rst
@@ -1,7 +1,10 @@
 .. include:: ../disclaimer-zh_CN.rst
 
 :Original: Documentation/parisc/registers.rst
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
 .. _cn_parisc_registers:
 
diff --git a/Documentation/translations/zh_CN/process/coding-style.rst b/Documentation/translations/zh_CN/process/coding-style.rst
index b8c484a84d10..638d714bec83 100644
--- a/Documentation/translations/zh_CN/process/coding-style.rst
+++ b/Documentation/translations/zh_CN/process/coding-style.rst
@@ -268,8 +268,7 @@ C 程序员不使用类似 ThisVariableIsATemporaryCounter 这样华丽的名字
 ``count_active_users()`` 或者类似的名字，你不应该叫它 ``cntuser()`` 。
 
 在函数名中包含函数类型 (所谓的匈牙利命名法) 是脑子出了问题——编译器知道那些类
-型而且能够检查那些类型，这样做只能把程序员弄糊涂了。难怪微软总是制造出有问题
-的程序。
+型而且能够检查那些类型，这样做只能把程序员弄糊涂了。
 
 本地变量名应该简短，而且能够表达相关的含义。如果你有一些随机的整数型的循环计
 数器，它应该被称为 ``i`` 。叫它 ``loop_counter`` 并无益处，如果它没有被误解的
diff --git a/Documentation/translations/zh_CN/riscv/boot-image-header.rst b/Documentation/translations/zh_CN/riscv/boot-image-header.rst
index 241bf9c1bcbe..0234c28a7114 100644
--- a/Documentation/translations/zh_CN/riscv/boot-image-header.rst
+++ b/Documentation/translations/zh_CN/riscv/boot-image-header.rst
@@ -1,10 +1,12 @@
 .. include:: ../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../riscv/boot-image-header`
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+:Original: Documentation/riscv/boot-image-header.rst
 
-.. _cn_boot-image-header.rst:
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
+.. _cn_boot-image-header.rst:
 
 ==========================
 RISC-V Linux启动镜像文件头
diff --git a/Documentation/translations/zh_CN/riscv/index.rst b/Documentation/translations/zh_CN/riscv/index.rst
index db13b1101490..bbf5d7b3777a 100644
--- a/Documentation/translations/zh_CN/riscv/index.rst
+++ b/Documentation/translations/zh_CN/riscv/index.rst
@@ -2,11 +2,13 @@
 
 .. include:: ../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../riscv/index`
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+:Original: Documentation/riscv/index.rst
 
-.. _cn_riscv_index:
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
+.. _cn_riscv_index:
 
 ===============
 RISC-V 体系结构
diff --git a/Documentation/translations/zh_CN/riscv/patch-acceptance.rst b/Documentation/translations/zh_CN/riscv/patch-acceptance.rst
index 9fd1c8216763..d180d24717bf 100644
--- a/Documentation/translations/zh_CN/riscv/patch-acceptance.rst
+++ b/Documentation/translations/zh_CN/riscv/patch-acceptance.rst
@@ -2,11 +2,13 @@
 
 .. include:: ../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../riscv/patch-acceptance`
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+:Original: Documentation/riscv/patch-acceptance.rst
 
-.. _cn_riscv_patch-acceptance:
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
+.. _cn_riscv_patch-acceptance:
 
 arch/riscv 开发者维护指南
 =========================
diff --git a/Documentation/translations/zh_CN/riscv/pmu.rst b/Documentation/translations/zh_CN/riscv/pmu.rst
index 22dcf3a9ca6e..7ec801026c4d 100644
--- a/Documentation/translations/zh_CN/riscv/pmu.rst
+++ b/Documentation/translations/zh_CN/riscv/pmu.rst
@@ -1,10 +1,12 @@
 .. include:: ../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../riscv/pmu`
-:Translator: Yanteng Si <siyanteng@loongson.cn>
+:Original: Documentation/riscv/pmu.rst
 
-.. _cn_riscv_pmu:
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
 
+.. _cn_riscv_pmu:
 
 ========================
 RISC-V平台上对PMUs的支持
diff --git a/Documentation/translations/zh_CN/virt/acrn/cpuid.rst b/Documentation/translations/zh_CN/virt/acrn/cpuid.rst
new file mode 100644
index 000000000000..6f7be545611b
--- /dev/null
+++ b/Documentation/translations/zh_CN/virt/acrn/cpuid.rst
@@ -0,0 +1,56 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../../disclaimer-zh_CN.rst
+
+:Original: Documentation/virt/acrn/cpuid.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+ 时奎亮 Alex Shi <alexs@kernel.org>
+
+.. _cn_virt_acrn_cpuid:
+
+==============
+ACRN CPUID位域
+==============
+
+在ACRN超级管理器上运行的客户虚拟机可以使用CPUID检查其一些功能。
+
+ACRN的cpuid函数是:
+
+函数: 0x40000000
+
+返回::
+
+   eax = 0x40000010
+   ebx = 0x4e524341
+   ecx = 0x4e524341
+   edx = 0x4e524341
+
+注意，ebx，ecx和edx中的这个值对应于字符串“ACRNACRNACRN”。eax中的值对应于这个叶子
+中存在的最大cpuid函数，如果将来有更多的函数加入，将被更新。
+
+函数: define ACRN_CPUID_FEATURES (0x40000001)
+
+返回::
+
+          ebx, ecx, edx
+          eax = an OR'ed group of (1 << flag)
+
+其中 ``flag`` 的定义如下:
+
+================================= =========== ================================
+标志                              值          描述
+================================= =========== ================================
+ACRN_FEATURE_PRIVILEGED_VM        0           客户虚拟机是一个有特权的虚拟机
+================================= =========== ================================
+
+函数: 0x40000010
+
+返回::
+
+          ebx, ecx, edx
+          eax = (Virtual) TSC frequency in kHz.
diff --git a/Documentation/translations/zh_CN/virt/acrn/index.rst b/Documentation/translations/zh_CN/virt/acrn/index.rst
new file mode 100644
index 000000000000..34605d87f103
--- /dev/null
+++ b/Documentation/translations/zh_CN/virt/acrn/index.rst
@@ -0,0 +1,25 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../../disclaimer-zh_CN.rst
+
+:Original: Documentation/virt/acrn/index.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+ 时奎亮 Alex Shi <alexs@kernel.org>
+
+.. _cn_virt_acrn_index:
+
+==============
+ACRN超级管理器
+==============
+
+.. toctree::
+   :maxdepth: 1
+
+   introduction
+   io-request
+   cpuid
diff --git a/Documentation/translations/zh_CN/virt/acrn/introduction.rst b/Documentation/translations/zh_CN/virt/acrn/introduction.rst
new file mode 100644
index 000000000000..7182415cb087
--- /dev/null
+++ b/Documentation/translations/zh_CN/virt/acrn/introduction.rst
@@ -0,0 +1,52 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../../disclaimer-zh_CN.rst
+
+:Original: Documentation/virt/acrn/introduction.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+ 时奎亮 Alex Shi <alexs@kernel.org>
+
+.. _cn_virt_acrn_introduction:
+
+ACRN超级管理器介绍
+==================
+
+ACRN超级管理器是一个第一类超级管理器，直接在裸机硬件上运行。它有一个特权管理虚拟机，称为服
+务虚拟机，用于管理用户虚拟机和进行I/O仿真。
+
+ACRN用户空间是一个运行在服务虚拟机中的应用程序，它根据命令行配置为用户虚拟机仿真设备。
+ACRN管理程序服务模块（HSM）是服务虚拟机中的一个内核模块，为ACRN用户空间提供管理程序服
+务。
+
+下图展示了该架构。
+
+::
+
+                服务端VM                      用户端VM
+      +----------------------------+  |  +------------------+
+      |        +--------------+    |  |  |                  |
+      |        |ACRN用户空间  |    |  |  |                  |
+      |        +--------------+    |  |  |                  |
+      |-----------------ioctl------|  |  |                  |   ...
+      |内核空间       +----------+ |  |  |                  |
+      |               |   HSM    | |  |  | 驱动             |
+      |               +----------+ |  |  |                  |
+      +--------------------|-------+  |  +------------------+
+  +---------------------hypercall----------------------------------------+
+  |                         ACRN超级管理器                               |
+  +----------------------------------------------------------------------+
+  |                          硬件                                        |
+  +----------------------------------------------------------------------+
+
+ACRN用户空间为用户虚拟机分配内存，配置和初始化用户虚拟机使用的设备，加载虚拟引导程序，
+初始化虚拟CPU状态，处理来自用户虚拟机的I/O请求访问。它使用ioctls来与HSM通信。HSM通过
+与ACRN超级管理器的hypercalls进行交互来实现管理服务。HSM向用户空间输出一个char设备接口
+（/dev/acrn_hsm）。
+
+ACRN超级管理器是开源的，任何人都可以贡献。源码库在
+https://github.com/projectacrn/acrn-hypervisor。
diff --git a/Documentation/translations/zh_CN/virt/acrn/io-request.rst b/Documentation/translations/zh_CN/virt/acrn/io-request.rst
new file mode 100644
index 000000000000..4b4e7186d9a5
--- /dev/null
+++ b/Documentation/translations/zh_CN/virt/acrn/io-request.rst
@@ -0,0 +1,99 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../../disclaimer-zh_CN.rst
+
+:Original: Documentation/virt/acrn/io-request.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+ 时奎亮 Alex Shi <alexs@kernel.org>
+
+.. _cn_virt_acrn_io-request:
+
+I/O请求处理
+===========
+
+客户虚拟机的I/O请求由超级管理器构建，由ACRN超级管理器服务模块分发到与I/O请求的地址范
+围相对应的I/O客户端。I/O请求处理的细节将在以下章节描述。
+
+1. I/O请求
+----------
+
+对于每个客户虚拟机，有一个共享的4KB字节的内存区域，用于超级管理器和服务虚拟机之间的
+I/O请求通信。一个I/O请求是一个256字节的结构体缓冲区，它是 "acrn_io_request" 结构
+体，当客户虚拟机中发生被困的I/O访问时，由超级管理器的I/O处理器填充。服务虚拟机中的
+ACRN用户空间首先分配一个4KB字节的页面，并将缓冲区的GPA（客户物理地址）传递给管理平
+台。缓冲区被用作16个I/O请求槽的数组，每个I/O请求槽为256字节。这个数组是按vCPU ID
+索引的。
+
+2. I/O客户端
+------------
+
+一个I/O客户端负责处理客户虚拟机的I/O请求，其访问的GPA在一定范围内。每个客户虚拟机
+可以关联多个I/O客户端。每个客户虚拟机都有一个特殊的客户端，称为默认客户端，负责处理
+所有不在其他客户端范围内的I/O请求。ACRN用户空间充当每个客户虚拟机的默认客户端。
+
+下面的图示显示了I/O请求共享缓冲区、I/O请求和I/O客户端之间的关系。
+
+::
+
+     +------------------------------------------------------+
+     |                                       服务VM         |
+     |+--------------------------------------------------+  |
+     ||      +----------------------------------------+  |  |
+     ||      | 共享页                 ACRN用户空间    |  |  |
+     ||      |    +-----------------+  +------------+ |  |  |
+     ||   +----+->| acrn_io_request |<-+  默认      | |  |  |
+     ||   |  | |  +-----------------+  | I/O客户端  | |  |  |
+     ||   |  | |  |       ...       |  +------------+ |  |  |
+     ||   |  | |  +-----------------+                 |  |  |
+     ||   |  +-|--------------------------------------+  |  |
+     ||---|----|-----------------------------------------|  |
+     ||   |    |                             内核        |  |
+     ||   |    |            +----------------------+     |  |
+     ||   |    |            | +-------------+  HSM |     |  |
+     ||   |    +--------------+             |      |     |  |
+     ||   |                 | | I/O客户端   |      |     |  |
+     ||   |                 | |             |      |     |  |
+     ||   |                 | +-------------+      |     |  |
+     ||   |                 +----------------------+     |  |
+     |+---|----------------------------------------------+  |
+     +----|-------------------------------------------------+
+          |
+     +----|-------------------------------------------------+
+     |  +-+-----------+                                     |
+     |  | I/O处理     |              ACRN超级管理器         |
+     |  +-------------+                                     |
+     +------------------------------------------------------+
+
+3. I/O请求状态转换
+------------------
+
+一个ACRN I/O请求的状态转换如下。
+
+::
+
+   FREE -> PENDING -> PROCESSING -> COMPLETE -> FREE -> ...
+
+- FREE: 这个I/O请求槽是空的
+- PENDING: 在这个槽位上有一个有效的I/O请求正在等待
+- PROCESSING: 正在处理I/O请求
+- COMPLETE: 该I/O请求已被处理
+
+处于COMPLETE或FREE状态的I/O请求是由超级管理器拥有的。HSM和ACRN用户空间负责处理其
+他的。
+
+4. I/O请求的处理流程
+--------------------
+
+a. 当客户虚拟机中发生被陷入的I/O访问时，超级管理器的I/O处理程序将把I/O请求填充为
+   PENDING状态。
+b. 超级管理器向服务虚拟机发出一个向上调用，这是一个通知中断。
+c. upcall处理程序会安排一个工作者来调度I/O请求。
+d. 工作者寻找PENDING I/O请求，根据I/O访问的地址将其分配给不同的注册客户，将其
+   状态更新为PROCESSING，并通知相应的客户进行处理。
+e. 被通知的客户端处理指定的I/O请求。
+f. HSM将I/O请求状态更新为COMPLETE，并通过hypercalls通知超级管理器完成。
diff --git a/Documentation/translations/zh_CN/virt/guest-halt-polling.rst b/Documentation/translations/zh_CN/virt/guest-halt-polling.rst
new file mode 100644
index 000000000000..b798d1cf0b48
--- /dev/null
+++ b/Documentation/translations/zh_CN/virt/guest-halt-polling.rst
@@ -0,0 +1,87 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/virt/guest-halt-polling.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+ 时奎亮 Alex Shi <alexs@kernel.org>
+
+.. _cn_virt_guest-halt-polling:
+
+========================================
+客户机停机轮询机制（Guest halt polling）
+========================================
+
+cpuidle_haltpoll驱动，与haltpoll管理器一起，允许客户机vcpus在停机前轮询
+一定的时间。
+
+这为物理机侧的轮询提供了以下好处:
+
+	1) 在执行轮询时，POLL标志被设置，这允许远程vCPU在执行唤醒时避免发送
+	   IPI（以及处理IPI的相关成本）。
+
+	2) 可以避免虚拟机退出的成本。
+
+客户机侧轮询的缺点是，即使在物理机中的其他可运行任务中也会进行轮询。
+
+其基本逻辑如下。一个全局值，即guest_halt_poll_ns，是由用户配置的，表示允
+许轮询的最大时间量。这个值是固定的。
+
+每个vcpu都有一个可调整的guest_halt_poll_ns（"per-cpu guest_halt_poll_ns"），
+它由算法响应事件进行调整（解释如下）。
+
+模块参数
+========
+
+haltpoll管理器有5个可调整的模块参数:
+
+1) guest_halt_poll_ns:
+
+轮询停机前执行的最大时间，以纳秒为单位。
+
+默认值: 200000
+
+2) guest_halt_poll_shrink:
+
+当唤醒事件发生在全局的guest_halt_poll_ns之后，用于缩减每个CPU的guest_halt_poll_ns
+的划分系数。
+
+默认值: 2
+
+3) guest_halt_poll_grow:
+
+当事件发生在per-cpu guest_halt_poll_ns之后但在global guest_halt_poll_ns之前，
+用于增长per-cpu guest_halt_poll_ns的乘法系数。
+
+默认值: 2
+
+4) guest_halt_poll_grow_start:
+
+在系统空闲的情况下，每个cpu guest_halt_poll_ns最终达到零。这个值设置了增长时的
+初始每cpu guest_halt_poll_ns。这个值可以从10000开始增加，以避免在最初的增长阶
+段出现失误。:
+
+10k, 20k, 40k, ... (例如，假设guest_halt_poll_grow=2).
+
+默认值: 50000
+
+5) guest_halt_poll_allow_shrink:
+
+允许缩减的Bool参数。设置为N以避免它（一旦达到全局的guest_halt_poll_ns值，每CPU的
+guest_halt_poll_ns将保持高位）。
+
+默认值: Y
+
+模块参数可以从Debugfs文件中设置，在::
+
+	/sys/module/haltpoll/parameters/
+
+进一步说明
+==========
+
+- 在设置guest_halt_poll_ns参数时应该小心，因为一个大的值有可能使几乎是完全空闲机
+  器上的cpu使用率达到100%。
diff --git a/Documentation/translations/zh_CN/virt/index.rst b/Documentation/translations/zh_CN/virt/index.rst
new file mode 100644
index 000000000000..f8dd13681341
--- /dev/null
+++ b/Documentation/translations/zh_CN/virt/index.rst
@@ -0,0 +1,38 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/virt/index.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+ 时奎亮 Alex Shi <alexs@kernel.org>
+
+.. _cn_virt_index:
+
+===============
+Linux虚拟化支持
+===============
+
+.. toctree::
+   :maxdepth: 2
+
+   paravirt_ops
+   guest-halt-polling
+   ne_overview
+   acrn/index
+
+TODOLIST:
+
+   kvm/index
+   uml/user_mode_linux_howto_v2
+
+.. only:: html and subproject
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/translations/zh_CN/virt/ne_overview.rst b/Documentation/translations/zh_CN/virt/ne_overview.rst
new file mode 100644
index 000000000000..2455b371abea
--- /dev/null
+++ b/Documentation/translations/zh_CN/virt/ne_overview.rst
@@ -0,0 +1,88 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/virt/ne_overview.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+ 时奎亮 Alex Shi <alexs@kernel.org>
+
+.. _cn_virt_ne_overview:
+
+==============
+Nitro Enclaves
+==============
+
+概述
+====
+
+Nitro Enclaves（NE）是亚马逊弹性计算云（EC2）的一项新功能，允许客户在EC2实
+例中划分出孤立的计算环境[1]。
+
+例如，一个处理敏感数据并在虚拟机中运行的应用程序，可以与在同一虚拟机中运行的
+其他应用程序分开。然后，这个应用程序在一个独立于主虚拟机的虚拟机中运行，即
+enclave。
+
+一个enclave与催生它的虚拟机一起运行。这种设置符合低延迟应用的需要。为enclave
+分配的资源，如内存和CPU，是从主虚拟机中分割出来的。每个enclave都被映射到一
+个运行在主虚拟机中的进程，该进程通过一个ioctl接口与NE驱动进行通信。
+
+在这个意义上，有两个组成部分。
+
+1. 一个enclave抽象进程——一个运行在主虚拟机客体中的用户空间进程，它使用NE驱动
+提供的ioctl接口来生成一个enclave虚拟机（这就是下面的2）。
+
+有一个NE模拟的PCI设备暴露给主虚拟机。这个新的PCI设备的驱动被包含在NE驱动中。
+
+ioctl逻辑被映射到PCI设备命令，例如，NE_START_ENCLAVE ioctl映射到一个enclave
+启动PCI命令。然后，PCI设备命令被翻译成在管理程序方面采取的行动；也就是在运
+行主虚拟机的主机上运行的Nitro管理程序。Nitro管理程序是基于KVM核心技术的。
+
+2. enclave本身——一个运行在与催生它的主虚拟机相同的主机上的虚拟机。内存和CPU
+从主虚拟机中分割出来，专门用于enclave虚拟机。enclave没有连接持久性存储。
+
+从主虚拟机中分割出来并给enclave的内存区域需要对齐2 MiB/1 GiB物理连续的内存
+区域（或这个大小的倍数，如8 MiB）。该内存可以通过使用hugetlbfs从用户空间分
+配[2][3]。一个enclave的内存大小需要至少64 MiB。enclave内存和CPU需要来自同
+一个NUMA节点。
+
+一个enclave在专用的核心上运行。CPU 0及其同级别的CPU需要保持对主虚拟机的可用
+性。CPU池必须由具有管理能力的用户为NE目的进行设置。关于CPU池的格式，请看内核
+文档[4]中的cpu list部分。
+
+enclave通过本地通信通道与主虚拟机进行通信，使用virtio-vsock[5]。主虚拟机有
+virtio-pci vsock模拟设备，而飞地虚拟机有virtio-mmio vsock模拟设备。vsock
+设备使用eventfd作为信令。enclave虚拟机看到通常的接口——本地APIC和IOAPIC——从
+virtio-vsock设备获得中断。virtio-mmio设备被放置在典型的4 GiB以下的内存中。
+
+在enclave中运行的应用程序需要和将在enclave虚拟机中运行的操作系统（如内核、
+ramdisk、init）一起被打包到enclave镜像中。enclave虚拟机有自己的内核并遵循标
+准的Linux启动协议[6]。
+
+内核bzImage、内核命令行、ramdisk（s）是enclave镜像格式（EIF）的一部分；另外
+还有一个EIF头，包括元数据，如magic number、eif版本、镜像大小和CRC。
+
+哈希值是为整个enclave镜像（EIF）、内核和ramdisk（s）计算的。例如，这被用来检
+查在enclave虚拟机中加载的enclave镜像是否是打算运行的那个。
+
+这些加密测量包括在由Nitro超级管理器成的签名证明文件中，并进一步用来证明enclave
+的身份；KMS是NE集成的服务的一个例子，它检查证明文件。
+
+enclave镜像（EIF）被加载到enclave内存中，偏移量为8 MiB。enclave中的初始进程
+连接到主虚拟机的vsock CID和一个预定义的端口--9000，以发送一个心跳值--0xb7。这
+个机制用于在主虚拟机中检查enclave是否已经启动。主虚拟机的CID是3。
+
+如果enclave虚拟机崩溃或优雅地退出，NE驱动会收到一个中断事件。这个事件会通过轮询
+通知机制进一步发送到运行在主虚拟机中的用户空间enclave进程。然后，用户空间enclave
+进程就可以退出了。
+
+[1] https://aws.amazon.com/ec2/nitro/nitro-enclaves/
+[2] https://www.kernel.org/doc/html/latest/admin-guide/mm/hugetlbpage.html
+[3] https://lwn.net/Articles/807108/
+[4] https://www.kernel.org/doc/html/latest/admin-guide/kernel-parameters.html
+[5] https://man7.org/linux/man-pages/man7/vsock.7.html
+[6] https://www.kernel.org/doc/html/latest/x86/boot.html
diff --git a/Documentation/translations/zh_CN/virt/paravirt_ops.rst b/Documentation/translations/zh_CN/virt/paravirt_ops.rst
new file mode 100644
index 000000000000..06b122bc915d
--- /dev/null
+++ b/Documentation/translations/zh_CN/virt/paravirt_ops.rst
@@ -0,0 +1,41 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/virt/paravirt_ops.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+ 陈飞杨 Feiyang Chen <chenfeiyang@loongson.cn>
+ 时奎亮 Alex Shi <alexs@kernel.org>
+
+.. _cn_virt_paravirt_ops:
+
+============
+半虚拟化操作
+============
+
+Linux提供了对不同管理程序虚拟化技术的支持。历史上，为了支持不同的虚拟机超级管理器
+（hypervisor，下文简称超级管理器），需要不同的二进制内核，这个限制已经被pv_ops移
+除了。Linux pv_ops是一个虚拟化API，它能够支持不同的管理程序。它允许每个管理程序
+优先于关键操作，并允许单一的内核二进制文件在所有支持的执行环境中运行，包括本机——没
+有任何管理程序。
+
+pv_ops提供了一组函数指针，代表了与低级关键指令和各领域高级功能相对应的操作。
+pv-ops允许在运行时进行优化，在启动时对低级关键操作进行二进制修补。
+
+pv_ops操作被分为三类:
+
+- 简单的间接调用
+   这些操作对应于高水平的函数，众所周知，间接调用的开销并不十分重要。
+
+- 间接调用，允许用二进制补丁进行优化
+   通常情况下，这些操作对应于低级别的关键指令。它们被频繁地调用，并且是对性能关
+   键。开销是非常重要的。
+
+- 一套用于手写汇编代码的宏程序
+   手写的汇编代码（.S文件）也需要半虚拟化，因为它们包括敏感指令或其中的一些代
+   码路径对性能非常关键。
diff --git a/Documentation/translations/zh_TW/IRQ.txt b/Documentation/translations/zh_TW/IRQ.txt
new file mode 100644
index 000000000000..73d435a0d1e7
--- /dev/null
+++ b/Documentation/translations/zh_TW/IRQ.txt
@@ -0,0 +1,41 @@
+Chinese translated version of Documentation/core-api/irq/index.rst
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly.  However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help.  Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Maintainer: Eric W. Biederman <ebiederman@xmission.com>
+Traditional Chinese maintainer: Hu Haowen <src.res@email.cn>
+---------------------------------------------------------------------
+Documentation/core-api/irq/index.rst 的繁體中文翻譯
+
+如果想評論或更新本文的內容，請直接聯繫原文檔的維護者。如果你使用英文
+交流有困難的話，也可以向繁體中文版維護者求助。如果本翻譯更新不及時或
+者翻譯存在問題，請聯繫繁體中文版維護者。
+
+英文版維護者： Eric W. Biederman <ebiederman@xmission.com>
+繁體中文版維護者： 胡皓文 Hu Haowen <src.res@email.cn>
+繁體中文版翻譯者： 胡皓文 Hu Haowen <src.res@email.cn>
+繁體中文版校譯者： 胡皓文 Hu Haowen <src.res@email.cn>
+
+
+以下爲正文
+---------------------------------------------------------------------
+何爲 IRQ?
+
+一個 IRQ 是來自某個設備的一個中斷請求。目前，它們可以來自一個硬體引腳，
+或來自一個數據包。多個設備可能連接到同個硬體引腳，從而共享一個 IRQ。
+
+一個 IRQ 編號是用於告知硬體中斷源的內核標識。通常情況下，這是一個
+全局 irq_desc 數組的索引，但是除了在 linux/interrupt.h 中的實現，
+具體的細節是體系結構特定的。
+
+一個 IRQ 編號是設備上某個可能的中斷源的枚舉。通常情況下，枚舉的編號是
+該引腳在系統內中斷控制器的所有輸入引腳中的編號。對於 ISA 總線中的情況，
+枚舉的是在兩個 i8259 中斷控制器中 16 個輸入引腳。
+
+架構可以對 IRQ 編號指定額外的含義，在硬體涉及任何手工配置的情況下，
+是被提倡的。ISA 的 IRQ 是一個分配這類額外含義的典型例子。
+
diff --git a/Documentation/translations/zh_TW/admin-guide/README.rst b/Documentation/translations/zh_TW/admin-guide/README.rst
new file mode 100644
index 000000000000..b752e50359e6
--- /dev/null
+++ b/Documentation/translations/zh_TW/admin-guide/README.rst
@@ -0,0 +1,351 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: Documentation/admin-guide/README.rst
+
+:譯者:
+
+ 吳想成 Wu XiangCheng <bobwxc@email.cn>
+ 胡皓文 Hu Haowen <src.res@email.cn>
+
+Linux內核5.x版本 <http://kernel.org/>
+=========================================
+
+以下是Linux版本5的發行註記。仔細閱讀它們，
+它們會告訴你這些都是什麼，解釋如何安裝內核，以及遇到問題時該如何做。
+
+什麼是Linux？
+---------------
+
+  Linux是Unix作業系統的克隆版本，由Linus Torvalds在一個鬆散的網絡黑客
+  （Hacker，無貶義）團隊的幫助下從頭開始編寫。它旨在實現兼容POSIX和
+  單一UNIX規範。
+
+  它具有在現代成熟的Unix中應當具有的所有功能，包括真正的多任務處理、虛擬內存、
+  共享庫、按需加載、共享的寫時拷貝（COW）可執行文件、恰當的內存管理以及包括
+  IPv4和IPv6在內的複合網絡棧。
+
+  Linux在GNU通用公共許可證，版本2（GNU GPLv2）下分發，詳見隨附的COPYING文件。
+
+它能在什麼樣的硬體上運行？
+-----------------------------
+
+  雖然Linux最初是爲32位的x86 PC機（386或更高版本）開發的，但今天它也能運行在
+  （至少）Compaq Alpha AXP、Sun SPARC與UltraSPARC、Motorola 68000、PowerPC、
+  PowerPC64、ARM、Hitachi SuperH、Cell、IBM S/390、MIPS、HP PA-RISC、Intel 
+  IA-64、DEC VAX、AMD x86-64 Xtensa和ARC架構上。
+
+  Linux很容易移植到大多數通用的32位或64位體系架構，只要它們有一個分頁內存管理
+  單元（PMMU）和一個移植的GNU C編譯器（gcc；GNU Compiler Collection，GCC的一
+  部分）。Linux也被移植到許多沒有PMMU的體系架構中，儘管功能顯然受到了一定的
+  限制。
+  Linux也被移植到了其自己上。現在可以將內核作爲用戶空間應用程式運行——這被
+  稱爲用戶模式Linux（UML）。
+
+文檔
+-----
+網際網路上和書籍上都有大量的電子文檔，既有Linux專屬文檔，也有與一般UNIX問題相關
+的文檔。我建議在任何Linux FTP站點上查找LDP（Linux文檔項目）書籍的文檔子目錄。
+本自述文件並不是關於系統的文檔：有更好的可用資源。
+
+ - 網際網路上和書籍上都有大量的（電子）文檔，既有Linux專屬文檔，也有與普通
+   UNIX問題相關的文檔。我建議在任何有LDP（Linux文檔項目）書籍的Linux FTP
+   站點上查找文檔子目錄。本自述文件並不是關於系統的文檔：有更好的可用資源。
+
+ - 文檔/子目錄中有各種自述文件：例如，這些文件通常包含一些特定驅動程序的
+   內核安裝說明。請閱讀
+   :ref:`Documentation/process/changes.rst <changes>` 文件，它包含了升級內核
+   可能會導致的問題的相關信息。
+
+安裝內核原始碼
+---------------
+
+ - 如果您要安裝完整的原始碼，請把內核tar檔案包放在您有權限的目錄中（例如您
+   的主目錄）並將其解包::
+
+     xz -cd linux-5.x.tar.xz | tar xvf -
+
+   將「X」替換成最新內核的版本號。
+
+   【不要】使用 /usr/src/linux 目錄！這裡有一組庫頭文件使用的內核頭文件
+   （通常是不完整的）。它們應該與庫匹配，而不是被內核的變化搞得一團糟。
+
+ - 您還可以通過打補丁在5.x版本之間升級。補丁以xz格式分發。要通過打補丁進行
+   安裝，請獲取所有較新的補丁文件，進入內核原始碼（linux-5.x）的目錄並
+   執行::
+
+     xz -cd ../patch-5.x.xz | patch -p1
+
+   請【按順序】替換所有大於當前原始碼樹版本的「x」，這樣就可以了。您可能想要
+   刪除備份文件（文件名類似xxx~ 或 xxx.orig)，並確保沒有失敗的補丁（文件名
+   類似xxx# 或 xxx.rej）。如果有，不是你就是我犯了錯誤。
+
+   與5.x內核的補丁不同，5.x.y內核（也稱爲穩定版內核）的補丁不是增量的，而是
+   直接應用於基本的5.x內核。例如，如果您的基本內核是5.0，並且希望應用5.0.3
+   補丁，則不應先應用5.0.1和5.0.2的補丁。類似地，如果您運行的是5.0.2內核，
+   並且希望跳轉到5.0.3，那麼在應用5.0.3補丁之前，必須首先撤銷5.0.2補丁
+   （即patch -R）。更多關於這方面的內容，請閱讀
+   :ref:`Documentation/process/applying-patches.rst <applying_patches>` 。
+
+   或者，腳本 patch-kernel 可以用來自動化這個過程。它能確定當前內核版本並
+   應用找到的所有補丁::
+
+     linux/scripts/patch-kernel linux
+
+   上面命令中的第一個參數是內核原始碼的位置。補丁是在當前目錄應用的，但是
+   可以將另一個目錄指定爲第二個參數。
+
+ - 確保沒有過時的 .o 文件和依賴項::
+
+     cd linux
+     make mrproper
+
+   現在您應該已經正確安裝了原始碼。
+
+軟體要求
+---------
+
+   編譯和運行5.x內核需要各種軟體包的最新版本。請參考
+   :ref:`Documentation/process/changes.rst <changes>`
+   來了解最低版本要求以及如何升級軟體包。請注意，使用過舊版本的這些包可能會
+   導致很難追蹤的間接錯誤，因此不要以爲在生成或操作過程中出現明顯問題時可以
+   只更新包。
+
+爲內核建立目錄
+---------------
+
+   編譯內核時，默認情況下所有輸出文件都將與內核原始碼放在一起。使用
+   ``make O=output/dir`` 選項可以爲輸出文件（包括 .config）指定備用位置。
+   例如::
+
+     kernel source code: /usr/src/linux-5.x
+     build directory:    /home/name/build/kernel
+
+   要配置和構建內核，請使用::
+
+     cd /usr/src/linux-5.x
+     make O=/home/name/build/kernel menuconfig
+     make O=/home/name/build/kernel
+     sudo make O=/home/name/build/kernel modules_install install
+
+   請注意：如果使用了 ``O=output/dir`` 選項，那麼它必須用於make的所有調用。
+
+配置內核
+---------
+
+   即使只升級一個小版本，也不要跳過此步驟。每個版本中都會添加新的配置選項，
+   如果配置文件沒有按預定設置，就會出現奇怪的問題。如果您想以最少的工作量
+   將現有配置升級到新版本，請使用 ``makeoldconfig`` ，它只會詢問您新配置
+   選項的答案。
+
+ - 其他配置命令包括::
+
+     "make config"      純文本界面。
+
+     "make menuconfig"  基於文本的彩色菜單、選項列表和對話框。
+
+     "make nconfig"     增強的基於文本的彩色菜單。
+
+     "make xconfig"     基於Qt的配置工具。
+
+     "make gconfig"     基於GTK+的配置工具。
+
+     "make oldconfig"   基於現有的 ./.config 文件選擇所有選項，並詢問
+                        新配置選項。
+
+     "make olddefconfig"
+                        類似上一個，但不詢問直接將新選項設置爲默認值。
+
+     "make defconfig"   根據體系架構，使用arch/$arch/defconfig或
+                        arch/$arch/configs/${PLATFORM}_defconfig中的
+                        默認選項值創建./.config文件。
+
+     "make ${PLATFORM}_defconfig"
+                        使用arch/$arch/configs/${PLATFORM}_defconfig中
+                        的默認選項值創建一個./.config文件。
+                        用「makehelp」來獲取您體系架構中所有可用平台的列表。
+
+     "make allyesconfig"
+                        通過儘可能將選項值設置爲「y」，創建一個
+                        ./.config文件。
+
+     "make allmodconfig"
+                        通過儘可能將選項值設置爲「m」，創建一個
+                        ./.config文件。
+
+     "make allnoconfig" 通過儘可能將選項值設置爲「n」，創建一個
+                        ./.config文件。
+
+     "make randconfig"  通過隨機設置選項值來創建./.config文件。
+
+     "make localmodconfig" 基於當前配置和加載的模塊（lsmod）創建配置。禁用
+                           已加載的模塊不需要的任何模塊選項。
+
+                           要爲另一台計算機創建localmodconfig，請將該計算機
+                           的lsmod存儲到一個文件中，並將其作爲lsmod參數傳入。
+
+                           此外，通過在參數LMC_KEEP中指定模塊的路徑，可以將
+                           模塊保留在某些文件夾或kconfig文件中。
+
+                   target$ lsmod > /tmp/mylsmod
+                   target$ scp /tmp/mylsmod host:/tmp
+
+                   host$ make LSMOD=/tmp/mylsmod \
+                           LMC_KEEP="drivers/usb:drivers/gpu:fs" \
+                           localmodconfig
+
+                           上述方法在交叉編譯時也適用。
+
+     "make localyesconfig" 與localmodconfig類似，只是它會將所有模塊選項轉換
+                           爲內置（=y）。你可以同時通過LMC_KEEP保留模塊。
+
+     "make kvmconfig"   爲kvm客體內核支持啓用其他選項。
+
+     "make xenconfig"   爲xen dom0客體內核支持啓用其他選項。
+
+     "make tinyconfig"  配置儘可能小的內核。
+
+   更多關於使用Linux內核配置工具的信息，見文檔
+   Documentation/kbuild/kconfig.rst。
+
+ - ``make config`` 注意事項:
+
+    - 包含不必要的驅動程序會使內核變大，並且在某些情況下會導致問題：
+      探測不存在的控制器卡可能會混淆其他控制器。
+
+    - 如果存在協處理器，則編譯了數學仿真的內核仍將使用協處理器：在
+      這種情況下，數學仿真永遠不會被使用。內核會稍微大一點，但不管
+      是否有數學協處理器，都可以在不同的機器上工作。
+
+    - 「kernel hacking」配置細節通常會導致更大或更慢的內核（或兩者
+      兼而有之），甚至可以通過配置一些例程來主動嘗試破壞壞代碼以發現
+      內核問題，從而降低內核的穩定性（kmalloc()）。因此，您可能應該
+      用於研究「開發」、「實驗」或「調試」特性相關問題。
+
+編譯內核
+---------
+
+ - 確保您至少有gcc 4.9可用。
+   有關更多信息，請參閱 :ref:`Documentation/process/changes.rst <changes>` 。
+
+   請注意，您仍然可以使用此內核運行a.out用戶程序。
+
+ - 執行 ``make`` 來創建壓縮內核映像。如果您安裝了lilo以適配內核makefile，
+   那麼也可以進行 ``makeinstall`` ，但是您可能需要先檢查特定的lilo設置。
+
+   實際安裝必須以root身份執行，但任何正常構建都不需要。
+   無須徒然使用root身份。
+
+ - 如果您將內核的任何部分配置爲模塊，那麼還必須執行 ``make modules_install`` 。
+
+ - 詳細的內核編譯/生成輸出：
+
+   通常，內核構建系統在相當安靜的模式下運行（但不是完全安靜）。但是有時您或
+   其他內核開發人員需要看到編譯、連結或其他命令的執行過程。爲此，可使用
+   「verbose（詳細）」構建模式。
+   向 ``make`` 命令傳遞 ``V=1`` 來實現，例如::
+
+     make V=1 all
+
+   如需構建系統也給出內個目標重建的願意，請使用 ``V=2`` 。默認爲 ``V=0`` 。
+
+ - 準備一個備份內核以防出錯。對於開發版本尤其如此，因爲每個新版本都包含
+   尚未調試的新代碼。也要確保保留與該內核對應的模塊的備份。如果要安裝
+   與工作內核版本號相同的新內核，請在進行 ``make modules_install`` 安裝
+   之前備份modules目錄。
+
+   或者，在編譯之前，使用內核配置選項「LOCALVERSION」向常規內核版本附加
+   一個唯一的後綴。LOCALVERSION可以在「General Setup」菜單中設置。
+
+ - 爲了引導新內核，您需要將內核映像（例如編譯後的
+   .../linux/arch/x86/boot/bzImage）複製到常規可引導內核的位置。
+
+ - 不再支持在沒有LILO等啓動裝載程序幫助的情況下直接從軟盤引導內核。
+
+   如果從硬碟引導Linux，很可能使用LILO，它使用/etc/lilo.conf文件中
+   指定的內核映像文件。內核映像文件通常是/vmlinuz、/boot/vmlinuz、
+   /bzImage或/boot/bzImage。使用新內核前，請保存舊映像的副本，並複製
+   新映像覆蓋舊映像。然後您【必須重新運行LILO】來更新加載映射！否則，
+   將無法啓動新的內核映像。
+
+   重新安裝LILO通常需要運行/sbin/LILO。您可能希望編輯/etc/lilo.conf
+   文件爲舊內核映像指定一個條目（例如/vmlinux.old)防止新的不能正常
+   工作。有關更多信息，請參閱LILO文檔。
+
+   重新安裝LILO之後，您應該就已經準備好了。關閉系統，重新啓動，盡情
+   享受吧！
+
+   如果需要更改內核映像中的默認根設備、視頻模式等，請在適當的地方使用
+   啓動裝載程序的引導選項。無需重新編譯內核即可更改這些參數。
+
+ - 使用新內核重新啓動並享受它吧。
+
+若遇到問題
+-----------
+
+ - 如果您發現了一些可能由於內核缺陷所導致的問題，請檢查MAINTAINERS（維護者）
+   文件看看是否有人與令您遇到麻煩的內核部分相關。如果無人在此列出，那麼第二
+   個最好的方案就是把它們發給我（torvalds@linux-foundation.org），也可能發送
+   到任何其他相關的郵件列表或新聞組。
+
+ - 在所有的缺陷報告中，【請】告訴我們您在說什麼內核，如何復現問題，以及您的
+   設置是什麼的（使用您的常識）。如果問題是新的，請告訴我；如果問題是舊的，
+   請嘗試告訴我您什麼時候首次注意到它。
+
+ - 如果缺陷導致如下消息::
+
+     unable to handle kernel paging request at address C0000010
+     Oops: 0002
+     EIP:   0010:XXXXXXXX
+     eax: xxxxxxxx   ebx: xxxxxxxx   ecx: xxxxxxxx   edx: xxxxxxxx
+     esi: xxxxxxxx   edi: xxxxxxxx   ebp: xxxxxxxx
+     ds: xxxx  es: xxxx  fs: xxxx  gs: xxxx
+     Pid: xx, process nr: xx
+     xx xx xx xx xx xx xx xx xx xx
+
+   或者類似的內核調試信息顯示在屏幕上或在系統日誌里，請【如實】複製它。
+   可能對你來說轉儲（dump）看起來不可理解，但它確實包含可能有助於調試問題的
+   信息。轉儲上方的文本也很重要：它說明了內核轉儲代碼的原因（在上面的示例中，
+   是由於內核指針錯誤）。更多關於如何理解轉儲的信息，請參見
+   Documentation/admin-guide/bug-hunting.rst。
+
+ - 如果使用 CONFIG_KALLSYMS 編譯內核，則可以按原樣發送轉儲，否則必須使用
+   ``ksymoops`` 程序來理解轉儲（但通常首選使用CONFIG_KALLSYMS編譯）。
+   此實用程序可從
+   https://www.kernel.org/pub/linux/utils/kernel/ksymoops/ 下載。
+   或者，您可以手動執行轉儲查找：
+
+ - 在調試像上面這樣的轉儲時，如果您可以查找EIP值的含義，這將非常有幫助。
+   十六進位值本身對我或其他任何人都沒有太大幫助：它會取決於特定的內核設置。
+   您應該做的是從EIP行獲取十六進位值（忽略 ``0010:`` ），然後在內核名字列表
+   中查找它，以查看哪個內核函數包含有問題的地址。
+
+   要找到內核函數名，您需要找到與顯示症狀的內核相關聯的系統二進位文件。就是
+   文件「linux/vmlinux」。要提取名字列表並將其與內核崩潰中的EIP進行匹配，
+   請執行::
+
+     nm vmlinux | sort | less
+
+   這將爲您提供一個按升序排序的內核地址列表，從中很容易找到包含有問題的地址
+   的函數。請注意，內核調試消息提供的地址不一定與函數地址完全匹配（事實上，
+   這是不可能的），因此您不能只「grep」列表：不過列表將爲您提供每個內核函數
+   的起點，因此通過查找起始地址低於你正在搜索的地址，但後一個函數的高於的
+   函數，你會找到您想要的。實際上，在您的問題報告中加入一些「上下文」可能是
+   一個好主意，給出相關的上下幾行。
+
+   如果您由於某些原因無法完成上述操作（如您使用預編譯的內核映像或類似的映像），
+   請儘可能多地告訴我您的相關設置信息，這會有所幫助。有關詳細信息請閱讀
+   『Documentation/admin-guide/reporting-issues.rst』。
+
+ - 或者，您可以在正在運行的內核上使用gdb（只讀的；即不能更改值或設置斷點）。
+   爲此，請首先使用-g編譯內核；適當地編輯arch/x86/Makefile，然後執行 ``make
+   clean`` 。您還需要啓用CONFIG_PROC_FS（通過 ``make config`` ）。
+
+   使用新內核重新啓動後，執行 ``gdb vmlinux /proc/kcore`` 。現在可以使用所有
+   普通的gdb命令。查找系統崩潰點的命令是 ``l *0xXXXXXXXX`` （將xxx替換爲EIP
+   值）。
+
+   用gdb無法調試一個當前未運行的內核是由於gdb（錯誤地）忽略了編譯內核的起始
+   偏移量。
+
diff --git a/Documentation/translations/zh_TW/admin-guide/bug-bisect.rst b/Documentation/translations/zh_TW/admin-guide/bug-bisect.rst
new file mode 100644
index 000000000000..41a39aebb8d6
--- /dev/null
+++ b/Documentation/translations/zh_TW/admin-guide/bug-bisect.rst
@@ -0,0 +1,85 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :doc:`../../../admin-guide/bug-bisect`
+
+:譯者:
+
+ 吳想成 Wu XiangCheng <bobwxc@email.cn>
+ 胡皓文 Hu Haowen <src.res@email.cn>
+
+二分（bisect）缺陷
++++++++++++++++++++
+
+（英文版）最後更新：2016年10月28日
+
+引言
+=====
+
+始終嘗試由來自kernel.org的原始碼構建的最新內核。如果您沒有信心這樣做，請將
+錯誤報告給您的發行版供應商，而不是內核開發人員。
+
+找到缺陷（bug）並不總是那麼容易，不過仍然得去找。如果你找不到它，不要放棄。
+儘可能多的向相關維護人員報告您發現的信息。請參閱MAINTAINERS文件以了解您所
+關注的子系統的維護人員。
+
+在提交錯誤報告之前，請閱讀「Documentation/admin-guide/reporting-issues.rst」。
+
+設備未出現（Devices not appearing）
+====================================
+
+這通常是由udev/systemd引起的。在將其歸咎於內核之前先檢查一下。
+
+查找導致缺陷的補丁
+===================
+
+使用 ``git`` 提供的工具可以很容易地找到缺陷，只要缺陷是可復現的。
+
+操作步驟：
+
+- 從git原始碼構建內核
+- 以此開始二分 [#f1]_::
+
+	$ git bisect start
+
+- 標記損壞的變更集::
+
+	$ git bisect bad [commit]
+
+- 標記正常工作的變更集::
+
+	$ git bisect good [commit]
+
+- 重新構建內核並測試
+- 使用以下任一與git bisect進行交互::
+
+	$ git bisect good
+
+  或::
+
+	$ git bisect bad
+
+  這取決於您測試的變更集上是否有缺陷
+- 在一些交互之後，git bisect將給出可能導致缺陷的變更集。
+
+- 例如，如果您知道當前版本有問題，而4.8版本是正常的，則可以執行以下操作::
+
+	$ git bisect start
+	$ git bisect bad                 # Current version is bad
+	$ git bisect good v4.8
+
+
+.. [#f1] 您可以（可選地）在開始git bisect的時候提供good或bad參數
+         ``git bisect start [BAD] [GOOD]``
+
+如需進一步參考，請閱讀：
+
+- ``git-bisect`` 的手冊頁
+- `Fighting regressions with git bisect（用git bisect解決回歸）
+  <https://www.kernel.org/pub/software/scm/git/docs/git-bisect-lk2009.html>`_
+- `Fully automated bisecting with "git bisect run"（使用git bisect run
+  來全自動二分） <https://lwn.net/Articles/317154>`_
+- `Using Git bisect to figure out when brokenness was introduced
+  （使用Git二分來找出何時引入了錯誤） <http://webchick.net/node/99>`_
+
diff --git a/Documentation/translations/zh_TW/admin-guide/bug-hunting.rst b/Documentation/translations/zh_TW/admin-guide/bug-hunting.rst
new file mode 100644
index 000000000000..4d813aec77d2
--- /dev/null
+++ b/Documentation/translations/zh_TW/admin-guide/bug-hunting.rst
@@ -0,0 +1,344 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :doc:`../../../admin-guide/bug-hunting`
+
+:譯者:
+
+ 吳想成 Wu XiangCheng <bobwxc@email.cn>
+ 胡皓文 Hu Haowen <src.res@email.cn>
+
+追蹤缺陷
+=========
+
+內核錯誤報告通常附帶如下堆棧轉儲::
+
+	------------[ cut here ]------------
+	WARNING: CPU: 1 PID: 28102 at kernel/module.c:1108 module_put+0x57/0x70
+	Modules linked in: dvb_usb_gp8psk(-) dvb_usb dvb_core nvidia_drm(PO) nvidia_modeset(PO) snd_hda_codec_hdmi snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd soundcore nvidia(PO) [last unloaded: rc_core]
+	CPU: 1 PID: 28102 Comm: rmmod Tainted: P        WC O 4.8.4-build.1 #1
+	Hardware name: MSI MS-7309/MS-7309, BIOS V1.12 02/23/2009
+	 00000000 c12ba080 00000000 00000000 c103ed6a c1616014 00000001 00006dc6
+	 c1615862 00000454 c109e8a7 c109e8a7 00000009 ffffffff 00000000 f13f6a10
+	 f5f5a600 c103ee33 00000009 00000000 00000000 c109e8a7 f80ca4d0 c109f617
+	Call Trace:
+	 [<c12ba080>] ? dump_stack+0x44/0x64
+	 [<c103ed6a>] ? __warn+0xfa/0x120
+	 [<c109e8a7>] ? module_put+0x57/0x70
+	 [<c109e8a7>] ? module_put+0x57/0x70
+	 [<c103ee33>] ? warn_slowpath_null+0x23/0x30
+	 [<c109e8a7>] ? module_put+0x57/0x70
+	 [<f80ca4d0>] ? gp8psk_fe_set_frontend+0x460/0x460 [dvb_usb_gp8psk]
+	 [<c109f617>] ? symbol_put_addr+0x27/0x50
+	 [<f80bc9ca>] ? dvb_usb_adapter_frontend_exit+0x3a/0x70 [dvb_usb]
+	 [<f80bb3bf>] ? dvb_usb_exit+0x2f/0xd0 [dvb_usb]
+	 [<c13d03bc>] ? usb_disable_endpoint+0x7c/0xb0
+	 [<f80bb48a>] ? dvb_usb_device_exit+0x2a/0x50 [dvb_usb]
+	 [<c13d2882>] ? usb_unbind_interface+0x62/0x250
+	 [<c136b514>] ? __pm_runtime_idle+0x44/0x70
+	 [<c13620d8>] ? __device_release_driver+0x78/0x120
+	 [<c1362907>] ? driver_detach+0x87/0x90
+	 [<c1361c48>] ? bus_remove_driver+0x38/0x90
+	 [<c13d1c18>] ? usb_deregister+0x58/0xb0
+	 [<c109fbb0>] ? SyS_delete_module+0x130/0x1f0
+	 [<c1055654>] ? task_work_run+0x64/0x80
+	 [<c1000fa5>] ? exit_to_usermode_loop+0x85/0x90
+	 [<c10013f0>] ? do_fast_syscall_32+0x80/0x130
+	 [<c1549f43>] ? sysenter_past_esp+0x40/0x6a
+	---[ end trace 6ebc60ef3981792f ]---
+
+這樣的堆棧跟蹤提供了足夠的信息來識別內核原始碼中發生錯誤的那一行。根據問題的
+嚴重性，它還可能包含 **「Oops」** 一詞，比如::
+
+	BUG: unable to handle kernel NULL pointer dereference at   (null)
+	IP: [<c06969d4>] iret_exc+0x7d0/0xa59
+	*pdpt = 000000002258a001 *pde = 0000000000000000
+	Oops: 0002 [#1] PREEMPT SMP
+	...
+
+儘管有 **Oops** 或其他類型的堆棧跟蹤，但通常需要找到出問題的行來識別和處理缺
+陷。在本章中，我們將參考「Oops」來了解需要分析的各種堆棧跟蹤。
+
+如果內核是用 ``CONFIG_DEBUG_INFO`` 編譯的，那麼可以使用文件：
+`scripts/decode_stacktrace.sh` 。
+
+連結的模塊
+-----------
+
+受到汙染或正在加載/卸載的模塊用「（…）」標記，汙染標誌在
+`Documentation/admin-guide/tainted-kernels.rst` 文件中進行了描述，「正在被加
+載」用「+」標註，「正在被卸載」用「-」標註。
+
+
+Oops消息在哪？
+---------------
+
+通常，Oops文本由klogd從內核緩衝區讀取，然後交給 ``syslogd`` ，後者將其寫入
+syslog文件，通常是 ``/var/log/messages`` （取決於 ``/etc/syslog.conf`` ）。
+在使用systemd的系統上，它也可以由 ``journald`` 守護進程存儲，並通過運行
+``journalctl`` 命令進行訪問。
+
+有時 ``klogd`` 會掛掉，這種情況下您可以運行 ``dmesg > file`` 從內核緩衝區
+讀取數據並保存它。或者您可以 ``cat /proc/kmsg > file`` ，但是您必須適時
+中斷以停止傳輸，因爲 ``kmsg`` 是一個「永無止境的文件」。
+
+如果機器嚴重崩潰，無法輸入命令或磁碟不可用，那還有三個選項：
+
+(1) 手動複製屏幕上的文本，並在機器重新啓動後輸入。很難受，但這是突然崩潰下
+    唯一的選擇。或者你可以用數位相機拍下屏幕——雖然不那麼好，但總比什麼都沒
+    有好。如果消息滾動超出控制台頂部，使用更高解析度（例如 ``vga=791`` ）
+    引導啓動將允許您閱讀更多文本。（警告：這需要 ``vesafb`` ，因此對「早期」
+    的Oppses沒有幫助）
+
+(2) 從串口終端啓動（參見
+    :ref:`Documentation/admin-guide/serial-console.rst <serial_console>` ），
+    在另一台機器上運行數據機然後用你喜歡的通信程序捕獲輸出。
+    Minicom運行良好。
+
+(3) 使用Kdump（參閱 Documentation/admin-guide/kdump/kdump.rst ），使用
+    Documentation/admin-guide/kdump/gdbmacros.txt 中的dmesg gdbmacro從舊內存
+    中提取內核環形緩衝區。
+
+找到缺陷位置
+-------------
+
+如果你能指出缺陷在內核原始碼中的位置，則報告缺陷的效果會非常好。這有兩種方法。
+通常來說使用 ``gdb`` 會比較容易，不過內核需要用調試信息來預編譯。
+
+gdb
+^^^^
+
+GNU 調試器（GNU debugger， ``gdb`` ）是從 ``vmlinux`` 文件中找出OOPS的確切
+文件和行號的最佳方法。
+
+在使用 ``CONFIG_DEBUG_INFO`` 編譯的內核上使用gdb效果最好。可通過運行以下命令
+進行設置::
+
+  $ ./scripts/config -d COMPILE_TEST -e DEBUG_KERNEL -e DEBUG_INFO
+
+在用 ``CONFIG_DEBUG_INFO`` 編譯的內核上，你可以直接從OOPS複製EIP值::
+
+ EIP:    0060:[<c021e50e>]    Not tainted VLI
+
+並使用GDB來將其翻譯成可讀形式::
+
+  $ gdb vmlinux
+  (gdb) l *0xc021e50e
+
+如果沒有啓用 ``CONFIG_DEBUG_INFO`` ，則使用OOPS的函數偏移::
+
+ EIP is at vt_ioctl+0xda8/0x1482
+
+並在啓用 ``CONFIG_DEBUG_INFO`` 的情況下重新編譯內核::
+
+  $ ./scripts/config -d COMPILE_TEST -e DEBUG_KERNEL -e DEBUG_INFO
+  $ make vmlinux
+  $ gdb vmlinux
+  (gdb) l *vt_ioctl+0xda8
+  0x1888 is in vt_ioctl (drivers/tty/vt/vt_ioctl.c:293).
+  288	{
+  289		struct vc_data *vc = NULL;
+  290		int ret = 0;
+  291
+  292		console_lock();
+  293		if (VT_BUSY(vc_num))
+  294			ret = -EBUSY;
+  295		else if (vc_num)
+  296			vc = vc_deallocate(vc_num);
+  297		console_unlock();
+
+或者若您想要更詳細的顯示::
+
+  (gdb) p vt_ioctl
+  $1 = {int (struct tty_struct *, unsigned int, unsigned long)} 0xae0 <vt_ioctl>
+  (gdb) l *0xae0+0xda8
+
+您也可以使用對象文件作爲替代::
+
+  $ make drivers/tty/
+  $ gdb drivers/tty/vt/vt_ioctl.o
+  (gdb) l *vt_ioctl+0xda8
+
+如果你有調用跟蹤，類似::
+
+     Call Trace:
+      [<ffffffff8802c8e9>] :jbd:log_wait_commit+0xa3/0xf5
+      [<ffffffff810482d9>] autoremove_wake_function+0x0/0x2e
+      [<ffffffff8802770b>] :jbd:journal_stop+0x1be/0x1ee
+      ...
+
+這表明問題可能在 :jbd: 模塊中。您可以在gdb中加載該模塊並列出相關代碼::
+
+  $ gdb fs/jbd/jbd.ko
+  (gdb) l *log_wait_commit+0xa3
+
+.. note::
+
+     您還可以對堆棧跟蹤處的任何函數調用執行相同的操作，例如::
+
+	 [<f80bc9ca>] ? dvb_usb_adapter_frontend_exit+0x3a/0x70 [dvb_usb]
+
+     上述調用發生的位置可以通過以下方式看到::
+
+	$ gdb drivers/media/usb/dvb-usb/dvb-usb.o
+	(gdb) l *dvb_usb_adapter_frontend_exit+0x3a
+
+objdump
+^^^^^^^^
+
+要調試內核，請使用objdump並從崩潰輸出中查找十六進位偏移，以找到有效的代碼/匯
+編行。如果沒有調試符號，您將看到所示例程的彙編程序代碼，但是如果內核有調試
+符號，C代碼也將可見（調試符號可以在內核配置菜單的hacking項中啓用）。例如::
+
+    $ objdump -r -S -l --disassemble net/dccp/ipv4.o
+
+.. note::
+
+   您需要處於內核樹的頂層以便此獲得您的C文件。
+
+如果您無法訪問原始碼，仍然可以使用以下方法調試一些崩潰轉儲（如Dave Miller的
+示例崩潰轉儲輸出所示）::
+
+     EIP is at 	+0x14/0x4c0
+      ...
+     Code: 44 24 04 e8 6f 05 00 00 e9 e8 fe ff ff 8d 76 00 8d bc 27 00 00
+     00 00 55 57  56 53 81 ec bc 00 00 00 8b ac 24 d0 00 00 00 8b 5d 08
+     <8b> 83 3c 01 00 00 89 44  24 14 8b 45 28 85 c0 89 44 24 18 0f 85
+
+     Put the bytes into a "foo.s" file like this:
+
+            .text
+            .globl foo
+     foo:
+            .byte  .... /* bytes from Code: part of OOPS dump */
+
+     Compile it with "gcc -c -o foo.o foo.s" then look at the output of
+     "objdump --disassemble foo.o".
+
+     Output:
+
+     ip_queue_xmit:
+         push       %ebp
+         push       %edi
+         push       %esi
+         push       %ebx
+         sub        $0xbc, %esp
+         mov        0xd0(%esp), %ebp        ! %ebp = arg0 (skb)
+         mov        0x8(%ebp), %ebx         ! %ebx = skb->sk
+         mov        0x13c(%ebx), %eax       ! %eax = inet_sk(sk)->opt
+
+`scripts/decodecode` 文件可以用來自動完成大部分工作，這取決於正在調試的CPU
+體系結構。
+
+報告缺陷
+---------
+
+一旦你通過定位缺陷找到了其發生的地方，你可以嘗試自己修復它或者向上游報告它。
+
+爲了向上游報告，您應該找出用於開發受影響代碼的郵件列表。這可以使用 ``get_maintainer.pl`` 。
+
+
+例如，您在gspca的sonixj.c文件中發現一個缺陷，則可以通過以下方法找到它的維護者::
+
+	$ ./scripts/get_maintainer.pl -f drivers/media/usb/gspca/sonixj.c
+	Hans Verkuil <hverkuil@xs4all.nl> (odd fixer:GSPCA USB WEBCAM DRIVER,commit_signer:1/1=100%)
+	Mauro Carvalho Chehab <mchehab@kernel.org> (maintainer:MEDIA INPUT INFRASTRUCTURE (V4L/DVB),commit_signer:1/1=100%)
+	Tejun Heo <tj@kernel.org> (commit_signer:1/1=100%)
+	Bhaktipriya Shridhar <bhaktipriya96@gmail.com> (commit_signer:1/1=100%,authored:1/1=100%,added_lines:4/4=100%,removed_lines:9/9=100%)
+	linux-media@vger.kernel.org (open list:GSPCA USB WEBCAM DRIVER)
+	linux-kernel@vger.kernel.org (open list)
+
+請注意它將指出：
+
+- 最後接觸原始碼的開發人員（如果這是在git樹中完成的）。在上面的例子中是Tejun
+  和Bhaktipriya（在這個特定的案例中，沒有人真正參與這個文件的開發）；
+- 驅動維護人員（Hans Verkuil）；
+- 子系統維護人員（Mauro Carvalho Chehab）；
+- 驅動程序和/或子系統郵件列表（linux-media@vger.kernel.org）；
+- Linux內核郵件列表（linux-kernel@vger.kernel.org）。
+
+通常，修復缺陷的最快方法是將它報告給用於開發相關代碼的郵件列表（linux-media
+ML），抄送驅動程序維護者（Hans）。
+
+如果你完全不知道該把報告寄給誰，且 ``get_maintainer.pl`` 也沒有提供任何有用
+的信息，請發送到linux-kernel@vger.kernel.org。
+
+感謝您的幫助，這使Linux儘可能穩定:-)
+
+修復缺陷
+---------
+
+如果你懂得編程，你不僅可以通過報告錯誤來幫助我們，還可以提供一個解決方案。
+畢竟，開源就是分享你的工作，你不想因爲你的天才而被認可嗎？
+
+如果你決定這樣做，請在制定解決方案後將其提交到上游。
+
+請務必閱讀
+:ref:`Documentation/process/submitting-patches.rst <submittingpatches>` ，
+以幫助您的代碼被接受。
+
+
+---------------------------------------------------------------------------
+
+用 ``klogd`` 進行Oops跟蹤的注意事項
+------------------------------------
+
+爲了幫助Linus和其他內核開發人員， ``klogd`` 對保護故障的處理提供了大量支持。
+爲了完整支持地址解析，至少應該使用 ``sysklogd`` 包的1.3-pl3版本。
+
+當發生保護故障時， ``klogd`` 守護進程會自動將內核日誌消息中的重要地址轉換爲
+它們的等效符號。然後通過 ``klogd`` 使用的任何報告機制來轉發這個已翻譯的內核
+消息。保護錯誤消息可以直接從消息文件中剪切出來並轉發給內核開發人員。
+
+``klogd`` 執行兩種類型的地址解析，靜態翻譯和動態翻譯。靜態翻譯使用System.map
+文件。爲了進行靜態轉換， ``klogd`` 守護進程必須能夠在守護進程初始化時找到系
+統映射文件。有關 ``klogd`` 如何搜索映射文件的信息，請參見klogd手冊頁。
+
+當使用內核可加載模塊時，動態地址轉換非常重要。由於內核模塊的內存是從內核的
+動態內存池中分配的，因此無論是模塊的開頭還是模塊中的函數和符號都沒有固定的
+位置。
+
+內核支持系統調用，允許程序確定加載哪些模塊及其在內存中的位置。klogd守護進程
+使用這些系統調用構建了一個符號表，可用於調試可加載內核模塊中發生的保護錯誤。
+
+klogd至少會提供產生保護故障的模塊的名稱。如果可加載模塊的開發人員選擇從模塊
+導出符號信息，則可能會有其他可用的符號信息。
+
+由於內核模塊環境可以是動態的，因此當模塊環境發生變化時，必須有一種通知
+``klogd`` 守護進程的機制。有一些可用的命令行選項允許klogd向當前正在執行的守
+護進程發出信號示意應該刷新符號信息。有關更多信息，請參閱 ``klogd`` 手冊頁。
+
+sysklogd發行版附帶了一個補丁，它修改了 ``modules-2.0.0`` 包，以便在加載或
+卸載模塊時自動向klogd發送信號。應用此補丁基本上可無縫支持調試內核可加載模塊
+發生的保護故障。
+
+以下是 ``klogd`` 處理的可加載模塊中的保護故障示例::
+
+	Aug 29 09:51:01 blizard kernel: Unable to handle kernel paging request at virtual address f15e97cc
+	Aug 29 09:51:01 blizard kernel: current->tss.cr3 = 0062d000, %cr3 = 0062d000
+	Aug 29 09:51:01 blizard kernel: *pde = 00000000
+	Aug 29 09:51:01 blizard kernel: Oops: 0002
+	Aug 29 09:51:01 blizard kernel: CPU:    0
+	Aug 29 09:51:01 blizard kernel: EIP:    0010:[oops:_oops+16/3868]
+	Aug 29 09:51:01 blizard kernel: EFLAGS: 00010212
+	Aug 29 09:51:01 blizard kernel: eax: 315e97cc   ebx: 003a6f80   ecx: 001be77b   edx: 00237c0c
+	Aug 29 09:51:01 blizard kernel: esi: 00000000   edi: bffffdb3   ebp: 00589f90   esp: 00589f8c
+	Aug 29 09:51:01 blizard kernel: ds: 0018   es: 0018   fs: 002b   gs: 002b   ss: 0018
+	Aug 29 09:51:01 blizard kernel: Process oops_test (pid: 3374, process nr: 21, stackpage=00589000)
+	Aug 29 09:51:01 blizard kernel: Stack: 315e97cc 00589f98 0100b0b4 bffffed4 0012e38e 00240c64 003a6f80 00000001
+	Aug 29 09:51:01 blizard kernel:        00000000 00237810 bfffff00 0010a7fa 00000003 00000001 00000000 bfffff00
+	Aug 29 09:51:01 blizard kernel:        bffffdb3 bffffed4 ffffffda 0000002b 0007002b 0000002b 0000002b 00000036
+	Aug 29 09:51:01 blizard kernel: Call Trace: [oops:_oops_ioctl+48/80] [_sys_ioctl+254/272] [_system_call+82/128]
+	Aug 29 09:51:01 blizard kernel: Code: c7 00 05 00 00 00 eb 08 90 90 90 90 90 90 90 90 89 ec 5d c3
+
+---------------------------------------------------------------------------
+
+::
+
+  Dr. G.W. Wettstein           Oncology Research Div. Computing Facility
+  Roger Maris Cancer Center    INTERNET: greg@wind.rmcc.com
+  820 4th St. N.
+  Fargo, ND  58122
+  Phone: 701-234-7556
+
diff --git a/Documentation/translations/zh_TW/admin-guide/clearing-warn-once.rst b/Documentation/translations/zh_TW/admin-guide/clearing-warn-once.rst
new file mode 100644
index 000000000000..bdc1a22046cf
--- /dev/null
+++ b/Documentation/translations/zh_TW/admin-guide/clearing-warn-once.rst
@@ -0,0 +1,16 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Translator: 胡皓文 Hu Haowen <src.res@email.cn>
+
+清除 WARN_ONCE
+--------------
+
+WARN_ONCE / WARN_ON_ONCE / printk_once 僅僅列印一次消息.
+
+echo 1 > /sys/kernel/debug/clear_warn_once
+
+可以清除這種狀態並且再次允許列印一次告警信息，這對於運行測試集後重現問題
+很有用。
+
diff --git a/Documentation/translations/zh_TW/admin-guide/cpu-load.rst b/Documentation/translations/zh_TW/admin-guide/cpu-load.rst
new file mode 100644
index 000000000000..be087cef1967
--- /dev/null
+++ b/Documentation/translations/zh_TW/admin-guide/cpu-load.rst
@@ -0,0 +1,112 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Translator: 胡皓文 Hu Haowen <src.res@email.cn>
+
+========
+CPU 負載
+========
+
+Linux通過``/proc/stat``和``/proc/uptime``導出各種信息，用戶空間工具
+如top(1)使用這些信息計算系統花費在某個特定狀態的平均時間。
+例如：
+
+    $ iostat
+    Linux 2.6.18.3-exp (linmac)     02/20/2007
+
+    avg-cpu:  %user   %nice %system %iowait  %steal   %idle
+              10.01    0.00    2.92    5.44    0.00   81.63
+
+    ...
+
+這裡系統認爲在默認採樣周期內有10.01%的時間工作在用戶空間，2.92%的時
+間用在系統空間，總體上有81.63%的時間是空閒的。
+
+大多數情況下``/proc/stat``的信息幾乎真實反映了系統信息，然而，由於內
+核採集這些數據的方式/時間的特點，有時這些信息根本不可靠。
+
+那麼這些信息是如何被搜集的呢？每當時間中斷觸發時，內核查看此刻運行的
+進程類型，並增加與此類型/狀態進程對應的計數器的值。這種方法的問題是
+在兩次時間中斷之間系統（進程）能夠在多種狀態之間切換多次，而計數器只
+增加最後一種狀態下的計數。
+
+舉例
+---
+
+假設系統有一個進程以如下方式周期性地占用cpu::
+
+     兩個時鐘中斷之間的時間線
+    |-----------------------|
+     ^                     ^
+     |_ 開始運行           |
+                           |_ 開始睡眠
+                           （很快會被喚醒）
+
+在上面的情況下，根據``/proc/stat``的信息（由於當系統處於空閒狀態時，
+時間中斷經常會發生）系統的負載將會是0
+
+大家能夠想像內核的這種行爲會發生在許多情況下，這將導致``/proc/stat``
+中存在相當古怪的信息::
+
+	/* gcc -o hog smallhog.c */
+	#include <time.h>
+	#include <limits.h>
+	#include <signal.h>
+	#include <sys/time.h>
+	#define HIST 10
+
+	static volatile sig_atomic_t stop;
+
+	static void sighandler (int signr)
+	{
+	(void) signr;
+	stop = 1;
+	}
+	static unsigned long hog (unsigned long niters)
+	{
+	stop = 0;
+	while (!stop && --niters);
+	return niters;
+	}
+	int main (void)
+	{
+	int i;
+	struct itimerval it = { .it_interval = { .tv_sec = 0, .tv_usec = 1 },
+				.it_value = { .tv_sec = 0, .tv_usec = 1 } };
+	sigset_t set;
+	unsigned long v[HIST];
+	double tmp = 0.0;
+	unsigned long n;
+	signal (SIGALRM, &sighandler);
+	setitimer (ITIMER_REAL, &it, NULL);
+
+	hog (ULONG_MAX);
+	for (i = 0; i < HIST; ++i) v[i] = ULONG_MAX - hog (ULONG_MAX);
+	for (i = 0; i < HIST; ++i) tmp += v[i];
+	tmp /= HIST;
+	n = tmp - (tmp / 3.0);
+
+	sigemptyset (&set);
+	sigaddset (&set, SIGALRM);
+
+	for (;;) {
+		hog (n);
+		sigwait (&set, &i);
+	}
+	return 0;
+	}
+
+
+參考
+---
+
+- https://lore.kernel.org/r/loom.20070212T063225-663@post.gmane.org
+- Documentation/filesystems/proc.rst (1.8)
+
+
+謝謝
+---
+
+Con Kolivas, Pavel Machek
+
diff --git a/Documentation/translations/zh_TW/admin-guide/index.rst b/Documentation/translations/zh_TW/admin-guide/index.rst
new file mode 100644
index 000000000000..293c20245783
--- /dev/null
+++ b/Documentation/translations/zh_TW/admin-guide/index.rst
@@ -0,0 +1,135 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :doc:`../../../admin-guide/index`
+:Translator: 胡皓文 Hu Haowen <src.res@email.cn>
+
+Linux 內核用戶和管理員指南
+==========================
+
+下面是一組隨時間添加到內核中的面向用戶的文檔的集合。到目前爲止，還沒有一個
+整體的順序或組織 - 這些材料不是一個單一的，連貫的文件！幸運的話，情況會隨著
+時間的推移而迅速改善。
+
+這個初始部分包含總體信息，包括描述內核的README， 關於內核參數的文檔等。
+
+.. toctree::
+   :maxdepth: 1
+
+   README
+
+Todolist:
+
+   kernel-parameters
+   devices
+   sysctl/index
+
+本節介紹CPU漏洞及其緩解措施。
+
+Todolist:
+
+   hw-vuln/index
+
+下面的一組文檔，針對的是試圖跟蹤問題和bug的用戶。
+
+.. toctree::
+   :maxdepth: 1
+
+   reporting-issues
+   security-bugs
+   bug-hunting
+   bug-bisect
+   tainted-kernels
+   init
+
+Todolist:
+
+   reporting-bugs
+   ramoops
+   dynamic-debug-howto
+   kdump/index
+   perf/index
+
+這是應用程式開發人員感興趣的章節的開始。可以在這裡找到涵蓋內核ABI各個
+方面的文檔。
+
+Todolist:
+
+   sysfs-rules
+
+本手冊的其餘部分包括各種指南，介紹如何根據您的喜好配置內核的特定行爲。
+
+
+.. toctree::
+   :maxdepth: 1
+
+   clearing-warn-once
+   cpu-load
+   unicode
+
+Todolist:
+
+   acpi/index
+   aoe/index
+   auxdisplay/index
+   bcache
+   binderfs
+   binfmt-misc
+   blockdev/index
+   bootconfig
+   braille-console
+   btmrvl
+   cgroup-v1/index
+   cgroup-v2
+   cifs/index
+   cputopology
+   dell_rbu
+   device-mapper/index
+   edid
+   efi-stub
+   ext4
+   nfs/index
+   gpio/index
+   highuid
+   hw_random
+   initrd
+   iostats
+   java
+   jfs
+   kernel-per-CPU-kthreads
+   laptops/index
+   lcd-panel-cgram
+   ldm
+   lockup-watchdogs
+   LSM/index
+   md
+   media/index
+   mm/index
+   module-signing
+   mono
+   namespaces/index
+   numastat
+   parport
+   perf-security
+   pm/index
+   pnp
+   rapidio
+   ras
+   rtc
+   serial-console
+   svga
+   sysrq
+   thunderbolt
+   ufs
+   vga-softcursor
+   video-output
+   xfs
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
+
diff --git a/Documentation/translations/zh_TW/admin-guide/init.rst b/Documentation/translations/zh_TW/admin-guide/init.rst
new file mode 100644
index 000000000000..32cdf134948f
--- /dev/null
+++ b/Documentation/translations/zh_TW/admin-guide/init.rst
@@ -0,0 +1,58 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :doc:`../../../admin-guide/init`
+
+:譯者:
+
+ 吳想成 Wu XiangCheng <bobwxc@email.cn>
+ 胡皓文 Hu Haowen <src.res@email.cn>
+
+解釋「No working init found.」啓動掛起消息
+==========================================
+
+:作者:
+
+ Andreas Mohr <andi at lisas period de>
+
+ Cristian Souza <cristianmsbr at gmail period com>
+
+本文檔提供了加載初始化二進位（init binary）失敗的一些高層級原因（大致按執行
+順序列出）。
+
+1) **無法掛載根文件系統Unable to mount root FS** ：請設置「debug」內核參數（在
+   引導加載程序bootloader配置文件或CONFIG_CMDLINE）以獲取更詳細的內核消息。
+
+2) **初始化二進位不存在於根文件系統上init binary doesn't exist on rootfs** ：
+   確保您的根文件系統類型正確（並且 ``root=`` 內核參數指向正確的分區）；擁有
+   所需的驅動程序，例如SCSI或USB等存儲硬體；文件系統（ext3、jffs2等）是內建的
+   （或者作爲模塊由initrd預加載）。
+
+3) **控制台設備損壞Broken console device** ： ``console= setup`` 中可能存在
+   衝突 --> 初始控制台不可用（initial console unavailable）。例如，由於串行
+   IRQ問題（如缺少基於中斷的配置）導致的某些串行控制台不可靠。嘗試使用不同的
+   ``console= device`` 或像 ``netconsole=`` 。
+
+4) **二進位存在但依賴項不可用Binary exists but dependencies not available** ：
+   例如初始化二進位的必需庫依賴項，像 ``/lib/ld-linux.so.2`` 丟失或損壞。使用
+   ``readelf -d <INIT>|grep NEEDED`` 找出需要哪些庫。
+
+5) **無法加載二進位Binary cannot be loaded** ：請確保二進位的體系結構與您的
+   硬體匹配。例如i386不匹配x86_64，或者嘗試在ARM硬體上加載x86。如果您嘗試在
+   此處加載非二進位文件（shell腳本？），您應該確保腳本在其工作頭（shebang
+   header）行 ``#!/...`` 中指定能正常工作的解釋器（包括其庫依賴項）。在處理
+   腳本之前，最好先測試一個簡單的非腳本二進位文件，比如 ``/bin/sh`` ，並確認
+   它能成功執行。要了解更多信息，請將代碼添加到 ``init/main.c`` 以顯示
+   kernel_execve()的返回值。
+
+當您發現新的失敗原因時，請擴展本解釋（畢竟加載初始化二進位是一個 **關鍵** 且
+艱難的過渡步驟，需要儘可能無痛地進行），然後向LKML提交一個補丁。
+
+待辦事項：
+
+- 通過一個可以存儲 ``kernel_execve()`` 結果值的結構體數組實現各種
+  ``run_init_process()`` 調用，並在失敗時通過疊代 **所有** 結果來記錄一切
+  （非常重要的可用性修復）。
+- 試著使實現本身在一般情況下更有幫助，例如在受影響的地方提供額外的錯誤消息。
+
diff --git a/Documentation/translations/zh_TW/admin-guide/reporting-issues.rst b/Documentation/translations/zh_TW/admin-guide/reporting-issues.rst
new file mode 100644
index 000000000000..27638e199f13
--- /dev/null
+++ b/Documentation/translations/zh_TW/admin-guide/reporting-issues.rst
@@ -0,0 +1,1337 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
+..
+   If you want to distribute this text under CC-BY-4.0 only, please use 'The
+   Linux kernel developers' for author attribution and link this as source:
+   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-issues.rst
+..
+   Note: Only the content of this RST file as found in the Linux kernel sources
+   is available under CC-BY-4.0, as versions of this text that were processed
+   (for example by the kernel's build system) might contain content taken from
+   files which use a more restrictive license.
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: Documentation/admin-guide/reporting-issues.rst
+
+:譯者:
+
+ 吳想成 Wu XiangCheng <bobwxc@email.cn>
+ 胡皓文 Hu Haowen <src.res@email.cn>
+
+
+報告問題
++++++++++
+
+
+簡明指南（亦即 太長不看）
+==========================
+
+您面臨的是否爲同系列穩定版或長期支持內核的普通內核的回歸？是否仍然受支持？
+請搜索 `LKML內核郵件列表 <https://lore.kernel.org/lkml/>`_ 和
+`Linux穩定版郵件列表 <https://lore.kernel.org/stable/>`_ 存檔中匹配的報告並
+加入討論。如果找不到匹配的報告，請安裝該系列的最新版本。如果它仍然出現問題，
+報告給穩定版郵件列表（stable@vger.kernel.org）。
+
+在所有其他情況下，請儘可能猜測是哪個內核部分導致了問題。查看MAINTAINERS文件，
+了解開發人員希望如何得知問題，大多數情況下，報告問題都是通過電子郵件和抄送
+相關郵件列表進行的。檢查報告目的地的存檔中是否已有匹配的報告；也請搜索
+`LKML <https://lore.kernel.org/lkml/>`_ 和網絡。如果找不到可加入的討論，請
+安裝 `最新的主線內核 <https://kernel.org/>`_ 。如果仍存在問題，請發送報告。
+
+問題已經解決了，但是您希望看到它在一個仍然支持的穩定版或長期支持系列中得到
+解決？請安裝其最新版本。如果它出現了問題，那麼在主線中搜索修復它的更改，並
+檢查是否正在回傳（backporting）或者已放棄；如果兩者都沒有，那麼可詢問處理
+更改的人員。
+
+**通用提醒** ：當安裝和測試上述內核時，請確保它是普通的（即：沒有補丁，也沒
+有使用附加模塊）。還要確保它是在一個正常的環境中構建和運行，並且在問題發生
+之前沒有被汙染（tainted）。
+
+在編寫報告時，要涵蓋與問題相關的所有信息，如使用的內核和發行版。在碰見回歸時，
+嘗試給出引入它的更改的提交ID，二分可以找到它。如果您同時面臨Linux內核的多個
+問題，請分別報告每個問題。
+
+一旦報告發出，請回答任何出現的問題，並儘可能地提供幫助。這包括通過不時重新
+測試新版本並發送狀態更新來推動進展。
+
+
+如何向內核維護人員報告問題的逐步指南
+=====================================
+
+上面的簡明指南概述了如何向Linux內核開發人員報告問題。對於已經熟悉向自由和開
+源軟體（FLOSS）項目報告問題的人來說，這可能是他們所需要的全部內容。對於其他
+人，本部分更爲詳細，並一步一步地描述。爲了便於閱讀，它仍然儘量簡潔，並省略
+了許多細節；這些在逐步指南後的參考章節中進行了描述，該章節更詳細地解釋了每
+個步驟。
+
+注意：本節涉及的方面比簡明指南多，順序也稍有不同。這符合你的利益，以確保您
+儘早意識到看起來像Linux內核毛病的問題可能實際上是由其他原因引起的。這些步驟
+可以確保你最終不會覺得在這一過程中投入的時間是浪費：
+
+ * 您是否面臨硬體或軟體供應商提供的Linux內核的問題？那麼基本上您最好停止閱讀
+   本文檔，轉而向您的供應商報告問題，除非您願意自己安裝最新的Linux版本。尋找
+   和解決問題往往需要後者。
+
+ * 使用您喜愛的網絡搜尋引擎對現有報告進行粗略搜索；此外，請檢查
+   `Linux內核郵件列表（LKML） <https://lore.kernel.org/lkml/>`_ 的存檔。如果
+   找到匹配的報告，請加入討論而不是發送新報告。
+
+ * 看看你正在處理的問題是否爲回歸問題、安全問題或非常嚴重的問題：這些都是需
+   要在接下來的一些步驟中特別處理的「高優先級問題」。
+
+ * 確保不是內核環境導致了您面臨的問題。
+
+ * 創建一個新的備份，並將系統修復和恢復工具放在手邊。
+
+ * 確保您的系統不會通過動態構建額外的內核模塊來增強其內核，像DKMS這樣的解決
+   方案可能在您不知情的情況下就在本地進行了這樣的工作。
+
+ * 當問題發生時，檢查您的內核是否被「汙染」，因爲使內核設置這個標誌的事件可能
+   會導致您面臨的問題。
+
+ * 粗略地寫下如何重現這個問題。如果您同時處理多個問題，請爲每個問題單獨寫注
+   釋，並確保它們在新啓動的系統上獨立出現。這是必要的，因爲每個問題都需要分
+   別報告給內核開發人員，除非它們嚴重糾纏在一起。
+
+ * 如果您正面臨穩定版或長期支持版本線的回歸（例如從5.10.4更新到5.10.5時出現
+   故障），請查看後文「報告穩定版和長期支持內核線的回歸」小節。
+
+ * 定位可能引起問題的驅動程序或內核子系統。找出其開發人員期望的報告的方式和
+   位置。注意：大多數情況下不會是 bugzilla.kernel.org，因爲問題通常需要通
+   過郵件發送給維護人員和公共郵件列表。
+
+ * 在缺陷追蹤器或問題相關郵件列表的存檔中徹底搜索可能與您的問題匹配的報告。
+   如果你發現了一些相關討論，請加入討論而不是發送新的報告。
+
+在完成這些準備之後，你將進入主要部分：
+
+ * 除非您已經在運行最新的「主線」Linux內核，否則最好在報告流程前安裝它。在某些
+   情況下，使用最新的「穩定版」Linux進行測試和報告也是可以接受的替代方案；在
+   合併窗口期間，這實際上可能是最好的方法，但在開發階段最好還是暫停幾天。無論
+   你選擇什麼版本，最好使用「普通」構建。忽略這些建議會大大增加您的報告被拒絕
+   或忽略的風險。
+
+ * 確保您剛剛安裝的內核在運行時不會「汙染」自己。
+
+ * 在您剛剛安裝的內核中復現這個問題。如果它沒有出現，請查看下方只發生在
+   穩定版和長期支持內核的問題的說明。
+
+ * 優化你的筆記：試著找到並寫出最直接的復現問題的方法。確保最終結果包含所有
+   重要的細節，同時讓第一次聽說的人容易閱讀和理解。如果您在此過程中學到了一
+   些東西，請考慮再次搜索關於該問題的現有報告。
+
+ * 如果失敗涉及「panic」、「Oops」、「warning」或「BUG」，請考慮解碼內核日誌以查找觸
+   發錯誤的代碼行。
+
+ * 如果您的問題是回歸問題，請儘可能縮小引入問題時的範圍。
+
+ * 通過詳細描述問題來開始編寫報告。記得包括以下條目：您爲復現而安裝的最新內
+   核版本、使用的Linux發行版以及關於如何復現該問題的說明。如果可能，將內核
+   構建配置（.config）和 ``dmesg`` 的輸出放在網上的某個地方，並連結到它。包
+   含或上傳所有其他可能相關的信息，如Oops的輸出/截圖或來自 ``lspci`` 的輸出
+   。一旦你寫完了這個主要部分，請在上方插入一個正常長度的段落快速概述問題和
+   影響。再在此之上添加一個簡單描述問題的句子，以得到人們的閱讀。現在給出一
+   個更短的描述性標題或主題。然後就可以像MAINTAINERS文件告訴你的那樣發送或
+   提交報告了，除非你在處理一個「高優先級問題」：它們需要按照下面「高優先級問
+   題的特殊處理」所述特別關照。
+
+ * 等待別人的反應，繼續推進事情，直到你能夠接受這樣或那樣的結果。因此，請公
+   開和及時地回應任何詢問。測試提出的修復。積極地測試：至少重新測試每個新主
+   線版本的首個候選版本（RC），並報告你的結果。如果出現拖延，就友好地提醒一
+   下。如果你沒有得到任何幫助或者未能滿意，請試著自己幫助自己。
+
+
+報告穩定版和長期支持內核線的回歸
+----------------------------------
+
+如果您發現了穩定版或長期支持內核版本線中的回歸問題並按上述流程跳到這裡，那麼
+請閱讀本小節。即例如您在從5.10.4更新到5.10.5時出現了問題（從5.9.15到5.10.5則
+不是）。開發人員希望儘快修復此類回歸，因此有一個簡化流程來報告它們：
+
+ * 檢查內核開發人員是否仍然維護你關心的Linux內核版本線：去 `kernel.org 的首頁
+   <https://kernel.org/>`_ ，確保此特定版本線的最新版沒有「[EOL]」標記。
+
+ * 檢查 `Linux穩定版郵件列表 <https://lore.kernel.org/stable/>`_ 中的現有報告。
+
+ * 從特定的版本線安裝最新版本作爲純淨內核。確保這個內核沒有被汙染，並且仍然
+   存在問題，因爲問題可能已經在那裡被修復了。如果您第一次發現供應商內核的問題，
+   請檢查已知最新版本的普通構建是否可以正常運行。
+
+ * 向Linux穩定版郵件列表發送一個簡短的問題報告(stable@vger.kernel.org)。大致
+   描述問題，並解釋如何復現。講清楚首個出現問題的版本和最後一個工作正常的版本。
+   然後等待進一步的指示。
+
+下面的參考章節部分詳細解釋了這些步驟中的每一步。
+
+
+報告只發生在較舊內核版本線的問題
+----------------------------------
+
+若您嘗試了上述的最新主線內核，但未能在那裡復現問題，那麼本小節適用於您；以下
+流程有助於使問題在仍然支持的穩定版或長期支持版本線，或者定期基於最新穩定版或
+長期支持內核的供應商內核中得到修復。如果是這種情況，請執行以下步驟：
+
+ * 請做好準備，接下來的幾個步驟可能無法在舊版本中解決問題：修復可能太大或太
+   冒險，無法移植到那裡。
+
+ * 執行前節「報告穩定版和長期支持內核線的回歸」中的前三個步驟。
+
+ * 在Linux內核版本控制系統中搜索修復主線問題的更改，因爲它的提交消息可能會
+   告訴你修復是否已經計劃好了支持。如果你沒有找到，搜索適當的郵件列表，尋找
+   討論此類問題或同行評議可能修復的帖子；然後檢查討論是否認爲修復不適合支持。
+   如果支持根本不被考慮，加入最新的討論，詢問是否有可能。
+
+ * 前面的步驟之一應該會給出一個解決方案。如果仍未能成功，請向可能引起問題的
+   子系統的維護人員詢問建議；抄送特定子系統的郵件列表以及穩定版郵件列表
+
+下面的參考章節部分詳細解釋了這些步驟中的每一步。
+
+
+參考章節：向內核維護者報告問題
+===============================
+
+上面的詳細指南簡要地列出了所有主要步驟，這對大多數人來說應該足夠了。但有時，
+即使是有經驗的用戶也可能想知道如何實際執行這些步驟之一。這就是本節的目的，
+因爲它將提供關於上述每個步驟的更多細節。請將此作爲參考文檔：可以從頭到尾
+閱讀它。但它主要是爲了瀏覽和查找如何實際執行這些步驟的詳細信息。
+
+在深入挖掘細節之前，我想先給你一些一般性建議：
+
+ * Linux內核開發人員很清楚這個過程很複雜，比其他的FLOSS項目要求更多。我們很
+   希望讓它更簡單。但這需要在不同的地方以及一些基礎設施上付諸努力，這些基礎
+   設施需要持續的維護；尚未有人站出來做這些工作，所以目前情況就是這樣。
+
+ * 與某些供應商簽訂的保證或支持合同並不能使您有權要求上游Linux內核社區的開
+   發人員進行修復：這樣的合同完全在Linux內核、其開發社區和本文檔的範圍之外。
+   這就是爲什麼在這種情況下，你不能要求任何契約保證，即使開發人員處理的問
+   題對供應商有效。如果您想主張您的權利，使用供應商的支持渠道代替。當這樣做
+   的時候，你可能想提出你希望看到這個問題在上游Linux內核中修復；可以這是確
+   保最終修復將被納入所有Linux發行版的唯一方法來鼓勵他們。
+
+ * 如果您從未向FLOSS項目報告過任何問題，那麼您應該考慮閱讀 `如何有效地報告
+   缺陷 <https://www.chiark.greenend.org.uk/~sgtatham/bugs.html>`_ ， `如何
+   以明智的方式提問 <http://www.catb.org/esr/faqs/smart-questions.html>`_ ，
+   和 `如何提出好問題 <https://jvns.ca/blog/good-questions/>`_ 。
+
+解決這些問題之後，可以在下面找到如何正確地向Linux內核報告問題的詳細信息。
+
+
+確保您使用的是上游Linux內核
+----------------------------
+
+   *您是否面臨硬體或軟體供應商提供的Linux內核的問題？那麼基本上您最好停止閱
+   讀本文檔，轉而向您的供應商報告問題，除非您願意自己安裝最新的Linux版本。
+   尋找和解決問題往往需要後者。*
+
+與大多數程式設計師一樣，Linux內核開發人員不喜歡花時間處理他們維護的原始碼中根本
+不會發生的問題的報告。這只會浪費每個人的時間，尤其是你的時間。不幸的是，當
+涉及到內核時，這樣的情況很容易發生，並且常常導致雙方氣餒。這是因爲幾乎所有預
+裝在設備（台式機、筆記本電腦、智慧型手機、路由器等）上的Linux內核，以及大多數
+由Linux發行商提供的內核，都與由kernel.org發行的官方Linux內核相距甚遠：從Linux
+開發的角度來看，這些供應商提供的內核通常是古老的或者經過了大量修改，通常兩點
+兼具。
+
+大多數供應商內核都不適合用來向Linux內核開發人員報告問題：您在其中遇到的問題
+可能已經由Linux內核開發人員在數月或數年前修復；此外，供應商的修改和增強可能
+會導致您面臨的問題，即使它們看起來很小或者完全不相關。這就是爲什麼您應該向
+供應商報告這些內核的問題。它的開發者應該查看報告，如果它是一個上游問題，直接
+於上游修復或將報告轉發到那裡。在實踐中，這有時行不通。因此，您可能需要考慮
+通過自己安裝最新的Linux內核內核來繞過供應商。如果如果您選擇此方法，那麼本指
+南後面的步驟將解釋如何在排除了其他可能導致您的問題的原因後執行此操作。
+
+注意前段使用的詞語是「大多數」，因爲有時候開發人員實際上願意處理供應商內核出現
+的問題報告。他們是否這麼做很大程度上取決於開發人員和相關問題。如果發行版只
+根據最近的Linux版本對內核進行了較小修改，那麼機會就比較大；例如對於Debian
+GNU/Linux Sid或Fedora Rawhide所提供的主線內核。一些開發人員還將接受基於最新
+穩定內核的發行版內核問題報告，只要它改動不大；例如Arch Linux、常規Fedora版本
+和openSUSE Turboweed。但是請記住，您最好使用主線Linux，並避免在此流程中使用
+穩定版內核，如「安裝一個新的內核進行測試」一節中所詳述。
+
+當然，您可以忽略所有這些建議，並向上游Linux開發人員報告舊的或經過大量修改的
+供應商內核的問題。但是注意，這樣的報告經常被拒絕或忽視，所以自行小心考慮一下。
+不過這還是比根本不報告問題要好：有時候這樣的報告會直接或間接地幫助解決之後的
+問題。
+
+
+搜索現有報告（第一部分）
+-------------------------
+
+    *使用您喜愛的網絡搜尋引擎對現有報告進行粗略搜索；此外，請檢查Linux內核
+    郵件列表（LKML）的存檔。如果找到匹配的報告，請加入討論而不是發送新報告。*
+
+報告一個別人已經提出的問題，對每個人來說都是浪費時間，尤其是作爲報告人的你。
+所以徹底檢查是否有人已經報告了這個問題，這對你自己是有利的。在流程中的這一步，
+可以只執行一個粗略的搜索：一旦您知道您的問題需要報告到哪裡，稍後的步驟將告訴
+您如何詳細搜索。儘管如此，不要倉促完成這一步，它可以節省您的時間和減少麻煩。
+
+只需先用你最喜歡的搜尋引擎在網際網路上搜索。然後再搜索Linux內核郵件列表（LKML）
+存檔。
+
+如果搜索結果實在太多，可以考慮讓你的搜尋引擎將搜索時間範圍限制在過去的一個
+月或一年。而且無論你在哪裡搜索，一定要用恰當的搜索關鍵詞；也要變化幾次關鍵
+詞。同時，試著從別人的角度看問題：這將幫助你想出其他的關鍵詞。另外，一定不
+要同時使用過多的關鍵詞。記住搜索時要同時嘗試包含和不包含內核驅動程序的名稱
+或受影響的硬體組件的名稱等信息。但其確切的品牌名稱（比如說「華碩紅魔 Radeon
+RX 5700 XT Gaming OC」）往往幫助不大，因爲它太具體了。相反，嘗試搜索術語，如
+型號（Radeon 5700 或 Radeon 5000）和核心代號（「Navi」或「Navi10」），以及包含
+和不包含其製造商（「AMD」）。
+
+如果你發現了關於你的問題的現有報告，請加入討論，因爲你可能會提供有價值的額
+外信息。這一點很重要，即使是在修復程序已經準備好或處於最後階段，因爲開發人
+員可能會尋找能夠提供額外信息或測試建議修復程序的人。跳到「發布報告後的責任」
+一節，了解有關如何正確參與的細節。
+
+注意，搜索 `bugzilla.kernel.org <https://bugzilla.kernel.org/>`_ 網站可能
+也是一個好主意，因爲這可能會提供有價值的見解或找到匹配的報告。如果您發現後者，
+請記住：大多數子系統都希望在不同的位置報告，如下面「你需要將問題報告到何處」
+一節中所述。因此本應處理這個問題的開發人員甚至可能不知道bugzilla的工單。所以
+請檢查工單中的問題是否已經按照本文檔所述得到報告，如果沒有，請考慮這樣做。
+
+高優先級的問題？
+-----------------
+
+    *看看你正在處理的問題是否是回歸問題、安全問題或非常嚴重的問題：這些都是
+    需要在接下來的一些步驟中特別處理的「高優先級問題」。*
+
+Linus Torvalds和主要的Linux內核開發人員希望看到一些問題儘快得到解決，因此在
+報告過程中有一些「高優先級問題」的處理略有不同。有三種情況符合條件:回歸、安全
+問題和非常嚴重的問題。
+
+如果在舊版本的Linux內核中工作的東西不能在新版本的Linux內核中工作，或者某種
+程度上在新版本的Linux內核中工作得更差，那麼你就需要處理「回歸」。因此，當一個
+在Linux 5.7中表現良好的WiFi驅動程序在5.8中表現不佳或根本不能工作時，這是一
+種回歸。如果應用程式在新的內核中出現不穩定的現象，這也是一種回歸，這可能是
+由於內核和用戶空間之間的接口（如procfs和sysfs）發生不兼容的更改造成的。顯著
+的性能降低或功耗增加也可以稱爲回歸。但是請記住:新內核需要使用與舊內核相似的
+配置來構建（參見下面如何實現這一點）。這是因爲內核開發人員在實現新特性時有
+時無法避免不兼容性；但是爲了避免回歸，這些特性必須在構建配置期間顯式地啓用。
+
+什麼是安全問題留給您自己判斷。在繼續之前，請考慮閱讀
+「Documentation/translations/zh_TW/admin-guide/security-bugs.rst」，
+因爲它提供了如何最恰當地處理安全問題的額外細節。
+
+當發生了完全無法接受的糟糕事情時，此問題就是一個「非常嚴重的問題」。例如，
+Linux內核破壞了它處理的數據或損壞了它運行的硬體。當內核突然顯示錯誤消息
+（「kernel panic」）並停止工作，或者根本沒有任何停止信息時，您也在處理一個嚴重
+的問題。注意：不要混淆「panic」（內核停止自身的致命錯誤）和「Oops」（可恢復錯誤），
+因爲顯示後者之後內核仍然在運行。
+
+
+確保環境健康
+--------------
+
+    *確保不是內核所處環境導致了你所面臨的問題。*
+
+看起來很像內核問題的問題有時是由構建或運行時環境引起的。很難完全排除這種問
+題，但你應該儘量減少這種問題：
+
+ * 構建內核時，請使用經過驗證的工具，因爲編譯器或二進位文件中的錯誤可能會導
+   致內核出現錯誤行爲。
+
+ * 確保您的計算機組件在其設計規範內運行；這對處理器、內存和主板尤爲重要。因
+   此，當面臨潛在的內核問題時，停止低電壓或超頻。
+
+ * 儘量確保不是硬體故障導致了你的問題。例如，內存損壞會導致大量的問題，這些
+   問題會表現爲看起來像內核問題。
+
+ * 如果你正在處理一個文件系統問題，你可能需要用 ``fsck`` 檢查一下文件系統，
+   因爲它可能會以某種方式被損壞，從而導致無法預期的內核行爲。
+
+ * 在處理回歸問題時，要確保沒有在更新內核的同時發生了其他變化。例如，這個問
+   題可能是由同時更新的其他軟體引起的。也有可能是在你第一次重啓進入新內核時，
+   某個硬體巧合地壞了。更新系統 BIOS 或改變 BIOS 設置中的某些內容也會導致
+   一些看起來很像內核回歸的問題。
+
+
+爲緊急情況做好準備
+-------------------
+
+    *創建一個全新的備份，並將系統修復和還原工具放在手邊*
+
+我得提醒您，您正在和計算機打交道，計算機有時會出現意想不到的事情，尤其是當
+您折騰其作業系統的內核等關鍵部件時。而這就是你在這個過程中要做的事情。因此，
+一定要創建一個全新的備份；還要確保你手頭有修復或重裝作業系統的所有工具，
+以及恢復備份所需的一切。
+
+
+確保你的內核不會被增強
+------------------------
+
+    *確保您的系統不會通過動態構建額外的內核模塊來增強其內核，像DKMS這樣的解
+    決方案可能在您不知情的情況下就在本地進行了這樣的工作。*
+
+如果內核以任何方式得到增強，那麼問題報告被忽略或拒絕的風險就會急劇增加。這就
+是爲什麼您應該刪除或禁用像akmods和DKMS這樣的機制：這些機制會自動構建額外內核
+模塊，例如當您安裝新的Linux內核或第一次引導它時。也要記得同時刪除他們可能安裝
+的任何模塊。然後重新啓動再繼續。
+
+注意，你可能不知道你的系統正在使用這些解決方案之一：當你安裝 Nvidia 專有圖
+形驅動程序、VirtualBox 或其他需要 Linux 內核以外的模塊支持的軟體時，它們通
+常會靜默設置。這就是爲什麼你可能需要卸載這些軟體的軟體包，以擺脫任何第三方
+內核模塊。
+
+
+檢測「汙染」標誌
+----------------
+
+    *當問題發生時，檢查您的內核是否被「汙染」，因爲使內核設置這個標誌的事件可
+    能會導致您面臨的問題。*
+
+當某些可能會導致看起來完全不相關的後續錯誤的事情發生時，內核會用「汙染
+（taint）」標誌標記自己。如果您的內核受到汙染，那麼您面臨的可能是這樣的錯誤。
+因此在投入更多時間到這個過程中之前，儘早排除此情況可能對你有好處。這是這個
+步驟出現在這裡的唯一原因，因爲這個過程稍後會告訴您安裝最新的主線內核；然後
+您將需要再次檢查汙染標誌，因爲當它出問題的時候內核報告會關注它。
+
+在正在運行的系統上檢查內核是否汙染非常容易：如果 ``cat /proc/sys/kernel/tainted``
+返回「0」，那麼內核沒有被汙染，一切正常。在某些情況下無法檢查該文件；這就是
+爲什麼當內核報告內部問題（「kernel bug」）、可恢復錯誤（「kernel Oops」）或停止
+操作前不可恢復的錯誤（「kernel panic」）時，它也會提到汙染狀態。當其中一個錯
+誤發生時，查看列印的錯誤消息的頂部，搜索以「CPU:」開頭的行。如果發現問題時內
+核未被汙染，那麼它應該以「Not infected」結束；如果你看到「Tainted:」且後跟一些
+空格和字母，那就被汙染了。
+
+如果你的內核被汙染了，請閱讀「Documentation/translations/zh_TW/admin-guide/tainted-kernels.rst」
+以找出原因。設法消除汙染因素。通常是由以下三種因素之一引起的：
+
+ 1. 發生了一個可恢復的錯誤（「kernel Oops」），內核汙染了自己，因爲內核知道在
+    此之後它可能會出現奇怪的行爲錯亂。在這種情況下，檢查您的內核或系統日誌，
+    並尋找以下列文字開頭的部分::
+
+       Oops: 0000 [#1] SMP
+
+    如方括號中的「#1」所示，這是自啓動以來的第一次Oops。每個Oops和此後發生的
+    任何其他問題都可能是首個Oops的後續問題，即使這兩個問題看起來完全不相關。
+    通過消除首個Oops的原因並在之後復現該問題，可以排除這種情況。有時僅僅
+    重新啓動就足夠了，有時更改配置後重新啓動可以消除Oops。但是在這個流程中
+    不要花費太多時間在這一點上，因爲引起Oops的原因可能已經在您稍後將按流程
+    安裝的新Linux內核版本中修復了。
+
+ 2. 您的系統使用的軟體安裝了自己的內核模塊，例如Nvidia的專有圖形驅動程序或
+    VirtualBox。當內核從外部源（即使它們是開源的）加載此類模塊時，它會汙染
+    自己：它們有時會在不相關的內核區域導致錯誤，從而可能導致您面臨的問題。
+    因此，當您想要向Linux內核開發人員報告問題時，您必須阻止這些模塊加載。
+    大多數情況下最簡單的方法是：臨時卸載這些軟體，包括它們可能已經安裝的任
+    何模塊。之後重新啓動。
+
+ 3. 當內核加載駐留在Linux內核原始碼staging樹中的模塊時，它也會汙染自身。這
+    是一個特殊的區域，代碼（主要是驅動程序）還沒有達到正常Linux內核的質量
+    標準。當您報告此種模塊的問題時，內核受到汙染顯然是沒有問題的；只需確保
+    問題模塊是造成汙染的唯一原因。如果問題發生在一個不相關的區域，重新啓動
+    並通過指定 ``foo.blacklist=1`` 作爲內核參數臨時阻止該模塊被加載（用有
+    問題的模塊名替換「foo」）。
+
+
+記錄如何重現問題
+------------------
+
+    *粗略地寫下如何重現這個問題。如果您同時處理多個問題，請爲每個問題單獨寫
+    注釋，並確保它們在新啓動的系統上獨立出現。這是必要的，因爲每個問題都需
+    要分別報告給內核開發人員，除非它們嚴重糾纏在一起。*
+
+如果你同時處理多個問題，必須分別報告每個問題，因爲它們可能由不同的開發人員
+處理。在一份報告中描述多種問題，也會讓其他人難以將其分開。因此只有在問題嚴
+重糾纏的情況下，才能將問題合併在一份報告中。
+
+此外，在報告過程中，你必須測試該問題是否發生在其他內核版本上。因此，如果您
+知道如何在一個新啓動的系統上快速重現問題，將使您的工作更加輕鬆。
+
+注意：報告只發生過一次的問題往往是沒有結果的，因爲它們可能是由於宇宙輻射導
+致的位翻轉。所以你應該嘗試通過重現問題來排除這種情況，然後再繼續。如果你有
+足夠的經驗來區分由於硬體故障引起的一次性錯誤和難以重現的罕見內核問題，可以
+忽略這個建議。
+
+
+穩定版或長期支持內核的回歸？
+-----------------------------
+
+    *如果您正面臨穩定版或長期支持版本線的回歸（例如從5.10.4更新到5.10.5時出現
+    故障），請查看後文「報告穩定版和長期支持內核線的回歸」小節。*
+
+穩定版和長期支持內核版本線中的回歸是Linux開發人員非常希望解決的問題，這樣的
+問題甚至比主線開發分支中的回歸更不應出現，因爲它們會很快影響到很多人。開發人員
+希望儘快了解此類問題，因此有一個簡化流程來報告這些問題。注意，使用更新內核版
+本線的回歸（比如從5.9.15切換到5.10.5時出現故障）不符合條件。
+
+
+你需要將問題報告到何處
+------------------------
+
+    *定位可能引起問題的驅動程序或內核子系統。找出其開發人員期望的報告的方式
+    和位置。注意：大多數情況下不會是bugzilla.kernel.org，因爲問題通常需要通
+    過郵件發送給維護人員和公共郵件列表。*
+
+將報告發送給合適的人是至關重要的，因爲Linux內核是一個大項目，大多數開發人員
+只熟悉其中的一小部分。例如，相當多的程式設計師只關心一個驅動程序，比如一個WiFi
+晶片驅動程序；它的開發人員可能對疏遠的或不相關的「子系統」（如TCP堆棧、
+PCIe/PCI子系統、內存管理或文件系統）的內部知識了解很少或完全不了解。
+
+問題在於：Linux內核缺少一個，可以簡單地將問題歸檔並讓需要了解它的開發人員了
+解它的，中心化缺陷跟蹤器。這就是爲什麼你必須找到正確的途徑來自己報告問題。
+您可以在腳本的幫助下做到這一點（見下文），但它主要針對的是內核開發人員和專
+家。對於其他人來說，MAINTAINERS（維護人員）文件是更好的選擇。
+
+如何閱讀MAINTAINERS維護者文件
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+爲了說明如何使用 :ref:`MAINTAINERS <maintainers>` 文件，讓我們假設您的筆記
+本電腦中的WiFi在更新內核後突然出現了錯誤行爲。這種情況下可能是WiFi驅動的問
+題。顯然，它也可能由於驅動基於的某些代碼，但除非你懷疑有這樣的東西會附著在
+驅動程序上。如果真的是其他的問題，驅動程序的開發人員會讓合適的人參與進來。
+
+遺憾的是，沒有通用且簡單的辦法來檢查哪個代碼驅動了特定硬體組件。
+
+在WiFi驅動出現問題的情況下，你可能想查看 ``lspci -k`` 的輸出，因爲它列出了
+PCI/PCIe總線上的設備和驅動它的內核模塊::
+
+       [user@something ~]$ lspci -k
+       [...]
+       3a:00.0 Network controller: Qualcomm Atheros QCA6174 802.11ac Wireless Network Adapter (rev 32)
+         Subsystem: Bigfoot Networks, Inc. Device 1535
+         Kernel driver in use: ath10k_pci
+         Kernel modules: ath10k_pci
+       [...]
+
+但如果你的WiFi晶片通過USB或其他內部總線連接，這種方法就行不通了。在這種情況
+下，您可能需要檢查您的WiFi管理器或 ``ip link`` 的輸出。尋找有問題的網絡接口
+的名稱，它可能類似於「wlp58s0」。此名稱可以用來找到驅動它的模塊::
+
+       [user@something ~]$ realpath --relative-to=/sys/module//sys/class/net/wlp58s0/device/driver/module
+       ath10k_pci
+
+如果這些技巧不能進一步幫助您，請嘗試在網上搜索如何縮小相關驅動程序或子系統
+的範圍。如果你不確定是哪一個：試著猜一下，即使你猜得不好，也會有人會幫助你
+的。
+
+一旦您知道了相應的驅動程序或子系統，您就希望在MAINTAINERS文件中搜索它。如果
+是「ath10k_pci」，您不會找到任何東西，因爲名稱太具體了。有時你需要在網上尋找
+幫助；但在此之前，請嘗試使用一個稍短或修改過的名稱來搜索MAINTAINERS文件，因
+爲這樣你可能會發現類似這樣的東西::
+
+       QUALCOMM ATHEROS ATH10K WIRELESS DRIVER
+       Mail:          A. Some Human <shuman@example.com>
+       Mailing list:  ath10k@lists.infradead.org
+       Status:        Supported
+       Web-page:      https://wireless.wiki.kernel.org/en/users/Drivers/ath10k
+       SCM:           git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
+       Files:         drivers/net/wireless/ath/ath10k/
+
+注意：如果您閱讀在Linux原始碼樹的根目錄中找到的原始維護者文件，則行描述將是
+縮寫。例如，「Mail:（郵件）」將是「M:」，「Mailing list:（郵件列表）」將是「L」，
+「Status:（狀態）」將是「S:」。此文件頂部有一段解釋了這些和其他縮寫。
+
+首先查看「Status」狀態行。理想情況下，它應該得到「Supported（支持）」或
+「Maintained（維護）」。如果狀態爲「Obsolete（過時的）」，那麼你在使用一些過時的
+方法，需要轉換到新的解決方案上。有時候，只有在感到有動力時，才會有人爲代碼
+提供「Odd Fixes」。如果碰見「Orphan」，你就完全不走運了，因爲再也沒有人關心代碼
+了，只剩下這些選項:準備好與問題共存，自己修復它，或者找一個願意修復它的程式設計師。
+
+檢查狀態後，尋找以「bug:」開頭的一行：它將告訴你在哪裡可以找到子系統特定的缺
+陷跟蹤器來提交你的問題。上面的例子沒有此行。大多數部分都是這樣，因爲 Linux
+內核的開發完全是由郵件驅動的。很少有子系統使用缺陷跟蹤器，且其中只有一部分
+依賴於 bugzilla.kernel.org。
+
+在這種以及其他很多情況下，你必須尋找以「Mail:」開頭的行。這些行提到了特定代碼
+的維護者的名字和電子郵件地址。也可以查找以「Mailing list:」開頭的行，它告訴你
+開發代碼的公共郵件列表。你的報告之後需要通過郵件發到這些地址。另外，對於所有
+通過電子郵件發送的問題報告，一定要抄送 Linux Kernel Mailing List（LKML）
+<linux-kernel@vger.kernel.org>。在以後通過郵件發送問題報告時，不要遺漏任何
+一個郵件列表!維護者都是大忙人，可能會把一些工作留給子系統特定列表上的其他開
+發者；而 LKML 很重要，因爲需要一個可以找到所有問題報告的地方。
+
+
+藉助腳本找到維護者
+~~~~~~~~~~~~~~~~~~~~
+
+對於手頭有Linux源碼的人來說，有第二個可以找到合適的報告地點的選擇：腳本
+「scripts/get_maintainer.pl」，它嘗試找到所有要聯繫的人。它會查詢MAINTAINERS
+文件，並需要用相關原始碼的路徑來調用。對於編譯成模塊的驅動程序，經常可以用
+這樣的命令找到::
+
+       $ modinfo ath10k_pci | grep filename | sed 's!/lib/modules/.*/kernel/!!; s!filename:!!; s!\.ko\(\|\.xz\)!!'
+       drivers/net/wireless/ath/ath10k/ath10k_pci.ko
+
+將其中的部分內容傳遞給腳本::
+
+       $ ./scripts/get_maintainer.pl -f drivers/net/wireless/ath/ath10k*
+       Some Human <shuman@example.com> (supporter:QUALCOMM ATHEROS ATH10K WIRELESS DRIVER)
+       Another S. Human <asomehuman@example.com> (maintainer:NETWORKING DRIVERS)
+       ath10k@lists.infradead.org (open list:QUALCOMM ATHEROS ATH10K WIRELESS DRIVER)
+       linux-wireless@vger.kernel.org (open list:NETWORKING DRIVERS (WIRELESS))
+       netdev@vger.kernel.org (open list:NETWORKING DRIVERS)
+       linux-kernel@vger.kernel.org (open list)
+
+不要把你的報告發給所有的人。發送給維護者，腳本稱之爲「supporter:」；另外抄送
+代碼最相關的郵件列表，以及 Linux 內核郵件列表（LKML）。在此例中，你需要將報
+告發送給 「Some Human <shuman@example.com>」 ，並抄送
+「ath10k@lists.infradead.org」和「linux-kernel@vger.kernel.org」。
+
+注意：如果你用 git 克隆了 Linux 原始碼，你可能需要用--git 再次調用
+get_maintainer.pl。腳本會查看提交歷史，以找到最近哪些人參與了相關代碼的編寫，
+因爲他們可能會提供幫助。但要小心使用這些結果，因爲它很容易讓你誤入歧途。
+例如，這種情況常常會發生在很少被修改的地方（比如老舊的或未維護的驅動程序）：
+有時這樣的代碼會在樹級清理期間被根本不關心此驅動程序的開發者修改。
+
+
+搜索現有報告（第二部分）
+--------------------------
+
+    *在缺陷追蹤器或問題相關郵件列表的存檔中徹底搜索可能與您的問題匹配的報告。
+    如果找到匹配的報告，請加入討論而不是發送新報告。*
+
+如前所述：報告一個別人已經提出的問題，對每個人來說都是浪費時間，尤其是作爲報告
+人的你。這就是爲什麼你應該再次搜索現有的報告。現在你已經知道問題需要報告到哪裡。
+如果是郵件列表，那麼一般在 `lore.kernel.org <https://lore.kernel.org/>`_ 可以
+找到相應存檔。
+
+但有些列表運行在其他地方。例如前面步驟中當例子的ath10k WiFi驅動程序就是這種
+情況。但是你通常可以在網上很容易地找到這些列表的檔案。例如搜索「archive
+ath10k@lists.infradead.org」，將引導您到ath10k郵件列表的信息頁，該頁面頂部連結
+到其 `列表存檔 <https://lists.infradead.org/pipermail/ath10k/>`_ 。遺憾的是，
+這個列表和其他一些列表缺乏搜索其存檔的功能。在這種情況下可以使用常規的網際網路
+搜尋引擎，並添加類似「site:lists.infadead.org/pipermail/ath10k/」這
+樣的搜索條件，這會把結果限制在該連結中的檔案。
+
+也請進一步搜索網絡、LKML和bugzilla.kernel.org網站。
+
+有關如何搜索以及在找到匹配報告時如何操作的詳細信息，請參閱上面的「搜索現有報告
+（第一部分）」。
+
+不要急著完成報告過程的這一步：花30到60分鐘甚至更多的時間可以爲你和其他人節省 /
+減少相當多的時間和麻煩。
+
+
+安裝一個新的內核進行測試
+--------------------------
+
+    *除非您已經在運行最新的「主線」Linux內核，否則最好在報告流程前安裝它。在
+    某些情況下，使用最新的「穩定版」Linux進行測試和報告也是可以接受的替代方案；
+    在合併窗口期間，這實際上可能是最好的方法，但在開發階段最好還是暫停幾天。
+    無論你選擇什麼版本，最好使用「普通」構建。忽略這些建議會大大增加您的報告
+    被拒絕或忽略的風險。*
+
+正如第一步的詳細解釋中所提到的：與大多數程式設計師一樣，與大多數程式設計師一樣，Linux
+內核開發人員不喜歡花時間處理他們維護的原始碼中根本不會發生的問題的報告。這隻
+會浪費每個人的時間，尤其是你的時間。這就是爲什麼在報告問題之前，您必須先確認
+問題仍然存在於最新的上游代碼中，這符合每個人的利益。您可以忽略此建議，但如前
+所述：這樣做會極大地增加問題報告被拒絕或被忽略的風險。
+
+內核「最新上游」的範圍通常指：
+
+ * 安裝一個主線內核；最新的穩定版內核也可以是一個選擇，但大多數時候都最好避免。
+   長期支持內核（有時稱爲「LTS內核」）不適合此流程。下一小節將更詳細地解釋所有
+   這些。
+
+ * 下一小節描述獲取和安裝這樣一個內核的方法。它還指出了使用預編譯內核是可以的，
+   但普通的內核更好，這意味著：它是直接使用從 `kernel.org <https://kernel.org/>`_
+   獲得的Linux原始碼構建並且沒有任何方式修改或增強。
+
+
+選擇適合測試的版本
+~~~~~~~~~~~~~~~~~~~~
+
+前往 `kernel.org <https://kernel.org/>`_ 來決定使用哪個版本。忽略那個寫著
+「Latest release最新版本」的巨大黃色按鈕，往下看有一個表格。在表格的頂部，你會
+看到一行以「mainline」開頭的字樣，大多數情況下它會指向一個版本號類似「5.8-rc2」
+的預發布版本。如果是這樣的話，你將需要使用這個主線內核進行測試。不要讓「rc」
+嚇到你，這些「開發版內核」實際上非常可靠——而且你已經按照上面的指示做了備份，
+不是嗎？
+
+大概每九到十周，「mainline」可能會給你指出一個版本號類似「5.7」的正式版本。如果
+碰見這種情況，請考慮暫停報告過程，直到下一個版本的第一個預發布（5.8-rc1）出
+現在 `kernel.org <https://kernel.org/>`_ 上。這是因爲 Linux 的開發周期正在
+兩周的「合併窗口」內。大部分的改動和所有干擾性的改動都會在這段時間內被合併到
+下一個版本中。在此期間使用主線是比較危險的。內核開發者通常也很忙，可能沒有
+多餘的時間來處理問題報告。這也是很有可能在合併窗口中應用了許多修改來修復你
+所面臨的問題；這就是爲什麼你很快就得用一個新的內核版本重新測試，就像下面「發
+布報告後的責任」一節中所述的那樣。
+
+這就是爲什麼要等到合併窗口結束後才去做。但是如果你處理的是一些不應該等待的
+東西，則無需這樣做。在這種情況下，可以考慮通過 git 獲取最新的主線內核（見下
+文），或者使用 kernel.org 上提供的最新穩定版本。如果 mainline 因爲某些原因
+不無法正常工作，那麼使用它也是可以接受的。總的來說：用它來重現問題也比完全
+不報告問題要好。
+
+最好避免在合併窗口外使用最新的穩定版內核，因爲所有修復都必須首先應用於主線。
+這就是爲什麼檢查最新的主線內核是如此重要：你希望看到在舊版本線修復的任何問題
+需要先在主線修復，然後才能得到回傳，這可能需要幾天或幾周。另一個原因是：您
+希望的修復對於回傳來說可能太難或太冒險；因此再次報告問題不太可能改變任何事情。
+
+這些方面也部分表明了爲什麼長期支持內核（有時稱爲「LTS內核」）不適合報告流程：
+它們與當前代碼的距離太遠。因此，先去測試主線，然後再按流程走：如果主線沒有
+出現問題，流程將指導您如何在舊版本線中修復它。
+
+如何獲得新的 Linux 內核
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+你可以使用預編譯或自編譯的內核進行測試；如果你選擇後者，可以使用 git 獲取源
+代碼，或者下載其 tar 存檔包。
+
+**使用預編譯的內核** ：這往往是最快速、最簡單、最安全的方法——尤其是在你不熟
+悉 Linux 內核的情況下。問題是：發行商或附加存儲庫提供的大多數版本都是從修改
+過的Linux原始碼構建的。因此它們不是普通的，通常不適合於測試和問題報告：這些
+更改可能會導致您面臨的問題或以某種方式影響問題。
+
+但是如果您使用的是流行的Linux發行版，那麼您就很幸運了：對於大部分的發行版，
+您可以在網上找到包含最新主線或穩定版本Linux內核包的存儲庫。使用這些是完全可
+以的，只要從存儲庫的描述中確認它們是普通的或者至少接近普通。此外，請確保軟體
+包包含kernel.org上提供的最新版本內核。如果這些軟體包的時間超過一周，那麼它們
+可能就不合適了，因爲新的主線和穩定版內核通常至少每周發布一次。
+
+請注意，您以後可能需要手動構建自己的內核：有時這是調試或測試修復程序所必需的，
+如後文所述。還要注意，預編譯的內核可能缺少在出現panic、Oops、warning或BUG時
+解碼內核列印的消息所需的調試符號；如果您計劃解碼這些消息，最好自己編譯內核
+（有關詳細信息，請參閱本小節結尾和「解碼失敗信息」小節）。
+
+**使用git** ：熟悉 git 的開發者和有經驗的 Linux 用戶通常最好直接從
+`kernel.org 上的官方開發倉庫
+<https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/>`_
+中獲取最新的 Linux 內核原始碼。這些很可能比最新的主線預發布版本更新一些。不
+用擔心：它們和正式的預發布版本一樣可靠，除非內核的開發周期目前正處於合併窗
+口中。不過即便如此，它們也是相當可靠的。
+
+**常規方法** ：不熟悉 git 的人通常最好從 `kernel.org <https://kernel.org/>`_
+下載源碼的tar 存檔包。
+
+如何實際構建一個內核並不在這裡描述，因爲許多網站已經解釋了必要的步驟。如果
+你是新手，可以考慮按照那些建議使用 ``make localmodconfig`` 來做，它將嘗試獲
+取你當前內核的配置，然後根據你的系統進行一些調整。這樣做並不能使編譯出來的
+內核更好，但可以更快地編譯。
+
+注意：如果您正在處理來自內核的pannc、Oops、warning或BUG，請在配置內核時嘗試
+啓用 CONFIG_KALLSYMS 選項。此外，還可以啓用 CONFIG_DEBUG_KERNEL 和
+CONFIG_DEBUG_INFO；後者是相關選項，但只有啓用前者才能開啓。請注意，
+CONFIG_DEBUG_INFO 會需要更多儲存空間來構建內核。但這是值得的，因爲這些選項將
+允許您稍後精確定位觸發問題的確切代碼行。下面的「解碼失敗信息」一節對此進行了更
+詳細的解釋。
+
+但請記住：始終記錄遇到的問題，以防難以重現。發送未解碼的報告總比不報告要好。
+
+
+檢查「汙染」標誌
+----------------
+
+    *確保您剛剛安裝的內核在運行時不會「汙染」自己。*
+
+正如上面已經詳細介紹過的：當發生一些可能會導致一些看起來完全不相關的後續錯
+誤的事情時，內核會設置一個「汙染」標誌。這就是爲什麼你需要檢查你剛剛安裝的內
+核是否有設置此標誌。如果有的話，幾乎在任何情況下你都需要在報告問題之前先消
+除它。詳細的操作方法請看上面的章節。
+
+
+用新內核重現問題
+------------------
+
+    *在您剛剛安裝的內核中復現這個問題。如果它沒有出現，請查看下方只發生在
+    穩定版和長期支持內核的問題的說明。*
+
+檢查這個問題是否發生在你剛剛安裝的新 Linux 內核版本上。如果新內核已經修復了，
+可以考慮使用此版本線，放棄報告問題。但是請記住，只要它沒有在 `kernel.org
+<https://kernel.org/>`_ 的穩定版和長期版（以及由這些版本衍生出來的廠商內核）
+中得到修復，其他用戶可能仍然會受到它的困擾。如果你喜歡使用其中的一個，或
+者只是想幫助它們的用戶，請前往下面的「報告只發生在較舊內核版本線的問題」一節。
+
+
+優化復現問題的描述
+--------------------
+
+    *優化你的筆記：試著找到並寫出最直接的復現問題的方法。確保最終結果包含所
+    有重要的細節，同時讓第一次聽說的人容易閱讀和理解。如果您在此過程中學到
+    了一些東西，請考慮再次搜索關於該問題的現有報告。*
+
+過於複雜的報告會讓別人很難理解。因此請儘量找到一個可以直接描述、易於以書面
+形式理解的再現方法。包含所有重要的細節，但同時也要儘量保持簡短。
+
+在這在前面的步驟中，你很可能已經了解了一些關於你所面臨的問題的點。利用這些
+知識，再次搜索可以轉而加入的現有報告。
+
+
+解碼失敗信息
+-------------
+
+    *如果失敗涉及「panic」、「Oops」、「warning」或「BUG」，請考慮解碼內核日誌以查找
+    觸發錯誤的代碼行。*
+
+當內核檢測到內部問題時，它會記錄一些有關已執行代碼的信息。這使得在原始碼中精
+確定位觸發問題的行並顯示如何調用它成爲可能。但只有在配置內核時啓用了
+CONFIG_DEBUG_INFO 和 CONFIG_KALLSYMS選項時，這種方法才起效。如果已啓用此選項，
+請考慮解碼內核日誌中的信息。這將使我們更容易理解是什麼導致了「panic」、「Oops」、
+「warning」或「BUG」，從而增加了有人提供修復的機率。
+
+解碼可以通過Linux原始碼樹中的腳本來完成。如果您運行的內核是之前自己編譯的，
+這樣這樣調用它::
+
+	[user@something ~]$ sudo dmesg | ./linux-5.10.5/scripts/decode_stacktrace.sh ./linux-5.10.5/vmlinux
+	/usr/lib/debug/lib/modules/5.10.10-4.1.x86_64/vmlinux /usr/src/kernels/5.10.10-4.1.x86_64/
+
+如果您運行的是打包好的普通內核，則可能需要安裝帶有調試符號的相應包。然後按以下
+方式調用腳本（如果發行版未打包，則可能需要從Linux原始碼獲取）::
+
+	[user@something ~]$ sudo dmesg | ./linux-5.10.5/scripts/decode_stacktrace.sh \
+	/usr/lib/debug/lib/modules/5.10.10-4.1.x86_64/vmlinux /usr/src/kernels/5.10.10-4.1.x86_64/
+
+腳本將解碼如下的日誌行，這些日誌行顯示內核在發生錯誤時正在執行的代碼的地址::
+
+	[   68.387301] RIP: 0010:test_module_init+0x5/0xffa [test_module]
+
+解碼之後，這些行將變成這樣::
+
+	[   68.387301] RIP: 0010:test_module_init (/home/username/linux-5.10.5/test-module/test-module.c:16) test_module
+
+在本例中，執行的代碼是從文件「~/linux-5.10.5/test-module/test-module.c」構建的，
+錯誤出現在第16行的指令中。
+
+該腳本也會如此解碼以「Call trace」開頭的部分中提到的地址，該部分顯示出現問題的
+函數的路徑。此外，腳本還會顯示內核正在執行的代碼部分的彙編輸出。
+
+注意，如果你沒法做到這一點，只需跳過這一步，並在報告中說明原因。如果你幸運的
+話，可能無需解碼。如果需要的話，也許有人會幫你做這件事情。還要注意，這只是解
+碼內核堆棧跟蹤的幾種方法之一。有時需要採取不同的步驟來檢索相關的詳細信息。
+別擔心，如果您碰到的情況需要這樣做，開發人員會告訴您該怎麼做。
+
+
+對回歸的特別關照
+-----------------
+
+    *如果您的問題是回歸問題，請儘可能縮小引入問題時的範圍。*
+
+Linux 首席開發者 Linus Torvalds 認爲 Linux 內核永遠不應惡化，這就是爲什麼他
+認爲回歸是不可接受的，並希望看到它們被迅速修復。這就是爲什麼引入了回歸的改
+動導致的問題若無法通過其他方式快速解決，通常會被迅速撤銷。因此，報告回歸有
+點像「王炸」，會迅速得到修復。但要做到這一點，需要知道導致回歸的變化。通常情
+況下，要由報告者來追查罪魁禍首，因爲維護者往往沒有時間或手頭設置不便來自行
+重現它。
+
+有一個叫做「二分」的過程可以來尋找變化，這在
+「Documentation/translations/zh_TW/admin-guide/bug-bisect.rst」文檔中進行了詳細
+的描述，這個過程通常需要你構建十到二十個內核鏡像，每次都嘗試在構建下一個鏡像
+之前重現問題。是的，這需要花費一些時間，但不用擔心，它比大多數人想像的要快得多。
+多虧了「binary search二進位搜索」，這將引導你在原始碼管理系統中找到導致回歸的提交。
+一旦你找到它，就在網上搜索其主題、提交ID和縮短的提交ID（提交ID的前12個字符）。
+如果有的話，這將引導您找到關於它的現有報告。
+
+需要注意的是，二分法需要一點竅門，不是每個人都懂得訣竅，也需要相當多的努力，
+不是每個人都願意投入。儘管如此，還是強烈建議自己進行一次二分。如果你真的
+不能或者不想走這條路，至少要找出是哪個主線內核引入的回歸。比如說從 5.5.15
+切換到 5.8.4 的時候出現了一些問題，那麼至少可以嘗試一下相近的所有的主線版本
+（5.6、5.7 和 5.8）來檢查它是什麼時候出現的。除非你想在一個穩定版或長期支持
+內核中找到一個回歸，否則要避免測試那些編號有三段的版本（5.6.12、5.7.8），因
+爲那會使結果難以解釋，可能會讓你的測試變得無用。一旦你找到了引入回歸的主要
+版本，就可以放心地繼續報告了。但請記住：在不知道罪魁禍首的情況下，開發人員
+是否能夠提供幫助取決於手頭的問題。有時他們可能會從報告中確認是什麼出現了問
+題，並能修復它；有時他們可能無法提供幫助，除非你進行二分。
+
+當處理回歸問題時，請確保你所面臨的問題真的是由內核引起的，而不是由其他東西
+引起的，如上文所述。
+
+在整個過程中，請記住：只有當舊內核和新內核的配置相似時，問題才算回歸。最好
+的方法是：把配置文件（``.config``）從舊的工作內核直接複製到你嘗試的每個新內
+核版本。之後運行 ``make oldnoconfig`` 來調整它以適應新版本的需要，而不啓用
+任何新的功能，因爲那些功能也可能導致回歸。
+
+
+撰寫並發送報告
+---------------
+
+    *通過詳細描述問題來開始編寫報告。記得包括以下條目：您爲復現而安裝的最新
+    內核版本、使用的Linux發行版以及關於如何復現該問題的說明。如果可能，將內
+    核構建配置（.config）和 ``dmesg`` 的輸出放在網上的某個地方，並連結到它。
+    包含或上傳所有其他可能相關的信息，如Oops的輸出/截圖或來自 ``lspci``
+    的輸出。一旦你寫完了這個主要部分，請在上方插入一個正常長度的段落快速概
+    述問題和影響。再在此之上添加一個簡單描述問題的句子，以得到人們的閱讀。
+    現在給出一個更短的描述性標題或主題。然後就可以像MAINTAINERS文件告訴你的
+    那樣發送或提交報告了，除非你在處理一個「高優先級問題」：它們需要按照下面
+    「高優先級問題的特殊處理」所述特別關照。*
+
+現在你已經準備好了一切，是時候寫你的報告了。上文前言中連結的三篇文檔對如何
+寫報告做了部分解釋。這就是爲什麼本文將只提到一些基本的內容以及 Linux 內核特
+有的東西。
+
+有一點是符合這兩類的：你的報告中最關鍵的部分是標題/主題、第一句話和第一段。
+開發者經常會收到許多郵件。因此，他們往往只是花幾秒鐘的時間瀏覽一下郵件，然
+後再決定繼續下一封或仔細查看。因此，你報告的開頭越好，有人研究並幫助你的機
+會就越大。這就是爲什麼你應該暫時忽略他們，先寫出詳細的報告。;-)
+
+每份報告都應提及的事項
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+詳細描述你的問題是如何發生在你安裝的新純淨內核上的。試著包含你之前寫的和優
+化過的分步說明，概述你和其他人如何重現這個問題；在極少數無法重現的情況下，
+儘量描述你做了什麼來觸發它。
+
+還應包括其他人爲了解該問題及其環境而可能需要的所有相關信息。實際需要的東西
+在很大程度上取決於具體問題，但有些事項你總是應該包括在內：
+
+ * ``cat /proc/version`` 的輸出，其中包含 Linux 內核版本號和構建時的編譯器。
+
+ * 機器正在運行的 Linux 發行版（ ``hostnamectl | grep 「Operating System「`` ）
+
+ * CPU 和作業系統的架構（ ``uname -mi`` ）
+
+ * 如果您正在處理回歸，並進行了二分，請提及導致回歸的變更的主題和提交ID。
+
+許多情況下，讓讀你報告的人多了解兩件事也是明智之舉：
+
+ * 用於構建 Linux 內核的配置（「.config」文件）
+
+ * 內核的信息，你從 ``dmesg`` 得到的信息寫到一個文件里。確保它以像「Linux
+   version 5.8-1 (foobar@example.com) (gcc (GCC) 10.2.1, GNU ld version
+   2.34) #1 SMP Mon Aug 3 14:54:37 UTC 2020」這樣的行開始，如果沒有，那麼第
+   一次啓動階段的重要信息已經被丟棄了。在這種情況下，可以考慮使用
+   ``journalctl -b 0 -k`` ；或者你也可以重啓，重現這個問題，然後調用
+   ``dmesg`` 。
+
+這兩個文件很大，所以直接把它們放到你的報告中是個壞主意。如果你是在缺陷跟蹤
+器中提交問題，那麼將它們附加到工單中。如果你通過郵件報告問題，不要用附件附
+上它們，因爲那會使郵件變得太大，可以按下列之一做：
+
+ * 將文件上傳到某個公開的地方（你的網站，公共文件粘貼服務，在
+   `bugzilla.kernel.org <https://bugzilla.kernel.org/>`_ 上創建的工單……），
+   並在你的報告中放上連結。理想情況下請使用允許這些文件保存很多年的地方，因
+   爲它們可能在很多年後對別人有用；例如 5 年或 10 年後，一個開發者正在修改
+   一些代碼，而這些代碼正是爲了修復你的問題。
+
+ * 把文件放在一邊，然後說明你會在他人回復時再單獨發送。只要記得報告發出去後，
+   真正做到這一點就可以了。;-)
+
+提供這些東西可能是明智的
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+根據問題的不同，你可能需要提供更多的背景數據。這裡有一些關於提供什麼比較好
+的建議：
+
+ * 如果你處理的是內核的「warning」、「OOPS」或「panic」，請包含它。如果你不能複製
+   粘貼它，試著用netconsole網絡終端遠程跟蹤或者至少拍一張屏幕的照片。
+
+ * 如果問題可能與你的電腦硬體有關，請說明你使用的是什麼系統。例如，如果你的
+   顯卡有問題，請提及它的製造商，顯卡的型號，以及使用的晶片。如果是筆記本電
+   腦，請提及它的型號名稱，但儘量確保意義明確。例如「戴爾 XPS 13」就不很明確，
+   因爲它可能是 2012 年的那款，那款除了看起來和現在銷售的沒有什麼不同之外，
+   兩者沒有任何共同之處。因此，在這種情況下，要加上準確的型號，例如 2019
+   年內推出的 XPS 13 型號爲「9380」或「7390」。像「聯想 Thinkpad T590」這樣的名字
+   也有些含糊不清：這款筆記本有帶獨立顯卡和不帶的子型號，所以要儘量找到準確
+   的型號名稱或註明主要部件。
+
+ * 說明正在使用的相關軟體。如果你在加載模塊時遇到了問題，你要說明正在使用的
+   kmod、systemd 和 udev 的版本。如果其中一個 DRM 驅動出現問題，你要說明
+   libdrm 和 Mesa 的版本；還要說明你的 Wayland 合成器或 X-Server 及其驅動。
+   如果你有文件系統問題，請註明相應的文件系統實用程序的版本（e2fsprogs,
+   btrfs-progs, xfsprogs……）。
+
+ * 從內核中收集可能有用的額外信息。例如， ``lspci -nn`` 的輸出可以幫助別人
+   識別你使用的硬體。如果你的硬體有問題，你甚至可以給出 ``sudo lspci -vvv``
+   的結果，因爲它提供了組件是如何配置的信息。對於一些問題，可能最好包含
+   ``/proc/cpuinfo`` ， ``/proc/ioports`` ， ``/proc/iomem`` ，
+   ``/proc/modules`` 或 ``/proc/scsi/scsi`` 等文件的內容。一些子系統還提
+   供了收集相關信息的工具。 ``alsa-info.sh`` `就是這樣一個工具，它是音頻/聲
+   音子系統開發者提供的  <https://www.alsa-project.org/wiki/AlsaInfo>`_ 。
+
+這些例子應該會給你一些知識點，讓你知道附上什麼數據可能是明智的，但你自己也
+要想一想，哪些數據對別人會有幫助。不要太擔心忘記一些東西，因爲開發人員會要
+求提供他們需要的額外細節。但從一開始就把所有重要的東西都提供出來，會增加別
+人仔細查看的機會。
+
+
+重要部分：報告的開頭
+~~~~~~~~~~~~~~~~~~~~~~
+
+現在你已經準備好了報告的詳細部分，讓我們進入最重要的部分：開頭幾句。現在到
+報告的最前面，在你剛才寫的部分之前加上類似「The detailed description:」（詳細
+描述）這樣的內容，並在最前面插入兩個新行。現在寫一個正常長度的段落，大致概
+述這個問題。去掉所有枯燥的細節，把重點放在讀者需要知道的關鍵部分，以讓人了
+解這是怎麼回事；如果你認爲這個缺陷影響了很多用戶，就提一下這點來吸引大家關
+注。
+
+做好這一點後，在頂部再插入兩行，寫一句話的摘要，快速解釋報告的內容。之後你
+要更加抽象，爲報告寫一個更短的主題/標題。
+
+現在你已經寫好了這部分，請花點時間來優化它，因爲它是你的報告中最重要的部分：
+很多人會先讀這部分，然後才會決定是否值得花時間閱讀其他部分。
+
+現在就像 :ref:`MAINTAINERS <maintainers>` 維護者文件告訴你的那樣發送或提交
+報告，除非它是前面概述的那些「高優先級問題」之一：在這種情況下，請先閱讀下一
+小節，然後再發送報告。
+
+高優先級問題的特殊處理
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+高優先級問題的報告需要特殊處理。
+
+**非常嚴重的缺陷** ：確保在主題或工單標題以及第一段中明顯標出 severeness
+（非常嚴重的）。
+
+**回歸** ：如果問題是一個回歸，請在郵件的主題或缺陷跟蹤器的標題中添加
+[REGRESSION]。如果您沒有進行二分，請至少註明您測試的最新主線版本（比如 5.7）
+和出現問題的最新版本（比如 5.8）。如果您成功地進行了二分，請註明導致回歸
+的提交ID和主題。也請添加該變更的作者到你的報告中；如果您需要將您的缺陷提交
+到缺陷跟蹤器中，請將報告以私人郵件的形式轉發給他，並註明報告提交地點。
+
+**安全問題** ：對於這種問題，你將必須評估：如果細節被公開披露，是否會對其他
+用戶產生短期風險。如果不會，只需按照所述繼續報告問題。如果有此風險，你需要
+稍微調整一下報告流程。
+
+ * 如果 MAINTAINERS 文件指示您通過郵件報告問題，請不要抄送任何公共郵件列表。
+
+ * 如果你應該在缺陷跟蹤器中提交問題，請確保將工單標記爲「私有」或「安全問題」。
+   如果缺陷跟蹤器沒有提供保持報告私密性的方法，那就別想了，把你的報告以私人
+   郵件的形式發送給維護者吧。
+
+在這兩種情況下，都一定要將報告發到 MAINTAINERS 文件中「安全聯絡」部分列出的
+地址。理想的情況是在發送報告的時候直接抄送他們。如果您在缺陷跟蹤器中提交了
+報告，請將報告的文本轉發到這些地址；但請在報告的頂部加上注釋，表明您提交了
+報告，並附上工單連結。
+
+更多信息請參見「Documentation/translations/zh_TW/admin-guide/security-bugs.rst」。
+
+
+發布報告後的責任
+------------------
+
+    *等待別人的反應，繼續推進事情，直到你能夠接受這樣或那樣的結果。因此，請
+    公開和及時地回應任何詢問。測試提出的修復。積極地測試：至少重新測試每個
+    新主線版本的首個候選版本（RC），並報告你的結果。如果出現拖延，就友好地
+    提醒一下。如果你沒有得到任何幫助或者未能滿意，請試著自己幫助自己。*
+
+如果你的報告非常優秀，而且你真的很幸運，那麼某個開發者可能會立即發現導致問
+題的原因；然後他們可能會寫一個補丁來修復、測試它，並直接發送給主線集成，同
+時標記它以便以後回溯到需要它的穩定版和長期支持內核。那麼你需要做的就是回復
+一句「Thank you very much」（非常感謝），然後在發布後換上修復好的版本。
+
+但這種理想狀況很少發生。這就是爲什麼你把報告拿出來之後工作才開始。你要做的
+事情要視情況而定，但通常會是下面列出的事情。但在深入研究細節之前，這裡有幾
+件重要的事情，你需要記住這部分的過程。
+
+
+關於進一步互動的一般建議
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**總是公開回復** ：當你在缺陷跟蹤器中提交問題時，一定要在那裡回復，不要私下
+聯繫任何開發者。對於郵件報告，在回復您收到的任何郵件時，總是使用「全部回復」
+功能。這包括帶有任何你可能想要添加到你的報告中的額外數據的郵件：進入郵件應
+用程序「已發送」文件夾，並在郵件上使用「全部回復」來回復報告。這種方法可以確保
+公共郵件列表和其他所有參與者都能及時了解情況；它還能保持郵件線程的完整性，
+這對於郵件列表將所有相關郵件歸爲一類是非常重要的。
+
+只有兩種情況不適合在缺陷跟蹤器或「全部回復」中發表評論：
+
+ * 有人讓你私下發東西。
+
+ * 你被告知要發送一些東西，但注意到其中包含需要保密的敏感信息。在這種情況下，
+   可以私下發送給要求發送的開發者。但要在工單或郵件中註明你是這麼做的，這
+   樣其他人就知道你尊重了這個要求。
+
+**在請求解釋或幫助之前先研究一下** ：在這部分過程中，有人可能會告訴你用尚未
+掌握的技能做一些事情。例如你可能會被要求使用一些你從未聽說過的測試工具；或
+者你可能會被要求在 Linux 內核原始碼上應用一個補丁來測試它是否有幫助。在某些
+情況下，發個回復詢問如何做就可以了。但在走這條路之前，儘量通過在網際網路上搜
+索自行找到答案；或者考慮在其他地方詢問建議。比如詢問朋友，或者到你平時常去
+的聊天室或論壇發帖諮詢。
+
+**要有耐心** ：如果你真的很幸運，你可能會在幾個小時內收到對你的報告的答覆。
+但大多數情況下會花費更多的時間，因爲維護者分散在全球各地，因此可能在不同的
+時區——在那裡他們已經享受著遠離鍵盤的夜晚。
+
+一般來說，內核開發者需要一到五個工作日來回復報告。有時會花費更長的時間，因
+爲他們可能正忙於合併窗口、其他工作、參加開發者會議，或者只是在享受一個漫長
+的暑假。
+
+「高優先級的問題」（見上面的解釋）例外：維護者應該儘快解決這些問題；這就是爲
+什麼你應該最多等待一個星期（如果是緊急的事情，則只需兩天），然後再發送友好
+的提醒。
+
+有時維護者可能沒有及時回復；有時候可能會出現分歧，例如一個問題是否符合回歸
+的條件。在這種情況下，在郵件列表上提出你的顧慮，並請求其他人公開或私下回復
+如何繼續推進。如果失敗了，可能應該讓更高級別的維護者介入。如果是 WiFi 驅動，
+那就是無線維護者；如果沒有更高級別的維護者，或者其他一切努力都失敗了，那
+這可能是一種罕見的、可以讓 Linus Torvalds 參與進來的情況。
+
+**主動測試** ：每當一個新的主線內核版本的第一個預發布版本（rc1）發布的時候，
+去檢查一下這個問題是否得到了解決，或者是否有什麼重要的變化。在工單中或在
+回復報告的郵件中提及結果（確保所有參與討論的人都被抄送）。這將表明你的承諾
+和你願意幫忙。如果問題持續存在，它也會提醒開發者確保他們不會忘記它。其他一
+些不定期的重新測試（例如用rc3、rc5 和最終版本）也是一個好主意，但只有在相關
+的東西發生變化或者你正在寫什麼東西的時候才報告你的結果。
+
+這些些常規的事情就不說了，我們來談談報告後如何幫助解決問題的細節。
+
+查詢和測試請求
+~~~~~~~~~~~~~~~
+
+如果你的報告得到了回復則需履行以下責任：
+
+**檢查與你打交道的人** ：大多數情況下，會是維護者或特定代碼區域的開發人員對
+你的報告做出回應。但由於問題通常是公開報告的，所以回復的可能是任何人——包括
+那些想要幫忙的人，但最後可能會用他們的問題或請求引導你完全偏離軌道。這很少
+發生，但這是快速上網搜搜看你正在與誰互動是明智之舉的許多原因之一。通過這樣
+做，你也可以知道你的報告是否被正確的人聽到，因爲如果討論沒有導致滿意的問題
+解決方案而淡出，之後可能需要提醒維護者（見下文）。
+
+**查詢數據** ：通常你會被要求測試一些東西或提供更多細節。儘快提供所要求的信
+息，因爲你已經得到了可能會幫助你的人的注意，你等待的時間越長就有越可能失去
+關注；如果你不在數個工作日內提供信息，甚至可能出現這種結果。
+
+**測試請求** ：當你被要求測試一個診斷補丁或可能的修復時，也要儘量及時測試。
+但要做得恰當，一定不要急於求成：混淆事情很容易發生，這會給所有人帶來許多困
+惑。例如一個常見的錯誤是以爲應用了一個帶修復的建議補丁，但事實上並沒有。即
+使是有經驗的測試人員也會偶爾發生這樣的事情，但當有修復的內核和沒有修復的內
+核表現得一樣時，他們大多時候會注意到。
+
+當沒有任何實質性進展時該怎麼辦
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+有些報告不會得到負有相關責任的 Linux 內核開發者的任何反應；或者圍繞這個問題
+的討論有所發展，但漸漸淡出，沒有任何實質內容產出。
+
+在這種情況下，要等兩個星期（最好是三個星期）後再發出友好的提醒：也許當你的
+報告到達時，維護者剛剛離開鍵盤一段時間，或者有更重要的事情要處理。在寫提醒
+信的時候，要善意地問一下，是否還需要你這邊提供什麼來讓事情推進下去。如果報
+告是通過郵件發出來的，那就在郵件的第一行回覆你的初始郵件（見上文），其中包
+括下方的原始報告的完整引用：這是少數幾種情況下，這樣的「TOFU」（Text Over,
+Fullquote Under文字在上，完整引用在下）是正確的做法，因爲這樣所有的收件人都
+會以適當的順序立即讓細節到手頭上來。
+
+在提醒之後，再等三周的回覆。如果你仍然沒有得到適當的反饋，你首先應該重新考
+慮你的方法。你是否可能嘗試接觸了錯誤的人？是不是報告也許令人反感或者太混亂，
+以至於人們決定完全遠離它？排除這些因素的最好方法是：把報告給一兩個熟悉
+FLOSS 問題報告的人看，詢問他們的意見。同時徵求他們關於如何繼續推進的建議。
+這可能意味著：準備一份更好的報告，讓這些人在你發出去之前對它進行審查。這樣
+的方法完全可以；只需說明這是關於這個問題的第二份改進的報告，並附上第一份報
+告的連結。
+
+如果報告是恰當的，你可以發送第二封提醒信；在其中詢問爲什麼報告沒有得到任何
+回復。第二封提醒郵件的好時機是在新 Linux 內核版本的首個預發布版本（'rc1'）
+發布後不久，因爲無論如何你都應該在那個時候重新測試並提供狀態更新（見上文）。
+
+如果第二次提醒的結果又在一周內沒有任何反應，可以嘗試聯繫上級維護者詢問意見：
+即使再忙的維護者在這時候也至少應該發過某種確認。
+
+記住要做好失望的準備：理想狀況下維護者最好對每一個問題報告做出回應，但他們
+只有義務解決之前列出的「高優先級問題」。所以，如果你得到的回覆是「謝謝你的報告，
+我目前有更重要的問題要處理，在可預見的未來沒有時間去研究這個問題」，那請不
+要太沮喪。
+
+也有可能在缺陷跟蹤器或列表中進行了一些討論之後，什麼都沒有發生，提醒也無助
+於激勵大家進行修復。這種情況可能是毀滅性的，但在 Linux 內核開發中確實會發生。
+這些和其他得不到幫助的原因在本文結尾處的「爲什麼有些問題在被報告後沒有得到
+任何回應或者仍然沒有修復」中進行了解釋。
+
+如果你沒有得到任何幫助或問題最終沒有得到解決，不要沮喪：Linux 內核是 FLOSS，
+因此你仍然可以自己幫助自己。例如，你可以試著找到其他受影響的人，和他們一
+起合作來解決這個問題。這樣的團隊可以一起準備一份新的報告，提到團隊有多少人，
+爲什麼你們認爲這是應該得到解決的事情。也許你們還可以一起縮小確切原因或引
+入回歸的變化，這往往會使修復更容易。而且如果運氣好的話，團隊中可能會有懂點
+編程的人，也許能寫出一個修複方案。
+
+
+
+「報告穩定版和長期支持內核線的回歸」的參考
+------------------------------------------
+
+本小節提供了在穩定版和長期支持內核線中面對回歸時需要執行的步驟的詳細信息。
+
+確保特定版本線仍然受支持
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    *檢查內核開發人員是否仍然維護你關心的Linux內核版本線：去 kernel.org 的
+    首頁，確保此特定版本線的最新版沒有「[EOL]」標記。*
+
+大多數內核版本線只支持三個月左右，因爲延長維護時間會帶來相當多的工作。因此，
+每年只會選擇一個版本來支持至少兩年（通常是六年）。這就是爲什麼你需要檢查
+內核開發者是否還支持你關心的版本線。
+
+注意，如果 `kernel.org <https://kernel.org/>`_ 在首頁上列出了兩個「穩定」版本，
+你應該考慮切換到較新的版本，而忘掉較舊的版本：對它的支持可能很快就會結束。
+然後，它將被標記爲「生命周期結束」（EOL）。達到這個程度的版本線仍然會在
+`kernel.org <https://kernel.org/>`_ 首頁上被顯示一兩周，但不適合用於測試和
+報告。
+
+搜索穩定版郵件列表
+~~~~~~~~~~~~~~~~~~~
+
+    *檢查Linux穩定版郵件列表中的現有報告。*
+
+也許你所面臨的問題已經被發現，並且已經或即將被修復。因此，請在 `Linux 穩定
+版郵件列表的檔案 <https://lore.kernel.org/stable/>`_ 中搜索類似問題的報告。
+如果你找到任何匹配的問題，可以考慮加入討論，除非修復工作已經完成並計劃很快
+得到應用。
+
+用最新版本復現問題
+~~~~~~~~~~~~~~~~~~~
+
+    *從特定的版本線安裝最新版本作爲純淨內核。確保這個內核沒有被汙染，並且仍
+    然存在問題，因爲問題可能已經在那裡被修復了。*
+
+在投入更多時間到這個過程中之前，你要檢查這個問題是否在你關注的版本線的最新
+版本中已經得到了修復。這個內核需要是純淨的，在問題發生之前不應該被汙染，正
+如上面已經在測試主線的過程中詳細介紹過的一樣。
+
+您是否是第一次注意到供應商內核的回歸？供應商的更改可能會發生變化。你需要重新
+檢查排除來這個問題。當您從5.10.4-vendor.42更新到5.10.5-vendor.43時，記錄損壞
+的信息。然後在測試了前一段中所述的最新5.10版本之後，檢查Linux 5.10.4的普通版本
+是否也可以正常工作。如果問題在那裡出現，那就不符合上游回歸的條件，您需要切換
+回主逐步指南來報告問題。
+
+報告回歸
+~~~~~~~~~~
+
+    *向Linux穩定版郵件列表發送一個簡短的問題報告(stable@vger.kernel.org)。
+    大致描述問題，並解釋如何復現。講清楚首個出現問題的版本和最後一個工作正常
+    的版本。然後等待進一步的指示。*
+
+當報告在穩定版或長期支持內核線內發生的回歸（例如在從5.10.4更新到5.10.5時），
+一份簡短的報告足以快速報告問題。因此只需要粗略的描述。
+
+但是請注意，如果您能夠指明引入問題的確切版本，這將對開發人員有很大幫助。因此
+如果有時間的話，請嘗試使用普通內核找到該版本。讓我們假設發行版發布Linux內核
+5.10.5到5.10.8的更新時發生了故障。那麼按照上面的指示，去檢查該版本線中的最新
+內核，比如5.10.9。如果問題出現，請嘗試普通5.10.5，以確保供應商應用的補丁不會
+干擾。如果問題沒有出現，那麼嘗試5.10.7，然後直到5.10.8或5.10.6（取決於結果）
+找到第一個引入問題的版本。在報告中寫明這一點，並指出5.10.9仍然存在故障。
+
+前一段基本粗略地概述了「二分」方法。一旦報告出來，您可能會被要求做一個正確的
+報告，因爲它允許精確地定位導致問題的確切更改（然後很容易被恢復以快速修復問題）。
+因此如果時間允許，考慮立即進行適當的二分。有關如何詳細信息，請參閱「對回歸的
+特別關照」部分和文檔「Documentation/translations/zh_TW/admin-guide/bug-bisect.rst」。
+
+
+「報告僅在舊內核版本線中發生的問題」的參考
+------------------------------------------
+
+本節詳細介紹了如果無法用主線內核重現問題，但希望在舊版本線（又稱穩定版內核和
+長期支持內核）中修復問題時需要採取的步驟。
+
+有些修復太複雜
+~~~~~~~~~~~~~~~
+
+    *請做好準備，接下來的幾個步驟可能無法在舊版本中解決問題：修復可能太大或
+    太冒險，無法移植到那裡。*
+
+即使是微小的、看似明顯的代碼變化，有時也會帶來新的、完全意想不到的問題。穩
+定版和長期支持內核的維護者非常清楚這一點，因此他們只對這些內核進行符合
+「Documentation/translations/zh_TW/process/stable-kernel-rules.rst」中所列出的
+規則的修改。
+
+複雜或有風險的修改不符合條件，因此只能應用於主線。其他的修復很容易被回溯到
+最新的穩定版和長期支持內核，但是風險太大，無法集成到舊版內核中。所以要注意
+你所希望的修復可能是那些不會被回溯到你所關心的版本線的修復之一。在這種情況
+下，你將別無選擇，要麼忍受這個問題，要麼切換到一個較新的 Linux 版本，除非你
+想自己把修復補丁應用到你的內核中。
+
+通用準備
+~~~~~~~~~~
+
+    *執行上面「報告僅在舊內核版本線中發生的問題」一節中的前三個步驟。*
+
+您需要執行本指南另一節中已經描述的幾個步驟。這些步驟將讓您：
+
+ * 檢查內核開發人員是否仍然維護您關心的Linux內核版本行。
+
+ * 在Linux穩定郵件列表中搜索退出的報告。
+
+ * 檢查最新版本。
+
+
+檢查代碼歷史和搜索現有的討論
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    *在Linux內核版本控制系統中搜索修復主線問題的更改，因爲它的提交消息可能
+    會告訴你修復是否已經計劃好了支持。如果你沒有找到，搜索適當的郵件列表，
+    尋找討論此類問題或同行評議可能修復的帖子；然後檢查討論是否認爲修復不適
+    合支持。如果支持根本不被考慮，加入最新的討論，詢問是否有可能。*
+
+在許多情況下，你所處理的問題會發生在主線上，但已在主線上得到了解決。修正它
+的提交也需要被回溯才能解決這個問題。這就是爲什麼你要搜索它或任何相關討論。
+
+ * 首先嘗試在存放 Linux 內核原始碼的 Git 倉庫中找到修復。你可以通過
+   `kernel.org 上的網頁
+   <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/>`_
+   或 `GitHub 上的鏡像 <https://github.com/torvalds/linux>`_ 來實現；如果你
+   有一個本地克隆，你也可以在命令行用 ``git log --grep=<pattern>`` 來搜索。
+
+   如果你找到了修復，請查看提交消息的尾部是否包含了類似這樣的「穩定版標籤」：
+
+          Cc: <stable@vger.kernel.org> # 5.4+
+
+   像上面這行，開發者標記了安全修復可以回傳到 5.4 及以後的版本。大多數情況
+   下，它會在兩周內被應用到那裡，但有時需要更長的時間。
+
+ * 如果提交沒有告訴你任何東西，或者你找不到修復，請再找找關於這個問題的討論。
+   用你最喜歡的搜尋引擎搜索網絡，以及 `Linux kernel developers mailing
+   list 內核開發者郵件列表 <https://lore.kernel.org/lkml/>`_ 的檔案。也可以
+   閱讀上面的 `定位導致問題的內核區域` 一節，然後按照說明找到導致問題的子系
+   統：它的缺陷跟蹤器或郵件列表存檔中可能有你要找的答案。
+
+ * 如果你看到了一個計劃的修復，請按上所述在版本控制系統中搜索它，因爲提交可
+   能會告訴你是否可以進行回溯。
+
+   * 檢查討論中是否有任何跡象表明，該修復程序可能風險太大，無法回溯到你關心
+     的版本線。如果是這樣的話，你必須忍受這個問題，或者切換到應用了修復的內
+     核版本線。
+
+   * 如果修復的問題未包含穩定版標籤，並且沒有討論過回溯問題，請加入討論：如
+     果合適的話，請提及你所面對的問題的版本，以及你希望看到它被修復。
+
+
+請求建議
+~~~~~~~~~
+
+    *前面的步驟之一應該會給出一個解決方案。如果仍未能成功，請向可能引起問題
+    的子系統的維護人員詢問建議；抄送特定子系統的郵件列表以及穩定版郵件列表。*
+
+如果前面的三個步驟都沒有讓你更接近解決方案，那麼只剩下一個選擇：請求建議。
+在你發給可能是問題根源的子系統的維護者的郵件中這樣做；抄送子系統的郵件列表
+以及穩定版郵件列表（stable@vger.kernel.org）。
+
+
+爲什麼有些問題在報告後沒有任何回應或仍未解決？
+===============================================
+
+當向 Linux 開發者報告問題時，要注意只有「高優先級的問題」（回歸、安全問題、嚴
+重問題）才一定會得到解決。如果維護者或其他人都失敗了，Linus Torvalds 他自己
+會確保這一點。他們和其他內核開發者也會解決很多其他問題。但是要知道，有時他
+們也會不能或不願幫忙；有時甚至沒有人發報告給他們。
+
+最好的解釋就是那些內核開發者常常是在業餘時間爲 Linux 內核做出貢獻。內核中的
+不少驅動程序都是由這樣的程式設計師編寫的，往往只是因爲他們想讓自己的硬體可以在
+自己喜歡的作業系統上使用。
+
+這些程式設計師大多數時候會很樂意修復別人報告的問題。但是沒有人可以強迫他們這樣
+做，因爲他們是自願貢獻的。
+
+還有一些情況下，這些開發者真的很想解決一個問題，但卻不能解決：有時他們缺乏
+硬體編程文檔來解決問題。這種情況往往由於公開的文檔太簡陋，或者驅動程序是通
+過逆向工程編寫的。
+
+業餘開發者遲早也會不再關心某驅動。也許他們的測試硬體壞了，被更高級的玩意取
+代了，或者是太老了以至於只能在計算機博物館裡找到。有時開發者根本就不關心他
+們的代碼和 Linux 了，因爲在他們的生活中一些不同的東西變得更重要了。在某些情
+況下，沒有人願意接手維護者的工作——也沒有人可以被強迫，因爲對 Linux 內核的貢
+獻是自願的。然而被遺棄的驅動程序仍然存在於內核中：它們對人們仍然有用，刪除
+它們可能導致回歸。
+
+對於那些爲 Linux 內核工作而獲得報酬的開發者來說，情況並沒有什麼不同。這些人
+現在貢獻了大部分的變更。但是他們的僱主遲早也會停止關注他們的代碼或者讓程序
+員專注於其他事情。例如，硬體廠商主要通過銷售新硬體來賺錢；因此，他們中的不
+少人並沒有投入太多時間和精力來維護他們多年前就停止銷售的東西的 Linux 內核驅
+動。企業級 Linux 發行商往往持續維護的時間比較長，但在新版本中往往會把對老舊
+和稀有硬體的支持放在一邊，以限制範圍。一旦公司拋棄了一些代碼，往往由業餘貢
+獻者接手，但正如上面提到的：他們遲早也會放下代碼。
+
+優先級是一些問題沒有被修復的另一個原因，因爲維護者相當多的時候是被迫設置這
+些優先級的，因爲在 Linux 上工作的時間是有限的。對於業餘時間或者僱主給予他們
+的開發人員用於上游內核維護工作的時間也是如此。有時維護人員也會被報告淹沒，
+即使一個驅動程序幾乎完美地工作。爲了不被完全纏住，程式設計師可能別無選擇，只能
+對問題報告進行優先級排序而拒絕其中的一些報告。
+
+不過這些都不用太過擔心，很多驅動都有積極的維護者，他們對儘可能多的解決問題
+相當感興趣。
+
+
+結束語
+=======
+
+與其他免費/自由&開源軟體（Free/Libre & Open Source Software，FLOSS）相比，
+向 Linux 內核開發者報告問題是很難的：這個文檔的長度和複雜性以及字裡行間的內
+涵都說明了這一點。但目前就是這樣了。這篇文字的主要作者希望通過記錄現狀來爲
+以後改善這種狀況打下一些基礎。
+
diff --git a/Documentation/translations/zh_TW/admin-guide/security-bugs.rst b/Documentation/translations/zh_TW/admin-guide/security-bugs.rst
new file mode 100644
index 000000000000..eed260ef0c37
--- /dev/null
+++ b/Documentation/translations/zh_TW/admin-guide/security-bugs.rst
@@ -0,0 +1,78 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :doc:`../../../admin-guide/security-bugs`
+
+:譯者:
+
+ 吳想成 Wu XiangCheng <bobwxc@email.cn>
+ 胡皓文 Hu Haowen <src.res@email.cn>
+
+安全缺陷
+=========
+
+Linux內核開發人員非常重視安全性。因此我們想知道何時發現了安全漏洞，以便儘快
+修復和披露。請向Linux內核安全團隊報告安全漏洞。
+
+聯絡
+-----
+
+可以通過電子郵件<security@kernel.org>聯繫Linux內核安全團隊。這是一個安全人員
+的私有列表，他們將幫助驗證錯誤報告並開發和發布修復程序。如果您已經有了一個
+修復，請將其包含在您的報告中，這樣可以大大加快進程。安全團隊可能會從區域維護
+人員那裡獲得額外的幫助，以理解和修復安全漏洞。
+
+與任何缺陷一樣，提供的信息越多，診斷和修復就越容易。如果您不清楚哪些信息有用，
+請查看「Documentation/translations/zh_TW/admin-guide/reporting-issues.rst」中
+概述的步驟。任何利用漏洞的攻擊代碼都非常有用，未經報告者同意不會對外發布，除
+非已經公開。
+
+請儘可能發送無附件的純文本電子郵件。如果所有的細節都藏在附件里，那麼就很難對
+一個複雜的問題進行上下文引用的討論。把它想像成一個
+:doc:`常規的補丁提交 <../process/submitting-patches>` （即使你還沒有補丁）：
+描述問題和影響，列出復現步驟，然後給出一個建議的解決方案，所有這些都是純文本的。
+
+披露和限制信息
+---------------
+
+安全列表不是公開渠道。爲此，請參見下面的協作。
+
+一旦開發出了健壯的補丁，發布過程就開始了。對公開的缺陷的修復會立即發布。
+
+儘管我們傾向於在未公開缺陷的修復可用時即發布補丁，但應報告者或受影響方的請求，
+這可能會被推遲到發布過程開始後的7日內，如果根據缺陷的嚴重性需要更多的時間，
+則可額外延長到14天。推遲發布修復的唯一有效原因是爲了適應QA的邏輯和需要發布
+協調的大規模部署。
+
+雖然可能與受信任的個人共享受限信息以開發修復，但未經報告者許可，此類信息不會
+與修復程序一起發布或發布在任何其他披露渠道上。這包括但不限於原始錯誤報告和
+後續討論（如有）、漏洞、CVE信息或報告者的身份。
+
+換句話說，我們唯一感興趣的是修復缺陷。提交給安全列表的所有其他資料以及對報告
+的任何後續討論，即使在解除限制之後，也將永久保密。
+
+協調
+------
+
+對敏感缺陷（例如那些可能導致權限提升的缺陷）的修復可能需要與私有郵件列表
+<linux-distros@vs.openwall.org>進行協調，以便分發供應商做好準備，在公開披露
+上游補丁時發布一個已修復的內核。發行版將需要一些時間來測試建議的補丁，通常
+會要求至少幾天的限制，而供應商更新發布更傾向於周二至周四。若合適，安全團隊
+可以協助這種協調，或者報告者可以從一開始就包括linux發行版。在這種情況下，請
+記住在電子郵件主題行前面加上「[vs]」，如linux發行版wiki中所述：
+<http://oss-security.openwall.org/wiki/mailing-lists/distros#how-to-use-the-lists>。
+
+CVE分配
+--------
+
+安全團隊通常不分配CVE，我們也不需要它們來進行報告或修復，因爲這會使過程不必
+要的複雜化，並可能耽誤缺陷處理。如果報告者希望在公開披露之前分配一個CVE編號，
+他們需要聯繫上述的私有linux-distros列表。當在提供補丁之前已有這樣的CVE編號時，
+如報告者願意，最好在提交消息中提及它。
+
+保密協議
+---------
+
+Linux內核安全團隊不是一個正式的機構實體，因此無法簽訂任何保密協議。
+
diff --git a/Documentation/translations/zh_TW/admin-guide/tainted-kernels.rst b/Documentation/translations/zh_TW/admin-guide/tainted-kernels.rst
new file mode 100644
index 000000000000..d7b3c4276417
--- /dev/null
+++ b/Documentation/translations/zh_TW/admin-guide/tainted-kernels.rst
@@ -0,0 +1,161 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :doc:`../../../admin-guide/tainted-kernels`
+
+:譯者:
+
+ 吳想成 Wu XiangCheng <bobwxc@email.cn>
+ 胡皓文 Hu Haowen <src.res@email.cn>
+
+受汙染的內核
+-------------
+
+當發生一些在稍後調查問題時可能相關的事件時，內核會將自己標記爲「受汙染
+（tainted）」的。不用太過擔心，大多數情況下運行受汙染的內核沒有問題；這些信息
+主要在有人想調查某個問題時才有意義的，因爲問題的真正原因可能是導致內核受汙染
+的事件。這就是爲什麼來自受汙染內核的缺陷報告常常被開發人員忽略，因此請嘗試用
+未受汙染的內核重現問題。
+
+請注意，即使在您消除導致汙染的原因（亦即卸載專有內核模塊）之後，內核仍將保持
+汙染狀態，以表示內核仍然不可信。這也是爲什麼內核在注意到內部問題（「kernel
+bug」）、可恢復錯誤（「kernel oops」）或不可恢復錯誤（「kernel panic」）時會列印
+受汙染狀態，並將有關此的調試信息寫入日誌 ``dmesg`` 輸出。也可以通過
+``/proc/`` 中的文件在運行時檢查受汙染的狀態。
+
+
+BUG、Oops或Panics消息中的汙染標誌
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+在頂部以「CPU:」開頭的一行中可以找到受汙染的狀態；內核是否受到汙染和原因會顯示
+在進程ID（「PID:」）和觸發事件命令的縮寫名稱（「Comm:」）之後::
+
+	BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
+	Oops: 0002 [#1] SMP PTI
+	CPU: 0 PID: 4424 Comm: insmod Tainted: P        W  O      4.20.0-0.rc6.fc30 #1
+	Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
+	RIP: 0010:my_oops_init+0x13/0x1000 [kpanic]
+	[...]
+
+如果內核在事件發生時沒有被汙染，您將在那裡看到「Not-tainted:」；如果被汙染，那
+麼它將是「Tainted:」以及字母或空格。在上面的例子中，它看起來是這樣的::
+
+	Tainted: P        W  O
+
+下表解釋了這些字符的含義。在本例中，由於加載了專有模塊（ ``P`` ），出現了
+警告（ ``W`` ），並且加載了外部構建的模塊（ ``O`` ），所以內核早些時候受到
+了汙染。要解碼其他字符，請使用下表。
+
+
+解碼運行時的汙染狀態
+~~~~~~~~~~~~~~~~~~~~~
+
+在運行時，您可以通過讀取 ``cat /proc/sys/kernel/tainted`` 來查詢受汙染狀態。
+如果返回 ``0`` ，則內核沒有受到汙染；任何其他數字都表示受到汙染的原因。解碼
+這個數字的最簡單方法是使用腳本  ``tools/debugging/kernel-chktaint`` ，您的
+發行版可能會將其作爲名爲 ``linux-tools`` 或 ``kernel-tools`` 的包的一部分提
+供；如果沒有，您可以從
+`git.kernel.org <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/tools/debugging/kernel-chktaint>`_
+網站下載此腳本並用 ``sh kernel-chktaint`` 執行，它會在上面引用的日誌中有類似
+語句的機器上列印這樣的內容::
+
+	Kernel is Tainted for following reasons:
+	 * Proprietary module was loaded (#0)
+	 * Kernel issued warning (#9)
+	 * Externally-built ('out-of-tree') module was loaded  (#12)
+	See Documentation/admin-guide/tainted-kernels.rst in the Linux kernel or
+	 https://www.kernel.org/doc/html/latest/admin-guide/tainted-kernels.html for
+	 a more details explanation of the various taint flags.
+	Raw taint value as int/string: 4609/'P        W  O     '
+
+你也可以試著自己解碼這個數字。如果內核被汙染的原因只有一個，那麼這很簡單，
+在本例中您可以通過下表找到數字。如果你需要解碼有多個原因的數字，因爲它是一
+個位域（bitfield），其中每個位表示一個特定類型的汙染的存在或不存在，最好讓
+前面提到的腳本來處理。但是如果您需要快速看一下，可以使用這個shell命令來檢查
+設置了哪些位::
+
+	$ for i in $(seq 18); do echo $(($i-1)) $(($(cat /proc/sys/kernel/tainted)>>($i-1)&1));done
+
+汙染狀態代碼表
+~~~~~~~~~~~~~~~
+
+===  =====  ======  ========================================================
+ 位  日誌     數字  內核被汙染的原因
+===  =====  ======  ========================================================
+  0   G/P        1  已加載專用模塊
+  1   _/F        2  模塊被強制加載
+  2   _/S        4  內核運行在不合規範的系統上
+  3   _/R        8  模塊被強制卸載
+  4   _/M       16  處理器報告了機器檢測異常（MCE）
+  5   _/B       32  引用了錯誤的頁或某些意外的頁標誌
+  6   _/U       64  用戶空間應用程式請求的汙染
+  7   _/D      128  內核最近死機了，即曾出現OOPS或BUG
+  8   _/A      256  ACPI表被用戶覆蓋
+  9   _/W      512  內核發出警告
+ 10   _/C     1024  已加載staging驅動程序
+ 11   _/I     2048  已應用平台固件缺陷的解決方案
+ 12   _/O     4096  已加載外部構建（「樹外」）模塊
+ 13   _/E     8192  已加載未簽名的模塊
+ 14   _/L    16384  發生軟鎖定
+ 15   _/K    32768  內核已實時打補丁
+ 16   _/X    65536  備用汙染，爲發行版定義並使用
+ 17   _/T   131072  內核是用結構隨機化插件構建的
+===  =====  ======  ========================================================
+
+註：字符 ``_`` 表示空白，以便於閱讀表。
+
+汙染的更詳細解釋
+~~~~~~~~~~~~~~~~~
+
+ 0)  ``G`` 加載的所有模塊都有GPL或兼容許可證， ``P`` 加載了任何專有模塊。
+     沒有MODULE_LICENSE（模塊許可證）或MODULE_LICENSE未被insmod認可爲GPL
+     兼容的模塊被認爲是專有的。
+
+
+ 1)  ``F`` 任何模塊被 ``insmod -f`` 強制加載， ``' '`` 所有模塊正常加載。
+
+ 2)  ``S`` 內核運行在不合規範的處理器或系統上：硬體已運行在不受支持的配置中，
+     因此無法保證正確執行。內核將被汙染，例如：
+
+     - 在x86上：PAE是通過intel CPU（如Pentium M）上的forcepae強制執行的，這些
+       CPU不報告PAE，但可能有功能實現，SMP內核在非官方支持的SMP Athlon CPU上
+       運行，MSR被暴露到用戶空間中。
+     - 在arm上：在某些CPU（如Keystone 2）上運行的內核，沒有啓用某些內核特性。
+     - 在arm64上：CPU之間存在不匹配的硬體特性，引導加載程序以不同的模式引導CPU。
+     - 某些驅動程序正在被用在不受支持的體系結構上（例如x86_64以外的其他系統
+       上的scsi/snic，非x86/x86_64/itanium上的scsi/ips，已經損壞了arm64上
+       irqchip/irq-gic的固件設置…）。
+
+ 3)  ``R`` 模塊被 ``rmmod -f`` 強制卸載， ``' '`` 所有模塊都正常卸載。
+
+ 4)  ``M`` 任何處理器報告了機器檢測異常， ``' '`` 未發生機器檢測異常。
+
+ 5)  ``B`` 頁面釋放函數發現錯誤的頁面引用或某些意外的頁面標誌。這表示硬體問題
+     或內核錯誤；日誌中應該有其他信息指示發生此汙染的原因。
+
+ 6)  ``U`` 用戶或用戶應用程式特意請求設置受汙染標誌，否則應爲 ``' '`` 。
+
+ 7)  ``D`` 內核最近死機了，即出現了OOPS或BUG。
+
+ 8)  ``A`` ACPI表被重寫。
+
+ 9)  ``W`` 內核之前已發出過警告（儘管有些警告可能會設置更具體的汙染標誌）。
+
+ 10) ``C`` 已加載staging驅動程序。
+
+ 11) ``I`` 內核正在處理平台固件（BIOS或類似軟體）中的嚴重錯誤。
+
+ 12) ``O`` 已加載外部構建（「樹外」）模塊。
+
+ 13) ``E`` 在支持模塊簽名的內核中加載了未簽名的模塊。
+
+ 14) ``L`` 系統上先前發生過軟鎖定。
+
+ 15) ``K`` 內核已經實時打了補丁。
+
+ 16) ``X`` 備用汙染，由Linux發行版定義和使用。
+
+ 17) ``T`` 內核構建時使用了randstruct插件，它可以有意生成非常不尋常的內核結構
+     布局（甚至是性能病態的布局），這在調試時非常有用。於構建時設置。
+
diff --git a/Documentation/translations/zh_TW/admin-guide/unicode.rst b/Documentation/translations/zh_TW/admin-guide/unicode.rst
new file mode 100644
index 000000000000..720875be5ef8
--- /dev/null
+++ b/Documentation/translations/zh_TW/admin-guide/unicode.rst
@@ -0,0 +1,174 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: Documentation/admin-guide/unicode.rst
+
+:譯者:
+
+ 吳想成 Wu XiangCheng <bobwxc@email.cn>
+ 胡皓文 Hu Haowen <src.res@email.cn>
+
+Unicode（統一碼）支持
+======================
+
+	（英文版）上次更新：2005-01-17，版本號 1.4
+
+此文檔由H. Peter Anvin <unicode@lanana.org>管理，是Linux註冊名稱與編號管理局
+（Linux Assigned Names And Numbers Authority，LANANA）項目的一部分。
+現行版本請見：
+
+	http://www.lanana.org/docs/unicode/admin-guide/unicode.rst
+
+簡介
+-----
+
+Linux內核代碼已被重寫以使用Unicode來將字符映射到字體。下載一個Unicode到字體
+（Unicode-to-font）表，八位字符集與UTF-8模式都將改用此字體來顯示。
+
+這微妙地改變了八位字符表的語義。現在的四個字符表是：
+
+=============== =============================== ================
+映射代號        映射名稱                        Escape代碼 (G0)
+=============== =============================== ================
+LAT1_MAP        Latin-1 (ISO 8859-1)            ESC ( B
+GRAF_MAP        DEC VT100 pseudographics        ESC ( 0
+IBMPC_MAP       IBM code page 437               ESC ( U
+USER_MAP        User defined                    ESC ( K
+=============== =============================== ================
+
+特別是 ESC ( U 不再是「直通字體」，因爲字體可能與IBM字符集完全不同。
+例如，即使加載了一個Latin-1字體，也允許使用塊圖形（block graphics）。
+
+請注意，儘管這些代碼與ISO 2022類似，但這些代碼及其用途都與ISO 2022不匹配；
+Linux有兩個八位代碼（G0和G1），而ISO 2022有四個七位代碼（G0-G3）。
+
+根據Unicode標準/ISO 10646，U+F000到U+F8FF被保留用於作業系統範圍內的分配
+（Unicode標準將其稱爲「團體區域（Corporate Zone）」，因爲這對於Linux是不準確
+的，所以我們稱之爲「Linux區域」）。選擇U+F000作爲起點，因爲它允許直接映射
+區域以2的大倍數開始（以防需要1024或2048個字符的字體）。這就留下U+E000到
+U+EFFF作爲最終用戶區。
+
+[v1.2]：Unicodes範圍從U+F000到U+F7FF已經被硬編碼爲直接映射到加載的字體，
+繞過了翻譯表。用戶定義的映射現在默認爲U+F000到U+F0FF，模擬前述行爲。實際上，
+此範圍可能較短；例如，vgacon只能處理256字符（U+F000..U+F0FF）或512字符
+（U+F000..U+F1FF）字體。
+
+Linux 區域中定義的實際字符
+---------------------------
+
+此外，還定義了Unicode 1.1.4中不存在的以下字符；這些字符由DEC VT圖形映射使用。
+[v1.2]此用法已過時，不應再使用；請參見下文。
+
+====== ======================================
+U+F800 DEC VT GRAPHICS HORIZONTAL LINE SCAN 1
+U+F801 DEC VT GRAPHICS HORIZONTAL LINE SCAN 3
+U+F803 DEC VT GRAPHICS HORIZONTAL LINE SCAN 7
+U+F804 DEC VT GRAPHICS HORIZONTAL LINE SCAN 9
+====== ======================================
+
+DEC VT220使用6x10字符矩陣，這些字符在DEC VT圖形字符集中形成一個平滑的過渡。
+我省略了掃描5行，因爲它也被用作塊圖形字符，因此被編碼爲U+2500 FORMS LIGHT
+HORIZONTAL。
+
+[v1.3]：這些字符已正式添加到Unicode 3.2.0中；它們在U+23BA、U+23BB、U+23BC、
+U+23BD處添加。Linux現在使用新值。
+
+[v1.2]：添加了以下字符來表示常見的鍵盤符號，這些符號不太可能被添加到Unicode
+中，因爲它們非常討厭地取決於特定供應商。當然，這是糟糕設計的一個好例子。
+
+====== ======================================
+U+F810 KEYBOARD SYMBOL FLYING FLAG
+U+F811 KEYBOARD SYMBOL PULLDOWN MENU
+U+F812 KEYBOARD SYMBOL OPEN APPLE
+U+F813 KEYBOARD SYMBOL SOLID APPLE
+====== ======================================
+
+克林貢（Klingon）語支持
+------------------------
+
+1996年，Linux是世界上第一個添加對人工語言克林貢支持的作業系統，克林貢是由
+Marc Okrand爲《星際迷航》電視連續劇創造的。這種編碼後來被徵募Unicode註冊表
+（ConScript Unicode Registry，CSUR）採用，並建議（但最終被拒絕）納入Unicode
+平面一。不過，它仍然是Linux區域中的Linux/CSUR私有分配。
+
+這種編碼已經得到克林貢語言研究所（Klingon Language Institute）的認可。
+有關更多信息，請聯繫他們：
+
+	http://www.kli.org/
+
+由於Linux CZ開頭部分的字符大多是dingbats/symbols/forms類型，而且這是一種
+語言，因此根據標準Unicode慣例，我將它放置在16單元的邊界上。
+
+.. note::
+
+  這個範圍現在由徵募Unicode註冊表正式管理。規範性引用文件爲：
+
+	https://www.evertype.com/standards/csur/klingon.html
+
+克林貢語有一個26個字符的字母表，一個10位數的位置數字書寫系統，從左到右
+，從上到下書寫。
+
+克林貢字母的幾種字形已經被提出。但是由於這組符號看起來始終是一致的，只有實際
+的形狀不同，因此按照標準Unicode慣例，這些差異被認爲是字體變體。
+
+======	=======================================================
+U+F8D0	KLINGON LETTER A
+U+F8D1	KLINGON LETTER B
+U+F8D2	KLINGON LETTER CH
+U+F8D3	KLINGON LETTER D
+U+F8D4	KLINGON LETTER E
+U+F8D5	KLINGON LETTER GH
+U+F8D6	KLINGON LETTER H
+U+F8D7	KLINGON LETTER I
+U+F8D8	KLINGON LETTER J
+U+F8D9	KLINGON LETTER L
+U+F8DA	KLINGON LETTER M
+U+F8DB	KLINGON LETTER N
+U+F8DC	KLINGON LETTER NG
+U+F8DD	KLINGON LETTER O
+U+F8DE	KLINGON LETTER P
+U+F8DF	KLINGON LETTER Q
+	- Written <q> in standard Okrand Latin transliteration
+U+F8E0	KLINGON LETTER QH
+	- Written <Q> in standard Okrand Latin transliteration
+U+F8E1	KLINGON LETTER R
+U+F8E2	KLINGON LETTER S
+U+F8E3	KLINGON LETTER T
+U+F8E4	KLINGON LETTER TLH
+U+F8E5	KLINGON LETTER U
+U+F8E6	KLINGON LETTER V
+U+F8E7	KLINGON LETTER W
+U+F8E8	KLINGON LETTER Y
+U+F8E9	KLINGON LETTER GLOTTAL STOP
+
+U+F8F0	KLINGON DIGIT ZERO
+U+F8F1	KLINGON DIGIT ONE
+U+F8F2	KLINGON DIGIT TWO
+U+F8F3	KLINGON DIGIT THREE
+U+F8F4	KLINGON DIGIT FOUR
+U+F8F5	KLINGON DIGIT FIVE
+U+F8F6	KLINGON DIGIT SIX
+U+F8F7	KLINGON DIGIT SEVEN
+U+F8F8	KLINGON DIGIT EIGHT
+U+F8F9	KLINGON DIGIT NINE
+
+U+F8FD	KLINGON COMMA
+U+F8FE	KLINGON FULL STOP
+U+F8FF	KLINGON SYMBOL FOR EMPIRE
+======	=======================================================
+
+其他虛構和人工字母
+-------------------
+
+自從分配了克林貢Linux Unicode塊之後，John Cowan <jcowan@reutershealth.com>
+和 Michael Everson <everson@evertype.com> 建立了一個虛構和人工字母的註冊表。
+徵募Unicode註冊表請訪問：
+
+	https://www.evertype.com/standards/csur/
+
+所使用的範圍位於最終用戶區域的低端，因此無法進行規範化分配，但建議希望對虛構
+字母進行編碼的人員使用這些代碼，以實現互操作性。對於克林貢語，CSUR採用了Linux
+編碼。CSUR的人正在推動將Tengwar和Cirth添加到Unicode平面一；將克林貢添加到
+Unicode平面一被拒絕，因此上述編碼仍然是官方的。
+
diff --git a/Documentation/translations/zh_TW/arm64/amu.rst b/Documentation/translations/zh_TW/arm64/amu.rst
new file mode 100644
index 000000000000..ffdc466e0f62
--- /dev/null
+++ b/Documentation/translations/zh_TW/arm64/amu.rst
@@ -0,0 +1,104 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/arm64/amu.rst <amu_index>`
+
+Translator: Bailu Lin <bailu.lin@vivo.com>
+            Hu Haowen <src.res@email.cn>
+
+==================================
+AArch64 Linux 中擴展的活動監控單元
+==================================
+
+作者: Ionela Voinescu <ionela.voinescu@arm.com>
+
+日期: 2019-09-10
+
+本文檔簡要描述了 AArch64 Linux 支持的活動監控單元的規範。
+
+
+架構總述
+--------
+
+活動監控是 ARMv8.4 CPU 架構引入的一個可選擴展特性。
+
+活動監控單元(在每個 CPU 中實現)爲系統管理提供了性能計數器。既可以通
+過系統寄存器的方式訪問計數器，同時也支持外部內存映射的方式訪問計數器。
+
+AMUv1 架構實現了一個由4個固定的64位事件計數器組成的計數器組。
+
+  - CPU 周期計數器：同 CPU 的頻率增長
+  - 常量計數器：同固定的系統時鐘頻率增長
+  - 淘汰指令計數器: 同每次架構指令執行增長
+  - 內存停頓周期計數器：計算由在時鐘域內的最後一級緩存中未命中而引起
+    的指令調度停頓周期數
+
+當處於 WFI 或者 WFE 狀態時，計數器不會增長。
+
+AMU 架構提供了一個高達16位的事件計數器空間，未來新的 AMU 版本中可能
+用它來實現新增的事件計數器。
+
+另外，AMUv1 實現了一個多達16個64位輔助事件計數器的計數器組。
+
+冷復位時所有的計數器會清零。
+
+
+基本支持
+--------
+
+內核可以安全地運行在支持 AMU 和不支持 AMU 的 CPU 組合中。
+因此，當配置 CONFIG_ARM64_AMU_EXTN 後我們無條件使能後續
+(secondary or hotplugged) CPU 檢測和使用這個特性。
+
+當在 CPU 上檢測到該特性時，我們會標記爲特性可用但是不能保證計數器的功能，
+僅表明有擴展屬性。
+
+固件(代碼運行在高異常級別，例如 arm-tf )需支持以下功能：
+
+ - 提供低異常級別(EL2 和 EL1)訪問 AMU 寄存器的能力。
+ - 使能計數器。如果未使能，它的值應爲 0。
+ - 在從電源關閉狀態啓動 CPU 前或後保存或者恢復計數器。
+
+當使用使能了該特性的內核啓動但固件損壞時，訪問計數器寄存器可能會遭遇
+panic 或者死鎖。即使未發現這些症狀，計數器寄存器返回的數據結果並不一
+定能反映真實情況。通常，計數器會返回 0，表明他們未被使能。
+
+如果固件沒有提供適當的支持最好關閉 CONFIG_ARM64_AMU_EXTN。
+值得注意的是，出於安全原因，不要繞過 AMUSERRENR_EL0 設置而捕獲從
+EL0(用戶空間) 訪問 EL1(內核空間)。 因此，固件應該確保訪問 AMU寄存器
+不會困在 EL2或EL3。
+
+AMUv1 的固定計數器可以通過如下系統寄存器訪問：
+
+ - SYS_AMEVCNTR0_CORE_EL0
+ - SYS_AMEVCNTR0_CONST_EL0
+ - SYS_AMEVCNTR0_INST_RET_EL0
+ - SYS_AMEVCNTR0_MEM_STALL_EL0
+
+特定輔助計數器可以通過 SYS_AMEVCNTR1_EL0(n) 訪問，其中n介於0到15。
+
+詳細信息定義在目錄：arch/arm64/include/asm/sysreg.h。
+
+
+用戶空間訪問
+------------
+
+由於以下原因，當前禁止從用戶空間訪問 AMU 的寄存器：
+
+  - 安全因數：可能會暴露處於安全模式執行的代碼信息。
+  - 意願：AMU 是用於系統管理的。
+
+同樣，該功能對用戶空間不可見。
+
+
+虛擬化
+------
+
+由於以下原因，當前禁止從 KVM 客戶端的用戶空間(EL0)和內核空間(EL1)
+訪問 AMU 的寄存器：
+
+  - 安全因數：可能會暴露給其他客戶端或主機端執行的代碼信息。
+
+任何試圖訪問 AMU 寄存器的行爲都會觸發一個註冊在客戶端的未定義異常。
+
diff --git a/Documentation/translations/zh_TW/arm64/booting.txt b/Documentation/translations/zh_TW/arm64/booting.txt
new file mode 100644
index 000000000000..b9439dd54012
--- /dev/null
+++ b/Documentation/translations/zh_TW/arm64/booting.txt
@@ -0,0 +1,251 @@
+SPDX-License-Identifier: GPL-2.0
+
+Chinese translated version of Documentation/arm64/booting.rst
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly.  However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help.  Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+M:	Will Deacon <will.deacon@arm.com>
+zh_CN:	Fu Wei <wefu@redhat.com>
+zh_TW:	Hu Haowen <src.res@email.cn>
+C:	55f058e7574c3615dea4615573a19bdb258696c6
+---------------------------------------------------------------------
+Documentation/arm64/booting.rst 的中文翻譯
+
+如果想評論或更新本文的內容，請直接聯繫原文檔的維護者。如果你使用英文
+交流有困難的話，也可以向中文版維護者求助。如果本翻譯更新不及時或者翻
+譯存在問題，請聯繫中文版維護者。
+
+英文版維護者： Will Deacon <will.deacon@arm.com>
+中文版維護者： 傅煒  Fu Wei <wefu@redhat.com>
+中文版翻譯者： 傅煒  Fu Wei <wefu@redhat.com>
+中文版校譯者： 傅煒  Fu Wei <wefu@redhat.com>
+繁體中文版校譯者： 胡皓文  Hu Haowen <src.res@email.cn>
+本文翻譯提交時的 Git 檢出點爲： 55f058e7574c3615dea4615573a19bdb258696c6
+
+以下爲正文
+---------------------------------------------------------------------
+			啓動 AArch64 Linux
+			==================
+
+作者: Will Deacon <will.deacon@arm.com>
+日期: 2012 年 09 月 07 日
+
+本文檔基於 Russell King 的 ARM 啓動文檔，且適用於所有公開發布的
+AArch64 Linux 內核代碼。
+
+AArch64 異常模型由多個異常級（EL0 - EL3）組成，對於 EL0 和 EL1 異常級
+有對應的安全和非安全模式。EL2 是系統管理級，且僅存在於非安全模式下。
+EL3 是最高特權級，且僅存在於安全模式下。
+
+基於本文檔的目的，我們將簡單地使用『引導裝載程序』（『boot loader』）
+這個術語來定義在將控制權交給 Linux 內核前 CPU 上執行的所有軟體。
+這可能包含安全監控和系統管理代碼，或者它可能只是一些用於準備最小啓動
+環境的指令。
+
+基本上，引導裝載程序（至少）應實現以下操作：
+
+1、設置和初始化 RAM
+2、設置設備樹數據
+3、解壓內核映像
+4、調用內核映像
+
+
+1、設置和初始化 RAM
+-----------------
+
+必要性: 強制
+
+引導裝載程序應該找到並初始化系統中所有內核用於保持系統變量數據的 RAM。
+這個操作的執行方式因設備而異。（它可能使用內部算法來自動定位和計算所有
+RAM，或可能使用對這個設備已知的 RAM 信息，還可能是引導裝載程序設計者
+想到的任何合適的方法。）
+
+
+2、設置設備樹數據
+---------------
+
+必要性: 強制
+
+設備樹數據塊（dtb）必須 8 字節對齊，且大小不能超過 2MB。由於設備樹
+數據塊將在使能緩存的情況下以 2MB 粒度被映射，故其不能被置於必須以特定
+屬性映射的2M區域內。
+
+註： v4.2 之前的版本同時要求設備樹數據塊被置於從內核映像以下
+text_offset 字節處算起第一個 512MB 內。
+
+3、解壓內核映像
+-------------
+
+必要性: 可選
+
+AArch64 內核當前沒有提供自解壓代碼，因此如果使用了壓縮內核映像文件
+（比如 Image.gz），則需要通過引導裝載程序（使用 gzip 等）來進行解壓。
+若引導裝載程序沒有實現這個功能，就要使用非壓縮內核映像文件。
+
+
+4、調用內核映像
+-------------
+
+必要性: 強制
+
+已解壓的內核映像包含一個 64 字節的頭，內容如下：
+
+  u32 code0;			/* 可執行代碼 */
+  u32 code1;			/* 可執行代碼 */
+  u64 text_offset;		/* 映像裝載偏移，小端模式 */
+  u64 image_size;		/* 映像實際大小, 小端模式 */
+  u64 flags;			/* 內核旗標, 小端模式 *
+  u64 res2	= 0;		/* 保留 */
+  u64 res3	= 0;		/* 保留 */
+  u64 res4	= 0;		/* 保留 */
+  u32 magic	= 0x644d5241;	/* 魔數, 小端, "ARM\x64" */
+  u32 res5;			/* 保留 （用於 PE COFF 偏移） */
+
+
+映像頭注釋：
+
+- 自 v3.17 起，除非另有說明，所有域都是小端模式。
+
+- code0/code1 負責跳轉到 stext.
+
+- 當通過 EFI 啓動時， 最初 code0/code1 被跳過。
+  res5 是到 PE 文件頭的偏移，而 PE 文件頭含有 EFI 的啓動入口點
+  （efi_stub_entry）。當 stub 代碼完成了它的使命，它會跳轉到 code0
+  繼續正常的啓動流程。
+
+- v3.17 之前，未明確指定 text_offset 的字節序。此時，image_size 爲零，
+  且 text_offset 依照內核字節序爲 0x80000。
+  當 image_size 非零，text_offset 爲小端模式且是有效值，應被引導加載
+  程序使用。當 image_size 爲零，text_offset 可假定爲 0x80000。
+
+- flags 域 (v3.17 引入) 爲 64 位小端模式，其編碼如下：
+  位 0: 	內核字節序。 1 表示大端模式，0 表示小端模式。
+  位 1-2:	內核頁大小。
+			0 - 未指定。
+			1 - 4K
+			2 - 16K
+			3 - 64K
+  位 3:		內核物理位置
+			0 - 2MB 對齊基址應儘量靠近內存起始處，因爲
+			    其基址以下的內存無法通過線性映射訪問
+			1 - 2MB 對齊基址可以在物理內存的任意位置
+  位 4-63:	保留。
+
+- 當 image_size 爲零時，引導裝載程序應試圖在內核映像末尾之後儘可能
+  多地保留空閒內存供內核直接使用。對內存空間的需求量因所選定的內核
+  特性而異, 並無實際限制。
+
+內核映像必須被放置在任意一個可用系統內存 2MB 對齊基址的 text_offset
+字節處，並從該處被調用。2MB 對齊基址和內核映像起始地址之間的區域對於
+內核來說沒有特殊意義，且可能被用於其他目的。
+從映像起始地址算起，最少必須準備 image_size 字節的空閒內存供內核使用。
+註： v4.6 之前的版本無法使用內核映像物理偏移以下的內存，所以當時建議
+將映像儘量放置在靠近系統內存起始的地方。
+
+任何提供給內核的內存（甚至在映像起始地址之前），若未從內核中標記爲保留
+(如在設備樹（dtb）的 memreserve 區域），都將被認爲對內核是可用。
+
+在跳轉入內核前，必須符合以下狀態：
+
+- 停止所有 DMA 設備，這樣內存數據就不會因爲虛假網絡包或磁碟數據而
+  被破壞。這可能可以節省你許多的調試時間。
+
+- 主 CPU 通用寄存器設置
+  x0 = 系統 RAM 中設備樹數據塊（dtb）的物理地址。
+  x1 = 0 (保留，將來可能使用)
+  x2 = 0 (保留，將來可能使用)
+  x3 = 0 (保留，將來可能使用)
+
+- CPU 模式
+  所有形式的中斷必須在 PSTATE.DAIF 中被屏蔽（Debug、SError、IRQ
+  和 FIQ）。
+  CPU 必須處於 EL2（推薦，可訪問虛擬化擴展）或非安全 EL1 模式下。
+
+- 高速緩存、MMU
+  MMU 必須關閉。
+  指令緩存開啓或關閉皆可。
+  已載入的內核映像的相應內存區必須被清理，以達到緩存一致性點（PoC）。
+  當存在系統緩存或其他使能緩存的一致性主控器時，通常需使用虛擬地址
+  維護其緩存，而非 set/way 操作。
+  遵從通過虛擬地址操作維護構架緩存的系統緩存必須被配置，並可以被使能。
+  而不通過虛擬地址操作維護構架緩存的系統緩存（不推薦），必須被配置且
+  禁用。
+
+  *譯者註：對於 PoC 以及緩存相關內容，請參考 ARMv8 構架參考手冊
+   ARM DDI 0487A
+
+- 架構計時器
+  CNTFRQ 必須設定爲計時器的頻率，且 CNTVOFF 必須設定爲對所有 CPU
+  都一致的值。如果在 EL1 模式下進入內核，則 CNTHCTL_EL2 中的
+  EL1PCTEN (bit 0) 必須置位。
+
+- 一致性
+  通過內核啓動的所有 CPU 在內核入口地址上必須處於相同的一致性域中。
+  這可能要根據具體實現來定義初始化過程，以使能每個CPU上對維護操作的
+  接收。
+
+- 系統寄存器
+  在進入內核映像的異常級中，所有構架中可寫的系統寄存器必須通過軟體
+  在一個更高的異常級別下初始化，以防止在 未知 狀態下運行。
+
+  對於擁有 GICv3 中斷控制器並以 v3 模式運行的系統：
+  - 如果 EL3 存在：
+    ICC_SRE_EL3.Enable (位 3) 必須初始化爲 0b1。
+    ICC_SRE_EL3.SRE (位 0) 必須初始化爲 0b1。
+  - 若內核運行在 EL1：
+    ICC_SRE_EL2.Enable (位 3) 必須初始化爲 0b1。
+    ICC_SRE_EL2.SRE (位 0) 必須初始化爲 0b1。
+  - 設備樹（DT）或 ACPI 表必須描述一個 GICv3 中斷控制器。
+
+  對於擁有 GICv3 中斷控制器並以兼容（v2）模式運行的系統：
+  - 如果 EL3 存在：
+    ICC_SRE_EL3.SRE (位 0) 必須初始化爲 0b0。
+  - 若內核運行在 EL1：
+    ICC_SRE_EL2.SRE (位 0) 必須初始化爲 0b0。
+  - 設備樹（DT）或 ACPI 表必須描述一個 GICv2 中斷控制器。
+
+以上對於 CPU 模式、高速緩存、MMU、架構計時器、一致性、系統寄存器的
+必要條件描述適用於所有 CPU。所有 CPU 必須在同一異常級別跳入內核。
+
+引導裝載程序必須在每個 CPU 處於以下狀態時跳入內核入口：
+
+- 主 CPU 必須直接跳入內核映像的第一條指令。通過此 CPU 傳遞的設備樹
+  數據塊必須在每個 CPU 節點中包含一個 『enable-method』 屬性，所
+  支持的 enable-method 請見下文。
+
+  引導裝載程序必須生成這些設備樹屬性，並在跳入內核入口之前將其插入
+  數據塊。
+
+- enable-method 爲 「spin-table」 的 CPU 必須在它們的 CPU
+  節點中包含一個 『cpu-release-addr』 屬性。這個屬性標識了一個
+  64 位自然對齊且初始化爲零的內存位置。
+
+  這些 CPU 必須在內存保留區（通過設備樹中的 /memreserve/ 域傳遞
+  給內核）中自旋於內核之外，輪詢它們的 cpu-release-addr 位置（必須
+  包含在保留區中）。可通過插入 wfe 指令來降低忙循環開銷，而主 CPU 將
+  發出 sev 指令。當對 cpu-release-addr 所指位置的讀取操作返回非零值
+  時，CPU 必須跳入此值所指向的地址。此值爲一個單獨的 64 位小端值，
+  因此 CPU 須在跳轉前將所讀取的值轉換爲其本身的端模式。
+
+- enable-method 爲 「psci」 的 CPU 保持在內核外（比如，在
+  memory 節點中描述爲內核空間的內存區外，或在通過設備樹 /memreserve/
+  域中描述爲內核保留區的空間中）。內核將會發起在 ARM 文檔（編號
+  ARM DEN 0022A：用於 ARM 上的電源狀態協調接口系統軟體）中描述的
+  CPU_ON 調用來將 CPU 帶入內核。
+
+  *譯者注: ARM DEN 0022A 已更新到 ARM DEN 0022C。
+
+  設備樹必須包含一個 『psci』 節點，請參考以下文檔：
+  Documentation/devicetree/bindings/arm/psci.yaml
+
+
+- 輔助 CPU 通用寄存器設置
+  x0 = 0 (保留，將來可能使用)
+  x1 = 0 (保留，將來可能使用)
+  x2 = 0 (保留，將來可能使用)
+  x3 = 0 (保留，將來可能使用)
+
diff --git a/Documentation/translations/zh_TW/arm64/elf_hwcaps.rst b/Documentation/translations/zh_TW/arm64/elf_hwcaps.rst
new file mode 100644
index 000000000000..3eb1c623ce31
--- /dev/null
+++ b/Documentation/translations/zh_TW/arm64/elf_hwcaps.rst
@@ -0,0 +1,244 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/arm64/elf_hwcaps.rst <elf_hwcaps_index>`
+
+Translator: Bailu Lin <bailu.lin@vivo.com>
+            Hu Haowen <src.res@email.cn>
+
+================
+ARM64 ELF hwcaps
+================
+
+這篇文檔描述了 arm64 ELF hwcaps 的用法和語義。
+
+
+1. 簡介
+-------
+
+有些硬體或軟體功能僅在某些 CPU 實現上和/或在具體某個內核配置上可用，但
+對於處於 EL0 的用戶空間代碼沒有可用的架構發現機制。內核通過在輔助向量表
+公開一組稱爲 hwcaps 的標誌而把這些功能暴露給用戶空間。
+
+用戶空間軟體可以通過獲取輔助向量的 AT_HWCAP 或 AT_HWCAP2 條目來測試功能，
+並測試是否設置了相關標誌，例如::
+
+	bool floating_point_is_present(void)
+	{
+		unsigned long hwcaps = getauxval(AT_HWCAP);
+		if (hwcaps & HWCAP_FP)
+			return true;
+
+		return false;
+	}
+
+如果軟體依賴於 hwcap 描述的功能，在嘗試使用該功能前則應檢查相關的 hwcap
+標誌以驗證該功能是否存在。
+
+不能通過其他方式探查這些功能。當一個功能不可用時，嘗試使用它可能導致不可
+預測的行爲，並且無法保證能確切的知道該功能不可用，例如 SIGILL。
+
+
+2. Hwcaps 的說明
+----------------
+
+大多數 hwcaps 旨在說明通過架構 ID 寄存器(處於 EL0 的用戶空間代碼無法訪問)
+描述的功能的存在。這些 hwcap 通過 ID 寄存器欄位定義，並且應根據 ARM 體系
+結構參考手冊（ARM ARM）中定義的欄位來解釋說明。
+
+這些 hwcaps 以下面的形式描述::
+
+    idreg.field == val 表示有某個功能。
+
+當 idreg.field 中有 val 時，hwcaps 表示 ARM ARM 定義的功能是有效的，但是
+並不是說要完全和 val 相等，也不是說 idreg.field 描述的其他功能就是缺失的。
+
+其他 hwcaps 可能表明無法僅由 ID 寄存器描述的功能的存在。這些 hwcaps 可能
+沒有被 ID 寄存器描述，需要參考其他文檔。
+
+
+3. AT_HWCAP 中揭示的 hwcaps
+---------------------------
+
+HWCAP_FP
+    ID_AA64PFR0_EL1.FP == 0b0000 表示有此功能。
+
+HWCAP_ASIMD
+    ID_AA64PFR0_EL1.AdvSIMD == 0b0000 表示有此功能。
+
+HWCAP_EVTSTRM
+    通用計時器頻率配置爲大約100KHz以生成事件。
+
+HWCAP_AES
+    ID_AA64ISAR0_EL1.AES == 0b0001 表示有此功能。
+
+HWCAP_PMULL
+    ID_AA64ISAR0_EL1.AES == 0b0010 表示有此功能。
+
+HWCAP_SHA1
+    ID_AA64ISAR0_EL1.SHA1 == 0b0001 表示有此功能。
+
+HWCAP_SHA2
+    ID_AA64ISAR0_EL1.SHA2 == 0b0001 表示有此功能。
+
+HWCAP_CRC32
+    ID_AA64ISAR0_EL1.CRC32 == 0b0001 表示有此功能。
+
+HWCAP_ATOMICS
+    ID_AA64ISAR0_EL1.Atomic == 0b0010 表示有此功能。
+
+HWCAP_FPHP
+    ID_AA64PFR0_EL1.FP == 0b0001 表示有此功能。
+
+HWCAP_ASIMDHP
+    ID_AA64PFR0_EL1.AdvSIMD == 0b0001 表示有此功能。
+
+HWCAP_CPUID
+    根據 Documentation/arm64/cpu-feature-registers.rst 描述，EL0 可以訪問
+    某些 ID 寄存器。
+
+    這些 ID 寄存器可能表示功能的可用性。
+
+HWCAP_ASIMDRDM
+    ID_AA64ISAR0_EL1.RDM == 0b0001 表示有此功能。
+
+HWCAP_JSCVT
+    ID_AA64ISAR1_EL1.JSCVT == 0b0001 表示有此功能。
+
+HWCAP_FCMA
+    ID_AA64ISAR1_EL1.FCMA == 0b0001 表示有此功能。
+
+HWCAP_LRCPC
+    ID_AA64ISAR1_EL1.LRCPC == 0b0001 表示有此功能。
+
+HWCAP_DCPOP
+    ID_AA64ISAR1_EL1.DPB == 0b0001 表示有此功能。
+
+HWCAP_SHA3
+    ID_AA64ISAR0_EL1.SHA3 == 0b0001 表示有此功能。
+
+HWCAP_SM3
+    ID_AA64ISAR0_EL1.SM3 == 0b0001 表示有此功能。
+
+HWCAP_SM4
+    ID_AA64ISAR0_EL1.SM4 == 0b0001 表示有此功能。
+
+HWCAP_ASIMDDP
+    ID_AA64ISAR0_EL1.DP == 0b0001 表示有此功能。
+
+HWCAP_SHA512
+    ID_AA64ISAR0_EL1.SHA2 == 0b0010 表示有此功能。
+
+HWCAP_SVE
+    ID_AA64PFR0_EL1.SVE == 0b0001 表示有此功能。
+
+HWCAP_ASIMDFHM
+    ID_AA64ISAR0_EL1.FHM == 0b0001 表示有此功能。
+
+HWCAP_DIT
+    ID_AA64PFR0_EL1.DIT == 0b0001 表示有此功能。
+
+HWCAP_USCAT
+    ID_AA64MMFR2_EL1.AT == 0b0001 表示有此功能。
+
+HWCAP_ILRCPC
+    ID_AA64ISAR1_EL1.LRCPC == 0b0010 表示有此功能。
+
+HWCAP_FLAGM
+    ID_AA64ISAR0_EL1.TS == 0b0001 表示有此功能。
+
+HWCAP_SSBS
+    ID_AA64PFR1_EL1.SSBS == 0b0010 表示有此功能。
+
+HWCAP_SB
+    ID_AA64ISAR1_EL1.SB == 0b0001 表示有此功能。
+
+HWCAP_PACA
+    如 Documentation/arm64/pointer-authentication.rst 所描述，
+    ID_AA64ISAR1_EL1.APA == 0b0001 或 ID_AA64ISAR1_EL1.API == 0b0001
+    表示有此功能。
+
+HWCAP_PACG
+    如 Documentation/arm64/pointer-authentication.rst 所描述，
+    ID_AA64ISAR1_EL1.GPA == 0b0001 或 ID_AA64ISAR1_EL1.GPI == 0b0001
+    表示有此功能。
+
+HWCAP2_DCPODP
+
+    ID_AA64ISAR1_EL1.DPB == 0b0010 表示有此功能。
+
+HWCAP2_SVE2
+
+    ID_AA64ZFR0_EL1.SVEVer == 0b0001 表示有此功能。
+
+HWCAP2_SVEAES
+
+    ID_AA64ZFR0_EL1.AES == 0b0001 表示有此功能。
+
+HWCAP2_SVEPMULL
+
+    ID_AA64ZFR0_EL1.AES == 0b0010 表示有此功能。
+
+HWCAP2_SVEBITPERM
+
+    ID_AA64ZFR0_EL1.BitPerm == 0b0001 表示有此功能。
+
+HWCAP2_SVESHA3
+
+    ID_AA64ZFR0_EL1.SHA3 == 0b0001 表示有此功能。
+
+HWCAP2_SVESM4
+
+    ID_AA64ZFR0_EL1.SM4 == 0b0001 表示有此功能。
+
+HWCAP2_FLAGM2
+
+    ID_AA64ISAR0_EL1.TS == 0b0010 表示有此功能。
+
+HWCAP2_FRINT
+
+    ID_AA64ISAR1_EL1.FRINTTS == 0b0001 表示有此功能。
+
+HWCAP2_SVEI8MM
+
+    ID_AA64ZFR0_EL1.I8MM == 0b0001 表示有此功能。
+
+HWCAP2_SVEF32MM
+
+    ID_AA64ZFR0_EL1.F32MM == 0b0001 表示有此功能。
+
+HWCAP2_SVEF64MM
+
+    ID_AA64ZFR0_EL1.F64MM == 0b0001 表示有此功能。
+
+HWCAP2_SVEBF16
+
+    ID_AA64ZFR0_EL1.BF16 == 0b0001 表示有此功能。
+
+HWCAP2_I8MM
+
+    ID_AA64ISAR1_EL1.I8MM == 0b0001 表示有此功能。
+
+HWCAP2_BF16
+
+    ID_AA64ISAR1_EL1.BF16 == 0b0001 表示有此功能。
+
+HWCAP2_DGH
+
+    ID_AA64ISAR1_EL1.DGH == 0b0001 表示有此功能。
+
+HWCAP2_RNG
+
+    ID_AA64ISAR0_EL1.RNDR == 0b0001 表示有此功能。
+
+HWCAP2_BTI
+
+    ID_AA64PFR0_EL1.BT == 0b0001 表示有此功能。
+
+
+4. 未使用的 AT_HWCAP 位
+-----------------------
+
+爲了與用戶空間交互，內核保證 AT_HWCAP 的第62、63位將始終返回0。
+
diff --git a/Documentation/translations/zh_TW/arm64/hugetlbpage.rst b/Documentation/translations/zh_TW/arm64/hugetlbpage.rst
new file mode 100644
index 000000000000..846b500dae97
--- /dev/null
+++ b/Documentation/translations/zh_TW/arm64/hugetlbpage.rst
@@ -0,0 +1,49 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/arm64/hugetlbpage.rst <hugetlbpage_index>`
+
+Translator: Bailu Lin <bailu.lin@vivo.com>
+            Hu Haowen <src.res@email.cn>
+
+=====================
+ARM64中的 HugeTLBpage
+=====================
+
+大頁依靠有效利用 TLBs 來提高地址翻譯的性能。這取決於以下
+兩點 -
+
+  - 大頁的大小
+  - TLBs 支持的條目大小
+
+ARM64 接口支持2種大頁方式。
+
+1) pud/pmd 級別的塊映射
+-----------------------
+
+這是常規大頁，他們的 pmd 或 pud 頁面表條目指向一個內存塊。
+不管 TLB 中支持的條目大小如何，塊映射可以減少翻譯大頁地址
+所需遍歷的頁表深度。
+
+2) 使用連續位
+-------------
+
+架構中轉換頁表條目(D4.5.3, ARM DDI 0487C.a)中提供一個連續
+位告訴 MMU 這個條目是一個連續條目集的一員，它可以被緩存在單
+個 TLB 條目中。
+
+在 Linux 中連續位用來增加 pmd 和 pte(最後一級)級別映射的大
+小。受支持的連續頁表條目數量因頁面大小和頁表級別而異。
+
+
+支持以下大頁尺寸配置 -
+
+  ====== ========   ====    ========    ===
+  -      CONT PTE    PMD    CONT PMD    PUD
+  ====== ========   ====    ========    ===
+  4K:         64K     2M         32M     1G
+  16K:         2M    32M          1G
+  64K:         2M   512M         16G
+  ====== ========   ====    ========    ===
+
diff --git a/Documentation/translations/zh_TW/arm64/index.rst b/Documentation/translations/zh_TW/arm64/index.rst
new file mode 100644
index 000000000000..2322783f3881
--- /dev/null
+++ b/Documentation/translations/zh_TW/arm64/index.rst
@@ -0,0 +1,23 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/arm64/index.rst <arm64_index>`
+:Translator: Bailu Lin <bailu.lin@vivo.com>
+             Hu Haowen <src.res@email.cn>
+
+.. _tw_arm64_index:
+
+
+==========
+ARM64 架構
+==========
+
+.. toctree::
+    :maxdepth: 2
+
+    amu
+    hugetlbpage
+    perf
+    elf_hwcaps
+
diff --git a/Documentation/translations/zh_TW/arm64/legacy_instructions.txt b/Documentation/translations/zh_TW/arm64/legacy_instructions.txt
new file mode 100644
index 000000000000..6d4454f77b9e
--- /dev/null
+++ b/Documentation/translations/zh_TW/arm64/legacy_instructions.txt
@@ -0,0 +1,77 @@
+SPDX-License-Identifier: GPL-2.0
+
+Chinese translated version of Documentation/arm64/legacy_instructions.rst
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly.  However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help.  Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Maintainer: Punit Agrawal <punit.agrawal@arm.com>
+            Suzuki K. Poulose <suzuki.poulose@arm.com>
+Chinese maintainer: Fu Wei <wefu@redhat.com>
+Traditional Chinese maintainer: Hu Haowen <src.res@email.cn>
+---------------------------------------------------------------------
+Documentation/arm64/legacy_instructions.rst 的中文翻譯
+
+如果想評論或更新本文的內容，請直接聯繫原文檔的維護者。如果你使用英文
+交流有困難的話，也可以向中文版維護者求助。如果本翻譯更新不及時或者翻
+譯存在問題，請聯繫中文版維護者。
+
+本文翻譯提交時的 Git 檢出點爲： bc465aa9d045feb0e13b4a8f32cc33c1943f62d6
+
+英文版維護者： Punit Agrawal <punit.agrawal@arm.com>
+            Suzuki K. Poulose <suzuki.poulose@arm.com>
+中文版維護者： 傅煒  Fu Wei <wefu@redhat.com>
+中文版翻譯者： 傅煒  Fu Wei <wefu@redhat.com>
+中文版校譯者： 傅煒  Fu Wei <wefu@redhat.com>
+繁體中文版校譯者：胡皓文  Hu Haowen <src.res@email.cn>
+
+以下爲正文
+---------------------------------------------------------------------
+Linux 內核在 arm64 上的移植提供了一個基礎框架，以支持構架中正在被淘汰或已廢棄指令的模擬執行。
+這個基礎框架的代碼使用未定義指令鉤子（hooks）來支持模擬。如果指令存在，它也允許在硬體中啓用該指令。
+
+模擬模式可通過寫 sysctl 節點（/proc/sys/abi）來控制。
+不同的執行方式及 sysctl 節點的相應值，解釋如下：
+
+* Undef（未定義）
+  值： 0
+  產生未定義指令終止異常。它是那些構架中已廢棄的指令，如 SWP，的默認處理方式。
+
+* Emulate（模擬）
+  值： 1
+  使用軟體模擬方式。爲解決軟體遷移問題，這種模擬指令模式的使用是被跟蹤的，並會發出速率限制警告。
+  它是那些構架中正在被淘汰的指令，如 CP15 barriers（隔離指令），的默認處理方式。
+
+* Hardware Execution（硬體執行）
+  值： 2
+  雖然標記爲正在被淘汰，但一些實現可能提供硬體執行這些指令的使能/禁用操作。
+  使用硬體執行一般會有更好的性能，但將無法收集運行時對正被淘汰指令的使用統計數據。
+
+默認執行模式依賴於指令在構架中狀態。正在被淘汰的指令應該以模擬（Emulate）作爲默認模式，
+而已廢棄的指令必須默認使用未定義（Undef）模式
+
+注意：指令模擬可能無法應對所有情況。更多詳情請參考單獨的指令注釋。
+
+受支持的遺留指令
+-------------
+* SWP{B}
+節點: /proc/sys/abi/swp
+狀態: 已廢棄
+默認執行方式: Undef (0)
+
+* CP15 Barriers
+節點: /proc/sys/abi/cp15_barrier
+狀態: 正被淘汰，不推薦使用
+默認執行方式: Emulate (1)
+
+* SETEND
+節點: /proc/sys/abi/setend
+狀態: 正被淘汰，不推薦使用
+默認執行方式: Emulate (1)*
+註：爲了使能這個特性，系統中的所有 CPU 必須在 EL0 支持混合字節序。
+如果一個新的 CPU （不支持混合字節序） 在使能這個特性後被熱插入系統，
+在應用中可能會出現不可預期的結果。
+
diff --git a/Documentation/translations/zh_TW/arm64/memory.txt b/Documentation/translations/zh_TW/arm64/memory.txt
new file mode 100644
index 000000000000..99c2b78b5674
--- /dev/null
+++ b/Documentation/translations/zh_TW/arm64/memory.txt
@@ -0,0 +1,119 @@
+SPDX-License-Identifier: GPL-2.0
+
+Chinese translated version of Documentation/arm64/memory.rst
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly.  However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help.  Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Maintainer: Catalin Marinas <catalin.marinas@arm.com>
+Chinese maintainer: Fu Wei <wefu@redhat.com>
+Traditional Chinese maintainer: Hu Haowen <src.res@email.cn>
+---------------------------------------------------------------------
+Documentation/arm64/memory.rst 的中文翻譯
+
+如果想評論或更新本文的內容，請直接聯繫原文檔的維護者。如果你使用英文
+交流有困難的話，也可以向中文版維護者求助。如果本翻譯更新不及時或者翻
+譯存在問題，請聯繫中文版維護者。
+
+本文翻譯提交時的 Git 檢出點爲： bc465aa9d045feb0e13b4a8f32cc33c1943f62d6
+
+英文版維護者： Catalin Marinas <catalin.marinas@arm.com>
+中文版維護者： 傅煒  Fu Wei <wefu@redhat.com>
+中文版翻譯者： 傅煒  Fu Wei <wefu@redhat.com>
+中文版校譯者： 傅煒  Fu Wei <wefu@redhat.com>
+繁體中文版校譯者： 胡皓文  Hu Haowen <src.res@email.cn>
+
+以下爲正文
+---------------------------------------------------------------------
+		     Linux 在 AArch64 中的內存布局
+		     ===========================
+
+作者: Catalin Marinas <catalin.marinas@arm.com>
+
+本文檔描述 AArch64 Linux 內核所使用的虛擬內存布局。此構架可以實現
+頁大小爲 4KB 的 4 級轉換表和頁大小爲 64KB 的 3 級轉換表。
+
+AArch64 Linux 使用 3 級或 4 級轉換表，其頁大小配置爲 4KB，對於用戶和內核
+分別都有 39-bit (512GB) 或 48-bit (256TB) 的虛擬地址空間。
+對於頁大小爲 64KB的配置，僅使用 2 級轉換表，有 42-bit (4TB) 的虛擬地址空間，但內存布局相同。
+
+用戶地址空間的 63:48 位爲 0，而內核地址空間的相應位爲 1。TTBRx 的
+選擇由虛擬地址的 63 位給出。swapper_pg_dir 僅包含內核（全局）映射，
+而用戶 pgd 僅包含用戶（非全局）映射。swapper_pg_dir 地址被寫入
+TTBR1 中，且從不寫入 TTBR0。
+
+
+AArch64 Linux 在頁大小爲 4KB，並使用 3 級轉換表時的內存布局：
+
+起始地址			結束地址			大小		用途
+-----------------------------------------------------------------------
+0000000000000000	0000007fffffffff	 512GB		用戶空間
+ffffff8000000000	ffffffffffffffff	 512GB		內核空間
+
+
+AArch64 Linux 在頁大小爲 4KB，並使用 4 級轉換表時的內存布局：
+
+起始地址			結束地址			大小		用途
+-----------------------------------------------------------------------
+0000000000000000	0000ffffffffffff	 256TB		用戶空間
+ffff000000000000	ffffffffffffffff	 256TB		內核空間
+
+
+AArch64 Linux 在頁大小爲 64KB，並使用 2 級轉換表時的內存布局：
+
+起始地址			結束地址			大小		用途
+-----------------------------------------------------------------------
+0000000000000000	000003ffffffffff	   4TB		用戶空間
+fffffc0000000000	ffffffffffffffff	   4TB		內核空間
+
+
+AArch64 Linux 在頁大小爲 64KB，並使用 3 級轉換表時的內存布局：
+
+起始地址			結束地址			大小		用途
+-----------------------------------------------------------------------
+0000000000000000	0000ffffffffffff	 256TB		用戶空間
+ffff000000000000	ffffffffffffffff	 256TB		內核空間
+
+
+更詳細的內核虛擬內存布局，請參閱內核啓動信息。
+
+
+4KB 頁大小的轉換表查找：
+
++--------+--------+--------+--------+--------+--------+--------+--------+
+|63    56|55    48|47    40|39    32|31    24|23    16|15     8|7      0|
++--------+--------+--------+--------+--------+--------+--------+--------+
+ |                 |         |         |         |         |
+ |                 |         |         |         |         v
+ |                 |         |         |         |   [11:0]  頁內偏移
+ |                 |         |         |         +-> [20:12] L3 索引
+ |                 |         |         +-----------> [29:21] L2 索引
+ |                 |         +---------------------> [38:30] L1 索引
+ |                 +-------------------------------> [47:39] L0 索引
+ +-------------------------------------------------> [63] TTBR0/1
+
+
+64KB 頁大小的轉換表查找：
+
++--------+--------+--------+--------+--------+--------+--------+--------+
+|63    56|55    48|47    40|39    32|31    24|23    16|15     8|7      0|
++--------+--------+--------+--------+--------+--------+--------+--------+
+ |                 |    |               |              |
+ |                 |    |               |              v
+ |                 |    |               |            [15:0]  頁內偏移
+ |                 |    |               +----------> [28:16] L3 索引
+ |                 |    +--------------------------> [41:29] L2 索引
+ |                 +-------------------------------> [47:42] L1 索引
+ +-------------------------------------------------> [63] TTBR0/1
+
+
+當使用 KVM 時, 管理程序（hypervisor）在 EL2 中通過相對內核虛擬地址的
+一個固定偏移來映射內核頁（內核虛擬地址的高 24 位設爲零）:
+
+起始地址			結束地址			大小		用途
+-----------------------------------------------------------------------
+0000004000000000	0000007fffffffff	 256GB		在 HYP 中映射的內核對象
+
diff --git a/Documentation/translations/zh_TW/arm64/perf.rst b/Documentation/translations/zh_TW/arm64/perf.rst
new file mode 100644
index 000000000000..f1ffd55dfe50
--- /dev/null
+++ b/Documentation/translations/zh_TW/arm64/perf.rst
@@ -0,0 +1,88 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/arm64/perf.rst <perf_index>`
+
+Translator: Bailu Lin <bailu.lin@vivo.com>
+            Hu Haowen <src.res@email.cn>
+
+=============
+Perf 事件屬性
+=============
+
+:作者: Andrew Murray <andrew.murray@arm.com>
+:日期: 2019-03-06
+
+exclude_user
+------------
+
+該屬性排除用戶空間。
+
+用戶空間始終運行在 EL0，因此該屬性將排除 EL0。
+
+
+exclude_kernel
+--------------
+
+該屬性排除內核空間。
+
+打開 VHE 時內核運行在 EL2，不打開 VHE 時內核運行在 EL1。客戶機
+內核總是運行在 EL1。
+
+對於宿主機，該屬性排除 EL1 和 VHE 上的 EL2。
+
+對於客戶機，該屬性排除 EL1。請注意客戶機從來不會運行在 EL2。
+
+
+exclude_hv
+----------
+
+該屬性排除虛擬機監控器。
+
+對於 VHE 宿主機該屬性將被忽略，此時我們認爲宿主機內核是虛擬機監
+控器。
+
+對於 non-VHE 宿主機該屬性將排除 EL2，因爲虛擬機監控器運行在 EL2
+的任何代碼主要用於客戶機和宿主機的切換。
+
+對於客戶機該屬性無效。請注意客戶機從來不會運行在 EL2。
+
+
+exclude_host / exclude_guest
+----------------------------
+
+這些屬性分別排除了 KVM 宿主機和客戶機。
+
+KVM 宿主機可能運行在 EL0（用戶空間），EL1（non-VHE 內核）和
+EL2（VHE 內核 或 non-VHE 虛擬機監控器）。
+
+KVM 客戶機可能運行在 EL0（用戶空間）和 EL1（內核）。
+
+由於宿主機和客戶機之間重疊的異常級別，我們不能僅僅依靠 PMU 的硬體異
+常過濾機制-因此我們必須啓用/禁用對於客戶機進入和退出的計數。而這在
+VHE 和 non-VHE 系統上表現不同。
+
+對於 non-VHE 系統的 exclude_host 屬性排除 EL2 - 在進入和退出客戶
+機時，我們會根據 exclude_host 和 exclude_guest 屬性在適當的情況下
+禁用/啓用該事件。
+
+對於 VHE 系統的 exclude_guest 屬性排除 EL1，而對其中的 exclude_host
+屬性同時排除 EL0，EL2。在進入和退出客戶機時，我們會適當地根據
+exclude_host 和 exclude_guest 屬性包括/排除 EL0。
+
+以上聲明也適用於在 not-VHE 客戶機使用這些屬性時，但是請注意客戶機從
+來不會運行在 EL2。
+
+
+準確性
+------
+
+在 non-VHE 宿主機上，我們在 EL2 進入/退出宿主機/客戶機的切換時啓用/
+關閉計數器 -但是在啓用/禁用計數器和進入/退出客戶機之間存在一段延時。
+對於 exclude_host， 我們可以通過過濾 EL2 消除在客戶機進入/退出邊界
+上用於計數客戶機事件的宿主機事件計數器。但是當使用 !exclude_hv 時，
+在客戶機進入/退出有一個小的停電窗口無法捕獲到宿主機的事件。
+
+在 VHE 系統沒有停電窗口。
+
diff --git a/Documentation/translations/zh_TW/arm64/silicon-errata.txt b/Documentation/translations/zh_TW/arm64/silicon-errata.txt
new file mode 100644
index 000000000000..bf2077197504
--- /dev/null
+++ b/Documentation/translations/zh_TW/arm64/silicon-errata.txt
@@ -0,0 +1,79 @@
+SPDX-License-Identifier: GPL-2.0
+
+Chinese translated version of Documentation/arm64/silicon-errata.rst
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly.  However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help.  Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+M:	Will Deacon <will.deacon@arm.com>
+zh_CN:	Fu Wei <wefu@redhat.com>
+zh_TW:	Hu Haowen <src.res@email.cn>
+C:	1926e54f115725a9248d0c4c65c22acaf94de4c4
+---------------------------------------------------------------------
+Documentation/arm64/silicon-errata.rst 的中文翻譯
+
+如果想評論或更新本文的內容，請直接聯繫原文檔的維護者。如果你使用英文
+交流有困難的話，也可以向中文版維護者求助。如果本翻譯更新不及時或者翻
+譯存在問題，請聯繫中文版維護者。
+
+英文版維護者： Will Deacon <will.deacon@arm.com>
+中文版維護者： 傅煒  Fu Wei <wefu@redhat.com>
+中文版翻譯者： 傅煒  Fu Wei <wefu@redhat.com>
+中文版校譯者： 傅煒  Fu Wei <wefu@redhat.com>
+繁體中文版校譯者： 胡皓文  Hu Haowen <src.res@email.cn>
+本文翻譯提交時的 Git 檢出點爲： 1926e54f115725a9248d0c4c65c22acaf94de4c4
+
+以下爲正文
+---------------------------------------------------------------------
+                晶片勘誤和軟體補救措施
+                ==================
+
+作者: Will Deacon <will.deacon@arm.com>
+日期: 2015年11月27日
+
+一個不幸的現實：硬體經常帶有一些所謂的「瑕疵（errata）」，導致其在
+某些特定情況下會違背構架定義的行爲。就基於 ARM 的硬體而言，這些瑕疵
+大體可分爲以下幾類：
+
+  A 類：無可行補救措施的嚴重缺陷。
+  B 類：有可接受的補救措施的重大或嚴重缺陷。
+  C 類：在正常操作中不會顯現的小瑕疵。
+
+更多資訊，請在 infocenter.arm.com （需註冊）中查閱「軟體開發者勘誤
+筆記」（「Software Developers Errata Notice」）文檔。
+
+對於 Linux 而言，B 類缺陷可能需要作業系統的某些特別處理。例如，避免
+一個特殊的代碼序列，或是以一種特定的方式配置處理器。在某種不太常見的
+情況下，爲將 A 類缺陷當作 C 類處理，可能需要用類似的手段。這些手段被
+統稱爲「軟體補救措施」，且僅在少數情況需要（例如，那些需要一個運行在
+非安全異常級的補救措施 *並且* 能被 Linux 觸發的情況）。
+
+對於尚在討論中的可能對未受瑕疵影響的系統產生干擾的軟體補救措施，有一個
+相應的內核配置（Kconfig）選項被加在 「內核特性（Kernel Features）」->
+「基於可選方法框架的 ARM 瑕疵補救措施（ARM errata workarounds via
+the alternatives framework）"。這些選項被默認開啓，若探測到受影響的CPU，
+補丁將在運行時被使用。至於對系統運行影響較小的補救措施，內核配置選項
+並不存在，且代碼以某種規避瑕疵的方式被構造（帶注釋爲宜）。
+
+這種做法對於在任意內核原始碼樹中準確地判斷出哪個瑕疵已被軟體方法所補救
+稍微有點麻煩，所以在 Linux 內核中此文件作爲軟體補救措施的註冊表，
+並將在新的軟體補救措施被提交和向後移植（backported）到穩定內核時被更新。
+
+| 實現者         | 受影響的組件    | 勘誤編號        | 內核配置                |
++----------------+-----------------+-----------------+-------------------------+
+| ARM            | Cortex-A53      | #826319         | ARM64_ERRATUM_826319    |
+| ARM            | Cortex-A53      | #827319         | ARM64_ERRATUM_827319    |
+| ARM            | Cortex-A53      | #824069         | ARM64_ERRATUM_824069    |
+| ARM            | Cortex-A53      | #819472         | ARM64_ERRATUM_819472    |
+| ARM            | Cortex-A53      | #845719         | ARM64_ERRATUM_845719    |
+| ARM            | Cortex-A53      | #843419         | ARM64_ERRATUM_843419    |
+| ARM            | Cortex-A57      | #832075         | ARM64_ERRATUM_832075    |
+| ARM            | Cortex-A57      | #852523         | N/A                     |
+| ARM            | Cortex-A57      | #834220         | ARM64_ERRATUM_834220    |
+|                |                 |                 |                         |
+| Cavium         | ThunderX ITS    | #22375, #24313  | CAVIUM_ERRATUM_22375    |
+| Cavium         | ThunderX GICv3  | #23154          | CAVIUM_ERRATUM_23154    |
+
diff --git a/Documentation/translations/zh_TW/arm64/tagged-pointers.txt b/Documentation/translations/zh_TW/arm64/tagged-pointers.txt
new file mode 100644
index 000000000000..87f88628401a
--- /dev/null
+++ b/Documentation/translations/zh_TW/arm64/tagged-pointers.txt
@@ -0,0 +1,57 @@
+SPDX-License-Identifier: GPL-2.0
+
+Chinese translated version of Documentation/arm64/tagged-pointers.rst
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly.  However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help.  Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Maintainer: Will Deacon <will.deacon@arm.com>
+Chinese maintainer: Fu Wei <wefu@redhat.com>
+Traditional Chinese maintainer: Hu Haowen <src.res@email.cn>
+---------------------------------------------------------------------
+Documentation/arm64/tagged-pointers.rst 的中文翻譯
+
+如果想評論或更新本文的內容，請直接聯繫原文檔的維護者。如果你使用英文
+交流有困難的話，也可以向中文版維護者求助。如果本翻譯更新不及時或者翻
+譯存在問題，請聯繫中文版維護者。
+
+英文版維護者： Will Deacon <will.deacon@arm.com>
+中文版維護者： 傅煒  Fu Wei <wefu@redhat.com>
+中文版翻譯者： 傅煒  Fu Wei <wefu@redhat.com>
+中文版校譯者： 傅煒  Fu Wei <wefu@redhat.com>
+繁體中文版校譯者： 胡皓文  Hu Haowen <src.res@email.cn>
+
+以下爲正文
+---------------------------------------------------------------------
+		Linux 在 AArch64 中帶標記的虛擬地址
+		=================================
+
+作者: Will Deacon <will.deacon@arm.com>
+日期: 2013 年 06 月 12 日
+
+本文檔簡述了在 AArch64 地址轉換系統中提供的帶標記的虛擬地址及其在
+AArch64 Linux 中的潛在用途。
+
+內核提供的地址轉換表配置使通過 TTBR0 完成的虛擬地址轉換（即用戶空間
+映射），其虛擬地址的最高 8 位（63:56）會被轉換硬體所忽略。這種機制
+讓這些位可供應用程式自由使用，其注意事項如下：
+
+	(1) 內核要求所有傳遞到 EL1 的用戶空間地址帶有 0x00 標記。
+	    這意味著任何攜帶用戶空間虛擬地址的系統調用（syscall）
+	    參數 *必須* 在陷入內核前使它們的最高字節被清零。
+
+	(2) 非零標記在傳遞信號時不被保存。這意味著在應用程式中利用了
+	    標記的信號處理函數無法依賴 siginfo_t 的用戶空間虛擬
+	    地址所攜帶的包含其內部域信息的標記。此規則的一個例外是
+	    當信號是在調試觀察點的異常處理程序中產生的，此時標記的
+	    信息將被保存。
+
+	(3) 當使用帶標記的指針時需特別留心，因爲僅對兩個虛擬地址
+	    的高字節，C 編譯器很可能無法判斷它們是不同的。
+
+此構架會阻止對帶標記的 PC 指針的利用，因此在異常返回時，其高字節
+將被設置成一個爲 「55」 的擴展符。
+
diff --git a/Documentation/translations/zh_TW/cpu-freq/core.rst b/Documentation/translations/zh_TW/cpu-freq/core.rst
new file mode 100644
index 000000000000..3d890c2f2a61
--- /dev/null
+++ b/Documentation/translations/zh_TW/cpu-freq/core.rst
@@ -0,0 +1,108 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :doc:`../../../cpu-freq/core`
+:Translator: Yanteng Si <siyanteng@loongson.cn>
+             Hu Haowen <src.res@email.cn>
+
+.. _tw_core.rst:
+
+
+====================================
+CPUFreq核心和CPUFreq通知器的通用說明
+====================================
+
+作者:
+	- Dominik Brodowski  <linux@brodo.de>
+	- David Kimdon <dwhedon@debian.org>
+	- Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+	- Viresh Kumar <viresh.kumar@linaro.org>
+
+.. 目錄:
+
+   1.  CPUFreq核心和接口
+   2.  CPUFreq通知器
+   3.  含有Operating Performance Point (OPP)的CPUFreq表的生成
+
+1. CPUFreq核心和接口
+======================
+
+cpufreq核心代碼位於drivers/cpufreq/cpufreq.c中。這些cpufreq代碼爲CPUFreq架構的驅
+動程序（那些操作硬體切換頻率的代碼）以及 "通知器 "提供了一個標準化的接口。
+這些是設備驅動程序或需要了解策略變化的其它內核部分（如 ACPI 熱量管理）或所有頻率更改（除
+計時代碼外），甚至需要強制確定速度限制的通知器（如 ARM 架構上的 LCD 驅動程序）。
+此外， 內核 "常數" loops_per_jiffy會根據頻率變化而更新。
+
+cpufreq策略的引用計數由 cpufreq_cpu_get 和 cpufreq_cpu_put 來完成，以確保 cpufreq 驅
+動程序被正確地註冊到核心中，並且驅動程序在 cpufreq_put_cpu 被調用之前不會被卸載。這也保證
+了每個CPU核的cpufreq 策略在使用期間不會被釋放。
+
+2. CPUFreq 通知器
+====================
+
+CPUFreq通知器符合標準的內核通知器接口。
+關於通知器的細節請參閱 linux/include/linux/notifier.h。
+
+這裡有兩個不同的CPUfreq通知器 - 策略通知器和轉換通知器。
+
+
+2.1 CPUFreq策略通知器
+----------------------------
+
+當創建或移除策略時，這些都會被通知。
+
+階段是在通知器的第二個參數中指定的。當第一次創建策略時，階段是CPUFREQ_CREATE_POLICY，當
+策略被移除時，階段是CPUFREQ_REMOVE_POLICY。
+
+第三個參數 ``void *pointer`` 指向一個結構體cpufreq_policy，其包括min，max(新策略的下限和
+上限（單位爲kHz）)這幾個值。
+
+
+2.2 CPUFreq轉換通知器
+--------------------------------
+
+當CPUfreq驅動切換CPU核心頻率時，策略中的每個在線CPU都會收到兩次通知，這些變化沒有任何外部干
+預。
+
+第二個參數指定階段 - CPUFREQ_PRECHANGE or CPUFREQ_POSTCHANGE.
+
+第三個參數是一個包含如下值的結構體cpufreq_freqs：
+
+=====	====================
+cpu	受影響cpu的編號
+old	舊頻率
+new	新頻率
+flags	cpufreq驅動的標誌
+=====	====================
+
+3. 含有Operating Performance Point (OPP)的CPUFreq表的生成
+==================================================================
+關於OPP的細節請參閱 Documentation/power/opp.rst
+
+dev_pm_opp_init_cpufreq_table -
+	這個功能提供了一個隨時可用的轉換程序，用來將OPP層關於可用頻率的內部信息翻譯成一種容易提供給
+	cpufreq的格式。
+
+	.. Warning::
+
+		不要在中斷上下文中使用此函數。
+
+	例如::
+
+	 soc_pm_init()
+	 {
+		/* Do things */
+		r = dev_pm_opp_init_cpufreq_table(dev, &freq_table);
+		if (!r)
+			policy->freq_table = freq_table;
+		/* Do other things */
+	 }
+
+	.. note::
+
+		該函數只有在CONFIG_PM_OPP之外還啓用了CONFIG_CPU_FREQ時才可用。
+
+dev_pm_opp_free_cpufreq_table
+	釋放dev_pm_opp_init_cpufreq_table分配的表。
+
diff --git a/Documentation/translations/zh_TW/cpu-freq/cpu-drivers.rst b/Documentation/translations/zh_TW/cpu-freq/cpu-drivers.rst
new file mode 100644
index 000000000000..2bb8197cd320
--- /dev/null
+++ b/Documentation/translations/zh_TW/cpu-freq/cpu-drivers.rst
@@ -0,0 +1,256 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :doc:`../../../cpu-freq/cpu-drivers`
+:Translator: Yanteng Si <siyanteng@loongson.cn>
+             Hu Haowen <src.res@email.cn>
+
+.. _tw_cpu-drivers.rst:
+
+
+=======================================
+如何實現一個新的CPUFreq處理器驅動程序？
+=======================================
+
+作者:
+
+
+	- Dominik Brodowski  <linux@brodo.de>
+	- Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+	- Viresh Kumar <viresh.kumar@linaro.org>
+
+.. Contents
+
+   1.   怎麼做？
+   1.1  初始化
+   1.2  Per-CPU 初始化
+   1.3  驗證
+   1.4  target/target_index 或 setpolicy?
+   1.5  target/target_index
+   1.6  setpolicy
+   1.7  get_intermediate 與 target_intermediate
+   2.   頻率表助手
+
+
+
+1. 怎麼做？
+===========
+
+如此，你剛剛得到了一個全新的CPU/晶片組及其數據手冊，並希望爲這個CPU/晶片組添加cpufreq
+支持？很好，這裡有一些至關重要的提示：
+
+
+1.1 初始化
+----------
+
+首先，在__initcall_level_7 (module_init())或更靠後的函數中檢查這個內核是否
+運行在正確的CPU和正確的晶片組上。如果是，則使用cpufreq_register_driver()向
+CPUfreq核心層註冊一個cpufreq_driver結構體。
+
+結構體cpufreq_driver應該包含什麼成員?
+
+ .name - 驅動的名字。
+
+ .init - 一個指向per-policy初始化函數的指針。
+
+ .verify - 一個指向"verification"函數的指針。
+
+ .setpolicy 或 .fast_switch 或 .target 或 .target_index - 差異見
+ 下文。
+
+並且可選擇
+
+ .flags - cpufreq核的提示。
+
+ .driver_data - cpufreq驅動程序的特定數據。
+
+ .get_intermediate 和 target_intermediate - 用於在改變CPU頻率時切換到穩定
+ 的頻率。
+
+ .get - 返回CPU的當前頻率。
+
+ .bios_limit - 返回HW/BIOS對CPU的最大頻率限制值。
+
+ .exit - 一個指向per-policy清理函數的指針，該函數在cpu熱插拔過程的CPU_POST_DEAD
+ 階段被調用。
+
+ .suspend - 一個指向per-policy暫停函數的指針，該函數在關中斷且在該策略的調節器停止
+ 後被調用。
+
+ .resume - 一個指向per-policy恢復函數的指針，該函數在關中斷且在調節器再一次開始前被
+ 調用。
+
+ .ready - 一個指向per-policy準備函數的指針，該函數在策略完全初始化之後被調用。
+
+ .attr - 一個指向NULL結尾的"struct freq_attr"列表的指針，該函數允許導出值到
+ sysfs。
+
+ .boost_enabled - 如果設置，則啓用提升(boost)頻率。
+
+ .set_boost - 一個指向per-policy函數的指針，該函數用來開啓/關閉提升(boost)頻率功能。
+
+
+1.2 Per-CPU 初始化
+------------------
+
+每當一個新的CPU被註冊到設備模型中，或者在cpufreq驅動註冊自己之後，如果此CPU的cpufreq策
+略不存在，則會調用per-policy的初始化函數cpufreq_driver.init。請注意，.init()和.exit()程序
+只對策略調用一次，而不是對策略管理的每個CPU調用一次。它需要一個 ``struct cpufreq_policy
+*policy`` 作爲參數。現在該怎麼做呢？
+
+如果有必要，請在你的CPU上激活CPUfreq功能支持。
+
+然後，驅動程序必須填寫以下數值:
+
++-----------------------------------+--------------------------------------+
+|policy->cpuinfo.min_freq 和	   |					  |
+|policy->cpuinfo.max_freq	    | 該CPU支持的最低和最高頻率（kHz）     |
+|				    |                                      |
+|				    | 				           |
++-----------------------------------+--------------------------------------+
+|policy->cpuinfo.transition_latency |                                      |
+|				    | CPU在兩個頻率之間切換所需的時間，以  |
+|				    | 納秒爲單位（如適用，否則指定         |
+|				    | CPUFREQ_ETERNAL）                    |
++-----------------------------------+--------------------------------------+
+|policy->cur			    | 該CPU當前的工作頻率(如適用)          |
+|				    |                                      |
++-----------------------------------+--------------------------------------+
+|policy->min,			    |					   |
+|policy->max,			    |					   |
+|policy->policy and, if necessary,  |					   |
+|policy->governor		    | 必須包含該cpu的 「默認策略」。稍後   |
+|				    | 會用這些值調用                       |
+|				    | cpufreq_driver.verify and either     |
+|				    | cpufreq_driver.setpolicy or          |
+|				    | cpufreq_driver.target/target_index   |
+|				    | 		                           |
++-----------------------------------+--------------------------------------+
+|policy->cpus			    | 用與這個CPU一起做DVFS的(在線+離線)   |
+|				    | CPU(即與它共享時鐘/電壓軌)的掩碼更新 |
+|				    | 這個                                 |
+|				    |                                      |
++-----------------------------------+--------------------------------------+
+
+對於設置其中的一些值(cpuinfo.min[max]_freq, policy->min[max])，頻率表助手可能會有幫
+助。關於它們的更多信息，請參見第2節。
+
+
+1.3 驗證
+--------
+
+當用戶決定設置一個新的策略(由 「policy,governor,min,max組成」)時，必須對這個策略進行驗證，
+以便糾正不兼容的值。爲了驗證這些值，cpufreq_verify_within_limits(``struct cpufreq_policy
+*policy``, ``unsigned int min_freq``, ``unsigned int max_freq``)函數可能會有幫助。
+關於頻率表助手的詳細內容請參見第2節。
+
+您需要確保至少有一個有效頻率（或工作範圍）在 policy->min 和 policy->max 範圍內。如果有必
+要，先增加policy->max，只有在沒有辦法的情況下，才減少policy->min。
+
+
+1.4 target 或 target_index 或 setpolicy 或 fast_switch?
+-------------------------------------------------------
+
+大多數cpufreq驅動甚至大多數cpu頻率升降算法只允許將CPU頻率設置爲預定義的固定值。對於這些，你
+可以使用->target()，->target_index()或->fast_switch()回調。
+
+有些cpufreq功能的處理器可以自己在某些限制之間切換頻率。這些應使用->setpolicy()回調。
+
+
+1.5. target/target_index
+------------------------
+
+target_index調用有兩個參數：``struct cpufreq_policy * policy``和``unsigned int``
+索引(於列出的頻率表)。
+
+當調用這裡時，CPUfreq驅動必須設置新的頻率。實際頻率必須由freq_table[index].frequency決定。
+
+它應該總是在錯誤的情況下恢復到之前的頻率(即policy->restore_freq)，即使我們之前切換到中間頻率。
+
+已棄用
+----------
+目標調用有三個參數。``struct cpufreq_policy * policy``, unsigned int target_frequency,
+unsigned int relation.
+
+CPUfreq驅動在調用這裡時必須設置新的頻率。實際的頻率必須使用以下規則來確定。
+
+- 緊跟 "目標頻率"。
+- policy->min <= new_freq <= policy->max (這必須是有效的!!!)
+- 如果 relation==CPUFREQ_REL_L，嘗試選擇一個高於或等於 target_freq 的 new_freq。("L代表
+  最低，但不能低於")
+- 如果 relation==CPUFREQ_REL_H，嘗試選擇一個低於或等於 target_freq 的 new_freq。("H代表
+  最高，但不能高於")
+
+這裡，頻率表助手可能會幫助你--詳見第2節。
+
+1.6. fast_switch
+----------------
+
+這個函數用於從調度器的上下文進行頻率切換。並非所有的驅動都要實現它，因爲不允許在這個回調中睡眠。這
+個回調必須經過高度優化，以儘可能快地進行切換。
+
+這個函數有兩個參數： ``struct cpufreq_policy *policy`` 和 ``unsigned int target_frequency``。
+
+
+1.7 setpolicy
+-------------
+
+setpolicy調用只需要一個``struct cpufreq_policy * policy``作爲參數。需要將處理器內或晶片組內動態頻
+率切換的下限設置爲policy->min，上限設置爲policy->max，如果支持的話，當policy->policy爲
+CPUFREQ_POLICY_PERFORMANCE時選擇面向性能的設置，當CPUFREQ_POLICY_POWERSAVE時選擇面向省電的設置。
+也可以查看drivers/cpufreq/longrun.c中的參考實現。
+
+1.8 get_intermediate 和 target_intermediate
+--------------------------------------------
+
+僅適用於 target_index() 和 CPUFREQ_ASYNC_NOTIFICATION 未設置的驅動。
+
+get_intermediate應該返回一個平台想要切換到的穩定的中間頻率，target_intermediate()應該將CPU設置爲
+該頻率，然後再跳轉到'index'對應的頻率。核心會負責發送通知，驅動不必在target_intermediate()或
+target_index()中處理。
+
+在驅動程序不想因爲某個目標頻率切換到中間頻率的情況下，它們可以從get_intermediate()中返回'0'。在這種情況
+下，核心將直接調用->target_index()。
+
+注意：->target_index()應該在失敗的情況下恢復到policy->restore_freq，因爲core會爲此發送通知。
+
+
+2. 頻率表助手
+=============
+
+由於大多數cpufreq處理器只允許被設置爲幾個特定的頻率，因此，一個帶有一些函數的 「頻率表」可能會輔助處理器驅動
+程序的一些工作。這樣的 "頻率表" 由一個cpufreq_frequency_table條目構成的數組組成，"driver_data" 中包
+含了驅動程序的具體數值，"frequency" 中包含了相應的頻率，並設置了標誌。在表的最後，需要添加一個
+cpufreq_frequency_table條目，頻率設置爲CPUFREQ_TABLE_END。而如果想跳過表中的一個條目，則將頻率設置爲
+CPUFREQ_ENTRY_INVALID。這些條目不需要按照任何特定的順序排序，但如果它們是cpufreq 核心會對它們進行快速的DVFS，
+因爲搜索最佳匹配會更快。
+
+如果策略在其policy->freq_table欄位中包含一個有效的指針，cpufreq表就會被核心自動驗證。
+
+cpufreq_frequency_table_verify()保證至少有一個有效的頻率在policy->min和policy->max範圍內，並且所有其他
+標準都被滿足。這對->verify調用很有幫助。
+
+cpufreq_frequency_table_target()是對應於->target階段的頻率表助手。只要把數值傳遞給這個函數，這個函數就會返
+回包含CPU要設置的頻率的頻率表條目。
+
+以下宏可以作爲cpufreq_frequency_table的疊代器。
+
+cpufreq_for_each_entry(pos, table) - 遍歷頻率表的所有條目。
+
+cpufreq_for_each_valid_entry(pos, table) - 該函數遍歷所有條目，不包括CPUFREQ_ENTRY_INVALID頻率。
+使用參數 "pos"-一個``cpufreq_frequency_table * `` 作爲循環變量，使用參數 "table"-作爲你想疊代
+的``cpufreq_frequency_table * `` 。
+
+例如::
+
+	struct cpufreq_frequency_table *pos, *driver_freq_table;
+
+	cpufreq_for_each_entry(pos, driver_freq_table) {
+		/* Do something with pos */
+		pos->frequency = ...
+	}
+
+如果你需要在driver_freq_table中處理pos的位置，不要減去指針，因爲它的代價相當高。相反，使用宏
+cpufreq_for_each_entry_idx() 和 cpufreq_for_each_valid_entry_idx() 。
+
diff --git a/Documentation/translations/zh_TW/cpu-freq/cpufreq-stats.rst b/Documentation/translations/zh_TW/cpu-freq/cpufreq-stats.rst
new file mode 100644
index 000000000000..d80bfed50e8c
--- /dev/null
+++ b/Documentation/translations/zh_TW/cpu-freq/cpufreq-stats.rst
@@ -0,0 +1,132 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :doc:`../../../cpu-freq/cpufreq-stats`
+:Translator: Yanteng Si <siyanteng@loongson.cn>
+             Hu Haowen <src.res@email.cn>
+
+.. _tw_cpufreq-stats.rst:
+
+
+==========================================
+sysfs CPUFreq Stats的一般說明
+==========================================
+
+用戶信息
+
+
+作者: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+
+.. Contents
+
+   1. 簡介
+   2. 提供的統計數據(舉例說明)
+   3. 配置cpufreq-stats
+
+
+1. 簡介
+===============
+
+cpufreq-stats是一個爲每個CPU提供CPU頻率統計的驅動。
+這些統計數據在/sysfs中以一堆只讀接口的形式提供。這個接口（在配置好後）將出現在
+/sysfs（<sysfs root>/devices/system/cpu/cpuX/cpufreq/stats/）中cpufreq下的一個單
+獨的目錄中，提供給每個CPU。
+各種統計數據將在此目錄下形成只讀文件。
+
+此驅動是獨立於任何可能運行在你所用CPU上的特定cpufreq_driver而設計的。因此，它將與所有
+cpufreq_driver一起工作。
+
+
+2. 提供的統計數據(舉例說明)
+=====================================
+
+cpufreq stats提供了以下統計數據（在下面詳細解釋）。
+
+-  time_in_state
+-  total_trans
+-  trans_table
+
+所有的統計數據將從統計驅動被載入的時間（或統計被重置的時間）開始，到某一統計數據被讀取的時間爲止。
+顯然，統計驅動不會有任何關於統計驅動載入之前的頻率轉換信息。
+
+::
+
+    <mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # ls -l
+    total 0
+    drwxr-xr-x  2 root root    0 May 14 16:06 .
+    drwxr-xr-x  3 root root    0 May 14 15:58 ..
+    --w-------  1 root root 4096 May 14 16:06 reset
+    -r--r--r--  1 root root 4096 May 14 16:06 time_in_state
+    -r--r--r--  1 root root 4096 May 14 16:06 total_trans
+    -r--r--r--  1 root root 4096 May 14 16:06 trans_table
+
+- **reset**
+
+只寫屬性，可用於重置統計計數器。這對於評估不同調節器下的系統行爲非常有用，且無需重啓。
+
+
+- **time_in_state**
+
+此項給出了這個CPU所支持的每個頻率所花費的時間。cat輸出的每一行都會有"<frequency>
+<time>"對，表示這個CPU在<frequency>上花費了<time>個usertime單位的時間。這裡的
+usertime單位是10mS（類似於/proc中輸出的其他時間）。
+
+::
+
+    <mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # cat time_in_state
+    3600000 2089
+    3400000 136
+    3200000 34
+    3000000 67
+    2800000 172488
+
+
+- **total_trans**
+
+給出了這個CPU上頻率轉換的總次數。cat的輸出將有一個單一的計數，這就是頻率轉換的總數。
+
+::
+
+    <mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # cat total_trans
+    20
+
+- **trans_table**
+
+這將提供所有CPU頻率轉換的細粒度信息。這裡的cat輸出是一個二維矩陣，其中一個條目<i, j>（第
+i行，第j列）代表從Freq_i到Freq_j的轉換次數。Freq_i行和Freq_j列遵循驅動最初提供給cpufreq
+核的頻率表的排序順序，因此可以排序（升序或降序）或不排序。 這裡的輸出也包含了每行每列的實際
+頻率值，以便更好地閱讀。
+
+如果轉換表大於PAGE_SIZE，讀取時將返回一個-EFBIG錯誤。
+
+::
+
+    <mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # cat trans_table
+    From  :    To
+	    :   3600000   3400000   3200000   3000000   2800000
+    3600000:         0         5         0         0         0
+    3400000:         4         0         2         0         0
+    3200000:         0         1         0         2         0
+    3000000:         0         0         1         0         3
+    2800000:         0         0         0         2         0
+
+3. 配置cpufreq-stats
+============================
+
+要在你的內核中配置cpufreq-stats::
+
+	Config Main Menu
+		Power management options (ACPI, APM)  --->
+			CPU Frequency scaling  --->
+				[*] CPU Frequency scaling
+				[*]   CPU frequency translation statistics
+
+
+"CPU Frequency scaling" (CONFIG_CPU_FREQ) 應該被啓用以配置cpufreq-stats。
+
+"CPU frequency translation statistics" (CONFIG_CPU_FREQ_STAT)提供了包括
+time_in_state、total_trans和trans_table的統計數據。
+
+一旦啓用了這個選項，並且你的CPU支持cpufrequency，你就可以在/sysfs中看到CPU頻率統計。
+
diff --git a/Documentation/translations/zh_TW/cpu-freq/index.rst b/Documentation/translations/zh_TW/cpu-freq/index.rst
new file mode 100644
index 000000000000..1a8e680f95ed
--- /dev/null
+++ b/Documentation/translations/zh_TW/cpu-freq/index.rst
@@ -0,0 +1,47 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :doc:`../../../cpu-freq/index`
+:Translator: Yanteng Si <siyanteng@loongson.cn>
+             Hu Haowen <src.res@email.cn>
+
+.. _tw_index.rst:
+
+
+=======================================================
+Linux CPUFreq - Linux(TM)內核中的CPU頻率和電壓升降代碼
+=======================================================
+
+Author: Dominik Brodowski  <linux@brodo.de>
+
+      時鐘升降允許你在運行中改變CPU的時鐘速度。這是一個很好的節省電池電量的方法，因爲時
+      鐘速度越低，CPU消耗的電量越少。
+
+
+.. toctree::
+   :maxdepth: 1
+
+   core
+   cpu-drivers
+   cpufreq-stats
+
+郵件列表
+------------
+這裡有一個 CPU 頻率變化的 CVS 提交和通用列表，您可以在這裡報告bug、問題或提交補丁。要發
+布消息，請發送電子郵件到 linux-pm@vger.kernel.org。
+
+連結
+-----
+FTP檔案:
+* ftp://ftp.linux.org.uk/pub/linux/cpufreq/
+
+如何訪問CVS倉庫:
+* http://cvs.arm.linux.org.uk/
+
+CPUFreq郵件列表:
+* http://vger.kernel.org/vger-lists.html#linux-pm
+
+SA-1100的時鐘和電壓標度:
+* http://www.lartmaker.nl/projects/scaling
+
diff --git a/Documentation/translations/zh_TW/disclaimer-zh_TW.rst b/Documentation/translations/zh_TW/disclaimer-zh_TW.rst
new file mode 100644
index 000000000000..f4cf87d03dc5
--- /dev/null
+++ b/Documentation/translations/zh_TW/disclaimer-zh_TW.rst
@@ -0,0 +1,11 @@
+:orphan:
+
+.. warning::
+     此文件的目的是爲讓中文讀者更容易閱讀和理解，而不是作爲一個分支。因此，
+     如果您對此文件有任何意見或改動，請先嘗試更新原始英文文件。如果要更改或
+     修正某處翻譯文件，請將意見或補丁發送給維護者（聯繫方式見下）。
+
+.. note::
+     如果您發現本文檔與原始文件有任何不同或者有翻譯問題，請聯繫該文件的譯者，
+     或者發送電子郵件給胡皓文以獲取幫助：<src.res@email.cn>。
+
diff --git a/Documentation/translations/zh_TW/filesystems/debugfs.rst b/Documentation/translations/zh_TW/filesystems/debugfs.rst
new file mode 100644
index 000000000000..270dd94fddf1
--- /dev/null
+++ b/Documentation/translations/zh_TW/filesystems/debugfs.rst
@@ -0,0 +1,224 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :doc:`../../../filesystems/debugfs`
+
+=======
+Debugfs
+=======
+
+譯者
+::
+
+	中文版維護者：羅楚成 Chucheng Luo <luochucheng@vivo.com>
+	中文版翻譯者：羅楚成 Chucheng Luo <luochucheng@vivo.com>
+	中文版校譯者: 羅楚成 Chucheng Luo <luochucheng@vivo.com>
+	繁體中文版校譯者: 胡皓文 Hu Haowen <src.res@email.cn>
+
+
+
+版權所有2020 羅楚成 <luochucheng@vivo.com>
+版權所有2021 胡皓文 Hu Haowen <src.res@email.cn>
+
+
+Debugfs是內核開發人員在用戶空間獲取信息的簡單方法。與/proc不同，proc只提供進程
+信息。也不像sysfs,具有嚴格的「每個文件一個值「的規則。debugfs根本沒有規則,開發
+人員可以在這裡放置他們想要的任何信息。debugfs文件系統也不能用作穩定的ABI接口。
+從理論上講，debugfs導出文件的時候沒有任何約束。但是[1]實際情況並不總是那麼
+簡單。即使是debugfs接口，也最好根據需要進行設計,並儘量保持接口不變。
+
+
+Debugfs通常使用以下命令安裝::
+
+    mount -t debugfs none /sys/kernel/debug
+
+（或等效的/etc/fstab行）。
+debugfs根目錄默認僅可由root用戶訪問。要更改對文件樹的訪問，請使用「 uid」，「 gid」
+和「 mode」掛載選項。請注意，debugfs API僅按照GPL協議導出到模塊。
+
+使用debugfs的代碼應包含<linux/debugfs.h>。然後，首先是創建至少一個目錄來保存
+一組debugfs文件::
+
+    struct dentry *debugfs_create_dir(const char *name, struct dentry *parent);
+
+如果成功，此調用將在指定的父目錄下創建一個名爲name的目錄。如果parent參數爲空，
+則會在debugfs根目錄中創建。創建目錄成功時，返回值是一個指向dentry結構體的指針。
+該dentry結構體的指針可用於在目錄中創建文件（以及最後將其清理乾淨）。ERR_PTR
+（-ERROR）返回值表明出錯。如果返回ERR_PTR（-ENODEV），則表明內核是在沒有debugfs
+支持的情況下構建的，並且下述函數都不會起作用。
+
+在debugfs目錄中創建文件的最通用方法是::
+
+    struct dentry *debugfs_create_file(const char *name, umode_t mode,
+				       struct dentry *parent, void *data,
+				       const struct file_operations *fops);
+
+在這裡，name是要創建的文件的名稱，mode描述了訪問文件應具有的權限，parent指向
+應該保存文件的目錄，data將存儲在產生的inode結構體的i_private欄位中，而fops是
+一組文件操作函數，這些函數中實現文件操作的具體行爲。至少，read（）和/或
+write（）操作應提供；其他可以根據需要包括在內。同樣的，返回值將是指向創建文件
+的dentry指針，錯誤時返回ERR_PTR（-ERROR），系統不支持debugfs時返回值爲ERR_PTR
+（-ENODEV）。創建一個初始大小的文件，可以使用以下函數代替::
+
+    struct dentry *debugfs_create_file_size(const char *name, umode_t mode,
+				struct dentry *parent, void *data,
+				const struct file_operations *fops,
+				loff_t file_size);
+
+file_size是初始文件大小。其他參數跟函數debugfs_create_file的相同。
+
+在許多情況下，沒必要自己去創建一組文件操作;對於一些簡單的情況,debugfs代碼提供
+了許多幫助函數。包含單個整數值的文件可以使用以下任何一項創建::
+
+    void debugfs_create_u8(const char *name, umode_t mode,
+			   struct dentry *parent, u8 *value);
+    void debugfs_create_u16(const char *name, umode_t mode,
+			    struct dentry *parent, u16 *value);
+    struct dentry *debugfs_create_u32(const char *name, umode_t mode,
+				      struct dentry *parent, u32 *value);
+    void debugfs_create_u64(const char *name, umode_t mode,
+			    struct dentry *parent, u64 *value);
+
+這些文件支持讀取和寫入給定值。如果某個文件不支持寫入，只需根據需要設置mode
+參數位。這些文件中的值以十進位表示；如果需要使用十六進位，可以使用以下函數
+替代::
+
+    void debugfs_create_x8(const char *name, umode_t mode,
+			   struct dentry *parent, u8 *value);
+    void debugfs_create_x16(const char *name, umode_t mode,
+			    struct dentry *parent, u16 *value);
+    void debugfs_create_x32(const char *name, umode_t mode,
+			    struct dentry *parent, u32 *value);
+    void debugfs_create_x64(const char *name, umode_t mode,
+			    struct dentry *parent, u64 *value);
+
+這些功能只有在開發人員知道導出值的大小的時候才有用。某些數據類型在不同的架構上
+有不同的寬度，這樣會使情況變得有些複雜。在這種特殊情況下可以使用以下函數::
+
+    void debugfs_create_size_t(const char *name, umode_t mode,
+			       struct dentry *parent, size_t *value);
+
+不出所料，此函數將創建一個debugfs文件來表示類型爲size_t的變量。
+
+同樣地，也有導出無符號長整型變量的函數，分別以十進位和十六進位表示如下::
+
+    struct dentry *debugfs_create_ulong(const char *name, umode_t mode,
+					struct dentry *parent,
+					unsigned long *value);
+    void debugfs_create_xul(const char *name, umode_t mode,
+			    struct dentry *parent, unsigned long *value);
+
+布爾值可以通過以下方式放置在debugfs中::
+
+    struct dentry *debugfs_create_bool(const char *name, umode_t mode,
+				       struct dentry *parent, bool *value);
+
+
+讀取結果文件將產生Y（對於非零值）或N，後跟換行符寫入的時候，它只接受大寫或小寫
+值或1或0。任何其他輸入將被忽略。
+
+同樣，atomic_t類型的值也可以放置在debugfs中::
+
+    void debugfs_create_atomic_t(const char *name, umode_t mode,
+				 struct dentry *parent, atomic_t *value)
+
+讀取此文件將獲得atomic_t值，寫入此文件將設置atomic_t值。
+
+另一個選擇是通過以下結構體和函數導出一個任意二進位數據塊::
+
+    struct debugfs_blob_wrapper {
+	void *data;
+	unsigned long size;
+    };
+
+    struct dentry *debugfs_create_blob(const char *name, umode_t mode,
+				       struct dentry *parent,
+				       struct debugfs_blob_wrapper *blob);
+
+讀取此文件將返回由指針指向debugfs_blob_wrapper結構體的數據。一些驅動使用「blobs」
+作爲一種返回幾行（靜態）格式化文本的簡單方法。這個函數可用於導出二進位信息，但
+似乎在主線中沒有任何代碼這樣做。請注意，使用debugfs_create_blob（）命令創建的
+所有文件是只讀的。
+
+如果您要轉儲一個寄存器塊（在開發過程中經常會這麼做，但是這樣的調試代碼很少上傳
+到主線中。Debugfs提供兩個函數：一個用於創建僅寄存器文件，另一個把一個寄存器塊
+插入一個順序文件中::
+
+    struct debugfs_reg32 {
+	char *name;
+	unsigned long offset;
+    };
+
+    struct debugfs_regset32 {
+	struct debugfs_reg32 *regs;
+	int nregs;
+	void __iomem *base;
+    };
+
+    struct dentry *debugfs_create_regset32(const char *name, umode_t mode,
+				     struct dentry *parent,
+				     struct debugfs_regset32 *regset);
+
+    void debugfs_print_regs32(struct seq_file *s, struct debugfs_reg32 *regs,
+			 int nregs, void __iomem *base, char *prefix);
+
+「base」參數可能爲0，但您可能需要使用__stringify構建reg32數組，實際上有許多寄存器
+名稱（宏）是寄存器塊在基址上的字節偏移量。
+
+如果要在debugfs中轉儲u32數組，可以使用以下函數創建文件::
+
+     void debugfs_create_u32_array(const char *name, umode_t mode,
+			struct dentry *parent,
+			u32 *array, u32 elements);
+
+「array」參數提供數據，而「elements」參數爲數組中元素的數量。注意：數組創建後，數組
+大小無法更改。
+
+有一個函數來創建與設備相關的seq_file::
+
+   struct dentry *debugfs_create_devm_seqfile(struct device *dev,
+				const char *name,
+				struct dentry *parent,
+				int (*read_fn)(struct seq_file *s,
+					void *data));
+
+「dev」參數是與此debugfs文件相關的設備，並且「read_fn」是一個函數指針，這個函數在
+列印seq_file內容的時候被回調。
+
+還有一些其他的面向目錄的函數::
+
+    struct dentry *debugfs_rename(struct dentry *old_dir,
+		                  struct dentry *old_dentry,
+		                  struct dentry *new_dir,
+				  const char *new_name);
+
+    struct dentry *debugfs_create_symlink(const char *name,
+                                          struct dentry *parent,
+                                          const char *target);
+
+調用debugfs_rename()將爲現有的debugfs文件重命名，可能同時切換目錄。 new_name
+函數調用之前不能存在；返回值爲old_dentry，其中包含更新的信息。可以使用
+debugfs_create_symlink（）創建符號連結。
+
+所有debugfs用戶必須考慮的一件事是：
+
+debugfs不會自動清除在其中創建的任何目錄。如果一個模塊在不顯式刪除debugfs目錄的
+情況下卸載模塊，結果將會遺留很多野指針，從而導致系統不穩定。因此，所有debugfs
+用戶-至少是那些可以作爲模塊構建的用戶-必須做模塊卸載的時候準備刪除在此創建的
+所有文件和目錄。一份文件可以通過以下方式刪除::
+
+    void debugfs_remove(struct dentry *dentry);
+
+dentry值可以爲NULL或錯誤值，在這種情況下，不會有任何文件被刪除。
+
+很久以前，內核開發者使用debugfs時需要記錄他們創建的每個dentry指針，以便最後所有
+文件都可以被清理掉。但是，現在debugfs用戶能調用以下函數遞歸清除之前創建的文件::
+
+    void debugfs_remove_recursive(struct dentry *dentry);
+
+如果將對應頂層目錄的dentry傳遞給以上函數，則該目錄下的整個層次結構將會被刪除。
+
+注釋：
+[1] http://lwn.net/Articles/309298/
+
diff --git a/Documentation/translations/zh_TW/filesystems/index.rst b/Documentation/translations/zh_TW/filesystems/index.rst
new file mode 100644
index 000000000000..4e5dde0dca3c
--- /dev/null
+++ b/Documentation/translations/zh_TW/filesystems/index.rst
@@ -0,0 +1,31 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/filesystems/index.rst <filesystems_index>`
+:Translator: Wang Wenhu <wenhu.wang@vivo.com>
+             Hu Haowen <src.res@email.cn>
+
+.. _tw_filesystems_index:
+
+========================
+Linux Kernel中的文件系統
+========================
+
+這份正在開發的手冊或許在未來某個輝煌的日子裡以易懂的形式將Linux虛擬\
+文件系統（VFS）層以及基於其上的各種文件系統如何工作呈現給大家。當前\
+可以看到下面的內容。
+
+文件系統
+========
+
+文件系統實現文檔。
+
+.. toctree::
+   :maxdepth: 2
+
+   virtiofs
+   debugfs
+   tmpfs
+
+
diff --git a/Documentation/translations/zh_TW/filesystems/sysfs.txt b/Documentation/translations/zh_TW/filesystems/sysfs.txt
new file mode 100644
index 000000000000..acd677f19d4f
--- /dev/null
+++ b/Documentation/translations/zh_TW/filesystems/sysfs.txt
@@ -0,0 +1,377 @@
+SPDX-License-Identifier: GPL-2.0
+
+Chinese translated version of Documentation/filesystems/sysfs.rst
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly.  However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help.  Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Maintainer: Patrick Mochel	<mochel@osdl.org>
+		Mike Murphy <mamurph@cs.clemson.edu>
+Chinese maintainer: Fu Wei <tekkamanninja@gmail.com>
+---------------------------------------------------------------------
+Documentation/filesystems/sysfs.rst 的中文翻譯
+
+如果想評論或更新本文的內容，請直接聯繫原文檔的維護者。如果你使用英文
+交流有困難的話，也可以向中文版維護者求助。如果本翻譯更新不及時或者翻
+譯存在問題，請聯繫中文版維護者。
+英文版維護者： Patrick Mochel	<mochel@osdl.org>
+		Mike Murphy <mamurph@cs.clemson.edu>
+中文版維護者： 傅煒 Fu Wei <tekkamanninja@gmail.com>
+中文版翻譯者： 傅煒 Fu Wei <tekkamanninja@gmail.com>
+中文版校譯者： 傅煒 Fu Wei <tekkamanninja@gmail.com>
+繁體中文版校譯者：胡皓文 Hu Haowen <src.res@email.cn>
+
+
+以下爲正文
+---------------------------------------------------------------------
+sysfs - 用於導出內核對象(kobject)的文件系統
+
+Patrick Mochel	<mochel@osdl.org>
+Mike Murphy <mamurph@cs.clemson.edu>
+
+修訂:    16 August 2011
+原始版本:   10 January 2003
+
+
+sysfs 簡介:
+~~~~~~~~~~
+
+sysfs 是一個最初基於 ramfs 且位於內存的文件系統。它提供導出內核
+數據結構及其屬性，以及它們之間的關聯到用戶空間的方法。
+
+sysfs 始終與 kobject 的底層結構緊密相關。請閱讀
+Documentation/core-api/kobject.rst 文檔以獲得更多關於 kobject 接口的
+信息。
+
+
+使用 sysfs
+~~~~~~~~~~~
+
+只要內核配置中定義了 CONFIG_SYSFS ，sysfs 總是被編譯進內核。你可
+通過以下命令掛載它:
+
+    mount -t sysfs sysfs /sys
+
+
+創建目錄
+~~~~~~~~
+
+任何 kobject 在系統中註冊，就會有一個目錄在 sysfs 中被創建。這個
+目錄是作爲該 kobject 的父對象所在目錄的子目錄創建的，以準確地傳遞
+內核的對象層次到用戶空間。sysfs 中的頂層目錄代表著內核對象層次的
+共同祖先；例如：某些對象屬於某個子系統。
+
+Sysfs 在與其目錄關聯的 kernfs_node 對象中內部保存一個指向實現
+目錄的 kobject 的指針。以前，這個 kobject 指針被 sysfs 直接用於
+kobject 文件打開和關閉的引用計數。而現在的 sysfs 實現中，kobject
+引用計數只能通過 sysfs_schedule_callback() 函數直接修改。
+
+
+屬性
+~~~~
+
+kobject 的屬性可在文件系統中以普通文件的形式導出。Sysfs 爲屬性定義
+了面向文件 I/O 操作的方法，以提供對內核屬性的讀寫。
+
+
+屬性應爲 ASCII 碼文本文件。以一個文件只存儲一個屬性值爲宜。但一個
+文件只包含一個屬性值可能影響效率，所以一個包含相同數據類型的屬性值
+數組也被廣泛地接受。
+
+混合類型、表達多行數據以及一些怪異的數據格式會遭到強烈反對。這樣做是
+很丟臉的,而且其代碼會在未通知作者的情況下被重寫。
+
+
+一個簡單的屬性結構定義如下:
+
+struct attribute {
+        char                    * name;
+        struct module		*owner;
+        umode_t                 mode;
+};
+
+
+int sysfs_create_file(struct kobject * kobj, const struct attribute * attr);
+void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr);
+
+
+一個單獨的屬性結構並不包含讀寫其屬性值的方法。子系統最好爲增刪特定
+對象類型的屬性定義自己的屬性結構體和封裝函數。
+
+例如:驅動程序模型定義的 device_attribute 結構體如下:
+
+struct device_attribute {
+	struct attribute	attr;
+	ssize_t (*show)(struct device *dev, struct device_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct device *dev, struct device_attribute *attr,
+			 const char *buf, size_t count);
+};
+
+int device_create_file(struct device *, const struct device_attribute *);
+void device_remove_file(struct device *, const struct device_attribute *);
+
+爲了定義設備屬性，同時定義了一下輔助宏:
+
+#define DEVICE_ATTR(_name, _mode, _show, _store) \
+struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store)
+
+例如:聲明
+
+static DEVICE_ATTR(foo, S_IWUSR | S_IRUGO, show_foo, store_foo);
+
+等同於如下代碼：
+
+static struct device_attribute dev_attr_foo = {
+       .attr	= {
+		.name = "foo",
+		.mode = S_IWUSR | S_IRUGO,
+		.show = show_foo,
+		.store = store_foo,
+	},
+};
+
+
+子系統特有的回調函數
+~~~~~~~~~~~~~~~~~~~
+
+當一個子系統定義一個新的屬性類型時，必須實現一系列的 sysfs 操作，
+以幫助讀寫調用實現屬性所有者的顯示和儲存方法。
+
+struct sysfs_ops {
+        ssize_t (*show)(struct kobject *, struct attribute *, char *);
+        ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t);
+};
+
+[子系統應已經定義了一個 struct kobj_type 結構體作爲這個類型的
+描述符，並在此保存 sysfs_ops 的指針。更多的信息參見 kobject 的
+文檔]
+
+sysfs 會爲這個類型調用適當的方法。當一個文件被讀寫時，這個方法會
+將一般的kobject 和 attribute 結構體指針轉換爲適當的指針類型後
+調用相關聯的函數。
+
+
+示例:
+
+#define to_dev_attr(_attr) container_of(_attr, struct device_attribute, attr)
+
+static ssize_t dev_attr_show(struct kobject *kobj, struct attribute *attr,
+                             char *buf)
+{
+        struct device_attribute *dev_attr = to_dev_attr(attr);
+        struct device *dev = kobj_to_dev(kobj);
+        ssize_t ret = -EIO;
+
+        if (dev_attr->show)
+                ret = dev_attr->show(dev, dev_attr, buf);
+        if (ret >= (ssize_t)PAGE_SIZE) {
+                printk("dev_attr_show: %pS returned bad count\n",
+                                dev_attr->show);
+        }
+        return ret;
+}
+
+
+
+讀寫屬性數據
+~~~~~~~~~~~~
+
+在聲明屬性時，必須指定 show() 或 store() 方法，以實現屬性的
+讀或寫。這些方法的類型應該和以下的設備屬性定義一樣簡單。
+
+ssize_t (*show)(struct device *dev, struct device_attribute *attr, char *buf);
+ssize_t (*store)(struct device *dev, struct device_attribute *attr,
+                 const char *buf, size_t count);
+
+也就是說,他們應只以一個處理對象、一個屬性和一個緩衝指針作爲參數。
+
+sysfs 會分配一個大小爲 (PAGE_SIZE) 的緩衝區並傳遞給這個方法。
+Sysfs 將會爲每次讀寫操作調用一次這個方法。這使得這些方法在執行時
+會出現以下的行爲:
+
+- 在讀方面（read(2)），show() 方法應該填充整個緩衝區。回想屬性
+  應只導出了一個屬性值或是一個同類型屬性值的數組，所以這個代價將
+  不會不太高。
+
+  這使得用戶空間可以局部地讀和任意的向前搜索整個文件。如果用戶空間
+  向後搜索到零或使用『0』偏移執行一個pread(2)操作，show()方法將
+  再次被調用，以重新填充緩存。
+
+- 在寫方面（write(2)），sysfs 希望在第一次寫操作時得到整個緩衝區。
+  之後 Sysfs 傳遞整個緩衝區給 store() 方法。
+
+  當要寫 sysfs 文件時，用戶空間進程應首先讀取整個文件，修該想要
+  改變的值，然後回寫整個緩衝區。
+
+  在讀寫屬性值時，屬性方法的執行應操作相同的緩衝區。
+
+註記:
+
+- 寫操作導致的 show() 方法重載，會忽略當前文件位置。
+
+- 緩衝區應總是 PAGE_SIZE 大小。對於i386，這個值爲4096。
+
+- show() 方法應該返回寫入緩衝區的字節數，也就是 scnprintf()的
+  返回值。
+
+- show() 方法在將格式化返回值返回用戶空間的時候，禁止使用snprintf()。
+  如果可以保證不會發生緩衝區溢出，可以使用sprintf()，否則必須使用
+  scnprintf()。
+
+- store() 應返回緩衝區的已用字節數。如果整個緩存都已填滿，只需返回
+  count 參數。
+
+- show() 或 store() 可以返回錯誤值。當得到一個非法值，必須返回一個
+  錯誤值。
+
+- 一個傳遞給方法的對象將會通過 sysfs 調用對象內嵌的引用計數固定在
+  內存中。儘管如此，對象代表的物理實體(如設備)可能已不存在。如有必要，
+  應該實現一個檢測機制。
+
+一個簡單的(未經實驗證實的)設備屬性實現如下：
+
+static ssize_t show_name(struct device *dev, struct device_attribute *attr,
+                         char *buf)
+{
+	return scnprintf(buf, PAGE_SIZE, "%s\n", dev->name);
+}
+
+static ssize_t store_name(struct device *dev, struct device_attribute *attr,
+                          const char *buf, size_t count)
+{
+        snprintf(dev->name, sizeof(dev->name), "%.*s",
+                 (int)min(count, sizeof(dev->name) - 1), buf);
+	return count;
+}
+
+static DEVICE_ATTR(name, S_IRUGO, show_name, store_name);
+
+
+（注意：真正的實現不允許用戶空間設置設備名。）
+
+頂層目錄布局
+~~~~~~~~~~~~
+
+sysfs 目錄的安排顯示了內核數據結構之間的關係。
+
+頂層 sysfs 目錄如下:
+
+block/
+bus/
+class/
+dev/
+devices/
+firmware/
+net/
+fs/
+
+devices/ 包含了一個設備樹的文件系統表示。他直接映射了內部的內核
+設備樹，反映了設備的層次結構。
+
+bus/ 包含了內核中各種總線類型的平面目錄布局。每個總線目錄包含兩個
+子目錄:
+
+	devices/
+	drivers/
+
+devices/ 包含了系統中出現的每個設備的符號連結，他們指向 root/ 下的
+設備目錄。
+
+drivers/ 包含了每個已爲特定總線上的設備而掛載的驅動程序的目錄(這裡
+假定驅動沒有跨越多個總線類型)。
+
+fs/ 包含了一個爲文件系統設立的目錄。現在每個想要導出屬性的文件系統必須
+在 fs/ 下創建自己的層次結構(參見Documentation/filesystems/fuse.rst)。
+
+dev/ 包含兩個子目錄： char/ 和 block/。在這兩個子目錄中，有以
+<major>:<minor> 格式命名的符號連結。這些符號連結指向 sysfs 目錄
+中相應的設備。/sys/dev 提供一個通過一個 stat(2) 操作結果，查找
+設備 sysfs 接口快捷的方法。
+
+更多有關 driver-model 的特性信息可以在 Documentation/driver-api/driver-model/
+中找到。
+
+
+TODO: 完成這一節。
+
+
+當前接口
+~~~~~~~~
+
+以下的接口層普遍存在於當前的sysfs中:
+
+- 設備 (include/linux/device.h)
+----------------------------------
+結構體:
+
+struct device_attribute {
+	struct attribute	attr;
+	ssize_t (*show)(struct device *dev, struct device_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct device *dev, struct device_attribute *attr,
+			 const char *buf, size_t count);
+};
+
+聲明:
+
+DEVICE_ATTR(_name, _mode, _show, _store);
+
+增/刪屬性:
+
+int device_create_file(struct device *dev, const struct device_attribute * attr);
+void device_remove_file(struct device *dev, const struct device_attribute * attr);
+
+
+- 總線驅動程序 (include/linux/device.h)
+--------------------------------------
+結構體:
+
+struct bus_attribute {
+        struct attribute        attr;
+        ssize_t (*show)(struct bus_type *, char * buf);
+        ssize_t (*store)(struct bus_type *, const char * buf, size_t count);
+};
+
+聲明:
+
+BUS_ATTR(_name, _mode, _show, _store)
+
+增/刪屬性:
+
+int bus_create_file(struct bus_type *, struct bus_attribute *);
+void bus_remove_file(struct bus_type *, struct bus_attribute *);
+
+
+- 設備驅動程序 (include/linux/device.h)
+-----------------------------------------
+
+結構體:
+
+struct driver_attribute {
+        struct attribute        attr;
+        ssize_t (*show)(struct device_driver *, char * buf);
+        ssize_t (*store)(struct device_driver *, const char * buf,
+                         size_t count);
+};
+
+聲明:
+
+DRIVER_ATTR(_name, _mode, _show, _store)
+
+增/刪屬性：
+
+int driver_create_file(struct device_driver *, const struct driver_attribute *);
+void driver_remove_file(struct device_driver *, const struct driver_attribute *);
+
+
+文檔
+~~~~
+
+sysfs 目錄結構以及其中包含的屬性定義了一個內核與用戶空間之間的 ABI。
+對於任何 ABI，其自身的穩定和適當的文檔是非常重要的。所有新的 sysfs
+屬性必須在 Documentation/ABI 中有文檔。詳見 Documentation/ABI/README。
+
diff --git a/Documentation/translations/zh_TW/filesystems/tmpfs.rst b/Documentation/translations/zh_TW/filesystems/tmpfs.rst
new file mode 100644
index 000000000000..8d753a34785b
--- /dev/null
+++ b/Documentation/translations/zh_TW/filesystems/tmpfs.rst
@@ -0,0 +1,148 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: Documentation/filesystems/tmpfs.rst
+
+Translated by Wang Qing <wangqing@vivo.com>
+and Hu Haowen <src.res@email.cn>
+
+=====
+Tmpfs
+=====
+
+Tmpfs是一個將所有文件都保存在虛擬內存中的文件系統。
+
+tmpfs中的所有內容都是臨時的，也就是說沒有任何文件會在硬碟上創建。
+如果卸載tmpfs實例，所有保存在其中的文件都會丟失。
+
+tmpfs將所有文件保存在內核緩存中，隨著文件內容增長或縮小可以將不需要的
+頁面swap出去。它具有最大限制，可以通過「mount -o remount ...」調整。
+
+和ramfs（創建tmpfs的模板）相比，tmpfs包含交換和限制檢查。和tmpfs相似的另
+一個東西是RAM磁碟（/dev/ram*），可以在物理RAM中模擬固定大小的硬碟，並在
+此之上創建一個普通的文件系統。Ramdisks無法swap，因此無法調整它們的大小。
+
+由於tmpfs完全保存於頁面緩存和swap中，因此所有tmpfs頁面將在/proc/meminfo
+中顯示爲「Shmem」，而在free(1)中顯示爲「Shared」。請注意，這些計數還包括
+共享內存(shmem，請參閱ipcs(1))。獲得計數的最可靠方法是使用df(1)和du(1)。
+
+tmpfs具有以下用途：
+
+1) 內核總有一個無法看到的內部掛載，用於共享匿名映射和SYSV共享內存。
+
+   掛載不依賴於CONFIG_TMPFS。如果CONFIG_TMPFS未設置，tmpfs對用戶不可見。
+   但是內部機制始終存在。
+
+2) glibc 2.2及更高版本期望將tmpfs掛載在/dev/shm上以用於POSIX共享內存
+   (shm_open，shm_unlink)。添加內容到/etc/fstab應注意如下：
+
+	tmpfs	/dev/shm	tmpfs	defaults	0 0
+
+   使用時需要記住創建掛載tmpfs的目錄。
+
+   SYSV共享內存無需掛載，內部已默認支持。(在2.3內核版本中，必須掛載
+   tmpfs的前身(shm fs)才能使用SYSV共享內存)
+
+3) 很多人（包括我）都覺的在/tmp和/var/tmp上掛載非常方便，並具有較大的
+   swap分區。目前循環掛載tmpfs可以正常工作，所以大多數發布都應當可以
+   使用mkinitrd通過/tmp訪問/tmp。
+
+4) 也許還有更多我不知道的地方:-)
+
+
+tmpfs有三個用於調整大小的掛載選項：
+
+=========  ===========================================================
+size       tmpfs實例分配的字節數限制。默認值是不swap時物理RAM的一半。
+           如果tmpfs實例過大，機器將死鎖，因爲OOM處理將無法釋放該內存。
+nr_blocks  與size相同，但以PAGE_SIZE爲單位。
+nr_inodes  tmpfs實例的最大inode個數。默認值是物理內存頁數的一半，或者
+           (有高端內存的機器)低端內存RAM的頁數，二者以較低者為準。
+=========  ===========================================================
+
+這些參數接受後綴k，m或g表示千，兆和千兆字節，可以在remount時更改。
+size參數也接受後綴％用來限制tmpfs實例占用物理RAM的百分比：
+未指定size或nr_blocks時，默認值爲size=50％
+
+如果nr_blocks=0（或size=0），block個數將不受限制；如果nr_inodes=0，
+inode個數將不受限制。這樣掛載通常是不明智的，因爲它允許任何具有寫權限的
+用戶通過訪問tmpfs耗盡機器上的所有內存；但同時這樣做也會增強在多個CPU的
+場景下的訪問。
+
+tmpfs具有爲所有文件設置NUMA內存分配策略掛載選項(如果啓用了CONFIG_NUMA),
+可以通過「mount -o remount ...」調整
+
+======================== =========================
+mpol=default             採用進程分配策略
+                         (請參閱 set_mempolicy(2))
+mpol=prefer:Node         傾向從給定的節點分配
+mpol=bind:NodeList       只允許從指定的鍊表分配
+mpol=interleave          傾向於依次從每個節點分配
+mpol=interleave:NodeList 依次從每個節點分配
+mpol=local               優先本地節點分配內存
+======================== =========================
+
+NodeList格式是以逗號分隔的十進位數字表示大小和範圍，最大和最小範圍是用-
+分隔符的十進位數來表示。例如，mpol=bind0-3,5,7,9-15
+
+帶有有效NodeList的內存策略將按指定格式保存，在創建文件時使用。當任務在該
+文件系統上創建文件時，會使用到掛載時的內存策略NodeList選項，如果設置的話，
+由調用任務的cpuset[請參見Documentation/admin-guide/cgroup-v1/cpusets.rst]
+以及下面列出的可選標誌約束。如果NodeLists爲設置爲空集，則文件的內存策略將
+恢復爲「默認」策略。
+
+NUMA內存分配策略有可選標誌，可以用於模式結合。在掛載tmpfs時指定這些可選
+標誌可以在NodeList之前生效。
+Documentation/admin-guide/mm/numa_memory_policy.rst列出所有可用的內存
+分配策略模式標誌及其對內存策略。
+
+::
+
+	=static		相當於	MPOL_F_STATIC_NODES
+	=relative	相當於	MPOL_F_RELATIVE_NODES
+
+例如，mpol=bind=staticNodeList相當於MPOL_BIND|MPOL_F_STATIC_NODES的分配策略
+
+請注意，如果內核不支持NUMA，那麼使用mpol選項掛載tmpfs將會失敗；nodelist指定不
+在線的節點也會失敗。如果您的系統依賴於此，但內核會運行不帶NUMA功能(也許是安全
+revocery內核)，或者具有較少的節點在線，建議從自動模式中省略mpol選項掛載選項。
+可以在以後通過「mount -o remount,mpol=Policy:NodeList MountPoint」添加到掛載點。
+
+要指定初始根目錄，可以使用如下掛載選項：
+
+====	====================
+模式	權限用八進位數字表示
+uid	用戶ID
+gid	組ID
+====	====================
+
+這些選項對remount沒有任何影響。您可以通過chmod(1),chown(1)和chgrp(1)的更改
+已經掛載的參數。
+
+tmpfs具有選擇32位還是64位inode的掛載選項：
+
+=======   =============
+inode64   使用64位inode
+inode32   使用32位inode
+=======   =============
+
+在32位內核上，默認是inode32，掛載時指定inode64會被拒絕。
+在64位內核上，默認配置是CONFIG_TMPFS_INODE64。inode64避免了單個設備上可能有多個
+具有相同inode編號的文件；比如32位應用程式使用glibc如果長期訪問tmpfs，一旦達到33
+位inode編號，就有EOVERFLOW失敗的危險，無法打開大於2GiB的文件，並返回EINVAL。
+
+所以'mount -t tmpfs -o size=10G,nr_inodes=10k,mode=700 tmpfs /mytmpfs'將在
+/mytmpfs上掛載tmpfs實例，分配只能由root用戶訪問的10GB RAM/SWAP，可以有10240個
+inode的實例。
+
+
+:作者:
+   Christoph Rohland <cr@sap.com>, 1.12.01
+:更新:
+   Hugh Dickins, 4 June 2007
+:更新:
+   KOSAKI Motohiro, 16 Mar 2010
+:更新:
+   Chris Down, 13 July 2020
+
diff --git a/Documentation/translations/zh_TW/filesystems/virtiofs.rst b/Documentation/translations/zh_TW/filesystems/virtiofs.rst
new file mode 100644
index 000000000000..2b05e84375dd
--- /dev/null
+++ b/Documentation/translations/zh_TW/filesystems/virtiofs.rst
@@ -0,0 +1,61 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/filesystems/virtiofs.rst <virtiofs_index>`
+
+譯者
+::
+
+	中文版維護者： 王文虎 Wang Wenhu <wenhu.wang@vivo.com>
+	中文版翻譯者： 王文虎 Wang Wenhu <wenhu.wang@vivo.com>
+	中文版校譯者： 王文虎 Wang Wenhu <wenhu.wang@vivo.com>
+	中文版校譯者： 王文虎 Wang Wenhu <wenhu.wang@vivo.com>
+	繁體中文版校譯者：胡皓文 Hu Haowen <src.res@email.cn>
+
+===========================================
+virtiofs: virtio-fs 主機<->客機共享文件系統
+===========================================
+
+- Copyright (C) 2020 Vivo Communication Technology Co. Ltd.
+
+介紹
+====
+Linux的virtiofs文件系統實現了一個半虛擬化VIRTIO類型「virtio-fs」設備的驅動，通過該\
+類型設備實現客機<->主機文件系統共享。它允許客機掛載一個已經導出到主機的目錄。
+
+客機通常需要訪問主機或者遠程系統上的文件。使用場景包括：在新客機安裝時讓文件對其\
+可見；從主機上的根文件系統啓動；對無狀態或臨時客機提供持久存儲和在客機之間共享目錄。
+
+儘管在某些任務可能通過使用已有的網絡文件系統完成，但是卻需要非常難以自動化的配置\
+步驟，且將存儲網絡暴露給客機。而virtio-fs設備通過提供不經過網絡的文件系統訪問文件\
+的設計方式解決了這些問題。
+
+另外，virto-fs設備發揮了主客機共存的優點提高了性能，並且提供了網絡文件系統所不具備
+的一些語義功能。
+
+用法
+====
+以``myfs``標籤將文件系統掛載到``/mnt``:
+
+.. code-block:: sh
+
+  guest# mount -t virtiofs myfs /mnt
+
+請查閱 https://virtio-fs.gitlab.io/ 了解配置QEMU和virtiofsd守護程序的詳細信息。
+
+內幕
+====
+由於virtio-fs設備將FUSE協議用於文件系統請求，因此Linux的virtiofs文件系統與FUSE文\
+件系統客戶端緊密集成在一起。客機充當FUSE客戶端而主機充當FUSE伺服器，內核與用戶空\
+間之間的/dev/fuse接口由virtio-fs設備接口代替。
+
+FUSE請求被置於虛擬隊列中由主機處理。主機填充緩衝區中的響應部分，而客機處理請求的完成部分。
+
+將/dev/fuse映射到虛擬隊列需要解決/dev/fuse和虛擬隊列之間語義上的差異。每次讀取\
+/dev/fuse設備時，FUSE客戶端都可以選擇要傳輸的請求，從而可以使某些請求優先於其他\
+請求。虛擬隊列有其隊列語義，無法更改已入隊請求的順序。在虛擬隊列已滿的情況下尤
+其關鍵，因爲此時不可能加入高優先級的請求。爲了解決此差異，virtio-fs設備採用「hiprio」\
+（高優先級）虛擬隊列，專門用於有別於普通請求的高優先級請求。
+
+
diff --git a/Documentation/translations/zh_TW/gpio.txt b/Documentation/translations/zh_TW/gpio.txt
new file mode 100644
index 000000000000..e3c076dd75a5
--- /dev/null
+++ b/Documentation/translations/zh_TW/gpio.txt
@@ -0,0 +1,651 @@
+Chinese translated version of Documentation/admin-guide/gpio
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly.  However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help.  Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Maintainer: Grant Likely <grant.likely@secretlab.ca>
+		Linus Walleij <linus.walleij@linaro.org>
+Traditional Chinese maintainer: Hu Haowen <src.res@email.cn>
+---------------------------------------------------------------------
+Documentation/admin-guide/gpio 的繁體中文翻譯
+
+如果想評論或更新本文的內容，請直接聯繫原文檔的維護者。如果你使用英文
+交流有困難的話，也可以向繁體中文版維護者求助。如果本翻譯更新不及時或
+者翻譯存在問題，請聯繫繁體中文版維護者。
+
+英文版維護者： Grant Likely <grant.likely@secretlab.ca>
+		Linus Walleij <linus.walleij@linaro.org>
+繁體中文版維護者： 胡皓文 Hu Haowen <src.res@email.cn>
+繁體中文版翻譯者： 胡皓文 Hu Haowen <src.res@email.cn>
+繁體中文版校譯者： 胡皓文 Hu Haowen <src.res@email.cn>
+
+以下爲正文
+---------------------------------------------------------------------
+GPIO 接口
+
+本文檔提供了一個在Linux下訪問GPIO的公約概述。
+
+這些函數以 gpio_* 作爲前綴。其他的函數不允許使用這樣的前綴或相關的
+__gpio_* 前綴。
+
+
+什麼是GPIO?
+==========
+"通用輸入/輸出口"(GPIO)是一個靈活的由軟體控制的數位訊號。他們可
+由多種晶片提供,且對於從事嵌入式和定製硬體的 Linux 開發者來說是
+比較熟悉。每個GPIO 都代表一個連接到特定引腳或球柵陣列(BGA)封裝中
+「球珠」的一個位。電路板原理圖顯示了 GPIO 與外部硬體的連接關係。
+驅動可以編寫成通用代碼，以使板級啓動代碼可傳遞引腳配置數據給驅動。
+
+片上系統 (SOC) 處理器對 GPIO 有很大的依賴。在某些情況下,每個
+非專用引腳都可配置爲 GPIO,且大多數晶片都最少有一些 GPIO。
+可編程邏輯器件(類似 FPGA) 可以方便地提供 GPIO。像電源管理和
+音頻編解碼器這樣的多功能晶片經常留有一些這樣的引腳來幫助那些引腳
+匱乏的 SOC。同時還有通過 I2C 或 SPI 串行總線連接的「GPIO擴展器」
+晶片。大多數 PC 的南橋有一些擁有 GPIO 能力的引腳 (只有BIOS
+固件才知道如何使用他們)。
+
+GPIO 的實際功能因系統而異。通常用法有:
+
+  - 輸出值可寫 (高電平=1，低電平=0)。一些晶片也有如何驅動這些值的選項,
+    例如只允許輸出一個值、支持「線與」及其他取值類似的模式(值得注意的是
+    「開漏」信號)
+
+  - 輸入值可讀(1、0)。一些晶片支持引腳在配置爲「輸出」時回讀，這對於類似
+    「線與」的情況(以支持雙向信號)是非常有用的。GPIO 控制器可能有輸入
+    去毛刺/消抖邏輯,這有時需要軟體控制。
+
+  - 輸入通常可作爲 IRQ 信號,一般是沿觸發,但有時是電平觸發。這樣的 IRQ
+    可能配置爲系統喚醒事件,以將系統從低功耗狀態下喚醒。
+
+  - 通常一個 GPIO 根據不同產品電路板的需求,可以配置爲輸入或輸出,也有僅
+    支持單向的。
+
+  - 大部分 GPIO 可以在持有自旋鎖時訪問,但是通常由串行總線擴展的 GPIO
+    不允許持有自旋鎖。但某些系統也支持這種類型。
+
+對於給定的電路板,每個 GPIO 都用於某個特定的目的,如監控 MMC/SD 卡的
+插入/移除、檢測卡的防寫狀態、驅動 LED、配置收發器、模擬串行總線、
+復位硬體看門狗、感知開關狀態等等。
+
+
+GPIO 公約
+=========
+注意,這個叫做「公約」，因爲這不是強制性的，不遵循這個公約是無傷大雅的，
+因爲此時可移植性並不重要。GPIO 常用於板級特定的電路邏輯,甚至可能
+隨著電路板的版本而改變，且不可能在不同走線的電路板上使用。僅有在少數
+功能上才具有可移植性，其他功能是平台特定。這也是由於「膠合」的邏輯造成的。
+
+此外，這不需要任何的執行框架，只是一個接口。某個平台可能通過一個簡單地
+訪問晶片寄存器的內聯函數來實現它，其他平台可能通過委託一系列不同的GPIO
+控制器的抽象函數來實現它。(有一些可選的代碼能支持這種策略的實現,本文檔
+後面會介紹，但作爲 GPIO 接口的客戶端驅動程序必須與它的實現無關。)
+
+也就是說,如果在他們的平台上支持這個公約，驅動應儘可能的使用它。同時，平台
+必須在 Kconfig 中選擇 ARCH_REQUIRE_GPIOLIB 或者 ARCH_WANT_OPTIONAL_GPIOLIB
+選項。那些調用標準 GPIO 函數的驅動應該在 Kconfig 入口中聲明依賴GENERIC_GPIO。
+當驅動包含文件:
+
+	#include <linux/gpio.h>
+
+則 GPIO 函數是可用,無論是「真實代碼」還是經優化過的語句。如果你遵守
+這個公約，當你的代碼完成後，對其他的開發者來說會更容易看懂和維護。
+
+注意，這些操作包含所用平台的 I/O 屏障代碼，驅動無須顯式地調用他們。
+
+
+標識 GPIO
+---------
+GPIO 是通過無符號整型來標識的,範圍是 0 到 MAX_INT。保留「負」數
+用於其他目的,例如標識信號「在這個板子上不可用」或指示錯誤。未接觸底層
+硬體的代碼會忽略這些整數。
+
+平台會定義這些整數的用法,且通常使用 #define 來定義 GPIO，這樣
+板級特定的啓動代碼可以直接關聯相應的原理圖。相對來說，驅動應該僅使用
+啓動代碼傳遞過來的 GPIO 編號，使用 platform_data 保存板級特定
+引腳配置數據 (同時還有其他須要的板級特定數據)，避免可能出現的問題。
+
+例如一個平台使用編號 32-159 來標識 GPIO,而在另一個平台使用編號0-63
+標識一組 GPIO 控制器,64-79標識另一類 GPIO 控制器,且在一個含有
+FPGA 的特定板子上使用 80-95。編號不一定要連續,那些平台中，也可以
+使用編號2000-2063來標識一個 I2C 接口的 GPIO 擴展器中的 GPIO。
+
+如果你要初始化一個帶有無效 GPIO 編號的結構體,可以使用一些負編碼
+(如"-EINVAL")，那將使其永遠不會是有效。來測試這樣一個結構體中的編號
+是否關聯一個 GPIO，你可使用以下斷言:
+
+	int gpio_is_valid(int number);
+
+如果編號不存在，則請求和釋放 GPIO 的函數將拒絕執行相關操作(見下文)。
+其他編號也可能被拒絕,比如一個編號可能存在，但暫時在給定的電路上不可用。
+
+一個平台是否支持多個 GPIO 控制器爲平台特定的實現問題，就像是否可以
+在 GPIO 編號空間中有「空洞」和是否可以在運行時添加新的控制器一樣。
+這些問題會影響其他事情，包括相鄰的 GPIO 編號是否存在等。
+
+使用 GPIO
+---------
+對於一個 GPIO，系統應該做的第一件事情就是通過 gpio_request()
+函數分配它，見下文。
+
+接下來是設置I/O方向，這通常是在板級啓動代碼中爲所使用的 GPIO 設置
+platform_device 時完成。
+
+	/* 設置爲輸入或輸出, 返回 0 或負的錯誤代碼 */
+	int gpio_direction_input(unsigned gpio);
+	int gpio_direction_output(unsigned gpio, int value);
+
+返回值爲零代表成功，否則返回一個負的錯誤代碼。這個返回值需要檢查，因爲
+get/set(獲取/設置)函數調用沒法返回錯誤,且有可能是配置錯誤。通常，
+你應該在進程上下文中調用這些函數。然而,對於自旋鎖安全的 GPIO，在板子
+啓動的早期、進程啓動前使用他們也是可以的。
+
+對於作爲輸出的 GPIO，爲其提供初始輸出值，對於避免在系統啓動期間出現
+信號毛刺是很有幫助的。
+
+爲了與傳統的 GPIO 接口兼容, 在設置一個 GPIO 方向時，如果它還未被申請，
+則隱含了申請那個 GPIO 的操作(見下文)。這種兼容性正在從可選的 gpiolib
+框架中移除。
+
+如果這個 GPIO 編碼不存在，或者特定的 GPIO 不能用於那種模式，則方向
+設置可能失敗。依賴啓動固件來正確地設置方向通常是一個壞主意，因爲它可能
+除了啓動Linux，並沒有做更多的驗證工作。(同理, 板子的啓動代碼可能需要
+將這個復用的引腳設置爲 GPIO，並正確地配置上拉/下拉電阻。)
+
+
+訪問自旋鎖安全的 GPIO
+-------------------
+大多數 GPIO 控制器可以通過內存讀/寫指令來訪問。這些指令不會休眠,可以
+安全地在硬(非線程)中斷例程和類似的上下文中完成。
+
+對於那些用 gpio_cansleep()測試總是返回失敗的 GPIO(見下文)，使用
+以下的函數訪問:
+
+	/* GPIO 輸入:返回零或非零 */
+	int gpio_get_value(unsigned gpio);
+
+	/* GPIO 輸出 */
+	void gpio_set_value(unsigned gpio, int value);
+
+GPIO值是布爾值，零表示低電平，非零表示高電平。當讀取一個輸出引腳的值時，
+返回值應該是引腳上的值。這個值不總是和輸出值相符，因爲存在開漏輸出信號和
+輸出延遲問題。
+
+以上的 get/set 函數無錯誤返回值，因爲之前 gpio_direction_*()應已檢查過
+其是否爲「無效GPIO」。此外，還需要注意的是並不是所有平台都可以從輸出引腳
+中讀取數據，對於不能讀取的引腳應總返回零。另外，對那些在原子上下文中無法
+安全訪問的 GPIO (譯者註：因爲訪問可能導致休眠)使用這些函數是不合適的
+(見下文)。
+
+在 GPIO 編號(還有輸出、值)爲常數的情況下,鼓勵通過平台特定的實現來優化
+這兩個函數來訪問 GPIO 值。這種情況(讀寫一個硬體寄存器)下只需要幾條指令
+是很正常的,且無須自旋鎖。這種優化函數比起那些在子程序上花費許多指令的
+函數可以使得模擬接口(譯者注:例如 GPIO 模擬 I2C、1-wire 或 SPI)的
+應用(在空間和時間上都)更具效率。
+
+
+訪問可能休眠的 GPIO
+-----------------
+某些 GPIO 控制器必須通過基於總線(如 I2C 或 SPI)的消息訪問。讀或寫這些
+GPIO 值的命令需要等待其信息排到隊首才發送命令，再獲得其反饋。期間需要
+休眠，這不能在 IRQ 例程(中斷上下文)中執行。
+
+支持此類 GPIO 的平台通過以下函數返回非零值來區分出這種 GPIO。(此函數需要
+一個之前通過 gpio_request 分配到的有效 GPIO 編號):
+
+	int gpio_cansleep(unsigned gpio);
+
+爲了訪問這種 GPIO,內核定義了一套不同的函數:
+
+	/* GPIO 輸入:返回零或非零 ,可能會休眠 */
+	int gpio_get_value_cansleep(unsigned gpio);
+
+	/* GPIO 輸出,可能會休眠 */
+	void gpio_set_value_cansleep(unsigned gpio, int value);
+
+
+訪問這樣的 GPIO 需要一個允許休眠的上下文，例如線程 IRQ 處理例程，並用以上的
+訪問函數替換那些沒有 cansleep()後綴的自旋鎖安全訪問函數。
+
+除了這些訪問函數可能休眠，且它們操作的 GPIO 不能在硬體 IRQ 處理例程中訪問的
+事實，這些處理例程實際上和自旋鎖安全的函數是一樣的。
+
+** 除此之外 ** 調用設置和配置此類 GPIO 的函數也必須在允許休眠的上下文中，
+因爲它們可能也需要訪問 GPIO 控制器晶片: (這些設置函數通常在板級啓動代碼或者
+驅動探測/斷開代碼中，所以這是一個容易滿足的約束條件。)
+
+	gpio_direction_input()
+	gpio_direction_output()
+	gpio_request()
+
+## 	gpio_request_one()
+##	gpio_request_array()
+## 	gpio_free_array()
+
+	gpio_free()
+	gpio_set_debounce()
+
+
+
+聲明和釋放 GPIO
+----------------------------
+爲了有助於捕獲系統配置錯誤,定義了兩個函數。
+
+	/* 申請 GPIO, 返回 0 或負的錯誤代碼.
+	 * 非空標籤可能有助於診斷.
+	 */
+	int gpio_request(unsigned gpio, const char *label);
+
+	/* 釋放之前聲明的 GPIO */
+	void gpio_free(unsigned gpio);
+
+將無效的 GPIO 編碼傳遞給 gpio_request()會導致失敗，申請一個已使用這個
+函數聲明過的 GPIO 也會失敗。gpio_request()的返回值必須檢查。你應該在
+進程上下文中調用這些函數。然而,對於自旋鎖安全的 GPIO,在板子啓動的早期、
+進入進程之前是可以申請的。
+
+這個函數完成兩個基本的目標。一是標識那些實際上已作爲 GPIO 使用的信號線，
+這樣便於更好地診斷;系統可能需要服務幾百個可用的 GPIO，但是對於任何一個
+給定的電路板通常只有一些被使用。另一個目的是捕獲衝突，查明錯誤:如兩個或
+更多驅動錯誤地認爲他們已經獨占了某個信號線,或是錯誤地認爲移除一個管理著
+某個已激活信號的驅動是安全的。也就是說，申請 GPIO 的作用類似一種鎖機制。
+
+某些平台可能也使用 GPIO 作爲電源管理激活信號(例如通過關閉未使用晶片區和
+簡單地關閉未使用時鐘)。
+
+對於 GPIO 使用 pinctrl 子系統已知的引腳，子系統應該被告知其使用情況；
+一個 gpiolib 驅動的 .request()操作應調用 pinctrl_gpio_request()，
+而 gpiolib 驅動的 .free()操作應調用 pinctrl_gpio_free()。pinctrl
+子系統允許 pinctrl_gpio_request()在某個引腳或引腳組以復用形式「屬於」
+一個設備時都成功返回。
+
+任何須將 GPIO 信號導向適當引腳的引腳復用硬體的編程應該發生在 GPIO
+驅動的 .direction_input()或 .direction_output()函數中，以及
+任何輸出 GPIO 值的設置之後。這樣可使從引腳特殊功能到 GPIO 的轉換
+不會在引腳產生毛刺波形。有時當用一個 GPIO 實現其信號驅動一個非 GPIO
+硬體模塊的解決方案時，就需要這種機制。
+
+某些平台允許部分或所有 GPIO 信號使用不同的引腳。類似的，GPIO 或引腳的
+其他方面也需要配置，如上拉/下拉。平台軟體應該在對這些 GPIO 調用
+gpio_request()前將這類細節配置好，例如使用 pinctrl 子系統的映射表，
+使得 GPIO 的用戶無須關注這些細節。
+
+還有一個值得注意的是在釋放 GPIO 前，你必須停止使用它。
+
+
+注意:申請一個 GPIO 並沒有以任何方式配置它，只不過標識那個 GPIO 處於使用
+狀態。必須有另外的代碼來處理引腳配置(如控制 GPIO 使用的引腳、上拉/下拉)。
+考慮到大多數情況下聲明 GPIO 之後就會立即配置它們,所以定義了以下三個輔助函數:
+
+	/* 申請一個 GPIO 信號, 同時通過特定的'flags'初始化配置,
+	 * 其他和 gpio_request()的參數和返回值相同
+	 *
+	 */
+	int gpio_request_one(unsigned gpio, unsigned long flags, const char *label);
+
+	/* 在單個函數中申請多個 GPIO
+	 */
+	int gpio_request_array(struct gpio *array, size_t num);
+
+	/* 在單個函數中釋放多個 GPIO
+	 */
+	void gpio_free_array(struct gpio *array, size_t num);
+
+這裡 'flags' 當前定義可指定以下屬性:
+
+	* GPIOF_DIR_IN		- 配置方向爲輸入
+	* GPIOF_DIR_OUT		- 配置方向爲輸出
+
+	* GPIOF_INIT_LOW	- 在作爲輸出時,初始值爲低電平
+	* GPIOF_INIT_HIGH	- 在作爲輸出時,初始值爲高電平
+	* GPIOF_OPEN_DRAIN	- gpio引腳爲開漏信號
+	* GPIOF_OPEN_SOURCE	- gpio引腳爲源極開路信號
+
+	* GPIOF_EXPORT_DIR_FIXED	- 將 gpio 導出到 sysfs，並保持方向
+	* GPIOF_EXPORT_DIR_CHANGEABLE	- 同樣是導出, 但允許改變方向
+
+因爲 GPIOF_INIT_* 僅有在配置爲輸出的時候才存在,所以有效的組合爲:
+
+	* GPIOF_IN		- 配置爲輸入
+	* GPIOF_OUT_INIT_LOW	- 配置爲輸出,並初始化爲低電平
+	* GPIOF_OUT_INIT_HIGH	- 配置爲輸出,並初始化爲高電平
+
+當設置 flag 爲 GPIOF_OPEN_DRAIN 時，則假設引腳是開漏信號。這樣的引腳
+將不會在輸出模式下置1。這樣的引腳需要連接上拉電阻。通過使能這個標誌，gpio庫
+將會在被要求輸出模式下置1時將引腳變爲輸入狀態來使引腳置高。引腳在輸出模式下
+通過置0使其輸出低電平。
+
+當設置 flag 爲 GPIOF_OPEN_SOURCE 時，則假設引腳爲源極開路信號。這樣的引腳
+將不會在輸出模式下置0。這樣的引腳需要連接下拉電阻。通過使能這個標誌，gpio庫
+將會在被要求輸出模式下置0時將引腳變爲輸入狀態來使引腳置低。引腳在輸出模式下
+通過置1使其輸出高電平。
+
+將來這些標誌可能擴展到支持更多的屬性。
+
+更進一步,爲了更簡單地聲明/釋放多個 GPIO,'struct gpio'被引進來封裝所有
+這三個領域:
+
+	struct gpio {
+		unsigned	gpio;
+		unsigned long	flags;
+		const char	*label;
+	};
+
+一個典型的用例:
+
+	static struct gpio leds_gpios[] = {
+		{ 32, GPIOF_OUT_INIT_HIGH, "Power LED" }, /* 默認開啓 */
+		{ 33, GPIOF_OUT_INIT_LOW,  "Green LED" }, /* 默認關閉 */
+		{ 34, GPIOF_OUT_INIT_LOW,  "Red LED"   }, /* 默認關閉 */
+		{ 35, GPIOF_OUT_INIT_LOW,  "Blue LED"  }, /* 默認關閉 */
+		{ ... },
+	};
+
+	err = gpio_request_one(31, GPIOF_IN, "Reset Button");
+	if (err)
+		...
+
+	err = gpio_request_array(leds_gpios, ARRAY_SIZE(leds_gpios));
+	if (err)
+		...
+
+	gpio_free_array(leds_gpios, ARRAY_SIZE(leds_gpios));
+
+
+GPIO 映射到 IRQ
+--------------------
+GPIO 編號是無符號整數;IRQ 編號也是。這些構成了兩個邏輯上不同的命名空間
+(GPIO 0 不一定使用 IRQ 0)。你可以通過以下函數在它們之間實現映射:
+
+	/* 映射 GPIO 編號到 IRQ 編號 */
+	int gpio_to_irq(unsigned gpio);
+
+	/* 映射 IRQ 編號到 GPIO 編號 (儘量避免使用) */
+	int irq_to_gpio(unsigned irq);
+
+它們的返回值爲對應命名空間的相關編號，或是負的錯誤代碼(如果無法映射)。
+(例如,某些 GPIO 無法做爲 IRQ 使用。)以下的編號錯誤是未經檢測的:使用一個
+未通過 gpio_direction_input()配置爲輸入的 GPIO 編號，或者使用一個
+並非來源於gpio_to_irq()的 IRQ 編號。
+
+這兩個映射函數可能會在信號編號的加減計算過程上花些時間。它們不可休眠。
+
+gpio_to_irq()返回的非錯誤值可以傳遞給 request_irq()或者 free_irq()。
+它們通常通過板級特定的初始化代碼存放到平台設備的 IRQ 資源中。注意:IRQ
+觸發選項是 IRQ 接口的一部分，如 IRQF_TRIGGER_FALLING，系統喚醒能力
+也是如此。
+
+irq_to_gpio()返回的非錯誤值大多數通常可以被 gpio_get_value()所使用，
+比如在 IRQ 是沿觸發時初始化或更新驅動狀態。注意某些平台不支持反映射,所以
+你應該儘量避免使用它。
+
+
+模擬開漏信號
+----------------------------
+有時在只有低電平信號作爲實際驅動結果(譯者注:多個輸出連接於一點，邏輯電平
+結果爲所有輸出的邏輯與)的時候,共享的信號線需要使用「開漏」信號。(該術語
+適用於 CMOS 管；而 TTL 用「集電極開路」。)一個上拉電阻使信號爲高電平。這
+有時被稱爲「線與」。實際上，從負邏輯(低電平爲真)的角度來看，這是一個「線或」。
+
+一個開漏信號的常見例子是共享的低電平使能 IRQ 信號線。此外,有時雙向數據總線
+信號也使用漏極開路信號。
+
+某些 GPIO 控制器直接支持開漏輸出，還有許多不支持。當你需要開漏信號，但
+硬體又不直接支持的時候，一個常用的方法是用任何即可作輸入也可作輸出的 GPIO
+引腳來模擬:
+
+ LOW:	gpio_direction_output(gpio, 0) ... 這代碼驅動信號並覆蓋
+	上拉配置。
+
+ HIGH:	gpio_direction_input(gpio) ... 這代碼關閉輸出,所以上拉電阻
+	(或其他的一些器件)控制了信號。
+
+如果你將信號線「驅動」爲高電平，但是 gpio_get_value(gpio)報告了一個
+低電平(在適當的上升時間後)，你就可以知道是其他的一些組件將共享信號線拉低了。
+這不一定是錯誤的。一個常見的例子就是 I2C 時鐘的延長：一個需要較慢時鐘的
+從設備延遲 SCK 的上升沿，而 I2C 主設備相應地調整其信號傳輸速率。
+
+
+這些公約忽略了什麼?
+================
+這些公約忽略的最大一件事就是引腳復用，因爲這屬於高度晶片特定的屬性且
+沒有可移植性。某個平台可能不需要明確的復用信息；有的對於任意給定的引腳
+可能只有兩個功能選項；有的可能每個引腳有八個功能選項；有的可能可以將
+幾個引腳中的任何一個作爲給定的 GPIO。(是的，這些例子都來自於當前運行
+Linux 的系統。)
+
+在某些系統中,與引腳復用相關的是配置和使能集成的上、下拉模式。並不是所有
+平台都支持這種模式,或者不會以相同的方式來支持這種模式；且任何給定的電路板
+可能使用外置的上拉(或下拉)電阻,這時晶片上的就不應該使用。(當一個電路需要
+5kOhm 的拉動電阻,晶片上的 100 kOhm 電阻就不能做到。)同樣的，驅動能力
+(2 mA vs 20 mA)和電壓(1.8V vs 3.3V)是平台特定問題,就像模型一樣在
+可配置引腳和 GPIO 之間(沒)有一一對應的關係。
+
+還有其他一些系統特定的機制沒有在這裡指出，例如上述的輸入去毛刺和線與輸出
+選項。硬體可能支持批量讀或寫 GPIO，但是那一般是配置相關的：對於處於同一
+塊區(bank)的GPIO。(GPIO 通常以 16 或 32 個組成一個區塊，一個給定的
+片上系統一般有幾個這樣的區塊。)某些系統可以通過輸出 GPIO 觸發 IRQ，
+或者從並非以 GPIO 管理的引腳取值。這些機制的相關代碼沒有必要具有可移植性。
+
+當前，動態定義 GPIO 並不是標準的，例如作爲配置一個帶有某些 GPIO 擴展器的
+附加電路板的副作用。
+
+GPIO 實現者的框架 (可選)
+=====================
+前面提到了，有一個可選的實現框架，讓平台使用相同的編程接口，更加簡單地支持
+不同種類的 GPIO 控制器。這個框架稱爲"gpiolib"。
+
+作爲一個輔助調試功能，如果 debugfs 可用，就會有一個 /sys/kernel/debug/gpio
+文件。通過這個框架，它可以列出所有註冊的控制器,以及當前正在使用中的 GPIO
+的狀態。
+
+
+控制器驅動: gpio_chip
+-------------------
+在框架中每個 GPIO 控制器都包裝爲一個 "struct gpio_chip"，他包含了
+該類型的每個控制器的常用信息:
+
+ - 設置 GPIO 方向的方法
+ - 用於訪問 GPIO 值的方法
+ - 告知調用其方法是否可能休眠的標誌
+ - 可選的 debugfs 信息導出方法 (顯示類似上拉配置一樣的額外狀態)
+ - 診斷標籤
+
+也包含了來自 device.platform_data 的每個實例的數據：它第一個 GPIO 的
+編號和它可用的 GPIO 的數量。
+
+實現 gpio_chip 的代碼應支持多控制器實例，這可能使用驅動模型。那些代碼要
+配置每個 gpio_chip，並發起gpiochip_add()。卸載一個 GPIO 控制器很少見，
+但在必要的時候可以使用 gpiochip_remove()。
+
+大部分 gpio_chip 是一個實例特定結構體的一部分，而並不將 GPIO 接口單獨
+暴露出來,比如編址、電源管理等。類似編解碼器這樣的晶片會有複雜的非 GPIO
+狀態。
+
+任何一個 debugfs 信息導出方法通常應該忽略還未申請作爲 GPIO 的信號線。
+他們可以使用 gpiochip_is_requested()測試，當這個 GPIO 已經申請過了
+就返回相關的標籤，否則返回 NULL。
+
+
+平台支持
+-------
+爲了支持這個框架，一個平台的 Kconfig 文件將會 "select"(選擇)
+ARCH_REQUIRE_GPIOLIB 或 ARCH_WANT_OPTIONAL_GPIOLIB，並讓它的
+<asm/gpio.h> 包含 <asm-generic/gpio.h>，同時定義三個方法:
+gpio_get_value()、gpio_set_value()和 gpio_cansleep()。
+
+它也應提供一個 ARCH_NR_GPIOS 的定義值，這樣可以更好地反映該平台 GPIO
+的實際數量,節省靜態表的空間。(這個定義值應該包含片上系統內建 GPIO 和
+GPIO 擴展器中的數據。)
+
+ARCH_REQUIRE_GPIOLIB 意味著 gpiolib 核心在這個構架中將總是編譯進內核。
+
+ARCH_WANT_OPTIONAL_GPIOLIB 意味著 gpiolib 核心默認關閉,且用戶可以
+使能它,並將其編譯進內核(可選)。
+
+如果這些選項都沒被選擇,該平台就不通過 GPIO-lib 支持 GPIO,且代碼不可以
+被用戶使能。
+
+以下這些方法的實現可以直接使用框架代碼,並總是通過 gpio_chip 調度:
+
+  #define gpio_get_value	__gpio_get_value
+  #define gpio_set_value	__gpio_set_value
+  #define gpio_cansleep		__gpio_cansleep
+
+這些定義可以用更理想的實現方法替代，那就是使用經過邏輯優化的內聯函數來訪問
+基於特定片上系統的 GPIO。例如,若引用的 GPIO (寄存器位偏移)是常量「12」，
+讀取或設置它可能只需少則兩或三個指令，且不會休眠。當這樣的優化無法實現時，
+那些函數必須使用框架提供的代碼，那就至少要幾十條指令才可以實現。對於用 GPIO
+模擬的 I/O 接口, 如此精簡指令是很有意義的。
+
+對於片上系統，平台特定代碼爲片上 GPIO 每個區(bank)定義並註冊 gpio_chip
+實例。那些 GPIO 應該根據晶片廠商的文檔進行編碼/標籤,並直接和電路板原理圖
+對應。他們應該開始於零並終止於平台特定的限制。這些 GPIO(代碼)通常從
+arch_initcall()或者更早的地方集成進平台初始化代碼，使這些 GPIO 總是可用，
+且他們通常可以作爲 IRQ 使用。
+
+板級支持
+-------
+對於外部 GPIO 控制器(例如 I2C 或 SPI 擴展器、專用晶片、多功能器件、FPGA
+或 CPLD)，大多數常用板級特定代碼都可以註冊控制器設備，並保證他們的驅動知道
+gpiochip_add()所使用的 GPIO 編號。他們的起始編號通常跟在平台特定的 GPIO
+編號之後。
+
+例如板級啓動代碼應該創建結構體指明晶片公開的 GPIO 範圍，並使用 platform_data
+將其傳遞給每個 GPIO 擴展器晶片。然後晶片驅動中的 probe()例程可以將這個
+數據傳遞給 gpiochip_add()。
+
+初始化順序很重要。例如，如果一個設備依賴基於 I2C 的(擴展)GPIO，那麼它的
+probe()例程就應該在那個 GPIO 有效以後才可以被調用。這意味著設備應該在
+GPIO 可以工作之後才可被註冊。解決這類依賴的的一種方法是讓這種 gpio_chip
+控制器向板級特定代碼提供 setup()和 teardown()回調函數。一旦所有必須的
+資源可用之後，這些板級特定的回調函數將會註冊設備，並可以在這些 GPIO 控制器
+設備變成無效時移除它們。
+
+
+用戶空間的 Sysfs 接口(可選)
+========================
+使用「gpiolib」實現框架的平台可以選擇配置一個 GPIO 的 sysfs 用戶接口。
+這不同於 debugfs 接口，因爲它提供的是對 GPIO方向和值的控制，而不只顯示
+一個GPIO 的狀態摘要。此外,它可以出現在沒有調試支持的產品級系統中。
+
+例如，通過適當的系統硬體文檔，用戶空間可以知道 GIOP #23 控制 Flash
+存儲器的防寫(用於保護其中 Bootloader 分區)。產品的系統升級可能需要
+臨時解除這個保護：首先導入一個 GPIO，改變其輸出狀態，然後在重新使能防寫
+前升級代碼。通常情況下,GPIO #23 是不會被觸及的，並且內核也不需要知道他。
+
+根據適當的硬體文檔，某些系統的用戶空間 GPIO 可以用於確定系統配置數據，
+這些數據是標準內核不知道的。在某些任務中，簡單的用戶空間 GPIO 驅動可能是
+系統真正需要的。
+
+注意：標準內核驅動中已經存在通用的「LED 和按鍵」GPIO 任務，分別是:
+"leds-gpio" 和 "gpio_keys"。請使用這些來替代直接訪問 GPIO，因爲集成在
+內核框架中的這類驅動比你在用戶空間的代碼更好。
+
+
+Sysfs 中的路徑
+--------------
+在/sys/class/gpio 中有 3 類入口:
+
+   -	用於在用戶空間控制 GPIO 的控制接口;
+
+   -	GPIOs 本身;以及
+
+   -	GPIO 控制器 ("gpio_chip" 實例)。
+
+除了這些標準的文件,還包含「device」符號連結。
+
+控制接口是只寫的:
+
+    /sys/class/gpio/
+
+    	"export" ... 用戶空間可以通過寫其編號到這個文件，要求內核導出
+		一個 GPIO 的控制到用戶空間。
+
+		例如: 如果內核代碼沒有申請 GPIO #19,"echo 19 > export"
+		將會爲 GPIO #19 創建一個 "gpio19" 節點。
+
+    	"unexport" ... 導出到用戶空間的逆操作。
+
+		例如: "echo 19 > unexport" 將會移除使用"export"文件導出的
+		"gpio19" 節點。
+
+GPIO 信號的路徑類似 /sys/class/gpio/gpio42/ (對於 GPIO #42 來說)，
+並有如下的讀/寫屬性:
+
+    /sys/class/gpio/gpioN/
+
+	"direction" ... 讀取得到 "in" 或 "out"。這個值通常運行寫入。
+		寫入"out" 時,其引腳的默認輸出爲低電平。爲了確保無故障運行，
+		"low" 或 "high" 的電平值應該寫入 GPIO 的配置，作爲初始輸出值。
+
+		注意:如果內核不支持改變 GPIO 的方向，或者在導出時內核代碼沒有
+		明確允許用戶空間可以重新配置 GPIO 方向，那麼這個屬性將不存在。
+
+	"value" ... 讀取得到 0 (低電平) 或 1 (高電平)。如果 GPIO 配置爲
+		輸出,這個值允許寫操作。任何非零值都以高電平看待。
+
+		如果引腳可以配置爲中斷信號，且如果已經配置了產生中斷的模式
+		（見"edge"的描述），你可以對這個文件使用輪詢操作(poll(2))，
+		且輪詢操作會在任何中斷觸發時返回。如果你使用輪詢操作(poll(2))，
+		請在 events 中設置 POLLPRI 和 POLLERR。如果你使用輪詢操作
+		(select(2))，請在 exceptfds 設置你期望的文件描述符。在
+		輪詢操作(poll(2))返回之後，既可以通過 lseek(2)操作讀取
+		sysfs 文件的開始部分，也可以關閉這個文件並重新打開它來讀取數據。
+
+	"edge" ... 讀取得到「none」、「rising」、「falling」或者「both」。
+		將這些字符串寫入這個文件可以選擇沿觸發模式，會使得輪詢操作
+		(select(2))在"value"文件中返回。
+
+		這個文件僅有在這個引腳可以配置爲可產生中斷輸入引腳時，才存在。
+
+	"active_low" ... 讀取得到 0 (假) 或 1 (真)。寫入任何非零值可以
+		翻轉這個屬性的(讀寫)值。已存在或之後通過"edge"屬性設置了"rising"
+		和 "falling" 沿觸發模式的輪詢操作(poll(2))將會遵循這個設置。
+
+GPIO 控制器的路徑類似 /sys/class/gpio/gpiochip42/ (對於從#42 GPIO
+開始實現控制的控制器),並有著以下只讀屬性:
+
+    /sys/class/gpio/gpiochipN/
+
+    	"base" ... 與以上的 N 相同,代表此晶片管理的第一個 GPIO 的編號
+
+    	"label" ... 用於診斷 (並不總是只有唯一值)
+
+    	"ngpio" ... 此控制器所管理的 GPIO 數量(而 GPIO 編號從 N 到
+    		N + ngpio - 1)
+
+大多數情況下,電路板的文檔應當標明每個 GPIO 的使用目的。但是那些編號並不總是
+固定的,例如在擴展卡上的 GPIO會根據所使用的主板或所在堆疊架構中其他的板子而
+有所不同。在這種情況下,你可能需要使用 gpiochip 節點(儘可能地結合電路圖)來
+確定給定信號所用的 GPIO 編號。
+
+
+從內核代碼中導出
+-------------
+內核代碼可以明確地管理那些已通過 gpio_request()申請的 GPIO 的導出:
+
+	/* 導出 GPIO 到用戶空間 */
+	int gpio_export(unsigned gpio, bool direction_may_change);
+
+	/* gpio_export()的逆操作 */
+	void gpio_unexport();
+
+	/* 創建一個 sysfs 連接到已導出的 GPIO 節點 */
+	int gpio_export_link(struct device *dev, const char *name,
+		unsigned gpio)
+
+在一個內核驅動申請一個 GPIO 之後，它可以通過 gpio_export()使其在 sysfs
+接口中可見。該驅動可以控制信號方向是否可修改。這有助於防止用戶空間代碼無意間
+破壞重要的系統狀態。
+
+這個明確的導出有助於(通過使某些實驗更容易來)調試，也可以提供一個始終存在的接口，
+與文檔配合作爲板級支持包的一部分。
+
+在 GPIO 被導出之後，gpio_export_link()允許在 sysfs 文件系統的任何地方
+創建一個到這個 GPIO sysfs 節點的符號連結。這樣驅動就可以通過一個描述性的
+名字，在 sysfs 中他們所擁有的設備下提供一個(到這個 GPIO sysfs 節點的)接口。
+
diff --git a/Documentation/translations/zh_TW/index.rst b/Documentation/translations/zh_TW/index.rst
new file mode 100644
index 000000000000..2a281036c406
--- /dev/null
+++ b/Documentation/translations/zh_TW/index.rst
@@ -0,0 +1,177 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. raw:: latex
+
+	\renewcommand\thesection*
+	\renewcommand\thesubsection*
+	\kerneldocCJKon
+	\kerneldocBeginTC
+
+.. _linux_doc_zh_tw:
+
+繁體中文翻譯
+============
+
+
+.. note::
+   內核文檔繁體中文版的翻譯工作正在進行中。如果您願意並且有時間參與這項工
+   作，歡迎提交補丁給胡皓文 <src.res@email.cn>。
+
+許可證文檔
+----------
+
+下面的文檔介紹了Linux內核原始碼的許可證（GPLv2）、如何在原始碼樹中正確標記
+單個文件的許可證、以及指向完整許可證文本的連結。
+
+Documentation/translations/zh_TW/process/license-rules.rst
+
+用戶文檔
+--------
+
+下面的手冊是爲內核用戶編寫的——即那些試圖讓它在給定系統上以最佳方式工作的
+用戶。
+
+.. toctree::
+   :maxdepth: 2
+
+   admin-guide/index
+
+TODOList:
+
+* kbuild/index
+
+固件相關文檔
+------------
+
+下列文檔描述了內核需要的平台固件相關信息。
+
+TODOList:
+
+* firmware-guide/index
+* devicetree/index
+
+應用程式開發人員文檔
+--------------------
+
+用戶空間API手冊涵蓋了描述應用程式開發人員可見內核接口方面的文檔。
+
+TODOlist:
+
+* userspace-api/index
+
+內核開發簡介
+------------
+
+這些手冊包含有關如何開發內核的整體信息。內核社區非常龐大，一年下來有數千名
+開發人員做出貢獻。與任何大型社區一樣，知道如何完成任務將使得更改合併的過程
+變得更加容易。
+
+.. toctree::
+   :maxdepth: 2
+
+   process/index
+
+TODOList:
+
+* dev-tools/index
+* doc-guide/index
+* kernel-hacking/index
+* trace/index
+* maintainer/index
+* fault-injection/index
+* livepatch/index
+* rust/index
+
+內核API文檔
+-----------
+
+以下手冊從內核開發人員的角度詳細介紹了特定的內核子系統是如何工作的。這裡的
+大部分信息都是直接從內核原始碼獲取的，並根據需要添加補充材料（或者至少是在
+我們設法添加的時候——可能不是所有的都是有需要的）。
+
+.. toctree::
+   :maxdepth: 2
+
+   cpu-freq/index
+   filesystems/index
+
+TODOList:
+
+* driver-api/index
+* core-api/index
+* locking/index
+* accounting/index
+* block/index
+* cdrom/index
+* ide/index
+* fb/index
+* fpga/index
+* hid/index
+* i2c/index
+* iio/index
+* isdn/index
+* infiniband/index
+* leds/index
+* netlabel/index
+* networking/index
+* pcmcia/index
+* power/index
+* target/index
+* timers/index
+* spi/index
+* w1/index
+* watchdog/index
+* virt/index
+* input/index
+* hwmon/index
+* gpu/index
+* security/index
+* sound/index
+* crypto/index
+* vm/index
+* bpf/index
+* usb/index
+* PCI/index
+* scsi/index
+* misc-devices/index
+* scheduler/index
+* mhi/index
+
+體系結構無關文檔
+----------------
+
+.. toctree::
+   :maxdepth: 2
+
+   arm64/index
+
+TODOList:
+
+* asm-annotations
+
+特定體系結構文檔
+----------------
+
+TODOList:
+
+* arch
+
+其他文檔
+--------
+
+有幾份未排序的文檔似乎不適合放在文檔的其他部分，或者可能需要進行一些調整和/或
+轉換爲reStructureText格式，也有可能太舊。
+
+TODOList:
+
+* staging/index
+* watch_queue
+
+目錄和表格
+----------
+
+* :ref:`genindex`
+
+.. raw:: latex
+
+	\kerneldocEndTC
diff --git a/Documentation/translations/zh_TW/io_ordering.txt b/Documentation/translations/zh_TW/io_ordering.txt
new file mode 100644
index 000000000000..1e99206c8421
--- /dev/null
+++ b/Documentation/translations/zh_TW/io_ordering.txt
@@ -0,0 +1,68 @@
+Chinese translated version of Documentation/driver-api/io_ordering.rst
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly.  However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help.  Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Traditional Chinese maintainer: Hu Haowen <src.res@email.cn>
+---------------------------------------------------------------------
+Documentation/driver-api/io_ordering.rst 的繁體中文翻譯
+
+如果想評論或更新本文的內容，請直接聯繫原文檔的維護者。如果你使用英文
+交流有困難的話，也可以向繁體中文版維護者求助。如果本翻譯更新不及時或
+者翻譯存在問題，請聯繫繁體中文版維護者。
+
+繁體中文版維護者： 胡皓文 Hu Haowen <src.res@email.cn>
+繁體中文版翻譯者： 胡皓文 Hu Haowen <src.res@email.cn>
+繁體中文版校譯者： 胡皓文 Hu Haowen <src.res@email.cn>
+
+
+以下爲正文
+---------------------------------------------------------------------
+
+在某些平台上，所謂的內存映射I/O是弱順序。在這些平台上，驅動開發者有責任
+保證I/O內存映射地址的寫操作按程序圖意的順序達到設備。通常讀取一個「安全」
+設備寄存器或橋寄存器，觸發IO晶片清刷未處理的寫操作到達設備後才處理讀操作，
+而達到保證目的。驅動程序通常在spinlock保護的臨界區退出之前使用這種技術。
+這也可以保證後面的寫操作只在前面的寫操作之後到達設備（這非常類似於內存
+屏障操作，mb()，不過僅適用於I/O）。
+
+假設一個設備驅動程的具體例子：
+
+        ...
+CPU A:  spin_lock_irqsave(&dev_lock, flags)
+CPU A:  val = readl(my_status);
+CPU A:  ...
+CPU A:  writel(newval, ring_ptr);
+CPU A:  spin_unlock_irqrestore(&dev_lock, flags)
+        ...
+CPU B:  spin_lock_irqsave(&dev_lock, flags)
+CPU B:  val = readl(my_status);
+CPU B:  ...
+CPU B:  writel(newval2, ring_ptr);
+CPU B:  spin_unlock_irqrestore(&dev_lock, flags)
+        ...
+
+上述例子中，設備可能會先接收到newval2的值，然後接收到newval的值，問題就
+發生了。不過很容易通過下面方法來修復：
+
+        ...
+CPU A:  spin_lock_irqsave(&dev_lock, flags)
+CPU A:  val = readl(my_status);
+CPU A:  ...
+CPU A:  writel(newval, ring_ptr);
+CPU A:  (void)readl(safe_register); /* 配置寄存器？*/
+CPU A:  spin_unlock_irqrestore(&dev_lock, flags)
+        ...
+CPU B:  spin_lock_irqsave(&dev_lock, flags)
+CPU B:  val = readl(my_status);
+CPU B:  ...
+CPU B:  writel(newval2, ring_ptr);
+CPU B:  (void)readl(safe_register); /* 配置寄存器？*/
+CPU B:  spin_unlock_irqrestore(&dev_lock, flags)
+
+在解決方案中，讀取safe_register寄存器，觸發IO晶片清刷未處理的寫操作，
+再處理後面的讀操作，防止引發數據不一致問題。
+
diff --git a/Documentation/translations/zh_TW/oops-tracing.txt b/Documentation/translations/zh_TW/oops-tracing.txt
new file mode 100644
index 000000000000..be8e59f2abaf
--- /dev/null
+++ b/Documentation/translations/zh_TW/oops-tracing.txt
@@ -0,0 +1,212 @@
+Chinese translated version of Documentation/admin-guide/bug-hunting.rst
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly.  However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help.  Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Traditional Chinese maintainer:  Hu Haowen <src.res@email.cn>
+---------------------------------------------------------------------
+Documentation/admin-guide/bug-hunting.rst 的繁體中文版翻譯
+
+如果想評論或更新本文的內容，請直接聯繫原文檔的維護者。如果你使用英文
+交流有困難的話，也可以向繁體中文版維護者求助。如果本翻譯更新不及時或
+者翻譯存在問題，請聯繫繁體中文版維護者。
+
+繁體中文版維護者： 胡皓文 Hu Haowen <src.res@email.cn>
+繁體中文版翻譯者： 胡皓文 Hu Haowen <src.res@email.cn>
+繁體中文版校譯者： 胡皓文 Hu Haowen <src.res@email.cn>
+
+以下爲正文
+---------------------------------------------------------------------
+
+注意： ksymoops 在2.6中是沒有用的。 請以原有格式使用Oops(來自dmesg，等等)。
+忽略任何這樣那樣關於「解碼Oops」或者「通過ksymoops運行」的文檔。 如果你貼出運行過
+ksymoops的來自2.6的Oops，人們只會讓你重貼一次。
+
+快速總結
+-------------
+
+發現Oops並發送給看似相關的內核領域的維護者。別太擔心對不上號。如果你不確定就發給
+和你所做的事情相關的代碼的負責人。 如果可重現試著描述怎樣重構。 那甚至比oops更有
+價值。
+
+如果你對於發送給誰一無所知， 發給linux-kernel@vger.kernel.org。感謝你幫助Linux
+儘可能地穩定。
+
+Oops在哪裡?
+----------------------
+
+通常Oops文本由klogd從內核緩衝區里讀取並傳給syslogd，由syslogd寫到syslog文件中，
+典型地是/var/log/messages(依賴於/etc/syslog.conf)。有時klogd崩潰了,這種情況下你
+能夠運行dmesg > file來從內核緩衝區中讀取數據並保存下來。 否則你可以
+cat /proc/kmsg > file， 然而你必須介入中止傳輸， kmsg是一個「永不結束的文件」。如
+果機器崩潰壞到你不能輸入命令或者磁碟不可用那麼你有三種選擇:-
+
+（1） 手抄屏幕上的文本待機器重啓後再輸入計算機。 麻煩但如果沒有針對崩潰的準備，
+這是僅有的選擇。 另外，你可以用數位相機把屏幕拍下來-不太好，但比沒有強。 如果信
+息滾動到了終端的上面，你會發現以高分辯率啓動（比如，vga=791）會讓你讀到更多的文
+本。（注意：這需要vesafb，所以對『早期』的oops沒有幫助）
+
+（2）用串口終端啓動（請參看Documentation/admin-guide/serial-console.rst），運行一個null
+modem到另一台機器並用你喜歡的通訊工具獲取輸出。Minicom工作地很好。
+
+（3）使用Kdump（請參看Documentation/admin-guide/kdump/kdump.rst），
+使用在Documentation/admin-guide/kdump/gdbmacros.txt中定義的dmesg gdb宏，從舊的內存中提取內核
+環形緩衝區。
+
+完整信息
+----------------
+
+注意：以下來自於Linus的郵件適用於2.4內核。 我因爲歷史原因保留了它，並且因爲其中
+一些信息仍然適用。 特別注意的是，請忽略任何ksymoops的引用。
+
+From: Linus Torvalds <torvalds@osdl.org>
+
+怎樣跟蹤Oops.. [原發到linux-kernel的一封郵件]
+
+主要的竅門是有五年和這些煩人的oops消息打交道的經驗;-)
+
+實際上，你有辦法使它更簡單。我有兩個不同的方法：
+
+	gdb /usr/src/linux/vmlinux
+	gdb> disassemble <offending_function>
+
+那是發現問題的簡單辦法，至少如果bug報告做的好的情況下（象這個一樣-運行ksymoops
+得到oops發生的函數及函數內的偏移）。
+
+哦，如果報告發生的內核以相同的編譯器和相似的配置編譯它會有幫助的。
+
+另一件要做的事是反彙編bug報告的「Code」部分：ksymoops也會用正確的工具來做這件事，
+但如果沒有那些工具你可以寫一個傻程序：
+
+	char str[] = "\xXX\xXX\xXX...";
+	main(){}
+
+並用gcc -g編譯它然後執行「disassemble str」（XX部分是由Oops報告的值-你可以僅剪切
+粘貼並用「\x」替換空格-我就是這麼做的，因爲我懶得寫程序自動做這一切）。
+
+另外，你可以用scripts/decodecode這個shell腳本。它的使用方法是：
+decodecode < oops.txt
+
+「Code」之後的十六進位字節可能（在某些架構上）有一些當前指令之前的指令字節以及
+當前和之後的指令字節
+
+Code: f9 0f 8d f9 00 00 00 8d 42 0c e8 dd 26 11 c7 a1 60 ea 2b f9 8b 50 08 a1
+64 ea 2b f9 8d 34 82 8b 1e 85 db 74 6d 8b 15 60 ea 2b f9 <8b> 43 04 39 42 54
+7e 04 40 89 42 54 8b 43 04 3b 05 00 f6 52 c0
+
+最後，如果你想知道代碼來自哪裡，你可以：
+
+	cd /usr/src/linux
+	make fs/buffer.s 	# 或任何產生BUG的文件
+
+然後你會比gdb反彙編更清楚的知道發生了什麼。
+
+現在，問題是把你所擁有的所有數據結合起來：C源碼（關於它應該怎樣的一般知識），
+彙編代碼及其反彙編得到的代碼（另外還有從「oops」消息得到的寄存器狀態-對了解毀壞的
+指針有用，而且當你有了彙編代碼你也能拿其它的寄存器和任何它們對應的C表達式做匹配
+）。
+
+實際上，你僅需看看哪裡不匹配（這個例子是「Code」反彙編和編譯器生成的代碼不匹配）。
+然後你須要找出爲什麼不匹配。通常很簡單-你看到代碼使用了空指針然後你看代碼想知道
+空指針是怎麼出現的，還有檢查它是否合法..
+
+現在，如果明白這是一項耗時的工作而且需要一丁點兒的專心，沒錯。這就是我爲什麼大多
+只是忽略那些沒有符號表信息的崩潰報告的原因：簡單的說太難查找了（我有一些
+程序用於在內核代碼段中搜索特定的模式，而且有時我也已經能找出那些崩潰的地方，但是
+僅僅是找出正確的序列也確實需要相當紮實的內核知識）
+
+_有時_會發生這種情況，我僅看到崩潰中的反彙編代碼序列， 然後我馬上就明白問題出在
+哪裡。這時我才意識到自己幹這個工作已經太長時間了;-)
+
+		Linus
+
+
+---------------------------------------------------------------------------
+關於Oops跟蹤的註解：
+
+爲了幫助Linus和其它內核開發者，klogd納入了大量的支持來處理保護錯誤。爲了擁有對
+地址解析的完整支持至少應該使用1.3-pl3的sysklogd包。
+
+當保護錯誤發生時，klogd守護進程自動把內核日誌信息中的重要地址翻譯成它們相應的符
+號。
+
+klogd執行兩種類型的地址解析。首先是靜態翻譯其次是動態翻譯。靜態翻譯和ksymoops
+一樣使用System.map文件。爲了做靜態翻譯klogd守護進程必須在初始化時能找到system
+map文件。關於klogd怎樣搜索map文件請參看klogd手冊頁。
+
+動態地址翻譯在使用內核可裝載模塊時很重要。 因爲內核模塊的內存是從內核動態內存池
+里分配的，所以不管是模塊開始位置還是模塊中函數和符號的位置都不是固定的。
+
+內核支持允許程序決定裝載哪些模塊和它們在內存中位置的系統調用。使用這些系統調用
+klogd守護進程生成一張符號表用於調試發生在可裝載模塊中的保護錯誤。
+
+至少klogd會提供產生保護錯誤的模塊名。還可有額外的符號信息供可裝載模塊開發者選擇
+以從模塊中輸出符號信息。
+
+因爲內核模塊環境可能是動態的，所以必須有一種機制當模塊環境發生改變時來通知klogd
+守護進程。 有一些可用的命令行選項允許klogd向當前執行中的守護進程發送信號，告知符
+號信息應該被刷新了。 更多信息請參看klogd手冊頁。
+
+sysklogd發布時包含一個補丁修改了modules-2.0.0包，無論何時一個模塊裝載或者卸載都
+會自動向klogd發送信號。打上這個補丁提供了必要的對調試發生於內核可裝載模塊的保護
+錯誤的無縫支持。
+
+以下是被klogd處理過的發生在可裝載模塊中的一個保護錯誤例子：
+---------------------------------------------------------------------------
+Aug 29 09:51:01 blizard kernel: Unable to handle kernel paging request at virtual address f15e97cc
+Aug 29 09:51:01 blizard kernel: current->tss.cr3 = 0062d000, %cr3 = 0062d000
+Aug 29 09:51:01 blizard kernel: *pde = 00000000
+Aug 29 09:51:01 blizard kernel: Oops: 0002
+Aug 29 09:51:01 blizard kernel: CPU:    0
+Aug 29 09:51:01 blizard kernel: EIP:    0010:[oops:_oops+16/3868]
+Aug 29 09:51:01 blizard kernel: EFLAGS: 00010212
+Aug 29 09:51:01 blizard kernel: eax: 315e97cc   ebx: 003a6f80   ecx: 001be77b   edx: 00237c0c
+Aug 29 09:51:01 blizard kernel: esi: 00000000   edi: bffffdb3   ebp: 00589f90   esp: 00589f8c
+Aug 29 09:51:01 blizard kernel: ds: 0018   es: 0018   fs: 002b   gs: 002b   ss: 0018
+Aug 29 09:51:01 blizard kernel: Process oops_test (pid: 3374, process nr: 21, stackpage=00589000)
+Aug 29 09:51:01 blizard kernel: Stack: 315e97cc 00589f98 0100b0b4 bffffed4 0012e38e 00240c64 003a6f80 00000001
+Aug 29 09:51:01 blizard kernel:        00000000 00237810 bfffff00 0010a7fa 00000003 00000001 00000000 bfffff00
+Aug 29 09:51:01 blizard kernel:        bffffdb3 bffffed4 ffffffda 0000002b 0007002b 0000002b 0000002b 00000036
+Aug 29 09:51:01 blizard kernel: Call Trace: [oops:_oops_ioctl+48/80] [_sys_ioctl+254/272] [_system_call+82/128]
+Aug 29 09:51:01 blizard kernel: Code: c7 00 05 00 00 00 eb 08 90 90 90 90 90 90 90 90 89 ec 5d c3
+---------------------------------------------------------------------------
+
+Dr. G.W. Wettstein           Oncology Research Div. Computing Facility
+Roger Maris Cancer Center    INTERNET: greg@wind.rmcc.com
+820 4th St. N.
+Fargo, ND  58122
+Phone: 701-234-7556
+
+
+---------------------------------------------------------------------------
+受汙染的內核
+
+一些oops報告在程序記數器之後包含字符串'Tainted: '。這表明內核已經被一些東西給汙
+染了。 該字符串之後緊跟著一系列的位置敏感的字符，每個代表一個特定的汙染值。
+
+  1：'G'如果所有裝載的模塊都有GPL或相容的許可證，'P'如果裝載了任何的專有模塊。
+沒有模塊MODULE_LICENSE或者帶有insmod認爲是與GPL不相容的的MODULE_LICENSE的模塊被
+認定是專有的。
+
+  2：'F'如果有任何通過「insmod -f」被強制裝載的模塊，' '如果所有模塊都被正常裝載。
+
+  3：'S'如果oops發生在SMP內核中，運行於沒有證明安全運行多處理器的硬體。 當前這種
+情況僅限於幾種不支持SMP的速龍處理器。
+
+  4：'R'如果模塊通過「insmod -f」被強制裝載，' '如果所有模塊都被正常裝載。
+
+  5：'M'如果任何處理器報告了機器檢查異常，' '如果沒有發生機器檢查異常。
+
+  6：'B'如果頁釋放函數發現了一個錯誤的頁引用或者一些非預期的頁標誌。
+
+  7：'U'如果用戶或者用戶應用程式特別請求設置汙染標誌，否則' '。
+
+  8：'D'如果內核剛剛死掉，比如有OOPS或者BUG。
+
+使用'Tainted: '字符串的主要原因是要告訴內核調試者，這是否是一個乾淨的內核亦或發
+生了任何的不正常的事。汙染是永久的：即使出錯的模塊已經被卸載了，汙染值仍然存在，
+以表明內核不再值得信任。
+
diff --git a/Documentation/translations/zh_TW/process/1.Intro.rst b/Documentation/translations/zh_TW/process/1.Intro.rst
new file mode 100644
index 000000000000..ca2b931be6c5
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/1.Intro.rst
@@ -0,0 +1,199 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/1.Intro.rst <development_process_intro>`
+
+:Translator:
+
+ 時奎亮 Alex Shi <alex.shi@linux.alibaba.com>
+
+:校譯:
+
+ 吳想成 Wu XiangCheng <bobwxc@email.cn>
+ 胡皓文 Hu Haowen <src.res@email.cn>
+
+.. _tw_development_process_intro:
+
+引言
+====
+
+內容提要
+--------
+
+本節的其餘部分涵蓋了內核開發的過程，以及開發人員及其僱主在這方面可能遇到的
+各種問題。有很多原因使內核代碼應被合併到正式的（「主線」）內核中，包括對用戶
+的自動可用性、多種形式的社區支持以及影響內核開發方向的能力。提供給Linux內核
+的代碼必須在與GPL兼容的許可證下可用。
+
+:ref:`tw_development_process` 介紹了開發過程、內核發布周期和合併窗口的機制。
+涵蓋了補丁開發、審查和合併周期中的各個階段。還有一些關於工具和郵件列表的討論？
+鼓勵希望開始內核開發的開發人員跟蹤並修復缺陷以作爲初步練習。
+
+
+:ref:`tw_development_early_stage` 包括項目的早期規劃，重點是儘快讓開發社區
+參與進來。
+
+:ref:`tw_development_coding` 是關於編程過程的；介紹了其他開發人員遇到的幾個
+陷阱。也涵蓋了對補丁的一些要求，並且介紹了一些工具，這些工具有助於確保內核
+補丁是正確的。
+
+:ref:`tw_development_posting` 描述發布補丁以供評審的過程。爲了讓開發社區能
+認真對待，補丁必須被正確格式化和描述，並且必須發送到正確的地方。遵循本節中的
+建議有助於確保您的工作能被較好地接納。
+
+:ref:`tw_development_followthrough` 介紹了發布補丁之後發生的事情；工作在這時
+還遠遠沒有完成。與審閱者一起工作是開發過程中的一個重要部分；本節提供了一些
+關於如何在這個重要階段避免問題的提示。當補丁被合併到主線中時，開發人員要注意
+不要假定任務已經完成。
+
+:ref:`tw_development_advancedtopics` 介紹了兩個「高級」主題：使用Git管理補丁
+和查看其他人發布的補丁。
+
+:ref:`tw_development_conclusion` 總結了有關內核開發的更多信息，附帶有相關資源
+連結。
+
+這個文檔是關於什麼的
+--------------------
+
+Linux內核有超過800萬行代碼，每個版本的貢獻者超過1000人，是現存最大、最活躍的
+免費軟體項目之一。從1991年開始，這個內核已經發展成爲一個最好的作業系統組件，
+運行在袖珍數位音樂播放器、桌上型電腦、現存最大的超級計算機以及所有類型的系統上。
+它是一種適用於幾乎任何情況的健壯、高效和可擴展的解決方案。
+
+隨著Linux的發展，希望參與其開發的開發人員（和公司）的數量也在增加。硬體供應商
+希望確保Linux能夠很好地支持他們的產品，使這些產品對Linux用戶具有吸引力。嵌入
+式系統供應商使用Linux作爲集成產品的組件，希望Linux能夠儘可能地勝任手頭的任務。
+分銷商和其他基於Linux的軟體供應商切實關心Linux內核的功能、性能和可靠性。最終
+用戶也常常希望修改Linux，使之能更好地滿足他們的需求。
+
+Linux最引人注目的特性之一是這些開發人員可以訪問它；任何具備必要技能的人都可以
+改進Linux並影響其開發方向。專有產品不能提供這種開放性，這是自由軟體的一個特點。
+如果有什麼不同的話，那就是內核比大多數其他自由軟體項目更開放。一個典型的三個
+月內核開發周期可以涉及1000多個開發人員，他們爲100多個不同的公司（或者根本不
+隸屬公司）工作。
+
+與內核開發社區合作並不是特別困難。但儘管如此，仍有許多潛在的貢獻者在嘗試做
+內核工作時遇到了困難。內核社區已經發展出自己獨特的操作方式，使其能夠在每天
+都要更改數千行代碼的環境中順利運行（並生成高質量的產品）。因此，Linux內核開發
+過程與專有的開發模式有很大的不同也就不足爲奇了。
+
+對於新開發人員來說，內核的開發過程可能會讓人感到奇怪和恐懼，但這背後有充分的
+理由和堅實的經驗。一個不了解內核社區工作方式的開發人員（或者更糟的是，他們
+試圖拋棄或規避之）會得到令人沮喪的體驗。開發社區在幫助那些試圖學習的人的同時，
+沒有時間幫助那些不願意傾聽或不關心開發過程的人。
+
+希望閱讀本文的人能夠避免這種令人沮喪的經歷。這些材料很長，但閱讀它們時所做的
+努力會在短時間內得到回報。開發社區總是需要能讓內核變更好的開發人員；下面的
+文字應該幫助您或爲您工作的人員加入我們的社區。
+
+致謝
+----
+
+本文檔由Jonathan Corbet <corbet@lwn.net> 撰寫。以下人員的建議使之更爲完善：
+Johannes Berg, James Berry, Alex Chiang, Roland Dreier, Randy Dunlap,
+Jake Edge, Jiri Kosina, Matt Mackall, Arthur Marsh, Amanda McPherson,
+Andrew Morton, Andrew Price, Tsugikazu Shibata 和 Jochen Voß 。
+
+這項工作得到了Linux基金會的支持，特別感謝Amanda McPherson，他看到了這項工作
+的價值並將其變成現實。
+
+代碼進入主線的重要性
+--------------------
+
+有些公司和開發人員偶爾會想，爲什麼他們要費心學習如何與內核社區合作，並將代碼
+放入主線內核（「主線」是由Linus Torvalds維護的內核，Linux發行商將其用作基礎）。
+在短期內，貢獻代碼看起來像是一種可以避免的開銷；維護獨立代碼並直接支持用戶
+似乎更容易。事實上，保持代碼獨立（「樹外」）是在經濟上是錯誤的。
+
+爲了說明樹外代碼成本，下面給出內核開發過程的一些相關方面；本文稍後將更詳細地
+討論其中的大部分內容。請考慮：
+
+- 所有Linux用戶都可以使用合併到主線內核中的代碼。它將自動出現在所有啓用它的
+  發行版上。無需驅動程序磁碟、額外下載，也不需要爲多個發行版的多個版本提供
+  支持；這一切將方便所有開發人員和用戶。併入主線解決了大量的分發和支持問題。
+
+- 當內核開發人員努力維護一個穩定的用戶空間接口時，內核內部API處於不斷變化之中。
+  不維持穩定的內部接口是一個慎重的設計決策；它允許在任何時候進行基本的改進，
+  並產出更高質量的代碼。但該策略導致結果是，若要使用新的內核，任何樹外代碼都
+  需要持續的維護。維護樹外代碼會需要大量的工作才能使代碼保持正常運行。
+
+  相反，位於主線中的代碼不需要這樣做，因爲基本規則要求進行API更改的任何開發
+  人員也必須修復由於該更改而破壞的任何代碼。因此，合併到主線中的代碼大大降低
+  了維護成本。
+
+- 除此之外，內核中的代碼通常會被其他開發人員改進。您授權的用戶社區和客戶對您
+  產品的改進可能會令人驚喜。
+
+- 內核代碼在合併到主線之前和之後都要經過審查。無論原始開發人員的技能有多強，
+  這個審查過程總是能找到改進代碼的方法。審查經常發現嚴重的錯誤和安全問題。
+  對於在封閉環境中開發的代碼尤其如此；這種代碼從外部開發人員的審查中獲益匪淺。
+  樹外代碼是低質量代碼。
+
+- 參與開發過程是您影響內核開發方向的方式。旁觀者的抱怨會被聽到，但是活躍的
+  開發人員有更強的聲音——並且能夠實現使內核更好地滿足其需求的更改。
+
+- 當單獨維護代碼時，總是存在第三方爲類似功能提供不同實現的可能性。如果發生
+  這種情況，合併代碼將變得更加困難——甚至成爲不可能。之後，您將面臨以下令人
+  不快的選擇：（1）無限期地維護樹外的非標準特性，或（2）放棄代碼並將用戶遷移
+  到樹內版本。
+
+- 代碼的貢獻是使整個流程工作的根本。通過貢獻代碼，您可以向內核添加新功能，並
+  提供其他內核開發人員使用的功能和示例。如果您已經爲Linux開發了代碼（或者正在
+  考慮這樣做），那麼您顯然對這個平台的持續成功感興趣；貢獻代碼是確保成功的
+  最好方法之一。
+
+上述所有理由都適用於任何樹外內核代碼，包括以專有的、僅二進位形式分發的代碼。
+然而，在考慮任何類型的純二進位內核代碼分布之前，還需要考慮其他因素。包括：
+
+- 圍繞專有內核模塊分發的法律問題其實較爲模糊；相當多的內核版權所有者認爲，
+  大多數僅二進位的模塊是內核的派生產品，因此，它們的分發違反了GNU通用公共
+  許可證（下面將詳細介紹）。本文作者不是律師，本文檔中的任何內容都不可能被
+  視爲法律建議。封閉原始碼模塊的真實法律地位只能由法院決定。但不管怎樣，困擾
+  這些模塊的不確定性仍然存在。
+
+- 二進位模塊大大增加了調試內核問題的難度，以至於大多數內核開發人員甚至都不會
+  嘗試。因此，只分發二進位模塊將使您的用戶更難從社區獲得支持。
+
+- 對於僅二進位的模塊的發行者來說，支持也更加困難，他們必須爲他們希望支持的
+  每個發行版和每個內核版本提供不同版本的模塊。爲了提供較爲全面的覆蓋範圍，
+  可能需要一個模塊的幾十個構建，並且每次升級內核時，您的用戶都必須單獨升級
+  這些模塊。
+
+- 上面提到的關於代碼評審的所有問題都更加存在於封閉原始碼中。由於該代碼根本
+  不可得，因此社區無法對其進行審查，毫無疑問，它將存在嚴重問題。
+
+尤其是嵌入式系統的製造商，可能會傾向於忽視本節中所說的大部分內容；因爲他們
+相信自己正在商用一種使用凍結內核版本的獨立產品，在發布後不需要再進行開發。
+這個論點忽略了廣泛的代碼審查的價值以及允許用戶向產品添加功能的價值。但這些
+產品的商業壽命有限，之後必須發布新版本的產品。在這一點上，代碼在主線上並得到
+良好維護的供應商將能夠更好地占位，以使新產品快速上市。
+
+許可
+----
+
+代碼是根據一些許可證提供給Linux內核的，但是所有代碼都必須與GNU通用公共許可
+證（GPLV2）的版本2兼容，該版本是覆蓋整個內核分發的許可證。在實踐中，這意味
+著所有代碼貢獻都由GPLv2（可選地，語言允許在更高版本的GPL下分發）或3子句BSD
+許可（New BSD License，譯者注）覆蓋。任何不包含在兼容許可證中的貢獻都不會
+被接受到內核中。
+
+貢獻給內核的代碼不需要（或請求）版權分配。合併到主線內核中的所有代碼都保留
+其原始所有權；因此，內核現在擁有數千個所有者。
+
+這種所有權結構也暗示著，任何改變內核許可的嘗試都註定會失敗。很少有實際情況
+可以獲得所有版權所有者的同意（或者從內核中刪除他們的代碼）。因此，尤其是在
+可預見的將來，許可證不大可能遷移到GPL的版本3。
+
+所有貢獻給內核的代碼都必須是合法的免費軟體。因此，不接受匿名（或化名）貢獻
+者的代碼。所有貢獻者都需要在他們的代碼上「sign off（簽發）」，聲明代碼可以
+在GPL下與內核一起分發。無法提供未被其所有者許可爲免費軟體的代碼，或可能爲
+內核造成版權相關問題的代碼（例如，由缺乏適當保護的反向工程工作派生的代碼）
+不能被接受。
+
+有關版權問題的提問在Linux開發郵件列表中很常見。這樣的問題通常會得到不少答案，
+但請記住，回答這些問題的人不是律師，不能提供法律諮詢。如果您有關於Linux原始碼
+的法律問題，沒有什麼可以代替諮詢了解這一領域的律師。依賴從技術郵件列表中獲得
+的答案是一件冒險的事情。
+
+
diff --git a/Documentation/translations/zh_TW/process/2.Process.rst b/Documentation/translations/zh_TW/process/2.Process.rst
new file mode 100644
index 000000000000..b01cdd3a39ae
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/2.Process.rst
@@ -0,0 +1,369 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/2.Process.rst <development_process>`
+
+:Translator:
+
+ 時奎亮 Alex Shi <alex.shi@linux.alibaba.com>
+
+:校譯:
+
+ 吳想成 Wu XiangCheng <bobwxc@email.cn>
+ 胡皓文 Hu Haowen <src.res@email.cn>
+
+.. _tw_development_process:
+
+開發流程如何進行
+================
+
+90年代早期的Linux內核開發是一件相當鬆散的事情，涉及的用戶和開發人員相對較少。
+由於擁有數以百萬計的用戶羣，且每年有大約2000名開發人員參與進來，內核因此必須
+發展出許多既定流程來保證開發的順利進行。要參與到流程中來，需要對此流程的進行
+方式有一個紮實的理解。
+
+總覽
+----
+
+內核開發人員使用一個鬆散的基於時間的發布過程，每兩到三個月發布一次新的主要
+內核版本。最近的發布歷史記錄如下：
+
+	======  =================
+	5.0	2019年3月3日
+	5.1	2019年5月5日
+	5.2	2019年7月7日
+	5.3	2019年9月15日
+	5.4	2019年11月24日
+	5.5	2020年1月6日
+	======  =================
+
+每個5.x版本都是一個主要的內核版本，具有新特性、內部API更改等等。一個典型的5.x
+版本包含大約13000個變更集，變更了幾十萬行代碼。因此，5.x是Linux內核開發的前
+沿；內核使用滾動開發模型，不斷集成重大變化。
+
+對於每個版本的補丁合併，遵循一個相對簡單的規則。在每個開發周期的開頭，「合併
+窗口」被打開。這時，被認爲足夠穩定（並且被開發社區接受）的代碼被合併到主線內
+核中。在這段時間內，新開發周期的大部分變更（以及所有主要變更）將以接近每天
+1000次變更（「補丁」或「變更集」）的速度合併。
+
+（順便說一句，值得注意的是，合併窗口期間集成的更改並不是憑空產生的；它們是經
+提前收集、測試和分級的。稍後將詳細描述該過程的工作方式。）
+
+合併窗口持續大約兩周。在這段時間結束時，LinusTorvalds將聲明窗口已關閉，並
+釋放第一個「rc」內核。例如，對於目標爲5.6的內核，在合併窗口結束時發生的釋放
+將被稱爲5.6-rc1。-rc1 版本是一個信號，表示合併新特性的時間已經過去，穩定下一
+個內核的時間已經到來。
+
+在接下來的6到10周內，只有修復問題的補丁才應該提交給主線。有時會允許更大的
+更改，但這種情況很少發生；試圖在合併窗口外合併新功能的開發人員往往受不到
+友好的接待。一般來說，如果您錯過了給定特性的合併窗口，最好的做法是等待下一
+個開發周期。（偶爾會對未支持硬體的驅動程序進行例外；如果它們不改變已有代碼，
+則不會導致回歸，應該可以隨時被安全地加入）。
+
+隨著修復程序進入主線，補丁速度將隨著時間的推移而變慢。Linus大約每周發布一次
+新的-rc內核；在內核被認爲足夠穩定並最終發布前，一般會達到-rc6到-rc9之間。
+然後，整個過程又重新開始了。
+
+例如，這裡是5.4的開發周期進行情況（2019年）:
+
+	==============  ==============================
+	九月 15         5.3 穩定版發布
+	九月 30         5.4-rc1 合併窗口關閉
+	十月 6          5.4-rc2
+	十月 13         5.4-rc3
+	十月 20         5.4-rc4
+	十月 27         5.4-rc5
+	十一月 3        5.4-rc6
+	十一月 10       5.4-rc7
+	十一月 17       5.4-rc8
+	十一月 24       5.4 穩定版發布
+	==============  ==============================
+
+開發人員如何決定何時結束開發周期並創建穩定版本？最重要的指標是以前版本的
+回歸列表。不歡迎出現任何錯誤，但是那些破壞了以前能工作的系統的錯誤被認爲是
+特別嚴重的。因此，導致回歸的補丁是不受歡迎的，很可能在穩定期內刪除。
+
+開發人員的目標是在穩定發布之前修復所有已知的回歸。在現實世界中，這種完美是
+很難實現的；在這種規模的項目中，變數太多了。需要說明的是，延遲最終版本只會
+使問題變得更糟；等待下一個合併窗口的更改將變多，導致下次出現更多的回歸錯誤。
+因此，大多數5.x內核都有一些已知的回歸錯誤，不過，希望沒有一個是嚴重的。
+
+一旦一個穩定的版本發布，它的持續維護工作就被移交給「穩定團隊」，目前由
+Greg Kroah-Hartman領導。穩定團隊將使用5.x.y編號方案不定期地發布穩定版本的
+更新。要合入更新版本，補丁必須（1）修復一個重要的缺陷，且（2）已經合併到
+下一個開發版本主線中。內核通常會在其初始版本後的一個以上的開發周期內收到
+穩定版更新。例如，5.2內核的歷史如下（2019年）：
+
+	==============  ===============================
+        七月 7 	        5.2 穩定版發布
+	七月 13	        5.2.1
+	七月 21	        5.2.2
+	七月 26	        5.2.3
+	七月 28	        5.2.4
+	七月 31	        5.2.5
+	...	        ...
+	十月 11         5.2.21
+	==============  ===============================
+
+5.2.21是5.2版本的最終穩定更新。
+
+有些內核被指定爲「長期」內核；它們將得到更長時間的支持。在本文中，當前的長期
+內核及其維護者是：
+
+	======  ================================	================
+	3.16	Ben Hutchings				（長期穩定內核）
+	4.4	Greg Kroah-Hartman & Sasha Levin	（長期穩定內核）
+	4.9	Greg Kroah-Hartman & Sasha Levin
+	4.14	Greg Kroah-Hartman & Sasha Levin
+	4.19	Greg Kroah-Hartman & Sasha Levin
+	5.4	Greg Kroah-Hartman & Sasha Levin
+	======  ================================	================
+
+長期支持內核的選擇純粹是維護人員是否有需求和時間來維護該版本的問題。
+目前還沒有爲即將發布的任何特定版本提供長期支持的已知計劃。
+
+補丁的生命周期
+--------------
+
+補丁不會直接從開發人員的鍵盤進入主線內核。相反，有一個稍微複雜（如果有些非
+正式）的過程，旨在確保對每個補丁進行質量審查，並確保每個補丁實現了一個在主線
+中需要的更改。對於小的修復，這個過程可能會很快完成，，而對於較大或有爭議的
+變更，可能會持續數年。許多開發人員的沮喪來自於對這個過程缺乏理解或者試圖繞過它。
+
+爲了減少這種挫敗，本文將描述補丁如何進入內核。下面的介紹以一種較爲理想化的
+方式描述了這個過程。更詳細的過程將在後面的章節中介紹。
+
+補丁通常要經歷以下階段：
+
+- 設計。這就是補丁的真正需求——以及滿足這些需求的方式——所在。設計工作通常
+  是在不涉及社區的情況下完成的，但是如果可能的話，最好是在公開的情況下完成
+  這項工作；這樣可以節省很多稍後再重新設計的時間。
+
+- 早期評審。補丁被發布到相關的郵件列表中，列表中的開發人員會回復他們可能有
+  的任何評論。如果一切順利的話，這個過程應該會發現補丁的任何主要問題。
+
+- 更廣泛的評審。當補丁接近準備好納入主線時，它應該被相關的子系統維護人員
+  接受——儘管這種接受並不能保證補丁會一直延伸到主線。補丁將出現在維護人員的
+  子系統樹中，並進入 -next 樹（如下所述）。當流程進行時，此步驟將會對補丁
+  進行更廣泛的審查，並發現由於將此補丁與其他人所做的工作合併而導致的任何
+  問題。
+
+- 請注意，大多數維護人員也有日常工作，因此合併補丁可能不是他們的最優先工作。
+  如果您的補丁得到了需要更改的反饋，那麼您應該進行這些更改，或者解釋爲何
+  不應該進行這些更改。如果您的補丁沒有評審意見，也沒有被其相應的子系統或
+  驅動程序維護者接受，那麼您應該堅持不懈地將補丁更新到當前內核使其可被正常
+  應用，並不斷地發送它以供審查和合併。
+
+- 合併到主線。最終，一個成功的補丁將被合併到由LinusTorvalds管理的主線存儲庫
+  中。此時可能會出現更多的評論和/或問題；對開發人員來說應對這些問題並解決
+  出現的任何問題仍很重要。
+
+- 穩定版發布。大量用戶可能受此補丁影響，因此可能再次出現新的問題。
+
+- 長期維護。雖然開發人員在合併代碼後可能會忘記代碼，但這種行爲往往會給開發
+  社區留下不良印象。合併代碼消除了一些維護負擔，因爲其他人將修復由API更改
+  引起的問題。但是，如果代碼要長期保持可用，原始開發人員應該繼續爲代碼負責。
+
+內核開發人員（或他們的僱主）犯的最大錯誤之一是試圖將流程簡化爲一個「合併到
+主線」步驟。這種方法總是會讓所有相關人員感到沮喪。
+
+補丁如何進入內核
+----------------
+
+只有一個人可以將補丁合併到主線內核存儲庫中：LinusTorvalds。但是，在進入
+2.6.38內核的9500多個補丁中，只有112個（大約1.3%）是由Linus自己直接選擇的。
+內核項目已經發展到一個沒有一個開發人員可以在沒有支持的情況下檢查和選擇每個
+補丁的規模。內核開發人員處理這種增長的方式是使用圍繞信任鏈構建的助理系統。
+
+內核代碼庫在邏輯上被分解爲一組子系統：網絡、特定體系結構支持、內存管理、視
+頻設備等。大多數子系統都有一個指定的維護人員，其總體負責該子系統中的代碼。
+這些子系統維護者（鬆散地）是他們所管理的內核部分的「守門員」；他們（通常）
+會接受一個補丁以包含到主線內核中。
+
+子系統維護人員每個人都管理著自己版本的內核原始碼樹，通常（並非總是）使用Git。
+Git等工具（以及Quilt或Mercurial等相關工具）允許維護人員跟蹤補丁列表，包括作者
+信息和其他元數據。在任何給定的時間，維護人員都可以確定他或她的存儲庫中的哪
+些補丁在主線中找不到。
+
+當合併窗口打開時，頂級維護人員將要求Linus從存儲庫中「拉出」他們爲合併選擇
+的補丁。如果Linus同意，補丁流將流向他的存儲庫，成爲主線內核的一部分。
+Linus對拉取中接收到的特定補丁的關注程度各不相同。很明顯，有時他看起來很
+關注。但是一般來說，Linus相信子系統維護人員不會向上游發送壞補丁。
+
+子系統維護人員反過來也可以從其他維護人員那裡獲取補丁。例如，網絡樹是由首先
+在專用於網絡設備驅動程序、無線網絡等的樹中積累的補丁構建的。此存儲鏈可以
+任意長，但很少超過兩個或三個連結。由於鏈中的每個維護者都信任那些管理較低
+級別樹的維護者，所以這個過程稱爲「信任鏈」。
+
+顯然，在這樣的系統中，獲取內核補丁取決於找到正確的維護者。直接向Linus發送
+補丁通常不是正確的方法。
+
+Next 樹
+-------
+
+子系統樹鏈引導補丁流到內核，但它也提出了一個有趣的問題：如果有人想查看爲
+下一個合併窗口準備的所有補丁怎麼辦？開發人員將感興趣的是，還有什麼其他的
+更改有待解決，以了解是否存在需要擔心的衝突；例如，更改核心內核函數原型的
+修補程序將與使用該函數舊形式的任何其他修補程序衝突。審查人員和測試人員希望
+在所有這些變更到達主線內核之前，能夠訪問它們的集成形式的變更。您可以從所有
+相關的子系統樹中提取更改，但這將是一項複雜且容易出錯的工作。
+
+解決方案以-next樹的形式出現，在這裡子系統樹被收集以供測試和審查。這些樹中
+由Andrew Morton維護的較老的一個，被稱爲「-mm」（用於內存管理，創建時爲此）。
+-mm 樹集成了一長串子系統樹中的補丁；它還包含一些旨在幫助調試的補丁。
+
+除此之外，-mm 還包含大量由Andrew直接選擇的補丁。這些補丁可能已經發布在郵件
+列表上，或者它們可能應用於內核中未指定子系統樹的部分。同時，-mm 作爲最後
+手段的子系統樹；如果沒有其他明顯的路徑可以讓補丁進入主線，那麼它很可能最
+終選擇-mm 樹。累積在-mm 中的各種補丁最終將被轉發到適當的子系統樹，或者直接
+發送到Linus。在典型的開發周期中，大約5-10%的補丁通過-mm 進入主線。
+
+當前-mm 補丁可在「mmotm」（-mm of the moment）目錄中找到：
+
+        https://www.ozlabs.org/~akpm/mmotm/
+
+然而，使用MMOTM樹可能會十分令人頭疼；它甚至可能無法編譯。
+
+下一個周期補丁合併的主要樹是linux-next，由Stephen Rothwell 維護。根據設計
+linux-next 是下一個合併窗口關閉後主線的快照。linux-next樹在Linux-kernel 和
+Linux-next 郵件列表中發布，可從以下位置下載：
+
+        https://www.kernel.org/pub/linux/kernel/next/
+
+Linux-next 已經成爲內核開發過程中不可或缺的一部分；在一個給定的合併窗口中合併
+的所有補丁都應該在合併窗口打開之前的一段時間內找到進入Linux-next 的方法。
+
+Staging 樹
+----------
+
+內核原始碼樹包含drivers/staging/目錄，其中有許多驅動程序或文件系統的子目錄
+正在被添加到內核樹中。它們在仍然需要更多的修正的時候可以保留在driver/staging/
+目錄中；一旦完成，就可以將它們移到內核中。這是一種跟蹤不符合Linux內核編碼或
+質量標準的驅動程序的方法，人們可能希望使用它們並跟蹤開發。
+
+Greg Kroah Hartman 目前負責維護staging 樹。仍需要修正的驅動程序將發送給他，
+每個驅動程序在drivers/staging/中都有自己的子目錄。除了驅動程序源文件之外，
+目錄中還應該有一個TODO文件。TODO文件列出了驅動程序需要接受的暫停的工作，
+以及驅動程序的任何補丁都應該抄送的人員列表。當前的規則要求，staging的驅動
+程序必須至少正確編譯。
+
+Staging 是一種讓新的驅動程序進入主線的相對容易的方法，它們會幸運地引起其他
+開發人員的注意，並迅速改進。然而，進入staging並不是故事的結尾；staging中
+沒有看到常規進展的代碼最終將被刪除。經銷商也傾向於相對不願意使用staging驅動
+程序。因此，在成爲一個合適的主線驅動的路上，staging 僅是一個中轉站。
+
+工具
+----
+
+從上面的文本可以看出，內核開發過程在很大程度上依賴於在不同方向上聚集補丁的
+能力。如果沒有適當強大的工具，整個系統將無法在任何地方正常工作。關於如何使用
+這些工具的教程遠遠超出了本文檔的範圍，但還是用一點篇幅介紹一些關鍵點。
+
+到目前爲止，內核社區使用的主要原始碼管理系統是git。Git是在自由軟體社區中開發
+的許多分布式版本控制系統之一。它非常適合內核開發，因爲它在處理大型存儲庫和
+大量補丁時性能非常好。它也以難以學習和使用而著稱，儘管隨著時間的推移它變得
+更好了。對於內核開發人員來說，對Git的某種熟悉幾乎是一種要求；即使他們不將它
+用於自己的工作，他們也需要Git來跟上其他開發人員（以及主線）正在做的事情。
+
+現在幾乎所有的Linux發行版都打包了Git。Git主頁位於：
+
+        https://git-scm.com/
+
+此頁面包含了文檔和教程的連結。
+
+在不使用git的內核開發人員中，最流行的選擇幾乎肯定是Mercurial：
+
+        http://www.seleric.com/mercurial/
+
+Mercurial與Git共享許多特性，但它提供了一個界面，許多人覺得它更易於使用。
+
+另一個值得了解的工具是Quilt:
+
+        https://savannah.nongnu.org/projects/quilt
+
+Quilt 是一個補丁管理系統，而不是原始碼管理系統。它不會隨著時間的推移跟蹤歷史；
+相反，它面向根據不斷發展的代碼庫跟蹤一組特定的更改。一些主要的子系統維護人員
+使用Quilt來管理打算向上游移動的補丁。對於某些樹的管理（例如-mm），quilt 是
+最好的工具。
+
+郵件列表
+--------
+
+大量的Linux內核開發工作是通過郵件列表完成的。如果不加入至少一個某個列表，
+就很難成爲社區中的一個「全功能」成員。但是，Linux郵件列表對開發人員來說也是
+一個潛在的危險，他們可能會被一堆電子郵件淹沒、違反Linux列表上使用的約定，
+或者兩者兼而有之。
+
+大多數內核郵件列表都在vger.kernel.org上運行；主列表位於：
+
+        http://vger.kernel.org/vger-lists.html
+
+不過，也有一些列表託管在別處；其中一些列表位於
+redhat.com/mailman/listinfo。
+
+當然，內核開發的核心郵件列表是linux-kernel。這個列表是一個令人生畏的地方：
+每天的信息量可以達到500條，噪音很高，談話技術性很強，且參與者並不總是表現出
+高度的禮貌。但是，沒有其他地方可以讓內核開發社區作爲一個整體聚集在一起；
+不使用此列表的開發人員將錯過重要信息。
+
+以下一些提示可以幫助在linux-kernel生存：
+
+- 將郵件轉移到單獨的文件夾，而不是主郵箱文件夾。我們必須能夠持續地忽略洪流。
+
+- 不要試圖跟上每一次談話——沒人會這樣。重要的是要篩選感興趣的主題（但請注意
+  長時間的對話可能會偏離原來的主題，儘管未改變電子郵件的主題）和參與的人。
+
+- 不要回復挑事的人。如果有人試圖激起憤怒，請忽略他們。
+
+- 當回復Linux內核電子郵件（或其他列表上的電子郵件）時，請爲所有相關人員保留
+  Cc: 抄送頭。如果沒有確實的理由（如明確的請求），則不應刪除收件人。一定要
+  確保你要回復的人在抄送列表中。這個慣例也使你不必在回覆郵件時明確要求被抄送。
+
+- 在提出問題之前，搜索列表存檔（和整個網絡）。有些開發人員可能會對那些顯然
+  沒有完成家庭作業的人感到不耐煩。
+
+- 避免頂部回復（把你的答案放在你要回復的引文上面的做法）。這會讓你的回答更難
+  理解，印象也很差。
+
+- 在正確的郵件列表發問。linux-kernel 可能是通用的討論場所，但它不是尋找所有
+  子系統開發人員的最佳場所。
+
+最後一點——找到正確的郵件列表——是開發人員常出錯的地方。在linux-kernel上
+提出與網絡相關的問題的人幾乎肯定會收到一個禮貌的建議，轉到netdev列表上提出，
+因爲這是大多數網絡開發人員經常出現的列表。還有其他列表可用於scsi、video4linux、
+ide、filesystem等子系統。查找郵件列表的最佳位置是與內核原始碼一起打包的
+MAINTAINERS文件。
+
+開始內核開發
+------------
+
+關於如何開始內核開發過程的問題很常見——個人和公司皆然。同樣常見的是失誤，這
+使得關係的開始比本應的更困難。
+
+公司通常希望聘請知名的開發人員來啓動開發團隊。實際上，這是一種有效的技術。
+但它也往往是昂貴的，而且對增加有經驗的內核開發人員的數量沒有多大幫助。考
+慮到時間投入，可以讓內部開發人員加快Linux內核的開發速度。利用這段時間可以
+讓僱主擁有一批既了解內核又了解公司的開發人員，還可以幫助培訓其他人。從中期
+來看，這通常是更有利可圖的方法。
+
+可以理解的是，單個開發人員往往對起步感到茫然。從一個大型項目開始可能會很
+嚇人；人們往往想先用一些較小的東西來試試水。由此，一些開發人員開始創建修補
+拼寫錯誤或輕微編碼風格問題的補丁。不幸的是，這樣的補丁會產生一定程度的噪音，
+這會分散整個開發社區的注意力，因此，它們越來越被人不看重。希望向社區介紹
+自己的新開發人員將無法通過這些方式獲得他們期待的反響。
+
+Andrew Morton 爲有抱負的內核開發人員提供了如下建議
+
+::
+
+	所有內核開發者的第一個項目肯定應該是「確保內核在您可以操作的所有
+	機器上始終完美運行」。通常的方法是和其他人一起解決問題（這可能需
+	要堅持！），但就是如此——這是內核開發的一部分。
+
+(http://lwn.net/articles/283982/)
+
+在沒有明顯問題需要解決的情況下，通常建議開發人員查看當前的回歸和開放缺陷
+列表。從來都不缺少需要解決的問題；通過解決這些問題，開發人員將從該過程獲得
+經驗，同時與開發社區的其他成員建立相互尊重。
+
diff --git a/Documentation/translations/zh_TW/process/3.Early-stage.rst b/Documentation/translations/zh_TW/process/3.Early-stage.rst
new file mode 100644
index 000000000000..ab2a45fd65a4
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/3.Early-stage.rst
@@ -0,0 +1,172 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/3.Early-stage.rst <development_early_stage>`
+
+:Translator:
+
+ 時奎亮 Alex Shi <alex.shi@linux.alibaba.com>
+
+:校譯:
+
+ 吳想成 Wu XiangCheng <bobwxc@email.cn>
+ 胡皓文 Hu Haowen <src.res@email.cn>
+
+.. _tw_development_early_stage:
+
+早期規劃
+========
+
+當考慮一個Linux內核開發項目時，很可能會直接跳進去開始編碼。然而，與任何重要
+的項目一樣，許多成功的基礎最好是在第一行代碼編寫之前就打下。在早期計劃和
+溝通中花費一些時間可以在之後節省更多的時間。
+
+搞清問題
+--------
+
+與任何工程項目一樣，成功的內核改善從清晰描述要解決的問題開始。在某些情況
+下，這個步驟很容易：例如當某個特定硬體需要驅動程序時。不過，在其他情況下，
+很容易將實際問題與建議的解決方案混在一起，這可能會導致麻煩。
+
+舉個例子：幾年前，Linux音頻的開發人員尋求一種方法來運行應用程式，而不會因
+系統延遲過大而導致退出或其他問題。他們得到的解決方案是一個連接到Linux安全
+模塊（LSM）框架中的內核模塊；這個模塊可以配置爲允許特定的應用程式訪問實時
+調度程序。這個模塊被實現並發到linux-kernel郵件列表，在那裡它立即遇到了麻煩。
+
+對於音頻開發人員來說，這個安全模塊足以解決他們當前的問題。但是，對於更廣泛的
+內核社區來說，這被視爲對LSM框架的濫用（LSM框架並不打算授予他們原本不具備的
+進程特權），並對系統穩定性造成風險。他們首選的解決方案包括短期的通過rlimit
+機制進行實時調度訪問，以及長期的減少延遲的工作。
+
+然而，音頻社區無法超越他們實施的特定解決方案來看問題；他們不願意接受替代方案。
+由此產生的分歧使這些開發人員對整個內核開發過程感到失望；其中一個開發人員返回
+到audio列表並發布了以下內容：
+
+	有很多非常好的Linux內核開發人員，但他們往往會被一羣傲慢的傻瓜所壓倒。
+	試圖向這些人傳達用戶需求是浪費時間。他們太「聰明」了，根本聽不到少數
+	人的話。
+
+（http://lwn.net/articles/131776/）
+
+實際情況卻是不同的；與特定模塊相比，內核開發人員更關心系統穩定性、長期維護
+以及找到問題的正確解決方案。這個故事的寓意是把重點放在問題上——而不是具體的
+解決方案上——並在開始編寫代碼之前與開發社區討論這個問題。
+
+因此，在考慮一個內核開發項目時，我們應該得到一組簡短問題的答案：
+
+ - 需要解決的問題究竟是什麼？
+
+ - 受此問題影響的用戶有哪些？解決方案應該解決哪些使用案例？
+
+ - 內核現在爲何沒能解決這個問題？
+
+只有這樣，才能開始考慮可能的解決方案。
+
+
+早期討論
+--------
+
+在計劃內核開發項目時，在開始實施之前與社區進行討論是很有意義的。早期溝通可以
+通過多種方式節省時間和麻煩：
+
+ - 很可能問題是由內核以您不理解的方式解決的。Linux內核很大，具有許多不明顯
+   的特性和功能。並不是所有的內核功能都像人們所希望的那樣有文檔記錄，而且很
+   容易遺漏一些東西。某作者發布了一個完整的驅動程序，重複了一個其不
+   知道的現有驅動程序。重新發明現有輪子的代碼不僅浪費，而且不會被接受到主線
+   內核中。
+
+ - 建議的解決方案中可能有一些要素不適合併入主線。在編寫代碼之前，最好先了解
+   這樣的問題。
+
+ - 其他開發人員完全有可能考慮過這個問題；他們可能有更好的解決方案的想法，並且
+   可能願意幫助創建這個解決方案。
+
+在內核開發社區的多年經驗給了我們一個明確的教訓：閉門設計和開發的內核代碼總是
+有一些問題，這些問題只有在代碼發布到社區中時才會被發現。有時這些問題很嚴重，
+需要數月或數年的努力才能使代碼達到內核社區的標準。例如：
+
+ - 設計並實現了單處理器系統的DeviceScape網絡棧。只有使其適合於多處理器系統，
+   才能將其合併到主線中。在代碼中修改鎖等等是一項困難的任務；因此，這段代碼
+   （現在稱爲mac80211）的合併被推遲了一年多。
+
+ - Reiser4文件系統包含許多功能，核心內核開發人員認爲這些功能應該在虛擬文件
+   系統層中實現。它還包括一些特性，這些特性在不將系統暴露於用戶引起的死鎖的
+   情況下是不容易實現的。這些問題過遲發現——以及拒絕處理其中一些問題——已經
+   導致Reiser4置身主線內核之外。
+
+ - Apparmor安全模塊以被認爲不安全和不可靠的方式使用內部虛擬文件系統數據結構。
+   這種擔心（包括其他）使Apparmor多年來無法進入主線。
+
+在這些情況下，與內核開發人員的早期討論，可以避免大量的痛苦和額外的工作。
+
+找誰交流？
+----------
+
+當開發人員決定公開他們的計劃時，下一個問題是：我們從哪裡開始？答案是找到正確
+的郵件列表和正確的維護者。對於郵件列表，最好的方法是在維護者(MAINTAINERS)文件
+中查找要發布的相關位置。如果有一個合適的子系統列表，那麼其上發布通常比在
+linux-kernel上發布更可取；您更有可能接觸到在相關子系統中具有專業知識的開發
+人員，並且環境可能具支持性。
+
+找到維護人員可能會有點困難。同樣，維護者文件是開始的地方。但是，該文件往往不
+是最新的，並且並非所有子系統都在那裡顯示。實際上，維護者文件中列出的人員可能
+不是當前實際擔任該角色的人員。因此，當對聯繫誰有疑問時，一個有用的技巧是使用
+git（尤其是「git-log」）查看感興趣的子系統中當前活動的用戶。看看誰在寫補丁、
+誰會在這些補丁上加上Signed-off-by行簽名（如有）。這些人將是幫助新開發項目的
+最佳人選。
+
+找到合適的維護者有時是非常具有挑戰性的，以至於內核開發人員添加了一個腳本來
+簡化這個過程：
+
+::
+
+	.../scripts/get_maintainer.pl
+
+當給定「-f」選項時，此腳本將返回指定文件或目錄的當前維護者。如果在命令行上
+給出了一個補丁，它將列出可能接收補丁副本的維護人員。有許多選項可以調節
+get_maintainer.pl搜索維護者的嚴格程度；請小心使用更激進的選項，因爲最終結果
+可能會包括對您正在修改的代碼沒有真正興趣的開發人員。
+
+如果所有其他方法都失敗了，那麼與Andrew Morton交流是跟蹤特定代碼段維護人員
+的一種有效方法。
+
+何時郵寄？
+----------
+
+如果可能的話，在早期階段發布你的計劃只會更有幫助。描述正在解決的問題以及已經
+制定的關於如何實施的任何計劃。您可以提供的任何信息都可以幫助開發社區爲項目
+提供有用的輸入。
+
+在這個階段可能發生的一件令人沮喪的事情不是得到反對意見，而是很少或根本沒有
+反饋。令人傷心的事實是：（1）內核開發人員往往很忙；（2）不缺少有宏偉計劃但
+代碼（甚至代碼設想）很少的人去支持他們；（3）沒有人有義務審查或評論別人發表
+的想法。除此之外，高層級的設計常常隱藏著一些問題，這些問題只有在有人真正嘗試
+實現這些設計時才會被發現；因此，內核開發人員寧願看到代碼。
+
+如果發布請求評論（RFC）並沒得到什麼有用的評論，不要以爲這意味著無人對此項目
+有興趣，同時你也不能假設你的想法沒有問題。在這種情況下，最好的做法是繼續進
+行，把你的進展隨時通知社區。
+
+獲得官方認可
+-----------------------
+
+如果您的工作是在公司環境中完成的，就像大多數Linux內核工作一樣；顯然，在您將
+公司的計劃或代碼發布到公共郵件列表之前，必須獲得有適當權利經理的許可。發布
+不確定是否兼容GPL的代碼尤其會帶來問題；公司的管理層和法律人員越早能夠就發布
+內核開發項目達成一致，對參與的每個人都越好。
+
+一些讀者可能會認爲他們的核心工作是爲了支持還沒有正式承認存在的產品。將僱主
+的計劃公布在公共郵件列表上可能不是一個可行的選擇。在這種情況下，有必要考慮
+保密是否真的是必要的；通常不需要把開發計劃關在門內。
+
+的確，有些情況下一家公司在開發過程的早期無法合法地披露其計劃。擁有經驗豐富
+的內核開發人員的公司可能選擇以開環的方式進行開發，前提是他們以後能夠避免
+嚴重的集成問題。對於沒有這種內部專業知識的公司，最好的選擇往往是聘請外部
+開發者根據保密協議審查計劃。Linux基金會運行了一個NDA程序，旨在幫助解決這種
+情況；更多信息參見：
+
+    http://www.linuxfoundation.org/nda/
+
+這種審查通常足以避免以後出現嚴重問題，而無需公開披露項目。
+
diff --git a/Documentation/translations/zh_TW/process/4.Coding.rst b/Documentation/translations/zh_TW/process/4.Coding.rst
new file mode 100644
index 000000000000..ccc3946227a0
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/4.Coding.rst
@@ -0,0 +1,297 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/4.Coding.rst <development_coding>`
+
+:Translator:
+
+ 時奎亮 Alex Shi <alex.shi@linux.alibaba.com>
+
+:校譯:
+
+ 吳想成 Wu XiangCheng <bobwxc@email.cn>
+ 胡皓文 Hu Haowen <src.res@email.cn>
+
+.. _tw_development_coding:
+
+使代碼正確
+======================
+
+雖然一個堅實的、面向社區的設計過程有很多值得說道的，但是任何內核開發項目工作
+的證明都反映在代碼中。它是將由其他開發人員檢查併合並（或不合併）到主線樹中
+的代碼。所以這段代碼的質量決定了項目的最終成功。
+
+本節將檢查編碼過程。我們將從內核開發人員常犯的幾種錯誤開始。然後重點將轉移
+到正確的做法和相關有用的工具上。
+
+陷阱
+----
+
+代碼風格
+********
+
+內核長期以來都有其標準的代碼風格，如
+:ref:`Documentation/translations/zh_TW/process/coding-style.rst <tw_codingstyle>`
+中所述。在多數時候，該文檔中描述的準則至多被認爲是建議性的。因此，內核中存在
+大量不符合代碼風格準則的代碼。這種代碼的存在會給內核開發人員帶來兩方面的危害。
+
+首先，相信內核代碼標準並不重要，也不強制執行。但事實上，如果沒有按照標準
+編寫代碼，那麼新代碼將很難加入到內核中；許多開發人員甚至會在審查代碼之前要求
+對代碼進行重新格式化。一個像內核這麼大的代碼庫需要一些統一格式的代碼，以使
+開發人員能夠快速理解其中的任何部分。所以再也經不起奇怪格式的代碼的折騰了。
+
+內核的代碼風格偶爾會與僱主的強制風格發生衝突。在這種情況下，必須在代碼合併
+之前遵從內核代碼風格。將代碼放入內核意味著以多種方式放棄一定程度的控制權——
+包括控制代碼樣式。
+
+另一個危害是認爲已經在內核中的代碼迫切需要修復代碼樣式。開發者可能會開始編寫
+重新格式化補丁，作爲熟悉開發過程的一種方式，或者作爲將其名字寫入內核變更日誌
+的一種方式，或者兩者兼而有之。但是純代碼風格的修復被開發社區視爲噪音，它們往
+往受到冷遇。因此，最好避免編寫這種類型的補丁。在由於其他原因處理一段代碼的
+同時順帶修復其樣式是很自然的，但是不應該僅爲了更改代碼樣式而更改之。
+
+代碼風格文檔也不應該被視爲絕對不可違反的規則。如果有一個足夠的理由反對這種
+樣式（例如爲了80列限制拆分行會導致可讀性大大降低），那麼就這樣做吧。
+
+注意您還可以使用 ``clang-format`` 工具來幫助您處理這些規則，快速自動重新格式
+化部分代碼，和審閱完整的文件以發現代碼樣式錯誤、拼寫錯誤和可能的改進。它還
+可以方便地排序 ``#includes`` 、對齊變量/宏、重排文本和其他類似任務。有關詳細
+信息，請參閱文檔 :ref:`Documentation/process/clang-format.rst <clangformat>`
+
+抽象層
+******
+
+計算機科學教授教學生以靈活性和信息隱藏的名義廣泛使用抽象層。當然，內核廣泛
+地使用了抽象；任何涉及數百萬行代碼的項目都必須做到這一點以存續下來。但經驗
+表明，過度或過早的抽象可能和過早的優化一樣有害。抽象應用在適當層級，
+不要過度。
+
+簡單點，先考慮一個調用時始終只有一個參數且總爲零的函數。我們可以保留這個參數，
+以在需要使用它時提供的額外靈活性。不過，在那時實現了這個額外參數的代碼很有
+可能以某種從未被注意到的微妙方式被破壞——因爲它從未被使用過。或者當需要額外
+的靈活性時，它並未以符合程式設計師當初期望的方式來實現。內核開發人員通常會提交
+補丁來刪除未使用的參數；一般來說，一開始就不應該添加這些參數。
+
+隱藏硬體訪問的抽象層——通常爲了允許大量的驅動程序兼容多個作業系統——尤其不受
+歡迎。這樣的層使代碼變得模糊，可能會造成性能損失；它們不屬於Linux內核。
+
+另一方面，如果您發現自己從另一個內核子系統複製了大量的代碼，那麼是時候
+了解一下：是否需要將這些代碼中的部分提取到單獨的庫中，或者在更高的層次上
+實現這些功能。在整個內核中複製相同的代碼沒有價值。
+
+#ifdef 和預處理
+***************
+
+C預處理器似乎給一些C程式設計師帶來了強大的誘惑，他們認爲它是一種將大量靈活性加入
+原始碼中的方法。但是預處理器不是C，大量使用它會導致代碼對其他人來說更難閱讀，
+對編譯器來說更難檢查正確性。使用了大量預處理器幾乎總是代碼需要一些
+清理工作的標誌。
+
+使用#ifdef的條件編譯實際上是一個強大的功能，它在內核中使用。但是很少有人希望
+看到代碼被鋪滿#ifdef塊。一般規定，ifdef的使用應儘可能限制在頭文件中。條件
+編譯代碼可以限制函數，如果代碼不存在，這些函數就直接變成空的。然後編譯器將
+悄悄地優化對空函數的調用。使得代碼更加清晰，更容易理解。
+
+C預處理器宏存在許多危險性，包括可能對具有副作用且沒有類型安全的表達式進行多
+重評估。如果您試圖定義宏，請考慮創建一個內聯函數替代。結果相同的代碼，內聯
+函數更容易閱讀，不會多次計算其參數，並且允許編譯器對參數和返回值執行類型檢查。
+
+內聯函數
+********
+
+不過，內聯函數本身也存在風險。程式設計師可以傾心於避免函數調用和用內聯函數填充源
+文件所固有的效率。然而，這些功能實際上會降低性能。因爲它們的代碼在每個調用站
+點都被複製一遍，所以最終會增加編譯內核的大小。此外，這也對處理器的內存緩存
+造成壓力，從而大大降低執行速度。通常內聯函數應該非常小，而且相對較少。畢竟
+函數調用的成本並不高；大量創建內聯函數是過早優化的典型例子。
+
+一般來說，內核程式設計師會自冒風險忽略緩存效果。在數據結構課程開頭中的經典
+時間/空間權衡通常不適用於當代硬體。空間 *就是* 時間，因爲一個大的程序比一個
+更緊湊的程序運行得慢。
+
+較新的編譯器越來越激進地決定一個給定函數是否應該內聯。因此，隨意放置使用
+「inline」關鍵字可能不僅僅是過度的，也可能是無用的。
+
+鎖
+**
+
+2006年5月，「deviceescape」網絡堆棧在前呼後擁下以GPL發布，並被納入主線內核。
+這是一個受歡迎的消息；Linux中對無線網絡的支持充其量被認爲是不合格的，而
+Deviceescape堆棧承諾修復這種情況。然而直到2007年6月（2.6.22），這段代碼才真
+正進入主線。發生了什麼？
+
+這段代碼出現了許多閉門造車的跡象。但一個大麻煩是，它並不是爲多處理器系統而
+設計。在合併這個網絡堆棧（現在稱爲mac80211）之前，需要對其進行一個鎖方案的
+改造。
+
+曾經，Linux內核代碼可以在不考慮多處理器系統所帶來的並發性問題的情況下進行
+開發。然而現在，這個文檔就是在雙核筆記本電腦上寫的。即使在單處理器系統上，
+爲提高響應能力所做的工作也會提高內核內的並發性水平。編寫內核代碼而不考慮鎖
+的日子早已遠去。
+
+可以由多個線程並發訪問的任何資源（數據結構、硬體寄存器等）必須由鎖保護。新
+的代碼應該謹記這一要求；事後修改鎖是一項相當困難的任務。內核開發人員應該花
+時間充分了解可用的鎖原語，以便爲工作選擇正確的工具。對並發性缺乏關注的代碼
+很難進入主線。
+
+回歸
+****
+
+最後一個值得一提的危險是回歸：它可能會引起導致現有用戶的某些東西中斷的改變
+（這也可能會帶來很大的改進）。這種變化被稱爲「回歸」，回歸已經成爲主線內核
+最不受歡迎的問題。除了少數例外情況，如果回歸不能及時修正，會導致回歸的修改
+將被取消。最好首先避免回歸發生。
+
+人們常常爭論，如果回歸帶來的功能遠超過產生的問題，那麼回歸是否爲可接受的。
+如果它破壞了一個系統卻爲十個系統帶來新的功能，爲何不改改態度呢？2007年7月，
+Linus對這個問題給出了最佳答案:
+
+::
+
+	所以我們不會通過引入新問題來修復錯誤。這種方式是靠不住的，沒人知道
+	是否真的有進展。是前進兩步、後退一步，還是前進一步、後退兩步？
+
+（http://lwn.net/articles/243460/）
+
+特別不受歡迎的一種回歸類型是用戶空間ABI的任何變化。一旦接口被導出到用戶空間，
+就必須無限期地支持它。這一事實使得用戶空間接口的創建特別具有挑戰性：因爲它們
+不能以不兼容的方式進行更改，所以必須一次就對。因此，用戶空間接口總是需要大量
+的思考、清晰的文檔和廣泛的審查。
+
+
+代碼檢查工具
+------------
+
+至少目前，編寫無錯誤代碼仍然是我們中很少人能達到的理想狀態。不過，我們希望做
+的是，在代碼進入主線內核之前，儘可能多地捕獲並修復這些錯誤。爲此，內核開發人
+員已經提供了一系列令人印象深刻的工具，可以自動捕獲各種各樣的隱藏問題。計算機
+發現的任何問題都是一個以後不會困擾用戶的問題，因此，只要有可能，就應該使用
+自動化工具。
+
+第一步是注意編譯器產生的警告。當前版本的GCC可以檢測（並警告）大量潛在錯誤。
+通常，這些警告都指向真正的問題。提交以供審閱的代碼一般不會產生任何編譯器警告。
+在消除警告時，注意了解真正的原因，並儘量避免僅「修復」使警告消失而不解決其原因。
+
+請注意，並非所有編譯器警告都默認啓用。使用「make KCFLAGS=-W」構建內核以
+獲得完整集合。
+
+內核提供了幾個配置選項，可以打開調試功能；大多數配置選項位於「kernel hacking」
+子菜單中。對於任何用於開發或測試目的的內核，都應該啓用其中幾個選項。特別是，
+您應該打開：
+
+ - FRAME_WARN 獲取大於給定數量的堆棧幀的警告。
+   這些警告生成的輸出可能比較冗長，但您不必擔心來自內核其他部分的警告。
+
+ - DEBUG_OBJECTS 將添加代碼以跟蹤內核創建的各種對象的生命周期，並在出現問題
+   時發出警告。如果你要添加創建（和導出）關於其自己的複雜對象的子系統，請
+   考慮打開對象調試基礎結構的支持。
+
+ - DEBUG_SLAB 可以發現各種內存分配和使用錯誤；它應該用於大多數開發內核。
+
+ - DEBUG_SPINLOCK, DEBUG_ATOMIC_SLEEP 和 DEBUG_MUTEXES 會發現許多常見的
+   鎖錯誤。
+
+還有很多其他調試選項，其中一些將在下面討論。其中一些有顯著的性能影響，不應
+一直使用。在學習可用選項上花費一些時間，可能會在短期內得到許多回報。
+
+其中一個較重的調試工具是鎖檢查器或「lockdep」。該工具將跟蹤系統中每個鎖
+（spinlock或mutex）的獲取和釋放、獲取鎖的相對順序、當前中斷環境等等。然後，
+它可以確保總是以相同的順序獲取鎖，相同的中斷假設適用於所有情況等等。換句話
+說，lockdep可以找到許多導致系統死鎖的場景。在部署的系統中，這種問題可能會
+很痛苦（對於開發人員和用戶而言）；LockDep允許提前以自動方式發現問題。具有
+任何類型的非普通鎖的代碼在提交合併前應在啓用lockdep的情況下運行測試。
+
+作爲一個勤奮的內核程式設計師，毫無疑問，您將檢查任何可能失敗的操作（如內存分配）
+的返回狀態。然而，事實上，最終的故障復現路徑可能完全沒有經過測試。未測試的
+代碼往往會出問題；如果所有這些錯誤處理路徑都被執行了幾次，那麼您可能對代碼
+更有信心。
+
+內核提供了一個可以做到這一點的錯誤注入框架，特別是在涉及內存分配的情況下。
+啓用故障注入後，內存分配的可配置失敗的百分比；這些失敗可以限定在特定的代碼
+範圍內。在啓用了故障注入的情況下運行，程式設計師可以看到當情況惡化時代碼如何響
+應。有關如何使用此工具的詳細信息，請參閱
+Documentation/fault-injection/fault-injection.rst。
+
+「sparse」靜態分析工具可以發現其他類型的錯誤。sparse可以警告程式設計師用戶空間
+和內核空間地址之間的混淆、大端序與小端序的混淆、在需要一組位標誌的地方傳遞
+整數值等等。sparse必須單獨安裝(如果您的分發伺服器沒有將其打包，
+可以在 https://sparse.wiki.kernel.org/index.php/Main_page 找到）,
+然後可以通過在make命令中添加「C=1」在代碼上運行它。
+
+「Coccinelle」工具 :ref:`http://coccinelle.lip6.fr/ <devtools_coccinelle>`
+能夠發現各種潛在的編碼問題；它還可以爲這些問題提出修複方案。在
+scripts/coccinelle目錄下已經打包了相當多的內核「語義補丁」；運行
+「make coccicheck」將運行這些語義補丁並報告發現的任何問題。有關詳細信息，請參閱
+:ref:`Documentation/dev-tools/coccinelle.rst <devtools_coccinelle>`
+
+
+其他類型的可移植性錯誤最好通過爲其他體系結構編譯代碼來發現。如果沒有S/390系統
+或Blackfin開發板，您仍然可以執行編譯步驟。可以在以下位置找到一大堆用於x86系統的
+交叉編譯器：
+
+        https://www.kernel.org/pub/tools/crosstool/
+
+花一些時間安裝和使用這些編譯器將有助於避免以後的尷尬。
+
+文檔
+----
+
+文檔通常比內核開發規則更爲例外。即便如此，足夠的文檔將有助於簡化將新代碼合併
+到內核中的過程，使其他開發人員的生活更輕鬆，並對您的用戶有所幫助。在許多情況
+下，添加文檔已基本上是強制性的。
+
+任何補丁的第一個文檔是其關聯的變更日誌。日誌條目應該描述正在解決的問題、解決
+方案的形式、處理補丁的人員、對性能的任何相關影響，以及理解補丁可能需要的任何
+其他內容。確保變更日誌說明了*爲什麼*補丁值得應用；大量開發者未能提供這些信息。
+
+任何添加新用戶空間接口的代碼——包括新的sysfs或/proc文件——都應該包含該接口
+的文檔，該文檔使用戶空間開發人員能夠知道他們在使用什麼。請參閱
+Documentation/ABI/README，了解如何此文檔格式以及需要提供哪些信息。
+
+文檔 :ref:`Documentation/admin-guide/kernel-parameters.rst <kernelparameters>`
+描述了內核的所有引導時間參數。任何添加新參數的補丁都應該向該文檔添加適當的
+條目。
+
+任何新的配置選項都必須附有幫助文本，幫助文本需清楚地解釋這些選項以及用戶可能
+希望何時使用它們。
+
+許多子系統的內部API信息通過專門格式化的注釋進行記錄；這些注釋可以通過
+「kernel-doc」腳本以多種方式提取和格式化。如果您在具有kerneldoc注釋的子系統中
+工作，則應該維護它們，並根據需要爲外部可用的功能添加它們。即使在沒有如此記錄
+的領域中，爲將來添加kerneldoc注釋也沒有壞處；實際上，這對於剛開始開發內核的人
+來說是一個有用的活動。這些注釋的格式以及如何創建kerneldoc模板的一些信息可以在
+:ref:`Documentation/doc-guide/ <doc_guide>` 上找到。
+
+任何閱讀大量現有內核代碼的人都會注意到，注釋的缺失往往是最值得注意的。同時，
+對新代碼的要求比過去更高；合併未注釋的代碼將更加困難。這就是說，人們並不期望
+詳細注釋的代碼。代碼本身應該是自解釋的，注釋闡釋了更微妙的方面。
+
+某些事情應該總是被注釋。使用內存屏障時，應附上一行文字，解釋爲什麼需要設置內存
+屏障。數據結構的鎖規則通常需要在某個地方解釋。一般來說，主要數據結構需要全面
+的文檔。應該指出代碼中分立的位之間不明顯的依賴性。任何可能誘使代碼管理人進行
+錯誤的「清理」的事情都需要一個注釋來說明爲什麼要這樣做。等等。
+
+
+內部API更改
+-----------
+
+內核提供給用戶空間的二進位接口不能被破壞，除非逼不得已。而內核的內部編程接口
+是高度流動的，當需要時可以更改。如果你發現自己不得不處理一個內核API，或者僅
+僅因爲它不滿足你的需求導致無法使用特定的功能，這可能是API需要改變的一個標誌。
+作爲內核開發人員，您有權進行此類更改。
+
+的確可以進行API更改，但更改必須是合理的。因此任何進行內部API更改的補丁都應該
+附帶關於更改內容和必要原因的描述。這種變化也應該拆分成一個單獨的補丁，而不是
+埋在一個更大的補丁中。
+
+另一個要點是，更改內部API的開發人員通常要負責修復內核樹中被更改破壞的任何代碼。
+對於一個廣泛使用的函數，這個責任可以導致成百上千的變化，其中許多變化可能與其他
+開發人員正在做的工作相衝突。不用說，這可能是一項大工程，所以最好確保理由是
+可靠的。請注意，coccinelle工具可以幫助進行廣泛的API更改。
+
+在進行不兼容的API更改時，應儘可能確保編譯器捕獲未更新的代碼。這將幫助您確保找
+到該接口的樹內用處。它還將警告開發人員樹外代碼存在他們需要響應的更改。支持樹外
+代碼不是內核開發人員需要擔心的事情，但是我們也不必使樹外開發人員的生活有不必要
+的困難。
+
diff --git a/Documentation/translations/zh_TW/process/5.Posting.rst b/Documentation/translations/zh_TW/process/5.Posting.rst
new file mode 100644
index 000000000000..5578bca403e6
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/5.Posting.rst
@@ -0,0 +1,251 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/5.Posting.rst <development_posting>`
+
+:Translator:
+
+ 時奎亮 Alex Shi <alex.shi@linux.alibaba.com>
+
+:校譯:
+
+ 吳想成 Wu XiangCheng <bobwxc@email.cn>
+ 胡皓文 Hu Haowen <src.res@email.cn>
+
+.. _tw_development_posting:
+
+發布補丁
+========
+
+您的工作遲早會準備好提交給社區進行審查，並最終包含到主線內核中。毫不稀奇，
+內核開發社區已經發展出一套用於發布補丁的約定和過程；遵循這些約定和過程將使
+參與其中的每個人的生活更加輕鬆。本文檔試圖描述這些約定的部分細節；更多信息
+也可在以下文檔中找到
+:ref:`Documentation/translations/zh_TW/process/submitting-patches.rst <tw_submittingpatches>`，
+:ref:`Documentation/translations/zh_TW/process/submitting-drivers.rst <tw_submittingdrivers>`
+和 :ref:`Documentation/translations/zh_TW/process/submit-checklist.rst <tw_submitchecklist>`。
+
+何時郵寄
+--------
+
+在補丁完全「準備好」之前，避免發布補丁是一種持續的誘惑。對於簡單的補丁，這
+不是問題。但是如果正在完成的工作很複雜，那麼在工作完成之前從社區獲得反饋就
+可以獲得很多好處。因此，您應該考慮發布正在進行的工作，甚至維護一個可用的Git
+樹，以便感興趣的開發人員可以隨時趕上您的工作。
+
+當發布中有尚未準備好被包含的代碼，最好在發布中說明。還應提及任何有待完成的
+主要工作和任何已知問題。很少有人會願意看那些被認爲是半生不熟的補丁，但是
+那些願意的人會帶著他們的點子來一起幫助你把工作推向正確的方向。
+
+創建補丁之前
+------------
+
+在考慮將補丁發送到開發社區之前，有許多事情應該做。包括：
+
+ - 儘可能地測試代碼。利用內核的調試工具，確保內核使用了所有可能的配置選項組合
+   進行構建，使用交叉編譯器爲不同的體系結構進行構建等。
+
+ - 確保您的代碼符合內核代碼風格指南。
+
+ - 您的更改是否具有性能影響？如果是這樣，您應該運行基準測試來顯示您的變更的
+   影響（或好處）；結果的摘要應該包含在補丁中。
+
+ - 確保您有權發布代碼。如果這項工作是爲僱主完成的，僱主對這項工作具有所有權，
+   並且必須同意根據GPL對其進行發布。
+
+一般來說，在發布代碼之前進行一些額外的思考，幾乎總是能在短時間內得到回報。
+
+補丁準備
+--------
+
+準備補丁發布的工作量可能很驚人，但在此嘗試節省時間通常是不明智的，即使在短期
+內亦然。
+
+必須針對內核的特定版本準備補丁。一般來說，補丁應該基於Linus的Git樹中的當前
+主線。當以主線爲基礎時，請從一個衆所周知的發布點開始——如穩定版本或 -rc
+版本發布點——而不是在一個任意的主線分支點。
+
+也可能需要針對-mm、linux-next或子系統樹生成版本，以便於更廣泛的測試和審查。
+根據補丁的區域以及其他地方的情況，針對其他樹建立的補丁可能需要大量的工作來
+解決衝突和處理API更改。
+
+只有最簡單的更改才應格式化爲單個補丁；其他所有更改都應作爲一系列邏輯更改進行。
+分割補丁是一門藝術；一些開發人員花了很長時間來弄清楚如何按照社區期望的方式來
+分割。不過，這些經驗法則也許有幫助：
+
+ - 您發布的補丁系列幾乎肯定不會是開發過程中版本控制系統中的一系列更改。相反，
+   需要對您所做更改的最終形式加以考慮，然後以有意義的方式進行拆分。開發人員對
+   離散的、自包含的更改感興趣，而不是您創造這些更改的原始路徑。
+
+ - 每個邏輯上獨立的變更都應該格式化爲單獨的補丁。這些更改可以是小的（如「向
+   此結構體添加欄位」）或大的（如添加一個重要的新驅動程序），但它們在概念上
+   應該是小的，並且可以在一行內簡述。每個補丁都應該做一個特定的、可以單獨
+   檢查並驗證它所做的事情的更改。
+
+ - 換種方式重申上述準則，也就是說：不要在同一補丁中混合不同類型的更改。如果
+   一個補丁修復了一個關鍵的安全漏洞，又重新排列了一些結構，還重新格式化了代
+   碼，那麼它很有可能會被忽略，從而導致重要的修復丟失。
+
+ - 每個補丁都應該能創建一個可以正確地構建和運行的內核；如果補丁系列在中間被
+   斷開，那麼結果仍應是一個正常工作的內核。部分應用一系列補丁是使用
+   「git bisct」工具查找回歸的一個常見場景；如果結果是一個損壞的內核，那麼將使
+   那些從事追蹤問題的高尚工作的開發人員和用戶的生活更加艱難。
+
+ - 不要過分分割。一位開發人員曾經將一組針對單個文件的編輯分成500個單獨的補丁
+   發布，這並沒有使他成爲內核郵件列表中最受歡迎的人。一個補丁可以相當大，
+   只要它仍然包含一個單一的 *邏輯* 變更。
+
+ - 用一系列補丁添加一個全新的基礎設施，但是該設施在系列中的最後一個補丁啓用
+   整個變更之前不能使用，這看起來很誘人。如果可能的話，應該避免這種誘惑；
+   如果這個系列增加了回歸，那麼二分法將指出最後一個補丁是導致問題的補丁，
+   即使真正的bug在其他地方。只要有可能，添加新代碼的補丁程序應該立即激活該
+   代碼。
+
+創建完美補丁系列的工作可能是一個令人沮喪的過程，在完成「真正的工作」之後需要
+花費大量的時間和思考。但是如果做得好，花費的時間就是值得的。
+
+補丁格式和更改日誌
+------------------
+
+所以現在你有了一系列完美的補丁可以發布，但是這項工作還沒有完成。每個補丁都
+需要被格式化成一條消息，以快速而清晰地將其目的傳達到世界其他地方。爲此，
+每個補丁將由以下部分組成：
+
+ - 可選的「From」行，表明補丁作者。只有當你通過電子郵件發送別人的補丁時，這一行
+   才是必須的，但是爲防止疑問加上它也不會有什麼壞處。
+
+ - 一行描述，說明補丁的作用。對於在沒有其他上下文的情況下看到該消息的讀者來說，
+   該消息應足以確定修補程序的範圍；此行將顯示在「short form（簡短格式）」變更
+   日誌中。此消息通常需要先加上子系統名稱前綴，然後是補丁的目的。例如：
+
+   ::
+
+        gpio: fix build on CONFIG_GPIO_SYSFS=n
+
+ - 一行空白，後接補丁內容的詳細描述。此描述可以是任意需要的長度；它應該說明補丁
+   的作用以及爲什麼它應該應用於內核。
+
+ - 一個或多個標記行，至少有一個由補丁作者的 Signed-off-by 簽名。標記將在下面
+   詳細描述。
+
+上面的項目一起構成補丁的變更日誌。寫一則好的變更日誌是一門至關重要但常常被
+忽視的藝術；值得花一點時間來討論這個問題。當你編寫變更日誌時，你應該記住有
+很多不同的人會讀你的話。其中包括子系統維護人員和審查人員，他們需要決定是否
+應該合併補丁，分銷商和其他維護人員試圖決定是否應該將補丁反向移植到其他內核，
+缺陷搜尋人員想知道補丁是否導致他們正在追查的問題，以及想知道內核如何變化的
+用戶等等。一個好的變更日誌以最直接和最簡潔的方式向所有這些人傳達所需的信息。
+
+在結尾，總結行應該描述變更的影響和動機，以及在一行約束條件下可能發生的變化。
+然後，詳細的描述可以詳述這些主題，並提供任何需要的附加信息。如果補丁修復了
+一個缺陷，請引用引入該缺陷的提交（如果可能，請在引用提交時同時提供其 id 和
+標題）。如果某個問題與特定的日誌或編譯器輸出相關聯，請包含該輸出以幫助其他
+人搜索同一問題的解決方案。如果更改是爲了支持以後補丁中的其他更改，那麼應當
+說明。如果更改了內部API，請詳細說明這些更改以及其他開發人員應該如何響應。
+一般來說，你越把自己放在每個閱讀你變更日誌的人的位置上，變更日誌（和內核
+作爲一個整體）就越好。
+
+不消說，變更日誌是將變更提交到版本控制系統時使用的文本。接下來將是：
+
+ - 補丁本身，採用統一的（「-u」）補丁格式。使用「-p」選項來diff將使函數名與
+   更改相關聯，從而使結果補丁更容易被其他人讀取。
+
+您應該避免在補丁中包括與更改不相關文件（例如，構建過程生成的文件或編輯器
+備份文件）。文檔目錄中的「dontdiff」文件在這方面有幫助；使用「-X」選項將
+其傳遞給diff。
+
+上面提到的標籤（tag）用於描述各種開發人員如何與這個補丁的開發相關聯。
+:ref:`Documentation/translations/zh_TW/process/submitting-patches.rst <tw_submittingpatches>`
+文檔中對它們進行了詳細描述；下面是一個簡短的總結。每一行的格式如下：
+
+::
+
+	tag: Full Name <email address>  optional-other-stuff
+
+常用的標籤有：
+
+ - Signed-off-by: 這是一個開發人員的證明，證明他或她有權提交補丁以包含到內核
+   中。這表明同意開發者來源認證協議，其全文見
+   :ref:`Documentation/translations/zh_TW/process/submitting-patches.rst <tw_submittingpatches>`
+   如果沒有合適的簽字，則不能合併到主線中。
+
+ - Co-developed-by: 聲明補丁是由多個開發人員共同創建的；當幾個人在一個補丁上
+   工作時，它用於給出共同作者（除了 From: 所給出的作者之外）。由於
+   Co-developed-by: 表示作者身份，所以每個共同開發人，必須緊跟在相關合作作者
+   的Signed-off-by之後。具體內容和示例見以下文件
+   :ref:`Documentation/translations/zh_TW/process/submitting-patches.rst <tw_submittingpatches>`
+
+ - Acked-by: 表示另一個開發人員（通常是相關代碼的維護人員）同意補丁適合包含
+   在內核中。
+
+ - Tested-by: 聲明某人已經測試了補丁並確認它可以工作。
+
+ - Reviewed-by: 表示某開發人員已經審查了補丁的正確性；有關詳細信息，請參閱
+   :ref:`Documentation/translations/zh_TW/process/submitting-patches.rst <tw_submittingpatches>`
+
+ - Reported-by: 指定報告此補丁修復的問題的用戶；此標記用於表示感謝。
+
+ - Cc：指定某人收到了補丁的副本，並有機會對此發表評論。
+
+在補丁中添加標籤時要小心：只有Cc:才適合在沒有指定人員明確許可的情況下添加。
+
+發送補丁
+--------
+
+在寄出補丁之前，您還需要注意以下幾點：
+
+ - 您確定您的郵件發送程序不會損壞補丁嗎？被郵件客戶端更改空白或修飾了行的補丁
+   無法被另一端接受，並且通常不會進行任何詳細檢查。如果有任何疑問，先把補丁寄
+   給你自己，讓你自己確定它是完好無損的。
+
+   :ref:`Documentation/translations/zh_TW/process/email-clients.rst <tw_email_clients>`
+   提供了一些有用的提示，可以讓特定的郵件客戶端正常發送補丁。
+
+ - 你確定你的補丁沒有荒唐的錯誤嗎？您應該始終通過scripts/checkpatch.pl檢查
+   補丁程序，並解決它提出的問題。請記住，checkpatch.pl，雖然體現了對內核補丁
+   應該是什麼樣的大量思考，但它並不比您聰明。如果修復checkpatch.pl給的問題會
+   使代碼變得更糟，請不要這樣做。
+
+補丁應始終以純文本形式發送。請不要將它們作爲附件發送；這使得審閱者在答覆中更難
+引用補丁的部分。相反，只需將補丁直接放到您的消息中。
+
+寄出補丁時，重要的是將副本發送給任何可能感興趣的人。與其他一些項目不同，內核
+鼓勵人們甚至錯誤地發送過多的副本；不要假定相關人員會看到您在郵件列表中的發布。
+尤其是，副本應發送至：
+
+ - 受影響子系統的維護人員。如前所述，維護人員文件是查找這些人員的首選地方。
+
+ - 其他在同一領域工作的開發人員，尤其是那些現在可能在那裡工作的開發人員。使用
+   git查看還有誰修改了您正在處理的文件，這很有幫助。
+
+ - 如果您對某錯誤報告或功能請求做出響應，也可以抄送原始發送人。
+
+ - 將副本發送到相關郵件列表，或者若無相關列表，則發送到linux-kernel列表。
+
+ - 如果您正在修復一個缺陷，請考慮該修復是否應進入下一個穩定更新。如果是這樣，
+   補丁副本也應發到stable@vger.kernel.org 。另外，在補丁本身的標籤中添加一個
+   「Cc: stable@vger.kernel.org」；這將使穩定版團隊在修復進入主線時收到通知。
+
+當爲一個補丁選擇接收者時，最好清楚你認爲誰最終會接受這個補丁並將其合併。雖然
+可以將補丁直接發給Linus Torvalds並讓他合併，但通常情況下不會這樣做。Linus很
+忙，並且有子系統維護人員負責監視內核的特定部分。通常您會希望維護人員合併您的
+補丁。如果沒有明顯的維護人員，Andrew Morton通常是最後的補丁接收者。
+
+補丁需要好的主題行。補丁主題行的規範格式如下：
+
+::
+
+	[PATCH nn/mm] subsys: one-line description of the patch
+
+其中「nn」是補丁的序號，「mm」是系列中補丁的總數，「subsys」是受影響子系統的
+名稱。當然，一個單獨的補丁可以省略nn/mm。
+
+如果您有一系列重要的補丁，那麼通常發送一個簡介作爲第〇部分。不過，這個約定
+並沒有得到普遍遵循；如果您使用它，請記住簡介中的信息不會進入內核變更日誌。
+因此，請確保補丁本身具有完整的變更日誌信息。
+
+一般來說，多部分補丁的第二部分和後續部分應作爲對第一部分的回覆發送，以便它們
+在接收端都連接在一起。像git和coilt這樣的工具有命令，可以通過適當的線程發送
+一組補丁。但是，如果您有一長串補丁，並正使用git，請不要使用–-chain-reply-to
+選項，以避免創建過深的嵌套。
+
diff --git a/Documentation/translations/zh_TW/process/6.Followthrough.rst b/Documentation/translations/zh_TW/process/6.Followthrough.rst
new file mode 100644
index 000000000000..4af782742db3
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/6.Followthrough.rst
@@ -0,0 +1,156 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/6.Followthrough.rst <development_followthrough>`
+
+:Translator:
+
+ 時奎亮 Alex Shi <alex.shi@linux.alibaba.com>
+
+:校譯:
+
+ 吳想成 Wu XiangCheng <bobwxc@email.cn>
+ 胡皓文 Hu Haowen <src.res@email.cn>
+
+.. _tw_development_followthrough:
+
+跟進
+====
+
+此時，您已經遵循了到目前爲止給出的指導方針，並且，隨著您自己的工程技能的增加，
+已經發布了一系列完美的補丁。即使是經驗豐富的內核開發人員也能犯的最大錯誤之一
+是，認爲他們的工作現在已經完成了。事實上，發布補丁意味著進入流程的下一個階段，
+可能還需要做很多工作。
+
+一個補丁在首次發布時就非常出色、沒有改進的餘地，這是很罕見的。內核開發流程已
+認識到這一事實，因此它非常注重對已發布代碼的改進。作爲代碼的作者，您應該與
+內核社區合作，以確保您的代碼符合內核的質量標準。如果不參與這個過程，很可能會
+無法將補丁合併到主線中。
+
+與審閱者合作
+------------
+
+任何意義上的補丁都會導致其他開發人員在審查代碼時發表大量評論。對於許多開發
+人員來說，與審閱人員合作可能是內核開發過程中最令人生畏的部分。但是如果你
+記住一些事情，生活會變得容易得多：
+
+ - 如果你已經很好地解釋了你的補丁，審閱人員會理解它的價值，以及爲什麼你會
+   費盡心思去寫它。但是這個並不能阻止他們提出一個基本的問題：在五年或十年後
+   維護含有此代碼的內核會怎麼樣？你可能被要求做出的許多改變——從編碼風格的
+   調整到大量的重寫——都來自於對Linux的理解，即從現在起十年後，Linux仍將
+   在開發中。
+
+ - 代碼審查是一項艱苦的工作，這是一項相對吃力不討好的工作；人們記得誰編寫了
+   內核代碼，但對於那些審查它的人來說，幾乎沒有什麼長久的名聲。因此，審閱
+   人員可能會變得暴躁，尤其是當他們看到同樣的錯誤被一遍又一遍地犯下時。如果
+   你得到了一個看起來憤怒、侮辱或完全冒犯你的評論，請抑制以同樣方式回應的衝動。
+   代碼審查是關於代碼的，而不是關於人的，代碼審閱人員不會親自攻擊您。
+
+ - 同樣，代碼審閱人員也不想以犧牲你僱主的利益爲代價來宣傳他們僱主的議程。
+   內核開發人員通常希望今後幾年能在內核上工作，但他們明白他們的僱主可能會改
+   變。他們真的，幾乎毫無例外地，致力於創造他們所能做到的最好的內核；他們並
+   沒有試圖給僱主的競爭對手造成不適。
+
+所有這些歸根結底就是，當審閱者向您發送評論時，您需要注意他們正在進行的技術
+評論。不要讓他們的表達方式或你自己的驕傲阻止此事。當你在一個補丁上得到評論
+時，花點時間去理解評論人想說什麼。如果可能的話，請修覆審閱者要求您修復的內
+容。然後回覆審閱者：謝謝他們，並描述你將如何回答他們的問題。
+
+請注意，您不必同意審閱者建議的每個更改。如果您認爲審閱者誤解了您的代碼，請
+解釋到底發生了什麼。如果您對建議的更改有技術上的異議，請描述它並證明您對該
+問題的解決方案是正確的。如果你的解釋有道理，審閱者會接受的。不過，如果你的
+解釋證明缺乏說服力，尤其是當其他人開始同意審稿人的觀點時，請花些時間重新考慮
+一下。你很容易對自己解決問題的方法視而不見，以至於你沒有意識到某些東西完全
+是錯誤的，或者你甚至沒有解決正確的問題。
+
+Andrew Morton建議，每一個不會導致代碼更改的審閱評論都應該產生一個額外的代碼
+注釋；這可以幫助未來的審閱人員避免第一次出現的問題。
+
+一個致命的錯誤是忽視評論，希望它們會消失。它們不會走的。如果您在沒有對之前
+收到的評論做出響應的情況下重新發布代碼，那麼很可能會發現補丁毫無用處。
+
+說到重新發布代碼：請記住，審閱者不會記住您上次發布的代碼的所有細節。因此，
+提醒審閱人員以前提出的問題以及您如何處理這些問題總是一個好主意；補丁變更
+日誌是提供此類信息的好地方。審閱者不必搜索列表檔案來熟悉上次所說的內容；
+如果您幫助他們直接開始，當他們重新查看您的代碼時，心情會更好。
+
+如果你已經試著做正確的事情，但事情仍然沒有進展呢？大多數技術上的分歧都可以
+通過討論來解決，但有時人們仍需要做出決定。如果你真的認爲這個決定對你不利，
+你可以試著向有更高權力的人上訴。對於本文，更高權力的人是 Andrew Morton 。
+Andrew 在內核開發社區中非常受尊敬；他經常爲似乎被絕望阻塞的事情清障。儘管
+如此，不應輕易就直接找 Andrew ，也不應在所有其他替代方案都被嘗試之前找他。
+當然，記住，他也可能不同意你的意見。
+
+接下來會發生什麼
+----------------
+
+如果一個補丁被認爲適合添加到內核中，並且大多數審查問題得到解決，下一步通常
+是進入子系統維護人員的樹中。工作方式因子系統而異；每個維護人員都有自己的
+工作方式。特別是可能有不止一棵樹——也許一棵樹專門用於計劃下一個合併窗口的
+補丁，另一棵樹用於長期工作。
+
+對於應用到不屬於明顯子系統樹（例如內存管理修補程序）的區域的修補程序，默認樹
+通常上溯到-mm。影響多個子系統的補丁也可以最終進入-mm樹。
+
+包含在子系統樹中可以提高補丁的可見性。現在，使用該樹的其他開發人員將默認獲
+得補丁。子系統樹通常也爲Linux提供支持，使其內容對整個開發社區可見。在這一點
+上，您很可能會從一組新的審閱者那裡得到更多的評論；這些評論需要像上一輪那樣
+得到回應。
+
+在這時也會發生點什麼，這取決於你的補丁的性質，是否與其他人正在做的工作發生
+衝突。在最壞的情況下，嚴重的補丁衝突可能會導致一些工作被擱置，以便剩餘的補丁
+可以成形併合並。另一些時候，衝突解決將涉及到與其他開發人員合作，可能還會
+在樹之間移動一些補丁，以確保所有的應用都是乾淨的。這項工作可能是一件痛苦的
+事情，但也需慶幸現在的幸福：在linux-next樹出現之前，這些衝突通常只在合併窗口
+中出現，必須迅速解決。現在可以在合併窗口打開之前的空閒時間解決這些問題。
+
+有朝一日，如果一切順利，您將登錄並看到您的補丁已經合併到主線內核中。祝賀你！
+然而，一旦慶祝完了（並且您已經將自己添加到維護人員文件中），就一定要記住
+一個重要的小事實：工作仍然沒有完成。併入主線也帶來了它的挑戰。
+
+首先，補丁的可見性再次提高。可能會有以前不知道這個補丁的開發者的新一輪評論。
+忽略它們可能很有誘惑力，因爲您的代碼不再存在任何被合併的問題。但是，要抵制
+這種誘惑，您仍然需要對有問題或建議的開發人員作出響應。
+
+不過，更重要的是：將代碼包含在主線中會將代碼交給更多的一些測試人員。即使您
+爲尚未可用的硬體提供了驅動程序，您也會驚訝於有多少人會將您的代碼構建到內核
+中。當然，如果有測試人員，也可能會有錯誤報告。
+
+最糟糕的錯誤報告是回歸。如果你的補丁導致回歸，你會發現多到讓你不舒服的眼睛盯
+著你；回歸需要儘快修復。如果您不願意或無法修復回歸（其他人都不會爲您修復），
+那麼在穩定期內，您的補丁幾乎肯定會被移除。除了否定您爲使補丁進入主線所做的
+所有工作之外，如果由於未能修復回歸而取消補丁，很可能會使將來的工作更難被合併。
+
+在處理完任何回歸之後，可能還有其他普通缺陷需要處理。穩定期是修復這些錯誤並
+確保代碼在主線內核版本中的首次發布儘可能可靠的最好機會。所以，請回應錯誤
+報告，並儘可能解決問題。這就是穩定期的目的；一旦解決了舊補丁的任何問題，就
+可以開始盡情創建新補丁。
+
+別忘了，還有其他節點也可能會創建缺陷報告：下一個主線穩定版本，當著名的發行
+商選擇包含您補丁的內核版本時等等。繼續響應這些報告是您工作的基本素養。但是
+如果這不能提供足夠的動機，那麼也需要考慮：開發社區會記住那些在合併後對代碼
+失去興趣的開發人員。下一次你發布補丁時，他們會以你以後不會持續維護它爲前提
+來評估它。
+
+其他可能發生的事情
+------------------
+
+某天，當你打開你的郵件客戶端時，看到有人給你寄了一個代碼補丁。畢竟，這是
+讓您的代碼公開存在的好處之一。如果您同意這個補丁，您可以將它轉發給子系統
+維護人員（確保包含一個正確的From:行，這樣屬性是正確的，並添加一個您自己的
+signoff ），或者回復一個 Acked-by: 讓原始發送者向上發送它。
+
+如果您不同意補丁，請禮貌地回復，解釋原因。如果可能的話，告訴作者需要做哪些
+更改才能讓您接受補丁。合併代碼的編寫者和維護者所反對的補丁的確存在著一定的
+阻力，但僅此而已。如果你被認爲不必要的阻礙了好的工作，那麼這些補丁最終會
+繞過你並進入主線。在Linux內核中，沒有人對任何代碼擁有絕對的否決權。可能除
+了Linus。
+
+在非常罕見的情況下，您可能會看到完全不同的東西：另一個開發人員發布了針對您
+的問題的不同解決方案。在這時，兩個補丁之一可能不會被合併，「我的補丁首先
+發布」不被認爲是一個令人信服的技術論據。如果有別人的補丁取代了你的補丁而進
+入了主線，那麼只有一種方法可以回應你：很高興你的問題解決了，請繼續工作吧。
+以這種方式把某人的工作推到一邊可能導致傷心和氣餒，但是社區會記住你的反應，
+即使很久以後他們已經忘記了誰的補丁真正被合併。
+
diff --git a/Documentation/translations/zh_TW/process/7.AdvancedTopics.rst b/Documentation/translations/zh_TW/process/7.AdvancedTopics.rst
new file mode 100644
index 000000000000..3de093d0f170
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/7.AdvancedTopics.rst
@@ -0,0 +1,137 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/7.AdvancedTopics.rst <development_advancedtopics>`
+
+:Translator:
+
+ 時奎亮 Alex Shi <alex.shi@linux.alibaba.com>
+
+:校譯:
+
+ 吳想成 Wu XiangCheng <bobwxc@email.cn>
+ 胡皓文 Hu Haowen <src.res@email.cn>
+
+.. _tw_development_advancedtopics:
+
+高級主題
+========
+
+現在，希望您能夠掌握開發流程的工作方式。然而，還有更多的東西要學！本節將介紹
+一些主題，這些主題對希望成爲Linux內核開發過程常規部分的開發人員有幫助。
+
+使用Git管理補丁
+---------------
+
+內核使用分布式版本控制始於2002年初，當時Linus首次開始使用專有的Bitkeeper應用
+程序。雖然BitKeeper存在爭議，但它所體現的軟體版本管理方法卻肯定不是。分布式
+版本控制可以立即加速內核開發項目。現在有好幾種免費的BitKeeper替代品。
+但無論好壞，內核項目都已經選擇了Git作爲其工具。
+
+使用Git管理補丁可以使開發人員的生活更加輕鬆，尤其是隨著補丁數量的增長。Git也
+有其粗糙的邊角和一定的危險性，它是一個年輕和強大的工具，仍然在其開發人員完善
+中。本文檔不會試圖教會讀者如何使用git；這會是個巨長的文檔。相反，這裡的重點
+將是Git如何特別適合內核開發過程。想要加快用Git速度的開發人員可以在以下網站上
+找到更多信息：
+
+	https://git-scm.com/
+
+	https://www.kernel.org/pub/software/scm/git/docs/user-manual.html
+
+同時網上也能找到各種各樣的教程。
+
+在嘗試使用它生成補丁供他人使用之前，第一要務是閱讀上述網頁，對Git的工作方式
+有一個紮實的了解。使用Git的開發人員應能進行拉取主線存儲庫的副本，查詢修訂
+歷史，提交對樹的更改，使用分支等操作。了解Git用於重寫歷史的工具（如rebase）
+也很有用。Git有自己的術語和概念；Git的新用戶應該了解引用、遠程分支、索引、
+快進合併、推拉、游離頭等。一開始可能有點嚇人，但這些概念不難通過一點學習來
+理解。
+
+使用git生成通過電子郵件提交的補丁是提高速度的一個很好的練習。
+
+當您準備好開始建立Git樹供其他人查看時，無疑需要一個可以從中拉取的伺服器。
+如果您有一個可以訪問網際網路的系統，那麼使用git-daemon設置這樣的伺服器相對
+簡單。同時，免費的公共託管網站（例如github）也開始出現在網絡上。成熟的開發
+人員可以在kernel.org上獲得一個帳戶，但這些帳戶並不容易得到；更多有關信息，
+請參閱 https://kernel.org/faq/ 。
+
+正常的Git工作流程涉及到許多分支的使用。每一條開發線都可以分爲單獨的「主題
+分支」，並獨立維護。Git的分支很容易使用，沒有理由不使用它們。而且，在任何
+情況下，您都不應該在任何您打算讓其他人從中拉取的分支中進行開發。應該小心地
+創建公開可用的分支；當開發分支處於完整狀態並已準備好時(而不是之前）才合併
+開發分支的補丁。
+
+Git提供了一些強大的工具，可以讓您重寫開發歷史。一個不方便的補丁（比如說，
+一個打破二分法的補丁，或者有其他一些明顯的缺陷）可以在適當的位置修復，或者
+完全從歷史中消失。一個補丁系列可以被重寫，就好像它是在今天的主線上寫的一樣，
+即使你已經花了幾個月的時間在寫它。可以透明地將更改從一個分支轉移到另一個
+分支。等等。明智地使用git修改歷史的能力可以幫助創建問題更少的乾淨補丁集。
+
+然而，過度使用這種功能可能會導致其他問題，而不僅僅是對創建完美項目歷史的
+簡單癡迷。重寫歷史將重寫該歷史中包含的更改，將經過測試（希望如此）的內核樹
+變爲未經測試的內核樹。除此之外，如果開發人員沒有共享項目歷史，他們就無法
+輕鬆地協作；如果您重寫了其他開發人員拉入他們存儲庫的歷史，您將使這些開發
+人員的生活更加困難。因此，這裡有一個簡單的經驗法則：被導出到其他地方的歷史
+在此後通常被認爲是不可變的。
+
+因此，一旦將一組更改推送到公開可用的伺服器上，就不應該重寫這些更改。如果您
+嘗試強制進行無法快進合併的更改（即不共享同一歷史記錄的更改），Git將嘗試強制
+執行此規則。這可能覆蓋檢查，有時甚至需要重寫導出的樹。在樹之間移動變更集以
+避免linux-next中的衝突就是一個例子。但這種行爲應該是罕見的。這就是爲什麼
+開發應該在私有分支中進行（必要時可以重寫）並且只有在公共分支處於合理的較新
+狀態時才轉移到公共分支中的原因之一。
+
+當主線（或其他一組變更所基於的樹）前進時，很容易與該樹合併以保持領先地位。
+對於一個私有的分支，rebasing 可能是一個很容易跟上另一棵樹的方法，但是一旦
+一棵樹被導出到外界，rebasing就不可取了。一旦發生這種情況，就必須進行完全
+合併（merge）。合併有時是很有意義的，但是過於頻繁的合併會不必要地擾亂歷史。
+在這種情況下建議的做法是不要頻繁合併，通常只在特定的發布點（如主線-rc發布）
+合併。如果您對特定的更改感到緊張，則可以始終在私有分支中執行測試合併。在
+這種情況下，git「rerere」工具很有用；它能記住合併衝突是如何解決的，這樣您
+就不必重複相同的工作。
+
+關於Git這樣的工具的一個最大的反覆抱怨是：補丁從一個存儲庫到另一個存儲庫的
+大量移動使得很容易陷入錯誤建議的變更中，這些變更避開審查雷達進入主線。當內
+核開發人員看到這種情況發生時，他們往往會感到不高興；在Git樹上放置未審閱或
+主題外的補丁可能會影響您將來讓樹被拉取的能力。引用Linus的話:
+
+::
+
+   你可以給我發補丁，但當我從你那裡拉取一個Git補丁時，我需要知道你清楚
+   自己在做什麼，我需要能夠相信事情而 *無需* 手動檢查每個單獨的更改。
+
+（http://lwn.net/articles/224135/）。
+
+爲了避免這種情況，請確保給定分支中的所有補丁都與相關主題緊密相關；「驅動程序
+修復」分支不應更改核心內存管理代碼。而且，最重要的是，不要使用Git樹來繞過
+審查過程。不時的將樹的摘要發布到相關的列表中，在合適時候請求linux-next中
+包含該樹。
+
+如果其他人開始發送補丁以包含到您的樹中，不要忘記審閱它們。還要確保您維護正確
+的作者信息； git 「am」工具在這方面做得最好，但是如果補丁通過第三方轉發給您，
+您可能需要在補丁中添加「From:」行。
+
+請求拉取時，請務必提供所有相關信息：樹的位置、要拉取的分支以及拉取將導致的
+更改。在這方面 git request-pull 命令非常有用；它將按照其他開發人員所期望的
+格式化請求，並檢查以確保您已記得將這些更改推送到公共伺服器。
+
+審閱補丁
+--------
+
+一些讀者顯然會反對將本節與「高級主題」放在一起，因爲即使是剛開始的內核開發人員
+也應該審閱補丁。當然，沒有比查看其他人發布的代碼更好的方法來學習如何在內核環境
+中編程了。此外，審閱者永遠供不應求；通過審閱代碼，您可以對整個流程做出重大貢獻。
+
+審查代碼可能是一副令人生畏的圖景，特別是對一個新的內核開發人員來說，他們
+可能會對公開詢問代碼感到緊張，而這些代碼是由那些有更多經驗的人發布的。不過，
+即使是最有經驗的開發人員編寫的代碼也可以得到改進。也許對（所有）審閱者最好
+的建議是：把審閱評論當成問題而不是批評。詢問「在這條路徑中如何釋放鎖？」
+總是比說「這裡的鎖是錯誤的」更好。
+
+不同的開發人員將從不同的角度審查代碼。部分人會主要關注代碼風格以及代碼行是
+否有尾隨空格。其他人會主要關注補丁作爲一個整體實現的變更是否對內核有好處。
+同時也有人會檢查是否存在鎖問題、堆棧使用過度、可能的安全問題、在其他地方
+發現的代碼重複、足夠的文檔、對性能的不利影響、用戶空間ABI更改等。所有類型
+的檢查，只要它們能引導更好的代碼進入內核，都是受歡迎和值得的。
+
diff --git a/Documentation/translations/zh_TW/process/8.Conclusion.rst b/Documentation/translations/zh_TW/process/8.Conclusion.rst
new file mode 100644
index 000000000000..7572b17667d9
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/8.Conclusion.rst
@@ -0,0 +1,74 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/8.Conclusion.rst <development_conclusion>`
+:Translator:
+
+ 時奎亮 Alex Shi <alex.shi@linux.alibaba.com>
+
+:校譯:
+
+ 吳想成 Wu XiangCheng <bobwxc@email.cn>
+ 胡皓文 Hu Haowen <src.res@email.cn>
+
+.. _tw_development_conclusion:
+
+更多信息
+========
+
+關於Linux內核開發和相關主題的信息來源很多。首先是在內核原始碼分發中找到的
+文檔目錄。頂級
+:ref:`Documentation/translations/zh_TW/process/howto.rst <tw_process_howto>`
+文件是一個重要的起點；
+:ref:`Documentation/translations/zh_TW/process/submitting-patches.rst <tw_submittingpatches>`
+和 :ref:`Documentation/translations/zh_TW/process/submitting-drivers.rst <tw_submittingdrivers>`
+也是所有內核開發人員都應該閱讀的內容。許多內部內核API都是使用kerneldoc機制
+記錄的；「make htmldocs」或「make pdfdocs」可用於以HTML或PDF格式生成這些文檔
+（儘管某些發行版提供的tex版本會遇到內部限制，無法正確處理文檔）。
+
+不同的網站在各個細節層次上討論內核開發。本文作者想謙虛地建議用 https://lwn.net/
+作爲來源；有關許多特定內核主題的信息可以通過以下網址的 LWN 內核索引找到：
+
+  http://lwn.net/kernel/index/
+
+除此之外，內核開發人員的一個寶貴資源是：
+
+  https://kernelnewbies.org/
+
+當然，也不應該忘記 https://kernel.org/ ，這是內核發布信息的最終位置。
+
+關於內核開發有很多書：
+
+  《Linux設備驅動程序》第三版（Jonathan Corbet、Alessandro Rubini和Greg Kroah Hartman）
+  線上版本在 http://lwn.net/kernel/ldd3/
+
+  《Linux內核設計與實現》（Robert Love）
+
+  《深入理解Linux內核》(Daniel Bovet和Marco Cesati）
+
+然而，所有這些書都有一個共同的缺點：它們上架時就往往有些過時，而且已經上架
+一段時間了。不過，在那裡還是可以找到相當多的好信息。
+
+有關git的文檔，請訪問：
+
+  https://www.kernel.org/pub/software/scm/git/docs/
+
+  https://www.kernel.org/pub/software/scm/git/docs/user-manual.html
+
+結論
+====
+
+祝賀所有通過這篇冗長的文檔的人。希望它能夠幫助您理解Linux內核是如何開發的，
+以及您如何參與這個過程。
+
+最後，重要的是參與。任何開源軟體項目都不會超過其貢獻者投入其中的總和。Linux
+內核的發展速度和以前一樣快，因爲它得到了大量開發人員的幫助，他們都在努力使它
+變得更好。內核是一個最成功的例子，說明了當成千上萬的人爲了一個共同的目標一起
+工作時，可以做出什麼。
+
+不過，內核總是可以從更大的開發人員基礎中獲益。總有更多的工作要做。但是同樣
+重要的是，Linux生態系統中的大多數其他參與者可以通過爲內核做出貢獻而受益。使
+代碼進入主線是提高代碼質量、降低維護和分發成本、提高對內核開發方向的影響程度
+等的關鍵。這是一種共贏的局面。啓動你的編輯器，來加入我們吧；你會非常受歡迎的。
+
diff --git a/Documentation/translations/zh_TW/process/code-of-conduct-interpretation.rst b/Documentation/translations/zh_TW/process/code-of-conduct-interpretation.rst
new file mode 100644
index 000000000000..949d831aaf6c
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/code-of-conduct-interpretation.rst
@@ -0,0 +1,112 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/code-of-conduct-interpretation.rst <code_of_conduct_interpretation>`
+:Translator: Alex Shi <alex.shi@linux.alibaba.com>
+             Hu Haowen <src.res@email.cn>
+
+.. _tw_code_of_conduct_interpretation:
+
+Linux內核貢獻者契約行為準則解釋
+===============================
+
+:ref:`tw_code_of_conduct` 準則是一個通用文檔，旨在爲幾乎所有開源社區提供一套規則。
+每個開源社區都是獨一無二的，Linux內核也不例外。因此，本文描述了Linux內核社區中
+如何解釋它。我們也不希望這種解釋隨著時間的推移是靜態的，並將根據需要進行調整。
+
+與開發軟體的「傳統」方法相比，Linux內核開發工作是一個非常個人化的過程。你的貢獻
+和背後的想法將被仔細審查，往往導致批判和批評。審查將幾乎總是需要改進，材料才
+能包括在內核中。要知道這是因爲所有相關人員都希望看到Linux整體成功的最佳解決方
+案。這個開發過程已經被證明可以創建有史以來最健壯的作業系統內核，我們不想做任何
+事情來導致提交質量和最終結果的下降。
+
+維護者
+------
+
+行為準則多次使用「維護者」一詞。在內核社區中，「維護者」是負責子系統、驅動程序或
+文件的任何人，並在內核原始碼樹的維護者文件中列出。
+
+責任
+----
+
+《行為準則》提到了維護人員的權利和責任，這需要進一步澄清。
+
+首先，最重要的是，有一個合理的期望是由維護人員通過實例來領導。
+
+也就是說，我們的社區是廣闊的，對維護者沒有新的要求，他們單方面處理其他人在
+他們活躍的社區的行爲。這一責任由我們所有人承擔，最終《行為準則》記錄了最終的
+上訴路徑，以防有關行爲問題的問題懸而未決。
+
+維護人員應該願意在出現問題時提供幫助，並在需要時與社區中的其他人合作。如果您
+不確定如何處理出現的情況，請不要害怕聯繫技術諮詢委員會（TAB）或其他維護人員。
+除非您願意，否則不會將其視爲違規報告。如果您不確定是否該聯繫TAB 或任何其他維
+護人員，請聯繫我們的衝突調解人 Mishi Choudhary <mishi@linux.com>。
+
+最後，「善待對方」才是每個人的最終目標。我們知道每個人都是人，有時我們都會失敗，
+但我們所有人的首要目標應該是努力友好地解決問題。執行行為準則將是最後的選擇。
+
+我們的目標是創建一個強大的、技術先進的作業系統，以及所涉及的技術複雜性，這自
+然需要專業知識和決策。
+
+所需的專業知識因貢獻領域而異。它主要由上下文和技術複雜性決定，其次由貢獻者和
+維護者的期望決定。
+
+專家的期望和決策都要經過討論，但在最後，爲了取得進展，必須能夠做出決策。這一
+特權掌握在維護人員和項目領導的手中，預計將善意使用。
+
+因此，設定專業知識期望、作出決定和拒絕不適當的貢獻不被視爲違反行為準則。
+
+雖然維護人員一般都歡迎新來者，但他們幫助（新）貢獻者克服障礙的能力有限，因此
+他們必須確定優先事項。這也不應被視爲違反了行為準則。內核社區意識到這一點，並
+以各種形式提供入門級節目，如 kernelnewbies.org 。
+
+範圍
+----
+
+Linux內核社區主要在一組公共電子郵件列表上進行交互，這些列表分布在由多個不同
+公司或個人控制的多個不同伺服器上。所有這些列表都在內核原始碼樹中的
+MAINTAINERS 文件中定義。發送到這些郵件列表的任何電子郵件都被視爲包含在行爲
+準則中。
+
+使用 kernel.org bugzilla和其他子系統bugzilla 或bug跟蹤工具的開發人員應該遵循
+行為準則的指導原則。Linux內核社區沒有「官方」項目電子郵件地址或「官方」社交媒體
+地址。使用kernel.org電子郵件帳戶執行的任何活動必須遵循爲kernel.org發布的行爲
+準則，就像任何使用公司電子郵件帳戶的個人必須遵循該公司的特定規則一樣。
+
+行為準則並不禁止在郵件列表消息、內核更改日誌消息或代碼注釋中繼續包含名稱、
+電子郵件地址和相關注釋。
+
+其他論壇中的互動包括在適用於上述論壇的任何規則中，通常不包括在行為準則中。
+除了在極端情況下可考慮的例外情況。
+
+提交給內核的貢獻應該使用適當的語言。在行為準則之前已經存在的內容現在不會被
+視爲違反。然而，不適當的語言可以被視爲一個bug；如果任何相關方提交補丁，
+這樣的bug將被更快地修復。當前屬於用戶/內核API的一部分的表達式，或者反映已
+發布標準或規範中使用的術語的表達式，不被視爲bug。
+
+執行
+----
+
+行為準則中列出的地址屬於行為準則委員會。https://kernel.org/code-of-conduct.html
+列出了在任何給定時間接收這些電子郵件的確切成員。成員不能訪問在加入委員會之前
+或離開委員會之後所做的報告。
+
+最初的行為準則委員會由TAB的志願者以及作爲中立第三方的專業調解人組成。委員會
+的首要任務是建立文件化的流程，並將其公開。
+
+如果報告人不希望將整個委員會納入投訴或關切，可直接聯繫委員會的任何成員，包括
+調解人。
+
+行為準則委員會根據流程審查案例（見上文），並根據需要和適當與TAB協商，例如請求
+和接收有關內核社區的信息。
+
+委員會做出的任何決定都將提交到表中，以便在必要時與相關維護人員一起執行。行爲
+準則委員會的決定可以通過三分之二的投票推翻。
+
+每季度，行為準則委員會和標籤將提供一份報告，概述行為準則委員會收到的匿名報告
+及其狀態，以及任何否決決定的細節，包括完整和可識別的投票細節。
+
+我們希望在啓動期之後爲行為準則委員會人員配備建立一個不同的流程。發生此情況時，
+將使用該信息更新此文檔。
+
diff --git a/Documentation/translations/zh_TW/process/code-of-conduct.rst b/Documentation/translations/zh_TW/process/code-of-conduct.rst
new file mode 100644
index 000000000000..716e5843b6e9
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/code-of-conduct.rst
@@ -0,0 +1,76 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/code-of-conduct.rst <code_of_conduct>`
+:Translator: Alex Shi <alex.shi@linux.alibaba.com>
+             Hu Haowen <src.res@email.cn>
+
+.. _tw_code_of_conduct:
+
+貢獻者契約行為準則
+++++++++++++++++++
+
+我們的誓言
+==========
+
+爲了營造一個開放、友好的環境，我們作爲貢獻者和維護人承諾，讓我們的社區和參
+與者，擁有一個無騷擾的體驗，無論年齡、體型、殘疾、種族、性別特徵、性別認同
+和表達、經驗水平、教育程度、社會狀況，經濟地位、國籍、個人外貌、種族、宗教
+或性身份和取向。
+
+我們的標準
+==========
+
+有助於創造積極環境的行爲包括：
+
+* 使用歡迎和包容的語言
+* 尊重不同的觀點和經驗
+* 優雅地接受建設性的批評
+* 關注什麼對社區最有利
+* 對其他社區成員表示同情
+
+參與者的不可接受行爲包括：
+
+* 使用性意味的語言或意象以及不受歡迎的性注意或者更過分的行爲
+* 煽動、侮辱/貶損評論以及個人或政治攻擊
+* 公開或私下騷擾
+* 未經明確許可，發布他人的私人信息，如物理或電子地址。
+* 在專業場合被合理認爲不適當的其他行爲
+
+我們的責任
+==========
+
+維護人員負責澄清可接受行爲的標準，並應針對任何不可接受行爲採取適當和公平的
+糾正措施。
+
+維護人員有權和責任刪除、編輯或拒絕與本行為準則不一致的評論、承諾、代碼、
+wiki編輯、問題和其他貢獻，或暫時或永久禁止任何貢獻者從事他們認爲不適當、
+威脅、冒犯或有害的其他行爲。
+
+範圍
+====
+
+當個人代表項目或其社區時，本行為準則既適用於項目空間，也適用於公共空間。
+代表一個項目或社區的例子包括使用一個正式的項目電子郵件地址，通過一個正式
+的社交媒體帳戶發布，或者在在線或離線事件中擔任指定的代表。項目維護人員可以
+進一步定義和澄清項目的表示。
+
+執行
+====
+
+如有濫用、騷擾或其他不可接受的行爲，可聯繫行為準則委員會<conduct@kernel.org>。
+所有投訴都將接受審查和調查，並將得到必要和適當的答覆。行為準則委員會有義務
+對事件報告人保密。具體執行政策的進一步細節可單獨公布。
+
+歸屬
+====
+
+本行為準則改編自《貢獻者契約》，版本1.4，可從
+https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 獲取。
+
+解釋
+====
+
+有關Linux內核社區如何解釋此文檔，請參閱 :ref:`tw_code_of_conduct_interpretation`
+
diff --git a/Documentation/translations/zh_TW/process/coding-style.rst b/Documentation/translations/zh_TW/process/coding-style.rst
new file mode 100644
index 000000000000..61e614aad6a7
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/coding-style.rst
@@ -0,0 +1,958 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/coding-style.rst <codingstyle>`
+
+.. _tw_codingstyle:
+
+譯者::
+
+  中文版維護者： 張樂 Zhang Le <r0bertz@gentoo.org>
+  中文版翻譯者： 張樂 Zhang Le <r0bertz@gentoo.org>
+  中文版校譯者： 王聰 Wang Cong <xiyou.wangcong@gmail.com>
+                 wheelz <kernel.zeng@gmail.com>
+                 管旭東 Xudong Guan <xudong.guan@gmail.com>
+                 Li Zefan <lizf@cn.fujitsu.com>
+                 Wang Chen <wangchen@cn.fujitsu.com>
+                 Hu Haowen <src.res@email.cn>
+
+Linux 內核代碼風格
+=========================
+
+這是一個簡短的文檔，描述了 linux 內核的首選代碼風格。代碼風格是因人而異的，
+而且我不願意把自己的觀點強加給任何人，但這就像我去做任何事情都必須遵循的原則
+那樣，我也希望在絕大多數事上保持這種的態度。請 (在寫代碼時) 至少考慮一下這裡
+的代碼風格。
+
+首先，我建議你列印一份 GNU 代碼規範，然後不要讀。燒了它，這是一個具有重大象徵
+性意義的動作。
+
+不管怎樣，現在我們開始：
+
+
+1) 縮進
+--------------
+
+制表符是 8 個字符，所以縮進也是 8 個字符。有些異端運動試圖將縮進變爲 4 (甚至
+2！) 字符深，這幾乎相當於嘗試將圓周率的值定義爲 3。
+
+理由：縮進的全部意義就在於清楚的定義一個控制塊起止於何處。尤其是當你盯著你的
+屏幕連續看了 20 小時之後，你將會發現大一點的縮進會使你更容易分辨縮進。
+
+現在，有些人會抱怨 8 個字符的縮進會使代碼向右邊移動的太遠，在 80 個字符的終端
+屏幕上就很難讀這樣的代碼。這個問題的答案是，如果你需要 3 級以上的縮進，不管用
+何種方式你的代碼已經有問題了，應該修正你的程序。
+
+簡而言之，8 個字符的縮進可以讓代碼更容易閱讀，還有一個好處是當你的函數嵌套太
+深的時候可以給你警告。留心這個警告。
+
+在 switch 語句中消除多級縮進的首選的方式是讓 ``switch`` 和從屬於它的 ``case``
+標籤對齊於同一列，而不要 ``兩次縮進`` ``case`` 標籤。比如：
+
+.. code-block:: c
+
+	switch (suffix) {
+	case 'G':
+	case 'g':
+		mem <<= 30;
+		break;
+	case 'M':
+	case 'm':
+		mem <<= 20;
+		break;
+	case 'K':
+	case 'k':
+		mem <<= 10;
+		fallthrough;
+	default:
+		break;
+	}
+
+不要把多個語句放在一行里，除非你有什麼東西要隱藏：
+
+.. code-block:: c
+
+	if (condition) do_this;
+	  do_something_everytime;
+
+也不要在一行里放多個賦值語句。內核代碼風格超級簡單。就是避免可能導致別人誤讀
+的表達式。
+
+除了注釋、文檔和 Kconfig 之外，不要使用空格來縮進，前面的例子是例外，是有意爲
+之。
+
+選用一個好的編輯器，不要在行尾留空格。
+
+
+2) 把長的行和字符串打散
+------------------------------
+
+代碼風格的意義就在於使用平常使用的工具來維持代碼的可讀性和可維護性。
+
+每一行的長度的限制是 80 列，我們強烈建議您遵守這個慣例。
+
+長於 80 列的語句要打散成有意義的片段。除非超過 80 列能顯著增加可讀性，並且不
+會隱藏信息。子片段要明顯短於母片段，並明顯靠右。這同樣適用於有著很長參數列表
+的函數頭。然而，絕對不要打散對用戶可見的字符串，例如 printk 信息，因爲這樣就
+很難對它們 grep。
+
+
+3) 大括號和空格的放置
+------------------------------
+
+C 語言風格中另外一個常見問題是大括號的放置。和縮進大小不同，選擇或棄用某种放
+置策略並沒有多少技術上的原因，不過首選的方式，就像 Kernighan 和 Ritchie 展示
+給我們的，是把起始大括號放在行尾，而把結束大括號放在行首，所以：
+
+.. code-block:: c
+
+	if (x is true) {
+		we do y
+	}
+
+這適用於所有的非函數語句塊 (if, switch, for, while, do)。比如：
+
+.. code-block:: c
+
+	switch (action) {
+	case KOBJ_ADD:
+		return "add";
+	case KOBJ_REMOVE:
+		return "remove";
+	case KOBJ_CHANGE:
+		return "change";
+	default:
+		return NULL;
+	}
+
+不過，有一個例外，那就是函數：函數的起始大括號放置於下一行的開頭，所以：
+
+.. code-block:: c
+
+	int function(int x)
+	{
+		body of function
+	}
+
+全世界的異端可能會抱怨這個不一致性是... 呃... 不一致的，不過所有思維健全的人
+都知道 (a) K&R 是 **正確的** 並且 (b) K&R 是正確的。此外，不管怎樣函數都是特
+殊的 (C 函數是不能嵌套的)。
+
+注意結束大括號獨自占據一行，除非它後面跟著同一個語句的剩餘部分，也就是 do 語
+句中的 "while" 或者 if 語句中的 "else"，像這樣：
+
+.. code-block:: c
+
+	do {
+		body of do-loop
+	} while (condition);
+
+和
+
+.. code-block:: c
+
+	if (x == y) {
+		..
+	} else if (x > y) {
+		...
+	} else {
+		....
+	}
+
+理由：K&R。
+
+也請注意這種大括號的放置方式也能使空 (或者差不多空的) 行的數量最小化，同時不
+失可讀性。因此，由於你的屏幕上的新行是不可再生資源 (想想 25 行的終端屏幕)，你
+將會有更多的空行來放置注釋。
+
+當只有一個單獨的語句的時候，不用加不必要的大括號。
+
+.. code-block:: c
+
+	if (condition)
+		action();
+
+和
+
+.. code-block:: c
+
+	if (condition)
+		do_this();
+	else
+		do_that();
+
+這並不適用於只有一個條件分支是單語句的情況；這時所有分支都要使用大括號：
+
+.. code-block:: c
+
+	if (condition) {
+		do_this();
+		do_that();
+	} else {
+		otherwise();
+	}
+
+3.1) 空格
+********************
+
+Linux 內核的空格使用方式 (主要) 取決於它是用於函數還是關鍵字。(大多數) 關鍵字
+後要加一個空格。值得注意的例外是 sizeof, typeof, alignof 和 __attribute__，這
+些關鍵字某些程度上看起來更像函數 (它們在 Linux 里也常常伴隨小括號而使用，儘管
+在 C 里這樣的小括號不是必需的，就像 ``struct fileinfo info;`` 聲明過後的
+``sizeof info``)。
+
+所以在這些關鍵字之後放一個空格::
+
+	if, switch, case, for, do, while
+
+但是不要在 sizeof, typeof, alignof 或者 __attribute__ 這些關鍵字之後放空格。
+例如，
+
+.. code-block:: c
+
+	s = sizeof(struct file);
+
+不要在小括號里的表達式兩側加空格。這是一個 **反例** ：
+
+.. code-block:: c
+
+	s = sizeof( struct file );
+
+當聲明指針類型或者返回指針類型的函數時， ``*`` 的首選使用方式是使之靠近變量名
+或者函數名，而不是靠近類型名。例子：
+
+.. code-block:: c
+
+	char *linux_banner;
+	unsigned long long memparse(char *ptr, char **retptr);
+	char *match_strdup(substring_t *s);
+
+在大多數二元和三元操作符兩側使用一個空格，例如下面所有這些操作符::
+
+	=  +  -  <  >  *  /  %  |  &  ^  <=  >=  ==  !=  ?  :
+
+但是一元操作符後不要加空格::
+
+	&  *  +  -  ~  !  sizeof  typeof  alignof  __attribute__  defined
+
+後綴自加和自減一元操作符前不加空格::
+
+	++  --
+
+前綴自加和自減一元操作符後不加空格::
+
+	++  --
+
+``.`` 和 ``->`` 結構體成員操作符前後不加空格。
+
+不要在行尾留空白。有些可以自動縮進的編輯器會在新行的行首加入適量的空白，然後
+你就可以直接在那一行輸入代碼。不過假如你最後沒有在那一行輸入代碼，有些編輯器
+就不會移除已經加入的空白，就像你故意留下一個只有空白的行。包含行尾空白的行就
+這樣產生了。
+
+當 git 發現補丁包含了行尾空白的時候會警告你，並且可以應你的要求去掉行尾空白；
+不過如果你是正在打一系列補丁，這樣做會導致後面的補丁失敗，因爲你改變了補丁的
+上下文。
+
+
+4) 命名
+------------------------------
+
+C 是一個簡樸的語言，你的命名也應該這樣。和 Modula-2 和 Pascal 程式設計師不同，
+C 程式設計師不使用類似 ThisVariableIsATemporaryCounter 這樣華麗的名字。C 程式設計師會
+稱那個變量爲 ``tmp`` ，這樣寫起來會更容易，而且至少不會令其難於理解。
+
+不過，雖然混用大小寫的名字是不提倡使用的，但是全局變量還是需要一個具描述性的
+名字。稱一個全局函數爲 ``foo`` 是一個難以饒恕的錯誤。
+
+全局變量 (只有當你 **真正** 需要它們的時候再用它) 需要有一個具描述性的名字，就
+像全局函數。如果你有一個可以計算活動用戶數量的函數，你應該叫它
+``count_active_users()`` 或者類似的名字，你不應該叫它 ``cntuser()`` 。
+
+在函數名中包含函數類型 (所謂的匈牙利命名法) 是腦子出了問題——編譯器知道那些類
+型而且能夠檢查那些類型，這樣做只能把程式設計師弄糊塗了。難怪微軟總是製造出有問題
+的程序。
+
+本地變量名應該簡短，而且能夠表達相關的含義。如果你有一些隨機的整數型的循環計
+數器，它應該被稱爲 ``i`` 。叫它 ``loop_counter`` 並無益處，如果它沒有被誤解的
+可能的話。類似的， ``tmp`` 可以用來稱呼任意類型的臨時變量。
+
+如果你怕混淆了你的本地變量名，你就遇到另一個問題了，叫做函數增長荷爾蒙失衡綜
+合症。請看第六章 (函數)。
+
+
+5) Typedef
+-----------
+
+不要使用類似 ``vps_t`` 之類的東西。
+
+對結構體和指針使用 typedef 是一個 **錯誤** 。當你在代碼里看到：
+
+.. code-block:: c
+
+	vps_t a;
+
+這代表什麼意思呢？
+
+相反，如果是這樣
+
+.. code-block:: c
+
+	struct virtual_container *a;
+
+你就知道 ``a`` 是什麼了。
+
+很多人認爲 typedef ``能提高可讀性`` 。實際不是這樣的。它們只在下列情況下有用：
+
+ (a) 完全不透明的對象 (這種情況下要主動使用 typedef 來 **隱藏** 這個對象實際上
+     是什麼)。
+
+     例如： ``pte_t`` 等不透明對象，你只能用合適的訪問函數來訪問它們。
+
+     .. note::
+
+       不透明性和 "訪問函數" 本身是不好的。我們使用 pte_t 等類型的原因在於真
+       的是完全沒有任何共用的可訪問信息。
+
+ (b) 清楚的整數類型，如此，這層抽象就可以 **幫助** 消除到底是 ``int`` 還是
+     ``long`` 的混淆。
+
+     u8/u16/u32 是完全沒有問題的 typedef，不過它們更符合類別 (d) 而不是這裡。
+
+     .. note::
+
+       要這樣做，必須事出有因。如果某個變量是 ``unsigned long`` ，那麼沒有必要
+
+	typedef unsigned long myflags_t;
+
+     不過如果有一個明確的原因，比如它在某種情況下可能會是一個 ``unsigned int``
+     而在其他情況下可能爲 ``unsigned long`` ，那麼就不要猶豫，請務必使用
+     typedef。
+
+ (c) 當你使用 sparse 按字面的創建一個 **新** 類型來做類型檢查的時候。
+
+ (d) 和標準 C99 類型相同的類型，在某些例外的情況下。
+
+     雖然讓眼睛和腦筋來適應新的標準類型比如 ``uint32_t`` 不需要花很多時間，可
+     是有些人仍然拒絕使用它們。
+
+     因此，Linux 特有的等同於標準類型的 ``u8/u16/u32/u64`` 類型和它們的有符號
+     類型是被允許的——儘管在你自己的新代碼中，它們不是強制要求要使用的。
+
+     當編輯已經使用了某個類型集的已有代碼時，你應該遵循那些代碼中已經做出的選
+     擇。
+
+ (e) 可以在用戶空間安全使用的類型。
+
+     在某些用戶空間可見的結構體裡，我們不能要求 C99 類型而且不能用上面提到的
+     ``u32`` 類型。因此，我們在與用戶空間共享的所有結構體中使用 __u32 和類似
+     的類型。
+
+可能還有其他的情況，不過基本的規則是 **永遠不要** 使用 typedef，除非你可以明
+確的應用上述某個規則中的一個。
+
+總的來說，如果一個指針或者一個結構體裡的元素可以合理的被直接訪問到，那麼它們
+就不應該是一個 typedef。
+
+
+6) 函數
+------------------------------
+
+函數應該簡短而漂亮，並且只完成一件事情。函數應該可以一屏或者兩屏顯示完 (我們
+都知道 ISO/ANSI 屏幕大小是 80x24)，只做一件事情，而且把它做好。
+
+一個函數的最大長度是和該函數的複雜度和縮進級數成反比的。所以，如果你有一個理
+論上很簡單的只有一個很長 (但是簡單) 的 case 語句的函數，而且你需要在每個 case
+里做很多很小的事情，這樣的函數儘管很長，但也是可以的。
+
+不過，如果你有一個複雜的函數，而且你懷疑一個天分不是很高的高中一年級學生可能
+甚至搞不清楚這個函數的目的，你應該嚴格遵守前面提到的長度限制。使用輔助函數，
+並爲之取個具描述性的名字 (如果你覺得它們的性能很重要的話，可以讓編譯器內聯它
+們，這樣的效果往往會比你寫一個複雜函數的效果要好。)
+
+函數的另外一個衡量標準是本地變量的數量。此數量不應超過 5－10 個，否則你的函數
+就有問題了。重新考慮一下你的函數，把它分拆成更小的函數。人的大腦一般可以輕鬆
+的同時跟蹤 7 個不同的事物，如果再增多的話，就會糊塗了。即便你聰穎過人，你也可
+能會記不清你 2 個星期前做過的事情。
+
+在源文件里，使用空行隔開不同的函數。如果該函數需要被導出，它的 **EXPORT** 宏
+應該緊貼在它的結束大括號之下。比如：
+
+.. code-block:: c
+
+	int system_is_up(void)
+	{
+		return system_state == SYSTEM_RUNNING;
+	}
+	EXPORT_SYMBOL(system_is_up);
+
+在函數原型中，包含函數名和它們的數據類型。雖然 C 語言裡沒有這樣的要求，在
+Linux 里這是提倡的做法，因爲這樣可以很簡單的給讀者提供更多的有價值的信息。
+
+
+7) 集中的函數退出途徑
+------------------------------
+
+雖然被某些人聲稱已經過時，但是 goto 語句的等價物還是經常被編譯器所使用，具體
+形式是無條件跳轉指令。
+
+當一個函數從多個位置退出，並且需要做一些類似清理的常見操作時，goto 語句就很方
+便了。如果並不需要清理操作，那麼直接 return 即可。
+
+選擇一個能夠說明 goto 行爲或它爲何存在的標籤名。如果 goto 要釋放 ``buffer``,
+一個不錯的名字可以是 ``out_free_buffer:`` 。別去使用像 ``err1:`` 和 ``err2:``
+這樣的GW_BASIC 名稱，因爲一旦你添加或刪除了 (函數的) 退出路徑，你就必須對它們
+重新編號，這樣會難以去檢驗正確性。
+
+使用 goto 的理由是：
+
+- 無條件語句容易理解和跟蹤
+- 嵌套程度減小
+- 可以避免由於修改時忘記更新個別的退出點而導致錯誤
+- 讓編譯器省去刪除冗餘代碼的工作 ;)
+
+.. code-block:: c
+
+	int fun(int a)
+	{
+		int result = 0;
+		char *buffer;
+
+		buffer = kmalloc(SIZE, GFP_KERNEL);
+		if (!buffer)
+			return -ENOMEM;
+
+		if (condition1) {
+			while (loop1) {
+				...
+			}
+			result = 1;
+			goto out_free_buffer;
+		}
+		...
+	out_free_buffer:
+		kfree(buffer);
+		return result;
+	}
+
+一個需要注意的常見錯誤是 ``一個 err 錯誤`` ，就像這樣：
+
+.. code-block:: c
+
+	err:
+		kfree(foo->bar);
+		kfree(foo);
+		return ret;
+
+這段代碼的錯誤是，在某些退出路徑上 ``foo`` 是 NULL。通常情況下，通過把它分離
+成兩個錯誤標籤 ``err_free_bar:`` 和 ``err_free_foo:`` 來修復這個錯誤：
+
+.. code-block:: c
+
+	 err_free_bar:
+		kfree(foo->bar);
+	 err_free_foo:
+		kfree(foo);
+		return ret;
+
+理想情況下，你應該模擬錯誤來測試所有退出路徑。
+
+
+8) 注釋
+------------------------------
+
+注釋是好的，不過有過度注釋的危險。永遠不要在注釋里解釋你的代碼是如何運作的：
+更好的做法是讓別人一看你的代碼就可以明白，解釋寫的很差的代碼是浪費時間。
+
+一般的，你想要你的注釋告訴別人你的代碼做了什麼，而不是怎麼做的。也請你不要把
+注釋放在一個函數體內部：如果函數複雜到你需要獨立的注釋其中的一部分，你很可能
+需要回到第六章看一看。你可以做一些小注釋來註明或警告某些很聰明 (或者槽糕) 的
+做法，但不要加太多。你應該做的，是把注釋放在函數的頭部，告訴人們它做了什麼，
+也可以加上它做這些事情的原因。
+
+當注釋內核 API 函數時，請使用 kernel-doc 格式。請看
+Documentation/doc-guide/ 和 scripts/kernel-doc 以獲得詳細信息。
+
+長 (多行) 注釋的首選風格是：
+
+.. code-block:: c
+
+	/*
+	 * This is the preferred style for multi-line
+	 * comments in the Linux kernel source code.
+	 * Please use it consistently.
+	 *
+	 * Description:  A column of asterisks on the left side,
+	 * with beginning and ending almost-blank lines.
+	 */
+
+對於在 net/ 和 drivers/net/ 的文件，首選的長 (多行) 注釋風格有些不同。
+
+.. code-block:: c
+
+	/* The preferred comment style for files in net/ and drivers/net
+	 * looks like this.
+	 *
+	 * It is nearly the same as the generally preferred comment style,
+	 * but there is no initial almost-blank line.
+	 */
+
+注釋數據也是很重要的，不管是基本類型還是衍生類型。爲了方便實現這一點，每一行
+應只聲明一個數據 (不要使用逗號來一次聲明多個數據)。這樣你就有空間來爲每個數據
+寫一段小注釋來解釋它們的用途了。
+
+
+9) 你已經把事情弄糟了
+------------------------------
+
+這沒什麼，我們都是這樣。可能你的使用了很長時間 Unix 的朋友已經告訴你
+``GNU emacs`` 能自動幫你格式化 C 原始碼，而且你也注意到了，確實是這樣，不過它
+所使用的默認值和我們想要的相去甚遠 (實際上，甚至比隨機打的還要差——無數個猴子
+在 GNU emacs 里打字永遠不會創造出一個好程序) (譯註：Infinite Monkey Theorem)
+
+所以你要麼放棄 GNU emacs，要麼改變它讓它使用更合理的設定。要採用後一個方案，
+你可以把下面這段粘貼到你的 .emacs 文件里。
+
+.. code-block:: none
+
+  (defun c-lineup-arglist-tabs-only (ignored)
+    "Line up argument lists by tabs, not spaces"
+    (let* ((anchor (c-langelem-pos c-syntactic-element))
+           (column (c-langelem-2nd-pos c-syntactic-element))
+           (offset (- (1+ column) anchor))
+           (steps (floor offset c-basic-offset)))
+      (* (max steps 1)
+         c-basic-offset)))
+
+  (dir-locals-set-class-variables
+   'linux-kernel
+   '((c-mode . (
+          (c-basic-offset . 8)
+          (c-label-minimum-indentation . 0)
+          (c-offsets-alist . (
+                  (arglist-close         . c-lineup-arglist-tabs-only)
+                  (arglist-cont-nonempty .
+		      (c-lineup-gcc-asm-reg c-lineup-arglist-tabs-only))
+                  (arglist-intro         . +)
+                  (brace-list-intro      . +)
+                  (c                     . c-lineup-C-comments)
+                  (case-label            . 0)
+                  (comment-intro         . c-lineup-comment)
+                  (cpp-define-intro      . +)
+                  (cpp-macro             . -1000)
+                  (cpp-macro-cont        . +)
+                  (defun-block-intro     . +)
+                  (else-clause           . 0)
+                  (func-decl-cont        . +)
+                  (inclass               . +)
+                  (inher-cont            . c-lineup-multi-inher)
+                  (knr-argdecl-intro     . 0)
+                  (label                 . -1000)
+                  (statement             . 0)
+                  (statement-block-intro . +)
+                  (statement-case-intro  . +)
+                  (statement-cont        . +)
+                  (substatement          . +)
+                  ))
+          (indent-tabs-mode . t)
+          (show-trailing-whitespace . t)
+          ))))
+
+  (dir-locals-set-directory-class
+   (expand-file-name "~/src/linux-trees")
+   'linux-kernel)
+
+這會讓 emacs 在 ``~/src/linux-trees`` 下的 C 源文件獲得更好的內核代碼風格。
+
+不過就算你嘗試讓 emacs 正確的格式化代碼失敗了，也並不意味著你失去了一切：還可
+以用 ``indent`` 。
+
+不過，GNU indent 也有和 GNU emacs 一樣有問題的設定，所以你需要給它一些命令選
+項。不過，這還不算太糟糕，因爲就算是 GNU indent 的作者也認同 K&R 的權威性
+(GNU 的人並不是壞人，他們只是在這個問題上被嚴重的誤導了)，所以你只要給 indent
+指定選項 ``-kr -i8`` (代表 ``K&R，8 字符縮進``)，或使用 ``scripts/Lindent``
+這樣就可以以最時髦的方式縮進原始碼。
+
+``indent`` 有很多選項，特別是重新格式化注釋的時候，你可能需要看一下它的手冊。
+不過記住： ``indent`` 不能修正壞的編程習慣。
+
+
+10) Kconfig 配置文件
+------------------------------
+
+對於遍布源碼樹的所有 Kconfig* 配置文件來說，它們縮進方式有所不同。緊挨著
+``config`` 定義的行，用一個制表符縮進，然而 help 信息的縮進則額外增加 2 個空
+格。舉個例子::
+
+  config AUDIT
+	bool "Auditing support"
+	depends on NET
+	help
+	  Enable auditing infrastructure that can be used with another
+	  kernel subsystem, such as SELinux (which requires this for
+	  logging of avc messages output).  Does not do system-call
+	  auditing without CONFIG_AUDITSYSCALL.
+
+而那些危險的功能 (比如某些文件系統的寫支持) 應該在它們的提示字符串里顯著的聲
+明這一點::
+
+  config ADFS_FS_RW
+	bool "ADFS write support (DANGEROUS)"
+	depends on ADFS_FS
+	...
+
+要查看配置文件的完整文檔，請看 Documentation/kbuild/kconfig-language.rst。
+
+
+11) 數據結構
+------------------------------
+
+如果一個數據結構，在創建和銷毀它的單線執行環境之外可見，那麼它必須要有一個引
+用計數器。內核里沒有垃圾收集 (並且內核之外的垃圾收集慢且效率低下)，這意味著你
+絕對需要記錄你對這種數據結構的使用情況。
+
+引用計數意味著你能夠避免上鎖，並且允許多個用戶並行訪問這個數據結構——而不需要
+擔心這個數據結構僅僅因爲暫時不被使用就消失了，那些用戶可能不過是沉睡了一陣或
+者做了一些其他事情而已。
+
+注意上鎖 **不能** 取代引用計數。上鎖是爲了保持數據結構的一致性，而引用計數是一
+個內存管理技巧。通常二者都需要，不要把兩個搞混了。
+
+很多數據結構實際上有 2 級引用計數，它們通常有不同 ``類`` 的用戶。子類計數器統
+計子類用戶的數量，每當子類計數器減至零時，全局計數器減一。
+
+這種 ``多級引用計數`` 的例子可以在內存管理 (``struct mm_struct``: mm_users 和
+mm_count)，和文件系統 (``struct super_block``: s_count 和 s_active) 中找到。
+
+記住：如果另一個執行線索可以找到你的數據結構，但這個數據結構沒有引用計數器，
+這裡幾乎肯定是一個 bug。
+
+
+12) 宏，枚舉和RTL
+------------------------------
+
+用於定義常量的宏的名字及枚舉里的標籤需要大寫。
+
+.. code-block:: c
+
+	#define CONSTANT 0x12345
+
+在定義幾個相關的常量時，最好用枚舉。
+
+宏的名字請用大寫字母，不過形如函數的宏的名字可以用小寫字母。
+
+一般的，如果能寫成內聯函數就不要寫成像函數的宏。
+
+含有多個語句的宏應該被包含在一個 do-while 代碼塊里：
+
+.. code-block:: c
+
+	#define macrofun(a, b, c)			\
+		do {					\
+			if (a == 5)			\
+				do_this(b, c);		\
+		} while (0)
+
+使用宏的時候應避免的事情：
+
+1) 影響控制流程的宏：
+
+.. code-block:: c
+
+	#define FOO(x)					\
+		do {					\
+			if (blah(x) < 0)		\
+				return -EBUGGERED;	\
+		} while (0)
+
+**非常** 不好。它看起來像一個函數，不過卻能導致 ``調用`` 它的函數退出；不要打
+亂讀者大腦里的語法分析器。
+
+2) 依賴於一個固定名字的本地變量的宏：
+
+.. code-block:: c
+
+	#define FOO(val) bar(index, val)
+
+可能看起來像是個不錯的東西，不過它非常容易把讀代碼的人搞糊塗，而且容易導致看起
+來不相關的改動帶來錯誤。
+
+3) 作爲左值的帶參數的宏： FOO(x) = y；如果有人把 FOO 變成一個內聯函數的話，這
+   種用法就會出錯了。
+
+4) 忘記了優先級：使用表達式定義常量的宏必須將表達式置於一對小括號之內。帶參數
+   的宏也要注意此類問題。
+
+.. code-block:: c
+
+	#define CONSTANT 0x4000
+	#define CONSTEXP (CONSTANT | 3)
+
+5) 在宏里定義類似函數的本地變量時命名衝突：
+
+.. code-block:: c
+
+	#define FOO(x)				\
+	({					\
+		typeof(x) ret;			\
+		ret = calc_ret(x);		\
+		(ret);				\
+	})
+
+ret 是本地變量的通用名字 - __foo_ret 更不容易與一個已存在的變量衝突。
+
+cpp 手冊對宏的講解很詳細。gcc internals 手冊也詳細講解了 RTL，內核里的彙編語
+言經常用到它。
+
+
+13) 列印內核消息
+------------------------------
+
+內核開發者應該是受過良好教育的。請一定注意內核信息的拼寫，以給人以好的印象。
+不要用不規範的單詞比如 ``dont``，而要用 ``do not`` 或者 ``don't`` 。保證這些信
+息簡單明了,無歧義。
+
+內核信息不必以英文句號結束。
+
+在小括號里列印數字 (%d) 沒有任何價值，應該避免這樣做。
+
+<linux/device.h> 里有一些驅動模型診斷宏，你應該使用它們，以確保信息對應於正確
+的設備和驅動，並且被標記了正確的消息級別。這些宏有：dev_err(), dev_warn(),
+dev_info() 等等。對於那些不和某個特定設備相關連的信息，<linux/printk.h> 定義
+了 pr_notice(), pr_info(), pr_warn(), pr_err() 和其他。
+
+寫出好的調試信息可以是一個很大的挑戰；一旦你寫出後，這些信息在遠程除錯時能提
+供極大的幫助。然而列印調試信息的處理方式同列印非調試信息不同。其他 pr_XXX()
+函數能無條件地列印，pr_debug() 卻不；默認情況下它不會被編譯，除非定義了 DEBUG
+或設定了 CONFIG_DYNAMIC_DEBUG。實際這同樣是爲了 dev_dbg()，一個相關約定是在一
+個已經開啓了 DEBUG 時，使用 VERBOSE_DEBUG 來添加 dev_vdbg()。
+
+許多子系統擁有 Kconfig 調試選項來開啓 -DDEBUG 在對應的 Makefile 裡面；在其他
+情況下，特殊文件使用 #define DEBUG。當一條調試信息需要被無條件列印時，例如，
+如果已經包含一個調試相關的 #ifdef 條件，printk(KERN_DEBUG ...) 就可被使用。
+
+
+14) 分配內存
+------------------------------
+
+內核提供了下面的一般用途的內存分配函數：
+kmalloc(), kzalloc(), kmalloc_array(), kcalloc(), vmalloc() 和 vzalloc()。
+請參考 API 文檔以獲取有關它們的詳細信息。
+
+傳遞結構體大小的首選形式是這樣的：
+
+.. code-block:: c
+
+	p = kmalloc(sizeof(*p), ...);
+
+另外一種傳遞方式中，sizeof 的操作數是結構體的名字，這樣會降低可讀性，並且可能
+會引入 bug。有可能指針變量類型被改變時，而對應的傳遞給內存分配函數的 sizeof
+的結果不變。
+
+強制轉換一個 void 指針返回值是多餘的。C 語言本身保證了從 void 指針到其他任何
+指針類型的轉換是沒有問題的。
+
+分配一個數組的首選形式是這樣的：
+
+.. code-block:: c
+
+	p = kmalloc_array(n, sizeof(...), ...);
+
+分配一個零長數組的首選形式是這樣的：
+
+.. code-block:: c
+
+	p = kcalloc(n, sizeof(...), ...);
+
+兩種形式檢查分配大小 n * sizeof(...) 的溢出，如果溢出返回 NULL。
+
+
+15) 內聯弊病
+------------------------------
+
+有一個常見的誤解是 ``內聯`` 是 gcc 提供的可以讓代碼運行更快的一個選項。雖然使
+用內聯函數有時候是恰當的 (比如作爲一種替代宏的方式，請看第十二章)，不過很多情
+況下不是這樣。inline 的過度使用會使內核變大，從而使整個系統運行速度變慢。
+因爲體積大內核會占用更多的指令高速緩存，而且會導致 pagecache 的可用內存減少。
+想像一下，一次 pagecache 未命中就會導致一次磁碟尋址，將耗時 5 毫秒。5 毫秒的
+時間內 CPU 能執行很多很多指令。
+
+一個基本的原則是如果一個函數有 3 行以上，就不要把它變成內聯函數。這個原則的一
+個例外是，如果你知道某個參數是一個編譯時常量，而且因爲這個常量你確定編譯器在
+編譯時能優化掉你的函數的大部分代碼，那仍然可以給它加上 inline 關鍵字。
+kmalloc() 內聯函數就是一個很好的例子。
+
+人們經常主張給 static 的而且只用了一次的函數加上 inline，如此不會有任何損失，
+因爲沒有什麼好權衡的。雖然從技術上說這是正確的，但是實際上這種情況下即使不加
+inline gcc 也可以自動使其內聯。而且其他用戶可能會要求移除 inline，由此而來的
+爭論會抵消 inline 自身的潛在價值，得不償失。
+
+
+16) 函數返回值及命名
+------------------------------
+
+函數可以返回多種不同類型的值，最常見的一種是表明函數執行成功或者失敗的值。這樣
+的一個值可以表示爲一個錯誤代碼整數 (-Exxx＝失敗，0＝成功) 或者一個 ``成功``
+布爾值 (0＝失敗，非0＝成功)。
+
+混合使用這兩種表達方式是難於發現的 bug 的來源。如果 C 語言本身嚴格區分整形和
+布爾型變量，那麼編譯器就能夠幫我們發現這些錯誤... 不過 C 語言不區分。爲了避免
+產生這種 bug，請遵循下面的慣例::
+
+	如果函數的名字是一個動作或者強制性的命令，那麼這個函數應該返回錯誤代
+	碼整數。如果是一個判斷，那麼函數應該返回一個 "成功" 布爾值。
+
+比如， ``add work`` 是一個命令，所以 add_work() 在成功時返回 0，在失敗時返回
+-EBUSY。類似的，因爲 ``PCI device present`` 是一個判斷，所以 pci_dev_present()
+在成功找到一個匹配的設備時應該返回 1，如果找不到時應該返回 0。
+
+所有 EXPORTed 函數都必須遵守這個慣例，所有的公共函數也都應該如此。私有
+(static) 函數不需要如此，但是我們也推薦這樣做。
+
+返回值是實際計算結果而不是計算是否成功的標誌的函數不受此慣例的限制。一般的，
+他們通過返回一些正常值範圍之外的結果來表示出錯。典型的例子是返回指針的函數，
+他們使用 NULL 或者 ERR_PTR 機制來報告錯誤。
+
+
+17) 不要重新發明內核宏
+------------------------------
+
+頭文件 include/linux/kernel.h 包含了一些宏，你應該使用它們，而不要自己寫一些
+它們的變種。比如，如果你需要計算一個數組的長度，使用這個宏
+
+.. code-block:: c
+
+	#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+類似的，如果你要計算某結構體成員的大小，使用
+
+.. code-block:: c
+
+	#define sizeof_field(t, f) (sizeof(((t*)0)->f))
+
+還有可以做嚴格的類型檢查的 min() 和 max() 宏，如果你需要可以使用它們。你可以
+自己看看那個頭文件里還定義了什麼你可以拿來用的東西，如果有定義的話，你就不應
+在你的代碼里自己重新定義。
+
+
+18) 編輯器模式行和其他需要羅嗦的事情
+--------------------------------------------------
+
+有一些編輯器可以解釋嵌入在源文件里的由一些特殊標記標明的配置信息。比如，emacs
+能夠解釋被標記成這樣的行：
+
+.. code-block:: c
+
+	-*- mode: c -*-
+
+或者這樣的：
+
+.. code-block:: c
+
+	/*
+	Local Variables:
+	compile-command: "gcc -DMAGIC_DEBUG_FLAG foo.c"
+	End:
+	*/
+
+Vim 能夠解釋這樣的標記：
+
+.. code-block:: c
+
+	/* vim:set sw=8 noet */
+
+不要在原始碼中包含任何這樣的內容。每個人都有他自己的編輯器配置，你的源文件不
+應該覆蓋別人的配置。這包括有關縮進和模式配置的標記。人們可以使用他們自己定製
+的模式，或者使用其他可以產生正確的縮進的巧妙方法。
+
+
+19) 內聯彙編
+------------------------------
+
+在特定架構的代碼中，你可能需要內聯彙編與 CPU 和平台相關功能連接。需要這麼做時
+就不要猶豫。然而，當 C 可以完成工作時，不要平白無故地使用內聯彙編。在可能的情
+況下，你可以並且應該用 C 和硬體溝通。
+
+請考慮去寫捆綁通用位元 (wrap common bits) 的內聯彙編的簡單輔助函數，別去重複
+地寫下只有細微差異內聯彙編。記住內聯彙編可以使用 C 參數。
+
+大型，有一定複雜度的彙編函數應該放在 .S 文件內，用相應的 C 原型定義在 C 頭文
+件中。彙編函數的 C 原型應該使用 ``asmlinkage`` 。
+
+你可能需要把彙編語句標記爲 volatile，用來阻止 GCC 在沒發現任何副作用後就把它
+移除了。你不必總是這樣做，儘管，這不必要的舉動會限制優化。
+
+在寫一個包含多條指令的單個內聯彙編語句時，把每條指令用引號分割而且各占一行，
+除了最後一條指令外，在每個指令結尾加上 \n\t，讓彙編輸出時可以正確地縮進下一條
+指令：
+
+.. code-block:: c
+
+	asm ("magic %reg1, #42\n\t"
+	     "more_magic %reg2, %reg3"
+	     : /* outputs */ : /* inputs */ : /* clobbers */);
+
+
+20) 條件編譯
+------------------------------
+
+只要可能，就不要在 .c 文件裡面使用預處理條件 (#if, #ifdef)；這樣做讓代碼更難
+閱讀並且更難去跟蹤邏輯。替代方案是，在頭文件中用預處理條件提供給那些 .c 文件
+使用，再給 #else 提供一個空樁 (no-op stub) 版本，然後在 .c 文件內無條件地調用
+那些 (定義在頭文件內的) 函數。這樣做，編譯器會避免爲樁函數 (stub) 的調用生成
+任何代碼，產生的結果是相同的，但邏輯將更加清晰。
+
+最好傾向於編譯整個函數，而不是函數的一部分或表達式的一部分。與其放一個 ifdef
+在表達式內，不如分解出部分或全部表達式，放進一個單獨的輔助函數，並應用預處理
+條件到這個輔助函數內。
+
+如果你有一個在特定配置中，可能變成未使用的函數或變量，編譯器會警告它定義了但
+未使用，把它標記爲 __maybe_unused 而不是將它包含在一個預處理條件中。(然而，如
+果一個函數或變量總是未使用，就直接刪除它。)
+
+在代碼中，儘可能地使用 IS_ENABLED 宏來轉化某個 Kconfig 標記爲 C 的布爾
+表達式，並在一般的 C 條件中使用它：
+
+.. code-block:: c
+
+	if (IS_ENABLED(CONFIG_SOMETHING)) {
+		...
+	}
+
+編譯器會做常量摺疊，然後就像使用 #ifdef 那樣去包含或排除代碼塊，所以這不會帶
+來任何運行時開銷。然而，這種方法依舊允許 C 編譯器查看塊內的代碼，並檢查它的正
+確性 (語法，類型，符號引用，等等)。因此，如果條件不滿足，代碼塊內的引用符號就
+不存在時，你還是必須去用 #ifdef。
+
+在任何有意義的 #if 或 #ifdef 塊的末尾 (超過幾行的)，在 #endif 同一行的後面寫下
+註解，注釋這個條件表達式。例如：
+
+.. code-block:: c
+
+	#ifdef CONFIG_SOMETHING
+	...
+	#endif /* CONFIG_SOMETHING */
+
+
+附錄 I) 參考
+-------------------
+
+The C Programming Language, 第二版
+作者：Brian W. Kernighan 和 Denni M. Ritchie.
+Prentice Hall, Inc., 1988.
+ISBN 0-13-110362-8 (軟皮), 0-13-110370-9 (硬皮).
+
+The Practice of Programming
+作者：Brian W. Kernighan 和 Rob Pike.
+Addison-Wesley, Inc., 1999.
+ISBN 0-201-61586-X.
+
+GNU 手冊 - 遵循 K&R 標準和此文本 - cpp, gcc, gcc internals and indent,
+都可以從 https://www.gnu.org/manual/ 找到
+
+WG14 是 C 語言的國際標準化工作組，URL: http://www.open-std.org/JTC1/SC22/WG14/
+
+Kernel process/coding-style.rst，作者 greg@kroah.com 發表於 OLS 2002：
+http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/
+
diff --git a/Documentation/translations/zh_TW/process/development-process.rst b/Documentation/translations/zh_TW/process/development-process.rst
new file mode 100644
index 000000000000..45e6385647cd
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/development-process.rst
@@ -0,0 +1,30 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/development-process.rst <development_process_main>`
+:Translator: Alex Shi <alex.shi@linux.alibaba.com>
+             Hu Haowen <src.res@email.cn>
+
+.. _tw_development_process_main:
+
+內核開發過程指南
+================
+
+內容:
+
+.. toctree::
+   :numbered:
+   :maxdepth: 2
+
+   1.Intro
+   2.Process
+   3.Early-stage
+   4.Coding
+   5.Posting
+   6.Followthrough
+   7.AdvancedTopics
+   8.Conclusion
+
+本文檔的目的是幫助開發人員（及其經理）以最小的挫折感與開發社區合作。它試圖記錄這個社區如何以一種不熟悉Linux內核開發（或者實際上是自由軟體開發）的人可以訪問的方式工作。雖然這裡有一些技術資料，但這是一個面向過程的討論，不需要深入了解內核編程就可以理解。
+
diff --git a/Documentation/translations/zh_TW/process/email-clients.rst b/Documentation/translations/zh_TW/process/email-clients.rst
new file mode 100644
index 000000000000..4ba543d06f3b
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/email-clients.rst
@@ -0,0 +1,252 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _tw_email_clients:
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/email-clients.rst <email_clients>`
+
+譯者::
+
+        中文版維護者： 賈威威  Harry Wei <harryxiyou@gmail.com>
+        中文版翻譯者： 賈威威  Harry Wei <harryxiyou@gmail.com>
+                       時奎亮  Alex Shi <alex.shi@linux.alibaba.com>
+        中文版校譯者： Yinglin Luan <synmyth@gmail.com>
+        	       Xiaochen Wang <wangxiaochen0@gmail.com>
+                       yaxinsn <yaxinsn@163.com>
+                      Hu Haowen <src.res@email.cn>
+
+Linux郵件客戶端配置信息
+=======================
+
+Git
+---
+
+現在大多數開發人員使用 ``git send-email`` 而不是常規的電子郵件客戶端。這方面
+的手冊非常好。在接收端，維護人員使用 ``git am`` 加載補丁。
+
+如果你是 ``git`` 新手，那麼把你的第一個補丁發送給你自己。將其保存爲包含所有
+標題的原始文本。運行 ``git am raw_email.txt`` ，然後使用 ``git log`` 查看更
+改日誌。如果工作正常，再將補丁發送到相應的郵件列表。
+
+
+普通配置
+--------
+Linux內核補丁是通過郵件被提交的，最好把補丁作爲郵件體的內嵌文本。有些維護者
+接收附件，但是附件的內容格式應該是"text/plain"。然而，附件一般是不贊成的，
+因爲這會使補丁的引用部分在評論過程中變的很困難。
+
+用來發送Linux內核補丁的郵件客戶端在發送補丁時應該處於文本的原始狀態。例如，
+他們不能改變或者刪除制表符或者空格，甚至是在每一行的開頭或者結尾。
+
+不要通過"format=flowed"模式發送補丁。這樣會引起不可預期以及有害的斷行。
+
+不要讓你的郵件客戶端進行自動換行。這樣也會破壞你的補丁。
+
+郵件客戶端不能改變文本的字符集編碼方式。要發送的補丁只能是ASCII或者UTF-8編碼方式，
+如果你使用UTF-8編碼方式發送郵件，那麼你將會避免一些可能發生的字符集問題。
+
+郵件客戶端應該形成並且保持 References: 或者 In-Reply-To: 標題，那麼
+郵件話題就不會中斷。
+
+複製粘帖(或者剪貼粘帖)通常不能用於補丁，因爲制表符會轉換爲空格。使用xclipboard, xclip
+或者xcutsel也許可以，但是最好測試一下或者避免使用複製粘帖。
+
+不要在使用PGP/GPG署名的郵件中包含補丁。這樣會使得很多腳本不能讀取和適用於你的補丁。
+（這個問題應該是可以修復的）
+
+在給內核郵件列表發送補丁之前，給自己發送一個補丁是個不錯的主意，保存接收到的
+郵件，將補丁用'patch'命令打上，如果成功了，再給內核郵件列表發送。
+
+
+一些郵件客戶端提示
+------------------
+這裡給出一些詳細的MUA配置提示，可以用於給Linux內核發送補丁。這些並不意味是
+所有的軟體包配置總結。
+
+說明：
+TUI = 以文本爲基礎的用戶接口
+GUI = 圖形界面用戶接口
+
+Alpine (TUI)
+~~~~~~~~~~~~
+
+配置選項：
+在"Sending Preferences"部分：
+
+- "Do Not Send Flowed Text"必須開啓
+- "Strip Whitespace Before Sending"必須關閉
+
+當寫郵件時，光標應該放在補丁會出現的地方，然後按下CTRL-R組合鍵，使指定的
+補丁文件嵌入到郵件中。
+
+Evolution (GUI)
+~~~~~~~~~~~~~~~
+
+一些開發者成功的使用它發送補丁
+
+當選擇郵件選項：Preformat
+  從Format->Heading->Preformatted (Ctrl-7)或者工具欄
+
+然後使用：
+  Insert->Text File... (Alt-n x)插入補丁文件。
+
+你還可以"diff -Nru old.c new.c | xclip"，選擇Preformat，然後使用中間鍵進行粘帖。
+
+Kmail (GUI)
+~~~~~~~~~~~
+
+一些開發者成功的使用它發送補丁。
+
+默認設置不爲HTML格式是合適的；不要啓用它。
+
+當書寫一封郵件的時候，在選項下面不要選擇自動換行。唯一的缺點就是你在郵件中輸入的任何文本
+都不會被自動換行，因此你必須在發送補丁之前手動換行。最簡單的方法就是啓用自動換行來書寫郵件，
+然後把它保存爲草稿。一旦你在草稿中再次打開它，它已經全部自動換行了，那麼你的郵件雖然沒有
+選擇自動換行，但是還不會失去已有的自動換行。
+
+在郵件的底部，插入補丁之前，放上常用的補丁定界符：三個連字號(---)。
+
+然後在"Message"菜單條目，選擇插入文件，接著選取你的補丁文件。還有一個額外的選項，你可以
+通過它配置你的郵件建立工具欄菜單，還可以帶上"insert file"圖標。
+
+你可以安全地通過GPG標記附件，但是內嵌補丁最好不要使用GPG標記它們。作爲內嵌文本的簽發補丁，
+當從GPG中提取7位編碼時會使他們變的更加複雜。
+
+如果你非要以附件的形式發送補丁，那麼就右鍵點擊附件，然後選中屬性，突出"Suggest automatic
+display"，這樣內嵌附件更容易讓讀者看到。
+
+當你要保存將要發送的內嵌文本補丁，你可以從消息列表窗格選擇包含補丁的郵件，然後右擊選擇
+"save as"。你可以使用一個沒有更改的包含補丁的郵件，如果它是以正確的形式組成。當你正真在它
+自己的窗口之下察看，那時沒有選項可以保存郵件--已經有一個這樣的bug被匯報到了kmail的bugzilla
+並且希望這將會被處理。郵件是以只針對某個用戶可讀寫的權限被保存的，所以如果你想把郵件複製到其他地方，
+你不得不把他們的權限改爲組或者整體可讀。
+
+Lotus Notes (GUI)
+~~~~~~~~~~~~~~~~~
+
+不要使用它。
+
+Mutt (TUI)
+~~~~~~~~~~
+
+很多Linux開發人員使用mutt客戶端，所以證明它肯定工作的非常漂亮。
+
+Mutt不自帶編輯器，所以不管你使用什麼編輯器都不應該帶有自動斷行。大多數編輯器都帶有
+一個"insert file"選項，它可以通過不改變文件內容的方式插入文件。
+
+'vim'作爲mutt的編輯器：
+  set editor="vi"
+
+  如果使用xclip，敲入以下命令
+  :set paste
+  按中鍵之前或者shift-insert或者使用
+  :r filename
+
+如果想要把補丁作爲內嵌文本。
+(a)ttach工作的很好，不帶有"set paste"。
+
+你可以通過 ``git format-patch`` 生成補丁，然後用 Mutt發送它們::
+
+        $ mutt -H 0001-some-bug-fix.patch
+
+配置選項：
+它應該以默認設置的形式工作。
+然而，把"send_charset"設置爲"us-ascii::utf-8"也是一個不錯的主意。
+
+Mutt 是高度可配置的。 這裡是個使用mutt通過 Gmail 發送的補丁的最小配置::
+
+  # .muttrc
+  # ================  IMAP ====================
+  set imap_user = 'yourusername@gmail.com'
+  set imap_pass = 'yourpassword'
+  set spoolfile = imaps://imap.gmail.com/INBOX
+  set folder = imaps://imap.gmail.com/
+  set record="imaps://imap.gmail.com/[Gmail]/Sent Mail"
+  set postponed="imaps://imap.gmail.com/[Gmail]/Drafts"
+  set mbox="imaps://imap.gmail.com/[Gmail]/All Mail"
+
+  # ================  SMTP  ====================
+  set smtp_url = "smtp://username@smtp.gmail.com:587/"
+  set smtp_pass = $imap_pass
+  set ssl_force_tls = yes # Require encrypted connection
+
+  # ================  Composition  ====================
+  set editor = `echo \$EDITOR`
+  set edit_headers = yes  # See the headers when editing
+  set charset = UTF-8     # value of $LANG; also fallback for send_charset
+  # Sender, email address, and sign-off line must match
+  unset use_domain        # because joe@localhost is just embarrassing
+  set realname = "YOUR NAME"
+  set from = "username@gmail.com"
+  set use_from = yes
+
+Mutt文檔含有更多信息:
+
+    http://dev.mutt.org/trac/wiki/UseCases/Gmail
+
+    http://dev.mutt.org/doc/manual.html
+
+Pine (TUI)
+~~~~~~~~~~
+
+Pine過去有一些空格刪減問題，但是這些現在應該都被修復了。
+
+如果可以，請使用alpine(pine的繼承者)
+
+配置選項：
+- 最近的版本需要消除流程文本
+- "no-strip-whitespace-before-send"選項也是需要的。
+
+
+Sylpheed (GUI)
+~~~~~~~~~~~~~~
+
+- 內嵌文本可以很好的工作（或者使用附件）。
+- 允許使用外部的編輯器。
+- 對於目錄較多時非常慢。
+- 如果通過non-SSL連接，無法使用TLS SMTP授權。
+- 在組成窗口中有一個很有用的ruler bar。
+- 給地址本中添加地址就不會正確的了解顯示名。
+
+Thunderbird (GUI)
+~~~~~~~~~~~~~~~~~
+
+默認情況下，thunderbird很容易損壞文本，但是還有一些方法可以強制它變得更好。
+
+- 在用戶帳號設置里，組成和尋址，不要選擇"Compose messages in HTML format"。
+
+- 編輯你的Thunderbird配置設置來使它不要拆行使用：user_pref("mailnews.wraplength", 0);
+
+- 編輯你的Thunderbird配置設置，使它不要使用"format=flowed"格式：user_pref("mailnews.
+  send_plaintext_flowed", false);
+
+- 你需要使Thunderbird變爲預先格式方式：
+  如果默認情況下你書寫的是HTML格式，那不是很難。僅僅從標題欄的下拉框中選擇"Preformat"格式。
+  如果默認情況下你書寫的是文本格式，你不得把它改爲HTML格式（僅僅作爲一次性的）來書寫新的消息，
+  然後強制使它回到文本格式，否則它就會拆行。要實現它，在寫信的圖標上使用shift鍵來使它變爲HTML
+  格式，然後標題欄的下拉框中選擇"Preformat"格式。
+
+- 允許使用外部的編輯器：
+  針對Thunderbird打補丁最簡單的方法就是使用一個"external editor"擴展，然後使用你最喜歡的
+  $EDITOR來讀取或者合併補丁到文本中。要實現它，可以下載並且安裝這個擴展，然後添加一個使用它的
+  按鍵View->Toolbars->Customize...最後當你書寫信息的時候僅僅點擊它就可以了。
+
+TkRat (GUI)
+~~~~~~~~~~~
+
+可以使用它。使用"Insert file..."或者外部的編輯器。
+
+Gmail (Web GUI)
+~~~~~~~~~~~~~~~
+
+不要使用它發送補丁。
+
+Gmail網頁客戶端自動地把制表符轉換爲空格。
+
+雖然制表符轉換爲空格問題可以被外部編輯器解決，同時它還會使用回車換行把每行拆分爲78個字符。
+
+另一個問題是Gmail還會把任何不是ASCII的字符的信息改爲base64編碼。它把東西變的像歐洲人的名字。
+
+                                ###
+
diff --git a/Documentation/translations/zh_TW/process/embargoed-hardware-issues.rst b/Documentation/translations/zh_TW/process/embargoed-hardware-issues.rst
new file mode 100644
index 000000000000..6c76fc96131a
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/embargoed-hardware-issues.rst
@@ -0,0 +1,232 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/embargoed-hardware-issues.rst <embargoed_hardware_issues>`
+:Translator: Alex Shi <alex.shi@linux.alibaba.com>
+             Hu Haowen <src.res@email.cn>
+
+被限制的硬體問題
+================
+
+範圍
+----
+
+導致安全問題的硬體問題與只影響Linux內核的純軟體錯誤是不同的安全錯誤類別。
+
+必須區別對待諸如熔毀(Meltdown)、Spectre、L1TF等硬體問題，因爲它們通常會影響
+所有作業系統（「OS」），因此需要在不同的OS供應商、發行版、硬體供應商和其他各方
+之間進行協調。對於某些問題，軟體緩解可能依賴於微碼或固件更新，這需要進一步的
+協調。
+
+.. _tw_Contact:
+
+接觸
+----
+
+Linux內核硬體安全小組獨立於普通的Linux內核安全小組。
+
+該小組只負責協調被限制的硬體安全問題。Linux內核中純軟體安全漏洞的報告不由該
+小組處理，報告者將被引導至常規Linux內核安全小組(:ref:`Documentation/admin-guide/
+<securitybugs>`)聯繫。
+
+可以通過電子郵件 <hardware-security@kernel.org> 與小組聯繫。這是一份私密的安全
+官名單，他們將幫助您根據我們的文檔化流程協調問題。
+
+郵件列表是加密的，發送到列表的電子郵件可以通過PGP或S/MIME加密，並且必須使用報告
+者的PGP密鑰或S/MIME證書籤名。該列表的PGP密鑰和S/MIME證書可從
+https://www.kernel.org/.... 獲得。
+
+雖然硬體安全問題通常由受影響的硬體供應商處理，但我們歡迎發現潛在硬體缺陷的研究
+人員或個人與我們聯繫。
+
+硬體安全官
+^^^^^^^^^^
+
+目前的硬體安全官小組:
+
+  - Linus Torvalds（Linux基金會院士）
+  - Greg Kroah Hartman（Linux基金會院士）
+  - Thomas Gleixner（Linux基金會院士）
+
+郵件列表的操作
+^^^^^^^^^^^^^^
+
+處理流程中使用的加密郵件列表託管在Linux Foundation的IT基礎設施上。通過提供這項
+服務，Linux基金會的IT基礎設施安全總監在技術上有能力訪問被限制的信息，但根據他
+的僱傭合同，他必須保密。Linux基金會的IT基礎設施安全總監還負責 kernel.org 基礎
+設施。
+
+Linux基金會目前的IT基礎設施安全總監是 Konstantin Ryabitsev。
+
+保密協議
+--------
+
+Linux內核硬體安全小組不是正式的機構，因此無法簽訂任何保密協議。核心社區意識到
+這些問題的敏感性，並提供了一份諒解備忘錄。
+
+諒解備忘錄
+----------
+
+Linux內核社區深刻理解在不同作業系統供應商、發行商、硬體供應商和其他各方之間
+進行協調時，保持硬體安全問題處於限制狀態的要求。
+
+Linux內核社區在過去已經成功地處理了硬體安全問題，並且有必要的機制允許在限制
+限制下進行符合社區的開發。
+
+Linux內核社區有一個專門的硬體安全小組負責初始聯繫，並監督在限制規則下處理
+此類問題的過程。
+
+硬體安全小組確定開發人員（領域專家），他們將組成特定問題的初始響應小組。最初
+的響應小組可以引入更多的開發人員（領域專家）以最佳的技術方式解決這個問題。
+
+所有相關開發商承諾遵守限制規定，並對收到的信息保密。違反承諾將導致立即從當前
+問題中排除，並從所有相關郵件列表中刪除。此外，硬體安全小組還將把違反者排除在
+未來的問題之外。這一後果的影響在我們社區是一種非常有效的威懾。如果發生違規
+情況，硬體安全小組將立即通知相關方。如果您或任何人發現潛在的違規行爲，請立即
+向硬體安全人員報告。
+
+流程
+^^^^
+
+由於Linux內核開發的全球分布式特性，面對面的會議幾乎不可能解決硬體安全問題。
+由於時區和其他因素，電話會議很難協調，只能在絕對必要時使用。加密電子郵件已被
+證明是解決此類問題的最有效和最安全的通信方法。
+
+開始披露
+""""""""
+
+披露內容首先通過電子郵件聯繫Linux內核硬體安全小組。此初始聯繫人應包含問題的
+描述和任何已知受影響硬體的列表。如果您的組織製造或分發受影響的硬體，我們建議
+您也考慮哪些其他硬體可能會受到影響。
+
+硬體安全小組將提供一個特定於事件的加密郵件列表，用於與報告者進行初步討論、
+進一步披露和協調。
+
+硬體安全小組將向披露方提供一份開發人員（領域專家）名單，在與開發人員確認他們
+將遵守本諒解備忘錄和文件化流程後，應首先告知開發人員有關該問題的信息。這些開發
+人員組成初始響應小組，並在初始接觸後負責處理問題。硬體安全小組支持響應小組，
+但不一定參與緩解開發過程。
+
+雖然個別開發人員可能通過其僱主受到保密協議的保護，但他們不能以Linux內核開發
+人員的身份簽訂個別保密協議。但是，他們將同意遵守這一書面程序和諒解備忘錄。
+
+披露方應提供已經或應該被告知該問題的所有其他實體的聯繫人名單。這有幾個目的:
+
+  - 披露的實體列表允許跨行業通信，例如其他作業系統供應商、硬體供應商等。
+
+  - 可聯繫已披露的實體，指定應參與緩解措施開發的專家。
+
+  - 如果需要處理某一問題的專家受僱於某一上市實體或某一上市實體的成員，則響應
+    小組可要求該實體披露該專家。這確保專家也是實體反應小組的一部分。
+
+披露
+""""
+
+披露方通過特定的加密郵件列表向初始響應小組提供詳細信息。
+
+根據我們的經驗，這些問題的技術文檔通常是一個足夠的起點，最好通過電子郵件進行
+進一步的技術澄清。
+
+緩解開發
+""""""""
+
+初始響應小組設置加密郵件列表，或在適當的情況下重新修改現有郵件列表。
+
+使用郵件列表接近於正常的Linux開發過程，並且在過去已經成功地用於爲各種硬體安全
+問題開發緩解措施。
+
+郵件列表的操作方式與正常的Linux開發相同。發布、討論和審查修補程序，如果同意，
+則應用於非公共git存儲庫，參與開發人員只能通過安全連接訪問該存儲庫。存儲庫包含
+針對主線內核的主開發分支，並根據需要爲穩定的內核版本提供向後移植分支。
+
+最初的響應小組將根據需要從Linux內核開發人員社區中確定更多的專家。引進專家可以
+在開發過程中的任何時候發生，需要及時處理。
+
+如果專家受僱於披露方提供的披露清單上的實體或其成員，則相關實體將要求其參與。
+
+否則，披露方將被告知專家參與的情況。諒解備忘錄涵蓋了專家，要求披露方確認參與。
+如果披露方有令人信服的理由提出異議，則必須在五個工作日內提出異議，並立即與事件
+小組解決。如果披露方在五個工作日內未作出回應，則視爲默許。
+
+在確認或解決異議後，專家由事件小組披露，並進入開發過程。
+
+協調發布
+""""""""
+
+有關各方將協商限制結束的日期和時間。此時，準備好的緩解措施集成到相關的內核樹中
+並發布。
+
+雖然我們理解硬體安全問題需要協調限制時間，但限制時間應限制在所有有關各方制定、
+測試和準備緩解措施所需的最短時間內。人爲地延長限制時間以滿足會議討論日期或其他
+非技術原因，會給相關的開發人員和響應小組帶來了更多的工作和負擔，因爲補丁需要
+保持最新，以便跟蹤正在進行的上游內核開發，這可能會造成衝突的更改。
+
+CVE分配
+"""""""
+
+硬體安全小組和初始響應小組都不分配CVE，開發過程也不需要CVE。如果CVE是由披露方
+提供的，則可用於文檔中。
+
+流程專使
+--------
+
+爲了協助這一進程，我們在各組織設立了專使，他們可以回答有關報告流程和進一步處理
+的問題或提供指導。專使不參與特定問題的披露，除非響應小組或相關披露方提出要求。
+現任專使名單:
+
+  ============= ========================================================
+  ARM
+  AMD		Tom Lendacky <tom.lendacky@amd.com>
+  IBM
+  Intel		Tony Luck <tony.luck@intel.com>
+  Qualcomm	Trilok Soni <tsoni@codeaurora.org>
+
+  Microsoft	Sasha Levin <sashal@kernel.org>
+  VMware
+  Xen		Andrew Cooper <andrew.cooper3@citrix.com>
+
+  Canonical	John Johansen <john.johansen@canonical.com>
+  Debian	Ben Hutchings <ben@decadent.org.uk>
+  Oracle	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+  Red Hat	Josh Poimboeuf <jpoimboe@redhat.com>
+  SUSE		Jiri Kosina <jkosina@suse.cz>
+
+  Amazon
+  Google	Kees Cook <keescook@chromium.org>
+  ============= ========================================================
+
+如果要將您的組織添加到專使名單中，請與硬體安全小組聯繫。被提名的專使必須完全
+理解和支持我們的過程，並且在Linux內核社區中很容易聯繫。
+
+加密郵件列表
+------------
+
+我們使用加密郵件列表進行通信。這些列表的工作原理是，發送到列表的電子郵件使用
+列表的PGP密鑰或列表的/MIME證書進行加密。郵件列表軟體對電子郵件進行解密，並
+使用訂閱者的PGP密鑰或S/MIME證書爲每個訂閱者分別對其進行重新加密。有關郵件列表
+軟體和用於確保列表安全和數據保護的設置的詳細信息，請訪問:
+https://www.kernel.org/....
+
+關鍵點
+^^^^^^
+
+初次接觸見 :ref:`tw_Contact`. 對於特定於事件的郵件列表，密鑰和S/MIME證書通過
+特定列表發送的電子郵件傳遞給訂閱者。
+
+訂閱事件特定列表
+^^^^^^^^^^^^^^^^
+
+訂閱由響應小組處理。希望參與通信的披露方將潛在訂戶的列表發送給響應組，以便
+響應組可以驗證訂閱請求。
+
+每個訂戶都需要通過電子郵件向響應小組發送訂閱請求。電子郵件必須使用訂閱伺服器
+的PGP密鑰或S/MIME證書籤名。如果使用PGP密鑰，則必須從公鑰伺服器獲得該密鑰，
+並且理想情況下該密鑰連接到Linux內核的PGP信任網。另請參見:
+https://www.kernel.org/signature.html.
+
+響應小組驗證訂閱者，並將訂閱者添加到列表中。訂閱後，訂閱者將收到來自郵件列表
+的電子郵件，該郵件列表使用列表的PGP密鑰或列表的/MIME證書籤名。訂閱者的電子郵件
+客戶端可以從簽名中提取PGP密鑰或S/MIME證書，以便訂閱者可以向列表發送加密電子
+郵件。
+
diff --git a/Documentation/translations/zh_TW/process/howto.rst b/Documentation/translations/zh_TW/process/howto.rst
new file mode 100644
index 000000000000..2043691b92e3
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/howto.rst
@@ -0,0 +1,500 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _tw_process_howto:
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/howto.rst <process_howto>`
+
+譯者::
+
+    英文版維護者： Greg Kroah-Hartman <greg@kroah.com>
+    中文版維護者： 李陽  Li Yang <leoyang.li@nxp.com>
+    中文版翻譯者： 李陽  Li Yang <leoyang.li@nxp.com>
+                   時奎亮 Alex Shi <alex.shi@linux.alibaba.com>
+    中文版校譯者:
+                   鍾宇  TripleX Chung <xxx.phy@gmail.com>
+                   陳琦  Maggie Chen <chenqi@beyondsoft.com>
+                   王聰  Wang Cong <xiyou.wangcong@gmail.com>
+                   胡皓文 Hu Haowen <src.res@email.cn>
+
+如何參與Linux內核開發
+=====================
+
+這是一篇將如何參與Linux內核開發的相關問題一網打盡的終極祕笈。它將指導你
+成爲一名Linux內核開發者，並且學會如何同Linux內核開發社區合作。它儘可能不
+包括任何關於內核編程的技術細節，但會給你指引一條獲得這些知識的正確途徑。
+
+如果這篇文章中的任何內容不再適用，請給文末列出的文件維護者發送補丁。
+
+
+入門
+----
+
+你想了解如何成爲一名Linux內核開發者？或者老闆吩咐你「給這個設備寫個Linux
+驅動程序」？這篇文章的目的就是教會你達成這些目標的全部訣竅，它將描述你需
+要經過的流程以及給出如何同內核社區合作的一些提示。它還將試圖解釋內核社區
+爲何這樣運作。
+
+Linux內核大部分是由C語言寫成的，一些體系結構相關的代碼用到了彙編語言。要
+參與內核開發，你必須精通C語言。除非你想爲某個架構開發底層代碼，否則你並
+不需要了解（任何體系結構的）彙編語言。下面列舉的書籍雖然不能替代紮實的C
+語言教育和多年的開發經驗，但如果需要的話，做爲參考還是不錯的：
+
+ - "The C Programming Language" by Kernighan and Ritchie [Prentice Hall]
+   《C程序設計語言（第2版·新版）》（徐寶文 李志 譯）[機械工業出版社]
+ - "Practical C Programming" by Steve Oualline [O'Reilly]
+   《實用C語言編程（第三版）》（郭大海 譯）[中國電力出版社]
+ - "C:  A Reference Manual" by Harbison and Steele [Prentice Hall]
+   《C語言參考手冊（原書第5版）》（邱仲潘 等譯）[機械工業出版社]
+
+Linux內核使用GNU C和GNU工具鏈開發。雖然它遵循ISO C89標準，但也用到了一些
+標準中沒有定義的擴展。內核是自給自足的C環境，不依賴於標準C庫的支持，所以
+並不支持C標準中的部分定義。比如long long類型的大數除法和浮點運算就不允許
+使用。有時候確實很難弄清楚內核對工具鏈的要求和它所使用的擴展，不幸的是目
+前還沒有明確的參考資料可以解釋它們。請查閱gcc信息頁（使用「info gcc」命令
+顯示）獲得一些這方面信息。
+
+請記住你是在學習怎麼和已經存在的開發社區打交道。它由一羣形形色色的人組成，
+他們對代碼、風格和過程有著很高的標準。這些標準是在長期實踐中總結出來的，
+適應於地理上分散的大型開發團隊。它們已經被很好得整理成檔，建議你在開發
+之前儘可能多的學習這些標準，而不要期望別人來適應你或者你公司的行爲方式。
+
+
+法律問題
+--------
+
+Linux內核原始碼都是在GPL（通用公共許可證）的保護下發布的。要了解這種許可
+的細節請查看原始碼主目錄下的COPYING文件。Linux內核許可準則和如何使用
+`SPDX <https://spdx.org/>` 標誌符說明在這個文件中
+:ref:`Documentation/translations/zh_TW/process/license-rules.rst <tw_kernel_licensing>`
+如果你對它還有更深入問題請聯繫律師，而不要在Linux內核郵件組上提問。因爲
+郵件組裡的人並不是律師，不要期望他們的話有法律效力。
+
+對於GPL的常見問題和解答，請訪問以下連結：
+	https://www.gnu.org/licenses/gpl-faq.html
+
+
+文檔
+----
+
+Linux內核代碼中包含有大量的文檔。這些文檔對於學習如何與內核社區互動有著
+不可估量的價值。當一個新的功能被加入內核，最好把解釋如何使用這個功能的文
+檔也放進內核。當內核的改動導致面向用戶空間的接口發生變化時，最好將相關信
+息或手冊頁(manpages)的補丁發到mtk.manpages@gmail.com，以向手冊頁(manpages)
+的維護者解釋這些變化。
+
+以下是內核代碼中需要閱讀的文檔：
+  :ref:`Documentation/admin-guide/README.rst <readme>`
+    文件簡要介紹了Linux內核的背景，並且描述了如何配置和編譯內核。內核的
+    新用戶應該從這裡開始。
+
+
+  :ref:`Documentation/process/changes.rst <changes>`
+    文件給出了用來編譯和使用內核所需要的最小軟體包列表。
+
+  :ref:`Documentation/translations/zh_TW/process/coding-style.rst <tw_codingstyle>`
+    描述Linux內核的代碼風格和理由。所有新代碼需要遵守這篇文檔中定義的規
+    范。大多數維護者只會接收符合規定的補丁，很多人也只會幫忙檢查符合風格
+    的代碼。
+
+  :ref:`Documentation/translations/zh_TW/process/submitting-patches.rst <tw_submittingpatches>`
+  :ref:`Documentation/process/submitting-drivers.rst <submittingdrivers>`
+
+    這兩份文檔明確描述如何創建和發送補丁，其中包括（但不僅限於)：
+       - 郵件內容
+       - 郵件格式
+       - 選擇收件人
+
+    遵守這些規定並不能保證提交成功（因爲所有補丁需要通過嚴格的內容和風格
+    審查），但是忽視他們幾乎就意味著失敗。
+
+    其他關於如何正確地生成補丁的優秀文檔包括：
+    "The Perfect Patch"
+
+        https://www.ozlabs.org/~akpm/stuff/tpp.txt
+
+    "Linux kernel patch submission format"
+
+        https://web.archive.org/web/20180829112450/http://linux.yyz.us/patch-format.html
+
+  :ref:`Documentation/translations/zh_TW/process/stable-api-nonsense.rst <tw_stable_api_nonsense>`
+    論證內核爲什麼特意不包括穩定的內核內部API，也就是說不包括像這樣的特
+    性：
+
+       - 子系統中間層（爲了兼容性？）
+       - 在不同作業系統間易於移植的驅動程序
+       - 減緩（甚至阻止）內核代碼的快速變化
+
+    這篇文檔對於理解Linux的開發哲學至關重要。對於將開發平台從其他操作系
+    統轉移到Linux的人來說也很重要。
+
+  :ref:`Documentation/admin-guide/security-bugs.rst <securitybugs>`
+    如果你認爲自己發現了Linux內核的安全性問題，請根據這篇文檔中的步驟來
+    提醒其他內核開發者並幫助解決這個問題。
+
+  :ref:`Documentation/translations/zh_TW/process/management-style.rst <tw_managementstyle>`
+
+    描述內核維護者的工作方法及其共有特點。這對於剛剛接觸內核開發（或者對
+    它感到好奇）的人來說很重要，因爲它解釋了很多對於內核維護者獨特行爲的
+    普遍誤解與迷惑。
+
+  :ref:`Documentation/process/stable-kernel-rules.rst <stable_kernel_rules>`
+    解釋了穩定版內核發布的規則，以及如何將改動放入這些版本的步驟。
+
+  :ref:`Documentation/process/kernel-docs.rst <kernel_docs>`
+    有助於內核開發的外部文檔列表。如果你在內核自帶的文檔中沒有找到你想找
+    的內容，可以查看這些文檔。
+
+  :ref:`Documentation/process/applying-patches.rst <applying_patches>`
+    關於補丁是什麼以及如何將它打在不同內核開發分支上的好介紹
+
+內核還擁有大量從代碼自動生成或者從 ReStructuredText(ReST) 標記生成的文檔，
+比如這個文檔，它包含內核內部API的全面介紹以及如何妥善處理加鎖的規則。所有
+這些文檔都可以通過運行以下命令從內核代碼中生成爲PDF或HTML文檔::
+
+    make pdfdocs
+    make htmldocs
+
+ReST格式的文檔會生成在 Documentation/output. 目錄中。
+它們也可以用下列命令生成 LaTeX 和 ePub 格式文檔::
+
+    make latexdocs
+    make epubdocs
+
+如何成爲內核開發者
+------------------
+如果你對Linux內核開發一無所知，你應該訪問「Linux內核新手」計劃：
+
+	https://kernelnewbies.org
+
+它擁有一個可以問各種最基本的內核開發問題的郵件列表（在提問之前一定要記得
+查找已往的郵件，確認是否有人已經回答過相同的問題）。它還擁有一個可以獲得
+實時反饋的IRC聊天頻道，以及大量對於學習Linux內核開發相當有幫助的文檔。
+
+網站簡要介紹了原始碼組織結構、子系統劃分以及目前正在進行的項目（包括內核
+中的和單獨維護的）。它還提供了一些基本的幫助信息，比如如何編譯內核和打補
+丁。
+
+如果你想加入內核開發社區並協助完成一些任務，卻找不到從哪裡開始，可以訪問
+「Linux內核房管員」計劃：
+
+	https://kernelnewbies.org/KernelJanitors
+
+這是極佳的起點。它提供一個相對簡單的任務列表，列出內核代碼中需要被重新
+整理或者改正的地方。通過和負責這個計劃的開發者們一同工作，你會學到將補丁
+集成進內核的基本原理。如果還沒有決定下一步要做什麼的話，你還可能會得到方
+向性的指點。
+
+在真正動手修改內核代碼之前，理解要修改的代碼如何運作是必需的。要達到這個
+目的，沒什麼辦法比直接讀代碼更有效了（大多數花招都會有相應的注釋），而且
+一些特製的工具還可以提供幫助。例如，「Linux代碼交叉引用」項目就是一個值得
+特別推薦的幫助工具，它將原始碼顯示在有編目和索引的網頁上。其中一個更新及
+時的內核源碼庫，可以通過以下地址訪問：
+
+        https://elixir.bootlin.com/
+
+
+開發流程
+--------
+
+目前Linux內核開發流程包括幾個「主內核分支」和很多子系統相關的內核分支。這
+些分支包括：
+
+  - Linus 的內核源碼樹
+  - 多個主要版本的穩定版內核樹
+  - 子系統相關的內核樹
+  - linux-next 集成測試樹
+
+
+主線樹
+------
+主線樹是由Linus Torvalds 維護的。你可以在https://kernel.org 網站或者代碼
+庫中下找到它。它的開發遵循以下步驟：
+
+  - 每當一個新版本的內核被發布，爲期兩周的集成窗口將被打開。在這段時間裡
+    維護者可以向Linus提交大段的修改，通常這些修改已經被放到-mm內核中幾個
+    星期了。提交大量修改的首選方式是使用git工具（內核的代碼版本管理工具
+    ，更多的信息可以在 https://git-scm.com/ 獲取），不過使用普通補丁也是
+    可以的。
+  - 兩個星期以後-rc1版本內核發布。之後只有不包含可能影響整個內核穩定性的
+    新功能的補丁才可能被接受。請注意一個全新的驅動程序（或者文件系統）有
+    可能在-rc1後被接受是因爲這樣的修改完全獨立，不會影響其他的代碼，所以
+    沒有造成內核退步的風險。在-rc1以後也可以用git向Linus提交補丁，不過所
+    有的補丁需要同時被發送到相應的公衆郵件列表以徵詢意見。
+  - 當Linus認爲當前的git源碼樹已經達到一個合理健全的狀態足以發布供人測試
+    時，一個新的-rc版本就會被發布。計劃是每周都發布新的-rc版本。
+  - 這個過程一直持續下去直到內核被認爲達到足夠穩定的狀態，持續時間大概是
+    6個星期。
+
+關於內核發布，值得一提的是Andrew Morton在linux-kernel郵件列表中如是說：
+	「沒有人知道新內核何時會被發布，因爲發布是根據已知bug的情況來決定
+	的，而不是根據一個事先制定好的時間表。」
+
+子系統特定樹
+------------
+
+各種內核子系統的維護者——以及許多內核子系統開發人員——在原始碼庫中公開了他們
+當前的開發狀態。這樣，其他人就可以看到內核的不同區域發生了什麼。在開發速度
+很快的領域，可能會要求開發人員將提交的內容建立在這樣的子系統內核樹上，這樣
+就避免了提交與其他已經進行的工作之間的衝突。
+
+這些存儲庫中的大多數都是Git樹，但是也有其他的scm在使用，或者補丁隊列被發布
+爲Quilt系列。這些子系統存儲庫的地址列在MAINTAINERS文件中。其中許多可以在
+https://git.kernel.org/上瀏覽。
+
+在將一個建議的補丁提交到這樣的子系統樹之前，需要對它進行審查，審查主要發生
+在郵件列表上（請參見下面相應的部分）。對於幾個內核子系統，這個審查過程是通
+過工具補丁跟蹤的。Patchwork提供了一個Web界面，顯示補丁發布、對補丁的任何評
+論或修訂，維護人員可以將補丁標記爲正在審查、接受或拒絕。大多數補丁網站都列
+在 https://patchwork.kernel.org/
+
+Linux-next 集成測試樹
+---------------------
+
+在將子系統樹的更新合併到主線樹之前，需要對它們進行集成測試。爲此，存在一個
+特殊的測試存儲庫，其中幾乎每天都會提取所有子系統樹：
+
+        https://git.kernel.org/？p=linux/kernel/git/next/linux-next.git
+
+通過這種方式，Linux-next 對下一個合併階段將進入主線內核的內容給出了一個概要
+展望。非常歡冒險的測試者運行測試Linux-next。
+
+多個主要版本的穩定版內核樹
+-----------------------------------
+由3個數字組成的內核版本號說明此內核是-stable版本。它們包含內核的相對較小且
+至關重要的修補，這些修補針對安全性問題或者嚴重的內核退步。
+
+這種版本的內核適用於那些期望獲得最新的穩定版內核並且不想參與測試開發版或
+者實驗版的用戶。
+
+穩定版內核樹版本由「穩定版」小組（郵件地址<stable@vger.kernel.org>）維護，一般
+隔周發布新版本。
+
+內核源碼中的 :ref:`Documentation/process/stable-kernel-rules.rst <stable_kernel_rules>`
+文件具體描述了可被穩定版內核接受的修改類型以及發布的流程。
+
+
+報告bug
+-------
+
+bugzilla.kernel.org是Linux內核開發者們用來跟蹤內核Bug的網站。我們鼓勵用
+戶在這個工具中報告找到的所有bug。如何使用內核bugzilla的細節請訪問：
+
+	http://test.kernel.org/bugzilla/faq.html
+
+內核源碼主目錄中的:ref:`admin-guide/reporting-bugs.rst <reportingbugs>`
+文件里有一個很好的模板。它指導用戶如何報告可能的內核bug以及需要提供哪些信息
+來幫助內核開發者們找到問題的根源。
+
+
+利用bug報告
+-----------
+
+練習內核開發技能的最好辦法就是修改其他人報告的bug。你不光可以幫助內核變
+得更加穩定，還可以學會如何解決實際問題從而提高自己的技能，並且讓其他開發
+者感受到你的存在。修改bug是贏得其他開發者讚譽的最好辦法，因爲並不是很多
+人都喜歡浪費時間去修改別人報告的bug。
+
+要嘗試修改已知的bug，請訪問 http://bugzilla.kernel.org 網址。
+
+
+郵件列表
+--------
+
+正如上面的文檔所描述，大多數的骨幹內核開發者都加入了Linux Kernel郵件列
+表。如何訂閱和退訂列表的細節可以在這裡找到：
+
+	http://vger.kernel.org/vger-lists.html#linux-kernel
+
+網上很多地方都有這個郵件列表的存檔(archive)。可以使用搜尋引擎來找到這些
+存檔。比如：
+
+	http://dir.gmane.org/gmane.linux.kernel
+
+在發信之前，我們強烈建議你先在存檔中搜索你想要討論的問題。很多已經被詳細
+討論過的問題只在郵件列表的存檔中可以找到。
+
+大多數內核子系統也有自己獨立的郵件列表來協調各自的開發工作。從
+MAINTAINERS文件中可以找到不同話題對應的郵件列表。
+
+很多郵件列表架設在kernel.org伺服器上。這些列表的信息可以在這裡找到：
+
+	http://vger.kernel.org/vger-lists.html
+
+在使用這些郵件列表時，請記住保持良好的行爲習慣。下面的連結提供了與這些列
+表（或任何其它郵件列表）交流的一些簡單規則，雖然內容有點濫竽充數。
+
+	http://www.albion.com/netiquette/
+
+當有很多人回覆你的郵件時，郵件的抄送列表會變得很長。請不要將任何人從抄送
+列表中刪除，除非你有足夠的理由這麼做。也不要只回復到郵件列表。請習慣於同
+一封郵件接收兩次（一封來自發送者一封來自郵件列表），而不要試圖通過添加一
+些奇特的郵件頭來解決這個問題，人們不會喜歡的。
+
+記住保留你所回復內容的上下文和源頭。在你回覆郵件的頂部保留「某某某說到……」
+這幾行。將你的評論加在被引用的段落之間而不要放在郵件的頂部。
+
+如果你在郵件中附帶補丁，請確認它們是可以直接閱讀的純文本（如
+:ref:`Documentation/translations/zh_TW/process/submitting-patches.rst <tw_submittingpatches>`
+文檔中所述）。內核開發者們不希望遇到附件或者被壓縮了的補丁。只有這樣才能
+保證他們可以直接評論你的每行代碼。請確保你使用的郵件發送程序不會修改空格
+和制表符。一個防範性的測試方法是先將郵件發送給自己，然後自己嘗試是否可以
+順利地打上收到的補丁。如果測試不成功，請調整或者更換你的郵件發送程序直到
+它正確工作爲止。
+
+總而言之，請尊重其他的郵件列表訂閱者。
+
+
+同內核社區合作
+----------------
+
+內核社區的目標就是提供盡善盡美的內核。所以當你提交補丁期望被接受進內核的
+時候，它的技術價值以及其他方面都將被評審。那麼你可能會得到什麼呢？
+
+  - 批評
+  - 評論
+  - 要求修改
+  - 要求證明修改的必要性
+  - 沉默
+
+要記住，這些是把補丁放進內核的正常情況。你必須學會聽取對補丁的批評和評論，
+從技術層面評估它們，然後要麼重寫你的補丁要麼簡明扼要地論證修改是不必要
+的。如果你發的郵件沒有得到任何回應，請過幾天後再試一次，因爲有時信件會湮
+沒在茫茫信海中。
+
+你不應該做的事情：
+
+  - 期望自己的補丁不受任何質疑就直接被接受
+  - 翻臉
+  - 忽略別人的評論
+  - 沒有按照別人的要求做任何修改就重新提交
+
+在一個努力追尋最好技術方案的社區里，對於一個補丁有多少好處總會有不同的見
+解。你必須要抱著合作的態度，願意改變自己的觀點來適應內核的風格。或者至少
+願意去證明你的想法是有價值的。記住，犯錯誤是允許的，只要你願意朝著正確的
+方案去努力。
+
+如果你的第一個補丁換來的是一堆修改建議，這是很正常的。這並不代表你的補丁
+不會被接受，也不意味著有人和你作對。你只需要改正所有提出的問題然後重新發
+送你的補丁。
+
+內核社區和公司文化的差異
+------------------------
+
+內核社區的工作模式同大多數傳統公司開發隊伍的工作模式並不相同。下面這些例
+子，可以幫助你避免某些可能發生問題：
+用這些話介紹你的修改提案會有好處：
+
+    - 它同時解決了多個問題
+    - 它刪除了2000行代碼
+    - 這是補丁，它已經解釋了我想要說明的
+    - 我在5種不同的體系結構上測試過它……
+    - 這是一系列小補丁用來……
+    - 這個修改提高了普通機器的性能……
+
+應該避免如下的說法：
+
+    - 我們在AIX/ptx/Solaris就是這麼做的，所以這麼做肯定是好的……
+    - 我做這行已經20年了，所以……
+    - 爲了我們公司賺錢考慮必須這麼做
+    - 這是我們的企業產品線所需要的
+    - 這裡是描述我觀點的1000頁設計文檔
+    - 這是一個5000行的補丁用來……
+    - 我重寫了現在亂七八糟的代碼，這就是……
+    - 我被規定了最後期限，所以這個補丁需要立刻被接受
+
+另外一個內核社區與大部分傳統公司的軟體開發隊伍不同的地方是無法面對面地交
+流。使用電子郵件和IRC聊天工具做爲主要溝通工具的一個好處是性別和種族歧視
+將會更少。Linux內核的工作環境更能接受婦女和少數族羣，因爲每個人在別人眼
+里只是一個郵件地址。國際化也幫助了公平的實現，因爲你無法通過姓名來判斷人
+的性別。男人有可能叫李麗，女人也有可能叫王剛。大多數在Linux內核上工作過
+並表達過看法的女性對在linux上工作的經歷都給出了正面的評價。
+
+對於一些不習慣使用英語的人來說，語言可能是一個引起問題的障礙。在郵件列表
+中要正確地表達想法必需良好地掌握語言，所以建議你在發送郵件之前最好檢查一
+下英文寫得是否正確。
+
+
+拆分修改
+--------
+
+Linux內核社區並不喜歡一下接收大段的代碼。修改需要被恰當地介紹、討論並且
+拆分成獨立的小段。這幾乎完全和公司中的習慣背道而馳。你的想法應該在開發最
+開始的階段就讓大家知道，這樣你就可以及時獲得對你正在進行的開發的反饋。這
+樣也會讓社區覺得你是在和他們協作，而不是僅僅把他們當作傾銷新功能的對象。
+無論如何，你不要一次性地向郵件列表發送50封信，你的補丁序列應該永遠用不到
+這麼多。
+
+將補丁拆開的原因如下：
+
+1) 小的補丁更有可能被接受，因爲它們不需要太多的時間和精力去驗證其正確性。
+   一個5行的補丁，可能在維護者看了一眼以後就會被接受。而500行的補丁則
+   需要數個小時來審查其正確性（所需時間隨補丁大小增加大約呈指數級增長）。
+
+   當出了問題的時候，小的補丁也會讓調試變得非常容易。一個一個補丁地回溯
+   將會比仔細剖析一個被打上的大補丁（這個補丁破壞了其他東西）容易得多。
+
+2）不光發送小的補丁很重要，在提交之前重新編排、化簡（或者僅僅重新排列）
+   補丁也是很重要的。
+
+這裡有內核開發者Al Viro打的一個比方：
+	「想像一個老師正在給學生批改數學作業。老師並不希望看到學生爲了得
+	到正確解法所進行的嘗試和產生的錯誤。他希望看到的是最乾淨最優雅的
+	解答。好學生了解這點，絕不會把最終解決之前的中間方案提交上去。」
+
+	內核開發也是這樣。維護者和評審者不希望看到一個人在解決問題時的思
+	考過程。他們只希望看到簡單和優雅的解決方案。
+
+直接給出一流的解決方案，和社區一起協作討論尚未完成的工作，這兩者之間似乎
+很難找到一個平衡點。所以最好儘早開始收集有利於你進行改進的反饋；同時也要
+保證修改分成很多小塊，這樣在整個項目都準備好被包含進內核之前，其中的一部
+分可能會先被接收。
+
+必須了解這樣做是不可接受的：試圖將未完成的工作提交進內核，然後再找時間修
+復。
+
+
+證明修改的必要性
+----------------
+除了將補丁拆成小塊，很重要的一點是讓Linux社區了解他們爲什麼需要這樣修改。
+你必須證明新功能是有人需要的並且是有用的。
+
+
+記錄修改
+--------
+
+當你發送補丁的時候，需要特別留意郵件正文的內容。因爲這裡的信息將會做爲補
+丁的修改記錄(ChangeLog)，會被一直保留以備大家查閱。它需要完全地描述補丁，
+包括：
+
+  - 爲什麼需要這個修改
+  - 補丁的總體設計
+  - 實現細節
+  - 測試結果
+
+想了解它具體應該看起來像什麼，請查閱以下文檔中的「ChangeLog」章節：
+  「The Perfect Patch」
+  	 https://www.ozlabs.org/~akpm/stuff/tpp.txt
+
+
+這些事情有時候做起來很難。要在任何方面都做到完美可能需要好幾年時間。這是
+一個持續提高的過程，它需要大量的耐心和決心。只要不放棄，你一定可以做到。
+很多人已經做到了，而他們都曾經和現在的你站在同樣的起點上。
+
+
+感謝
+----
+感謝Paolo Ciarrocchi允許「開發流程」部分基於他所寫的文章
+(http://www.kerneltravel.net/newbie/2.6-development_process)，感謝Randy
+Dunlap和Gerrit Huizenga完善了應該說和不該說的列表。感謝Pat Mochel, Hanna
+Linder, Randy Dunlap, Kay Sievers, Vojtech Pavlik, Jan Kara, Josh Boyer,
+Kees Cook, Andrew Morton, Andi Kleen, Vadim Lobanov, Jesper Juhl, Adrian
+Bunk, Keri Harris, Frans Pop, David A. Wheeler, Junio Hamano, Michael
+Kerrisk和Alex Shepard的評審、建議和貢獻。沒有他們的幫助，這篇文檔是不可
+能完成的。
+
+
+
+英文版維護者： Greg Kroah-Hartman <greg@kroah.com>
+
diff --git a/Documentation/translations/zh_TW/process/index.rst b/Documentation/translations/zh_TW/process/index.rst
new file mode 100644
index 000000000000..ec7ad14bfd13
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/index.rst
@@ -0,0 +1,67 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. raw:: latex
+
+	\renewcommand\thesection*
+	\renewcommand\thesubsection*
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/index.rst <process_index>`
+:Translator: Alex Shi <alex.shi@linux.alibaba.com>
+             Hu Haowen <src.res@email.cn>
+
+.. _tw_process_index:
+
+與Linux 內核社區一起工作
+========================
+
+你想成爲Linux內核開發人員嗎？歡迎之至！在學習許多關於內核的技術知識的同時，
+了解我們社區的工作方式也很重要。閱讀這些文檔可以讓您以更輕鬆的、麻煩更少的
+方式將更改合併到內核。
+
+以下是每位開發人員都應閱讀的基本指南：
+
+.. toctree::
+   :maxdepth: 1
+
+   howto
+   code-of-conduct
+   code-of-conduct-interpretation
+   submitting-patches
+   programming-language
+   coding-style
+   development-process
+   email-clients
+   license-rules
+   kernel-enforcement-statement
+   kernel-driver-statement
+
+其它大多數開發人員感興趣的社區指南：
+
+
+.. toctree::
+   :maxdepth: 1
+
+   submitting-drivers
+   submit-checklist
+   stable-api-nonsense
+   stable-kernel-rules
+   management-style
+   embargoed-hardware-issues
+
+這些是一些總體性技術指南，由於不大好分類而放在這裡：
+
+.. toctree::
+   :maxdepth: 1
+
+   magic-number
+   volatile-considered-harmful
+
+.. only::  subproject and html
+
+   目錄
+   ====
+
+   * :ref:`genindex`
+
diff --git a/Documentation/translations/zh_TW/process/kernel-driver-statement.rst b/Documentation/translations/zh_TW/process/kernel-driver-statement.rst
new file mode 100644
index 000000000000..8f225379b12c
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/kernel-driver-statement.rst
@@ -0,0 +1,203 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _zh_process_statement_driver:
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/kernel-driver-statement.rst <process_statement_driver>`
+:Translator: Alex Shi <alex.shi@linux.alibaba.com>
+             Hu Haowen <src.res@email.cn>
+
+內核驅動聲明
+------------
+
+關於Linux內核模塊的立場聲明
+===========================
+
+我們，以下署名的Linux內核開發人員，認爲任何封閉源Linux內核模塊或驅動程序都是
+有害的和不可取的。我們已經一再發現它們對Linux用戶，企業和更大的Linux生態系統
+有害。這樣的模塊否定了Linux開發模型的開放性，穩定性，靈活性和可維護性，並使
+他們的用戶無法使用Linux社區的專業知識。提供閉源內核模塊的供應商迫使其客戶
+放棄Linux的主要優勢或選擇新的供應商。因此，爲了充分利用開源所提供的成本節省和
+共享支持優勢，我們敦促供應商採取措施，以開源內核代碼在Linux上爲其客戶提供支持。
+
+我們只爲自己說話，而不是我們今天可能會爲之工作，過去或將來會爲之工作的任何公司。
+
+ - Dave Airlie
+ - Nick Andrew
+ - Jens Axboe
+ - Ralf Baechle
+ - Felipe Balbi
+ - Ohad Ben-Cohen
+ - Muli Ben-Yehuda
+ - Jiri Benc
+ - Arnd Bergmann
+ - Thomas Bogendoerfer
+ - Vitaly Bordug
+ - James Bottomley
+ - Josh Boyer
+ - Neil Brown
+ - Mark Brown
+ - David Brownell
+ - Michael Buesch
+ - Franck Bui-Huu
+ - Adrian Bunk
+ - François Cami
+ - Ralph Campbell
+ - Luiz Fernando N. Capitulino
+ - Mauro Carvalho Chehab
+ - Denis Cheng
+ - Jonathan Corbet
+ - Glauber Costa
+ - Alan Cox
+ - Magnus Damm
+ - Ahmed S. Darwish
+ - Robert P. J. Day
+ - Hans de Goede
+ - Arnaldo Carvalho de Melo
+ - Helge Deller
+ - Jean Delvare
+ - Mathieu Desnoyers
+ - Sven-Thorsten Dietrich
+ - Alexey Dobriyan
+ - Daniel Drake
+ - Alex Dubov
+ - Randy Dunlap
+ - Michael Ellerman
+ - Pekka Enberg
+ - Jan Engelhardt
+ - Mark Fasheh
+ - J. Bruce Fields
+ - Larry Finger
+ - Jeremy Fitzhardinge
+ - Mike Frysinger
+ - Kumar Gala
+ - Robin Getz
+ - Liam Girdwood
+ - Jan-Benedict Glaw
+ - Thomas Gleixner
+ - Brice Goglin
+ - Cyrill Gorcunov
+ - Andy Gospodarek
+ - Thomas Graf
+ - Krzysztof Halasa
+ - Harvey Harrison
+ - Stephen Hemminger
+ - Michael Hennerich
+ - Tejun Heo
+ - Benjamin Herrenschmidt
+ - Kristian Høgsberg
+ - Henrique de Moraes Holschuh
+ - Marcel Holtmann
+ - Mike Isely
+ - Takashi Iwai
+ - Olof Johansson
+ - Dave Jones
+ - Jesper Juhl
+ - Matthias Kaehlcke
+ - Kenji Kaneshige
+ - Jan Kara
+ - Jeremy Kerr
+ - Russell King
+ - Olaf Kirch
+ - Roel Kluin
+ - Hans-Jürgen Koch
+ - Auke Kok
+ - Peter Korsgaard
+ - Jiri Kosina
+ - Aaro Koskinen
+ - Mariusz Kozlowski
+ - Greg Kroah-Hartman
+ - Michael Krufky
+ - Aneesh Kumar
+ - Clemens Ladisch
+ - Christoph Lameter
+ - Gunnar Larisch
+ - Anders Larsen
+ - Grant Likely
+ - John W. Linville
+ - Yinghai Lu
+ - Tony Luck
+ - Pavel Machek
+ - Matt Mackall
+ - Paul Mackerras
+ - Roland McGrath
+ - Patrick McHardy
+ - Kyle McMartin
+ - Paul Menage
+ - Thierry Merle
+ - Eric Miao
+ - Akinobu Mita
+ - Ingo Molnar
+ - James Morris
+ - Andrew Morton
+ - Paul Mundt
+ - Oleg Nesterov
+ - Luca Olivetti
+ - S.Çağlar Onur
+ - Pierre Ossman
+ - Keith Owens
+ - Venkatesh Pallipadi
+ - Nick Piggin
+ - Nicolas Pitre
+ - Evgeniy Polyakov
+ - Richard Purdie
+ - Mike Rapoport
+ - Sam Ravnborg
+ - Gerrit Renker
+ - Stefan Richter
+ - David Rientjes
+ - Luis R. Rodriguez
+ - Stefan Roese
+ - Francois Romieu
+ - Rami Rosen
+ - Stephen Rothwell
+ - Maciej W. Rozycki
+ - Mark Salyzyn
+ - Yoshinori Sato
+ - Deepak Saxena
+ - Holger Schurig
+ - Amit Shah
+ - Yoshihiro Shimoda
+ - Sergei Shtylyov
+ - Kay Sievers
+ - Sebastian Siewior
+ - Rik Snel
+ - Jes Sorensen
+ - Alexey Starikovskiy
+ - Alan Stern
+ - Timur Tabi
+ - Hirokazu Takata
+ - Eliezer Tamir
+ - Eugene Teo
+ - Doug Thompson
+ - FUJITA Tomonori
+ - Dmitry Torokhov
+ - Marcelo Tosatti
+ - Steven Toth
+ - Theodore Tso
+ - Matthias Urlichs
+ - Geert Uytterhoeven
+ - Arjan van de Ven
+ - Ivo van Doorn
+ - Rik van Riel
+ - Wim Van Sebroeck
+ - Hans Verkuil
+ - Horst H. von Brand
+ - Dmitri Vorobiev
+ - Anton Vorontsov
+ - Daniel Walker
+ - Johannes Weiner
+ - Harald Welte
+ - Matthew Wilcox
+ - Dan J. Williams
+ - Darrick J. Wong
+ - David Woodhouse
+ - Chris Wright
+ - Bryan Wu
+ - Rafael J. Wysocki
+ - Herbert Xu
+ - Vlad Yasevich
+ - Peter Zijlstra
+ - Bartlomiej Zolnierkiewicz
+
diff --git a/Documentation/translations/zh_TW/process/kernel-enforcement-statement.rst b/Documentation/translations/zh_TW/process/kernel-enforcement-statement.rst
new file mode 100644
index 000000000000..99e21d22800d
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/kernel-enforcement-statement.rst
@@ -0,0 +1,155 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _tw_process_statement_kernel:
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/kernel-enforcement-statement.rst <process_statement_kernel>`
+:Translator: Alex Shi <alex.shi@linux.alibaba.com>
+             Hu Haowen <src.res@email.cn>
+
+Linux 內核執行聲明
+------------------
+
+作爲Linux內核的開發人員，我們對如何使用我們的軟體以及如何實施軟體許可證有著
+濃厚的興趣。遵守GPL-2.0的互惠共享義務對我們軟體和社區的長期可持續性至關重要。
+
+雖然有權強制執行對我們社區的貢獻中的單獨版權權益，但我們有共同的利益，即確保
+個人強制執行行動的方式有利於我們的社區，不會對我們軟體生態系統的健康和增長
+產生意外的負面影響。爲了阻止無益的執法行動，我們同意代表我們自己和我們版權
+利益的任何繼承人對Linux內核用戶作出以下符合我們開發社區最大利益的承諾:
+
+    儘管有GPL-2.0的終止條款，我們同意，採用以下GPL-3.0條款作爲我們許可證下的
+    附加許可，作爲任何對許可證下權利的非防禦性主張，這符合我們開發社區的最佳
+    利益。
+
+        但是，如果您停止所有違反本許可證的行爲，則您從特定版權持有人處獲得的
+        許可證將被恢復：（a）暫時恢復，除非版權持有人明確並最終終止您的許可證；
+        以及（b）永久恢復, 如果版權持有人未能在你終止違反後60天內以合理方式
+        通知您違反本許可證的行爲，則永久恢復您的許可證。
+
+        此外，如果版權所有者以某種合理的方式通知您違反了本許可，這是您第一次
+        從該版權所有者處收到違反本許可的通知（對於任何作品），並且您在收到通知
+        後的30天內糾正違規行爲。則您從特定版權所有者處獲得的許可將永久恢復.
+
+我們提供這些保證的目的是鼓勵更多地使用該軟體。我們希望公司和個人使用、修改和
+分發此軟體。我們希望以公開和透明的方式與用戶合作，以消除我們對法規遵從性或強制
+執行的任何不確定性，這些不確定性可能會限制我們軟體的採用。我們將法律行動視爲
+最後手段，只有在其他社區努力未能解決這一問題時才採取行動。
+
+最後，一旦一個不合規問題得到解決，我們希望用戶會感到歡迎，加入我們爲之努力的
+這個項目。共同努力，我們會更強大。
+
+除了下面提到的以外，我們只爲自己說話，而不是爲今天、過去或將來可能爲之工作的
+任何公司說話。
+
+  - Laura Abbott
+  - Bjorn Andersson (Linaro)
+  - Andrea Arcangeli
+  - Neil Armstrong
+  - Jens Axboe
+  - Pablo Neira Ayuso
+  - Khalid Aziz
+  - Ralf Baechle
+  - Felipe Balbi
+  - Arnd Bergmann
+  - Ard Biesheuvel
+  - Tim Bird
+  - Paolo Bonzini
+  - Christian Borntraeger
+  - Mark Brown (Linaro)
+  - Paul Burton
+  - Javier Martinez Canillas
+  - Rob Clark
+  - Kees Cook (Google)
+  - Jonathan Corbet
+  - Dennis Dalessandro
+  - Vivien Didelot (Savoir-faire Linux)
+  - Hans de Goede
+  - Mel Gorman (SUSE)
+  - Sven Eckelmann
+  - Alex Elder (Linaro)
+  - Fabio Estevam
+  - Larry Finger
+  - Bhumika Goyal
+  - Andy Gross
+  - Juergen Gross
+  - Shawn Guo
+  - Ulf Hansson
+  - Stephen Hemminger (Microsoft)
+  - Tejun Heo
+  - Rob Herring
+  - Masami Hiramatsu
+  - Michal Hocko
+  - Simon Horman
+  - Johan Hovold (Hovold Consulting AB)
+  - Christophe JAILLET
+  - Olof Johansson
+  - Lee Jones (Linaro)
+  - Heiner Kallweit
+  - Srinivas Kandagatla
+  - Jan Kara
+  - Shuah Khan (Samsung)
+  - David Kershner
+  - Jaegeuk Kim
+  - Namhyung Kim
+  - Colin Ian King
+  - Jeff Kirsher
+  - Greg Kroah-Hartman (Linux Foundation)
+  - Christian König
+  - Vinod Koul
+  - Krzysztof Kozlowski
+  - Viresh Kumar
+  - Aneesh Kumar K.V
+  - Julia Lawall
+  - Doug Ledford
+  - Chuck Lever (Oracle)
+  - Daniel Lezcano
+  - Shaohua Li
+  - Xin Long
+  - Tony Luck
+  - Catalin Marinas (Arm Ltd)
+  - Mike Marshall
+  - Chris Mason
+  - Paul E. McKenney
+  - Arnaldo Carvalho de Melo
+  - David S. Miller
+  - Ingo Molnar
+  - Kuninori Morimoto
+  - Trond Myklebust
+  - Martin K. Petersen (Oracle)
+  - Borislav Petkov
+  - Jiri Pirko
+  - Josh Poimboeuf
+  - Sebastian Reichel (Collabora)
+  - Guenter Roeck
+  - Joerg Roedel
+  - Leon Romanovsky
+  - Steven Rostedt (VMware)
+  - Frank Rowand
+  - Ivan Safonov
+  - Anna Schumaker
+  - Jes Sorensen
+  - K.Y. Srinivasan
+  - David Sterba (SUSE)
+  - Heiko Stuebner
+  - Jiri Kosina (SUSE)
+  - Willy Tarreau
+  - Dmitry Torokhov
+  - Linus Torvalds
+  - Thierry Reding
+  - Rik van Riel
+  - Luis R. Rodriguez
+  - Geert Uytterhoeven (Glider bvba)
+  - Eduardo Valentin (Amazon.com)
+  - Daniel Vetter
+  - Linus Walleij
+  - Richard Weinberger
+  - Dan Williams
+  - Rafael J. Wysocki
+  - Arvind Yadav
+  - Masahiro Yamada
+  - Wei Yongjun
+  - Lv Zheng
+  - Marc Zyngier (Arm Ltd)
+
diff --git a/Documentation/translations/zh_TW/process/license-rules.rst b/Documentation/translations/zh_TW/process/license-rules.rst
new file mode 100644
index 000000000000..ad2b80f97123
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/license-rules.rst
@@ -0,0 +1,374 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/license-rules.rst <kernel_licensing>`
+:Translator: Alex Shi <alex.shi@linux.alibaba.com>
+             Hu Haowen <src.res@email.cn>
+
+.. _tw_kernel_licensing:
+
+Linux內核許可規則
+=================
+
+Linux內核根據LICENSES/preferred/GPL-2.0中提供的GNU通用公共許可證版本2
+（GPL-2.0）的條款提供，並在LICENSES/exceptions/Linux-syscall-note中顯式
+描述了例外的系統調用，如COPYING文件中所述。
+
+此文檔文件提供了如何對每個源文件進行注釋以使其許可證清晰明確的說明。
+它不會取代內核的許可證。
+
+內核原始碼作爲一個整體適用於COPYING文件中描述的許可證，但是單個源文件可以
+具有不同的與GPL-20兼容的許可證::
+
+    GPL-1.0+ : GNU通用公共許可證v1.0或更高版本
+    GPL-2.0+ : GNU通用公共許可證v2.0或更高版本
+    LGPL-2.0 : 僅限GNU庫通用公共許可證v2
+    LGPL-2.0+: GNU 庫通用公共許可證v2或更高版本
+    LGPL-2.1 : 僅限GNU寬通用公共許可證v2.1
+    LGPL-2.1+: GNU寬通用公共許可證v2.1或更高版本
+
+除此之外，個人文件可以在雙重許可下提供，例如一個兼容的GPL變體，或者BSD，
+MIT等許可。
+
+用戶空間API（UAPI）頭文件描述了用戶空間程序與內核的接口，這是一種特殊情況。
+根據內核COPYING文件中的注釋，syscall接口是一個明確的邊界，它不會將GPL要求
+擴展到任何使用它與內核通信的軟體。由於UAPI頭文件必須包含在創建在Linux內核
+上運行的可執行文件的任何源文件中，因此此例外必須記錄在特別的許可證表述中。
+
+表達源文件許可證的常用方法是將匹配的樣板文本添加到文件的頂部注釋中。由於
+格式，拼寫錯誤等，這些「樣板」很難通過那些在上下文中使用的驗證許可證合規性
+的工具。
+
+樣板文本的替代方法是在每個源文件中使用軟體包數據交換（SPDX）許可證標識符。
+SPDX許可證標識符是機器可解析的，並且是用於提供文件內容的許可證的精確縮寫。
+SPDX許可證標識符由Linux 基金會的SPDX 工作組管理，並得到了整個行業，工具
+供應商和法律團隊的合作夥伴的一致同意。有關詳細信息，請參閱
+https://spdx.org/
+
+Linux內核需要所有源文件中的精確SPDX標識符。內核中使用的有效標識符在
+`許可標識符`_ 一節中進行了解釋，並且已可以在
+https://spdx.org/licenses/ 上的官方SPDX許可證列表中檢索，並附帶許可證
+文本。
+
+許可標識符語法
+--------------
+
+1.安置:
+
+   內核文件中的SPDX許可證標識符應添加到可包含注釋的文件中的第一行。對於大多
+   數文件，這是第一行，除了那些在第一行中需要'#!PATH_TO_INTERPRETER'的腳本。
+   對於這些腳本，SPDX標識符進入第二行。
+
+|
+
+2. 風格:
+
+   SPDX許可證標識符以注釋的形式添加。注釋樣式取決於文件類型::
+
+      C source:	// SPDX-License-Identifier: <SPDX License Expression>
+      C header:	/* SPDX-License-Identifier: <SPDX License Expression> */
+      ASM:	/* SPDX-License-Identifier: <SPDX License Expression> */
+      scripts:	# SPDX-License-Identifier: <SPDX License Expression>
+      .rst:	.. SPDX-License-Identifier: <SPDX License Expression>
+      .dts{i}:	// SPDX-License-Identifier: <SPDX License Expression>
+
+   如果特定工具無法處理標準注釋樣式，則應使用工具接受的相應注釋機制。這是在
+   C 頭文件中使用「/\*\*/」樣式注釋的原因。過去在使用生成的.lds文件中觀察到
+   構建被破壞，其中'ld'無法解析C++注釋。現在已經解決了這個問題，但仍然有較
+   舊的彙編程序工具無法處理C++樣式的注釋。
+
+|
+
+3. 句法:
+
+   <SPDX許可證表達式>是SPDX許可證列表中的SPDX短格式許可證標識符，或者在許可
+   證例外適用時由「WITH」分隔的兩個SPDX短格式許可證標識符的組合。當應用多個許
+   可證時，表達式由分隔子表達式的關鍵字「AND」，「OR」組成，並由「（」，「）」包圍。
+
+   帶有「或更高」選項的[L]GPL等許可證的許可證標識符通過使用「+」來表示「或更高」
+   選項來構建。::
+
+      // SPDX-License-Identifier: GPL-2.0+
+      // SPDX-License-Identifier: LGPL-2.1+
+
+   當需要修正的許可證時，應使用WITH。 例如，linux內核UAPI文件使用表達式::
+
+      // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+      // SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note
+
+   其它在內核中使用WITH例外的事例如下::
+
+      // SPDX-License-Identifier: GPL-2.0 WITH mif-exception
+      // SPDX-License-Identifier: GPL-2.0+ WITH GCC-exception-2.0
+
+   例外只能與特定的許可證標識符一起使用。有效的許可證標識符列在異常文本文件
+   的標記中。有關詳細信息，請參閱 `許可標識符`_ 一章中的 `例外`_ 。
+
+   如果文件是雙重許可且只選擇一個許可證，則應使用OR。例如，一些dtsi文件在雙
+   許可下可用::
+
+      // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+   內核中雙許可文件中許可表達式的示例::
+
+      // SPDX-License-Identifier: GPL-2.0 OR MIT
+      // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+      // SPDX-License-Identifier: GPL-2.0 OR Apache-2.0
+      // SPDX-License-Identifier: GPL-2.0 OR MPL-1.1
+      // SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT
+      // SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause OR OpenSSL
+
+   如果文件具有多個許可證，其條款全部適用於使用該文件，則應使用AND。例如，
+   如果代碼是從另一個項目繼承的，並且已經授予了將其放入內核的權限，但原始
+   許可條款需要保持有效::
+
+      // SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) AND MIT
+
+   另一個需要遵守兩套許可條款的例子是::
+
+      // SPDX-License-Identifier: GPL-1.0+ AND LGPL-2.1+
+
+許可標識符
+----------
+
+當前使用的許可證以及添加到內核的代碼許可證可以分解爲：
+
+1. _`優先許可`:
+
+   應儘可能使用這些許可證，因爲它們已知完全兼容並廣泛使用。這些許可證在內核
+   目錄::
+
+      LICENSES/preferred/
+
+   此目錄中的文件包含完整的許可證文本和 `元標記`_ 。文件名與SPDX許可證標識
+   符相同，後者應用於源文件中的許可證。
+
+   例如::
+
+      LICENSES/preferred/GPL-2.0
+
+   包含GPLv2許可證文本和所需的元標籤::
+
+      LICENSES/preferred/MIT
+
+   包含MIT許可證文本和所需的元標記
+
+   _`元標記`:
+
+   許可證文件中必須包含以下元標記：
+
+   - Valid-License-Identifier:
+
+     一行或多行, 聲明那些許可標識符在項目內有效, 以引用此特定許可的文本。通
+     常這是一個有效的標識符，但是例如對於帶有'或更高'選項的許可證，兩個標識
+     符都有效。
+
+   - SPDX-URL:
+
+     SPDX頁面的URL，其中包含與許可證相關的其他信息.
+
+   - Usage-Guidance:
+
+     使用建議的自由格式文本。該文本必須包含SPDX許可證標識符的正確示例，因爲
+     它們應根據 `許可標識符語法`_ 指南放入源文件中。
+
+   - License-Text:
+
+     此標記之後的所有文本都被視爲原始許可文本
+
+   文件格式示例::
+
+      Valid-License-Identifier: GPL-2.0
+      Valid-License-Identifier: GPL-2.0+
+      SPDX-URL: https://spdx.org/licenses/GPL-2.0.html
+      Usage-Guide:
+        To use this license in source code, put one of the following SPDX
+	tag/value pairs into a comment according to the placement
+	guidelines in the licensing rules documentation.
+	For 'GNU General Public License (GPL) version 2 only' use:
+	  SPDX-License-Identifier: GPL-2.0
+	For 'GNU General Public License (GPL) version 2 or any later version' use:
+	  SPDX-License-Identifier: GPL-2.0+
+      License-Text:
+        Full license text
+
+   ::
+
+      SPDX-License-Identifier: MIT
+      SPDX-URL: https://spdx.org/licenses/MIT.html
+      Usage-Guide:
+	To use this license in source code, put the following SPDX
+	tag/value pair into a comment according to the placement
+	guidelines in the licensing rules documentation.
+	  SPDX-License-Identifier: MIT
+      License-Text:
+        Full license text
+
+|
+
+2. 不推薦的許可證:
+
+   這些許可證只應用於現有代碼或從其他項目導入代碼。這些許可證在內核目錄::
+
+      LICENSES/other/
+
+   此目錄中的文件包含完整的許可證文本和 `元標記`_ 。文件名與SPDX許可證標識
+   符相同，後者應用於源文件中的許可證。
+
+   例如::
+
+      LICENSES/other/ISC
+
+   包含國際系統聯合許可文本和所需的元標籤::
+
+      LICENSES/other/ZLib
+
+   包含ZLIB許可文本和所需的元標籤.
+
+   元標籤:
+
+   「其他」許可證的元標籤要求與 `優先許可`_ 的要求相同。
+
+   文件格式示例::
+
+      Valid-License-Identifier: ISC
+      SPDX-URL: https://spdx.org/licenses/ISC.html
+      Usage-Guide:
+        Usage of this license in the kernel for new code is discouraged
+	and it should solely be used for importing code from an already
+	existing project.
+        To use this license in source code, put the following SPDX
+	tag/value pair into a comment according to the placement
+	guidelines in the licensing rules documentation.
+	  SPDX-License-Identifier: ISC
+      License-Text:
+        Full license text
+
+|
+
+3. _`例外`:
+
+   某些許可證可以修改，並允許原始許可證不具有的某些例外權利。這些例外在
+   內核目錄::
+
+      LICENSES/exceptions/
+
+   此目錄中的文件包含完整的例外文本和所需的 `例外元標記`_ 。
+
+   例如::
+
+      LICENSES/exceptions/Linux-syscall-note
+
+   包含Linux內核的COPYING文件中記錄的Linux系統調用例外，該文件用於UAPI
+   頭文件。例如::
+
+      LICENSES/exceptions/GCC-exception-2.0
+
+   包含GCC'連結例外'，它允許獨立於其許可證的任何二進位文件與標記有此例外的
+   文件的編譯版本連結。這是從GPL不兼容原始碼創建可運行的可執行文件所必需的。
+
+   _`例外元標記`:
+
+   以下元標記必須在例外文件中可用：
+
+   - SPDX-Exception-Identifier:
+
+     一個可與SPDX許可證標識符一起使用的例外標識符。
+
+   - SPDX-URL:
+
+     SPDX頁面的URL，其中包含與例外相關的其他信息。
+
+   - SPDX-Licenses:
+
+     以逗號分隔的例外可用的SPDX許可證標識符列表。
+
+   - Usage-Guidance:
+
+     使用建議的自由格式文本。必須在文本後面加上SPDX許可證標識符的正確示例，
+     因爲它們應根據 `許可標識符語法`_ 指南放入源文件中。
+
+   - Exception-Text:
+
+     此標記之後的所有文本都被視爲原始異常文本
+
+   文件格式示例::
+
+      SPDX-Exception-Identifier: Linux-syscall-note
+      SPDX-URL: https://spdx.org/licenses/Linux-syscall-note.html
+      SPDX-Licenses: GPL-2.0, GPL-2.0+, GPL-1.0+, LGPL-2.0, LGPL-2.0+, LGPL-2.1, LGPL-2.1+
+      Usage-Guidance:
+        This exception is used together with one of the above SPDX-Licenses
+	to mark user-space API (uapi) header files so they can be included
+	into non GPL compliant user-space application code.
+        To use this exception add it with the keyword WITH to one of the
+	identifiers in the SPDX-Licenses tag:
+	  SPDX-License-Identifier: <SPDX-License> WITH Linux-syscall-note
+      Exception-Text:
+        Full exception text
+
+   ::
+
+      SPDX-Exception-Identifier: GCC-exception-2.0
+      SPDX-URL: https://spdx.org/licenses/GCC-exception-2.0.html
+      SPDX-Licenses: GPL-2.0, GPL-2.0+
+      Usage-Guidance:
+        The "GCC Runtime Library exception 2.0" is used together with one
+	of the above SPDX-Licenses for code imported from the GCC runtime
+	library.
+        To use this exception add it with the keyword WITH to one of the
+	identifiers in the SPDX-Licenses tag:
+	  SPDX-License-Identifier: <SPDX-License> WITH GCC-exception-2.0
+      Exception-Text:
+        Full exception text
+
+
+所有SPDX許可證標識符和例外都必須在LICENSES子目錄中具有相應的文件。這是允許
+工具驗證（例如checkpatch.pl）以及準備好從源讀取和提取許可證所必需的, 這是
+各種FOSS組織推薦的，例如 `FSFE REUSE initiative <https://reuse.software/>`_.
+
+_`模塊許可`
+-----------------
+
+   可加載內核模塊還需要MODULE_LICENSE（）標記。此標記既不替代正確的原始碼
+   許可證信息（SPDX-License-Identifier），也不以任何方式表示或確定提供模塊
+   原始碼的確切許可證。
+
+   此標記的唯一目的是提供足夠的信息，該模塊是否是自由軟體或者是內核模塊加
+   載器和用戶空間工具的專有模塊。
+
+   MODULE_LICENSE（）的有效許可證字符串是:
+
+    ============================= =============================================
+    "GPL"			  模塊是根據GPL版本2許可的。這並不表示僅限於
+                                  GPL-2.0或GPL-2.0或更高版本之間的任何區別。
+                                  最正確許可證信息只能通過相應源文件中的許可證
+                                  信息來確定
+
+    "GPL v2"			  和"GPL"相同，它的存在是因爲歷史原因。
+
+    "GPL and additional rights"   表示模塊源在GPL v2變體和MIT許可下雙重許可的
+                                  歷史變體。請不要在新代碼中使用。
+
+    "Dual MIT/GPL"		  表達該模塊在GPL v2變體或MIT許可證選擇下雙重
+                                  許可的正確方式。
+
+    "Dual BSD/GPL"		  該模塊根據GPL v2變體或BSD許可證選擇進行雙重
+                                  許可。 BSD許可證的確切變體只能通過相應源文件
+                                  中的許可證信息來確定。
+
+    "Dual MPL/GPL"		  該模塊根據GPL v2變體或Mozilla Public License
+                                  （MPL）選項進行雙重許可。 MPL許可證的確切變體
+                                  只能通過相應的源文件中的許可證信息來確定。
+
+    "Proprietary"		  該模塊屬於專有許可。此字符串僅用於專有的第三
+                                  方模塊，不能用於在內核樹中具有原始碼的模塊。
+                                  以這種方式標記的模塊在加載時會使用'P'標記汙
+                                  染內核，並且內核模塊加載器拒絕將這些模塊連結
+                                  到使用EXPORT_SYMBOL_GPL（）導出的符號。
+    ============================= =============================================
+
+
diff --git a/Documentation/translations/zh_TW/process/magic-number.rst b/Documentation/translations/zh_TW/process/magic-number.rst
new file mode 100644
index 000000000000..ae321a9aaece
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/magic-number.rst
@@ -0,0 +1,148 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _tw_magicnumbers:
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/magic-number.rst <magicnumbers>`
+
+如果想評論或更新本文的內容，請直接發信到LKML。如果你使用英文交流有困難的話，也可
+以向中文版維護者求助。如果本翻譯更新不及時或者翻譯存在問題，請聯繫中文版維護者::
+
+        中文版維護者： 賈威威 Jia Wei Wei <harryxiyou@gmail.com>
+        中文版翻譯者： 賈威威 Jia Wei Wei <harryxiyou@gmail.com>
+        中文版校譯者： 賈威威 Jia Wei Wei <harryxiyou@gmail.com>
+                      胡皓文 Hu Haowen <src.res@email.cn>
+
+Linux 魔術數
+============
+
+這個文件是有關當前使用的魔術值註冊表。當你給一個結構添加了一個魔術值，你也應該把這個魔術值添加到這個文件，因爲我們最好把用於各種結構的魔術值統一起來。
+
+使用魔術值來保護內核數據結構是一個非常好的主意。這就允許你在運行期檢查(a)一個結構是否已經被攻擊，或者(b)你已經給一個例行程序通過了一個錯誤的結構。後一種情況特別地有用---特別是當你通過一個空指針指向結構體的時候。tty源碼，例如，經常通過特定驅動使用這種方法並且反覆地排列特定方面的結構。
+
+使用魔術值的方法是在結構的開始處聲明的，如下::
+
+        struct tty_ldisc {
+	        int	magic;
+        	...
+        };
+
+當你以後給內核添加增強功能的時候，請遵守這條規則！這樣就會節省數不清的調試時間，特別是一些古怪的情況，例如，數組超出範圍並且重新寫了超出部分。遵守這個規則，‪這些情況可以被快速地，安全地避免。
+
+		Theodore Ts'o
+		  31 Mar 94
+
+給當前的Linux 2.1.55添加魔術表。
+
+		Michael Chastain
+		<mailto:mec@shout.net>
+		22 Sep 1997
+
+現在應該最新的Linux 2.1.112.因爲在特性凍結期間，不能在2.2.x前改變任何東西。這些條目被數域所排序。
+
+		Krzysztof G.Baranowski
+	        <mailto: kgb@knm.org.pl>
+		29 Jul 1998
+
+更新魔術表到Linux 2.5.45。剛好越過特性凍結，但是有可能還會有一些新的魔術值在2.6.x之前融入到內核中。
+
+		Petr Baudis
+		<pasky@ucw.cz>
+		03 Nov 2002
+
+更新魔術表到Linux 2.5.74。
+
+		Fabian Frederick
+                <ffrederick@users.sourceforge.net>
+		09 Jul 2003
+
+===================== ================ ======================== ==========================================
+魔術數名              數字             結構                     文件
+===================== ================ ======================== ==========================================
+PG_MAGIC              'P'              pg_{read,write}_hdr      ``include/linux/pg.h``
+CMAGIC                0x0111           user                     ``include/linux/a.out.h``
+MKISS_DRIVER_MAGIC    0x04bf           mkiss_channel            ``drivers/net/mkiss.h``
+HDLC_MAGIC            0x239e           n_hdlc                   ``drivers/char/n_hdlc.c``
+APM_BIOS_MAGIC        0x4101           apm_user                 ``arch/x86/kernel/apm_32.c``
+DB_MAGIC              0x4442           fc_info                  ``drivers/net/iph5526_novram.c``
+DL_MAGIC              0x444d           fc_info                  ``drivers/net/iph5526_novram.c``
+FASYNC_MAGIC          0x4601           fasync_struct            ``include/linux/fs.h``
+FF_MAGIC              0x4646           fc_info                  ``drivers/net/iph5526_novram.c``
+PTY_MAGIC             0x5001                                    ``drivers/char/pty.c``
+PPP_MAGIC             0x5002           ppp                      ``include/linux/if_pppvar.h``
+SSTATE_MAGIC          0x5302           serial_state             ``include/linux/serial.h``
+SLIP_MAGIC            0x5302           slip                     ``drivers/net/slip.h``
+STRIP_MAGIC           0x5303           strip                    ``drivers/net/strip.c``
+SIXPACK_MAGIC         0x5304           sixpack                  ``drivers/net/hamradio/6pack.h``
+AX25_MAGIC            0x5316           ax_disp                  ``drivers/net/mkiss.h``
+TTY_MAGIC             0x5401           tty_struct               ``include/linux/tty.h``
+MGSL_MAGIC            0x5401           mgsl_info                ``drivers/char/synclink.c``
+TTY_DRIVER_MAGIC      0x5402           tty_driver               ``include/linux/tty_driver.h``
+MGSLPC_MAGIC          0x5402           mgslpc_info              ``drivers/char/pcmcia/synclink_cs.c``
+USB_SERIAL_MAGIC      0x6702           usb_serial               ``drivers/usb/serial/usb-serial.h``
+FULL_DUPLEX_MAGIC     0x6969                                    ``drivers/net/ethernet/dec/tulip/de2104x.c``
+USB_BLUETOOTH_MAGIC   0x6d02           usb_bluetooth            ``drivers/usb/class/bluetty.c``
+RFCOMM_TTY_MAGIC      0x6d02                                    ``net/bluetooth/rfcomm/tty.c``
+USB_SERIAL_PORT_MAGIC 0x7301           usb_serial_port          ``drivers/usb/serial/usb-serial.h``
+CG_MAGIC              0x00090255       ufs_cylinder_group       ``include/linux/ufs_fs.h``
+LSEMAGIC              0x05091998       lse                      ``drivers/fc4/fc.c``
+GDTIOCTL_MAGIC        0x06030f07       gdth_iowr_str            ``drivers/scsi/gdth_ioctl.h``
+RIEBL_MAGIC           0x09051990                                ``drivers/net/atarilance.c``
+NBD_REQUEST_MAGIC     0x12560953       nbd_request              ``include/linux/nbd.h``
+RED_MAGIC2            0x170fc2a5       (any)                    ``mm/slab.c``
+BAYCOM_MAGIC          0x19730510       baycom_state             ``drivers/net/baycom_epp.c``
+ISDN_X25IFACE_MAGIC   0x1e75a2b9       isdn_x25iface_proto_data ``drivers/isdn/isdn_x25iface.h``
+ECP_MAGIC             0x21504345       cdkecpsig                ``include/linux/cdk.h``
+LSOMAGIC              0x27091997       lso                      ``drivers/fc4/fc.c``
+LSMAGIC               0x2a3b4d2a       ls                       ``drivers/fc4/fc.c``
+WANPIPE_MAGIC         0x414C4453       sdla_{dump,exec}         ``include/linux/wanpipe.h``
+CS_CARD_MAGIC         0x43525553       cs_card                  ``sound/oss/cs46xx.c``
+LABELCL_MAGIC         0x4857434c       labelcl_info_s           ``include/asm/ia64/sn/labelcl.h``
+ISDN_ASYNC_MAGIC      0x49344C01       modem_info               ``include/linux/isdn.h``
+CTC_ASYNC_MAGIC       0x49344C01       ctc_tty_info             ``drivers/s390/net/ctctty.c``
+ISDN_NET_MAGIC        0x49344C02       isdn_net_local_s         ``drivers/isdn/i4l/isdn_net_lib.h``
+SAVEKMSG_MAGIC2       0x4B4D5347       savekmsg                 ``arch/*/amiga/config.c``
+CS_STATE_MAGIC        0x4c4f4749       cs_state                 ``sound/oss/cs46xx.c``
+SLAB_C_MAGIC          0x4f17a36d       kmem_cache               ``mm/slab.c``
+COW_MAGIC             0x4f4f4f4d       cow_header_v1            ``arch/um/drivers/ubd_user.c``
+I810_CARD_MAGIC       0x5072696E       i810_card                ``sound/oss/i810_audio.c``
+TRIDENT_CARD_MAGIC    0x5072696E       trident_card             ``sound/oss/trident.c``
+ROUTER_MAGIC          0x524d4157       wan_device               [in ``wanrouter.h`` pre 3.9]
+SAVEKMSG_MAGIC1       0x53415645       savekmsg                 ``arch/*/amiga/config.c``
+GDA_MAGIC             0x58464552       gda                      ``arch/mips/include/asm/sn/gda.h``
+RED_MAGIC1            0x5a2cf071       (any)                    ``mm/slab.c``
+EEPROM_MAGIC_VALUE    0x5ab478d2       lanai_dev                ``drivers/atm/lanai.c``
+HDLCDRV_MAGIC         0x5ac6e778       hdlcdrv_state            ``include/linux/hdlcdrv.h``
+PCXX_MAGIC            0x5c6df104       channel                  ``drivers/char/pcxx.h``
+KV_MAGIC              0x5f4b565f       kernel_vars_s            ``arch/mips/include/asm/sn/klkernvars.h``
+I810_STATE_MAGIC      0x63657373       i810_state               ``sound/oss/i810_audio.c``
+TRIDENT_STATE_MAGIC   0x63657373       trient_state             ``sound/oss/trident.c``
+M3_CARD_MAGIC         0x646e6f50       m3_card                  ``sound/oss/maestro3.c``
+FW_HEADER_MAGIC       0x65726F66       fw_header                ``drivers/atm/fore200e.h``
+SLOT_MAGIC            0x67267321       slot                     ``drivers/hotplug/cpqphp.h``
+SLOT_MAGIC            0x67267322       slot                     ``drivers/hotplug/acpiphp.h``
+LO_MAGIC              0x68797548       nbd_device               ``include/linux/nbd.h``
+M3_STATE_MAGIC        0x734d724d       m3_state                 ``sound/oss/maestro3.c``
+VMALLOC_MAGIC         0x87654320       snd_alloc_track          ``sound/core/memory.c``
+KMALLOC_MAGIC         0x87654321       snd_alloc_track          ``sound/core/memory.c``
+PWC_MAGIC             0x89DC10AB       pwc_device               ``drivers/usb/media/pwc.h``
+NBD_REPLY_MAGIC       0x96744668       nbd_reply                ``include/linux/nbd.h``
+ENI155_MAGIC          0xa54b872d       midway_eprom	        ``drivers/atm/eni.h``
+CODA_MAGIC            0xC0DAC0DA       coda_file_info           ``fs/coda/coda_fs_i.h``
+DPMEM_MAGIC           0xc0ffee11       gdt_pci_sram             ``drivers/scsi/gdth.h``
+YAM_MAGIC             0xF10A7654       yam_port                 ``drivers/net/hamradio/yam.c``
+CCB_MAGIC             0xf2691ad2       ccb                      ``drivers/scsi/ncr53c8xx.c``
+QUEUE_MAGIC_FREE      0xf7e1c9a3       queue_entry              ``drivers/scsi/arm/queue.c``
+QUEUE_MAGIC_USED      0xf7e1cc33       queue_entry              ``drivers/scsi/arm/queue.c``
+HTB_CMAGIC            0xFEFAFEF1       htb_class                ``net/sched/sch_htb.c``
+NMI_MAGIC             0x48414d4d455201 nmi_s                    ``arch/mips/include/asm/sn/nmi.h``
+===================== ================ ======================== ==========================================
+
+
+請注意，在聲音記憶管理中仍然有一些特殊的爲每個驅動定義的魔術值。查看include/sound/sndmagic.h來獲取他們完整的列表信息。很多OSS聲音驅動擁有自己從音效卡PCI ID構建的魔術值-他們也沒有被列在這裡。
+
+IrDA子系統也使用了大量的自己的魔術值，查看include/net/irda/irda.h來獲取他們完整的信息。
+
+HFS是另外一個比較大的使用魔術值的文件系統-你可以在fs/hfs/hfs.h中找到他們。
+
diff --git a/Documentation/translations/zh_TW/process/management-style.rst b/Documentation/translations/zh_TW/process/management-style.rst
new file mode 100644
index 000000000000..dce248470063
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/management-style.rst
@@ -0,0 +1,211 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/management-style.rst <managementstyle>`
+:Translator: Alex Shi <alex.shi@linux.alibaba.com>
+             Hu Haowen <src.res@email.cn>
+
+.. _tw_managementstyle:
+
+Linux內核管理風格
+=================
+
+這是一個簡短的文檔，描述了Linux內核首選的（或胡編的，取決於您問誰）管理風格。
+它的目的是在某種程度上參照 :ref:`process/coding-style.rst <codingstyle>`
+主要是爲了避免反覆回答 [#cnf1]_ 相同（或類似）的問題。
+
+管理風格是非常個人化的，比簡單的編碼風格規則更難以量化，因此本文檔可能與實
+際情況有關，也可能與實際情況無關。起初它是一個玩笑，但這並不意味著它可能不
+是真的。你得自己決定。
+
+順便說一句，在談到「核心管理者」時，主要是技術負責人，而不是在公司內部進行傳
+統管理的人。如果你簽署了採購訂單或者對你的團隊的預算有任何了解，你幾乎肯定
+不是一個核心管理者。這些建議可能適用於您，也可能不適用於您。
+
+首先，我建議你購買「高效人的七個習慣」，而不是閱讀它。燒了它，這是一個偉大的
+象徵性姿態。
+
+.. [#cnf1] 本文件並不是通過回答問題，而是通過讓提問者痛苦地明白，我們不知道
+   答案是什麼。
+
+不管怎樣，這裡是：
+
+.. _tw_decisions:
+
+1）決策
+-------
+
+每個人都認爲管理者做決定，而且決策很重要。決定越大越痛苦，管理者就必須越高級。
+這很明顯，但事實並非如此。
+
+遊戲的名字是 **避免** 做出決定。尤其是，如果有人告訴你「選擇（a）或（b），
+我們真的需要你來做決定」，你就是陷入麻煩的管理者。你管理的人比你更了解細節，
+所以如果他們來找你做技術決策，你完蛋了。你顯然沒有能力爲他們做這個決定。
+
+（推論：如果你管理的人不比你更了解細節，你也會被搞砸，儘管原因完全不同。
+也就是說，你的工作是錯的，他們應該管理你的才智）
+
+所以遊戲的名字是 **避免** 做出決定，至少是那些大而痛苦的決定。做一些小的
+和非結果性的決定是很好的，並且使您看起來好像知道自己在做什麼，所以內核管理者
+需要做的是將那些大的和痛苦的決定變成那些沒有人真正關心的小事情。
+
+這有助於認識到一個大的決定和一個小的決定之間的關鍵區別是你是否可以在事後修正
+你的決定。任何決定都可以通過始終確保如果你錯了（而且你一定會錯），你以後總是
+可以通過回溯來彌補損失。突然間，你就要做兩個無關緊要的決定，一個是錯誤的，另
+一個是正確的。
+
+人們甚至會認爲這是真正的領導能力（咳，胡說，咳）。
+
+因此，避免重大決策的關鍵在於避免做那些無法挽回的事情。不要被引導到一個你無法
+逃離的角落。走投無路的老鼠可能很危險——走投無路的管理者真可憐。
+
+事實證明，由於沒有人會愚蠢到讓內核管理者承擔巨大的財政責任，所以通常很容易
+回溯。既然你不可能浪費掉你無法償還的巨額資金，你唯一可以回溯的就是技術決策，
+而回溯很容易：只要告訴大家你是個不稱職的傻瓜，說對不起，然後撤銷你去年讓別
+人所做的毫無價值的工作。突然間，你一年前做的決定不在是一個重大的決定，因爲
+它很容易被推翻。
+
+事實證明，有些人對接受這種方法有困難，原因有兩個：
+
+ - 承認你是個白癡比看起來更難。我們都喜歡保持形象，在公共場合說你錯了有時
+   確實很難。
+ - 如果有人告訴你，你去年所做的工作終究是不值得的，那麼對那些可憐的低級工
+   程師來說也是很困難的，雖然實際的 **工作** 很容易刪除，但你可能已經不可
+   挽回地失去了工程師的信任。記住：「不可撤銷」是我們一開始就試圖避免的，
+   而你的決定終究是一個重大的決定。
+
+令人欣慰的是，這兩個原因都可以通過預先承認你沒有任何線索，提前告訴人們你的
+決定完全是初步的，而且可能是錯誤的事情來有效地緩解。你應該始終保留改變主意
+的權利，並讓人們 **意識** 到這一點。當你 **還沒有** 做過真正愚蠢的事情的時
+候，承認自己是愚蠢的要容易得多。
+
+然後，當它真的被證明是愚蠢的時候，人們就轉動他們的眼珠說「哎呀，下次不要了」。
+
+這種對不稱職的先發制人的承認，也可能使真正做這項工作的人也會三思是否值得做。
+畢竟，如果他們不確定這是否是一個好主意，你肯定不應該通過向他們保證他們所做
+的工作將會進入（內核）鼓勵他們。在他們開始一項巨大的努力之前，至少讓他們三
+思而後行。
+
+記住：他們最好比你更了解細節，而且他們通常認爲他們對每件事都有答案。作爲一
+個管理者，你能做的最好的事情不是灌輸自信，而是對他們所做的事情進行健康的批
+判性思考。
+
+順便說一句，另一種避免做出決定的方法是看起來很可憐的抱怨 「我們不能兩者兼
+得嗎？」 相信我，它是有效的。如果不清楚哪種方法更好，他們最終會弄清楚的。
+最終的答案可能是兩個團隊都會因爲這種情況而感到沮喪，以至於他們放棄了。
+
+這聽起來像是一個失敗，但這通常是一個跡象，表明兩個項目都有問題，而參與其中
+的人不能做決定的原因是他們都是錯誤的。你最終會聞到玫瑰的味道，你避免了另一
+個你本可以搞砸的決定。
+
+2）人
+-----
+
+大多數人都是白癡，做一名管理者意味著你必須處理好這件事，也許更重要的是,
+**他們** 必須處理好你。
+
+事實證明，雖然很容易糾正技術錯誤，但不容易糾正人格障礙。你只能和他們的和
+你的（人格障礙）共處。
+
+但是，爲了做好作爲內核管理者的準備，最好記住不要燒掉任何橋樑，不要轟炸任何
+無辜的村民，也不要疏遠太多的內核開發人員。事實證明，疏遠人是相當容易的，而
+親近一個疏遠的人是很難的。因此，「疏遠」立即屬於「不可逆」的範疇，並根據
+:ref:`tw_decisions` 成爲絕不可以做的事情。
+
+這裡只有幾個簡單的規則：
+
+ (1) 不要叫人笨蛋（至少不要在公共場合）
+ (2) 學習如何在忘記規則(1)時道歉
+
+問題在於 #1 很容易去做，因爲你可以用數百萬種不同的方式說「你是一個笨蛋」 [#cnf2]_
+有時甚至沒有意識到，而且幾乎總是帶著一種白熱化的信念，認爲你是對的。
+
+你越確信自己是對的（讓我們面對現實吧，你可以把幾乎所有人都稱爲壞人，而且你
+經常是對的），事後道歉就越難。
+
+要解決此問題，您實際上只有兩個選項：
+
+ - 非常擅長道歉
+ - 把「愛」均勻地散開，沒有人會真正感覺到自己被不公平地瞄準了。讓它有足夠的
+   創造性，他們甚至可能會覺得好笑。
+
+選擇永遠保持禮貌是不存在的。沒有人會相信一個如此明顯地隱藏了他們真實性格的人。
+
+.. [#cnf2] 保羅·西蒙演唱了「離開愛人的50種方法」，因爲坦率地說，「告訴開發者
+   他們是D*CKHEAD" 的100萬種方法都無法確認。但我確信他已經這麼想了。
+
+3）人2 - 好人
+-------------
+
+雖然大多數人都是白癡，但不幸的是，據此推論你也是白癡，儘管我們都自我感覺良
+好，我們比普通人更好（讓我們面對現實吧，沒有人相信他們是普通人或低於普通人），
+我們也應該承認我們不是最鋒利的刀，而且會有其他人比你更不像白癡。
+
+有些人對聰明人反應不好。其他人利用它們。
+
+作爲內核維護人員，確保您在第二組中。接受他們，因爲他們會讓你的工作更容易。
+特別是，他們能夠爲你做決定，這就是遊戲的全部內容。
+
+所以當你發現一個比你聰明的人時，就順其自然吧。你的管理職責在很大程度上變成
+了「聽起來像是個好主意——去嘗試吧」，或者「聽起來不錯，但是XXX呢？」「。第二個版
+本尤其是一個很好的方法，要麼學習一些關於「XXX」的新東西，要麼通過指出一些聰明
+人沒有想到的東西來顯得更具管理性。無論哪種情況，你都會贏。
+
+要注意的一件事是認識到一個領域的偉大不一定會轉化爲其他領域。所以你可能會向
+特定的方向刺激人們，但讓我們面對現實吧，他們可能擅長他們所做的事情，而且對
+其他事情都很差勁。好消息是，人們往往會自然而然地重拾他們擅長的東西，所以當
+你向某個方向刺激他們時，你並不是在做不可逆轉的事情，只是不要用力推。
+
+4）責備
+-------
+
+事情會出問題的，人們希望去責備人。貼標籤，你就是受責備的人。
+
+事實上，接受責備並不難，尤其是當人們意識到這不 **全是** 你的過錯時。這讓我
+們找到了承擔責任的最佳方式：爲別人承擔這件事。你會感覺很好，他們會感覺很好，
+沒有受到指責. 那誰，失去了他們的全部36GB色情收藏的人，因爲你的無能將勉強承
+認，你至少沒有試圖逃避責任。
+
+然後讓真正搞砸了的開發人員（如果你能找到他們）私下知道他們搞砸了。不僅是爲
+了將來可以避免，而且爲了讓他們知道他們欠你一個人情。而且，也許更重要的是，
+他們也可能是能夠解決問題的人。因爲，讓我們面對現實吧，肯定不是你。
+
+承擔責任也是你首先成爲管理者的原因。這是讓人們信任你，讓你獲得潛在的榮耀的
+一部分，因爲你就是那個會說「我搞砸了」的人。如果你已經遵循了以前的規則，你現
+在已經很擅長說了。
+
+5）應避免的事情
+---------------
+
+有一件事人們甚至比被稱爲「笨蛋」更討厭，那就是在一個神聖的聲音中被稱爲「笨蛋」。
+第一個你可以道歉，第二個你不會真正得到機會。即使你做得很好，他們也可能不再
+傾聽。
+
+我們都認爲自己比別人強，這意味著當別人裝腔作勢時，這會讓我們很惱火。你也許
+在道德和智力上比你周圍的每個人都優越，但不要試圖太明顯，除非你真的打算激怒
+某人 [#cnf3]_
+
+同樣，不要對事情太客氣或太微妙。禮貌很容易落得落花流水，把問題隱藏起來，
+正如他們所說，「在網際網路上，沒人能聽到你的含蓄。」用一個鈍器把這一點錘進去，
+因爲你不能真的依靠別人來獲得你的觀點。
+
+一些幽默可以幫助緩和直率和道德化。過度到荒謬的地步，可以灌輸一個觀點，而不
+會讓接受者感到痛苦，他們只是認爲你是愚蠢的。因此，它可以幫助我們擺脫對批評
+的個人心理障礙。
+
+.. [#cnf3] 提示：與你的工作沒有直接關係的網絡新聞組是消除你對他人不滿的好
+   方法。偶爾寫些侮辱性的帖子，打個噴嚏，讓你的情緒得到淨化。別把牢騷帶回家
+
+6）爲什麼是我？
+---------------
+
+既然你的主要責任似乎是爲別人的錯誤承擔責任，並且讓別人痛苦地明白你是不稱職
+的，那麼顯而易見的問題之一就變成了爲什麼首先要這樣做。
+
+首先，雖然你可能會或可能不會聽到十幾歲女孩（或男孩，讓我們不要在這裡評判或
+性別歧視）敲你的更衣室門，你會得到一個巨大的個人成就感爲「負責」。別介意你真
+的在領導別人，你要跟上別人，儘可能快地追趕他們。每個人都會認爲你是負責人。
+
+如果你可以做到這個， 這是個偉大的工作！
+
diff --git a/Documentation/translations/zh_TW/process/programming-language.rst b/Documentation/translations/zh_TW/process/programming-language.rst
new file mode 100644
index 000000000000..54e3699eadf8
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/programming-language.rst
@@ -0,0 +1,76 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/programming-language.rst <programming_language>`
+:Translator: Alex Shi <alex.shi@linux.alibaba.com>
+             Hu Haowen <src.res@email.cn>
+
+.. _tw_programming_language:
+
+程序設計語言
+============
+
+內核是用C語言 :ref:`c-language <tw_c-language>` 編寫的。更準確地說，內核通常是用 :ref:`gcc <tw_gcc>`
+在 ``-std=gnu89`` :ref:`gcc-c-dialect-options <tw_gcc-c-dialect-options>` 下編譯的：ISO C90的 GNU 方言（
+包括一些C99特性）
+
+這種方言包含對語言 :ref:`gnu-extensions <tw_gnu-extensions>` 的許多擴展，當然，它們許多都在內核中使用。
+
+對於一些體系結構，有一些使用 :ref:`clang <tw_clang>` 和 :ref:`icc <tw_icc>` 編譯內核
+的支持，儘管在編寫此文檔時還沒有完成，仍需要第三方補丁。
+
+屬性
+----
+
+在整個內核中使用的一個常見擴展是屬性（attributes） :ref:`gcc-attribute-syntax <tw_gcc-attribute-syntax>`
+屬性允許將實現定義的語義引入語言實體（如變量、函數或類型），而無需對語言進行
+重大的語法更改（例如添加新關鍵字） :ref:`n2049 <tw_n2049>`
+
+在某些情況下，屬性是可選的（即不支持這些屬性的編譯器仍然應該生成正確的代碼，
+即使其速度較慢或執行的編譯時檢查/診斷次數不夠）
+
+內核定義了僞關鍵字（例如， ``pure`` ），而不是直接使用GNU屬性語法（例如,
+``__attribute__((__pure__))`` ），以檢測可以使用哪些關鍵字和/或縮短代碼, 具體
+請參閱 ``include/linux/compiler_attributes.h``
+
+.. _tw_c-language:
+
+c-language
+   http://www.open-std.org/jtc1/sc22/wg14/www/standards
+
+.. _tw_gcc:
+
+gcc
+   https://gcc.gnu.org
+
+.. _tw_clang:
+
+clang
+   https://clang.llvm.org
+
+.. _tw_icc:
+
+icc
+   https://software.intel.com/en-us/c-compilers
+
+.. _tw_gcc-c-dialect-options:
+
+c-dialect-options
+   https://gcc.gnu.org/onlinedocs/gcc/C-Dialect-Options.html
+
+.. _tw_gnu-extensions:
+
+gnu-extensions
+   https://gcc.gnu.org/onlinedocs/gcc/C-Extensions.html
+
+.. _tw_gcc-attribute-syntax:
+
+gcc-attribute-syntax
+   https://gcc.gnu.org/onlinedocs/gcc/Attribute-Syntax.html
+
+.. _tw_n2049:
+
+n2049
+   http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2049.pdf
+
diff --git a/Documentation/translations/zh_TW/process/stable-api-nonsense.rst b/Documentation/translations/zh_TW/process/stable-api-nonsense.rst
new file mode 100644
index 000000000000..22caa5b8d422
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/stable-api-nonsense.rst
@@ -0,0 +1,159 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _tw_stable_api_nonsense:
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/stable-api-nonsense.rst
+           <stable_api_nonsense>`
+
+譯者::
+
+        中文版維護者： 鍾宇  TripleX Chung <xxx.phy@gmail.com>
+        中文版翻譯者： 鍾宇  TripleX Chung <xxx.phy@gmail.com>
+        中文版校譯者： 李陽  Li Yang <leoyang.li@nxp.com>
+                      胡皓文 Hu Haowen <src.res@email.cn>
+
+Linux 內核驅動接口
+==================
+
+寫作本文檔的目的，是爲了解釋爲什麼Linux既沒有二進位內核接口，也沒有穩定
+的內核接口。這裡所說的內核接口，是指內核里的接口，而不是內核和用戶空間
+的接口。內核到用戶空間的接口，是提供給應用程式使用的系統調用，系統調用
+在歷史上幾乎沒有過變化，將來也不會有變化。我有一些老應用程式是在0.9版本
+或者更早版本的內核上編譯的，在使用2.6版本內核的Linux發布上依然用得很好
+。用戶和應用程式作者可以將這個接口看成是穩定的。
+
+
+執行綱要
+--------
+
+你也許以爲自己想要穩定的內核接口，但是你不清楚你要的實際上不是它。你需
+要的其實是穩定的驅動程序，而你只有將驅動程序放到公版內核的原始碼樹里，
+才有可能達到這個目的。而且這樣做還有很多其它好處，正是因爲這些好處使得
+Linux能成爲強壯，穩定，成熟的作業系統，這也是你最開始選擇Linux的原因。
+
+
+入門
+-----
+
+只有那些寫驅動程序的「怪人」才會擔心內核接口的改變，對廣大用戶來說，既
+看不到內核接口，也不需要去關心它。
+
+首先，我不打算討論關於任何非GPL許可的內核驅動的法律問題，這些非GPL許可
+的驅動程序包括不公開原始碼，隱藏原始碼，二進位或者是用原始碼包裝，或者
+是其它任何形式的不能以GPL許可公開原始碼的驅動程序。如果有法律問題，請咨
+詢律師，我只是一個程式設計師，所以我只打算探討技術問題（不是小看法律問題，
+法律問題很實際，並且需要一直關注）。
+
+既然只談技術問題，我們就有了下面兩個主題：二進位內核接口和穩定的內核源
+代碼接口。這兩個問題是互相關聯的，讓我們先解決掉二進位接口的問題。
+
+
+二進位內核接口
+--------------
+假如我們有一個穩定的內核原始碼接口，那麼自然而然的，我們就擁有了穩定的
+二進位接口，是這樣的嗎？錯。讓我們看看關於Linux內核的幾點事實：
+
+    - 取決於所用的C編譯器的版本，不同的內核數據結構里的結構體的對齊方
+      式會有差別，代碼中不同函數的表現形式也不一樣（函數是不是被inline
+      編譯取決於編譯器行爲）。不同的函數的表現形式並不重要，但是數據
+      結構內部的對齊方式很關鍵。
+
+    - 取決於內核的配置選項，不同的選項會讓內核的很多東西發生改變：
+
+      - 同一個結構體可能包含不同的成員變量
+      - 有的函數可能根本不會被實現（比如編譯的時候沒有選擇SMP支持
+        一些鎖函數就會被定義成空函數）。
+      - 內核使用的內存會以不同的方式對齊，這取決於不同的內核配置選
+        項。
+
+    - Linux可以在很多的不同體系結構的處理器上運行。在某個體系結構上編
+      譯好的二進位驅動程序，不可能在另外一個體系結構上正確的運行。
+
+對於一個特定的內核，滿足這些條件並不難，使用同一個C編譯器和同樣的內核配
+置選項來編譯驅動程序模塊就可以了。這對於給一個特定Linux發布的特定版本提
+供驅動程序，是完全可以滿足需求的。但是如果你要給不同發布的不同版本都發
+布一個驅動程序，就需要在每個發布上用不同的內核設置參數都編譯一次內核，
+這簡直跟噩夢一樣。而且還要注意到，每個Linux發布還提供不同的Linux內核，
+這些內核都針對不同的硬體類型進行了優化（有很多種不同的處理器，還有不同
+的內核設置選項）。所以每發布一次驅動程序，都需要提供很多不同版本的內核
+模塊。
+
+相信我，如果你真的要採取這種發布方式，一定會慢慢瘋掉，我很久以前就有過
+深刻的教訓...
+
+
+穩定的內核原始碼接口
+--------------------
+
+如果有人不將他的內核驅動程序，放入公版內核的原始碼樹，而又想讓驅動程序
+一直保持在最新的內核中可用，那麼這個話題將會變得沒完沒了。
+內核開發是持續而且快節奏的，從來都不會慢下來。內核開發人員在當前接口中
+找到bug，或者找到更好的實現方式。一旦發現這些，他們就很快會去修改當前的
+接口。修改接口意味著，函數名可能會改變，結構體可能被擴充或者刪減，函數
+的參數也可能發生改變。一旦接口被修改，內核中使用這些接口的地方需要同時
+修正，這樣才能保證所有的東西繼續工作。
+
+舉一個例子，內核的USB驅動程序接口在USB子系統的整個生命周期中，至少經歷
+了三次重寫。這些重寫解決以下問題：
+
+    - 把數據流從同步模式改成非同步模式，這個改動減少了一些驅動程序的
+      複雜度，提高了所有USB驅動程序的吞吐率，這樣幾乎所有的USB設備都
+      能以最大速率工作了。
+    - 修改了USB核心代碼中爲USB驅動分配數據包內存的方式，所有的驅動都
+      需要提供更多的參數給USB核心，以修正了很多已經被記錄在案的死鎖。
+
+這和一些封閉原始碼的作業系統形成鮮明的對比，在那些作業系統上，不得不額
+外的維護舊的USB接口。這導致了一個可能性，新的開發者依然會不小心使用舊的
+接口，以不恰當的方式編寫代碼，進而影響到作業系統的穩定性。
+在上面的例子中，所有的開發者都同意這些重要的改動，在這樣的情況下修改代
+價很低。如果Linux保持一個穩定的內核原始碼接口，那麼就得創建一個新的接口
+；舊的，有問題的接口必須一直維護，給Linux USB開發者帶來額外的工作。既然
+所有的Linux USB驅動的作者都是利用自己的時間工作，那麼要求他們去做毫無意
+義的免費額外工作，是不可能的。
+安全問題對Linux來說十分重要。一個安全問題被發現，就會在短時間內得到修
+正。在很多情況下，這將導致Linux內核中的一些接口被重寫，以從根本上避免安
+全問題。一旦接口被重寫，所有使用這些接口的驅動程序，必須同時得到修正，
+以確定安全問題已經得到修復並且不可能在未來還有同樣的安全問題。如果內核
+內部接口不允許改變，那麼就不可能修復這樣的安全問題，也不可能確認這樣的
+安全問題以後不會發生。
+開發者一直在清理內核接口。如果一個接口沒有人在使用了，它就會被刪除。這
+樣可以確保內核儘可能的小，而且所有潛在的接口都會得到儘可能完整的測試
+（沒有人使用的接口是不可能得到良好的測試的）。
+
+
+要做什麼
+--------
+
+如果你寫了一個Linux內核驅動，但是它還不在Linux原始碼樹里，作爲一個開發
+者，你應該怎麼做？爲每個發布的每個版本提供一個二進位驅動，那簡直是一個
+噩夢，要跟上永遠處於變化之中的內核接口，也是一件辛苦活。
+很簡單，讓你的驅動進入內核原始碼樹（要記得我們在談論的是以GPL許可發行
+的驅動，如果你的代碼不符合GPL，那麼祝你好運，你只能自己解決這個問題了，
+你這個吸血鬼<把Andrew和Linus對吸血鬼的定義連結到這裡>）。當你的代碼加入
+公版內核原始碼樹之後，如果一個內核接口改變，你的驅動會直接被修改接口的
+那個人修改。保證你的驅動永遠都可以編譯通過，並且一直工作，你幾乎不需要
+做什麼事情。
+
+把驅動放到內核原始碼樹里會有很多的好處：
+
+    - 驅動的質量會提升，而維護成本（對原始作者來說）會下降。
+    - 其他人會給驅動添加新特性。
+    - 其他人會找到驅動中的bug並修復。
+    - 其他人會在驅動中找到性能優化的機會。
+    - 當外部的接口的改變需要修改驅動程序的時候，其他人會修改驅動程序
+    - 不需要聯繫任何發行商，這個驅動會自動的隨著所有的Linux發布一起發
+      布。
+
+和別的作業系統相比，Linux爲更多不同的設備提供現成的驅動，而且能在更多不
+同體系結構的處理器上支持這些設備。這個經過考驗的開發模式，必然是錯不了
+的 :)
+
+感謝
+----
+感謝 Randy Dunlap, Andrew Morton, David Brownell, Hanna Linder,
+Robert Love, and Nishanth Aravamudan 對於本文檔早期版本的評審和建議。
+
+英文版維護者： Greg Kroah-Hartman <greg@kroah.com>
+
diff --git a/Documentation/translations/zh_TW/process/stable-kernel-rules.rst b/Documentation/translations/zh_TW/process/stable-kernel-rules.rst
new file mode 100644
index 000000000000..9bb0d9b4f3ac
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/stable-kernel-rules.rst
@@ -0,0 +1,68 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _tw_stable_kernel_rules:
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/stable-kernel-rules.rst <stable_kernel_rules>`
+
+如果想評論或更新本文的內容，請直接聯繫原文檔的維護者。如果你使用英文
+交流有困難的話，也可以向中文版維護者求助。如果本翻譯更新不及時或者翻
+譯存在問題，請聯繫中文版維護者::
+
+        中文版維護者： 鍾宇  TripleX Chung <xxx.phy@gmail.com>
+        中文版翻譯者： 鍾宇  TripleX Chung <xxx.phy@gmail.com>
+        中文版校譯者：
+            - 李陽  Li Yang <leoyang.li@nxp.com>
+            - Kangkai Yin <e12051@motorola.com>
+            - 胡皓文 Hu Haowen <src.res@email.cn>
+
+所有你想知道的事情 - 關於linux穩定版發布
+========================================
+
+關於Linux 2.6穩定版發布，所有你想知道的事情。
+
+關於哪些類型的補丁可以被接收進入穩定版代碼樹，哪些不可以的規則：
+----------------------------------------------------------------
+
+  - 必須是顯而易見的正確，並且經過測試的。
+  - 連同上下文，不能大於100行。
+  - 必須只修正一件事情。
+  - 必須修正了一個給大家帶來麻煩的真正的bug（不是「這也許是一個問題...」
+    那樣的東西）。
+  - 必須修正帶來如下後果的問題：編譯錯誤（對被標記爲CONFIG_BROKEN的例外），
+    內核崩潰，掛起，數據損壞，真正的安全問題，或者一些類似「哦，這不
+    好」的問題。簡短的說，就是一些致命的問題。
+  - 沒有「理論上的競爭條件」，除非能給出競爭條件如何被利用的解釋。
+  - 不能存在任何的「瑣碎的」修正（拼寫修正，去掉多餘空格之類的）。
+  - 必須被相關子系統的維護者接受。
+  - 必須遵循Documentation/translations/zh_TW/process/submitting-patches.rst里的規則。
+
+向穩定版代碼樹提交補丁的過程：
+------------------------------
+
+  - 在確認了補丁符合以上的規則後，將補丁發送到stable@vger.kernel.org。
+  - 如果補丁被接受到隊列里，發送者會收到一個ACK回復，如果沒有被接受，收
+    到的是NAK回復。回復需要幾天的時間，這取決於開發者的時間安排。
+  - 被接受的補丁會被加到穩定版本隊列里，等待其他開發者的審查。
+  - 安全方面的補丁不要發到這個列表，應該發送到security@kernel.org。
+
+審查周期：
+----------
+
+  - 當穩定版的維護者決定開始一個審查周期，補丁將被發送到審查委員會，以
+    及被補丁影響的領域的維護者（除非提交者就是該領域的維護者）並且抄送
+    到linux-kernel郵件列表。
+  - 審查委員會有48小時的時間，用來決定給該補丁回復ACK還是NAK。
+  - 如果委員會中有成員拒絕這個補丁，或者linux-kernel列表上有人反對這個
+    補丁，並提出維護者和審查委員會之前沒有意識到的問題，補丁會從隊列中
+    丟棄。
+  - 在審查周期結束的時候，那些得到ACK回應的補丁將會被加入到最新的穩定版
+    發布中，一個新的穩定版發布就此產生。
+  - 安全性補丁將從內核安全小組那裡直接接收到穩定版代碼樹中，而不是通過
+    通常的審查周期。請聯繫內核安全小組以獲得關於這個過程的更多細節。
+
+審查委員會：
+------------
+  - 由一些自願承擔這項任務的內核開發者，和幾個非志願的組成。
+
diff --git a/Documentation/translations/zh_TW/process/submit-checklist.rst b/Documentation/translations/zh_TW/process/submit-checklist.rst
new file mode 100644
index 000000000000..ff2f89cba83f
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/submit-checklist.rst
@@ -0,0 +1,109 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/submit-checklist.rst <submitchecklist>`
+:Translator: Alex Shi <alex.shi@linux.alibaba.com>
+             Hu Haowen <src.res@email.cn>
+
+.. _tw_submitchecklist:
+
+Linux內核補丁提交清單
+~~~~~~~~~~~~~~~~~~~~~
+
+如果開發人員希望看到他們的內核補丁提交更快地被接受，那麼他們應該做一些基本
+的事情。
+
+這些都是在
+:ref:`Documentation/translations/zh_TW/process/submitting-patches.rst <tw_submittingpatches>`
+和其他有關提交Linux內核補丁的文檔中提供的。
+
+1) 如果使用工具，則包括定義/聲明該工具的文件。不要依賴於其他頭文件拉入您使用
+   的頭文件。
+
+2) 乾淨的編譯：
+
+   a) 使用適用或修改的 ``CONFIG`` 選項 ``=y``、``=m`` 和 ``=n`` 。沒有GCC
+      警告/錯誤，沒有連結器警告/錯誤。
+
+   b) 通過allnoconfig、allmodconfig
+
+   c) 使用 ``O=builddir`` 時可以成功編譯
+
+3) 通過使用本地交叉編譯工具或其他一些構建場在多個CPU體系結構上構建。
+
+4) PPC64是一種很好的交叉編譯檢查體系結構，因爲它傾向於對64位的數使用無符號
+   長整型。
+
+5) 如下所述 :ref:`Documentation/translations/zh_TW/process/coding-style.rst <tw_codingstyle>`.
+   檢查您的補丁是否爲常規樣式。在提交（ ``scripts/check patch.pl`` ）之前，
+   使用補丁樣式檢查器檢查是否有輕微的衝突。您應該能夠處理您的補丁中存在的所有
+   違規行爲。
+
+6) 任何新的或修改過的 ``CONFIG`` 選項都不會弄髒配置菜單，並默認爲關閉，除非
+   它們符合 ``Documentation/kbuild/kconfig-language.rst`` 中記錄的異常條件,
+   菜單屬性：默認值.
+
+7) 所有新的 ``kconfig`` 選項都有幫助文本。
+
+8) 已仔細審查了相關的 ``Kconfig`` 組合。這很難用測試來糾正——腦力在這裡是有
+   回報的。
+
+9) 用 sparse 檢查乾淨。
+
+10) 使用 ``make checkstack`` 和 ``make namespacecheck`` 並修復他們發現的任何
+    問題。
+
+    .. note::
+
+        ``checkstack`` 並沒有明確指出問題，但是任何一個在堆棧上使用超過512
+        字節的函數都可以進行更改。
+
+11) 包括 :ref:`kernel-doc <kernel_doc>` 內核文檔以記錄全局內核API。（靜態函數
+    不需要，但也可以。）使用 ``make htmldocs`` 或 ``make pdfdocs`` 檢查
+    :ref:`kernel-doc <kernel_doc>` 並修復任何問題。
+
+12) 通過以下選項同時啓用的測試 ``CONFIG_PREEMPT``, ``CONFIG_DEBUG_PREEMPT``,
+    ``CONFIG_DEBUG_SLAB``, ``CONFIG_DEBUG_PAGEALLOC``, ``CONFIG_DEBUG_MUTEXES``,
+    ``CONFIG_DEBUG_SPINLOCK``, ``CONFIG_DEBUG_ATOMIC_SLEEP``,
+    ``CONFIG_PROVE_RCU`` and ``CONFIG_DEBUG_OBJECTS_RCU_HEAD``
+
+13) 已經過構建和運行時測試，包括有或沒有 ``CONFIG_SMP``, ``CONFIG_PREEMPT``.
+
+14) 如果補丁程序影響IO/磁碟等：使用或不使用 ``CONFIG_LBDAF`` 進行測試。
+
+15) 所有代碼路徑都已在啓用所有lockdep功能的情況下運行。
+
+16) 所有新的/proc條目都記錄在 ``Documentation/``
+
+17) 所有新的內核引導參數都記錄在
+    Documentation/admin-guide/kernel-parameters.rst 中。
+
+18) 所有新的模塊參數都記錄在 ``MODULE_PARM_DESC()``
+
+19) 所有新的用戶空間接口都記錄在 ``Documentation/ABI/`` 中。有關詳細信息，
+    請參閱 ``Documentation/ABI/README`` 。更改用戶空間接口的補丁應該抄送
+    linux-api@vger.kernel.org。
+
+20) 已通過至少注入slab和page分配失敗進行檢查。請參閱 ``Documentation/fault-injection/``
+    如果新代碼是實質性的，那麼添加子系統特定的故障注入可能是合適的。
+
+21) 新添加的代碼已經用 ``gcc -W`` 編譯（使用 ``make EXTRA-CFLAGS=-W`` ）。這
+    將產生大量噪聲，但對於查找諸如「警告：有符號和無符號之間的比較」之類的錯誤
+    很有用。
+
+22) 在它被合併到-mm補丁集中之後進行測試，以確保它仍然與所有其他排隊的補丁以
+    及VM、VFS和其他子系統中的各種更改一起工作。
+
+23) 所有內存屏障例如 ``barrier()``, ``rmb()``, ``wmb()`` 都需要原始碼中的注
+    釋來解釋它們正在執行的操作及其原因的邏輯。
+
+24) 如果補丁添加了任何ioctl，那麼也要更新 ``Documentation/userspace-api/ioctl/ioctl-number.rst``
+
+25) 如果修改後的原始碼依賴或使用與以下 ``Kconfig`` 符號相關的任何內核API或
+    功能，則在禁用相關 ``Kconfig`` 符號和/或 ``=m`` （如果該選項可用）的情況
+    下測試以下多個構建[並非所有這些都同時存在，只是它們的各種/隨機組合]：
+
+    ``CONFIG_SMP``, ``CONFIG_SYSFS``, ``CONFIG_PROC_FS``, ``CONFIG_INPUT``, ``CONFIG_PCI``, ``CONFIG_BLOCK``, ``CONFIG_PM``, ``CONFIG_MAGIC_SYSRQ``,
+    ``CONFIG_NET``, ``CONFIG_INET=n`` (但是後者伴隨 ``CONFIG_NET=y``).
+
diff --git a/Documentation/translations/zh_TW/process/submitting-drivers.rst b/Documentation/translations/zh_TW/process/submitting-drivers.rst
new file mode 100644
index 000000000000..2fdd742318ba
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/submitting-drivers.rst
@@ -0,0 +1,164 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _tw_submittingdrivers:
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/submitting-drivers.rst
+           <submittingdrivers>`
+
+如果想評論或更新本文的內容，請直接聯繫原文檔的維護者。如果你使用英文
+交流有困難的話，也可以向中文版維護者求助。如果本翻譯更新不及時或者翻
+譯存在問題，請聯繫中文版維護者::
+
+        中文版維護者： 李陽  Li Yang <leoyang.li@nxp.com>
+        中文版翻譯者： 李陽  Li Yang <leoyang.li@nxp.com>
+        中文版校譯者： 陳琦 Maggie Chen <chenqi@beyondsoft.com>
+                       王聰 Wang Cong <xiyou.wangcong@gmail.com>
+                       張巍 Zhang Wei <wezhang@outlook.com>
+                       胡皓文 Hu Haowen <src.res@email.cn>
+
+如何向 Linux 內核提交驅動程序
+=============================
+
+這篇文檔將會解釋如何向不同的內核源碼樹提交設備驅動程序。請注意，如果你感
+興趣的是顯卡驅動程序，你也許應該訪問 XFree86 項目(https://www.xfree86.org/)
+和／或 X.org 項目 (https://x.org)。
+
+另請參閱 Documentation/translations/zh_TW/process/submitting-patches.rst 文檔。
+
+
+分配設備號
+----------
+
+塊設備和字符設備的主設備號與從設備號是由 Linux 命名編號分配權威 LANANA（
+現在是 Torben Mathiasen）負責分配。申請的網址是 https://www.lanana.org/。
+即使不準備提交到主流內核的設備驅動也需要在這裡分配設備號。有關詳細信息，
+請參閱 Documentation/admin-guide/devices.rst。
+
+如果你使用的不是已經分配的設備號，那麼當你提交設備驅動的時候，它將會被強
+制分配一個新的設備號，即便這個設備號和你之前發給客戶的截然不同。
+
+設備驅動的提交對象
+------------------
+
+Linux 2.0:
+	此內核源碼樹不接受新的驅動程序。
+
+Linux 2.2:
+	此內核源碼樹不接受新的驅動程序。
+
+Linux 2.4:
+	如果所屬的代碼領域在內核的 MAINTAINERS 文件中列有一個總維護者，
+	那麼請將驅動程序提交給他。如果此維護者沒有回應或者你找不到恰當的
+	維護者，那麼請聯繫 Willy Tarreau <w@1wt.eu>。
+
+Linux 2.6:
+	除了遵循和 2.4 版內核同樣的規則外，你還需要在 linux-kernel 郵件
+	列表上跟蹤最新的 API 變化。向 Linux 2.6 內核提交驅動的頂級聯繫人
+	是 Andrew Morton <akpm@linux-foundation.org>。
+
+決定設備驅動能否被接受的條件
+----------------------------
+
+許可：		代碼必須使用 GNU 通用公開許可證 (GPL) 提交給 Linux，但是
+		我們並不要求 GPL 是唯一的許可。你或許會希望同時使用多種
+		許可證發布，如果希望驅動程序可以被其他開源社區（比如BSD）
+		使用。請參考 include/linux/module.h 文件中所列出的可被
+		接受共存的許可。
+
+版權：		版權所有者必須同意使用 GPL 許可。最好提交者和版權所有者
+		是相同個人或實體。否則，必需列出授權使用 GPL 的版權所有
+		人或實體，以備驗證之需。
+
+接口：		如果你的驅動程序使用現成的接口並且和其他同類的驅動程序行
+		爲相似，而不是去發明無謂的新接口，那麼它將會更容易被接受。
+		如果你需要一個 Linux 和 NT 的通用驅動接口，那麼請在用
+		戶空間實現它。
+
+代碼：		請使用 Documentation/process/coding-style.rst 中所描述的 Linux 代碼風
+		格。如果你的某些代碼段（例如那些與 Windows 驅動程序包共
+		享的代碼段）需要使用其他格式，而你卻只希望維護一份代碼，
+		那麼請將它們很好地區分出來，並且註明原因。
+
+可移植性：	請注意，指針並不永遠是 32 位的，不是所有的計算機都使用小
+		尾模式 (little endian) 存儲數據，不是所有的人都擁有浮點
+		單元，不要隨便在你的驅動程序里嵌入 x86 彙編指令。只能在
+		x86 上運行的驅動程序一般是不受歡迎的。雖然你可能只有 x86
+		硬體，很難測試驅動程序在其他平台上是否可用，但是確保代碼
+		可以被輕鬆地移植卻是很簡單的。
+
+清晰度：	做到所有人都能修補這個驅動程序將會很有好處，因爲這樣你將
+		會直接收到修復的補丁而不是 bug 報告。如果你提交一個試圖
+		隱藏硬體工作機理的驅動程序，那麼它將會被扔進廢紙簍。
+
+電源管理：	因爲 Linux 正在被很多行動裝置和桌面系統使用，所以你的驅
+		動程序也很有可能被使用在這些設備上。它應該支持最基本的電
+		源管理，即在需要的情況下實現系統級休眠和喚醒要用到的
+		.suspend 和 .resume 函數。你應該檢查你的驅動程序是否能正
+		確地處理休眠與喚醒，如果實在無法確認，請至少把 .suspend
+		函數定義成返回 -ENOSYS（功能未實現）錯誤。你還應該嘗試確
+		保你的驅動在什麼都不乾的情況下將耗電降到最低。要獲得驅動
+		程序測試的指導，請參閱
+		Documentation/power/drivers-testing.rst。有關驅動程序電
+		源管理問題相對全面的概述，請參閱
+		Documentation/driver-api/pm/devices.rst。
+
+管理：		如果一個驅動程序的作者還在進行有效的維護，那麼通常除了那
+		些明顯正確且不需要任何檢查的補丁以外，其他所有的補丁都會
+		被轉發給作者。如果你希望成爲驅動程序的聯繫人和更新者，最
+		好在代碼注釋中寫明並且在 MAINTAINERS 文件中加入這個驅動
+		程序的條目。
+
+不影響設備驅動能否被接受的條件
+------------------------------
+
+供應商：	由硬體供應商來維護驅動程序通常是一件好事。不過，如果源碼
+		樹里已經有其他人提供了可穩定工作的驅動程序，那麼請不要期
+		望「我是供應商」會成爲內核改用你的驅動程序的理由。理想的情
+		況是：供應商與現有驅動程序的作者合作，構建一個統一完美的
+		驅動程序。
+
+作者：		驅動程序是由大的 Linux 公司研發還是由你個人編寫，並不影
+		響其是否能被內核接受。沒有人對內核源碼樹享有特權。只要你
+		充分了解內核社區，你就會發現這一點。
+
+
+資源列表
+--------
+
+Linux 內核主源碼樹：
+	ftp.??.kernel.org:/pub/linux/kernel/...
+	?? == 你的國家代碼，例如 "cn"、"us"、"uk"、"fr" 等等
+
+Linux 內核郵件列表：
+	linux-kernel@vger.kernel.org
+	[可通過向majordomo@vger.kernel.org發郵件來訂閱]
+
+Linux 設備驅動程序，第三版（探討 2.6.10 版內核）：
+	https://lwn.net/Kernel/LDD3/ （免費版）
+
+LWN.net:
+	每周內核開發活動摘要 - https://lwn.net/
+
+	2.6 版中 API 的變更：
+
+		https://lwn.net/Articles/2.6-kernel-api/
+
+	將舊版內核的驅動程序移植到 2.6 版：
+
+		https://lwn.net/Articles/driver-porting/
+
+內核新手(KernelNewbies):
+	爲新的內核開發者提供文檔和幫助
+	https://kernelnewbies.org/
+
+Linux USB項目：
+	http://www.linux-usb.org/
+
+寫內核驅動的「不要」（Arjan van de Ven著）:
+	http://www.fenrus.org/how-to-not-write-a-device-driver-paper.pdf
+
+內核清潔工 (Kernel Janitor):
+	https://kernelnewbies.org/KernelJanitors
+
diff --git a/Documentation/translations/zh_TW/process/submitting-patches.rst b/Documentation/translations/zh_TW/process/submitting-patches.rst
new file mode 100644
index 000000000000..cdf0b52e4a98
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/submitting-patches.rst
@@ -0,0 +1,686 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _tw_submittingpatches:
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/submitting-patches.rst <submittingpatches>`
+
+譯者::
+
+        中文版維護者： 鍾宇 TripleX Chung <xxx.phy@gmail.com>
+        中文版翻譯者： 鍾宇 TripleX Chung <xxx.phy@gmail.com>
+                       時奎亮 Alex Shi <alex.shi@linux.alibaba.com>
+        中文版校譯者： 李陽 Li Yang <leoyang.li@nxp.com>
+                       王聰 Wang Cong <xiyou.wangcong@gmail.com>
+                       胡皓文 Hu Haowen <src.res@email.cn>
+
+
+如何讓你的改動進入內核
+======================
+
+對於想要將改動提交到 Linux 內核的個人或者公司來說，如果不熟悉「規矩」，
+提交的流程會讓人畏懼。本文檔收集了一系列建議，這些建議可以大大的提高你
+的改動被接受的機會.
+
+以下文檔含有大量簡潔的建議， 具體請見：
+:ref:`Documentation/process <development_process_main>`
+同樣，:ref:`Documentation/translations/zh_TW/process/submit-checklist.rst <tw_submitchecklist>`
+給出在提交代碼前需要檢查的項目的列表。如果你在提交一個驅動程序，那麼
+同時閱讀一下:
+:ref:`Documentation/process/submitting-drivers.rst <submittingdrivers>`
+
+其中許多步驟描述了Git版本控制系統的默認行爲；如果您使用Git來準備補丁，
+您將發現它爲您完成的大部分機械工作，儘管您仍然需要準備和記錄一組合理的
+補丁。一般來說，使用git將使您作爲內核開發人員的生活更輕鬆。
+
+
+0) 獲取當前源碼樹
+-----------------
+
+如果您沒有一個可以使用當前內核原始碼的存儲庫，請使用git獲取一個。您將要
+從主線存儲庫開始，它可以通過以下方式獲取::
+
+        git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
+
+但是，請注意，您可能不希望直接針對主線樹進行開發。大多數子系統維護人員運
+行自己的樹，並希望看到針對這些樹準備的補丁。請參見MAINTAINERS文件中子系
+統的 **T:** 項以查找該樹，或者簡單地詢問維護者該樹是否未在其中列出。
+
+仍然可以通過tarballs下載內核版本（如下一節所述），但這是進行內核開發的
+一種困難的方式。
+
+1) "diff -up"
+-------------
+
+使用 "diff -up" 或者 "diff -uprN" 來創建補丁。
+
+所有內核的改動，都是以補丁的形式呈現的，補丁由 diff(1) 生成。創建補丁的
+時候，要確認它是以 "unified diff" 格式創建的，這種格式由 diff(1) 的 '-u'
+參數生成。而且，請使用 '-p' 參數，那樣會顯示每個改動所在的C函數，使得
+產生的補丁容易讀得多。補丁應該基於內核原始碼樹的根目錄，而不是裡邊的任
+何子目錄。
+
+爲一個單獨的文件創建補丁，一般來說這樣做就夠了::
+
+        SRCTREE=linux
+        MYFILE=drivers/net/mydriver.c
+
+        cd $SRCTREE
+        cp $MYFILE $MYFILE.orig
+        vi $MYFILE      # make your change
+        cd ..
+        diff -up $SRCTREE/$MYFILE{.orig,} > /tmp/patch
+
+爲多個文件創建補丁，你可以解開一個沒有修改過的內核原始碼樹，然後和你自
+己的代碼樹之間做 diff 。例如::
+
+        MYSRC=/devel/linux
+
+        tar xvfz linux-3.19.tar.gz
+        mv linux-3.19 linux-3.19-vanilla
+        diff -uprN -X linux-3.19-vanilla/Documentation/dontdiff \
+                linux-3.19-vanilla $MYSRC > /tmp/patch
+
+"dontdiff" 是內核在編譯的時候產生的文件的列表，列表中的文件在 diff(1)
+產生的補丁里會被跳過。
+
+確定你的補丁里沒有包含任何不屬於這次補丁提交的額外文件。記得在用diff(1)
+生成補丁之後，審閱一次補丁，以確保準確。
+
+如果你的改動很散亂，你應該研究一下如何將補丁分割成獨立的部分，將改動分
+割成一系列合乎邏輯的步驟。這樣更容易讓其他內核開發者審核，如果你想你的
+補丁被接受，這是很重要的。請參閱：
+:ref:`tw_split_changes`
+
+如果你用 ``git`` , ``git rebase -i`` 可以幫助你這一點。如果你不用 ``git``,
+``quilt`` <https://savannah.nongnu.org/projects/quilt> 另外一個流行的選擇。
+
+.. _tw_describe_changes:
+
+2) 描述你的改動
+---------------
+
+描述你的問題。無論您的補丁是一行錯誤修復還是5000行新功能，都必須有一個潛在
+的問題激勵您完成這項工作。讓審稿人相信有一個問題值得解決，讓他們讀完第一段
+是有意義的。
+
+描述用戶可見的影響。直接崩潰和鎖定是相當有說服力的，但並不是所有的錯誤都那麼
+明目張胆。即使在代碼審查期間發現了這個問題，也要描述一下您認爲它可能對用戶產
+生的影響。請記住，大多數Linux安裝運行的內核來自二級穩定樹或特定於供應商/產品
+的樹，只從上游精選特定的補丁，因此請包含任何可以幫助您將更改定位到下游的內容：
+觸發的場景、DMESG的摘錄、崩潰描述、性能回歸、延遲尖峯、鎖定等。
+
+量化優化和權衡。如果您聲稱在性能、內存消耗、堆棧占用空間或二進位大小方面有所
+改進，請包括支持它們的數字。但也要描述不明顯的成本。優化通常不是免費的，而是
+在CPU、內存和可讀性之間進行權衡；或者，探索性的工作，在不同的工作負載之間進
+行權衡。請描述優化的預期缺點，以便審閱者可以權衡成本和收益。
+
+一旦問題建立起來，就要詳細地描述一下您實際在做什麼。對於審閱者來說，用簡單的
+英語描述代碼的變化是很重要的，以驗證代碼的行爲是否符合您的意願。
+
+如果您將補丁描述寫在一個表單中，這個表單可以很容易地作爲「提交日誌」放入Linux
+的原始碼管理系統git中，那麼維護人員將非常感謝您。見 :ref:`tw_explicit_in_reply_to`.
+
+每個補丁只解決一個問題。如果你的描述開始變長，這就表明你可能需要拆分你的補丁。
+請見 :ref:`tw_split_changes`
+
+提交或重新提交修補程序或修補程序系列時，請包括完整的修補程序說明和理由。不要
+只說這是補丁（系列）的第幾版。不要期望子系統維護人員引用更早的補丁版本或引用
+URL來查找補丁描述並將其放入補丁中。也就是說，補丁（系列）及其描述應該是獨立的。
+這對維護人員和審查人員都有好處。一些評審者可能甚至沒有收到補丁的早期版本。
+
+描述你在命令語氣中的變化，例如「make xyzzy do frotz」而不是「[這個補丁]make
+xyzzy do frotz」或「[我]changed xyzzy to do frotz」，就好像你在命令代碼庫改變
+它的行爲一樣。
+
+如果修補程序修復了一個記錄的bug條目，請按編號和URL引用該bug條目。如果補丁來
+自郵件列表討論，請給出郵件列表存檔的URL；使用帶有 ``Message-ID`` 的
+https://lkml.kernel.org/ 重定向，以確保連結不會過時。
+
+但是，在沒有外部資源的情況下，儘量讓你的解釋可理解。除了提供郵件列表存檔或
+bug的URL之外，還要總結需要提交補丁的相關討論要點。
+
+如果您想要引用一個特定的提交，不要只引用提交的 SHA-1 ID。還請包括提交的一行
+摘要，以便於審閱者了解它是關於什麼的。例如::
+
+        Commit e21d2170f36602ae2708 ("video: remove unnecessary
+        platform_set_drvdata()") removed the unnecessary
+        platform_set_drvdata(), but left the variable "dev" unused,
+        delete it.
+
+您還應該確保至少使用前12位 SHA-1 ID. 內核存儲庫包含*許多*對象，使與較短的ID
+發生衝突的可能性很大。記住，即使現在不會與您的六個字符ID發生衝突，這種情況
+可能五年後改變。
+
+如果修補程序修復了特定提交中的錯誤，例如，使用 ``git bisct`` ，請使用帶有前
+12個字符SHA-1 ID 的"Fixes:"標記和單行摘要。爲了簡化不要將標記拆分爲多個，
+行、標記不受分析腳本「75列換行」規則的限制。例如::
+
+        Fixes: 54a4f0239f2e ("KVM: MMU: make kvm_mmu_zap_page() return the number of pages it actually freed")
+
+下列 ``git config`` 設置可以添加讓 ``git log``, ``git show`` 漂亮的顯示格式::
+
+	[core]
+		abbrev = 12
+	[pretty]
+		fixes = Fixes: %h (\"%s\")
+
+.. _tw_split_changes:
+
+3) 拆分你的改動
+---------------
+
+將每個邏輯更改分隔成一個單獨的補丁。
+
+例如，如果你的改動里同時有bug修正和性能優化，那麼把這些改動拆分到兩個或
+者更多的補丁文件中。如果你的改動包含對API的修改，並且修改了驅動程序來適
+應這些新的API，那麼把這些修改分成兩個補丁。
+
+另一方面，如果你將一個單獨的改動做成多個補丁文件，那麼將它們合併成一個
+單獨的補丁文件。這樣一個邏輯上單獨的改動只被包含在一個補丁文件里。
+
+如果有一個補丁依賴另外一個補丁來完成它的改動，那沒問題。簡單的在你的補
+丁描述里指出「這個補丁依賴某補丁」就好了。
+
+在將您的更改劃分爲一系列補丁時，要特別注意確保內核在系列中的每個補丁之後
+都能正常構建和運行。使用 ``git bisect`` 來追蹤問題的開發者可能會在任何時
+候分割你的補丁系列；如果你在中間引入錯誤，他們不會感謝你。
+
+如果你不能將補丁濃縮成更少的文件，那麼每次大約發送出15個，然後等待審查
+和集成。
+
+4) 檢查你的更改風格
+-------------------
+
+檢查您的補丁是否存在基本樣式衝突，詳細信息可在
+:ref:`Documentation/translations/zh_TW/process/coding-style.rst <tw_codingstyle>`
+中找到。如果不這樣做，只會浪費審稿人的時間，並且會導致你的補丁被拒絕，甚至
+可能沒有被閱讀。
+
+一個重要的例外是在將代碼從一個文件移動到另一個文件時——在這種情況下，您不應
+該在移動代碼的同一個補丁中修改移動的代碼。這清楚地描述了移動代碼和您的更改
+的行爲。這大大有助於審查實際差異，並允許工具更好地跟蹤代碼本身的歷史。
+
+在提交之前，使用補丁樣式檢查程序檢查補丁（scripts/check patch.pl）。不過，
+請注意，樣式檢查程序應該被視爲一個指南，而不是作爲人類判斷的替代品。如果您
+的代碼看起來更好，但有違規行爲，那麼最好不要使用它。
+
+檢查者報告三個級別：
+
+ - ERROR：很可能出錯的事情
+ - WARNING：需要仔細審查的事項
+ - CHECK：需要思考的事情
+
+您應該能夠判斷您的補丁中存在的所有違規行爲。
+
+5) 選擇補丁收件人
+-----------------
+
+您應該總是在任何補丁上複製相應的子系統維護人員，以獲得他們維護的代碼；查看
+維護人員文件和原始碼修訂歷史記錄，以了解這些維護人員是誰。腳本
+scripts/get_Maintainer.pl在這個步驟中非常有用。如果您找不到正在工作的子系統
+的維護人員，那麼Andrew Morton（akpm@linux-foundation.org）將充當最後的維護
+人員。
+
+您通常還應該選擇至少一個郵件列表來接收補丁集的。linux-kernel@vger.kernel.org
+作爲最後一個解決辦法的列表，但是這個列表上的體積已經引起了許多開發人員的拒絕。
+在MAINTAINERS文件中查找子系統特定的列表；您的補丁可能會在那裡得到更多的關注。
+不過，請不要發送垃圾郵件到無關的列表。
+
+許多與內核相關的列表託管在vger.kernel.org上；您可以在
+http://vger.kernel.org/vger-lists.html 上找到它們的列表。不過，也有與內核相關
+的列表託管在其他地方。
+
+不要一次發送超過15個補丁到vger郵件列表！！！！
+
+Linus Torvalds 是決定改動能否進入 Linux 內核的最終裁決者。他的 e-mail
+地址是 <torvalds@linux-foundation.org> 。他收到的 e-mail 很多，所以一般
+的說，最好別給他發 e-mail。
+
+如果您有修復可利用安全漏洞的補丁，請將該補丁發送到 security@kernel.org。對於
+嚴重的bug，可以考慮短期暫停以允許分銷商向用戶發布補丁；在這種情況下，顯然不應
+將補丁發送到任何公共列表。
+
+修復已發布內核中嚴重錯誤的補丁程序應該指向穩定版維護人員，方法是放這樣的一行::
+
+        Cc: stable@vger.kernel.org
+
+進入補丁的簽准區（注意，不是電子郵件收件人）。除了這個文件之外，您還應該閱讀
+:ref:`Documentation/process/stable-kernel-rules.rst <stable_kernel_rules>`
+
+但是，請注意，一些子系統維護人員希望得出他們自己的結論，即哪些補丁應該被放到
+穩定的樹上。尤其是網絡維護人員，不希望看到單個開發人員在補丁中添加像上面這樣
+的行。
+
+如果更改影響到用戶和內核接口，請向手冊頁維護人員（如維護人員文件中所列）發送
+手冊頁補丁，或至少發送更改通知，以便一些信息進入手冊頁。還應將用戶空間API
+更改複製到 linux-api@vger.kernel.org。
+
+對於小的補丁，你也許會CC到搜集瑣碎補丁的郵件列表(Trivial Patch Monkey)
+trivial@kernel.org，那裡專門收集瑣碎的補丁。下面這樣的補丁會被看作「瑣碎的」
+補丁：
+
+ - 文檔的拼寫修正。
+ - 修正會影響到 grep(1) 的拼寫。
+ - 警告信息修正(頻繁的列印無用的警告是不好的。)
+ - 編譯錯誤修正（代碼邏輯的確是對的，只是編譯有問題。）
+ - 運行時修正（只要真的修正了錯誤。）
+ - 移除使用了被廢棄的函數/宏的代碼(例如 check_region。)
+ - 聯繫方式和文檔修正。
+ - 用可移植的代碼替換不可移植的代碼（即使在體系結構相關的代碼中，既然有
+ - 人拷貝，只要它是瑣碎的）
+ - 任何文件的作者/維護者對該文件的改動（例如 patch monkey 在重傳模式下）
+
+(譯註，關於「瑣碎補丁」的一些說明：因爲原文的這一部分寫得比較簡單，所以不得不
+違例寫一下譯註。"trivial"這個英文單詞的本意是「瑣碎的，不重要的。」但是在這裡
+有稍微有一些變化，例如對一些明顯的NULL指針的修正，屬於運行時修正，會被歸類
+到瑣碎補丁里。雖然NULL指針的修正很重要，但是這樣的修正往往很小而且很容易得到
+檢驗，所以也被歸入瑣碎補丁。瑣碎補丁更精確的歸類應該是
+「simple, localized & easy to verify」，也就是說簡單的，局部的和易於檢驗的。
+trivial@kernel.org郵件列表的目的是針對這樣的補丁，爲提交者提供一個中心，來
+降低提交的門檻。)
+
+6) 沒有 MIME 編碼，沒有連結，沒有壓縮，沒有附件，只有純文本
+-----------------------------------------------------------
+
+Linus 和其他的內核開發者需要閱讀和評論你提交的改動。對於內核開發者來說
+，可以「引用」你的改動很重要，使用一般的 e-mail 工具，他們就可以在你的
+代碼的任何位置添加評論。
+
+因爲這個原因，所有的提交的補丁都是 e-mail 中「內嵌」的。
+
+.. warning::
+   如果你使用剪切-粘貼你的補丁，小心你的編輯器的自動換行功能破壞你的補丁
+
+不要將補丁作爲 MIME 編碼的附件，不管是否壓縮。很多流行的 e-mail 軟體不
+是任何時候都將 MIME 編碼的附件當作純文本發送的，這會使得別人無法在你的
+代碼中加評論。另外，MIME 編碼的附件會讓 Linus 多花一點時間來處理，這就
+降低了你的改動被接受的可能性。
+
+例外：如果你的郵遞員弄壞了補丁，那麼有人可能會要求你使用mime重新發送補丁
+
+請參閱 :ref:`Documentation/translations/zh_TW/process/email-clients.rst <tw_email_clients>`
+以獲取有關配置電子郵件客戶端以使其不受影響地發送修補程序的提示。
+
+7) e-mail 的大小
+----------------
+
+大的改動對郵件列表不合適，對某些維護者也不合適。如果你的補丁，在不壓縮
+的情況下，超過了300kB，那麼你最好將補丁放在一個能通過 internet 訪問的服
+務器上，然後用指向你的補丁的 URL 替代。但是請注意，如果您的補丁超過了
+300kb，那麼它幾乎肯定需要被破壞。
+
+8）回複評審意見
+---------------
+
+你的補丁幾乎肯定會得到評審者對補丁改進方法的評論。您必須對這些評論作出
+回應；讓補丁被忽略的一個好辦法就是忽略審閱者的意見。不會導致代碼更改的
+意見或問題幾乎肯定會帶來注釋或變更日誌的改變，以便下一個評審者更好地了解
+正在發生的事情。
+
+一定要告訴審稿人你在做什麼改變，並感謝他們的時間。代碼審查是一個累人且
+耗時的過程，審查人員有時會變得暴躁。即使在這種情況下，也要禮貌地回應並
+解決他們指出的問題。
+
+9）不要洩氣或不耐煩
+-------------------
+
+提交更改後，請耐心等待。審閱者是忙碌的人，可能無法立即訪問您的修補程序。
+
+曾幾何時，補丁曾在沒有評論的情況下消失在空白中，但開發過程比現在更加順利。
+您應該在一周左右的時間內收到評論；如果沒有收到評論，請確保您已將補丁發送
+到正確的位置。在重新提交或聯繫審閱者之前至少等待一周-在諸如合併窗口之類的
+繁忙時間可能更長。
+
+10）主題中包含 PATCH
+--------------------
+
+由於到linus和linux內核的電子郵件流量很高，通常會在主題行前面加上[PATCH]
+前綴. 這使Linus和其他內核開發人員更容易將補丁與其他電子郵件討論區分開。
+
+11）簽署你的作品-開發者原始認證
+-------------------------------
+
+爲了加強對誰做了何事的追蹤，尤其是對那些透過好幾層的維護者的補丁，我們
+建議在發送出去的補丁上加一個 「sign-off」 的過程。
+
+"sign-off" 是在補丁的注釋的最後的簡單的一行文字，認證你編寫了它或者其他
+人有權力將它作爲開放原始碼的補丁傳遞。規則很簡單：如果你能認證如下信息:
+
+開發者來源證書 1.1
+^^^^^^^^^^^^^^^^^^
+
+對於本項目的貢獻，我認證如下信息：
+
+      （a）這些貢獻是完全或者部分的由我創建，我有權利以文件中指出
+           的開放原始碼許可證提交它；或者
+      （b）這些貢獻基於以前的工作，據我所知，這些以前的工作受恰當的開放
+           原始碼許可證保護，而且，根據許可證，我有權提交修改後的貢獻，
+           無論是完全還是部分由我創造，這些貢獻都使用同一個開放原始碼許可證
+           （除非我被允許用其它的許可證），正如文件中指出的；或者
+      （c）這些貢獻由認證（a），（b）或者（c）的人直接提供給我，而
+           且我沒有修改它。
+      （d）我理解並同意這個項目和貢獻是公開的，貢獻的記錄（包括我
+           一起提交的個人記錄，包括 sign-off ）被永久維護並且可以和這個項目
+           或者開放原始碼的許可證同步地再發行。
+
+那麼加入這樣一行::
+
+       Signed-off-by: Random J Developer <random@developer.example.org>
+
+使用你的真名（抱歉，不能使用假名或者匿名。）
+
+有人在最後加上標籤。現在這些東西會被忽略，但是你可以這樣做，來標記公司
+內部的過程，或者只是指出關於 sign-off 的一些特殊細節。
+
+如果您是子系統或分支維護人員，有時需要稍微修改收到的補丁，以便合併它們，
+因爲樹和提交者中的代碼不完全相同。如果你嚴格遵守規則（c），你應該要求提交者
+重新發布，但這完全是在浪費時間和精力。規則（b）允許您調整代碼，但是更改一個
+提交者的代碼並讓他認可您的錯誤是非常不禮貌的。要解決此問題，建議在最後一個
+由簽名行和您的行之間添加一行，指示更改的性質。雖然這並不是強制性的，但似乎
+在描述前加上您的郵件和/或姓名（全部用方括號括起來），這足以讓人注意到您對最
+後一分鐘的更改負有責任。例如::
+
+	Signed-off-by: Random J Developer <random@developer.example.org>
+	[lucky@maintainer.example.org: struct foo moved from foo.c to foo.h]
+	Signed-off-by: Lucky K Maintainer <lucky@maintainer.example.org>
+
+如果您維護一個穩定的分支機構，同時希望對作者進行致謝、跟蹤更改、合併修復並
+保護提交者不受投訴，那麼這種做法尤其有用。請注意，在任何情況下都不能更改作者
+的ID（From 頭），因爲它是出現在更改日誌中的標識。
+
+對回合（back-porters）的特別說明：在提交消息的頂部（主題行之後）插入一個補丁
+的起源指示似乎是一種常見且有用的實踐，以便於跟蹤。例如，下面是我們在3.x穩定
+版本中看到的內容::
+
+  Date:   Tue Oct 7 07:26:38 2014 -0400
+
+    libata: Un-break ATA blacklist
+
+    commit 1c40279960bcd7d52dbdf1d466b20d24b99176c8 upstream.
+
+還有， 這裡是一個舊版內核中的一個回合補丁::
+
+    Date:   Tue May 13 22:12:27 2008 +0200
+
+        wireless, airo: waitbusy() won't delay
+
+        [backport of 2.6 commit b7acbdfbd1f277c1eb23f344f899cfa4cd0bf36a]
+
+12）何時使用Acked-by:，CC:，和Co-Developed by:
+----------------------------------------------
+
+Singed-off-by: 標記表示簽名者參與了補丁的開發，或者他/她在補丁的傳遞路徑中。
+
+如果一個人沒有直接參與補丁的準備或處理，但希望表示並記錄他們對補丁的批准，
+那麼他們可以要求在補丁的變更日誌中添加一個 Acked-by:
+
+Acked-by：通常由受影響代碼的維護者使用，當該維護者既沒有貢獻也沒有轉發補丁時。
+
+Acked-by: 不像簽字人那樣正式。這是一個記錄，確認人至少審查了補丁，並表示接受。
+因此，補丁合併有時會手動將Acker的「Yep，looks good to me」轉換爲 Acked-By：（但
+請注意，通常最好要求一個明確的Ack）。
+
+Acked-by：不一定表示對整個補丁的確認。例如，如果一個補丁影響多個子系統，並且
+有一個：來自一個子系統維護者，那麼這通常表示只確認影響維護者代碼的部分。這裡
+應該仔細判斷。如有疑問，應參考郵件列表檔案中的原始討論。
+
+如果某人有機會對補丁進行評論，但沒有提供此類評論，您可以選擇在補丁中添加 ``Cc:``
+這是唯一一個標籤，它可以在沒有被它命名的人顯式操作的情況下添加，但它應該表明
+這個人是在補丁上抄送的。討論中包含了潛在利益相關方。
+
+Co-developed-by: 聲明補丁是由多個開發人員共同創建的；當幾個人在一個補丁上工
+作時，它用於將屬性賦予共同作者（除了 From: 所賦予的作者之外）。因爲
+Co-developed-by: 表示作者身份，所以每個共同開發人：必須緊跟在相關合作作者的
+簽名之後。標準的簽核程序要求：標記的簽核順序應儘可能反映補丁的時間歷史，而不
+管作者是通過 From ：還是由 Co-developed-by: 共同開發的。值得注意的是，最後一
+個簽字人：必須始終是提交補丁的開發人員。
+
+注意，當作者也是電子郵件標題「發件人：」行中列出的人時，「From: 」 標記是可選的。
+
+作者提交的補丁程序示例::
+
+	<changelog>
+
+	Co-developed-by: First Co-Author <first@coauthor.example.org>
+	Signed-off-by: First Co-Author <first@coauthor.example.org>
+	Co-developed-by: Second Co-Author <second@coauthor.example.org>
+	Signed-off-by: Second Co-Author <second@coauthor.example.org>
+	Signed-off-by: From Author <from@author.example.org>
+
+合作開發者提交的補丁示例::
+
+	From: From Author <from@author.example.org>
+
+	<changelog>
+
+	Co-developed-by: Random Co-Author <random@coauthor.example.org>
+	Signed-off-by: Random Co-Author <random@coauthor.example.org>
+	Signed-off-by: From Author <from@author.example.org>
+	Co-developed-by: Submitting Co-Author <sub@coauthor.example.org>
+	Signed-off-by: Submitting Co-Author <sub@coauthor.example.org>
+
+
+13）使用報告人：、測試人：、審核人：、建議人：、修復人：
+--------------------------------------------------------
+
+Reported-by: 給那些發現錯誤並報告錯誤的人致謝，它希望激勵他們在將來再次幫助
+我們。請注意，如果bug是以私有方式報告的，那麼在使用Reported-by標記之前，請
+先請求權限。
+
+Tested-by: 標記表示補丁已由指定的人（在某些環境中）成功測試。這個標籤通知
+維護人員已經執行了一些測試，爲將來的補丁提供了一種定位測試人員的方法，並確
+保測試人員的信譽。
+
+Reviewed-by：相反，根據審查人的聲明，表明該補丁已被審查並被認爲是可接受的：
+
+
+審查人的監督聲明
+^^^^^^^^^^^^^^^^
+
+通過提供我的 Reviewed-by，我聲明：
+
+        (a) 我已經對這個補丁進行了一次技術審查，以評估它是否適合被包含到
+            主線內核中。
+
+        (b) 與補丁相關的任何問題、顧慮或問題都已反饋給提交者。我對提交者對
+            我的評論的回應感到滿意。
+
+        (c) 雖然這一提交可能會改進一些東西，但我相信，此時，（1）對內核
+            進行了有價值的修改，（2）沒有包含爭論中涉及的已知問題。
+
+        (d) 雖然我已經審查了補丁並認爲它是健全的，但我不會（除非另有明確
+            說明）作出任何保證或保證它將在任何給定情況下實現其規定的目的
+            或正常運行。
+
+Reviewed-by 是一種觀點聲明，即補丁是對內核的適當修改，沒有任何遺留的嚴重技術
+問題。任何感興趣的審閱者（完成工作的人）都可以爲一個補丁提供一個 Review-by
+標籤。此標籤用於向審閱者提供致謝，並通知維護者已在修補程序上完成的審閱程度。
+Reviewed-by: 當由已知了解主題區域並執行徹底檢查的審閱者提供時，通常會增加
+補丁進入內核的可能性。
+
+Suggested-by: 表示補丁的想法是由指定的人提出的，並確保將此想法歸功於指定的
+人。請注意，未經許可，不得添加此標籤，特別是如果該想法未在公共論壇上發布。
+這就是說，如果我們勤快地致謝我們的創意者，他們很有希望在未來得到鼓舞，再次
+幫助我們。
+
+Fixes: 指示補丁在以前的提交中修復了一個問題。它可以很容易地確定錯誤的來源，
+這有助於檢查錯誤修復。這個標記還幫助穩定內核團隊確定應該接收修復的穩定內核
+版本。這是指示補丁修復的錯誤的首選方法。請參閱 :ref:`tw_describe_changes`
+描述您的更改以了解更多詳細信息。
+
+.. _tw_the_canonical_patch_format:
+
+12）標準補丁格式
+----------------
+
+本節描述如何格式化補丁本身。請注意，如果您的補丁存儲在 ``Git`` 存儲庫中，則
+可以使用 ``git format-patch`` 進行正確的補丁格式設置。但是，這些工具無法創建
+必要的文本，因此請務必閱讀下面的說明。
+
+標準的補丁，標題行是::
+
+    Subject: [PATCH 001/123] 子系統:一句話概述
+
+標準補丁的信體存在如下部分：
+
+  - 一個 "from" 行指出補丁作者。後跟空行（僅當發送修補程序的人不是作者時才需要）。
+
+  - 解釋的正文，行以75列包裝，這將被複製到永久變更日誌來描述這個補丁。
+
+  - 一個空行
+
+  - 上面描述的「Signed-off-by」 行，也將出現在更改日誌中。
+
+  - 只包含 ``---`` 的標記線。
+
+  - 任何其他不適合放在變更日誌的注釋。
+
+  - 實際補丁（ ``diff`` 輸出）。
+
+標題行的格式，使得對標題行按字母序排序非常的容易 - 很多 e-mail 客戶端都
+可以支持 - 因爲序列號是用零填充的，所以按數字排序和按字母排序是一樣的。
+
+e-mail 標題中的「子系統」標識哪個內核子系統將被打補丁。
+
+e-mail 標題中的「一句話概述」扼要的描述 e-mail 中的補丁。「一句話概述」
+不應該是一個文件名。對於一個補丁系列（「補丁系列」指一系列的多個相關補
+丁），不要對每個補丁都使用同樣的「一句話概述」。
+
+記住 e-mail 的「一句話概述」會成爲該補丁的全局唯一標識。它會蔓延到 git
+的改動記錄里。然後「一句話概述」會被用在開發者的討論里，用來指代這個補
+丁。用戶將希望通過 google 來搜索"一句話概述"來找到那些討論這個補丁的文
+章。當人們在兩三個月後使用諸如 ``gitk`` 或 ``git log --oneline`` 之類
+的工具查看數千個補丁時，也會很快看到它。
+
+出於這些原因，概述必須不超過70-75個字符，並且必須描述補丁的更改以及爲
+什麼需要補丁。既要簡潔又要描述性很有挑戰性，但寫得好的概述應該這樣做。
+
+概述的前綴可以用方括號括起來：「Subject: [PATCH <tag>...] <概述>」。標記
+不被視爲概述的一部分，而是描述應該如何處理補丁。如果補丁的多個版本已發
+送出來以響應評審（即「v1，v2，v3」）或「rfc」，以指示評審請求，那麼通用標記
+可能包括版本描述符。如果一個補丁系列中有四個補丁，那麼各個補丁可以這樣
+編號：1/4、2/4、3/4、4/4。這可以確保開發人員了解補丁應用的順序，並且他們
+已經查看或應用了補丁系列中的所有補丁。
+
+一些標題的例子::
+
+    Subject: [patch 2/5] ext2: improve scalability of bitmap searching
+    Subject: [PATCHv2 001/207] x86: fix eflags tracking
+
+"From" 行是信體裡的最上面一行，具有如下格式：
+        From: Patch Author <author@example.com>
+
+"From" 行指明在永久改動日誌里，誰會被確認爲作者。如果沒有 "From" 行，那
+麼郵件頭裡的 "From: " 行會被用來決定改動日誌中的作者。
+
+說明的主題將會被提交到永久的原始碼改動日誌里，因此對那些早已經不記得和
+這個補丁相關的討論細節的有能力的讀者來說，是有意義的。包括補丁程序定位
+錯誤的（內核日誌消息、OOPS消息等）症狀，對於搜索提交日誌以尋找適用補丁的人
+尤其有用。如果一個補丁修復了一個編譯失敗，那麼可能不需要包含所有編譯失敗；
+只要足夠讓搜索補丁的人能夠找到它就行了。與概述一樣，既要簡潔又要描述性。
+
+"---" 標記行對於補丁處理工具要找到哪裡是改動日誌信息的結束，是不可缺少
+的。
+
+對於 "---" 標記之後的額外註解，一個好的用途就是用來寫 diffstat，用來顯
+示修改了什麼文件和每個文件都增加和刪除了多少行。diffstat 對於比較大的補
+丁特別有用。其餘那些只是和時刻或者開發者相關的註解，不合適放到永久的改
+動日誌里的，也應該放這裡。
+使用 diffstat的選項 "-p 1 -w 70" 這樣文件名就會從內核原始碼樹的目錄開始
+，不會占用太寬的空間（很容易適合80列的寬度，也許會有一些縮進。）
+
+在後面的參考資料中能看到適當的補丁格式的更多細節。
+
+.. _tw_explicit_in_reply_to:
+
+15) 明確回覆郵件頭(In-Reply-To)
+-------------------------------
+
+手動添加回復補丁的的標題頭(In-Reply_To:) 是有幫助的（例如，使用 ``git send-email`` ）
+將補丁與以前的相關討論關聯起來，例如，將bug修復程序連結到電子郵件和bug報告。
+但是，對於多補丁系列，最好避免在回復時使用連結到該系列的舊版本。這樣，
+補丁的多個版本就不會成爲電子郵件客戶端中無法管理的引用序列。如果連結有用，
+可以使用 https://lkml.kernel.org/ 重定向器（例如，在封面電子郵件文本中）
+連結到補丁系列的早期版本。
+
+16) 發送git pull請求
+--------------------
+
+如果您有一系列補丁，那麼讓維護人員通過git pull操作將它們直接拉入子系統存儲
+庫可能是最方便的。但是，請注意，從開發人員那裡獲取補丁比從郵件列表中獲取補
+丁需要更高的信任度。因此，許多子系統維護人員不願意接受請求，特別是來自新的
+未知開發人員的請求。如果有疑問，您可以在封面郵件中使用pull 請求作爲補丁系列
+正常發布的一個選項，讓維護人員可以選擇使用其中之一。
+
+pull 請求的主題行中應該有[Git Pull]。請求本身應該在一行中包含存儲庫名稱和
+感興趣的分支；它應該看起來像::
+
+  Please pull from
+
+      git://jdelvare.pck.nerim.net/jdelvare-2.6 i2c-for-linus
+
+  to get these changes:
+
+
+pull 請求還應該包含一條整體消息，說明請求中將包含什麼，一個補丁本身的 ``Git shortlog``
+以及一個顯示補丁系列整體效果的 ``diffstat`` 。當然，將所有這些信息收集在一起
+的最簡單方法是讓 ``git`` 使用 ``git request-pull`` 命令爲您完成這些工作。
+
+一些維護人員（包括Linus）希望看到來自已簽名提交的請求；這增加了他們對你的
+請求信心。特別是，在沒有簽名標籤的情況下，Linus 不會從像 Github 這樣的公共
+託管站點拉請求。
+
+創建此類簽名的第一步是生成一個 GNRPG 密鑰，並由一個或多個核心內核開發人員對
+其進行簽名。這一步對新開發人員來說可能很困難，但沒有辦法繞過它。參加會議是
+找到可以簽署您的密鑰的開發人員的好方法。
+
+一旦您在Git 中準備了一個您希望有人拉的補丁系列，就用 ``git tag -s`` 創建一
+個簽名標記。這將創建一個新標記，標識該系列中的最後一次提交，並包含用您的私
+鑰創建的簽名。您還可以將changelog樣式的消息添加到標記中；這是一個描述拉請求
+整體效果的理想位置。
+
+如果維護人員將要從中提取的樹不是您正在使用的存儲庫，請不要忘記將已簽名的標記
+顯式推送到公共樹。
+
+生成拉請求時，請使用已簽名的標記作爲目標。這樣的命令可以實現::
+
+  git request-pull master git://my.public.tree/linux.git my-signed-tag
+
+參考文獻
+--------
+
+Andrew Morton, "The perfect patch" (tpp).
+  <https://www.ozlabs.org/~akpm/stuff/tpp.txt>
+
+Jeff Garzik, "Linux kernel patch submission format".
+  <https://web.archive.org/web/20180829112450/http://linux.yyz.us/patch-format.html>
+
+Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer".
+  <http://www.kroah.com/log/linux/maintainer.html>
+
+  <http://www.kroah.com/log/linux/maintainer-02.html>
+
+  <http://www.kroah.com/log/linux/maintainer-03.html>
+
+  <http://www.kroah.com/log/linux/maintainer-04.html>
+
+  <http://www.kroah.com/log/linux/maintainer-05.html>
+
+  <http://www.kroah.com/log/linux/maintainer-06.html>
+
+NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!
+  <https://lore.kernel.org/r/20050711.125305.08322243.davem@davemloft.net>
+
+Kernel Documentation/process/coding-style.rst:
+  :ref:`Documentation/translations/zh_TW/process/coding-style.rst <tw_codingstyle>`
+
+Linus Torvalds's mail on the canonical patch format:
+  <https://lore.kernel.org/r/Pine.LNX.4.58.0504071023190.28951@ppc970.osdl.org>
+
+Andi Kleen, "On submitting kernel patches"
+  Some strategies to get difficult or controversial changes in.
+
+  http://halobates.de/on-submitting-patches.pdf
+
diff --git a/Documentation/translations/zh_TW/process/volatile-considered-harmful.rst b/Documentation/translations/zh_TW/process/volatile-considered-harmful.rst
new file mode 100644
index 000000000000..097fe80352cb
--- /dev/null
+++ b/Documentation/translations/zh_TW/process/volatile-considered-harmful.rst
@@ -0,0 +1,110 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _tw_volatile_considered_harmful:
+
+.. include:: ../disclaimer-zh_TW.rst
+
+:Original: :ref:`Documentation/process/volatile-considered-harmful.rst
+           <volatile_considered_harmful>`
+
+如果想評論或更新本文的內容，請直接聯繫原文檔的維護者。如果你使用英文
+交流有困難的話，也可以向中文版維護者求助。如果本翻譯更新不及時或者翻
+譯存在問題，請聯繫中文版維護者::
+
+        英文版維護者： Jonathan Corbet <corbet@lwn.net>
+        中文版維護者： 伍鵬  Bryan Wu <bryan.wu@analog.com>
+        中文版翻譯者： 伍鵬  Bryan Wu <bryan.wu@analog.com>
+        中文版校譯者： 張漢輝  Eugene Teo <eugeneteo@kernel.sg>
+                       楊瑞  Dave Young <hidave.darkstar@gmail.com>
+                       時奎亮 Alex Shi <alex.shi@linux.alibaba.com>
+                       胡皓文 Hu Haowen <src.res@email.cn>
+
+爲什麼不應該使用「volatile」類型
+================================
+
+C程式設計師通常認爲volatile表示某個變量可以在當前執行的線程之外被改變；因此，在內核
+中用到共享數據結構時，常常會有C程式設計師喜歡使用volatile這類變量。換句話說，他們經
+常會把volatile類型看成某種簡易的原子變量，當然它們不是。在內核中使用volatile幾
+乎總是錯誤的；本文檔將解釋爲什麼這樣。
+
+理解volatile的關鍵是知道它的目的是用來消除優化，實際上很少有人真正需要這樣的應
+用。在內核中，程式設計師必須防止意外的並發訪問破壞共享的數據結構，這其實是一個完全
+不同的任務。用來防止意外並發訪問的保護措施，可以更加高效的避免大多數優化相關的
+問題。
+
+像volatile一樣，內核提供了很多原語來保證並發訪問時的數據安全（自旋鎖, 互斥量,內
+存屏障等等），同樣可以防止意外的優化。如果可以正確使用這些內核原語，那麼就沒有
+必要再使用volatile。如果仍然必須使用volatile，那麼幾乎可以肯定在代碼的某處有一
+個bug。在正確設計的內核代碼中，volatile能帶來的僅僅是使事情變慢。
+
+思考一下這段典型的內核代碼::
+
+    spin_lock(&the_lock);
+    do_something_on(&shared_data);
+    do_something_else_with(&shared_data);
+    spin_unlock(&the_lock);
+
+如果所有的代碼都遵循加鎖規則，當持有the_lock的時候，不可能意外的改變shared_data的
+值。任何可能訪問該數據的其他代碼都會在這個鎖上等待。自旋鎖原語跟內存屏障一樣—— 它
+們顯式的用來書寫成這樣 —— 意味著數據訪問不會跨越它們而被優化。所以本來編譯器認爲
+它知道在shared_data裡面將有什麼，但是因爲spin_lock()調用跟內存屏障一樣，會強制編
+譯器忘記它所知道的一切。那麼在訪問這些數據時不會有優化的問題。
+
+如果shared_data被聲名爲volatile，鎖操作將仍然是必須的。就算我們知道沒有其他人正在
+使用它，編譯器也將被阻止優化對臨界區內shared_data的訪問。在鎖有效的同時，
+shared_data不是volatile的。在處理共享數據的時候，適當的鎖操作可以不再需要
+volatile —— 並且是有潛在危害的。
+
+volatile的存儲類型最初是爲那些內存映射的I/O寄存器而定義。在內核里，寄存器訪問也應
+該被鎖保護，但是人們也不希望編譯器「優化」臨界區內的寄存器訪問。內核里I/O的內存訪問
+是通過訪問函數完成的；不贊成通過指針對I/O內存的直接訪問，並且不是在所有體系架構上
+都能工作。那些訪問函數正是爲了防止意外優化而寫的，因此，再說一次，volatile類型不
+是必需的。
+
+另一種引起用戶可能使用volatile的情況是當處理器正忙著等待一個變量的值。正確執行一
+個忙等待的方法是::
+
+    while (my_variable != what_i_want)
+        cpu_relax();
+
+cpu_relax()調用會降低CPU的能量消耗或者讓位於超線程雙處理器；它也作爲內存屏障一樣出
+現，所以，再一次，volatile不是必需的。當然，忙等待一開始就是一種反常規的做法。
+
+在內核中，一些稀少的情況下volatile仍然是有意義的：
+
+  - 在一些體系架構的系統上，允許直接的I/0內存訪問，那麼前面提到的訪問函數可以使用
+    volatile。基本上，每一個訪問函數調用它自己都是一個小的臨界區域並且保證了按照
+    程式設計師期望的那樣發生訪問操作。
+
+  - 某些會改變內存的內聯彙編代碼雖然沒有什麼其他明顯的附作用，但是有被GCC刪除的可
+    能性。在彙編聲明中加上volatile關鍵字可以防止這種刪除操作。
+
+  - Jiffies變量是一種特殊情況，雖然每次引用它的時候都可以有不同的值，但讀jiffies
+    變量時不需要任何特殊的加鎖保護。所以jiffies變量可以使用volatile，但是不贊成
+    其他跟jiffies相同類型變量使用volatile。Jiffies被認爲是一種「愚蠢的遺留物"
+    （Linus的話）因爲解決這個問題比保持現狀要麻煩的多。
+
+  - 由於某些I/0設備可能會修改連續一致的內存,所以有時,指向連續一致內存的數據結構
+    的指針需要正確的使用volatile。網絡適配器使用的環狀緩存區正是這類情形的一個例
+    子，其中適配器用改變指針來表示哪些描述符已經處理過了。
+
+對於大多代碼，上述幾種可以使用volatile的情況都不適用。所以，使用volatile是一種
+bug並且需要對這樣的代碼額外仔細檢查。那些試圖使用volatile的開發人員需要退一步想想
+他們真正想實現的是什麼。
+
+非常歡迎刪除volatile變量的補丁 － 只要證明這些補丁完整的考慮了並發問題。
+
+注釋
+----
+
+[1] https://lwn.net/Articles/233481/
+[2] https://lwn.net/Articles/233482/
+
+致謝
+----
+
+最初由Randy Dunlap推動並作初步研究
+由Jonathan Corbet撰寫
+參考Satyam Sharma，Johannes Stezenbach，Jesper Juhl，Heikki Orsila，
+H. Peter Anvin，Philipp Hahn和Stefan Richter的意見改善了本檔。
+
diff --git a/Documentation/translations/zh_TW/sparse.txt b/Documentation/translations/zh_TW/sparse.txt
new file mode 100644
index 000000000000..c9acb2c926cb
--- /dev/null
+++ b/Documentation/translations/zh_TW/sparse.txt
@@ -0,0 +1,91 @@
+Chinese translated version of Documentation/dev-tools/sparse.rst
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly.  However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help.  Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Traditional Chinese maintainer: Hu Haowen <src.res@email.cn>
+---------------------------------------------------------------------
+Documentation/dev-tools/sparse.rst 的繁體中文翻譯
+
+如果想評論或更新本文的內容，請直接聯繫原文檔的維護者。如果你使用英文
+交流有困難的話，也可以向繁體中文版維護者求助。如果本翻譯更新不及時或
+者翻譯存在問題，請聯繫繁體中文版維護者。
+
+繁體中文版維護者： 胡皓文 Hu Haowen <src.res@email.cn>
+繁體中文版翻譯者： 胡皓文 Hu Haowen <src.res@email.cn>
+
+以下爲正文
+---------------------------------------------------------------------
+
+Copyright 2004 Linus Torvalds
+Copyright 2004 Pavel Machek <pavel@ucw.cz>
+Copyright 2006 Bob Copeland <me@bobcopeland.com>
+
+使用 sparse 工具做類型檢查
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+"__bitwise" 是一種類型屬性，所以你應該這樣使用它：
+
+        typedef int __bitwise pm_request_t;
+
+        enum pm_request {
+                PM_SUSPEND = (__force pm_request_t) 1,
+                PM_RESUME = (__force pm_request_t) 2
+        };
+
+這樣會使 PM_SUSPEND 和 PM_RESUME 成爲位方式(bitwise)整數（使用"__force"
+是因爲 sparse 會抱怨改變位方式的類型轉換，但是這裡我們確實需要強制進行轉
+換）。而且因爲所有枚舉值都使用了相同的類型，這裡的"enum pm_request"也將
+會使用那個類型做爲底層實現。
+
+而且使用 gcc 編譯的時候，所有的 __bitwise/__force 都會消失，最後在 gcc
+看來它們只不過是普通的整數。
+
+坦白來說，你並不需要使用枚舉類型。上面那些實際都可以濃縮成一個特殊的"int
+__bitwise"類型。
+
+所以更簡單的辦法只要這樣做：
+
+	typedef int __bitwise pm_request_t;
+
+	#define PM_SUSPEND ((__force pm_request_t) 1)
+	#define PM_RESUME ((__force pm_request_t) 2)
+
+現在你就有了嚴格的類型檢查所需要的所有基礎架構。
+
+一個小提醒：常數整數"0"是特殊的。你可以直接把常數零當作位方式整數使用而
+不用擔心 sparse 會抱怨。這是因爲"bitwise"（恰如其名）是用來確保不同位方
+式類型不會被弄混（小尾模式，大尾模式，cpu尾模式，或者其他），對他們來說
+常數"0"確實是特殊的。
+
+獲取 sparse 工具
+~~~~~~~~~~~~~~~~
+
+你可以從 Sparse 的主頁獲取最新的發布版本：
+
+	http://www.kernel.org/pub/linux/kernel/people/josh/sparse/
+
+或者，你也可以使用 git 克隆最新的 sparse 開發版本：
+
+	git://git.kernel.org/pub/scm/linux/kernel/git/josh/sparse.git
+
+一旦你下載了源碼，只要以普通用戶身份運行：
+
+	make
+	make install
+
+它將會被自動安裝到你的 ~/bin 目錄下。
+
+使用 sparse 工具
+~~~~~~~~~~~~~~~~
+
+用"make C=1"命令來編譯內核，會對所有重新編譯的 C 文件使用 sparse 工具。
+或者使用"make C=2"命令，無論文件是否被重新編譯都會對其使用 sparse 工具。
+如果你已經編譯了內核，用後一種方式可以很快地檢查整個源碼樹。
+
+make 的可選變量 CHECKFLAGS 可以用來向 sparse 工具傳遞參數。編譯系統會自
+動向 sparse 工具傳遞 -Wbitwise 參數。
+
diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
index 0b5eefed027e..c432be070f67 100644
--- a/Documentation/userspace-api/index.rst
+++ b/Documentation/userspace-api/index.rst
@@ -27,6 +27,7 @@ place where this information is gathered.
    iommu
    media/index
    sysfs-platform_profile
+   vduse
 
 .. only::  subproject and html
 
diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index b7070d76f076..2e8134059c87 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -299,6 +299,7 @@ Code  Seq#    Include File                                           Comments
 'z'   10-4F  drivers/s390/crypto/zcrypt_api.h                        conflict!
 '|'   00-7F  linux/media.h
 0x80  00-1F  linux/fb.h
+0x81  00-1F  linux/vduse.h
 0x89  00-06  arch/x86/include/asm/sockios.h
 0x89  0B-DF  linux/sockios.h
 0x89  E0-EF  linux/sockios.h                                         SIOCPROTOPRIVATE range
diff --git a/Documentation/userspace-api/vduse.rst b/Documentation/userspace-api/vduse.rst
new file mode 100644
index 000000000000..42ef59ea5314
--- /dev/null
+++ b/Documentation/userspace-api/vduse.rst
@@ -0,0 +1,233 @@
+==================================
+VDUSE - "vDPA Device in Userspace"
+==================================
+
+vDPA (virtio data path acceleration) device is a device that uses a
+datapath which complies with the virtio specifications with vendor
+specific control path. vDPA devices can be both physically located on
+the hardware or emulated by software. VDUSE is a framework that makes it
+possible to implement software-emulated vDPA devices in userspace. And
+to make the device emulation more secure, the emulated vDPA device's
+control path is handled in the kernel and only the data path is
+implemented in the userspace.
+
+Note that only virtio block device is supported by VDUSE framework now,
+which can reduce security risks when the userspace process that implements
+the data path is run by an unprivileged user. The support for other device
+types can be added after the security issue of corresponding device driver
+is clarified or fixed in the future.
+
+Create/Destroy VDUSE devices
+------------------------
+
+VDUSE devices are created as follows:
+
+1. Create a new VDUSE instance with ioctl(VDUSE_CREATE_DEV) on
+   /dev/vduse/control.
+
+2. Setup each virtqueue with ioctl(VDUSE_VQ_SETUP) on /dev/vduse/$NAME.
+
+3. Begin processing VDUSE messages from /dev/vduse/$NAME. The first
+   messages will arrive while attaching the VDUSE instance to vDPA bus.
+
+4. Send the VDPA_CMD_DEV_NEW netlink message to attach the VDUSE
+   instance to vDPA bus.
+
+VDUSE devices are destroyed as follows:
+
+1. Send the VDPA_CMD_DEV_DEL netlink message to detach the VDUSE
+   instance from vDPA bus.
+
+2. Close the file descriptor referring to /dev/vduse/$NAME.
+
+3. Destroy the VDUSE instance with ioctl(VDUSE_DESTROY_DEV) on
+   /dev/vduse/control.
+
+The netlink messages can be sent via vdpa tool in iproute2 or use the
+below sample codes:
+
+.. code-block:: c
+
+	static int netlink_add_vduse(const char *name, enum vdpa_command cmd)
+	{
+		struct nl_sock *nlsock;
+		struct nl_msg *msg;
+		int famid;
+
+		nlsock = nl_socket_alloc();
+		if (!nlsock)
+			return -ENOMEM;
+
+		if (genl_connect(nlsock))
+			goto free_sock;
+
+		famid = genl_ctrl_resolve(nlsock, VDPA_GENL_NAME);
+		if (famid < 0)
+			goto close_sock;
+
+		msg = nlmsg_alloc();
+		if (!msg)
+			goto close_sock;
+
+		if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, famid, 0, 0, cmd, 0))
+			goto nla_put_failure;
+
+		NLA_PUT_STRING(msg, VDPA_ATTR_DEV_NAME, name);
+		if (cmd == VDPA_CMD_DEV_NEW)
+			NLA_PUT_STRING(msg, VDPA_ATTR_MGMTDEV_DEV_NAME, "vduse");
+
+		if (nl_send_sync(nlsock, msg))
+			goto close_sock;
+
+		nl_close(nlsock);
+		nl_socket_free(nlsock);
+
+		return 0;
+	nla_put_failure:
+		nlmsg_free(msg);
+	close_sock:
+		nl_close(nlsock);
+	free_sock:
+		nl_socket_free(nlsock);
+		return -1;
+	}
+
+How VDUSE works
+---------------
+
+As mentioned above, a VDUSE device is created by ioctl(VDUSE_CREATE_DEV) on
+/dev/vduse/control. With this ioctl, userspace can specify some basic configuration
+such as device name (uniquely identify a VDUSE device), virtio features, virtio
+configuration space, the number of virtqueues and so on for this emulated device.
+Then a char device interface (/dev/vduse/$NAME) is exported to userspace for device
+emulation. Userspace can use the VDUSE_VQ_SETUP ioctl on /dev/vduse/$NAME to
+add per-virtqueue configuration such as the max size of virtqueue to the device.
+
+After the initialization, the VDUSE device can be attached to vDPA bus via
+the VDPA_CMD_DEV_NEW netlink message. Userspace needs to read()/write() on
+/dev/vduse/$NAME to receive/reply some control messages from/to VDUSE kernel
+module as follows:
+
+.. code-block:: c
+
+	static int vduse_message_handler(int dev_fd)
+	{
+		int len;
+		struct vduse_dev_request req;
+		struct vduse_dev_response resp;
+
+		len = read(dev_fd, &req, sizeof(req));
+		if (len != sizeof(req))
+			return -1;
+
+		resp.request_id = req.request_id;
+
+		switch (req.type) {
+
+		/* handle different types of messages */
+
+		}
+
+		len = write(dev_fd, &resp, sizeof(resp));
+		if (len != sizeof(resp))
+			return -1;
+
+		return 0;
+	}
+
+There are now three types of messages introduced by VDUSE framework:
+
+- VDUSE_GET_VQ_STATE: Get the state for virtqueue, userspace should return
+  avail index for split virtqueue or the device/driver ring wrap counters and
+  the avail and used index for packed virtqueue.
+
+- VDUSE_SET_STATUS: Set the device status, userspace should follow
+  the virtio spec: https://docs.oasis-open.org/virtio/virtio/v1.1/virtio-v1.1.html
+  to process this message. For example, fail to set the FEATURES_OK device
+  status bit if the device can not accept the negotiated virtio features
+  get from the VDUSE_DEV_GET_FEATURES ioctl.
+
+- VDUSE_UPDATE_IOTLB: Notify userspace to update the memory mapping for specified
+  IOVA range, userspace should firstly remove the old mapping, then setup the new
+  mapping via the VDUSE_IOTLB_GET_FD ioctl.
+
+After DRIVER_OK status bit is set via the VDUSE_SET_STATUS message, userspace is
+able to start the dataplane processing as follows:
+
+1. Get the specified virtqueue's information with the VDUSE_VQ_GET_INFO ioctl,
+   including the size, the IOVAs of descriptor table, available ring and used ring,
+   the state and the ready status.
+
+2. Pass the above IOVAs to the VDUSE_IOTLB_GET_FD ioctl so that those IOVA regions
+   can be mapped into userspace. Some sample codes is shown below:
+
+.. code-block:: c
+
+	static int perm_to_prot(uint8_t perm)
+	{
+		int prot = 0;
+
+		switch (perm) {
+		case VDUSE_ACCESS_WO:
+			prot |= PROT_WRITE;
+			break;
+		case VDUSE_ACCESS_RO:
+			prot |= PROT_READ;
+			break;
+		case VDUSE_ACCESS_RW:
+			prot |= PROT_READ | PROT_WRITE;
+			break;
+		}
+
+		return prot;
+	}
+
+	static void *iova_to_va(int dev_fd, uint64_t iova, uint64_t *len)
+	{
+		int fd;
+		void *addr;
+		size_t size;
+		struct vduse_iotlb_entry entry;
+
+		entry.start = iova;
+		entry.last = iova;
+
+		/*
+		 * Find the first IOVA region that overlaps with the specified
+		 * range [start, last] and return the corresponding file descriptor.
+		 */
+		fd = ioctl(dev_fd, VDUSE_IOTLB_GET_FD, &entry);
+		if (fd < 0)
+			return NULL;
+
+		size = entry.last - entry.start + 1;
+		*len = entry.last - iova + 1;
+		addr = mmap(0, size, perm_to_prot(entry.perm), MAP_SHARED,
+			    fd, entry.offset);
+		close(fd);
+		if (addr == MAP_FAILED)
+			return NULL;
+
+		/*
+		 * Using some data structures such as linked list to store
+		 * the iotlb mapping. The munmap(2) should be called for the
+		 * cached mapping when the corresponding VDUSE_UPDATE_IOTLB
+		 * message is received or the device is reset.
+		 */
+
+		return addr + iova - entry.start;
+	}
+
+3. Setup the kick eventfd for the specified virtqueues with the VDUSE_VQ_SETUP_KICKFD
+   ioctl. The kick eventfd is used by VDUSE kernel module to notify userspace to
+   consume the available ring. This is optional since userspace can choose to poll the
+   available ring instead.
+
+4. Listen to the kick eventfd (optional) and consume the available ring. The buffer
+   described by the descriptors in the descriptor table should be also mapped into
+   userspace via the VDUSE_IOTLB_GET_FD ioctl before accessing.
+
+5. Inject an interrupt for specific virtqueue with the VDUSE_INJECT_VQ_IRQ ioctl
+   after the used ring is filled.
+
+For more details on the uAPI, please see include/uapi/linux/vduse.h.
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index dae68e68ca23..a6729c8cf063 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -3357,6 +3357,7 @@ flags which can include the following:
   - KVM_GUESTDBG_INJECT_DB:     inject DB type exception [x86]
   - KVM_GUESTDBG_INJECT_BP:     inject BP type exception [x86]
   - KVM_GUESTDBG_EXIT_PENDING:  trigger an immediate guest exit [s390]
+  - KVM_GUESTDBG_BLOCKIRQ:      avoid injecting interrupts/NMI/SMI [x86]
 
 For example KVM_GUESTDBG_USE_SW_BP indicates that software breakpoints
 are enabled in memory so we need to ensure breakpoint exceptions are
@@ -5077,7 +5078,7 @@ of bytes successfully copied is returned. If the call completes successfully
 then ``length`` is returned.
 
 4.131 KVM_GET_SREGS2
-------------------
+--------------------
 
 :Capability: KVM_CAP_SREGS2
 :Architectures: x86
@@ -5090,17 +5091,17 @@ This ioctl (when supported) replaces the KVM_GET_SREGS.
 
 ::
 
-struct kvm_sregs2 {
-	/* out (KVM_GET_SREGS2) / in (KVM_SET_SREGS2) */
-	struct kvm_segment cs, ds, es, fs, gs, ss;
-	struct kvm_segment tr, ldt;
-	struct kvm_dtable gdt, idt;
-	__u64 cr0, cr2, cr3, cr4, cr8;
-	__u64 efer;
-	__u64 apic_base;
-	__u64 flags;
-	__u64 pdptrs[4];
-};
+        struct kvm_sregs2 {
+                /* out (KVM_GET_SREGS2) / in (KVM_SET_SREGS2) */
+                struct kvm_segment cs, ds, es, fs, gs, ss;
+                struct kvm_segment tr, ldt;
+                struct kvm_dtable gdt, idt;
+                __u64 cr0, cr2, cr3, cr4, cr8;
+                __u64 efer;
+                __u64 apic_base;
+                __u64 flags;
+                __u64 pdptrs[4];
+        };
 
 flags values for ``kvm_sregs2``:
 
@@ -5110,7 +5111,7 @@ flags values for ``kvm_sregs2``:
 
 
 4.132 KVM_SET_SREGS2
-------------------
+--------------------
 
 :Capability: KVM_CAP_SREGS2
 :Architectures: x86
@@ -5201,12 +5202,16 @@ trailing ``'\0'``, is indicated by the ``name_size`` field in the header.
 The descriptors block is only needed to be read once for the lifetime of the
 file descriptor contains a sequence of ``struct kvm_stats_desc``, each followed
 by a string of size ``name_size``.
+::
 
 	#define KVM_STATS_TYPE_SHIFT		0
 	#define KVM_STATS_TYPE_MASK		(0xF << KVM_STATS_TYPE_SHIFT)
 	#define KVM_STATS_TYPE_CUMULATIVE	(0x0 << KVM_STATS_TYPE_SHIFT)
 	#define KVM_STATS_TYPE_INSTANT		(0x1 << KVM_STATS_TYPE_SHIFT)
 	#define KVM_STATS_TYPE_PEAK		(0x2 << KVM_STATS_TYPE_SHIFT)
+	#define KVM_STATS_TYPE_LINEAR_HIST	(0x3 << KVM_STATS_TYPE_SHIFT)
+	#define KVM_STATS_TYPE_LOG_HIST		(0x4 << KVM_STATS_TYPE_SHIFT)
+	#define KVM_STATS_TYPE_MAX		KVM_STATS_TYPE_LOG_HIST
 
 	#define KVM_STATS_UNIT_SHIFT		4
 	#define KVM_STATS_UNIT_MASK		(0xF << KVM_STATS_UNIT_SHIFT)
@@ -5214,18 +5219,20 @@ by a string of size ``name_size``.
 	#define KVM_STATS_UNIT_BYTES		(0x1 << KVM_STATS_UNIT_SHIFT)
 	#define KVM_STATS_UNIT_SECONDS		(0x2 << KVM_STATS_UNIT_SHIFT)
 	#define KVM_STATS_UNIT_CYCLES		(0x3 << KVM_STATS_UNIT_SHIFT)
+	#define KVM_STATS_UNIT_MAX		KVM_STATS_UNIT_CYCLES
 
 	#define KVM_STATS_BASE_SHIFT		8
 	#define KVM_STATS_BASE_MASK		(0xF << KVM_STATS_BASE_SHIFT)
 	#define KVM_STATS_BASE_POW10		(0x0 << KVM_STATS_BASE_SHIFT)
 	#define KVM_STATS_BASE_POW2		(0x1 << KVM_STATS_BASE_SHIFT)
+	#define KVM_STATS_BASE_MAX		KVM_STATS_BASE_POW2
 
 	struct kvm_stats_desc {
 		__u32 flags;
 		__s16 exponent;
 		__u16 size;
 		__u32 offset;
-		__u32 unused;
+		__u32 bucket_size;
 		char name[];
 	};
 
@@ -5234,24 +5241,40 @@ by this descriptor. Its endianness is CPU native.
 The following flags are supported:
 
 Bits 0-3 of ``flags`` encode the type:
+
   * ``KVM_STATS_TYPE_CUMULATIVE``
-    The statistics data is cumulative. The value of data can only be increased.
+    The statistics reports a cumulative count. The value of data can only be increased.
     Most of the counters used in KVM are of this type.
     The corresponding ``size`` field for this type is always 1.
     All cumulative statistics data are read/write.
   * ``KVM_STATS_TYPE_INSTANT``
-    The statistics data is instantaneous. Its value can be increased or
+    The statistics reports an instantaneous value. Its value can be increased or
     decreased. This type is usually used as a measurement of some resources,
     like the number of dirty pages, the number of large pages, etc.
     All instant statistics are read only.
     The corresponding ``size`` field for this type is always 1.
   * ``KVM_STATS_TYPE_PEAK``
-    The statistics data is peak. The value of data can only be increased, and
-    represents a peak value for a measurement, for example the maximum number
+    The statistics data reports a peak value, for example the maximum number
     of items in a hash table bucket, the longest time waited and so on.
+    The value of data can only be increased.
     The corresponding ``size`` field for this type is always 1.
+  * ``KVM_STATS_TYPE_LINEAR_HIST``
+    The statistic is reported as a linear histogram. The number of
+    buckets is specified by the ``size`` field. The size of buckets is specified
+    by the ``hist_param`` field. The range of the Nth bucket (1 <= N < ``size``)
+    is [``hist_param``*(N-1), ``hist_param``*N), while the range of the last
+    bucket is [``hist_param``*(``size``-1), +INF). (+INF means positive infinity
+    value.) The bucket value indicates how many samples fell in the bucket's range.
+  * ``KVM_STATS_TYPE_LOG_HIST``
+    The statistic is reported as a logarithmic histogram. The number of
+    buckets is specified by the ``size`` field. The range of the first bucket is
+    [0, 1), while the range of the last bucket is [pow(2, ``size``-2), +INF).
+    Otherwise, The Nth bucket (1 < N < ``size``) covers
+    [pow(2, N-2), pow(2, N-1)). The bucket value indicates how many samples fell
+    in the bucket's range.
 
 Bits 4-7 of ``flags`` encode the unit:
+
   * ``KVM_STATS_UNIT_NONE``
     There is no unit for the value of statistics data. This usually means that
     the value is a simple counter of an event.
@@ -5266,6 +5289,7 @@ Bits 4-7 of ``flags`` encode the unit:
 
 Bits 8-11 of ``flags``, together with ``exponent``, encode the scale of the
 unit:
+
   * ``KVM_STATS_BASE_POW10``
     The scale is based on power of 10. It is used for measurement of time and
     CPU clock cycles.  For example, an exponent of -9 can be used with
@@ -5282,9 +5306,9 @@ unsigned 64bit data.
 The ``offset`` field is the offset from the start of Data Block to the start of
 the corresponding statistics data.
 
-The ``unused`` field is reserved for future support for other types of
-statistics data, like log/linear histogram. Its value is always 0 for the types
-defined above.
+The ``bucket_size`` field is used as a parameter for histogram statistics data.
+It is only used by linear histogram statistics data, specifying the size of a
+bucket.
 
 The ``name`` field is the name string of the statistics data. The name string
 starts at the end of ``struct kvm_stats_desc``.  The maximum length including
@@ -7213,7 +7237,7 @@ supported in the host. A VMM can check whether the service is
 available to the guest on migration.
 
 8.33 KVM_CAP_HYPERV_ENFORCE_CPUID
------------------------------
+---------------------------------
 
 Architectures: x86
 
diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst
index 88fa495abbac..5d27da356836 100644
--- a/Documentation/virt/kvm/locking.rst
+++ b/Documentation/virt/kvm/locking.rst
@@ -21,6 +21,12 @@ The acquisition orders for mutexes are as follows:
   can be taken inside a kvm->srcu read-side critical section,
   while kvm->slots_lock cannot.
 
+- kvm->mn_active_invalidate_count ensures that pairs of
+  invalidate_range_start() and invalidate_range_end() callbacks
+  use the same memslots array.  kvm->slots_lock and kvm->slots_arch_lock
+  are taken on the waiting side in install_new_memslots, so MMU notifiers
+  must not take either kvm->slots_lock or kvm->slots_arch_lock.
+
 On x86:
 
 - vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock
diff --git a/Documentation/vm/damon/api.rst b/Documentation/vm/damon/api.rst
new file mode 100644
index 000000000000..08f34df45523
--- /dev/null
+++ b/Documentation/vm/damon/api.rst
@@ -0,0 +1,20 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+API Reference
+=============
+
+Kernel space programs can use every feature of DAMON using below APIs.  All you
+need to do is including ``damon.h``, which is located in ``include/linux/`` of
+the source tree.
+
+Structures
+==========
+
+.. kernel-doc:: include/linux/damon.h
+
+
+Functions
+=========
+
+.. kernel-doc:: mm/damon/core.c
diff --git a/Documentation/vm/damon/design.rst b/Documentation/vm/damon/design.rst
new file mode 100644
index 000000000000..b05159c295f4
--- /dev/null
+++ b/Documentation/vm/damon/design.rst
@@ -0,0 +1,166 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======
+Design
+======
+
+Configurable Layers
+===================
+
+DAMON provides data access monitoring functionality while making the accuracy
+and the overhead controllable.  The fundamental access monitorings require
+primitives that dependent on and optimized for the target address space.  On
+the other hand, the accuracy and overhead tradeoff mechanism, which is the core
+of DAMON, is in the pure logic space.  DAMON separates the two parts in
+different layers and defines its interface to allow various low level
+primitives implementations configurable with the core logic.
+
+Due to this separated design and the configurable interface, users can extend
+DAMON for any address space by configuring the core logics with appropriate low
+level primitive implementations.  If appropriate one is not provided, users can
+implement the primitives on their own.
+
+For example, physical memory, virtual memory, swap space, those for specific
+processes, NUMA nodes, files, and backing memory devices would be supportable.
+Also, if some architectures or devices support special optimized access check
+primitives, those will be easily configurable.
+
+
+Reference Implementations of Address Space Specific Primitives
+==============================================================
+
+The low level primitives for the fundamental access monitoring are defined in
+two parts:
+
+1. Identification of the monitoring target address range for the address space.
+2. Access check of specific address range in the target space.
+
+DAMON currently provides the implementation of the primitives for only the
+virtual address spaces. Below two subsections describe how it works.
+
+
+VMA-based Target Address Range Construction
+-------------------------------------------
+
+Only small parts in the super-huge virtual address space of the processes are
+mapped to the physical memory and accessed.  Thus, tracking the unmapped
+address regions is just wasteful.  However, because DAMON can deal with some
+level of noise using the adaptive regions adjustment mechanism, tracking every
+mapping is not strictly required but could even incur a high overhead in some
+cases.  That said, too huge unmapped areas inside the monitoring target should
+be removed to not take the time for the adaptive mechanism.
+
+For the reason, this implementation converts the complex mappings to three
+distinct regions that cover every mapped area of the address space.  The two
+gaps between the three regions are the two biggest unmapped areas in the given
+address space.  The two biggest unmapped areas would be the gap between the
+heap and the uppermost mmap()-ed region, and the gap between the lowermost
+mmap()-ed region and the stack in most of the cases.  Because these gaps are
+exceptionally huge in usual address spaces, excluding these will be sufficient
+to make a reasonable trade-off.  Below shows this in detail::
+
+    <heap>
+    <BIG UNMAPPED REGION 1>
+    <uppermost mmap()-ed region>
+    (small mmap()-ed regions and munmap()-ed regions)
+    <lowermost mmap()-ed region>
+    <BIG UNMAPPED REGION 2>
+    <stack>
+
+
+PTE Accessed-bit Based Access Check
+-----------------------------------
+
+The implementation for the virtual address space uses PTE Accessed-bit for
+basic access checks.  It finds the relevant PTE Accessed bit from the address
+by walking the page table for the target task of the address.  In this way, the
+implementation finds and clears the bit for next sampling target address and
+checks whether the bit set again after one sampling period.  This could disturb
+other kernel subsystems using the Accessed bits, namely Idle page tracking and
+the reclaim logic.  To avoid such disturbances, DAMON makes it mutually
+exclusive with Idle page tracking and uses ``PG_idle`` and ``PG_young`` page
+flags to solve the conflict with the reclaim logic, as Idle page tracking does.
+
+
+Address Space Independent Core Mechanisms
+=========================================
+
+Below four sections describe each of the DAMON core mechanisms and the five
+monitoring attributes, ``sampling interval``, ``aggregation interval``,
+``regions update interval``, ``minimum number of regions``, and ``maximum
+number of regions``.
+
+
+Access Frequency Monitoring
+---------------------------
+
+The output of DAMON says what pages are how frequently accessed for a given
+duration.  The resolution of the access frequency is controlled by setting
+``sampling interval`` and ``aggregation interval``.  In detail, DAMON checks
+access to each page per ``sampling interval`` and aggregates the results.  In
+other words, counts the number of the accesses to each page.  After each
+``aggregation interval`` passes, DAMON calls callback functions that previously
+registered by users so that users can read the aggregated results and then
+clears the results.  This can be described in below simple pseudo-code::
+
+    while monitoring_on:
+        for page in monitoring_target:
+            if accessed(page):
+                nr_accesses[page] += 1
+        if time() % aggregation_interval == 0:
+            for callback in user_registered_callbacks:
+                callback(monitoring_target, nr_accesses)
+            for page in monitoring_target:
+                nr_accesses[page] = 0
+        sleep(sampling interval)
+
+The monitoring overhead of this mechanism will arbitrarily increase as the
+size of the target workload grows.
+
+
+Region Based Sampling
+---------------------
+
+To avoid the unbounded increase of the overhead, DAMON groups adjacent pages
+that assumed to have the same access frequencies into a region.  As long as the
+assumption (pages in a region have the same access frequencies) is kept, only
+one page in the region is required to be checked.  Thus, for each ``sampling
+interval``, DAMON randomly picks one page in each region, waits for one
+``sampling interval``, checks whether the page is accessed meanwhile, and
+increases the access frequency of the region if so.  Therefore, the monitoring
+overhead is controllable by setting the number of regions.  DAMON allows users
+to set the minimum and the maximum number of regions for the trade-off.
+
+This scheme, however, cannot preserve the quality of the output if the
+assumption is not guaranteed.
+
+
+Adaptive Regions Adjustment
+---------------------------
+
+Even somehow the initial monitoring target regions are well constructed to
+fulfill the assumption (pages in same region have similar access frequencies),
+the data access pattern can be dynamically changed.  This will result in low
+monitoring quality.  To keep the assumption as much as possible, DAMON
+adaptively merges and splits each region based on their access frequency.
+
+For each ``aggregation interval``, it compares the access frequencies of
+adjacent regions and merges those if the frequency difference is small.  Then,
+after it reports and clears the aggregated access frequency of each region, it
+splits each region into two or three regions if the total number of regions
+will not exceed the user-specified maximum number of regions after the split.
+
+In this way, DAMON provides its best-effort quality and minimal overhead while
+keeping the bounds users set for their trade-off.
+
+
+Dynamic Target Space Updates Handling
+-------------------------------------
+
+The monitoring target address range could dynamically changed.  For example,
+virtual memory could be dynamically mapped and unmapped.  Physical memory could
+be hot-plugged.
+
+As the changes could be quite frequent in some cases, DAMON checks the dynamic
+memory mapping changes and applies it to the abstracted target area only for
+each of a user-specified time interval (``regions update interval``).
diff --git a/Documentation/vm/damon/faq.rst b/Documentation/vm/damon/faq.rst
new file mode 100644
index 000000000000..cb3d8b585a8b
--- /dev/null
+++ b/Documentation/vm/damon/faq.rst
@@ -0,0 +1,51 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+Frequently Asked Questions
+==========================
+
+Why a new subsystem, instead of extending perf or other user space tools?
+=========================================================================
+
+First, because it needs to be lightweight as much as possible so that it can be
+used online, any unnecessary overhead such as kernel - user space context
+switching cost should be avoided.  Second, DAMON aims to be used by other
+programs including the kernel.  Therefore, having a dependency on specific
+tools like perf is not desirable.  These are the two biggest reasons why DAMON
+is implemented in the kernel space.
+
+
+Can 'idle pages tracking' or 'perf mem' substitute DAMON?
+=========================================================
+
+Idle page tracking is a low level primitive for access check of the physical
+address space.  'perf mem' is similar, though it can use sampling to minimize
+the overhead.  On the other hand, DAMON is a higher-level framework for the
+monitoring of various address spaces.  It is focused on memory management
+optimization and provides sophisticated accuracy/overhead handling mechanisms.
+Therefore, 'idle pages tracking' and 'perf mem' could provide a subset of
+DAMON's output, but cannot substitute DAMON.
+
+
+Does DAMON support virtual memory only?
+=======================================
+
+No.  The core of the DAMON is address space independent.  The address space
+specific low level primitive parts including monitoring target regions
+constructions and actual access checks can be implemented and configured on the
+DAMON core by the users.  In this way, DAMON users can monitor any address
+space with any access check technique.
+
+Nonetheless, DAMON provides vma tracking and PTE Accessed bit check based
+implementations of the address space dependent functions for the virtual memory
+by default, for a reference and convenient use.  In near future, we will
+provide those for physical memory address space.
+
+
+Can I simply monitor page granularity?
+======================================
+
+Yes.  You can do so by setting the ``min_nr_regions`` attribute higher than the
+working set size divided by the page size.  Because the monitoring target
+regions size is forced to be ``>=page size``, the region split will make no
+effect.
diff --git a/Documentation/vm/damon/index.rst b/Documentation/vm/damon/index.rst
new file mode 100644
index 000000000000..a2858baf3bf1
--- /dev/null
+++ b/Documentation/vm/damon/index.rst
@@ -0,0 +1,30 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+DAMON: Data Access MONitor
+==========================
+
+DAMON is a data access monitoring framework subsystem for the Linux kernel.
+The core mechanisms of DAMON (refer to :doc:`design` for the detail) make it
+
+ - *accurate* (the monitoring output is useful enough for DRAM level memory
+   management; It might not appropriate for CPU Cache levels, though),
+ - *light-weight* (the monitoring overhead is low enough to be applied online),
+   and
+ - *scalable* (the upper-bound of the overhead is in constant range regardless
+   of the size of target workloads).
+
+Using this framework, therefore, the kernel's memory management mechanisms can
+make advanced decisions.  Experimental memory management optimization works
+that incurring high data accesses monitoring overhead could implemented again.
+In user space, meanwhile, users who have some special workloads can write
+personalized applications for better understanding and optimizations of their
+workloads and systems.
+
+.. toctree::
+   :maxdepth: 2
+
+   faq
+   design
+   api
+   plans
diff --git a/Documentation/vm/hwpoison.rst b/Documentation/vm/hwpoison.rst
index a5c884293dac..89b5f7a52077 100644
--- a/Documentation/vm/hwpoison.rst
+++ b/Documentation/vm/hwpoison.rst
@@ -180,7 +180,6 @@ Limitations
 ===========
 - Not all page types are supported and never will. Most kernel internal
   objects cannot be recovered, only LRU pages for now.
-- Right now hugepage support is missing.
 
 ---
 Andi Kleen, Oct 2009
diff --git a/Documentation/vm/index.rst b/Documentation/vm/index.rst
index eff5fbd492d0..b51f0d8992f8 100644
--- a/Documentation/vm/index.rst
+++ b/Documentation/vm/index.rst
@@ -32,6 +32,7 @@ descriptions of data structures and algorithms.
    arch_pgtable_helpers
    balance
    cleancache
+   damon/index
    free_page_reporting
    frontswap
    highmem
diff --git a/Documentation/x86/x86_64/mm.rst b/Documentation/x86/x86_64/mm.rst
index ede1875719fb..9798676bb0bf 100644
--- a/Documentation/x86/x86_64/mm.rst
+++ b/Documentation/x86/x86_64/mm.rst
@@ -140,10 +140,6 @@ The direct mapping covers all memory in the system up to the highest
 memory address (this means in some cases it can also include PCI memory
 holes).
 
-vmalloc space is lazily synchronized into the different PML4/PML5 pages of
-the processes using the page fault handler, with init_top_pgt as
-reference.
-
 We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual
 memory window (this size is arbitrary, it can be raised later if needed).
 The mappings are not part of any other kernel PGD and are only available
diff --git a/MAINTAINERS b/MAINTAINERS
index 0d4b8963f17b..eeb4c70b3d5b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -333,7 +333,7 @@ S:	Maintained
 F:	drivers/platform/x86/acer-wmi.c
 
 ACPI
-M:	"Rafael J. Wysocki" <rjw@rjwysocki.net>
+M:	"Rafael J. Wysocki" <rafael@kernel.org>
 M:	Len Brown <lenb@kernel.org>
 L:	linux-acpi@vger.kernel.org
 S:	Supported
@@ -354,7 +354,7 @@ F:	include/linux/fwnode.h
 F:	tools/power/acpi/
 
 ACPI APEI
-M:	"Rafael J. Wysocki" <rjw@rjwysocki.net>
+M:	"Rafael J. Wysocki" <rafael@kernel.org>
 M:	Len Brown <lenb@kernel.org>
 R:	James Morse <james.morse@arm.com>
 R:	Tony Luck <tony.luck@intel.com>
@@ -364,7 +364,6 @@ F:	drivers/acpi/apei/
 
 ACPI COMPONENT ARCHITECTURE (ACPICA)
 M:	Robert Moore <robert.moore@intel.com>
-M:	Erik Kaneda <erik.kaneda@intel.com>
 M:	"Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
 L:	linux-acpi@vger.kernel.org
 L:	devel@acpica.org
@@ -403,7 +402,7 @@ S:	Maintained
 F:	drivers/platform/x86/i2c-multi-instantiate.c
 
 ACPI PMIC DRIVERS
-M:	"Rafael J. Wysocki" <rjw@rjwysocki.net>
+M:	"Rafael J. Wysocki" <rafael@kernel.org>
 M:	Len Brown <lenb@kernel.org>
 R:	Andy Shevchenko <andy@kernel.org>
 R:	Mika Westerberg <mika.westerberg@linux.intel.com>
@@ -798,7 +797,7 @@ F:	Documentation/devicetree/bindings/i2c/i2c-altera.txt
 F:	drivers/i2c/busses/i2c-altera.c
 
 ALTERA MAILBOX DRIVER
-M:	Ley Foon Tan <ley.foon.tan@intel.com>
+M:	Joyce Ooi <joyce.ooi@intel.com>
 S:	Maintained
 F:	drivers/mailbox/mailbox-altera.c
 
@@ -985,6 +984,12 @@ S:	Supported
 T:	git https://gitlab.freedesktop.org/agd5f/linux.git
 F:	drivers/gpu/drm/amd/pm/powerplay/
 
+AMD PTDMA DRIVER
+M:	Sanjay R Mehta <sanju.mehta@amd.com>
+L:	dmaengine@vger.kernel.org
+S:	Maintained
+F:	drivers/dma/ptdma/
+
 AMD SEATTLE DEVICE TREE SUPPORT
 M:	Brijesh Singh <brijeshkumar.singh@amd.com>
 M:	Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
@@ -1268,6 +1273,13 @@ L:	linux-input@vger.kernel.org
 S:	Odd fixes
 F:	drivers/input/mouse/bcm5974.c
 
+APPLE DART IOMMU DRIVER
+M:	Sven Peter <sven@svenpeter.dev>
+L:	iommu@lists.linux-foundation.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/iommu/apple,dart.yaml
+F:	drivers/iommu/apple-dart.c
+
 APPLE SMC DRIVER
 M:	Henrik Rydberg <rydberg@bitmath.org>
 L:	linux-hwmon@vger.kernel.org
@@ -1489,7 +1501,7 @@ F:	drivers/amba/
 F:	include/linux/amba/bus.h
 
 ARM PRIMECELL PL35X NAND CONTROLLER DRIVER
-M:	Miquel Raynal <miquel.raynal@bootlin.com@bootlin.com>
+M:	Miquel Raynal <miquel.raynal@bootlin.com>
 M:	Naga Sureshkumar Relli <nagasure@xilinx.com>
 L:	linux-mtd@lists.infradead.org
 S:	Maintained
@@ -1497,7 +1509,7 @@ F:	Documentation/devicetree/bindings/mtd/arm,pl353-nand-r2p1.yaml
 F:	drivers/mtd/nand/raw/pl35x-nand-controller.c
 
 ARM PRIMECELL PL35X SMC DRIVER
-M:	Miquel Raynal <miquel.raynal@bootlin.com@bootlin.com>
+M:	Miquel Raynal <miquel.raynal@bootlin.com>
 M:	Naga Sureshkumar Relli <nagasure@xilinx.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
@@ -2264,7 +2276,6 @@ F:	drivers/iio/adc/ab8500-gpadc.c
 F:	drivers/mfd/ab8500*
 F:	drivers/mfd/abx500*
 F:	drivers/mfd/db8500*
-F:	drivers/mfd/dbx500*
 F:	drivers/pinctrl/nomadik/
 F:	drivers/rtc/rtc-ab8500.c
 F:	drivers/rtc/rtc-pl031.c
@@ -2322,14 +2333,14 @@ N:	oxnas
 
 ARM/PALM TREO SUPPORT
 M:	Tomas Cech <sleep_walker@suse.com>
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 W:	http://hackndev.com
 F:	arch/arm/mach-pxa/palmtreo.*
 
 ARM/PALMTX,PALMT5,PALMLD,PALMTE2,PALMTC SUPPORT
 M:	Marek Vasut <marek.vasut@gmail.com>
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 W:	http://hackndev.com
 F:	arch/arm/mach-pxa/include/mach/palmld.h
@@ -2343,7 +2354,7 @@ F:	arch/arm/mach-pxa/palmtx.c
 
 ARM/PALMZ72 SUPPORT
 M:	Sergey Lapin <slapin@ossfans.org>
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 W:	http://hackndev.com
 F:	arch/arm/mach-pxa/palmz72.*
@@ -2513,7 +2524,7 @@ N:	s5pv210
 
 ARM/SAMSUNG S5P SERIES 2D GRAPHICS ACCELERATION (G2D) SUPPORT
 M:	Andrzej Hajda <a.hajda@samsung.com>
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:	linux-media@vger.kernel.org
 S:	Maintained
 F:	drivers/media/platform/s5p-g2d/
@@ -2530,14 +2541,14 @@ ARM/SAMSUNG S5P SERIES JPEG CODEC SUPPORT
 M:	Andrzej Pietrasiewicz <andrzejtp2010@gmail.com>
 M:	Jacek Anaszewski <jacek.anaszewski@gmail.com>
 M:	Sylwester Nawrocki <s.nawrocki@samsung.com>
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:	linux-media@vger.kernel.org
 S:	Maintained
 F:	drivers/media/platform/s5p-jpeg/
 
 ARM/SAMSUNG S5P SERIES Multi Format Codec (MFC) SUPPORT
 M:	Andrzej Hajda <a.hajda@samsung.com>
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:	linux-media@vger.kernel.org
 S:	Maintained
 F:	drivers/media/platform/s5p-mfc/
@@ -2727,11 +2738,13 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/iwamatsu/linux-visconti.git
 F:	Documentation/devicetree/bindings/arm/toshiba.yaml
 F:	Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml
 F:	Documentation/devicetree/bindings/gpio/toshiba,gpio-visconti.yaml
+F:	Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml
 F:	Documentation/devicetree/bindings/pinctrl/toshiba,tmpv7700-pinctrl.yaml
 F:	Documentation/devicetree/bindings/watchdog/toshiba,visconti-wdt.yaml
 F:	arch/arm64/boot/dts/toshiba/
 F:	drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
 F:	drivers/gpio/gpio-visconti.c
+F:	drivers/pci/controller/dwc/pcie-visconti.c
 F:	drivers/pinctrl/visconti/
 F:	drivers/watchdog/visconti_wdt.c
 N:	visconti
@@ -3300,7 +3313,6 @@ S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
 F:	block/
 F:	drivers/block/
-F:	fs/block_dev.c
 F:	include/linux/blk*
 F:	kernel/trace/blktrace.c
 F:	lib/sbitmap.c
@@ -3554,7 +3566,7 @@ BROADCOM BCM5301X ARM ARCHITECTURE
 M:	Hauke Mehrtens <hauke@hauke-m.de>
 M:	Rafał Miłecki <zajec5@gmail.com>
 M:	bcm-kernel-feedback-list@broadcom.com
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/boot/dts/bcm470*
 F:	arch/arm/boot/dts/bcm5301*
@@ -3564,7 +3576,7 @@ F:	arch/arm/mach-bcm/bcm_5301x.c
 BROADCOM BCM53573 ARM ARCHITECTURE
 M:	Rafał Miłecki <rafal@milecki.pl>
 L:	bcm-kernel-feedback-list@broadcom.com
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/boot/dts/bcm47189*
 F:	arch/arm/boot/dts/bcm53573*
@@ -4518,13 +4530,14 @@ F:	.clang-format
 CLANG/LLVM BUILD SUPPORT
 M:	Nathan Chancellor <nathan@kernel.org>
 M:	Nick Desaulniers <ndesaulniers@google.com>
-L:	clang-built-linux@googlegroups.com
+L:	llvm@lists.linux.dev
 S:	Supported
 W:	https://clangbuiltlinux.github.io/
 B:	https://github.com/ClangBuiltLinux/linux/issues
 C:	irc://irc.libera.chat/clangbuiltlinux
 F:	Documentation/kbuild/llvm.rst
 F:	include/linux/compiler-clang.h
+F:	scripts/Makefile.clang
 F:	scripts/clang-tools/
 K:	\b(?i:clang|llvm)\b
 
@@ -4533,7 +4546,7 @@ M:	Sami Tolvanen <samitolvanen@google.com>
 M:	Kees Cook <keescook@chromium.org>
 R:	Nathan Chancellor <nathan@kernel.org>
 R:	Nick Desaulniers <ndesaulniers@google.com>
-L:	clang-built-linux@googlegroups.com
+L:	llvm@lists.linux.dev
 S:	Supported
 B:	https://github.com/ClangBuiltLinux/linux/issues
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/clang/features
@@ -4671,6 +4684,7 @@ F:	drivers/platform/x86/compal-laptop.c
 
 COMPILER ATTRIBUTES
 M:	Miguel Ojeda <ojeda@kernel.org>
+R:	Nick Desaulniers <ndesaulniers@google.com>
 S:	Maintained
 F:	include/linux/compiler_attributes.h
 
@@ -4812,7 +4826,7 @@ W:	http://www.arm.com/products/processors/technologies/biglittleprocessing.php
 F:	drivers/cpufreq/vexpress-spc-cpufreq.c
 
 CPU FREQUENCY SCALING FRAMEWORK
-M:	"Rafael J. Wysocki" <rjw@rjwysocki.net>
+M:	"Rafael J. Wysocki" <rafael@kernel.org>
 M:	Viresh Kumar <viresh.kumar@linaro.org>
 L:	linux-pm@vger.kernel.org
 S:	Maintained
@@ -4830,7 +4844,7 @@ F:	kernel/sched/cpufreq*.c
 F:	tools/testing/selftests/cpufreq/
 
 CPU IDLE TIME MANAGEMENT FRAMEWORK
-M:	"Rafael J. Wysocki" <rjw@rjwysocki.net>
+M:	"Rafael J. Wysocki" <rafael@kernel.org>
 M:	Daniel Lezcano <daniel.lezcano@linaro.org>
 L:	linux-pm@vger.kernel.org
 S:	Maintained
@@ -4859,7 +4873,7 @@ CPUIDLE DRIVER - ARM BIG LITTLE
 M:	Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
 M:	Daniel Lezcano <daniel.lezcano@linaro.org>
 L:	linux-pm@vger.kernel.org
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git
 F:	drivers/cpuidle/cpuidle-big_little.c
@@ -4879,14 +4893,14 @@ CPUIDLE DRIVER - ARM PSCI
 M:	Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
 M:	Sudeep Holla <sudeep.holla@arm.com>
 L:	linux-pm@vger.kernel.org
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Supported
 F:	drivers/cpuidle/cpuidle-psci.c
 
 CPUIDLE DRIVER - ARM PSCI PM DOMAIN
 M:	Ulf Hansson <ulf.hansson@linaro.org>
 L:	linux-pm@vger.kernel.org
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Supported
 F:	drivers/cpuidle/cpuidle-psci.h
 F:	drivers/cpuidle/cpuidle-psci-domain.c
@@ -5140,6 +5154,17 @@ F:	net/ax25/ax25_out.c
 F:	net/ax25/ax25_timer.c
 F:	net/ax25/sysctl_net_ax25.c
 
+DATA ACCESS MONITOR
+M:	SeongJae Park <sjpark@amazon.de>
+L:	linux-mm@kvack.org
+S:	Maintained
+F:	Documentation/admin-guide/mm/damon/
+F:	Documentation/vm/damon/
+F:	include/linux/damon.h
+F:	include/trace/events/damon.h
+F:	mm/damon/
+F:	tools/testing/selftests/damon/
+
 DAVICOM FAST ETHERNET (DMFE) NETWORK DRIVER
 L:	netdev@vger.kernel.org
 S:	Orphan
@@ -6846,7 +6871,6 @@ F:	Documentation/admin-guide/media/em28xx*
 F:	drivers/media/usb/em28xx/
 
 EMBEDDED LINUX
-M:	Paul Gortmaker <paul.gortmaker@windriver.com>
 M:	Matt Mackall <mpm@selenic.com>
 M:	David Woodhouse <dwmw2@infradead.org>
 L:	linux-embedded@vger.kernel.org
@@ -7247,7 +7271,7 @@ F:	tools/firewire/
 
 FIRMWARE FRAMEWORK FOR ARMV8-A
 M:	Sudeep Holla <sudeep.holla@arm.com>
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	drivers/firmware/arm_ffa/
 F:	include/linux/arm_ffa.h
@@ -7426,7 +7450,7 @@ F:	include/linux/platform_data/video-imxfb.h
 
 FREESCALE IMX DDR PMU DRIVER
 M:	Frank Li <Frank.li@nxp.com>
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	Documentation/admin-guide/perf/imx-ddr.rst
 F:	Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml
@@ -7518,7 +7542,7 @@ F:	drivers/tty/serial/ucc_uart.c
 FREESCALE SOC DRIVERS
 M:	Li Yang <leoyang.li@nxp.com>
 L:	linuxppc-dev@lists.ozlabs.org
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	Documentation/devicetree/bindings/misc/fsl,dpaa2-console.yaml
 F:	Documentation/devicetree/bindings/soc/fsl/
@@ -7566,7 +7590,7 @@ W:	ftp://ftp.openlinux.org/pub/people/hch/vxfs
 F:	fs/freevxfs/
 
 FREEZER
-M:	"Rafael J. Wysocki" <rjw@rjwysocki.net>
+M:	"Rafael J. Wysocki" <rafael@kernel.org>
 M:	Pavel Machek <pavel@ucw.cz>
 L:	linux-pm@vger.kernel.org
 S:	Supported
@@ -7819,7 +7843,7 @@ S:	Supported
 F:	drivers/i2c/muxes/i2c-demux-pinctrl.c
 
 GENERIC PM DOMAINS
-M:	"Rafael J. Wysocki" <rjw@rjwysocki.net>
+M:	"Rafael J. Wysocki" <rafael@kernel.org>
 M:	Kevin Hilman <khilman@kernel.org>
 M:	Ulf Hansson <ulf.hansson@linaro.org>
 L:	linux-pm@vger.kernel.org
@@ -8285,7 +8309,7 @@ W:	http://drama.obuda.kando.hu/~fero/cgi-bin/hgafb.shtml
 F:	drivers/video/fbdev/hgafb.c
 
 HIBERNATION (aka Software Suspend, aka swsusp)
-M:	"Rafael J. Wysocki" <rjw@rjwysocki.net>
+M:	"Rafael J. Wysocki" <rafael@kernel.org>
 M:	Pavel Machek <pavel@ucw.cz>
 L:	linux-pm@vger.kernel.org
 S:	Supported
@@ -8446,7 +8470,7 @@ F:	drivers/crypto/hisilicon/sgl.c
 F:	drivers/crypto/hisilicon/zip/
 
 HISILICON ROCE DRIVER
-M:	Lijun Ou <oulijun@huawei.com>
+M:	Wenpeng Liang <liangwenpeng@huawei.com>
 M:	Weihang Li <liweihang@huawei.com>
 L:	linux-rdma@vger.kernel.org
 S:	Maintained
@@ -9269,13 +9293,20 @@ INTEL ATOMISP2 DUMMY / POWER-MANAGEMENT DRIVER
 M:	Hans de Goede <hdegoede@redhat.com>
 L:	platform-driver-x86@vger.kernel.org
 S:	Maintained
-F:	drivers/platform/x86/intel_atomisp2_pm.c
+F:	drivers/platform/x86/intel/atomisp2/pm.c
 
 INTEL ATOMISP2 LED DRIVER
 M:	Hans de Goede <hdegoede@redhat.com>
 L:	platform-driver-x86@vger.kernel.org
 S:	Maintained
-F:	drivers/platform/x86/intel_atomisp2_led.c
+F:	drivers/platform/x86/intel/atomisp2/led.c
+
+INTEL BIOS SAR INT1092 DRIVER
+M:	Shravan S <s.shravan@intel.com>
+M:	Intel Corporation <linuxwwan@intel.com>
+L:	platform-driver-x86@vger.kernel.org
+S:	Maintained
+F:	drivers/platform/x86/intel/int1092/
 
 INTEL BROXTON PMC DRIVER
 M:	Mika Westerberg <mika.westerberg@linux.intel.com>
@@ -9371,7 +9402,7 @@ INTEL HID EVENT DRIVER
 M:	Alex Hung <alex.hung@canonical.com>
 L:	platform-driver-x86@vger.kernel.org
 S:	Maintained
-F:	drivers/platform/x86/intel-hid.c
+F:	drivers/platform/x86/intel/hid.c
 
 INTEL I/OAT DMA DRIVER
 M:	Dave Jiang <dave.jiang@intel.com>
@@ -9515,17 +9546,17 @@ F:	include/linux/mfd/intel-m10-bmc.h
 
 INTEL MENLOW THERMAL DRIVER
 M:	Sujith Thomas <sujith.thomas@intel.com>
-L:	platform-driver-x86@vger.kernel.org
+L:	linux-pm@vger.kernel.org
 S:	Supported
 W:	https://01.org/linux-acpi
-F:	drivers/platform/x86/intel_menlow.c
+F:	drivers/thermal/intel/intel_menlow.c
 
 INTEL P-Unit IPC DRIVER
 M:	Zha Qipeng <qipeng.zha@intel.com>
 L:	platform-driver-x86@vger.kernel.org
 S:	Maintained
 F:	arch/x86/include/asm/intel_punit_ipc.h
-F:	drivers/platform/x86/intel_punit_ipc.c
+F:	drivers/platform/x86/intel/punit_ipc.c
 
 INTEL PMC CORE DRIVER
 M:	Rajneesh Bhardwaj <irenic.rajneesh@gmail.com>
@@ -9533,7 +9564,7 @@ M:	David E Box <david.e.box@intel.com>
 L:	platform-driver-x86@vger.kernel.org
 S:	Maintained
 F:	Documentation/ABI/testing/sysfs-platform-intel-pmc
-F:	drivers/platform/x86/intel_pmc_core*
+F:	drivers/platform/x86/intel/pmc/
 
 INTEL PMIC GPIO DRIVERS
 M:	Andy Shevchenko <andy@kernel.org>
@@ -9551,7 +9582,7 @@ INTEL PMT DRIVER
 M:	"David E. Box" <david.e.box@linux.intel.com>
 S:	Maintained
 F:	drivers/mfd/intel_pmt.c
-F:	drivers/platform/x86/intel_pmt_*
+F:	drivers/platform/x86/intel/pmt/
 
 INTEL PRO/WIRELESS 2100, 2200BG, 2915ABG NETWORK CONNECTION SUPPORT
 M:	Stanislav Yakovlev <stas.yakovlev@gmail.com>
@@ -9588,7 +9619,7 @@ INTEL SPEED SELECT TECHNOLOGY
 M:	Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
 L:	platform-driver-x86@vger.kernel.org
 S:	Maintained
-F:	drivers/platform/x86/intel_speed_select_if/
+F:	drivers/platform/x86/intel/speed_select_if/
 F:	include/uapi/linux/isst_if.h
 F:	tools/power/x86/intel-speed-select/
 
@@ -9609,19 +9640,19 @@ M:	"David E. Box" <david.e.box@linux.intel.com>
 L:	platform-driver-x86@vger.kernel.org
 S:	Maintained
 F:	arch/x86/include/asm/intel_telemetry.h
-F:	drivers/platform/x86/intel_telemetry*
+F:	drivers/platform/x86/intel/telemetry/
 
 INTEL UNCORE FREQUENCY CONTROL
 M:	Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
 L:	platform-driver-x86@vger.kernel.org
 S:	Maintained
-F:	drivers/platform/x86/intel-uncore-frequency.c
+F:	drivers/platform/x86/intel/uncore-frequency.c
 
 INTEL VIRTUAL BUTTON DRIVER
 M:	AceLan Kao <acelan.kao@canonical.com>
 L:	platform-driver-x86@vger.kernel.org
 S:	Maintained
-F:	drivers/platform/x86/intel-vbtn.c
+F:	drivers/platform/x86/intel/vbtn.c
 
 INTEL WIRELESS 3945ABG/BG, 4965AGN (iwlegacy)
 M:	Stanislaw Gruszka <stf_xl@wp.pl>
@@ -9642,12 +9673,12 @@ M:	Jithu Joseph <jithu.joseph@intel.com>
 R:	Maurice Ma <maurice.ma@intel.com>
 S:	Maintained
 W:	https://slimbootloader.github.io/security/firmware-update.html
-F:	drivers/platform/x86/intel-wmi-sbl-fw-update.c
+F:	drivers/platform/x86/intel/wmi/sbl-fw-update.c
 
 INTEL WMI THUNDERBOLT FORCE POWER DRIVER
 L:	Dell.Client.Kernel@dell.com
 S:	Maintained
-F:	drivers/platform/x86/intel-wmi-thunderbolt.c
+F:	drivers/platform/x86/intel/wmi/thunderbolt.c
 
 INTEL WWAN IOSM DRIVER
 M:	M Chetan Kumar <m.chetan.kumar@intel.com>
@@ -10103,6 +10134,7 @@ F:	fs/autofs/
 KERNEL BUILD + files below scripts/ (unless maintained elsewhere)
 M:	Masahiro Yamada <masahiroy@kernel.org>
 M:	Michal Marek <michal.lkml@markovi.net>
+R:	Nick Desaulniers <ndesaulniers@google.com>
 L:	linux-kbuild@vger.kernel.org
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git
@@ -10590,10 +10622,10 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
 F:	drivers/ata/sata_promise.*
 
 LIBATA SUBSYSTEM (Serial and Parallel ATA drivers)
-M:	Jens Axboe <axboe@kernel.dk>
+M:	Damien Le Moal <damien.lemoal@opensource.wdc.com>
 L:	linux-ide@vger.kernel.org
 S:	Maintained
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/libata.git
 F:	Documentation/devicetree/bindings/ata/
 F:	drivers/ata/
 F:	include/linux/ata.h
@@ -11106,7 +11138,7 @@ MARDUK (CREATOR CI40) DEVICE TREE SUPPORT
 M:	Rahul Bedarkar <rahulbedarkar89@gmail.com>
 L:	linux-mips@vger.kernel.org
 S:	Maintained
-F:	arch/mips/boot/dts/img/pistachio_marduk.dts
+F:	arch/mips/boot/dts/img/pistachio*
 
 MARVELL 88E6XXX ETHERNET SWITCH FABRIC DRIVER
 M:	Andrew Lunn <andrew@lunn.ch>
@@ -11158,7 +11190,7 @@ F:	drivers/net/wireless/marvell/libertas/
 
 MARVELL MACCHIATOBIN SUPPORT
 M:	Russell King <linux@armlinux.org.uk>
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm64/boot/dts/marvell/armada-8040-mcbin.dts
 
@@ -12606,6 +12638,7 @@ Q:	http://patchwork.linuxtv.org/project/linux-media/list/
 F:	drivers/media/dvb-frontends/mn88473*
 
 MODULE SUPPORT
+M:	Luis Chamberlain <mcgrof@kernel.org>
 M:	Jessica Yu <jeyu@kernel.org>
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jeyu/linux.git modules-next
@@ -13325,6 +13358,15 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/aia21/ntfs.git
 F:	Documentation/filesystems/ntfs.rst
 F:	fs/ntfs/
 
+NTFS3 FILESYSTEM
+M:	Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
+L:	ntfs3@lists.linux.dev
+S:	Supported
+W:	http://www.paragon-software.com/
+T:	git https://github.com/Paragon-Software-Group/linux-ntfs3.git
+F:	Documentation/filesystems/ntfs3.rst
+F:	fs/ntfs3/
+
 NUBUS SUBSYSTEM
 M:	Finn Thain <fthain@linux-m68k.org>
 L:	linux-m68k@lists.linux-m68k.org
@@ -13367,7 +13409,7 @@ F:	include/linux/nvme-fc.h
 NVM EXPRESS TARGET DRIVER
 M:	Christoph Hellwig <hch@lst.de>
 M:	Sagi Grimberg <sagi@grimberg.me>
-M:	Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
+M:	Chaitanya Kulkarni <kch@nvidia.com>
 L:	linux-nvme@lists.infradead.org
 S:	Supported
 W:	http://git.infradead.org/nvme.git
@@ -14229,7 +14271,7 @@ F:	drivers/pci/controller/pcie-altera.c
 PCI DRIVER FOR APPLIEDMICRO XGENE
 M:	Toan Le <toan@os.amperecomputing.com>
 L:	linux-pci@vger.kernel.org
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	Documentation/devicetree/bindings/pci/xgene-pci.txt
 F:	drivers/pci/controller/pci-xgene.c
@@ -14237,7 +14279,7 @@ F:	drivers/pci/controller/pci-xgene.c
 PCI DRIVER FOR ARM VERSATILE PLATFORM
 M:	Rob Herring <robh@kernel.org>
 L:	linux-pci@vger.kernel.org
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	Documentation/devicetree/bindings/pci/versatile.yaml
 F:	drivers/pci/controller/pci-versatile.c
@@ -14245,7 +14287,7 @@ F:	drivers/pci/controller/pci-versatile.c
 PCI DRIVER FOR ARMADA 8K
 M:	Thomas Petazzoni <thomas.petazzoni@bootlin.com>
 L:	linux-pci@vger.kernel.org
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	Documentation/devicetree/bindings/pci/pci-armada8k.txt
 F:	drivers/pci/controller/dwc/pcie-armada8k.c
@@ -14263,7 +14305,7 @@ M:	Mingkai Hu <mingkai.hu@nxp.com>
 M:	Roy Zang <roy.zang@nxp.com>
 L:	linuxppc-dev@lists.ozlabs.org
 L:	linux-pci@vger.kernel.org
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	drivers/pci/controller/dwc/*layerscape*
 
@@ -14343,7 +14385,7 @@ F:	drivers/pci/controller/pci-tegra.c
 PCI DRIVER FOR NXP LAYERSCAPE GEN4 CONTROLLER
 M:	Hou Zhiqiang <Zhiqiang.Hou@nxp.com>
 L:	linux-pci@vger.kernel.org
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	Documentation/devicetree/bindings/pci/layerscape-pcie-gen4.txt
 F:	drivers/pci/controller/mobiveil/pcie-layerscape-gen4.c
@@ -14378,7 +14420,7 @@ PCI DRIVER FOR TI DRA7XX/J721E
 M:	Kishon Vijay Abraham I <kishon@ti.com>
 L:	linux-omap@vger.kernel.org
 L:	linux-pci@vger.kernel.org
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Supported
 F:	Documentation/devicetree/bindings/pci/ti-pci.txt
 F:	drivers/pci/controller/cadence/pci-j721e.c
@@ -14434,7 +14476,7 @@ F:	drivers/pci/controller/pcie-altera-msi.c
 PCI MSI DRIVER FOR APPLIEDMICRO XGENE
 M:	Toan Le <toan@os.amperecomputing.com>
 L:	linux-pci@vger.kernel.org
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	Documentation/devicetree/bindings/pci/xgene-pci-msi.txt
 F:	drivers/pci/controller/pci-xgene-msi.c
@@ -14518,6 +14560,13 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/pci/hisilicon-histb-pcie.txt
 F:	drivers/pci/controller/dwc/pcie-histb.c
 
+PCIE DRIVER FOR INTEL KEEM BAY
+M:	Srikanth Thokala <srikanth.thokala@intel.com>
+L:	linux-pci@vger.kernel.org
+S:	Supported
+F:	Documentation/devicetree/bindings/pci/intel,keembay-pcie*
+F:	drivers/pci/controller/dwc/pcie-keembay.c
+
 PCIE DRIVER FOR INTEL LGM GW SOC
 M:	Rahul Tanwar <rtanwar@maxlinear.com>
 L:	linux-pci@vger.kernel.org
@@ -14720,6 +14769,12 @@ F:	Documentation/driver-api/pin-control.rst
 F:	drivers/pinctrl/
 F:	include/linux/pinctrl/
 
+PIN CONTROLLER - AMD
+M:	Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+M:	Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
+S:	Maintained
+F:	drivers/pinctrl/pinctrl-amd.c
+
 PIN CONTROLLER - FREESCALE
 M:	Dong Aisheng <aisheng.dong@nxp.com>
 M:	Fabio Estevam <festevam@gmail.com>
@@ -14738,12 +14793,19 @@ S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/intel.git
 F:	drivers/pinctrl/intel/
 
+PIN CONTROLLER - KEEMBAY
+M:	Lakshmi Sowjanya D <lakshmi.sowjanya.d@intel.com>
+S:	Supported
+F:	drivers/pinctrl/pinctrl-keembay*
+
 PIN CONTROLLER - MEDIATEK
 M:	Sean Wang <sean.wang@kernel.org>
 L:	linux-mediatek@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
-F:	Documentation/devicetree/bindings/pinctrl/pinctrl-mt65xx.txt
-F:	Documentation/devicetree/bindings/pinctrl/pinctrl-mt7622.txt
+F:	Documentation/devicetree/bindings/pinctrl/mediatek,mt65xx-pinctrl.yaml
+F:	Documentation/devicetree/bindings/pinctrl/mediatek,mt6797-pinctrl.yaml
+F:	Documentation/devicetree/bindings/pinctrl/mediatek,mt7622-pinctrl.yaml
+F:	Documentation/devicetree/bindings/pinctrl/mediatek,mt8183-pinctrl.yaml
 F:	drivers/pinctrl/mediatek/
 
 PIN CONTROLLER - MICROCHIP AT91
@@ -14797,14 +14859,6 @@ S:	Maintained
 W:	http://www.st.com/spear
 F:	drivers/pinctrl/spear/
 
-PISTACHIO SOC SUPPORT
-M:	James Hartley <james.hartley@sondrel.com>
-L:	linux-mips@vger.kernel.org
-S:	Odd Fixes
-F:	arch/mips/boot/dts/img/pistachio*
-F:	arch/mips/configs/pistachio*_defconfig
-F:	arch/mips/pistachio/
-
 PKTCDVD DRIVER
 M:	linux-block@vger.kernel.org
 S:	Orphan
@@ -14914,7 +14968,7 @@ F:	kernel/time/*timer*
 F:	kernel/time/namespace.c
 
 POWER MANAGEMENT CORE
-M:	"Rafael J. Wysocki" <rjw@rjwysocki.net>
+M:	"Rafael J. Wysocki" <rafael@kernel.org>
 L:	linux-pm@vger.kernel.org
 S:	Supported
 B:	https://bugzilla.kernel.org
@@ -14939,7 +14993,7 @@ F:	include/linux/dtpm.h
 POWER STATE COORDINATION INTERFACE (PSCI)
 M:	Mark Rutland <mark.rutland@arm.com>
 M:	Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	drivers/firmware/psci/
 F:	include/linux/psci.h
@@ -15464,7 +15518,7 @@ F:	arch/hexagon/
 
 QUALCOMM HIDMA DRIVER
 M:	Sinan Kaya <okaya@kernel.org>
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:	linux-arm-msm@vger.kernel.org
 L:	dmaengine@vger.kernel.org
 S:	Supported
@@ -16512,10 +16566,14 @@ L:	linux-samsung-soc@vger.kernel.org
 S:	Supported
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/snawrocki/clk.git
 F:	Documentation/devicetree/bindings/clock/exynos*.txt
+F:	Documentation/devicetree/bindings/clock/samsung,*.yaml
 F:	Documentation/devicetree/bindings/clock/samsung,s3c*
 F:	Documentation/devicetree/bindings/clock/samsung,s5p*
 F:	drivers/clk/samsung/
 F:	include/dt-bindings/clock/exynos*.h
+F:	include/dt-bindings/clock/s3c*.h
+F:	include/dt-bindings/clock/s5p*.h
+F:	include/dt-bindings/clock/samsung,*.h
 F:	include/linux/clk/samsung.h
 F:	include/linux/platform_data/clk-s3c2410.h
 
@@ -17174,7 +17232,7 @@ SECURE MONITOR CALL(SMC) CALLING CONVENTION (SMCCC)
 M:	Mark Rutland <mark.rutland@arm.com>
 M:	Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
 M:	Sudeep Holla <sudeep.holla@arm.com>
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	drivers/firmware/smccc/
 F:	include/linux/arm-smccc.h
@@ -17291,7 +17349,7 @@ F:	drivers/media/pci/solo6x10/
 
 SOFTWARE DELEGATED EXCEPTION INTERFACE (SDEI)
 M:	James Morse <james.morse@arm.com>
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	Documentation/devicetree/bindings/arm/firmware/sdei.txt
 F:	drivers/firmware/arm_sdei.c
@@ -17888,7 +17946,7 @@ F:	arch/sh/
 F:	drivers/sh/
 
 SUSPEND TO RAM
-M:	"Rafael J. Wysocki" <rjw@rjwysocki.net>
+M:	"Rafael J. Wysocki" <rafael@kernel.org>
 M:	Len Brown <len.brown@intel.com>
 M:	Pavel Machek <pavel@ucw.cz>
 L:	linux-pm@vger.kernel.org
@@ -18078,7 +18136,7 @@ F:	drivers/mfd/syscon.c
 SYSTEM CONTROL & POWER/MANAGEMENT INTERFACE (SCPI/SCMI) Message Protocol drivers
 M:	Sudeep Holla <sudeep.holla@arm.com>
 R:	Cristian Marussi <cristian.marussi@arm.com>
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	Documentation/devicetree/bindings/firmware/arm,sc[mp]i.yaml
 F:	drivers/clk/clk-sc[mp]i.c
@@ -18451,7 +18509,7 @@ TEXAS INSTRUMENTS' SYSTEM CONTROL INTERFACE (TISCI) PROTOCOL DRIVER
 M:	Nishanth Menon <nm@ti.com>
 M:	Tero Kristo <kristo@kernel.org>
 M:	Santosh Shilimkar <ssantosh@kernel.org>
-L:	linux-arm-kernel@lists.infradead.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	Documentation/devicetree/bindings/arm/keystone/ti,k3-sci-common.yaml
 F:	Documentation/devicetree/bindings/arm/keystone/ti,sci.txt
@@ -18508,6 +18566,7 @@ F:	drivers/thermal/
 F:	include/linux/cpu_cooling.h
 F:	include/linux/thermal.h
 F:	include/uapi/linux/thermal.h
+F:	tools/thermal/
 
 THERMAL DRIVER FOR AMLOGIC SOCS
 M:	Guillaume La Roque <glaroque@baylibre.com>
@@ -18940,6 +18999,28 @@ F:	arch/x86/mm/testmmiotrace.c
 F:	include/linux/mmiotrace.h
 F:	kernel/trace/trace_mmiotrace.c
 
+TRACING OS NOISE / LATENCY TRACERS
+M:	Steven Rostedt <rostedt@goodmis.org>
+M:	Daniel Bristot de Oliveira <bristot@kernel.org>
+S:	Maintained
+F:	kernel/trace/trace_osnoise.c
+F:	include/trace/events/osnoise.h
+F:	kernel/trace/trace_hwlat.c
+F:	kernel/trace/trace_irqsoff.c
+F:	kernel/trace/trace_sched_wakeup.c
+F:	Documentation/trace/osnoise-tracer.rst
+F:	Documentation/trace/timerlat-tracer.rst
+F:	Documentation/trace/hwlat_detector.rst
+F:	arch/*/kernel/trace.c
+
+TRADITIONAL CHINESE DOCUMENTATION
+M:	Hu Haowen <src.res@email.cn>
+L:	linux-doc-tw-discuss@lists.sourceforge.net
+S:	Maintained
+W:	https://github.com/srcres258/linux-doc
+T:	git git://github.com/srcres258/linux-doc.git doc-zh-tw
+F:	Documentation/translations/zh_TW/
+
 TRIVIAL PATCHES
 M:	Jiri Kosina <trivial@kernel.org>
 S:	Maintained
@@ -19112,9 +19193,8 @@ W:	http://dotat.at/prog/unifdef
 F:	scripts/unifdef.c
 
 UNIFORM CDROM DRIVER
-M:	Jens Axboe <axboe@kernel.dk>
+M:	Phillip Potter <phil@philpotter.co.uk>
 S:	Maintained
-W:	http://www.kernel.dk
 F:	Documentation/cdrom/
 F:	drivers/cdrom/cdrom.c
 F:	include/linux/cdrom.h
@@ -19599,6 +19679,7 @@ T:	git git://github.com/awilliam/linux-vfio.git
 F:	Documentation/driver-api/vfio.rst
 F:	drivers/vfio/
 F:	include/linux/vfio.h
+F:	include/linux/vfio_pci_core.h
 F:	include/uapi/linux/vfio.h
 
 VFIO FSL-MC DRIVER
@@ -19707,18 +19788,11 @@ L:	kvm@vger.kernel.org
 L:	virtualization@lists.linux-foundation.org
 L:	netdev@vger.kernel.org
 S:	Maintained
-F:	drivers/net/vsockmon.c
 F:	drivers/vhost/vsock.c
 F:	include/linux/virtio_vsock.h
 F:	include/uapi/linux/virtio_vsock.h
-F:	include/uapi/linux/vm_sockets_diag.h
-F:	include/uapi/linux/vsockmon.h
-F:	net/vmw_vsock/af_vsock_tap.c
-F:	net/vmw_vsock/diag.c
 F:	net/vmw_vsock/virtio_transport.c
 F:	net/vmw_vsock/virtio_transport_common.c
-F:	net/vmw_vsock/vsock_loopback.c
-F:	tools/testing/vsock/
 
 VIRTIO BLOCK AND SCSI DRIVERS
 M:	"Michael S. Tsirkin" <mst@redhat.com>
@@ -19797,6 +19871,15 @@ F:	Documentation/filesystems/virtiofs.rst
 F:	fs/fuse/virtio_fs.c
 F:	include/uapi/linux/virtio_fs.h
 
+VIRTIO GPIO DRIVER
+M:	Enrico Weigelt, metux IT consult <info@metux.net>
+M:	Viresh Kumar <vireshk@kernel.org>
+L:	linux-gpio@vger.kernel.org
+L:	virtualization@lists.linux-foundation.org
+S:	Maintained
+F:	drivers/gpio/gpio-virtio.c
+F:	include/uapi/linux/virtio_gpio.h
+
 VIRTIO GPU DRIVER
 M:	David Airlie <airlied@linux.ie>
 M:	Gerd Hoffmann <kraxel@redhat.com>
@@ -19914,6 +19997,19 @@ F:	drivers/staging/vme/
 F:	drivers/vme/
 F:	include/linux/vme*
 
+VM SOCKETS (AF_VSOCK)
+M:	Stefano Garzarella <sgarzare@redhat.com>
+L:	virtualization@lists.linux-foundation.org
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	drivers/net/vsockmon.c
+F:	include/net/af_vsock.h
+F:	include/uapi/linux/vm_sockets.h
+F:	include/uapi/linux/vm_sockets_diag.h
+F:	include/uapi/linux/vsockmon.h
+F:	net/vmw_vsock/
+F:	tools/testing/vsock/
+
 VMWARE BALLOON DRIVER
 M:	Nadav Amit <namit@vmware.com>
 M:	"VMware, Inc." <pv-drivers@vmware.com>
@@ -20491,7 +20587,7 @@ R:	Srinivas Neeli <srinivas.neeli@xilinx.com>
 R:	Michal Simek <michal.simek@xilinx.com>
 S:	Maintained
 F:	Documentation/devicetree/bindings/gpio/gpio-xilinx.txt
-F:	Documentation/devicetree/bindings/gpio/gpio-zynq.txt
+F:	Documentation/devicetree/bindings/gpio/gpio-zynq.yaml
 F:	drivers/gpio/gpio-xilinx.c
 F:	drivers/gpio/gpio-zynq.c
 
diff --git a/Makefile b/Makefile
index 61741e9d9c6e..7cfe4ff36f44 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 5
-PATCHLEVEL = 14
+PATCHLEVEL = 15
 SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc1
 NAME = Opossums on Parade
 
 # *DOCUMENTATION*
@@ -191,10 +191,9 @@ endif
 ifneq ($(abs_srctree),$(abs_objtree))
 # Look for make include files relative to root of kernel src
 #
-# This does not become effective immediately because MAKEFLAGS is re-parsed
-# once after the Makefile is read. We need to invoke sub-make.
+# --included-dir is added for backward compatibility, but you should not rely on
+# it. Please add $(srctree)/ prefix to include Makefiles in the source tree.
 MAKEFLAGS += --include-dir=$(abs_srctree)
-need-sub-make := 1
 endif
 
 ifneq ($(filter 3.%,$(MAKE_VERSION)),)
@@ -404,6 +403,11 @@ ifeq ($(ARCH),sparc64)
        SRCARCH := sparc
 endif
 
+# Additional ARCH settings for parisc
+ifeq ($(ARCH),parisc64)
+       SRCARCH := parisc
+endif
+
 export cross_compiling :=
 ifneq ($(SRCARCH),$(SUBARCH))
 cross_compiling := 1
@@ -582,23 +586,10 @@ endif
 # Some architectures define CROSS_COMPILE in arch/$(SRCARCH)/Makefile.
 # CC_VERSION_TEXT is referenced from Kconfig (so it needs export),
 # and from include/config/auto.conf.cmd to detect the compiler upgrade.
-CC_VERSION_TEXT = $(subst $(pound),,$(shell $(CC) --version 2>/dev/null | head -n 1))
+CC_VERSION_TEXT = $(subst $(pound),,$(shell LC_ALL=C $(CC) --version 2>/dev/null | head -n 1))
 
 ifneq ($(findstring clang,$(CC_VERSION_TEXT)),)
-ifneq ($(CROSS_COMPILE),)
-CLANG_FLAGS	+= --target=$(notdir $(CROSS_COMPILE:%-=%))
-endif
-ifeq ($(LLVM_IAS),1)
-CLANG_FLAGS	+= -integrated-as
-else
-CLANG_FLAGS	+= -no-integrated-as
-GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)elfedit))
-CLANG_FLAGS	+= --prefix=$(GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE))
-endif
-CLANG_FLAGS	+= -Werror=unknown-warning-option
-KBUILD_CFLAGS	+= $(CLANG_FLAGS)
-KBUILD_AFLAGS	+= $(CLANG_FLAGS)
-export CLANG_FLAGS
+include $(srctree)/scripts/Makefile.clang
 endif
 
 # Include this also for config targets because some architectures need
@@ -683,9 +674,10 @@ endif # KBUILD_EXTMOD
 # Defaults to vmlinux, but the arch makefile usually adds further targets
 all: vmlinux
 
-CFLAGS_GCOV	:= -fprofile-arcs -ftest-coverage \
-	$(call cc-option,-fno-tree-loop-im) \
-	$(call cc-disable-warning,maybe-uninitialized,)
+CFLAGS_GCOV	:= -fprofile-arcs -ftest-coverage
+ifdef CONFIG_CC_IS_GCC
+CFLAGS_GCOV	+= -fno-tree-loop-im
+endif
 export CFLAGS_GCOV
 
 # The arch Makefiles can override CC_FLAGS_FTRACE. We may also append it later.
@@ -693,12 +685,14 @@ ifdef CONFIG_FUNCTION_TRACER
   CC_FLAGS_FTRACE := -pg
 endif
 
-RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register
-RETPOLINE_VDSO_CFLAGS_GCC := -mindirect-branch=thunk-inline -mindirect-branch-register
-RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk
-RETPOLINE_VDSO_CFLAGS_CLANG := -mretpoline
-RETPOLINE_CFLAGS := $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG)))
-RETPOLINE_VDSO_CFLAGS := $(call cc-option,$(RETPOLINE_VDSO_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_VDSO_CFLAGS_CLANG)))
+ifdef CONFIG_CC_IS_GCC
+RETPOLINE_CFLAGS	:= $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
+RETPOLINE_VDSO_CFLAGS	:= $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register)
+endif
+ifdef CONFIG_CC_IS_CLANG
+RETPOLINE_CFLAGS	:= -mretpoline-external-thunk
+RETPOLINE_VDSO_CFLAGS	:= -mretpoline
+endif
 export RETPOLINE_CFLAGS
 export RETPOLINE_VDSO_CFLAGS
 
@@ -751,7 +745,7 @@ include/config/auto.conf:
 endif # may-sync-config
 endif # need-config
 
-KBUILD_CFLAGS	+= $(call cc-option,-fno-delete-null-pointer-checks,)
+KBUILD_CFLAGS	+= -fno-delete-null-pointer-checks
 KBUILD_CFLAGS	+= $(call cc-disable-warning,frame-address,)
 KBUILD_CFLAGS	+= $(call cc-disable-warning, format-truncation)
 KBUILD_CFLAGS	+= $(call cc-disable-warning, format-overflow)
@@ -766,17 +760,19 @@ KBUILD_CFLAGS += -Os
 endif
 
 # Tell gcc to never replace conditional load with a non-conditional one
+ifdef CONFIG_CC_IS_GCC
+# gcc-10 renamed --param=allow-store-data-races=0 to
+# -fno-allow-store-data-races.
 KBUILD_CFLAGS	+= $(call cc-option,--param=allow-store-data-races=0)
 KBUILD_CFLAGS	+= $(call cc-option,-fno-allow-store-data-races)
+endif
 
 ifdef CONFIG_READABLE_ASM
 # Disable optimizations that make assembler listings hard to read.
 # reorder blocks reorders the control in the function
 # ipa clone creates specialized cloned functions
 # partial inlining inlines only parts of functions
-KBUILD_CFLAGS += $(call cc-option,-fno-reorder-blocks,) \
-                 $(call cc-option,-fno-ipa-cp-clone,) \
-                 $(call cc-option,-fno-partial-inlining)
+KBUILD_CFLAGS += -fno-reorder-blocks -fno-ipa-cp-clone -fno-partial-inlining
 endif
 
 ifneq ($(CONFIG_FRAME_WARN),0)
@@ -789,9 +785,12 @@ stackp-flags-$(CONFIG_STACKPROTECTOR_STRONG)      := -fstack-protector-strong
 
 KBUILD_CFLAGS += $(stackp-flags-y)
 
+KBUILD_CFLAGS-$(CONFIG_WERROR) += -Werror
+KBUILD_CFLAGS += $(KBUILD_CFLAGS-y)
+
 ifdef CONFIG_CC_IS_CLANG
 KBUILD_CPPFLAGS += -Qunused-arguments
-KBUILD_CFLAGS += -Wno-format-invalid-specifier
+# The kernel builds with '-std=gnu89' so use of GNU extensions is acceptable.
 KBUILD_CFLAGS += -Wno-gnu
 # CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the
 # source of a reference will be _MergedGlobals and not on of the whitelisted names.
@@ -803,13 +802,15 @@ else
 # Disabled for clang while comment to attribute conversion happens and
 # https://github.com/ClangBuiltLinux/linux/issues/636 is discussed.
 KBUILD_CFLAGS += $(call cc-option,-Wimplicit-fallthrough=5,)
+# gcc inanely warns about local variables called 'main'
+KBUILD_CFLAGS += -Wno-main
 endif
 
 # These warnings generated too much noise in a regular build.
 # Use make W=1 to enable them (see scripts/Makefile.extrawarn)
 KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
-
 KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
+
 ifdef CONFIG_FRAME_POINTER
 KBUILD_CFLAGS	+= -fno-omit-frame-pointer -fno-optimize-sibling-calls
 else
@@ -841,6 +842,11 @@ endif
 # for the randomize_kstack_offset feature. Disable it for all compilers.
 KBUILD_CFLAGS	+= $(call cc-option, -fno-stack-clash-protection)
 
+# Clear used registers at func exit (to reduce data lifetime and ROP gadgets).
+ifdef CONFIG_ZERO_CALL_USED_REGS
+KBUILD_CFLAGS	+= -fzero-call-used-regs=used-gpr
+endif
+
 DEBUG_CFLAGS	:=
 
 # Workaround for GCC versions < 5.0
@@ -857,7 +863,7 @@ else
 DEBUG_CFLAGS	+= -g
 endif
 
-ifneq ($(LLVM_IAS),1)
+ifndef CONFIG_AS_IS_LLVM
 KBUILD_AFLAGS	+= -Wa,-gdwarf-2
 endif
 
@@ -868,8 +874,10 @@ DEBUG_CFLAGS	+= -gdwarf-$(dwarf-version-y)
 endif
 
 ifdef CONFIG_DEBUG_INFO_REDUCED
-DEBUG_CFLAGS	+= $(call cc-option, -femit-struct-debug-baseonly) \
-		   $(call cc-option,-fno-var-tracking)
+DEBUG_CFLAGS	+= -fno-var-tracking
+ifdef CONFIG_CC_IS_GCC
+DEBUG_CFLAGS	+= -femit-struct-debug-baseonly
+endif
 endif
 
 ifdef CONFIG_DEBUG_INFO_COMPRESSED
@@ -903,6 +911,7 @@ ifdef CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT
   endif
 endif
 ifdef CONFIG_HAVE_FENTRY
+  # s390-linux-gnu-gcc did not support -mfentry until gcc-9.
   ifeq ($(call cc-option-yn, -mfentry),y)
     CC_FLAGS_FTRACE	+= -mfentry
     CC_FLAGS_USING	+= -DCC_USING_FENTRY
@@ -915,7 +924,7 @@ endif
 
 # We trigger additional mismatches with less inlining
 ifdef CONFIG_DEBUG_SECTION_MISMATCH
-KBUILD_CFLAGS += $(call cc-option, -fno-inline-functions-called-once)
+KBUILD_CFLAGS += -fno-inline-functions-called-once
 endif
 
 ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
@@ -994,14 +1003,16 @@ KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation)
 
 # We'll want to enable this eventually, but it's not going away for 5.7 at least
 KBUILD_CFLAGS += $(call cc-disable-warning, zero-length-bounds)
-KBUILD_CFLAGS += $(call cc-disable-warning, array-bounds)
+KBUILD_CFLAGS += -Wno-array-bounds
 KBUILD_CFLAGS += $(call cc-disable-warning, stringop-overflow)
 
 # Another good warning that we'll want to enable eventually
 KBUILD_CFLAGS += $(call cc-disable-warning, restrict)
 
 # Enabled with W=2, disabled by default as noisy
-KBUILD_CFLAGS += $(call cc-disable-warning, maybe-uninitialized)
+ifdef CONFIG_CC_IS_GCC
+KBUILD_CFLAGS += -Wno-maybe-uninitialized
+endif
 
 # disable invalid "can't wrap" optimizations for signed / pointers
 KBUILD_CFLAGS	+= -fno-strict-overflow
@@ -1010,7 +1021,9 @@ KBUILD_CFLAGS	+= -fno-strict-overflow
 KBUILD_CFLAGS  += -fno-stack-check
 
 # conserve stack if available
-KBUILD_CFLAGS   += $(call cc-option,-fconserve-stack)
+ifdef CONFIG_CC_IS_GCC
+KBUILD_CFLAGS   += -fconserve-stack
+endif
 
 # Prohibit date/time macros, which would make the build non-deterministic
 KBUILD_CFLAGS   += -Werror=date-time
@@ -1740,6 +1753,16 @@ clean-dirs := $(KBUILD_EXTMOD)
 clean: rm-files := $(KBUILD_EXTMOD)/Module.symvers $(KBUILD_EXTMOD)/modules.nsdeps \
 	$(KBUILD_EXTMOD)/compile_commands.json $(KBUILD_EXTMOD)/.thinlto-cache
 
+PHONY += prepare
+# now expand this into a simple variable to reduce the cost of shell evaluations
+prepare: CC_VERSION_TEXT := $(CC_VERSION_TEXT)
+prepare:
+	@if [ "$(CC_VERSION_TEXT)" != $(CONFIG_CC_VERSION_TEXT) ]; then \
+		echo >&2 "warning: the compiler differs from the one used to build the kernel"; \
+		echo >&2 "  The kernel was built by: "$(CONFIG_CC_VERSION_TEXT); \
+		echo >&2 "  You are using:           $(CC_VERSION_TEXT)"; \
+	fi
+
 PHONY += help
 help:
 	@echo  '  Building external modules.'
@@ -1751,7 +1774,7 @@ help:
 	@echo  ''
 
 # no-op for external module builds
-PHONY += prepare modules_prepare
+PHONY += modules_prepare
 
 endif # KBUILD_EXTMOD
 
diff --git a/arch/Kconfig b/arch/Kconfig
index 98db63496bab..8df1c7102643 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -197,6 +197,9 @@ config HAVE_FUNCTION_ERROR_INJECTION
 config HAVE_NMI
 	bool
 
+config TRACE_IRQFLAGS_SUPPORT
+	bool
+
 #
 # An arch should select this if it provides all these things:
 #
@@ -886,7 +889,7 @@ config HAVE_SOFTIRQ_ON_OWN_STACK
 	bool
 	help
 	  Architecture provides a function to run __do_softirq() on a
-	  seperate stack.
+	  separate stack.
 
 config PGTABLE_LEVELS
 	int
diff --git a/arch/alpha/include/asm/agp.h b/arch/alpha/include/asm/agp.h
index 7173eada1567..7874f063d000 100644
--- a/arch/alpha/include/asm/agp.h
+++ b/arch/alpha/include/asm/agp.h
@@ -6,8 +6,8 @@
 
 /* dummy for now */
 
-#define map_page_into_agp(page) 
-#define unmap_page_from_agp(page) 
+#define map_page_into_agp(page)		do { } while (0)
+#define unmap_page_from_agp(page)	do { } while (0)
 #define flush_agp_cache() mb()
 
 /* GATT allocation. Returns/accepts GATT kernel virtual address. */
diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c
index 0021580d79ad..5808a66e2a81 100644
--- a/arch/alpha/kernel/pci-sysfs.c
+++ b/arch/alpha/kernel/pci-sysfs.c
@@ -60,6 +60,8 @@ static int __pci_mmap_fits(struct pci_dev *pdev, int num,
  * @sparse: address space type
  *
  * Use the bus mapping routines to map a PCI resource into userspace.
+ *
+ * Return: %0 on success, negative error code otherwise
  */
 static int pci_mmap_resource(struct kobject *kobj,
 			     struct bin_attribute *attr,
@@ -106,7 +108,7 @@ static int pci_mmap_resource_dense(struct file *filp, struct kobject *kobj,
 
 /**
  * pci_remove_resource_files - cleanup resource files
- * @dev: dev to cleanup
+ * @pdev: pci_dev to cleanup
  *
  * If we created resource files for @dev, remove them from sysfs and
  * free their resources.
@@ -221,10 +223,12 @@ static int pci_create_attr(struct pci_dev *pdev, int num)
 }
 
 /**
- * pci_create_resource_files - create resource files in sysfs for @dev
- * @dev: dev in question
+ * pci_create_resource_files - create resource files in sysfs for @pdev
+ * @pdev: pci_dev in question
  *
  * Walk the resources in @dev creating files for each resource available.
+ *
+ * Return: %0 on success, or negative error code
  */
 int pci_create_resource_files(struct pci_dev *pdev)
 {
@@ -296,7 +300,7 @@ int pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma,
 
 /**
  * pci_adjust_legacy_attr - adjustment of legacy file attributes
- * @b: bus to create files under
+ * @bus: bus to create files under
  * @mmap_type: I/O port or memory
  *
  * Adjust file name and size for sparse mappings.
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index 35d7b3096d6e..21f9ac101324 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -649,7 +649,9 @@ static int alpha_pci_map_sg(struct device *dev, struct scatterlist *sg,
 		sg->dma_address
 		  = pci_map_single_1(pdev, SG_ENT_VIRT_ADDRESS(sg),
 				     sg->length, dac_allowed);
-		return sg->dma_address != DMA_MAPPING_ERROR;
+		if (sg->dma_address == DMA_MAPPING_ERROR)
+			return -EIO;
+		return 1;
 	}
 
 	start = sg;
@@ -685,8 +687,10 @@ static int alpha_pci_map_sg(struct device *dev, struct scatterlist *sg,
 	if (out < end)
 		out->dma_length = 0;
 
-	if (out - start == 0)
+	if (out - start == 0) {
 		printk(KERN_WARNING "pci_map_sg failed: no entries?\n");
+		return -ENOMEM;
+	}
 	DBGA("pci_map_sg: %ld entries\n", out - start);
 
 	return out - start;
@@ -699,7 +703,7 @@ static int alpha_pci_map_sg(struct device *dev, struct scatterlist *sg,
 	   entries.  Unmap them now.  */
 	if (out > start)
 		pci_unmap_sg(pdev, start, out - start, dir);
-	return 0;
+	return -ENOMEM;
 }
 
 /* Unmap a set of streaming mode DMA translations.  Again, cpu read
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 7ac22e007d52..e4a041cd5715 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -486,3 +486,5 @@
 554	common	landlock_create_ruleset		sys_landlock_create_ruleset
 555	common	landlock_add_rule		sys_landlock_add_rule
 556	common	landlock_restrict_self		sys_landlock_restrict_self
+# 557 reserved for memfd_secret
+558	common	process_mrelease		sys_process_mrelease
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index b5bf68e74732..3a5a80f302e1 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -49,9 +49,7 @@ config ARC
 	select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING
 	select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32
 	select SET_FS
-
-config TRACE_IRQFLAGS_SUPPORT
-	def_bool y
+	select TRACE_IRQFLAGS_SUPPORT
 
 config LOCKDEP_SUPPORT
 	def_bool y
@@ -116,16 +114,9 @@ choice
 	default ARC_CPU_770 if ISA_ARCOMPACT
 	default ARC_CPU_HS if ISA_ARCV2
 
-if ISA_ARCOMPACT
-
-config ARC_CPU_750D
-	bool "ARC750D"
-	select ARC_CANT_LLSC
-	help
-	  Support for ARC750 core
-
 config ARC_CPU_770
 	bool "ARC770"
+	depends on ISA_ARCOMPACT
 	select ARC_HAS_SWAPE
 	help
 	  Support for ARC770 core introduced with Rel 4.10 (Summer 2011)
@@ -135,8 +126,6 @@ config ARC_CPU_770
 	  -Caches: New Prog Model, Region Flush
 	  -Insns: endian swap, load-locked/store-conditional, time-stamp-ctr
 
-endif #ISA_ARCOMPACT
-
 config ARC_CPU_HS
 	bool "ARC-HS"
 	depends on ISA_ARCV2
@@ -274,33 +263,17 @@ config ARC_DCCM_BASE
 
 choice
 	prompt "MMU Version"
-	default ARC_MMU_V3 if ARC_CPU_770
-	default ARC_MMU_V2 if ARC_CPU_750D
-	default ARC_MMU_V4 if ARC_CPU_HS
-
-if ISA_ARCOMPACT
-
-config ARC_MMU_V1
-	bool "MMU v1"
-	help
-	  Orig ARC700 MMU
-
-config ARC_MMU_V2
-	bool "MMU v2"
-	help
-	  Fixed the deficiency of v1 - possible thrashing in memcpy scenario
-	  when 2 D-TLB and 1 I-TLB entries index into same 2way set.
+	default ARC_MMU_V3 if ISA_ARCOMPACT
+	default ARC_MMU_V4 if ISA_ARCV2
 
 config ARC_MMU_V3
 	bool "MMU v3"
-	depends on ARC_CPU_770
+	depends on ISA_ARCOMPACT
 	help
 	  Introduced with ARC700 4.10: New Features
 	  Variable Page size (1k-16k), var JTLB size 128 x (2 or 4)
 	  Shared Address Spaces (SASID)
 
-endif
-
 config ARC_MMU_V4
 	bool "MMU v4"
 	depends on ISA_ARCV2
@@ -319,7 +292,6 @@ config ARC_PAGE_SIZE_8K
 
 config ARC_PAGE_SIZE_16K
 	bool "16KB"
-	depends on ARC_MMU_V3 || ARC_MMU_V4
 
 config ARC_PAGE_SIZE_4K
 	bool "4KB"
@@ -340,6 +312,10 @@ config ARC_HUGEPAGE_16M
 
 endchoice
 
+config PGTABLE_LEVELS
+	int "Number of Page table levels"
+	default 2
+
 config ARC_COMPACT_IRQ_LEVELS
 	depends on ISA_ARCOMPACT
 	bool "Setup Timer IRQ as high Priority"
@@ -563,9 +539,6 @@ config ARC_DW2_UNWIND
 	  If you don't debug the kernel, you can say N, but we may not be able
 	  to solve problems without frame unwind information
 
-config ARC_DBG_TLB_PARANOIA
-	bool "Paranoia Checks in Low Level TLB Handlers"
-
 config ARC_DBG_JUMP_LABEL
 	bool "Paranoid checks in Static Keys (jump labels) code"
 	depends on JUMP_LABEL
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index c0d87ac2e221..8782a03f24a8 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -18,8 +18,7 @@ ifeq ($(CONFIG_ARC_TUNE_MCPU),"")
 cflags-y				+= $(tune-mcpu-def-y)
 else
 tune-mcpu				:= $(shell echo $(CONFIG_ARC_TUNE_MCPU))
-tune-mcpu-ok 				:= $(call cc-option-yn, $(tune-mcpu))
-ifeq ($(tune-mcpu-ok),y)
+ifneq ($(call cc-option,$(tune-mcpu)),)
 cflags-y				+= $(tune-mcpu)
 else
 # The flag provided by 'CONFIG_ARC_TUNE_MCPU' option isn't known by this compiler
diff --git a/arch/arc/include/asm/atomic-llsc.h b/arch/arc/include/asm/atomic-llsc.h
new file mode 100644
index 000000000000..088d348781c1
--- /dev/null
+++ b/arch/arc/include/asm/atomic-llsc.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_ARC_ATOMIC_LLSC_H
+#define _ASM_ARC_ATOMIC_LLSC_H
+
+#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
+
+#define ATOMIC_OP(op, c_op, asm_op)					\
+static inline void arch_atomic_##op(int i, atomic_t *v)			\
+{									\
+	unsigned int val;						\
+									\
+	__asm__ __volatile__(						\
+	"1:	llock   %[val], [%[ctr]]		\n"		\
+	"	" #asm_op " %[val], %[val], %[i]	\n"		\
+	"	scond   %[val], [%[ctr]]		\n"		\
+	"	bnz     1b				\n"		\
+	: [val]	"=&r"	(val) /* Early clobber to prevent reg reuse */	\
+	: [ctr]	"r"	(&v->counter), /* Not "m": llock only supports reg direct addr mode */	\
+	  [i]	"ir"	(i)						\
+	: "cc");							\
+}									\
+
+#define ATOMIC_OP_RETURN(op, c_op, asm_op)				\
+static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v)	\
+{									\
+	unsigned int val;						\
+									\
+	__asm__ __volatile__(						\
+	"1:	llock   %[val], [%[ctr]]		\n"		\
+	"	" #asm_op " %[val], %[val], %[i]	\n"		\
+	"	scond   %[val], [%[ctr]]		\n"		\
+	"	bnz     1b				\n"		\
+	: [val]	"=&r"	(val)						\
+	: [ctr]	"r"	(&v->counter),					\
+	  [i]	"ir"	(i)						\
+	: "cc");							\
+									\
+	return val;							\
+}
+
+#define arch_atomic_add_return_relaxed		arch_atomic_add_return_relaxed
+#define arch_atomic_sub_return_relaxed		arch_atomic_sub_return_relaxed
+
+#define ATOMIC_FETCH_OP(op, c_op, asm_op)				\
+static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v)	\
+{									\
+	unsigned int val, orig;						\
+									\
+	__asm__ __volatile__(						\
+	"1:	llock   %[orig], [%[ctr]]		\n"		\
+	"	" #asm_op " %[val], %[orig], %[i]	\n"		\
+	"	scond   %[val], [%[ctr]]		\n"		\
+	"	bnz     1b				\n"		\
+	: [val]	"=&r"	(val),						\
+	  [orig] "=&r" (orig)						\
+	: [ctr]	"r"	(&v->counter),					\
+	  [i]	"ir"	(i)						\
+	: "cc");							\
+									\
+	return orig;							\
+}
+
+#define arch_atomic_fetch_add_relaxed		arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_sub_relaxed		arch_atomic_fetch_sub_relaxed
+
+#define arch_atomic_fetch_and_relaxed		arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_andnot_relaxed	arch_atomic_fetch_andnot_relaxed
+#define arch_atomic_fetch_or_relaxed		arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_xor_relaxed		arch_atomic_fetch_xor_relaxed
+
+#define ATOMIC_OPS(op, c_op, asm_op)					\
+	ATOMIC_OP(op, c_op, asm_op)					\
+	ATOMIC_OP_RETURN(op, c_op, asm_op)				\
+	ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(add, +=, add)
+ATOMIC_OPS(sub, -=, sub)
+
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op, asm_op)					\
+	ATOMIC_OP(op, c_op, asm_op)					\
+	ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(and, &=, and)
+ATOMIC_OPS(andnot, &= ~, bic)
+ATOMIC_OPS(or, |=, or)
+ATOMIC_OPS(xor, ^=, xor)
+
+#define arch_atomic_andnot		arch_atomic_andnot
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_OP_RETURN
+#undef ATOMIC_OP
+
+#endif
diff --git a/arch/arc/include/asm/atomic-spinlock.h b/arch/arc/include/asm/atomic-spinlock.h
new file mode 100644
index 000000000000..2c830347bfb4
--- /dev/null
+++ b/arch/arc/include/asm/atomic-spinlock.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_ARC_ATOMIC_SPLOCK_H
+#define _ASM_ARC_ATOMIC_SPLOCK_H
+
+/*
+ * Non hardware assisted Atomic-R-M-W
+ * Locking would change to irq-disabling only (UP) and spinlocks (SMP)
+ */
+
+static inline void arch_atomic_set(atomic_t *v, int i)
+{
+	/*
+	 * Independent of hardware support, all of the atomic_xxx() APIs need
+	 * to follow the same locking rules to make sure that a "hardware"
+	 * atomic insn (e.g. LD) doesn't clobber an "emulated" atomic insn
+	 * sequence
+	 *
+	 * Thus atomic_set() despite being 1 insn (and seemingly atomic)
+	 * requires the locking.
+	 */
+	unsigned long flags;
+
+	atomic_ops_lock(flags);
+	WRITE_ONCE(v->counter, i);
+	atomic_ops_unlock(flags);
+}
+
+#define arch_atomic_set_release(v, i)	arch_atomic_set((v), (i))
+
+#define ATOMIC_OP(op, c_op, asm_op)					\
+static inline void arch_atomic_##op(int i, atomic_t *v)			\
+{									\
+	unsigned long flags;						\
+									\
+	atomic_ops_lock(flags);						\
+	v->counter c_op i;						\
+	atomic_ops_unlock(flags);					\
+}
+
+#define ATOMIC_OP_RETURN(op, c_op, asm_op)				\
+static inline int arch_atomic_##op##_return(int i, atomic_t *v)		\
+{									\
+	unsigned long flags;						\
+	unsigned int temp;						\
+									\
+	/*								\
+	 * spin lock/unlock provides the needed smp_mb() before/after	\
+	 */								\
+	atomic_ops_lock(flags);						\
+	temp = v->counter;						\
+	temp c_op i;							\
+	v->counter = temp;						\
+	atomic_ops_unlock(flags);					\
+									\
+	return temp;							\
+}
+
+#define ATOMIC_FETCH_OP(op, c_op, asm_op)				\
+static inline int arch_atomic_fetch_##op(int i, atomic_t *v)		\
+{									\
+	unsigned long flags;						\
+	unsigned int orig;						\
+									\
+	/*								\
+	 * spin lock/unlock provides the needed smp_mb() before/after	\
+	 */								\
+	atomic_ops_lock(flags);						\
+	orig = v->counter;						\
+	v->counter c_op i;						\
+	atomic_ops_unlock(flags);					\
+									\
+	return orig;							\
+}
+
+#define ATOMIC_OPS(op, c_op, asm_op)					\
+	ATOMIC_OP(op, c_op, asm_op)					\
+	ATOMIC_OP_RETURN(op, c_op, asm_op)				\
+	ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(add, +=, add)
+ATOMIC_OPS(sub, -=, sub)
+
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op, asm_op)					\
+	ATOMIC_OP(op, c_op, asm_op)					\
+	ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(and, &=, and)
+ATOMIC_OPS(andnot, &= ~, bic)
+ATOMIC_OPS(or, |=, or)
+ATOMIC_OPS(xor, ^=, xor)
+
+#define arch_atomic_andnot		arch_atomic_andnot
+#define arch_atomic_fetch_andnot	arch_atomic_fetch_andnot
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_OP_RETURN
+#undef ATOMIC_OP
+
+#endif
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 7a36d79b5b2f..52ee51e1ff7c 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -17,435 +17,43 @@
 #define arch_atomic_read(v)  READ_ONCE((v)->counter)
 
 #ifdef CONFIG_ARC_HAS_LLSC
-
-#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
-
-#define ATOMIC_OP(op, c_op, asm_op)					\
-static inline void arch_atomic_##op(int i, atomic_t *v)			\
-{									\
-	unsigned int val;						\
-									\
-	__asm__ __volatile__(						\
-	"1:	llock   %[val], [%[ctr]]		\n"		\
-	"	" #asm_op " %[val], %[val], %[i]	\n"		\
-	"	scond   %[val], [%[ctr]]		\n"		\
-	"	bnz     1b				\n"		\
-	: [val]	"=&r"	(val) /* Early clobber to prevent reg reuse */	\
-	: [ctr]	"r"	(&v->counter), /* Not "m": llock only supports reg direct addr mode */	\
-	  [i]	"ir"	(i)						\
-	: "cc");							\
-}									\
-
-#define ATOMIC_OP_RETURN(op, c_op, asm_op)				\
-static inline int arch_atomic_##op##_return(int i, atomic_t *v)		\
-{									\
-	unsigned int val;						\
-									\
-	/*								\
-	 * Explicit full memory barrier needed before/after as		\
-	 * LLOCK/SCOND themselves don't provide any such semantics	\
-	 */								\
-	smp_mb();							\
-									\
-	__asm__ __volatile__(						\
-	"1:	llock   %[val], [%[ctr]]		\n"		\
-	"	" #asm_op " %[val], %[val], %[i]	\n"		\
-	"	scond   %[val], [%[ctr]]		\n"		\
-	"	bnz     1b				\n"		\
-	: [val]	"=&r"	(val)						\
-	: [ctr]	"r"	(&v->counter),					\
-	  [i]	"ir"	(i)						\
-	: "cc");							\
-									\
-	smp_mb();							\
-									\
-	return val;							\
-}
-
-#define ATOMIC_FETCH_OP(op, c_op, asm_op)				\
-static inline int arch_atomic_fetch_##op(int i, atomic_t *v)		\
-{									\
-	unsigned int val, orig;						\
-									\
-	/*								\
-	 * Explicit full memory barrier needed before/after as		\
-	 * LLOCK/SCOND themselves don't provide any such semantics	\
-	 */								\
-	smp_mb();							\
-									\
-	__asm__ __volatile__(						\
-	"1:	llock   %[orig], [%[ctr]]		\n"		\
-	"	" #asm_op " %[val], %[orig], %[i]	\n"		\
-	"	scond   %[val], [%[ctr]]		\n"		\
-	"	bnz     1b				\n"		\
-	: [val]	"=&r"	(val),						\
-	  [orig] "=&r" (orig)						\
-	: [ctr]	"r"	(&v->counter),					\
-	  [i]	"ir"	(i)						\
-	: "cc");							\
-									\
-	smp_mb();							\
-									\
-	return orig;							\
-}
-
-#else	/* !CONFIG_ARC_HAS_LLSC */
-
-#ifndef CONFIG_SMP
-
- /* violating atomic_xxx API locking protocol in UP for optimization sake */
-#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
-
+#include <asm/atomic-llsc.h>
 #else
+#include <asm/atomic-spinlock.h>
+#endif
 
-static inline void arch_atomic_set(atomic_t *v, int i)
-{
-	/*
-	 * Independent of hardware support, all of the atomic_xxx() APIs need
-	 * to follow the same locking rules to make sure that a "hardware"
-	 * atomic insn (e.g. LD) doesn't clobber an "emulated" atomic insn
-	 * sequence
-	 *
-	 * Thus atomic_set() despite being 1 insn (and seemingly atomic)
-	 * requires the locking.
-	 */
-	unsigned long flags;
+#define arch_atomic_cmpxchg(v, o, n)					\
+({									\
+	arch_cmpxchg(&((v)->counter), (o), (n));			\
+})
 
-	atomic_ops_lock(flags);
-	WRITE_ONCE(v->counter, i);
-	atomic_ops_unlock(flags);
-}
+#ifdef arch_cmpxchg_relaxed
+#define arch_atomic_cmpxchg_relaxed(v, o, n)				\
+({									\
+	arch_cmpxchg_relaxed(&((v)->counter), (o), (n));		\
+})
+#endif
 
-#define arch_atomic_set_release(v, i)	arch_atomic_set((v), (i))
+#define arch_atomic_xchg(v, n)						\
+({									\
+	arch_xchg(&((v)->counter), (n));				\
+})
 
+#ifdef arch_xchg_relaxed
+#define arch_atomic_xchg_relaxed(v, n)					\
+({									\
+	arch_xchg_relaxed(&((v)->counter), (n));			\
+})
 #endif
 
 /*
- * Non hardware assisted Atomic-R-M-W
- * Locking would change to irq-disabling only (UP) and spinlocks (SMP)
+ * 64-bit atomics
  */
-
-#define ATOMIC_OP(op, c_op, asm_op)					\
-static inline void arch_atomic_##op(int i, atomic_t *v)			\
-{									\
-	unsigned long flags;						\
-									\
-	atomic_ops_lock(flags);						\
-	v->counter c_op i;						\
-	atomic_ops_unlock(flags);					\
-}
-
-#define ATOMIC_OP_RETURN(op, c_op, asm_op)				\
-static inline int arch_atomic_##op##_return(int i, atomic_t *v)		\
-{									\
-	unsigned long flags;						\
-	unsigned long temp;						\
-									\
-	/*								\
-	 * spin lock/unlock provides the needed smp_mb() before/after	\
-	 */								\
-	atomic_ops_lock(flags);						\
-	temp = v->counter;						\
-	temp c_op i;							\
-	v->counter = temp;						\
-	atomic_ops_unlock(flags);					\
-									\
-	return temp;							\
-}
-
-#define ATOMIC_FETCH_OP(op, c_op, asm_op)				\
-static inline int arch_atomic_fetch_##op(int i, atomic_t *v)		\
-{									\
-	unsigned long flags;						\
-	unsigned long orig;						\
-									\
-	/*								\
-	 * spin lock/unlock provides the needed smp_mb() before/after	\
-	 */								\
-	atomic_ops_lock(flags);						\
-	orig = v->counter;						\
-	v->counter c_op i;						\
-	atomic_ops_unlock(flags);					\
-									\
-	return orig;							\
-}
-
-#endif /* !CONFIG_ARC_HAS_LLSC */
-
-#define ATOMIC_OPS(op, c_op, asm_op)					\
-	ATOMIC_OP(op, c_op, asm_op)					\
-	ATOMIC_OP_RETURN(op, c_op, asm_op)				\
-	ATOMIC_FETCH_OP(op, c_op, asm_op)
-
-ATOMIC_OPS(add, +=, add)
-ATOMIC_OPS(sub, -=, sub)
-
-#undef ATOMIC_OPS
-#define ATOMIC_OPS(op, c_op, asm_op)					\
-	ATOMIC_OP(op, c_op, asm_op)					\
-	ATOMIC_FETCH_OP(op, c_op, asm_op)
-
-ATOMIC_OPS(and, &=, and)
-ATOMIC_OPS(andnot, &= ~, bic)
-ATOMIC_OPS(or, |=, or)
-ATOMIC_OPS(xor, ^=, xor)
-
-#define arch_atomic_andnot		arch_atomic_andnot
-#define arch_atomic_fetch_andnot	arch_atomic_fetch_andnot
-
-#undef ATOMIC_OPS
-#undef ATOMIC_FETCH_OP
-#undef ATOMIC_OP_RETURN
-#undef ATOMIC_OP
-
 #ifdef CONFIG_GENERIC_ATOMIC64
-
 #include <asm-generic/atomic64.h>
-
-#else	/* Kconfig ensures this is only enabled with needed h/w assist */
-
-/*
- * ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD)
- *  - The address HAS to be 64-bit aligned
- *  - There are 2 semantics involved here:
- *    = exclusive implies no interim update between load/store to same addr
- *    = both words are observed/updated together: this is guaranteed even
- *      for regular 64-bit load (LDD) / store (STD). Thus atomic64_set()
- *      is NOT required to use LLOCKD+SCONDD, STD suffices
- */
-
-typedef struct {
-	s64 __aligned(8) counter;
-} atomic64_t;
-
-#define ATOMIC64_INIT(a) { (a) }
-
-static inline s64 arch_atomic64_read(const atomic64_t *v)
-{
-	s64 val;
-
-	__asm__ __volatile__(
-	"	ldd   %0, [%1]	\n"
-	: "=r"(val)
-	: "r"(&v->counter));
-
-	return val;
-}
-
-static inline void arch_atomic64_set(atomic64_t *v, s64 a)
-{
-	/*
-	 * This could have been a simple assignment in "C" but would need
-	 * explicit volatile. Otherwise gcc optimizers could elide the store
-	 * which borked atomic64 self-test
-	 * In the inline asm version, memory clobber needed for exact same
-	 * reason, to tell gcc about the store.
-	 *
-	 * This however is not needed for sibling atomic64_add() etc since both
-	 * load/store are explicitly done in inline asm. As long as API is used
-	 * for each access, gcc has no way to optimize away any load/store
-	 */
-	__asm__ __volatile__(
-	"	std   %0, [%1]	\n"
-	:
-	: "r"(a), "r"(&v->counter)
-	: "memory");
-}
-
-#define ATOMIC64_OP(op, op1, op2)					\
-static inline void arch_atomic64_##op(s64 a, atomic64_t *v)		\
-{									\
-	s64 val;							\
-									\
-	__asm__ __volatile__(						\
-	"1:				\n"				\
-	"	llockd  %0, [%1]	\n"				\
-	"	" #op1 " %L0, %L0, %L2	\n"				\
-	"	" #op2 " %H0, %H0, %H2	\n"				\
-	"	scondd   %0, [%1]	\n"				\
-	"	bnz     1b		\n"				\
-	: "=&r"(val)							\
-	: "r"(&v->counter), "ir"(a)					\
-	: "cc");							\
-}									\
-
-#define ATOMIC64_OP_RETURN(op, op1, op2)		        	\
-static inline s64 arch_atomic64_##op##_return(s64 a, atomic64_t *v)	\
-{									\
-	s64 val;							\
-									\
-	smp_mb();							\
-									\
-	__asm__ __volatile__(						\
-	"1:				\n"				\
-	"	llockd   %0, [%1]	\n"				\
-	"	" #op1 " %L0, %L0, %L2	\n"				\
-	"	" #op2 " %H0, %H0, %H2	\n"				\
-	"	scondd   %0, [%1]	\n"				\
-	"	bnz     1b		\n"				\
-	: [val] "=&r"(val)						\
-	: "r"(&v->counter), "ir"(a)					\
-	: "cc");	/* memory clobber comes from smp_mb() */	\
-									\
-	smp_mb();							\
-									\
-	return val;							\
-}
-
-#define ATOMIC64_FETCH_OP(op, op1, op2)		        		\
-static inline s64 arch_atomic64_fetch_##op(s64 a, atomic64_t *v)	\
-{									\
-	s64 val, orig;							\
-									\
-	smp_mb();							\
-									\
-	__asm__ __volatile__(						\
-	"1:				\n"				\
-	"	llockd   %0, [%2]	\n"				\
-	"	" #op1 " %L1, %L0, %L3	\n"				\
-	"	" #op2 " %H1, %H0, %H3	\n"				\
-	"	scondd   %1, [%2]	\n"				\
-	"	bnz     1b		\n"				\
-	: "=&r"(orig), "=&r"(val)					\
-	: "r"(&v->counter), "ir"(a)					\
-	: "cc");	/* memory clobber comes from smp_mb() */	\
-									\
-	smp_mb();							\
-									\
-	return orig;							\
-}
-
-#define ATOMIC64_OPS(op, op1, op2)					\
-	ATOMIC64_OP(op, op1, op2)					\
-	ATOMIC64_OP_RETURN(op, op1, op2)				\
-	ATOMIC64_FETCH_OP(op, op1, op2)
-
-ATOMIC64_OPS(add, add.f, adc)
-ATOMIC64_OPS(sub, sub.f, sbc)
-ATOMIC64_OPS(and, and, and)
-ATOMIC64_OPS(andnot, bic, bic)
-ATOMIC64_OPS(or, or, or)
-ATOMIC64_OPS(xor, xor, xor)
-
-#define arch_atomic64_andnot		arch_atomic64_andnot
-#define arch_atomic64_fetch_andnot	arch_atomic64_fetch_andnot
-
-#undef ATOMIC64_OPS
-#undef ATOMIC64_FETCH_OP
-#undef ATOMIC64_OP_RETURN
-#undef ATOMIC64_OP
-
-static inline s64
-arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
-{
-	s64 prev;
-
-	smp_mb();
-
-	__asm__ __volatile__(
-	"1:	llockd  %0, [%1]	\n"
-	"	brne    %L0, %L2, 2f	\n"
-	"	brne    %H0, %H2, 2f	\n"
-	"	scondd  %3, [%1]	\n"
-	"	bnz     1b		\n"
-	"2:				\n"
-	: "=&r"(prev)
-	: "r"(ptr), "ir"(expected), "r"(new)
-	: "cc");	/* memory clobber comes from smp_mb() */
-
-	smp_mb();
-
-	return prev;
-}
-
-static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new)
-{
-	s64 prev;
-
-	smp_mb();
-
-	__asm__ __volatile__(
-	"1:	llockd  %0, [%1]	\n"
-	"	scondd  %2, [%1]	\n"
-	"	bnz     1b		\n"
-	"2:				\n"
-	: "=&r"(prev)
-	: "r"(ptr), "r"(new)
-	: "cc");	/* memory clobber comes from smp_mb() */
-
-	smp_mb();
-
-	return prev;
-}
-
-/**
- * arch_atomic64_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic64_t
- *
- * The function returns the old value of *v minus 1, even if
- * the atomic variable, v, was not decremented.
- */
-
-static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
-{
-	s64 val;
-
-	smp_mb();
-
-	__asm__ __volatile__(
-	"1:	llockd  %0, [%1]	\n"
-	"	sub.f   %L0, %L0, 1	# w0 - 1, set C on borrow\n"
-	"	sub.c   %H0, %H0, 1	# if C set, w1 - 1\n"
-	"	brlt    %H0, 0, 2f	\n"
-	"	scondd  %0, [%1]	\n"
-	"	bnz     1b		\n"
-	"2:				\n"
-	: "=&r"(val)
-	: "r"(&v->counter)
-	: "cc");	/* memory clobber comes from smp_mb() */
-
-	smp_mb();
-
-	return val;
-}
-#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
-
-/**
- * arch_atomic64_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, if it was not @u.
- * Returns the old value of @v
- */
-static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
-{
-	s64 old, temp;
-
-	smp_mb();
-
-	__asm__ __volatile__(
-	"1:	llockd  %0, [%2]	\n"
-	"	brne	%L0, %L4, 2f	# continue to add since v != u \n"
-	"	breq.d	%H0, %H4, 3f	# return since v == u \n"
-	"2:				\n"
-	"	add.f   %L1, %L0, %L3	\n"
-	"	adc     %H1, %H0, %H3	\n"
-	"	scondd  %1, [%2]	\n"
-	"	bnz     1b		\n"
-	"3:				\n"
-	: "=&r"(old), "=&r" (temp)
-	: "r"(&v->counter), "r"(a), "r"(u)
-	: "cc");	/* memory clobber comes from smp_mb() */
-
-	smp_mb();
-
-	return old;
-}
-#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
-
-#endif	/* !CONFIG_GENERIC_ATOMIC64 */
+#else
+#include <asm/atomic64-arcv2.h>
+#endif
 
 #endif	/* !__ASSEMBLY__ */
 
diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h
new file mode 100644
index 000000000000..c5a8010fdc97
--- /dev/null
+++ b/arch/arc/include/asm/atomic64-arcv2.h
@@ -0,0 +1,250 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/*
+ * ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD)
+ *  - The address HAS to be 64-bit aligned
+ */
+
+#ifndef _ASM_ARC_ATOMIC64_ARCV2_H
+#define _ASM_ARC_ATOMIC64_ARCV2_H
+
+typedef struct {
+	s64 __aligned(8) counter;
+} atomic64_t;
+
+#define ATOMIC64_INIT(a) { (a) }
+
+static inline s64 arch_atomic64_read(const atomic64_t *v)
+{
+	s64 val;
+
+	__asm__ __volatile__(
+	"	ldd   %0, [%1]	\n"
+	: "=r"(val)
+	: "r"(&v->counter));
+
+	return val;
+}
+
+static inline void arch_atomic64_set(atomic64_t *v, s64 a)
+{
+	/*
+	 * This could have been a simple assignment in "C" but would need
+	 * explicit volatile. Otherwise gcc optimizers could elide the store
+	 * which borked atomic64 self-test
+	 * In the inline asm version, memory clobber needed for exact same
+	 * reason, to tell gcc about the store.
+	 *
+	 * This however is not needed for sibling atomic64_add() etc since both
+	 * load/store are explicitly done in inline asm. As long as API is used
+	 * for each access, gcc has no way to optimize away any load/store
+	 */
+	__asm__ __volatile__(
+	"	std   %0, [%1]	\n"
+	:
+	: "r"(a), "r"(&v->counter)
+	: "memory");
+}
+
+#define ATOMIC64_OP(op, op1, op2)					\
+static inline void arch_atomic64_##op(s64 a, atomic64_t *v)		\
+{									\
+	s64 val;							\
+									\
+	__asm__ __volatile__(						\
+	"1:				\n"				\
+	"	llockd  %0, [%1]	\n"				\
+	"	" #op1 " %L0, %L0, %L2	\n"				\
+	"	" #op2 " %H0, %H0, %H2	\n"				\
+	"	scondd   %0, [%1]	\n"				\
+	"	bnz     1b		\n"				\
+	: "=&r"(val)							\
+	: "r"(&v->counter), "ir"(a)					\
+	: "cc");							\
+}									\
+
+#define ATOMIC64_OP_RETURN(op, op1, op2)		        	\
+static inline s64 arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v)	\
+{									\
+	s64 val;							\
+									\
+	__asm__ __volatile__(						\
+	"1:				\n"				\
+	"	llockd   %0, [%1]	\n"				\
+	"	" #op1 " %L0, %L0, %L2	\n"				\
+	"	" #op2 " %H0, %H0, %H2	\n"				\
+	"	scondd   %0, [%1]	\n"				\
+	"	bnz     1b		\n"				\
+	: [val] "=&r"(val)						\
+	: "r"(&v->counter), "ir"(a)					\
+	: "cc");	/* memory clobber comes from smp_mb() */	\
+									\
+	return val;							\
+}
+
+#define arch_atomic64_add_return_relaxed	arch_atomic64_add_return_relaxed
+#define arch_atomic64_sub_return_relaxed	arch_atomic64_sub_return_relaxed
+
+#define ATOMIC64_FETCH_OP(op, op1, op2)		        		\
+static inline s64 arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v)	\
+{									\
+	s64 val, orig;							\
+									\
+	__asm__ __volatile__(						\
+	"1:				\n"				\
+	"	llockd   %0, [%2]	\n"				\
+	"	" #op1 " %L1, %L0, %L3	\n"				\
+	"	" #op2 " %H1, %H0, %H3	\n"				\
+	"	scondd   %1, [%2]	\n"				\
+	"	bnz     1b		\n"				\
+	: "=&r"(orig), "=&r"(val)					\
+	: "r"(&v->counter), "ir"(a)					\
+	: "cc");	/* memory clobber comes from smp_mb() */	\
+									\
+	return orig;							\
+}
+
+#define arch_atomic64_fetch_add_relaxed		arch_atomic64_fetch_add_relaxed
+#define arch_atomic64_fetch_sub_relaxed		arch_atomic64_fetch_sub_relaxed
+
+#define arch_atomic64_fetch_and_relaxed		arch_atomic64_fetch_and_relaxed
+#define arch_atomic64_fetch_andnot_relaxed	arch_atomic64_fetch_andnot_relaxed
+#define arch_atomic64_fetch_or_relaxed		arch_atomic64_fetch_or_relaxed
+#define arch_atomic64_fetch_xor_relaxed		arch_atomic64_fetch_xor_relaxed
+
+#define ATOMIC64_OPS(op, op1, op2)					\
+	ATOMIC64_OP(op, op1, op2)					\
+	ATOMIC64_OP_RETURN(op, op1, op2)				\
+	ATOMIC64_FETCH_OP(op, op1, op2)
+
+ATOMIC64_OPS(add, add.f, adc)
+ATOMIC64_OPS(sub, sub.f, sbc)
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, op1, op2)					\
+	ATOMIC64_OP(op, op1, op2)					\
+	ATOMIC64_FETCH_OP(op, op1, op2)
+
+ATOMIC64_OPS(and, and, and)
+ATOMIC64_OPS(andnot, bic, bic)
+ATOMIC64_OPS(or, or, or)
+ATOMIC64_OPS(xor, xor, xor)
+
+#define arch_atomic64_andnot		arch_atomic64_andnot
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
+#undef ATOMIC64_OP_RETURN
+#undef ATOMIC64_OP
+
+static inline s64
+arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
+{
+	s64 prev;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"1:	llockd  %0, [%1]	\n"
+	"	brne    %L0, %L2, 2f	\n"
+	"	brne    %H0, %H2, 2f	\n"
+	"	scondd  %3, [%1]	\n"
+	"	bnz     1b		\n"
+	"2:				\n"
+	: "=&r"(prev)
+	: "r"(ptr), "ir"(expected), "r"(new)
+	: "cc");	/* memory clobber comes from smp_mb() */
+
+	smp_mb();
+
+	return prev;
+}
+
+static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new)
+{
+	s64 prev;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"1:	llockd  %0, [%1]	\n"
+	"	scondd  %2, [%1]	\n"
+	"	bnz     1b		\n"
+	"2:				\n"
+	: "=&r"(prev)
+	: "r"(ptr), "r"(new)
+	: "cc");	/* memory clobber comes from smp_mb() */
+
+	smp_mb();
+
+	return prev;
+}
+
+/**
+ * arch_atomic64_dec_if_positive - decrement by 1 if old value positive
+ * @v: pointer of type atomic64_t
+ *
+ * The function returns the old value of *v minus 1, even if
+ * the atomic variable, v, was not decremented.
+ */
+
+static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
+{
+	s64 val;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"1:	llockd  %0, [%1]	\n"
+	"	sub.f   %L0, %L0, 1	# w0 - 1, set C on borrow\n"
+	"	sub.c   %H0, %H0, 1	# if C set, w1 - 1\n"
+	"	brlt    %H0, 0, 2f	\n"
+	"	scondd  %0, [%1]	\n"
+	"	bnz     1b		\n"
+	"2:				\n"
+	: "=&r"(val)
+	: "r"(&v->counter)
+	: "cc");	/* memory clobber comes from smp_mb() */
+
+	smp_mb();
+
+	return val;
+}
+#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
+
+/**
+ * arch_atomic64_fetch_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic64_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, if it was not @u.
+ * Returns the old value of @v
+ */
+static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
+{
+	s64 old, temp;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"1:	llockd  %0, [%2]	\n"
+	"	brne	%L0, %L4, 2f	# continue to add since v != u \n"
+	"	breq.d	%H0, %H4, 3f	# return since v == u \n"
+	"2:				\n"
+	"	add.f   %L1, %L0, %L3	\n"
+	"	adc     %H1, %H0, %H3	\n"
+	"	scondd  %1, [%2]	\n"
+	"	bnz     1b		\n"
+	"3:				\n"
+	: "=&r"(old), "=&r" (temp)
+	: "r"(&v->counter), "r"(a), "r"(u)
+	: "cc");	/* memory clobber comes from smp_mb() */
+
+	smp_mb();
+
+	return old;
+}
+#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
+
+#endif
diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index fb98440c0bd4..a7daaf64ae34 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -14,188 +14,6 @@
 
 #include <linux/types.h>
 #include <linux/compiler.h>
-#include <asm/barrier.h>
-#ifndef CONFIG_ARC_HAS_LLSC
-#include <asm/smp.h>
-#endif
-
-#ifdef CONFIG_ARC_HAS_LLSC
-
-/*
- * Hardware assisted Atomic-R-M-W
- */
-
-#define BIT_OP(op, c_op, asm_op)					\
-static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
-{									\
-	unsigned int temp;						\
-									\
-	m += nr >> 5;							\
-									\
-	nr &= 0x1f;							\
-									\
-	__asm__ __volatile__(						\
-	"1:	llock       %0, [%1]		\n"			\
-	"	" #asm_op " %0, %0, %2	\n"				\
-	"	scond       %0, [%1]		\n"			\
-	"	bnz         1b			\n"			\
-	: "=&r"(temp)	/* Early clobber, to prevent reg reuse */	\
-	: "r"(m),	/* Not "m": llock only supports reg direct addr mode */	\
-	  "ir"(nr)							\
-	: "cc");							\
-}
-
-/*
- * Semantically:
- *    Test the bit
- *    if clear
- *        set it and return 0 (old value)
- *    else
- *        return 1 (old value).
- *
- * Since ARC lacks a equivalent h/w primitive, the bit is set unconditionally
- * and the old value of bit is returned
- */
-#define TEST_N_BIT_OP(op, c_op, asm_op)					\
-static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
-{									\
-	unsigned long old, temp;					\
-									\
-	m += nr >> 5;							\
-									\
-	nr &= 0x1f;							\
-									\
-	/*								\
-	 * Explicit full memory barrier needed before/after as		\
-	 * LLOCK/SCOND themselves don't provide any such smenatic	\
-	 */								\
-	smp_mb();							\
-									\
-	__asm__ __volatile__(						\
-	"1:	llock       %0, [%2]	\n"				\
-	"	" #asm_op " %1, %0, %3	\n"				\
-	"	scond       %1, [%2]	\n"				\
-	"	bnz         1b		\n"				\
-	: "=&r"(old), "=&r"(temp)					\
-	: "r"(m), "ir"(nr)						\
-	: "cc");							\
-									\
-	smp_mb();							\
-									\
-	return (old & (1 << nr)) != 0;					\
-}
-
-#else /* !CONFIG_ARC_HAS_LLSC */
-
-/*
- * Non hardware assisted Atomic-R-M-W
- * Locking would change to irq-disabling only (UP) and spinlocks (SMP)
- *
- * There's "significant" micro-optimization in writing our own variants of
- * bitops (over generic variants)
- *
- * (1) The generic APIs have "signed" @nr while we have it "unsigned"
- *     This avoids extra code to be generated for pointer arithmatic, since
- *     is "not sure" that index is NOT -ve
- * (2) Utilize the fact that ARCompact bit fidding insn (BSET/BCLR/ASL) etc
- *     only consider bottom 5 bits of @nr, so NO need to mask them off.
- *     (GCC Quirk: however for constant @nr we still need to do the masking
- *             at compile time)
- */
-
-#define BIT_OP(op, c_op, asm_op)					\
-static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
-{									\
-	unsigned long temp, flags;					\
-	m += nr >> 5;							\
-									\
-	/*								\
-	 * spin lock/unlock provide the needed smp_mb() before/after	\
-	 */								\
-	bitops_lock(flags);						\
-									\
-	temp = *m;							\
-	*m = temp c_op (1UL << (nr & 0x1f));					\
-									\
-	bitops_unlock(flags);						\
-}
-
-#define TEST_N_BIT_OP(op, c_op, asm_op)					\
-static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
-{									\
-	unsigned long old, flags;					\
-	m += nr >> 5;							\
-									\
-	bitops_lock(flags);						\
-									\
-	old = *m;							\
-	*m = old c_op (1UL << (nr & 0x1f));				\
-									\
-	bitops_unlock(flags);						\
-									\
-	return (old & (1UL << (nr & 0x1f))) != 0;			\
-}
-
-#endif
-
-/***************************************
- * Non atomic variants
- **************************************/
-
-#define __BIT_OP(op, c_op, asm_op)					\
-static inline void __##op##_bit(unsigned long nr, volatile unsigned long *m)	\
-{									\
-	unsigned long temp;						\
-	m += nr >> 5;							\
-									\
-	temp = *m;							\
-	*m = temp c_op (1UL << (nr & 0x1f));				\
-}
-
-#define __TEST_N_BIT_OP(op, c_op, asm_op)				\
-static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
-{									\
-	unsigned long old;						\
-	m += nr >> 5;							\
-									\
-	old = *m;							\
-	*m = old c_op (1UL << (nr & 0x1f));				\
-									\
-	return (old & (1UL << (nr & 0x1f))) != 0;			\
-}
-
-#define BIT_OPS(op, c_op, asm_op)					\
-									\
-	/* set_bit(), clear_bit(), change_bit() */			\
-	BIT_OP(op, c_op, asm_op)					\
-									\
-	/* test_and_set_bit(), test_and_clear_bit(), test_and_change_bit() */\
-	TEST_N_BIT_OP(op, c_op, asm_op)					\
-									\
-	/* __set_bit(), __clear_bit(), __change_bit() */		\
-	__BIT_OP(op, c_op, asm_op)					\
-									\
-	/* __test_and_set_bit(), __test_and_clear_bit(), __test_and_change_bit() */\
-	__TEST_N_BIT_OP(op, c_op, asm_op)
-
-BIT_OPS(set, |, bset)
-BIT_OPS(clear, & ~, bclr)
-BIT_OPS(change, ^, bxor)
-
-/*
- * This routine doesn't need to be atomic.
- */
-static inline int
-test_bit(unsigned int nr, const volatile unsigned long *addr)
-{
-	unsigned long mask;
-
-	addr += nr >> 5;
-
-	mask = 1UL << (nr & 0x1f);
-
-	return ((mask & *addr) != 0);
-}
 
 #ifdef CONFIG_ISA_ARCOMPACT
 
@@ -296,7 +114,7 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long word)
  * @result: [1-32]
  * fls(1) = 1, fls(0x80000000) = 32, fls(0) = 0
  */
-static inline __attribute__ ((const)) int fls(unsigned long x)
+static inline __attribute__ ((const)) int fls(unsigned int x)
 {
 	int n;
 
@@ -323,7 +141,7 @@ static inline __attribute__ ((const)) int __fls(unsigned long x)
  * ffs = Find First Set in word (LSB to MSB)
  * @result: [1-32], 0 if all 0's
  */
-static inline __attribute__ ((const)) int ffs(unsigned long x)
+static inline __attribute__ ((const)) int ffs(unsigned int x)
 {
 	int n;
 
@@ -368,6 +186,8 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x)
 #include <asm-generic/bitops/fls64.h>
 #include <asm-generic/bitops/sched.h>
 #include <asm-generic/bitops/lock.h>
+#include <asm-generic/bitops/atomic.h>
+#include <asm-generic/bitops/non-atomic.h>
 
 #include <asm-generic/bitops/find.h>
 #include <asm-generic/bitops/le.h>
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index d8ece4292388..f0f1fc5d62b6 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -62,10 +62,6 @@
 #define ARCH_SLAB_MINALIGN	8
 #endif
 
-extern void arc_cache_init(void);
-extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
-extern void read_decode_cache_bcr(void);
-
 extern int ioc_enable;
 extern unsigned long perip_base, perip_end;
 
diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h
index d42917e803e1..c5b544a5fe81 100644
--- a/arch/arc/include/asm/cmpxchg.h
+++ b/arch/arc/include/asm/cmpxchg.h
@@ -6,6 +6,7 @@
 #ifndef __ASM_ARC_CMPXCHG_H
 #define __ASM_ARC_CMPXCHG_H
 
+#include <linux/build_bug.h>
 #include <linux/types.h>
 
 #include <asm/barrier.h>
@@ -13,146 +14,130 @@
 
 #ifdef CONFIG_ARC_HAS_LLSC
 
-static inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
-{
-	unsigned long prev;
-
-	/*
-	 * Explicit full memory barrier needed before/after as
-	 * LLOCK/SCOND themselves don't provide any such semantics
-	 */
-	smp_mb();
-
-	__asm__ __volatile__(
-	"1:	llock   %0, [%1]	\n"
-	"	brne    %0, %2, 2f	\n"
-	"	scond   %3, [%1]	\n"
-	"	bnz     1b		\n"
-	"2:				\n"
-	: "=&r"(prev)	/* Early clobber, to prevent reg reuse */
-	: "r"(ptr),	/* Not "m": llock only supports reg direct addr mode */
-	  "ir"(expected),
-	  "r"(new)	/* can't be "ir". scond can't take LIMM for "b" */
-	: "cc", "memory"); /* so that gcc knows memory is being written here */
-
-	smp_mb();
-
-	return prev;
-}
-
-#else /* !CONFIG_ARC_HAS_LLSC */
-
-static inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
-{
-	unsigned long flags;
-	int prev;
-	volatile unsigned long *p = ptr;
-
-	/*
-	 * spin lock/unlock provide the needed smp_mb() before/after
-	 */
-	atomic_ops_lock(flags);
-	prev = *p;
-	if (prev == expected)
-		*p = new;
-	atomic_ops_unlock(flags);
-	return prev;
-}
-
-#endif
+/*
+ * if (*ptr == @old)
+ *      *ptr = @new
+ */
+#define __cmpxchg(ptr, old, new)					\
+({									\
+	__typeof__(*(ptr)) _prev;					\
+									\
+	__asm__ __volatile__(						\
+	"1:	llock  %0, [%1]	\n"					\
+	"	brne   %0, %2, 2f	\n"				\
+	"	scond  %3, [%1]	\n"					\
+	"	bnz     1b		\n"				\
+	"2:				\n"				\
+	: "=&r"(_prev)	/* Early clobber prevent reg reuse */		\
+	: "r"(ptr),	/* Not "m": llock only supports reg */		\
+	  "ir"(old),							\
+	  "r"(new)	/* Not "ir": scond can't take LIMM */		\
+	: "cc",								\
+	  "memory");	/* gcc knows memory is clobbered */		\
+									\
+	_prev;								\
+})
 
-#define arch_cmpxchg(ptr, o, n) ({			\
-	(typeof(*(ptr)))__cmpxchg((ptr),		\
-				  (unsigned long)(o),	\
-				  (unsigned long)(n));	\
+#define arch_cmpxchg_relaxed(ptr, old, new)				\
+({									\
+	__typeof__(ptr) _p_ = (ptr);					\
+	__typeof__(*(ptr)) _o_ = (old);					\
+	__typeof__(*(ptr)) _n_ = (new);					\
+	__typeof__(*(ptr)) _prev_;					\
+									\
+	switch(sizeof((_p_))) {						\
+	case 4:								\
+		_prev_ = __cmpxchg(_p_, _o_, _n_);			\
+		break;							\
+	default:							\
+		BUILD_BUG();						\
+	}								\
+	_prev_;								\
 })
 
-/*
- * atomic_cmpxchg is same as cmpxchg
- *   LLSC: only different in data-type, semantics are exactly same
- *  !LLSC: cmpxchg() has to use an external lock atomic_ops_lock to guarantee
- *         semantics, and this lock also happens to be used by atomic_*()
- */
-#define arch_atomic_cmpxchg(v, o, n) ((int)arch_cmpxchg(&((v)->counter), (o), (n)))
+#else
 
+#define arch_cmpxchg(ptr, old, new)				        \
+({									\
+	volatile __typeof__(ptr) _p_ = (ptr);				\
+	__typeof__(*(ptr)) _o_ = (old);					\
+	__typeof__(*(ptr)) _n_ = (new);					\
+	__typeof__(*(ptr)) _prev_;					\
+	unsigned long __flags;						\
+									\
+	BUILD_BUG_ON(sizeof(_p_) != 4);					\
+									\
+	/*								\
+	 * spin lock/unlock provide the needed smp_mb() before/after	\
+	 */								\
+	atomic_ops_lock(__flags);					\
+	_prev_ = *_p_;							\
+	if (_prev_ == _o_)						\
+		*_p_ = _n_;						\
+	atomic_ops_unlock(__flags);					\
+	_prev_;								\
+})
+
+#endif
 
 /*
- * xchg (reg with memory) based on "Native atomic" EX insn
+ * xchg
  */
-static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
-				   int size)
-{
-	extern unsigned long __xchg_bad_pointer(void);
-
-	switch (size) {
-	case 4:
-		smp_mb();
-
-		__asm__ __volatile__(
-		"	ex  %0, [%1]	\n"
-		: "+r"(val)
-		: "r"(ptr)
-		: "memory");
+#ifdef CONFIG_ARC_HAS_LLSC
 
-		smp_mb();
+#define __xchg(ptr, val)						\
+({									\
+	__asm__ __volatile__(						\
+	"	ex  %0, [%1]	\n"	/* set new value */	        \
+	: "+r"(val)							\
+	: "r"(ptr)							\
+	: "memory");							\
+	_val_;		/* get old value */				\
+})
 
-		return val;
-	}
-	return __xchg_bad_pointer();
-}
+#define arch_xchg_relaxed(ptr, val)					\
+({									\
+	__typeof__(ptr) _p_ = (ptr);					\
+	__typeof__(*(ptr)) _val_ = (val);				\
+									\
+	switch(sizeof(*(_p_))) {					\
+	case 4:								\
+		_val_ = __xchg(_p_, _val_);				\
+		break;							\
+	default:							\
+		BUILD_BUG();						\
+	}								\
+	_val_;								\
+})
 
-#define _xchg(ptr, with) ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), \
-						 sizeof(*(ptr))))
+#else  /* !CONFIG_ARC_HAS_LLSC */
 
 /*
- * xchg() maps directly to ARC EX instruction which guarantees atomicity.
- * However in !LLSC config, it also needs to be use @atomic_ops_lock spinlock
- * due to a subtle reason:
- *  - For !LLSC, cmpxchg() needs to use that lock (see above) and there is lot
- *    of  kernel code which calls xchg()/cmpxchg() on same data (see llist.h)
- *    Hence xchg() needs to follow same locking rules.
- *
- * Technically the lock is also needed for UP (boils down to irq save/restore)
- * but we can cheat a bit since cmpxchg() atomic_ops_lock() would cause irqs to
- * be disabled thus can't possibly be interrupted/preempted/clobbered by xchg()
- * Other way around, xchg is one instruction anyways, so can't be interrupted
- * as such
+ * EX instructions is baseline and present in !LLSC too. But in this
+ * regime it still needs use @atomic_ops_lock spinlock to allow interop
+ * with cmpxchg() which uses spinlock in !LLSC
+ * (llist.h use xchg and cmpxchg on sama data)
  */
 
-#if !defined(CONFIG_ARC_HAS_LLSC) && defined(CONFIG_SMP)
-
-#define arch_xchg(ptr, with)		\
-({					\
-	unsigned long flags;		\
-	typeof(*(ptr)) old_val;		\
-					\
-	atomic_ops_lock(flags);		\
-	old_val = _xchg(ptr, with);	\
-	atomic_ops_unlock(flags);	\
-	old_val;			\
+#define arch_xchg(ptr, val)					        \
+({									\
+	__typeof__(ptr) _p_ = (ptr);					\
+	__typeof__(*(ptr)) _val_ = (val);				\
+									\
+	unsigned long __flags;						\
+									\
+	atomic_ops_lock(__flags);					\
+									\
+	__asm__ __volatile__(						\
+	"	ex  %0, [%1]	\n"					\
+	: "+r"(_val_)							\
+	: "r"(_p_)							\
+	: "memory");							\
+									\
+	atomic_ops_unlock(__flags);					\
+	_val_;								\
 })
 
-#else
-
-#define arch_xchg(ptr, with)  _xchg(ptr, with)
-
 #endif
 
-/*
- * "atomic" variant of xchg()
- * REQ: It needs to follow the same serialization rules as other atomic_xxx()
- * Since xchg() doesn't always do that, it would seem that following definition
- * is incorrect. But here's the rationale:
- *   SMP : Even xchg() takes the atomic_ops_lock, so OK.
- *   LLSC: atomic_ops_lock are not relevant at all (even if SMP, since LLSC
- *         is natively "SMP safe", no serialization required).
- *   UP  : other atomics disable IRQ, so no way a difft ctxt atomic_xchg()
- *         could clobber them. atomic_xchg() itself would be 1 insn, so it
- *         can't be clobbered by others. Thus no serialization required when
- *         atomic_xchg is involved.
- */
-#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new))
-
 #endif
diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h
index 6dbf5cecc8cc..5aab4f93ab8a 100644
--- a/arch/arc/include/asm/entry-compact.h
+++ b/arch/arc/include/asm/entry-compact.h
@@ -126,19 +126,11 @@
  * to be saved again on kernel mode stack, as part of pt_regs.
  *-------------------------------------------------------------*/
 .macro PROLOG_FREEUP_REG	reg, mem
-#ifndef ARC_USE_SCRATCH_REG
-	sr  \reg, [ARC_REG_SCRATCH_DATA0]
-#else
 	st  \reg, [\mem]
-#endif
 .endm
 
 .macro PROLOG_RESTORE_REG	reg, mem
-#ifndef ARC_USE_SCRATCH_REG
-	lr  \reg, [ARC_REG_SCRATCH_DATA0]
-#else
 	ld  \reg, [\mem]
-#endif
 .endm
 
 /*--------------------------------------------------------------
diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h
index 4eef17c5c1da..11b0ff26b97b 100644
--- a/arch/arc/include/asm/hugepage.h
+++ b/arch/arc/include/asm/hugepage.h
@@ -58,14 +58,6 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
 				 pmd_t *pmd);
 
-/* Generic variants assume pgtable_t is struct page *, hence need for these */
-#define __HAVE_ARCH_PGTABLE_DEPOSIT
-extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
-				       pgtable_t pgtable);
-
-#define __HAVE_ARCH_PGTABLE_WITHDRAW
-extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
-
 #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
 extern void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
 				unsigned long end);
diff --git a/arch/arc/include/asm/mmu-arcv2.h b/arch/arc/include/asm/mmu-arcv2.h
new file mode 100644
index 000000000000..ed9036d4ede3
--- /dev/null
+++ b/arch/arc/include/asm/mmu-arcv2.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012, 2019-20 Synopsys, Inc. (www.synopsys.com)
+ *
+ * MMUv3 (arc700) / MMUv4 (archs) are software page walked and software managed.
+ * This file contains the TLB access registers and commands
+ */
+
+#ifndef _ASM_ARC_MMU_ARCV2_H
+#define _ASM_ARC_MMU_ARCV2_H
+
+/*
+ * TLB Management regs
+ */
+#define ARC_REG_MMU_BCR		0x06f
+
+#ifdef CONFIG_ARC_MMU_V3
+#define ARC_REG_TLBPD0		0x405
+#define ARC_REG_TLBPD1		0x406
+#define ARC_REG_TLBPD1HI	0	/* Dummy: allows common code */
+#define ARC_REG_TLBINDEX	0x407
+#define ARC_REG_TLBCOMMAND	0x408
+#define ARC_REG_PID		0x409
+#define ARC_REG_SCRATCH_DATA0	0x418
+#else
+#define ARC_REG_TLBPD0		0x460
+#define ARC_REG_TLBPD1		0x461
+#define ARC_REG_TLBPD1HI	0x463
+#define ARC_REG_TLBINDEX	0x464
+#define ARC_REG_TLBCOMMAND	0x465
+#define ARC_REG_PID		0x468
+#define ARC_REG_SCRATCH_DATA0	0x46c
+#endif
+
+/* Bits in MMU PID reg */
+#define __TLB_ENABLE		(1 << 31)
+#define __PROG_ENABLE		(1 << 30)
+#define MMU_ENABLE		(__TLB_ENABLE | __PROG_ENABLE)
+
+/* Bits in TLB Index reg */
+#define TLB_LKUP_ERR		0x80000000
+
+#ifdef CONFIG_ARC_MMU_V3
+#define TLB_DUP_ERR		(TLB_LKUP_ERR | 0x00000001)
+#else
+#define TLB_DUP_ERR		(TLB_LKUP_ERR | 0x40000000)
+#endif
+
+/*
+ * TLB Commands
+ */
+#define TLBWrite    		0x1
+#define TLBRead     		0x2
+#define TLBGetIndex 		0x3
+#define TLBProbe    		0x4
+#define TLBWriteNI		0x5  /* write JTLB without inv uTLBs */
+#define TLBIVUTLB		0x6  /* explicitly inv uTLBs */
+
+#ifdef CONFIG_ARC_MMU_V4
+#define TLBInsertEntry		0x7
+#define TLBDeleteEntry		0x8
+#endif
+
+/* Masks for actual TLB "PD"s */
+#define PTE_BITS_IN_PD0		(_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
+#define PTE_BITS_RWX		(_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
+
+#define PTE_BITS_NON_RWX_IN_PD1	(PAGE_MASK_PHYS | _PAGE_CACHEABLE)
+
+#ifndef __ASSEMBLY__
+
+struct mm_struct;
+extern int pae40_exist_but_not_enab(void);
+
+static inline int is_pae40_enabled(void)
+{
+	return IS_ENABLED(CONFIG_ARC_HAS_PAE40);
+}
+
+static inline void mmu_setup_asid(struct mm_struct *mm, unsigned long asid)
+{
+	write_aux_reg(ARC_REG_PID, asid | MMU_ENABLE);
+}
+
+static inline void mmu_setup_pgd(struct mm_struct *mm, void *pgd)
+{
+	/* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */
+#ifdef CONFIG_ISA_ARCV2
+	write_aux_reg(ARC_REG_SCRATCH_DATA0, (unsigned int)pgd);
+#endif
+}
+
+#else
+
+.macro ARC_MMU_REENABLE reg
+	lr \reg, [ARC_REG_PID]
+	or \reg, \reg, MMU_ENABLE
+	sr \reg, [ARC_REG_PID]
+.endm
+
+#endif /* !__ASSEMBLY__ */
+
+#endif
diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h
index 26b731d32a2b..ca427c30f70e 100644
--- a/arch/arc/include/asm/mmu.h
+++ b/arch/arc/include/asm/mmu.h
@@ -7,98 +7,15 @@
 #define _ASM_ARC_MMU_H
 
 #ifndef __ASSEMBLY__
-#include <linux/threads.h>	/* NR_CPUS */
-#endif
-
-#if defined(CONFIG_ARC_MMU_V1)
-#define CONFIG_ARC_MMU_VER 1
-#elif defined(CONFIG_ARC_MMU_V2)
-#define CONFIG_ARC_MMU_VER 2
-#elif defined(CONFIG_ARC_MMU_V3)
-#define CONFIG_ARC_MMU_VER 3
-#elif defined(CONFIG_ARC_MMU_V4)
-#define CONFIG_ARC_MMU_VER 4
-#endif
-
-/* MMU Management regs */
-#define ARC_REG_MMU_BCR		0x06f
-#if (CONFIG_ARC_MMU_VER < 4)
-#define ARC_REG_TLBPD0		0x405
-#define ARC_REG_TLBPD1		0x406
-#define ARC_REG_TLBPD1HI	0	/* Dummy: allows code sharing with ARC700 */
-#define ARC_REG_TLBINDEX	0x407
-#define ARC_REG_TLBCOMMAND	0x408
-#define ARC_REG_PID		0x409
-#define ARC_REG_SCRATCH_DATA0	0x418
-#else
-#define ARC_REG_TLBPD0		0x460
-#define ARC_REG_TLBPD1		0x461
-#define ARC_REG_TLBPD1HI	0x463
-#define ARC_REG_TLBINDEX	0x464
-#define ARC_REG_TLBCOMMAND	0x465
-#define ARC_REG_PID		0x468
-#define ARC_REG_SCRATCH_DATA0	0x46c
-#endif
-
-#if defined(CONFIG_ISA_ARCV2) || !defined(CONFIG_SMP)
-#define	ARC_USE_SCRATCH_REG
-#endif
-
-/* Bits in MMU PID register */
-#define __TLB_ENABLE		(1 << 31)
-#define __PROG_ENABLE		(1 << 30)
-#define MMU_ENABLE		(__TLB_ENABLE | __PROG_ENABLE)
-
-/* Error code if probe fails */
-#define TLB_LKUP_ERR		0x80000000
-
-#if (CONFIG_ARC_MMU_VER < 4)
-#define TLB_DUP_ERR	(TLB_LKUP_ERR | 0x00000001)
-#else
-#define TLB_DUP_ERR	(TLB_LKUP_ERR | 0x40000000)
-#endif
-
-/* TLB Commands */
-#define TLBWrite    0x1
-#define TLBRead     0x2
-#define TLBGetIndex 0x3
-#define TLBProbe    0x4
-
-#if (CONFIG_ARC_MMU_VER >= 2)
-#define TLBWriteNI  0x5		/* write JTLB without inv uTLBs */
-#define TLBIVUTLB   0x6		/* explicitly inv uTLBs */
-#else
-#define TLBWriteNI  TLBWrite	/* Not present in hardware, fallback */
-#endif
-
-#if (CONFIG_ARC_MMU_VER >= 4)
-#define TLBInsertEntry	0x7
-#define TLBDeleteEntry	0x8
-#endif
 
-#ifndef __ASSEMBLY__
+#include <linux/threads.h>	/* NR_CPUS */
 
 typedef struct {
 	unsigned long asid[NR_CPUS];	/* 8 bit MMU PID + Generation cycle */
 } mm_context_t;
 
-#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
-void tlb_paranoid_check(unsigned int mm_asid, unsigned long address);
-#else
-#define tlb_paranoid_check(a, b)
 #endif
 
-void arc_mmu_init(void);
-extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
-void read_decode_mmu_bcr(void);
-
-static inline int is_pae40_enabled(void)
-{
-	return IS_ENABLED(CONFIG_ARC_HAS_PAE40);
-}
-
-extern int pae40_exist_but_not_enab(void);
-
-#endif	/* !__ASSEMBLY__ */
+#include <asm/mmu-arcv2.h>
 
 #endif
diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h
index df164066e172..dda471f5f05b 100644
--- a/arch/arc/include/asm/mmu_context.h
+++ b/arch/arc/include/asm/mmu_context.h
@@ -15,22 +15,23 @@
 #ifndef _ASM_ARC_MMU_CONTEXT_H
 #define _ASM_ARC_MMU_CONTEXT_H
 
-#include <asm/arcregs.h>
-#include <asm/tlb.h>
 #include <linux/sched/mm.h>
 
+#include <asm/tlb.h>
 #include <asm-generic/mm_hooks.h>
 
-/*		ARC700 ASID Management
+/*		ARC ASID Management
+ *
+ * MMU tags TLBs with an 8-bit ASID, avoiding need to flush the TLB on
+ * context-switch.
  *
- * ARC MMU provides 8-bit ASID (0..255) to TAG TLB entries, allowing entries
- * with same vaddr (different tasks) to co-exit. This provides for
- * "Fast Context Switch" i.e. no TLB flush on ctxt-switch
+ * ASID is managed per cpu, so task threads across CPUs can have different
+ * ASID. Global ASID management is needed if hardware supports TLB shootdown
+ * and/or shared TLB across cores, which ARC doesn't.
  *
- * Linux assigns each task a unique ASID. A simple round-robin allocation
- * of H/w ASID is done using software tracker @asid_cpu.
- * When it reaches max 255, the allocation cycle starts afresh by flushing
- * the entire TLB and wrapping ASID back to zero.
+ * Each task is assigned unique ASID, with a simple round-robin allocator
+ * tracked in @asid_cpu. When 8-bit value rolls over,a new cycle is started
+ * over from 0, and TLB is flushed
  *
  * A new allocation cycle, post rollover, could potentially reassign an ASID
  * to a different task. Thus the rule is to refresh the ASID in a new cycle.
@@ -93,7 +94,7 @@ static inline void get_new_mmu_context(struct mm_struct *mm)
 	asid_mm(mm, cpu) = asid_cpu(cpu);
 
 set_hw:
-	write_aux_reg(ARC_REG_PID, hw_pid(mm, cpu) | MMU_ENABLE);
+	mmu_setup_asid(mm, hw_pid(mm, cpu));
 
 	local_irq_restore(flags);
 }
@@ -146,10 +147,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	 */
 	cpumask_set_cpu(cpu, mm_cpumask(next));
 
-#ifdef ARC_USE_SCRATCH_REG
-	/* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */
-	write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd);
-#endif
+	mmu_setup_pgd(next, next->pgd);
 
 	get_new_mmu_context(next);
 }
diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h
index 4a9d33372fe2..9a62e1d87967 100644
--- a/arch/arc/include/asm/page.h
+++ b/arch/arc/include/asm/page.h
@@ -34,57 +34,55 @@ void copy_user_highpage(struct page *to, struct page *from,
 			unsigned long u_vaddr, struct vm_area_struct *vma);
 void clear_user_page(void *to, unsigned long u_vaddr, struct page *page);
 
-#undef STRICT_MM_TYPECHECKS
-
-#ifdef STRICT_MM_TYPECHECKS
-/*
- * These are used to make use of C type-checking..
- */
-typedef struct {
-#ifdef CONFIG_ARC_HAS_PAE40
-	unsigned long long pte;
-#else
-	unsigned long pte;
-#endif
-} pte_t;
 typedef struct {
 	unsigned long pgd;
 } pgd_t;
+
+#define pgd_val(x)	((x).pgd)
+#define __pgd(x)	((pgd_t) { (x) })
+
+#if CONFIG_PGTABLE_LEVELS > 3
+
 typedef struct {
-	unsigned long pgprot;
-} pgprot_t;
+	unsigned long pud;
+} pud_t;
 
-#define pte_val(x)      ((x).pte)
-#define pgd_val(x)      ((x).pgd)
-#define pgprot_val(x)   ((x).pgprot)
+#define pud_val(x)      	((x).pud)
+#define __pud(x)        	((pud_t) { (x) })
 
-#define __pte(x)        ((pte_t) { (x) })
-#define __pgd(x)        ((pgd_t) { (x) })
-#define __pgprot(x)     ((pgprot_t) { (x) })
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 2
 
-#define pte_pgprot(x) __pgprot(pte_val(x))
+typedef struct {
+	unsigned long pmd;
+} pmd_t;
 
-#else /* !STRICT_MM_TYPECHECKS */
+#define pmd_val(x)	((x).pmd)
+#define __pmd(x)	((pmd_t) { (x) })
 
+#endif
+
+typedef struct {
 #ifdef CONFIG_ARC_HAS_PAE40
-typedef unsigned long long pte_t;
+	unsigned long long pte;
 #else
-typedef unsigned long pte_t;
+	unsigned long pte;
 #endif
-typedef unsigned long pgd_t;
-typedef unsigned long pgprot_t;
+} pte_t;
 
-#define pte_val(x)	(x)
-#define pgd_val(x)	(x)
-#define pgprot_val(x)	(x)
-#define __pte(x)	(x)
-#define __pgd(x)	(x)
-#define __pgprot(x)	(x)
-#define pte_pgprot(x)	(x)
+#define pte_val(x)	((x).pte)
+#define __pte(x)	((pte_t) { (x) })
 
-#endif
+typedef struct {
+	unsigned long pgprot;
+} pgprot_t;
+
+#define pgprot_val(x)	((x).pgprot)
+#define __pgprot(x)	((pgprot_t) { (x) })
+#define pte_pgprot(x)	__pgprot(pte_val(x))
 
-typedef pte_t * pgtable_t;
+typedef struct page *pgtable_t;
 
 /*
  * Use virt_to_pfn with caution:
@@ -122,8 +120,8 @@ extern int pfn_valid(unsigned long pfn);
  * virt here means link-address/program-address as embedded in object code.
  * And for ARC, link-addr = physical address
  */
-#define __pa(vaddr)  ((unsigned long)(vaddr))
-#define __va(paddr)  ((void *)((unsigned long)(paddr)))
+#define __pa(vaddr)  		((unsigned long)(vaddr))
+#define __va(paddr)  		((void *)((unsigned long)(paddr)))
 
 #define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
 #define virt_addr_valid(kaddr)  pfn_valid(virt_to_pfn(kaddr))
diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
index a32ca3104ced..096b8ef58edb 100644
--- a/arch/arc/include/asm/pgalloc.h
+++ b/arch/arc/include/asm/pgalloc.h
@@ -31,30 +31,32 @@
 
 #include <linux/mm.h>
 #include <linux/log2.h>
+#include <asm-generic/pgalloc.h>
 
 static inline void
 pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
 {
-	pmd_set(pmd, pte);
+	/*
+	 * The cast to long below is OK in 32-bit PAE40 regime with long long pte
+	 * Despite "wider" pte, the pte table needs to be in non-PAE low memory
+	 * as all higher levels can only hold long pointers.
+	 *
+	 * The cast itself is needed given simplistic definition of set_pmd()
+	 */
+	set_pmd(pmd, __pmd((unsigned long)pte));
 }
 
-static inline void
-pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t ptep)
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte_page)
 {
-	pmd_set(pmd, (pte_t *) ptep);
-}
-
-static inline int __get_order_pgd(void)
-{
-	return get_order(PTRS_PER_PGD * sizeof(pgd_t));
+	set_pmd(pmd, __pmd((unsigned long)page_address(pte_page)));
 }
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	int num, num2;
-	pgd_t *ret = (pgd_t *) __get_free_pages(GFP_KERNEL, __get_order_pgd());
+	pgd_t *ret = (pgd_t *) __get_free_page(GFP_KERNEL);
 
 	if (ret) {
+		int num, num2;
 		num = USER_PTRS_PER_PGD + USER_KERNEL_GUTTER / PGDIR_SIZE;
 		memzero(ret, num * sizeof(pgd_t));
 
@@ -68,64 +70,27 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 	return ret;
 }
 
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-	free_pages((unsigned long)pgd, __get_order_pgd());
-}
-
-
-/*
- * With software-only page-tables, addr-split for traversal is tweakable and
- * that directly governs how big tables would be at each level.
- * Further, the MMU page size is configurable.
- * Thus we need to programatically assert the size constraint
- * All of this is const math, allowing gcc to do constant folding/propagation.
- */
+#if CONFIG_PGTABLE_LEVELS > 3
 
-static inline int __get_order_pte(void)
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp)
 {
-	return get_order(PTRS_PER_PTE * sizeof(pte_t));
+	set_p4d(p4dp, __p4d((unsigned long)pudp));
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
-	pte_t *pte;
+#define __pud_free_tlb(tlb, pmd, addr)  pud_free((tlb)->mm, pmd)
 
-	pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
-					 __get_order_pte());
+#endif
 
-	return pte;
-}
+#if CONFIG_PGTABLE_LEVELS > 2
 
-static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm)
+static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
 {
-	pgtable_t pte_pg;
-	struct page *page;
-
-	pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL, __get_order_pte());
-	if (!pte_pg)
-		return 0;
-	memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t));
-	page = virt_to_page(pte_pg);
-	if (!pgtable_pte_page_ctor(page)) {
-		__free_page(page);
-		return 0;
-	}
-
-	return pte_pg;
+	set_pud(pudp, __pud((unsigned long)pmdp));
 }
 
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-	free_pages((unsigned long)pte, __get_order_pte()); /* takes phy addr */
-}
+#define __pmd_free_tlb(tlb, pmd, addr)  pmd_free((tlb)->mm, pmd)
 
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptep)
-{
-	pgtable_pte_page_dtor(virt_to_page(ptep));
-	free_pages((unsigned long)ptep, __get_order_pte());
-}
+#endif
 
 #define __pte_free_tlb(tlb, pte, addr)  pte_free((tlb)->mm, pte)
 
diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h
new file mode 100644
index 000000000000..183d23bc1e00
--- /dev/null
+++ b/arch/arc/include/asm/pgtable-bits-arcv2.h
@@ -0,0 +1,149 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ */
+
+/*
+ * page table flags for software walked/managed MMUv3 (ARC700) and MMUv4 (HS)
+ * There correspond to the corresponding bits in the TLB
+ */
+
+#ifndef _ASM_ARC_PGTABLE_BITS_ARCV2_H
+#define _ASM_ARC_PGTABLE_BITS_ARCV2_H
+
+#ifdef CONFIG_ARC_CACHE_PAGES
+#define _PAGE_CACHEABLE		(1 << 0)  /* Cached (H) */
+#else
+#define _PAGE_CACHEABLE		0
+#endif
+
+#define _PAGE_EXECUTE		(1 << 1)  /* User Execute  (H) */
+#define _PAGE_WRITE		(1 << 2)  /* User Write    (H) */
+#define _PAGE_READ		(1 << 3)  /* User Read     (H) */
+#define _PAGE_ACCESSED		(1 << 4)  /* Accessed      (s) */
+#define _PAGE_DIRTY		(1 << 5)  /* Modified      (s) */
+#define _PAGE_SPECIAL		(1 << 6)
+#define _PAGE_GLOBAL		(1 << 8)  /* ASID agnostic (H) */
+#define _PAGE_PRESENT		(1 << 9)  /* PTE/TLB Valid (H) */
+
+#ifdef CONFIG_ARC_MMU_V4
+#define _PAGE_HW_SZ		(1 << 10)  /* Normal/super (H) */
+#else
+#define _PAGE_HW_SZ		0
+#endif
+
+/* Defaults for every user page */
+#define ___DEF		(_PAGE_PRESENT | _PAGE_CACHEABLE)
+
+/* Set of bits not changed in pte_modify */
+#define _PAGE_CHG_MASK	(PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \
+							   _PAGE_SPECIAL)
+
+/* More Abbrevaited helpers */
+#define PAGE_U_NONE     __pgprot(___DEF)
+#define PAGE_U_R        __pgprot(___DEF | _PAGE_READ)
+#define PAGE_U_W_R      __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE)
+#define PAGE_U_X_R      __pgprot(___DEF | _PAGE_READ | _PAGE_EXECUTE)
+#define PAGE_U_X_W_R    __pgprot(___DEF \
+				| _PAGE_READ | _PAGE_WRITE | _PAGE_EXECUTE)
+#define PAGE_KERNEL     __pgprot(___DEF | _PAGE_GLOBAL \
+				| _PAGE_READ | _PAGE_WRITE | _PAGE_EXECUTE)
+
+#define PAGE_SHARED	PAGE_U_W_R
+
+#define pgprot_noncached(prot)	(__pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE))
+
+/*
+ * Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
+ *
+ * Certain cases have 1:1 mapping
+ *  e.g. __P101 means VM_READ, VM_EXEC and !VM_SHARED
+ *       which directly corresponds to  PAGE_U_X_R
+ *
+ * Other rules which cause the divergence from 1:1 mapping
+ *
+ *  1. Although ARC700 can do exclusive execute/write protection (meaning R
+ *     can be tracked independet of X/W unlike some other CPUs), still to
+ *     keep things consistent with other archs:
+ *      -Write implies Read:   W => R
+ *      -Execute implies Read: X => R
+ *
+ *  2. Pvt Writable doesn't have Write Enabled initially: Pvt-W => !W
+ *     This is to enable COW mechanism
+ */
+	/* xwr */
+#define __P000  PAGE_U_NONE
+#define __P001  PAGE_U_R
+#define __P010  PAGE_U_R	/* Pvt-W => !W */
+#define __P011  PAGE_U_R	/* Pvt-W => !W */
+#define __P100  PAGE_U_X_R	/* X => R */
+#define __P101  PAGE_U_X_R
+#define __P110  PAGE_U_X_R	/* Pvt-W => !W and X => R */
+#define __P111  PAGE_U_X_R	/* Pvt-W => !W */
+
+#define __S000  PAGE_U_NONE
+#define __S001  PAGE_U_R
+#define __S010  PAGE_U_W_R	/* W => R */
+#define __S011  PAGE_U_W_R
+#define __S100  PAGE_U_X_R	/* X => R */
+#define __S101  PAGE_U_X_R
+#define __S110  PAGE_U_X_W_R	/* X => R */
+#define __S111  PAGE_U_X_W_R
+
+#ifndef __ASSEMBLY__
+
+#define pte_write(pte)		(pte_val(pte) & _PAGE_WRITE)
+#define pte_dirty(pte)		(pte_val(pte) & _PAGE_DIRTY)
+#define pte_young(pte)		(pte_val(pte) & _PAGE_ACCESSED)
+#define pte_special(pte)	(pte_val(pte) & _PAGE_SPECIAL)
+
+#define PTE_BIT_FUNC(fn, op) \
+	static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
+
+PTE_BIT_FUNC(mknotpresent,     &= ~(_PAGE_PRESENT));
+PTE_BIT_FUNC(wrprotect,	&= ~(_PAGE_WRITE));
+PTE_BIT_FUNC(mkwrite,	|= (_PAGE_WRITE));
+PTE_BIT_FUNC(mkclean,	&= ~(_PAGE_DIRTY));
+PTE_BIT_FUNC(mkdirty,	|= (_PAGE_DIRTY));
+PTE_BIT_FUNC(mkold,	&= ~(_PAGE_ACCESSED));
+PTE_BIT_FUNC(mkyoung,	|= (_PAGE_ACCESSED));
+PTE_BIT_FUNC(mkspecial,	|= (_PAGE_SPECIAL));
+PTE_BIT_FUNC(mkhuge,	|= (_PAGE_HW_SZ));
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
+}
+
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, pte_t pteval)
+{
+	set_pte(ptep, pteval);
+}
+
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+		      pte_t *ptep);
+
+/* Encode swap {type,off} tuple into PTE
+ * We reserve 13 bits for 5-bit @type, keeping bits 12-5 zero, ensuring that
+ * PAGE_PRESENT is zero in a PTE holding swap "identifier"
+ */
+#define __swp_entry(type, off)		((swp_entry_t) \
+					{ ((type) & 0x1f) | ((off) << 13) })
+
+/* Decode a PTE containing swap "identifier "into constituents */
+#define __swp_type(pte_lookalike)	(((pte_lookalike).val) & 0x1f)
+#define __swp_offset(pte_lookalike)	((pte_lookalike).val >> 13)
+
+#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x)		((pte_t) { (x).val })
+
+#define kern_addr_valid(addr)	(1)
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#include <asm/hugepage.h>
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif
diff --git a/arch/arc/include/asm/pgtable-levels.h b/arch/arc/include/asm/pgtable-levels.h
new file mode 100644
index 000000000000..8084ef2f6491
--- /dev/null
+++ b/arch/arc/include/asm/pgtable-levels.h
@@ -0,0 +1,189 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Synopsys, Inc. (www.synopsys.com)
+ */
+
+/*
+ * Helpers for implemenintg paging levels
+ */
+
+#ifndef _ASM_ARC_PGTABLE_LEVELS_H
+#define _ASM_ARC_PGTABLE_LEVELS_H
+
+#if CONFIG_PGTABLE_LEVELS == 2
+
+/*
+ * 2 level paging setup for software walked MMUv3 (ARC700) and MMUv4 (HS)
+ *
+ * [31]            32 bit virtual address              [0]
+ * -------------------------------------------------------
+ * |               | <---------- PGDIR_SHIFT ----------> |
+ * |               |                | <-- PAGE_SHIFT --> |
+ * -------------------------------------------------------
+ *       |                  |                |
+ *       |                  |                --> off in page frame
+ *       |                  ---> index into Page Table
+ *       ----> index into Page Directory
+ *
+ * Given software walk, the vaddr split is arbitrary set to 11:8:13
+ * However enabling of super page in a 2 level regime pegs PGDIR_SHIFT to
+ * super page size.
+ */
+
+#if defined(CONFIG_ARC_HUGEPAGE_16M)
+#define PGDIR_SHIFT		24
+#elif defined(CONFIG_ARC_HUGEPAGE_2M)
+#define PGDIR_SHIFT		21
+#else
+/*
+ * No Super page case
+ * Default value provides 11:8:13 (8K), 10:10:12 (4K)
+ * Limits imposed by pgtable_t only PAGE_SIZE long
+ * (so 4K page can only have 1K entries: or 10 bits)
+ */
+#ifdef CONFIG_ARC_PAGE_SIZE_4K
+#define PGDIR_SHIFT		22
+#else
+#define PGDIR_SHIFT		21
+#endif
+
+#endif
+
+#else /* CONFIG_PGTABLE_LEVELS != 2 */
+
+/*
+ * A default 3 level paging testing setup in software walked MMU
+ *   MMUv4 (8K page): <4> : <7> : <8> : <13>
+ * A default 4 level paging testing setup in software walked MMU
+ *   MMUv4 (8K page): <4> : <3> : <4> : <8> : <13>
+ */
+#define PGDIR_SHIFT		28
+#if CONFIG_PGTABLE_LEVELS > 3
+#define PUD_SHIFT		25
+#endif
+#if CONFIG_PGTABLE_LEVELS > 2
+#define PMD_SHIFT		21
+#endif
+
+#endif /* CONFIG_PGTABLE_LEVELS */
+
+#define PGDIR_SIZE		BIT(PGDIR_SHIFT)
+#define PGDIR_MASK		(~(PGDIR_SIZE - 1))
+#define PTRS_PER_PGD		BIT(32 - PGDIR_SHIFT)
+
+#if CONFIG_PGTABLE_LEVELS > 3
+#define PUD_SIZE		BIT(PUD_SHIFT)
+#define PUD_MASK		(~(PUD_SIZE - 1))
+#define PTRS_PER_PUD		BIT(PGDIR_SHIFT - PUD_SHIFT)
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 2
+#define PMD_SIZE		BIT(PMD_SHIFT)
+#define PMD_MASK		(~(PMD_SIZE - 1))
+#define PTRS_PER_PMD		BIT(PUD_SHIFT - PMD_SHIFT)
+#endif
+
+#define PTRS_PER_PTE		BIT(PMD_SHIFT - PAGE_SHIFT)
+
+#ifndef __ASSEMBLY__
+
+#if CONFIG_PGTABLE_LEVELS > 3
+#include <asm-generic/pgtable-nop4d.h>
+#elif CONFIG_PGTABLE_LEVELS > 2
+#include <asm-generic/pgtable-nopud.h>
+#else
+#include <asm-generic/pgtable-nopmd.h>
+#endif
+
+/*
+ * 1st level paging: pgd
+ */
+#define pgd_index(addr)		((addr) >> PGDIR_SHIFT)
+#define pgd_offset(mm, addr)	(((mm)->pgd) + pgd_index(addr))
+#define pgd_offset_k(addr)	pgd_offset(&init_mm, addr)
+#define pgd_ERROR(e) \
+	pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+#if CONFIG_PGTABLE_LEVELS > 3
+
+/* In 4 level paging, p4d_* macros work on pgd */
+#define p4d_none(x)		(!p4d_val(x))
+#define p4d_bad(x)		((p4d_val(x) & ~PAGE_MASK))
+#define p4d_present(x)		(p4d_val(x))
+#define p4d_clear(xp)		do { p4d_val(*(xp)) = 0; } while (0)
+#define p4d_pgtable(p4d)	((pud_t *)(p4d_val(p4d) & PAGE_MASK))
+#define p4d_page(p4d)		virt_to_page(p4d_pgtable(p4d))
+#define set_p4d(p4dp, p4d)	(*(p4dp) = p4d)
+
+/*
+ * 2nd level paging: pud
+ */
+#define pud_ERROR(e) \
+	pr_crit("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
+
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 2
+
+/*
+ * In 3 level paging, pud_* macros work on pgd
+ * In 4 level paging, pud_* macros work on pud
+ */
+#define pud_none(x)		(!pud_val(x))
+#define pud_bad(x)		((pud_val(x) & ~PAGE_MASK))
+#define pud_present(x)		(pud_val(x))
+#define pud_clear(xp)		do { pud_val(*(xp)) = 0; } while (0)
+#define pud_pgtable(pud)	((pmd_t *)(pud_val(pud) & PAGE_MASK))
+#define pud_page(pud)		virt_to_page(pud_pgtable(pud))
+#define set_pud(pudp, pud)	(*(pudp) = pud)
+
+/*
+ * 3rd level paging: pmd
+ */
+#define pmd_ERROR(e) \
+	pr_crit("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
+
+#define pmd_pfn(pmd)		((pmd_val(pmd) & PMD_MASK) >> PAGE_SHIFT)
+#define pfn_pmd(pfn,prot)	__pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
+#define mk_pmd(page,prot)	pfn_pmd(page_to_pfn(page),prot)
+
+#endif
+
+/*
+ * Due to the strange way generic pgtable level folding works, the pmd_* macros
+ *  - are valid even for 2 levels (which supposedly only has pgd - pte)
+ *  - behave differently for 2 vs. 3
+ * In 2  level paging        (pgd -> pte), pmd_* macros work on pgd
+ * In 3+ level paging (pgd -> pmd -> pte), pmd_* macros work on pmd
+ */
+#define pmd_none(x)		(!pmd_val(x))
+#define pmd_bad(x)		((pmd_val(x) & ~PAGE_MASK))
+#define pmd_present(x)		(pmd_val(x))
+#define pmd_clear(xp)		do { pmd_val(*(xp)) = 0; } while (0)
+#define pmd_page_vaddr(pmd)	(pmd_val(pmd) & PAGE_MASK)
+#define pmd_page(pmd)		virt_to_page(pmd_page_vaddr(pmd))
+#define set_pmd(pmdp, pmd)	(*(pmdp) = pmd)
+#define pmd_pgtable(pmd)	((pgtable_t) pmd_page_vaddr(pmd))
+
+/*
+ * 4th level paging: pte
+ */
+#define pte_ERROR(e) \
+	pr_crit("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
+
+#define pte_none(x)		(!pte_val(x))
+#define pte_present(x)		(pte_val(x) & _PAGE_PRESENT)
+#define pte_clear(mm,addr,ptep)	set_pte_at(mm, addr, ptep, __pte(0))
+#define pte_page(pte)		pfn_to_page(pte_pfn(pte))
+#define set_pte(ptep, pte)	((*(ptep)) = (pte))
+#define pte_pfn(pte)		(pte_val(pte) >> PAGE_SHIFT)
+#define pfn_pte(pfn, prot)	__pte(__pfn_to_phys(pfn) | pgprot_val(prot))
+#define mk_pte(page, prot)	pfn_pte(page_to_pfn(page), prot)
+
+#ifdef CONFIG_ISA_ARCV2
+#define pmd_leaf(x)		(pmd_val(x) & _PAGE_HW_SZ)
+#endif
+
+#endif	/* !__ASSEMBLY__ */
+
+#endif
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 320cc0ae8a08..9320b04c04bf 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -1,220 +1,17 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * vineetg: May 2011
- *  -Folded PAGE_PRESENT (used by VM) and PAGE_VALID (used by MMU) into 1.
- *     They are semantically the same although in different contexts
- *     VALID marks a TLB entry exists and it will only happen if PRESENT
- *  - Utilise some unused free bits to confine PTE flags to 12 bits
- *     This is a must for 4k pg-sz
- *
- * vineetg: Mar 2011 - changes to accommodate MMU TLB Page Descriptor mods
- *  -TLB Locking never really existed, except for initial specs
- *  -SILENT_xxx not needed for our port
- *  -Per my request, MMU V3 changes the layout of some of the bits
- *     to avoid a few shifts in TLB Miss handlers.
- *
- * vineetg: April 2010
- *  -PGD entry no longer contains any flags. If empty it is 0, otherwise has
- *   Pg-Tbl ptr. Thus pmd_present(), pmd_valid(), pmd_set( ) become simpler
- *
- * vineetg: April 2010
- *  -Switched form 8:11:13 split for page table lookup to 11:8:13
- *  -this speeds up page table allocation itself as we now have to memset 1K
- *    instead of 8k per page table.
- * -TODO: Right now page table alloc is 8K and rest 7K is unused
- *    need to optimise it
- *
- * Amit Bhor, Sameer Dhavale: Codito Technologies 2004
  */
 
 #ifndef _ASM_ARC_PGTABLE_H
 #define _ASM_ARC_PGTABLE_H
 
 #include <linux/bits.h>
-#include <asm-generic/pgtable-nopmd.h>
-#include <asm/page.h>
-#include <asm/mmu.h>	/* to propagate CONFIG_ARC_MMU_VER <n> */
-
-/**************************************************************************
- * Page Table Flags
- *
- * ARC700 MMU only deals with softare managed TLB entries.
- * Page Tables are purely for Linux VM's consumption and the bits below are
- * suited to that (uniqueness). Hence some are not implemented in the TLB and
- * some have different value in TLB.
- * e.g. MMU v2: K_READ bit is 8 and so is GLOBAL (possible because they live in
- *      seperate PD0 and PD1, which combined forms a translation entry)
- *      while for PTE perspective, they are 8 and 9 respectively
- * with MMU v3: Most bits (except SHARED) represent the exact hardware pos
- *      (saves some bit shift ops in TLB Miss hdlrs)
- */
-
-#if (CONFIG_ARC_MMU_VER <= 2)
-
-#define _PAGE_ACCESSED      (1<<1)	/* Page is accessed (S) */
-#define _PAGE_CACHEABLE     (1<<2)	/* Page is cached (H) */
-#define _PAGE_EXECUTE       (1<<3)	/* Page has user execute perm (H) */
-#define _PAGE_WRITE         (1<<4)	/* Page has user write perm (H) */
-#define _PAGE_READ          (1<<5)	/* Page has user read perm (H) */
-#define _PAGE_DIRTY         (1<<6)	/* Page modified (dirty) (S) */
-#define _PAGE_SPECIAL       (1<<7)
-#define _PAGE_GLOBAL        (1<<8)	/* Page is global (H) */
-#define _PAGE_PRESENT       (1<<10)	/* TLB entry is valid (H) */
-
-#else	/* MMU v3 onwards */
-
-#define _PAGE_CACHEABLE     (1<<0)	/* Page is cached (H) */
-#define _PAGE_EXECUTE       (1<<1)	/* Page has user execute perm (H) */
-#define _PAGE_WRITE         (1<<2)	/* Page has user write perm (H) */
-#define _PAGE_READ          (1<<3)	/* Page has user read perm (H) */
-#define _PAGE_ACCESSED      (1<<4)	/* Page is accessed (S) */
-#define _PAGE_DIRTY         (1<<5)	/* Page modified (dirty) (S) */
-#define _PAGE_SPECIAL       (1<<6)
-
-#if (CONFIG_ARC_MMU_VER >= 4)
-#define _PAGE_WTHRU         (1<<7)	/* Page cache mode write-thru (H) */
-#endif
-
-#define _PAGE_GLOBAL        (1<<8)	/* Page is global (H) */
-#define _PAGE_PRESENT       (1<<9)	/* TLB entry is valid (H) */
-
-#if (CONFIG_ARC_MMU_VER >= 4)
-#define _PAGE_HW_SZ         (1<<10)	/* Page Size indicator (H): 0 normal, 1 super */
-#endif
-
-#define _PAGE_SHARED_CODE   (1<<11)	/* Shared Code page with cmn vaddr
-					   usable for shared TLB entries (H) */
-
-#define _PAGE_UNUSED_BIT    (1<<12)
-#endif
-
-/* vmalloc permissions */
-#define _K_PAGE_PERMS  (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ | \
-			_PAGE_GLOBAL | _PAGE_PRESENT)
-
-#ifndef CONFIG_ARC_CACHE_PAGES
-#undef _PAGE_CACHEABLE
-#define _PAGE_CACHEABLE 0
-#endif
 
-#ifndef _PAGE_HW_SZ
-#define _PAGE_HW_SZ	0
-#endif
-
-/* Defaults for every user page */
-#define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE)
-
-/* Set of bits not changed in pte_modify */
-#define _PAGE_CHG_MASK	(PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \
-							   _PAGE_SPECIAL)
-/* More Abbrevaited helpers */
-#define PAGE_U_NONE     __pgprot(___DEF)
-#define PAGE_U_R        __pgprot(___DEF | _PAGE_READ)
-#define PAGE_U_W_R      __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE)
-#define PAGE_U_X_R      __pgprot(___DEF | _PAGE_READ | _PAGE_EXECUTE)
-#define PAGE_U_X_W_R    __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE | \
-						       _PAGE_EXECUTE)
-
-#define PAGE_SHARED	PAGE_U_W_R
-
-/* While kernel runs out of unstranslated space, vmalloc/modules use a chunk of
- * user vaddr space - visible in all addr spaces, but kernel mode only
- * Thus Global, all-kernel-access, no-user-access, cached
- */
-#define PAGE_KERNEL          __pgprot(_K_PAGE_PERMS | _PAGE_CACHEABLE)
-
-/* ioremap */
-#define PAGE_KERNEL_NO_CACHE __pgprot(_K_PAGE_PERMS)
-
-/* Masks for actual TLB "PD"s */
-#define PTE_BITS_IN_PD0		(_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
-#define PTE_BITS_RWX		(_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
-
-#define PTE_BITS_NON_RWX_IN_PD1	(PAGE_MASK_PHYS | _PAGE_CACHEABLE)
-
-/**************************************************************************
- * Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
- *
- * Certain cases have 1:1 mapping
- *  e.g. __P101 means VM_READ, VM_EXEC and !VM_SHARED
- *       which directly corresponds to  PAGE_U_X_R
- *
- * Other rules which cause the divergence from 1:1 mapping
- *
- *  1. Although ARC700 can do exclusive execute/write protection (meaning R
- *     can be tracked independet of X/W unlike some other CPUs), still to
- *     keep things consistent with other archs:
- *      -Write implies Read:   W => R
- *      -Execute implies Read: X => R
- *
- *  2. Pvt Writable doesn't have Write Enabled initially: Pvt-W => !W
- *     This is to enable COW mechanism
- */
-	/* xwr */
-#define __P000  PAGE_U_NONE
-#define __P001  PAGE_U_R
-#define __P010  PAGE_U_R	/* Pvt-W => !W */
-#define __P011  PAGE_U_R	/* Pvt-W => !W */
-#define __P100  PAGE_U_X_R	/* X => R */
-#define __P101  PAGE_U_X_R
-#define __P110  PAGE_U_X_R	/* Pvt-W => !W and X => R */
-#define __P111  PAGE_U_X_R	/* Pvt-W => !W */
-
-#define __S000  PAGE_U_NONE
-#define __S001  PAGE_U_R
-#define __S010  PAGE_U_W_R	/* W => R */
-#define __S011  PAGE_U_W_R
-#define __S100  PAGE_U_X_R	/* X => R */
-#define __S101  PAGE_U_X_R
-#define __S110  PAGE_U_X_W_R	/* X => R */
-#define __S111  PAGE_U_X_W_R
-
-/****************************************************************
- * 2 tier (PGD:PTE) software page walker
- *
- * [31]		    32 bit virtual address              [0]
- * -------------------------------------------------------
- * |               | <------------ PGDIR_SHIFT ----------> |
- * |		   |					 |
- * | BITS_FOR_PGD  |  BITS_FOR_PTE  | <-- PAGE_SHIFT --> |
- * -------------------------------------------------------
- *       |                  |                |
- *       |                  |                --> off in page frame
- *       |                  ---> index into Page Table
- *       ----> index into Page Directory
- *
- * In a single page size configuration, only PAGE_SHIFT is fixed
- * So both PGD and PTE sizing can be tweaked
- *  e.g. 8K page (PAGE_SHIFT 13) can have
- *  - PGDIR_SHIFT 21  -> 11:8:13 address split
- *  - PGDIR_SHIFT 24  -> 8:11:13 address split
- *
- * If Super Page is configured, PGDIR_SHIFT becomes fixed too,
- * so the sizing flexibility is gone.
- */
-
-#if defined(CONFIG_ARC_HUGEPAGE_16M)
-#define PGDIR_SHIFT	24
-#elif defined(CONFIG_ARC_HUGEPAGE_2M)
-#define PGDIR_SHIFT	21
-#else
-/*
- * Only Normal page support so "hackable" (see comment above)
- * Default value provides 11:8:13 (8K), 11:9:12 (4K)
- */
-#define PGDIR_SHIFT	21
-#endif
-
-#define BITS_FOR_PTE	(PGDIR_SHIFT - PAGE_SHIFT)
-#define BITS_FOR_PGD	(32 - PGDIR_SHIFT)
-
-#define PGDIR_SIZE	BIT(PGDIR_SHIFT)	/* vaddr span, not PDG sz */
-#define PGDIR_MASK	(~(PGDIR_SIZE-1))
-
-#define	PTRS_PER_PTE	BIT(BITS_FOR_PTE)
-#define	PTRS_PER_PGD	BIT(BITS_FOR_PGD)
+#include <asm/pgtable-levels.h>
+#include <asm/pgtable-bits-arcv2.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
 
 /*
  * Number of entries a user land program use.
@@ -222,143 +19,17 @@
  */
 #define	USER_PTRS_PER_PGD	(TASK_SIZE / PGDIR_SIZE)
 
-
-/****************************************************************
- * Bucket load of VM Helpers
- */
-
 #ifndef __ASSEMBLY__
 
-#define pte_ERROR(e) \
-	pr_crit("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
-#define pgd_ERROR(e) \
-	pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
-
-/* the zero page used for uninitialized and anonymous pages */
 extern char empty_zero_page[PAGE_SIZE];
 #define ZERO_PAGE(vaddr)	(virt_to_page(empty_zero_page))
 
-#define set_pte(pteptr, pteval)	((*(pteptr)) = (pteval))
-#define set_pmd(pmdptr, pmdval)	(*(pmdptr) = pmdval)
-
-/* find the page descriptor of the Page Tbl ref by PMD entry */
-#define pmd_page(pmd)		virt_to_page(pmd_val(pmd) & PAGE_MASK)
-
-/* find the logical addr (phy for ARC) of the Page Tbl ref by PMD entry */
-#define pmd_page_vaddr(pmd)	(pmd_val(pmd) & PAGE_MASK)
-
-/* In a 2 level sys, setup the PGD entry with PTE value */
-static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
-{
-	pmd_val(*pmdp) = (unsigned long)ptep;
-}
-
-#define pte_none(x)			(!pte_val(x))
-#define pte_present(x)			(pte_val(x) & _PAGE_PRESENT)
-#define pte_clear(mm, addr, ptep)	set_pte_at(mm, addr, ptep, __pte(0))
-
-#define pmd_none(x)			(!pmd_val(x))
-#define	pmd_bad(x)			((pmd_val(x) & ~PAGE_MASK))
-#define pmd_present(x)			(pmd_val(x))
-#define pmd_leaf(x)			(pmd_val(x) & _PAGE_HW_SZ)
-#define pmd_clear(xp)			do { pmd_val(*(xp)) = 0; } while (0)
-
-#define pte_page(pte)		pfn_to_page(pte_pfn(pte))
-#define mk_pte(page, prot)	pfn_pte(page_to_pfn(page), prot)
-#define pfn_pte(pfn, prot)	__pte(__pfn_to_phys(pfn) | pgprot_val(prot))
-
-/* Don't use virt_to_pfn for macros below: could cause truncations for PAE40*/
-#define pte_pfn(pte)		(pte_val(pte) >> PAGE_SHIFT)
-
-/* Zoo of pte_xxx function */
-#define pte_read(pte)		(pte_val(pte) & _PAGE_READ)
-#define pte_write(pte)		(pte_val(pte) & _PAGE_WRITE)
-#define pte_dirty(pte)		(pte_val(pte) & _PAGE_DIRTY)
-#define pte_young(pte)		(pte_val(pte) & _PAGE_ACCESSED)
-#define pte_special(pte)	(pte_val(pte) & _PAGE_SPECIAL)
-
-#define PTE_BIT_FUNC(fn, op) \
-	static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
-
-PTE_BIT_FUNC(mknotpresent,	&= ~(_PAGE_PRESENT));
-PTE_BIT_FUNC(wrprotect,	&= ~(_PAGE_WRITE));
-PTE_BIT_FUNC(mkwrite,	|= (_PAGE_WRITE));
-PTE_BIT_FUNC(mkclean,	&= ~(_PAGE_DIRTY));
-PTE_BIT_FUNC(mkdirty,	|= (_PAGE_DIRTY));
-PTE_BIT_FUNC(mkold,	&= ~(_PAGE_ACCESSED));
-PTE_BIT_FUNC(mkyoung,	|= (_PAGE_ACCESSED));
-PTE_BIT_FUNC(exprotect,	&= ~(_PAGE_EXECUTE));
-PTE_BIT_FUNC(mkexec,	|= (_PAGE_EXECUTE));
-PTE_BIT_FUNC(mkspecial,	|= (_PAGE_SPECIAL));
-PTE_BIT_FUNC(mkhuge,	|= (_PAGE_HW_SZ));
-
-static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
-{
-	return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
-}
+extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE);
 
 /* Macro to mark a page protection as uncacheable */
 #define pgprot_noncached(prot)	(__pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE))
 
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
-			      pte_t *ptep, pte_t pteval)
-{
-	set_pte(ptep, pteval);
-}
-
-/*
- * Macro to quickly access the PGD entry, utlising the fact that some
- * arch may cache the pointer to Page Directory of "current" task
- * in a MMU register
- *
- * Thus task->mm->pgd (3 pointer dereferences, cache misses etc simply
- * becomes read a register
- *
- * ********CAUTION*******:
- * Kernel code might be dealing with some mm_struct of NON "current"
- * Thus use this macro only when you are certain that "current" is current
- * e.g. when dealing with signal frame setup code etc
- */
-#ifdef ARC_USE_SCRATCH_REG
-#define pgd_offset_fast(mm, addr)	\
-({					\
-	pgd_t *pgd_base = (pgd_t *) read_aux_reg(ARC_REG_SCRATCH_DATA0);  \
-	pgd_base + pgd_index(addr);	\
-})
-#else
-#define pgd_offset_fast(mm, addr)	pgd_offset(mm, addr)
-#endif
-
 extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE);
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
-		      pte_t *ptep);
-
-/* Encode swap {type,off} tuple into PTE
- * We reserve 13 bits for 5-bit @type, keeping bits 12-5 zero, ensuring that
- * PAGE_PRESENT is zero in a PTE holding swap "identifier"
- */
-#define __swp_entry(type, off)	((swp_entry_t) { \
-					((type) & 0x1f) | ((off) << 13) })
-
-/* Decode a PTE containing swap "identifier "into constituents */
-#define __swp_type(pte_lookalike)	(((pte_lookalike).val) & 0x1f)
-#define __swp_offset(pte_lookalike)	((pte_lookalike).val >> 13)
-
-/* NOPs, to keep generic kernel happy */
-#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
-#define __swp_entry_to_pte(x)	((pte_t) { (x).val })
-
-#define kern_addr_valid(addr)	(1)
-
-#define pmd_pgtable(pmd)       ((pgtable_t) pmd_page_vaddr(pmd))
-
-/*
- * remap a physical page `pfn' of size `size' with page protection `prot'
- * into virtual address `from'
- */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#include <asm/hugepage.h>
-#endif
 
 /* to cope with aliasing VIPT cache */
 #define HAVE_ARCH_UNMAPPED_AREA
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index e4031ecd3c8c..f28afcf5c6d1 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -93,7 +93,7 @@ extern unsigned int get_wchan(struct task_struct *p);
 #define VMALLOC_START	(PAGE_OFFSET - (CONFIG_ARC_KVADDR_SIZE << 20))
 
 /* 1 PGDIR_SIZE each for fixmap/pkmap, 2 PGDIR_SIZE gutter (see asm/highmem.h) */
-#define VMALLOC_SIZE	((CONFIG_ARC_KVADDR_SIZE << 20) - PGDIR_SIZE * 4)
+#define VMALLOC_SIZE	((CONFIG_ARC_KVADDR_SIZE << 20) - PMD_SIZE * 4)
 
 #define VMALLOC_END	(VMALLOC_START + VMALLOC_SIZE)
 
diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h
index 01f85478170d..028a8cf76206 100644
--- a/arch/arc/include/asm/setup.h
+++ b/arch/arc/include/asm/setup.h
@@ -2,8 +2,8 @@
 /*
  * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
  */
-#ifndef __ASMARC_SETUP_H
-#define __ASMARC_SETUP_H
+#ifndef __ASM_ARC_SETUP_H
+#define __ASM_ARC_SETUP_H
 
 
 #include <linux/types.h>
@@ -34,4 +34,12 @@ long __init arc_get_mem_sz(void);
 #define IS_AVAIL2(v, s, cfg)	IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg))
 #define IS_AVAIL3(v, v2, s)	IS_AVAIL1(v, s), IS_AVAIL1(v, IS_DISABLED_RUN(v2))
 
+extern void arc_mmu_init(void);
+extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
+extern void read_decode_mmu_bcr(void);
+
+extern void arc_cache_init(void);
+extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
+extern void read_decode_cache_bcr(void);
+
 #endif /* __ASMARC_SETUP_H */
diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h
index c5de4008d19f..d856491606ac 100644
--- a/arch/arc/include/asm/smp.h
+++ b/arch/arc/include/asm/smp.h
@@ -105,7 +105,6 @@ static inline const char *arc_platform_smp_cpuinfo(void)
 #include <asm/spinlock.h>
 
 extern arch_spinlock_t smp_atomic_ops_lock;
-extern arch_spinlock_t smp_bitops_lock;
 
 #define atomic_ops_lock(flags)	do {		\
 	local_irq_save(flags);			\
@@ -117,24 +116,11 @@ extern arch_spinlock_t smp_bitops_lock;
 	local_irq_restore(flags);		\
 } while (0)
 
-#define bitops_lock(flags)	do {		\
-	local_irq_save(flags);			\
-	arch_spin_lock(&smp_bitops_lock);	\
-} while (0)
-
-#define bitops_unlock(flags) do {		\
-	arch_spin_unlock(&smp_bitops_lock);	\
-	local_irq_restore(flags);		\
-} while (0)
-
 #else /* !CONFIG_SMP */
 
 #define atomic_ops_lock(flags)		local_irq_save(flags)
 #define atomic_ops_unlock(flags)	local_irq_restore(flags)
 
-#define bitops_lock(flags)		local_irq_save(flags)
-#define bitops_unlock(flags)		local_irq_restore(flags)
-
 #endif /* !CONFIG_SMP */
 
 #endif	/* !CONFIG_ARC_HAS_LLSC */
diff --git a/arch/arc/include/asm/tlb-mmu1.h b/arch/arc/include/asm/tlb-mmu1.h
deleted file mode 100644
index a3083b36f5f4..000000000000
--- a/arch/arc/include/asm/tlb-mmu1.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- */
-
-#ifndef __ASM_TLB_MMU_V1_H__
-#define __ASM_TLB_MMU_V1_H__
-
-#include <asm/mmu.h>
-
-#if defined(__ASSEMBLY__) && (CONFIG_ARC_MMU_VER == 1)
-
-.macro TLB_WRITE_HEURISTICS
-
-#define JH_HACK1
-#undef JH_HACK2
-#undef JH_HACK3
-
-#ifdef JH_HACK3
-; Calculate set index for 2-way MMU
-; -avoiding use of GetIndex from MMU
-;   and its unpleasant LFSR pseudo-random sequence
-;
-; r1 = TLBPD0 from TLB_RELOAD above
-;
-; -- jh_ex_way_set not cleared on startup
-;    didn't want to change setup.c
-;    hence extra instruction to clean
-;
-; -- should be in cache since in same line
-;    as r0/r1 saves above
-;
-ld  r0,[jh_ex_way_sel]  ; victim pointer
-and r0,r0,1         ; clean
-xor.f   r0,r0,1         ; flip
-st  r0,[jh_ex_way_sel]  ; store back
-asr r0,r1,12        ; get set # <<1, note bit 12=R=0
-or.nz   r0,r0,1         ; set way bit
-and r0,r0,0xff      ; clean
-sr  r0,[ARC_REG_TLBINDEX]
-#endif
-
-#ifdef JH_HACK2
-; JH hack #2
-;  Faster than hack #1 in non-thrash case, but hard-coded for 2-way MMU
-;  Slower in thrash case (where it matters) because more code is executed
-;  Inefficient due to two-register paradigm of this miss handler
-;
-/* r1 = data TLBPD0 at this point */
-lr      r0,[eret]               /* instruction address */
-xor     r0,r0,r1                /* compare set #       */
-and.f   r0,r0,0x000fe000        /* 2-way MMU mask      */
-bne     88f                     /* not in same set - no need to probe */
-
-lr      r0,[eret]               /* instruction address */
-and     r0,r0,PAGE_MASK         /* VPN of instruction address */
-; lr  r1,[ARC_REG_TLBPD0]     /* Data VPN+ASID - already in r1 from TLB_RELOAD*/
-and     r1,r1,0xff              /* Data ASID */
-or      r0,r0,r1                /* Instruction address + Data ASID */
-
-lr      r1,[ARC_REG_TLBPD0]     /* save TLBPD0 containing data TLB*/
-sr      r0,[ARC_REG_TLBPD0]     /* write instruction address to TLBPD0 */
-sr      TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */
-lr      r0,[ARC_REG_TLBINDEX]   /* r0 = index where instruction is, if at all */
-sr      r1,[ARC_REG_TLBPD0]     /* restore TLBPD0 */
-
-xor     r0,r0,1                 /* flip bottom bit of data index */
-b.d     89f
-sr      r0,[ARC_REG_TLBINDEX]   /* and put it back */
-88:
-sr  TLBGetIndex, [ARC_REG_TLBCOMMAND]
-89:
-#endif
-
-#ifdef JH_HACK1
-;
-; Always checks whether instruction will be kicked out by dtlb miss
-;
-mov_s   r3, r1                  ; save PD0 prepared by TLB_RELOAD in r3
-lr      r0,[eret]               /* instruction address */
-and     r0,r0,PAGE_MASK         /* VPN of instruction address */
-bmsk    r1,r3,7                 /* Data ASID, bits 7-0 */
-or_s    r0,r0,r1                /* Instruction address + Data ASID */
-
-sr      r0,[ARC_REG_TLBPD0]     /* write instruction address to TLBPD0 */
-sr      TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */
-lr      r0,[ARC_REG_TLBINDEX]   /* r0 = index where instruction is, if at all */
-sr      r3,[ARC_REG_TLBPD0]     /* restore TLBPD0 */
-
-sr      TLBGetIndex, [ARC_REG_TLBCOMMAND]
-lr      r1,[ARC_REG_TLBINDEX]   /* r1 = index where MMU wants to put data */
-cmp     r0,r1                   /* if no match on indices, go around */
-xor.eq  r1,r1,1                 /* flip bottom bit of data index */
-sr      r1,[ARC_REG_TLBINDEX]   /* and put it back */
-#endif
-
-.endm
-
-#endif
-
-#endif
diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S
index 12d5f12d10d2..a7e6a2174187 100644
--- a/arch/arc/kernel/entry-arcv2.S
+++ b/arch/arc/kernel/entry-arcv2.S
@@ -10,6 +10,7 @@
 #include <asm/errno.h>
 #include <asm/arcregs.h>
 #include <asm/irqflags.h>
+#include <asm/mmu.h>
 
 ; A maximum number of supported interrupts in the core interrupt controller.
 ; This number is not equal to the maximum interrupt number (256) because
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 2cb8dfe866b6..dd77a0c8f740 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -101,11 +101,8 @@ ENTRY(EV_MachineCheck)
 	lr  r0, [efa]
 	mov r1, sp
 
-	; hardware auto-disables MMU, re-enable it to allow kernel vaddr
-	; access for say stack unwinding of modules for crash dumps
-	lr	r3, [ARC_REG_PID]
-	or	r3, r3, MMU_ENABLE
-	sr	r3, [ARC_REG_PID]
+	; MC excpetions disable MMU
+	ARC_MMU_REENABLE r3
 
 	lsr  	r3, r2, 8
 	bmsk 	r3, r3, 7
diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c
index a86641b91e65..6885e422870e 100644
--- a/arch/arc/kernel/intc-compact.c
+++ b/arch/arc/kernel/intc-compact.c
@@ -142,7 +142,7 @@ IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ);
  *    Time hard-ISR, timer_interrupt( ) calls spin_unlock_irq several times.
  *    Here local_irq_enable( ) shd not re-enable lower priority interrupts
  * -If called from soft-ISR, it must re-enable all interrupts
- *    soft ISR are low prioity jobs which can be very slow, thus all IRQs
+ *    soft ISR are low priority jobs which can be very slow, thus all IRQs
  *    must be enabled while they run.
  *    Now hardware context wise we may still be in L2 ISR (not done rtie)
  *    still we must re-enable both L1 and L2 IRQs
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index db0e104d6835..78e6d069b1c1 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -29,10 +29,8 @@
 
 #ifndef CONFIG_ARC_HAS_LLSC
 arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
-arch_spinlock_t smp_bitops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 
 EXPORT_SYMBOL_GPL(smp_atomic_ops_lock);
-EXPORT_SYMBOL_GPL(smp_bitops_lock);
 #endif
 
 struct plat_smp_ops  __weak plat_smp_ops;
@@ -283,7 +281,7 @@ static void ipi_send_msg_one(int cpu, enum ipi_msg_type msg)
 	/*
 	 * Call the platform specific IPI kick function, but avoid if possible:
 	 * Only do so if there's no pending msg from other concurrent sender(s).
-	 * Otherwise, recevier will see this msg as well when it takes the
+	 * Otherwise, receiver will see this msg as well when it takes the
 	 * IPI corresponding to that msg. This is true, even if it is already in
 	 * IPI handler, because !@old means it has not yet dequeued the msg(s)
 	 * so @new msg can be a free-loader
diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c
index 1b9576d21e24..c376ff3147e7 100644
--- a/arch/arc/kernel/stacktrace.c
+++ b/arch/arc/kernel/stacktrace.c
@@ -149,7 +149,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
 #else
 	/* On ARC, only Dward based unwinder works. fp based backtracing is
 	 * not possible (-fno-omit-frame-pointer) because of the way function
-	 * prelogue is setup (callee regs saved and then fp set and not other
+	 * prologue is setup (callee regs saved and then fp set and not other
 	 * way around
 	 */
 	pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n");
diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c
index 57235e5c0cea..6b83e3f2b41c 100644
--- a/arch/arc/kernel/traps.c
+++ b/arch/arc/kernel/traps.c
@@ -20,11 +20,6 @@
 #include <asm/unaligned.h>
 #include <asm/kprobes.h>
 
-void __init trap_init(void)
-{
-	return;
-}
-
 void die(const char *str, struct pt_regs *regs, unsigned long address)
 {
 	show_kernel_fault_diag(str, regs, address);
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index a2fbea3ee07c..8aa1231865d1 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -205,93 +205,24 @@ slc_chk:
 #define OP_INV_IC	0x4
 
 /*
- *		I-Cache Aliasing in ARC700 VIPT caches (MMU v1-v3)
+ * Cache Flush programming model
  *
- * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag.
- * The orig Cache Management Module "CDU" only required paddr to invalidate a
- * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry.
- * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching
- * the exact same line.
+ * ARC700 MMUv3 I$ and D$ are both VIPT and can potentially alias.
+ * Programming model requires both paddr and vaddr irrespecive of aliasing
+ * considerations:
+ *  - vaddr in {I,D}C_IV?L
+ *  - paddr in {I,D}C_PTAG
  *
- * However for larger Caches (way-size > page-size) - i.e. in Aliasing config,
- * paddr alone could not be used to correctly index the cache.
+ * In HS38x (MMUv4), D$ is PIPT, I$ is VIPT and can still alias.
+ * Programming model is different for aliasing vs. non-aliasing I$
+ *  - D$ / Non-aliasing I$: only paddr in {I,D}C_IV?L
+ *  - Aliasing I$: same as ARC700 above (so MMUv3 routine used for MMUv4 I$)
  *
- * ------------------
- * MMU v1/v2 (Fixed Page Size 8k)
- * ------------------
- * The solution was to provide CDU with these additonal vaddr bits. These
- * would be bits [x:13], x would depend on cache-geometry, 13 comes from
- * standard page size of 8k.
- * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits
- * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
- * orig 5 bits of paddr were anyways ignored by CDU line ops, as they
- * represent the offset within cache-line. The adv of using this "clumsy"
- * interface for additional info was no new reg was needed in CDU programming
- * model.
- *
- * 17:13 represented the max num of bits passable, actual bits needed were
- * fewer, based on the num-of-aliases possible.
- * -for 2 alias possibility, only bit 13 needed (32K cache)
- * -for 4 alias possibility, bits 14:13 needed (64K cache)
- *
- * ------------------
- * MMU v3
- * ------------------
- * This ver of MMU supports variable page sizes (1k-16k): although Linux will
- * only support 8k (default), 16k and 4k.
- * However from hardware perspective, smaller page sizes aggravate aliasing
- * meaning more vaddr bits needed to disambiguate the cache-line-op ;
- * the existing scheme of piggybacking won't work for certain configurations.
- * Two new registers IC_PTAG and DC_PTAG inttoduced.
- * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs
+ *  - If PAE40 is enabled, independent of aliasing considerations, the higher
+ *    bits needs to be written into PTAG_HI
  */
 
 static inline
-void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr,
-			  unsigned long sz, const int op, const int full_page)
-{
-	unsigned int aux_cmd;
-	int num_lines;
-
-	if (op == OP_INV_IC) {
-		aux_cmd = ARC_REG_IC_IVIL;
-	} else {
-		/* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
-		aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
-	}
-
-	/* Ensure we properly floor/ceil the non-line aligned/sized requests
-	 * and have @paddr - aligned to cache line and integral @num_lines.
-	 * This however can be avoided for page sized since:
-	 *  -@paddr will be cache-line aligned already (being page aligned)
-	 *  -@sz will be integral multiple of line size (being page sized).
-	 */
-	if (!full_page) {
-		sz += paddr & ~CACHE_LINE_MASK;
-		paddr &= CACHE_LINE_MASK;
-		vaddr &= CACHE_LINE_MASK;
-	}
-
-	num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
-
-	/* MMUv2 and before: paddr contains stuffed vaddrs bits */
-	paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
-
-	while (num_lines-- > 0) {
-		write_aux_reg(aux_cmd, paddr);
-		paddr += L1_CACHE_BYTES;
-	}
-}
-
-/*
- * For ARC700 MMUv3 I-cache and D-cache flushes
- *  - ARC700 programming model requires paddr and vaddr be passed in seperate
- *    AUX registers (*_IV*L and *_PTAG respectively) irrespective of whether the
- *    caches actually alias or not.
- * -  For HS38, only the aliasing I-cache configuration uses the PTAG reg
- *    (non aliasing I-cache version doesn't; while D-cache can't possibly alias)
- */
-static inline
 void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
 			  unsigned long sz, const int op, const int full_page)
 {
@@ -350,17 +281,6 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
 #ifndef USE_RGN_FLSH
 
 /*
- * In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT
- * Here's how cache ops are implemented
- *
- *  - D-cache: only paddr needed (in DC_IVDL/DC_FLDL)
- *  - I-cache Non Aliasing: Despite VIPT, only paddr needed (in IC_IVIL)
- *  - I-cache Aliasing: Both vaddr and paddr needed (in IC_IVIL, IC_PTAG
- *    respectively, similar to MMU v3 programming model, hence
- *    __cache_line_loop_v3() is used)
- *
- * If PAE40 is enabled, independent of aliasing considerations, the higher bits
- * needs to be written into PTAG_HI
  */
 static inline
 void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
@@ -460,11 +380,9 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
 
 #endif
 
-#if (CONFIG_ARC_MMU_VER < 3)
-#define __cache_line_loop	__cache_line_loop_v2
-#elif (CONFIG_ARC_MMU_VER == 3)
+#ifdef CONFIG_ARC_MMU_V3
 #define __cache_line_loop	__cache_line_loop_v3
-#elif (CONFIG_ARC_MMU_VER > 3)
+#else
 #define __cache_line_loop	__cache_line_loop_v4
 #endif
 
@@ -1123,7 +1041,7 @@ void clear_user_page(void *to, unsigned long u_vaddr, struct page *page)
 	clear_page(to);
 	clear_bit(PG_dc_clean, &page->flags);
 }
-
+EXPORT_SYMBOL(clear_user_page);
 
 /**********************************************************************
  * Explicit Cache flush request from user space via syscall
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index f5657cb68e4f..5787c261c9a4 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -33,28 +33,34 @@ noinline static int handle_kernel_vaddr_fault(unsigned long address)
 	pud_t *pud, *pud_k;
 	pmd_t *pmd, *pmd_k;
 
-	pgd = pgd_offset_fast(current->active_mm, address);
+	pgd = pgd_offset(current->active_mm, address);
 	pgd_k = pgd_offset_k(address);
 
-	if (!pgd_present(*pgd_k))
+	if (pgd_none (*pgd_k))
 		goto bad_area;
+	if (!pgd_present(*pgd))
+		set_pgd(pgd, *pgd_k);
 
 	p4d = p4d_offset(pgd, address);
 	p4d_k = p4d_offset(pgd_k, address);
-	if (!p4d_present(*p4d_k))
+	if (p4d_none(*p4d_k))
 		goto bad_area;
+	if (!p4d_present(*p4d))
+		set_p4d(p4d, *p4d_k);
 
 	pud = pud_offset(p4d, address);
 	pud_k = pud_offset(p4d_k, address);
-	if (!pud_present(*pud_k))
+	if (pud_none(*pud_k))
 		goto bad_area;
+	if (!pud_present(*pud))
+		set_pud(pud, *pud_k);
 
 	pmd = pmd_offset(pud, address);
 	pmd_k = pmd_offset(pud_k, address);
-	if (!pmd_present(*pmd_k))
+	if (pmd_none(*pmd_k))
 		goto bad_area;
-
-	set_pmd(pmd, *pmd_k);
+	if (!pmd_present(*pmd))
+		set_pmd(pmd, *pmd_k);
 
 	/* XXX: create the TLB entry here */
 	return 0;
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index c083bf660cec..699ecf119641 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -189,6 +189,11 @@ void __init mem_init(void)
 {
 	memblock_free_all();
 	highmem_init();
+
+	BUILD_BUG_ON((PTRS_PER_PGD * sizeof(pgd_t)) > PAGE_SIZE);
+	BUILD_BUG_ON((PTRS_PER_PUD * sizeof(pud_t)) > PAGE_SIZE);
+	BUILD_BUG_ON((PTRS_PER_PMD * sizeof(pmd_t)) > PAGE_SIZE);
+	BUILD_BUG_ON((PTRS_PER_PTE * sizeof(pte_t)) > PAGE_SIZE);
 }
 
 #ifdef CONFIG_HIGHMEM
diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c
index 95c649fbc95a..0ee75aca6e10 100644
--- a/arch/arc/mm/ioremap.c
+++ b/arch/arc/mm/ioremap.c
@@ -39,7 +39,8 @@ void __iomem *ioremap(phys_addr_t paddr, unsigned long size)
 	if (arc_uncached_addr_space(paddr))
 		return (void __iomem *)(u32)paddr;
 
-	return ioremap_prot(paddr, size, PAGE_KERNEL_NO_CACHE);
+	return ioremap_prot(paddr, size,
+			    pgprot_val(pgprot_noncached(PAGE_KERNEL)));
 }
 EXPORT_SYMBOL(ioremap);
 
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 9c7c68247289..5f71445f26bd 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -1,51 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * TLB Management (flush/create/diagnostics) for ARC700
+ * TLB Management (flush/create/diagnostics) for MMUv3 and MMUv4
  *
  * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
  *
- * vineetg: Aug 2011
- *  -Reintroduce duplicate PD fixup - some customer chips still have the issue
- *
- * vineetg: May 2011
- *  -No need to flush_cache_page( ) for each call to update_mmu_cache()
- *   some of the LMBench tests improved amazingly
- *      = page-fault thrice as fast (75 usec to 28 usec)
- *      = mmap twice as fast (9.6 msec to 4.6 msec),
- *      = fork (5.3 msec to 3.7 msec)
- *
- * vineetg: April 2011 :
- *  -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore,
- *      helps avoid a shift when preparing PD0 from PTE
- *
- * vineetg: April 2011 : Preparing for MMU V3
- *  -MMU v2/v3 BCRs decoded differently
- *  -Remove TLB_SIZE hardcoding as it's variable now: 256 or 512
- *  -tlb_entry_erase( ) can be void
- *  -local_flush_tlb_range( ):
- *      = need not "ceil" @end
- *      = walks MMU only if range spans < 32 entries, as opposed to 256
- *
- * Vineetg: Sept 10th 2008
- *  -Changes related to MMU v2 (Rel 4.8)
- *
- * Vineetg: Aug 29th 2008
- *  -In TLB Flush operations (Metal Fix MMU) there is a explicit command to
- *    flush Micro-TLBS. If TLB Index Reg is invalid prior to TLBIVUTLB cmd,
- *    it fails. Thus need to load it with ANY valid value before invoking
- *    TLBIVUTLB cmd
- *
- * Vineetg: Aug 21th 2008:
- *  -Reduced the duration of IRQ lockouts in TLB Flush routines
- *  -Multiple copies of TLB erase code separated into a "single" function
- *  -In TLB Flush routines, interrupt disabling moved UP to retrieve ASID
- *       in interrupt-safe region.
- *
- * Vineetg: April 23rd Bug #93131
- *    Problem: tlb_flush_kernel_range() doesn't do anything if the range to
- *              flush is more than the size of TLB itself.
- *
- * Rahul Trivedi : Codito Technologies 2004
  */
 
 #include <linux/module.h>
@@ -57,47 +15,6 @@
 #include <asm/mmu_context.h>
 #include <asm/mmu.h>
 
-/*			Need for ARC MMU v2
- *
- * ARC700 MMU-v1 had a Joint-TLB for Code and Data and is 2 way set-assoc.
- * For a memcpy operation with 3 players (src/dst/code) such that all 3 pages
- * map into same set, there would be contention for the 2 ways causing severe
- * Thrashing.
- *
- * Although J-TLB is 2 way set assoc, ARC700 caches J-TLB into uTLBS which has
- * much higher associativity. u-D-TLB is 8 ways, u-I-TLB is 4 ways.
- * Given this, the thrashing problem should never happen because once the 3
- * J-TLB entries are created (even though 3rd will knock out one of the prev
- * two), the u-D-TLB and u-I-TLB will have what is required to accomplish memcpy
- *
- * Yet we still see the Thrashing because a J-TLB Write cause flush of u-TLBs.
- * This is a simple design for keeping them in sync. So what do we do?
- * The solution which James came up was pretty neat. It utilised the assoc
- * of uTLBs by not invalidating always but only when absolutely necessary.
- *
- * - Existing TLB commands work as before
- * - New command (TLBWriteNI) for TLB write without clearing uTLBs
- * - New command (TLBIVUTLB) to invalidate uTLBs.
- *
- * The uTLBs need only be invalidated when pages are being removed from the
- * OS page table. If a 'victim' TLB entry is being overwritten in the main TLB
- * as a result of a miss, the removed entry is still allowed to exist in the
- * uTLBs as it is still valid and present in the OS page table. This allows the
- * full associativity of the uTLBs to hide the limited associativity of the main
- * TLB.
- *
- * During a miss handler, the new "TLBWriteNI" command is used to load
- * entries without clearing the uTLBs.
- *
- * When the OS page table is updated, TLB entries that may be associated with a
- * removed page are removed (flushed) from the TLB using TLBWrite. In this
- * circumstance, the uTLBs must also be cleared. This is done by using the
- * existing TLBWrite command. An explicit IVUTLB is also required for those
- * corner cases when TLBWrite was not executed at all because the corresp
- * J-TLB entry got evicted/replaced.
- */
-
-
 /* A copy of the ASID from the PID reg is kept in asid_cache */
 DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE;
 
@@ -120,32 +37,10 @@ static inline void __tlb_entry_erase(void)
 
 static void utlb_invalidate(void)
 {
-#if (CONFIG_ARC_MMU_VER >= 2)
-
-#if (CONFIG_ARC_MMU_VER == 2)
-	/* MMU v2 introduced the uTLB Flush command.
-	 * There was however an obscure hardware bug, where uTLB flush would
-	 * fail when a prior probe for J-TLB (both totally unrelated) would
-	 * return lkup err - because the entry didn't exist in MMU.
-	 * The Workaround was to set Index reg with some valid value, prior to
-	 * flush. This was fixed in MMU v3
-	 */
-	unsigned int idx;
-
-	/* make sure INDEX Reg is valid */
-	idx = read_aux_reg(ARC_REG_TLBINDEX);
-
-	/* If not write some dummy val */
-	if (unlikely(idx & TLB_LKUP_ERR))
-		write_aux_reg(ARC_REG_TLBINDEX, 0xa);
-#endif
-
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBIVUTLB);
-#endif
-
 }
 
-#if (CONFIG_ARC_MMU_VER < 4)
+#ifdef CONFIG_ARC_MMU_V3
 
 static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid)
 {
@@ -176,7 +71,7 @@ static void tlb_entry_erase(unsigned int vaddr_n_asid)
 	}
 }
 
-static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
+static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1)
 {
 	unsigned int idx;
 
@@ -206,7 +101,7 @@ static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
 }
 
-#else	/* CONFIG_ARC_MMU_VER >= 4) */
+#else	/* MMUv4 */
 
 static void tlb_entry_erase(unsigned int vaddr_n_asid)
 {
@@ -214,13 +109,16 @@ static void tlb_entry_erase(unsigned int vaddr_n_asid)
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry);
 }
 
-static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
+static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1)
 {
 	write_aux_reg(ARC_REG_TLBPD0, pd0);
-	write_aux_reg(ARC_REG_TLBPD1, pd1);
 
-	if (is_pae40_enabled())
+	if (!is_pae40_enabled()) {
+		write_aux_reg(ARC_REG_TLBPD1, pd1);
+	} else {
+		write_aux_reg(ARC_REG_TLBPD1, pd1 & 0xFFFFFFFF);
 		write_aux_reg(ARC_REG_TLBPD1HI, (u64)pd1 >> 32);
+	}
 
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry);
 }
@@ -496,7 +394,7 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
 	unsigned long flags;
 	unsigned int asid_or_sasid, rwx;
 	unsigned long pd0;
-	pte_t pd1;
+	phys_addr_t pd1;
 
 	/*
 	 * create_tlb() assumes that current->mm == vma->mm, since
@@ -505,7 +403,6 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
 	 *
 	 * Removing the assumption involves
 	 * -Using vma->mm->context{ASID,SASID}, as opposed to MMU reg.
-	 * -Fix the TLB paranoid debug code to not trigger false negatives.
 	 * -More importantly it makes this handler inconsistent with fast-path
 	 *  TLB Refill handler which always deals with "current"
 	 *
@@ -528,8 +425,6 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
 
 	local_irq_save(flags);
 
-	tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), vaddr);
-
 	vaddr &= PAGE_MASK;
 
 	/* update this PTE credentials */
@@ -639,43 +534,6 @@ void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
 	update_mmu_cache(vma, addr, &pte);
 }
 
-void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
-				pgtable_t pgtable)
-{
-	struct list_head *lh = (struct list_head *) pgtable;
-
-	assert_spin_locked(&mm->page_table_lock);
-
-	/* FIFO */
-	if (!pmd_huge_pte(mm, pmdp))
-		INIT_LIST_HEAD(lh);
-	else
-		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
-	pmd_huge_pte(mm, pmdp) = pgtable;
-}
-
-pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
-{
-	struct list_head *lh;
-	pgtable_t pgtable;
-
-	assert_spin_locked(&mm->page_table_lock);
-
-	pgtable = pmd_huge_pte(mm, pmdp);
-	lh = (struct list_head *) pgtable;
-	if (list_empty(lh))
-		pmd_huge_pte(mm, pmdp) = NULL;
-	else {
-		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
-		list_del(lh);
-	}
-
-	pte_val(pgtable[0]) = 0;
-	pte_val(pgtable[1]) = 0;
-
-	return pgtable;
-}
-
 void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
 			       unsigned long end)
 {
@@ -706,14 +564,6 @@ void read_decode_mmu_bcr(void)
 {
 	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
 	unsigned int tmp;
-	struct bcr_mmu_1_2 {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-		unsigned int ver:8, ways:4, sets:4, u_itlb:8, u_dtlb:8;
-#else
-		unsigned int u_dtlb:8, u_itlb:8, sets:4, ways:4, ver:8;
-#endif
-	} *mmu2;
-
 	struct bcr_mmu_3 {
 #ifdef CONFIG_CPU_BIG_ENDIAN
 	unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4,
@@ -738,23 +588,14 @@ void read_decode_mmu_bcr(void)
 	tmp = read_aux_reg(ARC_REG_MMU_BCR);
 	mmu->ver = (tmp >> 24);
 
-	if (is_isa_arcompact()) {
-		if (mmu->ver <= 2) {
-			mmu2 = (struct bcr_mmu_1_2 *)&tmp;
-			mmu->pg_sz_k = TO_KB(0x2000);
-			mmu->sets = 1 << mmu2->sets;
-			mmu->ways = 1 << mmu2->ways;
-			mmu->u_dtlb = mmu2->u_dtlb;
-			mmu->u_itlb = mmu2->u_itlb;
-		} else {
-			mmu3 = (struct bcr_mmu_3 *)&tmp;
-			mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1);
-			mmu->sets = 1 << mmu3->sets;
-			mmu->ways = 1 << mmu3->ways;
-			mmu->u_dtlb = mmu3->u_dtlb;
-			mmu->u_itlb = mmu3->u_itlb;
-			mmu->sasid = mmu3->sasid;
-		}
+	if (is_isa_arcompact() && mmu->ver == 3) {
+		mmu3 = (struct bcr_mmu_3 *)&tmp;
+		mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1);
+		mmu->sets = 1 << mmu3->sets;
+		mmu->ways = 1 << mmu3->ways;
+		mmu->u_dtlb = mmu3->u_dtlb;
+		mmu->u_itlb = mmu3->u_itlb;
+		mmu->sasid = mmu3->sasid;
 	} else {
 		mmu4 = (struct bcr_mmu_4 *)&tmp;
 		mmu->pg_sz_k = 1 << (mmu4->sz0 - 1);
@@ -780,8 +621,8 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
 			  IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE));
 
 	n += scnprintf(buf + n, len - n,
-		      "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d%s%s\n",
-		       p_mmu->ver, p_mmu->pg_sz_k, super_pg,
+		      "MMU [v%x]\t: %dk PAGE, %s, swalk %d lvl, JTLB %d (%dx%d), uDTLB %d, uITLB %d%s%s\n",
+		       p_mmu->ver, p_mmu->pg_sz_k, super_pg,  CONFIG_PGTABLE_LEVELS,
 		       p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways,
 		       p_mmu->u_dtlb, p_mmu->u_itlb,
 		       IS_AVAIL2(p_mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40));
@@ -815,22 +656,17 @@ void arc_mmu_init(void)
 
 	/*
 	 * Ensure that MMU features assumed by kernel exist in hardware.
-	 * For older ARC700 cpus, it has to be exact match, since the MMU
-	 * revisions were not backwards compatible (MMUv3 TLB layout changed
-	 * so even if kernel for v2 didn't use any new cmds of v3, it would
-	 * still not work.
-	 * For HS cpus, MMUv4 was baseline and v5 is backwards compatible
-	 * (will run older software).
+	 *  - For older ARC700 cpus, only v3 supported
+	 *  - For HS cpus, v4 was baseline and v5 is backwards compatible
+	 *    (will run older software).
 	 */
-	if (is_isa_arcompact() && mmu->ver == CONFIG_ARC_MMU_VER)
+	if (is_isa_arcompact() && mmu->ver == 3)
 		compat = 1;
-	else if (is_isa_arcv2() && mmu->ver >= CONFIG_ARC_MMU_VER)
+	else if (is_isa_arcv2() && mmu->ver >= 4)
 		compat = 1;
 
-	if (!compat) {
-		panic("MMU ver %d doesn't match kernel built for %d...\n",
-		      mmu->ver, CONFIG_ARC_MMU_VER);
-	}
+	if (!compat)
+		panic("MMU ver %d doesn't match kernel built for\n", mmu->ver);
 
 	if (mmu->pg_sz_k != TO_KB(PAGE_SIZE))
 		panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE));
@@ -843,14 +679,11 @@ void arc_mmu_init(void)
 	if (IS_ENABLED(CONFIG_ARC_HAS_PAE40) && !mmu->pae)
 		panic("Hardware doesn't support PAE40\n");
 
-	/* Enable the MMU */
-	write_aux_reg(ARC_REG_PID, MMU_ENABLE);
+	/* Enable the MMU with ASID 0 */
+	mmu_setup_asid(NULL, 0);
 
-	/* In smp we use this reg for interrupt 1 scratch */
-#ifdef ARC_USE_SCRATCH_REG
-	/* swapper_pg_dir is the pgd for the kernel, used by vmalloc */
-	write_aux_reg(ARC_REG_SCRATCH_DATA0, swapper_pg_dir);
-#endif
+	/* cache the pgd pointer in MMU SCRATCH reg (ARCv2 only) */
+	mmu_setup_pgd(NULL, swapper_pg_dir);
 
 	if (pae40_exist_but_not_enab())
 		write_aux_reg(ARC_REG_TLBPD1HI, 0);
@@ -945,40 +778,3 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
 
 	local_irq_restore(flags);
 }
-
-/***********************************************************************
- * Diagnostic Routines
- *  -Called from Low Level TLB Handlers if things don;t look good
- **********************************************************************/
-
-#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
-
-/*
- * Low Level ASM TLB handler calls this if it finds that HW and SW ASIDS
- * don't match
- */
-void print_asid_mismatch(int mm_asid, int mmu_asid, int is_fast_path)
-{
-	pr_emerg("ASID Mismatch in %s Path Handler: sw-pid=0x%x hw-pid=0x%x\n",
-	       is_fast_path ? "Fast" : "Slow", mm_asid, mmu_asid);
-
-	__asm__ __volatile__("flag 1");
-}
-
-void tlb_paranoid_check(unsigned int mm_asid, unsigned long addr)
-{
-	unsigned int mmu_asid;
-
-	mmu_asid = read_aux_reg(ARC_REG_PID) & 0xff;
-
-	/*
-	 * At the time of a TLB miss/installation
-	 *   - HW version needs to match SW version
-	 *   - SW needs to have a valid ASID
-	 */
-	if (addr < 0x70000000 &&
-	    ((mm_asid == MM_CTXT_NO_ASID) ||
-	      (mmu_asid != (mm_asid & MM_CTXT_ASID_MASK))))
-		print_asid_mismatch(mm_asid, mmu_asid, 0);
-}
-#endif
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index 062fae46c3f8..e054780a8fe0 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -39,7 +39,6 @@
 #include <asm/arcregs.h>
 #include <asm/cache.h>
 #include <asm/processor.h>
-#include <asm/tlb-mmu1.h>
 
 #ifdef CONFIG_ISA_ARCOMPACT
 ;-----------------------------------------------------------------
@@ -94,11 +93,6 @@ ex_saved_reg1:
 	st_s  r1, [r0, 4]
 	st_s  r2, [r0, 8]
 	st_s  r3, [r0, 12]
-
-	; VERIFY if the ASID in MMU-PID Reg is same as
-	; one in Linux data structures
-
-	tlb_paranoid_check_asm
 .endm
 
 .macro TLBMISS_RESTORE_REGS
@@ -148,53 +142,16 @@ ex_saved_reg1:
 #endif
 
 ;============================================================================
-;  Troubleshooting Stuff
+;TLB Miss handling Code
 ;============================================================================
 
-; Linux keeps ASID (Address Space ID) in task->active_mm->context.asid
-; When Creating TLB Entries, instead of doing 3 dependent loads from memory,
-; we use the MMU PID Reg to get current ASID.
-; In bizzare scenrios SW and HW ASID can get out-of-sync which is trouble.
-; So we try to detect this in TLB Mis shandler
-
-.macro tlb_paranoid_check_asm
-
-#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
-
-	GET_CURR_TASK_ON_CPU  r3
-	ld r0, [r3, TASK_ACT_MM]
-	ld r0, [r0, MM_CTXT+MM_CTXT_ASID]
-	breq r0, 0, 55f	; Error if no ASID allocated
-
-	lr r1, [ARC_REG_PID]
-	and r1, r1, 0xFF
-
-	and r2, r0, 0xFF	; MMU PID bits only for comparison
-	breq r1, r2, 5f
-
-55:
-	; Error if H/w and S/w ASID don't match, but NOT if in kernel mode
-	lr  r2, [erstatus]
-	bbit0 r2, STATUS_U_BIT, 5f
-
-	; We sure are in troubled waters, Flag the error, but to do so
-	; need to switch to kernel mode stack to call error routine
-	GET_TSK_STACK_BASE   r3, sp
-
-	; Call printk to shoutout aloud
-	mov r2, 1
-	j print_asid_mismatch
-
-5:	; ASIDs match so proceed normally
-	nop
-
+#ifndef PMD_SHIFT
+#define PMD_SHIFT PUD_SHIFT
 #endif
 
-.endm
-
-;============================================================================
-;TLB Miss handling Code
-;============================================================================
+#ifndef PUD_SHIFT
+#define PUD_SHIFT PGDIR_SHIFT
+#endif
 
 ;-----------------------------------------------------------------------------
 ; This macro does the page-table lookup for the faulting address.
@@ -203,7 +160,7 @@ ex_saved_reg1:
 
 	lr  r2, [efa]
 
-#ifdef ARC_USE_SCRATCH_REG
+#ifdef CONFIG_ISA_ARCV2
 	lr  r1, [ARC_REG_SCRATCH_DATA0] ; current pgd
 #else
 	GET_CURR_TASK_ON_CPU  r1
@@ -216,6 +173,24 @@ ex_saved_reg1:
 	tst	r3, r3
 	bz	do_slow_path_pf         ; if no Page Table, do page fault
 
+#if CONFIG_PGTABLE_LEVELS > 3
+	lsr     r0, r2, PUD_SHIFT	; Bits for indexing into PUD
+	and	r0, r0, (PTRS_PER_PUD - 1)
+	ld.as	r1, [r3, r0]		; PMD entry
+	tst	r1, r1
+	bz	do_slow_path_pf
+	mov	r3, r1
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 2
+	lsr     r0, r2, PMD_SHIFT	; Bits for indexing into PMD
+	and	r0, r0, (PTRS_PER_PMD - 1)
+	ld.as	r1, [r3, r0]		; PMD entry
+	tst	r1, r1
+	bz	do_slow_path_pf
+	mov	r3, r1
+#endif
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	and.f	0, r3, _PAGE_HW_SZ	; Is this Huge PMD (thp)
 	add2.nz	r1, r1, r0
@@ -279,7 +254,7 @@ ex_saved_reg1:
 ; Commit the TLB entry into MMU
 
 .macro COMMIT_ENTRY_TO_MMU
-#if (CONFIG_ARC_MMU_VER < 4)
+#ifdef CONFIG_ARC_MMU_V3
 
 	/* Get free TLB slot: Set = computed from vaddr, way = random */
 	sr  TLBGetIndex, [ARC_REG_TLBCOMMAND]
@@ -375,13 +350,6 @@ ENTRY(EV_TLBMissD)
 
 	CONV_PTE_TO_TLB
 
-#if (CONFIG_ARC_MMU_VER == 1)
-	; MMU with 2 way set assoc J-TLB, needs some help in pathetic case of
-	; memcpy where 3 parties contend for 2 ways, ensuing a livelock.
-	; But only for old MMU or one with Metal Fix
-	TLB_WRITE_HEURISTICS
-#endif
-
 	COMMIT_ENTRY_TO_MMU
 	TLBMISS_RESTORE_REGS
 EV_TLBMissD_fast_ret:	; additional label for VDK OS-kit instrumentation
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 1a48c17a5909..fc196421b2ce 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -18,8 +18,8 @@ config ARM
 	select ARCH_HAS_SET_MEMORY
 	select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
 	select ARCH_HAS_STRICT_MODULE_RWX if MMU
-	select ARCH_HAS_SYNC_DMA_FOR_DEVICE if SWIOTLB
-	select ARCH_HAS_SYNC_DMA_FOR_CPU if SWIOTLB
+	select ARCH_HAS_SYNC_DMA_FOR_DEVICE if SWIOTLB || !MMU
+	select ARCH_HAS_SYNC_DMA_FOR_CPU if SWIOTLB || !MMU
 	select ARCH_HAS_TEARDOWN_DMA_OPS if MMU
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_HAVE_CUSTOM_GPIO_H
@@ -44,6 +44,7 @@ config ARM
 	select CPU_PM if SUSPEND || CPU_IDLE
 	select DCACHE_WORD_ACCESS if HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select DMA_DECLARE_COHERENT
+	select DMA_GLOBAL_POOL if !MMU
 	select DMA_OPS
 	select DMA_REMAP if MMU
 	select EDAC_SUPPORT
@@ -123,8 +124,8 @@ config ARM
 	select PCI_SYSCALL if PCI
 	select PERF_USE_VMALLOC
 	select RTC_LIB
-	select SET_FS
 	select SYS_SUPPORTS_APM_EMULATION
+	select TRACE_IRQFLAGS_SUPPORT if !CPU_V7M
 	# Above selects are sorted alphabetically; please add new ones
 	# according to that.  Thanks.
 	help
@@ -188,10 +189,6 @@ config LOCKDEP_SUPPORT
 	bool
 	default y
 
-config TRACE_IRQFLAGS_SUPPORT
-	bool
-	default !CPU_V7M
-
 config ARCH_HAS_ILOG2_U32
 	bool
 
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 173da685a52e..847c31e7c368 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -308,7 +308,8 @@ $(BOOT_TARGETS): vmlinux
 	@$(kecho) '  Kernel: $(boot)/$@ is ready'
 
 $(INSTALL_TARGETS):
-	$(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $@
+	$(CONFIG_SHELL) $(srctree)/$(boot)/install.sh "$(KERNELRELEASE)" \
+	$(boot)/$(patsubst %install,%Image,$@) System.map "$(INSTALL_PATH)"
 
 PHONY += vdso_install
 vdso_install:
diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile
index 0b3cd7a33a26..54a09f9464fb 100644
--- a/arch/arm/boot/Makefile
+++ b/arch/arm/boot/Makefile
@@ -96,23 +96,11 @@ $(obj)/bootp/bootp: $(obj)/zImage initrd FORCE
 $(obj)/bootpImage: $(obj)/bootp/bootp FORCE
 	$(call if_changed,objcopy)
 
-PHONY += initrd install zinstall uinstall
+PHONY += initrd
 initrd:
 	@test "$(INITRD_PHYS)" != "" || \
 	(echo This machine does not support INITRD; exit -1)
 	@test "$(INITRD)" != "" || \
 	(echo You must specify INITRD; exit -1)
 
-install:
-	$(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \
-	$(obj)/Image System.map "$(INSTALL_PATH)"
-
-zinstall:
-	$(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \
-	$(obj)/zImage System.map "$(INSTALL_PATH)"
-
-uinstall:
-	$(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \
-	$(obj)/uImage System.map "$(INSTALL_PATH)"
-
 subdir-	    := bootp compressed dts
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 9d91ae1091b0..91265e7ff672 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -85,6 +85,8 @@ compress-$(CONFIG_KERNEL_LZ4)  = lz4
 libfdt_objs := fdt_rw.o fdt_ro.o fdt_wip.o fdt.o
 
 ifeq ($(CONFIG_ARM_ATAG_DTB_COMPAT),y)
+CFLAGS_REMOVE_atags_to_fdt.o += -Wframe-larger-than=${CONFIG_FRAME_WARN}
+CFLAGS_atags_to_fdt.o += -Wframe-larger-than=1280
 OBJS	+= $(libfdt_objs) atags_to_fdt.o
 endif
 ifeq ($(CONFIG_USE_OF),y)
diff --git a/arch/arm/boot/dts/mt8135.dtsi b/arch/arm/boot/dts/mt8135.dtsi
index 0e4e835026db..a031b3636318 100644
--- a/arch/arm/boot/dts/mt8135.dtsi
+++ b/arch/arm/boot/dts/mt8135.dtsi
@@ -9,7 +9,7 @@
 #include <dt-bindings/interrupt-controller/irq.h>
 #include <dt-bindings/interrupt-controller/arm-gic.h>
 #include <dt-bindings/reset/mt8135-resets.h>
-#include "mt8135-pinfunc.h"
+#include <dt-bindings/pinctrl/mt8135-pinfunc.h>
 
 / {
 	#address-cells = <2>;
diff --git a/arch/arm/boot/dts/omap34xx.dtsi b/arch/arm/boot/dts/omap34xx.dtsi
index feaa43b78535..8b8451399784 100644
--- a/arch/arm/boot/dts/omap34xx.dtsi
+++ b/arch/arm/boot/dts/omap34xx.dtsi
@@ -24,7 +24,6 @@
 		};
 	};
 
-	/* see Documentation/devicetree/bindings/opp/opp.txt */
 	cpu0_opp_table: opp-table {
 		compatible = "operating-points-v2-ti-cpu";
 		syscon = <&scm_conf>;
diff --git a/arch/arm/boot/dts/omap36xx.dtsi b/arch/arm/boot/dts/omap36xx.dtsi
index 20844dbc002e..22b33098b1a2 100644
--- a/arch/arm/boot/dts/omap36xx.dtsi
+++ b/arch/arm/boot/dts/omap36xx.dtsi
@@ -29,7 +29,6 @@
 		};
 	};
 
-	/* see Documentation/devicetree/bindings/opp/opp.txt */
 	cpu0_opp_table: opp-table {
 		compatible = "operating-points-v2-ti-cpu";
 		syscon = <&scm_conf>;
diff --git a/arch/arm/configs/dove_defconfig b/arch/arm/configs/dove_defconfig
index b935162a8bba..33074fdab2ea 100644
--- a/arch/arm/configs/dove_defconfig
+++ b/arch/arm/configs/dove_defconfig
@@ -56,7 +56,6 @@ CONFIG_ATA=y
 CONFIG_SATA_MV=y
 CONFIG_NETDEVICES=y
 CONFIG_MV643XX_ETH=y
-CONFIG_INPUT_POLLDEV=y
 # CONFIG_INPUT_MOUSEDEV is not set
 CONFIG_INPUT_EVDEV=y
 # CONFIG_KEYBOARD_ATKBD is not set
diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig
index 363f1b1b08e3..58f4834289e6 100644
--- a/arch/arm/configs/pxa_defconfig
+++ b/arch/arm/configs/pxa_defconfig
@@ -284,7 +284,6 @@ CONFIG_RT2800USB=m
 CONFIG_MWIFIEX=m
 CONFIG_MWIFIEX_SDIO=m
 CONFIG_INPUT_FF_MEMLESS=m
-CONFIG_INPUT_POLLDEV=y
 CONFIG_INPUT_MATRIXKMAP=y
 CONFIG_INPUT_MOUSEDEV=m
 CONFIG_INPUT_MOUSEDEV_SCREEN_X=640
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 2e24e765e6d3..5e56288e343b 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -291,6 +291,7 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *);
 
+#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1
 static inline void flush_kernel_vmap_range(void *addr, int size)
 {
 	if ((cache_is_vivt() || cache_is_vipt_aliasing()))
@@ -312,9 +313,6 @@ static inline void flush_anon_page(struct vm_area_struct *vma,
 		__flush_anon_page(vma, page, vmaddr);
 }
 
-#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
-extern void flush_kernel_dcache_page(struct page *);
-
 #define flush_dcache_mmap_lock(mapping)		xa_lock_irq(&mapping->i_pages)
 #define flush_dcache_mmap_unlock(mapping)	xa_unlock_irq(&mapping->i_pages)
 
diff --git a/arch/arm/include/asm/div64.h b/arch/arm/include/asm/div64.h
index 595e538f5bfb..4b69cf850451 100644
--- a/arch/arm/include/asm/div64.h
+++ b/arch/arm/include/asm/div64.h
@@ -52,17 +52,6 @@ static inline uint32_t __div64_32(uint64_t *n, uint32_t base)
 
 #else
 
-/*
- * gcc versions earlier than 4.0 are simply too problematic for the
- * __div64_const32() code in asm-generic/div64.h. First there is
- * gcc PR 15089 that tend to trig on more complex constructs, spurious
- * .global __udivsi3 are inserted even if none of those symbols are
- * referenced in the generated code, and those gcc versions are not able
- * to do constant propagation on long long values anyway.
- */
-
-#define __div64_const32_is_OK (__GNUC__ >= 4)
-
 static inline uint64_t __arch_xprod_64(uint64_t m, uint64_t n, bool bias)
 {
 	unsigned long long res;
diff --git a/arch/arm/include/asm/gpio.h b/arch/arm/include/asm/gpio.h
index c50e383358c4..f3bb8a2bf788 100644
--- a/arch/arm/include/asm/gpio.h
+++ b/arch/arm/include/asm/gpio.h
@@ -2,10 +2,6 @@
 #ifndef _ARCH_ARM_GPIO_H
 #define _ARCH_ARM_GPIO_H
 
-#if CONFIG_ARCH_NR_GPIO > 0
-#define ARCH_NR_GPIOS CONFIG_ARCH_NR_GPIO
-#endif
-
 /* Note: this may rely upon the value of ARCH_NR_GPIOS set in mach/gpio.h */
 #include <asm-generic/gpio.h>
 
diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
index 91d6b7856be4..93051e2f402c 100644
--- a/arch/arm/include/asm/ptrace.h
+++ b/arch/arm/include/asm/ptrace.h
@@ -19,7 +19,6 @@ struct pt_regs {
 struct svc_pt_regs {
 	struct pt_regs regs;
 	u32 dacr;
-	u32 addr_limit;
 };
 
 #define to_svc_pt_regs(r) container_of(r, struct svc_pt_regs, regs)
diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h
index fd02761ba06c..24c19d63ff0a 100644
--- a/arch/arm/include/asm/syscall.h
+++ b/arch/arm/include/asm/syscall.h
@@ -22,7 +22,21 @@ extern const unsigned long sys_call_table[];
 static inline int syscall_get_nr(struct task_struct *task,
 				 struct pt_regs *regs)
 {
-	return task_thread_info(task)->syscall;
+	if (IS_ENABLED(CONFIG_AEABI) && !IS_ENABLED(CONFIG_OABI_COMPAT))
+		return task_thread_info(task)->abi_syscall;
+
+	return task_thread_info(task)->abi_syscall & __NR_SYSCALL_MASK;
+}
+
+static inline bool __in_oabi_syscall(struct task_struct *task)
+{
+	return IS_ENABLED(CONFIG_OABI_COMPAT) &&
+		(task_thread_info(task)->abi_syscall & __NR_OABI_SYSCALL_BASE);
+}
+
+static inline bool in_oabi_syscall(void)
+{
+	return __in_oabi_syscall(current);
 }
 
 static inline void syscall_rollback(struct task_struct *task,
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index a02799bd0cdf..9a18da3e10cc 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -31,8 +31,6 @@ struct task_struct;
 
 #include <asm/types.h>
 
-typedef unsigned long mm_segment_t;
-
 struct cpu_context_save {
 	__u32	r4;
 	__u32	r5;
@@ -54,7 +52,6 @@ struct cpu_context_save {
 struct thread_info {
 	unsigned long		flags;		/* low level flags */
 	int			preempt_count;	/* 0 => preemptable, <0 => bug */
-	mm_segment_t		addr_limit;	/* address limit */
 	struct task_struct	*task;		/* main task structure */
 	__u32			cpu;		/* cpu */
 	__u32			cpu_domain;	/* cpu domain */
@@ -62,7 +59,7 @@ struct thread_info {
 	unsigned long		stack_canary;
 #endif
 	struct cpu_context_save	cpu_context;	/* cpu context */
-	__u32			syscall;	/* syscall number */
+	__u32			abi_syscall;	/* ABI type and syscall nr */
 	__u8			used_cp[16];	/* thread used copro */
 	unsigned long		tp_value[2];	/* TLS registers */
 	union fp_state		fpstate __attribute__((aligned(8)));
@@ -77,7 +74,6 @@ struct thread_info {
 	.task		= &tsk,						\
 	.flags		= 0,						\
 	.preempt_count	= INIT_PREEMPT_COUNT,				\
-	.addr_limit	= KERNEL_DS,					\
 }
 
 /*
diff --git a/arch/arm/include/asm/uaccess-asm.h b/arch/arm/include/asm/uaccess-asm.h
index e6eb7a2aaf1e..6451a433912c 100644
--- a/arch/arm/include/asm/uaccess-asm.h
+++ b/arch/arm/include/asm/uaccess-asm.h
@@ -84,12 +84,8 @@
 	 * if \disable is set.
 	 */
 	.macro	uaccess_entry, tsk, tmp0, tmp1, tmp2, disable
-	ldr	\tmp1, [\tsk, #TI_ADDR_LIMIT]
-	ldr	\tmp2, =TASK_SIZE
-	str	\tmp2, [\tsk, #TI_ADDR_LIMIT]
  DACR(	mrc	p15, 0, \tmp0, c3, c0, 0)
  DACR(	str	\tmp0, [sp, #SVC_DACR])
-	str	\tmp1, [sp, #SVC_ADDR_LIMIT]
 	.if \disable && IS_ENABLED(CONFIG_CPU_SW_DOMAIN_PAN)
 	/* kernel=client, user=no access */
 	mov	\tmp2, #DACR_UACCESS_DISABLE
@@ -106,9 +102,7 @@
 
 	/* Restore the user access state previously saved by uaccess_entry */
 	.macro	uaccess_exit, tsk, tmp0, tmp1
-	ldr	\tmp1, [sp, #SVC_ADDR_LIMIT]
  DACR(	ldr	\tmp0, [sp, #SVC_DACR])
-	str	\tmp1, [\tsk, #TI_ADDR_LIMIT]
  DACR(	mcr	p15, 0, \tmp0, c3, c0, 0)
 	.endm
 
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index a13d90206472..084d1c07c2d0 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -52,32 +52,8 @@ static __always_inline void uaccess_restore(unsigned int flags)
 extern int __get_user_bad(void);
 extern int __put_user_bad(void);
 
-/*
- * Note that this is actually 0x1,0000,0000
- */
-#define KERNEL_DS	0x00000000
-
 #ifdef CONFIG_MMU
 
-#define USER_DS		TASK_SIZE
-#define get_fs()	(current_thread_info()->addr_limit)
-
-static inline void set_fs(mm_segment_t fs)
-{
-	current_thread_info()->addr_limit = fs;
-
-	/*
-	 * Prevent a mispredicted conditional call to set_fs from forwarding
-	 * the wrong address limit to access_ok under speculation.
-	 */
-	dsb(nsh);
-	isb();
-
-	modify_domain(DOMAIN_KERNEL, fs ? DOMAIN_CLIENT : DOMAIN_MANAGER);
-}
-
-#define uaccess_kernel()	(get_fs() == KERNEL_DS)
-
 /*
  * We use 33-bit arithmetic here.  Success returns zero, failure returns
  * addr_limit.  We take advantage that addr_limit will be zero for KERNEL_DS,
@@ -89,7 +65,7 @@ static inline void set_fs(mm_segment_t fs)
 	__asm__(".syntax unified\n" \
 		"adds %1, %2, %3; sbcscc %1, %1, %0; movcc %0, #0" \
 		: "=&r" (flag), "=&r" (roksum) \
-		: "r" (addr), "Ir" (size), "0" (current_thread_info()->addr_limit) \
+		: "r" (addr), "Ir" (size), "0" (TASK_SIZE) \
 		: "cc"); \
 	flag; })
 
@@ -120,7 +96,7 @@ static inline void __user *__uaccess_mask_range_ptr(const void __user *ptr,
 	"	subshs	%1, %1, %2\n"
 	"	movlo	%0, #0\n"
 	: "+r" (safe_ptr), "=&r" (tmp)
-	: "r" (size), "r" (current_thread_info()->addr_limit)
+	: "r" (size), "r" (TASK_SIZE)
 	: "cc");
 
 	csdb();
@@ -194,7 +170,7 @@ extern int __get_user_64t_4(void *);
 
 #define __get_user_check(x, p)						\
 	({								\
-		unsigned long __limit = current_thread_info()->addr_limit - 1; \
+		unsigned long __limit = TASK_SIZE - 1; \
 		register typeof(*(p)) __user *__p asm("r0") = (p);	\
 		register __inttype(x) __r2 asm("r2");			\
 		register unsigned long __l asm("r1") = __limit;		\
@@ -245,7 +221,7 @@ extern int __put_user_8(void *, unsigned long long);
 
 #define __put_user_check(__pu_val, __ptr, __err, __s)			\
 	({								\
-		unsigned long __limit = current_thread_info()->addr_limit - 1; \
+		unsigned long __limit = TASK_SIZE - 1; \
 		register typeof(__pu_val) __r2 asm("r2") = __pu_val;	\
 		register const void __user *__p asm("r0") = __ptr;	\
 		register unsigned long __l asm("r1") = __limit;		\
@@ -262,19 +238,8 @@ extern int __put_user_8(void *, unsigned long long);
 
 #else /* CONFIG_MMU */
 
-/*
- * uClinux has only one addr space, so has simplified address limits.
- */
-#define USER_DS			KERNEL_DS
-
-#define uaccess_kernel()	(true)
 #define __addr_ok(addr)		((void)(addr), 1)
 #define __range_ok(addr, size)	((void)(addr), 0)
-#define get_fs()		(KERNEL_DS)
-
-static inline void set_fs(mm_segment_t fs)
-{
-}
 
 #define get_user(x, p)	__get_user(x, p)
 #define __put_user_check __put_user_nocheck
@@ -283,9 +248,6 @@ static inline void set_fs(mm_segment_t fs)
 
 #define access_ok(addr, size)	(__range_ok(addr, size) == 0)
 
-#define user_addr_max() \
-	(uaccess_kernel() ? ~0UL : get_fs())
-
 #ifdef CONFIG_CPU_SPECTRE
 /*
  * When mitigating Spectre variant 1, it is not worth fixing the non-
@@ -308,11 +270,11 @@ static inline void set_fs(mm_segment_t fs)
 #define __get_user(x, ptr)						\
 ({									\
 	long __gu_err = 0;						\
-	__get_user_err((x), (ptr), __gu_err);				\
+	__get_user_err((x), (ptr), __gu_err, TUSER());			\
 	__gu_err;							\
 })
 
-#define __get_user_err(x, ptr, err)					\
+#define __get_user_err(x, ptr, err, __t)				\
 do {									\
 	unsigned long __gu_addr = (unsigned long)(ptr);			\
 	unsigned long __gu_val;						\
@@ -321,18 +283,19 @@ do {									\
 	might_fault();							\
 	__ua_flags = uaccess_save_and_enable();				\
 	switch (sizeof(*(ptr))) {					\
-	case 1:	__get_user_asm_byte(__gu_val, __gu_addr, err);	break;	\
-	case 2:	__get_user_asm_half(__gu_val, __gu_addr, err);	break;	\
-	case 4:	__get_user_asm_word(__gu_val, __gu_addr, err);	break;	\
+	case 1:	__get_user_asm_byte(__gu_val, __gu_addr, err, __t); break;	\
+	case 2:	__get_user_asm_half(__gu_val, __gu_addr, err, __t); break;	\
+	case 4:	__get_user_asm_word(__gu_val, __gu_addr, err, __t); break;	\
 	default: (__gu_val) = __get_user_bad();				\
 	}								\
 	uaccess_restore(__ua_flags);					\
 	(x) = (__typeof__(*(ptr)))__gu_val;				\
 } while (0)
+#endif
 
 #define __get_user_asm(x, addr, err, instr)			\
 	__asm__ __volatile__(					\
-	"1:	" TUSER(instr) " %1, [%2], #0\n"		\
+	"1:	" instr " %1, [%2], #0\n"			\
 	"2:\n"							\
 	"	.pushsection .text.fixup,\"ax\"\n"		\
 	"	.align	2\n"					\
@@ -348,40 +311,38 @@ do {									\
 	: "r" (addr), "i" (-EFAULT)				\
 	: "cc")
 
-#define __get_user_asm_byte(x, addr, err)			\
-	__get_user_asm(x, addr, err, ldrb)
+#define __get_user_asm_byte(x, addr, err, __t)			\
+	__get_user_asm(x, addr, err, "ldrb" __t)
 
 #if __LINUX_ARM_ARCH__ >= 6
 
-#define __get_user_asm_half(x, addr, err)			\
-	__get_user_asm(x, addr, err, ldrh)
+#define __get_user_asm_half(x, addr, err, __t)			\
+	__get_user_asm(x, addr, err, "ldrh" __t)
 
 #else
 
 #ifndef __ARMEB__
-#define __get_user_asm_half(x, __gu_addr, err)			\
+#define __get_user_asm_half(x, __gu_addr, err, __t)		\
 ({								\
 	unsigned long __b1, __b2;				\
-	__get_user_asm_byte(__b1, __gu_addr, err);		\
-	__get_user_asm_byte(__b2, __gu_addr + 1, err);		\
+	__get_user_asm_byte(__b1, __gu_addr, err, __t);		\
+	__get_user_asm_byte(__b2, __gu_addr + 1, err, __t);	\
 	(x) = __b1 | (__b2 << 8);				\
 })
 #else
-#define __get_user_asm_half(x, __gu_addr, err)			\
+#define __get_user_asm_half(x, __gu_addr, err, __t)		\
 ({								\
 	unsigned long __b1, __b2;				\
-	__get_user_asm_byte(__b1, __gu_addr, err);		\
-	__get_user_asm_byte(__b2, __gu_addr + 1, err);		\
+	__get_user_asm_byte(__b1, __gu_addr, err, __t);		\
+	__get_user_asm_byte(__b2, __gu_addr + 1, err, __t);	\
 	(x) = (__b1 << 8) | __b2;				\
 })
 #endif
 
 #endif /* __LINUX_ARM_ARCH__ >= 6 */
 
-#define __get_user_asm_word(x, addr, err)			\
-	__get_user_asm(x, addr, err, ldr)
-#endif
-
+#define __get_user_asm_word(x, addr, err, __t)			\
+	__get_user_asm(x, addr, err, "ldr" __t)
 
 #define __put_user_switch(x, ptr, __err, __fn)				\
 	do {								\
@@ -425,7 +386,7 @@ do {									\
 #define __put_user_nocheck(x, __pu_ptr, __err, __size)			\
 	do {								\
 		unsigned long __pu_addr = (unsigned long)__pu_ptr;	\
-		__put_user_nocheck_##__size(x, __pu_addr, __err);	\
+		__put_user_nocheck_##__size(x, __pu_addr, __err, TUSER());\
 	} while (0)
 
 #define __put_user_nocheck_1 __put_user_asm_byte
@@ -433,9 +394,11 @@ do {									\
 #define __put_user_nocheck_4 __put_user_asm_word
 #define __put_user_nocheck_8 __put_user_asm_dword
 
+#endif /* !CONFIG_CPU_SPECTRE */
+
 #define __put_user_asm(x, __pu_addr, err, instr)		\
 	__asm__ __volatile__(					\
-	"1:	" TUSER(instr) " %1, [%2], #0\n"		\
+	"1:	" instr " %1, [%2], #0\n"		\
 	"2:\n"							\
 	"	.pushsection .text.fixup,\"ax\"\n"		\
 	"	.align	2\n"					\
@@ -450,36 +413,36 @@ do {									\
 	: "r" (x), "r" (__pu_addr), "i" (-EFAULT)		\
 	: "cc")
 
-#define __put_user_asm_byte(x, __pu_addr, err)			\
-	__put_user_asm(x, __pu_addr, err, strb)
+#define __put_user_asm_byte(x, __pu_addr, err, __t)		\
+	__put_user_asm(x, __pu_addr, err, "strb" __t)
 
 #if __LINUX_ARM_ARCH__ >= 6
 
-#define __put_user_asm_half(x, __pu_addr, err)			\
-	__put_user_asm(x, __pu_addr, err, strh)
+#define __put_user_asm_half(x, __pu_addr, err, __t)		\
+	__put_user_asm(x, __pu_addr, err, "strh" __t)
 
 #else
 
 #ifndef __ARMEB__
-#define __put_user_asm_half(x, __pu_addr, err)			\
+#define __put_user_asm_half(x, __pu_addr, err, __t)		\
 ({								\
 	unsigned long __temp = (__force unsigned long)(x);	\
-	__put_user_asm_byte(__temp, __pu_addr, err);		\
-	__put_user_asm_byte(__temp >> 8, __pu_addr + 1, err);	\
+	__put_user_asm_byte(__temp, __pu_addr, err, __t);	\
+	__put_user_asm_byte(__temp >> 8, __pu_addr + 1, err, __t);\
 })
 #else
-#define __put_user_asm_half(x, __pu_addr, err)			\
+#define __put_user_asm_half(x, __pu_addr, err, __t)		\
 ({								\
 	unsigned long __temp = (__force unsigned long)(x);	\
-	__put_user_asm_byte(__temp >> 8, __pu_addr, err);	\
-	__put_user_asm_byte(__temp, __pu_addr + 1, err);	\
+	__put_user_asm_byte(__temp >> 8, __pu_addr, err, __t);	\
+	__put_user_asm_byte(__temp, __pu_addr + 1, err, __t);	\
 })
 #endif
 
 #endif /* __LINUX_ARM_ARCH__ >= 6 */
 
-#define __put_user_asm_word(x, __pu_addr, err)			\
-	__put_user_asm(x, __pu_addr, err, str)
+#define __put_user_asm_word(x, __pu_addr, err, __t)		\
+	__put_user_asm(x, __pu_addr, err, "str" __t)
 
 #ifndef __ARMEB__
 #define	__reg_oper0	"%R2"
@@ -489,12 +452,12 @@ do {									\
 #define	__reg_oper1	"%R2"
 #endif
 
-#define __put_user_asm_dword(x, __pu_addr, err)			\
+#define __put_user_asm_dword(x, __pu_addr, err, __t)		\
 	__asm__ __volatile__(					\
- ARM(	"1:	" TUSER(str) "	" __reg_oper1 ", [%1], #4\n"	) \
- ARM(	"2:	" TUSER(str) "	" __reg_oper0 ", [%1]\n"	) \
- THUMB(	"1:	" TUSER(str) "	" __reg_oper1 ", [%1]\n"	) \
- THUMB(	"2:	" TUSER(str) "	" __reg_oper0 ", [%1, #4]\n"	) \
+ ARM(	"1:	str" __t "	" __reg_oper1 ", [%1], #4\n"  ) \
+ ARM(	"2:	str" __t "	" __reg_oper0 ", [%1]\n"      ) \
+ THUMB(	"1:	str" __t "	" __reg_oper1 ", [%1]\n"      ) \
+ THUMB(	"2:	str" __t "	" __reg_oper0 ", [%1, #4]\n"  ) \
 	"3:\n"							\
 	"	.pushsection .text.fixup,\"ax\"\n"		\
 	"	.align	2\n"					\
@@ -510,7 +473,49 @@ do {									\
 	: "r" (x), "i" (-EFAULT)				\
 	: "cc")
 
-#endif /* !CONFIG_CPU_SPECTRE */
+#define HAVE_GET_KERNEL_NOFAULT
+
+#define __get_kernel_nofault(dst, src, type, err_label)			\
+do {									\
+	const type *__pk_ptr = (src);					\
+	unsigned long __src = (unsigned long)(__pk_ptr);		\
+	type __val;							\
+	int __err = 0;							\
+	switch (sizeof(type)) {						\
+	case 1:	__get_user_asm_byte(__val, __src, __err, ""); break;	\
+	case 2: __get_user_asm_half(__val, __src, __err, ""); break;	\
+	case 4: __get_user_asm_word(__val, __src, __err, ""); break;	\
+	case 8: {							\
+		u32 *__v32 = (u32*)&__val;				\
+		__get_user_asm_word(__v32[0], __src, __err, "");	\
+		if (__err)						\
+			break;						\
+		__get_user_asm_word(__v32[1], __src+4, __err, "");	\
+		break;							\
+	}								\
+	default: __err = __get_user_bad(); break;			\
+	}								\
+	*(type *)(dst) = __val;						\
+	if (__err)							\
+		goto err_label;						\
+} while (0)
+
+#define __put_kernel_nofault(dst, src, type, err_label)			\
+do {									\
+	const type *__pk_ptr = (dst);					\
+	unsigned long __dst = (unsigned long)__pk_ptr;			\
+	int __err = 0;							\
+	type __val = *(type *)src;					\
+	switch (sizeof(type)) {						\
+	case 1: __put_user_asm_byte(__val, __dst, __err, ""); break;	\
+	case 2:	__put_user_asm_half(__val, __dst, __err, ""); break;	\
+	case 4:	__put_user_asm_word(__val, __dst, __err, ""); break;	\
+	case 8:	__put_user_asm_dword(__val, __dst, __err, ""); break;	\
+	default: __err = __put_user_bad(); break;			\
+	}								\
+	if (__err)							\
+		goto err_label;						\
+} while (0)
 
 #ifdef CONFIG_MMU
 extern unsigned long __must_check
diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h
index 1e2c3eb04353..ce9689118dbb 100644
--- a/arch/arm/include/asm/unified.h
+++ b/arch/arm/include/asm/unified.h
@@ -24,10 +24,6 @@ __asm__(".syntax unified");
 
 #ifdef CONFIG_THUMB2_KERNEL
 
-#if __GNUC__ < 4
-#error Thumb-2 kernel requires gcc >= 4
-#endif
-
 /* The CPSR bit describing the instruction set (Thumb) */
 #define PSR_ISETSTATE	PSR_T_BIT
 
diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h
index ae7749e15726..a1149911464c 100644
--- a/arch/arm/include/uapi/asm/unistd.h
+++ b/arch/arm/include/uapi/asm/unistd.h
@@ -15,6 +15,7 @@
 #define _UAPI__ASM_ARM_UNISTD_H
 
 #define __NR_OABI_SYSCALL_BASE	0x900000
+#define __NR_SYSCALL_MASK	0x0fffff
 
 #if defined(__thumb__) || defined(__ARM_EABI__)
 #define __NR_SYSCALL_BASE	0
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 64944701bf6a..a646a3f6440f 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -43,11 +43,11 @@ int main(void)
   BLANK();
   DEFINE(TI_FLAGS,		offsetof(struct thread_info, flags));
   DEFINE(TI_PREEMPT,		offsetof(struct thread_info, preempt_count));
-  DEFINE(TI_ADDR_LIMIT,		offsetof(struct thread_info, addr_limit));
   DEFINE(TI_TASK,		offsetof(struct thread_info, task));
   DEFINE(TI_CPU,		offsetof(struct thread_info, cpu));
   DEFINE(TI_CPU_DOMAIN,		offsetof(struct thread_info, cpu_domain));
   DEFINE(TI_CPU_SAVE,		offsetof(struct thread_info, cpu_context));
+  DEFINE(TI_ABI_SYSCALL,	offsetof(struct thread_info, abi_syscall));
   DEFINE(TI_USED_CP,		offsetof(struct thread_info, used_cp));
   DEFINE(TI_TP_VALUE,		offsetof(struct thread_info, tp_value));
   DEFINE(TI_FPSTATE,		offsetof(struct thread_info, fpstate));
@@ -88,7 +88,6 @@ int main(void)
   DEFINE(S_OLD_R0,		offsetof(struct pt_regs, ARM_ORIG_r0));
   DEFINE(PT_REGS_SIZE,		sizeof(struct pt_regs));
   DEFINE(SVC_DACR,		offsetof(struct svc_pt_regs, dacr));
-  DEFINE(SVC_ADDR_LIMIT,	offsetof(struct svc_pt_regs, addr_limit));
   DEFINE(SVC_REGS_SIZE,		sizeof(struct svc_pt_regs));
   BLANK();
   DEFINE(SIGFRAME_RC3_OFFSET,	offsetof(struct sigframe, retcode[3]));
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 7f0b7aba1498..d9c99db50243 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -49,10 +49,6 @@ __ret_fast_syscall:
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	disable_irq_notrace			@ disable interrupts
-	ldr	r2, [tsk, #TI_ADDR_LIMIT]
-	ldr	r1, =TASK_SIZE
-	cmp	r2, r1
-	blne	addr_limit_check_failed
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
 	movs	r1, r1, lsl #16
 	bne	fast_work_pending
@@ -87,10 +83,6 @@ __ret_fast_syscall:
 	bl	do_rseq_syscall
 #endif
 	disable_irq_notrace			@ disable interrupts
-	ldr	r2, [tsk, #TI_ADDR_LIMIT]
-	ldr     r1, =TASK_SIZE
-	cmp     r2, r1
-	blne	addr_limit_check_failed
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
 	movs	r1, r1, lsl #16
 	beq	no_work_pending
@@ -129,10 +121,6 @@ ret_slow_syscall:
 #endif
 	disable_irq_notrace			@ disable interrupts
 ENTRY(ret_to_user_from_irq)
-	ldr	r2, [tsk, #TI_ADDR_LIMIT]
-	ldr     r1, =TASK_SIZE
-	cmp	r2, r1
-	blne	addr_limit_check_failed
 	ldr	r1, [tsk, #TI_FLAGS]
 	movs	r1, r1, lsl #16
 	bne	slow_work_pending
@@ -226,6 +214,7 @@ ENTRY(vector_swi)
 	/* saved_psr and saved_pc are now dead */
 
 	uaccess_disable tbl
+	get_thread_info tsk
 
 	adr	tbl, sys_call_table		@ load syscall table pointer
 
@@ -237,13 +226,17 @@ ENTRY(vector_swi)
 	 * get the old ABI syscall table address.
 	 */
 	bics	r10, r10, #0xff000000
+	strne	r10, [tsk, #TI_ABI_SYSCALL]
+	streq	scno, [tsk, #TI_ABI_SYSCALL]
 	eorne	scno, r10, #__NR_OABI_SYSCALL_BASE
 	ldrne	tbl, =sys_oabi_call_table
 #elif !defined(CONFIG_AEABI)
 	bic	scno, scno, #0xff000000		@ mask off SWI op-code
+	str	scno, [tsk, #TI_ABI_SYSCALL]
 	eor	scno, scno, #__NR_SYSCALL_BASE	@ check OS number
+#else
+	str	scno, [tsk, #TI_ABI_SYSCALL]
 #endif
-	get_thread_info tsk
 	/*
 	 * Reload the registers that may have been corrupted on entry to
 	 * the syscall assembly (by tracing or context tracking.)
@@ -288,7 +281,6 @@ ENDPROC(vector_swi)
 	 * context switches, and waiting for our parent to respond.
 	 */
 __sys_trace:
-	mov	r1, scno
 	add	r0, sp, #S_OFF
 	bl	syscall_trace_enter
 	mov	scno, r0
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index fc9e8b37eaa8..0e2d3051741e 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -5,8 +5,6 @@
  *  Copyright (C) 1996-2000 Russell King - Converted to ARM.
  *  Original Copyright (C) 1995  Linus Torvalds
  */
-#include <stdarg.h>
-
 #include <linux/export.h>
 #include <linux/sched.h>
 #include <linux/sched/debug.h>
@@ -108,7 +106,7 @@ void __show_regs(struct pt_regs *regs)
 	unsigned long flags;
 	char buf[64];
 #ifndef CONFIG_CPU_V7M
-	unsigned int domain, fs;
+	unsigned int domain;
 #ifdef CONFIG_CPU_SW_DOMAIN_PAN
 	/*
 	 * Get the domain register for the parent context. In user
@@ -117,14 +115,11 @@ void __show_regs(struct pt_regs *regs)
 	 */
 	if (user_mode(regs)) {
 		domain = DACR_UACCESS_ENABLE;
-		fs = get_fs();
 	} else {
 		domain = to_svc_pt_regs(regs)->dacr;
-		fs = to_svc_pt_regs(regs)->addr_limit;
 	}
 #else
 	domain = get_domain();
-	fs = get_fs();
 #endif
 #endif
 
@@ -160,8 +155,6 @@ void __show_regs(struct pt_regs *regs)
 		if ((domain & domain_mask(DOMAIN_USER)) ==
 		    domain_val(DOMAIN_USER, DOMAIN_NOACCESS))
 			segment = "none";
-		else if (fs == KERNEL_DS)
-			segment = "kernel";
 		else
 			segment = "user";
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index b008859680bc..43b963ea4a0e 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -25,6 +25,7 @@
 #include <linux/tracehook.h>
 #include <linux/unistd.h>
 
+#include <asm/syscall.h>
 #include <asm/traps.h>
 
 #define CREATE_TRACE_POINTS
@@ -785,7 +786,8 @@ long arch_ptrace(struct task_struct *child, long request,
 			break;
 
 		case PTRACE_SET_SYSCALL:
-			task_thread_info(child)->syscall = data;
+			task_thread_info(child)->abi_syscall = data &
+							__NR_SYSCALL_MASK;
 			ret = 0;
 			break;
 
@@ -844,14 +846,14 @@ static void tracehook_report_syscall(struct pt_regs *regs,
 	if (dir == PTRACE_SYSCALL_EXIT)
 		tracehook_report_syscall_exit(regs, 0);
 	else if (tracehook_report_syscall_entry(regs))
-		current_thread_info()->syscall = -1;
+		current_thread_info()->abi_syscall = -1;
 
 	regs->ARM_ip = ip;
 }
 
-asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno)
+asmlinkage int syscall_trace_enter(struct pt_regs *regs)
 {
-	current_thread_info()->syscall = scno;
+	int scno;
 
 	if (test_thread_flag(TIF_SYSCALL_TRACE))
 		tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
@@ -862,11 +864,11 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno)
 		return -1;
 #else
 	/* XXX: remove this once OABI gets fixed */
-	secure_computing_strict(current_thread_info()->syscall);
+	secure_computing_strict(syscall_get_nr(current, regs));
 #endif
 
 	/* Tracer or seccomp may have changed syscall. */
-	scno = current_thread_info()->syscall;
+	scno = syscall_get_nr(current, regs);
 
 	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
 		trace_sys_enter(regs, scno);
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index f97eb2371672..284a80c0b6e1 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -1012,31 +1012,25 @@ static void __init reserve_crashkernel(void)
 		unsigned long long lowmem_max = __pa(high_memory - 1) + 1;
 		if (crash_max > lowmem_max)
 			crash_max = lowmem_max;
-		crash_base = memblock_find_in_range(CRASH_ALIGN, crash_max,
-						    crash_size, CRASH_ALIGN);
+
+		crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
+						       CRASH_ALIGN, crash_max);
 		if (!crash_base) {
 			pr_err("crashkernel reservation failed - No suitable area found.\n");
 			return;
 		}
 	} else {
+		unsigned long long crash_max = crash_base + crash_size;
 		unsigned long long start;
 
-		start = memblock_find_in_range(crash_base,
-					       crash_base + crash_size,
-					       crash_size, SECTION_SIZE);
-		if (start != crash_base) {
+		start = memblock_phys_alloc_range(crash_size, SECTION_SIZE,
+						  crash_base, crash_max);
+		if (!start) {
 			pr_err("crashkernel reservation failed - memory is in use.\n");
 			return;
 		}
 	}
 
-	ret = memblock_reserve(crash_base, crash_size);
-	if (ret < 0) {
-		pr_warn("crashkernel reservation failed - memory is in use (0x%lx)\n",
-			(unsigned long)crash_base);
-		return;
-	}
-
 	pr_info("Reserving %ldMB of memory at %ldMB for crashkernel (System RAM: %ldMB)\n",
 		(unsigned long)(crash_size >> 20),
 		(unsigned long)(crash_base >> 20),
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 4e0dcff3f5b0..d0a800be0486 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -669,14 +669,6 @@ struct page *get_signal_page(void)
 	return page;
 }
 
-/* Defer to generic check */
-asmlinkage void addr_limit_check_failed(void)
-{
-#ifdef CONFIG_MMU
-	addr_limit_user_check();
-#endif
-}
-
 #ifdef CONFIG_DEBUG_RSEQ
 asmlinkage void do_rseq_syscall(struct pt_regs *regs)
 {
diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c
index 075a2e0ed2c1..68112c172025 100644
--- a/arch/arm/kernel/sys_oabi-compat.c
+++ b/arch/arm/kernel/sys_oabi-compat.c
@@ -80,9 +80,12 @@
 #include <linux/socket.h>
 #include <linux/net.h>
 #include <linux/ipc.h>
+#include <linux/ipc_namespace.h>
 #include <linux/uaccess.h>
 #include <linux/slab.h>
 
+#include <asm/syscall.h>
+
 struct oldabi_stat64 {
 	unsigned long long st_dev;
 	unsigned int	__pad1;
@@ -191,60 +194,87 @@ struct oabi_flock64 {
 	pid_t	l_pid;
 } __attribute__ ((packed,aligned(4)));
 
-static long do_locks(unsigned int fd, unsigned int cmd,
-				 unsigned long arg)
+static int get_oabi_flock(struct flock64 *kernel, struct oabi_flock64 __user *arg)
 {
-	struct flock64 kernel;
 	struct oabi_flock64 user;
-	mm_segment_t fs;
-	long ret;
 
 	if (copy_from_user(&user, (struct oabi_flock64 __user *)arg,
 			   sizeof(user)))
 		return -EFAULT;
-	kernel.l_type	= user.l_type;
-	kernel.l_whence	= user.l_whence;
-	kernel.l_start	= user.l_start;
-	kernel.l_len	= user.l_len;
-	kernel.l_pid	= user.l_pid;
-
-	fs = get_fs();
-	set_fs(KERNEL_DS);
-	ret = sys_fcntl64(fd, cmd, (unsigned long)&kernel);
-	set_fs(fs);
-
-	if (!ret && (cmd == F_GETLK64 || cmd == F_OFD_GETLK)) {
-		user.l_type	= kernel.l_type;
-		user.l_whence	= kernel.l_whence;
-		user.l_start	= kernel.l_start;
-		user.l_len	= kernel.l_len;
-		user.l_pid	= kernel.l_pid;
-		if (copy_to_user((struct oabi_flock64 __user *)arg,
-				 &user, sizeof(user)))
-			ret = -EFAULT;
-	}
-	return ret;
+
+	kernel->l_type	 = user.l_type;
+	kernel->l_whence = user.l_whence;
+	kernel->l_start	 = user.l_start;
+	kernel->l_len	 = user.l_len;
+	kernel->l_pid	 = user.l_pid;
+
+	return 0;
+}
+
+static int put_oabi_flock(struct flock64 *kernel, struct oabi_flock64 __user *arg)
+{
+	struct oabi_flock64 user;
+
+	user.l_type	= kernel->l_type;
+	user.l_whence	= kernel->l_whence;
+	user.l_start	= kernel->l_start;
+	user.l_len	= kernel->l_len;
+	user.l_pid	= kernel->l_pid;
+
+	if (copy_to_user((struct oabi_flock64 __user *)arg,
+			 &user, sizeof(user)))
+		return -EFAULT;
+
+	return 0;
 }
 
 asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd,
 				 unsigned long arg)
 {
+	void __user *argp = (void __user *)arg;
+	struct fd f = fdget_raw(fd);
+	struct flock64 flock;
+	long err = -EBADF;
+
+	if (!f.file)
+		goto out;
+
 	switch (cmd) {
-	case F_OFD_GETLK:
-	case F_OFD_SETLK:
-	case F_OFD_SETLKW:
 	case F_GETLK64:
+	case F_OFD_GETLK:
+		err = security_file_fcntl(f.file, cmd, arg);
+		if (err)
+			break;
+		err = get_oabi_flock(&flock, argp);
+		if (err)
+			break;
+		err = fcntl_getlk64(f.file, cmd, &flock);
+		if (!err)
+		       err = put_oabi_flock(&flock, argp);
+		break;
 	case F_SETLK64:
 	case F_SETLKW64:
-		return do_locks(fd, cmd, arg);
-
+	case F_OFD_SETLK:
+	case F_OFD_SETLKW:
+		err = security_file_fcntl(f.file, cmd, arg);
+		if (err)
+			break;
+		err = get_oabi_flock(&flock, argp);
+		if (err)
+			break;
+		err = fcntl_setlk64(fd, f.file, cmd, &flock);
+		break;
 	default:
-		return sys_fcntl64(fd, cmd, arg);
+		err = sys_fcntl64(fd, cmd, arg);
+		break;
 	}
+	fdput(f);
+out:
+	return err;
 }
 
 struct oabi_epoll_event {
-	__u32 events;
+	__poll_t events;
 	__u64 data;
 } __attribute__ ((packed,aligned(4)));
 
@@ -264,55 +294,34 @@ asmlinkage long sys_oabi_epoll_ctl(int epfd, int op, int fd,
 
 	return do_epoll_ctl(epfd, op, fd, &kernel, false);
 }
-
-asmlinkage long sys_oabi_epoll_wait(int epfd,
-				    struct oabi_epoll_event __user *events,
-				    int maxevents, int timeout)
-{
-	struct epoll_event *kbuf;
-	struct oabi_epoll_event e;
-	mm_segment_t fs;
-	long ret, err, i;
-
-	if (maxevents <= 0 ||
-			maxevents > (INT_MAX/sizeof(*kbuf)) ||
-			maxevents > (INT_MAX/sizeof(*events)))
-		return -EINVAL;
-	if (!access_ok(events, sizeof(*events) * maxevents))
-		return -EFAULT;
-	kbuf = kmalloc_array(maxevents, sizeof(*kbuf), GFP_KERNEL);
-	if (!kbuf)
-		return -ENOMEM;
-	fs = get_fs();
-	set_fs(KERNEL_DS);
-	ret = sys_epoll_wait(epfd, kbuf, maxevents, timeout);
-	set_fs(fs);
-	err = 0;
-	for (i = 0; i < ret; i++) {
-		e.events = kbuf[i].events;
-		e.data = kbuf[i].data;
-		err = __copy_to_user(events, &e, sizeof(e));
-		if (err)
-			break;
-		events++;
-	}
-	kfree(kbuf);
-	return err ? -EFAULT : ret;
-}
 #else
 asmlinkage long sys_oabi_epoll_ctl(int epfd, int op, int fd,
 				   struct oabi_epoll_event __user *event)
 {
 	return -EINVAL;
 }
+#endif
 
-asmlinkage long sys_oabi_epoll_wait(int epfd,
-				    struct oabi_epoll_event __user *events,
-				    int maxevents, int timeout)
+struct epoll_event __user *
+epoll_put_uevent(__poll_t revents, __u64 data,
+		 struct epoll_event __user *uevent)
 {
-	return -EINVAL;
+	if (in_oabi_syscall()) {
+		struct oabi_epoll_event __user *oevent = (void __user *)uevent;
+
+		if (__put_user(revents, &oevent->events) ||
+		    __put_user(data, &oevent->data))
+			return NULL;
+
+		return (void __user *)(oevent+1);
+	}
+
+	if (__put_user(revents, &uevent->events) ||
+	    __put_user(data, &uevent->data))
+		return NULL;
+
+	return uevent+1;
 }
-#endif
 
 struct oabi_sembuf {
 	unsigned short	sem_num;
@@ -321,46 +330,52 @@ struct oabi_sembuf {
 	unsigned short	__pad;
 };
 
+#define sc_semopm     sem_ctls[2]
+
+#ifdef CONFIG_SYSVIPC
 asmlinkage long sys_oabi_semtimedop(int semid,
 				    struct oabi_sembuf __user *tsops,
 				    unsigned nsops,
 				    const struct old_timespec32 __user *timeout)
 {
+	struct ipc_namespace *ns;
 	struct sembuf *sops;
-	struct old_timespec32 local_timeout;
 	long err;
 	int i;
 
+	ns = current->nsproxy->ipc_ns;
+	if (nsops > ns->sc_semopm)
+		return -E2BIG;
 	if (nsops < 1 || nsops > SEMOPM)
 		return -EINVAL;
-	if (!access_ok(tsops, sizeof(*tsops) * nsops))
-		return -EFAULT;
-	sops = kmalloc_array(nsops, sizeof(*sops), GFP_KERNEL);
+	sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL);
 	if (!sops)
 		return -ENOMEM;
 	err = 0;
 	for (i = 0; i < nsops; i++) {
 		struct oabi_sembuf osb;
-		err |= __copy_from_user(&osb, tsops, sizeof(osb));
+		err |= copy_from_user(&osb, tsops, sizeof(osb));
 		sops[i].sem_num = osb.sem_num;
 		sops[i].sem_op = osb.sem_op;
 		sops[i].sem_flg = osb.sem_flg;
 		tsops++;
 	}
-	if (timeout) {
-		/* copy this as well before changing domain protection */
-		err |= copy_from_user(&local_timeout, timeout, sizeof(*timeout));
-		timeout = &local_timeout;
-	}
 	if (err) {
 		err = -EFAULT;
-	} else {
-		mm_segment_t fs = get_fs();
-		set_fs(KERNEL_DS);
-		err = sys_semtimedop_time32(semid, sops, nsops, timeout);
-		set_fs(fs);
+		goto out;
 	}
-	kfree(sops);
+
+	if (timeout) {
+		struct timespec64 ts;
+		err = get_old_timespec32(&ts, timeout);
+		if (err)
+			goto out;
+		err = __do_semtimedop(semid, sops, nsops, &ts, ns);
+		goto out;
+	}
+	err = __do_semtimedop(semid, sops, nsops, NULL, ns);
+out:
+	kvfree(sops);
 	return err;
 }
 
@@ -387,6 +402,27 @@ asmlinkage int sys_oabi_ipc(uint call, int first, int second, int third,
 		return sys_ipc(call, first, second, third, ptr, fifth);
 	}
 }
+#else
+asmlinkage long sys_oabi_semtimedop(int semid,
+				    struct oabi_sembuf __user *tsops,
+				    unsigned nsops,
+				    const struct old_timespec32 __user *timeout)
+{
+	return -ENOSYS;
+}
+
+asmlinkage long sys_oabi_semop(int semid, struct oabi_sembuf __user *tsops,
+			       unsigned nsops)
+{
+	return -ENOSYS;
+}
+
+asmlinkage int sys_oabi_ipc(uint call, int first, int second, int third,
+			    void __user *ptr, long fifth)
+{
+	return -ENOSYS;
+}
+#endif
 
 asmlinkage long sys_oabi_bind(int fd, struct sockaddr __user *addr, int addrlen)
 {
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 64308e3a5d0c..4a7edc6e848f 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -122,17 +122,8 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
 		     unsigned long top)
 {
 	unsigned long first;
-	mm_segment_t fs;
 	int i;
 
-	/*
-	 * We need to switch to kernel mode so that we can use __get_user
-	 * to safely read from kernel space.  Note that we now dump the
-	 * code first, just in case the backtrace kills us.
-	 */
-	fs = get_fs();
-	set_fs(KERNEL_DS);
-
 	printk("%s%s(0x%08lx to 0x%08lx)\n", lvl, str, bottom, top);
 
 	for (first = bottom & ~31; first < top; first += 32) {
@@ -145,7 +136,7 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
 		for (p = first, i = 0; i < 8 && p < top; i++, p += 4) {
 			if (p >= bottom && p < top) {
 				unsigned long val;
-				if (__get_user(val, (unsigned long *)p) == 0)
+				if (get_kernel_nofault(val, (unsigned long *)p))
 					sprintf(str + i * 9, " %08lx", val);
 				else
 					sprintf(str + i * 9, " ????????");
@@ -153,11 +144,9 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
 		}
 		printk("%s%04lx:%s\n", lvl, first & 0xffff, str);
 	}
-
-	set_fs(fs);
 }
 
-static void __dump_instr(const char *lvl, struct pt_regs *regs)
+static void dump_instr(const char *lvl, struct pt_regs *regs)
 {
 	unsigned long addr = instruction_pointer(regs);
 	const int thumb = thumb_mode(regs);
@@ -173,10 +162,20 @@ static void __dump_instr(const char *lvl, struct pt_regs *regs)
 	for (i = -4; i < 1 + !!thumb; i++) {
 		unsigned int val, bad;
 
-		if (thumb)
-			bad = get_user(val, &((u16 *)addr)[i]);
-		else
-			bad = get_user(val, &((u32 *)addr)[i]);
+		if (!user_mode(regs)) {
+			if (thumb) {
+				u16 val16;
+				bad = get_kernel_nofault(val16, &((u16 *)addr)[i]);
+				val = val16;
+			} else {
+				bad = get_kernel_nofault(val, &((u32 *)addr)[i]);
+			}
+		} else {
+			if (thumb)
+				bad = get_user(val, &((u16 *)addr)[i]);
+			else
+				bad = get_user(val, &((u32 *)addr)[i]);
+		}
 
 		if (!bad)
 			p += sprintf(p, i == 0 ? "(%0*x) " : "%0*x ",
@@ -189,20 +188,6 @@ static void __dump_instr(const char *lvl, struct pt_regs *regs)
 	printk("%sCode: %s\n", lvl, str);
 }
 
-static void dump_instr(const char *lvl, struct pt_regs *regs)
-{
-	mm_segment_t fs;
-
-	if (!user_mode(regs)) {
-		fs = get_fs();
-		set_fs(KERNEL_DS);
-		__dump_instr(lvl, regs);
-		set_fs(fs);
-	} else {
-		__dump_instr(lvl, regs);
-	}
-}
-
 #ifdef CONFIG_ARM_UNWIND
 static inline void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
 				  const char *loglvl)
@@ -781,11 +766,6 @@ void abort(void)
 	panic("Oops failed to kill thread");
 }
 
-void __init trap_init(void)
-{
-	return;
-}
-
 #ifdef CONFIG_KUSER_HELPERS
 static void __init kuser_init(void *vectors)
 {
diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S
index f8016e3db65d..480a20766137 100644
--- a/arch/arm/lib/copy_from_user.S
+++ b/arch/arm/lib/copy_from_user.S
@@ -109,8 +109,7 @@
 
 ENTRY(arm_copy_from_user)
 #ifdef CONFIG_CPU_SPECTRE
-	get_thread_info r3
-	ldr	r3, [r3, #TI_ADDR_LIMIT]
+	ldr	r3, =TASK_SIZE
 	uaccess_mask_range_ptr r1, r2, r3, ip
 #endif
 
diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S
index ebfe4cb3d912..842ea5ede485 100644
--- a/arch/arm/lib/copy_to_user.S
+++ b/arch/arm/lib/copy_to_user.S
@@ -109,8 +109,7 @@
 ENTRY(__copy_to_user_std)
 WEAK(arm_copy_to_user)
 #ifdef CONFIG_CPU_SPECTRE
-	get_thread_info r3
-	ldr	r3, [r3, #TI_ADDR_LIMIT]
+	ldr	r3, =TASK_SIZE
 	uaccess_mask_range_ptr r0, r2, r3, ip
 #endif
 
diff --git a/arch/arm/mach-bcm/bcm_kona_smc.c b/arch/arm/mach-bcm/bcm_kona_smc.c
index 43a16f922b53..43829e49ad93 100644
--- a/arch/arm/mach-bcm/bcm_kona_smc.c
+++ b/arch/arm/mach-bcm/bcm_kona_smc.c
@@ -10,8 +10,6 @@
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  */
-
-#include <stdarg.h>
 #include <linux/smp.h>
 #include <linux/io.h>
 #include <linux/ioport.h>
diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c
index 6bfd2b884505..cfd9c933d2f0 100644
--- a/arch/arm/mm/dma-mapping-nommu.c
+++ b/arch/arm/mm/dma-mapping-nommu.c
@@ -5,12 +5,7 @@
  *  Copyright (C) 2000-2004 Russell King
  */
 
-#include <linux/export.h>
-#include <linux/mm.h>
-#include <linux/dma-direct.h>
 #include <linux/dma-map-ops.h>
-#include <linux/scatterlist.h>
-
 #include <asm/cachetype.h>
 #include <asm/cacheflush.h>
 #include <asm/outercache.h>
@@ -18,65 +13,8 @@
 
 #include "dma.h"
 
-/*
- *  The generic direct mapping code is used if
- *   - MMU/MPU is off
- *   - cpu is v7m w/o cache support
- *   - device is coherent
- *  otherwise arm_nommu_dma_ops is used.
- *
- *  arm_nommu_dma_ops rely on consistent DMA memory (please, refer to
- *  [1] on how to declare such memory).
- *
- *  [1] Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
- */
-
-static void *arm_nommu_dma_alloc(struct device *dev, size_t size,
-				 dma_addr_t *dma_handle, gfp_t gfp,
-				 unsigned long attrs)
-
-{
-	void *ret = dma_alloc_from_global_coherent(dev, size, dma_handle);
-
-	/*
-	 * dma_alloc_from_global_coherent() may fail because:
-	 *
-	 * - no consistent DMA region has been defined, so we can't
-	 *   continue.
-	 * - there is no space left in consistent DMA region, so we
-	 *   only can fallback to generic allocator if we are
-	 *   advertised that consistency is not required.
-	 */
-
-	WARN_ON_ONCE(ret == NULL);
-	return ret;
-}
-
-static void arm_nommu_dma_free(struct device *dev, size_t size,
-			       void *cpu_addr, dma_addr_t dma_addr,
-			       unsigned long attrs)
-{
-	int ret = dma_release_from_global_coherent(get_order(size), cpu_addr);
-
-	WARN_ON_ONCE(ret == 0);
-}
-
-static int arm_nommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
-			      void *cpu_addr, dma_addr_t dma_addr, size_t size,
-			      unsigned long attrs)
-{
-	int ret;
-
-	if (dma_mmap_from_global_coherent(vma, cpu_addr, size, &ret))
-		return ret;
-	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
-		return ret;
-	return -ENXIO;
-}
-
-
-static void __dma_page_cpu_to_dev(phys_addr_t paddr, size_t size,
-				  enum dma_data_direction dir)
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	dmac_map_area(__va(paddr), size, dir);
 
@@ -86,8 +24,8 @@ static void __dma_page_cpu_to_dev(phys_addr_t paddr, size_t size,
 		outer_clean_range(paddr, paddr + size);
 }
 
-static void __dma_page_dev_to_cpu(phys_addr_t paddr, size_t size,
-				  enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
 	if (dir != DMA_TO_DEVICE) {
 		outer_inv_range(paddr, paddr + size);
@@ -95,102 +33,6 @@ static void __dma_page_dev_to_cpu(phys_addr_t paddr, size_t size,
 	}
 }
 
-static dma_addr_t arm_nommu_dma_map_page(struct device *dev, struct page *page,
-					 unsigned long offset, size_t size,
-					 enum dma_data_direction dir,
-					 unsigned long attrs)
-{
-	dma_addr_t handle = page_to_phys(page) + offset;
-
-	__dma_page_cpu_to_dev(handle, size, dir);
-
-	return handle;
-}
-
-static void arm_nommu_dma_unmap_page(struct device *dev, dma_addr_t handle,
-				     size_t size, enum dma_data_direction dir,
-				     unsigned long attrs)
-{
-	__dma_page_dev_to_cpu(handle, size, dir);
-}
-
-
-static int arm_nommu_dma_map_sg(struct device *dev, struct scatterlist *sgl,
-				int nents, enum dma_data_direction dir,
-				unsigned long attrs)
-{
-	int i;
-	struct scatterlist *sg;
-
-	for_each_sg(sgl, sg, nents, i) {
-		sg_dma_address(sg) = sg_phys(sg);
-		sg_dma_len(sg) = sg->length;
-		__dma_page_cpu_to_dev(sg_dma_address(sg), sg_dma_len(sg), dir);
-	}
-
-	return nents;
-}
-
-static void arm_nommu_dma_unmap_sg(struct device *dev, struct scatterlist *sgl,
-				   int nents, enum dma_data_direction dir,
-				   unsigned long attrs)
-{
-	struct scatterlist *sg;
-	int i;
-
-	for_each_sg(sgl, sg, nents, i)
-		__dma_page_dev_to_cpu(sg_dma_address(sg), sg_dma_len(sg), dir);
-}
-
-static void arm_nommu_dma_sync_single_for_device(struct device *dev,
-		dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-	__dma_page_cpu_to_dev(handle, size, dir);
-}
-
-static void arm_nommu_dma_sync_single_for_cpu(struct device *dev,
-		dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-	__dma_page_cpu_to_dev(handle, size, dir);
-}
-
-static void arm_nommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
-					     int nents, enum dma_data_direction dir)
-{
-	struct scatterlist *sg;
-	int i;
-
-	for_each_sg(sgl, sg, nents, i)
-		__dma_page_cpu_to_dev(sg_dma_address(sg), sg_dma_len(sg), dir);
-}
-
-static void arm_nommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
-					  int nents, enum dma_data_direction dir)
-{
-	struct scatterlist *sg;
-	int i;
-
-	for_each_sg(sgl, sg, nents, i)
-		__dma_page_dev_to_cpu(sg_dma_address(sg), sg_dma_len(sg), dir);
-}
-
-const struct dma_map_ops arm_nommu_dma_ops = {
-	.alloc			= arm_nommu_dma_alloc,
-	.free			= arm_nommu_dma_free,
-	.alloc_pages		= dma_direct_alloc_pages,
-	.free_pages		= dma_direct_free_pages,
-	.mmap			= arm_nommu_dma_mmap,
-	.map_page		= arm_nommu_dma_map_page,
-	.unmap_page		= arm_nommu_dma_unmap_page,
-	.map_sg			= arm_nommu_dma_map_sg,
-	.unmap_sg		= arm_nommu_dma_unmap_sg,
-	.sync_single_for_device	= arm_nommu_dma_sync_single_for_device,
-	.sync_single_for_cpu	= arm_nommu_dma_sync_single_for_cpu,
-	.sync_sg_for_device	= arm_nommu_dma_sync_sg_for_device,
-	.sync_sg_for_cpu	= arm_nommu_dma_sync_sg_for_cpu,
-};
-EXPORT_SYMBOL(arm_nommu_dma_ops);
-
 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 			const struct iommu_ops *iommu, bool coherent)
 {
@@ -201,14 +43,11 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 		 * enough to check if MPU is in use or not since in absense of
 		 * MPU system memory map is used.
 		 */
-		dev->archdata.dma_coherent = (cacheid) ? coherent : true;
+		dev->dma_coherent = cacheid ? coherent : true;
 	} else {
 		/*
 		 * Assume coherent DMA in case MMU/MPU has not been set up.
 		 */
-		dev->archdata.dma_coherent = (get_cr() & CR_M) ? coherent : true;
+		dev->dma_coherent = (get_cr() & CR_M) ? coherent : true;
 	}
-
-	if (!dev->archdata.dma_coherent)
-		set_dma_ops(dev, &arm_nommu_dma_ops);
 }
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index c4b8df2ad328..4b61541853ea 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -980,7 +980,7 @@ int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 {
 	const struct dma_map_ops *ops = get_dma_ops(dev);
 	struct scatterlist *s;
-	int i, j;
+	int i, j, ret;
 
 	for_each_sg(sg, s, nents, i) {
 #ifdef CONFIG_NEED_SG_DMA_LENGTH
@@ -988,15 +988,17 @@ int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 #endif
 		s->dma_address = ops->map_page(dev, sg_page(s), s->offset,
 						s->length, dir, attrs);
-		if (dma_mapping_error(dev, s->dma_address))
+		if (dma_mapping_error(dev, s->dma_address)) {
+			ret = -EIO;
 			goto bad_mapping;
+		}
 	}
 	return nents;
 
  bad_mapping:
 	for_each_sg(sg, s, i, j)
 		ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs);
-	return 0;
+	return ret;
 }
 
 /**
@@ -1622,7 +1624,7 @@ static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 		     bool is_coherent)
 {
 	struct scatterlist *s = sg, *dma = sg, *start = sg;
-	int i, count = 0;
+	int i, count = 0, ret;
 	unsigned int offset = s->offset;
 	unsigned int size = s->offset + s->length;
 	unsigned int max = dma_get_max_seg_size(dev);
@@ -1630,12 +1632,13 @@ static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 	for (i = 1; i < nents; i++) {
 		s = sg_next(s);
 
-		s->dma_address = DMA_MAPPING_ERROR;
 		s->dma_length = 0;
 
 		if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) {
-			if (__map_sg_chunk(dev, start, size, &dma->dma_address,
-			    dir, attrs, is_coherent) < 0)
+			ret = __map_sg_chunk(dev, start, size,
+					     &dma->dma_address, dir, attrs,
+					     is_coherent);
+			if (ret < 0)
 				goto bad_mapping;
 
 			dma->dma_address += offset;
@@ -1648,8 +1651,9 @@ static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 		}
 		size += s->length;
 	}
-	if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs,
-		is_coherent) < 0)
+	ret = __map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs,
+			     is_coherent);
+	if (ret < 0)
 		goto bad_mapping;
 
 	dma->dma_address += offset;
@@ -1660,7 +1664,9 @@ static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 bad_mapping:
 	for_each_sg(sg, s, count, i)
 		__iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s));
-	return 0;
+	if (ret == -ENOMEM)
+		return ret;
+	return -EINVAL;
 }
 
 /**
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 6d89db7895d1..7ff9feea13a6 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -346,39 +346,6 @@ void flush_dcache_page(struct page *page)
 EXPORT_SYMBOL(flush_dcache_page);
 
 /*
- * Ensure cache coherency for the kernel mapping of this page. We can
- * assume that the page is pinned via kmap.
- *
- * If the page only exists in the page cache and there are no user
- * space mappings, this is a no-op since the page was already marked
- * dirty at creation.  Otherwise, we need to flush the dirty kernel
- * cache lines directly.
- */
-void flush_kernel_dcache_page(struct page *page)
-{
-	if (cache_is_vivt() || cache_is_vipt_aliasing()) {
-		struct address_space *mapping;
-
-		mapping = page_mapping_file(page);
-
-		if (!mapping || mapping_mapped(mapping)) {
-			void *addr;
-
-			addr = page_address(page);
-			/*
-			 * kmap_atomic() doesn't set the page virtual
-			 * address for highmem pages, and
-			 * kunmap_atomic() takes care of cache
-			 * flushing already.
-			 */
-			if (!IS_ENABLED(CONFIG_HIGHMEM) || addr)
-				__cpuc_flush_dcache_area(addr, PAGE_SIZE);
-		}
-	}
-}
-EXPORT_SYMBOL(flush_kernel_dcache_page);
-
-/*
  * Flush an anonymous page so that users of get_user_pages()
  * can safely access the data.  The expected sequence is:
  *
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index 8b3d7191e2b8..2658f52903da 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -166,12 +166,6 @@ void flush_dcache_page(struct page *page)
 }
 EXPORT_SYMBOL(flush_dcache_page);
 
-void flush_kernel_dcache_page(struct page *page)
-{
-	__cpuc_flush_dcache_area(page_address(page), PAGE_SIZE);
-}
-EXPORT_SYMBOL(flush_kernel_dcache_page);
-
 void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
 		       unsigned long uaddr, void *dst, const void *src,
 		       unsigned long len)
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index f8a2d5aa17b7..e842209e135d 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -266,7 +266,7 @@
 249	common	lookup_dcookie		sys_lookup_dcookie
 250	common	epoll_create		sys_epoll_create
 251	common	epoll_ctl		sys_epoll_ctl		sys_oabi_epoll_ctl
-252	common	epoll_wait		sys_epoll_wait		sys_oabi_epoll_wait
+252	common	epoll_wait		sys_epoll_wait
 253	common	remap_file_pages	sys_remap_file_pages
 # 254 for set_thread_area
 # 255 for get_thread_area
@@ -460,3 +460,5 @@
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
+# 447 reserved for memfd_secret
+448	common	process_mrelease		sys_process_mrelease
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9dc1720a909f..077f2ec4eeb2 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -220,6 +220,7 @@ config ARM64
 	select SYSCTL_EXCEPTION_TRACE
 	select THREAD_INFO_IN_TASK
 	select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD
+	select TRACE_IRQFLAGS_SUPPORT
 	help
 	  ARM 64-bit (AArch64) Linux support.
 
@@ -287,9 +288,6 @@ config ILLEGAL_POINTER_VALUE
 config LOCKDEP_SUPPORT
 	def_bool y
 
-config TRACE_IRQFLAGS_SUPPORT
-	def_bool y
-
 config GENERIC_BUG
 	def_bool y
 	depends on BUG
diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
index dfb2fbf5a414..409cf827970c 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
@@ -14,7 +14,7 @@
 #include <dt-bindings/reset-controller/mt8183-resets.h>
 #include <dt-bindings/phy/phy.h>
 #include <dt-bindings/thermal/thermal.h>
-#include "mt8183-pinfunc.h"
+#include <dt-bindings/pinctrl/mt8183-pinfunc.h>
 
 / {
 	compatible = "mediatek,mt8183";
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index 79c1a750e357..eaa6ca062d89 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -107,11 +107,6 @@ struct compat_statfs {
 #define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current)))
 #define COMPAT_MINSIGSTKSZ	2048
 
-static inline void __user *arch_compat_alloc_user_space(long len)
-{
-	return (void __user *)compat_user_stack_pointer() - len;
-}
-
 struct compat_ipc64_perm {
 	compat_key_t key;
 	__compat_uid32_t uid;
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index cdfa2a242e9f..ef6be92b1921 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -602,14 +602,14 @@ static inline bool id_aa64pfr0_32bit_el1(u64 pfr0)
 {
 	u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_SHIFT);
 
-	return val == ID_AA64PFR0_EL1_32BIT_64BIT;
+	return val == ID_AA64PFR0_ELx_32BIT_64BIT;
 }
 
 static inline bool id_aa64pfr0_32bit_el0(u64 pfr0)
 {
 	u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL0_SHIFT);
 
-	return val == ID_AA64PFR0_EL0_32BIT_64BIT;
+	return val == ID_AA64PFR0_ELx_32BIT_64BIT;
 }
 
 static inline bool id_aa64pfr0_sve(u64 pfr0)
@@ -784,13 +784,13 @@ extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
 static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
 {
 	switch (parange) {
-	case 0: return 32;
-	case 1: return 36;
-	case 2: return 40;
-	case 3: return 42;
-	case 4: return 44;
-	case 5: return 48;
-	case 6: return 52;
+	case ID_AA64MMFR0_PARANGE_32: return 32;
+	case ID_AA64MMFR0_PARANGE_36: return 36;
+	case ID_AA64MMFR0_PARANGE_40: return 40;
+	case ID_AA64MMFR0_PARANGE_42: return 42;
+	case ID_AA64MMFR0_PARANGE_44: return 44;
+	case ID_AA64MMFR0_PARANGE_48: return 48;
+	case ID_AA64MMFR0_PARANGE_52: return 52;
 	/*
 	 * A future PE could use a value unknown to the kernel.
 	 * However, by the "D10.1.4 Principles of the ID scheme
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index d436831dd706..327120c0089f 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -12,8 +12,13 @@
 #include <asm/types.h>
 
 /* Hyp Configuration Register (HCR) bits */
+
+#define HCR_TID5	(UL(1) << 58)
+#define HCR_DCT		(UL(1) << 57)
 #define HCR_ATA_SHIFT	56
 #define HCR_ATA		(UL(1) << HCR_ATA_SHIFT)
+#define HCR_AMVOFFEN	(UL(1) << 51)
+#define HCR_FIEN	(UL(1) << 47)
 #define HCR_FWB		(UL(1) << 46)
 #define HCR_API		(UL(1) << 41)
 #define HCR_APK		(UL(1) << 40)
@@ -32,9 +37,9 @@
 #define HCR_TVM		(UL(1) << 26)
 #define HCR_TTLB	(UL(1) << 25)
 #define HCR_TPU		(UL(1) << 24)
-#define HCR_TPC		(UL(1) << 23)
+#define HCR_TPC		(UL(1) << 23) /* HCR_TPCP if FEAT_DPB */
 #define HCR_TSW		(UL(1) << 22)
-#define HCR_TAC		(UL(1) << 21)
+#define HCR_TACR	(UL(1) << 21)
 #define HCR_TIDCP	(UL(1) << 20)
 #define HCR_TSC		(UL(1) << 19)
 #define HCR_TID3	(UL(1) << 18)
@@ -56,12 +61,13 @@
 #define HCR_PTW		(UL(1) << 2)
 #define HCR_SWIO	(UL(1) << 1)
 #define HCR_VM		(UL(1) << 0)
+#define HCR_RES0	((UL(1) << 48) | (UL(1) << 39))
 
 /*
  * The bits we set in HCR:
  * TLOR:	Trap LORegion register accesses
  * RW:		64bit by default, can be overridden for 32bit VMs
- * TAC:		Trap ACTLR
+ * TACR:	Trap ACTLR
  * TSC:		Trap SMC
  * TSW:		Trap cache operations by set/way
  * TWE:		Trap WFE
@@ -76,7 +82,7 @@
  * PTW:		Take a stage2 fault if a stage1 walk steps in device memory
  */
 #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
-			 HCR_BSU_IS | HCR_FB | HCR_TAC | \
+			 HCR_BSU_IS | HCR_FB | HCR_TACR | \
 			 HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
 			 HCR_FMO | HCR_IMO | HCR_PTW )
 #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
@@ -275,24 +281,40 @@
 #define CPTR_EL2_TTA	(1 << 20)
 #define CPTR_EL2_TFP	(1 << CPTR_EL2_TFP_SHIFT)
 #define CPTR_EL2_TZ	(1 << 8)
-#define CPTR_EL2_RES1	0x000032ff /* known RES1 bits in CPTR_EL2 */
-#define CPTR_EL2_DEFAULT	CPTR_EL2_RES1
+#define CPTR_NVHE_EL2_RES1	0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */
+#define CPTR_EL2_DEFAULT	CPTR_NVHE_EL2_RES1
+#define CPTR_NVHE_EL2_RES0	(GENMASK(63, 32) |	\
+				 GENMASK(29, 21) |	\
+				 GENMASK(19, 14) |	\
+				 BIT(11))
 
 /* Hyp Debug Configuration Register bits */
 #define MDCR_EL2_E2TB_MASK	(UL(0x3))
 #define MDCR_EL2_E2TB_SHIFT	(UL(24))
-#define MDCR_EL2_TTRF		(1 << 19)
-#define MDCR_EL2_TPMS		(1 << 14)
+#define MDCR_EL2_HPMFZS		(UL(1) << 36)
+#define MDCR_EL2_HPMFZO		(UL(1) << 29)
+#define MDCR_EL2_MTPME		(UL(1) << 28)
+#define MDCR_EL2_TDCC		(UL(1) << 27)
+#define MDCR_EL2_HCCD		(UL(1) << 23)
+#define MDCR_EL2_TTRF		(UL(1) << 19)
+#define MDCR_EL2_HPMD		(UL(1) << 17)
+#define MDCR_EL2_TPMS		(UL(1) << 14)
 #define MDCR_EL2_E2PB_MASK	(UL(0x3))
 #define MDCR_EL2_E2PB_SHIFT	(UL(12))
-#define MDCR_EL2_TDRA		(1 << 11)
-#define MDCR_EL2_TDOSA		(1 << 10)
-#define MDCR_EL2_TDA		(1 << 9)
-#define MDCR_EL2_TDE		(1 << 8)
-#define MDCR_EL2_HPME		(1 << 7)
-#define MDCR_EL2_TPM		(1 << 6)
-#define MDCR_EL2_TPMCR		(1 << 5)
-#define MDCR_EL2_HPMN_MASK	(0x1F)
+#define MDCR_EL2_TDRA		(UL(1) << 11)
+#define MDCR_EL2_TDOSA		(UL(1) << 10)
+#define MDCR_EL2_TDA		(UL(1) << 9)
+#define MDCR_EL2_TDE		(UL(1) << 8)
+#define MDCR_EL2_HPME		(UL(1) << 7)
+#define MDCR_EL2_TPM		(UL(1) << 6)
+#define MDCR_EL2_TPMCR		(UL(1) << 5)
+#define MDCR_EL2_HPMN_MASK	(UL(0x1F))
+#define MDCR_EL2_RES0		(GENMASK(63, 37) |	\
+				 GENMASK(35, 30) |	\
+				 GENMASK(25, 24) |	\
+				 GENMASK(22, 20) |	\
+				 BIT(18) |		\
+				 GENMASK(16, 15))
 
 /* For compatibility with fault code shared with 32-bit */
 #define FSC_FAULT	ESR_ELx_FSC_FAULT
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 9f0bf2109be7..e86045ac43ba 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -59,12 +59,11 @@
 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs		13
 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs		14
 #define __KVM_HOST_SMCCC_FUNC___pkvm_init			15
-#define __KVM_HOST_SMCCC_FUNC___pkvm_create_mappings		16
+#define __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp		16
 #define __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping	17
 #define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector		18
 #define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize		19
-#define __KVM_HOST_SMCCC_FUNC___pkvm_mark_hyp			20
-#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc			21
+#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc			20
 
 #ifndef __ASSEMBLY__
 
@@ -210,7 +209,7 @@ extern u64 __vgic_v3_read_vmcr(void);
 extern void __vgic_v3_write_vmcr(u32 vmcr);
 extern void __vgic_v3_init_lrs(void);
 
-extern u32 __kvm_get_mdcr_el2(void);
+extern u64 __kvm_get_mdcr_el2(void);
 
 #define __KVM_EXTABLE(from, to)						\
 	"	.pushsection	__kvm_ex_table, \"a\"\n"		\
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 41911585ae0c..f8be56d5342b 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -66,7 +66,7 @@ DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
 extern unsigned int kvm_sve_max_vl;
 int kvm_arm_init_sve(void);
 
-int __attribute_const__ kvm_target_cpu(void);
+u32 __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
 
@@ -185,7 +185,6 @@ enum vcpu_sysreg {
 	PMCNTENSET_EL0,	/* Count Enable Set Register */
 	PMINTENSET_EL1,	/* Interrupt Enable Set Register */
 	PMOVSSET_EL0,	/* Overflow Flag Status Set Register */
-	PMSWINC_EL0,	/* Software Increment Register */
 	PMUSERENR_EL0,	/* User Enable Register */
 
 	/* Pointer Authentication Registers in a strict increasing order. */
@@ -287,9 +286,13 @@ struct kvm_vcpu_arch {
 	/* Stage 2 paging state used by the hardware on next switch */
 	struct kvm_s2_mmu *hw_mmu;
 
-	/* HYP configuration */
+	/* Values of trap registers for the guest. */
 	u64 hcr_el2;
-	u32 mdcr_el2;
+	u64 mdcr_el2;
+	u64 cptr_el2;
+
+	/* Values of trap registers for the host before guest entry. */
+	u64 mdcr_el2_host;
 
 	/* Exception Information */
 	struct kvm_vcpu_fault_info fault;
@@ -576,6 +579,7 @@ struct kvm_vcpu_stat {
 	u64 wfi_exit_stat;
 	u64 mmio_exit_user;
 	u64 mmio_exit_kernel;
+	u64 signal_exits;
 	u64 exits;
 };
 
@@ -771,6 +775,11 @@ void kvm_arch_free_vm(struct kvm *kvm);
 
 int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type);
 
+static inline bool kvm_vm_is_protected(struct kvm *kvm)
+{
+	return false;
+}
+
 int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
 bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
 
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 9d60b3006efc..657d0c94cf82 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -95,7 +95,7 @@ void __sve_restore_state(void *sve_pffr, u32 *fpsr);
 
 #ifndef __KVM_NVHE_HYPERVISOR__
 void activate_traps_vhe_load(struct kvm_vcpu *vcpu);
-void deactivate_traps_vhe_put(void);
+void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu);
 #endif
 
 u64 __guest_enter(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index b52c5c4b9a3d..02d378887743 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -252,6 +252,11 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa,
 
 #define kvm_phys_to_vttbr(addr)		phys_to_ttbr(addr)
 
+/*
+ * When this is (directly or indirectly) used on the TLB invalidation
+ * path, we rely on a previously issued DSB so that page table updates
+ * and VMID reads are correctly ordered.
+ */
 static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
 {
 	struct kvm_vmid *vmid = &mmu->vmid;
@@ -259,7 +264,7 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
 	u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0;
 
 	baddr = mmu->pgd_phys;
-	vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT;
+	vmid_field = (u64)READ_ONCE(vmid->vmid) << VTTBR_VMID_SHIFT;
 	return kvm_phys_to_vttbr(baddr) | vmid_field | cnp;
 }
 
@@ -267,9 +272,10 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
  * Must be called from hyp code running at EL2 with an updated VTTBR
  * and interrupts disabled.
  */
-static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, unsigned long vtcr)
+static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu,
+					  struct kvm_arch *arch)
 {
-	write_sysreg(vtcr, vtcr_el2);
+	write_sysreg(arch->vtcr, vtcr_el2);
 	write_sysreg(kvm_get_vttbr(mmu), vttbr_el2);
 
 	/*
@@ -280,11 +286,6 @@ static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, unsigned long
 	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
 }
 
-static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu)
-{
-	__load_stage2(mmu, kern_hyp_va(mmu->arch)->vtcr);
-}
-
 static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu)
 {
 	return container_of(mmu->arch, struct kvm, arch);
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index f004c0115d89..027783829584 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -25,6 +25,46 @@ static inline u64 kvm_get_parange(u64 mmfr0)
 
 typedef u64 kvm_pte_t;
 
+#define KVM_PTE_VALID			BIT(0)
+
+#define KVM_PTE_ADDR_MASK		GENMASK(47, PAGE_SHIFT)
+#define KVM_PTE_ADDR_51_48		GENMASK(15, 12)
+
+static inline bool kvm_pte_valid(kvm_pte_t pte)
+{
+	return pte & KVM_PTE_VALID;
+}
+
+static inline u64 kvm_pte_to_phys(kvm_pte_t pte)
+{
+	u64 pa = pte & KVM_PTE_ADDR_MASK;
+
+	if (PAGE_SHIFT == 16)
+		pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;
+
+	return pa;
+}
+
+static inline u64 kvm_granule_shift(u32 level)
+{
+	/* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
+	return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
+}
+
+static inline u64 kvm_granule_size(u32 level)
+{
+	return BIT(kvm_granule_shift(level));
+}
+
+static inline bool kvm_level_supports_block_mapping(u32 level)
+{
+	/*
+	 * Reject invalid block mappings and don't bother with 4TB mappings for
+	 * 52-bit PAs.
+	 */
+	return !(level == 0 || (PAGE_SIZE != SZ_4K && level == 1));
+}
+
 /**
  * struct kvm_pgtable_mm_ops - Memory management callbacks.
  * @zalloc_page:		Allocate a single zeroed memory page.
@@ -76,30 +116,15 @@ enum kvm_pgtable_stage2_flags {
 };
 
 /**
- * struct kvm_pgtable - KVM page-table.
- * @ia_bits:		Maximum input address size, in bits.
- * @start_level:	Level at which the page-table walk starts.
- * @pgd:		Pointer to the first top-level entry of the page-table.
- * @mm_ops:		Memory management callbacks.
- * @mmu:		Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
- */
-struct kvm_pgtable {
-	u32					ia_bits;
-	u32					start_level;
-	kvm_pte_t				*pgd;
-	struct kvm_pgtable_mm_ops		*mm_ops;
-
-	/* Stage-2 only */
-	struct kvm_s2_mmu			*mmu;
-	enum kvm_pgtable_stage2_flags		flags;
-};
-
-/**
  * enum kvm_pgtable_prot - Page-table permissions and attributes.
  * @KVM_PGTABLE_PROT_X:		Execute permission.
  * @KVM_PGTABLE_PROT_W:		Write permission.
  * @KVM_PGTABLE_PROT_R:		Read permission.
  * @KVM_PGTABLE_PROT_DEVICE:	Device attributes.
+ * @KVM_PGTABLE_PROT_SW0:	Software bit 0.
+ * @KVM_PGTABLE_PROT_SW1:	Software bit 1.
+ * @KVM_PGTABLE_PROT_SW2:	Software bit 2.
+ * @KVM_PGTABLE_PROT_SW3:	Software bit 3.
  */
 enum kvm_pgtable_prot {
 	KVM_PGTABLE_PROT_X			= BIT(0),
@@ -107,21 +132,48 @@ enum kvm_pgtable_prot {
 	KVM_PGTABLE_PROT_R			= BIT(2),
 
 	KVM_PGTABLE_PROT_DEVICE			= BIT(3),
+
+	KVM_PGTABLE_PROT_SW0			= BIT(55),
+	KVM_PGTABLE_PROT_SW1			= BIT(56),
+	KVM_PGTABLE_PROT_SW2			= BIT(57),
+	KVM_PGTABLE_PROT_SW3			= BIT(58),
 };
 
-#define PAGE_HYP		(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
+#define KVM_PGTABLE_PROT_RW	(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
+#define KVM_PGTABLE_PROT_RWX	(KVM_PGTABLE_PROT_RW | KVM_PGTABLE_PROT_X)
+
+#define PKVM_HOST_MEM_PROT	KVM_PGTABLE_PROT_RWX
+#define PKVM_HOST_MMIO_PROT	KVM_PGTABLE_PROT_RW
+
+#define PAGE_HYP		KVM_PGTABLE_PROT_RW
 #define PAGE_HYP_EXEC		(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X)
 #define PAGE_HYP_RO		(KVM_PGTABLE_PROT_R)
 #define PAGE_HYP_DEVICE		(PAGE_HYP | KVM_PGTABLE_PROT_DEVICE)
 
+typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
+					   enum kvm_pgtable_prot prot);
+
 /**
- * struct kvm_mem_range - Range of Intermediate Physical Addresses
- * @start:	Start of the range.
- * @end:	End of the range.
+ * struct kvm_pgtable - KVM page-table.
+ * @ia_bits:		Maximum input address size, in bits.
+ * @start_level:	Level at which the page-table walk starts.
+ * @pgd:		Pointer to the first top-level entry of the page-table.
+ * @mm_ops:		Memory management callbacks.
+ * @mmu:		Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
+ * @flags:		Stage-2 page-table flags.
+ * @force_pte_cb:	Function that returns true if page level mappings must
+ *			be used instead of block mappings.
  */
-struct kvm_mem_range {
-	u64 start;
-	u64 end;
+struct kvm_pgtable {
+	u32					ia_bits;
+	u32					start_level;
+	kvm_pte_t				*pgd;
+	struct kvm_pgtable_mm_ops		*mm_ops;
+
+	/* Stage-2 only */
+	struct kvm_s2_mmu			*mmu;
+	enum kvm_pgtable_stage2_flags		flags;
+	kvm_pgtable_force_pte_cb_t		force_pte_cb;
 };
 
 /**
@@ -216,21 +268,24 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
 u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
 
 /**
- * kvm_pgtable_stage2_init_flags() - Initialise a guest stage-2 page-table.
+ * __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
  * @pgt:	Uninitialised page-table structure to initialise.
  * @arch:	Arch-specific KVM structure representing the guest virtual
  *		machine.
  * @mm_ops:	Memory management callbacks.
  * @flags:	Stage-2 configuration flags.
+ * @force_pte_cb: Function that returns true if page level mappings must
+ *		be used instead of block mappings.
  *
  * Return: 0 on success, negative error code on failure.
  */
-int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch,
-				  struct kvm_pgtable_mm_ops *mm_ops,
-				  enum kvm_pgtable_stage2_flags flags);
+int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
+			      struct kvm_pgtable_mm_ops *mm_ops,
+			      enum kvm_pgtable_stage2_flags flags,
+			      kvm_pgtable_force_pte_cb_t force_pte_cb);
 
 #define kvm_pgtable_stage2_init(pgt, arch, mm_ops) \
-	kvm_pgtable_stage2_init_flags(pgt, arch, mm_ops, 0)
+	__kvm_pgtable_stage2_init(pgt, arch, mm_ops, 0, NULL)
 
 /**
  * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
@@ -374,7 +429,8 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr);
  * If there is a valid, leaf page-table entry used to translate @addr, then
  * relax the permissions in that entry according to the read, write and
  * execute permissions specified by @prot. No permissions are removed, and
- * TLB invalidation is performed after updating the entry.
+ * TLB invalidation is performed after updating the entry. Software bits cannot
+ * be set or cleared using kvm_pgtable_stage2_relax_perms().
  *
  * Return: 0 on success, negative error code on failure.
  */
@@ -433,22 +489,42 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
 		     struct kvm_pgtable_walker *walker);
 
 /**
- * kvm_pgtable_stage2_find_range() - Find a range of Intermediate Physical
- *				     Addresses with compatible permission
- *				     attributes.
- * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
- * @addr:	Address that must be covered by the range.
- * @prot:	Protection attributes that the range must be compatible with.
- * @range:	Range structure used to limit the search space at call time and
- *		that will hold the result.
+ * kvm_pgtable_get_leaf() - Walk a page-table and retrieve the leaf entry
+ *			    with its level.
+ * @pgt:	Page-table structure initialised by kvm_pgtable_*_init()
+ *		or a similar initialiser.
+ * @addr:	Input address for the start of the walk.
+ * @ptep:	Pointer to storage for the retrieved PTE.
+ * @level:	Pointer to storage for the level of the retrieved PTE.
+ *
+ * The offset of @addr within a page is ignored.
  *
- * The offset of @addr within a page is ignored. An IPA is compatible with @prot
- * iff its corresponding stage-2 page-table entry has default ownership and, if
- * valid, is mapped with protection attributes identical to @prot.
+ * The walker will walk the page-table entries corresponding to the input
+ * address specified, retrieving the leaf corresponding to this address.
+ * Invalid entries are treated as leaf entries.
  *
  * Return: 0 on success, negative error code on failure.
  */
-int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr,
-				  enum kvm_pgtable_prot prot,
-				  struct kvm_mem_range *range);
+int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
+			 kvm_pte_t *ptep, u32 *level);
+
+/**
+ * kvm_pgtable_stage2_pte_prot() - Retrieve the protection attributes of a
+ *				   stage-2 Page-Table Entry.
+ * @pte:	Page-table entry
+ *
+ * Return: protection attributes of the page-table entry in the enum
+ *	   kvm_pgtable_prot format.
+ */
+enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte);
+
+/**
+ * kvm_pgtable_hyp_pte_prot() - Retrieve the protection attributes of a stage-1
+ *				Page-Table Entry.
+ * @pte:	Page-table entry
+ *
+ * Return: protection attributes of the page-table entry in the enum
+ *	   kvm_pgtable_prot format.
+ */
+enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);
 #endif	/* __ARM64_KVM_PGTABLE_H__ */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index f2e06e7c0a31..b268082d67ed 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -784,14 +784,13 @@
 #define ID_AA64PFR0_AMU			0x1
 #define ID_AA64PFR0_SVE			0x1
 #define ID_AA64PFR0_RAS_V1		0x1
+#define ID_AA64PFR0_RAS_V1P1		0x2
 #define ID_AA64PFR0_FP_NI		0xf
 #define ID_AA64PFR0_FP_SUPPORTED	0x0
 #define ID_AA64PFR0_ASIMD_NI		0xf
 #define ID_AA64PFR0_ASIMD_SUPPORTED	0x0
-#define ID_AA64PFR0_EL1_64BIT_ONLY	0x1
-#define ID_AA64PFR0_EL1_32BIT_64BIT	0x2
-#define ID_AA64PFR0_EL0_64BIT_ONLY	0x1
-#define ID_AA64PFR0_EL0_32BIT_64BIT	0x2
+#define ID_AA64PFR0_ELx_64BIT_ONLY	0x1
+#define ID_AA64PFR0_ELx_32BIT_64BIT	0x2
 
 /* id_aa64pfr1 */
 #define ID_AA64PFR1_MPAMFRAC_SHIFT	16
@@ -847,6 +846,9 @@
 #define ID_AA64MMFR0_ASID_SHIFT		4
 #define ID_AA64MMFR0_PARANGE_SHIFT	0
 
+#define ID_AA64MMFR0_ASID_8		0x0
+#define ID_AA64MMFR0_ASID_16		0x2
+
 #define ID_AA64MMFR0_TGRAN4_NI			0xf
 #define ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN	0x0
 #define ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX	0x7
@@ -857,9 +859,16 @@
 #define ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN	0x1
 #define ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX	0xf
 
+#define ID_AA64MMFR0_PARANGE_32		0x0
+#define ID_AA64MMFR0_PARANGE_36		0x1
+#define ID_AA64MMFR0_PARANGE_40		0x2
+#define ID_AA64MMFR0_PARANGE_42		0x3
+#define ID_AA64MMFR0_PARANGE_44		0x4
 #define ID_AA64MMFR0_PARANGE_48		0x5
 #define ID_AA64MMFR0_PARANGE_52		0x6
 
+#define ARM64_MIN_PARANGE_BITS		32
+
 #define ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT	0x0
 #define ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE	0x1
 #define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN	0x2
@@ -904,6 +913,7 @@
 #define ID_AA64MMFR2_CNP_SHIFT		0
 
 /* id_aa64dfr0 */
+#define ID_AA64DFR0_MTPMU_SHIFT		48
 #define ID_AA64DFR0_TRBE_SHIFT		44
 #define ID_AA64DFR0_TRACE_FILT_SHIFT	40
 #define ID_AA64DFR0_DOUBLELOCK_SHIFT	36
@@ -1034,14 +1044,17 @@
 #define ID_AA64MMFR0_TGRAN_SHIFT		ID_AA64MMFR0_TGRAN4_SHIFT
 #define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN	ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN
 #define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX	ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX
+#define ID_AA64MMFR0_TGRAN_2_SHIFT		ID_AA64MMFR0_TGRAN4_2_SHIFT
 #elif defined(CONFIG_ARM64_16K_PAGES)
 #define ID_AA64MMFR0_TGRAN_SHIFT		ID_AA64MMFR0_TGRAN16_SHIFT
 #define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN	ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN
 #define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX	ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX
+#define ID_AA64MMFR0_TGRAN_2_SHIFT		ID_AA64MMFR0_TGRAN16_2_SHIFT
 #elif defined(CONFIG_ARM64_64K_PAGES)
 #define ID_AA64MMFR0_TGRAN_SHIFT		ID_AA64MMFR0_TGRAN64_SHIFT
 #define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN	ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN
 #define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX	ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX
+#define ID_AA64MMFR0_TGRAN_2_SHIFT		ID_AA64MMFR0_TGRAN64_2_SHIFT
 #endif
 
 #define MVFR2_FPMISC_SHIFT		4
@@ -1172,6 +1185,11 @@
 #define ICH_VTR_A3V_SHIFT	21
 #define ICH_VTR_A3V_MASK	(1 << ICH_VTR_A3V_SHIFT)
 
+#define ARM64_FEATURE_FIELD_BITS	4
+
+/* Create a mask for the feature bits of the specified feature. */
+#define ARM64_FEATURE_MASK(x)	(GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT))
+
 #ifdef __ASSEMBLY__
 
 	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index b5f08621fa29..190b494e22ab 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -430,17 +430,6 @@ extern unsigned long __must_check __arch_copy_to_user(void __user *to, const voi
 	__actu_ret;							\
 })
 
-extern unsigned long __must_check __arch_copy_in_user(void __user *to, const void __user *from, unsigned long n);
-#define raw_copy_in_user(to, from, n)					\
-({									\
-	unsigned long __aciu_ret;					\
-	uaccess_ttbr0_enable();						\
-	__aciu_ret = __arch_copy_in_user(__uaccess_mask_ptr(to),	\
-				    __uaccess_mask_ptr(from), (n));	\
-	uaccess_ttbr0_disable();					\
-	__aciu_ret;							\
-})
-
 #define INLINE_COPY_TO_USER
 #define INLINE_COPY_FROM_USER
 
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 727bfc3be99b..3cb206aea3db 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -38,7 +38,7 @@
 #define __ARM_NR_compat_set_tls		(__ARM_NR_COMPAT_BASE + 5)
 #define __ARM_NR_COMPAT_END		(__ARM_NR_COMPAT_BASE + 0x800)
 
-#define __NR_compat_syscalls		447
+#define __NR_compat_syscalls		449
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 03d4ca47d253..844f6ae58662 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -649,11 +649,11 @@ __SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch)
 #define __NR_inotify_rm_watch 318
 __SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch)
 #define __NR_mbind 319
-__SYSCALL(__NR_mbind, compat_sys_mbind)
+__SYSCALL(__NR_mbind, sys_mbind)
 #define __NR_get_mempolicy 320
-__SYSCALL(__NR_get_mempolicy, compat_sys_get_mempolicy)
+__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy)
 #define __NR_set_mempolicy 321
-__SYSCALL(__NR_set_mempolicy, compat_sys_set_mempolicy)
+__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy)
 #define __NR_openat 322
 __SYSCALL(__NR_openat, compat_sys_openat)
 #define __NR_mkdirat 323
@@ -699,7 +699,7 @@ __SYSCALL(__NR_tee, sys_tee)
 #define __NR_vmsplice 343
 __SYSCALL(__NR_vmsplice, sys_vmsplice)
 #define __NR_move_pages 344
-__SYSCALL(__NR_move_pages, compat_sys_move_pages)
+__SYSCALL(__NR_move_pages, sys_move_pages)
 #define __NR_getcpu 345
 __SYSCALL(__NR_getcpu, sys_getcpu)
 #define __NR_epoll_pwait 346
@@ -811,7 +811,7 @@ __SYSCALL(__NR_rseq, sys_rseq)
 #define __NR_io_pgetevents 399
 __SYSCALL(__NR_io_pgetevents, compat_sys_io_pgetevents)
 #define __NR_migrate_pages 400
-__SYSCALL(__NR_migrate_pages, compat_sys_migrate_pages)
+__SYSCALL(__NR_migrate_pages, sys_migrate_pages)
 #define __NR_kexec_file_load 401
 __SYSCALL(__NR_kexec_file_load, sys_kexec_file_load)
 /* 402 is unused */
@@ -901,6 +901,8 @@ __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
 __SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule)
 #define __NR_landlock_restrict_self 446
 __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)
+#define __NR_process_mrelease 448
+__SYSCALL(__NR_process_mrelease, sys_process_mrelease)
 
 /*
  * Please add new compat syscalls above this comment and update
diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c
index 7fa6828bb488..587543c6c51c 100644
--- a/arch/arm64/kernel/cacheinfo.c
+++ b/arch/arm64/kernel/cacheinfo.c
@@ -43,7 +43,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf,
 	this_leaf->type = type;
 }
 
-static int __init_cache_level(unsigned int cpu)
+int init_cache_level(unsigned int cpu)
 {
 	unsigned int ctype, level, leaves, fw_level;
 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
@@ -78,7 +78,7 @@ static int __init_cache_level(unsigned int cpu)
 	return 0;
 }
 
-static int __populate_cache_leaves(unsigned int cpu)
+int populate_cache_leaves(unsigned int cpu)
 {
 	unsigned int level, idx;
 	enum cache_type type;
@@ -97,6 +97,3 @@ static int __populate_cache_leaves(unsigned int cpu)
 	}
 	return 0;
 }
-
-DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
-DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index b2770d753ba3..f8a3067d10c6 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -240,8 +240,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
 	S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL3_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL2_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_ELx_64BIT_ONLY),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_ELx_64BIT_ONLY),
 	ARM64_FTR_END,
 };
 
@@ -1983,7 +1983,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64PFR0_EL0_SHIFT,
-		.min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT,
+		.min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT,
 	},
 #ifdef CONFIG_KVM
 	{
@@ -1994,7 +1994,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64PFR0_EL1_SHIFT,
-		.min_field_value = ID_AA64PFR0_EL1_32BIT_64BIT,
+		.min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT,
 	},
 	{
 		.desc = "Protected KVM",
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index 1006ed2d7c60..2276689b5411 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -82,14 +82,29 @@ int acpi_pci_bus_find_domain_nr(struct pci_bus *bus)
 
 int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
 {
-	if (!acpi_disabled) {
-		struct pci_config_window *cfg = bridge->bus->sysdata;
-		struct acpi_device *adev = to_acpi_device(cfg->parent);
-		struct device *bus_dev = &bridge->bus->dev;
+	struct pci_config_window *cfg;
+	struct acpi_device *adev;
+	struct device *bus_dev;
 
-		ACPI_COMPANION_SET(&bridge->dev, adev);
-		set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev)));
-	}
+	if (acpi_disabled)
+		return 0;
+
+	cfg = bridge->bus->sysdata;
+
+	/*
+	 * On Hyper-V there is no corresponding ACPI device for a root bridge,
+	 * therefore ->parent is set as NULL by the driver. And set 'adev' as
+	 * NULL in this case because there is no proper ACPI device.
+	 */
+	if (!cfg->parent)
+		adev = NULL;
+	else
+		adev = to_acpi_device(cfg->parent);
+
+	bus_dev = &bridge->bus->dev;
+
+	ACPI_COMPANION_SET(&bridge->dev, adev);
+	set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev)));
 
 	return 0;
 }
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 2bd270cd603e..19100fe8f7e4 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -6,9 +6,6 @@
  * Copyright (C) 1996-2000 Russell King - Converted to ARM.
  * Copyright (C) 2012 ARM Ltd.
  */
-
-#include <stdarg.h>
-
 #include <linux/compat.h>
 #include <linux/efi.h>
 #include <linux/elf.h>
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 709d2c433c5e..f6b1a88245db 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -181,6 +181,8 @@ SECTIONS
 	/* everything from this point to __init_begin will be marked RO NX */
 	RO_DATA(PAGE_SIZE)
 
+	HYPERVISOR_DATA_SECTIONS
+
 	idmap_pg_dir = .;
 	. += IDMAP_DIR_SIZE;
 	idmap_pg_end = .;
@@ -260,8 +262,6 @@ SECTIONS
 	_sdata = .;
 	RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
 
-	HYPERVISOR_DATA_SECTIONS
-
 	/*
 	 * Data written with the MMU off but read with the MMU on requires
 	 * cache lines to be invalidated, discarding up to a Cache Writeback
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index a4eba0908bfa..d7eec0b43744 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -26,6 +26,7 @@ menuconfig KVM
 	select HAVE_KVM_ARCH_TLB_FLUSH_ALL
 	select KVM_MMIO
 	select KVM_GENERIC_DIRTYLOG_READ_PROTECT
+	select KVM_XFER_TO_GUEST_WORK
 	select SRCU
 	select KVM_VFIO
 	select HAVE_KVM_EVENTFD
@@ -46,6 +47,15 @@ if KVM
 
 source "virt/kvm/Kconfig"
 
+config NVHE_EL2_DEBUG
+	bool "Debug mode for non-VHE EL2 object"
+	help
+	  Say Y here to enable the debug mode for the non-VHE KVM EL2 object.
+	  Failure reports will BUG() in the hypervisor. This is intended for
+	  local EL2 hypervisor development.
+
+	  If unsure, say N.
+
 endif # KVM
 
 endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 0ca72f5cda41..fe102cd2e518 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -6,6 +6,7 @@
 
 #include <linux/bug.h>
 #include <linux/cpu_pm.h>
+#include <linux/entry-kvm.h>
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/kvm_host.h>
@@ -15,6 +16,7 @@
 #include <linux/fs.h>
 #include <linux/mman.h>
 #include <linux/sched.h>
+#include <linux/kmemleak.h>
 #include <linux/kvm.h>
 #include <linux/kvm_irqfd.h>
 #include <linux/irqbypass.h>
@@ -42,10 +44,6 @@
 #include <kvm/arm_pmu.h>
 #include <kvm/arm_psci.h>
 
-#ifdef REQUIRES_VIRT
-__asm__(".arch_extension	virt");
-#endif
-
 static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
 DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
 
@@ -575,7 +573,7 @@ static void update_vmid(struct kvm_vmid *vmid)
 		kvm_call_hyp(__kvm_flush_vm_context);
 	}
 
-	vmid->vmid = kvm_next_vmid;
+	WRITE_ONCE(vmid->vmid, kvm_next_vmid);
 	kvm_next_vmid++;
 	kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1;
 
@@ -719,6 +717,45 @@ static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu)
 }
 
 /**
+ * kvm_vcpu_exit_request - returns true if the VCPU should *not* enter the guest
+ * @vcpu:	The VCPU pointer
+ * @ret:	Pointer to write optional return code
+ *
+ * Returns: true if the VCPU needs to return to a preemptible + interruptible
+ *	    and skip guest entry.
+ *
+ * This function disambiguates between two different types of exits: exits to a
+ * preemptible + interruptible kernel context and exits to userspace. For an
+ * exit to userspace, this function will write the return code to ret and return
+ * true. For an exit to preemptible + interruptible kernel context (i.e. check
+ * for pending work and re-enter), return true without writing to ret.
+ */
+static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret)
+{
+	struct kvm_run *run = vcpu->run;
+
+	/*
+	 * If we're using a userspace irqchip, then check if we need
+	 * to tell a userspace irqchip about timer or PMU level
+	 * changes and if so, exit to userspace (the actual level
+	 * state gets updated in kvm_timer_update_run and
+	 * kvm_pmu_update_run below).
+	 */
+	if (static_branch_unlikely(&userspace_irqchip_in_use)) {
+		if (kvm_timer_should_notify_user(vcpu) ||
+		    kvm_pmu_should_notify_user(vcpu)) {
+			*ret = -EINTR;
+			run->exit_reason = KVM_EXIT_INTR;
+			return true;
+		}
+	}
+
+	return kvm_request_pending(vcpu) ||
+			need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) ||
+			xfer_to_guest_mode_work_pending();
+}
+
+/**
  * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
  * @vcpu:	The VCPU pointer
  *
@@ -761,7 +798,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		/*
 		 * Check conditions before entering the guest
 		 */
-		cond_resched();
+		ret = xfer_to_guest_mode_handle_work(vcpu);
+		if (!ret)
+			ret = 1;
 
 		update_vmid(&vcpu->arch.hw_mmu->vmid);
 
@@ -781,30 +820,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		kvm_vgic_flush_hwstate(vcpu);
 
 		/*
-		 * Exit if we have a signal pending so that we can deliver the
-		 * signal to user space.
-		 */
-		if (signal_pending(current)) {
-			ret = -EINTR;
-			run->exit_reason = KVM_EXIT_INTR;
-		}
-
-		/*
-		 * If we're using a userspace irqchip, then check if we need
-		 * to tell a userspace irqchip about timer or PMU level
-		 * changes and if so, exit to userspace (the actual level
-		 * state gets updated in kvm_timer_update_run and
-		 * kvm_pmu_update_run below).
-		 */
-		if (static_branch_unlikely(&userspace_irqchip_in_use)) {
-			if (kvm_timer_should_notify_user(vcpu) ||
-			    kvm_pmu_should_notify_user(vcpu)) {
-				ret = -EINTR;
-				run->exit_reason = KVM_EXIT_INTR;
-			}
-		}
-
-		/*
 		 * Ensure we set mode to IN_GUEST_MODE after we disable
 		 * interrupts and before the final VCPU requests check.
 		 * See the comment in kvm_vcpu_exiting_guest_mode() and
@@ -812,8 +827,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		 */
 		smp_store_mb(vcpu->mode, IN_GUEST_MODE);
 
-		if (ret <= 0 || need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) ||
-		    kvm_request_pending(vcpu)) {
+		if (ret <= 0 || kvm_vcpu_exit_request(vcpu, &ret)) {
 			vcpu->mode = OUTSIDE_GUEST_MODE;
 			isb(); /* Ensure work in x_flush_hwstate is committed */
 			kvm_pmu_sync_hwstate(vcpu);
@@ -1039,7 +1053,7 @@ static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 			       const struct kvm_vcpu_init *init)
 {
 	unsigned int i, ret;
-	int phys_target = kvm_target_cpu();
+	u32 phys_target = kvm_target_cpu();
 
 	if (init->target != phys_target)
 		return -EINVAL;
@@ -1108,6 +1122,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 	}
 
 	vcpu_reset_hcr(vcpu);
+	vcpu->arch.cptr_el2 = CPTR_EL2_DEFAULT;
 
 	/*
 	 * Handle the "start in power-off" case.
@@ -1219,6 +1234,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		if (copy_from_user(&reg, argp, sizeof(reg)))
 			break;
 
+		/*
+		 * We could owe a reset due to PSCI. Handle the pending reset
+		 * here to ensure userspace register accesses are ordered after
+		 * the reset.
+		 */
+		if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
+			kvm_reset_vcpu(vcpu);
+
 		if (ioctl == KVM_SET_ONE_REG)
 			r = kvm_arm_set_reg(vcpu, &reg);
 		else
@@ -1700,11 +1723,6 @@ static bool init_psci_relay(void)
 	return true;
 }
 
-static int init_common_resources(void)
-{
-	return kvm_set_ipa_limit();
-}
-
 static int init_subsystems(void)
 {
 	int err = 0;
@@ -1958,56 +1976,17 @@ static void _kvm_host_prot_finalize(void *discard)
 	WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize));
 }
 
-static inline int pkvm_mark_hyp(phys_addr_t start, phys_addr_t end)
-{
-	return kvm_call_hyp_nvhe(__pkvm_mark_hyp, start, end);
-}
-
-#define pkvm_mark_hyp_section(__section)		\
-	pkvm_mark_hyp(__pa_symbol(__section##_start),	\
-			__pa_symbol(__section##_end))
-
 static int finalize_hyp_mode(void)
 {
-	int cpu, ret;
-
 	if (!is_protected_kvm_enabled())
 		return 0;
 
-	ret = pkvm_mark_hyp_section(__hyp_idmap_text);
-	if (ret)
-		return ret;
-
-	ret = pkvm_mark_hyp_section(__hyp_text);
-	if (ret)
-		return ret;
-
-	ret = pkvm_mark_hyp_section(__hyp_rodata);
-	if (ret)
-		return ret;
-
-	ret = pkvm_mark_hyp_section(__hyp_bss);
-	if (ret)
-		return ret;
-
-	ret = pkvm_mark_hyp(hyp_mem_base, hyp_mem_base + hyp_mem_size);
-	if (ret)
-		return ret;
-
-	for_each_possible_cpu(cpu) {
-		phys_addr_t start = virt_to_phys((void *)kvm_arm_hyp_percpu_base[cpu]);
-		phys_addr_t end = start + (PAGE_SIZE << nvhe_percpu_order());
-
-		ret = pkvm_mark_hyp(start, end);
-		if (ret)
-			return ret;
-
-		start = virt_to_phys((void *)per_cpu(kvm_arm_hyp_stack_page, cpu));
-		end = start + PAGE_SIZE;
-		ret = pkvm_mark_hyp(start, end);
-		if (ret)
-			return ret;
-	}
+	/*
+	 * Exclude HYP BSS from kmemleak so that it doesn't get peeked
+	 * at, which would end badly once the section is inaccessible.
+	 * None of other sections should ever be introspected.
+	 */
+	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
 
 	/*
 	 * Flip the static key upfront as that may no longer be possible
@@ -2019,11 +1998,6 @@ static int finalize_hyp_mode(void)
 	return 0;
 }
 
-static void check_kvm_target_cpu(void *ret)
-{
-	*(int *)ret = kvm_target_cpu();
-}
-
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
 {
 	struct kvm_vcpu *vcpu;
@@ -2083,7 +2057,6 @@ void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons)
 int kvm_arch_init(void *opaque)
 {
 	int err;
-	int ret, cpu;
 	bool in_hyp_mode;
 
 	if (!is_hyp_mode_available()) {
@@ -2098,15 +2071,7 @@ int kvm_arch_init(void *opaque)
 		kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \
 			 "Only trusted guests should be used on this system.\n");
 
-	for_each_online_cpu(cpu) {
-		smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
-		if (ret < 0) {
-			kvm_err("Error, CPU %d not supported!\n", cpu);
-			return -ENODEV;
-		}
-	}
-
-	err = init_common_resources();
+	err = kvm_set_ipa_limit();
 	if (err)
 		return err;
 
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index d5e79d7ee6e9..db9361338b2a 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -21,7 +21,7 @@
 				DBG_MDSCR_KDE | \
 				DBG_MDSCR_MDE)
 
-static DEFINE_PER_CPU(u32, mdcr_el2);
+static DEFINE_PER_CPU(u64, mdcr_el2);
 
 /**
  * save/restore_guest_debug_regs
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 1dfb83578277..5ce26bedf23c 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -31,8 +31,6 @@
 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
 	KVM_GENERIC_VM_STATS()
 };
-static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
-		sizeof(struct kvm_vm_stat) / sizeof(u64));
 
 const struct kvm_stats_header kvm_vm_stats_header = {
 	.name_size = KVM_STATS_NAME_SIZE,
@@ -50,10 +48,9 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
 	STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
 	STATS_DESC_COUNTER(VCPU, mmio_exit_user),
 	STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
+	STATS_DESC_COUNTER(VCPU, signal_exits),
 	STATS_DESC_COUNTER(VCPU, exits)
 };
-static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
-		sizeof(struct kvm_vcpu_stat) / sizeof(u64));
 
 const struct kvm_stats_header kvm_vcpu_stats_header = {
 	.name_size = KVM_STATS_NAME_SIZE,
@@ -842,7 +839,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
-int __attribute_const__ kvm_target_cpu(void)
+u32 __attribute_const__ kvm_target_cpu(void)
 {
 	unsigned long implementor = read_cpuid_implementor();
 	unsigned long part_number = read_cpuid_part_number();
@@ -874,7 +871,7 @@ int __attribute_const__ kvm_target_cpu(void)
 
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
 {
-	int target = kvm_target_cpu();
+	u32 target = kvm_target_cpu();
 
 	if (target < 0)
 		return -ENODEV;
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 6f48336b1d86..275a27368a04 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -113,34 +113,20 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
  * guest and host are using the same debug facilities it will be up to
  * userspace to re-inject the correct exception for guest delivery.
  *
- * @return: 0 (while setting vcpu->run->exit_reason), -1 for error
+ * @return: 0 (while setting vcpu->run->exit_reason)
  */
 static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
 	u32 esr = kvm_vcpu_get_esr(vcpu);
-	int ret = 0;
 
 	run->exit_reason = KVM_EXIT_DEBUG;
 	run->debug.arch.hsr = esr;
 
-	switch (ESR_ELx_EC(esr)) {
-	case ESR_ELx_EC_WATCHPT_LOW:
+	if (ESR_ELx_EC(esr) == ESR_ELx_EC_WATCHPT_LOW)
 		run->debug.arch.far = vcpu->arch.fault.far_el2;
-		fallthrough;
-	case ESR_ELx_EC_SOFTSTP_LOW:
-	case ESR_ELx_EC_BREAKPT_LOW:
-	case ESR_ELx_EC_BKPT32:
-	case ESR_ELx_EC_BRK64:
-		break;
-	default:
-		kvm_err("%s: un-handled case esr: %#08x\n",
-			__func__, (unsigned int) esr);
-		ret = -1;
-		break;
-	}
 
-	return ret;
+	return 0;
 }
 
 static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu)
@@ -292,11 +278,12 @@ void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index)
 		kvm_handle_guest_serror(vcpu, kvm_vcpu_get_esr(vcpu));
 }
 
-void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr,
+void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
+					      u64 elr_virt, u64 elr_phys,
 					      u64 par, uintptr_t vcpu,
 					      u64 far, u64 hpfar) {
-	u64 elr_in_kimg = __phys_to_kimg(__hyp_pa(elr));
-	u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr;
+	u64 elr_in_kimg = __phys_to_kimg(elr_phys);
+	u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr_virt;
 	u64 mode = spsr & PSR_MODE_MASK;
 
 	/*
@@ -309,20 +296,24 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr,
 		kvm_err("Invalid host exception to nVHE hyp!\n");
 	} else if (ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 &&
 		   (esr & ESR_ELx_BRK64_ISS_COMMENT_MASK) == BUG_BRK_IMM) {
-		struct bug_entry *bug = find_bug(elr_in_kimg);
 		const char *file = NULL;
 		unsigned int line = 0;
 
 		/* All hyp bugs, including warnings, are treated as fatal. */
-		if (bug)
-			bug_get_file_line(bug, &file, &line);
+		if (!is_protected_kvm_enabled() ||
+		    IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
+			struct bug_entry *bug = find_bug(elr_in_kimg);
+
+			if (bug)
+				bug_get_file_line(bug, &file, &line);
+		}
 
 		if (file)
 			kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line);
 		else
-			kvm_err("nVHE hyp BUG at: %016llx!\n", elr + hyp_offset);
+			kvm_err("nVHE hyp BUG at: %016llx!\n", elr_virt + hyp_offset);
 	} else {
-		kvm_err("nVHE hyp panic at: %016llx!\n", elr + hyp_offset);
+		kvm_err("nVHE hyp panic at: %016llx!\n", elr_virt + hyp_offset);
 	}
 
 	/*
@@ -334,5 +325,5 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr,
 	kvm_err("Hyp Offset: 0x%llx\n", hyp_offset);
 
 	panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%016lx\n",
-	      spsr, elr, esr, far, hpfar, par, vcpu);
+	      spsr, elr_virt, esr, far, hpfar, par, vcpu);
 }
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index e4a2f295a394..a0e78a6027be 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -92,11 +92,15 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
 		write_sysreg(0, pmselr_el0);
 		write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
 	}
+
+	vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2);
 	write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
 }
 
-static inline void __deactivate_traps_common(void)
+static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
 {
+	write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2);
+
 	write_sysreg(0, hstr_el2);
 	if (kvm_arm_support_pmu_v3())
 		write_sysreg(0, pmuserenr_el0);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 9c227d87c36d..b58c910babaf 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -12,6 +12,32 @@
 #include <asm/virt.h>
 #include <nvhe/spinlock.h>
 
+/*
+ * SW bits 0-1 are reserved to track the memory ownership state of each page:
+ *   00: The page is owned exclusively by the page-table owner.
+ *   01: The page is owned by the page-table owner, but is shared
+ *       with another entity.
+ *   10: The page is shared with, but not owned by the page-table owner.
+ *   11: Reserved for future use (lending).
+ */
+enum pkvm_page_state {
+	PKVM_PAGE_OWNED			= 0ULL,
+	PKVM_PAGE_SHARED_OWNED		= KVM_PGTABLE_PROT_SW0,
+	PKVM_PAGE_SHARED_BORROWED	= KVM_PGTABLE_PROT_SW1,
+};
+
+#define PKVM_PAGE_STATE_PROT_MASK	(KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
+static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot,
+						 enum pkvm_page_state state)
+{
+	return (prot & ~PKVM_PAGE_STATE_PROT_MASK) | state;
+}
+
+static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot)
+{
+	return prot & PKVM_PAGE_STATE_PROT_MASK;
+}
+
 struct host_kvm {
 	struct kvm_arch arch;
 	struct kvm_pgtable pgt;
@@ -20,16 +46,21 @@ struct host_kvm {
 };
 extern struct host_kvm host_kvm;
 
+extern const u8 pkvm_hyp_id;
+
 int __pkvm_prot_finalize(void);
-int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end);
+int __pkvm_host_share_hyp(u64 pfn);
 
+bool addr_is_memory(phys_addr_t phys);
+int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
+int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id);
 int kvm_host_prepare_stage2(void *pgt_pool_base);
 void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
 
 static __always_inline void __load_host_stage2(void)
 {
 	if (static_branch_likely(&kvm_protected_mode_initialized))
-		__load_stage2(&host_kvm.arch.mmu, host_kvm.arch.vtcr);
+		__load_stage2(&host_kvm.arch.mmu, &host_kvm.arch);
 	else
 		write_sysreg(0, vttbr_el2);
 }
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index 8ec3a5a7744b..c9a8f535212e 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -23,8 +23,7 @@ int hyp_map_vectors(void);
 int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back);
 int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot);
 int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
-int __pkvm_create_mappings(unsigned long start, unsigned long size,
-			   unsigned long phys, enum kvm_pgtable_prot prot);
+int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot);
 unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
 					    enum kvm_pgtable_prot prot);
 
diff --git a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
index 76b537f8d1c6..4652fd04bdbe 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
@@ -15,6 +15,7 @@
 
 #include <asm/alternative.h>
 #include <asm/lse.h>
+#include <asm/rwonce.h>
 
 typedef union hyp_spinlock {
 	u32	__val;
@@ -89,4 +90,28 @@ static inline void hyp_spin_unlock(hyp_spinlock_t *lock)
 	: "memory");
 }
 
+static inline bool hyp_spin_is_locked(hyp_spinlock_t *lock)
+{
+	hyp_spinlock_t lockval = READ_ONCE(*lock);
+
+	return lockval.owner != lockval.next;
+}
+
+#ifdef CONFIG_NVHE_EL2_DEBUG
+static inline void hyp_assert_lock_held(hyp_spinlock_t *lock)
+{
+	/*
+	 * The __pkvm_init() path accesses protected data-structures without
+	 * holding locks as the other CPUs are guaranteed to not enter EL2
+	 * concurrently at this point in time. The point by which EL2 is
+	 * initialized on all CPUs is reflected in the pkvm static key, so
+	 * wait until it is set before checking the lock state.
+	 */
+	if (static_branch_likely(&kvm_protected_mode_initialized))
+		BUG_ON(!hyp_spin_is_locked(lock));
+}
+#else
+static inline void hyp_assert_lock_held(hyp_spinlock_t *lock) { }
+#endif
+
 #endif /* __ARM64_KVM_NVHE_SPINLOCK_H__ */
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
index 7d3f25868cae..df361d839902 100644
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -109,7 +109,7 @@ void __debug_switch_to_host(struct kvm_vcpu *vcpu)
 	__debug_switch_to_host_common(vcpu);
 }
 
-u32 __kvm_get_mdcr_el2(void)
+u64 __kvm_get_mdcr_el2(void)
 {
 	return read_sysreg(mdcr_el2);
 }
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
index 2b23400e0fb3..4b652ffb591d 100644
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -7,6 +7,7 @@
 #include <linux/linkage.h>
 
 #include <asm/assembler.h>
+#include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmu.h>
 
@@ -85,12 +86,24 @@ SYM_FUNC_START(__hyp_do_panic)
 
 	mov	x29, x0
 
+#ifdef CONFIG_NVHE_EL2_DEBUG
+	/* Ensure host stage-2 is disabled */
+	mrs	x0, hcr_el2
+	bic	x0, x0, #HCR_VM
+	msr	hcr_el2, x0
+	isb
+	tlbi	vmalls12e1
+	dsb	nsh
+#endif
+
 	/* Load the panic arguments into x0-7 */
 	mrs	x0, esr_el2
-	get_vcpu_ptr x4, x5
-	mrs	x5, far_el2
-	mrs	x6, hpfar_el2
-	mov	x7, xzr			// Unused argument
+	mov	x4, x3
+	mov	x3, x2
+	hyp_pa	x3, x6
+	get_vcpu_ptr x5, x6
+	mrs	x6, far_el2
+	mrs	x7, hpfar_el2
 
 	/* Enter the host, conditionally restoring the host context. */
 	cbz	x29, __host_enter_without_restoring
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 1632f001f4ed..2da6aa8da868 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -140,14 +140,11 @@ static void handle___pkvm_cpu_set_vector(struct kvm_cpu_context *host_ctxt)
 	cpu_reg(host_ctxt, 1) = pkvm_cpu_set_vector(slot);
 }
 
-static void handle___pkvm_create_mappings(struct kvm_cpu_context *host_ctxt)
+static void handle___pkvm_host_share_hyp(struct kvm_cpu_context *host_ctxt)
 {
-	DECLARE_REG(unsigned long, start, host_ctxt, 1);
-	DECLARE_REG(unsigned long, size, host_ctxt, 2);
-	DECLARE_REG(unsigned long, phys, host_ctxt, 3);
-	DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 4);
+	DECLARE_REG(u64, pfn, host_ctxt, 1);
 
-	cpu_reg(host_ctxt, 1) = __pkvm_create_mappings(start, size, phys, prot);
+	cpu_reg(host_ctxt, 1) = __pkvm_host_share_hyp(pfn);
 }
 
 static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt)
@@ -163,14 +160,6 @@ static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt)
 {
 	cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize();
 }
-
-static void handle___pkvm_mark_hyp(struct kvm_cpu_context *host_ctxt)
-{
-	DECLARE_REG(phys_addr_t, start, host_ctxt, 1);
-	DECLARE_REG(phys_addr_t, end, host_ctxt, 2);
-
-	cpu_reg(host_ctxt, 1) = __pkvm_mark_hyp(start, end);
-}
 typedef void (*hcall_t)(struct kvm_cpu_context *);
 
 #define HANDLE_FUNC(x)	[__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
@@ -193,10 +182,9 @@ static const hcall_t host_hcall[] = {
 	HANDLE_FUNC(__vgic_v3_restore_aprs),
 	HANDLE_FUNC(__pkvm_init),
 	HANDLE_FUNC(__pkvm_cpu_set_vector),
-	HANDLE_FUNC(__pkvm_create_mappings),
+	HANDLE_FUNC(__pkvm_host_share_hyp),
 	HANDLE_FUNC(__pkvm_create_private_mapping),
 	HANDLE_FUNC(__pkvm_prot_finalize),
-	HANDLE_FUNC(__pkvm_mark_hyp),
 };
 
 static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index a6ce991b1467..bacd493a4eac 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -31,7 +31,7 @@ static struct hyp_pool host_s2_pool;
 u64 id_aa64mmfr0_el1_sys_val;
 u64 id_aa64mmfr1_el1_sys_val;
 
-static const u8 pkvm_hyp_id = 1;
+const u8 pkvm_hyp_id = 1;
 
 static void *host_s2_zalloc_pages_exact(size_t size)
 {
@@ -89,6 +89,8 @@ static void prepare_host_vtcr(void)
 					  id_aa64mmfr1_el1_sys_val, phys_shift);
 }
 
+static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot);
+
 int kvm_host_prepare_stage2(void *pgt_pool_base)
 {
 	struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu;
@@ -101,16 +103,17 @@ int kvm_host_prepare_stage2(void *pgt_pool_base)
 	if (ret)
 		return ret;
 
-	ret = kvm_pgtable_stage2_init_flags(&host_kvm.pgt, &host_kvm.arch,
-					    &host_kvm.mm_ops, KVM_HOST_S2_FLAGS);
+	ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, &host_kvm.arch,
+					&host_kvm.mm_ops, KVM_HOST_S2_FLAGS,
+					host_stage2_force_pte_cb);
 	if (ret)
 		return ret;
 
 	mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd);
 	mmu->arch = &host_kvm.arch;
 	mmu->pgt = &host_kvm.pgt;
-	mmu->vmid.vmid_gen = 0;
-	mmu->vmid.vmid = 0;
+	WRITE_ONCE(mmu->vmid.vmid_gen, 0);
+	WRITE_ONCE(mmu->vmid.vmid, 0);
 
 	return 0;
 }
@@ -126,7 +129,7 @@ int __pkvm_prot_finalize(void)
 	kvm_flush_dcache_to_poc(params, sizeof(*params));
 
 	write_sysreg(params->hcr_el2, hcr_el2);
-	__load_stage2(&host_kvm.arch.mmu, host_kvm.arch.vtcr);
+	__load_stage2(&host_kvm.arch.mmu, &host_kvm.arch);
 
 	/*
 	 * Make sure to have an ISB before the TLB maintenance below but only
@@ -159,6 +162,11 @@ static int host_stage2_unmap_dev_all(void)
 	return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr);
 }
 
+struct kvm_mem_range {
+	u64 start;
+	u64 end;
+};
+
 static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
 {
 	int cur, left = 0, right = hyp_memblock_nr;
@@ -189,16 +197,26 @@ static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
 	return false;
 }
 
+bool addr_is_memory(phys_addr_t phys)
+{
+	struct kvm_mem_range range;
+
+	return find_mem_range(phys, &range);
+}
+
+static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
+{
+	return range->start <= addr && addr < range->end;
+}
+
 static bool range_is_memory(u64 start, u64 end)
 {
-	struct kvm_mem_range r1, r2;
+	struct kvm_mem_range r;
 
-	if (!find_mem_range(start, &r1) || !find_mem_range(end - 1, &r2))
-		return false;
-	if (r1.start != r2.start)
+	if (!find_mem_range(start, &r))
 		return false;
 
-	return true;
+	return is_in_mem_range(end - 1, &r);
 }
 
 static inline int __host_stage2_idmap(u64 start, u64 end,
@@ -208,60 +226,208 @@ static inline int __host_stage2_idmap(u64 start, u64 end,
 				      prot, &host_s2_pool);
 }
 
+/*
+ * The pool has been provided with enough pages to cover all of memory with
+ * page granularity, but it is difficult to know how much of the MMIO range
+ * we will need to cover upfront, so we may need to 'recycle' the pages if we
+ * run out.
+ */
+#define host_stage2_try(fn, ...)					\
+	({								\
+		int __ret;						\
+		hyp_assert_lock_held(&host_kvm.lock);			\
+		__ret = fn(__VA_ARGS__);				\
+		if (__ret == -ENOMEM) {					\
+			__ret = host_stage2_unmap_dev_all();		\
+			if (!__ret)					\
+				__ret = fn(__VA_ARGS__);		\
+		}							\
+		__ret;							\
+	 })
+
+static inline bool range_included(struct kvm_mem_range *child,
+				  struct kvm_mem_range *parent)
+{
+	return parent->start <= child->start && child->end <= parent->end;
+}
+
+static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
+{
+	struct kvm_mem_range cur;
+	kvm_pte_t pte;
+	u32 level;
+	int ret;
+
+	hyp_assert_lock_held(&host_kvm.lock);
+	ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, &level);
+	if (ret)
+		return ret;
+
+	if (kvm_pte_valid(pte))
+		return -EAGAIN;
+
+	if (pte)
+		return -EPERM;
+
+	do {
+		u64 granule = kvm_granule_size(level);
+		cur.start = ALIGN_DOWN(addr, granule);
+		cur.end = cur.start + granule;
+		level++;
+	} while ((level < KVM_PGTABLE_MAX_LEVELS) &&
+			!(kvm_level_supports_block_mapping(level) &&
+			  range_included(&cur, range)));
+
+	*range = cur;
+
+	return 0;
+}
+
+int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
+			     enum kvm_pgtable_prot prot)
+{
+	hyp_assert_lock_held(&host_kvm.lock);
+
+	return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
+}
+
+int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
+{
+	hyp_assert_lock_held(&host_kvm.lock);
+
+	return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt,
+			       addr, size, &host_s2_pool, owner_id);
+}
+
+static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
+{
+	/*
+	 * Block mappings must be used with care in the host stage-2 as a
+	 * kvm_pgtable_stage2_map() operation targeting a page in the range of
+	 * an existing block will delete the block under the assumption that
+	 * mappings in the rest of the block range can always be rebuilt lazily.
+	 * That assumption is correct for the host stage-2 with RWX mappings
+	 * targeting memory or RW mappings targeting MMIO ranges (see
+	 * host_stage2_idmap() below which implements some of the host memory
+	 * abort logic). However, this is not safe for any other mappings where
+	 * the host stage-2 page-table is in fact the only place where this
+	 * state is stored. In all those cases, it is safer to use page-level
+	 * mappings, hence avoiding to lose the state because of side-effects in
+	 * kvm_pgtable_stage2_map().
+	 */
+	if (range_is_memory(addr, end))
+		return prot != PKVM_HOST_MEM_PROT;
+	else
+		return prot != PKVM_HOST_MMIO_PROT;
+}
+
 static int host_stage2_idmap(u64 addr)
 {
-	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W;
 	struct kvm_mem_range range;
 	bool is_memory = find_mem_range(addr, &range);
+	enum kvm_pgtable_prot prot;
 	int ret;
 
-	if (is_memory)
-		prot |= KVM_PGTABLE_PROT_X;
+	prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;
 
 	hyp_spin_lock(&host_kvm.lock);
-	ret = kvm_pgtable_stage2_find_range(&host_kvm.pgt, addr, prot, &range);
+	ret = host_stage2_adjust_range(addr, &range);
 	if (ret)
 		goto unlock;
 
-	ret = __host_stage2_idmap(range.start, range.end, prot);
-	if (ret != -ENOMEM)
+	ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
+unlock:
+	hyp_spin_unlock(&host_kvm.lock);
+
+	return ret;
+}
+
+static inline bool check_prot(enum kvm_pgtable_prot prot,
+			      enum kvm_pgtable_prot required,
+			      enum kvm_pgtable_prot denied)
+{
+	return (prot & (required | denied)) == required;
+}
+
+int __pkvm_host_share_hyp(u64 pfn)
+{
+	phys_addr_t addr = hyp_pfn_to_phys(pfn);
+	enum kvm_pgtable_prot prot, cur;
+	void *virt = __hyp_va(addr);
+	enum pkvm_page_state state;
+	kvm_pte_t pte;
+	int ret;
+
+	if (!addr_is_memory(addr))
+		return -EINVAL;
+
+	hyp_spin_lock(&host_kvm.lock);
+	hyp_spin_lock(&pkvm_pgd_lock);
+
+	ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, NULL);
+	if (ret)
 		goto unlock;
+	if (!pte)
+		goto map_shared;
 
 	/*
-	 * The pool has been provided with enough pages to cover all of memory
-	 * with page granularity, but it is difficult to know how much of the
-	 * MMIO range we will need to cover upfront, so we may need to 'recycle'
-	 * the pages if we run out.
+	 * Check attributes in the host stage-2 PTE. We need the page to be:
+	 *  - mapped RWX as we're sharing memory;
+	 *  - not borrowed, as that implies absence of ownership.
+	 * Otherwise, we can't let it got through
 	 */
-	ret = host_stage2_unmap_dev_all();
-	if (ret)
+	cur = kvm_pgtable_stage2_pte_prot(pte);
+	prot = pkvm_mkstate(0, PKVM_PAGE_SHARED_BORROWED);
+	if (!check_prot(cur, PKVM_HOST_MEM_PROT, prot)) {
+		ret = -EPERM;
 		goto unlock;
+	}
 
-	ret = __host_stage2_idmap(range.start, range.end, prot);
+	state = pkvm_getstate(cur);
+	if (state == PKVM_PAGE_OWNED)
+		goto map_shared;
 
-unlock:
-	hyp_spin_unlock(&host_kvm.lock);
+	/*
+	 * Tolerate double-sharing the same page, but this requires
+	 * cross-checking the hypervisor stage-1.
+	 */
+	if (state != PKVM_PAGE_SHARED_OWNED) {
+		ret = -EPERM;
+		goto unlock;
+	}
 
-	return ret;
-}
+	ret = kvm_pgtable_get_leaf(&pkvm_pgtable, (u64)virt, &pte, NULL);
+	if (ret)
+		goto unlock;
 
-int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end)
-{
-	int ret;
+	/*
+	 * If the page has been shared with the hypervisor, it must be
+	 * already mapped as SHARED_BORROWED in its stage-1.
+	 */
+	cur = kvm_pgtable_hyp_pte_prot(pte);
+	prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
+	if (!check_prot(cur, prot, ~prot))
+		ret = -EPERM;
+	goto unlock;
 
+map_shared:
 	/*
-	 * host_stage2_unmap_dev_all() currently relies on MMIO mappings being
-	 * non-persistent, so don't allow changing page ownership in MMIO range.
+	 * If the page is not yet shared, adjust mappings in both page-tables
+	 * while both locks are held.
 	 */
-	if (!range_is_memory(start, end))
-		return -EINVAL;
+	prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
+	ret = pkvm_create_mappings_locked(virt, virt + PAGE_SIZE, prot);
+	BUG_ON(ret);
 
-	hyp_spin_lock(&host_kvm.lock);
-	ret = kvm_pgtable_stage2_set_owner(&host_kvm.pgt, start, end - start,
-					   &host_s2_pool, pkvm_hyp_id);
+	prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED);
+	ret = host_stage2_idmap_locked(addr, PAGE_SIZE, prot);
+	BUG_ON(ret);
+
+unlock:
+	hyp_spin_unlock(&pkvm_pgd_lock);
 	hyp_spin_unlock(&host_kvm.lock);
 
-	return ret != -EAGAIN ? ret : 0;
+	return ret;
 }
 
 void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index a8efdf0f9003..2fabeceb889a 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -23,8 +23,8 @@ u64 __io_map_base;
 struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS];
 unsigned int hyp_memblock_nr;
 
-int __pkvm_create_mappings(unsigned long start, unsigned long size,
-			  unsigned long phys, enum kvm_pgtable_prot prot)
+static int __pkvm_create_mappings(unsigned long start, unsigned long size,
+				  unsigned long phys, enum kvm_pgtable_prot prot)
 {
 	int err;
 
@@ -67,13 +67,15 @@ out:
 	return addr;
 }
 
-int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
+int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot)
 {
 	unsigned long start = (unsigned long)from;
 	unsigned long end = (unsigned long)to;
 	unsigned long virt_addr;
 	phys_addr_t phys;
 
+	hyp_assert_lock_held(&pkvm_pgd_lock);
+
 	start = start & PAGE_MASK;
 	end = PAGE_ALIGN(end);
 
@@ -81,7 +83,8 @@ int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
 		int err;
 
 		phys = hyp_virt_to_phys((void *)virt_addr);
-		err = __pkvm_create_mappings(virt_addr, PAGE_SIZE, phys, prot);
+		err = kvm_pgtable_hyp_map(&pkvm_pgtable, virt_addr, PAGE_SIZE,
+					  phys, prot);
 		if (err)
 			return err;
 	}
@@ -89,6 +92,17 @@ int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
 	return 0;
 }
 
+int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
+{
+	int ret;
+
+	hyp_spin_lock(&pkvm_pgd_lock);
+	ret = pkvm_create_mappings_locked(from, to, prot);
+	hyp_spin_unlock(&pkvm_pgd_lock);
+
+	return ret;
+}
+
 int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back)
 {
 	unsigned long start, end;
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 0b574d106519..57c27846320f 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -58,6 +58,7 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
 {
 	void *start, *end, *virt = hyp_phys_to_virt(phys);
 	unsigned long pgt_size = hyp_s1_pgtable_pages() << PAGE_SHIFT;
+	enum kvm_pgtable_prot prot;
 	int ret, i;
 
 	/* Recreate the hyp page-table using the early page allocator */
@@ -83,10 +84,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
 	if (ret)
 		return ret;
 
-	ret = pkvm_create_mappings(__start_rodata, __end_rodata, PAGE_HYP_RO);
-	if (ret)
-		return ret;
-
 	ret = pkvm_create_mappings(__hyp_rodata_start, __hyp_rodata_end, PAGE_HYP_RO);
 	if (ret)
 		return ret;
@@ -95,10 +92,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
 	if (ret)
 		return ret;
 
-	ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, PAGE_HYP_RO);
-	if (ret)
-		return ret;
-
 	ret = pkvm_create_mappings(virt, virt + size, PAGE_HYP);
 	if (ret)
 		return ret;
@@ -117,6 +110,24 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
 			return ret;
 	}
 
+	/*
+	 * Map the host's .bss and .rodata sections RO in the hypervisor, but
+	 * transfer the ownership from the host to the hypervisor itself to
+	 * make sure it can't be donated or shared with another entity.
+	 *
+	 * The ownership transition requires matching changes in the host
+	 * stage-2. This will be done later (see finalize_host_mappings()) once
+	 * the hyp_vmemmap is addressable.
+	 */
+	prot = pkvm_mkstate(PAGE_HYP_RO, PKVM_PAGE_SHARED_OWNED);
+	ret = pkvm_create_mappings(__start_rodata, __end_rodata, prot);
+	if (ret)
+		return ret;
+
+	ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, prot);
+	if (ret)
+		return ret;
+
 	return 0;
 }
 
@@ -148,6 +159,57 @@ static void hpool_put_page(void *addr)
 	hyp_put_page(&hpool, addr);
 }
 
+static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
+					 kvm_pte_t *ptep,
+					 enum kvm_pgtable_walk_flags flag,
+					 void * const arg)
+{
+	enum kvm_pgtable_prot prot;
+	enum pkvm_page_state state;
+	kvm_pte_t pte = *ptep;
+	phys_addr_t phys;
+
+	if (!kvm_pte_valid(pte))
+		return 0;
+
+	if (level != (KVM_PGTABLE_MAX_LEVELS - 1))
+		return -EINVAL;
+
+	phys = kvm_pte_to_phys(pte);
+	if (!addr_is_memory(phys))
+		return 0;
+
+	/*
+	 * Adjust the host stage-2 mappings to match the ownership attributes
+	 * configured in the hypervisor stage-1.
+	 */
+	state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
+	switch (state) {
+	case PKVM_PAGE_OWNED:
+		return host_stage2_set_owner_locked(phys, PAGE_SIZE, pkvm_hyp_id);
+	case PKVM_PAGE_SHARED_OWNED:
+		prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_BORROWED);
+		break;
+	case PKVM_PAGE_SHARED_BORROWED:
+		prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return host_stage2_idmap_locked(phys, PAGE_SIZE, prot);
+}
+
+static int finalize_host_mappings(void)
+{
+	struct kvm_pgtable_walker walker = {
+		.cb	= finalize_host_mappings_walker,
+		.flags	= KVM_PGTABLE_WALK_LEAF,
+	};
+
+	return kvm_pgtable_walk(&pkvm_pgtable, 0, BIT(pkvm_pgtable.ia_bits), &walker);
+}
+
 void __noreturn __pkvm_init_finalise(void)
 {
 	struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
@@ -167,6 +229,10 @@ void __noreturn __pkvm_init_finalise(void)
 	if (ret)
 		goto out;
 
+	ret = finalize_host_mappings();
+	if (ret)
+		goto out;
+
 	pkvm_pgtable_mm_ops = (struct kvm_pgtable_mm_ops) {
 		.zalloc_page = hyp_zalloc_hyp_page,
 		.phys_to_virt = hyp_phys_to_virt,
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index f7af9688c1f7..a34b01cc8ab9 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -41,7 +41,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
 	___activate_traps(vcpu);
 	__activate_traps_common(vcpu);
 
-	val = CPTR_EL2_DEFAULT;
+	val = vcpu->arch.cptr_el2;
 	val |= CPTR_EL2_TTA | CPTR_EL2_TAM;
 	if (!update_fp_enabled(vcpu)) {
 		val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
@@ -69,12 +69,10 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
 static void __deactivate_traps(struct kvm_vcpu *vcpu)
 {
 	extern char __kvm_hyp_host_vector[];
-	u64 mdcr_el2, cptr;
+	u64 cptr;
 
 	___deactivate_traps(vcpu);
 
-	mdcr_el2 = read_sysreg(mdcr_el2);
-
 	if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
 		u64 val;
 
@@ -92,13 +90,8 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
 		isb();
 	}
 
-	__deactivate_traps_common();
-
-	mdcr_el2 &= MDCR_EL2_HPMN_MASK;
-	mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
-	mdcr_el2 |= MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT;
+	__deactivate_traps_common(vcpu);
 
-	write_sysreg(mdcr_el2, mdcr_el2);
 	write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2);
 
 	cptr = CPTR_EL2_DEFAULT;
@@ -170,6 +163,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpu_context *host_ctxt;
 	struct kvm_cpu_context *guest_ctxt;
+	struct kvm_s2_mmu *mmu;
 	bool pmu_switch_needed;
 	u64 exit_code;
 
@@ -213,7 +207,8 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 	__sysreg32_restore_state(vcpu);
 	__sysreg_restore_state_nvhe(guest_ctxt);
 
-	__load_guest_stage2(kern_hyp_va(vcpu->arch.hw_mmu));
+	mmu = kern_hyp_va(vcpu->arch.hw_mmu);
+	__load_stage2(mmu, kern_hyp_va(mmu->arch));
 	__activate_traps(vcpu);
 
 	__hyp_vgic_restore_state(vcpu);
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index 38ed0f6f2703..d296d617f589 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -34,12 +34,12 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
 	}
 
 	/*
-	 * __load_guest_stage2() includes an ISB only when the AT
+	 * __load_stage2() includes an ISB only when the AT
 	 * workaround is applied. Take care of the opposite condition,
 	 * ensuring that we always have an ISB, but not two ISBs back
 	 * to back.
 	 */
-	__load_guest_stage2(mmu);
+	__load_stage2(mmu, kern_hyp_va(mmu->arch));
 	asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));
 }
 
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 05321f4165e3..f8ceebe4982e 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -11,16 +11,12 @@
 #include <asm/kvm_pgtable.h>
 #include <asm/stage2_pgtable.h>
 
-#define KVM_PTE_VALID			BIT(0)
 
 #define KVM_PTE_TYPE			BIT(1)
 #define KVM_PTE_TYPE_BLOCK		0
 #define KVM_PTE_TYPE_PAGE		1
 #define KVM_PTE_TYPE_TABLE		1
 
-#define KVM_PTE_ADDR_MASK		GENMASK(47, PAGE_SHIFT)
-#define KVM_PTE_ADDR_51_48		GENMASK(15, 12)
-
 #define KVM_PTE_LEAF_ATTR_LO		GENMASK(11, 2)
 
 #define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX	GENMASK(4, 2)
@@ -40,6 +36,8 @@
 
 #define KVM_PTE_LEAF_ATTR_HI		GENMASK(63, 51)
 
+#define KVM_PTE_LEAF_ATTR_HI_SW		GENMASK(58, 55)
+
 #define KVM_PTE_LEAF_ATTR_HI_S1_XN	BIT(54)
 
 #define KVM_PTE_LEAF_ATTR_HI_S2_XN	BIT(54)
@@ -48,9 +46,7 @@
 					 KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
 					 KVM_PTE_LEAF_ATTR_HI_S2_XN)
 
-#define KVM_PTE_LEAF_ATTR_S2_IGNORED	GENMASK(58, 55)
-
-#define KVM_INVALID_PTE_OWNER_MASK	GENMASK(63, 56)
+#define KVM_INVALID_PTE_OWNER_MASK	GENMASK(9, 2)
 #define KVM_MAX_OWNER_ID		1
 
 struct kvm_pgtable_walk_data {
@@ -61,17 +57,6 @@ struct kvm_pgtable_walk_data {
 	u64				end;
 };
 
-static u64 kvm_granule_shift(u32 level)
-{
-	/* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
-	return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
-}
-
-static u64 kvm_granule_size(u32 level)
-{
-	return BIT(kvm_granule_shift(level));
-}
-
 #define KVM_PHYS_INVALID (-1ULL)
 
 static bool kvm_phys_is_valid(u64 phys)
@@ -79,15 +64,6 @@ static bool kvm_phys_is_valid(u64 phys)
 	return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_PARANGE_MAX));
 }
 
-static bool kvm_level_supports_block_mapping(u32 level)
-{
-	/*
-	 * Reject invalid block mappings and don't bother with 4TB mappings for
-	 * 52-bit PAs.
-	 */
-	return !(level == 0 || (PAGE_SIZE != SZ_4K && level == 1));
-}
-
 static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level)
 {
 	u64 granule = kvm_granule_size(level);
@@ -135,11 +111,6 @@ static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
 	return __kvm_pgd_page_idx(&pgt, -1ULL) + 1;
 }
 
-static bool kvm_pte_valid(kvm_pte_t pte)
-{
-	return pte & KVM_PTE_VALID;
-}
-
 static bool kvm_pte_table(kvm_pte_t pte, u32 level)
 {
 	if (level == KVM_PGTABLE_MAX_LEVELS - 1)
@@ -151,16 +122,6 @@ static bool kvm_pte_table(kvm_pte_t pte, u32 level)
 	return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE;
 }
 
-static u64 kvm_pte_to_phys(kvm_pte_t pte)
-{
-	u64 pa = pte & KVM_PTE_ADDR_MASK;
-
-	if (PAGE_SHIFT == 16)
-		pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;
-
-	return pa;
-}
-
 static kvm_pte_t kvm_phys_to_pte(u64 pa)
 {
 	kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK;
@@ -326,6 +287,45 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
 	return _kvm_pgtable_walk(&walk_data);
 }
 
+struct leaf_walk_data {
+	kvm_pte_t	pte;
+	u32		level;
+};
+
+static int leaf_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
+		       enum kvm_pgtable_walk_flags flag, void * const arg)
+{
+	struct leaf_walk_data *data = arg;
+
+	data->pte   = *ptep;
+	data->level = level;
+
+	return 0;
+}
+
+int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
+			 kvm_pte_t *ptep, u32 *level)
+{
+	struct leaf_walk_data data;
+	struct kvm_pgtable_walker walker = {
+		.cb	= leaf_walker,
+		.flags	= KVM_PGTABLE_WALK_LEAF,
+		.arg	= &data,
+	};
+	int ret;
+
+	ret = kvm_pgtable_walk(pgt, ALIGN_DOWN(addr, PAGE_SIZE),
+			       PAGE_SIZE, &walker);
+	if (!ret) {
+		if (ptep)
+			*ptep  = data.pte;
+		if (level)
+			*level = data.level;
+	}
+
+	return ret;
+}
+
 struct hyp_map_data {
 	u64				phys;
 	kvm_pte_t			attr;
@@ -357,11 +357,47 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
 	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
 	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
 	attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
+	attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
 	*ptep = attr;
 
 	return 0;
 }
 
+enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
+{
+	enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;
+	u32 ap;
+
+	if (!kvm_pte_valid(pte))
+		return prot;
+
+	if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_XN))
+		prot |= KVM_PGTABLE_PROT_X;
+
+	ap = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_AP, pte);
+	if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RO)
+		prot |= KVM_PGTABLE_PROT_R;
+	else if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RW)
+		prot |= KVM_PGTABLE_PROT_RW;
+
+	return prot;
+}
+
+static bool hyp_pte_needs_update(kvm_pte_t old, kvm_pte_t new)
+{
+	/*
+	 * Tolerate KVM recreating the exact same mapping, or changing software
+	 * bits if the existing mapping was valid.
+	 */
+	if (old == new)
+		return false;
+
+	if (!kvm_pte_valid(old))
+		return true;
+
+	return !WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW);
+}
+
 static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
 				    kvm_pte_t *ptep, struct hyp_map_data *data)
 {
@@ -371,9 +407,8 @@ static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
 	if (!kvm_block_mapping_supported(addr, end, phys, level))
 		return false;
 
-	/* Tolerate KVM recreating the exact same mapping */
 	new = kvm_init_valid_leaf_pte(phys, data->attr, level);
-	if (old != new && !WARN_ON(kvm_pte_valid(old)))
+	if (hyp_pte_needs_update(old, new))
 		smp_store_release(ptep, new);
 
 	data->phys += granule;
@@ -438,6 +473,8 @@ int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
 	pgt->start_level	= KVM_PGTABLE_MAX_LEVELS - levels;
 	pgt->mm_ops		= mm_ops;
 	pgt->mmu		= NULL;
+	pgt->force_pte_cb	= NULL;
+
 	return 0;
 }
 
@@ -475,6 +512,9 @@ struct stage2_map_data {
 	void				*memcache;
 
 	struct kvm_pgtable_mm_ops	*mm_ops;
+
+	/* Force mappings to page granularity */
+	bool				force_pte;
 };
 
 u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
@@ -539,11 +579,29 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p
 
 	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
 	attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
+	attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
 	*ptep = attr;
 
 	return 0;
 }
 
+enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte)
+{
+	enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;
+
+	if (!kvm_pte_valid(pte))
+		return prot;
+
+	if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R)
+		prot |= KVM_PGTABLE_PROT_R;
+	if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W)
+		prot |= KVM_PGTABLE_PROT_W;
+	if (!(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN))
+		prot |= KVM_PGTABLE_PROT_X;
+
+	return prot;
+}
+
 static bool stage2_pte_needs_update(kvm_pte_t old, kvm_pte_t new)
 {
 	if (!kvm_pte_valid(old) || !kvm_pte_valid(new))
@@ -588,6 +646,15 @@ static bool stage2_pte_executable(kvm_pte_t pte)
 	return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
 }
 
+static bool stage2_leaf_mapping_allowed(u64 addr, u64 end, u32 level,
+					struct stage2_map_data *data)
+{
+	if (data->force_pte && (level < (KVM_PGTABLE_MAX_LEVELS - 1)))
+		return false;
+
+	return kvm_block_mapping_supported(addr, end, data->phys, level);
+}
+
 static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
 				      kvm_pte_t *ptep,
 				      struct stage2_map_data *data)
@@ -597,7 +664,7 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
 	struct kvm_pgtable *pgt = data->mmu->pgt;
 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
 
-	if (!kvm_block_mapping_supported(addr, end, phys, level))
+	if (!stage2_leaf_mapping_allowed(addr, end, level, data))
 		return -E2BIG;
 
 	if (kvm_phys_is_valid(phys))
@@ -641,7 +708,7 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
 	if (data->anchor)
 		return 0;
 
-	if (!kvm_block_mapping_supported(addr, end, data->phys, level))
+	if (!stage2_leaf_mapping_allowed(addr, end, level, data))
 		return 0;
 
 	data->childp = kvm_pte_follow(*ptep, data->mm_ops);
@@ -771,6 +838,7 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
 		.mmu		= pgt->mmu,
 		.memcache	= mc,
 		.mm_ops		= pgt->mm_ops,
+		.force_pte	= pgt->force_pte_cb && pgt->force_pte_cb(addr, addr + size, prot),
 	};
 	struct kvm_pgtable_walker walker = {
 		.cb		= stage2_map_walker,
@@ -802,6 +870,7 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
 		.memcache	= mc,
 		.mm_ops		= pgt->mm_ops,
 		.owner_id	= owner_id,
+		.force_pte	= true,
 	};
 	struct kvm_pgtable_walker walker = {
 		.cb		= stage2_map_walker,
@@ -995,6 +1064,9 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
 	u32 level;
 	kvm_pte_t set = 0, clr = 0;
 
+	if (prot & KVM_PTE_LEAF_ATTR_HI_SW)
+		return -EINVAL;
+
 	if (prot & KVM_PGTABLE_PROT_R)
 		set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
 
@@ -1043,9 +1115,11 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
 	return kvm_pgtable_walk(pgt, addr, size, &walker);
 }
 
-int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch,
-				  struct kvm_pgtable_mm_ops *mm_ops,
-				  enum kvm_pgtable_stage2_flags flags)
+
+int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
+			      struct kvm_pgtable_mm_ops *mm_ops,
+			      enum kvm_pgtable_stage2_flags flags,
+			      kvm_pgtable_force_pte_cb_t force_pte_cb)
 {
 	size_t pgd_sz;
 	u64 vtcr = arch->vtcr;
@@ -1063,6 +1137,7 @@ int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch
 	pgt->mm_ops		= mm_ops;
 	pgt->mmu		= &arch->mmu;
 	pgt->flags		= flags;
+	pgt->force_pte_cb	= force_pte_cb;
 
 	/* Ensure zeroed PGD pages are visible to the hardware walker */
 	dsb(ishst);
@@ -1102,77 +1177,3 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
 	pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz);
 	pgt->pgd = NULL;
 }
-
-#define KVM_PTE_LEAF_S2_COMPAT_MASK	(KVM_PTE_LEAF_ATTR_S2_PERMS | \
-					 KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR | \
-					 KVM_PTE_LEAF_ATTR_S2_IGNORED)
-
-static int stage2_check_permission_walker(u64 addr, u64 end, u32 level,
-					  kvm_pte_t *ptep,
-					  enum kvm_pgtable_walk_flags flag,
-					  void * const arg)
-{
-	kvm_pte_t old_attr, pte = *ptep, *new_attr = arg;
-
-	/*
-	 * Compatible mappings are either invalid and owned by the page-table
-	 * owner (whose id is 0), or valid with matching permission attributes.
-	 */
-	if (kvm_pte_valid(pte)) {
-		old_attr = pte & KVM_PTE_LEAF_S2_COMPAT_MASK;
-		if (old_attr != *new_attr)
-			return -EEXIST;
-	} else if (pte) {
-		return -EEXIST;
-	}
-
-	return 0;
-}
-
-int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr,
-				  enum kvm_pgtable_prot prot,
-				  struct kvm_mem_range *range)
-{
-	kvm_pte_t attr;
-	struct kvm_pgtable_walker check_perm_walker = {
-		.cb		= stage2_check_permission_walker,
-		.flags		= KVM_PGTABLE_WALK_LEAF,
-		.arg		= &attr,
-	};
-	u64 granule, start, end;
-	u32 level;
-	int ret;
-
-	ret = stage2_set_prot_attr(pgt, prot, &attr);
-	if (ret)
-		return ret;
-	attr &= KVM_PTE_LEAF_S2_COMPAT_MASK;
-
-	for (level = pgt->start_level; level < KVM_PGTABLE_MAX_LEVELS; level++) {
-		granule = kvm_granule_size(level);
-		start = ALIGN_DOWN(addr, granule);
-		end = start + granule;
-
-		if (!kvm_level_supports_block_mapping(level))
-			continue;
-
-		if (start < range->start || range->end < end)
-			continue;
-
-		/*
-		 * Check the presence of existing mappings with incompatible
-		 * permissions within the current block range, and try one level
-		 * deeper if one is found.
-		 */
-		ret = kvm_pgtable_walk(pgt, start, granule, &check_perm_walker);
-		if (ret != -EEXIST)
-			break;
-	}
-
-	if (!ret) {
-		range->start = start;
-		range->end = end;
-	}
-
-	return ret;
-}
diff --git a/arch/arm64/kvm/hyp/reserved_mem.c b/arch/arm64/kvm/hyp/reserved_mem.c
index d654921dd09b..578670e3f608 100644
--- a/arch/arm64/kvm/hyp/reserved_mem.c
+++ b/arch/arm64/kvm/hyp/reserved_mem.c
@@ -92,12 +92,10 @@ void __init kvm_hyp_reserve(void)
 	 * this is unmapped from the host stage-2, and fallback to PAGE_SIZE.
 	 */
 	hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
-	hyp_mem_base = memblock_find_in_range(0, memblock_end_of_DRAM(),
-					      ALIGN(hyp_mem_size, PMD_SIZE),
-					      PMD_SIZE);
+	hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
+					   PMD_SIZE);
 	if (!hyp_mem_base)
-		hyp_mem_base = memblock_find_in_range(0, memblock_end_of_DRAM(),
-						      hyp_mem_size, PAGE_SIZE);
+		hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
 	else
 		hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);
 
@@ -105,7 +103,6 @@ void __init kvm_hyp_reserve(void)
 		kvm_err("Failed to reserve hyp memory\n");
 		return;
 	}
-	memblock_reserve(hyp_mem_base, hyp_mem_size);
 
 	kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
 		 hyp_mem_base);
diff --git a/arch/arm64/kvm/hyp/vhe/debug-sr.c b/arch/arm64/kvm/hyp/vhe/debug-sr.c
index f1e2e5a00933..289689b2682d 100644
--- a/arch/arm64/kvm/hyp/vhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/debug-sr.c
@@ -20,7 +20,7 @@ void __debug_switch_to_host(struct kvm_vcpu *vcpu)
 	__debug_switch_to_host_common(vcpu);
 }
 
-u32 __kvm_get_mdcr_el2(void)
+u64 __kvm_get_mdcr_el2(void)
 {
 	return read_sysreg(mdcr_el2);
 }
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index b3229924d243..ded2c66675f0 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -91,17 +91,9 @@ void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
 	__activate_traps_common(vcpu);
 }
 
-void deactivate_traps_vhe_put(void)
+void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu)
 {
-	u64 mdcr_el2 = read_sysreg(mdcr_el2);
-
-	mdcr_el2 &= MDCR_EL2_HPMN_MASK |
-		    MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT |
-		    MDCR_EL2_TPMS;
-
-	write_sysreg(mdcr_el2, mdcr_el2);
-
-	__deactivate_traps_common();
+	__deactivate_traps_common(vcpu);
 }
 
 /* Switch to the guest for VHE systems running in EL2 */
@@ -124,11 +116,11 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
 	 *
 	 * We have already configured the guest's stage 1 translation in
 	 * kvm_vcpu_load_sysregs_vhe above.  We must now call
-	 * __load_guest_stage2 before __activate_traps, because
-	 * __load_guest_stage2 configures stage 2 translation, and
+	 * __load_stage2 before __activate_traps, because
+	 * __load_stage2 configures stage 2 translation, and
 	 * __activate_traps clear HCR_EL2.TGE (among other things).
 	 */
-	__load_guest_stage2(vcpu->arch.hw_mmu);
+	__load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch);
 	__activate_traps(vcpu);
 
 	__kvm_adjust_pc(vcpu);
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
index 2a0b8c88d74f..007a12dd4351 100644
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -101,7 +101,7 @@ void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu)
 	struct kvm_cpu_context *host_ctxt;
 
 	host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
-	deactivate_traps_vhe_put();
+	deactivate_traps_vhe_put(vcpu);
 
 	__sysreg_save_el1_state(guest_ctxt);
 	__sysreg_save_user_state(guest_ctxt);
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index 66f17349f0c3..24cef9b87f9e 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -50,10 +50,10 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
 	 *
 	 * ARM erratum 1165522 requires some special handling (again),
 	 * as we need to make sure both stages of translation are in
-	 * place before clearing TGE. __load_guest_stage2() already
+	 * place before clearing TGE. __load_stage2() already
 	 * has an ISB in order to deal with this.
 	 */
-	__load_guest_stage2(mmu);
+	__load_stage2(mmu, mmu->arch);
 	val = read_sysreg(hcr_el2);
 	val &= ~HCR_TGE;
 	write_sysreg(val, hcr_el2);
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 0625bf2353c2..1a94a7ca48f2 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -80,6 +80,7 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot)
  */
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
+	++kvm->stat.generic.remote_tlb_flush_requests;
 	kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
 }
 
@@ -259,10 +260,8 @@ static int __create_hyp_mappings(unsigned long start, unsigned long size,
 {
 	int err;
 
-	if (!kvm_host_owns_hyp_mappings()) {
-		return kvm_call_hyp_nvhe(__pkvm_create_mappings,
-					 start, size, phys, prot);
-	}
+	if (WARN_ON(!kvm_host_owns_hyp_mappings()))
+		return -EINVAL;
 
 	mutex_lock(&kvm_hyp_pgd_mutex);
 	err = kvm_pgtable_hyp_map(hyp_pgtable, start, size, phys, prot);
@@ -282,6 +281,21 @@ static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
 	}
 }
 
+static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end)
+{
+	phys_addr_t addr;
+	int ret;
+
+	for (addr = ALIGN_DOWN(start, PAGE_SIZE); addr < end; addr += PAGE_SIZE) {
+		ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp,
+					__phys_to_pfn(addr));
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 /**
  * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
  * @from:	The virtual kernel start address of the range
@@ -302,6 +316,13 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
 	if (is_kernel_in_hyp_mode())
 		return 0;
 
+	if (!kvm_host_owns_hyp_mappings()) {
+		if (WARN_ON(prot != PAGE_HYP))
+			return -EPERM;
+		return pkvm_share_hyp(kvm_kaddr_to_phys(from),
+				      kvm_kaddr_to_phys(to));
+	}
+
 	start = start & PAGE_MASK;
 	end = PAGE_ALIGN(end);
 
@@ -433,6 +454,32 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
 	return 0;
 }
 
+static struct kvm_pgtable_mm_ops kvm_user_mm_ops = {
+	/* We shouldn't need any other callback to walk the PT */
+	.phys_to_virt		= kvm_host_va,
+};
+
+static int get_user_mapping_size(struct kvm *kvm, u64 addr)
+{
+	struct kvm_pgtable pgt = {
+		.pgd		= (kvm_pte_t *)kvm->mm->pgd,
+		.ia_bits	= VA_BITS,
+		.start_level	= (KVM_PGTABLE_MAX_LEVELS -
+				   CONFIG_PGTABLE_LEVELS),
+		.mm_ops		= &kvm_user_mm_ops,
+	};
+	kvm_pte_t pte = 0;	/* Keep GCC quiet... */
+	u32 level = ~0;
+	int ret;
+
+	ret = kvm_pgtable_get_leaf(&pgt, addr, &pte, &level);
+	VM_BUG_ON(ret);
+	VM_BUG_ON(level >= KVM_PGTABLE_MAX_LEVELS);
+	VM_BUG_ON(!(pte & PTE_VALID));
+
+	return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level));
+}
+
 static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
 	.zalloc_page		= stage2_memcache_zalloc_page,
 	.zalloc_pages_exact	= kvm_host_zalloc_pages_exact,
@@ -485,7 +532,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
 	mmu->arch = &kvm->arch;
 	mmu->pgt = pgt;
 	mmu->pgd_phys = __pa(pgt->pgd);
-	mmu->vmid.vmid_gen = 0;
+	WRITE_ONCE(mmu->vmid.vmid_gen, 0);
 	return 0;
 
 out_destroy_pgtable:
@@ -780,7 +827,7 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
  * Returns the size of the mapping.
  */
 static unsigned long
-transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
+transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			    unsigned long hva, kvm_pfn_t *pfnp,
 			    phys_addr_t *ipap)
 {
@@ -791,8 +838,8 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
 	 * sure that the HVA and IPA are sufficiently aligned and that the
 	 * block map is contained within the memslot.
 	 */
-	if (kvm_is_transparent_hugepage(pfn) &&
-	    fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) {
+	if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) &&
+	    get_user_mapping_size(kvm, hva) >= PMD_SIZE) {
 		/*
 		 * The address we faulted on is backed by a transparent huge
 		 * page.  However, because we map the compound huge page and
@@ -814,7 +861,7 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
 		*ipap &= PMD_MASK;
 		kvm_release_pfn_clean(pfn);
 		pfn &= ~(PTRS_PER_PMD - 1);
-		kvm_get_pfn(pfn);
+		get_page(pfn_to_page(pfn));
 		*pfnp = pfn;
 
 		return PMD_SIZE;
@@ -1050,9 +1097,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	 * If we are not forced to use page mapping, check if we are
 	 * backed by a THP and thus use block mapping if possible.
 	 */
-	if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
-		vma_pagesize = transparent_hugepage_adjust(memslot, hva,
-							   &pfn, &fault_ipa);
+	if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
+		if (fault_status == FSC_PERM && fault_granule > PAGE_SIZE)
+			vma_pagesize = fault_granule;
+		else
+			vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
+								   hva, &pfn,
+								   &fault_ipa);
+	}
 
 	if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
 		/* Check the VMM hasn't introduced a new VM_SHARED VMA */
diff --git a/arch/arm64/kvm/perf.c b/arch/arm64/kvm/perf.c
index 151c31fb9860..f9bb3b14130e 100644
--- a/arch/arm64/kvm/perf.c
+++ b/arch/arm64/kvm/perf.c
@@ -50,7 +50,7 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
 
 int kvm_perf_init(void)
 {
-	if (kvm_pmu_probe_pmuver() != 0xf && !is_protected_kvm_enabled())
+	if (kvm_pmu_probe_pmuver() != ID_AA64DFR0_PMUVER_IMP_DEF && !is_protected_kvm_enabled())
 		static_branch_enable(&kvm_arm_pmu_available);
 
 	return perf_register_guest_info_callbacks(&kvm_guest_cbs);
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index f33825c995cb..f5065f23b413 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -373,7 +373,6 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
 		reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
 		reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
 		reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
-		reg &= kvm_pmu_valid_counter_mask(vcpu);
 	}
 
 	return reg;
@@ -564,20 +563,21 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
  */
 void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
 {
-	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
 	int i;
 
 	if (val & ARMV8_PMU_PMCR_E) {
 		kvm_pmu_enable_counter_mask(vcpu,
-		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask);
+		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
 	} else {
-		kvm_pmu_disable_counter_mask(vcpu, mask);
+		kvm_pmu_disable_counter_mask(vcpu,
+		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
 	}
 
 	if (val & ARMV8_PMU_PMCR_C)
 		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
 
 	if (val & ARMV8_PMU_PMCR_P) {
+		unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
 		mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
 		for_each_set_bit(i, &mask, 32)
 			kvm_pmu_set_counter_value(vcpu, i, 0);
@@ -745,7 +745,7 @@ int kvm_pmu_probe_pmuver(void)
 	struct perf_event_attr attr = { };
 	struct perf_event *event;
 	struct arm_pmu *pmu;
-	int pmuver = 0xf;
+	int pmuver = ID_AA64DFR0_PMUVER_IMP_DEF;
 
 	/*
 	 * Create a dummy event that only counts user cycles. As we'll never
@@ -770,7 +770,7 @@ int kvm_pmu_probe_pmuver(void)
 	if (IS_ERR(event)) {
 		pr_err_once("kvm: pmu event creation failed %ld\n",
 			    PTR_ERR(event));
-		return 0xf;
+		return ID_AA64DFR0_PMUVER_IMP_DEF;
 	}
 
 	if (event->pmu) {
@@ -923,7 +923,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 	if (!vcpu->kvm->arch.pmuver)
 		vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver();
 
-	if (vcpu->kvm->arch.pmuver == 0xf)
+	if (vcpu->kvm->arch.pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
 		return -ENODEV;
 
 	switch (attr->attr) {
diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c
index db4056ecccfd..74c47d420253 100644
--- a/arch/arm64/kvm/psci.c
+++ b/arch/arm64/kvm/psci.c
@@ -59,6 +59,12 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
 	kvm_vcpu_kick(vcpu);
 }
 
+static inline bool kvm_psci_valid_affinity(struct kvm_vcpu *vcpu,
+					   unsigned long affinity)
+{
+	return !(affinity & ~MPIDR_HWID_BITMASK);
+}
+
 static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 {
 	struct vcpu_reset_state *reset_state;
@@ -66,9 +72,9 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	struct kvm_vcpu *vcpu = NULL;
 	unsigned long cpu_id;
 
-	cpu_id = smccc_get_arg1(source_vcpu) & MPIDR_HWID_BITMASK;
-	if (vcpu_mode_is_32bit(source_vcpu))
-		cpu_id &= ~((u32) 0);
+	cpu_id = smccc_get_arg1(source_vcpu);
+	if (!kvm_psci_valid_affinity(source_vcpu, cpu_id))
+		return PSCI_RET_INVALID_PARAMS;
 
 	vcpu = kvm_mpidr_to_vcpu(kvm, cpu_id);
 
@@ -126,6 +132,9 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
 	target_affinity = smccc_get_arg1(vcpu);
 	lowest_affinity_level = smccc_get_arg2(vcpu);
 
+	if (!kvm_psci_valid_affinity(vcpu, target_affinity))
+		return PSCI_RET_INVALID_PARAMS;
+
 	/* Determine target affinity mask */
 	target_affinity_mask = psci_affinity_mask(lowest_affinity_level);
 	if (!target_affinity_mask)
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index cba7872d69a8..5ce36b0a3343 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -210,10 +210,16 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
  */
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 {
+	struct vcpu_reset_state reset_state;
 	int ret;
 	bool loaded;
 	u32 pstate;
 
+	mutex_lock(&vcpu->kvm->lock);
+	reset_state = vcpu->arch.reset_state;
+	WRITE_ONCE(vcpu->arch.reset_state.reset, false);
+	mutex_unlock(&vcpu->kvm->lock);
+
 	/* Reset PMU outside of the non-preemptible section */
 	kvm_pmu_vcpu_reset(vcpu);
 
@@ -276,8 +282,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 	 * Additional reset state handling that PSCI may have imposed on us.
 	 * Must be done after all the sys_reg reset.
 	 */
-	if (vcpu->arch.reset_state.reset) {
-		unsigned long target_pc = vcpu->arch.reset_state.pc;
+	if (reset_state.reset) {
+		unsigned long target_pc = reset_state.pc;
 
 		/* Gracefully handle Thumb2 entry point */
 		if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) {
@@ -286,13 +292,11 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 		}
 
 		/* Propagate caller endianness */
-		if (vcpu->arch.reset_state.be)
+		if (reset_state.be)
 			kvm_vcpu_set_be(vcpu);
 
 		*vcpu_pc(vcpu) = target_pc;
-		vcpu_set_reg(vcpu, 0, vcpu->arch.reset_state.r0);
-
-		vcpu->arch.reset_state.reset = false;
+		vcpu_set_reg(vcpu, 0, reset_state.r0);
 	}
 
 	/* Reset timer */
@@ -311,31 +315,26 @@ u32 get_kvm_ipa_limit(void)
 
 int kvm_set_ipa_limit(void)
 {
-	unsigned int parange, tgran_2;
+	unsigned int parange;
 	u64 mmfr0;
 
 	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
 	parange = cpuid_feature_extract_unsigned_field(mmfr0,
 				ID_AA64MMFR0_PARANGE_SHIFT);
+	/*
+	 * IPA size beyond 48 bits could not be supported
+	 * on either 4K or 16K page size. Hence let's cap
+	 * it to 48 bits, in case it's reported as larger
+	 * on the system.
+	 */
+	if (PAGE_SIZE != SZ_64K)
+		parange = min(parange, (unsigned int)ID_AA64MMFR0_PARANGE_48);
 
 	/*
 	 * Check with ARMv8.5-GTG that our PAGE_SIZE is supported at
 	 * Stage-2. If not, things will stop very quickly.
 	 */
-	switch (PAGE_SIZE) {
-	default:
-	case SZ_4K:
-		tgran_2 = ID_AA64MMFR0_TGRAN4_2_SHIFT;
-		break;
-	case SZ_16K:
-		tgran_2 = ID_AA64MMFR0_TGRAN16_2_SHIFT;
-		break;
-	case SZ_64K:
-		tgran_2 = ID_AA64MMFR0_TGRAN64_2_SHIFT;
-		break;
-	}
-
-	switch (cpuid_feature_extract_unsigned_field(mmfr0, tgran_2)) {
+	switch (cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_TGRAN_2_SHIFT)) {
 	case ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE:
 		kvm_err("PAGE_SIZE not supported at Stage-2, giving up\n");
 		return -EINVAL;
@@ -369,7 +368,7 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
 	phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
 	if (phys_shift) {
 		if (phys_shift > kvm_ipa_limit ||
-		    phys_shift < 32)
+		    phys_shift < ARM64_MIN_PARANGE_BITS)
 			return -EINVAL;
 	} else {
 		phys_shift = KVM_PHYS_SHIFT;
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index f6f126eb6ac1..1d46e185f31e 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -44,10 +44,6 @@
  * 64bit interface.
  */
 
-#define reg_to_encoding(x)						\
-	sys_reg((u32)(x)->Op0, (u32)(x)->Op1,				\
-		(u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2)
-
 static bool read_from_write_only(struct kvm_vcpu *vcpu,
 				 struct sys_reg_params *params,
 				 const struct sys_reg_desc *r)
@@ -318,14 +314,14 @@ static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu,
 /*
  * We want to avoid world-switching all the DBG registers all the
  * time:
- * 
+ *
  * - If we've touched any debug register, it is likely that we're
  *   going to touch more of them. It then makes sense to disable the
  *   traps and start doing the save/restore dance
  * - If debug is active (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), it is
  *   then mandatory to save/restore the registers, as the guest
  *   depends on them.
- * 
+ *
  * For this, we use a DIRTY bit, indicating the guest has modified the
  * debug registers, used as follow:
  *
@@ -603,6 +599,41 @@ static unsigned int pmu_visibility(const struct kvm_vcpu *vcpu,
 	return REG_HIDDEN;
 }
 
+static void reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+	u64 n, mask = BIT(ARMV8_PMU_CYCLE_IDX);
+
+	/* No PMU available, any PMU reg may UNDEF... */
+	if (!kvm_arm_support_pmu_v3())
+		return;
+
+	n = read_sysreg(pmcr_el0) >> ARMV8_PMU_PMCR_N_SHIFT;
+	n &= ARMV8_PMU_PMCR_N_MASK;
+	if (n)
+		mask |= GENMASK(n - 1, 0);
+
+	reset_unknown(vcpu, r);
+	__vcpu_sys_reg(vcpu, r->reg) &= mask;
+}
+
+static void reset_pmevcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+	reset_unknown(vcpu, r);
+	__vcpu_sys_reg(vcpu, r->reg) &= GENMASK(31, 0);
+}
+
+static void reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+	reset_unknown(vcpu, r);
+	__vcpu_sys_reg(vcpu, r->reg) &= ARMV8_PMU_EVTYPE_MASK;
+}
+
+static void reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+	reset_unknown(vcpu, r);
+	__vcpu_sys_reg(vcpu, r->reg) &= ARMV8_PMU_COUNTER_MASK;
+}
+
 static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 {
 	u64 pmcr, val;
@@ -845,7 +876,7 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 			kvm_pmu_disable_counter_mask(vcpu, val);
 		}
 	} else {
-		p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask;
+		p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
 	}
 
 	return true;
@@ -869,7 +900,7 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 			/* accessing PMINTENCLR_EL1 */
 			__vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val;
 	} else {
-		p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask;
+		p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
 	}
 
 	return true;
@@ -891,7 +922,7 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 			/* accessing PMOVSCLR_EL0 */
 			__vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask);
 	} else {
-		p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask;
+		p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
 	}
 
 	return true;
@@ -944,16 +975,18 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	  trap_wcr, reset_wcr, 0, 0,  get_wcr, set_wcr }
 
 #define PMU_SYS_REG(r)						\
-	SYS_DESC(r), .reset = reset_unknown, .visibility = pmu_visibility
+	SYS_DESC(r), .reset = reset_pmu_reg, .visibility = pmu_visibility
 
 /* Macro to expand the PMEVCNTRn_EL0 register */
 #define PMU_PMEVCNTR_EL0(n)						\
 	{ PMU_SYS_REG(SYS_PMEVCNTRn_EL0(n)),				\
+	  .reset = reset_pmevcntr,					\
 	  .access = access_pmu_evcntr, .reg = (PMEVCNTR0_EL0 + n), }
 
 /* Macro to expand the PMEVTYPERn_EL0 register */
 #define PMU_PMEVTYPER_EL0(n)						\
 	{ PMU_SYS_REG(SYS_PMEVTYPERn_EL0(n)),				\
+	  .reset = reset_pmevtyper,					\
 	  .access = access_pmu_evtyper, .reg = (PMEVTYPER0_EL0 + n), }
 
 static bool undef_access(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
@@ -1026,8 +1059,6 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
 	return true;
 }
 
-#define FEATURE(x)	(GENMASK_ULL(x##_SHIFT + 3, x##_SHIFT))
-
 /* Read a sanitised cpufeature ID register by sys_reg_desc */
 static u64 read_id_reg(const struct kvm_vcpu *vcpu,
 		struct sys_reg_desc const *r, bool raz)
@@ -1038,40 +1069,40 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
 	switch (id) {
 	case SYS_ID_AA64PFR0_EL1:
 		if (!vcpu_has_sve(vcpu))
-			val &= ~FEATURE(ID_AA64PFR0_SVE);
-		val &= ~FEATURE(ID_AA64PFR0_AMU);
-		val &= ~FEATURE(ID_AA64PFR0_CSV2);
-		val |= FIELD_PREP(FEATURE(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2);
-		val &= ~FEATURE(ID_AA64PFR0_CSV3);
-		val |= FIELD_PREP(FEATURE(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3);
+			val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_SVE);
+		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_AMU);
+		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2);
+		val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2);
+		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3);
+		val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3);
 		break;
 	case SYS_ID_AA64PFR1_EL1:
-		val &= ~FEATURE(ID_AA64PFR1_MTE);
+		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE);
 		if (kvm_has_mte(vcpu->kvm)) {
 			u64 pfr, mte;
 
 			pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
 			mte = cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR1_MTE_SHIFT);
-			val |= FIELD_PREP(FEATURE(ID_AA64PFR1_MTE), mte);
+			val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR1_MTE), mte);
 		}
 		break;
 	case SYS_ID_AA64ISAR1_EL1:
 		if (!vcpu_has_ptrauth(vcpu))
-			val &= ~(FEATURE(ID_AA64ISAR1_APA) |
-				 FEATURE(ID_AA64ISAR1_API) |
-				 FEATURE(ID_AA64ISAR1_GPA) |
-				 FEATURE(ID_AA64ISAR1_GPI));
+			val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) |
+				 ARM64_FEATURE_MASK(ID_AA64ISAR1_API) |
+				 ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) |
+				 ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI));
 		break;
 	case SYS_ID_AA64DFR0_EL1:
 		/* Limit debug to ARMv8.0 */
-		val &= ~FEATURE(ID_AA64DFR0_DEBUGVER);
-		val |= FIELD_PREP(FEATURE(ID_AA64DFR0_DEBUGVER), 6);
+		val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER);
+		val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), 6);
 		/* Limit guests to PMUv3 for ARMv8.4 */
 		val = cpuid_feature_cap_perfmon_field(val,
 						      ID_AA64DFR0_PMUVER_SHIFT,
 						      kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_PMUVER_8_4 : 0);
 		/* Hide SPE from guests */
-		val &= ~FEATURE(ID_AA64DFR0_PMSVER);
+		val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVER);
 		break;
 	case SYS_ID_DFR0_EL1:
 		/* Limit guests to PMUv3 for ARMv8.4 */
@@ -1249,6 +1280,20 @@ static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 	return __set_id_reg(vcpu, rd, uaddr, true);
 }
 
+static int set_wi_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+		      const struct kvm_one_reg *reg, void __user *uaddr)
+{
+	int err;
+	u64 val;
+
+	/* Perform the access even if we are going to ignore the value */
+	err = reg_from_user(&val, uaddr, sys_reg_to_index(rd));
+	if (err)
+		return err;
+
+	return 0;
+}
+
 static bool access_ctr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 		       const struct sys_reg_desc *r)
 {
@@ -1592,16 +1637,21 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	  .access = access_pmcnten, .reg = PMCNTENSET_EL0 },
 	{ PMU_SYS_REG(SYS_PMOVSCLR_EL0),
 	  .access = access_pmovs, .reg = PMOVSSET_EL0 },
+	/*
+	 * PM_SWINC_EL0 is exposed to userspace as RAZ/WI, as it was
+	 * previously (and pointlessly) advertised in the past...
+	 */
 	{ PMU_SYS_REG(SYS_PMSWINC_EL0),
-	  .access = access_pmswinc, .reg = PMSWINC_EL0 },
+	  .get_user = get_raz_id_reg, .set_user = set_wi_reg,
+	  .access = access_pmswinc, .reset = NULL },
 	{ PMU_SYS_REG(SYS_PMSELR_EL0),
-	  .access = access_pmselr, .reg = PMSELR_EL0 },
+	  .access = access_pmselr, .reset = reset_pmselr, .reg = PMSELR_EL0 },
 	{ PMU_SYS_REG(SYS_PMCEID0_EL0),
 	  .access = access_pmceid, .reset = NULL },
 	{ PMU_SYS_REG(SYS_PMCEID1_EL0),
 	  .access = access_pmceid, .reset = NULL },
 	{ PMU_SYS_REG(SYS_PMCCNTR_EL0),
-	  .access = access_pmu_evcntr, .reg = PMCCNTR_EL0 },
+	  .access = access_pmu_evcntr, .reset = reset_unknown, .reg = PMCCNTR_EL0 },
 	{ PMU_SYS_REG(SYS_PMXEVTYPER_EL0),
 	  .access = access_pmu_evtyper, .reset = NULL },
 	{ PMU_SYS_REG(SYS_PMXEVCNTR_EL0),
@@ -2106,23 +2156,6 @@ static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,
 	return 0;
 }
 
-static int match_sys_reg(const void *key, const void *elt)
-{
-	const unsigned long pval = (unsigned long)key;
-	const struct sys_reg_desc *r = elt;
-
-	return pval - reg_to_encoding(r);
-}
-
-static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params,
-					 const struct sys_reg_desc table[],
-					 unsigned int num)
-{
-	unsigned long pval = reg_to_encoding(params);
-
-	return bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg);
-}
-
 int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu)
 {
 	kvm_inject_undefined(vcpu);
@@ -2365,13 +2398,8 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu)
 
 	trace_kvm_handle_sys_reg(esr);
 
-	params.Op0 = (esr >> 20) & 3;
-	params.Op1 = (esr >> 14) & 0x7;
-	params.CRn = (esr >> 10) & 0xf;
-	params.CRm = (esr >> 1) & 0xf;
-	params.Op2 = (esr >> 17) & 0x7;
+	params = esr_sys64_to_params(esr);
 	params.regval = vcpu_get_reg(vcpu, Rt);
-	params.is_write = !(esr & 1);
 
 	ret = emulate_sys_reg(vcpu, &params);
 
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index 9d0621417c2a..cc0cc95a0280 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -11,6 +11,12 @@
 #ifndef __ARM64_KVM_SYS_REGS_LOCAL_H__
 #define __ARM64_KVM_SYS_REGS_LOCAL_H__
 
+#include <linux/bsearch.h>
+
+#define reg_to_encoding(x)						\
+	sys_reg((u32)(x)->Op0, (u32)(x)->Op1,				\
+		(u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2)
+
 struct sys_reg_params {
 	u8	Op0;
 	u8	Op1;
@@ -21,6 +27,14 @@ struct sys_reg_params {
 	bool	is_write;
 };
 
+#define esr_sys64_to_params(esr)                                               \
+	((struct sys_reg_params){ .Op0 = ((esr) >> 20) & 3,                    \
+				  .Op1 = ((esr) >> 14) & 0x7,                  \
+				  .CRn = ((esr) >> 10) & 0xf,                  \
+				  .CRm = ((esr) >> 1) & 0xf,                   \
+				  .Op2 = ((esr) >> 17) & 0x7,                  \
+				  .is_write = !((esr) & 1) })
+
 struct sys_reg_desc {
 	/* Sysreg string for debug */
 	const char *name;
@@ -152,6 +166,23 @@ static inline int cmp_sys_reg(const struct sys_reg_desc *i1,
 	return i1->Op2 - i2->Op2;
 }
 
+static inline int match_sys_reg(const void *key, const void *elt)
+{
+	const unsigned long pval = (unsigned long)key;
+	const struct sys_reg_desc *r = elt;
+
+	return pval - reg_to_encoding(r);
+}
+
+static inline const struct sys_reg_desc *
+find_reg(const struct sys_reg_params *params, const struct sys_reg_desc table[],
+	 unsigned int num)
+{
+	unsigned long pval = reg_to_encoding(params);
+
+	return __inline_bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg);
+}
+
 const struct sys_reg_desc *find_reg_by_id(u64 id,
 					  struct sys_reg_params *params,
 					  const struct sys_reg_desc table[],
diff --git a/arch/arm64/kvm/trace_handle_exit.h b/arch/arm64/kvm/trace_handle_exit.h
index 8d78acc4fba7..064a58c19f48 100644
--- a/arch/arm64/kvm/trace_handle_exit.h
+++ b/arch/arm64/kvm/trace_handle_exit.h
@@ -78,13 +78,17 @@ TRACE_EVENT(kvm_arm_clear_debug,
 	TP_printk("flags: 0x%08x", __entry->guest_debug)
 );
 
+/*
+ * The dreg32 name is a leftover from a distant past. This will really
+ * output a 64bit value...
+ */
 TRACE_EVENT(kvm_arm_set_dreg32,
-	TP_PROTO(const char *name, __u32 value),
+	TP_PROTO(const char *name, __u64 value),
 	TP_ARGS(name, value),
 
 	TP_STRUCT__entry(
 		__field(const char *, name)
-		__field(__u32, value)
+		__field(__u64, value)
 	),
 
 	TP_fast_assign(
@@ -92,7 +96,7 @@ TRACE_EVENT(kvm_arm_set_dreg32,
 		__entry->value = value;
 	),
 
-	TP_printk("%s: 0x%08x", __entry->name, __entry->value)
+	TP_printk("%s: 0x%llx", __entry->name, __entry->value)
 );
 
 TRACE_DEFINE_SIZEOF(__u64);
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c
index a016f07adc28..5f9014ae595b 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c
@@ -282,7 +282,7 @@ static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu,
 	case GIC_CPU_PRIMASK:
 		/*
 		 * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the
-		 * the PMR field as GICH_VMCR.VMPriMask rather than
+		 * PMR field as GICH_VMCR.VMPriMask rather than
 		 * GICC_PMR.Priority, so we expose the upper five bits of
 		 * priority mask to userspace using the lower bits in the
 		 * unsigned long.
@@ -329,7 +329,7 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu,
 	case GIC_CPU_PRIMASK:
 		/*
 		 * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the
-		 * the PMR field as GICH_VMCR.VMPriMask rather than
+		 * PMR field as GICH_VMCR.VMPriMask rather than
 		 * GICC_PMR.Priority, so we expose the upper five bits of
 		 * priority mask to userspace using the lower bits in the
 		 * unsigned long.
diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c
index 2c580204f1dc..95a18cec14a3 100644
--- a/arch/arm64/kvm/vgic/vgic-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-v2.c
@@ -60,6 +60,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
 		u32 val = cpuif->vgic_lr[lr];
 		u32 cpuid, intid = val & GICH_LR_VIRTUALID;
 		struct vgic_irq *irq;
+		bool deactivated;
 
 		/* Extract the source vCPU id from the LR */
 		cpuid = val & GICH_LR_PHYSID_CPUID;
@@ -75,7 +76,8 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
 
 		raw_spin_lock(&irq->irq_lock);
 
-		/* Always preserve the active bit */
+		/* Always preserve the active bit, note deactivation */
+		deactivated = irq->active && !(val & GICH_LR_ACTIVE_BIT);
 		irq->active = !!(val & GICH_LR_ACTIVE_BIT);
 
 		if (irq->active && vgic_irq_is_sgi(intid))
@@ -96,36 +98,8 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
 		if (irq->config == VGIC_CONFIG_LEVEL && !(val & GICH_LR_STATE))
 			irq->pending_latch = false;
 
-		/*
-		 * Level-triggered mapped IRQs are special because we only
-		 * observe rising edges as input to the VGIC.
-		 *
-		 * If the guest never acked the interrupt we have to sample
-		 * the physical line and set the line level, because the
-		 * device state could have changed or we simply need to
-		 * process the still pending interrupt later.
-		 *
-		 * If this causes us to lower the level, we have to also clear
-		 * the physical active state, since we will otherwise never be
-		 * told when the interrupt becomes asserted again.
-		 *
-		 * Another case is when the interrupt requires a helping hand
-		 * on deactivation (no HW deactivation, for example).
-		 */
-		if (vgic_irq_is_mapped_level(irq)) {
-			bool resample = false;
-
-			if (val & GICH_LR_PENDING_BIT) {
-				irq->line_level = vgic_get_phys_line_level(irq);
-				resample = !irq->line_level;
-			} else if (vgic_irq_needs_resampling(irq) &&
-				   !(irq->active || irq->pending_latch)) {
-				resample = true;
-			}
-
-			if (resample)
-				vgic_irq_set_phys_active(irq, false);
-		}
+		/* Handle resampling for mapped interrupts if required */
+		vgic_irq_handle_resampling(irq, deactivated, val & GICH_LR_PENDING_BIT);
 
 		raw_spin_unlock(&irq->irq_lock);
 		vgic_put_irq(vcpu->kvm, irq);
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 66004f61cd83..21a6207fb2ee 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -46,6 +46,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
 		u32 intid, cpuid;
 		struct vgic_irq *irq;
 		bool is_v2_sgi = false;
+		bool deactivated;
 
 		cpuid = val & GICH_LR_PHYSID_CPUID;
 		cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
@@ -68,7 +69,8 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
 
 		raw_spin_lock(&irq->irq_lock);
 
-		/* Always preserve the active bit */
+		/* Always preserve the active bit, note deactivation */
+		deactivated = irq->active && !(val & ICH_LR_ACTIVE_BIT);
 		irq->active = !!(val & ICH_LR_ACTIVE_BIT);
 
 		if (irq->active && is_v2_sgi)
@@ -89,36 +91,8 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
 		if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE))
 			irq->pending_latch = false;
 
-		/*
-		 * Level-triggered mapped IRQs are special because we only
-		 * observe rising edges as input to the VGIC.
-		 *
-		 * If the guest never acked the interrupt we have to sample
-		 * the physical line and set the line level, because the
-		 * device state could have changed or we simply need to
-		 * process the still pending interrupt later.
-		 *
-		 * If this causes us to lower the level, we have to also clear
-		 * the physical active state, since we will otherwise never be
-		 * told when the interrupt becomes asserted again.
-		 *
-		 * Another case is when the interrupt requires a helping hand
-		 * on deactivation (no HW deactivation, for example).
-		 */
-		if (vgic_irq_is_mapped_level(irq)) {
-			bool resample = false;
-
-			if (val & ICH_LR_PENDING_BIT) {
-				irq->line_level = vgic_get_phys_line_level(irq);
-				resample = !irq->line_level;
-			} else if (vgic_irq_needs_resampling(irq) &&
-				   !(irq->active || irq->pending_latch)) {
-				resample = true;
-			}
-
-			if (resample)
-				vgic_irq_set_phys_active(irq, false);
-		}
+		/* Handle resampling for mapped interrupts if required */
+		vgic_irq_handle_resampling(irq, deactivated, val & ICH_LR_PENDING_BIT);
 
 		raw_spin_unlock(&irq->irq_lock);
 		vgic_put_irq(vcpu->kvm, irq);
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 111bff47e471..5dad4996cfb2 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -106,7 +106,6 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
 	if (intid >= VGIC_MIN_LPI)
 		return vgic_get_lpi(kvm, intid);
 
-	WARN(1, "Looking up struct vgic_irq for reserved INTID");
 	return NULL;
 }
 
@@ -1022,3 +1021,41 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
 
 	return map_is_active;
 }
+
+/*
+ * Level-triggered mapped IRQs are special because we only observe rising
+ * edges as input to the VGIC.
+ *
+ * If the guest never acked the interrupt we have to sample the physical
+ * line and set the line level, because the device state could have changed
+ * or we simply need to process the still pending interrupt later.
+ *
+ * We could also have entered the guest with the interrupt active+pending.
+ * On the next exit, we need to re-evaluate the pending state, as it could
+ * otherwise result in a spurious interrupt by injecting a now potentially
+ * stale pending state.
+ *
+ * If this causes us to lower the level, we have to also clear the physical
+ * active state, since we will otherwise never be told when the interrupt
+ * becomes asserted again.
+ *
+ * Another case is when the interrupt requires a helping hand on
+ * deactivation (no HW deactivation, for example).
+ */
+void vgic_irq_handle_resampling(struct vgic_irq *irq,
+				bool lr_deactivated, bool lr_pending)
+{
+	if (vgic_irq_is_mapped_level(irq)) {
+		bool resample = false;
+
+		if (unlikely(vgic_irq_needs_resampling(irq))) {
+			resample = !(irq->active || irq->pending_latch);
+		} else if (lr_pending || (lr_deactivated && irq->line_level)) {
+			irq->line_level = vgic_get_phys_line_level(irq);
+			resample = !irq->line_level;
+		}
+
+		if (resample)
+			vgic_irq_set_phys_active(irq, false);
+	}
+}
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index dc1f3d1657ee..14a9218641f5 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -169,6 +169,8 @@ void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active);
 bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
 			   unsigned long flags);
 void vgic_kick_vcpus(struct kvm *kvm);
+void vgic_irq_handle_resampling(struct vgic_irq *irq,
+				bool lr_deactivated, bool lr_pending);
 
 int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
 		      phys_addr_t addr, phys_addr_t alignment);
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 6dd56a49790a..0941180a86d3 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 lib-y		:= clear_user.o delay.o copy_from_user.o		\
-		   copy_to_user.o copy_in_user.o copy_page.o		\
+		   copy_to_user.o copy_page.o				\
 		   clear_page.o csum.o insn.o memchr.o memcpy.o		\
 		   memset.o memcmp.o strcmp.o strncmp.o strlen.o	\
 		   strnlen.o strchr.o strrchr.o tishift.o
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
deleted file mode 100644
index dbea3799c3ef..000000000000
--- a/arch/arm64/lib/copy_in_user.S
+++ /dev/null
@@ -1,77 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copy from user space to user space
- *
- * Copyright (C) 2012 ARM Ltd.
- */
-
-#include <linux/linkage.h>
-
-#include <asm/asm-uaccess.h>
-#include <asm/assembler.h>
-#include <asm/cache.h>
-
-/*
- * Copy from user space to user space (alignment handled by the hardware)
- *
- * Parameters:
- *	x0 - to
- *	x1 - from
- *	x2 - n
- * Returns:
- *	x0 - bytes not copied
- */
-	.macro ldrb1 reg, ptr, val
-	user_ldst 9998f, ldtrb, \reg, \ptr, \val
-	.endm
-
-	.macro strb1 reg, ptr, val
-	user_ldst 9998f, sttrb, \reg, \ptr, \val
-	.endm
-
-	.macro ldrh1 reg, ptr, val
-	user_ldst 9997f, ldtrh, \reg, \ptr, \val
-	.endm
-
-	.macro strh1 reg, ptr, val
-	user_ldst 9997f, sttrh, \reg, \ptr, \val
-	.endm
-
-	.macro ldr1 reg, ptr, val
-	user_ldst 9997f, ldtr, \reg, \ptr, \val
-	.endm
-
-	.macro str1 reg, ptr, val
-	user_ldst 9997f, sttr, \reg, \ptr, \val
-	.endm
-
-	.macro ldp1 reg1, reg2, ptr, val
-	user_ldp 9997f, \reg1, \reg2, \ptr, \val
-	.endm
-
-	.macro stp1 reg1, reg2, ptr, val
-	user_stp 9997f, \reg1, \reg2, \ptr, \val
-	.endm
-
-end	.req	x5
-srcin	.req	x15
-SYM_FUNC_START(__arch_copy_in_user)
-	add	end, x0, x2
-	mov	srcin, x1
-#include "copy_template.S"
-	mov	x0, #0
-	ret
-SYM_FUNC_END(__arch_copy_in_user)
-EXPORT_SYMBOL(__arch_copy_in_user)
-
-	.section .fixup,"ax"
-	.align	2
-9997:	cmp	dst, dstin
-	b.ne	9998f
-	// Before being absolutely sure we couldn't copy anything, try harder
-USER(9998f, ldtrb tmp1w, [srcin])
-USER(9998f, sttrb tmp1w, [dst])
-	add	dst, dst, #1
-9998:	sub	x0, end, dst			// bytes not copied
-	ret
-	.previous
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index edc8e950bada..37a81754d9b6 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -30,6 +30,7 @@
 #include <linux/crash_dump.h>
 #include <linux/hugetlb.h>
 #include <linux/acpi_iort.h>
+#include <linux/kmemleak.h>
 
 #include <asm/boot.h>
 #include <asm/fixmap.h>
@@ -74,6 +75,7 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
 static void __init reserve_crashkernel(void)
 {
 	unsigned long long crash_base, crash_size;
+	unsigned long long crash_max = arm64_dma_phys_limit;
 	int ret;
 
 	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
@@ -84,37 +86,27 @@ static void __init reserve_crashkernel(void)
 
 	crash_size = PAGE_ALIGN(crash_size);
 
-	if (crash_base == 0) {
-		/* Current arm64 boot protocol requires 2MB alignment */
-		crash_base = memblock_find_in_range(0, arm64_dma_phys_limit,
-				crash_size, SZ_2M);
-		if (crash_base == 0) {
-			pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
-				crash_size);
-			return;
-		}
-	} else {
-		/* User specifies base address explicitly. */
-		if (!memblock_is_region_memory(crash_base, crash_size)) {
-			pr_warn("cannot reserve crashkernel: region is not memory\n");
-			return;
-		}
+	/* User specifies base address explicitly. */
+	if (crash_base)
+		crash_max = crash_base + crash_size;
 
-		if (memblock_is_region_reserved(crash_base, crash_size)) {
-			pr_warn("cannot reserve crashkernel: region overlaps reserved memory\n");
-			return;
-		}
-
-		if (!IS_ALIGNED(crash_base, SZ_2M)) {
-			pr_warn("cannot reserve crashkernel: base address is not 2MB aligned\n");
-			return;
-		}
+	/* Current arm64 boot protocol requires 2MB alignment */
+	crash_base = memblock_phys_alloc_range(crash_size, SZ_2M,
+					       crash_base, crash_max);
+	if (!crash_base) {
+		pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
+			crash_size);
+		return;
 	}
-	memblock_reserve(crash_base, crash_size);
 
 	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
 		crash_base, crash_base + crash_size, crash_size >> 20);
 
+	/*
+	 * The crashkernel memory will be removed from the kernel linear
+	 * map. Inform kmemleak so that it won't try to access it.
+	 */
+	kmemleak_ignore_phys(crash_base);
 	crashk_res.start = crash_base;
 	crashk_res.end = crash_base + crash_size - 1;
 }
@@ -236,7 +228,21 @@ early_param("mem", early_mem);
 
 void __init arm64_memblock_init(void)
 {
-	const s64 linear_region_size = PAGE_END - _PAGE_OFFSET(vabits_actual);
+	s64 linear_region_size = PAGE_END - _PAGE_OFFSET(vabits_actual);
+
+	/*
+	 * Corner case: 52-bit VA capable systems running KVM in nVHE mode may
+	 * be limited in their ability to support a linear map that exceeds 51
+	 * bits of VA space, depending on the placement of the ID map. Given
+	 * that the placement of the ID map may be randomized, let's simply
+	 * limit the kernel's linear map to 51 bits as well if we detect this
+	 * configuration.
+	 */
+	if (IS_ENABLED(CONFIG_KVM) && vabits_actual == 52 &&
+	    is_hyp_mode_available() && !is_kernel_in_hyp_mode()) {
+		pr_info("Capping linear region to 51 bits for KVM in nVHE mode on LVA capable hardware.\n");
+		linear_region_size = min_t(u64, linear_region_size, BIT(51));
+	}
 
 	/* Remove memory above our supported physical address size */
 	memblock_remove(1ULL << PHYS_MASK_SHIFT, ULLONG_MAX);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 9ff0de1b2b93..cfd9deb347c3 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1502,8 +1502,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
 	return ret;
 }
 
-void arch_remove_memory(int nid, u64 start, u64 size,
-			struct vmem_altmap *altmap)
+void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 2716f6395ba7..9d4d898df76b 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -82,6 +82,7 @@ config CSKY
 	select PCI_SYSCALL if PCI
 	select PCI_MSI if PCI
 	select SET_FS
+	select TRACE_IRQFLAGS_SUPPORT
 
 config LOCKDEP_SUPPORT
 	def_bool y
@@ -139,9 +140,6 @@ config STACKTRACE_SUPPORT
 config TIME_LOW_RES
 	def_bool y
 
-config TRACE_IRQFLAGS_SUPPORT
-	def_bool y
-
 config CPU_TLB_SIZE
 	int
 	default "128"	if (CPU_CK610 || CPU_CK807 || CPU_CK810)
diff --git a/arch/csky/abiv1/cacheflush.c b/arch/csky/abiv1/cacheflush.c
index 07ff17ea33de..fb91b069dc69 100644
--- a/arch/csky/abiv1/cacheflush.c
+++ b/arch/csky/abiv1/cacheflush.c
@@ -56,17 +56,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr,
 	}
 }
 
-void flush_kernel_dcache_page(struct page *page)
-{
-	struct address_space *mapping;
-
-	mapping = page_mapping_file(page);
-
-	if (!mapping || mapping_mapped(mapping))
-		dcache_wbinv_all();
-}
-EXPORT_SYMBOL(flush_kernel_dcache_page);
-
 void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
 	unsigned long end)
 {
diff --git a/arch/csky/abiv1/inc/abi/cacheflush.h b/arch/csky/abiv1/inc/abi/cacheflush.h
index 6cab7afae962..ed62e2066ba7 100644
--- a/arch/csky/abiv1/inc/abi/cacheflush.h
+++ b/arch/csky/abiv1/inc/abi/cacheflush.h
@@ -14,12 +14,10 @@ extern void flush_dcache_page(struct page *);
 #define flush_cache_page(vma, page, pfn)	cache_wbinv_all()
 #define flush_cache_dup_mm(mm)			cache_wbinv_all()
 
-#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
-extern void flush_kernel_dcache_page(struct page *);
-
 #define flush_dcache_mmap_lock(mapping)		xa_lock_irq(&mapping->i_pages)
 #define flush_dcache_mmap_unlock(mapping)	xa_unlock_irq(&mapping->i_pages)
 
+#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1
 static inline void flush_kernel_vmap_range(void *addr, int size)
 {
 	dcache_wbinv_all();
diff --git a/arch/csky/kernel/probes/kprobes.c b/arch/csky/kernel/probes/kprobes.c
index 68b22b499aeb..8fffa34d4e1c 100644
--- a/arch/csky/kernel/probes/kprobes.c
+++ b/arch/csky/kernel/probes/kprobes.c
@@ -283,8 +283,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr)
 		 * normal page fault.
 		 */
 		regs->pc = (unsigned long) cur->addr;
-		if (!instruction_pointer(regs))
-			BUG();
+		BUG_ON(!instruction_pointer(regs));
 
 		if (kcb->kprobe_status == KPROBE_REENTER)
 			restore_previous_kprobe(kcb);
diff --git a/arch/h8300/kernel/traps.c b/arch/h8300/kernel/traps.c
index 5d8b969cd8f3..bdbe988d8dbc 100644
--- a/arch/h8300/kernel/traps.c
+++ b/arch/h8300/kernel/traps.c
@@ -39,10 +39,6 @@ void __init base_trap_init(void)
 {
 }
 
-void __init trap_init(void)
-{
-}
-
 asmlinkage void set_esp0(unsigned long ssp)
 {
 	current->thread.esp0 = ssp;
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index e5a852080730..15dd8f38b698 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -7,6 +7,7 @@ config HEXAGON
 	select ARCH_32BIT_OFF_T
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_NO_PREEMPT
+	select DMA_GLOBAL_POOL
 	# Other pending projects/to-do items.
 	# select HAVE_REGS_AND_STACK_ACCESS_API
 	# select HAVE_HW_BREAKPOINT if PERF_EVENTS
@@ -31,6 +32,7 @@ config HEXAGON
 	select GENERIC_CPU_DEVICES
 	select SET_FS
 	select ARCH_WANT_LD_ORPHAN_WARN
+	select TRACE_IRQFLAGS_SUPPORT
 	help
 	  Qualcomm Hexagon is a processor architecture designed for high
 	  performance and low power across a wide variety of applications.
@@ -52,9 +54,6 @@ config EARLY_PRINTK
 config MMU
 	def_bool y
 
-config TRACE_IRQFLAGS_SUPPORT
-	def_bool y
-
 config GENERIC_CSUM
 	def_bool y
 
diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c
index 00b9a81075dd..882680e81a30 100644
--- a/arch/hexagon/kernel/dma.c
+++ b/arch/hexagon/kernel/dma.c
@@ -7,54 +7,8 @@
 
 #include <linux/dma-map-ops.h>
 #include <linux/memblock.h>
-#include <linux/genalloc.h>
-#include <linux/module.h>
 #include <asm/page.h>
 
-static struct gen_pool *coherent_pool;
-
-
-/* Allocates from a pool of uncached memory that was reserved at boot time */
-
-void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_addr,
-		gfp_t flag, unsigned long attrs)
-{
-	void *ret;
-
-	/*
-	 * Our max_low_pfn should have been backed off by 16MB in
-	 * mm/init.c to create DMA coherent space.  Use that as the VA
-	 * for the pool.
-	 */
-
-	if (coherent_pool == NULL) {
-		coherent_pool = gen_pool_create(PAGE_SHIFT, -1);
-
-		if (coherent_pool == NULL)
-			panic("Can't create %s() memory pool!", __func__);
-		else
-			gen_pool_add(coherent_pool,
-				(unsigned long)pfn_to_virt(max_low_pfn),
-				hexagon_coherent_pool_size, -1);
-	}
-
-	ret = (void *) gen_pool_alloc(coherent_pool, size);
-
-	if (ret) {
-		memset(ret, 0, size);
-		*dma_addr = (dma_addr_t) virt_to_phys(ret);
-	} else
-		*dma_addr = ~0;
-
-	return ret;
-}
-
-void arch_dma_free(struct device *dev, size_t size, void *vaddr,
-		dma_addr_t dma_addr, unsigned long attrs)
-{
-	gen_pool_free(coherent_pool, (unsigned long) vaddr, size);
-}
-
 void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
 		enum dma_data_direction dir)
 {
@@ -77,3 +31,14 @@ void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
 		BUG();
 	}
 }
+
+/*
+ * Our max_low_pfn should have been backed off by 16MB in mm/init.c to create
+ * DMA coherent space.  Use that for the pool.
+ */
+static int __init hexagon_dma_init(void)
+{
+	return dma_init_global_coherent(PFN_PHYS(max_low_pfn),
+					hexagon_coherent_pool_size);
+}
+core_initcall(hexagon_dma_init);
diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c
index 904134b37232..edfc35dafeb1 100644
--- a/arch/hexagon/kernel/traps.c
+++ b/arch/hexagon/kernel/traps.c
@@ -28,10 +28,6 @@
 #define TRAP_SYSCALL	1
 #define TRAP_DEBUG	0xdb
 
-void __init trap_init(void)
-{
-}
-
 #ifdef CONFIG_GENERIC_BUG
 /* Maybe should resemble arch/sh/kernel/traps.c ?? */
 int is_valid_bugaddr(unsigned long addr)
diff --git a/arch/ia64/Kbuild b/arch/ia64/Kbuild
index a4e40e534e6a..e77cc76d228c 100644
--- a/arch/ia64/Kbuild
+++ b/arch/ia64/Kbuild
@@ -1 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
+obj-y				+= kernel/ mm/
+obj-$(CONFIG_IA64_SGI_UV)	+= uv/
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index 467b7e7f967c..7e548c654a29 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -47,8 +47,6 @@ KBUILD_CFLAGS += $(cflags-y)
 head-y := arch/ia64/kernel/head.o
 
 libs-y				+= arch/ia64/lib/
-core-y				+= arch/ia64/kernel/ arch/ia64/mm/
-core-$(CONFIG_IA64_SGI_UV)	+= arch/ia64/uv/
 
 drivers-y			+= arch/ia64/pci/ arch/ia64/hp/common/
 
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 9148ddbf02e5..8ad6946521d8 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -1459,7 +1459,7 @@ static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist,
 		sglist->dma_address = sba_map_page(dev, sg_page(sglist),
 				sglist->offset, sglist->length, dir, attrs);
 		if (dma_mapping_error(dev, sglist->dma_address))
-			return 0;
+			return -EIO;
 		return 1;
 	}
 
@@ -1486,7 +1486,7 @@ static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist,
 	coalesced = sba_coalesce_chunks(ioc, dev, sglist, nents);
 	if (coalesced < 0) {
 		sba_unmap_sg_attrs(dev, sglist, nents, dir, attrs);
-		return 0;
+		return -ENOMEM;
 	}
 
 	/*
diff --git a/arch/ia64/include/asm/meminit.h b/arch/ia64/include/asm/meminit.h
index 6c47a239fc26..f1d5bf2ba847 100644
--- a/arch/ia64/include/asm/meminit.h
+++ b/arch/ia64/include/asm/meminit.h
@@ -29,7 +29,6 @@ struct rsvd_region {
 };
 
 extern struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1];
-extern int num_rsvd_regions;
 
 extern void find_memory (void);
 extern void reserve_memory (void);
@@ -40,7 +39,6 @@ extern unsigned long efi_memmap_init(u64 *s, u64 *e);
 extern int find_max_min_low_pfn (u64, u64, void *);
 
 extern unsigned long vmcore_find_descriptor_size(unsigned long address);
-extern int reserve_elfcorehdr(u64 *start, u64 *end);
 
 /*
  * For rounding an address to the next IA64_GRANULE_SIZE or order
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index e2af6b172200..96d13cb7c19f 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -906,6 +906,6 @@ EXPORT_SYMBOL(acpi_unregister_ioapic);
 /*
  * acpi_suspend_lowlevel() - save kernel state and suspend.
  *
- * TBD when when IA64 starts to support suspend...
+ * TBD when IA64 starts to support suspend...
  */
 int acpi_suspend_lowlevel(void) { return 0; }
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index dd595fbd8006..31fb84de2d21 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -131,7 +131,7 @@ unsigned long ia64_cache_stride_shift = ~0;
  * We use a special marker for the end of memory and it uses the extra (+1) slot
  */
 struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1] __initdata;
-int num_rsvd_regions __initdata;
+static int num_rsvd_regions __initdata;
 
 
 /*
@@ -325,6 +325,31 @@ static inline void __init setup_crashkernel(unsigned long total, int *n)
 {}
 #endif
 
+#ifdef CONFIG_CRASH_DUMP
+static int __init reserve_elfcorehdr(u64 *start, u64 *end)
+{
+	u64 length;
+
+	/* We get the address using the kernel command line,
+	 * but the size is extracted from the EFI tables.
+	 * Both address and size are required for reservation
+	 * to work properly.
+	 */
+
+	if (!is_vmcore_usable())
+		return -EINVAL;
+
+	if ((length = vmcore_find_descriptor_size(elfcorehdr_addr)) == 0) {
+		vmcore_unusable();
+		return -EINVAL;
+	}
+
+	*start = (unsigned long)__va(elfcorehdr_addr);
+	*end = *start + length;
+	return 0;
+}
+#endif /* CONFIG_CRASH_DUMP */
+
 /**
  * reserve_memory - setup reserved memory areas
  *
@@ -522,32 +547,6 @@ static __init int setup_nomca(char *s)
 }
 early_param("nomca", setup_nomca);
 
-#ifdef CONFIG_CRASH_DUMP
-int __init reserve_elfcorehdr(u64 *start, u64 *end)
-{
-	u64 length;
-
-	/* We get the address using the kernel command line,
-	 * but the size is extracted from the EFI tables.
-	 * Both address and size are required for reservation
-	 * to work properly.
-	 */
-
-	if (!is_vmcore_usable())
-		return -EINVAL;
-
-	if ((length = vmcore_find_descriptor_size(elfcorehdr_addr)) == 0) {
-		vmcore_unusable();
-		return -EINVAL;
-	}
-
-	*start = (unsigned long)__va(elfcorehdr_addr);
-	*end = *start + length;
-	return 0;
-}
-
-#endif /* CONFIG_PROC_VMCORE */
-
 void __init
 setup_arch (char **cmdline_p)
 {
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index 4b20224b14d9..6fea1844fb95 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -367,3 +367,5 @@
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
+# 447 reserved for memfd_secret
+448	common	process_mrelease		sys_process_mrelease
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 064a967a7b6e..5c6da8d83c1a 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -484,8 +484,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
 	return ret;
 }
 
-void arch_remove_memory(int nid, u64 start, u64 size,
-			struct vmem_altmap *altmap)
+void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 3ec1291c268d..7976dff8f879 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -446,3 +446,5 @@
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
+# 447 reserved for memfd_secret
+448	common	process_mrelease		sys_process_mrelease
diff --git a/arch/microblaze/Kbuild b/arch/microblaze/Kbuild
index a4e40e534e6a..a1c597889319 100644
--- a/arch/microblaze/Kbuild
+++ b/arch/microblaze/Kbuild
@@ -1 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
+obj-y			+= kernel/
+obj-y			+= mm/
+obj-$(CONFIG_PCI)	+= pci/
+obj-y			+= boot/dts/
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 14a67a42fcae..59798e43cdb0 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -44,6 +44,7 @@ config MICROBLAZE
 	select SPARSE_IRQ
 	select SET_FS
 	select ZONE_DMA
+	select TRACE_IRQFLAGS_SUPPORT
 
 # Endianness selection
 choice
diff --git a/arch/microblaze/Kconfig.debug b/arch/microblaze/Kconfig.debug
index 865527ac332a..a4e40e534e6a 100644
--- a/arch/microblaze/Kconfig.debug
+++ b/arch/microblaze/Kconfig.debug
@@ -1,6 +1 @@
 # SPDX-License-Identifier: GPL-2.0-only
-# For a description of the syntax of this configuration file,
-# see Documentation/kbuild/kconfig-language.rst.
-
-config TRACE_IRQFLAGS_SUPPORT
-	def_bool y
diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile
index 6d4af39e3890..9adc6b6434df 100644
--- a/arch/microblaze/Makefile
+++ b/arch/microblaze/Makefile
@@ -50,17 +50,12 @@ KBUILD_CFLAGS += -ffixed-r31 $(CPUFLAGS-y) $(CPUFLAGS-1) $(CPUFLAGS-2)
 
 head-y := arch/microblaze/kernel/head.o
 libs-y += arch/microblaze/lib/
-core-y += arch/microblaze/kernel/
-core-y += arch/microblaze/mm/
-core-$(CONFIG_PCI) += arch/microblaze/pci/
 
 boot := arch/microblaze/boot
 
 # Are we making a simpleImage.<boardname> target? If so, crack out the boardname
 DTB:=$(subst simpleImage.,,$(filter simpleImage.%, $(MAKECMDGOALS)))
 
-core-y	+= $(boot)/dts/
-
 export DTB
 
 all: linux.bin
diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h
index ce550978f4fc..4b8b2fa78fc5 100644
--- a/arch/microblaze/include/asm/page.h
+++ b/arch/microblaze/include/asm/page.h
@@ -112,8 +112,7 @@ extern int page_is_ram(unsigned long pfn);
 #  define page_to_phys(page)     (page_to_pfn(page) << PAGE_SHIFT)
 
 #  define ARCH_PFN_OFFSET	(memory_start >> PAGE_SHIFT)
-#  define pfn_valid(pfn)	((pfn) < (max_mapnr + ARCH_PFN_OFFSET))
-
+#  define pfn_valid(pfn)	((pfn) >= ARCH_PFN_OFFSET && (pfn) < (max_mapnr + ARCH_PFN_OFFSET))
 # endif /* __ASSEMBLY__ */
 
 #define	virt_addr_valid(vaddr)	(pfn_valid(virt_to_pfn(vaddr)))
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index 71cd547655d9..c136a01e467e 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -443,8 +443,6 @@ extern int mem_init_done;
 
 asmlinkage void __init mmu_init(void);
 
-void __init *early_get_page(void);
-
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
 
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 9be3ace12938..6b0e11362bd2 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -452,3 +452,5 @@
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
+# 447 reserved for memfd_secret
+448	common	process_mrelease		sys_process_mrelease
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index ab55c70380a5..952f35b335b2 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -265,18 +265,6 @@ asmlinkage void __init mmu_init(void)
 	dma_contiguous_reserve(memory_start + lowmem_size - 1);
 }
 
-/* This is only called until mem_init is done. */
-void __init *early_get_page(void)
-{
-	/*
-	 * Mem start + kernel_tlb -> here is limit
-	 * because of mem mapping from head.S
-	 */
-	return memblock_alloc_try_nid_raw(PAGE_SIZE, PAGE_SIZE,
-				MEMBLOCK_LOW_LIMIT, memory_start + kernel_tlb,
-				NUMA_NO_NODE);
-}
-
 void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask)
 {
 	void *p;
diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
index 38ccb909bc9d..c1833b159d3b 100644
--- a/arch/microblaze/mm/pgtable.c
+++ b/arch/microblaze/mm/pgtable.c
@@ -33,6 +33,7 @@
 #include <linux/init.h>
 #include <linux/mm_types.h>
 #include <linux/pgtable.h>
+#include <linux/memblock.h>
 
 #include <asm/pgalloc.h>
 #include <linux/io.h>
@@ -242,15 +243,13 @@ unsigned long iopa(unsigned long addr)
 
 __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
-	pte_t *pte;
-	if (mem_init_done) {
-		pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
-	} else {
-		pte = (pte_t *)early_get_page();
-		if (pte)
-			clear_page(pte);
-	}
-	return pte;
+	if (mem_init_done)
+		return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+	else
+		return memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE,
+					      MEMBLOCK_LOW_LIMIT,
+					      memory_start + kernel_tlb,
+					      NUMA_NO_NODE);
 }
 
 void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)
diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms
index e4f6e49417a9..584081df89c2 100644
--- a/arch/mips/Kbuild.platforms
+++ b/arch/mips/Kbuild.platforms
@@ -21,7 +21,6 @@ platform-$(CONFIG_MIPS_MALTA)		+= mti-malta/
 platform-$(CONFIG_MACH_NINTENDO64)	+= n64/
 platform-$(CONFIG_NLM_COMMON)		+= netlogic/
 platform-$(CONFIG_PIC32MZDA)		+= pic32/
-platform-$(CONFIG_MACH_PISTACHIO)	+= pistachio/
 platform-$(CONFIG_RALINK)		+= ralink/
 platform-$(CONFIG_MIKROTIK_RB532)	+= rb532/
 platform-$(CONFIG_SGI_IP22)		+= sgi-ip22/
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 24e374266fdc..771ca53af06d 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -98,6 +98,7 @@ config MIPS
 	select PCI_MSI_ARCH_FALLBACKS if PCI_MSI
 	select RTC_LIB
 	select SYSCTL_EXCEPTION_TRACE
+	select TRACE_IRQFLAGS_SUPPORT
 	select VIRT_TO_BUS
 	select ARCH_HAS_ELFCORE_COMPAT
 
@@ -514,35 +515,6 @@ config MACH_LOONGSON64
 	  and Loongson-2F which will be removed), developed by the Institute
 	  of Computing Technology (ICT), Chinese Academy of Sciences (CAS).
 
-config MACH_PISTACHIO
-	bool "IMG Pistachio SoC based boards"
-	select BOOT_ELF32
-	select BOOT_RAW
-	select CEVT_R4K
-	select CLKSRC_MIPS_GIC
-	select COMMON_CLK
-	select CSRC_R4K
-	select DMA_NONCOHERENT
-	select GPIOLIB
-	select IRQ_MIPS_CPU
-	select MFD_SYSCON
-	select MIPS_CPU_SCACHE
-	select MIPS_GIC
-	select PINCTRL
-	select REGULATOR
-	select SYS_HAS_CPU_MIPS32_R2
-	select SYS_SUPPORTS_32BIT_KERNEL
-	select SYS_SUPPORTS_LITTLE_ENDIAN
-	select SYS_SUPPORTS_MIPS_CPS
-	select SYS_SUPPORTS_MULTITHREADING
-	select SYS_SUPPORTS_RELOCATABLE
-	select SYS_SUPPORTS_ZBOOT
-	select SYS_HAS_EARLY_PRINTK
-	select USE_GENERIC_EARLY_PRINTK_8250
-	select USE_OF
-	help
-	  This enables support for the IMG Pistachio SoC platform.
-
 config MIPS_MALTA
 	bool "MIPS Malta board"
 	select ARCH_MAY_HAVE_PC_FDC
@@ -1089,7 +1061,6 @@ source "arch/mips/ingenic/Kconfig"
 source "arch/mips/jazz/Kconfig"
 source "arch/mips/lantiq/Kconfig"
 source "arch/mips/pic32/Kconfig"
-source "arch/mips/pistachio/Kconfig"
 source "arch/mips/ralink/Kconfig"
 source "arch/mips/sgi-ip27/Kconfig"
 source "arch/mips/sibyte/Kconfig"
diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug
index 43dbf5930796..f4ae7900fcd3 100644
--- a/arch/mips/Kconfig.debug
+++ b/arch/mips/Kconfig.debug
@@ -1,9 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 
-config TRACE_IRQFLAGS_SUPPORT
-	bool
-	default y
-
 config EARLY_PRINTK
 	bool "Early printk" if EXPERT
 	depends on SYS_HAS_EARLY_PRINTK
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 653befc1b176..ea3cd080a1c7 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -254,7 +254,7 @@ endif
 #
 # Board-dependent options and extra files
 #
-include arch/mips/Kbuild.platforms
+include $(srctree)/arch/mips/Kbuild.platforms
 
 ifdef CONFIG_PHYSICAL_START
 load-y					= $(CONFIG_PHYSICAL_START)
@@ -560,6 +560,9 @@ sead3micro_defconfig-y		:= micro32r2el_defconfig BOARDS=sead-3
 legacy_defconfigs		+= xilfpga_defconfig
 xilfpga_defconfig-y		:= 32r2el_defconfig BOARDS=xilfpga
 
+legacy_defconfigs		+= pistachio_defconfig
+pistachio_defconfig-y		:= 32r2el_defconfig BOARDS=marduk
+
 .PHONY: $(legacy_defconfigs)
 $(legacy_defconfigs):
 	$(Q)$(MAKE) -f $(srctree)/Makefile $($@-y)
diff --git a/arch/mips/alchemy/devboards/db1200.c b/arch/mips/alchemy/devboards/db1200.c
index 421d651433b6..1864eb935ca5 100644
--- a/arch/mips/alchemy/devboards/db1200.c
+++ b/arch/mips/alchemy/devboards/db1200.c
@@ -835,7 +835,7 @@ int __init db1200_dev_setup(void)
 	if (!IS_ERR(c)) {
 		pfc = clk_round_rate(c, 50000000);
 		if ((pfc < 1) || (abs(50000000 - pfc) > 2500000))
-			pr_warn("DB1200: cant get I2C close to 50MHz\n");
+			pr_warn("DB1200: can't get I2C close to 50MHz\n");
 		else
 			clk_set_rate(c, pfc);
 		clk_prepare_enable(c);
diff --git a/arch/mips/boot/dts/Makefile b/arch/mips/boot/dts/Makefile
index 60bd7d2a9ad8..be96d35eb582 100644
--- a/arch/mips/boot/dts/Makefile
+++ b/arch/mips/boot/dts/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 subdir-$(CONFIG_BMIPS_GENERIC)		+= brcm
 subdir-$(CONFIG_CAVIUM_OCTEON_SOC)	+= cavium-octeon
-subdir-$(CONFIG_MACH_PISTACHIO)		+= img
+subdir-$(CONFIG_FIT_IMAGE_FDT_MARDUK)   += img
 subdir-$(CONFIG_FIT_IMAGE_FDT_BOSTON)	+= img
 subdir-$(CONFIG_MACH_INGENIC)		+= ingenic
 subdir-$(CONFIG_LANTIQ)			+= lantiq
diff --git a/arch/mips/boot/dts/img/Makefile b/arch/mips/boot/dts/img/Makefile
index 441a3c16efb0..ebb47490b04b 100644
--- a/arch/mips/boot/dts/img/Makefile
+++ b/arch/mips/boot/dts/img/Makefile
@@ -1,5 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 dtb-$(CONFIG_FIT_IMAGE_FDT_BOSTON)	+= boston.dtb
 
-dtb-$(CONFIG_MACH_PISTACHIO)	+= pistachio_marduk.dtb
-obj-$(CONFIG_MACH_PISTACHIO)	+= pistachio_marduk.dtb.o
+dtb-$(CONFIG_FIT_IMAGE_FDT_MARDUK)	+= pistachio_marduk.dtb
diff --git a/arch/mips/boot/dts/img/pistachio.dtsi b/arch/mips/boot/dts/img/pistachio.dtsi
index dc3b7909de73..b1db8b8f446f 100644
--- a/arch/mips/boot/dts/img/pistachio.dtsi
+++ b/arch/mips/boot/dts/img/pistachio.dtsi
@@ -900,6 +900,16 @@
 		};
 	};
 
+	cpc: cpc@1bde0000 {
+		compatible = "mti,mips-cpc";
+		reg = <0x1bde0000 0x10000>;
+	};
+
+	cdmm: cdmm@1bdf0000 {
+		compatible = "mti,mips-cdmm";
+		reg = <0x1bdf0000 0x10000>;
+	};
+
 	usb_phy: usb-phy {
 		compatible = "img,pistachio-usb-phy";
 		clocks = <&clk_core CLK_USB_PHY>;
diff --git a/arch/mips/boot/dts/mscc/ocelot.dtsi b/arch/mips/boot/dts/mscc/ocelot.dtsi
index 535a98284dcb..e51db651af13 100644
--- a/arch/mips/boot/dts/mscc/ocelot.dtsi
+++ b/arch/mips/boot/dts/mscc/ocelot.dtsi
@@ -150,36 +150,47 @@
 
 				port0: port@0 {
 					reg = <0>;
+					status = "disabled";
 				};
 				port1: port@1 {
 					reg = <1>;
+					status = "disabled";
 				};
 				port2: port@2 {
 					reg = <2>;
+					status = "disabled";
 				};
 				port3: port@3 {
 					reg = <3>;
+					status = "disabled";
 				};
 				port4: port@4 {
 					reg = <4>;
+					status = "disabled";
 				};
 				port5: port@5 {
 					reg = <5>;
+					status = "disabled";
 				};
 				port6: port@6 {
 					reg = <6>;
+					status = "disabled";
 				};
 				port7: port@7 {
 					reg = <7>;
+					status = "disabled";
 				};
 				port8: port@8 {
 					reg = <8>;
+					status = "disabled";
 				};
 				port9: port@9 {
 					reg = <9>;
+					status = "disabled";
 				};
 				port10: port@10 {
 					reg = <10>;
+					status = "disabled";
 				};
 			};
 		};
diff --git a/arch/mips/boot/dts/mscc/ocelot_pcb120.dts b/arch/mips/boot/dts/mscc/ocelot_pcb120.dts
index 897de5025d7f..bd240690cb37 100644
--- a/arch/mips/boot/dts/mscc/ocelot_pcb120.dts
+++ b/arch/mips/boot/dts/mscc/ocelot_pcb120.dts
@@ -69,40 +69,52 @@
 };
 
 &port0 {
+	status = "okay";
 	phy-handle = <&phy0>;
+	phy-mode = "internal";
 };
 
 &port1 {
+	status = "okay";
 	phy-handle = <&phy1>;
+	phy-mode = "internal";
 };
 
 &port2 {
+	status = "okay";
 	phy-handle = <&phy2>;
+	phy-mode = "internal";
 };
 
 &port3 {
+	status = "okay";
 	phy-handle = <&phy3>;
+	phy-mode = "internal";
 };
 
 &port4 {
+	status = "okay";
 	phy-handle = <&phy7>;
 	phy-mode = "sgmii";
 	phys = <&serdes 4 SERDES1G(2)>;
 };
 
 &port5 {
+	status = "okay";
 	phy-handle = <&phy4>;
 	phy-mode = "sgmii";
 	phys = <&serdes 5 SERDES1G(5)>;
 };
 
 &port6 {
+	status = "okay";
 	phy-handle = <&phy6>;
 	phy-mode = "sgmii";
 	phys = <&serdes 6 SERDES1G(3)>;
 };
 
 &port9 {
+	status = "okay";
 	phy-handle = <&phy5>;
 	phy-mode = "sgmii";
 	phys = <&serdes 9 SERDES1G(4)>;
diff --git a/arch/mips/boot/dts/mscc/ocelot_pcb123.dts b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts
index ef852f382da8..0185045c7630 100644
--- a/arch/mips/boot/dts/mscc/ocelot_pcb123.dts
+++ b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts
@@ -47,17 +47,25 @@
 };
 
 &port0 {
+	status = "okay";
 	phy-handle = <&phy0>;
+	phy-mode = "internal";
 };
 
 &port1 {
+	status = "okay";
 	phy-handle = <&phy1>;
+	phy-mode = "internal";
 };
 
 &port2 {
+	status = "okay";
 	phy-handle = <&phy2>;
+	phy-mode = "internal";
 };
 
 &port3 {
+	status = "okay";
 	phy-handle = <&phy3>;
+	phy-mode = "internal";
 };
diff --git a/arch/mips/cavium-octeon/executive/cvmx-bootmem.c b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c
index e794b2d53adf..b63ad5d42cc7 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-bootmem.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c
@@ -44,7 +44,7 @@ static struct cvmx_bootmem_desc *cvmx_bootmem_desc;
 
 /* See header file for descriptions of functions */
 
-/**
+/*
  * This macro returns a member of the
  * cvmx_bootmem_named_block_desc_t structure. These members can't
  * be directly addressed as they might be in memory not directly
@@ -60,7 +60,7 @@ static struct cvmx_bootmem_desc *cvmx_bootmem_desc;
 		offsetof(struct cvmx_bootmem_named_block_desc, field),	\
 		sizeof_field(struct cvmx_bootmem_named_block_desc, field))
 
-/**
+/*
  * This function is the implementation of the get macros defined
  * for individual structure members. The argument are generated
  * by the macros inorder to read only the needed memory.
@@ -115,7 +115,7 @@ static uint64_t cvmx_bootmem_phy_get_next(uint64_t addr)
 	return cvmx_read64_uint64((addr + NEXT_OFFSET) | (1ull << 63));
 }
 
-/**
+/*
  * Allocate a block of memory from the free list that was
  * passed to the application by the bootloader within a specified
  * address range. This is an allocate-only algorithm, so
@@ -550,7 +550,7 @@ bootmem_free_done:
 
 }
 
-/**
+/*
  * Finds a named memory block by name.
  * Also used for finding an unused entry in the named block table.
  *
@@ -657,7 +657,7 @@ struct cvmx_bootmem_named_block_desc *cvmx_bootmem_find_named_block(char *name)
 }
 EXPORT_SYMBOL(cvmx_bootmem_find_named_block);
 
-/**
+/*
  * Frees a named block.
  *
  * @name:   name of block to free
diff --git a/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c b/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c
index 3839feba68f2..20189e9ad94d 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c
@@ -42,14 +42,14 @@
 #include <asm/octeon/cvmx-pexp-defs.h>
 #include <asm/octeon/cvmx-pko-defs.h>
 
-/**
+/*
  * This application uses this pointer to access the global queue
  * state. It points to a bootmem named block.
  */
 __cvmx_cmd_queue_all_state_t *__cvmx_cmd_queue_state_ptr;
 EXPORT_SYMBOL_GPL(__cvmx_cmd_queue_state_ptr);
 
-/**
+/*
  * Initialize the Global queue state pointer.
  *
  * Returns CVMX_CMD_QUEUE_SUCCESS or a failure code
@@ -57,27 +57,14 @@ EXPORT_SYMBOL_GPL(__cvmx_cmd_queue_state_ptr);
 static cvmx_cmd_queue_result_t __cvmx_cmd_queue_init_state_ptr(void)
 {
 	char *alloc_name = "cvmx_cmd_queues";
-#if defined(CONFIG_CAVIUM_RESERVE32) && CONFIG_CAVIUM_RESERVE32
-	extern uint64_t octeon_reserve32_memory;
-#endif
 
 	if (likely(__cvmx_cmd_queue_state_ptr))
 		return CVMX_CMD_QUEUE_SUCCESS;
 
-#if defined(CONFIG_CAVIUM_RESERVE32) && CONFIG_CAVIUM_RESERVE32
-	if (octeon_reserve32_memory)
-		__cvmx_cmd_queue_state_ptr =
-		    cvmx_bootmem_alloc_named_range(sizeof(*__cvmx_cmd_queue_state_ptr),
-						   octeon_reserve32_memory,
-						   octeon_reserve32_memory +
-						   (CONFIG_CAVIUM_RESERVE32 <<
-						    20) - 1, 128, alloc_name);
-	else
-#endif
-		__cvmx_cmd_queue_state_ptr =
-		    cvmx_bootmem_alloc_named(sizeof(*__cvmx_cmd_queue_state_ptr),
-					    128,
-					    alloc_name);
+	__cvmx_cmd_queue_state_ptr =
+		cvmx_bootmem_alloc_named(sizeof(*__cvmx_cmd_queue_state_ptr),
+					 128,
+					 alloc_name);
 	if (__cvmx_cmd_queue_state_ptr)
 		memset(__cvmx_cmd_queue_state_ptr, 0,
 		       sizeof(*__cvmx_cmd_queue_state_ptr));
@@ -97,7 +84,7 @@ static cvmx_cmd_queue_result_t __cvmx_cmd_queue_init_state_ptr(void)
 	return CVMX_CMD_QUEUE_SUCCESS;
 }
 
-/**
+/*
  * Initialize a command queue for use. The initial FPA buffer is
  * allocated and the hardware unit is configured to point to the
  * new command queue.
@@ -195,7 +182,7 @@ cvmx_cmd_queue_result_t cvmx_cmd_queue_initialize(cvmx_cmd_queue_id_t queue_id,
 	}
 }
 
-/**
+/*
  * Shutdown a queue a free it's command buffers to the FPA. The
  * hardware connected to the queue must be stopped before this
  * function is called.
@@ -231,7 +218,7 @@ cvmx_cmd_queue_result_t cvmx_cmd_queue_shutdown(cvmx_cmd_queue_id_t queue_id)
 	return CVMX_CMD_QUEUE_SUCCESS;
 }
 
-/**
+/*
  * Return the number of command words pending in the queue. This
  * function may be relatively slow for some hardware units.
  *
@@ -287,7 +274,7 @@ int cvmx_cmd_queue_length(cvmx_cmd_queue_id_t queue_id)
 	return CVMX_CMD_QUEUE_INVALID_PARAM;
 }
 
-/**
+/*
  * Return the command buffer to be written to. The purpose of this
  * function is to allow CVMX routine access t othe low level buffer
  * for initial hardware setup. User applications should not call this
diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper-board.c b/arch/mips/cavium-octeon/executive/cvmx-helper-board.c
index abd11b7af22f..1daa0c6b6f4e 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-helper-board.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-helper-board.c
@@ -44,7 +44,7 @@
 #include <asm/octeon/cvmx-gmxx-defs.h>
 #include <asm/octeon/cvmx-asxx-defs.h>
 
-/**
+/*
  * Return the MII PHY address associated with the given IPD
  * port. A result of -1 means there isn't a MII capable PHY
  * connected to this port. On chips supporting multiple MII
@@ -189,7 +189,7 @@ int cvmx_helper_board_get_mii_address(int ipd_port)
 	return -1;
 }
 
-/**
+/*
  * This function is the board specific method of determining an
  * ethernet ports link speed. Most Octeon boards have Marvell PHYs
  * and are handled by the fall through case. This function must be
@@ -274,7 +274,7 @@ union cvmx_helper_link_info __cvmx_helper_board_link_get(int ipd_port)
 	return result;
 }
 
-/**
+/*
  * This function is called by cvmx_helper_interface_probe() after it
  * determines the number of ports Octeon can support on a specific
  * interface. This function is the per board location to override
@@ -320,7 +320,7 @@ int __cvmx_helper_board_interface_probe(int interface, int supported_ports)
 	return supported_ports;
 }
 
-/**
+/*
  * Get the clock type used for the USB block based on board type.
  * Used by the USB code for auto configuration of clock type.
  *
diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c b/arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c
index c4b58598aa9d..a8c3be4eb6f0 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c
@@ -42,7 +42,7 @@
 #include <asm/octeon/cvmx-asxx-defs.h>
 #include <asm/octeon/cvmx-dbg-defs.h>
 
-/**
+/*
  * Probe RGMII ports and determine the number present
  *
  * @interface: Interface to probe
@@ -88,7 +88,7 @@ int __cvmx_helper_rgmii_probe(int interface)
 	return num_ports;
 }
 
-/**
+/*
  * Put an RGMII interface in loopback mode. Internal packets sent
  * out will be received back again on the same port. Externally
  * received packets will echo back out.
@@ -120,7 +120,7 @@ void cvmx_helper_rgmii_internal_loopback(int port)
 	cvmx_write_csr(CVMX_GMXX_PRTX_CFG(index, interface), gmx_cfg.u64);
 }
 
-/**
+/*
  * Workaround ASX setup errata with CN38XX pass1
  *
  * @interface: Interface to setup
@@ -148,7 +148,7 @@ static int __cvmx_helper_errata_asx_pass1(int interface, int port,
 	return 0;
 }
 
-/**
+/*
  * Configure all of the ASX, GMX, and PKO registers required
  * to get RGMII to function on the supplied interface.
  *
@@ -251,7 +251,7 @@ int __cvmx_helper_rgmii_enable(int interface)
 	return 0;
 }
 
-/**
+/*
  * Return the link state of an IPD/PKO port as returned by
  * auto negotiation. The result of this function may not match
  * Octeon's link config if auto negotiation has changed since
@@ -280,7 +280,7 @@ union cvmx_helper_link_info __cvmx_helper_rgmii_link_get(int ipd_port)
 		return __cvmx_helper_board_link_get(ipd_port);
 }
 
-/**
+/*
  * Configure an IPD/PKO port for the specified link state. This
  * function does not influence auto negotiation at the PHY level.
  * The passed link state must always match the link state returned
diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper-xaui.c b/arch/mips/cavium-octeon/executive/cvmx-helper-xaui.c
index 842990e8404f..fea71a85bb29 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-helper-xaui.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-helper-xaui.c
@@ -54,7 +54,7 @@ int __cvmx_helper_xaui_enumerate(int interface)
 		return 1;
 }
 
-/**
+/*
  * Probe a XAUI interface and determine the number of ports
  * connected to it. The XAUI interface should still be down
  * after this call.
@@ -102,7 +102,7 @@ int __cvmx_helper_xaui_probe(int interface)
 	return __cvmx_helper_xaui_enumerate(interface);
 }
 
-/**
+/*
  * Bringup and enable a XAUI interface. After this call packet
  * I/O should be fully functional. This is called with IPD
  * enabled but PKO disabled.
@@ -249,7 +249,7 @@ int __cvmx_helper_xaui_enable(int interface)
 	return 0;
 }
 
-/**
+/*
  * Return the link state of an IPD/PKO port as returned by
  * auto negotiation. The result of this function may not match
  * Octeon's link config if auto negotiation has changed since
@@ -288,7 +288,7 @@ union cvmx_helper_link_info __cvmx_helper_xaui_link_get(int ipd_port)
 	return result;
 }
 
-/**
+/*
  * Configure an IPD/PKO port for the specified link state. This
  * function does not influence auto negotiation at the PHY level.
  * The passed link state must always match the link state returned
diff --git a/arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c b/arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c
index 2f415d9d0f3c..67d6da21d49f 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c
@@ -46,7 +46,9 @@
 
 
 /**
- * __cvmx_interrupt_gmxx_rxx_int_en_enable enables all interrupt bits in cvmx_gmxx_rxx_int_en_t
+ * __cvmx_interrupt_gmxx_rxx_int_en_enable - enable all interrupt bits in cvmx_gmxx_rxx_int_en_t
+ * @index: interrupt register offset
+ * @block: interrupt register block_id
  */
 void __cvmx_interrupt_gmxx_rxx_int_en_enable(int index, int block)
 {
@@ -227,7 +229,9 @@ void __cvmx_interrupt_gmxx_rxx_int_en_enable(int index, int block)
 	cvmx_write_csr(CVMX_GMXX_RXX_INT_EN(index, block), gmx_rx_int_en.u64);
 }
 /**
- * __cvmx_interrupt_pcsx_intx_en_reg_enable enables all interrupt bits in cvmx_pcsx_intx_en_reg_t
+ * __cvmx_interrupt_pcsx_intx_en_reg_enable - enable all interrupt bits in cvmx_pcsx_intx_en_reg_t
+ * @index: interrupt register offset
+ * @block: interrupt register block_id
  */
 void __cvmx_interrupt_pcsx_intx_en_reg_enable(int index, int block)
 {
@@ -268,7 +272,8 @@ void __cvmx_interrupt_pcsx_intx_en_reg_enable(int index, int block)
 	cvmx_write_csr(CVMX_PCSX_INTX_EN_REG(index, block), pcs_int_en_reg.u64);
 }
 /**
- * __cvmx_interrupt_pcsxx_int_en_reg_enable enables all interrupt bits in cvmx_pcsxx_int_en_reg_t
+ * __cvmx_interrupt_pcsxx_int_en_reg_enable - enable all interrupt bits in cvmx_pcsxx_int_en_reg_t
+ * @index: interrupt register block_id
  */
 void __cvmx_interrupt_pcsxx_int_en_reg_enable(int index)
 {
@@ -298,7 +303,8 @@ void __cvmx_interrupt_pcsxx_int_en_reg_enable(int index)
 }
 
 /**
- * __cvmx_interrupt_spxx_int_msk_enable enables all interrupt bits in cvmx_spxx_int_msk_t
+ * __cvmx_interrupt_spxx_int_msk_enable - enable all interrupt bits in cvmx_spxx_int_msk_t
+ * @index: interrupt register block_id
  */
 void __cvmx_interrupt_spxx_int_msk_enable(int index)
 {
@@ -337,7 +343,8 @@ void __cvmx_interrupt_spxx_int_msk_enable(int index)
 	cvmx_write_csr(CVMX_SPXX_INT_MSK(index), spx_int_msk.u64);
 }
 /**
- * __cvmx_interrupt_stxx_int_msk_enable enables all interrupt bits in cvmx_stxx_int_msk_t
+ * __cvmx_interrupt_stxx_int_msk_enable - enable all interrupt bits in cvmx_stxx_int_msk_t
+ * @index: interrupt register block_id
  */
 void __cvmx_interrupt_stxx_int_msk_enable(int index)
 {
diff --git a/arch/mips/cavium-octeon/executive/cvmx-l2c.c b/arch/mips/cavium-octeon/executive/cvmx-l2c.c
index 83df0a963a8b..33b303691bc2 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-l2c.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-l2c.c
@@ -281,7 +281,7 @@ uint64_t cvmx_l2c_read_perf(uint32_t counter)
 	}
 }
 
-/**
+/*
  * @INTERNAL
  * Helper function use to fault in cache lines for L2 cache locking
  *
@@ -575,7 +575,7 @@ union __cvmx_l2c_tag {
 };
 
 
-/**
+/*
  * @INTERNAL
  * Function to read a L2C tag.  This code make the current core
  * the 'debug core' for the L2.  This code must only be executed by
@@ -764,9 +764,8 @@ int cvmx_l2c_get_cache_size_bytes(void)
 		CVMX_CACHE_LINE_SIZE;
 }
 
-/**
+/*
  * Return log base 2 of the number of sets in the L2 cache
- * Returns
  */
 int cvmx_l2c_get_set_bits(void)
 {
@@ -857,7 +856,7 @@ int cvmx_l2c_get_num_assoc(void)
 	return l2_assoc;
 }
 
-/**
+/*
  * Flush a line from the L2 cache
  * This should only be called from one core at a time, as this routine
  * sets the core to the 'debug' core in order to flush the line.
diff --git a/arch/mips/cavium-octeon/executive/cvmx-pko.c b/arch/mips/cavium-octeon/executive/cvmx-pko.c
index b0efc35e95c4..7c4879e74318 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-pko.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-pko.c
@@ -35,7 +35,7 @@
 #include <asm/octeon/cvmx-pko.h>
 #include <asm/octeon/cvmx-helper.h>
 
-/**
+/*
  * Internal state of packet output
  */
 
@@ -176,7 +176,7 @@ static void __cvmx_pko_chip_init(void)
 	}
 }
 
-/**
+/*
  * Call before any other calls to initialize the packet
  * output system.  This does chip global config, and should only be
  * done by one core.
@@ -229,7 +229,7 @@ void cvmx_pko_initialize_global(void)
 	}
 }
 
-/**
+/*
  * This function does per-core initialization required by the PKO routines.
  * This must be called on all cores that will do packet output, and must
  * be called after the FPA has been initialized and filled with pages.
@@ -243,7 +243,7 @@ int cvmx_pko_initialize_local(void)
 	return 0;
 }
 
-/**
+/*
  * Enables the packet output hardware. It must already be
  * configured.
  */
@@ -266,7 +266,7 @@ void cvmx_pko_enable(void)
 	cvmx_write_csr(CVMX_PKO_REG_FLAGS, flags.u64);
 }
 
-/**
+/*
  * Disables the packet output. Does not affect any configuration.
  */
 void cvmx_pko_disable(void)
@@ -278,7 +278,7 @@ void cvmx_pko_disable(void)
 }
 EXPORT_SYMBOL_GPL(cvmx_pko_disable);
 
-/**
+/*
  * Reset the packet output.
  */
 static void __cvmx_pko_reset(void)
@@ -289,7 +289,7 @@ static void __cvmx_pko_reset(void)
 	cvmx_write_csr(CVMX_PKO_REG_FLAGS, pko_reg_flags.u64);
 }
 
-/**
+/*
  * Shutdown and free resources required by packet output.
  */
 void cvmx_pko_shutdown(void)
@@ -320,7 +320,7 @@ void cvmx_pko_shutdown(void)
 }
 EXPORT_SYMBOL_GPL(cvmx_pko_shutdown);
 
-/**
+/*
  * Configure a output port and the associated queues for use.
  *
  * @port:	Port to configure.
@@ -548,7 +548,7 @@ cvmx_pko_status_t cvmx_pko_config_port(uint64_t port, uint64_t base_queue,
 }
 
 #ifdef PKO_DEBUG
-/**
+/*
  * Show map of ports -> queues for different cores.
  */
 void cvmx_pko_show_queue_map()
@@ -573,7 +573,7 @@ void cvmx_pko_show_queue_map()
 }
 #endif
 
-/**
+/*
  * Rate limit a PKO port to a max packets/sec. This function is only
  * supported on CN51XX and higher, excluding CN58XX.
  *
@@ -606,7 +606,7 @@ int cvmx_pko_rate_limit_packets(int port, int packets_s, int burst)
 	return 0;
 }
 
-/**
+/*
  * Rate limit a PKO port to a max bits/sec. This function is only
  * supported on CN51XX and higher, excluding CN58XX.
  *
diff --git a/arch/mips/cavium-octeon/executive/cvmx-spi.c b/arch/mips/cavium-octeon/executive/cvmx-spi.c
index f51957a3e915..eb9333e84a6b 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-spi.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-spi.c
@@ -66,7 +66,7 @@ static cvmx_spi_callbacks_t cvmx_spi_callbacks = {
 	.interface_up_cb = cvmx_spi_interface_up_cb
 };
 
-/**
+/*
  * Get current SPI4 initialization callbacks
  *
  * @callbacks:	Pointer to the callbacks structure.to fill
@@ -78,7 +78,7 @@ void cvmx_spi_get_callbacks(cvmx_spi_callbacks_t *callbacks)
 	memcpy(callbacks, &cvmx_spi_callbacks, sizeof(cvmx_spi_callbacks));
 }
 
-/**
+/*
  * Set new SPI4 initialization callbacks
  *
  * @new_callbacks:  Pointer to an updated callbacks structure.
@@ -88,7 +88,7 @@ void cvmx_spi_set_callbacks(cvmx_spi_callbacks_t *new_callbacks)
 	memcpy(&cvmx_spi_callbacks, new_callbacks, sizeof(cvmx_spi_callbacks));
 }
 
-/**
+/*
  * Initialize and start the SPI interface.
  *
  * @interface: The identifier of the packet interface to configure and
@@ -133,7 +133,7 @@ int cvmx_spi_start_interface(int interface, cvmx_spi_mode_t mode, int timeout,
 	return res;
 }
 
-/**
+/*
  * This routine restarts the SPI interface after it has lost synchronization
  * with its correspondent system.
  *
@@ -179,7 +179,7 @@ int cvmx_spi_restart_interface(int interface, cvmx_spi_mode_t mode, int timeout)
 }
 EXPORT_SYMBOL_GPL(cvmx_spi_restart_interface);
 
-/**
+/*
  * Callback to perform SPI4 reset
  *
  * @interface: The identifier of the packet interface to configure and
@@ -294,7 +294,7 @@ int cvmx_spi_reset_cb(int interface, cvmx_spi_mode_t mode)
 	return 0;
 }
 
-/**
+/*
  * Callback to setup calendar and miscellaneous settings before clock detection
  *
  * @interface: The identifier of the packet interface to configure and
@@ -413,7 +413,7 @@ int cvmx_spi_calendar_setup_cb(int interface, cvmx_spi_mode_t mode,
 	return 0;
 }
 
-/**
+/*
  * Callback to perform clock detection
  *
  * @interface: The identifier of the packet interface to configure and
@@ -491,7 +491,7 @@ int cvmx_spi_clock_detect_cb(int interface, cvmx_spi_mode_t mode, int timeout)
 	return 0;
 }
 
-/**
+/*
  * Callback to perform link training
  *
  * @interface: The identifier of the packet interface to configure and
@@ -560,7 +560,7 @@ int cvmx_spi_training_cb(int interface, cvmx_spi_mode_t mode, int timeout)
 	return 0;
 }
 
-/**
+/*
  * Callback to perform calendar data synchronization
  *
  * @interface: The identifier of the packet interface to configure and
@@ -617,7 +617,7 @@ int cvmx_spi_calendar_sync_cb(int interface, cvmx_spi_mode_t mode, int timeout)
 	return 0;
 }
 
-/**
+/*
  * Callback to handle interface up
  *
  * @interface: The identifier of the packet interface to configure and
diff --git a/arch/mips/cavium-octeon/flash_setup.c b/arch/mips/cavium-octeon/flash_setup.c
index a5e8f4a784af..c8a8c6d359b9 100644
--- a/arch/mips/cavium-octeon/flash_setup.c
+++ b/arch/mips/cavium-octeon/flash_setup.c
@@ -62,7 +62,7 @@ static void octeon_flash_map_copy_to(struct map_info *map, unsigned long to,
 	up(&octeon_bootbus_sem);
 }
 
-/**
+/*
  * Module/ driver initialization.
  *
  * Returns Zero on success
diff --git a/arch/mips/cavium-octeon/octeon-memcpy.S b/arch/mips/cavium-octeon/octeon-memcpy.S
index 600d018cf354..0a515cde1c18 100644
--- a/arch/mips/cavium-octeon/octeon-memcpy.S
+++ b/arch/mips/cavium-octeon/octeon-memcpy.S
@@ -154,8 +154,6 @@ FEXPORT(__raw_copy_from_user)
 EXPORT_SYMBOL(__raw_copy_from_user)
 FEXPORT(__raw_copy_to_user)
 EXPORT_SYMBOL(__raw_copy_to_user)
-FEXPORT(__raw_copy_in_user)
-EXPORT_SYMBOL(__raw_copy_in_user)
 	/*
 	 * Note: dst & src may be unaligned, len may be 0
 	 * Temps
diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index ce4e2806159b..00bf269763cf 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -284,11 +284,6 @@ void octeon_crash_smp_send_stop(void)
 
 #endif /* CONFIG_KEXEC */
 
-#ifdef CONFIG_CAVIUM_RESERVE32
-uint64_t octeon_reserve32_memory;
-EXPORT_SYMBOL(octeon_reserve32_memory);
-#endif
-
 #ifdef CONFIG_KEXEC
 /* crashkernel cmdline parameter is parsed _after_ memory setup
  * we also parse it here (workaround for EHB5200) */
@@ -300,9 +295,10 @@ static int octeon_uart;
 extern asmlinkage void handle_int(void);
 
 /**
- * Return non zero if we are currently running in the Octeon simulator
+ * octeon_is_simulation - Return non-zero if we are currently running
+ * in the Octeon simulator
  *
- * Returns
+ * Return: non-0 if running in the Octeon simulator, 0 otherwise
  */
 int octeon_is_simulation(void)
 {
@@ -311,10 +307,10 @@ int octeon_is_simulation(void)
 EXPORT_SYMBOL(octeon_is_simulation);
 
 /**
- * Return true if Octeon is in PCI Host mode. This means
+ * octeon_is_pci_host - Return true if Octeon is in PCI Host mode. This means
  * Linux can control the PCI bus.
  *
- * Returns Non zero if Octeon in host mode.
+ * Return: Non-zero if Octeon is in host mode.
  */
 int octeon_is_pci_host(void)
 {
@@ -326,9 +322,9 @@ int octeon_is_pci_host(void)
 }
 
 /**
- * Get the clock rate of Octeon
+ * octeon_get_clock_rate - Get the clock rate of Octeon
  *
- * Returns Clock rate in HZ
+ * Return: Clock rate in HZ
  */
 uint64_t octeon_get_clock_rate(void)
 {
@@ -348,11 +344,11 @@ EXPORT_SYMBOL(octeon_get_io_clock_rate);
 
 
 /**
- * Write to the LCD display connected to the bootbus. This display
- * exists on most Cavium evaluation boards. If it doesn't exist, then
- * this function doesn't do anything.
- *
+ * octeon_write_lcd - Write to the LCD display connected to the bootbus.
  * @s:	    String to write
+ *
+ * This display exists on most Cavium evaluation boards. If it doesn't exist,
+ * then this function doesn't do anything.
  */
 static void octeon_write_lcd(const char *s)
 {
@@ -372,9 +368,9 @@ static void octeon_write_lcd(const char *s)
 }
 
 /**
- * Return the console uart passed by the bootloader
+ * octeon_get_boot_uart - Return the console uart passed by the bootloader
  *
- * Returns uart	  (0 or 1)
+ * Return: uart number (0 or 1)
  */
 static int octeon_get_boot_uart(void)
 {
@@ -383,9 +379,9 @@ static int octeon_get_boot_uart(void)
 }
 
 /**
- * Get the coremask Linux was booted on.
+ * octeon_get_boot_coremask - Get the coremask Linux was booted on.
  *
- * Returns Core mask
+ * Return: Core mask
  */
 int octeon_get_boot_coremask(void)
 {
@@ -393,7 +389,7 @@ int octeon_get_boot_coremask(void)
 }
 
 /**
- * Check the hardware BIST results for a CPU
+ * octeon_check_cpu_bist - Check the hardware BIST results for a CPU
  */
 void octeon_check_cpu_bist(void)
 {
@@ -424,7 +420,7 @@ void octeon_check_cpu_bist(void)
 }
 
 /**
- * Reboot Octeon
+ * octeon_restart - Reboot Octeon
  *
  * @command: Command to pass to the bootloader. Currently ignored.
  */
@@ -449,7 +445,7 @@ static void octeon_restart(char *command)
 
 
 /**
- * Permanently stop a core.
+ * octeon_kill_core - Permanently stop a core.
  *
  * @arg: Ignored.
  */
@@ -469,7 +465,7 @@ static void octeon_kill_core(void *arg)
 
 
 /**
- * Halt the system
+ * octeon_halt - Halt the system
  */
 static void octeon_halt(void)
 {
@@ -512,9 +508,9 @@ static void __init init_octeon_system_type(void)
 }
 
 /**
- * Return a string representing the system type
+ * octeon_board_type_string - Return a string representing the system type
  *
- * Returns
+ * Return: system type string
  */
 const char *octeon_board_type_string(void)
 {
@@ -655,7 +651,7 @@ void octeon_user_io_init(void)
 }
 
 /**
- * Early entry point for arch setup
+ * prom_init - Early entry point for arch setup
  */
 void __init prom_init(void)
 {
@@ -665,9 +661,7 @@ void __init prom_init(void)
 	int i;
 	u64 t;
 	int argc;
-#ifdef CONFIG_CAVIUM_RESERVE32
-	int64_t addr = -1;
-#endif
+
 	/*
 	 * The bootloader passes a pointer to the boot descriptor in
 	 * $a3, this is available as fw_arg3.
@@ -782,25 +776,6 @@ void __init prom_init(void)
 		cvmx_write_csr(CVMX_LED_UDD_DATX(1), 0);
 		cvmx_write_csr(CVMX_LED_EN, 1);
 	}
-#ifdef CONFIG_CAVIUM_RESERVE32
-	/*
-	 * We need to temporarily allocate all memory in the reserve32
-	 * region. This makes sure the kernel doesn't allocate this
-	 * memory when it is getting memory from the
-	 * bootloader. Later, after the memory allocations are
-	 * complete, the reserve32 will be freed.
-	 *
-	 * Allocate memory for RESERVED32 aligned on 2MB boundary. This
-	 * is in case we later use hugetlb entries with it.
-	 */
-	addr = cvmx_bootmem_phy_named_block_alloc(CONFIG_CAVIUM_RESERVE32 << 20,
-						0, 0, 2 << 20,
-						"CAVIUM_RESERVE32", 0);
-	if (addr < 0)
-		pr_err("Failed to allocate CAVIUM_RESERVE32 memory area\n");
-	else
-		octeon_reserve32_memory = addr;
-#endif
 
 #ifdef CONFIG_CAVIUM_OCTEON_LOCK_L2
 	if (cvmx_read_csr(CVMX_L2D_FUS3) & (3ull << 34)) {
@@ -1078,16 +1053,6 @@ void __init plat_mem_setup(void)
 	cvmx_bootmem_unlock();
 #endif /* CONFIG_CRASH_DUMP */
 
-#ifdef CONFIG_CAVIUM_RESERVE32
-	/*
-	 * Now that we've allocated the kernel memory it is safe to
-	 * free the reserved region. We free it here so that builtin
-	 * drivers can use the memory.
-	 */
-	if (octeon_reserve32_memory)
-		cvmx_bootmem_free_named("CAVIUM_RESERVE32");
-#endif /* CONFIG_CAVIUM_RESERVE32 */
-
 	if (total == 0)
 		panic("Unable to allocate memory from "
 		      "cvmx_bootmem_phy_alloc");
diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
index 66ce5527da54..89954f5f87fb 100644
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -91,7 +91,7 @@ static irqreturn_t mailbox_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-/**
+/*
  * Cause the function described by call_data to be executed on the passed
  * cpu.	 When the function has finished, increment the finished field of
  * call_data.
@@ -115,7 +115,7 @@ static inline void octeon_send_ipi_mask(const struct cpumask *mask,
 		octeon_send_ipi_single(i, action);
 }
 
-/**
+/*
  * Detect available CPUs, populate cpu_possible_mask
  */
 static void octeon_smp_hotplug_setup(void)
@@ -202,9 +202,8 @@ int plat_post_relocation(long offset)
 }
 #endif /* CONFIG_RELOCATABLE */
 
-/**
+/*
  * Firmware CPU startup hook
- *
  */
 static int octeon_boot_secondary(int cpu, struct task_struct *idle)
 {
@@ -232,7 +231,7 @@ static int octeon_boot_secondary(int cpu, struct task_struct *idle)
 	return 0;
 }
 
-/**
+/*
  * After we've done initial boot, this function is called to allow the
  * board code to clean up state, if needed
  */
@@ -250,9 +249,8 @@ static void octeon_init_secondary(void)
 	octeon_irq_setup_secondary();
 }
 
-/**
+/*
  * Callout to firmware before smp_init
- *
  */
 static void __init octeon_prepare_cpus(unsigned int max_cpus)
 {
@@ -268,7 +266,7 @@ static void __init octeon_prepare_cpus(unsigned int max_cpus)
 	}
 }
 
-/**
+/*
  * Last chance for the board code to finish SMP initialization before
  * the CPU is "online".
  */
diff --git a/arch/mips/configs/generic/board-marduk.config b/arch/mips/configs/generic/board-marduk.config
new file mode 100644
index 000000000000..05ca34cd5a73
--- /dev/null
+++ b/arch/mips/configs/generic/board-marduk.config
@@ -0,0 +1,53 @@
+CONFIG_FIT_IMAGE_FDT_MARDUK=y
+
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+
+CONFIG_CLKSRC_PISTACHIO=y
+
+CONFIG_COMMON_CLK_PISTACHIO=y
+
+CONFIG_DMADEVICES=y
+CONFIG_IMG_MDC_DMA=y
+
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_SYSFS=y
+CONFIG_GPIO_PCH=y
+
+CONFIG_I2C=y
+CONFIG_I2C_IMG=y
+
+CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_DW=y
+CONFIG_MMC_DW_PLTFM=y
+
+CONFIG_NETDEVICES=y
+CONFIG_STMMAC_ETH=y
+CONFIG_STMMAC_PLATFORM=y
+
+CONFIG_PHY_PISTACHIO_USB=y
+
+CONFIG_PINCTRL=y
+CONFIG_PINCTRL_PISTACHIO=y
+
+CONFIG_RESET_PISTACHIO=y
+
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_SERIAL_8250_DW=y
+
+CONFIG_SPI=y
+CONFIG_SRAM=y
+
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_DWC2=y
+
+CONFIG_CRYPTO_DEV_IMGTEC_HASH=y
+CONFIG_IMGPDC_WDT=y
+CONFIG_IR_IMG=y
+CONFIG_CC10001_ADC=y
+CONFIG_SND_SOC_IMG=y
diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig
index aaf9d5e0aa2c..791894c4d8fb 100644
--- a/arch/mips/configs/lemote2f_defconfig
+++ b/arch/mips/configs/lemote2f_defconfig
@@ -116,7 +116,6 @@ CONFIG_8139TOO=y
 CONFIG_R8169=y
 CONFIG_USB_USBNET=m
 CONFIG_USB_NET_CDC_EEM=m
-CONFIG_INPUT_POLLDEV=m
 CONFIG_INPUT_EVDEV=y
 # CONFIG_MOUSE_PS2_ALPS is not set
 # CONFIG_MOUSE_PS2_LOGIPS2PP is not set
diff --git a/arch/mips/configs/pic32mzda_defconfig b/arch/mips/configs/pic32mzda_defconfig
index 63fe2da1b37f..fd567247adc7 100644
--- a/arch/mips/configs/pic32mzda_defconfig
+++ b/arch/mips/configs/pic32mzda_defconfig
@@ -34,7 +34,6 @@ CONFIG_SCSI_CONSTANTS=y
 CONFIG_SCSI_SCAN_ASYNC=y
 # CONFIG_SCSI_LOWLEVEL is not set
 CONFIG_INPUT_LEDS=m
-CONFIG_INPUT_POLLDEV=y
 CONFIG_INPUT_MOUSEDEV=m
 CONFIG_INPUT_EVDEV=y
 CONFIG_INPUT_EVBUG=m
diff --git a/arch/mips/configs/pistachio_defconfig b/arch/mips/configs/pistachio_defconfig
deleted file mode 100644
index b9adf15ebbec..000000000000
--- a/arch/mips/configs/pistachio_defconfig
+++ /dev/null
@@ -1,316 +0,0 @@
-# CONFIG_LOCALVERSION_AUTO is not set
-CONFIG_DEFAULT_HOSTNAME="localhost"
-CONFIG_SYSVIPC=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_PREEMPT_VOLUNTARY=y
-CONFIG_IKCONFIG=m
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=18
-CONFIG_CGROUPS=y
-CONFIG_CGROUP_SCHED=y
-CONFIG_CFS_BANDWIDTH=y
-CONFIG_CGROUP_FREEZER=y
-CONFIG_NAMESPACES=y
-CONFIG_USER_NS=y
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_RD_BZIP2 is not set
-# CONFIG_RD_LZMA is not set
-# CONFIG_RD_LZO is not set
-# CONFIG_RD_LZ4 is not set
-CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_EMBEDDED=y
-# CONFIG_COMPAT_BRK is not set
-CONFIG_PROFILING=y
-CONFIG_MACH_PISTACHIO=y
-CONFIG_MIPS_CPS=y
-CONFIG_NR_CPUS=4
-CONFIG_PM_DEBUG=y
-CONFIG_PM_ADVANCED_DEBUG=y
-CONFIG_CPU_IDLE=y
-# CONFIG_MIPS_CPS_CPUIDLE is not set
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_COMPACTION is not set
-CONFIG_DEFAULT_MMAP_MIN_ADDR=32768
-CONFIG_ZSMALLOC=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_NET_KEY=m
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_ADVANCED_ROUTER=y
-CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_ROUTE_MULTIPATH=y
-CONFIG_IP_ROUTE_VERBOSE=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_MROUTE=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
-CONFIG_SYN_COOKIES=y
-CONFIG_INET_AH=m
-CONFIG_INET_ESP=m
-CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
-# CONFIG_INET_DIAG is not set
-CONFIG_TCP_CONG_ADVANCED=y
-# CONFIG_TCP_CONG_BIC is not set
-# CONFIG_TCP_CONG_WESTWOOD is not set
-# CONFIG_TCP_CONG_HTCP is not set
-CONFIG_TCP_CONG_LP=m
-CONFIG_TCP_MD5SIG=y
-CONFIG_INET6_AH=m
-CONFIG_INET6_ESP=m
-CONFIG_INET6_XFRM_MODE_TRANSPORT=m
-CONFIG_INET6_XFRM_MODE_TUNNEL=m
-CONFIG_INET6_XFRM_MODE_BEET=m
-CONFIG_IPV6_SIT=m
-CONFIG_NETWORK_SECMARK=y
-CONFIG_NETFILTER=y
-# CONFIG_BRIDGE_NETFILTER is not set
-CONFIG_NF_CONNTRACK=y
-CONFIG_NF_CT_NETLINK=y
-CONFIG_NETFILTER_XT_MARK=m
-CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y
-CONFIG_NETFILTER_XT_TARGET_DSCP=y
-CONFIG_NETFILTER_XT_TARGET_NFLOG=y
-CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y
-CONFIG_NETFILTER_XT_TARGET_SECMARK=y
-CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
-CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
-CONFIG_NETFILTER_XT_MATCH_DSCP=y
-CONFIG_NETFILTER_XT_MATCH_POLICY=y
-CONFIG_NETFILTER_XT_MATCH_STATE=y
-CONFIG_NF_NAT_IPV4=m
-CONFIG_IP_NF_IPTABLES=y
-CONFIG_IP_NF_FILTER=y
-CONFIG_IP_NF_TARGET_REJECT=y
-CONFIG_IP_NF_MANGLE=y
-CONFIG_NF_NAT_IPV6=m
-CONFIG_IP6_NF_IPTABLES=m
-CONFIG_IP6_NF_MATCH_IPV6HEADER=m
-CONFIG_IP6_NF_FILTER=m
-CONFIG_IP6_NF_TARGET_REJECT=m
-CONFIG_IP6_NF_MANGLE=m
-CONFIG_BRIDGE=m
-CONFIG_VLAN_8021Q=m
-CONFIG_NET_SCHED=y
-CONFIG_NET_SCH_HTB=m
-CONFIG_NET_SCH_CODEL=m
-CONFIG_NET_SCH_FQ_CODEL=m
-CONFIG_NET_CLS_U32=m
-CONFIG_CLS_U32_MARK=y
-CONFIG_BT=m
-CONFIG_BT_RFCOMM=m
-CONFIG_BT_HCIBTUSB=m
-CONFIG_BT_HCIBFUSB=m
-CONFIG_BT_HCIVHCI=m
-CONFIG_CFG80211=m
-CONFIG_NL80211_TESTMODE=y
-CONFIG_CFG80211_DEBUGFS=y
-CONFIG_CFG80211_WEXT=y
-CONFIG_MAC80211=m
-CONFIG_MAC80211_LEDS=y
-CONFIG_MAC80211_DEBUGFS=y
-CONFIG_MAC80211_DEBUG_MENU=y
-CONFIG_MAC80211_VERBOSE_DEBUG=y
-CONFIG_RFKILL=y
-CONFIG_DEVTMPFS=y
-CONFIG_DEVTMPFS_MOUNT=y
-CONFIG_DEBUG_DEVRES=y
-CONFIG_CONNECTOR=y
-CONFIG_MTD=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_SPI_NOR=y
-CONFIG_MTD_UBI=y
-CONFIG_MTD_UBI_BLOCK=y
-CONFIG_ZRAM=m
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_SCSI=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_BLK_DEV_SR=m
-CONFIG_SCSI_SPI_ATTRS=y
-CONFIG_MD=y
-CONFIG_BLK_DEV_DM=y
-CONFIG_DM_CRYPT=y
-CONFIG_DM_VERITY=y
-CONFIG_NETDEVICES=y
-CONFIG_TUN=m
-CONFIG_VETH=m
-# CONFIG_NET_VENDOR_MARVELL is not set
-# CONFIG_NET_VENDOR_MICREL is not set
-# CONFIG_NET_VENDOR_MICROCHIP is not set
-# CONFIG_NET_VENDOR_NATSEMI is not set
-# CONFIG_NET_VENDOR_SEEQ is not set
-# CONFIG_NET_VENDOR_SMSC is not set
-CONFIG_STMMAC_ETH=y
-# CONFIG_NET_VENDOR_VIA is not set
-CONFIG_PPP=m
-CONFIG_PPP_ASYNC=m
-CONFIG_USB_PEGASUS=m
-CONFIG_USB_RTL8150=m
-CONFIG_USB_RTL8152=m
-CONFIG_USB_NET_DM9601=m
-CONFIG_USB_NET_SMSC75XX=m
-CONFIG_USB_NET_SMSC95XX=m
-CONFIG_USB_NET_MCS7830=m
-# CONFIG_USB_NET_CDC_SUBSET is not set
-# CONFIG_USB_NET_ZAURUS is not set
-CONFIG_HOSTAP=m
-CONFIG_HOSTAP_FIRMWARE=y
-CONFIG_HOSTAP_FIRMWARE_NVRAM=y
-CONFIG_LIBERTAS_THINFIRM=m
-CONFIG_RT2X00=m
-CONFIG_RT2800USB=m
-CONFIG_MAC80211_HWSIM=m
-CONFIG_USB_NET_RNDIS_WLAN=m
-CONFIG_INPUT_EVDEV=y
-# CONFIG_KEYBOARD_ATKBD is not set
-CONFIG_KEYBOARD_GPIO=y
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-# CONFIG_LEGACY_PTYS is not set
-CONFIG_SERIAL_8250=y
-# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_DW=y
-CONFIG_SERIAL_OF_PLATFORM=y
-CONFIG_HW_RANDOM=y
-CONFIG_TCG_TPM=y
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=m
-CONFIG_I2C_IMG=y
-CONFIG_I2C_STUB=m
-CONFIG_SPI=y
-CONFIG_SPI_BITBANG=m
-CONFIG_SPI_IMG_SPFI=y
-CONFIG_SPI_SPIDEV=y
-CONFIG_DEBUG_GPIO=y
-CONFIG_GPIO_SYSFS=y
-CONFIG_POWER_SUPPLY=y
-CONFIG_THERMAL=y
-CONFIG_WATCHDOG=y
-CONFIG_IMGPDC_WDT=y
-CONFIG_REGULATOR_FIXED_VOLTAGE=y
-CONFIG_REGULATOR_GPIO=y
-CONFIG_RC_CORE=y
-CONFIG_RC_DEVICES=y
-CONFIG_IR_IMG=y
-CONFIG_IR_IMG_NEC=y
-CONFIG_IR_IMG_JVC=y
-CONFIG_IR_IMG_SONY=y
-CONFIG_IR_IMG_SHARP=y
-CONFIG_IR_IMG_SANYO=y
-CONFIG_IR_IMG_RC5=y
-CONFIG_IR_IMG_RC6=y
-CONFIG_MEDIA_SUPPORT=y
-CONFIG_FB=y
-CONFIG_FB_MODE_HELPERS=y
-# CONFIG_LCD_CLASS_DEVICE is not set
-CONFIG_BACKLIGHT_CLASS_DEVICE=y
-CONFIG_SOUND=y
-CONFIG_SND=y
-CONFIG_SND_HRTIMER=m
-CONFIG_SND_DYNAMIC_MINORS=y
-CONFIG_SND_SEQUENCER=m
-CONFIG_SND_SEQ_DUMMY=m
-# CONFIG_SND_SPI is not set
-CONFIG_SND_USB_AUDIO=m
-CONFIG_USB=y
-CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
-# CONFIG_USB_DEFAULT_PERSIST is not set
-CONFIG_USB_MON=y
-CONFIG_USB_EHCI_HCD=y
-CONFIG_USB_EHCI_ROOT_HUB_TT=y
-CONFIG_USB_ACM=y
-CONFIG_USB_STORAGE=y
-CONFIG_USB_DWC2=y
-CONFIG_USB_SERIAL=y
-CONFIG_USB_SERIAL_GENERIC=y
-CONFIG_USB_SERIAL_CP210X=m
-CONFIG_USB_SERIAL_FTDI_SIO=m
-CONFIG_USB_SERIAL_KEYSPAN=m
-CONFIG_USB_SERIAL_PL2303=m
-CONFIG_USB_SERIAL_OTI6858=m
-CONFIG_USB_SERIAL_QUALCOMM=m
-CONFIG_USB_SERIAL_SIERRAWIRELESS=m
-CONFIG_USB_SERIAL_OPTION=m
-CONFIG_MMC=y
-CONFIG_MMC_BLOCK_MINORS=16
-CONFIG_MMC_TEST=m
-CONFIG_MMC_DW=y
-CONFIG_NEW_LEDS=y
-CONFIG_LEDS_CLASS=y
-CONFIG_RTC_CLASS=y
-CONFIG_DMADEVICES=y
-CONFIG_IMG_MDC_DMA=y
-CONFIG_STAGING=y
-CONFIG_ASHMEM=y
-# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_MEMORY=y
-CONFIG_IIO=y
-CONFIG_CC10001_ADC=y
-CONFIG_PWM=y
-CONFIG_PWM_IMG=y
-CONFIG_PHY_PISTACHIO_USB=y
-CONFIG_ANDROID=y
-CONFIG_EXT4_FS=y
-CONFIG_EXT4_FS_POSIX_ACL=y
-CONFIG_EXT4_FS_SECURITY=y
-# CONFIG_DNOTIFY is not set
-CONFIG_FUSE_FS=m
-CONFIG_ISO9660_FS=m
-CONFIG_JOLIET=y
-CONFIG_ZISOFS=y
-CONFIG_UDF_FS=m
-CONFIG_VFAT_FS=m
-CONFIG_TMPFS=y
-CONFIG_TMPFS_POSIX_ACL=y
-CONFIG_ECRYPT_FS=y
-CONFIG_HFSPLUS_FS=m
-CONFIG_UBIFS_FS=y
-CONFIG_SQUASHFS=y
-CONFIG_SQUASHFS_FILE_DIRECT=y
-CONFIG_SQUASHFS_LZO=y
-CONFIG_PSTORE=y
-CONFIG_PSTORE_CONSOLE=y
-CONFIG_PSTORE_RAM=y
-CONFIG_NFS_FS=y
-CONFIG_ROOT_NFS=y
-CONFIG_NLS_DEFAULT="utf8"
-CONFIG_NLS_CODEPAGE_437=m
-CONFIG_NLS_ASCII=m
-CONFIG_NLS_ISO8859_1=m
-CONFIG_SECURITY=y
-CONFIG_SECURITY_NETWORK=y
-CONFIG_SECURITY_YAMA=y
-CONFIG_CRYPTO_AUTHENC=y
-CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_SHA1=y
-CONFIG_CRYPTO_SHA256=y
-CONFIG_CRYPTO_SHA512=m
-CONFIG_CRYPTO_ARC4=y
-CONFIG_CRYPTO_DES=y
-CONFIG_CRC_CCITT=y
-CONFIG_CRC_T10DIF=m
-CONFIG_CRC7=m
-# CONFIG_XZ_DEC_X86 is not set
-CONFIG_PRINTK_TIME=y
-CONFIG_DEBUG_INFO=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0
-# CONFIG_SCHED_DEBUG is not set
-CONFIG_SCHEDSTATS=y
-CONFIG_DEBUG_SPINLOCK=y
-CONFIG_DEBUG_CREDENTIALS=y
-CONFIG_FUNCTION_TRACER=y
-CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_LKDTM=y
-CONFIG_TEST_UDELAY=m
diff --git a/arch/mips/configs/rt305x_defconfig b/arch/mips/configs/rt305x_defconfig
index fec5851c164b..eb359db15dba 100644
--- a/arch/mips/configs/rt305x_defconfig
+++ b/arch/mips/configs/rt305x_defconfig
@@ -90,7 +90,6 @@ CONFIG_PPPOE=m
 CONFIG_PPP_ASYNC=m
 CONFIG_ISDN=y
 CONFIG_INPUT=m
-CONFIG_INPUT_POLLDEV=m
 # CONFIG_KEYBOARD_ATKBD is not set
 # CONFIG_INPUT_MOUSE is not set
 CONFIG_INPUT_MISC=y
diff --git a/arch/mips/configs/xway_defconfig b/arch/mips/configs/xway_defconfig
index 9abbc0debc2a..eeb689f715cb 100644
--- a/arch/mips/configs/xway_defconfig
+++ b/arch/mips/configs/xway_defconfig
@@ -96,7 +96,6 @@ CONFIG_PPPOE=m
 CONFIG_PPP_ASYNC=m
 CONFIG_ISDN=y
 CONFIG_INPUT=m
-CONFIG_INPUT_POLLDEV=m
 # CONFIG_KEYBOARD_ATKBD is not set
 # CONFIG_INPUT_MOUSE is not set
 CONFIG_INPUT_MISC=y
diff --git a/arch/mips/generic/Kconfig b/arch/mips/generic/Kconfig
index 657dd93c5e76..7dc5b3821cc6 100644
--- a/arch/mips/generic/Kconfig
+++ b/arch/mips/generic/Kconfig
@@ -58,6 +58,12 @@ config FIT_IMAGE_FDT_BOSTON
 	  enable this if you wish to boot on a MIPS Boston board, as it is
 	  expected by the bootloader.
 
+config FIT_IMAGE_FDT_MARDUK
+	bool "Include FDT for IMG Pistachio Marduk (CI40) boards"
+	help
+	  Enable this to include the FDT for the IMG Pistachio Marduk (CI40)
+	  from Imagination Technologies in the FIT kernel image.
+
 config FIT_IMAGE_FDT_NI169445
 	bool "Include FDT for NI 169445"
 	help
diff --git a/arch/mips/generic/Platform b/arch/mips/generic/Platform
index b871af16b5b6..e1abc113b409 100644
--- a/arch/mips/generic/Platform
+++ b/arch/mips/generic/Platform
@@ -24,3 +24,4 @@ its-$(CONFIG_FIT_IMAGE_FDT_LUTON)	+= board-luton.its.S
 its-$(CONFIG_FIT_IMAGE_FDT_JAGUAR2)	+= board-jaguar2.its.S
 its-$(CONFIG_FIT_IMAGE_FDT_SERVAL)	+= board-serval.its.S
 its-$(CONFIG_FIT_IMAGE_FDT_XILFPGA)	+= board-xilfpga.its.S
+its-$(CONFIG_FIT_IMAGE_FDT_MARDUK)	+= board-marduk.its.S
diff --git a/arch/mips/generic/board-ingenic.c b/arch/mips/generic/board-ingenic.c
index 0cec0bea13d6..3f44f14bdb33 100644
--- a/arch/mips/generic/board-ingenic.c
+++ b/arch/mips/generic/board-ingenic.c
@@ -7,6 +7,8 @@
  * Copyright (C) 2020 Paul Cercueil <paul@crapouillou.net>
  */
 
+#include <linux/clk.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_fdt.h>
 #include <linux/pm.h>
@@ -21,6 +23,10 @@
 static __init char *ingenic_get_system_type(unsigned long machtype)
 {
 	switch (machtype) {
+	case MACH_INGENIC_X2100:
+		return "X2100";
+	case MACH_INGENIC_X2000H:
+		return "X2000H";
 	case MACH_INGENIC_X2000E:
 		return "X2000E";
 	case MACH_INGENIC_X2000:
@@ -37,8 +43,18 @@ static __init char *ingenic_get_system_type(unsigned long machtype)
 		return "JZ4775";
 	case MACH_INGENIC_JZ4770:
 		return "JZ4770";
+	case MACH_INGENIC_JZ4760B:
+		return "JZ4760B";
+	case MACH_INGENIC_JZ4760:
+		return "JZ4760";
+	case MACH_INGENIC_JZ4755:
+		return "JZ4755";
+	case MACH_INGENIC_JZ4750:
+		return "JZ4750";
 	case MACH_INGENIC_JZ4725B:
 		return "JZ4725B";
+	case MACH_INGENIC_JZ4730:
+		return "JZ4730";
 	default:
 		return "JZ4740";
 	}
@@ -61,8 +77,13 @@ static __init const void *ingenic_fixup_fdt(const void *fdt, const void *match_d
 }
 
 static const struct of_device_id ingenic_of_match[] __initconst = {
+	{ .compatible = "ingenic,jz4730", .data = (void *)MACH_INGENIC_JZ4730 },
 	{ .compatible = "ingenic,jz4740", .data = (void *)MACH_INGENIC_JZ4740 },
 	{ .compatible = "ingenic,jz4725b", .data = (void *)MACH_INGENIC_JZ4725B },
+	{ .compatible = "ingenic,jz4750", .data = (void *)MACH_INGENIC_JZ4750 },
+	{ .compatible = "ingenic,jz4755", .data = (void *)MACH_INGENIC_JZ4755 },
+	{ .compatible = "ingenic,jz4760", .data = (void *)MACH_INGENIC_JZ4760 },
+	{ .compatible = "ingenic,jz4760b", .data = (void *)MACH_INGENIC_JZ4760B },
 	{ .compatible = "ingenic,jz4770", .data = (void *)MACH_INGENIC_JZ4770 },
 	{ .compatible = "ingenic,jz4775", .data = (void *)MACH_INGENIC_JZ4775 },
 	{ .compatible = "ingenic,jz4780", .data = (void *)MACH_INGENIC_JZ4780 },
@@ -71,6 +92,8 @@ static const struct of_device_id ingenic_of_match[] __initconst = {
 	{ .compatible = "ingenic,x1830", .data = (void *)MACH_INGENIC_X1830 },
 	{ .compatible = "ingenic,x2000", .data = (void *)MACH_INGENIC_X2000 },
 	{ .compatible = "ingenic,x2000e", .data = (void *)MACH_INGENIC_X2000E },
+	{ .compatible = "ingenic,x2000h", .data = (void *)MACH_INGENIC_X2000H },
+	{ .compatible = "ingenic,x2100", .data = (void *)MACH_INGENIC_X2100 },
 	{}
 };
 
@@ -108,10 +131,36 @@ static const struct platform_suspend_ops ingenic_pm_ops __maybe_unused = {
 
 static int __init ingenic_pm_init(void)
 {
+	struct device_node *cpu_node;
+	struct clk *cpu0_clk;
+	int ret;
+
 	if (boot_cpu_type() == CPU_XBURST) {
 		if (IS_ENABLED(CONFIG_PM_SLEEP))
 			suspend_set_ops(&ingenic_pm_ops);
 		_machine_halt = ingenic_halt;
+
+		/*
+		 * Unconditionally enable the clock for the first CPU.
+		 * This makes sure that the PLL that feeds the CPU won't be
+		 * stopped while the kernel is running.
+		 */
+		cpu_node = of_get_cpu_node(0, NULL);
+		if (!cpu_node) {
+			pr_err("Unable to get CPU node\n");
+		} else {
+			cpu0_clk = of_clk_get(cpu_node, 0);
+			if (IS_ERR(cpu0_clk)) {
+				pr_err("Unable to get CPU0 clock\n");
+				return PTR_ERR(cpu0_clk);
+			}
+
+			ret = clk_prepare_enable(cpu0_clk);
+			if (ret) {
+				pr_err("Unable to enable CPU0 clock\n");
+				return ret;
+			}
+		}
 	}
 
 	return 0;
diff --git a/arch/mips/generic/board-marduk.its.S b/arch/mips/generic/board-marduk.its.S
new file mode 100644
index 000000000000..4f633794db90
--- /dev/null
+++ b/arch/mips/generic/board-marduk.its.S
@@ -0,0 +1,22 @@
+/ {
+	images {
+		fdt-marduk {
+			description = "img,pistachio-marduk Device Tree";
+			data = /incbin/("boot/dts/img/pistachio_marduk.dtb");
+			type = "flat_dt";
+			arch = "mips";
+			compression = "none";
+			hash {
+				algo = "sha1";
+			};
+		};
+	};
+
+	configurations {
+		conf-marduk {
+			description = "Marduk Linux kernel";
+			kernel = "kernel";
+			fdt = "fdt-marduk";
+		};
+	};
+};
diff --git a/arch/mips/generic/board-ocelot.c b/arch/mips/generic/board-ocelot.c
index c238e95190ac..7115410acb4f 100644
--- a/arch/mips/generic/board-ocelot.c
+++ b/arch/mips/generic/board-ocelot.c
@@ -26,13 +26,13 @@ static __init bool ocelot_detect(void)
 	tlb_probe_hazard();
 	idx = read_c0_index();
 	if (idx < 0)
-		return 0;
+		return false;
 
 	/* A TLB entry exists, lets assume its usable and check the CHIP ID */
 	rev = __raw_readl((void __iomem *)DEVCPU_GCB_CHIP_REGS_CHIP_ID);
 
 	if ((rev & CHIP_ID_PART_ID) != OCELOT_PART_ID)
-		return 0;
+		return false;
 
 	/* Copy command line from bootloader early for Initrd detection */
 	if (fw_arg0 < 10 && (fw_arg1 & 0xFFF00000) == 0x80000000) {
@@ -44,7 +44,7 @@ static __init bool ocelot_detect(void)
 			strcpy(arcs_cmdline, prom_argv[1]);
 	}
 
-	return 1;
+	return true;
 }
 
 static void __init ocelot_earlyprintk_init(void)
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 95e1f7f3597f..a0b9e7c1e4fc 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -206,7 +206,7 @@ ATOMIC_OPS(atomic64, xor, s64, ^=, xor, lld, scd)
  * The function returns the old value of @v minus @i.
  */
 #define ATOMIC_SIP_OP(pfx, type, op, ll, sc)				\
-static __inline__ int arch_##pfx##_sub_if_positive(type i, pfx##_t * v)	\
+static __inline__ type arch_##pfx##_sub_if_positive(type i, pfx##_t * v)	\
 {									\
 	type temp, result;						\
 									\
diff --git a/arch/mips/include/asm/bootinfo.h b/arch/mips/include/asm/bootinfo.h
index 4c2e8173e6ec..2128ba903391 100644
--- a/arch/mips/include/asm/bootinfo.h
+++ b/arch/mips/include/asm/bootinfo.h
@@ -75,6 +75,7 @@ enum ingenic_machine_type {
 	MACH_INGENIC_JZ4750,
 	MACH_INGENIC_JZ4755,
 	MACH_INGENIC_JZ4760,
+	MACH_INGENIC_JZ4760B,
 	MACH_INGENIC_JZ4770,
 	MACH_INGENIC_JZ4775,
 	MACH_INGENIC_JZ4780,
@@ -83,6 +84,8 @@ enum ingenic_machine_type {
 	MACH_INGENIC_X1830,
 	MACH_INGENIC_X2000,
 	MACH_INGENIC_X2000E,
+	MACH_INGENIC_X2000H,
+	MACH_INGENIC_X2100,
 };
 
 extern char *system_type;
diff --git a/arch/mips/include/asm/cacheflush.h b/arch/mips/include/asm/cacheflush.h
index d687b40b9fbb..b3dc9c589442 100644
--- a/arch/mips/include/asm/cacheflush.h
+++ b/arch/mips/include/asm/cacheflush.h
@@ -125,13 +125,7 @@ static inline void kunmap_noncoherent(void)
 	kunmap_coherent();
 }
 
-#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
-static inline void flush_kernel_dcache_page(struct page *page)
-{
-	BUG_ON(cpu_has_dc_aliases && PageHighMem(page));
-	flush_dcache_page(page);
-}
-
+#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1
 /*
  * For now flush_kernel_vmap_range and invalidate_kernel_vmap_range both do a
  * cache writeback and invalidate operation.
diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h
index 53f015a1b0a7..bbb3bc5a42fd 100644
--- a/arch/mips/include/asm/compat.h
+++ b/arch/mips/include/asm/compat.h
@@ -96,14 +96,6 @@ struct compat_statfs {
 
 #define COMPAT_OFF_T_MAX	0x7fffffff
 
-static inline void __user *arch_compat_alloc_user_space(long len)
-{
-	struct pt_regs *regs = (struct pt_regs *)
-		((unsigned long) current_thread_info() + THREAD_SIZE - 32) - 1;
-
-	return (void __user *) (regs->regs[29] - len);
-}
-
 struct compat_ipc64_perm {
 	compat_key_t key;
 	__compat_uid32_t uid;
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index 9e6211e6d76b..d45a52f65b7a 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -46,8 +46,8 @@
 #define PRID_COMP_NETLOGIC	0x0c0000
 #define PRID_COMP_CAVIUM	0x0d0000
 #define PRID_COMP_LOONGSON	0x140000
-#define PRID_COMP_INGENIC_13	0x130000	/* X2000 */
-#define PRID_COMP_INGENIC_D0	0xd00000	/* JZ4740, JZ4750, X1830 */
+#define PRID_COMP_INGENIC_13	0x130000	/* X2000, X2100 */
+#define PRID_COMP_INGENIC_D0	0xd00000	/* JZ4730, JZ4740, JZ4750, JZ4755, JZ4760, X1830 */
 #define PRID_COMP_INGENIC_D1	0xd10000	/* JZ4770, JZ4775, X1000 */
 #define PRID_COMP_INGENIC_E1	0xe10000	/* JZ4780 */
 
diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h
index 783fecce65c8..f8f74f9f5883 100644
--- a/arch/mips/include/asm/uaccess.h
+++ b/arch/mips/include/asm/uaccess.h
@@ -428,7 +428,6 @@ do {									\
 
 extern size_t __raw_copy_from_user(void *__to, const void *__from, size_t __n);
 extern size_t __raw_copy_to_user(void *__to, const void *__from, size_t __n);
-extern size_t __raw_copy_in_user(void *__to, const void *__from, size_t __n);
 
 static inline unsigned long
 raw_copy_from_user(void *to, const void __user *from, unsigned long n)
@@ -480,31 +479,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
 #define INLINE_COPY_FROM_USER
 #define INLINE_COPY_TO_USER
 
-static inline unsigned long
-raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
-{
-	register void __user *__cu_to_r __asm__("$4");
-	register const void __user *__cu_from_r __asm__("$5");
-	register long __cu_len_r __asm__("$6");
-
-	__cu_to_r = to;
-	__cu_from_r = from;
-	__cu_len_r = n;
-
-	__asm__ __volatile__(
-		".set\tnoreorder\n\t"
-		__MODULE_JAL(__raw_copy_in_user)
-		".set\tnoat\n\t"
-		__UA_ADDU "\t$1, %1, %2\n\t"
-		".set\tat\n\t"
-		".set\treorder"
-		: "+r" (__cu_to_r), "+r" (__cu_from_r), "+r" (__cu_len_r)
-		:
-		: "$8", "$9", "$10", "$11", "$12", "$14", "$15", "$24", "$31",
-		  DADDI_SCRATCH, "memory");
-	return __cu_len_r;
-}
-
 extern __kernel_size_t __bzero(void __user *addr, __kernel_size_t size);
 
 /*
diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c
index 461457b28982..eabddb89d221 100644
--- a/arch/mips/jazz/jazzdma.c
+++ b/arch/mips/jazz/jazzdma.c
@@ -552,7 +552,7 @@ static int jazz_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 				dir);
 		sg->dma_address = vdma_alloc(sg_phys(sg), sg->length);
 		if (sg->dma_address == DMA_MAPPING_ERROR)
-			return 0;
+			return -EIO;
 		sg_dma_len(sg) = sg->length;
 	}
 
diff --git a/arch/mips/kernel/cacheinfo.c b/arch/mips/kernel/cacheinfo.c
index 53d8ea7d36e6..495dd058231d 100644
--- a/arch/mips/kernel/cacheinfo.c
+++ b/arch/mips/kernel/cacheinfo.c
@@ -17,7 +17,7 @@ do {								\
 	leaf++;							\
 } while (0)
 
-static int __init_cache_level(unsigned int cpu)
+int init_cache_level(unsigned int cpu)
 {
 	struct cpuinfo_mips *c = &current_cpu_data;
 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
@@ -74,7 +74,7 @@ static void fill_cpumask_cluster(int cpu, cpumask_t *cpu_map)
 			cpumask_set_cpu(cpu1, cpu_map);
 }
 
-static int __populate_cache_leaves(unsigned int cpu)
+int populate_cache_leaves(unsigned int cpu)
 {
 	struct cpuinfo_mips *c = &current_cpu_data;
 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
@@ -114,6 +114,3 @@ static int __populate_cache_leaves(unsigned int cpu)
 
 	return 0;
 }
-
-DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
-DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c
index 6c590ef27648..67e130d3f038 100644
--- a/arch/mips/kernel/mips-mt-fpaff.c
+++ b/arch/mips/kernel/mips-mt-fpaff.c
@@ -76,13 +76,13 @@ asmlinkage long mipsmt_sys_sched_setaffinity(pid_t pid, unsigned int len,
 	if (copy_from_user(&new_mask, user_mask_ptr, sizeof(new_mask)))
 		return -EFAULT;
 
-	get_online_cpus();
+	cpus_read_lock();
 	rcu_read_lock();
 
 	p = find_process_by_pid(pid);
 	if (!p) {
 		rcu_read_unlock();
-		put_online_cpus();
+		cpus_read_unlock();
 		return -ESRCH;
 	}
 
@@ -147,7 +147,7 @@ out_free_cpus_allowed:
 	free_cpumask_var(cpus_allowed);
 out_put_task:
 	put_task_struct(p);
-	put_online_cpus();
+	cpus_read_unlock();
 	return retval;
 }
 
@@ -166,7 +166,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len,
 	if (len < real_len)
 		return -EINVAL;
 
-	get_online_cpus();
+	cpus_read_lock();
 	rcu_read_lock();
 
 	retval = -ESRCH;
@@ -182,7 +182,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len,
 
 out_unlock:
 	rcu_read_unlock();
-	put_online_cpus();
+	cpus_read_unlock();
 	if (retval)
 		return retval;
 	if (copy_to_user(user_mask_ptr, &mask, real_len))
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 73c8e7990a97..95aa86fa6077 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -859,10 +859,10 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
 	 * scheduled in then it will already have picked up the new FP mode
 	 * whilst doing so.
 	 */
-	get_online_cpus();
+	cpus_read_lock();
 	for_each_cpu_and(cpu, &process_cpus, cpu_online_mask)
 		work_on_cpu(cpu, prepare_for_fp_mode_switch, NULL);
-	put_online_cpus();
+	cpus_read_unlock();
 
 	return 0;
 }
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 23a140327a0b..f979adfd4fc2 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -452,8 +452,9 @@ static void __init mips_parse_crashkernel(void)
 		return;
 
 	if (crash_base <= 0) {
-		crash_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_MAX,
-							crash_size, CRASH_ALIGN);
+		crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
+						       CRASH_ALIGN,
+						       CRASH_ADDR_MAX);
 		if (!crash_base) {
 			pr_warn("crashkernel reservation failed - No suitable area found.\n");
 			return;
@@ -461,8 +462,9 @@ static void __init mips_parse_crashkernel(void)
 	} else {
 		unsigned long long start;
 
-		start = memblock_find_in_range(crash_base, crash_base + crash_size,
-						crash_size, 1);
+		start = memblock_phys_alloc_range(crash_size, 1,
+						  crash_base,
+						  crash_base + crash_size);
 		if (start != crash_base) {
 			pr_warn("Invalid memory region reserved for crash kernel\n");
 			return;
@@ -656,10 +658,6 @@ static void __init arch_mem_init(char **cmdline_p)
 	mips_reserve_vmcore();
 
 	mips_parse_crashkernel();
-#ifdef CONFIG_KEXEC
-	if (crashk_res.start != crashk_res.end)
-		memblock_reserve(crashk_res.start, resource_size(&crashk_res));
-#endif
 	device_tree_init();
 
 	/*
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index c2d2e19abea8..70e32de2bcaa 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -239,9 +239,9 @@
 228	n32	clock_nanosleep			sys_clock_nanosleep_time32
 229	n32	tgkill				sys_tgkill
 230	n32	utimes				sys_utimes_time32
-231	n32	mbind				compat_sys_mbind
-232	n32	get_mempolicy			compat_sys_get_mempolicy
-233	n32	set_mempolicy			compat_sys_set_mempolicy
+231	n32	mbind				sys_mbind
+232	n32	get_mempolicy			sys_get_mempolicy
+233	n32	set_mempolicy			sys_set_mempolicy
 234	n32	mq_open				compat_sys_mq_open
 235	n32	mq_unlink			sys_mq_unlink
 236	n32	mq_timedsend			sys_mq_timedsend_time32
@@ -258,7 +258,7 @@
 247	n32	inotify_init			sys_inotify_init
 248	n32	inotify_add_watch		sys_inotify_add_watch
 249	n32	inotify_rm_watch		sys_inotify_rm_watch
-250	n32	migrate_pages			compat_sys_migrate_pages
+250	n32	migrate_pages			sys_migrate_pages
 251	n32	openat				sys_openat
 252	n32	mkdirat				sys_mkdirat
 253	n32	mknodat				sys_mknodat
@@ -279,7 +279,7 @@
 268	n32	sync_file_range			sys_sync_file_range
 269	n32	tee				sys_tee
 270	n32	vmsplice			sys_vmsplice
-271	n32	move_pages			compat_sys_move_pages
+271	n32	move_pages			sys_move_pages
 272	n32	set_robust_list			compat_sys_set_robust_list
 273	n32	get_robust_list			compat_sys_get_robust_list
 274	n32	kexec_load			compat_sys_kexec_load
@@ -385,3 +385,5 @@
 444	n32	landlock_create_ruleset		sys_landlock_create_ruleset
 445	n32	landlock_add_rule		sys_landlock_add_rule
 446	n32	landlock_restrict_self		sys_landlock_restrict_self
+# 447 reserved for memfd_secret
+448	n32	process_mrelease		sys_process_mrelease
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index ac653d08b1ea..1ca7bc337932 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -361,3 +361,5 @@
 444	n64	landlock_create_ruleset		sys_landlock_create_ruleset
 445	n64	landlock_add_rule		sys_landlock_add_rule
 446	n64	landlock_restrict_self		sys_landlock_restrict_self
+# 447 reserved for memfd_secret
+448	n64	process_mrelease		sys_process_mrelease
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index fae35882a165..a61c35edaa74 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -279,9 +279,9 @@
 265	o32	clock_nanosleep			sys_clock_nanosleep_time32
 266	o32	tgkill				sys_tgkill
 267	o32	utimes				sys_utimes_time32
-268	o32	mbind				sys_mbind			compat_sys_mbind
-269	o32	get_mempolicy			sys_get_mempolicy		compat_sys_get_mempolicy
-270	o32	set_mempolicy			sys_set_mempolicy		compat_sys_set_mempolicy
+268	o32	mbind				sys_mbind
+269	o32	get_mempolicy			sys_get_mempolicy
+270	o32	set_mempolicy			sys_set_mempolicy
 271	o32	mq_open				sys_mq_open			compat_sys_mq_open
 272	o32	mq_unlink			sys_mq_unlink
 273	o32	mq_timedsend			sys_mq_timedsend_time32
@@ -298,7 +298,7 @@
 284	o32	inotify_init			sys_inotify_init
 285	o32	inotify_add_watch		sys_inotify_add_watch
 286	o32	inotify_rm_watch		sys_inotify_rm_watch
-287	o32	migrate_pages			sys_migrate_pages		compat_sys_migrate_pages
+287	o32	migrate_pages			sys_migrate_pages
 288	o32	openat				sys_openat			compat_sys_openat
 289	o32	mkdirat				sys_mkdirat
 290	o32	mknodat				sys_mknodat
@@ -319,7 +319,7 @@
 305	o32	sync_file_range			sys_sync_file_range		sys32_sync_file_range
 306	o32	tee				sys_tee
 307	o32	vmsplice			sys_vmsplice
-308	o32	move_pages			sys_move_pages			compat_sys_move_pages
+308	o32	move_pages			sys_move_pages
 309	o32	set_robust_list			sys_set_robust_list		compat_sys_set_robust_list
 310	o32	get_robust_list			sys_get_robust_list		compat_sys_get_robust_list
 311	o32	kexec_load			sys_kexec_load			compat_sys_kexec_load
@@ -434,3 +434,5 @@
 444	o32	landlock_create_ruleset		sys_landlock_create_ruleset
 445	o32	landlock_add_rule		sys_landlock_add_rule
 446	o32	landlock_restrict_self		sys_landlock_restrict_self
+# 447 reserved for memfd_secret
+448	o32	process_mrelease		sys_process_mrelease
diff --git a/arch/mips/kernel/uprobes.c b/arch/mips/kernel/uprobes.c
index 6dbe4eab0a0e..9db2a6db5f62 100644
--- a/arch/mips/kernel/uprobes.c
+++ b/arch/mips/kernel/uprobes.c
@@ -75,7 +75,7 @@ bool is_trap_insn(uprobe_opcode_t *insn)
 		case tlt_op:
 		case tltu_op:
 		case tne_op:
-			return 1;
+			return true;
 		}
 		break;
 
@@ -87,12 +87,12 @@ bool is_trap_insn(uprobe_opcode_t *insn)
 		case tlti_op:
 		case tltiu_op:
 		case tnei_op:
-			return 1;
+			return true;
 		}
 		break;
 	}
 
-	return 0;
+	return false;
 }
 
 #define UPROBE_TRAP_NR	ULONG_MAX
@@ -254,9 +254,9 @@ unsigned long uprobe_get_swbp_addr(struct pt_regs *regs)
  * See if the instruction can be emulated.
  * Returns true if instruction was emulated, false otherwise.
  *
- * For now we always emulate so this function just returns 0.
+ * For now we always emulate so this function just returns false.
  */
 bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	return 0;
+	return false;
 }
diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile
index c67250a956b8..d3710959da55 100644
--- a/arch/mips/kvm/Makefile
+++ b/arch/mips/kvm/Makefile
@@ -2,21 +2,18 @@
 # Makefile for KVM support for MIPS
 #
 
-common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o eventfd.o binary_stats.o)
+ccflags-y += -Ivirt/kvm -Iarch/mips/kvm
 
-EXTRA_CFLAGS += -Ivirt/kvm -Iarch/mips/kvm
+kvm-y := $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o eventfd.o binary_stats.o)
+kvm-$(CONFIG_CPU_HAS_MSA) += msa.o
 
-common-objs-$(CONFIG_CPU_HAS_MSA) += msa.o
-
-kvm-objs := $(common-objs-y) mips.o emulate.o entry.o \
+kvm-y +=    mips.o emulate.o entry.o \
 	    interrupt.o stats.o \
 	    fpu.o
-kvm-objs += hypcall.o
-kvm-objs += mmu.o
-ifdef CONFIG_CPU_LOONGSON64
-kvm-objs += loongson_ipi.o
-endif
+kvm-y += hypcall.o
+kvm-y += mmu.o
+kvm-$(CONFIG_CPU_LOONGSON64) += loongson_ipi.o
 
-kvm-objs		+= vz.o
+kvm-y		+= vz.o
 obj-$(CONFIG_KVM)	+= kvm.o
 obj-y			+= callback.o tlb.o
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index af9dd029a4e1..75c6f264c626 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -41,8 +41,6 @@
 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
 	KVM_GENERIC_VM_STATS()
 };
-static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
-		sizeof(struct kvm_vm_stat) / sizeof(u64));
 
 const struct kvm_stats_header kvm_vm_stats_header = {
 	.name_size = KVM_STATS_NAME_SIZE,
@@ -85,8 +83,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
 	STATS_DESC_COUNTER(VCPU, vz_cpucfg_exits),
 #endif
 };
-static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
-		sizeof(struct kvm_vcpu_stat) / sizeof(u64));
 
 const struct kvm_stats_header kvm_vcpu_stats_header = {
 	.name_size = KVM_STATS_NAME_SIZE,
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index 6d1f68cf4edf..1bfd1b501d82 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -442,7 +442,7 @@ static int kvm_mips_mkold_gpa_pt(struct kvm *kvm, gfn_t start_gfn,
 bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 {
 	kvm_mips_flush_gpa_pt(kvm, range->start, range->end);
-	return 1;
+	return true;
 }
 
 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
@@ -486,7 +486,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 	pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
 
 	if (!gpa_pte)
-		return 0;
+		return false;
 	return pte_young(*gpa_pte);
 }
 
diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c
index 43cad10b877d..4adca5abbc72 100644
--- a/arch/mips/kvm/vz.c
+++ b/arch/mips/kvm/vz.c
@@ -388,7 +388,6 @@ static void _kvm_vz_restore_htimer(struct kvm_vcpu *vcpu,
 				   u32 compare, u32 cause)
 {
 	u32 start_count, after_count;
-	ktime_t freeze_time;
 	unsigned long flags;
 
 	/*
@@ -396,7 +395,7 @@ static void _kvm_vz_restore_htimer(struct kvm_vcpu *vcpu,
 	 * this with interrupts disabled to avoid latency.
 	 */
 	local_irq_save(flags);
-	freeze_time = kvm_mips_freeze_hrtimer(vcpu, &start_count);
+	kvm_mips_freeze_hrtimer(vcpu, &start_count);
 	write_c0_gtoffset(start_count - read_c0_count());
 	local_irq_restore(flags);
 
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index e19fb98b5d38..277c32296636 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -666,8 +666,6 @@ FEXPORT(__raw_copy_from_user)
 EXPORT_SYMBOL(__raw_copy_from_user)
 FEXPORT(__raw_copy_to_user)
 EXPORT_SYMBOL(__raw_copy_to_user)
-FEXPORT(__raw_copy_in_user)
-EXPORT_SYMBOL(__raw_copy_in_user)
 #endif
 	/* Legacy Mode, user <-> user */
 	__BUILD_COPY_USER LEGACY_MODE USEROP USEROP
@@ -703,13 +701,4 @@ EXPORT_SYMBOL(__raw_copy_to_user)
 __BUILD_COPY_USER EVA_MODE KERNELOP USEROP
 END(__raw_copy_to_user)
 
-/*
- * __copy_in_user (EVA)
- */
-
-LEAF(__raw_copy_in_user)
-EXPORT_SYMBOL(__raw_copy_in_user)
-__BUILD_COPY_USER EVA_MODE USEROP USEROP
-END(__raw_copy_in_user)
-
 #endif
diff --git a/arch/mips/loongson2ef/common/Makefile b/arch/mips/loongson2ef/common/Makefile
index d5ab3e543ea3..30ea8b5ca685 100644
--- a/arch/mips/loongson2ef/common/Makefile
+++ b/arch/mips/loongson2ef/common/Makefile
@@ -4,12 +4,14 @@
 #
 
 obj-y += setup.o init.o env.o time.o reset.o irq.o \
-    bonito-irq.o mem.o machtype.o platform.o serial.o
+    bonito-irq.o mem.o machtype.o platform.o
 obj-$(CONFIG_PCI) += pci.o
 
 #
 # Serial port support
 #
+obj-$(CONFIG_LOONGSON_UART_BASE) += serial.o
+obj-$(CONFIG_EARLY_PRINTK) += serial.o
 obj-$(CONFIG_LOONGSON_UART_BASE) += uart_base.o
 obj-$(CONFIG_LOONGSON_MC146818) += rtc.o
 
diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c
index 8ae181e08311..ec2ae501539a 100644
--- a/arch/mips/mm/c-octeon.c
+++ b/arch/mips/mm/c-octeon.c
@@ -30,7 +30,7 @@
 unsigned long long cache_err_dcache[NR_CPUS];
 EXPORT_SYMBOL_GPL(cache_err_dcache);
 
-/**
+/*
  * Octeon automatically flushes the dcache on tlb changes, so
  * from Linux's viewpoint it acts much like a physically
  * tagged cache. No flushing is needed
@@ -56,8 +56,8 @@ static void local_octeon_flush_icache_range(unsigned long start,
 }
 
 /**
- * Flush caches as necessary for all cores affected by a
- * vma. If no vma is supplied, all cores are flushed.
+ * octeon_flush_icache_all_cores -  Flush caches as necessary for all cores
+ * affected by a vma. If no vma is supplied, all cores are flushed.
  *
  * @vma:    VMA to flush or NULL to flush all icaches.
  */
@@ -92,7 +92,7 @@ static void octeon_flush_icache_all_cores(struct vm_area_struct *vma)
 }
 
 
-/**
+/*
  * Called to flush the icache on all cores
  */
 static void octeon_flush_icache_all(void)
@@ -102,8 +102,7 @@ static void octeon_flush_icache_all(void)
 
 
 /**
- * Called to flush all memory associated with a memory
- * context.
+ * octeon_flush_cache_mm - flush all memory associated with a memory context.
  *
  * @mm:	    Memory context to flush
  */
@@ -116,7 +115,7 @@ static void octeon_flush_cache_mm(struct mm_struct *mm)
 }
 
 
-/**
+/*
  * Flush a range of kernel addresses out of the icache
  *
  */
@@ -127,11 +126,11 @@ static void octeon_flush_icache_range(unsigned long start, unsigned long end)
 
 
 /**
- * Flush a range out of a vma
+ * octeon_flush_cache_range - Flush a range out of a vma
  *
  * @vma:    VMA to flush
- * @start:
- * @end:
+ * @start:  beginning address for flush
+ * @end:    ending address for flush
  */
 static void octeon_flush_cache_range(struct vm_area_struct *vma,
 				     unsigned long start, unsigned long end)
@@ -142,11 +141,11 @@ static void octeon_flush_cache_range(struct vm_area_struct *vma,
 
 
 /**
- * Flush a specific page of a vma
+ * octeon_flush_cache_page - Flush a specific page of a vma
  *
  * @vma:    VMA to flush page for
  * @page:   Page to flush
- * @pfn:
+ * @pfn:    Page frame number
  */
 static void octeon_flush_cache_page(struct vm_area_struct *vma,
 				    unsigned long page, unsigned long pfn)
@@ -160,7 +159,7 @@ static void octeon_flush_kernel_vmap_range(unsigned long vaddr, int size)
 	BUG();
 }
 
-/**
+/*
  * Probe Octeon's caches
  *
  */
@@ -256,7 +255,7 @@ static void  octeon_cache_error_setup(void)
 	set_handler(0x100, &except_vec2_octeon, 0x80);
 }
 
-/**
+/*
  * Setup the Octeon cache flush routines
  *
  */
@@ -341,7 +340,7 @@ asmlinkage void cache_parity_error_octeon_recoverable(void)
 	co_cache_error_call_notifiers(0);
 }
 
-/**
+/*
  * Called when the the exception is not recoverable
  */
 
diff --git a/arch/mips/mti-malta/malta-dtshim.c b/arch/mips/mti-malta/malta-dtshim.c
index 0ddf03df6268..f451268f6c38 100644
--- a/arch/mips/mti-malta/malta-dtshim.c
+++ b/arch/mips/mti-malta/malta-dtshim.c
@@ -22,7 +22,7 @@
 #define  ROCIT_CONFIG_GEN1_MEMMAP_SHIFT	8
 #define  ROCIT_CONFIG_GEN1_MEMMAP_MASK	(0xf << 8)
 
-static unsigned char fdt_buf[16 << 10] __initdata;
+static unsigned char fdt_buf[16 << 10] __initdata __aligned(8);
 
 /* determined physical memory size, not overridden by command line args	 */
 extern unsigned long physical_memsize;
diff --git a/arch/mips/netlogic/xlr/fmn-config.c b/arch/mips/netlogic/xlr/fmn-config.c
index c7622c6e5f67..15483537e8cf 100644
--- a/arch/mips/netlogic/xlr/fmn-config.c
+++ b/arch/mips/netlogic/xlr/fmn-config.c
@@ -103,18 +103,19 @@ static void check_credit_distribution(void)
 }
 
 /**
- * Configure bucket size and credits for a device. 'size' is the size of
- * the buckets for the device. This size is distributed among all the CPUs
- * so that all of them can send messages to the device.
- *
- * The device is also given 'cpu_credits' to send messages to the CPUs
- *
+ * setup_fmn_cc -  Configure bucket size and credits for a device.
  * @dev_info: FMN information structure for each devices
  * @start_stn_id: Starting station id of dev_info
  * @end_stn_id: End station id of dev_info
  * @num_buckets: Total number of buckets for den_info
  * @cpu_credits: Allowed credits to cpu for each devices pointing by dev_info
  * @size: Size of the each buckets in the device station
+ *
+ * 'size' is the size of the buckets for the device. This size is
+ * distributed among all the CPUs
+ * so that all of them can send messages to the device.
+ *
+ * The device is also given 'cpu_credits' to send messages to the CPUs
  */
 static void setup_fmn_cc(struct xlr_fmn_info *dev_info, int start_stn_id,
 		int end_stn_id, int num_buckets, int cpu_credits, int size)
@@ -174,6 +175,8 @@ static void setup_cpu_fmninfo(struct xlr_fmn_info *cpu, int num_core)
 }
 
 /**
+ * xlr_board_info_setup - Setup FMN details
+ *
  * Setup the FMN details for each devices according to the device available
  * in each variant of XLR/XLS processor
  */
diff --git a/arch/mips/pistachio/Kconfig b/arch/mips/pistachio/Kconfig
deleted file mode 100644
index 9a0e06c95184..000000000000
--- a/arch/mips/pistachio/Kconfig
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-config PISTACHIO_GPTIMER_CLKSRC
-	bool "Enable General Purpose Timer based clocksource"
-	depends on MACH_PISTACHIO
-	select CLKSRC_PISTACHIO
-	select MIPS_EXTERNAL_TIMER
-	help
-	  This option enables a clocksource driver based on a Pistachio
-	  SoC General Purpose external timer.
-
-	  If you want to enable the CPUFreq, you need to enable
-	  this option.
-
-	  If you don't want to enable CPUFreq, you can leave this disabled.
diff --git a/arch/mips/pistachio/Makefile b/arch/mips/pistachio/Makefile
deleted file mode 100644
index 66f4af17fb66..000000000000
--- a/arch/mips/pistachio/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-obj-y	+= init.o irq.o time.o
diff --git a/arch/mips/pistachio/Platform b/arch/mips/pistachio/Platform
deleted file mode 100644
index c59de86dbddf..000000000000
--- a/arch/mips/pistachio/Platform
+++ /dev/null
@@ -1,6 +0,0 @@
-#
-# IMG Pistachio SoC
-#
-load-$(CONFIG_MACH_PISTACHIO)		+= 0xffffffff80400000
-zload-$(CONFIG_MACH_PISTACHIO)		+= 0xffffffff81000000
-all-$(CONFIG_MACH_PISTACHIO)		:= uImage.gz
diff --git a/arch/mips/pistachio/init.c b/arch/mips/pistachio/init.c
deleted file mode 100644
index e0bacfc3c6b4..000000000000
--- a/arch/mips/pistachio/init.c
+++ /dev/null
@@ -1,125 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Pistachio platform setup
- *
- * Copyright (C) 2014 Google, Inc.
- * Copyright (C) 2016 Imagination Technologies
- */
-
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <linux/of_address.h>
-#include <linux/of_fdt.h>
-
-#include <asm/cacheflush.h>
-#include <asm/fw/fw.h>
-#include <asm/mips-boards/generic.h>
-#include <asm/mips-cps.h>
-#include <asm/prom.h>
-#include <asm/smp-ops.h>
-#include <asm/traps.h>
-
-/*
- * Core revision register decoding
- * Bits 23 to 20: Major rev
- * Bits 15 to 8: Minor rev
- * Bits 7 to 0: Maintenance rev
- */
-#define PISTACHIO_CORE_REV_REG	0xB81483D0
-#define PISTACHIO_CORE_REV_A1	0x00100006
-#define PISTACHIO_CORE_REV_B0	0x00100106
-
-const char *get_system_type(void)
-{
-	u32 core_rev;
-	const char *sys_type;
-
-	core_rev = __raw_readl((const void *)PISTACHIO_CORE_REV_REG);
-
-	switch (core_rev) {
-	case PISTACHIO_CORE_REV_B0:
-		sys_type = "IMG Pistachio SoC (B0)";
-		break;
-
-	case PISTACHIO_CORE_REV_A1:
-		sys_type = "IMG Pistachio SoC (A1)";
-		break;
-
-	default:
-		sys_type = "IMG Pistachio SoC";
-		break;
-	}
-
-	return sys_type;
-}
-
-void __init *plat_get_fdt(void)
-{
-	if (fw_arg0 != -2)
-		panic("Device-tree not present");
-	return (void *)fw_arg1;
-}
-
-void __init plat_mem_setup(void)
-{
-	__dt_setup_arch(plat_get_fdt());
-}
-
-#define DEFAULT_CPC_BASE_ADDR	0x1bde0000
-#define DEFAULT_CDMM_BASE_ADDR	0x1bdd0000
-
-phys_addr_t mips_cpc_default_phys_base(void)
-{
-	return DEFAULT_CPC_BASE_ADDR;
-}
-
-phys_addr_t mips_cdmm_phys_base(void)
-{
-	return DEFAULT_CDMM_BASE_ADDR;
-}
-
-static void __init mips_nmi_setup(void)
-{
-	void *base;
-
-	base = cpu_has_veic ?
-		(void *)(CAC_BASE + 0xa80) :
-		(void *)(CAC_BASE + 0x380);
-	memcpy(base, except_vec_nmi, 0x80);
-	flush_icache_range((unsigned long)base,
-			   (unsigned long)base + 0x80);
-}
-
-static void __init mips_ejtag_setup(void)
-{
-	void *base;
-	extern char except_vec_ejtag_debug[];
-
-	base = cpu_has_veic ?
-		(void *)(CAC_BASE + 0xa00) :
-		(void *)(CAC_BASE + 0x300);
-	memcpy(base, except_vec_ejtag_debug, 0x80);
-	flush_icache_range((unsigned long)base,
-			   (unsigned long)base + 0x80);
-}
-
-void __init prom_init(void)
-{
-	board_nmi_handler_setup = mips_nmi_setup;
-	board_ejtag_handler_setup = mips_ejtag_setup;
-
-	mips_cm_probe();
-	mips_cpc_probe();
-	register_cps_smp_ops();
-
-	pr_info("SoC Type: %s\n", get_system_type());
-}
-
-void __init device_tree_init(void)
-{
-	if (!initial_boot_params)
-		return;
-
-	unflatten_and_copy_device_tree();
-}
diff --git a/arch/mips/pistachio/irq.c b/arch/mips/pistachio/irq.c
deleted file mode 100644
index 437c3101ac45..000000000000
--- a/arch/mips/pistachio/irq.c
+++ /dev/null
@@ -1,24 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Pistachio IRQ setup
- *
- * Copyright (C) 2014 Google, Inc.
- */
-
-#include <linux/init.h>
-#include <linux/irqchip.h>
-#include <linux/kernel.h>
-
-#include <asm/cpu-features.h>
-#include <asm/irq_cpu.h>
-
-void __init arch_init_irq(void)
-{
-	pr_info("EIC is %s\n", cpu_has_veic ? "on" : "off");
-	pr_info("VINT is %s\n", cpu_has_vint ? "on" : "off");
-
-	if (!cpu_has_veic)
-		mips_cpu_irq_init();
-
-	irqchip_init();
-}
diff --git a/arch/mips/pistachio/time.c b/arch/mips/pistachio/time.c
deleted file mode 100644
index de64751dec40..000000000000
--- a/arch/mips/pistachio/time.c
+++ /dev/null
@@ -1,55 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Pistachio clocksource/timer setup
- *
- * Copyright (C) 2014 Google, Inc.
- */
-
-#include <linux/clk.h>
-#include <linux/clocksource.h>
-#include <linux/init.h>
-#include <linux/of.h>
-#include <linux/of_clk.h>
-
-#include <asm/mips-cps.h>
-#include <asm/time.h>
-
-unsigned int get_c0_compare_int(void)
-{
-	return gic_get_c0_compare_int();
-}
-
-int get_c0_perfcount_int(void)
-{
-	return gic_get_c0_perfcount_int();
-}
-EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
-
-int get_c0_fdc_int(void)
-{
-	return gic_get_c0_fdc_int();
-}
-
-void __init plat_time_init(void)
-{
-	struct device_node *np;
-	struct clk *clk;
-
-	of_clk_init(NULL);
-	timer_probe();
-
-	np = of_get_cpu_node(0, NULL);
-	if (!np) {
-		pr_err("Failed to get CPU node\n");
-		return;
-	}
-
-	clk = of_clk_get(np, 0);
-	if (IS_ERR(clk)) {
-		pr_err("Failed to get CPU clock: %ld\n", PTR_ERR(clk));
-		return;
-	}
-
-	mips_hpt_frequency = clk_get_rate(clk) / 2;
-	clk_put(clk);
-}
diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index 9c9f3877abf9..aea26e739543 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -46,6 +46,7 @@ config NDS32
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_DYNAMIC_FTRACE
 	select SET_FS
+	select TRACE_IRQFLAGS_SUPPORT
 	help
 	  Andes(nds32) Linux support.
 
@@ -62,9 +63,6 @@ config GENERIC_LOCKBREAK
 	def_bool y
 	depends on PREEMPTION
 
-config TRACE_IRQFLAGS_SUPPORT
-	def_bool y
-
 config STACKTRACE_SUPPORT
 	def_bool y
 
diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h
index 7d6824f7c0e8..c2a222ebfa2a 100644
--- a/arch/nds32/include/asm/cacheflush.h
+++ b/arch/nds32/include/asm/cacheflush.h
@@ -36,8 +36,7 @@ void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
 void flush_anon_page(struct vm_area_struct *vma,
 		     struct page *page, unsigned long vaddr);
 
-#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
-void flush_kernel_dcache_page(struct page *page);
+#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1
 void flush_kernel_vmap_range(void *addr, int size);
 void invalidate_kernel_vmap_range(void *addr, int size);
 #define flush_dcache_mmap_lock(mapping)   xa_lock_irq(&(mapping)->i_pages)
diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c
index 41725eaf8bac..b3d34d646652 100644
--- a/arch/nds32/kernel/setup.c
+++ b/arch/nds32/kernel/setup.c
@@ -244,7 +244,6 @@ static void __init setup_memory(void)
 	unsigned long ram_start_pfn;
 	unsigned long free_ram_start_pfn;
 	phys_addr_t memory_start, memory_end;
-	struct memblock_region *region;
 
 	memory_end = memory_start = 0;
 
diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c
index ee0d9ae192a5..f06421c645af 100644
--- a/arch/nds32/kernel/traps.c
+++ b/arch/nds32/kernel/traps.c
@@ -183,11 +183,6 @@ void __pgd_error(const char *file, int line, unsigned long val)
 }
 
 extern char *exception_vector, *exception_vector_end;
-void __init trap_init(void)
-{
-	return;
-}
-
 void __init early_trap_init(void)
 {
 	unsigned long ivb = 0;
diff --git a/arch/nds32/mm/cacheflush.c b/arch/nds32/mm/cacheflush.c
index ad5344ef5d33..07aac65d1cab 100644
--- a/arch/nds32/mm/cacheflush.c
+++ b/arch/nds32/mm/cacheflush.c
@@ -318,15 +318,6 @@ void flush_anon_page(struct vm_area_struct *vma,
 	local_irq_restore(flags);
 }
 
-void flush_kernel_dcache_page(struct page *page)
-{
-	unsigned long flags;
-	local_irq_save(flags);
-	cpu_dcache_wbinval_page((unsigned long)page_address(page));
-	local_irq_restore(flags);
-}
-EXPORT_SYMBOL(flush_kernel_dcache_page);
-
 void flush_kernel_vmap_range(void *addr, int size)
 {
 	unsigned long flags;
diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig
index 3efe5533ea1c..33fd06f5fa41 100644
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -41,9 +41,6 @@ config NO_IOPORT_MAP
 config FPU
 	def_bool n
 
-config TRACE_IRQFLAGS_SUPPORT
-	def_bool n
-
 menu "Kernel features"
 
 source "kernel/Kconfig.hz"
diff --git a/arch/nios2/kernel/traps.c b/arch/nios2/kernel/traps.c
index b172da4eb1a9..596986a74a26 100644
--- a/arch/nios2/kernel/traps.c
+++ b/arch/nios2/kernel/traps.c
@@ -105,11 +105,6 @@ void show_stack(struct task_struct *task, unsigned long *stack,
 	printk("%s\n", loglvl);
 }
 
-void __init trap_init(void)
-{
-	/* Nothing to do here */
-}
-
 /* Breakpoint handler */
 asmlinkage void breakpoint_c(struct pt_regs *fp)
 {
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index 50035a9816c8..e804026b4797 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -37,6 +37,7 @@ config OPENRISC
 	select GENERIC_IRQ_MULTI_HANDLER
 	select MMU_GATHER_NO_RANGE if MMU
 	select SET_FS
+	select TRACE_IRQFLAGS_SUPPORT
 
 config CPU_BIG_ENDIAN
 	def_bool y
@@ -50,9 +51,6 @@ config GENERIC_HWEIGHT
 config NO_IOPORT_MAP
 	def_bool y
 
-config TRACE_IRQFLAGS_SUPPORT
-	def_bool y
-
 # For now, use generic checksum functions
 #These can be reimplemented in assembly later if so inclined
 config GENERIC_CSUM
diff --git a/arch/openrisc/boot/dts/or1klitex.dts b/arch/openrisc/boot/dts/or1klitex.dts
index 3f9867aa3844..91c7173c50e6 100644
--- a/arch/openrisc/boot/dts/or1klitex.dts
+++ b/arch/openrisc/boot/dts/or1klitex.dts
@@ -41,10 +41,10 @@
 		interrupt-controller;
 	};
 
-	serial0: serial@e0002000 {
+	serial0: serial@e0006800 {
 		device_type = "serial";
 		compatible = "litex,liteuart";
-		reg = <0xe0002000 0x100>;
+		reg = <0xe0006800 0x100>;
 	};
 
 	soc_ctrl0: soc_controller@e0000000 {
@@ -52,4 +52,13 @@
 			reg = <0xe0000000 0xc>;
 			status = "okay";
 	};
+
+	ethernet@e0001000 {
+		compatible = "litex,liteeth";
+		reg = <0xe0001000 0x7c>,
+		      <0xe0001800 0x0a>,
+		      <0x80000000 0x2000>;
+		reg-names = "mac", "mdio", "buffer";
+		interrupts = <2>;
+	};
 };
diff --git a/arch/openrisc/configs/or1klitex_defconfig b/arch/openrisc/configs/or1klitex_defconfig
index 3c2c70d3d740..d695879a4d26 100644
--- a/arch/openrisc/configs/or1klitex_defconfig
+++ b/arch/openrisc/configs/or1klitex_defconfig
@@ -1,18 +1,24 @@
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
-CONFIG_BUG_ON_DATA_CORRUPTION=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_DEVTMPFS=y
-CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_EMBEDDED=y
+CONFIG_OPENRISC_BUILTIN_DTB="or1klitex"
 CONFIG_HZ_100=y
-CONFIG_INITRAMFS_SOURCE="openrisc-rootfs.cpio.gz"
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_OF_OVERLAY=y
-CONFIG_OPENRISC_BUILTIN_DTB="or1klitex"
-CONFIG_PANIC_ON_OOPS=y
-CONFIG_PRINTK_TIME=y
-CONFIG_LITEX_SOC_CONTROLLER=y
+CONFIG_NETDEVICES=y
+CONFIG_LITEX_LITEETH=y
 CONFIG_SERIAL_LITEUART=y
 CONFIG_SERIAL_LITEUART_CONSOLE=y
-CONFIG_SOFTLOCKUP_DETECTOR=y
 CONFIG_TTY_PRINTK=y
+CONFIG_LITEX_SOC_CONTROLLER=y
+CONFIG_TMPFS=y
+CONFIG_PRINTK_TIME=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
+CONFIG_BUG_ON_DATA_CORRUPTION=y
diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h
index 4ac591c9ca33..cdd657f80bfa 100644
--- a/arch/openrisc/include/asm/pgtable.h
+++ b/arch/openrisc/include/asm/pgtable.h
@@ -12,7 +12,7 @@
  * et al.
  */
 
-/* or32 pgtable.h - macros and functions to manipulate page tables
+/* or1k pgtable.h - macros and functions to manipulate page tables
  *
  * Based on:
  * include/asm-cris/pgtable.h
@@ -29,14 +29,14 @@
 
 /*
  * The Linux memory management assumes a three-level page table setup. On
- * or32, we use that, but "fold" the mid level into the top-level page
+ * or1k, we use that, but "fold" the mid level into the top-level page
  * table. Since the MMU TLB is software loaded through an interrupt, it
  * supports any page table structure, so we could have used a three-level
  * setup, but for the amounts of memory we normally use, a two-level is
  * probably more efficient.
  *
  * This file contains the functions and defines necessary to modify and use
- * the or32 page table tree.
+ * the or1k page table tree.
  */
 
 extern void paging_init(void);
diff --git a/arch/openrisc/include/asm/setup.h b/arch/openrisc/include/asm/setup.h
new file mode 100644
index 000000000000..9acbc5deda69
--- /dev/null
+++ b/arch/openrisc/include/asm/setup.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2021 Stafford Horne
+ */
+#ifndef _ASM_OR1K_SETUP_H
+#define _ASM_OR1K_SETUP_H
+
+#include <linux/init.h>
+#include <asm-generic/setup.h>
+
+#ifndef __ASSEMBLY__
+void __init or1k_early_setup(void *fdt);
+#endif
+
+#endif /* _ASM_OR1K_SETUP_H */
diff --git a/arch/openrisc/include/asm/thread_info.h b/arch/openrisc/include/asm/thread_info.h
index 4f9d2a261455..659834ab87fa 100644
--- a/arch/openrisc/include/asm/thread_info.h
+++ b/arch/openrisc/include/asm/thread_info.h
@@ -25,7 +25,7 @@
 
 /* THREAD_SIZE is the size of the task_struct/kernel_stack combo.
  * normally, the stack is found by doing something like p + THREAD_SIZE
- * in or32, a page is 8192 bytes, which seems like a sane size
+ * in or1k, a page is 8192 bytes, which seems like a sane size
  */
 
 #define THREAD_SIZE_ORDER 0
diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S
index 947613f61d4a..edaa775a648e 100644
--- a/arch/openrisc/kernel/entry.S
+++ b/arch/openrisc/kernel/entry.S
@@ -326,7 +326,7 @@ EXCEPTION_ENTRY(_data_page_fault_handler)
 1:	l.ori   r6,r0,0x0                  // !write access
 2:
 
-	/* call fault.c handler in or32/mm/fault.c */
+	/* call fault.c handler in openrisc/mm/fault.c */
 	l.jal   do_page_fault
 	 l.nop
 	l.j     _ret_from_exception
@@ -348,7 +348,7 @@ EXCEPTION_ENTRY(_insn_page_fault_handler)
 	/* r4 set be EXCEPTION_HANDLE */   // effective address of fault
 	l.ori	r6,r0,0x0		   // !write access
 
-	/* call fault.c handler in or32/mm/fault.c */
+	/* call fault.c handler in openrisc/mm/fault.c */
 	l.jal   do_page_fault
 	 l.nop
 	l.j     _ret_from_exception
@@ -547,6 +547,7 @@ EXCEPTION_ENTRY(_external_irq_handler)
 	l.bnf	1f			// ext irq enabled, all ok.
 	l.nop
 
+#ifdef CONFIG_PRINTK
 	l.addi  r1,r1,-0x8
 	l.movhi r3,hi(42f)
 	l.ori	r3,r3,lo(42f)
@@ -560,6 +561,7 @@ EXCEPTION_ENTRY(_external_irq_handler)
 		.string "\n\rESR interrupt bug: in _external_irq_handler (ESR %x)\n\r"
 		.align 4
 	.previous
+#endif
 
 	l.ori	r4,r4,SPR_SR_IEE	// fix the bug
 //	l.sw	PT_SR(r1),r4
diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S
index af355e3f4619..15f1b38dfe03 100644
--- a/arch/openrisc/kernel/head.S
+++ b/arch/openrisc/kernel/head.S
@@ -599,7 +599,7 @@ flush_tlb:
 	l.jal	_flush_tlb
 	 l.nop
 
-/* The MMU needs to be enabled before or32_early_setup is called */
+/* The MMU needs to be enabled before or1k_early_setup is called */
 
 enable_mmu:
 	/*
@@ -641,9 +641,9 @@ enable_mmu:
 	/* magic number mismatch, set fdt pointer to null */
 	l.or	r25,r0,r0
 _fdt_found:
-	/* pass fdt pointer to or32_early_setup in r3 */
+	/* pass fdt pointer to or1k_early_setup in r3 */
 	l.or	r3,r0,r25
-	LOAD_SYMBOL_2_GPR(r24, or32_early_setup)
+	LOAD_SYMBOL_2_GPR(r24, or1k_early_setup)
 	l.jalr r24
 	 l.nop
 
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index eb62429681fc..b0698d9ce14f 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -14,8 +14,6 @@
  */
 
 #define __KERNEL_SYSCALLS__
-#include <stdarg.h>
-
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/sched/debug.h>
diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c
index 8ae2da6ac097..0cd04d936a7a 100644
--- a/arch/openrisc/kernel/setup.c
+++ b/arch/openrisc/kernel/setup.c
@@ -209,7 +209,8 @@ void __init setup_cpuinfo(void)
 }
 
 /**
- * or32_early_setup
+ * or1k_early_setup
+ * @fdt: pointer to the start of the device tree in memory or NULL
  *
  * Handles the pointer to the device tree that this kernel is to use
  * for establishing the available platform devices.
@@ -217,7 +218,7 @@ void __init setup_cpuinfo(void)
  * Falls back on built-in device tree in case null pointer is passed.
  */
 
-void __init or32_early_setup(void *fdt)
+void __init or1k_early_setup(void *fdt)
 {
 	if (fdt)
 		pr_info("FDT at %p\n", fdt);
@@ -243,21 +244,6 @@ static inline unsigned long extract_value(unsigned long reg, unsigned long mask)
 	return mask & reg;
 }
 
-void __init detect_unit_config(unsigned long upr, unsigned long mask,
-			       char *text, void (*func) (void))
-{
-	if (text != NULL)
-		printk("%s", text);
-
-	if (upr & mask) {
-		if (func != NULL)
-			func();
-		else
-			printk("present\n");
-	} else
-		printk("not present\n");
-}
-
 /*
  * calibrate_delay
  *
diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c
index 4d61333c2623..aa1e709405ac 100644
--- a/arch/openrisc/kernel/traps.c
+++ b/arch/openrisc/kernel/traps.c
@@ -231,11 +231,6 @@ void unhandled_exception(struct pt_regs *regs, int ea, int vector)
 	die("Oops", regs, 9);
 }
 
-void __init trap_init(void)
-{
-	/* Nothing needs to be done */
-}
-
 asmlinkage void do_trap(struct pt_regs *regs, unsigned long address)
 {
 	force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc);
diff --git a/arch/openrisc/lib/Makefile b/arch/openrisc/lib/Makefile
index 79775aaa6baa..53327406b483 100644
--- a/arch/openrisc/lib/Makefile
+++ b/arch/openrisc/lib/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 #
-# Makefile for or32 specific library files..
+# Makefile for or1k specific library files..
 #
 
 obj-y	:= delay.o string.o memset.o memcpy.o
diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c
index ca97d9baab51..c730d1a51686 100644
--- a/arch/openrisc/mm/fault.c
+++ b/arch/openrisc/mm/fault.c
@@ -28,7 +28,7 @@ unsigned long pte_misses;	/* updated by do_page_fault() */
 unsigned long pte_errors;	/* updated by do_page_fault() */
 
 /* __PHX__ :: - check the vmalloc_fault in do_page_fault()
- *            - also look into include/asm-or32/mmu_context.h
+ *            - also look into include/asm/mmu_context.h
  */
 volatile pgd_t *current_pgd[NR_CPUS];
 
diff --git a/arch/parisc/Kbuild b/arch/parisc/Kbuild
index a4e40e534e6a..3c068b700a81 100644
--- a/arch/parisc/Kbuild
+++ b/arch/parisc/Kbuild
@@ -1 +1,2 @@
 # SPDX-License-Identifier: GPL-2.0-only
+obj-y	+= mm/ kernel/ math-emu/
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 95d4bbf4e455..4742b6f169b7 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -10,7 +10,6 @@ config PARISC
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_STRICT_KERNEL_RWX
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
-	select ARCH_HAS_STRNLEN_USER
 	select ARCH_NO_SG_CHAIN
 	select ARCH_SUPPORTS_HUGETLBFS if PA20
 	select ARCH_SUPPORTS_MEMORY_FAILURE
@@ -65,7 +64,7 @@ config PARISC
 	select HAVE_KPROBES_ON_FTRACE
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS
 	select HAVE_SOFTIRQ_ON_OWN_STACK if IRQSTACKS
-	select SET_FS
+	select TRACE_IRQFLAGS_SUPPORT
 
 	help
 	  The PA-RISC microprocessor is designed by Hewlett-Packard and used
diff --git a/arch/parisc/Kconfig.debug b/arch/parisc/Kconfig.debug
index 1478ded0e247..f66554cd5c45 100644
--- a/arch/parisc/Kconfig.debug
+++ b/arch/parisc/Kconfig.debug
@@ -1,4 +1 @@
 # SPDX-License-Identifier: GPL-2.0
-
-config TRACE_IRQFLAGS_SUPPORT
-	def_bool y
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index aed8ea29268b..fcde3ffa0221 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -25,18 +25,18 @@ CHECKFLAGS	+= -D__hppa__=1
 ifdef CONFIG_64BIT
 UTS_MACHINE	:= parisc64
 CHECKFLAGS	+= -D__LP64__=1
-CC_ARCHES	= hppa64
 LD_BFD		:= elf64-hppa-linux
 else # 32-bit
-CC_ARCHES	= hppa hppa2.0 hppa1.1
 LD_BFD		:= elf32-hppa-linux
 endif
 
 # select defconfig based on actual architecture
-ifeq ($(shell uname -m),parisc64)
+ifeq ($(ARCH),parisc64)
 	KBUILD_DEFCONFIG := generic-64bit_defconfig
+	CC_ARCHES := hppa64
 else
 	KBUILD_DEFCONFIG := generic-32bit_defconfig
+	CC_ARCHES := hppa hppa2.0 hppa1.1
 endif
 
 export LD_BFD
@@ -111,9 +111,6 @@ KBUILD_CFLAGS	+= $(cflags-y)
 LIBGCC		:= $(shell $(CC) -print-libgcc-file-name)
 export LIBGCC
 
-kernel-y			:= mm/ kernel/ math-emu/
-
-core-y	+= $(addprefix arch/parisc/, $(kernel-y))
 libs-y	+= arch/parisc/lib/ $(LIBGCC)
 
 boot	:= arch/parisc/boot
diff --git a/arch/parisc/boot/Makefile b/arch/parisc/boot/Makefile
index 61f44142cfe1..b873ee4720ca 100644
--- a/arch/parisc/boot/Makefile
+++ b/arch/parisc/boot/Makefile
@@ -15,7 +15,3 @@ $(obj)/bzImage: $(obj)/compressed/vmlinux FORCE
 
 $(obj)/compressed/vmlinux: FORCE
 	$(Q)$(MAKE) $(build)=$(obj)/compressed $@
-
-install: $(CONFIGURE) $(obj)/bzImage
-	sh -x  $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \
-	      System.map "$(INSTALL_PATH)"
diff --git a/arch/parisc/boot/compressed/Makefile b/arch/parisc/boot/compressed/Makefile
index dff453687530..9fe54878167d 100644
--- a/arch/parisc/boot/compressed/Makefile
+++ b/arch/parisc/boot/compressed/Makefile
@@ -26,7 +26,7 @@ endif
 OBJECTS += $(obj)/head.o $(obj)/real2.o $(obj)/firmware.o $(obj)/misc.o $(obj)/piggy.o
 
 LDFLAGS_vmlinux := -X -e startup --as-needed -T
-$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS) $(LIBGCC)
+$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS) $(LIBGCC) FORCE
 	$(call if_changed,ld)
 
 sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\|parisc_kernel_start\)$$/\#define SZ\2 0x\1/p'
@@ -34,7 +34,7 @@ sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\|parisc_kernel_start\
 quiet_cmd_sizes = GEN $@
       cmd_sizes = $(NM) $< | sed -n $(sed-sizes) > $@
 
-$(obj)/sizes.h: vmlinux
+$(obj)/sizes.h: vmlinux FORCE
 	$(call if_changed,sizes)
 
 AFLAGS_head.o += -I$(objtree)/$(obj) -DBOOTLOADER
@@ -70,19 +70,19 @@ suffix-$(CONFIG_KERNEL_LZMA)  := lzma
 suffix-$(CONFIG_KERNEL_LZO)  := lzo
 suffix-$(CONFIG_KERNEL_XZ)  := xz
 
-$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,gzip)
-$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,bzip2)
-$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,lz4)
-$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,lzma)
-$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,lzo)
-$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,xzkern)
 
 LDFLAGS_piggy.o := -r --format binary --oformat $(LD_BFD) -T
-$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y)
+$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) FORCE
 	$(call if_changed,ld)
diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c
index 2d395998f524..7ee49f5881d1 100644
--- a/arch/parisc/boot/compressed/misc.c
+++ b/arch/parisc/boot/compressed/misc.c
@@ -26,7 +26,7 @@
 extern char input_data[];
 extern int input_len;
 /* output_len is inserted by the linker possibly at an unaligned address */
-extern __le32 output_len __aligned(1);
+extern char output_len;
 extern char _text, _end;
 extern char _bss, _ebss;
 extern char _startcode_end;
diff --git a/arch/parisc/boot/install.sh b/arch/parisc/boot/install.sh
deleted file mode 100644
index 8f7c365fad83..000000000000
--- a/arch/parisc/boot/install.sh
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/bin/sh
-#
-# arch/parisc/install.sh, derived from arch/i386/boot/install.sh
-#
-# This file is subject to the terms and conditions of the GNU General Public
-# License.  See the file "COPYING" in the main directory of this archive
-# for more details.
-#
-# Copyright (C) 1995 by Linus Torvalds
-#
-# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
-#
-# "make install" script for i386 architecture
-#
-# Arguments:
-#   $1 - kernel version
-#   $2 - kernel image file
-#   $3 - kernel map file
-#   $4 - default install path (blank if root directory)
-#
-
-verify () {
-	if [ ! -f "$1" ]; then
-		echo ""                                                   1>&2
-		echo " *** Missing file: $1"                              1>&2
-		echo ' *** You need to run "make" before "make install".' 1>&2
-		echo ""                                                   1>&2
-		exit 1
-	fi
-}
-
-# Make sure the files actually exist
-
-verify "$2"
-verify "$3"
-
-# User may have a custom install script
-
-if [ -n "${INSTALLKERNEL}" ]; then
-  if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
-  if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
-fi
-
-# Default install
-
-if [ "$(basename $2)" = "zImage" ]; then
-# Compressed install
-  echo "Installing compressed kernel"
-  base=vmlinuz
-else
-# Normal install
-  echo "Installing normal kernel"
-  base=vmlinux
-fi
-
-if [ -f $4/$base-$1 ]; then
-  mv $4/$base-$1 $4/$base-$1.old
-fi
-cat $2 > $4/$base-$1
-
-# Install system map file
-if [ -f $4/System.map-$1 ]; then
-  mv $4/System.map-$1 $4/System.map-$1.old
-fi
-cp $3 $4/System.map-$1
diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig
index 7611d48c599e..dd14e3131325 100644
--- a/arch/parisc/configs/generic-32bit_defconfig
+++ b/arch/parisc/configs/generic-32bit_defconfig
@@ -111,7 +111,6 @@ CONFIG_PPP_BSDCOMP=m
 CONFIG_PPP_DEFLATE=m
 CONFIG_PPPOE=m
 # CONFIG_WLAN is not set
-CONFIG_INPUT_POLLDEV=y
 CONFIG_KEYBOARD_HIL_OLD=m
 CONFIG_KEYBOARD_HIL=m
 CONFIG_MOUSE_SERIAL=y
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index 99663fc1f997..eef0096db5f8 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -36,16 +36,12 @@ void flush_cache_all_local(void);
 void flush_cache_all(void);
 void flush_cache_mm(struct mm_struct *mm);
 
-#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
 void flush_kernel_dcache_page_addr(void *addr);
-static inline void flush_kernel_dcache_page(struct page *page)
-{
-	flush_kernel_dcache_page_addr(page_address(page));
-}
 
 #define flush_kernel_dcache_range(start,size) \
 	flush_kernel_dcache_range_asm((start), (start)+(size));
 
+#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1
 void flush_kernel_vmap_range(void *vaddr, int size);
 void invalidate_kernel_vmap_range(void *vaddr, int size);
 
@@ -59,7 +55,7 @@ extern void flush_dcache_page(struct page *page);
 #define flush_dcache_mmap_unlock(mapping)	xa_unlock_irq(&mapping->i_pages)
 
 #define flush_icache_page(vma,page)	do { 		\
-	flush_kernel_dcache_page(page);			\
+	flush_kernel_dcache_page_addr(page_address(page)); \
 	flush_kernel_icache_page(page_address(page)); 	\
 } while (0)
 
diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h
index b5d90e82b65d..c04f5a637c39 100644
--- a/arch/parisc/include/asm/compat.h
+++ b/arch/parisc/include/asm/compat.h
@@ -163,12 +163,6 @@ struct compat_shmid64_ds {
 #define COMPAT_ELF_NGREG 80
 typedef compat_ulong_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
 
-static __inline__ void __user *arch_compat_alloc_user_space(long len)
-{
-	struct pt_regs *regs = &current->thread.regs;
-	return (void __user *)regs->gr[30];
-}
-
 static inline int __is_compat_task(struct task_struct *t)
 {
 	return test_tsk_thread_flag(t, TIF_32BIT);
diff --git a/arch/parisc/include/asm/parisc-device.h b/arch/parisc/include/asm/parisc-device.h
index d02d144c6012..4de3b391d812 100644
--- a/arch/parisc/include/asm/parisc-device.h
+++ b/arch/parisc/include/asm/parisc-device.h
@@ -34,8 +34,8 @@ struct parisc_driver {
 	struct parisc_driver *next;
 	char *name; 
 	const struct parisc_device_id *id_table;
-	int (*probe) (struct parisc_device *dev); /* New device discovered */
-	int (*remove) (struct parisc_device *dev);
+	int (*probe)(struct parisc_device *dev); /* New device discovered */
+	void (*remove)(struct parisc_device *dev);
 	struct device_driver drv;
 };
 
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index 6a7e98e71f1d..54b63374579b 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -48,15 +48,15 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	pmd_t *pmd;
 
-	pmd = (pmd_t *)__get_free_pages(GFP_PGTABLE_KERNEL, PMD_ORDER);
+	pmd = (pmd_t *)__get_free_pages(GFP_PGTABLE_KERNEL, PMD_TABLE_ORDER);
 	if (likely(pmd))
-		memset ((void *)pmd, 0, PAGE_SIZE << PMD_ORDER);
+		memset ((void *)pmd, 0, PAGE_SIZE << PMD_TABLE_ORDER);
 	return pmd;
 }
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
-	free_pages((unsigned long)pmd, PMD_ORDER);
+	free_pages((unsigned long)pmd, PMD_TABLE_ORDER);
 }
 #endif
 
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 43937af127b1..7badd872f05a 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -112,7 +112,7 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 #define KERNEL_INITIAL_SIZE	(1 << KERNEL_INITIAL_ORDER)
 
 #if CONFIG_PGTABLE_LEVELS == 3
-#define PMD_ORDER	1
+#define PMD_TABLE_ORDER	1
 #define PGD_ORDER	0
 #else
 #define PGD_ORDER	1
@@ -131,7 +131,7 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 #define PMD_SHIFT       (PLD_SHIFT + BITS_PER_PTE)
 #define PMD_SIZE	(1UL << PMD_SHIFT)
 #define PMD_MASK	(~(PMD_SIZE-1))
-#define BITS_PER_PMD	(PAGE_SHIFT + PMD_ORDER - BITS_PER_PMD_ENTRY)
+#define BITS_PER_PMD	(PAGE_SHIFT + PMD_TABLE_ORDER - BITS_PER_PMD_ENTRY)
 #define PTRS_PER_PMD    (1UL << BITS_PER_PMD)
 #else
 #define BITS_PER_PMD	0
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index b5fbcd2c1780..eeb7da064289 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -101,10 +101,6 @@ DECLARE_PER_CPU(struct cpuinfo_parisc, cpu_data);
 
 #define CPU_HVERSION ((boot_cpu_data.hversion >> 4) & 0x0FFF)
 
-typedef struct {
-	int seg;  
-} mm_segment_t;
-
 #define ARCH_MIN_TASKALIGN	8
 
 struct thread_struct {
diff --git a/arch/parisc/include/asm/rt_sigframe.h b/arch/parisc/include/asm/rt_sigframe.h
index 2b3010ade00e..4b9e3d707571 100644
--- a/arch/parisc/include/asm/rt_sigframe.h
+++ b/arch/parisc/include/asm/rt_sigframe.h
@@ -2,7 +2,7 @@
 #ifndef _ASM_PARISC_RT_SIGFRAME_H
 #define _ASM_PARISC_RT_SIGFRAME_H
 
-#define SIGRETURN_TRAMP 4
+#define SIGRETURN_TRAMP 3
 #define SIGRESTARTBLOCK_TRAMP 5 
 #define TRAMP_SIZE (SIGRETURN_TRAMP + SIGRESTARTBLOCK_TRAMP)
 
diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h
index 0bd38a972cea..00ad50fef769 100644
--- a/arch/parisc/include/asm/thread_info.h
+++ b/arch/parisc/include/asm/thread_info.h
@@ -11,7 +11,6 @@
 struct thread_info {
 	struct task_struct *task;	/* main task structure */
 	unsigned long flags;		/* thread_info flags (see TIF_*) */
-	mm_segment_t addr_limit;	/* user-level address space limit */
 	__u32 cpu;			/* current CPU */
 	int preempt_count;		/* 0=premptable, <0=BUG; will also serve as bh-counter */
 };
@@ -21,7 +20,6 @@ struct thread_info {
 	.task		= &tsk,			\
 	.flags		= 0,			\
 	.cpu		= 0,			\
-	.addr_limit	= KERNEL_DS,		\
 	.preempt_count	= INIT_PREEMPT_COUNT,	\
 }
 
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index ed2cd4fb479b..192ad9e11b25 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -11,14 +11,6 @@
 #include <linux/bug.h>
 #include <linux/string.h>
 
-#define KERNEL_DS	((mm_segment_t){0})
-#define USER_DS 	((mm_segment_t){1})
-
-#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
-
-#define get_fs()	(current_thread_info()->addr_limit)
-#define set_fs(x)	(current_thread_info()->addr_limit = (x))
-
 /*
  * Note that since kernel addresses are in a separate address space on
  * parisc, we don't need to do anything for access_ok().
@@ -33,11 +25,11 @@
 #define get_user __get_user
 
 #if !defined(CONFIG_64BIT)
-#define LDD_USER(val, ptr)	__get_user_asm64(val, ptr)
-#define STD_USER(x, ptr)	__put_user_asm64(x, ptr)
+#define LDD_USER(sr, val, ptr)	__get_user_asm64(sr, val, ptr)
+#define STD_USER(sr, x, ptr)	__put_user_asm64(sr, x, ptr)
 #else
-#define LDD_USER(val, ptr)	__get_user_asm(val, "ldd", ptr)
-#define STD_USER(x, ptr)	__put_user_asm("std", x, ptr)
+#define LDD_USER(sr, val, ptr)	__get_user_asm(sr, val, "ldd", ptr)
+#define STD_USER(sr, x, ptr)	__put_user_asm(sr, "std", x, ptr)
 #endif
 
 /*
@@ -67,28 +59,15 @@ struct exception_table_entry {
 #define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\
 	ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1)
 
-/*
- * load_sr2() preloads the space register %%sr2 - based on the value of
- * get_fs() - with either a value of 0 to access kernel space (KERNEL_DS which
- * is 0), or with the current value of %%sr3 to access user space (USER_DS)
- * memory. The following __get_user_asm() and __put_user_asm() functions have
- * %%sr2 hard-coded to access the requested memory.
- */
-#define load_sr2() \
-	__asm__(" or,=  %0,%%r0,%%r0\n\t"	\
-		" mfsp %%sr3,%0\n\t"		\
-		" mtsp %0,%%sr2\n\t"		\
-		: : "r"(get_fs()) : )
-
-#define __get_user_internal(val, ptr)			\
+#define __get_user_internal(sr, val, ptr)		\
 ({							\
 	register long __gu_err __asm__ ("r8") = 0;	\
 							\
 	switch (sizeof(*(ptr))) {			\
-	case 1: __get_user_asm(val, "ldb", ptr); break;	\
-	case 2: __get_user_asm(val, "ldh", ptr); break; \
-	case 4: __get_user_asm(val, "ldw", ptr); break; \
-	case 8: LDD_USER(val, ptr); break;		\
+	case 1: __get_user_asm(sr, val, "ldb", ptr); break; \
+	case 2: __get_user_asm(sr, val, "ldh", ptr); break; \
+	case 4: __get_user_asm(sr, val, "ldw", ptr); break; \
+	case 8: LDD_USER(sr, val, ptr); break;		\
 	default: BUILD_BUG();				\
 	}						\
 							\
@@ -97,15 +76,14 @@ struct exception_table_entry {
 
 #define __get_user(val, ptr)				\
 ({							\
-	load_sr2();					\
-	__get_user_internal(val, ptr);			\
+	__get_user_internal("%%sr3,", val, ptr);	\
 })
 
-#define __get_user_asm(val, ldx, ptr)			\
+#define __get_user_asm(sr, val, ldx, ptr)		\
 {							\
 	register long __gu_val;				\
 							\
-	__asm__("1: " ldx " 0(%%sr2,%2),%0\n"		\
+	__asm__("1: " ldx " 0(" sr "%2),%0\n"		\
 		"9:\n"					\
 		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)	\
 		: "=r"(__gu_val), "=r"(__gu_err)        \
@@ -114,9 +92,22 @@ struct exception_table_entry {
 	(val) = (__force __typeof__(*(ptr))) __gu_val;	\
 }
 
+#define HAVE_GET_KERNEL_NOFAULT
+#define __get_kernel_nofault(dst, src, type, err_label)	\
+{							\
+	type __z;					\
+	long __err;					\
+	__err = __get_user_internal("%%sr0,", __z, (type *)(src)); \
+	if (unlikely(__err))				\
+		goto err_label;				\
+	else						\
+		*(type *)(dst) = __z;			\
+}
+
+
 #if !defined(CONFIG_64BIT)
 
-#define __get_user_asm64(val, ptr)			\
+#define __get_user_asm64(sr, val, ptr)			\
 {							\
 	union {						\
 		unsigned long long	l;		\
@@ -124,8 +115,8 @@ struct exception_table_entry {
 	} __gu_tmp;					\
 							\
 	__asm__("   copy %%r0,%R0\n"			\
-		"1: ldw 0(%%sr2,%2),%0\n"		\
-		"2: ldw 4(%%sr2,%2),%R0\n"		\
+		"1: ldw 0(" sr "%2),%0\n"		\
+		"2: ldw 4(" sr "%2),%R0\n"		\
 		"9:\n"					\
 		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)	\
 		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b)	\
@@ -138,16 +129,16 @@ struct exception_table_entry {
 #endif /* !defined(CONFIG_64BIT) */
 
 
-#define __put_user_internal(x, ptr)				\
+#define __put_user_internal(sr, x, ptr)				\
 ({								\
 	register long __pu_err __asm__ ("r8") = 0;      	\
         __typeof__(*(ptr)) __x = (__typeof__(*(ptr)))(x);	\
 								\
 	switch (sizeof(*(ptr))) {				\
-	case 1: __put_user_asm("stb", __x, ptr); break;		\
-	case 2: __put_user_asm("sth", __x, ptr); break;		\
-	case 4: __put_user_asm("stw", __x, ptr); break;		\
-	case 8: STD_USER(__x, ptr); break;			\
+	case 1: __put_user_asm(sr, "stb", __x, ptr); break;	\
+	case 2: __put_user_asm(sr, "sth", __x, ptr); break;	\
+	case 4: __put_user_asm(sr, "stw", __x, ptr); break;	\
+	case 8: STD_USER(sr, __x, ptr); break;			\
 	default: BUILD_BUG();					\
 	}							\
 								\
@@ -156,10 +147,20 @@ struct exception_table_entry {
 
 #define __put_user(x, ptr)					\
 ({								\
-	load_sr2();						\
-	__put_user_internal(x, ptr);				\
+	__put_user_internal("%%sr3,", x, ptr);			\
 })
 
+#define __put_kernel_nofault(dst, src, type, err_label)		\
+{								\
+	type __z = *(type *)(src);				\
+	long __err;						\
+	__err = __put_user_internal("%%sr0,", __z, (type *)(dst)); \
+	if (unlikely(__err))					\
+		goto err_label;					\
+}
+
+
+
 
 /*
  * The "__put_user/kernel_asm()" macros tell gcc they read from memory
@@ -170,26 +171,26 @@ struct exception_table_entry {
  * r8 is already listed as err.
  */
 
-#define __put_user_asm(stx, x, ptr)                         \
-	__asm__ __volatile__ (                              \
-		"1: " stx " %2,0(%%sr2,%1)\n"		    \
-		"9:\n"					    \
-		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)	    \
-		: "=r"(__pu_err)                            \
+#define __put_user_asm(sr, stx, x, ptr)				\
+	__asm__ __volatile__ (					\
+		"1: " stx " %2,0(" sr "%1)\n"			\
+		"9:\n"						\
+		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)		\
+		: "=r"(__pu_err)				\
 		: "r"(ptr), "r"(x), "0"(__pu_err))
 
 
 #if !defined(CONFIG_64BIT)
 
-#define __put_user_asm64(__val, ptr) do {	    	    \
-	__asm__ __volatile__ (				    \
-		"1: stw %2,0(%%sr2,%1)\n"		    \
-		"2: stw %R2,4(%%sr2,%1)\n"		    \
-		"9:\n"					    \
-		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)	    \
-		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b)	    \
-		: "=r"(__pu_err)                            \
-		: "r"(ptr), "r"(__val), "0"(__pu_err));	    \
+#define __put_user_asm64(sr, __val, ptr) do {			\
+	__asm__ __volatile__ (					\
+		"1: stw %2,0(" sr "%1)\n"			\
+		"2: stw %R2,4(" sr "%1)\n"			\
+		"9:\n"						\
+		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)		\
+		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b)		\
+		: "=r"(__pu_err)				\
+		: "r"(ptr), "r"(__val), "0"(__pu_err));		\
 } while (0)
 
 #endif /* !defined(CONFIG_64BIT) */
@@ -200,14 +201,12 @@ struct exception_table_entry {
  */
 
 extern long strncpy_from_user(char *, const char __user *, long);
-extern unsigned lclear_user(void __user *, unsigned long);
-extern long lstrnlen_user(const char __user *, long);
+extern __must_check unsigned lclear_user(void __user *, unsigned long);
+extern __must_check long strnlen_user(const char __user *src, long n);
 /*
  * Complex access routines -- macros
  */
-#define user_addr_max() (~0UL)
 
-#define strnlen_user lstrnlen_user
 #define clear_user lclear_user
 #define __clear_user lclear_user
 
@@ -215,8 +214,6 @@ unsigned long __must_check raw_copy_to_user(void __user *dst, const void *src,
 					    unsigned long len);
 unsigned long __must_check raw_copy_from_user(void *dst, const void __user *src,
 					    unsigned long len);
-unsigned long __must_check raw_copy_in_user(void __user *dst, const void __user *src,
-					    unsigned long len);
 #define INLINE_COPY_TO_USER
 #define INLINE_COPY_FROM_USER
 
diff --git a/arch/parisc/include/uapi/asm/swab.h b/arch/parisc/include/uapi/asm/swab.h
deleted file mode 100644
index 35fb2d1bfbbd..000000000000
--- a/arch/parisc/include/uapi/asm/swab.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _PARISC_SWAB_H
-#define _PARISC_SWAB_H
-
-#include <asm/bitsperlong.h>
-#include <linux/types.h>
-#include <linux/compiler.h>
-
-#define __SWAB_64_THRU_32__
-
-static inline __attribute_const__ __u16 __arch_swab16(__u16 x)
-{
-	__asm__("dep %0, 15, 8, %0\n\t"		/* deposit 00ab -> 0bab */
-		"shd %%r0, %0, 8, %0"		/* shift 000000ab -> 00ba */
-		: "=r" (x)
-		: "0" (x));
-	return x;
-}
-#define __arch_swab16 __arch_swab16
-
-static inline __attribute_const__ __u32 __arch_swab24(__u32 x)
-{
-	__asm__("shd %0, %0, 8, %0\n\t"		/* shift xabcxabc -> cxab */
-		"dep %0, 15, 8, %0\n\t"		/* deposit cxab -> cbab */
-		"shd %%r0, %0, 8, %0"		/* shift 0000cbab -> 0cba */
-		: "=r" (x)
-		: "0" (x));
-	return x;
-}
-
-static inline __attribute_const__ __u32 __arch_swab32(__u32 x)
-{
-	unsigned int temp;
-	__asm__("shd %0, %0, 16, %1\n\t"	/* shift abcdabcd -> cdab */
-		"dep %1, 15, 8, %1\n\t"		/* deposit cdab -> cbab */
-		"shd %0, %1, 8, %0"		/* shift abcdcbab -> dcba */
-		: "=r" (x), "=&r" (temp)
-		: "0" (x));
-	return x;
-}
-#define __arch_swab32 __arch_swab32
-
-#if __BITS_PER_LONG > 32
-/*
-** From "PA-RISC 2.0 Architecture", HP Professional Books.
-** See Appendix I page 8 , "Endian Byte Swapping".
-**
-** Pretty cool algorithm: (* == zero'd bits)
-**      PERMH   01234567 -> 67452301 into %0
-**      HSHL    67452301 -> 7*5*3*1* into %1
-**      HSHR    67452301 -> *6*4*2*0 into %0
-**      OR      %0 | %1  -> 76543210 into %0 (all done!)
-*/
-static inline __attribute_const__ __u64 __arch_swab64(__u64 x)
-{
-	__u64 temp;
-	__asm__("permh,3210 %0, %0\n\t"
-		"hshl %0, 8, %1\n\t"
-		"hshr,u %0, 8, %0\n\t"
-		"or %1, %0, %0"
-		: "=r" (x), "=&r" (temp)
-		: "0" (x));
-	return x;
-}
-#define __arch_swab64 __arch_swab64
-#endif /* __BITS_PER_LONG > 32 */
-
-#endif /* _PARISC_SWAB_H */
diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
index 33113ba24054..22924a3f1728 100644
--- a/arch/parisc/kernel/asm-offsets.c
+++ b/arch/parisc/kernel/asm-offsets.c
@@ -230,7 +230,6 @@ int main(void)
 	DEFINE(TI_TASK, offsetof(struct thread_info, task));
 	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
 	DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
-	DEFINE(TI_SEGMENT, offsetof(struct thread_info, addr_limit));
 	DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
 	DEFINE(THREAD_SZ, sizeof(struct thread_info));
 	/* THREAD_SZ_ALGN includes space for a stack frame. */
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 86a1a63563fd..39e02227e231 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -334,7 +334,7 @@ void flush_dcache_page(struct page *page)
 		return;
 	}
 
-	flush_kernel_dcache_page(page);
+	flush_kernel_dcache_page_addr(page_address(page));
 
 	if (!mapping)
 		return;
@@ -375,7 +375,6 @@ EXPORT_SYMBOL(flush_dcache_page);
 
 /* Defined in arch/parisc/kernel/pacache.S */
 EXPORT_SYMBOL(flush_kernel_dcache_range_asm);
-EXPORT_SYMBOL(flush_kernel_dcache_page_asm);
 EXPORT_SYMBOL(flush_data_cache_local);
 EXPORT_SYMBOL(flush_kernel_icache_range_asm);
 
diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index 665b70086685..7034227dbdf3 100644
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -51,7 +51,7 @@
  *					prumpf	991016	
  */
 
-#include <stdarg.h>
+#include <linux/stdarg.h>
 
 #include <linux/delay.h>
 #include <linux/init.h>
diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c
index e8a6a751dfd8..00297e8e1c88 100644
--- a/arch/parisc/kernel/parisc_ksyms.c
+++ b/arch/parisc/kernel/parisc_ksyms.c
@@ -32,7 +32,6 @@ EXPORT_SYMBOL(__xchg64);
 
 #include <linux/uaccess.h>
 EXPORT_SYMBOL(lclear_user);
-EXPORT_SYMBOL(lstrnlen_user);
 
 #ifndef CONFIG_64BIT
 /* Needed so insmod can set dp value */
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 184ec3c1eae4..38ec4ae81239 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -17,9 +17,6 @@
  *    Copyright (C) 2001-2014 Helge Deller <deller@gmx.de>
  *    Copyright (C) 2002 Randolph Chung <tausq with parisc-linux.org>
  */
-
-#include <stdarg.h>
-
 #include <linux/elf.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index 3fb86ee507dd..cceb09855e03 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -150,8 +150,6 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_PA11
 	dma_ops_init();
 #endif
-
-	clear_sched_clock_stable();
 }
 
 /*
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index fb1e94a3982b..bbfe23c40c01 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -237,12 +237,22 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
 #endif
 	
 	usp = (regs->gr[30] & ~(0x01UL));
-	/*FIXME: frame_size parameter is unused, remove it. */
-	frame = get_sigframe(&ksig->ka, usp, sizeof(*frame));
+	sigframe_size = PARISC_RT_SIGFRAME_SIZE;
+#ifdef CONFIG_64BIT
+	if (is_compat_task()) {
+		/* The gcc alloca implementation leaves garbage in the upper 32 bits of sp */
+		usp = (compat_uint_t)usp;
+		sigframe_size = PARISC_RT_SIGFRAME_SIZE32;
+	}
+#endif
+	frame = get_sigframe(&ksig->ka, usp, sigframe_size);
 
 	DBG(1,"SETUP_RT_FRAME: START\n");
 	DBG(1,"setup_rt_frame: frame %p info %p\n", frame, ksig->info);
 
+	start = (unsigned long) frame;
+	if (start >= user_addr_max() - sigframe_size)
+		return -EFAULT;
 	
 #ifdef CONFIG_64BIT
 
@@ -278,32 +288,21 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
 	   already in userspace. The first words of tramp are used to
 	   save the previous sigrestartblock trampoline that might be
 	   on the stack. We start the sigreturn trampoline at 
-	   SIGRESTARTBLOCK_TRAMP+X. */
+	   SIGRESTARTBLOCK_TRAMP. */
 	err |= __put_user(in_syscall ? INSN_LDI_R25_1 : INSN_LDI_R25_0,
 			&frame->tramp[SIGRESTARTBLOCK_TRAMP+0]);
-	err |= __put_user(INSN_LDI_R20, 
-			&frame->tramp[SIGRESTARTBLOCK_TRAMP+1]);
 	err |= __put_user(INSN_BLE_SR2_R0, 
+			&frame->tramp[SIGRESTARTBLOCK_TRAMP+1]);
+	err |= __put_user(INSN_LDI_R20,
 			&frame->tramp[SIGRESTARTBLOCK_TRAMP+2]);
-	err |= __put_user(INSN_NOP, &frame->tramp[SIGRESTARTBLOCK_TRAMP+3]);
-
-#if DEBUG_SIG
-	/* Assert that we're flushing in the correct space... */
-	{
-		unsigned long sid;
-		asm ("mfsp %%sr3,%0" : "=r" (sid));
-		DBG(1,"setup_rt_frame: Flushing 64 bytes at space %#x offset %p\n",
-		       sid, frame->tramp);
-	}
-#endif
 
-	start = (unsigned long) &frame->tramp[0];
-	end = (unsigned long) &frame->tramp[TRAMP_SIZE];
+	start = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+0];
+	end = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+3];
 	flush_user_dcache_range_asm(start, end);
 	flush_user_icache_range_asm(start, end);
 
 	/* TRAMP Words 0-4, Length 5 = SIGRESTARTBLOCK_TRAMP
-	 * TRAMP Words 5-9, Length 4 = SIGRETURN_TRAMP
+	 * TRAMP Words 5-7, Length 3 = SIGRETURN_TRAMP
 	 * So the SIGRETURN_TRAMP is at the end of SIGRESTARTBLOCK_TRAMP
 	 */
 	rp = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP];
@@ -347,11 +346,6 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
 
 	/* The syscall return path will create IAOQ values from r31.
 	 */
-	sigframe_size = PARISC_RT_SIGFRAME_SIZE;
-#ifdef CONFIG_64BIT
-	if (is_compat_task())
-		sigframe_size = PARISC_RT_SIGFRAME_SIZE32;
-#endif
 	if (in_syscall) {
 		regs->gr[31] = haddr;
 #ifdef CONFIG_64BIT
@@ -495,7 +489,6 @@ syscall_restart(struct pt_regs *regs, struct k_sigaction *ka)
 		DBG(1,"ERESTARTNOHAND: returning -EINTR\n");
 		regs->gr[28] = -EINTR;
 		break;
-
 	case -ERESTARTSYS:
 		if (!(ka->sa.sa_flags & SA_RESTART)) {
 			DBG(1,"ERESTARTSYS: putting -EINTR\n");
@@ -523,6 +516,10 @@ insert_restart_trampoline(struct pt_regs *regs)
 		unsigned long end  = (unsigned long) &usp[5];
 		long err = 0;
 
+		/* check that we don't exceed the stack */
+		if (A(&usp[0]) >= user_addr_max() - 5 * sizeof(int))
+			return;
+
 		/* Setup a trampoline to restart the syscall
 		 * with __NR_restart_syscall
 		 *
@@ -563,10 +560,6 @@ insert_restart_trampoline(struct pt_regs *regs)
 }
 
 /*
- * Note that 'init' is a special process: it doesn't get signals it doesn't
- * want to handle. Thus you cannot kill init even with a SIGKILL even by
- * mistake.
- *
  * We need to be able to restore the syscall arguments (r21-r26) to
  * restart syscalls.  Thus, the syscall path should save them in the
  * pt_regs structure (it's okay to do so since they are caller-save
diff --git a/arch/parisc/kernel/signal32.h b/arch/parisc/kernel/signal32.h
index f166250f2d06..a5bdbb5678b7 100644
--- a/arch/parisc/kernel/signal32.h
+++ b/arch/parisc/kernel/signal32.h
@@ -36,7 +36,7 @@ struct compat_regfile {
         compat_int_t rf_sar;
 };
 
-#define COMPAT_SIGRETURN_TRAMP 4
+#define COMPAT_SIGRETURN_TRAMP 3
 #define COMPAT_SIGRESTARTBLOCK_TRAMP 5
 #define COMPAT_TRAMP_SIZE (COMPAT_SIGRETURN_TRAMP + \
 				COMPAT_SIGRESTARTBLOCK_TRAMP)
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index eaf0603ae781..bf751e0732b7 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -292,9 +292,9 @@
 258	32	clock_nanosleep		sys_clock_nanosleep_time32
 258	64	clock_nanosleep		sys_clock_nanosleep
 259	common	tgkill			sys_tgkill
-260	common	mbind			sys_mbind			compat_sys_mbind
-261	common	get_mempolicy		sys_get_mempolicy		compat_sys_get_mempolicy
-262	common	set_mempolicy		sys_set_mempolicy		compat_sys_set_mempolicy
+260	common	mbind			sys_mbind
+261	common	get_mempolicy		sys_get_mempolicy
+262	common	set_mempolicy		sys_set_mempolicy
 # 263 was vserver
 264	common	add_key			sys_add_key
 265	common	request_key		sys_request_key
@@ -331,7 +331,7 @@
 292	64	sync_file_range		sys_sync_file_range
 293	common	tee			sys_tee
 294	common	vmsplice		sys_vmsplice
-295	common	move_pages		sys_move_pages			compat_sys_move_pages
+295	common	move_pages		sys_move_pages
 296	common	getcpu			sys_getcpu
 297	common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
 298	common	statfs64		sys_statfs64			compat_sys_statfs64
@@ -444,3 +444,5 @@
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
+# 447 reserved for memfd_secret
+448	common	process_mrelease		sys_process_mrelease
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index 08e4d480abe1..9fb1e794831b 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -265,6 +265,9 @@ static int __init init_cr16_clocksource(void)
 			    (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc))
 				continue;
 
+			/* mark sched_clock unstable */
+			clear_sched_clock_stable();
+
 			clocksource_cr16.name = "cr16_unstable";
 			clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE;
 			clocksource_cr16.rating = 0;
@@ -272,10 +275,6 @@ static int __init init_cr16_clocksource(void)
 		}
 	}
 
-	/* XXX: We may want to mark sched_clock stable here if cr16 clocks are
-	 *	in sync:
-	 *	(clocksource_cr16.flags == CLOCK_SOURCE_IS_CONTINUOUS) */
-
 	/* register at clocksource framework */
 	clocksource_register_hz(&clocksource_cr16,
 		100 * PAGE0->mem_10msec);
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index 8d8441d4562a..747c328fb886 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -859,7 +859,3 @@ void  __init early_trap_init(void)
 
 	initialize_ivt(&fault_vector_20);
 }
-
-void __init trap_init(void)
-{
-}
diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S
index 36d6a8638ead..b428d29e45fb 100644
--- a/arch/parisc/lib/lusercopy.S
+++ b/arch/parisc/lib/lusercopy.S
@@ -28,21 +28,6 @@
 #include <linux/linkage.h>
 
 	/*
-	 * get_sr gets the appropriate space value into
-	 * sr1 for kernel/user space access, depending
-	 * on the flag stored in the task structure.
-	 */
-
-	.macro  get_sr
-	mfctl       %cr30,%r1
-	ldw         TI_SEGMENT(%r1),%r22
-	mfsp        %sr3,%r1
-	or,<>       %r22,%r0,%r0
-	copy        %r0,%r1
-	mtsp        %r1,%sr1
-	.endm
-
-	/*
 	 * unsigned long lclear_user(void *to, unsigned long n)
 	 *
 	 * Returns 0 for success.
@@ -51,10 +36,9 @@
 
 ENTRY_CFI(lclear_user)
 	comib,=,n   0,%r25,$lclu_done
-	get_sr
 $lclu_loop:
 	addib,<>    -1,%r25,$lclu_loop
-1:      stbs,ma     %r0,1(%sr1,%r26)
+1:	stbs,ma     %r0,1(%sr3,%r26)
 
 $lclu_done:
 	bv          %r0(%r2)
@@ -67,40 +51,6 @@ $lclu_done:
 ENDPROC_CFI(lclear_user)
 
 
-	/*
-	 * long lstrnlen_user(char *s, long n)
-	 *
-	 * Returns 0 if exception before zero byte or reaching N,
-	 *         N+1 if N would be exceeded,
-	 *         else strlen + 1 (i.e. includes zero byte).
-	 */
-
-ENTRY_CFI(lstrnlen_user)
-	comib,=     0,%r25,$lslen_nzero
-	copy	    %r26,%r24
-	get_sr
-1:      ldbs,ma     1(%sr1,%r26),%r1
-$lslen_loop:
-	comib,=,n   0,%r1,$lslen_done
-	addib,<>    -1,%r25,$lslen_loop
-2:      ldbs,ma     1(%sr1,%r26),%r1
-$lslen_done:
-	bv          %r0(%r2)
-	sub	    %r26,%r24,%r28
-
-$lslen_nzero:
-	b           $lslen_done
-	ldo         1(%r26),%r26 /* special case for N == 0 */
-
-3:      b	    $lslen_done
-	copy        %r24,%r26    /* reset r26 so 0 is returned on fault */
-
-	ASM_EXCEPTIONTABLE_ENTRY(1b,3b)
-	ASM_EXCEPTIONTABLE_ENTRY(2b,3b)
-
-ENDPROC_CFI(lstrnlen_user)
-
-
 /*
  * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
  *
diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c
index 4b75388190b4..ea70a0e08321 100644
--- a/arch/parisc/lib/memcpy.c
+++ b/arch/parisc/lib/memcpy.c
@@ -38,14 +38,6 @@ unsigned long raw_copy_from_user(void *dst, const void __user *src,
 }
 EXPORT_SYMBOL(raw_copy_from_user);
 
-unsigned long raw_copy_in_user(void __user *dst, const void __user *src, unsigned long len)
-{
-	mtsp(get_user_space(), 1);
-	mtsp(get_user_space(), 2);
-	return pa_memcpy((void __force *)dst, (void __force *)src, len);
-}
-
-
 void * memcpy(void * dst,const void *src, size_t count)
 {
 	mtsp(get_kernel_space(), 1);
@@ -54,7 +46,6 @@ void * memcpy(void * dst,const void *src, size_t count)
 	return dst;
 }
 
-EXPORT_SYMBOL(raw_copy_in_user);
 EXPORT_SYMBOL(memcpy);
 
 bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
diff --git a/arch/parisc/math-emu/decode_exc.c b/arch/parisc/math-emu/decode_exc.c
index cd8ffc6ceadf..494ca41df05d 100644
--- a/arch/parisc/math-emu/decode_exc.c
+++ b/arch/parisc/math-emu/decode_exc.c
@@ -46,7 +46,7 @@
 #define SIGNALCODE(signal, code) ((signal) << 24 | (code))
 #define copropbit	1<<31-2	/* bit position 2 */
 #define opclass		9	/* bits 21 & 22 */
-#define fmt		11	/* bits 19 & 20 */
+#define fmtbits		11	/* bits 19 & 20 */
 #define df		13	/* bits 17 & 18 */
 #define twobits		3	/* mask low-order 2 bits */
 #define fivebits	31	/* mask low-order 5 bits */
@@ -57,7 +57,7 @@
 #define Excp_instr(index) Instructionfield(Fpu_register[index])
 #define Clear_excp_register(index) Allexception(Fpu_register[index]) = 0
 #define Excp_format() \
-    (current_ir >> ((current_ir>>opclass & twobits)==1 ? df : fmt) & twobits)
+	(current_ir >> ((current_ir>>opclass & twobits) == 1 ? df : fmtbits) & twobits)
 
 /* Miscellaneous definitions */
 #define Fpu_sgl(index) Fpu_register[index*2]
diff --git a/arch/parisc/math-emu/fpudispatch.c b/arch/parisc/math-emu/fpudispatch.c
index 7c46969ead9b..01ed133227c2 100644
--- a/arch/parisc/math-emu/fpudispatch.c
+++ b/arch/parisc/math-emu/fpudispatch.c
@@ -310,12 +310,15 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
 					r1 &= ~3;
 					fpregs[t+3] = fpregs[r1+3];
 					fpregs[t+2] = fpregs[r1+2];
+					fallthrough;
 				    case 1: /* double */
 					fpregs[t+1] = fpregs[r1+1];
+					fallthrough;
 				    case 0: /* single */
 					fpregs[t] = fpregs[r1];
 					return(NOEXCEPTION);
 				}
+				BUG();
 			case 3: /* FABS */
 				switch (fmt) {
 				    case 2: /* illegal */
@@ -325,13 +328,16 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
 					r1 &= ~3;
 					fpregs[t+3] = fpregs[r1+3];
 					fpregs[t+2] = fpregs[r1+2];
+					fallthrough;
 				    case 1: /* double */
 					fpregs[t+1] = fpregs[r1+1];
+					fallthrough;
 				    case 0: /* single */
 					/* copy and clear sign bit */
 					fpregs[t] = fpregs[r1] & 0x7fffffff;
 					return(NOEXCEPTION);
 				}
+				BUG();
 			case 6: /* FNEG */
 				switch (fmt) {
 				    case 2: /* illegal */
@@ -341,13 +347,16 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
 					r1 &= ~3;
 					fpregs[t+3] = fpregs[r1+3];
 					fpregs[t+2] = fpregs[r1+2];
+					fallthrough;
 				    case 1: /* double */
 					fpregs[t+1] = fpregs[r1+1];
+					fallthrough;
 				    case 0: /* single */
 					/* copy and invert sign bit */
 					fpregs[t] = fpregs[r1] ^ 0x80000000;
 					return(NOEXCEPTION);
 				}
+				BUG();
 			case 7: /* FNEGABS */
 				switch (fmt) {
 				    case 2: /* illegal */
@@ -357,13 +366,16 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
 					r1 &= ~3;
 					fpregs[t+3] = fpregs[r1+3];
 					fpregs[t+2] = fpregs[r1+2];
+					fallthrough;
 				    case 1: /* double */
 					fpregs[t+1] = fpregs[r1+1];
+					fallthrough;
 				    case 0: /* single */
 					/* copy and set sign bit */
 					fpregs[t] = fpregs[r1] | 0x80000000;
 					return(NOEXCEPTION);
 				}
+				BUG();
 			case 4: /* FSQRT */
 				switch (fmt) {
 				    case 0:
@@ -376,6 +388,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
 				    case 3: /* quad not implemented */
 					return(MAJOR_0C_EXCP);
 				}
+				BUG();
 			case 5: /* FRND */
 				switch (fmt) {
 				    case 0:
@@ -389,7 +402,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
 					return(MAJOR_0C_EXCP);
 				}
 		} /* end of switch (subop) */
-
+		BUG();
 	case 1: /* class 1 */
 		df = extru(ir,fpdfpos,2); /* get dest format */
 		if ((df & 2) || (fmt & 2)) {
@@ -419,6 +432,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
 				    case 3: /* dbl/dbl */
 					return(MAJOR_0C_EXCP);
 				}
+				BUG();
 			case 1: /* FCNVXF */
 				switch(fmt) {
 				    case 0: /* sgl/sgl */
@@ -434,6 +448,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
 					return(dbl_to_dbl_fcnvxf(&fpregs[r1],0,
 						&fpregs[t],status));
 				}
+				BUG();
 			case 2: /* FCNVFX */
 				switch(fmt) {
 				    case 0: /* sgl/sgl */
@@ -449,6 +464,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
 					return(dbl_to_dbl_fcnvfx(&fpregs[r1],0,
 						&fpregs[t],status));
 				}
+				BUG();
 			case 3: /* FCNVFXT */
 				switch(fmt) {
 				    case 0: /* sgl/sgl */
@@ -464,6 +480,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
 					return(dbl_to_dbl_fcnvfxt(&fpregs[r1],0,
 						&fpregs[t],status));
 				}
+				BUG();
 			case 5: /* FCNVUF (PA2.0 only) */
 				switch(fmt) {
 				    case 0: /* sgl/sgl */
@@ -479,6 +496,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
 					return(dbl_to_dbl_fcnvuf(&fpregs[r1],0,
 						&fpregs[t],status));
 				}
+				BUG();
 			case 6: /* FCNVFU (PA2.0 only) */
 				switch(fmt) {
 				    case 0: /* sgl/sgl */
@@ -494,6 +512,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
 					return(dbl_to_dbl_fcnvfu(&fpregs[r1],0,
 						&fpregs[t],status));
 				}
+				BUG();
 			case 7: /* FCNVFUT (PA2.0 only) */
 				switch(fmt) {
 				    case 0: /* sgl/sgl */
@@ -509,10 +528,11 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
 					return(dbl_to_dbl_fcnvfut(&fpregs[r1],0,
 						&fpregs[t],status));
 				}
+				BUG();
 			case 4: /* undefined */
 				return(MAJOR_0C_EXCP);
 		} /* end of switch subop */
-
+		BUG();
 	case 2: /* class 2 */
 		fpu_type_flags=fpregs[FPU_TYPE_FLAG_POS];
 		r2 = extru(ir, fpr2pos, 5) * sizeof(double)/sizeof(u_int);
@@ -590,6 +610,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
 				    case 3: /* quad not implemented */
 					return(MAJOR_0C_EXCP);
 				}
+				BUG();
 			case 1: /* FTEST */
 				switch (fmt) {
 				    case 0:
@@ -609,8 +630,10 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
 				    case 3:
 					return(MAJOR_0C_EXCP);
 				}
+				BUG();
 		    } /* end of switch subop */
 		} /* end of else for PA1.0 & PA1.1 */
+		BUG();
 	case 3: /* class 3 */
 		r2 = extru(ir,fpr2pos,5) * sizeof(double)/sizeof(u_int);
 		if (r2 == 0)
@@ -633,6 +656,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
 				    case 3: /* quad not implemented */
 					return(MAJOR_0C_EXCP);
 				}
+				BUG();
 			case 1: /* FSUB */
 				switch (fmt) {
 				    case 0:
@@ -645,6 +669,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
 				    case 3: /* quad not implemented */
 					return(MAJOR_0C_EXCP);
 				}
+				BUG();
 			case 2: /* FMPY */
 				switch (fmt) {
 				    case 0:
@@ -657,6 +682,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
 				    case 3: /* quad not implemented */
 					return(MAJOR_0C_EXCP);
 				}
+				BUG();
 			case 3: /* FDIV */
 				switch (fmt) {
 				    case 0:
@@ -669,6 +695,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
 				    case 3: /* quad not implemented */
 					return(MAJOR_0C_EXCP);
 				}
+				BUG();
 			case 4: /* FREM */
 				switch (fmt) {
 				    case 0:
@@ -681,6 +708,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[])
 				    case 3: /* quad not implemented */
 					return(MAJOR_0C_EXCP);
 				}
+				BUG();
 		} /* end of class 3 switch */
 	} /* end of switch(class) */
 
@@ -736,10 +764,12 @@ u_int fpregs[];
 					return(MAJOR_0E_EXCP);
 				    case 1: /* double */
 					fpregs[t+1] = fpregs[r1+1];
+					fallthrough;
 				    case 0: /* single */
 					fpregs[t] = fpregs[r1];
 					return(NOEXCEPTION);
 				}
+				BUG();
 			case 3: /* FABS */
 				switch (fmt) {
 				    case 2:
@@ -747,10 +777,12 @@ u_int fpregs[];
 					return(MAJOR_0E_EXCP);
 				    case 1: /* double */
 					fpregs[t+1] = fpregs[r1+1];
+					fallthrough;
 				    case 0: /* single */
 					fpregs[t] = fpregs[r1] & 0x7fffffff;
 					return(NOEXCEPTION);
 				}
+				BUG();
 			case 6: /* FNEG */
 				switch (fmt) {
 				    case 2:
@@ -758,10 +790,12 @@ u_int fpregs[];
 					return(MAJOR_0E_EXCP);
 				    case 1: /* double */
 					fpregs[t+1] = fpregs[r1+1];
+					fallthrough;
 				    case 0: /* single */
 					fpregs[t] = fpregs[r1] ^ 0x80000000;
 					return(NOEXCEPTION);
 				}
+				BUG();
 			case 7: /* FNEGABS */
 				switch (fmt) {
 				    case 2:
@@ -769,10 +803,12 @@ u_int fpregs[];
 					return(MAJOR_0E_EXCP);
 				    case 1: /* double */
 					fpregs[t+1] = fpregs[r1+1];
+					fallthrough;
 				    case 0: /* single */
 					fpregs[t] = fpregs[r1] | 0x80000000;
 					return(NOEXCEPTION);
 				}
+				BUG();
 			case 4: /* FSQRT */
 				switch (fmt) {
 				    case 0:
@@ -785,6 +821,7 @@ u_int fpregs[];
 				    case 3:
 					return(MAJOR_0E_EXCP);
 				}
+				BUG();
 			case 5: /* FRMD */
 				switch (fmt) {
 				    case 0:
@@ -798,7 +835,7 @@ u_int fpregs[];
 					return(MAJOR_0E_EXCP);
 				}
 		} /* end of switch (subop */
-	
+		BUG();
 	case 1: /* class 1 */
 		df = extru(ir,fpdfpos,2); /* get dest format */
 		/*
@@ -826,6 +863,7 @@ u_int fpregs[];
 				    case 3: /* dbl/dbl */
 					return(MAJOR_0E_EXCP);
 				}
+				BUG();
 			case 1: /* FCNVXF */
 				switch(fmt) {
 				    case 0: /* sgl/sgl */
@@ -841,6 +879,7 @@ u_int fpregs[];
 					return(dbl_to_dbl_fcnvxf(&fpregs[r1],0,
 						&fpregs[t],status));
 				}
+				BUG();
 			case 2: /* FCNVFX */
 				switch(fmt) {
 				    case 0: /* sgl/sgl */
@@ -856,6 +895,7 @@ u_int fpregs[];
 					return(dbl_to_dbl_fcnvfx(&fpregs[r1],0,
 						&fpregs[t],status));
 				}
+				BUG();
 			case 3: /* FCNVFXT */
 				switch(fmt) {
 				    case 0: /* sgl/sgl */
@@ -871,6 +911,7 @@ u_int fpregs[];
 					return(dbl_to_dbl_fcnvfxt(&fpregs[r1],0,
 						&fpregs[t],status));
 				}
+				BUG();
 			case 5: /* FCNVUF (PA2.0 only) */
 				switch(fmt) {
 				    case 0: /* sgl/sgl */
@@ -886,6 +927,7 @@ u_int fpregs[];
 					return(dbl_to_dbl_fcnvuf(&fpregs[r1],0,
 						&fpregs[t],status));
 				}
+				BUG();
 			case 6: /* FCNVFU (PA2.0 only) */
 				switch(fmt) {
 				    case 0: /* sgl/sgl */
@@ -901,6 +943,7 @@ u_int fpregs[];
 					return(dbl_to_dbl_fcnvfu(&fpregs[r1],0,
 						&fpregs[t],status));
 				}
+				BUG();
 			case 7: /* FCNVFUT (PA2.0 only) */
 				switch(fmt) {
 				    case 0: /* sgl/sgl */
@@ -916,9 +959,11 @@ u_int fpregs[];
 					return(dbl_to_dbl_fcnvfut(&fpregs[r1],0,
 						&fpregs[t],status));
 				}
+				BUG();
 			case 4: /* undefined */
 				return(MAJOR_0C_EXCP);
 		} /* end of switch subop */
+		BUG();
 	case 2: /* class 2 */
 		/*
 		 * Be careful out there.
@@ -994,6 +1039,7 @@ u_int fpregs[];
 				}
 		    } /* end of switch subop */
 		} /* end of else for PA1.0 & PA1.1 */
+		BUG();
 	case 3: /* class 3 */
 		/*
 		 * Be careful out there.
@@ -1026,6 +1072,7 @@ u_int fpregs[];
 					return(dbl_fadd(&fpregs[r1],&fpregs[r2],
 						&fpregs[t],status));
 				}
+				BUG();
 			case 1: /* FSUB */
 				switch (fmt) {
 				    case 0:
@@ -1035,6 +1082,7 @@ u_int fpregs[];
 					return(dbl_fsub(&fpregs[r1],&fpregs[r2],
 						&fpregs[t],status));
 				}
+				BUG();
 			case 2: /* FMPY or XMPYU */
 				/*
 				 * check for integer multiply (x bit set)
@@ -1071,6 +1119,7 @@ u_int fpregs[];
 					       &fpregs[r2],&fpregs[t],status));
 				    }
 				}
+				BUG();
 			case 3: /* FDIV */
 				switch (fmt) {
 				    case 0:
@@ -1080,6 +1129,7 @@ u_int fpregs[];
 					return(dbl_fdiv(&fpregs[r1],&fpregs[r2],
 						&fpregs[t],status));
 				}
+				BUG();
 			case 4: /* FREM */
 				switch (fmt) {
 				    case 0:
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 591a4e939415..3f7d6d5b56ac 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -378,8 +378,8 @@ static void __init map_pages(unsigned long start_vaddr,
 
 #if CONFIG_PGTABLE_LEVELS == 3
 		if (pud_none(*pud)) {
-			pmd = memblock_alloc(PAGE_SIZE << PMD_ORDER,
-					     PAGE_SIZE << PMD_ORDER);
+			pmd = memblock_alloc(PAGE_SIZE << PMD_TABLE_ORDER,
+					     PAGE_SIZE << PMD_TABLE_ORDER);
 			if (!pmd)
 				panic("pmd allocation failed.\n");
 			pud_populate(NULL, pud, pmd);
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 678a0acbad7b..ba5b66189358 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -94,10 +94,6 @@ config STACKTRACE_SUPPORT
 	bool
 	default y
 
-config TRACE_IRQFLAGS_SUPPORT
-	bool
-	default y
-
 config LOCKDEP_SUPPORT
 	bool
 	default y
@@ -123,6 +119,7 @@ config PPC
 	select ARCH_HAS_COPY_MC			if PPC64
 	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEBUG_VM_PGTABLE
+	select ARCH_HAS_DEBUG_WX		if STRICT_KERNEL_RWX
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_DMA_MAP_DIRECT 		if PPC_PSERIES
 	select ARCH_HAS_ELF_RANDOMIZE
@@ -182,6 +179,7 @@ config PPC
 	select GENERIC_IRQ_SHOW
 	select GENERIC_IRQ_SHOW_LEVEL
 	select GENERIC_PCI_IOMAP		if PCI
+	select GENERIC_PTDUMP
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_TIME_VSYSCALL
 	select GENERIC_VDSO_TIME_NS
@@ -268,6 +266,7 @@ config PPC
 	select STRICT_KERNEL_RWX if STRICT_MODULE_RWX
 	select SYSCTL_EXCEPTION_TRACE
 	select THREAD_INFO_IN_TASK
+	select TRACE_IRQFLAGS_SUPPORT
 	select VIRT_TO_BUS			if !PPC64
 	#
 	# Please keep this list sorted alphabetically.
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 205cd77f321f..192f0ed0097f 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -365,36 +365,6 @@ config FAIL_IOMMU
 
 	  If you are unsure, say N.
 
-config PPC_PTDUMP
-	bool "Export kernel pagetable layout to userspace via debugfs"
-	depends on DEBUG_KERNEL && DEBUG_FS
-	help
-	  This option exports the state of the kernel pagetables to a
-	  debugfs file. This is only useful for kernel developers who are
-	  working in architecture specific areas of the kernel - probably
-	  not a good idea to enable this feature in a production kernel.
-
-	  If you are unsure, say N.
-
-config PPC_DEBUG_WX
-	bool "Warn on W+X mappings at boot"
-	depends on PPC_PTDUMP && STRICT_KERNEL_RWX
-	help
-	  Generate a warning if any W+X mappings are found at boot.
-
-	  This is useful for discovering cases where the kernel is leaving
-	  W+X mappings after applying NX, as such mappings are a security risk.
-
-	  Note that even if the check fails, your kernel is possibly
-	  still fine, as W+X mappings are not a security hole in
-	  themselves, what they do is that they make the exploitation
-	  of other unfixed kernel bugs easier.
-
-	  There is no runtime or memory usage effect of this option
-	  once the kernel has booted up - it's a one time check.
-
-	  If in doubt, say "Y".
-
 config PPC_FAST_ENDIAN_SWITCH
 	bool "Deprecated fast endian-switch syscall"
 	depends on DEBUG_KERNEL && PPC_BOOK3S_64
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 6505d66f1193..aa6808e70647 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -122,6 +122,7 @@ endif
 
 LDFLAGS_vmlinux-y := -Bstatic
 LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie
+LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) += -z notext
 LDFLAGS_vmlinux	:= $(LDFLAGS_vmlinux-y)
 
 ifdef CONFIG_PPC64
@@ -407,7 +408,8 @@ endef
 
 PHONY += install
 install:
-	$(Q)$(MAKE) $(build)=$(boot) install
+	sh -x $(srctree)/$(boot)/install.sh "$(KERNELRELEASE)" vmlinux \
+	System.map "$(INSTALL_PATH)"
 
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index e312ea802aa6..6900d0ac2421 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -341,7 +341,6 @@ image-$(CONFIG_TQM8541)			+= cuImage.tqm8541
 image-$(CONFIG_TQM8548)			+= cuImage.tqm8548
 image-$(CONFIG_TQM8555)			+= cuImage.tqm8555
 image-$(CONFIG_TQM8560)			+= cuImage.tqm8560
-image-$(CONFIG_SBC8548)			+= cuImage.sbc8548
 image-$(CONFIG_KSI8560)			+= cuImage.ksi8560
 
 # Board ports in arch/powerpc/platform/86xx/Kconfig
@@ -444,16 +443,6 @@ $(obj)/zImage:		$(addprefix $(obj)/, $(image-y))
 $(obj)/zImage.initrd:	$(addprefix $(obj)/, $(initrd-y))
 	$(Q)rm -f $@; ln $< $@
 
-# Only install the vmlinux
-install: $(CONFIGURE) $(addprefix $(obj)/, $(image-y))
-	sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)"
-
-# Install the vmlinux and other built boot targets.
-zInstall: $(CONFIGURE) $(addprefix $(obj)/, $(image-y))
-	sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" $^
-
-PHONY += install zInstall
-
 # anything not in $(targets)
 clean-files += $(image-) $(initrd-) cuImage.* dtbImage.* treeImage.* \
 	zImage zImage.initrd zImage.chrp zImage.coff zImage.holly \
diff --git a/arch/powerpc/boot/dts/fsl/sbc8641d.dts b/arch/powerpc/boot/dts/fsl/sbc8641d.dts
deleted file mode 100644
index 3dca10acc161..000000000000
--- a/arch/powerpc/boot/dts/fsl/sbc8641d.dts
+++ /dev/null
@@ -1,176 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * SBC8641D Device Tree Source
- *
- * Copyright 2008 Wind River Systems Inc.
- *
- * Paul Gortmaker (see MAINTAINERS for contact information)
- *
- * Based largely on the mpc8641_hpcn.dts by Freescale Semiconductor Inc.
- */
-
-/include/ "mpc8641si-pre.dtsi"
-
-/ {
-	model = "SBC8641D";
-	compatible = "wind,sbc8641";
-
-	memory {
-		device_type = "memory";
-		reg = <0x00000000 0x20000000>;	// 512M at 0x0
-	};
-
-	lbc: localbus@f8005000 {
-		reg = <0xf8005000 0x1000>;
-
-		ranges = <0 0 0xff000000 0x01000000	// 16MB Boot flash
-			  1 0 0xf0000000 0x00010000	// 64KB EEPROM
-			  2 0 0xf1000000 0x00100000	// EPLD (1MB)
-			  3 0 0xe0000000 0x04000000	// 64MB LB SDRAM (CS3)
-			  4 0 0xe4000000 0x04000000	// 64MB LB SDRAM (CS4)
-			  6 0 0xf4000000 0x00100000	// LCD display (1MB)
-			  7 0 0xe8000000 0x04000000>;	// 64MB OneNAND
-
-		flash@0,0 {
-			compatible = "cfi-flash";
-			reg = <0 0 0x01000000>;
-			bank-width = <2>;
-			device-width = <2>;
-			#address-cells = <1>;
-			#size-cells = <1>;
-			partition@0 {
-				label = "dtb";
-				reg = <0x00000000 0x00100000>;
-				read-only;
-			};
-			partition@300000 {
-				label = "kernel";
-				reg = <0x00100000 0x00400000>;
-				read-only;
-			};
-			partition@400000 {
-				label = "fs";
-				reg = <0x00500000 0x00a00000>;
-			};
-			partition@700000 {
-				label = "firmware";
-				reg = <0x00f00000 0x00100000>;
-				read-only;
-			};
-		};
-
-		epld@2,0 {
-			compatible = "wrs,epld-localbus";
-			#address-cells = <2>;
-			#size-cells = <1>;
-			reg = <2 0 0x100000>;
-			ranges = <0 0 5 0 1	// User switches
-				  1 0 5 1 1	// Board ID/Rev
-				  3 0 5 3 1>;	// LEDs
-		};
-	};
-
-	soc: soc@f8000000 {
-		ranges = <0x00000000 0xf8000000 0x00100000>;
-
-		enet0: ethernet@24000 {
-			tbi-handle = <&tbi0>;
-			phy-handle = <&phy0>;
-			phy-connection-type = "rgmii-id";
-		};
-
-		mdio@24520 {
-			phy0: ethernet-phy@1f {
-				reg = <0x1f>;
-			};
-			phy1: ethernet-phy@0 {
-				reg = <0>;
-			};
-			phy2: ethernet-phy@1 {
-				reg = <1>;
-			};
-			phy3: ethernet-phy@2 {
-				reg = <2>;
-			};
-			tbi0: tbi-phy@11 {
-				reg = <0x11>;
-				device_type = "tbi-phy";
-			};
-		};
-
-		enet1: ethernet@25000 {
-			tbi-handle = <&tbi1>;
-			phy-handle = <&phy1>;
-			phy-connection-type = "rgmii-id";
-		};
-
-		mdio@25520 {
-			tbi1: tbi-phy@11 {
-				reg = <0x11>;
-				device_type = "tbi-phy";
-			};
-		};
-
-		enet2: ethernet@26000 {
-			tbi-handle = <&tbi2>;
-			phy-handle = <&phy2>;
-			phy-connection-type = "rgmii-id";
-		};
-
-		mdio@26520 {
-			tbi2: tbi-phy@11 {
-				reg = <0x11>;
-				device_type = "tbi-phy";
-			};
-		};
-
-		enet3: ethernet@27000 {
-			tbi-handle = <&tbi3>;
-			phy-handle = <&phy3>;
-			phy-connection-type = "rgmii-id";
-		};
-
-		mdio@27520 {
-			tbi3: tbi-phy@11 {
-				reg = <0x11>;
-				device_type = "tbi-phy";
-			};
-		};
-	};
-
-	pci0: pcie@f8008000 {
-		reg = <0xf8008000 0x1000>;
-		ranges = <0x02000000 0x0 0x80000000 0x80000000 0x0 0x20000000
-			  0x01000000 0x0 0x00000000 0xe2000000 0x0 0x00100000>;
-		interrupt-map-mask = <0xff00 0 0 7>;
-
-		pcie@0 {
-			ranges = <0x02000000 0x0 0x80000000
-				  0x02000000 0x0 0x80000000
-				  0x0 0x20000000
-
-				  0x01000000 0x0 0x00000000
-				  0x01000000 0x0 0x00000000
-				  0x0 0x00100000>;
-		};
-
-	};
-
-	pci1: pcie@f8009000 {
-		reg = <0xf8009000 0x1000>;
-		ranges = <0x02000000 0x0 0xa0000000 0xa0000000 0x0 0x20000000
-			  0x01000000 0x0 0x00000000 0xe3000000 0x0 0x00100000>;
-
-		pcie@0 {
-			ranges = <0x02000000 0x0 0xa0000000
-				  0x02000000 0x0 0xa0000000
-				  0x0 0x20000000
-
-				  0x01000000 0x0 0x00000000
-				  0x01000000 0x0 0x00000000
-				  0x0 0x00100000>;
-		};
-	};
-};
-
-/include/ "mpc8641si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts
index 974abbdda249..65b270a90f94 100644
--- a/arch/powerpc/boot/dts/microwatt.dts
+++ b/arch/powerpc/boot/dts/microwatt.dts
@@ -127,6 +127,18 @@
 			fifo-size = <16>;
 			interrupts = <0x10 0x1>;
 		};
+
+		ethernet@8020000 {
+			compatible = "litex,liteeth";
+			reg = <0x8021000 0x100
+				0x8020800 0x100
+				0x8030000 0x2000>;
+			reg-names = "mac", "mido", "buffer";
+			litex,rx-slots = <2>;
+			litex,tx-slots = <2>;
+			litex,slot-size = <0x800>;
+			interrupts = <0x11 0x1>;
+		};
 	};
 
 	chosen {
diff --git a/arch/powerpc/boot/dts/sbc8548-altflash.dts b/arch/powerpc/boot/dts/sbc8548-altflash.dts
deleted file mode 100644
index bb7a1e712bb7..000000000000
--- a/arch/powerpc/boot/dts/sbc8548-altflash.dts
+++ /dev/null
@@ -1,111 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * SBC8548 Device Tree Source
- *
- * Configured for booting off the alternate (64MB SODIMM) flash.
- * Requires switching JP12 jumpers and changing SW2.8 setting.
- *
- * Copyright 2013 Wind River Systems Inc.
- *
- * Paul Gortmaker (see MAINTAINERS for contact information)
- */
-
-
-/dts-v1/;
-
-/include/ "sbc8548-pre.dtsi"
-
-/{
-	localbus@e0000000 {
-		#address-cells = <2>;
-		#size-cells = <1>;
-		compatible = "simple-bus";
-		reg = <0xe0000000 0x5000>;
-		interrupt-parent = <&mpic>;
-
-		ranges = <0x0 0x0 0xfc000000 0x04000000		/*64MB Flash*/
-			  0x3 0x0 0xf0000000 0x04000000		/*64MB SDRAM*/
-			  0x4 0x0 0xf4000000 0x04000000 	/*64MB SDRAM*/
-			  0x5 0x0 0xf8000000 0x00b10000		/* EPLD */
-			  0x6 0x0 0xef800000 0x00800000>;	/*8MB Flash*/
-
-		flash@0,0 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			reg = <0x0 0x0 0x04000000>;
-			compatible = "intel,JS28F128", "cfi-flash";
-			bank-width = <4>;
-			device-width = <1>;
-			partition@0 {
-				label = "space";
-				/* FC000000 -> FFEFFFFF */
-				reg = <0x00000000 0x03f00000>;
-			};
-			partition@3f00000 {
-				label = "bootloader";
-				/* FFF00000 -> FFFFFFFF */
-				reg = <0x03f00000 0x00100000>;
-				read-only;
-			};
-                };
-
-
-		epld@5,0 {
-			compatible = "wrs,epld-localbus";
-			#address-cells = <2>;
-			#size-cells = <1>;
-			reg = <0x5 0x0 0x00b10000>;
-			ranges = <
-				0x0 0x0 0x5 0x000000 0x1fff	/* LED */
-				0x1 0x0 0x5 0x100000 0x1fff	/* Switches */
-				0x3 0x0 0x5 0x300000 0x1fff	/* HW Rev. */
-				0xb 0x0	0x5 0xb00000 0x1fff	/* EEPROM */
-			>;
-
-			led@0,0 {
-				compatible = "led";
-				reg = <0x0 0x0 0x1fff>;
-			};
-
-			switches@1,0 {
-				compatible = "switches";
-				reg = <0x1 0x0 0x1fff>;
-			};
-
-			hw-rev@3,0 {
-				compatible = "hw-rev";
-				reg = <0x3 0x0 0x1fff>;
-			};
-
-			eeprom@b,0 {
-				compatible = "eeprom";
-				reg = <0xb 0 0x1fff>;
-			};
-
-		};
-
-		alt-flash@6,0 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "intel,JS28F640", "cfi-flash";
-			reg = <0x6 0x0 0x800000>;
-			bank-width = <1>;
-			device-width = <1>;
-			partition@0 {
-				label = "space";
-				/* EF800000 -> EFF9FFFF */
-				reg = <0x00000000 0x007a0000>;
-			};
-			partition@7a0000 {
-				label = "bootloader";
-				/* EFFA0000 -> EFFFFFFF */
-				reg = <0x007a0000 0x00060000>;
-				read-only;
-			};
-		};
-
-
-        };
-};
-
-/include/ "sbc8548-post.dtsi"
diff --git a/arch/powerpc/boot/dts/sbc8548-post.dtsi b/arch/powerpc/boot/dts/sbc8548-post.dtsi
deleted file mode 100644
index 9d848d409408..000000000000
--- a/arch/powerpc/boot/dts/sbc8548-post.dtsi
+++ /dev/null
@@ -1,289 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * SBC8548 Device Tree Source
- *
- * Copyright 2007 Wind River Systems Inc.
- *
- * Paul Gortmaker (see MAINTAINERS for contact information)
- */
-
-/{
-	soc8548@e0000000 {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		device_type = "soc";
-		ranges = <0x00000000 0xe0000000 0x00100000>;
-		bus-frequency = <0>;
-		compatible = "simple-bus";
-
-		ecm-law@0 {
-			compatible = "fsl,ecm-law";
-			reg = <0x0 0x1000>;
-			fsl,num-laws = <10>;
-		};
-
-		ecm@1000 {
-			compatible = "fsl,mpc8548-ecm", "fsl,ecm";
-			reg = <0x1000 0x1000>;
-			interrupts = <17 2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		memory-controller@2000 {
-			compatible = "fsl,mpc8548-memory-controller";
-			reg = <0x2000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <0x12 0x2>;
-		};
-
-		L2: l2-cache-controller@20000 {
-			compatible = "fsl,mpc8548-l2-cache-controller";
-			reg = <0x20000 0x1000>;
-			cache-line-size = <0x20>;	// 32 bytes
-			cache-size = <0x80000>;	// L2, 512K
-			interrupt-parent = <&mpic>;
-			interrupts = <0x10 0x2>;
-		};
-
-		i2c@3000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <0>;
-			compatible = "fsl-i2c";
-			reg = <0x3000 0x100>;
-			interrupts = <0x2b 0x2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
-		};
-
-		i2c@3100 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <1>;
-			compatible = "fsl-i2c";
-			reg = <0x3100 0x100>;
-			interrupts = <0x2b 0x2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
-		};
-
-		dma@21300 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "fsl,mpc8548-dma", "fsl,eloplus-dma";
-			reg = <0x21300 0x4>;
-			ranges = <0x0 0x21100 0x200>;
-			cell-index = <0>;
-			dma-channel@0 {
-				compatible = "fsl,mpc8548-dma-channel",
-						"fsl,eloplus-dma-channel";
-				reg = <0x0 0x80>;
-				cell-index = <0>;
-				interrupt-parent = <&mpic>;
-				interrupts = <20 2>;
-			};
-			dma-channel@80 {
-				compatible = "fsl,mpc8548-dma-channel",
-						"fsl,eloplus-dma-channel";
-				reg = <0x80 0x80>;
-				cell-index = <1>;
-				interrupt-parent = <&mpic>;
-				interrupts = <21 2>;
-			};
-			dma-channel@100 {
-				compatible = "fsl,mpc8548-dma-channel",
-						"fsl,eloplus-dma-channel";
-				reg = <0x100 0x80>;
-				cell-index = <2>;
-				interrupt-parent = <&mpic>;
-				interrupts = <22 2>;
-			};
-			dma-channel@180 {
-				compatible = "fsl,mpc8548-dma-channel",
-						"fsl,eloplus-dma-channel";
-				reg = <0x180 0x80>;
-				cell-index = <3>;
-				interrupt-parent = <&mpic>;
-				interrupts = <23 2>;
-			};
-		};
-
-		enet0: ethernet@24000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <0>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x24000 0x1000>;
-			ranges = <0x0 0x24000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <0x1d 0x2 0x1e 0x2 0x22 0x2>;
-			interrupt-parent = <&mpic>;
-			tbi-handle = <&tbi0>;
-			phy-handle = <&phy0>;
-
-			mdio@520 {
-				#address-cells = <1>;
-				#size-cells = <0>;
-				compatible = "fsl,gianfar-mdio";
-				reg = <0x520 0x20>;
-
-				phy0: ethernet-phy@19 {
-					interrupt-parent = <&mpic>;
-					interrupts = <0x6 0x1>;
-					reg = <0x19>;
-				};
-				phy1: ethernet-phy@1a {
-					interrupt-parent = <&mpic>;
-					interrupts = <0x7 0x1>;
-					reg = <0x1a>;
-				};
-				tbi0: tbi-phy@11 {
-					reg = <0x11>;
-					device_type = "tbi-phy";
-				};
-			};
-		};
-
-		enet1: ethernet@25000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <1>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x25000 0x1000>;
-			ranges = <0x0 0x25000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <0x23 0x2 0x24 0x2 0x28 0x2>;
-			interrupt-parent = <&mpic>;
-			tbi-handle = <&tbi1>;
-			phy-handle = <&phy1>;
-
-			mdio@520 {
-				#address-cells = <1>;
-				#size-cells = <0>;
-				compatible = "fsl,gianfar-tbi";
-				reg = <0x520 0x20>;
-
-				tbi1: tbi-phy@11 {
-					reg = <0x11>;
-					device_type = "tbi-phy";
-				};
-			};
-		};
-
-		serial0: serial@4500 {
-			cell-index = <0>;
-			device_type = "serial";
-			compatible = "fsl,ns16550", "ns16550";
-			reg = <0x4500 0x100>;	// reg base, size
-			clock-frequency = <0>;	// should we fill in in uboot?
-			interrupts = <0x2a 0x2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		serial1: serial@4600 {
-			cell-index = <1>;
-			device_type = "serial";
-			compatible = "fsl,ns16550", "ns16550";
-			reg = <0x4600 0x100>;	// reg base, size
-			clock-frequency = <0>;	// should we fill in in uboot?
-			interrupts = <0x2a 0x2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		global-utilities@e0000 {	//global utilities reg
-			compatible = "fsl,mpc8548-guts";
-			reg = <0xe0000 0x1000>;
-			fsl,has-rstcr;
-		};
-
-		crypto@30000 {
-			compatible = "fsl,sec2.1", "fsl,sec2.0";
-			reg = <0x30000 0x10000>;
-			interrupts = <45 2>;
-			interrupt-parent = <&mpic>;
-			fsl,num-channels = <4>;
-			fsl,channel-fifo-len = <24>;
-			fsl,exec-units-mask = <0xfe>;
-			fsl,descriptor-types-mask = <0x12b0ebf>;
-		};
-
-		mpic: pic@40000 {
-			interrupt-controller;
-			#address-cells = <0>;
-			#interrupt-cells = <2>;
-			reg = <0x40000 0x40000>;
-			compatible = "chrp,open-pic";
-			device_type = "open-pic";
-		};
-	};
-
-	pci0: pci@e0008000 {
-		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
-		interrupt-map = <
-			/* IDSEL 0x01 (PCI-X slot) @66MHz */
-			0x0800 0x0 0x0 0x1 &mpic 0x2 0x1
-			0x0800 0x0 0x0 0x2 &mpic 0x3 0x1
-			0x0800 0x0 0x0 0x3 &mpic 0x4 0x1
-			0x0800 0x0 0x0 0x4 &mpic 0x1 0x1
-
-			/* IDSEL 0x11 (PCI, 3.3V 32bit) @33MHz */
-			0x8800 0x0 0x0 0x1 &mpic 0x2 0x1
-			0x8800 0x0 0x0 0x2 &mpic 0x3 0x1
-			0x8800 0x0 0x0 0x3 &mpic 0x4 0x1
-			0x8800 0x0 0x0 0x4 &mpic 0x1 0x1>;
-
-		interrupt-parent = <&mpic>;
-		interrupts = <0x18 0x2>;
-		bus-range = <0 0>;
-		ranges = <0x02000000 0x0 0x80000000 0x80000000 0x0 0x10000000
-			  0x01000000 0x0 0x00000000 0xe2000000 0x0 0x00800000>;
-		clock-frequency = <66000000>;
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0xe0008000 0x1000>;
-		compatible = "fsl,mpc8540-pcix", "fsl,mpc8540-pci";
-		device_type = "pci";
-	};
-
-	pci1: pcie@e000a000 {
-		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
-		interrupt-map = <
-
-			/* IDSEL 0x0 (PEX) */
-			0x0000 0x0 0x0 0x1 &mpic 0x0 0x1
-			0x0000 0x0 0x0 0x2 &mpic 0x1 0x1
-			0x0000 0x0 0x0 0x3 &mpic 0x2 0x1
-			0x0000 0x0 0x0 0x4 &mpic 0x3 0x1>;
-
-		interrupt-parent = <&mpic>;
-		interrupts = <0x1a 0x2>;
-		bus-range = <0x0 0xff>;
-		ranges = <0x02000000 0x0 0xa0000000 0xa0000000 0x0 0x10000000
-			  0x01000000 0x0 0x00000000 0xe2800000 0x0 0x08000000>;
-		clock-frequency = <33000000>;
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0xe000a000 0x1000>;
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		pcie@0 {
-			reg = <0x0 0x0 0x0 0x0 0x0>;
-			#size-cells = <2>;
-			#address-cells = <3>;
-			device_type = "pci";
-			ranges = <0x02000000 0x0 0xa0000000
-				  0x02000000 0x0 0xa0000000
-				  0x0 0x10000000
-
-				  0x01000000 0x0 0x00000000
-				  0x01000000 0x0 0x00000000
-				  0x0 0x00800000>;
-		};
-	};
-};
diff --git a/arch/powerpc/boot/dts/sbc8548-pre.dtsi b/arch/powerpc/boot/dts/sbc8548-pre.dtsi
deleted file mode 100644
index 0e3665fd15d0..000000000000
--- a/arch/powerpc/boot/dts/sbc8548-pre.dtsi
+++ /dev/null
@@ -1,48 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * SBC8548 Device Tree Source
- *
- * Copyright 2007 Wind River Systems Inc.
- *
- * Paul Gortmaker (see MAINTAINERS for contact information)
- */
-
-/{
-	model = "SBC8548";
-	compatible = "SBC8548";
-	#address-cells = <1>;
-	#size-cells = <1>;
-
-	aliases {
-		ethernet0 = &enet0;
-		ethernet1 = &enet1;
-		serial0 = &serial0;
-		serial1 = &serial1;
-		pci0 = &pci0;
-		pci1 = &pci1;
-	};
-
-	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		PowerPC,8548@0 {
-			device_type = "cpu";
-			reg = <0>;
-			d-cache-line-size = <0x20>;	// 32 bytes
-			i-cache-line-size = <0x20>;	// 32 bytes
-			d-cache-size = <0x8000>;	// L1, 32K
-			i-cache-size = <0x8000>;	// L1, 32K
-			timebase-frequency = <0>;	// From uboot
-			bus-frequency = <0>;
-			clock-frequency = <0>;
-			next-level-cache = <&L2>;
-		};
-	};
-
-	memory {
-		device_type = "memory";
-		reg = <0x00000000 0x10000000>;
-	};
-
-};
diff --git a/arch/powerpc/boot/dts/sbc8548.dts b/arch/powerpc/boot/dts/sbc8548.dts
deleted file mode 100644
index ce0a119f496e..000000000000
--- a/arch/powerpc/boot/dts/sbc8548.dts
+++ /dev/null
@@ -1,106 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * SBC8548 Device Tree Source
- *
- * Copyright 2007 Wind River Systems Inc.
- *
- * Paul Gortmaker (see MAINTAINERS for contact information)
- */
-
-
-/dts-v1/;
-
-/include/ "sbc8548-pre.dtsi"
-
-/{
-	localbus@e0000000 {
-		#address-cells = <2>;
-		#size-cells = <1>;
-		compatible = "simple-bus";
-		reg = <0xe0000000 0x5000>;
-		interrupt-parent = <&mpic>;
-
-		ranges = <0x0 0x0 0xff800000 0x00800000		/*8MB Flash*/
-			  0x3 0x0 0xf0000000 0x04000000		/*64MB SDRAM*/
-			  0x4 0x0 0xf4000000 0x04000000 	/*64MB SDRAM*/
-			  0x5 0x0 0xf8000000 0x00b10000		/* EPLD */
-			  0x6 0x0 0xec000000 0x04000000>;	/*64MB Flash*/
-
-
-		flash@0,0 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "intel,JS28F640", "cfi-flash";
-			reg = <0x0 0x0 0x800000>;
-			bank-width = <1>;
-			device-width = <1>;
-			partition@0 {
-				label = "space";
-				/* FF800000 -> FFF9FFFF */
-				reg = <0x00000000 0x007a0000>;
-			};
-			partition@7a0000 {
-				label = "bootloader";
-				/* FFFA0000 -> FFFFFFFF */
-				reg = <0x007a0000 0x00060000>;
-				read-only;
-			};
-		};
-
-		epld@5,0 {
-			compatible = "wrs,epld-localbus";
-			#address-cells = <2>;
-			#size-cells = <1>;
-			reg = <0x5 0x0 0x00b10000>;
-			ranges = <
-				0x0 0x0 0x5 0x000000 0x1fff	/* LED */
-				0x1 0x0 0x5 0x100000 0x1fff	/* Switches */
-				0x3 0x0 0x5 0x300000 0x1fff	/* HW Rev. */
-				0xb 0x0	0x5 0xb00000 0x1fff	/* EEPROM */
-			>;
-
-			led@0,0 {
-				compatible = "led";
-				reg = <0x0 0x0 0x1fff>;
-			};
-
-			switches@1,0 {
-				compatible = "switches";
-				reg = <0x1 0x0 0x1fff>;
-			};
-
-			hw-rev@3,0 {
-				compatible = "hw-rev";
-				reg = <0x3 0x0 0x1fff>;
-			};
-
-			eeprom@b,0 {
-				compatible = "eeprom";
-				reg = <0xb 0 0x1fff>;
-			};
-
-		};
-
-		alt-flash@6,0 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			reg = <0x6 0x0 0x04000000>;
-			compatible = "intel,JS28F128", "cfi-flash";
-			bank-width = <4>;
-			device-width = <1>;
-			partition@0 {
-				label = "space";
-				/* EC000000 -> EFEFFFFF */
-				reg = <0x00000000 0x03f00000>;
-			};
-			partition@3f00000 {
-				label = "bootloader";
-				/* EFF00000 -> EFFFFFFF */
-				reg = <0x03f00000 0x00100000>;
-				read-only;
-			};
-                };
-        };
-};
-
-/include/ "sbc8548-post.dtsi"
diff --git a/arch/powerpc/boot/dts/wii.dts b/arch/powerpc/boot/dts/wii.dts
index aaa381da1906..e9c945b123c6 100644
--- a/arch/powerpc/boot/dts/wii.dts
+++ b/arch/powerpc/boot/dts/wii.dts
@@ -216,7 +216,18 @@
 
 		control@d800100 {
 			compatible = "nintendo,hollywood-control";
-			reg = <0x0d800100 0x300>;
+			/*
+			 * Both the address and length are wrong, according to
+			 * Wiibrew this should be <0x0d800000 0x400>, but it
+			 * requires refactoring the PIC1, GPIO and OTP nodes
+			 * before changing that.
+			 */
+			reg = <0x0d800100 0xa0>;
+		};
+
+		otp@d8001ec {
+			compatible = "nintendo,hollywood-otp";
+			reg = <0x0d8001ec 0x8>;
 		};
 
 		disk@d806000 {
diff --git a/arch/powerpc/boot/install.sh b/arch/powerpc/boot/install.sh
index b6a256bc96ee..14473150ddb4 100644
--- a/arch/powerpc/boot/install.sh
+++ b/arch/powerpc/boot/install.sh
@@ -15,12 +15,25 @@
 #   $2 - kernel image file
 #   $3 - kernel map file
 #   $4 - default install path (blank if root directory)
-#   $5 and more - kernel boot files; zImage*, uImage, cuImage.*, etc.
 #
 
 # Bail with error code if anything goes wrong
 set -e
 
+verify () {
+	if [ ! -f "$1" ]; then
+		echo ""                                                   1>&2
+		echo " *** Missing file: $1"                              1>&2
+		echo ' *** You need to run "make" before "make install".' 1>&2
+		echo ""                                                   1>&2
+		exit 1
+	fi
+}
+
+# Make sure the files actually exist
+verify "$2"
+verify "$3"
+
 # User may have a custom install script
 
 if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
@@ -41,15 +54,3 @@ fi
 
 cat $2 > $4/$image_name
 cp $3 $4/System.map
-
-# Copy all the bootable image files
-path=$4
-shift 4
-while [ $# -ne 0 ]; do
-	image_name=`basename $1`
-	if [ -f $path/$image_name ]; then
-		mv $path/$image_name $path/$image_name.old
-	fi
-	cat $1 > $path/$image_name
-	shift
-done;
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index 1f4676bab786..1cd82564c996 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -298,7 +298,7 @@ cuboot*)
     *-tqm8541|*-mpc8560*|*-tqm8560|*-tqm8555|*-ksi8560*)
         platformo=$object/cuboot-85xx-cpm2.o
         ;;
-    *-mpc85*|*-tqm85*|*-sbc85*)
+    *-mpc85*|*-tqm85*)
         platformo=$object/cuboot-85xx.o
         ;;
     *-amigaone)
diff --git a/arch/powerpc/configs/85xx/sbc8548_defconfig b/arch/powerpc/configs/85xx/sbc8548_defconfig
deleted file mode 100644
index 258881727119..000000000000
--- a/arch/powerpc/configs/85xx/sbc8548_defconfig
+++ /dev/null
@@ -1,50 +0,0 @@
-CONFIG_PPC_85xx=y
-CONFIG_SYSVIPC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_EXPERT=y
-CONFIG_SLAB=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_SBC8548=y
-CONFIG_GEN_RTC=y
-CONFIG_BINFMT_MISC=y
-CONFIG_MATH_EMULATION=y
-# CONFIG_SECCOMP is not set
-CONFIG_PCI=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_SYN_COOKIES=y
-# CONFIG_IPV6 is not set
-# CONFIG_FW_LOADER is not set
-CONFIG_MTD=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_CFI_ADV_OPTIONS=y
-CONFIG_MTD_CFI_GEOMETRY=y
-CONFIG_MTD_CFI_I4=y
-CONFIG_MTD_CFI_INTELEXT=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_NETDEVICES=y
-CONFIG_GIANFAR=y
-CONFIG_BROADCOM_PHY=y
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-# CONFIG_HW_RANDOM is not set
-# CONFIG_USB_SUPPORT is not set
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_NFS_FS=y
-CONFIG_ROOT_NFS=y
diff --git a/arch/powerpc/configs/microwatt_defconfig b/arch/powerpc/configs/microwatt_defconfig
index a08b739123da..9465209b8c5b 100644
--- a/arch/powerpc/configs/microwatt_defconfig
+++ b/arch/powerpc/configs/microwatt_defconfig
@@ -5,6 +5,7 @@ CONFIG_PREEMPT_VOLUNTARY=y
 CONFIG_TICK_CPU_ACCOUNTING=y
 CONFIG_LOG_BUF_SHIFT=16
 CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12
+CONFIG_CGROUPS=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_KALLSYMS_ALL=y
@@ -53,10 +54,12 @@ CONFIG_MTD_SPI_NOR=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_NETDEVICES=y
+CONFIG_LITEX_LITEETH=y
 # CONFIG_WLAN is not set
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
 CONFIG_SERIAL_8250=y
 # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
 CONFIG_SERIAL_8250_CONSOLE=y
@@ -76,8 +79,10 @@ CONFIG_SPI_SPIDEV=y
 CONFIG_EXT4_FS=y
 # CONFIG_FILE_LOCKING is not set
 # CONFIG_DNOTIFY is not set
-# CONFIG_INOTIFY_USER is not set
+CONFIG_AUTOFS_FS=y
+CONFIG_TMPFS=y
 # CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_CRYPTO_SHA256=y
 # CONFIG_CRYPTO_HW is not set
 # CONFIG_XZ_DEC_X86 is not set
 # CONFIG_XZ_DEC_IA64 is not set
diff --git a/arch/powerpc/configs/mpc85xx_base.config b/arch/powerpc/configs/mpc85xx_base.config
index b1593fe6f70b..85907b776908 100644
--- a/arch/powerpc/configs/mpc85xx_base.config
+++ b/arch/powerpc/configs/mpc85xx_base.config
@@ -13,7 +13,6 @@ CONFIG_P1022_DS=y
 CONFIG_P1022_RDK=y
 CONFIG_P1023_RDB=y
 CONFIG_TWR_P102x=y
-CONFIG_SBC8548=y
 CONFIG_SOCRATES=y
 CONFIG_STX_GP3=y
 CONFIG_TQM8540=y
diff --git a/arch/powerpc/configs/mpc86xx_base.config b/arch/powerpc/configs/mpc86xx_base.config
index 67bd1fa036ee..588870e6af3b 100644
--- a/arch/powerpc/configs/mpc86xx_base.config
+++ b/arch/powerpc/configs/mpc86xx_base.config
@@ -1,6 +1,5 @@
 CONFIG_PPC_86xx=y
 CONFIG_MPC8641_HPCN=y
-CONFIG_SBC8641D=y
 CONFIG_MPC8610_HPCD=y
 CONFIG_GEF_PPC9A=y
 CONFIG_GEF_SBC310=y
diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig
index d21f266cea9a..c74dc76b1d0d 100644
--- a/arch/powerpc/configs/mpc885_ads_defconfig
+++ b/arch/powerpc/configs/mpc885_ads_defconfig
@@ -1,19 +1,30 @@
-CONFIG_PPC_8xx=y
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BPF_JIT=y
+CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_EXPERT=y
 # CONFIG_ELF_CORE is not set
 # CONFIG_BASE_FULL is not set
 # CONFIG_FUTEX is not set
+CONFIG_PERF_EVENTS=y
 # CONFIG_VM_EVENT_COUNTERS is not set
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_PARTITION_ADVANCED=y
+CONFIG_PPC_8xx=y
+CONFIG_8xx_GPIO=y
+CONFIG_SMC_UCODE_PATCH=y
+CONFIG_PIN_TLB=y
 CONFIG_GEN_RTC=y
 CONFIG_HZ_100=y
+CONFIG_MATH_EMULATION=y
+CONFIG_PPC_16K_PAGES=y
+CONFIG_ADVANCED_OPTIONS=y
 # CONFIG_SECCOMP is not set
+CONFIG_STRICT_KERNEL_RWX=y
+CONFIG_MODULES=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -21,7 +32,6 @@ CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_PNP=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_BLOCK=y
@@ -34,6 +44,7 @@ CONFIG_MTD_CFI_GEOMETRY=y
 # CONFIG_MTD_CFI_I2 is not set
 CONFIG_MTD_CFI_I4=y
 CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP=y
 CONFIG_MTD_PHYSMAP_OF=y
 # CONFIG_BLK_DEV is not set
 CONFIG_NETDEVICES=y
@@ -46,39 +57,25 @@ CONFIG_DAVICOM_PHY=y
 # CONFIG_LEGACY_PTYS is not set
 CONFIG_SERIAL_CPM=y
 CONFIG_SERIAL_CPM_CONSOLE=y
+CONFIG_SPI=y
+CONFIG_SPI_FSL_SPI=y
 # CONFIG_HWMON is not set
+CONFIG_WATCHDOG=y
+CONFIG_8xxx_WDT=y
 # CONFIG_USB_SUPPORT is not set
 # CONFIG_DNOTIFY is not set
 CONFIG_TMPFS=y
 CONFIG_CRAMFS=y
 CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_DEV_TALITOS=y
 CONFIG_CRC32_SLICEBY4=y
 CONFIG_DEBUG_INFO=y
 CONFIG_MAGIC_SYSRQ=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_PPC_16K_PAGES=y
-CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_FS=y
-CONFIG_PPC_PTDUMP=y
-CONFIG_MODULES=y
-CONFIG_SPI=y
-CONFIG_SPI_FSL_SPI=y
-CONFIG_CRYPTO=y
-CONFIG_CRYPTO_DEV_TALITOS=y
-CONFIG_8xx_GPIO=y
-CONFIG_WATCHDOG=y
-CONFIG_8xxx_WDT=y
-CONFIG_SMC_UCODE_PATCH=y
-CONFIG_ADVANCED_OPTIONS=y
-CONFIG_PIN_TLB=y
-CONFIG_PERF_EVENTS=y
-CONFIG_MATH_EMULATION=y
-CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y
-CONFIG_STRICT_KERNEL_RWX=y
-CONFIG_IPV6=y
-CONFIG_BPF_JIT=y
 CONFIG_DEBUG_VM_PGTABLE=y
+CONFIG_DETECT_HUNG_TASK=y
 CONFIG_BDI_SWITCH=y
 CONFIG_PPC_EARLY_DEBUG=y
-CONFIG_PPC_EARLY_DEBUG_CPM_ADDR=0xff002008
+CONFIG_PPC_PTDUMP=y
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index ee09f7bb6ea9..6697c5e6682f 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -55,7 +55,6 @@ CONFIG_MPC837x_RDB=y
 CONFIG_ASP834x=y
 CONFIG_PPC_86xx=y
 CONFIG_MPC8641_HPCN=y
-CONFIG_SBC8641D=y
 CONFIG_MPC8610_HPCD=y
 CONFIG_GEF_SBC610=y
 CONFIG_CPU_FREQ=y
diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig
index 379c171f3ddd..a0c45bf2bfb1 100644
--- a/arch/powerpc/configs/wii_defconfig
+++ b/arch/powerpc/configs/wii_defconfig
@@ -99,6 +99,7 @@ CONFIG_LEDS_TRIGGER_HEARTBEAT=y
 CONFIG_LEDS_TRIGGER_PANIC=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_GENERIC=y
+CONFIG_NVMEM_NINTENDO_OTP=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT4_FS=y
 CONFIG_FUSE_FS=m
diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h
index 19b70c5b5f18..2b736d9fbb1b 100644
--- a/arch/powerpc/include/asm/asm-compat.h
+++ b/arch/powerpc/include/asm/asm-compat.h
@@ -17,7 +17,7 @@
 #define PPC_LONG	stringify_in_c(.8byte)
 #define PPC_LONG_ALIGN	stringify_in_c(.balign 8)
 #define PPC_TLNEI	stringify_in_c(tdnei)
-#define PPC_LLARX(t, a, b, eh)	PPC_LDARX(t, a, b, eh)
+#define PPC_LLARX	stringify_in_c(ldarx)
 #define PPC_STLCX	stringify_in_c(stdcx.)
 #define PPC_CNTLZL	stringify_in_c(cntlzd)
 #define PPC_MTOCRF(FXM, RS) MTOCRF((FXM), RS)
@@ -50,7 +50,7 @@
 #define PPC_LONG	stringify_in_c(.long)
 #define PPC_LONG_ALIGN	stringify_in_c(.balign 4)
 #define PPC_TLNEI	stringify_in_c(twnei)
-#define PPC_LLARX(t, a, b, eh)	PPC_LWARX(t, a, b, eh)
+#define PPC_LLARX	stringify_in_c(lwarx)
 #define PPC_STLCX	stringify_in_c(stwcx.)
 #define PPC_CNTLZL	stringify_in_c(cntlzw)
 #define PPC_MTOCRF	stringify_in_c(mtcrf)
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index a1732a79e92a..6a53ef178bfd 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -207,7 +207,7 @@ arch_atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new)
 	int r, o = *old;
 
 	__asm__ __volatile__ (
-"1:\t"	PPC_LWARX(%0,0,%2,1) "	# atomic_try_cmpxchg_acquire	\n"
+"1:	lwarx	%0,0,%2,%5	# atomic_try_cmpxchg_acquire		\n"
 "	cmpw	0,%0,%3							\n"
 "	bne-	2f							\n"
 "	stwcx.	%4,0,%2							\n"
@@ -215,7 +215,7 @@ arch_atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new)
 "\t"	PPC_ACQUIRE_BARRIER "						\n"
 "2:									\n"
 	: "=&r" (r), "+m" (v->counter)
-	: "r" (&v->counter), "r" (o), "r" (new)
+	: "r" (&v->counter), "r" (o), "r" (new), "i" (IS_ENABLED(CONFIG_PPC64) ? 1 : 0)
 	: "cr0", "memory");
 
 	if (unlikely(r != o))
diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index 299ab33505a6..11847b6a244e 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -70,7 +70,7 @@ static inline void fn(unsigned long mask,	\
 	unsigned long *p = (unsigned long *)_p;	\
 	__asm__ __volatile__ (			\
 	prefix					\
-"1:"	PPC_LLARX(%0,0,%3,0) "\n"		\
+"1:"	PPC_LLARX "%0,0,%3,0\n"			\
 	stringify_in_c(op) "%0,%0,%2\n"		\
 	PPC_STLCX "%0,0,%3\n"			\
 	"bne- 1b\n"				\
@@ -115,13 +115,13 @@ static inline unsigned long fn(			\
 	unsigned long *p = (unsigned long *)_p;		\
 	__asm__ __volatile__ (				\
 	prefix						\
-"1:"	PPC_LLARX(%0,0,%3,eh) "\n"			\
+"1:"	PPC_LLARX "%0,0,%3,%4\n"			\
 	stringify_in_c(op) "%1,%0,%2\n"			\
 	PPC_STLCX "%1,0,%3\n"				\
 	"bne- 1b\n"					\
 	postfix						\
 	: "=&r" (old), "=&r" (t)			\
-	: "r" (mask), "r" (p)				\
+	: "r" (mask), "r" (p), "i" (IS_ENABLED(CONFIG_PPC64) ? eh : 0)	\
 	: "cc", "memory");				\
 	return (old & mask);				\
 }
@@ -170,7 +170,7 @@ clear_bit_unlock_return_word(int nr, volatile unsigned long *addr)
 
 	__asm__ __volatile__ (
 	PPC_RELEASE_BARRIER
-"1:"	PPC_LLARX(%0,0,%3,0) "\n"
+"1:"	PPC_LLARX "%0,0,%3,0\n"
 	"andc %1,%0,%2\n"
 	PPC_STLCX "%1,0,%3\n"
 	"bne- 1b\n"
diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h
index a1cc73a88710..170339969b7c 100644
--- a/arch/powerpc/include/asm/book3s/64/kup.h
+++ b/arch/powerpc/include/asm/book3s/64/kup.h
@@ -90,7 +90,7 @@
 	/* Prevent access to userspace using any key values */
 	LOAD_REG_IMMEDIATE(\gpr2, AMR_KUAP_BLOCKED)
 999:	tdne	\gpr1, \gpr2
-	EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE)
+	EMIT_WARN_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE)
 	END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUAP, 67)
 #endif
 .endm
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index 0b2162890d8b..02c08d1492f8 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -4,6 +4,7 @@
 #ifdef __KERNEL__
 
 #include <asm/asm-compat.h>
+#include <asm/extable.h>
 
 #ifdef CONFIG_BUG
 
@@ -30,6 +31,11 @@
 .endm
 #endif /* verbose */
 
+.macro EMIT_WARN_ENTRY addr,file,line,flags
+	EX_TABLE(\addr,\addr+4)
+	EMIT_BUG_ENTRY \addr,\file,\line,\flags
+.endm
+
 #else /* !__ASSEMBLY__ */
 /* _EMIT_BUG_ENTRY expects args %0,%1,%2,%3 to be FILE, LINE, flags and
    sizeof(struct bug_entry), respectively */
@@ -58,6 +64,16 @@
 		  "i" (sizeof(struct bug_entry)),	\
 		  ##__VA_ARGS__)
 
+#define WARN_ENTRY(insn, flags, label, ...)		\
+	asm_volatile_goto(				\
+		"1:	" insn "\n"			\
+		EX_TABLE(1b, %l[label])			\
+		_EMIT_BUG_ENTRY				\
+		: : "i" (__FILE__), "i" (__LINE__),	\
+		  "i" (flags),				\
+		  "i" (sizeof(struct bug_entry)),	\
+		  ##__VA_ARGS__ : : label)
+
 /*
  * BUG_ON() and WARN_ON() do their best to cooperate with compile-time
  * optimisations. However depending on the complexity of the condition
@@ -68,7 +84,19 @@
 	BUG_ENTRY("twi 31, 0, 0", 0);				\
 	unreachable();						\
 } while (0)
+#define HAVE_ARCH_BUG
 
+#define __WARN_FLAGS(flags) do {				\
+	__label__ __label_warn_on;				\
+								\
+	WARN_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags), __label_warn_on); \
+	unreachable();						\
+								\
+__label_warn_on:						\
+	break;							\
+} while (0)
+
+#ifdef CONFIG_PPC64
 #define BUG_ON(x) do {						\
 	if (__builtin_constant_p(x)) {				\
 		if (x)						\
@@ -78,31 +106,43 @@
 	}							\
 } while (0)
 
-#define __WARN_FLAGS(flags) BUG_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags))
-
 #define WARN_ON(x) ({						\
-	int __ret_warn_on = !!(x);				\
-	if (__builtin_constant_p(__ret_warn_on)) {		\
-		if (__ret_warn_on)				\
+	bool __ret_warn_on = false;				\
+	do {							\
+		if (__builtin_constant_p((x))) {		\
+			if (!(x)) 				\
+				break; 				\
 			__WARN();				\
-	} else {						\
-		BUG_ENTRY(PPC_TLNEI " %4, 0",			\
-			  BUGFLAG_WARNING | BUGFLAG_TAINT(TAINT_WARN),	\
-			  "r" (__ret_warn_on));	\
-	}							\
+			__ret_warn_on = true;			\
+		} else {					\
+			__label__ __label_warn_on;		\
+								\
+			WARN_ENTRY(PPC_TLNEI " %4, 0",		\
+				   BUGFLAG_WARNING | BUGFLAG_TAINT(TAINT_WARN),	\
+				   __label_warn_on,		\
+				   "r" ((__force long)(x)));	\
+			break;					\
+__label_warn_on:						\
+			__ret_warn_on = true;			\
+		}						\
+	} while (0);						\
 	unlikely(__ret_warn_on);				\
 })
 
-#define HAVE_ARCH_BUG
 #define HAVE_ARCH_BUG_ON
 #define HAVE_ARCH_WARN_ON
+#endif
+
 #endif /* __ASSEMBLY __ */
 #else
 #ifdef __ASSEMBLY__
 .macro EMIT_BUG_ENTRY addr,file,line,flags
 .endm
+.macro EMIT_WARN_ENTRY addr,file,line,flags
+.endm
 #else /* !__ASSEMBLY__ */
 #define _EMIT_BUG_ENTRY
+#define _EMIT_WARN_ENTRY
 #endif
 #endif /* CONFIG_BUG */
 
diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h
index e33dcf134cdd..7afc96fb6524 100644
--- a/arch/powerpc/include/asm/compat.h
+++ b/arch/powerpc/include/asm/compat.h
@@ -83,22 +83,6 @@ struct compat_statfs {
 
 #define COMPAT_OFF_T_MAX	0x7fffffff
 
-static inline void __user *arch_compat_alloc_user_space(long len)
-{
-	struct pt_regs *regs = current->thread.regs;
-	unsigned long usp = regs->gpr[1];
-
-	/*
-	 * We can't access below the stack pointer in the 32bit ABI and
-	 * can access 288 bytes in the 64bit big-endian ABI,
-	 * or 512 bytes with the new ELFv2 little-endian ABI.
-	 */
-	if (!is_32bit_task())
-		usp -= USER_REDZONE_SIZE;
-
-	return (void __user *) (usp - len);
-}
-
 /*
  * ipc64_perm is actually 32/64bit clean but since the compat layer refers to
  * it we may as well define it.
diff --git a/arch/powerpc/include/asm/debugfs.h b/arch/powerpc/include/asm/debugfs.h
deleted file mode 100644
index 2c5c48571d75..000000000000
--- a/arch/powerpc/include/asm/debugfs.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _ASM_POWERPC_DEBUGFS_H
-#define _ASM_POWERPC_DEBUGFS_H
-
-/*
- * Copyright 2017, Michael Ellerman, IBM Corporation.
- */
-
-#include <linux/debugfs.h>
-
-extern struct dentry *powerpc_debugfs_root;
-
-#endif /* _ASM_POWERPC_DEBUGFS_H */
diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index bf2402fed3e0..4265d5e95c2c 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -111,6 +111,7 @@ int drmem_update_dt(void);
 int __init
 walk_drmem_lmbs_early(unsigned long node, void *data,
 		      int (*func)(struct drmem_lmb *, const __be32 **, void *));
+void drmem_update_lmbs(struct property *prop);
 #endif
 
 static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
diff --git a/arch/powerpc/include/asm/extable.h b/arch/powerpc/include/asm/extable.h
index eb91b2d2935a..26ce2e5c0fa8 100644
--- a/arch/powerpc/include/asm/extable.h
+++ b/arch/powerpc/include/asm/extable.h
@@ -17,6 +17,8 @@
 
 #define ARCH_HAS_RELATIVE_EXTABLE
 
+#ifndef __ASSEMBLY__
+
 struct exception_table_entry {
 	int insn;
 	int fixup;
@@ -28,3 +30,15 @@ static inline unsigned long extable_fixup(const struct exception_table_entry *x)
 }
 
 #endif
+
+/*
+ * Helper macro for exception table entries
+ */
+#define EX_TABLE(_fault, _target)		\
+	stringify_in_c(.section __ex_table,"a";)\
+	stringify_in_c(.balign 4;)		\
+	stringify_in_c(.long (_fault) - . ;)	\
+	stringify_in_c(.long (_target) - . ;)	\
+	stringify_in_c(.previous)
+
+#endif
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 7604673787d6..97a3bd9ffeb9 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -44,7 +44,7 @@
 #define FW_FEATURE_OPAL		ASM_CONST(0x0000000010000000)
 #define FW_FEATURE_SET_MODE	ASM_CONST(0x0000000040000000)
 #define FW_FEATURE_BEST_ENERGY	ASM_CONST(0x0000000080000000)
-#define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000)
+#define FW_FEATURE_FORM1_AFFINITY ASM_CONST(0x0000000100000000)
 #define FW_FEATURE_PRRN		ASM_CONST(0x0000000200000000)
 #define FW_FEATURE_DRMEM_V2	ASM_CONST(0x0000000400000000)
 #define FW_FEATURE_DRC_INFO	ASM_CONST(0x0000000800000000)
@@ -53,6 +53,7 @@
 #define FW_FEATURE_ULTRAVISOR	ASM_CONST(0x0000004000000000)
 #define FW_FEATURE_STUFF_TCE	ASM_CONST(0x0000008000000000)
 #define FW_FEATURE_RPT_INVALIDATE ASM_CONST(0x0000010000000000)
+#define FW_FEATURE_FORM2_AFFINITY ASM_CONST(0x0000020000000000)
 
 #ifndef __ASSEMBLY__
 
@@ -69,11 +70,11 @@ enum {
 		FW_FEATURE_SPLPAR | FW_FEATURE_LPAR |
 		FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO |
 		FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
-		FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
+		FW_FEATURE_FORM1_AFFINITY | FW_FEATURE_PRRN |
 		FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 |
 		FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE |
 		FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR |
-		FW_FEATURE_RPT_INVALIDATE,
+		FW_FEATURE_RPT_INVALIDATE | FW_FEATURE_FORM2_AFFINITY,
 	FW_FEATURE_PSERIES_ALWAYS = 0,
 	FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_ULTRAVISOR,
 	FW_FEATURE_POWERNV_ALWAYS = 0,
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index deef7c94d7b6..bf3b84128525 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -154,6 +154,7 @@ extern int iommu_tce_table_put(struct iommu_table *tbl);
  */
 extern struct iommu_table *iommu_init_table(struct iommu_table *tbl,
 		int nid, unsigned long res_start, unsigned long res_end);
+bool iommu_table_in_use(struct iommu_table *tbl);
 
 #define IOMMU_TABLE_GROUP_MAX_TABLES	2
 
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index eaf3a562bf1e..19b6942c6969 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -39,6 +39,7 @@ struct kvm_nested_guest {
 	pgd_t *shadow_pgtable;		/* our page table for this guest */
 	u64 l1_gr_to_hr;		/* L1's addr of part'n-scoped table */
 	u64 process_table;		/* process table entry for this guest */
+	u64 hfscr;			/* HFSCR that the L1 requested for this nested guest */
 	long refcnt;			/* number of pointers to this struct */
 	struct mutex tlb_lock;		/* serialize page faults and tlbies */
 	struct kvm_nested_guest *next;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 9f52f282b1aa..080a7feb7731 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -103,7 +103,6 @@ struct kvm_vcpu_stat {
 	u64 emulated_inst_exits;
 	u64 dec_exits;
 	u64 ext_intr_exits;
-	u64 halt_wait_ns;
 	u64 halt_successful_wait;
 	u64 dbell_exits;
 	u64 gdbell_exits;
@@ -811,6 +810,8 @@ struct kvm_vcpu_arch {
 
 	u32 online;
 
+	u64 hfscr_permitted;	/* A mask of permitted HFSCR facilities */
+
 	/* For support of nested guests */
 	struct kvm_nested_guest *nested;
 	u32 nested_vcpu_id;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 2d88944f9f34..671fbd1a765e 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -664,9 +664,9 @@ extern int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
 				    struct kvm_vcpu *vcpu, u32 cpu);
 extern void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu);
 extern int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
-				  struct irq_desc *host_desc);
+				  unsigned long host_irq);
 extern int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
-				  struct irq_desc *host_desc);
+				  unsigned long host_irq);
 extern u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu);
 extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
 
diff --git a/arch/powerpc/include/asm/membarrier.h b/arch/powerpc/include/asm/membarrier.h
index 6e20bb5c74ea..de7f79157918 100644
--- a/arch/powerpc/include/asm/membarrier.h
+++ b/arch/powerpc/include/asm/membarrier.h
@@ -12,7 +12,8 @@ static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
 	 * when switching from userspace to kernel is not needed after
 	 * store to rq->curr.
 	 */
-	if (likely(!(atomic_read(&next->membarrier_state) &
+	if (IS_ENABLED(CONFIG_SMP) &&
+	    likely(!(atomic_read(&next->membarrier_state) &
 		     (MEMBARRIER_STATE_PRIVATE_EXPEDITED |
 		      MEMBARRIER_STATE_GLOBAL_EXPEDITED)) || !prev))
 		return;
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 27016b98ecb2..8abe8e42e045 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -324,7 +324,7 @@ static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
 }
 #endif /* !CONFIG_DEBUG_VM */
 
-static inline bool radix_enabled(void)
+static __always_inline bool radix_enabled(void)
 {
 	return mmu_has_feature(MMU_FTR_TYPE_RADIX);
 }
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 74424c14515c..90f488fa4c17 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -126,6 +126,11 @@ struct pci_controller {
 #endif	/* CONFIG_PPC64 */
 
 	void *private_data;
+
+	/* IRQ domain hierarchy */
+	struct irq_domain	*dev_domain;
+	struct irq_domain	*msi_domain;
+	struct fwnode_handle	*fwnode;
 };
 
 /* These are used for config access before all the PCI probing
diff --git a/arch/powerpc/include/asm/pmc.h b/arch/powerpc/include/asm/pmc.h
index c6bbe9778d3c..3c09109e708e 100644
--- a/arch/powerpc/include/asm/pmc.h
+++ b/arch/powerpc/include/asm/pmc.h
@@ -34,6 +34,13 @@ static inline void ppc_set_pmu_inuse(int inuse)
 #endif
 }
 
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+static inline int ppc_get_pmu_inuse(void)
+{
+	return get_paca()->pmcregs_in_use;
+}
+#endif
+
 extern void power4_enable_pmcs(void);
 
 #else /* CONFIG_PPC64 */
diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h
index d0ee0ede5767..b3f480799352 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -33,7 +33,7 @@ int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num);
 void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num);
 int pnv_cxl_get_irq_count(struct pci_dev *dev);
 struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev);
-int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq);
+int64_t pnv_opal_pci_msi_eoi(struct irq_data *d);
 bool is_pnv_opal_msi(struct irq_chip *chip);
 
 #ifdef CONFIG_CXL_BASE
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index bede76dd3db7..baea657bc868 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -576,8 +576,6 @@
 #define	PPC_DIVDE(t, a, b)	stringify_in_c(.long PPC_RAW_DIVDE(t, a, b))
 #define	PPC_DIVDEU(t, a, b)	stringify_in_c(.long PPC_RAW_DIVDEU(t, a, b))
 #define PPC_LQARX(t, a, b, eh)	stringify_in_c(.long PPC_RAW_LQARX(t, a, b, eh))
-#define PPC_LDARX(t, a, b, eh)	stringify_in_c(.long PPC_RAW_LDARX(t, a, b, eh))
-#define PPC_LWARX(t, a, b, eh)	stringify_in_c(.long PPC_RAW_LWARX(t, a, b, eh))
 #define PPC_STQCX(t, a, b)	stringify_in_c(.long PPC_RAW_STQCX(t, a, b))
 #define PPC_MADDHD(t, a, b, c)	stringify_in_c(.long PPC_RAW_MADDHD(t, a, b, c))
 #define PPC_MADDHDU(t, a, b, c)	stringify_in_c(.long PPC_RAW_MADDHDU(t, a, b, c))
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 116c1519728a..1c538a9a11e0 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -10,6 +10,7 @@
 #include <asm/ppc-opcode.h>
 #include <asm/firmware.h>
 #include <asm/feature-fixups.h>
+#include <asm/extable.h>
 
 #ifdef __ASSEMBLY__
 
@@ -259,7 +260,7 @@ n:
 
 /* Be careful, this will clobber the lr register. */
 #define LOAD_REG_ADDR_PIC(reg, name)		\
-	bl	0f;				\
+	bcl	20,31,$+4;			\
 0:	mflr	reg;				\
 	addis	reg,reg,(name - 0b)@ha;		\
 	addi	reg,reg,(name - 0b)@l;
@@ -752,16 +753,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96)
 
 #endif /*  __ASSEMBLY__ */
 
-/*
- * Helper macro for exception table entries
- */
-#define EX_TABLE(_fault, _target)		\
-	stringify_in_c(.section __ex_table,"a";)\
-	stringify_in_c(.balign 4;)		\
-	stringify_in_c(.long (_fault) - . ;)	\
-	stringify_in_c(.long (_target) - . ;)	\
-	stringify_in_c(.previous)
-
 #define SOFT_MASK_TABLE(_start, _end)		\
 	stringify_in_c(.section __soft_mask_table,"a";)\
 	stringify_in_c(.balign 8;)		\
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 324a13351749..5c80152e8f18 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -147,8 +147,9 @@ extern int of_read_drc_info_cell(struct property **prop,
 #define OV5_MSI			0x0201	/* PCIe/MSI support */
 #define OV5_CMO			0x0480	/* Cooperative Memory Overcommitment */
 #define OV5_XCMO		0x0440	/* Page Coalescing */
-#define OV5_TYPE1_AFFINITY	0x0580	/* Type 1 NUMA affinity */
+#define OV5_FORM1_AFFINITY	0x0580	/* FORM1 NUMA affinity */
 #define OV5_PRRN		0x0540	/* Platform Resource Reassignment */
+#define OV5_FORM2_AFFINITY	0x0520	/* Form2 NUMA affinity */
 #define OV5_HP_EVT		0x0604	/* Hot Plug Event support */
 #define OV5_RESIZE_HPT		0x0601	/* Hash Page Table resizing */
 #define OV5_PFO_HW_RNG		0x1180	/* PFO Random Number Generator */
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 14422e851494..6e560f035614 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -22,6 +22,7 @@
 #include <linux/err.h>
 #include <uapi/asm/ptrace.h>
 #include <asm/asm-const.h>
+#include <asm/reg.h>
 
 #ifndef __ASSEMBLY__
 struct pt_regs
@@ -43,8 +44,14 @@ struct pt_regs
 			unsigned long mq;
 #endif
 			unsigned long trap;
-			unsigned long dar;
-			unsigned long dsisr;
+			union {
+				unsigned long dar;
+				unsigned long dear;
+			};
+			union {
+				unsigned long dsisr;
+				unsigned long esr;
+			};
 			unsigned long result;
 		};
 	};
@@ -197,11 +204,7 @@ static inline unsigned long frame_pointer(struct pt_regs *regs)
 	return 0;
 }
 
-#ifdef __powerpc64__
-#define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1)
-#else
 #define user_mode(regs) (((regs)->msr & MSR_PR) != 0)
-#endif
 
 #define force_successful_syscall_return()   \
 	do { \
@@ -286,6 +289,28 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
 	regs->gpr[3] = rc;
 }
 
+static inline bool cpu_has_msr_ri(void)
+{
+	return !IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x);
+}
+
+static inline bool regs_is_unrecoverable(struct pt_regs *regs)
+{
+	return unlikely(cpu_has_msr_ri() && !(regs->msr & MSR_RI));
+}
+
+static inline void regs_set_recoverable(struct pt_regs *regs)
+{
+	if (cpu_has_msr_ri())
+		regs_set_return_msr(regs, regs->msr | MSR_RI);
+}
+
+static inline void regs_set_unrecoverable(struct pt_regs *regs)
+{
+	if (cpu_has_msr_ri())
+		regs_set_return_msr(regs, regs->msr & ~MSR_RI);
+}
+
 #define arch_has_single_step()	(1)
 #define arch_has_block_step()	(true)
 #define ARCH_HAS_USER_SINGLE_STEP_REPORT
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index be85cf156a1f..e9d27265253b 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -415,6 +415,7 @@
 #define   FSCR_TAR	__MASK(FSCR_TAR_LG)
 #define   FSCR_EBB	__MASK(FSCR_EBB_LG)
 #define   FSCR_DSCR	__MASK(FSCR_DSCR_LG)
+#define   FSCR_INTR_CAUSE (ASM_CONST(0xFF) << 56)	/* interrupt cause */
 #define SPRN_HFSCR	0xbe	/* HV=1 Facility Status & Control Register */
 #define   HFSCR_PREFIX	__MASK(FSCR_PREFIX_LG)
 #define   HFSCR_MSGP	__MASK(FSCR_MSGP_LG)
@@ -426,7 +427,7 @@
 #define   HFSCR_DSCR	__MASK(FSCR_DSCR_LG)
 #define   HFSCR_VECVSX	__MASK(FSCR_VECVSX_LG)
 #define   HFSCR_FP	__MASK(FSCR_FP_LG)
-#define   HFSCR_INTR_CAUSE (ASM_CONST(0xFF) << 56)	/* interrupt cause */
+#define   HFSCR_INTR_CAUSE FSCR_INTR_CAUSE
 #define SPRN_TAR	0x32f	/* Target Address Register */
 #define SPRN_LPCR	0x13E	/* LPAR Control Register */
 #define   LPCR_VPM0		ASM_CONST(0x8000000000000000)
diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h
index 324d7b298ec3..6e4af4492a14 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -38,14 +38,6 @@ extern char start_virt_trampolines[];
 extern char end_virt_trampolines[];
 #endif
 
-static inline int in_kernel_text(unsigned long addr)
-{
-	if (addr >= (unsigned long)_stext && addr < (unsigned long)__init_end)
-		return 1;
-
-	return 0;
-}
-
 static inline unsigned long kernel_toc_addr(void)
 {
 	/* Defined by the linker, see vmlinux.lds.S */
diff --git a/arch/powerpc/include/asm/simple_spinlock.h b/arch/powerpc/include/asm/simple_spinlock.h
index 552f325412cc..8985791a2ba5 100644
--- a/arch/powerpc/include/asm/simple_spinlock.h
+++ b/arch/powerpc/include/asm/simple_spinlock.h
@@ -51,7 +51,7 @@ static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock)
 
 	token = LOCK_TOKEN;
 	__asm__ __volatile__(
-"1:	" PPC_LWARX(%0,0,%2,1) "\n\
+"1:	lwarx		%0,0,%2,1\n\
 	cmpwi		0,%0,0\n\
 	bne-		2f\n\
 	stwcx.		%1,0,%2\n\
@@ -179,7 +179,7 @@ static inline long __arch_read_trylock(arch_rwlock_t *rw)
 	long tmp;
 
 	__asm__ __volatile__(
-"1:	" PPC_LWARX(%0,0,%1,1) "\n"
+"1:	lwarx		%0,0,%1,1\n"
 	__DO_SIGN_EXTEND
 "	addic.		%0,%0,1\n\
 	ble-		2f\n"
@@ -203,7 +203,7 @@ static inline long __arch_write_trylock(arch_rwlock_t *rw)
 
 	token = WRLOCK_TOKEN;
 	__asm__ __volatile__(
-"1:	" PPC_LWARX(%0,0,%2,1) "\n\
+"1:	lwarx		%0,0,%2,1\n\
 	cmpwi		0,%0,0\n\
 	bne-		2f\n"
 "	stwcx.		%1,0,%2\n\
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 03b3d010cbab..7ef1cd8168a0 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -33,6 +33,10 @@ extern bool coregroup_enabled;
 extern int cpu_to_chip_id(int cpu);
 extern int *chip_id_lookup_table;
 
+DECLARE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
+DECLARE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
+DECLARE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map);
+
 #ifdef CONFIG_SMP
 
 struct smp_ops_t {
@@ -141,6 +145,7 @@ extern int cpu_to_core_id(int cpu);
 
 extern bool has_big_cores;
 extern bool thread_group_shares_l2;
+extern bool thread_group_shares_l3;
 
 #define cpu_smt_mask cpu_smt_mask
 #ifdef CONFIG_SCHED_SMT
@@ -195,6 +200,7 @@ extern void __cpu_die(unsigned int cpu);
 #define hard_smp_processor_id()		get_hard_smp_processor_id(0)
 #define smp_setup_cpu_maps()
 #define thread_group_shares_l2  0
+#define thread_group_shares_l3	0
 static inline void inhibit_secondary_onlining(void) {}
 static inline void uninhibit_secondary_onlining(void) {}
 static inline const struct cpumask *cpu_sibling_mask(int cpu)
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index ba0f88f3a30d..c60ebd04b2ed 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -90,10 +90,9 @@ static inline void syscall_get_arguments(struct task_struct *task,
 	unsigned long val, mask = -1UL;
 	unsigned int n = 6;
 
-#ifdef CONFIG_COMPAT
-	if (test_tsk_thread_flag(task, TIF_32BIT))
+	if (is_32bit_task())
 		mask = 0xffffffff;
-#endif
+
 	while (n--) {
 		if (n == 0)
 			val = regs->orig_gpr3;
@@ -116,16 +115,11 @@ static inline void syscall_set_arguments(struct task_struct *task,
 
 static inline int syscall_get_arch(struct task_struct *task)
 {
-	int arch;
-
-	if (IS_ENABLED(CONFIG_PPC64) && !test_tsk_thread_flag(task, TIF_32BIT))
-		arch = AUDIT_ARCH_PPC64;
+	if (is_32bit_task())
+		return AUDIT_ARCH_PPC;
+	else if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
+		return AUDIT_ARCH_PPC64LE;
 	else
-		arch = AUDIT_ARCH_PPC;
-
-#ifdef __LITTLE_ENDIAN__
-	arch |= __AUDIT_ARCH_LE;
-#endif
-	return arch;
+		return AUDIT_ARCH_PPC64;
 }
 #endif	/* _ASM_SYSCALL_H */
diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h
index 398171fdcd9f..7ee66ae5444d 100644
--- a/arch/powerpc/include/asm/syscalls.h
+++ b/arch/powerpc/include/asm/syscalls.h
@@ -6,6 +6,7 @@
 #include <linux/compiler.h>
 #include <linux/linkage.h>
 #include <linux/types.h>
+#include <linux/compat.h>
 
 struct rtas_args;
 
@@ -18,5 +19,34 @@ asmlinkage long sys_mmap2(unsigned long addr, size_t len,
 asmlinkage long ppc64_personality(unsigned long personality);
 asmlinkage long sys_rtas(struct rtas_args __user *uargs);
 
+#ifdef CONFIG_COMPAT
+unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
+			       unsigned long prot, unsigned long flags,
+			       unsigned long fd, unsigned long pgoff);
+
+compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count,
+				  u32 reg6, u32 pos1, u32 pos2);
+
+compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count,
+				   u32 reg6, u32 pos1, u32 pos2);
+
+compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1, u32 offset2, u32 count);
+
+int compat_sys_truncate64(const char __user *path, u32 reg4,
+			  unsigned long len1, unsigned long len2);
+
+long compat_sys_fallocate(int fd, int mode, u32 offset1, u32 offset2, u32 len1, u32 len2);
+
+int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long len1,
+			   unsigned long len2);
+
+long ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
+		     size_t len, int advice);
+
+long compat_sys_sync_file_range2(int fd, unsigned int flags,
+				 unsigned int offset1, unsigned int offset2,
+				 unsigned int nbytes1, unsigned int nbytes2);
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* __ASM_POWERPC_SYSCALLS_H */
diff --git a/arch/powerpc/include/asm/tce.h b/arch/powerpc/include/asm/tce.h
index db5fc2f2262d..0c34d2756d92 100644
--- a/arch/powerpc/include/asm/tce.h
+++ b/arch/powerpc/include/asm/tce.h
@@ -19,15 +19,7 @@
 #define TCE_VB			0
 #define TCE_PCI			1
 
-/* TCE page size is 4096 bytes (1 << 12) */
-
-#define TCE_SHIFT	12
-#define TCE_PAGE_SIZE	(1 << TCE_SHIFT)
-
 #define TCE_ENTRY_SIZE		8		/* each TCE is 64 bits */
-
-#define TCE_RPN_MASK		0xfffffffffful  /* 40-bit RPN (4K pages) */
-#define TCE_RPN_SHIFT		12
 #define TCE_VALID		0x800		/* TCE valid */
 #define TCE_ALLIO		0x400		/* TCE valid for all lpars */
 #define TCE_PCI_WRITE		0x2		/* write from PCI allowed */
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index e4db64c0e184..36fcafb1fd6d 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -36,7 +36,7 @@ static inline int pcibus_to_node(struct pci_bus *bus)
 				 cpu_all_mask :				\
 				 cpumask_of_node(pcibus_to_node(bus)))
 
-extern int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc);
+int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc);
 extern int __node_distance(int, int);
 #define node_distance(a, b) __node_distance(a, b)
 
@@ -64,6 +64,12 @@ static inline int early_cpu_to_node(int cpu)
 }
 
 int of_drconf_to_nid_single(struct drmem_lmb *lmb);
+void update_numa_distance(struct device_node *node);
+
+extern void map_cpu_to_node(int cpu, int node);
+#ifdef CONFIG_HOTPLUG_CPU
+extern void unmap_cpu_from_node(unsigned long cpu);
+#endif /* CONFIG_HOTPLUG_CPU */
 
 #else
 
@@ -83,7 +89,7 @@ static inline void sysfs_remove_device_from_node(struct device *dev,
 
 static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {}
 
-static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
+static inline int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
 {
 	return 0;
 }
@@ -93,6 +99,15 @@ static inline int of_drconf_to_nid_single(struct drmem_lmb *lmb)
 	return first_online_node;
 }
 
+static inline void update_numa_distance(struct device_node *node) {}
+
+#ifdef CONFIG_SMP
+static inline void map_cpu_to_node(int cpu, int node) {}
+#ifdef CONFIG_HOTPLUG_CPU
+static inline void unmap_cpu_from_node(unsigned long cpu) {}
+#endif /* CONFIG_HOTPLUG_CPU */
+#endif /* CONFIG_SMP */
+
 #endif /* CONFIG_NUMA */
 
 #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index b541c690a31c..5eb462af6766 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -9,8 +9,6 @@
 
 #define NR_syscalls	__NR_syscalls
 
-#define __NR__exit __NR_exit
-
 #ifndef __ASSEMBLY__
 
 #include <linux/types.h>
diff --git a/arch/powerpc/include/asm/vdso/processor.h b/arch/powerpc/include/asm/vdso/processor.h
index e072577bc7c0..8d79f994b4aa 100644
--- a/arch/powerpc/include/asm/vdso/processor.h
+++ b/arch/powerpc/include/asm/vdso/processor.h
@@ -5,12 +5,21 @@
 #ifndef __ASSEMBLY__
 
 /* Macros for adjusting thread priority (hardware multi-threading) */
+#ifdef CONFIG_PPC64
 #define HMT_very_low()		asm volatile("or 31, 31, 31	# very low priority")
 #define HMT_low()		asm volatile("or 1, 1, 1	# low priority")
 #define HMT_medium_low()	asm volatile("or 6, 6, 6	# medium low priority")
 #define HMT_medium()		asm volatile("or 2, 2, 2	# medium priority")
 #define HMT_medium_high()	asm volatile("or 5, 5, 5	# medium high priority")
 #define HMT_high()		asm volatile("or 3, 3, 3	# high priority")
+#else
+#define HMT_very_low()
+#define HMT_low()
+#define HMT_medium_low()
+#define HMT_medium()
+#define HMT_medium_high()
+#define HMT_high()
+#endif
 
 #ifdef CONFIG_PPC64
 #define cpu_relax()	do { HMT_low(); HMT_medium(); barrier(); } while (0)
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
index d9cf192368ad..0ac9bfddf704 100644
--- a/arch/powerpc/include/asm/xics.h
+++ b/arch/powerpc/include/asm/xics.h
@@ -89,10 +89,11 @@ static inline int ics_opal_init(void) { return -ENODEV; }
 /* ICS instance, hooked up to chip_data of an irq */
 struct ics {
 	struct list_head link;
-	int (*map)(struct ics *ics, unsigned int virq);
+	int (*check)(struct ics *ics, unsigned int hwirq);
 	void (*mask_unknown)(struct ics *ics, unsigned long vec);
 	long (*get_server)(struct ics *ics, unsigned long vec);
 	int (*host_match)(struct ics *ics, struct device_node *node);
+	struct irq_chip *chip;
 	char data[];
 };
 
diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h
index 8b211faa0e42..cf8bb6ac4463 100644
--- a/arch/powerpc/include/asm/xive-regs.h
+++ b/arch/powerpc/include/asm/xive-regs.h
@@ -80,10 +80,13 @@
 #define   TM_QW0W2_VU		PPC_BIT32(0)
 #define   TM_QW0W2_LOGIC_SERV	PPC_BITMASK32(1,31) // XX 2,31 ?
 #define   TM_QW1W2_VO		PPC_BIT32(0)
+#define   TM_QW1W2_HO           PPC_BIT32(1) /* P10 XIVE2 */
 #define   TM_QW1W2_OS_CAM	PPC_BITMASK32(8,31)
 #define   TM_QW2W2_VP		PPC_BIT32(0)
+#define   TM_QW2W2_HP           PPC_BIT32(1) /* P10 XIVE2 */
 #define   TM_QW2W2_POOL_CAM	PPC_BITMASK32(8,31)
 #define   TM_QW3W2_VT		PPC_BIT32(0)
+#define   TM_QW3W2_HT           PPC_BIT32(1) /* P10 XIVE2 */
 #define   TM_QW3W2_LP		PPC_BIT32(6)
 #define   TM_QW3W2_LE		PPC_BIT32(7)
 #define   TM_QW3W2_T		PPC_BIT32(31)
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index aa094a8655b0..92930b0b5d0e 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -111,6 +111,7 @@ void xive_native_free_vp_block(u32 vp_base);
 int xive_native_populate_irq_data(u32 hw_irq,
 				  struct xive_irq_data *data);
 void xive_cleanup_irq_data(struct xive_irq_data *xd);
+void xive_irq_free_data(unsigned int virq);
 void xive_native_free_irq(u32 irq);
 int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq);
 
@@ -125,6 +126,7 @@ int xive_native_enable_vp(u32 vp_id, bool single_escalation);
 int xive_native_disable_vp(u32 vp_id);
 int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
 bool xive_native_has_single_escalation(void);
+bool xive_native_has_save_restore(void);
 
 int xive_native_get_queue_info(u32 vp_id, uint32_t prio,
 			       u64 *out_qpage,
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index f66b63e81c3b..7be36c1e1db6 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -46,7 +46,8 @@ obj-y				:= cputable.o syscalls.o \
 				   prom.o traps.o setup-common.o \
 				   udbg.o misc.o io.o misc_$(BITS).o \
 				   of_platform.o prom_parse.o firmware.o \
-				   hw_breakpoint_constraints.o interrupt.o
+				   hw_breakpoint_constraints.o interrupt.o \
+				   kdebugfs.o
 obj-y				+= ptrace/
 obj-$(CONFIG_PPC64)		+= setup_64.o \
 				   paca.o nvram_64.o note.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 5bee245d832b..e563d3222d69 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -286,23 +286,16 @@ int main(void)
 	STACK_PT_REGS_OFFSET(_CCR, ccr);
 	STACK_PT_REGS_OFFSET(_XER, xer);
 	STACK_PT_REGS_OFFSET(_DAR, dar);
+	STACK_PT_REGS_OFFSET(_DEAR, dear);
 	STACK_PT_REGS_OFFSET(_DSISR, dsisr);
+	STACK_PT_REGS_OFFSET(_ESR, esr);
 	STACK_PT_REGS_OFFSET(ORIG_GPR3, orig_gpr3);
 	STACK_PT_REGS_OFFSET(RESULT, result);
 	STACK_PT_REGS_OFFSET(_TRAP, trap);
-#ifndef CONFIG_PPC64
-	/*
-	 * The PowerPC 400-class & Book-E processors have neither the DAR
-	 * nor the DSISR SPRs. Hence, we overload them to hold the similar
-	 * DEAR and ESR SPRs for such processors.  For critical interrupts
-	 * we use them to hold SRR0 and SRR1.
-	 */
-	STACK_PT_REGS_OFFSET(_DEAR, dar);
-	STACK_PT_REGS_OFFSET(_ESR, dsisr);
-#else /* CONFIG_PPC64 */
+#ifdef CONFIG_PPC64
 	STACK_PT_REGS_OFFSET(SOFTE, softe);
 	STACK_PT_REGS_OFFSET(_PPR, ppr);
-#endif /* CONFIG_PPC64 */
+#endif
 
 #ifdef CONFIG_PPC_PKEY
 	STACK_PT_REGS_OFFSET(STACK_REGS_AMR, amr);
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index 6f903e9aa20b..cf1be75b7833 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -120,6 +120,7 @@ struct cache {
 	struct cpumask shared_cpu_map; /* online CPUs using this cache */
 	int type;                      /* split cache disambiguation */
 	int level;                     /* level not explicit in device tree */
+	int group_id;                  /* id of the group of threads that share this cache */
 	struct list_head list;         /* global list of cache objects */
 	struct cache *next_local;      /* next cache of >= level */
 };
@@ -142,22 +143,24 @@ static const char *cache_type_string(const struct cache *cache)
 }
 
 static void cache_init(struct cache *cache, int type, int level,
-		       struct device_node *ofnode)
+		       struct device_node *ofnode, int group_id)
 {
 	cache->type = type;
 	cache->level = level;
 	cache->ofnode = of_node_get(ofnode);
+	cache->group_id = group_id;
 	INIT_LIST_HEAD(&cache->list);
 	list_add(&cache->list, &cache_list);
 }
 
-static struct cache *new_cache(int type, int level, struct device_node *ofnode)
+static struct cache *new_cache(int type, int level,
+			       struct device_node *ofnode, int group_id)
 {
 	struct cache *cache;
 
 	cache = kzalloc(sizeof(*cache), GFP_KERNEL);
 	if (cache)
-		cache_init(cache, type, level, ofnode);
+		cache_init(cache, type, level, ofnode, group_id);
 
 	return cache;
 }
@@ -309,20 +312,24 @@ static struct cache *cache_find_first_sibling(struct cache *cache)
 		return cache;
 
 	list_for_each_entry(iter, &cache_list, list)
-		if (iter->ofnode == cache->ofnode && iter->next_local == cache)
+		if (iter->ofnode == cache->ofnode &&
+		    iter->group_id == cache->group_id &&
+		    iter->next_local == cache)
 			return iter;
 
 	return cache;
 }
 
-/* return the first cache on a local list matching node */
-static struct cache *cache_lookup_by_node(const struct device_node *node)
+/* return the first cache on a local list matching node and thread-group id */
+static struct cache *cache_lookup_by_node_group(const struct device_node *node,
+						int group_id)
 {
 	struct cache *cache = NULL;
 	struct cache *iter;
 
 	list_for_each_entry(iter, &cache_list, list) {
-		if (iter->ofnode != node)
+		if (iter->ofnode != node ||
+		    iter->group_id != group_id)
 			continue;
 		cache = cache_find_first_sibling(iter);
 		break;
@@ -352,14 +359,15 @@ static int cache_is_unified_d(const struct device_node *np)
 		CACHE_TYPE_UNIFIED_D : CACHE_TYPE_UNIFIED;
 }
 
-static struct cache *cache_do_one_devnode_unified(struct device_node *node, int level)
+static struct cache *cache_do_one_devnode_unified(struct device_node *node, int group_id,
+						  int level)
 {
 	pr_debug("creating L%d ucache for %pOFP\n", level, node);
 
-	return new_cache(cache_is_unified_d(node), level, node);
+	return new_cache(cache_is_unified_d(node), level, node, group_id);
 }
 
-static struct cache *cache_do_one_devnode_split(struct device_node *node,
+static struct cache *cache_do_one_devnode_split(struct device_node *node, int group_id,
 						int level)
 {
 	struct cache *dcache, *icache;
@@ -367,8 +375,8 @@ static struct cache *cache_do_one_devnode_split(struct device_node *node,
 	pr_debug("creating L%d dcache and icache for %pOFP\n", level,
 		 node);
 
-	dcache = new_cache(CACHE_TYPE_DATA, level, node);
-	icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node);
+	dcache = new_cache(CACHE_TYPE_DATA, level, node, group_id);
+	icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node, group_id);
 
 	if (!dcache || !icache)
 		goto err;
@@ -382,31 +390,32 @@ err:
 	return NULL;
 }
 
-static struct cache *cache_do_one_devnode(struct device_node *node, int level)
+static struct cache *cache_do_one_devnode(struct device_node *node, int group_id, int level)
 {
 	struct cache *cache;
 
 	if (cache_node_is_unified(node))
-		cache = cache_do_one_devnode_unified(node, level);
+		cache = cache_do_one_devnode_unified(node, group_id, level);
 	else
-		cache = cache_do_one_devnode_split(node, level);
+		cache = cache_do_one_devnode_split(node, group_id, level);
 
 	return cache;
 }
 
 static struct cache *cache_lookup_or_instantiate(struct device_node *node,
+						 int group_id,
 						 int level)
 {
 	struct cache *cache;
 
-	cache = cache_lookup_by_node(node);
+	cache = cache_lookup_by_node_group(node, group_id);
 
 	WARN_ONCE(cache && cache->level != level,
 		  "cache level mismatch on lookup (got %d, expected %d)\n",
 		  cache->level, level);
 
 	if (!cache)
-		cache = cache_do_one_devnode(node, level);
+		cache = cache_do_one_devnode(node, group_id, level);
 
 	return cache;
 }
@@ -443,7 +452,30 @@ static void do_subsidiary_caches_debugcheck(struct cache *cache)
 		  of_node_get_device_type(cache->ofnode));
 }
 
-static void do_subsidiary_caches(struct cache *cache)
+/*
+ * If sub-groups of threads in a core containing @cpu_id share the
+ * L@level-cache (information obtained via "ibm,thread-groups"
+ * device-tree property), then we identify the group by the first
+ * thread-sibling in the group. We define this to be the group-id.
+ *
+ * In the absence of any thread-group information for L@level-cache,
+ * this function returns -1.
+ */
+static int get_group_id(unsigned int cpu_id, int level)
+{
+	if (has_big_cores && level == 1)
+		return cpumask_first(per_cpu(thread_group_l1_cache_map,
+					     cpu_id));
+	else if (thread_group_shares_l2 && level == 2)
+		return cpumask_first(per_cpu(thread_group_l2_cache_map,
+					     cpu_id));
+	else if (thread_group_shares_l3 && level == 3)
+		return cpumask_first(per_cpu(thread_group_l3_cache_map,
+					     cpu_id));
+	return -1;
+}
+
+static void do_subsidiary_caches(struct cache *cache, unsigned int cpu_id)
 {
 	struct device_node *subcache_node;
 	int level = cache->level;
@@ -452,9 +484,11 @@ static void do_subsidiary_caches(struct cache *cache)
 
 	while ((subcache_node = of_find_next_cache_node(cache->ofnode))) {
 		struct cache *subcache;
+		int group_id;
 
 		level++;
-		subcache = cache_lookup_or_instantiate(subcache_node, level);
+		group_id = get_group_id(cpu_id, level);
+		subcache = cache_lookup_or_instantiate(subcache_node, group_id, level);
 		of_node_put(subcache_node);
 		if (!subcache)
 			break;
@@ -468,6 +502,7 @@ static struct cache *cache_chain_instantiate(unsigned int cpu_id)
 {
 	struct device_node *cpu_node;
 	struct cache *cpu_cache = NULL;
+	int group_id;
 
 	pr_debug("creating cache object(s) for CPU %i\n", cpu_id);
 
@@ -476,11 +511,13 @@ static struct cache *cache_chain_instantiate(unsigned int cpu_id)
 	if (!cpu_node)
 		goto out;
 
-	cpu_cache = cache_lookup_or_instantiate(cpu_node, 1);
+	group_id = get_group_id(cpu_id, 1);
+
+	cpu_cache = cache_lookup_or_instantiate(cpu_node, group_id, 1);
 	if (!cpu_cache)
 		goto out;
 
-	do_subsidiary_caches(cpu_cache);
+	do_subsidiary_caches(cpu_cache, cpu_id);
 
 	cache_cpu_set(cpu_cache, cpu_id);
 out:
@@ -641,45 +678,6 @@ static ssize_t level_show(struct kobject *k, struct kobj_attribute *attr, char *
 static struct kobj_attribute cache_level_attr =
 	__ATTR(level, 0444, level_show, NULL);
 
-static unsigned int index_dir_to_cpu(struct cache_index_dir *index)
-{
-	struct kobject *index_dir_kobj = &index->kobj;
-	struct kobject *cache_dir_kobj = index_dir_kobj->parent;
-	struct kobject *cpu_dev_kobj = cache_dir_kobj->parent;
-	struct device *dev = kobj_to_dev(cpu_dev_kobj);
-
-	return dev->id;
-}
-
-/*
- * On big-core systems, each core has two groups of CPUs each of which
- * has its own L1-cache. The thread-siblings which share l1-cache with
- * @cpu can be obtained via cpu_smallcore_mask().
- *
- * On some big-core systems, the L2 cache is shared only between some
- * groups of siblings. This is already parsed and encoded in
- * cpu_l2_cache_mask().
- *
- * TODO: cache_lookup_or_instantiate() needs to be made aware of the
- *       "ibm,thread-groups" property so that cache->shared_cpu_map
- *       reflects the correct siblings on platforms that have this
- *       device-tree property. This helper function is only a stop-gap
- *       solution so that we report the correct siblings to the
- *       userspace via sysfs.
- */
-static const struct cpumask *get_shared_cpu_map(struct cache_index_dir *index, struct cache *cache)
-{
-	if (has_big_cores) {
-		int cpu = index_dir_to_cpu(index);
-		if (cache->level == 1)
-			return cpu_smallcore_mask(cpu);
-		if (cache->level == 2 && thread_group_shares_l2)
-			return cpu_l2_cache_mask(cpu);
-	}
-
-	return &cache->shared_cpu_map;
-}
-
 static ssize_t
 show_shared_cpumap(struct kobject *k, struct kobj_attribute *attr, char *buf, bool list)
 {
@@ -690,7 +688,7 @@ show_shared_cpumap(struct kobject *k, struct kobj_attribute *attr, char *buf, bo
 	index = kobj_to_cache_index_dir(k);
 	cache = index->cache;
 
-	mask = get_shared_cpu_map(index, cache);
+	mask = &cache->shared_cpu_map;
 
 	return cpumap_print_to_pagebuf(list, buf, mask);
 }
@@ -848,13 +846,15 @@ static struct cache *cache_lookup_by_cpu(unsigned int cpu_id)
 {
 	struct device_node *cpu_node;
 	struct cache *cache;
+	int group_id;
 
 	cpu_node = of_get_cpu_node(cpu_id, NULL);
 	WARN_ONCE(!cpu_node, "no OF node found for CPU %i\n", cpu_id);
 	if (!cpu_node)
 		return NULL;
 
-	cache = cache_lookup_by_node(cpu_node);
+	group_id = get_group_id(cpu_id, 1);
+	cache = cache_lookup_by_node_group(cpu_node, group_id);
 	of_node_put(cpu_node);
 
 	return cache;
diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c
index cdc2dccb987d..64e423d2fe0f 100644
--- a/arch/powerpc/kernel/dawr.c
+++ b/arch/powerpc/kernel/dawr.c
@@ -9,7 +9,6 @@
 #include <linux/export.h>
 #include <linux/fs.h>
 #include <linux/debugfs.h>
-#include <asm/debugfs.h>
 #include <asm/machdep.h>
 #include <asm/hvcall.h>
 
@@ -101,7 +100,7 @@ static int __init dawr_force_setup(void)
 	if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) {
 		/* Turn DAWR off by default, but allow admin to turn it on */
 		debugfs_create_file_unsafe("dawr_enable_dangerous", 0600,
-					   powerpc_debugfs_root,
+					   arch_debugfs_dir,
 					   &dawr_force_enable,
 					   &dawr_enable_fops);
 	}
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 3bbdcc86d01b..e9b597ed423c 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -21,9 +21,9 @@
 #include <linux/spinlock.h>
 #include <linux/export.h>
 #include <linux/of.h>
+#include <linux/debugfs.h>
 
 #include <linux/atomic.h>
-#include <asm/debugfs.h>
 #include <asm/eeh.h>
 #include <asm/eeh_event.h>
 #include <asm/io.h>
@@ -1901,24 +1901,24 @@ static int __init eeh_init_proc(void)
 		proc_create_single("powerpc/eeh", 0, NULL, proc_eeh_show);
 #ifdef CONFIG_DEBUG_FS
 		debugfs_create_file_unsafe("eeh_enable", 0600,
-					   powerpc_debugfs_root, NULL,
+					   arch_debugfs_dir, NULL,
 					   &eeh_enable_dbgfs_ops);
 		debugfs_create_u32("eeh_max_freezes", 0600,
-				powerpc_debugfs_root, &eeh_max_freezes);
+				arch_debugfs_dir, &eeh_max_freezes);
 		debugfs_create_bool("eeh_disable_recovery", 0600,
-				powerpc_debugfs_root,
+				arch_debugfs_dir,
 				&eeh_debugfs_no_recover);
 		debugfs_create_file_unsafe("eeh_dev_check", 0600,
-				powerpc_debugfs_root, NULL,
+				arch_debugfs_dir, NULL,
 				&eeh_dev_check_fops);
 		debugfs_create_file_unsafe("eeh_dev_break", 0600,
-				powerpc_debugfs_root, NULL,
+				arch_debugfs_dir, NULL,
 				&eeh_dev_break_fops);
 		debugfs_create_file_unsafe("eeh_force_recover", 0600,
-				powerpc_debugfs_root, NULL,
+				arch_debugfs_dir, NULL,
 				&eeh_force_recover_fops);
 		debugfs_create_file_unsafe("eeh_dev_can_recover", 0600,
-				powerpc_debugfs_root, NULL,
+				arch_debugfs_dir, NULL,
 				&eeh_dev_can_recover_fops);
 		eeh_cache_debugfs_init();
 #endif
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index bf3270426d82..9bdaaf7fddc9 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -12,8 +12,8 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/atomic.h>
+#include <linux/debugfs.h>
 #include <asm/pci-bridge.h>
-#include <asm/debugfs.h>
 #include <asm/ppc-pci.h>
 
 
@@ -283,6 +283,6 @@ DEFINE_SHOW_ATTRIBUTE(eeh_addr_cache);
 void eeh_cache_debugfs_init(void)
 {
 	debugfs_create_file_unsafe("eeh_address_cache", 0400,
-			powerpc_debugfs_root, NULL,
+			arch_debugfs_dir, NULL,
 			&eeh_addr_cache_fops);
 }
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 0273a1349006..61fdd53cdd9a 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -161,10 +161,10 @@ ret_from_fork:
 ret_from_kernel_thread:
 	REST_NVGPRS(r1)
 	bl	schedule_tail
-	mtlr	r14
+	mtctr	r14
 	mr	r3,r15
 	PPC440EP_ERR42
-	blrl
+	bctrl
 	li	r3,0
 	b	ret_from_syscall
 
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 15720f8661a1..70cff7b49e17 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -309,7 +309,7 @@ _GLOBAL(enter_rtas)
 	 */
 	lbz	r0,PACAIRQSOFTMASK(r13)
 1:	tdeqi	r0,IRQS_ENABLED
-	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
+	EMIT_WARN_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
 #endif
 
 	/* Hard-disable interrupts */
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 1401787b0b93..711c66b76df1 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -545,8 +545,8 @@ __end_interrupts:
 				PROLOG_ADDITION_2REGS)
 	mfspr	r14,SPRN_DEAR
 	mfspr	r15,SPRN_ESR
-	std	r14,_DAR(r1)
-	std	r15,_DSISR(r1)
+	std	r14,_DEAR(r1)
+	std	r15,_ESR(r1)
 	ld	r14,PACA_EXGEN+EX_R14(r13)
 	ld	r15,PACA_EXGEN+EX_R15(r13)
 	EXCEPTION_COMMON(0x300)
@@ -558,8 +558,8 @@ __end_interrupts:
 				PROLOG_ADDITION_2REGS)
 	li	r15,0
 	mr	r14,r10
-	std	r14,_DAR(r1)
-	std	r15,_DSISR(r1)
+	std	r14,_DEAR(r1)
+	std	r15,_ESR(r1)
 	ld	r14,PACA_EXGEN+EX_R14(r13)
 	ld	r15,PACA_EXGEN+EX_R15(r13)
 	EXCEPTION_COMMON(0x400)
@@ -575,8 +575,8 @@ __end_interrupts:
 				PROLOG_ADDITION_2REGS)
 	mfspr	r14,SPRN_DEAR
 	mfspr	r15,SPRN_ESR
-	std	r14,_DAR(r1)
-	std	r15,_DSISR(r1)
+	std	r14,_DEAR(r1)
+	std	r15,_ESR(r1)
 	ld	r14,PACA_EXGEN+EX_R14(r13)
 	ld	r15,PACA_EXGEN+EX_R15(r13)
 	EXCEPTION_COMMON(0x600)
@@ -587,7 +587,7 @@ __end_interrupts:
 	NORMAL_EXCEPTION_PROLOG(0x700, BOOKE_INTERRUPT_PROGRAM,
 				PROLOG_ADDITION_1REG)
 	mfspr	r14,SPRN_ESR
-	std	r14,_DSISR(r1)
+	std	r14,_ESR(r1)
 	ld	r14,PACA_EXGEN+EX_R14(r13)
 	EXCEPTION_COMMON(0x700)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
@@ -1057,8 +1057,8 @@ bad_stack_book3e:
 	std	r11,_CCR(r1)
 	mfspr	r10,SPRN_DEAR
 	mfspr	r11,SPRN_ESR
-	std	r10,_DAR(r1)
-	std	r11,_DSISR(r1)
+	std	r10,_DEAR(r1)
+	std	r11,_ESR(r1)
 	std	r0,GPR0(r1);		/* save r0 in stackframe */	    \
 	std	r2,GPR2(r1);		/* save r2 in stackframe */	    \
 	SAVE_4GPRS(3, r1);		/* save r3 - r6 in stackframe */    \
@@ -1127,7 +1127,7 @@ found_iprot:
  * r3 = MAS0_TLBSEL (for the iprot array)
  * r4 = SPRN_TLBnCFG
  */
-	bl	invstr				/* Find our address */
+	bcl	20,31,$+4			/* Find our address */
 invstr:	mflr	r6				/* Make it accessible */
 	mfmsr	r7
 	rlwinm	r5,r7,27,31,31			/* extract MSR[IS] */
@@ -1196,7 +1196,7 @@ skpinv:	addi	r6,r6,1				/* Increment */
 	mfmsr	r6
 	xori	r6,r6,MSR_IS
 	mtspr	SPRN_SRR1,r6
-	bl	1f		/* Find our address */
+	bcl	20,31,$+4	/* Find our address */
 1:	mflr	r6
 	addi	r6,r6,(2f - 1b)
 	mtspr	SPRN_SRR0,r6
@@ -1256,7 +1256,7 @@ skpinv:	addi	r6,r6,1				/* Increment */
  * r4 = MAS0 w/TLBSEL & ESEL for the temp mapping
  */
 	/* Now we branch the new virtual address mapped by this entry */
-	bl	1f		/* Find our address */
+	bcl	20,31,$+4	/* Find our address */
 1:	mflr	r6
 	addi	r6,r6,(2f - 1b)
 	tovirt(r6,r6)
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index b990075285f5..b7ceb041743c 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -24,8 +24,8 @@
 #include <linux/slab.h>
 #include <linux/cma.h>
 #include <linux/hugetlb.h>
+#include <linux/debugfs.h>
 
-#include <asm/debugfs.h>
 #include <asm/page.h>
 #include <asm/prom.h>
 #include <asm/fadump.h>
@@ -1557,7 +1557,7 @@ static void fadump_init_files(void)
 		return;
 	}
 
-	debugfs_create_file("fadump_region", 0444, powerpc_debugfs_root, NULL,
+	debugfs_create_file("fadump_region", 0444, arch_debugfs_dir, NULL,
 			    &fadump_region_fops);
 
 	if (fw_dump.dump_active) {
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 6010adcee16e..ba4afe3b5a9c 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -91,8 +91,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	isync
 	/* enable use of FP after return */
 #ifdef CONFIG_PPC32
-	mfspr	r5,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */
-	tovirt(r5, r5)
+	addi	r5,r2,THREAD
 	lwz	r4,THREAD_FPEXC_MODE(r5)
 	ori	r9,r9,MSR_FP		/* enable FP for current */
 	or	r9,r9,r4
diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
index 8bccce6544b5..dedc17fac8f8 100644
--- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S
+++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 
 /* 1. Find the index of the entry we're executing in */
-	bl	invstr				/* Find our address */
+	bcl	20,31,$+4				/* Find our address */
 invstr:	mflr	r6				/* Make it accessible */
 	mfmsr	r7
 	rlwinm	r4,r7,27,31,31			/* extract MSR[IS] */
@@ -85,7 +85,7 @@ skpinv:	addi	r6,r6,1				/* Increment */
 	addi	r6,r6,10
 	slw	r6,r8,r6	/* convert to mask */
 
-	bl	1f		/* Find our address */
+	bcl	20,31,$+4	/* Find our address */
 1:	mflr	r7
 
 	mfspr	r8,SPRN_MAS3
@@ -117,7 +117,7 @@ skpinv:	addi	r6,r6,1				/* Increment */
 
 	xori	r6,r4,1
 	slwi	r6,r6,5		/* setup new context with other address space */
-	bl	1f		/* Find our address */
+	bcl	20,31,$+4	/* Find our address */
 1:	mflr	r9
 	rlwimi	r7,r9,0,20,31
 	addi	r7,r7,(2f - 1b)
@@ -207,7 +207,7 @@ next_tlb_setup:
 
 	lis	r7,MSR_KERNEL@h
 	ori	r7,r7,MSR_KERNEL@l
-	bl	1f			/* Find our address */
+	bcl	20,31,$+4		/* Find our address */
 1:	mflr	r9
 	rlwimi	r6,r9,0,20,31
 	addi	r6,r6,(2f - 1b)
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index ddc978a2d381..02d2928d1e01 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -70,7 +70,7 @@ _ENTRY(_start);
  * address.
  * r21 will be loaded with the physical runtime address of _stext
  */
-	bl	0f				/* Get our runtime address */
+	bcl	20,31,$+4			/* Get our runtime address */
 0:	mflr	r21				/* Make it accessible */
 	addis	r21,r21,(_stext - 0b)@ha
 	addi	r21,r21,(_stext - 0b)@l 	/* Get our current runtime base */
@@ -853,7 +853,7 @@ _GLOBAL(init_cpu_state)
 wmmucr:	mtspr	SPRN_MMUCR,r3			/* Put MMUCR */
 	sync
 
-	bl	invstr				/* Find our address */
+	bcl	20,31,$+4			/* Find our address */
 invstr:	mflr	r5				/* Make it accessible */
 	tlbsx	r23,0,r5			/* Find entry we are in */
 	li	r4,0				/* Start at TLB entry 0 */
@@ -1045,7 +1045,7 @@ head_start_47x:
 	sync
 
 	/* Find the entry we are running from */
-	bl	1f
+	bcl	20,31,$+4
 1:	mflr	r23
 	tlbsx	r23,0,r23
 	tlbre	r24,r23,0
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 79930b0bc781..f17ae2083733 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -712,6 +712,8 @@ _GLOBAL(copy_and_flush)
 	isync
 	blr
 
+_ASM_NOKPROBE_SYMBOL(copy_and_flush); /* Called in real mode */
+
 .align 8
 copy_to_here:
 
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 9a2f4265e6d2..0a9a0f301474 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -79,7 +79,7 @@ _ENTRY(_start);
 	mr	r23,r3
 	mr	r25,r4
 
-	bl	0f
+	bcl	20,31,$+4
 0:	mflr	r8
 	addis	r3,r8,(is_second_reloc - 0b)@ha
 	lwz	r19,(is_second_reloc - 0b)@l(r3)
@@ -1132,7 +1132,7 @@ _GLOBAL(switch_to_as1)
 	bne	1b
 
 	/* Get the tlb entry used by the current running code */
-	bl	0f
+	bcl	20,31,$+4
 0:	mflr	r4
 	tlbsx	0,r4
 
@@ -1166,7 +1166,7 @@ _GLOBAL(switch_to_as1)
 _GLOBAL(restore_to_as0)
 	mflr	r0
 
-	bl	0f
+	bcl	20,31,$+4
 0:	mflr	r9
 	addi	r9,r9,1f - 0b
 
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 21a638aff72f..91a3be14808b 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -22,7 +22,6 @@
 #include <asm/processor.h>
 #include <asm/sstep.h>
 #include <asm/debug.h>
-#include <asm/debugfs.h>
 #include <asm/hvcall.h>
 #include <asm/inst.h>
 #include <linux/uaccess.h>
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 21bbd615ca41..a73f3f70a657 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -8,7 +8,6 @@
 #include <asm/asm-prototypes.h>
 #include <asm/kup.h>
 #include <asm/cputime.h>
-#include <asm/interrupt.h>
 #include <asm/hw_irq.h>
 #include <asm/interrupt.h>
 #include <asm/kprobes.h>
@@ -93,8 +92,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
 	CT_WARN_ON(ct_state() == CONTEXT_KERNEL);
 	user_exit_irqoff();
 
-	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
-		BUG_ON(!(regs->msr & MSR_RI));
+	BUG_ON(regs_is_unrecoverable(regs));
 	BUG_ON(!(regs->msr & MSR_PR));
 	BUG_ON(arch_irq_disabled_regs(regs));
 
@@ -463,9 +461,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs)
 {
 	unsigned long ret;
 
-	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
-		BUG_ON(!(regs->msr & MSR_RI));
-	BUG_ON(!(regs->msr & MSR_PR));
+	BUG_ON(regs_is_unrecoverable(regs));
 	BUG_ON(arch_irq_disabled_regs(regs));
 	CT_WARN_ON(ct_state() == CONTEXT_USER);
 
@@ -496,10 +492,8 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
 	bool stack_store = current_thread_info()->flags &
 						_TIF_EMULATE_STACK_STORE;
 
-	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) &&
-	    unlikely(!(regs->msr & MSR_RI)))
+	if (regs_is_unrecoverable(regs))
 		unrecoverable_exception(regs);
-	BUG_ON(regs->msr & MSR_PR);
 	/*
 	 * CT_WARN_ON comes here via program_check_exception,
 	 * so avoid recursion.
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 2af89a5e379f..07093b7cdcb9 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -473,7 +473,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
 	BUG_ON(direction == DMA_NONE);
 
 	if ((nelems == 0) || !tbl)
-		return 0;
+		return -EINVAL;
 
 	outs = s = segstart = &sglist[0];
 	outcount = 1;
@@ -575,7 +575,6 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
 	 */
 	if (outcount < incount) {
 		outs = sg_next(outs);
-		outs->dma_address = DMA_MAPPING_ERROR;
 		outs->dma_length = 0;
 	}
 
@@ -593,13 +592,12 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
 			npages = iommu_num_pages(s->dma_address, s->dma_length,
 						 IOMMU_PAGE_SIZE(tbl));
 			__iommu_free(tbl, vaddr, npages);
-			s->dma_address = DMA_MAPPING_ERROR;
 			s->dma_length = 0;
 		}
 		if (s == outs)
 			break;
 	}
-	return 0;
+	return -EIO;
 }
 
 
@@ -690,32 +688,24 @@ static void iommu_table_reserve_pages(struct iommu_table *tbl,
 	if (tbl->it_offset == 0)
 		set_bit(0, tbl->it_map);
 
-	tbl->it_reserved_start = res_start;
-	tbl->it_reserved_end = res_end;
-
-	/* Check if res_start..res_end isn't empty and overlaps the table */
-	if (res_start && res_end &&
-			(tbl->it_offset + tbl->it_size < res_start ||
-			 res_end < tbl->it_offset))
-		return;
+	if (res_start < tbl->it_offset)
+		res_start = tbl->it_offset;
 
-	for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
-		set_bit(i - tbl->it_offset, tbl->it_map);
-}
+	if (res_end > (tbl->it_offset + tbl->it_size))
+		res_end = tbl->it_offset + tbl->it_size;
 
-static void iommu_table_release_pages(struct iommu_table *tbl)
-{
-	int i;
+	/* Check if res_start..res_end is a valid range in the table */
+	if (res_start >= res_end) {
+		tbl->it_reserved_start = tbl->it_offset;
+		tbl->it_reserved_end = tbl->it_offset;
+		return;
+	}
 
-	/*
-	 * In case we have reserved the first bit, we should not emit
-	 * the warning below.
-	 */
-	if (tbl->it_offset == 0)
-		clear_bit(0, tbl->it_map);
+	tbl->it_reserved_start = res_start;
+	tbl->it_reserved_end = res_end;
 
 	for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
-		clear_bit(i - tbl->it_offset, tbl->it_map);
+		set_bit(i - tbl->it_offset, tbl->it_map);
 }
 
 /*
@@ -779,6 +769,22 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
 	return tbl;
 }
 
+bool iommu_table_in_use(struct iommu_table *tbl)
+{
+	unsigned long start = 0, end;
+
+	/* ignore reserved bit0 */
+	if (tbl->it_offset == 0)
+		start = 1;
+	end = tbl->it_reserved_start - tbl->it_offset;
+	if (find_next_bit(tbl->it_map, end, start) != end)
+		return true;
+
+	start = tbl->it_reserved_end - tbl->it_offset;
+	end = tbl->it_size;
+	return find_next_bit(tbl->it_map, end, start) != end;
+}
+
 static void iommu_table_free(struct kref *kref)
 {
 	struct iommu_table *tbl;
@@ -795,10 +801,8 @@ static void iommu_table_free(struct kref *kref)
 
 	iommu_debugfs_del(tbl);
 
-	iommu_table_release_pages(tbl);
-
 	/* verify that table contains no entries */
-	if (!bitmap_empty(tbl->it_map, tbl->it_size))
+	if (iommu_table_in_use(tbl))
 		pr_warn("%s: Unexpected TCEs\n", __func__);
 
 	/* free bitmap */
@@ -1099,14 +1103,9 @@ int iommu_take_ownership(struct iommu_table *tbl)
 	for (i = 0; i < tbl->nr_pools; i++)
 		spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock);
 
-	iommu_table_release_pages(tbl);
-
-	if (!bitmap_empty(tbl->it_map, tbl->it_size)) {
+	if (iommu_table_in_use(tbl)) {
 		pr_err("iommu_tce: it_map is not empty");
 		ret = -EBUSY;
-		/* Undo iommu_table_release_pages, i.e. restore bit#0, etc */
-		iommu_table_reserve_pages(tbl, tbl->it_reserved_start,
-				tbl->it_reserved_end);
 	} else {
 		memset(tbl->it_map, 0xff, sz);
 	}
diff --git a/arch/powerpc/kernel/kdebugfs.c b/arch/powerpc/kernel/kdebugfs.c
new file mode 100644
index 000000000000..36d3124d5a8b
--- /dev/null
+++ b/arch/powerpc/kernel/kdebugfs.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <linux/init.h>
+
+struct dentry *arch_debugfs_dir;
+EXPORT_SYMBOL(arch_debugfs_dir);
+
+static int __init arch_kdebugfs_init(void)
+{
+	arch_debugfs_dir = debugfs_create_dir("powerpc", NULL);
+	return 0;
+}
+arch_initcall(arch_kdebugfs_init);
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 5be96feccb55..fb7de3543c03 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -29,7 +29,7 @@ _GLOBAL(reloc_offset)
 	li	r3, 0
 _GLOBAL(add_reloc_offset)
 	mflr	r0
-	bl	1f
+	bcl	20,31,$+4
 1:	mflr	r5
 	PPC_LL	r4,(2f-1b)(r5)
 	subf	r5,r4,r5
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 39ab15419592..e5127b19fec2 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -67,7 +67,7 @@ _GLOBAL(reloc_got2)
 	srwi.	r8,r8,2
 	beqlr
 	mtctr	r8
-	bl	1f
+	bcl	20,31,$+4
 1:	mflr	r0
 	lis	r4,1b@ha
 	addi	r4,r4,1b@l
@@ -237,7 +237,7 @@ _GLOBAL(copy_page)
 	addi	r3,r3,-4
 
 0:	twnei	r5, 0	/* WARN if r3 is not cache aligned */
-	EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
+	EMIT_WARN_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
 
 	addi	r4,r4,-4
 
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 4b761a18a74d..d38a019b38e1 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -255,7 +255,7 @@ _GLOBAL(scom970_write)
  * Physical (hardware) cpu id should be in r3.
  */
 _GLOBAL(kexec_wait)
-	bl	1f
+	bcl	20,31,$+4
 1:	mflr	r5
 	addi	r5,r5,kexec_flag-1b
 
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 001e90cd8948..c3573430919d 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -29,6 +29,7 @@
 #include <linux/slab.h>
 #include <linux/vgaarb.h>
 #include <linux/numa.h>
+#include <linux/msi.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -1060,11 +1061,16 @@ void pcibios_bus_add_device(struct pci_dev *dev)
 
 int pcibios_add_device(struct pci_dev *dev)
 {
+	struct irq_domain *d;
+
 #ifdef CONFIG_PCI_IOV
 	if (ppc_md.pcibios_fixup_sriov)
 		ppc_md.pcibios_fixup_sriov(dev);
 #endif /* CONFIG_PCI_IOV */
 
+	d = dev_get_msi_domain(&dev->bus->dev);
+	if (d)
+		dev_set_msi_domain(&dev->dev, d);
 	return 0;
 }
 
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 185beb290580..50436b52c213 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1499,7 +1499,7 @@ static void __show_regs(struct pt_regs *regs)
 	    trap == INTERRUPT_DATA_STORAGE ||
 	    trap == INTERRUPT_ALIGNMENT) {
 		if (IS_ENABLED(CONFIG_4xx) || IS_ENABLED(CONFIG_BOOKE))
-			pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr);
+			pr_cont("DEAR: "REG" ESR: "REG" ", regs->dear, regs->esr);
 		else
 			pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
 	}
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index f620e04dc9bf..2e67588f6f6e 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -11,7 +11,6 @@
 
 #undef DEBUG
 
-#include <stdarg.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/init.h>
@@ -640,7 +639,9 @@ static void __init early_reserve_mem(void)
 	}
 #endif /* CONFIG_BLK_DEV_INITRD */
 
-#ifdef CONFIG_PPC32
+	if (!IS_ENABLED(CONFIG_PPC32))
+		return;
+
 	/* 
 	 * Handle the case where we might be booting from an old kexec
 	 * image that setup the mem_rsvmap as pairs of 32-bit values
@@ -661,7 +662,6 @@ static void __init early_reserve_mem(void)
 		}
 		return;
 	}
-#endif
 }
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index a5bf355ce1d6..18b04b08b983 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -14,7 +14,7 @@
 /* we cannot use FORTIFY as it brings in new symbols */
 #define __NO_FORTIFY
 
-#include <stdarg.h>
+#include <linux/stdarg.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/init.h>
@@ -1096,7 +1096,8 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
 #else
 		0,
 #endif
-		.associativity = OV5_FEAT(OV5_TYPE1_AFFINITY) | OV5_FEAT(OV5_PRRN),
+		.associativity = OV5_FEAT(OV5_FORM1_AFFINITY) | OV5_FEAT(OV5_PRRN) |
+		OV5_FEAT(OV5_FORM2_AFFINITY),
 		.bin_opts = OV5_FEAT(OV5_RESIZE_HPT) | OV5_FEAT(OV5_HP_EVT),
 		.micro_checkpoint = 0,
 		.reserved0 = 0,
diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
index 0a0a33eb0d28..7c7093c17c45 100644
--- a/arch/powerpc/kernel/ptrace/ptrace.c
+++ b/arch/powerpc/kernel/ptrace/ptrace.c
@@ -373,8 +373,12 @@ void __init pt_regs_check(void)
 		     offsetof(struct user_pt_regs, trap));
 	BUILD_BUG_ON(offsetof(struct pt_regs, dar) !=
 		     offsetof(struct user_pt_regs, dar));
+	BUILD_BUG_ON(offsetof(struct pt_regs, dear) !=
+		     offsetof(struct user_pt_regs, dar));
 	BUILD_BUG_ON(offsetof(struct pt_regs, dsisr) !=
 		     offsetof(struct user_pt_regs, dsisr));
+	BUILD_BUG_ON(offsetof(struct pt_regs, esr) !=
+		     offsetof(struct user_pt_regs, dsisr));
 	BUILD_BUG_ON(offsetof(struct pt_regs, result) !=
 		     offsetof(struct user_pt_regs, result));
 
diff --git a/arch/powerpc/kernel/reloc_32.S b/arch/powerpc/kernel/reloc_32.S
index 10e96f3e22fe..0508c14b4c28 100644
--- a/arch/powerpc/kernel/reloc_32.S
+++ b/arch/powerpc/kernel/reloc_32.S
@@ -30,7 +30,7 @@ R_PPC_RELATIVE = 22
 _GLOBAL(relocate)
 
 	mflr	r0		/* Save our LR */
-	bl	0f		/* Find our current runtime address */
+	bcl	20,31,$+4	/* Find our current runtime address */
 0:	mflr	r12		/* Make it accessible */
 	mtlr	r0
 
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 99f2cce635fb..ff80bbad22a5 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -7,7 +7,7 @@
  * Copyright (C) 2001 IBM.
  */
 
-#include <stdarg.h>
+#include <linux/stdarg.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/spinlock.h>
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 8561dfb33f24..32ee17753eb4 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -429,7 +429,7 @@ static void rtas_event_scan(struct work_struct *w)
 
 	do_event_scan();
 
-	get_online_cpus();
+	cpus_read_lock();
 
 	/* raw_ OK because just using CPU as starting point. */
 	cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
@@ -451,7 +451,7 @@ static void rtas_event_scan(struct work_struct *w)
 	schedule_delayed_work_on(cpu, &event_scan_work,
 		__round_jiffies_relative(event_scan_delay, cpu));
 
-	put_online_cpus();
+	cpus_read_unlock();
 }
 
 #ifdef CONFIG_PPC64
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index cc51fa52e783..1a998490fe60 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -11,10 +11,10 @@
 #include <linux/nospec.h>
 #include <linux/prctl.h>
 #include <linux/seq_buf.h>
+#include <linux/debugfs.h>
 
 #include <asm/asm-prototypes.h>
 #include <asm/code-patching.h>
-#include <asm/debugfs.h>
 #include <asm/security_features.h>
 #include <asm/setup.h>
 #include <asm/inst.h>
@@ -106,7 +106,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_barrier_nospec, barrier_nospec_get,
 static __init int barrier_nospec_debugfs_init(void)
 {
 	debugfs_create_file_unsafe("barrier_nospec", 0600,
-				   powerpc_debugfs_root, NULL,
+				   arch_debugfs_dir, NULL,
 				   &fops_barrier_nospec);
 	return 0;
 }
@@ -114,7 +114,7 @@ device_initcall(barrier_nospec_debugfs_init);
 
 static __init int security_feature_debugfs_init(void)
 {
-	debugfs_create_x64("security_features", 0400, powerpc_debugfs_root,
+	debugfs_create_x64("security_features", 0400, arch_debugfs_dir,
 			   &powerpc_security_features);
 	return 0;
 }
@@ -420,7 +420,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set,
 
 static __init int stf_barrier_debugfs_init(void)
 {
-	debugfs_create_file_unsafe("stf_barrier", 0600, powerpc_debugfs_root,
+	debugfs_create_file_unsafe("stf_barrier", 0600, arch_debugfs_dir,
 				   NULL, &fops_stf_barrier);
 	return 0;
 }
@@ -748,7 +748,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get,
 static __init int count_cache_flush_debugfs_init(void)
 {
 	debugfs_create_file_unsafe("count_cache_flush", 0600,
-				   powerpc_debugfs_root, NULL,
+				   arch_debugfs_dir, NULL,
 				   &fops_count_cache_flush);
 	return 0;
 }
@@ -834,9 +834,9 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set
 
 static __init int rfi_flush_debugfs_init(void)
 {
-	debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
-	debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush);
-	debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush);
+	debugfs_create_file("rfi_flush", 0600, arch_debugfs_dir, NULL, &fops_rfi_flush);
+	debugfs_create_file("entry_flush", 0600, arch_debugfs_dir, NULL, &fops_entry_flush);
+	debugfs_create_file("uaccess_flush", 0600, arch_debugfs_dir, NULL, &fops_uaccess_flush);
 	return 0;
 }
 device_initcall(rfi_flush_debugfs_init);
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index aa9c2d01424a..b1e43b69a559 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -33,7 +33,6 @@
 #include <linux/of_platform.h>
 #include <linux/hugetlb.h>
 #include <linux/pgtable.h>
-#include <asm/debugfs.h>
 #include <asm/io.h>
 #include <asm/paca.h>
 #include <asm/prom.h>
@@ -773,18 +772,6 @@ static int __init check_cache_coherency(void)
 late_initcall(check_cache_coherency);
 #endif /* CONFIG_CHECK_CACHE_COHERENCY */
 
-#ifdef CONFIG_DEBUG_FS
-struct dentry *powerpc_debugfs_root;
-EXPORT_SYMBOL(powerpc_debugfs_root);
-
-static int powerpc_debugfs_init(void)
-{
-	powerpc_debugfs_root = debugfs_create_dir("powerpc", NULL);
-	return 0;
-}
-arch_initcall(powerpc_debugfs_init);
-#endif
-
 void ppc_printk_progress(char *s, unsigned short hex)
 {
 	pr_info("%s\n", s);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 1ff258f6c76c..eaa79a0996d1 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -32,7 +32,6 @@
 #include <linux/nmi.h>
 #include <linux/pgtable.h>
 
-#include <asm/debugfs.h>
 #include <asm/kvm_guest.h>
 #include <asm/io.h>
 #include <asm/kdump.h>
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 447b78a87c8f..9cc7d3dbf439 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -78,6 +78,7 @@ struct task_struct *secondary_current;
 bool has_big_cores;
 bool coregroup_enabled;
 bool thread_group_shares_l2;
+bool thread_group_shares_l3;
 
 DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
 DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
@@ -101,7 +102,7 @@ enum {
 
 #define MAX_THREAD_LIST_SIZE	8
 #define THREAD_GROUP_SHARE_L1   1
-#define THREAD_GROUP_SHARE_L2   2
+#define THREAD_GROUP_SHARE_L2_L3 2
 struct thread_groups {
 	unsigned int property;
 	unsigned int nr_groups;
@@ -122,14 +123,20 @@ static struct thread_groups_list tgl[NR_CPUS] __initdata;
  * On big-cores system, thread_group_l1_cache_map for each CPU corresponds to
  * the set its siblings that share the L1-cache.
  */
-static DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
+DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
 
 /*
  * On some big-cores system, thread_group_l2_cache_map for each CPU
  * corresponds to the set its siblings within the core that share the
  * L2-cache.
  */
-static DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
+DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
+
+/*
+ * On P10, thread_group_l3_cache_map for each CPU is equal to the
+ * thread_group_l2_cache_map
+ */
+DEFINE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map);
 
 /* SMP operations for this machine */
 struct smp_ops_t *smp_ops;
@@ -889,19 +896,41 @@ out:
 	return tg;
 }
 
+static int update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg, int cpu, int cpu_group_start)
+{
+	int first_thread = cpu_first_thread_sibling(cpu);
+	int i;
+
+	zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
+
+	for (i = first_thread; i < first_thread + threads_per_core; i++) {
+		int i_group_start = get_cpu_thread_group_start(i, tg);
+
+		if (unlikely(i_group_start == -1)) {
+			WARN_ON_ONCE(1);
+			return -ENODATA;
+		}
+
+		if (i_group_start == cpu_group_start)
+			cpumask_set_cpu(i, *mask);
+	}
+
+	return 0;
+}
+
 static int __init init_thread_group_cache_map(int cpu, int cache_property)
 
 {
-	int first_thread = cpu_first_thread_sibling(cpu);
-	int i, cpu_group_start = -1, err = 0;
+	int cpu_group_start = -1, err = 0;
 	struct thread_groups *tg = NULL;
 	cpumask_var_t *mask = NULL;
 
 	if (cache_property != THREAD_GROUP_SHARE_L1 &&
-	    cache_property != THREAD_GROUP_SHARE_L2)
+	    cache_property != THREAD_GROUP_SHARE_L2_L3)
 		return -EINVAL;
 
 	tg = get_thread_groups(cpu, cache_property, &err);
+
 	if (!tg)
 		return err;
 
@@ -912,25 +941,18 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property)
 		return -ENODATA;
 	}
 
-	if (cache_property == THREAD_GROUP_SHARE_L1)
+	if (cache_property == THREAD_GROUP_SHARE_L1) {
 		mask = &per_cpu(thread_group_l1_cache_map, cpu);
-	else if (cache_property == THREAD_GROUP_SHARE_L2)
+		update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
+	}
+	else if (cache_property == THREAD_GROUP_SHARE_L2_L3) {
 		mask = &per_cpu(thread_group_l2_cache_map, cpu);
-
-	zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
-
-	for (i = first_thread; i < first_thread + threads_per_core; i++) {
-		int i_group_start = get_cpu_thread_group_start(i, tg);
-
-		if (unlikely(i_group_start == -1)) {
-			WARN_ON_ONCE(1);
-			return -ENODATA;
-		}
-
-		if (i_group_start == cpu_group_start)
-			cpumask_set_cpu(i, *mask);
+		update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
+		mask = &per_cpu(thread_group_l3_cache_map, cpu);
+		update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
 	}
 
+
 	return 0;
 }
 
@@ -1020,14 +1042,16 @@ static int __init init_big_cores(void)
 	has_big_cores = true;
 
 	for_each_possible_cpu(cpu) {
-		int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2);
+		int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2_L3);
 
 		if (err)
 			return err;
 	}
 
 	thread_group_shares_l2 = true;
-	pr_debug("L2 cache only shared by the threads in the small core\n");
+	thread_group_shares_l3 = true;
+	pr_debug("L2/L3 cache only shared by the threads in the small core\n");
+
 	return 0;
 }
 
@@ -1085,7 +1109,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	}
 
 	if (cpu_to_chip_id(boot_cpuid) != -1) {
-		int idx = num_possible_cpus() / threads_per_core;
+		int idx = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
 
 		/*
 		 * All threads of a core will all belong to the same core,
@@ -1376,7 +1400,7 @@ static bool update_mask_by_l2(int cpu, cpumask_var_t *mask)
 	l2_cache = cpu_to_l2cache(cpu);
 	if (!l2_cache || !*mask) {
 		/* Assume only core siblings share cache with this CPU */
-		for_each_cpu(i, submask_fn(cpu))
+		for_each_cpu(i, cpu_sibling_mask(cpu))
 			set_cpus_related(cpu, i, cpu_l2_cache_mask);
 
 		return false;
@@ -1418,6 +1442,8 @@ static void remove_cpu_from_masks(int cpu)
 	struct cpumask *(*mask_fn)(int) = cpu_sibling_mask;
 	int i;
 
+	unmap_cpu_from_node(cpu);
+
 	if (shared_caches)
 		mask_fn = cpu_l2_cache_mask;
 
@@ -1502,7 +1528,9 @@ static void add_cpu_to_masks(int cpu)
 	 * This CPU will not be in the online mask yet so we need to manually
 	 * add it to it's own thread sibling mask.
 	 */
+	map_cpu_to_node(cpu, cpu_to_node(cpu));
 	cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
+	cpumask_set_cpu(cpu, cpu_core_mask(cpu));
 
 	for (i = first_thread; i < first_thread + threads_per_core; i++)
 		if (cpu_online(i))
@@ -1520,11 +1548,6 @@ static void add_cpu_to_masks(int cpu)
 	if (chip_id_lookup_table && ret)
 		chip_id = cpu_to_chip_id(cpu);
 
-	if (chip_id == -1) {
-		cpumask_copy(per_cpu(cpu_core_map, cpu), cpu_cpu_mask(cpu));
-		goto out;
-	}
-
 	if (shared_caches)
 		submask_fn = cpu_l2_cache_mask;
 
@@ -1534,6 +1557,10 @@ static void add_cpu_to_masks(int cpu)
 	/* Skip all CPUs already part of current CPU core mask */
 	cpumask_andnot(mask, cpu_online_mask, cpu_core_mask(cpu));
 
+	/* If chip_id is -1; limit the cpu_core_mask to within DIE*/
+	if (chip_id == -1)
+		cpumask_and(mask, mask, cpu_cpu_mask(cpu));
+
 	for_each_cpu(i, mask) {
 		if (chip_id == cpu_to_chip_id(i)) {
 			or_cpumasks_related(cpu, i, submask_fn, cpu_core_mask);
@@ -1543,7 +1570,6 @@ static void add_cpu_to_masks(int cpu)
 		}
 	}
 
-out:
 	free_cpumask_var(mask);
 }
 
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 2b0d04a1b7d2..9e4a4a7af380 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -8,6 +8,7 @@
  * Copyright 2018 Nick Piggin, Michael Ellerman, IBM Corp.
  */
 
+#include <linux/delay.h>
 #include <linux/export.h>
 #include <linux/kallsyms.h>
 #include <linux/module.h>
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index bf4ae0f0e36c..825931e400df 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -41,20 +41,13 @@ static inline long do_mmap2(unsigned long addr, size_t len,
 			unsigned long prot, unsigned long flags,
 			unsigned long fd, unsigned long off, int shift)
 {
-	long ret = -EINVAL;
-
 	if (!arch_validate_prot(prot, addr))
-		goto out;
+		return -EINVAL;
 
-	if (shift) {
-		if (off & ((1 << shift) - 1))
-			goto out;
-		off >>= shift;
-	}
+	if (!IS_ALIGNED(off, 1 << shift))
+		return -EINVAL;
 
-	ret = ksys_mmap_pgoff(addr, len, prot, flags, fd, off);
-out:
-	return ret;
+	return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> shift);
 }
 
 SYSCALL_DEFINE6(mmap2, unsigned long, addr, size_t, len,
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 6f3953f2a0d5..7bef917cc84e 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -330,10 +330,10 @@
 256	64	sys_debug_setcontext		sys_ni_syscall
 256	spu	sys_debug_setcontext		sys_ni_syscall
 # 257 reserved for vserver
-258	nospu	migrate_pages			sys_migrate_pages		compat_sys_migrate_pages
-259	nospu	mbind				sys_mbind			compat_sys_mbind
-260	nospu	get_mempolicy			sys_get_mempolicy		compat_sys_get_mempolicy
-261	nospu	set_mempolicy			sys_set_mempolicy		compat_sys_set_mempolicy
+258	nospu	migrate_pages			sys_migrate_pages
+259	nospu	mbind				sys_mbind
+260	nospu	get_mempolicy			sys_get_mempolicy
+261	nospu	set_mempolicy			sys_set_mempolicy
 262	nospu	mq_open				sys_mq_open			compat_sys_mq_open
 263	nospu	mq_unlink			sys_mq_unlink
 264	32	mq_timedsend			sys_mq_timedsend_time32
@@ -381,7 +381,7 @@
 298	common	faccessat			sys_faccessat
 299	common	get_robust_list			sys_get_robust_list		compat_sys_get_robust_list
 300	common	set_robust_list			sys_set_robust_list		compat_sys_set_robust_list
-301	common	move_pages			sys_move_pages			compat_sys_move_pages
+301	common	move_pages			sys_move_pages
 302	common	getcpu				sys_getcpu
 303	nospu	epoll_pwait			sys_epoll_pwait			compat_sys_epoll_pwait
 304	32	utimensat			sys_utimensat_time32
@@ -526,3 +526,5 @@
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
+# 447 reserved for memfd_secret
+448	common	process_mrelease		sys_process_mrelease
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index b9a047d92ec0..8e83d19fe8fa 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -164,7 +164,7 @@ static void tau_work_func(struct work_struct *work)
 	queue_work(tau_workq, work);
 }
 
-DECLARE_WORK(tau_work, tau_work_func);
+static DECLARE_WORK(tau_work, tau_work_func);
 
 /*
  * setup the TAU
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index c487ba5a6e11..934d8ae66cc6 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -31,6 +31,7 @@
 #include <linux/export.h>
 #include <linux/sched.h>
 #include <linux/sched/clock.h>
+#include <linux/sched/cputime.h>
 #include <linux/kernel.h>
 #include <linux/param.h>
 #include <linux/string.h>
@@ -52,8 +53,6 @@
 #include <linux/irq_work.h>
 #include <linux/of_clk.h>
 #include <linux/suspend.h>
-#include <linux/sched/cputime.h>
-#include <linux/sched/clock.h>
 #include <linux/processor.h>
 #include <asm/trace.h>
 
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index cd5f633c9b1b..aac8c0412ff9 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -37,10 +37,10 @@
 #include <linux/smp.h>
 #include <linux/console.h>
 #include <linux/kmsg_dump.h>
+#include <linux/debugfs.h>
 
 #include <asm/emulated_ops.h>
 #include <linux/uaccess.h>
-#include <asm/debugfs.h>
 #include <asm/interrupt.h>
 #include <asm/io.h>
 #include <asm/machdep.h>
@@ -427,7 +427,7 @@ void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
 	return;
 
 nonrecoverable:
-	regs_set_return_msr(regs, regs->msr & ~MSR_RI);
+	regs_set_unrecoverable(regs);
 #endif
 }
 DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception)
@@ -497,7 +497,7 @@ out:
 		die("Unrecoverable nested System Reset", regs, SIGABRT);
 #endif
 	/* Must die if the interrupt is not recoverable */
-	if (!(regs->msr & MSR_RI)) {
+	if (regs_is_unrecoverable(regs)) {
 		/* For the reason explained in die_mce, nmi_exit before die */
 		nmi_exit();
 		die("Unrecoverable System Reset", regs, SIGABRT);
@@ -549,7 +549,7 @@ static inline int check_io_access(struct pt_regs *regs)
 			printk(KERN_DEBUG "%s bad port %lx at %p\n",
 			       (*nip & 0x100)? "OUT to": "IN from",
 			       regs->gpr[rb] - _IO_BASE, nip);
-			regs_set_return_msr(regs, regs->msr | MSR_RI);
+			regs_set_recoverable(regs);
 			regs_set_return_ip(regs, extable_fixup(entry));
 			return 1;
 		}
@@ -561,7 +561,7 @@ static inline int check_io_access(struct pt_regs *regs)
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 /* On 4xx, the reason for the machine check or program exception
    is in the ESR. */
-#define get_reason(regs)	((regs)->dsisr)
+#define get_reason(regs)	((regs)->esr)
 #define REASON_FP		ESR_FP
 #define REASON_ILLEGAL		(ESR_PIL | ESR_PUO)
 #define REASON_PRIVILEGED	ESR_PPR
@@ -839,7 +839,7 @@ DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception)
 
 bail:
 	/* Must die if the interrupt is not recoverable */
-	if (!(regs->msr & MSR_RI))
+	if (regs_is_unrecoverable(regs))
 		die_mce("Unrecoverable Machine check", regs, SIGBUS);
 
 #ifdef CONFIG_PPC_BOOK3S_64
@@ -1481,8 +1481,13 @@ static void do_program_check(struct pt_regs *regs)
 
 		if (!(regs->msr & MSR_PR) &&  /* not user-mode */
 		    report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) {
-			regs_add_return_ip(regs, 4);
-			return;
+			const struct exception_table_entry *entry;
+
+			entry = search_exception_tables(bugaddr);
+			if (entry) {
+				regs_set_return_ip(regs, extable_fixup(entry) + regs->nip - bugaddr);
+				return;
+			}
 		}
 		_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
 		return;
@@ -2214,11 +2219,6 @@ DEFINE_INTERRUPT_HANDLER(kernel_bad_stack)
 	die("Bad kernel stack pointer", regs, SIGABRT);
 }
 
-void __init trap_init(void)
-{
-}
-
-
 #ifdef CONFIG_PPC_EMULATED_STATS
 
 #define WARN_EMULATED_SETUP(type)	.type = { .name = #type }
@@ -2271,7 +2271,7 @@ static int __init ppc_warn_emulated_init(void)
 	struct ppc_emulated_entry *entries = (void *)&ppc_emulated;
 
 	dir = debugfs_create_dir("emulated_instructions",
-				 powerpc_debugfs_root);
+				 arch_debugfs_dir);
 
 	debugfs_create_u32("do_warn", 0644, dir, &ppc_warn_emulated);
 
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index 01595e8cafe7..b1544b2f6321 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -5,7 +5,7 @@
  * c 2001 PPC 64 Team, IBM Corp
  */
 
-#include <stdarg.h>
+#include <linux/stdarg.h>
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <linux/console.h>
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index fc120fac1910..ba03eedfdcd8 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -65,9 +65,8 @@ _GLOBAL(load_up_altivec)
 1:
 	/* enable use of VMX after return */
 #ifdef CONFIG_PPC32
-	mfspr	r5,SPRN_SPRG_THREAD		/* current task's THREAD (phys) */
+	addi	r5,r2,THREAD
 	oris	r9,r9,MSR_VEC@h
-	tovirt(r5, r5)
 #else
 	ld	r4,PACACURRENT(r13)
 	addi	r5,r4,THREAD		/* Get THREAD */
@@ -81,7 +80,6 @@ _GLOBAL(load_up_altivec)
 	li	r4,1
 	stb	r4,THREAD_LOAD_VEC(r5)
 	addi	r6,r5,THREAD_VRSTATE
-	li	r4,1
 	li	r10,VRSTATE_VSCR
 	stw	r4,THREAD_USED_VR(r5)
 	lvx	v0,r10,r6
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 8a449b2d8715..89c069d664a5 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -64,15 +64,18 @@ int default_machine_kexec_prepare(struct kimage *image)
 			begin = image->segment[i].mem;
 			end = begin + image->segment[i].memsz;
 
-			if ((begin < high) && (end > low))
+			if ((begin < high) && (end > low)) {
+				of_node_put(node);
 				return -ETXTBSY;
+			}
 		}
 	}
 
 	return 0;
 }
 
-static void copy_segments(unsigned long ind)
+/* Called during kexec sequence with MMU off */
+static notrace void copy_segments(unsigned long ind)
 {
 	unsigned long entry;
 	unsigned long *ptr;
@@ -105,7 +108,8 @@ static void copy_segments(unsigned long ind)
 	}
 }
 
-void kexec_copy_flush(struct kimage *image)
+/* Called during kexec sequence with MMU off */
+notrace void kexec_copy_flush(struct kimage *image)
 {
 	long i, nr_segments = image->nr_segments;
 	struct  kexec_segment ranges[KEXEC_SEGMENT_MAX];
diff --git a/arch/powerpc/kexec/relocate_32.S b/arch/powerpc/kexec/relocate_32.S
index 61946c19e07c..cf6e52bdf8d8 100644
--- a/arch/powerpc/kexec/relocate_32.S
+++ b/arch/powerpc/kexec/relocate_32.S
@@ -93,7 +93,7 @@ wmmucr:
 	 * Invalidate all the TLB entries except the current entry
 	 * where we are running from
 	 */
-	bl	0f				/* Find our address */
+	bcl	20,31,$+4			/* Find our address */
 0:	mflr	r5				/* Make it accessible */
 	tlbsx	r23,0,r5			/* Find entry we are in */
 	li	r4,0				/* Start at TLB entry 0 */
@@ -158,7 +158,7 @@ write_out:
 	/* Switch to other address space in MSR */
 	insrwi	r9, r7, 1, 26		/* Set MSR[IS] = r7 */
 
-	bl	1f
+	bcl	20,31,$+4
 1:	mflr	r8
 	addi	r8, r8, (2f-1b)		/* Find the target offset */
 
@@ -202,7 +202,7 @@ next_tlb:
 	li	r9,0
 	insrwi	r9, r7, 1, 26			/* Set MSR[IS] = r7 */
 
-	bl	1f
+	bcl	20,31,$+4
 1:	mflr	r8
 	and	r8, r8, r11			/* Get our offset within page */
 	addi	r8, r8, (2f-1b)
@@ -240,7 +240,7 @@ setup_map_47x:
 	sync
 
 	/* Find the entry we are running from */
-	bl	2f
+	bcl	20,31,$+4
 2:	mflr	r23
 	tlbsx	r23, 0, r23
 	tlbre	r24, r23, 0			/* TLB Word 0 */
@@ -296,7 +296,7 @@ clear_utlb_entry:
 	/* Update the msr to the new TS */
 	insrwi	r5, r7, 1, 26
 
-	bl	1f
+	bcl	20,31,$+4
 1:	mflr	r6
 	addi	r6, r6, (2f-1b)
 
@@ -355,7 +355,7 @@ write_utlb:
 	/* Defaults to 256M */
 	lis	r10, 0x1000
 
-	bl	1f
+	bcl	20,31,$+4
 1:	mflr	r4
 	addi	r4, r4, (2f-1b)			/* virtual address  of 2f */
 
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index e45644657d49..ff581d70f20c 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -38,7 +38,6 @@ config KVM_BOOK3S_32_HANDLER
 config KVM_BOOK3S_64_HANDLER
 	bool
 	select KVM_BOOK3S_HANDLER
-	select PPC_DAWR_FORCE_ENABLE
 
 config KVM_BOOK3S_PR_POSSIBLE
 	bool
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 79833f78d1da..b785f6772391 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -43,8 +43,6 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
 	STATS_DESC_ICOUNTER(VM, num_2M_pages),
 	STATS_DESC_ICOUNTER(VM, num_1G_pages)
 };
-static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
-		sizeof(struct kvm_vm_stat) / sizeof(u64));
 
 const struct kvm_stats_header kvm_vm_stats_header = {
 	.name_size = KVM_STATS_NAME_SIZE,
@@ -71,7 +69,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
 	STATS_DESC_COUNTER(VCPU, emulated_inst_exits),
 	STATS_DESC_COUNTER(VCPU, dec_exits),
 	STATS_DESC_COUNTER(VCPU, ext_intr_exits),
-	STATS_DESC_TIME_NSEC(VCPU, halt_wait_ns),
 	STATS_DESC_COUNTER(VCPU, halt_successful_wait),
 	STATS_DESC_COUNTER(VCPU, dbell_exits),
 	STATS_DESC_COUNTER(VCPU, gdbell_exits),
@@ -88,8 +85,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
 	STATS_DESC_COUNTER(VCPU, pthru_host),
 	STATS_DESC_COUNTER(VCPU, pthru_bad_aff)
 };
-static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
-		sizeof(struct kvm_vcpu_stat) / sizeof(u64));
 
 const struct kvm_stats_header kvm_vcpu_stats_header = {
 	.name_size = KVM_STATS_NAME_SIZE,
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index 740e51def5a5..58391b4b32ed 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -23,7 +23,8 @@ extern int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu,
 extern int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu,
 					int sprn, ulong *spr_val);
 extern int kvmppc_book3s_init_pr(void);
-extern void kvmppc_book3s_exit_pr(void);
+void kvmppc_book3s_exit_pr(void);
+extern int kvmppc_handle_exit_pr(struct kvm_vcpu *vcpu, unsigned int exit_nr);
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 extern void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val);
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 26b8b27a3755..feee40cb2ba1 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -196,7 +196,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	hva_t ptegp;
 	u64 pteg[16];
 	u64 avpn = 0;
-	u64 v, r;
+	u64 r;
 	u64 v_val, v_mask;
 	u64 eaddr_mask;
 	int i;
@@ -285,7 +285,6 @@ do_second:
 		goto do_second;
 	}
 
-	v = be64_to_cpu(pteg[i]);
 	r = be64_to_cpu(pteg[i+1]);
 	pp = (r & HPTE_R_PP) | key;
 	if (r & HPTE_R_PP0)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index b5905ae4377c..16359525a40f 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -44,6 +44,9 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
 					  (to != NULL) ? __pa(to): 0,
 					  (from != NULL) ? __pa(from): 0, n);
 
+	if (eaddr & (0xFFFUL << 52))
+		return ret;
+
 	quadrant = 1;
 	if (!pid)
 		quadrant = 2;
@@ -65,10 +68,12 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
 	}
 	isync();
 
+	pagefault_disable();
 	if (is_load)
-		ret = copy_from_user_nofault(to, (const void __user *)from, n);
+		ret = __copy_from_user_inatomic(to, (const void __user *)from, n);
 	else
-		ret = copy_to_user_nofault((void __user *)to, from, n);
+		ret = __copy_to_user_inatomic((void __user *)to, from, n);
+	pagefault_enable();
 
 	/* switch the pid first to avoid running host with unallocated pid */
 	if (quadrant == 1 && pid != old_pid)
@@ -81,7 +86,6 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(__kvmhv_copy_tofrom_guest_radix);
 
 static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr,
 					  void *to, void *from, unsigned long n)
@@ -117,14 +121,12 @@ long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *to,
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(kvmhv_copy_from_guest_radix);
 
 long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *from,
 			       unsigned long n)
 {
 	return kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, NULL, from, n);
 }
-EXPORT_SYMBOL_GPL(kvmhv_copy_to_guest_radix);
 
 int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
 			       struct kvmppc_pte *gpte, u64 root,
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 8da93fdfa59e..6365087f3160 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -346,7 +346,7 @@ static long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
 	unsigned long gfn = tce >> PAGE_SHIFT;
 	struct kvm_memory_slot *memslot;
 
-	memslot = search_memslots(kvm_memslots(kvm), gfn);
+	memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
 	if (!memslot)
 		return -EINVAL;
 
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index dc6591548f0c..870b7f0c7ea5 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -80,7 +80,7 @@ static long kvmppc_rm_tce_to_ua(struct kvm *kvm,
 	unsigned long gfn = tce >> PAGE_SHIFT;
 	struct kvm_memory_slot *memslot;
 
-	memslot = search_memslots(kvm_memslots_raw(kvm), gfn);
+	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
 	if (!memslot)
 		return -EINVAL;
 
@@ -173,10 +173,13 @@ static void kvmppc_rm_tce_put(struct kvmppc_spapr_tce_table *stt,
 	idx -= stt->offset;
 	page = stt->pages[idx / TCES_PER_PAGE];
 	/*
-	 * page must not be NULL in real mode,
-	 * kvmppc_rm_ioba_validate() must have taken care of this.
+	 * kvmppc_rm_ioba_validate() allows pages not be allocated if TCE is
+	 * being cleared, otherwise it returns H_TOO_HARD and we skip this.
 	 */
-	WARN_ON_ONCE_RM(!page);
+	if (!page) {
+		WARN_ON_ONCE_RM(tce != 0);
+		return;
+	}
 	tbl = kvmppc_page_address(page);
 
 	tbl[idx % TCES_PER_PAGE] = tce;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 085fb8ecbf68..2acb1c96cfaf 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -59,6 +59,7 @@
 #include <asm/kvm_book3s.h>
 #include <asm/mmu_context.h>
 #include <asm/lppaca.h>
+#include <asm/pmc.h>
 #include <asm/processor.h>
 #include <asm/cputhreads.h>
 #include <asm/page.h>
@@ -1165,7 +1166,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 		break;
 #endif
 	case H_RANDOM:
-		if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4]))
+		if (!arch_get_random_seed_long(&vcpu->arch.regs.gpr[4]))
 			ret = H_HARDWARE;
 		break;
 	case H_RPT_INVALIDATE:
@@ -1679,6 +1680,21 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
 			r = RESUME_GUEST;
 		}
 		break;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	case BOOK3S_INTERRUPT_HV_SOFTPATCH:
+		/*
+		 * This occurs for various TM-related instructions that
+		 * we need to emulate on POWER9 DD2.2.  We have already
+		 * handled the cases where the guest was in real-suspend
+		 * mode and was transitioning to transactional state.
+		 */
+		r = kvmhv_p9_tm_emulation(vcpu);
+		if (r != -1)
+			break;
+		fallthrough; /* go to facility unavailable handler */
+#endif
+
 	/*
 	 * This occurs if the guest (kernel or userspace), does something that
 	 * is prohibited by HFSCR.
@@ -1697,18 +1713,6 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
 		}
 		break;
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-	case BOOK3S_INTERRUPT_HV_SOFTPATCH:
-		/*
-		 * This occurs for various TM-related instructions that
-		 * we need to emulate on POWER9 DD2.2.  We have already
-		 * handled the cases where the guest was in real-suspend
-		 * mode and was transitioning to transactional state.
-		 */
-		r = kvmhv_p9_tm_emulation(vcpu);
-		break;
-#endif
-
 	case BOOK3S_INTERRUPT_HV_RM_HARD:
 		r = RESUME_PASSTHROUGH;
 		break;
@@ -1727,6 +1731,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
 
 static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
 {
+	struct kvm_nested_guest *nested = vcpu->arch.nested;
 	int r;
 	int srcu_idx;
 
@@ -1811,9 +1816,41 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
 		 * mode and was transitioning to transactional state.
 		 */
 		r = kvmhv_p9_tm_emulation(vcpu);
-		break;
+		if (r != -1)
+			break;
+		fallthrough; /* go to facility unavailable handler */
 #endif
 
+	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: {
+		u64 cause = vcpu->arch.hfscr >> 56;
+
+		/*
+		 * Only pass HFU interrupts to the L1 if the facility is
+		 * permitted but disabled by the L1's HFSCR, otherwise
+		 * the interrupt does not make sense to the L1 so turn
+		 * it into a HEAI.
+		 */
+		if (!(vcpu->arch.hfscr_permitted & (1UL << cause)) ||
+					(nested->hfscr & (1UL << cause))) {
+			vcpu->arch.trap = BOOK3S_INTERRUPT_H_EMUL_ASSIST;
+
+			/*
+			 * If the fetch failed, return to guest and
+			 * try executing it again.
+			 */
+			r = kvmppc_get_last_inst(vcpu, INST_GENERIC,
+						 &vcpu->arch.emul_inst);
+			if (r != EMULATE_DONE)
+				r = RESUME_GUEST;
+			else
+				r = RESUME_HOST;
+		} else {
+			r = RESUME_HOST;
+		}
+
+		break;
+	}
+
 	case BOOK3S_INTERRUPT_HV_RM_HARD:
 		vcpu->arch.trap = 0;
 		r = RESUME_GUEST;
@@ -2684,6 +2721,7 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
 	spin_lock_init(&vcpu->arch.vpa_update_lock);
 	spin_lock_init(&vcpu->arch.tbacct_lock);
 	vcpu->arch.busy_preempt = TB_NIL;
+	vcpu->arch.shregs.msr = MSR_ME;
 	vcpu->arch.intr_msr = MSR_SF | MSR_ME;
 
 	/*
@@ -2705,6 +2743,8 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
 	if (cpu_has_feature(CPU_FTR_TM_COMP))
 		vcpu->arch.hfscr |= HFSCR_TM;
 
+	vcpu->arch.hfscr_permitted = vcpu->arch.hfscr;
+
 	kvmppc_mmu_book3s_hv_init(vcpu);
 
 	vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
@@ -3727,7 +3767,6 @@ static void load_spr_state(struct kvm_vcpu *vcpu)
 	mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
 	mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
 	mtspr(SPRN_BESCR, vcpu->arch.bescr);
-	mtspr(SPRN_WORT, vcpu->arch.wort);
 	mtspr(SPRN_TIDR, vcpu->arch.tid);
 	mtspr(SPRN_AMR, vcpu->arch.amr);
 	mtspr(SPRN_UAMOR, vcpu->arch.uamor);
@@ -3754,7 +3793,6 @@ static void store_spr_state(struct kvm_vcpu *vcpu)
 	vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
 	vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
 	vcpu->arch.bescr = mfspr(SPRN_BESCR);
-	vcpu->arch.wort = mfspr(SPRN_WORT);
 	vcpu->arch.tid = mfspr(SPRN_TIDR);
 	vcpu->arch.amr = mfspr(SPRN_AMR);
 	vcpu->arch.uamor = mfspr(SPRN_UAMOR);
@@ -3786,7 +3824,6 @@ static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
 				    struct p9_host_os_sprs *host_os_sprs)
 {
 	mtspr(SPRN_PSPB, 0);
-	mtspr(SPRN_WORT, 0);
 	mtspr(SPRN_UAMOR, 0);
 
 	mtspr(SPRN_DSCR, host_os_sprs->dscr);
@@ -3852,6 +3889,18 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
 		kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
 
+#ifdef CONFIG_PPC_PSERIES
+	if (kvmhv_on_pseries()) {
+		barrier();
+		if (vcpu->arch.vpa.pinned_addr) {
+			struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
+			get_lppaca()->pmcregs_in_use = lp->pmcregs_in_use;
+		} else {
+			get_lppaca()->pmcregs_in_use = 1;
+		}
+		barrier();
+	}
+#endif
 	kvmhv_load_guest_pmu(vcpu);
 
 	msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
@@ -3986,6 +4035,13 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	save_pmu |= nesting_enabled(vcpu->kvm);
 
 	kvmhv_save_guest_pmu(vcpu, save_pmu);
+#ifdef CONFIG_PPC_PSERIES
+	if (kvmhv_on_pseries()) {
+		barrier();
+		get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse();
+		barrier();
+	}
+#endif
 
 	vc->entry_exit_map = 0x101;
 	vc->in_guest = 0;
@@ -4146,19 +4202,31 @@ out:
 
 	/* Attribute wait time */
 	if (do_sleep) {
-		vc->runner->stat.halt_wait_ns +=
+		vc->runner->stat.generic.halt_wait_ns +=
 			ktime_to_ns(cur) - ktime_to_ns(start_wait);
+		KVM_STATS_LOG_HIST_UPDATE(
+				vc->runner->stat.generic.halt_wait_hist,
+				ktime_to_ns(cur) - ktime_to_ns(start_wait));
 		/* Attribute failed poll time */
-		if (vc->halt_poll_ns)
+		if (vc->halt_poll_ns) {
 			vc->runner->stat.generic.halt_poll_fail_ns +=
 				ktime_to_ns(start_wait) -
 				ktime_to_ns(start_poll);
+			KVM_STATS_LOG_HIST_UPDATE(
+				vc->runner->stat.generic.halt_poll_fail_hist,
+				ktime_to_ns(start_wait) -
+				ktime_to_ns(start_poll));
+		}
 	} else {
 		/* Attribute successful poll time */
-		if (vc->halt_poll_ns)
+		if (vc->halt_poll_ns) {
 			vc->runner->stat.generic.halt_poll_success_ns +=
 				ktime_to_ns(cur) -
 				ktime_to_ns(start_poll);
+			KVM_STATS_LOG_HIST_UPDATE(
+				vc->runner->stat.generic.halt_poll_success_hist,
+				ktime_to_ns(cur) - ktime_to_ns(start_poll));
+		}
 	}
 
 	/* Adjust poll time */
@@ -5328,6 +5396,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
 	struct kvmppc_passthru_irqmap *pimap;
 	struct irq_chip *chip;
 	int i, rc = 0;
+	struct irq_data *host_data;
 
 	if (!kvm_irq_bypass)
 		return 1;
@@ -5355,7 +5424,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
 	 * what our real-mode EOI code does, or a XIVE interrupt
 	 */
 	chip = irq_data_get_irq_chip(&desc->irq_data);
-	if (!chip || !(is_pnv_opal_msi(chip) || is_xive_irq(chip))) {
+	if (!chip || !is_pnv_opal_msi(chip)) {
 		pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n",
 			host_irq, guest_gsi);
 		mutex_unlock(&kvm->lock);
@@ -5392,15 +5461,22 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
 	 * the KVM real mode handler.
 	 */
 	smp_wmb();
-	irq_map->r_hwirq = desc->irq_data.hwirq;
+
+	/*
+	 * The 'host_irq' number is mapped in the PCI-MSI domain but
+	 * the underlying calls, which will EOI the interrupt in real
+	 * mode, need an HW IRQ number mapped in the XICS IRQ domain.
+	 */
+	host_data = irq_domain_get_irq_data(irq_get_default_host(), host_irq);
+	irq_map->r_hwirq = (unsigned int)irqd_to_hwirq(host_data);
 
 	if (i == pimap->n_mapped)
 		pimap->n_mapped++;
 
 	if (xics_on_xive())
-		rc = kvmppc_xive_set_mapped(kvm, guest_gsi, desc);
+		rc = kvmppc_xive_set_mapped(kvm, guest_gsi, host_irq);
 	else
-		kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq);
+		kvmppc_xics_set_mapped(kvm, guest_gsi, irq_map->r_hwirq);
 	if (rc)
 		irq_map->r_hwirq = 0;
 
@@ -5439,7 +5515,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
 	}
 
 	if (xics_on_xive())
-		rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, pimap->mapped[i].desc);
+		rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, host_irq);
 	else
 		kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq);
 
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index be8ef1c5b1bf..fcf4760a3a0e 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -137,23 +137,23 @@ long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
  * exist in the system. We use a counter of VMs to track this.
  *
  * One of the operations we need to block is onlining of secondaries, so we
- * protect hv_vm_count with get/put_online_cpus().
+ * protect hv_vm_count with cpus_read_lock/unlock().
  */
 static atomic_t hv_vm_count;
 
 void kvm_hv_vm_activated(void)
 {
-	get_online_cpus();
+	cpus_read_lock();
 	atomic_inc(&hv_vm_count);
-	put_online_cpus();
+	cpus_read_unlock();
 }
 EXPORT_SYMBOL_GPL(kvm_hv_vm_activated);
 
 void kvm_hv_vm_deactivated(void)
 {
-	get_online_cpus();
+	cpus_read_lock();
 	atomic_dec(&hv_vm_count);
-	put_online_cpus();
+	cpus_read_unlock();
 }
 EXPORT_SYMBOL_GPL(kvm_hv_vm_deactivated);
 
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 898f942eb198..ed8a2c9f5629 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -99,13 +99,12 @@ static void byteswap_hv_regs(struct hv_guest_state *hr)
 	hr->dawrx1 = swab64(hr->dawrx1);
 }
 
-static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
+static void save_hv_return_state(struct kvm_vcpu *vcpu,
 				 struct hv_guest_state *hr)
 {
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
 	hr->dpdes = vc->dpdes;
-	hr->hfscr = vcpu->arch.hfscr;
 	hr->purr = vcpu->arch.purr;
 	hr->spurr = vcpu->arch.spurr;
 	hr->ic = vcpu->arch.ic;
@@ -119,7 +118,7 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
 	hr->pidr = vcpu->arch.pid;
 	hr->cfar = vcpu->arch.cfar;
 	hr->ppr = vcpu->arch.ppr;
-	switch (trap) {
+	switch (vcpu->arch.trap) {
 	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
 		hr->hdar = vcpu->arch.fault_dar;
 		hr->hdsisr = vcpu->arch.fault_dsisr;
@@ -128,55 +127,17 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
 	case BOOK3S_INTERRUPT_H_INST_STORAGE:
 		hr->asdr = vcpu->arch.fault_gpa;
 		break;
+	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
+		hr->hfscr = ((~HFSCR_INTR_CAUSE & hr->hfscr) |
+			     (HFSCR_INTR_CAUSE & vcpu->arch.hfscr));
+		break;
 	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
 		hr->heir = vcpu->arch.emul_inst;
 		break;
 	}
 }
 
-/*
- * This can result in some L0 HV register state being leaked to an L1
- * hypervisor when the hv_guest_state is copied back to the guest after
- * being modified here.
- *
- * There is no known problem with such a leak, and in many cases these
- * register settings could be derived by the guest by observing behaviour
- * and timing, interrupts, etc., but it is an issue to consider.
- */
-static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
-{
-	struct kvmppc_vcore *vc = vcpu->arch.vcore;
-	u64 mask;
-
-	/*
-	 * Don't let L1 change LPCR bits for the L2 except these:
-	 */
-	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
-		LPCR_LPES | LPCR_MER;
-
-	/*
-	 * Additional filtering is required depending on hardware
-	 * and configuration.
-	 */
-	hr->lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm,
-			(vc->lpcr & ~mask) | (hr->lpcr & mask));
-
-	/*
-	 * Don't let L1 enable features for L2 which we've disabled for L1,
-	 * but preserve the interrupt cause field.
-	 */
-	hr->hfscr &= (HFSCR_INTR_CAUSE | vcpu->arch.hfscr);
-
-	/* Don't let data address watchpoint match in hypervisor state */
-	hr->dawrx0 &= ~DAWRX_HYP;
-	hr->dawrx1 &= ~DAWRX_HYP;
-
-	/* Don't let completed instruction address breakpt match in HV state */
-	if ((hr->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
-		hr->ciabr &= ~CIABR_PRIV;
-}
-
-static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
+static void restore_hv_regs(struct kvm_vcpu *vcpu, const struct hv_guest_state *hr)
 {
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
@@ -288,6 +249,43 @@ static int kvmhv_write_guest_state_and_regs(struct kvm_vcpu *vcpu,
 				     sizeof(struct pt_regs));
 }
 
+static void load_l2_hv_regs(struct kvm_vcpu *vcpu,
+			    const struct hv_guest_state *l2_hv,
+			    const struct hv_guest_state *l1_hv, u64 *lpcr)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	u64 mask;
+
+	restore_hv_regs(vcpu, l2_hv);
+
+	/*
+	 * Don't let L1 change LPCR bits for the L2 except these:
+	 */
+	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
+		LPCR_LPES | LPCR_MER;
+
+	/*
+	 * Additional filtering is required depending on hardware
+	 * and configuration.
+	 */
+	*lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm,
+				      (vc->lpcr & ~mask) | (*lpcr & mask));
+
+	/*
+	 * Don't let L1 enable features for L2 which we don't allow for L1,
+	 * but preserve the interrupt cause field.
+	 */
+	vcpu->arch.hfscr = l2_hv->hfscr & (HFSCR_INTR_CAUSE | vcpu->arch.hfscr_permitted);
+
+	/* Don't let data address watchpoint match in hypervisor state */
+	vcpu->arch.dawrx0 = l2_hv->dawrx0 & ~DAWRX_HYP;
+	vcpu->arch.dawrx1 = l2_hv->dawrx1 & ~DAWRX_HYP;
+
+	/* Don't let completed instruction address breakpt match in HV state */
+	if ((l2_hv->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
+		vcpu->arch.ciabr = l2_hv->ciabr & ~CIABR_PRIV;
+}
+
 long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
 {
 	long int err, r;
@@ -296,7 +294,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
 	struct hv_guest_state l2_hv = {0}, saved_l1_hv;
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 	u64 hv_ptr, regs_ptr;
-	u64 hdec_exp;
+	u64 hdec_exp, lpcr;
 	s64 delta_purr, delta_spurr, delta_ic, delta_vtb;
 
 	if (vcpu->kvm->arch.l1_ptcr == 0)
@@ -364,13 +362,14 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
 	/* set L1 state to L2 state */
 	vcpu->arch.nested = l2;
 	vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
+	l2->hfscr = l2_hv.hfscr;
 	vcpu->arch.regs = l2_regs;
 
 	/* Guest must always run with ME enabled, HV disabled. */
 	vcpu->arch.shregs.msr = (vcpu->arch.regs.msr | MSR_ME) & ~MSR_HV;
 
-	sanitise_hv_regs(vcpu, &l2_hv);
-	restore_hv_regs(vcpu, &l2_hv);
+	lpcr = l2_hv.lpcr;
+	load_l2_hv_regs(vcpu, &l2_hv, &saved_l1_hv, &lpcr);
 
 	vcpu->arch.ret = RESUME_GUEST;
 	vcpu->arch.trap = 0;
@@ -380,7 +379,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
 			r = RESUME_HOST;
 			break;
 		}
-		r = kvmhv_run_single_vcpu(vcpu, hdec_exp, l2_hv.lpcr);
+		r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr);
 	} while (is_kvmppc_resume_guest(r));
 
 	/* save L2 state for return */
@@ -390,7 +389,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
 	delta_spurr = vcpu->arch.spurr - l2_hv.spurr;
 	delta_ic = vcpu->arch.ic - l2_hv.ic;
 	delta_vtb = vc->vtb - l2_hv.vtb;
-	save_hv_return_state(vcpu, vcpu->arch.trap, &l2_hv);
+	save_hv_return_state(vcpu, &l2_hv);
 
 	/* restore L1 state */
 	vcpu->arch.nested = NULL;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 0a11ec88a0ae..587c33fc4564 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -706,6 +706,7 @@ static int ics_rm_eoi(struct kvm_vcpu *vcpu, u32 irq)
 		icp->rm_eoied_irq = irq;
 	}
 
+	/* Handle passthrough interrupts */
 	if (state->host_irq) {
 		++vcpu->stat.pthru_all;
 		if (state->intr_cpu != -1) {
@@ -759,12 +760,12 @@ int xics_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
 
 static unsigned long eoi_rc;
 
-static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
+static void icp_eoi(struct irq_data *d, u32 hwirq, __be32 xirr, bool *again)
 {
 	void __iomem *xics_phys;
 	int64_t rc;
 
-	rc = pnv_opal_pci_msi_eoi(c, hwirq);
+	rc = pnv_opal_pci_msi_eoi(d);
 
 	if (rc)
 		eoi_rc = rc;
@@ -872,8 +873,7 @@ long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu,
 		icp_rm_deliver_irq(xics, icp, irq, false);
 
 	/* EOI the interrupt */
-	icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr,
-		again);
+	icp_eoi(irq_desc_get_irq_data(irq_map->desc), irq_map->r_hwirq, xirr, again);
 
 	if (check_too_hard(xics, icp) == H_TOO_HARD)
 		return 2;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 8dd437d7a2c6..75079397c2a5 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1088,12 +1088,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	cmpwi	r12, BOOK3S_INTERRUPT_H_INST_STORAGE
 	beq	kvmppc_hisi
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-	/* For softpatch interrupt, go off and do TM instruction emulation */
-	cmpwi	r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
-	beq	kvmppc_tm_emul
-#endif
-
 	/* See if this is a leftover HDEC interrupt */
 	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
 	bne	2f
@@ -1599,42 +1593,6 @@ maybe_reenter_guest:
 	blt	deliver_guest_interrupt
 	b	guest_exit_cont
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Softpatch interrupt for transactional memory emulation cases
- * on POWER9 DD2.2.  This is early in the guest exit path - we
- * haven't saved registers or done a treclaim yet.
- */
-kvmppc_tm_emul:
-	/* Save instruction image in HEIR */
-	mfspr	r3, SPRN_HEIR
-	stw	r3, VCPU_HEIR(r9)
-
-	/*
-	 * The cases we want to handle here are those where the guest
-	 * is in real suspend mode and is trying to transition to
-	 * transactional mode.
-	 */
-	lbz	r0, HSTATE_FAKE_SUSPEND(r13)
-	cmpwi	r0, 0		/* keep exiting guest if in fake suspend */
-	bne	guest_exit_cont
-	rldicl	r3, r11, 64 - MSR_TS_S_LG, 62
-	cmpwi	r3, 1		/* or if not in suspend state */
-	bne	guest_exit_cont
-
-	/* Call C code to do the emulation */
-	mr	r3, r9
-	bl	kvmhv_p9_tm_emulation_early
-	nop
-	ld	r9, HSTATE_KVM_VCPU(r13)
-	li	r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
-	cmpwi	r3, 0
-	beq	guest_exit_cont		/* continue exiting if not handled */
-	ld	r10, VCPU_PC(r9)
-	ld	r11, VCPU_MSR(r9)
-	b	fast_interrupt_c_return	/* go back to guest if handled */
-#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-
 /*
  * Check whether an HDSI is an HPTE not found fault or something else.
  * If it is an HPTE not found fault that is due to the guest accessing
diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c
index cc90b8b82329..866cadd70094 100644
--- a/arch/powerpc/kvm/book3s_hv_tm.c
+++ b/arch/powerpc/kvm/book3s_hv_tm.c
@@ -47,6 +47,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 	int ra, rs;
 
 	/*
+	 * The TM softpatch interrupt sets NIP to the instruction following
+	 * the faulting instruction, which is not executed. Rewind nip to the
+	 * faulting instruction so it looks like a normal synchronous
+	 * interrupt, then update nip in the places where the instruction is
+	 * emulated.
+	 */
+	vcpu->arch.regs.nip -= 4;
+
+	/*
 	 * rfid, rfebb, and mtmsrd encode bit 31 = 0 since it's a reserved bit
 	 * in these instructions, so masking bit 31 out doesn't change these
 	 * instructions. For treclaim., tsr., and trechkpt. instructions if bit
@@ -67,7 +76,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 			       (newmsr & MSR_TM)));
 		newmsr = sanitize_msr(newmsr);
 		vcpu->arch.shregs.msr = newmsr;
-		vcpu->arch.cfar = vcpu->arch.regs.nip - 4;
+		vcpu->arch.cfar = vcpu->arch.regs.nip;
 		vcpu->arch.regs.nip = vcpu->arch.shregs.srr0;
 		return RESUME_GUEST;
 
@@ -79,14 +88,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 		}
 		/* check EBB facility is available */
 		if (!(vcpu->arch.hfscr & HFSCR_EBB)) {
-			/* generate an illegal instruction interrupt */
-			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
-			return RESUME_GUEST;
+			vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE;
+			vcpu->arch.hfscr |= (u64)FSCR_EBB_LG << 56;
+			vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL;
+			return -1; /* rerun host interrupt handler */
 		}
 		if ((msr & MSR_PR) && !(vcpu->arch.fscr & FSCR_EBB)) {
 			/* generate a facility unavailable interrupt */
-			vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
-				((u64)FSCR_EBB_LG << 56);
+			vcpu->arch.fscr &= ~FSCR_INTR_CAUSE;
+			vcpu->arch.fscr |= (u64)FSCR_EBB_LG << 56;
 			kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL);
 			return RESUME_GUEST;
 		}
@@ -100,7 +110,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 		vcpu->arch.bescr = bescr;
 		msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
 		vcpu->arch.shregs.msr = msr;
-		vcpu->arch.cfar = vcpu->arch.regs.nip - 4;
+		vcpu->arch.cfar = vcpu->arch.regs.nip;
 		vcpu->arch.regs.nip = vcpu->arch.ebbrr;
 		return RESUME_GUEST;
 
@@ -116,6 +126,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 		newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE);
 		newmsr = sanitize_msr(newmsr);
 		vcpu->arch.shregs.msr = newmsr;
+		vcpu->arch.regs.nip += 4;
 		return RESUME_GUEST;
 
 	/* ignore bit 31, see comment above */
@@ -128,14 +139,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 		}
 		/* check for TM disabled in the HFSCR or MSR */
 		if (!(vcpu->arch.hfscr & HFSCR_TM)) {
-			/* generate an illegal instruction interrupt */
-			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
-			return RESUME_GUEST;
+			vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE;
+			vcpu->arch.hfscr |= (u64)FSCR_TM_LG << 56;
+			vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL;
+			return -1; /* rerun host interrupt handler */
 		}
 		if (!(msr & MSR_TM)) {
 			/* generate a facility unavailable interrupt */
-			vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
-				((u64)FSCR_TM_LG << 56);
+			vcpu->arch.fscr &= ~FSCR_INTR_CAUSE;
+			vcpu->arch.fscr |= (u64)FSCR_TM_LG << 56;
 			kvmppc_book3s_queue_irqprio(vcpu,
 						BOOK3S_INTERRUPT_FAC_UNAVAIL);
 			return RESUME_GUEST;
@@ -152,20 +164,22 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 				msr = (msr & ~MSR_TS_MASK) | MSR_TS_S;
 		}
 		vcpu->arch.shregs.msr = msr;
+		vcpu->arch.regs.nip += 4;
 		return RESUME_GUEST;
 
 	/* ignore bit 31, see comment above */
 	case (PPC_INST_TRECLAIM & PO_XOP_OPCODE_MASK):
 		/* check for TM disabled in the HFSCR or MSR */
 		if (!(vcpu->arch.hfscr & HFSCR_TM)) {
-			/* generate an illegal instruction interrupt */
-			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
-			return RESUME_GUEST;
+			vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE;
+			vcpu->arch.hfscr |= (u64)FSCR_TM_LG << 56;
+			vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL;
+			return -1; /* rerun host interrupt handler */
 		}
 		if (!(msr & MSR_TM)) {
 			/* generate a facility unavailable interrupt */
-			vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
-				((u64)FSCR_TM_LG << 56);
+			vcpu->arch.fscr &= ~FSCR_INTR_CAUSE;
+			vcpu->arch.fscr |= (u64)FSCR_TM_LG << 56;
 			kvmppc_book3s_queue_irqprio(vcpu,
 						BOOK3S_INTERRUPT_FAC_UNAVAIL);
 			return RESUME_GUEST;
@@ -189,6 +203,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 		vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
 			(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
 		vcpu->arch.shregs.msr &= ~MSR_TS_MASK;
+		vcpu->arch.regs.nip += 4;
 		return RESUME_GUEST;
 
 	/* ignore bit 31, see comment above */
@@ -196,14 +211,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 		/* XXX do we need to check for PR=0 here? */
 		/* check for TM disabled in the HFSCR or MSR */
 		if (!(vcpu->arch.hfscr & HFSCR_TM)) {
-			/* generate an illegal instruction interrupt */
-			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
-			return RESUME_GUEST;
+			vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE;
+			vcpu->arch.hfscr |= (u64)FSCR_TM_LG << 56;
+			vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL;
+			return -1; /* rerun host interrupt handler */
 		}
 		if (!(msr & MSR_TM)) {
 			/* generate a facility unavailable interrupt */
-			vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
-				((u64)FSCR_TM_LG << 56);
+			vcpu->arch.fscr &= ~FSCR_INTR_CAUSE;
+			vcpu->arch.fscr |= (u64)FSCR_TM_LG << 56;
 			kvmppc_book3s_queue_irqprio(vcpu,
 						BOOK3S_INTERRUPT_FAC_UNAVAIL);
 			return RESUME_GUEST;
@@ -220,6 +236,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 		vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
 			(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
 		vcpu->arch.shregs.msr = msr | MSR_TS_S;
+		vcpu->arch.regs.nip += 4;
 		return RESUME_GUEST;
 	}
 
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 303e3cb096db..ebd5d920de8c 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -10,13 +10,13 @@
 #include <linux/gfp.h>
 #include <linux/anon_inodes.h>
 #include <linux/spinlock.h>
-
+#include <linux/debugfs.h>
 #include <linux/uaccess.h>
+
 #include <asm/kvm_book3s.h>
 #include <asm/kvm_ppc.h>
 #include <asm/hvcall.h>
 #include <asm/xics.h>
-#include <asm/debugfs.h>
 #include <asm/time.h>
 
 #include <linux/seq_file.h>
@@ -1024,7 +1024,7 @@ static void xics_debugfs_init(struct kvmppc_xics *xics)
 		return;
 	}
 
-	xics->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
+	xics->dentry = debugfs_create_file(name, 0444, arch_debugfs_dir,
 					   xics, &xics_debug_fops);
 
 	pr_debug("%s: created %s\n", __func__, name);
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 8cfab3547494..a18db9e16ea4 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -22,7 +22,6 @@
 #include <asm/xive.h>
 #include <asm/xive-regs.h>
 #include <asm/debug.h>
-#include <asm/debugfs.h>
 #include <asm/time.h>
 #include <asm/opal.h>
 
@@ -59,6 +58,25 @@
  */
 #define XIVE_Q_GAP	2
 
+static bool kvmppc_xive_vcpu_has_save_restore(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+	/* Check enablement at VP level */
+	return xc->vp_cam & TM_QW1W2_HO;
+}
+
+bool kvmppc_xive_check_save_restore(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = xc->xive;
+
+	if (xive->flags & KVMPPC_XIVE_FLAG_SAVE_RESTORE)
+		return kvmppc_xive_vcpu_has_save_restore(vcpu);
+
+	return true;
+}
+
 /*
  * Push a vcpu's context to the XIVE on guest entry.
  * This assumes we are in virtual mode (MMU on)
@@ -77,7 +95,8 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
 		return;
 
 	eieio();
-	__raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
+	if (!kvmppc_xive_vcpu_has_save_restore(vcpu))
+		__raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
 	__raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
 	vcpu->arch.xive_pushed = 1;
 	eieio();
@@ -149,7 +168,8 @@ void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu)
 	/* First load to pull the context, we ignore the value */
 	__raw_readl(tima + TM_SPC_PULL_OS_CTX);
 	/* Second load to recover the context state (Words 0 and 1) */
-	vcpu->arch.xive_saved_state.w01 = __raw_readq(tima + TM_QW1_OS);
+	if (!kvmppc_xive_vcpu_has_save_restore(vcpu))
+		vcpu->arch.xive_saved_state.w01 = __raw_readq(tima + TM_QW1_OS);
 
 	/* Fixup some of the state for the next load */
 	vcpu->arch.xive_saved_state.lsmfb = 0;
@@ -363,9 +383,9 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio)
 		if (!vcpu->arch.xive_vcpu)
 			continue;
 		rc = xive_provision_queue(vcpu, prio);
-		if (rc == 0 && !xive->single_escalation)
+		if (rc == 0 && !kvmppc_xive_has_single_escalation(xive))
 			kvmppc_xive_attach_escalation(vcpu, prio,
-						      xive->single_escalation);
+						      kvmppc_xive_has_single_escalation(xive));
 		if (rc)
 			return rc;
 	}
@@ -922,13 +942,13 @@ int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
 }
 
 int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
-			   struct irq_desc *host_desc)
+			   unsigned long host_irq)
 {
 	struct kvmppc_xive *xive = kvm->arch.xive;
 	struct kvmppc_xive_src_block *sb;
 	struct kvmppc_xive_irq_state *state;
-	struct irq_data *host_data = irq_desc_get_irq_data(host_desc);
-	unsigned int host_irq = irq_desc_get_irq(host_desc);
+	struct irq_data *host_data =
+		irq_domain_get_irq_data(irq_get_default_host(), host_irq);
 	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(host_data);
 	u16 idx;
 	u8 prio;
@@ -937,7 +957,8 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
 	if (!xive)
 		return -ENODEV;
 
-	pr_devel("set_mapped girq 0x%lx host HW irq 0x%x...\n",guest_irq, hw_irq);
+	pr_debug("%s: GIRQ 0x%lx host IRQ %ld XIVE HW IRQ 0x%x\n",
+		 __func__, guest_irq, host_irq, hw_irq);
 
 	sb = kvmppc_xive_find_source(xive, guest_irq, &idx);
 	if (!sb)
@@ -959,7 +980,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
 	 */
 	rc = irq_set_vcpu_affinity(host_irq, state);
 	if (rc) {
-		pr_err("Failed to set VCPU affinity for irq %d\n", host_irq);
+		pr_err("Failed to set VCPU affinity for host IRQ %ld\n", host_irq);
 		return rc;
 	}
 
@@ -1019,12 +1040,11 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
 EXPORT_SYMBOL_GPL(kvmppc_xive_set_mapped);
 
 int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
-			   struct irq_desc *host_desc)
+			   unsigned long host_irq)
 {
 	struct kvmppc_xive *xive = kvm->arch.xive;
 	struct kvmppc_xive_src_block *sb;
 	struct kvmppc_xive_irq_state *state;
-	unsigned int host_irq = irq_desc_get_irq(host_desc);
 	u16 idx;
 	u8 prio;
 	int rc;
@@ -1032,7 +1052,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
 	if (!xive)
 		return -ENODEV;
 
-	pr_devel("clr_mapped girq 0x%lx...\n", guest_irq);
+	pr_debug("%s: GIRQ 0x%lx host IRQ %ld\n", __func__, guest_irq, host_irq);
 
 	sb = kvmppc_xive_find_source(xive, guest_irq, &idx);
 	if (!sb)
@@ -1059,7 +1079,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
 	/* Release the passed-through interrupt to the host */
 	rc = irq_set_vcpu_affinity(host_irq, NULL);
 	if (rc) {
-		pr_err("Failed to clr VCPU affinity for irq %d\n", host_irq);
+		pr_err("Failed to clr VCPU affinity for host IRQ %ld\n", host_irq);
 		return rc;
 	}
 
@@ -1199,7 +1219,7 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
 	/* Free escalations */
 	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
 		if (xc->esc_virq[i]) {
-			if (xc->xive->single_escalation)
+			if (kvmppc_xive_has_single_escalation(xc->xive))
 				xive_cleanup_single_escalation(vcpu, xc,
 							xc->esc_virq[i]);
 			free_irq(xc->esc_virq[i], vcpu);
@@ -1319,6 +1339,12 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
 	if (r)
 		goto bail;
 
+	if (!kvmppc_xive_check_save_restore(vcpu)) {
+		pr_err("inconsistent save-restore setup for VCPU %d\n", cpu);
+		r = -EIO;
+		goto bail;
+	}
+
 	/* Configure VCPU fields for use by assembly push/pull */
 	vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
 	vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);
@@ -1340,7 +1366,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
 	 * Enable the VP first as the single escalation mode will
 	 * affect escalation interrupts numbering
 	 */
-	r = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
+	r = xive_native_enable_vp(xc->vp_id, kvmppc_xive_has_single_escalation(xive));
 	if (r) {
 		pr_err("Failed to enable VP in OPAL, err %d\n", r);
 		goto bail;
@@ -1357,15 +1383,15 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
 		struct xive_q *q = &xc->queues[i];
 
 		/* Single escalation, no queue 7 */
-		if (i == 7 && xive->single_escalation)
+		if (i == 7 && kvmppc_xive_has_single_escalation(xive))
 			break;
 
 		/* Is queue already enabled ? Provision it */
 		if (xive->qmap & (1 << i)) {
 			r = xive_provision_queue(vcpu, i);
-			if (r == 0 && !xive->single_escalation)
+			if (r == 0 && !kvmppc_xive_has_single_escalation(xive))
 				kvmppc_xive_attach_escalation(
-					vcpu, i, xive->single_escalation);
+					vcpu, i, kvmppc_xive_has_single_escalation(xive));
 			if (r)
 				goto bail;
 		} else {
@@ -1380,7 +1406,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
 	}
 
 	/* If not done above, attach priority 0 escalation */
-	r = kvmppc_xive_attach_escalation(vcpu, 0, xive->single_escalation);
+	r = kvmppc_xive_attach_escalation(vcpu, 0, kvmppc_xive_has_single_escalation(xive));
 	if (r)
 		goto bail;
 
@@ -2135,7 +2161,11 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
 	 */
 	xive->nr_servers = KVM_MAX_VCPUS;
 
-	xive->single_escalation = xive_native_has_single_escalation();
+	if (xive_native_has_single_escalation())
+		xive->flags |= KVMPPC_XIVE_FLAG_SINGLE_ESCALATION;
+
+	if (xive_native_has_save_restore())
+		xive->flags |= KVMPPC_XIVE_FLAG_SAVE_RESTORE;
 
 	kvm->arch.xive = xive;
 	return 0;
@@ -2329,7 +2359,7 @@ static void xive_debugfs_init(struct kvmppc_xive *xive)
 		return;
 	}
 
-	xive->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root,
+	xive->dentry = debugfs_create_file(name, S_IRUGO, arch_debugfs_dir,
 					   xive, &xive_debug_fops);
 
 	pr_debug("%s: created %s\n", __func__, name);
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index afe9eeac6d56..e6a9651c6f1e 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -97,6 +97,9 @@ struct kvmppc_xive_ops {
 	int (*reset_mapped)(struct kvm *kvm, unsigned long guest_irq);
 };
 
+#define KVMPPC_XIVE_FLAG_SINGLE_ESCALATION 0x1
+#define KVMPPC_XIVE_FLAG_SAVE_RESTORE 0x2
+
 struct kvmppc_xive {
 	struct kvm *kvm;
 	struct kvm_device *dev;
@@ -133,7 +136,7 @@ struct kvmppc_xive {
 	u32	q_page_order;
 
 	/* Flags */
-	u8	single_escalation;
+	u8	flags;
 
 	/* Number of entries in the VP block */
 	u32	nr_servers;
@@ -307,6 +310,12 @@ void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
 				    struct kvmppc_xive_vcpu *xc, int irq);
 int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp);
 int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr);
+bool kvmppc_xive_check_save_restore(struct kvm_vcpu *vcpu);
+
+static inline bool kvmppc_xive_has_single_escalation(struct kvmppc_xive *xive)
+{
+	return xive->flags & KVMPPC_XIVE_FLAG_SINGLE_ESCALATION;
+}
 
 #endif /* CONFIG_KVM_XICS */
 #endif /* _KVM_PPC_BOOK3S_XICS_H */
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index 573ecaab3597..99db9ac49901 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -20,7 +20,6 @@
 #include <asm/xive.h>
 #include <asm/xive-regs.h>
 #include <asm/debug.h>
-#include <asm/debugfs.h>
 #include <asm/opal.h>
 
 #include <linux/debugfs.h>
@@ -93,7 +92,7 @@ void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
 	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
 		/* Free the escalation irq */
 		if (xc->esc_virq[i]) {
-			if (xc->xive->single_escalation)
+			if (kvmppc_xive_has_single_escalation(xc->xive))
 				xive_cleanup_single_escalation(vcpu, xc,
 							xc->esc_virq[i]);
 			free_irq(xc->esc_virq[i], vcpu);
@@ -168,11 +167,17 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
 		goto bail;
 	}
 
+	if (!kvmppc_xive_check_save_restore(vcpu)) {
+		pr_err("inconsistent save-restore setup for VCPU %d\n", server_num);
+		rc = -EIO;
+		goto bail;
+	}
+
 	/*
 	 * Enable the VP first as the single escalation mode will
 	 * affect escalation interrupts numbering
 	 */
-	rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
+	rc = xive_native_enable_vp(xc->vp_id, kvmppc_xive_has_single_escalation(xive));
 	if (rc) {
 		pr_err("Failed to enable VP in OPAL: %d\n", rc);
 		goto bail;
@@ -693,7 +698,7 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
 	}
 
 	rc = kvmppc_xive_attach_escalation(vcpu, priority,
-					   xive->single_escalation);
+					   kvmppc_xive_has_single_escalation(xive));
 error:
 	if (rc)
 		kvmppc_xive_native_cleanup_queue(vcpu, priority);
@@ -820,7 +825,7 @@ static int kvmppc_xive_reset(struct kvmppc_xive *xive)
 		for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
 
 			/* Single escalation, no queue 7 */
-			if (prio == 7 && xive->single_escalation)
+			if (prio == 7 && kvmppc_xive_has_single_escalation(xive))
 				break;
 
 			if (xc->esc_virq[prio]) {
@@ -1111,7 +1116,12 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
 	 */
 	xive->nr_servers = KVM_MAX_VCPUS;
 
-	xive->single_escalation = xive_native_has_single_escalation();
+	if (xive_native_has_single_escalation())
+		xive->flags |= KVMPPC_XIVE_FLAG_SINGLE_ESCALATION;
+
+	if (xive_native_has_save_restore())
+		xive->flags |= KVMPPC_XIVE_FLAG_SAVE_RESTORE;
+
 	xive->ops = &kvmppc_xive_native_ops;
 
 	kvm->arch.xive = xive;
@@ -1257,7 +1267,7 @@ static void xive_native_debugfs_init(struct kvmppc_xive *xive)
 		return;
 	}
 
-	xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
+	xive->dentry = debugfs_create_file(name, 0444, arch_debugfs_dir,
 					   xive, &xive_native_debug_fops);
 
 	pr_debug("%s: created %s\n", __func__, name);
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 551b30d84aee..977801c83aff 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -41,8 +41,6 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
 	STATS_DESC_ICOUNTER(VM, num_2M_pages),
 	STATS_DESC_ICOUNTER(VM, num_1G_pages)
 };
-static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
-		sizeof(struct kvm_vm_stat) / sizeof(u64));
 
 const struct kvm_stats_header kvm_vm_stats_header = {
 	.name_size = KVM_STATS_NAME_SIZE,
@@ -69,7 +67,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
 	STATS_DESC_COUNTER(VCPU, emulated_inst_exits),
 	STATS_DESC_COUNTER(VCPU, dec_exits),
 	STATS_DESC_COUNTER(VCPU, ext_intr_exits),
-	STATS_DESC_TIME_NSEC(VCPU, halt_wait_ns),
 	STATS_DESC_COUNTER(VCPU, halt_successful_wait),
 	STATS_DESC_COUNTER(VCPU, dbell_exits),
 	STATS_DESC_COUNTER(VCPU, gdbell_exits),
@@ -79,8 +76,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
 	STATS_DESC_COUNTER(VCPU, pthru_host),
 	STATS_DESC_COUNTER(VCPU, pthru_bad_aff)
 };
-static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
-		sizeof(struct kvm_vcpu_stat) / sizeof(u64));
 
 const struct kvm_stats_header kvm_vcpu_stats_header = {
 	.name_size = KVM_STATS_NAME_SIZE,
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index eae4ec2988fc..df8172da2301 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -18,5 +18,5 @@ obj-$(CONFIG_PPC_MM_SLICES)	+= slice.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
 obj-$(CONFIG_PPC_COPRO_BASE)	+= copro_fault.o
-obj-$(CONFIG_PPC_PTDUMP)	+= ptdump/
+obj-$(CONFIG_PTDUMP_CORE)	+= ptdump/
 obj-$(CONFIG_KASAN)		+= kasan/
diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c
index 52e170bd95ae..d8279bfe68ea 100644
--- a/arch/powerpc/mm/book3s64/hash_native.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
@@ -787,7 +787,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
  * TODO: add batching support when enabled.  remember, no dynamic memory here,
  * although there is the control page available...
  */
-static void native_hpte_clear(void)
+static notrace void native_hpte_clear(void)
 {
 	unsigned long vpn = 0;
 	unsigned long slot, slots;
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index ac5720371c0d..c145776d3ae5 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -36,8 +36,8 @@
 #include <linux/hugetlb.h>
 #include <linux/cpu.h>
 #include <linux/pgtable.h>
+#include <linux/debugfs.h>
 
-#include <asm/debugfs.h>
 #include <asm/interrupt.h>
 #include <asm/processor.h>
 #include <asm/mmu.h>
@@ -2072,7 +2072,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n")
 
 static int __init hash64_debugfs(void)
 {
-	debugfs_create_file("hpt_order", 0600, powerpc_debugfs_root, NULL,
+	debugfs_create_file("hpt_order", 0600, arch_debugfs_dir, NULL,
 			    &fops_hpt_order);
 	return 0;
 }
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 9ffa65074cb0..9e16c7b1a6c5 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -6,9 +6,9 @@
 #include <linux/sched.h>
 #include <linux/mm_types.h>
 #include <linux/memblock.h>
+#include <linux/debugfs.h>
 #include <misc/cxl-base.h>
 
-#include <asm/debugfs.h>
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 #include <asm/trace.h>
@@ -172,8 +172,8 @@ pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-/* For use by kexec */
-void mmu_cleanup_all(void)
+/* For use by kexec, called with MMU off */
+notrace void mmu_cleanup_all(void)
 {
 	if (radix_enabled())
 		radix__mmu_cleanup_all();
@@ -520,7 +520,7 @@ static int __init pgtable_debugfs_setup(void)
 	 * invalidated as expected.
 	 */
 	debugfs_create_bool("tlbie_enabled", 0600,
-			powerpc_debugfs_root,
+			arch_debugfs_dir,
 			&tlbie_enabled);
 
 	return 0;
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index e50ddf129c15..ae20add7954a 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -679,7 +679,8 @@ void radix__early_init_mmu_secondary(void)
 	mtspr(SPRN_UAMOR, 0);
 }
 
-void radix__mmu_cleanup_all(void)
+/* Called during kexec sequence with MMU off */
+notrace void radix__mmu_cleanup_all(void)
 {
 	unsigned long lpcr;
 
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index aefc100d79a7..7724af19ed7e 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -10,6 +10,7 @@
 #include <linux/memblock.h>
 #include <linux/mmu_context.h>
 #include <linux/sched/mm.h>
+#include <linux/debugfs.h>
 
 #include <asm/ppc-opcode.h>
 #include <asm/tlb.h>
@@ -1106,8 +1107,8 @@ EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
  * invalidating a full PID, so it has a far lower threshold to change from
  * individual page flushes to full-pid flushes.
  */
-static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
-static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
+static u32 tlb_single_page_flush_ceiling __read_mostly = 33;
+static u32 tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
 
 static inline void __radix__flush_tlb_range(struct mm_struct *mm,
 					    unsigned long start, unsigned long end)
@@ -1524,3 +1525,14 @@ void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid,
 EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt);
 
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
+static int __init create_tlb_single_page_flush_ceiling(void)
+{
+	debugfs_create_u32("tlb_single_page_flush_ceiling", 0600,
+			   arch_debugfs_dir, &tlb_single_page_flush_ceiling);
+	debugfs_create_u32("tlb_local_single_page_flush_ceiling", 0600,
+			   arch_debugfs_dir, &tlb_local_single_page_flush_ceiling);
+	return 0;
+}
+late_initcall(create_tlb_single_page_flush_ceiling);
+
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
index c91bd85eb90e..f0037bcc47a0 100644
--- a/arch/powerpc/mm/book3s64/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -822,7 +822,7 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_slb_fault)
 	/* IRQs are not reconciled here, so can't check irqs_disabled */
 	VM_WARN_ON(mfmsr() & MSR_EE);
 
-	if (unlikely(!(regs->msr & MSR_RI)))
+	if (regs_is_unrecoverable(regs))
 		return -EINVAL;
 
 	/*
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 9af3832c9d8d..22197b18d85e 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -18,6 +18,7 @@ static int n_root_addr_cells, n_root_size_cells;
 
 static struct drmem_lmb_info __drmem_info;
 struct drmem_lmb_info *drmem_info = &__drmem_info;
+static bool in_drmem_update;
 
 u64 drmem_lmb_memory_max(void)
 {
@@ -178,6 +179,11 @@ int drmem_update_dt(void)
 	if (!memory)
 		return -1;
 
+	/*
+	 * Set in_drmem_update to prevent the notifier callback to process the
+	 * DT property back since the change is coming from the LMB tree.
+	 */
+	in_drmem_update = true;
 	prop = of_find_property(memory, "ibm,dynamic-memory", NULL);
 	if (prop) {
 		rc = drmem_update_dt_v1(memory, prop);
@@ -186,6 +192,7 @@ int drmem_update_dt(void)
 		if (prop)
 			rc = drmem_update_dt_v2(memory, prop);
 	}
+	in_drmem_update = false;
 
 	of_node_put(memory);
 	return rc;
@@ -307,6 +314,45 @@ int __init walk_drmem_lmbs_early(unsigned long node, void *data,
 	return ret;
 }
 
+/*
+ * Update the LMB associativity index.
+ */
+static int update_lmb(struct drmem_lmb *updated_lmb,
+		      __maybe_unused const __be32 **usm,
+		      __maybe_unused void *data)
+{
+	struct drmem_lmb *lmb;
+
+	for_each_drmem_lmb(lmb) {
+		if (lmb->drc_index != updated_lmb->drc_index)
+			continue;
+
+		lmb->aa_index = updated_lmb->aa_index;
+		break;
+	}
+	return 0;
+}
+
+/*
+ * Update the LMB associativity index.
+ *
+ * This needs to be called when the hypervisor is updating the
+ * dynamic-reconfiguration-memory node property.
+ */
+void drmem_update_lmbs(struct property *prop)
+{
+	/*
+	 * Don't update the LMBs if triggered by the update done in
+	 * drmem_update_dt(), the LMB values have been used to the update the DT
+	 * property in that case.
+	 */
+	if (in_drmem_update)
+		return;
+	if (!strcmp(prop->name, "ibm,dynamic-memory"))
+		__walk_drmem_v1_lmbs(prop->value, NULL, NULL, update_lmb);
+	else if (!strcmp(prop->name, "ibm,dynamic-memory-v2"))
+		__walk_drmem_v2_lmbs(prop->value, NULL, NULL, update_lmb);
+}
 #endif
 
 static int init_drmem_lmb_size(struct device_node *dn)
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index ad198b439222..c3c4e31462ec 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -119,8 +119,7 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
 	return rc;
 }
 
-void __ref arch_remove_memory(int nid, u64 start, u64 size,
-			      struct vmem_altmap *altmap)
+void __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 7dac910c0b21..dd1cabc2ea0f 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -180,7 +180,7 @@ static inline void mmu_mark_rodata_ro(void) { }
 void __init mmu_mapin_immr(void);
 #endif
 
-#ifdef CONFIG_PPC_DEBUG_WX
+#ifdef CONFIG_DEBUG_WX
 void ptdump_check_wx(void);
 #else
 static inline void ptdump_check_wx(void) { }
diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S
index 4613bf8e9aae..5add4a51e51f 100644
--- a/arch/powerpc/mm/nohash/tlb_low.S
+++ b/arch/powerpc/mm/nohash/tlb_low.S
@@ -199,7 +199,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_476_DD2)
  * Touch enough instruction cache lines to ensure cache hits
  */
 1:	mflr	r9
-	bl	2f
+	bcl	20,31,$+4
 2:	mflr	r6
 	li	r7,32
 	PPC_ICBT(0,R6,R7)		/* touch next cache line */
@@ -414,7 +414,7 @@ _GLOBAL(loadcam_multi)
 	 * Set up temporary TLB entry that is the same as what we're
 	 * running from, but in AS=1.
 	 */
-	bl	1f
+	bcl	20,31,$+4
 1:	mflr	r6
 	tlbsx	0,r8
 	mfspr	r6,SPRN_MAS1
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index f2bf98bdcea2..6f14c8fb6359 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -40,9 +40,6 @@ static int numa_enabled = 1;
 
 static char *cmdline __initdata;
 
-static int numa_debug;
-#define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }
-
 int numa_cpu_lookup_table[NR_CPUS];
 cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
 struct pglist_data *node_data[MAX_NUMNODES];
@@ -51,14 +48,22 @@ EXPORT_SYMBOL(numa_cpu_lookup_table);
 EXPORT_SYMBOL(node_to_cpumask_map);
 EXPORT_SYMBOL(node_data);
 
-static int min_common_depth;
+static int primary_domain_index;
 static int n_mem_addr_cells, n_mem_size_cells;
-static int form1_affinity;
+
+#define FORM0_AFFINITY 0
+#define FORM1_AFFINITY 1
+#define FORM2_AFFINITY 2
+static int affinity_form;
 
 #define MAX_DISTANCE_REF_POINTS 4
 static int distance_ref_points_depth;
 static const __be32 *distance_ref_points;
 static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];
+static int numa_distance_table[MAX_NUMNODES][MAX_NUMNODES] = {
+	[0 ... MAX_NUMNODES - 1] = { [0 ... MAX_NUMNODES - 1] = -1 }
+};
+static int numa_id_index_table[MAX_NUMNODES] = { [0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE };
 
 /*
  * Allocate node_to_cpumask_map based on number of available nodes
@@ -79,7 +84,7 @@ static void __init setup_node_to_cpumask_map(void)
 		alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
 
 	/* cpumask_of_node() will now work */
-	dbg("Node to cpumask map for %u nodes\n", nr_node_ids);
+	pr_debug("Node to cpumask map for %u nodes\n", nr_node_ids);
 }
 
 static int __init fake_numa_create_new_node(unsigned long end_pfn,
@@ -123,7 +128,7 @@ static int __init fake_numa_create_new_node(unsigned long end_pfn,
 		cmdline = p;
 		fake_nid++;
 		*nid = fake_nid;
-		dbg("created new fake_node with id %d\n", fake_nid);
+		pr_debug("created new fake_node with id %d\n", fake_nid);
 		return 1;
 	}
 	return 0;
@@ -137,33 +142,79 @@ static void reset_numa_cpu_lookup_table(void)
 		numa_cpu_lookup_table[cpu] = -1;
 }
 
-static void map_cpu_to_node(int cpu, int node)
+void map_cpu_to_node(int cpu, int node)
 {
 	update_numa_cpu_lookup_table(cpu, node);
 
-	dbg("adding cpu %d to node %d\n", cpu, node);
-
-	if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node])))
+	if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node]))) {
+		pr_debug("adding cpu %d to node %d\n", cpu, node);
 		cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
+	}
 }
 
 #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PPC_SPLPAR)
-static void unmap_cpu_from_node(unsigned long cpu)
+void unmap_cpu_from_node(unsigned long cpu)
 {
 	int node = numa_cpu_lookup_table[cpu];
 
-	dbg("removing cpu %lu from node %d\n", cpu, node);
-
 	if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) {
 		cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
+		pr_debug("removing cpu %lu from node %d\n", cpu, node);
 	} else {
-		printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
-		       cpu, node);
+		pr_warn("Warning: cpu %lu not found in node %d\n", cpu, node);
 	}
 }
 #endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */
 
-int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
+static int __associativity_to_nid(const __be32 *associativity,
+				  int max_array_sz)
+{
+	int nid;
+	/*
+	 * primary_domain_index is 1 based array index.
+	 */
+	int index = primary_domain_index  - 1;
+
+	if (!numa_enabled || index >= max_array_sz)
+		return NUMA_NO_NODE;
+
+	nid = of_read_number(&associativity[index], 1);
+
+	/* POWER4 LPAR uses 0xffff as invalid node */
+	if (nid == 0xffff || nid >= nr_node_ids)
+		nid = NUMA_NO_NODE;
+	return nid;
+}
+/*
+ * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA
+ * info is found.
+ */
+static int associativity_to_nid(const __be32 *associativity)
+{
+	int array_sz = of_read_number(associativity, 1);
+
+	/* Skip the first element in the associativity array */
+	return __associativity_to_nid((associativity + 1), array_sz);
+}
+
+static int __cpu_form2_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
+{
+	int dist;
+	int node1, node2;
+
+	node1 = associativity_to_nid(cpu1_assoc);
+	node2 = associativity_to_nid(cpu2_assoc);
+
+	dist = numa_distance_table[node1][node2];
+	if (dist <= LOCAL_DISTANCE)
+		return 0;
+	else if (dist <= REMOTE_DISTANCE)
+		return 1;
+	else
+		return 2;
+}
+
+static int __cpu_form1_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
 {
 	int dist = 0;
 
@@ -179,6 +230,15 @@ int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
 	return dist;
 }
 
+int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
+{
+	/* We should not get called with FORM0 */
+	VM_WARN_ON(affinity_form == FORM0_AFFINITY);
+	if (affinity_form == FORM1_AFFINITY)
+		return __cpu_form1_relative_distance(cpu1_assoc, cpu2_assoc);
+	return __cpu_form2_relative_distance(cpu1_assoc, cpu2_assoc);
+}
+
 /* must hold reference to node during call */
 static const __be32 *of_get_associativity(struct device_node *dev)
 {
@@ -190,7 +250,9 @@ int __node_distance(int a, int b)
 	int i;
 	int distance = LOCAL_DISTANCE;
 
-	if (!form1_affinity)
+	if (affinity_form == FORM2_AFFINITY)
+		return numa_distance_table[a][b];
+	else if (affinity_form == FORM0_AFFINITY)
 		return ((a == b) ? LOCAL_DISTANCE : REMOTE_DISTANCE);
 
 	for (i = 0; i < distance_ref_points_depth; i++) {
@@ -205,52 +267,6 @@ int __node_distance(int a, int b)
 }
 EXPORT_SYMBOL(__node_distance);
 
-static void initialize_distance_lookup_table(int nid,
-		const __be32 *associativity)
-{
-	int i;
-
-	if (!form1_affinity)
-		return;
-
-	for (i = 0; i < distance_ref_points_depth; i++) {
-		const __be32 *entry;
-
-		entry = &associativity[be32_to_cpu(distance_ref_points[i]) - 1];
-		distance_lookup_table[nid][i] = of_read_number(entry, 1);
-	}
-}
-
-/*
- * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA
- * info is found.
- */
-static int associativity_to_nid(const __be32 *associativity)
-{
-	int nid = NUMA_NO_NODE;
-
-	if (!numa_enabled)
-		goto out;
-
-	if (of_read_number(associativity, 1) >= min_common_depth)
-		nid = of_read_number(&associativity[min_common_depth], 1);
-
-	/* POWER4 LPAR uses 0xffff as invalid node */
-	if (nid == 0xffff || nid >= nr_node_ids)
-		nid = NUMA_NO_NODE;
-
-	if (nid > 0 &&
-		of_read_number(associativity, 1) >= distance_ref_points_depth) {
-		/*
-		 * Skip the length field and send start of associativity array
-		 */
-		initialize_distance_lookup_table(nid, associativity + 1);
-	}
-
-out:
-	return nid;
-}
-
 /* Returns the nid associated with the given device tree node,
  * or -1 if not found.
  */
@@ -284,11 +300,159 @@ int of_node_to_nid(struct device_node *device)
 }
 EXPORT_SYMBOL(of_node_to_nid);
 
-static int __init find_min_common_depth(void)
+static void __initialize_form1_numa_distance(const __be32 *associativity,
+					     int max_array_sz)
+{
+	int i, nid;
+
+	if (affinity_form != FORM1_AFFINITY)
+		return;
+
+	nid = __associativity_to_nid(associativity, max_array_sz);
+	if (nid != NUMA_NO_NODE) {
+		for (i = 0; i < distance_ref_points_depth; i++) {
+			const __be32 *entry;
+			int index = be32_to_cpu(distance_ref_points[i]) - 1;
+
+			/*
+			 * broken hierarchy, return with broken distance table
+			 */
+			if (WARN(index >= max_array_sz, "Broken ibm,associativity property"))
+				return;
+
+			entry = &associativity[index];
+			distance_lookup_table[nid][i] = of_read_number(entry, 1);
+		}
+	}
+}
+
+static void initialize_form1_numa_distance(const __be32 *associativity)
+{
+	int array_sz;
+
+	array_sz = of_read_number(associativity, 1);
+	/* Skip the first element in the associativity array */
+	__initialize_form1_numa_distance(associativity + 1, array_sz);
+}
+
+/*
+ * Used to update distance information w.r.t newly added node.
+ */
+void update_numa_distance(struct device_node *node)
 {
-	int depth;
+	int nid;
+
+	if (affinity_form == FORM0_AFFINITY)
+		return;
+	else if (affinity_form == FORM1_AFFINITY) {
+		const __be32 *associativity;
+
+		associativity = of_get_associativity(node);
+		if (!associativity)
+			return;
+
+		initialize_form1_numa_distance(associativity);
+		return;
+	}
+
+	/* FORM2 affinity  */
+	nid = of_node_to_nid_single(node);
+	if (nid == NUMA_NO_NODE)
+		return;
+
+	/*
+	 * With FORM2 we expect NUMA distance of all possible NUMA
+	 * nodes to be provided during boot.
+	 */
+	WARN(numa_distance_table[nid][nid] == -1,
+	     "NUMA distance details for node %d not provided\n", nid);
+}
+
+/*
+ * ibm,numa-lookup-index-table= {N, domainid1, domainid2, ..... domainidN}
+ * ibm,numa-distance-table = { N, 1, 2, 4, 5, 1, 6, .... N elements}
+ */
+static void initialize_form2_numa_distance_lookup_table(void)
+{
+	int i, j;
+	struct device_node *root;
+	const __u8 *numa_dist_table;
+	const __be32 *numa_lookup_index;
+	int numa_dist_table_length;
+	int max_numa_index, distance_index;
+
+	if (firmware_has_feature(FW_FEATURE_OPAL))
+		root = of_find_node_by_path("/ibm,opal");
+	else
+		root = of_find_node_by_path("/rtas");
+	if (!root)
+		root = of_find_node_by_path("/");
+
+	numa_lookup_index = of_get_property(root, "ibm,numa-lookup-index-table", NULL);
+	max_numa_index = of_read_number(&numa_lookup_index[0], 1);
+
+	/* first element of the array is the size and is encode-int */
+	numa_dist_table = of_get_property(root, "ibm,numa-distance-table", NULL);
+	numa_dist_table_length = of_read_number((const __be32 *)&numa_dist_table[0], 1);
+	/* Skip the size which is encoded int */
+	numa_dist_table += sizeof(__be32);
+
+	pr_debug("numa_dist_table_len = %d, numa_dist_indexes_len = %d\n",
+		 numa_dist_table_length, max_numa_index);
+
+	for (i = 0; i < max_numa_index; i++)
+		/* +1 skip the max_numa_index in the property */
+		numa_id_index_table[i] = of_read_number(&numa_lookup_index[i + 1], 1);
+
+
+	if (numa_dist_table_length != max_numa_index * max_numa_index) {
+		WARN(1, "Wrong NUMA distance information\n");
+		/* consider everybody else just remote. */
+		for (i = 0;  i < max_numa_index; i++) {
+			for (j = 0; j < max_numa_index; j++) {
+				int nodeA = numa_id_index_table[i];
+				int nodeB = numa_id_index_table[j];
+
+				if (nodeA == nodeB)
+					numa_distance_table[nodeA][nodeB] = LOCAL_DISTANCE;
+				else
+					numa_distance_table[nodeA][nodeB] = REMOTE_DISTANCE;
+			}
+		}
+	}
+
+	distance_index = 0;
+	for (i = 0;  i < max_numa_index; i++) {
+		for (j = 0; j < max_numa_index; j++) {
+			int nodeA = numa_id_index_table[i];
+			int nodeB = numa_id_index_table[j];
+
+			numa_distance_table[nodeA][nodeB] = numa_dist_table[distance_index++];
+			pr_debug("dist[%d][%d]=%d ", nodeA, nodeB, numa_distance_table[nodeA][nodeB]);
+		}
+	}
+	of_node_put(root);
+}
+
+static int __init find_primary_domain_index(void)
+{
+	int index;
 	struct device_node *root;
 
+	/*
+	 * Check for which form of affinity.
+	 */
+	if (firmware_has_feature(FW_FEATURE_OPAL)) {
+		affinity_form = FORM1_AFFINITY;
+	} else if (firmware_has_feature(FW_FEATURE_FORM2_AFFINITY)) {
+		pr_debug("Using form 2 affinity\n");
+		affinity_form = FORM2_AFFINITY;
+	} else if (firmware_has_feature(FW_FEATURE_FORM1_AFFINITY)) {
+		pr_debug("Using form 1 affinity\n");
+		affinity_form = FORM1_AFFINITY;
+	} else
+		affinity_form = FORM0_AFFINITY;
+
 	if (firmware_has_feature(FW_FEATURE_OPAL))
 		root = of_find_node_by_path("/ibm,opal");
 	else
@@ -313,42 +477,37 @@ static int __init find_min_common_depth(void)
 					&distance_ref_points_depth);
 
 	if (!distance_ref_points) {
-		dbg("NUMA: ibm,associativity-reference-points not found.\n");
+		pr_debug("ibm,associativity-reference-points not found.\n");
 		goto err;
 	}
 
 	distance_ref_points_depth /= sizeof(int);
-
-	if (firmware_has_feature(FW_FEATURE_OPAL) ||
-	    firmware_has_feature(FW_FEATURE_TYPE1_AFFINITY)) {
-		dbg("Using form 1 affinity\n");
-		form1_affinity = 1;
-	}
-
-	if (form1_affinity) {
-		depth = of_read_number(distance_ref_points, 1);
-	} else {
+	if (affinity_form == FORM0_AFFINITY) {
 		if (distance_ref_points_depth < 2) {
-			printk(KERN_WARNING "NUMA: "
-				"short ibm,associativity-reference-points\n");
+			pr_warn("short ibm,associativity-reference-points\n");
 			goto err;
 		}
 
-		depth = of_read_number(&distance_ref_points[1], 1);
+		index = of_read_number(&distance_ref_points[1], 1);
+	} else {
+		/*
+		 * Both FORM1 and FORM2 affinity find the primary domain details
+		 * at the same offset.
+		 */
+		index = of_read_number(distance_ref_points, 1);
 	}
-
 	/*
 	 * Warn and cap if the hardware supports more than
 	 * MAX_DISTANCE_REF_POINTS domains.
 	 */
 	if (distance_ref_points_depth > MAX_DISTANCE_REF_POINTS) {
-		printk(KERN_WARNING "NUMA: distance array capped at "
-			"%d entries\n", MAX_DISTANCE_REF_POINTS);
+		pr_warn("distance array capped at %d entries\n",
+			MAX_DISTANCE_REF_POINTS);
 		distance_ref_points_depth = MAX_DISTANCE_REF_POINTS;
 	}
 
 	of_node_put(root);
-	return depth;
+	return index;
 
 err:
 	of_node_put(root);
@@ -426,6 +585,38 @@ static int of_get_assoc_arrays(struct assoc_arrays *aa)
 	return 0;
 }
 
+static int get_nid_and_numa_distance(struct drmem_lmb *lmb)
+{
+	struct assoc_arrays aa = { .arrays = NULL };
+	int default_nid = NUMA_NO_NODE;
+	int nid = default_nid;
+	int rc, index;
+
+	if ((primary_domain_index < 0) || !numa_enabled)
+		return default_nid;
+
+	rc = of_get_assoc_arrays(&aa);
+	if (rc)
+		return default_nid;
+
+	if (primary_domain_index <= aa.array_sz &&
+	    !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
+		const __be32 *associativity;
+
+		index = lmb->aa_index * aa.array_sz;
+		associativity = &aa.arrays[index];
+		nid = __associativity_to_nid(associativity, aa.array_sz);
+		if (nid > 0 && affinity_form == FORM1_AFFINITY) {
+			/*
+			 * lookup array associativity entries have
+			 * no length of the array as the first element.
+			 */
+			__initialize_form1_numa_distance(associativity, aa.array_sz);
+		}
+	}
+	return nid;
+}
+
 /*
  * This is like of_node_to_nid_single() for memory represented in the
  * ibm,dynamic-reconfiguration-memory node.
@@ -437,35 +628,28 @@ int of_drconf_to_nid_single(struct drmem_lmb *lmb)
 	int nid = default_nid;
 	int rc, index;
 
-	if ((min_common_depth < 0) || !numa_enabled)
+	if ((primary_domain_index < 0) || !numa_enabled)
 		return default_nid;
 
 	rc = of_get_assoc_arrays(&aa);
 	if (rc)
 		return default_nid;
 
-	if (min_common_depth <= aa.array_sz &&
+	if (primary_domain_index <= aa.array_sz &&
 	    !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
-		index = lmb->aa_index * aa.array_sz + min_common_depth - 1;
-		nid = of_read_number(&aa.arrays[index], 1);
-
-		if (nid == 0xffff || nid >= nr_node_ids)
-			nid = default_nid;
+		const __be32 *associativity;
 
-		if (nid > 0) {
-			index = lmb->aa_index * aa.array_sz;
-			initialize_distance_lookup_table(nid,
-							&aa.arrays[index]);
-		}
+		index = lmb->aa_index * aa.array_sz;
+		associativity = &aa.arrays[index];
+		nid = __associativity_to_nid(associativity, aa.array_sz);
 	}
-
 	return nid;
 }
 
 #ifdef CONFIG_PPC_SPLPAR
-static int vphn_get_nid(long lcpu)
+
+static int __vphn_get_associativity(long lcpu, __be32 *associativity)
 {
-	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
 	long rc, hwid;
 
 	/*
@@ -485,12 +669,30 @@ static int vphn_get_nid(long lcpu)
 
 		rc = hcall_vphn(hwid, VPHN_FLAG_VCPU, associativity);
 		if (rc == H_SUCCESS)
-			return associativity_to_nid(associativity);
+			return 0;
 	}
 
+	return -1;
+}
+
+static int vphn_get_nid(long lcpu)
+{
+	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
+
+
+	if (!__vphn_get_associativity(lcpu, associativity))
+		return associativity_to_nid(associativity);
+
 	return NUMA_NO_NODE;
+
 }
 #else
+
+static int __vphn_get_associativity(long lcpu, __be32 *associativity)
+{
+	return -1;
+}
+
 static int vphn_get_nid(long unused)
 {
 	return NUMA_NO_NODE;
@@ -598,9 +800,6 @@ static int ppc_numa_cpu_prepare(unsigned int cpu)
 
 static int ppc_numa_cpu_dead(unsigned int cpu)
 {
-#ifdef CONFIG_HOTPLUG_CPU
-	unmap_cpu_from_node(cpu);
-#endif
 	return 0;
 }
 
@@ -685,7 +884,7 @@ static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
 			size = read_n_cells(n_mem_size_cells, usm);
 		}
 
-		nid = of_drconf_to_nid_single(lmb);
+		nid = get_nid_and_numa_distance(lmb);
 		fake_numa_create_new_node(((base + size) >> PAGE_SHIFT),
 					  &nid);
 		node_set_online(nid);
@@ -702,24 +901,31 @@ static int __init parse_numa_properties(void)
 	struct device_node *memory;
 	int default_nid = 0;
 	unsigned long i;
+	const __be32 *associativity;
 
 	if (numa_enabled == 0) {
-		printk(KERN_WARNING "NUMA disabled by user\n");
+		pr_warn("disabled by user\n");
 		return -1;
 	}
 
-	min_common_depth = find_min_common_depth();
+	primary_domain_index = find_primary_domain_index();
 
-	if (min_common_depth < 0) {
+	if (primary_domain_index < 0) {
 		/*
-		 * if we fail to parse min_common_depth from device tree
+		 * if we fail to parse primary_domain_index from device tree
 		 * mark the numa disabled, boot with numa disabled.
 		 */
 		numa_enabled = false;
-		return min_common_depth;
+		return primary_domain_index;
 	}
 
-	dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);
+	pr_debug("associativity depth for CPU/Memory: %d\n", primary_domain_index);
+
+	/*
+	 * If it is FORM2 initialize the distance table here.
+	 */
+	if (affinity_form == FORM2_AFFINITY)
+		initialize_form2_numa_distance_lookup_table();
 
 	/*
 	 * Even though we connect cpus to numa domains later in SMP
@@ -727,18 +933,30 @@ static int __init parse_numa_properties(void)
 	 * each node to be onlined must have NODE_DATA etc backing it.
 	 */
 	for_each_present_cpu(i) {
+		__be32 vphn_assoc[VPHN_ASSOC_BUFSIZE];
 		struct device_node *cpu;
-		int nid = vphn_get_nid(i);
+		int nid = NUMA_NO_NODE;
 
-		/*
-		 * Don't fall back to default_nid yet -- we will plug
-		 * cpus into nodes once the memory scan has discovered
-		 * the topology.
-		 */
-		if (nid == NUMA_NO_NODE) {
+		memset(vphn_assoc, 0, VPHN_ASSOC_BUFSIZE * sizeof(__be32));
+
+		if (__vphn_get_associativity(i, vphn_assoc) == 0) {
+			nid = associativity_to_nid(vphn_assoc);
+			initialize_form1_numa_distance(vphn_assoc);
+		} else {
+
+			/*
+			 * Don't fall back to default_nid yet -- we will plug
+			 * cpus into nodes once the memory scan has discovered
+			 * the topology.
+			 */
 			cpu = of_get_cpu_node(i, NULL);
 			BUG_ON(!cpu);
-			nid = of_node_to_nid_single(cpu);
+
+			associativity = of_get_associativity(cpu);
+			if (associativity) {
+				nid = associativity_to_nid(associativity);
+				initialize_form1_numa_distance(associativity);
+			}
 			of_node_put(cpu);
 		}
 
@@ -774,8 +992,11 @@ new_range:
 		 * have associativity properties.  If none, then
 		 * everything goes to default_nid.
 		 */
-		nid = of_node_to_nid_single(memory);
-		if (nid < 0)
+		associativity = of_get_associativity(memory);
+		if (associativity) {
+			nid = associativity_to_nid(associativity);
+			initialize_form1_numa_distance(associativity);
+		} else
 			nid = default_nid;
 
 		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
@@ -811,10 +1032,8 @@ static void __init setup_nonnuma(void)
 	unsigned int nid = 0;
 	int i;
 
-	printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
-	       top_of_ram, total_ram);
-	printk(KERN_DEBUG "Memory hole size: %ldMB\n",
-	       (top_of_ram - total_ram) >> 20);
+	pr_debug("Top of RAM: 0x%lx, Total RAM: 0x%lx\n", top_of_ram, total_ram);
+	pr_debug("Memory hole size: %ldMB\n", (top_of_ram - total_ram) >> 20);
 
 	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
 		fake_numa_create_new_node(end_pfn, &nid);
@@ -893,7 +1112,7 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
 static void __init find_possible_nodes(void)
 {
 	struct device_node *rtas;
-	const __be32 *domains;
+	const __be32 *domains = NULL;
 	int prop_length, max_nodes;
 	u32 i;
 
@@ -909,9 +1128,14 @@ static void __init find_possible_nodes(void)
 	 * it doesn't exist, then fallback on ibm,max-associativity-domains.
 	 * Current denotes what the platform can support compared to max
 	 * which denotes what the Hypervisor can support.
+	 *
+	 * If the LPAR is migratable, new nodes might be activated after a LPM,
+	 * so we should consider the max number in that case.
 	 */
-	domains = of_get_property(rtas, "ibm,current-associativity-domains",
-					&prop_length);
+	if (!of_get_property(of_root, "ibm,migratable-partition", NULL))
+		domains = of_get_property(rtas,
+					  "ibm,current-associativity-domains",
+					  &prop_length);
 	if (!domains) {
 		domains = of_get_property(rtas, "ibm,max-associativity-domains",
 					&prop_length);
@@ -919,14 +1143,16 @@ static void __init find_possible_nodes(void)
 			goto out;
 	}
 
-	max_nodes = of_read_number(&domains[min_common_depth], 1);
+	max_nodes = of_read_number(&domains[primary_domain_index], 1);
+	pr_info("Partition configured for %d NUMA nodes.\n", max_nodes);
+
 	for (i = 0; i < max_nodes; i++) {
 		if (!node_possible(i))
 			node_set(i, node_possible_map);
 	}
 
 	prop_length /= sizeof(int);
-	if (prop_length > min_common_depth + 2)
+	if (prop_length > primary_domain_index + 2)
 		coregroup_enabled = 1;
 
 out:
@@ -1014,9 +1240,6 @@ static int __init early_numa(char *p)
 	if (strstr(p, "off"))
 		numa_enabled = 0;
 
-	if (strstr(p, "debug"))
-		numa_debug = 1;
-
 	p = strstr(p, "fake=");
 	if (p)
 		cmdline = p + strlen("fake=");
@@ -1179,7 +1402,7 @@ static long vphn_get_associativity(unsigned long cpu,
 
 	switch (rc) {
 	case H_SUCCESS:
-		dbg("VPHN hcall succeeded. Reset polling...\n");
+		pr_debug("VPHN hcall succeeded. Reset polling...\n");
 		goto out;
 
 	case H_FUNCTION:
@@ -1259,7 +1482,7 @@ int cpu_to_coregroup_id(int cpu)
 		goto out;
 
 	index = of_read_number(associativity, 1);
-	if (index > min_common_depth + 1)
+	if (index > primary_domain_index + 1)
 		return of_read_number(&associativity[index - 1], 1);
 
 out:
diff --git a/arch/powerpc/mm/ptdump/8xx.c b/arch/powerpc/mm/ptdump/8xx.c
index 86da2a669680..fac932eb8f9a 100644
--- a/arch/powerpc/mm/ptdump/8xx.c
+++ b/arch/powerpc/mm/ptdump/8xx.c
@@ -75,8 +75,10 @@ static const struct flag_info flag_array[] = {
 };
 
 struct pgtable_level pg_level[5] = {
-	{
-	}, { /* pgd */
+	{ /* pgd */
+		.flag	= flag_array,
+		.num	= ARRAY_SIZE(flag_array),
+	}, { /* p4d */
 		.flag	= flag_array,
 		.num	= ARRAY_SIZE(flag_array),
 	}, { /* pud */
diff --git a/arch/powerpc/mm/ptdump/Makefile b/arch/powerpc/mm/ptdump/Makefile
index 712762be3cb1..4050cbb55acf 100644
--- a/arch/powerpc/mm/ptdump/Makefile
+++ b/arch/powerpc/mm/ptdump/Makefile
@@ -5,5 +5,10 @@ obj-y	+= ptdump.o
 obj-$(CONFIG_4xx)		+= shared.o
 obj-$(CONFIG_PPC_8xx)		+= 8xx.o
 obj-$(CONFIG_PPC_BOOK3E_MMU)	+= shared.o
-obj-$(CONFIG_PPC_BOOK3S_32)	+= shared.o bats.o segment_regs.o
-obj-$(CONFIG_PPC_BOOK3S_64)	+= book3s64.o hashpagetable.o
+obj-$(CONFIG_PPC_BOOK3S_32)	+= shared.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= book3s64.o
+
+ifdef CONFIG_PTDUMP_DEBUGFS
+obj-$(CONFIG_PPC_BOOK3S_32)	+= bats.o segment_regs.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= hashpagetable.o
+endif
diff --git a/arch/powerpc/mm/ptdump/bats.c b/arch/powerpc/mm/ptdump/bats.c
index c4c628b03cf8..820c119013e4 100644
--- a/arch/powerpc/mm/ptdump/bats.c
+++ b/arch/powerpc/mm/ptdump/bats.c
@@ -7,7 +7,7 @@
  */
 
 #include <linux/pgtable.h>
-#include <asm/debugfs.h>
+#include <linux/debugfs.h>
 #include <asm/cpu_has_feature.h>
 
 #include "ptdump.h"
@@ -57,7 +57,7 @@ static void bat_show_603(struct seq_file *m, int idx, u32 lower, u32 upper, bool
 
 #define BAT_SHOW_603(_m, _n, _l, _u, _d) bat_show_603(_m, _n, mfspr(_l), mfspr(_u), _d)
 
-static int bats_show_603(struct seq_file *m, void *v)
+static int bats_show(struct seq_file *m, void *v)
 {
 	seq_puts(m, "---[ Instruction Block Address Translation ]---\n");
 
@@ -88,22 +88,12 @@ static int bats_show_603(struct seq_file *m, void *v)
 	return 0;
 }
 
-static int bats_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, bats_show_603, NULL);
-}
-
-static const struct file_operations bats_fops = {
-	.open		= bats_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(bats);
 
 static int __init bats_init(void)
 {
 	debugfs_create_file("block_address_translation", 0400,
-			    powerpc_debugfs_root, NULL, &bats_fops);
+			    arch_debugfs_dir, NULL, &bats_fops);
 	return 0;
 }
 device_initcall(bats_init);
diff --git a/arch/powerpc/mm/ptdump/book3s64.c b/arch/powerpc/mm/ptdump/book3s64.c
index 14f73868db66..5ad92d9dc5d1 100644
--- a/arch/powerpc/mm/ptdump/book3s64.c
+++ b/arch/powerpc/mm/ptdump/book3s64.c
@@ -103,8 +103,10 @@ static const struct flag_info flag_array[] = {
 };
 
 struct pgtable_level pg_level[5] = {
-	{
-	}, { /* pgd */
+	{ /* pgd */
+		.flag	= flag_array,
+		.num	= ARRAY_SIZE(flag_array),
+	}, { /* p4d */
 		.flag	= flag_array,
 		.num	= ARRAY_SIZE(flag_array),
 	}, { /* pud */
diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c
index ad6df9a2e7c8..c7f824d294b2 100644
--- a/arch/powerpc/mm/ptdump/hashpagetable.c
+++ b/arch/powerpc/mm/ptdump/hashpagetable.c
@@ -526,17 +526,7 @@ static int ptdump_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static int ptdump_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ptdump_show, NULL);
-}
-
-static const struct file_operations ptdump_fops = {
-	.open		= ptdump_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(ptdump);
 
 static int ptdump_init(void)
 {
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 5062c58b1e5b..bf251191e78d 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -16,6 +16,7 @@
 #include <linux/io.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
+#include <linux/ptdump.h>
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 #include <asm/fixmap.h>
@@ -54,11 +55,12 @@
  *
  */
 struct pg_state {
+	struct ptdump_state ptdump;
 	struct seq_file *seq;
 	const struct addr_marker *marker;
 	unsigned long start_address;
 	unsigned long start_pa;
-	unsigned int level;
+	int level;
 	u64 current_flags;
 	bool check_wx;
 	unsigned long wx_pages;
@@ -102,6 +104,11 @@ static struct addr_marker address_markers[] = {
 	{ -1,	NULL },
 };
 
+static struct ptdump_range ptdump_range[] __ro_after_init = {
+	{TASK_SIZE_MAX, ~0UL},
+	{0, 0}
+};
+
 #define pt_dump_seq_printf(m, fmt, args...)	\
 ({						\
 	if (m)					\
@@ -188,10 +195,9 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
 	st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
 }
 
-static void note_page_update_state(struct pg_state *st, unsigned long addr,
-				   unsigned int level, u64 val, unsigned long page_size)
+static void note_page_update_state(struct pg_state *st, unsigned long addr, int level, u64 val)
 {
-	u64 flag = val & pg_level[level].mask;
+	u64 flag = level >= 0 ? val & pg_level[level].mask : 0;
 	u64 pa = val & PTE_RPN_MASK;
 
 	st->level = level;
@@ -205,15 +211,15 @@ static void note_page_update_state(struct pg_state *st, unsigned long addr,
 	}
 }
 
-static void note_page(struct pg_state *st, unsigned long addr,
-	       unsigned int level, u64 val, unsigned long page_size)
+static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val)
 {
-	u64 flag = val & pg_level[level].mask;
+	u64 flag = level >= 0 ? val & pg_level[level].mask : 0;
+	struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
 
 	/* At first no level is set */
-	if (!st->level) {
+	if (st->level == -1) {
 		pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
-		note_page_update_state(st, addr, level, val, page_size);
+		note_page_update_state(st, addr, level, val);
 	/*
 	 * Dump the section of virtual memory when:
 	 *   - the PTE flags from one entry to the next differs.
@@ -242,95 +248,7 @@ static void note_page(struct pg_state *st, unsigned long addr,
 		 * Address indicates we have passed the end of the
 		 * current section of virtual memory
 		 */
-		note_page_update_state(st, addr, level, val, page_size);
-	}
-}
-
-static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
-{
-	pte_t *pte = pte_offset_kernel(pmd, 0);
-	unsigned long addr;
-	unsigned int i;
-
-	for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
-		addr = start + i * PAGE_SIZE;
-		note_page(st, addr, 4, pte_val(*pte), PAGE_SIZE);
-
-	}
-}
-
-static void walk_hugepd(struct pg_state *st, hugepd_t *phpd, unsigned long start,
-			int pdshift, int level)
-{
-#ifdef CONFIG_ARCH_HAS_HUGEPD
-	unsigned int i;
-	int shift = hugepd_shift(*phpd);
-	int ptrs_per_hpd = pdshift - shift > 0 ? 1 << (pdshift - shift) : 1;
-
-	if (start & ((1 << shift) - 1))
-		return;
-
-	for (i = 0; i < ptrs_per_hpd; i++) {
-		unsigned long addr = start + (i << shift);
-		pte_t *pte = hugepte_offset(*phpd, addr, pdshift);
-
-		note_page(st, addr, level + 1, pte_val(*pte), 1 << shift);
-	}
-#endif
-}
-
-static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
-{
-	pmd_t *pmd = pmd_offset(pud, 0);
-	unsigned long addr;
-	unsigned int i;
-
-	for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
-		addr = start + i * PMD_SIZE;
-		if (!pmd_none(*pmd) && !pmd_is_leaf(*pmd))
-			/* pmd exists */
-			walk_pte(st, pmd, addr);
-		else
-			note_page(st, addr, 3, pmd_val(*pmd), PMD_SIZE);
-	}
-}
-
-static void walk_pud(struct pg_state *st, p4d_t *p4d, unsigned long start)
-{
-	pud_t *pud = pud_offset(p4d, 0);
-	unsigned long addr;
-	unsigned int i;
-
-	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
-		addr = start + i * PUD_SIZE;
-		if (!pud_none(*pud) && !pud_is_leaf(*pud))
-			/* pud exists */
-			walk_pmd(st, pud, addr);
-		else
-			note_page(st, addr, 2, pud_val(*pud), PUD_SIZE);
-	}
-}
-
-static void walk_pagetables(struct pg_state *st)
-{
-	unsigned int i;
-	unsigned long addr = st->start_address & PGDIR_MASK;
-	pgd_t *pgd = pgd_offset_k(addr);
-
-	/*
-	 * Traverse the linux pagetable structure and dump pages that are in
-	 * the hash pagetable.
-	 */
-	for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {
-		p4d_t *p4d = p4d_offset(pgd, 0);
-
-		if (p4d_none(*p4d) || p4d_is_leaf(*p4d))
-			note_page(st, addr, 1, p4d_val(*p4d), PGDIR_SIZE);
-		else if (is_hugepd(__hugepd(p4d_val(*p4d))))
-			walk_hugepd(st, (hugepd_t *)p4d, addr, PGDIR_SHIFT, 1);
-		else
-			/* p4d exists */
-			walk_pud(st, p4d, addr);
+		note_page_update_state(st, addr, level, val);
 	}
 }
 
@@ -383,32 +301,19 @@ static int ptdump_show(struct seq_file *m, void *v)
 	struct pg_state st = {
 		.seq = m,
 		.marker = address_markers,
-		.start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
+		.level = -1,
+		.ptdump = {
+			.note_page = note_page,
+			.range = ptdump_range,
+		}
 	};
 
-#ifdef CONFIG_PPC64
-	if (!radix_enabled())
-		st.start_address = KERN_VIRT_START;
-#endif
-
 	/* Traverse kernel page tables */
-	walk_pagetables(&st);
-	note_page(&st, 0, 0, 0, 0);
+	ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
 	return 0;
 }
 
-
-static int ptdump_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ptdump_show, NULL);
-}
-
-static const struct file_operations ptdump_fops = {
-	.open		= ptdump_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(ptdump);
 
 static void build_pgtable_complete_mask(void)
 {
@@ -420,22 +325,24 @@ static void build_pgtable_complete_mask(void)
 				pg_level[i].mask |= pg_level[i].flag[j].mask;
 }
 
-#ifdef CONFIG_PPC_DEBUG_WX
+#ifdef CONFIG_DEBUG_WX
 void ptdump_check_wx(void)
 {
 	struct pg_state st = {
 		.seq = NULL,
-		.marker = address_markers,
+		.marker = (struct addr_marker[]) {
+			{ 0, NULL},
+			{ -1, NULL},
+		},
+		.level = -1,
 		.check_wx = true,
-		.start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
+		.ptdump = {
+			.note_page = note_page,
+			.range = ptdump_range,
+		}
 	};
 
-#ifdef CONFIG_PPC64
-	if (!radix_enabled())
-		st.start_address = KERN_VIRT_START;
-#endif
-
-	walk_pagetables(&st);
+	ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
 
 	if (st.wx_pages)
 		pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n",
@@ -445,12 +352,23 @@ void ptdump_check_wx(void)
 }
 #endif
 
-static int ptdump_init(void)
+static int __init ptdump_init(void)
 {
+#ifdef CONFIG_PPC64
+	if (!radix_enabled())
+		ptdump_range[0].start = KERN_VIRT_START;
+	else
+		ptdump_range[0].start = PAGE_OFFSET;
+
+	ptdump_range[0].end = PAGE_OFFSET + (PGDIR_SIZE * PTRS_PER_PGD);
+#endif
+
 	populate_markers();
 	build_pgtable_complete_mask();
-	debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
-			    &ptdump_fops);
+
+	if (IS_ENABLED(CONFIG_PTDUMP_DEBUGFS))
+		debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
+
 	return 0;
 }
 device_initcall(ptdump_init);
diff --git a/arch/powerpc/mm/ptdump/segment_regs.c b/arch/powerpc/mm/ptdump/segment_regs.c
index 565048a0c9be..9df3af8d481f 100644
--- a/arch/powerpc/mm/ptdump/segment_regs.c
+++ b/arch/powerpc/mm/ptdump/segment_regs.c
@@ -6,7 +6,7 @@
  * This dumps the content of Segment Registers
  */
 
-#include <asm/debugfs.h>
+#include <linux/debugfs.h>
 
 static void seg_show(struct seq_file *m, int i)
 {
@@ -41,21 +41,11 @@ static int sr_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static int sr_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, sr_show, NULL);
-}
-
-static const struct file_operations sr_fops = {
-	.open		= sr_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(sr);
 
 static int __init sr_init(void)
 {
-	debugfs_create_file("segment_registers", 0400, powerpc_debugfs_root,
+	debugfs_create_file("segment_registers", 0400, arch_debugfs_dir,
 			    NULL, &sr_fops);
 	return 0;
 }
diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c
index c005fe041c18..03607ab90c66 100644
--- a/arch/powerpc/mm/ptdump/shared.c
+++ b/arch/powerpc/mm/ptdump/shared.c
@@ -68,8 +68,10 @@ static const struct flag_info flag_array[] = {
 };
 
 struct pgtable_level pg_level[5] = {
-	{
-	}, { /* pgd */
+	{ /* pgd */
+		.flag	= flag_array,
+		.num	= ARRAY_SIZE(flag_array),
+	}, { /* p4d */
 		.flag	= flag_array,
 		.num	= ARRAY_SIZE(flag_array),
 	}, { /* pud */
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index bb0ee716de91..73e62e9b179b 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -340,6 +340,13 @@ static inline void perf_read_regs(struct pt_regs *regs)
 	 * If the PMU doesn't update the SIAR for non marked events use
 	 * pt_regs.
 	 *
+	 * If regs is a kernel interrupt, always use SIAR. Some PMUs have an
+	 * issue with regs_sipr not being in synch with SIAR in interrupt entry
+	 * and return sequences, which can result in regs_sipr being true for
+	 * kernel interrupts and SIAR, which has the effect of causing samples
+	 * to pile up at mtmsrd MSR[EE] 0->1 or pending irq replay around
+	 * interrupt entry/exit.
+	 *
 	 * If the PMU has HV/PR flags then check to see if they
 	 * place the exception in userspace. If so, use pt_regs. In
 	 * continuous sampling mode the SIAR and the PMU exception are
@@ -356,6 +363,8 @@ static inline void perf_read_regs(struct pt_regs *regs)
 		use_siar = 1;
 	else if ((ppmu->flags & PPMU_NO_CONT_SAMPLING))
 		use_siar = 0;
+	else if (!user_mode(regs))
+		use_siar = 1;
 	else if (!(ppmu->flags & PPMU_NO_SIPR) && regs_sipr(regs))
 		use_siar = 0;
 	else
@@ -2251,18 +2260,10 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
  */
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
 {
-	bool use_siar = regs_use_siar(regs);
 	unsigned long siar = mfspr(SPRN_SIAR);
 
-	if (ppmu && (ppmu->flags & PPMU_P10_DD1)) {
-		if (siar)
-			return siar;
-		else
-			return regs->nip;
-	} else if (use_siar && siar_valid(regs))
-		return mfspr(SPRN_SIAR) + perf_ip_adjust(regs);
-	else if (use_siar)
-		return 0;		// no valid instruction pointer
+	if (regs_use_siar(regs) && siar_valid(regs) && siar)
+		return siar + perf_ip_adjust(regs);
 	else
 		return regs->nip;
 }
diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
index d48413e28c39..c756228a081f 100644
--- a/arch/powerpc/perf/hv-gpci.c
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -175,7 +175,7 @@ static unsigned long single_gpci_request(u32 req, u32 starting_index,
 	 */
 	count = 0;
 	for (i = offset; i < offset + length; i++)
-		count |= arg->bytes[i] << (i - offset);
+		count |= (u64)(arg->bytes[i]) << ((length - 1 - (i - offset)) * 8);
 
 	*value = count;
 out:
diff --git a/arch/powerpc/platforms/44x/machine_check.c b/arch/powerpc/platforms/44x/machine_check.c
index a5c898bb9bab..5d19daacd78a 100644
--- a/arch/powerpc/platforms/44x/machine_check.c
+++ b/arch/powerpc/platforms/44x/machine_check.c
@@ -11,7 +11,7 @@
 
 int machine_check_440A(struct pt_regs *regs)
 {
-	unsigned long reason = regs->dsisr;
+	unsigned long reason = regs->esr;
 
 	printk("Machine check in kernel mode.\n");
 	if (reason & ESR_IMCP){
@@ -48,7 +48,7 @@ int machine_check_440A(struct pt_regs *regs)
 #ifdef CONFIG_PPC_47x
 int machine_check_47x(struct pt_regs *regs)
 {
-	unsigned long reason = regs->dsisr;
+	unsigned long reason = regs->esr;
 	u32 mcsr;
 
 	printk(KERN_ERR "Machine check in kernel mode.\n");
diff --git a/arch/powerpc/platforms/4xx/machine_check.c b/arch/powerpc/platforms/4xx/machine_check.c
index a71c29892a91..a905da1d6f41 100644
--- a/arch/powerpc/platforms/4xx/machine_check.c
+++ b/arch/powerpc/platforms/4xx/machine_check.c
@@ -10,7 +10,7 @@
 
 int machine_check_4xx(struct pt_regs *regs)
 {
-	unsigned long reason = regs->dsisr;
+	unsigned long reason = regs->esr;
 
 	if (reason & ESR_IMCP) {
 		printk("Instruction");
diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig
index b77cbb0a50e1..4142ebf01382 100644
--- a/arch/powerpc/platforms/85xx/Kconfig
+++ b/arch/powerpc/platforms/85xx/Kconfig
@@ -208,12 +208,6 @@ config TQM8560
 	select TQM85xx
 	select CPM2
 
-config SBC8548
-	bool "Wind River SBC8548"
-	select DEFAULT_UIMAGE
-	help
-	  This option enables support for the Wind River SBC8548 board
-
 config PPA8548
 	bool "Prodrive PPA8548"
 	help
diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile
index d1dd0dca5ebf..60e4e97a929d 100644
--- a/arch/powerpc/platforms/85xx/Makefile
+++ b/arch/powerpc/platforms/85xx/Makefile
@@ -26,7 +26,6 @@ obj-$(CONFIG_CORENET_GENERIC)   += corenet_generic.o
 obj-$(CONFIG_FB_FSL_DIU)	+= t1042rdb_diu.o
 obj-$(CONFIG_STX_GP3)	  += stx_gp3.o
 obj-$(CONFIG_TQM85xx)	  += tqm85xx.o
-obj-$(CONFIG_SBC8548)     += sbc8548.o
 obj-$(CONFIG_PPA8548)     += ppa8548.o
 obj-$(CONFIG_SOCRATES)    += socrates.o socrates_fpga_pic.o
 obj-$(CONFIG_KSI8560)	  += ksi8560.o
diff --git a/arch/powerpc/platforms/85xx/sbc8548.c b/arch/powerpc/platforms/85xx/sbc8548.c
deleted file mode 100644
index e4acf5ce6b07..000000000000
--- a/arch/powerpc/platforms/85xx/sbc8548.c
+++ /dev/null
@@ -1,134 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Wind River SBC8548 setup and early boot code.
- *
- * Copyright 2007 Wind River Systems Inc.
- *
- * By Paul Gortmaker (see MAINTAINERS for contact information)
- *
- * Based largely on the MPC8548CDS support - Copyright 2005 Freescale Inc.
- */
-
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/reboot.h>
-#include <linux/pci.h>
-#include <linux/kdev_t.h>
-#include <linux/major.h>
-#include <linux/console.h>
-#include <linux/delay.h>
-#include <linux/seq_file.h>
-#include <linux/initrd.h>
-#include <linux/interrupt.h>
-#include <linux/fsl_devices.h>
-#include <linux/of_platform.h>
-#include <linux/pgtable.h>
-
-#include <asm/page.h>
-#include <linux/atomic.h>
-#include <asm/time.h>
-#include <asm/io.h>
-#include <asm/machdep.h>
-#include <asm/ipic.h>
-#include <asm/pci-bridge.h>
-#include <asm/irq.h>
-#include <mm/mmu_decl.h>
-#include <asm/prom.h>
-#include <asm/udbg.h>
-#include <asm/mpic.h>
-
-#include <sysdev/fsl_soc.h>
-#include <sysdev/fsl_pci.h>
-
-#include "mpc85xx.h"
-
-static int sbc_rev;
-
-static void __init sbc8548_pic_init(void)
-{
-	struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN,
-			0, 256, " OpenPIC  ");
-	BUG_ON(mpic == NULL);
-	mpic_init(mpic);
-}
-
-/* Extract the HW Rev from the EPLD on the board */
-static int __init sbc8548_hw_rev(void)
-{
-	struct device_node *np;
-	struct resource res;
-	unsigned int *rev;
-	int board_rev = 0;
-
-	np = of_find_compatible_node(NULL, NULL, "hw-rev");
-	if (np == NULL) {
-		printk("No HW-REV found in DTB.\n");
-		return -ENODEV;
-	}
-
-	of_address_to_resource(np, 0, &res);
-	of_node_put(np);
-
-	rev = ioremap(res.start,sizeof(unsigned int));
-	board_rev = (*rev) >> 28;
-	iounmap(rev);
-
-	return board_rev;
-}
-
-/*
- * Setup the architecture
- */
-static void __init sbc8548_setup_arch(void)
-{
-	if (ppc_md.progress)
-		ppc_md.progress("sbc8548_setup_arch()", 0);
-
-	fsl_pci_assign_primary();
-
-	sbc_rev = sbc8548_hw_rev();
-}
-
-static void sbc8548_show_cpuinfo(struct seq_file *m)
-{
-	uint pvid, svid, phid1;
-
-	pvid = mfspr(SPRN_PVR);
-	svid = mfspr(SPRN_SVR);
-
-	seq_printf(m, "Vendor\t\t: Wind River\n");
-	seq_printf(m, "Machine\t\t: SBC8548 v%d\n", sbc_rev);
-	seq_printf(m, "PVR\t\t: 0x%x\n", pvid);
-	seq_printf(m, "SVR\t\t: 0x%x\n", svid);
-
-	/* Display cpu Pll setting */
-	phid1 = mfspr(SPRN_HID1);
-	seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f));
-}
-
-machine_arch_initcall(sbc8548, mpc85xx_common_publish_devices);
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init sbc8548_probe(void)
-{
-	return of_machine_is_compatible("SBC8548");
-}
-
-define_machine(sbc8548) {
-	.name		= "SBC8548",
-	.probe		= sbc8548_probe,
-	.setup_arch	= sbc8548_setup_arch,
-	.init_IRQ	= sbc8548_pic_init,
-	.show_cpuinfo	= sbc8548_show_cpuinfo,
-	.get_irq	= mpic_get_irq,
-#ifdef CONFIG_PCI
-	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
-	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
-#endif
-	.calibrate_decr = generic_calibrate_decr,
-	.progress	= udbg_progress,
-};
diff --git a/arch/powerpc/platforms/86xx/Kconfig b/arch/powerpc/platforms/86xx/Kconfig
index 07a9d60c618a..be867abebc83 100644
--- a/arch/powerpc/platforms/86xx/Kconfig
+++ b/arch/powerpc/platforms/86xx/Kconfig
@@ -20,12 +20,6 @@ config MPC8641_HPCN
 	help
 	  This option enables support for the MPC8641 HPCN board.
 
-config SBC8641D
-	bool "Wind River SBC8641D"
-	select DEFAULT_UIMAGE
-	help
-	  This option enables support for the WRS SBC8641D board.
-
 config MPC8610_HPCD
 	bool "Freescale MPC8610 HPCD"
 	select DEFAULT_UIMAGE
@@ -74,7 +68,7 @@ config MPC8641
 	select FSL_PCI if PCI
 	select PPC_UDBG_16550
 	select MPIC
-	default y if MPC8641_HPCN || SBC8641D || GEF_SBC610 || GEF_SBC310 || GEF_PPC9A \
+	default y if MPC8641_HPCN || GEF_SBC610 || GEF_SBC310 || GEF_PPC9A \
 			|| MVME7100
 
 config MPC8610
diff --git a/arch/powerpc/platforms/86xx/Makefile b/arch/powerpc/platforms/86xx/Makefile
index 2c04449be107..5bbe1475bf26 100644
--- a/arch/powerpc/platforms/86xx/Makefile
+++ b/arch/powerpc/platforms/86xx/Makefile
@@ -6,7 +6,6 @@
 obj-y				:= pic.o common.o
 obj-$(CONFIG_SMP)		+= mpc86xx_smp.o
 obj-$(CONFIG_MPC8641_HPCN)	+= mpc86xx_hpcn.o
-obj-$(CONFIG_SBC8641D)		+= sbc8641d.o
 obj-$(CONFIG_MPC8610_HPCD)	+= mpc8610_hpcd.o
 obj-$(CONFIG_GEF_SBC610)	+= gef_sbc610.o
 obj-$(CONFIG_GEF_SBC310)	+= gef_sbc310.o
diff --git a/arch/powerpc/platforms/86xx/sbc8641d.c b/arch/powerpc/platforms/86xx/sbc8641d.c
deleted file mode 100644
index dc23dd383d6e..000000000000
--- a/arch/powerpc/platforms/86xx/sbc8641d.c
+++ /dev/null
@@ -1,87 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * SBC8641D board specific routines
- *
- * Copyright 2008 Wind River Systems Inc.
- *
- * By Paul Gortmaker (see MAINTAINERS for contact information)
- *
- * Based largely on the 8641 HPCN support by Freescale Semiconductor Inc.
- */
-
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/kdev_t.h>
-#include <linux/delay.h>
-#include <linux/seq_file.h>
-#include <linux/of_platform.h>
-
-#include <asm/time.h>
-#include <asm/machdep.h>
-#include <asm/pci-bridge.h>
-#include <asm/prom.h>
-#include <mm/mmu_decl.h>
-#include <asm/udbg.h>
-
-#include <asm/mpic.h>
-
-#include <sysdev/fsl_pci.h>
-#include <sysdev/fsl_soc.h>
-
-#include "mpc86xx.h"
-
-static void __init
-sbc8641_setup_arch(void)
-{
-	if (ppc_md.progress)
-		ppc_md.progress("sbc8641_setup_arch()", 0);
-
-	printk("SBC8641 board from Wind River\n");
-
-#ifdef CONFIG_SMP
-	mpc86xx_smp_init();
-#endif
-
-	fsl_pci_assign_primary();
-}
-
-
-static void
-sbc8641_show_cpuinfo(struct seq_file *m)
-{
-	uint svid = mfspr(SPRN_SVR);
-
-	seq_printf(m, "Vendor\t\t: Wind River Systems\n");
-
-	seq_printf(m, "SVR\t\t: 0x%x\n", svid);
-}
-
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init sbc8641_probe(void)
-{
-	if (of_machine_is_compatible("wind,sbc8641"))
-		return 1;	/* Looks good */
-
-	return 0;
-}
-
-machine_arch_initcall(sbc8641, mpc86xx_common_publish_devices);
-
-define_machine(sbc8641) {
-	.name			= "SBC8641D",
-	.probe			= sbc8641_probe,
-	.setup_arch		= sbc8641_setup_arch,
-	.init_IRQ		= mpc86xx_init_irq,
-	.show_cpuinfo		= sbc8641_show_cpuinfo,
-	.get_irq		= mpic_get_irq,
-	.time_init		= mpc86xx_time_init,
-	.calibrate_decr		= generic_calibrate_decr,
-	.progress		= udbg_progress,
-#ifdef CONFIG_PCI
-	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
-#endif
-};
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index ca2555b8a0c2..82335e364c44 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -12,8 +12,8 @@
 #include <linux/export.h>
 #include <linux/of_platform.h>
 #include <linux/slab.h>
+#include <linux/debugfs.h>
 
-#include <asm/debugfs.h>
 #include <asm/dcr.h>
 #include <asm/machdep.h>
 #include <asm/prom.h>
@@ -480,6 +480,6 @@ void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic)
 
 	snprintf(name, sizeof(name), "msic_%d", of_node_to_nid(dn));
 
-	debugfs_create_file(name, 0600, powerpc_debugfs_root, msic, &fops_msic);
+	debugfs_create_file(name, 0600, arch_debugfs_dir, msic, &fops_msic);
 }
 #endif /* DEBUG */
diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c
index 85521b3e7098..7a85b117f7a4 100644
--- a/arch/powerpc/platforms/embedded6xx/holly.c
+++ b/arch/powerpc/platforms/embedded6xx/holly.c
@@ -251,7 +251,7 @@ static int ppc750_machine_check_exception(struct pt_regs *regs)
 	/* Are we prepared to handle this fault */
 	if ((entry = search_exception_tables(regs->nip)) != NULL) {
 		tsi108_clear_pci_cfg_error();
-		regs_set_return_msr(regs, regs->msr | MSR_RI);
+		regs_set_recoverable(regs);
 		regs_set_return_ip(regs, extable_fixup(entry));
 		return 1;
 	}
diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
index d8da6a483e59..9eb9abb5bce2 100644
--- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
+++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
@@ -173,7 +173,7 @@ static int mpc7448_machine_check_exception(struct pt_regs *regs)
 	/* Are we prepared to handle this fault */
 	if ((entry = search_exception_tables(regs->nip)) != NULL) {
 		tsi108_clear_pci_cfg_error();
-		regs_set_return_msr(regs, regs->msr | MSR_RI);
+		regs_set_recoverable(regs);
 		regs_set_return_ip(regs, extable_fixup(entry));
 		return 1;
 	}
diff --git a/arch/powerpc/platforms/pasemi/idle.c b/arch/powerpc/platforms/pasemi/idle.c
index 534b0317fc15..6087c70ed2ef 100644
--- a/arch/powerpc/platforms/pasemi/idle.c
+++ b/arch/powerpc/platforms/pasemi/idle.c
@@ -59,7 +59,7 @@ static int pasemi_system_reset_exception(struct pt_regs *regs)
 	restore_astate(hard_smp_processor_id());
 
 	/* everything handled */
-	regs_set_return_msr(regs, regs->msr | MSR_RI);
+	regs_set_recoverable(regs);
 	return 1;
 }
 
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 528a7e0cf83a..e3ffdc8e8567 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -199,12 +199,12 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
 	 */
 	power7_fastsleep_workaround_exit = false;
 
-	get_online_cpus();
+	cpus_read_lock();
 	primary_thread_mask = cpu_online_cores_map();
 	on_each_cpu_mask(&primary_thread_mask,
 				pnv_fastsleep_workaround_apply,
 				&err, 1);
-	put_online_cpus();
+	cpus_read_unlock();
 	if (err) {
 		pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply");
 		goto fail;
@@ -667,7 +667,6 @@ static unsigned long power9_idle_stop(unsigned long psscr)
 		sprs.purr	= mfspr(SPRN_PURR);
 		sprs.spurr	= mfspr(SPRN_SPURR);
 		sprs.dscr	= mfspr(SPRN_DSCR);
-		sprs.wort	= mfspr(SPRN_WORT);
 		sprs.ciabr	= mfspr(SPRN_CIABR);
 
 		sprs.mmcra	= mfspr(SPRN_MMCRA);
@@ -785,7 +784,6 @@ core_woken:
 	mtspr(SPRN_PURR,	sprs.purr);
 	mtspr(SPRN_SPURR,	sprs.spurr);
 	mtspr(SPRN_DSCR,	sprs.dscr);
-	mtspr(SPRN_WORT,	sprs.wort);
 	mtspr(SPRN_CIABR,	sprs.ciabr);
 
 	mtspr(SPRN_MMCRA,	sprs.mmcra);
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index 537a4daed614..877720c64515 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -18,7 +18,6 @@
 #include <linux/memory_hotplug.h>
 #include <linux/numa.h>
 #include <asm/machdep.h>
-#include <asm/debugfs.h>
 #include <asm/cacheflush.h>
 
 /* This enables us to keep track of the memory removed from each node. */
@@ -330,7 +329,7 @@ DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get,
 static int memtrace_init(void)
 {
 	memtrace_debugfs_dir = debugfs_create_dir("memtrace",
-						  powerpc_debugfs_root);
+						  arch_debugfs_dir);
 
 	debugfs_create_file("enable", 0600, memtrace_debugfs_dir,
 			    NULL, &memtrace_init_fops);
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 7824cc364bc4..05d3832019b9 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -13,11 +13,11 @@
 #include <linux/of_address.h>
 #include <linux/of_platform.h>
 #include <linux/crash_dump.h>
+#include <linux/debugfs.h>
 #include <asm/opal.h>
 #include <asm/io.h>
 #include <asm/imc-pmu.h>
 #include <asm/cputhreads.h>
-#include <asm/debugfs.h>
 
 static struct dentry *imc_debugfs_parent;
 
@@ -56,7 +56,7 @@ static void export_imc_mode_and_cmd(struct device_node *node,
 	u32 cb_offset;
 	struct imc_mem_info *ptr = pmu_ptr->mem_info;
 
-	imc_debugfs_parent = debugfs_create_dir("imc", powerpc_debugfs_root);
+	imc_debugfs_parent = debugfs_create_dir("imc", arch_debugfs_dir);
 
 	if (of_property_read_u32(node, "cb_offset", &cb_offset))
 		cb_offset = IMC_CNTL_BLK_OFFSET;
@@ -186,7 +186,7 @@ static void disable_nest_pmu_counters(void)
 	int nid, cpu;
 	const struct cpumask *l_cpumask;
 
-	get_online_cpus();
+	cpus_read_lock();
 	for_each_node_with_cpus(nid) {
 		l_cpumask = cpumask_of_node(nid);
 		cpu = cpumask_first_and(l_cpumask, cpu_online_mask);
@@ -195,7 +195,7 @@ static void disable_nest_pmu_counters(void)
 		opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
 				       get_hard_smp_processor_id(cpu));
 	}
-	put_online_cpus();
+	cpus_read_unlock();
 }
 
 static void disable_core_pmu_counters(void)
@@ -203,7 +203,7 @@ static void disable_core_pmu_counters(void)
 	cpumask_t cores_map;
 	int cpu, rc;
 
-	get_online_cpus();
+	cpus_read_lock();
 	/* Disable the IMC Core functions */
 	cores_map = cpu_online_cores_map();
 	for_each_cpu(cpu, &cores_map) {
@@ -213,7 +213,7 @@ static void disable_core_pmu_counters(void)
 			pr_err("%s: Failed to stop Core (cpu = %d)\n",
 				__FUNCTION__, cpu);
 	}
-	put_online_cpus();
+	cpus_read_unlock();
 }
 
 int get_max_nest_dev(void)
diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c
index 608569082ba0..1e5d51db40f8 100644
--- a/arch/powerpc/platforms/powernv/opal-lpc.c
+++ b/arch/powerpc/platforms/powernv/opal-lpc.c
@@ -10,13 +10,13 @@
 #include <linux/bug.h>
 #include <linux/io.h>
 #include <linux/slab.h>
+#include <linux/debugfs.h>
 
 #include <asm/machdep.h>
 #include <asm/firmware.h>
 #include <asm/opal.h>
 #include <asm/prom.h>
 #include <linux/uaccess.h>
-#include <asm/debugfs.h>
 #include <asm/isa-bridge.h>
 
 static int opal_lpc_chip_id = -1;
@@ -371,7 +371,7 @@ static int opal_lpc_init_debugfs(void)
 	if (opal_lpc_chip_id < 0)
 		return -ENODEV;
 
-	root = debugfs_create_dir("lpc", powerpc_debugfs_root);
+	root = debugfs_create_dir("lpc", arch_debugfs_dir);
 
 	rc |= opal_lpc_debugfs_create_type(root, "io", OPAL_LPC_IO);
 	rc |= opal_lpc_debugfs_create_type(root, "mem", OPAL_LPC_MEM);
diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c
index fd510d961b8c..6b4eed2ef4fa 100644
--- a/arch/powerpc/platforms/powernv/opal-xscom.c
+++ b/arch/powerpc/platforms/powernv/opal-xscom.c
@@ -14,11 +14,11 @@
 #include <linux/gfp.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
+#include <linux/debugfs.h>
 
 #include <asm/machdep.h>
 #include <asm/firmware.h>
 #include <asm/opal.h>
-#include <asm/debugfs.h>
 #include <asm/prom.h>
 
 static u64 opal_scom_unmangle(u64 addr)
@@ -189,7 +189,7 @@ static int scom_debug_init(void)
 	if (!firmware_has_feature(FW_FEATURE_OPAL))
 		return 0;
 
-	root = debugfs_create_dir("scom", powerpc_debugfs_root);
+	root = debugfs_create_dir("scom", arch_debugfs_dir);
 	if (!root)
 		return -1;
 
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 2835376e61a4..e9d18519e650 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -588,7 +588,7 @@ static int opal_recover_mce(struct pt_regs *regs,
 {
 	int recovered = 0;
 
-	if (!(regs->msr & MSR_RI)) {
+	if (regs_is_unrecoverable(regs)) {
 		/* If MSR_RI isn't set, we cannot recover */
 		pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
 		recovered = 0;
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 7de464679292..3dd35c327d1c 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -20,6 +20,7 @@
 #include <linux/iommu.h>
 #include <linux/rculist.h>
 #include <linux/sizes.h>
+#include <linux/debugfs.h>
 
 #include <asm/sections.h>
 #include <asm/io.h>
@@ -32,10 +33,10 @@
 #include <asm/iommu.h>
 #include <asm/tce.h>
 #include <asm/xics.h>
-#include <asm/debugfs.h>
 #include <asm/firmware.h>
 #include <asm/pnv-pci.h>
 #include <asm/mmzone.h>
+#include <asm/xive.h>
 
 #include <misc/cxl-base.h>
 
@@ -1962,27 +1963,40 @@ void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 	pe->dma_setup_done = true;
 }
 
-int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq)
+/*
+ * Called from KVM in real mode to EOI passthru interrupts. The ICP
+ * EOI is handled directly in KVM in kvmppc_deliver_irq_passthru().
+ *
+ * The IRQ data is mapped in the PCI-MSI domain and the EOI OPAL call
+ * needs an HW IRQ number mapped in the XICS IRQ domain. The HW IRQ
+ * numbers of the in-the-middle MSI domain are vector numbers and it's
+ * good enough for OPAL. Use that.
+ */
+int64_t pnv_opal_pci_msi_eoi(struct irq_data *d)
 {
-	struct pnv_phb *phb = container_of(chip, struct pnv_phb,
-					   ioda.irq_chip);
+	struct pci_controller *hose = irq_data_get_irq_chip_data(d->parent_data);
+	struct pnv_phb *phb = hose->private_data;
 
-	return opal_pci_msi_eoi(phb->opal_id, hw_irq);
+	return opal_pci_msi_eoi(phb->opal_id, d->parent_data->hwirq);
 }
 
+/*
+ * The IRQ data is mapped in the XICS domain, with OPAL HW IRQ numbers
+ */
 static void pnv_ioda2_msi_eoi(struct irq_data *d)
 {
 	int64_t rc;
 	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
-	struct irq_chip *chip = irq_data_get_irq_chip(d);
+	struct pci_controller *hose = irq_data_get_irq_chip_data(d);
+	struct pnv_phb *phb = hose->private_data;
 
-	rc = pnv_opal_pci_msi_eoi(chip, hw_irq);
+	rc = opal_pci_msi_eoi(phb->opal_id, hw_irq);
 	WARN_ON_ONCE(rc);
 
 	icp_native_eoi(d);
 }
 
-
+/* P8/CXL only */
 void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq)
 {
 	struct irq_data *idata;
@@ -2004,27 +2018,32 @@ void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq)
 		phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi;
 	}
 	irq_set_chip(virq, &phb->ioda.irq_chip);
+	irq_set_chip_data(virq, phb->hose);
 }
 
+static struct irq_chip pnv_pci_msi_irq_chip;
+
 /*
  * Returns true iff chip is something that we could call
  * pnv_opal_pci_msi_eoi for.
  */
 bool is_pnv_opal_msi(struct irq_chip *chip)
 {
-	return chip->irq_eoi == pnv_ioda2_msi_eoi;
+	return chip == &pnv_pci_msi_irq_chip;
 }
 EXPORT_SYMBOL_GPL(is_pnv_opal_msi);
 
-static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
-				  unsigned int hwirq, unsigned int virq,
-				  unsigned int is_64, struct msi_msg *msg)
+static int __pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
+				    unsigned int xive_num,
+				    unsigned int is_64, struct msi_msg *msg)
 {
 	struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
-	unsigned int xive_num = hwirq - phb->msi_base;
 	__be32 data;
 	int rc;
 
+	dev_dbg(&dev->dev, "%s: setup %s-bit MSI for vector #%d\n", __func__,
+		is_64 ? "64" : "32", xive_num);
+
 	/* No PE assigned ? bail out ... no MSI for you ! */
 	if (pe == NULL)
 		return -ENXIO;
@@ -2072,12 +2091,209 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
 	}
 	msg->data = be32_to_cpu(data);
 
-	pnv_set_msi_irq_chip(phb, virq);
+	return 0;
+}
+
+/*
+ * The msi_free() op is called before irq_domain_free_irqs_top() when
+ * the handler data is still available. Use that to clear the XIVE
+ * controller.
+ */
+static void pnv_msi_ops_msi_free(struct irq_domain *domain,
+				 struct msi_domain_info *info,
+				 unsigned int irq)
+{
+	if (xive_enabled())
+		xive_irq_free_data(irq);
+}
+
+static struct msi_domain_ops pnv_pci_msi_domain_ops = {
+	.msi_free	= pnv_msi_ops_msi_free,
+};
+
+static void pnv_msi_shutdown(struct irq_data *d)
+{
+	d = d->parent_data;
+	if (d->chip->irq_shutdown)
+		d->chip->irq_shutdown(d);
+}
+
+static void pnv_msi_mask(struct irq_data *d)
+{
+	pci_msi_mask_irq(d);
+	irq_chip_mask_parent(d);
+}
+
+static void pnv_msi_unmask(struct irq_data *d)
+{
+	pci_msi_unmask_irq(d);
+	irq_chip_unmask_parent(d);
+}
+
+static struct irq_chip pnv_pci_msi_irq_chip = {
+	.name		= "PNV-PCI-MSI",
+	.irq_shutdown	= pnv_msi_shutdown,
+	.irq_mask	= pnv_msi_mask,
+	.irq_unmask	= pnv_msi_unmask,
+	.irq_eoi	= irq_chip_eoi_parent,
+};
+
+static struct msi_domain_info pnv_msi_domain_info = {
+	.flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		  MSI_FLAG_MULTI_PCI_MSI  | MSI_FLAG_PCI_MSIX),
+	.ops   = &pnv_pci_msi_domain_ops,
+	.chip  = &pnv_pci_msi_irq_chip,
+};
+
+static void pnv_msi_compose_msg(struct irq_data *d, struct msi_msg *msg)
+{
+	struct msi_desc *entry = irq_data_get_msi_desc(d);
+	struct pci_dev *pdev = msi_desc_to_pci_dev(entry);
+	struct pci_controller *hose = irq_data_get_irq_chip_data(d);
+	struct pnv_phb *phb = hose->private_data;
+	int rc;
+
+	rc = __pnv_pci_ioda_msi_setup(phb, pdev, d->hwirq,
+				      entry->msi_attrib.is_64, msg);
+	if (rc)
+		dev_err(&pdev->dev, "Failed to setup %s-bit MSI #%ld : %d\n",
+			entry->msi_attrib.is_64 ? "64" : "32", d->hwirq, rc);
+}
+
+/*
+ * The IRQ data is mapped in the MSI domain in which HW IRQ numbers
+ * correspond to vector numbers.
+ */
+static void pnv_msi_eoi(struct irq_data *d)
+{
+	struct pci_controller *hose = irq_data_get_irq_chip_data(d);
+	struct pnv_phb *phb = hose->private_data;
+
+	if (phb->model == PNV_PHB_MODEL_PHB3) {
+		/*
+		 * The EOI OPAL call takes an OPAL HW IRQ number but
+		 * since it is translated into a vector number in
+		 * OPAL, use that directly.
+		 */
+		WARN_ON_ONCE(opal_pci_msi_eoi(phb->opal_id, d->hwirq));
+	}
+
+	irq_chip_eoi_parent(d);
+}
+
+static struct irq_chip pnv_msi_irq_chip = {
+	.name			= "PNV-MSI",
+	.irq_shutdown		= pnv_msi_shutdown,
+	.irq_mask		= irq_chip_mask_parent,
+	.irq_unmask		= irq_chip_unmask_parent,
+	.irq_eoi		= pnv_msi_eoi,
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
+	.irq_compose_msi_msg	= pnv_msi_compose_msg,
+};
+
+static int pnv_irq_parent_domain_alloc(struct irq_domain *domain,
+				       unsigned int virq, int hwirq)
+{
+	struct irq_fwspec parent_fwspec;
+	int ret;
+
+	parent_fwspec.fwnode = domain->parent->fwnode;
+	parent_fwspec.param_count = 2;
+	parent_fwspec.param[0] = hwirq;
+	parent_fwspec.param[1] = IRQ_TYPE_EDGE_RISING;
+
+	ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &parent_fwspec);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int pnv_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				unsigned int nr_irqs, void *arg)
+{
+	struct pci_controller *hose = domain->host_data;
+	struct pnv_phb *phb = hose->private_data;
+	msi_alloc_info_t *info = arg;
+	struct pci_dev *pdev = msi_desc_to_pci_dev(info->desc);
+	int hwirq;
+	int i, ret;
+
+	hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, nr_irqs);
+	if (hwirq < 0) {
+		dev_warn(&pdev->dev, "failed to find a free MSI\n");
+		return -ENOSPC;
+	}
+
+	dev_dbg(&pdev->dev, "%s bridge %pOF %d/%x #%d\n", __func__,
+		hose->dn, virq, hwirq, nr_irqs);
+
+	for (i = 0; i < nr_irqs; i++) {
+		ret = pnv_irq_parent_domain_alloc(domain, virq + i,
+						  phb->msi_base + hwirq + i);
+		if (ret)
+			goto out;
+
+		irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+					      &pnv_msi_irq_chip, hose);
+	}
+
+	return 0;
+
+out:
+	irq_domain_free_irqs_parent(domain, virq, i - 1);
+	msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, nr_irqs);
+	return ret;
+}
+
+static void pnv_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+				unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+	struct pci_controller *hose = irq_data_get_irq_chip_data(d);
+	struct pnv_phb *phb = hose->private_data;
+
+	pr_debug("%s bridge %pOF %d/%lx #%d\n", __func__, hose->dn,
+		 virq, d->hwirq, nr_irqs);
+
+	msi_bitmap_free_hwirqs(&phb->msi_bmp, d->hwirq, nr_irqs);
+	/* XIVE domain is cleared through ->msi_free() */
+}
+
+static const struct irq_domain_ops pnv_irq_domain_ops = {
+	.alloc  = pnv_irq_domain_alloc,
+	.free   = pnv_irq_domain_free,
+};
+
+static int pnv_msi_allocate_domains(struct pci_controller *hose, unsigned int count)
+{
+	struct pnv_phb *phb = hose->private_data;
+	struct irq_domain *parent = irq_get_default_host();
+
+	hose->fwnode = irq_domain_alloc_named_id_fwnode("PNV-MSI", phb->opal_id);
+	if (!hose->fwnode)
+		return -ENOMEM;
 
-	pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d),"
-		 " address=%x_%08x data=%x PE# %x\n",
-		 pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num,
-		 msg->address_hi, msg->address_lo, data, pe->pe_number);
+	hose->dev_domain = irq_domain_create_hierarchy(parent, 0, count,
+						       hose->fwnode,
+						       &pnv_irq_domain_ops, hose);
+	if (!hose->dev_domain) {
+		pr_err("PCI: failed to create IRQ domain bridge %pOF (domain %d)\n",
+		       hose->dn, hose->global_number);
+		irq_domain_free_fwnode(hose->fwnode);
+		return -ENOMEM;
+	}
+
+	hose->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(hose->dn),
+						     &pnv_msi_domain_info,
+						     hose->dev_domain);
+	if (!hose->msi_domain) {
+		pr_err("PCI: failed to create MSI IRQ domain bridge %pOF (domain %d)\n",
+		       hose->dn, hose->global_number);
+		irq_domain_free_fwnode(hose->fwnode);
+		irq_domain_remove(hose->dev_domain);
+		return -ENOMEM;
+	}
 
 	return 0;
 }
@@ -2102,10 +2318,10 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
 		return;
 	}
 
-	phb->msi_setup = pnv_pci_ioda_msi_setup;
-	phb->msi32_support = 1;
 	pr_info("  Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
 		count, phb->msi_base);
+
+	pnv_msi_allocate_domains(phb->hose, count);
 }
 
 static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
@@ -2259,7 +2475,7 @@ static void pnv_pci_ioda_create_dbgfs(void)
 		phb = hose->private_data;
 
 		sprintf(name, "PCI%04x", hose->global_number);
-		phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root);
+		phb->dbgfs = debugfs_create_dir(name, arch_debugfs_dir);
 
 		debugfs_create_file_unsafe("dump_diag_regs", 0200, phb->dbgfs,
 					   phb, &pnv_pci_diag_data_fops);
@@ -2709,8 +2925,6 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
 	.dma_dev_setup		= pnv_pci_ioda_dma_dev_setup,
 	.dma_bus_setup		= pnv_pci_ioda_dma_bus_setup,
 	.iommu_bypass_supported	= pnv_pci_ioda_iommu_bypass_supported,
-	.setup_msi_irqs		= pnv_setup_msi_irqs,
-	.teardown_msi_irqs	= pnv_teardown_msi_irqs,
 	.enable_device_hook	= pnv_pci_enable_device_hook,
 	.release_device		= pnv_pci_release_device,
 	.window_alignment	= pnv_pci_window_alignment,
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 6bb3c52633fb..9a8391b983d1 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -160,73 +160,6 @@ exit:
 }
 EXPORT_SYMBOL_GPL(pnv_pci_set_power_state);
 
-int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
-{
-	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
-	struct msi_desc *entry;
-	struct msi_msg msg;
-	int hwirq;
-	unsigned int virq;
-	int rc;
-
-	if (WARN_ON(!phb) || !phb->msi_bmp.bitmap)
-		return -ENODEV;
-
-	if (pdev->no_64bit_msi && !phb->msi32_support)
-		return -ENODEV;
-
-	for_each_pci_msi_entry(entry, pdev) {
-		if (!entry->msi_attrib.is_64 && !phb->msi32_support) {
-			pr_warn("%s: Supports only 64-bit MSIs\n",
-				pci_name(pdev));
-			return -ENXIO;
-		}
-		hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, 1);
-		if (hwirq < 0) {
-			pr_warn("%s: Failed to find a free MSI\n",
-				pci_name(pdev));
-			return -ENOSPC;
-		}
-		virq = irq_create_mapping(NULL, phb->msi_base + hwirq);
-		if (!virq) {
-			pr_warn("%s: Failed to map MSI to linux irq\n",
-				pci_name(pdev));
-			msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, 1);
-			return -ENOMEM;
-		}
-		rc = phb->msi_setup(phb, pdev, phb->msi_base + hwirq,
-				    virq, entry->msi_attrib.is_64, &msg);
-		if (rc) {
-			pr_warn("%s: Failed to setup MSI\n", pci_name(pdev));
-			irq_dispose_mapping(virq);
-			msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, 1);
-			return rc;
-		}
-		irq_set_msi_desc(virq, entry);
-		pci_write_msi_msg(virq, &msg);
-	}
-	return 0;
-}
-
-void pnv_teardown_msi_irqs(struct pci_dev *pdev)
-{
-	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
-	struct msi_desc *entry;
-	irq_hw_number_t hwirq;
-
-	if (WARN_ON(!phb))
-		return;
-
-	for_each_pci_msi_entry(entry, pdev) {
-		if (!entry->irq)
-			continue;
-		hwirq = virq_to_hw(entry->irq);
-		irq_set_msi_desc(entry->irq, NULL);
-		irq_dispose_mapping(entry->irq);
-		msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, 1);
-	}
-}
-
 /* Nicely print the contents of the PE State Tables (PEST). */
 static void pnv_pci_dump_pest(__be64 pestA[], __be64 pestB[], int pest_size)
 {
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index c8d4f222a86f..966a9eb64339 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -123,11 +123,7 @@ struct pnv_phb {
 #endif
 
 	unsigned int		msi_base;
-	unsigned int		msi32_support;
 	struct msi_bitmap	msi_bmp;
-	int (*msi_setup)(struct pnv_phb *phb, struct pci_dev *dev,
-			 unsigned int hwirq, unsigned int virq,
-			 unsigned int is_64, struct msi_msg *msg);
 	int (*init_m64)(struct pnv_phb *phb);
 	int (*get_pe_state)(struct pnv_phb *phb, int pe_no);
 	void (*freeze_pe)(struct pnv_phb *phb, int pe_no);
@@ -289,8 +285,6 @@ extern void pnv_pci_init_npu2_opencapi_phb(struct device_node *np);
 extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
 extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
 
-extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
-extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
 extern struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn);
 extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev);
 extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq);
diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c
index 7ddc7ec6a7c0..ef710a715903 100644
--- a/arch/powerpc/platforms/ps3/htab.c
+++ b/arch/powerpc/platforms/ps3/htab.c
@@ -169,7 +169,8 @@ static void ps3_hpte_invalidate(unsigned long slot, unsigned long vpn,
 	spin_unlock_irqrestore(&ps3_htab_lock, flags);
 }
 
-static void ps3_hpte_clear(void)
+/* Called during kexec sequence with MMU off */
+static notrace void ps3_hpte_clear(void)
 {
 	unsigned long hpte_count = (1UL << ppc64_pft_size) >> 4;
 	u64 i;
diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index a81eac35d900..9c44f335c0b9 100644
--- a/arch/powerpc/platforms/ps3/mm.c
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -195,9 +195,11 @@ fail:
 
 /**
  * ps3_mm_vas_destroy -
+ *
+ * called during kexec sequence with MMU off.
  */
 
-void ps3_mm_vas_destroy(void)
+notrace void ps3_mm_vas_destroy(void)
 {
 	int result;
 
@@ -1243,9 +1245,11 @@ void __init ps3_mm_init(void)
 
 /**
  * ps3_mm_shutdown - final cleanup of address space
+ *
+ * called during kexec sequence with MMU off.
  */
 
-void ps3_mm_shutdown(void)
+notrace void ps3_mm_shutdown(void)
 {
 	ps3_mm_region_destroy(&map.r1);
 }
diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c
index cc5774c64fae..c8b50fec56bf 100644
--- a/arch/powerpc/platforms/ps3/system-bus.c
+++ b/arch/powerpc/platforms/ps3/system-bus.c
@@ -662,7 +662,7 @@ static int ps3_ioc0_map_sg(struct device *_dev, struct scatterlist *sg,
 			   unsigned long attrs)
 {
 	BUG();
-	return 0;
+	return -EINVAL;
 }
 
 static void ps3_sb_unmap_sg(struct device *_dev, struct scatterlist *sg,
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index 982f069e4c31..352af5b14a0f 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -11,10 +11,10 @@
 #include <linux/spinlock.h>
 #include <asm/smp.h>
 #include <linux/uaccess.h>
+#include <linux/debugfs.h>
 #include <asm/firmware.h>
 #include <asm/dtl.h>
 #include <asm/lppaca.h>
-#include <asm/debugfs.h>
 #include <asm/plpar_wrappers.h>
 #include <asm/machdep.h>
 
@@ -338,7 +338,7 @@ static int dtl_init(void)
 
 	/* set up common debugfs structure */
 
-	dtl_dir = debugfs_create_dir("dtl", powerpc_debugfs_root);
+	dtl_dir = debugfs_create_dir("dtl", arch_debugfs_dir);
 
 	debugfs_create_x8("dtl_event_mask", 0600, dtl_dir, &dtl_event_mask);
 	debugfs_create_u32("dtl_buf_entries", 0400, dtl_dir, &dtl_buf_entries);
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
index 4c7b7f5a2ebc..f162156b7b68 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -119,10 +119,11 @@ struct vec5_fw_feature {
 
 static __initdata struct vec5_fw_feature
 vec5_fw_features_table[] = {
-	{FW_FEATURE_TYPE1_AFFINITY,	OV5_TYPE1_AFFINITY},
+	{FW_FEATURE_FORM1_AFFINITY,	OV5_FORM1_AFFINITY},
 	{FW_FEATURE_PRRN,		OV5_PRRN},
 	{FW_FEATURE_DRMEM_V2,		OV5_DRMEM_V2},
 	{FW_FEATURE_DRC_INFO,		OV5_DRC_INFO},
+	{FW_FEATURE_FORM2_AFFINITY,	OV5_FORM2_AFFINITY},
 };
 
 static void __init fw_vec5_feature_init(const char *vec5, unsigned long len)
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 7e970f81d8ff..d646c22e94ab 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -39,6 +39,12 @@
 /* This version can't take the spinlock, because it never returns */
 static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE;
 
+/*
+ * Record the CPU ids used on each nodes.
+ * Protected by cpu_add_remove_lock.
+ */
+static cpumask_var_t node_recorded_ids_map[MAX_NUMNODES];
+
 static void rtas_stop_self(void)
 {
 	static struct rtas_args args;
@@ -139,72 +145,148 @@ static void pseries_cpu_die(unsigned int cpu)
 	paca_ptrs[cpu]->cpu_start = 0;
 }
 
+/**
+ * find_cpu_id_range - found a linear ranger of @nthreads free CPU ids.
+ * @nthreads : the number of threads (cpu ids)
+ * @assigned_node : the node it belongs to or NUMA_NO_NODE if free ids from any
+ *                  node can be peek.
+ * @cpu_mask: the returned CPU mask.
+ *
+ * Returns 0 on success.
+ */
+static int find_cpu_id_range(unsigned int nthreads, int assigned_node,
+			     cpumask_var_t *cpu_mask)
+{
+	cpumask_var_t candidate_mask;
+	unsigned int cpu, node;
+	int rc = -ENOSPC;
+
+	if (!zalloc_cpumask_var(&candidate_mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	cpumask_clear(*cpu_mask);
+	for (cpu = 0; cpu < nthreads; cpu++)
+		cpumask_set_cpu(cpu, *cpu_mask);
+
+	BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask));
+
+	/* Get a bitmap of unoccupied slots. */
+	cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask);
+
+	if (assigned_node != NUMA_NO_NODE) {
+		/*
+		 * Remove free ids previously assigned on the other nodes. We
+		 * can walk only online nodes because once a node became online
+		 * it is not turned offlined back.
+		 */
+		for_each_online_node(node) {
+			if (node == assigned_node)
+				continue;
+			cpumask_andnot(candidate_mask, candidate_mask,
+				       node_recorded_ids_map[node]);
+		}
+	}
+
+	if (cpumask_empty(candidate_mask))
+		goto out;
+
+	while (!cpumask_empty(*cpu_mask)) {
+		if (cpumask_subset(*cpu_mask, candidate_mask))
+			/* Found a range where we can insert the new cpu(s) */
+			break;
+		cpumask_shift_left(*cpu_mask, *cpu_mask, nthreads);
+	}
+
+	if (!cpumask_empty(*cpu_mask))
+		rc = 0;
+
+out:
+	free_cpumask_var(candidate_mask);
+	return rc;
+}
+
 /*
  * Update cpu_present_mask and paca(s) for a new cpu node.  The wrinkle
- * here is that a cpu device node may represent up to two logical cpus
+ * here is that a cpu device node may represent multiple logical cpus
  * in the SMT case.  We must honor the assumption in other code that
  * the logical ids for sibling SMT threads x and y are adjacent, such
  * that x^1 == y and y^1 == x.
  */
 static int pseries_add_processor(struct device_node *np)
 {
-	unsigned int cpu;
-	cpumask_var_t candidate_mask, tmp;
-	int err = -ENOSPC, len, nthreads, i;
+	int len, nthreads, node, cpu, assigned_node;
+	int rc = 0;
+	cpumask_var_t cpu_mask;
 	const __be32 *intserv;
 
 	intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
 	if (!intserv)
 		return 0;
 
-	zalloc_cpumask_var(&candidate_mask, GFP_KERNEL);
-	zalloc_cpumask_var(&tmp, GFP_KERNEL);
-
 	nthreads = len / sizeof(u32);
-	for (i = 0; i < nthreads; i++)
-		cpumask_set_cpu(i, tmp);
 
-	cpu_maps_update_begin();
+	if (!alloc_cpumask_var(&cpu_mask, GFP_KERNEL))
+		return -ENOMEM;
 
-	BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask));
+	/*
+	 * Fetch from the DT nodes read by dlpar_configure_connector() the NUMA
+	 * node id the added CPU belongs to.
+	 */
+	node = of_node_to_nid(np);
+	if (node < 0 || !node_possible(node))
+		node = first_online_node;
 
-	/* Get a bitmap of unoccupied slots. */
-	cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask);
-	if (cpumask_empty(candidate_mask)) {
-		/* If we get here, it most likely means that NR_CPUS is
-		 * less than the partition's max processors setting.
+	BUG_ON(node == NUMA_NO_NODE);
+	assigned_node = node;
+
+	cpu_maps_update_begin();
+
+	rc = find_cpu_id_range(nthreads, node, &cpu_mask);
+	if (rc && nr_node_ids > 1) {
+		/*
+		 * Try again, considering the free CPU ids from the other node.
 		 */
-		printk(KERN_ERR "Cannot add cpu %pOF; this system configuration"
-		       " supports %d logical cpus.\n", np,
-		       num_possible_cpus());
-		goto out_unlock;
+		node = NUMA_NO_NODE;
+		rc = find_cpu_id_range(nthreads, NUMA_NO_NODE, &cpu_mask);
 	}
 
-	while (!cpumask_empty(tmp))
-		if (cpumask_subset(tmp, candidate_mask))
-			/* Found a range where we can insert the new cpu(s) */
-			break;
-		else
-			cpumask_shift_left(tmp, tmp, nthreads);
-
-	if (cpumask_empty(tmp)) {
-		printk(KERN_ERR "Unable to find space in cpu_present_mask for"
-		       " processor %pOFn with %d thread(s)\n", np,
-		       nthreads);
-		goto out_unlock;
+	if (rc) {
+		pr_err("Cannot add cpu %pOF; this system configuration"
+		       " supports %d logical cpus.\n", np, num_possible_cpus());
+		goto out;
 	}
 
-	for_each_cpu(cpu, tmp) {
+	for_each_cpu(cpu, cpu_mask) {
 		BUG_ON(cpu_present(cpu));
 		set_cpu_present(cpu, true);
 		set_hard_smp_processor_id(cpu, be32_to_cpu(*intserv++));
 	}
-	err = 0;
-out_unlock:
+
+	/* Record the newly used CPU ids for the associate node. */
+	cpumask_or(node_recorded_ids_map[assigned_node],
+		   node_recorded_ids_map[assigned_node], cpu_mask);
+
+	/*
+	 * If node is set to NUMA_NO_NODE, CPU ids have be reused from
+	 * another node, remove them from its mask.
+	 */
+	if (node == NUMA_NO_NODE) {
+		cpu = cpumask_first(cpu_mask);
+		pr_warn("Reusing free CPU ids %d-%d from another node\n",
+			cpu, cpu + nthreads - 1);
+		for_each_online_node(node) {
+			if (node == assigned_node)
+				continue;
+			cpumask_andnot(node_recorded_ids_map[node],
+				       node_recorded_ids_map[node],
+				       cpu_mask);
+		}
+	}
+
+out:
 	cpu_maps_update_done();
-	free_cpumask_var(candidate_mask);
-	free_cpumask_var(tmp);
-	return err;
+	free_cpumask_var(cpu_mask);
+	return rc;
 }
 
 /*
@@ -498,6 +580,8 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
 		return saved_rc;
 	}
 
+	update_numa_distance(dn);
+
 	rc = dlpar_online_cpu(dn);
 	if (rc) {
 		saved_rc = rc;
@@ -908,6 +992,7 @@ static struct notifier_block pseries_smp_nb = {
 static int __init pseries_cpu_hotplug_init(void)
 {
 	int qcss_tok;
+	unsigned int node;
 
 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
 	ppc_md.cpu_probe = dlpar_cpu_probe;
@@ -929,8 +1014,18 @@ static int __init pseries_cpu_hotplug_init(void)
 	smp_ops->cpu_die = pseries_cpu_die;
 
 	/* Processors can be added/removed only on LPAR */
-	if (firmware_has_feature(FW_FEATURE_LPAR))
+	if (firmware_has_feature(FW_FEATURE_LPAR)) {
+		for_each_node(node) {
+			alloc_bootmem_cpumask_var(&node_recorded_ids_map[node]);
+
+			/* Record ids of CPU added at boot time */
+			cpumask_or(node_recorded_ids_map[node],
+				   node_recorded_ids_map[node],
+				   cpumask_of_node(node));
+		}
+
 		of_reconfig_notifier_register(&pseries_smp_nb);
+	}
 
 	return 0;
 }
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 377d852f5a9a..91cf23495ccb 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -180,6 +180,8 @@ static int update_lmb_associativity_index(struct drmem_lmb *lmb)
 		return -ENODEV;
 	}
 
+	update_numa_distance(lmb_node);
+
 	dr_node = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
 	if (!dr_node) {
 		dlpar_free_cc_nodes(lmb_node);
@@ -211,13 +213,11 @@ static int update_lmb_associativity_index(struct drmem_lmb *lmb)
 static struct memory_block *lmb_to_memblock(struct drmem_lmb *lmb)
 {
 	unsigned long section_nr;
-	struct mem_section *mem_sect;
 	struct memory_block *mem_block;
 
 	section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr));
-	mem_sect = __nr_to_section(section_nr);
 
-	mem_block = find_memory_block(mem_sect);
+	mem_block = find_memory_block(section_nr);
 	return mem_block;
 }
 
@@ -286,7 +286,7 @@ static int pseries_remove_memblock(unsigned long base, unsigned long memblock_si
 {
 	unsigned long block_sz, start_pfn;
 	int sections_per_block;
-	int i, nid;
+	int i;
 
 	start_pfn = base >> PAGE_SHIFT;
 
@@ -297,10 +297,9 @@ static int pseries_remove_memblock(unsigned long base, unsigned long memblock_si
 
 	block_sz = pseries_memory_block_size();
 	sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
-	nid = memory_add_physaddr_to_nid(base);
 
 	for (i = 0; i < sections_per_block; i++) {
-		__remove_memory(nid, base, MIN_MEMORY_BLOCK_SIZE);
+		__remove_memory(base, MIN_MEMORY_BLOCK_SIZE);
 		base += MIN_MEMORY_BLOCK_SIZE;
 	}
 
@@ -387,7 +386,7 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb)
 
 	block_sz = pseries_memory_block_size();
 
-	__remove_memory(mem_block->nid, lmb->base_addr, block_sz);
+	__remove_memory(lmb->base_addr, block_sz);
 	put_device(&mem_block->dev);
 
 	/* Update memory regions for memory remove */
@@ -660,7 +659,7 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
 
 	rc = dlpar_online_lmb(lmb);
 	if (rc) {
-		__remove_memory(nid, lmb->base_addr, block_sz);
+		__remove_memory(lmb->base_addr, block_sz);
 		invalidate_lmb_associativity_index(lmb);
 	} else {
 		lmb->flags |= DRCONF_MEM_ASSIGNED;
@@ -979,6 +978,10 @@ static int pseries_memory_notifier(struct notifier_block *nb,
 	case OF_RECONFIG_DETACH_NODE:
 		err = pseries_remove_mem_node(rd->dn);
 		break;
+	case OF_RECONFIG_UPDATE_PROPERTY:
+		if (!strcmp(rd->dn->name,
+			    "ibm,dynamic-reconfiguration-memory"))
+			drmem_update_lmbs(rd->prop);
 	}
 	return notifier_from_errno(err);
 }
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 0c55b991f665..dab5c56ffd0e 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -53,28 +53,31 @@ enum {
 	DDW_EXT_QUERY_OUT_SIZE = 2
 };
 
-static struct iommu_table_group *iommu_pseries_alloc_group(int node)
+static struct iommu_table *iommu_pseries_alloc_table(int node)
 {
-	struct iommu_table_group *table_group;
 	struct iommu_table *tbl;
 
-	table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL,
-			   node);
-	if (!table_group)
-		return NULL;
-
 	tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node);
 	if (!tbl)
-		goto free_group;
+		return NULL;
 
 	INIT_LIST_HEAD_RCU(&tbl->it_group_list);
 	kref_init(&tbl->it_kref);
+	return tbl;
+}
+
+static struct iommu_table_group *iommu_pseries_alloc_group(int node)
+{
+	struct iommu_table_group *table_group;
 
-	table_group->tables[0] = tbl;
+	table_group = kzalloc_node(sizeof(*table_group), GFP_KERNEL, node);
+	if (!table_group)
+		return NULL;
 
-	return table_group;
+	table_group->tables[0] = iommu_pseries_alloc_table(node);
+	if (table_group->tables[0])
+		return table_group;
 
-free_group:
 	kfree(table_group);
 	return NULL;
 }
@@ -107,6 +110,8 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
 	u64 proto_tce;
 	__be64 *tcep;
 	u64 rpn;
+	const unsigned long tceshift = tbl->it_page_shift;
+	const unsigned long pagesize = IOMMU_PAGE_SIZE(tbl);
 
 	proto_tce = TCE_PCI_READ; // Read allowed
 
@@ -117,10 +122,10 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
 
 	while (npages--) {
 		/* can't move this out since we might cross MEMBLOCK boundary */
-		rpn = __pa(uaddr) >> TCE_SHIFT;
-		*tcep = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT);
+		rpn = __pa(uaddr) >> tceshift;
+		*tcep = cpu_to_be64(proto_tce | rpn << tceshift);
 
-		uaddr += TCE_PAGE_SIZE;
+		uaddr += pagesize;
 		tcep++;
 	}
 	return 0;
@@ -146,7 +151,7 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
 	return be64_to_cpu(*tcep);
 }
 
-static void tce_free_pSeriesLP(unsigned long liobn, long, long);
+static void tce_free_pSeriesLP(unsigned long liobn, long, long, long);
 static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
 
 static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
@@ -166,12 +171,12 @@ static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
 		proto_tce |= TCE_PCI_WRITE;
 
 	while (npages--) {
-		tce = proto_tce | (rpn & TCE_RPN_MASK) << tceshift;
+		tce = proto_tce | rpn << tceshift;
 		rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce);
 
 		if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
 			ret = (int)rc;
-			tce_free_pSeriesLP(liobn, tcenum_start,
+			tce_free_pSeriesLP(liobn, tcenum_start, tceshift,
 			                   (npages_start - (npages + 1)));
 			break;
 		}
@@ -205,10 +210,11 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 	long tcenum_start = tcenum, npages_start = npages;
 	int ret = 0;
 	unsigned long flags;
+	const unsigned long tceshift = tbl->it_page_shift;
 
 	if ((npages == 1) || !firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) {
 		return tce_build_pSeriesLP(tbl->it_index, tcenum,
-					   tbl->it_page_shift, npages, uaddr,
+					   tceshift, npages, uaddr,
 		                           direction, attrs);
 	}
 
@@ -225,13 +231,13 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 		if (!tcep) {
 			local_irq_restore(flags);
 			return tce_build_pSeriesLP(tbl->it_index, tcenum,
-					tbl->it_page_shift,
+					tceshift,
 					npages, uaddr, direction, attrs);
 		}
 		__this_cpu_write(tce_page, tcep);
 	}
 
-	rpn = __pa(uaddr) >> TCE_SHIFT;
+	rpn = __pa(uaddr) >> tceshift;
 	proto_tce = TCE_PCI_READ;
 	if (direction != DMA_TO_DEVICE)
 		proto_tce |= TCE_PCI_WRITE;
@@ -245,12 +251,12 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 		limit = min_t(long, npages, 4096/TCE_ENTRY_SIZE);
 
 		for (l = 0; l < limit; l++) {
-			tcep[l] = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT);
+			tcep[l] = cpu_to_be64(proto_tce | rpn << tceshift);
 			rpn++;
 		}
 
 		rc = plpar_tce_put_indirect((u64)tbl->it_index,
-					    (u64)tcenum << 12,
+					    (u64)tcenum << tceshift,
 					    (u64)__pa(tcep),
 					    limit);
 
@@ -277,12 +283,13 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 	return ret;
 }
 
-static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long npages)
+static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
+			       long npages)
 {
 	u64 rc;
 
 	while (npages--) {
-		rc = plpar_tce_put((u64)liobn, (u64)tcenum << 12, 0);
+		rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, 0);
 
 		if (rc && printk_ratelimit()) {
 			printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
@@ -301,9 +308,11 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n
 	u64 rc;
 
 	if (!firmware_has_feature(FW_FEATURE_STUFF_TCE))
-		return tce_free_pSeriesLP(tbl->it_index, tcenum, npages);
+		return tce_free_pSeriesLP(tbl->it_index, tcenum,
+					  tbl->it_page_shift, npages);
 
-	rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages);
+	rc = plpar_tce_stuff((u64)tbl->it_index,
+			     (u64)tcenum << tbl->it_page_shift, 0, npages);
 
 	if (rc && printk_ratelimit()) {
 		printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n");
@@ -319,7 +328,8 @@ static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum)
 	u64 rc;
 	unsigned long tce_ret;
 
-	rc = plpar_tce_get((u64)tbl->it_index, (u64)tcenum << 12, &tce_ret);
+	rc = plpar_tce_get((u64)tbl->it_index,
+			   (u64)tcenum << tbl->it_page_shift, &tce_ret);
 
 	if (rc && printk_ratelimit()) {
 		printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc);
@@ -339,7 +349,7 @@ struct dynamic_dma_window_prop {
 	__be32	window_shift;	/* ilog2(tce_window_size) */
 };
 
-struct direct_window {
+struct dma_win {
 	struct device_node *device;
 	const struct dynamic_dma_window_prop *prop;
 	struct list_head list;
@@ -359,12 +369,13 @@ struct ddw_create_response {
 	u32 addr_lo;
 };
 
-static LIST_HEAD(direct_window_list);
+static LIST_HEAD(dma_win_list);
 /* prevents races between memory on/offline and window creation */
-static DEFINE_SPINLOCK(direct_window_list_lock);
+static DEFINE_SPINLOCK(dma_win_list_lock);
 /* protects initializing window twice for same device */
-static DEFINE_MUTEX(direct_window_init_mutex);
+static DEFINE_MUTEX(dma_win_init_mutex);
 #define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
+#define DMA64_PROPNAME "linux,dma64-ddr-window-info"
 
 static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn,
 					unsigned long num_pfn, const void *arg)
@@ -491,6 +502,24 @@ static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn,
 	return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, arg);
 }
 
+static void iommu_table_setparms_common(struct iommu_table *tbl, unsigned long busno,
+					unsigned long liobn, unsigned long win_addr,
+					unsigned long window_size, unsigned long page_shift,
+					void *base, struct iommu_table_ops *table_ops)
+{
+	tbl->it_busno = busno;
+	tbl->it_index = liobn;
+	tbl->it_offset = win_addr >> page_shift;
+	tbl->it_size = window_size >> page_shift;
+	tbl->it_page_shift = page_shift;
+	tbl->it_base = (unsigned long)base;
+	tbl->it_blocksize = 16;
+	tbl->it_type = TCE_PCI;
+	tbl->it_ops = table_ops;
+}
+
+struct iommu_table_ops iommu_table_pseries_ops;
+
 static void iommu_table_setparms(struct pci_controller *phb,
 				 struct device_node *dn,
 				 struct iommu_table *tbl)
@@ -499,8 +528,13 @@ static void iommu_table_setparms(struct pci_controller *phb,
 	const unsigned long *basep;
 	const u32 *sizep;
 
-	node = phb->dn;
+	/* Test if we are going over 2GB of DMA space */
+	if (phb->dma_window_base_cur + phb->dma_window_size > SZ_2G) {
+		udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
+		panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
+	}
 
+	node = phb->dn;
 	basep = of_get_property(node, "linux,tce-base", NULL);
 	sizep = of_get_property(node, "linux,tce-size", NULL);
 	if (basep == NULL || sizep == NULL) {
@@ -509,33 +543,18 @@ static void iommu_table_setparms(struct pci_controller *phb,
 		return;
 	}
 
-	tbl->it_base = (unsigned long)__va(*basep);
+	iommu_table_setparms_common(tbl, phb->bus->number, 0, phb->dma_window_base_cur,
+				    phb->dma_window_size, IOMMU_PAGE_SHIFT_4K,
+				    __va(*basep), &iommu_table_pseries_ops);
 
 	if (!is_kdump_kernel())
 		memset((void *)tbl->it_base, 0, *sizep);
 
-	tbl->it_busno = phb->bus->number;
-	tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
-
-	/* Units of tce entries */
-	tbl->it_offset = phb->dma_window_base_cur >> tbl->it_page_shift;
-
-	/* Test if we are going over 2GB of DMA space */
-	if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) {
-		udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
-		panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
-	}
-
 	phb->dma_window_base_cur += phb->dma_window_size;
-
-	/* Set the tce table size - measured in entries */
-	tbl->it_size = phb->dma_window_size >> tbl->it_page_shift;
-
-	tbl->it_index = 0;
-	tbl->it_blocksize = 16;
-	tbl->it_type = TCE_PCI;
 }
 
+struct iommu_table_ops iommu_table_lpar_multi_ops;
+
 /*
  * iommu_table_setparms_lpar
  *
@@ -547,17 +566,13 @@ static void iommu_table_setparms_lpar(struct pci_controller *phb,
 				      struct iommu_table_group *table_group,
 				      const __be32 *dma_window)
 {
-	unsigned long offset, size;
+	unsigned long offset, size, liobn;
 
-	of_parse_dma_window(dn, dma_window, &tbl->it_index, &offset, &size);
+	of_parse_dma_window(dn, dma_window, &liobn, &offset, &size);
+
+	iommu_table_setparms_common(tbl, phb->bus->number, liobn, offset, size, IOMMU_PAGE_SHIFT_4K, NULL,
+				    &iommu_table_lpar_multi_ops);
 
-	tbl->it_busno = phb->bus->number;
-	tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
-	tbl->it_base   = 0;
-	tbl->it_blocksize  = 16;
-	tbl->it_type = TCE_PCI;
-	tbl->it_offset = offset >> tbl->it_page_shift;
-	tbl->it_size = size >> tbl->it_page_shift;
 
 	table_group->tce32_start = offset;
 	table_group->tce32_size = size;
@@ -637,7 +652,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
 	tbl = pci->table_group->tables[0];
 
 	iommu_table_setparms(pci->phb, dn, tbl);
-	tbl->it_ops = &iommu_table_pseries_ops;
+
 	if (!iommu_init_table(tbl, pci->phb->node, 0, 0))
 		panic("Failed to initialize iommu table");
 
@@ -698,7 +713,10 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
 	pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n",
 		 dn);
 
-	/* Find nearest ibm,dma-window, walking up the device tree */
+	/*
+	 * Find nearest ibm,dma-window (default DMA window), walking up the
+	 * device tree
+	 */
 	for (pdn = dn; pdn != NULL; pdn = pdn->parent) {
 		dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
 		if (dma_window != NULL)
@@ -720,7 +738,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
 		tbl = ppci->table_group->tables[0];
 		iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
 				ppci->table_group, dma_window);
-		tbl->it_ops = &iommu_table_lpar_multi_ops;
+
 		if (!iommu_init_table(tbl, ppci->phb->node, 0, 0))
 			panic("Failed to initialize iommu table");
 		iommu_register_group(ppci->table_group,
@@ -750,7 +768,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
 		PCI_DN(dn)->table_group = iommu_pseries_alloc_group(phb->node);
 		tbl = PCI_DN(dn)->table_group->tables[0];
 		iommu_table_setparms(phb, dn, tbl);
-		tbl->it_ops = &iommu_table_pseries_ops;
+
 		if (!iommu_init_table(tbl, phb->node, 0, 0))
 			panic("Failed to initialize iommu table");
 
@@ -785,17 +803,10 @@ static int __init disable_ddw_setup(char *str)
 
 early_param("disable_ddw", disable_ddw_setup);
 
-static void remove_dma_window(struct device_node *np, u32 *ddw_avail,
-			      struct property *win)
+static void clean_dma_window(struct device_node *np, struct dynamic_dma_window_prop *dwp)
 {
-	struct dynamic_dma_window_prop *dwp;
-	u64 liobn;
 	int ret;
 
-	dwp = win->value;
-	liobn = (u64)be32_to_cpu(dwp->liobn);
-
-	/* clear the whole window, note the arg is in kernel pages */
 	ret = tce_clearrange_multi_pSeriesLP(0,
 		1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp);
 	if (ret)
@@ -804,94 +815,136 @@ static void remove_dma_window(struct device_node *np, u32 *ddw_avail,
 	else
 		pr_debug("%pOF successfully cleared tces in window.\n",
 			 np);
+}
+
+/*
+ * Call only if DMA window is clean.
+ */
+static void __remove_dma_window(struct device_node *np, u32 *ddw_avail, u64 liobn)
+{
+	int ret;
 
 	ret = rtas_call(ddw_avail[DDW_REMOVE_PE_DMA_WIN], 1, 1, NULL, liobn);
 	if (ret)
-		pr_warn("%pOF: failed to remove direct window: rtas returned "
+		pr_warn("%pOF: failed to remove DMA window: rtas returned "
 			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
 			np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
 	else
-		pr_debug("%pOF: successfully removed direct window: rtas returned "
+		pr_debug("%pOF: successfully removed DMA window: rtas returned "
 			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
 			np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
 }
 
-static void remove_ddw(struct device_node *np, bool remove_prop)
+static void remove_dma_window(struct device_node *np, u32 *ddw_avail,
+			      struct property *win)
+{
+	struct dynamic_dma_window_prop *dwp;
+	u64 liobn;
+
+	dwp = win->value;
+	liobn = (u64)be32_to_cpu(dwp->liobn);
+
+	clean_dma_window(np, dwp);
+	__remove_dma_window(np, ddw_avail, liobn);
+}
+
+static int remove_ddw(struct device_node *np, bool remove_prop, const char *win_name)
 {
 	struct property *win;
 	u32 ddw_avail[DDW_APPLICABLE_SIZE];
 	int ret = 0;
 
+	win = of_find_property(np, win_name, NULL);
+	if (!win)
+		return -EINVAL;
+
 	ret = of_property_read_u32_array(np, "ibm,ddw-applicable",
 					 &ddw_avail[0], DDW_APPLICABLE_SIZE);
 	if (ret)
-		return;
+		return 0;
 
-	win = of_find_property(np, DIRECT64_PROPNAME, NULL);
-	if (!win)
-		return;
 
 	if (win->length >= sizeof(struct dynamic_dma_window_prop))
 		remove_dma_window(np, ddw_avail, win);
 
 	if (!remove_prop)
-		return;
+		return 0;
 
 	ret = of_remove_property(np, win);
 	if (ret)
-		pr_warn("%pOF: failed to remove direct window property: %d\n",
+		pr_warn("%pOF: failed to remove DMA window property: %d\n",
 			np, ret);
+	return 0;
 }
 
-static u64 find_existing_ddw(struct device_node *pdn, int *window_shift)
+static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr, int *window_shift)
 {
-	struct direct_window *window;
-	const struct dynamic_dma_window_prop *direct64;
-	u64 dma_addr = 0;
+	struct dma_win *window;
+	const struct dynamic_dma_window_prop *dma64;
+	bool found = false;
 
-	spin_lock(&direct_window_list_lock);
+	spin_lock(&dma_win_list_lock);
 	/* check if we already created a window and dupe that config if so */
-	list_for_each_entry(window, &direct_window_list, list) {
+	list_for_each_entry(window, &dma_win_list, list) {
 		if (window->device == pdn) {
-			direct64 = window->prop;
-			dma_addr = be64_to_cpu(direct64->dma_base);
-			*window_shift = be32_to_cpu(direct64->window_shift);
+			dma64 = window->prop;
+			*dma_addr = be64_to_cpu(dma64->dma_base);
+			*window_shift = be32_to_cpu(dma64->window_shift);
+			found = true;
 			break;
 		}
 	}
-	spin_unlock(&direct_window_list_lock);
+	spin_unlock(&dma_win_list_lock);
 
-	return dma_addr;
+	return found;
 }
 
-static int find_existing_ddw_windows(void)
+static struct dma_win *ddw_list_new_entry(struct device_node *pdn,
+					  const struct dynamic_dma_window_prop *dma64)
 {
-	int len;
-	struct device_node *pdn;
-	struct direct_window *window;
-	const struct dynamic_dma_window_prop *direct64;
+	struct dma_win *window;
 
-	if (!firmware_has_feature(FW_FEATURE_LPAR))
-		return 0;
+	window = kzalloc(sizeof(*window), GFP_KERNEL);
+	if (!window)
+		return NULL;
 
-	for_each_node_with_property(pdn, DIRECT64_PROPNAME) {
-		direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len);
-		if (!direct64)
-			continue;
+	window->device = pdn;
+	window->prop = dma64;
 
-		window = kzalloc(sizeof(*window), GFP_KERNEL);
-		if (!window || len < sizeof(struct dynamic_dma_window_prop)) {
-			kfree(window);
-			remove_ddw(pdn, true);
+	return window;
+}
+
+static void find_existing_ddw_windows_named(const char *name)
+{
+	int len;
+	struct device_node *pdn;
+	struct dma_win *window;
+	const struct dynamic_dma_window_prop *dma64;
+
+	for_each_node_with_property(pdn, name) {
+		dma64 = of_get_property(pdn, name, &len);
+		if (!dma64 || len < sizeof(*dma64)) {
+			remove_ddw(pdn, true, name);
 			continue;
 		}
 
-		window->device = pdn;
-		window->prop = direct64;
-		spin_lock(&direct_window_list_lock);
-		list_add(&window->list, &direct_window_list);
-		spin_unlock(&direct_window_list_lock);
+		window = ddw_list_new_entry(pdn, dma64);
+		if (!window)
+			break;
+
+		spin_lock(&dma_win_list_lock);
+		list_add(&window->list, &dma_win_list);
+		spin_unlock(&dma_win_list_lock);
 	}
+}
+
+static int find_existing_ddw_windows(void)
+{
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return 0;
+
+	find_existing_ddw_windows_named(DIRECT64_PROPNAME);
+	find_existing_ddw_windows_named(DMA64_PROPNAME);
 
 	return 0;
 }
@@ -1130,6 +1183,35 @@ static int iommu_get_page_shift(u32 query_page_size)
 	return 0;
 }
 
+static struct property *ddw_property_create(const char *propname, u32 liobn, u64 dma_addr,
+					    u32 page_shift, u32 window_shift)
+{
+	struct dynamic_dma_window_prop *ddwprop;
+	struct property *win64;
+
+	win64 = kzalloc(sizeof(*win64), GFP_KERNEL);
+	if (!win64)
+		return NULL;
+
+	win64->name = kstrdup(propname, GFP_KERNEL);
+	ddwprop = kzalloc(sizeof(*ddwprop), GFP_KERNEL);
+	win64->value = ddwprop;
+	win64->length = sizeof(*ddwprop);
+	if (!win64->name || !win64->value) {
+		kfree(win64->name);
+		kfree(win64->value);
+		kfree(win64);
+		return NULL;
+	}
+
+	ddwprop->liobn = cpu_to_be32(liobn);
+	ddwprop->dma_base = cpu_to_be64(dma_addr);
+	ddwprop->tce_shift = cpu_to_be32(page_shift);
+	ddwprop->window_shift = cpu_to_be32(window_shift);
+
+	return win64;
+}
+
 /*
  * If the PE supports dynamic dma windows, and there is space for a table
  * that can map all pages in a linear offset, then setup such a table,
@@ -1139,34 +1221,39 @@ static int iommu_get_page_shift(u32 query_page_size)
  * pdn: the parent pe node with the ibm,dma_window property
  * Future: also check if we can remap the base window for our base page size
  *
- * returns the dma offset for use by the direct mapped DMA code.
+ * returns true if can map all pages (direct mapping), false otherwise..
  */
-static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
+static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 {
 	int len = 0, ret;
 	int max_ram_len = order_base_2(ddw_memory_hotplug_max());
 	struct ddw_query_response query;
 	struct ddw_create_response create;
 	int page_shift;
-	u64 dma_addr;
+	u64 win_addr;
+	const char *win_name;
 	struct device_node *dn;
 	u32 ddw_avail[DDW_APPLICABLE_SIZE];
-	struct direct_window *window;
+	struct dma_win *window;
 	struct property *win64;
-	struct dynamic_dma_window_prop *ddwprop;
+	bool ddw_enabled = false;
 	struct failed_ddw_pdn *fpdn;
-	bool default_win_removed = false;
+	bool default_win_removed = false, direct_mapping = false;
 	bool pmem_present;
+	struct pci_dn *pci = PCI_DN(pdn);
+	struct iommu_table *tbl = pci->table_group->tables[0];
 
 	dn = of_find_node_by_type(NULL, "ibm,pmemory");
 	pmem_present = dn != NULL;
 	of_node_put(dn);
 
-	mutex_lock(&direct_window_init_mutex);
+	mutex_lock(&dma_win_init_mutex);
 
-	dma_addr = find_existing_ddw(pdn, &len);
-	if (dma_addr != 0)
+	if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len)) {
+		direct_mapping = (len >= max_ram_len);
+		ddw_enabled = true;
 		goto out_unlock;
+	}
 
 	/*
 	 * If we already went through this for a previous function of
@@ -1240,12 +1327,12 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 
 	page_shift = iommu_get_page_shift(query.page_size);
 	if (!page_shift) {
-		dev_dbg(&dev->dev, "no supported direct page size in mask %x",
-			  query.page_size);
+		dev_dbg(&dev->dev, "no supported page size in mask %x",
+			query.page_size);
 		goto out_failed;
 	}
-	/* verify the window * number of ptes will map the partition */
-	/* check largest block * page size > max memory hotplug addr */
+
+
 	/*
 	 * The "ibm,pmemory" can appear anywhere in the address space.
 	 * Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS
@@ -1261,81 +1348,127 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 			dev_info(&dev->dev, "Skipping ibm,pmemory");
 	}
 
+	/* check if the available block * number of ptes will map everything */
 	if (query.largest_available_block < (1ULL << (len - page_shift))) {
 		dev_dbg(&dev->dev,
 			"can't map partition max 0x%llx with %llu %llu-sized pages\n",
 			1ULL << len,
 			query.largest_available_block,
 			1ULL << page_shift);
-		goto out_failed;
-	}
-	win64 = kzalloc(sizeof(struct property), GFP_KERNEL);
-	if (!win64) {
-		dev_info(&dev->dev,
-			"couldn't allocate property for 64bit dma window\n");
-		goto out_failed;
-	}
-	win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL);
-	win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL);
-	win64->length = sizeof(*ddwprop);
-	if (!win64->name || !win64->value) {
-		dev_info(&dev->dev,
-			"couldn't allocate property name and value\n");
-		goto out_free_prop;
+
+		/* DDW + IOMMU on single window may fail if there is any allocation */
+		if (default_win_removed && iommu_table_in_use(tbl)) {
+			dev_dbg(&dev->dev, "current IOMMU table in use, can't be replaced.\n");
+			goto out_failed;
+		}
+
+		len = order_base_2(query.largest_available_block << page_shift);
+		win_name = DMA64_PROPNAME;
+	} else {
+		direct_mapping = true;
+		win_name = DIRECT64_PROPNAME;
 	}
 
 	ret = create_ddw(dev, ddw_avail, &create, page_shift, len);
 	if (ret != 0)
-		goto out_free_prop;
-
-	ddwprop->liobn = cpu_to_be32(create.liobn);
-	ddwprop->dma_base = cpu_to_be64(((u64)create.addr_hi << 32) |
-			create.addr_lo);
-	ddwprop->tce_shift = cpu_to_be32(page_shift);
-	ddwprop->window_shift = cpu_to_be32(len);
+		goto out_failed;
 
 	dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %pOF\n",
 		  create.liobn, dn);
 
-	window = kzalloc(sizeof(*window), GFP_KERNEL);
-	if (!window)
-		goto out_clear_window;
+	win_addr = ((u64)create.addr_hi << 32) | create.addr_lo;
+	win64 = ddw_property_create(win_name, create.liobn, win_addr, page_shift, len);
 
-	ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
-			win64->value, tce_setrange_multi_pSeriesLP_walk);
-	if (ret) {
-		dev_info(&dev->dev, "failed to map direct window for %pOF: %d\n",
-			 dn, ret);
-		goto out_free_window;
+	if (!win64) {
+		dev_info(&dev->dev,
+			 "couldn't allocate property, property name, or value\n");
+		goto out_remove_win;
 	}
 
 	ret = of_add_property(pdn, win64);
 	if (ret) {
-		dev_err(&dev->dev, "unable to add dma window property for %pOF: %d",
-			 pdn, ret);
-		goto out_free_window;
+		dev_err(&dev->dev, "unable to add DMA window property for %pOF: %d",
+			pdn, ret);
+		goto out_free_prop;
 	}
 
-	window->device = pdn;
-	window->prop = ddwprop;
-	spin_lock(&direct_window_list_lock);
-	list_add(&window->list, &direct_window_list);
-	spin_unlock(&direct_window_list_lock);
+	window = ddw_list_new_entry(pdn, win64->value);
+	if (!window)
+		goto out_del_prop;
+
+	if (direct_mapping) {
+		/* DDW maps the whole partition, so enable direct DMA mapping */
+		ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
+					    win64->value, tce_setrange_multi_pSeriesLP_walk);
+		if (ret) {
+			dev_info(&dev->dev, "failed to map DMA window for %pOF: %d\n",
+				 dn, ret);
+
+		/* Make sure to clean DDW if any TCE was set*/
+		clean_dma_window(pdn, win64->value);
+			goto out_del_list;
+		}
+	} else {
+		struct iommu_table *newtbl;
+		int i;
+
+		for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) {
+			const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM;
+
+			/* Look for MMIO32 */
+			if ((pci->phb->mem_resources[i].flags & mask) == IORESOURCE_MEM)
+				break;
+		}
+
+		if (i == ARRAY_SIZE(pci->phb->mem_resources))
+			goto out_del_list;
+
+		/* New table for using DDW instead of the default DMA window */
+		newtbl = iommu_pseries_alloc_table(pci->phb->node);
+		if (!newtbl) {
+			dev_dbg(&dev->dev, "couldn't create new IOMMU table\n");
+			goto out_del_list;
+		}
+
+		iommu_table_setparms_common(newtbl, pci->phb->bus->number, create.liobn, win_addr,
+					    1UL << len, page_shift, NULL, &iommu_table_lpar_multi_ops);
+		iommu_init_table(newtbl, pci->phb->node, pci->phb->mem_resources[i].start,
+				 pci->phb->mem_resources[i].end);
 
-	dma_addr = be64_to_cpu(ddwprop->dma_base);
+		pci->table_group->tables[1] = newtbl;
+
+		/* Keep default DMA window stuct if removed */
+		if (default_win_removed) {
+			tbl->it_size = 0;
+			kfree(tbl->it_map);
+		}
+
+		set_iommu_table_base(&dev->dev, newtbl);
+	}
+
+	spin_lock(&dma_win_list_lock);
+	list_add(&window->list, &dma_win_list);
+	spin_unlock(&dma_win_list_lock);
+
+	dev->dev.archdata.dma_offset = win_addr;
+	ddw_enabled = true;
 	goto out_unlock;
 
-out_free_window:
+out_del_list:
 	kfree(window);
 
-out_clear_window:
-	remove_ddw(pdn, true);
+out_del_prop:
+	of_remove_property(pdn, win64);
 
 out_free_prop:
 	kfree(win64->name);
 	kfree(win64->value);
 	kfree(win64);
 
+out_remove_win:
+	/* DDW is clean, so it's ok to call this directly. */
+	__remove_dma_window(pdn, ddw_avail, create.liobn);
+
 out_failed:
 	if (default_win_removed)
 		reset_dma_window(dev, pdn);
@@ -1347,17 +1480,17 @@ out_failed:
 	list_add(&fpdn->list, &failed_ddw_pdn_list);
 
 out_unlock:
-	mutex_unlock(&direct_window_init_mutex);
+	mutex_unlock(&dma_win_init_mutex);
 
 	/*
 	 * If we have persistent memory and the window size is only as big
 	 * as RAM, then we failed to create a window to cover persistent
 	 * memory and need to set the DMA limit.
 	 */
-	if (pmem_present && dma_addr && (len == max_ram_len))
-		dev->dev.bus_dma_limit = dma_addr + (1ULL << len);
+	if (pmem_present && ddw_enabled && direct_mapping && len == max_ram_len)
+		dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset + (1ULL << len);
 
-	return dma_addr;
+    return ddw_enabled && direct_mapping;
 }
 
 static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
@@ -1399,7 +1532,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
 		tbl = pci->table_group->tables[0];
 		iommu_table_setparms_lpar(pci->phb, pdn, tbl,
 				pci->table_group, dma_window);
-		tbl->it_ops = &iommu_table_lpar_multi_ops;
+
 		iommu_init_table(tbl, pci->phb->node, 0, 0);
 		iommu_register_group(pci->table_group,
 				pci_domain_nr(pci->phb->bus), 0);
@@ -1436,11 +1569,8 @@ static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask)
 			break;
 	}
 
-	if (pdn && PCI_DN(pdn)) {
-		pdev->dev.archdata.dma_offset = enable_ddw(pdev, pdn);
-		if (pdev->dev.archdata.dma_offset)
-			return true;
-	}
+	if (pdn && PCI_DN(pdn))
+		return enable_ddw(pdev, pdn);
 
 	return false;
 }
@@ -1448,29 +1578,29 @@ static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask)
 static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
 		void *data)
 {
-	struct direct_window *window;
+	struct dma_win *window;
 	struct memory_notify *arg = data;
 	int ret = 0;
 
 	switch (action) {
 	case MEM_GOING_ONLINE:
-		spin_lock(&direct_window_list_lock);
-		list_for_each_entry(window, &direct_window_list, list) {
+		spin_lock(&dma_win_list_lock);
+		list_for_each_entry(window, &dma_win_list, list) {
 			ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn,
 					arg->nr_pages, window->prop);
 			/* XXX log error */
 		}
-		spin_unlock(&direct_window_list_lock);
+		spin_unlock(&dma_win_list_lock);
 		break;
 	case MEM_CANCEL_ONLINE:
 	case MEM_OFFLINE:
-		spin_lock(&direct_window_list_lock);
-		list_for_each_entry(window, &direct_window_list, list) {
+		spin_lock(&dma_win_list_lock);
+		list_for_each_entry(window, &dma_win_list, list) {
 			ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn,
 					arg->nr_pages, window->prop);
 			/* XXX log error */
 		}
-		spin_unlock(&direct_window_list_lock);
+		spin_unlock(&dma_win_list_lock);
 		break;
 	default:
 		break;
@@ -1491,7 +1621,7 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti
 	struct of_reconfig_data *rd = data;
 	struct device_node *np = rd->dn;
 	struct pci_dn *pci = PCI_DN(np);
-	struct direct_window *window;
+	struct dma_win *window;
 
 	switch (action) {
 	case OF_RECONFIG_DETACH_NODE:
@@ -1502,20 +1632,22 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti
 		 * we have to remove the property when releasing
 		 * the device node.
 		 */
-		remove_ddw(np, false);
+		if (remove_ddw(np, false, DIRECT64_PROPNAME))
+			remove_ddw(np, false, DMA64_PROPNAME);
+
 		if (pci && pci->table_group)
 			iommu_pseries_free_group(pci->table_group,
 					np->full_name);
 
-		spin_lock(&direct_window_list_lock);
-		list_for_each_entry(window, &direct_window_list, list) {
+		spin_lock(&dma_win_list_lock);
+		list_for_each_entry(window, &dma_win_list, list) {
 			if (window->device == np) {
 				list_del(&window->list);
 				kfree(window);
 				break;
 			}
 		}
-		spin_unlock(&direct_window_list_lock);
+		spin_unlock(&dma_win_list_lock);
 		break;
 	default:
 		err = NOTIFY_DONE;
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index dab356e3ff87..3df6bdfea475 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -22,6 +22,8 @@
 #include <linux/workqueue.h>
 #include <linux/proc_fs.h>
 #include <linux/pgtable.h>
+#include <linux/debugfs.h>
+
 #include <asm/processor.h>
 #include <asm/mmu.h>
 #include <asm/page.h>
@@ -39,7 +41,6 @@
 #include <asm/kexec.h>
 #include <asm/fadump.h>
 #include <asm/asm-prototypes.h>
-#include <asm/debugfs.h>
 #include <asm/dtl.h>
 
 #include "pseries.h"
@@ -261,7 +262,7 @@ static int cpu_relative_dispatch_distance(int last_disp_cpu, int cur_disp_cpu)
 	if (!last_disp_cpu_assoc || !cur_disp_cpu_assoc)
 		return -EIO;
 
-	return cpu_distance(last_disp_cpu_assoc, cur_disp_cpu_assoc);
+	return cpu_relative_distance(last_disp_cpu_assoc, cur_disp_cpu_assoc);
 }
 
 static int cpu_home_node_dispatch_distance(int disp_cpu)
@@ -281,7 +282,7 @@ static int cpu_home_node_dispatch_distance(int disp_cpu)
 	if (!disp_cpu_assoc || !vcpu_assoc)
 		return -EIO;
 
-	return cpu_distance(disp_cpu_assoc, vcpu_assoc);
+	return cpu_relative_distance(disp_cpu_assoc, vcpu_assoc);
 }
 
 static void update_vcpu_disp_stat(int disp_cpu)
@@ -801,7 +802,8 @@ static long pSeries_lpar_hpte_remove(unsigned long hpte_group)
 	return -1;
 }
 
-static void manual_hpte_clear_all(void)
+/* Called during kexec sequence with MMU off */
+static notrace void manual_hpte_clear_all(void)
 {
 	unsigned long size_bytes = 1UL << ppc64_pft_size;
 	unsigned long hpte_count = size_bytes >> 4;
@@ -834,7 +836,8 @@ static void manual_hpte_clear_all(void)
 	}
 }
 
-static int hcall_hpte_clear_all(void)
+/* Called during kexec sequence with MMU off */
+static notrace int hcall_hpte_clear_all(void)
 {
 	int rc;
 
@@ -845,7 +848,8 @@ static int hcall_hpte_clear_all(void)
 	return rc;
 }
 
-static void pseries_hpte_clear_all(void)
+/* Called during kexec sequence with MMU off */
+static notrace void pseries_hpte_clear_all(void)
 {
 	int rc;
 
@@ -2016,7 +2020,7 @@ static int __init vpa_debugfs_init(void)
 	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
 		return 0;
 
-	vpa_dir = debugfs_create_dir("vpa", powerpc_debugfs_root);
+	vpa_dir = debugfs_create_dir("vpa", arch_debugfs_dir);
 
 	/* set up the per-cpu vpa file*/
 	for_each_possible_cpu(i) {
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index 637300330507..1b305e411862 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -13,6 +13,7 @@
 #include <asm/hw_irq.h>
 #include <asm/ppc-pci.h>
 #include <asm/machdep.h>
+#include <asm/xive.h>
 
 #include "pseries.h"
 
@@ -110,21 +111,6 @@ static int rtas_query_irq_number(struct pci_dn *pdn, int offset)
 	return rtas_ret[0];
 }
 
-static void rtas_teardown_msi_irqs(struct pci_dev *pdev)
-{
-	struct msi_desc *entry;
-
-	for_each_pci_msi_entry(entry, pdev) {
-		if (!entry->irq)
-			continue;
-
-		irq_set_msi_desc(entry->irq, NULL);
-		irq_dispose_mapping(entry->irq);
-	}
-
-	rtas_disable_msi(pdev);
-}
-
 static int check_req(struct pci_dev *pdev, int nvec, char *prop_name)
 {
 	struct device_node *dn;
@@ -164,12 +150,12 @@ static int check_req_msix(struct pci_dev *pdev, int nvec)
 
 /* Quota calculation */
 
-static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total)
+static struct device_node *__find_pe_total_msi(struct device_node *node, int *total)
 {
 	struct device_node *dn;
 	const __be32 *p;
 
-	dn = of_node_get(pci_device_to_OF_node(dev));
+	dn = of_node_get(node);
 	while (dn) {
 		p = of_get_property(dn, "ibm,pe-total-#msi", NULL);
 		if (p) {
@@ -185,6 +171,11 @@ static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total)
 	return NULL;
 }
 
+static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total)
+{
+	return __find_pe_total_msi(pci_device_to_OF_node(dev), total);
+}
+
 static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
 {
 	struct device_node *dn;
@@ -368,12 +359,11 @@ static void rtas_hack_32bit_msi_gen2(struct pci_dev *pdev)
 	pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, 0);
 }
 
-static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type)
+static int rtas_prepare_msi_irqs(struct pci_dev *pdev, int nvec_in, int type,
+				 msi_alloc_info_t *arg)
 {
 	struct pci_dn *pdn;
-	int hwirq, virq, i, quota, rc;
-	struct msi_desc *entry;
-	struct msi_msg msg;
+	int quota, rc;
 	int nvec = nvec_in;
 	int use_32bit_msi_hack = 0;
 
@@ -451,53 +441,229 @@ again:
 		return rc;
 	}
 
-	i = 0;
-	for_each_pci_msi_entry(entry, pdev) {
-		hwirq = rtas_query_irq_number(pdn, i++);
-		if (hwirq < 0) {
-			pr_debug("rtas_msi: error (%d) getting hwirq\n", rc);
-			return hwirq;
-		}
+	return 0;
+}
 
-		/*
-		 * Depending on the number of online CPUs in the original
-		 * kernel, it is likely for CPU #0 to be offline in a kdump
-		 * kernel. The associated IRQs in the affinity mappings
-		 * provided by irq_create_affinity_masks() are thus not
-		 * started by irq_startup(), as per-design for managed IRQs.
-		 * This can be a problem with multi-queue block devices driven
-		 * by blk-mq : such a non-started IRQ is very likely paired
-		 * with the single queue enforced by blk-mq during kdump (see
-		 * blk_mq_alloc_tag_set()). This causes the device to remain
-		 * silent and likely hangs the guest at some point.
-		 *
-		 * We don't really care for fine-grained affinity when doing
-		 * kdump actually : simply ignore the pre-computed affinity
-		 * masks in this case and let the default mask with all CPUs
-		 * be used when creating the IRQ mappings.
-		 */
-		if (is_kdump_kernel())
-			virq = irq_create_mapping(NULL, hwirq);
-		else
-			virq = irq_create_mapping_affinity(NULL, hwirq,
-							   entry->affinity);
+static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev,
+				   int nvec, msi_alloc_info_t *arg)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct msi_desc *desc = first_pci_msi_entry(pdev);
+	int type = desc->msi_attrib.is_msix ? PCI_CAP_ID_MSIX : PCI_CAP_ID_MSI;
 
-		if (!virq) {
-			pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq);
-			return -ENOSPC;
-		}
+	return rtas_prepare_msi_irqs(pdev, nvec, type, arg);
+}
+
+/*
+ * ->msi_free() is called before irq_domain_free_irqs_top() when the
+ * handler data is still available. Use that to clear the XIVE
+ * controller data.
+ */
+static void pseries_msi_ops_msi_free(struct irq_domain *domain,
+				     struct msi_domain_info *info,
+				     unsigned int irq)
+{
+	if (xive_enabled())
+		xive_irq_free_data(irq);
+}
+
+/*
+ * RTAS can not disable one MSI at a time. It's all or nothing. Do it
+ * at the end after all IRQs have been freed.
+ */
+static void pseries_msi_domain_free_irqs(struct irq_domain *domain,
+					 struct device *dev)
+{
+	if (WARN_ON_ONCE(!dev_is_pci(dev)))
+		return;
+
+	__msi_domain_free_irqs(domain, dev);
+
+	rtas_disable_msi(to_pci_dev(dev));
+}
+
+static struct msi_domain_ops pseries_pci_msi_domain_ops = {
+	.msi_prepare	= pseries_msi_ops_prepare,
+	.msi_free	= pseries_msi_ops_msi_free,
+	.domain_free_irqs = pseries_msi_domain_free_irqs,
+};
+
+static void pseries_msi_shutdown(struct irq_data *d)
+{
+	d = d->parent_data;
+	if (d->chip->irq_shutdown)
+		d->chip->irq_shutdown(d);
+}
+
+static void pseries_msi_mask(struct irq_data *d)
+{
+	pci_msi_mask_irq(d);
+	irq_chip_mask_parent(d);
+}
+
+static void pseries_msi_unmask(struct irq_data *d)
+{
+	pci_msi_unmask_irq(d);
+	irq_chip_unmask_parent(d);
+}
 
-		dev_dbg(&pdev->dev, "rtas_msi: allocated virq %d\n", virq);
-		irq_set_msi_desc(virq, entry);
+static struct irq_chip pseries_pci_msi_irq_chip = {
+	.name		= "pSeries-PCI-MSI",
+	.irq_shutdown	= pseries_msi_shutdown,
+	.irq_mask	= pseries_msi_mask,
+	.irq_unmask	= pseries_msi_unmask,
+	.irq_eoi	= irq_chip_eoi_parent,
+};
+
+static struct msi_domain_info pseries_msi_domain_info = {
+	.flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		  MSI_FLAG_MULTI_PCI_MSI  | MSI_FLAG_PCI_MSIX),
+	.ops   = &pseries_pci_msi_domain_ops,
+	.chip  = &pseries_pci_msi_irq_chip,
+};
+
+static void pseries_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	__pci_read_msi_msg(irq_data_get_msi_desc(data), msg);
+}
+
+static struct irq_chip pseries_msi_irq_chip = {
+	.name			= "pSeries-MSI",
+	.irq_shutdown		= pseries_msi_shutdown,
+	.irq_mask		= irq_chip_mask_parent,
+	.irq_unmask		= irq_chip_unmask_parent,
+	.irq_eoi		= irq_chip_eoi_parent,
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
+	.irq_compose_msi_msg	= pseries_msi_compose_msg,
+};
+
+static int pseries_irq_parent_domain_alloc(struct irq_domain *domain, unsigned int virq,
+					   irq_hw_number_t hwirq)
+{
+	struct irq_fwspec parent_fwspec;
+	int ret;
+
+	parent_fwspec.fwnode = domain->parent->fwnode;
+	parent_fwspec.param_count = 2;
+	parent_fwspec.param[0] = hwirq;
+	parent_fwspec.param[1] = IRQ_TYPE_EDGE_RISING;
+
+	ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &parent_fwspec);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int pseries_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				    unsigned int nr_irqs, void *arg)
+{
+	struct pci_controller *phb = domain->host_data;
+	msi_alloc_info_t *info = arg;
+	struct msi_desc *desc = info->desc;
+	struct pci_dev *pdev = msi_desc_to_pci_dev(desc);
+	int hwirq;
+	int i, ret;
+
+	hwirq = rtas_query_irq_number(pci_get_pdn(pdev), desc->msi_attrib.entry_nr);
+	if (hwirq < 0) {
+		dev_err(&pdev->dev, "Failed to query HW IRQ: %d\n", hwirq);
+		return hwirq;
+	}
+
+	dev_dbg(&pdev->dev, "%s bridge %pOF %d/%x #%d\n", __func__,
+		phb->dn, virq, hwirq, nr_irqs);
+
+	for (i = 0; i < nr_irqs; i++) {
+		ret = pseries_irq_parent_domain_alloc(domain, virq + i, hwirq + i);
+		if (ret)
+			goto out;
+
+		irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+					      &pseries_msi_irq_chip, domain->host_data);
+	}
+
+	return 0;
+
+out:
+	/* TODO: handle RTAS cleanup in ->msi_finish() ? */
+	irq_domain_free_irqs_parent(domain, virq, i - 1);
+	return ret;
+}
+
+static void pseries_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+				    unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+	struct pci_controller *phb = irq_data_get_irq_chip_data(d);
+
+	pr_debug("%s bridge %pOF %d #%d\n", __func__, phb->dn, virq, nr_irqs);
+
+	/* XIVE domain data is cleared through ->msi_free() */
+}
 
-		/* Read config space back so we can restore after reset */
-		__pci_read_msi_msg(entry, &msg);
-		entry->msg = msg;
+static const struct irq_domain_ops pseries_irq_domain_ops = {
+	.alloc  = pseries_irq_domain_alloc,
+	.free   = pseries_irq_domain_free,
+};
+
+static int __pseries_msi_allocate_domains(struct pci_controller *phb,
+					  unsigned int count)
+{
+	struct irq_domain *parent = irq_get_default_host();
+
+	phb->fwnode = irq_domain_alloc_named_id_fwnode("pSeries-MSI",
+						       phb->global_number);
+	if (!phb->fwnode)
+		return -ENOMEM;
+
+	phb->dev_domain = irq_domain_create_hierarchy(parent, 0, count,
+						      phb->fwnode,
+						      &pseries_irq_domain_ops, phb);
+	if (!phb->dev_domain) {
+		pr_err("PCI: failed to create IRQ domain bridge %pOF (domain %d)\n",
+		       phb->dn, phb->global_number);
+		irq_domain_free_fwnode(phb->fwnode);
+		return -ENOMEM;
+	}
+
+	phb->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(phb->dn),
+						    &pseries_msi_domain_info,
+						    phb->dev_domain);
+	if (!phb->msi_domain) {
+		pr_err("PCI: failed to create MSI IRQ domain bridge %pOF (domain %d)\n",
+		       phb->dn, phb->global_number);
+		irq_domain_free_fwnode(phb->fwnode);
+		irq_domain_remove(phb->dev_domain);
+		return -ENOMEM;
 	}
 
 	return 0;
 }
 
+int pseries_msi_allocate_domains(struct pci_controller *phb)
+{
+	int count;
+
+	if (!__find_pe_total_msi(phb->dn, &count)) {
+		pr_err("PCI: failed to find MSIs for bridge %pOF (domain %d)\n",
+		       phb->dn, phb->global_number);
+		return -ENOSPC;
+	}
+
+	return __pseries_msi_allocate_domains(phb, count);
+}
+
+void pseries_msi_free_domains(struct pci_controller *phb)
+{
+	if (phb->msi_domain)
+		irq_domain_remove(phb->msi_domain);
+	if (phb->dev_domain)
+		irq_domain_remove(phb->dev_domain);
+	if (phb->fwnode)
+		irq_domain_free_fwnode(phb->fwnode);
+}
+
 static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev)
 {
 	/* No LSI -> leave MSIs (if any) configured */
@@ -518,8 +684,6 @@ static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev)
 
 static int rtas_msi_init(void)
 {
-	struct pci_controller *phb;
-
 	query_token  = rtas_token("ibm,query-interrupt-source-number");
 	change_token = rtas_token("ibm,change-msi");
 
@@ -531,16 +695,6 @@ static int rtas_msi_init(void)
 
 	pr_debug("rtas_msi: Registering RTAS MSI callbacks.\n");
 
-	WARN_ON(pseries_pci_controller_ops.setup_msi_irqs);
-	pseries_pci_controller_ops.setup_msi_irqs = rtas_setup_msi_irqs;
-	pseries_pci_controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs;
-
-	list_for_each_entry(phb, &hose_list, list_node) {
-		WARN_ON(phb->controller_ops.setup_msi_irqs);
-		phb->controller_ops.setup_msi_irqs = rtas_setup_msi_irqs;
-		phb->controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs;
-	}
-
 	WARN_ON(ppc_md.pci_irq_fixup);
 	ppc_md.pci_irq_fixup = rtas_msi_pci_irq_fixup;
 
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index a8f9140a24fa..90c9d3531694 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -33,6 +33,8 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn)
 
 	pci_devs_phb_init_dynamic(phb);
 
+	pseries_msi_allocate_domains(phb);
+
 	/* Create EEH devices for the PHB */
 	eeh_phb_pe_create(phb);
 
@@ -74,6 +76,8 @@ int remove_phb_dynamic(struct pci_controller *phb)
 		}
 	}
 
+	pseries_msi_free_domains(phb);
+
 	/* Remove the PCI bus and unregister the bridge device from sysfs */
 	phb->bus = NULL;
 	pci_remove_bus(b);
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 1f051a786fb3..3544778e06d0 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -85,6 +85,8 @@ struct pci_host_bridge;
 int pseries_root_bridge_prepare(struct pci_host_bridge *bridge);
 
 extern struct pci_controller_ops pseries_pci_controller_ops;
+int pseries_msi_allocate_domains(struct pci_controller *phb);
+void pseries_msi_free_domains(struct pci_controller *phb);
 
 unsigned long pseries_memory_block_size(void);
 
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 167f2e1b8d39..56092dccfdb8 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -783,7 +783,7 @@ static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt)
 {
 	int recovered = 0;
 
-	if (!(regs->msr & MSR_RI)) {
+	if (regs_is_unrecoverable(regs)) {
 		/* If MSR_RI isn't set, we cannot recover */
 		pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
 		recovered = 0;
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 0dfaa6ab44cc..f79126f16258 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -486,6 +486,8 @@ static void __init pSeries_discover_phbs(void)
 
 		/* create pci_dn's for DT nodes under this PHB */
 		pci_devs_phb_init_dynamic(phb);
+
+		pseries_msi_allocate_domains(phb);
 	}
 
 	of_node_put(root);
diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c
index 1d829e257996..87f001b4c4e4 100644
--- a/arch/powerpc/platforms/pseries/svm.c
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -63,6 +63,9 @@ void __init svm_swiotlb_init(void)
 
 int set_memory_encrypted(unsigned long addr, int numpages)
 {
+	if (!mem_encrypt_active())
+		return 0;
+
 	if (!PAGE_ALIGNED(addr))
 		return -EINVAL;
 
@@ -73,6 +76,9 @@ int set_memory_encrypted(unsigned long addr, int numpages)
 
 int set_memory_decrypted(unsigned long addr, int numpages)
 {
+	if (!mem_encrypt_active())
+		return 0;
+
 	if (!PAGE_ALIGNED(addr))
 		return -EINVAL;
 
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index b5c1cf1bc64d..b043e3936d21 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -184,7 +184,7 @@ static int h_get_nx_fault(u32 winid, u64 buffer)
  * Note: The hypervisor forwards an interrupt for each fault request.
  *	So one fault CRB to process for each H_GET_NX_FAULT hcall.
  */
-irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
+static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
 {
 	struct pseries_vas_window *txwin = data;
 	struct coprocessor_request_block crb;
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 58283cecbd52..feafcb582e1b 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -560,7 +560,8 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
 	for_each_sg(sglist, sgl, nelems, count)
 		alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE(tbl));
 
-	if (vio_cmo_alloc(viodev, alloc_size))
+	ret = vio_cmo_alloc(viodev, alloc_size);
+	if (ret)
 		goto out_fail;
 	ret = ppc_iommu_map_sg(dev, tbl, sglist, nelems, dma_get_mask(dev),
 			direction, attrs);
@@ -577,7 +578,7 @@ out_deallocate:
 	vio_cmo_dealloc(viodev, alloc_size);
 out_fail:
 	atomic_inc(&viodev->cmo.allocs_failed);
-	return 0;
+	return ret;
 }
 
 static void vio_dma_iommu_unmap_sg(struct device *dev,
diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c
index 5a95b8ea23d8..ff7906b48ca1 100644
--- a/arch/powerpc/sysdev/fsl_rio.c
+++ b/arch/powerpc/sysdev/fsl_rio.c
@@ -108,7 +108,7 @@ int fsl_rio_mcheck_exception(struct pt_regs *regs)
 				 __func__);
 			out_be32((u32 *)(rio_regs_win + RIO_LTLEDCSR),
 				 0);
-			regs_set_return_msr(regs, regs->msr | MSR_RI);
+			regs_set_recoverable(regs);
 			regs_set_return_ip(regs, extable_fixup(entry));
 			return 1;
 		}
diff --git a/arch/powerpc/sysdev/xics/ics-native.c b/arch/powerpc/sysdev/xics/ics-native.c
index d450502f4053..dec7d93a8ba1 100644
--- a/arch/powerpc/sysdev/xics/ics-native.c
+++ b/arch/powerpc/sysdev/xics/ics-native.c
@@ -131,19 +131,15 @@ static struct irq_chip ics_native_irq_chip = {
 	.irq_retrigger		= xics_retrigger,
 };
 
-static int ics_native_map(struct ics *ics, unsigned int virq)
+static int ics_native_check(struct ics *ics, unsigned int hw_irq)
 {
-	unsigned int vec = (unsigned int)virq_to_hw(virq);
 	struct ics_native *in = to_ics_native(ics);
 
-	pr_devel("%s: vec=0x%x\n", __func__, vec);
+	pr_devel("%s: hw_irq=0x%x\n", __func__, hw_irq);
 
-	if (vec < in->ibase || vec >= (in->ibase + in->icount))
+	if (hw_irq < in->ibase || hw_irq >= (in->ibase + in->icount))
 		return -EINVAL;
 
-	irq_set_chip_and_handler(virq, &ics_native_irq_chip, handle_fasteoi_irq);
-	irq_set_chip_data(virq, ics);
-
 	return 0;
 }
 
@@ -177,10 +173,11 @@ static int ics_native_host_match(struct ics *ics, struct device_node *node)
 }
 
 static struct ics ics_native_template = {
-	.map		= ics_native_map,
+	.check		= ics_native_check,
 	.mask_unknown	= ics_native_mask_unknown,
 	.get_server	= ics_native_get_server,
 	.host_match	= ics_native_host_match,
+	.chip = &ics_native_irq_chip,
 };
 
 static int __init ics_native_add_one(struct device_node *np)
diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c
index 823f6c9664cd..c4d95d8beb6f 100644
--- a/arch/powerpc/sysdev/xics/ics-opal.c
+++ b/arch/powerpc/sysdev/xics/ics-opal.c
@@ -62,17 +62,6 @@ static void ics_opal_unmask_irq(struct irq_data *d)
 
 static unsigned int ics_opal_startup(struct irq_data *d)
 {
-#ifdef CONFIG_PCI_MSI
-	/*
-	 * The generic MSI code returns with the interrupt disabled on the
-	 * card, using the MSI mask bits. Firmware doesn't appear to unmask
-	 * at that level, so we do it here by hand.
-	 */
-	if (irq_data_get_msi_desc(d))
-		pci_msi_unmask_irq(d);
-#endif
-
-	/* unmask it */
 	ics_opal_unmask_irq(d);
 	return 0;
 }
@@ -133,7 +122,7 @@ static int ics_opal_set_affinity(struct irq_data *d,
 	}
 	server = ics_opal_mangle_server(wanted_server);
 
-	pr_devel("ics-hal: set-affinity irq %d [hw 0x%x] server: 0x%x/0x%x\n",
+	pr_debug("ics-hal: set-affinity irq %d [hw 0x%x] server: 0x%x/0x%x\n",
 		 d->irq, hw_irq, wanted_server, server);
 
 	rc = opal_set_xive(hw_irq, server, priority);
@@ -157,26 +146,13 @@ static struct irq_chip ics_opal_irq_chip = {
 	.irq_retrigger = xics_retrigger,
 };
 
-static int ics_opal_map(struct ics *ics, unsigned int virq);
-static void ics_opal_mask_unknown(struct ics *ics, unsigned long vec);
-static long ics_opal_get_server(struct ics *ics, unsigned long vec);
-
 static int ics_opal_host_match(struct ics *ics, struct device_node *node)
 {
 	return 1;
 }
 
-/* Only one global & state struct ics */
-static struct ics ics_hal = {
-	.map		= ics_opal_map,
-	.mask_unknown	= ics_opal_mask_unknown,
-	.get_server	= ics_opal_get_server,
-	.host_match	= ics_opal_host_match,
-};
-
-static int ics_opal_map(struct ics *ics, unsigned int virq)
+static int ics_opal_check(struct ics *ics, unsigned int hw_irq)
 {
-	unsigned int hw_irq = (unsigned int)virq_to_hw(virq);
 	int64_t rc;
 	__be16 server;
 	int8_t priority;
@@ -189,9 +165,6 @@ static int ics_opal_map(struct ics *ics, unsigned int virq)
 	if (rc != OPAL_SUCCESS)
 		return -ENXIO;
 
-	irq_set_chip_and_handler(virq, &ics_opal_irq_chip, handle_fasteoi_irq);
-	irq_set_chip_data(virq, &ics_hal);
-
 	return 0;
 }
 
@@ -222,6 +195,15 @@ static long ics_opal_get_server(struct ics *ics, unsigned long vec)
 	return ics_opal_unmangle_server(be16_to_cpu(server));
 }
 
+/* Only one global & state struct ics */
+static struct ics ics_hal = {
+	.check		= ics_opal_check,
+	.mask_unknown	= ics_opal_mask_unknown,
+	.get_server	= ics_opal_get_server,
+	.host_match	= ics_opal_host_match,
+	.chip		= &ics_opal_irq_chip,
+};
+
 int __init ics_opal_init(void)
 {
 	if (!firmware_has_feature(FW_FEATURE_OPAL))
diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c
index 4cf18000f07c..b9da317b7a2d 100644
--- a/arch/powerpc/sysdev/xics/ics-rtas.c
+++ b/arch/powerpc/sysdev/xics/ics-rtas.c
@@ -24,19 +24,6 @@ static int ibm_set_xive;
 static int ibm_int_on;
 static int ibm_int_off;
 
-static int ics_rtas_map(struct ics *ics, unsigned int virq);
-static void ics_rtas_mask_unknown(struct ics *ics, unsigned long vec);
-static long ics_rtas_get_server(struct ics *ics, unsigned long vec);
-static int ics_rtas_host_match(struct ics *ics, struct device_node *node);
-
-/* Only one global & state struct ics */
-static struct ics ics_rtas = {
-	.map		= ics_rtas_map,
-	.mask_unknown	= ics_rtas_mask_unknown,
-	.get_server	= ics_rtas_get_server,
-	.host_match	= ics_rtas_host_match,
-};
-
 static void ics_rtas_unmask_irq(struct irq_data *d)
 {
 	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
@@ -70,15 +57,6 @@ static void ics_rtas_unmask_irq(struct irq_data *d)
 
 static unsigned int ics_rtas_startup(struct irq_data *d)
 {
-#ifdef CONFIG_PCI_MSI
-	/*
-	 * The generic MSI code returns with the interrupt disabled on the
-	 * card, using the MSI mask bits. Firmware doesn't appear to unmask
-	 * at that level, so we do it here by hand.
-	 */
-	if (irq_data_get_msi_desc(d))
-		pci_msi_unmask_irq(d);
-#endif
 	/* unmask it */
 	ics_rtas_unmask_irq(d);
 	return 0;
@@ -146,6 +124,9 @@ static int ics_rtas_set_affinity(struct irq_data *d,
 		return -1;
 	}
 
+	pr_debug("%s: irq %d [hw 0x%x] server: 0x%x\n", __func__, d->irq,
+		 hw_irq, irq_server);
+
 	status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL,
 				     hw_irq, irq_server, xics_status[1]);
 
@@ -169,9 +150,8 @@ static struct irq_chip ics_rtas_irq_chip = {
 	.irq_retrigger = xics_retrigger,
 };
 
-static int ics_rtas_map(struct ics *ics, unsigned int virq)
+static int ics_rtas_check(struct ics *ics, unsigned int hw_irq)
 {
-	unsigned int hw_irq = (unsigned int)virq_to_hw(virq);
 	int status[2];
 	int rc;
 
@@ -183,9 +163,6 @@ static int ics_rtas_map(struct ics *ics, unsigned int virq)
 	if (rc)
 		return -ENXIO;
 
-	irq_set_chip_and_handler(virq, &ics_rtas_irq_chip, handle_fasteoi_irq);
-	irq_set_chip_data(virq, &ics_rtas);
-
 	return 0;
 }
 
@@ -213,6 +190,15 @@ static int ics_rtas_host_match(struct ics *ics, struct device_node *node)
 	return !of_device_is_compatible(node, "chrp,iic");
 }
 
+/* Only one global & state struct ics */
+static struct ics ics_rtas = {
+	.check		= ics_rtas_check,
+	.mask_unknown	= ics_rtas_mask_unknown,
+	.get_server	= ics_rtas_get_server,
+	.host_match	= ics_rtas_host_match,
+	.chip = &ics_rtas_irq_chip,
+};
+
 __init int ics_rtas_init(void)
 {
 	ibm_get_xive = rtas_token("ibm,get-xive");
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
index b14c502e56a8..5c1a157a83b8 100644
--- a/arch/powerpc/sysdev/xics/xics-common.c
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -38,7 +38,7 @@ DEFINE_PER_CPU(struct xics_cppr, xics_cppr);
 
 struct irq_domain *xics_host;
 
-static LIST_HEAD(ics_list);
+static struct ics *xics_ics;
 
 void xics_update_irq_servers(void)
 {
@@ -111,12 +111,11 @@ void xics_setup_cpu(void)
 
 void xics_mask_unknown_vec(unsigned int vec)
 {
-	struct ics *ics;
-
 	pr_err("Interrupt 0x%x (real) is invalid, disabling it.\n", vec);
 
-	list_for_each_entry(ics, &ics_list, link)
-		ics->mask_unknown(ics, vec);
+	if (WARN_ON(!xics_ics))
+		return;
+	xics_ics->mask_unknown(xics_ics, vec);
 }
 
 
@@ -133,7 +132,7 @@ static void xics_request_ipi(void)
 	 * IPIs are marked IRQF_PERCPU. The handler was set in map.
 	 */
 	BUG_ON(request_irq(ipi, icp_ops->ipi_action,
-			   IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL));
+			   IRQF_NO_DEBUG | IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL));
 }
 
 void __init xics_smp_probe(void)
@@ -184,6 +183,8 @@ void xics_migrate_irqs_away(void)
 	unsigned int irq, virq;
 	struct irq_desc *desc;
 
+	pr_debug("%s: CPU %u\n", __func__, cpu);
+
 	/* If we used to be the default server, move to the new "boot_cpuid" */
 	if (hw_cpu == xics_default_server)
 		xics_update_irq_servers();
@@ -198,7 +199,7 @@ void xics_migrate_irqs_away(void)
 		struct irq_chip *chip;
 		long server;
 		unsigned long flags;
-		struct ics *ics;
+		struct irq_data *irqd;
 
 		/* We can't set affinity on ISA interrupts */
 		if (virq < NR_IRQS_LEGACY)
@@ -206,9 +207,11 @@ void xics_migrate_irqs_away(void)
 		/* We only need to migrate enabled IRQS */
 		if (!desc->action)
 			continue;
-		if (desc->irq_data.domain != xics_host)
+		/* We need a mapping in the XICS IRQ domain */
+		irqd = irq_domain_get_irq_data(xics_host, virq);
+		if (!irqd)
 			continue;
-		irq = desc->irq_data.hwirq;
+		irq = irqd_to_hwirq(irqd);
 		/* We need to get IPIs still. */
 		if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
 			continue;
@@ -219,13 +222,10 @@ void xics_migrate_irqs_away(void)
 		raw_spin_lock_irqsave(&desc->lock, flags);
 
 		/* Locate interrupt server */
-		server = -1;
-		ics = irq_desc_get_chip_data(desc);
-		if (ics)
-			server = ics->get_server(ics, irq);
+		server = xics_ics->get_server(xics_ics, irq);
 		if (server < 0) {
-			printk(KERN_ERR "%s: Can't find server for irq %d\n",
-			       __func__, irq);
+			pr_err("%s: Can't find server for irq %d/%x\n",
+			       __func__, virq, irq);
 			goto unlock;
 		}
 
@@ -307,13 +307,9 @@ int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask,
 static int xics_host_match(struct irq_domain *h, struct device_node *node,
 			   enum irq_domain_bus_token bus_token)
 {
-	struct ics *ics;
-
-	list_for_each_entry(ics, &ics_list, link)
-		if (ics->host_match(ics, node))
-			return 1;
-
-	return 0;
+	if (WARN_ON(!xics_ics))
+		return 0;
+	return xics_ics->host_match(xics_ics, node) ? 1 : 0;
 }
 
 /* Dummies */
@@ -327,12 +323,10 @@ static struct irq_chip xics_ipi_chip = {
 	.irq_unmask = xics_ipi_unmask,
 };
 
-static int xics_host_map(struct irq_domain *h, unsigned int virq,
-			 irq_hw_number_t hw)
+static int xics_host_map(struct irq_domain *domain, unsigned int virq,
+			 irq_hw_number_t hwirq)
 {
-	struct ics *ics;
-
-	pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw);
+	pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hwirq);
 
 	/*
 	 * Mark interrupts as edge sensitive by default so that resend
@@ -342,18 +336,23 @@ static int xics_host_map(struct irq_domain *h, unsigned int virq,
 	irq_clear_status_flags(virq, IRQ_LEVEL);
 
 	/* Don't call into ICS for IPIs */
-	if (hw == XICS_IPI) {
+	if (hwirq == XICS_IPI) {
 		irq_set_chip_and_handler(virq, &xics_ipi_chip,
 					 handle_percpu_irq);
 		return 0;
 	}
 
-	/* Let the ICS setup the chip data */
-	list_for_each_entry(ics, &ics_list, link)
-		if (ics->map(ics, virq) == 0)
-			return 0;
+	if (WARN_ON(!xics_ics))
+		return -EINVAL;
+
+	if (xics_ics->check(xics_ics, hwirq))
+		return -EINVAL;
+
+	/* No chip data for the XICS domain */
+	irq_domain_set_info(domain, virq, hwirq, xics_ics->chip,
+			    NULL, handle_fasteoi_irq, NULL, NULL);
 
-	return -EINVAL;
+	return 0;
 }
 
 static int xics_host_xlate(struct irq_domain *h, struct device_node *ct,
@@ -412,22 +411,76 @@ int xics_retrigger(struct irq_data *data)
 	return 0;
 }
 
+#ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
+static int xics_host_domain_translate(struct irq_domain *d, struct irq_fwspec *fwspec,
+				      unsigned long *hwirq, unsigned int *type)
+{
+	return xics_host_xlate(d, to_of_node(fwspec->fwnode), fwspec->param,
+			       fwspec->param_count, hwirq, type);
+}
+
+static int xics_host_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				  unsigned int nr_irqs, void *arg)
+{
+	struct irq_fwspec *fwspec = arg;
+	irq_hw_number_t hwirq;
+	unsigned int type = IRQ_TYPE_NONE;
+	int i, rc;
+
+	rc = xics_host_domain_translate(domain, fwspec, &hwirq, &type);
+	if (rc)
+		return rc;
+
+	pr_debug("%s %d/%lx #%d\n", __func__, virq, hwirq, nr_irqs);
+
+	for (i = 0; i < nr_irqs; i++)
+		irq_domain_set_info(domain, virq + i, hwirq + i, xics_ics->chip,
+				    xics_ics, handle_fasteoi_irq, NULL, NULL);
+
+	return 0;
+}
+
+static void xics_host_domain_free(struct irq_domain *domain,
+				  unsigned int virq, unsigned int nr_irqs)
+{
+	pr_debug("%s %d #%d\n", __func__, virq, nr_irqs);
+}
+#endif
+
 static const struct irq_domain_ops xics_host_ops = {
+#ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
+	.alloc	= xics_host_domain_alloc,
+	.free	= xics_host_domain_free,
+	.translate = xics_host_domain_translate,
+#endif
 	.match = xics_host_match,
 	.map = xics_host_map,
 	.xlate = xics_host_xlate,
 };
 
-static void __init xics_init_host(void)
+static int __init xics_allocate_domain(void)
 {
-	xics_host = irq_domain_add_tree(NULL, &xics_host_ops, NULL);
-	BUG_ON(xics_host == NULL);
+	struct fwnode_handle *fn;
+
+	fn = irq_domain_alloc_named_fwnode("XICS");
+	if (!fn)
+		return -ENOMEM;
+
+	xics_host = irq_domain_create_tree(fn, &xics_host_ops, NULL);
+	if (!xics_host) {
+		irq_domain_free_fwnode(fn);
+		return -ENOMEM;
+	}
+
 	irq_set_default_host(xics_host);
+	return 0;
 }
 
 void __init xics_register_ics(struct ics *ics)
 {
-	list_add(&ics->link, &ics_list);
+	if (WARN_ONCE(xics_ics, "XICS: Source Controller is already defined !"))
+		return;
+	xics_ics = ics;
 }
 
 static void __init xics_get_server_size(void)
@@ -484,6 +537,8 @@ void __init xics_init(void)
 	/* Initialize common bits */
 	xics_get_server_size();
 	xics_update_irq_servers();
-	xics_init_host();
+	rc = xics_allocate_domain();
+	if (rc < 0)
+		pr_err("XICS: Failed to create IRQ domain");
 	xics_setup_cpu();
 }
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 8183ca343675..c732ce5a3e1a 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -21,7 +21,6 @@
 #include <linux/msi.h>
 #include <linux/vmalloc.h>
 
-#include <asm/debugfs.h>
 #include <asm/prom.h>
 #include <asm/io.h>
 #include <asm/smp.h>
@@ -313,11 +312,10 @@ void xmon_xive_get_irq_all(void)
 	struct irq_desc *desc;
 
 	for_each_irq_desc(i, desc) {
-		struct irq_data *d = irq_desc_get_irq_data(desc);
-		unsigned int hwirq = (unsigned int)irqd_to_hwirq(d);
+		struct irq_data *d = irq_domain_get_irq_data(xive_irq_domain, i);
 
-		if (d->domain == xive_irq_domain)
-			xmon_xive_get_irq_config(hwirq, d);
+		if (d)
+			xmon_xive_get_irq_config(irqd_to_hwirq(d), d);
 	}
 }
 
@@ -617,16 +615,6 @@ static unsigned int xive_irq_startup(struct irq_data *d)
 	pr_devel("xive_irq_startup: irq %d [0x%x] data @%p\n",
 		 d->irq, hw_irq, d);
 
-#ifdef CONFIG_PCI_MSI
-	/*
-	 * The generic MSI code returns with the interrupt disabled on the
-	 * card, using the MSI mask bits. Firmware doesn't appear to unmask
-	 * at that level, so we do it here by hand.
-	 */
-	if (irq_data_get_msi_desc(d))
-		pci_msi_unmask_irq(d);
-#endif
-
 	/* Pick a target */
 	target = xive_pick_irq_target(d, irq_data_get_affinity_mask(d));
 	if (target == XIVE_INVALID_TARGET) {
@@ -714,16 +702,12 @@ static int xive_irq_set_affinity(struct irq_data *d,
 	u32 target, old_target;
 	int rc = 0;
 
-	pr_devel("xive_irq_set_affinity: irq %d\n", d->irq);
+	pr_debug("%s: irq %d/%x\n", __func__, d->irq, hw_irq);
 
 	/* Is this valid ? */
 	if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids)
 		return -EINVAL;
 
-	/* Don't do anything if the interrupt isn't started */
-	if (!irqd_is_started(d))
-		return IRQ_SET_MASK_OK;
-
 	/*
 	 * If existing target is already in the new mask, and is
 	 * online then do nothing.
@@ -759,7 +743,7 @@ static int xive_irq_set_affinity(struct irq_data *d,
 		return rc;
 	}
 
-	pr_devel("  target: 0x%x\n", target);
+	pr_debug("  target: 0x%x\n", target);
 	xd->target = target;
 
 	/* Give up previous target */
@@ -990,6 +974,8 @@ EXPORT_SYMBOL_GPL(is_xive_irq);
 
 void xive_cleanup_irq_data(struct xive_irq_data *xd)
 {
+	pr_debug("%s for HW %x\n", __func__, xd->hw_irq);
+
 	if (xd->eoi_mmio) {
 		iounmap(xd->eoi_mmio);
 		if (xd->eoi_mmio == xd->trig_mmio)
@@ -1031,7 +1017,7 @@ static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw)
 	return 0;
 }
 
-static void xive_irq_free_data(unsigned int virq)
+void xive_irq_free_data(unsigned int virq)
 {
 	struct xive_irq_data *xd = irq_get_handler_data(virq);
 
@@ -1041,6 +1027,7 @@ static void xive_irq_free_data(unsigned int virq)
 	xive_cleanup_irq_data(xd);
 	kfree(xd);
 }
+EXPORT_SYMBOL_GPL(xive_irq_free_data);
 
 #ifdef CONFIG_SMP
 
@@ -1179,7 +1166,7 @@ static int xive_request_ipi(unsigned int cpu)
 		return 0;
 
 	ret = request_irq(xid->irq, xive_muxed_ipi_action,
-			  IRQF_PERCPU | IRQF_NO_THREAD,
+			  IRQF_NO_DEBUG | IRQF_PERCPU | IRQF_NO_THREAD,
 			  xid->name, NULL);
 
 	WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret);
@@ -1379,7 +1366,71 @@ static void xive_irq_domain_debug_show(struct seq_file *m, struct irq_domain *d,
 }
 #endif
 
+#ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
+static int xive_irq_domain_translate(struct irq_domain *d,
+				     struct irq_fwspec *fwspec,
+				     unsigned long *hwirq,
+				     unsigned int *type)
+{
+	return xive_irq_domain_xlate(d, to_of_node(fwspec->fwnode),
+				     fwspec->param, fwspec->param_count,
+				     hwirq, type);
+}
+
+static int xive_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				 unsigned int nr_irqs, void *arg)
+{
+	struct irq_fwspec *fwspec = arg;
+	irq_hw_number_t hwirq;
+	unsigned int type = IRQ_TYPE_NONE;
+	int i, rc;
+
+	rc = xive_irq_domain_translate(domain, fwspec, &hwirq, &type);
+	if (rc)
+		return rc;
+
+	pr_debug("%s %d/%lx #%d\n", __func__, virq, hwirq, nr_irqs);
+
+	for (i = 0; i < nr_irqs; i++) {
+		/* TODO: call xive_irq_domain_map() */
+
+		/*
+		 * Mark interrupts as edge sensitive by default so that resend
+		 * actually works. Will fix that up below if needed.
+		 */
+		irq_clear_status_flags(virq, IRQ_LEVEL);
+
+		/* allocates and sets handler data */
+		rc = xive_irq_alloc_data(virq + i, hwirq + i);
+		if (rc)
+			return rc;
+
+		irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+					      &xive_irq_chip, domain->host_data);
+		irq_set_handler(virq + i, handle_fasteoi_irq);
+	}
+
+	return 0;
+}
+
+static void xive_irq_domain_free(struct irq_domain *domain,
+				 unsigned int virq, unsigned int nr_irqs)
+{
+	int i;
+
+	pr_debug("%s %d #%d\n", __func__, virq, nr_irqs);
+
+	for (i = 0; i < nr_irqs; i++)
+		xive_irq_free_data(virq + i);
+}
+#endif
+
 static const struct irq_domain_ops xive_irq_domain_ops = {
+#ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
+	.alloc	= xive_irq_domain_alloc,
+	.free	= xive_irq_domain_free,
+	.translate = xive_irq_domain_translate,
+#endif
 	.match = xive_irq_domain_match,
 	.map = xive_irq_domain_map,
 	.unmap = xive_irq_domain_unmap,
@@ -1717,9 +1768,9 @@ static int xive_core_debug_show(struct seq_file *m, void *private)
 		xive_debug_show_cpu(m, cpu);
 
 	for_each_irq_desc(i, desc) {
-		struct irq_data *d = irq_desc_get_irq_data(desc);
+		struct irq_data *d = irq_domain_get_irq_data(xive_irq_domain, i);
 
-		if (d->domain == xive_irq_domain)
+		if (d)
 			xive_debug_show_irq(m, d);
 	}
 	return 0;
@@ -1729,7 +1780,7 @@ DEFINE_SHOW_ATTRIBUTE(xive_core_debug);
 int xive_core_debug_init(void)
 {
 	if (xive_enabled())
-		debugfs_create_file("xive", 0400, powerpc_debugfs_root,
+		debugfs_create_file("xive", 0400, arch_debugfs_dir,
 				    NULL, &xive_core_debug_fops);
 	return 0;
 }
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 57e3f1540435..1aec282cd650 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -41,6 +41,7 @@ static u32 xive_queue_shift;
 static u32 xive_pool_vps = XIVE_INVALID_VP;
 static struct kmem_cache *xive_provision_cache;
 static bool xive_has_single_esc;
+static bool xive_has_save_restore;
 
 int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
 {
@@ -588,6 +589,9 @@ bool __init xive_native_init(void)
 	if (of_get_property(np, "single-escalation-support", NULL) != NULL)
 		xive_has_single_esc = true;
 
+	if (of_get_property(np, "vp-save-restore", NULL))
+		xive_has_save_restore = true;
+
 	/* Configure Thread Management areas for KVM */
 	for_each_possible_cpu(cpu)
 		kvmppc_set_xive_tima(cpu, r.start, tima);
@@ -752,6 +756,12 @@ bool xive_native_has_single_escalation(void)
 }
 EXPORT_SYMBOL_GPL(xive_native_has_single_escalation);
 
+bool xive_native_has_save_restore(void)
+{
+	return xive_has_save_restore;
+}
+EXPORT_SYMBOL_GPL(xive_native_has_save_restore);
+
 int xive_native_get_queue_info(u32 vp_id, u32 prio,
 			       u64 *out_qpage,
 			       u64 *out_qsize,
diff --git a/arch/powerpc/tools/head_check.sh b/arch/powerpc/tools/head_check.sh
index e32d3162e5ed..689907cda996 100644
--- a/arch/powerpc/tools/head_check.sh
+++ b/arch/powerpc/tools/head_check.sh
@@ -49,30 +49,30 @@ vmlinux="$2"
 $nm "$vmlinux" | grep -e " [TA] _stext$" -e " t start_first_256B$" -e " a text_start$" -e " t start_text$" > .tmp_symbols.txt
 
 
-vma=$(cat .tmp_symbols.txt | grep -e " [TA] _stext$" | cut -d' ' -f1)
+vma=$(grep -e " [TA] _stext$" .tmp_symbols.txt | cut -d' ' -f1)
 
-expected_start_head_addr=$vma
+expected_start_head_addr="$vma"
 
-start_head_addr=$(cat .tmp_symbols.txt | grep " t start_first_256B$" | cut -d' ' -f1)
+start_head_addr=$(grep " t start_first_256B$" .tmp_symbols.txt | cut -d' ' -f1)
 
 if [ "$start_head_addr" != "$expected_start_head_addr" ]; then
-	echo "ERROR: head code starts at $start_head_addr, should be $expected_start_head_addr"
-	echo "ERROR: try to enable LD_HEAD_STUB_CATCH config option"
-	echo "ERROR: see comments in arch/powerpc/tools/head_check.sh"
+	echo "ERROR: head code starts at $start_head_addr, should be $expected_start_head_addr" 1>&2
+	echo "ERROR: try to enable LD_HEAD_STUB_CATCH config option" 1>&2
+	echo "ERROR: see comments in arch/powerpc/tools/head_check.sh" 1>&2
 
 	exit 1
 fi
 
-top_vma=$(echo $vma | cut -d'0' -f1)
+top_vma=$(echo "$vma" | cut -d'0' -f1)
 
-expected_start_text_addr=$(cat .tmp_symbols.txt | grep " a text_start$" | cut -d' ' -f1 | sed "s/^0/$top_vma/")
+expected_start_text_addr=$(grep " a text_start$" .tmp_symbols.txt | cut -d' ' -f1 | sed "s/^0/$top_vma/")
 
-start_text_addr=$(cat .tmp_symbols.txt | grep " t start_text$" | cut -d' ' -f1)
+start_text_addr=$(grep " t start_text$" .tmp_symbols.txt | cut -d' ' -f1)
 
 if [ "$start_text_addr" != "$expected_start_text_addr" ]; then
-	echo "ERROR: start_text address is $start_text_addr, should be $expected_start_text_addr"
-	echo "ERROR: try to enable LD_HEAD_STUB_CATCH config option"
-	echo "ERROR: see comments in arch/powerpc/tools/head_check.sh"
+	echo "ERROR: start_text address is $start_text_addr, should be $expected_start_text_addr" 1>&2
+	echo "ERROR: try to enable LD_HEAD_STUB_CATCH config option" 1>&2
+	echo "ERROR: see comments in arch/powerpc/tools/head_check.sh" 1>&2
 
 	exit 1
 fi
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index da4d7f225a40..dd8241c009e5 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -26,8 +26,8 @@
 #include <linux/ctype.h>
 #include <linux/highmem.h>
 #include <linux/security.h>
+#include <linux/debugfs.h>
 
-#include <asm/debugfs.h>
 #include <asm/ptrace.h>
 #include <asm/smp.h>
 #include <asm/string.h>
@@ -482,16 +482,6 @@ static inline void get_output_lock(void) {}
 static inline void release_output_lock(void) {}
 #endif
 
-static inline int unrecoverable_excp(struct pt_regs *regs)
-{
-#if defined(CONFIG_4xx) || defined(CONFIG_PPC_BOOK3E)
-	/* We have no MSR_RI bit on 4xx or Book3e, so we simply return false */
-	return 0;
-#else
-	return ((regs->msr & MSR_RI) == 0);
-#endif
-}
-
 static void xmon_touch_watchdogs(void)
 {
 	touch_softlockup_watchdog_sync();
@@ -565,7 +555,7 @@ static int xmon_core(struct pt_regs *regs, volatile int fromipi)
 	bp = NULL;
 	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT))
 		bp = at_breakpoint(regs->nip);
-	if (bp || unrecoverable_excp(regs))
+	if (bp || regs_is_unrecoverable(regs))
 		fromipi = 0;
 
 	if (!fromipi) {
@@ -577,7 +567,7 @@ static int xmon_core(struct pt_regs *regs, volatile int fromipi)
 			       cpu, BP_NUM(bp));
 			xmon_print_symbol(regs->nip, " ", ")\n");
 		}
-		if (unrecoverable_excp(regs))
+		if (regs_is_unrecoverable(regs))
 			printf("WARNING: exception is not recoverable, "
 			       "can't continue\n");
 		release_output_lock();
@@ -693,7 +683,7 @@ static int xmon_core(struct pt_regs *regs, volatile int fromipi)
 			printf("Stopped at breakpoint %tx (", BP_NUM(bp));
 			xmon_print_symbol(regs->nip, " ", ")\n");
 		}
-		if (unrecoverable_excp(regs))
+		if (regs_is_unrecoverable(regs))
 			printf("WARNING: exception is not recoverable, "
 			       "can't continue\n");
 		remove_bpts();
@@ -4077,8 +4067,8 @@ DEFINE_SIMPLE_ATTRIBUTE(xmon_dbgfs_ops, xmon_dbgfs_get,
 
 static int __init setup_xmon_dbgfs(void)
 {
-	debugfs_create_file("xmon", 0600, powerpc_debugfs_root, NULL,
-				&xmon_dbgfs_ops);
+	debugfs_create_file("xmon", 0600, arch_debugfs_dir, NULL,
+			    &xmon_dbgfs_ops);
 	return 0;
 }
 device_initcall(setup_xmon_dbgfs);
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 1452584f83be..c3f3fd583e04 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -13,9 +13,7 @@ config 32BIT
 config RISCV
 	def_bool y
 	select ARCH_CLOCKSOURCE_INIT
-	select ARCH_SUPPORTS_ATOMIC_RMW
-	select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
-	select ARCH_STACKWALK
+	select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
 	select ARCH_HAS_BINFMT_FLAT
 	select ARCH_HAS_DEBUG_VM_PGTABLE
 	select ARCH_HAS_DEBUG_VIRTUAL if MMU
@@ -31,14 +29,19 @@ config RISCV
 	select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
 	select ARCH_HAS_STRICT_MODULE_RWX if MMU && !XIP_KERNEL
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
+	select ARCH_HAS_UBSAN_SANITIZE_ALL
 	select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
 	select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
+	select ARCH_STACKWALK
+	select ARCH_SUPPORTS_ATOMIC_RMW
+	select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
 	select ARCH_SUPPORTS_HUGETLBFS if MMU
 	select ARCH_USE_MEMTEST
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
 	select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
+	select BUILDTIME_TABLE_SORT if MMU
 	select CLONE_BACKWARDS
 	select CLINT_TIMER if !MMU
 	select COMMON_CLK
@@ -48,9 +51,11 @@ config RISCV
 	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
 	select GENERIC_EARLY_IOREMAP
 	select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO
-	select GENERIC_IOREMAP
+	select GENERIC_IDLE_POLL_SETUP
+	select GENERIC_IOREMAP if MMU
 	select GENERIC_IRQ_MULTI_HANDLER
 	select GENERIC_IRQ_SHOW
+	select GENERIC_IRQ_SHOW_LEVEL
 	select GENERIC_LIB_DEVMEM_IS_ALLOWED
 	select GENERIC_PCI_IOMAP
 	select GENERIC_PTDUMP if MMU
@@ -70,6 +75,7 @@ config RISCV
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU
+	select HAVE_ARCH_THREAD_STRUCT_WHITELIST
 	select HAVE_ARCH_VMAP_STACK if MMU && 64BIT
 	select HAVE_ASM_MODVERSIONS
 	select HAVE_CONTEXT_TRACKING
@@ -95,6 +101,7 @@ config RISCV
 	select HAVE_STACKPROTECTOR
 	select HAVE_SYSCALL_TRACEPOINTS
 	select IRQ_DOMAIN
+	select IRQ_FORCED_THREADING
 	select MODULES_USE_ELF_RELA if MODULES
 	select MODULE_SECTIONS if MODULES
 	select OF
@@ -107,6 +114,7 @@ config RISCV
 	select SPARSE_IRQ
 	select SYSCTL_EXCEPTION_TRACE
 	select THREAD_INFO_IN_TASK
+	select TRACE_IRQFLAGS_SUPPORT
 	select UACCESS_MEMCPY if !MMU
 	select ZONE_DMA32 if 64BIT
 
@@ -176,9 +184,6 @@ config ARCH_SUPPORTS_UPROBES
 config STACKTRACE_SUPPORT
 	def_bool y
 
-config TRACE_IRQFLAGS_SUPPORT
-	def_bool y
-
 config GENERIC_BUG
 	def_bool y
 	depends on BUG
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index bc74afdbf31e..0eb4568fbd29 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -41,7 +41,7 @@ endif
 ifeq ($(CONFIG_LD_IS_LLD),y)
 	KBUILD_CFLAGS += -mno-relax
 	KBUILD_AFLAGS += -mno-relax
-ifneq ($(LLVM_IAS),1)
+ifndef CONFIG_AS_IS_LLVM
 	KBUILD_CFLAGS += -Wa,-mno-relax
 	KBUILD_AFLAGS += -Wa,-mno-relax
 endif
@@ -108,6 +108,12 @@ PHONY += vdso_install
 vdso_install:
 	$(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@
 
+ifeq ($(CONFIG_MMU),y)
+prepare: vdso_prepare
+vdso_prepare: prepare0
+	$(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso include/generated/vdso-offsets.h
+endif
+
 ifneq ($(CONFIG_XIP_KERNEL),y)
 ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN),yy)
 KBUILD_IMAGE := $(boot)/loader.bin
@@ -126,8 +132,11 @@ $(BOOT_TARGETS): vmlinux
 Image.%: Image
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
-zinstall install:
-	$(Q)$(MAKE) $(build)=$(boot) $@
+install: install-image = Image
+zinstall: install-image = Image.gz
+install zinstall:
+	$(CONFIG_SHELL) $(srctree)/$(boot)/install.sh $(KERNELRELEASE) \
+	$(boot)/$(install-image) System.map "$(INSTALL_PATH)"
 
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile
index 6bf299f70c27..becd0621071c 100644
--- a/arch/riscv/boot/Makefile
+++ b/arch/riscv/boot/Makefile
@@ -58,11 +58,3 @@ $(obj)/Image.lzo: $(obj)/Image FORCE
 
 $(obj)/loader.bin: $(obj)/loader FORCE
 	$(call if_changed,objcopy)
-
-install:
-	$(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
-	$(obj)/Image System.map "$(INSTALL_PATH)"
-
-zinstall:
-	$(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
-	$(obj)/Image.gz System.map "$(INSTALL_PATH)"
diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
index baea7d204639..b254c60589a1 100644
--- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
+++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
@@ -16,10 +16,14 @@
 
 	aliases {
 		ethernet0 = &emac1;
+		serial0 = &serial0;
+		serial1 = &serial1;
+		serial2 = &serial2;
+		serial3 = &serial3;
 	};
 
 	chosen {
-		stdout-path = &serial0;
+		stdout-path = "serial0:115200n8";
 	};
 
 	cpus {
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index bc68231a8fb7..4ebc80315f01 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -39,10 +39,12 @@ CONFIG_PCI=y
 CONFIG_PCIEPORTBUS=y
 CONFIG_PCI_HOST_GENERIC=y
 CONFIG_PCIE_XILINX=y
+CONFIG_PCIE_FU740=y
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_NVME=m
 CONFIG_BLK_DEV_SD=y
 CONFIG_BLK_DEV_SR=y
 CONFIG_SCSI_VIRTIO=y
@@ -108,6 +110,8 @@ CONFIG_NFS_V4_1=y
 CONFIG_NFS_V4_2=y
 CONFIG_ROOT_NFS=y
 CONFIG_9P_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=m
 CONFIG_CRYPTO_USER_API_HASH=y
 CONFIG_CRYPTO_DEV_VIRTIO=y
 CONFIG_PRINTK_TIME=y
diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h
index f4b490cd0e5d..f53c40026c7a 100644
--- a/arch/riscv/include/asm/elf.h
+++ b/arch/riscv/include/asm/elf.h
@@ -42,6 +42,9 @@
  */
 #define ELF_ET_DYN_BASE		((TASK_SIZE / 3) * 2)
 
+#ifdef CONFIG_64BIT
+#define STACK_RND_MASK		(0x3ffff >> (PAGE_SHIFT - 12))
+#endif
 /*
  * This yields a mask that user programs can use to figure out what
  * instruction set this CPU supports.  This could be done in user space,
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index b0ca5058e7ae..109c97e991a6 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -79,8 +79,8 @@ typedef struct page *pgtable_t;
 #endif
 
 #ifdef CONFIG_MMU
-extern unsigned long pfn_base;
-#define ARCH_PFN_OFFSET		(pfn_base)
+extern unsigned long riscv_pfn_base;
+#define ARCH_PFN_OFFSET		(riscv_pfn_base)
 #else
 #define ARCH_PFN_OFFSET		(PAGE_OFFSET >> PAGE_SHIFT)
 #endif /* CONFIG_MMU */
@@ -91,10 +91,8 @@ struct kernel_mapping {
 	uintptr_t size;
 	/* Offset between linear mapping virtual address and kernel load address */
 	unsigned long va_pa_offset;
-#ifdef CONFIG_64BIT
 	/* Offset between kernel mapping virtual address and kernel load address */
 	unsigned long va_kernel_pa_offset;
-#endif
 	unsigned long va_kernel_xip_pa_offset;
 #ifdef CONFIG_XIP_KERNEL
 	uintptr_t xiprom;
@@ -105,11 +103,11 @@ struct kernel_mapping {
 extern struct kernel_mapping kernel_map;
 extern phys_addr_t phys_ram_base;
 
-#ifdef CONFIG_64BIT
 #define is_kernel_mapping(x)	\
 	((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size))
+
 #define is_linear_mapping(x)	\
-	((x) >= PAGE_OFFSET && (x) < kernel_map.virt_addr)
+	((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < kernel_map.virt_addr))
 
 #define linear_mapping_pa_to_va(x)	((void *)((unsigned long)(x) + kernel_map.va_pa_offset))
 #define kernel_mapping_pa_to_va(y)	({						\
@@ -123,7 +121,7 @@ extern phys_addr_t phys_ram_base;
 #define linear_mapping_va_to_pa(x)	((unsigned long)(x) - kernel_map.va_pa_offset)
 #define kernel_mapping_va_to_pa(y) ({						\
 	unsigned long _y = y;							\
-	(_y < kernel_map.virt_addr + XIP_OFFSET) ?					\
+	(IS_ENABLED(CONFIG_XIP_KERNEL) && _y < kernel_map.virt_addr + XIP_OFFSET) ?	\
 		((unsigned long)(_y) - kernel_map.va_kernel_xip_pa_offset) :		\
 		((unsigned long)(_y) - kernel_map.va_kernel_pa_offset - XIP_OFFSET);	\
 	})
@@ -133,15 +131,6 @@ extern phys_addr_t phys_ram_base;
 	is_linear_mapping(_x) ?							\
 		linear_mapping_va_to_pa(_x) : kernel_mapping_va_to_pa(_x);	\
 	})
-#else
-#define is_kernel_mapping(x)	\
-	((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size))
-#define is_linear_mapping(x)	\
-	((x) >= PAGE_OFFSET)
-
-#define __pa_to_va_nodebug(x)  ((void *)((unsigned long) (x) + kernel_map.va_pa_offset))
-#define __va_to_pa_nodebug(x)  ((unsigned long)(x) - kernel_map.va_pa_offset)
-#endif /* CONFIG_64BIT */
 
 #ifdef CONFIG_DEBUG_VIRTUAL
 extern phys_addr_t __virt_to_phys(unsigned long x);
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index 021ed64ee608..46b492c78cbb 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -37,6 +37,14 @@ struct thread_struct {
 	unsigned long bad_cause;
 };
 
+/* Whitelist the fstate from the task_struct for hardened usercopy */
+static inline void arch_thread_struct_whitelist(unsigned long *offset,
+						unsigned long *size)
+{
+	*offset = offsetof(struct thread_struct, fstate);
+	*size = sizeof_field(struct thread_struct, fstate);
+}
+
 #define INIT_THREAD {					\
 	.sp = sizeof(init_stack) + (long)&init_stack,	\
 }
diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h
index 1453a2f563bc..893e47195e30 100644
--- a/arch/riscv/include/asm/vdso.h
+++ b/arch/riscv/include/asm/vdso.h
@@ -8,26 +8,25 @@
 #ifndef _ASM_RISCV_VDSO_H
 #define _ASM_RISCV_VDSO_H
 
+
+/*
+ * All systems with an MMU have a VDSO, but systems without an MMU don't
+ * support shared libraries and therefor don't have one.
+ */
+#ifdef CONFIG_MMU
+
 #include <linux/types.h>
+#include <generated/vdso-offsets.h>
 
 #ifndef CONFIG_GENERIC_TIME_VSYSCALL
 struct vdso_data {
 };
 #endif
 
-/*
- * The VDSO symbols are mapped into Linux so we can just use regular symbol
- * addressing to get their offsets in userspace.  The symbols are mapped at an
- * offset of 0, but since the linker must support setting weak undefined
- * symbols to the absolute address 0 it also happens to support other low
- * addresses even when the code model suggests those low addresses would not
- * otherwise be availiable.
- */
 #define VDSO_SYMBOL(base, name)							\
-({										\
-	extern const char __vdso_##name[];					\
-	(void __user *)((unsigned long)(base) + __vdso_##name);			\
-})
+	(void __user *)((unsigned long)(base) + __vdso_##name##_offset)
+
+#endif /* CONFIG_MMU */
 
 asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t);
 
diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c
index d86781357044..90deabfe63ea 100644
--- a/arch/riscv/kernel/cacheinfo.c
+++ b/arch/riscv/kernel/cacheinfo.c
@@ -113,7 +113,7 @@ static void fill_cacheinfo(struct cacheinfo **this_leaf,
 	}
 }
 
-static int __init_cache_level(unsigned int cpu)
+int init_cache_level(unsigned int cpu)
 {
 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 	struct device_node *np = of_cpu_device_node_get(cpu);
@@ -155,7 +155,7 @@ static int __init_cache_level(unsigned int cpu)
 	return 0;
 }
 
-static int __populate_cache_leaves(unsigned int cpu)
+int populate_cache_leaves(unsigned int cpu)
 {
 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
@@ -187,6 +187,3 @@ static int __populate_cache_leaves(unsigned int cpu)
 
 	return 0;
 }
-
-DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
-DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
diff --git a/arch/riscv/kernel/probes/decode-insn.c b/arch/riscv/kernel/probes/decode-insn.c
index 0ed043acc882..64f6183b4717 100644
--- a/arch/riscv/kernel/probes/decode-insn.c
+++ b/arch/riscv/kernel/probes/decode-insn.c
@@ -38,11 +38,10 @@ riscv_probe_decode_insn(probe_opcode_t *addr, struct arch_probe_insn *api)
 	RISCV_INSN_REJECTED(c_ebreak,		insn);
 #endif
 
-	RISCV_INSN_REJECTED(auipc,		insn);
-	RISCV_INSN_REJECTED(branch,		insn);
-
 	RISCV_INSN_SET_SIMULATE(jal,		insn);
 	RISCV_INSN_SET_SIMULATE(jalr,		insn);
+	RISCV_INSN_SET_SIMULATE(auipc,		insn);
+	RISCV_INSN_SET_SIMULATE(branch,		insn);
 
 	return INSN_GOOD;
 }
diff --git a/arch/riscv/kernel/probes/simulate-insn.c b/arch/riscv/kernel/probes/simulate-insn.c
index 2519ce26377d..d73e96f6ed7c 100644
--- a/arch/riscv/kernel/probes/simulate-insn.c
+++ b/arch/riscv/kernel/probes/simulate-insn.c
@@ -83,3 +83,115 @@ bool __kprobes simulate_jalr(u32 opcode, unsigned long addr, struct pt_regs *reg
 
 	return ret;
 }
+
+#define auipc_rd_idx(opcode) \
+	((opcode >> 7) & 0x1f)
+
+#define auipc_imm(opcode) \
+	((((opcode) >> 12) & 0xfffff) << 12)
+
+#if __riscv_xlen == 64
+#define auipc_offset(opcode)	sign_extend64(auipc_imm(opcode), 31)
+#elif __riscv_xlen == 32
+#define auipc_offset(opcode)	auipc_imm(opcode)
+#else
+#error "Unexpected __riscv_xlen"
+#endif
+
+bool __kprobes simulate_auipc(u32 opcode, unsigned long addr, struct pt_regs *regs)
+{
+	/*
+	 * auipc instruction:
+	 *  31        12 11 7 6      0
+	 * | imm[31:12] | rd | opcode |
+	 *        20       5     7
+	 */
+
+	u32 rd_idx = auipc_rd_idx(opcode);
+	unsigned long rd_val = addr + auipc_offset(opcode);
+
+	if (!rv_insn_reg_set_val(regs, rd_idx, rd_val))
+		return false;
+
+	instruction_pointer_set(regs, addr + 4);
+
+	return true;
+}
+
+#define branch_rs1_idx(opcode) \
+	(((opcode) >> 15) & 0x1f)
+
+#define branch_rs2_idx(opcode) \
+	(((opcode) >> 20) & 0x1f)
+
+#define branch_funct3(opcode) \
+	(((opcode) >> 12) & 0x7)
+
+#define branch_imm(opcode) \
+	(((((opcode) >>  8) & 0xf ) <<  1) | \
+	 ((((opcode) >> 25) & 0x3f) <<  5) | \
+	 ((((opcode) >>  7) & 0x1 ) << 11) | \
+	 ((((opcode) >> 31) & 0x1 ) << 12))
+
+#define branch_offset(opcode) \
+	sign_extend32((branch_imm(opcode)), 12)
+
+#define BRANCH_BEQ	0x0
+#define BRANCH_BNE	0x1
+#define BRANCH_BLT	0x4
+#define BRANCH_BGE	0x5
+#define BRANCH_BLTU	0x6
+#define BRANCH_BGEU	0x7
+
+bool __kprobes simulate_branch(u32 opcode, unsigned long addr, struct pt_regs *regs)
+{
+	/*
+	 * branch instructions:
+	 *      31    30       25 24 20 19 15 14    12 11       8    7      6      0
+	 * | imm[12] | imm[10:5] | rs2 | rs1 | funct3 | imm[4:1] | imm[11] | opcode |
+	 *     1           6        5     5      3         4         1         7
+	 *     imm[12|10:5]        rs2   rs1    000       imm[4:1|11]       1100011  BEQ
+	 *     imm[12|10:5]        rs2   rs1    001       imm[4:1|11]       1100011  BNE
+	 *     imm[12|10:5]        rs2   rs1    100       imm[4:1|11]       1100011  BLT
+	 *     imm[12|10:5]        rs2   rs1    101       imm[4:1|11]       1100011  BGE
+	 *     imm[12|10:5]        rs2   rs1    110       imm[4:1|11]       1100011  BLTU
+	 *     imm[12|10:5]        rs2   rs1    111       imm[4:1|11]       1100011  BGEU
+	 */
+
+	s32 offset;
+	s32 offset_tmp;
+	unsigned long rs1_val;
+	unsigned long rs2_val;
+
+	if (!rv_insn_reg_get_val(regs, branch_rs1_idx(opcode), &rs1_val) ||
+	    !rv_insn_reg_get_val(regs, branch_rs2_idx(opcode), &rs2_val))
+		return false;
+
+	offset_tmp = branch_offset(opcode);
+	switch (branch_funct3(opcode)) {
+	case BRANCH_BEQ:
+		offset = (rs1_val == rs2_val) ? offset_tmp : 4;
+		break;
+	case BRANCH_BNE:
+		offset = (rs1_val != rs2_val) ? offset_tmp : 4;
+		break;
+	case BRANCH_BLT:
+		offset = ((long)rs1_val < (long)rs2_val) ? offset_tmp : 4;
+		break;
+	case BRANCH_BGE:
+		offset = ((long)rs1_val >= (long)rs2_val) ? offset_tmp : 4;
+		break;
+	case BRANCH_BLTU:
+		offset = (rs1_val < rs2_val) ? offset_tmp : 4;
+		break;
+	case BRANCH_BGEU:
+		offset = (rs1_val >= rs2_val) ? offset_tmp : 4;
+		break;
+	default:
+		return false;
+	}
+
+	instruction_pointer_set(regs, addr + offset);
+
+	return true;
+}
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 120b2f6f71bc..b9620e5f00ba 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -255,7 +255,7 @@ static void __init parse_dtb(void)
 
 	pr_err("No DTB passed to the kernel\n");
 #ifdef CONFIG_CMDLINE_FORCE
-	strlcpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+	strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
 	pr_info("Forcing kernel command line to: %s\n", boot_command_line);
 #endif
 }
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 0a98fd0ddfe9..0daaa3e4630d 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -199,11 +199,6 @@ int is_valid_bugaddr(unsigned long pc)
 }
 #endif /* CONFIG_GENERIC_BUG */
 
-/* stvec & scratch is already set from head.S */
-void __init trap_init(void)
-{
-}
-
 #ifdef CONFIG_VMAP_STACK
 static DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)],
 		overflow_stack)__aligned(16);
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index 24d936c147cd..f2e065671e4d 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -23,10 +23,10 @@ ifneq ($(c-gettimeofday-y),)
 endif
 
 # Build rules
-targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds vdso-syms.S
+targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds
 obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
 
-obj-y += vdso.o vdso-syms.o
+obj-y += vdso.o
 CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
 
 # Disable -pg to prevent insert call site
@@ -36,6 +36,7 @@ CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
 GCOV_PROFILE := n
 KCOV_INSTRUMENT := n
 KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
 
 # Force dependency
 $(obj)/vdso.o: $(obj)/vdso.so
@@ -43,20 +44,22 @@ $(obj)/vdso.o: $(obj)/vdso.so
 # link rule for the .so file, .lds has to be first
 $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
 	$(call if_changed,vdsold)
-LDFLAGS_vdso.so.dbg = -shared -s -soname=linux-vdso.so.1 \
+LDFLAGS_vdso.so.dbg = -shared -S -soname=linux-vdso.so.1 \
 	--build-id=sha1 --hash-style=both --eh-frame-hdr
 
-# We also create a special relocatable object that should mirror the symbol
-# table and layout of the linked DSO. With ld --just-symbols we can then
-# refer to these symbols in the kernel code rather than hand-coded addresses.
-$(obj)/vdso-syms.S: $(obj)/vdso.so FORCE
-	$(call if_changed,so2s)
-
 # strip rule for the .so file
 $(obj)/%.so: OBJCOPYFLAGS := -S
 $(obj)/%.so: $(obj)/%.so.dbg FORCE
 	$(call if_changed,objcopy)
 
+# Generate VDSO offsets using helper script
+gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+quiet_cmd_vdsosym = VDSOSYM $@
+	cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
+
+include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
+	$(call if_changed,vdsosym)
+
 # actual build commands
 # The DSO images are built using a special linker script
 # Make sure only to export the intended __vdso_xxx symbol offsets.
@@ -65,11 +68,6 @@ quiet_cmd_vdsold = VDSOLD  $@
                    $(OBJCOPY) $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \
                    rm $@.tmp
 
-# Extracts symbol offsets from the VDSO, converting them into an assembly file
-# that contains the same symbols at the same offsets.
-quiet_cmd_so2s = SO2S    $@
-      cmd_so2s = $(NM) -D $< | $(srctree)/$(src)/so2s.sh > $@
-
 # install commands for the unstripped file
 quiet_cmd_vdso_install = INSTALL $@
       cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
diff --git a/arch/riscv/kernel/vdso/gen_vdso_offsets.sh b/arch/riscv/kernel/vdso/gen_vdso_offsets.sh
new file mode 100755
index 000000000000..c2e5613f3495
--- /dev/null
+++ b/arch/riscv/kernel/vdso/gen_vdso_offsets.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+LC_ALL=C
+sed -n -e 's/^[0]\+\(0[0-9a-fA-F]*\) . \(__vdso_[a-zA-Z0-9_]*\)$/\#define \2_offset\t0x\1/p'
diff --git a/arch/riscv/kernel/vdso/so2s.sh b/arch/riscv/kernel/vdso/so2s.sh
deleted file mode 100755
index e64cb6d9440e..000000000000
--- a/arch/riscv/kernel/vdso/so2s.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0+
-# Copyright 2020 Palmer Dabbelt <palmerdabbelt@google.com>
-
-sed 's!\([0-9a-f]*\) T \([a-z0-9_]*\)\(@@LINUX_4.15\)*!.global \2\n.set \2,0x\1!' \
-| grep '^\.'
diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S
index af776555ded9..9c9f35091ef0 100644
--- a/arch/riscv/kernel/vmlinux-xip.lds.S
+++ b/arch/riscv/kernel/vmlinux-xip.lds.S
@@ -121,7 +121,6 @@ SECTIONS
 	}
 
 	BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)
-	EXCEPTION_TABLE(0x10)
 
 	.rel.dyn : AT(ADDR(.rel.dyn) - LOAD_OFFSET) {
 		*(.rel.dyn*)
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index 502d0826ecb1..5104f3a871e3 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -4,6 +4,8 @@
  * Copyright (C) 2017 SiFive
  */
 
+#define RO_EXCEPTION_TABLE_ALIGN	16
+
 #ifdef CONFIG_XIP_KERNEL
 #include "vmlinux-xip.lds.S"
 #else
@@ -112,8 +114,6 @@ SECTIONS
 		*(.srodata*)
 	}
 
-	EXCEPTION_TABLE(0x10)
-
 	. = ALIGN(SECTION_ALIGN);
 	_data = .;
 
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 93720b015bb6..c0cddf0fc22d 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -19,6 +19,7 @@
 #include <linux/set_memory.h>
 #include <linux/dma-map-ops.h>
 #include <linux/crash_dump.h>
+#include <linux/hugetlb.h>
 
 #include <asm/fixmap.h>
 #include <asm/tlbflush.h>
@@ -222,6 +223,8 @@ static void __init setup_bootmem(void)
 
 	early_init_fdt_scan_reserved_mem();
 	dma_contiguous_reserve(dma32_phys_limit);
+	if (IS_ENABLED(CONFIG_64BIT))
+		hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
 	memblock_allow_resize();
 }
 
@@ -234,14 +237,15 @@ static struct pt_alloc_ops _pt_ops __initdata;
 #define pt_ops _pt_ops
 #endif
 
-unsigned long pfn_base __ro_after_init;
-EXPORT_SYMBOL(pfn_base);
+unsigned long riscv_pfn_base __ro_after_init;
+EXPORT_SYMBOL(riscv_pfn_base);
 
 pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
 pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
 static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
 
 pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
 
 #ifdef CONFIG_XIP_KERNEL
 #define trampoline_pg_dir      ((pgd_t *)XIP_FIXUP(trampoline_pg_dir))
@@ -322,7 +326,6 @@ static void __init create_pte_mapping(pte_t *ptep,
 static pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss;
 static pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
 static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
-static pmd_t early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
 
 #ifdef CONFIG_XIP_KERNEL
 #define trampoline_pmd ((pmd_t *)XIP_FIXUP(trampoline_pmd))
@@ -408,6 +411,7 @@ static void __init create_pmd_mapping(pmd_t *pmdp,
 #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)	\
 	create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
 #define fixmap_pgd_next		fixmap_pte
+#define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot)
 #endif
 
 void __init create_pgd_mapping(pgd_t *pgdp,
@@ -515,49 +519,80 @@ static __init pgprot_t pgprot_from_va(uintptr_t va)
 #endif
 
 #ifdef CONFIG_XIP_KERNEL
-static void __init create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size,
+static void __init create_kernel_page_table(pgd_t *pgdir,
 					    __always_unused bool early)
 {
 	uintptr_t va, end_va;
 
 	/* Map the flash resident part */
 	end_va = kernel_map.virt_addr + kernel_map.xiprom_sz;
-	for (va = kernel_map.virt_addr; va < end_va; va += map_size)
+	for (va = kernel_map.virt_addr; va < end_va; va += PMD_SIZE)
 		create_pgd_mapping(pgdir, va,
 				   kernel_map.xiprom + (va - kernel_map.virt_addr),
-				   map_size, PAGE_KERNEL_EXEC);
+				   PMD_SIZE, PAGE_KERNEL_EXEC);
 
 	/* Map the data in RAM */
 	end_va = kernel_map.virt_addr + XIP_OFFSET + kernel_map.size;
-	for (va = kernel_map.virt_addr + XIP_OFFSET; va < end_va; va += map_size)
+	for (va = kernel_map.virt_addr + XIP_OFFSET; va < end_va; va += PMD_SIZE)
 		create_pgd_mapping(pgdir, va,
 				   kernel_map.phys_addr + (va - (kernel_map.virt_addr + XIP_OFFSET)),
-				   map_size, PAGE_KERNEL);
+				   PMD_SIZE, PAGE_KERNEL);
 }
 #else
-static void __init create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size,
-					    bool early)
+static void __init create_kernel_page_table(pgd_t *pgdir, bool early)
 {
 	uintptr_t va, end_va;
 
 	end_va = kernel_map.virt_addr + kernel_map.size;
-	for (va = kernel_map.virt_addr; va < end_va; va += map_size)
+	for (va = kernel_map.virt_addr; va < end_va; va += PMD_SIZE)
 		create_pgd_mapping(pgdir, va,
 				   kernel_map.phys_addr + (va - kernel_map.virt_addr),
-				   map_size,
+				   PMD_SIZE,
 				   early ?
 					PAGE_KERNEL_EXEC : pgprot_from_va(va));
 }
 #endif
 
-asmlinkage void __init setup_vm(uintptr_t dtb_pa)
+/*
+ * Setup a 4MB mapping that encompasses the device tree: for 64-bit kernel,
+ * this means 2 PMD entries whereas for 32-bit kernel, this is only 1 PGDIR
+ * entry.
+ */
+static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
 {
-	uintptr_t __maybe_unused pa;
-	uintptr_t map_size;
-#ifndef __PAGETABLE_PMD_FOLDED
-	pmd_t fix_bmap_spmd, fix_bmap_epmd;
+#ifndef CONFIG_BUILTIN_DTB
+	uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1);
+
+	create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
+			   IS_ENABLED(CONFIG_64BIT) ? (uintptr_t)early_dtb_pmd : pa,
+			   PGDIR_SIZE,
+			   IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);
+
+	if (IS_ENABLED(CONFIG_64BIT)) {
+		create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
+				   pa, PMD_SIZE, PAGE_KERNEL);
+		create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE,
+				   pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL);
+	}
+
+	dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1));
+#else
+	/*
+	 * For 64-bit kernel, __va can't be used since it would return a linear
+	 * mapping address whereas dtb_early_va will be used before
+	 * setup_vm_final installs the linear mapping. For 32-bit kernel, as the
+	 * kernel is mapped in the linear mapping, that makes no difference.
+	 */
+	dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa));
 #endif
 
+	dtb_early_pa = dtb_pa;
+}
+
+asmlinkage void __init setup_vm(uintptr_t dtb_pa)
+{
+	pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd;
+
 	kernel_map.virt_addr = KERNEL_LINK_ADDR;
 
 #ifdef CONFIG_XIP_KERNEL
@@ -573,23 +608,14 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 	kernel_map.phys_addr = (uintptr_t)(&_start);
 	kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr;
 #endif
-
 	kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr;
-#ifdef CONFIG_64BIT
 	kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr;
-#endif
 
-	pfn_base = PFN_DOWN(kernel_map.phys_addr);
-
-	/*
-	 * Enforce boot alignment requirements of RV32 and
-	 * RV64 by only allowing PMD or PGD mappings.
-	 */
-	map_size = PMD_SIZE;
+	riscv_pfn_base = PFN_DOWN(kernel_map.phys_addr);
 
 	/* Sanity check alignment and size */
 	BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
-	BUG_ON((kernel_map.phys_addr % map_size) != 0);
+	BUG_ON((kernel_map.phys_addr % PMD_SIZE) != 0);
 
 #ifdef CONFIG_64BIT
 	/*
@@ -634,50 +660,10 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 	 * us to reach paging_init(). We map all memory banks later
 	 * in setup_vm_final() below.
 	 */
-	create_kernel_page_table(early_pg_dir, map_size, true);
+	create_kernel_page_table(early_pg_dir, true);
 
-#ifndef __PAGETABLE_PMD_FOLDED
-	/* Setup early PMD for DTB */
-	create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
-			   (uintptr_t)early_dtb_pmd, PGDIR_SIZE, PAGE_TABLE);
-#ifndef CONFIG_BUILTIN_DTB
-	/* Create two consecutive PMD mappings for FDT early scan */
-	pa = dtb_pa & ~(PMD_SIZE - 1);
-	create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
-			   pa, PMD_SIZE, PAGE_KERNEL);
-	create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE,
-			   pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL);
-	dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1));
-#else /* CONFIG_BUILTIN_DTB */
-#ifdef CONFIG_64BIT
-	/*
-	 * __va can't be used since it would return a linear mapping address
-	 * whereas dtb_early_va will be used before setup_vm_final installs
-	 * the linear mapping.
-	 */
-	dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa));
-#else
-	dtb_early_va = __va(dtb_pa);
-#endif /* CONFIG_64BIT */
-#endif /* CONFIG_BUILTIN_DTB */
-#else
-#ifndef CONFIG_BUILTIN_DTB
-	/* Create two consecutive PGD mappings for FDT early scan */
-	pa = dtb_pa & ~(PGDIR_SIZE - 1);
-	create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
-			   pa, PGDIR_SIZE, PAGE_KERNEL);
-	create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA + PGDIR_SIZE,
-			   pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL);
-	dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1));
-#else /* CONFIG_BUILTIN_DTB */
-#ifdef CONFIG_64BIT
-	dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa));
-#else
-	dtb_early_va = __va(dtb_pa);
-#endif /* CONFIG_64BIT */
-#endif /* CONFIG_BUILTIN_DTB */
-#endif
-	dtb_early_pa = dtb_pa;
+	/* Setup early mapping for FDT early scan */
+	create_fdt_early_page_table(early_pg_dir, dtb_pa);
 
 	/*
 	 * Bootime fixmap only can handle PMD_SIZE mapping. Thus, boot-ioremap
@@ -752,7 +738,7 @@ static void __init setup_vm_final(void)
 
 #ifdef CONFIG_64BIT
 	/* Map the kernel */
-	create_kernel_page_table(swapper_pg_dir, PMD_SIZE, false);
+	create_kernel_page_table(swapper_pg_dir, false);
 #endif
 
 	/* Clear fixmap PTE and PMD mappings */
@@ -819,38 +805,22 @@ static void __init reserve_crashkernel(void)
 
 	crash_size = PAGE_ALIGN(crash_size);
 
-	if (crash_base == 0) {
-		/*
-		 * Current riscv boot protocol requires 2MB alignment for
-		 * RV64 and 4MB alignment for RV32 (hugepage size)
-		 */
-		crash_base = memblock_find_in_range(search_start, search_end,
-						    crash_size, PMD_SIZE);
-
-		if (crash_base == 0) {
-			pr_warn("crashkernel: couldn't allocate %lldKB\n",
-				crash_size >> 10);
-			return;
-		}
-	} else {
-		/* User specifies base address explicitly. */
-		if (!memblock_is_region_memory(crash_base, crash_size)) {
-			pr_warn("crashkernel: requested region is not memory\n");
-			return;
-		}
-
-		if (memblock_is_region_reserved(crash_base, crash_size)) {
-			pr_warn("crashkernel: requested region is reserved\n");
-			return;
-		}
-
+	if (crash_base) {
+		search_start = crash_base;
+		search_end = crash_base + crash_size;
+	}
 
-		if (!IS_ALIGNED(crash_base, PMD_SIZE)) {
-			pr_warn("crashkernel: requested region is misaligned\n");
-			return;
-		}
+	/*
+	 * Current riscv boot protocol requires 2MB alignment for
+	 * RV64 and 4MB alignment for RV32 (hugepage size)
+	 */
+	crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
+					       search_start, search_end);
+	if (crash_base == 0) {
+		pr_warn("crashkernel: couldn't allocate %lldKB\n",
+			crash_size >> 10);
+		return;
 	}
-	memblock_reserve(crash_base, crash_size);
 
 	pr_info("crashkernel: reserved 0x%016llx - 0x%016llx (%lld MB)\n",
 		crash_base, crash_base + crash_size, crash_size >> 20);
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 92c0a1b4c528..2bd90c51efd3 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -110,6 +110,7 @@ config S390
 	select ARCH_STACKWALK
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_DEBUG_PAGEALLOC
+	select ARCH_SUPPORTS_HUGETLBFS
 	select ARCH_SUPPORTS_NUMA_BALANCING
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF
@@ -209,6 +210,7 @@ config S390
 	select SWIOTLB
 	select SYSCTL_EXCEPTION_TRACE
 	select THREAD_INFO_IN_TASK
+	select TRACE_IRQFLAGS_SUPPORT
 	select TTY
 	select VIRT_CPU_ACCOUNTING
 	select ZONE_DMA
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
index 9ea6e61d5858..e94a2a7f6bf4 100644
--- a/arch/s390/Kconfig.debug
+++ b/arch/s390/Kconfig.debug
@@ -1,8 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 
-config TRACE_IRQFLAGS_SUPPORT
-	def_bool y
-
 config EARLY_PRINTK
 	def_bool y
 
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 17dc4f1ac4fa..a3cf33ad009f 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -70,7 +70,7 @@ cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include
 #
 cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls
 
-ifeq ($(call cc-option-yn,-mpacked-stack -mbackchain -msoft-float),y)
+ifneq ($(call cc-option,-mpacked-stack -mbackchain -msoft-float),)
 cflags-$(CONFIG_PACK_STACK)  += -mpacked-stack -D__PACK_STACK
 aflags-$(CONFIG_PACK_STACK)  += -D__PACK_STACK
 endif
@@ -78,22 +78,22 @@ endif
 KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y)
 KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y)
 
-ifeq ($(call cc-option-yn,-mstack-size=8192 -mstack-guard=128),y)
+ifneq ($(call cc-option,-mstack-size=8192 -mstack-guard=128),)
 cflags-$(CONFIG_CHECK_STACK) += -mstack-size=$(STACK_SIZE)
-ifneq ($(call cc-option-yn,-mstack-size=8192),y)
+ifeq ($(call cc-option,-mstack-size=8192),)
 cflags-$(CONFIG_CHECK_STACK) += -mstack-guard=$(CONFIG_STACK_GUARD)
 endif
 endif
 
 ifdef CONFIG_WARN_DYNAMIC_STACK
-  ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y)
+  ifneq ($(call cc-option,-mwarn-dynamicstack),)
     KBUILD_CFLAGS += -mwarn-dynamicstack
     KBUILD_CFLAGS_DECOMPRESSOR += -mwarn-dynamicstack
   endif
 endif
 
 ifdef CONFIG_EXPOLINE
-  ifeq ($(call cc-option-yn,$(CC_FLAGS_MARCH) -mindirect-branch=thunk),y)
+  ifneq ($(call cc-option,$(CC_FLAGS_MARCH) -mindirect-branch=thunk),)
     CC_FLAGS_EXPOLINE := -mindirect-branch=thunk
     CC_FLAGS_EXPOLINE += -mfunction-return=thunk
     CC_FLAGS_EXPOLINE += -mindirect-branch-table
@@ -104,10 +104,10 @@ ifdef CONFIG_EXPOLINE
 endif
 
 ifdef CONFIG_FUNCTION_TRACER
-  ifeq ($(call cc-option-yn,-mfentry -mnop-mcount),n)
+  ifeq ($(call cc-option,-mfentry -mnop-mcount),)
     # make use of hotpatch feature if the compiler supports it
     cc_hotpatch	:= -mhotpatch=0,3
-    ifeq ($(call cc-option-yn,$(cc_hotpatch)),y)
+    ifneq ($(call cc-option,$(cc_hotpatch)),)
       CC_FLAGS_FTRACE := $(cc_hotpatch)
       KBUILD_AFLAGS	+= -DCC_USING_HOTPATCH
       KBUILD_CFLAGS	+= -DCC_USING_HOTPATCH
diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c
index 209f6ae5a197..75bcbfa27941 100644
--- a/arch/s390/boot/pgm_check_info.c
+++ b/arch/s390/boot/pgm_check_info.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/kernel.h>
+#include <linux/stdarg.h>
 #include <linux/string.h>
 #include <linux/ctype.h>
 #include <asm/stacktrace.h>
@@ -8,7 +9,6 @@
 #include <asm/setup.h>
 #include <asm/sclp.h>
 #include <asm/uv.h>
-#include <stdarg.h>
 #include "boot.h"
 
 const char hex_asc[] = "0123456789abcdef";
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 11ffc7c37ada..37b6115ed80e 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -804,6 +804,7 @@ CONFIG_DEBUG_VM_PGFLAGS=y
 CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
 CONFIG_DEBUG_PER_CPU_MAPS=y
+CONFIG_KFENCE=y
 CONFIG_DEBUG_SHIRQ=y
 CONFIG_PANIC_ON_OOPS=y
 CONFIG_DETECT_HUNG_TASK=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index e1642d2cba59..56a1cc85c5d7 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -397,7 +397,6 @@ CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=32768
-# CONFIG_BLK_DEV_XPRAM is not set
 CONFIG_VIRTIO_BLK=y
 CONFIG_BLK_DEV_RBD=m
 CONFIG_BLK_DEV_NVME=m
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index d576aaab27c9..aceccf3b9a88 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -35,7 +35,6 @@ CONFIG_NET=y
 # CONFIG_ETHTOOL_NETLINK is not set
 CONFIG_DEVTMPFS=y
 CONFIG_BLK_DEV_RAM=y
-# CONFIG_BLK_DEV_XPRAM is not set
 # CONFIG_DCSSBLK is not set
 # CONFIG_DASD is not set
 CONFIG_ENCLOSURE_SERVICES=y
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index 8d49505b4a43..cdc7ae72529d 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -176,16 +176,6 @@ static inline int is_compat_task(void)
 	return test_thread_flag(TIF_31BIT);
 }
 
-static inline void __user *arch_compat_alloc_user_space(long len)
-{
-	unsigned long stack;
-
-	stack = KSTK_ESP(current);
-	if (is_compat_task())
-		stack &= 0x7fffffffUL;
-	return (void __user *) (stack - len);
-}
-
 #endif
 
 struct compat_ipc64_perm {
diff --git a/arch/s390/include/asm/cpu_mcf.h b/arch/s390/include/asm/cpu_mcf.h
index ca0e0e5ddbc4..f87a4788c19c 100644
--- a/arch/s390/include/asm/cpu_mcf.h
+++ b/arch/s390/include/asm/cpu_mcf.h
@@ -24,13 +24,6 @@ enum cpumf_ctr_set {
 
 #define CPUMF_LCCTL_ENABLE_SHIFT    16
 #define CPUMF_LCCTL_ACTCTL_SHIFT     0
-static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = {
-	[CPUMF_CTR_SET_BASIC]	= 0x02,
-	[CPUMF_CTR_SET_USER]	= 0x04,
-	[CPUMF_CTR_SET_CRYPTO]	= 0x08,
-	[CPUMF_CTR_SET_EXT]	= 0x01,
-	[CPUMF_CTR_SET_MT_DIAG] = 0x20,
-};
 
 static inline void ctr_set_enable(u64 *state, u64 ctrsets)
 {
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 161a9e12bfb8..a604d51acfc8 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -244,6 +244,7 @@ struct kvm_s390_sie_block {
 	__u8	fpf;			/* 0x0060 */
 #define ECB_GS		0x40
 #define ECB_TE		0x10
+#define ECB_SPECI	0x08
 #define ECB_SRSI	0x04
 #define ECB_HOSTPROTINT	0x02
 	__u8	ecb;			/* 0x0061 */
@@ -798,14 +799,12 @@ struct kvm_s390_cpu_model {
 	unsigned short ibc;
 };
 
-struct kvm_s390_module_hook {
-	int (*hook)(struct kvm_vcpu *vcpu);
-	struct module *owner;
-};
+typedef int (*crypto_hook)(struct kvm_vcpu *vcpu);
 
 struct kvm_s390_crypto {
 	struct kvm_s390_crypto_cb *crycb;
-	struct kvm_s390_module_hook *pqap_hook;
+	struct rw_semaphore pqap_hook_rwsem;
+	crypto_hook *pqap_hook;
 	__u32 crycbd;
 	__u8 aes_kw;
 	__u8 dea_kw;
@@ -957,6 +956,7 @@ struct kvm_arch{
 	atomic64_t cmma_dirty_pages;
 	/* subset of available cpu features enabled by user space */
 	DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
+	/* indexed by vcpu_idx */
 	DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS);
 	struct kvm_s390_gisa_interrupt gisa_int;
 	struct kvm_s390_pv pv;
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index e317fd4866c1..f16f4d054ae2 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -18,6 +18,7 @@ extern struct mutex smp_cpu_state_mutex;
 extern unsigned int smp_cpu_mt_shift;
 extern unsigned int smp_cpu_mtid;
 extern __vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS];
+extern cpumask_t cpu_setup_mask;
 
 extern int __cpu_up(unsigned int cpu, struct task_struct *tidle);
 
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index 3d8a4b94c620..dd00d98804ec 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -34,16 +34,6 @@ static inline bool on_stack(struct stack_info *info,
 	return addr >= info->begin && addr + len <= info->end;
 }
 
-static __always_inline unsigned long get_stack_pointer(struct task_struct *task,
-						       struct pt_regs *regs)
-{
-	if (regs)
-		return (unsigned long) kernel_stack_pointer(regs);
-	if (task == current)
-		return current_stack_pointer();
-	return (unsigned long) task->thread.ksp;
-}
-
 /*
  * Stack layout of a C stack frame.
  */
@@ -74,6 +64,16 @@ struct stack_frame {
 	((unsigned long)__builtin_frame_address(0) -			\
 	 offsetof(struct stack_frame, back_chain))
 
+static __always_inline unsigned long get_stack_pointer(struct task_struct *task,
+						       struct pt_regs *regs)
+{
+	if (regs)
+		return (unsigned long)kernel_stack_pointer(regs);
+	if (task == current)
+		return current_frame_address();
+	return (unsigned long)task->thread.ksp;
+}
+
 /*
  * To keep this simple mark register 2-6 as being changed (volatile)
  * by the called function, even though register 6 is saved/nonvolatile.
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 9ed9aa37e836..ce550d06abc3 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -227,9 +227,6 @@ static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long s
 	__get_user(x, ptr);					\
 })
 
-unsigned long __must_check
-raw_copy_in_user(void __user *to, const void __user *from, unsigned long n);
-
 /*
  * Copy a null terminated string from userspace.
  */
diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h
index de9006b0cfeb..5ebf534ef753 100644
--- a/arch/s390/include/asm/unwind.h
+++ b/arch/s390/include/asm/unwind.h
@@ -55,10 +55,10 @@ static inline bool unwind_error(struct unwind_state *state)
 	return state->error;
 }
 
-static inline void unwind_start(struct unwind_state *state,
-				struct task_struct *task,
-				struct pt_regs *regs,
-				unsigned long first_frame)
+static __always_inline void unwind_start(struct unwind_state *state,
+					 struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned long first_frame)
 {
 	task = task ?: current;
 	first_frame = first_frame ?: get_stack_pointer(task, regs);
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index b9716a7e326d..4c9b967290ae 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -140,10 +140,10 @@ _LPP_OFFSET	= __LC_LPP
 	TSTMSK	__LC_MCCK_CODE,(MCCK_CODE_STG_ERROR|MCCK_CODE_STG_KEY_ERROR)
 	jnz	\errlabel
 	TSTMSK	__LC_MCCK_CODE,MCCK_CODE_STG_DEGRAD
-	jz	oklabel\@
+	jz	.Loklabel\@
 	TSTMSK	__LC_MCCK_CODE,MCCK_CODE_STG_FAIL_ADDR
 	jnz	\errlabel
-oklabel\@:
+.Loklabel\@:
 	.endm
 
 #if IS_ENABLED(CONFIG_KVM)
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 0a464d328467..1d94ffdf347b 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -341,13 +341,13 @@ NOKPROBE_SYMBOL(prepare_ftrace_return);
  */
 int ftrace_enable_ftrace_graph_caller(void)
 {
-	brcl_disable(__va(ftrace_graph_caller));
+	brcl_disable(ftrace_graph_caller);
 	return 0;
 }
 
 int ftrace_disable_ftrace_graph_caller(void)
 {
-	brcl_enable(__va(ftrace_graph_caller));
+	brcl_enable(ftrace_graph_caller);
 	return 0;
 }
 
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 2e3bb633acf6..4a99154fe651 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -158,6 +158,14 @@ static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset,
 	return need;
 }
 
+static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = {
+	[CPUMF_CTR_SET_BASIC]	= 0x02,
+	[CPUMF_CTR_SET_USER]	= 0x04,
+	[CPUMF_CTR_SET_CRYPTO]	= 0x08,
+	[CPUMF_CTR_SET_EXT]	= 0x01,
+	[CPUMF_CTR_SET_MT_DIAG] = 0x20,
+};
+
 /* Read out all counter sets and save them in the provided data buffer.
  * The last 64 byte host an artificial trailer entry.
  */
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index fe14beb338e5..67e5fff96ee0 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -50,6 +50,7 @@
 #include <linux/compat.h>
 #include <linux/start_kernel.h>
 #include <linux/hugetlb.h>
+#include <linux/kmemleak.h>
 
 #include <asm/boot_data.h>
 #include <asm/ipl.h>
@@ -356,9 +357,12 @@ void *restart_stack;
 unsigned long stack_alloc(void)
 {
 #ifdef CONFIG_VMAP_STACK
-	return (unsigned long)__vmalloc_node(THREAD_SIZE, THREAD_SIZE,
-			THREADINFO_GFP, NUMA_NO_NODE,
-			__builtin_return_address(0));
+	void *ret;
+
+	ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP,
+			     NUMA_NO_NODE, __builtin_return_address(0));
+	kmemleak_not_leak(ret);
+	return (unsigned long)ret;
 #else
 	return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
 #endif
@@ -677,8 +681,9 @@ static void __init reserve_crashkernel(void)
 			return;
 		}
 		low = crash_base ?: low;
-		crash_base = memblock_find_in_range(low, high, crash_size,
-						    KEXEC_CRASH_MEM_ALIGN);
+		crash_base = memblock_phys_alloc_range(crash_size,
+						       KEXEC_CRASH_MEM_ALIGN,
+						       low, high);
 	}
 
 	if (!crash_base) {
@@ -687,8 +692,10 @@ static void __init reserve_crashkernel(void)
 		return;
 	}
 
-	if (register_memory_notifier(&kdump_mem_nb))
+	if (register_memory_notifier(&kdump_mem_nb)) {
+		memblock_free(crash_base, crash_size);
 		return;
+	}
 
 	if (!oldmem_data.start && MACHINE_IS_VM)
 		diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 2a991e43ead3..1a04e5bdf655 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -95,6 +95,7 @@ __vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS];
 #endif
 
 static unsigned int smp_max_threads __initdata = -1U;
+cpumask_t cpu_setup_mask;
 
 static int __init early_nosmt(char *s)
 {
@@ -902,13 +903,14 @@ static void smp_start_secondary(void *cpuvoid)
 	vtime_init();
 	vdso_getcpu_init();
 	pfault_init();
+	cpumask_set_cpu(cpu, &cpu_setup_mask);
+	update_cpu_masks();
 	notify_cpu_starting(cpu);
 	if (topology_cpu_dedicated(cpu))
 		set_cpu_flag(CIF_DEDICATED_CPU);
 	else
 		clear_cpu_flag(CIF_DEDICATED_CPU);
 	set_cpu_online(cpu, true);
-	update_cpu_masks();
 	inc_irq_stat(CPU_RST);
 	local_irq_enable();
 	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
@@ -950,10 +952,13 @@ early_param("possible_cpus", _setup_possible_cpus);
 int __cpu_disable(void)
 {
 	unsigned long cregs[16];
+	int cpu;
 
 	/* Handle possible pending IPIs */
 	smp_handle_ext_call();
-	set_cpu_online(smp_processor_id(), false);
+	cpu = smp_processor_id();
+	set_cpu_online(cpu, false);
+	cpumask_clear_cpu(cpu, &cpu_setup_mask);
 	update_cpu_masks();
 	/* Disable pseudo page faults on this cpu. */
 	pfault_fini();
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index aa705e1bd0dc..df5261e5cfe1 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -274,9 +274,9 @@
 265  common	statfs64		sys_statfs64			compat_sys_statfs64
 266  common	fstatfs64		sys_fstatfs64			compat_sys_fstatfs64
 267  common	remap_file_pages	sys_remap_file_pages		sys_remap_file_pages
-268  common	mbind			sys_mbind			compat_sys_mbind
-269  common	get_mempolicy		sys_get_mempolicy		compat_sys_get_mempolicy
-270  common	set_mempolicy		sys_set_mempolicy		compat_sys_set_mempolicy
+268  common	mbind			sys_mbind			sys_mbind
+269  common	get_mempolicy		sys_get_mempolicy		sys_get_mempolicy
+270  common	set_mempolicy		sys_set_mempolicy		sys_set_mempolicy
 271  common	mq_open			sys_mq_open			compat_sys_mq_open
 272  common	mq_unlink		sys_mq_unlink			sys_mq_unlink
 273  common	mq_timedsend		sys_mq_timedsend		sys_mq_timedsend_time32
@@ -293,7 +293,7 @@
 284  common	inotify_init		sys_inotify_init		sys_inotify_init
 285  common	inotify_add_watch	sys_inotify_add_watch		sys_inotify_add_watch
 286  common	inotify_rm_watch	sys_inotify_rm_watch		sys_inotify_rm_watch
-287  common	migrate_pages		sys_migrate_pages		compat_sys_migrate_pages
+287  common	migrate_pages		sys_migrate_pages		sys_migrate_pages
 288  common	openat			sys_openat			compat_sys_openat
 289  common	mkdirat			sys_mkdirat			sys_mkdirat
 290  common	mknodat			sys_mknodat			sys_mknodat
@@ -317,7 +317,7 @@
 307  common	sync_file_range		sys_sync_file_range		compat_sys_s390_sync_file_range
 308  common	tee			sys_tee				sys_tee
 309  common	vmsplice		sys_vmsplice			sys_vmsplice
-310  common	move_pages		sys_move_pages			compat_sys_move_pages
+310  common	move_pages		sys_move_pages			sys_move_pages
 311  common	getcpu			sys_getcpu			sys_getcpu
 312  common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
 313  common	utimes			sys_utimes			sys_utimes_time32
@@ -449,3 +449,5 @@
 444  common	landlock_create_ruleset	sys_landlock_create_ruleset	sys_landlock_create_ruleset
 445  common	landlock_add_rule	sys_landlock_add_rule		sys_landlock_add_rule
 446  common	landlock_restrict_self	sys_landlock_restrict_self	sys_landlock_restrict_self
+# 447 reserved for memfd_secret
+448  common	process_mrelease	sys_process_mrelease		sys_process_mrelease
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index d2458a29618f..58f8291950cb 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -67,7 +67,7 @@ static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int c
 	static cpumask_t mask;
 
 	cpumask_clear(&mask);
-	if (!cpu_online(cpu))
+	if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
 		goto out;
 	cpumask_set_cpu(cpu, &mask);
 	switch (topology_mode) {
@@ -88,7 +88,7 @@ static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int c
 	case TOPOLOGY_MODE_SINGLE:
 		break;
 	}
-	cpumask_and(&mask, &mask, cpu_online_mask);
+	cpumask_and(&mask, &mask, &cpu_setup_mask);
 out:
 	cpumask_copy(dst, &mask);
 }
@@ -99,16 +99,16 @@ static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
 	int i;
 
 	cpumask_clear(&mask);
-	if (!cpu_online(cpu))
+	if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
 		goto out;
 	cpumask_set_cpu(cpu, &mask);
 	if (topology_mode != TOPOLOGY_MODE_HW)
 		goto out;
 	cpu -= cpu % (smp_cpu_mtid + 1);
-	for (i = 0; i <= smp_cpu_mtid; i++)
-		if (cpu_present(cpu + i))
+	for (i = 0; i <= smp_cpu_mtid; i++) {
+		if (cpumask_test_cpu(cpu + i, &cpu_setup_mask))
 			cpumask_set_cpu(cpu + i, &mask);
-	cpumask_and(&mask, &mask, cpu_online_mask);
+	}
 out:
 	cpumask_copy(dst, &mask);
 }
@@ -569,6 +569,7 @@ void __init topology_init_early(void)
 	alloc_masks(info, &book_info, 2);
 	alloc_masks(info, &drawer_info, 3);
 out:
+	cpumask_set_cpu(0, &cpu_setup_mask);
 	__arch_update_cpu_topology();
 	__arch_update_dedicated_flag(NULL);
 }
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index d548d60caed2..16256e17a544 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -419,13 +419,13 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
 static void __set_cpu_idle(struct kvm_vcpu *vcpu)
 {
 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
-	set_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask);
+	set_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);
 }
 
 static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
 {
 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT);
-	clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask);
+	clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);
 }
 
 static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
@@ -3050,18 +3050,18 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
 
 static void __airqs_kick_single_vcpu(struct kvm *kvm, u8 deliverable_mask)
 {
-	int vcpu_id, online_vcpus = atomic_read(&kvm->online_vcpus);
+	int vcpu_idx, online_vcpus = atomic_read(&kvm->online_vcpus);
 	struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
 	struct kvm_vcpu *vcpu;
 
-	for_each_set_bit(vcpu_id, kvm->arch.idle_mask, online_vcpus) {
-		vcpu = kvm_get_vcpu(kvm, vcpu_id);
+	for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) {
+		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
 		if (psw_ioint_disabled(vcpu))
 			continue;
 		deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
 		if (deliverable_mask) {
 			/* lately kicked but not yet running */
-			if (test_and_set_bit(vcpu_id, gi->kicked_mask))
+			if (test_and_set_bit(vcpu_idx, gi->kicked_mask))
 				return;
 			kvm_s390_vcpu_wakeup(vcpu);
 			return;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 4527ac7b5961..752a0ffab9bf 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -66,8 +66,6 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
 	STATS_DESC_COUNTER(VM, inject_service_signal),
 	STATS_DESC_COUNTER(VM, inject_virtio)
 };
-static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
-		sizeof(struct kvm_vm_stat) / sizeof(u64));
 
 const struct kvm_stats_header kvm_vm_stats_header = {
 	.name_size = KVM_STATS_NAME_SIZE,
@@ -174,8 +172,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
 	STATS_DESC_COUNTER(VCPU, pfault_sync)
 };
-static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
-		sizeof(struct kvm_vcpu_stat) / sizeof(u64));
 
 const struct kvm_stats_header kvm_vcpu_stats_header = {
 	.name_size = KVM_STATS_NAME_SIZE,
@@ -1953,7 +1949,7 @@ out:
 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
 {
 	int start = 0, end = slots->used_slots;
-	int slot = atomic_read(&slots->lru_slot);
+	int slot = atomic_read(&slots->last_used_slot);
 	struct kvm_memory_slot *memslots = slots->memslots;
 
 	if (gfn >= memslots[slot].base_gfn &&
@@ -1974,7 +1970,7 @@ static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
 
 	if (gfn >= memslots[start].base_gfn &&
 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
-		atomic_set(&slots->lru_slot, start);
+		atomic_set(&slots->last_used_slot, start);
 	}
 
 	return start;
@@ -2559,12 +2555,26 @@ static void kvm_s390_set_crycb_format(struct kvm *kvm)
 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
 }
 
+/*
+ * kvm_arch_crypto_set_masks
+ *
+ * @kvm: pointer to the target guest's KVM struct containing the crypto masks
+ *	 to be set.
+ * @apm: the mask identifying the accessible AP adapters
+ * @aqm: the mask identifying the accessible AP domains
+ * @adm: the mask identifying the accessible AP control domains
+ *
+ * Set the masks that identify the adapters, domains and control domains to
+ * which the KVM guest is granted access.
+ *
+ * Note: The kvm->lock mutex must be locked by the caller before invoking this
+ *	 function.
+ */
 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
 			       unsigned long *aqm, unsigned long *adm)
 {
 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
 
-	mutex_lock(&kvm->lock);
 	kvm_s390_vcpu_block_all(kvm);
 
 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
@@ -2595,13 +2605,23 @@ void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
 	/* recreate the shadow crycb for each vcpu */
 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
 	kvm_s390_vcpu_unblock_all(kvm);
-	mutex_unlock(&kvm->lock);
 }
 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
 
+/*
+ * kvm_arch_crypto_clear_masks
+ *
+ * @kvm: pointer to the target guest's KVM struct containing the crypto masks
+ *	 to be cleared.
+ *
+ * Clear the masks that identify the adapters, domains and control domains to
+ * which the KVM guest is granted access.
+ *
+ * Note: The kvm->lock mutex must be locked by the caller before invoking this
+ *	 function.
+ */
 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
 {
-	mutex_lock(&kvm->lock);
 	kvm_s390_vcpu_block_all(kvm);
 
 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
@@ -2613,7 +2633,6 @@ void kvm_arch_crypto_clear_masks(struct kvm *kvm)
 	/* recreate the shadow crycb for each vcpu */
 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
 	kvm_s390_vcpu_unblock_all(kvm);
-	mutex_unlock(&kvm->lock);
 }
 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
 
@@ -2630,6 +2649,7 @@ static void kvm_s390_crypto_init(struct kvm *kvm)
 {
 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
 	kvm_s390_set_crycb_format(kvm);
+	init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
 
 	if (!test_kvm_facility(kvm, 76))
 		return;
@@ -3200,6 +3220,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
 	if (test_kvm_facility(vcpu->kvm, 73))
 		vcpu->arch.sie_block->ecb |= ECB_TE;
+	if (!kvm_is_ucontrol(vcpu->kvm))
+		vcpu->arch.sie_block->ecb |= ECB_SPECI;
 
 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
@@ -4044,7 +4066,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
 		kvm_s390_patch_guest_per_regs(vcpu);
 	}
 
-	clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
+	clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask);
 
 	vcpu->arch.sie_block->icptcode = 0;
 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 9fad25109b0d..ecd741ee3276 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -79,7 +79,7 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
 
 static inline int is_vcpu_idle(struct kvm_vcpu *vcpu)
 {
-	return test_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask);
+	return test_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);
 }
 
 static inline int kvm_is_ucontrol(struct kvm *kvm)
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 9928f785c677..53da4ceb16a3 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -610,6 +610,7 @@ static int handle_io_inst(struct kvm_vcpu *vcpu)
 static int handle_pqap(struct kvm_vcpu *vcpu)
 {
 	struct ap_queue_status status = {};
+	crypto_hook pqap_hook;
 	unsigned long reg0;
 	int ret;
 	uint8_t fc;
@@ -654,18 +655,20 @@ static int handle_pqap(struct kvm_vcpu *vcpu)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	/*
-	 * Verify that the hook callback is registered, lock the owner
-	 * and call the hook.
+	 * If the hook callback is registered, there will be a pointer to the
+	 * hook function pointer in the kvm_s390_crypto structure. Lock the
+	 * owner, retrieve the hook function pointer and call the hook.
 	 */
+	down_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem);
 	if (vcpu->kvm->arch.crypto.pqap_hook) {
-		if (!try_module_get(vcpu->kvm->arch.crypto.pqap_hook->owner))
-			return -EOPNOTSUPP;
-		ret = vcpu->kvm->arch.crypto.pqap_hook->hook(vcpu);
-		module_put(vcpu->kvm->arch.crypto.pqap_hook->owner);
+		pqap_hook = *vcpu->kvm->arch.crypto.pqap_hook;
+		ret = pqap_hook(vcpu);
 		if (!ret && vcpu->run->s.regs.gprs[1] & 0x00ff0000)
 			kvm_s390_set_psw_cc(vcpu, 3);
+		up_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem);
 		return ret;
 	}
+	up_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem);
 	/*
 	 * A vfio_driver must register a hook.
 	 * No hook means no driver to enable the SIE CRYCB and no queues.
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 4002a24bc43a..acda4b6fc851 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -510,6 +510,8 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 			prefix_unmapped(vsie_page);
 		scb_s->ecb |= ECB_TE;
 	}
+	/* specification exception interpretation */
+	scb_s->ecb |= scb_o->ecb & ECB_SPECI;
 	/* branch prediction */
 	if (test_kvm_facility(vcpu->kvm, 82))
 		scb_s->fpf |= scb_o->fpf & FPF_BPBC;
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index 94ca99bde59d..a596e69d3c47 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -204,69 +204,6 @@ unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long
 }
 EXPORT_SYMBOL(raw_copy_to_user);
 
-static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from,
-					       unsigned long size)
-{
-	unsigned long tmp1, tmp2;
-
-	tmp1 = -4096UL;
-	/* FIXME: copy with reduced length. */
-	asm volatile(
-		"   lgr	  0,%[spec]\n"
-		"0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
-		"   jz	  2f\n"
-		"1: algr  %0,%3\n"
-		"   slgr  %1,%3\n"
-		"   slgr  %2,%3\n"
-		"   j	  0b\n"
-		"2:slgr  %0,%0\n"
-		"3: \n"
-		EX_TABLE(0b,3b)
-		: "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2)
-		: [spec] "d" (0x810081UL)
-		: "cc", "memory", "0");
-	return size;
-}
-
-static inline unsigned long copy_in_user_mvc(void __user *to, const void __user *from,
-					     unsigned long size)
-{
-	unsigned long tmp1;
-
-	asm volatile(
-		"   sacf  256\n"
-		"   aghi  %0,-1\n"
-		"   jo	  5f\n"
-		"   bras  %3,3f\n"
-		"0: aghi  %0,257\n"
-		"1: mvc	  0(1,%1),0(%2)\n"
-		"   la	  %1,1(%1)\n"
-		"   la	  %2,1(%2)\n"
-		"   aghi  %0,-1\n"
-		"   jnz	  1b\n"
-		"   j	  5f\n"
-		"2: mvc	  0(256,%1),0(%2)\n"
-		"   la	  %1,256(%1)\n"
-		"   la	  %2,256(%2)\n"
-		"3: aghi  %0,-256\n"
-		"   jnm	  2b\n"
-		"4: ex	  %0,1b-0b(%3)\n"
-		"5: slgr  %0,%0\n"
-		"6: sacf  768\n"
-		EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
-		: "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1)
-		: : "cc", "memory");
-	return size;
-}
-
-unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
-{
-	if (copy_with_mvcos())
-		return copy_in_user_mvcos(to, from, n);
-	return copy_in_user_mvc(to, from, n);
-}
-EXPORT_SYMBOL(raw_copy_in_user);
-
 static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size)
 {
 	unsigned long tmp1, tmp2;
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 9bb2c7512cd5..4d3b33ce81c6 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -27,7 +27,6 @@
 
 /**
  * gmap_alloc - allocate and initialize a guest address space
- * @mm: pointer to the parent mm_struct
  * @limit: maximum address of the gmap address space
  *
  * Returns a guest address space structure.
@@ -504,7 +503,7 @@ EXPORT_SYMBOL_GPL(gmap_translate);
 
 /**
  * gmap_unlink - disconnect a page table from the gmap shadow tables
- * @gmap: pointer to guest mapping meta data structure
+ * @mm: pointer to the parent mm_struct
  * @table: pointer to the host page table
  * @vmaddr: vm address associated with the host page table
  */
@@ -527,7 +526,7 @@ static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new,
 			   unsigned long gaddr);
 
 /**
- * gmap_link - set up shadow page tables to connect a host to a guest address
+ * __gmap_link - set up shadow page tables to connect a host to a guest address
  * @gmap: pointer to guest mapping meta data structure
  * @gaddr: guest address
  * @vmaddr: vm address
@@ -1971,7 +1970,7 @@ out_free:
 EXPORT_SYMBOL_GPL(gmap_shadow_sgt);
 
 /**
- * gmap_shadow_lookup_pgtable - find a shadow page table
+ * gmap_shadow_pgt_lookup - find a shadow page table
  * @sg: pointer to the shadow guest address space structure
  * @saddr: the address in the shadow aguest address space
  * @pgt: parent gmap address of the page table to get shadowed
@@ -2165,7 +2164,7 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
 }
 EXPORT_SYMBOL_GPL(gmap_shadow_page);
 
-/**
+/*
  * gmap_shadow_notify - handle notifications for shadow gmap
  *
  * Called with sg->parent->shadow_lock.
@@ -2225,7 +2224,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
 /**
  * ptep_notify - call all invalidation callbacks for a specific pte.
  * @mm: pointer to the process mm_struct
- * @addr: virtual address in the process address space
+ * @vmaddr: virtual address in the process address space
  * @pte: pointer to the page table entry
  * @bits: bits from the pgste that caused the notify call
  *
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index f3db3caa8447..a04faf49001a 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -187,9 +187,9 @@ static void pv_init(void)
 		return;
 
 	/* make sure bounce buffers are shared */
+	swiotlb_force = SWIOTLB_FORCE;
 	swiotlb_init(1);
 	swiotlb_update_mem_attributes();
-	swiotlb_force = SWIOTLB_FORCE;
 }
 
 void __init mem_init(void)
@@ -307,8 +307,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
 	return rc;
 }
 
-void arch_remove_memory(int nid, u64 start, u64 size,
-			struct vmem_altmap *altmap)
+void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index eec3a9d7176e..034721a68d8f 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -834,7 +834,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 }
 EXPORT_SYMBOL(set_guest_storage_key);
 
-/**
+/*
  * Conditionally set a guest storage key (handling csske).
  * oldkey will be updated when either mr or mc is set and a pointer is given.
  *
@@ -867,7 +867,7 @@ int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 }
 EXPORT_SYMBOL(cond_set_guest_storage_key);
 
-/**
+/*
  * Reset a guest reference bit (rrbe), returning the reference and changed bit.
  *
  * Returns < 0 in case of error, otherwise the cc to be reported to the guest.
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 51dc2215a2b7..be077b39da33 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -383,8 +383,8 @@ static int clp_find_pci(struct clp_req_rsp_list_pci *rrb, u32 fid,
 		rc = clp_list_pci_req(rrb, &resume_token, &nentries);
 		if (rc)
 			return rc;
+		fh_list = rrb->response.fh_list;
 		for (i = 0; i < nentries; i++) {
-			fh_list = rrb->response.fh_list;
 			if (fh_list[i].fid == fid) {
 				*entry = fh_list[i];
 				return 0;
@@ -449,14 +449,17 @@ int clp_get_state(u32 fid, enum zpci_state *state)
 	struct clp_fh_list_entry entry;
 	int rc;
 
-	*state = ZPCI_FN_STATE_RESERVED;
 	rrb = clp_alloc_block(GFP_ATOMIC);
 	if (!rrb)
 		return -ENOMEM;
 
 	rc = clp_find_pci(rrb, fid, &entry);
-	if (!rc)
+	if (!rc) {
 		*state = entry.config_state;
+	} else if (rc == -ENODEV) {
+		*state = ZPCI_FN_STATE_RESERVED;
+		rc = 0;
+	}
 
 	clp_free_block(rrb);
 	return rc;
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 58f2f7abea96..93223bd110c3 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -487,18 +487,18 @@ static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
 	unsigned int max = dma_get_max_seg_size(dev);
 	unsigned int size = s->offset + s->length;
 	unsigned int offset = s->offset;
-	int count = 0, i;
+	int count = 0, i, ret;
 
 	for (i = 1; i < nr_elements; i++) {
 		s = sg_next(s);
 
-		s->dma_address = DMA_MAPPING_ERROR;
 		s->dma_length = 0;
 
 		if (s->offset || (size & ~PAGE_MASK) ||
 		    size + s->length > max) {
-			if (__s390_dma_map_sg(dev, start, size,
-					      &dma->dma_address, dir))
+			ret = __s390_dma_map_sg(dev, start, size,
+						&dma->dma_address, dir);
+			if (ret)
 				goto unmap;
 
 			dma->dma_address += offset;
@@ -511,7 +511,8 @@ static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
 		}
 		size += s->length;
 	}
-	if (__s390_dma_map_sg(dev, start, size, &dma->dma_address, dir))
+	ret = __s390_dma_map_sg(dev, start, size, &dma->dma_address, dir);
+	if (ret)
 		goto unmap;
 
 	dma->dma_address += offset;
@@ -523,7 +524,7 @@ unmap:
 		s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s),
 				     dir, attrs);
 
-	return 0;
+	return ret;
 }
 
 static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index f37280e805ea..6904f4bdbf00 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -69,6 +69,7 @@ config SUPERH
 	select RTC_LIB
 	select SET_FS
 	select SPARSE_IRQ
+	select TRACE_IRQFLAGS_SUPPORT
 	help
 	  The SuperH is a RISC processor targeted for use in embedded systems
 	  and consumer electronics; it was also used in the Sega Dreamcast
diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug
index 28a43d63bde1..958f790273ab 100644
--- a/arch/sh/Kconfig.debug
+++ b/arch/sh/Kconfig.debug
@@ -1,8 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 
-config TRACE_IRQFLAGS_SUPPORT
-	def_bool y
-
 config SH_STANDARD_BIOS
 	bool "Use LinuxSH standard BIOS"
 	help
diff --git a/arch/sh/boot/compressed/install.sh b/arch/sh/boot/compressed/install.sh
deleted file mode 100644
index f9f41818b17e..000000000000
--- a/arch/sh/boot/compressed/install.sh
+++ /dev/null
@@ -1,56 +0,0 @@
-#!/bin/sh
-#
-# arch/sh/boot/install.sh
-#
-# This file is subject to the terms and conditions of the GNU General Public
-# License.  See the file "COPYING" in the main directory of this archive
-# for more details.
-#
-# Copyright (C) 1995 by Linus Torvalds
-#
-# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
-# Adapted from code in arch/i386/boot/install.sh by Russell King
-# Adapted from code in arch/arm/boot/install.sh by Stuart Menefy
-#
-# "make install" script for sh architecture
-#
-# Arguments:
-#   $1 - kernel version
-#   $2 - kernel image file
-#   $3 - kernel map file
-#   $4 - default install path (blank if root directory)
-#
-
-# User may have a custom install script
-
-if [ -x /sbin/${INSTALLKERNEL} ]; then
-  exec /sbin/${INSTALLKERNEL} "$@"
-fi
-
-if [ "$2" = "zImage" ]; then
-# Compressed install
-  echo "Installing compressed kernel"
-  if [ -f $4/vmlinuz-$1 ]; then
-    mv $4/vmlinuz-$1 $4/vmlinuz.old
-  fi
-
-  if [ -f $4/System.map-$1 ]; then
-    mv $4/System.map-$1 $4/System.old
-  fi
-
-  cat $2 > $4/vmlinuz-$1
-  cp $3 $4/System.map-$1
-else
-# Normal install
-  echo "Installing normal kernel"
-  if [ -f $4/vmlinux-$1 ]; then
-    mv $4/vmlinux-$1 $4/vmlinux.old
-  fi
-
-  if [ -f $4/System.map ]; then
-    mv $4/System.map $4/System.old
-  fi
-
-  cat $2 > $4/vmlinux-$1
-  cp $3 $4/System.map
-fi
diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h
index 4486a865ff62..372afa82fee6 100644
--- a/arch/sh/include/asm/cacheflush.h
+++ b/arch/sh/include/asm/cacheflush.h
@@ -63,6 +63,8 @@ static inline void flush_anon_page(struct vm_area_struct *vma,
 	if (boot_cpu_data.dcache.n_aliases && PageAnon(page))
 		__flush_anon_page(page, vmaddr);
 }
+
+#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1
 static inline void flush_kernel_vmap_range(void *addr, int size)
 {
 	__flush_wback_region(addr, size);
@@ -72,12 +74,6 @@ static inline void invalidate_kernel_vmap_range(void *addr, int size)
 	__flush_invalidate_region(addr, size);
 }
 
-#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
-static inline void flush_kernel_dcache_page(struct page *page)
-{
-	flush_dcache_page(page);
-}
-
 extern void copy_to_user_page(struct vm_area_struct *vma,
 	struct page *page, unsigned long vaddr, void *dst, const void *src,
 	unsigned long len);
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index 7bbd6700ae4b..208f131659c5 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -449,3 +449,5 @@
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
+# 447 reserved for memfd_secret
+448	common	process_mrelease		sys_process_mrelease
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index ce26c7f8950a..506784702430 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -414,8 +414,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
 	return ret;
 }
 
-void arch_remove_memory(int nid, u64 start, u64 size,
-			struct vmem_altmap *altmap)
+void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 {
 	unsigned long start_pfn = PFN_DOWN(start);
 	unsigned long nr_pages = size >> PAGE_SHIFT;
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index fa650e4eadba..b120ed947f50 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -47,6 +47,7 @@ config SPARC
 	select NEED_DMA_MAP_STATE
 	select NEED_SG_DMA_LENGTH
 	select SET_FS
+	select TRACE_IRQFLAGS_SUPPORT
 
 config SPARC32
 	def_bool !64BIT
diff --git a/arch/sparc/Kconfig.debug b/arch/sparc/Kconfig.debug
index 50a918d496c8..6b2bec1888b3 100644
--- a/arch/sparc/Kconfig.debug
+++ b/arch/sparc/Kconfig.debug
@@ -1,9 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 
-config TRACE_IRQFLAGS_SUPPORT
-	bool
-	default y
-
 config DEBUG_DCFLUSH
 	bool "D-cache flush debugging"
 	depends on SPARC64 && DEBUG_KERNEL
diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
index 4e65245bc755..24fb5a99f439 100644
--- a/arch/sparc/Makefile
+++ b/arch/sparc/Makefile
@@ -72,7 +72,8 @@ image zImage uImage tftpboot.img vmlinux.aout: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
 install:
-	$(Q)$(MAKE) $(build)=$(boot) $@
+	sh $(srctree)/$(boot)/install.sh $(KERNELRELEASE) $(KBUILD_IMAGE) \
+		System.map "$(INSTALL_PATH)"
 
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/sparc/boot/Makefile b/arch/sparc/boot/Makefile
index 380e2b018992..849236d4eca4 100644
--- a/arch/sparc/boot/Makefile
+++ b/arch/sparc/boot/Makefile
@@ -70,7 +70,3 @@ $(obj)/image: vmlinux FORCE
 $(obj)/tftpboot.img: $(obj)/image $(obj)/piggyback System.map $(ROOT_IMG) FORCE
 	$(call if_changed,elftoaout)
 	$(call if_changed,piggy)
-
-install:
-	sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(obj)/zImage \
-		System.map "$(INSTALL_PATH)"
diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h
index 8b63410e830f..bd949fcf9d63 100644
--- a/arch/sparc/include/asm/compat.h
+++ b/arch/sparc/include/asm/compat.h
@@ -116,25 +116,6 @@ struct compat_statfs {
 
 #define COMPAT_OFF_T_MAX	0x7fffffff
 
-#ifdef CONFIG_COMPAT
-static inline void __user *arch_compat_alloc_user_space(long len)
-{
-	struct pt_regs *regs = current_thread_info()->kregs;
-	unsigned long usp = regs->u_regs[UREG_I6];
-
-	if (test_thread_64bit_stack(usp))
-		usp += STACK_BIAS;
-
-	if (test_thread_flag(TIF_32BIT))
-		usp &= 0xffffffffUL;
-
-	usp -= len;
-	usp &= ~0x7UL;
-
-	return (void __user *) usp;
-}
-#endif
-
 struct compat_ipc64_perm {
 	compat_key_t key;
 	__compat_uid32_t uid;
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index a034f571d869..da0363692528 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -448,7 +448,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
 	iommu = dev->archdata.iommu;
 	strbuf = dev->archdata.stc;
 	if (nelems == 0 || !iommu)
-		return 0;
+		return -EINVAL;
 
 	spin_lock_irqsave(&iommu->lock, flags);
 
@@ -546,7 +546,6 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
 
 	if (outcount < incount) {
 		outs = sg_next(outs);
-		outs->dma_address = DMA_MAPPING_ERROR;
 		outs->dma_length = 0;
 	}
 
@@ -572,7 +571,6 @@ iommu_map_failed:
 			iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
 					     IOMMU_ERROR_CODE);
 
-			s->dma_address = DMA_MAPPING_ERROR;
 			s->dma_length = 0;
 		}
 		if (s == outs)
@@ -580,7 +578,7 @@ iommu_map_failed:
 	}
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
-	return 0;
+	return -EINVAL;
 }
 
 /* If contexts are being used, they are the same in all of the mappings
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index 9de57e88f7a1..384480971805 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -486,7 +486,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
 
 	iommu = dev->archdata.iommu;
 	if (nelems == 0 || !iommu)
-		return 0;
+		return -EINVAL;
 	atu = iommu->atu;
 
 	prot = HV_PCI_MAP_ATTR_READ;
@@ -594,7 +594,6 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
 
 	if (outcount < incount) {
 		outs = sg_next(outs);
-		outs->dma_address = DMA_MAPPING_ERROR;
 		outs->dma_length = 0;
 	}
 
@@ -611,7 +610,6 @@ iommu_map_failed:
 			iommu_tbl_range_free(tbl, vaddr, npages,
 					     IOMMU_ERROR_CODE);
 			/* XXX demap? XXX */
-			s->dma_address = DMA_MAPPING_ERROR;
 			s->dma_length = 0;
 		}
 		if (s == outs)
@@ -619,7 +617,7 @@ iommu_map_failed:
 	}
 	local_irq_restore(flags);
 
-	return 0;
+	return -EINVAL;
 }
 
 static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index 93983d6d431d..bbbe0cfef746 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -8,9 +8,6 @@
 /*
  * This file handles the architecture-dependent parts of process handling..
  */
-
-#include <stdarg.h>
-
 #include <linux/elfcore.h>
 #include <linux/errno.h>
 #include <linux/module.h>
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 547b06b49ce3..d1cc410d2f64 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -9,9 +9,6 @@
 /*
  * This file handles the architecture-dependent parts of process handling..
  */
-
-#include <stdarg.h>
-
 #include <linux/errno.h>
 #include <linux/export.h>
 #include <linux/sched.h>
@@ -458,7 +455,7 @@ static unsigned long clone_stackframe(unsigned long csp, unsigned long psp)
 
 	distance = fp - psp;
 	rval = (csp - distance);
-	if (copy_in_user((void __user *) rval, (void __user *) psp, distance))
+	if (raw_copy_in_user((void __user *)rval, (void __user *)psp, distance))
 		rval = 0;
 	else if (!stack_64bit) {
 		if (put_user(((u32)csp),
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index 4276b9e003ca..6cc124a3bb98 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -435,9 +435,9 @@ static int setup_frame32(struct ksignal *ksig, struct pt_regs *regs,
 			      (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int));
 
 	if (!wsaved) {
-		err |= copy_in_user((u32 __user *)sf,
-				    (u32 __user *)(regs->u_regs[UREG_FP]),
-				    sizeof(struct reg_window32));
+		err |= raw_copy_in_user((u32 __user *)sf,
+					(u32 __user *)(regs->u_regs[UREG_FP]),
+					sizeof(struct reg_window32));
 	} else {
 		struct reg_window *rp;
 
@@ -567,9 +567,9 @@ static int setup_rt_frame32(struct ksignal *ksig, struct pt_regs *regs,
 	err |= put_compat_sigset(&sf->mask, oldset, sizeof(compat_sigset_t));
 
 	if (!wsaved) {
-		err |= copy_in_user((u32 __user *)sf,
-				    (u32 __user *)(regs->u_regs[UREG_FP]),
-				    sizeof(struct reg_window32));
+		err |= raw_copy_in_user((u32 __user *)sf,
+					(u32 __user *)(regs->u_regs[UREG_FP]),
+					sizeof(struct reg_window32));
 	} else {
 		struct reg_window *rp;
 
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index cea23cf95600..2a78d2af1265 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -406,10 +406,10 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
 	err |= copy_to_user(&sf->mask, sigmask_to_save(), sizeof(sigset_t));
 
 	if (!wsaved) {
-		err |= copy_in_user((u64 __user *)sf,
-				    (u64 __user *)(regs->u_regs[UREG_FP] +
-						   STACK_BIAS),
-				    sizeof(struct reg_window));
+		err |= raw_copy_in_user((u64 __user *)sf,
+					(u64 __user *)(regs->u_regs[UREG_FP] +
+					   STACK_BIAS),
+					sizeof(struct reg_window));
 	} else {
 		struct reg_window *rp;
 
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index f520e9cd2c78..c37764dc764d 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -365,12 +365,12 @@
 299	common	unshare			sys_unshare
 300	common	set_robust_list		sys_set_robust_list		compat_sys_set_robust_list
 301	common	get_robust_list		sys_get_robust_list		compat_sys_get_robust_list
-302	common	migrate_pages		sys_migrate_pages		compat_sys_migrate_pages
-303	common	mbind			sys_mbind			compat_sys_mbind
-304	common	get_mempolicy		sys_get_mempolicy		compat_sys_get_mempolicy
-305	common	set_mempolicy		sys_set_mempolicy		compat_sys_set_mempolicy
+302	common	migrate_pages		sys_migrate_pages
+303	common	mbind			sys_mbind
+304	common	get_mempolicy		sys_get_mempolicy
+305	common	set_mempolicy		sys_set_mempolicy
 306	common	kexec_load		sys_kexec_load			compat_sys_kexec_load
-307	common	move_pages		sys_move_pages			compat_sys_move_pages
+307	common	move_pages		sys_move_pages
 308	common	getcpu			sys_getcpu
 309	common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
 310	32	utimensat		sys_utimensat_time32
@@ -492,3 +492,5 @@
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
+# 447 reserved for memfd_secret
+448	common	process_mrelease		sys_process_mrelease
diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c
index 0c0342e5b10d..9e3f6933ca13 100644
--- a/arch/sparc/mm/iommu.c
+++ b/arch/sparc/mm/iommu.c
@@ -256,7 +256,7 @@ static int __sbus_iommu_map_sg(struct device *dev, struct scatterlist *sgl,
 		sg->dma_address =__sbus_iommu_map_page(dev, sg_page(sg),
 				sg->offset, sg->length, per_page_flush);
 		if (sg->dma_address == DMA_MAPPING_ERROR)
-			return 0;
+			return -EIO;
 		sg->dma_length = sg->length;
 	}
 
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 77e66d3719f6..c18b45f75d41 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -22,7 +22,9 @@ config UML
 	select GENERIC_CPU_DEVICES
 	select HAVE_GCC_PLUGINS
 	select SET_FS
+	select TRACE_IRQFLAGS_SUPPORT
 	select TTY # Needed for line.c
+	select HAVE_ARCH_VMAP_STACK
 
 config MMU
 	bool
@@ -52,10 +54,6 @@ config ISA
 config SBUS
 	bool
 
-config TRACE_IRQFLAGS_SUPPORT
-	bool
-	default y
-
 config LOCKDEP_SUPPORT
 	bool
 	default y
diff --git a/arch/um/Makefile b/arch/um/Makefile
index 12a7acef0357..f2fe63bfd819 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -41,8 +41,8 @@ endif
 
 HOST_DIR := arch/$(HEADER_ARCH)
 
-include $(ARCH_DIR)/Makefile-skas
-include $(HOST_DIR)/Makefile.um
+include $(srctree)/$(ARCH_DIR)/Makefile-skas
+include $(srctree)/$(HOST_DIR)/Makefile.um
 
 core-y += $(HOST_DIR)/um/
 
@@ -76,7 +76,7 @@ USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -I%,,$(KBUILD_CFLAGS))) \
 		-idirafter $(objtree)/include -D__KERNEL__ -D__UM_HOST__
 
 #This will adjust *FLAGS accordingly to the platform.
-include $(ARCH_DIR)/Makefile-os-$(OS)
+include $(srctree)/$(ARCH_DIR)/Makefile-os-$(OS)
 
 KBUILD_CPPFLAGS += -I$(srctree)/$(HOST_DIR)/include \
 		   -I$(srctree)/$(HOST_DIR)/include/uapi \
diff --git a/arch/um/drivers/rtc_user.c b/arch/um/drivers/rtc_user.c
index 4016bc1d577e..7c3cec4c68cf 100644
--- a/arch/um/drivers/rtc_user.c
+++ b/arch/um/drivers/rtc_user.c
@@ -3,6 +3,7 @@
  * Copyright (C) 2020 Intel Corporation
  * Author: Johannes Berg <johannes@sipsolutions.net>
  */
+#include <stdbool.h>
 #include <os.h>
 #include <errno.h>
 #include <sched.h>
diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c
index bae53220ce26..e4ffeb9a1fa4 100644
--- a/arch/um/drivers/vector_user.c
+++ b/arch/um/drivers/vector_user.c
@@ -3,6 +3,7 @@
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  */
 
+#include <stdbool.h>
 #include <stdio.h>
 #include <unistd.h>
 #include <stdarg.h>
diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c
index 0b802834f40a..c08066633023 100644
--- a/arch/um/drivers/virt-pci.c
+++ b/arch/um/drivers/virt-pci.c
@@ -56,6 +56,13 @@ static unsigned long um_pci_msi_used[BITS_TO_LONGS(MAX_MSI_VECTORS)];
 
 #define UM_VIRT_PCI_MAXDELAY 40000
 
+struct um_pci_message_buffer {
+	struct virtio_pcidev_msg hdr;
+	u8 data[8];
+};
+
+static struct um_pci_message_buffer __percpu *um_pci_msg_bufs;
+
 static int um_pci_send_cmd(struct um_pci_device *dev,
 			   struct virtio_pcidev_msg *cmd,
 			   unsigned int cmd_size,
@@ -68,11 +75,12 @@ static int um_pci_send_cmd(struct um_pci_device *dev,
 		[1] = extra ? &extra_sg : &in_sg,
 		[2] = extra ? &in_sg : NULL,
 	};
+	struct um_pci_message_buffer *buf;
 	int delay_count = 0;
 	int ret, len;
 	bool posted;
 
-	if (WARN_ON(cmd_size < sizeof(*cmd)))
+	if (WARN_ON(cmd_size < sizeof(*cmd) || cmd_size > sizeof(*buf)))
 		return -EINVAL;
 
 	switch (cmd->op) {
@@ -88,6 +96,9 @@ static int um_pci_send_cmd(struct um_pci_device *dev,
 		break;
 	}
 
+	buf = get_cpu_var(um_pci_msg_bufs);
+	memcpy(buf, cmd, cmd_size);
+
 	if (posted) {
 		u8 *ncmd = kmalloc(cmd_size + extra_size, GFP_ATOMIC);
 
@@ -102,7 +113,10 @@ static int um_pci_send_cmd(struct um_pci_device *dev,
 		} else {
 			/* try without allocating memory */
 			posted = false;
+			cmd = (void *)buf;
 		}
+	} else {
+		cmd = (void *)buf;
 	}
 
 	sg_init_one(&out_sg, cmd, cmd_size);
@@ -118,11 +132,12 @@ static int um_pci_send_cmd(struct um_pci_device *dev,
 				posted ? cmd : HANDLE_NO_FREE(cmd),
 				GFP_ATOMIC);
 	if (ret)
-		return ret;
+		goto out;
 
 	if (posted) {
 		virtqueue_kick(dev->cmd_vq);
-		return 0;
+		ret = 0;
+		goto out;
 	}
 
 	/* kick and poll for getting a response on the queue */
@@ -148,6 +163,8 @@ static int um_pci_send_cmd(struct um_pci_device *dev,
 	}
 	clear_bit(UM_PCI_STAT_WAITING, &dev->status);
 
+out:
+	put_cpu_var(um_pci_msg_bufs);
 	return ret;
 }
 
@@ -161,12 +178,17 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset,
 		.size = size,
 		.addr = offset,
 	};
-	/* maximum size - we may only use parts of it */
-	u8 data[8];
+	/* buf->data is maximum size - we may only use parts of it */
+	struct um_pci_message_buffer *buf;
+	u8 *data;
+	unsigned long ret = ~0ULL;
 
 	if (!dev)
 		return ~0ULL;
 
+	buf = get_cpu_var(um_pci_msg_bufs);
+	data = buf->data;
+
 	memset(data, 0xff, sizeof(data));
 
 	switch (size) {
@@ -179,27 +201,34 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset,
 		break;
 	default:
 		WARN(1, "invalid config space read size %d\n", size);
-		return ~0ULL;
+		goto out;
 	}
 
-	if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0,
-			    data, sizeof(data)))
-		return ~0ULL;
+	if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, 8))
+		goto out;
 
 	switch (size) {
 	case 1:
-		return data[0];
+		ret = data[0];
+		break;
 	case 2:
-		return le16_to_cpup((void *)data);
+		ret = le16_to_cpup((void *)data);
+		break;
 	case 4:
-		return le32_to_cpup((void *)data);
+		ret = le32_to_cpup((void *)data);
+		break;
 #ifdef CONFIG_64BIT
 	case 8:
-		return le64_to_cpup((void *)data);
+		ret = le64_to_cpup((void *)data);
+		break;
 #endif
 	default:
-		return ~0ULL;
+		break;
 	}
+
+out:
+	put_cpu_var(um_pci_msg_bufs);
+	return ret;
 }
 
 static void um_pci_cfgspace_write(void *priv, unsigned int offset, int size,
@@ -272,8 +301,13 @@ static void um_pci_bar_copy_from(void *priv, void *buffer,
 static unsigned long um_pci_bar_read(void *priv, unsigned int offset,
 				     int size)
 {
-	/* maximum size - we may only use parts of it */
-	u8 data[8];
+	/* buf->data is maximum size - we may only use parts of it */
+	struct um_pci_message_buffer *buf;
+	u8 *data;
+	unsigned long ret = ~0ULL;
+
+	buf = get_cpu_var(um_pci_msg_bufs);
+	data = buf->data;
 
 	switch (size) {
 	case 1:
@@ -285,25 +319,33 @@ static unsigned long um_pci_bar_read(void *priv, unsigned int offset,
 		break;
 	default:
 		WARN(1, "invalid config space read size %d\n", size);
-		return ~0ULL;
+		goto out;
 	}
 
 	um_pci_bar_copy_from(priv, data, offset, size);
 
 	switch (size) {
 	case 1:
-		return data[0];
+		ret = data[0];
+		break;
 	case 2:
-		return le16_to_cpup((void *)data);
+		ret = le16_to_cpup((void *)data);
+		break;
 	case 4:
-		return le32_to_cpup((void *)data);
+		ret = le32_to_cpup((void *)data);
+		break;
 #ifdef CONFIG_64BIT
 	case 8:
-		return le64_to_cpup((void *)data);
+		ret = le64_to_cpup((void *)data);
+		break;
 #endif
 	default:
-		return ~0ULL;
+		break;
 	}
+
+out:
+	put_cpu_var(um_pci_msg_bufs);
+	return ret;
 }
 
 static void um_pci_bar_copy_to(void *priv, unsigned int offset,
@@ -810,7 +852,7 @@ void *pci_root_bus_fwnode(struct pci_bus *bus)
 	return um_pci_fwnode;
 }
 
-int um_pci_init(void)
+static int um_pci_init(void)
 {
 	int err, i;
 
@@ -823,10 +865,16 @@ int um_pci_init(void)
 		 "No virtio device ID configured for PCI - no PCI support\n"))
 		return 0;
 
-	bridge = pci_alloc_host_bridge(0);
-	if (!bridge)
+	um_pci_msg_bufs = alloc_percpu(struct um_pci_message_buffer);
+	if (!um_pci_msg_bufs)
 		return -ENOMEM;
 
+	bridge = pci_alloc_host_bridge(0);
+	if (!bridge) {
+		err = -ENOMEM;
+		goto free;
+	}
+
 	um_pci_fwnode = irq_domain_alloc_named_fwnode("um-pci");
 	if (!um_pci_fwnode) {
 		err = -ENOMEM;
@@ -878,18 +926,22 @@ free:
 		irq_domain_remove(um_pci_inner_domain);
 	if (um_pci_fwnode)
 		irq_domain_free_fwnode(um_pci_fwnode);
-	pci_free_resource_list(&bridge->windows);
-	pci_free_host_bridge(bridge);
+	if (bridge) {
+		pci_free_resource_list(&bridge->windows);
+		pci_free_host_bridge(bridge);
+	}
+	free_percpu(um_pci_msg_bufs);
 	return err;
 }
 module_init(um_pci_init);
 
-void um_pci_exit(void)
+static void um_pci_exit(void)
 {
 	unregister_virtio_driver(&um_pci_virtio_driver);
 	irq_domain_remove(um_pci_msi_domain);
 	irq_domain_remove(um_pci_inner_domain);
 	pci_free_resource_list(&bridge->windows);
 	pci_free_host_bridge(bridge);
+	free_percpu(um_pci_msg_bufs);
 }
 module_exit(um_pci_exit);
diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c
index 4412d6febade..d51e445df797 100644
--- a/arch/um/drivers/virtio_uml.c
+++ b/arch/um/drivers/virtio_uml.c
@@ -27,6 +27,7 @@
 #include <linux/virtio_config.h>
 #include <linux/virtio_ring.h>
 #include <linux/time-internal.h>
+#include <linux/virtio-uml.h>
 #include <shared/as-layout.h>
 #include <irq_kern.h>
 #include <init.h>
@@ -1139,7 +1140,7 @@ static int virtio_uml_probe(struct platform_device *pdev)
 		rc = os_connect_socket(pdata->socket_path);
 	} while (rc == -EINTR);
 	if (rc < 0)
-		return rc;
+		goto error_free;
 	vu_dev->sock = rc;
 
 	spin_lock_init(&vu_dev->sock_lock);
@@ -1160,6 +1161,8 @@ static int virtio_uml_probe(struct platform_device *pdev)
 
 error_init:
 	os_close_file(vu_dev->sock);
+error_free:
+	kfree(vu_dev);
 	return rc;
 }
 
diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h
index 065829f443ae..86a8a573b65c 100644
--- a/arch/um/include/shared/irq_user.h
+++ b/arch/um/include/shared/irq_user.h
@@ -7,7 +7,6 @@
 #define __IRQ_USER_H__
 
 #include <sysdep/ptrace.h>
-#include <stdbool.h>
 
 enum um_irq_type {
 	IRQ_READ,
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 60b84edc8a68..96d400387c93 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -8,7 +8,6 @@
 #ifndef __OS_H__
 #define __OS_H__
 
-#include <stdarg.h>
 #include <irq_user.h>
 #include <longjmp.h>
 #include <mm_id.h>
diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c
index 5afac0fef24e..ff5061f29167 100644
--- a/arch/um/kernel/skas/clone.c
+++ b/arch/um/kernel/skas/clone.c
@@ -24,8 +24,7 @@
 void __attribute__ ((__section__ (".__syscall_stub")))
 stub_clone_handler(void)
 {
-	int stack;
-	struct stub_data *data = (void *) ((unsigned long)&stack & ~(UM_KERN_PAGE_SIZE - 1));
+	struct stub_data *data = get_stub_page();
 	long err;
 
 	err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD,
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index ad12f78bda7e..3198c4767387 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -311,7 +311,3 @@ void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 {
 	do_IRQ(WINCH_IRQ, regs);
 }
-
-void trap_init(void)
-{
-}
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index 6de99bb16113..6cf098c23a39 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -67,7 +67,7 @@ int signals_enabled;
 #ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
 static int signals_blocked;
 #else
-#define signals_blocked false
+#define signals_blocked 0
 #endif
 static unsigned int signals_pending;
 static unsigned int signals_active = 0;
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index 07327425d06e..41297ec404bf 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -3,6 +3,7 @@
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  */
 
+#include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1146b85d708b..4e001bbbb425 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -259,6 +259,7 @@ config X86
 	select STACK_VALIDATION			if HAVE_STACK_VALIDATION && (HAVE_STATIC_CALL_INLINE || RETPOLINE)
 	select SYSCTL_EXCEPTION_TRACE
 	select THREAD_INFO_IN_TASK
+	select TRACE_IRQFLAGS_SUPPORT
 	select USER_STACKTRACE_SUPPORT
 	select VIRT_TO_BUS
 	select HAVE_ARCH_KCSAN			if X86_64
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 80b57e7f4947..d3a6f74a94bd 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -1,8 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 
-config TRACE_IRQFLAGS_SUPPORT
-	def_bool y
-
 config TRACE_IRQFLAGS_NMI_SUPPORT
 	def_bool y
 
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index d82d01490dd3..7488cfbbd2f6 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -73,7 +73,7 @@ ifeq ($(CONFIG_X86_32),y)
         KBUILD_CFLAGS += $(cc_stack_align4)
 
         # CPU-specific tuning. Anything which can be shared with UML should go here.
-        include arch/x86/Makefile_32.cpu
+        include $(srctree)/arch/x86/Makefile_32.cpu
         KBUILD_CFLAGS += $(cflags-y)
 
         # temporary until string.h is fixed
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
index cd3056759880..e7355f8b51c2 100644
--- a/arch/x86/Makefile_32.cpu
+++ b/arch/x86/Makefile_32.cpu
@@ -2,13 +2,7 @@
 # CPU tuning section - shared with UML.
 # Must change only cflags-y (or [yn]), not CFLAGS! That makes a difference for UML.
 
-#-mtune exists since gcc 3.4
-HAS_MTUNE	:= $(call cc-option-yn, -mtune=i386)
-ifeq ($(HAS_MTUNE),y)
 tune		= $(call cc-option,-mtune=$(1),$(2))
-else
-tune		= $(call cc-option,-mcpu=$(1),$(2))
-endif
 
 cflags-$(CONFIG_M486SX)		+= -march=i486
 cflags-$(CONFIG_M486)		+= -march=i486
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index ca866f1cca2e..34c9dbb6a47d 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -18,7 +18,7 @@
 
 #ifndef __ASSEMBLY__
 
-#include <stdarg.h>
+#include <linux/stdarg.h>
 #include <linux/types.h>
 #include <linux/edd.h>
 #include <asm/setup.h>
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 9c9c4a888b1d..e81885384f60 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -156,7 +156,6 @@ CONFIG_FORCEDETH=y
 CONFIG_8139TOO=y
 # CONFIG_8139TOO_PIO is not set
 CONFIG_R8169=y
-CONFIG_INPUT_POLLDEV=y
 CONFIG_INPUT_EVDEV=y
 CONFIG_INPUT_JOYSTICK=y
 CONFIG_INPUT_TABLET=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index b60bd2d86034..e8a7a0af2bda 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -148,7 +148,6 @@ CONFIG_SKY2=y
 CONFIG_FORCEDETH=y
 CONFIG_8139TOO=y
 CONFIG_R8169=y
-CONFIG_INPUT_POLLDEV=y
 CONFIG_INPUT_EVDEV=y
 CONFIG_INPUT_JOYSTICK=y
 CONFIG_INPUT_TABLET=y
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index a5beae6daf20..960a021d543e 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -286,7 +286,7 @@
 272	i386	fadvise64_64		sys_ia32_fadvise64_64
 273	i386	vserver
 274	i386	mbind			sys_mbind
-275	i386	get_mempolicy		sys_get_mempolicy		compat_sys_get_mempolicy
+275	i386	get_mempolicy		sys_get_mempolicy
 276	i386	set_mempolicy		sys_set_mempolicy
 277	i386	mq_open			sys_mq_open			compat_sys_mq_open
 278	i386	mq_unlink		sys_mq_unlink
@@ -328,7 +328,7 @@
 314	i386	sync_file_range		sys_ia32_sync_file_range
 315	i386	tee			sys_tee
 316	i386	vmsplice		sys_vmsplice
-317	i386	move_pages		sys_move_pages			compat_sys_move_pages
+317	i386	move_pages		sys_move_pages
 318	i386	getcpu			sys_getcpu
 319	i386	epoll_pwait		sys_epoll_pwait
 320	i386	utimensat		sys_utimensat_time32
@@ -452,3 +452,4 @@
 445	i386	landlock_add_rule	sys_landlock_add_rule
 446	i386	landlock_restrict_self	sys_landlock_restrict_self
 447	i386	memfd_secret		sys_memfd_secret
+448	i386	process_mrelease	sys_process_mrelease
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index f6b57799c1ea..18b5500ea8bf 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -369,6 +369,7 @@
 445	common	landlock_add_rule	sys_landlock_add_rule
 446	common	landlock_restrict_self	sys_landlock_restrict_self
 447	common	memfd_secret		sys_memfd_secret
+448	common	process_mrelease	sys_process_mrelease
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
@@ -397,7 +398,7 @@
 530	x32	set_robust_list		compat_sys_set_robust_list
 531	x32	get_robust_list		compat_sys_get_robust_list
 532	x32	vmsplice		sys_vmsplice
-533	x32	move_pages		compat_sys_move_pages
+533	x32	move_pages		sys_move_pages
 534	x32	preadv			compat_sys_preadv64
 535	x32	pwritev			compat_sys_pwritev64
 536	x32	rt_tgsigqueueinfo	compat_sys_rt_tgsigqueueinfo
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 05c4abc2fdfd..a2dddcc189f6 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -131,7 +131,7 @@ $(obj)/%-x32.o: $(obj)/%.o FORCE
 targets += vdsox32.lds $(vobjx32s-y)
 
 $(obj)/%.so: OBJCOPYFLAGS := -S --remove-section __ex_table
-$(obj)/%.so: $(obj)/%.so.dbg
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
 	$(call if_changed,objcopy)
 
 $(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 5e5b9fc2747f..9bd15241fadb 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -202,8 +202,7 @@ static int load_aout_binary(struct linux_binprm *bprm)
 
 		error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
 				PROT_READ | PROT_EXEC,
-				MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE |
-				MAP_32BIT,
+				MAP_FIXED | MAP_PRIVATE | MAP_32BIT,
 				fd_offset);
 
 		if (error != N_TXTADDR(ex))
@@ -211,8 +210,7 @@ static int load_aout_binary(struct linux_binprm *bprm)
 
 		error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
 				PROT_READ | PROT_WRITE | PROT_EXEC,
-				MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE |
-				MAP_32BIT,
+				MAP_FIXED | MAP_PRIVATE | MAP_32BIT,
 				fd_offset + ex.a_text);
 		if (error != N_DATADDR(ex))
 			return error;
@@ -293,7 +291,7 @@ static int load_aout_library(struct file *file)
 	/* Now use mmap to map the library into memory. */
 	error = vm_mmap(file, start_addr, ex.a_text + ex.a_data,
 			PROT_READ | PROT_WRITE | PROT_EXEC,
-			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_32BIT,
+			MAP_FIXED | MAP_PRIVATE | MAP_32BIT,
 			N_TXTOFF(ex));
 	retval = error;
 	if (error != start_addr)
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 4ae01cdb99de..7516e4199b3c 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -156,19 +156,6 @@ struct compat_shmid64_ds {
 	(!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT))
 #endif
 
-static inline void __user *arch_compat_alloc_user_space(long len)
-{
-	compat_uptr_t sp = task_pt_regs(current)->sp;
-
-	/*
-	 * -128 for the x32 ABI redzone.  For IA32, it is not strictly
-	 * necessary, but not harmful.
-	 */
-	sp -= 128;
-
-	return (void __user *)round_down(sp - len, 16);
-}
-
 static inline bool in_x32_syscall(void)
 {
 #ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index a12a4987154e..cefe1d81e2e8 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -72,7 +72,6 @@ KVM_X86_OP(enable_nmi_window)
 KVM_X86_OP(enable_irq_window)
 KVM_X86_OP(update_cr8_intercept)
 KVM_X86_OP(check_apicv_inhibit_reasons)
-KVM_X86_OP_NULL(pre_update_apicv_exec_ctrl)
 KVM_X86_OP(refresh_apicv_exec_ctrl)
 KVM_X86_OP(hwapic_irr_update)
 KVM_X86_OP(hwapic_isr_update)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index af6ce8d4c86a..f8f48a7ec577 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -37,9 +37,21 @@
 
 #define __KVM_HAVE_ARCH_VCPU_DEBUGFS
 
-#define KVM_MAX_VCPUS 288
-#define KVM_SOFT_MAX_VCPUS 240
-#define KVM_MAX_VCPU_ID 1023
+#define KVM_MAX_VCPUS 1024
+#define KVM_SOFT_MAX_VCPUS 710
+
+/*
+ * In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs
+ * might be larger than the actual number of VCPUs because the
+ * APIC ID encodes CPU topology information.
+ *
+ * In the worst case, we'll need less than one extra bit for the
+ * Core ID, and less than one extra bit for the Package (Die) ID,
+ * so ratio of 4 should be enough.
+ */
+#define KVM_VCPU_ID_RATIO 4
+#define KVM_MAX_VCPU_ID (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO)
+
 /* memory slots that are not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 3
 
@@ -124,13 +136,6 @@
 #define KVM_HPAGE_MASK(x)	(~(KVM_HPAGE_SIZE(x) - 1))
 #define KVM_PAGES_PER_HPAGE(x)	(KVM_HPAGE_SIZE(x) / PAGE_SIZE)
 
-static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
-{
-	/* KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K) must be 0. */
-	return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
-		(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
-}
-
 #define KVM_PERMILLE_MMU_PAGES 20
 #define KVM_MIN_ALLOC_MMU_PAGES 64UL
 #define KVM_MMU_HASH_SHIFT 12
@@ -229,7 +234,8 @@ enum x86_intercept_stage;
 	KVM_GUESTDBG_USE_HW_BP | \
 	KVM_GUESTDBG_USE_SW_BP | \
 	KVM_GUESTDBG_INJECT_BP | \
-	KVM_GUESTDBG_INJECT_DB)
+	KVM_GUESTDBG_INJECT_DB | \
+	KVM_GUESTDBG_BLOCKIRQ)
 
 
 #define PFERR_PRESENT_BIT 0
@@ -447,6 +453,7 @@ struct kvm_mmu {
 
 	u64 *pae_root;
 	u64 *pml4_root;
+	u64 *pml5_root;
 
 	/*
 	 * check zero bits on shadow page table entries, these
@@ -482,6 +489,7 @@ struct kvm_pmc {
 	 * ctrl value for fixed counters.
 	 */
 	u64 current_config;
+	bool is_paused;
 };
 
 struct kvm_pmu {
@@ -522,7 +530,6 @@ struct kvm_pmu_ops;
 enum {
 	KVM_DEBUGREG_BP_ENABLED = 1,
 	KVM_DEBUGREG_WONT_EXIT = 2,
-	KVM_DEBUGREG_RELOAD = 4,
 };
 
 struct kvm_mtrr_range {
@@ -723,7 +730,6 @@ struct kvm_vcpu_arch {
 
 	u64 reserved_gpa_bits;
 	int maxphyaddr;
-	int max_tdp_level;
 
 	/* emulate context */
 
@@ -988,6 +994,12 @@ struct kvm_hv {
 	/* How many vCPUs have VP index != vCPU index */
 	atomic_t num_mismatched_vp_indexes;
 
+	/*
+	 * How many SynICs use 'AutoEOI' feature
+	 * (protected by arch.apicv_update_lock)
+	 */
+	unsigned int synic_auto_eoi_used;
+
 	struct hv_partition_assist_pg *hv_pa_pg;
 	struct kvm_hv_syndbg hv_syndbg;
 };
@@ -1002,9 +1014,8 @@ struct msr_bitmap_range {
 /* Xen emulation context */
 struct kvm_xen {
 	bool long_mode;
-	bool shinfo_set;
 	u8 upcall_vector;
-	struct gfn_to_hva_cache shinfo_cache;
+	gfn_t shinfo_gfn;
 };
 
 enum kvm_irqchip_mode {
@@ -1061,6 +1072,9 @@ struct kvm_arch {
 	struct kvm_apic_map __rcu *apic_map;
 	atomic_t apic_map_dirty;
 
+	/* Protects apic_access_memslot_enabled and apicv_inhibit_reasons */
+	struct mutex apicv_update_lock;
+
 	bool apic_access_memslot_enabled;
 	unsigned long apicv_inhibit_reasons;
 
@@ -1213,9 +1227,17 @@ struct kvm_vm_stat {
 	u64 mmu_recycled;
 	u64 mmu_cache_miss;
 	u64 mmu_unsync;
-	u64 lpages;
+	union {
+		struct {
+			atomic64_t pages_4k;
+			atomic64_t pages_2m;
+			atomic64_t pages_1g;
+		};
+		atomic64_t pages[KVM_NR_PAGE_SIZES];
+	};
 	u64 nx_lpage_splits;
 	u64 max_mmu_page_hash_collisions;
+	u64 max_mmu_rmap_size;
 };
 
 struct kvm_vcpu_stat {
@@ -1359,7 +1381,6 @@ struct kvm_x86_ops {
 	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
 	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
 	bool (*check_apicv_inhibit_reasons)(ulong bit);
-	void (*pre_update_apicv_exec_ctrl)(struct kvm *kvm, bool activate);
 	void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
 	void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
 	void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
@@ -1543,12 +1564,12 @@ void kvm_mmu_uninit_vm(struct kvm *kvm);
 void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu);
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
-				      struct kvm_memory_slot *memslot,
+				      const struct kvm_memory_slot *memslot,
 				      int start_level);
 void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
 				   const struct kvm_memory_slot *memslot);
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
-				   struct kvm_memory_slot *memslot);
+				   const struct kvm_memory_slot *memslot);
 void kvm_mmu_zap_all(struct kvm *kvm);
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
 unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
@@ -1744,6 +1765,9 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
 void kvm_request_apicv_update(struct kvm *kvm, bool activate,
 			      unsigned long bit);
 
+void __kvm_request_apicv_update(struct kvm *kvm, bool activate,
+				unsigned long bit);
+
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
@@ -1754,8 +1778,8 @@ void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
 void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd);
 
-void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level,
-		       int tdp_huge_page_level);
+void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
+		       int tdp_max_root_level, int tdp_huge_page_level);
 
 static inline u16 kvm_read_ldt(void)
 {
@@ -1779,11 +1803,6 @@ static inline unsigned long read_msr(unsigned long msr)
 }
 #endif
 
-static inline u32 get_rdx_init_val(void)
-{
-	return 0x600; /* P6 family */
-}
-
 static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
 {
 	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
@@ -1816,31 +1835,6 @@ enum {
 #define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
 #define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
 
-asmlinkage void kvm_spurious_fault(void);
-
-/*
- * Hardware virtualization extension instructions may fault if a
- * reboot turns off virtualization while processes are running.
- * Usually after catching the fault we just panic; during reboot
- * instead the instruction is ignored.
- */
-#define __kvm_handle_fault_on_reboot(insn)				\
-	"666: \n\t"							\
-	insn "\n\t"							\
-	"jmp	668f \n\t"						\
-	"667: \n\t"							\
-	"1: \n\t"							\
-	".pushsection .discard.instr_begin \n\t"			\
-	".long 1b - . \n\t"						\
-	".popsection \n\t"						\
-	"call	kvm_spurious_fault \n\t"				\
-	"1: \n\t"							\
-	".pushsection .discard.instr_end \n\t"				\
-	".long 1b - . \n\t"						\
-	".popsection \n\t"						\
-	"668: \n\t"							\
-	_ASM_EXTABLE(666b, 667b)
-
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 
 int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index e7265a552f4f..45697e04d771 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -58,13 +58,6 @@ raw_copy_to_user(void __user *dst, const void *src, unsigned long size)
 	return copy_user_generic((__force void *)dst, src, size);
 }
 
-static __always_inline __must_check
-unsigned long raw_copy_in_user(void __user *dst, const void __user *src, unsigned long size)
-{
-	return copy_user_generic((__force void *)dst,
-				 (__force void *)src, size);
-}
-
 extern long __copy_user_nocache(void *dst, const void __user *src,
 				unsigned size, int zerorest);
 
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index a6c327f8ad9e..2ef1f6513c68 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -295,6 +295,7 @@ struct kvm_debug_exit_arch {
 #define KVM_GUESTDBG_USE_HW_BP		0x00020000
 #define KVM_GUESTDBG_INJECT_DB		0x00040000
 #define KVM_GUESTDBG_INJECT_BP		0x00080000
+#define KVM_GUESTDBG_BLOCKIRQ		0x00100000
 
 /* for KVM_SET_GUEST_DEBUG */
 struct kvm_guest_debug_arch {
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index 9ac696487b13..ed837383de5c 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -331,7 +331,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start,
 	int i;
 
 	if (iommu_start == -1)
-		return -1;
+		return -ENOMEM;
 
 	for_each_sg(start, s, nelems, i) {
 		unsigned long pages, addr;
@@ -380,13 +380,13 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 		       enum dma_data_direction dir, unsigned long attrs)
 {
 	struct scatterlist *s, *ps, *start_sg, *sgmap;
-	int need = 0, nextneed, i, out, start;
+	int need = 0, nextneed, i, out, start, ret;
 	unsigned long pages = 0;
 	unsigned int seg_size;
 	unsigned int max_seg_size;
 
 	if (nents == 0)
-		return 0;
+		return -EINVAL;
 
 	out		= 0;
 	start		= 0;
@@ -414,8 +414,9 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 			if (!iommu_merge || !nextneed || !need || s->offset ||
 			    (s->length + seg_size > max_seg_size) ||
 			    (ps->offset + ps->length) % PAGE_SIZE) {
-				if (dma_map_cont(dev, start_sg, i - start,
-						 sgmap, pages, need) < 0)
+				ret = dma_map_cont(dev, start_sg, i - start,
+						   sgmap, pages, need);
+				if (ret < 0)
 					goto error;
 				out++;
 
@@ -432,7 +433,8 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 		pages += iommu_num_pages(s->offset, s->length, PAGE_SIZE);
 		ps = s;
 	}
-	if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0)
+	ret = dma_map_cont(dev, start_sg, i - start, sgmap, pages, need);
+	if (ret < 0)
 		goto error;
 	out++;
 	flush_gart();
@@ -456,9 +458,7 @@ error:
 		panic("dma_map_sg: overflow on %lu pages\n", pages);
 
 	iommu_full(dev, pages << PAGE_SHIFT, dir);
-	for_each_sg(sg, s, nents, i)
-		s->dma_address = DMA_MAPPING_ERROR;
-	return 0;
+	return ret;
 }
 
 /* allocate and map a coherent mapping */
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 294ed4392a0e..10562885f5fc 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -109,14 +109,13 @@ static u32 __init allocate_aperture(void)
 	 * memory. Unfortunately we cannot move it up because that would
 	 * make the IOMMU useless.
 	 */
-	addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR,
-				      aper_size, aper_size);
+	addr = memblock_phys_alloc_range(aper_size, aper_size,
+					 GART_MIN_ADDR, GART_MAX_ADDR);
 	if (!addr) {
 		pr_err("Cannot allocate aperture memory hole [mem %#010lx-%#010lx] (%uKB)\n",
 		       addr, addr + aper_size - 1, aper_size >> 10);
 		return 0;
 	}
-	memblock_reserve(addr, aper_size);
 	pr_info("Mapping aperture over RAM [mem %#010lx-%#010lx] (%uKB)\n",
 		addr, addr + aper_size - 1, aper_size >> 10);
 	register_nosave_region(addr >> PAGE_SHIFT,
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index d66af2950e06..b5e36bd0425b 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -985,7 +985,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf,
 	this_leaf->priv = base->nb;
 }
 
-static int __init_cache_level(unsigned int cpu)
+int init_cache_level(unsigned int cpu)
 {
 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 
@@ -1014,7 +1014,7 @@ static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
 	id4_regs->id = c->apicid >> index_msb;
 }
 
-static int __populate_cache_leaves(unsigned int cpu)
+int populate_cache_leaves(unsigned int cpu)
 {
 	unsigned int idx, ret;
 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
@@ -1033,6 +1033,3 @@ static int __populate_cache_leaves(unsigned int cpu)
 
 	return 0;
 }
-
-DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
-DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index a26643dc6bd6..b656456c3a94 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -884,10 +884,11 @@ static void kvm_wait(u8 *ptr, u8 val)
 	} else {
 		local_irq_disable();
 
+		/* safe_halt() will enable IRQ */
 		if (READ_ONCE(*ptr) == val)
 			safe_halt();
-
-		local_irq_enable();
+		else
+			local_irq_enable();
 	}
 }
 
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index aa15132228da..525876e7b9f4 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -154,7 +154,7 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
 	if (num_entries > LDT_ENTRIES)
 		return NULL;
 
-	new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
+	new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL_ACCOUNT);
 	if (!new_ldt)
 		return NULL;
 
@@ -168,9 +168,9 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
 	 * than PAGE_SIZE.
 	 */
 	if (alloc_size > PAGE_SIZE)
-		new_ldt->entries = vzalloc(alloc_size);
+		new_ldt->entries = __vmalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
 	else
-		new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL);
+		new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
 
 	if (!new_ldt->entries) {
 		kfree(new_ldt);
diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
index 95a98413dc32..54a83a744538 100644
--- a/arch/x86/kvm/debugfs.c
+++ b/arch/x86/kvm/debugfs.c
@@ -7,6 +7,8 @@
 #include <linux/kvm_host.h>
 #include <linux/debugfs.h>
 #include "lapic.h"
+#include "mmu.h"
+#include "mmu/mmu_internal.h"
 
 static int vcpu_get_timer_advance_ns(void *data, u64 *val)
 {
@@ -73,3 +75,112 @@ void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_
 				    &vcpu_tsc_scaling_frac_fops);
 	}
 }
+
+/*
+ * This covers statistics <1024 (11=log(1024)+1), which should be enough to
+ * cover RMAP_RECYCLE_THRESHOLD.
+ */
+#define  RMAP_LOG_SIZE  11
+
+static const char *kvm_lpage_str[KVM_NR_PAGE_SIZES] = { "4K", "2M", "1G" };
+
+static int kvm_mmu_rmaps_stat_show(struct seq_file *m, void *v)
+{
+	struct kvm_rmap_head *rmap;
+	struct kvm *kvm = m->private;
+	struct kvm_memory_slot *slot;
+	struct kvm_memslots *slots;
+	unsigned int lpage_size, index;
+	/* Still small enough to be on the stack */
+	unsigned int *log[KVM_NR_PAGE_SIZES], *cur;
+	int i, j, k, l, ret;
+
+	ret = -ENOMEM;
+	memset(log, 0, sizeof(log));
+	for (i = 0; i < KVM_NR_PAGE_SIZES; i++) {
+		log[i] = kcalloc(RMAP_LOG_SIZE, sizeof(unsigned int), GFP_KERNEL);
+		if (!log[i])
+			goto out;
+	}
+
+	mutex_lock(&kvm->slots_lock);
+	write_lock(&kvm->mmu_lock);
+
+	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+		slots = __kvm_memslots(kvm, i);
+		for (j = 0; j < slots->used_slots; j++) {
+			slot = &slots->memslots[j];
+			for (k = 0; k < KVM_NR_PAGE_SIZES; k++) {
+				rmap = slot->arch.rmap[k];
+				lpage_size = kvm_mmu_slot_lpages(slot, k + 1);
+				cur = log[k];
+				for (l = 0; l < lpage_size; l++) {
+					index = ffs(pte_list_count(&rmap[l]));
+					if (WARN_ON_ONCE(index >= RMAP_LOG_SIZE))
+						index = RMAP_LOG_SIZE - 1;
+					cur[index]++;
+				}
+			}
+		}
+	}
+
+	write_unlock(&kvm->mmu_lock);
+	mutex_unlock(&kvm->slots_lock);
+
+	/* index=0 counts no rmap; index=1 counts 1 rmap */
+	seq_printf(m, "Rmap_Count:\t0\t1\t");
+	for (i = 2; i < RMAP_LOG_SIZE; i++) {
+		j = 1 << (i - 1);
+		k = (1 << i) - 1;
+		seq_printf(m, "%d-%d\t", j, k);
+	}
+	seq_printf(m, "\n");
+
+	for (i = 0; i < KVM_NR_PAGE_SIZES; i++) {
+		seq_printf(m, "Level=%s:\t", kvm_lpage_str[i]);
+		cur = log[i];
+		for (j = 0; j < RMAP_LOG_SIZE; j++)
+			seq_printf(m, "%d\t", cur[j]);
+		seq_printf(m, "\n");
+	}
+
+	ret = 0;
+out:
+	for (i = 0; i < KVM_NR_PAGE_SIZES; i++)
+		kfree(log[i]);
+
+	return ret;
+}
+
+static int kvm_mmu_rmaps_stat_open(struct inode *inode, struct file *file)
+{
+	struct kvm *kvm = inode->i_private;
+
+	if (!kvm_get_kvm_safe(kvm))
+		return -ENOENT;
+
+	return single_open(file, kvm_mmu_rmaps_stat_show, kvm);
+}
+
+static int kvm_mmu_rmaps_stat_release(struct inode *inode, struct file *file)
+{
+	struct kvm *kvm = inode->i_private;
+
+	kvm_put_kvm(kvm);
+
+	return single_release(inode, file);
+}
+
+static const struct file_operations mmu_rmaps_stat_fops = {
+	.open		= kvm_mmu_rmaps_stat_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= kvm_mmu_rmaps_stat_release,
+};
+
+int kvm_arch_create_vm_debugfs(struct kvm *kvm)
+{
+	debugfs_create_file("mmu_rmaps_stat", 0644, kvm->debugfs_dentry, kvm,
+			    &mmu_rmaps_stat_fops);
+	return 0;
+}
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 41d2a53c5dea..232a86a6faaf 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -88,6 +88,10 @@ static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic,
 static void synic_update_vector(struct kvm_vcpu_hv_synic *synic,
 				int vector)
 {
+	struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
+	struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
+	int auto_eoi_old, auto_eoi_new;
+
 	if (vector < HV_SYNIC_FIRST_VALID_VECTOR)
 		return;
 
@@ -96,10 +100,30 @@ static void synic_update_vector(struct kvm_vcpu_hv_synic *synic,
 	else
 		__clear_bit(vector, synic->vec_bitmap);
 
+	auto_eoi_old = bitmap_weight(synic->auto_eoi_bitmap, 256);
+
 	if (synic_has_vector_auto_eoi(synic, vector))
 		__set_bit(vector, synic->auto_eoi_bitmap);
 	else
 		__clear_bit(vector, synic->auto_eoi_bitmap);
+
+	auto_eoi_new = bitmap_weight(synic->auto_eoi_bitmap, 256);
+
+	if (!!auto_eoi_old == !!auto_eoi_new)
+		return;
+
+	mutex_lock(&vcpu->kvm->arch.apicv_update_lock);
+
+	if (auto_eoi_new)
+		hv->synic_auto_eoi_used++;
+	else
+		hv->synic_auto_eoi_used--;
+
+	__kvm_request_apicv_update(vcpu->kvm,
+				   !hv->synic_auto_eoi_used,
+				   APICV_INHIBIT_REASON_HYPERV);
+
+	mutex_unlock(&vcpu->kvm->arch.apicv_update_lock);
 }
 
 static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
@@ -933,12 +957,6 @@ int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
 
 	synic = to_hv_synic(vcpu);
 
-	/*
-	 * Hyper-V SynIC auto EOI SINT's are
-	 * not compatible with APICV, so request
-	 * to deactivate APICV permanently.
-	 */
-	kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_HYPERV);
 	synic->active = true;
 	synic->dont_zero_synic_pages = dont_zero_synic_pages;
 	synic->control = HV_SYNIC_CONTROL_ENABLE;
@@ -2476,6 +2494,8 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
 				ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
 			if (!cpu_smt_possible())
 				ent->eax |= HV_X64_NO_NONARCH_CORESHARING;
+
+			ent->eax |= HV_DEPRECATING_AEOI_RECOMMENDED;
 			/*
 			 * Default number of spinlock retry attempts, matches
 			 * HyperV 2016.
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index a6e218c6140d..5a69cce4d72d 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -220,7 +220,8 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
 	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
 	struct hrtimer *timer;
 
-	if (!kvm_vcpu_is_bsp(vcpu) || !pit)
+	/* Somewhat arbitrarily make vcpu0 the owner of the PIT. */
+	if (vcpu->vcpu_id || !pit)
 		return;
 
 	timer = &pit->pit_state.timer;
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index 11e4065e1617..bbd4a5d18b5d 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -35,11 +35,7 @@ struct kvm_vcpu;
 #define	IOAPIC_INIT			0x5
 #define	IOAPIC_EXTINT			0x7
 
-#ifdef CONFIG_X86
 #define RTC_GSI 8
-#else
-#define RTC_GSI -1U
-#endif
 
 struct dest_map {
 	/* vcpu bitmap where IRQ has been sent */
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ba5a27879f1d..76fb00921203 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -192,6 +192,9 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
 	if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
 		return;
 
+	WARN_ONCE(!irqchip_in_kernel(kvm),
+		  "Dirty APIC map without an in-kernel local APIC");
+
 	mutex_lock(&kvm->arch.apic_map_lock);
 	/*
 	 * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map
@@ -2265,9 +2268,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 	u64 old_value = vcpu->arch.apic_base;
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (!apic)
-		value |= MSR_IA32_APICBASE_BSP;
-
 	vcpu->arch.apic_base = value;
 
 	if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
@@ -2323,6 +2323,13 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	int i;
 
+	if (!init_event) {
+		vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE |
+				       MSR_IA32_APICBASE_ENABLE;
+		if (kvm_vcpu_is_reset_bsp(vcpu))
+			vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
+	}
+
 	if (!apic)
 		return;
 
@@ -2330,8 +2337,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
 	hrtimer_cancel(&apic->lapic_timer.timer);
 
 	if (!init_event) {
-		kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE |
-		                         MSR_IA32_APICBASE_ENABLE);
+		apic->base_address = APIC_DEFAULT_PHYS_BASE;
+
 		kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
 	}
 	kvm_apic_set_version(apic->vcpu);
@@ -2364,9 +2371,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
 	apic->highest_isr_cache = -1;
 	update_divide_count(apic);
 	atomic_set(&apic->lapic_timer.pending, 0);
-	if (kvm_vcpu_is_bsp(vcpu))
-		kvm_lapic_set_base(vcpu,
-				vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
+
 	vcpu->arch.pv_eoi.msr_val = 0;
 	apic_update_ppr(apic);
 	if (vcpu->arch.apicv_active) {
@@ -2476,11 +2481,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
 		lapic_timer_advance_dynamic = false;
 	}
 
-	/*
-	 * APIC is created enabled. This will prevent kvm_lapic_set_base from
-	 * thinking that APIC state has changed.
-	 */
-	vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
 	static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */
 	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
 
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 83e6c6965f1e..e9688a9f7b57 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -240,4 +240,29 @@ static inline bool kvm_memslots_have_rmaps(struct kvm *kvm)
 	return smp_load_acquire(&kvm->arch.memslots_have_rmaps);
 }
 
+static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
+{
+	/* KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K) must be 0. */
+	return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
+		(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
+}
+
+static inline unsigned long
+__kvm_mmu_slot_lpages(struct kvm_memory_slot *slot, unsigned long npages,
+		      int level)
+{
+	return gfn_to_index(slot->base_gfn + npages - 1,
+			    slot->base_gfn, level) + 1;
+}
+
+static inline unsigned long
+kvm_mmu_slot_lpages(struct kvm_memory_slot *slot, int level)
+{
+	return __kvm_mmu_slot_lpages(slot, slot->npages, level);
+}
+
+static inline void kvm_update_page_stats(struct kvm *kvm, int level, int count)
+{
+	atomic64_add(count, &kvm->stat.pages[level - 1]);
+}
 #endif
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 47b765270239..2d7e61122af8 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -97,6 +97,7 @@ module_param_named(flush_on_reuse, force_flush_and_sync_on_reuse, bool, 0644);
 bool tdp_enabled = false;
 
 static int max_huge_page_level __read_mostly;
+static int tdp_root_level __read_mostly;
 static int max_tdp_level __read_mostly;
 
 enum {
@@ -137,12 +138,22 @@ module_param(dbg, bool, 0644);
 
 #include <trace/events/kvm.h>
 
-/* make pte_list_desc fit well in cache line */
-#define PTE_LIST_EXT 3
+/* make pte_list_desc fit well in cache lines */
+#define PTE_LIST_EXT 14
 
+/*
+ * Slight optimization of cacheline layout, by putting `more' and `spte_count'
+ * at the start; then accessing it will only use one single cacheline for
+ * either full (entries==PTE_LIST_EXT) case or entries<=6.
+ */
 struct pte_list_desc {
-	u64 *sptes[PTE_LIST_EXT];
 	struct pte_list_desc *more;
+	/*
+	 * Stores number of entries stored in the pte_list_desc.  No need to be
+	 * u64 but just for easier alignment.  When PTE_LIST_EXT, means full.
+	 */
+	u64 spte_count;
+	u64 *sptes[PTE_LIST_EXT];
 };
 
 struct kvm_shadow_walk_iterator {
@@ -193,7 +204,7 @@ struct kvm_mmu_role_regs {
  * the single source of truth for the MMU's state.
  */
 #define BUILD_MMU_ROLE_REGS_ACCESSOR(reg, name, flag)			\
-static inline bool ____is_##reg##_##name(struct kvm_mmu_role_regs *regs)\
+static inline bool __maybe_unused ____is_##reg##_##name(struct kvm_mmu_role_regs *regs)\
 {									\
 	return !!(regs->reg & flag);					\
 }
@@ -215,7 +226,7 @@ BUILD_MMU_ROLE_REGS_ACCESSOR(efer, lma, EFER_LMA);
  * and the vCPU may be incorrect/irrelevant.
  */
 #define BUILD_MMU_ROLE_ACCESSOR(base_or_ext, reg, name)		\
-static inline bool is_##reg##_##name(struct kvm_mmu *mmu)	\
+static inline bool __maybe_unused is_##reg##_##name(struct kvm_mmu *mmu)	\
 {								\
 	return !!(mmu->mmu_role. base_or_ext . reg##_##name);	\
 }
@@ -323,12 +334,6 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte)
 static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
                                   struct x86_exception *exception)
 {
-	/* Check if guest physical address doesn't exceed guest maximum */
-	if (kvm_vcpu_is_illegal_gpa(vcpu, gpa)) {
-		exception->error_code |= PFERR_RSVD_MASK;
-		return UNMAPPED_GVA;
-	}
-
         return gpa;
 }
 
@@ -592,12 +597,13 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte)
  * Rules for using mmu_spte_clear_track_bits:
  * It sets the sptep from present to nonpresent, and track the
  * state bits, it is used to clear the last level sptep.
- * Returns non-zero if the PTE was previously valid.
+ * Returns the old PTE.
  */
-static int mmu_spte_clear_track_bits(u64 *sptep)
+static int mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
 {
 	kvm_pfn_t pfn;
 	u64 old_spte = *sptep;
+	int level = sptep_to_sp(sptep)->role.level;
 
 	if (!spte_has_volatile_bits(old_spte))
 		__update_clear_spte_fast(sptep, 0ull);
@@ -605,7 +611,9 @@ static int mmu_spte_clear_track_bits(u64 *sptep)
 		old_spte = __update_clear_spte_slow(sptep, 0ull);
 
 	if (!is_shadow_present_pte(old_spte))
-		return 0;
+		return old_spte;
+
+	kvm_update_page_stats(kvm, level, -1);
 
 	pfn = spte_to_pfn(old_spte);
 
@@ -622,7 +630,7 @@ static int mmu_spte_clear_track_bits(u64 *sptep)
 	if (is_dirty_spte(old_spte))
 		kvm_set_pfn_dirty(pfn);
 
-	return 1;
+	return old_spte;
 }
 
 /*
@@ -686,28 +694,36 @@ static bool mmu_spte_age(u64 *sptep)
 
 static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu)
 {
-	/*
-	 * Prevent page table teardown by making any free-er wait during
-	 * kvm_flush_remote_tlbs() IPI to all active vcpus.
-	 */
-	local_irq_disable();
+	if (is_tdp_mmu(vcpu->arch.mmu)) {
+		kvm_tdp_mmu_walk_lockless_begin();
+	} else {
+		/*
+		 * Prevent page table teardown by making any free-er wait during
+		 * kvm_flush_remote_tlbs() IPI to all active vcpus.
+		 */
+		local_irq_disable();
 
-	/*
-	 * Make sure a following spte read is not reordered ahead of the write
-	 * to vcpu->mode.
-	 */
-	smp_store_mb(vcpu->mode, READING_SHADOW_PAGE_TABLES);
+		/*
+		 * Make sure a following spte read is not reordered ahead of the write
+		 * to vcpu->mode.
+		 */
+		smp_store_mb(vcpu->mode, READING_SHADOW_PAGE_TABLES);
+	}
 }
 
 static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
 {
-	/*
-	 * Make sure the write to vcpu->mode is not reordered in front of
-	 * reads to sptes.  If it does, kvm_mmu_commit_zap_page() can see us
-	 * OUTSIDE_GUEST_MODE and proceed to free the shadow page table.
-	 */
-	smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE);
-	local_irq_enable();
+	if (is_tdp_mmu(vcpu->arch.mmu)) {
+		kvm_tdp_mmu_walk_lockless_end();
+	} else {
+		/*
+		 * Make sure the write to vcpu->mode is not reordered in front of
+		 * reads to sptes.  If it does, kvm_mmu_commit_zap_page() can see us
+		 * OUTSIDE_GUEST_MODE and proceed to free the shadow page table.
+		 */
+		smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE);
+		local_irq_enable();
+	}
 }
 
 static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect)
@@ -786,7 +802,7 @@ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn,
 	return &slot->arch.lpage_info[level - 2][idx];
 }
 
-static void update_gfn_disallow_lpage_count(struct kvm_memory_slot *slot,
+static void update_gfn_disallow_lpage_count(const struct kvm_memory_slot *slot,
 					    gfn_t gfn, int count)
 {
 	struct kvm_lpage_info *linfo;
@@ -799,12 +815,12 @@ static void update_gfn_disallow_lpage_count(struct kvm_memory_slot *slot,
 	}
 }
 
-void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn)
+void kvm_mmu_gfn_disallow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn)
 {
 	update_gfn_disallow_lpage_count(slot, gfn, 1);
 }
 
-void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn)
+void kvm_mmu_gfn_allow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn)
 {
 	update_gfn_disallow_lpage_count(slot, gfn, -1);
 }
@@ -893,7 +909,7 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte,
 			struct kvm_rmap_head *rmap_head)
 {
 	struct pte_list_desc *desc;
-	int i, count = 0;
+	int count = 0;
 
 	if (!rmap_head->val) {
 		rmap_printk("%p %llx 0->1\n", spte, *spte);
@@ -903,24 +919,24 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte,
 		desc = mmu_alloc_pte_list_desc(vcpu);
 		desc->sptes[0] = (u64 *)rmap_head->val;
 		desc->sptes[1] = spte;
+		desc->spte_count = 2;
 		rmap_head->val = (unsigned long)desc | 1;
 		++count;
 	} else {
 		rmap_printk("%p %llx many->many\n", spte, *spte);
 		desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
-		while (desc->sptes[PTE_LIST_EXT-1]) {
+		while (desc->spte_count == PTE_LIST_EXT) {
 			count += PTE_LIST_EXT;
-
 			if (!desc->more) {
 				desc->more = mmu_alloc_pte_list_desc(vcpu);
 				desc = desc->more;
+				desc->spte_count = 0;
 				break;
 			}
 			desc = desc->more;
 		}
-		for (i = 0; desc->sptes[i]; ++i)
-			++count;
-		desc->sptes[i] = spte;
+		count += desc->spte_count;
+		desc->sptes[desc->spte_count++] = spte;
 	}
 	return count;
 }
@@ -930,13 +946,12 @@ pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
 			   struct pte_list_desc *desc, int i,
 			   struct pte_list_desc *prev_desc)
 {
-	int j;
+	int j = desc->spte_count - 1;
 
-	for (j = PTE_LIST_EXT - 1; !desc->sptes[j] && j > i; --j)
-		;
 	desc->sptes[i] = desc->sptes[j];
 	desc->sptes[j] = NULL;
-	if (j != 0)
+	desc->spte_count--;
+	if (desc->spte_count)
 		return;
 	if (!prev_desc && !desc->more)
 		rmap_head->val = 0;
@@ -969,7 +984,7 @@ static void __pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
 		desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
 		prev_desc = NULL;
 		while (desc) {
-			for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) {
+			for (i = 0; i < desc->spte_count; ++i) {
 				if (desc->sptes[i] == spte) {
 					pte_list_desc_remove_entry(rmap_head,
 							desc, i, prev_desc);
@@ -984,30 +999,68 @@ static void __pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
 	}
 }
 
-static void pte_list_remove(struct kvm_rmap_head *rmap_head, u64 *sptep)
+static void pte_list_remove(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
+			    u64 *sptep)
 {
-	mmu_spte_clear_track_bits(sptep);
+	mmu_spte_clear_track_bits(kvm, sptep);
 	__pte_list_remove(sptep, rmap_head);
 }
 
-static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level,
-					   struct kvm_memory_slot *slot)
+/* Return true if rmap existed, false otherwise */
+static bool pte_list_destroy(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
 {
-	unsigned long idx;
+	struct pte_list_desc *desc, *next;
+	int i;
 
-	idx = gfn_to_index(gfn, slot->base_gfn, level);
-	return &slot->arch.rmap[level - PG_LEVEL_4K][idx];
+	if (!rmap_head->val)
+		return false;
+
+	if (!(rmap_head->val & 1)) {
+		mmu_spte_clear_track_bits(kvm, (u64 *)rmap_head->val);
+		goto out;
+	}
+
+	desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
+
+	for (; desc; desc = next) {
+		for (i = 0; i < desc->spte_count; i++)
+			mmu_spte_clear_track_bits(kvm, desc->sptes[i]);
+		next = desc->more;
+		mmu_free_pte_list_desc(desc);
+	}
+out:
+	/* rmap_head is meaningless now, remember to reset it */
+	rmap_head->val = 0;
+	return true;
 }
 
-static struct kvm_rmap_head *gfn_to_rmap(struct kvm *kvm, gfn_t gfn,
-					 struct kvm_mmu_page *sp)
+unsigned int pte_list_count(struct kvm_rmap_head *rmap_head)
 {
-	struct kvm_memslots *slots;
-	struct kvm_memory_slot *slot;
+	struct pte_list_desc *desc;
+	unsigned int count = 0;
 
-	slots = kvm_memslots_for_spte_role(kvm, sp->role);
-	slot = __gfn_to_memslot(slots, gfn);
-	return __gfn_to_rmap(gfn, sp->role.level, slot);
+	if (!rmap_head->val)
+		return 0;
+	else if (!(rmap_head->val & 1))
+		return 1;
+
+	desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
+
+	while (desc) {
+		count += desc->spte_count;
+		desc = desc->more;
+	}
+
+	return count;
+}
+
+static struct kvm_rmap_head *gfn_to_rmap(gfn_t gfn, int level,
+					 const struct kvm_memory_slot *slot)
+{
+	unsigned long idx;
+
+	idx = gfn_to_index(gfn, slot->base_gfn, level);
+	return &slot->arch.rmap[level - PG_LEVEL_4K][idx];
 }
 
 static bool rmap_can_add(struct kvm_vcpu *vcpu)
@@ -1020,24 +1073,39 @@ static bool rmap_can_add(struct kvm_vcpu *vcpu)
 
 static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 {
+	struct kvm_memory_slot *slot;
 	struct kvm_mmu_page *sp;
 	struct kvm_rmap_head *rmap_head;
 
 	sp = sptep_to_sp(spte);
 	kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn);
-	rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp);
+	slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+	rmap_head = gfn_to_rmap(gfn, sp->role.level, slot);
 	return pte_list_add(vcpu, spte, rmap_head);
 }
 
+
 static void rmap_remove(struct kvm *kvm, u64 *spte)
 {
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *slot;
 	struct kvm_mmu_page *sp;
 	gfn_t gfn;
 	struct kvm_rmap_head *rmap_head;
 
 	sp = sptep_to_sp(spte);
 	gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt);
-	rmap_head = gfn_to_rmap(kvm, gfn, sp);
+
+	/*
+	 * Unlike rmap_add and rmap_recycle, rmap_remove does not run in the
+	 * context of a vCPU so have to determine which memslots to use based
+	 * on context information in sp->role.
+	 */
+	slots = kvm_memslots_for_spte_role(kvm, sp->role);
+
+	slot = __gfn_to_memslot(slots, gfn);
+	rmap_head = gfn_to_rmap(gfn, sp->role.level, slot);
+
 	__pte_list_remove(spte, rmap_head);
 }
 
@@ -1119,7 +1187,9 @@ out:
 
 static void drop_spte(struct kvm *kvm, u64 *sptep)
 {
-	if (mmu_spte_clear_track_bits(sptep))
+	u64 old_spte = mmu_spte_clear_track_bits(kvm, sptep);
+
+	if (is_shadow_present_pte(old_spte))
 		rmap_remove(kvm, sptep);
 }
 
@@ -1129,7 +1199,6 @@ static bool __drop_large_spte(struct kvm *kvm, u64 *sptep)
 	if (is_large_pte(*sptep)) {
 		WARN_ON(sptep_to_sp(sptep)->role.level == PG_LEVEL_4K);
 		drop_spte(kvm, sptep);
-		--kvm->stat.lpages;
 		return true;
 	}
 
@@ -1218,7 +1287,7 @@ static bool spte_wrprot_for_clear_dirty(u64 *sptep)
  * Returns true iff any D or W bits were cleared.
  */
 static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
-			       struct kvm_memory_slot *slot)
+			       const struct kvm_memory_slot *slot)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -1256,8 +1325,8 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 		return;
 
 	while (mask) {
-		rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
-					  PG_LEVEL_4K, slot);
+		rmap_head = gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
+					PG_LEVEL_4K, slot);
 		__rmap_write_protect(kvm, rmap_head, false);
 
 		/* clear the first set bit */
@@ -1289,8 +1358,8 @@ static void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
 		return;
 
 	while (mask) {
-		rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
-					  PG_LEVEL_4K, slot);
+		rmap_head = gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
+					PG_LEVEL_4K, slot);
 		__rmap_clear_dirty(kvm, rmap_head, slot);
 
 		/* clear the first set bit */
@@ -1356,7 +1425,7 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
 
 	if (kvm_memslots_have_rmaps(kvm)) {
 		for (i = min_level; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
-			rmap_head = __gfn_to_rmap(gfn, i, slot);
+			rmap_head = gfn_to_rmap(gfn, i, slot);
 			write_protected |= __rmap_write_protect(kvm, rmap_head, true);
 		}
 	}
@@ -1377,20 +1446,9 @@ static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
 }
 
 static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
-			  struct kvm_memory_slot *slot)
+			  const struct kvm_memory_slot *slot)
 {
-	u64 *sptep;
-	struct rmap_iterator iter;
-	bool flush = false;
-
-	while ((sptep = rmap_get_first(rmap_head, &iter))) {
-		rmap_printk("spte %p %llx.\n", sptep, *sptep);
-
-		pte_list_remove(rmap_head, sptep);
-		flush = true;
-	}
-
-	return flush;
+	return pte_list_destroy(kvm, rmap_head);
 }
 
 static bool kvm_unmap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
@@ -1421,13 +1479,13 @@ restart:
 		need_flush = 1;
 
 		if (pte_write(pte)) {
-			pte_list_remove(rmap_head, sptep);
+			pte_list_remove(kvm, rmap_head, sptep);
 			goto restart;
 		} else {
 			new_spte = kvm_mmu_changed_pte_notifier_make_spte(
 					*sptep, new_pfn);
 
-			mmu_spte_clear_track_bits(sptep);
+			mmu_spte_clear_track_bits(kvm, sptep);
 			mmu_spte_set(sptep, new_spte);
 		}
 	}
@@ -1442,7 +1500,7 @@ restart:
 
 struct slot_rmap_walk_iterator {
 	/* input fields. */
-	struct kvm_memory_slot *slot;
+	const struct kvm_memory_slot *slot;
 	gfn_t start_gfn;
 	gfn_t end_gfn;
 	int start_level;
@@ -1462,14 +1520,13 @@ rmap_walk_init_level(struct slot_rmap_walk_iterator *iterator, int level)
 {
 	iterator->level = level;
 	iterator->gfn = iterator->start_gfn;
-	iterator->rmap = __gfn_to_rmap(iterator->gfn, level, iterator->slot);
-	iterator->end_rmap = __gfn_to_rmap(iterator->end_gfn, level,
-					   iterator->slot);
+	iterator->rmap = gfn_to_rmap(iterator->gfn, level, iterator->slot);
+	iterator->end_rmap = gfn_to_rmap(iterator->end_gfn, level, iterator->slot);
 }
 
 static void
 slot_rmap_walk_init(struct slot_rmap_walk_iterator *iterator,
-		    struct kvm_memory_slot *slot, int start_level,
+		    const struct kvm_memory_slot *slot, int start_level,
 		    int end_level, gfn_t start_gfn, gfn_t end_gfn)
 {
 	iterator->slot = slot;
@@ -1584,12 +1641,13 @@ static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
 
 static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 {
+	struct kvm_memory_slot *slot;
 	struct kvm_rmap_head *rmap_head;
 	struct kvm_mmu_page *sp;
 
 	sp = sptep_to_sp(spte);
-
-	rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp);
+	slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+	rmap_head = gfn_to_rmap(gfn, sp->role.level, slot);
 
 	kvm_unmap_rmapp(vcpu->kvm, rmap_head, NULL, gfn, sp->role.level, __pte(0));
 	kvm_flush_remote_tlbs_with_address(vcpu->kvm, sp->gfn,
@@ -2232,8 +2290,6 @@ static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
 	if (is_shadow_present_pte(pte)) {
 		if (is_last_spte(pte, sp->role.level)) {
 			drop_spte(kvm, spte);
-			if (is_large_pte(pte))
-				--kvm->stat.lpages;
 		} else {
 			child = to_shadow_page(pte & PT64_BASE_ADDR_MASK);
 			drop_parent_pte(child, spte);
@@ -2716,15 +2772,12 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 
 	pgprintk("%s: setting spte %llx\n", __func__, *sptep);
 	trace_kvm_mmu_set_spte(level, gfn, sptep);
-	if (!was_rmapped && is_large_pte(*sptep))
-		++vcpu->kvm->stat.lpages;
 
-	if (is_shadow_present_pte(*sptep)) {
-		if (!was_rmapped) {
-			rmap_count = rmap_add(vcpu, sptep, gfn);
-			if (rmap_count > RMAP_RECYCLE_THRESHOLD)
-				rmap_recycle(vcpu, sptep, gfn);
-		}
+	if (!was_rmapped) {
+		kvm_update_page_stats(vcpu->kvm, level, 1);
+		rmap_count = rmap_add(vcpu, sptep, gfn);
+		if (rmap_count > RMAP_RECYCLE_THRESHOLD)
+			rmap_recycle(vcpu, sptep, gfn);
 	}
 
 	return ret;
@@ -2852,6 +2905,7 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
 			      kvm_pfn_t pfn, int max_level)
 {
 	struct kvm_lpage_info *linfo;
+	int host_level;
 
 	max_level = min(max_level, max_huge_page_level);
 	for ( ; max_level > PG_LEVEL_4K; max_level--) {
@@ -2863,7 +2917,8 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
 	if (max_level == PG_LEVEL_4K)
 		return PG_LEVEL_4K;
 
-	return host_pfn_mapping_level(kvm, gfn, pfn, slot);
+	host_level = host_pfn_mapping_level(kvm, gfn, pfn, slot);
+	return min(host_level, max_level);
 }
 
 int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
@@ -2887,17 +2942,12 @@ int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
 	if (!slot)
 		return PG_LEVEL_4K;
 
-	level = kvm_mmu_max_mapping_level(vcpu->kvm, slot, gfn, pfn, max_level);
-	if (level == PG_LEVEL_4K)
-		return level;
-
-	*req_level = level = min(level, max_level);
-
 	/*
 	 * Enforce the iTLB multihit workaround after capturing the requested
 	 * level, which will be used to do precise, accurate accounting.
 	 */
-	if (huge_page_disallowed)
+	*req_level = level = kvm_mmu_max_mapping_level(vcpu->kvm, slot, gfn, pfn, max_level);
+	if (level == PG_LEVEL_4K || huge_page_disallowed)
 		return PG_LEVEL_4K;
 
 	/*
@@ -2965,15 +3015,16 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 			break;
 
 		drop_large_spte(vcpu, it.sptep);
-		if (!is_shadow_present_pte(*it.sptep)) {
-			sp = kvm_mmu_get_page(vcpu, base_gfn, it.addr,
-					      it.level - 1, true, ACC_ALL);
-
-			link_shadow_page(vcpu, it.sptep, sp);
-			if (is_tdp && huge_page_disallowed &&
-			    req_level >= it.level)
-				account_huge_nx_page(vcpu->kvm, sp);
-		}
+		if (is_shadow_present_pte(*it.sptep))
+			continue;
+
+		sp = kvm_mmu_get_page(vcpu, base_gfn, it.addr,
+				      it.level - 1, true, ACC_ALL);
+
+		link_shadow_page(vcpu, it.sptep, sp);
+		if (is_tdp && huge_page_disallowed &&
+		    req_level >= it.level)
+			account_huge_nx_page(vcpu->kvm, sp);
 	}
 
 	ret = mmu_set_spte(vcpu, it.sptep, ACC_ALL,
@@ -3122,15 +3173,40 @@ static bool is_access_allowed(u32 fault_err_code, u64 spte)
 }
 
 /*
- * Returns one of RET_PF_INVALID, RET_PF_FIXED or RET_PF_SPURIOUS.
+ * Returns the last level spte pointer of the shadow page walk for the given
+ * gpa, and sets *spte to the spte value. This spte may be non-preset. If no
+ * walk could be performed, returns NULL and *spte does not contain valid data.
+ *
+ * Contract:
+ *  - Must be called between walk_shadow_page_lockless_{begin,end}.
+ *  - The returned sptep must not be used after walk_shadow_page_lockless_end.
  */
-static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
-			   u32 error_code)
+static u64 *fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, gpa_t gpa, u64 *spte)
 {
 	struct kvm_shadow_walk_iterator iterator;
+	u64 old_spte;
+	u64 *sptep = NULL;
+
+	for_each_shadow_entry_lockless(vcpu, gpa, iterator, old_spte) {
+		sptep = iterator.sptep;
+		*spte = old_spte;
+
+		if (!is_shadow_present_pte(old_spte))
+			break;
+	}
+
+	return sptep;
+}
+
+/*
+ * Returns one of RET_PF_INVALID, RET_PF_FIXED or RET_PF_SPURIOUS.
+ */
+static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code)
+{
 	struct kvm_mmu_page *sp;
 	int ret = RET_PF_INVALID;
 	u64 spte = 0ull;
+	u64 *sptep = NULL;
 	uint retry_count = 0;
 
 	if (!page_fault_can_be_fast(error_code))
@@ -3141,14 +3217,15 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 	do {
 		u64 new_spte;
 
-		for_each_shadow_entry_lockless(vcpu, cr2_or_gpa, iterator, spte)
-			if (!is_shadow_present_pte(spte))
-				break;
+		if (is_tdp_mmu(vcpu->arch.mmu))
+			sptep = kvm_tdp_mmu_fast_pf_get_last_sptep(vcpu, gpa, &spte);
+		else
+			sptep = fast_pf_get_last_sptep(vcpu, gpa, &spte);
 
 		if (!is_shadow_present_pte(spte))
 			break;
 
-		sp = sptep_to_sp(iterator.sptep);
+		sp = sptep_to_sp(sptep);
 		if (!is_last_spte(spte, sp->role.level))
 			break;
 
@@ -3206,8 +3283,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 		 * since the gfn is not stable for indirect shadow page. See
 		 * Documentation/virt/kvm/locking.rst to get more detail.
 		 */
-		if (fast_pf_fix_direct_spte(vcpu, sp, iterator.sptep, spte,
-					    new_spte)) {
+		if (fast_pf_fix_direct_spte(vcpu, sp, sptep, spte, new_spte)) {
 			ret = RET_PF_FIXED;
 			break;
 		}
@@ -3220,8 +3296,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 
 	} while (true);
 
-	trace_fast_page_fault(vcpu, cr2_or_gpa, error_code, iterator.sptep,
-			      spte, ret);
+	trace_fast_page_fault(vcpu, gpa, error_code, sptep, spte, ret);
 	walk_shadow_page_lockless_end(vcpu);
 
 	return ret;
@@ -3455,15 +3530,22 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	 * the shadow page table may be a PAE or a long mode page table.
 	 */
 	pm_mask = PT_PRESENT_MASK | shadow_me_mask;
-	if (mmu->shadow_root_level == PT64_ROOT_4LEVEL) {
+	if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL) {
 		pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
 
 		if (WARN_ON_ONCE(!mmu->pml4_root)) {
 			r = -EIO;
 			goto out_unlock;
 		}
-
 		mmu->pml4_root[0] = __pa(mmu->pae_root) | pm_mask;
+
+		if (mmu->shadow_root_level == PT64_ROOT_5LEVEL) {
+			if (WARN_ON_ONCE(!mmu->pml5_root)) {
+				r = -EIO;
+				goto out_unlock;
+			}
+			mmu->pml5_root[0] = __pa(mmu->pml4_root) | pm_mask;
+		}
 	}
 
 	for (i = 0; i < 4; ++i) {
@@ -3482,7 +3564,9 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 		mmu->pae_root[i] = root | pm_mask;
 	}
 
-	if (mmu->shadow_root_level == PT64_ROOT_4LEVEL)
+	if (mmu->shadow_root_level == PT64_ROOT_5LEVEL)
+		mmu->root_hpa = __pa(mmu->pml5_root);
+	else if (mmu->shadow_root_level == PT64_ROOT_4LEVEL)
 		mmu->root_hpa = __pa(mmu->pml4_root);
 	else
 		mmu->root_hpa = __pa(mmu->pae_root);
@@ -3498,7 +3582,10 @@ out_unlock:
 static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *mmu = vcpu->arch.mmu;
-	u64 *pml4_root, *pae_root;
+	bool need_pml5 = mmu->shadow_root_level > PT64_ROOT_4LEVEL;
+	u64 *pml5_root = NULL;
+	u64 *pml4_root = NULL;
+	u64 *pae_root;
 
 	/*
 	 * When shadowing 32-bit or PAE NPT with 64-bit NPT, the PML4 and PDP
@@ -3511,20 +3598,21 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
 		return 0;
 
 	/*
-	 * This mess only works with 4-level paging and needs to be updated to
-	 * work with 5-level paging.
+	 * NPT, the only paging mode that uses this horror, uses a fixed number
+	 * of levels for the shadow page tables, e.g. all MMUs are 4-level or
+	 * all MMus are 5-level.  Thus, this can safely require that pml5_root
+	 * is allocated if the other roots are valid and pml5 is needed, as any
+	 * prior MMU would also have required pml5.
 	 */
-	if (WARN_ON_ONCE(mmu->shadow_root_level != PT64_ROOT_4LEVEL))
-		return -EIO;
-
-	if (mmu->pae_root && mmu->pml4_root)
+	if (mmu->pae_root && mmu->pml4_root && (!need_pml5 || mmu->pml5_root))
 		return 0;
 
 	/*
 	 * The special roots should always be allocated in concert.  Yell and
 	 * bail if KVM ends up in a state where only one of the roots is valid.
 	 */
-	if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root))
+	if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root ||
+			 (need_pml5 && mmu->pml5_root)))
 		return -EIO;
 
 	/*
@@ -3535,16 +3623,31 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
 	if (!pae_root)
 		return -ENOMEM;
 
+#ifdef CONFIG_X86_64
 	pml4_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
-	if (!pml4_root) {
-		free_page((unsigned long)pae_root);
-		return -ENOMEM;
+	if (!pml4_root)
+		goto err_pml4;
+
+	if (need_pml5) {
+		pml5_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
+		if (!pml5_root)
+			goto err_pml5;
 	}
+#endif
 
 	mmu->pae_root = pae_root;
 	mmu->pml4_root = pml4_root;
+	mmu->pml5_root = pml5_root;
 
 	return 0;
+
+#ifdef CONFIG_X86_64
+err_pml5:
+	free_page((unsigned long)pml4_root);
+err_pml4:
+	free_page((unsigned long)pae_root);
+	return -ENOMEM;
+#endif
 }
 
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
@@ -3640,6 +3743,8 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 /*
  * Return the level of the lowest level SPTE added to sptes.
  * That SPTE may be non-present.
+ *
+ * Must be called between walk_shadow_page_lockless_{begin,end}.
  */
 static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level)
 {
@@ -3647,8 +3752,6 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level
 	int leaf = -1;
 	u64 spte;
 
-	walk_shadow_page_lockless_begin(vcpu);
-
 	for (shadow_walk_init(&iterator, vcpu, addr),
 	     *root_level = iterator.level;
 	     shadow_walk_okay(&iterator);
@@ -3662,8 +3765,6 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level
 			break;
 	}
 
-	walk_shadow_page_lockless_end(vcpu);
-
 	return leaf;
 }
 
@@ -3675,11 +3776,15 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 	int root, leaf, level;
 	bool reserved = false;
 
+	walk_shadow_page_lockless_begin(vcpu);
+
 	if (is_tdp_mmu(vcpu->arch.mmu))
 		leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
 	else
 		leaf = get_walk(vcpu, addr, sptes, &root);
 
+	walk_shadow_page_lockless_end(vcpu);
+
 	if (unlikely(leaf < 0)) {
 		*sptep = 0ull;
 		return reserved;
@@ -3795,9 +3900,9 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 				  kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
 }
 
-static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
+static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 			 gpa_t cr2_or_gpa, kvm_pfn_t *pfn, hva_t *hva,
-			 bool write, bool *writable)
+			 bool write, bool *writable, int *r)
 {
 	struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
 	bool async;
@@ -3808,13 +3913,26 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 	 * be zapped before KVM inserts a new MMIO SPTE for the gfn.
 	 */
 	if (slot && (slot->flags & KVM_MEMSLOT_INVALID))
-		return true;
-
-	/* Don't expose private memslots to L2. */
-	if (is_guest_mode(vcpu) && !kvm_is_visible_memslot(slot)) {
-		*pfn = KVM_PFN_NOSLOT;
-		*writable = false;
-		return false;
+		goto out_retry;
+
+	if (!kvm_is_visible_memslot(slot)) {
+		/* Don't expose private memslots to L2. */
+		if (is_guest_mode(vcpu)) {
+			*pfn = KVM_PFN_NOSLOT;
+			*writable = false;
+			return false;
+		}
+		/*
+		 * If the APIC access page exists but is disabled, go directly
+		 * to emulation without caching the MMIO access or creating a
+		 * MMIO SPTE.  That way the cache doesn't need to be purged
+		 * when the AVIC is re-enabled.
+		 */
+		if (slot && slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT &&
+		    !kvm_apicv_activated(vcpu->kvm)) {
+			*r = RET_PF_EMULATE;
+			return true;
+		}
 	}
 
 	async = false;
@@ -3828,14 +3946,17 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 		if (kvm_find_async_pf_gfn(vcpu, gfn)) {
 			trace_kvm_async_pf_doublefault(cr2_or_gpa, gfn);
 			kvm_make_request(KVM_REQ_APF_HALT, vcpu);
-			return true;
+			goto out_retry;
 		} else if (kvm_arch_setup_async_pf(vcpu, cr2_or_gpa, gfn))
-			return true;
+			goto out_retry;
 	}
 
 	*pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL,
 				    write, writable, hva);
-	return false;
+
+out_retry:
+	*r = RET_PF_RETRY;
+	return true;
 }
 
 static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
@@ -3854,11 +3975,9 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 	if (page_fault_handle_page_track(vcpu, error_code, gfn))
 		return RET_PF_EMULATE;
 
-	if (!is_tdp_mmu_fault) {
-		r = fast_page_fault(vcpu, gpa, error_code);
-		if (r != RET_PF_INVALID)
-			return r;
-	}
+	r = fast_page_fault(vcpu, gpa, error_code);
+	if (r != RET_PF_INVALID)
+		return r;
 
 	r = mmu_topup_memory_caches(vcpu, false);
 	if (r)
@@ -3867,9 +3986,9 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 
-	if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, &hva,
-			 write, &map_writable))
-		return RET_PF_RETRY;
+	if (kvm_faultin_pfn(vcpu, prefault, gfn, gpa, &pfn, &hva,
+			 write, &map_writable, &r))
+		return r;
 
 	if (handle_abnormal_pfn(vcpu, is_tdp ? 0 : gpa, gfn, pfn, ACC_ALL, &r))
 		return r;
@@ -4588,6 +4707,10 @@ static union kvm_mmu_role kvm_calc_mmu_role_common(struct kvm_vcpu *vcpu,
 
 static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu)
 {
+	/* tdp_root_level is architecture forced level, use it if nonzero */
+	if (tdp_root_level)
+		return tdp_root_level;
+
 	/* Use 5-level TDP if and only if it's useful/necessary. */
 	if (max_tdp_level == 5 && cpuid_maxphyaddr(vcpu) <= 48)
 		return 4;
@@ -5160,7 +5283,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
 	if (r == RET_PF_INVALID) {
 		r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa,
 					  lower_32_bits(error_code), false);
-		if (WARN_ON_ONCE(r == RET_PF_INVALID))
+		if (KVM_BUG_ON(r == RET_PF_INVALID, vcpu->kvm))
 			return -EIO;
 	}
 
@@ -5279,10 +5402,11 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
 	 */
 }
 
-void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level,
-		       int tdp_huge_page_level)
+void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
+		       int tdp_max_root_level, int tdp_huge_page_level)
 {
 	tdp_enabled = enable_tdp;
+	tdp_root_level = tdp_forced_root_level;
 	max_tdp_level = tdp_max_root_level;
 
 	/*
@@ -5302,12 +5426,13 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level,
 EXPORT_SYMBOL_GPL(kvm_configure_mmu);
 
 /* The return value indicates if tlb flush on all vcpus is needed. */
-typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head,
-				    struct kvm_memory_slot *slot);
+typedef bool (*slot_level_handler) (struct kvm *kvm,
+				    struct kvm_rmap_head *rmap_head,
+				    const struct kvm_memory_slot *slot);
 
 /* The caller should hold mmu-lock before calling this function. */
 static __always_inline bool
-slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
+slot_handle_level_range(struct kvm *kvm, const struct kvm_memory_slot *memslot,
 			slot_level_handler fn, int start_level, int end_level,
 			gfn_t start_gfn, gfn_t end_gfn, bool flush_on_yield,
 			bool flush)
@@ -5334,7 +5459,7 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
 }
 
 static __always_inline bool
-slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
+slot_handle_level(struct kvm *kvm, const struct kvm_memory_slot *memslot,
 		  slot_level_handler fn, int start_level, int end_level,
 		  bool flush_on_yield)
 {
@@ -5345,7 +5470,7 @@ slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
 }
 
 static __always_inline bool
-slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot,
+slot_handle_leaf(struct kvm *kvm, const struct kvm_memory_slot *memslot,
 		 slot_level_handler fn, bool flush_on_yield)
 {
 	return slot_handle_level(kvm, memslot, fn, PG_LEVEL_4K,
@@ -5358,6 +5483,7 @@ static void free_mmu_pages(struct kvm_mmu *mmu)
 		set_memory_encrypted((unsigned long)mmu->pae_root, 1);
 	free_page((unsigned long)mmu->pae_root);
 	free_page((unsigned long)mmu->pml4_root);
+	free_page((unsigned long)mmu->pml5_root);
 }
 
 static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
@@ -5587,6 +5713,10 @@ void kvm_mmu_uninit_vm(struct kvm *kvm)
 	kvm_mmu_uninit_tdp_mmu(kvm);
 }
 
+/*
+ * Invalidate (zap) SPTEs that cover GFNs from gfn_start and up to gfn_end
+ * (not including it)
+ */
 void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
 {
 	struct kvm_memslots *slots;
@@ -5594,8 +5724,11 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
 	int i;
 	bool flush = false;
 
+	write_lock(&kvm->mmu_lock);
+
+	kvm_inc_notifier_count(kvm, gfn_start, gfn_end);
+
 	if (kvm_memslots_have_rmaps(kvm)) {
-		write_lock(&kvm->mmu_lock);
 		for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
 			slots = __kvm_memslots(kvm, i);
 			kvm_for_each_memslot(memslot, slots) {
@@ -5606,41 +5739,44 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
 				if (start >= end)
 					continue;
 
-				flush = slot_handle_level_range(kvm, memslot,
+				flush = slot_handle_level_range(kvm,
+						(const struct kvm_memory_slot *) memslot,
 						kvm_zap_rmapp, PG_LEVEL_4K,
 						KVM_MAX_HUGEPAGE_LEVEL, start,
 						end - 1, true, flush);
 			}
 		}
 		if (flush)
-			kvm_flush_remote_tlbs_with_address(kvm, gfn_start, gfn_end);
-		write_unlock(&kvm->mmu_lock);
+			kvm_flush_remote_tlbs_with_address(kvm, gfn_start,
+							   gfn_end - gfn_start);
 	}
 
 	if (is_tdp_mmu_enabled(kvm)) {
-		flush = false;
-
-		read_lock(&kvm->mmu_lock);
 		for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
 			flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, gfn_start,
-							  gfn_end, flush, true);
+							  gfn_end, flush);
 		if (flush)
 			kvm_flush_remote_tlbs_with_address(kvm, gfn_start,
-							   gfn_end);
-
-		read_unlock(&kvm->mmu_lock);
+							   gfn_end - gfn_start);
 	}
+
+	if (flush)
+		kvm_flush_remote_tlbs_with_address(kvm, gfn_start, gfn_end);
+
+	kvm_dec_notifier_count(kvm, gfn_start, gfn_end);
+
+	write_unlock(&kvm->mmu_lock);
 }
 
 static bool slot_rmap_write_protect(struct kvm *kvm,
 				    struct kvm_rmap_head *rmap_head,
-				    struct kvm_memory_slot *slot)
+				    const struct kvm_memory_slot *slot)
 {
 	return __rmap_write_protect(kvm, rmap_head, false);
 }
 
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
-				      struct kvm_memory_slot *memslot,
+				      const struct kvm_memory_slot *memslot,
 				      int start_level)
 {
 	bool flush = false;
@@ -5676,7 +5812,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 
 static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
 					 struct kvm_rmap_head *rmap_head,
-					 struct kvm_memory_slot *slot)
+					 const struct kvm_memory_slot *slot)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -5699,7 +5835,7 @@ restart:
 		if (sp->role.direct && !kvm_is_reserved_pfn(pfn) &&
 		    sp->role.level < kvm_mmu_max_mapping_level(kvm, slot, sp->gfn,
 							       pfn, PG_LEVEL_NUM)) {
-			pte_list_remove(rmap_head, sptep);
+			pte_list_remove(kvm, rmap_head, sptep);
 
 			if (kvm_available_flush_tlb_with_range())
 				kvm_flush_remote_tlbs_with_address(kvm, sp->gfn,
@@ -5715,10 +5851,8 @@ restart:
 }
 
 void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
-				   const struct kvm_memory_slot *memslot)
+				   const struct kvm_memory_slot *slot)
 {
-	/* FIXME: const-ify all uses of struct kvm_memory_slot.  */
-	struct kvm_memory_slot *slot = (struct kvm_memory_slot *)memslot;
 	bool flush = false;
 
 	if (kvm_memslots_have_rmaps(kvm)) {
@@ -5754,7 +5888,7 @@ void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
 }
 
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
-				   struct kvm_memory_slot *memslot)
+				   const struct kvm_memory_slot *memslot)
 {
 	bool flush = false;
 
diff --git a/arch/x86/kvm/mmu/mmu_audit.c b/arch/x86/kvm/mmu/mmu_audit.c
index cedc17b2f60e..9e7dcf999f08 100644
--- a/arch/x86/kvm/mmu/mmu_audit.c
+++ b/arch/x86/kvm/mmu/mmu_audit.c
@@ -147,7 +147,7 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
 		return;
 	}
 
-	rmap_head = __gfn_to_rmap(gfn, rev_sp->role.level, slot);
+	rmap_head = gfn_to_rmap(gfn, rev_sp->role.level, slot);
 	if (!rmap_head->val) {
 		if (!__ratelimit(&ratelimit_state))
 			return;
@@ -200,7 +200,7 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
 
 	slots = kvm_memslots_for_spte_role(kvm, sp->role);
 	slot = __gfn_to_memslot(slots, sp->gfn);
-	rmap_head = __gfn_to_rmap(sp->gfn, PG_LEVEL_4K, slot);
+	rmap_head = gfn_to_rmap(sp->gfn, PG_LEVEL_4K, slot);
 
 	for_each_rmap_spte(rmap_head, &iter, sptep) {
 		if (is_writable_pte(*sptep))
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 35567293c1fd..bf2bdbf333c2 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -31,13 +31,16 @@ extern bool dbg;
 #define IS_VALID_PAE_ROOT(x)	(!!(x))
 
 struct kvm_mmu_page {
+	/*
+	 * Note, "link" through "spt" fit in a single 64 byte cache line on
+	 * 64-bit kernels, keep it that way unless there's a reason not to.
+	 */
 	struct list_head link;
 	struct hlist_node hash_link;
-	struct list_head lpage_disallowed_link;
 
+	bool tdp_mmu_page;
 	bool unsync;
 	u8 mmu_valid_gen;
-	bool mmio_cached;
 	bool lpage_disallowed; /* Can't be replaced by an equiv large page */
 
 	/*
@@ -59,6 +62,7 @@ struct kvm_mmu_page {
 	struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
 	DECLARE_BITMAP(unsync_child_bitmap, 512);
 
+	struct list_head lpage_disallowed_link;
 #ifdef CONFIG_X86_32
 	/*
 	 * Used out of the mmu-lock to avoid reading spte values while an
@@ -71,8 +75,6 @@ struct kvm_mmu_page {
 	atomic_t write_flooding_count;
 
 #ifdef CONFIG_X86_64
-	bool tdp_mmu_page;
-
 	/* Used for freeing the page asynchronously if it is a TDP MMU page. */
 	struct rcu_head rcu_head;
 #endif
@@ -124,13 +126,14 @@ static inline bool is_nx_huge_page_enabled(void)
 
 int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync);
 
-void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
-void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
+void kvm_mmu_gfn_disallow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn);
+void kvm_mmu_gfn_allow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn);
 bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
 				    struct kvm_memory_slot *slot, u64 gfn,
 				    int min_level);
 void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
 					u64 start_gfn, u64 pages);
+unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
 
 /*
  * Return values of handle_mmio_page_fault, mmu.page_fault, and fast_page_fault().
@@ -140,6 +143,9 @@ void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
  * RET_PF_INVALID: the spte is invalid, let the real page fault path update it.
  * RET_PF_FIXED: The faulting entry has been fixed.
  * RET_PF_SPURIOUS: The faulting entry was already fixed, e.g. by another vCPU.
+ *
+ * Any names added to this enum should be exported to userspace for use in
+ * tracepoints via TRACE_DEFINE_ENUM() in mmutrace.h
  */
 enum {
 	RET_PF_RETRY = 0,
diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
index efbad33a0645..2924a4081a19 100644
--- a/arch/x86/kvm/mmu/mmutrace.h
+++ b/arch/x86/kvm/mmu/mmutrace.h
@@ -54,6 +54,12 @@
 	{ PFERR_RSVD_MASK, "RSVD" },	\
 	{ PFERR_FETCH_MASK, "F" }
 
+TRACE_DEFINE_ENUM(RET_PF_RETRY);
+TRACE_DEFINE_ENUM(RET_PF_EMULATE);
+TRACE_DEFINE_ENUM(RET_PF_INVALID);
+TRACE_DEFINE_ENUM(RET_PF_FIXED);
+TRACE_DEFINE_ENUM(RET_PF_SPURIOUS);
+
 /*
  * A pagetable walk has started
  */
diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
index 91a9f7e0fd91..269f11f92fd0 100644
--- a/arch/x86/kvm/mmu/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
@@ -16,6 +16,7 @@
 
 #include <asm/kvm_page_track.h>
 
+#include "mmu.h"
 #include "mmu_internal.h"
 
 void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index ee044d357b5f..7d03e9b7ccfa 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -881,9 +881,9 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 
-	if (try_async_pf(vcpu, prefault, walker.gfn, addr, &pfn, &hva,
-			 write_fault, &map_writable))
-		return RET_PF_RETRY;
+	if (kvm_faultin_pfn(vcpu, prefault, walker.gfn, addr, &pfn, &hva,
+			 write_fault, &map_writable, &r))
+		return r;
 
 	if (handle_abnormal_pfn(vcpu, addr, walker.gfn, pfn, walker.pte_access, &r))
 		return r;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index d80cb122b5f3..64ccfc1fa553 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -10,7 +10,7 @@
 #include <asm/cmpxchg.h>
 #include <trace/events/kvm.h>
 
-static bool __read_mostly tdp_mmu_enabled = false;
+static bool __read_mostly tdp_mmu_enabled = true;
 module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644);
 
 /* Initializes the TDP MMU for the VM, if enabled. */
@@ -255,26 +255,17 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
  *
  * @kvm: kvm instance
  * @sp: the new page
- * @shared: This operation may not be running under the exclusive use of
- *	    the MMU lock and the operation must synchronize with other
- *	    threads that might be adding or removing pages.
  * @account_nx: This page replaces a NX large page and should be marked for
  *		eventual reclaim.
  */
 static void tdp_mmu_link_page(struct kvm *kvm, struct kvm_mmu_page *sp,
-			      bool shared, bool account_nx)
+			      bool account_nx)
 {
-	if (shared)
-		spin_lock(&kvm->arch.tdp_mmu_pages_lock);
-	else
-		lockdep_assert_held_write(&kvm->mmu_lock);
-
+	spin_lock(&kvm->arch.tdp_mmu_pages_lock);
 	list_add(&sp->link, &kvm->arch.tdp_mmu_pages);
 	if (account_nx)
 		account_huge_nx_page(kvm, sp);
-
-	if (shared)
-		spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
+	spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
 }
 
 /**
@@ -445,13 +436,6 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
 
 	trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte);
 
-	if (is_large_pte(old_spte) != is_large_pte(new_spte)) {
-		if (is_large_pte(old_spte))
-			atomic64_sub(1, (atomic64_t*)&kvm->stat.lpages);
-		else
-			atomic64_add(1, (atomic64_t*)&kvm->stat.lpages);
-	}
-
 	/*
 	 * The only times a SPTE should be changed from a non-present to
 	 * non-present state is when an MMIO entry is installed/modified/
@@ -477,6 +461,8 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
 		return;
 	}
 
+	if (is_leaf != was_leaf)
+		kvm_update_page_stats(kvm, level, is_leaf ? 1 : -1);
 
 	if (was_leaf && is_dirty_spte(old_spte) &&
 	    (!is_present || !is_dirty_spte(new_spte) || pfn_changed))
@@ -526,6 +512,10 @@ static inline bool tdp_mmu_set_spte_atomic_no_dirty_log(struct kvm *kvm,
 	if (is_removed_spte(iter->old_spte))
 		return false;
 
+	/*
+	 * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
+	 * does not hold the mmu_lock.
+	 */
 	if (cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte,
 		      new_spte) != iter->old_spte)
 		return false;
@@ -537,15 +527,40 @@ static inline bool tdp_mmu_set_spte_atomic_no_dirty_log(struct kvm *kvm,
 	return true;
 }
 
-static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
-					   struct tdp_iter *iter,
-					   u64 new_spte)
+/*
+ * tdp_mmu_map_set_spte_atomic - Set a leaf TDP MMU SPTE atomically to resolve a
+ * TDP page fault.
+ *
+ * @vcpu: The vcpu instance that took the TDP page fault.
+ * @iter: a tdp_iter instance currently on the SPTE that should be set
+ * @new_spte: The value the SPTE should be set to
+ *
+ * Returns: true if the SPTE was set, false if it was not. If false is returned,
+ *	    this function will have no side-effects.
+ */
+static inline bool tdp_mmu_map_set_spte_atomic(struct kvm_vcpu *vcpu,
+					       struct tdp_iter *iter,
+					       u64 new_spte)
 {
+	struct kvm *kvm = vcpu->kvm;
+
 	if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, iter, new_spte))
 		return false;
 
-	handle_changed_spte_dirty_log(kvm, iter->as_id, iter->gfn,
-				      iter->old_spte, new_spte, iter->level);
+	/*
+	 * Use kvm_vcpu_gfn_to_memslot() instead of going through
+	 * handle_changed_spte_dirty_log() to leverage vcpu->last_used_slot.
+	 */
+	if (is_writable_pte(new_spte)) {
+		struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, iter->gfn);
+
+		if (slot && kvm_slot_dirty_track_enabled(slot)) {
+			/* Enforced by kvm_mmu_hugepage_adjust. */
+			WARN_ON_ONCE(iter->level > PG_LEVEL_4K);
+			mark_page_dirty_in_slot(kvm, slot, iter->gfn);
+		}
+	}
+
 	return true;
 }
 
@@ -558,7 +573,7 @@ static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
 	 * immediately installing a present entry in its place
 	 * before the TLBs are flushed.
 	 */
-	if (!tdp_mmu_set_spte_atomic(kvm, iter, REMOVED_SPTE))
+	if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, iter, REMOVED_SPTE))
 		return false;
 
 	kvm_flush_remote_tlbs_with_address(kvm, iter->gfn,
@@ -789,21 +804,15 @@ retry:
  * non-root pages mapping GFNs strictly within that range. Returns true if
  * SPTEs have been cleared and a TLB flush is needed before releasing the
  * MMU lock.
- *
- * If shared is true, this thread holds the MMU lock in read mode and must
- * account for the possibility that other threads are modifying the paging
- * structures concurrently. If shared is false, this thread should hold the
- * MMU in write mode.
  */
 bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start,
-				 gfn_t end, bool can_yield, bool flush,
-				 bool shared)
+				 gfn_t end, bool can_yield, bool flush)
 {
 	struct kvm_mmu_page *root;
 
-	for_each_tdp_mmu_root_yield_safe(kvm, root, as_id, shared)
+	for_each_tdp_mmu_root_yield_safe(kvm, root, as_id, false)
 		flush = zap_gfn_range(kvm, root, start, end, can_yield, flush,
-				      shared);
+				      false);
 
 	return flush;
 }
@@ -814,8 +823,7 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm)
 	int i;
 
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
-		flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, -1ull,
-						  flush, false);
+		flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, -1ull, flush);
 
 	if (flush)
 		kvm_flush_remote_tlbs(kvm);
@@ -940,7 +948,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write,
 
 	if (new_spte == iter->old_spte)
 		ret = RET_PF_SPURIOUS;
-	else if (!tdp_mmu_set_spte_atomic(vcpu->kvm, iter, new_spte))
+	else if (!tdp_mmu_map_set_spte_atomic(vcpu, iter, new_spte))
 		return RET_PF_RETRY;
 
 	/*
@@ -1044,9 +1052,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 			new_spte = make_nonleaf_spte(child_pt,
 						     !shadow_accessed_mask);
 
-			if (tdp_mmu_set_spte_atomic(vcpu->kvm, &iter,
-						    new_spte)) {
-				tdp_mmu_link_page(vcpu->kvm, sp, true,
+			if (tdp_mmu_set_spte_atomic_no_dirty_log(vcpu->kvm, &iter, new_spte)) {
+				tdp_mmu_link_page(vcpu->kvm, sp,
 						  huge_page_disallowed &&
 						  req_level >= iter.level);
 
@@ -1255,8 +1262,8 @@ retry:
  * only affect leaf SPTEs down to min_level.
  * Returns true if an SPTE has been changed and the TLBs need to be flushed.
  */
-bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot,
-			     int min_level)
+bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm,
+			     const struct kvm_memory_slot *slot, int min_level)
 {
 	struct kvm_mmu_page *root;
 	bool spte_set = false;
@@ -1326,7 +1333,8 @@ retry:
  * each SPTE. Returns true if an SPTE has been changed and the TLBs need to
  * be flushed.
  */
-bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
+bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
+				  const struct kvm_memory_slot *slot)
 {
 	struct kvm_mmu_page *root;
 	bool spte_set = false;
@@ -1529,6 +1537,8 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
 /*
  * Return the level of the lowest level SPTE added to sptes.
  * That SPTE may be non-present.
+ *
+ * Must be called between kvm_tdp_mmu_walk_lockless_{begin,end}.
  */
 int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
 			 int *root_level)
@@ -1540,14 +1550,47 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
 
 	*root_level = vcpu->arch.mmu->shadow_root_level;
 
-	rcu_read_lock();
-
 	tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
 		leaf = iter.level;
 		sptes[leaf] = iter.old_spte;
 	}
 
-	rcu_read_unlock();
-
 	return leaf;
 }
+
+/*
+ * Returns the last level spte pointer of the shadow page walk for the given
+ * gpa, and sets *spte to the spte value. This spte may be non-preset. If no
+ * walk could be performed, returns NULL and *spte does not contain valid data.
+ *
+ * Contract:
+ *  - Must be called between kvm_tdp_mmu_walk_lockless_{begin,end}.
+ *  - The returned sptep must not be used after kvm_tdp_mmu_walk_lockless_end.
+ *
+ * WARNING: This function is only intended to be called during fast_page_fault.
+ */
+u64 *kvm_tdp_mmu_fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, u64 addr,
+					u64 *spte)
+{
+	struct tdp_iter iter;
+	struct kvm_mmu *mmu = vcpu->arch.mmu;
+	gfn_t gfn = addr >> PAGE_SHIFT;
+	tdp_ptep_t sptep = NULL;
+
+	tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
+		*spte = iter.old_spte;
+		sptep = iter.sptep;
+	}
+
+	/*
+	 * Perform the rcu_dereference to get the raw spte pointer value since
+	 * we are passing it up to fast_page_fault, which is shared with the
+	 * legacy MMU and thus does not retain the TDP MMU-specific __rcu
+	 * annotation.
+	 *
+	 * This is safe since fast_page_fault obeys the contracts of this
+	 * function as well as all TDP MMU contracts around modifying SPTEs
+	 * outside of mmu_lock.
+	 */
+	return rcu_dereference(sptep);
+}
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 1cae4485b3bc..358f447d4012 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -20,14 +20,11 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
 			  bool shared);
 
 bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start,
-				 gfn_t end, bool can_yield, bool flush,
-				 bool shared);
+				 gfn_t end, bool can_yield, bool flush);
 static inline bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id,
-					     gfn_t start, gfn_t end, bool flush,
-					     bool shared)
+					     gfn_t start, gfn_t end, bool flush)
 {
-	return __kvm_tdp_mmu_zap_gfn_range(kvm, as_id, start, end, true, flush,
-					   shared);
+	return __kvm_tdp_mmu_zap_gfn_range(kvm, as_id, start, end, true, flush);
 }
 static inline bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
@@ -44,7 +41,7 @@ static inline bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
 	 */
 	lockdep_assert_held_write(&kvm->mmu_lock);
 	return __kvm_tdp_mmu_zap_gfn_range(kvm, kvm_mmu_page_as_id(sp),
-					   sp->gfn, end, false, false, false);
+					   sp->gfn, end, false, false);
 }
 
 void kvm_tdp_mmu_zap_all(struct kvm *kvm);
@@ -61,10 +58,10 @@ bool kvm_tdp_mmu_age_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
 bool kvm_tdp_mmu_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
 bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
 
-bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot,
-			     int min_level);
+bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm,
+			     const struct kvm_memory_slot *slot, int min_level);
 bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
-				  struct kvm_memory_slot *slot);
+				  const struct kvm_memory_slot *slot);
 void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
 				       struct kvm_memory_slot *slot,
 				       gfn_t gfn, unsigned long mask,
@@ -77,8 +74,20 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
 				   struct kvm_memory_slot *slot, gfn_t gfn,
 				   int min_level);
 
+static inline void kvm_tdp_mmu_walk_lockless_begin(void)
+{
+	rcu_read_lock();
+}
+
+static inline void kvm_tdp_mmu_walk_lockless_end(void)
+{
+	rcu_read_unlock();
+}
+
 int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
 			 int *root_level);
+u64 *kvm_tdp_mmu_fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, u64 addr,
+					u64 *spte);
 
 #ifdef CONFIG_X86_64
 bool kvm_mmu_init_tdp_mmu(struct kvm *kvm);
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 827886c12c16..0772bad9165c 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -137,18 +137,20 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
 	pmc->perf_event = event;
 	pmc_to_pmu(pmc)->event_count++;
 	clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
+	pmc->is_paused = false;
 }
 
 static void pmc_pause_counter(struct kvm_pmc *pmc)
 {
 	u64 counter = pmc->counter;
 
-	if (!pmc->perf_event)
+	if (!pmc->perf_event || pmc->is_paused)
 		return;
 
 	/* update counter, reset event value to avoid redundant accumulation */
 	counter += perf_event_pause(pmc->perf_event, true);
 	pmc->counter = counter & pmc_bitmask(pmc);
+	pmc->is_paused = true;
 }
 
 static bool pmc_resume_counter(struct kvm_pmc *pmc)
@@ -163,6 +165,7 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
 
 	/* reuse perf_event to serve as pmc_reprogram_counter() does*/
 	perf_event_enable(pmc->perf_event);
+	pmc->is_paused = false;
 
 	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
 	return true;
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 67e753edfa22..0e4f2b1fa9fb 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -55,7 +55,7 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc)
 	u64 counter, enabled, running;
 
 	counter = pmc->counter;
-	if (pmc->perf_event)
+	if (pmc->perf_event && !pmc->is_paused)
 		counter += perf_event_read_value(pmc->perf_event,
 						 &enabled, &running);
 	/* FIXME: Scaling needed? */
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index a8ad78a2faa1..8052d92069e0 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -197,6 +197,8 @@ void avic_init_vmcb(struct vcpu_svm *svm)
 	vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
 	vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
 	vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;
+	vmcb->control.avic_vapic_bar = APIC_DEFAULT_PHYS_BASE & VMCB_AVIC_APIC_BAR_MASK;
+
 	if (kvm_apicv_activated(svm->vcpu.kvm))
 		vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
 	else
@@ -225,31 +227,26 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
  * field of the VMCB. Therefore, we set up the
  * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
  */
-static int avic_update_access_page(struct kvm *kvm, bool activate)
+static int avic_alloc_access_page(struct kvm *kvm)
 {
 	void __user *ret;
 	int r = 0;
 
 	mutex_lock(&kvm->slots_lock);
-	/*
-	 * During kvm_destroy_vm(), kvm_pit_set_reinject() could trigger
-	 * APICv mode change, which update APIC_ACCESS_PAGE_PRIVATE_MEMSLOT
-	 * memory region. So, we need to ensure that kvm->mm == current->mm.
-	 */
-	if ((kvm->arch.apic_access_memslot_enabled == activate) ||
-	    (kvm->mm != current->mm))
+
+	if (kvm->arch.apic_access_memslot_enabled)
 		goto out;
 
 	ret = __x86_set_memory_region(kvm,
 				      APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
 				      APIC_DEFAULT_PHYS_BASE,
-				      activate ? PAGE_SIZE : 0);
+				      PAGE_SIZE);
 	if (IS_ERR(ret)) {
 		r = PTR_ERR(ret);
 		goto out;
 	}
 
-	kvm->arch.apic_access_memslot_enabled = activate;
+	kvm->arch.apic_access_memslot_enabled = true;
 out:
 	mutex_unlock(&kvm->slots_lock);
 	return r;
@@ -270,7 +267,7 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
 	if (kvm_apicv_activated(vcpu->kvm)) {
 		int ret;
 
-		ret = avic_update_access_page(vcpu->kvm, true);
+		ret = avic_alloc_access_page(vcpu->kvm);
 		if (ret)
 			return ret;
 	}
@@ -587,17 +584,6 @@ void avic_post_state_restore(struct kvm_vcpu *vcpu)
 	avic_handle_ldr_update(vcpu);
 }
 
-void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate)
-{
-	if (!enable_apicv || !lapic_in_kernel(vcpu))
-		return;
-
-	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-	kvm_request_apicv_update(vcpu->kvm, activate,
-				 APICV_INHIBIT_REASON_IRQWIN);
-	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-}
-
 void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
 {
 	return;
@@ -667,6 +653,11 @@ void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 	}
 	vmcb_mark_dirty(vmcb, VMCB_AVIC);
 
+	if (activated)
+		avic_vcpu_load(vcpu, vcpu->cpu);
+	else
+		avic_vcpu_put(vcpu);
+
 	svm_set_pi_irte_mode(vcpu, activated);
 }
 
@@ -918,10 +909,6 @@ bool svm_check_apicv_inhibit_reasons(ulong bit)
 	return supported & BIT(bit);
 }
 
-void svm_pre_update_apicv_exec_ctrl(struct kvm *kvm, bool activate)
-{
-	avic_update_access_page(kvm, activate);
-}
 
 static inline int
 avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
@@ -960,9 +947,6 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	int h_physical_id = kvm_cpu_get_apicid(cpu);
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	if (!kvm_vcpu_apicv_active(vcpu))
-		return;
-
 	/*
 	 * Since the host physical APIC id is 8 bits,
 	 * we can support host APIC ID upto 255.
@@ -990,9 +974,6 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
 	u64 entry;
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	if (!kvm_vcpu_apicv_active(vcpu))
-		return;
-
 	entry = READ_ONCE(*(svm->avic_physical_id_cache));
 	if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
 		avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
@@ -1009,6 +990,10 @@ static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	svm->avic_is_running = is_run;
+
+	if (!kvm_vcpu_apicv_active(vcpu))
+		return;
+
 	if (is_run)
 		avic_vcpu_load(vcpu, vcpu->cpu);
 	else
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index e5515477c30a..2545d0c61985 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -666,11 +666,6 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
 		goto out;
 	}
 
-
-	/* Clear internal status */
-	kvm_clear_exception_queue(vcpu);
-	kvm_clear_interrupt_queue(vcpu);
-
 	/*
 	 * Since vmcb01 is not in use, we can use it to store some of the L1
 	 * state.
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 7fbce342eec4..75e0b21ad07c 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -28,8 +28,6 @@
 #include "cpuid.h"
 #include "trace.h"
 
-#define __ex(x) __kvm_handle_fault_on_reboot(x)
-
 #ifndef CONFIG_KVM_AMD_SEV
 /*
  * When this config is not defined, SEV feature is not supported and APIs in
@@ -584,6 +582,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
 	save->xcr0 = svm->vcpu.arch.xcr0;
 	save->pkru = svm->vcpu.arch.pkru;
 	save->xss  = svm->vcpu.arch.ia32_xss;
+	save->dr6  = svm->vcpu.arch.dr6;
 
 	/*
 	 * SEV-ES will use a VMSA that is pointed to by the VMCB, not
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 69639f9624f5..05e8d4d27969 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -46,8 +46,6 @@
 #include "kvm_onhyperv.h"
 #include "svm_onhyperv.h"
 
-#define __ex(x) __kvm_handle_fault_on_reboot(x)
-
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
@@ -261,7 +259,7 @@ u32 svm_msrpm_offset(u32 msr)
 static int get_max_npt_level(void)
 {
 #ifdef CONFIG_X86_64
-	return PT64_ROOT_4LEVEL;
+	return pgtable_l5_enabled() ? PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
 #else
 	return PT32E_ROOT_LEVEL;
 #endif
@@ -462,11 +460,6 @@ static int has_svm(void)
 		return 0;
 	}
 
-	if (pgtable_l5_enabled()) {
-		pr_info("KVM doesn't yet support 5-level paging on AMD SVM\n");
-		return 0;
-	}
-
 	return 1;
 }
 
@@ -1015,7 +1008,9 @@ static __init int svm_hardware_setup(void)
 	if (!boot_cpu_has(X86_FEATURE_NPT))
 		npt_enabled = false;
 
-	kvm_configure_mmu(npt_enabled, get_max_npt_level(), PG_LEVEL_1G);
+	/* Force VM NPT level equal to the host's max NPT level */
+	kvm_configure_mmu(npt_enabled, get_max_npt_level(),
+			  get_max_npt_level(), PG_LEVEL_1G);
 	pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
 
 	/* Note, SEV setup consumes npt_enabled. */
@@ -1161,8 +1156,6 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
 	struct vmcb_control_area *control = &svm->vmcb->control;
 	struct vmcb_save_area *save = &svm->vmcb->save;
 
-	vcpu->arch.hflags = 0;
-
 	svm_set_intercept(svm, INTERCEPT_CR0_READ);
 	svm_set_intercept(svm, INTERCEPT_CR3_READ);
 	svm_set_intercept(svm, INTERCEPT_CR4_READ);
@@ -1241,29 +1234,14 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
 		SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
 	save->cs.limit = 0xffff;
 
+	save->gdtr.base = 0;
 	save->gdtr.limit = 0xffff;
+	save->idtr.base = 0;
 	save->idtr.limit = 0xffff;
 
 	init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
 	init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
 
-	svm_set_cr4(vcpu, 0);
-	svm_set_efer(vcpu, 0);
-	save->dr6 = 0xffff0ff0;
-	kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
-	save->rip = 0x0000fff0;
-	vcpu->arch.regs[VCPU_REGS_RIP] = save->rip;
-
-	/*
-	 * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
-	 * It also updates the guest-visible cr0 value.
-	 */
-	svm_set_cr0(vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
-	kvm_mmu_reset_context(vcpu);
-
-	save->cr4 = X86_CR4_PAE;
-	/* rdx = ?? */
-
 	if (npt_enabled) {
 		/* Setup VMCB for Nested Paging */
 		control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE;
@@ -1273,14 +1251,12 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
 		svm_clr_intercept(svm, INTERCEPT_CR3_WRITE);
 		save->g_pat = vcpu->arch.pat;
 		save->cr3 = 0;
-		save->cr4 = 0;
 	}
 	svm->current_vmcb->asid_generation = 0;
 	svm->asid = 0;
 
 	svm->nested.vmcb12_gpa = INVALID_GPA;
 	svm->nested.last_vmcb12_gpa = INVALID_GPA;
-	vcpu->arch.hflags = 0;
 
 	if (!kvm_pause_in_guest(vcpu->kvm)) {
 		control->pause_filter_count = pause_filter_count;
@@ -1330,25 +1306,11 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
 static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	u32 dummy;
-	u32 eax = 1;
 
 	svm->spec_ctrl = 0;
 	svm->virt_spec_ctrl = 0;
 
-	if (!init_event) {
-		vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE |
-				       MSR_IA32_APICBASE_ENABLE;
-		if (kvm_vcpu_is_reset_bsp(vcpu))
-			vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
-	}
 	init_vmcb(vcpu);
-
-	kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, false);
-	kvm_rdx_write(vcpu, eax);
-
-	if (kvm_vcpu_apicv_active(vcpu) && !init_event)
-		avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
 }
 
 void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb)
@@ -1513,12 +1475,15 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		sd->current_vmcb = svm->vmcb;
 		indirect_branch_prediction_barrier();
 	}
-	avic_vcpu_load(vcpu, cpu);
+	if (kvm_vcpu_apicv_active(vcpu))
+		avic_vcpu_load(vcpu, cpu);
 }
 
 static void svm_vcpu_put(struct kvm_vcpu *vcpu)
 {
-	avic_vcpu_put(vcpu);
+	if (kvm_vcpu_apicv_active(vcpu))
+		avic_vcpu_put(vcpu);
+
 	svm_prepare_host_switch(vcpu);
 
 	++vcpu->stat.host_state_reload;
@@ -1560,7 +1525,7 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 		load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
 		break;
 	default:
-		WARN_ON_ONCE(1);
+		KVM_BUG_ON(1, vcpu->kvm);
 	}
 }
 
@@ -2078,11 +2043,15 @@ static int shutdown_interception(struct kvm_vcpu *vcpu)
 		return -EINVAL;
 
 	/*
-	 * VMCB is undefined after a SHUTDOWN intercept
-	 * so reinitialize it.
+	 * VMCB is undefined after a SHUTDOWN intercept.  INIT the vCPU to put
+	 * the VMCB in a known good state.  Unfortuately, KVM doesn't have
+	 * KVM_MP_STATE_SHUTDOWN and can't add it without potentially breaking
+	 * userspace.  At a platform view, INIT is acceptable behavior as
+	 * there exist bare metal platforms that automatically INIT the CPU
+	 * in response to shutdown.
 	 */
 	clear_page(svm->vmcb);
-	init_vmcb(vcpu);
+	kvm_vcpu_reset(vcpu, true);
 
 	kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
 	return 0;
@@ -2993,10 +2962,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		svm->msr_decfg = data;
 		break;
 	}
-	case MSR_IA32_APICBASE:
-		if (kvm_vcpu_apicv_active(vcpu))
-			avic_update_vapic_bar(to_svm(vcpu), data);
-		fallthrough;
 	default:
 		return kvm_set_msr_common(vcpu, msr);
 	}
@@ -3021,7 +2986,7 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu)
 	 * In this case AVIC was temporarily disabled for
 	 * requesting the IRQ window and we have to re-enable it.
 	 */
-	svm_toggle_avic_for_irq_window(vcpu, true);
+	kvm_request_apicv_update(vcpu->kvm, true, APICV_INHIBIT_REASON_IRQWIN);
 
 	++vcpu->stat.irq_window_exits;
 	return 1;
@@ -3269,12 +3234,14 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
 	       "excp_to:", save->last_excp_to);
 }
 
-static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code)
+static bool svm_check_exit_valid(struct kvm_vcpu *vcpu, u64 exit_code)
 {
-	if (exit_code < ARRAY_SIZE(svm_exit_handlers) &&
-	    svm_exit_handlers[exit_code])
-		return 0;
+	return (exit_code < ARRAY_SIZE(svm_exit_handlers) &&
+		svm_exit_handlers[exit_code]);
+}
 
+static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code)
+{
 	vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%llx\n", exit_code);
 	dump_vmcb(vcpu);
 	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
@@ -3282,14 +3249,13 @@ static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code)
 	vcpu->run->internal.ndata = 2;
 	vcpu->run->internal.data[0] = exit_code;
 	vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
-
-	return -EINVAL;
+	return 0;
 }
 
 int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code)
 {
-	if (svm_handle_invalid_exit(vcpu, exit_code))
-		return 0;
+	if (!svm_check_exit_valid(vcpu, exit_code))
+		return svm_handle_invalid_exit(vcpu, exit_code);
 
 #ifdef CONFIG_RETPOLINE
 	if (exit_code == SVM_EXIT_MSR)
@@ -3573,7 +3539,7 @@ static void svm_enable_irq_window(struct kvm_vcpu *vcpu)
 		 * via AVIC. In such case, we need to temporarily disable AVIC,
 		 * and fallback to injecting IRQ via V_IRQ.
 		 */
-		svm_toggle_avic_for_irq_window(vcpu, false);
+		kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_IRQWIN);
 		svm_set_vintr(svm);
 	}
 }
@@ -3808,6 +3774,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
 
 	pre_svm_run(vcpu);
 
+	WARN_ON_ONCE(kvm_apicv_activated(vcpu->kvm) != kvm_vcpu_apicv_active(vcpu));
+
 	sync_lapic_to_cr8(vcpu);
 
 	if (unlikely(svm->asid != svm->vmcb->control.asid)) {
@@ -4610,7 +4578,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.set_virtual_apic_mode = svm_set_virtual_apic_mode,
 	.refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
 	.check_apicv_inhibit_reasons = svm_check_apicv_inhibit_reasons,
-	.pre_update_apicv_exec_ctrl = svm_pre_update_apicv_exec_ctrl,
 	.load_eoi_exitmap = svm_load_eoi_exitmap,
 	.hwapic_irr_update = svm_hwapic_irr_update,
 	.hwapic_isr_update = svm_hwapic_isr_update,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index bd0fe94c2920..524d943f3efc 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -503,12 +503,6 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
 
 #define VMCB_AVIC_APIC_BAR_MASK		0xFFFFFFFFFF000ULL
 
-static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
-{
-	svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK;
-	vmcb_mark_dirty(svm->vmcb, VMCB_AVIC);
-}
-
 static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -524,7 +518,6 @@ int avic_ga_log_notifier(u32 ga_tag);
 void avic_vm_destroy(struct kvm *kvm);
 int avic_vm_init(struct kvm *kvm);
 void avic_init_vmcb(struct vcpu_svm *svm);
-void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate);
 int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu);
 int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu);
 int avic_init_vcpu(struct vcpu_svm *svm);
@@ -534,7 +527,6 @@ void avic_post_state_restore(struct kvm_vcpu *vcpu);
 void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
 void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu);
 bool svm_check_apicv_inhibit_reasons(ulong bit);
-void svm_pre_update_apicv_exec_ctrl(struct kvm *kvm, bool activate);
 void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr);
 void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr);
diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h
index 8170f2a5a16f..22e2b019de37 100644
--- a/arch/x86/kvm/svm/svm_ops.h
+++ b/arch/x86/kvm/svm/svm_ops.h
@@ -4,7 +4,7 @@
 
 #include <linux/compiler_types.h>
 
-#include <asm/kvm_host.h>
+#include "x86.h"
 
 #define svm_asm(insn, clobber...)				\
 do {								\
diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
index 896b2a50b4aa..0dab1b7b529f 100644
--- a/arch/x86/kvm/vmx/evmcs.c
+++ b/arch/x86/kvm/vmx/evmcs.c
@@ -14,7 +14,6 @@ DEFINE_STATIC_KEY_FALSE(enable_evmcs);
 
 #if IS_ENABLED(CONFIG_HYPERV)
 
-#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
 #define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x)
 #define EVMCS1_FIELD(number, name, clean_field)[ROL16(number, 6)] = \
 		{EVMCS1_OFFSET(name), clean_field}
diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h
index 2ec9b46f0d0c..152ab0aa82cf 100644
--- a/arch/x86/kvm/vmx/evmcs.h
+++ b/arch/x86/kvm/vmx/evmcs.h
@@ -73,8 +73,6 @@ struct evmcs_field {
 extern const struct evmcs_field vmcs_field_to_evmcs_1[];
 extern const unsigned int nr_evmcs_1_fields;
 
-#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
-
 static __always_inline int get_evmcs_offset(unsigned long field,
 					    u16 *clean_field)
 {
@@ -95,8 +93,6 @@ static __always_inline int get_evmcs_offset(unsigned long field,
 	return evmcs_field->offset;
 }
 
-#undef ROL16
-
 static inline void evmcs_write64(unsigned long field, u64 value)
 {
 	u16 clean_field;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index b3f77d18eb5a..ccb03d69546c 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2207,7 +2207,8 @@ static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx,
 	}
 }
 
-static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
+static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs01,
+				 struct vmcs12 *vmcs12)
 {
 	u32 exec_control;
 	u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
@@ -2218,23 +2219,22 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
 	/*
 	 * PIN CONTROLS
 	 */
-	exec_control = vmx_pin_based_exec_ctrl(vmx);
+	exec_control = __pin_controls_get(vmcs01);
 	exec_control |= (vmcs12->pin_based_vm_exec_control &
 			 ~PIN_BASED_VMX_PREEMPTION_TIMER);
 
 	/* Posted interrupts setting is only taken from vmcs12.  */
-	if (nested_cpu_has_posted_intr(vmcs12)) {
+	vmx->nested.pi_pending = false;
+	if (nested_cpu_has_posted_intr(vmcs12))
 		vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
-		vmx->nested.pi_pending = false;
-	} else {
+	else
 		exec_control &= ~PIN_BASED_POSTED_INTR;
-	}
 	pin_controls_set(vmx, exec_control);
 
 	/*
 	 * EXEC CONTROLS
 	 */
-	exec_control = vmx_exec_control(vmx); /* L0's desires */
+	exec_control = __exec_controls_get(vmcs01); /* L0's desires */
 	exec_control &= ~CPU_BASED_INTR_WINDOW_EXITING;
 	exec_control &= ~CPU_BASED_NMI_WINDOW_EXITING;
 	exec_control &= ~CPU_BASED_TPR_SHADOW;
@@ -2271,10 +2271,11 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
 	 * SECONDARY EXEC CONTROLS
 	 */
 	if (cpu_has_secondary_exec_ctrls()) {
-		exec_control = vmx->secondary_exec_control;
+		exec_control = __secondary_exec_controls_get(vmcs01);
 
 		/* Take the following fields only from vmcs12 */
 		exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+				  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
 				  SECONDARY_EXEC_ENABLE_INVPCID |
 				  SECONDARY_EXEC_ENABLE_RDTSCP |
 				  SECONDARY_EXEC_XSAVES |
@@ -2282,7 +2283,9 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
 				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
 				  SECONDARY_EXEC_APIC_REGISTER_VIRT |
 				  SECONDARY_EXEC_ENABLE_VMFUNC |
-				  SECONDARY_EXEC_TSC_SCALING);
+				  SECONDARY_EXEC_TSC_SCALING |
+				  SECONDARY_EXEC_DESC);
+
 		if (nested_cpu_has(vmcs12,
 				   CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
 			exec_control |= vmcs12->secondary_vm_exec_control;
@@ -2322,8 +2325,9 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
 	 * on the related bits (if supported by the CPU) in the hope that
 	 * we can avoid VMWrites during vmx_set_efer().
 	 */
-	exec_control = (vmcs12->vm_entry_controls | vmx_vmentry_ctrl()) &
-			~VM_ENTRY_IA32E_MODE & ~VM_ENTRY_LOAD_IA32_EFER;
+	exec_control = __vm_entry_controls_get(vmcs01);
+	exec_control |= vmcs12->vm_entry_controls;
+	exec_control &= ~(VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER);
 	if (cpu_has_load_ia32_efer()) {
 		if (guest_efer & EFER_LMA)
 			exec_control |= VM_ENTRY_IA32E_MODE;
@@ -2339,9 +2343,11 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
 	 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
 	 * bits may be modified by vmx_set_efer() in prepare_vmcs02().
 	 */
-	exec_control = vmx_vmexit_ctrl();
+	exec_control = __vm_exit_controls_get(vmcs01);
 	if (cpu_has_load_ia32_efer() && guest_efer != host_efer)
 		exec_control |= VM_EXIT_LOAD_IA32_EFER;
+	else
+		exec_control &= ~VM_EXIT_LOAD_IA32_EFER;
 	vm_exit_controls_set(vmx, exec_control);
 
 	/*
@@ -3384,7 +3390,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
 
 	vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
 
-	prepare_vmcs02_early(vmx, vmcs12);
+	prepare_vmcs02_early(vmx, &vmx->vmcs01, vmcs12);
 
 	if (from_vmentry) {
 		if (unlikely(!nested_get_vmcs12_pages(vcpu))) {
@@ -4304,7 +4310,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 		seg.l = 1;
 	else
 		seg.db = 1;
-	vmx_set_segment(vcpu, &seg, VCPU_SREG_CS);
+	__vmx_set_segment(vcpu, &seg, VCPU_SREG_CS);
 	seg = (struct kvm_segment) {
 		.base = 0,
 		.limit = 0xFFFFFFFF,
@@ -4315,17 +4321,17 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 		.g = 1
 	};
 	seg.selector = vmcs12->host_ds_selector;
-	vmx_set_segment(vcpu, &seg, VCPU_SREG_DS);
+	__vmx_set_segment(vcpu, &seg, VCPU_SREG_DS);
 	seg.selector = vmcs12->host_es_selector;
-	vmx_set_segment(vcpu, &seg, VCPU_SREG_ES);
+	__vmx_set_segment(vcpu, &seg, VCPU_SREG_ES);
 	seg.selector = vmcs12->host_ss_selector;
-	vmx_set_segment(vcpu, &seg, VCPU_SREG_SS);
+	__vmx_set_segment(vcpu, &seg, VCPU_SREG_SS);
 	seg.selector = vmcs12->host_fs_selector;
 	seg.base = vmcs12->host_fs_base;
-	vmx_set_segment(vcpu, &seg, VCPU_SREG_FS);
+	__vmx_set_segment(vcpu, &seg, VCPU_SREG_FS);
 	seg.selector = vmcs12->host_gs_selector;
 	seg.base = vmcs12->host_gs_base;
-	vmx_set_segment(vcpu, &seg, VCPU_SREG_GS);
+	__vmx_set_segment(vcpu, &seg, VCPU_SREG_GS);
 	seg = (struct kvm_segment) {
 		.base = vmcs12->host_tr_base,
 		.limit = 0x67,
@@ -4333,14 +4339,15 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 		.type = 11,
 		.present = 1
 	};
-	vmx_set_segment(vcpu, &seg, VCPU_SREG_TR);
+	__vmx_set_segment(vcpu, &seg, VCPU_SREG_TR);
+
+	memset(&seg, 0, sizeof(seg));
+	seg.unusable = 1;
+	__vmx_set_segment(vcpu, &seg, VCPU_SREG_LDTR);
 
 	kvm_set_dr(vcpu, 7, 0x400);
 	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
 
-	if (cpu_has_vmx_msr_bitmap())
-		vmx_update_msr_bitmap(vcpu);
-
 	if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
 				vmcs12->vm_exit_msr_load_count))
 		nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
@@ -4419,9 +4426,6 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
 
 	kvm_mmu_reset_context(vcpu);
 
-	if (cpu_has_vmx_msr_bitmap())
-		vmx_update_msr_bitmap(vcpu);
-
 	/*
 	 * This nasty bit of open coding is a compromise between blindly
 	 * loading L1's MSRs using the exit load lists (incorrect emulation
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 9efc1a6b8693..10cc4f65c4ef 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -437,13 +437,13 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			    !(msr & MSR_PMC_FULL_WIDTH_BIT))
 				data = (s64)(s32)data;
 			pmc->counter += data - pmc_read_counter(pmc);
-			if (pmc->perf_event)
+			if (pmc->perf_event && !pmc->is_paused)
 				perf_event_period(pmc->perf_event,
 						  get_sample_period(pmc, data));
 			return 0;
 		} else if ((pmc = get_fixed_pmc(pmu, msr))) {
 			pmc->counter += data - pmc_read_counter(pmc);
-			if (pmc->perf_event)
+			if (pmc->perf_event && !pmc->is_paused)
 				perf_event_period(pmc->perf_event,
 						  get_sample_period(pmc, data));
 			return 0;
diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h
index 4b9957e2bf5b..6e5de2e2b0da 100644
--- a/arch/x86/kvm/vmx/vmcs.h
+++ b/arch/x86/kvm/vmx/vmcs.h
@@ -11,6 +11,8 @@
 
 #include "capabilities.h"
 
+#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
+
 struct vmcs_hdr {
 	u32 revision_id:31;
 	u32 shadow_vmcs:1;
diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c
index d9f5d7c56ae3..cab6ba7a5005 100644
--- a/arch/x86/kvm/vmx/vmcs12.c
+++ b/arch/x86/kvm/vmx/vmcs12.c
@@ -2,7 +2,6 @@
 
 #include "vmcs12.h"
 
-#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
 #define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
 #define FIELD(number, name)	[ROL16(number, 6)] = VMCS12_OFFSET(name)
 #define FIELD64(number, name)						\
diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h
index 5e0e1b39f495..2a45f026ee11 100644
--- a/arch/x86/kvm/vmx/vmcs12.h
+++ b/arch/x86/kvm/vmx/vmcs12.h
@@ -364,8 +364,6 @@ static inline void vmx_check_vmcs12_offsets(void)
 extern const unsigned short vmcs_field_to_offset_table[];
 extern const unsigned int nr_vmcs12_fields;
 
-#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
-
 static inline short vmcs_field_to_offset(unsigned long field)
 {
 	unsigned short offset;
@@ -385,8 +383,6 @@ static inline short vmcs_field_to_offset(unsigned long field)
 	return offset;
 }
 
-#undef ROL16
-
 static inline u64 vmcs12_read_any(struct vmcs12 *vmcs12, unsigned long field,
 				  u16 offset)
 {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 927a552393b9..0c2c0d5ae873 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -136,8 +136,7 @@ module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
 #define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD)
 #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE
 #define KVM_VM_CR0_ALWAYS_ON				\
-	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | 	\
-	 X86_CR0_WP | X86_CR0_PG | X86_CR0_PE)
+	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
 
 #define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE
 #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
@@ -1648,11 +1647,12 @@ static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr,
 }
 
 /*
- * Set up the vmcs to automatically save and restore system
- * msrs.  Don't touch the 64-bit msrs if the guest is in legacy
- * mode, as fiddling with msrs is very expensive.
+ * Configuring user return MSRs to automatically save, load, and restore MSRs
+ * that need to be shoved into hardware when running the guest.  Note, omitting
+ * an MSR here does _NOT_ mean it's not emulated, only that it will not be
+ * loaded into hardware when running the guest.
  */
-static void setup_msrs(struct vcpu_vmx *vmx)
+static void vmx_setup_uret_msrs(struct vcpu_vmx *vmx)
 {
 #ifdef CONFIG_X86_64
 	bool load_syscall_msrs;
@@ -1682,9 +1682,6 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 	 */
 	vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, boot_cpu_has(X86_FEATURE_RTM));
 
-	if (cpu_has_vmx_msr_bitmap())
-		vmx_update_msr_bitmap(&vmx->vcpu);
-
 	/*
 	 * The set of MSRs to load may have changed, reload MSRs before the
 	 * next VM-Enter.
@@ -2263,8 +2260,11 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 		vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & guest_owned_bits;
 		break;
 	case VCPU_EXREG_CR3:
-		if (is_unrestricted_guest(vcpu) ||
-		    (enable_ept && is_paging(vcpu)))
+		/*
+		 * When intercepting CR3 loads, e.g. for shadowing paging, KVM's
+		 * CR3 is loaded into hardware, not the guest's CR3.
+		 */
+		if (!(exec_controls_get(to_vmx(vcpu)) & CPU_BASED_CR3_LOAD_EXITING))
 			vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
 		break;
 	case VCPU_EXREG_CR4:
@@ -2274,7 +2274,7 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 		vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & guest_owned_bits;
 		break;
 	default:
-		WARN_ON_ONCE(1);
+		KVM_BUG_ON(1, vcpu->kvm);
 		break;
 	}
 }
@@ -2733,7 +2733,7 @@ static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
 		save->dpl = save->selector & SEGMENT_RPL_MASK;
 		save->s = 1;
 	}
-	vmx_set_segment(vcpu, save, seg);
+	__vmx_set_segment(vcpu, save, seg);
 }
 
 static void enter_pmode(struct kvm_vcpu *vcpu)
@@ -2754,7 +2754,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 
 	vmx->rmode.vm86_active = 0;
 
-	vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
+	__vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
 
 	flags = vmcs_readl(GUEST_RFLAGS);
 	flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
@@ -2852,8 +2852,6 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 	fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
 	fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
 	fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
-
-	kvm_mmu_reset_context(vcpu);
 }
 
 int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
@@ -2874,7 +2872,7 @@ int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 
 		msr->data = efer & ~EFER_LME;
 	}
-	setup_msrs(vmx);
+	vmx_setup_uret_msrs(vmx);
 	return 0;
 }
 
@@ -2997,42 +2995,24 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
 	kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
 }
 
-static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
-					unsigned long cr0,
-					struct kvm_vcpu *vcpu)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-
-	if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))
-		vmx_cache_reg(vcpu, VCPU_EXREG_CR3);
-	if (!(cr0 & X86_CR0_PG)) {
-		/* From paging/starting to nonpaging */
-		exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
-					  CPU_BASED_CR3_STORE_EXITING);
-		vcpu->arch.cr0 = cr0;
-		vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
-	} else if (!is_paging(vcpu)) {
-		/* From nonpaging to paging */
-		exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
-					    CPU_BASED_CR3_STORE_EXITING);
-		vcpu->arch.cr0 = cr0;
-		vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
-	}
-
-	if (!(cr0 & X86_CR0_WP))
-		*hw_cr0 &= ~X86_CR0_WP;
-}
+#define CR3_EXITING_BITS (CPU_BASED_CR3_LOAD_EXITING | \
+			  CPU_BASED_CR3_STORE_EXITING)
 
 void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	unsigned long hw_cr0;
+	unsigned long hw_cr0, old_cr0_pg;
+	u32 tmp;
+
+	old_cr0_pg = kvm_read_cr0_bits(vcpu, X86_CR0_PG);
 
 	hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF);
 	if (is_unrestricted_guest(vcpu))
 		hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
 	else {
 		hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
+		if (!enable_ept)
+			hw_cr0 |= X86_CR0_WP;
 
 		if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
 			enter_pmode(vcpu);
@@ -3041,22 +3021,60 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 			enter_rmode(vcpu);
 	}
 
+	vmcs_writel(CR0_READ_SHADOW, cr0);
+	vmcs_writel(GUEST_CR0, hw_cr0);
+	vcpu->arch.cr0 = cr0;
+	kvm_register_mark_available(vcpu, VCPU_EXREG_CR0);
+
 #ifdef CONFIG_X86_64
 	if (vcpu->arch.efer & EFER_LME) {
-		if (!is_paging(vcpu) && (cr0 & X86_CR0_PG))
+		if (!old_cr0_pg && (cr0 & X86_CR0_PG))
 			enter_lmode(vcpu);
-		if (is_paging(vcpu) && !(cr0 & X86_CR0_PG))
+		else if (old_cr0_pg && !(cr0 & X86_CR0_PG))
 			exit_lmode(vcpu);
 	}
 #endif
 
-	if (enable_ept && !is_unrestricted_guest(vcpu))
-		ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
+	if (enable_ept && !is_unrestricted_guest(vcpu)) {
+		/*
+		 * Ensure KVM has an up-to-date snapshot of the guest's CR3.  If
+		 * the below code _enables_ CR3 exiting, vmx_cache_reg() will
+		 * (correctly) stop reading vmcs.GUEST_CR3 because it thinks
+		 * KVM's CR3 is installed.
+		 */
+		if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))
+			vmx_cache_reg(vcpu, VCPU_EXREG_CR3);
 
-	vmcs_writel(CR0_READ_SHADOW, cr0);
-	vmcs_writel(GUEST_CR0, hw_cr0);
-	vcpu->arch.cr0 = cr0;
-	kvm_register_mark_available(vcpu, VCPU_EXREG_CR0);
+		/*
+		 * When running with EPT but not unrestricted guest, KVM must
+		 * intercept CR3 accesses when paging is _disabled_.  This is
+		 * necessary because restricted guests can't actually run with
+		 * paging disabled, and so KVM stuffs its own CR3 in order to
+		 * run the guest when identity mapped page tables.
+		 *
+		 * Do _NOT_ check the old CR0.PG, e.g. to optimize away the
+		 * update, it may be stale with respect to CR3 interception,
+		 * e.g. after nested VM-Enter.
+		 *
+		 * Lastly, honor L1's desires, i.e. intercept CR3 loads and/or
+		 * stores to forward them to L1, even if KVM does not need to
+		 * intercept them to preserve its identity mapped page tables.
+		 */
+		if (!(cr0 & X86_CR0_PG)) {
+			exec_controls_setbit(vmx, CR3_EXITING_BITS);
+		} else if (!is_guest_mode(vcpu)) {
+			exec_controls_clearbit(vmx, CR3_EXITING_BITS);
+		} else {
+			tmp = exec_controls_get(vmx);
+			tmp &= ~CR3_EXITING_BITS;
+			tmp |= get_vmcs12(vcpu)->cpu_based_vm_exec_control & CR3_EXITING_BITS;
+			exec_controls_set(vmx, tmp);
+		}
+
+		/* Note, vmx_set_cr4() consumes the new vcpu->arch.cr0. */
+		if ((old_cr0_pg ^ cr0) & X86_CR0_PG)
+			vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
+	}
 
 	/* depends on vcpu->arch.cr0 to be set to a new value */
 	vmx->emulation_required = emulation_required(vcpu);
@@ -3271,7 +3289,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var)
 	return ar;
 }
 
-void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
+void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
@@ -3284,7 +3302,7 @@ void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
 			vmcs_write16(sf->selector, var->selector);
 		else if (var->s)
 			fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
-		goto out;
+		return;
 	}
 
 	vmcs_writel(sf->base, var->base);
@@ -3306,9 +3324,13 @@ void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
 		var->type |= 0x1; /* Accessed */
 
 	vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
+}
 
-out:
-	vmx->emulation_required = emulation_required(vcpu);
+static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
+{
+	__vmx_set_segment(vcpu, var, seg);
+
+	to_vmx(vcpu)->emulation_required = emulation_required(vcpu);
 }
 
 static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
@@ -3790,21 +3812,6 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 		vmx_set_msr_bitmap_write(msr_bitmap, msr);
 }
 
-static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
-{
-	u8 mode = 0;
-
-	if (cpu_has_secondary_exec_ctrls() &&
-	    (secondary_exec_controls_get(to_vmx(vcpu)) &
-	     SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
-		mode |= MSR_BITMAP_MODE_X2APIC;
-		if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
-			mode |= MSR_BITMAP_MODE_X2APIC_APICV;
-	}
-
-	return mode;
-}
-
 static void vmx_reset_x2apic_msrs(struct kvm_vcpu *vcpu, u8 mode)
 {
 	unsigned long *msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
@@ -3822,11 +3829,29 @@ static void vmx_reset_x2apic_msrs(struct kvm_vcpu *vcpu, u8 mode)
 	}
 }
 
-static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu, u8 mode)
+static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu)
 {
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	u8 mode;
+
 	if (!cpu_has_vmx_msr_bitmap())
 		return;
 
+	if (cpu_has_secondary_exec_ctrls() &&
+	    (secondary_exec_controls_get(vmx) &
+	     SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
+		mode = MSR_BITMAP_MODE_X2APIC;
+		if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
+			mode |= MSR_BITMAP_MODE_X2APIC_APICV;
+	} else {
+		mode = 0;
+	}
+
+	if (mode == vmx->x2apic_msr_bitmap_mode)
+		return;
+
+	vmx->x2apic_msr_bitmap_mode = mode;
+
 	vmx_reset_x2apic_msrs(vcpu, mode);
 
 	/*
@@ -3843,21 +3868,6 @@ static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu, u8 mode)
 	}
 }
 
-void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u8 mode = vmx_msr_bitmap_mode(vcpu);
-	u8 changed = mode ^ vmx->msr_bitmap_mode;
-
-	if (!changed)
-		return;
-
-	if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
-		vmx_update_msr_bitmap_x2apic(vcpu, mode);
-
-	vmx->msr_bitmap_mode = mode;
-}
-
 void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -3914,7 +3924,6 @@ static void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
 	}
 
 	pt_update_intercept_for_msr(vcpu);
-	vmx_update_msr_bitmap_x2apic(vcpu, vmx_msr_bitmap_mode(vcpu));
 }
 
 static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
@@ -4086,7 +4095,7 @@ void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
 	vmcs_writel(CR4_GUEST_HOST_MASK, ~vcpu->arch.cr4_guest_owned_bits);
 }
 
-u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
+static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
 {
 	u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
 
@@ -4102,6 +4111,30 @@ u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
 	return pin_based_exec_ctrl;
 }
 
+static u32 vmx_vmentry_ctrl(void)
+{
+	u32 vmentry_ctrl = vmcs_config.vmentry_ctrl;
+
+	if (vmx_pt_mode_is_system())
+		vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP |
+				  VM_ENTRY_LOAD_IA32_RTIT_CTL);
+	/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
+	return vmentry_ctrl &
+		~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER);
+}
+
+static u32 vmx_vmexit_ctrl(void)
+{
+	u32 vmexit_ctrl = vmcs_config.vmexit_ctrl;
+
+	if (vmx_pt_mode_is_system())
+		vmexit_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP |
+				 VM_EXIT_CLEAR_IA32_RTIT_CTL);
+	/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
+	return vmexit_ctrl &
+		~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER);
+}
+
 static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -4118,11 +4151,10 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 					SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
 	}
 
-	if (cpu_has_vmx_msr_bitmap())
-		vmx_update_msr_bitmap(vcpu);
+	vmx_update_msr_bitmap_x2apic(vcpu);
 }
 
-u32 vmx_exec_control(struct vcpu_vmx *vmx)
+static u32 vmx_exec_control(struct vcpu_vmx *vmx)
 {
 	u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
 
@@ -4204,7 +4236,7 @@ vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control,
 #define vmx_adjust_sec_exec_exiting(vmx, exec_control, lname, uname) \
 	vmx_adjust_sec_exec_control(vmx, exec_control, lname, uname, uname##_EXITING, true)
 
-static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
+static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 {
 	struct kvm_vcpu *vcpu = &vmx->vcpu;
 
@@ -4290,7 +4322,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 	if (!vcpu->kvm->arch.bus_lock_detection_enabled)
 		exec_control &= ~SECONDARY_EXEC_BUS_LOCK_DETECTION;
 
-	vmx->secondary_exec_control = exec_control;
+	return exec_control;
 }
 
 #define VMX_XSS_EXIT_BITMAP 0
@@ -4314,10 +4346,8 @@ static void init_vmcs(struct vcpu_vmx *vmx)
 
 	exec_controls_set(vmx, vmx_exec_control(vmx));
 
-	if (cpu_has_secondary_exec_ctrls()) {
-		vmx_compute_secondary_exec_control(vmx);
-		secondary_exec_controls_set(vmx, vmx->secondary_exec_control);
-	}
+	if (cpu_has_secondary_exec_ctrls())
+		secondary_exec_controls_set(vmx, vmx_secondary_exec_control(vmx));
 
 	if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
 		vmcs_write64(EOI_EXIT_BITMAP0, 0);
@@ -4388,32 +4418,35 @@ static void init_vmcs(struct vcpu_vmx *vmx)
 		vmx->pt_desc.guest.output_mask = 0x7F;
 		vmcs_write64(GUEST_IA32_RTIT_CTL, 0);
 	}
+
+	vmcs_write32(GUEST_SYSENTER_CS, 0);
+	vmcs_writel(GUEST_SYSENTER_ESP, 0);
+	vmcs_writel(GUEST_SYSENTER_EIP, 0);
+	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+
+	if (cpu_has_vmx_tpr_shadow()) {
+		vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
+		if (cpu_need_tpr_shadow(&vmx->vcpu))
+			vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
+				     __pa(vmx->vcpu.arch.apic->regs));
+		vmcs_write32(TPR_THRESHOLD, 0);
+	}
+
+	vmx_setup_uret_msrs(vmx);
 }
 
 static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	struct msr_data apic_base_msr;
-	u64 cr0;
 
 	vmx->rmode.vm86_active = 0;
 	vmx->spec_ctrl = 0;
 
 	vmx->msr_ia32_umwait_control = 0;
 
-	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
 	vmx->hv_deadline_tsc = -1;
 	kvm_set_cr8(vcpu, 0);
 
-	if (!init_event) {
-		apic_base_msr.data = APIC_DEFAULT_PHYS_BASE |
-				     MSR_IA32_APICBASE_ENABLE;
-		if (kvm_vcpu_is_reset_bsp(vcpu))
-			apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
-		apic_base_msr.host_initiated = true;
-		kvm_set_apic_base(vcpu, &apic_base_msr);
-	}
-
 	vmx_segment_cache_clear(vmx);
 
 	seg_setup(VCPU_SREG_CS);
@@ -4436,16 +4469,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
 	vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
 
-	if (!init_event) {
-		vmcs_write32(GUEST_SYSENTER_CS, 0);
-		vmcs_writel(GUEST_SYSENTER_ESP, 0);
-		vmcs_writel(GUEST_SYSENTER_EIP, 0);
-		vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
-	}
-
-	kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
-	kvm_rip_write(vcpu, 0xfff0);
-
 	vmcs_writel(GUEST_GDTR_BASE, 0);
 	vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
 
@@ -4458,31 +4481,11 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	if (kvm_mpx_supported())
 		vmcs_write64(GUEST_BNDCFGS, 0);
 
-	setup_msrs(vmx);
-
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);  /* 22.2.1 */
 
-	if (cpu_has_vmx_tpr_shadow() && !init_event) {
-		vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
-		if (cpu_need_tpr_shadow(vcpu))
-			vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
-				     __pa(vcpu->arch.apic->regs));
-		vmcs_write32(TPR_THRESHOLD, 0);
-	}
-
 	kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
 
-	cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
-	vmx->vcpu.arch.cr0 = cr0;
-	vmx_set_cr0(vcpu, cr0); /* enter rmode */
-	vmx_set_cr4(vcpu, 0);
-	vmx_set_efer(vcpu, 0);
-
-	vmx_update_exception_bitmap(vcpu);
-
 	vpid_sync_context(vmx->vpid);
-	if (init_event)
-		vmx_clear_hlt(vcpu);
 }
 
 static void vmx_enable_irq_window(struct kvm_vcpu *vcpu)
@@ -4996,6 +4999,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
 			return kvm_complete_insn_gp(vcpu, err);
 		case 3:
 			WARN_ON_ONCE(enable_unrestricted_guest);
+
 			err = kvm_set_cr3(vcpu, val);
 			return kvm_complete_insn_gp(vcpu, err);
 		case 4:
@@ -5021,14 +5025,13 @@ static int handle_cr(struct kvm_vcpu *vcpu)
 		}
 		break;
 	case 2: /* clts */
-		WARN_ONCE(1, "Guest should always own CR0.TS");
-		vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
-		trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
-		return kvm_skip_emulated_instruction(vcpu);
+		KVM_BUG(1, vcpu->kvm, "Guest always owns CR0.TS");
+		return -EIO;
 	case 1: /*mov from cr*/
 		switch (cr) {
 		case 3:
 			WARN_ON_ONCE(enable_unrestricted_guest);
+
 			val = kvm_read_cr3(vcpu);
 			kvm_register_write(vcpu, reg, val);
 			trace_kvm_cr_read(cr, val);
@@ -5129,6 +5132,12 @@ static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
 
 	vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
 	exec_controls_setbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);
+
+	/*
+	 * exc_debug expects dr6 to be cleared after it runs, avoid that it sees
+	 * a stale dr6 from the guest.
+	 */
+	set_debugreg(DR6_RESERVED, 6);
 }
 
 static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
@@ -5338,7 +5347,9 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 
 static int handle_nmi_window(struct kvm_vcpu *vcpu)
 {
-	WARN_ON_ONCE(!enable_vnmi);
+	if (KVM_BUG_ON(!enable_vnmi, vcpu->kvm))
+		return -EIO;
+
 	exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
 	++vcpu->stat.nmi_window_exits;
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -5896,7 +5907,8 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
 	 * below) should never happen as that means we incorrectly allowed a
 	 * nested VM-Enter with an invalid vmcs12.
 	 */
-	WARN_ON_ONCE(vmx->nested.nested_run_pending);
+	if (KVM_BUG_ON(vmx->nested.nested_run_pending, vcpu->kvm))
+		return -EIO;
 
 	/* If guest state is invalid, start emulating */
 	if (vmx->emulation_required)
@@ -6189,7 +6201,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
 	}
 	secondary_exec_controls_set(vmx, sec_exec_control);
 
-	vmx_update_msr_bitmap(vcpu);
+	vmx_update_msr_bitmap_x2apic(vcpu);
 }
 
 static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
@@ -6274,7 +6286,9 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
 	int max_irr;
 	bool max_irr_updated;
 
-	WARN_ON(!vcpu->arch.apicv_active);
+	if (KVM_BUG_ON(!vcpu->arch.apicv_active, vcpu->kvm))
+		return -EIO;
+
 	if (pi_test_on(&vmx->pi_desc)) {
 		pi_clear_on(&vmx->pi_desc);
 		/*
@@ -6357,7 +6371,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
 	unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
 	gate_desc *desc = (gate_desc *)host_idt_base + vector;
 
-	if (WARN_ONCE(!is_external_intr(intr_info),
+	if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm,
 	    "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
 		return;
 
@@ -6368,6 +6382,9 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+	if (vmx->emulation_required)
+		return;
+
 	if (vmx->exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT)
 		handle_external_interrupt_irqoff(vcpu);
 	else if (vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI)
@@ -6639,6 +6656,10 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		vmx->loaded_vmcs->host_state.cr4 = cr4;
 	}
 
+	/* When KVM_DEBUGREG_WONT_EXIT, dr6 is accessible in guest. */
+	if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))
+		set_debugreg(vcpu->arch.dr6, 6);
+
 	/* When single-stepping over STI and MOV SS, we must clear the
 	 * corresponding interruptibility bits in the guest state. Otherwise
 	 * vmentry fails as it then expects bit 14 (BS) in pending debug
@@ -6838,7 +6859,6 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
 		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
 	}
-	vmx->msr_bitmap_mode = 0;
 
 	vmx->loaded_vmcs = &vmx->vmcs01;
 	cpu = get_cpu();
@@ -6997,7 +7017,7 @@ exit:
 	return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat;
 }
 
-static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx)
+static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx, u32 new_ctl)
 {
 	/*
 	 * These bits in the secondary execution controls field
@@ -7011,7 +7031,6 @@ static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx)
 		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
 		SECONDARY_EXEC_DESC;
 
-	u32 new_ctl = vmx->secondary_exec_control;
 	u32 cur_ctl = secondary_exec_controls_get(vmx);
 
 	secondary_exec_controls_set(vmx, (new_ctl & ~mask) | (cur_ctl & mask));
@@ -7154,10 +7173,11 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 	/* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). */
 	vcpu->arch.xsaves_enabled = false;
 
-	if (cpu_has_secondary_exec_ctrls()) {
-		vmx_compute_secondary_exec_control(vmx);
-		vmcs_set_secondary_exec_control(vmx);
-	}
+	vmx_setup_uret_msrs(vmx);
+
+	if (cpu_has_secondary_exec_ctrls())
+		vmcs_set_secondary_exec_control(vmx,
+						vmx_secondary_exec_control(vmx));
 
 	if (nested_vmx_allowed(vcpu))
 		to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
@@ -7803,7 +7823,8 @@ static __init int hardware_setup(void)
 		ept_lpage_level = PG_LEVEL_2M;
 	else
 		ept_lpage_level = PG_LEVEL_4K;
-	kvm_configure_mmu(enable_ept, vmx_get_max_tdp_level(), ept_lpage_level);
+	kvm_configure_mmu(enable_ept, 0, vmx_get_max_tdp_level(),
+			  ept_lpage_level);
 
 	/*
 	 * Only enable PML when hardware supports PML feature, and both EPT
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 17a1cb4b059d..4858c5fd95f2 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -227,7 +227,7 @@ struct nested_vmx {
 struct vcpu_vmx {
 	struct kvm_vcpu       vcpu;
 	u8                    fail;
-	u8		      msr_bitmap_mode;
+	u8		      x2apic_msr_bitmap_mode;
 
 	/*
 	 * If true, host state has been stored in vmx->loaded_vmcs for
@@ -263,8 +263,6 @@ struct vcpu_vmx {
 	u64		      spec_ctrl;
 	u32		      msr_ia32_umwait_control;
 
-	u32 secondary_exec_control;
-
 	/*
 	 * loaded_vmcs points to the VMCS currently used in this vcpu. For a
 	 * non-nested (L1) guest, it always points to vmcs01. For a nested
@@ -371,12 +369,11 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
 void set_cr4_guest_host_mask(struct vcpu_vmx *vmx);
 void ept_save_pdptrs(struct kvm_vcpu *vcpu);
 void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
-void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
+void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
 u64 construct_eptp(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level);
 
 bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu);
 void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu);
-void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
 bool vmx_nmi_blocked(struct kvm_vcpu *vcpu);
 bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu);
 bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
@@ -419,9 +416,13 @@ static inline void lname##_controls_set(struct vcpu_vmx *vmx, u32 val)	    \
 		vmx->loaded_vmcs->controls_shadow.lname = val;		    \
 	}								    \
 }									    \
+static inline u32 __##lname##_controls_get(struct loaded_vmcs *vmcs)	    \
+{									    \
+	return vmcs->controls_shadow.lname;				    \
+}									    \
 static inline u32 lname##_controls_get(struct vcpu_vmx *vmx)		    \
 {									    \
-	return vmx->loaded_vmcs->controls_shadow.lname;			    \
+	return __##lname##_controls_get(vmx->loaded_vmcs);		    \
 }									    \
 static inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u32 val)   \
 {									    \
@@ -451,31 +452,6 @@ static inline void vmx_register_cache_reset(struct kvm_vcpu *vcpu)
 	vcpu->arch.regs_dirty = 0;
 }
 
-static inline u32 vmx_vmentry_ctrl(void)
-{
-	u32 vmentry_ctrl = vmcs_config.vmentry_ctrl;
-	if (vmx_pt_mode_is_system())
-		vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP |
-				  VM_ENTRY_LOAD_IA32_RTIT_CTL);
-	/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
-	return vmentry_ctrl &
-		~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER);
-}
-
-static inline u32 vmx_vmexit_ctrl(void)
-{
-	u32 vmexit_ctrl = vmcs_config.vmexit_ctrl;
-	if (vmx_pt_mode_is_system())
-		vmexit_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP |
-				 VM_EXIT_CLEAR_IA32_RTIT_CTL);
-	/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
-	return vmexit_ctrl &
-		~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER);
-}
-
-u32 vmx_exec_control(struct vcpu_vmx *vmx);
-u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx);
-
 static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
 {
 	return container_of(kvm, struct kvm_vmx, kvm);
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h
index 164b64f65a8f..9e9ef47e988c 100644
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -4,13 +4,11 @@
 
 #include <linux/nospec.h>
 
-#include <asm/kvm_host.h>
 #include <asm/vmx.h>
 
 #include "evmcs.h"
 #include "vmcs.h"
-
-#define __ex(x) __kvm_handle_fault_on_reboot(x)
+#include "x86.h"
 
 asmlinkage void vmread_error(unsigned long field, bool fault);
 __attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e5d5c5ed7dd4..28ef14155726 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -233,12 +233,13 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
 	STATS_DESC_COUNTER(VM, mmu_recycled),
 	STATS_DESC_COUNTER(VM, mmu_cache_miss),
 	STATS_DESC_ICOUNTER(VM, mmu_unsync),
-	STATS_DESC_ICOUNTER(VM, lpages),
+	STATS_DESC_ICOUNTER(VM, pages_4k),
+	STATS_DESC_ICOUNTER(VM, pages_2m),
+	STATS_DESC_ICOUNTER(VM, pages_1g),
 	STATS_DESC_ICOUNTER(VM, nx_lpage_splits),
+	STATS_DESC_PCOUNTER(VM, max_mmu_rmap_size),
 	STATS_DESC_PCOUNTER(VM, max_mmu_page_hash_collisions)
 };
-static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
-		sizeof(struct kvm_vm_stat) / sizeof(u64));
 
 const struct kvm_stats_header kvm_vm_stats_header = {
 	.name_size = KVM_STATS_NAME_SIZE,
@@ -278,8 +279,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
 	STATS_DESC_COUNTER(VCPU, directed_yield_successful),
 	STATS_DESC_ICOUNTER(VCPU, guest_mode)
 };
-static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
-		sizeof(struct kvm_vcpu_stat) / sizeof(u64));
 
 const struct kvm_stats_header kvm_vcpu_stats_header = {
 	.name_size = KVM_STATS_NAME_SIZE,
@@ -485,7 +484,14 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 }
 EXPORT_SYMBOL_GPL(kvm_set_apic_base);
 
-asmlinkage __visible noinstr void kvm_spurious_fault(void)
+/*
+ * Handle a fault on a hardware virtualization (VMX or SVM) instruction.
+ *
+ * Hardware virtualization extension instructions may fault if a reboot turns
+ * off virtualization while processes are running.  Usually after catching the
+ * fault we just panic; during reboot instead the instruction is ignored.
+ */
+noinstr void kvm_spurious_fault(void)
 {
 	/* Fault while not rebooting.  We want the trace. */
 	BUG_ON(!kvm_rebooting);
@@ -1180,7 +1186,6 @@ static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
 	if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
 		for (i = 0; i < KVM_NR_DB_REGS; i++)
 			vcpu->arch.eff_db[i] = vcpu->arch.db[i];
-		vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD;
 	}
 }
 
@@ -3316,6 +3321,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			if (!msr_info->host_initiated) {
 				s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
 				adjust_tsc_offset_guest(vcpu, adj);
+				/* Before back to guest, tsc_timestamp must be adjusted
+				 * as well, otherwise guest's percpu pvclock time could jump.
+				 */
+				kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 			}
 			vcpu->arch.ia32_tsc_adjust_msr = data;
 		}
@@ -4310,12 +4319,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 
 	static_call(kvm_x86_vcpu_put)(vcpu);
 	vcpu->arch.last_host_tsc = rdtsc();
-	/*
-	 * If userspace has set any breakpoints or watchpoints, dr6 is restored
-	 * on every vmexit, but if not, we might have a stale dr6 from the
-	 * guest. do_debug expects dr6 to be cleared after it runs, do the same.
-	 */
-	set_debugreg(0, 6);
 }
 
 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
@@ -6567,9 +6570,9 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
 	 * there is no pkey in EPT page table for L1 guest or EPT
 	 * shadow page table for L2 guest.
 	 */
-	if (vcpu_match_mmio_gva(vcpu, gva)
-	    && !permission_fault(vcpu, vcpu->arch.walk_mmu,
-				 vcpu->arch.mmio_access, 0, access)) {
+	if (vcpu_match_mmio_gva(vcpu, gva) && (!is_paging(vcpu) ||
+	    !permission_fault(vcpu, vcpu->arch.walk_mmu,
+			      vcpu->arch.mmio_access, 0, access))) {
 		*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
 					(gva & (PAGE_SIZE - 1));
 		trace_vcpu_match_mmio(gva, *gpa, write, false);
@@ -8578,6 +8581,8 @@ EXPORT_SYMBOL_GPL(kvm_apicv_activated);
 
 static void kvm_apicv_init(struct kvm *kvm)
 {
+	mutex_init(&kvm->arch.apicv_update_lock);
+
 	if (enable_apicv)
 		clear_bit(APICV_INHIBIT_REASON_DISABLE,
 			  &kvm->arch.apicv_inhibit_reasons);
@@ -8891,6 +8896,10 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
 		can_inject = false;
 	}
 
+	/* Don't inject interrupts if the user asked to avoid doing so */
+	if (vcpu->guest_debug & KVM_GUESTDBG_BLOCKIRQ)
+		return 0;
+
 	/*
 	 * Finally, inject interrupt events.  If an event cannot be injected
 	 * due to architectural conditions (e.g. IF=0) a window-open exit
@@ -9236,10 +9245,18 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm)
 
 void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
 {
+	bool activate;
+
 	if (!lapic_in_kernel(vcpu))
 		return;
 
-	vcpu->arch.apicv_active = kvm_apicv_activated(vcpu->kvm);
+	mutex_lock(&vcpu->kvm->arch.apicv_update_lock);
+
+	activate = kvm_apicv_activated(vcpu->kvm);
+	if (vcpu->arch.apicv_active == activate)
+		goto out;
+
+	vcpu->arch.apicv_active = activate;
 	kvm_apic_update_apicv(vcpu);
 	static_call(kvm_x86_refresh_apicv_exec_ctrl)(vcpu);
 
@@ -9251,54 +9268,45 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
 	 */
 	if (!vcpu->arch.apicv_active)
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
+
+out:
+	mutex_unlock(&vcpu->kvm->arch.apicv_update_lock);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
 
-/*
- * NOTE: Do not hold any lock prior to calling this.
- *
- * In particular, kvm_request_apicv_update() expects kvm->srcu not to be
- * locked, because it calls __x86_set_memory_region() which does
- * synchronize_srcu(&kvm->srcu).
- */
-void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
+void __kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
 {
-	struct kvm_vcpu *except;
-	unsigned long old, new, expected;
+	unsigned long old, new;
 
 	if (!kvm_x86_ops.check_apicv_inhibit_reasons ||
 	    !static_call(kvm_x86_check_apicv_inhibit_reasons)(bit))
 		return;
 
-	old = READ_ONCE(kvm->arch.apicv_inhibit_reasons);
-	do {
-		expected = new = old;
-		if (activate)
-			__clear_bit(bit, &new);
-		else
-			__set_bit(bit, &new);
-		if (new == old)
-			break;
-		old = cmpxchg(&kvm->arch.apicv_inhibit_reasons, expected, new);
-	} while (old != expected);
-
-	if (!!old == !!new)
-		return;
+	old = new = kvm->arch.apicv_inhibit_reasons;
 
-	trace_kvm_apicv_update_request(activate, bit);
-	if (kvm_x86_ops.pre_update_apicv_exec_ctrl)
-		static_call(kvm_x86_pre_update_apicv_exec_ctrl)(kvm, activate);
+	if (activate)
+		__clear_bit(bit, &new);
+	else
+		__set_bit(bit, &new);
+
+	if (!!old != !!new) {
+		trace_kvm_apicv_update_request(activate, bit);
+		kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE);
+		kvm->arch.apicv_inhibit_reasons = new;
+		if (new) {
+			unsigned long gfn = gpa_to_gfn(APIC_DEFAULT_PHYS_BASE);
+			kvm_zap_gfn_range(kvm, gfn, gfn+1);
+		}
+	} else
+		kvm->arch.apicv_inhibit_reasons = new;
+}
+EXPORT_SYMBOL_GPL(__kvm_request_apicv_update);
 
-	/*
-	 * Sending request to update APICV for all other vcpus,
-	 * while update the calling vcpu immediately instead of
-	 * waiting for another #VMEXIT to handle the request.
-	 */
-	except = kvm_get_running_vcpu();
-	kvm_make_all_cpus_request_except(kvm, KVM_REQ_APICV_UPDATE,
-					 except);
-	if (except)
-		kvm_vcpu_update_apicv(except);
+void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
+{
+	mutex_lock(&kvm->arch.apicv_update_lock);
+	__kvm_request_apicv_update(kvm, activate, bit);
+	mutex_unlock(&kvm->arch.apicv_update_lock);
 }
 EXPORT_SYMBOL_GPL(kvm_request_apicv_update);
 
@@ -9395,6 +9403,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	}
 
 	if (kvm_request_pending(vcpu)) {
+		if (kvm_check_request(KVM_REQ_VM_BUGGED, vcpu)) {
+			r = -EIO;
+			goto out;
+		}
 		if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
 			if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) {
 				r = 0;
@@ -9608,8 +9620,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		set_debugreg(vcpu->arch.eff_db[1], 1);
 		set_debugreg(vcpu->arch.eff_db[2], 2);
 		set_debugreg(vcpu->arch.eff_db[3], 3);
-		set_debugreg(vcpu->arch.dr6, 6);
-		vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
 	} else if (unlikely(hw_breakpoint_active())) {
 		set_debugreg(0, 7);
 	}
@@ -9639,7 +9649,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		static_call(kvm_x86_sync_dirty_debug_regs)(vcpu);
 		kvm_update_dr0123(vcpu);
 		kvm_update_dr7(vcpu);
-		vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
 	}
 
 	/*
@@ -9976,7 +9985,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		goto out;
 	}
 
-	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) {
+	if ((kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) ||
+	    (kvm_run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)) {
 		r = -EINVAL;
 		goto out;
 	}
@@ -10581,9 +10591,6 @@ static void store_regs(struct kvm_vcpu *vcpu)
 
 static int sync_regs(struct kvm_vcpu *vcpu)
 {
-	if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)
-		return -EINVAL;
-
 	if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) {
 		__set_regs(vcpu, &vcpu->run->s.regs.regs);
 		vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
@@ -10799,6 +10806,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 {
 	unsigned long old_cr0 = kvm_read_cr0(vcpu);
+	unsigned long new_cr0;
+	u32 eax, dummy;
 
 	kvm_lapic_reset(vcpu, init_event);
 
@@ -10865,10 +10874,41 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	vcpu->arch.regs_avail = ~0;
 	vcpu->arch.regs_dirty = ~0;
 
+	/*
+	 * Fall back to KVM's default Family/Model/Stepping of 0x600 (P6/Athlon)
+	 * if no CPUID match is found.  Note, it's impossible to get a match at
+	 * RESET since KVM emulates RESET before exposing the vCPU to userspace,
+	 * i.e. it'simpossible for kvm_cpuid() to find a valid entry on RESET.
+	 * But, go through the motions in case that's ever remedied.
+	 */
+	eax = 1;
+	if (!kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true))
+		eax = 0x600;
+	kvm_rdx_write(vcpu, eax);
+
 	vcpu->arch.ia32_xss = 0;
 
 	static_call(kvm_x86_vcpu_reset)(vcpu, init_event);
 
+	kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
+	kvm_rip_write(vcpu, 0xfff0);
+
+	/*
+	 * CR0.CD/NW are set on RESET, preserved on INIT.  Note, some versions
+	 * of Intel's SDM list CD/NW as being set on INIT, but they contradict
+	 * (or qualify) that with a footnote stating that CD/NW are preserved.
+	 */
+	new_cr0 = X86_CR0_ET;
+	if (init_event)
+		new_cr0 |= (old_cr0 & (X86_CR0_NW | X86_CR0_CD));
+	else
+		new_cr0 |= X86_CR0_NW | X86_CR0_CD;
+
+	static_call(kvm_x86_set_cr0)(vcpu, new_cr0);
+	static_call(kvm_x86_set_cr4)(vcpu, 0);
+	static_call(kvm_x86_set_efer)(vcpu, 0);
+	static_call(kvm_x86_update_exception_bitmap)(vcpu);
+
 	/*
 	 * Reset the MMU context if paging was enabled prior to INIT (which is
 	 * implied if CR0.PG=1 as CR0 will be '0' prior to RESET).  Unlike the
@@ -10879,7 +10919,20 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	 */
 	if (old_cr0 & X86_CR0_PG)
 		kvm_mmu_reset_context(vcpu);
+
+	/*
+	 * Intel's SDM states that all TLB entries are flushed on INIT.  AMD's
+	 * APM states the TLBs are untouched by INIT, but it also states that
+	 * the TLBs are flushed on "External initialization of the processor."
+	 * Flush the guest TLB regardless of vendor, there is no meaningful
+	 * benefit in relying on the guest to flush the TLB immediately after
+	 * INIT.  A spurious TLB flush is benign and likely negligible from a
+	 * performance perspective.
+	 */
+	if (init_event)
+		kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
 }
+EXPORT_SYMBOL_GPL(kvm_vcpu_reset);
 
 void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
 {
@@ -11123,6 +11176,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm_hv_init_vm(kvm);
 	kvm_page_track_init(kvm);
 	kvm_mmu_init_vm(kvm);
+	kvm_xen_init_vm(kvm);
 
 	return static_call(kvm_x86_vm_init)(kvm);
 }
@@ -11312,8 +11366,7 @@ static int memslot_rmap_alloc(struct kvm_memory_slot *slot,
 
 	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
 		int level = i + 1;
-		int lpages = gfn_to_index(slot->base_gfn + npages - 1,
-					  slot->base_gfn, level) + 1;
+		int lpages = __kvm_mmu_slot_lpages(slot, npages, level);
 
 		WARN_ON(slot->arch.rmap[i]);
 
@@ -11396,8 +11449,7 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm,
 		int lpages;
 		int level = i + 1;
 
-		lpages = gfn_to_index(slot->base_gfn + npages - 1,
-				      slot->base_gfn, level) + 1;
+		lpages = __kvm_mmu_slot_lpages(slot, npages, level);
 
 		linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT);
 		if (!linfo)
@@ -11481,7 +11533,7 @@ static void kvm_mmu_update_cpu_dirty_logging(struct kvm *kvm, bool enable)
 
 static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
 				     struct kvm_memory_slot *old,
-				     struct kvm_memory_slot *new,
+				     const struct kvm_memory_slot *new,
 				     enum kvm_mr_change change)
 {
 	bool log_dirty_pages = new->flags & KVM_MEM_LOG_DIRTY_PAGES;
@@ -11561,10 +11613,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 		kvm_mmu_change_mmu_pages(kvm,
 				kvm_mmu_calculate_default_mmu_pages(kvm));
 
-	/*
-	 * FIXME: const-ify all uses of struct kvm_memory_slot.
-	 */
-	kvm_mmu_slot_apply_flags(kvm, old, (struct kvm_memory_slot *) new, change);
+	kvm_mmu_slot_apply_flags(kvm, old, new, change);
 
 	/* Free the arrays associated with the old memslot. */
 	if (change == KVM_MR_MOVE)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 44ae10312740..7d66d63dc55a 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -8,6 +8,8 @@
 #include "kvm_cache_regs.h"
 #include "kvm_emulate.h"
 
+void kvm_spurious_fault(void);
+
 static __always_inline void kvm_guest_enter_irqoff(void)
 {
 	/*
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index ae17250e1efe..9ea9c3dabe37 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -25,15 +25,14 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
 {
 	gpa_t gpa = gfn_to_gpa(gfn);
 	int wc_ofs, sec_hi_ofs;
-	int ret;
+	int ret = 0;
 	int idx = srcu_read_lock(&kvm->srcu);
 
-	ret = kvm_gfn_to_hva_cache_init(kvm, &kvm->arch.xen.shinfo_cache,
-					gpa, PAGE_SIZE);
-	if (ret)
+	if (kvm_is_error_hva(gfn_to_hva(kvm, gfn))) {
+		ret = -EFAULT;
 		goto out;
-
-	kvm->arch.xen.shinfo_set = true;
+	}
+	kvm->arch.xen.shinfo_gfn = gfn;
 
 	/* Paranoia checks on the 32-bit struct layout */
 	BUILD_BUG_ON(offsetof(struct compat_shared_info, wc) != 0x900);
@@ -245,7 +244,7 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
 
 	case KVM_XEN_ATTR_TYPE_SHARED_INFO:
 		if (data->u.shared_info.gfn == GPA_INVALID) {
-			kvm->arch.xen.shinfo_set = false;
+			kvm->arch.xen.shinfo_gfn = GPA_INVALID;
 			r = 0;
 			break;
 		}
@@ -283,10 +282,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
 		break;
 
 	case KVM_XEN_ATTR_TYPE_SHARED_INFO:
-		if (kvm->arch.xen.shinfo_set)
-			data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_cache.gpa);
-		else
-			data->u.shared_info.gfn = GPA_INVALID;
+		data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_gfn);
 		r = 0;
 		break;
 
@@ -646,6 +642,11 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
 	return 0;
 }
 
+void kvm_xen_init_vm(struct kvm *kvm)
+{
+	kvm->arch.xen.shinfo_gfn = GPA_INVALID;
+}
+
 void kvm_xen_destroy_vm(struct kvm *kvm)
 {
 	if (kvm->arch.xen_hvm_config.msr)
diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
index 463a7844a8ca..cc0cf5f37450 100644
--- a/arch/x86/kvm/xen.h
+++ b/arch/x86/kvm/xen.h
@@ -21,6 +21,7 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
 int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
 int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data);
 int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc);
+void kvm_xen_init_vm(struct kvm *kvm);
 void kvm_xen_destroy_vm(struct kvm *kvm);
 
 static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
@@ -50,6 +51,10 @@ static inline int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
 	return 1;
 }
 
+static inline void kvm_xen_init_vm(struct kvm *kvm)
+{
+}
+
 static inline void kvm_xen_destroy_vm(struct kvm *kvm)
 {
 }
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 75ef19aa8903..23a14d82e783 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -127,14 +127,12 @@ __ref void *alloc_low_pages(unsigned int num)
 		unsigned long ret = 0;
 
 		if (min_pfn_mapped < max_pfn_mapped) {
-			ret = memblock_find_in_range(
+			ret = memblock_phys_alloc_range(
+					PAGE_SIZE * num, PAGE_SIZE,
 					min_pfn_mapped << PAGE_SHIFT,
-					max_pfn_mapped << PAGE_SHIFT,
-					PAGE_SIZE * num , PAGE_SIZE);
+					max_pfn_mapped << PAGE_SHIFT);
 		}
-		if (ret)
-			memblock_reserve(ret, PAGE_SIZE * num);
-		else if (can_use_brk_pgt)
+		if (!ret && can_use_brk_pgt)
 			ret = __pa(extend_brk(PAGE_SIZE * num, PAGE_SIZE));
 
 		if (!ret)
@@ -610,8 +608,17 @@ static void __init memory_map_top_down(unsigned long map_start,
 	unsigned long addr;
 	unsigned long mapped_ram_size = 0;
 
-	/* xen has big range in reserved near end of ram, skip it at first.*/
-	addr = memblock_find_in_range(map_start, map_end, PMD_SIZE, PMD_SIZE);
+	/*
+	 * Systems that have many reserved areas near top of the memory,
+	 * e.g. QEMU with less than 1G RAM and EFI enabled, or Xen, will
+	 * require lots of 4K mappings which may exhaust pgt_buf.
+	 * Start with top-most PMD_SIZE range aligned at PMD_SIZE to ensure
+	 * there is enough mapped memory that can be allocated from
+	 * memblock.
+	 */
+	addr = memblock_phys_alloc_range(PMD_SIZE, PMD_SIZE, map_start,
+					 map_end);
+	memblock_free(addr, PMD_SIZE);
 	real_end = addr + PMD_SIZE;
 
 	/* step_size need to be small so pgt_buf from BRK could cover it */
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 74b78840182d..bd90b8fe81e4 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -801,8 +801,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
 	return __add_pages(nid, start_pfn, nr_pages, params);
 }
 
-void arch_remove_memory(int nid, u64 start, u64 size,
-			struct vmem_altmap *altmap)
+void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ddeaba947eb3..a6e11763763f 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1255,8 +1255,7 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end)
 	remove_pagetable(start, end, true, NULL);
 }
 
-void __ref arch_remove_memory(int nid, u64 start, u64 size,
-			      struct vmem_altmap *altmap)
+void __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index e94da744386f..a1b5c71099e6 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -376,15 +376,14 @@ static int __init numa_alloc_distance(void)
 	cnt++;
 	size = cnt * cnt * sizeof(numa_distance[0]);
 
-	phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
-				      size, PAGE_SIZE);
+	phys = memblock_phys_alloc_range(size, PAGE_SIZE, 0,
+					 PFN_PHYS(max_pfn_mapped));
 	if (!phys) {
 		pr_warn("Warning: can't allocate distance table!\n");
 		/* don't retry until explicitly reset */
 		numa_distance = (void *)1LU;
 		return -ENOMEM;
 	}
-	memblock_reserve(phys, size);
 
 	numa_distance = __va(phys);
 	numa_distance_cnt = cnt;
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index 87d77cc52f86..737491b13728 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -447,13 +447,12 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
 	if (numa_dist_cnt) {
 		u64 phys;
 
-		phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
-					      phys_size, PAGE_SIZE);
+		phys = memblock_phys_alloc_range(phys_size, PAGE_SIZE, 0,
+						 PFN_PHYS(max_pfn_mapped));
 		if (!phys) {
 			pr_warn("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
 			goto no_emu;
 		}
-		memblock_reserve(phys, phys_size);
 		phys_dist = __va(phys);
 
 		for (i = 0; i < numa_dist_cnt; i++)
diff --git a/arch/x86/pci/numachip.c b/arch/x86/pci/numachip.c
index 01a085d9135a..4f0147d4e225 100644
--- a/arch/x86/pci/numachip.c
+++ b/arch/x86/pci/numachip.c
@@ -12,6 +12,7 @@
 
 #include <linux/pci.h>
 #include <asm/pci_x86.h>
+#include <asm/numachip/numachip.h>
 
 static u8 limit __read_mostly;
 
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 7d2525691854..101081ad64b6 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -146,8 +146,7 @@ static void sta2x11_map_ep(struct pci_dev *pdev)
 		dev_err(dev, "sta2x11: could not set DMA offset\n");
 
 	dev->bus_dma_limit = max_amba_addr;
-	pci_set_consistent_dma_mask(pdev, max_amba_addr);
-	pci_set_dma_mask(pdev, max_amba_addr);
+	dma_set_mask_and_coherent(&pdev->dev, max_amba_addr);
 
 	/* Configure AHB mapping */
 	pci_write_config_dword(pdev, AHB_PEXLBASE(0), 0);
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 6534c92d0f83..31b5856010cb 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -28,7 +28,7 @@ void __init reserve_real_mode(void)
 	WARN_ON(slab_is_available());
 
 	/* Has to be under 1M so we can execute real-mode AP code. */
-	mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
+	mem = memblock_phys_alloc_range(size, PAGE_SIZE, 0, 1<<20);
 	if (!mem)
 		pr_info("No sub-1M memory is available for the trampoline\n");
 	else
diff --git a/arch/x86/um/shared/sysdep/stub_32.h b/arch/x86/um/shared/sysdep/stub_32.h
index b95db9daf0e8..4c6c2be0c899 100644
--- a/arch/x86/um/shared/sysdep/stub_32.h
+++ b/arch/x86/um/shared/sysdep/stub_32.h
@@ -101,4 +101,16 @@ static inline void remap_stack_and_trap(void)
 		"memory");
 }
 
+static __always_inline void *get_stub_page(void)
+{
+	unsigned long ret;
+
+	asm volatile (
+		"movl %%esp,%0 ;"
+		"andl %1,%0"
+		: "=a" (ret)
+		: "g" (~(UM_KERN_PAGE_SIZE - 1)));
+
+	return (void *)ret;
+}
 #endif
diff --git a/arch/x86/um/shared/sysdep/stub_64.h b/arch/x86/um/shared/sysdep/stub_64.h
index 6e2626b77a2e..e9c4b2b38803 100644
--- a/arch/x86/um/shared/sysdep/stub_64.h
+++ b/arch/x86/um/shared/sysdep/stub_64.h
@@ -108,4 +108,16 @@ static inline void remap_stack_and_trap(void)
 		__syscall_clobber, "r10", "r8", "r9");
 }
 
+static __always_inline void *get_stub_page(void)
+{
+	unsigned long ret;
+
+	asm volatile (
+		"movq %%rsp,%0 ;"
+		"andq %1,%0"
+		: "=a" (ret)
+		: "g" (~(UM_KERN_PAGE_SIZE - 1)));
+
+	return (void *)ret;
+}
 #endif
diff --git a/arch/x86/um/stub_segv.c b/arch/x86/um/stub_segv.c
index 21836eaf1725..f7eefba034f9 100644
--- a/arch/x86/um/stub_segv.c
+++ b/arch/x86/um/stub_segv.c
@@ -11,9 +11,8 @@
 void __attribute__ ((__section__ (".__syscall_stub")))
 stub_segv_handler(int sig, siginfo_t *info, void *p)
 {
-	int stack;
+	struct faultinfo *f = get_stub_page();
 	ucontext_t *uc = p;
-	struct faultinfo *f = (void *)(((unsigned long)&stack) & ~(UM_KERN_PAGE_SIZE - 1));
 
 	GET_FAULTINFO_FROM_MC(*f, &uc->uc_mcontext);
 	trap_myself();
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 03149422dce2..753f63734c13 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -116,9 +116,8 @@ static void __init xen_banner(void)
 	HYPERVISOR_xen_version(XENVER_extraversion, &extra);
 
 	pr_info("Booting paravirtualized kernel on %s\n", pv_info.name);
-	printk(KERN_INFO "Xen version: %d.%d%s%s\n",
-	       version >> 16, version & 0xffff, extra.extraversion,
-	       xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
+	pr_info("Xen version: %d.%d%s (preserve-AD)\n",
+		version >> 16, version & 0xffff, extra.extraversion);
 }
 
 static void __init xen_pv_init_platform(void)
@@ -1302,13 +1301,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	xen_init_apic();
 #endif
 
-	if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
-		pv_ops.mmu.ptep_modify_prot_start =
-			xen_ptep_modify_prot_start;
-		pv_ops.mmu.ptep_modify_prot_commit =
-			xen_ptep_modify_prot_commit;
-	}
-
 	machine_ops = xen_machine_ops;
 
 	/*
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index ade789e73ee4..1df5f01529e5 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2099,8 +2099,8 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 	.set_pte = xen_set_pte_init,
 	.set_pmd = xen_set_pmd_hyper,
 
-	.ptep_modify_prot_start = __ptep_modify_prot_start,
-	.ptep_modify_prot_commit = __ptep_modify_prot_commit,
+	.ptep_modify_prot_start = xen_ptep_modify_prot_start,
+	.ptep_modify_prot_commit = xen_ptep_modify_prot_commit,
 
 	.pte_val = PV_CALLEE_SAVE(xen_pte_val),
 	.pgd_val = PV_CALLEE_SAVE(xen_pgd_val),
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index ac06ca32e9ef..5e6e236977c7 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -618,8 +618,8 @@ int xen_alloc_p2m_entry(unsigned long pfn)
 	}
 
 	/* Expanded the p2m? */
-	if (pfn > xen_p2m_last_pfn) {
-		xen_p2m_last_pfn = pfn;
+	if (pfn >= xen_p2m_last_pfn) {
+		xen_p2m_last_pfn = ALIGN(pfn + 1, P2M_PER_PAGE);
 		HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn;
 	}
 
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index 96d7f7d39cb9..62ac4898df1a 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -7,6 +7,8 @@
  * Copyright (c) 2010, Citrix
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/export.h>
@@ -30,13 +32,13 @@ static int check_platform_magic(void)
 
 	magic = inw(XEN_IOPORT_MAGIC);
 	if (magic != XEN_IOPORT_MAGIC_VAL) {
-		printk(KERN_ERR "Xen Platform PCI: unrecognised magic value\n");
+		pr_err("Xen Platform PCI: unrecognised magic value\n");
 		return XEN_PLATFORM_ERR_MAGIC;
 	}
 
 	protocol = inb(XEN_IOPORT_PROTOVER);
 
-	printk(KERN_DEBUG "Xen Platform PCI: I/O protocol version %d\n",
+	pr_debug("Xen Platform PCI: I/O protocol version %d\n",
 			protocol);
 
 	switch (protocol) {
@@ -44,12 +46,12 @@ static int check_platform_magic(void)
 		outw(XEN_IOPORT_LINUX_PRODNUM, XEN_IOPORT_PRODNUM);
 		outl(XEN_IOPORT_LINUX_DRVVER, XEN_IOPORT_DRVVER);
 		if (inw(XEN_IOPORT_MAGIC) != XEN_IOPORT_MAGIC_VAL) {
-			printk(KERN_ERR "Xen Platform: blacklisted by host\n");
+			pr_err("Xen Platform: blacklisted by host\n");
 			return XEN_PLATFORM_ERR_BLACKLIST;
 		}
 		break;
 	default:
-		printk(KERN_WARNING "Xen Platform PCI: unknown I/O protocol version\n");
+		pr_warn("Xen Platform PCI: unknown I/O protocol version\n");
 		return XEN_PLATFORM_ERR_PROTOCOL;
 	}
 
@@ -155,12 +157,12 @@ void xen_unplug_emulated_devices(void)
 	 * been compiled for this kernel (modules or built-in are both OK). */
 	if (!xen_emul_unplug) {
 		if (xen_must_unplug_nics()) {
-			printk(KERN_INFO "Netfront and the Xen platform PCI driver have "
+			pr_info("Netfront and the Xen platform PCI driver have "
 					"been compiled for this kernel: unplug emulated NICs.\n");
 			xen_emul_unplug |= XEN_UNPLUG_ALL_NICS;
 		}
 		if (xen_must_unplug_disks()) {
-			printk(KERN_INFO "Blkfront and the Xen platform PCI driver have "
+			pr_info("Blkfront and the Xen platform PCI driver have "
 					"been compiled for this kernel: unplug emulated disks.\n"
 					"You might have to change the root device\n"
 					"from /dev/hd[a-d] to /dev/xvd[a-d]\n"
@@ -200,7 +202,7 @@ static int __init parse_xen_emul_unplug(char *arg)
 		else if (!strncmp(p, "never", l))
 			xen_emul_unplug |= XEN_UNPLUG_NEVER;
 		else
-			printk(KERN_WARNING "unrecognised option '%s' "
+			pr_warn("unrecognised option '%s' "
 				 "in parameter 'xen_emul_unplug'\n", p);
 	}
 	return 0;
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index c2ac319f11a4..96afadf9878e 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -64,7 +64,6 @@ static void cpu_bringup(void)
 	cr4_init();
 	cpu_init();
 	touch_softlockup_watchdog();
-	preempt_disable();
 
 	/* PVH runs in ring 0 and allows us to do native syscalls. Yay! */
 	if (!xen_feature(XENFEAT_supervisor_mode_kernel)) {
diff --git a/arch/xtensa/Kbuild b/arch/xtensa/Kbuild
index a4e40e534e6a..fd12f61745ba 100644
--- a/arch/xtensa/Kbuild
+++ b/arch/xtensa/Kbuild
@@ -1 +1,2 @@
 # SPDX-License-Identifier: GPL-2.0-only
+obj-y += kernel/ mm/ platforms/ boot/dts/
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index f02b8850b01b..0e56bad058fa 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -31,7 +31,7 @@ config XTENSA
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_EXIT_THREAD
 	select HAVE_FUNCTION_TRACER
-	select HAVE_FUTEX_CMPXCHG if !MMU
+	select HAVE_FUTEX_CMPXCHG if !MMU && FUTEX
 	select HAVE_HW_BREAKPOINT if PERF_EVENTS
 	select HAVE_IRQ_TIME_ACCOUNTING
 	select HAVE_PCI
@@ -42,6 +42,7 @@ config XTENSA
 	select MODULES_USE_ELF_RELA
 	select PERF_USE_VMALLOC
 	select SET_FS
+	select TRACE_IRQFLAGS_SUPPORT
 	select VIRT_TO_BUS
 	help
 	  Xtensa processors are 32-bit RISC machines designed by Tensilica
@@ -73,9 +74,6 @@ config LOCKDEP_SUPPORT
 config STACKTRACE_SUPPORT
 	def_bool y
 
-config TRACE_IRQFLAGS_SUPPORT
-	def_bool y
-
 config MMU
 	def_bool n
 
diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile
index e9c8f064c44d..96714ef7c89e 100644
--- a/arch/xtensa/Makefile
+++ b/arch/xtensa/Makefile
@@ -17,7 +17,6 @@
 variant-y := $(patsubst "%",%,$(CONFIG_XTENSA_VARIANT_NAME))
 
 VARIANT = $(variant-y)
-export VARIANT
 
 ifneq ($(VARIANT),)
   ifdef cross_compiling
@@ -33,9 +32,6 @@ platform-$(CONFIG_XTENSA_PLATFORM_XT2000)	:= xt2000
 platform-$(CONFIG_XTENSA_PLATFORM_ISS)		:= iss
 platform-$(CONFIG_XTENSA_PLATFORM_XTFPGA)	:= xtfpga
 
-PLATFORM = $(platform-y)
-export PLATFORM
-
 # temporarily until string.h is fixed
 KBUILD_CFLAGS += -ffreestanding -D__linux__
 KBUILD_CFLAGS += -pipe -mlongcalls -mtext-section-literals
@@ -57,19 +53,11 @@ KBUILD_CPPFLAGS += $(patsubst %,-I$(srctree)/%include,$(vardirs) $(plfdirs))
 
 KBUILD_DEFCONFIG := iss_defconfig
 
-# Only build variant and/or platform if it includes a Makefile
-
-buildvar := $(shell test -e $(srctree)/arch/xtensa/variants/$(VARIANT)/Makefile && echo arch/xtensa/variants/$(VARIANT)/)
-buildplf := $(shell test -e $(srctree)/arch/xtensa/platforms/$(PLATFORM)/Makefile && echo arch/xtensa/platforms/$(PLATFORM)/)
-
 # Find libgcc.a
 
 LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
 
 head-y		:= arch/xtensa/kernel/head.o
-core-y		+= arch/xtensa/kernel/ arch/xtensa/mm/
-core-y		+= $(buildvar) $(buildplf)
-core-y 		+= arch/xtensa/boot/dts/
 
 libs-y		+= arch/xtensa/lib/ $(LIBGCC)
 
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index b3d1bc8a9095..104b327f8ac9 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -417,3 +417,5 @@
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
+# 447 reserved for memfd_secret
+448	common	process_mrelease		sys_process_mrelease
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index efc3a29cde80..874b6efc6fb3 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -268,6 +268,7 @@ void do_interrupt(struct pt_regs *regs)
 		XCHAL_INTLEVEL7_MASK,
 	};
 	struct pt_regs *old_regs;
+	unsigned unhandled = ~0u;
 
 	trace_hardirqs_off();
 
@@ -283,6 +284,10 @@ void do_interrupt(struct pt_regs *regs)
 		for (level = LOCKLEVEL; level > 0; --level) {
 			if (int_at_level & int_level_mask[level]) {
 				int_at_level &= int_level_mask[level];
+				if (int_at_level & unhandled)
+					int_at_level &= unhandled;
+				else
+					unhandled |= int_level_mask[level];
 				break;
 			}
 		}
@@ -290,6 +295,8 @@ void do_interrupt(struct pt_regs *regs)
 		if (level == 0)
 			break;
 
+		/* clear lowest pending irq in the unhandled mask */
+		unhandled ^= (int_at_level & -int_at_level);
 		do_IRQ(__ffs(int_at_level), regs);
 	}
 
diff --git a/arch/xtensa/platforms/Makefile b/arch/xtensa/platforms/Makefile
new file mode 100644
index 000000000000..e2e7e0726979
--- /dev/null
+++ b/arch/xtensa/platforms/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_XTENSA_PLATFORM_XT2000)	+= xt2000/
+obj-$(CONFIG_XTENSA_PLATFORM_ISS)	+= iss/
+obj-$(CONFIG_XTENSA_PLATFORM_XTFPGA)	+= xtfpga/
diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c
index 81f988914d9a..81d7c7e8f7e9 100644
--- a/arch/xtensa/platforms/iss/console.c
+++ b/arch/xtensa/platforms/iss/console.c
@@ -199,10 +199,10 @@ late_initcall(rs_init);
 
 static void iss_console_write(struct console *co, const char *s, unsigned count)
 {
-	int len = strlen(s);
-
-	if (s != 0 && *s != 0)
+	if (s && *s != 0) {
+		int len = strlen(s);
 		simc_write(1, s, count < len ? count : len);
+	}
 }
 
 static struct tty_driver* iss_console_device(struct console *c, int *index)
diff --git a/block/Kconfig b/block/Kconfig
index bac87d773c54..8e28ae7718bd 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -29,35 +29,15 @@ if BLOCK
 config BLK_RQ_ALLOC_TIME
 	bool
 
-config BLK_SCSI_REQUEST
-	bool
-
 config BLK_CGROUP_RWSTAT
 	bool
 
-config BLK_DEV_BSG
-	bool "Block layer SG support v4"
-	default y
-	select BLK_SCSI_REQUEST
-	help
-	  Saying Y here will enable generic SG (SCSI generic) v4 support
-	  for any block device.
-
-	  Unlike SG v3 (aka block/scsi_ioctl.c drivers/scsi/sg.c), SG v4
-	  can handle complicated SCSI commands: tagged variable length cdbs
-	  with bidirectional data transfers and generic request/response
-	  protocols (e.g. Task Management Functions and SMP in Serial
-	  Attached SCSI).
-
-	  This option is required by recent UDEV versions to properly
-	  access device serial numbers, etc.
-
-	  If unsure, say Y.
+config BLK_DEV_BSG_COMMON
+	tristate
 
 config BLK_DEV_BSGLIB
 	bool "Block layer SG support v4 helper lib"
-	select BLK_DEV_BSG
-	select BLK_SCSI_REQUEST
+	select BLK_DEV_BSG_COMMON
 	help
 	  Subsystems will normally enable this if needed. Users will not
 	  normally need to manually enable this.
diff --git a/block/Makefile b/block/Makefile
index 1d0d466f2182..41aa1ba69c90 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -3,7 +3,7 @@
 # Makefile for the kernel block layer
 #
 
-obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-sysfs.o \
+obj-$(CONFIG_BLOCK) := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \
 			blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
 			blk-exec.o blk-merge.o blk-timeout.o \
 			blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
@@ -12,8 +12,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-sysfs.o \
 			disk-events.o
 
 obj-$(CONFIG_BOUNCE)		+= bounce.o
-obj-$(CONFIG_BLK_SCSI_REQUEST)	+= scsi_ioctl.o
-obj-$(CONFIG_BLK_DEV_BSG)	+= bsg.o
+obj-$(CONFIG_BLK_DEV_BSG_COMMON) += bsg.o
 obj-$(CONFIG_BLK_DEV_BSGLIB)	+= bsg-lib.o
 obj-$(CONFIG_BLK_CGROUP)	+= blk-cgroup.o
 obj-$(CONFIG_BLK_CGROUP_RWSTAT)	+= blk-cgroup-rwstat.o
diff --git a/fs/block_dev.c b/block/bdev.c
index 45df6cbccf12..cf2780cb44a7 100644
--- a/fs/block_dev.c
+++ b/block/bdev.c
@@ -7,12 +7,10 @@
 
 #include <linux/init.h>
 #include <linux/mm.h>
-#include <linux/fcntl.h>
 #include <linux/slab.h>
 #include <linux/kmod.h>
 #include <linux/major.h>
 #include <linux/device_cgroup.h>
-#include <linux/highmem.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/module.h>
@@ -20,30 +18,22 @@
 #include <linux/magic.h>
 #include <linux/buffer_head.h>
 #include <linux/swap.h>
-#include <linux/pagevec.h>
 #include <linux/writeback.h>
-#include <linux/mpage.h>
 #include <linux/mount.h>
 #include <linux/pseudo_fs.h>
 #include <linux/uio.h>
 #include <linux/namei.h>
-#include <linux/log2.h>
 #include <linux/cleancache.h>
-#include <linux/task_io_accounting_ops.h>
-#include <linux/falloc.h>
 #include <linux/part_stat.h>
 #include <linux/uaccess.h>
-#include <linux/suspend.h>
-#include "internal.h"
-#include "../block/blk.h"
+#include "../fs/internal.h"
+#include "blk.h"
 
 struct bdev_inode {
 	struct block_device bdev;
 	struct inode vfs_inode;
 };
 
-static const struct address_space_operations def_blk_aops;
-
 static inline struct bdev_inode *BDEV_I(struct inode *inode)
 {
 	return container_of(inode, struct bdev_inode, vfs_inode);
@@ -194,332 +184,6 @@ int sb_min_blocksize(struct super_block *sb, int size)
 
 EXPORT_SYMBOL(sb_min_blocksize);
 
-static int
-blkdev_get_block(struct inode *inode, sector_t iblock,
-		struct buffer_head *bh, int create)
-{
-	bh->b_bdev = I_BDEV(inode);
-	bh->b_blocknr = iblock;
-	set_buffer_mapped(bh);
-	return 0;
-}
-
-static struct inode *bdev_file_inode(struct file *file)
-{
-	return file->f_mapping->host;
-}
-
-static unsigned int dio_bio_write_op(struct kiocb *iocb)
-{
-	unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
-
-	/* avoid the need for a I/O completion work item */
-	if (iocb->ki_flags & IOCB_DSYNC)
-		op |= REQ_FUA;
-	return op;
-}
-
-#define DIO_INLINE_BIO_VECS 4
-
-static void blkdev_bio_end_io_simple(struct bio *bio)
-{
-	struct task_struct *waiter = bio->bi_private;
-
-	WRITE_ONCE(bio->bi_private, NULL);
-	blk_wake_io_task(waiter);
-}
-
-static ssize_t
-__blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
-		unsigned int nr_pages)
-{
-	struct file *file = iocb->ki_filp;
-	struct block_device *bdev = I_BDEV(bdev_file_inode(file));
-	struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
-	loff_t pos = iocb->ki_pos;
-	bool should_dirty = false;
-	struct bio bio;
-	ssize_t ret;
-	blk_qc_t qc;
-
-	if ((pos | iov_iter_alignment(iter)) &
-	    (bdev_logical_block_size(bdev) - 1))
-		return -EINVAL;
-
-	if (nr_pages <= DIO_INLINE_BIO_VECS)
-		vecs = inline_vecs;
-	else {
-		vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec),
-				     GFP_KERNEL);
-		if (!vecs)
-			return -ENOMEM;
-	}
-
-	bio_init(&bio, vecs, nr_pages);
-	bio_set_dev(&bio, bdev);
-	bio.bi_iter.bi_sector = pos >> 9;
-	bio.bi_write_hint = iocb->ki_hint;
-	bio.bi_private = current;
-	bio.bi_end_io = blkdev_bio_end_io_simple;
-	bio.bi_ioprio = iocb->ki_ioprio;
-
-	ret = bio_iov_iter_get_pages(&bio, iter);
-	if (unlikely(ret))
-		goto out;
-	ret = bio.bi_iter.bi_size;
-
-	if (iov_iter_rw(iter) == READ) {
-		bio.bi_opf = REQ_OP_READ;
-		if (iter_is_iovec(iter))
-			should_dirty = true;
-	} else {
-		bio.bi_opf = dio_bio_write_op(iocb);
-		task_io_account_write(ret);
-	}
-	if (iocb->ki_flags & IOCB_NOWAIT)
-		bio.bi_opf |= REQ_NOWAIT;
-	if (iocb->ki_flags & IOCB_HIPRI)
-		bio_set_polled(&bio, iocb);
-
-	qc = submit_bio(&bio);
-	for (;;) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (!READ_ONCE(bio.bi_private))
-			break;
-		if (!(iocb->ki_flags & IOCB_HIPRI) ||
-		    !blk_poll(bdev_get_queue(bdev), qc, true))
-			blk_io_schedule();
-	}
-	__set_current_state(TASK_RUNNING);
-
-	bio_release_pages(&bio, should_dirty);
-	if (unlikely(bio.bi_status))
-		ret = blk_status_to_errno(bio.bi_status);
-
-out:
-	if (vecs != inline_vecs)
-		kfree(vecs);
-
-	bio_uninit(&bio);
-
-	return ret;
-}
-
-struct blkdev_dio {
-	union {
-		struct kiocb		*iocb;
-		struct task_struct	*waiter;
-	};
-	size_t			size;
-	atomic_t		ref;
-	bool			multi_bio : 1;
-	bool			should_dirty : 1;
-	bool			is_sync : 1;
-	struct bio		bio;
-};
-
-static struct bio_set blkdev_dio_pool;
-
-static int blkdev_iopoll(struct kiocb *kiocb, bool wait)
-{
-	struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host);
-	struct request_queue *q = bdev_get_queue(bdev);
-
-	return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait);
-}
-
-static void blkdev_bio_end_io(struct bio *bio)
-{
-	struct blkdev_dio *dio = bio->bi_private;
-	bool should_dirty = dio->should_dirty;
-
-	if (bio->bi_status && !dio->bio.bi_status)
-		dio->bio.bi_status = bio->bi_status;
-
-	if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) {
-		if (!dio->is_sync) {
-			struct kiocb *iocb = dio->iocb;
-			ssize_t ret;
-
-			if (likely(!dio->bio.bi_status)) {
-				ret = dio->size;
-				iocb->ki_pos += ret;
-			} else {
-				ret = blk_status_to_errno(dio->bio.bi_status);
-			}
-
-			dio->iocb->ki_complete(iocb, ret, 0);
-			if (dio->multi_bio)
-				bio_put(&dio->bio);
-		} else {
-			struct task_struct *waiter = dio->waiter;
-
-			WRITE_ONCE(dio->waiter, NULL);
-			blk_wake_io_task(waiter);
-		}
-	}
-
-	if (should_dirty) {
-		bio_check_pages_dirty(bio);
-	} else {
-		bio_release_pages(bio, false);
-		bio_put(bio);
-	}
-}
-
-static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
-		unsigned int nr_pages)
-{
-	struct file *file = iocb->ki_filp;
-	struct inode *inode = bdev_file_inode(file);
-	struct block_device *bdev = I_BDEV(inode);
-	struct blk_plug plug;
-	struct blkdev_dio *dio;
-	struct bio *bio;
-	bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
-	bool is_read = (iov_iter_rw(iter) == READ), is_sync;
-	loff_t pos = iocb->ki_pos;
-	blk_qc_t qc = BLK_QC_T_NONE;
-	int ret = 0;
-
-	if ((pos | iov_iter_alignment(iter)) &
-	    (bdev_logical_block_size(bdev) - 1))
-		return -EINVAL;
-
-	bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool);
-
-	dio = container_of(bio, struct blkdev_dio, bio);
-	dio->is_sync = is_sync = is_sync_kiocb(iocb);
-	if (dio->is_sync) {
-		dio->waiter = current;
-		bio_get(bio);
-	} else {
-		dio->iocb = iocb;
-	}
-
-	dio->size = 0;
-	dio->multi_bio = false;
-	dio->should_dirty = is_read && iter_is_iovec(iter);
-
-	/*
-	 * Don't plug for HIPRI/polled IO, as those should go straight
-	 * to issue
-	 */
-	if (!is_poll)
-		blk_start_plug(&plug);
-
-	for (;;) {
-		bio_set_dev(bio, bdev);
-		bio->bi_iter.bi_sector = pos >> 9;
-		bio->bi_write_hint = iocb->ki_hint;
-		bio->bi_private = dio;
-		bio->bi_end_io = blkdev_bio_end_io;
-		bio->bi_ioprio = iocb->ki_ioprio;
-
-		ret = bio_iov_iter_get_pages(bio, iter);
-		if (unlikely(ret)) {
-			bio->bi_status = BLK_STS_IOERR;
-			bio_endio(bio);
-			break;
-		}
-
-		if (is_read) {
-			bio->bi_opf = REQ_OP_READ;
-			if (dio->should_dirty)
-				bio_set_pages_dirty(bio);
-		} else {
-			bio->bi_opf = dio_bio_write_op(iocb);
-			task_io_account_write(bio->bi_iter.bi_size);
-		}
-		if (iocb->ki_flags & IOCB_NOWAIT)
-			bio->bi_opf |= REQ_NOWAIT;
-
-		dio->size += bio->bi_iter.bi_size;
-		pos += bio->bi_iter.bi_size;
-
-		nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS);
-		if (!nr_pages) {
-			bool polled = false;
-
-			if (iocb->ki_flags & IOCB_HIPRI) {
-				bio_set_polled(bio, iocb);
-				polled = true;
-			}
-
-			qc = submit_bio(bio);
-
-			if (polled)
-				WRITE_ONCE(iocb->ki_cookie, qc);
-			break;
-		}
-
-		if (!dio->multi_bio) {
-			/*
-			 * AIO needs an extra reference to ensure the dio
-			 * structure which is embedded into the first bio
-			 * stays around.
-			 */
-			if (!is_sync)
-				bio_get(bio);
-			dio->multi_bio = true;
-			atomic_set(&dio->ref, 2);
-		} else {
-			atomic_inc(&dio->ref);
-		}
-
-		submit_bio(bio);
-		bio = bio_alloc(GFP_KERNEL, nr_pages);
-	}
-
-	if (!is_poll)
-		blk_finish_plug(&plug);
-
-	if (!is_sync)
-		return -EIOCBQUEUED;
-
-	for (;;) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (!READ_ONCE(dio->waiter))
-			break;
-
-		if (!(iocb->ki_flags & IOCB_HIPRI) ||
-		    !blk_poll(bdev_get_queue(bdev), qc, true))
-			blk_io_schedule();
-	}
-	__set_current_state(TASK_RUNNING);
-
-	if (!ret)
-		ret = blk_status_to_errno(dio->bio.bi_status);
-	if (likely(!ret))
-		ret = dio->size;
-
-	bio_put(&dio->bio);
-	return ret;
-}
-
-static ssize_t
-blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
-{
-	unsigned int nr_pages;
-
-	if (!iov_iter_count(iter))
-		return 0;
-
-	nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
-	if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_VECS)
-		return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
-
-	return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
-}
-
-static __init int blkdev_init(void)
-{
-	return bioset_init(&blkdev_dio_pool, 4,
-				offsetof(struct blkdev_dio, bio),
-				BIOSET_NEED_BVECS|BIOSET_PERCPU_CACHE);
-}
-module_init(blkdev_init);
-
 int __sync_blockdev(struct block_device *bdev, int wait)
 {
 	if (!bdev)
@@ -637,81 +301,6 @@ out:
 }
 EXPORT_SYMBOL(thaw_bdev);
 
-static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
-{
-	return block_write_full_page(page, blkdev_get_block, wbc);
-}
-
-static int blkdev_readpage(struct file * file, struct page * page)
-{
-	return block_read_full_page(page, blkdev_get_block);
-}
-
-static void blkdev_readahead(struct readahead_control *rac)
-{
-	mpage_readahead(rac, blkdev_get_block);
-}
-
-static int blkdev_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata)
-{
-	return block_write_begin(mapping, pos, len, flags, pagep,
-				 blkdev_get_block);
-}
-
-static int blkdev_write_end(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned copied,
-			struct page *page, void *fsdata)
-{
-	int ret;
-	ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
-
-	unlock_page(page);
-	put_page(page);
-
-	return ret;
-}
-
-/*
- * private llseek:
- * for a block special file file_inode(file)->i_size is zero
- * so we compute the size by hand (just as in block_read/write above)
- */
-static loff_t block_llseek(struct file *file, loff_t offset, int whence)
-{
-	struct inode *bd_inode = bdev_file_inode(file);
-	loff_t retval;
-
-	inode_lock(bd_inode);
-	retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
-	inode_unlock(bd_inode);
-	return retval;
-}
-	
-static int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
-		int datasync)
-{
-	struct inode *bd_inode = bdev_file_inode(filp);
-	struct block_device *bdev = I_BDEV(bd_inode);
-	int error;
-	
-	error = file_write_and_wait_range(filp, start, end);
-	if (error)
-		return error;
-
-	/*
-	 * There is no need to serialise calls to blkdev_issue_flush with
-	 * i_mutex and doing so causes performance issues with concurrent
-	 * O_SYNC writers to a block device.
-	 */
-	error = blkdev_issue_flush(bdev);
-	if (error == -EOPNOTSUPP)
-		error = 0;
-
-	return error;
-}
-
 /**
  * bdev_read_page() - Start reading a page from a block device
  * @bdev: The device to read the page from
@@ -1305,35 +894,6 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
 }
 EXPORT_SYMBOL(blkdev_get_by_path);
 
-static int blkdev_open(struct inode * inode, struct file * filp)
-{
-	struct block_device *bdev;
-
-	/*
-	 * Preserve backwards compatibility and allow large file access
-	 * even if userspace doesn't ask for it explicitly. Some mkfs
-	 * binary needs it. We might want to drop this workaround
-	 * during an unstable branch.
-	 */
-	filp->f_flags |= O_LARGEFILE;
-
-	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
-
-	if (filp->f_flags & O_NDELAY)
-		filp->f_mode |= FMODE_NDELAY;
-	if (filp->f_flags & O_EXCL)
-		filp->f_mode |= FMODE_EXCL;
-	if ((filp->f_flags & O_ACCMODE) == 3)
-		filp->f_mode |= FMODE_WRITE_IOCTL;
-
-	bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp);
-	if (IS_ERR(bdev))
-		return PTR_ERR(bdev);
-	filp->f_mapping = bdev->bd_inode->i_mapping;
-	filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
-	return 0;
-}
-
 void blkdev_put(struct block_device *bdev, fmode_t mode)
 {
 	struct gendisk *disk = bdev->bd_disk;
@@ -1397,203 +957,6 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
 }
 EXPORT_SYMBOL(blkdev_put);
 
-static int blkdev_close(struct inode * inode, struct file * filp)
-{
-	struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
-	blkdev_put(bdev, filp->f_mode);
-	return 0;
-}
-
-static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
-{
-	struct block_device *bdev = I_BDEV(bdev_file_inode(file));
-	fmode_t mode = file->f_mode;
-
-	/*
-	 * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
-	 * to updated it before every ioctl.
-	 */
-	if (file->f_flags & O_NDELAY)
-		mode |= FMODE_NDELAY;
-	else
-		mode &= ~FMODE_NDELAY;
-
-	return blkdev_ioctl(bdev, mode, cmd, arg);
-}
-
-/*
- * Write data to the block device.  Only intended for the block device itself
- * and the raw driver which basically is a fake block device.
- *
- * Does not take i_mutex for the write and thus is not for general purpose
- * use.
- */
-static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
-{
-	struct file *file = iocb->ki_filp;
-	struct inode *bd_inode = bdev_file_inode(file);
-	loff_t size = i_size_read(bd_inode);
-	struct blk_plug plug;
-	size_t shorted = 0;
-	ssize_t ret;
-
-	if (bdev_read_only(I_BDEV(bd_inode)))
-		return -EPERM;
-
-	if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev))
-		return -ETXTBSY;
-
-	if (!iov_iter_count(from))
-		return 0;
-
-	if (iocb->ki_pos >= size)
-		return -ENOSPC;
-
-	if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
-		return -EOPNOTSUPP;
-
-	size -= iocb->ki_pos;
-	if (iov_iter_count(from) > size) {
-		shorted = iov_iter_count(from) - size;
-		iov_iter_truncate(from, size);
-	}
-
-	blk_start_plug(&plug);
-	ret = __generic_file_write_iter(iocb, from);
-	if (ret > 0)
-		ret = generic_write_sync(iocb, ret);
-	iov_iter_reexpand(from, iov_iter_count(from) + shorted);
-	blk_finish_plug(&plug);
-	return ret;
-}
-
-static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
-{
-	struct file *file = iocb->ki_filp;
-	struct inode *bd_inode = bdev_file_inode(file);
-	loff_t size = i_size_read(bd_inode);
-	loff_t pos = iocb->ki_pos;
-	size_t shorted = 0;
-	ssize_t ret;
-
-	if (pos >= size)
-		return 0;
-
-	size -= pos;
-	if (iov_iter_count(to) > size) {
-		shorted = iov_iter_count(to) - size;
-		iov_iter_truncate(to, size);
-	}
-
-	ret = generic_file_read_iter(iocb, to);
-	iov_iter_reexpand(to, iov_iter_count(to) + shorted);
-	return ret;
-}
-
-static int blkdev_writepages(struct address_space *mapping,
-			     struct writeback_control *wbc)
-{
-	return generic_writepages(mapping, wbc);
-}
-
-static const struct address_space_operations def_blk_aops = {
-	.set_page_dirty	= __set_page_dirty_buffers,
-	.readpage	= blkdev_readpage,
-	.readahead	= blkdev_readahead,
-	.writepage	= blkdev_writepage,
-	.write_begin	= blkdev_write_begin,
-	.write_end	= blkdev_write_end,
-	.writepages	= blkdev_writepages,
-	.direct_IO	= blkdev_direct_IO,
-	.migratepage	= buffer_migrate_page_norefs,
-	.is_dirty_writeback = buffer_check_dirty_writeback,
-};
-
-#define	BLKDEV_FALLOC_FL_SUPPORTED					\
-		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
-		 FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
-
-static long blkdev_fallocate(struct file *file, int mode, loff_t start,
-			     loff_t len)
-{
-	struct block_device *bdev = I_BDEV(bdev_file_inode(file));
-	loff_t end = start + len - 1;
-	loff_t isize;
-	int error;
-
-	/* Fail if we don't recognize the flags. */
-	if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
-		return -EOPNOTSUPP;
-
-	/* Don't go off the end of the device. */
-	isize = i_size_read(bdev->bd_inode);
-	if (start >= isize)
-		return -EINVAL;
-	if (end >= isize) {
-		if (mode & FALLOC_FL_KEEP_SIZE) {
-			len = isize - start;
-			end = start + len - 1;
-		} else
-			return -EINVAL;
-	}
-
-	/*
-	 * Don't allow IO that isn't aligned to logical block size.
-	 */
-	if ((start | len) & (bdev_logical_block_size(bdev) - 1))
-		return -EINVAL;
-
-	/* Invalidate the page cache, including dirty pages. */
-	error = truncate_bdev_range(bdev, file->f_mode, start, end);
-	if (error)
-		return error;
-
-	switch (mode) {
-	case FALLOC_FL_ZERO_RANGE:
-	case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
-		error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
-					    GFP_KERNEL, BLKDEV_ZERO_NOUNMAP);
-		break;
-	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
-		error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
-					     GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
-		break;
-	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
-		error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
-					     GFP_KERNEL, 0);
-		break;
-	default:
-		return -EOPNOTSUPP;
-	}
-	if (error)
-		return error;
-
-	/*
-	 * Invalidate the page cache again; if someone wandered in and dirtied
-	 * a page, we just discard it - userspace has no way of knowing whether
-	 * the write happened before or after discard completing...
-	 */
-	return truncate_bdev_range(bdev, file->f_mode, start, end);
-}
-
-const struct file_operations def_blk_fops = {
-	.open		= blkdev_open,
-	.release	= blkdev_close,
-	.llseek		= block_llseek,
-	.read_iter	= blkdev_read_iter,
-	.write_iter	= blkdev_write_iter,
-	.iopoll		= blkdev_iopoll,
-	.mmap		= generic_file_mmap,
-	.fsync		= blkdev_fsync,
-	.unlocked_ioctl	= block_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= compat_blkdev_ioctl,
-#endif
-	.splice_read	= generic_file_splice_read,
-	.splice_write	= iter_file_splice_write,
-	.fallocate	= blkdev_fallocate,
-};
-
 /**
  * lookup_bdev  - lookup a struct block_device by name
  * @pathname:	special file representing the block device
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 480e1a134859..dd13c2bbc29c 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2662,6 +2662,15 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
 	 * are likely to increase the throughput.
 	 */
 	bfqq->new_bfqq = new_bfqq;
+	/*
+	 * The above assignment schedules the following redirections:
+	 * each time some I/O for bfqq arrives, the process that
+	 * generated that I/O is disassociated from bfqq and
+	 * associated with new_bfqq. Here we increases new_bfqq->ref
+	 * in advance, adding the number of processes that are
+	 * expected to be associated with new_bfqq as they happen to
+	 * issue I/O.
+	 */
 	new_bfqq->ref += process_refs;
 	return new_bfqq;
 }
@@ -2724,6 +2733,10 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 {
 	struct bfq_queue *in_service_bfqq, *new_bfqq;
 
+	/* if a merge has already been setup, then proceed with that first */
+	if (bfqq->new_bfqq)
+		return bfqq->new_bfqq;
+
 	/*
 	 * Check delayed stable merge for rotational or non-queueing
 	 * devs. For this branch to be executed, bfqq must not be
@@ -2825,9 +2838,6 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 	if (bfq_too_late_for_merging(bfqq))
 		return NULL;
 
-	if (bfqq->new_bfqq)
-		return bfqq->new_bfqq;
-
 	if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq))
 		return NULL;
 
diff --git a/block/bio.c b/block/bio.c
index e16849f46b0e..5df3dd282e40 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1688,7 +1688,7 @@ EXPORT_SYMBOL(bioset_init_from_src);
 /**
  * bio_alloc_kiocb - Allocate a bio from bio_set based on kiocb
  * @kiocb:	kiocb describing the IO
- * @nr_iovecs:	number of iovecs to pre-allocate
+ * @nr_vecs:	number of iovecs to pre-allocate
  * @bs:		bio_set to allocate from
  *
  * Description:
diff --git a/block/blk-map.c b/block/blk-map.c
index d1448aaad980..4526adde0156 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -309,7 +309,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
 
 static void bio_invalidate_vmalloc_pages(struct bio *bio)
 {
-#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
+#ifdef ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE
 	if (bio->bi_private && !op_is_write(bio_op(bio))) {
 		unsigned long i, len = 0;
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 944049982e6e..108a352051be 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2135,6 +2135,18 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
 	}
 }
 
+/*
+ * Allow 4x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
+ * queues. This is important for md arrays to benefit from merging
+ * requests.
+ */
+static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
+{
+	if (plug->multiple_queues)
+		return BLK_MAX_REQUEST_COUNT * 4;
+	return BLK_MAX_REQUEST_COUNT;
+}
+
 /**
  * blk_mq_submit_bio - Create and send a request to block device.
  * @bio: Bio pointer.
@@ -2231,7 +2243,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
 		else
 			last = list_entry_rq(plug->mq_list.prev);
 
-		if (request_count >= BLK_MAX_REQUEST_COUNT || (last &&
+		if (request_count >= blk_plug_max_rq_count(plug) || (last &&
 		    blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
 			blk_flush_plug_list(plug, false);
 			trace_block_plug(q);
@@ -3280,8 +3292,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	    set->map[HCTX_TYPE_POLL].nr_queues)
 		blk_queue_flag_set(QUEUE_FLAG_POLL, q);
 
-	q->sg_reserved_size = INT_MAX;
-
 	INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);
 	INIT_LIST_HEAD(&q->requeue_list);
 	spin_lock_init(&q->requeue_lock);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 55c49015e533..7c4e7993ba97 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2458,6 +2458,7 @@ int blk_throtl_init(struct request_queue *q)
 void blk_throtl_exit(struct request_queue *q)
 {
 	BUG_ON(!q->td);
+	del_timer_sync(&q->td->service_queue.pending_timer);
 	throtl_shutdown_wq(q);
 	blkcg_deactivate_policy(q, &blkcg_policy_throtl);
 	free_percpu(q->td->latency_buckets[READ]);
diff --git a/block/blk.h b/block/blk.h
index 8c96b0c90c48..7d2a0ba7ed21 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -373,4 +373,6 @@ static inline void bio_clear_hipri(struct bio *bio)
 	bio->bi_opf &= ~REQ_HIPRI;
 }
 
+extern const struct address_space_operations def_blk_aops;
+
 #endif /* BLK_INTERNAL_H */
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index a89d80102304..ccb98276c964 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -6,6 +6,7 @@
  *  Copyright (C) 2011   Red Hat, Inc.  All rights reserved.
  *  Copyright (C) 2011   Mike Christie
  */
+#include <linux/bsg.h>
 #include <linux/slab.h>
 #include <linux/blk-mq.h>
 #include <linux/delay.h>
@@ -19,36 +20,44 @@
 
 struct bsg_set {
 	struct blk_mq_tag_set	tag_set;
+	struct bsg_device	*bd;
 	bsg_job_fn		*job_fn;
 	bsg_timeout_fn		*timeout_fn;
 };
 
-static int bsg_transport_check_proto(struct sg_io_v4 *hdr)
+static int bsg_transport_sg_io_fn(struct request_queue *q, struct sg_io_v4 *hdr,
+		fmode_t mode, unsigned int timeout)
 {
+	struct bsg_job *job;
+	struct request *rq;
+	struct bio *bio;
+	int ret;
+
 	if (hdr->protocol != BSG_PROTOCOL_SCSI  ||
 	    hdr->subprotocol != BSG_SUB_PROTOCOL_SCSI_TRANSPORT)
 		return -EINVAL;
 	if (!capable(CAP_SYS_RAWIO))
 		return -EPERM;
-	return 0;
-}
 
-static int bsg_transport_fill_hdr(struct request *rq, struct sg_io_v4 *hdr,
-		fmode_t mode)
-{
-	struct bsg_job *job = blk_mq_rq_to_pdu(rq);
-	int ret;
+	rq = blk_get_request(q, hdr->dout_xfer_len ?
+			     REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+	rq->timeout = timeout;
 
+	job = blk_mq_rq_to_pdu(rq);
 	job->request_len = hdr->request_len;
 	job->request = memdup_user(uptr64(hdr->request), hdr->request_len);
-	if (IS_ERR(job->request))
-		return PTR_ERR(job->request);
+	if (IS_ERR(job->request)) {
+		ret = PTR_ERR(job->request);
+		goto out_put_request;
+	}
 
 	if (hdr->dout_xfer_len && hdr->din_xfer_len) {
 		job->bidi_rq = blk_get_request(rq->q, REQ_OP_DRV_IN, 0);
 		if (IS_ERR(job->bidi_rq)) {
 			ret = PTR_ERR(job->bidi_rq);
-			goto out;
+			goto out_free_job_request;
 		}
 
 		ret = blk_rq_map_user(rq->q, job->bidi_rq, NULL,
@@ -63,20 +72,20 @@ static int bsg_transport_fill_hdr(struct request *rq, struct sg_io_v4 *hdr,
 		job->bidi_bio = NULL;
 	}
 
-	return 0;
+	ret = 0;
+	if (hdr->dout_xfer_len) {
+		ret = blk_rq_map_user(rq->q, rq, NULL, uptr64(hdr->dout_xferp),
+				hdr->dout_xfer_len, GFP_KERNEL);
+	} else if (hdr->din_xfer_len) {
+		ret = blk_rq_map_user(rq->q, rq, NULL, uptr64(hdr->din_xferp),
+				hdr->din_xfer_len, GFP_KERNEL);
+	}
 
-out_free_bidi_rq:
-	if (job->bidi_rq)
-		blk_put_request(job->bidi_rq);
-out:
-	kfree(job->request);
-	return ret;
-}
+	if (ret)
+		goto out_unmap_bidi_rq;
 
-static int bsg_transport_complete_rq(struct request *rq, struct sg_io_v4 *hdr)
-{
-	struct bsg_job *job = blk_mq_rq_to_pdu(rq);
-	int ret = 0;
+	bio = rq->bio;
+	blk_execute_rq(NULL, rq, !(hdr->flags & BSG_FLAG_Q_AT_TAIL));
 
 	/*
 	 * The assignments below don't make much sense, but are kept for
@@ -119,28 +128,20 @@ static int bsg_transport_complete_rq(struct request *rq, struct sg_io_v4 *hdr)
 		hdr->din_resid = 0;
 	}
 
-	return ret;
-}
-
-static void bsg_transport_free_rq(struct request *rq)
-{
-	struct bsg_job *job = blk_mq_rq_to_pdu(rq);
-
-	if (job->bidi_rq) {
+	blk_rq_unmap_user(bio);
+out_unmap_bidi_rq:
+	if (job->bidi_rq)
 		blk_rq_unmap_user(job->bidi_bio);
+out_free_bidi_rq:
+	if (job->bidi_rq)
 		blk_put_request(job->bidi_rq);
-	}
-
+out_free_job_request:
 	kfree(job->request);
+out_put_request:
+	blk_put_request(rq);
+	return ret;
 }
 
-static const struct bsg_ops bsg_transport_ops = {
-	.check_proto		= bsg_transport_check_proto,
-	.fill_hdr		= bsg_transport_fill_hdr,
-	.complete_rq		= bsg_transport_complete_rq,
-	.free_rq		= bsg_transport_free_rq,
-};
-
 /**
  * bsg_teardown_job - routine to teardown a bsg job
  * @kref: kref inside bsg_job that is to be torn down
@@ -327,7 +328,7 @@ void bsg_remove_queue(struct request_queue *q)
 		struct bsg_set *bset =
 			container_of(q->tag_set, struct bsg_set, tag_set);
 
-		bsg_unregister_queue(q);
+		bsg_unregister_queue(bset->bd);
 		blk_cleanup_queue(q);
 		blk_mq_free_tag_set(&bset->tag_set);
 		kfree(bset);
@@ -396,10 +397,9 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
 	q->queuedata = dev;
 	blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
 
-	ret = bsg_register_queue(q, dev, name, &bsg_transport_ops);
-	if (ret) {
-		printk(KERN_ERR "%s: bsg interface failed to "
-		       "initialize - register queue\n", dev->kobj.name);
+	bset->bd = bsg_register_queue(q, dev, name, bsg_transport_sg_io_fn);
+	if (IS_ERR(bset->bd)) {
+		ret = PTR_ERR(bset->bd);
 		goto out_cleanup_queue;
 	}
 
diff --git a/block/bsg.c b/block/bsg.c
index 1f196563ae6c..351095193788 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -15,343 +15,97 @@
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_ioctl.h>
-#include <scsi/scsi_cmnd.h>
-#include <scsi/scsi_device.h>
-#include <scsi/scsi_driver.h>
 #include <scsi/sg.h>
 
 #define BSG_DESCRIPTION	"Block layer SCSI generic (bsg) driver"
 #define BSG_VERSION	"0.4"
 
-#define bsg_dbg(bd, fmt, ...) \
-	pr_debug("%s: " fmt, (bd)->name, ##__VA_ARGS__)
-
 struct bsg_device {
 	struct request_queue *queue;
-	spinlock_t lock;
-	struct hlist_node dev_list;
-	refcount_t ref_count;
-	char name[20];
+	struct device device;
+	struct cdev cdev;
 	int max_queue;
+	unsigned int timeout;
+	unsigned int reserved_size;
+	bsg_sg_io_fn *sg_io_fn;
 };
 
+static inline struct bsg_device *to_bsg_device(struct inode *inode)
+{
+	return container_of(inode->i_cdev, struct bsg_device, cdev);
+}
+
 #define BSG_DEFAULT_CMDS	64
 #define BSG_MAX_DEVS		32768
 
-static DEFINE_MUTEX(bsg_mutex);
-static DEFINE_IDR(bsg_minor_idr);
-
-#define BSG_LIST_ARRAY_SIZE	8
-static struct hlist_head bsg_device_list[BSG_LIST_ARRAY_SIZE];
-
+static DEFINE_IDA(bsg_minor_ida);
 static struct class *bsg_class;
 static int bsg_major;
 
-static inline struct hlist_head *bsg_dev_idx_hash(int index)
-{
-	return &bsg_device_list[index & (BSG_LIST_ARRAY_SIZE - 1)];
-}
-
-#define uptr64(val) ((void __user *)(uintptr_t)(val))
-
-static int bsg_scsi_check_proto(struct sg_io_v4 *hdr)
-{
-	if (hdr->protocol != BSG_PROTOCOL_SCSI  ||
-	    hdr->subprotocol != BSG_SUB_PROTOCOL_SCSI_CMD)
-		return -EINVAL;
-	return 0;
-}
-
-static int bsg_scsi_fill_hdr(struct request *rq, struct sg_io_v4 *hdr,
-		fmode_t mode)
-{
-	struct scsi_request *sreq = scsi_req(rq);
-
-	if (hdr->dout_xfer_len && hdr->din_xfer_len) {
-		pr_warn_once("BIDI support in bsg has been removed.\n");
-		return -EOPNOTSUPP;
-	}
-
-	sreq->cmd_len = hdr->request_len;
-	if (sreq->cmd_len > BLK_MAX_CDB) {
-		sreq->cmd = kzalloc(sreq->cmd_len, GFP_KERNEL);
-		if (!sreq->cmd)
-			return -ENOMEM;
-	}
-
-	if (copy_from_user(sreq->cmd, uptr64(hdr->request), sreq->cmd_len))
-		return -EFAULT;
-	if (blk_verify_command(sreq->cmd, mode))
-		return -EPERM;
-	return 0;
-}
-
-static int bsg_scsi_complete_rq(struct request *rq, struct sg_io_v4 *hdr)
+static unsigned int bsg_timeout(struct bsg_device *bd, struct sg_io_v4 *hdr)
 {
-	struct scsi_request *sreq = scsi_req(rq);
-	int ret = 0;
-
-	/*
-	 * fill in all the output members
-	 */
-	hdr->device_status = sreq->result & 0xff;
-	hdr->transport_status = host_byte(sreq->result);
-	hdr->driver_status = 0;
-	if (scsi_status_is_check_condition(sreq->result))
-		hdr->driver_status = DRIVER_SENSE;
-	hdr->info = 0;
-	if (hdr->device_status || hdr->transport_status || hdr->driver_status)
-		hdr->info |= SG_INFO_CHECK;
-	hdr->response_len = 0;
-
-	if (sreq->sense_len && hdr->response) {
-		int len = min_t(unsigned int, hdr->max_response_len,
-					sreq->sense_len);
-
-		if (copy_to_user(uptr64(hdr->response), sreq->sense, len))
-			ret = -EFAULT;
-		else
-			hdr->response_len = len;
-	}
+	unsigned int timeout = BLK_DEFAULT_SG_TIMEOUT;
 
-	if (rq_data_dir(rq) == READ)
-		hdr->din_resid = sreq->resid_len;
-	else
-		hdr->dout_resid = sreq->resid_len;
+	if (hdr->timeout)
+		timeout = msecs_to_jiffies(hdr->timeout);
+	else if (bd->timeout)
+		timeout = bd->timeout;
 
-	return ret;
+	return max_t(unsigned int, timeout, BLK_MIN_SG_TIMEOUT);
 }
 
-static void bsg_scsi_free_rq(struct request *rq)
+static int bsg_sg_io(struct bsg_device *bd, fmode_t mode, void __user *uarg)
 {
-	scsi_req_free_cmd(scsi_req(rq));
-}
-
-static const struct bsg_ops bsg_scsi_ops = {
-	.check_proto		= bsg_scsi_check_proto,
-	.fill_hdr		= bsg_scsi_fill_hdr,
-	.complete_rq		= bsg_scsi_complete_rq,
-	.free_rq		= bsg_scsi_free_rq,
-};
-
-static int bsg_sg_io(struct request_queue *q, fmode_t mode, void __user *uarg)
-{
-	struct request *rq;
-	struct bio *bio;
 	struct sg_io_v4 hdr;
 	int ret;
 
 	if (copy_from_user(&hdr, uarg, sizeof(hdr)))
 		return -EFAULT;
-
-	if (!q->bsg_dev.class_dev)
-		return -ENXIO;
-
 	if (hdr.guard != 'Q')
 		return -EINVAL;
-	ret = q->bsg_dev.ops->check_proto(&hdr);
-	if (ret)
-		return ret;
-
-	rq = blk_get_request(q, hdr.dout_xfer_len ?
-			REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
-	if (IS_ERR(rq))
-		return PTR_ERR(rq);
-
-	ret = q->bsg_dev.ops->fill_hdr(rq, &hdr, mode);
-	if (ret) {
-		blk_put_request(rq);
-		return ret;
-	}
-
-	rq->timeout = msecs_to_jiffies(hdr.timeout);
-	if (!rq->timeout)
-		rq->timeout = q->sg_timeout;
-	if (!rq->timeout)
-		rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
-	if (rq->timeout < BLK_MIN_SG_TIMEOUT)
-		rq->timeout = BLK_MIN_SG_TIMEOUT;
-
-	if (hdr.dout_xfer_len) {
-		ret = blk_rq_map_user(q, rq, NULL, uptr64(hdr.dout_xferp),
-				hdr.dout_xfer_len, GFP_KERNEL);
-	} else if (hdr.din_xfer_len) {
-		ret = blk_rq_map_user(q, rq, NULL, uptr64(hdr.din_xferp),
-				hdr.din_xfer_len, GFP_KERNEL);
-	}
-
-	if (ret)
-		goto out_free_rq;
-
-	bio = rq->bio;
-
-	blk_execute_rq(NULL, rq, !(hdr.flags & BSG_FLAG_Q_AT_TAIL));
-	ret = rq->q->bsg_dev.ops->complete_rq(rq, &hdr);
-	blk_rq_unmap_user(bio);
-
-out_free_rq:
-	rq->q->bsg_dev.ops->free_rq(rq);
-	blk_put_request(rq);
+	ret = bd->sg_io_fn(bd->queue, &hdr, mode, bsg_timeout(bd, &hdr));
 	if (!ret && copy_to_user(uarg, &hdr, sizeof(hdr)))
 		return -EFAULT;
 	return ret;
 }
 
-static struct bsg_device *bsg_alloc_device(void)
-{
-	struct bsg_device *bd;
-
-	bd = kzalloc(sizeof(struct bsg_device), GFP_KERNEL);
-	if (unlikely(!bd))
-		return NULL;
-
-	spin_lock_init(&bd->lock);
-	bd->max_queue = BSG_DEFAULT_CMDS;
-	INIT_HLIST_NODE(&bd->dev_list);
-	return bd;
-}
-
-static int bsg_put_device(struct bsg_device *bd)
-{
-	struct request_queue *q = bd->queue;
-
-	mutex_lock(&bsg_mutex);
-
-	if (!refcount_dec_and_test(&bd->ref_count)) {
-		mutex_unlock(&bsg_mutex);
-		return 0;
-	}
-
-	hlist_del(&bd->dev_list);
-	mutex_unlock(&bsg_mutex);
-
-	bsg_dbg(bd, "tearing down\n");
-
-	/*
-	 * close can always block
-	 */
-	kfree(bd);
-	blk_put_queue(q);
-	return 0;
-}
-
-static struct bsg_device *bsg_add_device(struct inode *inode,
-					 struct request_queue *rq,
-					 struct file *file)
-{
-	struct bsg_device *bd;
-	unsigned char buf[32];
-
-	lockdep_assert_held(&bsg_mutex);
-
-	if (!blk_get_queue(rq))
-		return ERR_PTR(-ENXIO);
-
-	bd = bsg_alloc_device();
-	if (!bd) {
-		blk_put_queue(rq);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	bd->queue = rq;
-
-	refcount_set(&bd->ref_count, 1);
-	hlist_add_head(&bd->dev_list, bsg_dev_idx_hash(iminor(inode)));
-
-	strncpy(bd->name, dev_name(rq->bsg_dev.class_dev), sizeof(bd->name) - 1);
-	bsg_dbg(bd, "bound to <%s>, max queue %d\n",
-		format_dev_t(buf, inode->i_rdev), bd->max_queue);
-
-	return bd;
-}
-
-static struct bsg_device *__bsg_get_device(int minor, struct request_queue *q)
-{
-	struct bsg_device *bd;
-
-	lockdep_assert_held(&bsg_mutex);
-
-	hlist_for_each_entry(bd, bsg_dev_idx_hash(minor), dev_list) {
-		if (bd->queue == q) {
-			refcount_inc(&bd->ref_count);
-			goto found;
-		}
-	}
-	bd = NULL;
-found:
-	return bd;
-}
-
-static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file)
-{
-	struct bsg_device *bd;
-	struct bsg_class_device *bcd;
-
-	/*
-	 * find the class device
-	 */
-	mutex_lock(&bsg_mutex);
-	bcd = idr_find(&bsg_minor_idr, iminor(inode));
-
-	if (!bcd) {
-		bd = ERR_PTR(-ENODEV);
-		goto out_unlock;
-	}
-
-	bd = __bsg_get_device(iminor(inode), bcd->queue);
-	if (!bd)
-		bd = bsg_add_device(inode, bcd->queue, file);
-
-out_unlock:
-	mutex_unlock(&bsg_mutex);
-	return bd;
-}
-
 static int bsg_open(struct inode *inode, struct file *file)
 {
-	struct bsg_device *bd;
-
-	bd = bsg_get_device(inode, file);
-
-	if (IS_ERR(bd))
-		return PTR_ERR(bd);
-
-	file->private_data = bd;
+	if (!blk_get_queue(to_bsg_device(inode)->queue))
+		return -ENXIO;
 	return 0;
 }
 
 static int bsg_release(struct inode *inode, struct file *file)
 {
-	struct bsg_device *bd = file->private_data;
-
-	file->private_data = NULL;
-	return bsg_put_device(bd);
+	blk_put_queue(to_bsg_device(inode)->queue);
+	return 0;
 }
 
 static int bsg_get_command_q(struct bsg_device *bd, int __user *uarg)
 {
-	return put_user(bd->max_queue, uarg);
+	return put_user(READ_ONCE(bd->max_queue), uarg);
 }
 
 static int bsg_set_command_q(struct bsg_device *bd, int __user *uarg)
 {
-	int queue;
+	int max_queue;
 
-	if (get_user(queue, uarg))
+	if (get_user(max_queue, uarg))
 		return -EFAULT;
-	if (queue < 1)
+	if (max_queue < 1)
 		return -EINVAL;
-
-	spin_lock_irq(&bd->lock);
-	bd->max_queue = queue;
-	spin_unlock_irq(&bd->lock);
+	WRITE_ONCE(bd->max_queue, max_queue);
 	return 0;
 }
 
 static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	struct bsg_device *bd = file->private_data;
+	struct bsg_device *bd = to_bsg_device(file_inode(file));
+	struct request_queue *q = bd->queue;
 	void __user *uarg = (void __user *) arg;
+	int __user *intp = uarg;
+	int val;
 
 	switch (cmd) {
 	/*
@@ -366,17 +120,37 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	 * SCSI/sg ioctls
 	 */
 	case SG_GET_VERSION_NUM:
+		return put_user(30527, intp);
 	case SCSI_IOCTL_GET_IDLUN:
+		return put_user(0, intp);
 	case SCSI_IOCTL_GET_BUS_NUMBER:
+		return put_user(0, intp);
 	case SG_SET_TIMEOUT:
+		if (get_user(val, intp))
+			return -EFAULT;
+		bd->timeout = clock_t_to_jiffies(val);
+		return 0;
 	case SG_GET_TIMEOUT:
+		return jiffies_to_clock_t(bd->timeout);
 	case SG_GET_RESERVED_SIZE:
+		return put_user(min(bd->reserved_size, queue_max_bytes(q)),
+				intp);
 	case SG_SET_RESERVED_SIZE:
+		if (get_user(val, intp))
+			return -EFAULT;
+		if (val < 0)
+			return -EINVAL;
+		bd->reserved_size =
+			min_t(unsigned int, val, queue_max_bytes(q));
+		return 0;
 	case SG_EMULATED_HOST:
-	case SCSI_IOCTL_SEND_COMMAND:
-		return scsi_cmd_ioctl(bd->queue, NULL, file->f_mode, cmd, uarg);
+		return put_user(1, intp);
 	case SG_IO:
-		return bsg_sg_io(bd->queue, file->f_mode, uarg);
+		return bsg_sg_io(bd, file->f_mode, uarg);
+	case SCSI_IOCTL_SEND_COMMAND:
+		pr_warn_ratelimited("%s: calling unsupported SCSI_IOCTL_SEND_COMMAND\n",
+				current->comm);
+		return -EINVAL;
 	default:
 		return -ENOTTY;
 	}
@@ -391,92 +165,65 @@ static const struct file_operations bsg_fops = {
 	.llseek		=	default_llseek,
 };
 
-void bsg_unregister_queue(struct request_queue *q)
+void bsg_unregister_queue(struct bsg_device *bd)
 {
-	struct bsg_class_device *bcd = &q->bsg_dev;
-
-	if (!bcd->class_dev)
-		return;
-
-	mutex_lock(&bsg_mutex);
-	idr_remove(&bsg_minor_idr, bcd->minor);
-	if (q->kobj.sd)
-		sysfs_remove_link(&q->kobj, "bsg");
-	device_unregister(bcd->class_dev);
-	bcd->class_dev = NULL;
-	mutex_unlock(&bsg_mutex);
+	if (bd->queue->kobj.sd)
+		sysfs_remove_link(&bd->queue->kobj, "bsg");
+	cdev_device_del(&bd->cdev, &bd->device);
+	ida_simple_remove(&bsg_minor_ida, MINOR(bd->device.devt));
+	kfree(bd);
 }
 EXPORT_SYMBOL_GPL(bsg_unregister_queue);
 
-int bsg_register_queue(struct request_queue *q, struct device *parent,
-		const char *name, const struct bsg_ops *ops)
+struct bsg_device *bsg_register_queue(struct request_queue *q,
+		struct device *parent, const char *name, bsg_sg_io_fn *sg_io_fn)
 {
-	struct bsg_class_device *bcd;
-	dev_t dev;
+	struct bsg_device *bd;
 	int ret;
-	struct device *class_dev = NULL;
 
-	/*
-	 * we need a proper transport to send commands, not a stacked device
-	 */
-	if (!queue_is_mq(q))
-		return 0;
-
-	bcd = &q->bsg_dev;
-	memset(bcd, 0, sizeof(*bcd));
-
-	mutex_lock(&bsg_mutex);
+	bd = kzalloc(sizeof(*bd), GFP_KERNEL);
+	if (!bd)
+		return ERR_PTR(-ENOMEM);
+	bd->max_queue = BSG_DEFAULT_CMDS;
+	bd->reserved_size = INT_MAX;
+	bd->queue = q;
+	bd->sg_io_fn = sg_io_fn;
 
-	ret = idr_alloc(&bsg_minor_idr, bcd, 0, BSG_MAX_DEVS, GFP_KERNEL);
+	ret = ida_simple_get(&bsg_minor_ida, 0, BSG_MAX_DEVS, GFP_KERNEL);
 	if (ret < 0) {
-		if (ret == -ENOSPC) {
-			printk(KERN_ERR "bsg: too many bsg devices\n");
-			ret = -EINVAL;
-		}
-		goto unlock;
-	}
-
-	bcd->minor = ret;
-	bcd->queue = q;
-	bcd->ops = ops;
-	dev = MKDEV(bsg_major, bcd->minor);
-	class_dev = device_create(bsg_class, parent, dev, NULL, "%s", name);
-	if (IS_ERR(class_dev)) {
-		ret = PTR_ERR(class_dev);
-		goto idr_remove;
+		if (ret == -ENOSPC)
+			dev_err(parent, "bsg: too many bsg devices\n");
+		goto out_kfree;
 	}
-	bcd->class_dev = class_dev;
+	bd->device.devt = MKDEV(bsg_major, ret);
+	bd->device.class = bsg_class;
+	bd->device.parent = parent;
+	dev_set_name(&bd->device, "%s", name);
+	device_initialize(&bd->device);
+
+	cdev_init(&bd->cdev, &bsg_fops);
+	bd->cdev.owner = THIS_MODULE;
+	ret = cdev_device_add(&bd->cdev, &bd->device);
+	if (ret)
+		goto out_ida_remove;
 
 	if (q->kobj.sd) {
-		ret = sysfs_create_link(&q->kobj, &bcd->class_dev->kobj, "bsg");
+		ret = sysfs_create_link(&q->kobj, &bd->device.kobj, "bsg");
 		if (ret)
-			goto unregister_class_dev;
+			goto out_device_del;
 	}
 
-	mutex_unlock(&bsg_mutex);
-	return 0;
-
-unregister_class_dev:
-	device_unregister(class_dev);
-idr_remove:
-	idr_remove(&bsg_minor_idr, bcd->minor);
-unlock:
-	mutex_unlock(&bsg_mutex);
-	return ret;
-}
-
-int bsg_scsi_register_queue(struct request_queue *q, struct device *parent)
-{
-	if (!blk_queue_scsi_passthrough(q)) {
-		WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
-		return -EINVAL;
-	}
+	return bd;
 
-	return bsg_register_queue(q, parent, dev_name(parent), &bsg_scsi_ops);
+out_device_del:
+	cdev_device_del(&bd->cdev, &bd->device);
+out_ida_remove:
+	ida_simple_remove(&bsg_minor_ida, MINOR(bd->device.devt));
+out_kfree:
+	kfree(bd);
+	return ERR_PTR(ret);
 }
-EXPORT_SYMBOL_GPL(bsg_scsi_register_queue);
-
-static struct cdev bsg_cdev;
+EXPORT_SYMBOL_GPL(bsg_register_queue);
 
 static char *bsg_devnode(struct device *dev, umode_t *mode)
 {
@@ -485,11 +232,8 @@ static char *bsg_devnode(struct device *dev, umode_t *mode)
 
 static int __init bsg_init(void)
 {
-	int ret, i;
 	dev_t devid;
-
-	for (i = 0; i < BSG_LIST_ARRAY_SIZE; i++)
-		INIT_HLIST_HEAD(&bsg_device_list[i]);
+	int ret;
 
 	bsg_class = class_create(THIS_MODULE, "bsg");
 	if (IS_ERR(bsg_class))
@@ -499,19 +243,12 @@ static int __init bsg_init(void)
 	ret = alloc_chrdev_region(&devid, 0, BSG_MAX_DEVS, "bsg");
 	if (ret)
 		goto destroy_bsg_class;
-
 	bsg_major = MAJOR(devid);
 
-	cdev_init(&bsg_cdev, &bsg_fops);
-	ret = cdev_add(&bsg_cdev, MKDEV(bsg_major, 0), BSG_MAX_DEVS);
-	if (ret)
-		goto unregister_chrdev;
-
 	printk(KERN_INFO BSG_DESCRIPTION " version " BSG_VERSION
 	       " loaded (major %d)\n", bsg_major);
 	return 0;
-unregister_chrdev:
-	unregister_chrdev_region(MKDEV(bsg_major, 0), BSG_MAX_DEVS);
+
 destroy_bsg_class:
 	class_destroy(bsg_class);
 	return ret;
diff --git a/block/fops.c b/block/fops.c
new file mode 100644
index 000000000000..ffce6f6c68dd
--- /dev/null
+++ b/block/fops.c
@@ -0,0 +1,640 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 1991, 1992  Linus Torvalds
+ * Copyright (C) 2001  Andrea Arcangeli <andrea@suse.de> SuSE
+ * Copyright (C) 2016 - 2020 Christoph Hellwig
+ */
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/mpage.h>
+#include <linux/uio.h>
+#include <linux/namei.h>
+#include <linux/task_io_accounting_ops.h>
+#include <linux/falloc.h>
+#include <linux/suspend.h>
+#include "blk.h"
+
+static struct inode *bdev_file_inode(struct file *file)
+{
+	return file->f_mapping->host;
+}
+
+static int blkdev_get_block(struct inode *inode, sector_t iblock,
+		struct buffer_head *bh, int create)
+{
+	bh->b_bdev = I_BDEV(inode);
+	bh->b_blocknr = iblock;
+	set_buffer_mapped(bh);
+	return 0;
+}
+
+static unsigned int dio_bio_write_op(struct kiocb *iocb)
+{
+	unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
+
+	/* avoid the need for a I/O completion work item */
+	if (iocb->ki_flags & IOCB_DSYNC)
+		op |= REQ_FUA;
+	return op;
+}
+
+#define DIO_INLINE_BIO_VECS 4
+
+static void blkdev_bio_end_io_simple(struct bio *bio)
+{
+	struct task_struct *waiter = bio->bi_private;
+
+	WRITE_ONCE(bio->bi_private, NULL);
+	blk_wake_io_task(waiter);
+}
+
+static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
+		struct iov_iter *iter, unsigned int nr_pages)
+{
+	struct file *file = iocb->ki_filp;
+	struct block_device *bdev = I_BDEV(bdev_file_inode(file));
+	struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
+	loff_t pos = iocb->ki_pos;
+	bool should_dirty = false;
+	struct bio bio;
+	ssize_t ret;
+	blk_qc_t qc;
+
+	if ((pos | iov_iter_alignment(iter)) &
+	    (bdev_logical_block_size(bdev) - 1))
+		return -EINVAL;
+
+	if (nr_pages <= DIO_INLINE_BIO_VECS)
+		vecs = inline_vecs;
+	else {
+		vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec),
+				     GFP_KERNEL);
+		if (!vecs)
+			return -ENOMEM;
+	}
+
+	bio_init(&bio, vecs, nr_pages);
+	bio_set_dev(&bio, bdev);
+	bio.bi_iter.bi_sector = pos >> 9;
+	bio.bi_write_hint = iocb->ki_hint;
+	bio.bi_private = current;
+	bio.bi_end_io = blkdev_bio_end_io_simple;
+	bio.bi_ioprio = iocb->ki_ioprio;
+
+	ret = bio_iov_iter_get_pages(&bio, iter);
+	if (unlikely(ret))
+		goto out;
+	ret = bio.bi_iter.bi_size;
+
+	if (iov_iter_rw(iter) == READ) {
+		bio.bi_opf = REQ_OP_READ;
+		if (iter_is_iovec(iter))
+			should_dirty = true;
+	} else {
+		bio.bi_opf = dio_bio_write_op(iocb);
+		task_io_account_write(ret);
+	}
+	if (iocb->ki_flags & IOCB_NOWAIT)
+		bio.bi_opf |= REQ_NOWAIT;
+	if (iocb->ki_flags & IOCB_HIPRI)
+		bio_set_polled(&bio, iocb);
+
+	qc = submit_bio(&bio);
+	for (;;) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (!READ_ONCE(bio.bi_private))
+			break;
+		if (!(iocb->ki_flags & IOCB_HIPRI) ||
+		    !blk_poll(bdev_get_queue(bdev), qc, true))
+			blk_io_schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+
+	bio_release_pages(&bio, should_dirty);
+	if (unlikely(bio.bi_status))
+		ret = blk_status_to_errno(bio.bi_status);
+
+out:
+	if (vecs != inline_vecs)
+		kfree(vecs);
+
+	bio_uninit(&bio);
+
+	return ret;
+}
+
+struct blkdev_dio {
+	union {
+		struct kiocb		*iocb;
+		struct task_struct	*waiter;
+	};
+	size_t			size;
+	atomic_t		ref;
+	bool			multi_bio : 1;
+	bool			should_dirty : 1;
+	bool			is_sync : 1;
+	struct bio		bio;
+};
+
+static struct bio_set blkdev_dio_pool;
+
+static int blkdev_iopoll(struct kiocb *kiocb, bool wait)
+{
+	struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host);
+	struct request_queue *q = bdev_get_queue(bdev);
+
+	return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait);
+}
+
+static void blkdev_bio_end_io(struct bio *bio)
+{
+	struct blkdev_dio *dio = bio->bi_private;
+	bool should_dirty = dio->should_dirty;
+
+	if (bio->bi_status && !dio->bio.bi_status)
+		dio->bio.bi_status = bio->bi_status;
+
+	if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) {
+		if (!dio->is_sync) {
+			struct kiocb *iocb = dio->iocb;
+			ssize_t ret;
+
+			if (likely(!dio->bio.bi_status)) {
+				ret = dio->size;
+				iocb->ki_pos += ret;
+			} else {
+				ret = blk_status_to_errno(dio->bio.bi_status);
+			}
+
+			dio->iocb->ki_complete(iocb, ret, 0);
+			if (dio->multi_bio)
+				bio_put(&dio->bio);
+		} else {
+			struct task_struct *waiter = dio->waiter;
+
+			WRITE_ONCE(dio->waiter, NULL);
+			blk_wake_io_task(waiter);
+		}
+	}
+
+	if (should_dirty) {
+		bio_check_pages_dirty(bio);
+	} else {
+		bio_release_pages(bio, false);
+		bio_put(bio);
+	}
+}
+
+static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+		unsigned int nr_pages)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = bdev_file_inode(file);
+	struct block_device *bdev = I_BDEV(inode);
+	struct blk_plug plug;
+	struct blkdev_dio *dio;
+	struct bio *bio;
+	bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
+	bool is_read = (iov_iter_rw(iter) == READ), is_sync;
+	loff_t pos = iocb->ki_pos;
+	blk_qc_t qc = BLK_QC_T_NONE;
+	int ret = 0;
+
+	if ((pos | iov_iter_alignment(iter)) &
+	    (bdev_logical_block_size(bdev) - 1))
+		return -EINVAL;
+
+	bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool);
+
+	dio = container_of(bio, struct blkdev_dio, bio);
+	dio->is_sync = is_sync = is_sync_kiocb(iocb);
+	if (dio->is_sync) {
+		dio->waiter = current;
+		bio_get(bio);
+	} else {
+		dio->iocb = iocb;
+	}
+
+	dio->size = 0;
+	dio->multi_bio = false;
+	dio->should_dirty = is_read && iter_is_iovec(iter);
+
+	/*
+	 * Don't plug for HIPRI/polled IO, as those should go straight
+	 * to issue
+	 */
+	if (!is_poll)
+		blk_start_plug(&plug);
+
+	for (;;) {
+		bio_set_dev(bio, bdev);
+		bio->bi_iter.bi_sector = pos >> 9;
+		bio->bi_write_hint = iocb->ki_hint;
+		bio->bi_private = dio;
+		bio->bi_end_io = blkdev_bio_end_io;
+		bio->bi_ioprio = iocb->ki_ioprio;
+
+		ret = bio_iov_iter_get_pages(bio, iter);
+		if (unlikely(ret)) {
+			bio->bi_status = BLK_STS_IOERR;
+			bio_endio(bio);
+			break;
+		}
+
+		if (is_read) {
+			bio->bi_opf = REQ_OP_READ;
+			if (dio->should_dirty)
+				bio_set_pages_dirty(bio);
+		} else {
+			bio->bi_opf = dio_bio_write_op(iocb);
+			task_io_account_write(bio->bi_iter.bi_size);
+		}
+		if (iocb->ki_flags & IOCB_NOWAIT)
+			bio->bi_opf |= REQ_NOWAIT;
+
+		dio->size += bio->bi_iter.bi_size;
+		pos += bio->bi_iter.bi_size;
+
+		nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS);
+		if (!nr_pages) {
+			bool polled = false;
+
+			if (iocb->ki_flags & IOCB_HIPRI) {
+				bio_set_polled(bio, iocb);
+				polled = true;
+			}
+
+			qc = submit_bio(bio);
+
+			if (polled)
+				WRITE_ONCE(iocb->ki_cookie, qc);
+			break;
+		}
+
+		if (!dio->multi_bio) {
+			/*
+			 * AIO needs an extra reference to ensure the dio
+			 * structure which is embedded into the first bio
+			 * stays around.
+			 */
+			if (!is_sync)
+				bio_get(bio);
+			dio->multi_bio = true;
+			atomic_set(&dio->ref, 2);
+		} else {
+			atomic_inc(&dio->ref);
+		}
+
+		submit_bio(bio);
+		bio = bio_alloc(GFP_KERNEL, nr_pages);
+	}
+
+	if (!is_poll)
+		blk_finish_plug(&plug);
+
+	if (!is_sync)
+		return -EIOCBQUEUED;
+
+	for (;;) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (!READ_ONCE(dio->waiter))
+			break;
+
+		if (!(iocb->ki_flags & IOCB_HIPRI) ||
+		    !blk_poll(bdev_get_queue(bdev), qc, true))
+			blk_io_schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+
+	if (!ret)
+		ret = blk_status_to_errno(dio->bio.bi_status);
+	if (likely(!ret))
+		ret = dio->size;
+
+	bio_put(&dio->bio);
+	return ret;
+}
+
+static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+{
+	unsigned int nr_pages;
+
+	if (!iov_iter_count(iter))
+		return 0;
+
+	nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
+	if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_VECS)
+		return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
+
+	return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
+}
+
+static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
+{
+	return block_write_full_page(page, blkdev_get_block, wbc);
+}
+
+static int blkdev_readpage(struct file * file, struct page * page)
+{
+	return block_read_full_page(page, blkdev_get_block);
+}
+
+static void blkdev_readahead(struct readahead_control *rac)
+{
+	mpage_readahead(rac, blkdev_get_block);
+}
+
+static int blkdev_write_begin(struct file *file, struct address_space *mapping,
+		loff_t pos, unsigned len, unsigned flags, struct page **pagep,
+		void **fsdata)
+{
+	return block_write_begin(mapping, pos, len, flags, pagep,
+				 blkdev_get_block);
+}
+
+static int blkdev_write_end(struct file *file, struct address_space *mapping,
+		loff_t pos, unsigned len, unsigned copied, struct page *page,
+		void *fsdata)
+{
+	int ret;
+	ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+
+	unlock_page(page);
+	put_page(page);
+
+	return ret;
+}
+
+static int blkdev_writepages(struct address_space *mapping,
+			     struct writeback_control *wbc)
+{
+	return generic_writepages(mapping, wbc);
+}
+
+const struct address_space_operations def_blk_aops = {
+	.set_page_dirty	= __set_page_dirty_buffers,
+	.readpage	= blkdev_readpage,
+	.readahead	= blkdev_readahead,
+	.writepage	= blkdev_writepage,
+	.write_begin	= blkdev_write_begin,
+	.write_end	= blkdev_write_end,
+	.writepages	= blkdev_writepages,
+	.direct_IO	= blkdev_direct_IO,
+	.migratepage	= buffer_migrate_page_norefs,
+	.is_dirty_writeback = buffer_check_dirty_writeback,
+};
+
+/*
+ * for a block special file file_inode(file)->i_size is zero
+ * so we compute the size by hand (just as in block_read/write above)
+ */
+static loff_t blkdev_llseek(struct file *file, loff_t offset, int whence)
+{
+	struct inode *bd_inode = bdev_file_inode(file);
+	loff_t retval;
+
+	inode_lock(bd_inode);
+	retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
+	inode_unlock(bd_inode);
+	return retval;
+}
+
+static int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
+		int datasync)
+{
+	struct inode *bd_inode = bdev_file_inode(filp);
+	struct block_device *bdev = I_BDEV(bd_inode);
+	int error;
+
+	error = file_write_and_wait_range(filp, start, end);
+	if (error)
+		return error;
+
+	/*
+	 * There is no need to serialise calls to blkdev_issue_flush with
+	 * i_mutex and doing so causes performance issues with concurrent
+	 * O_SYNC writers to a block device.
+	 */
+	error = blkdev_issue_flush(bdev);
+	if (error == -EOPNOTSUPP)
+		error = 0;
+
+	return error;
+}
+
+static int blkdev_open(struct inode *inode, struct file *filp)
+{
+	struct block_device *bdev;
+
+	/*
+	 * Preserve backwards compatibility and allow large file access
+	 * even if userspace doesn't ask for it explicitly. Some mkfs
+	 * binary needs it. We might want to drop this workaround
+	 * during an unstable branch.
+	 */
+	filp->f_flags |= O_LARGEFILE;
+	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
+
+	if (filp->f_flags & O_NDELAY)
+		filp->f_mode |= FMODE_NDELAY;
+	if (filp->f_flags & O_EXCL)
+		filp->f_mode |= FMODE_EXCL;
+	if ((filp->f_flags & O_ACCMODE) == 3)
+		filp->f_mode |= FMODE_WRITE_IOCTL;
+
+	bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp);
+	if (IS_ERR(bdev))
+		return PTR_ERR(bdev);
+	filp->f_mapping = bdev->bd_inode->i_mapping;
+	filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
+	return 0;
+}
+
+static int blkdev_close(struct inode *inode, struct file *filp)
+{
+	struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
+
+	blkdev_put(bdev, filp->f_mode);
+	return 0;
+}
+
+static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
+{
+	struct block_device *bdev = I_BDEV(bdev_file_inode(file));
+	fmode_t mode = file->f_mode;
+
+	/*
+	 * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
+	 * to updated it before every ioctl.
+	 */
+	if (file->f_flags & O_NDELAY)
+		mode |= FMODE_NDELAY;
+	else
+		mode &= ~FMODE_NDELAY;
+
+	return blkdev_ioctl(bdev, mode, cmd, arg);
+}
+
+/*
+ * Write data to the block device.  Only intended for the block device itself
+ * and the raw driver which basically is a fake block device.
+ *
+ * Does not take i_mutex for the write and thus is not for general purpose
+ * use.
+ */
+static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *bd_inode = bdev_file_inode(file);
+	loff_t size = i_size_read(bd_inode);
+	struct blk_plug plug;
+	size_t shorted = 0;
+	ssize_t ret;
+
+	if (bdev_read_only(I_BDEV(bd_inode)))
+		return -EPERM;
+
+	if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev))
+		return -ETXTBSY;
+
+	if (!iov_iter_count(from))
+		return 0;
+
+	if (iocb->ki_pos >= size)
+		return -ENOSPC;
+
+	if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
+		return -EOPNOTSUPP;
+
+	size -= iocb->ki_pos;
+	if (iov_iter_count(from) > size) {
+		shorted = iov_iter_count(from) - size;
+		iov_iter_truncate(from, size);
+	}
+
+	blk_start_plug(&plug);
+	ret = __generic_file_write_iter(iocb, from);
+	if (ret > 0)
+		ret = generic_write_sync(iocb, ret);
+	iov_iter_reexpand(from, iov_iter_count(from) + shorted);
+	blk_finish_plug(&plug);
+	return ret;
+}
+
+static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *bd_inode = bdev_file_inode(file);
+	loff_t size = i_size_read(bd_inode);
+	loff_t pos = iocb->ki_pos;
+	size_t shorted = 0;
+	ssize_t ret;
+
+	if (pos >= size)
+		return 0;
+
+	size -= pos;
+	if (iov_iter_count(to) > size) {
+		shorted = iov_iter_count(to) - size;
+		iov_iter_truncate(to, size);
+	}
+
+	ret = generic_file_read_iter(iocb, to);
+	iov_iter_reexpand(to, iov_iter_count(to) + shorted);
+	return ret;
+}
+
+#define	BLKDEV_FALLOC_FL_SUPPORTED					\
+		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
+		 FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
+
+static long blkdev_fallocate(struct file *file, int mode, loff_t start,
+			     loff_t len)
+{
+	struct block_device *bdev = I_BDEV(bdev_file_inode(file));
+	loff_t end = start + len - 1;
+	loff_t isize;
+	int error;
+
+	/* Fail if we don't recognize the flags. */
+	if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
+		return -EOPNOTSUPP;
+
+	/* Don't go off the end of the device. */
+	isize = i_size_read(bdev->bd_inode);
+	if (start >= isize)
+		return -EINVAL;
+	if (end >= isize) {
+		if (mode & FALLOC_FL_KEEP_SIZE) {
+			len = isize - start;
+			end = start + len - 1;
+		} else
+			return -EINVAL;
+	}
+
+	/*
+	 * Don't allow IO that isn't aligned to logical block size.
+	 */
+	if ((start | len) & (bdev_logical_block_size(bdev) - 1))
+		return -EINVAL;
+
+	/* Invalidate the page cache, including dirty pages. */
+	error = truncate_bdev_range(bdev, file->f_mode, start, end);
+	if (error)
+		return error;
+
+	switch (mode) {
+	case FALLOC_FL_ZERO_RANGE:
+	case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
+		error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
+					    GFP_KERNEL, BLKDEV_ZERO_NOUNMAP);
+		break;
+	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
+		error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
+					     GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
+		break;
+	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
+		error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
+					     GFP_KERNEL, 0);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+	if (error)
+		return error;
+
+	/*
+	 * Invalidate the page cache again; if someone wandered in and dirtied
+	 * a page, we just discard it - userspace has no way of knowing whether
+	 * the write happened before or after discard completing...
+	 */
+	return truncate_bdev_range(bdev, file->f_mode, start, end);
+}
+
+const struct file_operations def_blk_fops = {
+	.open		= blkdev_open,
+	.release	= blkdev_close,
+	.llseek		= blkdev_llseek,
+	.read_iter	= blkdev_read_iter,
+	.write_iter	= blkdev_write_iter,
+	.iopoll		= blkdev_iopoll,
+	.mmap		= generic_file_mmap,
+	.fsync		= blkdev_fsync,
+	.unlocked_ioctl	= block_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= compat_blkdev_ioctl,
+#endif
+	.splice_read	= generic_file_splice_read,
+	.splice_write	= iter_file_splice_write,
+	.fallocate	= blkdev_fallocate,
+};
+
+static __init int blkdev_init(void)
+{
+	return bioset_init(&blkdev_dio_pool, 4,
+				offsetof(struct blkdev_dio, bio),
+				BIOSET_NEED_BVECS|BIOSET_PERCPU_CACHE);
+}
+module_init(blkdev_init);
diff --git a/block/genhd.c b/block/genhd.c
index 567549a011d1..7b6e5e1cf956 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -183,6 +183,7 @@ static struct blk_major_name {
 	void (*probe)(dev_t devt);
 } *major_names[BLKDEV_MAJOR_HASH_SIZE];
 static DEFINE_MUTEX(major_names_lock);
+static DEFINE_SPINLOCK(major_names_spinlock);
 
 /* index in the above - for now: assume no multimajor ranges */
 static inline int major_to_index(unsigned major)
@@ -195,11 +196,11 @@ void blkdev_show(struct seq_file *seqf, off_t offset)
 {
 	struct blk_major_name *dp;
 
-	mutex_lock(&major_names_lock);
+	spin_lock(&major_names_spinlock);
 	for (dp = major_names[major_to_index(offset)]; dp; dp = dp->next)
 		if (dp->major == offset)
 			seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
-	mutex_unlock(&major_names_lock);
+	spin_unlock(&major_names_spinlock);
 }
 #endif /* CONFIG_PROC_FS */
 
@@ -271,6 +272,7 @@ int __register_blkdev(unsigned int major, const char *name,
 	p->next = NULL;
 	index = major_to_index(major);
 
+	spin_lock(&major_names_spinlock);
 	for (n = &major_names[index]; *n; n = &(*n)->next) {
 		if ((*n)->major == major)
 			break;
@@ -279,6 +281,7 @@ int __register_blkdev(unsigned int major, const char *name,
 		*n = p;
 	else
 		ret = -EBUSY;
+	spin_unlock(&major_names_spinlock);
 
 	if (ret < 0) {
 		printk("register_blkdev: cannot get major %u for %s\n",
@@ -298,6 +301,7 @@ void unregister_blkdev(unsigned int major, const char *name)
 	int index = major_to_index(major);
 
 	mutex_lock(&major_names_lock);
+	spin_lock(&major_names_spinlock);
 	for (n = &major_names[index]; *n; n = &(*n)->next)
 		if ((*n)->major == major)
 			break;
@@ -307,6 +311,7 @@ void unregister_blkdev(unsigned int major, const char *name)
 		p = *n;
 		*n = p->next;
 	}
+	spin_unlock(&major_names_spinlock);
 	mutex_unlock(&major_names_lock);
 	kfree(p);
 }
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 3c3693c34f06..7f3c3932b723 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -270,12 +270,6 @@ deadline_move_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 	deadline_remove_request(rq->q, per_prio, rq);
 }
 
-/* Number of requests queued for a given priority level. */
-static u32 dd_queued(struct deadline_data *dd, enum dd_prio prio)
-{
-	return dd_sum(dd, inserted, prio) - dd_sum(dd, completed, prio);
-}
-
 /*
  * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
  * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
@@ -953,6 +947,12 @@ static int dd_async_depth_show(void *data, struct seq_file *m)
 	return 0;
 }
 
+/* Number of requests queued for a given priority level. */
+static u32 dd_queued(struct deadline_data *dd, enum dd_prio prio)
+{
+	return dd_sum(dd, inserted, prio) - dd_sum(dd, completed, prio);
+}
+
 static int dd_queued_show(void *data, struct seq_file *m)
 {
 	struct request_queue *q = data;
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
deleted file mode 100644
index d247431a6853..000000000000
--- a/block/scsi_ioctl.c
+++ /dev/null
@@ -1,890 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 Jens Axboe <axboe@suse.de>
- */
-#include <linux/compat.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include <linux/blkdev.h>
-#include <linux/capability.h>
-#include <linux/completion.h>
-#include <linux/cdrom.h>
-#include <linux/ratelimit.h>
-#include <linux/slab.h>
-#include <linux/times.h>
-#include <linux/uio.h>
-#include <linux/uaccess.h>
-
-#include <scsi/scsi.h>
-#include <scsi/scsi_ioctl.h>
-#include <scsi/scsi_cmnd.h>
-#include <scsi/sg.h>
-
-struct blk_cmd_filter {
-	unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
-	unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
-};
-
-static struct blk_cmd_filter blk_default_cmd_filter;
-
-/* Command group 3 is reserved and should never be used.  */
-const unsigned char scsi_command_size_tbl[8] =
-{
-	6, 10, 10, 12,
-	16, 12, 10, 10
-};
-EXPORT_SYMBOL(scsi_command_size_tbl);
-
-static int sg_get_version(int __user *p)
-{
-	static const int sg_version_num = 30527;
-	return put_user(sg_version_num, p);
-}
-
-static int scsi_get_idlun(struct request_queue *q, int __user *p)
-{
-	return put_user(0, p);
-}
-
-static int scsi_get_bus(struct request_queue *q, int __user *p)
-{
-	return put_user(0, p);
-}
-
-static int sg_get_timeout(struct request_queue *q)
-{
-	return jiffies_to_clock_t(q->sg_timeout);
-}
-
-static int sg_set_timeout(struct request_queue *q, int __user *p)
-{
-	int timeout, err = get_user(timeout, p);
-
-	if (!err)
-		q->sg_timeout = clock_t_to_jiffies(timeout);
-
-	return err;
-}
-
-static int max_sectors_bytes(struct request_queue *q)
-{
-	unsigned int max_sectors = queue_max_sectors(q);
-
-	max_sectors = min_t(unsigned int, max_sectors, INT_MAX >> 9);
-
-	return max_sectors << 9;
-}
-
-static int sg_get_reserved_size(struct request_queue *q, int __user *p)
-{
-	int val = min_t(int, q->sg_reserved_size, max_sectors_bytes(q));
-
-	return put_user(val, p);
-}
-
-static int sg_set_reserved_size(struct request_queue *q, int __user *p)
-{
-	int size, err = get_user(size, p);
-
-	if (err)
-		return err;
-
-	if (size < 0)
-		return -EINVAL;
-
-	q->sg_reserved_size = min(size, max_sectors_bytes(q));
-	return 0;
-}
-
-/*
- * will always return that we are ATAPI even for a real SCSI drive, I'm not
- * so sure this is worth doing anything about (why would you care??)
- */
-static int sg_emulated_host(struct request_queue *q, int __user *p)
-{
-	return put_user(1, p);
-}
-
-static void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter)
-{
-	/* Basic read-only commands */
-	__set_bit(TEST_UNIT_READY, filter->read_ok);
-	__set_bit(REQUEST_SENSE, filter->read_ok);
-	__set_bit(READ_6, filter->read_ok);
-	__set_bit(READ_10, filter->read_ok);
-	__set_bit(READ_12, filter->read_ok);
-	__set_bit(READ_16, filter->read_ok);
-	__set_bit(READ_BUFFER, filter->read_ok);
-	__set_bit(READ_DEFECT_DATA, filter->read_ok);
-	__set_bit(READ_CAPACITY, filter->read_ok);
-	__set_bit(READ_LONG, filter->read_ok);
-	__set_bit(INQUIRY, filter->read_ok);
-	__set_bit(MODE_SENSE, filter->read_ok);
-	__set_bit(MODE_SENSE_10, filter->read_ok);
-	__set_bit(LOG_SENSE, filter->read_ok);
-	__set_bit(START_STOP, filter->read_ok);
-	__set_bit(GPCMD_VERIFY_10, filter->read_ok);
-	__set_bit(VERIFY_16, filter->read_ok);
-	__set_bit(REPORT_LUNS, filter->read_ok);
-	__set_bit(SERVICE_ACTION_IN_16, filter->read_ok);
-	__set_bit(RECEIVE_DIAGNOSTIC, filter->read_ok);
-	__set_bit(MAINTENANCE_IN, filter->read_ok);
-	__set_bit(GPCMD_READ_BUFFER_CAPACITY, filter->read_ok);
-
-	/* Audio CD commands */
-	__set_bit(GPCMD_PLAY_CD, filter->read_ok);
-	__set_bit(GPCMD_PLAY_AUDIO_10, filter->read_ok);
-	__set_bit(GPCMD_PLAY_AUDIO_MSF, filter->read_ok);
-	__set_bit(GPCMD_PLAY_AUDIO_TI, filter->read_ok);
-	__set_bit(GPCMD_PAUSE_RESUME, filter->read_ok);
-
-	/* CD/DVD data reading */
-	__set_bit(GPCMD_READ_CD, filter->read_ok);
-	__set_bit(GPCMD_READ_CD_MSF, filter->read_ok);
-	__set_bit(GPCMD_READ_DISC_INFO, filter->read_ok);
-	__set_bit(GPCMD_READ_CDVD_CAPACITY, filter->read_ok);
-	__set_bit(GPCMD_READ_DVD_STRUCTURE, filter->read_ok);
-	__set_bit(GPCMD_READ_HEADER, filter->read_ok);
-	__set_bit(GPCMD_READ_TRACK_RZONE_INFO, filter->read_ok);
-	__set_bit(GPCMD_READ_SUBCHANNEL, filter->read_ok);
-	__set_bit(GPCMD_READ_TOC_PMA_ATIP, filter->read_ok);
-	__set_bit(GPCMD_REPORT_KEY, filter->read_ok);
-	__set_bit(GPCMD_SCAN, filter->read_ok);
-	__set_bit(GPCMD_GET_CONFIGURATION, filter->read_ok);
-	__set_bit(GPCMD_READ_FORMAT_CAPACITIES, filter->read_ok);
-	__set_bit(GPCMD_GET_EVENT_STATUS_NOTIFICATION, filter->read_ok);
-	__set_bit(GPCMD_GET_PERFORMANCE, filter->read_ok);
-	__set_bit(GPCMD_SEEK, filter->read_ok);
-	__set_bit(GPCMD_STOP_PLAY_SCAN, filter->read_ok);
-
-	/* Basic writing commands */
-	__set_bit(WRITE_6, filter->write_ok);
-	__set_bit(WRITE_10, filter->write_ok);
-	__set_bit(WRITE_VERIFY, filter->write_ok);
-	__set_bit(WRITE_12, filter->write_ok);
-	__set_bit(WRITE_VERIFY_12, filter->write_ok);
-	__set_bit(WRITE_16, filter->write_ok);
-	__set_bit(WRITE_LONG, filter->write_ok);
-	__set_bit(WRITE_LONG_2, filter->write_ok);
-	__set_bit(WRITE_SAME, filter->write_ok);
-	__set_bit(WRITE_SAME_16, filter->write_ok);
-	__set_bit(WRITE_SAME_32, filter->write_ok);
-	__set_bit(ERASE, filter->write_ok);
-	__set_bit(GPCMD_MODE_SELECT_10, filter->write_ok);
-	__set_bit(MODE_SELECT, filter->write_ok);
-	__set_bit(LOG_SELECT, filter->write_ok);
-	__set_bit(GPCMD_BLANK, filter->write_ok);
-	__set_bit(GPCMD_CLOSE_TRACK, filter->write_ok);
-	__set_bit(GPCMD_FLUSH_CACHE, filter->write_ok);
-	__set_bit(GPCMD_FORMAT_UNIT, filter->write_ok);
-	__set_bit(GPCMD_REPAIR_RZONE_TRACK, filter->write_ok);
-	__set_bit(GPCMD_RESERVE_RZONE_TRACK, filter->write_ok);
-	__set_bit(GPCMD_SEND_DVD_STRUCTURE, filter->write_ok);
-	__set_bit(GPCMD_SEND_EVENT, filter->write_ok);
-	__set_bit(GPCMD_SEND_KEY, filter->write_ok);
-	__set_bit(GPCMD_SEND_OPC, filter->write_ok);
-	__set_bit(GPCMD_SEND_CUE_SHEET, filter->write_ok);
-	__set_bit(GPCMD_SET_SPEED, filter->write_ok);
-	__set_bit(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL, filter->write_ok);
-	__set_bit(GPCMD_LOAD_UNLOAD, filter->write_ok);
-	__set_bit(GPCMD_SET_STREAMING, filter->write_ok);
-	__set_bit(GPCMD_SET_READ_AHEAD, filter->write_ok);
-
-	/* ZBC Commands */
-	__set_bit(ZBC_OUT, filter->write_ok);
-	__set_bit(ZBC_IN, filter->read_ok);
-}
-
-int blk_verify_command(unsigned char *cmd, fmode_t mode)
-{
-	struct blk_cmd_filter *filter = &blk_default_cmd_filter;
-
-	/* root can do any command. */
-	if (capable(CAP_SYS_RAWIO))
-		return 0;
-
-	/* Anybody who can open the device can do a read-safe command */
-	if (test_bit(cmd[0], filter->read_ok))
-		return 0;
-
-	/* Write-safe commands require a writable open */
-	if (test_bit(cmd[0], filter->write_ok) && (mode & FMODE_WRITE))
-		return 0;
-
-	return -EPERM;
-}
-EXPORT_SYMBOL(blk_verify_command);
-
-static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
-			     struct sg_io_hdr *hdr, fmode_t mode)
-{
-	struct scsi_request *req = scsi_req(rq);
-
-	if (copy_from_user(req->cmd, hdr->cmdp, hdr->cmd_len))
-		return -EFAULT;
-	if (blk_verify_command(req->cmd, mode))
-		return -EPERM;
-
-	/*
-	 * fill in request structure
-	 */
-	req->cmd_len = hdr->cmd_len;
-
-	rq->timeout = msecs_to_jiffies(hdr->timeout);
-	if (!rq->timeout)
-		rq->timeout = q->sg_timeout;
-	if (!rq->timeout)
-		rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
-	if (rq->timeout < BLK_MIN_SG_TIMEOUT)
-		rq->timeout = BLK_MIN_SG_TIMEOUT;
-
-	return 0;
-}
-
-static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
-				 struct bio *bio)
-{
-	struct scsi_request *req = scsi_req(rq);
-	int r, ret = 0;
-
-	/*
-	 * fill in all the output members
-	 */
-	hdr->status = req->result & 0xff;
-	hdr->masked_status = status_byte(req->result);
-	hdr->msg_status = COMMAND_COMPLETE;
-	hdr->host_status = host_byte(req->result);
-	hdr->driver_status = 0;
-	if (scsi_status_is_check_condition(hdr->status))
-		hdr->driver_status = DRIVER_SENSE;
-	hdr->info = 0;
-	if (hdr->masked_status || hdr->host_status || hdr->driver_status)
-		hdr->info |= SG_INFO_CHECK;
-	hdr->resid = req->resid_len;
-	hdr->sb_len_wr = 0;
-
-	if (req->sense_len && hdr->sbp) {
-		int len = min((unsigned int) hdr->mx_sb_len, req->sense_len);
-
-		if (!copy_to_user(hdr->sbp, req->sense, len))
-			hdr->sb_len_wr = len;
-		else
-			ret = -EFAULT;
-	}
-
-	r = blk_rq_unmap_user(bio);
-	if (!ret)
-		ret = r;
-
-	return ret;
-}
-
-static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
-		struct sg_io_hdr *hdr, fmode_t mode)
-{
-	unsigned long start_time;
-	ssize_t ret = 0;
-	int writing = 0;
-	int at_head = 0;
-	struct request *rq;
-	struct scsi_request *req;
-	struct bio *bio;
-
-	if (hdr->interface_id != 'S')
-		return -EINVAL;
-
-	if (hdr->dxfer_len > (queue_max_hw_sectors(q) << 9))
-		return -EIO;
-
-	if (hdr->dxfer_len)
-		switch (hdr->dxfer_direction) {
-		default:
-			return -EINVAL;
-		case SG_DXFER_TO_DEV:
-			writing = 1;
-			break;
-		case SG_DXFER_TO_FROM_DEV:
-		case SG_DXFER_FROM_DEV:
-			break;
-		}
-	if (hdr->flags & SG_FLAG_Q_AT_HEAD)
-		at_head = 1;
-
-	ret = -ENOMEM;
-	rq = blk_get_request(q, writing ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
-	if (IS_ERR(rq))
-		return PTR_ERR(rq);
-	req = scsi_req(rq);
-
-	if (hdr->cmd_len > BLK_MAX_CDB) {
-		req->cmd = kzalloc(hdr->cmd_len, GFP_KERNEL);
-		if (!req->cmd)
-			goto out_put_request;
-	}
-
-	ret = blk_fill_sghdr_rq(q, rq, hdr, mode);
-	if (ret < 0)
-		goto out_free_cdb;
-
-	ret = 0;
-	if (hdr->iovec_count) {
-		struct iov_iter i;
-		struct iovec *iov = NULL;
-
-		ret = import_iovec(rq_data_dir(rq), hdr->dxferp,
-				   hdr->iovec_count, 0, &iov, &i);
-		if (ret < 0)
-			goto out_free_cdb;
-
-		/* SG_IO howto says that the shorter of the two wins */
-		iov_iter_truncate(&i, hdr->dxfer_len);
-
-		ret = blk_rq_map_user_iov(q, rq, NULL, &i, GFP_KERNEL);
-		kfree(iov);
-	} else if (hdr->dxfer_len)
-		ret = blk_rq_map_user(q, rq, NULL, hdr->dxferp, hdr->dxfer_len,
-				      GFP_KERNEL);
-
-	if (ret)
-		goto out_free_cdb;
-
-	bio = rq->bio;
-	req->retries = 0;
-
-	start_time = jiffies;
-
-	blk_execute_rq(bd_disk, rq, at_head);
-
-	hdr->duration = jiffies_to_msecs(jiffies - start_time);
-
-	ret = blk_complete_sghdr_rq(rq, hdr, bio);
-
-out_free_cdb:
-	scsi_req_free_cmd(req);
-out_put_request:
-	blk_put_request(rq);
-	return ret;
-}
-
-/**
- * sg_scsi_ioctl  --  handle deprecated SCSI_IOCTL_SEND_COMMAND ioctl
- * @q:		request queue to send scsi commands down
- * @disk:	gendisk to operate on (option)
- * @mode:	mode used to open the file through which the ioctl has been
- *		submitted
- * @sic:	userspace structure describing the command to perform
- *
- * Send down the scsi command described by @sic to the device below
- * the request queue @q.  If @file is non-NULL it's used to perform
- * fine-grained permission checks that allow users to send down
- * non-destructive SCSI commands.  If the caller has a struct gendisk
- * available it should be passed in as @disk to allow the low level
- * driver to use the information contained in it.  A non-NULL @disk
- * is only allowed if the caller knows that the low level driver doesn't
- * need it (e.g. in the scsi subsystem).
- *
- * Notes:
- *   -  This interface is deprecated - users should use the SG_IO
- *      interface instead, as this is a more flexible approach to
- *      performing SCSI commands on a device.
- *   -  The SCSI command length is determined by examining the 1st byte
- *      of the given command. There is no way to override this.
- *   -  Data transfers are limited to PAGE_SIZE
- *   -  The length (x + y) must be at least OMAX_SB_LEN bytes long to
- *      accommodate the sense buffer when an error occurs.
- *      The sense buffer is truncated to OMAX_SB_LEN (16) bytes so that
- *      old code will not be surprised.
- *   -  If a Unix error occurs (e.g. ENOMEM) then the user will receive
- *      a negative return and the Unix error code in 'errno'.
- *      If the SCSI command succeeds then 0 is returned.
- *      Positive numbers returned are the compacted SCSI error codes (4
- *      bytes in one int) where the lowest byte is the SCSI status.
- */
-int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
-		struct scsi_ioctl_command __user *sic)
-{
-	enum { OMAX_SB_LEN = 16 };	/* For backward compatibility */
-	struct request *rq;
-	struct scsi_request *req;
-	int err;
-	unsigned int in_len, out_len, bytes, opcode, cmdlen;
-	char *buffer = NULL;
-
-	if (!sic)
-		return -EINVAL;
-
-	/*
-	 * get in an out lengths, verify they don't exceed a page worth of data
-	 */
-	if (get_user(in_len, &sic->inlen))
-		return -EFAULT;
-	if (get_user(out_len, &sic->outlen))
-		return -EFAULT;
-	if (in_len > PAGE_SIZE || out_len > PAGE_SIZE)
-		return -EINVAL;
-	if (get_user(opcode, sic->data))
-		return -EFAULT;
-
-	bytes = max(in_len, out_len);
-	if (bytes) {
-		buffer = kzalloc(bytes, GFP_NOIO | GFP_USER | __GFP_NOWARN);
-		if (!buffer)
-			return -ENOMEM;
-
-	}
-
-	rq = blk_get_request(q, in_len ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
-	if (IS_ERR(rq)) {
-		err = PTR_ERR(rq);
-		goto error_free_buffer;
-	}
-	req = scsi_req(rq);
-
-	cmdlen = COMMAND_SIZE(opcode);
-
-	/*
-	 * get command and data to send to device, if any
-	 */
-	err = -EFAULT;
-	req->cmd_len = cmdlen;
-	if (copy_from_user(req->cmd, sic->data, cmdlen))
-		goto error;
-
-	if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len))
-		goto error;
-
-	err = blk_verify_command(req->cmd, mode);
-	if (err)
-		goto error;
-
-	/* default.  possible overriden later */
-	req->retries = 5;
-
-	switch (opcode) {
-	case SEND_DIAGNOSTIC:
-	case FORMAT_UNIT:
-		rq->timeout = FORMAT_UNIT_TIMEOUT;
-		req->retries = 1;
-		break;
-	case START_STOP:
-		rq->timeout = START_STOP_TIMEOUT;
-		break;
-	case MOVE_MEDIUM:
-		rq->timeout = MOVE_MEDIUM_TIMEOUT;
-		break;
-	case READ_ELEMENT_STATUS:
-		rq->timeout = READ_ELEMENT_STATUS_TIMEOUT;
-		break;
-	case READ_DEFECT_DATA:
-		rq->timeout = READ_DEFECT_DATA_TIMEOUT;
-		req->retries = 1;
-		break;
-	default:
-		rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
-		break;
-	}
-
-	if (bytes) {
-		err = blk_rq_map_kern(q, rq, buffer, bytes, GFP_NOIO);
-		if (err)
-			goto error;
-	}
-
-	blk_execute_rq(disk, rq, 0);
-
-	err = req->result & 0xff;	/* only 8 bit SCSI status */
-	if (err) {
-		if (req->sense_len && req->sense) {
-			bytes = (OMAX_SB_LEN > req->sense_len) ?
-				req->sense_len : OMAX_SB_LEN;
-			if (copy_to_user(sic->data, req->sense, bytes))
-				err = -EFAULT;
-		}
-	} else {
-		if (copy_to_user(sic->data, buffer, out_len))
-			err = -EFAULT;
-	}
-	
-error:
-	blk_put_request(rq);
-
-error_free_buffer:
-	kfree(buffer);
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(sg_scsi_ioctl);
-
-/* Send basic block requests */
-static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
-			      int cmd, int data)
-{
-	struct request *rq;
-	int err;
-
-	rq = blk_get_request(q, REQ_OP_DRV_OUT, 0);
-	if (IS_ERR(rq))
-		return PTR_ERR(rq);
-	rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
-	scsi_req(rq)->cmd[0] = cmd;
-	scsi_req(rq)->cmd[4] = data;
-	scsi_req(rq)->cmd_len = 6;
-	blk_execute_rq(bd_disk, rq, 0);
-	err = scsi_req(rq)->result ? -EIO : 0;
-	blk_put_request(rq);
-
-	return err;
-}
-
-static inline int blk_send_start_stop(struct request_queue *q,
-				      struct gendisk *bd_disk, int data)
-{
-	return __blk_send_generic(q, bd_disk, GPCMD_START_STOP_UNIT, data);
-}
-
-int put_sg_io_hdr(const struct sg_io_hdr *hdr, void __user *argp)
-{
-#ifdef CONFIG_COMPAT
-	if (in_compat_syscall()) {
-		struct compat_sg_io_hdr hdr32 =  {
-			.interface_id	 = hdr->interface_id,
-			.dxfer_direction = hdr->dxfer_direction,
-			.cmd_len	 = hdr->cmd_len,
-			.mx_sb_len	 = hdr->mx_sb_len,
-			.iovec_count	 = hdr->iovec_count,
-			.dxfer_len	 = hdr->dxfer_len,
-			.dxferp		 = (uintptr_t)hdr->dxferp,
-			.cmdp		 = (uintptr_t)hdr->cmdp,
-			.sbp		 = (uintptr_t)hdr->sbp,
-			.timeout	 = hdr->timeout,
-			.flags		 = hdr->flags,
-			.pack_id	 = hdr->pack_id,
-			.usr_ptr	 = (uintptr_t)hdr->usr_ptr,
-			.status		 = hdr->status,
-			.masked_status	 = hdr->masked_status,
-			.msg_status	 = hdr->msg_status,
-			.sb_len_wr	 = hdr->sb_len_wr,
-			.host_status	 = hdr->host_status,
-			.driver_status	 = hdr->driver_status,
-			.resid		 = hdr->resid,
-			.duration	 = hdr->duration,
-			.info		 = hdr->info,
-		};
-
-		if (copy_to_user(argp, &hdr32, sizeof(hdr32)))
-			return -EFAULT;
-
-		return 0;
-	}
-#endif
-
-	if (copy_to_user(argp, hdr, sizeof(*hdr)))
-		return -EFAULT;
-
-	return 0;
-}
-EXPORT_SYMBOL(put_sg_io_hdr);
-
-int get_sg_io_hdr(struct sg_io_hdr *hdr, const void __user *argp)
-{
-#ifdef CONFIG_COMPAT
-	struct compat_sg_io_hdr hdr32;
-
-	if (in_compat_syscall()) {
-		if (copy_from_user(&hdr32, argp, sizeof(hdr32)))
-			return -EFAULT;
-
-		*hdr = (struct sg_io_hdr) {
-			.interface_id	 = hdr32.interface_id,
-			.dxfer_direction = hdr32.dxfer_direction,
-			.cmd_len	 = hdr32.cmd_len,
-			.mx_sb_len	 = hdr32.mx_sb_len,
-			.iovec_count	 = hdr32.iovec_count,
-			.dxfer_len	 = hdr32.dxfer_len,
-			.dxferp		 = compat_ptr(hdr32.dxferp),
-			.cmdp		 = compat_ptr(hdr32.cmdp),
-			.sbp		 = compat_ptr(hdr32.sbp),
-			.timeout	 = hdr32.timeout,
-			.flags		 = hdr32.flags,
-			.pack_id	 = hdr32.pack_id,
-			.usr_ptr	 = compat_ptr(hdr32.usr_ptr),
-			.status		 = hdr32.status,
-			.masked_status	 = hdr32.masked_status,
-			.msg_status	 = hdr32.msg_status,
-			.sb_len_wr	 = hdr32.sb_len_wr,
-			.host_status	 = hdr32.host_status,
-			.driver_status	 = hdr32.driver_status,
-			.resid		 = hdr32.resid,
-			.duration	 = hdr32.duration,
-			.info		 = hdr32.info,
-		};
-
-		return 0;
-	}
-#endif
-
-	if (copy_from_user(hdr, argp, sizeof(*hdr)))
-		return -EFAULT;
-
-	return 0;
-}
-EXPORT_SYMBOL(get_sg_io_hdr);
-
-#ifdef CONFIG_COMPAT
-struct compat_cdrom_generic_command {
-	unsigned char	cmd[CDROM_PACKET_SIZE];
-	compat_caddr_t	buffer;
-	compat_uint_t	buflen;
-	compat_int_t	stat;
-	compat_caddr_t	sense;
-	unsigned char	data_direction;
-	unsigned char	pad[3];
-	compat_int_t	quiet;
-	compat_int_t	timeout;
-	compat_caddr_t	unused;
-};
-#endif
-
-static int scsi_get_cdrom_generic_arg(struct cdrom_generic_command *cgc,
-				      const void __user *arg)
-{
-#ifdef CONFIG_COMPAT
-	if (in_compat_syscall()) {
-		struct compat_cdrom_generic_command cgc32;
-
-		if (copy_from_user(&cgc32, arg, sizeof(cgc32)))
-			return -EFAULT;
-
-		*cgc = (struct cdrom_generic_command) {
-			.buffer		= compat_ptr(cgc32.buffer),
-			.buflen		= cgc32.buflen,
-			.stat		= cgc32.stat,
-			.sense		= compat_ptr(cgc32.sense),
-			.data_direction	= cgc32.data_direction,
-			.quiet		= cgc32.quiet,
-			.timeout	= cgc32.timeout,
-			.unused		= compat_ptr(cgc32.unused),
-		};
-		memcpy(&cgc->cmd, &cgc32.cmd, CDROM_PACKET_SIZE);
-		return 0;
-	}
-#endif
-	if (copy_from_user(cgc, arg, sizeof(*cgc)))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int scsi_put_cdrom_generic_arg(const struct cdrom_generic_command *cgc,
-				      void __user *arg)
-{
-#ifdef CONFIG_COMPAT
-	if (in_compat_syscall()) {
-		struct compat_cdrom_generic_command cgc32 = {
-			.buffer		= (uintptr_t)(cgc->buffer),
-			.buflen		= cgc->buflen,
-			.stat		= cgc->stat,
-			.sense		= (uintptr_t)(cgc->sense),
-			.data_direction	= cgc->data_direction,
-			.quiet		= cgc->quiet,
-			.timeout	= cgc->timeout,
-			.unused		= (uintptr_t)(cgc->unused),
-		};
-		memcpy(&cgc32.cmd, &cgc->cmd, CDROM_PACKET_SIZE);
-
-		if (copy_to_user(arg, &cgc32, sizeof(cgc32)))
-			return -EFAULT;
-
-		return 0;
-	}
-#endif
-	if (copy_to_user(arg, cgc, sizeof(*cgc)))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int scsi_cdrom_send_packet(struct request_queue *q,
-				  struct gendisk *bd_disk,
-				  fmode_t mode, void __user *arg)
-{
-	struct cdrom_generic_command cgc;
-	struct sg_io_hdr hdr;
-	int err;
-
-	err = scsi_get_cdrom_generic_arg(&cgc, arg);
-	if (err)
-		return err;
-
-	cgc.timeout = clock_t_to_jiffies(cgc.timeout);
-	memset(&hdr, 0, sizeof(hdr));
-	hdr.interface_id = 'S';
-	hdr.cmd_len = sizeof(cgc.cmd);
-	hdr.dxfer_len = cgc.buflen;
-	switch (cgc.data_direction) {
-		case CGC_DATA_UNKNOWN:
-			hdr.dxfer_direction = SG_DXFER_UNKNOWN;
-			break;
-		case CGC_DATA_WRITE:
-			hdr.dxfer_direction = SG_DXFER_TO_DEV;
-			break;
-		case CGC_DATA_READ:
-			hdr.dxfer_direction = SG_DXFER_FROM_DEV;
-			break;
-		case CGC_DATA_NONE:
-			hdr.dxfer_direction = SG_DXFER_NONE;
-			break;
-		default:
-			return -EINVAL;
-	}
-
-	hdr.dxferp = cgc.buffer;
-	hdr.sbp = cgc.sense;
-	if (hdr.sbp)
-		hdr.mx_sb_len = sizeof(struct request_sense);
-	hdr.timeout = jiffies_to_msecs(cgc.timeout);
-	hdr.cmdp = ((struct cdrom_generic_command __user*) arg)->cmd;
-	hdr.cmd_len = sizeof(cgc.cmd);
-
-	err = sg_io(q, bd_disk, &hdr, mode);
-	if (err == -EFAULT)
-		return -EFAULT;
-
-	if (hdr.status)
-		return -EIO;
-
-	cgc.stat = err;
-	cgc.buflen = hdr.resid;
-	if (scsi_put_cdrom_generic_arg(&cgc, arg))
-		return -EFAULT;
-
-	return err;
-}
-
-int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mode,
-		   unsigned int cmd, void __user *arg)
-{
-	int err;
-
-	if (!q)
-		return -ENXIO;
-
-	switch (cmd) {
-		/*
-		 * new sgv3 interface
-		 */
-		case SG_GET_VERSION_NUM:
-			err = sg_get_version(arg);
-			break;
-		case SCSI_IOCTL_GET_IDLUN:
-			err = scsi_get_idlun(q, arg);
-			break;
-		case SCSI_IOCTL_GET_BUS_NUMBER:
-			err = scsi_get_bus(q, arg);
-			break;
-		case SG_SET_TIMEOUT:
-			err = sg_set_timeout(q, arg);
-			break;
-		case SG_GET_TIMEOUT:
-			err = sg_get_timeout(q);
-			break;
-		case SG_GET_RESERVED_SIZE:
-			err = sg_get_reserved_size(q, arg);
-			break;
-		case SG_SET_RESERVED_SIZE:
-			err = sg_set_reserved_size(q, arg);
-			break;
-		case SG_EMULATED_HOST:
-			err = sg_emulated_host(q, arg);
-			break;
-		case SG_IO: {
-			struct sg_io_hdr hdr;
-
-			err = get_sg_io_hdr(&hdr, arg);
-			if (err)
-				break;
-			err = sg_io(q, bd_disk, &hdr, mode);
-			if (err == -EFAULT)
-				break;
-
-			if (put_sg_io_hdr(&hdr, arg))
-				err = -EFAULT;
-			break;
-		}
-		case CDROM_SEND_PACKET:
-			err = scsi_cdrom_send_packet(q, bd_disk, mode, arg);
-			break;
-
-		/*
-		 * old junk scsi send command ioctl
-		 */
-		case SCSI_IOCTL_SEND_COMMAND:
-			printk(KERN_WARNING "program %s is using a deprecated SCSI ioctl, please convert it to SG_IO\n", current->comm);
-			err = -EINVAL;
-			if (!arg)
-				break;
-
-			err = sg_scsi_ioctl(q, bd_disk, mode, arg);
-			break;
-		case CDROMCLOSETRAY:
-			err = blk_send_start_stop(q, bd_disk, 0x03);
-			break;
-		case CDROMEJECT:
-			err = blk_send_start_stop(q, bd_disk, 0x02);
-			break;
-		default:
-			err = -ENOTTY;
-	}
-
-	return err;
-}
-EXPORT_SYMBOL(scsi_cmd_ioctl);
-
-int scsi_verify_blk_ioctl(struct block_device *bd, unsigned int cmd)
-{
-	if (bd && !bdev_is_partition(bd))
-		return 0;
-
-	if (capable(CAP_SYS_RAWIO))
-		return 0;
-
-	return -ENOIOCTLCMD;
-}
-EXPORT_SYMBOL(scsi_verify_blk_ioctl);
-
-int scsi_cmd_blk_ioctl(struct block_device *bd, fmode_t mode,
-		       unsigned int cmd, void __user *arg)
-{
-	int ret;
-
-	ret = scsi_verify_blk_ioctl(bd, cmd);
-	if (ret < 0)
-		return ret;
-
-	return scsi_cmd_ioctl(bd->bd_disk->queue, bd->bd_disk, mode, cmd, arg);
-}
-EXPORT_SYMBOL(scsi_cmd_blk_ioctl);
-
-/**
- * scsi_req_init - initialize certain fields of a scsi_request structure
- * @req: Pointer to a scsi_request structure.
- * Initializes .__cmd[], .cmd, .cmd_len and .sense_len but no other members
- * of struct scsi_request.
- */
-void scsi_req_init(struct scsi_request *req)
-{
-	memset(req->__cmd, 0, sizeof(req->__cmd));
-	req->cmd = req->__cmd;
-	req->cmd_len = BLK_MAX_CDB;
-	req->sense_len = 0;
-}
-EXPORT_SYMBOL(scsi_req_init);
-
-static int __init blk_scsi_ioctl_init(void)
-{
-	blk_set_cmd_filter_defaults(&blk_default_cmd_filter);
-	return 0;
-}
-fs_initcall(blk_scsi_ioctl_init);
diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index 894b7e6ae144..30b1f511c2af 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -385,7 +385,9 @@ static struct platform_device *lpss_clk_dev;
 
 static inline void lpt_register_clock_device(void)
 {
-	lpss_clk_dev = platform_device_register_simple("clk-lpt", -1, NULL, 0);
+	lpss_clk_dev = platform_device_register_simple("clk-lpss-atom",
+						       PLATFORM_DEVID_NONE,
+						       NULL, 0);
 }
 
 static int register_device_clock(struct acpi_device *adev,
@@ -434,8 +436,8 @@ static int register_device_clock(struct acpi_device *adev,
 		if (!clk_name)
 			return -ENOMEM;
 		clk = clk_register_fractional_divider(NULL, clk_name, parent,
-						      0, prv_base,
-						      1, 15, 16, 15, 0, NULL);
+						      CLK_FRAC_DIVIDER_POWER_OF_TWO_PS,
+						      prv_base, 1, 15, 16, 15, 0, NULL);
 		parent = clk_name;
 
 		clk_name = kasprintf(GFP_KERNEL, "%s-update", devname);
@@ -1337,7 +1339,7 @@ void __init acpi_lpss_init(void)
 	const struct x86_cpu_id *id;
 	int ret;
 
-	ret = lpt_clk_init();
+	ret = lpss_atom_clk_init();
 	if (ret)
 		return;
 
diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
index 8cc195c4c861..24f662d8bd39 100644
--- a/drivers/acpi/acpi_memhotplug.c
+++ b/drivers/acpi/acpi_memhotplug.c
@@ -54,6 +54,7 @@ struct acpi_memory_info {
 struct acpi_memory_device {
 	struct acpi_device *device;
 	struct list_head res_list;
+	int mgid;
 };
 
 static acpi_status
@@ -169,12 +170,33 @@ static void acpi_unbind_memory_blocks(struct acpi_memory_info *info)
 static int acpi_memory_enable_device(struct acpi_memory_device *mem_device)
 {
 	acpi_handle handle = mem_device->device->handle;
+	mhp_t mhp_flags = MHP_NID_IS_MGID;
 	int result, num_enabled = 0;
 	struct acpi_memory_info *info;
-	mhp_t mhp_flags = MHP_NONE;
-	int node;
+	u64 total_length = 0;
+	int node, mgid;
 
 	node = acpi_get_node(handle);
+
+	list_for_each_entry(info, &mem_device->res_list, list) {
+		if (!info->length)
+			continue;
+		/* We want a single node for the whole memory group */
+		if (node < 0)
+			node = memory_add_physaddr_to_nid(info->start_addr);
+		total_length += info->length;
+	}
+
+	if (!total_length) {
+		dev_err(&mem_device->device->dev, "device is empty\n");
+		return -EINVAL;
+	}
+
+	mgid = memory_group_register_static(node, PFN_UP(total_length));
+	if (mgid < 0)
+		return mgid;
+	mem_device->mgid = mgid;
+
 	/*
 	 * Tell the VM there is more memory here...
 	 * Note: Assume that this function returns zero on success
@@ -182,22 +204,16 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device)
 	 * (i.e. memory-hot-remove function)
 	 */
 	list_for_each_entry(info, &mem_device->res_list, list) {
-		if (info->enabled) { /* just sanity check...*/
-			num_enabled++;
-			continue;
-		}
 		/*
 		 * If the memory block size is zero, please ignore it.
 		 * Don't try to do the following memory hotplug flowchart.
 		 */
 		if (!info->length)
 			continue;
-		if (node < 0)
-			node = memory_add_physaddr_to_nid(info->start_addr);
 
 		if (mhp_supports_memmap_on_memory(info->length))
 			mhp_flags |= MHP_MEMMAP_ON_MEMORY;
-		result = __add_memory(node, info->start_addr, info->length,
+		result = __add_memory(mgid, info->start_addr, info->length,
 				      mhp_flags);
 
 		/*
@@ -239,19 +255,14 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device)
 
 static void acpi_memory_remove_memory(struct acpi_memory_device *mem_device)
 {
-	acpi_handle handle = mem_device->device->handle;
 	struct acpi_memory_info *info, *n;
-	int nid = acpi_get_node(handle);
 
 	list_for_each_entry_safe(info, n, &mem_device->res_list, list) {
 		if (!info->enabled)
 			continue;
 
-		if (nid == NUMA_NO_NODE)
-			nid = memory_add_physaddr_to_nid(info->start_addr);
-
 		acpi_unbind_memory_blocks(info);
-		__remove_memory(nid, info->start_addr, info->length);
+		__remove_memory(info->start_addr, info->length);
 		list_del(&info->list);
 		kfree(info);
 	}
@@ -262,6 +273,10 @@ static void acpi_memory_device_free(struct acpi_memory_device *mem_device)
 	if (!mem_device)
 		return;
 
+	/* In case we succeeded adding *some* memory, unregistering fails. */
+	if (mem_device->mgid >= 0)
+		memory_group_unregister(mem_device->mgid);
+
 	acpi_memory_free_device_resources(mem_device);
 	mem_device->device->driver_data = NULL;
 	kfree(mem_device);
@@ -282,6 +297,7 @@ static int acpi_memory_device_add(struct acpi_device *device,
 
 	INIT_LIST_HEAD(&mem_device->res_list);
 	mem_device->device = device;
+	mem_device->mgid = -1;
 	sprintf(acpi_device_name(device), "%s", ACPI_MEMORY_DEVICE_NAME);
 	sprintf(acpi_device_class(device), "%s", ACPI_MEMORY_DEVICE_CLASS);
 	device->driver_data = mem_device;
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index a4d4eebba1da..bd482108310c 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -1008,23 +1008,14 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val)
 	return ret_val;
 }
 
-/**
- * cppc_get_desired_perf - Get the value of desired performance register.
- * @cpunum: CPU from which to get desired performance.
- * @desired_perf: address of a variable to store the returned desired performance
- *
- * Return: 0 for success, -EIO otherwise.
- */
-int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
+static int cppc_get_perf(int cpunum, enum cppc_regs reg_idx, u64 *perf)
 {
 	struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum);
-	int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum);
-	struct cpc_register_resource *desired_reg;
-	struct cppc_pcc_data *pcc_ss_data = NULL;
-
-	desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF];
+	struct cpc_register_resource *reg = &cpc_desc->cpc_regs[reg_idx];
 
-	if (CPC_IN_PCC(desired_reg)) {
+	if (CPC_IN_PCC(reg)) {
+		int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum);
+		struct cppc_pcc_data *pcc_ss_data = NULL;
 		int ret = 0;
 
 		if (pcc_ss_id < 0)
@@ -1035,7 +1026,7 @@ int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
 		down_write(&pcc_ss_data->pcc_lock);
 
 		if (send_pcc_cmd(pcc_ss_id, CMD_READ) >= 0)
-			cpc_read(cpunum, desired_reg, desired_perf);
+			cpc_read(cpunum, reg, perf);
 		else
 			ret = -EIO;
 
@@ -1044,13 +1035,37 @@ int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
 		return ret;
 	}
 
-	cpc_read(cpunum, desired_reg, desired_perf);
+	cpc_read(cpunum, reg, perf);
 
 	return 0;
 }
+
+/**
+ * cppc_get_desired_perf - Get the desired performance register value.
+ * @cpunum: CPU from which to get desired performance.
+ * @desired_perf: Return address.
+ *
+ * Return: 0 for success, -EIO otherwise.
+ */
+int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
+{
+	return cppc_get_perf(cpunum, DESIRED_PERF, desired_perf);
+}
 EXPORT_SYMBOL_GPL(cppc_get_desired_perf);
 
 /**
+ * cppc_get_nominal_perf - Get the nominal performance register value.
+ * @cpunum: CPU from which to get nominal performance.
+ * @nominal_perf: Return address.
+ *
+ * Return: 0 for success, -EIO otherwise.
+ */
+int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
+{
+	return cppc_get_perf(cpunum, NOMINAL_PERF, nominal_perf);
+}
+
+/**
  * cppc_get_perf_caps - Get a CPU's performance capabilities.
  * @cpunum: CPU from which to get capabilities info.
  * @perf_caps: ptr to cppc_perf_caps. See cppc_acpi.h
diff --git a/drivers/acpi/prmt.c b/drivers/acpi/prmt.c
index 1f6007abcf18..89c22bc55057 100644
--- a/drivers/acpi/prmt.c
+++ b/drivers/acpi/prmt.c
@@ -288,10 +288,18 @@ invalid_guid:
 
 void __init init_prmt(void)
 {
+	struct acpi_table_header *tbl;
 	acpi_status status;
-	int mc = acpi_table_parse_entries(ACPI_SIG_PRMT, sizeof(struct acpi_table_prmt) +
+	int mc;
+
+	status = acpi_get_table(ACPI_SIG_PRMT, 0, &tbl);
+	if (ACPI_FAILURE(status))
+		return;
+
+	mc = acpi_table_parse_entries(ACPI_SIG_PRMT, sizeof(struct acpi_table_prmt) +
 					  sizeof (struct acpi_table_prmt_header),
 					  0, acpi_parse_prmt, 0);
+	acpi_put_table(tbl);
 	/*
 	 * Return immediately if PRMT table is not present or no PRM module found.
 	 */
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index b24513ec3fae..5b54c80b9d32 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -16,7 +16,6 @@
 #include <linux/signal.h>
 #include <linux/kthread.h>
 #include <linux/dmi.h>
-#include <linux/nls.h>
 #include <linux/dma-map-ops.h>
 #include <linux/platform_data/x86/apple.h>
 #include <linux/pgtable.h>
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index a37a1532a575..f9383736fa0f 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -583,8 +583,8 @@ void __init acpi_table_upgrade(void)
 	}
 
 	acpi_tables_addr =
-		memblock_find_in_range(0, ACPI_TABLE_UPGRADE_MAX_PHYS,
-				       all_tables_size, PAGE_SIZE);
+		memblock_phys_alloc_range(all_tables_size, PAGE_SIZE,
+					  0, ACPI_TABLE_UPGRADE_MAX_PHYS);
 	if (!acpi_tables_addr) {
 		WARN_ON(1);
 		return;
@@ -599,7 +599,6 @@ void __init acpi_table_upgrade(void)
 	 * Both memblock_reserve and e820__range_add (via arch_reserve_mem_area)
 	 * works fine.
 	 */
-	memblock_reserve(acpi_tables_addr, all_tables_size);
 	arch_reserve_mem_area(acpi_tables_addr, all_tables_size);
 
 	/*
diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
index 3a308461246a..bd92b549fd5a 100644
--- a/drivers/acpi/x86/s2idle.c
+++ b/drivers/acpi/x86/s2idle.c
@@ -449,25 +449,30 @@ int acpi_s2idle_prepare_late(void)
 	if (pm_debug_messages_on)
 		lpi_check_constraints();
 
-	if (lps0_dsm_func_mask_microsoft > 0) {
+	/* Screen off */
+	if (lps0_dsm_func_mask > 0)
+		acpi_sleep_run_lps0_dsm(acpi_s2idle_vendor_amd() ?
+					ACPI_LPS0_SCREEN_OFF_AMD :
+					ACPI_LPS0_SCREEN_OFF,
+					lps0_dsm_func_mask, lps0_dsm_guid);
+
+	if (lps0_dsm_func_mask_microsoft > 0)
 		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF,
 				lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
-		acpi_sleep_run_lps0_dsm(ACPI_LPS0_MS_ENTRY,
-				lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
+
+	/* LPS0 entry */
+	if (lps0_dsm_func_mask > 0)
+		acpi_sleep_run_lps0_dsm(acpi_s2idle_vendor_amd() ?
+					ACPI_LPS0_ENTRY_AMD :
+					ACPI_LPS0_ENTRY,
+					lps0_dsm_func_mask, lps0_dsm_guid);
+	if (lps0_dsm_func_mask_microsoft > 0) {
 		acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY,
 				lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
-	} else if (acpi_s2idle_vendor_amd()) {
-		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF_AMD,
-				lps0_dsm_func_mask, lps0_dsm_guid);
-		acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY_AMD,
-				lps0_dsm_func_mask, lps0_dsm_guid);
-	} else {
-		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF,
-				lps0_dsm_func_mask, lps0_dsm_guid);
-		acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY,
-				lps0_dsm_func_mask, lps0_dsm_guid);
+		/* modern standby entry */
+		acpi_sleep_run_lps0_dsm(ACPI_LPS0_MS_ENTRY,
+				lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
 	}
-
 	return 0;
 }
 
@@ -476,24 +481,30 @@ void acpi_s2idle_restore_early(void)
 	if (!lps0_device_handle || sleep_no_lps0)
 		return;
 
-	if (lps0_dsm_func_mask_microsoft > 0) {
-		acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT,
-				lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
+	/* Modern standby exit */
+	if (lps0_dsm_func_mask_microsoft > 0)
 		acpi_sleep_run_lps0_dsm(ACPI_LPS0_MS_EXIT,
 				lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
-		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON,
-				lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
-	} else if (acpi_s2idle_vendor_amd()) {
-		acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT_AMD,
-				lps0_dsm_func_mask, lps0_dsm_guid);
-		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON_AMD,
-				lps0_dsm_func_mask, lps0_dsm_guid);
-	} else {
+
+	/* LPS0 exit */
+	if (lps0_dsm_func_mask > 0)
+		acpi_sleep_run_lps0_dsm(acpi_s2idle_vendor_amd() ?
+					ACPI_LPS0_EXIT_AMD :
+					ACPI_LPS0_EXIT,
+					lps0_dsm_func_mask, lps0_dsm_guid);
+	if (lps0_dsm_func_mask_microsoft > 0)
 		acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT,
-				lps0_dsm_func_mask, lps0_dsm_guid);
+				lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
+
+	/* Screen on */
+	if (lps0_dsm_func_mask_microsoft > 0)
 		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON,
-				lps0_dsm_func_mask, lps0_dsm_guid);
-	}
+				lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
+	if (lps0_dsm_func_mask > 0)
+		acpi_sleep_run_lps0_dsm(acpi_s2idle_vendor_amd() ?
+					ACPI_LPS0_SCREEN_ON_AMD :
+					ACPI_LPS0_SCREEN_ON,
+					lps0_dsm_func_mask, lps0_dsm_guid);
 }
 
 static const struct platform_s2idle_ops acpi_s2idle_ops_lps0 = {
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index b8459c54f739..eed65311b5d1 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2186,6 +2186,25 @@ not_supported:
 	dev->flags &= ~ATA_DFLAG_NCQ_PRIO;
 }
 
+static bool ata_dev_check_adapter(struct ata_device *dev,
+				  unsigned short vendor_id)
+{
+	struct pci_dev *pcidev = NULL;
+	struct device *parent_dev = NULL;
+
+	for (parent_dev = dev->tdev.parent; parent_dev != NULL;
+	     parent_dev = parent_dev->parent) {
+		if (dev_is_pci(parent_dev)) {
+			pcidev = to_pci_dev(parent_dev);
+			if (pcidev->vendor == vendor_id)
+				return true;
+			break;
+		}
+	}
+
+	return false;
+}
+
 static int ata_dev_config_ncq(struct ata_device *dev,
 			       char *desc, size_t desc_sz)
 {
@@ -2204,6 +2223,13 @@ static int ata_dev_config_ncq(struct ata_device *dev,
 		snprintf(desc, desc_sz, "NCQ (not used)");
 		return 0;
 	}
+
+	if (dev->horkage & ATA_HORKAGE_NO_NCQ_ON_ATI &&
+	    ata_dev_check_adapter(dev, PCI_VENDOR_ID_ATI)) {
+		snprintf(desc, desc_sz, "NCQ (not used)");
+		return 0;
+	}
+
 	if (ap->flags & ATA_FLAG_NCQ) {
 		hdepth = min(ap->scsi_host->can_queue, ATA_MAX_QUEUE);
 		dev->flags |= ATA_DFLAG_NCQ;
@@ -3970,6 +3996,12 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
 						ATA_HORKAGE_ZERO_AFTER_TRIM, },
 	{ "Samsung SSD 850*",		NULL,	ATA_HORKAGE_NO_NCQ_TRIM |
 						ATA_HORKAGE_ZERO_AFTER_TRIM, },
+	{ "Samsung SSD 860*",		NULL,	ATA_HORKAGE_NO_NCQ_TRIM |
+						ATA_HORKAGE_ZERO_AFTER_TRIM |
+						ATA_HORKAGE_NO_NCQ_ON_ATI, },
+	{ "Samsung SSD 870*",		NULL,	ATA_HORKAGE_NO_NCQ_TRIM |
+						ATA_HORKAGE_ZERO_AFTER_TRIM |
+						ATA_HORKAGE_NO_NCQ_ON_ATI, },
 	{ "FCCT*M500*",			NULL,	ATA_HORKAGE_NO_NCQ_TRIM |
 						ATA_HORKAGE_ZERO_AFTER_TRIM, },
 
@@ -6124,6 +6156,8 @@ static int __init ata_parse_force_one(char **cur,
 		{ "ncq",	.horkage_off	= ATA_HORKAGE_NONCQ },
 		{ "noncqtrim",	.horkage_on	= ATA_HORKAGE_NO_NCQ_TRIM },
 		{ "ncqtrim",	.horkage_off	= ATA_HORKAGE_NO_NCQ_TRIM },
+		{ "noncqati",	.horkage_on	= ATA_HORKAGE_NO_NCQ_ON_ATI },
+		{ "ncqati",	.horkage_off	= ATA_HORKAGE_NO_NCQ_ON_ATI },
 		{ "dump_id",	.horkage_on	= ATA_HORKAGE_DUMP_ID },
 		{ "pio0",	.xfer_mask	= 1 << (ATA_SHIFT_PIO + 0) },
 		{ "pio1",	.xfer_mask	= 1 << (ATA_SHIFT_PIO + 1) },
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index bb3637762985..bf9c4b6c5c3d 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -912,7 +912,7 @@ void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
 	 * Note that ATA_QCFLAG_FAILED is unconditionally set after
 	 * this function completes.
 	 */
-	blk_abort_request(qc->scsicmd->request);
+	blk_abort_request(scsi_cmd_to_rq(qc->scsicmd));
 }
 
 /**
@@ -1893,8 +1893,7 @@ static inline int ata_eh_worth_retry(struct ata_queued_cmd *qc)
  */
 static inline bool ata_eh_quiet(struct ata_queued_cmd *qc)
 {
-	if (qc->scsicmd &&
-	    qc->scsicmd->request->rq_flags & RQF_QUIET)
+	if (qc->scsicmd && scsi_cmd_to_rq(qc->scsicmd)->rq_flags & RQF_QUIET)
 		qc->flags |= ATA_QCFLAG_QUIET;
 	return qc->flags & ATA_QCFLAG_QUIET;
 }
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 0b7b4624e4df..1fb4611f7eeb 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -631,7 +631,7 @@ static struct ata_queued_cmd *ata_scsi_qc_new(struct ata_device *dev,
 {
 	struct ata_queued_cmd *qc;
 
-	qc = ata_qc_new_init(dev, cmd->request->tag);
+	qc = ata_qc_new_init(dev, scsi_cmd_to_rq(cmd)->tag);
 	if (qc) {
 		qc->scsicmd = cmd;
 		qc->scsidone = cmd->scsi_done;
@@ -639,7 +639,7 @@ static struct ata_queued_cmd *ata_scsi_qc_new(struct ata_device *dev,
 		qc->sg = scsi_sglist(cmd);
 		qc->n_elem = scsi_sg_count(cmd);
 
-		if (cmd->request->rq_flags & RQF_QUIET)
+		if (scsi_cmd_to_rq(cmd)->rq_flags & RQF_QUIET)
 			qc->flags |= ATA_QCFLAG_QUIET;
 	} else {
 		cmd->result = (DID_OK << 16) | SAM_STAT_TASK_SET_FULL;
@@ -1496,7 +1496,7 @@ nothing_to_do:
 
 static bool ata_check_nblocks(struct scsi_cmnd *scmd, u32 n_blocks)
 {
-	struct request *rq = scmd->request;
+	struct request *rq = scsi_cmd_to_rq(scmd);
 	u32 req_blocks;
 
 	if (!blk_rq_is_passthrough(rq))
@@ -1531,7 +1531,7 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc)
 {
 	struct scsi_cmnd *scmd = qc->scsicmd;
 	const u8 *cdb = scmd->cmnd;
-	struct request *rq = scmd->request;
+	struct request *rq = scsi_cmd_to_rq(scmd);
 	int class = IOPRIO_PRIO_CLASS(req_get_ioprio(rq));
 	unsigned int tf_flags = 0;
 	u64 block;
@@ -3139,7 +3139,7 @@ static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc)
 	 * as it modifies the DATA OUT buffer, which would corrupt user
 	 * memory for SG_IO commands.
 	 */
-	if (unlikely(blk_rq_is_passthrough(scmd->request)))
+	if (unlikely(blk_rq_is_passthrough(scsi_cmd_to_rq(scmd))))
 		goto invalid_opcode;
 
 	if (unlikely(scmd->cmd_len < 16)) {
diff --git a/drivers/ata/pata_falcon.c b/drivers/ata/pata_falcon.c
index 9d0dd8f4c21c..121635aa8c00 100644
--- a/drivers/ata/pata_falcon.c
+++ b/drivers/ata/pata_falcon.c
@@ -48,8 +48,8 @@ static unsigned int pata_falcon_data_xfer(struct ata_queued_cmd *qc,
 	struct scsi_cmnd *cmd = qc->scsicmd;
 	bool swap = 1;
 
-	if (dev->class == ATA_DEV_ATA && cmd && cmd->request &&
-	    !blk_rq_is_passthrough(cmd->request))
+	if (dev->class == ATA_DEV_ATA && cmd &&
+	    !blk_rq_is_passthrough(scsi_cmd_to_rq(cmd)))
 		swap = 0;
 
 	/* Transfer multiple of 2 bytes */
diff --git a/drivers/auxdisplay/cfag12864b.c b/drivers/auxdisplay/cfag12864b.c
index fd430e6866a1..6526aa51fb1d 100644
--- a/drivers/auxdisplay/cfag12864b.c
+++ b/drivers/auxdisplay/cfag12864b.c
@@ -33,7 +33,7 @@
  */
 
 static unsigned int cfag12864b_rate = CONFIG_CFAG12864B_RATE;
-module_param(cfag12864b_rate, uint, S_IRUGO);
+module_param(cfag12864b_rate, uint, 0444);
 MODULE_PARM_DESC(cfag12864b_rate,
 	"Refresh rate (hertz)");
 
diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c
index 24fd6f369ebe..304accde365c 100644
--- a/drivers/auxdisplay/charlcd.c
+++ b/drivers/auxdisplay/charlcd.c
@@ -637,9 +637,7 @@ static int panel_notify_sys(struct notifier_block *this, unsigned long code,
 }
 
 static struct notifier_block panel_notifier = {
-	panel_notify_sys,
-	NULL,
-	0
+	.notifier_call = panel_notify_sys,
 };
 
 int charlcd_register(struct charlcd *lcd)
diff --git a/drivers/auxdisplay/hd44780.c b/drivers/auxdisplay/hd44780.c
index 2e5e7c993933..8b2a0eb3f32a 100644
--- a/drivers/auxdisplay/hd44780.c
+++ b/drivers/auxdisplay/hd44780.c
@@ -323,8 +323,8 @@ static int hd44780_remove(struct platform_device *pdev)
 {
 	struct charlcd *lcd = platform_get_drvdata(pdev);
 
-	kfree(lcd->drvdata);
 	charlcd_unregister(lcd);
+	kfree(lcd->drvdata);
 
 	kfree(lcd);
 	return 0;
diff --git a/drivers/auxdisplay/ks0108.c b/drivers/auxdisplay/ks0108.c
index 03c95ad4216c..e871b94a1911 100644
--- a/drivers/auxdisplay/ks0108.c
+++ b/drivers/auxdisplay/ks0108.c
@@ -28,11 +28,11 @@
  */
 
 static unsigned int ks0108_port = CONFIG_KS0108_PORT;
-module_param(ks0108_port, uint, S_IRUGO);
+module_param(ks0108_port, uint, 0444);
 MODULE_PARM_DESC(ks0108_port, "Parallel port where the LCD is connected");
 
 static unsigned int ks0108_delay = CONFIG_KS0108_DELAY;
-module_param(ks0108_delay, uint, S_IRUGO);
+module_param(ks0108_delay, uint, 0444);
 MODULE_PARM_DESC(ks0108_delay, "Delay between each control writing (microseconds)");
 
 /*
@@ -167,19 +167,7 @@ static struct parport_driver ks0108_parport_driver = {
 	.detach = ks0108_parport_detach,
 	.devmodel = true,
 };
-
-static int __init ks0108_init(void)
-{
-	return parport_register_driver(&ks0108_parport_driver);
-}
-
-static void __exit ks0108_exit(void)
-{
-	parport_unregister_driver(&ks0108_parport_driver);
-}
-
-module_init(ks0108_init);
-module_exit(ks0108_exit);
+module_parport_driver(ks0108_parport_driver);
 
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Miguel Ojeda <ojeda@kernel.org>");
diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
index 4cc4e117727d..46c503486e96 100644
--- a/drivers/base/arch_numa.c
+++ b/drivers/base/arch_numa.c
@@ -279,13 +279,10 @@ static int __init numa_alloc_distance(void)
 	int i, j;
 
 	size = nr_node_ids * nr_node_ids * sizeof(numa_distance[0]);
-	phys = memblock_find_in_range(0, PFN_PHYS(max_pfn),
-				      size, PAGE_SIZE);
+	phys = memblock_phys_alloc_range(size, PAGE_SIZE, 0, PFN_PHYS(max_pfn));
 	if (WARN_ON(!phys))
 		return -ENOMEM;
 
-	memblock_reserve(phys, size);
-
 	numa_distance = __va(phys);
 	numa_distance_cnt = nr_node_ids;
 
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 921312a8d957..43407665918f 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -149,6 +149,7 @@ void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
 }
 
 DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
+EXPORT_PER_CPU_SYMBOL_GPL(cpu_scale);
 
 void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
 {
@@ -165,6 +166,7 @@ void topology_set_thermal_pressure(const struct cpumask *cpus,
 	for_each_cpu(cpu, cpus)
 		WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure);
 }
+EXPORT_SYMBOL_GPL(topology_set_thermal_pressure);
 
 static ssize_t cpu_capacity_show(struct device *dev,
 				 struct device_attribute *attr,
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 280425409b40..e65dd803a453 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -27,6 +27,7 @@
 #include <linux/netdevice.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/mm.h>
+#include <linux/swiotlb.h>
 #include <linux/sysfs.h>
 #include <linux/dma-map-ops.h> /* for dma_default_coherent */
 
@@ -886,6 +887,8 @@ static void device_link_put_kref(struct device_link *link)
 {
 	if (link->flags & DL_FLAG_STATELESS)
 		kref_put(&link->kref, __device_link_del);
+	else if (!device_is_registered(link->consumer))
+		__device_link_del(&link->kref);
 	else
 		WARN(1, "Unable to drop a managed device link reference\n");
 }
@@ -2849,6 +2852,9 @@ void device_initialize(struct device *dev)
     defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL)
 	dev->dma_coherent = dma_default_coherent;
 #endif
+#ifdef CONFIG_SWIOTLB
+	dev->dma_io_tlb_mem = &io_tlb_default_mem;
+#endif
 }
 EXPORT_SYMBOL_GPL(device_initialize);
 
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index aa31a21f33d7..365cd4a7f239 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -82,6 +82,12 @@ static struct bus_type memory_subsys = {
  */
 static DEFINE_XARRAY(memory_blocks);
 
+/*
+ * Memory groups, indexed by memory group id (mgid).
+ */
+static DEFINE_XARRAY_FLAGS(memory_groups, XA_FLAGS_ALLOC);
+#define MEMORY_GROUP_MARK_DYNAMIC	XA_MARK_1
+
 static BLOCKING_NOTIFIER_HEAD(memory_chain);
 
 int register_memory_notifier(struct notifier_block *nb)
@@ -177,7 +183,8 @@ static int memory_block_online(struct memory_block *mem)
 	struct zone *zone;
 	int ret;
 
-	zone = zone_for_pfn_range(mem->online_type, mem->nid, start_pfn, nr_pages);
+	zone = zone_for_pfn_range(mem->online_type, mem->nid, mem->group,
+				  start_pfn, nr_pages);
 
 	/*
 	 * Although vmemmap pages have a different lifecycle than the pages
@@ -193,7 +200,7 @@ static int memory_block_online(struct memory_block *mem)
 	}
 
 	ret = online_pages(start_pfn + nr_vmemmap_pages,
-			   nr_pages - nr_vmemmap_pages, zone);
+			   nr_pages - nr_vmemmap_pages, zone, mem->group);
 	if (ret) {
 		if (nr_vmemmap_pages)
 			mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);
@@ -205,7 +212,8 @@ static int memory_block_online(struct memory_block *mem)
 	 * now already properly populated.
 	 */
 	if (nr_vmemmap_pages)
-		adjust_present_page_count(zone, nr_vmemmap_pages);
+		adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
+					  nr_vmemmap_pages);
 
 	return ret;
 }
@@ -215,24 +223,23 @@ static int memory_block_offline(struct memory_block *mem)
 	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
 	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
 	unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages;
-	struct zone *zone;
 	int ret;
 
 	/*
 	 * Unaccount before offlining, such that unpopulated zone and kthreads
 	 * can properly be torn down in offline_pages().
 	 */
-	if (nr_vmemmap_pages) {
-		zone = page_zone(pfn_to_page(start_pfn));
-		adjust_present_page_count(zone, -nr_vmemmap_pages);
-	}
+	if (nr_vmemmap_pages)
+		adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
+					  -nr_vmemmap_pages);
 
 	ret = offline_pages(start_pfn + nr_vmemmap_pages,
-			    nr_pages - nr_vmemmap_pages);
+			    nr_pages - nr_vmemmap_pages, mem->group);
 	if (ret) {
 		/* offline_pages() failed. Account back. */
 		if (nr_vmemmap_pages)
-			adjust_present_page_count(zone, nr_vmemmap_pages);
+			adjust_present_page_count(pfn_to_page(start_pfn),
+						  mem->group, nr_vmemmap_pages);
 		return ret;
 	}
 
@@ -374,12 +381,13 @@ static ssize_t phys_device_show(struct device *dev,
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
 static int print_allowed_zone(char *buf, int len, int nid,
+			      struct memory_group *group,
 			      unsigned long start_pfn, unsigned long nr_pages,
 			      int online_type, struct zone *default_zone)
 {
 	struct zone *zone;
 
-	zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages);
+	zone = zone_for_pfn_range(online_type, nid, group, start_pfn, nr_pages);
 	if (zone == default_zone)
 		return 0;
 
@@ -392,9 +400,10 @@ static ssize_t valid_zones_show(struct device *dev,
 	struct memory_block *mem = to_memory_block(dev);
 	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
 	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
+	struct memory_group *group = mem->group;
 	struct zone *default_zone;
+	int nid = mem->nid;
 	int len = 0;
-	int nid;
 
 	/*
 	 * Check the existing zone. Make sure that we do that only on the
@@ -413,14 +422,13 @@ static ssize_t valid_zones_show(struct device *dev,
 		goto out;
 	}
 
-	nid = mem->nid;
-	default_zone = zone_for_pfn_range(MMOP_ONLINE, nid, start_pfn,
-					  nr_pages);
+	default_zone = zone_for_pfn_range(MMOP_ONLINE, nid, group,
+					  start_pfn, nr_pages);
 
 	len += sysfs_emit_at(buf, len, "%s", default_zone->name);
-	len += print_allowed_zone(buf, len, nid, start_pfn, nr_pages,
+	len += print_allowed_zone(buf, len, nid, group, start_pfn, nr_pages,
 				  MMOP_ONLINE_KERNEL, default_zone);
-	len += print_allowed_zone(buf, len, nid, start_pfn, nr_pages,
+	len += print_allowed_zone(buf, len, nid, group, start_pfn, nr_pages,
 				  MMOP_ONLINE_MOVABLE, default_zone);
 out:
 	len += sysfs_emit_at(buf, len, "\n");
@@ -578,9 +586,9 @@ static struct memory_block *find_memory_block_by_id(unsigned long block_id)
 /*
  * Called under device_hotplug_lock.
  */
-struct memory_block *find_memory_block(struct mem_section *section)
+struct memory_block *find_memory_block(unsigned long section_nr)
 {
-	unsigned long block_id = memory_block_id(__section_nr(section));
+	unsigned long block_id = memory_block_id(section_nr);
 
 	return find_memory_block_by_id(block_id);
 }
@@ -634,7 +642,8 @@ int register_memory(struct memory_block *memory)
 }
 
 static int init_memory_block(unsigned long block_id, unsigned long state,
-			     unsigned long nr_vmemmap_pages)
+			     unsigned long nr_vmemmap_pages,
+			     struct memory_group *group)
 {
 	struct memory_block *mem;
 	int ret = 0;
@@ -652,6 +661,12 @@ static int init_memory_block(unsigned long block_id, unsigned long state,
 	mem->state = state;
 	mem->nid = NUMA_NO_NODE;
 	mem->nr_vmemmap_pages = nr_vmemmap_pages;
+	INIT_LIST_HEAD(&mem->group_next);
+
+	if (group) {
+		mem->group = group;
+		list_add(&mem->group_next, &group->memory_blocks);
+	}
 
 	ret = register_memory(mem);
 
@@ -671,7 +686,7 @@ static int add_memory_block(unsigned long base_section_nr)
 	if (section_count == 0)
 		return 0;
 	return init_memory_block(memory_block_id(base_section_nr),
-				 MEM_ONLINE, 0);
+				 MEM_ONLINE, 0,  NULL);
 }
 
 static void unregister_memory(struct memory_block *memory)
@@ -681,6 +696,11 @@ static void unregister_memory(struct memory_block *memory)
 
 	WARN_ON(xa_erase(&memory_blocks, memory->dev.id) == NULL);
 
+	if (memory->group) {
+		list_del(&memory->group_next);
+		memory->group = NULL;
+	}
+
 	/* drop the ref. we got via find_memory_block() */
 	put_device(&memory->dev);
 	device_unregister(&memory->dev);
@@ -694,7 +714,8 @@ static void unregister_memory(struct memory_block *memory)
  * Called under device_hotplug_lock.
  */
 int create_memory_block_devices(unsigned long start, unsigned long size,
-				unsigned long vmemmap_pages)
+				unsigned long vmemmap_pages,
+				struct memory_group *group)
 {
 	const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
 	unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
@@ -707,7 +728,8 @@ int create_memory_block_devices(unsigned long start, unsigned long size,
 		return -EINVAL;
 
 	for (block_id = start_block_id; block_id != end_block_id; block_id++) {
-		ret = init_memory_block(block_id, MEM_OFFLINE, vmemmap_pages);
+		ret = init_memory_block(block_id, MEM_OFFLINE, vmemmap_pages,
+					group);
 		if (ret)
 			break;
 	}
@@ -891,3 +913,164 @@ int for_each_memory_block(void *arg, walk_memory_blocks_func_t func)
 	return bus_for_each_dev(&memory_subsys, NULL, &cb_data,
 				for_each_memory_block_cb);
 }
+
+/*
+ * This is an internal helper to unify allocation and initialization of
+ * memory groups. Note that the passed memory group will be copied to a
+ * dynamically allocated memory group. After this call, the passed
+ * memory group should no longer be used.
+ */
+static int memory_group_register(struct memory_group group)
+{
+	struct memory_group *new_group;
+	uint32_t mgid;
+	int ret;
+
+	if (!node_possible(group.nid))
+		return -EINVAL;
+
+	new_group = kzalloc(sizeof(group), GFP_KERNEL);
+	if (!new_group)
+		return -ENOMEM;
+	*new_group = group;
+	INIT_LIST_HEAD(&new_group->memory_blocks);
+
+	ret = xa_alloc(&memory_groups, &mgid, new_group, xa_limit_31b,
+		       GFP_KERNEL);
+	if (ret) {
+		kfree(new_group);
+		return ret;
+	} else if (group.is_dynamic) {
+		xa_set_mark(&memory_groups, mgid, MEMORY_GROUP_MARK_DYNAMIC);
+	}
+	return mgid;
+}
+
+/**
+ * memory_group_register_static() - Register a static memory group.
+ * @nid: The node id.
+ * @max_pages: The maximum number of pages we'll have in this static memory
+ *	       group.
+ *
+ * Register a new static memory group and return the memory group id.
+ * All memory in the group belongs to a single unit, such as a DIMM. All
+ * memory belonging to a static memory group is added in one go to be removed
+ * in one go -- it's static.
+ *
+ * Returns an error if out of memory, if the node id is invalid, if no new
+ * memory groups can be registered, or if max_pages is invalid (0). Otherwise,
+ * returns the new memory group id.
+ */
+int memory_group_register_static(int nid, unsigned long max_pages)
+{
+	struct memory_group group = {
+		.nid = nid,
+		.s = {
+			.max_pages = max_pages,
+		},
+	};
+
+	if (!max_pages)
+		return -EINVAL;
+	return memory_group_register(group);
+}
+EXPORT_SYMBOL_GPL(memory_group_register_static);
+
+/**
+ * memory_group_register_dynamic() - Register a dynamic memory group.
+ * @nid: The node id.
+ * @unit_pages: Unit in pages in which is memory added/removed in this dynamic
+ *		memory group.
+ *
+ * Register a new dynamic memory group and return the memory group id.
+ * Memory within a dynamic memory group is added/removed dynamically
+ * in unit_pages.
+ *
+ * Returns an error if out of memory, if the node id is invalid, if no new
+ * memory groups can be registered, or if unit_pages is invalid (0, not a
+ * power of two, smaller than a single memory block). Otherwise, returns the
+ * new memory group id.
+ */
+int memory_group_register_dynamic(int nid, unsigned long unit_pages)
+{
+	struct memory_group group = {
+		.nid = nid,
+		.is_dynamic = true,
+		.d = {
+			.unit_pages = unit_pages,
+		},
+	};
+
+	if (!unit_pages || !is_power_of_2(unit_pages) ||
+	    unit_pages < PHYS_PFN(memory_block_size_bytes()))
+		return -EINVAL;
+	return memory_group_register(group);
+}
+EXPORT_SYMBOL_GPL(memory_group_register_dynamic);
+
+/**
+ * memory_group_unregister() - Unregister a memory group.
+ * @mgid: the memory group id
+ *
+ * Unregister a memory group. If any memory block still belongs to this
+ * memory group, unregistering will fail.
+ *
+ * Returns -EINVAL if the memory group id is invalid, returns -EBUSY if some
+ * memory blocks still belong to this memory group and returns 0 if
+ * unregistering succeeded.
+ */
+int memory_group_unregister(int mgid)
+{
+	struct memory_group *group;
+
+	if (mgid < 0)
+		return -EINVAL;
+
+	group = xa_load(&memory_groups, mgid);
+	if (!group)
+		return -EINVAL;
+	if (!list_empty(&group->memory_blocks))
+		return -EBUSY;
+	xa_erase(&memory_groups, mgid);
+	kfree(group);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(memory_group_unregister);
+
+/*
+ * This is an internal helper only to be used in core memory hotplug code to
+ * lookup a memory group. We don't care about locking, as we don't expect a
+ * memory group to get unregistered while adding memory to it -- because
+ * the group and the memory is managed by the same driver.
+ */
+struct memory_group *memory_group_find_by_id(int mgid)
+{
+	return xa_load(&memory_groups, mgid);
+}
+
+/*
+ * This is an internal helper only to be used in core memory hotplug code to
+ * walk all dynamic memory groups excluding a given memory group, either
+ * belonging to a specific node, or belonging to any node.
+ */
+int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
+			       struct memory_group *excluded, void *arg)
+{
+	struct memory_group *group;
+	unsigned long index;
+	int ret = 0;
+
+	xa_for_each_marked(&memory_groups, index, group,
+			   MEMORY_GROUP_MARK_DYNAMIC) {
+		if (group == excluded)
+			continue;
+#ifdef CONFIG_NUMA
+		if (nid != NUMA_NO_NODE && group->nid != nid)
+			continue;
+#endif /* CONFIG_NUMA */
+		ret = func(group, arg);
+		if (ret)
+			break;
+	}
+	return ret;
+}
diff --git a/drivers/base/node.c b/drivers/base/node.c
index be16bbff11cc..c56d34f8158f 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -785,8 +785,6 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 static int __ref get_nid_for_pfn(unsigned long pfn)
 {
-	if (!pfn_valid_within(pfn))
-		return -1;
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 	if (system_state < SYSTEM_RUNNING)
 		return early_pfn_to_nid(pfn);
diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c
index 0251f3e6e61d..4110c19c08dc 100644
--- a/drivers/base/power/clock_ops.c
+++ b/drivers/base/power/clock_ops.c
@@ -519,6 +519,23 @@ void pm_clk_destroy(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(pm_clk_destroy);
 
+static void pm_clk_destroy_action(void *data)
+{
+	pm_clk_destroy(data);
+}
+
+int devm_pm_clk_create(struct device *dev)
+{
+	int ret;
+
+	ret = pm_clk_create(dev);
+	if (ret)
+		return ret;
+
+	return devm_add_action_or_reset(dev, pm_clk_destroy_action, dev);
+}
+EXPORT_SYMBOL_GPL(devm_pm_clk_create);
+
 /**
  * pm_clk_suspend - Disable clocks in a device's PM clock list.
  * @dev: Device to disable the clocks for.
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index d568772152c2..cbea78e79f3d 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1642,7 +1642,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 	}
 
 	dev->power.may_skip_resume = true;
-	dev->power.must_resume = false;
+	dev->power.must_resume = !dev_pm_test_driver_flags(dev, DPM_FLAG_MAY_SKIP_RESUME);
 
 	dpm_watchdog_set(&wd, dev);
 	device_lock(dev);
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 8a66eaf731e4..ec94049442b9 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1447,6 +1447,23 @@ void pm_runtime_enable(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(pm_runtime_enable);
 
+static void pm_runtime_disable_action(void *data)
+{
+	pm_runtime_disable(data);
+}
+
+/**
+ * devm_pm_runtime_enable - devres-enabled version of pm_runtime_enable.
+ * @dev: Device to handle.
+ */
+int devm_pm_runtime_enable(struct device *dev)
+{
+	pm_runtime_enable(dev);
+
+	return devm_add_action_or_reset(dev, pm_runtime_disable_action, dev);
+}
+EXPORT_SYMBOL_GPL(devm_pm_runtime_enable);
+
 /**
  * pm_runtime_forbid - Block runtime PM of a device.
  * @dev: Device to handle.
diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c
index 3bad3266a2ad..b91a3a9bf9f6 100644
--- a/drivers/base/power/wakeirq.c
+++ b/drivers/base/power/wakeirq.c
@@ -12,14 +12,11 @@
 /**
  * dev_pm_attach_wake_irq - Attach device interrupt as a wake IRQ
  * @dev: Device entry
- * @irq: Device wake-up capable interrupt
  * @wirq: Wake irq specific data
  *
- * Internal function to attach either a device IO interrupt or a
- * dedicated wake-up interrupt as a wake IRQ.
+ * Internal function to attach a dedicated wake-up interrupt as a wake IRQ.
  */
-static int dev_pm_attach_wake_irq(struct device *dev, int irq,
-				  struct wake_irq *wirq)
+static int dev_pm_attach_wake_irq(struct device *dev, struct wake_irq *wirq)
 {
 	unsigned long flags;
 
@@ -65,7 +62,7 @@ int dev_pm_set_wake_irq(struct device *dev, int irq)
 	wirq->dev = dev;
 	wirq->irq = irq;
 
-	err = dev_pm_attach_wake_irq(dev, irq, wirq);
+	err = dev_pm_attach_wake_irq(dev, wirq);
 	if (err)
 		kfree(wirq);
 
@@ -196,7 +193,7 @@ int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq)
 	if (err)
 		goto err_free_name;
 
-	err = dev_pm_attach_wake_irq(dev, irq, wirq);
+	err = dev_pm_attach_wake_irq(dev, wirq);
 	if (err)
 		goto err_free_irq;
 
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index fbb3a558139f..ab3e37aa1830 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -74,7 +74,6 @@ config N64CART
 
 config CDROM
 	tristate
-	select BLK_SCSI_REQUEST
 
 config GDROM
 	tristate "SEGA Dreamcast GD-ROM drive"
@@ -306,7 +305,7 @@ config CDROM_PKTCDVD
 	tristate "Packet writing on CD/DVD media (DEPRECATED)"
 	depends on !UML
 	select CDROM
-	select BLK_SCSI_REQUEST
+	select SCSI_COMMON
 	help
 	  Note: This driver is deprecated and will be removed from the
 	  kernel in the near future!
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index fa1c298a8cfb..7bf4686af774 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -2111,18 +2111,6 @@ int loop_register_transfer(struct loop_func_table *funcs)
 	return 0;
 }
 
-static int unregister_transfer_cb(int id, void *ptr, void *data)
-{
-	struct loop_device *lo = ptr;
-	struct loop_func_table *xfer = data;
-
-	mutex_lock(&lo->lo_mutex);
-	if (lo->lo_encryption == xfer)
-		loop_release_xfer(lo);
-	mutex_unlock(&lo->lo_mutex);
-	return 0;
-}
-
 int loop_unregister_transfer(int number)
 {
 	unsigned int n = number;
@@ -2130,9 +2118,20 @@ int loop_unregister_transfer(int number)
 
 	if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
 		return -EINVAL;
+	/*
+	 * This function is called from only cleanup_cryptoloop().
+	 * Given that each loop device that has a transfer enabled holds a
+	 * reference to the module implementing it we should never get here
+	 * with a transfer that is set (unless forced module unloading is
+	 * requested). Thus, check module's refcount and warn if this is
+	 * not a clean unloading.
+	 */
+#ifdef CONFIG_MODULE_UNLOAD
+	if (xfer->owner && module_refcount(xfer->owner) != -1)
+		pr_err("Danger! Unregistering an in use transfer function.\n");
+#endif
 
 	xfer_funcs[n] = NULL;
-	idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer);
 	return 0;
 }
 
@@ -2323,8 +2322,9 @@ static int loop_add(int i)
 	} else {
 		err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL);
 	}
+	mutex_unlock(&loop_ctl_mutex);
 	if (err < 0)
-		goto out_unlock;
+		goto out_free_dev;
 	i = err;
 
 	err = -ENOMEM;
@@ -2393,15 +2393,19 @@ static int loop_add(int i)
 	disk->events		= DISK_EVENT_MEDIA_CHANGE;
 	disk->event_flags	= DISK_EVENT_FLAG_UEVENT;
 	sprintf(disk->disk_name, "loop%d", i);
+	/* Make this loop device reachable from pathname. */
 	add_disk(disk);
+	/* Show this loop device. */
+	mutex_lock(&loop_ctl_mutex);
+	lo->idr_visible = true;
 	mutex_unlock(&loop_ctl_mutex);
 	return i;
 
 out_cleanup_tags:
 	blk_mq_free_tag_set(&lo->tag_set);
 out_free_idr:
+	mutex_lock(&loop_ctl_mutex);
 	idr_remove(&loop_index_idr, i);
-out_unlock:
 	mutex_unlock(&loop_ctl_mutex);
 out_free_dev:
 	kfree(lo);
@@ -2411,9 +2415,14 @@ out:
 
 static void loop_remove(struct loop_device *lo)
 {
+	/* Make this loop device unreachable from pathname. */
 	del_gendisk(lo->lo_disk);
 	blk_cleanup_disk(lo->lo_disk);
 	blk_mq_free_tag_set(&lo->tag_set);
+	mutex_lock(&loop_ctl_mutex);
+	idr_remove(&loop_index_idr, lo->lo_number);
+	mutex_unlock(&loop_ctl_mutex);
+	/* There is no route which can find this loop device. */
 	mutex_destroy(&lo->lo_mutex);
 	kfree(lo);
 }
@@ -2437,31 +2446,40 @@ static int loop_control_remove(int idx)
 		return -EINVAL;
 	}
 		
+	/* Hide this loop device for serialization. */
 	ret = mutex_lock_killable(&loop_ctl_mutex);
 	if (ret)
 		return ret;
-
 	lo = idr_find(&loop_index_idr, idx);
-	if (!lo) {
+	if (!lo || !lo->idr_visible)
 		ret = -ENODEV;
-		goto out_unlock_ctrl;
-	}
+	else
+		lo->idr_visible = false;
+	mutex_unlock(&loop_ctl_mutex);
+	if (ret)
+		return ret;
 
+	/* Check whether this loop device can be removed. */
 	ret = mutex_lock_killable(&lo->lo_mutex);
 	if (ret)
-		goto out_unlock_ctrl;
+		goto mark_visible;
 	if (lo->lo_state != Lo_unbound ||
 	    atomic_read(&lo->lo_refcnt) > 0) {
 		mutex_unlock(&lo->lo_mutex);
 		ret = -EBUSY;
-		goto out_unlock_ctrl;
+		goto mark_visible;
 	}
+	/* Mark this loop device no longer open()-able. */
 	lo->lo_state = Lo_deleting;
 	mutex_unlock(&lo->lo_mutex);
 
-	idr_remove(&loop_index_idr, lo->lo_number);
 	loop_remove(lo);
-out_unlock_ctrl:
+	return 0;
+
+mark_visible:
+	/* Show this loop device again. */
+	mutex_lock(&loop_ctl_mutex);
+	lo->idr_visible = true;
 	mutex_unlock(&loop_ctl_mutex);
 	return ret;
 }
@@ -2475,7 +2493,8 @@ static int loop_control_get_free(int idx)
 	if (ret)
 		return ret;
 	idr_for_each_entry(&loop_index_idr, lo, id) {
-		if (lo->lo_state == Lo_unbound)
+		/* Hitting a race results in creating a new loop device which is harmless. */
+		if (lo->idr_visible && data_race(lo->lo_state) == Lo_unbound)
 			goto found;
 	}
 	mutex_unlock(&loop_ctl_mutex);
@@ -2591,10 +2610,14 @@ static void __exit loop_exit(void)
 	unregister_blkdev(LOOP_MAJOR, "loop");
 	misc_deregister(&loop_misc);
 
-	mutex_lock(&loop_ctl_mutex);
+	/*
+	 * There is no need to use loop_ctl_mutex here, for nobody else can
+	 * access loop_index_idr when this module is unloading (unless forced
+	 * module unloading is requested). If this is not a clean unloading,
+	 * we have no means to avoid kernel crash.
+	 */
 	idr_for_each_entry(&loop_index_idr, lo, id)
 		loop_remove(lo);
-	mutex_unlock(&loop_ctl_mutex);
 
 	idr_destroy(&loop_index_idr);
 }
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 1988899db63a..04c88dd6eabd 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -68,6 +68,7 @@ struct loop_device {
 	struct blk_mq_tag_set	tag_set;
 	struct gendisk		*lo_disk;
 	struct mutex		lo_mutex;
+	bool			idr_visible;
 };
 
 struct loop_cmd {
diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c
index c84be0028f63..26798da661bd 100644
--- a/drivers/block/n64cart.c
+++ b/drivers/block/n64cart.c
@@ -129,8 +129,8 @@ static int __init n64cart_probe(struct platform_device *pdev)
 	}
 
 	reg_base = devm_platform_ioremap_resource(pdev, 0);
-	if (!reg_base)
-		return -EINVAL;
+	if (IS_ERR(reg_base))
+		return PTR_ERR(reg_base);
 
 	disk = blk_alloc_disk(NUMA_NO_NODE);
 	if (!disk)
diff --git a/drivers/block/paride/Kconfig b/drivers/block/paride/Kconfig
index 7c6ae1036927..a295634597ba 100644
--- a/drivers/block/paride/Kconfig
+++ b/drivers/block/paride/Kconfig
@@ -27,7 +27,6 @@ config PARIDE_PCD
 	tristate "Parallel port ATAPI CD-ROMs"
 	depends on PARIDE
 	select CDROM
-	select BLK_SCSI_REQUEST # only for the generic cdrom code
 	help
 	  This option enables the high-level driver for ATAPI CD-ROM devices
 	  connected through a parallel port. If you chose to build PARIDE
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 57c6ae7debd9..9b3bd083b411 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -762,7 +762,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 		goto out_free_vblk;
 
 	/* Default queue sizing is to fill the ring. */
-	if (likely(!virtblk_queue_depth)) {
+	if (!virtblk_queue_depth) {
 		queue_depth = vblk->vqs[0].vq->num_free;
 		/* ... but without indirect descs, we use 2 descs per req */
 		if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
@@ -836,7 +836,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 	else
 		blk_size = queue_logical_block_size(q);
 
-	if (unlikely(blk_size < SECTOR_SIZE || blk_size > PAGE_SIZE)) {
+	if (blk_size < SECTOR_SIZE || blk_size > PAGE_SIZE) {
 		dev_err(&vdev->dev,
 			"block size is changed unexpectedly, now is %u\n",
 			blk_size);
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 125b22205d38..33eba3df4dd9 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -8,7 +8,6 @@
 
 #define pr_fmt(fmt) "xen-blkback: " fmt
 
-#include <stdarg.h>
 #include <linux/module.h>
 #include <linux/kthread.h>
 #include <xen/events.h>
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 715bfa8aca7f..72902104f111 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -80,6 +80,7 @@ enum blkif_state {
 	BLKIF_STATE_DISCONNECTED,
 	BLKIF_STATE_CONNECTED,
 	BLKIF_STATE_SUSPENDED,
+	BLKIF_STATE_ERROR,
 };
 
 struct grant {
@@ -89,6 +90,7 @@ struct grant {
 };
 
 enum blk_req_status {
+	REQ_PROCESSING,
 	REQ_WAITING,
 	REQ_DONE,
 	REQ_ERROR,
@@ -530,10 +532,10 @@ static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo,
 
 	id = get_id_from_freelist(rinfo);
 	rinfo->shadow[id].request = req;
-	rinfo->shadow[id].status = REQ_WAITING;
+	rinfo->shadow[id].status = REQ_PROCESSING;
 	rinfo->shadow[id].associated_id = NO_ASSOCIATED_ID;
 
-	(*ring_req)->u.rw.id = id;
+	rinfo->shadow[id].req.u.rw.id = id;
 
 	return id;
 }
@@ -541,11 +543,12 @@ static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo,
 static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_info *rinfo)
 {
 	struct blkfront_info *info = rinfo->dev_info;
-	struct blkif_request *ring_req;
+	struct blkif_request *ring_req, *final_ring_req;
 	unsigned long id;
 
 	/* Fill out a communications ring structure. */
-	id = blkif_ring_get_request(rinfo, req, &ring_req);
+	id = blkif_ring_get_request(rinfo, req, &final_ring_req);
+	ring_req = &rinfo->shadow[id].req;
 
 	ring_req->operation = BLKIF_OP_DISCARD;
 	ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
@@ -556,8 +559,9 @@ static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_inf
 	else
 		ring_req->u.discard.flag = 0;
 
-	/* Keep a private copy so we can reissue requests when recovering. */
-	rinfo->shadow[id].req = *ring_req;
+	/* Copy the request to the ring page. */
+	*final_ring_req = *ring_req;
+	rinfo->shadow[id].status = REQ_WAITING;
 
 	return 0;
 }
@@ -690,6 +694,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
 {
 	struct blkfront_info *info = rinfo->dev_info;
 	struct blkif_request *ring_req, *extra_ring_req = NULL;
+	struct blkif_request *final_ring_req, *final_extra_ring_req = NULL;
 	unsigned long id, extra_id = NO_ASSOCIATED_ID;
 	bool require_extra_req = false;
 	int i;
@@ -734,7 +739,8 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
 	}
 
 	/* Fill out a communications ring structure. */
-	id = blkif_ring_get_request(rinfo, req, &ring_req);
+	id = blkif_ring_get_request(rinfo, req, &final_ring_req);
+	ring_req = &rinfo->shadow[id].req;
 
 	num_sg = blk_rq_map_sg(req->q, req, rinfo->shadow[id].sg);
 	num_grant = 0;
@@ -785,7 +791,9 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
 		ring_req->u.rw.nr_segments = num_grant;
 		if (unlikely(require_extra_req)) {
 			extra_id = blkif_ring_get_request(rinfo, req,
-							  &extra_ring_req);
+							  &final_extra_ring_req);
+			extra_ring_req = &rinfo->shadow[extra_id].req;
+
 			/*
 			 * Only the first request contains the scatter-gather
 			 * list.
@@ -827,10 +835,13 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
 	if (setup.segments)
 		kunmap_atomic(setup.segments);
 
-	/* Keep a private copy so we can reissue requests when recovering. */
-	rinfo->shadow[id].req = *ring_req;
-	if (unlikely(require_extra_req))
-		rinfo->shadow[extra_id].req = *extra_ring_req;
+	/* Copy request(s) to the ring page. */
+	*final_ring_req = *ring_req;
+	rinfo->shadow[id].status = REQ_WAITING;
+	if (unlikely(require_extra_req)) {
+		*final_extra_ring_req = *extra_ring_req;
+		rinfo->shadow[extra_id].status = REQ_WAITING;
+	}
 
 	if (new_persistent_gnts)
 		gnttab_free_grant_references(setup.gref_head);
@@ -1353,8 +1364,8 @@ static enum blk_req_status blkif_rsp_to_req_status(int rsp)
 static int blkif_get_final_status(enum blk_req_status s1,
 				  enum blk_req_status s2)
 {
-	BUG_ON(s1 == REQ_WAITING);
-	BUG_ON(s2 == REQ_WAITING);
+	BUG_ON(s1 < REQ_DONE);
+	BUG_ON(s2 < REQ_DONE);
 
 	if (s1 == REQ_ERROR || s2 == REQ_ERROR)
 		return BLKIF_RSP_ERROR;
@@ -1387,7 +1398,7 @@ static bool blkif_completion(unsigned long *id,
 		s->status = blkif_rsp_to_req_status(bret->status);
 
 		/* Wait the second response if not yet here. */
-		if (s2->status == REQ_WAITING)
+		if (s2->status < REQ_DONE)
 			return false;
 
 		bret->status = blkif_get_final_status(s->status,
@@ -1495,7 +1506,7 @@ static bool blkif_completion(unsigned long *id,
 static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 {
 	struct request *req;
-	struct blkif_response *bret;
+	struct blkif_response bret;
 	RING_IDX i, rp;
 	unsigned long flags;
 	struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
@@ -1506,54 +1517,76 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 
 	spin_lock_irqsave(&rinfo->ring_lock, flags);
  again:
-	rp = rinfo->ring.sring->rsp_prod;
-	rmb(); /* Ensure we see queued responses up to 'rp'. */
+	rp = READ_ONCE(rinfo->ring.sring->rsp_prod);
+	virt_rmb(); /* Ensure we see queued responses up to 'rp'. */
+	if (RING_RESPONSE_PROD_OVERFLOW(&rinfo->ring, rp)) {
+		pr_alert("%s: illegal number of responses %u\n",
+			 info->gd->disk_name, rp - rinfo->ring.rsp_cons);
+		goto err;
+	}
 
 	for (i = rinfo->ring.rsp_cons; i != rp; i++) {
 		unsigned long id;
+		unsigned int op;
+
+		RING_COPY_RESPONSE(&rinfo->ring, i, &bret);
+		id = bret.id;
 
-		bret = RING_GET_RESPONSE(&rinfo->ring, i);
-		id   = bret->id;
 		/*
 		 * The backend has messed up and given us an id that we would
 		 * never have given to it (we stamp it up to BLK_RING_SIZE -
 		 * look in get_id_from_freelist.
 		 */
 		if (id >= BLK_RING_SIZE(info)) {
-			WARN(1, "%s: response to %s has incorrect id (%ld)\n",
-			     info->gd->disk_name, op_name(bret->operation), id);
-			/* We can't safely get the 'struct request' as
-			 * the id is busted. */
-			continue;
+			pr_alert("%s: response has incorrect id (%ld)\n",
+				 info->gd->disk_name, id);
+			goto err;
 		}
+		if (rinfo->shadow[id].status != REQ_WAITING) {
+			pr_alert("%s: response references no pending request\n",
+				 info->gd->disk_name);
+			goto err;
+		}
+
+		rinfo->shadow[id].status = REQ_PROCESSING;
 		req  = rinfo->shadow[id].request;
 
-		if (bret->operation != BLKIF_OP_DISCARD) {
+		op = rinfo->shadow[id].req.operation;
+		if (op == BLKIF_OP_INDIRECT)
+			op = rinfo->shadow[id].req.u.indirect.indirect_op;
+		if (bret.operation != op) {
+			pr_alert("%s: response has wrong operation (%u instead of %u)\n",
+				 info->gd->disk_name, bret.operation, op);
+			goto err;
+		}
+
+		if (bret.operation != BLKIF_OP_DISCARD) {
 			/*
 			 * We may need to wait for an extra response if the
 			 * I/O request is split in 2
 			 */
-			if (!blkif_completion(&id, rinfo, bret))
+			if (!blkif_completion(&id, rinfo, &bret))
 				continue;
 		}
 
 		if (add_id_to_freelist(rinfo, id)) {
 			WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
-			     info->gd->disk_name, op_name(bret->operation), id);
+			     info->gd->disk_name, op_name(bret.operation), id);
 			continue;
 		}
 
-		if (bret->status == BLKIF_RSP_OKAY)
+		if (bret.status == BLKIF_RSP_OKAY)
 			blkif_req(req)->error = BLK_STS_OK;
 		else
 			blkif_req(req)->error = BLK_STS_IOERR;
 
-		switch (bret->operation) {
+		switch (bret.operation) {
 		case BLKIF_OP_DISCARD:
-			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
+			if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
 				struct request_queue *rq = info->rq;
-				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
-					   info->gd->disk_name, op_name(bret->operation));
+
+				pr_warn_ratelimited("blkfront: %s: %s op failed\n",
+					   info->gd->disk_name, op_name(bret.operation));
 				blkif_req(req)->error = BLK_STS_NOTSUPP;
 				info->feature_discard = 0;
 				info->feature_secdiscard = 0;
@@ -1563,15 +1596,15 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 			break;
 		case BLKIF_OP_FLUSH_DISKCACHE:
 		case BLKIF_OP_WRITE_BARRIER:
-			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
-				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
-				       info->gd->disk_name, op_name(bret->operation));
+			if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
+				pr_warn_ratelimited("blkfront: %s: %s op failed\n",
+				       info->gd->disk_name, op_name(bret.operation));
 				blkif_req(req)->error = BLK_STS_NOTSUPP;
 			}
-			if (unlikely(bret->status == BLKIF_RSP_ERROR &&
+			if (unlikely(bret.status == BLKIF_RSP_ERROR &&
 				     rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
-				printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
-				       info->gd->disk_name, op_name(bret->operation));
+				pr_warn_ratelimited("blkfront: %s: empty %s op failed\n",
+				       info->gd->disk_name, op_name(bret.operation));
 				blkif_req(req)->error = BLK_STS_NOTSUPP;
 			}
 			if (unlikely(blkif_req(req)->error)) {
@@ -1584,9 +1617,10 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 			fallthrough;
 		case BLKIF_OP_READ:
 		case BLKIF_OP_WRITE:
-			if (unlikely(bret->status != BLKIF_RSP_OKAY))
-				dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
-					"request: %x\n", bret->status);
+			if (unlikely(bret.status != BLKIF_RSP_OKAY))
+				dev_dbg_ratelimited(&info->xbdev->dev,
+					"Bad return from blkdev data request: %#x\n",
+					bret.status);
 
 			break;
 		default:
@@ -1612,6 +1646,14 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 	spin_unlock_irqrestore(&rinfo->ring_lock, flags);
 
 	return IRQ_HANDLED;
+
+ err:
+	info->connected = BLKIF_STATE_ERROR;
+
+	spin_unlock_irqrestore(&rinfo->ring_lock, flags);
+
+	pr_alert("%s disabled for further use\n", info->gd->disk_name);
+	return IRQ_HANDLED;
 }
 
 
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index feb827eefd1a..bd2e5b1560f5 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -629,7 +629,7 @@ int register_cdrom(struct gendisk *disk, struct cdrom_device_info *cdi)
 	if (CDROM_CAN(CDC_MRW_W))
 		cdi->exit = cdrom_mrw_exit;
 
-	if (cdi->disk)
+	if (cdi->ops->read_cdda_bpc)
 		cdi->cdda_method = CDDA_BPC_FULL;
 	else
 		cdi->cdda_method = CDDA_OLD;
@@ -2159,81 +2159,26 @@ static int cdrom_read_cdda_old(struct cdrom_device_info *cdi, __u8 __user *ubuf,
 static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
 			       int lba, int nframes)
 {
-	struct request_queue *q = cdi->disk->queue;
-	struct request *rq;
-	struct scsi_request *req;
-	struct bio *bio;
-	unsigned int len;
+	int max_frames = (queue_max_sectors(cdi->disk->queue) << 9) /
+			  CD_FRAMESIZE_RAW;
 	int nr, ret = 0;
 
-	if (!q)
-		return -ENXIO;
-
-	if (!blk_queue_scsi_passthrough(q)) {
-		WARN_ONCE(true,
-			  "Attempt read CDDA info through a non-SCSI queue\n");
-		return -EINVAL;
-	}
-
 	cdi->last_sense = 0;
 
 	while (nframes) {
-		nr = nframes;
 		if (cdi->cdda_method == CDDA_BPC_SINGLE)
 			nr = 1;
-		if (nr * CD_FRAMESIZE_RAW > (queue_max_sectors(q) << 9))
-			nr = (queue_max_sectors(q) << 9) / CD_FRAMESIZE_RAW;
-
-		len = nr * CD_FRAMESIZE_RAW;
-
-		rq = blk_get_request(q, REQ_OP_DRV_IN, 0);
-		if (IS_ERR(rq)) {
-			ret = PTR_ERR(rq);
-			break;
-		}
-		req = scsi_req(rq);
-
-		ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
-		if (ret) {
-			blk_put_request(rq);
-			break;
-		}
-
-		req->cmd[0] = GPCMD_READ_CD;
-		req->cmd[1] = 1 << 2;
-		req->cmd[2] = (lba >> 24) & 0xff;
-		req->cmd[3] = (lba >> 16) & 0xff;
-		req->cmd[4] = (lba >>  8) & 0xff;
-		req->cmd[5] = lba & 0xff;
-		req->cmd[6] = (nr >> 16) & 0xff;
-		req->cmd[7] = (nr >>  8) & 0xff;
-		req->cmd[8] = nr & 0xff;
-		req->cmd[9] = 0xf8;
-
-		req->cmd_len = 12;
-		rq->timeout = 60 * HZ;
-		bio = rq->bio;
-
-		blk_execute_rq(cdi->disk, rq, 0);
-		if (scsi_req(rq)->result) {
-			struct scsi_sense_hdr sshdr;
-
-			ret = -EIO;
-			scsi_normalize_sense(req->sense, req->sense_len,
-					     &sshdr);
-			cdi->last_sense = sshdr.sense_key;
-		}
-
-		if (blk_rq_unmap_user(bio))
-			ret = -EFAULT;
-		blk_put_request(rq);
+		else
+			nr = min(nframes, max_frames);
 
+		ret = cdi->ops->read_cdda_bpc(cdi, ubuf, lba, nr,
+					      &cdi->last_sense);
 		if (ret)
 			break;
 
 		nframes -= nr;
 		lba += nr;
-		ubuf += len;
+		ubuf += (nr * CD_FRAMESIZE_RAW);
 	}
 
 	return ret;
@@ -3357,13 +3302,6 @@ int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev,
 	void __user *argp = (void __user *)arg;
 	int ret;
 
-	/*
-	 * Try the generic SCSI command ioctl's first.
-	 */
-	ret = scsi_cmd_blk_ioctl(bdev, mode, cmd, argp);
-	if (ret != -ENOTTY)
-		return ret;
-
 	switch (cmd) {
 	case CDROMMULTISESSION:
 		return cdrom_ioctl_multisession(cdi, argp);
diff --git a/drivers/char/ipmi/ipmi_si.h b/drivers/char/ipmi/ipmi_si.h
index 0a4c69539f24..a7ead2a4c753 100644
--- a/drivers/char/ipmi/ipmi_si.h
+++ b/drivers/char/ipmi/ipmi_si.h
@@ -73,7 +73,7 @@ irqreturn_t ipmi_si_irq_handler(int irq, void *data);
 void ipmi_irq_start_cleanup(struct si_sm_io *io);
 int ipmi_std_irq_setup(struct si_sm_io *io);
 void ipmi_irq_finish_setup(struct si_sm_io *io);
-int ipmi_si_remove_by_dev(struct device *dev);
+void ipmi_si_remove_by_dev(struct device *dev);
 struct device *ipmi_si_remove_by_data(int addr_space, enum si_type si_type,
 				      unsigned long addr);
 void ipmi_hardcode_init(void);
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 62929a3e397e..6f3272b58ced 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -591,7 +591,7 @@ static void handle_transaction_done(struct smi_info *smi_info)
 		smi_info->handlers->get_result(smi_info->si_sm, msg, 3);
 		if (msg[2] != 0) {
 			/* Error clearing flags */
-			dev_warn(smi_info->io.dev,
+			dev_warn_ratelimited(smi_info->io.dev,
 				 "Error clearing flags: %2.2x\n", msg[2]);
 		}
 		smi_info->si_state = SI_NORMAL;
@@ -683,10 +683,10 @@ static void handle_transaction_done(struct smi_info *smi_info)
 		/* We got the flags from the SMI, now handle them. */
 		smi_info->handlers->get_result(smi_info->si_sm, msg, 4);
 		if (msg[2] != 0) {
-			dev_warn(smi_info->io.dev,
-				 "Couldn't get irq info: %x.\n", msg[2]);
-			dev_warn(smi_info->io.dev,
-				 "Maybe ok, but ipmi might run very slowly.\n");
+			dev_warn_ratelimited(smi_info->io.dev,
+				"Couldn't get irq info: %x,\n"
+				"Maybe ok, but ipmi might run very slowly.\n",
+				msg[2]);
 			smi_info->si_state = SI_NORMAL;
 			break;
 		}
@@ -721,7 +721,7 @@ static void handle_transaction_done(struct smi_info *smi_info)
 
 		smi_info->handlers->get_result(smi_info->si_sm, msg, 4);
 		if (msg[2] != 0)
-			dev_warn(smi_info->io.dev,
+			dev_warn_ratelimited(smi_info->io.dev,
 				 "Could not set the global enables: 0x%x.\n",
 				 msg[2]);
 
@@ -1343,7 +1343,7 @@ retry:
 
 		if (cc != IPMI_CC_NO_ERROR &&
 		    ++retry_count <= GET_DEVICE_ID_MAX_RETRY) {
-			dev_warn(smi_info->io.dev,
+			dev_warn_ratelimited(smi_info->io.dev,
 			    "BMC returned 0x%2.2x, retry get bmc device id\n",
 			    cc);
 			goto retry;
@@ -1605,7 +1605,7 @@ static ssize_t name##_show(struct device *dev,			\
 									\
 	return snprintf(buf, 10, "%u\n", smi_get_stat(smi_info, name));	\
 }									\
-static DEVICE_ATTR(name, 0444, name##_show, NULL)
+static DEVICE_ATTR_RO(name)
 
 static ssize_t type_show(struct device *dev,
 			 struct device_attribute *attr,
@@ -1615,7 +1615,7 @@ static ssize_t type_show(struct device *dev,
 
 	return snprintf(buf, 10, "%s\n", si_to_str[smi_info->io.si_type]);
 }
-static DEVICE_ATTR(type, 0444, type_show, NULL);
+static DEVICE_ATTR_RO(type);
 
 static ssize_t interrupts_enabled_show(struct device *dev,
 				       struct device_attribute *attr,
@@ -1626,8 +1626,7 @@ static ssize_t interrupts_enabled_show(struct device *dev,
 
 	return snprintf(buf, 10, "%d\n", enabled);
 }
-static DEVICE_ATTR(interrupts_enabled, 0444,
-		   interrupts_enabled_show, NULL);
+static DEVICE_ATTR_RO(interrupts_enabled);
 
 IPMI_SI_ATTR(short_timeouts);
 IPMI_SI_ATTR(long_timeouts);
@@ -1658,7 +1657,7 @@ static ssize_t params_show(struct device *dev,
 			smi_info->io.irq,
 			smi_info->io.slave_addr);
 }
-static DEVICE_ATTR(params, 0444, params_show, NULL);
+static DEVICE_ATTR_RO(params);
 
 static struct attribute *ipmi_si_dev_attrs[] = {
 	&dev_attr_type.attr,
@@ -2228,22 +2227,18 @@ static void cleanup_one_si(struct smi_info *smi_info)
 	kfree(smi_info);
 }
 
-int ipmi_si_remove_by_dev(struct device *dev)
+void ipmi_si_remove_by_dev(struct device *dev)
 {
 	struct smi_info *e;
-	int rv = -ENOENT;
 
 	mutex_lock(&smi_infos_lock);
 	list_for_each_entry(e, &smi_infos, link) {
 		if (e->io.dev == dev) {
 			cleanup_one_si(e);
-			rv = 0;
 			break;
 		}
 	}
 	mutex_unlock(&smi_infos_lock);
-
-	return rv;
 }
 
 struct device *ipmi_si_remove_by_data(int addr_space, enum si_type si_type,
diff --git a/drivers/char/ipmi/ipmi_si_parisc.c b/drivers/char/ipmi/ipmi_si_parisc.c
index 11c9160275df..2be2967f6b5f 100644
--- a/drivers/char/ipmi/ipmi_si_parisc.c
+++ b/drivers/char/ipmi/ipmi_si_parisc.c
@@ -29,9 +29,9 @@ static int __init ipmi_parisc_probe(struct parisc_device *dev)
 	return ipmi_si_add_smi(&io);
 }
 
-static int __exit ipmi_parisc_remove(struct parisc_device *dev)
+static void __exit ipmi_parisc_remove(struct parisc_device *dev)
 {
-	return ipmi_si_remove_by_dev(&dev->dev);
+	ipmi_si_remove_by_dev(&dev->dev);
 }
 
 static const struct parisc_device_id ipmi_parisc_tbl[] __initconst = {
diff --git a/drivers/char/ipmi/ipmi_si_platform.c b/drivers/char/ipmi/ipmi_si_platform.c
index 380a6a542890..505cc978c97a 100644
--- a/drivers/char/ipmi/ipmi_si_platform.c
+++ b/drivers/char/ipmi/ipmi_si_platform.c
@@ -411,7 +411,9 @@ static int ipmi_probe(struct platform_device *pdev)
 
 static int ipmi_remove(struct platform_device *pdev)
 {
-	return ipmi_si_remove_by_dev(&pdev->dev);
+	ipmi_si_remove_by_dev(&pdev->dev);
+
+	return 0;
 }
 
 static int pdev_match_name(struct device *dev, const void *data)
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index e873f9ea2e65..c5b3dc97396a 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -403,6 +403,7 @@ source "drivers/clk/mediatek/Kconfig"
 source "drivers/clk/meson/Kconfig"
 source "drivers/clk/mstar/Kconfig"
 source "drivers/clk/mvebu/Kconfig"
+source "drivers/clk/pistachio/Kconfig"
 source "drivers/clk/qcom/Kconfig"
 source "drivers/clk/ralink/Kconfig"
 source "drivers/clk/renesas/Kconfig"
diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index 2b91d34c582b..e42312121e51 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -97,7 +97,7 @@ obj-y					+= mstar/
 obj-y					+= mvebu/
 obj-$(CONFIG_ARCH_MXS)			+= mxs/
 obj-$(CONFIG_COMMON_CLK_NXP)		+= nxp/
-obj-$(CONFIG_MACH_PISTACHIO)		+= pistachio/
+obj-$(CONFIG_COMMON_CLK_PISTACHIO)	+= pistachio/
 obj-$(CONFIG_COMMON_CLK_PXA)		+= pxa/
 obj-$(CONFIG_COMMON_CLK_QCOM)		+= qcom/
 obj-y					+= ralink/
diff --git a/drivers/clk/at91/clk-generated.c b/drivers/clk/at91/clk-generated.c
index b4fc8d71daf2..b656d25a9767 100644
--- a/drivers/clk/at91/clk-generated.c
+++ b/drivers/clk/at91/clk-generated.c
@@ -128,6 +128,12 @@ static int clk_generated_determine_rate(struct clk_hw *hw,
 	int i;
 	u32 div;
 
+	/* do not look for a rate that is outside of our range */
+	if (gck->range.max && req->rate > gck->range.max)
+		req->rate = gck->range.max;
+	if (gck->range.min && req->rate < gck->range.min)
+		req->rate = gck->range.min;
+
 	for (i = 0; i < clk_hw_get_num_parents(hw); i++) {
 		if (gck->chg_pid == i)
 			continue;
diff --git a/drivers/clk/at91/sama7g5.c b/drivers/clk/at91/sama7g5.c
index 9e1ec48c4474..cf8c079aa086 100644
--- a/drivers/clk/at91/sama7g5.c
+++ b/drivers/clk/at91/sama7g5.c
@@ -35,7 +35,7 @@ static DEFINE_SPINLOCK(pmc_pll_lock);
 static DEFINE_SPINLOCK(pmc_mck0_lock);
 static DEFINE_SPINLOCK(pmc_mckX_lock);
 
-/**
+/*
  * PLL clocks identifiers
  * @PLL_ID_CPU:		CPU PLL identifier
  * @PLL_ID_SYS:		System PLL identifier
@@ -56,7 +56,7 @@ enum pll_ids {
 	PLL_ID_MAX,
 };
 
-/**
+/*
  * PLL type identifiers
  * @PLL_TYPE_FRAC:	fractional PLL identifier
  * @PLL_TYPE_DIV:	divider PLL identifier
@@ -118,7 +118,7 @@ static const struct clk_pll_characteristics pll_characteristics = {
 	.output = pll_outputs,
 };
 
-/**
+/*
  * PLL clocks description
  * @n:		clock name
  * @p:		clock parent
@@ -285,7 +285,7 @@ static const struct {
 	},
 };
 
-/**
+/*
  * Master clock (MCK[1..4]) description
  * @n:			clock name
  * @ep:			extra parents names array
@@ -337,7 +337,7 @@ static const struct {
 	  .c = 1, },
 };
 
-/**
+/*
  * System clock description
  * @n:	clock name
  * @p:	clock parent name
@@ -361,7 +361,7 @@ static const struct {
 /* Mux table for programmable clocks. */
 static u32 sama7g5_prog_mux_table[] = { 0, 1, 2, 5, 6, 7, 8, 9, 10, };
 
-/**
+/*
  * Peripheral clock description
  * @n:		clock name
  * @p:		clock parent name
@@ -449,7 +449,7 @@ static const struct {
 	{ .n = "uhphs_clk",	.p = "mck1", .id = 106, },
 };
 
-/**
+/*
  * Generic clock description
  * @n:			clock name
  * @pp:			PLL parents
diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c
index 1ac803e14fa3..a254512965eb 100644
--- a/drivers/clk/bcm/clk-bcm2835.c
+++ b/drivers/clk/bcm/clk-bcm2835.c
@@ -805,11 +805,10 @@ static int bcm2835_pll_divider_is_on(struct clk_hw *hw)
 	return !(cprman_read(cprman, data->a2w_reg) & A2W_PLL_CHANNEL_DISABLE);
 }
 
-static long bcm2835_pll_divider_round_rate(struct clk_hw *hw,
-					   unsigned long rate,
-					   unsigned long *parent_rate)
+static int bcm2835_pll_divider_determine_rate(struct clk_hw *hw,
+					      struct clk_rate_request *req)
 {
-	return clk_divider_ops.round_rate(hw, rate, parent_rate);
+	return clk_divider_ops.determine_rate(hw, req);
 }
 
 static unsigned long bcm2835_pll_divider_get_rate(struct clk_hw *hw,
@@ -901,7 +900,7 @@ static const struct clk_ops bcm2835_pll_divider_clk_ops = {
 	.unprepare = bcm2835_pll_divider_off,
 	.recalc_rate = bcm2835_pll_divider_get_rate,
 	.set_rate = bcm2835_pll_divider_set_rate,
-	.round_rate = bcm2835_pll_divider_round_rate,
+	.determine_rate = bcm2835_pll_divider_determine_rate,
 	.debug_init = bcm2835_pll_divider_debug_init,
 };
 
diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c
index 87ba4966b0e8..f6b2bf558486 100644
--- a/drivers/clk/clk-divider.c
+++ b/drivers/clk/clk-divider.c
@@ -446,6 +446,27 @@ static long clk_divider_round_rate(struct clk_hw *hw, unsigned long rate,
 				  divider->width, divider->flags);
 }
 
+static int clk_divider_determine_rate(struct clk_hw *hw,
+				      struct clk_rate_request *req)
+{
+	struct clk_divider *divider = to_clk_divider(hw);
+
+	/* if read only, just return current value */
+	if (divider->flags & CLK_DIVIDER_READ_ONLY) {
+		u32 val;
+
+		val = clk_div_readl(divider) >> divider->shift;
+		val &= clk_div_mask(divider->width);
+
+		return divider_ro_determine_rate(hw, req, divider->table,
+						 divider->width,
+						 divider->flags, val);
+	}
+
+	return divider_determine_rate(hw, req, divider->table, divider->width,
+				      divider->flags);
+}
+
 int divider_get_val(unsigned long rate, unsigned long parent_rate,
 		    const struct clk_div_table *table, u8 width,
 		    unsigned long flags)
@@ -501,6 +522,7 @@ static int clk_divider_set_rate(struct clk_hw *hw, unsigned long rate,
 const struct clk_ops clk_divider_ops = {
 	.recalc_rate = clk_divider_recalc_rate,
 	.round_rate = clk_divider_round_rate,
+	.determine_rate = clk_divider_determine_rate,
 	.set_rate = clk_divider_set_rate,
 };
 EXPORT_SYMBOL_GPL(clk_divider_ops);
@@ -508,6 +530,7 @@ EXPORT_SYMBOL_GPL(clk_divider_ops);
 const struct clk_ops clk_divider_ro_ops = {
 	.recalc_rate = clk_divider_recalc_rate,
 	.round_rate = clk_divider_round_rate,
+	.determine_rate = clk_divider_determine_rate,
 };
 EXPORT_SYMBOL_GPL(clk_divider_ro_ops);
 
diff --git a/drivers/clk/clk-fractional-divider.c b/drivers/clk/clk-fractional-divider.c
index b1e556f20911..4274540327ce 100644
--- a/drivers/clk/clk-fractional-divider.c
+++ b/drivers/clk/clk-fractional-divider.c
@@ -3,8 +3,39 @@
  * Copyright (C) 2014 Intel Corporation
  *
  * Adjustable fractional divider clock implementation.
- * Output rate = (m / n) * parent_rate.
  * Uses rational best approximation algorithm.
+ *
+ * Output is calculated as
+ *
+ *	rate = (m / n) * parent_rate				(1)
+ *
+ * This is useful when we have a prescaler block which asks for
+ * m (numerator) and n (denominator) values to be provided to satisfy
+ * the (1) as much as possible.
+ *
+ * Since m and n have the limitation by a range, e.g.
+ *
+ *	n >= 1, n < N_width, where N_width = 2^nwidth		(2)
+ *
+ * for some cases the output may be saturated. Hence, from (1) and (2),
+ * assuming the worst case when m = 1, the inequality
+ *
+ *	floor(log2(parent_rate / rate)) <= nwidth		(3)
+ *
+ * may be derived. Thus, in cases when
+ *
+ *	(parent_rate / rate) >> N_width				(4)
+ *
+ * we might scale up the rate by 2^scale (see the description of
+ * CLK_FRAC_DIVIDER_POWER_OF_TWO_PS for additional information), where
+ *
+ *	scale = floor(log2(parent_rate / rate)) - nwidth	(5)
+ *
+ * and assume that the IP, that needs m and n, has also its own
+ * prescaler, which is capable to divide by 2^scale. In this way
+ * we get the denominator to satisfy the desired range (2) and
+ * at the same time much much better result of m and n than simple
+ * saturated values.
  */
 
 #include <linux/clk-provider.h>
@@ -14,6 +45,8 @@
 #include <linux/slab.h>
 #include <linux/rational.h>
 
+#include "clk-fractional-divider.h"
+
 static inline u32 clk_fd_readl(struct clk_fractional_divider *fd)
 {
 	if (fd->flags & CLK_FRAC_DIVIDER_BIG_ENDIAN)
@@ -68,21 +101,26 @@ static unsigned long clk_fd_recalc_rate(struct clk_hw *hw,
 	return ret;
 }
 
-static void clk_fd_general_approximation(struct clk_hw *hw, unsigned long rate,
-					 unsigned long *parent_rate,
-					 unsigned long *m, unsigned long *n)
+void clk_fractional_divider_general_approximation(struct clk_hw *hw,
+						  unsigned long rate,
+						  unsigned long *parent_rate,
+						  unsigned long *m, unsigned long *n)
 {
 	struct clk_fractional_divider *fd = to_clk_fd(hw);
-	unsigned long scale;
 
 	/*
 	 * Get rate closer to *parent_rate to guarantee there is no overflow
 	 * for m and n. In the result it will be the nearest rate left shifted
 	 * by (scale - fd->nwidth) bits.
+	 *
+	 * For the detailed explanation see the top comment in this file.
 	 */
-	scale = fls_long(*parent_rate / rate - 1);
-	if (scale > fd->nwidth)
-		rate <<= scale - fd->nwidth;
+	if (fd->flags & CLK_FRAC_DIVIDER_POWER_OF_TWO_PS) {
+		unsigned long scale = fls_long(*parent_rate / rate - 1);
+
+		if (scale > fd->nwidth)
+			rate <<= scale - fd->nwidth;
+	}
 
 	rational_best_approximation(rate, *parent_rate,
 			GENMASK(fd->mwidth - 1, 0), GENMASK(fd->nwidth - 1, 0),
@@ -102,7 +140,7 @@ static long clk_fd_round_rate(struct clk_hw *hw, unsigned long rate,
 	if (fd->approximation)
 		fd->approximation(hw, rate, parent_rate, &m, &n);
 	else
-		clk_fd_general_approximation(hw, rate, parent_rate, &m, &n);
+		clk_fractional_divider_general_approximation(hw, rate, parent_rate, &m, &n);
 
 	ret = (u64)*parent_rate * m;
 	do_div(ret, n);
diff --git a/drivers/clk/clk-fractional-divider.h b/drivers/clk/clk-fractional-divider.h
new file mode 100644
index 000000000000..f0f71d23797b
--- /dev/null
+++ b/drivers/clk/clk-fractional-divider.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _CLK_FRACTIONAL_DIV_H
+#define _CLK_FRACTIONAL_DIV_H
+
+struct clk_hw;
+
+extern const struct clk_ops clk_fractional_divider_ops;
+
+void clk_fractional_divider_general_approximation(struct clk_hw *hw,
+						  unsigned long rate,
+						  unsigned long *parent_rate,
+						  unsigned long *m,
+						  unsigned long *n);
+
+#endif
diff --git a/drivers/clk/clk-lmk04832.c b/drivers/clk/clk-lmk04832.c
index c7a3a029fb1e..8f02c0b88000 100644
--- a/drivers/clk/clk-lmk04832.c
+++ b/drivers/clk/clk-lmk04832.c
@@ -269,23 +269,14 @@ static bool lmk04832_regmap_rd_regs(struct device *dev, unsigned int reg)
 {
 	switch (reg) {
 	case LMK04832_REG_RST3W ... LMK04832_REG_ID_MASKREV:
-		fallthrough;
 	case LMK04832_REG_ID_VNDR_MSB:
-		fallthrough;
 	case LMK04832_REG_ID_VNDR_LSB:
-		fallthrough;
 	case LMK04832_REG_CLKOUT_CTRL0(0) ... LMK04832_REG_PLL2_DLD_CNT_LSB:
-		fallthrough;
 	case LMK04832_REG_PLL2_LD:
-		fallthrough;
 	case LMK04832_REG_PLL2_PD:
-		fallthrough;
 	case LMK04832_REG_PLL1R_RST:
-		fallthrough;
 	case LMK04832_REG_CLR_PLL_LOST ... LMK04832_REG_RB_DAC_VAL_LSB:
-		fallthrough;
 	case LMK04832_REG_RB_HOLDOVER:
-		fallthrough;
 	case LMK04832_REG_SPI_LOCK:
 		return true;
 	default:
@@ -297,27 +288,18 @@ static bool lmk04832_regmap_wr_regs(struct device *dev, unsigned int reg)
 {
 	switch (reg) {
 	case LMK04832_REG_RST3W:
-		fallthrough;
 	case LMK04832_REG_POWERDOWN:
 		return true;
 	case LMK04832_REG_ID_DEV_TYPE ... LMK04832_REG_ID_MASKREV:
-		fallthrough;
 	case LMK04832_REG_ID_VNDR_MSB:
-		fallthrough;
 	case LMK04832_REG_ID_VNDR_LSB:
 		return false;
 	case LMK04832_REG_CLKOUT_CTRL0(0) ... LMK04832_REG_PLL2_DLD_CNT_LSB:
-		fallthrough;
 	case LMK04832_REG_PLL2_LD:
-		fallthrough;
 	case LMK04832_REG_PLL2_PD:
-		fallthrough;
 	case LMK04832_REG_PLL1R_RST:
-		fallthrough;
 	case LMK04832_REG_CLR_PLL_LOST ... LMK04832_REG_RB_DAC_VAL_LSB:
-		fallthrough;
 	case LMK04832_REG_RB_HOLDOVER:
-		fallthrough;
 	case LMK04832_REG_SPI_LOCK:
 		return true;
 	default:
diff --git a/drivers/clk/clk-palmas.c b/drivers/clk/clk-palmas.c
index e41a3a9f7528..b8c3d0da1918 100644
--- a/drivers/clk/clk-palmas.c
+++ b/drivers/clk/clk-palmas.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Clock driver for Palmas device.
  *
@@ -6,15 +7,6 @@
  *
  * Author:	Laxman Dewangan <ldewangan@nvidia.com>
  *		Peter Ujfalusi <peter.ujfalusi@ti.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any kind,
- * whether express or implied; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
  */
 
 #include <linux/clk.h>
diff --git a/drivers/clk/clk-stm32f4.c b/drivers/clk/clk-stm32f4.c
index 5c75e3d906c2..af46176ad053 100644
--- a/drivers/clk/clk-stm32f4.c
+++ b/drivers/clk/clk-stm32f4.c
@@ -709,10 +709,10 @@ static unsigned long stm32f4_pll_div_recalc_rate(struct clk_hw *hw,
 	return clk_divider_ops.recalc_rate(hw, parent_rate);
 }
 
-static long stm32f4_pll_div_round_rate(struct clk_hw *hw, unsigned long rate,
-				unsigned long *prate)
+static int stm32f4_pll_div_determine_rate(struct clk_hw *hw,
+					  struct clk_rate_request *req)
 {
-	return clk_divider_ops.round_rate(hw, rate, prate);
+	return clk_divider_ops.determine_rate(hw, req);
 }
 
 static int stm32f4_pll_div_set_rate(struct clk_hw *hw, unsigned long rate,
@@ -738,7 +738,7 @@ static int stm32f4_pll_div_set_rate(struct clk_hw *hw, unsigned long rate,
 
 static const struct clk_ops stm32f4_pll_div_ops = {
 	.recalc_rate = stm32f4_pll_div_recalc_rate,
-	.round_rate = stm32f4_pll_div_round_rate,
+	.determine_rate = stm32f4_pll_div_determine_rate,
 	.set_rate = stm32f4_pll_div_set_rate,
 };
 
diff --git a/drivers/clk/clk-stm32h7.c b/drivers/clk/clk-stm32h7.c
index 0ea7261d15e0..1a701eada0c1 100644
--- a/drivers/clk/clk-stm32h7.c
+++ b/drivers/clk/clk-stm32h7.c
@@ -845,10 +845,10 @@ static unsigned long odf_divider_recalc_rate(struct clk_hw *hw,
 	return clk_divider_ops.recalc_rate(hw, parent_rate);
 }
 
-static long odf_divider_round_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long *prate)
+static int odf_divider_determine_rate(struct clk_hw *hw,
+				      struct clk_rate_request *req)
 {
-	return clk_divider_ops.round_rate(hw, rate, prate);
+	return clk_divider_ops.determine_rate(hw, req);
 }
 
 static int odf_divider_set_rate(struct clk_hw *hw, unsigned long rate,
@@ -875,7 +875,7 @@ static int odf_divider_set_rate(struct clk_hw *hw, unsigned long rate,
 
 static const struct clk_ops odf_divider_ops = {
 	.recalc_rate	= odf_divider_recalc_rate,
-	.round_rate	= odf_divider_round_rate,
+	.determine_rate	= odf_divider_determine_rate,
 	.set_rate	= odf_divider_set_rate,
 };
 
diff --git a/drivers/clk/clk-stm32mp1.c b/drivers/clk/clk-stm32mp1.c
index 256575bd29b9..4bd1fe7d8af4 100644
--- a/drivers/clk/clk-stm32mp1.c
+++ b/drivers/clk/clk-stm32mp1.c
@@ -1076,14 +1076,10 @@ static int clk_divider_rtc_set_rate(struct clk_hw *hw, unsigned long rate,
 
 static int clk_divider_rtc_determine_rate(struct clk_hw *hw, struct clk_rate_request *req)
 {
-	unsigned long best_parent_rate = req->best_parent_rate;
+	if (req->best_parent_hw == clk_hw_get_parent_by_index(hw, HSE_RTC))
+		return clk_divider_ops.determine_rate(hw, req);
 
-	if (req->best_parent_hw == clk_hw_get_parent_by_index(hw, HSE_RTC)) {
-		req->rate = clk_divider_ops.round_rate(hw, req->rate, &best_parent_rate);
-		req->best_parent_rate = best_parent_rate;
-	} else {
-		req->rate = best_parent_rate;
-	}
+	req->rate = req->best_parent_rate;
 
 	return 0;
 }
diff --git a/drivers/clk/clk-versaclock5.c b/drivers/clk/clk-versaclock5.c
index 3c737742c2a9..c6d3b1ab3d55 100644
--- a/drivers/clk/clk-versaclock5.c
+++ b/drivers/clk/clk-versaclock5.c
@@ -907,6 +907,7 @@ static const struct of_device_id clk_vc5_of_match[];
 
 static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id)
 {
+	unsigned int oe, sd, src_mask = 0, src_val = 0;
 	struct vc5_driver_data *vc5;
 	struct clk_init_data init;
 	const char *parent_names[2];
@@ -930,11 +931,33 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id)
 		return -EPROBE_DEFER;
 
 	vc5->regmap = devm_regmap_init_i2c(client, &vc5_regmap_config);
-	if (IS_ERR(vc5->regmap)) {
-		dev_err(&client->dev, "failed to allocate register map\n");
-		return PTR_ERR(vc5->regmap);
+	if (IS_ERR(vc5->regmap))
+		return dev_err_probe(&client->dev, PTR_ERR(vc5->regmap),
+				     "failed to allocate register map\n");
+
+	ret = of_property_read_u32(client->dev.of_node, "idt,shutdown", &sd);
+	if (!ret) {
+		src_mask |= VC5_PRIM_SRC_SHDN_EN_GBL_SHDN;
+		if (sd)
+			src_val |= VC5_PRIM_SRC_SHDN_EN_GBL_SHDN;
+	} else if (ret != -EINVAL) {
+		return dev_err_probe(&client->dev, ret,
+				     "could not read idt,shutdown\n");
 	}
 
+	ret = of_property_read_u32(client->dev.of_node,
+				   "idt,output-enable-active", &oe);
+	if (!ret) {
+		src_mask |= VC5_PRIM_SRC_SHDN_SP;
+		if (oe)
+			src_val |= VC5_PRIM_SRC_SHDN_SP;
+	} else if (ret != -EINVAL) {
+		return dev_err_probe(&client->dev, ret,
+				     "could not read idt,output-enable-active\n");
+	}
+
+	regmap_update_bits(vc5->regmap, VC5_PRIM_SRC_SHDN, src_mask, src_val);
+
 	/* Register clock input mux */
 	memset(&init, 0, sizeof(init));
 
@@ -957,10 +980,9 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id)
 		    __clk_get_name(vc5->pin_clkin);
 	}
 
-	if (!init.num_parents) {
-		dev_err(&client->dev, "no input clock specified!\n");
-		return -EINVAL;
-	}
+	if (!init.num_parents)
+		return dev_err_probe(&client->dev, -EINVAL,
+				     "no input clock specified!\n");
 
 	/* Configure Optional Loading Capacitance for external XTAL */
 	if (!(vc5->chip_info->flags & VC5_HAS_INTERNAL_XTAL)) {
@@ -1099,14 +1121,16 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id)
 
 	ret = of_clk_add_hw_provider(client->dev.of_node, vc5_of_clk_get, vc5);
 	if (ret) {
-		dev_err(&client->dev, "unable to add clk provider\n");
+		dev_err_probe(&client->dev, ret,
+			      "unable to add clk provider\n");
 		goto err_clk;
 	}
 
 	return 0;
 
 err_clk_register:
-	dev_err(&client->dev, "unable to register %s\n", init.name);
+	dev_err_probe(&client->dev, ret,
+		      "unable to register %s\n", init.name);
 	kfree(init.name); /* clock framework made a copy of the name */
 err_clk:
 	if (vc5->chip_info->flags & VC5_HAS_INTERNAL_XTAL)
diff --git a/drivers/clk/imx/clk-composite-7ulp.c b/drivers/clk/imx/clk-composite-7ulp.c
index 7c4f31b31eb0..d85ba78abbb1 100644
--- a/drivers/clk/imx/clk-composite-7ulp.c
+++ b/drivers/clk/imx/clk-composite-7ulp.c
@@ -10,6 +10,7 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 
+#include "../clk-fractional-divider.h"
 #include "clk.h"
 
 #define PCG_PCS_SHIFT	24
diff --git a/drivers/clk/imx/clk-composite-8m.c b/drivers/clk/imx/clk-composite-8m.c
index 2c309e3dc8e3..04e728538cef 100644
--- a/drivers/clk/imx/clk-composite-8m.c
+++ b/drivers/clk/imx/clk-composite-8m.c
@@ -216,7 +216,8 @@ struct clk_hw *imx8m_clk_hw_composite_flags(const char *name,
 		div->width = PCG_PREDIV_WIDTH;
 		divider_ops = &imx8m_clk_composite_divider_ops;
 		mux_ops = &clk_mux_ops;
-		flags |= CLK_SET_PARENT_GATE;
+		if (!(composite_flags & IMX_COMPOSITE_FW_MANAGED))
+			flags |= CLK_SET_PARENT_GATE;
 	}
 
 	div->lock = &imx_ccm_lock;
diff --git a/drivers/clk/imx/clk-divider-gate.c b/drivers/clk/imx/clk-divider-gate.c
index 0322a843d245..26b210cba9be 100644
--- a/drivers/clk/imx/clk-divider-gate.c
+++ b/drivers/clk/imx/clk-divider-gate.c
@@ -64,10 +64,10 @@ static unsigned long clk_divider_gate_recalc_rate(struct clk_hw *hw,
 				   div->flags, div->width);
 }
 
-static long clk_divider_round_rate(struct clk_hw *hw, unsigned long rate,
-				   unsigned long *prate)
+static int clk_divider_determine_rate(struct clk_hw *hw,
+				      struct clk_rate_request *req)
 {
-	return clk_divider_ops.round_rate(hw, rate, prate);
+	return clk_divider_ops.determine_rate(hw, req);
 }
 
 static int clk_divider_gate_set_rate(struct clk_hw *hw, unsigned long rate,
@@ -154,12 +154,12 @@ static int clk_divider_is_enabled(struct clk_hw *hw)
 
 static const struct clk_ops clk_divider_gate_ro_ops = {
 	.recalc_rate = clk_divider_gate_recalc_rate_ro,
-	.round_rate = clk_divider_round_rate,
+	.determine_rate = clk_divider_determine_rate,
 };
 
 static const struct clk_ops clk_divider_gate_ops = {
 	.recalc_rate = clk_divider_gate_recalc_rate,
-	.round_rate = clk_divider_round_rate,
+	.determine_rate = clk_divider_determine_rate,
 	.set_rate = clk_divider_gate_set_rate,
 	.enable = clk_divider_enable,
 	.disable = clk_divider_disable,
diff --git a/drivers/clk/imx/clk-imx8mm.c b/drivers/clk/imx/clk-imx8mm.c
index f1919fafb124..e92621fa8b9c 100644
--- a/drivers/clk/imx/clk-imx8mm.c
+++ b/drivers/clk/imx/clk-imx8mm.c
@@ -407,10 +407,10 @@ static int imx8mm_clocks_probe(struct platform_device *pdev)
 	hws[IMX8MM_SYS_PLL2_500M] = imx_clk_hw_fixed_factor("sys_pll2_500m", "sys_pll2_500m_cg", 1, 2);
 	hws[IMX8MM_SYS_PLL2_1000M] = imx_clk_hw_fixed_factor("sys_pll2_1000m", "sys_pll2_out", 1, 1);
 
-	hws[IMX8MM_CLK_CLKOUT1_SEL] = imx_clk_hw_mux("clkout1_sel", base + 0x128, 4, 4, clkout_sels, ARRAY_SIZE(clkout_sels));
+	hws[IMX8MM_CLK_CLKOUT1_SEL] = imx_clk_hw_mux2("clkout1_sel", base + 0x128, 4, 4, clkout_sels, ARRAY_SIZE(clkout_sels));
 	hws[IMX8MM_CLK_CLKOUT1_DIV] = imx_clk_hw_divider("clkout1_div", "clkout1_sel", base + 0x128, 0, 4);
 	hws[IMX8MM_CLK_CLKOUT1] = imx_clk_hw_gate("clkout1", "clkout1_div", base + 0x128, 8);
-	hws[IMX8MM_CLK_CLKOUT2_SEL] = imx_clk_hw_mux("clkout2_sel", base + 0x128, 20, 4, clkout_sels, ARRAY_SIZE(clkout_sels));
+	hws[IMX8MM_CLK_CLKOUT2_SEL] = imx_clk_hw_mux2("clkout2_sel", base + 0x128, 20, 4, clkout_sels, ARRAY_SIZE(clkout_sels));
 	hws[IMX8MM_CLK_CLKOUT2_DIV] = imx_clk_hw_divider("clkout2_div", "clkout2_sel", base + 0x128, 16, 4);
 	hws[IMX8MM_CLK_CLKOUT2] = imx_clk_hw_gate("clkout2", "clkout2_div", base + 0x128, 24);
 
@@ -470,10 +470,11 @@ static int imx8mm_clocks_probe(struct platform_device *pdev)
 
 	/*
 	 * DRAM clocks are manipulated from TF-A outside clock framework.
-	 * Mark with GET_RATE_NOCACHE to always read div value from hardware
+	 * The fw_managed helper sets GET_RATE_NOCACHE and clears SET_PARENT_GATE
+	 * as div value should always be read from hardware
 	 */
-	hws[IMX8MM_CLK_DRAM_ALT] = __imx8m_clk_hw_composite("dram_alt", imx8mm_dram_alt_sels, base + 0xa000, CLK_GET_RATE_NOCACHE);
-	hws[IMX8MM_CLK_DRAM_APB] = __imx8m_clk_hw_composite("dram_apb", imx8mm_dram_apb_sels, base + 0xa080, CLK_IS_CRITICAL | CLK_GET_RATE_NOCACHE);
+	hws[IMX8MM_CLK_DRAM_ALT] = imx8m_clk_hw_fw_managed_composite("dram_alt", imx8mm_dram_alt_sels, base + 0xa000);
+	hws[IMX8MM_CLK_DRAM_APB] = imx8m_clk_hw_fw_managed_composite_critical("dram_apb", imx8mm_dram_apb_sels, base + 0xa080);
 
 	/* IP */
 	hws[IMX8MM_CLK_VPU_G1] = imx8m_clk_hw_composite("vpu_g1", imx8mm_vpu_g1_sels, base + 0xa100);
diff --git a/drivers/clk/imx/clk-imx8mn.c b/drivers/clk/imx/clk-imx8mn.c
index 88f6630cd472..c55577604e16 100644
--- a/drivers/clk/imx/clk-imx8mn.c
+++ b/drivers/clk/imx/clk-imx8mn.c
@@ -40,6 +40,9 @@ static const char * const imx8mn_a53_sels[] = {"osc_24m", "arm_pll_out", "sys_pl
 
 static const char * const imx8mn_a53_core_sels[] = {"arm_a53_div", "arm_pll_out", };
 
+static const char * const imx8mn_m7_sels[] = {"osc_24m", "sys_pll2_200m", "sys_pll2_250m", "vpu_pll_out",
+				       "sys_pll1_800m", "audio_pll1_out", "video_pll1_out", "sys_pll3_out", };
+
 static const char * const imx8mn_gpu_core_sels[] = {"osc_24m", "gpu_pll_out", "sys_pll1_800m",
 						    "sys_pll3_out", "sys_pll2_1000m", "audio_pll1_out",
 						    "video_pll1_out", "audio_pll2_out", };
@@ -402,10 +405,10 @@ static int imx8mn_clocks_probe(struct platform_device *pdev)
 	hws[IMX8MN_SYS_PLL2_500M] = imx_clk_hw_fixed_factor("sys_pll2_500m", "sys_pll2_500m_cg", 1, 2);
 	hws[IMX8MN_SYS_PLL2_1000M] = imx_clk_hw_fixed_factor("sys_pll2_1000m", "sys_pll2_out", 1, 1);
 
-	hws[IMX8MN_CLK_CLKOUT1_SEL] = imx_clk_hw_mux("clkout1_sel", base + 0x128, 4, 4, clkout_sels, ARRAY_SIZE(clkout_sels));
+	hws[IMX8MN_CLK_CLKOUT1_SEL] = imx_clk_hw_mux2("clkout1_sel", base + 0x128, 4, 4, clkout_sels, ARRAY_SIZE(clkout_sels));
 	hws[IMX8MN_CLK_CLKOUT1_DIV] = imx_clk_hw_divider("clkout1_div", "clkout1_sel", base + 0x128, 0, 4);
 	hws[IMX8MN_CLK_CLKOUT1] = imx_clk_hw_gate("clkout1", "clkout1_div", base + 0x128, 8);
-	hws[IMX8MN_CLK_CLKOUT2_SEL] = imx_clk_hw_mux("clkout2_sel", base + 0x128, 20, 4, clkout_sels, ARRAY_SIZE(clkout_sels));
+	hws[IMX8MN_CLK_CLKOUT2_SEL] = imx_clk_hw_mux2("clkout2_sel", base + 0x128, 20, 4, clkout_sels, ARRAY_SIZE(clkout_sels));
 	hws[IMX8MN_CLK_CLKOUT2_DIV] = imx_clk_hw_divider("clkout2_div", "clkout2_sel", base + 0x128, 16, 4);
 	hws[IMX8MN_CLK_CLKOUT2] = imx_clk_hw_gate("clkout2", "clkout2_div", base + 0x128, 24);
 
@@ -421,6 +424,8 @@ static int imx8mn_clocks_probe(struct platform_device *pdev)
 	hws[IMX8MN_CLK_A53_SRC] = hws[IMX8MN_CLK_A53_DIV];
 	hws[IMX8MN_CLK_A53_CG] = hws[IMX8MN_CLK_A53_DIV];
 
+	hws[IMX8MN_CLK_M7_CORE] = imx8m_clk_hw_composite_core("arm_m7_core", imx8mn_m7_sels, base + 0x8080);
+
 	hws[IMX8MN_CLK_GPU_CORE] = imx8m_clk_hw_composite_core("gpu_core", imx8mn_gpu_core_sels, base + 0x8180);
 	hws[IMX8MN_CLK_GPU_SHADER] = imx8m_clk_hw_composite_core("gpu_shader", imx8mn_gpu_shader_sels, base + 0x8200);
 
@@ -453,10 +458,11 @@ static int imx8mn_clocks_probe(struct platform_device *pdev)
 
 	/*
 	 * DRAM clocks are manipulated from TF-A outside clock framework.
-	 * Mark with GET_RATE_NOCACHE to always read div value from hardware
+	 * The fw_managed helper sets GET_RATE_NOCACHE and clears SET_PARENT_GATE
+	 * as div value should always be read from hardware
 	 */
-	hws[IMX8MN_CLK_DRAM_ALT] = __imx8m_clk_hw_composite("dram_alt", imx8mn_dram_alt_sels, base + 0xa000, CLK_GET_RATE_NOCACHE);
-	hws[IMX8MN_CLK_DRAM_APB] = __imx8m_clk_hw_composite("dram_apb", imx8mn_dram_apb_sels, base + 0xa080, CLK_IS_CRITICAL | CLK_GET_RATE_NOCACHE);
+	hws[IMX8MN_CLK_DRAM_ALT] = imx8m_clk_hw_fw_managed_composite("dram_alt", imx8mn_dram_alt_sels, base + 0xa000);
+	hws[IMX8MN_CLK_DRAM_APB] = imx8m_clk_hw_fw_managed_composite_critical("dram_apb", imx8mn_dram_apb_sels, base + 0xa080);
 
 	hws[IMX8MN_CLK_DISP_PIXEL] = imx8m_clk_hw_composite("disp_pixel", imx8mn_disp_pixel_sels, base + 0xa500);
 	hws[IMX8MN_CLK_SAI2] = imx8m_clk_hw_composite("sai2", imx8mn_sai2_sels, base + 0xa600);
diff --git a/drivers/clk/imx/clk-imx8mq.c b/drivers/clk/imx/clk-imx8mq.c
index c491bc9c61ce..83cc2b1c3294 100644
--- a/drivers/clk/imx/clk-imx8mq.c
+++ b/drivers/clk/imx/clk-imx8mq.c
@@ -449,11 +449,12 @@ static int imx8mq_clocks_probe(struct platform_device *pdev)
 
 	/*
 	 * DRAM clocks are manipulated from TF-A outside clock framework.
-	 * Mark with GET_RATE_NOCACHE to always read div value from hardware
+	 * The fw_managed helper sets GET_RATE_NOCACHE and clears SET_PARENT_GATE
+	 * as div value should always be read from hardware
 	 */
 	hws[IMX8MQ_CLK_DRAM_CORE] = imx_clk_hw_mux2_flags("dram_core_clk", base + 0x9800, 24, 1, imx8mq_dram_core_sels, ARRAY_SIZE(imx8mq_dram_core_sels), CLK_IS_CRITICAL);
-	hws[IMX8MQ_CLK_DRAM_ALT] = __imx8m_clk_hw_composite("dram_alt", imx8mq_dram_alt_sels, base + 0xa000, CLK_GET_RATE_NOCACHE);
-	hws[IMX8MQ_CLK_DRAM_APB] = __imx8m_clk_hw_composite("dram_apb", imx8mq_dram_apb_sels, base + 0xa080, CLK_IS_CRITICAL | CLK_GET_RATE_NOCACHE);
+	hws[IMX8MQ_CLK_DRAM_ALT] = imx8m_clk_hw_fw_managed_composite("dram_alt", imx8mq_dram_alt_sels, base + 0xa000);
+	hws[IMX8MQ_CLK_DRAM_APB] = imx8m_clk_hw_fw_managed_composite_critical("dram_apb", imx8mq_dram_apb_sels, base + 0xa080);
 
 	/* IP */
 	hws[IMX8MQ_CLK_VPU_G1] = imx8m_clk_hw_composite("vpu_g1", imx8mq_vpu_g1_sels, base + 0xa100);
diff --git a/drivers/clk/imx/clk.h b/drivers/clk/imx/clk.h
index 7571603bee23..e144f983fd8c 100644
--- a/drivers/clk/imx/clk.h
+++ b/drivers/clk/imx/clk.h
@@ -530,8 +530,9 @@ struct clk_hw *imx_clk_hw_cpu(const char *name, const char *parent_name,
 		struct clk *div, struct clk *mux, struct clk *pll,
 		struct clk *step);
 
-#define IMX_COMPOSITE_CORE	BIT(0)
-#define IMX_COMPOSITE_BUS	BIT(1)
+#define IMX_COMPOSITE_CORE		BIT(0)
+#define IMX_COMPOSITE_BUS		BIT(1)
+#define IMX_COMPOSITE_FW_MANAGED	BIT(2)
 
 struct clk_hw *imx8m_clk_hw_composite_flags(const char *name,
 					    const char * const *parent_names,
@@ -567,6 +568,17 @@ struct clk_hw *imx8m_clk_hw_composite_flags(const char *name,
 		ARRAY_SIZE(parent_names), reg, 0, \
 		flags | CLK_SET_RATE_NO_REPARENT | CLK_OPS_PARENT_ENABLE)
 
+#define __imx8m_clk_hw_fw_managed_composite(name, parent_names, reg, flags) \
+	imx8m_clk_hw_composite_flags(name, parent_names, \
+		ARRAY_SIZE(parent_names), reg, IMX_COMPOSITE_FW_MANAGED, \
+		flags | CLK_GET_RATE_NOCACHE | CLK_SET_RATE_NO_REPARENT | CLK_OPS_PARENT_ENABLE)
+
+#define imx8m_clk_hw_fw_managed_composite(name, parent_names, reg) \
+	__imx8m_clk_hw_fw_managed_composite(name, parent_names, reg, 0)
+
+#define imx8m_clk_hw_fw_managed_composite_critical(name, parent_names, reg) \
+	__imx8m_clk_hw_fw_managed_composite(name, parent_names, reg, CLK_IS_CRITICAL)
+
 #define __imx8m_clk_composite(name, parent_names, reg, flags) \
 	to_clk(__imx8m_clk_hw_composite(name, parent_names, reg, flags))
 
diff --git a/drivers/clk/mediatek/Kconfig b/drivers/clk/mediatek/Kconfig
index 886e2d9fced5..439b7c8d0d07 100644
--- a/drivers/clk/mediatek/Kconfig
+++ b/drivers/clk/mediatek/Kconfig
@@ -362,41 +362,36 @@ config COMMON_CLK_MT8167
 
 config COMMON_CLK_MT8167_AUDSYS
 	bool "Clock driver for MediaTek MT8167 audsys"
-	depends on (ARCH_MEDIATEK && ARM64) || COMPILE_TEST
-	select COMMON_CLK_MEDIATEK
-	default ARCH_MEDIATEK
+	depends on COMMON_CLK_MT8167
+	default COMMON_CLK_MT8167
 	help
 	  This driver supports MediaTek MT8167 audsys clocks.
 
 config COMMON_CLK_MT8167_IMGSYS
 	bool "Clock driver for MediaTek MT8167 imgsys"
-	depends on (ARCH_MEDIATEK && ARM64) || COMPILE_TEST
-	select COMMON_CLK_MEDIATEK
-	default ARCH_MEDIATEK
+	depends on COMMON_CLK_MT8167
+	default COMMON_CLK_MT8167
 	help
 	  This driver supports MediaTek MT8167 imgsys clocks.
 
 config COMMON_CLK_MT8167_MFGCFG
 	bool "Clock driver for MediaTek MT8167 mfgcfg"
-	depends on (ARCH_MEDIATEK && ARM64) || COMPILE_TEST
-	select COMMON_CLK_MEDIATEK
-	default ARCH_MEDIATEK
+	depends on COMMON_CLK_MT8167
+	default COMMON_CLK_MT8167
 	help
 	  This driver supports MediaTek MT8167 mfgcfg clocks.
 
 config COMMON_CLK_MT8167_MMSYS
 	bool "Clock driver for MediaTek MT8167 mmsys"
-	depends on (ARCH_MEDIATEK && ARM64) || COMPILE_TEST
-	select COMMON_CLK_MEDIATEK
-	default ARCH_MEDIATEK
+	depends on COMMON_CLK_MT8167
+	default COMMON_CLK_MT8167
 	help
 	  This driver supports MediaTek MT8167 mmsys clocks.
 
 config COMMON_CLK_MT8167_VDECSYS
 	bool "Clock driver for MediaTek MT8167 vdecsys"
-	depends on (ARCH_MEDIATEK && ARM64) || COMPILE_TEST
-	select COMMON_CLK_MEDIATEK
-	default ARCH_MEDIATEK
+	depends on COMMON_CLK_MT8167
+	default COMMON_CLK_MT8167
 	help
 	  This driver supports MediaTek MT8167 vdecsys clocks.
 
@@ -500,6 +495,86 @@ config COMMON_CLK_MT8183_VENCSYS
 	help
 	  This driver supports MediaTek MT8183 vencsys clocks.
 
+config COMMON_CLK_MT8192
+	bool "Clock driver for MediaTek MT8192"
+	depends on ARM64 || COMPILE_TEST
+	select COMMON_CLK_MEDIATEK
+	default ARM64
+	help
+	  This driver supports MediaTek MT8192 basic clocks.
+
+config COMMON_CLK_MT8192_AUDSYS
+	bool "Clock driver for MediaTek MT8192 audsys"
+	depends on COMMON_CLK_MT8192
+	help
+	  This driver supports MediaTek MT8192 audsys clocks.
+
+config COMMON_CLK_MT8192_CAMSYS
+	bool "Clock driver for MediaTek MT8192 camsys"
+	depends on COMMON_CLK_MT8192
+	help
+	  This driver supports MediaTek MT8192 camsys and camsys_raw clocks.
+
+config COMMON_CLK_MT8192_IMGSYS
+	bool "Clock driver for MediaTek MT8192 imgsys"
+	depends on COMMON_CLK_MT8192
+	help
+	  This driver supports MediaTek MT8192 imgsys and imgsys2 clocks.
+
+config COMMON_CLK_MT8192_IMP_IIC_WRAP
+	bool "Clock driver for MediaTek MT8192 imp_iic_wrap"
+	depends on COMMON_CLK_MT8192
+	help
+	  This driver supports MediaTek MT8192 imp_iic_wrap clocks.
+
+config COMMON_CLK_MT8192_IPESYS
+	bool "Clock driver for MediaTek MT8192 ipesys"
+	depends on COMMON_CLK_MT8192
+	help
+	  This driver supports MediaTek MT8192 ipesys clocks.
+
+config COMMON_CLK_MT8192_MDPSYS
+	bool "Clock driver for MediaTek MT8192 mdpsys"
+	depends on COMMON_CLK_MT8192
+	help
+	  This driver supports MediaTek MT8192 mdpsys clocks.
+
+config COMMON_CLK_MT8192_MFGCFG
+	bool "Clock driver for MediaTek MT8192 mfgcfg"
+	depends on COMMON_CLK_MT8192
+	help
+	  This driver supports MediaTek MT8192 mfgcfg clocks.
+
+config COMMON_CLK_MT8192_MMSYS
+	bool "Clock driver for MediaTek MT8192 mmsys"
+	depends on COMMON_CLK_MT8192
+	help
+	  This driver supports MediaTek MT8192 mmsys clocks.
+
+config COMMON_CLK_MT8192_MSDC
+	bool "Clock driver for MediaTek MT8192 msdc"
+	depends on COMMON_CLK_MT8192
+	help
+	  This driver supports MediaTek MT8192 msdc and msdc_top clocks.
+
+config COMMON_CLK_MT8192_SCP_ADSP
+	bool "Clock driver for MediaTek MT8192 scp_adsp"
+	depends on COMMON_CLK_MT8192
+	help
+	  This driver supports MediaTek MT8192 scp_adsp clocks.
+
+config COMMON_CLK_MT8192_VDECSYS
+	bool "Clock driver for MediaTek MT8192 vdecsys"
+	depends on COMMON_CLK_MT8192
+	help
+	  This driver supports MediaTek MT8192 vdecsys and vdecsys_soc clocks.
+
+config COMMON_CLK_MT8192_VENCSYS
+	bool "Clock driver for MediaTek MT8192 vencsys"
+	depends on COMMON_CLK_MT8192
+	help
+	  This driver supports MediaTek MT8192 vencsys clocks.
+
 config COMMON_CLK_MT8516
 	bool "Clock driver for MediaTek MT8516"
 	depends on ARCH_MEDIATEK || COMPILE_TEST
diff --git a/drivers/clk/mediatek/Makefile b/drivers/clk/mediatek/Makefile
index 3b0c2be73824..15bc045f0b71 100644
--- a/drivers/clk/mediatek/Makefile
+++ b/drivers/clk/mediatek/Makefile
@@ -67,5 +67,18 @@ obj-$(CONFIG_COMMON_CLK_MT8183_MFGCFG) += clk-mt8183-mfgcfg.o
 obj-$(CONFIG_COMMON_CLK_MT8183_MMSYS) += clk-mt8183-mm.o
 obj-$(CONFIG_COMMON_CLK_MT8183_VDECSYS) += clk-mt8183-vdec.o
 obj-$(CONFIG_COMMON_CLK_MT8183_VENCSYS) += clk-mt8183-venc.o
+obj-$(CONFIG_COMMON_CLK_MT8192) += clk-mt8192.o
+obj-$(CONFIG_COMMON_CLK_MT8192_AUDSYS) += clk-mt8192-aud.o
+obj-$(CONFIG_COMMON_CLK_MT8192_CAMSYS) += clk-mt8192-cam.o
+obj-$(CONFIG_COMMON_CLK_MT8192_IMGSYS) += clk-mt8192-img.o
+obj-$(CONFIG_COMMON_CLK_MT8192_IMP_IIC_WRAP) += clk-mt8192-imp_iic_wrap.o
+obj-$(CONFIG_COMMON_CLK_MT8192_IPESYS) += clk-mt8192-ipe.o
+obj-$(CONFIG_COMMON_CLK_MT8192_MDPSYS) += clk-mt8192-mdp.o
+obj-$(CONFIG_COMMON_CLK_MT8192_MFGCFG) += clk-mt8192-mfg.o
+obj-$(CONFIG_COMMON_CLK_MT8192_MMSYS) += clk-mt8192-mm.o
+obj-$(CONFIG_COMMON_CLK_MT8192_MSDC) += clk-mt8192-msdc.o
+obj-$(CONFIG_COMMON_CLK_MT8192_SCP_ADSP) += clk-mt8192-scp_adsp.o
+obj-$(CONFIG_COMMON_CLK_MT8192_VDECSYS) += clk-mt8192-vdec.o
+obj-$(CONFIG_COMMON_CLK_MT8192_VENCSYS) += clk-mt8192-venc.o
 obj-$(CONFIG_COMMON_CLK_MT8516) += clk-mt8516.o
 obj-$(CONFIG_COMMON_CLK_MT8516_AUDSYS) += clk-mt8516-aud.o
diff --git a/drivers/clk/mediatek/clk-cpumux.c b/drivers/clk/mediatek/clk-cpumux.c
index 79fe09028742..61eeae4e60fb 100644
--- a/drivers/clk/mediatek/clk-cpumux.c
+++ b/drivers/clk/mediatek/clk-cpumux.c
@@ -84,7 +84,7 @@ int mtk_clk_register_cpumuxes(struct device_node *node,
 	struct clk *clk;
 	struct regmap *regmap;
 
-	regmap = syscon_node_to_regmap(node);
+	regmap = device_node_to_regmap(node);
 	if (IS_ERR(regmap)) {
 		pr_err("Cannot find regmap for %pOF: %ld\n", node,
 		       PTR_ERR(regmap));
diff --git a/drivers/clk/mediatek/clk-mt8192-aud.c b/drivers/clk/mediatek/clk-mt8192-aud.c
new file mode 100644
index 000000000000..f28d56628045
--- /dev/null
+++ b/drivers/clk/mediatek/clk-mt8192-aud.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright (c) 2021 MediaTek Inc.
+// Author: Chun-Jie Chen <chun-jie.chen@mediatek.com>
+
+#include <linux/clk-provider.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+#include <dt-bindings/clock/mt8192-clk.h>
+
+static const struct mtk_gate_regs aud0_cg_regs = {
+	.set_ofs = 0x0,
+	.clr_ofs = 0x0,
+	.sta_ofs = 0x0,
+};
+
+static const struct mtk_gate_regs aud1_cg_regs = {
+	.set_ofs = 0x4,
+	.clr_ofs = 0x4,
+	.sta_ofs = 0x4,
+};
+
+static const struct mtk_gate_regs aud2_cg_regs = {
+	.set_ofs = 0x8,
+	.clr_ofs = 0x8,
+	.sta_ofs = 0x8,
+};
+
+#define GATE_AUD0(_id, _name, _parent, _shift)	\
+	GATE_MTK(_id, _name, _parent, &aud0_cg_regs, _shift, &mtk_clk_gate_ops_no_setclr)
+
+#define GATE_AUD1(_id, _name, _parent, _shift)	\
+	GATE_MTK(_id, _name, _parent, &aud1_cg_regs, _shift, &mtk_clk_gate_ops_no_setclr)
+
+#define GATE_AUD2(_id, _name, _parent, _shift)	\
+	GATE_MTK(_id, _name, _parent, &aud2_cg_regs, _shift, &mtk_clk_gate_ops_no_setclr)
+
+static const struct mtk_gate aud_clks[] = {
+	/* AUD0 */
+	GATE_AUD0(CLK_AUD_AFE, "aud_afe", "audio_sel", 2),
+	GATE_AUD0(CLK_AUD_22M, "aud_22m", "aud_engen1_sel", 8),
+	GATE_AUD0(CLK_AUD_24M, "aud_24m", "aud_engen2_sel", 9),
+	GATE_AUD0(CLK_AUD_APLL2_TUNER, "aud_apll2_tuner", "aud_engen2_sel", 18),
+	GATE_AUD0(CLK_AUD_APLL_TUNER, "aud_apll_tuner", "aud_engen1_sel", 19),
+	GATE_AUD0(CLK_AUD_TDM, "aud_tdm", "aud_1_sel", 20),
+	GATE_AUD0(CLK_AUD_ADC, "aud_adc", "audio_sel", 24),
+	GATE_AUD0(CLK_AUD_DAC, "aud_dac", "audio_sel", 25),
+	GATE_AUD0(CLK_AUD_DAC_PREDIS, "aud_dac_predis", "audio_sel", 26),
+	GATE_AUD0(CLK_AUD_TML, "aud_tml", "audio_sel", 27),
+	GATE_AUD0(CLK_AUD_NLE, "aud_nle", "audio_sel", 28),
+	/* AUD1 */
+	GATE_AUD1(CLK_AUD_I2S1_B, "aud_i2s1_b", "audio_sel", 4),
+	GATE_AUD1(CLK_AUD_I2S2_B, "aud_i2s2_b", "audio_sel", 5),
+	GATE_AUD1(CLK_AUD_I2S3_B, "aud_i2s3_b", "audio_sel", 6),
+	GATE_AUD1(CLK_AUD_I2S4_B, "aud_i2s4_b", "audio_sel", 7),
+	GATE_AUD1(CLK_AUD_CONNSYS_I2S_ASRC, "aud_connsys_i2s_asrc", "audio_sel", 12),
+	GATE_AUD1(CLK_AUD_GENERAL1_ASRC, "aud_general1_asrc", "audio_sel", 13),
+	GATE_AUD1(CLK_AUD_GENERAL2_ASRC, "aud_general2_asrc", "audio_sel", 14),
+	GATE_AUD1(CLK_AUD_DAC_HIRES, "aud_dac_hires", "audio_h_sel", 15),
+	GATE_AUD1(CLK_AUD_ADC_HIRES, "aud_adc_hires", "audio_h_sel", 16),
+	GATE_AUD1(CLK_AUD_ADC_HIRES_TML, "aud_adc_hires_tml", "audio_h_sel", 17),
+	GATE_AUD1(CLK_AUD_ADDA6_ADC, "aud_adda6_adc", "audio_sel", 20),
+	GATE_AUD1(CLK_AUD_ADDA6_ADC_HIRES, "aud_adda6_adc_hires", "audio_h_sel", 21),
+	GATE_AUD1(CLK_AUD_3RD_DAC, "aud_3rd_dac", "audio_sel", 28),
+	GATE_AUD1(CLK_AUD_3RD_DAC_PREDIS, "aud_3rd_dac_predis", "audio_sel", 29),
+	GATE_AUD1(CLK_AUD_3RD_DAC_TML, "aud_3rd_dac_tml", "audio_sel", 30),
+	GATE_AUD1(CLK_AUD_3RD_DAC_HIRES, "aud_3rd_dac_hires", "audio_h_sel", 31),
+	/* AUD2 */
+	GATE_AUD2(CLK_AUD_I2S5_B, "aud_i2s5_b", "audio_sel", 0),
+	GATE_AUD2(CLK_AUD_I2S6_B, "aud_i2s6_b", "audio_sel", 1),
+	GATE_AUD2(CLK_AUD_I2S7_B, "aud_i2s7_b", "audio_sel", 2),
+	GATE_AUD2(CLK_AUD_I2S8_B, "aud_i2s8_b", "audio_sel", 3),
+	GATE_AUD2(CLK_AUD_I2S9_B, "aud_i2s9_b", "audio_sel", 4),
+};
+
+static int clk_mt8192_aud_probe(struct platform_device *pdev)
+{
+	struct clk_onecell_data *clk_data;
+	struct device_node *node = pdev->dev.of_node;
+	int r;
+
+	clk_data = mtk_alloc_clk_data(CLK_AUD_NR_CLK);
+	if (!clk_data)
+		return -ENOMEM;
+
+	r = mtk_clk_register_gates(node, aud_clks, ARRAY_SIZE(aud_clks), clk_data);
+	if (r)
+		return r;
+
+	r = of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+	if (r)
+		return r;
+
+	r = devm_of_platform_populate(&pdev->dev);
+	if (r)
+		of_clk_del_provider(node);
+
+	return r;
+}
+
+static const struct of_device_id of_match_clk_mt8192_aud[] = {
+	{ .compatible = "mediatek,mt8192-audsys", },
+	{}
+};
+
+static struct platform_driver clk_mt8192_aud_drv = {
+	.probe = clk_mt8192_aud_probe,
+	.driver = {
+		.name = "clk-mt8192-aud",
+		.of_match_table = of_match_clk_mt8192_aud,
+	},
+};
+
+builtin_platform_driver(clk_mt8192_aud_drv);
diff --git a/drivers/clk/mediatek/clk-mt8192-cam.c b/drivers/clk/mediatek/clk-mt8192-cam.c
new file mode 100644
index 000000000000..fc74cd80b4b0
--- /dev/null
+++ b/drivers/clk/mediatek/clk-mt8192-cam.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright (c) 2021 MediaTek Inc.
+// Author: Chun-Jie Chen <chun-jie.chen@mediatek.com>
+
+#include <linux/clk-provider.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+#include <dt-bindings/clock/mt8192-clk.h>
+
+static const struct mtk_gate_regs cam_cg_regs = {
+	.set_ofs = 0x4,
+	.clr_ofs = 0x8,
+	.sta_ofs = 0x0,
+};
+
+#define GATE_CAM(_id, _name, _parent, _shift)	\
+	GATE_MTK(_id, _name, _parent, &cam_cg_regs, _shift, &mtk_clk_gate_ops_setclr)
+
+static const struct mtk_gate cam_clks[] = {
+	GATE_CAM(CLK_CAM_LARB13, "cam_larb13", "cam_sel", 0),
+	GATE_CAM(CLK_CAM_DFP_VAD, "cam_dfp_vad", "cam_sel", 1),
+	GATE_CAM(CLK_CAM_LARB14, "cam_larb14", "cam_sel", 2),
+	GATE_CAM(CLK_CAM_CAM, "cam_cam", "cam_sel", 6),
+	GATE_CAM(CLK_CAM_CAMTG, "cam_camtg", "cam_sel", 7),
+	GATE_CAM(CLK_CAM_SENINF, "cam_seninf", "cam_sel", 8),
+	GATE_CAM(CLK_CAM_CAMSV0, "cam_camsv0", "cam_sel", 9),
+	GATE_CAM(CLK_CAM_CAMSV1, "cam_camsv1", "cam_sel", 10),
+	GATE_CAM(CLK_CAM_CAMSV2, "cam_camsv2", "cam_sel", 11),
+	GATE_CAM(CLK_CAM_CAMSV3, "cam_camsv3", "cam_sel", 12),
+	GATE_CAM(CLK_CAM_CCU0, "cam_ccu0", "cam_sel", 13),
+	GATE_CAM(CLK_CAM_CCU1, "cam_ccu1", "cam_sel", 14),
+	GATE_CAM(CLK_CAM_MRAW0, "cam_mraw0", "cam_sel", 15),
+	GATE_CAM(CLK_CAM_FAKE_ENG, "cam_fake_eng", "cam_sel", 17),
+	GATE_CAM(CLK_CAM_CCU_GALS, "cam_ccu_gals", "cam_sel", 18),
+	GATE_CAM(CLK_CAM_CAM2MM_GALS, "cam2mm_gals", "cam_sel", 19),
+};
+
+static const struct mtk_gate cam_rawa_clks[] = {
+	GATE_CAM(CLK_CAM_RAWA_LARBX, "cam_rawa_larbx", "cam_sel", 0),
+	GATE_CAM(CLK_CAM_RAWA_CAM, "cam_rawa_cam", "cam_sel", 1),
+	GATE_CAM(CLK_CAM_RAWA_CAMTG, "cam_rawa_camtg", "cam_sel", 2),
+};
+
+static const struct mtk_gate cam_rawb_clks[] = {
+	GATE_CAM(CLK_CAM_RAWB_LARBX, "cam_rawb_larbx", "cam_sel", 0),
+	GATE_CAM(CLK_CAM_RAWB_CAM, "cam_rawb_cam", "cam_sel", 1),
+	GATE_CAM(CLK_CAM_RAWB_CAMTG, "cam_rawb_camtg", "cam_sel", 2),
+};
+
+static const struct mtk_gate cam_rawc_clks[] = {
+	GATE_CAM(CLK_CAM_RAWC_LARBX, "cam_rawc_larbx", "cam_sel", 0),
+	GATE_CAM(CLK_CAM_RAWC_CAM, "cam_rawc_cam", "cam_sel", 1),
+	GATE_CAM(CLK_CAM_RAWC_CAMTG, "cam_rawc_camtg", "cam_sel", 2),
+};
+
+static const struct mtk_clk_desc cam_desc = {
+	.clks = cam_clks,
+	.num_clks = ARRAY_SIZE(cam_clks),
+};
+
+static const struct mtk_clk_desc cam_rawa_desc = {
+	.clks = cam_rawa_clks,
+	.num_clks = ARRAY_SIZE(cam_rawa_clks),
+};
+
+static const struct mtk_clk_desc cam_rawb_desc = {
+	.clks = cam_rawb_clks,
+	.num_clks = ARRAY_SIZE(cam_rawb_clks),
+};
+
+static const struct mtk_clk_desc cam_rawc_desc = {
+	.clks = cam_rawc_clks,
+	.num_clks = ARRAY_SIZE(cam_rawc_clks),
+};
+
+static const struct of_device_id of_match_clk_mt8192_cam[] = {
+	{
+		.compatible = "mediatek,mt8192-camsys",
+		.data = &cam_desc,
+	}, {
+		.compatible = "mediatek,mt8192-camsys_rawa",
+		.data = &cam_rawa_desc,
+	}, {
+		.compatible = "mediatek,mt8192-camsys_rawb",
+		.data = &cam_rawb_desc,
+	}, {
+		.compatible = "mediatek,mt8192-camsys_rawc",
+		.data = &cam_rawc_desc,
+	}, {
+		/* sentinel */
+	}
+};
+
+static struct platform_driver clk_mt8192_cam_drv = {
+	.probe = mtk_clk_simple_probe,
+	.driver = {
+		.name = "clk-mt8192-cam",
+		.of_match_table = of_match_clk_mt8192_cam,
+	},
+};
+
+builtin_platform_driver(clk_mt8192_cam_drv);
diff --git a/drivers/clk/mediatek/clk-mt8192-img.c b/drivers/clk/mediatek/clk-mt8192-img.c
new file mode 100644
index 000000000000..7ce3abe42577
--- /dev/null
+++ b/drivers/clk/mediatek/clk-mt8192-img.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright (c) 2021 MediaTek Inc.
+// Author: Chun-Jie Chen <chun-jie.chen@mediatek.com>
+
+#include <linux/clk-provider.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+#include <dt-bindings/clock/mt8192-clk.h>
+
+static const struct mtk_gate_regs img_cg_regs = {
+	.set_ofs = 0x4,
+	.clr_ofs = 0x8,
+	.sta_ofs = 0x0,
+};
+
+#define GATE_IMG(_id, _name, _parent, _shift)	\
+	GATE_MTK(_id, _name, _parent, &img_cg_regs, _shift, &mtk_clk_gate_ops_setclr)
+
+static const struct mtk_gate img_clks[] = {
+	GATE_IMG(CLK_IMG_LARB9, "img_larb9", "img1_sel", 0),
+	GATE_IMG(CLK_IMG_LARB10, "img_larb10", "img1_sel", 1),
+	GATE_IMG(CLK_IMG_DIP, "img_dip", "img1_sel", 2),
+	GATE_IMG(CLK_IMG_GALS, "img_gals", "img1_sel", 12),
+};
+
+static const struct mtk_gate img2_clks[] = {
+	GATE_IMG(CLK_IMG2_LARB11, "img2_larb11", "img1_sel", 0),
+	GATE_IMG(CLK_IMG2_LARB12, "img2_larb12", "img1_sel", 1),
+	GATE_IMG(CLK_IMG2_MFB, "img2_mfb", "img1_sel", 6),
+	GATE_IMG(CLK_IMG2_WPE, "img2_wpe", "img1_sel", 7),
+	GATE_IMG(CLK_IMG2_MSS, "img2_mss", "img1_sel", 8),
+	GATE_IMG(CLK_IMG2_GALS, "img2_gals", "img1_sel", 12),
+};
+
+static const struct mtk_clk_desc img_desc = {
+	.clks = img_clks,
+	.num_clks = ARRAY_SIZE(img_clks),
+};
+
+static const struct mtk_clk_desc img2_desc = {
+	.clks = img2_clks,
+	.num_clks = ARRAY_SIZE(img2_clks),
+};
+
+static const struct of_device_id of_match_clk_mt8192_img[] = {
+	{
+		.compatible = "mediatek,mt8192-imgsys",
+		.data = &img_desc,
+	}, {
+		.compatible = "mediatek,mt8192-imgsys2",
+		.data = &img2_desc,
+	}, {
+		/* sentinel */
+	}
+};
+
+static struct platform_driver clk_mt8192_img_drv = {
+	.probe = mtk_clk_simple_probe,
+	.driver = {
+		.name = "clk-mt8192-img",
+		.of_match_table = of_match_clk_mt8192_img,
+	},
+};
+
+builtin_platform_driver(clk_mt8192_img_drv);
diff --git a/drivers/clk/mediatek/clk-mt8192-imp_iic_wrap.c b/drivers/clk/mediatek/clk-mt8192-imp_iic_wrap.c
new file mode 100644
index 000000000000..700356ac6a58
--- /dev/null
+++ b/drivers/clk/mediatek/clk-mt8192-imp_iic_wrap.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright (c) 2021 MediaTek Inc.
+// Author: Chun-Jie Chen <chun-jie.chen@mediatek.com>
+
+#include <linux/clk-provider.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+#include <dt-bindings/clock/mt8192-clk.h>
+
+static const struct mtk_gate_regs imp_iic_wrap_cg_regs = {
+	.set_ofs = 0xe08,
+	.clr_ofs = 0xe04,
+	.sta_ofs = 0xe00,
+};
+
+#define GATE_IMP_IIC_WRAP(_id, _name, _parent, _shift)			\
+	GATE_MTK_FLAGS(_id, _name, _parent, &imp_iic_wrap_cg_regs, _shift,	\
+		&mtk_clk_gate_ops_setclr, CLK_OPS_PARENT_ENABLE)
+
+static const struct mtk_gate imp_iic_wrap_c_clks[] = {
+	GATE_IMP_IIC_WRAP(CLK_IMP_IIC_WRAP_C_I2C10, "imp_iic_wrap_c_i2c10", "infra_i2c0", 0),
+	GATE_IMP_IIC_WRAP(CLK_IMP_IIC_WRAP_C_I2C11, "imp_iic_wrap_c_i2c11", "infra_i2c0", 1),
+	GATE_IMP_IIC_WRAP(CLK_IMP_IIC_WRAP_C_I2C12, "imp_iic_wrap_c_i2c12", "infra_i2c0", 2),
+	GATE_IMP_IIC_WRAP(CLK_IMP_IIC_WRAP_C_I2C13, "imp_iic_wrap_c_i2c13", "infra_i2c0", 3),
+};
+
+static const struct mtk_gate imp_iic_wrap_e_clks[] = {
+	GATE_IMP_IIC_WRAP(CLK_IMP_IIC_WRAP_E_I2C3, "imp_iic_wrap_e_i2c3", "infra_i2c0", 0),
+};
+
+static const struct mtk_gate imp_iic_wrap_n_clks[] = {
+	GATE_IMP_IIC_WRAP(CLK_IMP_IIC_WRAP_N_I2C0, "imp_iic_wrap_n_i2c0", "infra_i2c0", 0),
+	GATE_IMP_IIC_WRAP(CLK_IMP_IIC_WRAP_N_I2C6, "imp_iic_wrap_n_i2c6", "infra_i2c0", 1),
+};
+
+static const struct mtk_gate imp_iic_wrap_s_clks[] = {
+	GATE_IMP_IIC_WRAP(CLK_IMP_IIC_WRAP_S_I2C7, "imp_iic_wrap_s_i2c7", "infra_i2c0", 0),
+	GATE_IMP_IIC_WRAP(CLK_IMP_IIC_WRAP_S_I2C8, "imp_iic_wrap_s_i2c8", "infra_i2c0", 1),
+	GATE_IMP_IIC_WRAP(CLK_IMP_IIC_WRAP_S_I2C9, "imp_iic_wrap_s_i2c9", "infra_i2c0", 2),
+};
+
+static const struct mtk_gate imp_iic_wrap_w_clks[] = {
+	GATE_IMP_IIC_WRAP(CLK_IMP_IIC_WRAP_W_I2C5, "imp_iic_wrap_w_i2c5", "infra_i2c0", 0),
+};
+
+static const struct mtk_gate imp_iic_wrap_ws_clks[] = {
+	GATE_IMP_IIC_WRAP(CLK_IMP_IIC_WRAP_WS_I2C1, "imp_iic_wrap_ws_i2c1", "infra_i2c0", 0),
+	GATE_IMP_IIC_WRAP(CLK_IMP_IIC_WRAP_WS_I2C2, "imp_iic_wrap_ws_i2c2", "infra_i2c0", 1),
+	GATE_IMP_IIC_WRAP(CLK_IMP_IIC_WRAP_WS_I2C4, "imp_iic_wrap_ws_i2c4", "infra_i2c0", 2),
+};
+
+static const struct mtk_clk_desc imp_iic_wrap_c_desc = {
+	.clks = imp_iic_wrap_c_clks,
+	.num_clks = ARRAY_SIZE(imp_iic_wrap_c_clks),
+};
+
+static const struct mtk_clk_desc imp_iic_wrap_e_desc = {
+	.clks = imp_iic_wrap_e_clks,
+	.num_clks = ARRAY_SIZE(imp_iic_wrap_e_clks),
+};
+
+static const struct mtk_clk_desc imp_iic_wrap_n_desc = {
+	.clks = imp_iic_wrap_n_clks,
+	.num_clks = ARRAY_SIZE(imp_iic_wrap_n_clks),
+};
+
+static const struct mtk_clk_desc imp_iic_wrap_s_desc = {
+	.clks = imp_iic_wrap_s_clks,
+	.num_clks = ARRAY_SIZE(imp_iic_wrap_s_clks),
+};
+
+static const struct mtk_clk_desc imp_iic_wrap_w_desc = {
+	.clks = imp_iic_wrap_w_clks,
+	.num_clks = ARRAY_SIZE(imp_iic_wrap_w_clks),
+};
+
+static const struct mtk_clk_desc imp_iic_wrap_ws_desc = {
+	.clks = imp_iic_wrap_ws_clks,
+	.num_clks = ARRAY_SIZE(imp_iic_wrap_ws_clks),
+};
+
+static const struct of_device_id of_match_clk_mt8192_imp_iic_wrap[] = {
+	{
+		.compatible = "mediatek,mt8192-imp_iic_wrap_c",
+		.data = &imp_iic_wrap_c_desc,
+	}, {
+		.compatible = "mediatek,mt8192-imp_iic_wrap_e",
+		.data = &imp_iic_wrap_e_desc,
+	}, {
+		.compatible = "mediatek,mt8192-imp_iic_wrap_n",
+		.data = &imp_iic_wrap_n_desc,
+	}, {
+		.compatible = "mediatek,mt8192-imp_iic_wrap_s",
+		.data = &imp_iic_wrap_s_desc,
+	}, {
+		.compatible = "mediatek,mt8192-imp_iic_wrap_w",
+		.data = &imp_iic_wrap_w_desc,
+	}, {
+		.compatible = "mediatek,mt8192-imp_iic_wrap_ws",
+		.data = &imp_iic_wrap_ws_desc,
+	}, {
+		/* sentinel */
+	}
+};
+
+static struct platform_driver clk_mt8192_imp_iic_wrap_drv = {
+	.probe = mtk_clk_simple_probe,
+	.driver = {
+		.name = "clk-mt8192-imp_iic_wrap",
+		.of_match_table = of_match_clk_mt8192_imp_iic_wrap,
+	},
+};
+
+builtin_platform_driver(clk_mt8192_imp_iic_wrap_drv);
diff --git a/drivers/clk/mediatek/clk-mt8192-ipe.c b/drivers/clk/mediatek/clk-mt8192-ipe.c
new file mode 100644
index 000000000000..730d91b64b3f
--- /dev/null
+++ b/drivers/clk/mediatek/clk-mt8192-ipe.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright (c) 2021 MediaTek Inc.
+// Author: Chun-Jie Chen <chun-jie.chen@mediatek.com>
+
+#include <linux/clk-provider.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+#include <dt-bindings/clock/mt8192-clk.h>
+
+static const struct mtk_gate_regs ipe_cg_regs = {
+	.set_ofs = 0x4,
+	.clr_ofs = 0x8,
+	.sta_ofs = 0x0,
+};
+
+#define GATE_IPE(_id, _name, _parent, _shift)	\
+	GATE_MTK(_id, _name, _parent, &ipe_cg_regs, _shift, &mtk_clk_gate_ops_setclr)
+
+static const struct mtk_gate ipe_clks[] = {
+	GATE_IPE(CLK_IPE_LARB19, "ipe_larb19", "ipe_sel", 0),
+	GATE_IPE(CLK_IPE_LARB20, "ipe_larb20", "ipe_sel", 1),
+	GATE_IPE(CLK_IPE_SMI_SUBCOM, "ipe_smi_subcom", "ipe_sel", 2),
+	GATE_IPE(CLK_IPE_FD, "ipe_fd", "ipe_sel", 3),
+	GATE_IPE(CLK_IPE_FE, "ipe_fe", "ipe_sel", 4),
+	GATE_IPE(CLK_IPE_RSC, "ipe_rsc", "ipe_sel", 5),
+	GATE_IPE(CLK_IPE_DPE, "ipe_dpe", "ipe_sel", 6),
+	GATE_IPE(CLK_IPE_GALS, "ipe_gals", "ipe_sel", 8),
+};
+
+static const struct mtk_clk_desc ipe_desc = {
+	.clks = ipe_clks,
+	.num_clks = ARRAY_SIZE(ipe_clks),
+};
+
+static const struct of_device_id of_match_clk_mt8192_ipe[] = {
+	{
+		.compatible = "mediatek,mt8192-ipesys",
+		.data = &ipe_desc,
+	}, {
+		/* sentinel */
+	}
+};
+
+static struct platform_driver clk_mt8192_ipe_drv = {
+	.probe = mtk_clk_simple_probe,
+	.driver = {
+		.name = "clk-mt8192-ipe",
+		.of_match_table = of_match_clk_mt8192_ipe,
+	},
+};
+
+builtin_platform_driver(clk_mt8192_ipe_drv);
diff --git a/drivers/clk/mediatek/clk-mt8192-mdp.c b/drivers/clk/mediatek/clk-mt8192-mdp.c
new file mode 100644
index 000000000000..93c87ae2f332
--- /dev/null
+++ b/drivers/clk/mediatek/clk-mt8192-mdp.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright (c) 2021 MediaTek Inc.
+// Author: Chun-Jie Chen <chun-jie.chen@mediatek.com>
+
+#include <linux/clk-provider.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+#include <dt-bindings/clock/mt8192-clk.h>
+
+static const struct mtk_gate_regs mdp0_cg_regs = {
+	.set_ofs = 0x104,
+	.clr_ofs = 0x108,
+	.sta_ofs = 0x100,
+};
+
+static const struct mtk_gate_regs mdp1_cg_regs = {
+	.set_ofs = 0x124,
+	.clr_ofs = 0x128,
+	.sta_ofs = 0x120,
+};
+
+#define GATE_MDP0(_id, _name, _parent, _shift)	\
+	GATE_MTK(_id, _name, _parent, &mdp0_cg_regs, _shift, &mtk_clk_gate_ops_setclr)
+
+#define GATE_MDP1(_id, _name, _parent, _shift)	\
+	GATE_MTK(_id, _name, _parent, &mdp1_cg_regs, _shift, &mtk_clk_gate_ops_setclr)
+
+static const struct mtk_gate mdp_clks[] = {
+	/* MDP0 */
+	GATE_MDP0(CLK_MDP_RDMA0, "mdp_mdp_rdma0", "mdp_sel", 0),
+	GATE_MDP0(CLK_MDP_TDSHP0, "mdp_mdp_tdshp0", "mdp_sel", 1),
+	GATE_MDP0(CLK_MDP_IMG_DL_ASYNC0, "mdp_img_dl_async0", "mdp_sel", 2),
+	GATE_MDP0(CLK_MDP_IMG_DL_ASYNC1, "mdp_img_dl_async1", "mdp_sel", 3),
+	GATE_MDP0(CLK_MDP_RDMA1, "mdp_mdp_rdma1", "mdp_sel", 4),
+	GATE_MDP0(CLK_MDP_TDSHP1, "mdp_mdp_tdshp1", "mdp_sel", 5),
+	GATE_MDP0(CLK_MDP_SMI0, "mdp_smi0", "mdp_sel", 6),
+	GATE_MDP0(CLK_MDP_APB_BUS, "mdp_apb_bus", "mdp_sel", 7),
+	GATE_MDP0(CLK_MDP_WROT0, "mdp_mdp_wrot0", "mdp_sel", 8),
+	GATE_MDP0(CLK_MDP_RSZ0, "mdp_mdp_rsz0", "mdp_sel", 9),
+	GATE_MDP0(CLK_MDP_HDR0, "mdp_mdp_hdr0", "mdp_sel", 10),
+	GATE_MDP0(CLK_MDP_MUTEX0, "mdp_mdp_mutex0", "mdp_sel", 11),
+	GATE_MDP0(CLK_MDP_WROT1, "mdp_mdp_wrot1", "mdp_sel", 12),
+	GATE_MDP0(CLK_MDP_RSZ1, "mdp_mdp_rsz1", "mdp_sel", 13),
+	GATE_MDP0(CLK_MDP_HDR1, "mdp_mdp_hdr1", "mdp_sel", 14),
+	GATE_MDP0(CLK_MDP_FAKE_ENG0, "mdp_mdp_fake_eng0", "mdp_sel", 15),
+	GATE_MDP0(CLK_MDP_AAL0, "mdp_mdp_aal0", "mdp_sel", 16),
+	GATE_MDP0(CLK_MDP_AAL1, "mdp_mdp_aal1", "mdp_sel", 17),
+	GATE_MDP0(CLK_MDP_COLOR0, "mdp_mdp_color0", "mdp_sel", 18),
+	GATE_MDP0(CLK_MDP_COLOR1, "mdp_mdp_color1", "mdp_sel", 19),
+	/* MDP1 */
+	GATE_MDP1(CLK_MDP_IMG_DL_RELAY0_ASYNC0, "mdp_img_dl_relay0_async0", "mdp_sel", 0),
+	GATE_MDP1(CLK_MDP_IMG_DL_RELAY1_ASYNC1, "mdp_img_dl_relay1_async1", "mdp_sel", 8),
+};
+
+static const struct mtk_clk_desc mdp_desc = {
+	.clks = mdp_clks,
+	.num_clks = ARRAY_SIZE(mdp_clks),
+};
+
+static const struct of_device_id of_match_clk_mt8192_mdp[] = {
+	{
+		.compatible = "mediatek,mt8192-mdpsys",
+		.data = &mdp_desc,
+	}, {
+		/* sentinel */
+	}
+};
+
+static struct platform_driver clk_mt8192_mdp_drv = {
+	.probe = mtk_clk_simple_probe,
+	.driver = {
+		.name = "clk-mt8192-mdp",
+		.of_match_table = of_match_clk_mt8192_mdp,
+	},
+};
+
+builtin_platform_driver(clk_mt8192_mdp_drv);
diff --git a/drivers/clk/mediatek/clk-mt8192-mfg.c b/drivers/clk/mediatek/clk-mt8192-mfg.c
new file mode 100644
index 000000000000..3bbc7469f0e4
--- /dev/null
+++ b/drivers/clk/mediatek/clk-mt8192-mfg.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright (c) 2021 MediaTek Inc.
+// Author: Chun-Jie Chen <chun-jie.chen@mediatek.com>
+
+#include <linux/clk-provider.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+#include <dt-bindings/clock/mt8192-clk.h>
+
+static const struct mtk_gate_regs mfg_cg_regs = {
+	.set_ofs = 0x4,
+	.clr_ofs = 0x8,
+	.sta_ofs = 0x0,
+};
+
+#define GATE_MFG(_id, _name, _parent, _shift)	\
+	GATE_MTK(_id, _name, _parent, &mfg_cg_regs, _shift, &mtk_clk_gate_ops_setclr)
+
+static const struct mtk_gate mfg_clks[] = {
+	GATE_MFG(CLK_MFG_BG3D, "mfg_bg3d", "mfg_pll_sel", 0),
+};
+
+static const struct mtk_clk_desc mfg_desc = {
+	.clks = mfg_clks,
+	.num_clks = ARRAY_SIZE(mfg_clks),
+};
+
+static const struct of_device_id of_match_clk_mt8192_mfg[] = {
+	{
+		.compatible = "mediatek,mt8192-mfgcfg",
+		.data = &mfg_desc,
+	}, {
+		/* sentinel */
+	}
+};
+
+static struct platform_driver clk_mt8192_mfg_drv = {
+	.probe = mtk_clk_simple_probe,
+	.driver = {
+		.name = "clk-mt8192-mfg",
+		.of_match_table = of_match_clk_mt8192_mfg,
+	},
+};
+
+builtin_platform_driver(clk_mt8192_mfg_drv);
diff --git a/drivers/clk/mediatek/clk-mt8192-mm.c b/drivers/clk/mediatek/clk-mt8192-mm.c
new file mode 100644
index 000000000000..4a0b4c4bc06a
--- /dev/null
+++ b/drivers/clk/mediatek/clk-mt8192-mm.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright (c) 2021 MediaTek Inc.
+// Author: Chun-Jie Chen <chun-jie.chen@mediatek.com>
+
+#include <linux/clk-provider.h>
+#include <linux/platform_device.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+#include <dt-bindings/clock/mt8192-clk.h>
+
+static const struct mtk_gate_regs mm0_cg_regs = {
+	.set_ofs = 0x104,
+	.clr_ofs = 0x108,
+	.sta_ofs = 0x100,
+};
+
+static const struct mtk_gate_regs mm1_cg_regs = {
+	.set_ofs = 0x114,
+	.clr_ofs = 0x118,
+	.sta_ofs = 0x110,
+};
+
+static const struct mtk_gate_regs mm2_cg_regs = {
+	.set_ofs = 0x1a4,
+	.clr_ofs = 0x1a8,
+	.sta_ofs = 0x1a0,
+};
+
+#define GATE_MM0(_id, _name, _parent, _shift)	\
+	GATE_MTK(_id, _name, _parent, &mm0_cg_regs, _shift, &mtk_clk_gate_ops_setclr)
+
+#define GATE_MM1(_id, _name, _parent, _shift)	\
+	GATE_MTK(_id, _name, _parent, &mm1_cg_regs, _shift, &mtk_clk_gate_ops_setclr)
+
+#define GATE_MM2(_id, _name, _parent, _shift)	\
+	GATE_MTK(_id, _name, _parent, &mm2_cg_regs, _shift, &mtk_clk_gate_ops_setclr)
+
+static const struct mtk_gate mm_clks[] = {
+	/* MM0 */
+	GATE_MM0(CLK_MM_DISP_MUTEX0, "mm_disp_mutex0", "disp_sel", 0),
+	GATE_MM0(CLK_MM_DISP_CONFIG, "mm_disp_config", "disp_sel", 1),
+	GATE_MM0(CLK_MM_DISP_OVL0, "mm_disp_ovl0", "disp_sel", 2),
+	GATE_MM0(CLK_MM_DISP_RDMA0, "mm_disp_rdma0", "disp_sel", 3),
+	GATE_MM0(CLK_MM_DISP_OVL0_2L, "mm_disp_ovl0_2l", "disp_sel", 4),
+	GATE_MM0(CLK_MM_DISP_WDMA0, "mm_disp_wdma0", "disp_sel", 5),
+	GATE_MM0(CLK_MM_DISP_UFBC_WDMA0, "mm_disp_ufbc_wdma0", "disp_sel", 6),
+	GATE_MM0(CLK_MM_DISP_RSZ0, "mm_disp_rsz0", "disp_sel", 7),
+	GATE_MM0(CLK_MM_DISP_AAL0, "mm_disp_aal0", "disp_sel", 8),
+	GATE_MM0(CLK_MM_DISP_CCORR0, "mm_disp_ccorr0", "disp_sel", 9),
+	GATE_MM0(CLK_MM_DISP_DITHER0, "mm_disp_dither0", "disp_sel", 10),
+	GATE_MM0(CLK_MM_SMI_INFRA, "mm_smi_infra", "disp_sel", 11),
+	GATE_MM0(CLK_MM_DISP_GAMMA0, "mm_disp_gamma0", "disp_sel", 12),
+	GATE_MM0(CLK_MM_DISP_POSTMASK0, "mm_disp_postmask0", "disp_sel", 13),
+	GATE_MM0(CLK_MM_DISP_DSC_WRAP0, "mm_disp_dsc_wrap0", "disp_sel", 14),
+	GATE_MM0(CLK_MM_DSI0, "mm_dsi0", "disp_sel", 15),
+	GATE_MM0(CLK_MM_DISP_COLOR0, "mm_disp_color0", "disp_sel", 16),
+	GATE_MM0(CLK_MM_SMI_COMMON, "mm_smi_common", "disp_sel", 17),
+	GATE_MM0(CLK_MM_DISP_FAKE_ENG0, "mm_disp_fake_eng0", "disp_sel", 18),
+	GATE_MM0(CLK_MM_DISP_FAKE_ENG1, "mm_disp_fake_eng1", "disp_sel", 19),
+	GATE_MM0(CLK_MM_MDP_TDSHP4, "mm_mdp_tdshp4", "disp_sel", 20),
+	GATE_MM0(CLK_MM_MDP_RSZ4, "mm_mdp_rsz4", "disp_sel", 21),
+	GATE_MM0(CLK_MM_MDP_AAL4, "mm_mdp_aal4", "disp_sel", 22),
+	GATE_MM0(CLK_MM_MDP_HDR4, "mm_mdp_hdr4", "disp_sel", 23),
+	GATE_MM0(CLK_MM_MDP_RDMA4, "mm_mdp_rdma4", "disp_sel", 24),
+	GATE_MM0(CLK_MM_MDP_COLOR4, "mm_mdp_color4", "disp_sel", 25),
+	GATE_MM0(CLK_MM_DISP_Y2R0, "mm_disp_y2r0", "disp_sel", 26),
+	GATE_MM0(CLK_MM_SMI_GALS, "mm_smi_gals", "disp_sel", 27),
+	GATE_MM0(CLK_MM_DISP_OVL2_2L, "mm_disp_ovl2_2l", "disp_sel", 28),
+	GATE_MM0(CLK_MM_DISP_RDMA4, "mm_disp_rdma4", "disp_sel", 29),
+	GATE_MM0(CLK_MM_DISP_DPI0, "mm_disp_dpi0", "disp_sel", 30),
+	/* MM1 */
+	GATE_MM1(CLK_MM_SMI_IOMMU, "mm_smi_iommu", "disp_sel", 0),
+	/* MM2 */
+	GATE_MM2(CLK_MM_DSI_DSI0, "mm_dsi_dsi0", "disp_sel", 0),
+	GATE_MM2(CLK_MM_DPI_DPI0, "mm_dpi_dpi0", "dpi_sel", 8),
+	GATE_MM2(CLK_MM_26MHZ, "mm_26mhz", "clk26m", 24),
+	GATE_MM2(CLK_MM_32KHZ, "mm_32khz", "clk32k", 25),
+};
+
+static int clk_mt8192_mm_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *node = dev->parent->of_node;
+	struct clk_onecell_data *clk_data;
+	int r;
+
+	clk_data = mtk_alloc_clk_data(CLK_MM_NR_CLK);
+	if (!clk_data)
+		return -ENOMEM;
+
+	r = mtk_clk_register_gates(node, mm_clks, ARRAY_SIZE(mm_clks), clk_data);
+	if (r)
+		return r;
+
+	return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+}
+
+static struct platform_driver clk_mt8192_mm_drv = {
+	.probe = clk_mt8192_mm_probe,
+	.driver = {
+		.name = "clk-mt8192-mm",
+	},
+};
+
+builtin_platform_driver(clk_mt8192_mm_drv);
diff --git a/drivers/clk/mediatek/clk-mt8192-msdc.c b/drivers/clk/mediatek/clk-mt8192-msdc.c
new file mode 100644
index 000000000000..87c3b79b79cf
--- /dev/null
+++ b/drivers/clk/mediatek/clk-mt8192-msdc.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright (c) 2021 MediaTek Inc.
+// Author: Chun-Jie Chen <chun-jie.chen@mediatek.com>
+
+#include <linux/clk-provider.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+#include <dt-bindings/clock/mt8192-clk.h>
+
+static const struct mtk_gate_regs msdc_cg_regs = {
+	.set_ofs = 0xb4,
+	.clr_ofs = 0xb4,
+	.sta_ofs = 0xb4,
+};
+
+static const struct mtk_gate_regs msdc_top_cg_regs = {
+	.set_ofs = 0x0,
+	.clr_ofs = 0x0,
+	.sta_ofs = 0x0,
+};
+
+#define GATE_MSDC(_id, _name, _parent, _shift)	\
+	GATE_MTK(_id, _name, _parent, &msdc_cg_regs, _shift, &mtk_clk_gate_ops_no_setclr_inv)
+
+#define GATE_MSDC_TOP(_id, _name, _parent, _shift)	\
+	GATE_MTK(_id, _name, _parent, &msdc_top_cg_regs, _shift, &mtk_clk_gate_ops_no_setclr_inv)
+
+static const struct mtk_gate msdc_clks[] = {
+	GATE_MSDC(CLK_MSDC_AXI_WRAP, "msdc_axi_wrap", "axi_sel", 22),
+};
+
+static const struct mtk_gate msdc_top_clks[] = {
+	GATE_MSDC_TOP(CLK_MSDC_TOP_AES_0P, "msdc_top_aes_0p", "aes_msdcfde_sel", 0),
+	GATE_MSDC_TOP(CLK_MSDC_TOP_SRC_0P, "msdc_top_src_0p", "infra_msdc0_src", 1),
+	GATE_MSDC_TOP(CLK_MSDC_TOP_SRC_1P, "msdc_top_src_1p", "infra_msdc1_src", 2),
+	GATE_MSDC_TOP(CLK_MSDC_TOP_SRC_2P, "msdc_top_src_2p", "infra_msdc2_src", 3),
+	GATE_MSDC_TOP(CLK_MSDC_TOP_P_MSDC0, "msdc_top_p_msdc0", "axi_sel", 4),
+	GATE_MSDC_TOP(CLK_MSDC_TOP_P_MSDC1, "msdc_top_p_msdc1", "axi_sel", 5),
+	GATE_MSDC_TOP(CLK_MSDC_TOP_P_MSDC2, "msdc_top_p_msdc2", "axi_sel", 6),
+	GATE_MSDC_TOP(CLK_MSDC_TOP_P_CFG, "msdc_top_p_cfg", "axi_sel", 7),
+	GATE_MSDC_TOP(CLK_MSDC_TOP_AXI, "msdc_top_axi", "axi_sel", 8),
+	GATE_MSDC_TOP(CLK_MSDC_TOP_H_MST_0P, "msdc_top_h_mst_0p", "infra_msdc0", 9),
+	GATE_MSDC_TOP(CLK_MSDC_TOP_H_MST_1P, "msdc_top_h_mst_1p", "infra_msdc1", 10),
+	GATE_MSDC_TOP(CLK_MSDC_TOP_H_MST_2P, "msdc_top_h_mst_2p", "infra_msdc2", 11),
+	GATE_MSDC_TOP(CLK_MSDC_TOP_MEM_OFF_DLY_26M, "msdc_top_mem_off_dly_26m", "clk26m", 12),
+	GATE_MSDC_TOP(CLK_MSDC_TOP_32K, "msdc_top_32k", "clk32k", 13),
+	GATE_MSDC_TOP(CLK_MSDC_TOP_AHB2AXI_BRG_AXI, "msdc_top_ahb2axi_brg_axi", "axi_sel", 14),
+};
+
+static const struct mtk_clk_desc msdc_desc = {
+	.clks = msdc_clks,
+	.num_clks = ARRAY_SIZE(msdc_clks),
+};
+
+static const struct mtk_clk_desc msdc_top_desc = {
+	.clks = msdc_top_clks,
+	.num_clks = ARRAY_SIZE(msdc_top_clks),
+};
+
+static const struct of_device_id of_match_clk_mt8192_msdc[] = {
+	{
+		.compatible = "mediatek,mt8192-msdc",
+		.data = &msdc_desc,
+	}, {
+		.compatible = "mediatek,mt8192-msdc_top",
+		.data = &msdc_top_desc,
+	}, {
+		/* sentinel */
+	}
+};
+
+static struct platform_driver clk_mt8192_msdc_drv = {
+	.probe = mtk_clk_simple_probe,
+	.driver = {
+		.name = "clk-mt8192-msdc",
+		.of_match_table = of_match_clk_mt8192_msdc,
+	},
+};
+
+builtin_platform_driver(clk_mt8192_msdc_drv);
diff --git a/drivers/clk/mediatek/clk-mt8192-scp_adsp.c b/drivers/clk/mediatek/clk-mt8192-scp_adsp.c
new file mode 100644
index 000000000000..58725d79dd13
--- /dev/null
+++ b/drivers/clk/mediatek/clk-mt8192-scp_adsp.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright (c) 2021 MediaTek Inc.
+// Author: Chun-Jie Chen <chun-jie.chen@mediatek.com>
+
+#include <linux/clk-provider.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+#include <dt-bindings/clock/mt8192-clk.h>
+
+static const struct mtk_gate_regs scp_adsp_cg_regs = {
+	.set_ofs = 0x180,
+	.clr_ofs = 0x180,
+	.sta_ofs = 0x180,
+};
+
+#define GATE_SCP_ADSP(_id, _name, _parent, _shift)	\
+	GATE_MTK(_id, _name, _parent, &scp_adsp_cg_regs, _shift, &mtk_clk_gate_ops_no_setclr)
+
+static const struct mtk_gate scp_adsp_clks[] = {
+	GATE_SCP_ADSP(CLK_SCP_ADSP_AUDIODSP, "scp_adsp_audiodsp", "adsp_sel", 0),
+};
+
+static const struct mtk_clk_desc scp_adsp_desc = {
+	.clks = scp_adsp_clks,
+	.num_clks = ARRAY_SIZE(scp_adsp_clks),
+};
+
+static const struct of_device_id of_match_clk_mt8192_scp_adsp[] = {
+	{
+		.compatible = "mediatek,mt8192-scp_adsp",
+		.data = &scp_adsp_desc,
+	}, {
+		/* sentinel */
+	}
+};
+
+static struct platform_driver clk_mt8192_scp_adsp_drv = {
+	.probe = mtk_clk_simple_probe,
+	.driver = {
+		.name = "clk-mt8192-scp_adsp",
+		.of_match_table = of_match_clk_mt8192_scp_adsp,
+	},
+};
+
+builtin_platform_driver(clk_mt8192_scp_adsp_drv);
diff --git a/drivers/clk/mediatek/clk-mt8192-vdec.c b/drivers/clk/mediatek/clk-mt8192-vdec.c
new file mode 100644
index 000000000000..b1d95cfbf22a
--- /dev/null
+++ b/drivers/clk/mediatek/clk-mt8192-vdec.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright (c) 2021 MediaTek Inc.
+// Author: Chun-Jie Chen <chun-jie.chen@mediatek.com>
+
+#include <linux/clk-provider.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+#include <dt-bindings/clock/mt8192-clk.h>
+
+static const struct mtk_gate_regs vdec0_cg_regs = {
+	.set_ofs = 0x0,
+	.clr_ofs = 0x4,
+	.sta_ofs = 0x0,
+};
+
+static const struct mtk_gate_regs vdec1_cg_regs = {
+	.set_ofs = 0x200,
+	.clr_ofs = 0x204,
+	.sta_ofs = 0x200,
+};
+
+static const struct mtk_gate_regs vdec2_cg_regs = {
+	.set_ofs = 0x8,
+	.clr_ofs = 0xc,
+	.sta_ofs = 0x8,
+};
+
+#define GATE_VDEC0(_id, _name, _parent, _shift)	\
+	GATE_MTK(_id, _name, _parent, &vdec0_cg_regs, _shift, &mtk_clk_gate_ops_setclr_inv)
+
+#define GATE_VDEC1(_id, _name, _parent, _shift)	\
+	GATE_MTK(_id, _name, _parent, &vdec1_cg_regs, _shift, &mtk_clk_gate_ops_setclr_inv)
+
+#define GATE_VDEC2(_id, _name, _parent, _shift)	\
+	GATE_MTK(_id, _name, _parent, &vdec2_cg_regs, _shift, &mtk_clk_gate_ops_setclr_inv)
+
+static const struct mtk_gate vdec_clks[] = {
+	/* VDEC0 */
+	GATE_VDEC0(CLK_VDEC_VDEC, "vdec_vdec", "vdec_sel", 0),
+	GATE_VDEC0(CLK_VDEC_ACTIVE, "vdec_active", "vdec_sel", 4),
+	/* VDEC1 */
+	GATE_VDEC1(CLK_VDEC_LAT, "vdec_lat", "vdec_sel", 0),
+	GATE_VDEC1(CLK_VDEC_LAT_ACTIVE, "vdec_lat_active", "vdec_sel", 4),
+	/* VDEC2 */
+	GATE_VDEC2(CLK_VDEC_LARB1, "vdec_larb1", "vdec_sel", 0),
+};
+
+static const struct mtk_gate vdec_soc_clks[] = {
+	/* VDEC_SOC0 */
+	GATE_VDEC0(CLK_VDEC_SOC_VDEC, "vdec_soc_vdec", "vdec_sel", 0),
+	GATE_VDEC0(CLK_VDEC_SOC_VDEC_ACTIVE, "vdec_soc_vdec_active", "vdec_sel", 4),
+	/* VDEC_SOC1 */
+	GATE_VDEC1(CLK_VDEC_SOC_LAT, "vdec_soc_lat", "vdec_sel", 0),
+	GATE_VDEC1(CLK_VDEC_SOC_LAT_ACTIVE, "vdec_soc_lat_active", "vdec_sel", 4),
+	/* VDEC_SOC2 */
+	GATE_VDEC2(CLK_VDEC_SOC_LARB1, "vdec_soc_larb1", "vdec_sel", 0),
+};
+
+static const struct mtk_clk_desc vdec_desc = {
+	.clks = vdec_clks,
+	.num_clks = ARRAY_SIZE(vdec_clks),
+};
+
+static const struct mtk_clk_desc vdec_soc_desc = {
+	.clks = vdec_soc_clks,
+	.num_clks = ARRAY_SIZE(vdec_soc_clks),
+};
+
+static const struct of_device_id of_match_clk_mt8192_vdec[] = {
+	{
+		.compatible = "mediatek,mt8192-vdecsys",
+		.data = &vdec_desc,
+	}, {
+		.compatible = "mediatek,mt8192-vdecsys_soc",
+		.data = &vdec_soc_desc,
+	}, {
+		/* sentinel */
+	}
+};
+
+static struct platform_driver clk_mt8192_vdec_drv = {
+	.probe = mtk_clk_simple_probe,
+	.driver = {
+		.name = "clk-mt8192-vdec",
+		.of_match_table = of_match_clk_mt8192_vdec,
+	},
+};
+
+builtin_platform_driver(clk_mt8192_vdec_drv);
diff --git a/drivers/clk/mediatek/clk-mt8192-venc.c b/drivers/clk/mediatek/clk-mt8192-venc.c
new file mode 100644
index 000000000000..c0d867bff09e
--- /dev/null
+++ b/drivers/clk/mediatek/clk-mt8192-venc.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright (c) 2021 MediaTek Inc.
+// Author: Chun-Jie Chen <chun-jie.chen@mediatek.com>
+
+#include <linux/clk-provider.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+#include <dt-bindings/clock/mt8192-clk.h>
+
+static const struct mtk_gate_regs venc_cg_regs = {
+	.set_ofs = 0x4,
+	.clr_ofs = 0x8,
+	.sta_ofs = 0x0,
+};
+
+#define GATE_VENC(_id, _name, _parent, _shift)	\
+	GATE_MTK(_id, _name, _parent, &venc_cg_regs, _shift, &mtk_clk_gate_ops_setclr_inv)
+
+static const struct mtk_gate venc_clks[] = {
+	GATE_VENC(CLK_VENC_SET0_LARB, "venc_set0_larb", "venc_sel", 0),
+	GATE_VENC(CLK_VENC_SET1_VENC, "venc_set1_venc", "venc_sel", 4),
+	GATE_VENC(CLK_VENC_SET2_JPGENC, "venc_set2_jpgenc", "venc_sel", 8),
+	GATE_VENC(CLK_VENC_SET5_GALS, "venc_set5_gals", "venc_sel", 28),
+};
+
+static const struct mtk_clk_desc venc_desc = {
+	.clks = venc_clks,
+	.num_clks = ARRAY_SIZE(venc_clks),
+};
+
+static const struct of_device_id of_match_clk_mt8192_venc[] = {
+	{
+		.compatible = "mediatek,mt8192-vencsys",
+		.data = &venc_desc,
+	}, {
+		/* sentinel */
+	}
+};
+
+static struct platform_driver clk_mt8192_venc_drv = {
+	.probe = mtk_clk_simple_probe,
+	.driver = {
+		.name = "clk-mt8192-venc",
+		.of_match_table = of_match_clk_mt8192_venc,
+	},
+};
+
+builtin_platform_driver(clk_mt8192_venc_drv);
diff --git a/drivers/clk/mediatek/clk-mt8192.c b/drivers/clk/mediatek/clk-mt8192.c
new file mode 100644
index 000000000000..cbc7c6dbe0f4
--- /dev/null
+++ b/drivers/clk/mediatek/clk-mt8192.c
@@ -0,0 +1,1326 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright (c) 2021 MediaTek Inc.
+// Author: Chun-Jie Chen <chun-jie.chen@mediatek.com>
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "clk-mtk.h"
+#include "clk-mux.h"
+#include "clk-gate.h"
+
+#include <dt-bindings/clock/mt8192-clk.h>
+
+static DEFINE_SPINLOCK(mt8192_clk_lock);
+
+static const struct mtk_fixed_clk top_fixed_clks[] = {
+	FIXED_CLK(CLK_TOP_ULPOSC, "ulposc", NULL, 260000000),
+};
+
+static const struct mtk_fixed_factor top_early_divs[] = {
+	FACTOR(CLK_TOP_CSW_F26M_D2, "csw_f26m_d2", "clk26m", 1, 2),
+};
+
+static const struct mtk_fixed_factor top_divs[] = {
+	FACTOR(CLK_TOP_MAINPLL_D3, "mainpll_d3", "mainpll", 1, 3),
+	FACTOR(CLK_TOP_MAINPLL_D4, "mainpll_d4", "mainpll", 1, 4),
+	FACTOR(CLK_TOP_MAINPLL_D4_D2, "mainpll_d4_d2", "mainpll_d4", 1, 2),
+	FACTOR(CLK_TOP_MAINPLL_D4_D4, "mainpll_d4_d4", "mainpll_d4", 1, 4),
+	FACTOR(CLK_TOP_MAINPLL_D4_D8, "mainpll_d4_d8", "mainpll_d4", 1, 8),
+	FACTOR(CLK_TOP_MAINPLL_D4_D16, "mainpll_d4_d16", "mainpll_d4", 1, 16),
+	FACTOR(CLK_TOP_MAINPLL_D5, "mainpll_d5", "mainpll", 1, 5),
+	FACTOR(CLK_TOP_MAINPLL_D5_D2, "mainpll_d5_d2", "mainpll_d5", 1, 2),
+	FACTOR(CLK_TOP_MAINPLL_D5_D4, "mainpll_d5_d4", "mainpll_d5", 1, 4),
+	FACTOR(CLK_TOP_MAINPLL_D5_D8, "mainpll_d5_d8", "mainpll_d5", 1, 8),
+	FACTOR(CLK_TOP_MAINPLL_D6, "mainpll_d6", "mainpll", 1, 6),
+	FACTOR(CLK_TOP_MAINPLL_D6_D2, "mainpll_d6_d2", "mainpll_d6", 1, 2),
+	FACTOR(CLK_TOP_MAINPLL_D6_D4, "mainpll_d6_d4", "mainpll_d6", 1, 4),
+	FACTOR(CLK_TOP_MAINPLL_D7, "mainpll_d7", "mainpll", 1, 7),
+	FACTOR(CLK_TOP_MAINPLL_D7_D2, "mainpll_d7_d2", "mainpll_d7", 1, 2),
+	FACTOR(CLK_TOP_MAINPLL_D7_D4, "mainpll_d7_d4", "mainpll_d7", 1, 4),
+	FACTOR(CLK_TOP_MAINPLL_D7_D8, "mainpll_d7_d8", "mainpll_d7", 1, 8),
+	FACTOR(CLK_TOP_UNIVPLL_D3, "univpll_d3", "univpll", 1, 3),
+	FACTOR(CLK_TOP_UNIVPLL_D4, "univpll_d4", "univpll", 1, 4),
+	FACTOR(CLK_TOP_UNIVPLL_D4_D2, "univpll_d4_d2", "univpll_d4", 1, 2),
+	FACTOR(CLK_TOP_UNIVPLL_D4_D4, "univpll_d4_d4", "univpll_d4", 1, 4),
+	FACTOR(CLK_TOP_UNIVPLL_D4_D8, "univpll_d4_d8", "univpll_d4", 1, 8),
+	FACTOR(CLK_TOP_UNIVPLL_D5, "univpll_d5", "univpll", 1, 5),
+	FACTOR(CLK_TOP_UNIVPLL_D5_D2, "univpll_d5_d2", "univpll_d5", 1, 2),
+	FACTOR(CLK_TOP_UNIVPLL_D5_D4, "univpll_d5_d4", "univpll_d5", 1, 4),
+	FACTOR(CLK_TOP_UNIVPLL_D5_D8, "univpll_d5_d8", "univpll_d5", 1, 8),
+	FACTOR(CLK_TOP_UNIVPLL_D6, "univpll_d6", "univpll", 1, 6),
+	FACTOR(CLK_TOP_UNIVPLL_D6_D2, "univpll_d6_d2", "univpll_d6", 1, 2),
+	FACTOR(CLK_TOP_UNIVPLL_D6_D4, "univpll_d6_d4", "univpll_d6", 1, 4),
+	FACTOR(CLK_TOP_UNIVPLL_D6_D8, "univpll_d6_d8", "univpll_d6", 1, 8),
+	FACTOR(CLK_TOP_UNIVPLL_D6_D16, "univpll_d6_d16", "univpll_d6", 1, 16),
+	FACTOR(CLK_TOP_UNIVPLL_D7, "univpll_d7", "univpll", 1, 7),
+	FACTOR(CLK_TOP_APLL1, "apll1_ck", "apll1", 1, 1),
+	FACTOR(CLK_TOP_APLL1_D2, "apll1_d2", "apll1", 1, 2),
+	FACTOR(CLK_TOP_APLL1_D4, "apll1_d4", "apll1", 1, 4),
+	FACTOR(CLK_TOP_APLL1_D8, "apll1_d8", "apll1", 1, 8),
+	FACTOR(CLK_TOP_APLL2, "apll2_ck", "apll2", 1, 1),
+	FACTOR(CLK_TOP_APLL2_D2, "apll2_d2", "apll2", 1, 2),
+	FACTOR(CLK_TOP_APLL2_D4, "apll2_d4", "apll2", 1, 4),
+	FACTOR(CLK_TOP_APLL2_D8, "apll2_d8", "apll2", 1, 8),
+	FACTOR(CLK_TOP_MMPLL_D4, "mmpll_d4", "mmpll", 1, 4),
+	FACTOR(CLK_TOP_MMPLL_D4_D2, "mmpll_d4_d2", "mmpll_d4", 1, 2),
+	FACTOR(CLK_TOP_MMPLL_D5, "mmpll_d5", "mmpll", 1, 5),
+	FACTOR(CLK_TOP_MMPLL_D5_D2, "mmpll_d5_d2", "mmpll_d5", 1, 2),
+	FACTOR(CLK_TOP_MMPLL_D6, "mmpll_d6", "mmpll", 1, 6),
+	FACTOR(CLK_TOP_MMPLL_D6_D2, "mmpll_d6_d2", "mmpll_d6", 1, 2),
+	FACTOR(CLK_TOP_MMPLL_D7, "mmpll_d7", "mmpll", 1, 7),
+	FACTOR(CLK_TOP_MMPLL_D9, "mmpll_d9", "mmpll", 1, 9),
+	FACTOR(CLK_TOP_APUPLL, "apupll_ck", "apupll", 1, 2),
+	FACTOR(CLK_TOP_NPUPLL, "npupll_ck", "npupll", 1, 1),
+	FACTOR(CLK_TOP_TVDPLL, "tvdpll_ck", "tvdpll", 1, 1),
+	FACTOR(CLK_TOP_TVDPLL_D2, "tvdpll_d2", "tvdpll", 1, 2),
+	FACTOR(CLK_TOP_TVDPLL_D4, "tvdpll_d4", "tvdpll", 1, 4),
+	FACTOR(CLK_TOP_TVDPLL_D8, "tvdpll_d8", "tvdpll", 1, 8),
+	FACTOR(CLK_TOP_TVDPLL_D16, "tvdpll_d16", "tvdpll", 1, 16),
+	FACTOR(CLK_TOP_MSDCPLL, "msdcpll_ck", "msdcpll", 1, 1),
+	FACTOR(CLK_TOP_MSDCPLL_D2, "msdcpll_d2", "msdcpll", 1, 2),
+	FACTOR(CLK_TOP_MSDCPLL_D4, "msdcpll_d4", "msdcpll", 1, 4),
+	FACTOR(CLK_TOP_OSC_D2, "osc_d2", "ulposc", 1, 2),
+	FACTOR(CLK_TOP_OSC_D4, "osc_d4", "ulposc", 1, 4),
+	FACTOR(CLK_TOP_OSC_D8, "osc_d8", "ulposc", 1, 8),
+	FACTOR(CLK_TOP_OSC_D10, "osc_d10", "ulposc", 1, 10),
+	FACTOR(CLK_TOP_OSC_D16, "osc_d16", "ulposc", 1, 16),
+	FACTOR(CLK_TOP_OSC_D20, "osc_d20", "ulposc", 1, 20),
+	FACTOR(CLK_TOP_ADSPPLL, "adsppll_ck", "adsppll", 1, 1),
+	FACTOR(CLK_TOP_UNIVPLL_192M, "univpll_192m", "univpll", 1, 13),
+	FACTOR(CLK_TOP_UNIVPLL_192M_D2, "univpll_192m_d2", "univpll_192m", 1, 2),
+	FACTOR(CLK_TOP_UNIVPLL_192M_D4, "univpll_192m_d4", "univpll_192m", 1, 4),
+	FACTOR(CLK_TOP_UNIVPLL_192M_D8, "univpll_192m_d8", "univpll_192m", 1, 8),
+	FACTOR(CLK_TOP_UNIVPLL_192M_D16, "univpll_192m_d16", "univpll_192m", 1, 16),
+	FACTOR(CLK_TOP_UNIVPLL_192M_D32, "univpll_192m_d32", "univpll_192m", 1, 32),
+};
+
+static const char * const axi_parents[] = {
+	"clk26m",
+	"mainpll_d4_d4",
+	"mainpll_d7_d2",
+	"mainpll_d4_d2",
+	"mainpll_d5_d2",
+	"mainpll_d6_d2",
+	"osc_d4"
+};
+
+static const char * const spm_parents[] = {
+	"clk26m",
+	"osc_d10",
+	"mainpll_d7_d4",
+	"clk32k"
+};
+
+static const char * const scp_parents[] = {
+	"clk26m",
+	"univpll_d5",
+	"mainpll_d6_d2",
+	"mainpll_d6",
+	"univpll_d6",
+	"mainpll_d4_d2",
+	"mainpll_d5_d2",
+	"univpll_d4_d2"
+};
+
+static const char * const bus_aximem_parents[] = {
+	"clk26m",
+	"mainpll_d7_d2",
+	"mainpll_d4_d2",
+	"mainpll_d5_d2",
+	"mainpll_d6"
+};
+
+static const char * const disp_parents[] = {
+	"clk26m",
+	"univpll_d6_d2",
+	"mainpll_d5_d2",
+	"mmpll_d6_d2",
+	"univpll_d5_d2",
+	"univpll_d4_d2",
+	"mmpll_d7",
+	"univpll_d6",
+	"mainpll_d4",
+	"mmpll_d5_d2"
+};
+
+static const char * const mdp_parents[] = {
+	"clk26m",
+	"mainpll_d5_d2",
+	"mmpll_d6_d2",
+	"mainpll_d4_d2",
+	"mmpll_d4_d2",
+	"mainpll_d6",
+	"univpll_d6",
+	"mainpll_d4",
+	"tvdpll_ck",
+	"univpll_d4",
+	"mmpll_d5_d2"
+};
+
+static const char * const img1_parents[] = {
+	"clk26m",
+	"univpll_d4",
+	"tvdpll_ck",
+	"mainpll_d4",
+	"univpll_d5",
+	"mmpll_d6",
+	"univpll_d6",
+	"mainpll_d6",
+	"mmpll_d4_d2",
+	"mainpll_d4_d2",
+	"mmpll_d6_d2",
+	"mmpll_d5_d2"
+};
+
+static const char * const img2_parents[] = {
+	"clk26m",
+	"univpll_d4",
+	"tvdpll_ck",
+	"mainpll_d4",
+	"univpll_d5",
+	"mmpll_d6",
+	"univpll_d6",
+	"mainpll_d6",
+	"mmpll_d4_d2",
+	"mainpll_d4_d2",
+	"mmpll_d6_d2",
+	"mmpll_d5_d2"
+};
+
+static const char * const ipe_parents[] = {
+	"clk26m",
+	"mainpll_d4",
+	"mmpll_d6",
+	"univpll_d6",
+	"mainpll_d6",
+	"univpll_d4_d2",
+	"mainpll_d4_d2",
+	"mmpll_d6_d2",
+	"mmpll_d5_d2"
+};
+
+static const char * const dpe_parents[] = {
+	"clk26m",
+	"mainpll_d4",
+	"mmpll_d6",
+	"univpll_d6",
+	"mainpll_d6",
+	"univpll_d4_d2",
+	"univpll_d5_d2",
+	"mmpll_d6_d2"
+};
+
+static const char * const cam_parents[] = {
+	"clk26m",
+	"mainpll_d4",
+	"mmpll_d6",
+	"univpll_d4",
+	"univpll_d5",
+	"univpll_d6",
+	"mmpll_d7",
+	"univpll_d4_d2",
+	"mainpll_d4_d2",
+	"univpll_d6_d2"
+};
+
+static const char * const ccu_parents[] = {
+	"clk26m",
+	"mainpll_d4",
+	"mmpll_d6",
+	"mainpll_d6",
+	"mmpll_d7",
+	"univpll_d4_d2",
+	"mmpll_d6_d2",
+	"mmpll_d5_d2",
+	"univpll_d5",
+	"univpll_d6_d2"
+};
+
+static const char * const dsp7_parents[] = {
+	"clk26m",
+	"mainpll_d4_d2",
+	"mainpll_d6",
+	"mmpll_d6",
+	"univpll_d5",
+	"mmpll_d5",
+	"univpll_d4",
+	"mmpll_d4"
+};
+
+static const char * const mfg_ref_parents[] = {
+	"clk26m",
+	"clk26m",
+	"univpll_d6",
+	"mainpll_d5_d2"
+};
+
+static const char * const mfg_pll_parents[] = {
+	"mfg_ref_sel",
+	"mfgpll"
+};
+
+static const char * const camtg_parents[] = {
+	"clk26m",
+	"univpll_192m_d8",
+	"univpll_d6_d8",
+	"univpll_192m_d4",
+	"univpll_d6_d16",
+	"csw_f26m_d2",
+	"univpll_192m_d16",
+	"univpll_192m_d32"
+};
+
+static const char * const camtg2_parents[] = {
+	"clk26m",
+	"univpll_192m_d8",
+	"univpll_d6_d8",
+	"univpll_192m_d4",
+	"univpll_d6_d16",
+	"csw_f26m_d2",
+	"univpll_192m_d16",
+	"univpll_192m_d32"
+};
+
+static const char * const camtg3_parents[] = {
+	"clk26m",
+	"univpll_192m_d8",
+	"univpll_d6_d8",
+	"univpll_192m_d4",
+	"univpll_d6_d16",
+	"csw_f26m_d2",
+	"univpll_192m_d16",
+	"univpll_192m_d32"
+};
+
+static const char * const camtg4_parents[] = {
+	"clk26m",
+	"univpll_192m_d8",
+	"univpll_d6_d8",
+	"univpll_192m_d4",
+	"univpll_d6_d16",
+	"csw_f26m_d2",
+	"univpll_192m_d16",
+	"univpll_192m_d32"
+};
+
+static const char * const camtg5_parents[] = {
+	"clk26m",
+	"univpll_192m_d8",
+	"univpll_d6_d8",
+	"univpll_192m_d4",
+	"univpll_d6_d16",
+	"csw_f26m_d2",
+	"univpll_192m_d16",
+	"univpll_192m_d32"
+};
+
+static const char * const camtg6_parents[] = {
+	"clk26m",
+	"univpll_192m_d8",
+	"univpll_d6_d8",
+	"univpll_192m_d4",
+	"univpll_d6_d16",
+	"csw_f26m_d2",
+	"univpll_192m_d16",
+	"univpll_192m_d32"
+};
+
+static const char * const uart_parents[] = {
+	"clk26m",
+	"univpll_d6_d8"
+};
+
+static const char * const spi_parents[] = {
+	"clk26m",
+	"mainpll_d5_d4",
+	"mainpll_d6_d4",
+	"msdcpll_d4"
+};
+
+static const char * const msdc50_0_h_parents[] = {
+	"clk26m",
+	"mainpll_d4_d2",
+	"mainpll_d6_d2"
+};
+
+static const char * const msdc50_0_parents[] = {
+	"clk26m",
+	"msdcpll_ck",
+	"msdcpll_d2",
+	"univpll_d4_d4",
+	"mainpll_d6_d2",
+	"univpll_d4_d2"
+};
+
+static const char * const msdc30_1_parents[] = {
+	"clk26m",
+	"univpll_d6_d2",
+	"mainpll_d6_d2",
+	"mainpll_d7_d2",
+	"msdcpll_d2"
+};
+
+static const char * const msdc30_2_parents[] = {
+	"clk26m",
+	"univpll_d6_d2",
+	"mainpll_d6_d2",
+	"mainpll_d7_d2",
+	"msdcpll_d2"
+};
+
+static const char * const audio_parents[] = {
+	"clk26m",
+	"mainpll_d5_d8",
+	"mainpll_d7_d8",
+	"mainpll_d4_d16"
+};
+
+static const char * const aud_intbus_parents[] = {
+	"clk26m",
+	"mainpll_d4_d4",
+	"mainpll_d7_d4"
+};
+
+static const char * const pwrap_ulposc_parents[] = {
+	"osc_d10",
+	"clk26m",
+	"osc_d4",
+	"osc_d8",
+	"osc_d16"
+};
+
+static const char * const atb_parents[] = {
+	"clk26m",
+	"mainpll_d4_d2",
+	"mainpll_d5_d2"
+};
+
+static const char * const dpi_parents[] = {
+	"clk26m",
+	"tvdpll_d2",
+	"tvdpll_d4",
+	"tvdpll_d8",
+	"tvdpll_d16"
+};
+
+static const char * const scam_parents[] = {
+	"clk26m",
+	"mainpll_d5_d4"
+};
+
+static const char * const disp_pwm_parents[] = {
+	"clk26m",
+	"univpll_d6_d4",
+	"osc_d2",
+	"osc_d4",
+	"osc_d16"
+};
+
+static const char * const usb_top_parents[] = {
+	"clk26m",
+	"univpll_d5_d4",
+	"univpll_d6_d4",
+	"univpll_d5_d2"
+};
+
+static const char * const ssusb_xhci_parents[] = {
+	"clk26m",
+	"univpll_d5_d4",
+	"univpll_d6_d4",
+	"univpll_d5_d2"
+};
+
+static const char * const i2c_parents[] = {
+	"clk26m",
+	"mainpll_d4_d8",
+	"univpll_d5_d4"
+};
+
+static const char * const seninf_parents[] = {
+	"clk26m",
+	"univpll_d4_d4",
+	"univpll_d6_d2",
+	"univpll_d4_d2",
+	"univpll_d7",
+	"univpll_d6",
+	"mmpll_d6",
+	"univpll_d5"
+};
+
+static const char * const seninf1_parents[] = {
+	"clk26m",
+	"univpll_d4_d4",
+	"univpll_d6_d2",
+	"univpll_d4_d2",
+	"univpll_d7",
+	"univpll_d6",
+	"mmpll_d6",
+	"univpll_d5"
+};
+
+static const char * const seninf2_parents[] = {
+	"clk26m",
+	"univpll_d4_d4",
+	"univpll_d6_d2",
+	"univpll_d4_d2",
+	"univpll_d7",
+	"univpll_d6",
+	"mmpll_d6",
+	"univpll_d5"
+};
+
+static const char * const seninf3_parents[] = {
+	"clk26m",
+	"univpll_d4_d4",
+	"univpll_d6_d2",
+	"univpll_d4_d2",
+	"univpll_d7",
+	"univpll_d6",
+	"mmpll_d6",
+	"univpll_d5"
+};
+
+static const char * const tl_parents[] = {
+	"clk26m",
+	"univpll_192m_d2",
+	"mainpll_d6_d4"
+};
+
+static const char * const dxcc_parents[] = {
+	"clk26m",
+	"mainpll_d4_d2",
+	"mainpll_d4_d4",
+	"mainpll_d4_d8"
+};
+
+static const char * const aud_engen1_parents[] = {
+	"clk26m",
+	"apll1_d2",
+	"apll1_d4",
+	"apll1_d8"
+};
+
+static const char * const aud_engen2_parents[] = {
+	"clk26m",
+	"apll2_d2",
+	"apll2_d4",
+	"apll2_d8"
+};
+
+static const char * const aes_ufsfde_parents[] = {
+	"clk26m",
+	"mainpll_d4",
+	"mainpll_d4_d2",
+	"mainpll_d6",
+	"mainpll_d4_d4",
+	"univpll_d4_d2",
+	"univpll_d6"
+};
+
+static const char * const ufs_parents[] = {
+	"clk26m",
+	"mainpll_d4_d4",
+	"mainpll_d4_d8",
+	"univpll_d4_d4",
+	"mainpll_d6_d2",
+	"mainpll_d5_d2",
+	"msdcpll_d2"
+};
+
+static const char * const aud_1_parents[] = {
+	"clk26m",
+	"apll1_ck"
+};
+
+static const char * const aud_2_parents[] = {
+	"clk26m",
+	"apll2_ck"
+};
+
+static const char * const adsp_parents[] = {
+	"clk26m",
+	"mainpll_d6",
+	"mainpll_d5_d2",
+	"univpll_d4_d4",
+	"univpll_d4",
+	"univpll_d6",
+	"ulposc",
+	"adsppll_ck"
+};
+
+static const char * const dpmaif_main_parents[] = {
+	"clk26m",
+	"univpll_d4_d4",
+	"mainpll_d6",
+	"mainpll_d4_d2",
+	"univpll_d4_d2"
+};
+
+static const char * const venc_parents[] = {
+	"clk26m",
+	"mmpll_d7",
+	"mainpll_d6",
+	"univpll_d4_d2",
+	"mainpll_d4_d2",
+	"univpll_d6",
+	"mmpll_d6",
+	"mainpll_d5_d2",
+	"mainpll_d6_d2",
+	"mmpll_d9",
+	"univpll_d4_d4",
+	"mainpll_d4",
+	"univpll_d4",
+	"univpll_d5",
+	"univpll_d5_d2",
+	"mainpll_d5"
+};
+
+static const char * const vdec_parents[] = {
+	"clk26m",
+	"univpll_192m_d2",
+	"univpll_d5_d4",
+	"mainpll_d5",
+	"mainpll_d5_d2",
+	"mmpll_d6_d2",
+	"univpll_d5_d2",
+	"mainpll_d4_d2",
+	"univpll_d4_d2",
+	"univpll_d7",
+	"mmpll_d7",
+	"mmpll_d6",
+	"univpll_d5",
+	"mainpll_d4",
+	"univpll_d4",
+	"univpll_d6"
+};
+
+static const char * const camtm_parents[] = {
+	"clk26m",
+	"univpll_d7",
+	"univpll_d6_d2",
+	"univpll_d4_d2"
+};
+
+static const char * const pwm_parents[] = {
+	"clk26m",
+	"univpll_d4_d8"
+};
+
+static const char * const audio_h_parents[] = {
+	"clk26m",
+	"univpll_d7",
+	"apll1_ck",
+	"apll2_ck"
+};
+
+static const char * const spmi_mst_parents[] = {
+	"clk26m",
+	"csw_f26m_d2",
+	"osc_d8",
+	"osc_d10",
+	"osc_d16",
+	"osc_d20",
+	"clk32k"
+};
+
+static const char * const aes_msdcfde_parents[] = {
+	"clk26m",
+	"mainpll_d4_d2",
+	"mainpll_d6",
+	"mainpll_d4_d4",
+	"univpll_d4_d2",
+	"univpll_d6"
+};
+
+static const char * const sflash_parents[] = {
+	"clk26m",
+	"mainpll_d7_d8",
+	"univpll_d6_d8",
+	"univpll_d5_d8"
+};
+
+static const char * const apll_i2s0_m_parents[] = {
+	"aud_1_sel",
+	"aud_2_sel"
+};
+
+static const char * const apll_i2s1_m_parents[] = {
+	"aud_1_sel",
+	"aud_2_sel"
+};
+
+static const char * const apll_i2s2_m_parents[] = {
+	"aud_1_sel",
+	"aud_2_sel"
+};
+
+static const char * const apll_i2s3_m_parents[] = {
+	"aud_1_sel",
+	"aud_2_sel"
+};
+
+static const char * const apll_i2s4_m_parents[] = {
+	"aud_1_sel",
+	"aud_2_sel"
+};
+
+static const char * const apll_i2s5_m_parents[] = {
+	"aud_1_sel",
+	"aud_2_sel"
+};
+
+static const char * const apll_i2s6_m_parents[] = {
+	"aud_1_sel",
+	"aud_2_sel"
+};
+
+static const char * const apll_i2s7_m_parents[] = {
+	"aud_1_sel",
+	"aud_2_sel"
+};
+
+static const char * const apll_i2s8_m_parents[] = {
+	"aud_1_sel",
+	"aud_2_sel"
+};
+
+static const char * const apll_i2s9_m_parents[] = {
+	"aud_1_sel",
+	"aud_2_sel"
+};
+
+/*
+ * CRITICAL CLOCK:
+ * axi_sel is the main bus clock of whole SOC.
+ * spm_sel is the clock of the always-on co-processor.
+ * bus_aximem_sel is clock of the bus that access emi.
+ */
+static const struct mtk_mux top_mtk_muxes[] = {
+	/* CLK_CFG_0 */
+	MUX_GATE_CLR_SET_UPD_FLAGS(CLK_TOP_AXI_SEL, "axi_sel",
+				   axi_parents, 0x010, 0x014, 0x018, 0, 3, 7, 0x004, 0,
+				   CLK_IS_CRITICAL),
+	MUX_GATE_CLR_SET_UPD_FLAGS(CLK_TOP_SPM_SEL, "spm_sel",
+				   spm_parents, 0x010, 0x014, 0x018, 8, 2, 15, 0x004, 1,
+				   CLK_IS_CRITICAL),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_SCP_SEL, "scp_sel",
+			     scp_parents, 0x010, 0x014, 0x018, 16, 3, 23, 0x004, 2),
+	MUX_GATE_CLR_SET_UPD_FLAGS(CLK_TOP_BUS_AXIMEM_SEL, "bus_aximem_sel",
+				   bus_aximem_parents, 0x010, 0x014, 0x018, 24, 3, 31, 0x004, 3,
+				   CLK_IS_CRITICAL),
+	/* CLK_CFG_1 */
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_DISP_SEL, "disp_sel",
+			     disp_parents, 0x020, 0x024, 0x028, 0, 4, 7, 0x004, 4),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_MDP_SEL, "mdp_sel",
+			     mdp_parents, 0x020, 0x024, 0x028, 8, 4, 15, 0x004, 5),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_IMG1_SEL, "img1_sel",
+			     img1_parents, 0x020, 0x024, 0x028, 16, 4, 23, 0x004, 6),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_IMG2_SEL, "img2_sel",
+			     img2_parents, 0x020, 0x024, 0x028, 24, 4, 31, 0x004, 7),
+	/* CLK_CFG_2 */
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_IPE_SEL, "ipe_sel",
+			     ipe_parents, 0x030, 0x034, 0x038, 0, 4, 7, 0x004, 8),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_DPE_SEL, "dpe_sel",
+			     dpe_parents, 0x030, 0x034, 0x038, 8, 3, 15, 0x004, 9),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_CAM_SEL, "cam_sel",
+			     cam_parents, 0x030, 0x034, 0x038, 16, 4, 23, 0x004, 10),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_CCU_SEL, "ccu_sel",
+			     ccu_parents, 0x030, 0x034, 0x038, 24, 4, 31, 0x004, 11),
+	/* CLK_CFG_4 */
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_DSP7_SEL, "dsp7_sel",
+			     dsp7_parents, 0x050, 0x054, 0x058, 0, 3, 7, 0x004, 16),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_MFG_REF_SEL, "mfg_ref_sel",
+			     mfg_ref_parents, 0x050, 0x054, 0x058, 16, 2, 23, 0x004, 18),
+	MUX_CLR_SET_UPD(CLK_TOP_MFG_PLL_SEL, "mfg_pll_sel",
+			mfg_pll_parents, 0x050, 0x054, 0x058, 18, 1, -1, -1),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_CAMTG_SEL, "camtg_sel",
+			     camtg_parents, 0x050, 0x054, 0x058, 24, 3, 31, 0x004, 19),
+	/* CLK_CFG_5 */
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_CAMTG2_SEL, "camtg2_sel",
+			     camtg2_parents, 0x060, 0x064, 0x068, 0, 3, 7, 0x004, 20),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_CAMTG3_SEL, "camtg3_sel",
+			     camtg3_parents, 0x060, 0x064, 0x068, 8, 3, 15, 0x004, 21),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_CAMTG4_SEL, "camtg4_sel",
+			     camtg4_parents, 0x060, 0x064, 0x068, 16, 3, 23, 0x004, 22),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_CAMTG5_SEL, "camtg5_sel",
+			     camtg5_parents, 0x060, 0x064, 0x068, 24, 3, 31, 0x004, 23),
+	/* CLK_CFG_6 */
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_CAMTG6_SEL, "camtg6_sel",
+			     camtg6_parents, 0x070, 0x074, 0x078, 0, 3, 7, 0x004, 24),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_UART_SEL, "uart_sel",
+			     uart_parents, 0x070, 0x074, 0x078, 8, 1, 15, 0x004, 25),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_SPI_SEL, "spi_sel",
+			     spi_parents, 0x070, 0x074, 0x078, 16, 2, 23, 0x004, 26),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_MSDC50_0_H_SEL, "msdc50_0_h_sel",
+			     msdc50_0_h_parents, 0x070, 0x074, 0x078, 24, 2, 31, 0x004, 27),
+	/* CLK_CFG_7 */
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_MSDC50_0_SEL, "msdc50_0_sel",
+			     msdc50_0_parents, 0x080, 0x084, 0x088, 0, 3, 7, 0x004, 28),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_MSDC30_1_SEL, "msdc30_1_sel",
+			     msdc30_1_parents, 0x080, 0x084, 0x088, 8, 3, 15, 0x004, 29),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_MSDC30_2_SEL, "msdc30_2_sel",
+			     msdc30_2_parents, 0x080, 0x084, 0x088, 16, 3, 23, 0x004, 30),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_AUDIO_SEL, "audio_sel",
+			     audio_parents, 0x080, 0x084, 0x088, 24, 2, 31, 0x008, 0),
+	/* CLK_CFG_8 */
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_AUD_INTBUS_SEL, "aud_intbus_sel",
+			     aud_intbus_parents, 0x090, 0x094, 0x098, 0, 2, 7, 0x008, 1),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_PWRAP_ULPOSC_SEL, "pwrap_ulposc_sel",
+			     pwrap_ulposc_parents, 0x090, 0x094, 0x098, 8, 3, 15, 0x008, 2),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_ATB_SEL, "atb_sel",
+			     atb_parents, 0x090, 0x094, 0x098, 16, 2, 23, 0x008, 3),
+	/* CLK_CFG_9 */
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_DPI_SEL, "dpi_sel",
+			     dpi_parents, 0x0a0, 0x0a4, 0x0a8, 0, 3, 7, 0x008, 5),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_SCAM_SEL, "scam_sel",
+			     scam_parents, 0x0a0, 0x0a4, 0x0a8, 8, 1, 15, 0x008, 6),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_DISP_PWM_SEL, "disp_pwm_sel",
+			     disp_pwm_parents, 0x0a0, 0x0a4, 0x0a8, 16, 3, 23, 0x008, 7),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_USB_TOP_SEL, "usb_top_sel",
+			     usb_top_parents, 0x0a0, 0x0a4, 0x0a8, 24, 2, 31, 0x008, 8),
+	/* CLK_CFG_10 */
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_SSUSB_XHCI_SEL, "ssusb_xhci_sel",
+			     ssusb_xhci_parents, 0x0b0, 0x0b4, 0x0b8, 0, 2, 7, 0x008, 9),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_I2C_SEL, "i2c_sel",
+			     i2c_parents, 0x0b0, 0x0b4, 0x0b8, 8, 2, 15, 0x008, 10),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_SENINF_SEL, "seninf_sel",
+			     seninf_parents, 0x0b0, 0x0b4, 0x0b8, 16, 3, 23, 0x008, 11),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_SENINF1_SEL, "seninf1_sel",
+			     seninf1_parents, 0x0b0, 0x0b4, 0x0b8, 24, 3, 31, 0x008, 12),
+	/* CLK_CFG_11 */
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_SENINF2_SEL, "seninf2_sel",
+			     seninf2_parents, 0x0c0, 0x0c4, 0x0c8, 0, 3, 7, 0x008, 13),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_SENINF3_SEL, "seninf3_sel",
+			     seninf3_parents, 0x0c0, 0x0c4, 0x0c8, 8, 3, 15, 0x008, 14),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_TL_SEL, "tl_sel",
+			     tl_parents, 0x0c0, 0x0c4, 0x0c8, 16, 2, 23, 0x008, 15),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_DXCC_SEL, "dxcc_sel",
+			     dxcc_parents, 0x0c0, 0x0c4, 0x0c8, 24, 2, 31, 0x008, 16),
+	/* CLK_CFG_12 */
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_AUD_ENGEN1_SEL, "aud_engen1_sel",
+			     aud_engen1_parents, 0x0d0, 0x0d4, 0x0d8, 0, 2, 7, 0x008, 17),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_AUD_ENGEN2_SEL, "aud_engen2_sel",
+			     aud_engen2_parents, 0x0d0, 0x0d4, 0x0d8, 8, 2, 15, 0x008, 18),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_AES_UFSFDE_SEL, "aes_ufsfde_sel",
+			     aes_ufsfde_parents, 0x0d0, 0x0d4, 0x0d8, 16, 3, 23, 0x008, 19),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_UFS_SEL, "ufs_sel",
+			     ufs_parents, 0x0d0, 0x0d4, 0x0d8, 24, 3, 31, 0x008, 20),
+	/* CLK_CFG_13 */
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_AUD_1_SEL, "aud_1_sel",
+			     aud_1_parents, 0x0e0, 0x0e4, 0x0e8, 0, 1, 7, 0x008, 21),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_AUD_2_SEL, "aud_2_sel",
+			     aud_2_parents, 0x0e0, 0x0e4, 0x0e8, 8, 1, 15, 0x008, 22),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_ADSP_SEL, "adsp_sel",
+			     adsp_parents, 0x0e0, 0x0e4, 0x0e8, 16, 3, 23, 0x008, 23),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_DPMAIF_MAIN_SEL, "dpmaif_main_sel",
+			     dpmaif_main_parents, 0x0e0, 0x0e4, 0x0e8, 24, 3, 31, 0x008, 24),
+	/* CLK_CFG_14 */
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_VENC_SEL, "venc_sel",
+			     venc_parents, 0x0f0, 0x0f4, 0x0f8, 0, 4, 7, 0x008, 25),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_VDEC_SEL, "vdec_sel",
+			     vdec_parents, 0x0f0, 0x0f4, 0x0f8, 8, 4, 15, 0x008, 26),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_CAMTM_SEL, "camtm_sel",
+			     camtm_parents, 0x0f0, 0x0f4, 0x0f8, 16, 2, 23, 0x008, 27),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_PWM_SEL, "pwm_sel",
+			     pwm_parents, 0x0f0, 0x0f4, 0x0f8, 24, 1, 31, 0x008, 28),
+	/* CLK_CFG_15 */
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_AUDIO_H_SEL, "audio_h_sel",
+			     audio_h_parents, 0x100, 0x104, 0x108, 0, 2, 7, 0x008, 29),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_SPMI_MST_SEL, "spmi_mst_sel",
+			     spmi_mst_parents, 0x100, 0x104, 0x108, 8, 3, 15, 0x008, 30),
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_AES_MSDCFDE_SEL, "aes_msdcfde_sel",
+			     aes_msdcfde_parents, 0x100, 0x104, 0x108, 24, 3, 31, 0x00c, 1),
+	/* CLK_CFG_16 */
+	MUX_GATE_CLR_SET_UPD(CLK_TOP_SFLASH_SEL, "sflash_sel",
+			     sflash_parents, 0x110, 0x114, 0x118, 8, 2, 15, 0x00c, 3),
+};
+
+static struct mtk_composite top_muxes[] = {
+	/* CLK_AUDDIV_0 */
+	MUX(CLK_TOP_APLL_I2S0_M_SEL, "apll_i2s0_m_sel", apll_i2s0_m_parents, 0x320, 16, 1),
+	MUX(CLK_TOP_APLL_I2S1_M_SEL, "apll_i2s1_m_sel", apll_i2s1_m_parents, 0x320, 17, 1),
+	MUX(CLK_TOP_APLL_I2S2_M_SEL, "apll_i2s2_m_sel", apll_i2s2_m_parents, 0x320, 18, 1),
+	MUX(CLK_TOP_APLL_I2S3_M_SEL, "apll_i2s3_m_sel", apll_i2s3_m_parents, 0x320, 19, 1),
+	MUX(CLK_TOP_APLL_I2S4_M_SEL, "apll_i2s4_m_sel", apll_i2s4_m_parents, 0x320, 20, 1),
+	MUX(CLK_TOP_APLL_I2S5_M_SEL, "apll_i2s5_m_sel", apll_i2s5_m_parents, 0x320, 21, 1),
+	MUX(CLK_TOP_APLL_I2S6_M_SEL, "apll_i2s6_m_sel", apll_i2s6_m_parents, 0x320, 22, 1),
+	MUX(CLK_TOP_APLL_I2S7_M_SEL, "apll_i2s7_m_sel", apll_i2s7_m_parents, 0x320, 23, 1),
+	MUX(CLK_TOP_APLL_I2S8_M_SEL, "apll_i2s8_m_sel", apll_i2s8_m_parents, 0x320, 24, 1),
+	MUX(CLK_TOP_APLL_I2S9_M_SEL, "apll_i2s9_m_sel", apll_i2s9_m_parents, 0x320, 25, 1),
+};
+
+static const struct mtk_composite top_adj_divs[] = {
+	DIV_GATE(CLK_TOP_APLL12_DIV0, "apll12_div0", "apll_i2s0_m_sel", 0x320, 0, 0x328, 8, 0),
+	DIV_GATE(CLK_TOP_APLL12_DIV1, "apll12_div1", "apll_i2s1_m_sel", 0x320, 1, 0x328, 8, 8),
+	DIV_GATE(CLK_TOP_APLL12_DIV2, "apll12_div2", "apll_i2s2_m_sel", 0x320, 2, 0x328, 8, 16),
+	DIV_GATE(CLK_TOP_APLL12_DIV3, "apll12_div3", "apll_i2s3_m_sel", 0x320, 3, 0x328, 8, 24),
+	DIV_GATE(CLK_TOP_APLL12_DIV4, "apll12_div4", "apll_i2s4_m_sel", 0x320, 4, 0x334, 8, 0),
+	DIV_GATE(CLK_TOP_APLL12_DIVB, "apll12_divb", "apll12_div4", 0x320, 5, 0x334, 8, 8),
+	DIV_GATE(CLK_TOP_APLL12_DIV5, "apll12_div5", "apll_i2s5_m_sel", 0x320, 6, 0x334, 8, 16),
+	DIV_GATE(CLK_TOP_APLL12_DIV6, "apll12_div6", "apll_i2s6_m_sel", 0x320, 7, 0x334, 8, 24),
+	DIV_GATE(CLK_TOP_APLL12_DIV7, "apll12_div7", "apll_i2s7_m_sel", 0x320, 8, 0x338, 8, 0),
+	DIV_GATE(CLK_TOP_APLL12_DIV8, "apll12_div8", "apll_i2s8_m_sel", 0x320, 9, 0x338, 8, 8),
+	DIV_GATE(CLK_TOP_APLL12_DIV9, "apll12_div9", "apll_i2s9_m_sel", 0x320, 10, 0x338, 8, 16),
+};
+
+static const struct mtk_gate_regs apmixed_cg_regs = {
+	.set_ofs = 0x14,
+	.clr_ofs = 0x14,
+	.sta_ofs = 0x14,
+};
+
+#define GATE_APMIXED(_id, _name, _parent, _shift)	\
+	GATE_MTK(_id, _name, _parent, &apmixed_cg_regs, _shift, &mtk_clk_gate_ops_no_setclr_inv)
+
+static const struct mtk_gate apmixed_clks[] = {
+	GATE_APMIXED(CLK_APMIXED_MIPID26M, "mipid26m", "clk26m", 16),
+};
+
+static const struct mtk_gate_regs infra0_cg_regs = {
+	.set_ofs = 0x80,
+	.clr_ofs = 0x84,
+	.sta_ofs = 0x90,
+};
+
+static const struct mtk_gate_regs infra1_cg_regs = {
+	.set_ofs = 0x88,
+	.clr_ofs = 0x8c,
+	.sta_ofs = 0x94,
+};
+
+static const struct mtk_gate_regs infra2_cg_regs = {
+	.set_ofs = 0xa4,
+	.clr_ofs = 0xa8,
+	.sta_ofs = 0xac,
+};
+
+static const struct mtk_gate_regs infra3_cg_regs = {
+	.set_ofs = 0xc0,
+	.clr_ofs = 0xc4,
+	.sta_ofs = 0xc8,
+};
+
+static const struct mtk_gate_regs infra4_cg_regs = {
+	.set_ofs = 0xd0,
+	.clr_ofs = 0xd4,
+	.sta_ofs = 0xd8,
+};
+
+static const struct mtk_gate_regs infra5_cg_regs = {
+	.set_ofs = 0xe0,
+	.clr_ofs = 0xe4,
+	.sta_ofs = 0xe8,
+};
+
+#define GATE_INFRA0(_id, _name, _parent, _shift)	\
+	GATE_MTK(_id, _name, _parent, &infra0_cg_regs, _shift, &mtk_clk_gate_ops_setclr)
+
+#define GATE_INFRA1_FLAGS(_id, _name, _parent, _shift, _flag)		\
+	GATE_MTK_FLAGS(_id, _name, _parent, &infra1_cg_regs, _shift,	\
+		&mtk_clk_gate_ops_setclr, _flag)
+
+#define GATE_INFRA1(_id, _name, _parent, _shift)	\
+	GATE_INFRA1_FLAGS(_id, _name, _parent, _shift, 0)
+
+#define GATE_INFRA2(_id, _name, _parent, _shift)	\
+	GATE_MTK(_id, _name, _parent, &infra2_cg_regs, _shift, &mtk_clk_gate_ops_setclr)
+
+#define GATE_INFRA3_FLAGS(_id, _name, _parent, _shift, _flag)		\
+	GATE_MTK_FLAGS(_id, _name, _parent, &infra3_cg_regs, _shift,	\
+		&mtk_clk_gate_ops_setclr, _flag)
+
+#define GATE_INFRA3(_id, _name, _parent, _shift)	\
+	GATE_INFRA3_FLAGS(_id, _name, _parent, _shift, 0)
+
+#define GATE_INFRA4(_id, _name, _parent, _shift)	\
+	GATE_MTK(_id, _name, _parent, &infra4_cg_regs, _shift, &mtk_clk_gate_ops_setclr)
+
+#define GATE_INFRA5_FLAGS(_id, _name, _parent, _shift, _flag)		\
+	GATE_MTK_FLAGS(_id, _name, _parent, &infra5_cg_regs, _shift,	\
+		&mtk_clk_gate_ops_setclr, _flag)
+
+#define GATE_INFRA5(_id, _name, _parent, _shift)	\
+	GATE_INFRA5_FLAGS(_id, _name, _parent, _shift, 0)
+
+/*
+ * CRITICAL CLOCK:
+ * infra_133m and infra_66m are main peripheral bus clocks of SOC.
+ * infra_device_apc and infra_device_apc_sync are for device access permission control module.
+ */
+static const struct mtk_gate infra_clks[] = {
+	/* INFRA0 */
+	GATE_INFRA0(CLK_INFRA_PMIC_TMR, "infra_pmic_tmr", "pwrap_ulposc_sel", 0),
+	GATE_INFRA0(CLK_INFRA_PMIC_AP, "infra_pmic_ap", "pwrap_ulposc_sel", 1),
+	GATE_INFRA0(CLK_INFRA_PMIC_MD, "infra_pmic_md", "pwrap_ulposc_sel", 2),
+	GATE_INFRA0(CLK_INFRA_PMIC_CONN, "infra_pmic_conn", "pwrap_ulposc_sel", 3),
+	GATE_INFRA0(CLK_INFRA_SCPSYS, "infra_scpsys", "scp_sel", 4),
+	GATE_INFRA0(CLK_INFRA_SEJ, "infra_sej", "axi_sel", 5),
+	GATE_INFRA0(CLK_INFRA_APXGPT, "infra_apxgpt", "axi_sel", 6),
+	GATE_INFRA0(CLK_INFRA_GCE, "infra_gce", "axi_sel", 8),
+	GATE_INFRA0(CLK_INFRA_GCE2, "infra_gce2", "axi_sel", 9),
+	GATE_INFRA0(CLK_INFRA_THERM, "infra_therm", "axi_sel", 10),
+	GATE_INFRA0(CLK_INFRA_I2C0, "infra_i2c0", "i2c_sel", 11),
+	GATE_INFRA0(CLK_INFRA_AP_DMA_PSEUDO, "infra_ap_dma_pseudo", "axi_sel", 12),
+	GATE_INFRA0(CLK_INFRA_I2C2, "infra_i2c2", "i2c_sel", 13),
+	GATE_INFRA0(CLK_INFRA_I2C3, "infra_i2c3", "i2c_sel", 14),
+	GATE_INFRA0(CLK_INFRA_PWM_H, "infra_pwm_h", "axi_sel", 15),
+	GATE_INFRA0(CLK_INFRA_PWM1, "infra_pwm1", "pwm_sel", 16),
+	GATE_INFRA0(CLK_INFRA_PWM2, "infra_pwm2", "pwm_sel", 17),
+	GATE_INFRA0(CLK_INFRA_PWM3, "infra_pwm3", "pwm_sel", 18),
+	GATE_INFRA0(CLK_INFRA_PWM4, "infra_pwm4", "pwm_sel", 19),
+	GATE_INFRA0(CLK_INFRA_PWM, "infra_pwm", "pwm_sel", 21),
+	GATE_INFRA0(CLK_INFRA_UART0, "infra_uart0", "uart_sel", 22),
+	GATE_INFRA0(CLK_INFRA_UART1, "infra_uart1", "uart_sel", 23),
+	GATE_INFRA0(CLK_INFRA_UART2, "infra_uart2", "uart_sel", 24),
+	GATE_INFRA0(CLK_INFRA_UART3, "infra_uart3", "uart_sel", 25),
+	GATE_INFRA0(CLK_INFRA_GCE_26M, "infra_gce_26m", "axi_sel", 27),
+	GATE_INFRA0(CLK_INFRA_CQ_DMA_FPC, "infra_cq_dma_fpc", "axi_sel", 28),
+	GATE_INFRA0(CLK_INFRA_BTIF, "infra_btif", "axi_sel", 31),
+	/* INFRA1 */
+	GATE_INFRA1(CLK_INFRA_SPI0, "infra_spi0", "spi_sel", 1),
+	GATE_INFRA1(CLK_INFRA_MSDC0, "infra_msdc0", "msdc50_0_h_sel", 2),
+	GATE_INFRA1(CLK_INFRA_MSDC1, "infra_msdc1", "msdc50_0_h_sel", 4),
+	GATE_INFRA1(CLK_INFRA_MSDC2, "infra_msdc2", "msdc50_0_h_sel", 5),
+	GATE_INFRA1(CLK_INFRA_MSDC0_SRC, "infra_msdc0_src", "msdc50_0_sel", 6),
+	GATE_INFRA1(CLK_INFRA_GCPU, "infra_gcpu", "axi_sel", 8),
+	GATE_INFRA1(CLK_INFRA_TRNG, "infra_trng", "axi_sel", 9),
+	GATE_INFRA1(CLK_INFRA_AUXADC, "infra_auxadc", "clk26m", 10),
+	GATE_INFRA1(CLK_INFRA_CPUM, "infra_cpum", "axi_sel", 11),
+	GATE_INFRA1(CLK_INFRA_CCIF1_AP, "infra_ccif1_ap", "axi_sel", 12),
+	GATE_INFRA1(CLK_INFRA_CCIF1_MD, "infra_ccif1_md", "axi_sel", 13),
+	GATE_INFRA1(CLK_INFRA_AUXADC_MD, "infra_auxadc_md", "clk26m", 14),
+	GATE_INFRA1(CLK_INFRA_PCIE_TL_26M, "infra_pcie_tl_26m", "axi_sel", 15),
+	GATE_INFRA1(CLK_INFRA_MSDC1_SRC, "infra_msdc1_src", "msdc30_1_sel", 16),
+	GATE_INFRA1(CLK_INFRA_MSDC2_SRC, "infra_msdc2_src", "msdc30_2_sel", 17),
+	GATE_INFRA1(CLK_INFRA_PCIE_TL_96M, "infra_pcie_tl_96m", "tl_sel", 18),
+	GATE_INFRA1(CLK_INFRA_PCIE_PL_P_250M, "infra_pcie_pl_p_250m", "axi_sel", 19),
+	GATE_INFRA1_FLAGS(CLK_INFRA_DEVICE_APC, "infra_device_apc", "axi_sel", 20, CLK_IS_CRITICAL),
+	GATE_INFRA1(CLK_INFRA_CCIF_AP, "infra_ccif_ap", "axi_sel", 23),
+	GATE_INFRA1(CLK_INFRA_DEBUGSYS, "infra_debugsys", "axi_sel", 24),
+	GATE_INFRA1(CLK_INFRA_AUDIO, "infra_audio", "axi_sel", 25),
+	GATE_INFRA1(CLK_INFRA_CCIF_MD, "infra_ccif_md", "axi_sel", 26),
+	GATE_INFRA1(CLK_INFRA_DXCC_SEC_CORE, "infra_dxcc_sec_core", "dxcc_sel", 27),
+	GATE_INFRA1(CLK_INFRA_DXCC_AO, "infra_dxcc_ao", "dxcc_sel", 28),
+	GATE_INFRA1(CLK_INFRA_DBG_TRACE, "infra_dbg_trace", "axi_sel", 29),
+	GATE_INFRA1(CLK_INFRA_DEVMPU_B, "infra_devmpu_b", "axi_sel", 30),
+	GATE_INFRA1(CLK_INFRA_DRAMC_F26M, "infra_dramc_f26m", "clk26m", 31),
+	/* INFRA2 */
+	GATE_INFRA2(CLK_INFRA_IRTX, "infra_irtx", "clk26m", 0),
+	GATE_INFRA2(CLK_INFRA_SSUSB, "infra_ssusb", "usb_top_sel", 1),
+	GATE_INFRA2(CLK_INFRA_DISP_PWM, "infra_disp_pwm", "axi_sel", 2),
+	GATE_INFRA2(CLK_INFRA_CLDMA_B, "infra_cldma_b", "axi_sel", 3),
+	GATE_INFRA2(CLK_INFRA_AUDIO_26M_B, "infra_audio_26m_b", "clk26m", 4),
+	GATE_INFRA2(CLK_INFRA_MODEM_TEMP_SHARE, "infra_modem_temp_share", "clk26m", 5),
+	GATE_INFRA2(CLK_INFRA_SPI1, "infra_spi1", "spi_sel", 6),
+	GATE_INFRA2(CLK_INFRA_I2C4, "infra_i2c4", "i2c_sel", 7),
+	GATE_INFRA2(CLK_INFRA_SPI2, "infra_spi2", "spi_sel", 9),
+	GATE_INFRA2(CLK_INFRA_SPI3, "infra_spi3", "spi_sel", 10),
+	GATE_INFRA2(CLK_INFRA_UNIPRO_SYS, "infra_unipro_sys", "ufs_sel", 11),
+	GATE_INFRA2(CLK_INFRA_UNIPRO_TICK, "infra_unipro_tick", "clk26m", 12),
+	GATE_INFRA2(CLK_INFRA_UFS_MP_SAP_B, "infra_ufs_mp_sap_b", "clk26m", 13),
+	GATE_INFRA2(CLK_INFRA_MD32_B, "infra_md32_b", "axi_sel", 14),
+	GATE_INFRA2(CLK_INFRA_UNIPRO_MBIST, "infra_unipro_mbist", "axi_sel", 16),
+	GATE_INFRA2(CLK_INFRA_I2C5, "infra_i2c5", "i2c_sel", 18),
+	GATE_INFRA2(CLK_INFRA_I2C5_ARBITER, "infra_i2c5_arbiter", "i2c_sel", 19),
+	GATE_INFRA2(CLK_INFRA_I2C5_IMM, "infra_i2c5_imm", "i2c_sel", 20),
+	GATE_INFRA2(CLK_INFRA_I2C1_ARBITER, "infra_i2c1_arbiter", "i2c_sel", 21),
+	GATE_INFRA2(CLK_INFRA_I2C1_IMM, "infra_i2c1_imm", "i2c_sel", 22),
+	GATE_INFRA2(CLK_INFRA_I2C2_ARBITER, "infra_i2c2_arbiter", "i2c_sel", 23),
+	GATE_INFRA2(CLK_INFRA_I2C2_IMM, "infra_i2c2_imm", "i2c_sel", 24),
+	GATE_INFRA2(CLK_INFRA_SPI4, "infra_spi4", "spi_sel", 25),
+	GATE_INFRA2(CLK_INFRA_SPI5, "infra_spi5", "spi_sel", 26),
+	GATE_INFRA2(CLK_INFRA_CQ_DMA, "infra_cq_dma", "axi_sel", 27),
+	GATE_INFRA2(CLK_INFRA_UFS, "infra_ufs", "ufs_sel", 28),
+	GATE_INFRA2(CLK_INFRA_AES_UFSFDE, "infra_aes_ufsfde", "aes_ufsfde_sel", 29),
+	GATE_INFRA2(CLK_INFRA_UFS_TICK, "infra_ufs_tick", "ufs_sel", 30),
+	GATE_INFRA2(CLK_INFRA_SSUSB_XHCI, "infra_ssusb_xhci", "ssusb_xhci_sel", 31),
+	/* INFRA3 */
+	GATE_INFRA3(CLK_INFRA_MSDC0_SELF, "infra_msdc0_self", "msdc50_0_sel", 0),
+	GATE_INFRA3(CLK_INFRA_MSDC1_SELF, "infra_msdc1_self", "msdc50_0_sel", 1),
+	GATE_INFRA3(CLK_INFRA_MSDC2_SELF, "infra_msdc2_self", "msdc50_0_sel", 2),
+	GATE_INFRA3(CLK_INFRA_UFS_AXI, "infra_ufs_axi", "axi_sel", 5),
+	GATE_INFRA3(CLK_INFRA_I2C6, "infra_i2c6", "i2c_sel", 6),
+	GATE_INFRA3(CLK_INFRA_AP_MSDC0, "infra_ap_msdc0", "msdc50_0_sel", 7),
+	GATE_INFRA3(CLK_INFRA_MD_MSDC0, "infra_md_msdc0", "msdc50_0_sel", 8),
+	GATE_INFRA3(CLK_INFRA_CCIF5_AP, "infra_ccif5_ap", "axi_sel", 9),
+	GATE_INFRA3(CLK_INFRA_CCIF5_MD, "infra_ccif5_md", "axi_sel", 10),
+	GATE_INFRA3(CLK_INFRA_PCIE_TOP_H_133M, "infra_pcie_top_h_133m", "axi_sel", 11),
+	GATE_INFRA3(CLK_INFRA_FLASHIF_TOP_H_133M, "infra_flashif_top_h_133m", "axi_sel", 14),
+	GATE_INFRA3(CLK_INFRA_PCIE_PERI_26M, "infra_pcie_peri_26m", "axi_sel", 15),
+	GATE_INFRA3(CLK_INFRA_CCIF2_AP, "infra_ccif2_ap", "axi_sel", 16),
+	GATE_INFRA3(CLK_INFRA_CCIF2_MD, "infra_ccif2_md", "axi_sel", 17),
+	GATE_INFRA3(CLK_INFRA_CCIF3_AP, "infra_ccif3_ap", "axi_sel", 18),
+	GATE_INFRA3(CLK_INFRA_CCIF3_MD, "infra_ccif3_md", "axi_sel", 19),
+	GATE_INFRA3(CLK_INFRA_SEJ_F13M, "infra_sej_f13m", "clk26m", 20),
+	GATE_INFRA3(CLK_INFRA_AES, "infra_aes", "axi_sel", 21),
+	GATE_INFRA3(CLK_INFRA_I2C7, "infra_i2c7", "i2c_sel", 22),
+	GATE_INFRA3(CLK_INFRA_I2C8, "infra_i2c8", "i2c_sel", 23),
+	GATE_INFRA3(CLK_INFRA_FBIST2FPC, "infra_fbist2fpc", "msdc50_0_sel", 24),
+	GATE_INFRA3_FLAGS(CLK_INFRA_DEVICE_APC_SYNC, "infra_device_apc_sync", "axi_sel", 25,
+			  CLK_IS_CRITICAL),
+	GATE_INFRA3(CLK_INFRA_DPMAIF_MAIN, "infra_dpmaif_main", "dpmaif_main_sel", 26),
+	GATE_INFRA3(CLK_INFRA_PCIE_TL_32K, "infra_pcie_tl_32k", "axi_sel", 27),
+	GATE_INFRA3(CLK_INFRA_CCIF4_AP, "infra_ccif4_ap", "axi_sel", 28),
+	GATE_INFRA3(CLK_INFRA_CCIF4_MD, "infra_ccif4_md", "axi_sel", 29),
+	GATE_INFRA3(CLK_INFRA_SPI6, "infra_spi6", "spi_sel", 30),
+	GATE_INFRA3(CLK_INFRA_SPI7, "infra_spi7", "spi_sel", 31),
+	/* INFRA4 */
+	GATE_INFRA4(CLK_INFRA_AP_DMA, "infra_ap_dma", "infra_ap_dma_pseudo", 31),
+	/* INFRA5 */
+	GATE_INFRA5_FLAGS(CLK_INFRA_133M, "infra_133m", "axi_sel", 0, CLK_IS_CRITICAL),
+	GATE_INFRA5_FLAGS(CLK_INFRA_66M, "infra_66m", "axi_sel", 1, CLK_IS_CRITICAL),
+	GATE_INFRA5(CLK_INFRA_66M_PERI_BUS, "infra_66m_peri_bus", "axi_sel", 2),
+	GATE_INFRA5(CLK_INFRA_FREE_DCM_133M, "infra_free_dcm_133m", "axi_sel", 3),
+	GATE_INFRA5(CLK_INFRA_FREE_DCM_66M, "infra_free_dcm_66m", "axi_sel", 4),
+	GATE_INFRA5(CLK_INFRA_PERI_BUS_DCM_133M, "infra_peri_bus_dcm_133m", "axi_sel", 5),
+	GATE_INFRA5(CLK_INFRA_PERI_BUS_DCM_66M, "infra_peri_bus_dcm_66m", "axi_sel", 6),
+	GATE_INFRA5(CLK_INFRA_FLASHIF_PERI_26M, "infra_flashif_peri_26m", "axi_sel", 30),
+	GATE_INFRA5(CLK_INFRA_FLASHIF_SFLASH, "infra_flashif_fsflash", "axi_sel", 31),
+};
+
+static const struct mtk_gate_regs peri_cg_regs = {
+	.set_ofs = 0x20c,
+	.clr_ofs = 0x20c,
+	.sta_ofs = 0x20c,
+};
+
+#define GATE_PERI(_id, _name, _parent, _shift)	\
+	GATE_MTK(_id, _name, _parent, &peri_cg_regs, _shift, &mtk_clk_gate_ops_no_setclr_inv)
+
+static const struct mtk_gate peri_clks[] = {
+	GATE_PERI(CLK_PERI_PERIAXI, "peri_periaxi", "axi_sel", 31),
+};
+
+static const struct mtk_gate_regs top_cg_regs = {
+	.set_ofs = 0x150,
+	.clr_ofs = 0x150,
+	.sta_ofs = 0x150,
+};
+
+#define GATE_TOP(_id, _name, _parent, _shift)	\
+	GATE_MTK(_id, _name, _parent, &top_cg_regs, _shift, &mtk_clk_gate_ops_no_setclr_inv)
+
+static const struct mtk_gate top_clks[] = {
+	GATE_TOP(CLK_TOP_SSUSB_TOP_REF, "ssusb_top_ref", "clk26m", 24),
+	GATE_TOP(CLK_TOP_SSUSB_PHY_REF, "ssusb_phy_ref", "clk26m", 25),
+};
+
+#define MT8192_PLL_FMAX		(3800UL * MHZ)
+#define MT8192_PLL_FMIN		(1500UL * MHZ)
+#define MT8192_INTEGER_BITS	8
+
+#define PLL(_id, _name, _reg, _pwr_reg, _en_mask, _flags,		\
+			_rst_bar_mask, _pcwbits, _pd_reg, _pd_shift,	\
+			_tuner_reg, _tuner_en_reg, _tuner_en_bit,	\
+			_pcw_reg, _pcw_shift, _pcw_chg_reg,		\
+			_en_reg, _pll_en_bit) {				\
+		.id = _id,						\
+		.name = _name,						\
+		.reg = _reg,						\
+		.pwr_reg = _pwr_reg,					\
+		.en_mask = _en_mask,					\
+		.flags = _flags,					\
+		.rst_bar_mask = _rst_bar_mask,				\
+		.fmax = MT8192_PLL_FMAX,				\
+		.fmin = MT8192_PLL_FMIN,				\
+		.pcwbits = _pcwbits,					\
+		.pcwibits = MT8192_INTEGER_BITS,			\
+		.pd_reg = _pd_reg,					\
+		.pd_shift = _pd_shift,					\
+		.tuner_reg = _tuner_reg,				\
+		.tuner_en_reg = _tuner_en_reg,				\
+		.tuner_en_bit = _tuner_en_bit,				\
+		.pcw_reg = _pcw_reg,					\
+		.pcw_shift = _pcw_shift,				\
+		.pcw_chg_reg = _pcw_chg_reg,				\
+		.en_reg = _en_reg,					\
+		.pll_en_bit = _pll_en_bit,				\
+	}
+
+#define PLL_B(_id, _name, _reg, _pwr_reg, _en_mask, _flags,		\
+			_rst_bar_mask, _pcwbits, _pd_reg, _pd_shift,	\
+			_tuner_reg, _tuner_en_reg, _tuner_en_bit,	\
+			_pcw_reg, _pcw_shift)				\
+		PLL(_id, _name, _reg, _pwr_reg, _en_mask, _flags,	\
+			_rst_bar_mask, _pcwbits, _pd_reg, _pd_shift,	\
+			_tuner_reg, _tuner_en_reg, _tuner_en_bit,	\
+			_pcw_reg, _pcw_shift, 0, 0, 0)
+
+static const struct mtk_pll_data plls[] = {
+	PLL_B(CLK_APMIXED_MAINPLL, "mainpll", 0x0340, 0x034c, 0xff000000,
+	      HAVE_RST_BAR, BIT(23), 22, 0x0344, 24, 0, 0, 0, 0x0344, 0),
+	PLL_B(CLK_APMIXED_UNIVPLL, "univpll", 0x0308, 0x0314, 0xff000000,
+	      HAVE_RST_BAR, BIT(23), 22, 0x030c, 24, 0, 0, 0, 0x030c, 0),
+	PLL(CLK_APMIXED_USBPLL, "usbpll", 0x03c4, 0x03cc, 0x00000000,
+	    0, 0, 22, 0x03c4, 24, 0, 0, 0, 0x03c4, 0, 0x03c4, 0x03cc, 2),
+	PLL_B(CLK_APMIXED_MSDCPLL, "msdcpll", 0x0350, 0x035c, 0x00000000,
+	      0, 0, 22, 0x0354, 24, 0, 0, 0, 0x0354, 0),
+	PLL_B(CLK_APMIXED_MMPLL, "mmpll", 0x0360, 0x036c, 0xff000000,
+	      HAVE_RST_BAR, BIT(23), 22, 0x0364, 24, 0, 0, 0, 0x0364, 0),
+	PLL_B(CLK_APMIXED_ADSPPLL, "adsppll", 0x0370, 0x037c, 0xff000000,
+	      HAVE_RST_BAR, BIT(23), 22, 0x0374, 24, 0, 0, 0, 0x0374, 0),
+	PLL_B(CLK_APMIXED_MFGPLL, "mfgpll", 0x0268, 0x0274, 0x00000000,
+	      0, 0, 22, 0x026c, 24, 0, 0, 0, 0x026c, 0),
+	PLL_B(CLK_APMIXED_TVDPLL, "tvdpll", 0x0380, 0x038c, 0x00000000,
+	      0, 0, 22, 0x0384, 24, 0, 0, 0, 0x0384, 0),
+	PLL_B(CLK_APMIXED_APLL1, "apll1", 0x0318, 0x0328, 0x00000000,
+	      0, 0, 32, 0x031c, 24, 0x0040, 0x000c, 0, 0x0320, 0),
+	PLL_B(CLK_APMIXED_APLL2, "apll2", 0x032c, 0x033c, 0x00000000,
+	      0, 0, 32, 0x0330, 24, 0, 0, 0, 0x0334, 0),
+};
+
+static struct clk_onecell_data *top_clk_data;
+
+static void clk_mt8192_top_init_early(struct device_node *node)
+{
+	int i;
+
+	top_clk_data = mtk_alloc_clk_data(CLK_TOP_NR_CLK);
+	if (!top_clk_data)
+		return;
+
+	for (i = 0; i < CLK_TOP_NR_CLK; i++)
+		top_clk_data->clks[i] = ERR_PTR(-EPROBE_DEFER);
+
+	mtk_clk_register_factors(top_early_divs, ARRAY_SIZE(top_early_divs), top_clk_data);
+
+	of_clk_add_provider(node, of_clk_src_onecell_get, top_clk_data);
+}
+
+CLK_OF_DECLARE_DRIVER(mt8192_topckgen, "mediatek,mt8192-topckgen",
+		      clk_mt8192_top_init_early);
+
+static int clk_mt8192_top_probe(struct platform_device *pdev)
+{
+	struct device_node *node = pdev->dev.of_node;
+	int r;
+	void __iomem *base;
+
+	base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	mtk_clk_register_fixed_clks(top_fixed_clks, ARRAY_SIZE(top_fixed_clks), top_clk_data);
+	mtk_clk_register_factors(top_early_divs, ARRAY_SIZE(top_early_divs), top_clk_data);
+	mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), top_clk_data);
+	mtk_clk_register_muxes(top_mtk_muxes, ARRAY_SIZE(top_mtk_muxes), node, &mt8192_clk_lock,
+			       top_clk_data);
+	mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, &mt8192_clk_lock,
+				    top_clk_data);
+	mtk_clk_register_composites(top_adj_divs, ARRAY_SIZE(top_adj_divs), base, &mt8192_clk_lock,
+				    top_clk_data);
+	r = mtk_clk_register_gates(node, top_clks, ARRAY_SIZE(top_clks), top_clk_data);
+	if (r)
+		return r;
+
+	return of_clk_add_provider(node, of_clk_src_onecell_get, top_clk_data);
+}
+
+static int clk_mt8192_infra_probe(struct platform_device *pdev)
+{
+	struct clk_onecell_data *clk_data;
+	struct device_node *node = pdev->dev.of_node;
+	int r;
+
+	clk_data = mtk_alloc_clk_data(CLK_INFRA_NR_CLK);
+	if (!clk_data)
+		return -ENOMEM;
+
+	r = mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks), clk_data);
+	if (r)
+		return r;
+
+	return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+}
+
+static int clk_mt8192_peri_probe(struct platform_device *pdev)
+{
+	struct clk_onecell_data *clk_data;
+	struct device_node *node = pdev->dev.of_node;
+	int r;
+
+	clk_data = mtk_alloc_clk_data(CLK_PERI_NR_CLK);
+	if (!clk_data)
+		return -ENOMEM;
+
+	r = mtk_clk_register_gates(node, peri_clks, ARRAY_SIZE(peri_clks), clk_data);
+	if (r)
+		return r;
+
+	return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+}
+
+static int clk_mt8192_apmixed_probe(struct platform_device *pdev)
+{
+	struct clk_onecell_data *clk_data;
+	struct device_node *node = pdev->dev.of_node;
+	int r;
+
+	clk_data = mtk_alloc_clk_data(CLK_APMIXED_NR_CLK);
+	if (!clk_data)
+		return -ENOMEM;
+
+	mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data);
+	r = mtk_clk_register_gates(node, apmixed_clks, ARRAY_SIZE(apmixed_clks), clk_data);
+	if (r)
+		return r;
+
+	return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+}
+
+static const struct of_device_id of_match_clk_mt8192[] = {
+	{
+		.compatible = "mediatek,mt8192-apmixedsys",
+		.data = clk_mt8192_apmixed_probe,
+	}, {
+		.compatible = "mediatek,mt8192-topckgen",
+		.data = clk_mt8192_top_probe,
+	}, {
+		.compatible = "mediatek,mt8192-infracfg",
+		.data = clk_mt8192_infra_probe,
+	}, {
+		.compatible = "mediatek,mt8192-pericfg",
+		.data = clk_mt8192_peri_probe,
+	}, {
+		/* sentinel */
+	}
+};
+
+static int clk_mt8192_probe(struct platform_device *pdev)
+{
+	int (*clk_probe)(struct platform_device *pdev);
+	int r;
+
+	clk_probe = of_device_get_match_data(&pdev->dev);
+	if (!clk_probe)
+		return -EINVAL;
+
+	r = clk_probe(pdev);
+	if (r)
+		dev_err(&pdev->dev, "could not register clock provider: %s: %d\n", pdev->name, r);
+
+	return r;
+}
+
+static struct platform_driver clk_mt8192_drv = {
+	.probe = clk_mt8192_probe,
+	.driver = {
+		.name = "clk-mt8192",
+		.of_match_table = of_match_clk_mt8192,
+	},
+};
+
+static int __init clk_mt8192_init(void)
+{
+	return platform_driver_register(&clk_mt8192_drv);
+}
+
+arch_initcall(clk_mt8192_init);
diff --git a/drivers/clk/mediatek/clk-mtk.c b/drivers/clk/mediatek/clk-mtk.c
index cec1c8a27211..4b6096c44d74 100644
--- a/drivers/clk/mediatek/clk-mtk.c
+++ b/drivers/clk/mediatek/clk-mtk.c
@@ -13,6 +13,7 @@
 #include <linux/clkdev.h>
 #include <linux/mfd/syscon.h>
 #include <linux/device.h>
+#include <linux/of_device.h>
 
 #include "clk-mtk.h"
 #include "clk-gate.h"
@@ -106,7 +107,7 @@ int mtk_clk_register_gates_with_dev(struct device_node *node,
 	if (!clk_data)
 		return -ENOMEM;
 
-	regmap = syscon_node_to_regmap(node);
+	regmap = device_node_to_regmap(node);
 	if (IS_ERR(regmap)) {
 		pr_err("Cannot find regmap for %pOF: %ld\n", node,
 				PTR_ERR(regmap));
@@ -286,3 +287,25 @@ void mtk_clk_register_dividers(const struct mtk_clk_divider *mcds,
 			clk_data->clks[mcd->id] = clk;
 	}
 }
+
+int mtk_clk_simple_probe(struct platform_device *pdev)
+{
+	const struct mtk_clk_desc *mcd;
+	struct clk_onecell_data *clk_data;
+	struct device_node *node = pdev->dev.of_node;
+	int r;
+
+	mcd = of_device_get_match_data(&pdev->dev);
+	if (!mcd)
+		return -EINVAL;
+
+	clk_data = mtk_alloc_clk_data(mcd->num_clks);
+	if (!clk_data)
+		return -ENOMEM;
+
+	r = mtk_clk_register_gates(node, mcd->clks, mcd->num_clks, clk_data);
+	if (r)
+		return r;
+
+	return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+}
diff --git a/drivers/clk/mediatek/clk-mtk.h b/drivers/clk/mediatek/clk-mtk.h
index c3d6756b0c7e..7de41c3b3206 100644
--- a/drivers/clk/mediatek/clk-mtk.h
+++ b/drivers/clk/mediatek/clk-mtk.h
@@ -10,6 +10,7 @@
 #include <linux/regmap.h>
 #include <linux/bitops.h>
 #include <linux/clk-provider.h>
+#include <linux/platform_device.h>
 
 struct clk;
 struct clk_onecell_data;
@@ -213,13 +214,13 @@ struct mtk_pll_div_table {
 struct mtk_pll_data {
 	int id;
 	const char *name;
-	uint32_t reg;
-	uint32_t pwr_reg;
-	uint32_t en_mask;
-	uint32_t pd_reg;
-	uint32_t tuner_reg;
-	uint32_t tuner_en_reg;
-	uint8_t tuner_en_bit;
+	u32 reg;
+	u32 pwr_reg;
+	u32 en_mask;
+	u32 pd_reg;
+	u32 tuner_reg;
+	u32 tuner_en_reg;
+	u8 tuner_en_bit;
 	int pd_shift;
 	unsigned int flags;
 	const struct clk_ops *ops;
@@ -228,11 +229,13 @@ struct mtk_pll_data {
 	unsigned long fmax;
 	int pcwbits;
 	int pcwibits;
-	uint32_t pcw_reg;
+	u32 pcw_reg;
 	int pcw_shift;
-	uint32_t pcw_chg_reg;
+	u32 pcw_chg_reg;
 	const struct mtk_pll_div_table *div_table;
 	const char *parent_name;
+	u32 en_reg;
+	u8 pll_en_bit; /* Assume 0, indicates BIT(0) by default */
 };
 
 void mtk_clk_register_plls(struct device_node *node,
@@ -248,4 +251,11 @@ void mtk_register_reset_controller(struct device_node *np,
 void mtk_register_reset_controller_set_clr(struct device_node *np,
 	unsigned int num_regs, int regofs);
 
+struct mtk_clk_desc {
+	const struct mtk_gate *clks;
+	size_t num_clks;
+};
+
+int mtk_clk_simple_probe(struct platform_device *pdev);
+
 #endif /* __DRV_CLK_MTK_H */
diff --git a/drivers/clk/mediatek/clk-mux.c b/drivers/clk/mediatek/clk-mux.c
index b0c61709bacc..855b0a1f7eb9 100644
--- a/drivers/clk/mediatek/clk-mux.c
+++ b/drivers/clk/mediatek/clk-mux.c
@@ -116,7 +116,12 @@ static int mtk_clk_mux_set_parent_setclr_lock(struct clk_hw *hw, u8 index)
 	return 0;
 }
 
-static const struct clk_ops mtk_mux_ops = {
+const struct clk_ops mtk_mux_clr_set_upd_ops = {
+	.get_parent = mtk_clk_mux_get_parent,
+	.set_parent = mtk_clk_mux_set_parent_setclr_lock,
+};
+
+const struct clk_ops mtk_mux_gate_clr_set_upd_ops  = {
 	.enable = mtk_clk_mux_enable_setclr,
 	.disable = mtk_clk_mux_disable_setclr,
 	.is_enabled = mtk_clk_mux_is_enabled,
@@ -140,7 +145,7 @@ static struct clk *mtk_clk_register_mux(const struct mtk_mux *mux,
 	init.flags = mux->flags | CLK_SET_RATE_PARENT;
 	init.parent_names = mux->parent_names;
 	init.num_parents = mux->num_parents;
-	init.ops = &mtk_mux_ops;
+	init.ops = mux->ops;
 
 	clk_mux->regmap = regmap;
 	clk_mux->data = mux;
@@ -165,7 +170,7 @@ int mtk_clk_register_muxes(const struct mtk_mux *muxes,
 	struct clk *clk;
 	int i;
 
-	regmap = syscon_node_to_regmap(node);
+	regmap = device_node_to_regmap(node);
 	if (IS_ERR(regmap)) {
 		pr_err("Cannot find regmap for %pOF: %ld\n", node,
 		       PTR_ERR(regmap));
diff --git a/drivers/clk/mediatek/clk-mux.h b/drivers/clk/mediatek/clk-mux.h
index f1946161ade1..27841d649118 100644
--- a/drivers/clk/mediatek/clk-mux.h
+++ b/drivers/clk/mediatek/clk-mux.h
@@ -33,12 +33,13 @@ struct mtk_mux {
 	u8 gate_shift;
 	s8 upd_shift;
 
+	const struct clk_ops *ops;
 	signed char num_parents;
 };
 
 #define GATE_CLR_SET_UPD_FLAGS(_id, _name, _parents, _mux_ofs,		\
 			_mux_set_ofs, _mux_clr_ofs, _shift, _width,	\
-			_gate, _upd_ofs, _upd, _flags) {		\
+			_gate, _upd_ofs, _upd, _flags, _ops) {		\
 		.id = _id,						\
 		.name = _name,						\
 		.mux_ofs = _mux_ofs,					\
@@ -52,14 +53,19 @@ struct mtk_mux {
 		.parent_names = _parents,				\
 		.num_parents = ARRAY_SIZE(_parents),			\
 		.flags = _flags,					\
+		.ops = &_ops,						\
 	}
 
+extern const struct clk_ops mtk_mux_clr_set_upd_ops;
+extern const struct clk_ops mtk_mux_gate_clr_set_upd_ops;
+
 #define MUX_GATE_CLR_SET_UPD_FLAGS(_id, _name, _parents, _mux_ofs,	\
 			_mux_set_ofs, _mux_clr_ofs, _shift, _width,	\
 			_gate, _upd_ofs, _upd, _flags)			\
 		GATE_CLR_SET_UPD_FLAGS(_id, _name, _parents, _mux_ofs,	\
 			_mux_set_ofs, _mux_clr_ofs, _shift, _width,	\
-			_gate, _upd_ofs, _upd, _flags)			\
+			_gate, _upd_ofs, _upd, _flags,			\
+			mtk_mux_gate_clr_set_upd_ops)
 
 #define MUX_GATE_CLR_SET_UPD(_id, _name, _parents, _mux_ofs,		\
 			_mux_set_ofs, _mux_clr_ofs, _shift, _width,	\
@@ -69,6 +75,14 @@ struct mtk_mux {
 			_width, _gate, _upd_ofs, _upd,			\
 			CLK_SET_RATE_PARENT)
 
+#define MUX_CLR_SET_UPD(_id, _name, _parents, _mux_ofs,			\
+			_mux_set_ofs, _mux_clr_ofs, _shift, _width,	\
+			_upd_ofs, _upd)					\
+		GATE_CLR_SET_UPD_FLAGS(_id, _name, _parents, _mux_ofs,	\
+			_mux_set_ofs, _mux_clr_ofs, _shift, _width,	\
+			0, _upd_ofs, _upd, CLK_SET_RATE_PARENT,		\
+			mtk_mux_clr_set_upd_ops)
+
 int mtk_clk_register_muxes(const struct mtk_mux *muxes,
 			   int num, struct device_node *node,
 			   spinlock_t *lock,
diff --git a/drivers/clk/mediatek/clk-pll.c b/drivers/clk/mediatek/clk-pll.c
index f440f2cd0b69..7fb001a4e7d8 100644
--- a/drivers/clk/mediatek/clk-pll.c
+++ b/drivers/clk/mediatek/clk-pll.c
@@ -44,6 +44,7 @@ struct mtk_clk_pll {
 	void __iomem	*tuner_en_addr;
 	void __iomem	*pcw_addr;
 	void __iomem	*pcw_chg_addr;
+	void __iomem	*en_addr;
 	const struct mtk_pll_data *data;
 };
 
@@ -56,7 +57,7 @@ static int mtk_pll_is_prepared(struct clk_hw *hw)
 {
 	struct mtk_clk_pll *pll = to_mtk_clk_pll(hw);
 
-	return (readl(pll->base_addr + REG_CON0) & CON0_BASE_EN) != 0;
+	return (readl(pll->en_addr) & BIT(pll->data->pll_en_bit)) != 0;
 }
 
 static unsigned long __mtk_pll_recalc_rate(struct mtk_clk_pll *pll, u32 fin,
@@ -238,6 +239,7 @@ static int mtk_pll_prepare(struct clk_hw *hw)
 {
 	struct mtk_clk_pll *pll = to_mtk_clk_pll(hw);
 	u32 r;
+	u32 div_en_mask;
 
 	r = readl(pll->pwr_addr) | CON0_PWR_ON;
 	writel(r, pll->pwr_addr);
@@ -247,9 +249,14 @@ static int mtk_pll_prepare(struct clk_hw *hw)
 	writel(r, pll->pwr_addr);
 	udelay(1);
 
-	r = readl(pll->base_addr + REG_CON0);
-	r |= pll->data->en_mask;
-	writel(r, pll->base_addr + REG_CON0);
+	r = readl(pll->en_addr) | BIT(pll->data->pll_en_bit);
+	writel(r, pll->en_addr);
+
+	div_en_mask = pll->data->en_mask & ~CON0_BASE_EN;
+	if (div_en_mask) {
+		r = readl(pll->base_addr + REG_CON0) | div_en_mask;
+		writel(r, pll->base_addr + REG_CON0);
+	}
 
 	__mtk_pll_tuner_enable(pll);
 
@@ -268,6 +275,7 @@ static void mtk_pll_unprepare(struct clk_hw *hw)
 {
 	struct mtk_clk_pll *pll = to_mtk_clk_pll(hw);
 	u32 r;
+	u32 div_en_mask;
 
 	if (pll->data->flags & HAVE_RST_BAR) {
 		r = readl(pll->base_addr + REG_CON0);
@@ -277,9 +285,14 @@ static void mtk_pll_unprepare(struct clk_hw *hw)
 
 	__mtk_pll_tuner_disable(pll);
 
-	r = readl(pll->base_addr + REG_CON0);
-	r &= ~CON0_BASE_EN;
-	writel(r, pll->base_addr + REG_CON0);
+	div_en_mask = pll->data->en_mask & ~CON0_BASE_EN;
+	if (div_en_mask) {
+		r = readl(pll->base_addr + REG_CON0) & ~div_en_mask;
+		writel(r, pll->base_addr + REG_CON0);
+	}
+
+	r = readl(pll->en_addr) & ~BIT(pll->data->pll_en_bit);
+	writel(r, pll->en_addr);
 
 	r = readl(pll->pwr_addr) | CON0_ISO_EN;
 	writel(r, pll->pwr_addr);
@@ -321,6 +334,10 @@ static struct clk *mtk_clk_register_pll(const struct mtk_pll_data *data,
 		pll->tuner_addr = base + data->tuner_reg;
 	if (data->tuner_en_reg)
 		pll->tuner_en_addr = base + data->tuner_en_reg;
+	if (data->en_reg)
+		pll->en_addr = base + data->en_reg;
+	else
+		pll->en_addr = pll->base_addr + REG_CON0;
 	pll->hw.init = &init;
 	pll->data = data;
 
diff --git a/drivers/clk/mediatek/reset.c b/drivers/clk/mediatek/reset.c
index cb939c071b0c..e562dc3c10a4 100644
--- a/drivers/clk/mediatek/reset.c
+++ b/drivers/clk/mediatek/reset.c
@@ -98,7 +98,7 @@ static void mtk_register_reset_controller_common(struct device_node *np,
 	int ret;
 	struct regmap *regmap;
 
-	regmap = syscon_node_to_regmap(np);
+	regmap = device_node_to_regmap(np);
 	if (IS_ERR(regmap)) {
 		pr_err("Cannot find regmap for %pOF: %ld\n", np,
 				PTR_ERR(regmap));
diff --git a/drivers/clk/mvebu/kirkwood.c b/drivers/clk/mvebu/kirkwood.c
index 47680237d0be..8bc893df4736 100644
--- a/drivers/clk/mvebu/kirkwood.c
+++ b/drivers/clk/mvebu/kirkwood.c
@@ -265,6 +265,7 @@ static const char *powersave_parents[] = {
 static const struct clk_muxing_soc_desc kirkwood_mux_desc[] __initconst = {
 	{ "powersave", powersave_parents, ARRAY_SIZE(powersave_parents),
 		11, 1, 0 },
+	{ }
 };
 
 static struct clk *clk_muxing_get_src(
diff --git a/drivers/clk/pistachio/Kconfig b/drivers/clk/pistachio/Kconfig
new file mode 100644
index 000000000000..d00f7b4a25fc
--- /dev/null
+++ b/drivers/clk/pistachio/Kconfig
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config COMMON_CLK_PISTACHIO
+	bool "Support for IMG Pistachio SoC clock controllers"
+	depends on MIPS || COMPILE_TEST
+	help
+	  Support for the IMG Pistachio SoC clock controller.
+	  Say Y if you want to include clock support.
diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig
index 62e00e15495c..0a5596797b93 100644
--- a/drivers/clk/qcom/Kconfig
+++ b/drivers/clk/qcom/Kconfig
@@ -240,6 +240,14 @@ config MSM_MMCC_8960
 	  Say Y if you want to support multimedia devices such as display,
 	  graphics, video encode/decode, camera, etc.
 
+config MSM_GCC_8953
+	tristate "MSM8953 Global Clock Controller"
+	select QCOM_GDSC
+	help
+	  Support for the global clock controller on msm8953 devices.
+	  Say Y if you want to use devices such as UART, SPI i2c, USB,
+	  SD/eMMC, display, graphics, camera etc.
+
 config MSM_GCC_8974
 	tristate "MSM8974 Global Clock Controller"
 	select QCOM_GDSC
@@ -257,6 +265,15 @@ config MSM_MMCC_8974
 	  Say Y if you want to support multimedia devices such as display,
 	  graphics, video encode/decode, camera, etc.
 
+config MSM_MMCC_8994
+	tristate "MSM8994 Multimedia Clock Controller"
+	select MSM_GCC_8994
+	select QCOM_GDSC
+	help
+	  Support for the multimedia clock controller on msm8994 devices.
+	  Say Y if you want to support multimedia devices such as display,
+	  graphics, video encode/decode, camera, etc.
+
 config MSM_GCC_8994
 	tristate "MSM8994 Global Clock Controller"
 	help
@@ -332,6 +349,15 @@ config SC_DISPCC_7180
 	  Say Y if you want to support display devices and functionality such as
 	  splash screen.
 
+config SC_DISPCC_7280
+	tristate "SC7280 Display Clock Controller"
+	select SC_GCC_7280
+	help
+	  Support for the display clock controller on Qualcomm Technologies, Inc.
+	  SC7280 devices.
+	  Say Y if you want to support display devices and functionality such as
+	  splash screen.
+
 config SC_GCC_7180
 	tristate "SC7180 Global Clock Controller"
 	select QCOM_GDSC
@@ -376,6 +402,14 @@ config SC_GPUCC_7180
 	  Say Y if you want to support graphics controller devices and
 	  functionality such as 3D graphics.
 
+config SC_GPUCC_7280
+	tristate "SC7280 Graphics Clock Controller"
+	select SC_GCC_7280
+	help
+	  Support for the graphics clock controller on SC7280 devices.
+	  Say Y if you want to support graphics controller devices and
+	  functionality such as 3D graphics.
+
 config SC_MSS_7180
 	tristate "SC7180 Modem Clock Controller"
 	select SC_GCC_7180
@@ -393,6 +427,14 @@ config SC_VIDEOCC_7180
 	  Say Y if you want to support video devices and functionality such as
 	  video encode and decode.
 
+config SC_VIDEOCC_7280
+	tristate "SC7280 Video Clock Controller"
+	select SC_GCC_7280
+	help
+	  Support for the video clock controller on SC7280 devices.
+	  Say Y if you want to support video devices and functionality such as
+	  video encode and decode.
+
 config SDM_CAMCC_845
 	tristate "SDM845 Camera Clock Controller"
 	select SDM_GCC_845
@@ -506,6 +548,13 @@ config SM_DISPCC_8250
 	  Say Y if you want to support display devices and functionality such as
 	  splash screen.
 
+config SM_GCC_6115
+	tristate "SM6115 and SM4250 Global Clock Controller"
+	help
+	  Support for the global clock controller on SM6115 and SM4250 devices.
+	  Say Y if you want to use peripheral devices such as UART, SPI,
+	  i2C, USB, UFS, SDDC, PCIe, etc.
+
 config SM_GCC_6125
 	tristate "SM6125 Global Clock Controller"
 	help
@@ -513,6 +562,13 @@ config SM_GCC_6125
 	  Say Y if you want to use peripheral devices such as UART,
 	  SPI, I2C, USB, SD/UFS, PCIe etc.
 
+config SM_GCC_6350
+	tristate "SM6350 Global Clock Controller"
+	help
+	  Support for the global clock controller on SM6350 devices.
+	  Say Y if you want to use peripheral devices such as UART,
+	  SPI, I2C, USB, SD/UFS, PCIe etc.
+
 config SM_GCC_8150
 	tristate "SM8150 Global Clock Controller"
 	help
@@ -554,7 +610,7 @@ config SM_GPUCC_8250
 
 config SM_VIDEOCC_8150
 	tristate "SM8150 Video Clock Controller"
-	select SDM_GCC_8150
+	select SM_GCC_8150
 	select QCOM_GDSC
 	help
 	  Support for the video clock controller on SM8150 devices.
@@ -563,7 +619,7 @@ config SM_VIDEOCC_8150
 
 config SM_VIDEOCC_8250
 	tristate "SM8250 Video Clock Controller"
-	select SDM_GCC_8250
+	select SM_GCC_8250
 	select QCOM_GDSC
 	help
 	  Support for the video clock controller on SM8250 devices.
diff --git a/drivers/clk/qcom/Makefile b/drivers/clk/qcom/Makefile
index c2a1cafb31bc..9825ef843f4a 100644
--- a/drivers/clk/qcom/Makefile
+++ b/drivers/clk/qcom/Makefile
@@ -33,6 +33,7 @@ obj-$(CONFIG_MDM_LCC_9615) += lcc-mdm9615.o
 obj-$(CONFIG_MSM_GCC_8660) += gcc-msm8660.o
 obj-$(CONFIG_MSM_GCC_8916) += gcc-msm8916.o
 obj-$(CONFIG_MSM_GCC_8939) += gcc-msm8939.o
+obj-$(CONFIG_MSM_GCC_8953) += gcc-msm8953.o
 obj-$(CONFIG_MSM_GCC_8960) += gcc-msm8960.o
 obj-$(CONFIG_MSM_GCC_8974) += gcc-msm8974.o
 obj-$(CONFIG_MSM_GCC_8994) += gcc-msm8994.o
@@ -42,6 +43,7 @@ obj-$(CONFIG_MSM_GCC_8998) += gcc-msm8998.o
 obj-$(CONFIG_MSM_GPUCC_8998) += gpucc-msm8998.o
 obj-$(CONFIG_MSM_MMCC_8960) += mmcc-msm8960.o
 obj-$(CONFIG_MSM_MMCC_8974) += mmcc-msm8974.o
+obj-$(CONFIG_MSM_MMCC_8994) += mmcc-msm8994.o
 obj-$(CONFIG_MSM_MMCC_8996) += mmcc-msm8996.o
 obj-$(CONFIG_MSM_MMCC_8998) += mmcc-msm8998.o
 obj-$(CONFIG_QCOM_A53PLL) += a53-pll.o
@@ -57,13 +59,16 @@ obj-$(CONFIG_QCS_Q6SSTOP_404) += q6sstop-qcs404.o
 obj-$(CONFIG_QCS_TURING_404) += turingcc-qcs404.o
 obj-$(CONFIG_SC_CAMCC_7180) += camcc-sc7180.o
 obj-$(CONFIG_SC_DISPCC_7180) += dispcc-sc7180.o
+obj-$(CONFIG_SC_DISPCC_7280) += dispcc-sc7280.o
 obj-$(CONFIG_SC_GCC_7180) += gcc-sc7180.o
 obj-$(CONFIG_SC_GCC_7280) += gcc-sc7280.o
 obj-$(CONFIG_SC_GCC_8180X) += gcc-sc8180x.o
 obj-$(CONFIG_SC_GPUCC_7180) += gpucc-sc7180.o
+obj-$(CONFIG_SC_GPUCC_7280) += gpucc-sc7280.o
 obj-$(CONFIG_SC_LPASS_CORECC_7180) += lpasscorecc-sc7180.o
 obj-$(CONFIG_SC_MSS_7180) += mss-sc7180.o
 obj-$(CONFIG_SC_VIDEOCC_7180) += videocc-sc7180.o
+obj-$(CONFIG_SC_VIDEOCC_7280) += videocc-sc7280.o
 obj-$(CONFIG_SDM_CAMCC_845) += camcc-sdm845.o
 obj-$(CONFIG_SDM_DISPCC_845) += dispcc-sdm845.o
 obj-$(CONFIG_SDM_GCC_660) += gcc-sdm660.o
@@ -76,7 +81,9 @@ obj-$(CONFIG_SDM_VIDEOCC_845) += videocc-sdm845.o
 obj-$(CONFIG_SDX_GCC_55) += gcc-sdx55.o
 obj-$(CONFIG_SM_CAMCC_8250) += camcc-sm8250.o
 obj-$(CONFIG_SM_DISPCC_8250) += dispcc-sm8250.o
+obj-$(CONFIG_SM_GCC_6115) += gcc-sm6115.o
 obj-$(CONFIG_SM_GCC_6125) += gcc-sm6125.o
+obj-$(CONFIG_SM_GCC_6350) += gcc-sm6350.o
 obj-$(CONFIG_SM_GCC_8150) += gcc-sm8150.o
 obj-$(CONFIG_SM_GCC_8250) += gcc-sm8250.o
 obj-$(CONFIG_SM_GCC_8350) += gcc-sm8350.o
diff --git a/drivers/clk/qcom/a53-pll.c b/drivers/clk/qcom/a53-pll.c
index af6ac17c7dae..9e6decb9c26f 100644
--- a/drivers/clk/qcom/a53-pll.c
+++ b/drivers/clk/qcom/a53-pll.c
@@ -6,9 +6,11 @@
  * Author: Georgi Djakov <georgi.djakov@linaro.org>
  */
 
+#include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
+#include <linux/pm_opp.h>
 #include <linux/regmap.h>
 #include <linux/module.h>
 
@@ -34,9 +36,59 @@ static const struct regmap_config a53pll_regmap_config = {
 	.fast_io		= true,
 };
 
+static struct pll_freq_tbl *qcom_a53pll_get_freq_tbl(struct device *dev)
+{
+	struct pll_freq_tbl *freq_tbl;
+	unsigned long xo_freq;
+	unsigned long freq;
+	struct clk *xo_clk;
+	int count;
+	int ret;
+	int i;
+
+	xo_clk = devm_clk_get(dev, "xo");
+	if (IS_ERR(xo_clk))
+		return NULL;
+
+	xo_freq = clk_get_rate(xo_clk);
+
+	ret = devm_pm_opp_of_add_table(dev);
+	if (ret)
+		return NULL;
+
+	count = dev_pm_opp_get_opp_count(dev);
+	if (count <= 0)
+		return NULL;
+
+	freq_tbl = devm_kcalloc(dev, count + 1, sizeof(*freq_tbl), GFP_KERNEL);
+	if (!freq_tbl)
+		return NULL;
+
+	for (i = 0, freq = 0; i < count; i++, freq++) {
+		struct dev_pm_opp *opp;
+
+		opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+		if (IS_ERR(opp))
+			return NULL;
+
+		/* Skip the freq that is not divisible */
+		if (freq % xo_freq)
+			continue;
+
+		freq_tbl[i].freq = freq;
+		freq_tbl[i].l = freq / xo_freq;
+		freq_tbl[i].n = 1;
+
+		dev_pm_opp_put(opp);
+	}
+
+	return freq_tbl;
+}
+
 static int qcom_a53pll_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
 	struct regmap *regmap;
 	struct resource *res;
 	struct clk_pll *pll;
@@ -64,13 +116,22 @@ static int qcom_a53pll_probe(struct platform_device *pdev)
 	pll->mode_reg = 0x00;
 	pll->status_reg = 0x1c;
 	pll->status_bit = 16;
-	pll->freq_tbl = a53pll_freq;
 
-	init.name = "a53pll";
+	pll->freq_tbl = qcom_a53pll_get_freq_tbl(dev);
+	if (!pll->freq_tbl) {
+		/* Fall on a53pll_freq if no freq_tbl is found from OPP */
+		pll->freq_tbl = a53pll_freq;
+	}
+
+	/* Use an unique name by appending @unit-address */
+	init.name = devm_kasprintf(dev, GFP_KERNEL, "a53pll%s",
+				   strchrnul(np->full_name, '@'));
+	if (!init.name)
+		return -ENOMEM;
+
 	init.parent_names = (const char *[]){ "xo" };
 	init.num_parents = 1;
 	init.ops = &clk_pll_sr2_ops;
-	init.flags = CLK_IS_CRITICAL;
 	pll->clkr.hw.init = &init;
 
 	ret = devm_clk_register_regmap(dev, &pll->clkr);
@@ -91,6 +152,7 @@ static int qcom_a53pll_probe(struct platform_device *pdev)
 
 static const struct of_device_id qcom_a53pll_match_table[] = {
 	{ .compatible = "qcom,msm8916-a53pll" },
+	{ .compatible = "qcom,msm8939-a53pll" },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, qcom_a53pll_match_table);
diff --git a/drivers/clk/qcom/apcs-msm8916.c b/drivers/clk/qcom/apcs-msm8916.c
index cf69a97d0439..89e0730810ac 100644
--- a/drivers/clk/qcom/apcs-msm8916.c
+++ b/drivers/clk/qcom/apcs-msm8916.c
@@ -46,6 +46,7 @@ static int qcom_apcs_msm8916_clk_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct device *parent = dev->parent;
+	struct device_node *np = parent->of_node;
 	struct clk_regmap_mux_div *a53cc;
 	struct regmap *regmap;
 	struct clk_init_data init = { };
@@ -61,11 +62,16 @@ static int qcom_apcs_msm8916_clk_probe(struct platform_device *pdev)
 	if (!a53cc)
 		return -ENOMEM;
 
-	init.name = "a53mux";
+	/* Use an unique name by appending parent's @unit-address */
+	init.name = devm_kasprintf(dev, GFP_KERNEL, "a53mux%s",
+				   strchrnul(np->full_name, '@'));
+	if (!init.name)
+		return -ENOMEM;
+
 	init.parent_data = pdata;
 	init.num_parents = ARRAY_SIZE(pdata);
 	init.ops = &clk_regmap_mux_div_ops;
-	init.flags = CLK_SET_RATE_PARENT;
+	init.flags = CLK_IS_CRITICAL | CLK_SET_RATE_PARENT;
 
 	a53cc->clkr.hw.init = &init;
 	a53cc->clkr.regmap = regmap;
diff --git a/drivers/clk/qcom/camcc-sc7180.c b/drivers/clk/qcom/camcc-sc7180.c
index 9bcf2f8ed4de..ce73ee9037cb 100644
--- a/drivers/clk/qcom/camcc-sc7180.c
+++ b/drivers/clk/qcom/camcc-sc7180.c
@@ -1652,32 +1652,35 @@ static int cam_cc_sc7180_probe(struct platform_device *pdev)
 	struct regmap *regmap;
 	int ret;
 
-	pm_runtime_enable(&pdev->dev);
-	ret = pm_clk_create(&pdev->dev);
+	ret = devm_pm_runtime_enable(&pdev->dev);
+	if (ret < 0)
+		return ret;
+
+	ret = devm_pm_clk_create(&pdev->dev);
 	if (ret < 0)
 		return ret;
 
 	ret = pm_clk_add(&pdev->dev, "xo");
 	if (ret < 0) {
 		dev_err(&pdev->dev, "Failed to acquire XO clock\n");
-		goto disable_pm_runtime;
+		return ret;
 	}
 
 	ret = pm_clk_add(&pdev->dev, "iface");
 	if (ret < 0) {
 		dev_err(&pdev->dev, "Failed to acquire iface clock\n");
-		goto disable_pm_runtime;
+		return ret;
 	}
 
 	ret = pm_runtime_get(&pdev->dev);
 	if (ret)
-		goto destroy_pm_clk;
+		return ret;
 
 	regmap = qcom_cc_map(pdev, &cam_cc_sc7180_desc);
 	if (IS_ERR(regmap)) {
 		ret = PTR_ERR(regmap);
 		pm_runtime_put(&pdev->dev);
-		goto destroy_pm_clk;
+		return ret;
 	}
 
 	clk_fabia_pll_configure(&cam_cc_pll0, regmap, &cam_cc_pll0_config);
@@ -1689,18 +1692,10 @@ static int cam_cc_sc7180_probe(struct platform_device *pdev)
 	pm_runtime_put(&pdev->dev);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "Failed to register CAM CC clocks\n");
-		goto destroy_pm_clk;
+		return ret;
 	}
 
 	return 0;
-
-destroy_pm_clk:
-	pm_clk_destroy(&pdev->dev);
-
-disable_pm_runtime:
-	pm_runtime_disable(&pdev->dev);
-
-	return ret;
 }
 
 static const struct dev_pm_ops cam_cc_pm_ops = {
diff --git a/drivers/clk/qcom/clk-rpmh.c b/drivers/clk/qcom/clk-rpmh.c
index 552d1cbfea4c..441d7a20e6f3 100644
--- a/drivers/clk/qcom/clk-rpmh.c
+++ b/drivers/clk/qcom/clk-rpmh.c
@@ -536,6 +536,26 @@ static const struct clk_rpmh_desc clk_rpmh_sc7280 = {
 	.num_clks = ARRAY_SIZE(sc7280_rpmh_clocks),
 };
 
+DEFINE_CLK_RPMH_VRM(sm6350, ln_bb_clk2, ln_bb_clk2_ao, "lnbclkg2", 4);
+DEFINE_CLK_RPMH_VRM(sm6350, ln_bb_clk3, ln_bb_clk3_ao, "lnbclkg3", 4);
+DEFINE_CLK_RPMH_ARC(sm6350, qlink, qlink_ao, "qphy.lvl", 0x1, 4);
+
+static struct clk_hw *sm6350_rpmh_clocks[] = {
+	[RPMH_CXO_CLK]		= &sc7280_bi_tcxo.hw,
+	[RPMH_CXO_CLK_A]	= &sc7280_bi_tcxo_ao.hw,
+	[RPMH_LN_BB_CLK2]	= &sm6350_ln_bb_clk2.hw,
+	[RPMH_LN_BB_CLK2_A]	= &sm6350_ln_bb_clk2_ao.hw,
+	[RPMH_LN_BB_CLK3]	= &sm6350_ln_bb_clk3.hw,
+	[RPMH_LN_BB_CLK3_A]	= &sm6350_ln_bb_clk3_ao.hw,
+	[RPMH_QLINK_CLK]	= &sm6350_qlink.hw,
+	[RPMH_QLINK_CLK_A]	= &sm6350_qlink_ao.hw,
+};
+
+static const struct clk_rpmh_desc clk_rpmh_sm6350 = {
+	.clks = sm6350_rpmh_clocks,
+	.num_clks = ARRAY_SIZE(sm6350_rpmh_clocks),
+};
+
 static struct clk_hw *of_clk_rpmh_hw_get(struct of_phandle_args *clkspec,
 					 void *data)
 {
@@ -623,6 +643,7 @@ static const struct of_device_id clk_rpmh_match_table[] = {
 	{ .compatible = "qcom,sc8180x-rpmh-clk", .data = &clk_rpmh_sc8180x},
 	{ .compatible = "qcom,sdm845-rpmh-clk", .data = &clk_rpmh_sdm845},
 	{ .compatible = "qcom,sdx55-rpmh-clk",  .data = &clk_rpmh_sdx55},
+	{ .compatible = "qcom,sm6350-rpmh-clk", .data = &clk_rpmh_sm6350},
 	{ .compatible = "qcom,sm8150-rpmh-clk", .data = &clk_rpmh_sm8150},
 	{ .compatible = "qcom,sm8250-rpmh-clk", .data = &clk_rpmh_sm8250},
 	{ .compatible = "qcom,sm8350-rpmh-clk", .data = &clk_rpmh_sm8350},
diff --git a/drivers/clk/qcom/clk-smd-rpm.c b/drivers/clk/qcom/clk-smd-rpm.c
index b2c142f3a649..66d7807ee38e 100644
--- a/drivers/clk/qcom/clk-smd-rpm.c
+++ b/drivers/clk/qcom/clk-smd-rpm.c
@@ -913,10 +913,166 @@ static const struct rpm_smd_clk_desc rpm_clk_sdm660 = {
 	.num_clks = ARRAY_SIZE(sdm660_clks),
 };
 
+static struct clk_smd_rpm *mdm9607_clks[] = {
+	[RPM_SMD_XO_CLK_SRC]		= &sdm660_bi_tcxo,
+	[RPM_SMD_XO_A_CLK_SRC]		= &sdm660_bi_tcxo_a,
+	[RPM_SMD_PCNOC_CLK]		= &msm8916_pcnoc_clk,
+	[RPM_SMD_PCNOC_A_CLK]		= &msm8916_pcnoc_a_clk,
+	[RPM_SMD_BIMC_CLK]		= &msm8916_bimc_clk,
+	[RPM_SMD_BIMC_A_CLK]		= &msm8916_bimc_a_clk,
+	[RPM_SMD_QPIC_CLK]		= &qcs404_qpic_clk,
+	[RPM_SMD_QPIC_CLK_A]		= &qcs404_qpic_a_clk,
+	[RPM_SMD_QDSS_CLK]		= &msm8916_qdss_clk,
+	[RPM_SMD_QDSS_A_CLK]		= &msm8916_qdss_a_clk,
+	[RPM_SMD_BB_CLK1]		= &msm8916_bb_clk1,
+	[RPM_SMD_BB_CLK1_A]		= &msm8916_bb_clk1_a,
+	[RPM_SMD_BB_CLK1_PIN]		= &msm8916_bb_clk1_pin,
+	[RPM_SMD_BB_CLK1_A_PIN]		= &msm8916_bb_clk1_a_pin,
+};
+
+static const struct rpm_smd_clk_desc rpm_clk_mdm9607 = {
+	.clks = mdm9607_clks,
+	.num_clks = ARRAY_SIZE(mdm9607_clks),
+};
+
+static struct clk_smd_rpm *msm8953_clks[] = {
+	[RPM_SMD_XO_CLK_SRC]		= &sdm660_bi_tcxo,
+	[RPM_SMD_XO_A_CLK_SRC]		= &sdm660_bi_tcxo_a,
+	[RPM_SMD_PCNOC_CLK]		= &msm8916_pcnoc_clk,
+	[RPM_SMD_PCNOC_A_CLK]		= &msm8916_pcnoc_a_clk,
+	[RPM_SMD_SNOC_CLK]		= &msm8916_snoc_clk,
+	[RPM_SMD_SNOC_A_CLK]		= &msm8916_snoc_a_clk,
+	[RPM_SMD_BIMC_CLK]		= &msm8916_bimc_clk,
+	[RPM_SMD_BIMC_A_CLK]		= &msm8916_bimc_a_clk,
+	[RPM_SMD_IPA_CLK]		= &msm8976_ipa_clk,
+	[RPM_SMD_IPA_A_CLK]		= &msm8976_ipa_a_clk,
+	[RPM_SMD_SYSMMNOC_CLK]		= &msm8936_sysmmnoc_clk,
+	[RPM_SMD_SYSMMNOC_A_CLK]	= &msm8936_sysmmnoc_a_clk,
+	[RPM_SMD_QDSS_CLK]		= &msm8916_qdss_clk,
+	[RPM_SMD_QDSS_A_CLK]		= &msm8916_qdss_a_clk,
+	[RPM_SMD_BB_CLK1]		= &msm8916_bb_clk1,
+	[RPM_SMD_BB_CLK1_A]		= &msm8916_bb_clk1_a,
+	[RPM_SMD_BB_CLK2]		= &msm8916_bb_clk2,
+	[RPM_SMD_BB_CLK2_A]		= &msm8916_bb_clk2_a,
+	[RPM_SMD_RF_CLK2]		= &msm8916_rf_clk2,
+	[RPM_SMD_RF_CLK2_A]		= &msm8916_rf_clk2_a,
+	[RPM_SMD_RF_CLK3]		= &msm8992_ln_bb_clk,
+	[RPM_SMD_RF_CLK3_A]		= &msm8992_ln_bb_a_clk,
+	[RPM_SMD_DIV_CLK2]		= &msm8974_div_clk2,
+	[RPM_SMD_DIV_A_CLK2]		= &msm8974_div_a_clk2,
+	[RPM_SMD_BB_CLK1_PIN]		= &msm8916_bb_clk1_pin,
+	[RPM_SMD_BB_CLK1_A_PIN]		= &msm8916_bb_clk1_a_pin,
+	[RPM_SMD_BB_CLK2_PIN]		= &msm8916_bb_clk2_pin,
+	[RPM_SMD_BB_CLK2_A_PIN]		= &msm8916_bb_clk2_a_pin,
+};
+
+static const struct rpm_smd_clk_desc rpm_clk_msm8953 = {
+	.clks = msm8953_clks,
+	.num_clks = ARRAY_SIZE(msm8953_clks),
+};
+
+/* SM6125 */
+DEFINE_CLK_SMD_RPM(sm6125, cnoc_clk, cnoc_a_clk, QCOM_SMD_RPM_BUS_CLK, 1);
+DEFINE_CLK_SMD_RPM(sm6125, snoc_clk, snoc_a_clk, QCOM_SMD_RPM_BUS_CLK, 2);
+DEFINE_CLK_SMD_RPM_BRANCH(sm6125, qdss_clk, qdss_a_clk,
+					QCOM_SMD_RPM_MISC_CLK, 1, 19200000);
+DEFINE_CLK_SMD_RPM(sm6125, qup_clk, qup_a_clk, QCOM_SMD_RPM_QUP_CLK, 0);
+DEFINE_CLK_SMD_RPM(sm6125, mmnrt_clk, mmnrt_a_clk, QCOM_SMD_RPM_MMAXI_CLK, 0);
+DEFINE_CLK_SMD_RPM(sm6125, mmrt_clk, mmrt_a_clk, QCOM_SMD_RPM_MMAXI_CLK, 1);
+DEFINE_CLK_SMD_RPM(sm6125, snoc_periph_clk, snoc_periph_a_clk,
+						QCOM_SMD_RPM_BUS_CLK, 0);
+DEFINE_CLK_SMD_RPM(sm6125, snoc_lpass_clk, snoc_lpass_a_clk,
+						QCOM_SMD_RPM_BUS_CLK, 5);
+
+static struct clk_smd_rpm *sm6125_clks[] = {
+	[RPM_SMD_XO_CLK_SRC] = &sdm660_bi_tcxo,
+	[RPM_SMD_XO_A_CLK_SRC] = &sdm660_bi_tcxo_a,
+	[RPM_SMD_SNOC_CLK] = &sm6125_snoc_clk,
+	[RPM_SMD_SNOC_A_CLK] = &sm6125_snoc_a_clk,
+	[RPM_SMD_BIMC_CLK] = &msm8916_bimc_clk,
+	[RPM_SMD_BIMC_A_CLK] = &msm8916_bimc_a_clk,
+	[RPM_SMD_QDSS_CLK] = &sm6125_qdss_clk,
+	[RPM_SMD_QDSS_A_CLK] = &sm6125_qdss_a_clk,
+	[RPM_SMD_RF_CLK1] = &msm8916_rf_clk1,
+	[RPM_SMD_RF_CLK1_A] = &msm8916_rf_clk1_a,
+	[RPM_SMD_RF_CLK2] = &msm8916_rf_clk2,
+	[RPM_SMD_RF_CLK2_A] = &msm8916_rf_clk2_a,
+	[RPM_SMD_CNOC_CLK] = &sm6125_cnoc_clk,
+	[RPM_SMD_CNOC_A_CLK] = &sm6125_cnoc_a_clk,
+	[RPM_SMD_IPA_CLK] = &msm8976_ipa_clk,
+	[RPM_SMD_IPA_A_CLK] = &msm8976_ipa_a_clk,
+	[RPM_SMD_CE1_CLK] = &msm8992_ce1_clk,
+	[RPM_SMD_CE1_A_CLK] = &msm8992_ce1_a_clk,
+	[RPM_SMD_LN_BB_CLK1] = &msm8916_bb_clk1,
+	[RPM_SMD_LN_BB_CLK1_A] = &msm8916_bb_clk1_a,
+	[RPM_SMD_LN_BB_CLK2] = &msm8916_bb_clk2,
+	[RPM_SMD_LN_BB_CLK2_A] = &msm8916_bb_clk2_a,
+	[RPM_SMD_LN_BB_CLK3] = &sdm660_ln_bb_clk3,
+	[RPM_SMD_LN_BB_CLK3_A] = &sdm660_ln_bb_clk3_a,
+	[RPM_SMD_QUP_CLK] = &sm6125_qup_clk,
+	[RPM_SMD_QUP_A_CLK] = &sm6125_qup_a_clk,
+	[RPM_SMD_MMRT_CLK] = &sm6125_mmrt_clk,
+	[RPM_SMD_MMRT_A_CLK] = &sm6125_mmrt_a_clk,
+	[RPM_SMD_MMNRT_CLK] = &sm6125_mmnrt_clk,
+	[RPM_SMD_MMNRT_A_CLK] = &sm6125_mmnrt_a_clk,
+	[RPM_SMD_SNOC_PERIPH_CLK] = &sm6125_snoc_periph_clk,
+	[RPM_SMD_SNOC_PERIPH_A_CLK] = &sm6125_snoc_periph_a_clk,
+	[RPM_SMD_SNOC_LPASS_CLK] = &sm6125_snoc_lpass_clk,
+	[RPM_SMD_SNOC_LPASS_A_CLK] = &sm6125_snoc_lpass_a_clk,
+};
+
+static const struct rpm_smd_clk_desc rpm_clk_sm6125 = {
+	.clks = sm6125_clks,
+	.num_clks = ARRAY_SIZE(sm6125_clks),
+};
+
+/* SM6115 */
+static struct clk_smd_rpm *sm6115_clks[] = {
+	[RPM_SMD_XO_CLK_SRC] = &sdm660_bi_tcxo,
+	[RPM_SMD_XO_A_CLK_SRC] = &sdm660_bi_tcxo_a,
+	[RPM_SMD_SNOC_CLK] = &sm6125_snoc_clk,
+	[RPM_SMD_SNOC_A_CLK] = &sm6125_snoc_a_clk,
+	[RPM_SMD_BIMC_CLK] = &msm8916_bimc_clk,
+	[RPM_SMD_BIMC_A_CLK] = &msm8916_bimc_a_clk,
+	[RPM_SMD_QDSS_CLK] = &sm6125_qdss_clk,
+	[RPM_SMD_QDSS_A_CLK] = &sm6125_qdss_a_clk,
+	[RPM_SMD_RF_CLK1] = &msm8916_rf_clk1,
+	[RPM_SMD_RF_CLK1_A] = &msm8916_rf_clk1_a,
+	[RPM_SMD_RF_CLK2] = &msm8916_rf_clk2,
+	[RPM_SMD_RF_CLK2_A] = &msm8916_rf_clk2_a,
+	[RPM_SMD_CNOC_CLK] = &sm6125_cnoc_clk,
+	[RPM_SMD_CNOC_A_CLK] = &sm6125_cnoc_a_clk,
+	[RPM_SMD_IPA_CLK] = &msm8976_ipa_clk,
+	[RPM_SMD_IPA_A_CLK] = &msm8976_ipa_a_clk,
+	[RPM_SMD_CE1_CLK] = &msm8992_ce1_clk,
+	[RPM_SMD_CE1_A_CLK] = &msm8992_ce1_a_clk,
+	[RPM_SMD_QUP_CLK] = &sm6125_qup_clk,
+	[RPM_SMD_QUP_A_CLK] = &sm6125_qup_a_clk,
+	[RPM_SMD_MMRT_CLK] = &sm6125_mmrt_clk,
+	[RPM_SMD_MMRT_A_CLK] = &sm6125_mmrt_a_clk,
+	[RPM_SMD_MMNRT_CLK] = &sm6125_mmnrt_clk,
+	[RPM_SMD_MMNRT_A_CLK] = &sm6125_mmnrt_a_clk,
+	[RPM_SMD_SNOC_PERIPH_CLK] = &sm6125_snoc_periph_clk,
+	[RPM_SMD_SNOC_PERIPH_A_CLK] = &sm6125_snoc_periph_a_clk,
+	[RPM_SMD_SNOC_LPASS_CLK] = &sm6125_snoc_lpass_clk,
+	[RPM_SMD_SNOC_LPASS_A_CLK] = &sm6125_snoc_lpass_a_clk,
+	[RPM_SMD_RF_CLK1_PIN] = &msm8916_rf_clk1_pin,
+	[RPM_SMD_RF_CLK1_A_PIN] = &msm8916_rf_clk1_a_pin,
+	[RPM_SMD_RF_CLK2_PIN] = &msm8916_rf_clk2_pin,
+	[RPM_SMD_RF_CLK2_A_PIN] = &msm8916_rf_clk2_a_pin,
+};
+
+static const struct rpm_smd_clk_desc rpm_clk_sm6115 = {
+	.clks = sm6115_clks,
+	.num_clks = ARRAY_SIZE(sm6115_clks),
+};
+
 static const struct of_device_id rpm_smd_clk_match_table[] = {
+	{ .compatible = "qcom,rpmcc-mdm9607", .data = &rpm_clk_mdm9607 },
 	{ .compatible = "qcom,rpmcc-msm8226", .data = &rpm_clk_msm8974 },
 	{ .compatible = "qcom,rpmcc-msm8916", .data = &rpm_clk_msm8916 },
 	{ .compatible = "qcom,rpmcc-msm8936", .data = &rpm_clk_msm8936 },
+	{ .compatible = "qcom,rpmcc-msm8953", .data = &rpm_clk_msm8953 },
 	{ .compatible = "qcom,rpmcc-msm8974", .data = &rpm_clk_msm8974 },
 	{ .compatible = "qcom,rpmcc-msm8976", .data = &rpm_clk_msm8976 },
 	{ .compatible = "qcom,rpmcc-msm8992", .data = &rpm_clk_msm8992 },
@@ -925,6 +1081,8 @@ static const struct of_device_id rpm_smd_clk_match_table[] = {
 	{ .compatible = "qcom,rpmcc-msm8998", .data = &rpm_clk_msm8998 },
 	{ .compatible = "qcom,rpmcc-qcs404",  .data = &rpm_clk_qcs404  },
 	{ .compatible = "qcom,rpmcc-sdm660",  .data = &rpm_clk_sdm660  },
+	{ .compatible = "qcom,rpmcc-sm6115",  .data = &rpm_clk_sm6115  },
+	{ .compatible = "qcom,rpmcc-sm6125",  .data = &rpm_clk_sm6125  },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, rpm_smd_clk_match_table);
diff --git a/drivers/clk/qcom/dispcc-sc7280.c b/drivers/clk/qcom/dispcc-sc7280.c
new file mode 100644
index 000000000000..4ef4ae231794
--- /dev/null
+++ b/drivers/clk/qcom/dispcc-sc7280.c
@@ -0,0 +1,908 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#include <dt-bindings/clock/qcom,dispcc-sc7280.h>
+
+#include "clk-alpha-pll.h"
+#include "clk-branch.h"
+#include "clk-rcg.h"
+#include "clk-regmap-divider.h"
+#include "common.h"
+#include "gdsc.h"
+
+enum {
+	P_BI_TCXO,
+	P_DISP_CC_PLL0_OUT_EVEN,
+	P_DISP_CC_PLL0_OUT_MAIN,
+	P_DP_PHY_PLL_LINK_CLK,
+	P_DP_PHY_PLL_VCO_DIV_CLK,
+	P_DSI0_PHY_PLL_OUT_BYTECLK,
+	P_DSI0_PHY_PLL_OUT_DSICLK,
+	P_EDP_PHY_PLL_LINK_CLK,
+	P_EDP_PHY_PLL_VCO_DIV_CLK,
+	P_GCC_DISP_GPLL0_CLK,
+};
+
+static const struct pll_vco lucid_vco[] = {
+	{ 249600000, 2000000000, 0 },
+};
+
+/* 1520MHz Configuration*/
+static const struct alpha_pll_config disp_cc_pll0_config = {
+	.l = 0x4F,
+	.alpha = 0x2AAA,
+	.config_ctl_val = 0x20485699,
+	.config_ctl_hi_val = 0x00002261,
+	.config_ctl_hi1_val = 0x329A299C,
+	.user_ctl_val = 0x00000001,
+	.user_ctl_hi_val = 0x00000805,
+	.user_ctl_hi1_val = 0x00000000,
+};
+
+static struct clk_alpha_pll disp_cc_pll0 = {
+	.offset = 0x0,
+	.vco_table = lucid_vco,
+	.num_vco = ARRAY_SIZE(lucid_vco),
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID],
+	.clkr = {
+		.hw.init = &(struct clk_init_data){
+			.name = "disp_cc_pll0",
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "bi_tcxo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_lucid_ops,
+		},
+	},
+};
+
+static const struct parent_map disp_cc_parent_map_0[] = {
+	{ P_BI_TCXO, 0 },
+};
+
+static const struct clk_parent_data disp_cc_parent_data_0[] = {
+	{ .fw_name = "bi_tcxo" },
+};
+
+static const struct parent_map disp_cc_parent_map_1[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_DP_PHY_PLL_LINK_CLK, 1 },
+	{ P_DP_PHY_PLL_VCO_DIV_CLK, 2 },
+};
+
+static const struct clk_parent_data disp_cc_parent_data_1[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .fw_name = "dp_phy_pll_link_clk" },
+	{ .fw_name = "dp_phy_pll_vco_div_clk" },
+};
+
+static const struct parent_map disp_cc_parent_map_2[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_DSI0_PHY_PLL_OUT_BYTECLK, 1 },
+};
+
+static const struct clk_parent_data disp_cc_parent_data_2[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .fw_name = "dsi0_phy_pll_out_byteclk" },
+};
+
+static const struct parent_map disp_cc_parent_map_3[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_EDP_PHY_PLL_LINK_CLK, 1 },
+	{ P_EDP_PHY_PLL_VCO_DIV_CLK, 2 },
+};
+
+static const struct clk_parent_data disp_cc_parent_data_3[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .fw_name = "edp_phy_pll_link_clk" },
+	{ .fw_name = "edp_phy_pll_vco_div_clk" },
+};
+
+static const struct parent_map disp_cc_parent_map_4[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_DISP_CC_PLL0_OUT_MAIN, 1 },
+	{ P_GCC_DISP_GPLL0_CLK, 4 },
+	{ P_DISP_CC_PLL0_OUT_EVEN, 5 },
+};
+
+static const struct clk_parent_data disp_cc_parent_data_4[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .hw = &disp_cc_pll0.clkr.hw },
+	{ .fw_name = "gcc_disp_gpll0_clk" },
+	{ .hw = &disp_cc_pll0.clkr.hw },
+};
+
+static const struct parent_map disp_cc_parent_map_5[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GCC_DISP_GPLL0_CLK, 4 },
+};
+
+static const struct clk_parent_data disp_cc_parent_data_5[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .fw_name = "gcc_disp_gpll0_clk" },
+};
+
+static const struct parent_map disp_cc_parent_map_6[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_DSI0_PHY_PLL_OUT_DSICLK, 1 },
+};
+
+static const struct clk_parent_data disp_cc_parent_data_6[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .fw_name = "dsi0_phy_pll_out_dsiclk" },
+};
+
+static const struct freq_tbl ftbl_disp_cc_mdss_ahb_clk_src[] = {
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(37500000, P_GCC_DISP_GPLL0_CLK, 16, 0, 0),
+	F(75000000, P_GCC_DISP_GPLL0_CLK, 8, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 disp_cc_mdss_ahb_clk_src = {
+	.cmd_rcgr = 0x1170,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = disp_cc_parent_map_5,
+	.freq_tbl = ftbl_disp_cc_mdss_ahb_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "disp_cc_mdss_ahb_clk_src",
+		.parent_data = disp_cc_parent_data_5,
+		.num_parents = ARRAY_SIZE(disp_cc_parent_data_5),
+		.ops = &clk_rcg2_shared_ops,
+	},
+};
+
+static struct clk_rcg2 disp_cc_mdss_byte0_clk_src = {
+	.cmd_rcgr = 0x10d8,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = disp_cc_parent_map_2,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "disp_cc_mdss_byte0_clk_src",
+		.parent_data = disp_cc_parent_data_2,
+		.num_parents = ARRAY_SIZE(disp_cc_parent_data_2),
+		.flags = CLK_SET_RATE_PARENT,
+		.ops = &clk_byte2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_disp_cc_mdss_dp_aux_clk_src[] = {
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 disp_cc_mdss_dp_aux_clk_src = {
+	.cmd_rcgr = 0x1158,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = disp_cc_parent_map_0,
+	.freq_tbl = ftbl_disp_cc_mdss_dp_aux_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "disp_cc_mdss_dp_aux_clk_src",
+		.parent_data = disp_cc_parent_data_0,
+		.num_parents = ARRAY_SIZE(disp_cc_parent_data_0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 disp_cc_mdss_dp_crypto_clk_src = {
+	.cmd_rcgr = 0x1128,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = disp_cc_parent_map_1,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "disp_cc_mdss_dp_crypto_clk_src",
+		.parent_data = disp_cc_parent_data_1,
+		.num_parents = ARRAY_SIZE(disp_cc_parent_data_1),
+		.ops = &clk_byte2_ops,
+	},
+};
+
+static struct clk_rcg2 disp_cc_mdss_dp_link_clk_src = {
+	.cmd_rcgr = 0x110c,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = disp_cc_parent_map_1,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "disp_cc_mdss_dp_link_clk_src",
+		.parent_data = disp_cc_parent_data_1,
+		.num_parents = ARRAY_SIZE(disp_cc_parent_data_1),
+		.ops = &clk_byte2_ops,
+	},
+};
+
+static struct clk_rcg2 disp_cc_mdss_dp_pixel_clk_src = {
+	.cmd_rcgr = 0x1140,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = disp_cc_parent_map_1,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "disp_cc_mdss_dp_pixel_clk_src",
+		.parent_data = disp_cc_parent_data_1,
+		.num_parents = ARRAY_SIZE(disp_cc_parent_data_1),
+		.ops = &clk_dp_ops,
+	},
+};
+
+static struct clk_rcg2 disp_cc_mdss_edp_aux_clk_src = {
+	.cmd_rcgr = 0x11d0,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = disp_cc_parent_map_0,
+	.freq_tbl = ftbl_disp_cc_mdss_dp_aux_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "disp_cc_mdss_edp_aux_clk_src",
+		.parent_data = disp_cc_parent_data_0,
+		.num_parents = ARRAY_SIZE(disp_cc_parent_data_0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 disp_cc_mdss_edp_link_clk_src = {
+	.cmd_rcgr = 0x11a0,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = disp_cc_parent_map_3,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "disp_cc_mdss_edp_link_clk_src",
+		.parent_data = disp_cc_parent_data_3,
+		.num_parents = ARRAY_SIZE(disp_cc_parent_data_3),
+		.flags = CLK_SET_RATE_PARENT,
+		.ops = &clk_byte2_ops,
+	},
+};
+
+static struct clk_rcg2 disp_cc_mdss_edp_pixel_clk_src = {
+	.cmd_rcgr = 0x1188,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = disp_cc_parent_map_3,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "disp_cc_mdss_edp_pixel_clk_src",
+		.parent_data = disp_cc_parent_data_3,
+		.num_parents = ARRAY_SIZE(disp_cc_parent_data_3),
+		.ops = &clk_dp_ops,
+	},
+};
+
+static struct clk_rcg2 disp_cc_mdss_esc0_clk_src = {
+	.cmd_rcgr = 0x10f4,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = disp_cc_parent_map_2,
+	.freq_tbl = ftbl_disp_cc_mdss_dp_aux_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "disp_cc_mdss_esc0_clk_src",
+		.parent_data = disp_cc_parent_data_2,
+		.num_parents = ARRAY_SIZE(disp_cc_parent_data_2),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_disp_cc_mdss_mdp_clk_src[] = {
+	F(200000000, P_GCC_DISP_GPLL0_CLK, 3, 0, 0),
+	F(300000000, P_GCC_DISP_GPLL0_CLK, 2, 0, 0),
+	F(380000000, P_DISP_CC_PLL0_OUT_MAIN, 4, 0, 0),
+	F(506666667, P_DISP_CC_PLL0_OUT_MAIN, 3, 0, 0),
+	F(608000000, P_DISP_CC_PLL0_OUT_MAIN, 2.5, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 disp_cc_mdss_mdp_clk_src = {
+	.cmd_rcgr = 0x1090,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = disp_cc_parent_map_4,
+	.freq_tbl = ftbl_disp_cc_mdss_mdp_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "disp_cc_mdss_mdp_clk_src",
+		.parent_data = disp_cc_parent_data_4,
+		.num_parents = ARRAY_SIZE(disp_cc_parent_data_4),
+		.ops = &clk_rcg2_shared_ops,
+	},
+};
+
+static struct clk_rcg2 disp_cc_mdss_pclk0_clk_src = {
+	.cmd_rcgr = 0x1078,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = disp_cc_parent_map_6,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "disp_cc_mdss_pclk0_clk_src",
+		.parent_data = disp_cc_parent_data_6,
+		.num_parents = ARRAY_SIZE(disp_cc_parent_data_6),
+		.flags = CLK_SET_RATE_PARENT,
+		.ops = &clk_pixel_ops,
+	},
+};
+
+static struct clk_rcg2 disp_cc_mdss_rot_clk_src = {
+	.cmd_rcgr = 0x10a8,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = disp_cc_parent_map_4,
+	.freq_tbl = ftbl_disp_cc_mdss_mdp_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "disp_cc_mdss_rot_clk_src",
+		.parent_data = disp_cc_parent_data_4,
+		.num_parents = ARRAY_SIZE(disp_cc_parent_data_4),
+		.ops = &clk_rcg2_shared_ops,
+	},
+};
+
+static struct clk_rcg2 disp_cc_mdss_vsync_clk_src = {
+	.cmd_rcgr = 0x10c0,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = disp_cc_parent_map_0,
+	.freq_tbl = ftbl_disp_cc_mdss_dp_aux_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "disp_cc_mdss_vsync_clk_src",
+		.parent_data = disp_cc_parent_data_0,
+		.num_parents = ARRAY_SIZE(disp_cc_parent_data_0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_regmap_div disp_cc_mdss_byte0_div_clk_src = {
+	.reg = 0x10f0,
+	.shift = 0,
+	.width = 4,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "disp_cc_mdss_byte0_div_clk_src",
+		.parent_hws = (const struct clk_hw*[]){
+			&disp_cc_mdss_byte0_clk_src.clkr.hw,
+		},
+		.num_parents = 1,
+		.ops = &clk_regmap_div_ops,
+	},
+};
+
+static struct clk_regmap_div disp_cc_mdss_dp_link_div_clk_src = {
+	.reg = 0x1124,
+	.shift = 0,
+	.width = 4,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "disp_cc_mdss_dp_link_div_clk_src",
+		.parent_hws = (const struct clk_hw*[]){
+			&disp_cc_mdss_dp_link_clk_src.clkr.hw,
+		},
+		.num_parents = 1,
+		.ops = &clk_regmap_div_ro_ops,
+	},
+};
+
+static struct clk_regmap_div disp_cc_mdss_edp_link_div_clk_src = {
+	.reg = 0x11b8,
+	.shift = 0,
+	.width = 4,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "disp_cc_mdss_edp_link_div_clk_src",
+		.parent_hws = (const struct clk_hw*[]){
+			&disp_cc_mdss_edp_link_clk_src.clkr.hw,
+		},
+		.num_parents = 1,
+		.ops = &clk_regmap_div_ro_ops,
+	},
+};
+
+static struct clk_branch disp_cc_mdss_ahb_clk = {
+	.halt_reg = 0x1050,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1050,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "disp_cc_mdss_ahb_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&disp_cc_mdss_ahb_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch disp_cc_mdss_byte0_clk = {
+	.halt_reg = 0x1030,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1030,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "disp_cc_mdss_byte0_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&disp_cc_mdss_byte0_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch disp_cc_mdss_byte0_intf_clk = {
+	.halt_reg = 0x1034,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1034,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "disp_cc_mdss_byte0_intf_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&disp_cc_mdss_byte0_div_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch disp_cc_mdss_dp_aux_clk = {
+	.halt_reg = 0x104c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x104c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "disp_cc_mdss_dp_aux_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&disp_cc_mdss_dp_aux_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch disp_cc_mdss_dp_crypto_clk = {
+	.halt_reg = 0x1044,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1044,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "disp_cc_mdss_dp_crypto_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&disp_cc_mdss_dp_crypto_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch disp_cc_mdss_dp_link_clk = {
+	.halt_reg = 0x103c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x103c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "disp_cc_mdss_dp_link_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&disp_cc_mdss_dp_link_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch disp_cc_mdss_dp_link_intf_clk = {
+	.halt_reg = 0x1040,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1040,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "disp_cc_mdss_dp_link_intf_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&disp_cc_mdss_dp_link_div_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch disp_cc_mdss_dp_pixel_clk = {
+	.halt_reg = 0x1048,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1048,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "disp_cc_mdss_dp_pixel_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&disp_cc_mdss_dp_pixel_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch disp_cc_mdss_edp_aux_clk = {
+	.halt_reg = 0x1060,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1060,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "disp_cc_mdss_edp_aux_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&disp_cc_mdss_edp_aux_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch disp_cc_mdss_edp_link_clk = {
+	.halt_reg = 0x1058,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1058,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "disp_cc_mdss_edp_link_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&disp_cc_mdss_edp_link_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch disp_cc_mdss_edp_link_intf_clk = {
+	.halt_reg = 0x105c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x105c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "disp_cc_mdss_edp_link_intf_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&disp_cc_mdss_edp_link_div_clk_src.clkr.hw
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch disp_cc_mdss_edp_pixel_clk = {
+	.halt_reg = 0x1054,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1054,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "disp_cc_mdss_edp_pixel_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&disp_cc_mdss_edp_pixel_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch disp_cc_mdss_esc0_clk = {
+	.halt_reg = 0x1038,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1038,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "disp_cc_mdss_esc0_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&disp_cc_mdss_esc0_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch disp_cc_mdss_mdp_clk = {
+	.halt_reg = 0x1014,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1014,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "disp_cc_mdss_mdp_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&disp_cc_mdss_mdp_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch disp_cc_mdss_mdp_lut_clk = {
+	.halt_reg = 0x1024,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x1024,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "disp_cc_mdss_mdp_lut_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&disp_cc_mdss_mdp_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch disp_cc_mdss_non_gdsc_ahb_clk = {
+	.halt_reg = 0x2004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x2004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "disp_cc_mdss_non_gdsc_ahb_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&disp_cc_mdss_ahb_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch disp_cc_mdss_pclk0_clk = {
+	.halt_reg = 0x1010,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "disp_cc_mdss_pclk0_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&disp_cc_mdss_pclk0_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch disp_cc_mdss_rot_clk = {
+	.halt_reg = 0x101c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x101c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "disp_cc_mdss_rot_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&disp_cc_mdss_rot_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch disp_cc_mdss_rscc_ahb_clk = {
+	.halt_reg = 0x200c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x200c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "disp_cc_mdss_rscc_ahb_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&disp_cc_mdss_ahb_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch disp_cc_mdss_rscc_vsync_clk = {
+	.halt_reg = 0x2008,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x2008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "disp_cc_mdss_rscc_vsync_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&disp_cc_mdss_vsync_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch disp_cc_mdss_vsync_clk = {
+	.halt_reg = 0x102c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x102c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "disp_cc_mdss_vsync_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&disp_cc_mdss_vsync_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch disp_cc_sleep_clk = {
+	.halt_reg = 0x5004,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x5004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "disp_cc_sleep_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct gdsc disp_cc_mdss_core_gdsc = {
+	.gdscr = 0x1004,
+	.pd = {
+		.name = "disp_cc_mdss_core_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = HW_CTRL | RETAIN_FF_ENABLE,
+};
+
+static struct clk_regmap *disp_cc_sc7280_clocks[] = {
+	[DISP_CC_MDSS_AHB_CLK] = &disp_cc_mdss_ahb_clk.clkr,
+	[DISP_CC_MDSS_AHB_CLK_SRC] = &disp_cc_mdss_ahb_clk_src.clkr,
+	[DISP_CC_MDSS_BYTE0_CLK] = &disp_cc_mdss_byte0_clk.clkr,
+	[DISP_CC_MDSS_BYTE0_CLK_SRC] = &disp_cc_mdss_byte0_clk_src.clkr,
+	[DISP_CC_MDSS_BYTE0_DIV_CLK_SRC] = &disp_cc_mdss_byte0_div_clk_src.clkr,
+	[DISP_CC_MDSS_BYTE0_INTF_CLK] = &disp_cc_mdss_byte0_intf_clk.clkr,
+	[DISP_CC_MDSS_DP_AUX_CLK] = &disp_cc_mdss_dp_aux_clk.clkr,
+	[DISP_CC_MDSS_DP_AUX_CLK_SRC] = &disp_cc_mdss_dp_aux_clk_src.clkr,
+	[DISP_CC_MDSS_DP_CRYPTO_CLK] = &disp_cc_mdss_dp_crypto_clk.clkr,
+	[DISP_CC_MDSS_DP_CRYPTO_CLK_SRC] = &disp_cc_mdss_dp_crypto_clk_src.clkr,
+	[DISP_CC_MDSS_DP_LINK_CLK] = &disp_cc_mdss_dp_link_clk.clkr,
+	[DISP_CC_MDSS_DP_LINK_CLK_SRC] = &disp_cc_mdss_dp_link_clk_src.clkr,
+	[DISP_CC_MDSS_DP_LINK_DIV_CLK_SRC] =
+		&disp_cc_mdss_dp_link_div_clk_src.clkr,
+	[DISP_CC_MDSS_DP_LINK_INTF_CLK] = &disp_cc_mdss_dp_link_intf_clk.clkr,
+	[DISP_CC_MDSS_DP_PIXEL_CLK] = &disp_cc_mdss_dp_pixel_clk.clkr,
+	[DISP_CC_MDSS_DP_PIXEL_CLK_SRC] = &disp_cc_mdss_dp_pixel_clk_src.clkr,
+	[DISP_CC_MDSS_EDP_AUX_CLK] = &disp_cc_mdss_edp_aux_clk.clkr,
+	[DISP_CC_MDSS_EDP_AUX_CLK_SRC] = &disp_cc_mdss_edp_aux_clk_src.clkr,
+	[DISP_CC_MDSS_EDP_LINK_CLK] = &disp_cc_mdss_edp_link_clk.clkr,
+	[DISP_CC_MDSS_EDP_LINK_CLK_SRC] = &disp_cc_mdss_edp_link_clk_src.clkr,
+	[DISP_CC_MDSS_EDP_LINK_DIV_CLK_SRC] =
+		&disp_cc_mdss_edp_link_div_clk_src.clkr,
+	[DISP_CC_MDSS_EDP_LINK_INTF_CLK] = &disp_cc_mdss_edp_link_intf_clk.clkr,
+	[DISP_CC_MDSS_EDP_PIXEL_CLK] = &disp_cc_mdss_edp_pixel_clk.clkr,
+	[DISP_CC_MDSS_EDP_PIXEL_CLK_SRC] = &disp_cc_mdss_edp_pixel_clk_src.clkr,
+	[DISP_CC_MDSS_ESC0_CLK] = &disp_cc_mdss_esc0_clk.clkr,
+	[DISP_CC_MDSS_ESC0_CLK_SRC] = &disp_cc_mdss_esc0_clk_src.clkr,
+	[DISP_CC_MDSS_MDP_CLK] = &disp_cc_mdss_mdp_clk.clkr,
+	[DISP_CC_MDSS_MDP_CLK_SRC] = &disp_cc_mdss_mdp_clk_src.clkr,
+	[DISP_CC_MDSS_MDP_LUT_CLK] = &disp_cc_mdss_mdp_lut_clk.clkr,
+	[DISP_CC_MDSS_NON_GDSC_AHB_CLK] = &disp_cc_mdss_non_gdsc_ahb_clk.clkr,
+	[DISP_CC_MDSS_PCLK0_CLK] = &disp_cc_mdss_pclk0_clk.clkr,
+	[DISP_CC_MDSS_PCLK0_CLK_SRC] = &disp_cc_mdss_pclk0_clk_src.clkr,
+	[DISP_CC_MDSS_ROT_CLK] = &disp_cc_mdss_rot_clk.clkr,
+	[DISP_CC_MDSS_ROT_CLK_SRC] = &disp_cc_mdss_rot_clk_src.clkr,
+	[DISP_CC_MDSS_RSCC_AHB_CLK] = &disp_cc_mdss_rscc_ahb_clk.clkr,
+	[DISP_CC_MDSS_RSCC_VSYNC_CLK] = &disp_cc_mdss_rscc_vsync_clk.clkr,
+	[DISP_CC_MDSS_VSYNC_CLK] = &disp_cc_mdss_vsync_clk.clkr,
+	[DISP_CC_MDSS_VSYNC_CLK_SRC] = &disp_cc_mdss_vsync_clk_src.clkr,
+	[DISP_CC_PLL0] = &disp_cc_pll0.clkr,
+	[DISP_CC_SLEEP_CLK] = &disp_cc_sleep_clk.clkr,
+};
+
+static struct gdsc *disp_cc_sc7280_gdscs[] = {
+	[DISP_CC_MDSS_CORE_GDSC] = &disp_cc_mdss_core_gdsc,
+};
+
+static const struct regmap_config disp_cc_sc7280_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.max_register = 0x10000,
+	.fast_io = true,
+};
+
+static const struct qcom_cc_desc disp_cc_sc7280_desc = {
+	.config = &disp_cc_sc7280_regmap_config,
+	.clks = disp_cc_sc7280_clocks,
+	.num_clks = ARRAY_SIZE(disp_cc_sc7280_clocks),
+	.gdscs = disp_cc_sc7280_gdscs,
+	.num_gdscs = ARRAY_SIZE(disp_cc_sc7280_gdscs),
+};
+
+static const struct of_device_id disp_cc_sc7280_match_table[] = {
+	{ .compatible = "qcom,sc7280-dispcc" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, disp_cc_sc7280_match_table);
+
+static int disp_cc_sc7280_probe(struct platform_device *pdev)
+{
+	struct regmap *regmap;
+
+	regmap = qcom_cc_map(pdev, &disp_cc_sc7280_desc);
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
+
+	clk_lucid_pll_configure(&disp_cc_pll0, regmap, &disp_cc_pll0_config);
+
+	/*
+	 * Keep the clocks always-ON
+	 * DISP_CC_XO_CLK
+	 */
+	regmap_update_bits(regmap, 0x5008, BIT(0), BIT(0));
+
+	return qcom_cc_really_probe(pdev, &disp_cc_sc7280_desc, regmap);
+}
+
+static struct platform_driver disp_cc_sc7280_driver = {
+	.probe = disp_cc_sc7280_probe,
+	.driver = {
+		.name = "disp_cc-sc7280",
+		.of_match_table = disp_cc_sc7280_match_table,
+	},
+};
+
+static int __init disp_cc_sc7280_init(void)
+{
+	return platform_driver_register(&disp_cc_sc7280_driver);
+}
+subsys_initcall(disp_cc_sc7280_init);
+
+static void __exit disp_cc_sc7280_exit(void)
+{
+	platform_driver_unregister(&disp_cc_sc7280_driver);
+}
+module_exit(disp_cc_sc7280_exit);
+
+MODULE_DESCRIPTION("QTI DISP_CC sc7280 Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/clk/qcom/dispcc-sm8250.c b/drivers/clk/qcom/dispcc-sm8250.c
index 601c7c0ba483..bf9ffe1a1cf4 100644
--- a/drivers/clk/qcom/dispcc-sm8250.c
+++ b/drivers/clk/qcom/dispcc-sm8250.c
@@ -26,6 +26,10 @@ enum {
 	P_DISP_CC_PLL1_OUT_MAIN,
 	P_DP_PHY_PLL_LINK_CLK,
 	P_DP_PHY_PLL_VCO_DIV_CLK,
+	P_DPTX1_PHY_PLL_LINK_CLK,
+	P_DPTX1_PHY_PLL_VCO_DIV_CLK,
+	P_DPTX2_PHY_PLL_LINK_CLK,
+	P_DPTX2_PHY_PLL_VCO_DIV_CLK,
 	P_EDP_PHY_PLL_LINK_CLK,
 	P_EDP_PHY_PLL_VCO_DIV_CLK,
 	P_DSI0_PHY_PLL_OUT_BYTECLK,
@@ -98,12 +102,20 @@ static const struct parent_map disp_cc_parent_map_0[] = {
 	{ P_BI_TCXO, 0 },
 	{ P_DP_PHY_PLL_LINK_CLK, 1 },
 	{ P_DP_PHY_PLL_VCO_DIV_CLK, 2 },
+	{ P_DPTX1_PHY_PLL_LINK_CLK, 3 },
+	{ P_DPTX1_PHY_PLL_VCO_DIV_CLK, 4 },
+	{ P_DPTX2_PHY_PLL_LINK_CLK, 5 },
+	{ P_DPTX2_PHY_PLL_VCO_DIV_CLK, 6 },
 };
 
 static const struct clk_parent_data disp_cc_parent_data_0[] = {
 	{ .fw_name = "bi_tcxo" },
 	{ .fw_name = "dp_phy_pll_link_clk" },
 	{ .fw_name = "dp_phy_pll_vco_div_clk" },
+	{ .fw_name = "dptx1_phy_pll_link_clk" },
+	{ .fw_name = "dptx1_phy_pll_vco_div_clk" },
+	{ .fw_name = "dptx2_phy_pll_link_clk" },
+	{ .fw_name = "dptx2_phy_pll_vco_div_clk" },
 };
 
 static const struct parent_map disp_cc_parent_map_1[] = {
@@ -269,20 +281,11 @@ static struct clk_rcg2 disp_cc_mdss_dp_aux_clk_src = {
 	},
 };
 
-static const struct freq_tbl ftbl_disp_cc_mdss_dp_link1_clk_src[] = {
-	F(162000000, P_DP_PHY_PLL_LINK_CLK, 1, 0, 0),
-	F(270000000, P_DP_PHY_PLL_LINK_CLK, 1, 0, 0),
-	F(540000000, P_DP_PHY_PLL_LINK_CLK, 1, 0, 0),
-	F(810000000, P_DP_PHY_PLL_LINK_CLK, 1, 0, 0),
-	{ }
-};
-
 static struct clk_rcg2 disp_cc_mdss_dp_link1_clk_src = {
 	.cmd_rcgr = 0x220c,
 	.mnd_width = 0,
 	.hid_width = 5,
 	.parent_map = disp_cc_parent_map_0,
-	.freq_tbl = ftbl_disp_cc_mdss_dp_link1_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "disp_cc_mdss_dp_link1_clk_src",
 		.parent_data = disp_cc_parent_data_0,
@@ -296,7 +299,6 @@ static struct clk_rcg2 disp_cc_mdss_dp_link_clk_src = {
 	.mnd_width = 0,
 	.hid_width = 5,
 	.parent_map = disp_cc_parent_map_0,
-	.freq_tbl = ftbl_disp_cc_mdss_dp_link1_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "disp_cc_mdss_dp_link_clk_src",
 		.parent_data = disp_cc_parent_data_0,
diff --git a/drivers/clk/qcom/gcc-msm8953.c b/drivers/clk/qcom/gcc-msm8953.c
new file mode 100644
index 000000000000..49513f1366ff
--- /dev/null
+++ b/drivers/clk/qcom/gcc-msm8953.c
@@ -0,0 +1,4250 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (c) 2021, The Linux Foundation. All rights reserved.
+
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/clk-provider.h>
+#include <linux/regmap.h>
+#include <linux/reset-controller.h>
+
+#include <dt-bindings/clock/qcom,gcc-msm8953.h>
+
+#include "clk-alpha-pll.h"
+#include "clk-branch.h"
+#include "clk-rcg.h"
+#include "common.h"
+#include "gdsc.h"
+#include "reset.h"
+
+enum {
+	P_XO,
+	P_SLEEP_CLK,
+	P_GPLL0,
+	P_GPLL0_DIV2,
+	P_GPLL2,
+	P_GPLL3,
+	P_GPLL4,
+	P_GPLL6,
+	P_GPLL6_DIV2,
+	P_DSI0PLL,
+	P_DSI0PLL_BYTE,
+	P_DSI1PLL,
+	P_DSI1PLL_BYTE,
+};
+
+static struct clk_alpha_pll gpll0_early = {
+	.offset = 0x21000,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.clkr = {
+		.enable_reg = 0x45000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gpll0_early",
+			.parent_data = &(const struct clk_parent_data) {
+				.fw_name = "xo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_fixed_ops,
+		},
+	},
+};
+
+static struct clk_fixed_factor gpll0_early_div = {
+	.mult = 1,
+	.div = 2,
+	.hw.init = &(struct clk_init_data){
+		.name = "gpll0_early_div",
+		.parent_hws = (const struct clk_hw*[]){
+			&gpll0_early.clkr.hw,
+		},
+		.num_parents = 1,
+		.ops = &clk_fixed_factor_ops,
+	},
+};
+
+static struct clk_alpha_pll_postdiv gpll0 = {
+	.offset = 0x21000,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gpll0",
+		.parent_hws = (const struct clk_hw*[]){
+			&gpll0_early.clkr.hw,
+		},
+		.num_parents = 1,
+		.ops = &clk_alpha_pll_postdiv_ro_ops,
+	},
+};
+
+static struct clk_alpha_pll gpll2_early = {
+	.offset = 0x4a000,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.clkr = {
+		.enable_reg = 0x45000,
+		.enable_mask = BIT(2),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpll2_early",
+			.parent_data = &(const struct clk_parent_data) {
+				.fw_name = "xo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_fixed_ops,
+		},
+	},
+};
+
+static struct clk_alpha_pll_postdiv gpll2 = {
+	.offset = 0x4a000,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gpll2",
+		.parent_hws = (const struct clk_hw*[]){
+			&gpll2_early.clkr.hw,
+		},
+		.num_parents = 1,
+		.ops = &clk_alpha_pll_postdiv_ro_ops,
+	},
+};
+
+static const struct pll_vco gpll3_p_vco[] = {
+	{ 1000000000, 2000000000, 0 },
+};
+
+static const struct alpha_pll_config gpll3_early_config = {
+	.l = 63,
+	.config_ctl_val = 0x4001055b,
+	.early_output_mask = 0,
+	.post_div_mask = GENMASK(11, 8),
+	.post_div_val = BIT(8),
+};
+
+static struct clk_alpha_pll gpll3_early = {
+	.offset = 0x22000,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.vco_table = gpll3_p_vco,
+	.num_vco = ARRAY_SIZE(gpll3_p_vco),
+	.flags = SUPPORTS_DYNAMIC_UPDATE,
+	.clkr = {
+		.hw.init = &(struct clk_init_data){
+			.name = "gpll3_early",
+			.parent_data = &(const struct clk_parent_data) {
+				.fw_name = "xo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_ops,
+		},
+	},
+};
+
+static struct clk_alpha_pll_postdiv gpll3 = {
+	.offset = 0x22000,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gpll3",
+		.parent_hws = (const struct clk_hw*[]){
+			&gpll3_early.clkr.hw,
+		},
+		.num_parents = 1,
+		.ops = &clk_alpha_pll_postdiv_ops,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static struct clk_alpha_pll gpll4_early = {
+	.offset = 0x24000,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.clkr = {
+		.enable_reg = 0x45000,
+		.enable_mask = BIT(5),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpll4_early",
+			.parent_data = &(const struct clk_parent_data) {
+				.fw_name = "xo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_fixed_ops,
+		},
+	},
+};
+
+static struct clk_alpha_pll_postdiv gpll4 = {
+	.offset = 0x24000,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gpll4",
+		.parent_hws = (const struct clk_hw*[]){
+			&gpll4_early.clkr.hw,
+		},
+		.num_parents = 1,
+		.ops = &clk_alpha_pll_postdiv_ro_ops,
+	},
+};
+
+static struct clk_alpha_pll gpll6_early = {
+	.offset = 0x37000,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.clkr = {
+		.enable_reg = 0x45000,
+		.enable_mask = BIT(7),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpll6_early",
+			.parent_data = &(const struct clk_parent_data) {
+				.fw_name = "xo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_fixed_ops,
+		},
+	},
+};
+
+static struct clk_fixed_factor gpll6_early_div = {
+	.mult = 1,
+	.div = 2,
+	.hw.init = &(struct clk_init_data){
+		.name = "gpll6_early_div",
+		.parent_hws = (const struct clk_hw*[]){
+			&gpll6_early.clkr.hw,
+		},
+		.num_parents = 1,
+		.ops = &clk_fixed_factor_ops,
+	},
+};
+
+static struct clk_alpha_pll_postdiv gpll6 = {
+	.offset = 0x37000,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gpll6",
+		.parent_hws = (const struct clk_hw*[]){
+			&gpll6_early.clkr.hw,
+		},
+		.num_parents = 1,
+		.ops = &clk_alpha_pll_postdiv_ro_ops,
+	},
+};
+
+static const struct parent_map gcc_xo_gpll0_gpll0div2_2_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 1 },
+	{ P_GPLL0_DIV2, 2 },
+};
+
+static const struct parent_map gcc_xo_gpll0_gpll0div2_4_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 1 },
+	{ P_GPLL0_DIV2, 4 },
+};
+
+static const struct clk_parent_data gcc_xo_gpll0_gpll0div2_data[] = {
+	{ .fw_name = "xo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll0_early_div.hw },
+};
+
+static const struct parent_map gcc_apc_droop_detector_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 1 },
+	{ P_GPLL4, 2 },
+};
+
+static const struct clk_parent_data gcc_apc_droop_detector_data[] = {
+	{ .fw_name = "xo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll4.clkr.hw },
+};
+
+static const struct freq_tbl ftbl_apc_droop_detector_clk_src[] = {
+	F(19200000, P_XO, 1, 0, 0),
+	F(400000000, P_GPLL0, 2, 0, 0),
+	F(576000000, P_GPLL4, 2, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 apc0_droop_detector_clk_src = {
+	.cmd_rcgr = 0x78008,
+	.hid_width = 5,
+	.freq_tbl = ftbl_apc_droop_detector_clk_src,
+	.parent_map = gcc_apc_droop_detector_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "apc0_droop_detector_clk_src",
+		.parent_data = gcc_apc_droop_detector_data,
+		.num_parents = ARRAY_SIZE(gcc_apc_droop_detector_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+static struct clk_rcg2 apc1_droop_detector_clk_src = {
+	.cmd_rcgr = 0x79008,
+	.hid_width = 5,
+	.freq_tbl = ftbl_apc_droop_detector_clk_src,
+	.parent_map = gcc_apc_droop_detector_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "apc1_droop_detector_clk_src",
+		.parent_data = gcc_apc_droop_detector_data,
+		.num_parents = ARRAY_SIZE(gcc_apc_droop_detector_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct freq_tbl ftbl_apss_ahb_clk_src[] = {
+	F(19200000, P_XO, 1, 0, 0),
+	F(25000000, P_GPLL0_DIV2, 16, 0, 0),
+	F(50000000, P_GPLL0, 16, 0, 0),
+	F(100000000, P_GPLL0, 8, 0, 0),
+	F(133330000, P_GPLL0, 6, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 apss_ahb_clk_src = {
+	.cmd_rcgr = 0x46000,
+	.hid_width = 5,
+	.freq_tbl = ftbl_apss_ahb_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_4_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "apss_ahb_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct freq_tbl ftbl_blsp_i2c_apps_clk_src[] = {
+	F(19200000, P_XO, 1, 0, 0),
+	F(25000000, P_GPLL0_DIV2, 16, 0, 0),
+	F(50000000, P_GPLL0, 16, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 blsp1_qup1_i2c_apps_clk_src = {
+	.cmd_rcgr = 0x0200c,
+	.hid_width = 5,
+	.freq_tbl = ftbl_blsp_i2c_apps_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_2_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "blsp1_qup1_i2c_apps_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 blsp1_qup2_i2c_apps_clk_src = {
+	.cmd_rcgr = 0x03000,
+	.hid_width = 5,
+	.freq_tbl = ftbl_blsp_i2c_apps_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_2_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "blsp1_qup2_i2c_apps_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 blsp1_qup3_i2c_apps_clk_src = {
+	.cmd_rcgr = 0x04000,
+	.hid_width = 5,
+	.freq_tbl = ftbl_blsp_i2c_apps_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_2_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "blsp1_qup3_i2c_apps_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 blsp1_qup4_i2c_apps_clk_src = {
+	.cmd_rcgr = 0x05000,
+	.hid_width = 5,
+	.freq_tbl = ftbl_blsp_i2c_apps_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_2_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "blsp1_qup4_i2c_apps_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 blsp2_qup1_i2c_apps_clk_src = {
+	.cmd_rcgr = 0x0c00c,
+	.hid_width = 5,
+	.freq_tbl = ftbl_blsp_i2c_apps_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_2_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "blsp2_qup1_i2c_apps_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 blsp2_qup2_i2c_apps_clk_src = {
+	.cmd_rcgr = 0x0d000,
+	.hid_width = 5,
+	.freq_tbl = ftbl_blsp_i2c_apps_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_2_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "blsp2_qup2_i2c_apps_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 blsp2_qup3_i2c_apps_clk_src = {
+	.cmd_rcgr = 0x0f000,
+	.hid_width = 5,
+	.freq_tbl = ftbl_blsp_i2c_apps_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_2_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "blsp2_qup3_i2c_apps_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 blsp2_qup4_i2c_apps_clk_src = {
+	.cmd_rcgr = 0x18000,
+	.hid_width = 5,
+	.freq_tbl = ftbl_blsp_i2c_apps_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_2_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "blsp2_qup4_i2c_apps_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct freq_tbl ftbl_blsp_spi_apps_clk_src[] = {
+	F(960000, P_XO, 10, 1, 2),
+	F(4800000, P_XO, 4, 0, 0),
+	F(9600000, P_XO, 2, 0, 0),
+	F(12500000, P_GPLL0_DIV2, 16, 1, 2),
+	F(16000000, P_GPLL0, 10, 1, 5),
+	F(19200000, P_XO, 1, 0, 0),
+	F(25000000, P_GPLL0, 16, 1, 2),
+	F(50000000, P_GPLL0, 16, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 blsp1_qup1_spi_apps_clk_src = {
+	.cmd_rcgr = 0x02024,
+	.hid_width = 5,
+	.mnd_width = 8,
+	.freq_tbl = ftbl_blsp_spi_apps_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_2_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "blsp1_qup1_spi_apps_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 blsp1_qup2_spi_apps_clk_src = {
+	.cmd_rcgr = 0x03014,
+	.hid_width = 5,
+	.mnd_width = 8,
+	.freq_tbl = ftbl_blsp_spi_apps_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_2_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "blsp1_qup2_spi_apps_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 blsp1_qup3_spi_apps_clk_src = {
+	.cmd_rcgr = 0x04024,
+	.hid_width = 5,
+	.mnd_width = 8,
+	.freq_tbl = ftbl_blsp_spi_apps_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_2_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "blsp1_qup3_spi_apps_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 blsp1_qup4_spi_apps_clk_src = {
+	.cmd_rcgr = 0x05024,
+	.hid_width = 5,
+	.mnd_width = 8,
+	.freq_tbl = ftbl_blsp_spi_apps_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_2_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "blsp1_qup4_spi_apps_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 blsp2_qup1_spi_apps_clk_src = {
+	.cmd_rcgr = 0x0c024,
+	.hid_width = 5,
+	.mnd_width = 8,
+	.freq_tbl = ftbl_blsp_spi_apps_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_2_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "blsp2_qup1_spi_apps_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 blsp2_qup2_spi_apps_clk_src = {
+	.cmd_rcgr = 0x0d014,
+	.hid_width = 5,
+	.mnd_width = 8,
+	.freq_tbl = ftbl_blsp_spi_apps_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_2_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "blsp2_qup2_spi_apps_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 blsp2_qup3_spi_apps_clk_src = {
+	.cmd_rcgr = 0x0f024,
+	.hid_width = 5,
+	.mnd_width = 8,
+	.freq_tbl = ftbl_blsp_spi_apps_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_2_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "blsp2_qup3_spi_apps_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 blsp2_qup4_spi_apps_clk_src = {
+	.cmd_rcgr = 0x18024,
+	.hid_width = 5,
+	.mnd_width = 8,
+	.freq_tbl = ftbl_blsp_spi_apps_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_2_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "blsp2_qup4_spi_apps_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct freq_tbl ftbl_blsp_uart_apps_clk_src[] = {
+	F(3686400, P_GPLL0_DIV2, 1, 144, 15625),
+	F(7372800, P_GPLL0_DIV2, 1, 288, 15625),
+	F(14745600, P_GPLL0_DIV2, 1, 576, 15625),
+	F(16000000, P_GPLL0_DIV2, 5, 1, 5),
+	F(19200000, P_XO, 1, 0, 0),
+	F(24000000, P_GPLL0, 1, 3, 100),
+	F(25000000, P_GPLL0, 16, 1, 2),
+	F(32000000, P_GPLL0, 1, 1, 25),
+	F(40000000, P_GPLL0, 1, 1, 20),
+	F(46400000, P_GPLL0, 1, 29, 500),
+	F(48000000, P_GPLL0, 1, 3, 50),
+	F(51200000, P_GPLL0, 1, 8, 125),
+	F(56000000, P_GPLL0, 1, 7, 100),
+	F(58982400, P_GPLL0, 1, 1152, 15625),
+	F(60000000, P_GPLL0, 1, 3, 40),
+	F(64000000, P_GPLL0, 1, 2, 25),
+	{ }
+};
+
+static struct clk_rcg2 blsp1_uart1_apps_clk_src = {
+	.cmd_rcgr = 0x02044,
+	.hid_width = 5,
+	.mnd_width = 16,
+	.freq_tbl = ftbl_blsp_uart_apps_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_4_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "blsp1_uart1_apps_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 blsp1_uart2_apps_clk_src = {
+	.cmd_rcgr = 0x03034,
+	.hid_width = 5,
+	.mnd_width = 16,
+	.freq_tbl = ftbl_blsp_uart_apps_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_4_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "blsp1_uart2_apps_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 blsp2_uart1_apps_clk_src = {
+	.cmd_rcgr = 0x0c044,
+	.hid_width = 5,
+	.mnd_width = 16,
+	.freq_tbl = ftbl_blsp_uart_apps_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_4_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "blsp2_uart1_apps_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 blsp2_uart2_apps_clk_src = {
+	.cmd_rcgr = 0x0d034,
+	.hid_width = 5,
+	.mnd_width = 16,
+	.freq_tbl = ftbl_blsp_uart_apps_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_4_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "blsp2_uart2_apps_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct parent_map gcc_byte0_map[] = {
+	{ P_XO, 0 },
+	{ P_DSI0PLL_BYTE, 1 },
+	{ P_DSI1PLL_BYTE, 3 },
+};
+
+static const struct parent_map gcc_byte1_map[] = {
+	{ P_XO, 0 },
+	{ P_DSI0PLL_BYTE, 3 },
+	{ P_DSI1PLL_BYTE, 1 },
+};
+
+static const struct clk_parent_data gcc_byte_data[] = {
+	{ .fw_name = "xo" },
+	{ .fw_name = "dsi0pllbyte", .name = "dsi0pllbyte" },
+	{ .fw_name = "dsi1pllbyte", .name = "dsi1pllbyte" },
+};
+
+static struct clk_rcg2 byte0_clk_src = {
+	.cmd_rcgr = 0x4d044,
+	.hid_width = 5,
+	.parent_map = gcc_byte0_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "byte0_clk_src",
+		.parent_data = gcc_byte_data,
+		.num_parents = ARRAY_SIZE(gcc_byte_data),
+		.ops = &clk_byte2_ops,
+		.flags = CLK_SET_RATE_PARENT,
+	}
+};
+
+static struct clk_rcg2 byte1_clk_src = {
+	.cmd_rcgr = 0x4d0b0,
+	.hid_width = 5,
+	.parent_map = gcc_byte1_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "byte1_clk_src",
+		.parent_data = gcc_byte_data,
+		.num_parents = ARRAY_SIZE(gcc_byte_data),
+		.ops = &clk_byte2_ops,
+		.flags = CLK_SET_RATE_PARENT,
+	}
+};
+
+static const struct parent_map gcc_gp_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 1 },
+	{ P_GPLL6, 2 },
+	{ P_GPLL0_DIV2, 4 },
+	{ P_SLEEP_CLK, 6 },
+};
+
+static const struct clk_parent_data gcc_gp_data[] = {
+	{ .fw_name = "xo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll6.clkr.hw },
+	{ .hw = &gpll0_early_div.hw },
+	{ .fw_name = "sleep", .name = "sleep" },
+};
+
+static const struct freq_tbl ftbl_camss_gp_clk_src[] = {
+	F(50000000, P_GPLL0_DIV2, 8, 0, 0),
+	F(100000000, P_GPLL0, 8, 0, 0),
+	F(200000000, P_GPLL0, 4, 0, 0),
+	F(266670000, P_GPLL0, 3, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 camss_gp0_clk_src = {
+	.cmd_rcgr = 0x54000,
+	.hid_width = 5,
+	.mnd_width = 8,
+	.freq_tbl = ftbl_camss_gp_clk_src,
+	.parent_map = gcc_gp_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "camss_gp0_clk_src",
+		.parent_data = gcc_gp_data,
+		.num_parents = ARRAY_SIZE(gcc_gp_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 camss_gp1_clk_src = {
+	.cmd_rcgr = 0x55000,
+	.hid_width = 5,
+	.mnd_width = 8,
+	.freq_tbl = ftbl_camss_gp_clk_src,
+	.parent_map = gcc_gp_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "camss_gp1_clk_src",
+		.parent_data = gcc_gp_data,
+		.num_parents = ARRAY_SIZE(gcc_gp_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct freq_tbl ftbl_camss_top_ahb_clk_src[] = {
+	F(40000000, P_GPLL0_DIV2, 10, 0, 0),
+	F(80000000, P_GPLL0, 10, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 camss_top_ahb_clk_src = {
+	.cmd_rcgr = 0x5a000,
+	.hid_width = 5,
+	.freq_tbl = ftbl_camss_top_ahb_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_2_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "camss_top_ahb_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct parent_map gcc_cci_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 2 },
+	{ P_GPLL0_DIV2, 3 },
+	{ P_SLEEP_CLK, 6 },
+};
+
+static const struct clk_parent_data gcc_cci_data[] = {
+	{ .fw_name = "xo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll0_early_div.hw },
+	{ .fw_name = "sleep", .name = "sleep" },
+};
+
+static const struct freq_tbl ftbl_cci_clk_src[] = {
+	F(19200000, P_XO, 1, 0, 0),
+	F(37500000, P_GPLL0_DIV2, 1, 3, 32),
+	{ }
+};
+
+static struct clk_rcg2 cci_clk_src = {
+	.cmd_rcgr = 0x51000,
+	.hid_width = 5,
+	.mnd_width = 8,
+	.freq_tbl = ftbl_cci_clk_src,
+	.parent_map = gcc_cci_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "cci_clk_src",
+		.parent_data = gcc_cci_data,
+		.num_parents = ARRAY_SIZE(gcc_cci_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct parent_map gcc_cpp_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 1 },
+	{ P_GPLL6, 3 },
+	{ P_GPLL2, 4 },
+	{ P_GPLL0_DIV2, 5 },
+};
+
+static const struct clk_parent_data gcc_cpp_data[] = {
+	{ .fw_name = "xo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll6.clkr.hw },
+	{ .hw = &gpll2.clkr.hw },
+	{ .hw = &gpll0_early_div.hw },
+};
+
+static const struct freq_tbl ftbl_cpp_clk_src[] = {
+	F(100000000, P_GPLL0_DIV2, 4, 0, 0),
+	F(200000000, P_GPLL0, 4, 0, 0),
+	F(266670000, P_GPLL0, 3, 0, 0),
+	F(320000000, P_GPLL0, 2.5, 0, 0),
+	F(400000000, P_GPLL0, 2, 0, 0),
+	F(465000000, P_GPLL2, 2, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 cpp_clk_src = {
+	.cmd_rcgr = 0x58018,
+	.hid_width = 5,
+	.freq_tbl = ftbl_cpp_clk_src,
+	.parent_map = gcc_cpp_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "cpp_clk_src",
+		.parent_data = gcc_cpp_data,
+		.num_parents = ARRAY_SIZE(gcc_cpp_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct freq_tbl ftbl_crypto_clk_src[] = {
+	F(40000000, P_GPLL0_DIV2, 10, 0, 0),
+	F(80000000, P_GPLL0, 10, 0, 0),
+	F(100000000, P_GPLL0, 8, 0, 0),
+	F(160000000, P_GPLL0, 5, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 crypto_clk_src = {
+	.cmd_rcgr = 0x16004,
+	.hid_width = 5,
+	.freq_tbl = ftbl_crypto_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_4_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "crypto_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct parent_map gcc_csi0_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 1 },
+	{ P_GPLL2, 4 },
+	{ P_GPLL0_DIV2, 5 },
+};
+
+static const struct parent_map gcc_csi12_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 1 },
+	{ P_GPLL2, 5 },
+	{ P_GPLL0_DIV2, 4 },
+};
+
+static const struct clk_parent_data gcc_csi_data[] = {
+	{ .fw_name = "xo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll2.clkr.hw },
+	{ .hw = &gpll0_early_div.hw },
+};
+
+static const struct freq_tbl ftbl_csi_clk_src[] = {
+	F(100000000, P_GPLL0_DIV2, 4, 0, 0),
+	F(200000000, P_GPLL0, 4, 0, 0),
+	F(310000000, P_GPLL2, 3, 0, 0),
+	F(400000000, P_GPLL0, 2, 0, 0),
+	F(465000000, P_GPLL2, 2, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 csi0_clk_src = {
+	.cmd_rcgr = 0x4e020,
+	.hid_width = 5,
+	.freq_tbl = ftbl_csi_clk_src,
+	.parent_map = gcc_csi0_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "csi0_clk_src",
+		.parent_data = gcc_csi_data,
+		.num_parents = ARRAY_SIZE(gcc_csi_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 csi1_clk_src = {
+	.cmd_rcgr = 0x4f020,
+	.hid_width = 5,
+	.freq_tbl = ftbl_csi_clk_src,
+	.parent_map = gcc_csi12_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "csi1_clk_src",
+		.parent_data = gcc_csi_data,
+		.num_parents = ARRAY_SIZE(gcc_csi_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 csi2_clk_src = {
+	.cmd_rcgr = 0x3c020,
+	.hid_width = 5,
+	.freq_tbl = ftbl_csi_clk_src,
+	.parent_map = gcc_csi12_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "csi2_clk_src",
+		.parent_data = gcc_csi_data,
+		.num_parents = ARRAY_SIZE(gcc_csi_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct parent_map gcc_csip_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 1 },
+	{ P_GPLL4, 3 },
+	{ P_GPLL2, 4 },
+	{ P_GPLL0_DIV2, 5 },
+};
+
+static const struct clk_parent_data gcc_csip_data[] = {
+	{ .fw_name = "xo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll4.clkr.hw },
+	{ .hw = &gpll2.clkr.hw },
+	{ .hw = &gpll0_early_div.hw },
+};
+
+static const struct freq_tbl ftbl_csi_p_clk_src[] = {
+	F(66670000, P_GPLL0_DIV2, 6, 0, 0),
+	F(133330000, P_GPLL0, 6, 0, 0),
+	F(200000000, P_GPLL0, 4, 0, 0),
+	F(266670000, P_GPLL0, 3, 0, 0),
+	F(310000000, P_GPLL2, 3, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 csi0p_clk_src = {
+	.cmd_rcgr = 0x58084,
+	.hid_width = 5,
+	.freq_tbl = ftbl_csi_p_clk_src,
+	.parent_map = gcc_csip_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "csi0p_clk_src",
+		.parent_data = gcc_csip_data,
+		.num_parents = ARRAY_SIZE(gcc_csip_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 csi1p_clk_src = {
+	.cmd_rcgr = 0x58094,
+	.hid_width = 5,
+	.freq_tbl = ftbl_csi_p_clk_src,
+	.parent_map = gcc_csip_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "csi1p_clk_src",
+		.parent_data = gcc_csip_data,
+		.num_parents = ARRAY_SIZE(gcc_csip_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 csi2p_clk_src = {
+	.cmd_rcgr = 0x580a4,
+	.hid_width = 5,
+	.freq_tbl = ftbl_csi_p_clk_src,
+	.parent_map = gcc_csip_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "csi2p_clk_src",
+		.parent_data = gcc_csip_data,
+		.num_parents = ARRAY_SIZE(gcc_csip_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct freq_tbl ftbl_csi_phytimer_clk_src[] = {
+	F(100000000, P_GPLL0_DIV2, 4, 0, 0),
+	F(200000000, P_GPLL0, 4, 0, 0),
+	F(266670000, P_GPLL0, 3, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 csi0phytimer_clk_src = {
+	.cmd_rcgr = 0x4e000,
+	.hid_width = 5,
+	.freq_tbl = ftbl_csi_phytimer_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_2_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "csi0phytimer_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 csi1phytimer_clk_src = {
+	.cmd_rcgr = 0x4f000,
+	.hid_width = 5,
+	.freq_tbl = ftbl_csi_phytimer_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_2_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "csi1phytimer_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 csi2phytimer_clk_src = {
+	.cmd_rcgr = 0x4f05c,
+	.hid_width = 5,
+	.freq_tbl = ftbl_csi_phytimer_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_2_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "csi2phytimer_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct parent_map gcc_esc_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 3 },
+};
+
+static const struct clk_parent_data gcc_esc_vsync_data[] = {
+	{ .fw_name = "xo" },
+	{ .hw = &gpll0.clkr.hw },
+};
+
+static const struct freq_tbl ftbl_esc0_1_clk_src[] = {
+	F(19200000, P_XO, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 esc0_clk_src = {
+	.cmd_rcgr = 0x4d05c,
+	.hid_width = 5,
+	.freq_tbl = ftbl_esc0_1_clk_src,
+	.parent_map = gcc_esc_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "esc0_clk_src",
+		.parent_data = gcc_esc_vsync_data,
+		.num_parents = ARRAY_SIZE(gcc_esc_vsync_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 esc1_clk_src = {
+	.cmd_rcgr = 0x4d0a8,
+	.hid_width = 5,
+	.freq_tbl = ftbl_esc0_1_clk_src,
+	.parent_map = gcc_esc_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "esc1_clk_src",
+		.parent_data = gcc_esc_vsync_data,
+		.num_parents = ARRAY_SIZE(gcc_esc_vsync_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct parent_map gcc_gfx3d_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 1 },
+	{ P_GPLL3, 2 },
+	{ P_GPLL6, 3 },
+	{ P_GPLL4, 4 },
+	{ P_GPLL0_DIV2, 5 },
+	{ P_GPLL6_DIV2, 6 },
+};
+
+static const struct clk_parent_data gcc_gfx3d_data[] = {
+	{ .fw_name = "xo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll3.clkr.hw },
+	{ .hw = &gpll6.clkr.hw },
+	{ .hw = &gpll4.clkr.hw },
+	{ .hw = &gpll0_early_div.hw },
+	{ .hw = &gpll6_early_div.hw },
+};
+
+static const struct freq_tbl ftbl_gfx3d_clk_src[] = {
+	F(19200000, P_XO, 1, 0, 0),
+	F(50000000, P_GPLL0_DIV2, 8, 0, 0),
+	F(80000000, P_GPLL0_DIV2, 5, 0, 0),
+	F(100000000, P_GPLL0_DIV2, 4, 0, 0),
+	F(133330000, P_GPLL0_DIV2, 3, 0, 0),
+	F(160000000, P_GPLL0_DIV2, 2.5, 0, 0),
+	F(200000000, P_GPLL0_DIV2, 2, 0, 0),
+	F(266670000, P_GPLL0, 3.0, 0, 0),
+	F(320000000, P_GPLL0, 2.5, 0, 0),
+	F(400000000, P_GPLL0, 2, 0, 0),
+	F(460800000, P_GPLL4, 2.5, 0, 0),
+	F(510000000, P_GPLL3, 2, 0, 0),
+	F(560000000, P_GPLL3, 2, 0, 0),
+	F(600000000, P_GPLL3, 2, 0, 0),
+	F(650000000, P_GPLL3, 2, 0, 0),
+	F(685000000, P_GPLL3, 2, 0, 0),
+	F(725000000, P_GPLL3, 2, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gfx3d_clk_src = {
+	.cmd_rcgr = 0x59000,
+	.hid_width = 5,
+	.freq_tbl = ftbl_gfx3d_clk_src,
+	.parent_map = gcc_gfx3d_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "gfx3d_clk_src",
+		.parent_data = gcc_gfx3d_data,
+		.num_parents = ARRAY_SIZE(gcc_gfx3d_data),
+		.ops = &clk_rcg2_floor_ops,
+		.flags = CLK_SET_RATE_PARENT,
+	}
+};
+
+static const struct freq_tbl ftbl_gp_clk_src[] = {
+	F(19200000, P_XO, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gp1_clk_src = {
+	.cmd_rcgr = 0x08004,
+	.hid_width = 5,
+	.mnd_width = 8,
+	.freq_tbl = ftbl_gp_clk_src,
+	.parent_map = gcc_gp_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "gp1_clk_src",
+		.parent_data = gcc_gp_data,
+		.num_parents = ARRAY_SIZE(gcc_gp_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 gp2_clk_src = {
+	.cmd_rcgr = 0x09004,
+	.hid_width = 5,
+	.mnd_width = 8,
+	.freq_tbl = ftbl_gp_clk_src,
+	.parent_map = gcc_gp_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "gp2_clk_src",
+		.parent_data = gcc_gp_data,
+		.num_parents = ARRAY_SIZE(gcc_gp_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 gp3_clk_src = {
+	.cmd_rcgr = 0x0a004,
+	.hid_width = 5,
+	.mnd_width = 8,
+	.freq_tbl = ftbl_gp_clk_src,
+	.parent_map = gcc_gp_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "gp3_clk_src",
+		.parent_data = gcc_gp_data,
+		.num_parents = ARRAY_SIZE(gcc_gp_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct parent_map gcc_jpeg0_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 1 },
+	{ P_GPLL6, 2 },
+	{ P_GPLL0_DIV2, 4 },
+	{ P_GPLL2, 5 },
+};
+
+static const struct clk_parent_data gcc_jpeg0_data[] = {
+	{ .fw_name = "xo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll6.clkr.hw },
+	{ .hw = &gpll0_early_div.hw },
+	{ .hw = &gpll2.clkr.hw },
+};
+
+static const struct freq_tbl ftbl_jpeg0_clk_src[] = {
+	F(66670000, P_GPLL0_DIV2, 6, 0, 0),
+	F(133330000, P_GPLL0, 6, 0, 0),
+	F(200000000, P_GPLL0, 4, 0, 0),
+	F(266670000, P_GPLL0, 3, 0, 0),
+	F(310000000, P_GPLL2, 3, 0, 0),
+	F(320000000, P_GPLL0, 2.5, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 jpeg0_clk_src = {
+	.cmd_rcgr = 0x57000,
+	.hid_width = 5,
+	.freq_tbl = ftbl_jpeg0_clk_src,
+	.parent_map = gcc_jpeg0_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "jpeg0_clk_src",
+		.parent_data = gcc_jpeg0_data,
+		.num_parents = ARRAY_SIZE(gcc_jpeg0_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct parent_map gcc_mclk_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 1 },
+	{ P_GPLL6, 2 },
+	{ P_GPLL0_DIV2, 4 },
+	{ P_GPLL6_DIV2, 5 },
+	{ P_SLEEP_CLK, 6 },
+};
+
+static const struct clk_parent_data gcc_mclk_data[] = {
+	{ .fw_name = "xo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll6.clkr.hw },
+	{ .hw = &gpll0_early_div.hw },
+	{ .hw = &gpll6_early_div.hw },
+	{ .fw_name = "sleep", .name = "sleep" },
+};
+
+static const struct freq_tbl ftbl_mclk_clk_src[] = {
+	F(19200000, P_GPLL6, 5, 4, 45),
+	F(24000000, P_GPLL6_DIV2, 1, 2, 45),
+	F(26000000, P_GPLL0, 1, 4, 123),
+	F(33330000, P_GPLL0_DIV2, 12, 0, 0),
+	F(36610000, P_GPLL6, 1, 2, 59),
+	F(66667000, P_GPLL0, 12, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 mclk0_clk_src = {
+	.cmd_rcgr = 0x52000,
+	.hid_width = 5,
+	.mnd_width = 8,
+	.freq_tbl = ftbl_mclk_clk_src,
+	.parent_map = gcc_mclk_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "mclk0_clk_src",
+		.parent_data = gcc_mclk_data,
+		.num_parents = ARRAY_SIZE(gcc_mclk_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 mclk1_clk_src = {
+	.cmd_rcgr = 0x53000,
+	.hid_width = 5,
+	.mnd_width = 8,
+	.freq_tbl = ftbl_mclk_clk_src,
+	.parent_map = gcc_mclk_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "mclk1_clk_src",
+		.parent_data = gcc_mclk_data,
+		.num_parents = ARRAY_SIZE(gcc_mclk_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 mclk2_clk_src = {
+	.cmd_rcgr = 0x5c000,
+	.hid_width = 5,
+	.mnd_width = 8,
+	.freq_tbl = ftbl_mclk_clk_src,
+	.parent_map = gcc_mclk_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "mclk2_clk_src",
+		.parent_data = gcc_mclk_data,
+		.num_parents = ARRAY_SIZE(gcc_mclk_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 mclk3_clk_src = {
+	.cmd_rcgr = 0x5e000,
+	.hid_width = 5,
+	.mnd_width = 8,
+	.freq_tbl = ftbl_mclk_clk_src,
+	.parent_map = gcc_mclk_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "mclk3_clk_src",
+		.parent_data = gcc_mclk_data,
+		.num_parents = ARRAY_SIZE(gcc_mclk_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct parent_map gcc_mdp_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 1 },
+	{ P_GPLL6, 3 },
+	{ P_GPLL0_DIV2, 4 },
+};
+
+static const struct clk_parent_data gcc_mdp_data[] = {
+	{ .fw_name = "xo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll6.clkr.hw },
+	{ .hw = &gpll0_early_div.hw },
+};
+
+static const struct freq_tbl ftbl_mdp_clk_src[] = {
+	F(50000000, P_GPLL0_DIV2, 8, 0, 0),
+	F(80000000, P_GPLL0_DIV2, 5, 0, 0),
+	F(160000000, P_GPLL0_DIV2, 2.5, 0, 0),
+	F(200000000, P_GPLL0, 4, 0, 0),
+	F(266670000, P_GPLL0, 3, 0, 0),
+	F(320000000, P_GPLL0, 2.5, 0, 0),
+	F(400000000, P_GPLL0, 2, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 mdp_clk_src = {
+	.cmd_rcgr = 0x4d014,
+	.hid_width = 5,
+	.freq_tbl = ftbl_mdp_clk_src,
+	.parent_map = gcc_mdp_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "mdp_clk_src",
+		.parent_data = gcc_mdp_data,
+		.num_parents = ARRAY_SIZE(gcc_mdp_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct parent_map gcc_pclk0_map[] = {
+	{ P_XO, 0 },
+	{ P_DSI0PLL, 1 },
+	{ P_DSI1PLL, 3 },
+};
+
+static const struct parent_map gcc_pclk1_map[] = {
+	{ P_XO, 0 },
+	{ P_DSI0PLL, 3 },
+	{ P_DSI1PLL, 1 },
+};
+
+static const struct clk_parent_data gcc_pclk_data[] = {
+	{ .fw_name = "xo" },
+	{ .fw_name = "dsi0pll", .name = "dsi0pll" },
+	{ .fw_name = "dsi1pll", .name = "dsi1pll" },
+};
+
+static struct clk_rcg2 pclk0_clk_src = {
+	.cmd_rcgr = 0x4d000,
+	.hid_width = 5,
+	.mnd_width = 8,
+	.parent_map = gcc_pclk0_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "pclk0_clk_src",
+		.parent_data = gcc_pclk_data,
+		.num_parents = ARRAY_SIZE(gcc_pclk_data),
+		.ops = &clk_pixel_ops,
+		.flags = CLK_SET_RATE_PARENT,
+	}
+};
+
+static struct clk_rcg2 pclk1_clk_src = {
+	.cmd_rcgr = 0x4d0b8,
+	.hid_width = 5,
+	.mnd_width = 8,
+	.parent_map = gcc_pclk1_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "pclk1_clk_src",
+		.parent_data = gcc_pclk_data,
+		.num_parents = ARRAY_SIZE(gcc_pclk_data),
+		.ops = &clk_pixel_ops,
+		.flags = CLK_SET_RATE_PARENT,
+	}
+};
+
+static const struct freq_tbl ftbl_pdm2_clk_src[] = {
+	F(32000000, P_GPLL0_DIV2, 12.5, 0, 0),
+	F(64000000, P_GPLL0, 12.5, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 pdm2_clk_src = {
+	.cmd_rcgr = 0x44010,
+	.hid_width = 5,
+	.freq_tbl = ftbl_pdm2_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_2_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "pdm2_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct freq_tbl ftbl_rbcpr_gfx_clk_src[] = {
+	F(19200000, P_XO, 1, 0, 0),
+	F(50000000, P_GPLL0, 16, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 rbcpr_gfx_clk_src = {
+	.cmd_rcgr = 0x3a00c,
+	.hid_width = 5,
+	.freq_tbl = ftbl_rbcpr_gfx_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_4_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "rbcpr_gfx_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct parent_map gcc_sdcc1_ice_core_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 1 },
+	{ P_GPLL6, 2 },
+	{ P_GPLL0_DIV2, 4 },
+};
+
+static const struct clk_parent_data gcc_sdcc1_ice_core_data[] = {
+	{ .fw_name = "xo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll6.clkr.hw },
+	{ .hw = &gpll0_early_div.hw },
+};
+
+static const struct freq_tbl ftbl_sdcc1_ice_core_clk_src[] = {
+	F(80000000, P_GPLL0_DIV2, 5, 0, 0),
+	F(160000000, P_GPLL0, 5, 0, 0),
+	F(270000000, P_GPLL6, 4, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 sdcc1_ice_core_clk_src = {
+	.cmd_rcgr = 0x5d000,
+	.hid_width = 5,
+	.freq_tbl = ftbl_sdcc1_ice_core_clk_src,
+	.parent_map = gcc_sdcc1_ice_core_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "sdcc1_ice_core_clk_src",
+		.parent_data = gcc_sdcc1_ice_core_data,
+		.num_parents = ARRAY_SIZE(gcc_sdcc1_ice_core_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct parent_map gcc_sdcc_apps_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 1 },
+	{ P_GPLL4, 2 },
+	{ P_GPLL0_DIV2, 4 },
+};
+
+static const struct clk_parent_data gcc_sdcc_apss_data[] = {
+	{ .fw_name = "xo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll4.clkr.hw },
+	{ .hw = &gpll0_early_div.hw },
+};
+
+static const struct freq_tbl ftbl_sdcc1_apps_clk_src[] = {
+	F(144000, P_XO, 16, 3, 25),
+	F(400000, P_XO, 12, 1, 4),
+	F(20000000, P_GPLL0_DIV2, 5, 1, 4),
+	F(25000000, P_GPLL0_DIV2, 16, 0, 0),
+	F(50000000, P_GPLL0, 16, 0, 0),
+	F(100000000, P_GPLL0, 8, 0, 0),
+	F(177770000, P_GPLL0, 4.5, 0, 0),
+	F(192000000, P_GPLL4, 6, 0, 0),
+	F(384000000, P_GPLL4, 3, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 sdcc1_apps_clk_src = {
+	.cmd_rcgr = 0x42004,
+	.hid_width = 5,
+	.mnd_width = 8,
+	.freq_tbl = ftbl_sdcc1_apps_clk_src,
+	.parent_map = gcc_sdcc_apps_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "sdcc1_apps_clk_src",
+		.parent_data = gcc_sdcc_apss_data,
+		.num_parents = ARRAY_SIZE(gcc_sdcc_apss_data),
+		.ops = &clk_rcg2_floor_ops,
+	}
+};
+
+static const struct freq_tbl ftbl_sdcc2_apps_clk_src[] = {
+	F(144000, P_XO, 16, 3, 25),
+	F(400000, P_XO, 12, 1, 4),
+	F(20000000, P_GPLL0_DIV2, 5, 1, 4),
+	F(25000000, P_GPLL0_DIV2, 16, 0, 0),
+	F(50000000, P_GPLL0, 16, 0, 0),
+	F(100000000, P_GPLL0, 8, 0, 0),
+	F(177770000, P_GPLL0, 4.5, 0, 0),
+	F(192000000, P_GPLL4, 6, 0, 0),
+	F(200000000, P_GPLL0, 4, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 sdcc2_apps_clk_src = {
+	.cmd_rcgr = 0x43004,
+	.hid_width = 5,
+	.mnd_width = 8,
+	.freq_tbl = ftbl_sdcc2_apps_clk_src,
+	.parent_map = gcc_sdcc_apps_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "sdcc2_apps_clk_src",
+		.parent_data = gcc_sdcc_apss_data,
+		.num_parents = ARRAY_SIZE(gcc_sdcc_apss_data),
+		.ops = &clk_rcg2_floor_ops,
+	}
+};
+
+static const struct freq_tbl ftbl_usb30_master_clk_src[] = {
+	F(80000000, P_GPLL0_DIV2, 5, 0, 0),
+	F(100000000, P_GPLL0, 8, 0, 0),
+	F(133330000, P_GPLL0, 6, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 usb30_master_clk_src = {
+	.cmd_rcgr = 0x3f00c,
+	.hid_width = 5,
+	.freq_tbl = ftbl_usb30_master_clk_src,
+	.parent_map = gcc_xo_gpll0_gpll0div2_2_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "usb30_master_clk_src",
+		.parent_data = gcc_xo_gpll0_gpll0div2_data,
+		.num_parents = ARRAY_SIZE(gcc_xo_gpll0_gpll0div2_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct parent_map gcc_usb30_mock_utmi_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL6, 1 },
+	{ P_GPLL6_DIV2, 2 },
+	{ P_GPLL0, 3 },
+	{ P_GPLL0_DIV2, 4 },
+};
+
+static const struct clk_parent_data gcc_usb30_mock_utmi_data[] = {
+	{ .fw_name = "xo" },
+	{ .hw = &gpll6.clkr.hw },
+	{ .hw = &gpll6_early_div.hw },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll0_early_div.hw },
+};
+
+static const struct freq_tbl ftbl_usb30_mock_utmi_clk_src[] = {
+	F(19200000, P_XO, 1, 0, 0),
+	F(60000000, P_GPLL6_DIV2, 9, 1, 1),
+	{ }
+};
+
+static struct clk_rcg2 usb30_mock_utmi_clk_src = {
+	.cmd_rcgr = 0x3f020,
+	.hid_width = 5,
+	.mnd_width = 8,
+	.freq_tbl = ftbl_usb30_mock_utmi_clk_src,
+	.parent_map = gcc_usb30_mock_utmi_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "usb30_mock_utmi_clk_src",
+		.parent_data = gcc_usb30_mock_utmi_data,
+		.num_parents = ARRAY_SIZE(gcc_usb30_mock_utmi_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct parent_map gcc_usb3_aux_map[] = {
+	{ P_XO, 0 },
+	{ P_SLEEP_CLK, 6 },
+};
+
+static const struct clk_parent_data gcc_usb3_aux_data[] = {
+	{ .fw_name = "xo" },
+	{ .fw_name = "sleep", .name = "sleep" },
+};
+
+static const struct freq_tbl ftbl_usb3_aux_clk_src[] = {
+	F(19200000, P_XO, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 usb3_aux_clk_src = {
+	.cmd_rcgr = 0x3f05c,
+	.hid_width = 5,
+	.mnd_width = 8,
+	.freq_tbl = ftbl_usb3_aux_clk_src,
+	.parent_map = gcc_usb3_aux_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "usb3_aux_clk_src",
+		.parent_data = gcc_usb3_aux_data,
+		.num_parents = ARRAY_SIZE(gcc_usb3_aux_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct parent_map gcc_vcodec0_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 1 },
+	{ P_GPLL6, 2 },
+	{ P_GPLL2, 3 },
+	{ P_GPLL0_DIV2, 4 },
+};
+
+static const struct clk_parent_data gcc_vcodec0_data[] = {
+	{ .fw_name = "xo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll6.clkr.hw },
+	{ .hw = &gpll2.clkr.hw },
+	{ .hw = &gpll0_early_div.hw },
+};
+
+static const struct freq_tbl ftbl_vcodec0_clk_src[] = {
+	F(114290000, P_GPLL0_DIV2, 3.5, 0, 0),
+	F(228570000, P_GPLL0, 3.5, 0, 0),
+	F(310000000, P_GPLL2, 3, 0, 0),
+	F(360000000, P_GPLL6, 3, 0, 0),
+	F(400000000, P_GPLL0, 2, 0, 0),
+	F(465000000, P_GPLL2, 2, 0, 0),
+	F(540000000, P_GPLL6, 2, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 vcodec0_clk_src = {
+	.cmd_rcgr = 0x4c000,
+	.hid_width = 5,
+	.freq_tbl = ftbl_vcodec0_clk_src,
+	.parent_map = gcc_vcodec0_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "vcodec0_clk_src",
+		.parent_data = gcc_vcodec0_data,
+		.num_parents = ARRAY_SIZE(gcc_vcodec0_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct parent_map gcc_vfe_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 1 },
+	{ P_GPLL6, 2 },
+	{ P_GPLL4, 3 },
+	{ P_GPLL2, 4 },
+	{ P_GPLL0_DIV2, 5 },
+};
+
+static const struct clk_parent_data gcc_vfe_data[] = {
+	{ .fw_name = "xo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll6.clkr.hw },
+	{ .hw = &gpll4.clkr.hw },
+	{ .hw = &gpll2.clkr.hw },
+	{ .hw = &gpll0_early_div.hw },
+};
+
+static const struct freq_tbl ftbl_vfe_clk_src[] = {
+	F(50000000, P_GPLL0_DIV2, 8, 0, 0),
+	F(100000000, P_GPLL0_DIV2, 4, 0, 0),
+	F(133330000, P_GPLL0, 6, 0, 0),
+	F(160000000, P_GPLL0, 5, 0, 0),
+	F(200000000, P_GPLL0, 4, 0, 0),
+	F(266670000, P_GPLL0, 3, 0, 0),
+	F(310000000, P_GPLL2, 3, 0, 0),
+	F(400000000, P_GPLL0, 2, 0, 0),
+	F(465000000, P_GPLL2, 2, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 vfe0_clk_src = {
+	.cmd_rcgr = 0x58000,
+	.hid_width = 5,
+	.freq_tbl = ftbl_vfe_clk_src,
+	.parent_map = gcc_vfe_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "vfe0_clk_src",
+		.parent_data = gcc_vfe_data,
+		.num_parents = ARRAY_SIZE(gcc_vfe_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_rcg2 vfe1_clk_src = {
+	.cmd_rcgr = 0x58054,
+	.hid_width = 5,
+	.freq_tbl = ftbl_vfe_clk_src,
+	.parent_map = gcc_vfe_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "vfe1_clk_src",
+		.parent_data = gcc_vfe_data,
+		.num_parents = ARRAY_SIZE(gcc_vfe_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static const struct parent_map gcc_vsync_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 2 },
+};
+
+static const struct freq_tbl ftbl_vsync_clk_src[] = {
+	F(19200000, P_XO, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 vsync_clk_src = {
+	.cmd_rcgr = 0x4d02c,
+	.hid_width = 5,
+	.freq_tbl = ftbl_vsync_clk_src,
+	.parent_map = gcc_vsync_map,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "vsync_clk_src",
+		.parent_data = gcc_esc_vsync_data,
+		.num_parents = ARRAY_SIZE(gcc_esc_vsync_data),
+		.ops = &clk_rcg2_ops,
+	}
+};
+
+static struct clk_branch gcc_apc0_droop_detector_gpll0_clk = {
+	.halt_reg = 0x78004,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x78004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_apc0_droop_detector_gpll0_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&apc0_droop_detector_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_apc1_droop_detector_gpll0_clk = {
+	.halt_reg = 0x79004,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x79004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_apc1_droop_detector_gpll0_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&apc1_droop_detector_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_apss_ahb_clk = {
+	.halt_reg = 0x4601c,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x45004,
+		.enable_mask = BIT(14),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_apss_ahb_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&apss_ahb_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_apss_axi_clk = {
+	.halt_reg = 0x46020,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x45004,
+		.enable_mask = BIT(13),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_apss_axi_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_apss_tcu_async_clk = {
+	.halt_reg = 0x12018,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x4500c,
+		.enable_mask = BIT(1),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_apss_tcu_async_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_bimc_gfx_clk = {
+	.halt_reg = 0x59034,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x59034,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_bimc_gfx_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_bimc_gpu_clk = {
+	.halt_reg = 0x59030,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x59030,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_bimc_gpu_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_blsp1_ahb_clk = {
+	.halt_reg = 0x01008,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x45004,
+		.enable_mask = BIT(10),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_blsp1_ahb_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_blsp2_ahb_clk = {
+	.halt_reg = 0x0b008,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x45004,
+		.enable_mask = BIT(20),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_blsp2_ahb_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_blsp1_qup1_i2c_apps_clk = {
+	.halt_reg = 0x02008,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x02008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_blsp1_qup1_i2c_apps_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&blsp1_qup1_i2c_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_blsp1_qup2_i2c_apps_clk = {
+	.halt_reg = 0x03010,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x03010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_blsp1_qup2_i2c_apps_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&blsp1_qup2_i2c_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_blsp1_qup3_i2c_apps_clk = {
+	.halt_reg = 0x04020,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x04020,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_blsp1_qup3_i2c_apps_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&blsp1_qup3_i2c_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_blsp1_qup4_i2c_apps_clk = {
+	.halt_reg = 0x05020,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x05020,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_blsp1_qup4_i2c_apps_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&blsp1_qup4_i2c_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_blsp2_qup1_i2c_apps_clk = {
+	.halt_reg = 0x0c008,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x0c008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_blsp2_qup1_i2c_apps_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&blsp2_qup1_i2c_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_blsp2_qup2_i2c_apps_clk = {
+	.halt_reg = 0x0d010,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x0d010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_blsp2_qup2_i2c_apps_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&blsp2_qup2_i2c_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_blsp2_qup3_i2c_apps_clk = {
+	.halt_reg = 0x0f020,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x0f020,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_blsp2_qup3_i2c_apps_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&blsp2_qup3_i2c_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_blsp2_qup4_i2c_apps_clk = {
+	.halt_reg = 0x18020,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x18020,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_blsp2_qup4_i2c_apps_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&blsp2_qup4_i2c_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_blsp1_qup1_spi_apps_clk = {
+	.halt_reg = 0x02004,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x02004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_blsp1_qup1_spi_apps_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&blsp1_qup1_spi_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_blsp1_qup2_spi_apps_clk = {
+	.halt_reg = 0x0300c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x0300c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_blsp1_qup2_spi_apps_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&blsp1_qup2_spi_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_blsp1_qup3_spi_apps_clk = {
+	.halt_reg = 0x0401c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x0401c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_blsp1_qup3_spi_apps_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&blsp1_qup3_spi_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_blsp1_qup4_spi_apps_clk = {
+	.halt_reg = 0x0501c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x0501c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_blsp1_qup4_spi_apps_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&blsp1_qup4_spi_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_blsp2_qup1_spi_apps_clk = {
+	.halt_reg = 0x0c004,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x0c004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_blsp2_qup1_spi_apps_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&blsp2_qup1_spi_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_blsp2_qup2_spi_apps_clk = {
+	.halt_reg = 0x0d00c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x0d00c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_blsp2_qup2_spi_apps_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&blsp2_qup2_spi_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_blsp2_qup3_spi_apps_clk = {
+	.halt_reg = 0x0f01c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x0f01c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_blsp2_qup3_spi_apps_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&blsp2_qup3_spi_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_blsp2_qup4_spi_apps_clk = {
+	.halt_reg = 0x1801c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1801c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_blsp2_qup4_spi_apps_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&blsp2_qup4_spi_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_blsp1_uart1_apps_clk = {
+	.halt_reg = 0x0203c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x0203c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_blsp1_uart1_apps_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&blsp1_uart1_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_blsp1_uart2_apps_clk = {
+	.halt_reg = 0x0302c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x0302c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_blsp1_uart2_apps_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&blsp1_uart2_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_blsp2_uart1_apps_clk = {
+	.halt_reg = 0x0c03c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x0c03c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_blsp2_uart1_apps_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&blsp2_uart1_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_blsp2_uart2_apps_clk = {
+	.halt_reg = 0x0d02c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x0d02c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_blsp2_uart2_apps_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&blsp2_uart2_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_boot_rom_ahb_clk = {
+	.halt_reg = 0x1300c,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x45004,
+		.enable_mask = BIT(7),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_boot_rom_ahb_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_ahb_clk = {
+	.halt_reg = 0x56004,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x56004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_ahb_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_cci_ahb_clk = {
+	.halt_reg = 0x5101c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x5101c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_cci_ahb_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&camss_top_ahb_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_cci_clk = {
+	.halt_reg = 0x51018,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x51018,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_cci_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&cci_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_cpp_ahb_clk = {
+	.halt_reg = 0x58040,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x58040,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_cpp_ahb_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&camss_top_ahb_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_cpp_axi_clk = {
+	.halt_reg = 0x58064,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x58064,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_cpp_axi_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_cpp_clk = {
+	.halt_reg = 0x5803c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x5803c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_cpp_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&cpp_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_csi0_ahb_clk = {
+	.halt_reg = 0x4e040,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4e040,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_csi0_ahb_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&camss_top_ahb_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_csi1_ahb_clk = {
+	.halt_reg = 0x4f040,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4f040,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_csi1_ahb_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&camss_top_ahb_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_csi2_ahb_clk = {
+	.halt_reg = 0x3c040,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x3c040,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_csi2_ahb_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&camss_top_ahb_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_csi0_clk = {
+	.halt_reg = 0x4e03c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4e03c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_csi0_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&csi0_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_csi1_clk = {
+	.halt_reg = 0x4f03c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4f03c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_csi1_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&csi1_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_csi2_clk = {
+	.halt_reg = 0x3c03c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x3c03c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_csi2_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&csi2_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_csi0_csiphy_3p_clk = {
+	.halt_reg = 0x58090,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x58090,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_csi0_csiphy_3p_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&csi0p_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_csi1_csiphy_3p_clk = {
+	.halt_reg = 0x580a0,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x580a0,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_csi1_csiphy_3p_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&csi1p_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_csi2_csiphy_3p_clk = {
+	.halt_reg = 0x580b0,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x580b0,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_csi2_csiphy_3p_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&csi2p_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_csi0phy_clk = {
+	.halt_reg = 0x4e048,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4e048,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_csi0phy_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&csi0_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_csi1phy_clk = {
+	.halt_reg = 0x4f048,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4f048,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_csi1phy_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&csi1_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_csi2phy_clk = {
+	.halt_reg = 0x3c048,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x3c048,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_csi2phy_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&csi2_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_csi0phytimer_clk = {
+	.halt_reg = 0x4e01c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4e01c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_csi0phytimer_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&csi0phytimer_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_csi1phytimer_clk = {
+	.halt_reg = 0x4f01c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4f01c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_csi1phytimer_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&csi1phytimer_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_csi2phytimer_clk = {
+	.halt_reg = 0x4f068,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4f068,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_csi2phytimer_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&csi2phytimer_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_csi0pix_clk = {
+	.halt_reg = 0x4e058,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4e058,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_csi0pix_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&csi0_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_csi1pix_clk = {
+	.halt_reg = 0x4f058,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4f058,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_csi1pix_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&csi1_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_csi2pix_clk = {
+	.halt_reg = 0x3c058,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x3c058,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_csi2pix_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&csi2_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_csi0rdi_clk = {
+	.halt_reg = 0x4e050,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4e050,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_csi0rdi_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&csi0_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_csi1rdi_clk = {
+	.halt_reg = 0x4f050,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4f050,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_csi1rdi_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&csi1_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_csi2rdi_clk = {
+	.halt_reg = 0x3c050,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x3c050,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_csi2rdi_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&csi2_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_csi_vfe0_clk = {
+	.halt_reg = 0x58050,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x58050,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_csi_vfe0_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&vfe0_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_csi_vfe1_clk = {
+	.halt_reg = 0x58074,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x58074,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_csi_vfe1_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&vfe1_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_gp0_clk = {
+	.halt_reg = 0x54018,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x54018,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_gp0_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&camss_gp0_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_gp1_clk = {
+	.halt_reg = 0x55018,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x55018,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_gp1_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&camss_gp1_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_ispif_ahb_clk = {
+	.halt_reg = 0x50004,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x50004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_ispif_ahb_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&camss_top_ahb_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_jpeg0_clk = {
+	.halt_reg = 0x57020,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x57020,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_jpeg0_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&jpeg0_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_jpeg_ahb_clk = {
+	.halt_reg = 0x57024,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x57024,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_jpeg_ahb_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&camss_top_ahb_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_jpeg_axi_clk = {
+	.halt_reg = 0x57028,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x57028,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_jpeg_axi_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_mclk0_clk = {
+	.halt_reg = 0x52018,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x52018,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_mclk0_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&mclk0_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_mclk1_clk = {
+	.halt_reg = 0x53018,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x53018,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_mclk1_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&mclk1_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_mclk2_clk = {
+	.halt_reg = 0x5c018,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x5c018,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_mclk2_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&mclk2_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_mclk3_clk = {
+	.halt_reg = 0x5e018,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x5e018,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_mclk3_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&mclk3_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_micro_ahb_clk = {
+	.halt_reg = 0x5600c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x5600c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_micro_ahb_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&camss_top_ahb_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_top_ahb_clk = {
+	.halt_reg = 0x5a014,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x5a014,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_top_ahb_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&camss_top_ahb_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_vfe0_ahb_clk = {
+	.halt_reg = 0x58044,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x58044,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_vfe0_ahb_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&camss_top_ahb_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_vfe0_axi_clk = {
+	.halt_reg = 0x58048,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x58048,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_vfe0_axi_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_vfe0_clk = {
+	.halt_reg = 0x58038,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x58038,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_vfe0_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&vfe0_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_vfe1_ahb_clk = {
+	.halt_reg = 0x58060,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x58060,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_vfe1_ahb_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&camss_top_ahb_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_vfe1_axi_clk = {
+	.halt_reg = 0x58068,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x58068,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_vfe1_axi_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_camss_vfe1_clk = {
+	.halt_reg = 0x5805c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x5805c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_camss_vfe1_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&vfe1_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_cpp_tbu_clk = {
+	.halt_reg = 0x12040,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x4500c,
+		.enable_mask = BIT(14),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_cpp_tbu_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_crypto_ahb_clk = {
+	.halt_reg = 0x16024,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x45004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_crypto_ahb_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_crypto_axi_clk = {
+	.halt_reg = 0x16020,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x45004,
+		.enable_mask = BIT(1),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_crypto_axi_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_crypto_clk = {
+	.halt_reg = 0x1601c,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x45004,
+		.enable_mask = BIT(2),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_crypto_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&crypto_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_dcc_clk = {
+	.halt_reg = 0x77004,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x77004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_dcc_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_gp1_clk = {
+	.halt_reg = 0x08000,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x08000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_gp1_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gp1_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_gp2_clk = {
+	.halt_reg = 0x09000,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x09000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_gp2_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gp2_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_gp3_clk = {
+	.halt_reg = 0x0a000,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x0a000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_gp3_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gp3_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_jpeg_tbu_clk = {
+	.halt_reg = 0x12034,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x4500c,
+		.enable_mask = BIT(10),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_jpeg_tbu_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_mdp_tbu_clk = {
+	.halt_reg = 0x1201c,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x4500c,
+		.enable_mask = BIT(4),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_mdp_tbu_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_mdss_ahb_clk = {
+	.halt_reg = 0x4d07c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4d07c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_mdss_ahb_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_mdss_axi_clk = {
+	.halt_reg = 0x4d080,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4d080,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_mdss_axi_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_mdss_byte0_clk = {
+	.halt_reg = 0x4d094,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4d094,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_mdss_byte0_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&byte0_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_mdss_byte1_clk = {
+	.halt_reg = 0x4d0a0,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4d0a0,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_mdss_byte1_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&byte1_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_mdss_esc0_clk = {
+	.halt_reg = 0x4d098,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4d098,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_mdss_esc0_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&esc0_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_mdss_esc1_clk = {
+	.halt_reg = 0x4d09c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4d09c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_mdss_esc1_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&esc1_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_mdss_mdp_clk = {
+	.halt_reg = 0x4d088,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4d088,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_mdss_mdp_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&mdp_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_mdss_pclk0_clk = {
+	.halt_reg = 0x4d084,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4d084,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_mdss_pclk0_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&pclk0_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_mdss_pclk1_clk = {
+	.halt_reg = 0x4d0a4,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4d0a4,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_mdss_pclk1_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&pclk1_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_mdss_vsync_clk = {
+	.halt_reg = 0x4d090,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4d090,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_mdss_vsync_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&vsync_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_mss_cfg_ahb_clk = {
+	.halt_reg = 0x49000,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x49000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_mss_cfg_ahb_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_mss_q6_bimc_axi_clk = {
+	.halt_reg = 0x49004,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x49004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_mss_q6_bimc_axi_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_oxili_ahb_clk = {
+	.halt_reg = 0x59028,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x59028,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_oxili_ahb_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_oxili_aon_clk = {
+	.halt_reg = 0x59044,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x59044,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_oxili_aon_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gfx3d_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_oxili_gfx3d_clk = {
+	.halt_reg = 0x59020,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x59020,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_oxili_gfx3d_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gfx3d_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_oxili_timer_clk = {
+	.halt_reg = 0x59040,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x59040,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_oxili_timer_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_pcnoc_usb3_axi_clk = {
+	.halt_reg = 0x3f038,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x3f038,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_pcnoc_usb3_axi_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&usb30_master_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_pdm2_clk = {
+	.halt_reg = 0x4400c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4400c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_pdm2_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&pdm2_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_pdm_ahb_clk = {
+	.halt_reg = 0x44004,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x44004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_pdm_ahb_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_prng_ahb_clk = {
+	.halt_reg = 0x13004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x45004,
+		.enable_mask = BIT(8),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_prng_ahb_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_qdss_dap_clk = {
+	.halt_reg = 0x29084,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x45004,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_qdss_dap_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_qusb_ref_clk = {
+	.halt_reg = 0,
+	.halt_check = BRANCH_HALT_SKIP,
+	.clkr = {
+		.enable_reg = 0x41030,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_qusb_ref_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_rbcpr_gfx_clk = {
+	.halt_reg = 0x3a004,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x3a004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_rbcpr_gfx_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&rbcpr_gfx_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_sdcc1_ice_core_clk = {
+	.halt_reg = 0x5d014,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x5d014,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_sdcc1_ice_core_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&sdcc1_ice_core_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_sdcc1_ahb_clk = {
+	.halt_reg = 0x4201c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4201c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_sdcc1_ahb_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_sdcc2_ahb_clk = {
+	.halt_reg = 0x4301c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4301c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_sdcc2_ahb_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_sdcc1_apps_clk = {
+	.halt_reg = 0x42018,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x42018,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_sdcc1_apps_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&sdcc1_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_sdcc2_apps_clk = {
+	.halt_reg = 0x43018,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x43018,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_sdcc2_apps_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&sdcc2_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_smmu_cfg_clk = {
+	.halt_reg = 0x12038,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x4500c,
+		.enable_mask = BIT(12),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_smmu_cfg_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_usb30_master_clk = {
+	.halt_reg = 0x3f000,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x3f000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_usb30_master_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&usb30_master_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_usb30_mock_utmi_clk = {
+	.halt_reg = 0x3f008,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x3f008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_usb30_mock_utmi_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&usb30_mock_utmi_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_usb30_sleep_clk = {
+	.halt_reg = 0x3f004,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x3f004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_usb30_sleep_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_usb3_aux_clk = {
+	.halt_reg = 0x3f044,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x3f044,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_usb3_aux_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&usb3_aux_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_usb3_pipe_clk = {
+	.halt_reg = 0,
+	.halt_check = BRANCH_HALT_DELAY,
+	.clkr = {
+		.enable_reg = 0x3f040,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_usb3_pipe_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_usb_phy_cfg_ahb_clk = {
+	.halt_reg = 0x3f080,
+	.halt_check = BRANCH_VOTED,
+	.clkr = {
+		.enable_reg = 0x3f080,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_usb_phy_cfg_ahb_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_usb_ss_ref_clk = {
+	.halt_reg = 0,
+	.halt_check = BRANCH_HALT_SKIP,
+	.clkr = {
+		.enable_reg = 0x3f07c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_usb_ss_ref_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_venus0_ahb_clk = {
+	.halt_reg = 0x4c020,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4c020,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_venus0_ahb_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_venus0_axi_clk = {
+	.halt_reg = 0x4c024,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4c024,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_venus0_axi_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_venus0_core0_vcodec0_clk = {
+	.halt_reg = 0x4c02c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4c02c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_venus0_core0_vcodec0_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&vcodec0_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_venus0_vcodec0_clk = {
+	.halt_reg = 0x4c01c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4c01c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_venus0_vcodec0_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&vcodec0_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		}
+	}
+};
+
+static struct clk_branch gcc_venus_tbu_clk = {
+	.halt_reg = 0x12014,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x4500c,
+		.enable_mask = BIT(5),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_venus_tbu_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_vfe1_tbu_clk = {
+	.halt_reg = 0x12090,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x4500c,
+		.enable_mask = BIT(17),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_vfe1_tbu_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct clk_branch gcc_vfe_tbu_clk = {
+	.halt_reg = 0x1203c,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x4500c,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data) {
+			.name = "gcc_vfe_tbu_clk",
+			.ops = &clk_branch2_ops,
+		}
+	}
+};
+
+static struct gdsc usb30_gdsc = {
+	.gdscr = 0x3f078,
+	.pd = {
+		.name = "usb30_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	/*
+	 * FIXME: dwc3 usb gadget cannot resume after GDSC power off
+	 * dwc3 7000000.dwc3: failed to enable ep0out
+	 */
+	.flags = ALWAYS_ON,
+};
+
+static struct gdsc venus_gdsc = {
+	.gdscr = 0x4c018,
+	.cxcs = (unsigned int []){ 0x4c024, 0x4c01c },
+	.cxc_count = 2,
+	.pd = {
+		.name = "venus_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc venus_core0_gdsc = {
+	.gdscr = 0x4c028,
+	.cxcs = (unsigned int []){ 0x4c02c },
+	.cxc_count = 1,
+	.pd = {
+		.name = "venus_core0",
+	},
+	.flags = HW_CTRL,
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc mdss_gdsc = {
+	.gdscr = 0x4d078,
+	.cxcs = (unsigned int []){ 0x4d080, 0x4d088 },
+	.cxc_count = 2,
+	.pd = {
+		.name = "mdss_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc jpeg_gdsc = {
+	.gdscr = 0x5701c,
+	.cxcs = (unsigned int []){ 0x57020, 0x57028 },
+	.cxc_count = 2,
+	.pd = {
+		.name = "jpeg_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc vfe0_gdsc = {
+	.gdscr = 0x58034,
+	.cxcs = (unsigned int []){ 0x58038, 0x58048, 0x5600c, 0x58050 },
+	.cxc_count = 4,
+	.pd = {
+		.name = "vfe0_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc vfe1_gdsc = {
+	.gdscr = 0x5806c,
+	.cxcs = (unsigned int []){ 0x5805c, 0x58068, 0x5600c, 0x58074 },
+	.cxc_count = 4,
+	.pd = {
+		.name = "vfe1_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc oxili_gx_gdsc = {
+	.gdscr = 0x5901c,
+	.clamp_io_ctrl = 0x5b00c,
+	.cxcs = (unsigned int []){ 0x59000, 0x59024 },
+	.cxc_count = 2,
+	.pd = {
+		.name = "oxili_gx_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = CLAMP_IO,
+};
+
+static struct gdsc oxili_cx_gdsc = {
+	.gdscr = 0x5904c,
+	.cxcs = (unsigned int []){ 0x59020 },
+	.cxc_count = 1,
+	.pd = {
+		.name = "oxili_cx_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc cpp_gdsc = {
+	.gdscr = 0x58078,
+	.cxcs = (unsigned int []){ 0x5803c, 0x58064 },
+	.cxc_count = 2,
+	.pd = {
+		.name = "cpp_gdsc",
+	},
+	.flags = ALWAYS_ON,
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct clk_hw *gcc_msm8953_hws[] = {
+	&gpll0_early_div.hw,
+	&gpll6_early_div.hw,
+};
+
+static struct clk_regmap *gcc_msm8953_clocks[] = {
+	[GPLL0] = &gpll0.clkr,
+	[GPLL0_EARLY] = &gpll0_early.clkr,
+	[GPLL2] = &gpll2.clkr,
+	[GPLL2_EARLY] = &gpll2_early.clkr,
+	[GPLL3] = &gpll3.clkr,
+	[GPLL3_EARLY] = &gpll3_early.clkr,
+	[GPLL4] = &gpll4.clkr,
+	[GPLL4_EARLY] = &gpll4_early.clkr,
+	[GPLL6] = &gpll6.clkr,
+	[GPLL6_EARLY] = &gpll6_early.clkr,
+	[GCC_APSS_AHB_CLK] = &gcc_apss_ahb_clk.clkr,
+	[GCC_APSS_AXI_CLK] = &gcc_apss_axi_clk.clkr,
+	[GCC_BLSP1_AHB_CLK] = &gcc_blsp1_ahb_clk.clkr,
+	[GCC_BLSP2_AHB_CLK] = &gcc_blsp2_ahb_clk.clkr,
+	[GCC_BOOT_ROM_AHB_CLK] = &gcc_boot_rom_ahb_clk.clkr,
+	[GCC_CRYPTO_AHB_CLK] = &gcc_crypto_ahb_clk.clkr,
+	[GCC_CRYPTO_AXI_CLK] = &gcc_crypto_axi_clk.clkr,
+	[GCC_CRYPTO_CLK] = &gcc_crypto_clk.clkr,
+	[GCC_PRNG_AHB_CLK] = &gcc_prng_ahb_clk.clkr,
+	[GCC_QDSS_DAP_CLK] = &gcc_qdss_dap_clk.clkr,
+	[GCC_APSS_TCU_ASYNC_CLK] = &gcc_apss_tcu_async_clk.clkr,
+	[GCC_CPP_TBU_CLK] = &gcc_cpp_tbu_clk.clkr,
+	[GCC_JPEG_TBU_CLK] = &gcc_jpeg_tbu_clk.clkr,
+	[GCC_MDP_TBU_CLK] = &gcc_mdp_tbu_clk.clkr,
+	[GCC_SMMU_CFG_CLK] = &gcc_smmu_cfg_clk.clkr,
+	[GCC_VENUS_TBU_CLK] = &gcc_venus_tbu_clk.clkr,
+	[GCC_VFE1_TBU_CLK] = &gcc_vfe1_tbu_clk.clkr,
+	[GCC_VFE_TBU_CLK] = &gcc_vfe_tbu_clk.clkr,
+	[CAMSS_TOP_AHB_CLK_SRC] = &camss_top_ahb_clk_src.clkr,
+	[CSI0_CLK_SRC] = &csi0_clk_src.clkr,
+	[APSS_AHB_CLK_SRC] = &apss_ahb_clk_src.clkr,
+	[CSI1_CLK_SRC] = &csi1_clk_src.clkr,
+	[CSI2_CLK_SRC] = &csi2_clk_src.clkr,
+	[VFE0_CLK_SRC] = &vfe0_clk_src.clkr,
+	[VCODEC0_CLK_SRC] = &vcodec0_clk_src.clkr,
+	[CPP_CLK_SRC] = &cpp_clk_src.clkr,
+	[JPEG0_CLK_SRC] = &jpeg0_clk_src.clkr,
+	[USB30_MASTER_CLK_SRC] = &usb30_master_clk_src.clkr,
+	[VFE1_CLK_SRC] = &vfe1_clk_src.clkr,
+	[APC0_DROOP_DETECTOR_CLK_SRC] = &apc0_droop_detector_clk_src.clkr,
+	[APC1_DROOP_DETECTOR_CLK_SRC] = &apc1_droop_detector_clk_src.clkr,
+	[BLSP1_QUP1_I2C_APPS_CLK_SRC] = &blsp1_qup1_i2c_apps_clk_src.clkr,
+	[BLSP1_QUP1_SPI_APPS_CLK_SRC] = &blsp1_qup1_spi_apps_clk_src.clkr,
+	[BLSP1_QUP2_I2C_APPS_CLK_SRC] = &blsp1_qup2_i2c_apps_clk_src.clkr,
+	[BLSP1_QUP2_SPI_APPS_CLK_SRC] = &blsp1_qup2_spi_apps_clk_src.clkr,
+	[BLSP1_QUP3_I2C_APPS_CLK_SRC] = &blsp1_qup3_i2c_apps_clk_src.clkr,
+	[BLSP1_QUP3_SPI_APPS_CLK_SRC] = &blsp1_qup3_spi_apps_clk_src.clkr,
+	[BLSP1_QUP4_I2C_APPS_CLK_SRC] = &blsp1_qup4_i2c_apps_clk_src.clkr,
+	[BLSP1_QUP4_SPI_APPS_CLK_SRC] = &blsp1_qup4_spi_apps_clk_src.clkr,
+	[BLSP1_UART1_APPS_CLK_SRC] = &blsp1_uart1_apps_clk_src.clkr,
+	[BLSP1_UART2_APPS_CLK_SRC] = &blsp1_uart2_apps_clk_src.clkr,
+	[BLSP2_QUP1_I2C_APPS_CLK_SRC] = &blsp2_qup1_i2c_apps_clk_src.clkr,
+	[BLSP2_QUP1_SPI_APPS_CLK_SRC] = &blsp2_qup1_spi_apps_clk_src.clkr,
+	[BLSP2_QUP2_I2C_APPS_CLK_SRC] = &blsp2_qup2_i2c_apps_clk_src.clkr,
+	[BLSP2_QUP2_SPI_APPS_CLK_SRC] = &blsp2_qup2_spi_apps_clk_src.clkr,
+	[BLSP2_QUP3_I2C_APPS_CLK_SRC] = &blsp2_qup3_i2c_apps_clk_src.clkr,
+	[BLSP2_QUP3_SPI_APPS_CLK_SRC] = &blsp2_qup3_spi_apps_clk_src.clkr,
+	[BLSP2_QUP4_I2C_APPS_CLK_SRC] = &blsp2_qup4_i2c_apps_clk_src.clkr,
+	[BLSP2_QUP4_SPI_APPS_CLK_SRC] = &blsp2_qup4_spi_apps_clk_src.clkr,
+	[BLSP2_UART1_APPS_CLK_SRC] = &blsp2_uart1_apps_clk_src.clkr,
+	[BLSP2_UART2_APPS_CLK_SRC] = &blsp2_uart2_apps_clk_src.clkr,
+	[CCI_CLK_SRC] = &cci_clk_src.clkr,
+	[CSI0P_CLK_SRC] = &csi0p_clk_src.clkr,
+	[CSI1P_CLK_SRC] = &csi1p_clk_src.clkr,
+	[CSI2P_CLK_SRC] = &csi2p_clk_src.clkr,
+	[CAMSS_GP0_CLK_SRC] = &camss_gp0_clk_src.clkr,
+	[CAMSS_GP1_CLK_SRC] = &camss_gp1_clk_src.clkr,
+	[MCLK0_CLK_SRC] = &mclk0_clk_src.clkr,
+	[MCLK1_CLK_SRC] = &mclk1_clk_src.clkr,
+	[MCLK2_CLK_SRC] = &mclk2_clk_src.clkr,
+	[MCLK3_CLK_SRC] = &mclk3_clk_src.clkr,
+	[CSI0PHYTIMER_CLK_SRC] = &csi0phytimer_clk_src.clkr,
+	[CSI1PHYTIMER_CLK_SRC] = &csi1phytimer_clk_src.clkr,
+	[CSI2PHYTIMER_CLK_SRC] = &csi2phytimer_clk_src.clkr,
+	[CRYPTO_CLK_SRC] = &crypto_clk_src.clkr,
+	[GP1_CLK_SRC] = &gp1_clk_src.clkr,
+	[GP2_CLK_SRC] = &gp2_clk_src.clkr,
+	[GP3_CLK_SRC] = &gp3_clk_src.clkr,
+	[PDM2_CLK_SRC] = &pdm2_clk_src.clkr,
+	[RBCPR_GFX_CLK_SRC] = &rbcpr_gfx_clk_src.clkr,
+	[SDCC1_APPS_CLK_SRC] = &sdcc1_apps_clk_src.clkr,
+	[SDCC1_ICE_CORE_CLK_SRC] = &sdcc1_ice_core_clk_src.clkr,
+	[SDCC2_APPS_CLK_SRC] = &sdcc2_apps_clk_src.clkr,
+	[USB30_MOCK_UTMI_CLK_SRC] = &usb30_mock_utmi_clk_src.clkr,
+	[USB3_AUX_CLK_SRC] = &usb3_aux_clk_src.clkr,
+	[GCC_APC0_DROOP_DETECTOR_GPLL0_CLK] = &gcc_apc0_droop_detector_gpll0_clk.clkr,
+	[GCC_APC1_DROOP_DETECTOR_GPLL0_CLK] = &gcc_apc1_droop_detector_gpll0_clk.clkr,
+	[GCC_BLSP1_QUP1_I2C_APPS_CLK] = &gcc_blsp1_qup1_i2c_apps_clk.clkr,
+	[GCC_BLSP1_QUP1_SPI_APPS_CLK] = &gcc_blsp1_qup1_spi_apps_clk.clkr,
+	[GCC_BLSP1_QUP2_I2C_APPS_CLK] = &gcc_blsp1_qup2_i2c_apps_clk.clkr,
+	[GCC_BLSP1_QUP2_SPI_APPS_CLK] = &gcc_blsp1_qup2_spi_apps_clk.clkr,
+	[GCC_BLSP1_QUP3_I2C_APPS_CLK] = &gcc_blsp1_qup3_i2c_apps_clk.clkr,
+	[GCC_BLSP1_QUP3_SPI_APPS_CLK] = &gcc_blsp1_qup3_spi_apps_clk.clkr,
+	[GCC_BLSP1_QUP4_I2C_APPS_CLK] = &gcc_blsp1_qup4_i2c_apps_clk.clkr,
+	[GCC_BLSP1_QUP4_SPI_APPS_CLK] = &gcc_blsp1_qup4_spi_apps_clk.clkr,
+	[GCC_BLSP1_UART1_APPS_CLK] = &gcc_blsp1_uart1_apps_clk.clkr,
+	[GCC_BLSP1_UART2_APPS_CLK] = &gcc_blsp1_uart2_apps_clk.clkr,
+	[GCC_BLSP2_QUP1_I2C_APPS_CLK] = &gcc_blsp2_qup1_i2c_apps_clk.clkr,
+	[GCC_BLSP2_QUP1_SPI_APPS_CLK] = &gcc_blsp2_qup1_spi_apps_clk.clkr,
+	[GCC_BLSP2_QUP2_I2C_APPS_CLK] = &gcc_blsp2_qup2_i2c_apps_clk.clkr,
+	[GCC_BLSP2_QUP2_SPI_APPS_CLK] = &gcc_blsp2_qup2_spi_apps_clk.clkr,
+	[GCC_BLSP2_QUP3_I2C_APPS_CLK] = &gcc_blsp2_qup3_i2c_apps_clk.clkr,
+	[GCC_BLSP2_QUP3_SPI_APPS_CLK] = &gcc_blsp2_qup3_spi_apps_clk.clkr,
+	[GCC_BLSP2_QUP4_I2C_APPS_CLK] = &gcc_blsp2_qup4_i2c_apps_clk.clkr,
+	[GCC_BLSP2_QUP4_SPI_APPS_CLK] = &gcc_blsp2_qup4_spi_apps_clk.clkr,
+	[GCC_BLSP2_UART1_APPS_CLK] = &gcc_blsp2_uart1_apps_clk.clkr,
+	[GCC_BLSP2_UART2_APPS_CLK] = &gcc_blsp2_uart2_apps_clk.clkr,
+	[GCC_CAMSS_CCI_AHB_CLK] = &gcc_camss_cci_ahb_clk.clkr,
+	[GCC_CAMSS_CCI_CLK] = &gcc_camss_cci_clk.clkr,
+	[GCC_CAMSS_CPP_AHB_CLK] = &gcc_camss_cpp_ahb_clk.clkr,
+	[GCC_CAMSS_CPP_AXI_CLK] = &gcc_camss_cpp_axi_clk.clkr,
+	[GCC_CAMSS_CPP_CLK] = &gcc_camss_cpp_clk.clkr,
+	[GCC_CAMSS_CSI0_AHB_CLK] = &gcc_camss_csi0_ahb_clk.clkr,
+	[GCC_CAMSS_CSI0_CLK] = &gcc_camss_csi0_clk.clkr,
+	[GCC_CAMSS_CSI0_CSIPHY_3P_CLK] = &gcc_camss_csi0_csiphy_3p_clk.clkr,
+	[GCC_CAMSS_CSI0PHY_CLK] = &gcc_camss_csi0phy_clk.clkr,
+	[GCC_CAMSS_CSI0PIX_CLK] = &gcc_camss_csi0pix_clk.clkr,
+	[GCC_CAMSS_CSI0RDI_CLK] = &gcc_camss_csi0rdi_clk.clkr,
+	[GCC_CAMSS_CSI1_AHB_CLK] = &gcc_camss_csi1_ahb_clk.clkr,
+	[GCC_CAMSS_CSI1_CLK] = &gcc_camss_csi1_clk.clkr,
+	[GCC_CAMSS_CSI1_CSIPHY_3P_CLK] = &gcc_camss_csi1_csiphy_3p_clk.clkr,
+	[GCC_CAMSS_CSI1PHY_CLK] = &gcc_camss_csi1phy_clk.clkr,
+	[GCC_CAMSS_CSI1PIX_CLK] = &gcc_camss_csi1pix_clk.clkr,
+	[GCC_CAMSS_CSI1RDI_CLK] = &gcc_camss_csi1rdi_clk.clkr,
+	[GCC_CAMSS_CSI2_AHB_CLK] = &gcc_camss_csi2_ahb_clk.clkr,
+	[GCC_CAMSS_CSI2_CLK] = &gcc_camss_csi2_clk.clkr,
+	[GCC_CAMSS_CSI2_CSIPHY_3P_CLK] = &gcc_camss_csi2_csiphy_3p_clk.clkr,
+	[GCC_CAMSS_CSI2PHY_CLK] = &gcc_camss_csi2phy_clk.clkr,
+	[GCC_CAMSS_CSI2PIX_CLK] = &gcc_camss_csi2pix_clk.clkr,
+	[GCC_CAMSS_CSI2RDI_CLK] = &gcc_camss_csi2rdi_clk.clkr,
+	[GCC_CAMSS_CSI_VFE0_CLK] = &gcc_camss_csi_vfe0_clk.clkr,
+	[GCC_CAMSS_CSI_VFE1_CLK] = &gcc_camss_csi_vfe1_clk.clkr,
+	[GCC_CAMSS_GP0_CLK] = &gcc_camss_gp0_clk.clkr,
+	[GCC_CAMSS_GP1_CLK] = &gcc_camss_gp1_clk.clkr,
+	[GCC_CAMSS_ISPIF_AHB_CLK] = &gcc_camss_ispif_ahb_clk.clkr,
+	[GCC_CAMSS_JPEG0_CLK] = &gcc_camss_jpeg0_clk.clkr,
+	[GCC_CAMSS_JPEG_AHB_CLK] = &gcc_camss_jpeg_ahb_clk.clkr,
+	[GCC_CAMSS_JPEG_AXI_CLK] = &gcc_camss_jpeg_axi_clk.clkr,
+	[GCC_CAMSS_MCLK0_CLK] = &gcc_camss_mclk0_clk.clkr,
+	[GCC_CAMSS_MCLK1_CLK] = &gcc_camss_mclk1_clk.clkr,
+	[GCC_CAMSS_MCLK2_CLK] = &gcc_camss_mclk2_clk.clkr,
+	[GCC_CAMSS_MCLK3_CLK] = &gcc_camss_mclk3_clk.clkr,
+	[GCC_CAMSS_MICRO_AHB_CLK] = &gcc_camss_micro_ahb_clk.clkr,
+	[GCC_CAMSS_CSI0PHYTIMER_CLK] = &gcc_camss_csi0phytimer_clk.clkr,
+	[GCC_CAMSS_CSI1PHYTIMER_CLK] = &gcc_camss_csi1phytimer_clk.clkr,
+	[GCC_CAMSS_CSI2PHYTIMER_CLK] = &gcc_camss_csi2phytimer_clk.clkr,
+	[GCC_CAMSS_AHB_CLK] = &gcc_camss_ahb_clk.clkr,
+	[GCC_CAMSS_TOP_AHB_CLK] = &gcc_camss_top_ahb_clk.clkr,
+	[GCC_CAMSS_VFE0_CLK] = &gcc_camss_vfe0_clk.clkr,
+	[GCC_CAMSS_VFE0_AHB_CLK] = &gcc_camss_vfe0_ahb_clk.clkr,
+	[GCC_CAMSS_VFE0_AXI_CLK] = &gcc_camss_vfe0_axi_clk.clkr,
+	[GCC_CAMSS_VFE1_AHB_CLK] = &gcc_camss_vfe1_ahb_clk.clkr,
+	[GCC_CAMSS_VFE1_AXI_CLK] = &gcc_camss_vfe1_axi_clk.clkr,
+	[GCC_CAMSS_VFE1_CLK] = &gcc_camss_vfe1_clk.clkr,
+	[GCC_DCC_CLK] = &gcc_dcc_clk.clkr,
+	[GCC_GP1_CLK] = &gcc_gp1_clk.clkr,
+	[GCC_GP2_CLK] = &gcc_gp2_clk.clkr,
+	[GCC_GP3_CLK] = &gcc_gp3_clk.clkr,
+	[GCC_MSS_CFG_AHB_CLK] = &gcc_mss_cfg_ahb_clk.clkr,
+	[GCC_MSS_Q6_BIMC_AXI_CLK] = &gcc_mss_q6_bimc_axi_clk.clkr,
+	[GCC_PCNOC_USB3_AXI_CLK] = &gcc_pcnoc_usb3_axi_clk.clkr,
+	[GCC_PDM2_CLK] = &gcc_pdm2_clk.clkr,
+	[GCC_PDM_AHB_CLK] = &gcc_pdm_ahb_clk.clkr,
+	[GCC_RBCPR_GFX_CLK] = &gcc_rbcpr_gfx_clk.clkr,
+	[GCC_SDCC1_AHB_CLK] = &gcc_sdcc1_ahb_clk.clkr,
+	[GCC_SDCC1_APPS_CLK] = &gcc_sdcc1_apps_clk.clkr,
+	[GCC_SDCC1_ICE_CORE_CLK] = &gcc_sdcc1_ice_core_clk.clkr,
+	[GCC_SDCC2_AHB_CLK] = &gcc_sdcc2_ahb_clk.clkr,
+	[GCC_SDCC2_APPS_CLK] = &gcc_sdcc2_apps_clk.clkr,
+	[GCC_USB30_MASTER_CLK] = &gcc_usb30_master_clk.clkr,
+	[GCC_USB30_MOCK_UTMI_CLK] = &gcc_usb30_mock_utmi_clk.clkr,
+	[GCC_USB30_SLEEP_CLK] = &gcc_usb30_sleep_clk.clkr,
+	[GCC_USB3_AUX_CLK] = &gcc_usb3_aux_clk.clkr,
+	[GCC_USB_PHY_CFG_AHB_CLK] = &gcc_usb_phy_cfg_ahb_clk.clkr,
+	[GCC_VENUS0_AHB_CLK] = &gcc_venus0_ahb_clk.clkr,
+	[GCC_VENUS0_AXI_CLK] = &gcc_venus0_axi_clk.clkr,
+	[GCC_VENUS0_CORE0_VCODEC0_CLK] = &gcc_venus0_core0_vcodec0_clk.clkr,
+	[GCC_VENUS0_VCODEC0_CLK] = &gcc_venus0_vcodec0_clk.clkr,
+	[GCC_QUSB_REF_CLK] = &gcc_qusb_ref_clk.clkr,
+	[GCC_USB_SS_REF_CLK] = &gcc_usb_ss_ref_clk.clkr,
+	[GCC_USB3_PIPE_CLK] = &gcc_usb3_pipe_clk.clkr,
+	[MDP_CLK_SRC] = &mdp_clk_src.clkr,
+	[PCLK0_CLK_SRC] = &pclk0_clk_src.clkr,
+	[BYTE0_CLK_SRC] = &byte0_clk_src.clkr,
+	[ESC0_CLK_SRC] = &esc0_clk_src.clkr,
+	[PCLK1_CLK_SRC] = &pclk1_clk_src.clkr,
+	[BYTE1_CLK_SRC] = &byte1_clk_src.clkr,
+	[ESC1_CLK_SRC] = &esc1_clk_src.clkr,
+	[VSYNC_CLK_SRC] = &vsync_clk_src.clkr,
+	[GCC_MDSS_AHB_CLK] = &gcc_mdss_ahb_clk.clkr,
+	[GCC_MDSS_AXI_CLK] = &gcc_mdss_axi_clk.clkr,
+	[GCC_MDSS_PCLK0_CLK] = &gcc_mdss_pclk0_clk.clkr,
+	[GCC_MDSS_BYTE0_CLK] = &gcc_mdss_byte0_clk.clkr,
+	[GCC_MDSS_ESC0_CLK] = &gcc_mdss_esc0_clk.clkr,
+	[GCC_MDSS_PCLK1_CLK] = &gcc_mdss_pclk1_clk.clkr,
+	[GCC_MDSS_BYTE1_CLK] = &gcc_mdss_byte1_clk.clkr,
+	[GCC_MDSS_ESC1_CLK] = &gcc_mdss_esc1_clk.clkr,
+	[GCC_MDSS_MDP_CLK] = &gcc_mdss_mdp_clk.clkr,
+	[GCC_MDSS_VSYNC_CLK] = &gcc_mdss_vsync_clk.clkr,
+	[GCC_OXILI_TIMER_CLK] = &gcc_oxili_timer_clk.clkr,
+	[GCC_OXILI_GFX3D_CLK] = &gcc_oxili_gfx3d_clk.clkr,
+	[GCC_OXILI_AON_CLK] = &gcc_oxili_aon_clk.clkr,
+	[GCC_OXILI_AHB_CLK] = &gcc_oxili_ahb_clk.clkr,
+	[GCC_BIMC_GFX_CLK] = &gcc_bimc_gfx_clk.clkr,
+	[GCC_BIMC_GPU_CLK] = &gcc_bimc_gpu_clk.clkr,
+	[GFX3D_CLK_SRC] = &gfx3d_clk_src.clkr,
+};
+
+static const struct qcom_reset_map gcc_msm8953_resets[] = {
+	[GCC_CAMSS_MICRO_BCR]	= { 0x56008 },
+	[GCC_MSS_BCR]		= { 0x71000 },
+	[GCC_QUSB2_PHY_BCR]	= { 0x4103c },
+	[GCC_USB3PHY_PHY_BCR]	= { 0x3f03c },
+	[GCC_USB3_PHY_BCR]	= { 0x3f034 },
+	[GCC_USB_30_BCR]	= { 0x3f070 },
+};
+
+static const struct regmap_config gcc_msm8953_regmap_config = {
+	.reg_bits	= 32,
+	.reg_stride	= 4,
+	.val_bits	= 32,
+	.max_register	= 0x80000,
+	.fast_io	= true,
+};
+
+static struct gdsc *gcc_msm8953_gdscs[] = {
+	[CPP_GDSC] = &cpp_gdsc,
+	[JPEG_GDSC] = &jpeg_gdsc,
+	[MDSS_GDSC] = &mdss_gdsc,
+	[OXILI_CX_GDSC] = &oxili_cx_gdsc,
+	[OXILI_GX_GDSC] = &oxili_gx_gdsc,
+	[USB30_GDSC] = &usb30_gdsc,
+	[VENUS_CORE0_GDSC] = &venus_core0_gdsc,
+	[VENUS_GDSC] = &venus_gdsc,
+	[VFE0_GDSC] = &vfe0_gdsc,
+	[VFE1_GDSC] = &vfe1_gdsc,
+};
+
+static const struct qcom_cc_desc gcc_msm8953_desc = {
+	.config = &gcc_msm8953_regmap_config,
+	.clks = gcc_msm8953_clocks,
+	.num_clks = ARRAY_SIZE(gcc_msm8953_clocks),
+	.resets = gcc_msm8953_resets,
+	.num_resets = ARRAY_SIZE(gcc_msm8953_resets),
+	.gdscs = gcc_msm8953_gdscs,
+	.num_gdscs = ARRAY_SIZE(gcc_msm8953_gdscs),
+	.clk_hws = gcc_msm8953_hws,
+	.num_clk_hws = ARRAY_SIZE(gcc_msm8953_hws),
+};
+
+static int gcc_msm8953_probe(struct platform_device *pdev)
+{
+	struct regmap *regmap;
+
+	regmap  = qcom_cc_map(pdev, &gcc_msm8953_desc);
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
+
+	clk_alpha_pll_configure(&gpll3_early, regmap, &gpll3_early_config);
+
+	return qcom_cc_really_probe(pdev, &gcc_msm8953_desc, regmap);
+}
+
+static const struct of_device_id gcc_msm8953_match_table[] = {
+	{ .compatible = "qcom,gcc-msm8953" },
+	{},
+};
+
+static struct platform_driver gcc_msm8953_driver = {
+	.probe = gcc_msm8953_probe,
+	.driver = {
+		.name = "gcc-msm8953",
+		.of_match_table = gcc_msm8953_match_table,
+		.owner = THIS_MODULE,
+	},
+};
+
+static int __init gcc_msm8953_init(void)
+{
+	return platform_driver_register(&gcc_msm8953_driver);
+}
+core_initcall(gcc_msm8953_init);
+
+static void __exit gcc_msm8953_exit(void)
+{
+	platform_driver_unregister(&gcc_msm8953_driver);
+}
+module_exit(gcc_msm8953_exit);
+
+MODULE_DESCRIPTION("Qualcomm GCC MSM8953 Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/clk/qcom/gcc-sdm660.c b/drivers/clk/qcom/gcc-sdm660.c
index 6394257ca8c0..4d36f96e9ae2 100644
--- a/drivers/clk/qcom/gcc-sdm660.c
+++ b/drivers/clk/qcom/gcc-sdm660.c
@@ -37,114 +37,14 @@ enum {
 	P_GPLL1_EARLY_DIV,
 };
 
-static const struct parent_map gcc_parent_map_xo_gpll0_gpll0_early_div[] = {
-	{ P_XO, 0 },
-	{ P_GPLL0, 1 },
-	{ P_GPLL0_EARLY_DIV, 6 },
-};
-
-static const char * const gcc_parent_names_xo_gpll0_gpll0_early_div[] = {
-	"xo",
-	"gpll0",
-	"gpll0_early_div",
-};
-
-static const struct parent_map gcc_parent_map_xo_gpll0[] = {
-	{ P_XO, 0 },
-	{ P_GPLL0, 1 },
-};
-
-static const char * const gcc_parent_names_xo_gpll0[] = {
-	"xo",
-	"gpll0",
-};
-
-static const struct parent_map gcc_parent_map_xo_gpll0_sleep_clk_gpll0_early_div[] = {
-	{ P_XO, 0 },
-	{ P_GPLL0, 1 },
-	{ P_SLEEP_CLK, 5 },
-	{ P_GPLL0_EARLY_DIV, 6 },
-};
-
-static const char * const gcc_parent_names_xo_gpll0_sleep_clk_gpll0_early_div[] = {
-	"xo",
-	"gpll0",
-	"sleep_clk",
-	"gpll0_early_div",
-};
-
-static const struct parent_map gcc_parent_map_xo_sleep_clk[] = {
-	{ P_XO, 0 },
-	{ P_SLEEP_CLK, 5 },
-};
-
-static const char * const gcc_parent_names_xo_sleep_clk[] = {
-	"xo",
-	"sleep_clk",
-};
-
-static const struct parent_map gcc_parent_map_xo_gpll4[] = {
-	{ P_XO, 0 },
-	{ P_GPLL4, 5 },
-};
-
-static const char * const gcc_parent_names_xo_gpll4[] = {
-	"xo",
-	"gpll4",
-};
-
-static const struct parent_map gcc_parent_map_xo_gpll0_gpll0_early_div_gpll1_gpll4_gpll1_early_div[] = {
-	{ P_XO, 0 },
-	{ P_GPLL0, 1 },
-	{ P_GPLL0_EARLY_DIV, 3 },
-	{ P_GPLL1, 4 },
-	{ P_GPLL4, 5 },
-	{ P_GPLL1_EARLY_DIV, 6 },
-};
-
-static const char * const gcc_parent_names_xo_gpll0_gpll0_early_div_gpll1_gpll4_gpll1_early_div[] = {
-	"xo",
-	"gpll0",
-	"gpll0_early_div",
-	"gpll1",
-	"gpll4",
-	"gpll1_early_div",
-};
-
-static const struct parent_map gcc_parent_map_xo_gpll0_gpll4_gpll0_early_div[] = {
-	{ P_XO, 0 },
-	{ P_GPLL0, 1 },
-	{ P_GPLL4, 5 },
-	{ P_GPLL0_EARLY_DIV, 6 },
-};
-
-static const char * const gcc_parent_names_xo_gpll0_gpll4_gpll0_early_div[] = {
-	"xo",
-	"gpll0",
-	"gpll4",
-	"gpll0_early_div",
-};
-
-static const struct parent_map gcc_parent_map_xo_gpll0_gpll0_early_div_gpll4[] = {
-	{ P_XO, 0 },
-	{ P_GPLL0, 1 },
-	{ P_GPLL0_EARLY_DIV, 2 },
-	{ P_GPLL4, 5 },
-};
-
-static const char * const gcc_parent_names_xo_gpll0_gpll0_early_div_gpll4[] = {
-	"xo",
-	"gpll0",
-	"gpll0_early_div",
-	"gpll4",
-};
-
 static struct clk_fixed_factor xo = {
 	.mult = 1,
 	.div = 1,
 	.hw.init = &(struct clk_init_data){
 		.name = "xo",
-		.parent_names = (const char *[]){ "xo_board" },
+		.parent_data = &(const struct clk_parent_data) {
+			.fw_name = "xo"
+		},
 		.num_parents = 1,
 		.ops = &clk_fixed_factor_ops,
 	},
@@ -158,7 +58,9 @@ static struct clk_alpha_pll gpll0_early = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gpll0_early",
-			.parent_names = (const char *[]){ "xo" },
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "xo",
+			},
 			.num_parents = 1,
 			.ops = &clk_alpha_pll_ops,
 		},
@@ -170,7 +72,9 @@ static struct clk_fixed_factor gpll0_early_div = {
 	.div = 2,
 	.hw.init = &(struct clk_init_data){
 		.name = "gpll0_early_div",
-		.parent_names = (const char *[]){ "gpll0_early" },
+		.parent_hws = (const struct clk_hw*[]){
+			&gpll0_early.clkr.hw,
+		},
 		.num_parents = 1,
 		.ops = &clk_fixed_factor_ops,
 	},
@@ -181,7 +85,9 @@ static struct clk_alpha_pll_postdiv gpll0 = {
 	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "gpll0",
-		.parent_names = (const char *[]){ "gpll0_early" },
+		.parent_hws = (const struct clk_hw*[]){
+			&gpll0_early.clkr.hw,
+		},
 		.num_parents = 1,
 		.ops = &clk_alpha_pll_postdiv_ops,
 	},
@@ -195,7 +101,9 @@ static struct clk_alpha_pll gpll1_early = {
 		.enable_mask = BIT(1),
 		.hw.init = &(struct clk_init_data){
 			.name = "gpll1_early",
-			.parent_names = (const char *[]){ "xo" },
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "xo",
+			},
 			.num_parents = 1,
 			.ops = &clk_alpha_pll_ops,
 		},
@@ -207,7 +115,9 @@ static struct clk_fixed_factor gpll1_early_div = {
 	.div = 2,
 	.hw.init = &(struct clk_init_data){
 		.name = "gpll1_early_div",
-		.parent_names = (const char *[]){ "gpll1_early" },
+		.parent_hws = (const struct clk_hw*[]){
+			&gpll1_early.clkr.hw,
+		},
 		.num_parents = 1,
 		.ops = &clk_fixed_factor_ops,
 	},
@@ -218,7 +128,9 @@ static struct clk_alpha_pll_postdiv gpll1 = {
 	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "gpll1",
-		.parent_names = (const char *[]){ "gpll1_early" },
+		.parent_hws = (const struct clk_hw*[]){
+			&gpll1_early.clkr.hw,
+		},
 		.num_parents = 1,
 		.ops = &clk_alpha_pll_postdiv_ops,
 	},
@@ -232,7 +144,9 @@ static struct clk_alpha_pll gpll4_early = {
 		.enable_mask = BIT(4),
 		.hw.init = &(struct clk_init_data){
 			.name = "gpll4_early",
-			.parent_names = (const char *[]){ "xo" },
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "xo",
+			},
 			.num_parents = 1,
 			.ops = &clk_alpha_pll_ops,
 		},
@@ -245,12 +159,116 @@ static struct clk_alpha_pll_postdiv gpll4 = {
 	.clkr.hw.init = &(struct clk_init_data)
 	{
 		.name = "gpll4",
-		.parent_names = (const char *[]) { "gpll4_early" },
+		.parent_hws = (const struct clk_hw*[]){
+			&gpll4_early.clkr.hw,
+		},
 		.num_parents = 1,
 		.ops = &clk_alpha_pll_postdiv_ops,
 	},
 };
 
+static const struct parent_map gcc_parent_map_xo_gpll0_gpll0_early_div[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 1 },
+	{ P_GPLL0_EARLY_DIV, 6 },
+};
+
+static const struct clk_parent_data gcc_parent_data_xo_gpll0_gpll0_early_div[] = {
+	{ .fw_name = "xo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll0_early_div.hw },
+};
+
+static const struct parent_map gcc_parent_map_xo_gpll0[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 1 },
+};
+
+static const struct clk_parent_data gcc_parent_data_xo_gpll0[] = {
+	{ .fw_name = "xo" },
+	{ .hw = &gpll0.clkr.hw },
+};
+
+static const struct parent_map gcc_parent_map_xo_gpll0_sleep_clk_gpll0_early_div[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 1 },
+	{ P_SLEEP_CLK, 5 },
+	{ P_GPLL0_EARLY_DIV, 6 },
+};
+
+static const struct clk_parent_data gcc_parent_data_xo_gpll0_sleep_clk_gpll0_early_div[] = {
+	{ .fw_name = "xo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .fw_name = "sleep_clk" },
+	{ .hw = &gpll0_early_div.hw },
+};
+
+static const struct parent_map gcc_parent_map_xo_sleep_clk[] = {
+	{ P_XO, 0 },
+	{ P_SLEEP_CLK, 5 },
+};
+
+static const struct clk_parent_data gcc_parent_data_xo_sleep_clk[] = {
+	{ .fw_name = "xo" },
+	{ .fw_name = "sleep_clk" },
+};
+
+static const struct parent_map gcc_parent_map_xo_gpll4[] = {
+	{ P_XO, 0 },
+	{ P_GPLL4, 5 },
+};
+
+static const struct clk_parent_data gcc_parent_data_xo_gpll4[] = {
+	{ .fw_name = "xo" },
+	{ .hw = &gpll4.clkr.hw },
+};
+
+static const struct parent_map gcc_parent_map_xo_gpll0_gpll0_early_div_gpll1_gpll4_gpll1_early_div[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 1 },
+	{ P_GPLL0_EARLY_DIV, 3 },
+	{ P_GPLL1, 4 },
+	{ P_GPLL4, 5 },
+	{ P_GPLL1_EARLY_DIV, 6 },
+};
+
+static const struct clk_parent_data gcc_parent_data_xo_gpll0_gpll0_early_div_gpll1_gpll4_gpll1_early_div[] = {
+	{ .fw_name = "xo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll0_early_div.hw },
+	{ .hw = &gpll1.clkr.hw },
+	{ .hw = &gpll4.clkr.hw },
+	{ .hw = &gpll1_early_div.hw },
+};
+
+static const struct parent_map gcc_parent_map_xo_gpll0_gpll4_gpll0_early_div[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 1 },
+	{ P_GPLL4, 5 },
+	{ P_GPLL0_EARLY_DIV, 6 },
+};
+
+static const struct clk_parent_data gcc_parent_data_xo_gpll0_gpll4_gpll0_early_div[] = {
+	{ .fw_name = "xo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll4.clkr.hw },
+	{ .hw = &gpll0_early_div.hw },
+};
+
+static const struct parent_map gcc_parent_map_xo_gpll0_gpll0_early_div_gpll4[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 1 },
+	{ P_GPLL0_EARLY_DIV, 2 },
+	{ P_GPLL4, 5 },
+};
+
+static const struct clk_parent_data gcc_parent_data_xo_gpll0_gpll0_early_div_gpll4[] = {
+	{ .fw_name = "xo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll0_early_div.hw },
+	{ .hw = &gpll4.clkr.hw },
+};
+
 static const struct freq_tbl ftbl_blsp1_qup1_i2c_apps_clk_src[] = {
 	F(19200000, P_XO, 1, 0, 0),
 	F(50000000, P_GPLL0, 12, 0, 0),
@@ -265,7 +283,7 @@ static struct clk_rcg2 blsp1_qup1_i2c_apps_clk_src = {
 	.freq_tbl = ftbl_blsp1_qup1_i2c_apps_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "blsp1_qup1_i2c_apps_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -290,7 +308,7 @@ static struct clk_rcg2 blsp1_qup1_spi_apps_clk_src = {
 	.freq_tbl = ftbl_blsp1_qup1_spi_apps_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "blsp1_qup1_spi_apps_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -304,7 +322,7 @@ static struct clk_rcg2 blsp1_qup2_i2c_apps_clk_src = {
 	.freq_tbl = ftbl_blsp1_qup1_i2c_apps_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "blsp1_qup2_i2c_apps_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -318,7 +336,7 @@ static struct clk_rcg2 blsp1_qup2_spi_apps_clk_src = {
 	.freq_tbl = ftbl_blsp1_qup1_spi_apps_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "blsp1_qup2_spi_apps_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -332,7 +350,7 @@ static struct clk_rcg2 blsp1_qup3_i2c_apps_clk_src = {
 	.freq_tbl = ftbl_blsp1_qup1_i2c_apps_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "blsp1_qup3_i2c_apps_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -346,7 +364,7 @@ static struct clk_rcg2 blsp1_qup3_spi_apps_clk_src = {
 	.freq_tbl = ftbl_blsp1_qup1_spi_apps_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "blsp1_qup3_spi_apps_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -360,7 +378,7 @@ static struct clk_rcg2 blsp1_qup4_i2c_apps_clk_src = {
 	.freq_tbl = ftbl_blsp1_qup1_i2c_apps_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "blsp1_qup4_i2c_apps_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -374,7 +392,7 @@ static struct clk_rcg2 blsp1_qup4_spi_apps_clk_src = {
 	.freq_tbl = ftbl_blsp1_qup1_spi_apps_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "blsp1_qup4_spi_apps_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -407,7 +425,7 @@ static struct clk_rcg2 blsp1_uart1_apps_clk_src = {
 	.freq_tbl = ftbl_blsp1_uart1_apps_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "blsp1_uart1_apps_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -421,7 +439,7 @@ static struct clk_rcg2 blsp1_uart2_apps_clk_src = {
 	.freq_tbl = ftbl_blsp1_uart1_apps_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "blsp1_uart2_apps_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -435,7 +453,7 @@ static struct clk_rcg2 blsp2_qup1_i2c_apps_clk_src = {
 	.freq_tbl = ftbl_blsp1_qup1_i2c_apps_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "blsp2_qup1_i2c_apps_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -449,7 +467,7 @@ static struct clk_rcg2 blsp2_qup1_spi_apps_clk_src = {
 	.freq_tbl = ftbl_blsp1_qup1_spi_apps_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "blsp2_qup1_spi_apps_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -463,7 +481,7 @@ static struct clk_rcg2 blsp2_qup2_i2c_apps_clk_src = {
 	.freq_tbl = ftbl_blsp1_qup1_i2c_apps_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "blsp2_qup2_i2c_apps_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -477,7 +495,7 @@ static struct clk_rcg2 blsp2_qup2_spi_apps_clk_src = {
 	.freq_tbl = ftbl_blsp1_qup1_spi_apps_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "blsp2_qup2_spi_apps_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -491,7 +509,7 @@ static struct clk_rcg2 blsp2_qup3_i2c_apps_clk_src = {
 	.freq_tbl = ftbl_blsp1_qup1_i2c_apps_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "blsp2_qup3_i2c_apps_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -505,7 +523,7 @@ static struct clk_rcg2 blsp2_qup3_spi_apps_clk_src = {
 	.freq_tbl = ftbl_blsp1_qup1_spi_apps_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "blsp2_qup3_spi_apps_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -519,7 +537,7 @@ static struct clk_rcg2 blsp2_qup4_i2c_apps_clk_src = {
 	.freq_tbl = ftbl_blsp1_qup1_i2c_apps_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "blsp2_qup4_i2c_apps_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -533,7 +551,7 @@ static struct clk_rcg2 blsp2_qup4_spi_apps_clk_src = {
 	.freq_tbl = ftbl_blsp1_qup1_spi_apps_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "blsp2_qup4_spi_apps_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -547,7 +565,7 @@ static struct clk_rcg2 blsp2_uart1_apps_clk_src = {
 	.freq_tbl = ftbl_blsp1_uart1_apps_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "blsp2_uart1_apps_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -561,7 +579,7 @@ static struct clk_rcg2 blsp2_uart2_apps_clk_src = {
 	.freq_tbl = ftbl_blsp1_uart1_apps_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "blsp2_uart2_apps_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -582,7 +600,7 @@ static struct clk_rcg2 gp1_clk_src = {
 	.freq_tbl = ftbl_gp1_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "gp1_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_sleep_clk_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_sleep_clk_gpll0_early_div,
 		.num_parents = 4,
 		.ops = &clk_rcg2_ops,
 	},
@@ -596,7 +614,7 @@ static struct clk_rcg2 gp2_clk_src = {
 	.freq_tbl = ftbl_gp1_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "gp2_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_sleep_clk_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_sleep_clk_gpll0_early_div,
 		.num_parents = 4,
 		.ops = &clk_rcg2_ops,
 	},
@@ -610,7 +628,7 @@ static struct clk_rcg2 gp3_clk_src = {
 	.freq_tbl = ftbl_gp1_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "gp3_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_sleep_clk_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_sleep_clk_gpll0_early_div,
 		.num_parents = 4,
 		.ops = &clk_rcg2_ops,
 	},
@@ -630,7 +648,7 @@ static struct clk_rcg2 hmss_gpll0_clk_src = {
 	.freq_tbl = ftbl_hmss_gpll0_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "hmss_gpll0_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -651,7 +669,7 @@ static struct clk_rcg2 hmss_gpll4_clk_src = {
 	.freq_tbl = ftbl_hmss_gpll4_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "hmss_gpll4_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll4,
+		.parent_data = gcc_parent_data_xo_gpll4,
 		.num_parents = 2,
 		.ops = &clk_rcg2_ops,
 	},
@@ -670,7 +688,7 @@ static struct clk_rcg2 hmss_rbcpr_clk_src = {
 	.freq_tbl = ftbl_hmss_rbcpr_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "hmss_rbcpr_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0,
+		.parent_data = gcc_parent_data_xo_gpll0,
 		.num_parents = 2,
 		.ops = &clk_rcg2_ops,
 	},
@@ -689,7 +707,7 @@ static struct clk_rcg2 pdm2_clk_src = {
 	.freq_tbl = ftbl_pdm2_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "pdm2_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -711,7 +729,7 @@ static struct clk_rcg2 qspi_ser_clk_src = {
 	.freq_tbl = ftbl_qspi_ser_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "qspi_ser_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div_gpll1_gpll4_gpll1_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div_gpll1_gpll4_gpll1_early_div,
 		.num_parents = 6,
 		.ops = &clk_rcg2_ops,
 	},
@@ -737,7 +755,7 @@ static struct clk_rcg2 sdcc1_apps_clk_src = {
 	.freq_tbl = ftbl_sdcc1_apps_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "sdcc1_apps_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll4_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll4_gpll0_early_div,
 		.num_parents = 4,
 		.ops = &clk_rcg2_ops,
 	},
@@ -759,7 +777,7 @@ static struct clk_rcg2 sdcc1_ice_core_clk_src = {
 	.freq_tbl = ftbl_sdcc1_ice_core_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "sdcc1_ice_core_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -785,7 +803,7 @@ static struct clk_rcg2 sdcc2_apps_clk_src = {
 	.freq_tbl = ftbl_sdcc2_apps_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "sdcc2_apps_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div_gpll4,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div_gpll4,
 		.num_parents = 4,
 		.ops = &clk_rcg2_floor_ops,
 	},
@@ -808,7 +826,7 @@ static struct clk_rcg2 ufs_axi_clk_src = {
 	.freq_tbl = ftbl_ufs_axi_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "ufs_axi_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -829,7 +847,7 @@ static struct clk_rcg2 ufs_ice_core_clk_src = {
 	.freq_tbl = ftbl_ufs_ice_core_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "ufs_ice_core_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -843,7 +861,7 @@ static struct clk_rcg2 ufs_phy_aux_clk_src = {
 	.freq_tbl = ftbl_hmss_rbcpr_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "ufs_phy_aux_clk_src",
-		.parent_names = gcc_parent_names_xo_sleep_clk,
+		.parent_data = gcc_parent_data_xo_sleep_clk,
 		.num_parents = 2,
 		.ops = &clk_rcg2_ops,
 	},
@@ -864,7 +882,7 @@ static struct clk_rcg2 ufs_unipro_core_clk_src = {
 	.freq_tbl = ftbl_ufs_unipro_core_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "ufs_unipro_core_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -885,7 +903,7 @@ static struct clk_rcg2 usb20_master_clk_src = {
 	.freq_tbl = ftbl_usb20_master_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "usb20_master_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -905,7 +923,7 @@ static struct clk_rcg2 usb20_mock_utmi_clk_src = {
 	.freq_tbl = ftbl_usb20_mock_utmi_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "usb20_mock_utmi_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -930,7 +948,7 @@ static struct clk_rcg2 usb30_master_clk_src = {
 	.freq_tbl = ftbl_usb30_master_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "usb30_master_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -951,7 +969,7 @@ static struct clk_rcg2 usb30_mock_utmi_clk_src = {
 	.freq_tbl = ftbl_usb30_mock_utmi_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "usb30_mock_utmi_clk_src",
-		.parent_names = gcc_parent_names_xo_gpll0_gpll0_early_div,
+		.parent_data = gcc_parent_data_xo_gpll0_gpll0_early_div,
 		.num_parents = 3,
 		.ops = &clk_rcg2_ops,
 	},
@@ -971,7 +989,7 @@ static struct clk_rcg2 usb3_phy_aux_clk_src = {
 	.freq_tbl = ftbl_usb3_phy_aux_clk_src,
 	.clkr.hw.init = &(struct clk_init_data){
 		.name = "usb3_phy_aux_clk_src",
-		.parent_names = gcc_parent_names_xo_sleep_clk,
+		.parent_data = gcc_parent_data_xo_sleep_clk,
 		.num_parents = 2,
 		.ops = &clk_rcg2_ops,
 	},
@@ -985,8 +1003,8 @@ static struct clk_branch gcc_aggre2_ufs_axi_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_aggre2_ufs_axi_clk",
-			.parent_names = (const char *[]){
-				"ufs_axi_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&ufs_axi_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.ops = &clk_branch2_ops,
@@ -1002,8 +1020,8 @@ static struct clk_branch gcc_aggre2_usb3_axi_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_aggre2_usb3_axi_clk",
-			.parent_names = (const char *[]){
-				"usb30_master_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&usb30_master_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.ops = &clk_branch2_ops,
@@ -1071,8 +1089,8 @@ static struct clk_branch gcc_blsp1_qup1_i2c_apps_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_blsp1_qup1_i2c_apps_clk",
-			.parent_names = (const char *[]){
-				"blsp1_qup1_i2c_apps_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&blsp1_qup1_i2c_apps_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1089,8 +1107,8 @@ static struct clk_branch gcc_blsp1_qup1_spi_apps_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_blsp1_qup1_spi_apps_clk",
-			.parent_names = (const char *[]){
-				"blsp1_qup1_spi_apps_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&blsp1_qup1_spi_apps_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1107,8 +1125,8 @@ static struct clk_branch gcc_blsp1_qup2_i2c_apps_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_blsp1_qup2_i2c_apps_clk",
-			.parent_names = (const char *[]){
-				"blsp1_qup2_i2c_apps_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&blsp1_qup2_i2c_apps_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1125,8 +1143,8 @@ static struct clk_branch gcc_blsp1_qup2_spi_apps_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_blsp1_qup2_spi_apps_clk",
-			.parent_names = (const char *[]){
-				"blsp1_qup2_spi_apps_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&blsp1_qup2_spi_apps_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1143,8 +1161,8 @@ static struct clk_branch gcc_blsp1_qup3_i2c_apps_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_blsp1_qup3_i2c_apps_clk",
-			.parent_names = (const char *[]){
-				"blsp1_qup3_i2c_apps_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&blsp1_qup3_i2c_apps_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1161,8 +1179,8 @@ static struct clk_branch gcc_blsp1_qup3_spi_apps_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_blsp1_qup3_spi_apps_clk",
-			.parent_names = (const char *[]){
-				"blsp1_qup3_spi_apps_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&blsp1_qup3_spi_apps_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1179,8 +1197,8 @@ static struct clk_branch gcc_blsp1_qup4_i2c_apps_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_blsp1_qup4_i2c_apps_clk",
-			.parent_names = (const char *[]){
-				"blsp1_qup4_i2c_apps_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&blsp1_qup4_i2c_apps_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1197,8 +1215,8 @@ static struct clk_branch gcc_blsp1_qup4_spi_apps_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_blsp1_qup4_spi_apps_clk",
-			.parent_names = (const char *[]){
-				"blsp1_qup4_spi_apps_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&blsp1_qup4_spi_apps_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1215,8 +1233,8 @@ static struct clk_branch gcc_blsp1_uart1_apps_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_blsp1_uart1_apps_clk",
-			.parent_names = (const char *[]){
-				"blsp1_uart1_apps_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&blsp1_uart1_apps_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1233,8 +1251,8 @@ static struct clk_branch gcc_blsp1_uart2_apps_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_blsp1_uart2_apps_clk",
-			.parent_names = (const char *[]){
-				"blsp1_uart2_apps_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&blsp1_uart2_apps_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1264,8 +1282,8 @@ static struct clk_branch gcc_blsp2_qup1_i2c_apps_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_blsp2_qup1_i2c_apps_clk",
-			.parent_names = (const char *[]){
-				"blsp2_qup1_i2c_apps_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&blsp2_qup1_i2c_apps_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1282,8 +1300,8 @@ static struct clk_branch gcc_blsp2_qup1_spi_apps_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_blsp2_qup1_spi_apps_clk",
-			.parent_names = (const char *[]){
-				"blsp2_qup1_spi_apps_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&blsp2_qup1_spi_apps_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1300,8 +1318,8 @@ static struct clk_branch gcc_blsp2_qup2_i2c_apps_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_blsp2_qup2_i2c_apps_clk",
-			.parent_names = (const char *[]){
-				"blsp2_qup2_i2c_apps_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&blsp2_qup2_i2c_apps_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1318,8 +1336,8 @@ static struct clk_branch gcc_blsp2_qup2_spi_apps_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_blsp2_qup2_spi_apps_clk",
-			.parent_names = (const char *[]){
-				"blsp2_qup2_spi_apps_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&blsp2_qup2_spi_apps_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1336,8 +1354,8 @@ static struct clk_branch gcc_blsp2_qup3_i2c_apps_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_blsp2_qup3_i2c_apps_clk",
-			.parent_names = (const char *[]){
-				"blsp2_qup3_i2c_apps_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&blsp2_qup3_i2c_apps_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1354,8 +1372,8 @@ static struct clk_branch gcc_blsp2_qup3_spi_apps_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_blsp2_qup3_spi_apps_clk",
-			.parent_names = (const char *[]){
-				"blsp2_qup3_spi_apps_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&blsp2_qup3_spi_apps_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1372,8 +1390,8 @@ static struct clk_branch gcc_blsp2_qup4_i2c_apps_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_blsp2_qup4_i2c_apps_clk",
-			.parent_names = (const char *[]){
-				"blsp2_qup4_i2c_apps_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&blsp2_qup4_i2c_apps_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1390,8 +1408,8 @@ static struct clk_branch gcc_blsp2_qup4_spi_apps_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_blsp2_qup4_spi_apps_clk",
-			.parent_names = (const char *[]){
-				"blsp2_qup4_spi_apps_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&blsp2_qup4_spi_apps_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1408,8 +1426,8 @@ static struct clk_branch gcc_blsp2_uart1_apps_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_blsp2_uart1_apps_clk",
-			.parent_names = (const char *[]){
-				"blsp2_uart1_apps_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&blsp2_uart1_apps_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1426,8 +1444,8 @@ static struct clk_branch gcc_blsp2_uart2_apps_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_blsp2_uart2_apps_clk",
-			.parent_names = (const char *[]){
-				"blsp2_uart2_apps_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&blsp2_uart2_apps_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1457,8 +1475,8 @@ static struct clk_branch gcc_cfg_noc_usb2_axi_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_cfg_noc_usb2_axi_clk",
-			.parent_names = (const char *[]){
-				"usb20_master_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&usb20_master_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.ops = &clk_branch2_ops,
@@ -1474,8 +1492,8 @@ static struct clk_branch gcc_cfg_noc_usb3_axi_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_cfg_noc_usb3_axi_clk",
-			.parent_names = (const char *[]){
-				"usb30_master_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&usb30_master_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.ops = &clk_branch2_ops,
@@ -1503,8 +1521,8 @@ static struct clk_branch gcc_gp1_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_gp1_clk",
-			.parent_names = (const char *[]){
-				"gp1_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&gp1_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1521,8 +1539,8 @@ static struct clk_branch gcc_gp2_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_gp2_clk",
-			.parent_names = (const char *[]){
-				"gp2_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&gp2_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1539,8 +1557,8 @@ static struct clk_branch gcc_gp3_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_gp3_clk",
-			.parent_names = (const char *[]){
-				"gp3_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&gp3_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1584,8 +1602,8 @@ static struct clk_branch gcc_gpu_gpll0_clk = {
 		.enable_mask = BIT(4),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_gpu_gpll0_clk",
-			.parent_names = (const char *[]){
-				"gpll0",
+			.parent_hws = (const struct clk_hw*[]) {
+				&gpll0.clkr.hw,
 			},
 			.num_parents = 1,
 			.ops = &clk_branch2_ops,
@@ -1601,8 +1619,8 @@ static struct clk_branch gcc_gpu_gpll0_div_clk = {
 		.enable_mask = BIT(3),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_gpu_gpll0_div_clk",
-			.parent_names = (const char *[]){
-				"gpll0_early_div",
+			.parent_hws = (const struct clk_hw*[]) {
+				&gpll0_early_div.hw,
 			},
 			.num_parents = 1,
 			.ops = &clk_branch2_ops,
@@ -1632,8 +1650,8 @@ static struct clk_branch gcc_hmss_rbcpr_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_hmss_rbcpr_clk",
-			.parent_names = (const char *[]){
-				"hmss_rbcpr_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&hmss_rbcpr_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1650,8 +1668,8 @@ static struct clk_branch gcc_mmss_gpll0_clk = {
 		.enable_mask = BIT(1),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_mmss_gpll0_clk",
-			.parent_names = (const char *[]){
-				"gpll0",
+			.parent_hws = (const struct clk_hw*[]) {
+				&gpll0.clkr.hw,
 			},
 			.num_parents = 1,
 			.ops = &clk_branch2_ops,
@@ -1667,8 +1685,8 @@ static struct clk_branch gcc_mmss_gpll0_div_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_mmss_gpll0_div_clk",
-			.parent_names = (const char *[]){
-				"gpll0_early_div",
+			.parent_hws = (const struct clk_hw*[]) {
+				&gpll0_early_div.hw,
 			},
 			.num_parents = 1,
 			.ops = &clk_branch2_ops,
@@ -1767,8 +1785,8 @@ static struct clk_branch gcc_pdm2_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_pdm2_clk",
-			.parent_names = (const char *[]){
-				"pdm2_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&pdm2_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1824,8 +1842,8 @@ static struct clk_branch gcc_qspi_ser_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_qspi_ser_clk",
-			.parent_names = (const char *[]){
-				"qspi_ser_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&qspi_ser_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1881,8 +1899,8 @@ static struct clk_branch gcc_sdcc1_apps_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_sdcc1_apps_clk",
-			.parent_names = (const char *[]){
-				"sdcc1_apps_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&sdcc1_apps_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1899,8 +1917,8 @@ static struct clk_branch gcc_sdcc1_ice_core_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_sdcc1_ice_core_clk",
-			.parent_names = (const char *[]){
-				"sdcc1_ice_core_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&sdcc1_ice_core_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1930,8 +1948,8 @@ static struct clk_branch gcc_sdcc2_apps_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_sdcc2_apps_clk",
-			.parent_names = (const char *[]){
-				"sdcc2_apps_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&sdcc2_apps_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1961,8 +1979,8 @@ static struct clk_branch gcc_ufs_axi_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_ufs_axi_clk",
-			.parent_names = (const char *[]){
-				"ufs_axi_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&ufs_axi_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -1992,8 +2010,8 @@ static struct clk_branch gcc_ufs_ice_core_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_ufs_ice_core_clk",
-			.parent_names = (const char *[]){
-				"ufs_ice_core_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&ufs_ice_core_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -2010,8 +2028,8 @@ static struct clk_branch gcc_ufs_phy_aux_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_ufs_phy_aux_clk",
-			.parent_names = (const char *[]){
-				"ufs_phy_aux_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&ufs_phy_aux_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -2067,8 +2085,8 @@ static struct clk_branch gcc_ufs_unipro_core_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_ufs_unipro_core_clk",
-			.parent_names = (const char *[]){
-				"ufs_unipro_core_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&ufs_unipro_core_clk_src.clkr.hw,
 			},
 			.flags = CLK_SET_RATE_PARENT,
 			.num_parents = 1,
@@ -2085,8 +2103,8 @@ static struct clk_branch gcc_usb20_master_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_usb20_master_clk",
-			.parent_names = (const char *[]){
-				"usb20_master_clk_src"
+			.parent_hws = (const struct clk_hw*[]) {
+				&usb20_master_clk_src.clkr.hw,
 			},
 			.flags = CLK_SET_RATE_PARENT,
 			.num_parents = 1,
@@ -2103,8 +2121,8 @@ static struct clk_branch gcc_usb20_mock_utmi_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_usb20_mock_utmi_clk",
-			.parent_names = (const char *[]){
-				"usb20_mock_utmi_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&usb20_mock_utmi_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -2134,8 +2152,8 @@ static struct clk_branch gcc_usb30_master_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_usb30_master_clk",
-			.parent_names = (const char *[]){
-				"usb30_master_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&usb30_master_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -2152,8 +2170,8 @@ static struct clk_branch gcc_usb30_mock_utmi_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_usb30_mock_utmi_clk",
-			.parent_names = (const char *[]){
-				"usb30_mock_utmi_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&usb30_mock_utmi_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
@@ -2196,8 +2214,8 @@ static struct clk_branch gcc_usb3_phy_aux_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_usb3_phy_aux_clk",
-			.parent_names = (const char *[]){
-				"usb3_phy_aux_clk_src",
+			.parent_hws = (const struct clk_hw*[]) {
+				&usb3_phy_aux_clk_src.clkr.hw,
 			},
 			.num_parents = 1,
 			.flags = CLK_SET_RATE_PARENT,
diff --git a/drivers/clk/qcom/gcc-sm6115.c b/drivers/clk/qcom/gcc-sm6115.c
new file mode 100644
index 000000000000..bc09736ece76
--- /dev/null
+++ b/drivers/clk/qcom/gcc-sm6115.c
@@ -0,0 +1,3544 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2019-2021, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/clk-provider.h>
+#include <linux/regmap.h>
+#include <linux/reset-controller.h>
+
+#include <dt-bindings/clock/qcom,gcc-sm6115.h>
+
+#include "clk-alpha-pll.h"
+#include "clk-branch.h"
+#include "clk-pll.h"
+#include "clk-rcg.h"
+#include "clk-regmap.h"
+#include "clk-regmap-divider.h"
+#include "common.h"
+#include "gdsc.h"
+#include "reset.h"
+
+enum {
+	P_BI_TCXO,
+	P_GPLL0_OUT_AUX2,
+	P_GPLL0_OUT_EARLY,
+	P_GPLL10_OUT_MAIN,
+	P_GPLL11_OUT_MAIN,
+	P_GPLL3_OUT_EARLY,
+	P_GPLL4_OUT_MAIN,
+	P_GPLL6_OUT_EARLY,
+	P_GPLL6_OUT_MAIN,
+	P_GPLL7_OUT_MAIN,
+	P_GPLL8_OUT_EARLY,
+	P_GPLL8_OUT_MAIN,
+	P_GPLL9_OUT_EARLY,
+	P_GPLL9_OUT_MAIN,
+	P_SLEEP_CLK,
+};
+
+static struct pll_vco default_vco[] = {
+	{ 500000000, 1000000000, 2 },
+};
+
+static struct pll_vco gpll9_vco[] = {
+	{ 500000000, 1250000000, 0 },
+};
+
+static struct pll_vco gpll10_vco[] = {
+	{ 750000000, 1500000000, 1 },
+};
+
+static struct clk_alpha_pll gpll0 = {
+	.offset = 0x0,
+	.vco_table = default_vco,
+	.num_vco = ARRAY_SIZE(default_vco),
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.clkr = {
+		.enable_reg = 0x79000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpll0",
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "bi_tcxo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_ops,
+		},
+	},
+};
+
+static const struct clk_div_table post_div_table_gpll0_out_aux2[] = {
+	{ 0x1, 2 },
+	{ }
+};
+
+static struct clk_alpha_pll_postdiv gpll0_out_aux2 = {
+	.offset = 0x0,
+	.post_div_shift = 8,
+	.post_div_table = post_div_table_gpll0_out_aux2,
+	.num_post_div = ARRAY_SIZE(post_div_table_gpll0_out_aux2),
+	.width = 4,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gpll0_out_aux2",
+		.parent_hws = (const struct clk_hw *[]){ &gpll0.clkr.hw },
+		.num_parents = 1,
+		.ops = &clk_alpha_pll_postdiv_ro_ops,
+	},
+};
+
+/* listed as BRAMMO, but it doesn't really match */
+static const u8 clk_gpll9_regs[PLL_OFF_MAX_REGS] = {
+	[PLL_OFF_L_VAL] = 0x04,
+	[PLL_OFF_ALPHA_VAL] = 0x08,
+	[PLL_OFF_ALPHA_VAL_U] = 0x0c,
+	[PLL_OFF_TEST_CTL] = 0x10,
+	[PLL_OFF_TEST_CTL_U] = 0x14,
+	[PLL_OFF_USER_CTL] = 0x18,
+	[PLL_OFF_CONFIG_CTL] = 0x1C,
+	[PLL_OFF_STATUS] = 0x20,
+};
+
+static const struct clk_div_table post_div_table_gpll0_out_main[] = {
+	{ 0x0, 1 },
+	{ }
+};
+
+static struct clk_alpha_pll_postdiv gpll0_out_main = {
+	.offset = 0x0,
+	.post_div_shift = 8,
+	.post_div_table = post_div_table_gpll0_out_main,
+	.num_post_div = ARRAY_SIZE(post_div_table_gpll0_out_main),
+	.width = 4,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gpll0_out_main",
+		.parent_hws = (const struct clk_hw *[]){ &gpll0.clkr.hw },
+		.num_parents = 1,
+		.ops = &clk_alpha_pll_postdiv_ro_ops,
+	},
+};
+
+/* 1152MHz configuration */
+static const struct alpha_pll_config gpll10_config = {
+	.l = 0x3c,
+	.vco_val = 0x1 << 20,
+	.vco_mask = GENMASK(21, 20),
+	.main_output_mask = BIT(0),
+	.config_ctl_val = 0x4001055b,
+};
+
+static struct clk_alpha_pll gpll10 = {
+	.offset = 0xa000,
+	.vco_table = gpll10_vco,
+	.num_vco = ARRAY_SIZE(gpll10_vco),
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.clkr = {
+		.enable_reg = 0x79000,
+		.enable_mask = BIT(10),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpll10",
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "bi_tcxo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_ops,
+		},
+	},
+};
+
+static const struct clk_div_table post_div_table_gpll10_out_main[] = {
+	{ 0x0, 1 },
+	{ }
+};
+
+static struct clk_alpha_pll_postdiv gpll10_out_main = {
+	.offset = 0xa000,
+	.post_div_shift = 8,
+	.post_div_table = post_div_table_gpll10_out_main,
+	.num_post_div = ARRAY_SIZE(post_div_table_gpll10_out_main),
+	.width = 4,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gpll10_out_main",
+		.parent_hws = (const struct clk_hw *[]){ &gpll10.clkr.hw },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
+		.ops = &clk_alpha_pll_postdiv_ops,
+	},
+};
+
+/* 600MHz configuration */
+static const struct alpha_pll_config gpll11_config = {
+	.l = 0x1F,
+	.alpha = 0x0,
+	.alpha_hi = 0x40,
+	.alpha_en_mask = BIT(24),
+	.vco_val = 0x2 << 20,
+	.vco_mask = GENMASK(21, 20),
+	.config_ctl_val = 0x4001055b,
+};
+
+static struct clk_alpha_pll gpll11 = {
+	.offset = 0xb000,
+	.vco_table = default_vco,
+	.num_vco = ARRAY_SIZE(default_vco),
+	.flags = SUPPORTS_DYNAMIC_UPDATE,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.clkr = {
+		.enable_reg = 0x79000,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpll11",
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "bi_tcxo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_ops,
+		},
+	},
+};
+
+static const struct clk_div_table post_div_table_gpll11_out_main[] = {
+	{ 0x0, 1 },
+	{ }
+};
+
+static struct clk_alpha_pll_postdiv gpll11_out_main = {
+	.offset = 0xb000,
+	.post_div_shift = 8,
+	.post_div_table = post_div_table_gpll11_out_main,
+	.num_post_div = ARRAY_SIZE(post_div_table_gpll11_out_main),
+	.width = 4,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gpll11_out_main",
+		.parent_hws = (const struct clk_hw *[]){ &gpll11.clkr.hw },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
+		.ops = &clk_alpha_pll_postdiv_ops,
+	},
+};
+
+static struct clk_alpha_pll gpll3 = {
+	.offset = 0x3000,
+	.vco_table = default_vco,
+	.num_vco = ARRAY_SIZE(default_vco),
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.clkr = {
+		.enable_reg = 0x79000,
+		.enable_mask = BIT(3),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpll3",
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "bi_tcxo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_ops,
+		},
+	},
+};
+
+static struct clk_alpha_pll gpll4 = {
+	.offset = 0x4000,
+	.vco_table = default_vco,
+	.num_vco = ARRAY_SIZE(default_vco),
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.clkr = {
+		.enable_reg = 0x79000,
+		.enable_mask = BIT(4),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpll4",
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "bi_tcxo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_ops,
+		},
+	},
+};
+
+static const struct clk_div_table post_div_table_gpll4_out_main[] = {
+	{ 0x0, 1 },
+	{ }
+};
+
+static struct clk_alpha_pll_postdiv gpll4_out_main = {
+	.offset = 0x4000,
+	.post_div_shift = 8,
+	.post_div_table = post_div_table_gpll4_out_main,
+	.num_post_div = ARRAY_SIZE(post_div_table_gpll4_out_main),
+	.width = 4,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gpll4_out_main",
+		.parent_hws = (const struct clk_hw *[]){ &gpll4.clkr.hw },
+		.num_parents = 1,
+		.ops = &clk_alpha_pll_postdiv_ro_ops,
+	},
+};
+
+static struct clk_alpha_pll gpll6 = {
+	.offset = 0x6000,
+	.vco_table = default_vco,
+	.num_vco = ARRAY_SIZE(default_vco),
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.clkr = {
+		.enable_reg = 0x79000,
+		.enable_mask = BIT(6),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpll6",
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "bi_tcxo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_ops,
+		},
+	},
+};
+
+static const struct clk_div_table post_div_table_gpll6_out_main[] = {
+	{ 0x1, 2 },
+	{ }
+};
+
+static struct clk_alpha_pll_postdiv gpll6_out_main = {
+	.offset = 0x6000,
+	.post_div_shift = 8,
+	.post_div_table = post_div_table_gpll6_out_main,
+	.num_post_div = ARRAY_SIZE(post_div_table_gpll6_out_main),
+	.width = 4,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gpll6_out_main",
+		.parent_hws = (const struct clk_hw *[]){ &gpll6.clkr.hw },
+		.num_parents = 1,
+		.ops = &clk_alpha_pll_postdiv_ro_ops,
+	},
+};
+
+static struct clk_alpha_pll gpll7 = {
+	.offset = 0x7000,
+	.vco_table = default_vco,
+	.num_vco = ARRAY_SIZE(default_vco),
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.clkr = {
+		.enable_reg = 0x79000,
+		.enable_mask = BIT(7),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpll7",
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "bi_tcxo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_ops,
+		},
+	},
+};
+
+static const struct clk_div_table post_div_table_gpll7_out_main[] = {
+	{ 0x0, 1 },
+	{ }
+};
+
+static struct clk_alpha_pll_postdiv gpll7_out_main = {
+	.offset = 0x7000,
+	.post_div_shift = 8,
+	.post_div_table = post_div_table_gpll7_out_main,
+	.num_post_div = ARRAY_SIZE(post_div_table_gpll7_out_main),
+	.width = 4,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gpll7_out_main",
+		.parent_hws = (const struct clk_hw *[]){ &gpll7.clkr.hw },
+		.num_parents = 1,
+		.ops = &clk_alpha_pll_postdiv_ro_ops,
+	},
+};
+
+/* 800MHz configuration */
+static const struct alpha_pll_config gpll8_config = {
+	.l = 0x29,
+	.alpha = 0xAAAAAAAA,
+	.alpha_hi = 0xAA,
+	.alpha_en_mask = BIT(24),
+	.vco_val = 0x2 << 20,
+	.vco_mask = GENMASK(21, 20),
+	.main_output_mask = BIT(0),
+	.early_output_mask = BIT(3),
+	.post_div_val = 0x1 << 8,
+	.post_div_mask = GENMASK(11, 8),
+	.config_ctl_val = 0x4001055b,
+};
+
+static struct clk_alpha_pll gpll8 = {
+	.offset = 0x8000,
+	.vco_table = default_vco,
+	.num_vco = ARRAY_SIZE(default_vco),
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.flags = SUPPORTS_DYNAMIC_UPDATE,
+	.clkr = {
+		.enable_reg = 0x79000,
+		.enable_mask = BIT(8),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpll8",
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "bi_tcxo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_ops,
+		},
+	},
+};
+
+static const struct clk_div_table post_div_table_gpll8_out_main[] = {
+	{ 0x1, 2 },
+	{ }
+};
+
+static struct clk_alpha_pll_postdiv gpll8_out_main = {
+	.offset = 0x8000,
+	.post_div_shift = 8,
+	.post_div_table = post_div_table_gpll8_out_main,
+	.num_post_div = ARRAY_SIZE(post_div_table_gpll8_out_main),
+	.width = 4,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gpll8_out_main",
+		.parent_hws = (const struct clk_hw *[]){ &gpll8.clkr.hw },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
+		.ops = &clk_alpha_pll_postdiv_ro_ops,
+	},
+};
+
+/* 1152MHz configuration */
+static const struct alpha_pll_config gpll9_config = {
+	.l = 0x3C,
+	.alpha = 0x0,
+	.post_div_val = 0x1 << 8,
+	.post_div_mask = GENMASK(9, 8),
+	.main_output_mask = BIT(0),
+	.config_ctl_val = 0x00004289,
+};
+
+static struct clk_alpha_pll gpll9 = {
+	.offset = 0x9000,
+	.vco_table = gpll9_vco,
+	.num_vco = ARRAY_SIZE(gpll9_vco),
+	.regs = clk_gpll9_regs,
+	.clkr = {
+		.enable_reg = 0x79000,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpll9",
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "bi_tcxo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_ops,
+		},
+	},
+};
+
+static const struct clk_div_table post_div_table_gpll9_out_main[] = {
+	{ 0x1, 2 },
+	{ }
+};
+
+static struct clk_alpha_pll_postdiv gpll9_out_main = {
+	.offset = 0x9000,
+	.post_div_shift = 8,
+	.post_div_table = post_div_table_gpll9_out_main,
+	.num_post_div = ARRAY_SIZE(post_div_table_gpll9_out_main),
+	.width = 2,
+	.regs = clk_gpll9_regs,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gpll9_out_main",
+		.parent_hws = (const struct clk_hw *[]){ &gpll9.clkr.hw },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
+		.ops = &clk_alpha_pll_postdiv_ops,
+	},
+};
+
+static const struct parent_map gcc_parent_map_0[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL0_OUT_EARLY, 1 },
+	{ P_GPLL0_OUT_AUX2, 2 },
+};
+
+static const struct clk_parent_data gcc_parents_0[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll0_out_aux2.clkr.hw },
+};
+
+static const struct parent_map gcc_parent_map_1[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL0_OUT_EARLY, 1 },
+	{ P_GPLL0_OUT_AUX2, 2 },
+	{ P_GPLL6_OUT_MAIN, 4 },
+};
+
+static const struct clk_parent_data gcc_parents_1[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll0_out_aux2.clkr.hw },
+	{ .hw = &gpll6_out_main.clkr.hw },
+};
+
+static const struct parent_map gcc_parent_map_2[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL0_OUT_EARLY, 1 },
+	{ P_GPLL0_OUT_AUX2, 2 },
+	{ P_SLEEP_CLK, 5 },
+};
+
+static const struct clk_parent_data gcc_parents_2[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll0_out_aux2.clkr.hw },
+	{ .fw_name = "sleep_clk" },
+};
+
+static const struct parent_map gcc_parent_map_3[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL0_OUT_EARLY, 1 },
+	{ P_GPLL9_OUT_EARLY, 2 },
+	{ P_GPLL10_OUT_MAIN, 3 },
+	{ P_GPLL9_OUT_MAIN, 5 },
+};
+
+static const struct clk_parent_data gcc_parents_3[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll9.clkr.hw },
+	{ .hw = &gpll10_out_main.clkr.hw },
+	{ .hw = &gpll9_out_main.clkr.hw },
+};
+
+static const struct parent_map gcc_parent_map_4[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL0_OUT_EARLY, 1 },
+	{ P_GPLL0_OUT_AUX2, 2 },
+	{ P_GPLL4_OUT_MAIN, 5 },
+};
+
+static const struct clk_parent_data gcc_parents_4[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll0_out_aux2.clkr.hw },
+	{ .hw = &gpll4_out_main.clkr.hw },
+};
+
+static const struct parent_map gcc_parent_map_5[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL0_OUT_EARLY, 1 },
+	{ P_GPLL8_OUT_EARLY, 2 },
+	{ P_GPLL10_OUT_MAIN, 3 },
+	{ P_GPLL8_OUT_MAIN, 4 },
+	{ P_GPLL9_OUT_MAIN, 5 },
+};
+
+static const struct clk_parent_data gcc_parents_5[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll8.clkr.hw },
+	{ .hw = &gpll10_out_main.clkr.hw },
+	{ .hw = &gpll8_out_main.clkr.hw },
+	{ .hw = &gpll9_out_main.clkr.hw },
+};
+
+static const struct parent_map gcc_parent_map_6[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL0_OUT_EARLY, 1 },
+	{ P_GPLL8_OUT_EARLY, 2 },
+	{ P_GPLL10_OUT_MAIN, 3 },
+	{ P_GPLL6_OUT_MAIN, 4 },
+	{ P_GPLL9_OUT_MAIN, 5 },
+	{ P_GPLL3_OUT_EARLY, 6 },
+};
+
+static const struct clk_parent_data gcc_parents_6[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll8.clkr.hw },
+	{ .hw = &gpll10_out_main.clkr.hw },
+	{ .hw = &gpll6_out_main.clkr.hw },
+	{ .hw = &gpll9_out_main.clkr.hw },
+	{ .hw = &gpll3.clkr.hw },
+};
+
+static const struct parent_map gcc_parent_map_7[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL0_OUT_EARLY, 1 },
+	{ P_GPLL0_OUT_AUX2, 2 },
+	{ P_GPLL10_OUT_MAIN, 3 },
+	{ P_GPLL4_OUT_MAIN, 5 },
+	{ P_GPLL3_OUT_EARLY, 6 },
+};
+
+static const struct clk_parent_data gcc_parents_7[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll0_out_aux2.clkr.hw },
+	{ .hw = &gpll10_out_main.clkr.hw },
+	{ .hw = &gpll4_out_main.clkr.hw },
+	{ .hw = &gpll3.clkr.hw },
+};
+
+static const struct parent_map gcc_parent_map_8[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL0_OUT_EARLY, 1 },
+	{ P_GPLL8_OUT_EARLY, 2 },
+	{ P_GPLL10_OUT_MAIN, 3 },
+	{ P_GPLL8_OUT_MAIN, 4 },
+	{ P_GPLL9_OUT_MAIN, 5 },
+	{ P_GPLL3_OUT_EARLY, 6 },
+};
+
+static const struct clk_parent_data gcc_parents_8[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll8.clkr.hw },
+	{ .hw = &gpll10_out_main.clkr.hw },
+	{ .hw = &gpll8_out_main.clkr.hw },
+	{ .hw = &gpll9_out_main.clkr.hw },
+	{ .hw = &gpll3.clkr.hw },
+};
+
+static const struct parent_map gcc_parent_map_9[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL0_OUT_EARLY, 1 },
+	{ P_GPLL0_OUT_AUX2, 2 },
+	{ P_GPLL10_OUT_MAIN, 3 },
+	{ P_GPLL8_OUT_MAIN, 4 },
+	{ P_GPLL9_OUT_MAIN, 5 },
+	{ P_GPLL3_OUT_EARLY, 6 },
+};
+
+static const struct clk_parent_data gcc_parents_9[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll0_out_aux2.clkr.hw },
+	{ .hw = &gpll10_out_main.clkr.hw },
+	{ .hw = &gpll8_out_main.clkr.hw },
+	{ .hw = &gpll9_out_main.clkr.hw },
+	{ .hw = &gpll3.clkr.hw },
+};
+
+static const struct parent_map gcc_parent_map_10[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL0_OUT_EARLY, 1 },
+	{ P_GPLL8_OUT_EARLY, 2 },
+	{ P_GPLL10_OUT_MAIN, 3 },
+	{ P_GPLL6_OUT_EARLY, 4 },
+	{ P_GPLL9_OUT_MAIN, 5 },
+};
+
+static const struct clk_parent_data gcc_parents_10[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll8.clkr.hw },
+	{ .hw = &gpll10_out_main.clkr.hw },
+	{ .hw = &gpll6.clkr.hw },
+	{ .hw = &gpll9_out_main.clkr.hw },
+};
+
+static const struct parent_map gcc_parent_map_11[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL0_OUT_EARLY, 1 },
+	{ P_GPLL0_OUT_AUX2, 2 },
+	{ P_GPLL7_OUT_MAIN, 3 },
+	{ P_GPLL4_OUT_MAIN, 5 },
+};
+
+static const struct clk_parent_data gcc_parents_11[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll0_out_aux2.clkr.hw },
+	{ .hw = &gpll7_out_main.clkr.hw },
+	{ .hw = &gpll4_out_main.clkr.hw },
+};
+
+static const struct parent_map gcc_parent_map_12[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_SLEEP_CLK, 5 },
+};
+
+static const struct clk_parent_data gcc_parents_12[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .fw_name = "sleep_clk" },
+};
+
+static const struct parent_map gcc_parent_map_13[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL11_OUT_MAIN, 1 },
+};
+
+static const struct clk_parent_data gcc_parents_13[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .hw = &gpll11_out_main.clkr.hw },
+};
+
+static const struct freq_tbl ftbl_gcc_camss_axi_clk_src[] = {
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(150000000, P_GPLL0_OUT_AUX2, 2, 0, 0),
+	F(200000000, P_GPLL0_OUT_AUX2, 1.5, 0, 0),
+	F(300000000, P_GPLL0_OUT_AUX2, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_camss_axi_clk_src = {
+	.cmd_rcgr = 0x5802c,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_7,
+	.freq_tbl = ftbl_gcc_camss_axi_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_camss_axi_clk_src",
+		.parent_data = gcc_parents_7,
+		.num_parents = ARRAY_SIZE(gcc_parents_7),
+		.flags = CLK_SET_RATE_PARENT,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_camss_cci_clk_src[] = {
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(37500000, P_GPLL0_OUT_AUX2, 8, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_camss_cci_clk_src = {
+	.cmd_rcgr = 0x56000,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_9,
+	.freq_tbl = ftbl_gcc_camss_cci_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_camss_cci_clk_src",
+		.parent_data = gcc_parents_9,
+		.num_parents = ARRAY_SIZE(gcc_parents_9),
+		.flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_camss_csi0phytimer_clk_src[] = {
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(100000000, P_GPLL0_OUT_AUX2, 3, 0, 0),
+	F(200000000, P_GPLL0_OUT_AUX2, 1.5, 0, 0),
+	F(268800000, P_GPLL4_OUT_MAIN, 3, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_camss_csi0phytimer_clk_src = {
+	.cmd_rcgr = 0x59000,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_4,
+	.freq_tbl = ftbl_gcc_camss_csi0phytimer_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_camss_csi0phytimer_clk_src",
+		.parent_data = gcc_parents_4,
+		.num_parents = ARRAY_SIZE(gcc_parents_4),
+		.flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 gcc_camss_csi1phytimer_clk_src = {
+	.cmd_rcgr = 0x5901c,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_4,
+	.freq_tbl = ftbl_gcc_camss_csi0phytimer_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_camss_csi1phytimer_clk_src",
+		.parent_data = gcc_parents_4,
+		.num_parents = ARRAY_SIZE(gcc_parents_4),
+		.flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 gcc_camss_csi2phytimer_clk_src = {
+	.cmd_rcgr = 0x59038,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_4,
+	.freq_tbl = ftbl_gcc_camss_csi0phytimer_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_camss_csi2phytimer_clk_src",
+		.parent_data = gcc_parents_4,
+		.num_parents = ARRAY_SIZE(gcc_parents_4),
+		.flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_camss_mclk0_clk_src[] = {
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(24000000, P_GPLL9_OUT_MAIN, 1, 1, 24),
+	F(64000000, P_GPLL9_OUT_MAIN, 1, 1, 9),
+	{ }
+};
+
+static struct clk_rcg2 gcc_camss_mclk0_clk_src = {
+	.cmd_rcgr = 0x51000,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_3,
+	.freq_tbl = ftbl_gcc_camss_mclk0_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_camss_mclk0_clk_src",
+		.parent_data = gcc_parents_3,
+		.num_parents = ARRAY_SIZE(gcc_parents_3),
+		.flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 gcc_camss_mclk1_clk_src = {
+	.cmd_rcgr = 0x5101c,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_3,
+	.freq_tbl = ftbl_gcc_camss_mclk0_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_camss_mclk1_clk_src",
+		.parent_data = gcc_parents_3,
+		.num_parents = ARRAY_SIZE(gcc_parents_3),
+		.flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 gcc_camss_mclk2_clk_src = {
+	.cmd_rcgr = 0x51038,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_3,
+	.freq_tbl = ftbl_gcc_camss_mclk0_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_camss_mclk2_clk_src",
+		.parent_data = gcc_parents_3,
+		.num_parents = ARRAY_SIZE(gcc_parents_3),
+		.flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 gcc_camss_mclk3_clk_src = {
+	.cmd_rcgr = 0x51054,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_3,
+	.freq_tbl = ftbl_gcc_camss_mclk0_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_camss_mclk3_clk_src",
+		.parent_data = gcc_parents_3,
+		.num_parents = ARRAY_SIZE(gcc_parents_3),
+		.flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_camss_ope_ahb_clk_src[] = {
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(171428571, P_GPLL0_OUT_EARLY, 3.5, 0, 0),
+	F(240000000, P_GPLL0_OUT_EARLY, 2.5, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_camss_ope_ahb_clk_src = {
+	.cmd_rcgr = 0x55024,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_8,
+	.freq_tbl = ftbl_gcc_camss_ope_ahb_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_camss_ope_ahb_clk_src",
+		.parent_data = gcc_parents_8,
+		.num_parents = ARRAY_SIZE(gcc_parents_8),
+		.flags = CLK_SET_RATE_PARENT,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_camss_ope_clk_src[] = {
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(200000000, P_GPLL8_OUT_MAIN, 2, 0, 0),
+	F(266600000, P_GPLL8_OUT_MAIN, 1, 0, 0),
+	F(465000000, P_GPLL8_OUT_MAIN, 1, 0, 0),
+	F(576000000, P_GPLL9_OUT_MAIN, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_camss_ope_clk_src = {
+	.cmd_rcgr = 0x55004,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_8,
+	.freq_tbl = ftbl_gcc_camss_ope_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_camss_ope_clk_src",
+		.parent_data = gcc_parents_8,
+		.num_parents = ARRAY_SIZE(gcc_parents_8),
+		.flags = CLK_SET_RATE_PARENT,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_camss_tfe_0_clk_src[] = {
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(128000000, P_GPLL10_OUT_MAIN, 9, 0, 0),
+	F(135529412, P_GPLL10_OUT_MAIN, 8.5, 0, 0),
+	F(144000000, P_GPLL10_OUT_MAIN, 8, 0, 0),
+	F(153600000, P_GPLL10_OUT_MAIN, 7.5, 0, 0),
+	F(164571429, P_GPLL10_OUT_MAIN, 7, 0, 0),
+	F(177230769, P_GPLL10_OUT_MAIN, 6.5, 0, 0),
+	F(192000000, P_GPLL10_OUT_MAIN, 6, 0, 0),
+	F(209454545, P_GPLL10_OUT_MAIN, 5.5, 0, 0),
+	F(230400000, P_GPLL10_OUT_MAIN, 5, 0, 0),
+	F(256000000, P_GPLL10_OUT_MAIN, 4.5, 0, 0),
+	F(288000000, P_GPLL10_OUT_MAIN, 4, 0, 0),
+	F(329142857, P_GPLL10_OUT_MAIN, 3.5, 0, 0),
+	F(384000000, P_GPLL10_OUT_MAIN, 3, 0, 0),
+	F(460800000, P_GPLL10_OUT_MAIN, 2.5, 0, 0),
+	F(576000000, P_GPLL10_OUT_MAIN, 2, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_camss_tfe_0_clk_src = {
+	.cmd_rcgr = 0x52004,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_5,
+	.freq_tbl = ftbl_gcc_camss_tfe_0_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_camss_tfe_0_clk_src",
+		.parent_data = gcc_parents_5,
+		.num_parents = ARRAY_SIZE(gcc_parents_5),
+		.flags = CLK_SET_RATE_PARENT,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_camss_tfe_0_csid_clk_src[] = {
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(120000000, P_GPLL0_OUT_EARLY, 5, 0, 0),
+	F(192000000, P_GPLL6_OUT_MAIN, 2, 0, 0),
+	F(240000000, P_GPLL0_OUT_EARLY, 2.5, 0, 0),
+	F(384000000, P_GPLL6_OUT_MAIN, 1, 0, 0),
+	F(426400000, P_GPLL3_OUT_EARLY, 2.5, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_camss_tfe_0_csid_clk_src = {
+	.cmd_rcgr = 0x52094,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_6,
+	.freq_tbl = ftbl_gcc_camss_tfe_0_csid_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_camss_tfe_0_csid_clk_src",
+		.parent_data = gcc_parents_6,
+		.num_parents = ARRAY_SIZE(gcc_parents_6),
+		.flags = CLK_SET_RATE_PARENT,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 gcc_camss_tfe_1_clk_src = {
+	.cmd_rcgr = 0x52024,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_5,
+	.freq_tbl = ftbl_gcc_camss_tfe_0_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_camss_tfe_1_clk_src",
+		.parent_data = gcc_parents_5,
+		.num_parents = ARRAY_SIZE(gcc_parents_5),
+		.flags = CLK_SET_RATE_PARENT,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 gcc_camss_tfe_1_csid_clk_src = {
+	.cmd_rcgr = 0x520b4,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_6,
+	.freq_tbl = ftbl_gcc_camss_tfe_0_csid_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_camss_tfe_1_csid_clk_src",
+		.parent_data = gcc_parents_6,
+		.num_parents = ARRAY_SIZE(gcc_parents_6),
+		.flags = CLK_SET_RATE_PARENT,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 gcc_camss_tfe_2_clk_src = {
+	.cmd_rcgr = 0x52044,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_5,
+	.freq_tbl = ftbl_gcc_camss_tfe_0_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_camss_tfe_2_clk_src",
+		.parent_data = gcc_parents_5,
+		.num_parents = ARRAY_SIZE(gcc_parents_5),
+		.flags = CLK_SET_RATE_PARENT,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 gcc_camss_tfe_2_csid_clk_src = {
+	.cmd_rcgr = 0x520d4,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_6,
+	.freq_tbl = ftbl_gcc_camss_tfe_0_csid_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_camss_tfe_2_csid_clk_src",
+		.parent_data = gcc_parents_6,
+		.num_parents = ARRAY_SIZE(gcc_parents_6),
+		.flags = CLK_SET_RATE_PARENT,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_camss_tfe_cphy_rx_clk_src[] = {
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(240000000, P_GPLL0_OUT_EARLY, 2.5, 0, 0),
+	F(341333333, P_GPLL6_OUT_EARLY, 1, 4, 9),
+	F(384000000, P_GPLL6_OUT_EARLY, 2, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_camss_tfe_cphy_rx_clk_src = {
+	.cmd_rcgr = 0x52064,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_10,
+	.freq_tbl = ftbl_gcc_camss_tfe_cphy_rx_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_camss_tfe_cphy_rx_clk_src",
+		.parent_data = gcc_parents_10,
+		.num_parents = ARRAY_SIZE(gcc_parents_10),
+		.flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_camss_top_ahb_clk_src[] = {
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(40000000, P_GPLL0_OUT_AUX2, 7.5, 0, 0),
+	F(80000000, P_GPLL0_OUT_EARLY, 7.5, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_camss_top_ahb_clk_src = {
+	.cmd_rcgr = 0x58010,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_7,
+	.freq_tbl = ftbl_gcc_camss_top_ahb_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_camss_top_ahb_clk_src",
+		.parent_data = gcc_parents_7,
+		.num_parents = ARRAY_SIZE(gcc_parents_7),
+		.flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_gp1_clk_src[] = {
+	F(25000000, P_GPLL0_OUT_AUX2, 12, 0, 0),
+	F(50000000, P_GPLL0_OUT_AUX2, 6, 0, 0),
+	F(100000000, P_GPLL0_OUT_AUX2, 3, 0, 0),
+	F(200000000, P_GPLL0_OUT_AUX2, 1.5, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_gp1_clk_src = {
+	.cmd_rcgr = 0x4d004,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_2,
+	.freq_tbl = ftbl_gcc_gp1_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_gp1_clk_src",
+		.parent_data = gcc_parents_2,
+		.num_parents = ARRAY_SIZE(gcc_parents_2),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 gcc_gp2_clk_src = {
+	.cmd_rcgr = 0x4e004,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_2,
+	.freq_tbl = ftbl_gcc_gp1_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_gp2_clk_src",
+		.parent_data = gcc_parents_2,
+		.num_parents = ARRAY_SIZE(gcc_parents_2),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 gcc_gp3_clk_src = {
+	.cmd_rcgr = 0x4f004,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_2,
+	.freq_tbl = ftbl_gcc_gp1_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_gp3_clk_src",
+		.parent_data = gcc_parents_2,
+		.num_parents = ARRAY_SIZE(gcc_parents_2),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_pdm2_clk_src[] = {
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(60000000, P_GPLL0_OUT_AUX2, 5, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_pdm2_clk_src = {
+	.cmd_rcgr = 0x20010,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_pdm2_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_pdm2_clk_src",
+		.parent_data = gcc_parents_0,
+		.num_parents = ARRAY_SIZE(gcc_parents_0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_qupv3_wrap0_s0_clk_src[] = {
+	F(7372800, P_GPLL0_OUT_AUX2, 1, 384, 15625),
+	F(14745600, P_GPLL0_OUT_AUX2, 1, 768, 15625),
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(29491200, P_GPLL0_OUT_AUX2, 1, 1536, 15625),
+	F(32000000, P_GPLL0_OUT_AUX2, 1, 8, 75),
+	F(48000000, P_GPLL0_OUT_AUX2, 1, 4, 25),
+	F(64000000, P_GPLL0_OUT_AUX2, 1, 16, 75),
+	F(75000000, P_GPLL0_OUT_AUX2, 4, 0, 0),
+	F(80000000, P_GPLL0_OUT_AUX2, 1, 4, 15),
+	F(96000000, P_GPLL0_OUT_AUX2, 1, 8, 25),
+	F(100000000, P_GPLL0_OUT_AUX2, 3, 0, 0),
+	F(102400000, P_GPLL0_OUT_AUX2, 1, 128, 375),
+	F(112000000, P_GPLL0_OUT_AUX2, 1, 28, 75),
+	F(117964800, P_GPLL0_OUT_AUX2, 1, 6144, 15625),
+	F(120000000, P_GPLL0_OUT_AUX2, 2.5, 0, 0),
+	F(128000000, P_GPLL6_OUT_MAIN, 3, 0, 0),
+	{ }
+};
+
+static struct clk_init_data gcc_qupv3_wrap0_s0_clk_src_init = {
+	.name = "gcc_qupv3_wrap0_s0_clk_src",
+	.parent_data = gcc_parents_1,
+	.num_parents = ARRAY_SIZE(gcc_parents_1),
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap0_s0_clk_src = {
+	.cmd_rcgr = 0x1f148,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_1,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s0_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap0_s1_clk_src_init = {
+	.name = "gcc_qupv3_wrap0_s1_clk_src",
+	.parent_data = gcc_parents_1,
+	.num_parents = ARRAY_SIZE(gcc_parents_1),
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap0_s1_clk_src = {
+	.cmd_rcgr = 0x1f278,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_1,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s1_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap0_s2_clk_src_init = {
+	.name = "gcc_qupv3_wrap0_s2_clk_src",
+	.parent_data = gcc_parents_1,
+	.num_parents = ARRAY_SIZE(gcc_parents_1),
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap0_s2_clk_src = {
+	.cmd_rcgr = 0x1f3a8,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_1,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s2_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap0_s3_clk_src_init = {
+	.name = "gcc_qupv3_wrap0_s3_clk_src",
+	.parent_data = gcc_parents_1,
+	.num_parents = ARRAY_SIZE(gcc_parents_1),
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap0_s3_clk_src = {
+	.cmd_rcgr = 0x1f4d8,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_1,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s3_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap0_s4_clk_src_init = {
+	.name = "gcc_qupv3_wrap0_s4_clk_src",
+	.parent_data = gcc_parents_1,
+	.num_parents = ARRAY_SIZE(gcc_parents_1),
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap0_s4_clk_src = {
+	.cmd_rcgr = 0x1f608,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_1,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s4_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap0_s5_clk_src_init = {
+	.name = "gcc_qupv3_wrap0_s5_clk_src",
+	.parent_data = gcc_parents_1,
+	.num_parents = ARRAY_SIZE(gcc_parents_1),
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap0_s5_clk_src = {
+	.cmd_rcgr = 0x1f738,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_1,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s5_clk_src_init,
+};
+
+static const struct freq_tbl ftbl_gcc_sdcc1_apps_clk_src[] = {
+	F(144000, P_BI_TCXO, 16, 3, 25),
+	F(400000, P_BI_TCXO, 12, 1, 4),
+	F(20000000, P_GPLL0_OUT_AUX2, 5, 1, 3),
+	F(25000000, P_GPLL0_OUT_AUX2, 6, 1, 2),
+	F(50000000, P_GPLL0_OUT_AUX2, 6, 0, 0),
+	F(100000000, P_GPLL0_OUT_AUX2, 3, 0, 0),
+	F(192000000, P_GPLL6_OUT_MAIN, 2, 0, 0),
+	F(384000000, P_GPLL6_OUT_MAIN, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_sdcc1_apps_clk_src = {
+	.cmd_rcgr = 0x38028,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_1,
+	.freq_tbl = ftbl_gcc_sdcc1_apps_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_sdcc1_apps_clk_src",
+		.parent_data = gcc_parents_1,
+		.num_parents = ARRAY_SIZE(gcc_parents_1),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_sdcc1_ice_core_clk_src[] = {
+	F(75000000, P_GPLL0_OUT_AUX2, 4, 0, 0),
+	F(100000000, P_GPLL0_OUT_AUX2, 3, 0, 0),
+	F(150000000, P_GPLL0_OUT_AUX2, 2, 0, 0),
+	F(200000000, P_GPLL0_OUT_EARLY, 3, 0, 0),
+	F(300000000, P_GPLL0_OUT_AUX2, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_sdcc1_ice_core_clk_src = {
+	.cmd_rcgr = 0x38010,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_sdcc1_ice_core_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_sdcc1_ice_core_clk_src",
+		.parent_data = gcc_parents_0,
+		.num_parents = ARRAY_SIZE(gcc_parents_0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_sdcc2_apps_clk_src[] = {
+	F(400000, P_BI_TCXO, 12, 1, 4),
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(25000000, P_GPLL0_OUT_AUX2, 12, 0, 0),
+	F(50000000, P_GPLL0_OUT_AUX2, 6, 0, 0),
+	F(100000000, P_GPLL0_OUT_AUX2, 3, 0, 0),
+	F(200000000, P_GPLL0_OUT_EARLY, 3, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_sdcc2_apps_clk_src = {
+	.cmd_rcgr = 0x1e00c,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_11,
+	.freq_tbl = ftbl_gcc_sdcc2_apps_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_sdcc2_apps_clk_src",
+		.parent_data = gcc_parents_11,
+		.num_parents = ARRAY_SIZE(gcc_parents_11),
+		.ops = &clk_rcg2_ops,
+		.flags = CLK_OPS_PARENT_ENABLE,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_ufs_phy_axi_clk_src[] = {
+	F(25000000, P_GPLL0_OUT_AUX2, 12, 0, 0),
+	F(50000000, P_GPLL0_OUT_AUX2, 6, 0, 0),
+	F(100000000, P_GPLL0_OUT_AUX2, 3, 0, 0),
+	F(200000000, P_GPLL0_OUT_EARLY, 3, 0, 0),
+	F(240000000, P_GPLL0_OUT_EARLY, 2.5, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_ufs_phy_axi_clk_src = {
+	.cmd_rcgr = 0x45020,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_ufs_phy_axi_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_ufs_phy_axi_clk_src",
+		.parent_data = gcc_parents_0,
+		.num_parents = ARRAY_SIZE(gcc_parents_0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_ufs_phy_ice_core_clk_src[] = {
+	F(37500000, P_GPLL0_OUT_AUX2, 8, 0, 0),
+	F(75000000, P_GPLL0_OUT_AUX2, 4, 0, 0),
+	F(150000000, P_GPLL0_OUT_AUX2, 2, 0, 0),
+	F(300000000, P_GPLL0_OUT_AUX2, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_ufs_phy_ice_core_clk_src = {
+	.cmd_rcgr = 0x45048,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_ufs_phy_ice_core_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_ufs_phy_ice_core_clk_src",
+		.parent_data = gcc_parents_0,
+		.num_parents = ARRAY_SIZE(gcc_parents_0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_ufs_phy_phy_aux_clk_src[] = {
+	F(9600000, P_BI_TCXO, 2, 0, 0),
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_ufs_phy_phy_aux_clk_src = {
+	.cmd_rcgr = 0x4507c,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_ufs_phy_phy_aux_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_ufs_phy_phy_aux_clk_src",
+		.parent_data = gcc_parents_0,
+		.num_parents = ARRAY_SIZE(gcc_parents_0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_ufs_phy_unipro_core_clk_src[] = {
+	F(37500000, P_GPLL0_OUT_AUX2, 8, 0, 0),
+	F(75000000, P_GPLL0_OUT_AUX2, 4, 0, 0),
+	F(150000000, P_GPLL0_OUT_AUX2, 2, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_ufs_phy_unipro_core_clk_src = {
+	.cmd_rcgr = 0x45060,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_ufs_phy_unipro_core_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_ufs_phy_unipro_core_clk_src",
+		.parent_data = gcc_parents_0,
+		.num_parents = ARRAY_SIZE(gcc_parents_0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_usb30_prim_master_clk_src[] = {
+	F(66666667, P_GPLL0_OUT_AUX2, 4.5, 0, 0),
+	F(133333333, P_GPLL0_OUT_EARLY, 4.5, 0, 0),
+	F(200000000, P_GPLL0_OUT_EARLY, 3, 0, 0),
+	F(240000000, P_GPLL0_OUT_EARLY, 2.5, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_usb30_prim_master_clk_src = {
+	.cmd_rcgr = 0x1a01c,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_usb30_prim_master_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_usb30_prim_master_clk_src",
+		.parent_data = gcc_parents_0,
+		.num_parents = ARRAY_SIZE(gcc_parents_0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_usb30_prim_mock_utmi_clk_src[] = {
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_usb30_prim_mock_utmi_clk_src = {
+	.cmd_rcgr = 0x1a034,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_usb30_prim_mock_utmi_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_usb30_prim_mock_utmi_clk_src",
+		.parent_data = gcc_parents_0,
+		.num_parents = ARRAY_SIZE(gcc_parents_0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_regmap_div gcc_usb30_prim_mock_utmi_postdiv_clk_src = {
+	.reg = 0x1a04c,
+	.shift = 0,
+	.width = 2,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "gcc_usb30_prim_mock_utmi_postdiv_clk_src",
+		.parent_hws = (const struct clk_hw *[]) {
+			&gcc_usb30_prim_mock_utmi_clk_src.clkr.hw },
+		.num_parents = 1,
+		.ops = &clk_regmap_div_ro_ops,
+	},
+};
+
+static struct clk_rcg2 gcc_usb3_prim_phy_aux_clk_src = {
+	.cmd_rcgr = 0x1a060,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_12,
+	.freq_tbl = ftbl_gcc_usb30_prim_mock_utmi_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_usb3_prim_phy_aux_clk_src",
+		.parent_data = gcc_parents_12,
+		.num_parents = ARRAY_SIZE(gcc_parents_12),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_video_venus_clk_src[] = {
+	F(133333333, P_GPLL11_OUT_MAIN, 4.5, 0, 0),
+	F(240000000, P_GPLL11_OUT_MAIN, 2.5, 0, 0),
+	F(300000000, P_GPLL11_OUT_MAIN, 2, 0, 0),
+	F(384000000, P_GPLL11_OUT_MAIN, 2, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_video_venus_clk_src = {
+	.cmd_rcgr = 0x58060,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_13,
+	.freq_tbl = ftbl_gcc_video_venus_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_video_venus_clk_src",
+		.parent_data = gcc_parents_13,
+		.num_parents = ARRAY_SIZE(gcc_parents_13),
+		.flags = CLK_SET_RATE_PARENT,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_ahb2phy_csi_clk = {
+	.halt_reg = 0x1d004,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x1d004,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x1d004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ahb2phy_csi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ahb2phy_usb_clk = {
+	.halt_reg = 0x1d008,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x1d008,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x1d008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ahb2phy_usb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_bimc_gpu_axi_clk = {
+	.halt_reg = 0x71154,
+	.halt_check = BRANCH_HALT_DELAY,
+	.hwcg_reg = 0x71154,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x71154,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_bimc_gpu_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_boot_rom_ahb_clk = {
+	.halt_reg = 0x23004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x23004,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x79004,
+		.enable_mask = BIT(10),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_boot_rom_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_cam_throttle_nrt_clk = {
+	.halt_reg = 0x17070,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x17070,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x79004,
+		.enable_mask = BIT(27),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_cam_throttle_nrt_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_cam_throttle_rt_clk = {
+	.halt_reg = 0x1706c,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x1706c,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x79004,
+		.enable_mask = BIT(26),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_cam_throttle_rt_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camera_ahb_clk = {
+	.halt_reg = 0x17008,
+	.halt_check = BRANCH_HALT_DELAY,
+	.hwcg_reg = 0x17008,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x17008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camera_ahb_clk",
+			.flags = CLK_IS_CRITICAL,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camera_xo_clk = {
+	.halt_reg = 0x17028,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x17028,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camera_xo_clk",
+			.flags = CLK_IS_CRITICAL,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_axi_clk = {
+	.halt_reg = 0x58044,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x58044,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_axi_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_camss_axi_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_camnoc_atb_clk = {
+	.halt_reg = 0x5804c,
+	.halt_check = BRANCH_HALT_DELAY,
+	.hwcg_reg = 0x5804c,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x5804c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_camnoc_atb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_camnoc_nts_xo_clk = {
+	.halt_reg = 0x58050,
+	.halt_check = BRANCH_HALT_DELAY,
+	.hwcg_reg = 0x58050,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x58050,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_camnoc_nts_xo_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_cci_0_clk = {
+	.halt_reg = 0x56018,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x56018,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_cci_0_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_camss_cci_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_cphy_0_clk = {
+	.halt_reg = 0x52088,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x52088,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_cphy_0_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_camss_tfe_cphy_rx_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_cphy_1_clk = {
+	.halt_reg = 0x5208c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x5208c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_cphy_1_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_camss_tfe_cphy_rx_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_cphy_2_clk = {
+	.halt_reg = 0x52090,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x52090,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_cphy_2_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_camss_tfe_cphy_rx_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_csi0phytimer_clk = {
+	.halt_reg = 0x59018,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x59018,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_csi0phytimer_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_camss_csi0phytimer_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_csi1phytimer_clk = {
+	.halt_reg = 0x59034,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x59034,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_csi1phytimer_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_camss_csi1phytimer_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_csi2phytimer_clk = {
+	.halt_reg = 0x59050,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x59050,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_csi2phytimer_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_camss_csi2phytimer_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_mclk0_clk = {
+	.halt_reg = 0x51018,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x51018,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_mclk0_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_camss_mclk0_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_mclk1_clk = {
+	.halt_reg = 0x51034,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x51034,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_mclk1_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_camss_mclk1_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_mclk2_clk = {
+	.halt_reg = 0x51050,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x51050,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_mclk2_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_camss_mclk2_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_mclk3_clk = {
+	.halt_reg = 0x5106c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x5106c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_mclk3_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_camss_mclk3_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_nrt_axi_clk = {
+	.halt_reg = 0x58054,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x58054,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_nrt_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_ope_ahb_clk = {
+	.halt_reg = 0x5503c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x5503c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_ope_ahb_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_camss_ope_ahb_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_ope_clk = {
+	.halt_reg = 0x5501c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x5501c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_ope_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_camss_ope_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_rt_axi_clk = {
+	.halt_reg = 0x5805c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x5805c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_rt_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_tfe_0_clk = {
+	.halt_reg = 0x5201c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x5201c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_tfe_0_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_camss_tfe_0_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_tfe_0_cphy_rx_clk = {
+	.halt_reg = 0x5207c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x5207c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_tfe_0_cphy_rx_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_camss_tfe_cphy_rx_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_tfe_0_csid_clk = {
+	.halt_reg = 0x520ac,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x520ac,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_tfe_0_csid_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_camss_tfe_0_csid_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_tfe_1_clk = {
+	.halt_reg = 0x5203c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x5203c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_tfe_1_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_camss_tfe_1_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_tfe_1_cphy_rx_clk = {
+	.halt_reg = 0x52080,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x52080,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_tfe_1_cphy_rx_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_camss_tfe_cphy_rx_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_tfe_1_csid_clk = {
+	.halt_reg = 0x520cc,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x520cc,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_tfe_1_csid_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_camss_tfe_1_csid_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_tfe_2_clk = {
+	.halt_reg = 0x5205c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x5205c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_tfe_2_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_camss_tfe_2_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_tfe_2_cphy_rx_clk = {
+	.halt_reg = 0x52084,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x52084,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_tfe_2_cphy_rx_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_camss_tfe_cphy_rx_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_tfe_2_csid_clk = {
+	.halt_reg = 0x520ec,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x520ec,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_tfe_2_csid_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_camss_tfe_2_csid_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camss_top_ahb_clk = {
+	.halt_reg = 0x58028,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x58028,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camss_top_ahb_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_camss_top_ahb_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_cfg_noc_usb3_prim_axi_clk = {
+	.halt_reg = 0x1a084,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x1a084,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x1a084,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_cfg_noc_usb3_prim_axi_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_usb30_prim_master_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_cpuss_gnoc_clk = {
+	.halt_reg = 0x2b004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x2b004,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x79004,
+		.enable_mask = BIT(22),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_cpuss_gnoc_clk",
+			.flags = CLK_IS_CRITICAL,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_disp_ahb_clk = {
+	.halt_reg = 0x1700c,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x1700c,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x1700c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_disp_ahb_clk",
+			.flags = CLK_IS_CRITICAL,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_regmap_div gcc_disp_gpll0_clk_src = {
+	.reg = 0x17058,
+	.shift = 0,
+	.width = 2,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "gcc_disp_gpll0_clk_src",
+		.parent_hws = (const struct clk_hw *[]){ &gpll0.clkr.hw },
+		.num_parents = 1,
+		.ops = &clk_regmap_div_ops,
+	},
+};
+
+static struct clk_branch gcc_disp_gpll0_div_clk_src = {
+	.halt_check = BRANCH_HALT_DELAY,
+	.clkr = {
+		.enable_reg = 0x79004,
+		.enable_mask = BIT(20),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_disp_gpll0_div_clk_src",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_disp_gpll0_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_disp_hf_axi_clk = {
+	.halt_reg = 0x17020,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x17020,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x17020,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_disp_hf_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_disp_throttle_core_clk = {
+	.halt_reg = 0x17064,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x17064,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x7900c,
+		.enable_mask = BIT(5),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_disp_throttle_core_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_disp_xo_clk = {
+	.halt_reg = 0x1702c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1702c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_disp_xo_clk",
+			.flags = CLK_IS_CRITICAL,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_gp1_clk = {
+	.halt_reg = 0x4d000,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4d000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gp1_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_gp1_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_gp2_clk = {
+	.halt_reg = 0x4e000,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4e000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gp2_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_gp2_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_gp3_clk = {
+	.halt_reg = 0x4f000,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4f000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gp3_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_gp3_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_gpu_cfg_ahb_clk = {
+	.halt_reg = 0x36004,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x36004,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x36004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gpu_cfg_ahb_clk",
+			.flags = CLK_IS_CRITICAL,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_gpu_gpll0_clk_src = {
+	.halt_check = BRANCH_HALT_DELAY,
+	.clkr = {
+		.enable_reg = 0x79004,
+		.enable_mask = BIT(15),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gpu_gpll0_clk_src",
+			.parent_hws = (const struct clk_hw *[]){
+				&gpll0.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_gpu_gpll0_div_clk_src = {
+	.halt_check = BRANCH_HALT_DELAY,
+	.clkr = {
+		.enable_reg = 0x79004,
+		.enable_mask = BIT(16),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gpu_gpll0_div_clk_src",
+			.parent_hws = (const struct clk_hw *[]){
+				&gpll0_out_aux2.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_gpu_iref_clk = {
+	.halt_reg = 0x36100,
+	.halt_check = BRANCH_HALT_DELAY,
+	.clkr = {
+		.enable_reg = 0x36100,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gpu_iref_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_gpu_memnoc_gfx_clk = {
+	.halt_reg = 0x3600c,
+	.halt_check = BRANCH_VOTED,
+	.hwcg_reg = 0x3600c,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x3600c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gpu_memnoc_gfx_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_gpu_snoc_dvm_gfx_clk = {
+	.halt_reg = 0x36018,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x36018,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gpu_snoc_dvm_gfx_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_gpu_throttle_core_clk = {
+	.halt_reg = 0x36048,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x36048,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x79004,
+		.enable_mask = BIT(31),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gpu_throttle_core_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_pdm2_clk = {
+	.halt_reg = 0x2000c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x2000c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_pdm2_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_pdm2_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_pdm_ahb_clk = {
+	.halt_reg = 0x20004,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x20004,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x20004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_pdm_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_pdm_xo4_clk = {
+	.halt_reg = 0x20008,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x20008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_pdm_xo4_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_prng_ahb_clk = {
+	.halt_reg = 0x21004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x21004,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x79004,
+		.enable_mask = BIT(13),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_prng_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qmip_camera_nrt_ahb_clk = {
+	.halt_reg = 0x17014,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x17014,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x7900c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qmip_camera_nrt_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qmip_camera_rt_ahb_clk = {
+	.halt_reg = 0x17060,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x17060,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x7900c,
+		.enable_mask = BIT(2),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qmip_camera_rt_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qmip_disp_ahb_clk = {
+	.halt_reg = 0x17018,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x17018,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x7900c,
+		.enable_mask = BIT(1),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qmip_disp_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qmip_gpu_cfg_ahb_clk = {
+	.halt_reg = 0x36040,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x36040,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x7900c,
+		.enable_mask = BIT(4),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qmip_gpu_cfg_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qmip_video_vcodec_ahb_clk = {
+	.halt_reg = 0x17010,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x17010,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x79004,
+		.enable_mask = BIT(25),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qmip_video_vcodec_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap0_core_2x_clk = {
+	.halt_reg = 0x1f014,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x7900c,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap0_core_2x_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap0_core_clk = {
+	.halt_reg = 0x1f00c,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x7900c,
+		.enable_mask = BIT(8),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap0_core_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s0_clk = {
+	.halt_reg = 0x1f144,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x7900c,
+		.enable_mask = BIT(10),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap0_s0_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_qupv3_wrap0_s0_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s1_clk = {
+	.halt_reg = 0x1f274,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x7900c,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap0_s1_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_qupv3_wrap0_s1_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s2_clk = {
+	.halt_reg = 0x1f3a4,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x7900c,
+		.enable_mask = BIT(12),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap0_s2_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_qupv3_wrap0_s2_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s3_clk = {
+	.halt_reg = 0x1f4d4,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x7900c,
+		.enable_mask = BIT(13),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap0_s3_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_qupv3_wrap0_s3_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s4_clk = {
+	.halt_reg = 0x1f604,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x7900c,
+		.enable_mask = BIT(14),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap0_s4_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_qupv3_wrap0_s4_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s5_clk = {
+	.halt_reg = 0x1f734,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x7900c,
+		.enable_mask = BIT(15),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap0_s5_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_qupv3_wrap0_s5_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap_0_m_ahb_clk = {
+	.halt_reg = 0x1f004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x1f004,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x7900c,
+		.enable_mask = BIT(6),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap_0_m_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap_0_s_ahb_clk = {
+	.halt_reg = 0x1f008,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x1f008,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x7900c,
+		.enable_mask = BIT(7),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap_0_s_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_sdcc1_ahb_clk = {
+	.halt_reg = 0x38008,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x38008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_sdcc1_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_sdcc1_apps_clk = {
+	.halt_reg = 0x38004,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x38004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_sdcc1_apps_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_sdcc1_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT /* | CLK_ENABLE_HAND_OFF */,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_sdcc1_ice_core_clk = {
+	.halt_reg = 0x3800c,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x3800c,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x3800c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_sdcc1_ice_core_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_sdcc1_ice_core_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_sdcc2_ahb_clk = {
+	.halt_reg = 0x1e008,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1e008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_sdcc2_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_sdcc2_apps_clk = {
+	.halt_reg = 0x1e004,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1e004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_sdcc2_apps_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_sdcc2_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_sys_noc_cpuss_ahb_clk = {
+	.halt_reg = 0x2b06c,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x2b06c,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x79004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_sys_noc_cpuss_ahb_clk",
+			.flags = CLK_IS_CRITICAL,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_sys_noc_ufs_phy_axi_clk = {
+	.halt_reg = 0x45098,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x45098,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_sys_noc_ufs_phy_axi_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_ufs_phy_axi_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_sys_noc_usb3_prim_axi_clk = {
+	.halt_reg = 0x1a080,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x1a080,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x1a080,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_sys_noc_usb3_prim_axi_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_usb30_prim_master_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_clkref_clk = {
+	.halt_reg = 0x8c000,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x8c000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_clkref_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_ahb_clk = {
+	.halt_reg = 0x45014,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x45014,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x45014,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_axi_clk = {
+	.halt_reg = 0x45010,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x45010,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x45010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_axi_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_ufs_phy_axi_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_ice_core_clk = {
+	.halt_reg = 0x45044,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x45044,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x45044,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_ice_core_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_ufs_phy_ice_core_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_phy_aux_clk = {
+	.halt_reg = 0x45078,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x45078,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x45078,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_phy_aux_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_ufs_phy_phy_aux_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_rx_symbol_0_clk = {
+	.halt_reg = 0x4501c,
+	.halt_check = BRANCH_HALT_SKIP,
+	.clkr = {
+		.enable_reg = 0x4501c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_rx_symbol_0_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_tx_symbol_0_clk = {
+	.halt_reg = 0x45018,
+	.halt_check = BRANCH_HALT_SKIP,
+	.clkr = {
+		.enable_reg = 0x45018,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_tx_symbol_0_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_unipro_core_clk = {
+	.halt_reg = 0x45040,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x45040,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x45040,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_unipro_core_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_ufs_phy_unipro_core_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb30_prim_master_clk = {
+	.halt_reg = 0x1a010,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1a010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb30_prim_master_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_usb30_prim_master_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb30_prim_mock_utmi_clk = {
+	.halt_reg = 0x1a018,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1a018,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb30_prim_mock_utmi_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_usb30_prim_mock_utmi_postdiv_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb30_prim_sleep_clk = {
+	.halt_reg = 0x1a014,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1a014,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb30_prim_sleep_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb3_prim_clkref_clk = {
+	.halt_reg = 0x9f000,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x9f000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb3_prim_clkref_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb3_prim_phy_com_aux_clk = {
+	.halt_reg = 0x1a054,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1a054,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb3_prim_phy_com_aux_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_usb3_prim_phy_aux_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb3_prim_phy_pipe_clk = {
+	.halt_reg = 0x1a058,
+	.halt_check = BRANCH_HALT_SKIP,
+	.hwcg_reg = 0x1a058,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x1a058,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb3_prim_phy_pipe_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_vcodec0_axi_clk = {
+	.halt_reg = 0x6e008,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x6e008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_vcodec0_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_venus_ahb_clk = {
+	.halt_reg = 0x6e010,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x6e010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_venus_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_venus_ctl_axi_clk = {
+	.halt_reg = 0x6e004,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x6e004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_venus_ctl_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_video_ahb_clk = {
+	.halt_reg = 0x17004,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x17004,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x17004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_video_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_video_axi0_clk = {
+	.halt_reg = 0x1701c,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x1701c,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x1701c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_video_axi0_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_video_throttle_core_clk = {
+	.halt_reg = 0x17068,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x17068,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x79004,
+		.enable_mask = BIT(28),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_video_throttle_core_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_video_vcodec0_sys_clk = {
+	.halt_reg = 0x580a4,
+	.halt_check = BRANCH_HALT_DELAY,
+	.hwcg_reg = 0x580a4,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x580a4,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_video_vcodec0_sys_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_video_venus_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_video_venus_ctl_clk = {
+	.halt_reg = 0x5808c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x5808c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_video_venus_ctl_clk",
+			.parent_hws = (const struct clk_hw *[]){
+				&gcc_video_venus_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_video_xo_clk = {
+	.halt_reg = 0x17024,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x17024,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_video_xo_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct gdsc gcc_camss_top_gdsc = {
+	.gdscr = 0x58004,
+	.pd = {
+		.name = "gcc_camss_top",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc gcc_ufs_phy_gdsc = {
+	.gdscr = 0x45004,
+	.pd = {
+		.name = "gcc_ufs_phy",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc gcc_usb30_prim_gdsc = {
+	.gdscr = 0x1a004,
+	.pd = {
+		.name = "gcc_usb30_prim",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc gcc_vcodec0_gdsc = {
+	.gdscr = 0x58098,
+	.pd = {
+		.name = "gcc_vcodec0",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc gcc_venus_gdsc = {
+	.gdscr = 0x5807c,
+	.pd = {
+		.name = "gcc_venus",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc hlos1_vote_turing_mmu_tbu1_gdsc = {
+	.gdscr = 0x7d060,
+	.pd = {
+		.name = "hlos1_vote_turing_mmu_tbu1",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
+};
+
+static struct gdsc hlos1_vote_turing_mmu_tbu0_gdsc = {
+	.gdscr = 0x7d060,
+	.pd = {
+		.name = "hlos1_vote_turing_mmu_tbu0",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
+};
+
+static struct gdsc hlos1_vote_mm_snoc_mmu_tbu_rt_gdsc = {
+	.gdscr = 0x7d074,
+	.pd = {
+		.name = "hlos1_vote_mm_snoc_mmu_tbu_rt",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
+};
+
+static struct gdsc hlos1_vote_mm_snoc_mmu_tbu_nrt_gdsc = {
+	.gdscr = 0x7d078,
+	.pd = {
+		.name = "hlos1_vote_mm_snoc_mmu_tbu_nrt",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
+};
+
+static struct clk_regmap *gcc_sm6115_clocks[] = {
+	[GCC_AHB2PHY_CSI_CLK] = &gcc_ahb2phy_csi_clk.clkr,
+	[GCC_AHB2PHY_USB_CLK] = &gcc_ahb2phy_usb_clk.clkr,
+	[GCC_BIMC_GPU_AXI_CLK] = &gcc_bimc_gpu_axi_clk.clkr,
+	[GCC_BOOT_ROM_AHB_CLK] = &gcc_boot_rom_ahb_clk.clkr,
+	[GCC_CAM_THROTTLE_NRT_CLK] = &gcc_cam_throttle_nrt_clk.clkr,
+	[GCC_CAM_THROTTLE_RT_CLK] = &gcc_cam_throttle_rt_clk.clkr,
+	[GCC_CAMERA_AHB_CLK] = &gcc_camera_ahb_clk.clkr,
+	[GCC_CAMERA_XO_CLK] = &gcc_camera_xo_clk.clkr,
+	[GCC_CAMSS_AXI_CLK] = &gcc_camss_axi_clk.clkr,
+	[GCC_CAMSS_AXI_CLK_SRC] = &gcc_camss_axi_clk_src.clkr,
+	[GCC_CAMSS_CAMNOC_ATB_CLK] = &gcc_camss_camnoc_atb_clk.clkr,
+	[GCC_CAMSS_CAMNOC_NTS_XO_CLK] = &gcc_camss_camnoc_nts_xo_clk.clkr,
+	[GCC_CAMSS_CCI_0_CLK] = &gcc_camss_cci_0_clk.clkr,
+	[GCC_CAMSS_CCI_CLK_SRC] = &gcc_camss_cci_clk_src.clkr,
+	[GCC_CAMSS_CPHY_0_CLK] = &gcc_camss_cphy_0_clk.clkr,
+	[GCC_CAMSS_CPHY_1_CLK] = &gcc_camss_cphy_1_clk.clkr,
+	[GCC_CAMSS_CPHY_2_CLK] = &gcc_camss_cphy_2_clk.clkr,
+	[GCC_CAMSS_CSI0PHYTIMER_CLK] = &gcc_camss_csi0phytimer_clk.clkr,
+	[GCC_CAMSS_CSI0PHYTIMER_CLK_SRC] = &gcc_camss_csi0phytimer_clk_src.clkr,
+	[GCC_CAMSS_CSI1PHYTIMER_CLK] = &gcc_camss_csi1phytimer_clk.clkr,
+	[GCC_CAMSS_CSI1PHYTIMER_CLK_SRC] = &gcc_camss_csi1phytimer_clk_src.clkr,
+	[GCC_CAMSS_CSI2PHYTIMER_CLK] = &gcc_camss_csi2phytimer_clk.clkr,
+	[GCC_CAMSS_CSI2PHYTIMER_CLK_SRC] = &gcc_camss_csi2phytimer_clk_src.clkr,
+	[GCC_CAMSS_MCLK0_CLK] = &gcc_camss_mclk0_clk.clkr,
+	[GCC_CAMSS_MCLK0_CLK_SRC] = &gcc_camss_mclk0_clk_src.clkr,
+	[GCC_CAMSS_MCLK1_CLK] = &gcc_camss_mclk1_clk.clkr,
+	[GCC_CAMSS_MCLK1_CLK_SRC] = &gcc_camss_mclk1_clk_src.clkr,
+	[GCC_CAMSS_MCLK2_CLK] = &gcc_camss_mclk2_clk.clkr,
+	[GCC_CAMSS_MCLK2_CLK_SRC] = &gcc_camss_mclk2_clk_src.clkr,
+	[GCC_CAMSS_MCLK3_CLK] = &gcc_camss_mclk3_clk.clkr,
+	[GCC_CAMSS_MCLK3_CLK_SRC] = &gcc_camss_mclk3_clk_src.clkr,
+	[GCC_CAMSS_NRT_AXI_CLK] = &gcc_camss_nrt_axi_clk.clkr,
+	[GCC_CAMSS_OPE_AHB_CLK] = &gcc_camss_ope_ahb_clk.clkr,
+	[GCC_CAMSS_OPE_AHB_CLK_SRC] = &gcc_camss_ope_ahb_clk_src.clkr,
+	[GCC_CAMSS_OPE_CLK] = &gcc_camss_ope_clk.clkr,
+	[GCC_CAMSS_OPE_CLK_SRC] = &gcc_camss_ope_clk_src.clkr,
+	[GCC_CAMSS_RT_AXI_CLK] = &gcc_camss_rt_axi_clk.clkr,
+	[GCC_CAMSS_TFE_0_CLK] = &gcc_camss_tfe_0_clk.clkr,
+	[GCC_CAMSS_TFE_0_CLK_SRC] = &gcc_camss_tfe_0_clk_src.clkr,
+	[GCC_CAMSS_TFE_0_CPHY_RX_CLK] = &gcc_camss_tfe_0_cphy_rx_clk.clkr,
+	[GCC_CAMSS_TFE_0_CSID_CLK] = &gcc_camss_tfe_0_csid_clk.clkr,
+	[GCC_CAMSS_TFE_0_CSID_CLK_SRC] = &gcc_camss_tfe_0_csid_clk_src.clkr,
+	[GCC_CAMSS_TFE_1_CLK] = &gcc_camss_tfe_1_clk.clkr,
+	[GCC_CAMSS_TFE_1_CLK_SRC] = &gcc_camss_tfe_1_clk_src.clkr,
+	[GCC_CAMSS_TFE_1_CPHY_RX_CLK] = &gcc_camss_tfe_1_cphy_rx_clk.clkr,
+	[GCC_CAMSS_TFE_1_CSID_CLK] = &gcc_camss_tfe_1_csid_clk.clkr,
+	[GCC_CAMSS_TFE_1_CSID_CLK_SRC] = &gcc_camss_tfe_1_csid_clk_src.clkr,
+	[GCC_CAMSS_TFE_2_CLK] = &gcc_camss_tfe_2_clk.clkr,
+	[GCC_CAMSS_TFE_2_CLK_SRC] = &gcc_camss_tfe_2_clk_src.clkr,
+	[GCC_CAMSS_TFE_2_CPHY_RX_CLK] = &gcc_camss_tfe_2_cphy_rx_clk.clkr,
+	[GCC_CAMSS_TFE_2_CSID_CLK] = &gcc_camss_tfe_2_csid_clk.clkr,
+	[GCC_CAMSS_TFE_2_CSID_CLK_SRC] = &gcc_camss_tfe_2_csid_clk_src.clkr,
+	[GCC_CAMSS_TFE_CPHY_RX_CLK_SRC] = &gcc_camss_tfe_cphy_rx_clk_src.clkr,
+	[GCC_CAMSS_TOP_AHB_CLK] = &gcc_camss_top_ahb_clk.clkr,
+	[GCC_CAMSS_TOP_AHB_CLK_SRC] = &gcc_camss_top_ahb_clk_src.clkr,
+	[GCC_CFG_NOC_USB3_PRIM_AXI_CLK] = &gcc_cfg_noc_usb3_prim_axi_clk.clkr,
+	[GCC_CPUSS_GNOC_CLK] = &gcc_cpuss_gnoc_clk.clkr,
+	[GCC_DISP_AHB_CLK] = &gcc_disp_ahb_clk.clkr,
+	[GCC_DISP_GPLL0_CLK_SRC] = &gcc_disp_gpll0_clk_src.clkr,
+	[GCC_DISP_GPLL0_DIV_CLK_SRC] = &gcc_disp_gpll0_div_clk_src.clkr,
+	[GCC_DISP_HF_AXI_CLK] = &gcc_disp_hf_axi_clk.clkr,
+	[GCC_DISP_THROTTLE_CORE_CLK] = &gcc_disp_throttle_core_clk.clkr,
+	[GCC_DISP_XO_CLK] = &gcc_disp_xo_clk.clkr,
+	[GCC_GP1_CLK] = &gcc_gp1_clk.clkr,
+	[GCC_GP1_CLK_SRC] = &gcc_gp1_clk_src.clkr,
+	[GCC_GP2_CLK] = &gcc_gp2_clk.clkr,
+	[GCC_GP2_CLK_SRC] = &gcc_gp2_clk_src.clkr,
+	[GCC_GP3_CLK] = &gcc_gp3_clk.clkr,
+	[GCC_GP3_CLK_SRC] = &gcc_gp3_clk_src.clkr,
+	[GCC_GPU_CFG_AHB_CLK] = &gcc_gpu_cfg_ahb_clk.clkr,
+	[GCC_GPU_GPLL0_CLK_SRC] = &gcc_gpu_gpll0_clk_src.clkr,
+	[GCC_GPU_GPLL0_DIV_CLK_SRC] = &gcc_gpu_gpll0_div_clk_src.clkr,
+	[GCC_GPU_IREF_CLK] = &gcc_gpu_iref_clk.clkr,
+	[GCC_GPU_MEMNOC_GFX_CLK] = &gcc_gpu_memnoc_gfx_clk.clkr,
+	[GCC_GPU_SNOC_DVM_GFX_CLK] = &gcc_gpu_snoc_dvm_gfx_clk.clkr,
+	[GCC_GPU_THROTTLE_CORE_CLK] = &gcc_gpu_throttle_core_clk.clkr,
+	[GCC_PDM2_CLK] = &gcc_pdm2_clk.clkr,
+	[GCC_PDM2_CLK_SRC] = &gcc_pdm2_clk_src.clkr,
+	[GCC_PDM_AHB_CLK] = &gcc_pdm_ahb_clk.clkr,
+	[GCC_PDM_XO4_CLK] = &gcc_pdm_xo4_clk.clkr,
+	[GCC_PRNG_AHB_CLK] = &gcc_prng_ahb_clk.clkr,
+	[GCC_QMIP_CAMERA_NRT_AHB_CLK] = &gcc_qmip_camera_nrt_ahb_clk.clkr,
+	[GCC_QMIP_CAMERA_RT_AHB_CLK] = &gcc_qmip_camera_rt_ahb_clk.clkr,
+	[GCC_QMIP_DISP_AHB_CLK] = &gcc_qmip_disp_ahb_clk.clkr,
+	[GCC_QMIP_GPU_CFG_AHB_CLK] = &gcc_qmip_gpu_cfg_ahb_clk.clkr,
+	[GCC_QMIP_VIDEO_VCODEC_AHB_CLK] = &gcc_qmip_video_vcodec_ahb_clk.clkr,
+	[GCC_QUPV3_WRAP0_CORE_2X_CLK] = &gcc_qupv3_wrap0_core_2x_clk.clkr,
+	[GCC_QUPV3_WRAP0_CORE_CLK] = &gcc_qupv3_wrap0_core_clk.clkr,
+	[GCC_QUPV3_WRAP0_S0_CLK] = &gcc_qupv3_wrap0_s0_clk.clkr,
+	[GCC_QUPV3_WRAP0_S0_CLK_SRC] = &gcc_qupv3_wrap0_s0_clk_src.clkr,
+	[GCC_QUPV3_WRAP0_S1_CLK] = &gcc_qupv3_wrap0_s1_clk.clkr,
+	[GCC_QUPV3_WRAP0_S1_CLK_SRC] = &gcc_qupv3_wrap0_s1_clk_src.clkr,
+	[GCC_QUPV3_WRAP0_S2_CLK] = &gcc_qupv3_wrap0_s2_clk.clkr,
+	[GCC_QUPV3_WRAP0_S2_CLK_SRC] = &gcc_qupv3_wrap0_s2_clk_src.clkr,
+	[GCC_QUPV3_WRAP0_S3_CLK] = &gcc_qupv3_wrap0_s3_clk.clkr,
+	[GCC_QUPV3_WRAP0_S3_CLK_SRC] = &gcc_qupv3_wrap0_s3_clk_src.clkr,
+	[GCC_QUPV3_WRAP0_S4_CLK] = &gcc_qupv3_wrap0_s4_clk.clkr,
+	[GCC_QUPV3_WRAP0_S4_CLK_SRC] = &gcc_qupv3_wrap0_s4_clk_src.clkr,
+	[GCC_QUPV3_WRAP0_S5_CLK] = &gcc_qupv3_wrap0_s5_clk.clkr,
+	[GCC_QUPV3_WRAP0_S5_CLK_SRC] = &gcc_qupv3_wrap0_s5_clk_src.clkr,
+	[GCC_QUPV3_WRAP_0_M_AHB_CLK] = &gcc_qupv3_wrap_0_m_ahb_clk.clkr,
+	[GCC_QUPV3_WRAP_0_S_AHB_CLK] = &gcc_qupv3_wrap_0_s_ahb_clk.clkr,
+	[GCC_SDCC1_AHB_CLK] = &gcc_sdcc1_ahb_clk.clkr,
+	[GCC_SDCC1_APPS_CLK] = &gcc_sdcc1_apps_clk.clkr,
+	[GCC_SDCC1_APPS_CLK_SRC] = &gcc_sdcc1_apps_clk_src.clkr,
+	[GCC_SDCC1_ICE_CORE_CLK] = &gcc_sdcc1_ice_core_clk.clkr,
+	[GCC_SDCC1_ICE_CORE_CLK_SRC] = &gcc_sdcc1_ice_core_clk_src.clkr,
+	[GCC_SDCC2_AHB_CLK] = &gcc_sdcc2_ahb_clk.clkr,
+	[GCC_SDCC2_APPS_CLK] = &gcc_sdcc2_apps_clk.clkr,
+	[GCC_SDCC2_APPS_CLK_SRC] = &gcc_sdcc2_apps_clk_src.clkr,
+	[GCC_SYS_NOC_CPUSS_AHB_CLK] = &gcc_sys_noc_cpuss_ahb_clk.clkr,
+	[GCC_SYS_NOC_UFS_PHY_AXI_CLK] = &gcc_sys_noc_ufs_phy_axi_clk.clkr,
+	[GCC_SYS_NOC_USB3_PRIM_AXI_CLK] = &gcc_sys_noc_usb3_prim_axi_clk.clkr,
+	[GCC_UFS_CLKREF_CLK] = &gcc_ufs_clkref_clk.clkr,
+	[GCC_UFS_PHY_AHB_CLK] = &gcc_ufs_phy_ahb_clk.clkr,
+	[GCC_UFS_PHY_AXI_CLK] = &gcc_ufs_phy_axi_clk.clkr,
+	[GCC_UFS_PHY_AXI_CLK_SRC] = &gcc_ufs_phy_axi_clk_src.clkr,
+	[GCC_UFS_PHY_ICE_CORE_CLK] = &gcc_ufs_phy_ice_core_clk.clkr,
+	[GCC_UFS_PHY_ICE_CORE_CLK_SRC] = &gcc_ufs_phy_ice_core_clk_src.clkr,
+	[GCC_UFS_PHY_PHY_AUX_CLK] = &gcc_ufs_phy_phy_aux_clk.clkr,
+	[GCC_UFS_PHY_PHY_AUX_CLK_SRC] = &gcc_ufs_phy_phy_aux_clk_src.clkr,
+	[GCC_UFS_PHY_RX_SYMBOL_0_CLK] = &gcc_ufs_phy_rx_symbol_0_clk.clkr,
+	[GCC_UFS_PHY_TX_SYMBOL_0_CLK] = &gcc_ufs_phy_tx_symbol_0_clk.clkr,
+	[GCC_UFS_PHY_UNIPRO_CORE_CLK] = &gcc_ufs_phy_unipro_core_clk.clkr,
+	[GCC_UFS_PHY_UNIPRO_CORE_CLK_SRC] =
+		&gcc_ufs_phy_unipro_core_clk_src.clkr,
+	[GCC_USB30_PRIM_MASTER_CLK] = &gcc_usb30_prim_master_clk.clkr,
+	[GCC_USB30_PRIM_MASTER_CLK_SRC] = &gcc_usb30_prim_master_clk_src.clkr,
+	[GCC_USB30_PRIM_MOCK_UTMI_CLK] = &gcc_usb30_prim_mock_utmi_clk.clkr,
+	[GCC_USB30_PRIM_MOCK_UTMI_CLK_SRC] =
+		&gcc_usb30_prim_mock_utmi_clk_src.clkr,
+	[GCC_USB30_PRIM_MOCK_UTMI_POSTDIV_CLK_SRC] =
+		&gcc_usb30_prim_mock_utmi_postdiv_clk_src.clkr,
+	[GCC_USB30_PRIM_SLEEP_CLK] = &gcc_usb30_prim_sleep_clk.clkr,
+	[GCC_USB3_PRIM_CLKREF_CLK] = &gcc_usb3_prim_clkref_clk.clkr,
+	[GCC_USB3_PRIM_PHY_AUX_CLK_SRC] = &gcc_usb3_prim_phy_aux_clk_src.clkr,
+	[GCC_USB3_PRIM_PHY_COM_AUX_CLK] = &gcc_usb3_prim_phy_com_aux_clk.clkr,
+	[GCC_USB3_PRIM_PHY_PIPE_CLK] = &gcc_usb3_prim_phy_pipe_clk.clkr,
+	[GCC_VCODEC0_AXI_CLK] = &gcc_vcodec0_axi_clk.clkr,
+	[GCC_VENUS_AHB_CLK] = &gcc_venus_ahb_clk.clkr,
+	[GCC_VENUS_CTL_AXI_CLK] = &gcc_venus_ctl_axi_clk.clkr,
+	[GCC_VIDEO_AHB_CLK] = &gcc_video_ahb_clk.clkr,
+	[GCC_VIDEO_AXI0_CLK] = &gcc_video_axi0_clk.clkr,
+	[GCC_VIDEO_THROTTLE_CORE_CLK] = &gcc_video_throttle_core_clk.clkr,
+	[GCC_VIDEO_VCODEC0_SYS_CLK] = &gcc_video_vcodec0_sys_clk.clkr,
+	[GCC_VIDEO_VENUS_CLK_SRC] = &gcc_video_venus_clk_src.clkr,
+	[GCC_VIDEO_VENUS_CTL_CLK] = &gcc_video_venus_ctl_clk.clkr,
+	[GCC_VIDEO_XO_CLK] = &gcc_video_xo_clk.clkr,
+	[GPLL0] = &gpll0.clkr,
+	[GPLL0_OUT_AUX2] = &gpll0_out_aux2.clkr,
+	[GPLL0_OUT_MAIN] = &gpll0_out_main.clkr,
+	[GPLL10] = &gpll10.clkr,
+	[GPLL10_OUT_MAIN] = &gpll10_out_main.clkr,
+	[GPLL11] = &gpll11.clkr,
+	[GPLL11_OUT_MAIN] = &gpll11_out_main.clkr,
+	[GPLL3] = &gpll3.clkr,
+	[GPLL4] = &gpll4.clkr,
+	[GPLL4_OUT_MAIN] = &gpll4_out_main.clkr,
+	[GPLL6] = &gpll6.clkr,
+	[GPLL6_OUT_MAIN] = &gpll6_out_main.clkr,
+	[GPLL7] = &gpll7.clkr,
+	[GPLL7_OUT_MAIN] = &gpll7_out_main.clkr,
+	[GPLL8] = &gpll8.clkr,
+	[GPLL8_OUT_MAIN] = &gpll8_out_main.clkr,
+	[GPLL9] = &gpll9.clkr,
+	[GPLL9_OUT_MAIN] = &gpll9_out_main.clkr,
+};
+
+static const struct qcom_reset_map gcc_sm6115_resets[] = {
+	[GCC_QUSB2PHY_PRIM_BCR] = { 0x1c000 },
+	[GCC_QUSB2PHY_SEC_BCR] = { 0x1c004 },
+	[GCC_SDCC1_BCR] = { 0x38000 },
+	[GCC_SDCC2_BCR] = { 0x1e000 },
+	[GCC_UFS_PHY_BCR] = { 0x45000 },
+	[GCC_USB30_PRIM_BCR] = { 0x1a000 },
+	[GCC_USB_PHY_CFG_AHB2PHY_BCR] = { 0x1d000 },
+	[GCC_USB3PHY_PHY_PRIM_SP0_BCR] = { 0x1b008 },
+	[GCC_USB3_PHY_PRIM_SP0_BCR] = { 0x1b000 },
+	[GCC_VCODEC0_BCR] = { 0x58094 },
+	[GCC_VENUS_BCR] = { 0x58078 },
+	[GCC_VIDEO_INTERFACE_BCR] = { 0x6e000 },
+};
+
+static struct gdsc *gcc_sm6115_gdscs[] = {
+	[GCC_CAMSS_TOP_GDSC] = &gcc_camss_top_gdsc,
+	[GCC_UFS_PHY_GDSC] = &gcc_ufs_phy_gdsc,
+	[GCC_USB30_PRIM_GDSC] = &gcc_usb30_prim_gdsc,
+	[GCC_VCODEC0_GDSC] = &gcc_vcodec0_gdsc,
+	[GCC_VENUS_GDSC] = &gcc_venus_gdsc,
+	[HLOS1_VOTE_TURING_MMU_TBU1_GDSC] = &hlos1_vote_turing_mmu_tbu1_gdsc,
+	[HLOS1_VOTE_TURING_MMU_TBU0_GDSC] = &hlos1_vote_turing_mmu_tbu0_gdsc,
+	[HLOS1_VOTE_MM_SNOC_MMU_TBU_RT_GDSC] = &hlos1_vote_mm_snoc_mmu_tbu_rt_gdsc,
+	[HLOS1_VOTE_MM_SNOC_MMU_TBU_NRT_GDSC] = &hlos1_vote_mm_snoc_mmu_tbu_nrt_gdsc,
+};
+
+static const struct clk_rcg_dfs_data gcc_dfs_clocks[] = {
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s0_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s1_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s2_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s3_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s4_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s5_clk_src),
+};
+
+static const struct regmap_config gcc_sm6115_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.max_register = 0xc7000,
+	.fast_io = true,
+};
+
+static const struct qcom_cc_desc gcc_sm6115_desc = {
+	.config = &gcc_sm6115_regmap_config,
+	.clks = gcc_sm6115_clocks,
+	.num_clks = ARRAY_SIZE(gcc_sm6115_clocks),
+	.resets = gcc_sm6115_resets,
+	.num_resets = ARRAY_SIZE(gcc_sm6115_resets),
+	.gdscs = gcc_sm6115_gdscs,
+	.num_gdscs = ARRAY_SIZE(gcc_sm6115_gdscs),
+};
+
+static const struct of_device_id gcc_sm6115_match_table[] = {
+	{ .compatible = "qcom,gcc-sm6115" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, gcc_sm6115_match_table);
+
+static int gcc_sm6115_probe(struct platform_device *pdev)
+{
+	struct regmap *regmap;
+	int ret;
+
+	regmap = qcom_cc_map(pdev, &gcc_sm6115_desc);
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
+
+	ret = qcom_cc_register_rcg_dfs(regmap, gcc_dfs_clocks,
+			ARRAY_SIZE(gcc_dfs_clocks));
+	if (ret)
+		return ret;
+
+	clk_alpha_pll_configure(&gpll8, regmap, &gpll8_config);
+	clk_alpha_pll_configure(&gpll9, regmap, &gpll9_config);
+	clk_alpha_pll_configure(&gpll10, regmap, &gpll10_config);
+	clk_alpha_pll_configure(&gpll11, regmap, &gpll11_config);
+
+	return qcom_cc_really_probe(pdev, &gcc_sm6115_desc, regmap);
+}
+
+static struct platform_driver gcc_sm6115_driver = {
+	.probe = gcc_sm6115_probe,
+	.driver = {
+		.name = "gcc-sm6115",
+		.of_match_table = gcc_sm6115_match_table,
+	},
+};
+
+static int __init gcc_sm6115_init(void)
+{
+	return platform_driver_register(&gcc_sm6115_driver);
+}
+subsys_initcall(gcc_sm6115_init);
+
+static void __exit gcc_sm6115_exit(void)
+{
+	platform_driver_unregister(&gcc_sm6115_driver);
+}
+module_exit(gcc_sm6115_exit);
+
+MODULE_DESCRIPTION("QTI GCC SM6115 and SM4250 Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:gcc-sm6115");
diff --git a/drivers/clk/qcom/gcc-sm6350.c b/drivers/clk/qcom/gcc-sm6350.c
new file mode 100644
index 000000000000..3236706771b1
--- /dev/null
+++ b/drivers/clk/qcom/gcc-sm6350.c
@@ -0,0 +1,2584 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2021, Konrad Dybcio <konrad.dybcio@somainline.org>
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#include <dt-bindings/clock/qcom,gcc-sm6350.h>
+
+#include "clk-alpha-pll.h"
+#include "clk-branch.h"
+#include "clk-rcg.h"
+#include "clk-regmap.h"
+#include "clk-regmap-divider.h"
+#include "clk-regmap-mux.h"
+#include "common.h"
+#include "gdsc.h"
+#include "reset.h"
+
+enum {
+	P_BI_TCXO,
+	P_GPLL0_OUT_EVEN,
+	P_GPLL0_OUT_MAIN,
+	P_GPLL0_OUT_ODD,
+	P_GPLL6_OUT_EVEN,
+	P_GPLL7_OUT_MAIN,
+	P_SLEEP_CLK,
+};
+
+static struct clk_alpha_pll gpll0 = {
+	.offset = 0x0,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA],
+	.clkr = {
+		.enable_reg = 0x52010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpll0",
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "bi_tcxo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_fixed_fabia_ops,
+		},
+	},
+};
+
+static const struct clk_div_table post_div_table_gpll0_out_even[] = {
+	{ 0x1, 2 },
+	{ }
+};
+
+static struct clk_alpha_pll_postdiv gpll0_out_even = {
+	.offset = 0x0,
+	.post_div_shift = 8,
+	.post_div_table = post_div_table_gpll0_out_even,
+	.num_post_div = ARRAY_SIZE(post_div_table_gpll0_out_even),
+	.width = 4,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA],
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gpll0_out_even",
+		.parent_hws = (const struct clk_hw*[]){
+			&gpll0.clkr.hw,
+		},
+		.num_parents = 1,
+		.ops = &clk_alpha_pll_postdiv_fabia_ops,
+	},
+};
+
+static const struct clk_div_table post_div_table_gpll0_out_odd[] = {
+	{ 0x3, 3 },
+	{ }
+};
+
+static struct clk_alpha_pll_postdiv gpll0_out_odd = {
+	.offset = 0x0,
+	.post_div_shift = 12,
+	.post_div_table = post_div_table_gpll0_out_odd,
+	.num_post_div = ARRAY_SIZE(post_div_table_gpll0_out_odd),
+	.width = 4,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA],
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gpll0_out_odd",
+		.parent_hws = (const struct clk_hw*[]){
+			&gpll0.clkr.hw,
+		},
+		.num_parents = 1,
+		.ops = &clk_alpha_pll_postdiv_fabia_ops,
+	},
+};
+
+static struct clk_alpha_pll gpll6 = {
+	.offset = 0x6000,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA],
+	.clkr = {
+		.enable_reg = 0x52010,
+		.enable_mask = BIT(6),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpll6",
+			.parent_hws = (const struct clk_hw*[]){
+				&gpll0.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_fixed_fabia_ops,
+		},
+	},
+};
+
+static const struct clk_div_table post_div_table_gpll6_out_even[] = {
+	{ 0x1, 2 },
+	{ }
+};
+
+static struct clk_alpha_pll_postdiv gpll6_out_even = {
+	.offset = 0x6000,
+	.post_div_shift = 8,
+	.post_div_table = post_div_table_gpll6_out_even,
+	.num_post_div = ARRAY_SIZE(post_div_table_gpll6_out_even),
+	.width = 4,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA],
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gpll6_out_even",
+		.parent_hws = (const struct clk_hw*[]){
+			&gpll0.clkr.hw,
+		},
+		.num_parents = 1,
+		.ops = &clk_alpha_pll_postdiv_fabia_ops,
+	},
+};
+
+static struct clk_alpha_pll gpll7 = {
+	.offset = 0x7000,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA],
+	.clkr = {
+		.enable_reg = 0x52010,
+		.enable_mask = BIT(7),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpll7",
+			.parent_hws = (const struct clk_hw*[]){
+				&gpll0.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_fixed_fabia_ops,
+		},
+	},
+};
+
+static const struct parent_map gcc_parent_map_0[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL0_OUT_MAIN, 1 },
+	{ P_GPLL6_OUT_EVEN, 2 },
+	{ P_GPLL0_OUT_EVEN, 6 },
+};
+
+static const struct clk_parent_data gcc_parent_data_0[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll6_out_even.clkr.hw },
+	{ .hw = &gpll0_out_even.clkr.hw },
+};
+
+static const struct parent_map gcc_parent_map_1[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL0_OUT_EVEN, 6 },
+};
+
+static const struct clk_parent_data gcc_parent_data_1[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .hw = &gpll0_out_even.clkr.hw },
+};
+
+static const struct parent_map gcc_parent_map_2[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL0_OUT_ODD, 2 },
+};
+
+static const struct clk_parent_data gcc_parent_data_2_ao[] = {
+	{ .fw_name = "bi_tcxo_ao" },
+	{ .hw = &gpll0_out_odd.clkr.hw },
+};
+
+static const struct parent_map gcc_parent_map_4[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL0_OUT_MAIN, 1 },
+	{ P_GPLL0_OUT_ODD, 2 },
+};
+
+static const struct clk_parent_data gcc_parent_data_4[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .hw = &gpll0.clkr.hw },
+	{ .hw = &gpll0_out_odd.clkr.hw },
+};
+
+static const struct parent_map gcc_parent_map_5[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL0_OUT_ODD, 2 },
+	{ P_SLEEP_CLK, 5 },
+	{ P_GPLL0_OUT_EVEN, 6 },
+};
+
+static const struct clk_parent_data gcc_parent_data_5[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .hw = &gpll0_out_odd.clkr.hw },
+	{ .fw_name = "sleep_clk" },
+	{ .hw = &gpll0_out_even.clkr.hw },
+};
+
+static const struct parent_map gcc_parent_map_6[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_SLEEP_CLK, 5 },
+};
+
+static const struct clk_parent_data gcc_parent_data_6[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .fw_name = "sleep_clk" }
+};
+
+static const struct parent_map gcc_parent_map_7[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL6_OUT_EVEN, 2 },
+	{ P_GPLL0_OUT_EVEN, 6 },
+};
+
+static const struct clk_parent_data gcc_parent_data_7[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .hw = &gpll6_out_even.clkr.hw },
+	{ .hw = &gpll0_out_even.clkr.hw },
+};
+
+static const struct parent_map gcc_parent_map_8[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPLL0_OUT_ODD, 2 },
+	{ P_GPLL7_OUT_MAIN, 3 },
+};
+
+static const struct clk_parent_data gcc_parent_data_8[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .hw = &gpll0_out_odd.clkr.hw },
+	{ .hw = &gpll7.clkr.hw },
+};
+
+static struct clk_regmap_div gcc_gpu_gpll0_main_div_clk_src = {
+	.reg = 0x4514C,
+	.shift = 0,
+	.width = 2,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "gcc_gpu_gpll0_main_div_clk_src",
+		.parent_hws = (const struct clk_hw*[]){
+			&gpll0.clkr.hw,
+		},
+		.num_parents = 1,
+		.ops = &clk_regmap_div_ro_ops,
+	},
+};
+
+static struct clk_regmap_div gcc_npu_pll0_main_div_clk_src = {
+	.reg = 0x4ce00,
+	.shift = 0,
+	.width = 2,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "gcc_npu_pll0_main_div_clk_src",
+		.parent_hws = (const struct clk_hw*[]){
+			&gpll0.clkr.hw,
+		},
+		.num_parents = 1,
+		.ops = &clk_regmap_div_ro_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_cpuss_ahb_clk_src[] = {
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_cpuss_ahb_clk_src = {
+	.cmd_rcgr = 0x30014,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_2,
+	.freq_tbl = ftbl_gcc_cpuss_ahb_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_cpuss_ahb_clk_src",
+		.parent_data = gcc_parent_data_2_ao,
+		.num_parents = ARRAY_SIZE(gcc_parent_data_2_ao),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_gp1_clk_src[] = {
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(25000000, P_GPLL0_OUT_EVEN, 12, 0, 0),
+	F(50000000, P_GPLL0_OUT_ODD, 4, 0, 0),
+	F(100000000, P_GPLL0_OUT_ODD, 2, 0, 0),
+	F(200000000, P_GPLL0_OUT_ODD, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_gp1_clk_src = {
+	.cmd_rcgr = 0x37004,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_5,
+	.freq_tbl = ftbl_gcc_gp1_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_gp1_clk_src",
+		.parent_data = gcc_parent_data_5,
+		.num_parents = ARRAY_SIZE(gcc_parent_data_5),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 gcc_gp2_clk_src = {
+	.cmd_rcgr = 0x38004,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_5,
+	.freq_tbl = ftbl_gcc_gp1_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_gp2_clk_src",
+		.parent_data = gcc_parent_data_5,
+		.num_parents = ARRAY_SIZE(gcc_parent_data_5),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 gcc_gp3_clk_src = {
+	.cmd_rcgr = 0x39004,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_5,
+	.freq_tbl = ftbl_gcc_gp1_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_gp3_clk_src",
+		.parent_data = gcc_parent_data_5,
+		.num_parents = ARRAY_SIZE(gcc_parent_data_5),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_pdm2_clk_src[] = {
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(60000000, P_GPLL0_OUT_EVEN, 5, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_pdm2_clk_src = {
+	.cmd_rcgr = 0x23010,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_1,
+	.freq_tbl = ftbl_gcc_pdm2_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_pdm2_clk_src",
+		.parent_data = gcc_parent_data_1,
+		.num_parents = ARRAY_SIZE(gcc_parent_data_1),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_qupv3_wrap0_s0_clk_src[] = {
+	F(7372800, P_GPLL0_OUT_EVEN, 1, 384, 15625),
+	F(14745600, P_GPLL0_OUT_EVEN, 1, 768, 15625),
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(29491200, P_GPLL0_OUT_EVEN, 1, 1536, 15625),
+	F(32000000, P_GPLL0_OUT_EVEN, 1, 8, 75),
+	F(48000000, P_GPLL0_OUT_EVEN, 1, 4, 25),
+	F(64000000, P_GPLL0_OUT_EVEN, 1, 16, 75),
+	F(75000000, P_GPLL0_OUT_EVEN, 4, 0, 0),
+	F(80000000, P_GPLL0_OUT_EVEN, 1, 4, 15),
+	F(96000000, P_GPLL0_OUT_EVEN, 1, 8, 25),
+	F(100000000, P_GPLL0_OUT_EVEN, 3, 0, 0),
+	F(102400000, P_GPLL0_OUT_EVEN, 1, 128, 375),
+	F(112000000, P_GPLL0_OUT_EVEN, 1, 28, 75),
+	F(117964800, P_GPLL0_OUT_EVEN, 1, 6144, 15625),
+	F(120000000, P_GPLL0_OUT_EVEN, 2.5, 0, 0),
+	F(128000000, P_GPLL6_OUT_EVEN, 3, 0, 0),
+	{ }
+};
+
+static struct clk_init_data gcc_qupv3_wrap0_s0_clk_src_init = {
+	.name = "gcc_qupv3_wrap0_s0_clk_src",
+	.parent_data = gcc_parent_data_0,
+	.num_parents = ARRAY_SIZE(gcc_parent_data_0),
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap0_s0_clk_src = {
+	.cmd_rcgr = 0x21148,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s0_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap0_s1_clk_src_init = {
+	.name = "gcc_qupv3_wrap0_s1_clk_src",
+	.parent_data = gcc_parent_data_0,
+	.num_parents = ARRAY_SIZE(gcc_parent_data_0),
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap0_s1_clk_src = {
+	.cmd_rcgr = 0x21278,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s1_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap0_s2_clk_src_init = {
+	.name = "gcc_qupv3_wrap0_s2_clk_src",
+	.parent_data = gcc_parent_data_0,
+	.num_parents = ARRAY_SIZE(gcc_parent_data_0),
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap0_s2_clk_src = {
+	.cmd_rcgr = 0x213a8,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s2_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap0_s3_clk_src_init = {
+	.name = "gcc_qupv3_wrap0_s3_clk_src",
+	.parent_data = gcc_parent_data_0,
+	.num_parents = ARRAY_SIZE(gcc_parent_data_0),
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap0_s3_clk_src = {
+	.cmd_rcgr = 0x214d8,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s3_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap0_s4_clk_src_init = {
+	.name = "gcc_qupv3_wrap0_s4_clk_src",
+	.parent_data = gcc_parent_data_0,
+	.num_parents = ARRAY_SIZE(gcc_parent_data_0),
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap0_s4_clk_src = {
+	.cmd_rcgr = 0x21608,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s4_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap0_s5_clk_src_init = {
+	.name = "gcc_qupv3_wrap0_s5_clk_src",
+	.parent_data = gcc_parent_data_0,
+	.num_parents = ARRAY_SIZE(gcc_parent_data_0),
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap0_s5_clk_src = {
+	.cmd_rcgr = 0x21738,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap0_s5_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap1_s0_clk_src_init = {
+	.name = "gcc_qupv3_wrap1_s0_clk_src",
+	.parent_data = gcc_parent_data_0,
+	.num_parents = ARRAY_SIZE(gcc_parent_data_0),
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap1_s0_clk_src = {
+	.cmd_rcgr = 0x22018,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap1_s0_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap1_s1_clk_src_init = {
+	.name = "gcc_qupv3_wrap1_s1_clk_src",
+	.parent_data = gcc_parent_data_0,
+	.num_parents = ARRAY_SIZE(gcc_parent_data_0),
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap1_s1_clk_src = {
+	.cmd_rcgr = 0x22148,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap1_s1_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap1_s2_clk_src_init = {
+	.name = "gcc_qupv3_wrap1_s2_clk_src",
+	.parent_data = gcc_parent_data_0,
+	.num_parents = ARRAY_SIZE(gcc_parent_data_0),
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap1_s2_clk_src = {
+	.cmd_rcgr = 0x22278,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap1_s2_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap1_s3_clk_src_init = {
+	.name = "gcc_qupv3_wrap1_s3_clk_src",
+	.parent_data = gcc_parent_data_0,
+	.num_parents = ARRAY_SIZE(gcc_parent_data_0),
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap1_s3_clk_src = {
+	.cmd_rcgr = 0x223a8,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap1_s3_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap1_s4_clk_src_init = {
+	.name = "gcc_qupv3_wrap1_s4_clk_src",
+	.parent_data = gcc_parent_data_0,
+	.num_parents = ARRAY_SIZE(gcc_parent_data_0),
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap1_s4_clk_src = {
+	.cmd_rcgr = 0x224d8,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap1_s4_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap1_s5_clk_src_init = {
+	.name = "gcc_qupv3_wrap1_s5_clk_src",
+	.parent_data = gcc_parent_data_0,
+	.num_parents = ARRAY_SIZE(gcc_parent_data_0),
+	.ops = &clk_rcg2_ops,
+};
+
+static struct clk_rcg2 gcc_qupv3_wrap1_s5_clk_src = {
+	.cmd_rcgr = 0x22608,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_0,
+	.freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
+	.clkr.hw.init = &gcc_qupv3_wrap1_s5_clk_src_init,
+};
+
+static const struct freq_tbl ftbl_gcc_sdcc1_apps_clk_src[] = {
+	F(144000, P_BI_TCXO, 16, 3, 25),
+	F(400000, P_BI_TCXO, 12, 1, 4),
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(20000000, P_GPLL0_OUT_EVEN, 5, 1, 3),
+	F(25000000, P_GPLL0_OUT_EVEN, 12, 0, 0),
+	F(50000000, P_GPLL0_OUT_EVEN, 6, 0, 0),
+	F(100000000, P_GPLL0_OUT_EVEN, 3, 0, 0),
+	F(192000000, P_GPLL6_OUT_EVEN, 2, 0, 0),
+	F(384000000, P_GPLL6_OUT_EVEN, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_sdcc1_apps_clk_src = {
+	.cmd_rcgr = 0x4b024,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_7,
+	.freq_tbl = ftbl_gcc_sdcc1_apps_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_sdcc1_apps_clk_src",
+		.parent_data = gcc_parent_data_7,
+		.num_parents = ARRAY_SIZE(gcc_parent_data_7),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_sdcc1_ice_core_clk_src[] = {
+	F(100000000, P_GPLL0_OUT_EVEN, 3, 0, 0),
+	F(150000000, P_GPLL0_OUT_EVEN, 2, 0, 0),
+	F(300000000, P_GPLL0_OUT_EVEN, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_sdcc1_ice_core_clk_src = {
+	.cmd_rcgr = 0x4b00c,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_1,
+	.freq_tbl = ftbl_gcc_sdcc1_ice_core_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_sdcc1_ice_core_clk_src",
+		.parent_data = gcc_parent_data_1,
+		.num_parents = ARRAY_SIZE(gcc_parent_data_1),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_sdcc2_apps_clk_src[] = {
+	F(400000, P_BI_TCXO, 12, 1, 4),
+	F(9600000, P_BI_TCXO, 2, 0, 0),
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(25000000, P_GPLL0_OUT_ODD, 8, 0, 0),
+	F(50000000, P_GPLL0_OUT_ODD, 4, 0, 0),
+	F(100000000, P_GPLL0_OUT_ODD, 2, 0, 0),
+	F(202000000, P_GPLL7_OUT_MAIN, 4, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_sdcc2_apps_clk_src = {
+	.cmd_rcgr = 0x2000c,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_8,
+	.freq_tbl = ftbl_gcc_sdcc2_apps_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_sdcc2_apps_clk_src",
+		.parent_data = gcc_parent_data_8,
+		.num_parents = ARRAY_SIZE(gcc_parent_data_8),
+		.ops = &clk_rcg2_floor_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_ufs_phy_axi_clk_src[] = {
+	F(25000000, P_GPLL0_OUT_ODD, 8, 0, 0),
+	F(50000000, P_GPLL0_OUT_ODD, 4, 0, 0),
+	F(100000000, P_GPLL0_OUT_ODD, 2, 0, 0),
+	F(200000000, P_GPLL0_OUT_ODD, 1, 0, 0),
+	F(240000000, P_GPLL0_OUT_MAIN, 2.5, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_ufs_phy_axi_clk_src = {
+	.cmd_rcgr = 0x3a01c,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_4,
+	.freq_tbl = ftbl_gcc_ufs_phy_axi_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_ufs_phy_axi_clk_src",
+		.parent_data = gcc_parent_data_4,
+		.num_parents = ARRAY_SIZE(gcc_parent_data_4),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_ufs_phy_ice_core_clk_src[] = {
+	F(37500000, P_GPLL0_OUT_EVEN, 8, 0, 0),
+	F(75000000, P_GPLL0_OUT_EVEN, 4, 0, 0),
+	F(150000000, P_GPLL0_OUT_EVEN, 2, 0, 0),
+	F(300000000, P_GPLL0_OUT_EVEN, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_ufs_phy_ice_core_clk_src = {
+	.cmd_rcgr = 0x3a048,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_1,
+	.freq_tbl = ftbl_gcc_ufs_phy_ice_core_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_ufs_phy_ice_core_clk_src",
+		.parent_data = gcc_parent_data_1,
+		.num_parents = ARRAY_SIZE(gcc_parent_data_1),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_ufs_phy_phy_aux_clk_src[] = {
+	F(9600000, P_BI_TCXO, 2, 0, 0),
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_ufs_phy_phy_aux_clk_src = {
+	.cmd_rcgr = 0x3a0b0,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.freq_tbl = ftbl_gcc_ufs_phy_phy_aux_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_ufs_phy_phy_aux_clk_src",
+		.parent_data = &(const struct clk_parent_data){
+			.fw_name = "bi_tcxo",
+		},
+		.num_parents = 1,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_ufs_phy_unipro_core_clk_src[] = {
+	F(37500000, P_GPLL0_OUT_EVEN, 8, 0, 0),
+	F(75000000, P_GPLL0_OUT_EVEN, 4, 0, 0),
+	F(150000000, P_GPLL0_OUT_EVEN, 2, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_ufs_phy_unipro_core_clk_src = {
+	.cmd_rcgr = 0x3a060,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_1,
+	.freq_tbl = ftbl_gcc_ufs_phy_unipro_core_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_ufs_phy_unipro_core_clk_src",
+		.parent_data = gcc_parent_data_1,
+		.num_parents = ARRAY_SIZE(gcc_parent_data_1),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_usb30_prim_master_clk_src[] = {
+	F(66666667, P_GPLL0_OUT_ODD, 3, 0, 0),
+	F(133333333, P_GPLL0_OUT_MAIN, 4.5, 0, 0),
+	F(200000000, P_GPLL0_OUT_MAIN, 3, 0, 0),
+	F(240000000, P_GPLL0_OUT_MAIN, 2.5, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_usb30_prim_master_clk_src = {
+	.cmd_rcgr = 0x1a01c,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_4,
+	.freq_tbl = ftbl_gcc_usb30_prim_master_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_usb30_prim_master_clk_src",
+		.parent_data = gcc_parent_data_4,
+		.num_parents = ARRAY_SIZE(gcc_parent_data_4),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_usb30_prim_mock_utmi_clk_src[] = {
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gcc_usb30_prim_mock_utmi_clk_src = {
+	.cmd_rcgr = 0x1a034,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.freq_tbl = ftbl_gcc_usb30_prim_mock_utmi_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_usb30_prim_mock_utmi_clk_src",
+		.parent_data = &(const struct clk_parent_data){
+			.fw_name = "bi_tcxo",
+		},
+		.num_parents = 1,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 gcc_usb3_prim_phy_aux_clk_src = {
+	.cmd_rcgr = 0x1a060,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gcc_parent_map_6,
+	.freq_tbl = ftbl_gcc_usb30_prim_mock_utmi_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gcc_usb3_prim_phy_aux_clk_src",
+		.parent_data = gcc_parent_data_6,
+		.num_parents = ARRAY_SIZE(gcc_parent_data_6),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_aggre_ufs_phy_axi_clk = {
+	.halt_reg = 0x3e014,
+	.halt_check = BRANCH_HALT_DELAY,
+	.hwcg_reg = 0x3e014,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x3e014,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_aggre_ufs_phy_axi_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_ufs_phy_axi_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_aggre_ufs_phy_axi_hw_ctl_clk = {
+	.halt_reg = 0x3e014,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x3e014,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x3e014,
+		.enable_mask = BIT(1),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_aggre_ufs_phy_axi_hw_ctl_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_ufs_phy_axi_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_axi_hw_ctl_clk = {
+	.halt_reg = 0x3e014,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x3e014,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x3e014,
+		.enable_mask = BIT(1),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_axi_hw_ctl_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_ufs_phy_axi_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_aggre_usb3_prim_axi_clk = {
+	.halt_reg = 0x3e010,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x3e010,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x3e010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_aggre_usb3_prim_axi_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_usb30_prim_master_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_boot_rom_ahb_clk = {
+	.halt_reg = 0x26004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x26004,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(28),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_boot_rom_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camera_ahb_clk = {
+	.halt_reg = 0x17008,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x17008,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x17008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camera_ahb_clk",
+			.flags = CLK_IS_CRITICAL,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camera_axi_clk = {
+	.halt_reg = 0x17018,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x17018,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x17018,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camera_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camera_throttle_nrt_axi_clk = {
+	.halt_reg = 0x17078,
+	.halt_check = BRANCH_VOTED,
+	.hwcg_reg = 0x17078,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x17078,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camera_throttle_nrt_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camera_throttle_rt_axi_clk = {
+	.halt_reg = 0x17024,
+	.halt_check = BRANCH_VOTED,
+	.hwcg_reg = 0x17024,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x17024,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camera_throttle_rt_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_camera_xo_clk = {
+	.halt_reg = 0x17030,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x17030,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_camera_xo_clk",
+			.flags = CLK_IS_CRITICAL,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ce1_ahb_clk = {
+	.halt_reg = 0x2b00c,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x2b00c,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(3),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ce1_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ce1_axi_clk = {
+	.halt_reg = 0x2b008,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(2),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ce1_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ce1_clk = {
+	.halt_reg = 0x2b004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(1),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ce1_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_cfg_noc_usb3_prim_axi_clk = {
+	.halt_reg = 0x1101c,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x1101c,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x1101c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_cfg_noc_usb3_prim_axi_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_usb30_prim_master_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_cpuss_ahb_clk = {
+	.halt_reg = 0x30000,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x30000,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(4),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_cpuss_ahb_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_cpuss_ahb_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_IS_CRITICAL | CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_cpuss_gnoc_clk = {
+	.halt_reg = 0x30004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x30004,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(5),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_cpuss_gnoc_clk",
+			.flags = CLK_IS_CRITICAL,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_cpuss_rbcpr_clk = {
+	.halt_reg = 0x30008,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x30008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_cpuss_rbcpr_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ddrss_gpu_axi_clk = {
+	.halt_reg = 0x2d038,
+	.halt_check = BRANCH_VOTED,
+	.hwcg_reg = 0x2d038,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x2d038,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ddrss_gpu_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_disp_ahb_clk = {
+	.halt_reg = 0x1700c,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x1700c,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x1700c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_disp_ahb_clk",
+			.flags = CLK_IS_CRITICAL,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_disp_axi_clk = {
+	.halt_reg = 0x1701c,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x1701c,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x1701c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_disp_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_disp_cc_sleep_clk = {
+	.halt_reg = 0x17074,
+	.halt_check = BRANCH_HALT_DELAY,
+	.hwcg_reg = 0x17074,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x17074,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_disp_cc_sleep_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_disp_cc_xo_clk = {
+	.halt_reg = 0x17070,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x17070,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x17070,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_disp_cc_xo_clk",
+			.flags = CLK_IS_CRITICAL,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_disp_gpll0_clk = {
+	.halt_check = BRANCH_HALT_DELAY,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(2),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_disp_gpll0_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gpll0.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_disp_throttle_axi_clk = {
+	.halt_reg = 0x17028,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x17028,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x17028,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_disp_throttle_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_disp_xo_clk = {
+	.halt_reg = 0x17034,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x17034,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_disp_xo_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_gp1_clk = {
+	.halt_reg = 0x37000,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x37000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gp1_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_gp1_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_gp2_clk = {
+	.halt_reg = 0x38000,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x38000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gp2_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_gp2_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_gp3_clk = {
+	.halt_reg = 0x39000,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x39000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gp3_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_gp3_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_gpu_cfg_ahb_clk = {
+	.halt_reg = 0x45004,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x45004,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x45004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gpu_cfg_ahb_clk",
+			.flags = CLK_IS_CRITICAL,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_gpu_gpll0_clk = {
+	.halt_check = BRANCH_HALT_DELAY,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(7),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gpu_gpll0_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gpll0.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_gpu_gpll0_div_clk = {
+	.halt_check = BRANCH_HALT_DELAY,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(8),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gpu_gpll0_div_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_gpu_gpll0_main_div_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_gpu_memnoc_gfx_clk = {
+	.halt_reg = 0x4500c,
+	.halt_check = BRANCH_VOTED,
+	.hwcg_reg = 0x4500c,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x4500c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gpu_memnoc_gfx_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_gpu_snoc_dvm_gfx_clk = {
+	.halt_reg = 0x45014,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x45014,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x45014,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gpu_snoc_dvm_gfx_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_npu_axi_clk = {
+	.halt_reg = 0x4c008,
+	.halt_check = BRANCH_VOTED,
+	.hwcg_reg = 0x4c008,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x4c008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_npu_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_npu_bwmon_axi_clk = {
+	.halt_reg = 0x4d004,
+	.halt_check = BRANCH_HALT_DELAY,
+	.hwcg_reg = 0x4d004,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x4d004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_npu_bwmon_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_npu_bwmon_dma_cfg_ahb_clk = {
+	.halt_reg = 0x4d008,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4d008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_npu_bwmon_dma_cfg_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_npu_bwmon_dsp_cfg_ahb_clk = {
+	.halt_reg = 0x4d00c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4d00c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_npu_bwmon_dsp_cfg_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_npu_cfg_ahb_clk = {
+	.halt_reg = 0x4c004,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x4c004,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x4c004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_npu_cfg_ahb_clk",
+			.flags = CLK_IS_CRITICAL,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_npu_dma_clk = {
+	.halt_reg = 0x4c140,
+	.halt_check = BRANCH_VOTED,
+	.hwcg_reg = 0x4c140,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x4c140,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_npu_dma_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_npu_gpll0_clk = {
+	.halt_check = BRANCH_HALT_DELAY,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_npu_gpll0_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gpll0.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_npu_gpll0_div_clk = {
+	.halt_check = BRANCH_HALT_DELAY,
+	.clkr = {
+		.enable_reg = 0x52008,
+		.enable_mask = BIT(10),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_npu_gpll0_div_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_npu_pll0_main_div_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_pdm2_clk = {
+	.halt_reg = 0x2300c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x2300c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_pdm2_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_pdm2_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_pdm_ahb_clk = {
+	.halt_reg = 0x23004,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x23004,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x23004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_pdm_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_pdm_xo4_clk = {
+	.halt_reg = 0x23008,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x23008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_pdm_xo4_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_prng_ahb_clk = {
+	.halt_reg = 0x24004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x24004,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(26),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_prng_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap0_core_2x_clk = {
+	.halt_reg = 0x21014,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap0_core_2x_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap0_core_clk = {
+	.halt_reg = 0x2100c,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(8),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap0_core_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s0_clk = {
+	.halt_reg = 0x21144,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(10),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap0_s0_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_qupv3_wrap0_s0_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s1_clk = {
+	.halt_reg = 0x21274,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap0_s1_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_qupv3_wrap0_s1_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s2_clk = {
+	.halt_reg = 0x213a4,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(12),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap0_s2_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_qupv3_wrap0_s2_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s3_clk = {
+	.halt_reg = 0x214d4,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(13),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap0_s3_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_qupv3_wrap0_s3_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s4_clk = {
+	.halt_reg = 0x21604,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(14),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap0_s4_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_qupv3_wrap0_s4_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap0_s5_clk = {
+	.halt_reg = 0x21734,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(15),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap0_s5_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_qupv3_wrap0_s5_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap1_core_2x_clk = {
+	.halt_reg = 0x22004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(16),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap1_core_2x_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap1_core_clk = {
+	.halt_reg = 0x22008,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(17),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap1_core_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap1_s0_clk = {
+	.halt_reg = 0x22014,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(20),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap1_s0_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_qupv3_wrap1_s0_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap1_s1_clk = {
+	.halt_reg = 0x22144,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(21),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap1_s1_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_qupv3_wrap1_s1_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap1_s2_clk = {
+	.halt_reg = 0x22274,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(22),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap1_s2_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_qupv3_wrap1_s2_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap1_s3_clk = {
+	.halt_reg = 0x223a4,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(23),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap1_s3_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_qupv3_wrap1_s3_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap1_s4_clk = {
+	.halt_reg = 0x224d4,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(24),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap1_s4_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_qupv3_wrap1_s4_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap1_s5_clk = {
+	.halt_reg = 0x22604,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(25),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap1_s5_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_qupv3_wrap1_s5_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap_0_m_ahb_clk = {
+	.halt_reg = 0x21004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x21004,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(6),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap_0_m_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap_0_s_ahb_clk = {
+	.halt_reg = 0x21008,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x21008,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(7),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap_0_s_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap_1_m_ahb_clk = {
+	.halt_reg = 0x2200c,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x2200c,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(18),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap_1_m_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qupv3_wrap_1_s_ahb_clk = {
+	.halt_reg = 0x22010,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x22010,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(19),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qupv3_wrap_1_s_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_sdcc1_ahb_clk = {
+	.halt_reg = 0x4b004,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4b004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_sdcc1_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_sdcc1_apps_clk = {
+	.halt_reg = 0x4b008,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x4b008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_sdcc1_apps_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_sdcc1_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_sdcc1_ice_core_clk = {
+	.halt_reg = 0x4b03c,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x4b03c,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x4b03c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_sdcc1_ice_core_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_sdcc1_ice_core_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_sdcc2_ahb_clk = {
+	.halt_reg = 0x20008,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x20008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_sdcc2_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_sdcc2_apps_clk = {
+	.halt_reg = 0x20004,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x20004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_sdcc2_apps_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_sdcc2_apps_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_sys_noc_cpuss_ahb_clk = {
+	.halt_reg = 0x10140,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x10140,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x52000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_sys_noc_cpuss_ahb_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_cpuss_ahb_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_IS_CRITICAL | CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_mem_clkref_clk = {
+	.halt_reg = 0x8c000,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x8c000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_mem_clkref_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_ahb_clk = {
+	.halt_reg = 0x3a00c,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x3a00c,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x3a00c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_axi_clk = {
+	.halt_reg = 0x3a034,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x3a034,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x3a034,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_axi_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_ufs_phy_axi_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_ice_core_clk = {
+	.halt_reg = 0x3a0a4,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x3a0a4,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x3a0a4,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_ice_core_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_ufs_phy_ice_core_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_ice_core_hw_ctl_clk = {
+	.halt_reg = 0x3a0a4,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x3a0a4,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x3a0a4,
+		.enable_mask = BIT(1),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_ice_core_hw_ctl_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_ufs_phy_ice_core_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_phy_aux_clk = {
+	.halt_reg = 0x3a0ac,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x3a0ac,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x3a0ac,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_phy_aux_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_ufs_phy_phy_aux_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_phy_aux_hw_ctl_clk = {
+	.halt_reg = 0x3a0ac,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x3a0ac,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x3a0ac,
+		.enable_mask = BIT(1),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_phy_aux_hw_ctl_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_ufs_phy_phy_aux_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_rx_symbol_0_clk = {
+	.halt_reg = 0x3a014,
+	.halt_check = BRANCH_HALT_SKIP,
+	.clkr = {
+		.enable_reg = 0x3a014,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_rx_symbol_0_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_rx_symbol_1_clk = {
+	.halt_reg = 0x3a018,
+	.halt_check = BRANCH_HALT_SKIP,
+	.clkr = {
+		.enable_reg = 0x3a018,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_rx_symbol_1_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_tx_symbol_0_clk = {
+	.halt_reg = 0x3a010,
+	.halt_check = BRANCH_HALT_SKIP,
+	.clkr = {
+		.enable_reg = 0x3a010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_tx_symbol_0_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_unipro_core_clk = {
+	.halt_reg = 0x3a09c,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x3a09c,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x3a09c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_unipro_core_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_ufs_phy_unipro_core_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ufs_phy_unipro_core_hw_ctl_clk = {
+	.halt_reg = 0x3a09c,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x3a09c,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x3a09c,
+		.enable_mask = BIT(1),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ufs_phy_unipro_core_hw_ctl_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_ufs_phy_unipro_core_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb30_prim_master_clk = {
+	.halt_reg = 0x1a00c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1a00c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb30_prim_master_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_usb30_prim_master_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb30_prim_mock_utmi_clk = {
+	.halt_reg = 0x1a018,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1a018,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb30_prim_mock_utmi_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_usb30_prim_mock_utmi_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb30_prim_sleep_clk = {
+	.halt_reg = 0x1a014,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1a014,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb30_prim_sleep_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb3_prim_clkref_clk = {
+	.halt_reg = 0x8c010,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x8c010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb3_prim_clkref_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb3_prim_phy_aux_clk = {
+	.halt_reg = 0x1a050,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1a050,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb3_prim_phy_aux_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_usb3_prim_phy_aux_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb3_prim_phy_com_aux_clk = {
+	.halt_reg = 0x1a054,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1a054,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb3_prim_phy_com_aux_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gcc_usb3_prim_phy_aux_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb3_prim_phy_pipe_clk = {
+	.halt_reg = 0x1a058,
+	.halt_check = BRANCH_HALT_SKIP,
+	.hwcg_reg = 0x1a058,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x1a058,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb3_prim_phy_pipe_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_video_ahb_clk = {
+	.halt_reg = 0x17004,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x17004,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x17004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_video_ahb_clk",
+			.flags = CLK_IS_CRITICAL,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_video_axi_clk = {
+	.halt_reg = 0x17014,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x17014,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x17014,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_video_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_video_throttle_axi_clk = {
+	.halt_reg = 0x17020,
+	.halt_check = BRANCH_HALT,
+	.hwcg_reg = 0x17020,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x17020,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_video_throttle_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_video_xo_clk = {
+	.halt_reg = 0x1702c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1702c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_video_xo_clk",
+			.flags = CLK_IS_CRITICAL,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct gdsc usb30_prim_gdsc = {
+	.gdscr = 0x1a004,
+	.pd = {
+		.name = "usb30_prim_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc ufs_phy_gdsc = {
+	.gdscr = 0x3a004,
+	.pd = {
+		.name = "ufs_phy_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc hlos1_vote_mmnoc_mmu_tbu_hf0_gdsc = {
+	.gdscr = 0xb7040,
+	.pd = {
+		.name = "hlos1_vote_mmnoc_mmu_tbu_hf0_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
+};
+
+static struct gdsc hlos1_vote_mmnoc_mmu_tbu_hf1_gdsc = {
+	.gdscr = 0xb7044,
+	.pd = {
+		.name = "hlos1_vote_mmnoc_mmu_tbu_hf1_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
+};
+
+static struct clk_regmap *gcc_sm6350_clocks[] = {
+	[GCC_AGGRE_UFS_PHY_AXI_CLK] = &gcc_aggre_ufs_phy_axi_clk.clkr,
+	[GCC_AGGRE_USB3_PRIM_AXI_CLK] = &gcc_aggre_usb3_prim_axi_clk.clkr,
+	[GCC_BOOT_ROM_AHB_CLK] = &gcc_boot_rom_ahb_clk.clkr,
+	[GCC_CAMERA_AHB_CLK] = &gcc_camera_ahb_clk.clkr,
+	[GCC_CAMERA_AXI_CLK] = &gcc_camera_axi_clk.clkr,
+	[GCC_CAMERA_THROTTLE_NRT_AXI_CLK] =
+		&gcc_camera_throttle_nrt_axi_clk.clkr,
+	[GCC_CAMERA_THROTTLE_RT_AXI_CLK] = &gcc_camera_throttle_rt_axi_clk.clkr,
+	[GCC_CAMERA_XO_CLK] = &gcc_camera_xo_clk.clkr,
+	[GCC_CE1_AHB_CLK] = &gcc_ce1_ahb_clk.clkr,
+	[GCC_CE1_AXI_CLK] = &gcc_ce1_axi_clk.clkr,
+	[GCC_CE1_CLK] = &gcc_ce1_clk.clkr,
+	[GCC_CFG_NOC_USB3_PRIM_AXI_CLK] = &gcc_cfg_noc_usb3_prim_axi_clk.clkr,
+	[GCC_CPUSS_AHB_CLK] = &gcc_cpuss_ahb_clk.clkr,
+	[GCC_CPUSS_AHB_CLK_SRC] = &gcc_cpuss_ahb_clk_src.clkr,
+	[GCC_CPUSS_GNOC_CLK] = &gcc_cpuss_gnoc_clk.clkr,
+	[GCC_CPUSS_RBCPR_CLK] = &gcc_cpuss_rbcpr_clk.clkr,
+	[GCC_DDRSS_GPU_AXI_CLK] = &gcc_ddrss_gpu_axi_clk.clkr,
+	[GCC_DISP_AHB_CLK] = &gcc_disp_ahb_clk.clkr,
+	[GCC_DISP_AXI_CLK] = &gcc_disp_axi_clk.clkr,
+	[GCC_DISP_CC_SLEEP_CLK] = &gcc_disp_cc_sleep_clk.clkr,
+	[GCC_DISP_CC_XO_CLK] = &gcc_disp_cc_xo_clk.clkr,
+	[GCC_DISP_GPLL0_CLK] = &gcc_disp_gpll0_clk.clkr,
+	[GCC_DISP_THROTTLE_AXI_CLK] = &gcc_disp_throttle_axi_clk.clkr,
+	[GCC_DISP_XO_CLK] = &gcc_disp_xo_clk.clkr,
+	[GCC_GP1_CLK] = &gcc_gp1_clk.clkr,
+	[GCC_GP1_CLK_SRC] = &gcc_gp1_clk_src.clkr,
+	[GCC_GP2_CLK] = &gcc_gp2_clk.clkr,
+	[GCC_GP2_CLK_SRC] = &gcc_gp2_clk_src.clkr,
+	[GCC_GP3_CLK] = &gcc_gp3_clk.clkr,
+	[GCC_GP3_CLK_SRC] = &gcc_gp3_clk_src.clkr,
+	[GCC_GPU_CFG_AHB_CLK] = &gcc_gpu_cfg_ahb_clk.clkr,
+	[GCC_GPU_GPLL0_CLK] = &gcc_gpu_gpll0_clk.clkr,
+	[GCC_GPU_GPLL0_DIV_CLK] = &gcc_gpu_gpll0_div_clk.clkr,
+	[GCC_GPU_MEMNOC_GFX_CLK] = &gcc_gpu_memnoc_gfx_clk.clkr,
+	[GCC_GPU_SNOC_DVM_GFX_CLK] = &gcc_gpu_snoc_dvm_gfx_clk.clkr,
+	[GCC_NPU_AXI_CLK] = &gcc_npu_axi_clk.clkr,
+	[GCC_NPU_BWMON_AXI_CLK] = &gcc_npu_bwmon_axi_clk.clkr,
+	[GCC_NPU_BWMON_DMA_CFG_AHB_CLK] = &gcc_npu_bwmon_dma_cfg_ahb_clk.clkr,
+	[GCC_NPU_BWMON_DSP_CFG_AHB_CLK] = &gcc_npu_bwmon_dsp_cfg_ahb_clk.clkr,
+	[GCC_NPU_CFG_AHB_CLK] = &gcc_npu_cfg_ahb_clk.clkr,
+	[GCC_NPU_DMA_CLK] = &gcc_npu_dma_clk.clkr,
+	[GCC_NPU_GPLL0_CLK] = &gcc_npu_gpll0_clk.clkr,
+	[GCC_NPU_GPLL0_DIV_CLK] = &gcc_npu_gpll0_div_clk.clkr,
+	[GCC_PDM2_CLK] = &gcc_pdm2_clk.clkr,
+	[GCC_PDM2_CLK_SRC] = &gcc_pdm2_clk_src.clkr,
+	[GCC_PDM_AHB_CLK] = &gcc_pdm_ahb_clk.clkr,
+	[GCC_PDM_XO4_CLK] = &gcc_pdm_xo4_clk.clkr,
+	[GCC_PRNG_AHB_CLK] = &gcc_prng_ahb_clk.clkr,
+	[GCC_QUPV3_WRAP0_CORE_2X_CLK] = &gcc_qupv3_wrap0_core_2x_clk.clkr,
+	[GCC_QUPV3_WRAP0_CORE_CLK] = &gcc_qupv3_wrap0_core_clk.clkr,
+	[GCC_QUPV3_WRAP0_S0_CLK] = &gcc_qupv3_wrap0_s0_clk.clkr,
+	[GCC_QUPV3_WRAP0_S0_CLK_SRC] = &gcc_qupv3_wrap0_s0_clk_src.clkr,
+	[GCC_QUPV3_WRAP0_S1_CLK] = &gcc_qupv3_wrap0_s1_clk.clkr,
+	[GCC_QUPV3_WRAP0_S1_CLK_SRC] = &gcc_qupv3_wrap0_s1_clk_src.clkr,
+	[GCC_QUPV3_WRAP0_S2_CLK] = &gcc_qupv3_wrap0_s2_clk.clkr,
+	[GCC_QUPV3_WRAP0_S2_CLK_SRC] = &gcc_qupv3_wrap0_s2_clk_src.clkr,
+	[GCC_QUPV3_WRAP0_S3_CLK] = &gcc_qupv3_wrap0_s3_clk.clkr,
+	[GCC_QUPV3_WRAP0_S3_CLK_SRC] = &gcc_qupv3_wrap0_s3_clk_src.clkr,
+	[GCC_QUPV3_WRAP0_S4_CLK] = &gcc_qupv3_wrap0_s4_clk.clkr,
+	[GCC_QUPV3_WRAP0_S4_CLK_SRC] = &gcc_qupv3_wrap0_s4_clk_src.clkr,
+	[GCC_QUPV3_WRAP0_S5_CLK] = &gcc_qupv3_wrap0_s5_clk.clkr,
+	[GCC_QUPV3_WRAP0_S5_CLK_SRC] = &gcc_qupv3_wrap0_s5_clk_src.clkr,
+	[GCC_QUPV3_WRAP1_CORE_2X_CLK] = &gcc_qupv3_wrap1_core_2x_clk.clkr,
+	[GCC_QUPV3_WRAP1_CORE_CLK] = &gcc_qupv3_wrap1_core_clk.clkr,
+	[GCC_QUPV3_WRAP1_S0_CLK] = &gcc_qupv3_wrap1_s0_clk.clkr,
+	[GCC_QUPV3_WRAP1_S0_CLK_SRC] = &gcc_qupv3_wrap1_s0_clk_src.clkr,
+	[GCC_QUPV3_WRAP1_S1_CLK] = &gcc_qupv3_wrap1_s1_clk.clkr,
+	[GCC_QUPV3_WRAP1_S1_CLK_SRC] = &gcc_qupv3_wrap1_s1_clk_src.clkr,
+	[GCC_QUPV3_WRAP1_S2_CLK] = &gcc_qupv3_wrap1_s2_clk.clkr,
+	[GCC_QUPV3_WRAP1_S2_CLK_SRC] = &gcc_qupv3_wrap1_s2_clk_src.clkr,
+	[GCC_QUPV3_WRAP1_S3_CLK] = &gcc_qupv3_wrap1_s3_clk.clkr,
+	[GCC_QUPV3_WRAP1_S3_CLK_SRC] = &gcc_qupv3_wrap1_s3_clk_src.clkr,
+	[GCC_QUPV3_WRAP1_S4_CLK] = &gcc_qupv3_wrap1_s4_clk.clkr,
+	[GCC_QUPV3_WRAP1_S4_CLK_SRC] = &gcc_qupv3_wrap1_s4_clk_src.clkr,
+	[GCC_QUPV3_WRAP1_S5_CLK] = &gcc_qupv3_wrap1_s5_clk.clkr,
+	[GCC_QUPV3_WRAP1_S5_CLK_SRC] = &gcc_qupv3_wrap1_s5_clk_src.clkr,
+	[GCC_QUPV3_WRAP_0_M_AHB_CLK] = &gcc_qupv3_wrap_0_m_ahb_clk.clkr,
+	[GCC_QUPV3_WRAP_0_S_AHB_CLK] = &gcc_qupv3_wrap_0_s_ahb_clk.clkr,
+	[GCC_QUPV3_WRAP_1_M_AHB_CLK] = &gcc_qupv3_wrap_1_m_ahb_clk.clkr,
+	[GCC_QUPV3_WRAP_1_S_AHB_CLK] = &gcc_qupv3_wrap_1_s_ahb_clk.clkr,
+	[GCC_SDCC1_AHB_CLK] = &gcc_sdcc1_ahb_clk.clkr,
+	[GCC_SDCC1_APPS_CLK] = &gcc_sdcc1_apps_clk.clkr,
+	[GCC_SDCC1_APPS_CLK_SRC] = &gcc_sdcc1_apps_clk_src.clkr,
+	[GCC_SDCC1_ICE_CORE_CLK] = &gcc_sdcc1_ice_core_clk.clkr,
+	[GCC_SDCC1_ICE_CORE_CLK_SRC] = &gcc_sdcc1_ice_core_clk_src.clkr,
+	[GCC_SDCC2_AHB_CLK] = &gcc_sdcc2_ahb_clk.clkr,
+	[GCC_SDCC2_APPS_CLK] = &gcc_sdcc2_apps_clk.clkr,
+	[GCC_SDCC2_APPS_CLK_SRC] = &gcc_sdcc2_apps_clk_src.clkr,
+	[GCC_SYS_NOC_CPUSS_AHB_CLK] = &gcc_sys_noc_cpuss_ahb_clk.clkr,
+	[GCC_UFS_MEM_CLKREF_CLK] = &gcc_ufs_mem_clkref_clk.clkr,
+	[GCC_UFS_PHY_AHB_CLK] = &gcc_ufs_phy_ahb_clk.clkr,
+	[GCC_UFS_PHY_AXI_CLK] = &gcc_ufs_phy_axi_clk.clkr,
+	[GCC_UFS_PHY_AXI_CLK_SRC] = &gcc_ufs_phy_axi_clk_src.clkr,
+	[GCC_UFS_PHY_ICE_CORE_CLK] = &gcc_ufs_phy_ice_core_clk.clkr,
+	[GCC_UFS_PHY_ICE_CORE_CLK_SRC] = &gcc_ufs_phy_ice_core_clk_src.clkr,
+	[GCC_UFS_PHY_PHY_AUX_CLK] = &gcc_ufs_phy_phy_aux_clk.clkr,
+	[GCC_UFS_PHY_PHY_AUX_CLK_SRC] = &gcc_ufs_phy_phy_aux_clk_src.clkr,
+	[GCC_UFS_PHY_RX_SYMBOL_0_CLK] = &gcc_ufs_phy_rx_symbol_0_clk.clkr,
+	[GCC_UFS_PHY_RX_SYMBOL_1_CLK] = &gcc_ufs_phy_rx_symbol_1_clk.clkr,
+	[GCC_UFS_PHY_TX_SYMBOL_0_CLK] = &gcc_ufs_phy_tx_symbol_0_clk.clkr,
+	[GCC_UFS_PHY_UNIPRO_CORE_CLK] = &gcc_ufs_phy_unipro_core_clk.clkr,
+	[GCC_UFS_PHY_UNIPRO_CORE_CLK_SRC] =
+		&gcc_ufs_phy_unipro_core_clk_src.clkr,
+	[GCC_USB30_PRIM_MASTER_CLK] = &gcc_usb30_prim_master_clk.clkr,
+	[GCC_USB30_PRIM_MASTER_CLK_SRC] = &gcc_usb30_prim_master_clk_src.clkr,
+	[GCC_USB30_PRIM_MOCK_UTMI_CLK] = &gcc_usb30_prim_mock_utmi_clk.clkr,
+	[GCC_USB30_PRIM_MOCK_UTMI_CLK_SRC] =
+		&gcc_usb30_prim_mock_utmi_clk_src.clkr,
+	[GCC_USB30_PRIM_SLEEP_CLK] = &gcc_usb30_prim_sleep_clk.clkr,
+	[GCC_USB3_PRIM_CLKREF_CLK] = &gcc_usb3_prim_clkref_clk.clkr,
+	[GCC_USB3_PRIM_PHY_AUX_CLK] = &gcc_usb3_prim_phy_aux_clk.clkr,
+	[GCC_USB3_PRIM_PHY_AUX_CLK_SRC] = &gcc_usb3_prim_phy_aux_clk_src.clkr,
+	[GCC_USB3_PRIM_PHY_COM_AUX_CLK] = &gcc_usb3_prim_phy_com_aux_clk.clkr,
+	[GCC_USB3_PRIM_PHY_PIPE_CLK] = &gcc_usb3_prim_phy_pipe_clk.clkr,
+	[GCC_VIDEO_AHB_CLK] = &gcc_video_ahb_clk.clkr,
+	[GCC_VIDEO_AXI_CLK] = &gcc_video_axi_clk.clkr,
+	[GCC_VIDEO_THROTTLE_AXI_CLK] = &gcc_video_throttle_axi_clk.clkr,
+	[GCC_VIDEO_XO_CLK] = &gcc_video_xo_clk.clkr,
+	[GPLL0] = &gpll0.clkr,
+	[GPLL0_OUT_EVEN] = &gpll0_out_even.clkr,
+	[GPLL0_OUT_ODD] = &gpll0_out_odd.clkr,
+	[GPLL6] = &gpll6.clkr,
+	[GPLL6_OUT_EVEN] = &gpll6_out_even.clkr,
+	[GPLL7] = &gpll7.clkr,
+	[GCC_UFS_PHY_PHY_AUX_HW_CTL_CLK] = &gcc_ufs_phy_phy_aux_hw_ctl_clk.clkr,
+	[GCC_UFS_PHY_AXI_HW_CTL_CLK] = &gcc_ufs_phy_axi_hw_ctl_clk.clkr,
+	[GCC_AGGRE_UFS_PHY_AXI_HW_CTL_CLK] =
+				&gcc_aggre_ufs_phy_axi_hw_ctl_clk.clkr,
+	[GCC_UFS_PHY_UNIPRO_CORE_HW_CTL_CLK] =
+				&gcc_ufs_phy_unipro_core_hw_ctl_clk.clkr,
+	[GCC_UFS_PHY_ICE_CORE_HW_CTL_CLK] =
+				&gcc_ufs_phy_ice_core_hw_ctl_clk.clkr,
+	[GCC_GPU_GPLL0_MAIN_DIV_CLK_SRC] = &gcc_gpu_gpll0_main_div_clk_src.clkr,
+	[GCC_NPU_PLL0_MAIN_DIV_CLK_SRC] = &gcc_npu_pll0_main_div_clk_src.clkr,
+};
+
+static struct gdsc *gcc_sm6350_gdscs[] = {
+	[USB30_PRIM_GDSC] = &usb30_prim_gdsc,
+	[UFS_PHY_GDSC] = &ufs_phy_gdsc,
+	[HLOS1_VOTE_MMNOC_MMU_TBU_HF0_GDSC] = &hlos1_vote_mmnoc_mmu_tbu_hf0_gdsc,
+	[HLOS1_VOTE_MMNOC_MMU_TBU_HF1_GDSC] = &hlos1_vote_mmnoc_mmu_tbu_hf1_gdsc,
+};
+
+static const struct qcom_reset_map gcc_sm6350_resets[] = {
+	[GCC_QUSB2PHY_PRIM_BCR] = { 0x1d000 },
+	[GCC_QUSB2PHY_SEC_BCR] = { 0x1e000 },
+	[GCC_SDCC1_BCR] = { 0x4b000 },
+	[GCC_SDCC2_BCR] = { 0x20000 },
+	[GCC_UFS_PHY_BCR] = { 0x3a000 },
+	[GCC_USB30_PRIM_BCR] = { 0x1a000 },
+	[GCC_USB3_PHY_PRIM_BCR] = { 0x1c000 },
+	[GCC_USB3_DP_PHY_PRIM_BCR] = { 0x1c008 },
+};
+
+static const struct clk_rcg_dfs_data gcc_dfs_clocks[] = {
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s0_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s1_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s2_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s3_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s4_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap0_s5_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s0_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s1_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s2_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s3_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s4_clk_src),
+	DEFINE_RCG_DFS(gcc_qupv3_wrap1_s5_clk_src),
+};
+
+static const struct regmap_config gcc_sm6350_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.max_register = 0xbf030,
+	.fast_io = true,
+};
+
+static const struct qcom_cc_desc gcc_sm6350_desc = {
+	.config = &gcc_sm6350_regmap_config,
+	.clks = gcc_sm6350_clocks,
+	.num_clks = ARRAY_SIZE(gcc_sm6350_clocks),
+	.resets = gcc_sm6350_resets,
+	.num_resets = ARRAY_SIZE(gcc_sm6350_resets),
+	.gdscs = gcc_sm6350_gdscs,
+	.num_gdscs = ARRAY_SIZE(gcc_sm6350_gdscs),
+};
+
+static const struct of_device_id gcc_sm6350_match_table[] = {
+	{ .compatible = "qcom,gcc-sm6350" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, gcc_sm6350_match_table);
+
+static int gcc_sm6350_probe(struct platform_device *pdev)
+{
+	struct regmap *regmap;
+	int ret;
+
+	regmap = qcom_cc_map(pdev, &gcc_sm6350_desc);
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
+
+	/* Disable the GPLL0 active input to NPU and GPU via MISC registers */
+	regmap_update_bits(regmap, 0x4cf00, 0x3, 0x3);
+	regmap_update_bits(regmap, 0x45f00, 0x3, 0x3);
+
+	ret = qcom_cc_register_rcg_dfs(regmap, gcc_dfs_clocks,
+			ARRAY_SIZE(gcc_dfs_clocks));
+	if (ret)
+		return ret;
+
+	return qcom_cc_really_probe(pdev, &gcc_sm6350_desc, regmap);;
+}
+
+static struct platform_driver gcc_sm6350_driver = {
+	.probe = gcc_sm6350_probe,
+	.driver = {
+		.name = "gcc-sm6350",
+		.of_match_table = gcc_sm6350_match_table,
+	},
+};
+
+static int __init gcc_sm6350_init(void)
+{
+	return platform_driver_register(&gcc_sm6350_driver);
+}
+core_initcall(gcc_sm6350_init);
+
+static void __exit gcc_sm6350_exit(void)
+{
+	platform_driver_unregister(&gcc_sm6350_driver);
+}
+module_exit(gcc_sm6350_exit);
+
+MODULE_DESCRIPTION("QTI GCC SM6350 Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/clk/qcom/gpucc-sc7280.c b/drivers/clk/qcom/gpucc-sc7280.c
new file mode 100644
index 000000000000..9a832f2bcf49
--- /dev/null
+++ b/drivers/clk/qcom/gpucc-sc7280.c
@@ -0,0 +1,491 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#include <dt-bindings/clock/qcom,gpucc-sc7280.h>
+
+#include "clk-alpha-pll.h"
+#include "clk-branch.h"
+#include "clk-rcg.h"
+#include "clk-regmap-divider.h"
+#include "common.h"
+#include "reset.h"
+#include "gdsc.h"
+
+enum {
+	P_BI_TCXO,
+	P_GCC_GPU_GPLL0_CLK_SRC,
+	P_GCC_GPU_GPLL0_DIV_CLK_SRC,
+	P_GPU_CC_PLL0_OUT_MAIN,
+	P_GPU_CC_PLL1_OUT_MAIN,
+};
+
+static const struct pll_vco lucid_vco[] = {
+	{ 249600000, 2000000000, 0 },
+};
+
+static struct clk_alpha_pll gpu_cc_pll0 = {
+	.offset = 0x0,
+	.vco_table = lucid_vco,
+	.num_vco = ARRAY_SIZE(lucid_vco),
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID],
+	.clkr = {
+		.hw.init = &(struct clk_init_data){
+			.name = "gpu_cc_pll0",
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "bi_tcxo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_lucid_ops,
+		},
+	},
+};
+
+/* 500MHz Configuration */
+static const struct alpha_pll_config gpu_cc_pll1_config = {
+	.l = 0x1A,
+	.alpha = 0xAAA,
+	.config_ctl_val = 0x20485699,
+	.config_ctl_hi_val = 0x00002261,
+	.config_ctl_hi1_val = 0x329A299C,
+	.user_ctl_val = 0x00000001,
+	.user_ctl_hi_val = 0x00000805,
+	.user_ctl_hi1_val = 0x00000000,
+};
+
+static struct clk_alpha_pll gpu_cc_pll1 = {
+	.offset = 0x100,
+	.vco_table = lucid_vco,
+	.num_vco = ARRAY_SIZE(lucid_vco),
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID],
+	.clkr = {
+		.hw.init = &(struct clk_init_data){
+			.name = "gpu_cc_pll1",
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "bi_tcxo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_lucid_ops,
+		},
+	},
+};
+
+static const struct parent_map gpu_cc_parent_map_0[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPU_CC_PLL0_OUT_MAIN, 1 },
+	{ P_GPU_CC_PLL1_OUT_MAIN, 3 },
+	{ P_GCC_GPU_GPLL0_CLK_SRC, 5 },
+	{ P_GCC_GPU_GPLL0_DIV_CLK_SRC, 6 },
+};
+
+static const struct clk_parent_data gpu_cc_parent_data_0[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .hw = &gpu_cc_pll0.clkr.hw },
+	{ .hw = &gpu_cc_pll1.clkr.hw },
+	{ .fw_name = "gcc_gpu_gpll0_clk_src" },
+	{ .fw_name = "gcc_gpu_gpll0_div_clk_src" },
+};
+
+static const struct parent_map gpu_cc_parent_map_1[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_GPU_CC_PLL1_OUT_MAIN, 3 },
+	{ P_GCC_GPU_GPLL0_CLK_SRC, 5 },
+	{ P_GCC_GPU_GPLL0_DIV_CLK_SRC, 6 },
+};
+
+static const struct clk_parent_data gpu_cc_parent_data_1[] = {
+	{ .fw_name = "bi_tcxo", },
+	{ .hw = &gpu_cc_pll1.clkr.hw },
+	{ .fw_name = "gcc_gpu_gpll0_clk_src", },
+	{ .fw_name = "gcc_gpu_gpll0_div_clk_src", },
+};
+
+static const struct freq_tbl ftbl_gpu_cc_gmu_clk_src[] = {
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(200000000, P_GCC_GPU_GPLL0_DIV_CLK_SRC, 1.5, 0, 0),
+	F(500000000, P_GPU_CC_PLL1_OUT_MAIN, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gpu_cc_gmu_clk_src = {
+	.cmd_rcgr = 0x1120,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gpu_cc_parent_map_0,
+	.freq_tbl = ftbl_gpu_cc_gmu_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gpu_cc_gmu_clk_src",
+		.parent_data = gpu_cc_parent_data_0,
+		.num_parents = ARRAY_SIZE(gpu_cc_parent_data_0),
+		.ops = &clk_rcg2_shared_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gpu_cc_hub_clk_src[] = {
+	F(150000000, P_GCC_GPU_GPLL0_DIV_CLK_SRC, 2, 0, 0),
+	F(240000000, P_GCC_GPU_GPLL0_CLK_SRC, 2.5, 0, 0),
+	F(300000000, P_GCC_GPU_GPLL0_CLK_SRC, 2, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 gpu_cc_hub_clk_src = {
+	.cmd_rcgr = 0x117c,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = gpu_cc_parent_map_1,
+	.freq_tbl = ftbl_gpu_cc_hub_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gpu_cc_hub_clk_src",
+		.parent_data = gpu_cc_parent_data_1,
+		.num_parents = ARRAY_SIZE(gpu_cc_parent_data_1),
+		.ops = &clk_rcg2_shared_ops,
+	},
+};
+
+static struct clk_regmap_div gpu_cc_hub_ahb_div_clk_src = {
+	.reg = 0x11c0,
+	.shift = 0,
+	.width = 4,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "gpu_cc_hub_ahb_div_clk_src",
+		.parent_hws = (const struct clk_hw*[]){
+			&gpu_cc_hub_clk_src.clkr.hw,
+		},
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
+		.ops = &clk_regmap_div_ro_ops,
+	},
+};
+
+static struct clk_regmap_div gpu_cc_hub_cx_int_div_clk_src = {
+	.reg = 0x11bc,
+	.shift = 0,
+	.width = 4,
+	.clkr.hw.init = &(struct clk_init_data) {
+		.name = "gpu_cc_hub_cx_int_div_clk_src",
+		.parent_hws = (const struct clk_hw*[]){
+			&gpu_cc_hub_clk_src.clkr.hw,
+		},
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
+		.ops = &clk_regmap_div_ro_ops,
+	},
+};
+
+static struct clk_branch gpu_cc_ahb_clk = {
+	.halt_reg = 0x1078,
+	.halt_check = BRANCH_HALT_DELAY,
+	.clkr = {
+		.enable_reg = 0x1078,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpu_cc_ahb_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gpu_cc_hub_ahb_div_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gpu_cc_crc_ahb_clk = {
+	.halt_reg = 0x107c,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x107c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpu_cc_crc_ahb_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gpu_cc_hub_ahb_div_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gpu_cc_cx_gmu_clk = {
+	.halt_reg = 0x1098,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1098,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpu_cc_cx_gmu_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gpu_cc_gmu_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_aon_ops,
+		},
+	},
+};
+
+static struct clk_branch gpu_cc_cx_snoc_dvm_clk = {
+	.halt_reg = 0x108c,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x108c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpu_cc_cx_snoc_dvm_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gpu_cc_cxo_aon_clk = {
+	.halt_reg = 0x1004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x1004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpu_cc_cxo_aon_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gpu_cc_cxo_clk = {
+	.halt_reg = 0x109c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x109c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpu_cc_cxo_clk",
+			.ops = &clk_branch2_aon_ops,
+		},
+	},
+};
+
+static struct clk_branch gpu_cc_gx_gmu_clk = {
+	.halt_reg = 0x1064,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1064,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpu_cc_gx_gmu_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gpu_cc_gmu_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gpu_cc_hlos1_vote_gpu_smmu_clk = {
+	.halt_reg = 0x5000,
+	.halt_check = BRANCH_VOTED,
+	.clkr = {
+		.enable_reg = 0x5000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpu_cc_hlos1_vote_gpu_smmu_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gpu_cc_hub_aon_clk = {
+	.halt_reg = 0x1178,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1178,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpu_cc_hub_aon_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gpu_cc_hub_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_aon_ops,
+		},
+	},
+};
+
+static struct clk_branch gpu_cc_hub_cx_int_clk = {
+	.halt_reg = 0x1204,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1204,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpu_cc_hub_cx_int_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&gpu_cc_hub_cx_int_div_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_aon_ops,
+		},
+	},
+};
+
+static struct clk_branch gpu_cc_mnd1x_0_gfx3d_clk = {
+	.halt_reg = 0x802c,
+	.halt_check = BRANCH_HALT_SKIP,
+	.clkr = {
+		.enable_reg = 0x802c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpu_cc_mnd1x_0_gfx3d_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gpu_cc_mnd1x_1_gfx3d_clk = {
+	.halt_reg = 0x8030,
+	.halt_check = BRANCH_HALT_SKIP,
+	.clkr = {
+		.enable_reg = 0x8030,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpu_cc_mnd1x_1_gfx3d_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gpu_cc_sleep_clk = {
+	.halt_reg = 0x1090,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x1090,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gpu_cc_sleep_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct gdsc cx_gdsc = {
+	.gdscr = 0x106c,
+	.gds_hw_ctrl = 0x1540,
+	.pd = {
+		.name = "cx_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE | RETAIN_FF_ENABLE,
+};
+
+static struct gdsc gx_gdsc = {
+	.gdscr = 0x100c,
+	.clamp_io_ctrl = 0x1508,
+	.pd = {
+		.name = "gx_gdsc",
+		.power_on = gdsc_gx_do_nothing_enable,
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = CLAMP_IO | RETAIN_FF_ENABLE,
+};
+
+static struct gdsc *gpu_cc_sc7180_gdscs[] = {
+	[GPU_CC_CX_GDSC] = &cx_gdsc,
+	[GPU_CC_GX_GDSC] = &gx_gdsc,
+};
+
+static struct clk_regmap *gpu_cc_sc7280_clocks[] = {
+	[GPU_CC_AHB_CLK] = &gpu_cc_ahb_clk.clkr,
+	[GPU_CC_CRC_AHB_CLK] = &gpu_cc_crc_ahb_clk.clkr,
+	[GPU_CC_CX_GMU_CLK] = &gpu_cc_cx_gmu_clk.clkr,
+	[GPU_CC_CX_SNOC_DVM_CLK] = &gpu_cc_cx_snoc_dvm_clk.clkr,
+	[GPU_CC_CXO_AON_CLK] = &gpu_cc_cxo_aon_clk.clkr,
+	[GPU_CC_CXO_CLK] = &gpu_cc_cxo_clk.clkr,
+	[GPU_CC_GMU_CLK_SRC] = &gpu_cc_gmu_clk_src.clkr,
+	[GPU_CC_GX_GMU_CLK] = &gpu_cc_gx_gmu_clk.clkr,
+	[GPU_CC_HLOS1_VOTE_GPU_SMMU_CLK] = &gpu_cc_hlos1_vote_gpu_smmu_clk.clkr,
+	[GPU_CC_HUB_AHB_DIV_CLK_SRC] = &gpu_cc_hub_ahb_div_clk_src.clkr,
+	[GPU_CC_HUB_AON_CLK] = &gpu_cc_hub_aon_clk.clkr,
+	[GPU_CC_HUB_CLK_SRC] = &gpu_cc_hub_clk_src.clkr,
+	[GPU_CC_HUB_CX_INT_CLK] = &gpu_cc_hub_cx_int_clk.clkr,
+	[GPU_CC_HUB_CX_INT_DIV_CLK_SRC] = &gpu_cc_hub_cx_int_div_clk_src.clkr,
+	[GPU_CC_MND1X_0_GFX3D_CLK] = &gpu_cc_mnd1x_0_gfx3d_clk.clkr,
+	[GPU_CC_MND1X_1_GFX3D_CLK] = &gpu_cc_mnd1x_1_gfx3d_clk.clkr,
+	[GPU_CC_PLL0] = &gpu_cc_pll0.clkr,
+	[GPU_CC_PLL1] = &gpu_cc_pll1.clkr,
+	[GPU_CC_SLEEP_CLK] = &gpu_cc_sleep_clk.clkr,
+};
+
+static const struct regmap_config gpu_cc_sc7280_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.max_register = 0x8030,
+	.fast_io = true,
+};
+
+static const struct qcom_cc_desc gpu_cc_sc7280_desc = {
+	.config = &gpu_cc_sc7280_regmap_config,
+	.clks = gpu_cc_sc7280_clocks,
+	.num_clks = ARRAY_SIZE(gpu_cc_sc7280_clocks),
+	.gdscs = gpu_cc_sc7180_gdscs,
+	.num_gdscs = ARRAY_SIZE(gpu_cc_sc7180_gdscs),
+};
+
+static const struct of_device_id gpu_cc_sc7280_match_table[] = {
+	{ .compatible = "qcom,sc7280-gpucc" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, gpu_cc_sc7280_match_table);
+
+static int gpu_cc_sc7280_probe(struct platform_device *pdev)
+{
+	struct regmap *regmap;
+
+	regmap = qcom_cc_map(pdev, &gpu_cc_sc7280_desc);
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
+
+	clk_lucid_pll_configure(&gpu_cc_pll1, regmap, &gpu_cc_pll1_config);
+
+	/*
+	 * Keep the clocks always-ON
+	 * GPU_CC_CB_CLK, GPUCC_CX_GMU_CLK
+	 */
+	regmap_update_bits(regmap, 0x1170, BIT(0), BIT(0));
+	regmap_update_bits(regmap, 0x1098, BIT(0), BIT(0));
+
+	return qcom_cc_really_probe(pdev, &gpu_cc_sc7280_desc, regmap);
+}
+
+static struct platform_driver gpu_cc_sc7280_driver = {
+	.probe = gpu_cc_sc7280_probe,
+	.driver = {
+		.name = "gpu_cc-sc7280",
+		.of_match_table = gpu_cc_sc7280_match_table,
+	},
+};
+
+static int __init gpu_cc_sc7280_init(void)
+{
+	return platform_driver_register(&gpu_cc_sc7280_driver);
+}
+subsys_initcall(gpu_cc_sc7280_init);
+
+static void __exit gpu_cc_sc7280_exit(void)
+{
+	platform_driver_unregister(&gpu_cc_sc7280_driver);
+}
+module_exit(gpu_cc_sc7280_exit);
+
+MODULE_DESCRIPTION("QTI GPU_CC SC7280 Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/clk/qcom/gpucc-sm8150.c b/drivers/clk/qcom/gpucc-sm8150.c
index 80fb6f73601d..8422fd047493 100644
--- a/drivers/clk/qcom/gpucc-sm8150.c
+++ b/drivers/clk/qcom/gpucc-sm8150.c
@@ -82,6 +82,14 @@ static const struct freq_tbl ftbl_gpu_cc_gmu_clk_src[] = {
 	{ }
 };
 
+static const struct freq_tbl ftbl_gpu_cc_gmu_clk_src_sc8180x[] = {
+	F(19200000, P_BI_TCXO, 1, 0, 0),
+	F(200000000, P_GPLL0_OUT_MAIN_DIV, 1.5, 0, 0),
+	F(400000000, P_GPLL0_OUT_MAIN, 1.5, 0, 0),
+	F(500000000, P_GPU_CC_PLL1_OUT_MAIN, 1, 0, 0),
+	{ }
+};
+
 static struct clk_rcg2 gpu_cc_gmu_clk_src = {
 	.cmd_rcgr = 0x1120,
 	.mnd_width = 0,
@@ -277,6 +285,7 @@ static const struct qcom_cc_desc gpu_cc_sm8150_desc = {
 };
 
 static const struct of_device_id gpu_cc_sm8150_match_table[] = {
+	{ .compatible = "qcom,sc8180x-gpucc" },
 	{ .compatible = "qcom,sm8150-gpucc" },
 	{ }
 };
@@ -290,6 +299,9 @@ static int gpu_cc_sm8150_probe(struct platform_device *pdev)
 	if (IS_ERR(regmap))
 		return PTR_ERR(regmap);
 
+	if (of_device_is_compatible(pdev->dev.of_node, "qcom,sc8180x-gpucc"))
+		gpu_cc_gmu_clk_src.freq_tbl = ftbl_gpu_cc_gmu_clk_src_sc8180x;
+
 	clk_trion_pll_configure(&gpu_cc_pll1, regmap, &gpu_cc_pll1_config);
 
 	return qcom_cc_really_probe(pdev, &gpu_cc_sm8150_desc, regmap);
diff --git a/drivers/clk/qcom/lpass-gfm-sm8250.c b/drivers/clk/qcom/lpass-gfm-sm8250.c
index f5e31e692b9b..96f476f24eb2 100644
--- a/drivers/clk/qcom/lpass-gfm-sm8250.c
+++ b/drivers/clk/qcom/lpass-gfm-sm8250.c
@@ -251,15 +251,18 @@ static int lpass_gfm_clk_driver_probe(struct platform_device *pdev)
 	if (IS_ERR(cc->base))
 		return PTR_ERR(cc->base);
 
-	pm_runtime_enable(dev);
-	err = pm_clk_create(dev);
+	err = devm_pm_runtime_enable(dev);
 	if (err)
-		goto pm_clk_err;
+		return err;
+
+	err = devm_pm_clk_create(dev);
+	if (err)
+		return err;
 
 	err = of_pm_clk_add_clks(dev);
 	if (err < 0) {
 		dev_dbg(dev, "Failed to get lpass core voting clocks\n");
-		goto clk_reg_err;
+		return err;
 	}
 
 	for (i = 0; i < data->onecell_data->num; i++) {
@@ -273,22 +276,16 @@ static int lpass_gfm_clk_driver_probe(struct platform_device *pdev)
 
 		err = devm_clk_hw_register(dev, &data->gfm_clks[i]->hw);
 		if (err)
-			goto clk_reg_err;
+			return err;
 
 	}
 
 	err = devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get,
 					  data->onecell_data);
 	if (err)
-		goto clk_reg_err;
+		return err;
 
 	return 0;
-
-clk_reg_err:
-	pm_clk_destroy(dev);
-pm_clk_err:
-	pm_runtime_disable(dev);
-	return err;
 }
 
 static const struct of_device_id lpass_gfm_clk_match_table[] = {
diff --git a/drivers/clk/qcom/lpasscorecc-sc7180.c b/drivers/clk/qcom/lpasscorecc-sc7180.c
index 2e0ecc38efdd..ac09b7b840ab 100644
--- a/drivers/clk/qcom/lpasscorecc-sc7180.c
+++ b/drivers/clk/qcom/lpasscorecc-sc7180.c
@@ -356,32 +356,18 @@ static const struct qcom_cc_desc lpass_audio_hm_sc7180_desc = {
 	.num_gdscs = ARRAY_SIZE(lpass_audio_hm_sc7180_gdscs),
 };
 
-static void lpass_pm_runtime_disable(void *data)
-{
-	pm_runtime_disable(data);
-}
-
-static void lpass_pm_clk_destroy(void *data)
-{
-	pm_clk_destroy(data);
-}
-
 static int lpass_create_pm_clks(struct platform_device *pdev)
 {
 	int ret;
 
 	pm_runtime_use_autosuspend(&pdev->dev);
 	pm_runtime_set_autosuspend_delay(&pdev->dev, 500);
-	pm_runtime_enable(&pdev->dev);
 
-	ret = devm_add_action_or_reset(&pdev->dev, lpass_pm_runtime_disable, &pdev->dev);
+	ret = devm_pm_runtime_enable(&pdev->dev);
 	if (ret)
 		return ret;
 
-	ret = pm_clk_create(&pdev->dev);
-	if (ret)
-		return ret;
-	ret = devm_add_action_or_reset(&pdev->dev, lpass_pm_clk_destroy, &pdev->dev);
+	ret = devm_pm_clk_create(&pdev->dev);
 	if (ret)
 		return ret;
 
diff --git a/drivers/clk/qcom/mmcc-msm8994.c b/drivers/clk/qcom/mmcc-msm8994.c
new file mode 100644
index 000000000000..89c5f5fa7d9a
--- /dev/null
+++ b/drivers/clk/qcom/mmcc-msm8994.c
@@ -0,0 +1,2620 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, Konrad Dybcio <konrad.dybcio@somainline.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/clk-provider.h>
+#include <linux/regmap.h>
+#include <linux/reset-controller.h>
+#include <linux/clk.h>
+
+#include <dt-bindings/clock/qcom,mmcc-msm8994.h>
+
+#include "common.h"
+#include "clk-regmap.h"
+#include "clk-regmap-divider.h"
+#include "clk-alpha-pll.h"
+#include "clk-rcg.h"
+#include "clk-branch.h"
+#include "reset.h"
+#include "gdsc.h"
+
+
+enum {
+	P_XO,
+	P_GPLL0,
+	P_MMPLL0,
+	P_MMPLL1,
+	P_MMPLL3,
+	P_MMPLL4,
+	P_MMPLL5, /* Is this one even used by anything? Downstream doesn't tell. */
+	P_DSI0PLL,
+	P_DSI1PLL,
+	P_DSI0PLL_BYTE,
+	P_DSI1PLL_BYTE,
+	P_HDMIPLL,
+};
+static const struct parent_map mmcc_xo_gpll0_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 5 }
+};
+
+static const struct clk_parent_data mmcc_xo_gpll0[] = {
+	{ .fw_name = "xo" },
+	{ .fw_name = "gpll0" },
+};
+
+static const struct parent_map mmss_xo_hdmi_map[] = {
+	{ P_XO, 0 },
+	{ P_HDMIPLL, 3 }
+};
+
+static const struct clk_parent_data mmss_xo_hdmi[] = {
+	{ .fw_name = "xo" },
+	{ .fw_name = "hdmipll" },
+};
+
+static const struct parent_map mmcc_xo_dsi0pll_dsi1pll_map[] = {
+	{ P_XO, 0 },
+	{ P_DSI0PLL, 1 },
+	{ P_DSI1PLL, 2 }
+};
+
+static const struct clk_parent_data mmcc_xo_dsi0pll_dsi1pll[] = {
+	{ .fw_name = "xo" },
+	{ .fw_name = "dsi0pll" },
+	{ .fw_name = "dsi1pll" },
+};
+
+static const struct parent_map mmcc_xo_dsibyte_map[] = {
+	{ P_XO, 0 },
+	{ P_DSI0PLL_BYTE, 1 },
+	{ P_DSI1PLL_BYTE, 2 }
+};
+
+static const struct clk_parent_data mmcc_xo_dsibyte[] = {
+	{ .fw_name = "xo" },
+	{ .fw_name = "dsi0pllbyte" },
+	{ .fw_name = "dsi1pllbyte" },
+};
+
+static struct pll_vco mmpll_p_vco[] = {
+	{ 250000000, 500000000, 3 },
+	{ 500000000, 1000000000, 2 },
+	{ 1000000000, 1500000000, 1 },
+	{ 1500000000, 2000000000, 0 },
+};
+
+static struct pll_vco mmpll_t_vco[] = {
+	{ 500000000, 1500000000, 0 },
+};
+
+static const struct alpha_pll_config mmpll_p_config = {
+	.post_div_mask = 0xf00,
+};
+
+static struct clk_alpha_pll mmpll0_early = {
+	.offset = 0x0,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.vco_table = mmpll_p_vco,
+	.num_vco = ARRAY_SIZE(mmpll_p_vco),
+	.clkr = {
+		.enable_reg = 0x100,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "mmpll0_early",
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "xo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_ops,
+		},
+	},
+};
+
+static struct clk_alpha_pll_postdiv mmpll0 = {
+	.offset = 0x0,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.width = 4,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "mmpll0",
+		.parent_hws = (const struct clk_hw *[]){ &mmpll0_early.clkr.hw },
+		.num_parents = 1,
+		.ops = &clk_alpha_pll_postdiv_ops,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static struct clk_alpha_pll mmpll1_early = {
+	.offset = 0x30,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.vco_table = mmpll_p_vco,
+	.num_vco = ARRAY_SIZE(mmpll_p_vco),
+	.clkr = {
+		.enable_reg = 0x100,
+		.enable_mask = BIT(1),
+		.hw.init = &(struct clk_init_data){
+			.name = "mmpll1_early",
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "xo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_ops,
+		}
+	},
+};
+
+static struct clk_alpha_pll_postdiv mmpll1 = {
+	.offset = 0x30,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.width = 4,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "mmpll1",
+		.parent_hws = (const struct clk_hw *[]){ &mmpll1_early.clkr.hw },
+		.num_parents = 1,
+		.ops = &clk_alpha_pll_postdiv_ops,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static struct clk_alpha_pll mmpll3_early = {
+	.offset = 0x60,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.vco_table = mmpll_p_vco,
+	.num_vco = ARRAY_SIZE(mmpll_p_vco),
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "mmpll3_early",
+		.parent_data = &(const struct clk_parent_data){
+				.fw_name = "xo",
+		},
+		.num_parents = 1,
+		.ops = &clk_alpha_pll_ops,
+	},
+};
+
+static struct clk_alpha_pll_postdiv mmpll3 = {
+	.offset = 0x60,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.width = 4,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "mmpll3",
+		.parent_hws = (const struct clk_hw *[]){ &mmpll3_early.clkr.hw },
+		.num_parents = 1,
+		.ops = &clk_alpha_pll_postdiv_ops,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static struct clk_alpha_pll mmpll4_early = {
+	.offset = 0x90,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.vco_table = mmpll_t_vco,
+	.num_vco = ARRAY_SIZE(mmpll_t_vco),
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "mmpll4_early",
+		.parent_data = &(const struct clk_parent_data){
+				.fw_name = "xo",
+		},
+		.num_parents = 1,
+		.ops = &clk_alpha_pll_ops,
+	},
+};
+
+static struct clk_alpha_pll_postdiv mmpll4 = {
+	.offset = 0x90,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.width = 2,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "mmpll4",
+		.parent_hws = (const struct clk_hw *[]){ &mmpll4_early.clkr.hw },
+		.num_parents = 1,
+		.ops = &clk_alpha_pll_postdiv_ops,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static const struct parent_map mmcc_xo_gpll0_mmpll1_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 5 },
+	{ P_MMPLL1, 2 }
+};
+
+static const struct clk_parent_data mmcc_xo_gpll0_mmpll1[] = {
+	{ .fw_name = "xo" },
+	{ .fw_name = "gpll0" },
+	{ .hw = &mmpll1.clkr.hw },
+};
+
+static const struct parent_map mmcc_xo_gpll0_mmpll0_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 5 },
+	{ P_MMPLL0, 1 }
+};
+
+static const struct clk_parent_data mmcc_xo_gpll0_mmpll0[] = {
+	{ .fw_name = "xo" },
+	{ .fw_name = "gpll0" },
+	{ .hw = &mmpll0.clkr.hw },
+};
+
+static const struct parent_map mmcc_xo_gpll0_mmpll0_mmpll3_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 5 },
+	{ P_MMPLL0, 1 },
+	{ P_MMPLL3, 3 }
+};
+
+static const struct clk_parent_data mmcc_xo_gpll0_mmpll0_mmpll3[] = {
+	{ .fw_name = "xo" },
+	{ .fw_name = "gpll0" },
+	{ .hw = &mmpll0.clkr.hw },
+	{ .hw = &mmpll3.clkr.hw },
+};
+
+static const struct parent_map mmcc_xo_gpll0_mmpll0_mmpll4_map[] = {
+	{ P_XO, 0 },
+	{ P_GPLL0, 5 },
+	{ P_MMPLL0, 1 },
+	{ P_MMPLL4, 3 }
+};
+
+static const struct clk_parent_data mmcc_xo_gpll0_mmpll0_mmpll4[] = {
+	{ .fw_name = "xo" },
+	{ .fw_name = "gpll0" },
+	{ .hw = &mmpll0.clkr.hw },
+	{ .hw = &mmpll4.clkr.hw },
+};
+
+static struct clk_alpha_pll mmpll5_early = {
+	.offset = 0xc0,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.vco_table = mmpll_p_vco,
+	.num_vco = ARRAY_SIZE(mmpll_p_vco),
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "mmpll5_early",
+		.parent_data = &(const struct clk_parent_data){
+				.fw_name = "xo",
+		},
+		.num_parents = 1,
+		.ops = &clk_alpha_pll_ops,
+	},
+};
+
+static struct clk_alpha_pll_postdiv mmpll5 = {
+	.offset = 0xc0,
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+	.width = 4,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "mmpll5",
+		.parent_hws = (const struct clk_hw *[]){ &mmpll5_early.clkr.hw },
+		.num_parents = 1,
+		.ops = &clk_alpha_pll_postdiv_ops,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static const struct freq_tbl ftbl_ahb_clk_src[] = {
+	/* Note: There might be more frequencies desired here. */
+	F(19200000, P_XO, 1, 0, 0),
+	F(40000000, P_GPLL0, 15, 0, 0),
+	F(80000000, P_MMPLL0, 10, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 ahb_clk_src = {
+	.cmd_rcgr = 0x5000,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_mmpll0_map,
+	.freq_tbl = ftbl_ahb_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "ahb_clk_src",
+		.parent_data = mmcc_xo_gpll0_mmpll0,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0_mmpll0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_axi_clk_src[] = {
+	F(75000000, P_GPLL0, 8, 0, 0),
+	F(150000000, P_GPLL0, 4, 0, 0),
+	F(333430000, P_MMPLL1, 3.5, 0, 0),
+	F(466800000, P_MMPLL1, 2.5, 0, 0),
+	{ }
+};
+
+static const struct freq_tbl ftbl_axi_clk_src_8992[] = {
+	F(75000000, P_GPLL0, 8, 0, 0),
+	F(150000000, P_GPLL0, 4, 0, 0),
+	F(300000000, P_GPLL0, 2, 0, 0),
+	F(404000000, P_MMPLL1, 2, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 axi_clk_src = {
+	.cmd_rcgr = 0x5040,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_mmpll1_map,
+	.freq_tbl = ftbl_axi_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "axi_clk_src",
+		.parent_data = mmcc_xo_gpll0_mmpll1,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0_mmpll1),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_csi0_1_2_3_clk_src[] = {
+	F(100000000, P_GPLL0, 6, 0, 0),
+	F(240000000, P_GPLL0, 2.5, 0, 0),
+	F(266670000, P_MMPLL0, 3, 0, 0),
+	{ }
+};
+
+static const struct freq_tbl ftbl_csi0_1_2_3_clk_src_8992[] = {
+	F(100000000, P_GPLL0, 6, 0, 0),
+	F(266670000, P_MMPLL0, 3, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 csi0_clk_src = {
+	.cmd_rcgr = 0x3090,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_mmpll0_map,
+	.freq_tbl = ftbl_csi0_1_2_3_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "csi0_clk_src",
+		.parent_data = mmcc_xo_gpll0_mmpll0,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0_mmpll0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_vcodec0_clk_src[] = {
+	F(66670000, P_GPLL0, 9, 0, 0),
+	F(100000000, P_GPLL0, 6, 0, 0),
+	F(133330000, P_GPLL0, 4.5, 0, 0),
+	F(150000000, P_GPLL0, 4, 0, 0),
+	F(200000000, P_MMPLL0, 4, 0, 0),
+	F(240000000, P_GPLL0, 2.5, 0, 0),
+	F(266670000, P_MMPLL0, 3, 0, 0),
+	F(320000000, P_MMPLL0, 2.5, 0, 0),
+	F(510000000, P_MMPLL3, 2, 0, 0),
+	{ }
+};
+
+static const struct freq_tbl ftbl_vcodec0_clk_src_8992[] = {
+	F(66670000, P_GPLL0, 9, 0, 0),
+	F(100000000, P_GPLL0, 6, 0, 0),
+	F(133330000, P_GPLL0, 4.5, 0, 0),
+	F(200000000, P_MMPLL0, 4, 0, 0),
+	F(320000000, P_MMPLL0, 2.5, 0, 0),
+	F(510000000, P_MMPLL3, 2, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 vcodec0_clk_src = {
+	.cmd_rcgr = 0x1000,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_mmpll0_mmpll3_map,
+	.freq_tbl = ftbl_vcodec0_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "vcodec0_clk_src",
+		.parent_data = mmcc_xo_gpll0_mmpll0_mmpll3,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0_mmpll0_mmpll3),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 csi1_clk_src = {
+	.cmd_rcgr = 0x3100,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_mmpll0_map,
+	.freq_tbl = ftbl_csi0_1_2_3_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "csi1_clk_src",
+		.parent_data = mmcc_xo_gpll0_mmpll0,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0_mmpll0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 csi2_clk_src = {
+	.cmd_rcgr = 0x3160,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_mmpll0_map,
+	.freq_tbl = ftbl_csi0_1_2_3_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "csi2_clk_src",
+		.parent_data = mmcc_xo_gpll0_mmpll0,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0_mmpll0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 csi3_clk_src = {
+	.cmd_rcgr = 0x31c0,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_mmpll0_map,
+	.freq_tbl = ftbl_csi0_1_2_3_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "csi3_clk_src",
+		.parent_data = mmcc_xo_gpll0_mmpll0,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0_mmpll0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_vfe0_clk_src[] = {
+	F(80000000, P_GPLL0, 7.5, 0, 0),
+	F(100000000, P_GPLL0, 6, 0, 0),
+	F(200000000, P_GPLL0, 3, 0, 0),
+	F(320000000, P_MMPLL0, 2.5, 0, 0),
+	F(400000000, P_MMPLL0, 2, 0, 0),
+	F(480000000, P_MMPLL4, 2, 0, 0),
+	F(533330000, P_MMPLL0, 1.5, 0, 0),
+	F(600000000, P_GPLL0, 1, 0, 0),
+	{ }
+};
+
+static const struct freq_tbl ftbl_vfe0_1_clk_src_8992[] = {
+	F(80000000, P_GPLL0, 7.5, 0, 0),
+	F(100000000, P_GPLL0, 6, 0, 0),
+	F(200000000, P_GPLL0, 3, 0, 0),
+	F(320000000, P_MMPLL0, 2.5, 0, 0),
+	F(480000000, P_MMPLL4, 2, 0, 0),
+	F(600000000, P_GPLL0, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 vfe0_clk_src = {
+	.cmd_rcgr = 0x3600,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_mmpll0_mmpll4_map,
+	.freq_tbl = ftbl_vfe0_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "vfe0_clk_src",
+		.parent_data = mmcc_xo_gpll0_mmpll0_mmpll4,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0_mmpll0_mmpll4),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_vfe1_clk_src[] = {
+	F(80000000, P_GPLL0, 7.5, 0, 0),
+	F(100000000, P_GPLL0, 6, 0, 0),
+	F(200000000, P_GPLL0, 3, 0, 0),
+	F(320000000, P_MMPLL0, 2.5, 0, 0),
+	F(400000000, P_MMPLL0, 2, 0, 0),
+	F(533330000, P_MMPLL0, 1.5, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 vfe1_clk_src = {
+	.cmd_rcgr = 0x3620,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_mmpll0_mmpll4_map,
+	.freq_tbl = ftbl_vfe1_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "vfe1_clk_src",
+		.parent_data = mmcc_xo_gpll0_mmpll0_mmpll4,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0_mmpll0_mmpll4),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_cpp_clk_src[] = {
+	F(100000000, P_GPLL0, 6, 0, 0),
+	F(200000000, P_GPLL0, 3, 0, 0),
+	F(320000000, P_MMPLL0, 2.5, 0, 0),
+	F(480000000, P_MMPLL4, 2, 0, 0),
+	F(600000000, P_GPLL0, 1, 0, 0),
+	F(640000000, P_MMPLL4, 1.5, 0, 0),
+	{ }
+};
+
+static const struct freq_tbl ftbl_cpp_clk_src_8992[] = {
+	F(100000000, P_GPLL0, 6, 0, 0),
+	F(200000000, P_GPLL0, 3, 0, 0),
+	F(320000000, P_MMPLL0, 2.5, 0, 0),
+	F(480000000, P_MMPLL4, 2, 0, 0),
+	F(640000000, P_MMPLL4, 1.5, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 cpp_clk_src = {
+	.cmd_rcgr = 0x3640,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_mmpll0_mmpll4_map,
+	.freq_tbl = ftbl_cpp_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "cpp_clk_src",
+		.parent_data = mmcc_xo_gpll0_mmpll0_mmpll4,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0_mmpll0_mmpll4),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_jpeg0_1_clk_src[] = {
+	F(75000000, P_GPLL0, 8, 0, 0),
+	F(150000000, P_GPLL0, 4, 0, 0),
+	F(228570000, P_MMPLL0, 3.5, 0, 0),
+	F(266670000, P_MMPLL0, 3, 0, 0),
+	F(320000000, P_MMPLL0, 2.5, 0, 0),
+	F(480000000, P_MMPLL4, 2, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 jpeg1_clk_src = {
+	.cmd_rcgr = 0x3520,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_mmpll0_mmpll4_map,
+	.freq_tbl = ftbl_jpeg0_1_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "jpeg1_clk_src",
+		.parent_data = mmcc_xo_gpll0_mmpll0_mmpll4,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0_mmpll0_mmpll4),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_jpeg2_clk_src[] = {
+	F(75000000, P_GPLL0, 8, 0, 0),
+	F(133330000, P_GPLL0, 4.5, 0, 0),
+	F(150000000, P_GPLL0, 4, 0, 0),
+	F(228570000, P_MMPLL0, 3.5, 0, 0),
+	F(266670000, P_MMPLL0, 3, 0, 0),
+	F(320000000, P_MMPLL0, 2.5, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 jpeg2_clk_src = {
+	.cmd_rcgr = 0x3540,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_mmpll0_map,
+	.freq_tbl = ftbl_jpeg2_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "jpeg2_clk_src",
+		.parent_data = mmcc_xo_gpll0_mmpll0,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0_mmpll0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_csi2phytimer_clk_src[] = {
+	F(50000000, P_GPLL0, 12, 0, 0),
+	F(100000000, P_GPLL0, 6, 0, 0),
+	F(200000000, P_MMPLL0, 4, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 csi2phytimer_clk_src = {
+	.cmd_rcgr = 0x3060,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_mmpll0_map,
+	.freq_tbl = ftbl_csi2phytimer_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "csi2phytimer_clk_src",
+		.parent_data = mmcc_xo_gpll0_mmpll0,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0_mmpll0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_fd_core_clk_src[] = {
+	F(60000000, P_GPLL0, 10, 0, 0),
+	F(200000000, P_GPLL0, 3, 0, 0),
+	F(320000000, P_MMPLL0, 2.5, 0, 0),
+	F(400000000, P_MMPLL0, 2, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 fd_core_clk_src = {
+	.cmd_rcgr = 0x3b00,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_mmpll0_map,
+	.freq_tbl = ftbl_fd_core_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "fd_core_clk_src",
+		.parent_data = mmcc_xo_gpll0_mmpll0,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0_mmpll0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_mdp_clk_src[] = {
+	F(85710000, P_GPLL0, 7, 0, 0),
+	F(100000000, P_GPLL0, 6, 0, 0),
+	F(120000000, P_GPLL0, 5, 0, 0),
+	F(150000000, P_GPLL0, 4, 0, 0),
+	F(171430000, P_GPLL0, 3.5, 0, 0),
+	F(200000000, P_GPLL0, 3, 0, 0),
+	F(240000000, P_GPLL0, 2.5, 0, 0),
+	F(266670000, P_MMPLL0, 3, 0, 0),
+	F(300000000, P_GPLL0, 2, 0, 0),
+	F(320000000, P_MMPLL0, 2.5, 0, 0),
+	F(400000000, P_MMPLL0, 2, 0, 0),
+	{ }
+};
+
+static const struct freq_tbl ftbl_mdp_clk_src_8992[] = {
+	F(85710000, P_GPLL0, 7, 0, 0),
+	F(171430000, P_GPLL0, 3.5, 0, 0),
+	F(200000000, P_GPLL0, 3, 0, 0),
+	F(240000000, P_GPLL0, 2.5, 0, 0),
+	F(266670000, P_MMPLL0, 3, 0, 0),
+	F(320000000, P_MMPLL0, 2.5, 0, 0),
+	F(400000000, P_MMPLL0, 2, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 mdp_clk_src = {
+	.cmd_rcgr = 0x2040,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_mmpll0_map,
+	.freq_tbl = ftbl_mdp_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "mdp_clk_src",
+		.parent_data = mmcc_xo_gpll0_mmpll0,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0_mmpll0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 pclk0_clk_src = {
+	.cmd_rcgr = 0x2000,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_dsi0pll_dsi1pll_map,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "pclk0_clk_src",
+		.parent_data = mmcc_xo_dsi0pll_dsi1pll,
+		.num_parents = ARRAY_SIZE(mmcc_xo_dsi0pll_dsi1pll),
+		.ops = &clk_pixel_ops,
+		.flags = CLK_SET_RATE_PARENT | CLK_GET_RATE_NOCACHE,
+	},
+};
+
+static struct clk_rcg2 pclk1_clk_src = {
+	.cmd_rcgr = 0x2020,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_dsi0pll_dsi1pll_map,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "pclk1_clk_src",
+		.parent_data = mmcc_xo_dsi0pll_dsi1pll,
+		.num_parents = ARRAY_SIZE(mmcc_xo_dsi0pll_dsi1pll),
+		.ops = &clk_pixel_ops,
+		.flags = CLK_SET_RATE_PARENT | CLK_GET_RATE_NOCACHE,
+	},
+};
+
+static const struct freq_tbl ftbl_ocmemnoc_clk_src[] = {
+	F(19200000, P_XO, 1, 0, 0),
+	F(75000000, P_GPLL0, 8, 0, 0),
+	F(100000000, P_GPLL0, 6, 0, 0),
+	F(150000000, P_GPLL0, 4, 0, 0),
+	F(228570000, P_MMPLL0, 3.5, 0, 0),
+	F(266670000, P_MMPLL0, 3, 0, 0),
+	F(320000000, P_MMPLL0, 2.5, 0, 0),
+	F(400000000, P_MMPLL0, 2, 0, 0),
+	{ }
+};
+
+static const struct freq_tbl ftbl_ocmemnoc_clk_src_8992[] = {
+	F(19200000, P_XO, 1, 0, 0),
+	F(75000000, P_GPLL0, 8, 0, 0),
+	F(100000000, P_GPLL0, 6, 0, 0),
+	F(150000000, P_GPLL0, 4, 0, 0),
+	F(320000000, P_MMPLL0, 2.5, 0, 0),
+	F(400000000, P_MMPLL0, 2, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 ocmemnoc_clk_src = {
+	.cmd_rcgr = 0x5090,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_mmpll0_map,
+	.freq_tbl = ftbl_ocmemnoc_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "ocmemnoc_clk_src",
+		.parent_data = mmcc_xo_gpll0_mmpll0,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0_mmpll0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_cci_clk_src[] = {
+	F(19200000, P_XO, 1, 0, 0),
+	F(37500000, P_GPLL0, 16, 0, 0),
+	F(50000000, P_GPLL0, 12, 0, 0),
+	F(100000000, P_GPLL0, 6, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 cci_clk_src = {
+	.cmd_rcgr = 0x3300,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_map,
+	.freq_tbl = ftbl_cci_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "cci_clk_src",
+		.parent_data = mmcc_xo_gpll0,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_mmss_gp0_1_clk_src[] = {
+	F(10000, P_XO, 16, 10, 120),
+	F(24000, P_GPLL0, 16, 1, 50),
+	F(6000000, P_GPLL0, 10, 1, 10),
+	F(12000000, P_GPLL0, 10, 1, 5),
+	F(13000000, P_GPLL0, 4, 13, 150),
+	F(24000000, P_GPLL0, 5, 1, 5),
+	{ }
+};
+
+static struct clk_rcg2 mmss_gp0_clk_src = {
+	.cmd_rcgr = 0x3420,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_map,
+	.freq_tbl = ftbl_mmss_gp0_1_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "mmss_gp0_clk_src",
+		.parent_data = mmcc_xo_gpll0,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 mmss_gp1_clk_src = {
+	.cmd_rcgr = 0x3450,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_map,
+	.freq_tbl = ftbl_mmss_gp0_1_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "mmss_gp1_clk_src",
+		.parent_data = mmcc_xo_gpll0,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 jpeg0_clk_src = {
+	.cmd_rcgr = 0x3500,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_mmpll0_mmpll4_map,
+	.freq_tbl = ftbl_jpeg0_1_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "jpeg0_clk_src",
+		.parent_data = mmcc_xo_gpll0_mmpll0_mmpll4,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0_mmpll0_mmpll4),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 jpeg_dma_clk_src = {
+	.cmd_rcgr = 0x3560,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_mmpll0_mmpll4_map,
+	.freq_tbl = ftbl_jpeg0_1_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "jpeg_dma_clk_src",
+		.parent_data = mmcc_xo_gpll0_mmpll0_mmpll4,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0_mmpll0_mmpll4),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_mclk0_1_2_3_clk_src[] = {
+	F(4800000, P_XO, 4, 0, 0),
+	F(6000000, P_GPLL0, 10, 1, 10),
+	F(8000000, P_GPLL0, 15, 1, 5),
+	F(9600000, P_XO, 2, 0, 0),
+	F(16000000, P_MMPLL0, 10, 1, 5),
+	F(19200000, P_XO, 1, 0, 0),
+	F(24000000, P_GPLL0, 5, 1, 5),
+	F(32000000, P_MMPLL0, 5, 1, 5),
+	F(48000000, P_GPLL0, 12.5, 0, 0),
+	F(64000000, P_MMPLL0, 12.5, 0, 0),
+	{ }
+};
+
+static const struct freq_tbl ftbl_mclk0_clk_src_8992[] = {
+	F(4800000, P_XO, 4, 0, 0),
+	F(6000000, P_MMPLL4, 10, 1, 16),
+	F(8000000, P_MMPLL4, 10, 1, 12),
+	F(9600000, P_XO, 2, 0, 0),
+	F(12000000, P_MMPLL4, 10, 1, 8),
+	F(16000000, P_MMPLL4, 10, 1, 6),
+	F(19200000, P_XO, 1, 0, 0),
+	F(24000000, P_MMPLL4, 10, 1, 4),
+	F(32000000, P_MMPLL4, 10, 1, 3),
+	F(48000000, P_MMPLL4, 10, 1, 2),
+	F(64000000, P_MMPLL4, 15, 0, 0),
+	{ }
+};
+
+static const struct freq_tbl ftbl_mclk1_2_3_clk_src_8992[] = {
+	F(4800000, P_XO, 4, 0, 0),
+	F(6000000, P_MMPLL4, 10, 1, 16),
+	F(8000000, P_MMPLL4, 10, 1, 12),
+	F(9600000, P_XO, 2, 0, 0),
+	F(16000000, P_MMPLL4, 10, 1, 6),
+	F(19200000, P_XO, 1, 0, 0),
+	F(24000000, P_MMPLL4, 10, 1, 4),
+	F(32000000, P_MMPLL4, 10, 1, 3),
+	F(48000000, P_MMPLL4, 10, 1, 2),
+	F(64000000, P_MMPLL4, 15, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 mclk0_clk_src = {
+	.cmd_rcgr = 0x3360,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_mmpll0_map,
+	.freq_tbl = ftbl_mclk0_1_2_3_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "mclk0_clk_src",
+		.parent_data = mmcc_xo_gpll0_mmpll0,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0_mmpll0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 mclk1_clk_src = {
+	.cmd_rcgr = 0x3390,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_mmpll0_map,
+	.freq_tbl = ftbl_mclk0_1_2_3_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "mclk1_clk_src",
+		.parent_data = mmcc_xo_gpll0_mmpll0,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0_mmpll0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 mclk2_clk_src = {
+	.cmd_rcgr = 0x33c0,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_mmpll0_map,
+	.freq_tbl = ftbl_mclk0_1_2_3_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "mclk2_clk_src",
+		.parent_data = mmcc_xo_gpll0_mmpll0,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0_mmpll0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 mclk3_clk_src = {
+	.cmd_rcgr = 0x33f0,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_mmpll0_map,
+	.freq_tbl = ftbl_mclk0_1_2_3_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "mclk3_clk_src",
+		.parent_data = mmcc_xo_gpll0_mmpll0,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0_mmpll0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_csi0_1phytimer_clk_src[] = {
+	F(50000000, P_GPLL0, 12, 0, 0),
+	F(100000000, P_GPLL0, 6, 0, 0),
+	F(200000000, P_MMPLL0, 4, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 csi0phytimer_clk_src = {
+	.cmd_rcgr = 0x3000,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_mmpll0_map,
+	.freq_tbl = ftbl_csi0_1phytimer_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "csi0phytimer_clk_src",
+		.parent_data = mmcc_xo_gpll0_mmpll0,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0_mmpll0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 csi1phytimer_clk_src = {
+	.cmd_rcgr = 0x3030,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_mmpll0_map,
+	.freq_tbl = ftbl_csi0_1phytimer_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "csi1phytimer_clk_src",
+		.parent_data = mmcc_xo_gpll0_mmpll0,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0_mmpll0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 byte0_clk_src = {
+	.cmd_rcgr = 0x2120,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_dsibyte_map,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "byte0_clk_src",
+		.parent_data = mmcc_xo_dsibyte,
+		.num_parents = ARRAY_SIZE(mmcc_xo_dsibyte),
+		.ops = &clk_byte2_ops,
+		.flags = CLK_SET_RATE_PARENT | CLK_GET_RATE_NOCACHE,
+	},
+};
+
+static struct clk_rcg2 byte1_clk_src = {
+	.cmd_rcgr = 0x2140,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_dsibyte_map,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "byte1_clk_src",
+		.parent_data = mmcc_xo_dsibyte,
+		.num_parents = ARRAY_SIZE(mmcc_xo_dsibyte),
+		.ops = &clk_byte2_ops,
+		.flags = CLK_SET_RATE_PARENT | CLK_GET_RATE_NOCACHE,
+	},
+};
+
+static struct freq_tbl ftbl_mdss_esc0_1_clk[] = {
+	F(19200000, P_XO, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 esc0_clk_src = {
+	.cmd_rcgr = 0x2160,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_dsibyte_map,
+	.freq_tbl = ftbl_mdss_esc0_1_clk,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "esc0_clk_src",
+		.parent_data = mmcc_xo_dsibyte,
+		.num_parents = ARRAY_SIZE(mmcc_xo_dsibyte),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_rcg2 esc1_clk_src = {
+	.cmd_rcgr = 0x2180,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_dsibyte_map,
+	.freq_tbl = ftbl_mdss_esc0_1_clk,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "esc1_clk_src",
+		.parent_data = mmcc_xo_dsibyte,
+		.num_parents = ARRAY_SIZE(mmcc_xo_dsibyte),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct freq_tbl extpclk_freq_tbl[] = {
+	{ .src = P_HDMIPLL },
+	{ }
+};
+
+static struct clk_rcg2 extpclk_clk_src = {
+	.cmd_rcgr = 0x2060,
+	.hid_width = 5,
+	.parent_map = mmss_xo_hdmi_map,
+	.freq_tbl = extpclk_freq_tbl,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "extpclk_clk_src",
+		.parent_data = mmss_xo_hdmi,
+		.num_parents = ARRAY_SIZE(mmss_xo_hdmi),
+		.ops = &clk_rcg2_ops,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static struct freq_tbl ftbl_hdmi_clk_src[] = {
+	F(19200000, P_XO, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 hdmi_clk_src = {
+	.cmd_rcgr = 0x2100,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_map,
+	.freq_tbl = ftbl_hdmi_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "hdmi_clk_src",
+		.parent_data = mmcc_xo_gpll0,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct freq_tbl ftbl_mdss_vsync_clk[] = {
+	F(19200000, P_XO, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 vsync_clk_src = {
+	.cmd_rcgr = 0x2080,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_map,
+	.freq_tbl = ftbl_mdss_vsync_clk,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "vsync_clk_src",
+		.parent_data = mmcc_xo_gpll0,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_rbbmtimer_clk_src[] = {
+	F(19200000, P_XO, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 rbbmtimer_clk_src = {
+	.cmd_rcgr = 0x4090,
+	.hid_width = 5,
+	.parent_map = mmcc_xo_gpll0_map,
+	.freq_tbl = ftbl_rbbmtimer_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "rbbmtimer_clk_src",
+		.parent_data = mmcc_xo_gpll0,
+		.num_parents = ARRAY_SIZE(mmcc_xo_gpll0),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch camss_ahb_clk = {
+	.halt_reg = 0x348c,
+	.clkr = {
+		.enable_reg = 0x348c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_ahb_clk",
+			.parent_hws = (const struct clk_hw *[]){ &ahb_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_cci_cci_ahb_clk = {
+	.halt_reg = 0x3348,
+	.clkr = {
+		.enable_reg = 0x3348,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_cci_cci_ahb_clk",
+			.parent_hws = (const struct clk_hw *[]){ &ahb_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_cci_cci_clk = {
+	.halt_reg = 0x3344,
+	.clkr = {
+		.enable_reg = 0x3344,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_cci_cci_clk",
+			.parent_hws = (const struct clk_hw *[]){ &cci_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_vfe_cpp_ahb_clk = {
+	.halt_reg = 0x36b4,
+	.clkr = {
+		.enable_reg = 0x36b4,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_vfe_cpp_ahb_clk",
+			.parent_hws = (const struct clk_hw *[]){ &ahb_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_vfe_cpp_axi_clk = {
+	.halt_reg = 0x36c4,
+	.clkr = {
+		.enable_reg = 0x36c4,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_vfe_cpp_axi_clk",
+			.parent_hws = (const struct clk_hw *[]){ &axi_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_vfe_cpp_clk = {
+	.halt_reg = 0x36b0,
+	.clkr = {
+		.enable_reg = 0x36b0,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_vfe_cpp_clk",
+			.parent_hws = (const struct clk_hw *[]){ &cpp_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_csi0_ahb_clk = {
+	.halt_reg = 0x30bc,
+	.clkr = {
+		.enable_reg = 0x30bc,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_csi0_ahb_clk",
+			.parent_hws = (const struct clk_hw *[]){ &ahb_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_csi0_clk = {
+	.halt_reg = 0x30b4,
+	.clkr = {
+		.enable_reg = 0x30b4,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_csi0_clk",
+			.parent_hws = (const struct clk_hw *[]){ &csi0_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_csi0phy_clk = {
+	.halt_reg = 0x30c4,
+	.clkr = {
+		.enable_reg = 0x30c4,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_csi0phy_clk",
+			.parent_hws = (const struct clk_hw *[]){ &csi0_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_csi0pix_clk = {
+	.halt_reg = 0x30e4,
+	.clkr = {
+		.enable_reg = 0x30e4,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_csi0pix_clk",
+			.parent_hws = (const struct clk_hw *[]){ &csi0_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_csi0rdi_clk = {
+	.halt_reg = 0x30d4,
+	.clkr = {
+		.enable_reg = 0x30d4,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_csi0rdi_clk",
+			.parent_hws = (const struct clk_hw *[]){ &csi0_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_csi1_ahb_clk = {
+	.halt_reg = 0x3128,
+	.clkr = {
+		.enable_reg = 0x3128,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_csi1_ahb_clk",
+			.parent_hws = (const struct clk_hw *[]){ &ahb_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_csi1_clk = {
+	.halt_reg = 0x3124,
+	.clkr = {
+		.enable_reg = 0x3124,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_csi1_clk",
+			.parent_hws = (const struct clk_hw *[]){ &csi1_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_csi1phy_clk = {
+	.halt_reg = 0x3134,
+	.clkr = {
+		.enable_reg = 0x3134,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_csi1phy_clk",
+			.parent_hws = (const struct clk_hw *[]){ &csi1_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_csi1pix_clk = {
+	.halt_reg = 0x3154,
+	.clkr = {
+		.enable_reg = 0x3154,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_csi1pix_clk",
+			.parent_hws = (const struct clk_hw *[]){ &csi1_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_csi1rdi_clk = {
+	.halt_reg = 0x3144,
+	.clkr = {
+		.enable_reg = 0x3144,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_csi1rdi_clk",
+			.parent_hws = (const struct clk_hw *[]){ &csi1_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_csi2_ahb_clk = {
+	.halt_reg = 0x3188,
+	.clkr = {
+		.enable_reg = 0x3188,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_csi2_ahb_clk",
+			.parent_hws = (const struct clk_hw *[]){ &ahb_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_csi2_clk = {
+	.halt_reg = 0x3184,
+	.clkr = {
+		.enable_reg = 0x3184,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_csi2_clk",
+			.parent_hws = (const struct clk_hw *[]){ &csi1_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_csi2phy_clk = {
+	.halt_reg = 0x3194,
+	.clkr = {
+		.enable_reg = 0x3194,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_csi2phy_clk",
+			.parent_hws = (const struct clk_hw *[]){ &csi1_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_csi2pix_clk = {
+	.halt_reg = 0x31b4,
+	.clkr = {
+		.enable_reg = 0x31b4,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_csi2pix_clk",
+			.parent_hws = (const struct clk_hw *[]){ &csi1_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_csi2rdi_clk = {
+	.halt_reg = 0x31a4,
+	.clkr = {
+		.enable_reg = 0x31a4,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_csi2rdi_clk",
+			.parent_hws = (const struct clk_hw *[]){ &csi1_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_csi3_ahb_clk = {
+	.halt_reg = 0x31e8,
+	.clkr = {
+		.enable_reg = 0x31e8,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_csi3_ahb_clk",
+			.parent_hws = (const struct clk_hw *[]){ &ahb_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_csi3_clk = {
+	.halt_reg = 0x31e4,
+	.clkr = {
+		.enable_reg = 0x31e4,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_csi3_clk",
+			.parent_hws = (const struct clk_hw *[]){ &csi1_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_csi3phy_clk = {
+	.halt_reg = 0x31f4,
+	.clkr = {
+		.enable_reg = 0x31f4,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_csi3phy_clk",
+			.parent_hws = (const struct clk_hw *[]){ &csi1_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_csi3pix_clk = {
+	.halt_reg = 0x3214,
+	.clkr = {
+		.enable_reg = 0x3214,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_csi3pix_clk",
+			.parent_hws = (const struct clk_hw *[]){ &csi1_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_csi3rdi_clk = {
+	.halt_reg = 0x3204,
+	.clkr = {
+		.enable_reg = 0x3204,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_csi3rdi_clk",
+			.parent_hws = (const struct clk_hw *[]){ &csi1_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_csi_vfe0_clk = {
+	.halt_reg = 0x3704,
+	.clkr = {
+		.enable_reg = 0x3704,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_csi_vfe0_clk",
+			.parent_hws = (const struct clk_hw *[]){ &vfe0_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_csi_vfe1_clk = {
+	.halt_reg = 0x3714,
+	.clkr = {
+		.enable_reg = 0x3714,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_csi_vfe1_clk",
+			.parent_hws = (const struct clk_hw *[]){ &vfe1_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_gp0_clk = {
+	.halt_reg = 0x3444,
+	.clkr = {
+		.enable_reg = 0x3444,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_gp0_clk",
+			.parent_hws = (const struct clk_hw *[]){ &mmss_gp0_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_gp1_clk = {
+	.halt_reg = 0x3474,
+	.clkr = {
+		.enable_reg = 0x3474,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_gp1_clk",
+			.parent_hws = (const struct clk_hw *[]){ &mmss_gp1_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_ispif_ahb_clk = {
+	.halt_reg = 0x3224,
+	.clkr = {
+		.enable_reg = 0x3224,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_ispif_ahb_clk",
+			.parent_hws = (const struct clk_hw *[]){ &ahb_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_jpeg_dma_clk = {
+	.halt_reg = 0x35c0,
+	.clkr = {
+		.enable_reg = 0x35c0,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_jpeg_dma_clk",
+			.parent_hws = (const struct clk_hw *[]){ &jpeg_dma_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_jpeg_jpeg0_clk = {
+	.halt_reg = 0x35a8,
+	.clkr = {
+		.enable_reg = 0x35a8,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_jpeg_jpeg0_clk",
+			.parent_hws = (const struct clk_hw *[]){ &jpeg0_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_jpeg_jpeg1_clk = {
+	.halt_reg = 0x35ac,
+	.clkr = {
+		.enable_reg = 0x35ac,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_jpeg_jpeg1_clk",
+			.parent_hws = (const struct clk_hw *[]){ &jpeg1_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_jpeg_jpeg2_clk = {
+	.halt_reg = 0x35b0,
+	.clkr = {
+		.enable_reg = 0x35b0,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_jpeg_jpeg2_clk",
+			.parent_hws = (const struct clk_hw *[]){ &jpeg2_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_jpeg_jpeg_ahb_clk = {
+	.halt_reg = 0x35b4,
+	.clkr = {
+		.enable_reg = 0x35b4,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_jpeg_jpeg_ahb_clk",
+			.parent_hws = (const struct clk_hw *[]){ &ahb_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_jpeg_jpeg_axi_clk = {
+	.halt_reg = 0x35b8,
+	.clkr = {
+		.enable_reg = 0x35b8,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_jpeg_jpeg_axi_clk",
+			.parent_hws = (const struct clk_hw *[]){ &axi_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_mclk0_clk = {
+	.halt_reg = 0x3384,
+	.clkr = {
+		.enable_reg = 0x3384,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_mclk0_clk",
+			.parent_hws = (const struct clk_hw *[]){ &mclk0_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_mclk1_clk = {
+	.halt_reg = 0x33b4,
+	.clkr = {
+		.enable_reg = 0x33b4,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_mclk1_clk",
+			.parent_hws = (const struct clk_hw *[]){ &mclk1_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_mclk2_clk = {
+	.halt_reg = 0x33e4,
+	.clkr = {
+		.enable_reg = 0x33e4,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_mclk2_clk",
+			.parent_hws = (const struct clk_hw *[]){ &mclk2_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_mclk3_clk = {
+	.halt_reg = 0x3414,
+	.clkr = {
+		.enable_reg = 0x3414,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_mclk3_clk",
+			.parent_hws = (const struct clk_hw *[]){ &mclk3_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_micro_ahb_clk = {
+	.halt_reg = 0x3494,
+	.clkr = {
+		.enable_reg = 0x3494,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_micro_ahb_clk",
+			.parent_hws = (const struct clk_hw *[]){ &ahb_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_phy0_csi0phytimer_clk = {
+	.halt_reg = 0x3024,
+	.clkr = {
+		.enable_reg = 0x3024,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_phy0_csi0phytimer_clk",
+			.parent_hws = (const struct clk_hw *[]){ &csi0phytimer_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_phy1_csi1phytimer_clk = {
+	.halt_reg = 0x3054,
+	.clkr = {
+		.enable_reg = 0x3054,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_phy1_csi1phytimer_clk",
+			.parent_hws = (const struct clk_hw *[]){ &csi1phytimer_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_phy2_csi2phytimer_clk = {
+	.halt_reg = 0x3084,
+	.clkr = {
+		.enable_reg = 0x3084,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_phy2_csi2phytimer_clk",
+			.parent_hws = (const struct clk_hw *[]){ &csi2phytimer_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_top_ahb_clk = {
+	.halt_reg = 0x3484,
+	.clkr = {
+		.enable_reg = 0x3484,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_top_ahb_clk",
+			.parent_hws = (const struct clk_hw *[]){ &ahb_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_vfe_vfe0_clk = {
+	.halt_reg = 0x36a8,
+	.clkr = {
+		.enable_reg = 0x36a8,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_vfe_vfe0_clk",
+			.parent_hws = (const struct clk_hw *[]){ &vfe0_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_vfe_vfe1_clk = {
+	.halt_reg = 0x36ac,
+	.clkr = {
+		.enable_reg = 0x36ac,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_vfe_vfe1_clk",
+			.parent_hws = (const struct clk_hw *[]){ &vfe1_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_vfe_vfe_ahb_clk = {
+	.halt_reg = 0x36b8,
+	.clkr = {
+		.enable_reg = 0x36b8,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_vfe_vfe_ahb_clk",
+			.parent_hws = (const struct clk_hw *[]){ &ahb_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch camss_vfe_vfe_axi_clk = {
+	.halt_reg = 0x36bc,
+	.clkr = {
+		.enable_reg = 0x36bc,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "camss_vfe_vfe_axi_clk",
+			.parent_hws = (const struct clk_hw *[]){ &axi_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch fd_ahb_clk = {
+	.halt_reg = 0x3b74,
+	.clkr = {
+		.enable_reg = 0x3b74,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "fd_ahb_clk",
+			.parent_hws = (const struct clk_hw *[]){ &ahb_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch fd_axi_clk = {
+	.halt_reg = 0x3b70,
+	.clkr = {
+		.enable_reg = 0x3b70,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "fd_axi_clk",
+			.parent_hws = (const struct clk_hw *[]){ &axi_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch fd_core_clk = {
+	.halt_reg = 0x3b68,
+	.clkr = {
+		.enable_reg = 0x3b68,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "fd_core_clk",
+			.parent_hws = (const struct clk_hw *[]){ &fd_core_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch fd_core_uar_clk = {
+	.halt_reg = 0x3b6c,
+	.clkr = {
+		.enable_reg = 0x3b6c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "fd_core_uar_clk",
+			.parent_hws = (const struct clk_hw *[]){ &fd_core_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch mdss_ahb_clk = {
+	.halt_reg = 0x2308,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x2308,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "mdss_ahb_clk",
+			.parent_hws = (const struct clk_hw *[]){ &ahb_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch mdss_axi_clk = {
+	.halt_reg = 0x2310,
+	.clkr = {
+		.enable_reg = 0x2310,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "mdss_axi_clk",
+			.parent_hws = (const struct clk_hw *[]){ &axi_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch mdss_byte0_clk = {
+	.halt_reg = 0x233c,
+	.clkr = {
+		.enable_reg = 0x233c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "mdss_byte0_clk",
+			.parent_hws = (const struct clk_hw *[]){ &byte0_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch mdss_byte1_clk = {
+	.halt_reg = 0x2340,
+	.clkr = {
+		.enable_reg = 0x2340,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "mdss_byte1_clk",
+			.parent_hws = (const struct clk_hw *[]){ &byte1_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch mdss_esc0_clk = {
+	.halt_reg = 0x2344,
+	.clkr = {
+		.enable_reg = 0x2344,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "mdss_esc0_clk",
+			.parent_hws = (const struct clk_hw *[]){ &esc0_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch mdss_esc1_clk = {
+	.halt_reg = 0x2348,
+	.clkr = {
+		.enable_reg = 0x2348,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "mdss_esc1_clk",
+			.parent_hws = (const struct clk_hw *[]){ &esc1_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch mdss_extpclk_clk = {
+	.halt_reg = 0x2324,
+	.clkr = {
+		.enable_reg = 0x2324,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "mdss_extpclk_clk",
+			.parent_hws = (const struct clk_hw *[]){ &extpclk_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch mdss_hdmi_ahb_clk = {
+	.halt_reg = 0x230c,
+	.clkr = {
+		.enable_reg = 0x230c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "mdss_hdmi_ahb_clk",
+			.parent_hws = (const struct clk_hw *[]){ &ahb_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch mdss_hdmi_clk = {
+	.halt_reg = 0x2338,
+	.clkr = {
+		.enable_reg = 0x2338,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "mdss_hdmi_clk",
+			.parent_hws = (const struct clk_hw *[]){ &hdmi_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch mdss_mdp_clk = {
+	.halt_reg = 0x231c,
+	.clkr = {
+		.enable_reg = 0x231c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "mdss_mdp_clk",
+			.parent_hws = (const struct clk_hw *[]){ &mdp_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch mdss_pclk0_clk = {
+	.halt_reg = 0x2314,
+	.clkr = {
+		.enable_reg = 0x2314,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "mdss_pclk0_clk",
+			.parent_hws = (const struct clk_hw *[]){ &pclk0_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch mdss_pclk1_clk = {
+	.halt_reg = 0x2318,
+	.clkr = {
+		.enable_reg = 0x2318,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "mdss_pclk1_clk",
+			.parent_hws = (const struct clk_hw *[]){ &pclk1_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch mdss_vsync_clk = {
+	.halt_reg = 0x2328,
+	.clkr = {
+		.enable_reg = 0x2328,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "mdss_vsync_clk",
+			.parent_hws = (const struct clk_hw *[]){ &vsync_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch mmss_misc_ahb_clk = {
+	.halt_reg = 0x502c,
+	.clkr = {
+		.enable_reg = 0x502c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "mmss_misc_ahb_clk",
+			.parent_hws = (const struct clk_hw *[]){ &ahb_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch mmss_mmssnoc_axi_clk = {
+	.halt_reg = 0x506c,
+	.clkr = {
+		.enable_reg = 0x506c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "mmss_mmssnoc_axi_clk",
+			.parent_hws = (const struct clk_hw *[]){ &axi_clk_src.clkr.hw },
+			.num_parents = 1,
+			/* Gating this clock will wreck havoc among MMSS! */
+			.flags = CLK_SET_RATE_PARENT | CLK_IS_CRITICAL,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch mmss_s0_axi_clk = {
+	.halt_reg = 0x5064,
+	.clkr = {
+		.enable_reg = 0x5064,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "mmss_s0_axi_clk",
+			.parent_hws = (const struct clk_hw *[]){ &axi_clk_src.clkr.hw, },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch ocmemcx_ocmemnoc_clk = {
+	.halt_reg = 0x4058,
+	.clkr = {
+		.enable_reg = 0x4058,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "ocmemcx_ocmemnoc_clk",
+			.parent_hws = (const struct clk_hw *[]){ &ocmemnoc_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch oxili_gfx3d_clk = {
+	.halt_reg = 0x4028,
+	.clkr = {
+		.enable_reg = 0x4028,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "oxili_gfx3d_clk",
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "oxili_gfx3d_clk_src",
+				.name = "oxili_gfx3d_clk_src"
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch oxili_rbbmtimer_clk = {
+	.halt_reg = 0x40b0,
+	.clkr = {
+		.enable_reg = 0x40b0,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "oxili_rbbmtimer_clk",
+			.parent_hws = (const struct clk_hw *[]){ &rbbmtimer_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch oxilicx_ahb_clk = {
+	.halt_reg = 0x403c,
+	.clkr = {
+		.enable_reg = 0x403c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "oxilicx_ahb_clk",
+			.parent_hws = (const struct clk_hw *[]){ &ahb_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch venus0_ahb_clk = {
+	.halt_reg = 0x1030,
+	.clkr = {
+		.enable_reg = 0x1030,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "venus0_ahb_clk",
+			.parent_hws = (const struct clk_hw *[]){ &ahb_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch venus0_axi_clk = {
+	.halt_reg = 0x1034,
+	.clkr = {
+		.enable_reg = 0x1034,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "venus0_axi_clk",
+			.parent_hws = (const struct clk_hw *[]){ &axi_clk_src.clkr.hw },
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch venus0_ocmemnoc_clk = {
+	.halt_reg = 0x1038,
+	.clkr = {
+		.enable_reg = 0x1038,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "venus0_ocmemnoc_clk",
+			.parent_hws = (const struct clk_hw *[]){ &ocmemnoc_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch venus0_vcodec0_clk = {
+	.halt_reg = 0x1028,
+	.clkr = {
+		.enable_reg = 0x1028,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "venus0_vcodec0_clk",
+			.parent_hws = (const struct clk_hw *[]){ &vcodec0_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch venus0_core0_vcodec_clk = {
+	.halt_reg = 0x1048,
+	.clkr = {
+		.enable_reg = 0x1048,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "venus0_core0_vcodec_clk",
+			.parent_hws = (const struct clk_hw *[]){ &vcodec0_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch venus0_core1_vcodec_clk = {
+	.halt_reg = 0x104c,
+	.clkr = {
+		.enable_reg = 0x104c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "venus0_core1_vcodec_clk",
+			.parent_hws = (const struct clk_hw *[]){ &vcodec0_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch venus0_core2_vcodec_clk = {
+	.halt_reg = 0x1054,
+	.clkr = {
+		.enable_reg = 0x1054,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "venus0_core2_vcodec_clk",
+			.parent_hws = (const struct clk_hw *[]){ &vcodec0_clk_src.clkr.hw },
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct gdsc venus_gdsc = {
+	.gdscr = 0x1024,
+	.cxcs = (unsigned int []){ 0x1038, 0x1034, 0x1048 },
+	.cxc_count = 3,
+	.pd = {
+		.name = "venus_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc venus_core0_gdsc = {
+	.gdscr = 0x1040,
+	.cxcs = (unsigned int []){ 0x1048 },
+	.cxc_count = 1,
+	.pd = {
+		.name = "venus_core0_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = HW_CTRL,
+};
+
+static struct gdsc venus_core1_gdsc = {
+	.gdscr = 0x1044,
+	.cxcs = (unsigned int []){ 0x104c },
+	.cxc_count = 1,
+	.pd = {
+	.name = "venus_core1_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = HW_CTRL,
+};
+
+static struct gdsc venus_core2_gdsc = {
+	.gdscr = 0x1050,
+	.cxcs = (unsigned int []){ 0x1054 },
+	.cxc_count = 1,
+	.pd = {
+		.name = "venus_core2_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = HW_CTRL,
+};
+
+static struct gdsc mdss_gdsc = {
+	.gdscr = 0x2304,
+	.cxcs = (unsigned int []){ 0x2310, 0x231c },
+	.cxc_count = 2,
+	.pd = {
+		.name = "mdss_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc camss_top_gdsc = {
+	.gdscr = 0x34a0,
+	.cxcs = (unsigned int []){ 0x3704, 0x3714, 0x3494 },
+	.cxc_count = 3,
+	.pd = {
+		.name = "camss_top_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc jpeg_gdsc = {
+	.gdscr = 0x35a4,
+	.cxcs = (unsigned int []){ 0x35a8 },
+	.cxc_count = 1,
+	.pd = {
+		.name = "jpeg_gdsc",
+	},
+	.parent = &camss_top_gdsc.pd,
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc vfe_gdsc = {
+	.gdscr = 0x36a4,
+	.cxcs = (unsigned int []){ 0x36bc },
+	.cxc_count = 1,
+	.pd = {
+		.name = "vfe_gdsc",
+	},
+	.parent = &camss_top_gdsc.pd,
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc cpp_gdsc = {
+	.gdscr = 0x36d4,
+	.cxcs = (unsigned int []){ 0x36c4, 0x36b0 },
+	.cxc_count = 2,
+	.pd = {
+		.name = "cpp_gdsc",
+	},
+	.parent = &camss_top_gdsc.pd,
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc fd_gdsc = {
+	.gdscr = 0x3b64,
+	.cxcs = (unsigned int []){ 0x3b70, 0x3b68 },
+	.pd = {
+		.name = "fd_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc oxili_cx_gdsc = {
+	.gdscr = 0x4034,
+	.pd = {
+		.name = "oxili_cx_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
+};
+
+static struct gdsc oxili_gx_gdsc = {
+	.gdscr = 0x4024,
+	.cxcs = (unsigned int []){ 0x4028 },
+	.cxc_count = 1,
+	.pd = {
+		.name = "oxili_gx_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.parent = &oxili_cx_gdsc.pd,
+	.flags = CLAMP_IO,
+	.supply = "VDD_GFX",
+};
+
+static struct clk_regmap *mmcc_msm8994_clocks[] = {
+	[MMPLL0_EARLY] = &mmpll0_early.clkr,
+	[MMPLL0_PLL] = &mmpll0.clkr,
+	[MMPLL1_EARLY] = &mmpll1_early.clkr,
+	[MMPLL1_PLL] = &mmpll1.clkr,
+	[MMPLL3_EARLY] = &mmpll3_early.clkr,
+	[MMPLL3_PLL] = &mmpll3.clkr,
+	[MMPLL4_EARLY] = &mmpll4_early.clkr,
+	[MMPLL4_PLL] = &mmpll4.clkr,
+	[MMPLL5_EARLY] = &mmpll5_early.clkr,
+	[MMPLL5_PLL] = &mmpll5.clkr,
+	[AHB_CLK_SRC] = &ahb_clk_src.clkr,
+	[AXI_CLK_SRC] = &axi_clk_src.clkr,
+	[CSI0_CLK_SRC] = &csi0_clk_src.clkr,
+	[CSI1_CLK_SRC] = &csi1_clk_src.clkr,
+	[CSI2_CLK_SRC] = &csi2_clk_src.clkr,
+	[CSI3_CLK_SRC] = &csi3_clk_src.clkr,
+	[VFE0_CLK_SRC] = &vfe0_clk_src.clkr,
+	[VFE1_CLK_SRC] = &vfe1_clk_src.clkr,
+	[CPP_CLK_SRC] = &cpp_clk_src.clkr,
+	[JPEG0_CLK_SRC] = &jpeg0_clk_src.clkr,
+	[JPEG1_CLK_SRC] = &jpeg1_clk_src.clkr,
+	[JPEG2_CLK_SRC] = &jpeg2_clk_src.clkr,
+	[CSI2PHYTIMER_CLK_SRC] = &csi2phytimer_clk_src.clkr,
+	[FD_CORE_CLK_SRC] = &fd_core_clk_src.clkr,
+	[MDP_CLK_SRC] = &mdp_clk_src.clkr,
+	[PCLK0_CLK_SRC] = &pclk0_clk_src.clkr,
+	[PCLK1_CLK_SRC] = &pclk1_clk_src.clkr,
+	[OCMEMNOC_CLK_SRC] = &ocmemnoc_clk_src.clkr,
+	[CCI_CLK_SRC] = &cci_clk_src.clkr,
+	[MMSS_GP0_CLK_SRC] = &mmss_gp0_clk_src.clkr,
+	[MMSS_GP1_CLK_SRC] = &mmss_gp1_clk_src.clkr,
+	[JPEG_DMA_CLK_SRC] = &jpeg_dma_clk_src.clkr,
+	[MCLK0_CLK_SRC] = &mclk0_clk_src.clkr,
+	[MCLK1_CLK_SRC] = &mclk1_clk_src.clkr,
+	[MCLK2_CLK_SRC] = &mclk2_clk_src.clkr,
+	[MCLK3_CLK_SRC] = &mclk3_clk_src.clkr,
+	[CSI0PHYTIMER_CLK_SRC] = &csi0phytimer_clk_src.clkr,
+	[CSI1PHYTIMER_CLK_SRC] = &csi1phytimer_clk_src.clkr,
+	[BYTE0_CLK_SRC] = &byte0_clk_src.clkr,
+	[BYTE1_CLK_SRC] = &byte1_clk_src.clkr,
+	[ESC0_CLK_SRC] = &esc0_clk_src.clkr,
+	[ESC1_CLK_SRC] = &esc1_clk_src.clkr,
+	[MDSS_ESC0_CLK] = &mdss_esc0_clk.clkr,
+	[MDSS_ESC1_CLK] = &mdss_esc1_clk.clkr,
+	[EXTPCLK_CLK_SRC] = &extpclk_clk_src.clkr,
+	[HDMI_CLK_SRC] = &hdmi_clk_src.clkr,
+	[VSYNC_CLK_SRC] = &vsync_clk_src.clkr,
+	[RBBMTIMER_CLK_SRC] = &rbbmtimer_clk_src.clkr,
+	[CAMSS_AHB_CLK] = &camss_ahb_clk.clkr,
+	[CAMSS_CCI_CCI_AHB_CLK] = &camss_cci_cci_ahb_clk.clkr,
+	[CAMSS_CCI_CCI_CLK] = &camss_cci_cci_clk.clkr,
+	[CAMSS_VFE_CPP_AHB_CLK] = &camss_vfe_cpp_ahb_clk.clkr,
+	[CAMSS_VFE_CPP_AXI_CLK] = &camss_vfe_cpp_axi_clk.clkr,
+	[CAMSS_VFE_CPP_CLK] = &camss_vfe_cpp_clk.clkr,
+	[CAMSS_CSI0_AHB_CLK] = &camss_csi0_ahb_clk.clkr,
+	[CAMSS_CSI0_CLK] = &camss_csi0_clk.clkr,
+	[CAMSS_CSI0PHY_CLK] = &camss_csi0phy_clk.clkr,
+	[CAMSS_CSI0PIX_CLK] = &camss_csi0pix_clk.clkr,
+	[CAMSS_CSI0RDI_CLK] = &camss_csi0rdi_clk.clkr,
+	[CAMSS_CSI1_AHB_CLK] = &camss_csi1_ahb_clk.clkr,
+	[CAMSS_CSI1_CLK] = &camss_csi1_clk.clkr,
+	[CAMSS_CSI1PHY_CLK] = &camss_csi1phy_clk.clkr,
+	[CAMSS_CSI1PIX_CLK] = &camss_csi1pix_clk.clkr,
+	[CAMSS_CSI1RDI_CLK] = &camss_csi1rdi_clk.clkr,
+	[CAMSS_CSI2_AHB_CLK] = &camss_csi2_ahb_clk.clkr,
+	[CAMSS_CSI2_CLK] = &camss_csi2_clk.clkr,
+	[CAMSS_CSI2PHY_CLK] = &camss_csi2phy_clk.clkr,
+	[CAMSS_CSI2PIX_CLK] = &camss_csi2pix_clk.clkr,
+	[CAMSS_CSI2RDI_CLK] = &camss_csi2rdi_clk.clkr,
+	[CAMSS_CSI3_AHB_CLK] = &camss_csi3_ahb_clk.clkr,
+	[CAMSS_CSI3_CLK] = &camss_csi3_clk.clkr,
+	[CAMSS_CSI3PHY_CLK] = &camss_csi3phy_clk.clkr,
+	[CAMSS_CSI3PIX_CLK] = &camss_csi3pix_clk.clkr,
+	[CAMSS_CSI3RDI_CLK] = &camss_csi3rdi_clk.clkr,
+	[CAMSS_CSI_VFE0_CLK] = &camss_csi_vfe0_clk.clkr,
+	[CAMSS_CSI_VFE1_CLK] = &camss_csi_vfe1_clk.clkr,
+	[CAMSS_GP0_CLK] = &camss_gp0_clk.clkr,
+	[CAMSS_GP1_CLK] = &camss_gp1_clk.clkr,
+	[CAMSS_ISPIF_AHB_CLK] = &camss_ispif_ahb_clk.clkr,
+	[CAMSS_JPEG_DMA_CLK] = &camss_jpeg_dma_clk.clkr,
+	[CAMSS_JPEG_JPEG0_CLK] = &camss_jpeg_jpeg0_clk.clkr,
+	[CAMSS_JPEG_JPEG1_CLK] = &camss_jpeg_jpeg1_clk.clkr,
+	[CAMSS_JPEG_JPEG2_CLK] = &camss_jpeg_jpeg2_clk.clkr,
+	[CAMSS_JPEG_JPEG_AHB_CLK] = &camss_jpeg_jpeg_ahb_clk.clkr,
+	[CAMSS_JPEG_JPEG_AXI_CLK] = &camss_jpeg_jpeg_axi_clk.clkr,
+	[CAMSS_MCLK0_CLK] = &camss_mclk0_clk.clkr,
+	[CAMSS_MCLK1_CLK] = &camss_mclk1_clk.clkr,
+	[CAMSS_MCLK2_CLK] = &camss_mclk2_clk.clkr,
+	[CAMSS_MCLK3_CLK] = &camss_mclk3_clk.clkr,
+	[CAMSS_MICRO_AHB_CLK] = &camss_micro_ahb_clk.clkr,
+	[CAMSS_PHY0_CSI0PHYTIMER_CLK] = &camss_phy0_csi0phytimer_clk.clkr,
+	[CAMSS_PHY1_CSI1PHYTIMER_CLK] = &camss_phy1_csi1phytimer_clk.clkr,
+	[CAMSS_PHY2_CSI2PHYTIMER_CLK] = &camss_phy2_csi2phytimer_clk.clkr,
+	[CAMSS_TOP_AHB_CLK] = &camss_top_ahb_clk.clkr,
+	[CAMSS_VFE_VFE0_CLK] = &camss_vfe_vfe0_clk.clkr,
+	[CAMSS_VFE_VFE1_CLK] = &camss_vfe_vfe1_clk.clkr,
+	[CAMSS_VFE_VFE_AHB_CLK] = &camss_vfe_vfe_ahb_clk.clkr,
+	[CAMSS_VFE_VFE_AXI_CLK] = &camss_vfe_vfe_axi_clk.clkr,
+	[FD_AHB_CLK] = &fd_ahb_clk.clkr,
+	[FD_AXI_CLK] = &fd_axi_clk.clkr,
+	[FD_CORE_CLK] = &fd_core_clk.clkr,
+	[FD_CORE_UAR_CLK] = &fd_core_uar_clk.clkr,
+	[MDSS_AHB_CLK] = &mdss_ahb_clk.clkr,
+	[MDSS_AXI_CLK] = &mdss_axi_clk.clkr,
+	[MDSS_BYTE0_CLK] = &mdss_byte0_clk.clkr,
+	[MDSS_BYTE1_CLK] = &mdss_byte1_clk.clkr,
+	[MDSS_EXTPCLK_CLK] = &mdss_extpclk_clk.clkr,
+	[MDSS_HDMI_AHB_CLK] = &mdss_hdmi_ahb_clk.clkr,
+	[MDSS_HDMI_CLK] = &mdss_hdmi_clk.clkr,
+	[MDSS_MDP_CLK] = &mdss_mdp_clk.clkr,
+	[MDSS_PCLK0_CLK] = &mdss_pclk0_clk.clkr,
+	[MDSS_PCLK1_CLK] = &mdss_pclk1_clk.clkr,
+	[MDSS_VSYNC_CLK] = &mdss_vsync_clk.clkr,
+	[MMSS_MISC_AHB_CLK] = &mmss_misc_ahb_clk.clkr,
+	[MMSS_MMSSNOC_AXI_CLK] = &mmss_mmssnoc_axi_clk.clkr,
+	[MMSS_S0_AXI_CLK] = &mmss_s0_axi_clk.clkr,
+	[OCMEMCX_OCMEMNOC_CLK] = &ocmemcx_ocmemnoc_clk.clkr,
+	[OXILI_GFX3D_CLK] = &oxili_gfx3d_clk.clkr,
+	[OXILI_RBBMTIMER_CLK] = &oxili_rbbmtimer_clk.clkr,
+	[OXILICX_AHB_CLK] = &oxilicx_ahb_clk.clkr,
+	[VENUS0_AHB_CLK] = &venus0_ahb_clk.clkr,
+	[VENUS0_AXI_CLK] = &venus0_axi_clk.clkr,
+	[VENUS0_OCMEMNOC_CLK] = &venus0_ocmemnoc_clk.clkr,
+	[VENUS0_VCODEC0_CLK] = &venus0_vcodec0_clk.clkr,
+	[VENUS0_CORE0_VCODEC_CLK] = &venus0_core0_vcodec_clk.clkr,
+	[VENUS0_CORE1_VCODEC_CLK] = &venus0_core1_vcodec_clk.clkr,
+	[VENUS0_CORE2_VCODEC_CLK] = &venus0_core2_vcodec_clk.clkr,
+};
+
+static struct gdsc *mmcc_msm8994_gdscs[] = {
+	[VENUS_GDSC] = &venus_gdsc,
+	[VENUS_CORE0_GDSC] = &venus_core0_gdsc,
+	[VENUS_CORE1_GDSC] = &venus_core1_gdsc,
+	[VENUS_CORE2_GDSC] = &venus_core2_gdsc,
+	[CAMSS_TOP_GDSC] = &camss_top_gdsc,
+	[MDSS_GDSC] = &mdss_gdsc,
+	[JPEG_GDSC] = &jpeg_gdsc,
+	[VFE_GDSC] = &vfe_gdsc,
+	[CPP_GDSC] = &cpp_gdsc,
+	[OXILI_GX_GDSC] = &oxili_gx_gdsc,
+	[OXILI_CX_GDSC] = &oxili_cx_gdsc,
+	[FD_GDSC] = &fd_gdsc,
+};
+
+static const struct qcom_reset_map mmcc_msm8994_resets[] = {
+	[CAMSS_MICRO_BCR] = { 0x3490 },
+};
+
+static const struct regmap_config mmcc_msm8994_regmap_config = {
+	.reg_bits	= 32,
+	.reg_stride	= 4,
+	.val_bits	= 32,
+	.max_register	= 0x5200,
+	.fast_io	= true,
+};
+
+static const struct qcom_cc_desc mmcc_msm8994_desc = {
+	.config = &mmcc_msm8994_regmap_config,
+	.clks = mmcc_msm8994_clocks,
+	.num_clks = ARRAY_SIZE(mmcc_msm8994_clocks),
+	.resets = mmcc_msm8994_resets,
+	.num_resets = ARRAY_SIZE(mmcc_msm8994_resets),
+	.gdscs = mmcc_msm8994_gdscs,
+	.num_gdscs = ARRAY_SIZE(mmcc_msm8994_gdscs),
+};
+
+static const struct of_device_id mmcc_msm8994_match_table[] = {
+	{ .compatible = "qcom,mmcc-msm8992" },
+	{ .compatible = "qcom,mmcc-msm8994" }, /* V2 and V2.1 */
+	{ }
+};
+MODULE_DEVICE_TABLE(of, mmcc_msm8994_match_table);
+
+static int mmcc_msm8994_probe(struct platform_device *pdev)
+{
+	struct regmap *regmap;
+
+	if (of_device_is_compatible(pdev->dev.of_node, "qcom,mmcc-msm8992")) {
+		/* MSM8992 features less clocks and some have different freq tables */
+		mmcc_msm8994_desc.clks[CAMSS_JPEG_JPEG1_CLK] = NULL;
+		mmcc_msm8994_desc.clks[CAMSS_JPEG_JPEG2_CLK] = NULL;
+		mmcc_msm8994_desc.clks[FD_CORE_CLK_SRC] = NULL;
+		mmcc_msm8994_desc.clks[FD_CORE_CLK] = NULL;
+		mmcc_msm8994_desc.clks[FD_CORE_UAR_CLK] = NULL;
+		mmcc_msm8994_desc.clks[FD_AXI_CLK] = NULL;
+		mmcc_msm8994_desc.clks[FD_AHB_CLK] = NULL;
+		mmcc_msm8994_desc.clks[JPEG1_CLK_SRC] = NULL;
+		mmcc_msm8994_desc.clks[JPEG2_CLK_SRC] = NULL;
+		mmcc_msm8994_desc.clks[VENUS0_CORE2_VCODEC_CLK] = NULL;
+
+		mmcc_msm8994_desc.gdscs[FD_GDSC] = NULL;
+		mmcc_msm8994_desc.gdscs[VENUS_CORE2_GDSC] = NULL;
+
+		axi_clk_src.freq_tbl = ftbl_axi_clk_src_8992;
+		cpp_clk_src.freq_tbl = ftbl_cpp_clk_src_8992;
+		csi0_clk_src.freq_tbl = ftbl_csi0_1_2_3_clk_src_8992;
+		csi1_clk_src.freq_tbl = ftbl_csi0_1_2_3_clk_src_8992;
+		csi2_clk_src.freq_tbl = ftbl_csi0_1_2_3_clk_src_8992;
+		csi3_clk_src.freq_tbl = ftbl_csi0_1_2_3_clk_src_8992;
+		mclk0_clk_src.freq_tbl = ftbl_mclk0_clk_src_8992;
+		mclk1_clk_src.freq_tbl = ftbl_mclk1_2_3_clk_src_8992;
+		mclk2_clk_src.freq_tbl = ftbl_mclk1_2_3_clk_src_8992;
+		mclk3_clk_src.freq_tbl = ftbl_mclk1_2_3_clk_src_8992;
+		mdp_clk_src.freq_tbl = ftbl_mdp_clk_src_8992;
+		ocmemnoc_clk_src.freq_tbl = ftbl_ocmemnoc_clk_src_8992;
+		vcodec0_clk_src.freq_tbl = ftbl_vcodec0_clk_src_8992;
+		vfe0_clk_src.freq_tbl = ftbl_vfe0_1_clk_src_8992;
+		vfe1_clk_src.freq_tbl = ftbl_vfe0_1_clk_src_8992;
+	}
+
+	regmap = qcom_cc_map(pdev, &mmcc_msm8994_desc);
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
+
+	clk_alpha_pll_configure(&mmpll0_early, regmap, &mmpll_p_config);
+	clk_alpha_pll_configure(&mmpll1_early, regmap, &mmpll_p_config);
+	clk_alpha_pll_configure(&mmpll3_early, regmap, &mmpll_p_config);
+	clk_alpha_pll_configure(&mmpll5_early, regmap, &mmpll_p_config);
+
+	return qcom_cc_really_probe(pdev, &mmcc_msm8994_desc, regmap);
+}
+
+static struct platform_driver mmcc_msm8994_driver = {
+	.probe		= mmcc_msm8994_probe,
+	.driver		= {
+		.name	= "mmcc-msm8994",
+		.of_match_table = mmcc_msm8994_match_table,
+	},
+};
+module_platform_driver(mmcc_msm8994_driver);
+
+MODULE_DESCRIPTION("QCOM MMCC MSM8994 Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:mmcc-msm8994");
diff --git a/drivers/clk/qcom/mss-sc7180.c b/drivers/clk/qcom/mss-sc7180.c
index 673fa1a4f734..5a1407440662 100644
--- a/drivers/clk/qcom/mss-sc7180.c
+++ b/drivers/clk/qcom/mss-sc7180.c
@@ -73,36 +73,23 @@ static int mss_sc7180_probe(struct platform_device *pdev)
 {
 	int ret;
 
-	pm_runtime_enable(&pdev->dev);
-	ret = pm_clk_create(&pdev->dev);
+	ret = devm_pm_runtime_enable(&pdev->dev);
 	if (ret)
-		goto disable_pm_runtime;
+		return ret;
+
+	ret = devm_pm_clk_create(&pdev->dev);
+	if (ret)
+		return ret;
 
 	ret = pm_clk_add(&pdev->dev, "cfg_ahb");
 	if (ret < 0) {
 		dev_err(&pdev->dev, "failed to acquire iface clock\n");
-		goto destroy_pm_clk;
+		return ret;
 	}
 
 	ret = qcom_cc_probe(pdev, &mss_sc7180_desc);
 	if (ret < 0)
-		goto destroy_pm_clk;
-
-	return 0;
-
-destroy_pm_clk:
-	pm_clk_destroy(&pdev->dev);
-
-disable_pm_runtime:
-	pm_runtime_disable(&pdev->dev);
-
-	return ret;
-}
-
-static int mss_sc7180_remove(struct platform_device *pdev)
-{
-	pm_clk_destroy(&pdev->dev);
-	pm_runtime_disable(&pdev->dev);
+		return ret;
 
 	return 0;
 }
@@ -119,7 +106,6 @@ MODULE_DEVICE_TABLE(of, mss_sc7180_match_table);
 
 static struct platform_driver mss_sc7180_driver = {
 	.probe		= mss_sc7180_probe,
-	.remove		= mss_sc7180_remove,
 	.driver		= {
 		.name		= "sc7180-mss",
 		.of_match_table = mss_sc7180_match_table,
diff --git a/drivers/clk/qcom/q6sstop-qcs404.c b/drivers/clk/qcom/q6sstop-qcs404.c
index 723f932fbf7d..507386bee07d 100644
--- a/drivers/clk/qcom/q6sstop-qcs404.c
+++ b/drivers/clk/qcom/q6sstop-qcs404.c
@@ -159,15 +159,18 @@ static int q6sstopcc_qcs404_probe(struct platform_device *pdev)
 	const struct qcom_cc_desc *desc;
 	int ret;
 
-	pm_runtime_enable(&pdev->dev);
-	ret = pm_clk_create(&pdev->dev);
+	ret = devm_pm_runtime_enable(&pdev->dev);
 	if (ret)
-		goto disable_pm_runtime;
+		return ret;
+
+	ret = devm_pm_clk_create(&pdev->dev);
+	if (ret)
+		return ret;
 
 	ret = pm_clk_add(&pdev->dev, NULL);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "failed to acquire iface clock\n");
-		goto destroy_pm_clk;
+		return ret;
 	}
 
 	q6sstop_regmap_config.name = "q6sstop_tcsr";
@@ -175,30 +178,14 @@ static int q6sstopcc_qcs404_probe(struct platform_device *pdev)
 
 	ret = qcom_cc_probe_by_index(pdev, 1, desc);
 	if (ret)
-		goto destroy_pm_clk;
+		return ret;
 
 	q6sstop_regmap_config.name = "q6sstop_cc";
 	desc = &q6sstop_qcs404_desc;
 
 	ret = qcom_cc_probe_by_index(pdev, 0, desc);
 	if (ret)
-		goto destroy_pm_clk;
-
-	return 0;
-
-destroy_pm_clk:
-	pm_clk_destroy(&pdev->dev);
-
-disable_pm_runtime:
-	pm_runtime_disable(&pdev->dev);
-
-	return ret;
-}
-
-static int q6sstopcc_qcs404_remove(struct platform_device *pdev)
-{
-	pm_clk_destroy(&pdev->dev);
-	pm_runtime_disable(&pdev->dev);
+		return ret;
 
 	return 0;
 }
@@ -209,7 +196,6 @@ static const struct dev_pm_ops q6sstopcc_pm_ops = {
 
 static struct platform_driver q6sstopcc_qcs404_driver = {
 	.probe		= q6sstopcc_qcs404_probe,
-	.remove		= q6sstopcc_qcs404_remove,
 	.driver		= {
 		.name	= "qcs404-q6sstopcc",
 		.of_match_table = q6sstopcc_qcs404_match_table,
diff --git a/drivers/clk/qcom/turingcc-qcs404.c b/drivers/clk/qcom/turingcc-qcs404.c
index 4cfbbf5bf4d9..4543bda793f4 100644
--- a/drivers/clk/qcom/turingcc-qcs404.c
+++ b/drivers/clk/qcom/turingcc-qcs404.c
@@ -110,36 +110,23 @@ static int turingcc_probe(struct platform_device *pdev)
 {
 	int ret;
 
-	pm_runtime_enable(&pdev->dev);
-	ret = pm_clk_create(&pdev->dev);
+	ret = devm_pm_runtime_enable(&pdev->dev);
 	if (ret)
-		goto disable_pm_runtime;
+		return ret;
+
+	ret = devm_pm_clk_create(&pdev->dev);
+	if (ret)
+		return ret;
 
 	ret = pm_clk_add(&pdev->dev, NULL);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "failed to acquire iface clock\n");
-		goto destroy_pm_clk;
+		return ret;
 	}
 
 	ret = qcom_cc_probe(pdev, &turingcc_desc);
 	if (ret < 0)
-		goto destroy_pm_clk;
-
-	return 0;
-
-destroy_pm_clk:
-	pm_clk_destroy(&pdev->dev);
-
-disable_pm_runtime:
-	pm_runtime_disable(&pdev->dev);
-
-	return ret;
-}
-
-static int turingcc_remove(struct platform_device *pdev)
-{
-	pm_clk_destroy(&pdev->dev);
-	pm_runtime_disable(&pdev->dev);
+		return ret;
 
 	return 0;
 }
@@ -156,7 +143,6 @@ MODULE_DEVICE_TABLE(of, turingcc_match_table);
 
 static struct platform_driver turingcc_driver = {
 	.probe		= turingcc_probe,
-	.remove		= turingcc_remove,
 	.driver		= {
 		.name	= "qcs404-turingcc",
 		.of_match_table = turingcc_match_table,
diff --git a/drivers/clk/qcom/videocc-sc7280.c b/drivers/clk/qcom/videocc-sc7280.c
new file mode 100644
index 000000000000..615695d82319
--- /dev/null
+++ b/drivers/clk/qcom/videocc-sc7280.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#include <dt-bindings/clock/qcom,videocc-sc7280.h>
+
+#include "clk-alpha-pll.h"
+#include "clk-branch.h"
+#include "clk-rcg.h"
+#include "common.h"
+#include "reset.h"
+#include "gdsc.h"
+
+enum {
+	P_BI_TCXO,
+	P_SLEEP_CLK,
+	P_VIDEO_PLL0_OUT_EVEN,
+};
+
+static const struct pll_vco lucid_vco[] = {
+	{ 249600000, 2000000000, 0 },
+};
+
+/* 400MHz Configuration */
+static const struct alpha_pll_config video_pll0_config = {
+	.l = 0x14,
+	.alpha = 0xD555,
+	.config_ctl_val = 0x20485699,
+	.config_ctl_hi_val = 0x00002261,
+	.config_ctl_hi1_val = 0x329A299C,
+	.user_ctl_val = 0x00000001,
+	.user_ctl_hi_val = 0x00000805,
+	.user_ctl_hi1_val = 0x00000000,
+};
+
+static struct clk_alpha_pll video_pll0 = {
+	.offset = 0x0,
+	.vco_table = lucid_vco,
+	.num_vco = ARRAY_SIZE(lucid_vco),
+	.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID],
+	.clkr = {
+		.hw.init = &(struct clk_init_data){
+			.name = "video_pll0",
+			.parent_data = &(const struct clk_parent_data){
+				.fw_name = "bi_tcxo",
+			},
+			.num_parents = 1,
+			.ops = &clk_alpha_pll_lucid_ops,
+		},
+	},
+};
+
+static const struct parent_map video_cc_parent_map_0[] = {
+	{ P_BI_TCXO, 0 },
+	{ P_VIDEO_PLL0_OUT_EVEN, 3 },
+};
+
+static const struct clk_parent_data video_cc_parent_data_0[] = {
+	{ .fw_name = "bi_tcxo" },
+	{ .hw = &video_pll0.clkr.hw },
+};
+
+static const struct parent_map video_cc_parent_map_1[] = {
+	{ P_SLEEP_CLK, 0 },
+};
+
+static const struct clk_parent_data video_cc_parent_data_1[] = {
+	{ .fw_name = "sleep_clk" },
+};
+
+static const struct freq_tbl ftbl_video_cc_iris_clk_src[] = {
+	F(133333333, P_VIDEO_PLL0_OUT_EVEN, 3, 0, 0),
+	F(240000000, P_VIDEO_PLL0_OUT_EVEN, 2, 0, 0),
+	F(335000000, P_VIDEO_PLL0_OUT_EVEN, 2, 0, 0),
+	F(424000000, P_VIDEO_PLL0_OUT_EVEN, 2, 0, 0),
+	F(460000000, P_VIDEO_PLL0_OUT_EVEN, 2, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 video_cc_iris_clk_src = {
+	.cmd_rcgr = 0x1000,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = video_cc_parent_map_0,
+	.freq_tbl = ftbl_video_cc_iris_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "video_cc_iris_clk_src",
+		.parent_data = video_cc_parent_data_0,
+		.num_parents = ARRAY_SIZE(video_cc_parent_data_0),
+		.flags = CLK_SET_RATE_PARENT,
+		.ops = &clk_rcg2_shared_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_video_cc_sleep_clk_src[] = {
+	F(32000, P_SLEEP_CLK, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 video_cc_sleep_clk_src = {
+	.cmd_rcgr = 0x701c,
+	.mnd_width = 0,
+	.hid_width = 5,
+	.parent_map = video_cc_parent_map_1,
+	.freq_tbl = ftbl_video_cc_sleep_clk_src,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "video_cc_sleep_clk_src",
+		.parent_data = video_cc_parent_data_1,
+		.num_parents = ARRAY_SIZE(video_cc_parent_data_1),
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch video_cc_iris_ahb_clk = {
+	.halt_reg = 0x5004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x5004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "video_cc_iris_ahb_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&video_cc_iris_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch video_cc_mvs0_axi_clk = {
+	.halt_reg = 0x800c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x800c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "video_cc_mvs0_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch video_cc_mvs0_core_clk = {
+	.halt_reg = 0x3010,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x3010,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x3010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "video_cc_mvs0_core_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&video_cc_iris_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch video_cc_mvsc_core_clk = {
+	.halt_reg = 0x2014,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x2014,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "video_cc_mvsc_core_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&video_cc_iris_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch video_cc_mvsc_ctl_axi_clk = {
+	.halt_reg = 0x8004,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x8004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "video_cc_mvsc_ctl_axi_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch video_cc_sleep_clk = {
+	.halt_reg = 0x7034,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x7034,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "video_cc_sleep_clk",
+			.parent_hws = (const struct clk_hw*[]){
+				&video_cc_sleep_clk_src.clkr.hw,
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch video_cc_venus_ahb_clk = {
+	.halt_reg = 0x801c,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x801c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "video_cc_venus_ahb_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct gdsc mvs0_gdsc = {
+	.gdscr = 0x3004,
+	.pd = {
+		.name = "mvs0_gdsc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = HW_CTRL | RETAIN_FF_ENABLE,
+};
+
+static struct gdsc mvsc_gdsc = {
+	.gdscr = 0x2004,
+	.pd = {
+		.name = "mvsc_gdsc",
+	},
+	.flags = RETAIN_FF_ENABLE,
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct clk_regmap *video_cc_sc7280_clocks[] = {
+	[VIDEO_CC_IRIS_AHB_CLK] = &video_cc_iris_ahb_clk.clkr,
+	[VIDEO_CC_IRIS_CLK_SRC] = &video_cc_iris_clk_src.clkr,
+	[VIDEO_CC_MVS0_AXI_CLK] = &video_cc_mvs0_axi_clk.clkr,
+	[VIDEO_CC_MVS0_CORE_CLK] = &video_cc_mvs0_core_clk.clkr,
+	[VIDEO_CC_MVSC_CORE_CLK] = &video_cc_mvsc_core_clk.clkr,
+	[VIDEO_CC_MVSC_CTL_AXI_CLK] = &video_cc_mvsc_ctl_axi_clk.clkr,
+	[VIDEO_CC_SLEEP_CLK] = &video_cc_sleep_clk.clkr,
+	[VIDEO_CC_SLEEP_CLK_SRC] = &video_cc_sleep_clk_src.clkr,
+	[VIDEO_CC_VENUS_AHB_CLK] = &video_cc_venus_ahb_clk.clkr,
+	[VIDEO_PLL0] = &video_pll0.clkr,
+};
+
+static struct gdsc *video_cc_sc7280_gdscs[] = {
+	[MVS0_GDSC] = &mvs0_gdsc,
+	[MVSC_GDSC] = &mvsc_gdsc,
+};
+
+static const struct regmap_config video_cc_sc7280_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.max_register = 0xb000,
+	.fast_io = true,
+};
+
+static const struct qcom_cc_desc video_cc_sc7280_desc = {
+	.config = &video_cc_sc7280_regmap_config,
+	.clks = video_cc_sc7280_clocks,
+	.num_clks = ARRAY_SIZE(video_cc_sc7280_clocks),
+	.gdscs = video_cc_sc7280_gdscs,
+	.num_gdscs = ARRAY_SIZE(video_cc_sc7280_gdscs),
+};
+
+static const struct of_device_id video_cc_sc7280_match_table[] = {
+	{ .compatible = "qcom,sc7280-videocc" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, video_cc_sc7280_match_table);
+
+static int video_cc_sc7280_probe(struct platform_device *pdev)
+{
+	struct regmap *regmap;
+
+	regmap = qcom_cc_map(pdev, &video_cc_sc7280_desc);
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
+
+	clk_lucid_pll_configure(&video_pll0, regmap, &video_pll0_config);
+
+	return qcom_cc_really_probe(pdev, &video_cc_sc7280_desc, regmap);
+}
+
+static struct platform_driver video_cc_sc7280_driver = {
+	.probe = video_cc_sc7280_probe,
+	.driver = {
+		.name = "video_cc-sc7280",
+		.of_match_table = video_cc_sc7280_match_table,
+	},
+};
+
+static int __init video_cc_sc7280_init(void)
+{
+	return platform_driver_register(&video_cc_sc7280_driver);
+}
+subsys_initcall(video_cc_sc7280_init);
+
+static void __exit video_cc_sc7280_exit(void)
+{
+	platform_driver_unregister(&video_cc_sc7280_driver);
+}
+module_exit(video_cc_sc7280_exit);
+
+MODULE_DESCRIPTION("QTI VIDEO_CC sc7280 Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/clk/ralink/clk-mt7621.c b/drivers/clk/ralink/clk-mt7621.c
index 857da1e274be..a2c045390f00 100644
--- a/drivers/clk/ralink/clk-mt7621.c
+++ b/drivers/clk/ralink/clk-mt7621.c
@@ -131,14 +131,7 @@ static int mt7621_gate_ops_init(struct device *dev,
 				struct mt7621_gate *sclk)
 {
 	struct clk_init_data init = {
-		/*
-		 * Until now no clock driver existed so
-		 * these SoC drivers are not prepared
-		 * yet for the clock. We don't want kernel to
-		 * disable anything so we add CLK_IS_CRITICAL
-		 * flag here.
-		 */
-		.flags = CLK_SET_RATE_PARENT | CLK_IS_CRITICAL,
+		.flags = CLK_SET_RATE_PARENT,
 		.num_parents = 1,
 		.parent_names = &sclk->parent_name,
 		.ops = &mt7621_gate_ops,
diff --git a/drivers/clk/renesas/Kconfig b/drivers/clk/renesas/Kconfig
index 7b450650bcae..6d0280751bb1 100644
--- a/drivers/clk/renesas/Kconfig
+++ b/drivers/clk/renesas/Kconfig
@@ -153,9 +153,7 @@ config CLK_R8A779A0
 	select CLK_RENESAS_CPG_MSSR
 
 config CLK_R9A06G032
-	bool "Renesas R9A06G032 clock driver"
-	help
-	  This is a driver for R9A06G032 clocks
+	bool "RZ/N1D clock support" if COMPILE_TEST
 
 config CLK_R9A07G044
 	bool "RZ/G2L clock support" if COMPILE_TEST
diff --git a/drivers/clk/renesas/Makefile b/drivers/clk/renesas/Makefile
index 5c6c5c721d98..7d018700d08b 100644
--- a/drivers/clk/renesas/Makefile
+++ b/drivers/clk/renesas/Makefile
@@ -37,7 +37,7 @@ obj-$(CONFIG_CLK_RCAR_CPG_LIB)		+= rcar-cpg-lib.o
 obj-$(CONFIG_CLK_RCAR_GEN2_CPG)		+= rcar-gen2-cpg.o
 obj-$(CONFIG_CLK_RCAR_GEN3_CPG)		+= rcar-gen3-cpg.o
 obj-$(CONFIG_CLK_RCAR_USB2_CLOCK_SEL)	+= rcar-usb2-clock-sel.o
-obj-$(CONFIG_CLK_RZG2L)			+= renesas-rzg2l-cpg.o
+obj-$(CONFIG_CLK_RZG2L)			+= rzg2l-cpg.o
 
 # Generic
 obj-$(CONFIG_CLK_RENESAS_CPG_MSSR)	+= renesas-cpg-mssr.o
diff --git a/drivers/clk/renesas/r8a774a1-cpg-mssr.c b/drivers/clk/renesas/r8a774a1-cpg-mssr.c
index 4a43ebec7d5e..39b185d8e957 100644
--- a/drivers/clk/renesas/r8a774a1-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a774a1-cpg-mssr.c
@@ -210,7 +210,7 @@ static const struct mssr_mod_clk r8a774a1_mod_clks[] __initconst = {
 	DEF_MOD("rpc-if",		 917,	R8A774A1_CLK_RPCD2),
 	DEF_MOD("i2c6",			 918,	R8A774A1_CLK_S0D6),
 	DEF_MOD("i2c5",			 919,	R8A774A1_CLK_S0D6),
-	DEF_MOD("i2c-dvfs",		 926,	R8A774A1_CLK_CP),
+	DEF_MOD("iic-pmic",		 926,	R8A774A1_CLK_CP),
 	DEF_MOD("i2c4",			 927,	R8A774A1_CLK_S0D6),
 	DEF_MOD("i2c3",			 928,	R8A774A1_CLK_S0D6),
 	DEF_MOD("i2c2",			 929,	R8A774A1_CLK_S3D2),
diff --git a/drivers/clk/renesas/r8a774b1-cpg-mssr.c b/drivers/clk/renesas/r8a774b1-cpg-mssr.c
index 6f04c40fe237..af602d83c8ce 100644
--- a/drivers/clk/renesas/r8a774b1-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a774b1-cpg-mssr.c
@@ -206,7 +206,7 @@ static const struct mssr_mod_clk r8a774b1_mod_clks[] __initconst = {
 	DEF_MOD("rpc-if",		 917,	R8A774B1_CLK_RPCD2),
 	DEF_MOD("i2c6",			 918,	R8A774B1_CLK_S0D6),
 	DEF_MOD("i2c5",			 919,	R8A774B1_CLK_S0D6),
-	DEF_MOD("i2c-dvfs",		 926,	R8A774B1_CLK_CP),
+	DEF_MOD("iic-pmic",		 926,	R8A774B1_CLK_CP),
 	DEF_MOD("i2c4",			 927,	R8A774B1_CLK_S0D6),
 	DEF_MOD("i2c3",			 928,	R8A774B1_CLK_S0D6),
 	DEF_MOD("i2c2",			 929,	R8A774B1_CLK_S3D2),
diff --git a/drivers/clk/renesas/r8a774c0-cpg-mssr.c b/drivers/clk/renesas/r8a774c0-cpg-mssr.c
index ed3a2cf0e0bb..5b938eb2df25 100644
--- a/drivers/clk/renesas/r8a774c0-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a774c0-cpg-mssr.c
@@ -210,7 +210,7 @@ static const struct mssr_mod_clk r8a774c0_mod_clks[] __initconst = {
 	DEF_MOD("rpc-if",		 917,	R8A774C0_CLK_RPCD2),
 	DEF_MOD("i2c6",			 918,	R8A774C0_CLK_S3D2),
 	DEF_MOD("i2c5",			 919,	R8A774C0_CLK_S3D2),
-	DEF_MOD("i2c-dvfs",		 926,	R8A774C0_CLK_CP),
+	DEF_MOD("iic-pmic",		 926,	R8A774C0_CLK_CP),
 	DEF_MOD("i2c4",			 927,	R8A774C0_CLK_S3D2),
 	DEF_MOD("i2c3",			 928,	R8A774C0_CLK_S3D2),
 	DEF_MOD("i2c2",			 929,	R8A774C0_CLK_S3D2),
diff --git a/drivers/clk/renesas/r8a774e1-cpg-mssr.c b/drivers/clk/renesas/r8a774e1-cpg-mssr.c
index b96c486abb44..40c71466df37 100644
--- a/drivers/clk/renesas/r8a774e1-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a774e1-cpg-mssr.c
@@ -219,7 +219,7 @@ static const struct mssr_mod_clk r8a774e1_mod_clks[] __initconst = {
 	DEF_MOD("i2c6",			 918,	R8A774E1_CLK_S0D6),
 	DEF_MOD("i2c5",			 919,	R8A774E1_CLK_S0D6),
 	DEF_MOD("adg",			 922,	R8A774E1_CLK_S0D1),
-	DEF_MOD("i2c-dvfs",		 926,	R8A774E1_CLK_CP),
+	DEF_MOD("iic-pmic",		 926,	R8A774E1_CLK_CP),
 	DEF_MOD("i2c4",			 927,	R8A774E1_CLK_S0D6),
 	DEF_MOD("i2c3",			 928,	R8A774E1_CLK_S0D6),
 	DEF_MOD("i2c2",			 929,	R8A774E1_CLK_S3D2),
diff --git a/drivers/clk/renesas/r8a779a0-cpg-mssr.c b/drivers/clk/renesas/r8a779a0-cpg-mssr.c
index acaf5a93f1d3..f16d125ca009 100644
--- a/drivers/clk/renesas/r8a779a0-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a779a0-cpg-mssr.c
@@ -135,7 +135,6 @@ static const struct cpg_core_clk r8a779a0_core_clks[] __initconst = {
 	DEF_FIXED("zt",		R8A779A0_CLK_ZT,	CLK_PLL1_DIV2,	2, 1),
 	DEF_FIXED("ztr",	R8A779A0_CLK_ZTR,	CLK_PLL1_DIV2,	2, 1),
 	DEF_FIXED("zr",		R8A779A0_CLK_ZR,	CLK_PLL1_DIV2,	1, 1),
-	DEF_FIXED("dsi",	R8A779A0_CLK_DSI,	CLK_PLL5_DIV4,	1, 1),
 	DEF_FIXED("cnndsp",	R8A779A0_CLK_CNNDSP,	CLK_PLL5_DIV4,	1, 1),
 	DEF_FIXED("vip",	R8A779A0_CLK_VIP,	CLK_PLL5,	5, 1),
 	DEF_FIXED("adgh",	R8A779A0_CLK_ADGH,	CLK_PLL5_DIV4,	1, 1),
@@ -151,6 +150,7 @@ static const struct cpg_core_clk r8a779a0_core_clks[] __initconst = {
 	DEF_DIV6P1("mso",	R8A779A0_CLK_MSO,	CLK_PLL5_DIV4,	0x87c),
 	DEF_DIV6P1("canfd",	R8A779A0_CLK_CANFD,	CLK_PLL5_DIV4,	0x878),
 	DEF_DIV6P1("csi0",	R8A779A0_CLK_CSI0,	CLK_PLL5_DIV4,	0x880),
+	DEF_DIV6P1("dsi",	R8A779A0_CLK_DSI,	CLK_PLL5_DIV4,	0x884),
 
 	DEF_OSC("osc",		R8A779A0_CLK_OSC,	CLK_EXTAL,	8),
 	DEF_MDSEL("r",		R8A779A0_CLK_R, 29, CLK_EXTALR, 1, CLK_OCO, 1),
@@ -167,6 +167,9 @@ static const struct mssr_mod_clk r8a779a0_mod_clks[] __initconst = {
 	DEF_MOD("csi41",	400,	R8A779A0_CLK_CSI0),
 	DEF_MOD("csi42",	401,	R8A779A0_CLK_CSI0),
 	DEF_MOD("csi43",	402,	R8A779A0_CLK_CSI0),
+	DEF_MOD("du",		411,	R8A779A0_CLK_S3D1),
+	DEF_MOD("dsi0",		415,	R8A779A0_CLK_DSI),
+	DEF_MOD("dsi1",		416,	R8A779A0_CLK_DSI),
 	DEF_MOD("fcpvd0",	508,	R8A779A0_CLK_S3D1),
 	DEF_MOD("fcpvd1",	509,	R8A779A0_CLK_S3D1),
 	DEF_MOD("hscif0",	514,	R8A779A0_CLK_S1D2),
diff --git a/drivers/clk/renesas/r9a07g044-cpg.c b/drivers/clk/renesas/r9a07g044-cpg.c
index ae24e0397d3c..4c94b94c4125 100644
--- a/drivers/clk/renesas/r9a07g044-cpg.c
+++ b/drivers/clk/renesas/r9a07g044-cpg.c
@@ -12,11 +12,11 @@
 
 #include <dt-bindings/clock/r9a07g044-cpg.h>
 
-#include "renesas-rzg2l-cpg.h"
+#include "rzg2l-cpg.h"
 
 enum clk_ids {
 	/* Core Clock Outputs exported to DT */
-	LAST_DT_CORE_CLK = R9A07G044_OSCCLK,
+	LAST_DT_CORE_CLK = R9A07G044_CLK_P0_DIV2,
 
 	/* External Input Clocks */
 	CLK_EXTAL,
@@ -37,6 +37,7 @@ enum clk_ids {
 	CLK_PLL5,
 	CLK_PLL5_DIV2,
 	CLK_PLL6,
+	CLK_P1_DIV2,
 
 	/* Module Clocks */
 	MOD_CLK_BASE,
@@ -76,9 +77,11 @@ static const struct cpg_core_clk r9a07g044_core_clks[] __initconst = {
 	DEF_FIXED("I", R9A07G044_CLK_I, CLK_PLL1, 1, 1),
 	DEF_DIV("P0", R9A07G044_CLK_P0, CLK_PLL2_DIV16, DIVPL2A,
 		dtable_1_32, CLK_DIVIDER_HIWORD_MASK),
+	DEF_FIXED("P0_DIV2", R9A07G044_CLK_P0_DIV2, R9A07G044_CLK_P0, 1, 2),
 	DEF_FIXED("TSU", R9A07G044_CLK_TSU, CLK_PLL2_DIV20, 1, 1),
 	DEF_DIV("P1", R9A07G044_CLK_P1, CLK_PLL3_DIV2_4,
 		DIVPL3B, dtable_1_32, CLK_DIVIDER_HIWORD_MASK),
+	DEF_FIXED("P1_DIV2", CLK_P1_DIV2, R9A07G044_CLK_P1, 1, 2),
 	DEF_DIV("P2", R9A07G044_CLK_P2, CLK_PLL3_DIV2_4_2,
 		DIVPL3A, dtable_1_32, CLK_DIVIDER_HIWORD_MASK),
 };
@@ -90,6 +93,42 @@ static struct rzg2l_mod_clk r9a07g044_mod_clks[] = {
 				0x518, 0),
 	DEF_MOD("ia55_clk",	R9A07G044_IA55_CLK, R9A07G044_CLK_P1,
 				0x518, 1),
+	DEF_MOD("dmac_aclk",	R9A07G044_DMAC_ACLK, R9A07G044_CLK_P1,
+				0x52c, 0),
+	DEF_MOD("dmac_pclk",	R9A07G044_DMAC_PCLK, CLK_P1_DIV2,
+				0x52c, 1),
+	DEF_MOD("ssi0_pclk",	R9A07G044_SSI0_PCLK2, R9A07G044_CLK_P0,
+				0x570, 0),
+	DEF_MOD("ssi0_sfr",	R9A07G044_SSI0_PCLK_SFR, R9A07G044_CLK_P0,
+				0x570, 1),
+	DEF_MOD("ssi1_pclk",	R9A07G044_SSI1_PCLK2, R9A07G044_CLK_P0,
+				0x570, 2),
+	DEF_MOD("ssi1_sfr",	R9A07G044_SSI1_PCLK_SFR, R9A07G044_CLK_P0,
+				0x570, 3),
+	DEF_MOD("ssi2_pclk",	R9A07G044_SSI2_PCLK2, R9A07G044_CLK_P0,
+				0x570, 4),
+	DEF_MOD("ssi2_sfr",	R9A07G044_SSI2_PCLK_SFR, R9A07G044_CLK_P0,
+				0x570, 5),
+	DEF_MOD("ssi3_pclk",	R9A07G044_SSI3_PCLK2, R9A07G044_CLK_P0,
+				0x570, 6),
+	DEF_MOD("ssi3_sfr",	R9A07G044_SSI3_PCLK_SFR, R9A07G044_CLK_P0,
+				0x570, 7),
+	DEF_MOD("usb0_host",	R9A07G044_USB_U2H0_HCLK, R9A07G044_CLK_P1,
+				0x578, 0),
+	DEF_MOD("usb1_host",	R9A07G044_USB_U2H1_HCLK, R9A07G044_CLK_P1,
+				0x578, 1),
+	DEF_MOD("usb0_func",	R9A07G044_USB_U2P_EXR_CPUCLK, R9A07G044_CLK_P1,
+				0x578, 2),
+	DEF_MOD("usb_pclk",	R9A07G044_USB_PCLK, R9A07G044_CLK_P1,
+				0x578, 3),
+	DEF_MOD("i2c0",		R9A07G044_I2C0_PCLK, R9A07G044_CLK_P0,
+				0x580, 0),
+	DEF_MOD("i2c1",		R9A07G044_I2C1_PCLK, R9A07G044_CLK_P0,
+				0x580, 1),
+	DEF_MOD("i2c2",		R9A07G044_I2C2_PCLK, R9A07G044_CLK_P0,
+				0x580, 2),
+	DEF_MOD("i2c3",		R9A07G044_I2C3_PCLK, R9A07G044_CLK_P0,
+				0x580, 3),
 	DEF_MOD("scif0",	R9A07G044_SCIF0_CLK_PCK, R9A07G044_CLK_P0,
 				0x584, 0),
 	DEF_MOD("scif1",	R9A07G044_SCIF1_CLK_PCK, R9A07G044_CLK_P0,
@@ -102,18 +141,47 @@ static struct rzg2l_mod_clk r9a07g044_mod_clks[] = {
 				0x584, 4),
 	DEF_MOD("sci0",		R9A07G044_SCI0_CLKP, R9A07G044_CLK_P0,
 				0x588, 0),
+	DEF_MOD("canfd",	R9A07G044_CANFD_PCLK, R9A07G044_CLK_P0,
+				0x594, 0),
+	DEF_MOD("gpio",		R9A07G044_GPIO_HCLK, R9A07G044_OSCCLK,
+				0x598, 0),
+	DEF_MOD("adc_adclk",	R9A07G044_ADC_ADCLK, R9A07G044_CLK_TSU,
+				0x5a8, 0),
+	DEF_MOD("adc_pclk",	R9A07G044_ADC_PCLK, R9A07G044_CLK_P0,
+				0x5a8, 1),
 };
 
 static struct rzg2l_reset r9a07g044_resets[] = {
 	DEF_RST(R9A07G044_GIC600_GICRESET_N, 0x814, 0),
 	DEF_RST(R9A07G044_GIC600_DBG_GICRESET_N, 0x814, 1),
 	DEF_RST(R9A07G044_IA55_RESETN, 0x818, 0),
+	DEF_RST(R9A07G044_DMAC_ARESETN, 0x82c, 0),
+	DEF_RST(R9A07G044_DMAC_RST_ASYNC, 0x82c, 1),
+	DEF_RST(R9A07G044_SSI0_RST_M2_REG, 0x870, 0),
+	DEF_RST(R9A07G044_SSI1_RST_M2_REG, 0x870, 1),
+	DEF_RST(R9A07G044_SSI2_RST_M2_REG, 0x870, 2),
+	DEF_RST(R9A07G044_SSI3_RST_M2_REG, 0x870, 3),
+	DEF_RST(R9A07G044_USB_U2H0_HRESETN, 0x878, 0),
+	DEF_RST(R9A07G044_USB_U2H1_HRESETN, 0x878, 1),
+	DEF_RST(R9A07G044_USB_U2P_EXL_SYSRST, 0x878, 2),
+	DEF_RST(R9A07G044_USB_PRESETN, 0x878, 3),
+	DEF_RST(R9A07G044_I2C0_MRST, 0x880, 0),
+	DEF_RST(R9A07G044_I2C1_MRST, 0x880, 1),
+	DEF_RST(R9A07G044_I2C2_MRST, 0x880, 2),
+	DEF_RST(R9A07G044_I2C3_MRST, 0x880, 3),
 	DEF_RST(R9A07G044_SCIF0_RST_SYSTEM_N, 0x884, 0),
 	DEF_RST(R9A07G044_SCIF1_RST_SYSTEM_N, 0x884, 1),
 	DEF_RST(R9A07G044_SCIF2_RST_SYSTEM_N, 0x884, 2),
 	DEF_RST(R9A07G044_SCIF3_RST_SYSTEM_N, 0x884, 3),
 	DEF_RST(R9A07G044_SCIF4_RST_SYSTEM_N, 0x884, 4),
 	DEF_RST(R9A07G044_SCI0_RST, 0x888, 0),
+	DEF_RST(R9A07G044_CANFD_RSTP_N, 0x894, 0),
+	DEF_RST(R9A07G044_CANFD_RSTC_N, 0x894, 1),
+	DEF_RST(R9A07G044_GPIO_RSTN, 0x898, 0),
+	DEF_RST(R9A07G044_GPIO_PORT_RESETN, 0x898, 1),
+	DEF_RST(R9A07G044_GPIO_SPARE_RESETN, 0x898, 2),
+	DEF_RST(R9A07G044_ADC_PRESETN, 0x8a8, 0),
+	DEF_RST(R9A07G044_ADC_ADRST_N, 0x8a8, 1),
 };
 
 static const unsigned int r9a07g044_crit_mod_clks[] __initconst = {
diff --git a/drivers/clk/renesas/renesas-rzg2l-cpg.c b/drivers/clk/renesas/rzg2l-cpg.c
index e7c59af2a1d8..3b3b2c3347f3 100644
--- a/drivers/clk/renesas/renesas-rzg2l-cpg.c
+++ b/drivers/clk/renesas/rzg2l-cpg.c
@@ -29,7 +29,7 @@
 
 #include <dt-bindings/clock/renesas-cpg-mssr.h>
 
-#include "renesas-rzg2l-cpg.h"
+#include "rzg2l-cpg.h"
 
 #ifdef DEBUG
 #define WARN_DEBUG(x)	WARN_ON(x)
@@ -125,7 +125,7 @@ rzg2l_cpg_div_clk_register(const struct cpg_core_clk *core,
 						 core->flag, &priv->rmw_lock);
 
 	if (IS_ERR(clk_hw))
-		return NULL;
+		return ERR_CAST(clk_hw);
 
 	return clk_hw->clk;
 }
@@ -175,17 +175,14 @@ rzg2l_cpg_pll_clk_register(const struct cpg_core_clk *core,
 	struct clk_init_data init;
 	const char *parent_name;
 	struct pll_clk *pll_clk;
-	struct clk *clk;
 
 	parent = clks[core->parent & 0xffff];
 	if (IS_ERR(parent))
 		return ERR_CAST(parent);
 
 	pll_clk = devm_kzalloc(dev, sizeof(*pll_clk), GFP_KERNEL);
-	if (!pll_clk) {
-		clk = ERR_PTR(-ENOMEM);
-		return NULL;
-	}
+	if (!pll_clk)
+		return ERR_PTR(-ENOMEM);
 
 	parent_name = __clk_get_name(parent);
 	init.name = core->name;
@@ -200,11 +197,7 @@ rzg2l_cpg_pll_clk_register(const struct cpg_core_clk *core,
 	pll_clk->priv = priv;
 	pll_clk->type = core->type;
 
-	clk = clk_register(NULL, &pll_clk->hw);
-	if (IS_ERR(clk))
-		kfree(pll_clk);
-
-	return clk;
+	return clk_register(NULL, &pll_clk->hw);
 }
 
 static struct clk
@@ -229,7 +222,7 @@ static struct clk
 
 	case CPG_MOD:
 		type = "module";
-		if (clkidx > priv->num_mod_clks) {
+		if (clkidx >= priv->num_mod_clks) {
 			dev_err(dev, "Invalid %s clock index %u\n", type,
 				clkidx);
 			return ERR_PTR(-EINVAL);
@@ -297,7 +290,7 @@ rzg2l_cpg_register_core_clk(const struct cpg_core_clk *core,
 		break;
 	default:
 		goto fail;
-	};
+	}
 
 	if (IS_ERR_OR_NULL(clk))
 		goto fail;
@@ -473,7 +466,6 @@ rzg2l_cpg_register_mod_clk(const struct rzg2l_mod_clk *mod,
 fail:
 	dev_err(dev, "Failed to register %s clock %s: %ld\n", "module",
 		mod->name, PTR_ERR(clk));
-	kfree(clock);
 }
 
 #define rcdev_to_priv(x)	container_of(x, struct rzg2l_cpg_priv, rcdev)
diff --git a/drivers/clk/renesas/renesas-rzg2l-cpg.h b/drivers/clk/renesas/rzg2l-cpg.h
index 63695280ce8b..63695280ce8b 100644
--- a/drivers/clk/renesas/renesas-rzg2l-cpg.h
+++ b/drivers/clk/renesas/rzg2l-cpg.h
diff --git a/drivers/clk/rockchip/clk-pll.c b/drivers/clk/rockchip/clk-pll.c
index fe937bcdb487..f7827b3b7fc1 100644
--- a/drivers/clk/rockchip/clk-pll.c
+++ b/drivers/clk/rockchip/clk-pll.c
@@ -940,7 +940,7 @@ struct clk *rockchip_clk_register_pll(struct rockchip_clk_provider *ctx,
 	switch (pll_type) {
 	case pll_rk3036:
 	case pll_rk3328:
-		if (!pll->rate_table || IS_ERR(ctx->grf))
+		if (!pll->rate_table)
 			init.ops = &rockchip_rk3036_pll_clk_norate_ops;
 		else
 			init.ops = &rockchip_rk3036_pll_clk_ops;
diff --git a/drivers/clk/rockchip/clk-rk3036.c b/drivers/clk/rockchip/clk-rk3036.c
index 614845cc5b4a..d644bc155ec6 100644
--- a/drivers/clk/rockchip/clk-rk3036.c
+++ b/drivers/clk/rockchip/clk-rk3036.c
@@ -121,6 +121,7 @@ PNAME(mux_pll_src_3plls_p)	= { "apll", "dpll", "gpll" };
 PNAME(mux_timer_p)		= { "xin24m", "pclk_peri_src" };
 
 PNAME(mux_pll_src_apll_dpll_gpll_usb480m_p)	= { "apll", "dpll", "gpll", "usb480m" };
+PNAME(mux_pll_src_dmyapll_dpll_gpll_xin24_p)   = { "dummy_apll", "dpll", "gpll", "xin24m" };
 
 PNAME(mux_mmc_src_p)	= { "apll", "dpll", "gpll", "xin24m" };
 PNAME(mux_i2s_pre_p)	= { "i2s_src", "i2s_frac", "ext_i2s", "xin12m" };
@@ -340,7 +341,7 @@ static struct rockchip_clk_branch rk3036_clk_branches[] __initdata = {
 			RK2928_CLKSEL_CON(16), 8, 2, MFLAGS, 10, 5, DFLAGS,
 			RK2928_CLKGATE_CON(10), 4, GFLAGS),
 
-	COMPOSITE(SCLK_SFC, "sclk_sfc", mux_pll_src_apll_dpll_gpll_usb480m_p, 0,
+	COMPOSITE(SCLK_SFC, "sclk_sfc", mux_pll_src_dmyapll_dpll_gpll_xin24_p, 0,
 			RK2928_CLKSEL_CON(16), 0, 2, MFLAGS, 2, 5, DFLAGS,
 			RK2928_CLKGATE_CON(10), 5, GFLAGS),
 
@@ -403,7 +404,7 @@ static struct rockchip_clk_branch rk3036_clk_branches[] __initdata = {
 	GATE(HCLK_OTG0, "hclk_otg0", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 13, GFLAGS),
 	GATE(HCLK_OTG1, "hclk_otg1", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(7), 3, GFLAGS),
 	GATE(HCLK_I2S, "hclk_i2s", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 2, GFLAGS),
-	GATE(0, "hclk_sfc", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(3), 14, GFLAGS),
+	GATE(HCLK_SFC, "hclk_sfc", "hclk_peri", 0, RK2928_CLKGATE_CON(3), 14, GFLAGS),
 	GATE(HCLK_MAC, "hclk_mac", "hclk_peri", 0, RK2928_CLKGATE_CON(3), 5, GFLAGS),
 
 	/* pclk_peri gates */
diff --git a/drivers/clk/rockchip/clk-rk3308.c b/drivers/clk/rockchip/clk-rk3308.c
index 2c3bd0c749f2..db3396c3e6e9 100644
--- a/drivers/clk/rockchip/clk-rk3308.c
+++ b/drivers/clk/rockchip/clk-rk3308.c
@@ -911,6 +911,7 @@ static const char *const rk3308_critical_clocks[] __initconst = {
 	"hclk_audio",
 	"pclk_audio",
 	"sclk_ddrc",
+	"clk_ddrphy4x",
 };
 
 static void __init rk3308_clk_init(struct device_node *np)
diff --git a/drivers/clk/rockchip/clk.c b/drivers/clk/rockchip/clk.c
index 049e5e0b64f6..b7be7e11b0df 100644
--- a/drivers/clk/rockchip/clk.c
+++ b/drivers/clk/rockchip/clk.c
@@ -22,6 +22,8 @@
 #include <linux/regmap.h>
 #include <linux/reboot.h>
 #include <linux/rational.h>
+
+#include "../clk-fractional-divider.h"
 #include "clk.h"
 
 /*
@@ -178,10 +180,8 @@ static void rockchip_fractional_approximation(struct clk_hw *hw,
 		unsigned long rate, unsigned long *parent_rate,
 		unsigned long *m, unsigned long *n)
 {
-	struct clk_fractional_divider *fd = to_clk_fd(hw);
 	unsigned long p_rate, p_parent_rate;
 	struct clk_hw *p_parent;
-	unsigned long scale;
 
 	p_rate = clk_hw_get_rate(clk_hw_get_parent(hw));
 	if ((rate * 20 > p_rate) && (p_rate % rate != 0)) {
@@ -190,18 +190,7 @@ static void rockchip_fractional_approximation(struct clk_hw *hw,
 		*parent_rate = p_parent_rate;
 	}
 
-	/*
-	 * Get rate closer to *parent_rate to guarantee there is no overflow
-	 * for m and n. In the result it will be the nearest rate left shifted
-	 * by (scale - fd->nwidth) bits.
-	 */
-	scale = fls_long(*parent_rate / rate - 1);
-	if (scale > fd->nwidth)
-		rate <<= scale - fd->nwidth;
-
-	rational_best_approximation(rate, *parent_rate,
-			GENMASK(fd->mwidth - 1, 0), GENMASK(fd->nwidth - 1, 0),
-			m, n);
+	clk_fractional_divider_general_approximation(hw, rate, parent_rate, m, n);
 }
 
 static struct clk *rockchip_clk_register_frac_branch(
diff --git a/drivers/clk/socfpga/clk-agilex.c b/drivers/clk/socfpga/clk-agilex.c
index 1cb21ea79c64..242e94c0cf8a 100644
--- a/drivers/clk/socfpga/clk-agilex.c
+++ b/drivers/clk/socfpga/clk-agilex.c
@@ -107,10 +107,10 @@ static const struct clk_parent_data gpio_db_free_mux[] = {
 };
 
 static const struct clk_parent_data psi_ref_free_mux[] = {
-	{ .fw_name = "main_pll_c3",
-	  .name = "main_pll_c3", },
-	{ .fw_name = "peri_pll_c3",
-	  .name = "peri_pll_c3", },
+	{ .fw_name = "main_pll_c2",
+	  .name = "main_pll_c2", },
+	{ .fw_name = "peri_pll_c2",
+	  .name = "peri_pll_c2", },
 	{ .fw_name = "osc1",
 	  .name = "osc1", },
 	{ .fw_name = "cb-intosc-hs-div2-clk",
@@ -195,6 +195,13 @@ static const struct clk_parent_data sdmmc_mux[] = {
 	  .name = "boot_clk", },
 };
 
+static const struct clk_parent_data s2f_user0_mux[] = {
+	{ .fw_name = "s2f_user0_free_clk",
+	  .name = "s2f_user0_free_clk", },
+	{ .fw_name = "boot_clk",
+	  .name = "boot_clk", },
+};
+
 static const struct clk_parent_data s2f_user1_mux[] = {
 	{ .fw_name = "s2f_user1_free_clk",
 	  .name = "s2f_user1_free_clk", },
@@ -273,7 +280,7 @@ static const struct stratix10_perip_cnt_clock agilex_main_perip_cnt_clks[] = {
 	{ AGILEX_SDMMC_FREE_CLK, "sdmmc_free_clk", NULL, sdmmc_free_mux,
 	  ARRAY_SIZE(sdmmc_free_mux), 0, 0xE4, 0, 0, 0},
 	{ AGILEX_S2F_USER0_FREE_CLK, "s2f_user0_free_clk", NULL, s2f_usr0_free_mux,
-	  ARRAY_SIZE(s2f_usr0_free_mux), 0, 0xE8, 0, 0, 0},
+	  ARRAY_SIZE(s2f_usr0_free_mux), 0, 0xE8, 0, 0x30, 2},
 	{ AGILEX_S2F_USER1_FREE_CLK, "s2f_user1_free_clk", NULL, s2f_usr1_free_mux,
 	  ARRAY_SIZE(s2f_usr1_free_mux), 0, 0xEC, 0, 0x88, 5},
 	{ AGILEX_PSI_REF_FREE_CLK, "psi_ref_free_clk", NULL, psi_ref_free_mux,
@@ -319,6 +326,8 @@ static const struct stratix10_gate_clock agilex_gate_clks[] = {
 	  4, 0x98, 0, 16, 0x88, 3, 0},
 	{ AGILEX_SDMMC_CLK, "sdmmc_clk", NULL, sdmmc_mux, ARRAY_SIZE(sdmmc_mux), 0, 0x7C,
 	  5, 0, 0, 0, 0x88, 4, 4},
+	{ AGILEX_S2F_USER0_CLK, "s2f_user0_clk", NULL, s2f_user0_mux, ARRAY_SIZE(s2f_user0_mux), 0, 0x24,
+	  6, 0, 0, 0, 0x30, 2, 0},
 	{ AGILEX_S2F_USER1_CLK, "s2f_user1_clk", NULL, s2f_user1_mux, ARRAY_SIZE(s2f_user1_mux), 0, 0x7C,
 	  6, 0, 0, 0, 0x88, 5, 0},
 	{ AGILEX_PSI_REF_CLK, "psi_ref_clk", NULL, psi_mux, ARRAY_SIZE(psi_mux), 0, 0x7C,
diff --git a/drivers/clk/tegra/clk-dfll.c b/drivers/clk/tegra/clk-dfll.c
index a5f526bb0483..6144447f86c6 100644
--- a/drivers/clk/tegra/clk-dfll.c
+++ b/drivers/clk/tegra/clk-dfll.c
@@ -1377,7 +1377,7 @@ static void dfll_debug_init(struct tegra_dfll *td)
 }
 
 #else
-static void inline dfll_debug_init(struct tegra_dfll *td) { }
+static inline void dfll_debug_init(struct tegra_dfll *td) { }
 #endif /* CONFIG_DEBUG_FS */
 
 /*
diff --git a/drivers/clk/tegra/clk-tegra-periph.c b/drivers/clk/tegra/clk-tegra-periph.c
index 292d6269daf1..4dcf7f7cb8a0 100644
--- a/drivers/clk/tegra/clk-tegra-periph.c
+++ b/drivers/clk/tegra/clk-tegra-periph.c
@@ -777,11 +777,7 @@ static struct tegra_periph_init_data gate_clks[] = {
 	GATE("ahbdma", "hclk", 33, 0, tegra_clk_ahbdma, 0),
 	GATE("apbdma", "pclk", 34, 0, tegra_clk_apbdma, 0),
 	GATE("kbc", "clk_32k", 36, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_kbc, 0),
-	/*
-	 * Critical for RAM re-repair operation, which must occur on resume
-	 * from LP1 system suspend and as part of CCPLEX cluster switching.
-	 */
-	GATE("fuse", "clk_m", 39, TEGRA_PERIPH_ON_APB, tegra_clk_fuse, CLK_IS_CRITICAL),
+	GATE("fuse", "clk_m", 39, TEGRA_PERIPH_ON_APB, tegra_clk_fuse, 0),
 	GATE("fuse_burn", "clk_m", 39, TEGRA_PERIPH_ON_APB, tegra_clk_fuse_burn, 0),
 	GATE("kfuse", "clk_m", 40, TEGRA_PERIPH_ON_APB, tegra_clk_kfuse, 0),
 	GATE("apbif", "clk_m", 107, TEGRA_PERIPH_ON_APB, tegra_clk_apbif, 0),
diff --git a/drivers/clk/ux500/u8500_of_clk.c b/drivers/clk/ux500/u8500_of_clk.c
index 0aedd42fad52..528c5bb397cc 100644
--- a/drivers/clk/ux500/u8500_of_clk.c
+++ b/drivers/clk/ux500/u8500_of_clk.c
@@ -99,10 +99,11 @@ static void u8500_clk_init(struct device_node *np)
 	if (fw_version != NULL) {
 		switch (fw_version->project) {
 		case PRCMU_FW_PROJECT_U8500_C2:
-		case PRCMU_FW_PROJECT_U8500_MBL:
+		case PRCMU_FW_PROJECT_U8500_SSG1:
 		case PRCMU_FW_PROJECT_U8520:
 		case PRCMU_FW_PROJECT_U8420:
 		case PRCMU_FW_PROJECT_U8420_SYSCLK:
+		case PRCMU_FW_PROJECT_U8500_SSG2:
 			sgaclk_parent = "soc0_pll";
 			break;
 		default:
diff --git a/drivers/clk/x86/Makefile b/drivers/clk/x86/Makefile
index 18564efdc651..1244c4e568ff 100644
--- a/drivers/clk/x86/Makefile
+++ b/drivers/clk/x86/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_PMC_ATOM)		+= clk-pmc-atom.o
 obj-$(CONFIG_X86_AMD_PLATFORM_DEVICE)	+= clk-fch.o
-clk-x86-lpss-objs		:= clk-lpt.o
+clk-x86-lpss-y			:= clk-lpss-atom.o
 obj-$(CONFIG_X86_INTEL_LPSS)	+= clk-x86-lpss.o
 obj-$(CONFIG_CLK_LGM_CGU)	+= clk-cgu.o clk-cgu-pll.o clk-lgm.o
diff --git a/drivers/clk/x86/clk-lpt.c b/drivers/clk/x86/clk-lpss-atom.c
index fbe9fd3ed948..aa9d0bb98f8b 100644
--- a/drivers/clk/x86/clk-lpt.c
+++ b/drivers/clk/x86/clk-lpss-atom.c
@@ -13,7 +13,7 @@
 #include <linux/platform_data/x86/clk-lpss.h>
 #include <linux/platform_device.h>
 
-static int lpt_clk_probe(struct platform_device *pdev)
+static int lpss_atom_clk_probe(struct platform_device *pdev)
 {
 	struct lpss_clk_data *drvdata;
 	struct clk *clk;
@@ -34,14 +34,14 @@ static int lpt_clk_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static struct platform_driver lpt_clk_driver = {
+static struct platform_driver lpss_atom_clk_driver = {
 	.driver = {
-		.name = "clk-lpt",
+		.name = "clk-lpss-atom",
 	},
-	.probe = lpt_clk_probe,
+	.probe = lpss_atom_clk_probe,
 };
 
-int __init lpt_clk_init(void)
+int __init lpss_atom_clk_init(void)
 {
-	return platform_driver_register(&lpt_clk_driver);
+	return platform_driver_register(&lpss_atom_clk_driver);
 }
diff --git a/drivers/clk/zynqmp/clk-gate-zynqmp.c b/drivers/clk/zynqmp/clk-gate-zynqmp.c
index 695feaa82da5..565ed67a0430 100644
--- a/drivers/clk/zynqmp/clk-gate-zynqmp.c
+++ b/drivers/clk/zynqmp/clk-gate-zynqmp.c
@@ -12,7 +12,7 @@
 #include "clk-zynqmp.h"
 
 /**
- * struct clk_gate - gating clock
+ * struct zynqmp_clk_gate - gating clock
  * @hw:		handle between common and hardware-specific interfaces
  * @flags:	hardware-specific flags
  * @clk_id:	Id of clock
@@ -66,7 +66,7 @@ static void zynqmp_clk_gate_disable(struct clk_hw *hw)
 }
 
 /**
- * zynqmp_clk_gate_is_enable() - Check clock state
+ * zynqmp_clk_gate_is_enabled() - Check clock state
  * @hw:		handle between common and hardware-specific interfaces
  *
  * Return: 1 if enabled, 0 if disabled else error code
diff --git a/drivers/clk/zynqmp/clk-mux-zynqmp.c b/drivers/clk/zynqmp/clk-mux-zynqmp.c
index 157d4a960bf4..17afce594f28 100644
--- a/drivers/clk/zynqmp/clk-mux-zynqmp.c
+++ b/drivers/clk/zynqmp/clk-mux-zynqmp.c
@@ -159,7 +159,7 @@ struct clk_hw *zynqmp_clk_register_mux(const char *name, u32 clk_id,
 	hw = &mux->hw;
 	ret = clk_hw_register(NULL, hw);
 	if (ret) {
-		kfree(hw);
+		kfree(mux);
 		hw = ERR_PTR(ret);
 	}
 
diff --git a/drivers/clk/zynqmp/clk-zynqmp.h b/drivers/clk/zynqmp/clk-zynqmp.h
index 84fa80a969a9..60cbc0674a9e 100644
--- a/drivers/clk/zynqmp/clk-zynqmp.h
+++ b/drivers/clk/zynqmp/clk-zynqmp.h
@@ -56,6 +56,7 @@ enum topology_type {
  * @type:	Type of topology
  * @flag:	Topology flags
  * @type_flag:	Topology type specific flag
+ * @custom_type_flag: Topology type specific custom flag
  */
 struct clock_topology {
 	u32 type;
diff --git a/drivers/clk/zynqmp/clkc.c b/drivers/clk/zynqmp/clkc.c
index 871184e406e1..eb25303eefed 100644
--- a/drivers/clk/zynqmp/clkc.c
+++ b/drivers/clk/zynqmp/clkc.c
@@ -762,9 +762,7 @@ static int zynqmp_clk_setup(struct device_node *np)
 	zynqmp_register_clocks(np);
 
 	zynqmp_data->num = clock_max_idx;
-	of_clk_add_hw_provider(np, of_clk_hw_onecell_get, zynqmp_data);
-
-	return 0;
+	return of_clk_add_hw_provider(np, of_clk_hw_onecell_get, zynqmp_data);
 }
 
 static int zynqmp_clock_probe(struct platform_device *pdev)
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index eb661b539a3e..0f5e3983951a 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -234,8 +234,9 @@ config CLKSRC_LPC32XX
 	  Support for the LPC32XX clocksource.
 
 config CLKSRC_PISTACHIO
-	bool "Clocksource for Pistachio SoC" if COMPILE_TEST
+	bool "Clocksource for Pistachio SoC"
 	depends on HAS_IOMEM
+	depends on MIPS || COMPILE_TEST
 	select TIMER_OF
 	help
 	  Enables the clocksource for the Pistachio SoC.
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index a5c5f70acfc9..954749afb5fe 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -133,6 +133,18 @@ config ARM_MEDIATEK_CPUFREQ
 	help
 	  This adds the CPUFreq driver support for MediaTek SoCs.
 
+config ARM_MEDIATEK_CPUFREQ_HW
+	tristate "MediaTek CPUFreq HW driver"
+	depends on ARCH_MEDIATEK || COMPILE_TEST
+	default m
+	help
+	  Support for the CPUFreq HW driver.
+	  Some MediaTek chipsets have a HW engine to offload the steps
+	  necessary for changing the frequency of the CPUs. Firmware loaded
+	  in this engine exposes a programming interface to the OS.
+	  The driver implements the cpufreq interface for this HW engine.
+	  Say Y if you want to support CPUFreq HW.
+
 config ARM_OMAP2PLUS_CPUFREQ
 	bool "TI OMAP2+"
 	depends on ARCH_OMAP2PLUS
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 27d3bd7ea9d4..48ee5859030c 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_ARM_IMX6Q_CPUFREQ)		+= imx6q-cpufreq.o
 obj-$(CONFIG_ARM_IMX_CPUFREQ_DT)	+= imx-cpufreq-dt.o
 obj-$(CONFIG_ARM_KIRKWOOD_CPUFREQ)	+= kirkwood-cpufreq.o
 obj-$(CONFIG_ARM_MEDIATEK_CPUFREQ)	+= mediatek-cpufreq.o
+obj-$(CONFIG_ARM_MEDIATEK_CPUFREQ_HW)	+= mediatek-cpufreq-hw.o
 obj-$(CONFIG_MACH_MVEBU_V7)		+= mvebu-cpufreq.o
 obj-$(CONFIG_ARM_OMAP2PLUS_CPUFREQ)	+= omap-cpufreq.o
 obj-$(CONFIG_ARM_PXA2xx_CPUFREQ)	+= pxa2xx-cpufreq.o
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index b49612895c78..28467d83c745 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -889,6 +889,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	policy->fast_switch_possible = !acpi_pstate_strict &&
 		!(policy_is_shared(policy) && policy->shared_type != CPUFREQ_SHARED_TYPE_ANY);
 
+	if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency)
+		pr_warn(FW_WARN "P-state 0 is not max freq\n");
+
 	return result;
 
 err_unreg:
@@ -918,16 +921,6 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 	return 0;
 }
 
-static void acpi_cpufreq_cpu_ready(struct cpufreq_policy *policy)
-{
-	struct acpi_processor_performance *perf = per_cpu_ptr(acpi_perf_data,
-							      policy->cpu);
-	unsigned int freq = policy->freq_table[0].frequency;
-
-	if (perf->states[0].core_frequency * 1000 != freq)
-		pr_warn(FW_WARN "P-state 0 is not max freq\n");
-}
-
 static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
 {
 	struct acpi_cpufreq_data *data = policy->driver_data;
@@ -955,7 +948,6 @@ static struct cpufreq_driver acpi_cpufreq_driver = {
 	.bios_limit	= acpi_processor_get_bios_limit,
 	.init		= acpi_cpufreq_cpu_init,
 	.exit		= acpi_cpufreq_cpu_exit,
-	.ready		= acpi_cpufreq_cpu_ready,
 	.resume		= acpi_cpufreq_resume,
 	.name		= "acpi-cpufreq",
 	.attr		= acpi_cpufreq_attr,
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 231e585f6ba2..ca1d103ec449 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -137,11 +137,15 @@ static const struct of_device_id blocklist[] __initconst = {
 	{ .compatible = "qcom,apq8096", },
 	{ .compatible = "qcom,msm8996", },
 	{ .compatible = "qcom,qcs404", },
+	{ .compatible = "qcom,sa8155p" },
 	{ .compatible = "qcom,sc7180", },
 	{ .compatible = "qcom,sc7280", },
 	{ .compatible = "qcom,sc8180x", },
 	{ .compatible = "qcom,sdm845", },
+	{ .compatible = "qcom,sm6350", },
 	{ .compatible = "qcom,sm8150", },
+	{ .compatible = "qcom,sm8250", },
+	{ .compatible = "qcom,sm8350", },
 
 	{ .compatible = "st,stih407", },
 	{ .compatible = "st,stih410", },
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index ece52863ba62..8fcaba541539 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -143,8 +143,6 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 		cpufreq_dt_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs;
 	}
 
-	dev_pm_opp_of_register_em(cpu_dev, policy->cpus);
-
 	return 0;
 
 out_clk_put:
@@ -184,6 +182,7 @@ static struct cpufreq_driver dt_cpufreq_driver = {
 	.exit = cpufreq_exit,
 	.online = cpufreq_online,
 	.offline = cpufreq_offline,
+	.register_em = cpufreq_register_em_with_opp,
 	.name = "cpufreq-dt",
 	.attr = cpufreq_dt_attr,
 	.suspend = cpufreq_generic_suspend,
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 06c526d66dd3..5782b15a8caa 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1491,6 +1491,19 @@ static int cpufreq_online(unsigned int cpu)
 		write_lock_irqsave(&cpufreq_driver_lock, flags);
 		list_add(&policy->policy_list, &cpufreq_policy_list);
 		write_unlock_irqrestore(&cpufreq_driver_lock, flags);
+
+		/*
+		 * Register with the energy model before
+		 * sched_cpufreq_governor_change() is called, which will result
+		 * in rebuilding of the sched domains, which should only be done
+		 * once the energy model is properly initialized for the policy
+		 * first.
+		 *
+		 * Also, this should be called before the policy is registered
+		 * with cooling framework.
+		 */
+		if (cpufreq_driver->register_em)
+			cpufreq_driver->register_em(policy);
 	}
 
 	ret = cpufreq_init_policy(policy);
@@ -1504,10 +1517,6 @@ static int cpufreq_online(unsigned int cpu)
 
 	kobject_uevent(&policy->kobj, KOBJ_ADD);
 
-	/* Callback for handling stuff after policy is ready */
-	if (cpufreq_driver->ready)
-		cpufreq_driver->ready(policy);
-
 	if (cpufreq_thermal_control_enabled(cpufreq_driver))
 		policy->cdev = of_cpufreq_cooling_register(policy);
 
diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index 5bf5fc759881..90beb26ed34e 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -192,7 +192,6 @@ static int imx6q_cpufreq_init(struct cpufreq_policy *policy)
 	policy->clk = clks[ARM].clk;
 	cpufreq_generic_init(policy, freq_table, transition_latency);
 	policy->suspend_freq = max_freq;
-	dev_pm_opp_of_register_em(cpu_dev, policy->cpus);
 
 	return 0;
 }
@@ -204,6 +203,7 @@ static struct cpufreq_driver imx6q_cpufreq_driver = {
 	.target_index = imx6q_set_target,
 	.get = cpufreq_generic_get,
 	.init = imx6q_cpufreq_init,
+	.register_em = cpufreq_register_em_with_opp,
 	.name = "imx6q-cpufreq",
 	.attr = cpufreq_generic_attr,
 	.suspend = cpufreq_generic_suspend,
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index b4ffe6c8a0d0..1097f826ad70 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -32,7 +32,6 @@
 #include <asm/cpu_device_id.h>
 #include <asm/cpufeature.h>
 #include <asm/intel-family.h>
-#include "../drivers/thermal/intel/thermal_interrupt.h"
 
 #define INTEL_PSTATE_SAMPLING_INTERVAL	(10 * NSEC_PER_MSEC)
 
@@ -220,7 +219,6 @@ struct global_params {
  * @sched_flags:	Store scheduler flags for possible cross CPU update
  * @hwp_boost_min:	Last HWP boosted min performance
  * @suspended:		Whether or not the driver has been suspended.
- * @hwp_notify_work:	workqueue for HWP notifications.
  *
  * This structure stores per CPU instance data for all CPUs.
  */
@@ -259,7 +257,6 @@ struct cpudata {
 	unsigned int sched_flags;
 	u32 hwp_boost_min;
 	bool suspended;
-	struct delayed_work hwp_notify_work;
 };
 
 static struct cpudata **all_cpu_data;
@@ -271,6 +268,7 @@ static struct cpudata **all_cpu_data;
  * @get_min:		Callback to get minimum P state
  * @get_turbo:		Callback to get turbo P state
  * @get_scaling:	Callback to get frequency scaling factor
+ * @get_cpu_scaling:	Get frequency scaling factor for a given cpu
  * @get_aperf_mperf_shift: Callback to get the APERF vs MPERF frequency difference
  * @get_val:		Callback to convert P state to actual MSR write value
  * @get_vid:		Callback to get VID data for Atom platforms
@@ -284,6 +282,7 @@ struct pstate_funcs {
 	int (*get_min)(void);
 	int (*get_turbo)(void);
 	int (*get_scaling)(void);
+	int (*get_cpu_scaling)(int cpu);
 	int (*get_aperf_mperf_shift)(void);
 	u64 (*get_val)(struct cpudata*, int pstate);
 	void (*get_vid)(struct cpudata *);
@@ -387,6 +386,15 @@ static int intel_pstate_get_cppc_guaranteed(int cpu)
 	return cppc_perf.nominal_perf;
 }
 
+static u32 intel_pstate_cppc_nominal(int cpu)
+{
+	u64 nominal_perf;
+
+	if (cppc_get_nominal_perf(cpu, &nominal_perf))
+		return 0;
+
+	return nominal_perf;
+}
 #else /* CONFIG_ACPI_CPPC_LIB */
 static inline void intel_pstate_set_itmt_prio(int cpu)
 {
@@ -473,20 +481,6 @@ static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
 
 	acpi_processor_unregister_performance(policy->cpu);
 }
-
-static bool intel_pstate_cppc_perf_valid(u32 perf, struct cppc_perf_caps *caps)
-{
-	return perf && perf <= caps->highest_perf && perf >= caps->lowest_perf;
-}
-
-static bool intel_pstate_cppc_perf_caps(struct cpudata *cpu,
-					struct cppc_perf_caps *caps)
-{
-	if (cppc_get_perf_caps(cpu->cpu, caps))
-		return false;
-
-	return caps->highest_perf && caps->lowest_perf <= caps->highest_perf;
-}
 #else /* CONFIG_ACPI */
 static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
 {
@@ -509,15 +503,8 @@ static inline int intel_pstate_get_cppc_guaranteed(int cpu)
 }
 #endif /* CONFIG_ACPI_CPPC_LIB */
 
-static void intel_pstate_hybrid_hwp_perf_ctl_parity(struct cpudata *cpu)
-{
-	pr_debug("CPU%d: Using PERF_CTL scaling for HWP\n", cpu->cpu);
-
-	cpu->pstate.scaling = cpu->pstate.perf_ctl_scaling;
-}
-
 /**
- * intel_pstate_hybrid_hwp_calibrate - Calibrate HWP performance levels.
+ * intel_pstate_hybrid_hwp_adjust - Calibrate HWP performance levels.
  * @cpu: Target CPU.
  *
  * On hybrid processors, HWP may expose more performance levels than there are
@@ -525,115 +512,46 @@ static void intel_pstate_hybrid_hwp_perf_ctl_parity(struct cpudata *cpu)
  * scaling factor between HWP performance levels and CPU frequency will be less
  * than the scaling factor between P-state values and CPU frequency.
  *
- * In that case, the scaling factor between HWP performance levels and CPU
- * frequency needs to be determined which can be done with the help of the
- * observation that certain HWP performance levels should correspond to certain
- * P-states, like for example the HWP highest performance should correspond
- * to the maximum turbo P-state of the CPU.
+ * In that case, adjust the CPU parameters used in computations accordingly.
  */
-static void intel_pstate_hybrid_hwp_calibrate(struct cpudata *cpu)
+static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu)
 {
 	int perf_ctl_max_phys = cpu->pstate.max_pstate_physical;
 	int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling;
 	int perf_ctl_turbo = pstate_funcs.get_turbo();
 	int turbo_freq = perf_ctl_turbo * perf_ctl_scaling;
-	int perf_ctl_max = pstate_funcs.get_max();
-	int max_freq = perf_ctl_max * perf_ctl_scaling;
-	int scaling = INT_MAX;
-	int freq;
+	int scaling = cpu->pstate.scaling;
 
 	pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys);
-	pr_debug("CPU%d: perf_ctl_max = %d\n", cpu->cpu, perf_ctl_max);
+	pr_debug("CPU%d: perf_ctl_max = %d\n", cpu->cpu, pstate_funcs.get_max());
 	pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo);
 	pr_debug("CPU%d: perf_ctl_scaling = %d\n", cpu->cpu, perf_ctl_scaling);
-
 	pr_debug("CPU%d: HWP_CAP guaranteed = %d\n", cpu->cpu, cpu->pstate.max_pstate);
 	pr_debug("CPU%d: HWP_CAP highest = %d\n", cpu->cpu, cpu->pstate.turbo_pstate);
-
-#ifdef CONFIG_ACPI
-	if (IS_ENABLED(CONFIG_ACPI_CPPC_LIB)) {
-		struct cppc_perf_caps caps;
-
-		if (intel_pstate_cppc_perf_caps(cpu, &caps)) {
-			if (intel_pstate_cppc_perf_valid(caps.nominal_perf, &caps)) {
-				pr_debug("CPU%d: Using CPPC nominal\n", cpu->cpu);
-
-				/*
-				 * If the CPPC nominal performance is valid, it
-				 * can be assumed to correspond to cpu_khz.
-				 */
-				if (caps.nominal_perf == perf_ctl_max_phys) {
-					intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
-					return;
-				}
-				scaling = DIV_ROUND_UP(cpu_khz, caps.nominal_perf);
-			} else if (intel_pstate_cppc_perf_valid(caps.guaranteed_perf, &caps)) {
-				pr_debug("CPU%d: Using CPPC guaranteed\n", cpu->cpu);
-
-				/*
-				 * If the CPPC guaranteed performance is valid,
-				 * it can be assumed to correspond to max_freq.
-				 */
-				if (caps.guaranteed_perf == perf_ctl_max) {
-					intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
-					return;
-				}
-				scaling = DIV_ROUND_UP(max_freq, caps.guaranteed_perf);
-			}
-		}
-	}
-#endif
-	/*
-	 * If using the CPPC data to compute the HWP-to-frequency scaling factor
-	 * doesn't work, use the HWP_CAP gauranteed perf for this purpose with
-	 * the assumption that it corresponds to max_freq.
-	 */
-	if (scaling > perf_ctl_scaling) {
-		pr_debug("CPU%d: Using HWP_CAP guaranteed\n", cpu->cpu);
-
-		if (cpu->pstate.max_pstate == perf_ctl_max) {
-			intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
-			return;
-		}
-		scaling = DIV_ROUND_UP(max_freq, cpu->pstate.max_pstate);
-		if (scaling > perf_ctl_scaling) {
-			/*
-			 * This should not happen, because it would mean that
-			 * the number of HWP perf levels was less than the
-			 * number of P-states, so use the PERF_CTL scaling in
-			 * that case.
-			 */
-			pr_debug("CPU%d: scaling (%d) out of range\n", cpu->cpu,
-				scaling);
-
-			intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
-			return;
-		}
-	}
+	pr_debug("CPU%d: HWP-to-frequency scaling factor: %d\n", cpu->cpu, scaling);
 
 	/*
-	 * If the product of the HWP performance scaling factor obtained above
-	 * and the HWP_CAP highest performance is greater than the maximum turbo
-	 * frequency corresponding to the pstate_funcs.get_turbo() return value,
-	 * the scaling factor is too high, so recompute it so that the HWP_CAP
-	 * highest performance corresponds to the maximum turbo frequency.
+	 * If the product of the HWP performance scaling factor and the HWP_CAP
+	 * highest performance is greater than the maximum turbo frequency
+	 * corresponding to the pstate_funcs.get_turbo() return value, the
+	 * scaling factor is too high, so recompute it to make the HWP_CAP
+	 * highest performance correspond to the maximum turbo frequency.
 	 */
 	if (turbo_freq < cpu->pstate.turbo_pstate * scaling) {
-		pr_debug("CPU%d: scaling too high (%d)\n", cpu->cpu, scaling);
-
 		cpu->pstate.turbo_freq = turbo_freq;
 		scaling = DIV_ROUND_UP(turbo_freq, cpu->pstate.turbo_pstate);
-	}
-
-	cpu->pstate.scaling = scaling;
+		cpu->pstate.scaling = scaling;
 
-	pr_debug("CPU%d: HWP-to-frequency scaling factor: %d\n", cpu->cpu, scaling);
+		pr_debug("CPU%d: refined HWP-to-frequency scaling factor: %d\n",
+			 cpu->cpu, scaling);
+	}
 
 	cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling,
 					 perf_ctl_scaling);
 
-	freq = perf_ctl_max_phys * perf_ctl_scaling;
-	cpu->pstate.max_pstate_physical = DIV_ROUND_UP(freq, scaling);
+	cpu->pstate.max_pstate_physical =
+			DIV_ROUND_UP(perf_ctl_max_phys * perf_ctl_scaling,
+				     scaling);
 
 	cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling;
 	/*
@@ -1628,40 +1546,6 @@ static void intel_pstate_sysfs_hide_hwp_dynamic_boost(void)
 
 /************************** sysfs end ************************/
 
-static void intel_pstate_notify_work(struct work_struct *work)
-{
-	mutex_lock(&intel_pstate_driver_lock);
-	cpufreq_update_policy(smp_processor_id());
-	wrmsrl(MSR_HWP_STATUS, 0);
-	mutex_unlock(&intel_pstate_driver_lock);
-}
-
-void notify_hwp_interrupt(void)
-{
-	unsigned int this_cpu = smp_processor_id();
-	struct cpudata *cpudata;
-	u64 value;
-
-	if (!hwp_active || !boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
-		return;
-
-	rdmsrl(MSR_HWP_STATUS, value);
-	if (!(value & 0x01))
-		return;
-
-	cpudata = all_cpu_data[this_cpu];
-	schedule_delayed_work_on(this_cpu, &cpudata->hwp_notify_work, msecs_to_jiffies(10));
-}
-
-static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata)
-{
-	/* Enable HWP notification interrupt for guaranteed performance change */
-	if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) {
-		INIT_DELAYED_WORK(&cpudata->hwp_notify_work, intel_pstate_notify_work);
-		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x01);
-	}
-}
-
 static void intel_pstate_hwp_enable(struct cpudata *cpudata)
 {
 	/* First disable HWP notification interrupt as we don't process them */
@@ -1671,8 +1555,6 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata)
 	wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
 	if (cpudata->epp_default == -EINVAL)
 		cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
-
-	intel_pstate_enable_hwp_interrupt(cpudata);
 }
 
 static int atom_get_min_pstate(void)
@@ -1900,6 +1782,38 @@ static int knl_get_turbo_pstate(void)
 	return ret;
 }
 
+#ifdef CONFIG_ACPI_CPPC_LIB
+static u32 hybrid_ref_perf;
+
+static int hybrid_get_cpu_scaling(int cpu)
+{
+	return DIV_ROUND_UP(core_get_scaling() * hybrid_ref_perf,
+			    intel_pstate_cppc_nominal(cpu));
+}
+
+static void intel_pstate_cppc_set_cpu_scaling(void)
+{
+	u32 min_nominal_perf = U32_MAX;
+	int cpu;
+
+	for_each_present_cpu(cpu) {
+		u32 nominal_perf = intel_pstate_cppc_nominal(cpu);
+
+		if (nominal_perf && nominal_perf < min_nominal_perf)
+			min_nominal_perf = nominal_perf;
+	}
+
+	if (min_nominal_perf < U32_MAX) {
+		hybrid_ref_perf = min_nominal_perf;
+		pstate_funcs.get_cpu_scaling = hybrid_get_cpu_scaling;
+	}
+}
+#else
+static inline void intel_pstate_cppc_set_cpu_scaling(void)
+{
+}
+#endif /* CONFIG_ACPI_CPPC_LIB */
+
 static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
 {
 	trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
@@ -1928,10 +1842,8 @@ static void intel_pstate_max_within_limits(struct cpudata *cpu)
 
 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 {
-	bool hybrid_cpu = boot_cpu_has(X86_FEATURE_HYBRID_CPU);
 	int perf_ctl_max_phys = pstate_funcs.get_max_physical();
-	int perf_ctl_scaling = hybrid_cpu ? cpu_khz / perf_ctl_max_phys :
-					    pstate_funcs.get_scaling();
+	int perf_ctl_scaling = pstate_funcs.get_scaling();
 
 	cpu->pstate.min_pstate = pstate_funcs.get_min();
 	cpu->pstate.max_pstate_physical = perf_ctl_max_phys;
@@ -1940,10 +1852,13 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 	if (hwp_active && !hwp_mode_bdw) {
 		__intel_pstate_get_hwp_cap(cpu);
 
-		if (hybrid_cpu)
-			intel_pstate_hybrid_hwp_calibrate(cpu);
-		else
+		if (pstate_funcs.get_cpu_scaling) {
+			cpu->pstate.scaling = pstate_funcs.get_cpu_scaling(cpu->cpu);
+			if (cpu->pstate.scaling != perf_ctl_scaling)
+				intel_pstate_hybrid_hwp_adjust(cpu);
+		} else {
 			cpu->pstate.scaling = perf_ctl_scaling;
+		}
 	} else {
 		cpu->pstate.scaling = perf_ctl_scaling;
 		cpu->pstate.max_pstate = pstate_funcs.get_max();
@@ -3315,6 +3230,9 @@ static int __init intel_pstate_init(void)
 			if (!default_driver)
 				default_driver = &intel_pstate;
 
+			if (boot_cpu_has(X86_FEATURE_HYBRID_CPU))
+				intel_pstate_cppc_set_cpu_scaling();
+
 			goto hwp_cpu_matched;
 		}
 	} else {
diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c
new file mode 100644
index 000000000000..0cf18dd46b92
--- /dev/null
+++ b/drivers/cpufreq/mediatek-cpufreq-hw.c
@@ -0,0 +1,308 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2020 MediaTek Inc.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/cpufreq.h>
+#include <linux/energy_model.h>
+#include <linux/init.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+
+#define LUT_MAX_ENTRIES			32U
+#define LUT_FREQ			GENMASK(11, 0)
+#define LUT_ROW_SIZE			0x4
+#define CPUFREQ_HW_STATUS		BIT(0)
+#define SVS_HW_STATUS			BIT(1)
+#define POLL_USEC			1000
+#define TIMEOUT_USEC			300000
+
+enum {
+	REG_FREQ_LUT_TABLE,
+	REG_FREQ_ENABLE,
+	REG_FREQ_PERF_STATE,
+	REG_FREQ_HW_STATE,
+	REG_EM_POWER_TBL,
+	REG_FREQ_LATENCY,
+
+	REG_ARRAY_SIZE,
+};
+
+struct mtk_cpufreq_data {
+	struct cpufreq_frequency_table *table;
+	void __iomem *reg_bases[REG_ARRAY_SIZE];
+	int nr_opp;
+};
+
+static const u16 cpufreq_mtk_offsets[REG_ARRAY_SIZE] = {
+	[REG_FREQ_LUT_TABLE]	= 0x0,
+	[REG_FREQ_ENABLE]	= 0x84,
+	[REG_FREQ_PERF_STATE]	= 0x88,
+	[REG_FREQ_HW_STATE]	= 0x8c,
+	[REG_EM_POWER_TBL]	= 0x90,
+	[REG_FREQ_LATENCY]	= 0x110,
+};
+
+static int __maybe_unused
+mtk_cpufreq_get_cpu_power(unsigned long *mW,
+			  unsigned long *KHz, struct device *cpu_dev)
+{
+	struct mtk_cpufreq_data *data;
+	struct cpufreq_policy *policy;
+	int i;
+
+	policy = cpufreq_cpu_get_raw(cpu_dev->id);
+	if (!policy)
+		return 0;
+
+	data = policy->driver_data;
+
+	for (i = 0; i < data->nr_opp; i++) {
+		if (data->table[i].frequency < *KHz)
+			break;
+	}
+	i--;
+
+	*KHz = data->table[i].frequency;
+	*mW = readl_relaxed(data->reg_bases[REG_EM_POWER_TBL] +
+			    i * LUT_ROW_SIZE) / 1000;
+
+	return 0;
+}
+
+static int mtk_cpufreq_hw_target_index(struct cpufreq_policy *policy,
+				       unsigned int index)
+{
+	struct mtk_cpufreq_data *data = policy->driver_data;
+
+	writel_relaxed(index, data->reg_bases[REG_FREQ_PERF_STATE]);
+
+	return 0;
+}
+
+static unsigned int mtk_cpufreq_hw_get(unsigned int cpu)
+{
+	struct mtk_cpufreq_data *data;
+	struct cpufreq_policy *policy;
+	unsigned int index;
+
+	policy = cpufreq_cpu_get_raw(cpu);
+	if (!policy)
+		return 0;
+
+	data = policy->driver_data;
+
+	index = readl_relaxed(data->reg_bases[REG_FREQ_PERF_STATE]);
+	index = min(index, LUT_MAX_ENTRIES - 1);
+
+	return data->table[index].frequency;
+}
+
+static unsigned int mtk_cpufreq_hw_fast_switch(struct cpufreq_policy *policy,
+					       unsigned int target_freq)
+{
+	struct mtk_cpufreq_data *data = policy->driver_data;
+	unsigned int index;
+
+	index = cpufreq_table_find_index_dl(policy, target_freq);
+
+	writel_relaxed(index, data->reg_bases[REG_FREQ_PERF_STATE]);
+
+	return policy->freq_table[index].frequency;
+}
+
+static int mtk_cpu_create_freq_table(struct platform_device *pdev,
+				     struct mtk_cpufreq_data *data)
+{
+	struct device *dev = &pdev->dev;
+	u32 temp, i, freq, prev_freq = 0;
+	void __iomem *base_table;
+
+	data->table = devm_kcalloc(dev, LUT_MAX_ENTRIES + 1,
+				   sizeof(*data->table), GFP_KERNEL);
+	if (!data->table)
+		return -ENOMEM;
+
+	base_table = data->reg_bases[REG_FREQ_LUT_TABLE];
+
+	for (i = 0; i < LUT_MAX_ENTRIES; i++) {
+		temp = readl_relaxed(base_table + (i * LUT_ROW_SIZE));
+		freq = FIELD_GET(LUT_FREQ, temp) * 1000;
+
+		if (freq == prev_freq)
+			break;
+
+		data->table[i].frequency = freq;
+
+		dev_dbg(dev, "index=%d freq=%d\n", i, data->table[i].frequency);
+
+		prev_freq = freq;
+	}
+
+	data->table[i].frequency = CPUFREQ_TABLE_END;
+	data->nr_opp = i;
+
+	return 0;
+}
+
+static int mtk_cpu_resources_init(struct platform_device *pdev,
+				  struct cpufreq_policy *policy,
+				  const u16 *offsets)
+{
+	struct mtk_cpufreq_data *data;
+	struct device *dev = &pdev->dev;
+	void __iomem *base;
+	int ret, i;
+	int index;
+
+	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	index = of_perf_domain_get_sharing_cpumask(policy->cpu, "performance-domains",
+						   "#performance-domain-cells",
+						   policy->cpus);
+	if (index < 0)
+		return index;
+
+	base = devm_platform_ioremap_resource(pdev, index);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	for (i = REG_FREQ_LUT_TABLE; i < REG_ARRAY_SIZE; i++)
+		data->reg_bases[i] = base + offsets[i];
+
+	ret = mtk_cpu_create_freq_table(pdev, data);
+	if (ret) {
+		dev_info(dev, "Domain-%d failed to create freq table\n", index);
+		return ret;
+	}
+
+	policy->freq_table = data->table;
+	policy->driver_data = data;
+
+	return 0;
+}
+
+static int mtk_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
+{
+	struct platform_device *pdev = cpufreq_get_driver_data();
+	int sig, pwr_hw = CPUFREQ_HW_STATUS | SVS_HW_STATUS;
+	struct mtk_cpufreq_data *data;
+	unsigned int latency;
+	int ret;
+
+	/* Get the bases of cpufreq for domains */
+	ret = mtk_cpu_resources_init(pdev, policy, platform_get_drvdata(pdev));
+	if (ret) {
+		dev_info(&pdev->dev, "CPUFreq resource init failed\n");
+		return ret;
+	}
+
+	data = policy->driver_data;
+
+	latency = readl_relaxed(data->reg_bases[REG_FREQ_LATENCY]) * 1000;
+	if (!latency)
+		latency = CPUFREQ_ETERNAL;
+
+	policy->cpuinfo.transition_latency = latency;
+	policy->fast_switch_possible = true;
+
+	/* HW should be in enabled state to proceed now */
+	writel_relaxed(0x1, data->reg_bases[REG_FREQ_ENABLE]);
+	if (readl_poll_timeout(data->reg_bases[REG_FREQ_HW_STATE], sig,
+			       (sig & pwr_hw) == pwr_hw, POLL_USEC,
+			       TIMEOUT_USEC)) {
+		if (!(sig & CPUFREQ_HW_STATUS)) {
+			pr_info("cpufreq hardware of CPU%d is not enabled\n",
+				policy->cpu);
+			return -ENODEV;
+		}
+
+		pr_info("SVS of CPU%d is not enabled\n", policy->cpu);
+	}
+
+	return 0;
+}
+
+static int mtk_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy)
+{
+	struct mtk_cpufreq_data *data = policy->driver_data;
+
+	/* HW should be in paused state now */
+	writel_relaxed(0x0, data->reg_bases[REG_FREQ_ENABLE]);
+
+	return 0;
+}
+
+static void mtk_cpufreq_register_em(struct cpufreq_policy *policy)
+{
+	struct em_data_callback em_cb = EM_DATA_CB(mtk_cpufreq_get_cpu_power);
+	struct mtk_cpufreq_data *data = policy->driver_data;
+
+	em_dev_register_perf_domain(get_cpu_device(policy->cpu), data->nr_opp,
+				    &em_cb, policy->cpus, true);
+}
+
+static struct cpufreq_driver cpufreq_mtk_hw_driver = {
+	.flags		= CPUFREQ_NEED_INITIAL_FREQ_CHECK |
+			  CPUFREQ_HAVE_GOVERNOR_PER_POLICY |
+			  CPUFREQ_IS_COOLING_DEV,
+	.verify		= cpufreq_generic_frequency_table_verify,
+	.target_index	= mtk_cpufreq_hw_target_index,
+	.get		= mtk_cpufreq_hw_get,
+	.init		= mtk_cpufreq_hw_cpu_init,
+	.exit		= mtk_cpufreq_hw_cpu_exit,
+	.register_em	= mtk_cpufreq_register_em,
+	.fast_switch	= mtk_cpufreq_hw_fast_switch,
+	.name		= "mtk-cpufreq-hw",
+	.attr		= cpufreq_generic_attr,
+};
+
+static int mtk_cpufreq_hw_driver_probe(struct platform_device *pdev)
+{
+	const void *data;
+	int ret;
+
+	data = of_device_get_match_data(&pdev->dev);
+	if (!data)
+		return -EINVAL;
+
+	platform_set_drvdata(pdev, (void *) data);
+	cpufreq_mtk_hw_driver.driver_data = pdev;
+
+	ret = cpufreq_register_driver(&cpufreq_mtk_hw_driver);
+	if (ret)
+		dev_err(&pdev->dev, "CPUFreq HW driver failed to register\n");
+
+	return ret;
+}
+
+static int mtk_cpufreq_hw_driver_remove(struct platform_device *pdev)
+{
+	return cpufreq_unregister_driver(&cpufreq_mtk_hw_driver);
+}
+
+static const struct of_device_id mtk_cpufreq_hw_match[] = {
+	{ .compatible = "mediatek,cpufreq-hw", .data = &cpufreq_mtk_offsets },
+	{}
+};
+
+static struct platform_driver mtk_cpufreq_hw_driver = {
+	.probe = mtk_cpufreq_hw_driver_probe,
+	.remove = mtk_cpufreq_hw_driver_remove,
+	.driver = {
+		.name = "mtk-cpufreq-hw",
+		.of_match_table = mtk_cpufreq_hw_match,
+	},
+};
+module_platform_driver(mtk_cpufreq_hw_driver);
+
+MODULE_AUTHOR("Hector Yuan <hector.yuan@mediatek.com>");
+MODULE_DESCRIPTION("Mediatek cpufreq-hw driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c
index 87019d5a9547..866163883b48 100644
--- a/drivers/cpufreq/mediatek-cpufreq.c
+++ b/drivers/cpufreq/mediatek-cpufreq.c
@@ -448,8 +448,6 @@ static int mtk_cpufreq_init(struct cpufreq_policy *policy)
 	policy->driver_data = info;
 	policy->clk = info->cpu_clk;
 
-	dev_pm_opp_of_register_em(info->cpu_dev, policy->cpus);
-
 	return 0;
 }
 
@@ -471,6 +469,7 @@ static struct cpufreq_driver mtk_cpufreq_driver = {
 	.get = cpufreq_generic_get,
 	.init = mtk_cpufreq_init,
 	.exit = mtk_cpufreq_exit,
+	.register_em = cpufreq_register_em_with_opp,
 	.name = "mtk-cpufreq",
 	.attr = cpufreq_generic_attr,
 };
diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c
index e035ee216b0f..1b50df06c6bc 100644
--- a/drivers/cpufreq/omap-cpufreq.c
+++ b/drivers/cpufreq/omap-cpufreq.c
@@ -131,7 +131,6 @@ static int omap_cpu_init(struct cpufreq_policy *policy)
 
 	/* FIXME: what's the actual transition time? */
 	cpufreq_generic_init(policy, freq_table, 300 * 1000);
-	dev_pm_opp_of_register_em(mpu_dev, policy->cpus);
 
 	return 0;
 }
@@ -150,6 +149,7 @@ static struct cpufreq_driver omap_driver = {
 	.get		= cpufreq_generic_get,
 	.init		= omap_cpu_init,
 	.exit		= omap_cpu_exit,
+	.register_em	= cpufreq_register_em_with_opp,
 	.name		= "omap",
 	.attr		= cpufreq_generic_attr,
 };
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 23a06cba392c..5a2cf5f91ccb 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -36,6 +36,7 @@
 #define MAX_PSTATE_SHIFT	32
 #define LPSTATE_SHIFT		48
 #define GPSTATE_SHIFT		56
+#define MAX_NR_CHIPS		32
 
 #define MAX_RAMP_DOWN_TIME				5120
 /*
@@ -1046,12 +1047,20 @@ static int init_chip_info(void)
 	unsigned int *chip;
 	unsigned int cpu, i;
 	unsigned int prev_chip_id = UINT_MAX;
+	cpumask_t *chip_cpu_mask;
 	int ret = 0;
 
 	chip = kcalloc(num_possible_cpus(), sizeof(*chip), GFP_KERNEL);
 	if (!chip)
 		return -ENOMEM;
 
+	/* Allocate a chip cpu mask large enough to fit mask for all chips */
+	chip_cpu_mask = kcalloc(MAX_NR_CHIPS, sizeof(cpumask_t), GFP_KERNEL);
+	if (!chip_cpu_mask) {
+		ret = -ENOMEM;
+		goto free_and_return;
+	}
+
 	for_each_possible_cpu(cpu) {
 		unsigned int id = cpu_to_chip_id(cpu);
 
@@ -1059,22 +1068,25 @@ static int init_chip_info(void)
 			prev_chip_id = id;
 			chip[nr_chips++] = id;
 		}
+		cpumask_set_cpu(cpu, &chip_cpu_mask[nr_chips-1]);
 	}
 
 	chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL);
 	if (!chips) {
 		ret = -ENOMEM;
-		goto free_and_return;
+		goto out_free_chip_cpu_mask;
 	}
 
 	for (i = 0; i < nr_chips; i++) {
 		chips[i].id = chip[i];
-		cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i]));
+		cpumask_copy(&chips[i].mask, &chip_cpu_mask[i]);
 		INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn);
 		for_each_cpu(cpu, &chips[i].mask)
 			per_cpu(chip_info, cpu) =  &chips[i];
 	}
 
+out_free_chip_cpu_mask:
+	kfree(chip_cpu_mask);
 free_and_return:
 	kfree(chip);
 	return ret;
diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
index f86859bf76f1..a2be0df7e174 100644
--- a/drivers/cpufreq/qcom-cpufreq-hw.c
+++ b/drivers/cpufreq/qcom-cpufreq-hw.c
@@ -7,12 +7,14 @@
 #include <linux/cpufreq.h>
 #include <linux/init.h>
 #include <linux/interconnect.h>
+#include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_platform.h>
 #include <linux/pm_opp.h>
 #include <linux/slab.h>
+#include <linux/spinlock.h>
 
 #define LUT_MAX_ENTRIES			40U
 #define LUT_SRC				GENMASK(31, 30)
@@ -22,10 +24,13 @@
 #define CLK_HW_DIV			2
 #define LUT_TURBO_IND			1
 
+#define HZ_PER_KHZ			1000
+
 struct qcom_cpufreq_soc_data {
 	u32 reg_enable;
 	u32 reg_freq_lut;
 	u32 reg_volt_lut;
+	u32 reg_current_vote;
 	u32 reg_perf_state;
 	u8 lut_row_size;
 };
@@ -34,6 +39,16 @@ struct qcom_cpufreq_data {
 	void __iomem *base;
 	struct resource *res;
 	const struct qcom_cpufreq_soc_data *soc_data;
+
+	/*
+	 * Mutex to synchronize between de-init sequence and re-starting LMh
+	 * polling/interrupts
+	 */
+	struct mutex throttle_lock;
+	int throttle_irq;
+	bool cancel_throttle;
+	struct delayed_work throttle_work;
+	struct cpufreq_policy *policy;
 };
 
 static unsigned long cpu_hw_rate, xo_rate;
@@ -251,10 +266,92 @@ static void qcom_get_related_cpus(int index, struct cpumask *m)
 	}
 }
 
+static unsigned int qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data)
+{
+	unsigned int val = readl_relaxed(data->base + data->soc_data->reg_current_vote);
+
+	return (val & 0x3FF) * 19200;
+}
+
+static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data)
+{
+	unsigned long max_capacity, capacity, freq_hz, throttled_freq;
+	struct cpufreq_policy *policy = data->policy;
+	int cpu = cpumask_first(policy->cpus);
+	struct device *dev = get_cpu_device(cpu);
+	struct dev_pm_opp *opp;
+	unsigned int freq;
+
+	/*
+	 * Get the h/w throttled frequency, normalize it using the
+	 * registered opp table and use it to calculate thermal pressure.
+	 */
+	freq = qcom_lmh_get_throttle_freq(data);
+	freq_hz = freq * HZ_PER_KHZ;
+
+	opp = dev_pm_opp_find_freq_floor(dev, &freq_hz);
+	if (IS_ERR(opp) && PTR_ERR(opp) == -ERANGE)
+		dev_pm_opp_find_freq_ceil(dev, &freq_hz);
+
+	throttled_freq = freq_hz / HZ_PER_KHZ;
+
+	/* Update thermal pressure */
+
+	max_capacity = arch_scale_cpu_capacity(cpu);
+	capacity = mult_frac(max_capacity, throttled_freq, policy->cpuinfo.max_freq);
+
+	/* Don't pass boost capacity to scheduler */
+	if (capacity > max_capacity)
+		capacity = max_capacity;
+
+	arch_set_thermal_pressure(policy->cpus, max_capacity - capacity);
+
+	/*
+	 * In the unlikely case policy is unregistered do not enable
+	 * polling or h/w interrupt
+	 */
+	mutex_lock(&data->throttle_lock);
+	if (data->cancel_throttle)
+		goto out;
+
+	/*
+	 * If h/w throttled frequency is higher than what cpufreq has requested
+	 * for, then stop polling and switch back to interrupt mechanism.
+	 */
+	if (throttled_freq >= qcom_cpufreq_hw_get(cpu))
+		enable_irq(data->throttle_irq);
+	else
+		mod_delayed_work(system_highpri_wq, &data->throttle_work,
+				 msecs_to_jiffies(10));
+
+out:
+	mutex_unlock(&data->throttle_lock);
+}
+
+static void qcom_lmh_dcvs_poll(struct work_struct *work)
+{
+	struct qcom_cpufreq_data *data;
+
+	data = container_of(work, struct qcom_cpufreq_data, throttle_work.work);
+	qcom_lmh_dcvs_notify(data);
+}
+
+static irqreturn_t qcom_lmh_dcvs_handle_irq(int irq, void *data)
+{
+	struct qcom_cpufreq_data *c_data = data;
+
+	/* Disable interrupt and enable polling */
+	disable_irq_nosync(c_data->throttle_irq);
+	qcom_lmh_dcvs_notify(c_data);
+
+	return 0;
+}
+
 static const struct qcom_cpufreq_soc_data qcom_soc_data = {
 	.reg_enable = 0x0,
 	.reg_freq_lut = 0x110,
 	.reg_volt_lut = 0x114,
+	.reg_current_vote = 0x704,
 	.reg_perf_state = 0x920,
 	.lut_row_size = 32,
 };
@@ -274,6 +371,51 @@ static const struct of_device_id qcom_cpufreq_hw_match[] = {
 };
 MODULE_DEVICE_TABLE(of, qcom_cpufreq_hw_match);
 
+static int qcom_cpufreq_hw_lmh_init(struct cpufreq_policy *policy, int index)
+{
+	struct qcom_cpufreq_data *data = policy->driver_data;
+	struct platform_device *pdev = cpufreq_get_driver_data();
+	char irq_name[15];
+	int ret;
+
+	/*
+	 * Look for LMh interrupt. If no interrupt line is specified /
+	 * if there is an error, allow cpufreq to be enabled as usual.
+	 */
+	data->throttle_irq = platform_get_irq(pdev, index);
+	if (data->throttle_irq <= 0)
+		return data->throttle_irq == -EPROBE_DEFER ? -EPROBE_DEFER : 0;
+
+	data->cancel_throttle = false;
+	data->policy = policy;
+
+	mutex_init(&data->throttle_lock);
+	INIT_DEFERRABLE_WORK(&data->throttle_work, qcom_lmh_dcvs_poll);
+
+	snprintf(irq_name, sizeof(irq_name), "dcvsh-irq-%u", policy->cpu);
+	ret = request_threaded_irq(data->throttle_irq, NULL, qcom_lmh_dcvs_handle_irq,
+				   IRQF_ONESHOT, irq_name, data);
+	if (ret) {
+		dev_err(&pdev->dev, "Error registering %s: %d\n", irq_name, ret);
+		return 0;
+	}
+
+	return 0;
+}
+
+static void qcom_cpufreq_hw_lmh_exit(struct qcom_cpufreq_data *data)
+{
+	if (data->throttle_irq <= 0)
+		return;
+
+	mutex_lock(&data->throttle_lock);
+	data->cancel_throttle = true;
+	mutex_unlock(&data->throttle_lock);
+
+	cancel_delayed_work_sync(&data->throttle_work);
+	free_irq(data->throttle_irq, data);
+}
+
 static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
 {
 	struct platform_device *pdev = cpufreq_get_driver_data();
@@ -348,6 +490,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
 	}
 
 	policy->driver_data = data;
+	policy->dvfs_possible_from_any_cpu = true;
 
 	ret = qcom_cpufreq_hw_read_lut(cpu_dev, policy);
 	if (ret) {
@@ -362,14 +505,16 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
 		goto error;
 	}
 
-	dev_pm_opp_of_register_em(cpu_dev, policy->cpus);
-
 	if (policy_has_boost_freq(policy)) {
 		ret = cpufreq_enable_boost_support();
 		if (ret)
 			dev_warn(cpu_dev, "failed to enable boost: %d\n", ret);
 	}
 
+	ret = qcom_cpufreq_hw_lmh_init(policy, index);
+	if (ret)
+		goto error;
+
 	return 0;
 error:
 	kfree(data);
@@ -389,6 +534,7 @@ static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy)
 
 	dev_pm_opp_remove_all_dynamic(cpu_dev);
 	dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
+	qcom_cpufreq_hw_lmh_exit(data);
 	kfree(policy->freq_table);
 	kfree(data);
 	iounmap(base);
@@ -412,6 +558,7 @@ static struct cpufreq_driver cpufreq_qcom_hw_driver = {
 	.get		= qcom_cpufreq_hw_get,
 	.init		= qcom_cpufreq_hw_cpu_init,
 	.exit		= qcom_cpufreq_hw_cpu_exit,
+	.register_em	= cpufreq_register_em_with_opp,
 	.fast_switch    = qcom_cpufreq_hw_fast_switch,
 	.name		= "qcom-cpufreq-hw",
 	.attr		= qcom_cpufreq_hw_attr,
diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c
index 75f818d04b48..1e0cd4d165f0 100644
--- a/drivers/cpufreq/scmi-cpufreq.c
+++ b/drivers/cpufreq/scmi-cpufreq.c
@@ -22,7 +22,9 @@
 
 struct scmi_data {
 	int domain_id;
+	int nr_opp;
 	struct device *cpu_dev;
+	cpumask_var_t opp_shared_cpus;
 };
 
 static struct scmi_protocol_handle *ph;
@@ -123,9 +125,6 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy)
 	struct device *cpu_dev;
 	struct scmi_data *priv;
 	struct cpufreq_frequency_table *freq_table;
-	struct em_data_callback em_cb = EM_DATA_CB(scmi_get_cpu_power);
-	cpumask_var_t opp_shared_cpus;
-	bool power_scale_mw;
 
 	cpu_dev = get_cpu_device(policy->cpu);
 	if (!cpu_dev) {
@@ -133,9 +132,15 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy)
 		return -ENODEV;
 	}
 
-	if (!zalloc_cpumask_var(&opp_shared_cpus, GFP_KERNEL))
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
 		return -ENOMEM;
 
+	if (!zalloc_cpumask_var(&priv->opp_shared_cpus, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto out_free_priv;
+	}
+
 	/* Obtain CPUs that share SCMI performance controls */
 	ret = scmi_get_sharing_cpus(cpu_dev, policy->cpus);
 	if (ret) {
@@ -148,14 +153,14 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy)
 	 * The OPP 'sharing cpus' info may come from DT through an empty opp
 	 * table and opp-shared.
 	 */
-	ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, opp_shared_cpus);
-	if (ret || !cpumask_weight(opp_shared_cpus)) {
+	ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, priv->opp_shared_cpus);
+	if (ret || !cpumask_weight(priv->opp_shared_cpus)) {
 		/*
 		 * Either opp-table is not set or no opp-shared was found.
 		 * Use the CPU mask from SCMI to designate CPUs sharing an OPP
 		 * table.
 		 */
-		cpumask_copy(opp_shared_cpus, policy->cpus);
+		cpumask_copy(priv->opp_shared_cpus, policy->cpus);
 	}
 
 	 /*
@@ -180,7 +185,7 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy)
 			goto out_free_opp;
 		}
 
-		ret = dev_pm_opp_set_sharing_cpus(cpu_dev, opp_shared_cpus);
+		ret = dev_pm_opp_set_sharing_cpus(cpu_dev, priv->opp_shared_cpus);
 		if (ret) {
 			dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n",
 				__func__, ret);
@@ -188,21 +193,13 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy)
 			goto out_free_opp;
 		}
 
-		power_scale_mw = perf_ops->power_scale_mw_get(ph);
-		em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb,
-					    opp_shared_cpus, power_scale_mw);
-	}
-
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (!priv) {
-		ret = -ENOMEM;
-		goto out_free_opp;
+		priv->nr_opp = nr_opp;
 	}
 
 	ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table);
 	if (ret) {
 		dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret);
-		goto out_free_priv;
+		goto out_free_opp;
 	}
 
 	priv->cpu_dev = cpu_dev;
@@ -223,17 +220,16 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy)
 	policy->fast_switch_possible =
 		perf_ops->fast_switch_possible(ph, cpu_dev);
 
-	free_cpumask_var(opp_shared_cpus);
 	return 0;
 
-out_free_priv:
-	kfree(priv);
-
 out_free_opp:
 	dev_pm_opp_remove_all_dynamic(cpu_dev);
 
 out_free_cpumask:
-	free_cpumask_var(opp_shared_cpus);
+	free_cpumask_var(priv->opp_shared_cpus);
+
+out_free_priv:
+	kfree(priv);
 
 	return ret;
 }
@@ -244,11 +240,33 @@ static int scmi_cpufreq_exit(struct cpufreq_policy *policy)
 
 	dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table);
 	dev_pm_opp_remove_all_dynamic(priv->cpu_dev);
+	free_cpumask_var(priv->opp_shared_cpus);
 	kfree(priv);
 
 	return 0;
 }
 
+static void scmi_cpufreq_register_em(struct cpufreq_policy *policy)
+{
+	struct em_data_callback em_cb = EM_DATA_CB(scmi_get_cpu_power);
+	bool power_scale_mw = perf_ops->power_scale_mw_get(ph);
+	struct scmi_data *priv = policy->driver_data;
+
+	/*
+	 * This callback will be called for each policy, but we don't need to
+	 * register with EM every time. Despite not being part of the same
+	 * policy, some CPUs may still share their perf-domains, and a CPU from
+	 * another policy may already have registered with EM on behalf of CPUs
+	 * of this policy.
+	 */
+	if (!priv->nr_opp)
+		return;
+
+	em_dev_register_perf_domain(get_cpu_device(policy->cpu), priv->nr_opp,
+				    &em_cb, priv->opp_shared_cpus,
+				    power_scale_mw);
+}
+
 static struct cpufreq_driver scmi_cpufreq_driver = {
 	.name	= "scmi",
 	.flags	= CPUFREQ_HAVE_GOVERNOR_PER_POLICY |
@@ -261,6 +279,7 @@ static struct cpufreq_driver scmi_cpufreq_driver = {
 	.get	= scmi_cpufreq_get_rate,
 	.init	= scmi_cpufreq_init,
 	.exit	= scmi_cpufreq_exit,
+	.register_em	= scmi_cpufreq_register_em,
 };
 
 static int scmi_cpufreq_probe(struct scmi_device *sdev)
diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c
index d6a698a1b5d1..bda3e7d42964 100644
--- a/drivers/cpufreq/scpi-cpufreq.c
+++ b/drivers/cpufreq/scpi-cpufreq.c
@@ -163,8 +163,6 @@ static int scpi_cpufreq_init(struct cpufreq_policy *policy)
 
 	policy->fast_switch_possible = false;
 
-	dev_pm_opp_of_register_em(cpu_dev, policy->cpus);
-
 	return 0;
 
 out_free_cpufreq_table:
@@ -200,6 +198,7 @@ static struct cpufreq_driver scpi_cpufreq_driver = {
 	.init	= scpi_cpufreq_init,
 	.exit	= scpi_cpufreq_exit,
 	.target_index	= scpi_cpufreq_set_target,
+	.register_em	= cpufreq_register_em_with_opp,
 };
 
 static int scpi_cpufreq_probe(struct platform_device *pdev)
diff --git a/drivers/cpufreq/sh-cpufreq.c b/drivers/cpufreq/sh-cpufreq.c
index 1a251e635ebd..b8704232c27b 100644
--- a/drivers/cpufreq/sh-cpufreq.c
+++ b/drivers/cpufreq/sh-cpufreq.c
@@ -145,16 +145,6 @@ static int sh_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 	return 0;
 }
 
-static void sh_cpufreq_cpu_ready(struct cpufreq_policy *policy)
-{
-	struct device *dev = get_cpu_device(policy->cpu);
-
-	dev_info(dev, "CPU Frequencies - Minimum %u.%03u MHz, "
-	       "Maximum %u.%03u MHz.\n",
-	       policy->min / 1000, policy->min % 1000,
-	       policy->max / 1000, policy->max % 1000);
-}
-
 static struct cpufreq_driver sh_cpufreq_driver = {
 	.name		= "sh",
 	.flags		= CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING,
@@ -163,7 +153,6 @@ static struct cpufreq_driver sh_cpufreq_driver = {
 	.verify		= sh_cpufreq_verify,
 	.init		= sh_cpufreq_cpu_init,
 	.exit		= sh_cpufreq_cpu_exit,
-	.ready		= sh_cpufreq_cpu_ready,
 	.attr		= cpufreq_generic_attr,
 };
 
diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c
index 51dfa9ae6cf5..284b6bd040b1 100644
--- a/drivers/cpufreq/vexpress-spc-cpufreq.c
+++ b/drivers/cpufreq/vexpress-spc-cpufreq.c
@@ -15,7 +15,6 @@
 #include <linux/cpu.h>
 #include <linux/cpufreq.h>
 #include <linux/cpumask.h>
-#include <linux/cpu_cooling.h>
 #include <linux/device.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
@@ -47,7 +46,6 @@ static bool bL_switching_enabled;
 #define ACTUAL_FREQ(cluster, freq)  ((cluster == A7_CLUSTER) ? freq << 1 : freq)
 #define VIRT_FREQ(cluster, freq)    ((cluster == A7_CLUSTER) ? freq >> 1 : freq)
 
-static struct thermal_cooling_device *cdev[MAX_CLUSTERS];
 static struct clk *clk[MAX_CLUSTERS];
 static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS + 1];
 static atomic_t cluster_usage[MAX_CLUSTERS + 1];
@@ -442,8 +440,6 @@ static int ve_spc_cpufreq_init(struct cpufreq_policy *policy)
 	policy->freq_table = freq_table[cur_cluster];
 	policy->cpuinfo.transition_latency = 1000000; /* 1 ms */
 
-	dev_pm_opp_of_register_em(cpu_dev, policy->cpus);
-
 	if (is_bL_switching_enabled())
 		per_cpu(cpu_last_req_freq, policy->cpu) =
 						clk_get_cpu_rate(policy->cpu);
@@ -457,11 +453,6 @@ static int ve_spc_cpufreq_exit(struct cpufreq_policy *policy)
 	struct device *cpu_dev;
 	int cur_cluster = cpu_to_cluster(policy->cpu);
 
-	if (cur_cluster < MAX_CLUSTERS) {
-		cpufreq_cooling_unregister(cdev[cur_cluster]);
-		cdev[cur_cluster] = NULL;
-	}
-
 	cpu_dev = get_cpu_device(policy->cpu);
 	if (!cpu_dev) {
 		pr_err("%s: failed to get cpu%d device\n", __func__,
@@ -473,17 +464,6 @@ static int ve_spc_cpufreq_exit(struct cpufreq_policy *policy)
 	return 0;
 }
 
-static void ve_spc_cpufreq_ready(struct cpufreq_policy *policy)
-{
-	int cur_cluster = cpu_to_cluster(policy->cpu);
-
-	/* Do not register a cpu_cooling device if we are in IKS mode */
-	if (cur_cluster >= MAX_CLUSTERS)
-		return;
-
-	cdev[cur_cluster] = of_cpufreq_cooling_register(policy);
-}
-
 static struct cpufreq_driver ve_spc_cpufreq_driver = {
 	.name			= "vexpress-spc",
 	.flags			= CPUFREQ_HAVE_GOVERNOR_PER_POLICY |
@@ -493,7 +473,7 @@ static struct cpufreq_driver ve_spc_cpufreq_driver = {
 	.get			= ve_spc_cpufreq_get_rate,
 	.init			= ve_spc_cpufreq_init,
 	.exit			= ve_spc_cpufreq_exit,
-	.ready			= ve_spc_cpufreq_ready,
+	.register_em		= cpufreq_register_em_with_opp,
 	.attr			= cpufreq_generic_attr,
 };
 
@@ -553,6 +533,9 @@ static int ve_spc_cpufreq_probe(struct platform_device *pdev)
 	for (i = 0; i < MAX_CLUSTERS; i++)
 		mutex_init(&cluster_lock[i]);
 
+	if (!is_bL_switching_enabled())
+		ve_spc_cpufreq_driver.flags |= CPUFREQ_IS_COOLING_DEV;
+
 	ret = cpufreq_register_driver(&ve_spc_cpufreq_driver);
 	if (ret) {
 		pr_info("%s: Failed registering platform driver: %s, err: %d\n",
diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
index a2b5c6f60cf0..7e7ab5597d7a 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -346,11 +346,9 @@ static int pseries_cpuidle_driver_init(void)
 static void __init fixup_cede0_latency(void)
 {
 	struct xcede_latency_payload *payload;
-	u64 min_latency_us;
+	u64 min_xcede_latency_us = UINT_MAX;
 	int i;
 
-	min_latency_us = dedicated_states[1].exit_latency; // CEDE latency
-
 	if (parse_cede_parameters())
 		return;
 
@@ -358,42 +356,45 @@ static void __init fixup_cede0_latency(void)
 		nr_xcede_records);
 
 	payload = &xcede_latency_parameter.payload;
+
+	/*
+	 * The CEDE idle state maps to CEDE(0). While the hypervisor
+	 * does not advertise CEDE(0) exit latency values, it does
+	 * advertise the latency values of the extended CEDE states.
+	 * We use the lowest advertised exit latency value as a proxy
+	 * for the exit latency of CEDE(0).
+	 */
 	for (i = 0; i < nr_xcede_records; i++) {
 		struct xcede_latency_record *record = &payload->records[i];
+		u8 hint = record->hint;
 		u64 latency_tb = be64_to_cpu(record->latency_ticks);
 		u64 latency_us = DIV_ROUND_UP_ULL(tb_to_ns(latency_tb), NSEC_PER_USEC);
 
-		if (latency_us == 0)
-			pr_warn("cpuidle: xcede record %d has an unrealistic latency of 0us.\n", i);
-
-		if (latency_us < min_latency_us)
-			min_latency_us = latency_us;
-	}
-
-	/*
-	 * By default, we assume that CEDE(0) has exit latency 10us,
-	 * since there is no way for us to query from the platform.
-	 *
-	 * However, if the wakeup latency of an Extended CEDE state is
-	 * smaller than 10us, then we can be sure that CEDE(0)
-	 * requires no more than that.
-	 *
-	 * Perform the fix-up.
-	 */
-	if (min_latency_us < dedicated_states[1].exit_latency) {
 		/*
-		 * We set a minimum of 1us wakeup latency for cede0 to
-		 * distinguish it from snooze
+		 * We expect the exit latency of an extended CEDE
+		 * state to be non-zero, it to since it takes at least
+		 * a few nanoseconds to wakeup the idle CPU and
+		 * dispatch the virtual processor into the Linux
+		 * Guest.
+		 *
+		 * So we consider only non-zero value for performing
+		 * the fixup of CEDE(0) latency.
 		 */
-		u64 cede0_latency = 1;
+		if (latency_us == 0) {
+			pr_warn("cpuidle: Skipping xcede record %d [hint=%d]. Exit latency = 0us\n",
+				i, hint);
+			continue;
+		}
 
-		if (min_latency_us > cede0_latency)
-			cede0_latency = min_latency_us - 1;
+		if (latency_us < min_xcede_latency_us)
+			min_xcede_latency_us = latency_us;
+	}
 
-		dedicated_states[1].exit_latency = cede0_latency;
-		dedicated_states[1].target_residency = 10 * (cede0_latency);
+	if (min_xcede_latency_us != UINT_MAX) {
+		dedicated_states[1].exit_latency = min_xcede_latency_us;
+		dedicated_states[1].target_residency = 10 * (min_xcede_latency_us);
 		pr_info("cpuidle: Fixed up CEDE exit latency to %llu us\n",
-			cede0_latency);
+			min_xcede_latency_us);
 	}
 
 }
@@ -402,7 +403,7 @@ static void __init fixup_cede0_latency(void)
  * pseries_idle_probe()
  * Choose state table for shared versus dedicated partition
  */
-static int pseries_idle_probe(void)
+static int __init pseries_idle_probe(void)
 {
 
 	if (cpuidle_disable != IDLE_NO_OVERRIDE)
@@ -419,7 +420,21 @@ static int pseries_idle_probe(void)
 			cpuidle_state_table = shared_states;
 			max_idle_state = ARRAY_SIZE(shared_states);
 		} else {
-			fixup_cede0_latency();
+			/*
+			 * Use firmware provided latency values
+			 * starting with POWER10 platforms. In the
+			 * case that we are running on a POWER10
+			 * platform but in an earlier compat mode, we
+			 * can still use the firmware provided values.
+			 *
+			 * However, on platforms prior to POWER10, we
+			 * cannot rely on the accuracy of the firmware
+			 * provided latency values. On such platforms,
+			 * go with the conservative default estimate
+			 * of 10us.
+			 */
+			if (cpu_has_feature(CPU_FTR_ARCH_31) || pvr_version_is(PVR_POWER10))
+				fixup_cede0_latency();
 			cpuidle_state_table = dedicated_states;
 			max_idle_state = NR_DEDICATED_STATES;
 		}
diff --git a/drivers/cpuidle/cpuidle-ux500.c b/drivers/cpuidle/cpuidle-ux500.c
index a2d34be17a09..f7d778580e9b 100644
--- a/drivers/cpuidle/cpuidle-ux500.c
+++ b/drivers/cpuidle/cpuidle-ux500.c
@@ -117,7 +117,7 @@ static int dbx500_cpuidle_probe(struct platform_device *pdev)
 
 static struct platform_driver dbx500_cpuidle_plat_driver = {
 	.driver = {
-		.name = "cpuidle-dbx500",
+		.name = "db8500-cpuidle",
 	},
 	.probe = dbx500_cpuidle_probe,
 };
diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c
index 96bc7b5c6532..6c61817996a3 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_main.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_main.c
@@ -306,9 +306,7 @@ static int nitrox_device_flr(struct pci_dev *pdev)
 		return -ENOMEM;
 	}
 
-	/* check flr support */
-	if (pcie_has_flr(pdev))
-		pcie_flr(pdev);
+	pcie_reset_flr(pdev, PCI_RESET_DO_RESET);
 
 	pci_restore_state(pdev);
 
diff --git a/drivers/cxl/Makefile b/drivers/cxl/Makefile
index 32954059b37b..d1aaabc940f3 100644
--- a/drivers/cxl/Makefile
+++ b/drivers/cxl/Makefile
@@ -1,11 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_CXL_BUS) += cxl_core.o
+obj-$(CONFIG_CXL_BUS) += core/
 obj-$(CONFIG_CXL_MEM) += cxl_pci.o
 obj-$(CONFIG_CXL_ACPI) += cxl_acpi.o
 obj-$(CONFIG_CXL_PMEM) += cxl_pmem.o
 
-ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=CXL
-cxl_core-y := core.o
 cxl_pci-y := pci.o
 cxl_acpi-y := acpi.o
 cxl_pmem-y := pmem.o
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index 8ae89273f58e..54e9d4d2cf5f 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -243,6 +243,9 @@ static struct acpi_device *to_cxl_host_bridge(struct device *dev)
 {
 	struct acpi_device *adev = to_acpi_device(dev);
 
+	if (!acpi_pci_find_root(adev->handle))
+		return NULL;
+
 	if (strcmp(acpi_device_hid(adev), "ACPI0016") == 0)
 		return adev;
 	return NULL;
@@ -266,10 +269,6 @@ static int add_host_bridge_uport(struct device *match, void *arg)
 	if (!bridge)
 		return 0;
 
-	pci_root = acpi_pci_find_root(bridge->handle);
-	if (!pci_root)
-		return -ENXIO;
-
 	dport = find_dport_by_dev(root_port, match);
 	if (!dport) {
 		dev_dbg(host, "host bridge expected and not found\n");
@@ -282,6 +281,11 @@ static int add_host_bridge_uport(struct device *match, void *arg)
 		return PTR_ERR(port);
 	dev_dbg(host, "%s: add: %s\n", dev_name(match), dev_name(&port->dev));
 
+	/*
+	 * Note that this lookup already succeeded in
+	 * to_cxl_host_bridge(), so no need to check for failure here
+	 */
+	pci_root = acpi_pci_find_root(bridge->handle);
 	ctx = (struct cxl_walk_context){
 		.dev = host,
 		.root = pci_root->bus,
diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
new file mode 100644
index 000000000000..0fdbf3c6ac1a
--- /dev/null
+++ b/drivers/cxl/core/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_CXL_BUS) += cxl_core.o
+
+ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=CXL -I$(srctree)/drivers/cxl
+cxl_core-y := bus.o
+cxl_core-y += pmem.o
+cxl_core-y += regs.o
+cxl_core-y += memdev.o
diff --git a/drivers/cxl/core.c b/drivers/cxl/core/bus.c
index 2b90b7c3b9d7..267d8042bec2 100644
--- a/drivers/cxl/core.c
+++ b/drivers/cxl/core/bus.c
@@ -6,14 +6,22 @@
 #include <linux/pci.h>
 #include <linux/slab.h>
 #include <linux/idr.h>
-#include "cxl.h"
-#include "mem.h"
+#include <cxlmem.h>
+#include <cxl.h>
+#include "core.h"
 
 /**
  * DOC: cxl core
  *
- * The CXL core provides a sysfs hierarchy for control devices and a rendezvous
- * point for cross-device interleave coordination through cxl ports.
+ * The CXL core provides a set of interfaces that can be consumed by CXL aware
+ * drivers. The interfaces allow for creation, modification, and destruction of
+ * regions, memory devices, ports, and decoders. CXL aware drivers must register
+ * with the CXL core via these interfaces in order to be able to participate in
+ * cross-device interleave coordination. The CXL core also establishes and
+ * maintains the bridge to the nvdimm subsystem.
+ *
+ * CXL core introduces sysfs hierarchy to control the devices that are
+ * instantiated by the core.
  */
 
 static DEFINE_IDA(cxl_port_ida);
@@ -30,7 +38,7 @@ static struct attribute *cxl_base_attributes[] = {
 	NULL,
 };
 
-static struct attribute_group cxl_base_attribute_group = {
+struct attribute_group cxl_base_attribute_group = {
 	.attrs = cxl_base_attributes,
 };
 
@@ -507,11 +515,6 @@ err:
 	return ERR_PTR(rc);
 }
 
-static void unregister_dev(void *dev)
-{
-	device_unregister(dev);
-}
-
 struct cxl_decoder *
 devm_cxl_add_decoder(struct device *host, struct cxl_port *port, int nr_targets,
 		     resource_size_t base, resource_size_t len,
@@ -536,7 +539,7 @@ devm_cxl_add_decoder(struct device *host, struct cxl_port *port, int nr_targets,
 	if (rc)
 		goto err;
 
-	rc = devm_add_action_or_reset(host, unregister_dev, dev);
+	rc = devm_add_action_or_reset(host, unregister_cxl_dev, dev);
 	if (rc)
 		return ERR_PTR(rc);
 	return cxld;
@@ -548,429 +551,6 @@ err:
 EXPORT_SYMBOL_GPL(devm_cxl_add_decoder);
 
 /**
- * cxl_probe_component_regs() - Detect CXL Component register blocks
- * @dev: Host device of the @base mapping
- * @base: Mapping containing the HDM Decoder Capability Header
- * @map: Map object describing the register block information found
- *
- * See CXL 2.0 8.2.4 Component Register Layout and Definition
- * See CXL 2.0 8.2.5.5 CXL Device Register Interface
- *
- * Probe for component register information and return it in map object.
- */
-void cxl_probe_component_regs(struct device *dev, void __iomem *base,
-			      struct cxl_component_reg_map *map)
-{
-	int cap, cap_count;
-	u64 cap_array;
-
-	*map = (struct cxl_component_reg_map) { 0 };
-
-	/*
-	 * CXL.cache and CXL.mem registers are at offset 0x1000 as defined in
-	 * CXL 2.0 8.2.4 Table 141.
-	 */
-	base += CXL_CM_OFFSET;
-
-	cap_array = readq(base + CXL_CM_CAP_HDR_OFFSET);
-
-	if (FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, cap_array) != CM_CAP_HDR_CAP_ID) {
-		dev_err(dev,
-			"Couldn't locate the CXL.cache and CXL.mem capability array header./n");
-		return;
-	}
-
-	/* It's assumed that future versions will be backward compatible */
-	cap_count = FIELD_GET(CXL_CM_CAP_HDR_ARRAY_SIZE_MASK, cap_array);
-
-	for (cap = 1; cap <= cap_count; cap++) {
-		void __iomem *register_block;
-		u32 hdr;
-		int decoder_cnt;
-		u16 cap_id, offset;
-		u32 length;
-
-		hdr = readl(base + cap * 0x4);
-
-		cap_id = FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, hdr);
-		offset = FIELD_GET(CXL_CM_CAP_PTR_MASK, hdr);
-		register_block = base + offset;
-
-		switch (cap_id) {
-		case CXL_CM_CAP_CAP_ID_HDM:
-			dev_dbg(dev, "found HDM decoder capability (0x%x)\n",
-				offset);
-
-			hdr = readl(register_block);
-
-			decoder_cnt = cxl_hdm_decoder_count(hdr);
-			length = 0x20 * decoder_cnt + 0x10;
-
-			map->hdm_decoder.valid = true;
-			map->hdm_decoder.offset = CXL_CM_OFFSET + offset;
-			map->hdm_decoder.size = length;
-			break;
-		default:
-			dev_dbg(dev, "Unknown CM cap ID: %d (0x%x)\n", cap_id,
-				offset);
-			break;
-		}
-	}
-}
-EXPORT_SYMBOL_GPL(cxl_probe_component_regs);
-
-static void cxl_nvdimm_bridge_release(struct device *dev)
-{
-	struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev);
-
-	kfree(cxl_nvb);
-}
-
-static const struct attribute_group *cxl_nvdimm_bridge_attribute_groups[] = {
-	&cxl_base_attribute_group,
-	NULL,
-};
-
-static const struct device_type cxl_nvdimm_bridge_type = {
-	.name = "cxl_nvdimm_bridge",
-	.release = cxl_nvdimm_bridge_release,
-	.groups = cxl_nvdimm_bridge_attribute_groups,
-};
-
-struct cxl_nvdimm_bridge *to_cxl_nvdimm_bridge(struct device *dev)
-{
-	if (dev_WARN_ONCE(dev, dev->type != &cxl_nvdimm_bridge_type,
-			  "not a cxl_nvdimm_bridge device\n"))
-		return NULL;
-	return container_of(dev, struct cxl_nvdimm_bridge, dev);
-}
-EXPORT_SYMBOL_GPL(to_cxl_nvdimm_bridge);
-
-static struct cxl_nvdimm_bridge *
-cxl_nvdimm_bridge_alloc(struct cxl_port *port)
-{
-	struct cxl_nvdimm_bridge *cxl_nvb;
-	struct device *dev;
-
-	cxl_nvb = kzalloc(sizeof(*cxl_nvb), GFP_KERNEL);
-	if (!cxl_nvb)
-		return ERR_PTR(-ENOMEM);
-
-	dev = &cxl_nvb->dev;
-	cxl_nvb->port = port;
-	cxl_nvb->state = CXL_NVB_NEW;
-	device_initialize(dev);
-	device_set_pm_not_required(dev);
-	dev->parent = &port->dev;
-	dev->bus = &cxl_bus_type;
-	dev->type = &cxl_nvdimm_bridge_type;
-
-	return cxl_nvb;
-}
-
-static void unregister_nvb(void *_cxl_nvb)
-{
-	struct cxl_nvdimm_bridge *cxl_nvb = _cxl_nvb;
-	bool flush;
-
-	/*
-	 * If the bridge was ever activated then there might be in-flight state
-	 * work to flush. Once the state has been changed to 'dead' then no new
-	 * work can be queued by user-triggered bind.
-	 */
-	device_lock(&cxl_nvb->dev);
-	flush = cxl_nvb->state != CXL_NVB_NEW;
-	cxl_nvb->state = CXL_NVB_DEAD;
-	device_unlock(&cxl_nvb->dev);
-
-	/*
-	 * Even though the device core will trigger device_release_driver()
-	 * before the unregister, it does not know about the fact that
-	 * cxl_nvdimm_bridge_driver defers ->remove() work. So, do the driver
-	 * release not and flush it before tearing down the nvdimm device
-	 * hierarchy.
-	 */
-	device_release_driver(&cxl_nvb->dev);
-	if (flush)
-		flush_work(&cxl_nvb->state_work);
-	device_unregister(&cxl_nvb->dev);
-}
-
-struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host,
-						     struct cxl_port *port)
-{
-	struct cxl_nvdimm_bridge *cxl_nvb;
-	struct device *dev;
-	int rc;
-
-	if (!IS_ENABLED(CONFIG_CXL_PMEM))
-		return ERR_PTR(-ENXIO);
-
-	cxl_nvb = cxl_nvdimm_bridge_alloc(port);
-	if (IS_ERR(cxl_nvb))
-		return cxl_nvb;
-
-	dev = &cxl_nvb->dev;
-	rc = dev_set_name(dev, "nvdimm-bridge");
-	if (rc)
-		goto err;
-
-	rc = device_add(dev);
-	if (rc)
-		goto err;
-
-	rc = devm_add_action_or_reset(host, unregister_nvb, cxl_nvb);
-	if (rc)
-		return ERR_PTR(rc);
-
-	return cxl_nvb;
-
-err:
-	put_device(dev);
-	return ERR_PTR(rc);
-}
-EXPORT_SYMBOL_GPL(devm_cxl_add_nvdimm_bridge);
-
-static void cxl_nvdimm_release(struct device *dev)
-{
-	struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
-
-	kfree(cxl_nvd);
-}
-
-static const struct attribute_group *cxl_nvdimm_attribute_groups[] = {
-	&cxl_base_attribute_group,
-	NULL,
-};
-
-static const struct device_type cxl_nvdimm_type = {
-	.name = "cxl_nvdimm",
-	.release = cxl_nvdimm_release,
-	.groups = cxl_nvdimm_attribute_groups,
-};
-
-bool is_cxl_nvdimm(struct device *dev)
-{
-	return dev->type == &cxl_nvdimm_type;
-}
-EXPORT_SYMBOL_GPL(is_cxl_nvdimm);
-
-struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev)
-{
-	if (dev_WARN_ONCE(dev, !is_cxl_nvdimm(dev),
-			  "not a cxl_nvdimm device\n"))
-		return NULL;
-	return container_of(dev, struct cxl_nvdimm, dev);
-}
-EXPORT_SYMBOL_GPL(to_cxl_nvdimm);
-
-static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd)
-{
-	struct cxl_nvdimm *cxl_nvd;
-	struct device *dev;
-
-	cxl_nvd = kzalloc(sizeof(*cxl_nvd), GFP_KERNEL);
-	if (!cxl_nvd)
-		return ERR_PTR(-ENOMEM);
-
-	dev = &cxl_nvd->dev;
-	cxl_nvd->cxlmd = cxlmd;
-	device_initialize(dev);
-	device_set_pm_not_required(dev);
-	dev->parent = &cxlmd->dev;
-	dev->bus = &cxl_bus_type;
-	dev->type = &cxl_nvdimm_type;
-
-	return cxl_nvd;
-}
-
-int devm_cxl_add_nvdimm(struct device *host, struct cxl_memdev *cxlmd)
-{
-	struct cxl_nvdimm *cxl_nvd;
-	struct device *dev;
-	int rc;
-
-	cxl_nvd = cxl_nvdimm_alloc(cxlmd);
-	if (IS_ERR(cxl_nvd))
-		return PTR_ERR(cxl_nvd);
-
-	dev = &cxl_nvd->dev;
-	rc = dev_set_name(dev, "pmem%d", cxlmd->id);
-	if (rc)
-		goto err;
-
-	rc = device_add(dev);
-	if (rc)
-		goto err;
-
-	dev_dbg(host, "%s: register %s\n", dev_name(dev->parent),
-		dev_name(dev));
-
-	return devm_add_action_or_reset(host, unregister_dev, dev);
-
-err:
-	put_device(dev);
-	return rc;
-}
-EXPORT_SYMBOL_GPL(devm_cxl_add_nvdimm);
-
-/**
- * cxl_probe_device_regs() - Detect CXL Device register blocks
- * @dev: Host device of the @base mapping
- * @base: Mapping of CXL 2.0 8.2.8 CXL Device Register Interface
- * @map: Map object describing the register block information found
- *
- * Probe for device register information and return it in map object.
- */
-void cxl_probe_device_regs(struct device *dev, void __iomem *base,
-			   struct cxl_device_reg_map *map)
-{
-	int cap, cap_count;
-	u64 cap_array;
-
-	*map = (struct cxl_device_reg_map){ 0 };
-
-	cap_array = readq(base + CXLDEV_CAP_ARRAY_OFFSET);
-	if (FIELD_GET(CXLDEV_CAP_ARRAY_ID_MASK, cap_array) !=
-	    CXLDEV_CAP_ARRAY_CAP_ID)
-		return;
-
-	cap_count = FIELD_GET(CXLDEV_CAP_ARRAY_COUNT_MASK, cap_array);
-
-	for (cap = 1; cap <= cap_count; cap++) {
-		u32 offset, length;
-		u16 cap_id;
-
-		cap_id = FIELD_GET(CXLDEV_CAP_HDR_CAP_ID_MASK,
-				   readl(base + cap * 0x10));
-		offset = readl(base + cap * 0x10 + 0x4);
-		length = readl(base + cap * 0x10 + 0x8);
-
-		switch (cap_id) {
-		case CXLDEV_CAP_CAP_ID_DEVICE_STATUS:
-			dev_dbg(dev, "found Status capability (0x%x)\n", offset);
-
-			map->status.valid = true;
-			map->status.offset = offset;
-			map->status.size = length;
-			break;
-		case CXLDEV_CAP_CAP_ID_PRIMARY_MAILBOX:
-			dev_dbg(dev, "found Mailbox capability (0x%x)\n", offset);
-			map->mbox.valid = true;
-			map->mbox.offset = offset;
-			map->mbox.size = length;
-			break;
-		case CXLDEV_CAP_CAP_ID_SECONDARY_MAILBOX:
-			dev_dbg(dev, "found Secondary Mailbox capability (0x%x)\n", offset);
-			break;
-		case CXLDEV_CAP_CAP_ID_MEMDEV:
-			dev_dbg(dev, "found Memory Device capability (0x%x)\n", offset);
-			map->memdev.valid = true;
-			map->memdev.offset = offset;
-			map->memdev.size = length;
-			break;
-		default:
-			if (cap_id >= 0x8000)
-				dev_dbg(dev, "Vendor cap ID: %#x offset: %#x\n", cap_id, offset);
-			else
-				dev_dbg(dev, "Unknown cap ID: %#x offset: %#x\n", cap_id, offset);
-			break;
-		}
-	}
-}
-EXPORT_SYMBOL_GPL(cxl_probe_device_regs);
-
-static void __iomem *devm_cxl_iomap_block(struct device *dev,
-					  resource_size_t addr,
-					  resource_size_t length)
-{
-	void __iomem *ret_val;
-	struct resource *res;
-
-	res = devm_request_mem_region(dev, addr, length, dev_name(dev));
-	if (!res) {
-		resource_size_t end = addr + length - 1;
-
-		dev_err(dev, "Failed to request region %pa-%pa\n", &addr, &end);
-		return NULL;
-	}
-
-	ret_val = devm_ioremap(dev, addr, length);
-	if (!ret_val)
-		dev_err(dev, "Failed to map region %pr\n", res);
-
-	return ret_val;
-}
-
-int cxl_map_component_regs(struct pci_dev *pdev,
-			   struct cxl_component_regs *regs,
-			   struct cxl_register_map *map)
-{
-	struct device *dev = &pdev->dev;
-	resource_size_t phys_addr;
-	resource_size_t length;
-
-	phys_addr = pci_resource_start(pdev, map->barno);
-	phys_addr += map->block_offset;
-
-	phys_addr += map->component_map.hdm_decoder.offset;
-	length = map->component_map.hdm_decoder.size;
-	regs->hdm_decoder = devm_cxl_iomap_block(dev, phys_addr, length);
-	if (!regs->hdm_decoder)
-		return -ENOMEM;
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(cxl_map_component_regs);
-
-int cxl_map_device_regs(struct pci_dev *pdev,
-			struct cxl_device_regs *regs,
-			struct cxl_register_map *map)
-{
-	struct device *dev = &pdev->dev;
-	resource_size_t phys_addr;
-
-	phys_addr = pci_resource_start(pdev, map->barno);
-	phys_addr += map->block_offset;
-
-	if (map->device_map.status.valid) {
-		resource_size_t addr;
-		resource_size_t length;
-
-		addr = phys_addr + map->device_map.status.offset;
-		length = map->device_map.status.size;
-		regs->status = devm_cxl_iomap_block(dev, addr, length);
-		if (!regs->status)
-			return -ENOMEM;
-	}
-
-	if (map->device_map.mbox.valid) {
-		resource_size_t addr;
-		resource_size_t length;
-
-		addr = phys_addr + map->device_map.mbox.offset;
-		length = map->device_map.mbox.size;
-		regs->mbox = devm_cxl_iomap_block(dev, addr, length);
-		if (!regs->mbox)
-			return -ENOMEM;
-	}
-
-	if (map->device_map.memdev.valid) {
-		resource_size_t addr;
-		resource_size_t length;
-
-		addr = phys_addr + map->device_map.memdev.offset;
-		length = map->device_map.memdev.size;
-		regs->memdev = devm_cxl_iomap_block(dev, addr, length);
-		if (!regs->memdev)
-			return -ENOMEM;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(cxl_map_device_regs);
-
-/**
  * __cxl_driver_register - register a driver for the cxl bus
  * @cxl_drv: cxl driver structure to attach
  * @owner: owning module/driver
@@ -1053,12 +633,26 @@ EXPORT_SYMBOL_GPL(cxl_bus_type);
 
 static __init int cxl_core_init(void)
 {
-	return bus_register(&cxl_bus_type);
+	int rc;
+
+	rc = cxl_memdev_init();
+	if (rc)
+		return rc;
+
+	rc = bus_register(&cxl_bus_type);
+	if (rc)
+		goto err;
+	return 0;
+
+err:
+	cxl_memdev_exit();
+	return rc;
 }
 
 static void cxl_core_exit(void)
 {
 	bus_unregister(&cxl_bus_type);
+	cxl_memdev_exit();
 }
 
 module_init(cxl_core_init);
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
new file mode 100644
index 000000000000..036a3c8106b4
--- /dev/null
+++ b/drivers/cxl/core/core.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2020 Intel Corporation. */
+
+#ifndef __CXL_CORE_H__
+#define __CXL_CORE_H__
+
+extern const struct device_type cxl_nvdimm_bridge_type;
+extern const struct device_type cxl_nvdimm_type;
+
+extern struct attribute_group cxl_base_attribute_group;
+
+static inline void unregister_cxl_dev(void *dev)
+{
+	device_unregister(dev);
+}
+
+int cxl_memdev_init(void);
+void cxl_memdev_exit(void);
+
+#endif /* __CXL_CORE_H__ */
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
new file mode 100644
index 000000000000..a9c317e32010
--- /dev/null
+++ b/drivers/cxl/core/memdev.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2020 Intel Corporation. */
+
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/idr.h>
+#include <linux/pci.h>
+#include <cxlmem.h>
+#include "core.h"
+
+/*
+ * An entire PCI topology full of devices should be enough for any
+ * config
+ */
+#define CXL_MEM_MAX_DEVS 65536
+
+static int cxl_mem_major;
+static DEFINE_IDA(cxl_memdev_ida);
+
+static void cxl_memdev_release(struct device *dev)
+{
+	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+
+	ida_free(&cxl_memdev_ida, cxlmd->id);
+	kfree(cxlmd);
+}
+
+static char *cxl_memdev_devnode(struct device *dev, umode_t *mode, kuid_t *uid,
+				kgid_t *gid)
+{
+	return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev));
+}
+
+static ssize_t firmware_version_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+	struct cxl_mem *cxlm = cxlmd->cxlm;
+
+	return sysfs_emit(buf, "%.16s\n", cxlm->firmware_version);
+}
+static DEVICE_ATTR_RO(firmware_version);
+
+static ssize_t payload_max_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+	struct cxl_mem *cxlm = cxlmd->cxlm;
+
+	return sysfs_emit(buf, "%zu\n", cxlm->payload_size);
+}
+static DEVICE_ATTR_RO(payload_max);
+
+static ssize_t label_storage_size_show(struct device *dev,
+				       struct device_attribute *attr, char *buf)
+{
+	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+	struct cxl_mem *cxlm = cxlmd->cxlm;
+
+	return sysfs_emit(buf, "%zu\n", cxlm->lsa_size);
+}
+static DEVICE_ATTR_RO(label_storage_size);
+
+static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+	struct cxl_mem *cxlm = cxlmd->cxlm;
+	unsigned long long len = range_len(&cxlm->ram_range);
+
+	return sysfs_emit(buf, "%#llx\n", len);
+}
+
+static struct device_attribute dev_attr_ram_size =
+	__ATTR(size, 0444, ram_size_show, NULL);
+
+static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
+			      char *buf)
+{
+	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+	struct cxl_mem *cxlm = cxlmd->cxlm;
+	unsigned long long len = range_len(&cxlm->pmem_range);
+
+	return sysfs_emit(buf, "%#llx\n", len);
+}
+
+static struct device_attribute dev_attr_pmem_size =
+	__ATTR(size, 0444, pmem_size_show, NULL);
+
+static struct attribute *cxl_memdev_attributes[] = {
+	&dev_attr_firmware_version.attr,
+	&dev_attr_payload_max.attr,
+	&dev_attr_label_storage_size.attr,
+	NULL,
+};
+
+static struct attribute *cxl_memdev_pmem_attributes[] = {
+	&dev_attr_pmem_size.attr,
+	NULL,
+};
+
+static struct attribute *cxl_memdev_ram_attributes[] = {
+	&dev_attr_ram_size.attr,
+	NULL,
+};
+
+static struct attribute_group cxl_memdev_attribute_group = {
+	.attrs = cxl_memdev_attributes,
+};
+
+static struct attribute_group cxl_memdev_ram_attribute_group = {
+	.name = "ram",
+	.attrs = cxl_memdev_ram_attributes,
+};
+
+static struct attribute_group cxl_memdev_pmem_attribute_group = {
+	.name = "pmem",
+	.attrs = cxl_memdev_pmem_attributes,
+};
+
+static const struct attribute_group *cxl_memdev_attribute_groups[] = {
+	&cxl_memdev_attribute_group,
+	&cxl_memdev_ram_attribute_group,
+	&cxl_memdev_pmem_attribute_group,
+	NULL,
+};
+
+static const struct device_type cxl_memdev_type = {
+	.name = "cxl_memdev",
+	.release = cxl_memdev_release,
+	.devnode = cxl_memdev_devnode,
+	.groups = cxl_memdev_attribute_groups,
+};
+
+static void cxl_memdev_unregister(void *_cxlmd)
+{
+	struct cxl_memdev *cxlmd = _cxlmd;
+	struct device *dev = &cxlmd->dev;
+	struct cdev *cdev = &cxlmd->cdev;
+	const struct cdevm_file_operations *cdevm_fops;
+
+	cdevm_fops = container_of(cdev->ops, typeof(*cdevm_fops), fops);
+	cdevm_fops->shutdown(dev);
+
+	cdev_device_del(&cxlmd->cdev, dev);
+	put_device(dev);
+}
+
+static struct cxl_memdev *cxl_memdev_alloc(struct cxl_mem *cxlm,
+					   const struct file_operations *fops)
+{
+	struct pci_dev *pdev = cxlm->pdev;
+	struct cxl_memdev *cxlmd;
+	struct device *dev;
+	struct cdev *cdev;
+	int rc;
+
+	cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL);
+	if (!cxlmd)
+		return ERR_PTR(-ENOMEM);
+
+	rc = ida_alloc_range(&cxl_memdev_ida, 0, CXL_MEM_MAX_DEVS, GFP_KERNEL);
+	if (rc < 0)
+		goto err;
+	cxlmd->id = rc;
+
+	dev = &cxlmd->dev;
+	device_initialize(dev);
+	dev->parent = &pdev->dev;
+	dev->bus = &cxl_bus_type;
+	dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
+	dev->type = &cxl_memdev_type;
+	device_set_pm_not_required(dev);
+
+	cdev = &cxlmd->cdev;
+	cdev_init(cdev, fops);
+	return cxlmd;
+
+err:
+	kfree(cxlmd);
+	return ERR_PTR(rc);
+}
+
+struct cxl_memdev *
+devm_cxl_add_memdev(struct device *host, struct cxl_mem *cxlm,
+		    const struct cdevm_file_operations *cdevm_fops)
+{
+	struct cxl_memdev *cxlmd;
+	struct device *dev;
+	struct cdev *cdev;
+	int rc;
+
+	cxlmd = cxl_memdev_alloc(cxlm, &cdevm_fops->fops);
+	if (IS_ERR(cxlmd))
+		return cxlmd;
+
+	dev = &cxlmd->dev;
+	rc = dev_set_name(dev, "mem%d", cxlmd->id);
+	if (rc)
+		goto err;
+
+	/*
+	 * Activate ioctl operations, no cxl_memdev_rwsem manipulation
+	 * needed as this is ordered with cdev_add() publishing the device.
+	 */
+	cxlmd->cxlm = cxlm;
+
+	cdev = &cxlmd->cdev;
+	rc = cdev_device_add(cdev, dev);
+	if (rc)
+		goto err;
+
+	rc = devm_add_action_or_reset(host, cxl_memdev_unregister, cxlmd);
+	if (rc)
+		return ERR_PTR(rc);
+	return cxlmd;
+
+err:
+	/*
+	 * The cdev was briefly live, shutdown any ioctl operations that
+	 * saw that state.
+	 */
+	cdevm_fops->shutdown(dev);
+	put_device(dev);
+	return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_GPL(devm_cxl_add_memdev);
+
+__init int cxl_memdev_init(void)
+{
+	dev_t devt;
+	int rc;
+
+	rc = alloc_chrdev_region(&devt, 0, CXL_MEM_MAX_DEVS, "cxl");
+	if (rc)
+		return rc;
+
+	cxl_mem_major = MAJOR(devt);
+
+	return 0;
+}
+
+void cxl_memdev_exit(void)
+{
+	unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS);
+}
diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c
new file mode 100644
index 000000000000..d24570f5b8ba
--- /dev/null
+++ b/drivers/cxl/core/pmem.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2020 Intel Corporation. */
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <cxlmem.h>
+#include <cxl.h>
+#include "core.h"
+
+/**
+ * DOC: cxl pmem
+ *
+ * The core CXL PMEM infrastructure supports persistent memory
+ * provisioning and serves as a bridge to the LIBNVDIMM subsystem. A CXL
+ * 'bridge' device is added at the root of a CXL device topology if
+ * platform firmware advertises at least one persistent memory capable
+ * CXL window. That root-level bridge corresponds to a LIBNVDIMM 'bus'
+ * device. Then for each cxl_memdev in the CXL device topology a bridge
+ * device is added to host a LIBNVDIMM dimm object. When these bridges
+ * are registered native LIBNVDIMM uapis are translated to CXL
+ * operations, for example, namespace label access commands.
+ */
+
+static void cxl_nvdimm_bridge_release(struct device *dev)
+{
+	struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev);
+
+	kfree(cxl_nvb);
+}
+
+static const struct attribute_group *cxl_nvdimm_bridge_attribute_groups[] = {
+	&cxl_base_attribute_group,
+	NULL,
+};
+
+const struct device_type cxl_nvdimm_bridge_type = {
+	.name = "cxl_nvdimm_bridge",
+	.release = cxl_nvdimm_bridge_release,
+	.groups = cxl_nvdimm_bridge_attribute_groups,
+};
+
+struct cxl_nvdimm_bridge *to_cxl_nvdimm_bridge(struct device *dev)
+{
+	if (dev_WARN_ONCE(dev, dev->type != &cxl_nvdimm_bridge_type,
+			  "not a cxl_nvdimm_bridge device\n"))
+		return NULL;
+	return container_of(dev, struct cxl_nvdimm_bridge, dev);
+}
+EXPORT_SYMBOL_GPL(to_cxl_nvdimm_bridge);
+
+static struct cxl_nvdimm_bridge *
+cxl_nvdimm_bridge_alloc(struct cxl_port *port)
+{
+	struct cxl_nvdimm_bridge *cxl_nvb;
+	struct device *dev;
+
+	cxl_nvb = kzalloc(sizeof(*cxl_nvb), GFP_KERNEL);
+	if (!cxl_nvb)
+		return ERR_PTR(-ENOMEM);
+
+	dev = &cxl_nvb->dev;
+	cxl_nvb->port = port;
+	cxl_nvb->state = CXL_NVB_NEW;
+	device_initialize(dev);
+	device_set_pm_not_required(dev);
+	dev->parent = &port->dev;
+	dev->bus = &cxl_bus_type;
+	dev->type = &cxl_nvdimm_bridge_type;
+
+	return cxl_nvb;
+}
+
+static void unregister_nvb(void *_cxl_nvb)
+{
+	struct cxl_nvdimm_bridge *cxl_nvb = _cxl_nvb;
+	bool flush;
+
+	/*
+	 * If the bridge was ever activated then there might be in-flight state
+	 * work to flush. Once the state has been changed to 'dead' then no new
+	 * work can be queued by user-triggered bind.
+	 */
+	device_lock(&cxl_nvb->dev);
+	flush = cxl_nvb->state != CXL_NVB_NEW;
+	cxl_nvb->state = CXL_NVB_DEAD;
+	device_unlock(&cxl_nvb->dev);
+
+	/*
+	 * Even though the device core will trigger device_release_driver()
+	 * before the unregister, it does not know about the fact that
+	 * cxl_nvdimm_bridge_driver defers ->remove() work. So, do the driver
+	 * release not and flush it before tearing down the nvdimm device
+	 * hierarchy.
+	 */
+	device_release_driver(&cxl_nvb->dev);
+	if (flush)
+		flush_work(&cxl_nvb->state_work);
+	device_unregister(&cxl_nvb->dev);
+}
+
+/**
+ * devm_cxl_add_nvdimm_bridge() - add the root of a LIBNVDIMM topology
+ * @host: platform firmware root device
+ * @port: CXL port at the root of a CXL topology
+ *
+ * Return: bridge device that can host cxl_nvdimm objects
+ */
+struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host,
+						     struct cxl_port *port)
+{
+	struct cxl_nvdimm_bridge *cxl_nvb;
+	struct device *dev;
+	int rc;
+
+	if (!IS_ENABLED(CONFIG_CXL_PMEM))
+		return ERR_PTR(-ENXIO);
+
+	cxl_nvb = cxl_nvdimm_bridge_alloc(port);
+	if (IS_ERR(cxl_nvb))
+		return cxl_nvb;
+
+	dev = &cxl_nvb->dev;
+	rc = dev_set_name(dev, "nvdimm-bridge");
+	if (rc)
+		goto err;
+
+	rc = device_add(dev);
+	if (rc)
+		goto err;
+
+	rc = devm_add_action_or_reset(host, unregister_nvb, cxl_nvb);
+	if (rc)
+		return ERR_PTR(rc);
+
+	return cxl_nvb;
+
+err:
+	put_device(dev);
+	return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_GPL(devm_cxl_add_nvdimm_bridge);
+
+static void cxl_nvdimm_release(struct device *dev)
+{
+	struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
+
+	kfree(cxl_nvd);
+}
+
+static const struct attribute_group *cxl_nvdimm_attribute_groups[] = {
+	&cxl_base_attribute_group,
+	NULL,
+};
+
+const struct device_type cxl_nvdimm_type = {
+	.name = "cxl_nvdimm",
+	.release = cxl_nvdimm_release,
+	.groups = cxl_nvdimm_attribute_groups,
+};
+
+bool is_cxl_nvdimm(struct device *dev)
+{
+	return dev->type == &cxl_nvdimm_type;
+}
+EXPORT_SYMBOL_GPL(is_cxl_nvdimm);
+
+struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev)
+{
+	if (dev_WARN_ONCE(dev, !is_cxl_nvdimm(dev),
+			  "not a cxl_nvdimm device\n"))
+		return NULL;
+	return container_of(dev, struct cxl_nvdimm, dev);
+}
+EXPORT_SYMBOL_GPL(to_cxl_nvdimm);
+
+static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd)
+{
+	struct cxl_nvdimm *cxl_nvd;
+	struct device *dev;
+
+	cxl_nvd = kzalloc(sizeof(*cxl_nvd), GFP_KERNEL);
+	if (!cxl_nvd)
+		return ERR_PTR(-ENOMEM);
+
+	dev = &cxl_nvd->dev;
+	cxl_nvd->cxlmd = cxlmd;
+	device_initialize(dev);
+	device_set_pm_not_required(dev);
+	dev->parent = &cxlmd->dev;
+	dev->bus = &cxl_bus_type;
+	dev->type = &cxl_nvdimm_type;
+
+	return cxl_nvd;
+}
+
+/**
+ * devm_cxl_add_nvdimm() - add a bridge between a cxl_memdev and an nvdimm
+ * @host: same host as @cxlmd
+ * @cxlmd: cxl_memdev instance that will perform LIBNVDIMM operations
+ *
+ * Return: 0 on success negative error code on failure.
+ */
+int devm_cxl_add_nvdimm(struct device *host, struct cxl_memdev *cxlmd)
+{
+	struct cxl_nvdimm *cxl_nvd;
+	struct device *dev;
+	int rc;
+
+	cxl_nvd = cxl_nvdimm_alloc(cxlmd);
+	if (IS_ERR(cxl_nvd))
+		return PTR_ERR(cxl_nvd);
+
+	dev = &cxl_nvd->dev;
+	rc = dev_set_name(dev, "pmem%d", cxlmd->id);
+	if (rc)
+		goto err;
+
+	rc = device_add(dev);
+	if (rc)
+		goto err;
+
+	dev_dbg(host, "%s: register %s\n", dev_name(dev->parent),
+		dev_name(dev));
+
+	return devm_add_action_or_reset(host, unregister_cxl_dev, dev);
+
+err:
+	put_device(dev);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(devm_cxl_add_nvdimm);
diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
new file mode 100644
index 000000000000..41de4a136ecd
--- /dev/null
+++ b/drivers/cxl/core/regs.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2020 Intel Corporation. */
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <cxlmem.h>
+
+/**
+ * DOC: cxl registers
+ *
+ * CXL device capabilities are enumerated by PCI DVSEC (Designated
+ * Vendor-specific) and / or descriptors provided by platform firmware.
+ * They can be defined as a set like the device and component registers
+ * mandated by CXL Section 8.1.12.2 Memory Device PCIe Capabilities and
+ * Extended Capabilities, or they can be individual capabilities
+ * appended to bridged and endpoint devices.
+ *
+ * Provide common infrastructure for enumerating and mapping these
+ * discrete capabilities.
+ */
+
+/**
+ * cxl_probe_component_regs() - Detect CXL Component register blocks
+ * @dev: Host device of the @base mapping
+ * @base: Mapping containing the HDM Decoder Capability Header
+ * @map: Map object describing the register block information found
+ *
+ * See CXL 2.0 8.2.4 Component Register Layout and Definition
+ * See CXL 2.0 8.2.5.5 CXL Device Register Interface
+ *
+ * Probe for component register information and return it in map object.
+ */
+void cxl_probe_component_regs(struct device *dev, void __iomem *base,
+			      struct cxl_component_reg_map *map)
+{
+	int cap, cap_count;
+	u64 cap_array;
+
+	*map = (struct cxl_component_reg_map) { 0 };
+
+	/*
+	 * CXL.cache and CXL.mem registers are at offset 0x1000 as defined in
+	 * CXL 2.0 8.2.4 Table 141.
+	 */
+	base += CXL_CM_OFFSET;
+
+	cap_array = readq(base + CXL_CM_CAP_HDR_OFFSET);
+
+	if (FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, cap_array) != CM_CAP_HDR_CAP_ID) {
+		dev_err(dev,
+			"Couldn't locate the CXL.cache and CXL.mem capability array header./n");
+		return;
+	}
+
+	/* It's assumed that future versions will be backward compatible */
+	cap_count = FIELD_GET(CXL_CM_CAP_HDR_ARRAY_SIZE_MASK, cap_array);
+
+	for (cap = 1; cap <= cap_count; cap++) {
+		void __iomem *register_block;
+		u32 hdr;
+		int decoder_cnt;
+		u16 cap_id, offset;
+		u32 length;
+
+		hdr = readl(base + cap * 0x4);
+
+		cap_id = FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, hdr);
+		offset = FIELD_GET(CXL_CM_CAP_PTR_MASK, hdr);
+		register_block = base + offset;
+
+		switch (cap_id) {
+		case CXL_CM_CAP_CAP_ID_HDM:
+			dev_dbg(dev, "found HDM decoder capability (0x%x)\n",
+				offset);
+
+			hdr = readl(register_block);
+
+			decoder_cnt = cxl_hdm_decoder_count(hdr);
+			length = 0x20 * decoder_cnt + 0x10;
+
+			map->hdm_decoder.valid = true;
+			map->hdm_decoder.offset = CXL_CM_OFFSET + offset;
+			map->hdm_decoder.size = length;
+			break;
+		default:
+			dev_dbg(dev, "Unknown CM cap ID: %d (0x%x)\n", cap_id,
+				offset);
+			break;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(cxl_probe_component_regs);
+
+/**
+ * cxl_probe_device_regs() - Detect CXL Device register blocks
+ * @dev: Host device of the @base mapping
+ * @base: Mapping of CXL 2.0 8.2.8 CXL Device Register Interface
+ * @map: Map object describing the register block information found
+ *
+ * Probe for device register information and return it in map object.
+ */
+void cxl_probe_device_regs(struct device *dev, void __iomem *base,
+			   struct cxl_device_reg_map *map)
+{
+	int cap, cap_count;
+	u64 cap_array;
+
+	*map = (struct cxl_device_reg_map){ 0 };
+
+	cap_array = readq(base + CXLDEV_CAP_ARRAY_OFFSET);
+	if (FIELD_GET(CXLDEV_CAP_ARRAY_ID_MASK, cap_array) !=
+	    CXLDEV_CAP_ARRAY_CAP_ID)
+		return;
+
+	cap_count = FIELD_GET(CXLDEV_CAP_ARRAY_COUNT_MASK, cap_array);
+
+	for (cap = 1; cap <= cap_count; cap++) {
+		u32 offset, length;
+		u16 cap_id;
+
+		cap_id = FIELD_GET(CXLDEV_CAP_HDR_CAP_ID_MASK,
+				   readl(base + cap * 0x10));
+		offset = readl(base + cap * 0x10 + 0x4);
+		length = readl(base + cap * 0x10 + 0x8);
+
+		switch (cap_id) {
+		case CXLDEV_CAP_CAP_ID_DEVICE_STATUS:
+			dev_dbg(dev, "found Status capability (0x%x)\n", offset);
+
+			map->status.valid = true;
+			map->status.offset = offset;
+			map->status.size = length;
+			break;
+		case CXLDEV_CAP_CAP_ID_PRIMARY_MAILBOX:
+			dev_dbg(dev, "found Mailbox capability (0x%x)\n", offset);
+			map->mbox.valid = true;
+			map->mbox.offset = offset;
+			map->mbox.size = length;
+			break;
+		case CXLDEV_CAP_CAP_ID_SECONDARY_MAILBOX:
+			dev_dbg(dev, "found Secondary Mailbox capability (0x%x)\n", offset);
+			break;
+		case CXLDEV_CAP_CAP_ID_MEMDEV:
+			dev_dbg(dev, "found Memory Device capability (0x%x)\n", offset);
+			map->memdev.valid = true;
+			map->memdev.offset = offset;
+			map->memdev.size = length;
+			break;
+		default:
+			if (cap_id >= 0x8000)
+				dev_dbg(dev, "Vendor cap ID: %#x offset: %#x\n", cap_id, offset);
+			else
+				dev_dbg(dev, "Unknown cap ID: %#x offset: %#x\n", cap_id, offset);
+			break;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(cxl_probe_device_regs);
+
+static void __iomem *devm_cxl_iomap_block(struct device *dev,
+					  resource_size_t addr,
+					  resource_size_t length)
+{
+	void __iomem *ret_val;
+	struct resource *res;
+
+	res = devm_request_mem_region(dev, addr, length, dev_name(dev));
+	if (!res) {
+		resource_size_t end = addr + length - 1;
+
+		dev_err(dev, "Failed to request region %pa-%pa\n", &addr, &end);
+		return NULL;
+	}
+
+	ret_val = devm_ioremap(dev, addr, length);
+	if (!ret_val)
+		dev_err(dev, "Failed to map region %pr\n", res);
+
+	return ret_val;
+}
+
+int cxl_map_component_regs(struct pci_dev *pdev,
+			   struct cxl_component_regs *regs,
+			   struct cxl_register_map *map)
+{
+	struct device *dev = &pdev->dev;
+	resource_size_t phys_addr;
+	resource_size_t length;
+
+	phys_addr = pci_resource_start(pdev, map->barno);
+	phys_addr += map->block_offset;
+
+	phys_addr += map->component_map.hdm_decoder.offset;
+	length = map->component_map.hdm_decoder.size;
+	regs->hdm_decoder = devm_cxl_iomap_block(dev, phys_addr, length);
+	if (!regs->hdm_decoder)
+		return -ENOMEM;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_map_component_regs);
+
+int cxl_map_device_regs(struct pci_dev *pdev,
+			struct cxl_device_regs *regs,
+			struct cxl_register_map *map)
+{
+	struct device *dev = &pdev->dev;
+	resource_size_t phys_addr;
+
+	phys_addr = pci_resource_start(pdev, map->barno);
+	phys_addr += map->block_offset;
+
+	if (map->device_map.status.valid) {
+		resource_size_t addr;
+		resource_size_t length;
+
+		addr = phys_addr + map->device_map.status.offset;
+		length = map->device_map.status.size;
+		regs->status = devm_cxl_iomap_block(dev, addr, length);
+		if (!regs->status)
+			return -ENOMEM;
+	}
+
+	if (map->device_map.mbox.valid) {
+		resource_size_t addr;
+		resource_size_t length;
+
+		addr = phys_addr + map->device_map.mbox.offset;
+		length = map->device_map.mbox.size;
+		regs->mbox = devm_cxl_iomap_block(dev, addr, length);
+		if (!regs->mbox)
+			return -ENOMEM;
+	}
+
+	if (map->device_map.memdev.valid) {
+		resource_size_t addr;
+		resource_size_t length;
+
+		addr = phys_addr + map->device_map.memdev.offset;
+		length = map->device_map.memdev.size;
+		regs->memdev = devm_cxl_iomap_block(dev, addr, length);
+		if (!regs->memdev)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_map_device_regs);
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index b6bda39a59e3..53927f9fa77e 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -140,7 +140,6 @@ struct cxl_device_reg_map {
 };
 
 struct cxl_register_map {
-	struct list_head list;
 	u64 block_offset;
 	u8 reg_type;
 	u8 barno;
diff --git a/drivers/cxl/mem.h b/drivers/cxl/cxlmem.h
index 8f02d02b26b4..6c0b1e2ea97c 100644
--- a/drivers/cxl/mem.h
+++ b/drivers/cxl/cxlmem.h
@@ -28,11 +28,20 @@
 	(FIELD_GET(CXLMDEV_RESET_NEEDED_MASK, status) !=                       \
 	 CXLMDEV_RESET_NEEDED_NOT)
 
-/*
- * An entire PCI topology full of devices should be enough for any
- * config
+/**
+ * struct cdevm_file_operations - devm coordinated cdev file operations
+ * @fops: file operations that are synchronized against @shutdown
+ * @shutdown: disconnect driver data
+ *
+ * @shutdown is invoked in the devres release path to disconnect any
+ * driver instance data from @dev. It assumes synchronization with any
+ * fops operation that requires driver data. After @shutdown an
+ * operation may only reference @device data.
  */
-#define CXL_MEM_MAX_DEVS 65536
+struct cdevm_file_operations {
+	struct file_operations fops;
+	void (*shutdown)(struct device *dev);
+};
 
 /**
  * struct cxl_memdev - CXL bus object representing a Type-3 Memory Device
@@ -48,6 +57,15 @@ struct cxl_memdev {
 	int id;
 };
 
+static inline struct cxl_memdev *to_cxl_memdev(struct device *dev)
+{
+	return container_of(dev, struct cxl_memdev, dev);
+}
+
+struct cxl_memdev *
+devm_cxl_add_memdev(struct device *host, struct cxl_mem *cxlm,
+		    const struct cdevm_file_operations *cdevm_fops);
+
 /**
  * struct cxl_mem - A CXL memory device
  * @pdev: The PCI device associated with this CXL device.
@@ -77,5 +95,14 @@ struct cxl_mem {
 
 	struct range pmem_range;
 	struct range ram_range;
+	u64 total_bytes;
+	u64 volatile_only_bytes;
+	u64 persistent_only_bytes;
+	u64 partition_align_bytes;
+
+	u64 active_volatile_bytes;
+	u64 active_persistent_bytes;
+	u64 next_volatile_bytes;
+	u64 next_persistent_bytes;
 };
 #endif /* __CXL_MEM_H__ */
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 4cf351a3cf99..8e45aa07d662 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -12,9 +12,9 @@
 #include <linux/pci.h>
 #include <linux/io.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
+#include "cxlmem.h"
 #include "pci.h"
 #include "cxl.h"
-#include "mem.h"
 
 /**
  * DOC: cxl pci
@@ -64,6 +64,15 @@ enum opcode {
 	CXL_MBOX_OP_MAX			= 0x10000
 };
 
+/*
+ * CXL 2.0 - Memory capacity multiplier
+ * See Section 8.2.9.5
+ *
+ * Volatile, Persistent, and Partition capacities are specified to be in
+ * multiples of 256MB - define a multiplier to convert to/from bytes.
+ */
+#define CXL_CAPACITY_MULTIPLIER SZ_256M
+
 /**
  * struct mbox_cmd - A command to be submitted to hardware.
  * @opcode: (input) The command set and command submitted to hardware.
@@ -94,8 +103,6 @@ struct mbox_cmd {
 #define CXL_MBOX_SUCCESS 0
 };
 
-static int cxl_mem_major;
-static DEFINE_IDA(cxl_memdev_ida);
 static DECLARE_RWSEM(cxl_memdev_rwsem);
 static struct dentry *cxl_debugfs;
 static bool cxl_raw_allow_all;
@@ -568,7 +575,7 @@ static bool cxl_mem_raw_command_allowed(u16 opcode)
 	if (!IS_ENABLED(CONFIG_CXL_MEM_RAW_COMMANDS))
 		return false;
 
-	if (security_locked_down(LOCKDOWN_NONE))
+	if (security_locked_down(LOCKDOWN_PCI_ACCESS))
 		return false;
 
 	if (cxl_raw_allow_all)
@@ -806,13 +813,25 @@ static int cxl_memdev_release_file(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static const struct file_operations cxl_memdev_fops = {
-	.owner = THIS_MODULE,
-	.unlocked_ioctl = cxl_memdev_ioctl,
-	.open = cxl_memdev_open,
-	.release = cxl_memdev_release_file,
-	.compat_ioctl = compat_ptr_ioctl,
-	.llseek = noop_llseek,
+static void cxl_memdev_shutdown(struct device *dev)
+{
+	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+
+	down_write(&cxl_memdev_rwsem);
+	cxlmd->cxlm = NULL;
+	up_write(&cxl_memdev_rwsem);
+}
+
+static const struct cdevm_file_operations cxl_memdev_fops = {
+	.fops = {
+		.owner = THIS_MODULE,
+		.unlocked_ioctl = cxl_memdev_ioctl,
+		.open = cxl_memdev_open,
+		.release = cxl_memdev_release_file,
+		.compat_ioctl = compat_ptr_ioctl,
+		.llseek = noop_llseek,
+	},
+	.shutdown = cxl_memdev_shutdown,
 };
 
 static inline struct cxl_mem_command *cxl_mem_find_command(u16 opcode)
@@ -1022,8 +1041,8 @@ static int cxl_probe_regs(struct cxl_mem *cxlm, void __iomem *base,
 		    !dev_map->memdev.valid) {
 			dev_err(dev, "registers not found: %s%s%s\n",
 				!dev_map->status.valid ? "status " : "",
-				!dev_map->mbox.valid ? "status " : "",
-				!dev_map->memdev.valid ? "status " : "");
+				!dev_map->mbox.valid ? "mbox " : "",
+				!dev_map->memdev.valid ? "memdev " : "");
 			return -ENXIO;
 		}
 
@@ -1081,9 +1100,8 @@ static int cxl_mem_setup_regs(struct cxl_mem *cxlm)
 	struct device *dev = &pdev->dev;
 	u32 regloc_size, regblocks;
 	void __iomem *base;
-	int regloc, i;
-	struct cxl_register_map *map, *n;
-	LIST_HEAD(register_maps);
+	int regloc, i, n_maps;
+	struct cxl_register_map *map, maps[CXL_REGLOC_RBI_TYPES];
 	int ret = 0;
 
 	regloc = cxl_mem_dvsec(pdev, PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID);
@@ -1102,20 +1120,12 @@ static int cxl_mem_setup_regs(struct cxl_mem *cxlm)
 	regloc += PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET;
 	regblocks = (regloc_size - PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET) / 8;
 
-	for (i = 0; i < regblocks; i++, regloc += 8) {
+	for (i = 0, n_maps = 0; i < regblocks; i++, regloc += 8) {
 		u32 reg_lo, reg_hi;
 		u8 reg_type;
 		u64 offset;
 		u8 bar;
 
-		map = kzalloc(sizeof(*map), GFP_KERNEL);
-		if (!map) {
-			ret = -ENOMEM;
-			goto free_maps;
-		}
-
-		list_add(&map->list, &register_maps);
-
 		pci_read_config_dword(pdev, regloc, &reg_lo);
 		pci_read_config_dword(pdev, regloc + 4, &reg_hi);
 
@@ -1125,12 +1135,15 @@ static int cxl_mem_setup_regs(struct cxl_mem *cxlm)
 		dev_dbg(dev, "Found register block in bar %u @ 0x%llx of type %u\n",
 			bar, offset, reg_type);
 
+		/* Ignore unknown register block types */
+		if (reg_type > CXL_REGLOC_RBI_MEMDEV)
+			continue;
+
 		base = cxl_mem_map_regblock(cxlm, bar, offset);
-		if (!base) {
-			ret = -ENOMEM;
-			goto free_maps;
-		}
+		if (!base)
+			return -ENOMEM;
 
+		map = &maps[n_maps];
 		map->barno = bar;
 		map->block_offset = offset;
 		map->reg_type = reg_type;
@@ -1141,240 +1154,22 @@ static int cxl_mem_setup_regs(struct cxl_mem *cxlm)
 		cxl_mem_unmap_regblock(cxlm, base);
 
 		if (ret)
-			goto free_maps;
+			return ret;
+
+		n_maps++;
 	}
 
 	pci_release_mem_regions(pdev);
 
-	list_for_each_entry(map, &register_maps, list) {
-		ret = cxl_map_regs(cxlm, map);
+	for (i = 0; i < n_maps; i++) {
+		ret = cxl_map_regs(cxlm, &maps[i]);
 		if (ret)
-			goto free_maps;
-	}
-
-free_maps:
-	list_for_each_entry_safe(map, n, &register_maps, list) {
-		list_del(&map->list);
-		kfree(map);
+			break;
 	}
 
 	return ret;
 }
 
-static struct cxl_memdev *to_cxl_memdev(struct device *dev)
-{
-	return container_of(dev, struct cxl_memdev, dev);
-}
-
-static void cxl_memdev_release(struct device *dev)
-{
-	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
-
-	ida_free(&cxl_memdev_ida, cxlmd->id);
-	kfree(cxlmd);
-}
-
-static char *cxl_memdev_devnode(struct device *dev, umode_t *mode, kuid_t *uid,
-				kgid_t *gid)
-{
-	return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev));
-}
-
-static ssize_t firmware_version_show(struct device *dev,
-				     struct device_attribute *attr, char *buf)
-{
-	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
-	struct cxl_mem *cxlm = cxlmd->cxlm;
-
-	return sysfs_emit(buf, "%.16s\n", cxlm->firmware_version);
-}
-static DEVICE_ATTR_RO(firmware_version);
-
-static ssize_t payload_max_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
-	struct cxl_mem *cxlm = cxlmd->cxlm;
-
-	return sysfs_emit(buf, "%zu\n", cxlm->payload_size);
-}
-static DEVICE_ATTR_RO(payload_max);
-
-static ssize_t label_storage_size_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
-	struct cxl_mem *cxlm = cxlmd->cxlm;
-
-	return sysfs_emit(buf, "%zu\n", cxlm->lsa_size);
-}
-static DEVICE_ATTR_RO(label_storage_size);
-
-static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
-			     char *buf)
-{
-	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
-	struct cxl_mem *cxlm = cxlmd->cxlm;
-	unsigned long long len = range_len(&cxlm->ram_range);
-
-	return sysfs_emit(buf, "%#llx\n", len);
-}
-
-static struct device_attribute dev_attr_ram_size =
-	__ATTR(size, 0444, ram_size_show, NULL);
-
-static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
-			      char *buf)
-{
-	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
-	struct cxl_mem *cxlm = cxlmd->cxlm;
-	unsigned long long len = range_len(&cxlm->pmem_range);
-
-	return sysfs_emit(buf, "%#llx\n", len);
-}
-
-static struct device_attribute dev_attr_pmem_size =
-	__ATTR(size, 0444, pmem_size_show, NULL);
-
-static struct attribute *cxl_memdev_attributes[] = {
-	&dev_attr_firmware_version.attr,
-	&dev_attr_payload_max.attr,
-	&dev_attr_label_storage_size.attr,
-	NULL,
-};
-
-static struct attribute *cxl_memdev_pmem_attributes[] = {
-	&dev_attr_pmem_size.attr,
-	NULL,
-};
-
-static struct attribute *cxl_memdev_ram_attributes[] = {
-	&dev_attr_ram_size.attr,
-	NULL,
-};
-
-static struct attribute_group cxl_memdev_attribute_group = {
-	.attrs = cxl_memdev_attributes,
-};
-
-static struct attribute_group cxl_memdev_ram_attribute_group = {
-	.name = "ram",
-	.attrs = cxl_memdev_ram_attributes,
-};
-
-static struct attribute_group cxl_memdev_pmem_attribute_group = {
-	.name = "pmem",
-	.attrs = cxl_memdev_pmem_attributes,
-};
-
-static const struct attribute_group *cxl_memdev_attribute_groups[] = {
-	&cxl_memdev_attribute_group,
-	&cxl_memdev_ram_attribute_group,
-	&cxl_memdev_pmem_attribute_group,
-	NULL,
-};
-
-static const struct device_type cxl_memdev_type = {
-	.name = "cxl_memdev",
-	.release = cxl_memdev_release,
-	.devnode = cxl_memdev_devnode,
-	.groups = cxl_memdev_attribute_groups,
-};
-
-static void cxl_memdev_shutdown(struct cxl_memdev *cxlmd)
-{
-	down_write(&cxl_memdev_rwsem);
-	cxlmd->cxlm = NULL;
-	up_write(&cxl_memdev_rwsem);
-}
-
-static void cxl_memdev_unregister(void *_cxlmd)
-{
-	struct cxl_memdev *cxlmd = _cxlmd;
-	struct device *dev = &cxlmd->dev;
-
-	cdev_device_del(&cxlmd->cdev, dev);
-	cxl_memdev_shutdown(cxlmd);
-	put_device(dev);
-}
-
-static struct cxl_memdev *cxl_memdev_alloc(struct cxl_mem *cxlm)
-{
-	struct pci_dev *pdev = cxlm->pdev;
-	struct cxl_memdev *cxlmd;
-	struct device *dev;
-	struct cdev *cdev;
-	int rc;
-
-	cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL);
-	if (!cxlmd)
-		return ERR_PTR(-ENOMEM);
-
-	rc = ida_alloc_range(&cxl_memdev_ida, 0, CXL_MEM_MAX_DEVS, GFP_KERNEL);
-	if (rc < 0)
-		goto err;
-	cxlmd->id = rc;
-
-	dev = &cxlmd->dev;
-	device_initialize(dev);
-	dev->parent = &pdev->dev;
-	dev->bus = &cxl_bus_type;
-	dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
-	dev->type = &cxl_memdev_type;
-	device_set_pm_not_required(dev);
-
-	cdev = &cxlmd->cdev;
-	cdev_init(cdev, &cxl_memdev_fops);
-	return cxlmd;
-
-err:
-	kfree(cxlmd);
-	return ERR_PTR(rc);
-}
-
-static struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
-					      struct cxl_mem *cxlm)
-{
-	struct cxl_memdev *cxlmd;
-	struct device *dev;
-	struct cdev *cdev;
-	int rc;
-
-	cxlmd = cxl_memdev_alloc(cxlm);
-	if (IS_ERR(cxlmd))
-		return cxlmd;
-
-	dev = &cxlmd->dev;
-	rc = dev_set_name(dev, "mem%d", cxlmd->id);
-	if (rc)
-		goto err;
-
-	/*
-	 * Activate ioctl operations, no cxl_memdev_rwsem manipulation
-	 * needed as this is ordered with cdev_add() publishing the device.
-	 */
-	cxlmd->cxlm = cxlm;
-
-	cdev = &cxlmd->cdev;
-	rc = cdev_device_add(cdev, dev);
-	if (rc)
-		goto err;
-
-	rc = devm_add_action_or_reset(host, cxl_memdev_unregister, cxlmd);
-	if (rc)
-		return ERR_PTR(rc);
-	return cxlmd;
-
-err:
-	/*
-	 * The cdev was briefly live, shutdown any ioctl operations that
-	 * saw that state.
-	 */
-	cxl_memdev_shutdown(cxlmd);
-	put_device(dev);
-	return ERR_PTR(rc);
-}
-
 static int cxl_xfer_log(struct cxl_mem *cxlm, uuid_t *uuid, u32 size, u8 *out)
 {
 	u32 remaining = size;
@@ -1469,6 +1264,53 @@ static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_mem *cxlm)
 }
 
 /**
+ * cxl_mem_get_partition_info - Get partition info
+ * @cxlm: The device to act on
+ * @active_volatile_bytes: returned active volatile capacity
+ * @active_persistent_bytes: returned active persistent capacity
+ * @next_volatile_bytes: return next volatile capacity
+ * @next_persistent_bytes: return next persistent capacity
+ *
+ * Retrieve the current partition info for the device specified.  If not 0, the
+ * 'next' values are pending and take affect on next cold reset.
+ *
+ * Return: 0 if no error: or the result of the mailbox command.
+ *
+ * See CXL @8.2.9.5.2.1 Get Partition Info
+ */
+static int cxl_mem_get_partition_info(struct cxl_mem *cxlm,
+				      u64 *active_volatile_bytes,
+				      u64 *active_persistent_bytes,
+				      u64 *next_volatile_bytes,
+				      u64 *next_persistent_bytes)
+{
+	struct cxl_mbox_get_partition_info {
+		__le64 active_volatile_cap;
+		__le64 active_persistent_cap;
+		__le64 next_volatile_cap;
+		__le64 next_persistent_cap;
+	} __packed pi;
+	int rc;
+
+	rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_GET_PARTITION_INFO,
+				   NULL, 0, &pi, sizeof(pi));
+	if (rc)
+		return rc;
+
+	*active_volatile_bytes = le64_to_cpu(pi.active_volatile_cap);
+	*active_persistent_bytes = le64_to_cpu(pi.active_persistent_cap);
+	*next_volatile_bytes = le64_to_cpu(pi.next_volatile_cap);
+	*next_persistent_bytes = le64_to_cpu(pi.next_volatile_cap);
+
+	*active_volatile_bytes *= CXL_CAPACITY_MULTIPLIER;
+	*active_persistent_bytes *= CXL_CAPACITY_MULTIPLIER;
+	*next_volatile_bytes *= CXL_CAPACITY_MULTIPLIER;
+	*next_persistent_bytes *= CXL_CAPACITY_MULTIPLIER;
+
+	return 0;
+}
+
+/**
  * cxl_mem_enumerate_cmds() - Enumerate commands for a device.
  * @cxlm: The device.
  *
@@ -1564,16 +1406,27 @@ static int cxl_mem_identify(struct cxl_mem *cxlm)
 	if (rc < 0)
 		return rc;
 
-	/*
-	 * TODO: enumerate DPA map, as 'ram' and 'pmem' do not alias.
-	 * For now, only the capacity is exported in sysfs
-	 */
-	cxlm->ram_range.start = 0;
-	cxlm->ram_range.end = le64_to_cpu(id.volatile_capacity) * SZ_256M - 1;
+	cxlm->total_bytes = le64_to_cpu(id.total_capacity);
+	cxlm->total_bytes *= CXL_CAPACITY_MULTIPLIER;
+
+	cxlm->volatile_only_bytes = le64_to_cpu(id.volatile_capacity);
+	cxlm->volatile_only_bytes *= CXL_CAPACITY_MULTIPLIER;
 
-	cxlm->pmem_range.start = 0;
-	cxlm->pmem_range.end =
-		le64_to_cpu(id.persistent_capacity) * SZ_256M - 1;
+	cxlm->persistent_only_bytes = le64_to_cpu(id.persistent_capacity);
+	cxlm->persistent_only_bytes *= CXL_CAPACITY_MULTIPLIER;
+
+	cxlm->partition_align_bytes = le64_to_cpu(id.partition_align);
+	cxlm->partition_align_bytes *= CXL_CAPACITY_MULTIPLIER;
+
+	dev_dbg(&cxlm->pdev->dev, "Identify Memory Device\n"
+		"     total_bytes = %#llx\n"
+		"     volatile_only_bytes = %#llx\n"
+		"     persistent_only_bytes = %#llx\n"
+		"     partition_align_bytes = %#llx\n",
+			cxlm->total_bytes,
+			cxlm->volatile_only_bytes,
+			cxlm->persistent_only_bytes,
+			cxlm->partition_align_bytes);
 
 	cxlm->lsa_size = le32_to_cpu(id.lsa_size);
 	memcpy(cxlm->firmware_version, id.fw_revision, sizeof(id.fw_revision));
@@ -1581,6 +1434,49 @@ static int cxl_mem_identify(struct cxl_mem *cxlm)
 	return 0;
 }
 
+static int cxl_mem_create_range_info(struct cxl_mem *cxlm)
+{
+	int rc;
+
+	if (cxlm->partition_align_bytes == 0) {
+		cxlm->ram_range.start = 0;
+		cxlm->ram_range.end = cxlm->volatile_only_bytes - 1;
+		cxlm->pmem_range.start = cxlm->volatile_only_bytes;
+		cxlm->pmem_range.end = cxlm->volatile_only_bytes +
+					cxlm->persistent_only_bytes - 1;
+		return 0;
+	}
+
+	rc = cxl_mem_get_partition_info(cxlm,
+					&cxlm->active_volatile_bytes,
+					&cxlm->active_persistent_bytes,
+					&cxlm->next_volatile_bytes,
+					&cxlm->next_persistent_bytes);
+	if (rc < 0) {
+		dev_err(&cxlm->pdev->dev, "Failed to query partition information\n");
+		return rc;
+	}
+
+	dev_dbg(&cxlm->pdev->dev, "Get Partition Info\n"
+		"     active_volatile_bytes = %#llx\n"
+		"     active_persistent_bytes = %#llx\n"
+		"     next_volatile_bytes = %#llx\n"
+		"     next_persistent_bytes = %#llx\n",
+			cxlm->active_volatile_bytes,
+			cxlm->active_persistent_bytes,
+			cxlm->next_volatile_bytes,
+			cxlm->next_persistent_bytes);
+
+	cxlm->ram_range.start = 0;
+	cxlm->ram_range.end = cxlm->active_volatile_bytes - 1;
+
+	cxlm->pmem_range.start = cxlm->active_volatile_bytes;
+	cxlm->pmem_range.end = cxlm->active_volatile_bytes +
+				cxlm->active_persistent_bytes - 1;
+
+	return 0;
+}
+
 static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct cxl_memdev *cxlmd;
@@ -1611,7 +1507,11 @@ static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (rc)
 		return rc;
 
-	cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlm);
+	rc = cxl_mem_create_range_info(cxlm);
+	if (rc)
+		return rc;
+
+	cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlm, &cxl_memdev_fops);
 	if (IS_ERR(cxlmd))
 		return PTR_ERR(cxlmd);
 
@@ -1640,25 +1540,15 @@ static struct pci_driver cxl_mem_driver = {
 static __init int cxl_mem_init(void)
 {
 	struct dentry *mbox_debugfs;
-	dev_t devt;
 	int rc;
 
 	/* Double check the anonymous union trickery in struct cxl_regs */
 	BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) !=
 		     offsetof(struct cxl_regs, device_regs.memdev));
 
-	rc = alloc_chrdev_region(&devt, 0, CXL_MEM_MAX_DEVS, "cxl");
-	if (rc)
-		return rc;
-
-	cxl_mem_major = MAJOR(devt);
-
 	rc = pci_register_driver(&cxl_mem_driver);
-	if (rc) {
-		unregister_chrdev_region(MKDEV(cxl_mem_major, 0),
-					 CXL_MEM_MAX_DEVS);
+	if (rc)
 		return rc;
-	}
 
 	cxl_debugfs = debugfs_create_dir("cxl", NULL);
 	mbox_debugfs = debugfs_create_dir("mbox", cxl_debugfs);
@@ -1672,7 +1562,6 @@ static __exit void cxl_mem_exit(void)
 {
 	debugfs_remove_recursive(cxl_debugfs);
 	pci_unregister_driver(&cxl_mem_driver);
-	unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS);
 }
 
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/cxl/pci.h b/drivers/cxl/pci.h
index dad7a831f65f..8c1a58813816 100644
--- a/drivers/cxl/pci.h
+++ b/drivers/cxl/pci.h
@@ -25,6 +25,7 @@
 #define CXL_REGLOC_RBI_COMPONENT 1
 #define CXL_REGLOC_RBI_VIRT 2
 #define CXL_REGLOC_RBI_MEMDEV 3
+#define CXL_REGLOC_RBI_TYPES CXL_REGLOC_RBI_MEMDEV + 1
 
 #define CXL_REGLOC_ADDR_MASK GENMASK(31, 16)
 
diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
index 0088e41dd2f3..9652c3ee41e7 100644
--- a/drivers/cxl/pmem.c
+++ b/drivers/cxl/pmem.c
@@ -6,7 +6,7 @@
 #include <linux/ndctl.h>
 #include <linux/async.h>
 #include <linux/slab.h>
-#include "mem.h"
+#include "cxlmem.h"
 #include "cxl.h"
 
 /*
diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c
index ac231cc36359..a37622060fff 100644
--- a/drivers/dax/kmem.c
+++ b/drivers/dax/kmem.c
@@ -37,15 +37,16 @@ static int dax_kmem_range(struct dev_dax *dev_dax, int i, struct range *r)
 
 struct dax_kmem_data {
 	const char *res_name;
+	int mgid;
 	struct resource *res[];
 };
 
 static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
 {
 	struct device *dev = &dev_dax->dev;
+	unsigned long total_len = 0;
 	struct dax_kmem_data *data;
-	int rc = -ENOMEM;
-	int i, mapped = 0;
+	int i, rc, mapped = 0;
 	int numa_node;
 
 	/*
@@ -61,24 +62,44 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
 		return -EINVAL;
 	}
 
+	for (i = 0; i < dev_dax->nr_range; i++) {
+		struct range range;
+
+		rc = dax_kmem_range(dev_dax, i, &range);
+		if (rc) {
+			dev_info(dev, "mapping%d: %#llx-%#llx too small after alignment\n",
+					i, range.start, range.end);
+			continue;
+		}
+		total_len += range_len(&range);
+	}
+
+	if (!total_len) {
+		dev_warn(dev, "rejecting DAX region without any memory after alignment\n");
+		return -EINVAL;
+	}
+
 	data = kzalloc(struct_size(data, res, dev_dax->nr_range), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
 
+	rc = -ENOMEM;
 	data->res_name = kstrdup(dev_name(dev), GFP_KERNEL);
 	if (!data->res_name)
 		goto err_res_name;
 
+	rc = memory_group_register_static(numa_node, total_len);
+	if (rc < 0)
+		goto err_reg_mgid;
+	data->mgid = rc;
+
 	for (i = 0; i < dev_dax->nr_range; i++) {
 		struct resource *res;
 		struct range range;
 
 		rc = dax_kmem_range(dev_dax, i, &range);
-		if (rc) {
-			dev_info(dev, "mapping%d: %#llx-%#llx too small after alignment\n",
-					i, range.start, range.end);
+		if (rc)
 			continue;
-		}
 
 		/* Region is permanently reserved if hotremove fails. */
 		res = request_mem_region(range.start, range_len(&range), data->res_name);
@@ -108,8 +129,8 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
 		 * Ensure that future kexec'd kernels will not treat
 		 * this as RAM automatically.
 		 */
-		rc = add_memory_driver_managed(numa_node, range.start,
-				range_len(&range), kmem_name, MHP_NONE);
+		rc = add_memory_driver_managed(data->mgid, range.start,
+				range_len(&range), kmem_name, MHP_NID_IS_MGID);
 
 		if (rc) {
 			dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n",
@@ -129,6 +150,8 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
 	return 0;
 
 err_request_mem:
+	memory_group_unregister(data->mgid);
+err_reg_mgid:
 	kfree(data->res_name);
 err_res_name:
 	kfree(data);
@@ -156,8 +179,7 @@ static void dev_dax_kmem_remove(struct dev_dax *dev_dax)
 		if (rc)
 			continue;
 
-		rc = remove_memory(dev_dax->target_node, range.start,
-				range_len(&range));
+		rc = remove_memory(range.start, range_len(&range));
 		if (rc == 0) {
 			release_resource(data->res[i]);
 			kfree(data->res[i]);
@@ -172,6 +194,7 @@ static void dev_dax_kmem_remove(struct dev_dax *dev_dax)
 	}
 
 	if (success >= dev_dax->nr_range) {
+		memory_group_unregister(data->mgid);
 		kfree(data->res_name);
 		kfree(data);
 		dev_set_drvdata(dev, NULL);
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 44736cbd446e..fc89e91beea7 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -17,6 +17,24 @@
 #include <linux/fs.h>
 #include "dax-private.h"
 
+/**
+ * struct dax_device - anchor object for dax services
+ * @inode: core vfs
+ * @cdev: optional character interface for "device dax"
+ * @host: optional name for lookups where the device path is not available
+ * @private: dax driver private data
+ * @flags: state and boolean properties
+ */
+struct dax_device {
+	struct hlist_node list;
+	struct inode inode;
+	struct cdev cdev;
+	const char *host;
+	void *private;
+	unsigned long flags;
+	const struct dax_operations *ops;
+};
+
 static dev_t dax_devt;
 DEFINE_STATIC_SRCU(dax_srcu);
 static struct vfsmount *dax_mnt;
@@ -40,6 +58,42 @@ void dax_read_unlock(int id)
 }
 EXPORT_SYMBOL_GPL(dax_read_unlock);
 
+static int dax_host_hash(const char *host)
+{
+	return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE;
+}
+
+/**
+ * dax_get_by_host() - temporary lookup mechanism for filesystem-dax
+ * @host: alternate name for the device registered by a dax driver
+ */
+static struct dax_device *dax_get_by_host(const char *host)
+{
+	struct dax_device *dax_dev, *found = NULL;
+	int hash, id;
+
+	if (!host)
+		return NULL;
+
+	hash = dax_host_hash(host);
+
+	id = dax_read_lock();
+	spin_lock(&dax_host_lock);
+	hlist_for_each_entry(dax_dev, &dax_host_list[hash], list) {
+		if (!dax_alive(dax_dev)
+				|| strcmp(host, dax_dev->host) != 0)
+			continue;
+
+		if (igrab(&dax_dev->inode))
+			found = dax_dev;
+		break;
+	}
+	spin_unlock(&dax_host_lock);
+	dax_read_unlock(id);
+
+	return found;
+}
+
 #ifdef CONFIG_BLOCK
 #include <linux/blkdev.h>
 
@@ -65,15 +119,13 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
 	return dax_get_by_host(bdev->bd_disk->disk_name);
 }
 EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
-#endif
 
-bool __generic_fsdax_supported(struct dax_device *dax_dev,
+bool generic_fsdax_supported(struct dax_device *dax_dev,
 		struct block_device *bdev, int blocksize, sector_t start,
 		sector_t sectors)
 {
 	bool dax_enabled = false;
 	pgoff_t pgoff, pgoff_end;
-	char buf[BDEVNAME_SIZE];
 	void *kaddr, *end_kaddr;
 	pfn_t pfn, end_pfn;
 	sector_t last_page;
@@ -81,29 +133,25 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev,
 	int err, id;
 
 	if (blocksize != PAGE_SIZE) {
-		pr_info("%s: error: unsupported blocksize for dax\n",
-				bdevname(bdev, buf));
+		pr_info("%pg: error: unsupported blocksize for dax\n", bdev);
 		return false;
 	}
 
 	if (!dax_dev) {
-		pr_debug("%s: error: dax unsupported by block device\n",
-				bdevname(bdev, buf));
+		pr_debug("%pg: error: dax unsupported by block device\n", bdev);
 		return false;
 	}
 
 	err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff);
 	if (err) {
-		pr_info("%s: error: unaligned partition for dax\n",
-				bdevname(bdev, buf));
+		pr_info("%pg: error: unaligned partition for dax\n", bdev);
 		return false;
 	}
 
 	last_page = PFN_DOWN((start + sectors - 1) * 512) * PAGE_SIZE / 512;
 	err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end);
 	if (err) {
-		pr_info("%s: error: unaligned partition for dax\n",
-				bdevname(bdev, buf));
+		pr_info("%pg: error: unaligned partition for dax\n", bdev);
 		return false;
 	}
 
@@ -112,8 +160,8 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev,
 	len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn);
 
 	if (len < 1 || len2 < 1) {
-		pr_info("%s: error: dax access failed (%ld)\n",
-				bdevname(bdev, buf), len < 1 ? len : len2);
+		pr_info("%pg: error: dax access failed (%ld)\n",
+				bdev, len < 1 ? len : len2);
 		dax_read_unlock(id);
 		return false;
 	}
@@ -147,57 +195,32 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev,
 	dax_read_unlock(id);
 
 	if (!dax_enabled) {
-		pr_info("%s: error: dax support not enabled\n",
-				bdevname(bdev, buf));
+		pr_info("%pg: error: dax support not enabled\n", bdev);
 		return false;
 	}
 	return true;
 }
-EXPORT_SYMBOL_GPL(__generic_fsdax_supported);
+EXPORT_SYMBOL_GPL(generic_fsdax_supported);
 
-/**
- * __bdev_dax_supported() - Check if the device supports dax for filesystem
- * @bdev: block device to check
- * @blocksize: The block size of the device
- *
- * This is a library function for filesystems to check if the block device
- * can be mounted with dax option.
- *
- * Return: true if supported, false if unsupported
- */
-bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
+bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
+		int blocksize, sector_t start, sector_t len)
 {
-	struct dax_device *dax_dev;
-	struct request_queue *q;
-	char buf[BDEVNAME_SIZE];
-	bool ret;
+	bool ret = false;
 	int id;
 
-	q = bdev_get_queue(bdev);
-	if (!q || !blk_queue_dax(q)) {
-		pr_debug("%s: error: request queue doesn't support dax\n",
-				bdevname(bdev, buf));
-		return false;
-	}
-
-	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
-	if (!dax_dev) {
-		pr_debug("%s: error: device does not support dax\n",
-				bdevname(bdev, buf));
+	if (!dax_dev)
 		return false;
-	}
 
 	id = dax_read_lock();
-	ret = dax_supported(dax_dev, bdev, blocksize, 0,
-			i_size_read(bdev->bd_inode) / 512);
+	if (dax_alive(dax_dev) && dax_dev->ops->dax_supported)
+		ret = dax_dev->ops->dax_supported(dax_dev, bdev, blocksize,
+						  start, len);
 	dax_read_unlock(id);
-
-	put_dax(dax_dev);
-
 	return ret;
 }
-EXPORT_SYMBOL_GPL(__bdev_dax_supported);
-#endif
+EXPORT_SYMBOL_GPL(dax_supported);
+#endif /* CONFIG_FS_DAX */
+#endif /* CONFIG_BLOCK */
 
 enum dax_device_flags {
 	/* !alive + rcu grace period == no new operations / mappings */
@@ -208,24 +231,6 @@ enum dax_device_flags {
 	DAXDEV_SYNC,
 };
 
-/**
- * struct dax_device - anchor object for dax services
- * @inode: core vfs
- * @cdev: optional character interface for "device dax"
- * @host: optional name for lookups where the device path is not available
- * @private: dax driver private data
- * @flags: state and boolean properties
- */
-struct dax_device {
-	struct hlist_node list;
-	struct inode inode;
-	struct cdev cdev;
-	const char *host;
-	void *private;
-	unsigned long flags;
-	const struct dax_operations *ops;
-};
-
 static ssize_t write_cache_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -323,19 +328,6 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
 }
 EXPORT_SYMBOL_GPL(dax_direct_access);
 
-bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
-		int blocksize, sector_t start, sector_t len)
-{
-	if (!dax_dev)
-		return false;
-
-	if (!dax_alive(dax_dev))
-		return false;
-
-	return dax_dev->ops->dax_supported(dax_dev, bdev, blocksize, start, len);
-}
-EXPORT_SYMBOL_GPL(dax_supported);
-
 size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
 		size_t bytes, struct iov_iter *i)
 {
@@ -423,11 +415,6 @@ bool dax_alive(struct dax_device *dax_dev)
 }
 EXPORT_SYMBOL_GPL(dax_alive);
 
-static int dax_host_hash(const char *host)
-{
-	return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE;
-}
-
 /*
  * Note, rcu is not protecting the liveness of dax_dev, rcu is ensuring
  * that any fault handlers or operations that might have seen
@@ -625,38 +612,6 @@ void put_dax(struct dax_device *dax_dev)
 EXPORT_SYMBOL_GPL(put_dax);
 
 /**
- * dax_get_by_host() - temporary lookup mechanism for filesystem-dax
- * @host: alternate name for the device registered by a dax driver
- */
-struct dax_device *dax_get_by_host(const char *host)
-{
-	struct dax_device *dax_dev, *found = NULL;
-	int hash, id;
-
-	if (!host)
-		return NULL;
-
-	hash = dax_host_hash(host);
-
-	id = dax_read_lock();
-	spin_lock(&dax_host_lock);
-	hlist_for_each_entry(dax_dev, &dax_host_list[hash], list) {
-		if (!dax_alive(dax_dev)
-				|| strcmp(host, dax_dev->host) != 0)
-			continue;
-
-		if (igrab(&dax_dev->inode))
-			found = dax_dev;
-		break;
-	}
-	spin_unlock(&dax_host_lock);
-	dax_read_unlock(id);
-
-	return found;
-}
-EXPORT_SYMBOL_GPL(dax_get_by_host);
-
-/**
  * inode_dax: convert a public inode into its dax_dev
  * @inode: An inode with i_cdev pointing to a dax_dev
  *
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 28f3e0ba6cdd..85faa7a5c7d1 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -27,6 +27,7 @@
 #include <linux/hrtimer.h>
 #include <linux/of.h>
 #include <linux/pm_qos.h>
+#include <linux/units.h>
 #include "governor.h"
 
 #define CREATE_TRACE_POINTS
@@ -34,7 +35,6 @@
 
 #define IS_SUPPORTED_FLAG(f, name) ((f & DEVFREQ_GOV_FLAG_##name) ? true : false)
 #define IS_SUPPORTED_ATTR(f, name) ((f & DEVFREQ_GOV_ATTR_##name) ? true : false)
-#define HZ_PER_KHZ	1000
 
 static struct class *devfreq_class;
 static struct dentry *devfreq_debugfs;
diff --git a/drivers/dma-buf/Kconfig b/drivers/dma-buf/Kconfig
index 9561e3d2d428..541efe01abc7 100644
--- a/drivers/dma-buf/Kconfig
+++ b/drivers/dma-buf/Kconfig
@@ -42,6 +42,7 @@ config UDMABUF
 config DMABUF_MOVE_NOTIFY
 	bool "Move notify between drivers (EXPERIMENTAL)"
 	default n
+	depends on DMA_SHARED_BUFFER
 	help
 	  Don't pin buffers if the dynamic DMA-buf interface is available on
 	  both the exporter as well as the importer. This fixes a security
@@ -52,6 +53,7 @@ config DMABUF_MOVE_NOTIFY
 
 config DMABUF_DEBUG
 	bool "DMA-BUF debug checks"
+	depends on DMA_SHARED_BUFFER
 	default y if DMA_API_DEBUG
 	help
 	  This option enables additional checks for DMA-BUF importers and
@@ -74,7 +76,7 @@ menuconfig DMABUF_HEAPS
 
 menuconfig DMABUF_SYSFS_STATS
 	bool "DMA-BUF sysfs statistics"
-	select DMA_SHARED_BUFFER
+	depends on DMA_SHARED_BUFFER
 	help
 	   Choose this option to enable DMA-BUF sysfs statistics
 	   in location /sys/kernel/dmabuf/buffers.
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 39b5b46e880f..80c2c03cb014 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -277,10 +277,15 @@ config INTEL_IDMA64
 	  Enable DMA support for Intel Low Power Subsystem such as found on
 	  Intel Skylake PCH.
 
+config INTEL_IDXD_BUS
+	tristate
+	default INTEL_IDXD
+
 config INTEL_IDXD
 	tristate "Intel Data Accelerators support"
-	depends on PCI && X86_64
+	depends on PCI && X86_64 && !UML
 	depends on PCI_MSI
+	depends on PCI_PASID
 	depends on SBITMAP
 	select DMA_ENGINE
 	help
@@ -291,6 +296,23 @@ config INTEL_IDXD
 
 	  If unsure, say N.
 
+config INTEL_IDXD_COMPAT
+	bool "Legacy behavior for idxd driver"
+	depends on PCI && X86_64
+	select INTEL_IDXD_BUS
+	help
+	  Compatible driver to support old /sys/bus/dsa/drivers/dsa behavior.
+	  The old behavior performed driver bind/unbind for device and wq
+	  devices all under the dsa driver. The compat driver will emulate
+	  the legacy behavior in order to allow existing support apps (i.e.
+	  accel-config) to continue function. It is expected that accel-config
+	  v3.2 and earlier will need the compat mode. A distro with later
+	  accel-config version can disable this compat config.
+
+	  Say Y if you have old applications that require such behavior.
+
+	  If unsure, say N.
+
 # Config symbol that collects all the dependencies that's necessary to
 # support shared virtual memory for the devices supported by idxd.
 config INTEL_IDXD_SVM
@@ -315,7 +337,7 @@ config INTEL_IDXD_PERFMON
 
 config INTEL_IOATDMA
 	tristate "Intel I/OAT DMA support"
-	depends on PCI && X86_64
+	depends on PCI && X86_64 && !UML
 	select DMA_ENGINE
 	select DMA_ENGINE_RAID
 	select DCA
@@ -716,6 +738,8 @@ source "drivers/dma/bestcomm/Kconfig"
 
 source "drivers/dma/mediatek/Kconfig"
 
+source "drivers/dma/ptdma/Kconfig"
+
 source "drivers/dma/qcom/Kconfig"
 
 source "drivers/dma/dw/Kconfig"
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index aa69094e3547..616d926cf2a5 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_DMATEST) += dmatest.o
 obj-$(CONFIG_ALTERA_MSGDMA) += altera-msgdma.o
 obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o
 obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/
+obj-$(CONFIG_AMD_PTDMA) += ptdma/
 obj-$(CONFIG_AT_HDMAC) += at_hdmac.o
 obj-$(CONFIG_AT_XDMAC) += at_xdmac.o
 obj-$(CONFIG_AXI_DMAC) += dma-axi-dmac.o
@@ -41,7 +42,7 @@ obj-$(CONFIG_IMX_DMA) += imx-dma.o
 obj-$(CONFIG_IMX_SDMA) += imx-sdma.o
 obj-$(CONFIG_INTEL_IDMA64) += idma64.o
 obj-$(CONFIG_INTEL_IOATDMA) += ioat/
-obj-$(CONFIG_INTEL_IDXD) += idxd/
+obj-y += idxd/
 obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
 obj-$(CONFIG_K3_DMA) += k3dma.o
 obj-$(CONFIG_LPC18XX_DMAMUX) += lpc18xx-dmamux.o
diff --git a/drivers/dma/acpi-dma.c b/drivers/dma/acpi-dma.c
index 235f1396f968..5906eae26e2a 100644
--- a/drivers/dma/acpi-dma.c
+++ b/drivers/dma/acpi-dma.c
@@ -70,10 +70,22 @@ static int acpi_dma_parse_resource_group(const struct acpi_csrt_group *grp,
 
 	si = (const struct acpi_csrt_shared_info *)&grp[1];
 
-	/* Match device by MMIO and IRQ */
+	/* Match device by MMIO */
 	if (si->mmio_base_low != lower_32_bits(mem) ||
-	    si->mmio_base_high != upper_32_bits(mem) ||
-	    si->gsi_interrupt != irq)
+	    si->mmio_base_high != upper_32_bits(mem))
+		return 0;
+
+	/*
+	 * acpi_gsi_to_irq() can't be used because some platforms do not save
+	 * registered IRQs in the MP table. Instead we just try to register
+	 * the GSI, which is the core part of the above mentioned function.
+	 */
+	ret = acpi_register_gsi(NULL, si->gsi_interrupt, si->interrupt_mode, si->interrupt_polarity);
+	if (ret < 0)
+		return 0;
+
+	/* Match device by Linux vIRQ */
+	if (ret != irq)
 		return 0;
 
 	dev_dbg(&adev->dev, "matches with %.4s%04X (rev %u)\n",
diff --git a/drivers/dma/altera-msgdma.c b/drivers/dma/altera-msgdma.c
index 0fe0676f8e1d..5a2c7573b692 100644
--- a/drivers/dma/altera-msgdma.c
+++ b/drivers/dma/altera-msgdma.c
@@ -691,10 +691,14 @@ static void msgdma_tasklet(struct tasklet_struct *t)
 
 	spin_lock_irqsave(&mdev->lock, flags);
 
-	/* Read number of responses that are available */
-	count = ioread32(mdev->csr + MSGDMA_CSR_RESP_FILL_LEVEL);
-	dev_dbg(mdev->dev, "%s (%d): response count=%d\n",
-		__func__, __LINE__, count);
+	if (mdev->resp) {
+		/* Read number of responses that are available */
+		count = ioread32(mdev->csr + MSGDMA_CSR_RESP_FILL_LEVEL);
+		dev_dbg(mdev->dev, "%s (%d): response count=%d\n",
+			__func__, __LINE__, count);
+	} else {
+		count = 1;
+	}
 
 	while (count--) {
 		/*
@@ -703,8 +707,12 @@ static void msgdma_tasklet(struct tasklet_struct *t)
 		 * have any real values, like transferred bytes or error
 		 * bits. So we need to just drop these values.
 		 */
-		size = ioread32(mdev->resp + MSGDMA_RESP_BYTES_TRANSFERRED);
-		status = ioread32(mdev->resp + MSGDMA_RESP_STATUS);
+		if (mdev->resp) {
+			size = ioread32(mdev->resp +
+					MSGDMA_RESP_BYTES_TRANSFERRED);
+			status = ioread32(mdev->resp +
+					MSGDMA_RESP_STATUS);
+		}
 
 		msgdma_complete_descriptor(mdev);
 		msgdma_chan_desc_cleanup(mdev);
@@ -757,14 +765,21 @@ static void msgdma_dev_remove(struct msgdma_device *mdev)
 }
 
 static int request_and_map(struct platform_device *pdev, const char *name,
-			   struct resource **res, void __iomem **ptr)
+			   struct resource **res, void __iomem **ptr,
+			   bool optional)
 {
 	struct resource *region;
 	struct device *device = &pdev->dev;
 
 	*res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name);
 	if (*res == NULL) {
-		dev_err(device, "resource %s not defined\n", name);
+		if (optional) {
+			*ptr = NULL;
+			dev_info(device, "optional resource %s not defined\n",
+				 name);
+			return 0;
+		}
+		dev_err(device, "mandatory resource %s not defined\n", name);
 		return -ENODEV;
 	}
 
@@ -805,17 +820,17 @@ static int msgdma_probe(struct platform_device *pdev)
 	mdev->dev = &pdev->dev;
 
 	/* Map CSR space */
-	ret = request_and_map(pdev, "csr", &dma_res, &mdev->csr);
+	ret = request_and_map(pdev, "csr", &dma_res, &mdev->csr, false);
 	if (ret)
 		return ret;
 
 	/* Map (extended) descriptor space */
-	ret = request_and_map(pdev, "desc", &dma_res, &mdev->desc);
+	ret = request_and_map(pdev, "desc", &dma_res, &mdev->desc, false);
 	if (ret)
 		return ret;
 
 	/* Map response space */
-	ret = request_and_map(pdev, "resp", &dma_res, &mdev->resp);
+	ret = request_and_map(pdev, "resp", &dma_res, &mdev->resp, true);
 	if (ret)
 		return ret;
 
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index 64a52bf4d737..ab78e0f6afd7 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -2240,10 +2240,16 @@ static struct platform_driver at_xdmac_driver = {
 
 static int __init at_xdmac_init(void)
 {
-	return platform_driver_probe(&at_xdmac_driver, at_xdmac_probe);
+	return platform_driver_register(&at_xdmac_driver);
 }
 subsys_initcall(at_xdmac_init);
 
+static void __exit at_xdmac_exit(void)
+{
+	platform_driver_unregister(&at_xdmac_driver);
+}
+module_exit(at_xdmac_exit);
+
 MODULE_DESCRIPTION("Atmel Extended DMA Controller driver");
 MODULE_AUTHOR("Ludovic Desroches <ludovic.desroches@atmel.com>");
 MODULE_LICENSE("GPL");
diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
index d9e4ac3edb4e..35993ab92154 100644
--- a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
+++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
@@ -363,12 +363,16 @@ static void axi_chan_block_xfer_start(struct axi_dma_chan *chan,
 			DWAXIDMAC_TT_FC_MEM_TO_PER_DST :
 			DWAXIDMAC_TT_FC_MEM_TO_PER_DMAC)
 			<< CH_CFG_H_TT_FC_POS;
+		if (chan->chip->apb_regs)
+			reg |= (chan->id << CH_CFG_H_DST_PER_POS);
 		break;
 	case DMA_DEV_TO_MEM:
 		reg |= (chan->config.device_fc ?
 			DWAXIDMAC_TT_FC_PER_TO_MEM_SRC :
 			DWAXIDMAC_TT_FC_PER_TO_MEM_DMAC)
 			<< CH_CFG_H_TT_FC_POS;
+		if (chan->chip->apb_regs)
+			reg |= (chan->id << CH_CFG_H_SRC_PER_POS);
 		break;
 	default:
 		break;
@@ -470,18 +474,13 @@ static void dma_chan_free_chan_resources(struct dma_chan *dchan)
 	pm_runtime_put(chan->chip->dev);
 }
 
-static void dw_axi_dma_set_hw_channel(struct axi_dma_chip *chip,
-				      u32 handshake_num, bool set)
+static void dw_axi_dma_set_hw_channel(struct axi_dma_chan *chan, bool set)
 {
-	unsigned long start = 0;
-	unsigned long reg_value;
-	unsigned long reg_mask;
-	unsigned long reg_set;
-	unsigned long mask;
-	unsigned long val;
+	struct axi_dma_chip *chip = chan->chip;
+	unsigned long reg_value, val;
 
 	if (!chip->apb_regs) {
-		dev_dbg(chip->dev, "apb_regs not initialized\n");
+		dev_err(chip->dev, "apb_regs not initialized\n");
 		return;
 	}
 
@@ -490,26 +489,22 @@ static void dw_axi_dma_set_hw_channel(struct axi_dma_chip *chip,
 	 * Lock the DMA channel by assign a handshake number to the channel.
 	 * Unlock the DMA channel by assign 0x3F to the channel.
 	 */
-	if (set) {
-		reg_set = UNUSED_CHANNEL;
-		val = handshake_num;
-	} else {
-		reg_set = handshake_num;
+	if (set)
+		val = chan->hw_handshake_num;
+	else
 		val = UNUSED_CHANNEL;
-	}
 
 	reg_value = lo_hi_readq(chip->apb_regs + DMAC_APB_HW_HS_SEL_0);
 
-	for_each_set_clump8(start, reg_mask, &reg_value, 64) {
-		if (reg_mask == reg_set) {
-			mask = GENMASK_ULL(start + 7, start);
-			reg_value &= ~mask;
-			reg_value |= rol64(val, start);
-			lo_hi_writeq(reg_value,
-				     chip->apb_regs + DMAC_APB_HW_HS_SEL_0);
-			break;
-		}
-	}
+	/* Channel is already allocated, set handshake as per channel ID */
+	/* 64 bit write should handle for 8 channels */
+
+	reg_value &= ~(DMA_APB_HS_SEL_MASK <<
+			(chan->id * DMA_APB_HS_SEL_BIT_SIZE));
+	reg_value |= (val << (chan->id * DMA_APB_HS_SEL_BIT_SIZE));
+	lo_hi_writeq(reg_value, chip->apb_regs + DMAC_APB_HW_HS_SEL_0);
+
+	return;
 }
 
 /*
@@ -742,7 +737,7 @@ dw_axi_dma_chan_prep_cyclic(struct dma_chan *dchan, dma_addr_t dma_addr,
 		llp = hw_desc->llp;
 	} while (total_segments);
 
-	dw_axi_dma_set_hw_channel(chan->chip, chan->hw_handshake_num, true);
+	dw_axi_dma_set_hw_channel(chan, true);
 
 	return vchan_tx_prep(&chan->vc, &desc->vd, flags);
 
@@ -822,7 +817,7 @@ dw_axi_dma_chan_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl,
 		llp = hw_desc->llp;
 	} while (num_sgs);
 
-	dw_axi_dma_set_hw_channel(chan->chip, chan->hw_handshake_num, true);
+	dw_axi_dma_set_hw_channel(chan, true);
 
 	return vchan_tx_prep(&chan->vc, &desc->vd, flags);
 
@@ -1098,8 +1093,7 @@ static int dma_chan_terminate_all(struct dma_chan *dchan)
 			 "%s failed to stop\n", axi_chan_name(chan));
 
 	if (chan->direction != DMA_MEM_TO_MEM)
-		dw_axi_dma_set_hw_channel(chan->chip,
-					  chan->hw_handshake_num, false);
+		dw_axi_dma_set_hw_channel(chan, false);
 	if (chan->direction == DMA_MEM_TO_DEV)
 		dw_axi_dma_set_byte_halfword(chan, false);
 
@@ -1296,7 +1290,7 @@ static int parse_device_properties(struct axi_dma_chip *chip)
 			return -EINVAL;
 
 		chip->dw->hdata->restrict_axi_burst_len = true;
-		chip->dw->hdata->axi_rw_burst_len = tmp - 1;
+		chip->dw->hdata->axi_rw_burst_len = tmp;
 	}
 
 	return 0;
@@ -1365,7 +1359,6 @@ static int dw_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-
 	INIT_LIST_HEAD(&dw->dma.channels);
 	for (i = 0; i < hdata->nr_channels; i++) {
 		struct axi_dma_chan *chan = &dw->chan[i];
@@ -1386,6 +1379,7 @@ static int dw_probe(struct platform_device *pdev)
 
 	/* DMA capabilities */
 	dw->dma.chancnt = hdata->nr_channels;
+	dw->dma.max_burst = hdata->axi_rw_burst_len;
 	dw->dma.src_addr_widths = AXI_DMA_BUSWIDTHS;
 	dw->dma.dst_addr_widths = AXI_DMA_BUSWIDTHS;
 	dw->dma.directions = BIT(DMA_MEM_TO_MEM);
diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac.h b/drivers/dma/dw-axi-dmac/dw-axi-dmac.h
index b69897887c76..380005afde16 100644
--- a/drivers/dma/dw-axi-dmac/dw-axi-dmac.h
+++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac.h
@@ -184,6 +184,8 @@ static inline struct axi_dma_chan *dchan_to_axi_dma_chan(struct dma_chan *dchan)
 #define DMAC_APB_HALFWORD_WR_CH_EN	0x020 /* DMAC Halfword write enables */
 
 #define UNUSED_CHANNEL		0x3F /* Set unused DMA channel to 0x3F */
+#define DMA_APB_HS_SEL_BIT_SIZE	0x08 /* HW handshake bits per channel */
+#define DMA_APB_HS_SEL_MASK	0xFF /* HW handshake select masks */
 #define MAX_BLOCK_SIZE		0x1000 /* 1024 blocks * 4 bytes data width */
 
 /* DMAC_CFG */
@@ -256,6 +258,8 @@ enum {
 
 /* CH_CFG_H */
 #define CH_CFG_H_PRIORITY_POS		17
+#define CH_CFG_H_DST_PER_POS		12
+#define CH_CFG_H_SRC_PER_POS		7
 #define CH_CFG_H_HS_SEL_DST_POS		4
 #define CH_CFG_H_HS_SEL_SRC_POS		3
 enum {
diff --git a/drivers/dma/dw/idma32.c b/drivers/dma/dw/idma32.c
index 3ce44de25d33..58f4078d83fe 100644
--- a/drivers/dma/dw/idma32.c
+++ b/drivers/dma/dw/idma32.c
@@ -1,15 +1,144 @@
 // SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2013,2018 Intel Corporation
+// Copyright (C) 2013,2018,2020-2021 Intel Corporation
 
 #include <linux/bitops.h>
 #include <linux/dmaengine.h>
 #include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/pci.h>
 #include <linux/slab.h>
 #include <linux/types.h>
 
 #include "internal.h"
 
-static void idma32_initialize_chan(struct dw_dma_chan *dwc)
+#define DMA_CTL_CH(x)			(0x1000 + (x) * 4)
+#define DMA_SRC_ADDR_FILLIN(x)		(0x1100 + (x) * 4)
+#define DMA_DST_ADDR_FILLIN(x)		(0x1200 + (x) * 4)
+#define DMA_XBAR_SEL(x)			(0x1300 + (x) * 4)
+#define DMA_REGACCESS_CHID_CFG		(0x1400)
+
+#define CTL_CH_TRANSFER_MODE_MASK	GENMASK(1, 0)
+#define CTL_CH_TRANSFER_MODE_S2S	0
+#define CTL_CH_TRANSFER_MODE_S2D	1
+#define CTL_CH_TRANSFER_MODE_D2S	2
+#define CTL_CH_TRANSFER_MODE_D2D	3
+#define CTL_CH_RD_RS_MASK		GENMASK(4, 3)
+#define CTL_CH_WR_RS_MASK		GENMASK(6, 5)
+#define CTL_CH_RD_NON_SNOOP_BIT		BIT(8)
+#define CTL_CH_WR_NON_SNOOP_BIT		BIT(9)
+
+#define XBAR_SEL_DEVID_MASK		GENMASK(15, 0)
+#define XBAR_SEL_RX_TX_BIT		BIT(16)
+#define XBAR_SEL_RX_TX_SHIFT		16
+
+#define REGACCESS_CHID_MASK		GENMASK(2, 0)
+
+static unsigned int idma32_get_slave_devfn(struct dw_dma_chan *dwc)
+{
+	struct device *slave = dwc->chan.slave;
+
+	if (!slave || !dev_is_pci(slave))
+		return 0;
+
+	return to_pci_dev(slave)->devfn;
+}
+
+static void idma32_initialize_chan_xbar(struct dw_dma_chan *dwc)
+{
+	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+	void __iomem *misc = __dw_regs(dw);
+	u32 cfghi = 0, cfglo = 0;
+	u8 dst_id, src_id;
+	u32 value;
+
+	/* DMA Channel ID Configuration register must be programmed first */
+	value = readl(misc + DMA_REGACCESS_CHID_CFG);
+
+	value &= ~REGACCESS_CHID_MASK;
+	value |= dwc->chan.chan_id;
+
+	writel(value, misc + DMA_REGACCESS_CHID_CFG);
+
+	/* Configure channel attributes */
+	value = readl(misc + DMA_CTL_CH(dwc->chan.chan_id));
+
+	value &= ~(CTL_CH_RD_NON_SNOOP_BIT | CTL_CH_WR_NON_SNOOP_BIT);
+	value &= ~(CTL_CH_RD_RS_MASK | CTL_CH_WR_RS_MASK);
+	value &= ~CTL_CH_TRANSFER_MODE_MASK;
+
+	switch (dwc->direction) {
+	case DMA_MEM_TO_DEV:
+		value |= CTL_CH_TRANSFER_MODE_D2S;
+		value |= CTL_CH_WR_NON_SNOOP_BIT;
+		break;
+	case DMA_DEV_TO_MEM:
+		value |= CTL_CH_TRANSFER_MODE_S2D;
+		value |= CTL_CH_RD_NON_SNOOP_BIT;
+		break;
+	default:
+		/*
+		 * Memory-to-Memory and Device-to-Device are ignored for now.
+		 *
+		 * For Memory-to-Memory transfers we would need to set mode
+		 * and disable snooping on both sides.
+		 */
+		return;
+	}
+
+	writel(value, misc + DMA_CTL_CH(dwc->chan.chan_id));
+
+	/* Configure crossbar selection */
+	value = readl(misc + DMA_XBAR_SEL(dwc->chan.chan_id));
+
+	/* DEVFN selection */
+	value &= ~XBAR_SEL_DEVID_MASK;
+	value |= idma32_get_slave_devfn(dwc);
+
+	switch (dwc->direction) {
+	case DMA_MEM_TO_DEV:
+		value |= XBAR_SEL_RX_TX_BIT;
+		break;
+	case DMA_DEV_TO_MEM:
+		value &= ~XBAR_SEL_RX_TX_BIT;
+		break;
+	default:
+		/* Memory-to-Memory and Device-to-Device are ignored for now */
+		return;
+	}
+
+	writel(value, misc + DMA_XBAR_SEL(dwc->chan.chan_id));
+
+	/* Configure DMA channel low and high registers */
+	switch (dwc->direction) {
+	case DMA_MEM_TO_DEV:
+		dst_id = dwc->chan.chan_id;
+		src_id = dwc->dws.src_id;
+		break;
+	case DMA_DEV_TO_MEM:
+		dst_id = dwc->dws.dst_id;
+		src_id = dwc->chan.chan_id;
+		break;
+	default:
+		/* Memory-to-Memory and Device-to-Device are ignored for now */
+		return;
+	}
+
+	/* Set default burst alignment */
+	cfglo |= IDMA32C_CFGL_DST_BURST_ALIGN | IDMA32C_CFGL_SRC_BURST_ALIGN;
+
+	/* Low 4 bits of the request lines */
+	cfghi |= IDMA32C_CFGH_DST_PER(dst_id & 0xf);
+	cfghi |= IDMA32C_CFGH_SRC_PER(src_id & 0xf);
+
+	/* Request line extension (2 bits) */
+	cfghi |= IDMA32C_CFGH_DST_PER_EXT(dst_id >> 4 & 0x3);
+	cfghi |= IDMA32C_CFGH_SRC_PER_EXT(src_id >> 4 & 0x3);
+
+	channel_writel(dwc, CFG_LO, cfglo);
+	channel_writel(dwc, CFG_HI, cfghi);
+}
+
+static void idma32_initialize_chan_generic(struct dw_dma_chan *dwc)
 {
 	u32 cfghi = 0;
 	u32 cfglo = 0;
@@ -134,7 +263,10 @@ int idma32_dma_probe(struct dw_dma_chip *chip)
 		return -ENOMEM;
 
 	/* Channel operations */
-	dw->initialize_chan = idma32_initialize_chan;
+	if (chip->pdata->quirks & DW_DMA_QUIRK_XBAR_PRESENT)
+		dw->initialize_chan = idma32_initialize_chan_xbar;
+	else
+		dw->initialize_chan = idma32_initialize_chan_generic;
 	dw->suspend_chan = idma32_suspend_chan;
 	dw->resume_chan = idma32_resume_chan;
 	dw->prepare_ctllo = idma32_prepare_ctllo;
diff --git a/drivers/dma/dw/internal.h b/drivers/dma/dw/internal.h
index 2e1c52eefdeb..563ce73488db 100644
--- a/drivers/dma/dw/internal.h
+++ b/drivers/dma/dw/internal.h
@@ -74,4 +74,20 @@ static __maybe_unused const struct dw_dma_chip_pdata idma32_chip_pdata = {
 	.remove = idma32_dma_remove,
 };
 
+static const struct dw_dma_platform_data xbar_pdata = {
+	.nr_channels = 8,
+	.chan_allocation_order = CHAN_ALLOCATION_ASCENDING,
+	.chan_priority = CHAN_PRIORITY_ASCENDING,
+	.block_size = 131071,
+	.nr_masters = 1,
+	.data_width = {4},
+	.quirks = DW_DMA_QUIRK_XBAR_PRESENT,
+};
+
+static __maybe_unused const struct dw_dma_chip_pdata xbar_chip_pdata = {
+	.pdata = &xbar_pdata,
+	.probe = idma32_dma_probe,
+	.remove = idma32_dma_remove,
+};
+
 #endif /* _DMA_DW_INTERNAL_H */
diff --git a/drivers/dma/dw/of.c b/drivers/dma/dw/of.c
index c1cf7675b9d1..523ca806837c 100644
--- a/drivers/dma/dw/of.c
+++ b/drivers/dma/dw/of.c
@@ -50,15 +50,10 @@ struct dw_dma_platform_data *dw_dma_parse_dt(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
 	struct dw_dma_platform_data *pdata;
-	u32 tmp, arr[DW_DMA_MAX_NR_MASTERS], mb[DW_DMA_MAX_NR_CHANNELS];
+	u32 tmp, arr[DW_DMA_MAX_NR_MASTERS];
 	u32 nr_masters;
 	u32 nr_channels;
 
-	if (!np) {
-		dev_err(&pdev->dev, "Missing DT data\n");
-		return NULL;
-	}
-
 	if (of_property_read_u32(np, "dma-masters", &nr_masters))
 		return NULL;
 	if (nr_masters < 1 || nr_masters > DW_DMA_MAX_NR_MASTERS)
@@ -76,41 +71,29 @@ struct dw_dma_platform_data *dw_dma_parse_dt(struct platform_device *pdev)
 	pdata->nr_masters = nr_masters;
 	pdata->nr_channels = nr_channels;
 
-	if (!of_property_read_u32(np, "chan_allocation_order", &tmp))
-		pdata->chan_allocation_order = (unsigned char)tmp;
+	of_property_read_u32(np, "chan_allocation_order", &pdata->chan_allocation_order);
+	of_property_read_u32(np, "chan_priority", &pdata->chan_priority);
 
-	if (!of_property_read_u32(np, "chan_priority", &tmp))
-		pdata->chan_priority = tmp;
+	of_property_read_u32(np, "block_size", &pdata->block_size);
 
-	if (!of_property_read_u32(np, "block_size", &tmp))
-		pdata->block_size = tmp;
-
-	if (!of_property_read_u32_array(np, "data-width", arr, nr_masters)) {
-		for (tmp = 0; tmp < nr_masters; tmp++)
-			pdata->data_width[tmp] = arr[tmp];
-	} else if (!of_property_read_u32_array(np, "data_width", arr, nr_masters)) {
+	/* Try deprecated property first */
+	if (!of_property_read_u32_array(np, "data_width", arr, nr_masters)) {
 		for (tmp = 0; tmp < nr_masters; tmp++)
 			pdata->data_width[tmp] = BIT(arr[tmp] & 0x07);
 	}
 
-	if (!of_property_read_u32_array(np, "multi-block", mb, nr_channels)) {
-		for (tmp = 0; tmp < nr_channels; tmp++)
-			pdata->multi_block[tmp] = mb[tmp];
-	} else {
-		for (tmp = 0; tmp < nr_channels; tmp++)
-			pdata->multi_block[tmp] = 1;
-	}
+	/* If "data_width" and "data-width" both provided use the latter one */
+	of_property_read_u32_array(np, "data-width", pdata->data_width, nr_masters);
 
-	if (of_property_read_u32_array(np, "snps,max-burst-len", pdata->max_burst,
-				       nr_channels)) {
-		memset32(pdata->max_burst, DW_DMA_MAX_BURST, nr_channels);
-	}
+	memset32(pdata->multi_block, 1, nr_channels);
+	of_property_read_u32_array(np, "multi-block", pdata->multi_block, nr_channels);
 
-	if (!of_property_read_u32(np, "snps,dma-protection-control", &tmp)) {
-		if (tmp > CHAN_PROTCTL_MASK)
-			return NULL;
-		pdata->protctl = tmp;
-	}
+	memset32(pdata->max_burst, DW_DMA_MAX_BURST, nr_channels);
+	of_property_read_u32_array(np, "snps,max-burst-len", pdata->max_burst, nr_channels);
+
+	of_property_read_u32(np, "snps,dma-protection-control", &pdata->protctl);
+	if (pdata->protctl > CHAN_PROTCTL_MASK)
+		return NULL;
 
 	return pdata;
 }
diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c
index 1142aa6f8c4a..26a3f926da02 100644
--- a/drivers/dma/dw/pci.c
+++ b/drivers/dma/dw/pci.c
@@ -120,9 +120,9 @@ static const struct pci_device_id dw_pci_id_table[] = {
 	{ PCI_VDEVICE(INTEL, 0x22c0), (kernel_ulong_t)&dw_dma_chip_pdata },
 
 	/* Elkhart Lake iDMA 32-bit (PSE DMA) */
-	{ PCI_VDEVICE(INTEL, 0x4bb4), (kernel_ulong_t)&idma32_chip_pdata },
-	{ PCI_VDEVICE(INTEL, 0x4bb5), (kernel_ulong_t)&idma32_chip_pdata },
-	{ PCI_VDEVICE(INTEL, 0x4bb6), (kernel_ulong_t)&idma32_chip_pdata },
+	{ PCI_VDEVICE(INTEL, 0x4bb4), (kernel_ulong_t)&xbar_chip_pdata },
+	{ PCI_VDEVICE(INTEL, 0x4bb5), (kernel_ulong_t)&xbar_chip_pdata },
+	{ PCI_VDEVICE(INTEL, 0x4bb6), (kernel_ulong_t)&xbar_chip_pdata },
 
 	/* Haswell */
 	{ PCI_VDEVICE(INTEL, 0x9c60), (kernel_ulong_t)&dw_dma_chip_pdata },
diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c
index 0585d749d935..246118955877 100644
--- a/drivers/dma/dw/platform.c
+++ b/drivers/dma/dw/platform.c
@@ -149,9 +149,9 @@ static const struct acpi_device_id dw_dma_acpi_id_table[] = {
 	{ "808622C0", (kernel_ulong_t)&dw_dma_chip_pdata },
 
 	/* Elkhart Lake iDMA 32-bit (PSE DMA) */
-	{ "80864BB4", (kernel_ulong_t)&idma32_chip_pdata },
-	{ "80864BB5", (kernel_ulong_t)&idma32_chip_pdata },
-	{ "80864BB6", (kernel_ulong_t)&idma32_chip_pdata },
+	{ "80864BB4", (kernel_ulong_t)&xbar_chip_pdata },
+	{ "80864BB5", (kernel_ulong_t)&xbar_chip_pdata },
+	{ "80864BB6", (kernel_ulong_t)&xbar_chip_pdata },
 
 	{ }
 };
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
index 01027779beb8..98f9ee70362e 100644
--- a/drivers/dma/ep93xx_dma.c
+++ b/drivers/dma/ep93xx_dma.c
@@ -897,7 +897,7 @@ static int ep93xx_dma_alloc_chan_resources(struct dma_chan *chan)
 	if (data && data->name)
 		name = data->name;
 
-	ret = clk_enable(edmac->clk);
+	ret = clk_prepare_enable(edmac->clk);
 	if (ret)
 		return ret;
 
@@ -936,7 +936,7 @@ static int ep93xx_dma_alloc_chan_resources(struct dma_chan *chan)
 fail_free_irq:
 	free_irq(edmac->irq, edmac);
 fail_clk_disable:
-	clk_disable(edmac->clk);
+	clk_disable_unprepare(edmac->clk);
 
 	return ret;
 }
@@ -969,7 +969,7 @@ static void ep93xx_dma_free_chan_resources(struct dma_chan *chan)
 	list_for_each_entry_safe(desc, d, &list, node)
 		kfree(desc);
 
-	clk_disable(edmac->clk);
+	clk_disable_unprepare(edmac->clk);
 	free_irq(edmac->irq, edmac);
 }
 
diff --git a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c
index 4ae057922ef1..8dd40d00a672 100644
--- a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c
+++ b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c
@@ -291,9 +291,8 @@ static void dpaa2_qdma_issue_pending(struct dma_chan *chan)
 
 		err = dpaa2_io_service_enqueue_fq(NULL, dpaa2_chan->fqid, fd);
 		if (err) {
-			list_del(&dpaa2_comp->list);
-			list_add_tail(&dpaa2_comp->list,
-				      &dpaa2_chan->comp_free);
+			list_move_tail(&dpaa2_comp->list,
+				       &dpaa2_chan->comp_free);
 		}
 	}
 err_enqueue:
@@ -626,8 +625,7 @@ static void dpaa2_qdma_free_desc(struct virt_dma_desc *vdesc)
 	dpaa2_comp = to_fsl_qdma_comp(vdesc);
 	qchan = dpaa2_comp->qchan;
 	spin_lock_irqsave(&qchan->queue_lock, flags);
-	list_del(&dpaa2_comp->list);
-	list_add_tail(&dpaa2_comp->list, &qchan->comp_free);
+	list_move_tail(&dpaa2_comp->list, &qchan->comp_free);
 	spin_unlock_irqrestore(&qchan->queue_lock, flags);
 }
 
@@ -703,7 +701,7 @@ static int dpaa2_qdma_probe(struct fsl_mc_device *dpdmai_dev)
 	/* DPDMAI enable */
 	err = dpdmai_enable(priv->mc_io, 0, dpdmai_dev->mc_handle);
 	if (err) {
-		dev_err(dev, "dpdmai_enable() faile\n");
+		dev_err(dev, "dpdmai_enable() failed\n");
 		goto err_enable;
 	}
 
diff --git a/drivers/dma/hisi_dma.c b/drivers/dma/hisi_dma.c
index a259ee010e9b..c855a0e4f9ff 100644
--- a/drivers/dma/hisi_dma.c
+++ b/drivers/dma/hisi_dma.c
@@ -133,11 +133,6 @@ static inline void hisi_dma_update_bit(void __iomem *addr, u32 pos, bool val)
 	writel_relaxed(tmp, addr);
 }
 
-static void hisi_dma_free_irq_vectors(void *data)
-{
-	pci_free_irq_vectors(data);
-}
-
 static void hisi_dma_pause_dma(struct hisi_dma_dev *hdma_dev, u32 index,
 			       bool pause)
 {
@@ -544,6 +539,7 @@ static int hisi_dma_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	pci_set_drvdata(pdev, hdma_dev);
 	pci_set_master(pdev);
 
+	/* This will be freed by 'pcim_release()'. See 'pcim_enable_device()' */
 	ret = pci_alloc_irq_vectors(pdev, HISI_DMA_MSI_NUM, HISI_DMA_MSI_NUM,
 				    PCI_IRQ_MSI);
 	if (ret < 0) {
@@ -551,10 +547,6 @@ static int hisi_dma_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		return ret;
 	}
 
-	ret = devm_add_action_or_reset(dev, hisi_dma_free_irq_vectors, pdev);
-	if (ret)
-		return ret;
-
 	dma_dev = &hdma_dev->dma_dev;
 	dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
 	dma_dev->device_free_chan_resources = hisi_dma_free_chan_resources;
diff --git a/drivers/dma/idxd/Makefile b/drivers/dma/idxd/Makefile
index 6d11558756f8..a1e9f2b3a37c 100644
--- a/drivers/dma/idxd/Makefile
+++ b/drivers/dma/idxd/Makefile
@@ -1,4 +1,12 @@
+ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=IDXD
+
 obj-$(CONFIG_INTEL_IDXD) += idxd.o
 idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o cdev.o
 
 idxd-$(CONFIG_INTEL_IDXD_PERFMON) += perfmon.o
+
+obj-$(CONFIG_INTEL_IDXD_BUS) += idxd_bus.o
+idxd_bus-y := bus.o
+
+obj-$(CONFIG_INTEL_IDXD_COMPAT) += idxd_compat.o
+idxd_compat-y := compat.o
diff --git a/drivers/dma/idxd/bus.c b/drivers/dma/idxd/bus.c
new file mode 100644
index 000000000000..6f84621053c6
--- /dev/null
+++ b/drivers/dma/idxd/bus.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include "idxd.h"
+
+
+int __idxd_driver_register(struct idxd_device_driver *idxd_drv, struct module *owner,
+			   const char *mod_name)
+{
+	struct device_driver *drv = &idxd_drv->drv;
+
+	if (!idxd_drv->type) {
+		pr_debug("driver type not set (%ps)\n", __builtin_return_address(0));
+		return -EINVAL;
+	}
+
+	drv->name = idxd_drv->name;
+	drv->bus = &dsa_bus_type;
+	drv->owner = owner;
+	drv->mod_name = mod_name;
+
+	return driver_register(drv);
+}
+EXPORT_SYMBOL_GPL(__idxd_driver_register);
+
+void idxd_driver_unregister(struct idxd_device_driver *idxd_drv)
+{
+	driver_unregister(&idxd_drv->drv);
+}
+EXPORT_SYMBOL_GPL(idxd_driver_unregister);
+
+static int idxd_config_bus_match(struct device *dev,
+				 struct device_driver *drv)
+{
+	struct idxd_device_driver *idxd_drv =
+		container_of(drv, struct idxd_device_driver, drv);
+	struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
+	int i = 0;
+
+	while (idxd_drv->type[i] != IDXD_DEV_NONE) {
+		if (idxd_dev->type == idxd_drv->type[i])
+			return 1;
+		i++;
+	}
+
+	return 0;
+}
+
+static int idxd_config_bus_probe(struct device *dev)
+{
+	struct idxd_device_driver *idxd_drv =
+		container_of(dev->driver, struct idxd_device_driver, drv);
+	struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
+
+	return idxd_drv->probe(idxd_dev);
+}
+
+static void idxd_config_bus_remove(struct device *dev)
+{
+	struct idxd_device_driver *idxd_drv =
+		container_of(dev->driver, struct idxd_device_driver, drv);
+	struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
+
+	idxd_drv->remove(idxd_dev);
+}
+
+struct bus_type dsa_bus_type = {
+	.name = "dsa",
+	.match = idxd_config_bus_match,
+	.probe = idxd_config_bus_probe,
+	.remove = idxd_config_bus_remove,
+};
+EXPORT_SYMBOL_GPL(dsa_bus_type);
+
+static int __init dsa_bus_init(void)
+{
+	return bus_register(&dsa_bus_type);
+}
+module_init(dsa_bus_init);
+
+static void __exit dsa_bus_exit(void)
+{
+	bus_unregister(&dsa_bus_type);
+}
+module_exit(dsa_bus_exit);
+
+MODULE_DESCRIPTION("IDXD driver dsa_bus_type driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index e9def577c697..b9b2b4a4124e 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -41,7 +41,7 @@ struct idxd_user_context {
 
 static void idxd_cdev_dev_release(struct device *dev)
 {
-	struct idxd_cdev *idxd_cdev = container_of(dev, struct idxd_cdev, dev);
+	struct idxd_cdev *idxd_cdev = dev_to_cdev(dev);
 	struct idxd_cdev_context *cdev_ctx;
 	struct idxd_wq *wq = idxd_cdev->wq;
 
@@ -218,14 +218,13 @@ static __poll_t idxd_cdev_poll(struct file *filp,
 	struct idxd_user_context *ctx = filp->private_data;
 	struct idxd_wq *wq = ctx->wq;
 	struct idxd_device *idxd = wq->idxd;
-	unsigned long flags;
 	__poll_t out = 0;
 
 	poll_wait(filp, &wq->err_queue, wait);
-	spin_lock_irqsave(&idxd->dev_lock, flags);
+	spin_lock(&idxd->dev_lock);
 	if (idxd->sw_err.valid)
 		out = EPOLLIN | EPOLLRDNORM;
-	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+	spin_unlock(&idxd->dev_lock);
 
 	return out;
 }
@@ -256,9 +255,10 @@ int idxd_wq_add_cdev(struct idxd_wq *wq)
 	if (!idxd_cdev)
 		return -ENOMEM;
 
+	idxd_cdev->idxd_dev.type = IDXD_DEV_CDEV;
 	idxd_cdev->wq = wq;
 	cdev = &idxd_cdev->cdev;
-	dev = &idxd_cdev->dev;
+	dev = cdev_dev(idxd_cdev);
 	cdev_ctx = &ictx[wq->idxd->data->type];
 	minor = ida_simple_get(&cdev_ctx->minor_ida, 0, MINORMASK, GFP_KERNEL);
 	if (minor < 0) {
@@ -268,7 +268,7 @@ int idxd_wq_add_cdev(struct idxd_wq *wq)
 	idxd_cdev->minor = minor;
 
 	device_initialize(dev);
-	dev->parent = &wq->conf_dev;
+	dev->parent = wq_confdev(wq);
 	dev->bus = &dsa_bus_type;
 	dev->type = &idxd_cdev_device_type;
 	dev->devt = MKDEV(MAJOR(cdev_ctx->devt), minor);
@@ -299,10 +299,67 @@ void idxd_wq_del_cdev(struct idxd_wq *wq)
 
 	idxd_cdev = wq->idxd_cdev;
 	wq->idxd_cdev = NULL;
-	cdev_device_del(&idxd_cdev->cdev, &idxd_cdev->dev);
-	put_device(&idxd_cdev->dev);
+	cdev_device_del(&idxd_cdev->cdev, cdev_dev(idxd_cdev));
+	put_device(cdev_dev(idxd_cdev));
 }
 
+static int idxd_user_drv_probe(struct idxd_dev *idxd_dev)
+{
+	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
+	struct idxd_device *idxd = wq->idxd;
+	int rc;
+
+	if (idxd->state != IDXD_DEV_ENABLED)
+		return -ENXIO;
+
+	mutex_lock(&wq->wq_lock);
+	wq->type = IDXD_WQT_USER;
+	rc = __drv_enable_wq(wq);
+	if (rc < 0)
+		goto err;
+
+	rc = idxd_wq_add_cdev(wq);
+	if (rc < 0) {
+		idxd->cmd_status = IDXD_SCMD_CDEV_ERR;
+		goto err_cdev;
+	}
+
+	idxd->cmd_status = 0;
+	mutex_unlock(&wq->wq_lock);
+	return 0;
+
+err_cdev:
+	__drv_disable_wq(wq);
+err:
+	wq->type = IDXD_WQT_NONE;
+	mutex_unlock(&wq->wq_lock);
+	return rc;
+}
+
+static void idxd_user_drv_remove(struct idxd_dev *idxd_dev)
+{
+	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
+
+	mutex_lock(&wq->wq_lock);
+	idxd_wq_del_cdev(wq);
+	__drv_disable_wq(wq);
+	wq->type = IDXD_WQT_NONE;
+	mutex_unlock(&wq->wq_lock);
+}
+
+static enum idxd_dev_type dev_types[] = {
+	IDXD_DEV_WQ,
+	IDXD_DEV_NONE,
+};
+
+struct idxd_device_driver idxd_user_drv = {
+	.probe = idxd_user_drv_probe,
+	.remove = idxd_user_drv_remove,
+	.name = "user",
+	.type = dev_types,
+};
+EXPORT_SYMBOL_GPL(idxd_user_drv);
+
 int idxd_cdev_register(void)
 {
 	int rc, i;
diff --git a/drivers/dma/idxd/compat.c b/drivers/dma/idxd/compat.c
new file mode 100644
index 000000000000..3df21615f888
--- /dev/null
+++ b/drivers/dma/idxd/compat.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/device/bus.h>
+#include "idxd.h"
+
+extern int device_driver_attach(struct device_driver *drv, struct device *dev);
+extern void device_driver_detach(struct device *dev);
+
+#define DRIVER_ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store)	\
+	struct driver_attribute driver_attr_##_name =		\
+	__ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store)
+
+static ssize_t unbind_store(struct device_driver *drv, const char *buf, size_t count)
+{
+	struct bus_type *bus = drv->bus;
+	struct device *dev;
+	int rc = -ENODEV;
+
+	dev = bus_find_device_by_name(bus, NULL, buf);
+	if (dev && dev->driver) {
+		device_driver_detach(dev);
+		rc = count;
+	}
+
+	return rc;
+}
+static DRIVER_ATTR_IGNORE_LOCKDEP(unbind, 0200, NULL, unbind_store);
+
+static ssize_t bind_store(struct device_driver *drv, const char *buf, size_t count)
+{
+	struct bus_type *bus = drv->bus;
+	struct device *dev;
+	struct device_driver *alt_drv = NULL;
+	int rc = -ENODEV;
+	struct idxd_dev *idxd_dev;
+
+	dev = bus_find_device_by_name(bus, NULL, buf);
+	if (!dev || dev->driver || drv != &dsa_drv.drv)
+		return -ENODEV;
+
+	idxd_dev = confdev_to_idxd_dev(dev);
+	if (is_idxd_dev(idxd_dev)) {
+		alt_drv = driver_find("idxd", bus);
+	} else if (is_idxd_wq_dev(idxd_dev)) {
+		struct idxd_wq *wq = confdev_to_wq(dev);
+
+		if (is_idxd_wq_kernel(wq))
+			alt_drv = driver_find("dmaengine", bus);
+		else if (is_idxd_wq_user(wq))
+			alt_drv = driver_find("user", bus);
+	}
+	if (!alt_drv)
+		return -ENODEV;
+
+	rc = device_driver_attach(alt_drv, dev);
+	if (rc < 0)
+		return rc;
+
+	return count;
+}
+static DRIVER_ATTR_IGNORE_LOCKDEP(bind, 0200, NULL, bind_store);
+
+static struct attribute *dsa_drv_compat_attrs[] = {
+	&driver_attr_bind.attr,
+	&driver_attr_unbind.attr,
+	NULL,
+};
+
+static const struct attribute_group dsa_drv_compat_attr_group = {
+	.attrs = dsa_drv_compat_attrs,
+};
+
+static const struct attribute_group *dsa_drv_compat_groups[] = {
+	&dsa_drv_compat_attr_group,
+	NULL,
+};
+
+static int idxd_dsa_drv_probe(struct idxd_dev *idxd_dev)
+{
+	return -ENODEV;
+}
+
+static void idxd_dsa_drv_remove(struct idxd_dev *idxd_dev)
+{
+}
+
+static enum idxd_dev_type dev_types[] = {
+	IDXD_DEV_NONE,
+};
+
+struct idxd_device_driver dsa_drv = {
+	.name = "dsa",
+	.probe = idxd_dsa_drv_probe,
+	.remove = idxd_dsa_drv_remove,
+	.type = dev_types,
+	.drv = {
+		.suppress_bind_attrs = true,
+		.groups = dsa_drv_compat_groups,
+	},
+};
+
+module_idxd_driver(dsa_drv);
+MODULE_IMPORT_NS(IDXD);
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index 420b93fe5feb..83a5ff2ecf2a 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -15,6 +15,8 @@
 
 static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
 			  u32 *status);
+static void idxd_device_wqs_clear_state(struct idxd_device *idxd);
+static void idxd_wq_disable_cleanup(struct idxd_wq *wq);
 
 /* Interrupt control bits */
 void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id)
@@ -139,8 +141,8 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq)
 	if (wq->type != IDXD_WQT_KERNEL)
 		return 0;
 
-	wq->num_descs = wq->size;
-	num_descs = wq->size;
+	num_descs = wq_dedicated(wq) ? wq->size : wq->threshold;
+	wq->num_descs = num_descs;
 
 	rc = alloc_hw_descs(wq, num_descs);
 	if (rc < 0)
@@ -234,7 +236,7 @@ int idxd_wq_enable(struct idxd_wq *wq)
 	return 0;
 }
 
-int idxd_wq_disable(struct idxd_wq *wq)
+int idxd_wq_disable(struct idxd_wq *wq, bool reset_config)
 {
 	struct idxd_device *idxd = wq->idxd;
 	struct device *dev = &idxd->pdev->dev;
@@ -255,6 +257,8 @@ int idxd_wq_disable(struct idxd_wq *wq)
 		return -ENXIO;
 	}
 
+	if (reset_config)
+		idxd_wq_disable_cleanup(wq);
 	wq->state = IDXD_WQ_DISABLED;
 	dev_dbg(dev, "WQ %d disabled\n", wq->id);
 	return 0;
@@ -289,6 +293,7 @@ void idxd_wq_reset(struct idxd_wq *wq)
 
 	operand = BIT(wq->id % 16) | ((wq->id / 16) << 16);
 	idxd_cmd_exec(idxd, IDXD_CMD_RESET_WQ, operand, NULL);
+	idxd_wq_disable_cleanup(wq);
 	wq->state = IDXD_WQ_DISABLED;
 }
 
@@ -315,6 +320,7 @@ void idxd_wq_unmap_portal(struct idxd_wq *wq)
 
 	devm_iounmap(dev, wq->portal);
 	wq->portal = NULL;
+	wq->portal_offset = 0;
 }
 
 void idxd_wqs_unmap_portal(struct idxd_device *idxd)
@@ -335,19 +341,18 @@ int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid)
 	int rc;
 	union wqcfg wqcfg;
 	unsigned int offset;
-	unsigned long flags;
 
-	rc = idxd_wq_disable(wq);
+	rc = idxd_wq_disable(wq, false);
 	if (rc < 0)
 		return rc;
 
 	offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_PASID_IDX);
-	spin_lock_irqsave(&idxd->dev_lock, flags);
+	spin_lock(&idxd->dev_lock);
 	wqcfg.bits[WQCFG_PASID_IDX] = ioread32(idxd->reg_base + offset);
 	wqcfg.pasid_en = 1;
 	wqcfg.pasid = pasid;
 	iowrite32(wqcfg.bits[WQCFG_PASID_IDX], idxd->reg_base + offset);
-	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+	spin_unlock(&idxd->dev_lock);
 
 	rc = idxd_wq_enable(wq);
 	if (rc < 0)
@@ -362,19 +367,18 @@ int idxd_wq_disable_pasid(struct idxd_wq *wq)
 	int rc;
 	union wqcfg wqcfg;
 	unsigned int offset;
-	unsigned long flags;
 
-	rc = idxd_wq_disable(wq);
+	rc = idxd_wq_disable(wq, false);
 	if (rc < 0)
 		return rc;
 
 	offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_PASID_IDX);
-	spin_lock_irqsave(&idxd->dev_lock, flags);
+	spin_lock(&idxd->dev_lock);
 	wqcfg.bits[WQCFG_PASID_IDX] = ioread32(idxd->reg_base + offset);
 	wqcfg.pasid_en = 0;
 	wqcfg.pasid = 0;
 	iowrite32(wqcfg.bits[WQCFG_PASID_IDX], idxd->reg_base + offset);
-	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+	spin_unlock(&idxd->dev_lock);
 
 	rc = idxd_wq_enable(wq);
 	if (rc < 0)
@@ -383,11 +387,11 @@ int idxd_wq_disable_pasid(struct idxd_wq *wq)
 	return 0;
 }
 
-void idxd_wq_disable_cleanup(struct idxd_wq *wq)
+static void idxd_wq_disable_cleanup(struct idxd_wq *wq)
 {
 	struct idxd_device *idxd = wq->idxd;
 
-	lockdep_assert_held(&idxd->dev_lock);
+	lockdep_assert_held(&wq->wq_lock);
 	memset(wq->wqcfg, 0, idxd->wqcfg_size);
 	wq->type = IDXD_WQT_NONE;
 	wq->size = 0;
@@ -396,6 +400,7 @@ void idxd_wq_disable_cleanup(struct idxd_wq *wq)
 	wq->priority = 0;
 	wq->ats_dis = 0;
 	clear_bit(WQ_FLAG_DEDICATED, &wq->flags);
+	clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags);
 	memset(wq->name, 0, WQ_NAME_SIZE);
 }
 
@@ -455,7 +460,6 @@ int idxd_device_init_reset(struct idxd_device *idxd)
 {
 	struct device *dev = &idxd->pdev->dev;
 	union idxd_command_reg cmd;
-	unsigned long flags;
 
 	if (idxd_device_is_halted(idxd)) {
 		dev_warn(&idxd->pdev->dev, "Device is HALTED!\n");
@@ -465,13 +469,13 @@ int idxd_device_init_reset(struct idxd_device *idxd)
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.cmd = IDXD_CMD_RESET_DEVICE;
 	dev_dbg(dev, "%s: sending reset for init.\n", __func__);
-	spin_lock_irqsave(&idxd->cmd_lock, flags);
+	spin_lock(&idxd->cmd_lock);
 	iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET);
 
 	while (ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET) &
 	       IDXD_CMDSTS_ACTIVE)
 		cpu_relax();
-	spin_unlock_irqrestore(&idxd->cmd_lock, flags);
+	spin_unlock(&idxd->cmd_lock);
 	return 0;
 }
 
@@ -480,7 +484,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
 {
 	union idxd_command_reg cmd;
 	DECLARE_COMPLETION_ONSTACK(done);
-	unsigned long flags;
+	u32 stat;
 
 	if (idxd_device_is_halted(idxd)) {
 		dev_warn(&idxd->pdev->dev, "Device is HALTED!\n");
@@ -494,7 +498,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
 	cmd.operand = operand;
 	cmd.int_req = 1;
 
-	spin_lock_irqsave(&idxd->cmd_lock, flags);
+	spin_lock(&idxd->cmd_lock);
 	wait_event_lock_irq(idxd->cmd_waitq,
 			    !test_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags),
 			    idxd->cmd_lock);
@@ -511,18 +515,18 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
 	 * After command submitted, release lock and go to sleep until
 	 * the command completes via interrupt.
 	 */
-	spin_unlock_irqrestore(&idxd->cmd_lock, flags);
+	spin_unlock(&idxd->cmd_lock);
 	wait_for_completion(&done);
-	spin_lock_irqsave(&idxd->cmd_lock, flags);
-	if (status) {
-		*status = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
-		idxd->cmd_status = *status & GENMASK(7, 0);
-	}
+	stat = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
+	spin_lock(&idxd->cmd_lock);
+	if (status)
+		*status = stat;
+	idxd->cmd_status = stat & GENMASK(7, 0);
 
 	__clear_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags);
 	/* Wake up other pending commands */
 	wake_up(&idxd->cmd_waitq);
-	spin_unlock_irqrestore(&idxd->cmd_lock, flags);
+	spin_unlock(&idxd->cmd_lock);
 }
 
 int idxd_device_enable(struct idxd_device *idxd)
@@ -548,27 +552,10 @@ int idxd_device_enable(struct idxd_device *idxd)
 	return 0;
 }
 
-void idxd_device_wqs_clear_state(struct idxd_device *idxd)
-{
-	int i;
-
-	lockdep_assert_held(&idxd->dev_lock);
-
-	for (i = 0; i < idxd->max_wqs; i++) {
-		struct idxd_wq *wq = idxd->wqs[i];
-
-		if (wq->state == IDXD_WQ_ENABLED) {
-			idxd_wq_disable_cleanup(wq);
-			wq->state = IDXD_WQ_DISABLED;
-		}
-	}
-}
-
 int idxd_device_disable(struct idxd_device *idxd)
 {
 	struct device *dev = &idxd->pdev->dev;
 	u32 status;
-	unsigned long flags;
 
 	if (!idxd_is_enabled(idxd)) {
 		dev_dbg(dev, "Device is not enabled\n");
@@ -584,22 +571,20 @@ int idxd_device_disable(struct idxd_device *idxd)
 		return -ENXIO;
 	}
 
-	spin_lock_irqsave(&idxd->dev_lock, flags);
-	idxd_device_wqs_clear_state(idxd);
-	idxd->state = IDXD_DEV_CONF_READY;
-	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+	spin_lock(&idxd->dev_lock);
+	idxd_device_clear_state(idxd);
+	idxd->state = IDXD_DEV_DISABLED;
+	spin_unlock(&idxd->dev_lock);
 	return 0;
 }
 
 void idxd_device_reset(struct idxd_device *idxd)
 {
-	unsigned long flags;
-
 	idxd_cmd_exec(idxd, IDXD_CMD_RESET_DEVICE, 0, NULL);
-	spin_lock_irqsave(&idxd->dev_lock, flags);
-	idxd_device_wqs_clear_state(idxd);
-	idxd->state = IDXD_DEV_CONF_READY;
-	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+	spin_lock(&idxd->dev_lock);
+	idxd_device_clear_state(idxd);
+	idxd->state = IDXD_DEV_DISABLED;
+	spin_unlock(&idxd->dev_lock);
 }
 
 void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid)
@@ -649,7 +634,6 @@ int idxd_device_release_int_handle(struct idxd_device *idxd, int handle,
 	struct device *dev = &idxd->pdev->dev;
 	u32 operand, status;
 	union idxd_command_reg cmd;
-	unsigned long flags;
 
 	if (!(idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)))
 		return -EOPNOTSUPP;
@@ -667,13 +651,13 @@ int idxd_device_release_int_handle(struct idxd_device *idxd, int handle,
 
 	dev_dbg(dev, "cmd: %u operand: %#x\n", IDXD_CMD_RELEASE_INT_HANDLE, operand);
 
-	spin_lock_irqsave(&idxd->cmd_lock, flags);
+	spin_lock(&idxd->cmd_lock);
 	iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET);
 
 	while (ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET) & IDXD_CMDSTS_ACTIVE)
 		cpu_relax();
 	status = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
-	spin_unlock_irqrestore(&idxd->cmd_lock, flags);
+	spin_unlock(&idxd->cmd_lock);
 
 	if ((status & IDXD_CMDSTS_ERR_MASK) != IDXD_CMDSTS_SUCCESS) {
 		dev_dbg(dev, "release int handle failed: %#x\n", status);
@@ -685,6 +669,59 @@ int idxd_device_release_int_handle(struct idxd_device *idxd, int handle,
 }
 
 /* Device configuration bits */
+static void idxd_engines_clear_state(struct idxd_device *idxd)
+{
+	struct idxd_engine *engine;
+	int i;
+
+	lockdep_assert_held(&idxd->dev_lock);
+	for (i = 0; i < idxd->max_engines; i++) {
+		engine = idxd->engines[i];
+		engine->group = NULL;
+	}
+}
+
+static void idxd_groups_clear_state(struct idxd_device *idxd)
+{
+	struct idxd_group *group;
+	int i;
+
+	lockdep_assert_held(&idxd->dev_lock);
+	for (i = 0; i < idxd->max_groups; i++) {
+		group = idxd->groups[i];
+		memset(&group->grpcfg, 0, sizeof(group->grpcfg));
+		group->num_engines = 0;
+		group->num_wqs = 0;
+		group->use_token_limit = false;
+		group->tokens_allowed = 0;
+		group->tokens_reserved = 0;
+		group->tc_a = -1;
+		group->tc_b = -1;
+	}
+}
+
+static void idxd_device_wqs_clear_state(struct idxd_device *idxd)
+{
+	int i;
+
+	lockdep_assert_held(&idxd->dev_lock);
+	for (i = 0; i < idxd->max_wqs; i++) {
+		struct idxd_wq *wq = idxd->wqs[i];
+
+		if (wq->state == IDXD_WQ_ENABLED) {
+			idxd_wq_disable_cleanup(wq);
+			wq->state = IDXD_WQ_DISABLED;
+		}
+	}
+}
+
+void idxd_device_clear_state(struct idxd_device *idxd)
+{
+	idxd_groups_clear_state(idxd);
+	idxd_engines_clear_state(idxd);
+	idxd_device_wqs_clear_state(idxd);
+}
+
 void idxd_msix_perm_setup(struct idxd_device *idxd)
 {
 	union msix_perm mperm;
@@ -773,6 +810,15 @@ static int idxd_groups_config_write(struct idxd_device *idxd)
 	return 0;
 }
 
+static bool idxd_device_pasid_priv_enabled(struct idxd_device *idxd)
+{
+	struct pci_dev *pdev = idxd->pdev;
+
+	if (pdev->pasid_enabled && (pdev->pasid_features & PCI_PASID_CAP_PRIV))
+		return true;
+	return false;
+}
+
 static int idxd_wq_config_write(struct idxd_wq *wq)
 {
 	struct idxd_device *idxd = wq->idxd;
@@ -796,6 +842,7 @@ static int idxd_wq_config_write(struct idxd_wq *wq)
 	wq->wqcfg->wq_size = wq->size;
 
 	if (wq->size == 0) {
+		idxd->cmd_status = IDXD_SCMD_WQ_NO_SIZE;
 		dev_warn(dev, "Incorrect work queue size: 0\n");
 		return -EINVAL;
 	}
@@ -804,7 +851,6 @@ static int idxd_wq_config_write(struct idxd_wq *wq)
 	wq->wqcfg->wq_thresh = wq->threshold;
 
 	/* byte 8-11 */
-	wq->wqcfg->priv = !!(wq->type == IDXD_WQT_KERNEL);
 	if (wq_dedicated(wq))
 		wq->wqcfg->mode = 1;
 
@@ -814,6 +860,25 @@ static int idxd_wq_config_write(struct idxd_wq *wq)
 			wq->wqcfg->pasid = idxd->pasid;
 	}
 
+	/*
+	 * Here the priv bit is set depending on the WQ type. priv = 1 if the
+	 * WQ type is kernel to indicate privileged access. This setting only
+	 * matters for dedicated WQ. According to the DSA spec:
+	 * If the WQ is in dedicated mode, WQ PASID Enable is 1, and the
+	 * Privileged Mode Enable field of the PCI Express PASID capability
+	 * is 0, this field must be 0.
+	 *
+	 * In the case of a dedicated kernel WQ that is not able to support
+	 * the PASID cap, then the configuration will be rejected.
+	 */
+	wq->wqcfg->priv = !!(wq->type == IDXD_WQT_KERNEL);
+	if (wq_dedicated(wq) && wq->wqcfg->pasid_en &&
+	    !idxd_device_pasid_priv_enabled(idxd) &&
+	    wq->type == IDXD_WQT_KERNEL) {
+		idxd->cmd_status = IDXD_SCMD_WQ_NO_PRIV;
+		return -EOPNOTSUPP;
+	}
+
 	wq->wqcfg->priority = wq->priority;
 
 	if (idxd->hw.gen_cap.block_on_fault &&
@@ -931,6 +996,7 @@ static int idxd_wqs_setup(struct idxd_device *idxd)
 			continue;
 
 		if (wq_shared(wq) && !device_swq_supported(idxd)) {
+			idxd->cmd_status = IDXD_SCMD_WQ_NO_SWQ_SUPPORT;
 			dev_warn(dev, "No shared wq support but configured.\n");
 			return -EINVAL;
 		}
@@ -939,8 +1005,10 @@ static int idxd_wqs_setup(struct idxd_device *idxd)
 		configured++;
 	}
 
-	if (configured == 0)
+	if (configured == 0) {
+		idxd->cmd_status = IDXD_SCMD_WQ_NONE_CONFIGURED;
 		return -EINVAL;
+	}
 
 	return 0;
 }
@@ -1086,3 +1154,203 @@ int idxd_device_load_config(struct idxd_device *idxd)
 
 	return 0;
 }
+
+int __drv_enable_wq(struct idxd_wq *wq)
+{
+	struct idxd_device *idxd = wq->idxd;
+	struct device *dev = &idxd->pdev->dev;
+	int rc = -ENXIO;
+
+	lockdep_assert_held(&wq->wq_lock);
+
+	if (idxd->state != IDXD_DEV_ENABLED) {
+		idxd->cmd_status = IDXD_SCMD_DEV_NOT_ENABLED;
+		goto err;
+	}
+
+	if (wq->state != IDXD_WQ_DISABLED) {
+		dev_dbg(dev, "wq %d already enabled.\n", wq->id);
+		idxd->cmd_status = IDXD_SCMD_WQ_ENABLED;
+		rc = -EBUSY;
+		goto err;
+	}
+
+	if (!wq->group) {
+		dev_dbg(dev, "wq %d not attached to group.\n", wq->id);
+		idxd->cmd_status = IDXD_SCMD_WQ_NO_GRP;
+		goto err;
+	}
+
+	if (strlen(wq->name) == 0) {
+		idxd->cmd_status = IDXD_SCMD_WQ_NO_NAME;
+		dev_dbg(dev, "wq %d name not set.\n", wq->id);
+		goto err;
+	}
+
+	/* Shared WQ checks */
+	if (wq_shared(wq)) {
+		if (!device_swq_supported(idxd)) {
+			idxd->cmd_status = IDXD_SCMD_WQ_NO_SVM;
+			dev_dbg(dev, "PASID not enabled and shared wq.\n");
+			goto err;
+		}
+		/*
+		 * Shared wq with the threshold set to 0 means the user
+		 * did not set the threshold or transitioned from a
+		 * dedicated wq but did not set threshold. A value
+		 * of 0 would effectively disable the shared wq. The
+		 * driver does not allow a value of 0 to be set for
+		 * threshold via sysfs.
+		 */
+		if (wq->threshold == 0) {
+			idxd->cmd_status = IDXD_SCMD_WQ_NO_THRESH;
+			dev_dbg(dev, "Shared wq and threshold 0.\n");
+			goto err;
+		}
+	}
+
+	rc = 0;
+	spin_lock(&idxd->dev_lock);
+	if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		rc = idxd_device_config(idxd);
+	spin_unlock(&idxd->dev_lock);
+	if (rc < 0) {
+		dev_dbg(dev, "Writing wq %d config failed: %d\n", wq->id, rc);
+		goto err;
+	}
+
+	rc = idxd_wq_enable(wq);
+	if (rc < 0) {
+		dev_dbg(dev, "wq %d enabling failed: %d\n", wq->id, rc);
+		goto err;
+	}
+
+	rc = idxd_wq_map_portal(wq);
+	if (rc < 0) {
+		idxd->cmd_status = IDXD_SCMD_WQ_PORTAL_ERR;
+		dev_dbg(dev, "wq %d portal mapping failed: %d\n", wq->id, rc);
+		goto err_map_portal;
+	}
+
+	wq->client_count = 0;
+	return 0;
+
+err_map_portal:
+	rc = idxd_wq_disable(wq, false);
+	if (rc < 0)
+		dev_dbg(dev, "wq %s disable failed\n", dev_name(wq_confdev(wq)));
+err:
+	return rc;
+}
+
+int drv_enable_wq(struct idxd_wq *wq)
+{
+	int rc;
+
+	mutex_lock(&wq->wq_lock);
+	rc = __drv_enable_wq(wq);
+	mutex_unlock(&wq->wq_lock);
+	return rc;
+}
+
+void __drv_disable_wq(struct idxd_wq *wq)
+{
+	struct idxd_device *idxd = wq->idxd;
+	struct device *dev = &idxd->pdev->dev;
+
+	lockdep_assert_held(&wq->wq_lock);
+
+	if (idxd_wq_refcount(wq))
+		dev_warn(dev, "Clients has claim on wq %d: %d\n",
+			 wq->id, idxd_wq_refcount(wq));
+
+	idxd_wq_unmap_portal(wq);
+
+	idxd_wq_drain(wq);
+	idxd_wq_reset(wq);
+
+	wq->client_count = 0;
+}
+
+void drv_disable_wq(struct idxd_wq *wq)
+{
+	mutex_lock(&wq->wq_lock);
+	__drv_disable_wq(wq);
+	mutex_unlock(&wq->wq_lock);
+}
+
+int idxd_device_drv_probe(struct idxd_dev *idxd_dev)
+{
+	struct idxd_device *idxd = idxd_dev_to_idxd(idxd_dev);
+	int rc = 0;
+
+	/*
+	 * Device should be in disabled state for the idxd_drv to load. If it's in
+	 * enabled state, then the device was altered outside of driver's control.
+	 * If the state is in halted state, then we don't want to proceed.
+	 */
+	if (idxd->state != IDXD_DEV_DISABLED) {
+		idxd->cmd_status = IDXD_SCMD_DEV_ENABLED;
+		return -ENXIO;
+	}
+
+	/* Device configuration */
+	spin_lock(&idxd->dev_lock);
+	if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		rc = idxd_device_config(idxd);
+	spin_unlock(&idxd->dev_lock);
+	if (rc < 0)
+		return -ENXIO;
+
+	/* Start device */
+	rc = idxd_device_enable(idxd);
+	if (rc < 0)
+		return rc;
+
+	/* Setup DMA device without channels */
+	rc = idxd_register_dma_device(idxd);
+	if (rc < 0) {
+		idxd_device_disable(idxd);
+		idxd->cmd_status = IDXD_SCMD_DEV_DMA_ERR;
+		return rc;
+	}
+
+	idxd->cmd_status = 0;
+	return 0;
+}
+
+void idxd_device_drv_remove(struct idxd_dev *idxd_dev)
+{
+	struct device *dev = &idxd_dev->conf_dev;
+	struct idxd_device *idxd = idxd_dev_to_idxd(idxd_dev);
+	int i;
+
+	for (i = 0; i < idxd->max_wqs; i++) {
+		struct idxd_wq *wq = idxd->wqs[i];
+		struct device *wq_dev = wq_confdev(wq);
+
+		if (wq->state == IDXD_WQ_DISABLED)
+			continue;
+		dev_warn(dev, "Active wq %d on disable %s.\n", i, dev_name(wq_dev));
+		device_release_driver(wq_dev);
+	}
+
+	idxd_unregister_dma_device(idxd);
+	idxd_device_disable(idxd);
+	if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		idxd_device_reset(idxd);
+}
+
+static enum idxd_dev_type dev_types[] = {
+	IDXD_DEV_DSA,
+	IDXD_DEV_IAX,
+	IDXD_DEV_NONE,
+};
+
+struct idxd_device_driver idxd_drv = {
+	.type = dev_types,
+	.probe = idxd_device_drv_probe,
+	.remove = idxd_device_drv_remove,
+	.name = "idxd",
+};
+EXPORT_SYMBOL_GPL(idxd_drv);
diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c
index 77439b645044..e0f056c1d1f5 100644
--- a/drivers/dma/idxd/dma.c
+++ b/drivers/dma/idxd/dma.c
@@ -69,7 +69,11 @@ static inline void idxd_prep_desc_common(struct idxd_wq *wq,
 	hw->src_addr = addr_f1;
 	hw->dst_addr = addr_f2;
 	hw->xfer_size = len;
-	hw->priv = !!(wq->type == IDXD_WQT_KERNEL);
+	/*
+	 * For dedicated WQ, this field is ignored and HW will use the WQCFG.priv
+	 * field instead. This field should be set to 1 for kernel descriptors.
+	 */
+	hw->priv = 1;
 	hw->completion_addr = compl;
 }
 
@@ -149,10 +153,8 @@ static dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 	cookie = dma_cookie_assign(tx);
 
 	rc = idxd_submit_desc(wq, desc);
-	if (rc < 0) {
-		idxd_free_desc(wq, desc);
+	if (rc < 0)
 		return rc;
-	}
 
 	return cookie;
 }
@@ -245,7 +247,7 @@ int idxd_register_dma_channel(struct idxd_wq *wq)
 
 	wq->idxd_chan = idxd_chan;
 	idxd_chan->wq = wq;
-	get_device(&wq->conf_dev);
+	get_device(wq_confdev(wq));
 
 	return 0;
 }
@@ -260,5 +262,87 @@ void idxd_unregister_dma_channel(struct idxd_wq *wq)
 	list_del(&chan->device_node);
 	kfree(wq->idxd_chan);
 	wq->idxd_chan = NULL;
-	put_device(&wq->conf_dev);
+	put_device(wq_confdev(wq));
 }
+
+static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev)
+{
+	struct device *dev = &idxd_dev->conf_dev;
+	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
+	struct idxd_device *idxd = wq->idxd;
+	int rc;
+
+	if (idxd->state != IDXD_DEV_ENABLED)
+		return -ENXIO;
+
+	mutex_lock(&wq->wq_lock);
+	wq->type = IDXD_WQT_KERNEL;
+	rc = __drv_enable_wq(wq);
+	if (rc < 0) {
+		dev_dbg(dev, "Enable wq %d failed: %d\n", wq->id, rc);
+		rc = -ENXIO;
+		goto err;
+	}
+
+	rc = idxd_wq_alloc_resources(wq);
+	if (rc < 0) {
+		idxd->cmd_status = IDXD_SCMD_WQ_RES_ALLOC_ERR;
+		dev_dbg(dev, "WQ resource alloc failed\n");
+		goto err_res_alloc;
+	}
+
+	rc = idxd_wq_init_percpu_ref(wq);
+	if (rc < 0) {
+		idxd->cmd_status = IDXD_SCMD_PERCPU_ERR;
+		dev_dbg(dev, "percpu_ref setup failed\n");
+		goto err_ref;
+	}
+
+	rc = idxd_register_dma_channel(wq);
+	if (rc < 0) {
+		idxd->cmd_status = IDXD_SCMD_DMA_CHAN_ERR;
+		dev_dbg(dev, "Failed to register dma channel\n");
+		goto err_dma;
+	}
+
+	idxd->cmd_status = 0;
+	mutex_unlock(&wq->wq_lock);
+	return 0;
+
+err_dma:
+	idxd_wq_quiesce(wq);
+err_ref:
+	idxd_wq_free_resources(wq);
+err_res_alloc:
+	__drv_disable_wq(wq);
+err:
+	wq->type = IDXD_WQT_NONE;
+	mutex_unlock(&wq->wq_lock);
+	return rc;
+}
+
+static void idxd_dmaengine_drv_remove(struct idxd_dev *idxd_dev)
+{
+	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
+
+	mutex_lock(&wq->wq_lock);
+	idxd_wq_quiesce(wq);
+	idxd_unregister_dma_channel(wq);
+	__drv_disable_wq(wq);
+	idxd_wq_free_resources(wq);
+	wq->type = IDXD_WQT_NONE;
+	mutex_unlock(&wq->wq_lock);
+}
+
+static enum idxd_dev_type dev_types[] = {
+	IDXD_DEV_WQ,
+	IDXD_DEV_NONE,
+};
+
+struct idxd_device_driver idxd_dmaengine_drv = {
+	.probe = idxd_dmaengine_drv_probe,
+	.remove = idxd_dmaengine_drv_remove,
+	.name = "dmaengine",
+	.type = dev_types,
+};
+EXPORT_SYMBOL_GPL(idxd_dmaengine_drv);
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index fc708be7ad9a..bfcb03329f77 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -11,14 +11,32 @@
 #include <linux/idr.h>
 #include <linux/pci.h>
 #include <linux/perf_event.h>
+#include <uapi/linux/idxd.h>
 #include "registers.h"
 
 #define IDXD_DRIVER_VERSION	"1.00"
 
 extern struct kmem_cache *idxd_desc_pool;
+extern bool tc_override;
 
-struct idxd_device;
 struct idxd_wq;
+struct idxd_dev;
+
+enum idxd_dev_type {
+	IDXD_DEV_NONE = -1,
+	IDXD_DEV_DSA = 0,
+	IDXD_DEV_IAX,
+	IDXD_DEV_WQ,
+	IDXD_DEV_GROUP,
+	IDXD_DEV_ENGINE,
+	IDXD_DEV_CDEV,
+	IDXD_DEV_MAX_TYPE,
+};
+
+struct idxd_dev {
+	struct device conf_dev;
+	enum idxd_dev_type type;
+};
 
 #define IDXD_REG_TIMEOUT	50
 #define IDXD_DRAIN_TIMEOUT	5000
@@ -34,9 +52,18 @@ enum idxd_type {
 #define IDXD_PMU_EVENT_MAX	64
 
 struct idxd_device_driver {
+	const char *name;
+	enum idxd_dev_type *type;
+	int (*probe)(struct idxd_dev *idxd_dev);
+	void (*remove)(struct idxd_dev *idxd_dev);
 	struct device_driver drv;
 };
 
+extern struct idxd_device_driver dsa_drv;
+extern struct idxd_device_driver idxd_drv;
+extern struct idxd_device_driver idxd_dmaengine_drv;
+extern struct idxd_device_driver idxd_user_drv;
+
 struct idxd_irq_entry {
 	struct idxd_device *idxd;
 	int id;
@@ -51,7 +78,7 @@ struct idxd_irq_entry {
 };
 
 struct idxd_group {
-	struct device conf_dev;
+	struct idxd_dev idxd_dev;
 	struct idxd_device *idxd;
 	struct grpcfg grpcfg;
 	int id;
@@ -110,7 +137,7 @@ enum idxd_wq_type {
 struct idxd_cdev {
 	struct idxd_wq *wq;
 	struct cdev cdev;
-	struct device dev;
+	struct idxd_dev idxd_dev;
 	int minor;
 };
 
@@ -136,9 +163,10 @@ struct idxd_dma_chan {
 
 struct idxd_wq {
 	void __iomem *portal;
+	u32 portal_offset;
 	struct percpu_ref wq_active;
 	struct completion wq_dead;
-	struct device conf_dev;
+	struct idxd_dev idxd_dev;
 	struct idxd_cdev *idxd_cdev;
 	struct wait_queue_head err_queue;
 	struct idxd_device *idxd;
@@ -153,7 +181,6 @@ struct idxd_wq {
 	enum idxd_wq_state state;
 	unsigned long flags;
 	union wqcfg *wqcfg;
-	u32 vec_ptr;		/* interrupt steering */
 	struct dsa_hw_desc **hw_descs;
 	int num_descs;
 	union {
@@ -174,7 +201,7 @@ struct idxd_wq {
 };
 
 struct idxd_engine {
-	struct device conf_dev;
+	struct idxd_dev idxd_dev;
 	int id;
 	struct idxd_group *group;
 	struct idxd_device *idxd;
@@ -194,7 +221,6 @@ struct idxd_hw {
 enum idxd_device_state {
 	IDXD_DEV_HALTED = -1,
 	IDXD_DEV_DISABLED = 0,
-	IDXD_DEV_CONF_READY,
 	IDXD_DEV_ENABLED,
 };
 
@@ -218,7 +244,7 @@ struct idxd_driver_data {
 };
 
 struct idxd_device {
-	struct device conf_dev;
+	struct idxd_dev idxd_dev;
 	struct idxd_driver_data *data;
 	struct list_head list;
 	struct idxd_hw hw;
@@ -226,7 +252,7 @@ struct idxd_device {
 	unsigned long flags;
 	int id;
 	int major;
-	u8 cmd_status;
+	u32 cmd_status;
 
 	struct pci_dev *pdev;
 	void __iomem *reg_base;
@@ -290,7 +316,6 @@ struct idxd_desc {
 	struct list_head list;
 	int id;
 	int cpu;
-	unsigned int vector;
 	struct idxd_wq *wq;
 };
 
@@ -302,11 +327,62 @@ enum idxd_completion_status {
 	IDXD_COMP_DESC_ABORT = 0xff,
 };
 
-#define confdev_to_idxd(dev) container_of(dev, struct idxd_device, conf_dev)
-#define confdev_to_wq(dev) container_of(dev, struct idxd_wq, conf_dev)
+#define idxd_confdev(idxd) &idxd->idxd_dev.conf_dev
+#define wq_confdev(wq) &wq->idxd_dev.conf_dev
+#define engine_confdev(engine) &engine->idxd_dev.conf_dev
+#define group_confdev(group) &group->idxd_dev.conf_dev
+#define cdev_dev(cdev) &cdev->idxd_dev.conf_dev
+
+#define confdev_to_idxd_dev(dev) container_of(dev, struct idxd_dev, conf_dev)
+#define idxd_dev_to_idxd(idxd_dev) container_of(idxd_dev, struct idxd_device, idxd_dev)
+#define idxd_dev_to_wq(idxd_dev) container_of(idxd_dev, struct idxd_wq, idxd_dev)
+
+static inline struct idxd_device *confdev_to_idxd(struct device *dev)
+{
+	struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
+
+	return idxd_dev_to_idxd(idxd_dev);
+}
+
+static inline struct idxd_wq *confdev_to_wq(struct device *dev)
+{
+	struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
+
+	return idxd_dev_to_wq(idxd_dev);
+}
+
+static inline struct idxd_engine *confdev_to_engine(struct device *dev)
+{
+	struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
+
+	return container_of(idxd_dev, struct idxd_engine, idxd_dev);
+}
+
+static inline struct idxd_group *confdev_to_group(struct device *dev)
+{
+	struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
+
+	return container_of(idxd_dev, struct idxd_group, idxd_dev);
+}
+
+static inline struct idxd_cdev *dev_to_cdev(struct device *dev)
+{
+	struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
+
+	return container_of(idxd_dev, struct idxd_cdev, idxd_dev);
+}
+
+static inline void idxd_dev_set_type(struct idxd_dev *idev, int type)
+{
+	if (type >= IDXD_DEV_MAX_TYPE) {
+		idev->type = IDXD_DEV_NONE;
+		return;
+	}
+
+	idev->type = type;
+}
 
 extern struct bus_type dsa_bus_type;
-extern struct bus_type iax_bus_type;
 
 extern bool support_enqcmd;
 extern struct ida idxd_ida;
@@ -316,24 +392,24 @@ extern struct device_type idxd_wq_device_type;
 extern struct device_type idxd_engine_device_type;
 extern struct device_type idxd_group_device_type;
 
-static inline bool is_dsa_dev(struct device *dev)
+static inline bool is_dsa_dev(struct idxd_dev *idxd_dev)
 {
-	return dev->type == &dsa_device_type;
+	return idxd_dev->type == IDXD_DEV_DSA;
 }
 
-static inline bool is_iax_dev(struct device *dev)
+static inline bool is_iax_dev(struct idxd_dev *idxd_dev)
 {
-	return dev->type == &iax_device_type;
+	return idxd_dev->type == IDXD_DEV_IAX;
 }
 
-static inline bool is_idxd_dev(struct device *dev)
+static inline bool is_idxd_dev(struct idxd_dev *idxd_dev)
 {
-	return is_dsa_dev(dev) || is_iax_dev(dev);
+	return is_dsa_dev(idxd_dev) || is_iax_dev(idxd_dev);
 }
 
-static inline bool is_idxd_wq_dev(struct device *dev)
+static inline bool is_idxd_wq_dev(struct idxd_dev *idxd_dev)
 {
-	return dev->type == &idxd_wq_device_type;
+	return idxd_dev->type == IDXD_DEV_WQ;
 }
 
 static inline bool is_idxd_wq_dmaengine(struct idxd_wq *wq)
@@ -343,11 +419,16 @@ static inline bool is_idxd_wq_dmaengine(struct idxd_wq *wq)
 	return false;
 }
 
-static inline bool is_idxd_wq_cdev(struct idxd_wq *wq)
+static inline bool is_idxd_wq_user(struct idxd_wq *wq)
 {
 	return wq->type == IDXD_WQT_USER;
 }
 
+static inline bool is_idxd_wq_kernel(struct idxd_wq *wq)
+{
+	return wq->type == IDXD_WQT_KERNEL;
+}
+
 static inline bool wq_dedicated(struct idxd_wq *wq)
 {
 	return test_bit(WQ_FLAG_DEDICATED, &wq->flags);
@@ -389,6 +470,24 @@ static inline int idxd_get_wq_portal_full_offset(int wq_id,
 	return ((wq_id * 4) << PAGE_SHIFT) + idxd_get_wq_portal_offset(prot);
 }
 
+#define IDXD_PORTAL_MASK	(PAGE_SIZE - 1)
+
+/*
+ * Even though this function can be accessed by multiple threads, it is safe to use.
+ * At worst the address gets used more than once before it gets incremented. We don't
+ * hit a threshold until iops becomes many million times a second. So the occasional
+ * reuse of the same address is tolerable compare to using an atomic variable. This is
+ * safe on a system that has atomic load/store for 32bit integers. Given that this is an
+ * Intel iEP device, that should not be a problem.
+ */
+static inline void __iomem *idxd_wq_portal_addr(struct idxd_wq *wq)
+{
+	int ofs = wq->portal_offset;
+
+	wq->portal_offset = (ofs + sizeof(struct dsa_raw_desc)) & IDXD_PORTAL_MASK;
+	return wq->portal + ofs;
+}
+
 static inline void idxd_wq_get(struct idxd_wq *wq)
 {
 	wq->client_count++;
@@ -404,6 +503,16 @@ static inline int idxd_wq_refcount(struct idxd_wq *wq)
 	return wq->client_count;
 };
 
+int __must_check __idxd_driver_register(struct idxd_device_driver *idxd_drv,
+					struct module *module, const char *mod_name);
+#define idxd_driver_register(driver) \
+	__idxd_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
+
+void idxd_driver_unregister(struct idxd_device_driver *idxd_drv);
+
+#define module_idxd_driver(__idxd_driver) \
+	module_driver(__idxd_driver, idxd_driver_register, idxd_driver_unregister)
+
 int idxd_register_bus_type(void);
 void idxd_unregister_bus_type(void);
 int idxd_register_devices(struct idxd_device *idxd);
@@ -424,13 +533,20 @@ void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id);
 void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id);
 
 /* device control */
+int idxd_register_idxd_drv(void);
+void idxd_unregister_idxd_drv(void);
+int idxd_device_drv_probe(struct idxd_dev *idxd_dev);
+void idxd_device_drv_remove(struct idxd_dev *idxd_dev);
+int drv_enable_wq(struct idxd_wq *wq);
+int __drv_enable_wq(struct idxd_wq *wq);
+void drv_disable_wq(struct idxd_wq *wq);
+void __drv_disable_wq(struct idxd_wq *wq);
 int idxd_device_init_reset(struct idxd_device *idxd);
 int idxd_device_enable(struct idxd_device *idxd);
 int idxd_device_disable(struct idxd_device *idxd);
 void idxd_device_reset(struct idxd_device *idxd);
-void idxd_device_cleanup(struct idxd_device *idxd);
+void idxd_device_clear_state(struct idxd_device *idxd);
 int idxd_device_config(struct idxd_device *idxd);
-void idxd_device_wqs_clear_state(struct idxd_device *idxd);
 void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid);
 int idxd_device_load_config(struct idxd_device *idxd);
 int idxd_device_request_int_handle(struct idxd_device *idxd, int idx, int *handle,
@@ -443,12 +559,11 @@ void idxd_wqs_unmap_portal(struct idxd_device *idxd);
 int idxd_wq_alloc_resources(struct idxd_wq *wq);
 void idxd_wq_free_resources(struct idxd_wq *wq);
 int idxd_wq_enable(struct idxd_wq *wq);
-int idxd_wq_disable(struct idxd_wq *wq);
+int idxd_wq_disable(struct idxd_wq *wq, bool reset_config);
 void idxd_wq_drain(struct idxd_wq *wq);
 void idxd_wq_reset(struct idxd_wq *wq);
 int idxd_wq_map_portal(struct idxd_wq *wq);
 void idxd_wq_unmap_portal(struct idxd_wq *wq);
-void idxd_wq_disable_cleanup(struct idxd_wq *wq);
 int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid);
 int idxd_wq_disable_pasid(struct idxd_wq *wq);
 void idxd_wq_quiesce(struct idxd_wq *wq);
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index c0f4c0422f32..eb09bc591c31 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -26,11 +26,16 @@
 MODULE_VERSION(IDXD_DRIVER_VERSION);
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Intel Corporation");
+MODULE_IMPORT_NS(IDXD);
 
 static bool sva = true;
 module_param(sva, bool, 0644);
 MODULE_PARM_DESC(sva, "Toggle SVA support on/off");
 
+bool tc_override;
+module_param(tc_override, bool, 0644);
+MODULE_PARM_DESC(tc_override, "Override traffic class defaults");
+
 #define DRV_NAME "idxd"
 
 bool support_enqcmd;
@@ -200,6 +205,7 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
 {
 	struct device *dev = &idxd->pdev->dev;
 	struct idxd_wq *wq;
+	struct device *conf_dev;
 	int i, rc;
 
 	idxd->wqs = kcalloc_node(idxd->max_wqs, sizeof(struct idxd_wq *),
@@ -214,15 +220,17 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
 			goto err;
 		}
 
+		idxd_dev_set_type(&wq->idxd_dev, IDXD_DEV_WQ);
+		conf_dev = wq_confdev(wq);
 		wq->id = i;
 		wq->idxd = idxd;
-		device_initialize(&wq->conf_dev);
-		wq->conf_dev.parent = &idxd->conf_dev;
-		wq->conf_dev.bus = &dsa_bus_type;
-		wq->conf_dev.type = &idxd_wq_device_type;
-		rc = dev_set_name(&wq->conf_dev, "wq%d.%d", idxd->id, wq->id);
+		device_initialize(wq_confdev(wq));
+		conf_dev->parent = idxd_confdev(idxd);
+		conf_dev->bus = &dsa_bus_type;
+		conf_dev->type = &idxd_wq_device_type;
+		rc = dev_set_name(conf_dev, "wq%d.%d", idxd->id, wq->id);
 		if (rc < 0) {
-			put_device(&wq->conf_dev);
+			put_device(conf_dev);
 			goto err;
 		}
 
@@ -233,7 +241,7 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
 		wq->max_batch_size = idxd->max_batch_size;
 		wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev));
 		if (!wq->wqcfg) {
-			put_device(&wq->conf_dev);
+			put_device(conf_dev);
 			rc = -ENOMEM;
 			goto err;
 		}
@@ -243,8 +251,11 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
 	return 0;
 
  err:
-	while (--i >= 0)
-		put_device(&idxd->wqs[i]->conf_dev);
+	while (--i >= 0) {
+		wq = idxd->wqs[i];
+		conf_dev = wq_confdev(wq);
+		put_device(conf_dev);
+	}
 	return rc;
 }
 
@@ -252,6 +263,7 @@ static int idxd_setup_engines(struct idxd_device *idxd)
 {
 	struct idxd_engine *engine;
 	struct device *dev = &idxd->pdev->dev;
+	struct device *conf_dev;
 	int i, rc;
 
 	idxd->engines = kcalloc_node(idxd->max_engines, sizeof(struct idxd_engine *),
@@ -266,15 +278,17 @@ static int idxd_setup_engines(struct idxd_device *idxd)
 			goto err;
 		}
 
+		idxd_dev_set_type(&engine->idxd_dev, IDXD_DEV_ENGINE);
+		conf_dev = engine_confdev(engine);
 		engine->id = i;
 		engine->idxd = idxd;
-		device_initialize(&engine->conf_dev);
-		engine->conf_dev.parent = &idxd->conf_dev;
-		engine->conf_dev.bus = &dsa_bus_type;
-		engine->conf_dev.type = &idxd_engine_device_type;
-		rc = dev_set_name(&engine->conf_dev, "engine%d.%d", idxd->id, engine->id);
+		device_initialize(conf_dev);
+		conf_dev->parent = idxd_confdev(idxd);
+		conf_dev->bus = &dsa_bus_type;
+		conf_dev->type = &idxd_engine_device_type;
+		rc = dev_set_name(conf_dev, "engine%d.%d", idxd->id, engine->id);
 		if (rc < 0) {
-			put_device(&engine->conf_dev);
+			put_device(conf_dev);
 			goto err;
 		}
 
@@ -284,14 +298,18 @@ static int idxd_setup_engines(struct idxd_device *idxd)
 	return 0;
 
  err:
-	while (--i >= 0)
-		put_device(&idxd->engines[i]->conf_dev);
+	while (--i >= 0) {
+		engine = idxd->engines[i];
+		conf_dev = engine_confdev(engine);
+		put_device(conf_dev);
+	}
 	return rc;
 }
 
 static int idxd_setup_groups(struct idxd_device *idxd)
 {
 	struct device *dev = &idxd->pdev->dev;
+	struct device *conf_dev;
 	struct idxd_group *group;
 	int i, rc;
 
@@ -307,28 +325,37 @@ static int idxd_setup_groups(struct idxd_device *idxd)
 			goto err;
 		}
 
+		idxd_dev_set_type(&group->idxd_dev, IDXD_DEV_GROUP);
+		conf_dev = group_confdev(group);
 		group->id = i;
 		group->idxd = idxd;
-		device_initialize(&group->conf_dev);
-		group->conf_dev.parent = &idxd->conf_dev;
-		group->conf_dev.bus = &dsa_bus_type;
-		group->conf_dev.type = &idxd_group_device_type;
-		rc = dev_set_name(&group->conf_dev, "group%d.%d", idxd->id, group->id);
+		device_initialize(conf_dev);
+		conf_dev->parent = idxd_confdev(idxd);
+		conf_dev->bus = &dsa_bus_type;
+		conf_dev->type = &idxd_group_device_type;
+		rc = dev_set_name(conf_dev, "group%d.%d", idxd->id, group->id);
 		if (rc < 0) {
-			put_device(&group->conf_dev);
+			put_device(conf_dev);
 			goto err;
 		}
 
 		idxd->groups[i] = group;
-		group->tc_a = -1;
-		group->tc_b = -1;
+		if (idxd->hw.version < DEVICE_VERSION_2 && !tc_override) {
+			group->tc_a = 1;
+			group->tc_b = 1;
+		} else {
+			group->tc_a = -1;
+			group->tc_b = -1;
+		}
 	}
 
 	return 0;
 
  err:
-	while (--i >= 0)
-		put_device(&idxd->groups[i]->conf_dev);
+	while (--i >= 0) {
+		group = idxd->groups[i];
+		put_device(group_confdev(group));
+	}
 	return rc;
 }
 
@@ -337,11 +364,11 @@ static void idxd_cleanup_internals(struct idxd_device *idxd)
 	int i;
 
 	for (i = 0; i < idxd->max_groups; i++)
-		put_device(&idxd->groups[i]->conf_dev);
+		put_device(group_confdev(idxd->groups[i]));
 	for (i = 0; i < idxd->max_engines; i++)
-		put_device(&idxd->engines[i]->conf_dev);
+		put_device(engine_confdev(idxd->engines[i]));
 	for (i = 0; i < idxd->max_wqs; i++)
-		put_device(&idxd->wqs[i]->conf_dev);
+		put_device(wq_confdev(idxd->wqs[i]));
 	destroy_workqueue(idxd->wq);
 }
 
@@ -381,13 +408,13 @@ static int idxd_setup_internals(struct idxd_device *idxd)
 
  err_wkq_create:
 	for (i = 0; i < idxd->max_groups; i++)
-		put_device(&idxd->groups[i]->conf_dev);
+		put_device(group_confdev(idxd->groups[i]));
  err_group:
 	for (i = 0; i < idxd->max_engines; i++)
-		put_device(&idxd->engines[i]->conf_dev);
+		put_device(engine_confdev(idxd->engines[i]));
  err_engine:
 	for (i = 0; i < idxd->max_wqs; i++)
-		put_device(&idxd->wqs[i]->conf_dev);
+		put_device(wq_confdev(idxd->wqs[i]));
  err_wqs:
 	kfree(idxd->int_handles);
 	return rc;
@@ -469,6 +496,7 @@ static void idxd_read_caps(struct idxd_device *idxd)
 static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data)
 {
 	struct device *dev = &pdev->dev;
+	struct device *conf_dev;
 	struct idxd_device *idxd;
 	int rc;
 
@@ -476,19 +504,21 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_d
 	if (!idxd)
 		return NULL;
 
+	conf_dev = idxd_confdev(idxd);
 	idxd->pdev = pdev;
 	idxd->data = data;
+	idxd_dev_set_type(&idxd->idxd_dev, idxd->data->type);
 	idxd->id = ida_alloc(&idxd_ida, GFP_KERNEL);
 	if (idxd->id < 0)
 		return NULL;
 
-	device_initialize(&idxd->conf_dev);
-	idxd->conf_dev.parent = dev;
-	idxd->conf_dev.bus = &dsa_bus_type;
-	idxd->conf_dev.type = idxd->data->dev_type;
-	rc = dev_set_name(&idxd->conf_dev, "%s%d", idxd->data->name_prefix, idxd->id);
+	device_initialize(conf_dev);
+	conf_dev->parent = dev;
+	conf_dev->bus = &dsa_bus_type;
+	conf_dev->type = idxd->data->dev_type;
+	rc = dev_set_name(conf_dev, "%s%d", idxd->data->name_prefix, idxd->id);
 	if (rc < 0) {
-		put_device(&idxd->conf_dev);
+		put_device(conf_dev);
 		return NULL;
 	}
 
@@ -639,15 +669,9 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	}
 
 	dev_dbg(dev, "Set DMA masks\n");
-	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
 	if (rc)
-		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-	if (rc)
-		goto err;
-
-	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-	if (rc)
-		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+		rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
 	if (rc)
 		goto err;
 
@@ -668,8 +692,6 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto err_dev_register;
 	}
 
-	idxd->state = IDXD_DEV_CONF_READY;
-
 	dev_info(&pdev->dev, "Intel(R) Accelerator Device (v%x)\n",
 		 idxd->hw.version);
 
@@ -680,7 +702,7 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
  err:
 	pci_iounmap(pdev, idxd->reg_base);
  err_iomap:
-	put_device(&idxd->conf_dev);
+	put_device(idxd_confdev(idxd));
  err_idxd_alloc:
 	pci_disable_device(pdev);
 	return rc;
@@ -793,7 +815,7 @@ static void idxd_remove(struct pci_dev *pdev)
 	pci_disable_device(pdev);
 	destroy_workqueue(idxd->wq);
 	perfmon_pmu_remove(idxd);
-	device_unregister(&idxd->conf_dev);
+	device_unregister(idxd_confdev(idxd));
 }
 
 static struct pci_driver idxd_pci_driver = {
@@ -824,13 +846,17 @@ static int __init idxd_init_module(void)
 
 	perfmon_init();
 
-	err = idxd_register_bus_type();
+	err = idxd_driver_register(&idxd_drv);
 	if (err < 0)
-		return err;
+		goto err_idxd_driver_register;
 
-	err = idxd_register_driver();
+	err = idxd_driver_register(&idxd_dmaengine_drv);
 	if (err < 0)
-		goto err_idxd_driver_register;
+		goto err_idxd_dmaengine_driver_register;
+
+	err = idxd_driver_register(&idxd_user_drv);
+	if (err < 0)
+		goto err_idxd_user_driver_register;
 
 	err = idxd_cdev_register();
 	if (err)
@@ -845,19 +871,23 @@ static int __init idxd_init_module(void)
 err_pci_register:
 	idxd_cdev_remove();
 err_cdev_register:
-	idxd_unregister_driver();
+	idxd_driver_unregister(&idxd_user_drv);
+err_idxd_user_driver_register:
+	idxd_driver_unregister(&idxd_dmaengine_drv);
+err_idxd_dmaengine_driver_register:
+	idxd_driver_unregister(&idxd_drv);
 err_idxd_driver_register:
-	idxd_unregister_bus_type();
 	return err;
 }
 module_init(idxd_init_module);
 
 static void __exit idxd_exit_module(void)
 {
-	idxd_unregister_driver();
+	idxd_driver_unregister(&idxd_user_drv);
+	idxd_driver_unregister(&idxd_dmaengine_drv);
+	idxd_driver_unregister(&idxd_drv);
 	pci_unregister_driver(&idxd_pci_driver);
 	idxd_cdev_remove();
-	idxd_unregister_bus_type();
 	perfmon_exit();
 }
 module_exit(idxd_exit_module);
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
index 4e3a7198c0ca..ca88fa7a328e 100644
--- a/drivers/dma/idxd/irq.c
+++ b/drivers/dma/idxd/irq.c
@@ -22,13 +22,6 @@ struct idxd_fault {
 	struct idxd_device *idxd;
 };
 
-static int irq_process_work_list(struct idxd_irq_entry *irq_entry,
-				 enum irq_work_type wtype,
-				 int *processed, u64 data);
-static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry,
-				     enum irq_work_type wtype,
-				     int *processed, u64 data);
-
 static void idxd_device_reinit(struct work_struct *work)
 {
 	struct idxd_device *idxd = container_of(work, struct idxd_device, work);
@@ -51,7 +44,7 @@ static void idxd_device_reinit(struct work_struct *work)
 			rc = idxd_wq_enable(wq);
 			if (rc < 0) {
 				dev_warn(dev, "Unable to re-enable wq %s\n",
-					 dev_name(&wq->conf_dev));
+					 dev_name(wq_confdev(wq)));
 			}
 		}
 	}
@@ -59,47 +52,7 @@ static void idxd_device_reinit(struct work_struct *work)
 	return;
 
  out:
-	idxd_device_wqs_clear_state(idxd);
-}
-
-static void idxd_device_fault_work(struct work_struct *work)
-{
-	struct idxd_fault *fault = container_of(work, struct idxd_fault, work);
-	struct idxd_irq_entry *ie;
-	int i;
-	int processed;
-	int irqcnt = fault->idxd->num_wq_irqs + 1;
-
-	for (i = 1; i < irqcnt; i++) {
-		ie = &fault->idxd->irq_entries[i];
-		irq_process_work_list(ie, IRQ_WORK_PROCESS_FAULT,
-				      &processed, fault->addr);
-		if (processed)
-			break;
-
-		irq_process_pending_llist(ie, IRQ_WORK_PROCESS_FAULT,
-					  &processed, fault->addr);
-		if (processed)
-			break;
-	}
-
-	kfree(fault);
-}
-
-static int idxd_device_schedule_fault_process(struct idxd_device *idxd,
-					      u64 fault_addr)
-{
-	struct idxd_fault *fault;
-
-	fault = kmalloc(sizeof(*fault), GFP_ATOMIC);
-	if (!fault)
-		return -ENOMEM;
-
-	fault->addr = fault_addr;
-	fault->idxd = idxd;
-	INIT_WORK(&fault->work, idxd_device_fault_work);
-	queue_work(idxd->wq, &fault->work);
-	return 0;
+	idxd_device_clear_state(idxd);
 }
 
 static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
@@ -111,7 +64,7 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
 	bool err = false;
 
 	if (cause & IDXD_INTC_ERR) {
-		spin_lock_bh(&idxd->dev_lock);
+		spin_lock(&idxd->dev_lock);
 		for (i = 0; i < 4; i++)
 			idxd->sw_err.bits[i] = ioread64(idxd->reg_base +
 					IDXD_SWERR_OFFSET + i * sizeof(u64));
@@ -136,7 +89,7 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
 			}
 		}
 
-		spin_unlock_bh(&idxd->dev_lock);
+		spin_unlock(&idxd->dev_lock);
 		val |= IDXD_INTC_ERR;
 
 		for (i = 0; i < 4; i++)
@@ -168,15 +121,6 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
 	if (!err)
 		return 0;
 
-	/*
-	 * This case should rarely happen and typically is due to software
-	 * programming error by the driver.
-	 */
-	if (idxd->sw_err.valid &&
-	    idxd->sw_err.desc_valid &&
-	    idxd->sw_err.fault_addr)
-		idxd_device_schedule_fault_process(idxd, idxd->sw_err.fault_addr);
-
 	gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET);
 	if (gensts.state == IDXD_DEVICE_STATE_HALT) {
 		idxd->state = IDXD_DEV_HALTED;
@@ -189,15 +133,15 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
 			INIT_WORK(&idxd->work, idxd_device_reinit);
 			queue_work(idxd->wq, &idxd->work);
 		} else {
-			spin_lock_bh(&idxd->dev_lock);
+			spin_lock(&idxd->dev_lock);
 			idxd_wqs_quiesce(idxd);
 			idxd_wqs_unmap_portal(idxd);
-			idxd_device_wqs_clear_state(idxd);
+			idxd_device_clear_state(idxd);
 			dev_err(&idxd->pdev->dev,
 				"idxd halted, need %s.\n",
 				gensts.reset_type == IDXD_DEVICE_RESET_FLR ?
 				"FLR" : "system reset");
-			spin_unlock_bh(&idxd->dev_lock);
+			spin_unlock(&idxd->dev_lock);
 			return -ENXIO;
 		}
 	}
@@ -228,127 +172,79 @@ irqreturn_t idxd_misc_thread(int vec, void *data)
 	return IRQ_HANDLED;
 }
 
-static inline bool match_fault(struct idxd_desc *desc, u64 fault_addr)
-{
-	/*
-	 * Completion address can be bad as well. Check fault address match for descriptor
-	 * and completion address.
-	 */
-	if ((u64)desc->hw == fault_addr || (u64)desc->completion == fault_addr) {
-		struct idxd_device *idxd = desc->wq->idxd;
-		struct device *dev = &idxd->pdev->dev;
-
-		dev_warn(dev, "desc with fault address: %#llx\n", fault_addr);
-		return true;
-	}
-
-	return false;
-}
-
-static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry,
-				     enum irq_work_type wtype,
-				     int *processed, u64 data)
+static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry)
 {
 	struct idxd_desc *desc, *t;
 	struct llist_node *head;
-	int queued = 0;
-	unsigned long flags;
-	enum idxd_complete_type reason;
 
-	*processed = 0;
 	head = llist_del_all(&irq_entry->pending_llist);
 	if (!head)
-		goto out;
-
-	if (wtype == IRQ_WORK_NORMAL)
-		reason = IDXD_COMPLETE_NORMAL;
-	else
-		reason = IDXD_COMPLETE_DEV_FAIL;
+		return;
 
 	llist_for_each_entry_safe(desc, t, head, llnode) {
 		u8 status = desc->completion->status & DSA_COMP_STATUS_MASK;
 
 		if (status) {
-			if (unlikely(status == IDXD_COMP_DESC_ABORT)) {
+			/*
+			 * Check against the original status as ABORT is software defined
+			 * and 0xff, which DSA_COMP_STATUS_MASK can mask out.
+			 */
+			if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) {
 				complete_desc(desc, IDXD_COMPLETE_ABORT);
-				(*processed)++;
 				continue;
 			}
 
-			if (unlikely(status != DSA_COMP_SUCCESS))
-				match_fault(desc, data);
-			complete_desc(desc, reason);
-			(*processed)++;
+			complete_desc(desc, IDXD_COMPLETE_NORMAL);
 		} else {
-			spin_lock_irqsave(&irq_entry->list_lock, flags);
+			spin_lock(&irq_entry->list_lock);
 			list_add_tail(&desc->list,
 				      &irq_entry->work_list);
-			spin_unlock_irqrestore(&irq_entry->list_lock, flags);
-			queued++;
+			spin_unlock(&irq_entry->list_lock);
 		}
 	}
-
- out:
-	return queued;
 }
 
-static int irq_process_work_list(struct idxd_irq_entry *irq_entry,
-				 enum irq_work_type wtype,
-				 int *processed, u64 data)
+static void irq_process_work_list(struct idxd_irq_entry *irq_entry)
 {
-	int queued = 0;
-	unsigned long flags;
 	LIST_HEAD(flist);
 	struct idxd_desc *desc, *n;
-	enum idxd_complete_type reason;
-
-	*processed = 0;
-	if (wtype == IRQ_WORK_NORMAL)
-		reason = IDXD_COMPLETE_NORMAL;
-	else
-		reason = IDXD_COMPLETE_DEV_FAIL;
 
 	/*
 	 * This lock protects list corruption from access of list outside of the irq handler
 	 * thread.
 	 */
-	spin_lock_irqsave(&irq_entry->list_lock, flags);
+	spin_lock(&irq_entry->list_lock);
 	if (list_empty(&irq_entry->work_list)) {
-		spin_unlock_irqrestore(&irq_entry->list_lock, flags);
-		return 0;
+		spin_unlock(&irq_entry->list_lock);
+		return;
 	}
 
 	list_for_each_entry_safe(desc, n, &irq_entry->work_list, list) {
 		if (desc->completion->status) {
 			list_del(&desc->list);
-			(*processed)++;
 			list_add_tail(&desc->list, &flist);
-		} else {
-			queued++;
 		}
 	}
 
-	spin_unlock_irqrestore(&irq_entry->list_lock, flags);
+	spin_unlock(&irq_entry->list_lock);
 
 	list_for_each_entry(desc, &flist, list) {
-		u8 status = desc->completion->status & DSA_COMP_STATUS_MASK;
-
-		if (unlikely(status == IDXD_COMP_DESC_ABORT)) {
+		/*
+		 * Check against the original status as ABORT is software defined
+		 * and 0xff, which DSA_COMP_STATUS_MASK can mask out.
+		 */
+		if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) {
 			complete_desc(desc, IDXD_COMPLETE_ABORT);
 			continue;
 		}
 
-		if (unlikely(status != DSA_COMP_SUCCESS))
-			match_fault(desc, data);
-		complete_desc(desc, reason);
+		complete_desc(desc, IDXD_COMPLETE_NORMAL);
 	}
-
-	return queued;
 }
 
-static int idxd_desc_process(struct idxd_irq_entry *irq_entry)
+irqreturn_t idxd_wq_thread(int irq, void *data)
 {
-	int rc, processed, total = 0;
+	struct idxd_irq_entry *irq_entry = data;
 
 	/*
 	 * There are two lists we are processing. The pending_llist is where
@@ -367,31 +263,9 @@ static int idxd_desc_process(struct idxd_irq_entry *irq_entry)
 	 *    and process the completed entries.
 	 * 4. If the entry is still waiting on hardware, list_add_tail() to
 	 *    the work_list.
-	 * 5. Repeat until no more descriptors.
 	 */
-	do {
-		rc = irq_process_work_list(irq_entry, IRQ_WORK_NORMAL,
-					   &processed, 0);
-		total += processed;
-		if (rc != 0)
-			continue;
-
-		rc = irq_process_pending_llist(irq_entry, IRQ_WORK_NORMAL,
-					       &processed, 0);
-		total += processed;
-	} while (rc != 0);
-
-	return total;
-}
-
-irqreturn_t idxd_wq_thread(int irq, void *data)
-{
-	struct idxd_irq_entry *irq_entry = data;
-	int processed;
-
-	processed = idxd_desc_process(irq_entry);
-	if (processed == 0)
-		return IRQ_NONE;
+	irq_process_work_list(irq_entry);
+	irq_process_pending_llist(irq_entry);
 
 	return IRQ_HANDLED;
 }
diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h
index c970c3f025f0..ffc7550a77ee 100644
--- a/drivers/dma/idxd/registers.h
+++ b/drivers/dma/idxd/registers.h
@@ -7,6 +7,9 @@
 #define PCI_DEVICE_ID_INTEL_DSA_SPR0	0x0b25
 #define PCI_DEVICE_ID_INTEL_IAX_SPR0	0x0cfe
 
+#define DEVICE_VERSION_1		0x100
+#define DEVICE_VERSION_2		0x200
+
 #define IDXD_MMIO_BAR		0
 #define IDXD_WQ_BAR		2
 #define IDXD_PORTAL_SIZE	PAGE_SIZE
@@ -349,6 +352,9 @@ union wqcfg {
 } __packed;
 
 #define WQCFG_PASID_IDX                2
+#define WQCFG_OCCUP_IDX		6
+
+#define WQCFG_OCCUP_MASK	0xffff
 
 /*
  * This macro calculates the offset into the WQCFG register
diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c
index 36c9c1a89b7e..de76fb4abac2 100644
--- a/drivers/dma/idxd/submit.c
+++ b/drivers/dma/idxd/submit.c
@@ -22,21 +22,13 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
 		desc->hw->pasid = idxd->pasid;
 
 	/*
-	 * Descriptor completion vectors are 1...N for MSIX. We will round
-	 * robin through the N vectors.
+	 * On host, MSIX vecotr 0 is used for misc interrupt. Therefore when we match
+	 * vector 1:1 to the WQ id, we need to add 1
 	 */
-	wq->vec_ptr = desc->vector = (wq->vec_ptr % idxd->num_wq_irqs) + 1;
-	if (!idxd->int_handles) {
-		desc->hw->int_handle = wq->vec_ptr;
-	} else {
-		/*
-		 * int_handles are only for descriptor completion. However for device
-		 * MSIX enumeration, vec 0 is used for misc interrupts. Therefore even
-		 * though we are rotating through 1...N for descriptor interrupts, we
-		 * need to acqurie the int_handles from 0..N-1.
-		 */
-		desc->hw->int_handle = idxd->int_handles[desc->vector - 1];
-	}
+	if (!idxd->int_handles)
+		desc->hw->int_handle = wq->id + 1;
+	else
+		desc->hw->int_handle = idxd->int_handles[wq->id];
 
 	return desc;
 }
@@ -67,7 +59,7 @@ struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype)
 		if (signal_pending_state(TASK_INTERRUPTIBLE, current))
 			break;
 		idx = sbitmap_queue_get(sbq, &cpu);
-		if (idx > 0)
+		if (idx >= 0)
 			break;
 		schedule();
 	}
@@ -114,14 +106,13 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
 {
 	struct idxd_desc *d, *t, *found = NULL;
 	struct llist_node *head;
-	unsigned long flags;
 
 	desc->completion->status = IDXD_COMP_DESC_ABORT;
 	/*
 	 * Grab the list lock so it will block the irq thread handler. This allows the
 	 * abort code to locate the descriptor need to be aborted.
 	 */
-	spin_lock_irqsave(&ie->list_lock, flags);
+	spin_lock(&ie->list_lock);
 	head = llist_del_all(&ie->pending_llist);
 	if (head) {
 		llist_for_each_entry_safe(d, t, head, llnode) {
@@ -135,7 +126,7 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
 
 	if (!found)
 		found = list_abort_desc(wq, ie, desc);
-	spin_unlock_irqrestore(&ie->list_lock, flags);
+	spin_unlock(&ie->list_lock);
 
 	if (found)
 		complete_desc(found, IDXD_COMPLETE_ABORT);
@@ -148,13 +139,17 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
 	void __iomem *portal;
 	int rc;
 
-	if (idxd->state != IDXD_DEV_ENABLED)
+	if (idxd->state != IDXD_DEV_ENABLED) {
+		idxd_free_desc(wq, desc);
 		return -EIO;
+	}
 
-	if (!percpu_ref_tryget_live(&wq->wq_active))
+	if (!percpu_ref_tryget_live(&wq->wq_active)) {
+		idxd_free_desc(wq, desc);
 		return -ENXIO;
+	}
 
-	portal = wq->portal;
+	portal = idxd_wq_portal_addr(wq);
 
 	/*
 	 * The wmb() flushes writes to coherent DMA data before
@@ -168,7 +163,7 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
 	 * that we designated the descriptor to.
 	 */
 	if (desc->hw->flags & IDXD_OP_FLAG_RCI) {
-		ie = &idxd->irq_entries[desc->vector];
+		ie = &idxd->irq_entries[wq->id + 1];
 		llist_add(&desc->llnode, &ie->pending_llist);
 	}
 
@@ -183,8 +178,12 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
 		 */
 		rc = enqcmds(portal, desc->hw);
 		if (rc < 0) {
+			percpu_ref_put(&wq->wq_active);
+			/* abort operation frees the descriptor */
 			if (ie)
 				llist_abort_desc(wq, ie, desc);
+			else
+				idxd_free_desc(wq, desc);
 			return rc;
 		}
 	}
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index 26d8ff97d13d..a9025be940db 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -16,336 +16,11 @@ static char *idxd_wq_type_names[] = {
 	[IDXD_WQT_USER]		= "user",
 };
 
-static int idxd_config_bus_match(struct device *dev,
-				 struct device_driver *drv)
-{
-	int matched = 0;
-
-	if (is_idxd_dev(dev)) {
-		struct idxd_device *idxd = confdev_to_idxd(dev);
-
-		if (idxd->state != IDXD_DEV_CONF_READY)
-			return 0;
-		matched = 1;
-	} else if (is_idxd_wq_dev(dev)) {
-		struct idxd_wq *wq = confdev_to_wq(dev);
-		struct idxd_device *idxd = wq->idxd;
-
-		if (idxd->state < IDXD_DEV_CONF_READY)
-			return 0;
-
-		if (wq->state != IDXD_WQ_DISABLED) {
-			dev_dbg(dev, "%s not disabled\n", dev_name(dev));
-			return 0;
-		}
-		matched = 1;
-	}
-
-	if (matched)
-		dev_dbg(dev, "%s matched\n", dev_name(dev));
-
-	return matched;
-}
-
-static int enable_wq(struct idxd_wq *wq)
-{
-	struct idxd_device *idxd = wq->idxd;
-	struct device *dev = &idxd->pdev->dev;
-	unsigned long flags;
-	int rc;
-
-	mutex_lock(&wq->wq_lock);
-
-	if (idxd->state != IDXD_DEV_ENABLED) {
-		mutex_unlock(&wq->wq_lock);
-		dev_warn(dev, "Enabling while device not enabled.\n");
-		return -EPERM;
-	}
-
-	if (wq->state != IDXD_WQ_DISABLED) {
-		mutex_unlock(&wq->wq_lock);
-		dev_warn(dev, "WQ %d already enabled.\n", wq->id);
-		return -EBUSY;
-	}
-
-	if (!wq->group) {
-		mutex_unlock(&wq->wq_lock);
-		dev_warn(dev, "WQ not attached to group.\n");
-		return -EINVAL;
-	}
-
-	if (strlen(wq->name) == 0) {
-		mutex_unlock(&wq->wq_lock);
-		dev_warn(dev, "WQ name not set.\n");
-		return -EINVAL;
-	}
-
-	/* Shared WQ checks */
-	if (wq_shared(wq)) {
-		if (!device_swq_supported(idxd)) {
-			dev_warn(dev, "PASID not enabled and shared WQ.\n");
-			mutex_unlock(&wq->wq_lock);
-			return -ENXIO;
-		}
-		/*
-		 * Shared wq with the threshold set to 0 means the user
-		 * did not set the threshold or transitioned from a
-		 * dedicated wq but did not set threshold. A value
-		 * of 0 would effectively disable the shared wq. The
-		 * driver does not allow a value of 0 to be set for
-		 * threshold via sysfs.
-		 */
-		if (wq->threshold == 0) {
-			dev_warn(dev, "Shared WQ and threshold 0.\n");
-			mutex_unlock(&wq->wq_lock);
-			return -EINVAL;
-		}
-	}
-
-	rc = idxd_wq_alloc_resources(wq);
-	if (rc < 0) {
-		mutex_unlock(&wq->wq_lock);
-		dev_warn(dev, "WQ resource alloc failed\n");
-		return rc;
-	}
-
-	spin_lock_irqsave(&idxd->dev_lock, flags);
-	if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
-		rc = idxd_device_config(idxd);
-	spin_unlock_irqrestore(&idxd->dev_lock, flags);
-	if (rc < 0) {
-		mutex_unlock(&wq->wq_lock);
-		dev_warn(dev, "Writing WQ %d config failed: %d\n", wq->id, rc);
-		return rc;
-	}
-
-	rc = idxd_wq_enable(wq);
-	if (rc < 0) {
-		mutex_unlock(&wq->wq_lock);
-		dev_warn(dev, "WQ %d enabling failed: %d\n", wq->id, rc);
-		return rc;
-	}
-
-	rc = idxd_wq_map_portal(wq);
-	if (rc < 0) {
-		dev_warn(dev, "wq portal mapping failed: %d\n", rc);
-		rc = idxd_wq_disable(wq);
-		if (rc < 0)
-			dev_warn(dev, "IDXD wq disable failed\n");
-		mutex_unlock(&wq->wq_lock);
-		return rc;
-	}
-
-	wq->client_count = 0;
-
-	if (wq->type == IDXD_WQT_KERNEL) {
-		rc = idxd_wq_init_percpu_ref(wq);
-		if (rc < 0) {
-			dev_dbg(dev, "percpu_ref setup failed\n");
-			mutex_unlock(&wq->wq_lock);
-			return rc;
-		}
-	}
-
-	if (is_idxd_wq_dmaengine(wq)) {
-		rc = idxd_register_dma_channel(wq);
-		if (rc < 0) {
-			dev_dbg(dev, "DMA channel register failed\n");
-			mutex_unlock(&wq->wq_lock);
-			return rc;
-		}
-	} else if (is_idxd_wq_cdev(wq)) {
-		rc = idxd_wq_add_cdev(wq);
-		if (rc < 0) {
-			dev_dbg(dev, "Cdev creation failed\n");
-			mutex_unlock(&wq->wq_lock);
-			return rc;
-		}
-	}
-
-	mutex_unlock(&wq->wq_lock);
-	dev_info(dev, "wq %s enabled\n", dev_name(&wq->conf_dev));
-
-	return 0;
-}
-
-static int idxd_config_bus_probe(struct device *dev)
-{
-	int rc = 0;
-	unsigned long flags;
-
-	dev_dbg(dev, "%s called\n", __func__);
-
-	if (is_idxd_dev(dev)) {
-		struct idxd_device *idxd = confdev_to_idxd(dev);
-
-		if (idxd->state != IDXD_DEV_CONF_READY) {
-			dev_warn(dev, "Device not ready for config\n");
-			return -EBUSY;
-		}
-
-		if (!try_module_get(THIS_MODULE))
-			return -ENXIO;
-
-		/* Perform IDXD configuration and enabling */
-		spin_lock_irqsave(&idxd->dev_lock, flags);
-		if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
-			rc = idxd_device_config(idxd);
-		spin_unlock_irqrestore(&idxd->dev_lock, flags);
-		if (rc < 0) {
-			module_put(THIS_MODULE);
-			dev_warn(dev, "Device config failed: %d\n", rc);
-			return rc;
-		}
-
-		/* start device */
-		rc = idxd_device_enable(idxd);
-		if (rc < 0) {
-			module_put(THIS_MODULE);
-			dev_warn(dev, "Device enable failed: %d\n", rc);
-			return rc;
-		}
-
-		dev_info(dev, "Device %s enabled\n", dev_name(dev));
-
-		rc = idxd_register_dma_device(idxd);
-		if (rc < 0) {
-			module_put(THIS_MODULE);
-			dev_dbg(dev, "Failed to register dmaengine device\n");
-			return rc;
-		}
-		return 0;
-	} else if (is_idxd_wq_dev(dev)) {
-		struct idxd_wq *wq = confdev_to_wq(dev);
-
-		return enable_wq(wq);
-	}
-
-	return -ENODEV;
-}
-
-static void disable_wq(struct idxd_wq *wq)
-{
-	struct idxd_device *idxd = wq->idxd;
-	struct device *dev = &idxd->pdev->dev;
-
-	mutex_lock(&wq->wq_lock);
-	dev_dbg(dev, "%s removing WQ %s\n", __func__, dev_name(&wq->conf_dev));
-	if (wq->state == IDXD_WQ_DISABLED) {
-		mutex_unlock(&wq->wq_lock);
-		return;
-	}
-
-	if (wq->type == IDXD_WQT_KERNEL)
-		idxd_wq_quiesce(wq);
-
-	if (is_idxd_wq_dmaengine(wq))
-		idxd_unregister_dma_channel(wq);
-	else if (is_idxd_wq_cdev(wq))
-		idxd_wq_del_cdev(wq);
-
-	if (idxd_wq_refcount(wq))
-		dev_warn(dev, "Clients has claim on wq %d: %d\n",
-			 wq->id, idxd_wq_refcount(wq));
-
-	idxd_wq_unmap_portal(wq);
-
-	idxd_wq_drain(wq);
-	idxd_wq_reset(wq);
-
-	idxd_wq_free_resources(wq);
-	wq->client_count = 0;
-	mutex_unlock(&wq->wq_lock);
-
-	dev_info(dev, "wq %s disabled\n", dev_name(&wq->conf_dev));
-}
-
-static void idxd_config_bus_remove(struct device *dev)
-{
-	int rc;
-
-	dev_dbg(dev, "%s called for %s\n", __func__, dev_name(dev));
-
-	/* disable workqueue here */
-	if (is_idxd_wq_dev(dev)) {
-		struct idxd_wq *wq = confdev_to_wq(dev);
-
-		disable_wq(wq);
-	} else if (is_idxd_dev(dev)) {
-		struct idxd_device *idxd = confdev_to_idxd(dev);
-		int i;
-
-		dev_dbg(dev, "%s removing dev %s\n", __func__,
-			dev_name(&idxd->conf_dev));
-		for (i = 0; i < idxd->max_wqs; i++) {
-			struct idxd_wq *wq = idxd->wqs[i];
-
-			if (wq->state == IDXD_WQ_DISABLED)
-				continue;
-			dev_warn(dev, "Active wq %d on disable %s.\n", i,
-				 dev_name(&idxd->conf_dev));
-			device_release_driver(&wq->conf_dev);
-		}
-
-		idxd_unregister_dma_device(idxd);
-		rc = idxd_device_disable(idxd);
-		if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) {
-			for (i = 0; i < idxd->max_wqs; i++) {
-				struct idxd_wq *wq = idxd->wqs[i];
-
-				mutex_lock(&wq->wq_lock);
-				idxd_wq_disable_cleanup(wq);
-				mutex_unlock(&wq->wq_lock);
-			}
-		}
-		module_put(THIS_MODULE);
-		if (rc < 0)
-			dev_warn(dev, "Device disable failed\n");
-		else
-			dev_info(dev, "Device %s disabled\n", dev_name(dev));
-
-	}
-}
-
-static void idxd_config_bus_shutdown(struct device *dev)
-{
-	dev_dbg(dev, "%s called\n", __func__);
-}
-
-struct bus_type dsa_bus_type = {
-	.name = "dsa",
-	.match = idxd_config_bus_match,
-	.probe = idxd_config_bus_probe,
-	.remove = idxd_config_bus_remove,
-	.shutdown = idxd_config_bus_shutdown,
-};
-
-static struct idxd_device_driver dsa_drv = {
-	.drv = {
-		.name = "dsa",
-		.bus = &dsa_bus_type,
-		.owner = THIS_MODULE,
-		.mod_name = KBUILD_MODNAME,
-	},
-};
-
-/* IDXD generic driver setup */
-int idxd_register_driver(void)
-{
-	return driver_register(&dsa_drv.drv);
-}
-
-void idxd_unregister_driver(void)
-{
-	driver_unregister(&dsa_drv.drv);
-}
-
 /* IDXD engine attributes */
 static ssize_t engine_group_id_show(struct device *dev,
 				    struct device_attribute *attr, char *buf)
 {
-	struct idxd_engine *engine =
-		container_of(dev, struct idxd_engine, conf_dev);
+	struct idxd_engine *engine = confdev_to_engine(dev);
 
 	if (engine->group)
 		return sysfs_emit(buf, "%d\n", engine->group->id);
@@ -357,8 +32,7 @@ static ssize_t engine_group_id_store(struct device *dev,
 				     struct device_attribute *attr,
 				     const char *buf, size_t count)
 {
-	struct idxd_engine *engine =
-		container_of(dev, struct idxd_engine, conf_dev);
+	struct idxd_engine *engine = confdev_to_engine(dev);
 	struct idxd_device *idxd = engine->idxd;
 	long id;
 	int rc;
@@ -412,7 +86,7 @@ static const struct attribute_group *idxd_engine_attribute_groups[] = {
 
 static void idxd_conf_engine_release(struct device *dev)
 {
-	struct idxd_engine *engine = container_of(dev, struct idxd_engine, conf_dev);
+	struct idxd_engine *engine = confdev_to_engine(dev);
 
 	kfree(engine);
 }
@@ -442,8 +116,7 @@ static ssize_t group_tokens_reserved_show(struct device *dev,
 					  struct device_attribute *attr,
 					  char *buf)
 {
-	struct idxd_group *group =
-		container_of(dev, struct idxd_group, conf_dev);
+	struct idxd_group *group = confdev_to_group(dev);
 
 	return sysfs_emit(buf, "%u\n", group->tokens_reserved);
 }
@@ -452,8 +125,7 @@ static ssize_t group_tokens_reserved_store(struct device *dev,
 					   struct device_attribute *attr,
 					   const char *buf, size_t count)
 {
-	struct idxd_group *group =
-		container_of(dev, struct idxd_group, conf_dev);
+	struct idxd_group *group = confdev_to_group(dev);
 	struct idxd_device *idxd = group->idxd;
 	unsigned long val;
 	int rc;
@@ -490,8 +162,7 @@ static ssize_t group_tokens_allowed_show(struct device *dev,
 					 struct device_attribute *attr,
 					 char *buf)
 {
-	struct idxd_group *group =
-		container_of(dev, struct idxd_group, conf_dev);
+	struct idxd_group *group = confdev_to_group(dev);
 
 	return sysfs_emit(buf, "%u\n", group->tokens_allowed);
 }
@@ -500,8 +171,7 @@ static ssize_t group_tokens_allowed_store(struct device *dev,
 					  struct device_attribute *attr,
 					  const char *buf, size_t count)
 {
-	struct idxd_group *group =
-		container_of(dev, struct idxd_group, conf_dev);
+	struct idxd_group *group = confdev_to_group(dev);
 	struct idxd_device *idxd = group->idxd;
 	unsigned long val;
 	int rc;
@@ -535,8 +205,7 @@ static ssize_t group_use_token_limit_show(struct device *dev,
 					  struct device_attribute *attr,
 					  char *buf)
 {
-	struct idxd_group *group =
-		container_of(dev, struct idxd_group, conf_dev);
+	struct idxd_group *group = confdev_to_group(dev);
 
 	return sysfs_emit(buf, "%u\n", group->use_token_limit);
 }
@@ -545,8 +214,7 @@ static ssize_t group_use_token_limit_store(struct device *dev,
 					   struct device_attribute *attr,
 					   const char *buf, size_t count)
 {
-	struct idxd_group *group =
-		container_of(dev, struct idxd_group, conf_dev);
+	struct idxd_group *group = confdev_to_group(dev);
 	struct idxd_device *idxd = group->idxd;
 	unsigned long val;
 	int rc;
@@ -578,8 +246,7 @@ static struct device_attribute dev_attr_group_use_token_limit =
 static ssize_t group_engines_show(struct device *dev,
 				  struct device_attribute *attr, char *buf)
 {
-	struct idxd_group *group =
-		container_of(dev, struct idxd_group, conf_dev);
+	struct idxd_group *group = confdev_to_group(dev);
 	int i, rc = 0;
 	struct idxd_device *idxd = group->idxd;
 
@@ -607,8 +274,7 @@ static struct device_attribute dev_attr_group_engines =
 static ssize_t group_work_queues_show(struct device *dev,
 				      struct device_attribute *attr, char *buf)
 {
-	struct idxd_group *group =
-		container_of(dev, struct idxd_group, conf_dev);
+	struct idxd_group *group = confdev_to_group(dev);
 	int i, rc = 0;
 	struct idxd_device *idxd = group->idxd;
 
@@ -637,8 +303,7 @@ static ssize_t group_traffic_class_a_show(struct device *dev,
 					  struct device_attribute *attr,
 					  char *buf)
 {
-	struct idxd_group *group =
-		container_of(dev, struct idxd_group, conf_dev);
+	struct idxd_group *group = confdev_to_group(dev);
 
 	return sysfs_emit(buf, "%d\n", group->tc_a);
 }
@@ -647,8 +312,7 @@ static ssize_t group_traffic_class_a_store(struct device *dev,
 					   struct device_attribute *attr,
 					   const char *buf, size_t count)
 {
-	struct idxd_group *group =
-		container_of(dev, struct idxd_group, conf_dev);
+	struct idxd_group *group = confdev_to_group(dev);
 	struct idxd_device *idxd = group->idxd;
 	long val;
 	int rc;
@@ -663,6 +327,9 @@ static ssize_t group_traffic_class_a_store(struct device *dev,
 	if (idxd->state == IDXD_DEV_ENABLED)
 		return -EPERM;
 
+	if (idxd->hw.version < DEVICE_VERSION_2 && !tc_override)
+		return -EPERM;
+
 	if (val < 0 || val > 7)
 		return -EINVAL;
 
@@ -678,8 +345,7 @@ static ssize_t group_traffic_class_b_show(struct device *dev,
 					  struct device_attribute *attr,
 					  char *buf)
 {
-	struct idxd_group *group =
-		container_of(dev, struct idxd_group, conf_dev);
+	struct idxd_group *group = confdev_to_group(dev);
 
 	return sysfs_emit(buf, "%d\n", group->tc_b);
 }
@@ -688,8 +354,7 @@ static ssize_t group_traffic_class_b_store(struct device *dev,
 					   struct device_attribute *attr,
 					   const char *buf, size_t count)
 {
-	struct idxd_group *group =
-		container_of(dev, struct idxd_group, conf_dev);
+	struct idxd_group *group = confdev_to_group(dev);
 	struct idxd_device *idxd = group->idxd;
 	long val;
 	int rc;
@@ -704,6 +369,9 @@ static ssize_t group_traffic_class_b_store(struct device *dev,
 	if (idxd->state == IDXD_DEV_ENABLED)
 		return -EPERM;
 
+	if (idxd->hw.version < DEVICE_VERSION_2 && !tc_override)
+		return -EPERM;
+
 	if (val < 0 || val > 7)
 		return -EINVAL;
 
@@ -737,7 +405,7 @@ static const struct attribute_group *idxd_group_attribute_groups[] = {
 
 static void idxd_conf_group_release(struct device *dev)
 {
-	struct idxd_group *group = container_of(dev, struct idxd_group, conf_dev);
+	struct idxd_group *group = confdev_to_group(dev);
 
 	kfree(group);
 }
@@ -752,7 +420,7 @@ struct device_type idxd_group_device_type = {
 static ssize_t wq_clients_show(struct device *dev,
 			       struct device_attribute *attr, char *buf)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 
 	return sysfs_emit(buf, "%d\n", wq->client_count);
 }
@@ -763,7 +431,7 @@ static struct device_attribute dev_attr_wq_clients =
 static ssize_t wq_state_show(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 
 	switch (wq->state) {
 	case IDXD_WQ_DISABLED:
@@ -781,7 +449,7 @@ static struct device_attribute dev_attr_wq_state =
 static ssize_t wq_group_id_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 
 	if (wq->group)
 		return sysfs_emit(buf, "%u\n", wq->group->id);
@@ -793,7 +461,7 @@ static ssize_t wq_group_id_store(struct device *dev,
 				 struct device_attribute *attr,
 				 const char *buf, size_t count)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 	struct idxd_device *idxd = wq->idxd;
 	long id;
 	int rc;
@@ -836,7 +504,7 @@ static struct device_attribute dev_attr_wq_group_id =
 static ssize_t wq_mode_show(struct device *dev, struct device_attribute *attr,
 			    char *buf)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 
 	return sysfs_emit(buf, "%s\n", wq_dedicated(wq) ? "dedicated" : "shared");
 }
@@ -845,7 +513,7 @@ static ssize_t wq_mode_store(struct device *dev,
 			     struct device_attribute *attr, const char *buf,
 			     size_t count)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 	struct idxd_device *idxd = wq->idxd;
 
 	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
@@ -872,7 +540,7 @@ static struct device_attribute dev_attr_wq_mode =
 static ssize_t wq_size_show(struct device *dev, struct device_attribute *attr,
 			    char *buf)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 
 	return sysfs_emit(buf, "%u\n", wq->size);
 }
@@ -895,7 +563,7 @@ static ssize_t wq_size_store(struct device *dev,
 			     struct device_attribute *attr, const char *buf,
 			     size_t count)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 	unsigned long size;
 	struct idxd_device *idxd = wq->idxd;
 	int rc;
@@ -923,7 +591,7 @@ static struct device_attribute dev_attr_wq_size =
 static ssize_t wq_priority_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 
 	return sysfs_emit(buf, "%u\n", wq->priority);
 }
@@ -932,7 +600,7 @@ static ssize_t wq_priority_store(struct device *dev,
 				 struct device_attribute *attr,
 				 const char *buf, size_t count)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 	unsigned long prio;
 	struct idxd_device *idxd = wq->idxd;
 	int rc;
@@ -960,7 +628,7 @@ static struct device_attribute dev_attr_wq_priority =
 static ssize_t wq_block_on_fault_show(struct device *dev,
 				      struct device_attribute *attr, char *buf)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 
 	return sysfs_emit(buf, "%u\n", test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags));
 }
@@ -969,11 +637,14 @@ static ssize_t wq_block_on_fault_store(struct device *dev,
 				       struct device_attribute *attr,
 				       const char *buf, size_t count)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 	struct idxd_device *idxd = wq->idxd;
 	bool bof;
 	int rc;
 
+	if (!idxd->hw.gen_cap.block_on_fault)
+		return -EOPNOTSUPP;
+
 	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
 		return -EPERM;
 
@@ -999,7 +670,7 @@ static struct device_attribute dev_attr_wq_block_on_fault =
 static ssize_t wq_threshold_show(struct device *dev,
 				 struct device_attribute *attr, char *buf)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 
 	return sysfs_emit(buf, "%u\n", wq->threshold);
 }
@@ -1008,7 +679,7 @@ static ssize_t wq_threshold_store(struct device *dev,
 				  struct device_attribute *attr,
 				  const char *buf, size_t count)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 	struct idxd_device *idxd = wq->idxd;
 	unsigned int val;
 	int rc;
@@ -1040,7 +711,7 @@ static struct device_attribute dev_attr_wq_threshold =
 static ssize_t wq_type_show(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 
 	switch (wq->type) {
 	case IDXD_WQT_KERNEL:
@@ -1059,7 +730,7 @@ static ssize_t wq_type_store(struct device *dev,
 			     struct device_attribute *attr, const char *buf,
 			     size_t count)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 	enum idxd_wq_type old_type;
 
 	if (wq->state != IDXD_WQ_DISABLED)
@@ -1088,7 +759,7 @@ static struct device_attribute dev_attr_wq_type =
 static ssize_t wq_name_show(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 
 	return sysfs_emit(buf, "%s\n", wq->name);
 }
@@ -1097,7 +768,7 @@ static ssize_t wq_name_store(struct device *dev,
 			     struct device_attribute *attr, const char *buf,
 			     size_t count)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 
 	if (wq->state != IDXD_WQ_DISABLED)
 		return -EPERM;
@@ -1124,7 +795,7 @@ static struct device_attribute dev_attr_wq_name =
 static ssize_t wq_cdev_minor_show(struct device *dev,
 				  struct device_attribute *attr, char *buf)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 	int minor = -1;
 
 	mutex_lock(&wq->wq_lock);
@@ -1158,7 +829,7 @@ static int __get_sysfs_u64(const char *buf, u64 *val)
 static ssize_t wq_max_transfer_size_show(struct device *dev, struct device_attribute *attr,
 					 char *buf)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 
 	return sysfs_emit(buf, "%llu\n", wq->max_xfer_bytes);
 }
@@ -1166,7 +837,7 @@ static ssize_t wq_max_transfer_size_show(struct device *dev, struct device_attri
 static ssize_t wq_max_transfer_size_store(struct device *dev, struct device_attribute *attr,
 					  const char *buf, size_t count)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 	struct idxd_device *idxd = wq->idxd;
 	u64 xfer_size;
 	int rc;
@@ -1192,7 +863,7 @@ static struct device_attribute dev_attr_wq_max_transfer_size =
 
 static ssize_t wq_max_batch_size_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 
 	return sysfs_emit(buf, "%u\n", wq->max_batch_size);
 }
@@ -1200,7 +871,7 @@ static ssize_t wq_max_batch_size_show(struct device *dev, struct device_attribut
 static ssize_t wq_max_batch_size_store(struct device *dev, struct device_attribute *attr,
 				       const char *buf, size_t count)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 	struct idxd_device *idxd = wq->idxd;
 	u64 batch_size;
 	int rc;
@@ -1225,7 +896,7 @@ static struct device_attribute dev_attr_wq_max_batch_size =
 
 static ssize_t wq_ats_disable_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 
 	return sysfs_emit(buf, "%u\n", wq->ats_dis);
 }
@@ -1233,7 +904,7 @@ static ssize_t wq_ats_disable_show(struct device *dev, struct device_attribute *
 static ssize_t wq_ats_disable_store(struct device *dev, struct device_attribute *attr,
 				    const char *buf, size_t count)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 	struct idxd_device *idxd = wq->idxd;
 	bool ats_dis;
 	int rc;
@@ -1256,6 +927,24 @@ static ssize_t wq_ats_disable_store(struct device *dev, struct device_attribute
 static struct device_attribute dev_attr_wq_ats_disable =
 		__ATTR(ats_disable, 0644, wq_ats_disable_show, wq_ats_disable_store);
 
+static ssize_t wq_occupancy_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct idxd_wq *wq = confdev_to_wq(dev);
+	struct idxd_device *idxd = wq->idxd;
+	u32 occup, offset;
+
+	if (!idxd->hw.wq_cap.occupancy)
+		return -EOPNOTSUPP;
+
+	offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_OCCUP_IDX);
+	occup = ioread32(idxd->reg_base + offset) & WQCFG_OCCUP_MASK;
+
+	return sysfs_emit(buf, "%u\n", occup);
+}
+
+static struct device_attribute dev_attr_wq_occupancy =
+		__ATTR(occupancy, 0444, wq_occupancy_show, NULL);
+
 static struct attribute *idxd_wq_attributes[] = {
 	&dev_attr_wq_clients.attr,
 	&dev_attr_wq_state.attr,
@@ -1271,6 +960,7 @@ static struct attribute *idxd_wq_attributes[] = {
 	&dev_attr_wq_max_transfer_size.attr,
 	&dev_attr_wq_max_batch_size.attr,
 	&dev_attr_wq_ats_disable.attr,
+	&dev_attr_wq_occupancy.attr,
 	NULL,
 };
 
@@ -1285,7 +975,7 @@ static const struct attribute_group *idxd_wq_attribute_groups[] = {
 
 static void idxd_conf_wq_release(struct device *dev)
 {
-	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_wq *wq = confdev_to_wq(dev);
 
 	kfree(wq->wqcfg);
 	kfree(wq);
@@ -1301,8 +991,7 @@ struct device_type idxd_wq_device_type = {
 static ssize_t version_show(struct device *dev, struct device_attribute *attr,
 			    char *buf)
 {
-	struct idxd_device *idxd =
-		container_of(dev, struct idxd_device, conf_dev);
+	struct idxd_device *idxd = confdev_to_idxd(dev);
 
 	return sysfs_emit(buf, "%#x\n", idxd->hw.version);
 }
@@ -1312,8 +1001,7 @@ static ssize_t max_work_queues_size_show(struct device *dev,
 					 struct device_attribute *attr,
 					 char *buf)
 {
-	struct idxd_device *idxd =
-		container_of(dev, struct idxd_device, conf_dev);
+	struct idxd_device *idxd = confdev_to_idxd(dev);
 
 	return sysfs_emit(buf, "%u\n", idxd->max_wq_size);
 }
@@ -1322,8 +1010,7 @@ static DEVICE_ATTR_RO(max_work_queues_size);
 static ssize_t max_groups_show(struct device *dev,
 			       struct device_attribute *attr, char *buf)
 {
-	struct idxd_device *idxd =
-		container_of(dev, struct idxd_device, conf_dev);
+	struct idxd_device *idxd = confdev_to_idxd(dev);
 
 	return sysfs_emit(buf, "%u\n", idxd->max_groups);
 }
@@ -1332,8 +1019,7 @@ static DEVICE_ATTR_RO(max_groups);
 static ssize_t max_work_queues_show(struct device *dev,
 				    struct device_attribute *attr, char *buf)
 {
-	struct idxd_device *idxd =
-		container_of(dev, struct idxd_device, conf_dev);
+	struct idxd_device *idxd = confdev_to_idxd(dev);
 
 	return sysfs_emit(buf, "%u\n", idxd->max_wqs);
 }
@@ -1342,8 +1028,7 @@ static DEVICE_ATTR_RO(max_work_queues);
 static ssize_t max_engines_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
-	struct idxd_device *idxd =
-		container_of(dev, struct idxd_device, conf_dev);
+	struct idxd_device *idxd = confdev_to_idxd(dev);
 
 	return sysfs_emit(buf, "%u\n", idxd->max_engines);
 }
@@ -1352,8 +1037,7 @@ static DEVICE_ATTR_RO(max_engines);
 static ssize_t numa_node_show(struct device *dev,
 			      struct device_attribute *attr, char *buf)
 {
-	struct idxd_device *idxd =
-		container_of(dev, struct idxd_device, conf_dev);
+	struct idxd_device *idxd = confdev_to_idxd(dev);
 
 	return sysfs_emit(buf, "%d\n", dev_to_node(&idxd->pdev->dev));
 }
@@ -1362,8 +1046,7 @@ static DEVICE_ATTR_RO(numa_node);
 static ssize_t max_batch_size_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
-	struct idxd_device *idxd =
-		container_of(dev, struct idxd_device, conf_dev);
+	struct idxd_device *idxd = confdev_to_idxd(dev);
 
 	return sysfs_emit(buf, "%u\n", idxd->max_batch_size);
 }
@@ -1373,8 +1056,7 @@ static ssize_t max_transfer_size_show(struct device *dev,
 				      struct device_attribute *attr,
 				      char *buf)
 {
-	struct idxd_device *idxd =
-		container_of(dev, struct idxd_device, conf_dev);
+	struct idxd_device *idxd = confdev_to_idxd(dev);
 
 	return sysfs_emit(buf, "%llu\n", idxd->max_xfer_bytes);
 }
@@ -1383,8 +1065,7 @@ static DEVICE_ATTR_RO(max_transfer_size);
 static ssize_t op_cap_show(struct device *dev,
 			   struct device_attribute *attr, char *buf)
 {
-	struct idxd_device *idxd =
-		container_of(dev, struct idxd_device, conf_dev);
+	struct idxd_device *idxd = confdev_to_idxd(dev);
 	int i, rc = 0;
 
 	for (i = 0; i < 4; i++)
@@ -1399,8 +1080,7 @@ static DEVICE_ATTR_RO(op_cap);
 static ssize_t gen_cap_show(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
-	struct idxd_device *idxd =
-		container_of(dev, struct idxd_device, conf_dev);
+	struct idxd_device *idxd = confdev_to_idxd(dev);
 
 	return sysfs_emit(buf, "%#llx\n", idxd->hw.gen_cap.bits);
 }
@@ -1409,8 +1089,7 @@ static DEVICE_ATTR_RO(gen_cap);
 static ssize_t configurable_show(struct device *dev,
 				 struct device_attribute *attr, char *buf)
 {
-	struct idxd_device *idxd =
-		container_of(dev, struct idxd_device, conf_dev);
+	struct idxd_device *idxd = confdev_to_idxd(dev);
 
 	return sysfs_emit(buf, "%u\n", test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags));
 }
@@ -1419,18 +1098,16 @@ static DEVICE_ATTR_RO(configurable);
 static ssize_t clients_show(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
-	struct idxd_device *idxd =
-		container_of(dev, struct idxd_device, conf_dev);
-	unsigned long flags;
+	struct idxd_device *idxd = confdev_to_idxd(dev);
 	int count = 0, i;
 
-	spin_lock_irqsave(&idxd->dev_lock, flags);
+	spin_lock(&idxd->dev_lock);
 	for (i = 0; i < idxd->max_wqs; i++) {
 		struct idxd_wq *wq = idxd->wqs[i];
 
 		count += wq->client_count;
 	}
-	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+	spin_unlock(&idxd->dev_lock);
 
 	return sysfs_emit(buf, "%d\n", count);
 }
@@ -1439,8 +1116,7 @@ static DEVICE_ATTR_RO(clients);
 static ssize_t pasid_enabled_show(struct device *dev,
 				  struct device_attribute *attr, char *buf)
 {
-	struct idxd_device *idxd =
-		container_of(dev, struct idxd_device, conf_dev);
+	struct idxd_device *idxd = confdev_to_idxd(dev);
 
 	return sysfs_emit(buf, "%u\n", device_pasid_enabled(idxd));
 }
@@ -1449,12 +1125,10 @@ static DEVICE_ATTR_RO(pasid_enabled);
 static ssize_t state_show(struct device *dev,
 			  struct device_attribute *attr, char *buf)
 {
-	struct idxd_device *idxd =
-		container_of(dev, struct idxd_device, conf_dev);
+	struct idxd_device *idxd = confdev_to_idxd(dev);
 
 	switch (idxd->state) {
 	case IDXD_DEV_DISABLED:
-	case IDXD_DEV_CONF_READY:
 		return sysfs_emit(buf, "disabled\n");
 	case IDXD_DEV_ENABLED:
 		return sysfs_emit(buf, "enabled\n");
@@ -1469,15 +1143,13 @@ static DEVICE_ATTR_RO(state);
 static ssize_t errors_show(struct device *dev,
 			   struct device_attribute *attr, char *buf)
 {
-	struct idxd_device *idxd =
-		container_of(dev, struct idxd_device, conf_dev);
+	struct idxd_device *idxd = confdev_to_idxd(dev);
 	int i, out = 0;
-	unsigned long flags;
 
-	spin_lock_irqsave(&idxd->dev_lock, flags);
+	spin_lock(&idxd->dev_lock);
 	for (i = 0; i < 4; i++)
 		out += sysfs_emit_at(buf, out, "%#018llx ", idxd->sw_err.bits[i]);
-	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+	spin_unlock(&idxd->dev_lock);
 	out--;
 	out += sysfs_emit_at(buf, out, "\n");
 	return out;
@@ -1487,8 +1159,7 @@ static DEVICE_ATTR_RO(errors);
 static ssize_t max_tokens_show(struct device *dev,
 			       struct device_attribute *attr, char *buf)
 {
-	struct idxd_device *idxd =
-		container_of(dev, struct idxd_device, conf_dev);
+	struct idxd_device *idxd = confdev_to_idxd(dev);
 
 	return sysfs_emit(buf, "%u\n", idxd->max_tokens);
 }
@@ -1497,8 +1168,7 @@ static DEVICE_ATTR_RO(max_tokens);
 static ssize_t token_limit_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
-	struct idxd_device *idxd =
-		container_of(dev, struct idxd_device, conf_dev);
+	struct idxd_device *idxd = confdev_to_idxd(dev);
 
 	return sysfs_emit(buf, "%u\n", idxd->token_limit);
 }
@@ -1507,8 +1177,7 @@ static ssize_t token_limit_store(struct device *dev,
 				 struct device_attribute *attr,
 				 const char *buf, size_t count)
 {
-	struct idxd_device *idxd =
-		container_of(dev, struct idxd_device, conf_dev);
+	struct idxd_device *idxd = confdev_to_idxd(dev);
 	unsigned long val;
 	int rc;
 
@@ -1536,8 +1205,7 @@ static DEVICE_ATTR_RW(token_limit);
 static ssize_t cdev_major_show(struct device *dev,
 			       struct device_attribute *attr, char *buf)
 {
-	struct idxd_device *idxd =
-		container_of(dev, struct idxd_device, conf_dev);
+	struct idxd_device *idxd = confdev_to_idxd(dev);
 
 	return sysfs_emit(buf, "%u\n", idxd->major);
 }
@@ -1546,11 +1214,20 @@ static DEVICE_ATTR_RO(cdev_major);
 static ssize_t cmd_status_show(struct device *dev,
 			       struct device_attribute *attr, char *buf)
 {
-	struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev);
+	struct idxd_device *idxd = confdev_to_idxd(dev);
 
 	return sysfs_emit(buf, "%#x\n", idxd->cmd_status);
 }
-static DEVICE_ATTR_RO(cmd_status);
+
+static ssize_t cmd_status_store(struct device *dev, struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct idxd_device *idxd = confdev_to_idxd(dev);
+
+	idxd->cmd_status = 0;
+	return count;
+}
+static DEVICE_ATTR_RW(cmd_status);
 
 static struct attribute *idxd_device_attributes[] = {
 	&dev_attr_version.attr,
@@ -1586,7 +1263,7 @@ static const struct attribute_group *idxd_attribute_groups[] = {
 
 static void idxd_conf_device_release(struct device *dev)
 {
-	struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev);
+	struct idxd_device *idxd = confdev_to_idxd(dev);
 
 	kfree(idxd->groups);
 	kfree(idxd->wqs);
@@ -1611,12 +1288,12 @@ struct device_type iax_device_type = {
 
 static int idxd_register_engine_devices(struct idxd_device *idxd)
 {
+	struct idxd_engine *engine;
 	int i, j, rc;
 
 	for (i = 0; i < idxd->max_engines; i++) {
-		struct idxd_engine *engine = idxd->engines[i];
-
-		rc = device_add(&engine->conf_dev);
+		engine = idxd->engines[i];
+		rc = device_add(engine_confdev(engine));
 		if (rc < 0)
 			goto cleanup;
 	}
@@ -1625,22 +1302,26 @@ static int idxd_register_engine_devices(struct idxd_device *idxd)
 
 cleanup:
 	j = i - 1;
-	for (; i < idxd->max_engines; i++)
-		put_device(&idxd->engines[i]->conf_dev);
+	for (; i < idxd->max_engines; i++) {
+		engine = idxd->engines[i];
+		put_device(engine_confdev(engine));
+	}
 
-	while (j--)
-		device_unregister(&idxd->engines[j]->conf_dev);
+	while (j--) {
+		engine = idxd->engines[j];
+		device_unregister(engine_confdev(engine));
+	}
 	return rc;
 }
 
 static int idxd_register_group_devices(struct idxd_device *idxd)
 {
+	struct idxd_group *group;
 	int i, j, rc;
 
 	for (i = 0; i < idxd->max_groups; i++) {
-		struct idxd_group *group = idxd->groups[i];
-
-		rc = device_add(&group->conf_dev);
+		group = idxd->groups[i];
+		rc = device_add(group_confdev(group));
 		if (rc < 0)
 			goto cleanup;
 	}
@@ -1649,22 +1330,26 @@ static int idxd_register_group_devices(struct idxd_device *idxd)
 
 cleanup:
 	j = i - 1;
-	for (; i < idxd->max_groups; i++)
-		put_device(&idxd->groups[i]->conf_dev);
+	for (; i < idxd->max_groups; i++) {
+		group = idxd->groups[i];
+		put_device(group_confdev(group));
+	}
 
-	while (j--)
-		device_unregister(&idxd->groups[j]->conf_dev);
+	while (j--) {
+		group = idxd->groups[j];
+		device_unregister(group_confdev(group));
+	}
 	return rc;
 }
 
 static int idxd_register_wq_devices(struct idxd_device *idxd)
 {
+	struct idxd_wq *wq;
 	int i, rc, j;
 
 	for (i = 0; i < idxd->max_wqs; i++) {
-		struct idxd_wq *wq = idxd->wqs[i];
-
-		rc = device_add(&wq->conf_dev);
+		wq = idxd->wqs[i];
+		rc = device_add(wq_confdev(wq));
 		if (rc < 0)
 			goto cleanup;
 	}
@@ -1673,11 +1358,15 @@ static int idxd_register_wq_devices(struct idxd_device *idxd)
 
 cleanup:
 	j = i - 1;
-	for (; i < idxd->max_wqs; i++)
-		put_device(&idxd->wqs[i]->conf_dev);
+	for (; i < idxd->max_wqs; i++) {
+		wq = idxd->wqs[i];
+		put_device(wq_confdev(wq));
+	}
 
-	while (j--)
-		device_unregister(&idxd->wqs[j]->conf_dev);
+	while (j--) {
+		wq = idxd->wqs[j];
+		device_unregister(wq_confdev(wq));
+	}
 	return rc;
 }
 
@@ -1686,7 +1375,7 @@ int idxd_register_devices(struct idxd_device *idxd)
 	struct device *dev = &idxd->pdev->dev;
 	int rc, i;
 
-	rc = device_add(&idxd->conf_dev);
+	rc = device_add(idxd_confdev(idxd));
 	if (rc < 0)
 		return rc;
 
@@ -1712,12 +1401,12 @@ int idxd_register_devices(struct idxd_device *idxd)
 
  err_group:
 	for (i = 0; i < idxd->max_engines; i++)
-		device_unregister(&idxd->engines[i]->conf_dev);
+		device_unregister(engine_confdev(idxd->engines[i]));
  err_engine:
 	for (i = 0; i < idxd->max_wqs; i++)
-		device_unregister(&idxd->wqs[i]->conf_dev);
+		device_unregister(wq_confdev(idxd->wqs[i]));
  err_wq:
-	device_del(&idxd->conf_dev);
+	device_del(idxd_confdev(idxd));
 	return rc;
 }
 
@@ -1728,19 +1417,19 @@ void idxd_unregister_devices(struct idxd_device *idxd)
 	for (i = 0; i < idxd->max_wqs; i++) {
 		struct idxd_wq *wq = idxd->wqs[i];
 
-		device_unregister(&wq->conf_dev);
+		device_unregister(wq_confdev(wq));
 	}
 
 	for (i = 0; i < idxd->max_engines; i++) {
 		struct idxd_engine *engine = idxd->engines[i];
 
-		device_unregister(&engine->conf_dev);
+		device_unregister(engine_confdev(engine));
 	}
 
 	for (i = 0; i < idxd->max_groups; i++) {
 		struct idxd_group *group = idxd->groups[i];
 
-		device_unregister(&group->conf_dev);
+		device_unregister(group_confdev(group));
 	}
 }
 
diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c
index df7704053d91..e2b5129c5f84 100644
--- a/drivers/dma/ppc4xx/adma.c
+++ b/drivers/dma/ppc4xx/adma.c
@@ -4319,6 +4319,7 @@ static ssize_t enable_store(struct device_driver *dev, const char *buf,
 			    size_t count)
 {
 	unsigned long val;
+	int err;
 
 	if (!count || count > 11)
 		return -EINVAL;
@@ -4327,7 +4328,10 @@ static ssize_t enable_store(struct device_driver *dev, const char *buf,
 		return -EFAULT;
 
 	/* Write a key */
-	sscanf(buf, "%lx", &val);
+	err = kstrtoul(buf, 16, &val);
+	if (err)
+		return err;
+
 	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_XORBA, val);
 	isync();
 
@@ -4368,7 +4372,7 @@ static ssize_t poly_store(struct device_driver *dev, const char *buf,
 			  size_t count)
 {
 	unsigned long reg, val;
-
+	int err;
 #ifdef CONFIG_440SP
 	/* 440SP uses default 0x14D polynomial only */
 	return -EINVAL;
@@ -4378,7 +4382,9 @@ static ssize_t poly_store(struct device_driver *dev, const char *buf,
 		return -EINVAL;
 
 	/* e.g., 0x14D or 0x11D */
-	sscanf(buf, "%lx", &val);
+	err = kstrtoul(buf, 16, &val);
+	if (err)
+		return err;
 
 	if (val & ~0x1FF)
 		return -EINVAL;
diff --git a/drivers/dma/ptdma/Kconfig b/drivers/dma/ptdma/Kconfig
new file mode 100644
index 000000000000..b430edd709f9
--- /dev/null
+++ b/drivers/dma/ptdma/Kconfig
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config AMD_PTDMA
+	tristate  "AMD PassThru DMA Engine"
+	depends on X86_64 && PCI
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Enable support for the AMD PTDMA controller. This controller
+	  provides DMA capabilities to perform high bandwidth memory to
+	  memory and IO copy operations. It performs DMA transfer through
+	  queue-based descriptor management. This DMA controller is intended
+	  to be used with AMD Non-Transparent Bridge devices and not for
+	  general purpose peripheral DMA.
diff --git a/drivers/dma/ptdma/Makefile b/drivers/dma/ptdma/Makefile
new file mode 100644
index 000000000000..ce5410268a9a
--- /dev/null
+++ b/drivers/dma/ptdma/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# AMD Passthru DMA driver
+#
+
+obj-$(CONFIG_AMD_PTDMA) += ptdma.o
+
+ptdma-objs := ptdma-dev.o ptdma-dmaengine.o ptdma-debugfs.o
+
+ptdma-$(CONFIG_PCI) += ptdma-pci.o
diff --git a/drivers/dma/ptdma/ptdma-debugfs.c b/drivers/dma/ptdma/ptdma-debugfs.c
new file mode 100644
index 000000000000..c8307d3044a3
--- /dev/null
+++ b/drivers/dma/ptdma/ptdma-debugfs.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Passthrough DMA device driver
+ * -- Based on the CCP driver
+ *
+ * Copyright (C) 2016,2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Sanjay R Mehta <sanju.mehta@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ */
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "ptdma.h"
+
+/* DebugFS helpers */
+#define	RI_VERSION_NUM	0x0000003F
+
+#define	RI_NUM_VQM	0x00078000
+#define	RI_NVQM_SHIFT	15
+
+static int pt_debugfs_info_show(struct seq_file *s, void *p)
+{
+	struct pt_device *pt = s->private;
+	unsigned int regval;
+
+	seq_printf(s, "Device name: %s\n", dev_name(pt->dev));
+	seq_printf(s, "   # Queues: %d\n", 1);
+	seq_printf(s, "     # Cmds: %d\n", pt->cmd_count);
+
+	regval = ioread32(pt->io_regs + CMD_PT_VERSION);
+
+	seq_printf(s, "    Version: %d\n", regval & RI_VERSION_NUM);
+	seq_puts(s, "    Engines:");
+	seq_puts(s, "\n");
+	seq_printf(s, "     Queues: %d\n", (regval & RI_NUM_VQM) >> RI_NVQM_SHIFT);
+
+	return 0;
+}
+
+/*
+ * Return a formatted buffer containing the current
+ * statistics of queue for PTDMA
+ */
+static int pt_debugfs_stats_show(struct seq_file *s, void *p)
+{
+	struct pt_device *pt = s->private;
+
+	seq_printf(s, "Total Interrupts Handled: %ld\n", pt->total_interrupts);
+
+	return 0;
+}
+
+static int pt_debugfs_queue_show(struct seq_file *s, void *p)
+{
+	struct pt_cmd_queue *cmd_q = s->private;
+	unsigned int regval;
+
+	if (!cmd_q)
+		return 0;
+
+	seq_printf(s, "               Pass-Thru: %ld\n", cmd_q->total_pt_ops);
+
+	regval = ioread32(cmd_q->reg_control + 0x000C);
+
+	seq_puts(s, "      Enabled Interrupts:");
+	if (regval & INT_EMPTY_QUEUE)
+		seq_puts(s, " EMPTY");
+	if (regval & INT_QUEUE_STOPPED)
+		seq_puts(s, " STOPPED");
+	if (regval & INT_ERROR)
+		seq_puts(s, " ERROR");
+	if (regval & INT_COMPLETION)
+		seq_puts(s, " COMPLETION");
+	seq_puts(s, "\n");
+
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(pt_debugfs_info);
+DEFINE_SHOW_ATTRIBUTE(pt_debugfs_queue);
+DEFINE_SHOW_ATTRIBUTE(pt_debugfs_stats);
+
+void ptdma_debugfs_setup(struct pt_device *pt)
+{
+	struct pt_cmd_queue *cmd_q;
+	struct dentry *debugfs_q_instance;
+
+	if (!debugfs_initialized())
+		return;
+
+	debugfs_create_file("info", 0400, pt->dma_dev.dbg_dev_root, pt,
+			    &pt_debugfs_info_fops);
+
+	debugfs_create_file("stats", 0400, pt->dma_dev.dbg_dev_root, pt,
+			    &pt_debugfs_stats_fops);
+
+	cmd_q = &pt->cmd_q;
+
+	debugfs_q_instance =
+		debugfs_create_dir("q", pt->dma_dev.dbg_dev_root);
+
+	debugfs_create_file("stats", 0400, debugfs_q_instance, cmd_q,
+			    &pt_debugfs_queue_fops);
+}
diff --git a/drivers/dma/ptdma/ptdma-dev.c b/drivers/dma/ptdma/ptdma-dev.c
new file mode 100644
index 000000000000..8a6bf291a73f
--- /dev/null
+++ b/drivers/dma/ptdma/ptdma-dev.c
@@ -0,0 +1,305 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Passthru DMA device driver
+ * -- Based on the CCP driver
+ *
+ * Copyright (C) 2016,2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Sanjay R Mehta <sanju.mehta@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/dma-mapping.h>
+#include <linux/debugfs.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "ptdma.h"
+
+/* Human-readable error strings */
+static char *pt_error_codes[] = {
+	"",
+	"ERR 01: ILLEGAL_ENGINE",
+	"ERR 03: ILLEGAL_FUNCTION_TYPE",
+	"ERR 04: ILLEGAL_FUNCTION_MODE",
+	"ERR 06: ILLEGAL_FUNCTION_SIZE",
+	"ERR 08: ILLEGAL_FUNCTION_RSVD",
+	"ERR 09: ILLEGAL_BUFFER_LENGTH",
+	"ERR 10: VLSB_FAULT",
+	"ERR 11: ILLEGAL_MEM_ADDR",
+	"ERR 12: ILLEGAL_MEM_SEL",
+	"ERR 13: ILLEGAL_CONTEXT_ID",
+	"ERR 15: 0xF Reserved",
+	"ERR 18: CMD_TIMEOUT",
+	"ERR 19: IDMA0_AXI_SLVERR",
+	"ERR 20: IDMA0_AXI_DECERR",
+	"ERR 21: 0x15 Reserved",
+	"ERR 22: IDMA1_AXI_SLAVE_FAULT",
+	"ERR 23: IDMA1_AIXI_DECERR",
+	"ERR 24: 0x18 Reserved",
+	"ERR 27: 0x1B Reserved",
+	"ERR 38: ODMA0_AXI_SLVERR",
+	"ERR 39: ODMA0_AXI_DECERR",
+	"ERR 40: 0x28 Reserved",
+	"ERR 41: ODMA1_AXI_SLVERR",
+	"ERR 42: ODMA1_AXI_DECERR",
+	"ERR 43: LSB_PARITY_ERR",
+};
+
+static void pt_log_error(struct pt_device *d, int e)
+{
+	dev_err(d->dev, "PTDMA error: %s (0x%x)\n", pt_error_codes[e], e);
+}
+
+void pt_start_queue(struct pt_cmd_queue *cmd_q)
+{
+	/* Turn on the run bit */
+	iowrite32(cmd_q->qcontrol | CMD_Q_RUN, cmd_q->reg_control);
+}
+
+void pt_stop_queue(struct pt_cmd_queue *cmd_q)
+{
+	/* Turn off the run bit */
+	iowrite32(cmd_q->qcontrol & ~CMD_Q_RUN, cmd_q->reg_control);
+}
+
+static int pt_core_execute_cmd(struct ptdma_desc *desc, struct pt_cmd_queue *cmd_q)
+{
+	bool soc = FIELD_GET(DWORD0_SOC, desc->dw0);
+	u8 *q_desc = (u8 *)&cmd_q->qbase[cmd_q->qidx];
+	u32 tail;
+
+	if (soc) {
+		desc->dw0 |= FIELD_PREP(DWORD0_IOC, desc->dw0);
+		desc->dw0 &= ~DWORD0_SOC;
+	}
+	mutex_lock(&cmd_q->q_mutex);
+
+	/* Copy 32-byte command descriptor to hw queue. */
+	memcpy(q_desc, desc, 32);
+	cmd_q->qidx = (cmd_q->qidx + 1) % CMD_Q_LEN;
+
+	/* The data used by this command must be flushed to memory */
+	wmb();
+
+	/* Write the new tail address back to the queue register */
+	tail = lower_32_bits(cmd_q->qdma_tail + cmd_q->qidx * Q_DESC_SIZE);
+	iowrite32(tail, cmd_q->reg_control + 0x0004);
+
+	/* Turn the queue back on using our cached control register */
+	pt_start_queue(cmd_q);
+	mutex_unlock(&cmd_q->q_mutex);
+
+	return 0;
+}
+
+int pt_core_perform_passthru(struct pt_cmd_queue *cmd_q,
+			     struct pt_passthru_engine *pt_engine)
+{
+	struct ptdma_desc desc;
+
+	cmd_q->cmd_error = 0;
+	cmd_q->total_pt_ops++;
+	memset(&desc, 0, sizeof(desc));
+	desc.dw0 = CMD_DESC_DW0_VAL;
+	desc.length = pt_engine->src_len;
+	desc.src_lo = lower_32_bits(pt_engine->src_dma);
+	desc.dw3.src_hi = upper_32_bits(pt_engine->src_dma);
+	desc.dst_lo = lower_32_bits(pt_engine->dst_dma);
+	desc.dw5.dst_hi = upper_32_bits(pt_engine->dst_dma);
+
+	return pt_core_execute_cmd(&desc, cmd_q);
+}
+
+static inline void pt_core_disable_queue_interrupts(struct pt_device *pt)
+{
+	iowrite32(0, pt->cmd_q.reg_control + 0x000C);
+}
+
+static inline void pt_core_enable_queue_interrupts(struct pt_device *pt)
+{
+	iowrite32(SUPPORTED_INTERRUPTS, pt->cmd_q.reg_control + 0x000C);
+}
+
+static void pt_do_cmd_complete(unsigned long data)
+{
+	struct pt_tasklet_data *tdata = (struct pt_tasklet_data *)data;
+	struct pt_cmd *cmd = tdata->cmd;
+	struct pt_cmd_queue *cmd_q = &cmd->pt->cmd_q;
+	u32 tail;
+
+	if (cmd_q->cmd_error) {
+	       /*
+		* Log the error and flush the queue by
+		* moving the head pointer
+		*/
+		tail = lower_32_bits(cmd_q->qdma_tail + cmd_q->qidx * Q_DESC_SIZE);
+		pt_log_error(cmd_q->pt, cmd_q->cmd_error);
+		iowrite32(tail, cmd_q->reg_control + 0x0008);
+	}
+
+	cmd->pt_cmd_callback(cmd->data, cmd->ret);
+}
+
+static irqreturn_t pt_core_irq_handler(int irq, void *data)
+{
+	struct pt_device *pt = data;
+	struct pt_cmd_queue *cmd_q = &pt->cmd_q;
+	u32 status;
+
+	pt_core_disable_queue_interrupts(pt);
+	pt->total_interrupts++;
+	status = ioread32(cmd_q->reg_control + 0x0010);
+	if (status) {
+		cmd_q->int_status = status;
+		cmd_q->q_status = ioread32(cmd_q->reg_control + 0x0100);
+		cmd_q->q_int_status = ioread32(cmd_q->reg_control + 0x0104);
+
+		/* On error, only save the first error value */
+		if ((status & INT_ERROR) && !cmd_q->cmd_error)
+			cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);
+
+		/* Acknowledge the interrupt */
+		iowrite32(status, cmd_q->reg_control + 0x0010);
+		pt_core_enable_queue_interrupts(pt);
+		pt_do_cmd_complete((ulong)&pt->tdata);
+	}
+	return IRQ_HANDLED;
+}
+
+int pt_core_init(struct pt_device *pt)
+{
+	char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
+	struct pt_cmd_queue *cmd_q = &pt->cmd_q;
+	u32 dma_addr_lo, dma_addr_hi;
+	struct device *dev = pt->dev;
+	struct dma_pool *dma_pool;
+	int ret;
+
+	/* Allocate a dma pool for the queue */
+	snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q", dev_name(pt->dev));
+
+	dma_pool = dma_pool_create(dma_pool_name, dev,
+				   PT_DMAPOOL_MAX_SIZE,
+				   PT_DMAPOOL_ALIGN, 0);
+	if (!dma_pool)
+		return -ENOMEM;
+
+	/* ptdma core initialisation */
+	iowrite32(CMD_CONFIG_VHB_EN, pt->io_regs + CMD_CONFIG_OFFSET);
+	iowrite32(CMD_QUEUE_PRIO, pt->io_regs + CMD_QUEUE_PRIO_OFFSET);
+	iowrite32(CMD_TIMEOUT_DISABLE, pt->io_regs + CMD_TIMEOUT_OFFSET);
+	iowrite32(CMD_CLK_GATE_CONFIG, pt->io_regs + CMD_CLK_GATE_CTL_OFFSET);
+	iowrite32(CMD_CONFIG_REQID, pt->io_regs + CMD_REQID_CONFIG_OFFSET);
+
+	cmd_q->pt = pt;
+	cmd_q->dma_pool = dma_pool;
+	mutex_init(&cmd_q->q_mutex);
+
+	/* Page alignment satisfies our needs for N <= 128 */
+	cmd_q->qsize = Q_SIZE(Q_DESC_SIZE);
+	cmd_q->qbase = dma_alloc_coherent(dev, cmd_q->qsize,
+					  &cmd_q->qbase_dma,
+					  GFP_KERNEL);
+	if (!cmd_q->qbase) {
+		dev_err(dev, "unable to allocate command queue\n");
+		ret = -ENOMEM;
+		goto e_dma_alloc;
+	}
+
+	cmd_q->qidx = 0;
+
+	/* Preset some register values */
+	cmd_q->reg_control = pt->io_regs + CMD_Q_STATUS_INCR;
+
+	/* Turn off the queues and disable interrupts until ready */
+	pt_core_disable_queue_interrupts(pt);
+
+	cmd_q->qcontrol = 0; /* Start with nothing */
+	iowrite32(cmd_q->qcontrol, cmd_q->reg_control);
+
+	ioread32(cmd_q->reg_control + 0x0104);
+	ioread32(cmd_q->reg_control + 0x0100);
+
+	/* Clear the interrupt status */
+	iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_control + 0x0010);
+
+	/* Request an irq */
+	ret = request_irq(pt->pt_irq, pt_core_irq_handler, 0, dev_name(pt->dev), pt);
+	if (ret)
+		goto e_pool;
+
+	/* Update the device registers with queue information. */
+	cmd_q->qcontrol &= ~CMD_Q_SIZE;
+	cmd_q->qcontrol |= FIELD_PREP(CMD_Q_SIZE, QUEUE_SIZE_VAL);
+
+	cmd_q->qdma_tail = cmd_q->qbase_dma;
+	dma_addr_lo = lower_32_bits(cmd_q->qdma_tail);
+	iowrite32((u32)dma_addr_lo, cmd_q->reg_control + 0x0004);
+	iowrite32((u32)dma_addr_lo, cmd_q->reg_control + 0x0008);
+
+	dma_addr_hi = upper_32_bits(cmd_q->qdma_tail);
+	cmd_q->qcontrol |= (dma_addr_hi << 16);
+	iowrite32(cmd_q->qcontrol, cmd_q->reg_control);
+
+	pt_core_enable_queue_interrupts(pt);
+
+	/* Register the DMA engine support */
+	ret = pt_dmaengine_register(pt);
+	if (ret)
+		goto e_dmaengine;
+
+	/* Set up debugfs entries */
+	ptdma_debugfs_setup(pt);
+
+	return 0;
+
+e_dmaengine:
+	free_irq(pt->pt_irq, pt);
+
+e_dma_alloc:
+	dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase, cmd_q->qbase_dma);
+
+e_pool:
+	dev_err(dev, "unable to allocate an IRQ\n");
+	dma_pool_destroy(pt->cmd_q.dma_pool);
+
+	return ret;
+}
+
+void pt_core_destroy(struct pt_device *pt)
+{
+	struct device *dev = pt->dev;
+	struct pt_cmd_queue *cmd_q = &pt->cmd_q;
+	struct pt_cmd *cmd;
+
+	/* Unregister the DMA engine */
+	pt_dmaengine_unregister(pt);
+
+	/* Disable and clear interrupts */
+	pt_core_disable_queue_interrupts(pt);
+
+	/* Turn off the run bit */
+	pt_stop_queue(cmd_q);
+
+	/* Clear the interrupt status */
+	iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_control + 0x0010);
+	ioread32(cmd_q->reg_control + 0x0104);
+	ioread32(cmd_q->reg_control + 0x0100);
+
+	free_irq(pt->pt_irq, pt);
+
+	dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase,
+			  cmd_q->qbase_dma);
+
+	/* Flush the cmd queue */
+	while (!list_empty(&pt->cmd)) {
+		/* Invoke the callback directly with an error code */
+		cmd = list_first_entry(&pt->cmd, struct pt_cmd, entry);
+		list_del(&cmd->entry);
+		cmd->pt_cmd_callback(cmd->data, -ENODEV);
+	}
+}
diff --git a/drivers/dma/ptdma/ptdma-dmaengine.c b/drivers/dma/ptdma/ptdma-dmaengine.c
new file mode 100644
index 000000000000..c9e52f6f2f50
--- /dev/null
+++ b/drivers/dma/ptdma/ptdma-dmaengine.c
@@ -0,0 +1,389 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Passthrough DMA device driver
+ * -- Based on the CCP driver
+ *
+ * Copyright (C) 2016,2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Sanjay R Mehta <sanju.mehta@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ */
+
+#include "ptdma.h"
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+static inline struct pt_dma_chan *to_pt_chan(struct dma_chan *dma_chan)
+{
+	return container_of(dma_chan, struct pt_dma_chan, vc.chan);
+}
+
+static inline struct pt_dma_desc *to_pt_desc(struct virt_dma_desc *vd)
+{
+	return container_of(vd, struct pt_dma_desc, vd);
+}
+
+static void pt_free_chan_resources(struct dma_chan *dma_chan)
+{
+	struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+
+	vchan_free_chan_resources(&chan->vc);
+}
+
+static void pt_synchronize(struct dma_chan *dma_chan)
+{
+	struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+
+	vchan_synchronize(&chan->vc);
+}
+
+static void pt_do_cleanup(struct virt_dma_desc *vd)
+{
+	struct pt_dma_desc *desc = to_pt_desc(vd);
+	struct pt_device *pt = desc->pt;
+
+	kmem_cache_free(pt->dma_desc_cache, desc);
+}
+
+static int pt_dma_start_desc(struct pt_dma_desc *desc)
+{
+	struct pt_passthru_engine *pt_engine;
+	struct pt_device *pt;
+	struct pt_cmd *pt_cmd;
+	struct pt_cmd_queue *cmd_q;
+
+	desc->issued_to_hw = 1;
+
+	pt_cmd = &desc->pt_cmd;
+	pt = pt_cmd->pt;
+	cmd_q = &pt->cmd_q;
+	pt_engine = &pt_cmd->passthru;
+
+	pt->tdata.cmd = pt_cmd;
+
+	/* Execute the command */
+	pt_cmd->ret = pt_core_perform_passthru(cmd_q, pt_engine);
+
+	return 0;
+}
+
+static struct pt_dma_desc *pt_next_dma_desc(struct pt_dma_chan *chan)
+{
+	/* Get the next DMA descriptor on the active list */
+	struct virt_dma_desc *vd = vchan_next_desc(&chan->vc);
+
+	return vd ? to_pt_desc(vd) : NULL;
+}
+
+static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
+						 struct pt_dma_desc *desc)
+{
+	struct dma_async_tx_descriptor *tx_desc;
+	struct virt_dma_desc *vd;
+	unsigned long flags;
+
+	/* Loop over descriptors until one is found with commands */
+	do {
+		if (desc) {
+			if (!desc->issued_to_hw) {
+				/* No errors, keep going */
+				if (desc->status != DMA_ERROR)
+					return desc;
+			}
+
+			tx_desc = &desc->vd.tx;
+			vd = &desc->vd;
+		} else {
+			tx_desc = NULL;
+		}
+
+		spin_lock_irqsave(&chan->vc.lock, flags);
+
+		if (desc) {
+			if (desc->status != DMA_ERROR)
+				desc->status = DMA_COMPLETE;
+
+			dma_cookie_complete(tx_desc);
+			dma_descriptor_unmap(tx_desc);
+			list_del(&desc->vd.node);
+		}
+
+		desc = pt_next_dma_desc(chan);
+
+		spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+		if (tx_desc) {
+			dmaengine_desc_get_callback_invoke(tx_desc, NULL);
+			dma_run_dependencies(tx_desc);
+			vchan_vdesc_fini(vd);
+		}
+	} while (desc);
+
+	return NULL;
+}
+
+static void pt_cmd_callback(void *data, int err)
+{
+	struct pt_dma_desc *desc = data;
+	struct dma_chan *dma_chan;
+	struct pt_dma_chan *chan;
+	int ret;
+
+	if (err == -EINPROGRESS)
+		return;
+
+	dma_chan = desc->vd.tx.chan;
+	chan = to_pt_chan(dma_chan);
+
+	if (err)
+		desc->status = DMA_ERROR;
+
+	while (true) {
+		/* Check for DMA descriptor completion */
+		desc = pt_handle_active_desc(chan, desc);
+
+		/* Don't submit cmd if no descriptor or DMA is paused */
+		if (!desc)
+			break;
+
+		ret = pt_dma_start_desc(desc);
+		if (!ret)
+			break;
+
+		desc->status = DMA_ERROR;
+	}
+}
+
+static struct pt_dma_desc *pt_alloc_dma_desc(struct pt_dma_chan *chan,
+					     unsigned long flags)
+{
+	struct pt_dma_desc *desc;
+
+	desc = kmem_cache_zalloc(chan->pt->dma_desc_cache, GFP_NOWAIT);
+	if (!desc)
+		return NULL;
+
+	vchan_tx_prep(&chan->vc, &desc->vd, flags);
+
+	desc->pt = chan->pt;
+	desc->issued_to_hw = 0;
+	desc->status = DMA_IN_PROGRESS;
+
+	return desc;
+}
+
+static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
+					  dma_addr_t dst,
+					  dma_addr_t src,
+					  unsigned int len,
+					  unsigned long flags)
+{
+	struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+	struct pt_passthru_engine *pt_engine;
+	struct pt_dma_desc *desc;
+	struct pt_cmd *pt_cmd;
+
+	desc = pt_alloc_dma_desc(chan, flags);
+	if (!desc)
+		return NULL;
+
+	pt_cmd = &desc->pt_cmd;
+	pt_cmd->pt = chan->pt;
+	pt_engine = &pt_cmd->passthru;
+	pt_cmd->engine = PT_ENGINE_PASSTHRU;
+	pt_engine->src_dma = src;
+	pt_engine->dst_dma = dst;
+	pt_engine->src_len = len;
+	pt_cmd->pt_cmd_callback = pt_cmd_callback;
+	pt_cmd->data = desc;
+
+	desc->len = len;
+
+	return desc;
+}
+
+static struct dma_async_tx_descriptor *
+pt_prep_dma_memcpy(struct dma_chan *dma_chan, dma_addr_t dst,
+		   dma_addr_t src, size_t len, unsigned long flags)
+{
+	struct pt_dma_desc *desc;
+
+	desc = pt_create_desc(dma_chan, dst, src, len, flags);
+	if (!desc)
+		return NULL;
+
+	return &desc->vd.tx;
+}
+
+static struct dma_async_tx_descriptor *
+pt_prep_dma_interrupt(struct dma_chan *dma_chan, unsigned long flags)
+{
+	struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+	struct pt_dma_desc *desc;
+
+	desc = pt_alloc_dma_desc(chan, flags);
+	if (!desc)
+		return NULL;
+
+	return &desc->vd.tx;
+}
+
+static void pt_issue_pending(struct dma_chan *dma_chan)
+{
+	struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+	struct pt_dma_desc *desc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+
+	vchan_issue_pending(&chan->vc);
+
+	desc = pt_next_dma_desc(chan);
+
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+	/* If there was nothing active, start processing */
+	if (desc)
+		pt_cmd_callback(desc, 0);
+}
+
+static int pt_pause(struct dma_chan *dma_chan)
+{
+	struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+	pt_stop_queue(&chan->pt->cmd_q);
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+	return 0;
+}
+
+static int pt_resume(struct dma_chan *dma_chan)
+{
+	struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+	struct pt_dma_desc *desc = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+	pt_start_queue(&chan->pt->cmd_q);
+	desc = pt_next_dma_desc(chan);
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+	/* If there was something active, re-start */
+	if (desc)
+		pt_cmd_callback(desc, 0);
+
+	return 0;
+}
+
+static int pt_terminate_all(struct dma_chan *dma_chan)
+{
+	struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+	vchan_get_all_descriptors(&chan->vc, &head);
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+	vchan_dma_desc_free_list(&chan->vc, &head);
+	vchan_free_chan_resources(&chan->vc);
+
+	return 0;
+}
+
+int pt_dmaengine_register(struct pt_device *pt)
+{
+	struct pt_dma_chan *chan;
+	struct dma_device *dma_dev = &pt->dma_dev;
+	char *cmd_cache_name;
+	char *desc_cache_name;
+	int ret;
+
+	pt->pt_dma_chan = devm_kzalloc(pt->dev, sizeof(*pt->pt_dma_chan),
+				       GFP_KERNEL);
+	if (!pt->pt_dma_chan)
+		return -ENOMEM;
+
+	cmd_cache_name = devm_kasprintf(pt->dev, GFP_KERNEL,
+					"%s-dmaengine-cmd-cache",
+					dev_name(pt->dev));
+	if (!cmd_cache_name)
+		return -ENOMEM;
+
+	desc_cache_name = devm_kasprintf(pt->dev, GFP_KERNEL,
+					 "%s-dmaengine-desc-cache",
+					 dev_name(pt->dev));
+	if (!desc_cache_name) {
+		ret = -ENOMEM;
+		goto err_cache;
+	}
+
+	pt->dma_desc_cache = kmem_cache_create(desc_cache_name,
+					       sizeof(struct pt_dma_desc), 0,
+					       SLAB_HWCACHE_ALIGN, NULL);
+	if (!pt->dma_desc_cache) {
+		ret = -ENOMEM;
+		goto err_cache;
+	}
+
+	dma_dev->dev = pt->dev;
+	dma_dev->src_addr_widths = DMA_SLAVE_BUSWIDTH_64_BYTES;
+	dma_dev->dst_addr_widths = DMA_SLAVE_BUSWIDTH_64_BYTES;
+	dma_dev->directions = DMA_MEM_TO_MEM;
+	dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+	dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+	dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask);
+
+	/*
+	 * PTDMA is intended to be used with the AMD NTB devices, hence
+	 * marking it as DMA_PRIVATE.
+	 */
+	dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask);
+
+	INIT_LIST_HEAD(&dma_dev->channels);
+
+	chan = pt->pt_dma_chan;
+	chan->pt = pt;
+
+	/* Set base and prep routines */
+	dma_dev->device_free_chan_resources = pt_free_chan_resources;
+	dma_dev->device_prep_dma_memcpy = pt_prep_dma_memcpy;
+	dma_dev->device_prep_dma_interrupt = pt_prep_dma_interrupt;
+	dma_dev->device_issue_pending = pt_issue_pending;
+	dma_dev->device_tx_status = dma_cookie_status;
+	dma_dev->device_pause = pt_pause;
+	dma_dev->device_resume = pt_resume;
+	dma_dev->device_terminate_all = pt_terminate_all;
+	dma_dev->device_synchronize = pt_synchronize;
+
+	chan->vc.desc_free = pt_do_cleanup;
+	vchan_init(&chan->vc, dma_dev);
+
+	dma_set_mask_and_coherent(pt->dev, DMA_BIT_MASK(64));
+
+	ret = dma_async_device_register(dma_dev);
+	if (ret)
+		goto err_reg;
+
+	return 0;
+
+err_reg:
+	kmem_cache_destroy(pt->dma_desc_cache);
+
+err_cache:
+	kmem_cache_destroy(pt->dma_cmd_cache);
+
+	return ret;
+}
+
+void pt_dmaengine_unregister(struct pt_device *pt)
+{
+	struct dma_device *dma_dev = &pt->dma_dev;
+
+	dma_async_device_unregister(dma_dev);
+
+	kmem_cache_destroy(pt->dma_desc_cache);
+	kmem_cache_destroy(pt->dma_cmd_cache);
+}
diff --git a/drivers/dma/ptdma/ptdma-pci.c b/drivers/dma/ptdma/ptdma-pci.c
new file mode 100644
index 000000000000..22739ff0c3c5
--- /dev/null
+++ b/drivers/dma/ptdma/ptdma-pci.c
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Passthru DMA device driver
+ * -- Based on the CCP driver
+ *
+ * Copyright (C) 2016,2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Sanjay R Mehta <sanju.mehta@amd.com>
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ */
+
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/pci_ids.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+
+#include "ptdma.h"
+
+struct pt_msix {
+	int msix_count;
+	struct msix_entry msix_entry;
+};
+
+/*
+ * pt_alloc_struct - allocate and initialize the pt_device struct
+ *
+ * @dev: device struct of the PTDMA
+ */
+static struct pt_device *pt_alloc_struct(struct device *dev)
+{
+	struct pt_device *pt;
+
+	pt = devm_kzalloc(dev, sizeof(*pt), GFP_KERNEL);
+
+	if (!pt)
+		return NULL;
+	pt->dev = dev;
+
+	INIT_LIST_HEAD(&pt->cmd);
+
+	return pt;
+}
+
+static int pt_get_msix_irqs(struct pt_device *pt)
+{
+	struct pt_msix *pt_msix = pt->pt_msix;
+	struct device *dev = pt->dev;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int ret;
+
+	pt_msix->msix_entry.entry = 0;
+
+	ret = pci_enable_msix_range(pdev, &pt_msix->msix_entry, 1, 1);
+	if (ret < 0)
+		return ret;
+
+	pt_msix->msix_count = ret;
+
+	pt->pt_irq = pt_msix->msix_entry.vector;
+
+	return 0;
+}
+
+static int pt_get_msi_irq(struct pt_device *pt)
+{
+	struct device *dev = pt->dev;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int ret;
+
+	ret = pci_enable_msi(pdev);
+	if (ret)
+		return ret;
+
+	pt->pt_irq = pdev->irq;
+
+	return 0;
+}
+
+static int pt_get_irqs(struct pt_device *pt)
+{
+	struct device *dev = pt->dev;
+	int ret;
+
+	ret = pt_get_msix_irqs(pt);
+	if (!ret)
+		return 0;
+
+	/* Couldn't get MSI-X vectors, try MSI */
+	dev_err(dev, "could not enable MSI-X (%d), trying MSI\n", ret);
+	ret = pt_get_msi_irq(pt);
+	if (!ret)
+		return 0;
+
+	/* Couldn't get MSI interrupt */
+	dev_err(dev, "could not enable MSI (%d)\n", ret);
+
+	return ret;
+}
+
+static void pt_free_irqs(struct pt_device *pt)
+{
+	struct pt_msix *pt_msix = pt->pt_msix;
+	struct device *dev = pt->dev;
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	if (pt_msix->msix_count)
+		pci_disable_msix(pdev);
+	else if (pt->pt_irq)
+		pci_disable_msi(pdev);
+
+	pt->pt_irq = 0;
+}
+
+static int pt_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct pt_device *pt;
+	struct pt_msix *pt_msix;
+	struct device *dev = &pdev->dev;
+	void __iomem * const *iomap_table;
+	int bar_mask;
+	int ret = -ENOMEM;
+
+	pt = pt_alloc_struct(dev);
+	if (!pt)
+		goto e_err;
+
+	pt_msix = devm_kzalloc(dev, sizeof(*pt_msix), GFP_KERNEL);
+	if (!pt_msix)
+		goto e_err;
+
+	pt->pt_msix = pt_msix;
+	pt->dev_vdata = (struct pt_dev_vdata *)id->driver_data;
+	if (!pt->dev_vdata) {
+		ret = -ENODEV;
+		dev_err(dev, "missing driver data\n");
+		goto e_err;
+	}
+
+	ret = pcim_enable_device(pdev);
+	if (ret) {
+		dev_err(dev, "pcim_enable_device failed (%d)\n", ret);
+		goto e_err;
+	}
+
+	bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
+	ret = pcim_iomap_regions(pdev, bar_mask, "ptdma");
+	if (ret) {
+		dev_err(dev, "pcim_iomap_regions failed (%d)\n", ret);
+		goto e_err;
+	}
+
+	iomap_table = pcim_iomap_table(pdev);
+	if (!iomap_table) {
+		dev_err(dev, "pcim_iomap_table failed\n");
+		ret = -ENOMEM;
+		goto e_err;
+	}
+
+	pt->io_regs = iomap_table[pt->dev_vdata->bar];
+	if (!pt->io_regs) {
+		dev_err(dev, "ioremap failed\n");
+		ret = -ENOMEM;
+		goto e_err;
+	}
+
+	ret = pt_get_irqs(pt);
+	if (ret)
+		goto e_err;
+
+	pci_set_master(pdev);
+
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
+	if (ret) {
+		ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+		if (ret) {
+			dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n",
+				ret);
+			goto e_err;
+		}
+	}
+
+	dev_set_drvdata(dev, pt);
+
+	if (pt->dev_vdata)
+		ret = pt_core_init(pt);
+
+	if (ret)
+		goto e_err;
+
+	return 0;
+
+e_err:
+	dev_err(dev, "initialization failed ret = %d\n", ret);
+
+	return ret;
+}
+
+static void pt_pci_remove(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct pt_device *pt = dev_get_drvdata(dev);
+
+	if (!pt)
+		return;
+
+	if (pt->dev_vdata)
+		pt_core_destroy(pt);
+
+	pt_free_irqs(pt);
+}
+
+static const struct pt_dev_vdata dev_vdata[] = {
+	{
+		.bar = 2,
+	},
+};
+
+static const struct pci_device_id pt_pci_table[] = {
+	{ PCI_VDEVICE(AMD, 0x1498), (kernel_ulong_t)&dev_vdata[0] },
+	/* Last entry must be zero */
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, pt_pci_table);
+
+static struct pci_driver pt_pci_driver = {
+	.name = "ptdma",
+	.id_table = pt_pci_table,
+	.probe = pt_pci_probe,
+	.remove = pt_pci_remove,
+};
+
+module_pci_driver(pt_pci_driver);
+
+MODULE_AUTHOR("Sanjay R Mehta <sanju.mehta@amd.com>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("AMD PassThru DMA driver");
diff --git a/drivers/dma/ptdma/ptdma.h b/drivers/dma/ptdma/ptdma.h
new file mode 100644
index 000000000000..afbf192c9230
--- /dev/null
+++ b/drivers/dma/ptdma/ptdma.h
@@ -0,0 +1,324 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * AMD Passthru DMA device driver
+ * -- Based on the CCP driver
+ *
+ * Copyright (C) 2016,2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Sanjay R Mehta <sanju.mehta@amd.com>
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ */
+
+#ifndef __PT_DEV_H__
+#define __PT_DEV_H__
+
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/wait.h>
+#include <linux/dmapool.h>
+
+#include "../virt-dma.h"
+
+#define MAX_PT_NAME_LEN			16
+#define MAX_DMAPOOL_NAME_LEN		32
+
+#define MAX_HW_QUEUES			1
+#define MAX_CMD_QLEN			100
+
+#define PT_ENGINE_PASSTHRU		5
+
+/* Register Mappings */
+#define IRQ_MASK_REG			0x040
+#define IRQ_STATUS_REG			0x200
+
+#define CMD_Q_ERROR(__qs)		((__qs) & 0x0000003f)
+
+#define CMD_QUEUE_PRIO_OFFSET		0x00
+#define CMD_REQID_CONFIG_OFFSET		0x04
+#define CMD_TIMEOUT_OFFSET		0x08
+#define CMD_PT_VERSION			0x10
+
+#define CMD_Q_CONTROL_BASE		0x0000
+#define CMD_Q_TAIL_LO_BASE		0x0004
+#define CMD_Q_HEAD_LO_BASE		0x0008
+#define CMD_Q_INT_ENABLE_BASE		0x000C
+#define CMD_Q_INTERRUPT_STATUS_BASE	0x0010
+
+#define CMD_Q_STATUS_BASE		0x0100
+#define CMD_Q_INT_STATUS_BASE		0x0104
+#define CMD_Q_DMA_STATUS_BASE		0x0108
+#define CMD_Q_DMA_READ_STATUS_BASE	0x010C
+#define CMD_Q_DMA_WRITE_STATUS_BASE	0x0110
+#define CMD_Q_ABORT_BASE		0x0114
+#define CMD_Q_AX_CACHE_BASE		0x0118
+
+#define CMD_CONFIG_OFFSET		0x1120
+#define CMD_CLK_GATE_CTL_OFFSET		0x6004
+
+#define CMD_DESC_DW0_VAL		0x500012
+
+/* Address offset for virtual queue registers */
+#define CMD_Q_STATUS_INCR		0x1000
+
+/* Bit masks */
+#define CMD_CONFIG_REQID		0
+#define CMD_TIMEOUT_DISABLE		0
+#define CMD_CLK_DYN_GATING_DIS		0
+#define CMD_CLK_SW_GATE_MODE		0
+#define CMD_CLK_GATE_CTL		0
+#define CMD_QUEUE_PRIO			GENMASK(2, 1)
+#define CMD_CONFIG_VHB_EN		BIT(0)
+#define CMD_CLK_DYN_GATING_EN		BIT(0)
+#define CMD_CLK_HW_GATE_MODE		BIT(0)
+#define CMD_CLK_GATE_ON_DELAY		BIT(12)
+#define CMD_CLK_GATE_OFF_DELAY		BIT(12)
+
+#define CMD_CLK_GATE_CONFIG		(CMD_CLK_GATE_CTL | \
+					CMD_CLK_HW_GATE_MODE | \
+					CMD_CLK_GATE_ON_DELAY | \
+					CMD_CLK_DYN_GATING_EN | \
+					CMD_CLK_GATE_OFF_DELAY)
+
+#define CMD_Q_LEN			32
+#define CMD_Q_RUN			BIT(0)
+#define CMD_Q_HALT			BIT(1)
+#define CMD_Q_MEM_LOCATION		BIT(2)
+#define CMD_Q_SIZE_MASK			GENMASK(4, 0)
+#define CMD_Q_SIZE			GENMASK(7, 3)
+#define CMD_Q_SHIFT			GENMASK(1, 0)
+#define QUEUE_SIZE_VAL			((ffs(CMD_Q_LEN) - 2) & \
+								  CMD_Q_SIZE_MASK)
+#define Q_PTR_MASK			(2 << (QUEUE_SIZE_VAL + 5) - 1)
+#define Q_DESC_SIZE			sizeof(struct ptdma_desc)
+#define Q_SIZE(n)			(CMD_Q_LEN * (n))
+
+#define INT_COMPLETION			BIT(0)
+#define INT_ERROR			BIT(1)
+#define INT_QUEUE_STOPPED		BIT(2)
+#define INT_EMPTY_QUEUE			BIT(3)
+#define SUPPORTED_INTERRUPTS		(INT_COMPLETION | INT_ERROR)
+
+/****** Local Storage Block ******/
+#define LSB_START			0
+#define LSB_END				127
+#define LSB_COUNT			(LSB_END - LSB_START + 1)
+
+#define PT_DMAPOOL_MAX_SIZE		64
+#define PT_DMAPOOL_ALIGN		BIT(5)
+
+#define PT_PASSTHRU_BLOCKSIZE		512
+
+struct pt_device;
+
+struct pt_tasklet_data {
+	struct completion completion;
+	struct pt_cmd *cmd;
+};
+
+/*
+ * struct pt_passthru_engine - pass-through operation
+ *   without performing DMA mapping
+ * @mask: mask to be applied to data
+ * @mask_len: length in bytes of mask
+ * @src_dma: data to be used for this operation
+ * @dst_dma: data produced by this operation
+ * @src_len: length in bytes of data used for this operation
+ *
+ * Variables required to be set when calling pt_enqueue_cmd():
+ *   - bit_mod, byte_swap, src, dst, src_len
+ *   - mask, mask_len if bit_mod is not PT_PASSTHRU_BITWISE_NOOP
+ */
+struct pt_passthru_engine {
+	dma_addr_t mask;
+	u32 mask_len;		/* In bytes */
+
+	dma_addr_t src_dma, dst_dma;
+	u64 src_len;		/* In bytes */
+};
+
+/*
+ * struct pt_cmd - PTDMA operation request
+ * @entry: list element
+ * @work: work element used for callbacks
+ * @pt: PT device to be run on
+ * @ret: operation return code
+ * @flags: cmd processing flags
+ * @engine: PTDMA operation to perform (passthru)
+ * @engine_error: PT engine return code
+ * @passthru: engine specific structures, refer to specific engine struct below
+ * @callback: operation completion callback function
+ * @data: parameter value to be supplied to the callback function
+ *
+ * Variables required to be set when calling pt_enqueue_cmd():
+ *   - engine, callback
+ *   - See the operation structures below for what is required for each
+ *     operation.
+ */
+struct pt_cmd {
+	struct list_head entry;
+	struct work_struct work;
+	struct pt_device *pt;
+	int ret;
+	u32 engine;
+	u32 engine_error;
+	struct pt_passthru_engine passthru;
+	/* Completion callback support */
+	void (*pt_cmd_callback)(void *data, int err);
+	void *data;
+};
+
+struct pt_dma_desc {
+	struct virt_dma_desc vd;
+	struct pt_device *pt;
+	enum dma_status status;
+	size_t len;
+	bool issued_to_hw;
+	struct pt_cmd pt_cmd;
+};
+
+struct pt_dma_chan {
+	struct virt_dma_chan vc;
+	struct pt_device *pt;
+};
+
+struct pt_cmd_queue {
+	struct pt_device *pt;
+
+	/* Queue dma pool */
+	struct dma_pool *dma_pool;
+
+	/* Queue base address (not neccessarily aligned)*/
+	struct ptdma_desc *qbase;
+
+	/* Aligned queue start address (per requirement) */
+	struct mutex q_mutex ____cacheline_aligned;
+	unsigned int qidx;
+
+	unsigned int qsize;
+	dma_addr_t qbase_dma;
+	dma_addr_t qdma_tail;
+
+	unsigned int active;
+	unsigned int suspended;
+
+	/* Register addresses for queue */
+	void __iomem *reg_control;
+	u32 qcontrol; /* Cached control register */
+
+	/* Status values from job */
+	u32 int_status;
+	u32 q_status;
+	u32 q_int_status;
+	u32 cmd_error;
+	/* Queue Statistics */
+	unsigned long total_pt_ops;
+} ____cacheline_aligned;
+
+struct pt_device {
+	struct list_head entry;
+
+	unsigned int ord;
+	char name[MAX_PT_NAME_LEN];
+
+	struct device *dev;
+
+	/* Bus specific device information */
+	struct pt_msix *pt_msix;
+
+	struct pt_dev_vdata *dev_vdata;
+
+	unsigned int pt_irq;
+
+	/* I/O area used for device communication */
+	void __iomem *io_regs;
+
+	spinlock_t cmd_lock ____cacheline_aligned;
+	unsigned int cmd_count;
+	struct list_head cmd;
+
+	/*
+	 * The command queue. This represent the queue available on the
+	 * PTDMA that are available for processing cmds
+	 */
+	struct pt_cmd_queue cmd_q;
+
+	/* Support for the DMA Engine capabilities */
+	struct dma_device dma_dev;
+	struct pt_dma_chan *pt_dma_chan;
+	struct kmem_cache *dma_cmd_cache;
+	struct kmem_cache *dma_desc_cache;
+
+	wait_queue_head_t lsb_queue;
+
+	/* Device Statistics */
+	unsigned long total_interrupts;
+
+	struct pt_tasklet_data tdata;
+};
+
+/*
+ * descriptor for PTDMA commands
+ * 8 32-bit words:
+ * word 0: function; engine; control bits
+ * word 1: length of source data
+ * word 2: low 32 bits of source pointer
+ * word 3: upper 16 bits of source pointer; source memory type
+ * word 4: low 32 bits of destination pointer
+ * word 5: upper 16 bits of destination pointer; destination memory type
+ * word 6: reserved 32 bits
+ * word 7: reserved 32 bits
+ */
+
+#define DWORD0_SOC	BIT(0)
+#define DWORD0_IOC	BIT(1)
+
+struct dword3 {
+	unsigned int  src_hi:16;
+	unsigned int  src_mem:2;
+	unsigned int  lsb_cxt_id:8;
+	unsigned int  rsvd1:5;
+	unsigned int  fixed:1;
+};
+
+struct dword5 {
+	unsigned int  dst_hi:16;
+	unsigned int  dst_mem:2;
+	unsigned int  rsvd1:13;
+	unsigned int  fixed:1;
+};
+
+struct ptdma_desc {
+	u32 dw0;
+	u32 length;
+	u32 src_lo;
+	struct dword3 dw3;
+	u32 dst_lo;
+	struct dword5 dw5;
+	__le32 rsvd1;
+	__le32 rsvd2;
+};
+
+/* Structure to hold PT device data */
+struct pt_dev_vdata {
+	const unsigned int bar;
+};
+
+int pt_dmaengine_register(struct pt_device *pt);
+void pt_dmaengine_unregister(struct pt_device *pt);
+
+void ptdma_debugfs_setup(struct pt_device *pt);
+int pt_core_init(struct pt_device *pt);
+void pt_core_destroy(struct pt_device *pt);
+
+int pt_core_perform_passthru(struct pt_cmd_queue *cmd_q,
+			     struct pt_passthru_engine *pt_engine);
+
+void pt_start_queue(struct pt_cmd_queue *cmd_q);
+void pt_stop_queue(struct pt_cmd_queue *cmd_q);
+
+#endif
diff --git a/drivers/dma/sh/Kconfig b/drivers/dma/sh/Kconfig
index 13437323a85b..a46296285307 100644
--- a/drivers/dma/sh/Kconfig
+++ b/drivers/dma/sh/Kconfig
@@ -47,3 +47,12 @@ config RENESAS_USB_DMAC
 	help
 	  This driver supports the USB-DMA controller found in the Renesas
 	  SoCs.
+
+config RZ_DMAC
+	tristate "Renesas RZ/G2L DMA Controller"
+	depends on ARCH_R9A07G044 || COMPILE_TEST
+	select RENESAS_DMA
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  This driver supports the general purpose DMA controller found in the
+	  Renesas RZ/G2L SoC variants.
diff --git a/drivers/dma/sh/Makefile b/drivers/dma/sh/Makefile
index abdf10341725..360ab6d25e76 100644
--- a/drivers/dma/sh/Makefile
+++ b/drivers/dma/sh/Makefile
@@ -15,3 +15,4 @@ obj-$(CONFIG_SH_DMAE) += shdma.o
 
 obj-$(CONFIG_RCAR_DMAC) += rcar-dmac.o
 obj-$(CONFIG_RENESAS_USB_DMAC) += usb-dmac.o
+obj-$(CONFIG_RZ_DMAC) += rz-dmac.o
diff --git a/drivers/dma/sh/rz-dmac.c b/drivers/dma/sh/rz-dmac.c
new file mode 100644
index 000000000000..f9f30cbeccbe
--- /dev/null
+++ b/drivers/dma/sh/rz-dmac.c
@@ -0,0 +1,969 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Renesas RZ/G2L DMA Controller Driver
+ *
+ * Based on imx-dma.c
+ *
+ * Copyright (C) 2021 Renesas Electronics Corp.
+ * Copyright 2010 Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>
+ * Copyright 2012 Javier Martin, Vista Silicon <javier.martin@vista-silicon.com>
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+enum  rz_dmac_prep_type {
+	RZ_DMAC_DESC_MEMCPY,
+	RZ_DMAC_DESC_SLAVE_SG,
+};
+
+struct rz_lmdesc {
+	u32 header;
+	u32 sa;
+	u32 da;
+	u32 tb;
+	u32 chcfg;
+	u32 chitvl;
+	u32 chext;
+	u32 nxla;
+};
+
+struct rz_dmac_desc {
+	struct virt_dma_desc vd;
+	dma_addr_t src;
+	dma_addr_t dest;
+	size_t len;
+	struct list_head node;
+	enum dma_transfer_direction direction;
+	enum rz_dmac_prep_type type;
+	/* For slave sg */
+	struct scatterlist *sg;
+	unsigned int sgcount;
+};
+
+#define to_rz_dmac_desc(d)	container_of(d, struct rz_dmac_desc, vd)
+
+struct rz_dmac_chan {
+	struct virt_dma_chan vc;
+	void __iomem *ch_base;
+	void __iomem *ch_cmn_base;
+	unsigned int index;
+	int irq;
+	struct rz_dmac_desc *desc;
+	int descs_allocated;
+
+	enum dma_slave_buswidth src_word_size;
+	enum dma_slave_buswidth dst_word_size;
+	dma_addr_t src_per_address;
+	dma_addr_t dst_per_address;
+
+	u32 chcfg;
+	u32 chctrl;
+	int mid_rid;
+
+	struct list_head ld_free;
+	struct list_head ld_queue;
+	struct list_head ld_active;
+
+	struct {
+		struct rz_lmdesc *base;
+		struct rz_lmdesc *head;
+		struct rz_lmdesc *tail;
+		dma_addr_t base_dma;
+	} lmdesc;
+};
+
+#define to_rz_dmac_chan(c)	container_of(c, struct rz_dmac_chan, vc.chan)
+
+struct rz_dmac {
+	struct dma_device engine;
+	struct device *dev;
+	void __iomem *base;
+	void __iomem *ext_base;
+
+	unsigned int n_channels;
+	struct rz_dmac_chan *channels;
+
+	DECLARE_BITMAP(modules, 1024);
+};
+
+#define to_rz_dmac(d)	container_of(d, struct rz_dmac, engine)
+
+/*
+ * -----------------------------------------------------------------------------
+ * Registers
+ */
+
+#define CHSTAT				0x0024
+#define CHCTRL				0x0028
+#define CHCFG				0x002c
+#define NXLA				0x0038
+
+#define DCTRL				0x0000
+
+#define EACH_CHANNEL_OFFSET		0x0040
+#define CHANNEL_0_7_OFFSET		0x0000
+#define CHANNEL_0_7_COMMON_BASE		0x0300
+#define CHANNEL_8_15_OFFSET		0x0400
+#define CHANNEL_8_15_COMMON_BASE	0x0700
+
+#define CHSTAT_ER			BIT(4)
+#define CHSTAT_EN			BIT(0)
+
+#define CHCTRL_CLRINTMSK		BIT(17)
+#define CHCTRL_CLRSUS			BIT(9)
+#define CHCTRL_CLRTC			BIT(6)
+#define CHCTRL_CLREND			BIT(5)
+#define CHCTRL_CLRRQ			BIT(4)
+#define CHCTRL_SWRST			BIT(3)
+#define CHCTRL_STG			BIT(2)
+#define CHCTRL_CLREN			BIT(1)
+#define CHCTRL_SETEN			BIT(0)
+#define CHCTRL_DEFAULT			(CHCTRL_CLRINTMSK | CHCTRL_CLRSUS | \
+					 CHCTRL_CLRTC |	CHCTRL_CLREND | \
+					 CHCTRL_CLRRQ | CHCTRL_SWRST | \
+					 CHCTRL_CLREN)
+
+#define CHCFG_DMS			BIT(31)
+#define CHCFG_DEM			BIT(24)
+#define CHCFG_DAD			BIT(21)
+#define CHCFG_SAD			BIT(20)
+#define CHCFG_REQD			BIT(3)
+#define CHCFG_SEL(bits)			((bits) & 0x07)
+#define CHCFG_MEM_COPY			(0x80400008)
+#define CHCFG_FILL_DDS(a)		(((a) << 16) & GENMASK(19, 16))
+#define CHCFG_FILL_SDS(a)		(((a) << 12) & GENMASK(15, 12))
+#define CHCFG_FILL_TM(a)		(((a) & BIT(5)) << 22)
+#define CHCFG_FILL_AM(a)		(((a) & GENMASK(4, 2)) << 6)
+#define CHCFG_FILL_LVL(a)		(((a) & BIT(1)) << 5)
+#define CHCFG_FILL_HIEN(a)		(((a) & BIT(0)) << 5)
+
+#define MID_RID_MASK			GENMASK(9, 0)
+#define CHCFG_MASK			GENMASK(15, 10)
+#define CHCFG_DS_INVALID		0xFF
+#define DCTRL_LVINT			BIT(1)
+#define DCTRL_PR			BIT(0)
+#define DCTRL_DEFAULT			(DCTRL_LVINT | DCTRL_PR)
+
+/* LINK MODE DESCRIPTOR */
+#define HEADER_LV			BIT(0)
+
+#define RZ_DMAC_MAX_CHAN_DESCRIPTORS	16
+#define RZ_DMAC_MAX_CHANNELS		16
+#define DMAC_NR_LMDESC			64
+
+/*
+ * -----------------------------------------------------------------------------
+ * Device access
+ */
+
+static void rz_dmac_writel(struct rz_dmac *dmac, unsigned int val,
+			   unsigned int offset)
+{
+	writel(val, dmac->base + offset);
+}
+
+static void rz_dmac_ext_writel(struct rz_dmac *dmac, unsigned int val,
+			       unsigned int offset)
+{
+	writel(val, dmac->ext_base + offset);
+}
+
+static u32 rz_dmac_ext_readl(struct rz_dmac *dmac, unsigned int offset)
+{
+	return readl(dmac->ext_base + offset);
+}
+
+static void rz_dmac_ch_writel(struct rz_dmac_chan *channel, unsigned int val,
+			      unsigned int offset, int which)
+{
+	if (which)
+		writel(val, channel->ch_base + offset);
+	else
+		writel(val, channel->ch_cmn_base + offset);
+}
+
+static u32 rz_dmac_ch_readl(struct rz_dmac_chan *channel,
+			    unsigned int offset, int which)
+{
+	if (which)
+		return readl(channel->ch_base + offset);
+	else
+		return readl(channel->ch_cmn_base + offset);
+}
+
+/*
+ * -----------------------------------------------------------------------------
+ * Initialization
+ */
+
+static void rz_lmdesc_setup(struct rz_dmac_chan *channel,
+			    struct rz_lmdesc *lmdesc)
+{
+	u32 nxla;
+
+	channel->lmdesc.base = lmdesc;
+	channel->lmdesc.head = lmdesc;
+	channel->lmdesc.tail = lmdesc;
+	nxla = channel->lmdesc.base_dma;
+	while (lmdesc < (channel->lmdesc.base + (DMAC_NR_LMDESC - 1))) {
+		lmdesc->header = 0;
+		nxla += sizeof(*lmdesc);
+		lmdesc->nxla = nxla;
+		lmdesc++;
+	}
+
+	lmdesc->header = 0;
+	lmdesc->nxla = channel->lmdesc.base_dma;
+}
+
+/*
+ * -----------------------------------------------------------------------------
+ * Descriptors preparation
+ */
+
+static void rz_dmac_lmdesc_recycle(struct rz_dmac_chan *channel)
+{
+	struct rz_lmdesc *lmdesc = channel->lmdesc.head;
+
+	while (!(lmdesc->header & HEADER_LV)) {
+		lmdesc->header = 0;
+		lmdesc++;
+		if (lmdesc >= (channel->lmdesc.base + DMAC_NR_LMDESC))
+			lmdesc = channel->lmdesc.base;
+	}
+	channel->lmdesc.head = lmdesc;
+}
+
+static void rz_dmac_enable_hw(struct rz_dmac_chan *channel)
+{
+	struct dma_chan *chan = &channel->vc.chan;
+	struct rz_dmac *dmac = to_rz_dmac(chan->device);
+	unsigned long flags;
+	u32 nxla;
+	u32 chctrl;
+	u32 chstat;
+
+	dev_dbg(dmac->dev, "%s channel %d\n", __func__, channel->index);
+
+	local_irq_save(flags);
+
+	rz_dmac_lmdesc_recycle(channel);
+
+	nxla = channel->lmdesc.base_dma +
+		(sizeof(struct rz_lmdesc) * (channel->lmdesc.head -
+					     channel->lmdesc.base));
+
+	chstat = rz_dmac_ch_readl(channel, CHSTAT, 1);
+	if (!(chstat & CHSTAT_EN)) {
+		chctrl = (channel->chctrl | CHCTRL_SETEN);
+		rz_dmac_ch_writel(channel, nxla, NXLA, 1);
+		rz_dmac_ch_writel(channel, channel->chcfg, CHCFG, 1);
+		rz_dmac_ch_writel(channel, CHCTRL_SWRST, CHCTRL, 1);
+		rz_dmac_ch_writel(channel, chctrl, CHCTRL, 1);
+	}
+
+	local_irq_restore(flags);
+}
+
+static void rz_dmac_disable_hw(struct rz_dmac_chan *channel)
+{
+	struct dma_chan *chan = &channel->vc.chan;
+	struct rz_dmac *dmac = to_rz_dmac(chan->device);
+	unsigned long flags;
+
+	dev_dbg(dmac->dev, "%s channel %d\n", __func__, channel->index);
+
+	local_irq_save(flags);
+	rz_dmac_ch_writel(channel, CHCTRL_DEFAULT, CHCTRL, 1);
+	local_irq_restore(flags);
+}
+
+static void rz_dmac_set_dmars_register(struct rz_dmac *dmac, int nr, u32 dmars)
+{
+	u32 dmars_offset = (nr / 2) * 4;
+	u32 shift = (nr % 2) * 16;
+	u32 dmars32;
+
+	dmars32 = rz_dmac_ext_readl(dmac, dmars_offset);
+	dmars32 &= ~(0xffff << shift);
+	dmars32 |= dmars << shift;
+
+	rz_dmac_ext_writel(dmac, dmars32, dmars_offset);
+}
+
+static void rz_dmac_prepare_desc_for_memcpy(struct rz_dmac_chan *channel)
+{
+	struct dma_chan *chan = &channel->vc.chan;
+	struct rz_dmac *dmac = to_rz_dmac(chan->device);
+	struct rz_lmdesc *lmdesc = channel->lmdesc.tail;
+	struct rz_dmac_desc *d = channel->desc;
+	u32 chcfg = CHCFG_MEM_COPY;
+
+	/* prepare descriptor */
+	lmdesc->sa = d->src;
+	lmdesc->da = d->dest;
+	lmdesc->tb = d->len;
+	lmdesc->chcfg = chcfg;
+	lmdesc->chitvl = 0;
+	lmdesc->chext = 0;
+	lmdesc->header = HEADER_LV;
+
+	rz_dmac_set_dmars_register(dmac, channel->index, 0);
+
+	channel->chcfg = chcfg;
+	channel->chctrl = CHCTRL_STG | CHCTRL_SETEN;
+}
+
+static void rz_dmac_prepare_descs_for_slave_sg(struct rz_dmac_chan *channel)
+{
+	struct dma_chan *chan = &channel->vc.chan;
+	struct rz_dmac *dmac = to_rz_dmac(chan->device);
+	struct rz_dmac_desc *d = channel->desc;
+	struct scatterlist *sg, *sgl = d->sg;
+	struct rz_lmdesc *lmdesc;
+	unsigned int i, sg_len = d->sgcount;
+
+	channel->chcfg |= CHCFG_SEL(channel->index) | CHCFG_DEM | CHCFG_DMS;
+
+	if (d->direction == DMA_DEV_TO_MEM) {
+		channel->chcfg |= CHCFG_SAD;
+		channel->chcfg &= ~CHCFG_REQD;
+	} else {
+		channel->chcfg |= CHCFG_DAD | CHCFG_REQD;
+	}
+
+	lmdesc = channel->lmdesc.tail;
+
+	for (i = 0, sg = sgl; i < sg_len; i++, sg = sg_next(sg)) {
+		if (d->direction == DMA_DEV_TO_MEM) {
+			lmdesc->sa = channel->src_per_address;
+			lmdesc->da = sg_dma_address(sg);
+		} else {
+			lmdesc->sa = sg_dma_address(sg);
+			lmdesc->da = channel->dst_per_address;
+		}
+
+		lmdesc->tb = sg_dma_len(sg);
+		lmdesc->chitvl = 0;
+		lmdesc->chext = 0;
+		if (i == (sg_len - 1)) {
+			lmdesc->chcfg = (channel->chcfg & ~CHCFG_DEM);
+			lmdesc->header = HEADER_LV;
+		} else {
+			lmdesc->chcfg = channel->chcfg;
+			lmdesc->header = HEADER_LV;
+		}
+		if (++lmdesc >= (channel->lmdesc.base + DMAC_NR_LMDESC))
+			lmdesc = channel->lmdesc.base;
+	}
+
+	channel->lmdesc.tail = lmdesc;
+
+	rz_dmac_set_dmars_register(dmac, channel->index, channel->mid_rid);
+	channel->chctrl = CHCTRL_SETEN;
+}
+
+static int rz_dmac_xfer_desc(struct rz_dmac_chan *chan)
+{
+	struct rz_dmac_desc *d = chan->desc;
+	struct virt_dma_desc *vd;
+
+	vd = vchan_next_desc(&chan->vc);
+	if (!vd)
+		return 0;
+
+	list_del(&vd->node);
+
+	switch (d->type) {
+	case RZ_DMAC_DESC_MEMCPY:
+		rz_dmac_prepare_desc_for_memcpy(chan);
+		break;
+
+	case RZ_DMAC_DESC_SLAVE_SG:
+		rz_dmac_prepare_descs_for_slave_sg(chan);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	rz_dmac_enable_hw(chan);
+
+	return 0;
+}
+
+/*
+ * -----------------------------------------------------------------------------
+ * DMA engine operations
+ */
+
+static int rz_dmac_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
+
+	while (channel->descs_allocated < RZ_DMAC_MAX_CHAN_DESCRIPTORS) {
+		struct rz_dmac_desc *desc;
+
+		desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+		if (!desc)
+			break;
+
+		list_add_tail(&desc->node, &channel->ld_free);
+		channel->descs_allocated++;
+	}
+
+	if (!channel->descs_allocated)
+		return -ENOMEM;
+
+	return channel->descs_allocated;
+}
+
+static void rz_dmac_free_chan_resources(struct dma_chan *chan)
+{
+	struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
+	struct rz_dmac *dmac = to_rz_dmac(chan->device);
+	struct rz_lmdesc *lmdesc = channel->lmdesc.base;
+	struct rz_dmac_desc *desc, *_desc;
+	unsigned long flags;
+	unsigned int i;
+
+	spin_lock_irqsave(&channel->vc.lock, flags);
+
+	for (i = 0; i < DMAC_NR_LMDESC; i++)
+		lmdesc[i].header = 0;
+
+	rz_dmac_disable_hw(channel);
+	list_splice_tail_init(&channel->ld_active, &channel->ld_free);
+	list_splice_tail_init(&channel->ld_queue, &channel->ld_free);
+
+	if (channel->mid_rid >= 0) {
+		clear_bit(channel->mid_rid, dmac->modules);
+		channel->mid_rid = -EINVAL;
+	}
+
+	spin_unlock_irqrestore(&channel->vc.lock, flags);
+
+	list_for_each_entry_safe(desc, _desc, &channel->ld_free, node) {
+		kfree(desc);
+		channel->descs_allocated--;
+	}
+
+	INIT_LIST_HEAD(&channel->ld_free);
+	vchan_free_chan_resources(&channel->vc);
+}
+
+static struct dma_async_tx_descriptor *
+rz_dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+			size_t len, unsigned long flags)
+{
+	struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
+	struct rz_dmac *dmac = to_rz_dmac(chan->device);
+	struct rz_dmac_desc *desc;
+
+	dev_dbg(dmac->dev, "%s channel: %d src=0x%pad dst=0x%pad len=%zu\n",
+		__func__, channel->index, &src, &dest, len);
+
+	if (list_empty(&channel->ld_free))
+		return NULL;
+
+	desc = list_first_entry(&channel->ld_free, struct rz_dmac_desc, node);
+
+	desc->type = RZ_DMAC_DESC_MEMCPY;
+	desc->src = src;
+	desc->dest = dest;
+	desc->len = len;
+	desc->direction = DMA_MEM_TO_MEM;
+
+	list_move_tail(channel->ld_free.next, &channel->ld_queue);
+	return vchan_tx_prep(&channel->vc, &desc->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *
+rz_dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+		      unsigned int sg_len,
+		      enum dma_transfer_direction direction,
+		      unsigned long flags, void *context)
+{
+	struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
+	struct rz_dmac_desc *desc;
+	struct scatterlist *sg;
+	int dma_length = 0;
+	int i = 0;
+
+	if (list_empty(&channel->ld_free))
+		return NULL;
+
+	desc = list_first_entry(&channel->ld_free, struct rz_dmac_desc, node);
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		dma_length += sg_dma_len(sg);
+	}
+
+	desc->type = RZ_DMAC_DESC_SLAVE_SG;
+	desc->sg = sgl;
+	desc->sgcount = sg_len;
+	desc->len = dma_length;
+	desc->direction = direction;
+
+	if (direction == DMA_DEV_TO_MEM)
+		desc->src = channel->src_per_address;
+	else
+		desc->dest = channel->dst_per_address;
+
+	list_move_tail(channel->ld_free.next, &channel->ld_queue);
+	return vchan_tx_prep(&channel->vc, &desc->vd, flags);
+}
+
+static int rz_dmac_terminate_all(struct dma_chan *chan)
+{
+	struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	rz_dmac_disable_hw(channel);
+	spin_lock_irqsave(&channel->vc.lock, flags);
+	list_splice_tail_init(&channel->ld_active, &channel->ld_free);
+	list_splice_tail_init(&channel->ld_queue, &channel->ld_free);
+	spin_unlock_irqrestore(&channel->vc.lock, flags);
+	vchan_get_all_descriptors(&channel->vc, &head);
+	vchan_dma_desc_free_list(&channel->vc, &head);
+
+	return 0;
+}
+
+static void rz_dmac_issue_pending(struct dma_chan *chan)
+{
+	struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
+	struct rz_dmac *dmac = to_rz_dmac(chan->device);
+	struct rz_dmac_desc *desc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&channel->vc.lock, flags);
+
+	if (!list_empty(&channel->ld_queue)) {
+		desc = list_first_entry(&channel->ld_queue,
+					struct rz_dmac_desc, node);
+		channel->desc = desc;
+		if (vchan_issue_pending(&channel->vc)) {
+			if (rz_dmac_xfer_desc(channel) < 0)
+				dev_warn(dmac->dev, "ch: %d couldn't issue DMA xfer\n",
+					 channel->index);
+			else
+				list_move_tail(channel->ld_queue.next,
+					       &channel->ld_active);
+		}
+	}
+
+	spin_unlock_irqrestore(&channel->vc.lock, flags);
+}
+
+static u8 rz_dmac_ds_to_val_mapping(enum dma_slave_buswidth ds)
+{
+	u8 i;
+	const enum dma_slave_buswidth ds_lut[] = {
+		DMA_SLAVE_BUSWIDTH_1_BYTE,
+		DMA_SLAVE_BUSWIDTH_2_BYTES,
+		DMA_SLAVE_BUSWIDTH_4_BYTES,
+		DMA_SLAVE_BUSWIDTH_8_BYTES,
+		DMA_SLAVE_BUSWIDTH_16_BYTES,
+		DMA_SLAVE_BUSWIDTH_32_BYTES,
+		DMA_SLAVE_BUSWIDTH_64_BYTES,
+		DMA_SLAVE_BUSWIDTH_128_BYTES,
+	};
+
+	for (i = 0; i < ARRAY_SIZE(ds_lut); i++) {
+		if (ds_lut[i] == ds)
+			return i;
+	}
+
+	return CHCFG_DS_INVALID;
+}
+
+static int rz_dmac_config(struct dma_chan *chan,
+			  struct dma_slave_config *config)
+{
+	struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
+	u32 val;
+
+	channel->src_per_address = config->src_addr;
+	channel->src_word_size = config->src_addr_width;
+	channel->dst_per_address = config->dst_addr;
+	channel->dst_word_size = config->dst_addr_width;
+
+	val = rz_dmac_ds_to_val_mapping(config->dst_addr_width);
+	if (val == CHCFG_DS_INVALID)
+		return -EINVAL;
+
+	channel->chcfg |= CHCFG_FILL_DDS(val);
+
+	val = rz_dmac_ds_to_val_mapping(config->src_addr_width);
+	if (val == CHCFG_DS_INVALID)
+		return -EINVAL;
+
+	channel->chcfg |= CHCFG_FILL_SDS(val);
+
+	return 0;
+}
+
+static void rz_dmac_virt_desc_free(struct virt_dma_desc *vd)
+{
+	/*
+	 * Place holder
+	 * Descriptor allocation is done during alloc_chan_resources and
+	 * get freed during free_chan_resources.
+	 * list is used to manage the descriptors and avoid any memory
+	 * allocation/free during DMA read/write.
+	 */
+}
+
+/*
+ * -----------------------------------------------------------------------------
+ * IRQ handling
+ */
+
+static void rz_dmac_irq_handle_channel(struct rz_dmac_chan *channel)
+{
+	struct dma_chan *chan = &channel->vc.chan;
+	struct rz_dmac *dmac = to_rz_dmac(chan->device);
+	u32 chstat, chctrl;
+
+	chstat = rz_dmac_ch_readl(channel, CHSTAT, 1);
+	if (chstat & CHSTAT_ER) {
+		dev_err(dmac->dev, "DMAC err CHSTAT_%d = %08X\n",
+			channel->index, chstat);
+		rz_dmac_ch_writel(channel, CHCTRL_DEFAULT, CHCTRL, 1);
+		goto done;
+	}
+
+	chctrl = rz_dmac_ch_readl(channel, CHCTRL, 1);
+	rz_dmac_ch_writel(channel, chctrl | CHCTRL_CLREND, CHCTRL, 1);
+done:
+	return;
+}
+
+static irqreturn_t rz_dmac_irq_handler(int irq, void *dev_id)
+{
+	struct rz_dmac_chan *channel = dev_id;
+
+	if (channel) {
+		rz_dmac_irq_handle_channel(channel);
+		return IRQ_WAKE_THREAD;
+	}
+	/* handle DMAERR irq */
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t rz_dmac_irq_handler_thread(int irq, void *dev_id)
+{
+	struct rz_dmac_chan *channel = dev_id;
+	struct rz_dmac_desc *desc = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&channel->vc.lock, flags);
+
+	if (list_empty(&channel->ld_active)) {
+		/* Someone might have called terminate all */
+		goto out;
+	}
+
+	desc = list_first_entry(&channel->ld_active, struct rz_dmac_desc, node);
+	vchan_cookie_complete(&desc->vd);
+	list_move_tail(channel->ld_active.next, &channel->ld_free);
+	if (!list_empty(&channel->ld_queue)) {
+		desc = list_first_entry(&channel->ld_queue, struct rz_dmac_desc,
+					node);
+		channel->desc = desc;
+		if (rz_dmac_xfer_desc(channel) == 0)
+			list_move_tail(channel->ld_queue.next, &channel->ld_active);
+	}
+out:
+	spin_unlock_irqrestore(&channel->vc.lock, flags);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * -----------------------------------------------------------------------------
+ * OF xlate and channel filter
+ */
+
+static bool rz_dmac_chan_filter(struct dma_chan *chan, void *arg)
+{
+	struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
+	struct rz_dmac *dmac = to_rz_dmac(chan->device);
+	struct of_phandle_args *dma_spec = arg;
+	u32 ch_cfg;
+
+	channel->mid_rid = dma_spec->args[0] & MID_RID_MASK;
+	ch_cfg = (dma_spec->args[0] & CHCFG_MASK) >> 10;
+	channel->chcfg = CHCFG_FILL_TM(ch_cfg) | CHCFG_FILL_AM(ch_cfg) |
+			 CHCFG_FILL_LVL(ch_cfg) | CHCFG_FILL_HIEN(ch_cfg);
+
+	return !test_and_set_bit(channel->mid_rid, dmac->modules);
+}
+
+static struct dma_chan *rz_dmac_of_xlate(struct of_phandle_args *dma_spec,
+					 struct of_dma *ofdma)
+{
+	dma_cap_mask_t mask;
+
+	if (dma_spec->args_count != 1)
+		return NULL;
+
+	/* Only slave DMA channels can be allocated via DT */
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	return dma_request_channel(mask, rz_dmac_chan_filter, dma_spec);
+}
+
+/*
+ * -----------------------------------------------------------------------------
+ * Probe and remove
+ */
+
+static int rz_dmac_chan_probe(struct rz_dmac *dmac,
+			      struct rz_dmac_chan *channel,
+			      unsigned int index)
+{
+	struct platform_device *pdev = to_platform_device(dmac->dev);
+	struct rz_lmdesc *lmdesc;
+	char pdev_irqname[5];
+	char *irqname;
+	int ret;
+
+	channel->index = index;
+	channel->mid_rid = -EINVAL;
+
+	/* Request the channel interrupt. */
+	sprintf(pdev_irqname, "ch%u", index);
+	channel->irq = platform_get_irq_byname(pdev, pdev_irqname);
+	if (channel->irq < 0)
+		return channel->irq;
+
+	irqname = devm_kasprintf(dmac->dev, GFP_KERNEL, "%s:%u",
+				 dev_name(dmac->dev), index);
+	if (!irqname)
+		return -ENOMEM;
+
+	ret = devm_request_threaded_irq(dmac->dev, channel->irq,
+					rz_dmac_irq_handler,
+					rz_dmac_irq_handler_thread, 0,
+					irqname, channel);
+	if (ret) {
+		dev_err(dmac->dev, "failed to request IRQ %u (%d)\n",
+			channel->irq, ret);
+		return ret;
+	}
+
+	/* Set io base address for each channel */
+	if (index < 8) {
+		channel->ch_base = dmac->base + CHANNEL_0_7_OFFSET +
+			EACH_CHANNEL_OFFSET * index;
+		channel->ch_cmn_base = dmac->base + CHANNEL_0_7_COMMON_BASE;
+	} else {
+		channel->ch_base = dmac->base + CHANNEL_8_15_OFFSET +
+			EACH_CHANNEL_OFFSET * (index - 8);
+		channel->ch_cmn_base = dmac->base + CHANNEL_8_15_COMMON_BASE;
+	}
+
+	/* Allocate descriptors */
+	lmdesc = dma_alloc_coherent(&pdev->dev,
+				    sizeof(struct rz_lmdesc) * DMAC_NR_LMDESC,
+				    &channel->lmdesc.base_dma, GFP_KERNEL);
+	if (!lmdesc) {
+		dev_err(&pdev->dev, "Can't allocate memory (lmdesc)\n");
+		return -ENOMEM;
+	}
+	rz_lmdesc_setup(channel, lmdesc);
+
+	/* Initialize register for each channel */
+	rz_dmac_ch_writel(channel, CHCTRL_DEFAULT, CHCTRL, 1);
+
+	channel->vc.desc_free = rz_dmac_virt_desc_free;
+	vchan_init(&channel->vc, &dmac->engine);
+	INIT_LIST_HEAD(&channel->ld_queue);
+	INIT_LIST_HEAD(&channel->ld_free);
+	INIT_LIST_HEAD(&channel->ld_active);
+
+	return 0;
+}
+
+static int rz_dmac_parse_of(struct device *dev, struct rz_dmac *dmac)
+{
+	struct device_node *np = dev->of_node;
+	int ret;
+
+	ret = of_property_read_u32(np, "dma-channels", &dmac->n_channels);
+	if (ret < 0) {
+		dev_err(dev, "unable to read dma-channels property\n");
+		return ret;
+	}
+
+	if (!dmac->n_channels || dmac->n_channels > RZ_DMAC_MAX_CHANNELS) {
+		dev_err(dev, "invalid number of channels %u\n", dmac->n_channels);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int rz_dmac_probe(struct platform_device *pdev)
+{
+	const char *irqname = "error";
+	struct dma_device *engine;
+	struct rz_dmac *dmac;
+	int channel_num;
+	unsigned int i;
+	int ret;
+	int irq;
+
+	dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
+	if (!dmac)
+		return -ENOMEM;
+
+	dmac->dev = &pdev->dev;
+	platform_set_drvdata(pdev, dmac);
+
+	ret = rz_dmac_parse_of(&pdev->dev, dmac);
+	if (ret < 0)
+		return ret;
+
+	dmac->channels = devm_kcalloc(&pdev->dev, dmac->n_channels,
+				      sizeof(*dmac->channels), GFP_KERNEL);
+	if (!dmac->channels)
+		return -ENOMEM;
+
+	/* Request resources */
+	dmac->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(dmac->base))
+		return PTR_ERR(dmac->base);
+
+	dmac->ext_base = devm_platform_ioremap_resource(pdev, 1);
+	if (IS_ERR(dmac->ext_base))
+		return PTR_ERR(dmac->ext_base);
+
+	/* Register interrupt handler for error */
+	irq = platform_get_irq_byname(pdev, irqname);
+	if (irq < 0)
+		return irq;
+
+	ret = devm_request_irq(&pdev->dev, irq, rz_dmac_irq_handler, 0,
+			       irqname, NULL);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to request IRQ %u (%d)\n",
+			irq, ret);
+		return ret;
+	}
+
+	/* Initialize the channels. */
+	INIT_LIST_HEAD(&dmac->engine.channels);
+
+	for (i = 0; i < dmac->n_channels; i++) {
+		ret = rz_dmac_chan_probe(dmac, &dmac->channels[i], i);
+		if (ret < 0)
+			goto err;
+	}
+
+	/* Register the DMAC as a DMA provider for DT. */
+	ret = of_dma_controller_register(pdev->dev.of_node, rz_dmac_of_xlate,
+					 NULL);
+	if (ret < 0)
+		goto err;
+
+	/* Register the DMA engine device. */
+	engine = &dmac->engine;
+	dma_cap_set(DMA_SLAVE, engine->cap_mask);
+	dma_cap_set(DMA_MEMCPY, engine->cap_mask);
+	rz_dmac_writel(dmac, DCTRL_DEFAULT, CHANNEL_0_7_COMMON_BASE + DCTRL);
+	rz_dmac_writel(dmac, DCTRL_DEFAULT, CHANNEL_8_15_COMMON_BASE + DCTRL);
+
+	engine->dev = &pdev->dev;
+
+	engine->device_alloc_chan_resources = rz_dmac_alloc_chan_resources;
+	engine->device_free_chan_resources = rz_dmac_free_chan_resources;
+	engine->device_tx_status = dma_cookie_status;
+	engine->device_prep_slave_sg = rz_dmac_prep_slave_sg;
+	engine->device_prep_dma_memcpy = rz_dmac_prep_dma_memcpy;
+	engine->device_config = rz_dmac_config;
+	engine->device_terminate_all = rz_dmac_terminate_all;
+	engine->device_issue_pending = rz_dmac_issue_pending;
+
+	engine->copy_align = DMAENGINE_ALIGN_1_BYTE;
+	dma_set_max_seg_size(engine->dev, U32_MAX);
+
+	ret = dma_async_device_register(engine);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "unable to register\n");
+		goto dma_register_err;
+	}
+	return 0;
+
+dma_register_err:
+	of_dma_controller_free(pdev->dev.of_node);
+err:
+	channel_num = i ? i - 1 : 0;
+	for (i = 0; i < channel_num; i++) {
+		struct rz_dmac_chan *channel = &dmac->channels[i];
+
+		dma_free_coherent(&pdev->dev,
+				  sizeof(struct rz_lmdesc) * DMAC_NR_LMDESC,
+				  channel->lmdesc.base,
+				  channel->lmdesc.base_dma);
+	}
+
+	return ret;
+}
+
+static int rz_dmac_remove(struct platform_device *pdev)
+{
+	struct rz_dmac *dmac = platform_get_drvdata(pdev);
+	unsigned int i;
+
+	for (i = 0; i < dmac->n_channels; i++) {
+		struct rz_dmac_chan *channel = &dmac->channels[i];
+
+		dma_free_coherent(&pdev->dev,
+				  sizeof(struct rz_lmdesc) * DMAC_NR_LMDESC,
+				  channel->lmdesc.base,
+				  channel->lmdesc.base_dma);
+	}
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&dmac->engine);
+
+	return 0;
+}
+
+static const struct of_device_id of_rz_dmac_match[] = {
+	{ .compatible = "renesas,rz-dmac", },
+	{ /* Sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, of_rz_dmac_match);
+
+static struct platform_driver rz_dmac_driver = {
+	.driver		= {
+		.name	= "rz-dmac",
+		.of_match_table = of_rz_dmac_match,
+	},
+	.probe		= rz_dmac_probe,
+	.remove		= rz_dmac_remove,
+};
+
+module_platform_driver(rz_dmac_driver);
+
+MODULE_DESCRIPTION("Renesas RZ/G2L DMA Controller Driver");
+MODULE_AUTHOR("Biju Das <biju.das.jz@bp.renesas.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c
index 1cc06900153e..5edaeb89d1e6 100644
--- a/drivers/dma/sh/usb-dmac.c
+++ b/drivers/dma/sh/usb-dmac.c
@@ -466,7 +466,7 @@ static int usb_dmac_chan_terminate_all(struct dma_chan *chan)
 
 static unsigned int usb_dmac_get_current_residue(struct usb_dmac_chan *chan,
 						 struct usb_dmac_desc *desc,
-						 int sg_index)
+						 unsigned int sg_index)
 {
 	struct usb_dmac_sg *sg = desc->sg + sg_index;
 	u32 mem_addr = sg->mem_addr & 0xffffffff;
diff --git a/drivers/dma/sprd-dma.c b/drivers/dma/sprd-dma.c
index 0ef5ca81ba4d..4357d2395e6b 100644
--- a/drivers/dma/sprd-dma.c
+++ b/drivers/dma/sprd-dma.c
@@ -1265,6 +1265,7 @@ static const struct of_device_id sprd_dma_match[] = {
 	{ .compatible = "sprd,sc9860-dma", },
 	{},
 };
+MODULE_DEVICE_TABLE(of, sprd_dma_match);
 
 static int __maybe_unused sprd_dma_runtime_suspend(struct device *dev)
 {
diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
index 7dd1d3d0bf06..9063c727962e 100644
--- a/drivers/dma/stm32-dma.c
+++ b/drivers/dma/stm32-dma.c
@@ -60,6 +60,7 @@
 #define STM32_DMA_SCR_PSIZE_GET(n)	((n & STM32_DMA_SCR_PSIZE_MASK) >> 11)
 #define STM32_DMA_SCR_DIR_MASK		GENMASK(7, 6)
 #define STM32_DMA_SCR_DIR(n)		((n & 0x3) << 6)
+#define STM32_DMA_SCR_TRBUFF		BIT(20) /* Bufferable transfer for USART/UART */
 #define STM32_DMA_SCR_CT		BIT(19) /* Target in double buffer */
 #define STM32_DMA_SCR_DBM		BIT(18) /* Double Buffer Mode */
 #define STM32_DMA_SCR_PINCOS		BIT(15) /* Peripheral inc offset size */
@@ -138,8 +139,9 @@
 #define STM32_DMA_THRESHOLD_FTR_MASK	GENMASK(1, 0)
 #define STM32_DMA_THRESHOLD_FTR_GET(n)	((n) & STM32_DMA_THRESHOLD_FTR_MASK)
 #define STM32_DMA_DIRECT_MODE_MASK	BIT(2)
-#define STM32_DMA_DIRECT_MODE_GET(n)	(((n) & STM32_DMA_DIRECT_MODE_MASK) \
-					 >> 2)
+#define STM32_DMA_DIRECT_MODE_GET(n)	(((n) & STM32_DMA_DIRECT_MODE_MASK) >> 2)
+#define STM32_DMA_ALT_ACK_MODE_MASK	BIT(4)
+#define STM32_DMA_ALT_ACK_MODE_GET(n)	(((n) & STM32_DMA_ALT_ACK_MODE_MASK) >> 4)
 
 enum stm32_dma_width {
 	STM32_DMA_BYTE,
@@ -1252,6 +1254,8 @@ static void stm32_dma_set_config(struct stm32_dma_chan *chan,
 	chan->threshold = STM32_DMA_THRESHOLD_FTR_GET(cfg->features);
 	if (STM32_DMA_DIRECT_MODE_GET(cfg->features))
 		chan->threshold = STM32_DMA_FIFO_THRESHOLD_NONE;
+	if (STM32_DMA_ALT_ACK_MODE_GET(cfg->features))
+		chan->chan_reg.dma_scr |= STM32_DMA_SCR_TRBUFF;
 }
 
 static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec,
diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c
index 4735742e826d..b1115a6d1935 100644
--- a/drivers/dma/tegra210-adma.c
+++ b/drivers/dma/tegra210-adma.c
@@ -655,9 +655,8 @@ static int tegra_adma_alloc_chan_resources(struct dma_chan *dc)
 		return ret;
 	}
 
-	ret = pm_runtime_get_sync(tdc2dev(tdc));
+	ret = pm_runtime_resume_and_get(tdc2dev(tdc));
 	if (ret < 0) {
-		pm_runtime_put_noidle(tdc2dev(tdc));
 		free_irq(tdc->irq, tdc);
 		return ret;
 	}
@@ -869,10 +868,8 @@ static int tegra_adma_probe(struct platform_device *pdev)
 	pm_runtime_enable(&pdev->dev);
 
 	ret = pm_runtime_get_sync(&pdev->dev);
-	if (ret < 0) {
-		pm_runtime_put_noidle(&pdev->dev);
+	if (ret < 0)
 		goto rpm_disable;
-	}
 
 	ret = tegra_adma_init(tdma);
 	if (ret)
diff --git a/drivers/dma/ti/k3-psil-j721e.c b/drivers/dma/ti/k3-psil-j721e.c
index 7580870ed746..34e3fc565a37 100644
--- a/drivers/dma/ti/k3-psil-j721e.c
+++ b/drivers/dma/ti/k3-psil-j721e.c
@@ -58,6 +58,14 @@
 		},					\
 	}
 
+#define PSIL_CSI2RX(x)					\
+	{						\
+		.thread_id = x,				\
+		.ep_config = {				\
+			.ep_type = PSIL_EP_NATIVE,	\
+		},					\
+	}
+
 /* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */
 static struct psil_ep j721e_src_ep_map[] = {
 	/* SA2UL */
@@ -138,6 +146,71 @@ static struct psil_ep j721e_src_ep_map[] = {
 	PSIL_PDMA_XY_PKT(0x4707),
 	PSIL_PDMA_XY_PKT(0x4708),
 	PSIL_PDMA_XY_PKT(0x4709),
+	/* CSI2RX */
+	PSIL_CSI2RX(0x4940),
+	PSIL_CSI2RX(0x4941),
+	PSIL_CSI2RX(0x4942),
+	PSIL_CSI2RX(0x4943),
+	PSIL_CSI2RX(0x4944),
+	PSIL_CSI2RX(0x4945),
+	PSIL_CSI2RX(0x4946),
+	PSIL_CSI2RX(0x4947),
+	PSIL_CSI2RX(0x4948),
+	PSIL_CSI2RX(0x4949),
+	PSIL_CSI2RX(0x494a),
+	PSIL_CSI2RX(0x494b),
+	PSIL_CSI2RX(0x494c),
+	PSIL_CSI2RX(0x494d),
+	PSIL_CSI2RX(0x494e),
+	PSIL_CSI2RX(0x494f),
+	PSIL_CSI2RX(0x4950),
+	PSIL_CSI2RX(0x4951),
+	PSIL_CSI2RX(0x4952),
+	PSIL_CSI2RX(0x4953),
+	PSIL_CSI2RX(0x4954),
+	PSIL_CSI2RX(0x4955),
+	PSIL_CSI2RX(0x4956),
+	PSIL_CSI2RX(0x4957),
+	PSIL_CSI2RX(0x4958),
+	PSIL_CSI2RX(0x4959),
+	PSIL_CSI2RX(0x495a),
+	PSIL_CSI2RX(0x495b),
+	PSIL_CSI2RX(0x495c),
+	PSIL_CSI2RX(0x495d),
+	PSIL_CSI2RX(0x495e),
+	PSIL_CSI2RX(0x495f),
+	PSIL_CSI2RX(0x4960),
+	PSIL_CSI2RX(0x4961),
+	PSIL_CSI2RX(0x4962),
+	PSIL_CSI2RX(0x4963),
+	PSIL_CSI2RX(0x4964),
+	PSIL_CSI2RX(0x4965),
+	PSIL_CSI2RX(0x4966),
+	PSIL_CSI2RX(0x4967),
+	PSIL_CSI2RX(0x4968),
+	PSIL_CSI2RX(0x4969),
+	PSIL_CSI2RX(0x496a),
+	PSIL_CSI2RX(0x496b),
+	PSIL_CSI2RX(0x496c),
+	PSIL_CSI2RX(0x496d),
+	PSIL_CSI2RX(0x496e),
+	PSIL_CSI2RX(0x496f),
+	PSIL_CSI2RX(0x4970),
+	PSIL_CSI2RX(0x4971),
+	PSIL_CSI2RX(0x4972),
+	PSIL_CSI2RX(0x4973),
+	PSIL_CSI2RX(0x4974),
+	PSIL_CSI2RX(0x4975),
+	PSIL_CSI2RX(0x4976),
+	PSIL_CSI2RX(0x4977),
+	PSIL_CSI2RX(0x4978),
+	PSIL_CSI2RX(0x4979),
+	PSIL_CSI2RX(0x497a),
+	PSIL_CSI2RX(0x497b),
+	PSIL_CSI2RX(0x497c),
+	PSIL_CSI2RX(0x497d),
+	PSIL_CSI2RX(0x497e),
+	PSIL_CSI2RX(0x497f),
 	/* CPSW9 */
 	PSIL_ETHERNET(0x4a00),
 	/* CPSW0 */
diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index 4b9530a7bf65..a4450bc95466 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -1420,8 +1420,7 @@ static void xilinx_vdma_start_transfer(struct xilinx_dma_chan *chan)
 
 	chan->desc_submitcount++;
 	chan->desc_pendingcount--;
-	list_del(&desc->node);
-	list_add_tail(&desc->node, &chan->active_list);
+	list_move_tail(&desc->node, &chan->active_list);
 	if (chan->desc_submitcount == chan->num_frms)
 		chan->desc_submitcount = 0;
 
@@ -1659,6 +1658,17 @@ static void xilinx_dma_issue_pending(struct dma_chan *dchan)
 }
 
 /**
+ * xilinx_dma_device_config - Configure the DMA channel
+ * @dchan: DMA channel
+ * @config: channel configuration
+ */
+static int xilinx_dma_device_config(struct dma_chan *dchan,
+				    struct dma_slave_config *config)
+{
+	return 0;
+}
+
+/**
  * xilinx_dma_complete_descriptor - Mark the active descriptor as complete
  * @chan : xilinx DMA channel
  *
@@ -3077,7 +3087,7 @@ static int xilinx_dma_probe(struct platform_device *pdev)
 		xdev->ext_addr = false;
 
 	/* Set the dma mask bits */
-	dma_set_mask(xdev->dev, DMA_BIT_MASK(addr_width));
+	dma_set_mask_and_coherent(xdev->dev, DMA_BIT_MASK(addr_width));
 
 	/* Initialize the DMA engine */
 	xdev->common.dev = &pdev->dev;
@@ -3096,6 +3106,7 @@ static int xilinx_dma_probe(struct platform_device *pdev)
 	xdev->common.device_synchronize = xilinx_dma_synchronize;
 	xdev->common.device_tx_status = xilinx_dma_tx_status;
 	xdev->common.device_issue_pending = xilinx_dma_issue_pending;
+	xdev->common.device_config = xilinx_dma_device_config;
 	if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
 		dma_cap_set(DMA_CYCLIC, xdev->common.cap_mask);
 		xdev->common.device_prep_slave_sg = xilinx_dma_prep_slave_sg;
diff --git a/drivers/dma/xilinx/zynqmp_dma.c b/drivers/dma/xilinx/zynqmp_dma.c
index 5fecf5aa6e85..97f02f8eb03a 100644
--- a/drivers/dma/xilinx/zynqmp_dma.c
+++ b/drivers/dma/xilinx/zynqmp_dma.c
@@ -434,8 +434,7 @@ static void zynqmp_dma_free_descriptor(struct zynqmp_dma_chan *chan,
 	struct zynqmp_dma_desc_sw *child, *next;
 
 	chan->desc_free_cnt++;
-	list_del(&sdesc->node);
-	list_add_tail(&sdesc->node, &chan->free_list);
+	list_move_tail(&sdesc->node, &chan->free_list);
 	list_for_each_entry_safe(child, next, &sdesc->tx_list, node) {
 		chan->desc_free_cnt++;
 		list_move_tail(&child->node, &chan->free_list);
diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c
index 715e491dfbc3..4c3fd2eed1da 100644
--- a/drivers/firewire/net.c
+++ b/drivers/firewire/net.c
@@ -488,9 +488,7 @@ static int fwnet_finish_incoming_packet(struct net_device *net,
 					struct sk_buff *skb, u16 source_node_id,
 					bool is_broadcast, u16 ether_type)
 {
-	struct fwnet_device *dev;
 	int status;
-	__be64 guid;
 
 	switch (ether_type) {
 	case ETH_P_ARP:
@@ -503,7 +501,6 @@ static int fwnet_finish_incoming_packet(struct net_device *net,
 		goto err;
 	}
 
-	dev = netdev_priv(net);
 	/* Write metadata, and then pass to the receive level */
 	skb->dev = net;
 	skb->ip_summed = CHECKSUM_NONE;
@@ -512,7 +509,6 @@ static int fwnet_finish_incoming_packet(struct net_device *net,
 	 * Parse the encapsulation header. This actually does the job of
 	 * converting to an ethernet-like pseudo frame header.
 	 */
-	guid = cpu_to_be64(dev->card->guid);
 	if (dev_hard_header(skb, net, ether_type,
 			   is_broadcast ? net->broadcast : net->dev_addr,
 			   NULL, skb->len) >= 0) {
diff --git a/drivers/firmware/dmi-id.c b/drivers/firmware/dmi-id.c
index 4d5421d14a41..940ddf916202 100644
--- a/drivers/firmware/dmi-id.c
+++ b/drivers/firmware/dmi-id.c
@@ -73,6 +73,10 @@ static void ascii_filter(char *d, const char *s)
 
 static ssize_t get_modalias(char *buffer, size_t buffer_size)
 {
+	/*
+	 * Note new fields need to be added at the end to keep compatibility
+	 * with udev's hwdb which does matches on "`cat dmi/id/modalias`*".
+	 */
 	static const struct mafield {
 		const char *prefix;
 		int field;
@@ -85,13 +89,13 @@ static ssize_t get_modalias(char *buffer, size_t buffer_size)
 		{ "svn", DMI_SYS_VENDOR },
 		{ "pn",  DMI_PRODUCT_NAME },
 		{ "pvr", DMI_PRODUCT_VERSION },
-		{ "sku", DMI_PRODUCT_SKU },
 		{ "rvn", DMI_BOARD_VENDOR },
 		{ "rn",  DMI_BOARD_NAME },
 		{ "rvr", DMI_BOARD_VERSION },
 		{ "cvn", DMI_CHASSIS_VENDOR },
 		{ "ct",  DMI_CHASSIS_TYPE },
 		{ "cvr", DMI_CHASSIS_VERSION },
+		{ "sku", DMI_PRODUCT_SKU },
 		{ NULL,  DMI_NONE }
 	};
 
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index ae87dded989d..d489bdc645fe 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -7,7 +7,7 @@
  * Copyright 2011 Intel Corporation; author Matt Fleming
  */
 
-#include <stdarg.h>
+#include <linux/stdarg.h>
 
 #include <linux/ctype.h>
 #include <linux/efi.h>
diff --git a/drivers/firmware/efi/libstub/vsprintf.c b/drivers/firmware/efi/libstub/vsprintf.c
index 1088e288c04d..71c71c222346 100644
--- a/drivers/firmware/efi/libstub/vsprintf.c
+++ b/drivers/firmware/efi/libstub/vsprintf.c
@@ -10,7 +10,7 @@
  * Oh, it's a waste of space, but oh-so-yummy for debugging.
  */
 
-#include <stdarg.h>
+#include <linux/stdarg.h>
 
 #include <linux/compiler.h>
 #include <linux/ctype.h>
diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c
index 612a59e213df..6e9788324fea 100644
--- a/drivers/firmware/iscsi_ibft.c
+++ b/drivers/firmware/iscsi_ibft.c
@@ -86,10 +86,6 @@ MODULE_VERSION(IBFT_ISCSI_VERSION);
 
 static struct acpi_table_ibft *ibft_addr;
 
-#ifndef CONFIG_ISCSI_IBFT_FIND
-phys_addr_t ibft_phys_addr;
-#endif
-
 struct ibft_hdr {
 	u8 id;
 	u8 version;
@@ -851,7 +847,21 @@ static void __init acpi_find_ibft_region(void)
 {
 }
 #endif
-
+#ifdef CONFIG_ISCSI_IBFT_FIND
+static int __init acpi_find_isa_region(void)
+{
+	if (ibft_phys_addr) {
+		ibft_addr = isa_bus_to_virt(ibft_phys_addr);
+		return 0;
+	}
+	return -ENODEV;
+}
+#else
+static int __init acpi_find_isa_region(void)
+{
+	return -ENODEV;
+}
+#endif
 /*
  * ibft_init() - creates sysfs tree entries for the iBFT data.
  */
@@ -864,9 +874,7 @@ static int __init ibft_init(void)
 	   is called before ACPI tables are parsed and it only does
 	   legacy finding.
 	*/
-	if (ibft_phys_addr)
-		ibft_addr = isa_bus_to_virt(ibft_phys_addr);
-	else
+	if (acpi_find_isa_region())
 		acpi_find_ibft_region();
 
 	if (ibft_addr) {
diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c
index ced1964faf42..2ee97bab7440 100644
--- a/drivers/firmware/qcom_scm.c
+++ b/drivers/firmware/qcom_scm.c
@@ -1147,6 +1147,64 @@ int qcom_scm_qsmmu500_wait_safe_toggle(bool en)
 }
 EXPORT_SYMBOL(qcom_scm_qsmmu500_wait_safe_toggle);
 
+bool qcom_scm_lmh_dcvsh_available(void)
+{
+	return __qcom_scm_is_call_available(__scm->dev, QCOM_SCM_SVC_LMH, QCOM_SCM_LMH_LIMIT_DCVSH);
+}
+EXPORT_SYMBOL(qcom_scm_lmh_dcvsh_available);
+
+int qcom_scm_lmh_profile_change(u32 profile_id)
+{
+	struct qcom_scm_desc desc = {
+		.svc = QCOM_SCM_SVC_LMH,
+		.cmd = QCOM_SCM_LMH_LIMIT_PROFILE_CHANGE,
+		.arginfo = QCOM_SCM_ARGS(1, QCOM_SCM_VAL),
+		.args[0] = profile_id,
+		.owner = ARM_SMCCC_OWNER_SIP,
+	};
+
+	return qcom_scm_call(__scm->dev, &desc, NULL);
+}
+EXPORT_SYMBOL(qcom_scm_lmh_profile_change);
+
+int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val,
+		       u64 limit_node, u32 node_id, u64 version)
+{
+	dma_addr_t payload_phys;
+	u32 *payload_buf;
+	int ret, payload_size = 5 * sizeof(u32);
+
+	struct qcom_scm_desc desc = {
+		.svc = QCOM_SCM_SVC_LMH,
+		.cmd = QCOM_SCM_LMH_LIMIT_DCVSH,
+		.arginfo = QCOM_SCM_ARGS(5, QCOM_SCM_RO, QCOM_SCM_VAL, QCOM_SCM_VAL,
+					QCOM_SCM_VAL, QCOM_SCM_VAL),
+		.args[1] = payload_size,
+		.args[2] = limit_node,
+		.args[3] = node_id,
+		.args[4] = version,
+		.owner = ARM_SMCCC_OWNER_SIP,
+	};
+
+	payload_buf = dma_alloc_coherent(__scm->dev, payload_size, &payload_phys, GFP_KERNEL);
+	if (!payload_buf)
+		return -ENOMEM;
+
+	payload_buf[0] = payload_fn;
+	payload_buf[1] = 0;
+	payload_buf[2] = payload_reg;
+	payload_buf[3] = 1;
+	payload_buf[4] = payload_val;
+
+	desc.args[0] = payload_phys;
+
+	ret = qcom_scm_call(__scm->dev, &desc, NULL);
+
+	dma_free_coherent(__scm->dev, payload_size, payload_buf, payload_phys);
+	return ret;
+}
+EXPORT_SYMBOL(qcom_scm_lmh_dcvsh);
+
 static int qcom_scm_find_dload_address(struct device *dev, u64 *addr)
 {
 	struct device_node *tcsr;
diff --git a/drivers/firmware/qcom_scm.h b/drivers/firmware/qcom_scm.h
index 632fe3142462..d92156ceb3ac 100644
--- a/drivers/firmware/qcom_scm.h
+++ b/drivers/firmware/qcom_scm.h
@@ -114,6 +114,10 @@ extern int scm_legacy_call(struct device *dev, const struct qcom_scm_desc *desc,
 #define QCOM_SCM_SVC_HDCP		0x11
 #define QCOM_SCM_HDCP_INVOKE		0x01
 
+#define QCOM_SCM_SVC_LMH			0x13
+#define QCOM_SCM_LMH_LIMIT_PROFILE_CHANGE	0x01
+#define QCOM_SCM_LMH_LIMIT_DCVSH		0x10
+
 #define QCOM_SCM_SVC_SMMU_PROGRAM		0x15
 #define QCOM_SCM_SMMU_CONFIG_ERRATA1		0x03
 #define QCOM_SCM_SMMU_CONFIG_ERRATA1_CLIENT_ALL	0x02
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 81abd890b364..fae5141251e5 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -1018,12 +1018,6 @@ config GPIO_MAX732X_IRQ
 	  Say yes here to enable the max732x to be used as an interrupt
 	  controller. It requires the driver to be built in the kernel.
 
-config GPIO_MC9S08DZ60
-	bool "MX35 3DS BOARD MC9S08DZ60 GPIO functions"
-	depends on I2C=y && MACH_MX35_3DS
-	help
-	  Select this to enable the MC9S08DZ60 GPIO driver
-
 config GPIO_PCA953X
 	tristate "PCA95[357]x, PCA9698, TCA64xx, and MAX7310 I/O ports"
 	select REGMAP_I2C
@@ -1677,6 +1671,15 @@ config GPIO_MOCKUP
 	  tools/testing/selftests/gpio/gpio-mockup.sh. Reference the usage in
 	  it.
 
+config GPIO_VIRTIO
+	tristate "VirtIO GPIO support"
+	depends on VIRTIO
+	help
+	  Say Y here to enable guest support for virtio-based GPIO controllers.
+
+	  These virtual GPIOs can be routed to real GPIOs or attached to
+	  simulators on the host (like QEMU).
+
 endmenu
 
 endif
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 5243e2d1c207..fbcda637d5e1 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -92,7 +92,6 @@ obj-$(CONFIG_GPIO_MAX77620)		+= gpio-max77620.o
 obj-$(CONFIG_GPIO_MAX77650)		+= gpio-max77650.o
 obj-$(CONFIG_GPIO_MB86S7X)		+= gpio-mb86s7x.o
 obj-$(CONFIG_GPIO_MC33880)		+= gpio-mc33880.o
-obj-$(CONFIG_GPIO_MC9S08DZ60)		+= gpio-mc9s08dz60.o
 obj-$(CONFIG_GPIO_MENZ127)		+= gpio-menz127.o
 obj-$(CONFIG_GPIO_MERRIFIELD)		+= gpio-merrifield.o
 obj-$(CONFIG_GPIO_ML_IOH)		+= gpio-ml-ioh.o
@@ -166,6 +165,7 @@ obj-$(CONFIG_GPIO_UCB1400)		+= gpio-ucb1400.o
 obj-$(CONFIG_GPIO_UNIPHIER)		+= gpio-uniphier.o
 obj-$(CONFIG_GPIO_VF610)		+= gpio-vf610.o
 obj-$(CONFIG_GPIO_VIPERBOARD)		+= gpio-viperboard.o
+obj-$(CONFIG_GPIO_VIRTIO)		+= gpio-virtio.o
 obj-$(CONFIG_GPIO_VISCONTI)		+= gpio-visconti.o
 obj-$(CONFIG_GPIO_VR41XX)		+= gpio-vr41xx.o
 obj-$(CONFIG_GPIO_VX855)		+= gpio-vx855.o
diff --git a/drivers/gpio/gpio-aspeed-sgpio.c b/drivers/gpio/gpio-aspeed-sgpio.c
index a99ece15db95..10f303d15225 100644
--- a/drivers/gpio/gpio-aspeed-sgpio.c
+++ b/drivers/gpio/gpio-aspeed-sgpio.c
@@ -17,37 +17,30 @@
 #include <linux/spinlock.h>
 #include <linux/string.h>
 
-/*
- * MAX_NR_HW_GPIO represents the number of actual hardware-supported GPIOs (ie,
- * slots within the clocked serial GPIO data). Since each HW GPIO is both an
- * input and an output, we provide MAX_NR_HW_GPIO * 2 lines on our gpiochip
- * device.
- *
- * We use SGPIO_OUTPUT_OFFSET to define the split between the inputs and
- * outputs; the inputs start at line 0, the outputs start at OUTPUT_OFFSET.
- */
-#define MAX_NR_HW_SGPIO			80
-#define SGPIO_OUTPUT_OFFSET		MAX_NR_HW_SGPIO
-
 #define ASPEED_SGPIO_CTRL		0x54
 
-#define ASPEED_SGPIO_PINS_MASK		GENMASK(9, 6)
 #define ASPEED_SGPIO_CLK_DIV_MASK	GENMASK(31, 16)
 #define ASPEED_SGPIO_ENABLE		BIT(0)
+#define ASPEED_SGPIO_PINS_SHIFT		6
+
+struct aspeed_sgpio_pdata {
+	const u32 pin_mask;
+};
 
 struct aspeed_sgpio {
 	struct gpio_chip chip;
+	struct irq_chip intc;
 	struct clk *pclk;
 	spinlock_t lock;
 	void __iomem *base;
 	int irq;
-	int n_sgpio;
 };
 
 struct aspeed_sgpio_bank {
-	uint16_t    val_regs;
-	uint16_t    rdata_reg;
-	uint16_t    irq_regs;
+	u16    val_regs;
+	u16    rdata_reg;
+	u16    irq_regs;
+	u16    tolerance_regs;
 	const char  names[4][3];
 };
 
@@ -63,19 +56,29 @@ static const struct aspeed_sgpio_bank aspeed_sgpio_banks[] = {
 		.val_regs = 0x0000,
 		.rdata_reg = 0x0070,
 		.irq_regs = 0x0004,
+		.tolerance_regs = 0x0018,
 		.names = { "A", "B", "C", "D" },
 	},
 	{
 		.val_regs = 0x001C,
 		.rdata_reg = 0x0074,
 		.irq_regs = 0x0020,
+		.tolerance_regs = 0x0034,
 		.names = { "E", "F", "G", "H" },
 	},
 	{
 		.val_regs = 0x0038,
 		.rdata_reg = 0x0078,
 		.irq_regs = 0x003C,
-		.names = { "I", "J" },
+		.tolerance_regs = 0x0050,
+		.names = { "I", "J", "K", "L" },
+	},
+	{
+		.val_regs = 0x0090,
+		.rdata_reg = 0x007C,
+		.irq_regs = 0x0094,
+		.tolerance_regs = 0x00A8,
+		.names = { "M", "N", "O", "P" },
 	},
 };
 
@@ -87,6 +90,7 @@ enum aspeed_sgpio_reg {
 	reg_irq_type1,
 	reg_irq_type2,
 	reg_irq_status,
+	reg_tolerance,
 };
 
 #define GPIO_VAL_VALUE      0x00
@@ -115,15 +119,17 @@ static void __iomem *bank_reg(struct aspeed_sgpio *gpio,
 		return gpio->base + bank->irq_regs + GPIO_IRQ_TYPE2;
 	case reg_irq_status:
 		return gpio->base + bank->irq_regs + GPIO_IRQ_STATUS;
+	case reg_tolerance:
+		return gpio->base + bank->tolerance_regs;
 	default:
 		/* acturally if code runs to here, it's an error case */
 		BUG();
 	}
 }
 
-#define GPIO_BANK(x)    ((x % SGPIO_OUTPUT_OFFSET) >> 5)
-#define GPIO_OFFSET(x)  ((x % SGPIO_OUTPUT_OFFSET) & 0x1f)
-#define GPIO_BIT(x)     BIT(GPIO_OFFSET(x))
+#define GPIO_BANK(x)    ((x) >> 6)
+#define GPIO_OFFSET(x)  ((x) & GENMASK(5, 0))
+#define GPIO_BIT(x)     BIT(GPIO_OFFSET(x) >> 1)
 
 static const struct aspeed_sgpio_bank *to_bank(unsigned int offset)
 {
@@ -138,39 +144,25 @@ static const struct aspeed_sgpio_bank *to_bank(unsigned int offset)
 static int aspeed_sgpio_init_valid_mask(struct gpio_chip *gc,
 		unsigned long *valid_mask, unsigned int ngpios)
 {
-	struct aspeed_sgpio *sgpio = gpiochip_get_data(gc);
-	int n = sgpio->n_sgpio;
-	int c = SGPIO_OUTPUT_OFFSET - n;
-
-	WARN_ON(ngpios < MAX_NR_HW_SGPIO * 2);
-
-	/* input GPIOs in the lower range */
-	bitmap_set(valid_mask, 0, n);
-	bitmap_clear(valid_mask, n, c);
-
-	/* output GPIOS above SGPIO_OUTPUT_OFFSET */
-	bitmap_set(valid_mask, SGPIO_OUTPUT_OFFSET, n);
-	bitmap_clear(valid_mask, SGPIO_OUTPUT_OFFSET + n, c);
-
+	bitmap_set(valid_mask, 0, ngpios);
 	return 0;
 }
 
 static void aspeed_sgpio_irq_init_valid_mask(struct gpio_chip *gc,
 		unsigned long *valid_mask, unsigned int ngpios)
 {
-	struct aspeed_sgpio *sgpio = gpiochip_get_data(gc);
-	int n = sgpio->n_sgpio;
+	unsigned int i;
 
-	WARN_ON(ngpios < MAX_NR_HW_SGPIO * 2);
-
-	/* input GPIOs in the lower range */
-	bitmap_set(valid_mask, 0, n);
-	bitmap_clear(valid_mask, n, ngpios - n);
+	/* input GPIOs are even bits */
+	for (i = 0; i < ngpios; i++) {
+		if (i % 2)
+			clear_bit(i, valid_mask);
+	}
 }
 
 static bool aspeed_sgpio_is_input(unsigned int offset)
 {
-	return offset < SGPIO_OUTPUT_OFFSET;
+	return !(offset % 2);
 }
 
 static int aspeed_sgpio_get(struct gpio_chip *gc, unsigned int offset)
@@ -409,14 +401,6 @@ static void aspeed_sgpio_irq_handler(struct irq_desc *desc)
 	chained_irq_exit(ic, desc);
 }
 
-static struct irq_chip aspeed_sgpio_irqchip = {
-	.name       = "aspeed-sgpio",
-	.irq_ack    = aspeed_sgpio_irq_ack,
-	.irq_mask   = aspeed_sgpio_irq_mask,
-	.irq_unmask = aspeed_sgpio_irq_unmask,
-	.irq_set_type   = aspeed_sgpio_set_type,
-};
-
 static int aspeed_sgpio_setup_irqs(struct aspeed_sgpio *gpio,
 				   struct platform_device *pdev)
 {
@@ -439,8 +423,14 @@ static int aspeed_sgpio_setup_irqs(struct aspeed_sgpio *gpio,
 		iowrite32(0xffffffff, bank_reg(gpio, bank, reg_irq_status));
 	}
 
+	gpio->intc.name = dev_name(&pdev->dev);
+	gpio->intc.irq_ack = aspeed_sgpio_irq_ack;
+	gpio->intc.irq_mask = aspeed_sgpio_irq_mask;
+	gpio->intc.irq_unmask = aspeed_sgpio_irq_unmask;
+	gpio->intc.irq_set_type = aspeed_sgpio_set_type;
+
 	irq = &gpio->chip.irq;
-	irq->chip = &aspeed_sgpio_irqchip;
+	irq->chip = &gpio->intc;
 	irq->init_valid_mask = aspeed_sgpio_irq_init_valid_mask;
 	irq->handler = handle_bad_irq;
 	irq->default_type = IRQ_TYPE_NONE;
@@ -463,9 +453,56 @@ static int aspeed_sgpio_setup_irqs(struct aspeed_sgpio *gpio,
 	return 0;
 }
 
+static const struct aspeed_sgpio_pdata ast2400_sgpio_pdata = {
+	.pin_mask = GENMASK(9, 6),
+};
+
+static int aspeed_sgpio_reset_tolerance(struct gpio_chip *chip,
+					unsigned int offset, bool enable)
+{
+	struct aspeed_sgpio *gpio = gpiochip_get_data(chip);
+	unsigned long flags;
+	void __iomem *reg;
+	u32 val;
+
+	reg = bank_reg(gpio, to_bank(offset), reg_tolerance);
+
+	spin_lock_irqsave(&gpio->lock, flags);
+
+	val = readl(reg);
+
+	if (enable)
+		val |= GPIO_BIT(offset);
+	else
+		val &= ~GPIO_BIT(offset);
+
+	writel(val, reg);
+
+	spin_unlock_irqrestore(&gpio->lock, flags);
+
+	return 0;
+}
+
+static int aspeed_sgpio_set_config(struct gpio_chip *chip, unsigned int offset,
+				   unsigned long config)
+{
+	unsigned long param = pinconf_to_config_param(config);
+	u32 arg = pinconf_to_config_argument(config);
+
+	if (param == PIN_CONFIG_PERSIST_STATE)
+		return aspeed_sgpio_reset_tolerance(chip, offset, arg);
+
+	return -ENOTSUPP;
+}
+
+static const struct aspeed_sgpio_pdata ast2600_sgpiom_pdata = {
+	.pin_mask = GENMASK(10, 6),
+};
+
 static const struct of_device_id aspeed_sgpio_of_table[] = {
-	{ .compatible = "aspeed,ast2400-sgpio" },
-	{ .compatible = "aspeed,ast2500-sgpio" },
+	{ .compatible = "aspeed,ast2400-sgpio", .data = &ast2400_sgpio_pdata, },
+	{ .compatible = "aspeed,ast2500-sgpio", .data = &ast2400_sgpio_pdata, },
+	{ .compatible = "aspeed,ast2600-sgpiom", .data = &ast2600_sgpiom_pdata, },
 	{}
 };
 
@@ -473,10 +510,11 @@ MODULE_DEVICE_TABLE(of, aspeed_sgpio_of_table);
 
 static int __init aspeed_sgpio_probe(struct platform_device *pdev)
 {
+	u32 nr_gpios, sgpio_freq, sgpio_clk_div, gpio_cnt_regval, pin_mask;
+	const struct aspeed_sgpio_pdata *pdata;
 	struct aspeed_sgpio *gpio;
-	u32 nr_gpios, sgpio_freq, sgpio_clk_div;
-	int rc;
 	unsigned long apb_freq;
+	int rc;
 
 	gpio = devm_kzalloc(&pdev->dev, sizeof(*gpio), GFP_KERNEL);
 	if (!gpio)
@@ -486,18 +524,23 @@ static int __init aspeed_sgpio_probe(struct platform_device *pdev)
 	if (IS_ERR(gpio->base))
 		return PTR_ERR(gpio->base);
 
-	rc = of_property_read_u32(pdev->dev.of_node, "ngpios", &nr_gpios);
+	pdata = device_get_match_data(&pdev->dev);
+	if (!pdata)
+		return -EINVAL;
+
+	pin_mask = pdata->pin_mask;
+
+	rc = device_property_read_u32(&pdev->dev, "ngpios", &nr_gpios);
 	if (rc < 0) {
 		dev_err(&pdev->dev, "Could not read ngpios property\n");
 		return -EINVAL;
-	} else if (nr_gpios > MAX_NR_HW_SGPIO) {
-		dev_err(&pdev->dev, "Number of GPIOs exceeds the maximum of %d: %d\n",
-			MAX_NR_HW_SGPIO, nr_gpios);
+	} else if (nr_gpios % 8) {
+		dev_err(&pdev->dev, "Number of GPIOs not multiple of 8: %d\n",
+			nr_gpios);
 		return -EINVAL;
 	}
-	gpio->n_sgpio = nr_gpios;
 
-	rc = of_property_read_u32(pdev->dev.of_node, "bus-frequency", &sgpio_freq);
+	rc = device_property_read_u32(&pdev->dev, "bus-frequency", &sgpio_freq);
 	if (rc < 0) {
 		dev_err(&pdev->dev, "Could not read bus-frequency property\n");
 		return -EINVAL;
@@ -528,15 +571,14 @@ static int __init aspeed_sgpio_probe(struct platform_device *pdev)
 	if (sgpio_clk_div > (1 << 16) - 1)
 		return -EINVAL;
 
-	iowrite32(FIELD_PREP(ASPEED_SGPIO_CLK_DIV_MASK, sgpio_clk_div) |
-		  FIELD_PREP(ASPEED_SGPIO_PINS_MASK, (nr_gpios / 8)) |
-		  ASPEED_SGPIO_ENABLE,
-		  gpio->base + ASPEED_SGPIO_CTRL);
+	gpio_cnt_regval = ((nr_gpios / 8) << ASPEED_SGPIO_PINS_SHIFT) & pin_mask;
+	iowrite32(FIELD_PREP(ASPEED_SGPIO_CLK_DIV_MASK, sgpio_clk_div) | gpio_cnt_regval |
+		  ASPEED_SGPIO_ENABLE, gpio->base + ASPEED_SGPIO_CTRL);
 
 	spin_lock_init(&gpio->lock);
 
 	gpio->chip.parent = &pdev->dev;
-	gpio->chip.ngpio = MAX_NR_HW_SGPIO * 2;
+	gpio->chip.ngpio = nr_gpios * 2;
 	gpio->chip.init_valid_mask = aspeed_sgpio_init_valid_mask;
 	gpio->chip.direction_input = aspeed_sgpio_dir_in;
 	gpio->chip.direction_output = aspeed_sgpio_dir_out;
@@ -545,7 +587,7 @@ static int __init aspeed_sgpio_probe(struct platform_device *pdev)
 	gpio->chip.free = NULL;
 	gpio->chip.get = aspeed_sgpio_get;
 	gpio->chip.set = aspeed_sgpio_set;
-	gpio->chip.set_config = NULL;
+	gpio->chip.set_config = aspeed_sgpio_set_config;
 	gpio->chip.label = dev_name(&pdev->dev);
 	gpio->chip.base = -1;
 
diff --git a/drivers/gpio/gpio-brcmstb.c b/drivers/gpio/gpio-brcmstb.c
index 74b7c91c3d1a..895a79936248 100644
--- a/drivers/gpio/gpio-brcmstb.c
+++ b/drivers/gpio/gpio-brcmstb.c
@@ -602,49 +602,6 @@ static const struct dev_pm_ops brcmstb_gpio_pm_ops = {
 	.resume_noirq = brcmstb_gpio_resume,
 };
 
-static void brcmstb_gpio_set_names(struct device *dev,
-				   struct brcmstb_gpio_bank *bank)
-{
-	struct device_node *np = dev->of_node;
-	const char **names;
-	int nstrings, base;
-	unsigned int i;
-
-	base = bank->id * MAX_GPIO_PER_BANK;
-
-	nstrings = of_property_count_strings(np, "gpio-line-names");
-	if (nstrings <= base)
-		/* Line names not present */
-		return;
-
-	names = devm_kcalloc(dev, MAX_GPIO_PER_BANK, sizeof(*names),
-			     GFP_KERNEL);
-	if (!names)
-		return;
-
-	/*
-	 * Make sure to not index beyond the end of the number of descriptors
-	 * of the GPIO device.
-	 */
-	for (i = 0; i < bank->width; i++) {
-		const char *name;
-		int ret;
-
-		ret = of_property_read_string_index(np, "gpio-line-names",
-						    base + i, &name);
-		if (ret) {
-			if (ret != -ENODATA)
-				dev_err(dev, "unable to name line %d: %d\n",
-					base + i, ret);
-			break;
-		}
-		if (*name)
-			names[i] = name;
-	}
-
-	bank->gc.names = names;
-}
-
 static int brcmstb_gpio_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -758,6 +715,7 @@ static int brcmstb_gpio_probe(struct platform_device *pdev)
 		gc->of_xlate = brcmstb_gpio_of_xlate;
 		/* not all ngpio lines are valid, will use bank width later */
 		gc->ngpio = MAX_GPIO_PER_BANK;
+		gc->offset = bank->id * MAX_GPIO_PER_BANK;
 		if (priv->parent_irq > 0)
 			gc->to_irq = brcmstb_gpio_to_irq;
 
@@ -768,7 +726,6 @@ static int brcmstb_gpio_probe(struct platform_device *pdev)
 		need_wakeup_event |= !!__brcmstb_gpio_get_active_irqs(bank);
 		gc->write_reg(reg_base + GIO_MASK(bank->id), 0);
 
-		brcmstb_gpio_set_names(dev, bank);
 		err = gpiochip_add_data(gc, bank);
 		if (err) {
 			dev_err(dev, "Could not add gpiochip for bank %d\n",
diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c
index 3eb13d6d31ef..f98fa33e1679 100644
--- a/drivers/gpio/gpio-dwapb.c
+++ b/drivers/gpio/gpio-dwapb.c
@@ -16,7 +16,6 @@
 #include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/platform_data/gpio-dwapb.h>
 #include <linux/platform_device.h>
 #include <linux/property.h>
 #include <linux/reset.h>
@@ -48,6 +47,7 @@
 
 #define DWAPB_DRIVER_NAME	"gpio-dwapb"
 #define DWAPB_MAX_PORTS		4
+#define DWAPB_MAX_GPIOS		32
 
 #define GPIO_EXT_PORT_STRIDE	0x04 /* register stride 32 bits */
 #define GPIO_SWPORT_DR_STRIDE	0x0c /* register stride 3*32 bits */
@@ -65,6 +65,19 @@
 
 struct dwapb_gpio;
 
+struct dwapb_port_property {
+	struct fwnode_handle *fwnode;
+	unsigned int idx;
+	unsigned int ngpio;
+	unsigned int gpio_base;
+	int irq[DWAPB_MAX_GPIOS];
+};
+
+struct dwapb_platform_data {
+	struct dwapb_port_property *properties;
+	unsigned int nports;
+};
+
 #ifdef CONFIG_PM_SLEEP
 /* Store GPIO context across system-wide suspend/resume transitions */
 struct dwapb_context {
@@ -436,21 +449,17 @@ static void dwapb_configure_irqs(struct dwapb_gpio *gpio,
 	pirq->irqchip.irq_set_wake = dwapb_irq_set_wake;
 #endif
 
-	if (!pp->irq_shared) {
-		girq->num_parents = pirq->nr_irqs;
-		girq->parents = pirq->irq;
-		girq->parent_handler_data = gpio;
-		girq->parent_handler = dwapb_irq_handler;
-	} else {
-		/* This will let us handle the parent IRQ in the driver */
+	/*
+	 * Intel ACPI-based platforms mostly have the DesignWare APB GPIO
+	 * IRQ lane shared between several devices. In that case the parental
+	 * IRQ has to be handled in the shared way so to be properly delivered
+	 * to all the connected devices.
+	 */
+	if (has_acpi_companion(gpio->dev)) {
 		girq->num_parents = 0;
 		girq->parents = NULL;
 		girq->parent_handler = NULL;
 
-		/*
-		 * Request a shared IRQ since where MFD would have devices
-		 * using the same irq pin
-		 */
 		err = devm_request_irq(gpio->dev, pp->irq[0],
 				       dwapb_irq_handler_mfd,
 				       IRQF_SHARED, DWAPB_DRIVER_NAME, gpio);
@@ -458,6 +467,11 @@ static void dwapb_configure_irqs(struct dwapb_gpio *gpio,
 			dev_err(gpio->dev, "error requesting IRQ\n");
 			goto err_kfree_pirq;
 		}
+	} else {
+		girq->num_parents = pirq->nr_irqs;
+		girq->parents = pirq->irq;
+		girq->parent_handler_data = gpio;
+		girq->parent_handler = dwapb_irq_handler;
 	}
 
 	girq->chip = &pirq->irqchip;
@@ -581,9 +595,12 @@ static struct dwapb_platform_data *dwapb_gpio_get_pdata(struct device *dev)
 			pp->ngpio = DWAPB_MAX_GPIOS;
 		}
 
-		pp->irq_shared	= false;
 		pp->gpio_base	= -1;
 
+		/* For internal use only, new platforms mustn't exercise this */
+		if (is_software_node(fwnode))
+			fwnode_property_read_u32(fwnode, "gpio-base", &pp->gpio_base);
+
 		/*
 		 * Only port A can provide interrupts in all configurations of
 		 * the IP.
@@ -670,17 +687,12 @@ static int dwapb_gpio_probe(struct platform_device *pdev)
 	unsigned int i;
 	struct dwapb_gpio *gpio;
 	int err;
+	struct dwapb_platform_data *pdata;
 	struct device *dev = &pdev->dev;
-	struct dwapb_platform_data *pdata = dev_get_platdata(dev);
-
-	if (!pdata) {
-		pdata = dwapb_gpio_get_pdata(dev);
-		if (IS_ERR(pdata))
-			return PTR_ERR(pdata);
-	}
 
-	if (!pdata->nports)
-		return -ENODEV;
+	pdata = dwapb_gpio_get_pdata(dev);
+	if (IS_ERR(pdata))
+		return PTR_ERR(pdata);
 
 	gpio = devm_kzalloc(&pdev->dev, sizeof(*gpio), GFP_KERNEL);
 	if (!gpio)
diff --git a/drivers/gpio/gpio-mc9s08dz60.c b/drivers/gpio/gpio-mc9s08dz60.c
deleted file mode 100644
index a9f17cebd5ed..000000000000
--- a/drivers/gpio/gpio-mc9s08dz60.c
+++ /dev/null
@@ -1,112 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright 2009-2012 Freescale Semiconductor, Inc. All Rights Reserved.
- *
- * Author: Wu Guoxing <b39297@freescale.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/i2c.h>
-#include <linux/gpio/driver.h>
-
-#define GPIO_GROUP_NUM 2
-#define GPIO_NUM_PER_GROUP 8
-#define GPIO_NUM (GPIO_GROUP_NUM*GPIO_NUM_PER_GROUP)
-
-struct mc9s08dz60 {
-	struct i2c_client *client;
-	struct gpio_chip chip;
-};
-
-static void mc9s_gpio_to_reg_and_bit(int offset, u8 *reg, u8 *bit)
-{
-	*reg = 0x20 + offset / GPIO_NUM_PER_GROUP;
-	*bit = offset % GPIO_NUM_PER_GROUP;
-}
-
-static int mc9s08dz60_get_value(struct gpio_chip *gc, unsigned offset)
-{
-	u8 reg, bit;
-	s32 value;
-	struct mc9s08dz60 *mc9s = gpiochip_get_data(gc);
-
-	mc9s_gpio_to_reg_and_bit(offset, &reg, &bit);
-	value = i2c_smbus_read_byte_data(mc9s->client, reg);
-
-	return (value >= 0) ? (value >> bit) & 0x1 : 0;
-}
-
-static int mc9s08dz60_set(struct mc9s08dz60 *mc9s, unsigned offset, int val)
-{
-	u8 reg, bit;
-	s32 value;
-
-	mc9s_gpio_to_reg_and_bit(offset, &reg, &bit);
-	value = i2c_smbus_read_byte_data(mc9s->client, reg);
-	if (value >= 0) {
-		if (val)
-			value |= 1 << bit;
-		else
-			value &= ~(1 << bit);
-
-		return i2c_smbus_write_byte_data(mc9s->client, reg, value);
-	} else
-		return value;
-
-}
-
-
-static void mc9s08dz60_set_value(struct gpio_chip *gc, unsigned offset, int val)
-{
-	struct mc9s08dz60 *mc9s = gpiochip_get_data(gc);
-
-	mc9s08dz60_set(mc9s, offset, val);
-}
-
-static int mc9s08dz60_direction_output(struct gpio_chip *gc,
-				       unsigned offset, int val)
-{
-	struct mc9s08dz60 *mc9s = gpiochip_get_data(gc);
-
-	return mc9s08dz60_set(mc9s, offset, val);
-}
-
-static int mc9s08dz60_probe(struct i2c_client *client,
-			    const struct i2c_device_id *id)
-{
-	struct mc9s08dz60 *mc9s;
-
-	mc9s = devm_kzalloc(&client->dev, sizeof(*mc9s), GFP_KERNEL);
-	if (!mc9s)
-		return -ENOMEM;
-
-	mc9s->chip.label = client->name;
-	mc9s->chip.base = -1;
-	mc9s->chip.parent = &client->dev;
-	mc9s->chip.owner = THIS_MODULE;
-	mc9s->chip.ngpio = GPIO_NUM;
-	mc9s->chip.can_sleep = true;
-	mc9s->chip.get = mc9s08dz60_get_value;
-	mc9s->chip.set = mc9s08dz60_set_value;
-	mc9s->chip.direction_output = mc9s08dz60_direction_output;
-	mc9s->client = client;
-	i2c_set_clientdata(client, mc9s);
-
-	return devm_gpiochip_add_data(&client->dev, &mc9s->chip, mc9s);
-}
-
-static const struct i2c_device_id mc9s08dz60_id[] = {
-	{"mc9s08dz60", 0},
-	{},
-};
-
-static struct i2c_driver mc9s08dz60_i2c_driver = {
-	.driver = {
-		.name = "mc9s08dz60",
-	},
-	.probe = mc9s08dz60_probe,
-	.id_table = mc9s08dz60_id,
-};
-builtin_i2c_driver(mc9s08dz60_i2c_driver);
diff --git a/drivers/gpio/gpio-ml-ioh.c b/drivers/gpio/gpio-ml-ioh.c
index 53d4abefa6ff..efa9acdc320a 100644
--- a/drivers/gpio/gpio-ml-ioh.c
+++ b/drivers/gpio/gpio-ml-ioh.c
@@ -155,11 +155,10 @@ static int ioh_gpio_direction_input(struct gpio_chip *gpio, unsigned nr)
 	return 0;
 }
 
-#ifdef CONFIG_PM
 /*
  * Save register configuration and disable interrupts.
  */
-static void ioh_gpio_save_reg_conf(struct ioh_gpio *chip)
+static void __maybe_unused ioh_gpio_save_reg_conf(struct ioh_gpio *chip)
 {
 	int i;
 
@@ -185,7 +184,7 @@ static void ioh_gpio_save_reg_conf(struct ioh_gpio *chip)
 /*
  * This function restores the register configuration of the GPIO device.
  */
-static void ioh_gpio_restore_reg_conf(struct ioh_gpio *chip)
+static void __maybe_unused ioh_gpio_restore_reg_conf(struct ioh_gpio *chip)
 {
 	int i;
 
@@ -207,7 +206,6 @@ static void ioh_gpio_restore_reg_conf(struct ioh_gpio *chip)
 				  &chip->reg->ioh_sel_reg[i]);
 	}
 }
-#endif
 
 static int ioh_gpio_to_irq(struct gpio_chip *gpio, unsigned offset)
 {
@@ -522,47 +520,23 @@ static void ioh_gpio_remove(struct pci_dev *pdev)
 	kfree(chip);
 }
 
-#ifdef CONFIG_PM
-static int ioh_gpio_suspend(struct pci_dev *pdev, pm_message_t state)
+static int __maybe_unused ioh_gpio_suspend(struct device *dev)
 {
-	s32 ret;
-	struct ioh_gpio *chip = pci_get_drvdata(pdev);
+	struct ioh_gpio *chip = dev_get_drvdata(dev);
 	unsigned long flags;
 
 	spin_lock_irqsave(&chip->spinlock, flags);
 	ioh_gpio_save_reg_conf(chip);
 	spin_unlock_irqrestore(&chip->spinlock, flags);
 
-	ret = pci_save_state(pdev);
-	if (ret) {
-		dev_err(&pdev->dev, "pci_save_state Failed-%d\n", ret);
-		return ret;
-	}
-	pci_disable_device(pdev);
-	pci_set_power_state(pdev, PCI_D0);
-	ret = pci_enable_wake(pdev, PCI_D0, 1);
-	if (ret)
-		dev_err(&pdev->dev, "pci_enable_wake Failed -%d\n", ret);
-
 	return 0;
 }
 
-static int ioh_gpio_resume(struct pci_dev *pdev)
+static int __maybe_unused ioh_gpio_resume(struct device *dev)
 {
-	s32 ret;
-	struct ioh_gpio *chip = pci_get_drvdata(pdev);
+	struct ioh_gpio *chip = dev_get_drvdata(dev);
 	unsigned long flags;
 
-	ret = pci_enable_wake(pdev, PCI_D0, 0);
-
-	pci_set_power_state(pdev, PCI_D0);
-	ret = pci_enable_device(pdev);
-	if (ret) {
-		dev_err(&pdev->dev, "pci_enable_device Failed-%d ", ret);
-		return ret;
-	}
-	pci_restore_state(pdev);
-
 	spin_lock_irqsave(&chip->spinlock, flags);
 	iowrite32(0x01, &chip->reg->srst);
 	iowrite32(0x00, &chip->reg->srst);
@@ -571,10 +545,8 @@ static int ioh_gpio_resume(struct pci_dev *pdev)
 
 	return 0;
 }
-#else
-#define ioh_gpio_suspend NULL
-#define ioh_gpio_resume NULL
-#endif
+
+static SIMPLE_DEV_PM_OPS(ioh_gpio_pm_ops, ioh_gpio_suspend, ioh_gpio_resume);
 
 static const struct pci_device_id ioh_gpio_pcidev_id[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_ROHM, 0x802E) },
@@ -587,8 +559,9 @@ static struct pci_driver ioh_gpio_driver = {
 	.id_table = ioh_gpio_pcidev_id,
 	.probe = ioh_gpio_probe,
 	.remove = ioh_gpio_remove,
-	.suspend = ioh_gpio_suspend,
-	.resume = ioh_gpio_resume
+	.driver = {
+		.pm = &ioh_gpio_pm_ops,
+	},
 };
 
 module_pci_driver(ioh_gpio_driver);
diff --git a/drivers/gpio/gpio-mlxbf2.c b/drivers/gpio/gpio-mlxbf2.c
index befa5e109943..177d03ef4529 100644
--- a/drivers/gpio/gpio-mlxbf2.c
+++ b/drivers/gpio/gpio-mlxbf2.c
@@ -1,6 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 
-#include <linux/acpi.h>
 #include <linux/bitfield.h>
 #include <linux/bitops.h>
 #include <linux/device.h>
@@ -8,6 +7,7 @@
 #include <linux/io.h>
 #include <linux/ioport.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
@@ -47,12 +47,10 @@
 #define YU_GPIO_MODE0_SET		0x54
 #define YU_GPIO_MODE0_CLEAR		0x58
 
-#ifdef CONFIG_PM
 struct mlxbf2_gpio_context_save_regs {
 	u32 gpio_mode0;
 	u32 gpio_mode1;
 };
-#endif
 
 /* BlueField-2 gpio block context structure. */
 struct mlxbf2_gpio_context {
@@ -61,9 +59,7 @@ struct mlxbf2_gpio_context {
 	/* YU GPIO blocks address */
 	void __iomem *gpio_io;
 
-#ifdef CONFIG_PM
 	struct mlxbf2_gpio_context_save_regs *csave_regs;
-#endif
 };
 
 /* BlueField-2 gpio shared structure. */
@@ -73,11 +69,8 @@ struct mlxbf2_gpio_param {
 	struct mutex *lock;
 };
 
-static struct resource yu_arm_gpio_lock_res = {
-	.start = YU_ARM_GPIO_LOCK_ADDR,
-	.end   = YU_ARM_GPIO_LOCK_ADDR + YU_ARM_GPIO_LOCK_SIZE - 1,
-	.name  = "YU_ARM_GPIO_LOCK",
-};
+static struct resource yu_arm_gpio_lock_res =
+	DEFINE_RES_MEM_NAMED(YU_ARM_GPIO_LOCK_ADDR, YU_ARM_GPIO_LOCK_SIZE, "YU_ARM_GPIO_LOCK");
 
 static DEFINE_MUTEX(yu_arm_gpio_lock_mutex);
 
@@ -232,7 +225,6 @@ mlxbf2_gpio_probe(struct platform_device *pdev)
 	struct mlxbf2_gpio_context *gs;
 	struct device *dev = &pdev->dev;
 	struct gpio_chip *gc;
-	struct resource *res;
 	unsigned int npins;
 	int ret;
 
@@ -241,13 +233,9 @@ mlxbf2_gpio_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	/* YU GPIO block address */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -ENODEV;
-
-	gs->gpio_io = devm_ioremap(dev, res->start, resource_size(res));
-	if (!gs->gpio_io)
-		return -ENOMEM;
+	gs->gpio_io = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(gs->gpio_io))
+		return PTR_ERR(gs->gpio_io);
 
 	ret = mlxbf2_gpio_get_lock_res(pdev);
 	if (ret) {
@@ -284,11 +272,9 @@ mlxbf2_gpio_probe(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM
-static int mlxbf2_gpio_suspend(struct platform_device *pdev,
-				pm_message_t state)
+static int __maybe_unused mlxbf2_gpio_suspend(struct device *dev)
 {
-	struct mlxbf2_gpio_context *gs = platform_get_drvdata(pdev);
+	struct mlxbf2_gpio_context *gs = dev_get_drvdata(dev);
 
 	gs->csave_regs->gpio_mode0 = readl(gs->gpio_io +
 		YU_GPIO_MODE0);
@@ -298,9 +284,9 @@ static int mlxbf2_gpio_suspend(struct platform_device *pdev,
 	return 0;
 }
 
-static int mlxbf2_gpio_resume(struct platform_device *pdev)
+static int __maybe_unused mlxbf2_gpio_resume(struct device *dev)
 {
-	struct mlxbf2_gpio_context *gs = platform_get_drvdata(pdev);
+	struct mlxbf2_gpio_context *gs = dev_get_drvdata(dev);
 
 	writel(gs->csave_regs->gpio_mode0, gs->gpio_io +
 		YU_GPIO_MODE0);
@@ -309,7 +295,7 @@ static int mlxbf2_gpio_resume(struct platform_device *pdev)
 
 	return 0;
 }
-#endif
+static SIMPLE_DEV_PM_OPS(mlxbf2_pm_ops, mlxbf2_gpio_suspend, mlxbf2_gpio_resume);
 
 static const struct acpi_device_id __maybe_unused mlxbf2_gpio_acpi_match[] = {
 	{ "MLNXBF22", 0 },
@@ -320,13 +306,10 @@ MODULE_DEVICE_TABLE(acpi, mlxbf2_gpio_acpi_match);
 static struct platform_driver mlxbf2_gpio_driver = {
 	.driver = {
 		.name = "mlxbf2_gpio",
-		.acpi_match_table = ACPI_PTR(mlxbf2_gpio_acpi_match),
+		.acpi_match_table = mlxbf2_gpio_acpi_match,
+		.pm = &mlxbf2_pm_ops,
 	},
 	.probe    = mlxbf2_gpio_probe,
-#ifdef CONFIG_PM
-	.suspend  = mlxbf2_gpio_suspend,
-	.resume   = mlxbf2_gpio_resume,
-#endif
 };
 
 module_platform_driver(mlxbf2_gpio_driver);
diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c
index 67dc38976ab6..70d6ae20b1da 100644
--- a/drivers/gpio/gpio-mpc8xxx.c
+++ b/drivers/gpio/gpio-mpc8xxx.c
@@ -332,7 +332,7 @@ static int mpc8xxx_probe(struct platform_device *pdev)
 				 mpc8xxx_gc->regs + GPIO_DIR, NULL,
 				 BGPIOF_BIG_ENDIAN);
 		if (ret)
-			goto err;
+			return ret;
 		dev_dbg(&pdev->dev, "GPIO registers are LITTLE endian\n");
 	} else {
 		ret = bgpio_init(gc, &pdev->dev, 4,
@@ -342,7 +342,7 @@ static int mpc8xxx_probe(struct platform_device *pdev)
 				 BGPIOF_BIG_ENDIAN
 				 | BGPIOF_BIG_ENDIAN_BYTE_ORDER);
 		if (ret)
-			goto err;
+			return ret;
 		dev_dbg(&pdev->dev, "GPIO registers are BIG endian\n");
 	}
 
@@ -380,11 +380,11 @@ static int mpc8xxx_probe(struct platform_device *pdev)
 	    is_acpi_node(fwnode))
 		gc->write_reg(mpc8xxx_gc->regs + GPIO_IBE, 0xffffffff);
 
-	ret = gpiochip_add_data(gc, mpc8xxx_gc);
+	ret = devm_gpiochip_add_data(&pdev->dev, gc, mpc8xxx_gc);
 	if (ret) {
 		dev_err(&pdev->dev,
 			"GPIO chip registration failed with status %d\n", ret);
-		goto err;
+		return ret;
 	}
 
 	mpc8xxx_gc->irqn = platform_get_irq(pdev, 0);
@@ -416,7 +416,7 @@ static int mpc8xxx_probe(struct platform_device *pdev)
 
 	return 0;
 err:
-	iounmap(mpc8xxx_gc->regs);
+	irq_domain_remove(mpc8xxx_gc->irq);
 	return ret;
 }
 
@@ -429,9 +429,6 @@ static int mpc8xxx_remove(struct platform_device *pdev)
 		irq_domain_remove(mpc8xxx_gc->irq);
 	}
 
-	gpiochip_remove(&mpc8xxx_gc->gc);
-	iounmap(mpc8xxx_gc->regs);
-
 	return 0;
 }
 
diff --git a/drivers/gpio/gpio-mt7621.c b/drivers/gpio/gpio-mt7621.c
index 10c0a9bc5ea1..c3658a597a80 100644
--- a/drivers/gpio/gpio-mt7621.c
+++ b/drivers/gpio/gpio-mt7621.c
@@ -239,6 +239,7 @@ mediatek_gpio_bank_probe(struct device *dev,
 	if (!rg->chip.label)
 		return -ENOMEM;
 
+	rg->chip.offset = bank * MTK_BANK_WIDTH;
 	rg->irq_chip.name = dev_name(dev);
 	rg->irq_chip.parent_device = dev;
 	rg->irq_chip.irq_unmask = mediatek_gpio_irq_unmask;
diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c
index b378aba32602..f7b653314e7e 100644
--- a/drivers/gpio/gpio-rcar.c
+++ b/drivers/gpio/gpio-rcar.c
@@ -564,9 +564,9 @@ static int gpio_rcar_probe(struct platform_device *pdev)
 	}
 
 	if (p->info.has_inen) {
-		pm_runtime_get_sync(p->dev);
+		pm_runtime_get_sync(dev);
 		gpio_rcar_enable_inputs(p);
-		pm_runtime_put(p->dev);
+		pm_runtime_put(dev);
 	}
 
 	dev_info(dev, "driving %d GPIOs\n", npins);
diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c
index 05c90d76cb22..c99858f40a27 100644
--- a/drivers/gpio/gpio-tegra186.c
+++ b/drivers/gpio/gpio-tegra186.c
@@ -607,15 +607,21 @@ static int tegra186_gpio_probe(struct platform_device *pdev)
 	if (!gpio)
 		return -ENOMEM;
 
-	gpio->soc = of_device_get_match_data(&pdev->dev);
+	gpio->soc = device_get_match_data(&pdev->dev);
 
 	gpio->secure = devm_platform_ioremap_resource_byname(pdev, "security");
-	if (IS_ERR(gpio->secure))
-		return PTR_ERR(gpio->secure);
+	if (IS_ERR(gpio->secure)) {
+		gpio->secure = devm_platform_ioremap_resource(pdev, 0);
+		if (IS_ERR(gpio->secure))
+			return PTR_ERR(gpio->secure);
+	}
 
 	gpio->base = devm_platform_ioremap_resource_byname(pdev, "gpio");
-	if (IS_ERR(gpio->base))
-		return PTR_ERR(gpio->base);
+	if (IS_ERR(gpio->base)) {
+		gpio->base = devm_platform_ioremap_resource(pdev, 1);
+		if (IS_ERR(gpio->base))
+			return PTR_ERR(gpio->base);
+	}
 
 	err = platform_irq_count(pdev);
 	if (err < 0)
@@ -677,11 +683,13 @@ static int tegra186_gpio_probe(struct platform_device *pdev)
 
 	gpio->gpio.names = (const char * const *)names;
 
+#if defined(CONFIG_OF_GPIO)
 	gpio->gpio.of_node = pdev->dev.of_node;
 	gpio->gpio.of_gpio_n_cells = 2;
 	gpio->gpio.of_xlate = tegra186_gpio_of_xlate;
+#endif /* CONFIG_OF_GPIO */
 
-	gpio->intc.name = pdev->dev.of_node->name;
+	gpio->intc.name = dev_name(&pdev->dev);
 	gpio->intc.irq_ack = tegra186_irq_ack;
 	gpio->intc.irq_mask = tegra186_irq_mask;
 	gpio->intc.irq_unmask = tegra186_irq_unmask;
@@ -893,10 +901,20 @@ static const struct of_device_id tegra186_gpio_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, tegra186_gpio_of_match);
 
+static const struct acpi_device_id  tegra186_gpio_acpi_match[] = {
+	{ .id = "NVDA0108", .driver_data = (kernel_ulong_t)&tegra186_main_soc },
+	{ .id = "NVDA0208", .driver_data = (kernel_ulong_t)&tegra186_aon_soc },
+	{ .id = "NVDA0308", .driver_data = (kernel_ulong_t)&tegra194_main_soc },
+	{ .id = "NVDA0408", .driver_data = (kernel_ulong_t)&tegra194_aon_soc },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, tegra186_gpio_acpi_match);
+
 static struct platform_driver tegra186_gpio_driver = {
 	.driver = {
 		.name = "tegra186-gpio",
 		.of_match_table = tegra186_gpio_of_match,
+		.acpi_match_table = tegra186_gpio_acpi_match,
 	},
 	.probe = tegra186_gpio_probe,
 };
diff --git a/drivers/gpio/gpio-viperboard.c b/drivers/gpio/gpio-viperboard.c
index c301c1d56dd2..e55d28a8a66f 100644
--- a/drivers/gpio/gpio-viperboard.c
+++ b/drivers/gpio/gpio-viperboard.c
@@ -404,11 +404,10 @@ static int vprbrd_gpio_probe(struct platform_device *pdev)
 	vb_gpio->gpioa.get = vprbrd_gpioa_get;
 	vb_gpio->gpioa.direction_input = vprbrd_gpioa_direction_input;
 	vb_gpio->gpioa.direction_output = vprbrd_gpioa_direction_output;
+
 	ret = devm_gpiochip_add_data(&pdev->dev, &vb_gpio->gpioa, vb_gpio);
-	if (ret < 0) {
-		dev_err(vb_gpio->gpioa.parent, "could not add gpio a");
+	if (ret < 0)
 		return ret;
-	}
 
 	/* registering gpio b */
 	vb_gpio->gpiob.label = "viperboard gpio b";
@@ -421,15 +420,8 @@ static int vprbrd_gpio_probe(struct platform_device *pdev)
 	vb_gpio->gpiob.get = vprbrd_gpiob_get;
 	vb_gpio->gpiob.direction_input = vprbrd_gpiob_direction_input;
 	vb_gpio->gpiob.direction_output = vprbrd_gpiob_direction_output;
-	ret = devm_gpiochip_add_data(&pdev->dev, &vb_gpio->gpiob, vb_gpio);
-	if (ret < 0) {
-		dev_err(vb_gpio->gpiob.parent, "could not add gpio b");
-		return ret;
-	}
-
-	platform_set_drvdata(pdev, vb_gpio);
 
-	return ret;
+	return devm_gpiochip_add_data(&pdev->dev, &vb_gpio->gpiob, vb_gpio);
 }
 
 static struct platform_driver vprbrd_gpio_driver = {
diff --git a/drivers/gpio/gpio-virtio.c b/drivers/gpio/gpio-virtio.c
new file mode 100644
index 000000000000..d24f1c9264bc
--- /dev/null
+++ b/drivers/gpio/gpio-virtio.c
@@ -0,0 +1,374 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * GPIO driver for virtio-based virtual GPIO controllers
+ *
+ * Copyright (C) 2021 metux IT consult
+ * Enrico Weigelt, metux IT consult <info@metux.net>
+ *
+ * Copyright (C) 2021 Linaro.
+ * Viresh Kumar <viresh.kumar@linaro.org>
+ */
+
+#include <linux/completion.h>
+#include <linux/err.h>
+#include <linux/gpio/driver.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/virtio_config.h>
+#include <uapi/linux/virtio_gpio.h>
+#include <uapi/linux/virtio_ids.h>
+
+struct virtio_gpio_line {
+	struct mutex lock; /* Protects line operation */
+	struct completion completion;
+	struct virtio_gpio_request req ____cacheline_aligned;
+	struct virtio_gpio_response res ____cacheline_aligned;
+	unsigned int rxlen;
+};
+
+struct virtio_gpio {
+	struct virtio_device *vdev;
+	struct mutex lock; /* Protects virtqueue operation */
+	struct gpio_chip gc;
+	struct virtio_gpio_line *lines;
+	struct virtqueue *request_vq;
+};
+
+static int _virtio_gpio_req(struct virtio_gpio *vgpio, u16 type, u16 gpio,
+			    u8 txvalue, u8 *rxvalue, void *response, u32 rxlen)
+{
+	struct virtio_gpio_line *line = &vgpio->lines[gpio];
+	struct virtio_gpio_request *req = &line->req;
+	struct virtio_gpio_response *res = response;
+	struct scatterlist *sgs[2], req_sg, res_sg;
+	struct device *dev = &vgpio->vdev->dev;
+	int ret;
+
+	/*
+	 * Prevent concurrent requests for the same line since we have
+	 * pre-allocated request/response buffers for each GPIO line. Moreover
+	 * Linux always accesses a GPIO line sequentially, so this locking shall
+	 * always go through without any delays.
+	 */
+	mutex_lock(&line->lock);
+
+	req->type = cpu_to_le16(type);
+	req->gpio = cpu_to_le16(gpio);
+	req->value = cpu_to_le32(txvalue);
+
+	sg_init_one(&req_sg, req, sizeof(*req));
+	sg_init_one(&res_sg, res, rxlen);
+	sgs[0] = &req_sg;
+	sgs[1] = &res_sg;
+
+	line->rxlen = 0;
+	reinit_completion(&line->completion);
+
+	/*
+	 * Virtqueue callers need to ensure they don't call its APIs with other
+	 * virtqueue operations at the same time.
+	 */
+	mutex_lock(&vgpio->lock);
+	ret = virtqueue_add_sgs(vgpio->request_vq, sgs, 1, 1, line, GFP_KERNEL);
+	if (ret) {
+		dev_err(dev, "failed to add request to vq\n");
+		mutex_unlock(&vgpio->lock);
+		goto out;
+	}
+
+	virtqueue_kick(vgpio->request_vq);
+	mutex_unlock(&vgpio->lock);
+
+	if (!wait_for_completion_timeout(&line->completion, HZ)) {
+		dev_err(dev, "GPIO operation timed out\n");
+		ret = -ETIMEDOUT;
+		goto out;
+	}
+
+	if (unlikely(res->status != VIRTIO_GPIO_STATUS_OK)) {
+		dev_err(dev, "GPIO request failed: %d\n", gpio);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (unlikely(line->rxlen != rxlen)) {
+		dev_err(dev, "GPIO operation returned incorrect len (%u : %u)\n",
+			rxlen, line->rxlen);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (rxvalue)
+		*rxvalue = res->value;
+
+out:
+	mutex_unlock(&line->lock);
+	return ret;
+}
+
+static int virtio_gpio_req(struct virtio_gpio *vgpio, u16 type, u16 gpio,
+			   u8 txvalue, u8 *rxvalue)
+{
+	struct virtio_gpio_line *line = &vgpio->lines[gpio];
+	struct virtio_gpio_response *res = &line->res;
+
+	return _virtio_gpio_req(vgpio, type, gpio, txvalue, rxvalue, res,
+				sizeof(*res));
+}
+
+static void virtio_gpio_free(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct virtio_gpio *vgpio = gpiochip_get_data(gc);
+
+	virtio_gpio_req(vgpio, VIRTIO_GPIO_MSG_SET_DIRECTION, gpio,
+			VIRTIO_GPIO_DIRECTION_NONE, NULL);
+}
+
+static int virtio_gpio_get_direction(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct virtio_gpio *vgpio = gpiochip_get_data(gc);
+	u8 direction;
+	int ret;
+
+	ret = virtio_gpio_req(vgpio, VIRTIO_GPIO_MSG_GET_DIRECTION, gpio, 0,
+			      &direction);
+	if (ret)
+		return ret;
+
+	switch (direction) {
+	case VIRTIO_GPIO_DIRECTION_IN:
+		return GPIO_LINE_DIRECTION_IN;
+	case VIRTIO_GPIO_DIRECTION_OUT:
+		return GPIO_LINE_DIRECTION_OUT;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int virtio_gpio_direction_input(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct virtio_gpio *vgpio = gpiochip_get_data(gc);
+
+	return virtio_gpio_req(vgpio, VIRTIO_GPIO_MSG_SET_DIRECTION, gpio,
+			       VIRTIO_GPIO_DIRECTION_IN, NULL);
+}
+
+static int virtio_gpio_direction_output(struct gpio_chip *gc, unsigned int gpio,
+					int value)
+{
+	struct virtio_gpio *vgpio = gpiochip_get_data(gc);
+	int ret;
+
+	ret = virtio_gpio_req(vgpio, VIRTIO_GPIO_MSG_SET_VALUE, gpio, value, NULL);
+	if (ret)
+		return ret;
+
+	return virtio_gpio_req(vgpio, VIRTIO_GPIO_MSG_SET_DIRECTION, gpio,
+			       VIRTIO_GPIO_DIRECTION_OUT, NULL);
+}
+
+static int virtio_gpio_get(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct virtio_gpio *vgpio = gpiochip_get_data(gc);
+	u8 value;
+	int ret;
+
+	ret = virtio_gpio_req(vgpio, VIRTIO_GPIO_MSG_GET_VALUE, gpio, 0, &value);
+	return ret ? ret : value;
+}
+
+static void virtio_gpio_set(struct gpio_chip *gc, unsigned int gpio, int value)
+{
+	struct virtio_gpio *vgpio = gpiochip_get_data(gc);
+
+	virtio_gpio_req(vgpio, VIRTIO_GPIO_MSG_SET_VALUE, gpio, value, NULL);
+}
+
+static void virtio_gpio_request_vq(struct virtqueue *vq)
+{
+	struct virtio_gpio_line *line;
+	unsigned int len;
+
+	do {
+		line = virtqueue_get_buf(vq, &len);
+		if (!line)
+			return;
+
+		line->rxlen = len;
+		complete(&line->completion);
+	} while (1);
+}
+
+static void virtio_gpio_free_vqs(struct virtio_device *vdev)
+{
+	vdev->config->reset(vdev);
+	vdev->config->del_vqs(vdev);
+}
+
+static int virtio_gpio_alloc_vqs(struct virtio_gpio *vgpio,
+				 struct virtio_device *vdev)
+{
+	const char * const names[] = { "requestq" };
+	vq_callback_t *cbs[] = {
+		virtio_gpio_request_vq,
+	};
+	struct virtqueue *vqs[1] = { NULL };
+	int ret;
+
+	ret = virtio_find_vqs(vdev, 1, vqs, cbs, names, NULL);
+	if (ret) {
+		dev_err(&vdev->dev, "failed to find vqs: %d\n", ret);
+		return ret;
+	}
+
+	if (!vqs[0]) {
+		dev_err(&vdev->dev, "failed to find requestq vq\n");
+		return -ENODEV;
+	}
+	vgpio->request_vq = vqs[0];
+
+	return 0;
+}
+
+static const char **virtio_gpio_get_names(struct virtio_gpio *vgpio,
+					  u32 gpio_names_size, u16 ngpio)
+{
+	struct virtio_gpio_response_get_names *res;
+	struct device *dev = &vgpio->vdev->dev;
+	u8 *gpio_names, *str;
+	const char **names;
+	int i, ret, len;
+
+	if (!gpio_names_size)
+		return NULL;
+
+	len = sizeof(*res) + gpio_names_size;
+	res = devm_kzalloc(dev, len, GFP_KERNEL);
+	if (!res)
+		return NULL;
+	gpio_names = res->value;
+
+	ret = _virtio_gpio_req(vgpio, VIRTIO_GPIO_MSG_GET_NAMES, 0, 0, NULL,
+			       res, len);
+	if (ret) {
+		dev_err(dev, "Failed to get GPIO names: %d\n", ret);
+		return NULL;
+	}
+
+	names = devm_kcalloc(dev, ngpio, sizeof(*names), GFP_KERNEL);
+	if (!names)
+		return NULL;
+
+	/* NULL terminate the string instead of checking it */
+	gpio_names[gpio_names_size - 1] = '\0';
+
+	for (i = 0, str = gpio_names; i < ngpio; i++) {
+		names[i] = str;
+		str += strlen(str) + 1; /* zero-length strings are allowed */
+
+		if (str > gpio_names + gpio_names_size) {
+			dev_err(dev, "gpio_names block is too short (%d)\n", i);
+			return NULL;
+		}
+	}
+
+	return names;
+}
+
+static int virtio_gpio_probe(struct virtio_device *vdev)
+{
+	struct virtio_gpio_config config;
+	struct device *dev = &vdev->dev;
+	struct virtio_gpio *vgpio;
+	u32 gpio_names_size;
+	u16 ngpio;
+	int ret, i;
+
+	vgpio = devm_kzalloc(dev, sizeof(*vgpio), GFP_KERNEL);
+	if (!vgpio)
+		return -ENOMEM;
+
+	/* Read configuration */
+	virtio_cread_bytes(vdev, 0, &config, sizeof(config));
+	gpio_names_size = le32_to_cpu(config.gpio_names_size);
+	ngpio = le16_to_cpu(config.ngpio);
+	if (!ngpio) {
+		dev_err(dev, "Number of GPIOs can't be zero\n");
+		return -EINVAL;
+	}
+
+	vgpio->lines = devm_kcalloc(dev, ngpio, sizeof(*vgpio->lines), GFP_KERNEL);
+	if (!vgpio->lines)
+		return -ENOMEM;
+
+	for (i = 0; i < ngpio; i++) {
+		mutex_init(&vgpio->lines[i].lock);
+		init_completion(&vgpio->lines[i].completion);
+	}
+
+	mutex_init(&vgpio->lock);
+	vdev->priv = vgpio;
+
+	vgpio->vdev			= vdev;
+	vgpio->gc.free			= virtio_gpio_free;
+	vgpio->gc.get_direction		= virtio_gpio_get_direction;
+	vgpio->gc.direction_input	= virtio_gpio_direction_input;
+	vgpio->gc.direction_output	= virtio_gpio_direction_output;
+	vgpio->gc.get			= virtio_gpio_get;
+	vgpio->gc.set			= virtio_gpio_set;
+	vgpio->gc.ngpio			= ngpio;
+	vgpio->gc.base			= -1; /* Allocate base dynamically */
+	vgpio->gc.label			= dev_name(dev);
+	vgpio->gc.parent		= dev;
+	vgpio->gc.owner			= THIS_MODULE;
+	vgpio->gc.can_sleep		= true;
+
+	ret = virtio_gpio_alloc_vqs(vgpio, vdev);
+	if (ret)
+		return ret;
+
+	/* Mark the device ready to perform operations from within probe() */
+	virtio_device_ready(vdev);
+
+	vgpio->gc.names = virtio_gpio_get_names(vgpio, gpio_names_size, ngpio);
+
+	ret = gpiochip_add_data(&vgpio->gc, vgpio);
+	if (ret) {
+		virtio_gpio_free_vqs(vdev);
+		dev_err(dev, "Failed to add virtio-gpio controller\n");
+	}
+
+	return ret;
+}
+
+static void virtio_gpio_remove(struct virtio_device *vdev)
+{
+	struct virtio_gpio *vgpio = vdev->priv;
+
+	gpiochip_remove(&vgpio->gc);
+	virtio_gpio_free_vqs(vdev);
+}
+
+static const struct virtio_device_id id_table[] = {
+	{ VIRTIO_ID_GPIO, VIRTIO_DEV_ANY_ID },
+	{},
+};
+MODULE_DEVICE_TABLE(virtio, id_table);
+
+static struct virtio_driver virtio_gpio_driver = {
+	.id_table		= id_table,
+	.probe			= virtio_gpio_probe,
+	.remove			= virtio_gpio_remove,
+	.driver			= {
+		.name		= KBUILD_MODNAME,
+		.owner		= THIS_MODULE,
+	},
+};
+module_virtio_driver(virtio_gpio_driver);
+
+MODULE_AUTHOR("Enrico Weigelt, metux IT consult <info@metux.net>");
+MODULE_AUTHOR("Viresh Kumar <viresh.kumar@linaro.org>");
+MODULE_DESCRIPTION("VirtIO GPIO driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpio/gpiolib-devres.c b/drivers/gpio/gpiolib-devres.c
index 4a517e5dedf0..79da85d17b71 100644
--- a/drivers/gpio/gpiolib-devres.c
+++ b/drivers/gpio/gpiolib-devres.c
@@ -145,7 +145,7 @@ EXPORT_SYMBOL_GPL(devm_gpiod_get_index);
  * In case of error an ERR_PTR() is returned.
  */
 struct gpio_desc *devm_gpiod_get_from_of_node(struct device *dev,
-					      struct device_node *node,
+					      const struct device_node *node,
 					      const char *propname, int index,
 					      enum gpiod_flags dflags,
 					      const char *label)
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index bbcc7c073f63..0ad288ab6262 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -122,7 +122,7 @@ static struct gpio_desc *of_xlate_and_get_gpiod_flags(struct gpio_chip *chip,
 bool of_gpio_need_valid_mask(const struct gpio_chip *gc)
 {
 	int size;
-	struct device_node *np = gc->of_node;
+	const struct device_node *np = gc->of_node;
 
 	size = of_property_count_u32_elems(np,  "gpio-reserved-ranges");
 	if (size > 0 && size % 2 == 0)
@@ -130,7 +130,7 @@ bool of_gpio_need_valid_mask(const struct gpio_chip *gc)
 	return false;
 }
 
-static void of_gpio_flags_quirks(struct device_node *np,
+static void of_gpio_flags_quirks(const struct device_node *np,
 				 const char *propname,
 				 enum of_gpio_flags *flags,
 				 int index)
@@ -236,7 +236,7 @@ static void of_gpio_flags_quirks(struct device_node *np,
  * value on the error condition. If @flags is not NULL the function also fills
  * in flags for the GPIO.
  */
-static struct gpio_desc *of_get_named_gpiod_flags(struct device_node *np,
+static struct gpio_desc *of_get_named_gpiod_flags(const struct device_node *np,
 		     const char *propname, int index, enum of_gpio_flags *flags)
 {
 	struct of_phandle_args gpiospec;
@@ -275,7 +275,7 @@ out:
 	return desc;
 }
 
-int of_get_named_gpio_flags(struct device_node *np, const char *list_name,
+int of_get_named_gpio_flags(const struct device_node *np, const char *list_name,
 			    int index, enum of_gpio_flags *flags)
 {
 	struct gpio_desc *desc;
@@ -303,7 +303,7 @@ EXPORT_SYMBOL_GPL(of_get_named_gpio_flags);
  *
  * In case of error an ERR_PTR() is returned.
  */
-struct gpio_desc *gpiod_get_from_of_node(struct device_node *node,
+struct gpio_desc *gpiod_get_from_of_node(const struct device_node *node,
 					 const char *propname, int index,
 					 enum gpiod_flags dflags,
 					 const char *label)
@@ -373,7 +373,7 @@ static struct gpio_desc *of_find_spi_gpio(struct device *dev, const char *con_id
 					  enum of_gpio_flags *of_flags)
 {
 	char prop_name[32]; /* 32 is max size of property name */
-	struct device_node *np = dev->of_node;
+	const struct device_node *np = dev->of_node;
 	struct gpio_desc *desc;
 
 	/*
@@ -404,7 +404,7 @@ static struct gpio_desc *of_find_spi_cs_gpio(struct device *dev,
 					     unsigned int idx,
 					     unsigned long *flags)
 {
-	struct device_node *np = dev->of_node;
+	const struct device_node *np = dev->of_node;
 
 	if (!IS_ENABLED(CONFIG_SPI_MASTER))
 		return ERR_PTR(-ENOENT);
@@ -440,7 +440,7 @@ static struct gpio_desc *of_find_regulator_gpio(struct device *dev, const char *
 		"wlf,ldo1ena", /* WM8994 */
 		"wlf,ldo2ena", /* WM8994 */
 	};
-	struct device_node *np = dev->of_node;
+	const struct device_node *np = dev->of_node;
 	struct gpio_desc *desc;
 	int i;
 
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 27c07108496d..d1b9b721218f 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -382,10 +382,18 @@ static int devprop_gpiochip_set_names(struct gpio_chip *chip)
 	if (count < 0)
 		return 0;
 
-	if (count > gdev->ngpio) {
-		dev_warn(&gdev->dev, "gpio-line-names is length %d but should be at most length %d",
-			 count, gdev->ngpio);
-		count = gdev->ngpio;
+	/*
+	 * When offset is set in the driver side we assume the driver internally
+	 * is using more than one gpiochip per the same device. We have to stop
+	 * setting friendly names if the specified ones with 'gpio-line-names'
+	 * are less than the offset in the device itself. This means all the
+	 * lines are not present for every single pin within all the internal
+	 * gpiochips.
+	 */
+	if (count <= chip->offset) {
+		dev_warn(&gdev->dev, "gpio-line-names too short (length %d), cannot map names for the gpiochip at offset %u\n",
+			 count, chip->offset);
+		return 0;
 	}
 
 	names = kcalloc(count, sizeof(*names), GFP_KERNEL);
@@ -400,8 +408,22 @@ static int devprop_gpiochip_set_names(struct gpio_chip *chip)
 		return ret;
 	}
 
+	/*
+	 * When more that one gpiochip per device is used, 'count' can
+	 * contain at most number gpiochips x chip->ngpio. We have to
+	 * correctly distribute all defined lines taking into account
+	 * chip->offset as starting point from where we will assign
+	 * the names to pins from the 'names' array. Since property
+	 * 'gpio-line-names' cannot contains gaps, we have to be sure
+	 * we only assign those pins that really exists since chip->ngpio
+	 * can be different of the chip->offset.
+	 */
+	count = (count > chip->offset) ? count - chip->offset : count;
+	if (count > chip->ngpio)
+		count = chip->ngpio;
+
 	for (i = 0; i < count; i++)
-		gdev->descs[i].name = names[i];
+		gdev->descs[i].name = names[chip->offset + i];
 
 	kfree(names);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index 8f53837d4d3e..97178b307ed6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -468,14 +468,18 @@ bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *ade
 	return (fw_cap & ATOM_FIRMWARE_CAP_DYNAMIC_BOOT_CFG_ENABLE) ? true : false;
 }
 
-/*
- * Helper function to query RAS EEPROM address
- *
- * @adev: amdgpu_device pointer
+/**
+ * amdgpu_atomfirmware_ras_rom_addr -- Get the RAS EEPROM addr from VBIOS
+ * adev: amdgpu_device pointer
+ * i2c_address: pointer to u8; if not NULL, will contain
+ *    the RAS EEPROM address if the function returns true
  *
- * Return true if vbios supports ras rom address reporting
+ * Return true if VBIOS supports RAS EEPROM address reporting,
+ * else return false. If true and @i2c_address is not NULL,
+ * will contain the RAS ROM address.
  */
-bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t* i2c_address)
+bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev,
+				      u8 *i2c_address)
 {
 	struct amdgpu_mode_info *mode_info = &adev->mode_info;
 	int index;
@@ -483,27 +487,39 @@ bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t* i2c_a
 	union firmware_info *firmware_info;
 	u8 frev, crev;
 
-	if (i2c_address == NULL)
-		return false;
-
-	*i2c_address = 0;
-
 	index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
-			firmwareinfo);
+					    firmwareinfo);
 
 	if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context,
-				index, &size, &frev, &crev, &data_offset)) {
+					  index, &size, &frev, &crev,
+					  &data_offset)) {
 		/* support firmware_info 3.4 + */
 		if ((frev == 3 && crev >=4) || (frev > 3)) {
 			firmware_info = (union firmware_info *)
 				(mode_info->atom_context->bios + data_offset);
-			*i2c_address = firmware_info->v34.ras_rom_i2c_slave_addr;
+			/* The ras_rom_i2c_slave_addr should ideally
+			 * be a 19-bit EEPROM address, which would be
+			 * used as is by the driver; see top of
+			 * amdgpu_eeprom.c.
+			 *
+			 * When this is the case, 0 is of course a
+			 * valid RAS EEPROM address, in which case,
+			 * we'll drop the first "if (firm...)" and only
+			 * leave the check for the pointer.
+			 *
+			 * The reason this works right now is because
+			 * ras_rom_i2c_slave_addr contains the EEPROM
+			 * device type qualifier 1010b in the top 4
+			 * bits.
+			 */
+			if (firmware_info->v34.ras_rom_i2c_slave_addr) {
+				if (i2c_address)
+					*i2c_address = firmware_info->v34.ras_rom_i2c_slave_addr;
+				return true;
+			}
 		}
 	}
 
-	if (*i2c_address != 0)
-		return true;
-
 	return false;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 8e5a7ac8c36f..7a7316731911 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -522,6 +522,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
 			break;
 		case CHIP_RENOIR:
 		case CHIP_VANGOGH:
+		case CHIP_YELLOW_CARP:
 			domain |= AMDGPU_GEM_DOMAIN_GTT;
 			break;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index b6640291f980..f18240f87387 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1181,7 +1181,12 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x73A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
 	{0x1002, 0x73A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
 	{0x1002, 0x73A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+	{0x1002, 0x73A5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+	{0x1002, 0x73A8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+	{0x1002, 0x73A9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
 	{0x1002, 0x73AB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+	{0x1002, 0x73AC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+	{0x1002, 0x73AD, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
 	{0x1002, 0x73AE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
 	{0x1002, 0x73AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
 	{0x1002, 0x73BF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
@@ -1197,6 +1202,11 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x73C0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
 	{0x1002, 0x73C1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
 	{0x1002, 0x73C3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+	{0x1002, 0x73DA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+	{0x1002, 0x73DB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+	{0x1002, 0x73DC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+	{0x1002, 0x73DD, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+	{0x1002, 0x73DE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
 	{0x1002, 0x73DF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
 
 	/* DIMGREY_CAVEFISH */
@@ -1204,6 +1214,13 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x73E1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
 	{0x1002, 0x73E2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
 	{0x1002, 0x73E3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+	{0x1002, 0x73E8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+	{0x1002, 0x73E9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+	{0x1002, 0x73EA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+	{0x1002, 0x73EB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+	{0x1002, 0x73EC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+	{0x1002, 0x73ED, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+	{0x1002, 0x73EF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
 	{0x1002, 0x73FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
 
 	/* Aldebaran */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
index d94c5419ec25..5a6857c44bb6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
@@ -59,6 +59,7 @@ void amdgpu_show_fdinfo(struct seq_file *m, struct file *f)
 	uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0;
 	struct drm_file *file = f->private_data;
 	struct amdgpu_device *adev = drm_to_adev(file->minor->dev);
+	struct amdgpu_bo *root;
 	int ret;
 
 	ret = amdgpu_file_to_fpriv(f, &fpriv);
@@ -69,13 +70,19 @@ void amdgpu_show_fdinfo(struct seq_file *m, struct file *f)
 	dev = PCI_SLOT(adev->pdev->devfn);
 	fn = PCI_FUNC(adev->pdev->devfn);
 
-	ret = amdgpu_bo_reserve(fpriv->vm.root.bo, false);
+	root = amdgpu_bo_ref(fpriv->vm.root.bo);
+	if (!root)
+		return;
+
+	ret = amdgpu_bo_reserve(root, false);
 	if (ret) {
 		DRM_ERROR("Fail to reserve bo\n");
 		return;
 	}
 	amdgpu_vm_get_memory(&fpriv->vm, &vram_mem, &gtt_mem, &cpu_mem);
-	amdgpu_bo_unreserve(fpriv->vm.root.bo);
+	amdgpu_bo_unreserve(root);
+	amdgpu_bo_unref(&root);
+
 	seq_printf(m, "pdev:\t%04x:%02x:%02x.%d\npasid:\t%u\n", domain, bus,
 			dev, fn, fpriv->vm.pasid);
 	seq_printf(m, "vram mem:\t%llu kB\n", vram_mem/1024UL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 14499f0de32d..8d682befe0d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -552,6 +552,9 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
 		if (!ring || !ring->fence_drv.initialized)
 			continue;
 
+		if (!ring->no_scheduler)
+			drm_sched_stop(&ring->sched, NULL);
+
 		/* You can't wait for HW to signal if it's gone */
 		if (!drm_dev_is_unplugged(&adev->ddev))
 			r = amdgpu_fence_wait_empty(ring);
@@ -611,6 +614,11 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
 		if (!ring || !ring->fence_drv.initialized)
 			continue;
 
+		if (!ring->no_scheduler) {
+			drm_sched_resubmit_jobs(&ring->sched);
+			drm_sched_start(&ring->sched, true);
+		}
+
 		/* enable the interrupt */
 		if (ring->fence_drv.irq_src)
 			amdgpu_irq_get(adev, ring->fence_drv.irq_src,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index cb07cc3b06ed..d6aa032890ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -341,21 +341,18 @@ retry:
 	r = amdgpu_gem_object_create(adev, size, args->in.alignment,
 				     initial_domain,
 				     flags, ttm_bo_type_device, resv, &gobj);
-	if (r) {
-		if (r != -ERESTARTSYS) {
-			if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
-				flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
-				goto retry;
-			}
+	if (r && r != -ERESTARTSYS) {
+		if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
+			flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+			goto retry;
+		}
 
-			if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
-				initial_domain |= AMDGPU_GEM_DOMAIN_GTT;
-				goto retry;
-			}
-			DRM_DEBUG("Failed to allocate GEM object (%llu, %d, %llu, %d)\n",
-				  size, initial_domain, args->in.alignment, r);
+		if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
+			initial_domain |= AMDGPU_GEM_DOMAIN_GTT;
+			goto retry;
 		}
-		return r;
+		DRM_DEBUG("Failed to allocate GEM object (%llu, %d, %llu, %d)\n",
+				size, initial_domain, args->in.alignment, r);
 	}
 
 	if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 543000304a1c..675a72ef305d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -118,7 +118,7 @@ bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *res)
  * @man: TTM memory type manager
  * @tbo: TTM BO we need this range for
  * @place: placement flags and restrictions
- * @mem: the resulting mem object
+ * @res: the resulting mem object
  *
  * Dummy, allocate the node but no space for it yet.
  */
@@ -182,7 +182,7 @@ err_out:
  * amdgpu_gtt_mgr_del - free ranges
  *
  * @man: TTM memory type manager
- * @mem: TTM memory object
+ * @res: TTM memory object
  *
  * Free the allocated GTT again.
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 23efdc672502..9b41cb8c3de5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -469,10 +469,10 @@ psp_cmd_submit_buf(struct psp_context *psp,
 	 */
 	if (!skip_unsupport && (psp->cmd_buf_mem->resp.status || !timeout) && !ras_intr) {
 		if (ucode)
-			DRM_WARN("failed to load ucode (%s) ",
-				  amdgpu_ucode_name(ucode->ucode_id));
-		DRM_WARN("psp gfx command (%s) failed and response status is (0x%X)\n",
-			 psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id),
+			DRM_WARN("failed to load ucode %s(0x%X) ",
+				  amdgpu_ucode_name(ucode->ucode_id), ucode->ucode_id);
+		DRM_WARN("psp gfx command %s(0x%X) failed and response status is (0x%X)\n",
+			 psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id), psp->cmd_buf_mem->cmd_id,
 			 psp->cmd_buf_mem->resp.status);
 		if (!timeout) {
 			ret = -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 9dc3b2d88176..dc44c946a244 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -114,27 +114,24 @@ static bool __get_eeprom_i2c_addr_arct(struct amdgpu_device *adev,
 static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
 				  struct amdgpu_ras_eeprom_control *control)
 {
-	uint8_t ras_rom_i2c_slave_addr;
+	u8 i2c_addr;
 
 	if (!control)
 		return false;
 
-	control->i2c_address = 0;
-
-	if (amdgpu_atomfirmware_ras_rom_addr(adev, &ras_rom_i2c_slave_addr))
-	{
-		switch (ras_rom_i2c_slave_addr) {
-		case 0xA0:
-			control->i2c_address = 0;
-			return true;
-		case 0xA8:
-			control->i2c_address = 0x40000;
-			return true;
-		default:
-			dev_warn(adev->dev, "RAS EEPROM I2C slave address %02x not supported",
-				 ras_rom_i2c_slave_addr);
-			return false;
-		}
+	if (amdgpu_atomfirmware_ras_rom_addr(adev, &i2c_addr)) {
+		/* The address given by VBIOS is an 8-bit, wire-format
+		 * address, i.e. the most significant byte.
+		 *
+		 * Normalize it to a 19-bit EEPROM address. Remove the
+		 * device type identifier and make it a 7-bit address;
+		 * then make it a 19-bit EEPROM address. See top of
+		 * amdgpu_eeprom.c.
+		 */
+		i2c_addr = (i2c_addr & 0x0F) >> 1;
+		control->i2c_address = ((u32) i2c_addr) << 16;
+
+		return true;
 	}
 
 	switch (adev->asic_type) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 2fd77c36a1ff..7b2b0980ec41 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -361,7 +361,7 @@ static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
  * @man: TTM memory type manager
  * @tbo: TTM BO we need this range for
  * @place: placement flags and restrictions
- * @mem: the resulting mem object
+ * @res: the resulting mem object
  *
  * Allocate VRAM for the given BO.
  */
@@ -487,7 +487,7 @@ error_sub:
  * amdgpu_vram_mgr_del - free ranges
  *
  * @man: TTM memory type manager
- * @mem: TTM memory object
+ * @res: TTM memory object
  *
  * Free the allocated VRAM again.
  */
@@ -522,7 +522,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
  * amdgpu_vram_mgr_alloc_sgt - allocate and fill a sg table
  *
  * @adev: amdgpu device pointer
- * @mem: TTM memory object
+ * @res: TTM memory object
  * @offset: byte offset from the base of VRAM BO
  * @length: number of bytes to export in sg_table
  * @dev: the other device
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index ff2307d7ee0f..23b066bcffb2 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -258,6 +258,8 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
 	amdgpu_virt_fini_data_exchange(adev);
 	atomic_set(&adev->in_gpu_reset, 1);
 
+	xgpu_ai_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0);
+
 	do {
 		if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
 			goto flr_done;
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index 50572635d0f8..bd3b23171579 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -37,6 +37,7 @@ enum idh_request {
 	IDH_REQ_GPU_RESET_ACCESS,
 
 	IDH_LOG_VF_ERROR       = 200,
+	IDH_READY_TO_RESET 	= 201,
 };
 
 enum idh_event {
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index ba1d3ab869c1..f50045cebd44 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -85,11 +85,14 @@
 #define mmRCC_DEV0_EPF0_STRAP0_ALDE			0x0015
 #define mmRCC_DEV0_EPF0_STRAP0_ALDE_BASE_IDX		2
 
-#define mmBIF_DOORBELL_INT_CNTL_ALDE 			0x3878
+#define mmBIF_DOORBELL_INT_CNTL_ALDE 			0x00fe
 #define mmBIF_DOORBELL_INT_CNTL_ALDE_BASE_IDX 		2
 #define BIF_DOORBELL_INT_CNTL_ALDE__DOORBELL_INTERRUPT_DISABLE__SHIFT	0x18
 #define BIF_DOORBELL_INT_CNTL_ALDE__DOORBELL_INTERRUPT_DISABLE_MASK	0x01000000L
 
+#define mmBIF_INTR_CNTL_ALDE 				0x0101
+#define mmBIF_INTR_CNTL_ALDE_BASE_IDX 			2
+
 static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
 					void *ras_error_status);
 
@@ -440,14 +443,23 @@ static int nbio_v7_4_set_ras_controller_irq_state(struct amdgpu_device *adev,
 	 */
 	uint32_t bif_intr_cntl;
 
-	bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL);
+	if (adev->asic_type == CHIP_ALDEBARAN)
+		bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL_ALDE);
+	else
+		bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL);
+
 	if (state == AMDGPU_IRQ_STATE_ENABLE) {
 		/* set interrupt vector select bit to 0 to select
 		 * vetcor 1 for bare metal case */
 		bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl,
 					      BIF_INTR_CNTL,
 					      RAS_INTR_VEC_SEL, 0);
-		WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl);
+
+		if (adev->asic_type == CHIP_ALDEBARAN)
+			WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL_ALDE, bif_intr_cntl);
+		else
+			WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl);
+
 	}
 
 	return 0;
@@ -476,14 +488,22 @@ static int nbio_v7_4_set_ras_err_event_athub_irq_state(struct amdgpu_device *ade
 	 */
 	uint32_t bif_intr_cntl;
 
-	bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL);
+	if (adev->asic_type == CHIP_ALDEBARAN)
+		bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL_ALDE);
+	else
+		bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL);
+
 	if (state == AMDGPU_IRQ_STATE_ENABLE) {
 		/* set interrupt vector select bit to 0 to select
 		 * vetcor 1 for bare metal case */
 		bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl,
 					      BIF_INTR_CNTL,
 					      RAS_INTR_VEC_SEL, 0);
-		WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl);
+
+		if (adev->asic_type == CHIP_ALDEBARAN)
+			WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL_ALDE, bif_intr_cntl);
+		else
+			WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 42a35d9520f9..fe9a7cc8d9eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -904,14 +904,7 @@ static bool vi_asic_supports_baco(struct amdgpu_device *adev)
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
 	case CHIP_TOPAZ:
-		/* Disable BACO support for the specific polaris12 SKU temporarily */
-		if ((adev->pdev->device == 0x699F) &&
-		     (adev->pdev->revision == 0xC7) &&
-		     (adev->pdev->subsystem_vendor == 0x1028) &&
-		     (adev->pdev->subsystem_device == 0x0039))
-			return false;
-		else
-			return amdgpu_dpm_is_baco_supported(adev);
+		return amdgpu_dpm_is_baco_supported(adev);
 	default:
 		return false;
 	}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 491373fcdb38..9fc8021bb0ab 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2484,7 +2484,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	}
 	if (!p->xnack_enabled) {
 		pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
-		return -EFAULT;
+		r = -EFAULT;
+		goto out;
 	}
 	svms = &p->svms;
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 816723691d51..9b1fc54555ee 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1200,7 +1200,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 	dc_hardware_init(adev->dm.dc);
 
 #if defined(CONFIG_DRM_AMD_DC_DCN)
-	if (adev->apu_flags) {
+	if ((adev->flags & AMD_IS_APU) && (adev->asic_type >= CHIP_CARRIZO)) {
 		struct dc_phy_addr_space_config pa_config;
 
 		mmhub_read_system_context(adev, &pa_config);
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index cd025c12f17b..330edd666b7d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -1561,7 +1561,7 @@ bool dc_link_dp_perform_link_training_skip_aux(
 	struct dc_link *link,
 	const struct dc_link_settings *link_setting)
 {
-	struct link_training_settings lt_settings;
+	struct link_training_settings lt_settings = {0};
 
 	dp_decide_training_settings(
 			link,
@@ -1707,7 +1707,7 @@ enum link_training_result dc_link_dp_perform_link_training(
 	bool skip_video_pattern)
 {
 	enum link_training_result status = LINK_TRAINING_SUCCESS;
-	struct link_training_settings lt_settings;
+	struct link_training_settings lt_settings = {0};
 	enum dp_link_encoding encoding =
 			dp_get_link_encoding_format(link_settings);
 
@@ -1923,7 +1923,7 @@ enum link_training_result dc_link_dp_sync_lt_attempt(
     struct dc_link_settings *link_settings,
     struct dc_link_training_overrides *lt_overrides)
 {
-	struct link_training_settings lt_settings;
+	struct link_training_settings lt_settings = {0};
 	enum link_training_result lt_status = LINK_TRAINING_SUCCESS;
 	enum dp_panel_mode panel_mode = DP_PANEL_MODE_DEFAULT;
 	enum clock_source_id dp_cs_id = CLOCK_SOURCE_ID_EXTERNAL;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c
index a612ba6dc389..ab6bc5d79012 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c
@@ -28,9 +28,9 @@
  */
 
 #include <linux/delay.h>
+#include <linux/stdarg.h>
 
 #include "dm_services.h"
-#include <stdarg.h>
 
 #include "dc.h"
 #include "dc_dmub_srv.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
index dc7823d23ba8..dd38796ba30a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
@@ -510,8 +510,12 @@ static struct stream_encoder *dcn303_stream_encoder_create(enum engine_id eng_id
 	vpg = dcn303_vpg_create(ctx, vpg_inst);
 	afmt = dcn303_afmt_create(ctx, afmt_inst);
 
-	if (!enc1 || !vpg || !afmt)
+	if (!enc1 || !vpg || !afmt) {
+		kfree(enc1);
+		kfree(vpg);
+		kfree(afmt);
 		return NULL;
+	}
 
 	dcn30_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id, vpg, afmt, &stream_enc_regs[eng_id],
 			&se_shift, &se_mask);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
index 7db268da6976..3b3721386571 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
@@ -109,7 +109,7 @@ bool dcn31_is_panel_backlight_on(struct panel_cntl *panel_cntl)
 	union dmub_rb_cmd cmd;
 
 	if (!dcn31_query_backlight_info(panel_cntl, &cmd))
-		return 0;
+		return false;
 
 	return cmd.panel_cntl.data.is_backlight_on;
 }
@@ -119,7 +119,7 @@ bool dcn31_is_panel_powered_on(struct panel_cntl *panel_cntl)
 	union dmub_rb_cmd cmd;
 
 	if (!dcn31_query_backlight_info(panel_cntl, &cmd))
-		return 0;
+		return false;
 
 	return cmd.panel_cntl.data.is_powered_on;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
index fbed5304692d..63bbdf8b8678 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
@@ -2641,7 +2641,7 @@ static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndP
 		for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
 			if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) {
 				if (mode_lib->vba.DRAMClockChangeWatermark >
-				dml_max(mode_lib->vba.StutterEnterPlusExitWatermark, mode_lib->vba.UrgentWatermark))
+					dml_max(mode_lib->vba.StutterEnterPlusExitWatermark, mode_lib->vba.UrgentWatermark))
 					mode_lib->vba.MinTTUVBlank[k] += 25;
 			}
 		}
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index 7b684e7f60df..7efe9ba8706e 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -39,7 +39,6 @@
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/delay.h>
-#include <stdarg.h>
 
 #include "atomfirmware.h"
 
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 2d55627b05b1..249cb0aeb5ae 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2005,10 +2005,10 @@ static int ss_bias_attr_update(struct amdgpu_device *adev, struct amdgpu_device_
 static struct amdgpu_device_attr amdgpu_device_attrs[] = {
 	AMDGPU_DEVICE_ATTR_RW(power_dpm_state,				ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
 	AMDGPU_DEVICE_ATTR_RW(power_dpm_force_performance_level,	ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
-	AMDGPU_DEVICE_ATTR_RO(pp_num_states,				ATTR_FLAG_BASIC),
-	AMDGPU_DEVICE_ATTR_RO(pp_cur_state,				ATTR_FLAG_BASIC),
-	AMDGPU_DEVICE_ATTR_RW(pp_force_state,				ATTR_FLAG_BASIC),
-	AMDGPU_DEVICE_ATTR_RW(pp_table,					ATTR_FLAG_BASIC),
+	AMDGPU_DEVICE_ATTR_RO(pp_num_states,				ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
+	AMDGPU_DEVICE_ATTR_RO(pp_cur_state,				ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
+	AMDGPU_DEVICE_ATTR_RW(pp_force_state,				ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
+	AMDGPU_DEVICE_ATTR_RW(pp_table,					ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
 	AMDGPU_DEVICE_ATTR_RW(pp_dpm_sclk,				ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
 	AMDGPU_DEVICE_ATTR_RW(pp_dpm_mclk,				ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
 	AMDGPU_DEVICE_ATTR_RW(pp_dpm_socclk,				ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
index 715b4225f5ee..8156729c370b 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
@@ -1335,6 +1335,30 @@ enum smu_cmn2asic_mapping_type {
 #define WORKLOAD_MAP(profile, workload) \
 	[profile] = {1, (workload)}
 
+/**
+ * smu_memcpy_trailing - Copy the end of one structure into the middle of another
+ *
+ * @dst: Pointer to destination struct
+ * @first_dst_member: The member name in @dst where the overwrite begins
+ * @last_dst_member: The member name in @dst where the overwrite ends after
+ * @src: Pointer to the source struct
+ * @first_src_member: The member name in @src where the copy begins
+ *
+ */
+#define smu_memcpy_trailing(dst, first_dst_member, last_dst_member,	   \
+			    src, first_src_member)			   \
+({									   \
+	size_t __src_offset = offsetof(typeof(*(src)), first_src_member);  \
+	size_t __src_size = sizeof(*(src)) - __src_offset;		   \
+	size_t __dst_offset = offsetof(typeof(*(dst)), first_dst_member);  \
+	size_t __dst_size = offsetofend(typeof(*(dst)), last_dst_member) - \
+			    __dst_offset;				   \
+	BUILD_BUG_ON(__src_size != __dst_size);				   \
+	__builtin_memcpy((u8 *)(dst) + __dst_offset,			   \
+			 (u8 *)(src) + __src_offset,			   \
+			 __dst_size);					   \
+})
+
 #if !defined(SWSMU_CODE_LAYER_L2) && !defined(SWSMU_CODE_LAYER_L3) && !defined(SWSMU_CODE_LAYER_L4)
 int smu_get_power_limit(void *handle,
 			uint32_t *limit,
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
index 465ff8d2a01a..e7803ce8f67a 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
@@ -27,6 +27,9 @@
 #include <linux/pci.h>
 #include <linux/slab.h>
 #include <asm/div64.h>
+#if IS_ENABLED(CONFIG_X86_64)
+#include <asm/intel-family.h>
+#endif
 #include <drm/amdgpu_drm.h>
 #include "ppatomctrl.h"
 #include "atombios.h"
@@ -1733,6 +1736,17 @@ static int smu7_disable_dpm_tasks(struct pp_hwmgr *hwmgr)
 	return result;
 }
 
+static bool intel_core_rkl_chk(void)
+{
+#if IS_ENABLED(CONFIG_X86_64)
+	struct cpuinfo_x86 *c = &cpu_data(0);
+
+	return (c->x86 == 6 && c->x86_model == INTEL_FAM6_ROCKETLAKE);
+#else
+	return false;
+#endif
+}
+
 static void smu7_init_dpm_defaults(struct pp_hwmgr *hwmgr)
 {
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
@@ -1758,7 +1772,8 @@ static void smu7_init_dpm_defaults(struct pp_hwmgr *hwmgr)
 
 	data->mclk_dpm_key_disabled = hwmgr->feature_mask & PP_MCLK_DPM_MASK ? false : true;
 	data->sclk_dpm_key_disabled = hwmgr->feature_mask & PP_SCLK_DPM_MASK ? false : true;
-	data->pcie_dpm_key_disabled = hwmgr->feature_mask & PP_PCIE_DPM_MASK ? false : true;
+	data->pcie_dpm_key_disabled =
+		intel_core_rkl_chk() || !(hwmgr->feature_mask & PP_PCIE_DPM_MASK);
 	/* need to set voltage control types before EVV patching */
 	data->voltage_control = SMU7_VOLTAGE_CONTROL_NONE;
 	data->vddci_control = SMU7_VOLTAGE_CONTROL_NONE;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
index 273df66cac14..e343cc218990 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
@@ -483,10 +483,8 @@ static int arcturus_append_powerplay_table(struct smu_context *smu)
 
 	if ((smc_dpm_table->table_header.format_revision == 4) &&
 	    (smc_dpm_table->table_header.content_revision == 6))
-		memcpy(&smc_pptable->MaxVoltageStepGfx,
-		       &smc_dpm_table->maxvoltagestepgfx,
-		       sizeof(*smc_dpm_table) - offsetof(struct atom_smc_dpm_info_v4_6, maxvoltagestepgfx));
-
+		smu_memcpy_trailing(smc_pptable, MaxVoltageStepGfx, BoardReserved,
+				    smc_dpm_table, maxvoltagestepgfx);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index f96681700c41..a5fc5d7cb6c7 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -431,16 +431,16 @@ static int navi10_append_powerplay_table(struct smu_context *smu)
 
 	switch (smc_dpm_table->table_header.content_revision) {
 	case 5: /* nv10 and nv14 */
-		memcpy(smc_pptable->I2cControllers, smc_dpm_table->I2cControllers,
-			sizeof(*smc_dpm_table) - sizeof(smc_dpm_table->table_header));
+		smu_memcpy_trailing(smc_pptable, I2cControllers, BoardReserved,
+				    smc_dpm_table, I2cControllers);
 		break;
 	case 7: /* nv12 */
 		ret = amdgpu_atombios_get_data_table(adev, index, NULL, NULL, NULL,
 					      (uint8_t **)&smc_dpm_table_v4_7);
 		if (ret)
 			return ret;
-		memcpy(smc_pptable->I2cControllers, smc_dpm_table_v4_7->I2cControllers,
-			sizeof(*smc_dpm_table_v4_7) - sizeof(smc_dpm_table_v4_7->table_header));
+		smu_memcpy_trailing(smc_pptable, I2cControllers, BoardReserved,
+				    smc_dpm_table_v4_7, I2cControllers);
 		break;
 	default:
 		dev_err(smu->adev->dev, "smc_dpm_info with unsupported content revision %d!\n",
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
index 6eb50b05a33c..3a3421452e57 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
@@ -1869,7 +1869,7 @@ static int vangogh_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TAB
 		} else {
 			if (smu->gfx_actual_hard_min_freq > smu->gfx_actual_soft_max_freq) {
 				dev_err(smu->adev->dev,
-					"The setting minimun sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n",
+					"The setting minimum sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n",
 					smu->gfx_actual_hard_min_freq,
 					smu->gfx_actual_soft_max_freq);
 				return -EINVAL;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
index b39138041141..5aa175e12a78 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
@@ -426,7 +426,7 @@ static int renoir_od_edit_dpm_table(struct smu_context *smu,
 		} else {
 			if (smu->gfx_actual_hard_min_freq > smu->gfx_actual_soft_max_freq) {
 				dev_err(smu->adev->dev,
-					"The setting minimun sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n",
+					"The setting minimum sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n",
 					smu->gfx_actual_hard_min_freq,
 					smu->gfx_actual_soft_max_freq);
 				return -EINVAL;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
index ec8c30daf31c..ab652028e003 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
@@ -409,9 +409,8 @@ static int aldebaran_append_powerplay_table(struct smu_context *smu)
 
 	if ((smc_dpm_table->table_header.format_revision == 4) &&
 	    (smc_dpm_table->table_header.content_revision == 10))
-		memcpy(&smc_pptable->GfxMaxCurrent,
-		       &smc_dpm_table->GfxMaxCurrent,
-		       sizeof(*smc_dpm_table) - offsetof(struct atom_smc_dpm_info_v4_10, GfxMaxCurrent));
+		smu_memcpy_trailing(smc_pptable, GfxMaxCurrent, reserved,
+				    smc_dpm_table, GfxMaxCurrent);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
index 0f17c2522c85..627ba2eec7fd 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
@@ -731,7 +731,7 @@ static int yellow_carp_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM
 		} else {
 			if (smu->gfx_actual_hard_min_freq > smu->gfx_actual_soft_max_freq) {
 				dev_err(smu->adev->dev,
-					"The setting minimun sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n",
+					"The setting minimum sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n",
 					smu->gfx_actual_hard_min_freq,
 					smu->gfx_actual_soft_max_freq);
 				return -EINVAL;
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 1d009494af8b..deb23dbec8b5 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -807,8 +807,8 @@ struct sg_table *drm_prime_pages_to_sg(struct drm_device *dev,
 				       struct page **pages, unsigned int nr_pages)
 {
 	struct sg_table *sg;
-	struct scatterlist *sge;
 	size_t max_segment = 0;
+	int err;
 
 	sg = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
 	if (!sg)
@@ -818,13 +818,12 @@ struct sg_table *drm_prime_pages_to_sg(struct drm_device *dev,
 		max_segment = dma_max_mapping_size(dev->dev);
 	if (max_segment == 0)
 		max_segment = UINT_MAX;
-	sge = __sg_alloc_table_from_pages(sg, pages, nr_pages, 0,
-					  nr_pages << PAGE_SHIFT,
-					  max_segment,
-					  NULL, 0, GFP_KERNEL);
-	if (IS_ERR(sge)) {
+	err = sg_alloc_table_from_pages_segment(sg, pages, nr_pages, 0,
+						nr_pages << PAGE_SHIFT,
+						max_segment, GFP_KERNEL);
+	if (err) {
 		kfree(sg);
-		sg = ERR_CAST(sge);
+		sg = ERR_PTR(err);
 	}
 	return sg;
 }
diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c
index 111b932cf2a9..f783d4963d4b 100644
--- a/drivers/gpu/drm/drm_print.c
+++ b/drivers/gpu/drm/drm_print.c
@@ -25,7 +25,7 @@
 
 #define DEBUG /* for pr_debug() */
 
-#include <stdarg.h>
+#include <linux/stdarg.h>
 
 #include <linux/io.h>
 #include <linux/moduleparam.h>
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index 13b217f75055..e5ae9c06510c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -42,7 +42,7 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
 
 	max_order = MAX_ORDER;
 #ifdef CONFIG_SWIOTLB
-	if (is_swiotlb_active()) {
+	if (is_swiotlb_active(obj->base.dev->dev)) {
 		unsigned int max_segment;
 
 		max_segment = swiotlb_max_segment();
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 771eb2963123..35eedc14f522 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -382,7 +382,6 @@ i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type)
 static struct sg_table *i915_ttm_tt_get_st(struct ttm_tt *ttm)
 {
 	struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
-	struct scatterlist *sg;
 	struct sg_table *st;
 	int ret;
 
@@ -393,13 +392,13 @@ static struct sg_table *i915_ttm_tt_get_st(struct ttm_tt *ttm)
 	if (!st)
 		return ERR_PTR(-ENOMEM);
 
-	sg = __sg_alloc_table_from_pages
-		(st, ttm->pages, ttm->num_pages, 0,
-		 (unsigned long)ttm->num_pages << PAGE_SHIFT,
-		 i915_sg_segment_size(), NULL, 0, GFP_KERNEL);
-	if (IS_ERR(sg)) {
+	ret = sg_alloc_table_from_pages_segment(st,
+			ttm->pages, ttm->num_pages,
+			0, (unsigned long)ttm->num_pages << PAGE_SHIFT,
+			i915_sg_segment_size(), GFP_KERNEL);
+	if (ret) {
 		kfree(st);
-		return ERR_CAST(sg);
+		return ERR_PTR(ret);
 	}
 
 	ret = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 468a7a617fbf..8ea0fa665e53 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -131,7 +131,6 @@ static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 	unsigned int max_segment = i915_sg_segment_size();
 	struct sg_table *st;
 	unsigned int sg_page_sizes;
-	struct scatterlist *sg;
 	struct page **pvec;
 	int ret;
 
@@ -148,13 +147,11 @@ static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 	pvec = obj->userptr.pvec;
 
 alloc_table:
-	sg = __sg_alloc_table_from_pages(st, pvec, num_pages, 0,
-					 num_pages << PAGE_SHIFT, max_segment,
-					 NULL, 0, GFP_KERNEL);
-	if (IS_ERR(sg)) {
-		ret = PTR_ERR(sg);
+	ret = sg_alloc_table_from_pages_segment(st, pvec, num_pages, 0,
+						num_pages << PAGE_SHIFT,
+						max_segment, GFP_KERNEL);
+	if (ret)
 		goto err;
-	}
 
 	ret = i915_gem_gtt_prepare_pages(obj, st);
 	if (ret) {
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
index 51dbe0e3294e..d2969f68dd64 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
@@ -6,7 +6,7 @@
 #ifndef INTEL_GT_REQUESTS_H
 #define INTEL_GT_REQUESTS_H
 
-#include <stddef.h>
+#include <linux/stddef.h>
 
 struct intel_engine_cs;
 struct intel_gt;
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 1ac98f8aba31..7efa386449d1 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -885,7 +885,7 @@ static int intel_vgpu_group_notifier(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
-static int intel_vgpu_open(struct mdev_device *mdev)
+static int intel_vgpu_open_device(struct mdev_device *mdev)
 {
 	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
 	struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
@@ -1004,7 +1004,7 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu)
 	vgpu->handle = 0;
 }
 
-static void intel_vgpu_release(struct mdev_device *mdev)
+static void intel_vgpu_close_device(struct mdev_device *mdev)
 {
 	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
 
@@ -1753,8 +1753,8 @@ static struct mdev_parent_ops intel_vgpu_ops = {
 	.create			= intel_vgpu_create,
 	.remove			= intel_vgpu_remove,
 
-	.open			= intel_vgpu_open,
-	.release		= intel_vgpu_release,
+	.open_device		= intel_vgpu_open_device,
+	.close_device		= intel_vgpu_close_device,
 
 	.read			= intel_vgpu_read,
 	.write			= intel_vgpu_write,
diff --git a/drivers/gpu/drm/mgag200/mgag200_pll.c b/drivers/gpu/drm/mgag200/mgag200_pll.c
index 7c903cf19c0d..e9ae22b4f813 100644
--- a/drivers/gpu/drm/mgag200/mgag200_pll.c
+++ b/drivers/gpu/drm/mgag200/mgag200_pll.c
@@ -124,6 +124,7 @@ static int mgag200_pixpll_compute_g200se_00(struct mgag200_pll *pixpll, long clo
 	unsigned int computed;
 
 	m = n = p = s = 0;
+	delta = 0xffffffff;
 	permitteddelta = clock * 5 / 1000;
 
 	for (testp = 8; testp > 0; testp /= 2) {
diff --git a/drivers/gpu/drm/msm/disp/msm_disp_snapshot.h b/drivers/gpu/drm/msm/disp/msm_disp_snapshot.h
index c22b07f68670..4c619307612c 100644
--- a/drivers/gpu/drm/msm/disp/msm_disp_snapshot.h
+++ b/drivers/gpu/drm/msm/disp/msm_disp_snapshot.h
@@ -24,7 +24,6 @@
 #include <linux/pm_runtime.h>
 #include <linux/kthread.h>
 #include <linux/devcoredump.h>
-#include <stdarg.h>
 #include "msm_kms.h"
 
 #define MSM_DISP_SNAPSHOT_MAX_BLKS		10
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index f4c2e46b6fe1..2ca9d9a9e5d5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -276,7 +276,7 @@ nouveau_ttm_init(struct nouveau_drm *drm)
 	}
 
 #if IS_ENABLED(CONFIG_SWIOTLB) && IS_ENABLED(CONFIG_X86)
-	need_swiotlb = is_swiotlb_active();
+	need_swiotlb = is_swiotlb_active(dev->dev);
 #endif
 
 	ret = ttm_device_init(&drm->ttm.bdev, &nouveau_bo_driver, drm->dev->dev,
diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index 0da5b3100ab1..dfe5f1d29763 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -58,25 +58,16 @@ static int write_cmd(struct panfrost_device *pfdev, u32 as_nr, u32 cmd)
 }
 
 static void lock_region(struct panfrost_device *pfdev, u32 as_nr,
-			u64 iova, size_t size)
+			u64 iova, u64 size)
 {
 	u8 region_width;
 	u64 region = iova & PAGE_MASK;
-	/*
-	 * fls returns:
-	 * 1 .. 32
-	 *
-	 * 10 + fls(num_pages)
-	 * results in the range (11 .. 42)
-	 */
-
-	size = round_up(size, PAGE_SIZE);
 
-	region_width = 10 + fls(size >> PAGE_SHIFT);
-	if ((size >> PAGE_SHIFT) != (1ul << (region_width - 11))) {
-		/* not pow2, so must go up to the next pow2 */
-		region_width += 1;
-	}
+	/* The size is encoded as ceil(log2) minus(1), which may be calculated
+	 * with fls. The size must be clamped to hardware bounds.
+	 */
+	size = max_t(u64, size, AS_LOCK_REGION_MIN_SIZE);
+	region_width = fls64(size - 1) - 1;
 	region |= region_width;
 
 	/* Lock the region that needs to be updated */
@@ -87,7 +78,7 @@ static void lock_region(struct panfrost_device *pfdev, u32 as_nr,
 
 
 static int mmu_hw_do_operation_locked(struct panfrost_device *pfdev, int as_nr,
-				      u64 iova, size_t size, u32 op)
+				      u64 iova, u64 size, u32 op)
 {
 	if (as_nr < 0)
 		return 0;
@@ -104,7 +95,7 @@ static int mmu_hw_do_operation_locked(struct panfrost_device *pfdev, int as_nr,
 
 static int mmu_hw_do_operation(struct panfrost_device *pfdev,
 			       struct panfrost_mmu *mmu,
-			       u64 iova, size_t size, u32 op)
+			       u64 iova, u64 size, u32 op)
 {
 	int ret;
 
@@ -121,7 +112,7 @@ static void panfrost_mmu_enable(struct panfrost_device *pfdev, struct panfrost_m
 	u64 transtab = cfg->arm_mali_lpae_cfg.transtab;
 	u64 memattr = cfg->arm_mali_lpae_cfg.memattr;
 
-	mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0UL, AS_COMMAND_FLUSH_MEM);
+	mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM);
 
 	mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), transtab & 0xffffffffUL);
 	mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), transtab >> 32);
@@ -137,7 +128,7 @@ static void panfrost_mmu_enable(struct panfrost_device *pfdev, struct panfrost_m
 
 static void panfrost_mmu_disable(struct panfrost_device *pfdev, u32 as_nr)
 {
-	mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0UL, AS_COMMAND_FLUSH_MEM);
+	mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM);
 
 	mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), 0);
 	mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), 0);
@@ -251,7 +242,7 @@ static size_t get_pgsize(u64 addr, size_t size)
 
 static void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
 				     struct panfrost_mmu *mmu,
-				     u64 iova, size_t size)
+				     u64 iova, u64 size)
 {
 	if (mmu->as < 0)
 		return;
diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/drm/panfrost/panfrost_regs.h
index 1940ff86e49a..6c5a11ef1ee8 100644
--- a/drivers/gpu/drm/panfrost/panfrost_regs.h
+++ b/drivers/gpu/drm/panfrost/panfrost_regs.h
@@ -316,6 +316,8 @@
 #define AS_FAULTSTATUS_ACCESS_TYPE_READ		(0x2 << 8)
 #define AS_FAULTSTATUS_ACCESS_TYPE_WRITE	(0x3 << 8)
 
+#define AS_LOCK_REGION_MIN_SIZE                 (1ULL << 15)
+
 #define gpu_write(dev, reg, data) writel(data, dev->iomem + reg)
 #define gpu_read(dev, reg) readl(dev->iomem + reg)
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index ea4add2b9717..bb9e02c31946 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1160,9 +1160,9 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
 	}
 
 	if (bo->deleted) {
-		ttm_bo_cleanup_refs(bo, false, false, locked);
+		ret = ttm_bo_cleanup_refs(bo, false, false, locked);
 		ttm_bo_put(bo);
-		return 0;
+		return ret == -EBUSY ? -ENOSPC : ret;
 	}
 
 	ttm_bo_del_from_lru(bo);
@@ -1216,7 +1216,7 @@ out:
 	if (locked)
 		dma_resv_unlock(bo->base.resv);
 	ttm_bo_put(bo);
-	return ret;
+	return ret == -EBUSY ? -ENOSPC : ret;
 }
 
 void ttm_bo_tt_destroy(struct ttm_buffer_object *bo)
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 763fa6f4e07d..1c5ffe2935af 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -143,7 +143,6 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
 	struct ttm_resource *src_mem = bo->resource;
 	struct ttm_resource_manager *src_man =
 		ttm_manager_type(bdev, src_mem->mem_type);
-	struct ttm_resource src_copy = *src_mem;
 	union {
 		struct ttm_kmap_iter_tt tt;
 		struct ttm_kmap_iter_linear_io io;
@@ -173,11 +172,11 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
 	}
 
 	ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter);
-	src_copy = *src_mem;
-	ttm_bo_move_sync_cleanup(bo, dst_mem);
 
 	if (!src_iter->ops->maps_tt)
-		ttm_kmap_iter_linear_io_fini(&_src_iter.io, bdev, &src_copy);
+		ttm_kmap_iter_linear_io_fini(&_src_iter.io, bdev, src_mem);
+	ttm_bo_move_sync_cleanup(bo, dst_mem);
+
 out_src_iter:
 	if (!dst_iter->ops->maps_tt)
 		ttm_kmap_iter_linear_io_fini(&_dst_iter.io, bdev, dst_mem);
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 24031a8acd2d..d5cd8b5dc0bf 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -32,7 +32,6 @@
 #define pr_fmt(fmt) "[TTM] " fmt
 
 #include <linux/sched.h>
-#include <linux/pagemap.h>
 #include <linux/shmem_fs.h>
 #include <linux/file.h>
 #include <drm/drm_cache.h>
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index b7dc32a0c9bb..4a1115043114 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -1462,7 +1462,7 @@ static const struct hdmi_codec_ops vc4_hdmi_codec_ops = {
 	.audio_startup = vc4_hdmi_audio_startup,
 };
 
-struct hdmi_codec_pdata vc4_hdmi_codec_pdata = {
+static struct hdmi_codec_pdata vc4_hdmi_codec_pdata = {
 	.ops = &vc4_hdmi_codec_ops,
 	.max_i2s_channels = 8,
 	.i2s = 1,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
index b0973c27e774..8b8991e3ed2d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
@@ -328,7 +328,6 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
 	int ret = 0;
 	static size_t sgl_size;
 	static size_t sgt_size;
-	struct scatterlist *sg;
 
 	if (vmw_tt->mapped)
 		return 0;
@@ -351,15 +350,12 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
 		if (unlikely(ret != 0))
 			return ret;
 
-		sg = __sg_alloc_table_from_pages(&vmw_tt->sgt, vsgt->pages,
-				vsgt->num_pages, 0,
-				(unsigned long) vsgt->num_pages << PAGE_SHIFT,
-				dma_get_max_seg_size(dev_priv->drm.dev),
-				NULL, 0, GFP_KERNEL);
-		if (IS_ERR(sg)) {
-			ret = PTR_ERR(sg);
+		ret = sg_alloc_table_from_pages_segment(
+			&vmw_tt->sgt, vsgt->pages, vsgt->num_pages, 0,
+			(unsigned long)vsgt->num_pages << PAGE_SHIFT,
+			dma_get_max_seg_size(dev_priv->drm.dev), GFP_KERNEL);
+		if (ret)
 			goto out_sg_alloc_fail;
-		}
 
 		if (vsgt->num_pages > vmw_tt->sgt.orig_nents) {
 			uint64_t over_alloc =
diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 76937f716fbe..3c33bf572d6d 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -259,10 +259,11 @@ config HID_PRODIKEYS
 	  and some additional multimedia keys.
 
 config HID_CMEDIA
-	tristate "CMedia CM6533 HID audio jack controls"
+	tristate "CMedia audio chips"
 	depends on HID
 	help
-	Support for CMedia CM6533 HID audio jack controls.
+	Support for CMedia CM6533 HID audio jack controls
+        and HS100B mute buttons.
 
 config HID_CP2112
 	tristate "Silicon Labs CP2112 HID USB-to-SMBus Bridge support"
@@ -952,7 +953,7 @@ config HID_SONY
 	  * Buzz controllers
 	  * Sony PS3 Blue-ray Disk Remote Control (Bluetooth)
 	  * Logitech Harmony adapter for Sony Playstation 3 (Bluetooth)
-	  * Guitar Hero Live PS3 and Wii U guitar dongles
+	  * Guitar Hero Live PS3, Wii U and PS4 guitar dongles
 	  * Guitar Hero PS3 and PC guitar dongles
 
 config SONY_FF
diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
index 1ea1a7c0b20f..e29efcb1c040 100644
--- a/drivers/hid/Makefile
+++ b/drivers/hid/Makefile
@@ -115,7 +115,6 @@ obj-$(CONFIG_HID_STEELSERIES)	+= hid-steelseries.o
 obj-$(CONFIG_HID_SUNPLUS)	+= hid-sunplus.o
 obj-$(CONFIG_HID_GREENASIA)	+= hid-gaff.o
 obj-$(CONFIG_HID_THRUSTMASTER)	+= hid-tmff.o hid-thrustmaster.o
-obj-$(CONFIG_HID_TMINIT)	+= hid-tminit.o
 obj-$(CONFIG_HID_TIVO)		+= hid-tivo.o
 obj-$(CONFIG_HID_TOPSEED)	+= hid-topseed.o
 obj-$(CONFIG_HID_TWINHAN)	+= hid-twinhan.o
diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_client.c b/drivers/hid/amd-sfh-hid/amd_sfh_client.c
index efb849411d25..840fd075c56f 100644
--- a/drivers/hid/amd-sfh-hid/amd_sfh_client.c
+++ b/drivers/hid/amd-sfh-hid/amd_sfh_client.c
@@ -17,7 +17,6 @@
 #include "amd_sfh_pcie.h"
 #include "amd_sfh_hid.h"
 
-#define AMD_SFH_IDLE_LOOP	200
 
 struct request_list {
 	struct hid_device *hid;
@@ -123,14 +122,24 @@ static void amd_sfh_work_buffer(struct work_struct *work)
 	int i;
 
 	for (i = 0; i < cli_data->num_hid_devices; i++) {
-		report_size = get_input_report(i, cli_data->sensor_idx[i], cli_data->report_id[i],
-					       in_data);
-		hid_input_report(cli_data->hid_sensor_hubs[i], HID_INPUT_REPORT,
-				 in_data->input_report[i], report_size, 0);
+		if (cli_data->sensor_sts[i] == SENSOR_ENABLED) {
+			report_size = get_input_report
+				(i, cli_data->sensor_idx[i], cli_data->report_id[i], in_data);
+			hid_input_report(cli_data->hid_sensor_hubs[i], HID_INPUT_REPORT,
+					 in_data->input_report[i], report_size, 0);
+		}
 	}
 	schedule_delayed_work(&cli_data->work_buffer, msecs_to_jiffies(AMD_SFH_IDLE_LOOP));
 }
 
+u32 amd_sfh_wait_for_response(struct amd_mp2_dev *mp2, u8 sid, u32 sensor_sts)
+{
+	if (mp2->mp2_ops->response)
+		sensor_sts = mp2->mp2_ops->response(mp2, sid, sensor_sts);
+
+	return sensor_sts;
+}
+
 int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata)
 {
 	struct amd_input_data *in_data = &privdata->in_data;
@@ -139,8 +148,8 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata)
 	struct device *dev;
 	u32 feature_report_size;
 	u32 input_report_size;
+	int rc, i, status;
 	u8 cl_idx;
-	int rc, i;
 
 	dev = &privdata->pdev->dev;
 
@@ -155,7 +164,7 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata)
 		in_data->sensor_virt_addr[i] = dma_alloc_coherent(dev, sizeof(int) * 8,
 								  &cl_data->sensor_dma_addr[i],
 								  GFP_KERNEL);
-		cl_data->sensor_sts[i] = 0;
+		cl_data->sensor_sts[i] = SENSOR_DISABLED;
 		cl_data->sensor_requested_cnt[i] = 0;
 		cl_data->cur_hid_dev = i;
 		cl_idx = cl_data->sensor_idx[i];
@@ -184,7 +193,7 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata)
 			rc = -ENOMEM;
 			goto cleanup;
 		}
-		info.period = msecs_to_jiffies(AMD_SFH_IDLE_LOOP);
+		info.period = AMD_SFH_IDLE_LOOP;
 		info.sensor_idx = cl_idx;
 		info.dma_address = cl_data->sensor_dma_addr[i];
 
@@ -197,11 +206,25 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata)
 		rc = get_report_descriptor(cl_idx, cl_data->report_descr[i]);
 		if (rc)
 			return rc;
-		rc = amdtp_hid_probe(cl_data->cur_hid_dev, cl_data);
-		if (rc)
-			return rc;
 		privdata->mp2_ops->start(privdata, info);
-		cl_data->sensor_sts[i] = 1;
+		status = amd_sfh_wait_for_response
+				(privdata, cl_data->sensor_idx[i], SENSOR_ENABLED);
+		if (status == SENSOR_ENABLED) {
+			cl_data->sensor_sts[i] = SENSOR_ENABLED;
+			rc = amdtp_hid_probe(cl_data->cur_hid_dev, cl_data);
+			if (rc) {
+				privdata->mp2_ops->stop(privdata, cl_data->sensor_idx[i]);
+				status = amd_sfh_wait_for_response
+					(privdata, cl_data->sensor_idx[i], SENSOR_DISABLED);
+				if (status != SENSOR_ENABLED)
+					cl_data->sensor_sts[i] = SENSOR_DISABLED;
+				dev_dbg(dev, "sid 0x%x status 0x%x\n",
+					cl_data->sensor_idx[i], cl_data->sensor_sts[i]);
+				goto cleanup;
+			}
+		}
+		dev_dbg(dev, "sid 0x%x status 0x%x\n",
+			cl_data->sensor_idx[i], cl_data->sensor_sts[i]);
 	}
 	schedule_delayed_work(&cl_data->work_buffer, msecs_to_jiffies(AMD_SFH_IDLE_LOOP));
 	return 0;
@@ -224,10 +247,19 @@ int amd_sfh_hid_client_deinit(struct amd_mp2_dev *privdata)
 {
 	struct amdtp_cl_data *cl_data = privdata->cl_data;
 	struct amd_input_data *in_data = cl_data->in_data;
-	int i;
+	int i, status;
 
-	for (i = 0; i < cl_data->num_hid_devices; i++)
-		privdata->mp2_ops->stop(privdata, i);
+	for (i = 0; i < cl_data->num_hid_devices; i++) {
+		if (cl_data->sensor_sts[i] == SENSOR_ENABLED) {
+			privdata->mp2_ops->stop(privdata, cl_data->sensor_idx[i]);
+			status = amd_sfh_wait_for_response
+					(privdata, cl_data->sensor_idx[i], SENSOR_DISABLED);
+			if (status != SENSOR_ENABLED)
+				cl_data->sensor_sts[i] = SENSOR_DISABLED;
+			dev_dbg(&privdata->pdev->dev, "stopping sid 0x%x status 0x%x\n",
+				cl_data->sensor_idx[i], cl_data->sensor_sts[i]);
+		}
+	}
 
 	cancel_delayed_work_sync(&cl_data->work);
 	cancel_delayed_work_sync(&cl_data->work_buffer);
diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c
index 8d68796aa905..79b138fd4261 100644
--- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c
+++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c
@@ -13,6 +13,7 @@
 #include <linux/dmi.h>
 #include <linux/interrupt.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/iopoll.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 
@@ -31,6 +32,20 @@ static int sensor_mask_override = -1;
 module_param_named(sensor_mask, sensor_mask_override, int, 0444);
 MODULE_PARM_DESC(sensor_mask, "override the detected sensors mask");
 
+static int amd_sfh_wait_response_v2(struct amd_mp2_dev *mp2, u8 sid, u32 sensor_sts)
+{
+	union cmd_response cmd_resp;
+
+	/* Get response with status within a max of 800 ms timeout */
+	if (!readl_poll_timeout(mp2->mmio + AMD_P2C_MSG(0), cmd_resp.resp,
+				(cmd_resp.response_v2.response == sensor_sts &&
+				cmd_resp.response_v2.status == 0 && (sid == 0xff ||
+				cmd_resp.response_v2.sensor_id == sid)), 500, 800000))
+		return cmd_resp.response_v2.response;
+
+	return SENSOR_DISABLED;
+}
+
 static void amd_start_sensor_v2(struct amd_mp2_dev *privdata, struct amd_mp2_sensor_info info)
 {
 	union sfh_cmd_base cmd_base;
@@ -183,6 +198,7 @@ static const struct amd_mp2_ops amd_sfh_ops_v2 = {
 	.start = amd_start_sensor_v2,
 	.stop = amd_stop_sensor_v2,
 	.stop_all = amd_stop_all_sensor_v2,
+	.response = amd_sfh_wait_response_v2,
 };
 
 static const struct amd_mp2_ops amd_sfh_ops = {
@@ -248,6 +264,58 @@ static int amd_mp2_pci_probe(struct pci_dev *pdev, const struct pci_device_id *i
 	return amd_sfh_hid_client_init(privdata);
 }
 
+static int __maybe_unused amd_mp2_pci_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct amd_mp2_dev *mp2 = pci_get_drvdata(pdev);
+	struct amdtp_cl_data *cl_data = mp2->cl_data;
+	struct amd_mp2_sensor_info info;
+	int i, status;
+
+	for (i = 0; i < cl_data->num_hid_devices; i++) {
+		if (cl_data->sensor_sts[i] == SENSOR_DISABLED) {
+			info.period = AMD_SFH_IDLE_LOOP;
+			info.sensor_idx = cl_data->sensor_idx[i];
+			info.dma_address = cl_data->sensor_dma_addr[i];
+			mp2->mp2_ops->start(mp2, info);
+			status = amd_sfh_wait_for_response
+					(mp2, cl_data->sensor_idx[i], SENSOR_ENABLED);
+			if (status == SENSOR_ENABLED)
+				cl_data->sensor_sts[i] = SENSOR_ENABLED;
+			dev_dbg(dev, "resume sid 0x%x status 0x%x\n",
+				cl_data->sensor_idx[i], cl_data->sensor_sts[i]);
+		}
+	}
+
+	return 0;
+}
+
+static int __maybe_unused amd_mp2_pci_suspend(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct amd_mp2_dev *mp2 = pci_get_drvdata(pdev);
+	struct amdtp_cl_data *cl_data = mp2->cl_data;
+	int i, status;
+
+	for (i = 0; i < cl_data->num_hid_devices; i++) {
+		if (cl_data->sensor_idx[i] != HPD_IDX &&
+		    cl_data->sensor_sts[i] == SENSOR_ENABLED) {
+			mp2->mp2_ops->stop(mp2, cl_data->sensor_idx[i]);
+			status = amd_sfh_wait_for_response
+					(mp2, cl_data->sensor_idx[i], SENSOR_DISABLED);
+			if (status != SENSOR_ENABLED)
+				cl_data->sensor_sts[i] = SENSOR_DISABLED;
+			dev_dbg(dev, "suspend sid 0x%x status 0x%x\n",
+				cl_data->sensor_idx[i], cl_data->sensor_sts[i]);
+		}
+	}
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(amd_mp2_pm_ops, amd_mp2_pci_suspend,
+		amd_mp2_pci_resume);
+
 static const struct pci_device_id amd_mp2_pci_tbl[] = {
 	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_MP2) },
 	{ }
@@ -258,6 +326,7 @@ static struct pci_driver amd_mp2_pci_driver = {
 	.name		= DRIVER_NAME,
 	.id_table	= amd_mp2_pci_tbl,
 	.probe		= amd_mp2_pci_probe,
+	.driver.pm	= &amd_mp2_pm_ops,
 };
 module_pci_driver(amd_mp2_pci_driver);
 
diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h
index 2d5c57e3782d..1ff6f83cb6fd 100644
--- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h
+++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h
@@ -24,14 +24,20 @@
 #define AMD_C2P_MSG2	0x10508
 
 #define AMD_C2P_MSG(regno) (0x10500 + ((regno) * 4))
+#define AMD_P2C_MSG(regno) (0x10680 + ((regno) * 4))
 
 /* MP2 P2C Message Registers */
 #define AMD_P2C_MSG3	0x1068C /* Supported Sensors info */
 
 #define V2_STATUS	0x2
 
+#define SENSOR_ENABLED     4
+#define SENSOR_DISABLED    5
+
 #define HPD_IDX		16
 
+#define AMD_SFH_IDLE_LOOP	200
+
 /* SFH Command register */
 union sfh_cmd_base {
 	u32 ul;
@@ -51,6 +57,19 @@ union sfh_cmd_base {
 	} cmd_v2;
 };
 
+union cmd_response {
+	u32 resp;
+	struct {
+		u32 status	: 2;
+		u32 out_in_c2p	: 1;
+		u32 rsvd1	: 1;
+		u32 response	: 4;
+		u32 sub_cmd	: 8;
+		u32 sensor_id	: 6;
+		u32 rsvd2	: 10;
+	} response_v2;
+};
+
 union sfh_cmd_param {
 	u32 ul;
 	struct {
@@ -112,10 +131,14 @@ void amd_stop_all_sensors(struct amd_mp2_dev *privdata);
 int amd_mp2_get_sensor_num(struct amd_mp2_dev *privdata, u8 *sensor_id);
 int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata);
 int amd_sfh_hid_client_deinit(struct amd_mp2_dev *privdata);
+u32 amd_sfh_wait_for_response(struct amd_mp2_dev *mp2, u8 sid, u32 sensor_sts);
+void amd_mp2_suspend(struct amd_mp2_dev *mp2);
+void amd_mp2_resume(struct amd_mp2_dev *mp2);
 
 struct amd_mp2_ops {
 	 void (*start)(struct amd_mp2_dev *privdata, struct amd_mp2_sensor_info info);
 	 void (*stop)(struct amd_mp2_dev *privdata, u16 sensor_idx);
 	 void (*stop_all)(struct amd_mp2_dev *privdata);
+	 int (*response)(struct amd_mp2_dev *mp2, u8 sid, u32 sensor_sts);
 };
 #endif
diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c
index dc6bd4299c54..833fcf07ff35 100644
--- a/drivers/hid/hid-apple.c
+++ b/drivers/hid/hid-apple.c
@@ -187,6 +187,15 @@ static const struct apple_key_translation *apple_find_translation(
 	return NULL;
 }
 
+static void input_event_with_scancode(struct input_dev *input,
+		__u8 type, __u16 code, unsigned int hid, __s32 value)
+{
+	if (type == EV_KEY &&
+	    (!test_bit(code, input->key)) == value)
+		input_event(input, EV_MSC, MSC_SCAN, hid);
+	input_event(input, type, code, value);
+}
+
 static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input,
 		struct hid_usage *usage, __s32 value)
 {
@@ -199,7 +208,8 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input,
 
 	if (usage->code == fn_keycode) {
 		asc->fn_on = !!value;
-		input_event(input, usage->type, KEY_FN, value);
+		input_event_with_scancode(input, usage->type, KEY_FN,
+				usage->hid, value);
 		return 1;
 	}
 
@@ -240,7 +250,8 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input,
 				code = do_translate ? trans->to : trans->from;
 			}
 
-			input_event(input, usage->type, code, value);
+			input_event_with_scancode(input, usage->type, code,
+					usage->hid, value);
 			return 1;
 		}
 
@@ -258,8 +269,8 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input,
 					clear_bit(usage->code,
 							asc->pressed_numlock);
 
-				input_event(input, usage->type, trans->to,
-						value);
+				input_event_with_scancode(input, usage->type,
+						trans->to, usage->hid, value);
 			}
 
 			return 1;
@@ -270,7 +281,8 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input,
 		if (hid->country == HID_COUNTRY_INTERNATIONAL_ISO) {
 			trans = apple_find_translation(apple_iso_keyboard, usage->code);
 			if (trans) {
-				input_event(input, usage->type, trans->to, value);
+				input_event_with_scancode(input, usage->type,
+						trans->to, usage->hid, value);
 				return 1;
 			}
 		}
@@ -279,7 +291,8 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input,
 	if (swap_opt_cmd) {
 		trans = apple_find_translation(swapped_option_cmd_keys, usage->code);
 		if (trans) {
-			input_event(input, usage->type, trans->to, value);
+			input_event_with_scancode(input, usage->type,
+					trans->to, usage->hid, value);
 			return 1;
 		}
 	}
@@ -287,7 +300,8 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input,
 	if (swap_fn_leftctrl) {
 		trans = apple_find_translation(swapped_fn_leftctrl_keys, usage->code);
 		if (trans) {
-			input_event(input, usage->type, trans->to, value);
+			input_event_with_scancode(input, usage->type,
+					trans->to, usage->hid, value);
 			return 1;
 		}
 	}
@@ -306,8 +320,8 @@ static int apple_event(struct hid_device *hdev, struct hid_field *field,
 
 	if ((asc->quirks & APPLE_INVERT_HWHEEL) &&
 			usage->code == REL_HWHEEL) {
-		input_event(field->hidinput->input, usage->type, usage->code,
-				-value);
+		input_event_with_scancode(field->hidinput->input, usage->type,
+				usage->code, usage->hid, -value);
 		return 1;
 	}
 
diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c
index fb807c8e989b..f3ecddc519ee 100644
--- a/drivers/hid/hid-asus.c
+++ b/drivers/hid/hid-asus.c
@@ -82,6 +82,7 @@ MODULE_DESCRIPTION("Asus HID Keyboard and TouchPad");
 #define QUIRK_T90CHI			BIT(9)
 #define QUIRK_MEDION_E1239T		BIT(10)
 #define QUIRK_ROG_NKEY_KEYBOARD		BIT(11)
+#define QUIRK_ROG_CLAYMORE_II_KEYBOARD BIT(12)
 
 #define I2C_KEYBOARD_QUIRKS			(QUIRK_FIX_NOTEBOOK_REPORT | \
 						 QUIRK_NO_INIT_REPORTS | \
@@ -366,6 +367,17 @@ static int asus_raw_event(struct hid_device *hdev,
 
 	}
 
+	if (drvdata->quirks & QUIRK_ROG_CLAYMORE_II_KEYBOARD) {
+		/*
+		 * CLAYMORE II keyboard sends this packet when it goes to sleep
+		 * this causes the whole system to go into suspend.
+		*/
+
+		if(size == 2 && data[0] == 0x02 && data[1] == 0x00) {
+			return -1;
+		}
+	}
+
 	return 0;
 }
 
@@ -1226,6 +1238,9 @@ static const struct hid_device_id asus_devices[] = {
 	    USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD2),
 	  QUIRK_USE_KBD_BACKLIGHT | QUIRK_ROG_NKEY_KEYBOARD },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK,
+	    USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD),
+	  QUIRK_ROG_CLAYMORE_II_KEYBOARD },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK,
 		USB_DEVICE_ID_ASUSTEK_T100TA_KEYBOARD),
 	  QUIRK_T100_KEYBOARD | QUIRK_NO_CONSUMER_USAGES },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK,
diff --git a/drivers/hid/hid-cmedia.c b/drivers/hid/hid-cmedia.c
index 3296c5050264..cab42047bc99 100644
--- a/drivers/hid/hid-cmedia.c
+++ b/drivers/hid/hid-cmedia.c
@@ -1,8 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * HID driver for CMedia CM6533 audio jack controls
+ * and HS100B mute buttons
  *
  * Copyright (C) 2015 Ben Chen <ben_chen@bizlinktech.com>
+ * Copyright (C) 2021 Thomas Weißschuh <linux@weissschuh.net>
  */
 
 #include <linux/device.h>
@@ -11,13 +13,53 @@
 #include "hid-ids.h"
 
 MODULE_AUTHOR("Ben Chen");
-MODULE_DESCRIPTION("CM6533 HID jack controls");
+MODULE_AUTHOR("Thomas Weißschuh");
+MODULE_DESCRIPTION("CM6533 HID jack controls and HS100B mute button");
 MODULE_LICENSE("GPL");
 
 #define CM6533_JD_TYPE_COUNT      1
 #define CM6533_JD_RAWEV_LEN	 16
 #define CM6533_JD_SFX_OFFSET	  8
 
+#define HS100B_RDESC_ORIG_SIZE   60
+
+/* Fixed report descriptor of HS-100B audio chip
+ * Bit 4 is an abolute Microphone mute usage instead of being unassigned.
+ */
+static __u8 hs100b_rdesc_fixed[] = {
+	0x05, 0x0C,         /*  Usage Page (Consumer),          */
+	0x09, 0x01,         /*  Usage (Consumer Control),       */
+	0xA1, 0x01,         /*  Collection (Application),       */
+	0x15, 0x00,         /*      Logical Minimum (0),        */
+	0x25, 0x01,         /*      Logical Maximum (1),        */
+	0x09, 0xE9,         /*      Usage (Volume Inc),         */
+	0x09, 0xEA,         /*      Usage (Volume Dec),         */
+	0x75, 0x01,         /*      Report Size (1),            */
+	0x95, 0x02,         /*      Report Count (2),           */
+	0x81, 0x02,         /*      Input (Variable),           */
+	0x09, 0xE2,         /*      Usage (Mute),               */
+	0x95, 0x01,         /*      Report Count (1),           */
+	0x81, 0x06,         /*      Input (Variable, Relative), */
+	0x05, 0x0B,         /*      Usage Page (Telephony),     */
+	0x09, 0x2F,         /*      Usage (2Fh),                */
+	0x81, 0x02,         /*      Input (Variable),           */
+	0x09, 0x20,         /*      Usage (20h),                */
+	0x81, 0x06,         /*      Input (Variable, Relative), */
+	0x05, 0x0C,         /*      Usage Page (Consumer),      */
+	0x09, 0x00,         /*      Usage (00h),                */
+	0x95, 0x03,         /*      Report Count (3),           */
+	0x81, 0x02,         /*      Input (Variable),           */
+	0x26, 0xFF, 0x00,   /*      Logical Maximum (255),      */
+	0x09, 0x00,         /*      Usage (00h),                */
+	0x75, 0x08,         /*      Report Size (8),            */
+	0x95, 0x03,         /*      Report Count (3),           */
+	0x81, 0x02,         /*      Input (Variable),           */
+	0x09, 0x00,         /*      Usage (00h),                */
+	0x95, 0x04,         /*      Report Count (4),           */
+	0x91, 0x02,         /*      Output (Variable),          */
+	0xC0                /*  End Collection                  */
+};
+
 /*
 *
 *CM6533 audio jack HID raw events:
@@ -156,5 +198,49 @@ static struct hid_driver cmhid_driver = {
 	.remove = cmhid_remove,
 	.input_mapping = cmhid_input_mapping,
 };
-module_hid_driver(cmhid_driver);
 
+static __u8 *cmhid_hs100b_report_fixup(struct hid_device *hid, __u8 *rdesc,
+				       unsigned int *rsize)
+{
+	if (*rsize == HS100B_RDESC_ORIG_SIZE) {
+		hid_info(hid, "Fixing CMedia HS-100B report descriptor\n");
+		rdesc = hs100b_rdesc_fixed;
+		*rsize = sizeof(hs100b_rdesc_fixed);
+	}
+	return rdesc;
+}
+
+static const struct hid_device_id cmhid_hs100b_devices[] = {
+	{ HID_USB_DEVICE(USB_VENDOR_ID_CMEDIA, USB_DEVICE_ID_CMEDIA_HS100B) },
+	{ }
+};
+MODULE_DEVICE_TABLE(hid, cmhid_hs100b_devices);
+
+static struct hid_driver cmhid_hs100b_driver = {
+	.name = "cmedia_hs100b",
+	.id_table = cmhid_hs100b_devices,
+	.report_fixup = cmhid_hs100b_report_fixup,
+};
+
+static int cmedia_init(void)
+{
+	int ret;
+
+	ret = hid_register_driver(&cmhid_driver);
+	if (ret)
+		return ret;
+
+	ret = hid_register_driver(&cmhid_hs100b_driver);
+	if (ret)
+		hid_unregister_driver(&cmhid_driver);
+
+	return ret;
+}
+module_init(cmedia_init);
+
+static void cmedia_exit(void)
+{
+		hid_unregister_driver(&cmhid_driver);
+		hid_unregister_driver(&cmhid_hs100b_driver);
+}
+module_exit(cmedia_exit);
diff --git a/drivers/hid/hid-elo.c b/drivers/hid/hid-elo.c
index 0d22713a3874..383dfda8c12f 100644
--- a/drivers/hid/hid-elo.c
+++ b/drivers/hid/hid-elo.c
@@ -228,13 +228,15 @@ static int elo_probe(struct hid_device *hdev, const struct hid_device_id *id)
 {
 	struct elo_priv *priv;
 	int ret;
+	struct usb_device *udev;
 
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
 
 	INIT_DELAYED_WORK(&priv->work, elo_work);
-	priv->usbdev = interface_to_usbdev(to_usb_interface(hdev->dev.parent));
+	udev = interface_to_usbdev(to_usb_interface(hdev->dev.parent));
+	priv->usbdev = usb_get_dev(udev);
 
 	hid_set_drvdata(hdev, priv);
 
@@ -265,6 +267,8 @@ static void elo_remove(struct hid_device *hdev)
 {
 	struct elo_priv *priv = hid_get_drvdata(hdev);
 
+	usb_put_dev(priv->usbdev);
+
 	hid_hw_stop(hdev);
 	cancel_delayed_work_sync(&priv->work);
 	kfree(priv);
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 8f1893e68112..29564b370341 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -41,9 +41,6 @@
 #define USB_VENDOR_ID_ACTIONSTAR	0x2101
 #define USB_DEVICE_ID_ACTIONSTAR_1011	0x1011
 
-#define USB_VENDOR_ID_ACTIVISION	0x1430
-#define USB_DEVICE_ID_ACTIVISION_GUITAR_DONGLE	0x474c
-
 #define USB_VENDOR_ID_ADS_TECH		0x06e1
 #define USB_DEVICE_ID_ADS_TECH_RADIO_SI470X	0xa155
 
@@ -197,6 +194,7 @@
 #define USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD3 0x1822
 #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD	0x1866
 #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD2	0x19b6
+#define USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD	0x196b
 #define USB_DEVICE_ID_ASUSTEK_FX503VD_KEYBOARD	0x1869
 
 #define USB_VENDOR_ID_ATEN		0x0557
@@ -292,6 +290,7 @@
 
 #define USB_VENDOR_ID_CMEDIA		0x0d8c
 #define USB_DEVICE_ID_CM109		0x000e
+#define USB_DEVICE_ID_CMEDIA_HS100B	0x0014
 #define USB_DEVICE_ID_CM6533		0x0022
 
 #define USB_VENDOR_ID_CODEMERCS		0x07c0
@@ -1018,6 +1017,10 @@
 #define USB_VENDOR_ID_REALTEK		0x0bda
 #define USB_DEVICE_ID_REALTEK_READER	0x0152
 
+#define USB_VENDOR_ID_REDOCTANE		0x1430
+#define USB_DEVICE_ID_REDOCTANE_GUITAR_DONGLE	0x474c
+#define USB_DEVICE_ID_REDOCTANE_PS4_GHLIVE_DONGLE	0x07bb
+
 #define USB_VENDOR_ID_RETROUSB		0xf000
 #define USB_DEVICE_ID_RETROUSB_SNES_RETROPAD	0x0003
 #define USB_DEVICE_ID_RETROUSB_SNES_RETROPORT	0x00f1
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 4286a51f7f16..4b5ebeacd283 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -419,8 +419,6 @@ static int hidinput_get_battery_property(struct power_supply *psy,
 
 		if (dev->battery_status == HID_BATTERY_UNKNOWN)
 			val->intval = POWER_SUPPLY_STATUS_UNKNOWN;
-		else if (dev->battery_capacity == 100)
-			val->intval = POWER_SUPPLY_STATUS_FULL;
 		else
 			val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
 		break;
diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c
index 61635e629469..81de88ab2ecc 100644
--- a/drivers/hid/hid-logitech-hidpp.c
+++ b/drivers/hid/hid-logitech-hidpp.c
@@ -1331,6 +1331,43 @@ static int hidpp20_battery_get_battery_voltage(struct hidpp_device *hidpp,
 	return 0;
 }
 
+static int hidpp20_map_battery_capacity(struct hid_device *hid_dev, int voltage)
+{
+	/* NB: This voltage curve doesn't necessarily map perfectly to all
+	 * devices that implement the BATTERY_VOLTAGE feature. This is because
+	 * there are a few devices that use different battery technology.
+	 */
+
+	static const int voltages[] = {
+		4186, 4156, 4143, 4133, 4122, 4113, 4103, 4094, 4086, 4075,
+		4067, 4059, 4051, 4043, 4035, 4027, 4019, 4011, 4003, 3997,
+		3989, 3983, 3976, 3969, 3961, 3955, 3949, 3942, 3935, 3929,
+		3922, 3916, 3909, 3902, 3896, 3890, 3883, 3877, 3870, 3865,
+		3859, 3853, 3848, 3842, 3837, 3833, 3828, 3824, 3819, 3815,
+		3811, 3808, 3804, 3800, 3797, 3793, 3790, 3787, 3784, 3781,
+		3778, 3775, 3772, 3770, 3767, 3764, 3762, 3759, 3757, 3754,
+		3751, 3748, 3744, 3741, 3737, 3734, 3730, 3726, 3724, 3720,
+		3717, 3714, 3710, 3706, 3702, 3697, 3693, 3688, 3683, 3677,
+		3671, 3666, 3662, 3658, 3654, 3646, 3633, 3612, 3579, 3537
+	};
+
+	int i;
+
+	BUILD_BUG_ON(ARRAY_SIZE(voltages) != 100);
+
+	if (unlikely(voltage < 3500 || voltage >= 5000))
+		hid_warn_once(hid_dev,
+			      "%s: possibly using the wrong voltage curve\n",
+			      __func__);
+
+	for (i = 0; i < ARRAY_SIZE(voltages); i++) {
+		if (voltage >= voltages[i])
+			return ARRAY_SIZE(voltages) - i;
+	}
+
+	return 0;
+}
+
 static int hidpp20_query_battery_voltage_info(struct hidpp_device *hidpp)
 {
 	u8 feature_type;
@@ -1354,6 +1391,8 @@ static int hidpp20_query_battery_voltage_info(struct hidpp_device *hidpp)
 
 	hidpp->battery.status = status;
 	hidpp->battery.voltage = voltage;
+	hidpp->battery.capacity = hidpp20_map_battery_capacity(hidpp->hid_dev,
+							       voltage);
 	hidpp->battery.level = level;
 	hidpp->battery.charge_type = charge_type;
 	hidpp->battery.online = status != POWER_SUPPLY_STATUS_NOT_CHARGING;
@@ -1378,6 +1417,8 @@ static int hidpp20_battery_voltage_event(struct hidpp_device *hidpp,
 
 	if (voltage != hidpp->battery.voltage || status != hidpp->battery.status) {
 		hidpp->battery.voltage = voltage;
+		hidpp->battery.capacity = hidpp20_map_battery_capacity(hidpp->hid_dev,
+								       voltage);
 		hidpp->battery.status = status;
 		hidpp->battery.level = level;
 		hidpp->battery.charge_type = charge_type;
@@ -2240,11 +2281,10 @@ static int hidpp_ff_queue_work(struct hidpp_ff_private_data *data, int effect_id
 	wd->size = size;
 	memcpy(wd->params, params, size);
 
-	atomic_inc(&data->workqueue_size);
+	s = atomic_inc_return(&data->workqueue_size);
 	queue_work(data->wq, &wd->work);
 
 	/* warn about excessive queue size */
-	s = atomic_read(&data->workqueue_size);
 	if (s >= 20 && s % 20 == 0)
 		hid_warn(data->hidpp->hid_dev, "Force feedback command queue contains %d commands, causing substantial delays!", s);
 
@@ -3717,7 +3757,8 @@ static int hidpp_initialize_battery(struct hidpp_device *hidpp)
 	num_battery_props = ARRAY_SIZE(hidpp_battery_props) - 3;
 
 	if (hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_MILEAGE ||
-	    hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_PERCENTAGE)
+	    hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_PERCENTAGE ||
+	    hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_VOLTAGE)
 		battery_props[num_battery_props++] =
 				POWER_SUPPLY_PROP_CAPACITY;
 
diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c
index 8bcaee4ccae0..686788ebf3e1 100644
--- a/drivers/hid/hid-magicmouse.c
+++ b/drivers/hid/hid-magicmouse.c
@@ -68,6 +68,10 @@ MODULE_PARM_DESC(report_undeciphered, "Report undeciphered multi-touch state fie
 #define TOUCH_STATE_START 0x30
 #define TOUCH_STATE_DRAG  0x40
 
+/* Number of high-resolution events for each low-resolution detent. */
+#define SCROLL_HR_STEPS 10
+#define SCROLL_HR_MULT (120 / SCROLL_HR_STEPS)
+#define SCROLL_HR_THRESHOLD 90 /* units */
 #define SCROLL_ACCEL_DEFAULT 7
 
 /* Touch surface information. Dimension is in hundredths of a mm, min and max
@@ -126,7 +130,11 @@ struct magicmouse_sc {
 		short y;
 		short scroll_x;
 		short scroll_y;
+		short scroll_x_hr;
+		short scroll_y_hr;
 		u8 size;
+		bool scroll_x_active;
+		bool scroll_y_active;
 	} touches[16];
 	int tracking_ids[16];
 
@@ -248,12 +256,20 @@ static void magicmouse_emit_touch(struct magicmouse_sc *msc, int raw_id, u8 *tda
 		unsigned long now = jiffies;
 		int step_x = msc->touches[id].scroll_x - x;
 		int step_y = msc->touches[id].scroll_y - y;
+		int step_hr = ((64 - (int)scroll_speed) * msc->scroll_accel) /
+			      SCROLL_HR_STEPS;
+		int step_x_hr = msc->touches[id].scroll_x_hr - x;
+		int step_y_hr = msc->touches[id].scroll_y_hr - y;
 
 		/* Calculate and apply the scroll motion. */
 		switch (state) {
 		case TOUCH_STATE_START:
 			msc->touches[id].scroll_x = x;
 			msc->touches[id].scroll_y = y;
+			msc->touches[id].scroll_x_hr = x;
+			msc->touches[id].scroll_y_hr = y;
+			msc->touches[id].scroll_x_active = false;
+			msc->touches[id].scroll_y_active = false;
 
 			/* Reset acceleration after half a second. */
 			if (scroll_acceleration && time_before(now,
@@ -280,6 +296,40 @@ static void magicmouse_emit_touch(struct magicmouse_sc *msc, int raw_id, u8 *tda
 				msc->scroll_jiffies = now;
 				input_report_rel(input, REL_WHEEL, step_y);
 			}
+
+			if (!msc->touches[id].scroll_x_active &&
+			    abs(step_x_hr) > SCROLL_HR_THRESHOLD) {
+				msc->touches[id].scroll_x_active = true;
+				msc->touches[id].scroll_x_hr = x;
+				step_x_hr = 0;
+			}
+
+			step_x_hr /= step_hr;
+			if (step_x_hr != 0 &&
+			    msc->touches[id].scroll_x_active) {
+				msc->touches[id].scroll_x_hr -= step_x_hr *
+					step_hr;
+				input_report_rel(input,
+						 REL_HWHEEL_HI_RES,
+						 -step_x_hr * SCROLL_HR_MULT);
+			}
+
+			if (!msc->touches[id].scroll_y_active &&
+			    abs(step_y_hr) > SCROLL_HR_THRESHOLD) {
+				msc->touches[id].scroll_y_active = true;
+				msc->touches[id].scroll_y_hr = y;
+				step_y_hr = 0;
+			}
+
+			step_y_hr /= step_hr;
+			if (step_y_hr != 0 &&
+			    msc->touches[id].scroll_y_active) {
+				msc->touches[id].scroll_y_hr -= step_y_hr *
+					step_hr;
+				input_report_rel(input,
+						 REL_WHEEL_HI_RES,
+						 step_y_hr * SCROLL_HR_MULT);
+			}
 			break;
 		}
 	}
@@ -481,6 +531,8 @@ static int magicmouse_setup_input(struct input_dev *input, struct hid_device *hd
 		if (emulate_scroll_wheel) {
 			__set_bit(REL_WHEEL, input->relbit);
 			__set_bit(REL_HWHEEL, input->relbit);
+			__set_bit(REL_WHEEL_HI_RES, input->relbit);
+			__set_bit(REL_HWHEEL_HI_RES, input->relbit);
 		}
 	} else if (input->id.product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) {
 		/* setting the device name to ensure the same driver settings
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
index 51b39bda9a9d..2e104682c22b 100644
--- a/drivers/hid/hid-quirks.c
+++ b/drivers/hid/hid-quirks.c
@@ -662,8 +662,6 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb653) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb654) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb65a) },
-#endif
-#if IS_ENABLED(CONFIG_HID_TMINIT)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb65d) },
 #endif
 #if IS_ENABLED(CONFIG_HID_TIVO)
diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
index b3722c51ec78..d1b107d547f5 100644
--- a/drivers/hid/hid-sony.c
+++ b/drivers/hid/hid-sony.c
@@ -11,8 +11,9 @@
  *  Copyright (c) 2013 Colin Leitner <colin.leitner@gmail.com>
  *  Copyright (c) 2014-2016 Frank Praznik <frank.praznik@gmail.com>
  *  Copyright (c) 2018 Todd Kelner
- *  Copyright (c) 2020 Pascal Giard <pascal.giard@etsmtl.ca>
+ *  Copyright (c) 2020-2021 Pascal Giard <pascal.giard@etsmtl.ca>
  *  Copyright (c) 2020 Sanjay Govind <sanjay.govind9@gmail.com>
+ *  Copyright (c) 2021 Daniel Nguyen <daniel.nguyen.1@ens.etsmtl.ca>
  */
 
 /*
@@ -62,6 +63,7 @@
 #define SHANWAN_GAMEPAD           BIT(16)
 #define GH_GUITAR_CONTROLLER      BIT(17)
 #define GHL_GUITAR_PS3WIIU        BIT(18)
+#define GHL_GUITAR_PS4            BIT(19)
 
 #define SIXAXIS_CONTROLLER (SIXAXIS_CONTROLLER_USB | SIXAXIS_CONTROLLER_BT)
 #define MOTION_CONTROLLER (MOTION_CONTROLLER_USB | MOTION_CONTROLLER_BT)
@@ -85,18 +87,27 @@
 #define NSG_MRXU_MAX_X 1667
 #define NSG_MRXU_MAX_Y 1868
 
-#define GHL_GUITAR_POKE_INTERVAL 10 /* In seconds */
+/* The PS3/Wii U dongles require a poke every 10 seconds, but the PS4
+ * requires one every 8 seconds. Using 8 seconds for all for simplicity.
+ */
+#define GHL_GUITAR_POKE_INTERVAL 8 /* In seconds */
 #define GUITAR_TILT_USAGE 44
 
-/* Magic value and data taken from GHLtarUtility:
+/* Magic data taken from GHLtarUtility:
  * https://github.com/ghlre/GHLtarUtility/blob/master/PS3Guitar.cs
  * Note: The Wii U and PS3 dongles happen to share the same!
  */
-static const u16 ghl_ps3wiiu_magic_value = 0x201;
 static const char ghl_ps3wiiu_magic_data[] = {
 	0x02, 0x08, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00
 };
 
+/* Magic data for the PS4 dongles sniffed with a USB protocol
+ * analyzer.
+ */
+static const char ghl_ps4_magic_data[] = {
+	0x30, 0x02, 0x08, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
 /* PS/3 Motion controller */
 static u8 motion_rdesc[] = {
 	0x05, 0x01,         /*  Usage Page (Desktop),               */
@@ -642,14 +653,14 @@ static void ghl_magic_poke(struct timer_list *t)
 		hid_err(sc->hdev, "usb_submit_urb failed: %d", ret);
 }
 
-static int ghl_init_urb(struct sony_sc *sc, struct usb_device *usbdev)
+static int ghl_init_urb(struct sony_sc *sc, struct usb_device *usbdev,
+					   const char ghl_magic_data[], u16 poke_size)
 {
 	struct usb_ctrlrequest *cr;
-	u16 poke_size;
 	u8 *databuf;
 	unsigned int pipe;
+	u16 ghl_magic_value = (((HID_OUTPUT_REPORT + 1) << 8) | ghl_magic_data[0]);
 
-	poke_size = ARRAY_SIZE(ghl_ps3wiiu_magic_data);
 	pipe = usb_sndctrlpipe(usbdev, 0);
 
 	cr = devm_kzalloc(&sc->hdev->dev, sizeof(*cr), GFP_ATOMIC);
@@ -663,10 +674,10 @@ static int ghl_init_urb(struct sony_sc *sc, struct usb_device *usbdev)
 	cr->bRequestType =
 		USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_OUT;
 	cr->bRequest = USB_REQ_SET_CONFIGURATION;
-	cr->wValue = cpu_to_le16(ghl_ps3wiiu_magic_value);
+	cr->wValue = cpu_to_le16(ghl_magic_value);
 	cr->wIndex = 0;
 	cr->wLength = cpu_to_le16(poke_size);
-	memcpy(databuf, ghl_ps3wiiu_magic_data, poke_size);
+	memcpy(databuf, ghl_magic_data, poke_size);
 	usb_fill_control_urb(
 		sc->ghl_urb, usbdev, pipe,
 		(unsigned char *) cr, databuf, poke_size,
@@ -2974,7 +2985,8 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id)
 	if (!strcmp(hdev->name, "FutureMax Dance Mat"))
 		quirks |= FUTUREMAX_DANCE_MAT;
 
-	if (!strcmp(hdev->name, "SHANWAN PS3 GamePad"))
+	if (!strcmp(hdev->name, "SHANWAN PS3 GamePad") ||
+	    !strcmp(hdev->name, "ShanWan PS(R) Ga`epad"))
 		quirks |= SHANWAN_GAMEPAD;
 
 	sc = devm_kzalloc(&hdev->dev, sizeof(*sc), GFP_KERNEL);
@@ -3030,11 +3042,17 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id)
 		return -ENODEV;
 	}
 
-	if (sc->quirks & GHL_GUITAR_PS3WIIU) {
+	if (sc->quirks & (GHL_GUITAR_PS3WIIU | GHL_GUITAR_PS4)) {
 		sc->ghl_urb = usb_alloc_urb(0, GFP_ATOMIC);
 		if (!sc->ghl_urb)
 			return -ENOMEM;
-		ret = ghl_init_urb(sc, usbdev);
+
+		if (sc->quirks & GHL_GUITAR_PS3WIIU)
+			ret = ghl_init_urb(sc, usbdev, ghl_ps3wiiu_magic_data,
+							   ARRAY_SIZE(ghl_ps3wiiu_magic_data));
+		else if (sc->quirks & GHL_GUITAR_PS4)
+			ret = ghl_init_urb(sc, usbdev, ghl_ps4_magic_data,
+							   ARRAY_SIZE(ghl_ps4_magic_data));
 		if (ret) {
 			hid_err(hdev, "error preparing URB\n");
 			return ret;
@@ -3052,7 +3070,7 @@ static void sony_remove(struct hid_device *hdev)
 {
 	struct sony_sc *sc = hid_get_drvdata(hdev);
 
-	if (sc->quirks & GHL_GUITAR_PS3WIIU) {
+	if (sc->quirks & (GHL_GUITAR_PS3WIIU | GHL_GUITAR_PS4)) {
 		del_timer_sync(&sc->ghl_poke_timer);
 		usb_free_urb(sc->ghl_urb);
 	}
@@ -3172,11 +3190,14 @@ static const struct hid_device_id sony_devices[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY_RHYTHM, USB_DEVICE_ID_SONY_PS3WIIU_GHLIVE_DONGLE),
 		.driver_data = GHL_GUITAR_PS3WIIU | GH_GUITAR_CONTROLLER },
 	/* Guitar Hero PC Guitar Dongle */
-	{ HID_USB_DEVICE(USB_VENDOR_ID_ACTIVISION, USB_DEVICE_ID_ACTIVISION_GUITAR_DONGLE),
+	{ HID_USB_DEVICE(USB_VENDOR_ID_REDOCTANE, USB_DEVICE_ID_REDOCTANE_GUITAR_DONGLE),
 		.driver_data = GH_GUITAR_CONTROLLER },
 	/* Guitar Hero PS3 World Tour Guitar Dongle */
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY_RHYTHM, USB_DEVICE_ID_SONY_PS3_GUITAR_DONGLE),
 		.driver_data = GH_GUITAR_CONTROLLER },
+	/* Guitar Hero Live PS4 guitar dongles */
+	{ HID_USB_DEVICE(USB_VENDOR_ID_REDOCTANE, USB_DEVICE_ID_REDOCTANE_PS4_GHLIVE_DONGLE),
+		.driver_data = GHL_GUITAR_PS4 | GH_GUITAR_CONTROLLER },
 	{ }
 };
 MODULE_DEVICE_TABLE(hid, sony_devices);
diff --git a/drivers/hid/hid-thrustmaster.c b/drivers/hid/hid-thrustmaster.c
index cdc7d82ae9ed..d44550aa8805 100644
--- a/drivers/hid/hid-thrustmaster.c
+++ b/drivers/hid/hid-thrustmaster.c
@@ -173,6 +173,7 @@ static void thrustmaster_interrupts(struct hid_device *hdev)
 
 		if (ret) {
 			hid_err(hdev, "setup data couldn't be sent\n");
+			kfree(send_buf);
 			return;
 		}
 	}
@@ -253,6 +254,7 @@ static void thrustmaster_remove(struct hid_device *hdev)
 
 	usb_kill_urb(tm_wheel->urb);
 
+	kfree(tm_wheel->change_request);
 	kfree(tm_wheel->response);
 	kfree(tm_wheel->model_request);
 	usb_free_urb(tm_wheel->urb);
@@ -336,11 +338,14 @@ static int thrustmaster_probe(struct hid_device *hdev, const struct hid_device_i
 	);
 
 	ret = usb_submit_urb(tm_wheel->urb, GFP_ATOMIC);
-	if (ret)
+	if (ret) {
 		hid_err(hdev, "Error %d while submitting the URB. I am unable to initialize this wheel...\n", ret);
+		goto error6;
+	}
 
 	return ret;
 
+error6: kfree(tm_wheel->change_request);
 error5: kfree(tm_wheel->response);
 error4: kfree(tm_wheel->model_request);
 error3: usb_free_urb(tm_wheel->urb);
diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c
index 46474612e73c..517141138b00 100644
--- a/drivers/hid/i2c-hid/i2c-hid-core.c
+++ b/drivers/hid/i2c-hid/i2c-hid-core.c
@@ -171,8 +171,6 @@ static const struct i2c_hid_quirks {
 		I2C_HID_QUIRK_NO_IRQ_AFTER_RESET },
 	{ I2C_VENDOR_ID_RAYDIUM, I2C_PRODUCT_ID_RAYDIUM_3118,
 		I2C_HID_QUIRK_NO_IRQ_AFTER_RESET },
-	{ USB_VENDOR_ID_ELAN, HID_ANY_ID,
-		 I2C_HID_QUIRK_BOGUS_IRQ },
 	{ USB_VENDOR_ID_ALPS_JP, HID_ANY_ID,
 		 I2C_HID_QUIRK_RESET_ON_RESUME },
 	{ I2C_VENDOR_ID_SYNAPTICS, I2C_PRODUCT_ID_SYNAPTICS_SYNA2393,
@@ -183,7 +181,8 @@ static const struct i2c_hid_quirks {
 	 * Sending the wakeup after reset actually break ELAN touchscreen controller
 	 */
 	{ USB_VENDOR_ID_ELAN, HID_ANY_ID,
-		 I2C_HID_QUIRK_NO_WAKEUP_AFTER_RESET },
+		 I2C_HID_QUIRK_NO_WAKEUP_AFTER_RESET |
+		 I2C_HID_QUIRK_BOGUS_IRQ },
 	{ 0, 0 }
 };
 
diff --git a/drivers/hid/i2c-hid/i2c-hid-of-goodix.c b/drivers/hid/i2c-hid/i2c-hid-of-goodix.c
index ee0225982a82..52674149a275 100644
--- a/drivers/hid/i2c-hid/i2c-hid-of-goodix.c
+++ b/drivers/hid/i2c-hid/i2c-hid-of-goodix.c
@@ -26,28 +26,29 @@ struct i2c_hid_of_goodix {
 	struct i2chid_ops ops;
 
 	struct regulator *vdd;
+	struct notifier_block nb;
+	struct mutex regulator_mutex;
 	struct gpio_desc *reset_gpio;
 	const struct goodix_i2c_hid_timing_data *timings;
 };
 
-static int goodix_i2c_hid_power_up(struct i2chid_ops *ops)
+static void goodix_i2c_hid_deassert_reset(struct i2c_hid_of_goodix *ihid_goodix,
+					  bool regulator_just_turned_on)
 {
-	struct i2c_hid_of_goodix *ihid_goodix =
-		container_of(ops, struct i2c_hid_of_goodix, ops);
-	int ret;
-
-	ret = regulator_enable(ihid_goodix->vdd);
-	if (ret)
-		return ret;
-
-	if (ihid_goodix->timings->post_power_delay_ms)
+	if (regulator_just_turned_on && ihid_goodix->timings->post_power_delay_ms)
 		msleep(ihid_goodix->timings->post_power_delay_ms);
 
 	gpiod_set_value_cansleep(ihid_goodix->reset_gpio, 0);
 	if (ihid_goodix->timings->post_gpio_reset_delay_ms)
 		msleep(ihid_goodix->timings->post_gpio_reset_delay_ms);
+}
 
-	return 0;
+static int goodix_i2c_hid_power_up(struct i2chid_ops *ops)
+{
+	struct i2c_hid_of_goodix *ihid_goodix =
+		container_of(ops, struct i2c_hid_of_goodix, ops);
+
+	return regulator_enable(ihid_goodix->vdd);
 }
 
 static void goodix_i2c_hid_power_down(struct i2chid_ops *ops)
@@ -55,20 +56,54 @@ static void goodix_i2c_hid_power_down(struct i2chid_ops *ops)
 	struct i2c_hid_of_goodix *ihid_goodix =
 		container_of(ops, struct i2c_hid_of_goodix, ops);
 
-	gpiod_set_value_cansleep(ihid_goodix->reset_gpio, 1);
 	regulator_disable(ihid_goodix->vdd);
 }
 
+static int ihid_goodix_vdd_notify(struct notifier_block *nb,
+				    unsigned long event,
+				    void *ignored)
+{
+	struct i2c_hid_of_goodix *ihid_goodix =
+		container_of(nb, struct i2c_hid_of_goodix, nb);
+	int ret = NOTIFY_OK;
+
+	mutex_lock(&ihid_goodix->regulator_mutex);
+
+	switch (event) {
+	case REGULATOR_EVENT_PRE_DISABLE:
+		gpiod_set_value_cansleep(ihid_goodix->reset_gpio, 1);
+		break;
+
+	case REGULATOR_EVENT_ENABLE:
+		goodix_i2c_hid_deassert_reset(ihid_goodix, true);
+		break;
+
+	case REGULATOR_EVENT_ABORT_DISABLE:
+		goodix_i2c_hid_deassert_reset(ihid_goodix, false);
+		break;
+
+	default:
+		ret = NOTIFY_DONE;
+		break;
+	}
+
+	mutex_unlock(&ihid_goodix->regulator_mutex);
+
+	return ret;
+}
+
 static int i2c_hid_of_goodix_probe(struct i2c_client *client,
 				   const struct i2c_device_id *id)
 {
 	struct i2c_hid_of_goodix *ihid_goodix;
-
+	int ret;
 	ihid_goodix = devm_kzalloc(&client->dev, sizeof(*ihid_goodix),
 				   GFP_KERNEL);
 	if (!ihid_goodix)
 		return -ENOMEM;
 
+	mutex_init(&ihid_goodix->regulator_mutex);
+
 	ihid_goodix->ops.power_up = goodix_i2c_hid_power_up;
 	ihid_goodix->ops.power_down = goodix_i2c_hid_power_down;
 
@@ -84,6 +119,37 @@ static int i2c_hid_of_goodix_probe(struct i2c_client *client,
 
 	ihid_goodix->timings = device_get_match_data(&client->dev);
 
+	/*
+	 * We need to control the "reset" line in lockstep with the regulator
+	 * actually turning on an off instead of just when we make the request.
+	 * This matters if the regulator is shared with another consumer.
+	 * - If the regulator is off then we must assert reset. The reset
+	 *   line is active low and on some boards it could cause a current
+	 *   leak if left high.
+	 * - If the regulator is on then we don't want reset asserted for very
+	 *   long. Holding the controller in reset apparently draws extra
+	 *   power.
+	 */
+	mutex_lock(&ihid_goodix->regulator_mutex);
+	ihid_goodix->nb.notifier_call = ihid_goodix_vdd_notify;
+	ret = devm_regulator_register_notifier(ihid_goodix->vdd, &ihid_goodix->nb);
+	if (ret) {
+		mutex_unlock(&ihid_goodix->regulator_mutex);
+		return dev_err_probe(&client->dev, ret,
+			"regulator notifier request failed\n");
+	}
+
+	/*
+	 * If someone else is holding the regulator on (or the regulator is
+	 * an always-on one) we might never be told to deassert reset. Do it
+	 * now. Here we'll assume that someone else might have _just
+	 * barely_ turned the regulator on so we'll do the full
+	 * "post_power_delay" just in case.
+	 */
+	if (ihid_goodix->reset_gpio && regulator_is_enabled(ihid_goodix->vdd))
+		goodix_i2c_hid_deassert_reset(ihid_goodix, true);
+	mutex_unlock(&ihid_goodix->regulator_mutex);
+
 	return i2c_hid_core_probe(client, &ihid_goodix->ops, 0x0001);
 }
 
diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index 06130dc431a0..2dcaf31eb9cd 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -377,27 +377,23 @@ static int hid_submit_ctrl(struct hid_device *hid)
 	len = hid_report_len(report);
 	if (dir == USB_DIR_OUT) {
 		usbhid->urbctrl->pipe = usb_sndctrlpipe(hid_to_usb_dev(hid), 0);
-		usbhid->urbctrl->transfer_buffer_length = len;
 		if (raw_report) {
 			memcpy(usbhid->ctrlbuf, raw_report, len);
 			kfree(raw_report);
 			usbhid->ctrl[usbhid->ctrltail].raw_report = NULL;
 		}
 	} else {
-		int maxpacket, padlen;
+		int maxpacket;
 
 		usbhid->urbctrl->pipe = usb_rcvctrlpipe(hid_to_usb_dev(hid), 0);
 		maxpacket = usb_maxpacket(hid_to_usb_dev(hid),
 					  usbhid->urbctrl->pipe, 0);
-		if (maxpacket > 0) {
-			padlen = DIV_ROUND_UP(len, maxpacket);
-			padlen *= maxpacket;
-			if (padlen > usbhid->bufsize)
-				padlen = usbhid->bufsize;
-		} else
-			padlen = 0;
-		usbhid->urbctrl->transfer_buffer_length = padlen;
+		len += (len == 0);	/* Don't allow 0-length reports */
+		len = round_up(len, maxpacket);
+		if (len > usbhid->bufsize)
+			len = usbhid->bufsize;
 	}
+	usbhid->urbctrl->transfer_buffer_length = len;
 	usbhid->urbctrl->dev = hid_to_usb_dev(hid);
 
 	usbhid->cr->bRequestType = USB_TYPE_CLASS | USB_RECIP_INTERFACE | dir;
@@ -505,7 +501,7 @@ static void hid_ctrl(struct urb *urb)
 
 	if (unplug) {
 		usbhid->ctrltail = usbhid->ctrlhead;
-	} else {
+	} else if (usbhid->ctrlhead != usbhid->ctrltail) {
 		usbhid->ctrltail = (usbhid->ctrltail + 1) & (HID_CONTROL_FIFO_SIZE - 1);
 
 		if (usbhid->ctrlhead != usbhid->ctrltail &&
@@ -1223,9 +1219,20 @@ static void usbhid_stop(struct hid_device *hid)
 	mutex_lock(&usbhid->mutex);
 
 	clear_bit(HID_STARTED, &usbhid->iofl);
+
 	spin_lock_irq(&usbhid->lock);	/* Sync with error and led handlers */
 	set_bit(HID_DISCONNECTED, &usbhid->iofl);
+	while (usbhid->ctrltail != usbhid->ctrlhead) {
+		if (usbhid->ctrl[usbhid->ctrltail].dir == USB_DIR_OUT) {
+			kfree(usbhid->ctrl[usbhid->ctrltail].raw_report);
+			usbhid->ctrl[usbhid->ctrltail].raw_report = NULL;
+		}
+
+		usbhid->ctrltail = (usbhid->ctrltail + 1) &
+			(HID_CONTROL_FIFO_SIZE - 1);
+	}
 	spin_unlock_irq(&usbhid->lock);
+
 	usb_kill_urb(usbhid->urbin);
 	usb_kill_urb(usbhid->urbout);
 	usb_kill_urb(usbhid->urbctrl);
diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index 57bfa0ae9836..93f49b766376 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -2287,7 +2287,13 @@ static void wacom_set_shared_values(struct wacom_wac *wacom_wac)
 
 	if (wacom_wac->has_mute_touch_switch) {
 		wacom_wac->shared->has_mute_touch_switch = true;
-		wacom_wac->shared->is_touch_on = true;
+		/* Hardware touch switch may be off. Wait until
+		 * we know the switch state to decide is_touch_on.
+		 * Softkey state should be initialized to "on" to
+		 * match historic default.
+		 */
+		if (wacom_wac->is_soft_touch_switch)
+			wacom_wac->shared->is_touch_on = true;
 	}
 
 	if (wacom_wac->shared->has_mute_touch_switch &&
@@ -2791,6 +2797,7 @@ static int wacom_probe(struct hid_device *hdev,
 				 error);
 	}
 
+	wacom_wac->probe_complete = true;
 	return 0;
 }
 
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 81ba642adcb7..fd51769d0994 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -824,6 +824,13 @@ static int wacom_intuos_inout(struct wacom_wac *wacom)
 	return 0;
 }
 
+static inline bool touch_is_muted(struct wacom_wac *wacom_wac)
+{
+	return wacom_wac->probe_complete &&
+	       wacom_wac->shared->has_mute_touch_switch &&
+	       !wacom_wac->shared->is_touch_on;
+}
+
 static inline bool report_touch_events(struct wacom_wac *wacom)
 {
 	return (touch_arbitration ? !wacom->shared->stylus_in_proximity : 1);
@@ -1525,11 +1532,8 @@ static int wacom_24hdt_irq(struct wacom_wac *wacom)
 	int byte_per_packet = WACOM_BYTES_PER_24HDT_PACKET;
 	int y_offset = 2;
 
-	if (wacom->shared->has_mute_touch_switch &&
-	    !wacom->shared->is_touch_on) {
-		if (!wacom->shared->touch_down)
-			return 0;
-	}
+	if (touch_is_muted(wacom) && !wacom->shared->touch_down)
+		return 0;
 
 	if (wacom->features.type == WACOM_27QHDT) {
 		current_num_contacts = data[63];
@@ -1987,14 +1991,17 @@ static void wacom_wac_pad_usage_mapping(struct hid_device *hdev,
 		features->numbered_buttons++;
 		features->device_type |= WACOM_DEVICETYPE_PAD;
 		break;
-	case WACOM_HID_WD_TOUCHONOFF:
 	case WACOM_HID_WD_MUTE_DEVICE:
+		/* softkey touch switch */
+		wacom_wac->is_soft_touch_switch = true;
+		fallthrough;
+	case WACOM_HID_WD_TOUCHONOFF:
 		/*
-		 * This usage, which is used to mute touch events, comes
-		 * from the pad packet, but is reported on the touch
+		 * These two usages, which are used to mute touch events, come
+		 * from the pad packet, but are reported on the touch
 		 * interface. Because the touch interface may not have
 		 * been created yet, we cannot call wacom_map_usage(). In
-		 * order to process this usage when we receive it, we set
+		 * order to process the usages when we receive them, we set
 		 * the usage type and code directly.
 		 */
 		wacom_wac->has_mute_touch_switch = true;
@@ -2533,8 +2540,7 @@ static void wacom_wac_finger_slot(struct wacom_wac *wacom_wac,
 	bool prox = hid_data->tipswitch &&
 		    report_touch_events(wacom_wac);
 
-	if (wacom_wac->shared->has_mute_touch_switch &&
-	    !wacom_wac->shared->is_touch_on) {
+	if (touch_is_muted(wacom_wac)) {
 		if (!wacom_wac->shared->touch_down)
 			return;
 		prox = false;
@@ -2548,8 +2554,17 @@ static void wacom_wac_finger_slot(struct wacom_wac *wacom_wac,
 		int slot;
 
 		slot = input_mt_get_slot_by_key(input, hid_data->id);
-		if (slot < 0)
+		if (slot < 0) {
 			return;
+		} else {
+			struct input_mt_slot *ps = &input->mt->slots[slot];
+			int mt_id = input_mt_get_value(ps, ABS_MT_TRACKING_ID);
+
+			if (!prox && mt_id < 0) {
+				// No data to send for this slot; short-circuit
+				return;
+			}
+		}
 
 		input_mt_slot(input, slot);
 		input_mt_report_slot_state(input, MT_TOOL_FINGER, prox);
@@ -2581,6 +2596,9 @@ static void wacom_wac_finger_event(struct hid_device *hdev,
 	unsigned equivalent_usage = wacom_equivalent_usage(usage->hid);
 	struct wacom_features *features = &wacom->wacom_wac.features;
 
+	if (touch_is_muted(wacom_wac) && !wacom_wac->shared->touch_down)
+		return;
+
 	if (wacom_wac->is_invalid_bt_frame)
 		return;
 
@@ -2630,6 +2648,9 @@ static void wacom_wac_finger_pre_report(struct hid_device *hdev,
 	struct hid_data* hid_data = &wacom_wac->hid_data;
 	int i;
 
+	if (touch_is_muted(wacom_wac) && !wacom_wac->shared->touch_down)
+		return;
+
 	wacom_wac->is_invalid_bt_frame = false;
 
 	for (i = 0; i < report->maxfield; i++) {
@@ -2681,6 +2702,10 @@ static void wacom_wac_finger_report(struct hid_device *hdev,
 	struct input_dev *input = wacom_wac->touch_input;
 	unsigned touch_max = wacom_wac->features.touch_max;
 
+	/* if there was nothing to process, don't send an empty sync */
+	if (wacom_wac->hid_data.num_expected == 0)
+		return;
+
 	/* If more packets of data are expected, give us a chance to
 	 * process them rather than immediately syncing a partial
 	 * update.
@@ -3835,6 +3860,7 @@ int wacom_setup_touch_input_capabilities(struct input_dev *input_dev,
 			input_dev->evbit[0] |= BIT_MASK(EV_SW);
 			__set_bit(SW_MUTE_DEVICE, input_dev->swbit);
 			wacom_wac->has_mute_touch_switch = true;
+			wacom_wac->is_soft_touch_switch = true;
 		}
 		fallthrough;
 
diff --git a/drivers/hid/wacom_wac.h b/drivers/hid/wacom_wac.h
index 8f16654eca09..8b2d4e5b2303 100644
--- a/drivers/hid/wacom_wac.h
+++ b/drivers/hid/wacom_wac.h
@@ -337,6 +337,7 @@ struct wacom_wac {
 	int tool[2];
 	int id[2];
 	__u64 serial[2];
+	bool probe_complete;
 	bool reporting_data;
 	struct wacom_features features;
 	struct wacom_shared *shared;
@@ -352,6 +353,7 @@ struct wacom_wac {
 	int mode_value;
 	struct hid_data hid_data;
 	bool has_mute_touch_switch;
+	bool is_soft_touch_switch;
 	bool has_mode_change;
 	bool is_direct_mode;
 	bool is_invalid_bt_frame;
diff --git a/drivers/hwmon/mr75203.c b/drivers/hwmon/mr75203.c
index 18da5a25e89a..868243dba1ee 100644
--- a/drivers/hwmon/mr75203.c
+++ b/drivers/hwmon/mr75203.c
@@ -17,6 +17,7 @@
 #include <linux/property.h>
 #include <linux/regmap.h>
 #include <linux/reset.h>
+#include <linux/units.h>
 
 /* PVT Common register */
 #define PVT_IP_CONFIG	0x04
@@ -37,7 +38,6 @@
 #define CLK_SYNTH_EN		BIT(24)
 #define CLK_SYS_CYCLES_MAX	514
 #define CLK_SYS_CYCLES_MIN	2
-#define HZ_PER_MHZ		1000000L
 
 #define SDIF_DISABLE	0x04
 
diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c
index 6f0aa0ed3241..aaeeacc12121 100644
--- a/drivers/i2c/i2c-core-acpi.c
+++ b/drivers/i2c/i2c-core-acpi.c
@@ -69,6 +69,38 @@ bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares,
 }
 EXPORT_SYMBOL_GPL(i2c_acpi_get_i2c_resource);
 
+static int i2c_acpi_resource_count(struct acpi_resource *ares, void *data)
+{
+	struct acpi_resource_i2c_serialbus *sb;
+	int *count = data;
+
+	if (i2c_acpi_get_i2c_resource(ares, &sb))
+		*count = *count + 1;
+
+	return 1;
+}
+
+/**
+ * i2c_acpi_client_count - Count the number of I2cSerialBus resources
+ * @adev:	ACPI device
+ *
+ * Returns the number of I2cSerialBus resources in the ACPI-device's
+ * resource-list; or a negative error code.
+ */
+int i2c_acpi_client_count(struct acpi_device *adev)
+{
+	int ret, count = 0;
+	LIST_HEAD(r);
+
+	ret = acpi_dev_get_resources(adev, &r, i2c_acpi_resource_count, &count);
+	if (ret < 0)
+		return ret;
+
+	acpi_dev_free_resource_list(&r);
+	return count;
+}
+EXPORT_SYMBOL_GPL(i2c_acpi_client_count);
+
 static int i2c_acpi_fill_info(struct acpi_resource *ares, void *data)
 {
 	struct i2c_acpi_lookup *lookup = data;
diff --git a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
index 043f199e7bc6..9b279937a24e 100644
--- a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
+++ b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
@@ -6,12 +6,11 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/time.h>
+#include <linux/units.h>
 
 #include <linux/hid-sensor-hub.h>
 #include <linux/iio/iio.h>
 
-#define HZ_PER_MHZ	1000000L
-
 static struct {
 	u32 usage_id;
 	int unit; /* 0 for default others from HID sensor spec */
diff --git a/drivers/iio/light/as73211.c b/drivers/iio/light/as73211.c
index 7b32dfaee9b3..3ba2378df3dd 100644
--- a/drivers/iio/light/as73211.c
+++ b/drivers/iio/light/as73211.c
@@ -24,8 +24,7 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/pm.h>
-
-#define HZ_PER_KHZ 1000
+#include <linux/units.h>
 
 #define AS73211_DRV_NAME "as73211"
 
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index c9e9fc81447e..0c98dd3dee67 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -1429,7 +1429,7 @@ int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
 EXPORT_SYMBOL(rdma_read_gid_l2_fields);
 
 static int config_non_roce_gid_cache(struct ib_device *device,
-				     u32 port, int gid_tbl_len)
+				     u32 port, struct ib_port_attr *tprops)
 {
 	struct ib_gid_attr gid_attr = {};
 	struct ib_gid_table *table;
@@ -1441,7 +1441,7 @@ static int config_non_roce_gid_cache(struct ib_device *device,
 	table = rdma_gid_table(device, port);
 
 	mutex_lock(&table->lock);
-	for (i = 0; i < gid_tbl_len; ++i) {
+	for (i = 0; i < tprops->gid_tbl_len; ++i) {
 		if (!device->ops.query_gid)
 			continue;
 		ret = device->ops.query_gid(device, port, i, &gid_attr.gid);
@@ -1452,6 +1452,8 @@ static int config_non_roce_gid_cache(struct ib_device *device,
 			goto err;
 		}
 		gid_attr.index = i;
+		tprops->subnet_prefix =
+			be64_to_cpu(gid_attr.gid.global.subnet_prefix);
 		add_modify_gid(table, &gid_attr);
 	}
 err:
@@ -1484,7 +1486,7 @@ ib_cache_update(struct ib_device *device, u32 port, bool update_gids,
 
 	if (!rdma_protocol_roce(device, port) && update_gids) {
 		ret = config_non_roce_gid_cache(device, port,
-						tprops->gid_tbl_len);
+						tprops);
 		if (ret)
 			goto err;
 	}
@@ -1619,8 +1621,6 @@ int ib_cache_setup_one(struct ib_device *device)
 	u32 p;
 	int err;
 
-	rwlock_init(&device->cache_lock);
-
 	err = gid_table_setup_one(device);
 	if (err)
 		return err;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 5d3b8b8d163d..c40791baced5 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -3132,6 +3132,9 @@ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
 	struct rdma_id_private *id_priv;
 	int ret;
 
+	if (!timeout_ms)
+		return -EINVAL;
+
 	id_priv = container_of(id, struct rdma_id_private, id);
 	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY))
 		return -EINVAL;
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 647cca4e0240..f66f48d860ec 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -316,45 +316,13 @@ struct ib_device *ib_device_get_by_index(const struct net *net, u32 index);
 void nldev_init(void);
 void nldev_exit(void);
 
-static inline struct ib_qp *
-_ib_create_qp(struct ib_device *dev, struct ib_pd *pd,
-	      struct ib_qp_init_attr *attr, struct ib_udata *udata,
-	      struct ib_uqp_object *uobj, const char *caller)
-{
-	struct ib_qp *qp;
-
-	if (!dev->ops.create_qp)
-		return ERR_PTR(-EOPNOTSUPP);
-
-	qp = dev->ops.create_qp(pd, attr, udata);
-	if (IS_ERR(qp))
-		return qp;
-
-	qp->device = dev;
-	qp->pd = pd;
-	qp->uobject = uobj;
-	qp->real_qp = qp;
-
-	qp->qp_type = attr->qp_type;
-	qp->rwq_ind_tbl = attr->rwq_ind_tbl;
-	qp->send_cq = attr->send_cq;
-	qp->recv_cq = attr->recv_cq;
-	qp->srq = attr->srq;
-	qp->rwq_ind_tbl = attr->rwq_ind_tbl;
-	qp->event_handler = attr->event_handler;
-	qp->port = attr->port_num;
-
-	atomic_set(&qp->usecnt, 0);
-	spin_lock_init(&qp->mr_lock);
-	INIT_LIST_HEAD(&qp->rdma_mrs);
-	INIT_LIST_HEAD(&qp->sig_mrs);
-
-	rdma_restrack_new(&qp->res, RDMA_RESTRACK_QP);
-	WARN_ONCE(!udata && !caller, "Missing kernel QP owner");
-	rdma_restrack_set_name(&qp->res, udata ? NULL : caller);
-	rdma_restrack_add(&qp->res);
-	return qp;
-}
+struct ib_qp *ib_create_qp_user(struct ib_device *dev, struct ib_pd *pd,
+				struct ib_qp_init_attr *attr,
+				struct ib_udata *udata,
+				struct ib_uqp_object *uobj, const char *caller);
+
+void ib_qp_usecnt_inc(struct ib_qp *qp);
+void ib_qp_usecnt_dec(struct ib_qp *qp);
 
 struct rdma_dev_addr;
 int rdma_resolve_ip_route(struct sockaddr *src_addr,
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index fa20b1824fb8..f4814bb7f082 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -607,6 +607,8 @@ struct ib_device *_ib_alloc_device(size_t size)
 	for (i = 0; i < ARRAY_SIZE(device->cq_pools); i++)
 		INIT_LIST_HEAD(&device->cq_pools[i]);
 
+	rwlock_init(&device->cache_lock);
+
 	device->uverbs_cmd_mask =
 		BIT_ULL(IB_USER_VERBS_CMD_ALLOC_MW) |
 		BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) |
@@ -2050,7 +2052,6 @@ static int __ib_query_port(struct ib_device *device,
 			   u32 port_num,
 			   struct ib_port_attr *port_attr)
 {
-	union ib_gid gid = {};
 	int err;
 
 	memset(port_attr, 0, sizeof(*port_attr));
@@ -2063,11 +2064,8 @@ static int __ib_query_port(struct ib_device *device,
 	    IB_LINK_LAYER_INFINIBAND)
 		return 0;
 
-	err = device->ops.query_gid(device, port_num, 0, &gid);
-	if (err)
-		return err;
-
-	port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix);
+	ib_get_cached_subnet_prefix(device, port_num,
+				    &port_attr->subnet_prefix);
 	return 0;
 }
 
@@ -2656,6 +2654,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 	SET_DEVICE_OP(dev_ops, get_hw_stats);
 	SET_DEVICE_OP(dev_ops, get_link_layer);
 	SET_DEVICE_OP(dev_ops, get_netdev);
+	SET_DEVICE_OP(dev_ops, get_numa_node);
 	SET_DEVICE_OP(dev_ops, get_port_immutable);
 	SET_DEVICE_OP(dev_ops, get_vector_affinity);
 	SET_DEVICE_OP(dev_ops, get_vf_config);
@@ -2712,6 +2711,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 	SET_OBJ_SIZE(dev_ops, ib_cq);
 	SET_OBJ_SIZE(dev_ops, ib_mw);
 	SET_OBJ_SIZE(dev_ops, ib_pd);
+	SET_OBJ_SIZE(dev_ops, ib_qp);
 	SET_OBJ_SIZE(dev_ops, ib_rwq_ind_table);
 	SET_OBJ_SIZE(dev_ops, ib_srq);
 	SET_OBJ_SIZE(dev_ops, ib_ucontext);
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 42261152b489..2b47073c61a6 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -1186,29 +1186,34 @@ static int __init iw_cm_init(void)
 
 	ret = iwpm_init(RDMA_NL_IWCM);
 	if (ret)
-		pr_err("iw_cm: couldn't init iwpm\n");
-	else
-		rdma_nl_register(RDMA_NL_IWCM, iwcm_nl_cb_table);
+		return ret;
+
 	iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", 0);
 	if (!iwcm_wq)
-		return -ENOMEM;
+		goto err_alloc;
 
 	iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm",
 						 iwcm_ctl_table);
 	if (!iwcm_ctl_table_hdr) {
 		pr_err("iw_cm: couldn't register sysctl paths\n");
-		destroy_workqueue(iwcm_wq);
-		return -ENOMEM;
+		goto err_sysctl;
 	}
 
+	rdma_nl_register(RDMA_NL_IWCM, iwcm_nl_cb_table);
 	return 0;
+
+err_sysctl:
+	destroy_workqueue(iwcm_wq);
+err_alloc:
+	iwpm_exit(RDMA_NL_IWCM);
+	return -ENOMEM;
 }
 
 static void __exit iw_cm_cleanup(void)
 {
+	rdma_nl_unregister(RDMA_NL_IWCM);
 	unregister_net_sysctl_table(iwcm_ctl_table_hdr);
 	destroy_workqueue(iwcm_wq);
-	rdma_nl_unregister(RDMA_NL_IWCM);
 	iwpm_exit(RDMA_NL_IWCM);
 }
 
diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c
index 12a9816fc0e2..3c9a9869212b 100644
--- a/drivers/infiniband/core/iwpm_msg.c
+++ b/drivers/infiniband/core/iwpm_msg.c
@@ -69,10 +69,6 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
 	const char *err_str = "";
 	int ret = -EINVAL;
 
-	if (!iwpm_valid_client(nl_client)) {
-		err_str = "Invalid port mapper client";
-		goto pid_query_error;
-	}
 	if (iwpm_check_registration(nl_client, IWPM_REG_VALID) ||
 			iwpm_user_pid == IWPM_PID_UNAVAILABLE)
 		return 0;
@@ -153,10 +149,6 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
 	const char *err_str = "";
 	int ret = -EINVAL;
 
-	if (!iwpm_valid_client(nl_client)) {
-		err_str = "Invalid port mapper client";
-		goto add_mapping_error;
-	}
 	if (!iwpm_valid_pid())
 		return 0;
 	if (!iwpm_check_registration(nl_client, IWPM_REG_VALID)) {
@@ -240,10 +232,6 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
 	const char *err_str = "";
 	int ret = -EINVAL;
 
-	if (!iwpm_valid_client(nl_client)) {
-		err_str = "Invalid port mapper client";
-		goto query_mapping_error;
-	}
 	if (!iwpm_valid_pid())
 		return 0;
 	if (!iwpm_check_registration(nl_client, IWPM_REG_VALID)) {
@@ -331,10 +319,6 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client)
 	const char *err_str = "";
 	int ret = -EINVAL;
 
-	if (!iwpm_valid_client(nl_client)) {
-		err_str = "Invalid port mapper client";
-		goto remove_mapping_error;
-	}
 	if (!iwpm_valid_pid())
 		return 0;
 	if (iwpm_check_registration(nl_client, IWPM_REG_UNDEF)) {
@@ -444,8 +428,7 @@ int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb)
 	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
 	pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n",
 			__func__, iwpm_user_pid);
-	if (iwpm_valid_client(nl_client))
-		iwpm_set_registration(nl_client, IWPM_REG_VALID);
+	iwpm_set_registration(nl_client, IWPM_REG_VALID);
 register_pid_response_exit:
 	nlmsg_request->request_done = 1;
 	/* always for found nlmsg_request */
@@ -649,11 +632,6 @@ int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
 		return ret;
 
 	nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
-	if (!iwpm_valid_client(nl_client)) {
-		pr_info("%s: Invalid port mapper client = %u\n",
-				__func__, nl_client);
-		return ret;
-	}
 	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
 
 	local_sockaddr = (struct sockaddr_storage *)
@@ -736,11 +714,6 @@ int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
 		return ret;
 	}
 	nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
-	if (!iwpm_valid_client(nl_client)) {
-		pr_info("%s: Invalid port mapper client = %u\n",
-				__func__, nl_client);
-		return ret;
-	}
 	iwpm_set_registration(nl_client, IWPM_REG_INCOMPL);
 	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
 	iwpm_user_pid = cb->nlh->nlmsg_pid;
@@ -863,11 +836,6 @@ int iwpm_hello_cb(struct sk_buff *skb, struct netlink_callback *cb)
 	}
 	abi_version = nla_get_u16(nltb[IWPM_NLA_HELLO_ABI_VERSION]);
 	nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
-	if (!iwpm_valid_client(nl_client)) {
-		pr_info("%s: Invalid port mapper client = %u\n",
-				__func__, nl_client);
-		return ret;
-	}
 	iwpm_set_registration(nl_client, IWPM_REG_INCOMPL);
 	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
 	iwpm_ulib_version = min_t(u16, IWPM_UABI_VERSION, abi_version);
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index 3f8c019c7260..54f4feb604d8 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -48,7 +48,6 @@ static DEFINE_SPINLOCK(iwpm_mapinfo_lock);
 static struct hlist_head *iwpm_reminfo_bucket;
 static DEFINE_SPINLOCK(iwpm_reminfo_lock);
 
-static DEFINE_MUTEX(iwpm_admin_lock);
 static struct iwpm_admin_data iwpm_admin;
 
 /**
@@ -59,39 +58,21 @@ static struct iwpm_admin_data iwpm_admin;
  */
 int iwpm_init(u8 nl_client)
 {
-	int ret = 0;
-	mutex_lock(&iwpm_admin_lock);
-	if (!refcount_read(&iwpm_admin.refcount)) {
-		iwpm_hash_bucket = kcalloc(IWPM_MAPINFO_HASH_SIZE,
-					   sizeof(struct hlist_head),
-					   GFP_KERNEL);
-		if (!iwpm_hash_bucket) {
-			ret = -ENOMEM;
-			goto init_exit;
-		}
-		iwpm_reminfo_bucket = kcalloc(IWPM_REMINFO_HASH_SIZE,
-					      sizeof(struct hlist_head),
-					      GFP_KERNEL);
-		if (!iwpm_reminfo_bucket) {
-			kfree(iwpm_hash_bucket);
-			ret = -ENOMEM;
-			goto init_exit;
-		}
+	iwpm_hash_bucket = kcalloc(IWPM_MAPINFO_HASH_SIZE,
+				   sizeof(struct hlist_head), GFP_KERNEL);
+	if (!iwpm_hash_bucket)
+		return -ENOMEM;
 
-		refcount_set(&iwpm_admin.refcount, 1);
-	} else {
-		refcount_inc(&iwpm_admin.refcount);
+	iwpm_reminfo_bucket = kcalloc(IWPM_REMINFO_HASH_SIZE,
+				      sizeof(struct hlist_head), GFP_KERNEL);
+	if (!iwpm_reminfo_bucket) {
+		kfree(iwpm_hash_bucket);
+		return -ENOMEM;
 	}
 
-init_exit:
-	mutex_unlock(&iwpm_admin_lock);
-	if (!ret) {
-		iwpm_set_valid(nl_client, 1);
-		iwpm_set_registration(nl_client, IWPM_REG_UNDEF);
-		pr_debug("%s: Mapinfo and reminfo tables are created\n",
-				__func__);
-	}
-	return ret;
+	iwpm_set_registration(nl_client, IWPM_REG_UNDEF);
+	pr_debug("%s: Mapinfo and reminfo tables are created\n", __func__);
+	return 0;
 }
 
 static void free_hash_bucket(void);
@@ -105,22 +86,9 @@ static void free_reminfo_bucket(void);
  */
 int iwpm_exit(u8 nl_client)
 {
-
-	if (!iwpm_valid_client(nl_client))
-		return -EINVAL;
-	mutex_lock(&iwpm_admin_lock);
-	if (!refcount_read(&iwpm_admin.refcount)) {
-		mutex_unlock(&iwpm_admin_lock);
-		pr_err("%s Incorrect usage - negative refcount\n", __func__);
-		return -EINVAL;
-	}
-	if (refcount_dec_and_test(&iwpm_admin.refcount)) {
-		free_hash_bucket();
-		free_reminfo_bucket();
-		pr_debug("%s: Resources are destroyed\n", __func__);
-	}
-	mutex_unlock(&iwpm_admin_lock);
-	iwpm_set_valid(nl_client, 0);
+	free_hash_bucket();
+	free_reminfo_bucket();
+	pr_debug("%s: Resources are destroyed\n", __func__);
 	iwpm_set_registration(nl_client, IWPM_REG_UNDEF);
 	return 0;
 }
@@ -145,8 +113,6 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
 	unsigned long flags;
 	int ret = -EINVAL;
 
-	if (!iwpm_valid_client(nl_client))
-		return ret;
 	map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL);
 	if (!map_info)
 		return -ENOMEM;
@@ -306,10 +272,6 @@ int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr,
 	unsigned long flags;
 	int ret = -EINVAL;
 
-	if (!iwpm_valid_client(nl_client)) {
-		pr_info("%s: Invalid client = %u\n", __func__, nl_client);
-		return ret;
-	}
 	spin_lock_irqsave(&iwpm_reminfo_lock, flags);
 	if (iwpm_reminfo_bucket) {
 		hash_bucket_head = get_reminfo_hash_bucket(
@@ -424,16 +386,6 @@ int iwpm_get_nlmsg_seq(void)
 	return atomic_inc_return(&iwpm_admin.nlmsg_seq);
 }
 
-int iwpm_valid_client(u8 nl_client)
-{
-	return iwpm_admin.client_list[nl_client];
-}
-
-void iwpm_set_valid(u8 nl_client, int valid)
-{
-	iwpm_admin.client_list[nl_client] = valid;
-}
-
 /* valid client */
 u32 iwpm_get_registration(u8 nl_client)
 {
diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h
index e201835de733..3a42ad43056e 100644
--- a/drivers/infiniband/core/iwpm_util.h
+++ b/drivers/infiniband/core/iwpm_util.h
@@ -90,9 +90,7 @@ struct iwpm_remote_info {
 };
 
 struct iwpm_admin_data {
-	refcount_t refcount;
 	atomic_t nlmsg_seq;
-	int      client_list[RDMA_NL_NUM_CLIENTS];
 	u32      reg_list[RDMA_NL_NUM_CLIENTS];
 };
 
@@ -148,22 +146,6 @@ int iwpm_get_nlmsg_seq(void);
 void iwpm_add_remote_info(struct iwpm_remote_info *reminfo);
 
 /**
- * iwpm_valid_client - Check if the port mapper client is valid
- * @nl_client: The index of the netlink client
- *
- * Valid clients need to call iwpm_init() before using
- * the port mapper
- */
-int iwpm_valid_client(u8 nl_client);
-
-/**
- * iwpm_set_valid - Set the port mapper client to valid or not
- * @nl_client: The index of the netlink client
- * @valid: 1 if valid or 0 if invalid
- */
-void iwpm_set_valid(u8 nl_client, int valid);
-
-/**
  * iwpm_check_registration - Check if the client registration
  *			      matches the given one
  * @nl_client: The index of the netlink client
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 033207882c82..1f935d9f6178 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -343,7 +343,7 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
 	rt = &dev->res[res->type];
 
 	old = xa_erase(&rt->xa, res->id);
-	if (res->type == RDMA_RESTRACK_MR || res->type == RDMA_RESTRACK_QP)
+	if (res->type == RDMA_RESTRACK_MR)
 		return;
 	WARN_ON(old != res);
 
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index b61576f702b8..a20b8108e160 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -123,12 +123,6 @@ struct ib_sa_query {
 #define IB_SA_CANCEL			0x00000002
 #define IB_SA_QUERY_OPA			0x00000004
 
-struct ib_sa_service_query {
-	void (*callback)(int, struct ib_sa_service_rec *, void *);
-	void *context;
-	struct ib_sa_query sa_query;
-};
-
 struct ib_sa_path_query {
 	void (*callback)(int, struct sa_path_rec *, void *);
 	void *context;
@@ -502,54 +496,6 @@ static const struct ib_field mcmember_rec_table[] = {
 	  .size_bits    = 23 },
 };
 
-#define SERVICE_REC_FIELD(field) \
-	.struct_offset_bytes = offsetof(struct ib_sa_service_rec, field),	\
-	.struct_size_bytes   = sizeof_field(struct ib_sa_service_rec, field),	\
-	.field_name          = "sa_service_rec:" #field
-
-static const struct ib_field service_rec_table[] = {
-	{ SERVICE_REC_FIELD(id),
-	  .offset_words = 0,
-	  .offset_bits  = 0,
-	  .size_bits    = 64 },
-	{ SERVICE_REC_FIELD(gid),
-	  .offset_words = 2,
-	  .offset_bits  = 0,
-	  .size_bits    = 128 },
-	{ SERVICE_REC_FIELD(pkey),
-	  .offset_words = 6,
-	  .offset_bits  = 0,
-	  .size_bits    = 16 },
-	{ SERVICE_REC_FIELD(lease),
-	  .offset_words = 7,
-	  .offset_bits  = 0,
-	  .size_bits    = 32 },
-	{ SERVICE_REC_FIELD(key),
-	  .offset_words = 8,
-	  .offset_bits  = 0,
-	  .size_bits    = 128 },
-	{ SERVICE_REC_FIELD(name),
-	  .offset_words = 12,
-	  .offset_bits  = 0,
-	  .size_bits    = 64*8 },
-	{ SERVICE_REC_FIELD(data8),
-	  .offset_words = 28,
-	  .offset_bits  = 0,
-	  .size_bits    = 16*8 },
-	{ SERVICE_REC_FIELD(data16),
-	  .offset_words = 32,
-	  .offset_bits  = 0,
-	  .size_bits    = 8*16 },
-	{ SERVICE_REC_FIELD(data32),
-	  .offset_words = 36,
-	  .offset_bits  = 0,
-	  .size_bits    = 4*32 },
-	{ SERVICE_REC_FIELD(data64),
-	  .offset_words = 40,
-	  .offset_bits  = 0,
-	  .size_bits    = 2*64 },
-};
-
 #define CLASSPORTINFO_REC_FIELD(field) \
 	.struct_offset_bytes = offsetof(struct ib_class_port_info, field),	\
 	.struct_size_bytes   = sizeof_field(struct ib_class_port_info, field),	\
@@ -1358,6 +1304,7 @@ static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
 {
 	unsigned long flags;
 	int ret, id;
+	const int nmbr_sa_query_retries = 10;
 
 	xa_lock_irqsave(&queries, flags);
 	ret = __xa_alloc(&queries, &id, query, xa_limit_32b, gfp_mask);
@@ -1365,7 +1312,13 @@ static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
 	if (ret < 0)
 		return ret;
 
-	query->mad_buf->timeout_ms  = timeout_ms;
+	query->mad_buf->timeout_ms  = timeout_ms / nmbr_sa_query_retries;
+	query->mad_buf->retries = nmbr_sa_query_retries;
+	if (!query->mad_buf->timeout_ms) {
+		/* Special case, very small timeout_ms */
+		query->mad_buf->timeout_ms = 1;
+		query->mad_buf->retries = timeout_ms;
+	}
 	query->mad_buf->context[0] = query;
 	query->id = id;
 
@@ -1634,129 +1587,6 @@ err1:
 }
 EXPORT_SYMBOL(ib_sa_path_rec_get);
 
-static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query,
-				    int status,
-				    struct ib_sa_mad *mad)
-{
-	struct ib_sa_service_query *query =
-		container_of(sa_query, struct ib_sa_service_query, sa_query);
-
-	if (mad) {
-		struct ib_sa_service_rec rec;
-
-		ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table),
-			  mad->data, &rec);
-		query->callback(status, &rec, query->context);
-	} else
-		query->callback(status, NULL, query->context);
-}
-
-static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
-{
-	kfree(container_of(sa_query, struct ib_sa_service_query, sa_query));
-}
-
-/**
- * ib_sa_service_rec_query - Start Service Record operation
- * @client:SA client
- * @device:device to send request on
- * @port_num: port number to send request on
- * @method:SA method - should be get, set, or delete
- * @rec:Service Record to send in request
- * @comp_mask:component mask to send in request
- * @timeout_ms:time to wait for response
- * @gfp_mask:GFP mask to use for internal allocations
- * @callback:function called when request completes, times out or is
- * canceled
- * @context:opaque user context passed to callback
- * @sa_query:request context, used to cancel request
- *
- * Send a Service Record set/get/delete to the SA to register,
- * unregister or query a service record.
- * The callback function will be called when the request completes (or
- * fails); status is 0 for a successful response, -EINTR if the query
- * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error
- * occurred sending the query.  The resp parameter of the callback is
- * only valid if status is 0.
- *
- * If the return value of ib_sa_service_rec_query() is negative, it is an
- * error code.  Otherwise it is a request ID that can be used to cancel
- * the query.
- */
-int ib_sa_service_rec_query(struct ib_sa_client *client,
-			    struct ib_device *device, u32 port_num, u8 method,
-			    struct ib_sa_service_rec *rec,
-			    ib_sa_comp_mask comp_mask,
-			    unsigned long timeout_ms, gfp_t gfp_mask,
-			    void (*callback)(int status,
-					     struct ib_sa_service_rec *resp,
-					     void *context),
-			    void *context,
-			    struct ib_sa_query **sa_query)
-{
-	struct ib_sa_service_query *query;
-	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
-	struct ib_sa_port   *port;
-	struct ib_mad_agent *agent;
-	struct ib_sa_mad *mad;
-	int ret;
-
-	if (!sa_dev)
-		return -ENODEV;
-
-	port  = &sa_dev->port[port_num - sa_dev->start_port];
-	agent = port->agent;
-
-	if (method != IB_MGMT_METHOD_GET &&
-	    method != IB_MGMT_METHOD_SET &&
-	    method != IB_SA_METHOD_DELETE)
-		return -EINVAL;
-
-	query = kzalloc(sizeof(*query), gfp_mask);
-	if (!query)
-		return -ENOMEM;
-
-	query->sa_query.port     = port;
-	ret = alloc_mad(&query->sa_query, gfp_mask);
-	if (ret)
-		goto err1;
-
-	ib_sa_client_get(client);
-	query->sa_query.client = client;
-	query->callback        = callback;
-	query->context         = context;
-
-	mad = query->sa_query.mad_buf->mad;
-	init_mad(&query->sa_query, agent);
-
-	query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
-	query->sa_query.release  = ib_sa_service_rec_release;
-	mad->mad_hdr.method	 = method;
-	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
-	mad->sa_hdr.comp_mask	 = comp_mask;
-
-	ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table),
-		rec, mad->data);
-
-	*sa_query = &query->sa_query;
-
-	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
-	if (ret < 0)
-		goto err2;
-
-	return ret;
-
-err2:
-	*sa_query = NULL;
-	ib_sa_client_put(query->sa_query.client);
-	free_mad(&query->sa_query);
-
-err1:
-	kfree(query);
-	return ret;
-}
-EXPORT_SYMBOL(ib_sa_service_rec_query);
-
 static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
 					int status,
 					struct ib_sa_mad *mad)
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 0eb40025075f..86d479772fbc 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -51,15 +51,15 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
 	struct scatterlist *sg;
 	unsigned int i;
 
-	if (umem->nmap > 0)
-		ib_dma_unmap_sg(dev, umem->sg_head.sgl, umem->sg_nents,
-				DMA_BIDIRECTIONAL);
+	if (dirty)
+		ib_dma_unmap_sgtable_attrs(dev, &umem->sgt_append.sgt,
+					   DMA_BIDIRECTIONAL, 0);
 
-	for_each_sg(umem->sg_head.sgl, sg, umem->sg_nents, i)
+	for_each_sgtable_sg(&umem->sgt_append.sgt, sg, i)
 		unpin_user_page_range_dirty_lock(sg_page(sg),
 			DIV_ROUND_UP(sg->length, PAGE_SIZE), make_dirty);
 
-	sg_free_table(&umem->sg_head);
+	sg_free_append_table(&umem->sgt_append);
 }
 
 /**
@@ -111,7 +111,7 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
 	/* offset into first SGL */
 	pgoff = umem->address & ~PAGE_MASK;
 
-	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
+	for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) {
 		/* Walk SGL and reduce max page size if VA/PA bits differ
 		 * for any address.
 		 */
@@ -121,7 +121,7 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
 		 * the maximum possible page size as the low bits of the iova
 		 * must be zero when starting the next chunk.
 		 */
-		if (i != (umem->nmap - 1))
+		if (i != (umem->sgt_append.sgt.nents - 1))
 			mask |= va;
 		pgoff = 0;
 	}
@@ -155,8 +155,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 	unsigned long dma_attr = 0;
 	struct mm_struct *mm;
 	unsigned long npages;
-	int ret;
-	struct scatterlist *sg = NULL;
+	int pinned, ret;
 	unsigned int gup_flags = FOLL_WRITE;
 
 	/*
@@ -216,24 +215,24 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 
 	while (npages) {
 		cond_resched();
-		ret = pin_user_pages_fast(cur_base,
+		pinned = pin_user_pages_fast(cur_base,
 					  min_t(unsigned long, npages,
 						PAGE_SIZE /
 						sizeof(struct page *)),
 					  gup_flags | FOLL_LONGTERM, page_list);
-		if (ret < 0)
+		if (pinned < 0) {
+			ret = pinned;
 			goto umem_release;
+		}
 
-		cur_base += ret * PAGE_SIZE;
-		npages -= ret;
-		sg = __sg_alloc_table_from_pages(&umem->sg_head, page_list, ret,
-				0, ret << PAGE_SHIFT,
-				ib_dma_max_seg_size(device), sg, npages,
-				GFP_KERNEL);
-		umem->sg_nents = umem->sg_head.nents;
-		if (IS_ERR(sg)) {
-			unpin_user_pages_dirty_lock(page_list, ret, 0);
-			ret = PTR_ERR(sg);
+		cur_base += pinned * PAGE_SIZE;
+		npages -= pinned;
+		ret = sg_alloc_append_table_from_pages(
+			&umem->sgt_append, page_list, pinned, 0,
+			pinned << PAGE_SHIFT, ib_dma_max_seg_size(device),
+			npages, GFP_KERNEL);
+		if (ret) {
+			unpin_user_pages_dirty_lock(page_list, pinned, 0);
 			goto umem_release;
 		}
 	}
@@ -241,16 +240,10 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 	if (access & IB_ACCESS_RELAXED_ORDERING)
 		dma_attr |= DMA_ATTR_WEAK_ORDERING;
 
-	umem->nmap =
-		ib_dma_map_sg_attrs(device, umem->sg_head.sgl, umem->sg_nents,
-				    DMA_BIDIRECTIONAL, dma_attr);
-
-	if (!umem->nmap) {
-		ret = -ENOMEM;
+	ret = ib_dma_map_sgtable_attrs(device, &umem->sgt_append.sgt,
+				       DMA_BIDIRECTIONAL, dma_attr);
+	if (ret)
 		goto umem_release;
-	}
-
-	ret = 0;
 	goto out;
 
 umem_release:
@@ -310,7 +303,8 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
 		return -EINVAL;
 	}
 
-	ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->sg_nents, dst, length,
+	ret = sg_pcopy_to_buffer(umem->sgt_append.sgt.sgl,
+				 umem->sgt_append.sgt.orig_nents, dst, length,
 				 offset + ib_umem_offset(umem));
 
 	if (ret < 0)
diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c
index c6e875619fac..e824baf4640d 100644
--- a/drivers/infiniband/core/umem_dmabuf.c
+++ b/drivers/infiniband/core/umem_dmabuf.c
@@ -55,9 +55,8 @@ int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf)
 		cur += sg_dma_len(sg);
 	}
 
-	umem_dmabuf->umem.sg_head.sgl = umem_dmabuf->first_sg;
-	umem_dmabuf->umem.sg_head.nents = nmap;
-	umem_dmabuf->umem.nmap = nmap;
+	umem_dmabuf->umem.sgt_append.sgt.sgl = umem_dmabuf->first_sg;
+	umem_dmabuf->umem.sgt_append.sgt.nents = nmap;
 	umem_dmabuf->sgt = sgt;
 
 wait_fence:
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 9462dbe66014..7a47343d11f9 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -292,9 +292,6 @@ EXPORT_SYMBOL(ib_umem_odp_release);
  * @dma_index: index in the umem to add the dma to.
  * @page: the page struct to map and add.
  * @access_mask: access permissions needed for this page.
- * @current_seq: sequence number for synchronization with invalidations.
- *               the sequence number is taken from
- *               umem_odp->notifiers_seq.
  *
  * The function returns -EFAULT if the DMA mapping operation fails.
  *
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 8c8ca7bce3ca..740e6b2efe0e 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1435,35 +1435,13 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
 		attr.source_qpn = cmd->source_qpn;
 	}
 
-	if (cmd->qp_type == IB_QPT_XRC_TGT)
-		qp = ib_create_qp(pd, &attr);
-	else
-		qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, obj,
-				   NULL);
-
+	qp = ib_create_qp_user(device, pd, &attr, &attrs->driver_udata, obj,
+			       KBUILD_MODNAME);
 	if (IS_ERR(qp)) {
 		ret = PTR_ERR(qp);
 		goto err_put;
 	}
-
-	if (cmd->qp_type != IB_QPT_XRC_TGT) {
-		ret = ib_create_qp_security(qp, device);
-		if (ret)
-			goto err_cb;
-
-		atomic_inc(&pd->usecnt);
-		if (attr.send_cq)
-			atomic_inc(&attr.send_cq->usecnt);
-		if (attr.recv_cq)
-			atomic_inc(&attr.recv_cq->usecnt);
-		if (attr.srq)
-			atomic_inc(&attr.srq->usecnt);
-		if (ind_tbl)
-			atomic_inc(&ind_tbl->usecnt);
-	} else {
-		/* It is done in _ib_create_qp for other QP types */
-		qp->uobject = obj;
-	}
+	ib_qp_usecnt_inc(qp);
 
 	obj->uevent.uobject.object = qp;
 	obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file);
@@ -1502,9 +1480,6 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
 	resp.response_length = uverbs_response_length(attrs, sizeof(resp));
 	return uverbs_response(attrs, &resp, sizeof(resp));
 
-err_cb:
-	ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs));
-
 err_put:
 	if (!IS_ERR(xrcd_uobj))
 		uobj_put_read(xrcd_uobj);
diff --git a/drivers/infiniband/core/uverbs_std_types_qp.c b/drivers/infiniband/core/uverbs_std_types_qp.c
index c00cfb5ed387..dd1075466f61 100644
--- a/drivers/infiniband/core/uverbs_std_types_qp.c
+++ b/drivers/infiniband/core/uverbs_std_types_qp.c
@@ -248,44 +248,23 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QP_CREATE)(
 	set_caps(&attr, &cap, true);
 	mutex_init(&obj->mcast_lock);
 
-	if (attr.qp_type == IB_QPT_XRC_TGT)
-		qp = ib_create_qp(pd, &attr);
-	else
-		qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, obj,
-				   NULL);
-
+	qp = ib_create_qp_user(device, pd, &attr, &attrs->driver_udata, obj,
+			       KBUILD_MODNAME);
 	if (IS_ERR(qp)) {
 		ret = PTR_ERR(qp);
 		goto err_put;
 	}
+	ib_qp_usecnt_inc(qp);
 
-	if (attr.qp_type != IB_QPT_XRC_TGT) {
-		atomic_inc(&pd->usecnt);
-		if (attr.send_cq)
-			atomic_inc(&attr.send_cq->usecnt);
-		if (attr.recv_cq)
-			atomic_inc(&attr.recv_cq->usecnt);
-		if (attr.srq)
-			atomic_inc(&attr.srq->usecnt);
-		if (attr.rwq_ind_tbl)
-			atomic_inc(&attr.rwq_ind_tbl->usecnt);
-	} else {
+	if (attr.qp_type == IB_QPT_XRC_TGT) {
 		obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
 					  uobject);
 		atomic_inc(&obj->uxrcd->refcnt);
-		/* It is done in _ib_create_qp for other QP types */
-		qp->uobject = obj;
 	}
 
 	obj->uevent.uobject.object = qp;
 	uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_QP_HANDLE);
 
-	if (attr.qp_type != IB_QPT_XRC_TGT) {
-		ret = ib_create_qp_security(qp, device);
-		if (ret)
-			return ret;
-	}
-
 	set_caps(&attr, &cap, false);
 	ret = uverbs_copy_to_struct_or_zero(attrs,
 					UVERBS_ATTR_CREATE_QP_RESP_CAP, &cap,
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 7036967e4c0b..89a2b21976d6 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1035,7 +1035,8 @@ struct ib_srq *ib_create_srq_user(struct ib_pd *pd,
 	}
 	if (srq->srq_type == IB_SRQT_XRC) {
 		srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd;
-		atomic_inc(&srq->ext.xrc.xrcd->usecnt);
+		if (srq->ext.xrc.xrcd)
+			atomic_inc(&srq->ext.xrc.xrcd->usecnt);
 	}
 	atomic_inc(&pd->usecnt);
 
@@ -1046,7 +1047,7 @@ struct ib_srq *ib_create_srq_user(struct ib_pd *pd,
 	if (ret) {
 		rdma_restrack_put(&srq->res);
 		atomic_dec(&srq->pd->usecnt);
-		if (srq->srq_type == IB_SRQT_XRC)
+		if (srq->srq_type == IB_SRQT_XRC && srq->ext.xrc.xrcd)
 			atomic_dec(&srq->ext.xrc.xrcd->usecnt);
 		if (ib_srq_has_cq(srq->srq_type))
 			atomic_dec(&srq->ext.cq->usecnt);
@@ -1090,7 +1091,7 @@ int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata)
 		return ret;
 
 	atomic_dec(&srq->pd->usecnt);
-	if (srq->srq_type == IB_SRQT_XRC)
+	if (srq->srq_type == IB_SRQT_XRC && srq->ext.xrc.xrcd)
 		atomic_dec(&srq->ext.xrc.xrcd->usecnt);
 	if (ib_srq_has_cq(srq->srq_type))
 		atomic_dec(&srq->ext.cq->usecnt);
@@ -1199,34 +1200,142 @@ static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp,
 	return qp;
 }
 
+static struct ib_qp *create_qp(struct ib_device *dev, struct ib_pd *pd,
+			       struct ib_qp_init_attr *attr,
+			       struct ib_udata *udata,
+			       struct ib_uqp_object *uobj, const char *caller)
+{
+	struct ib_udata dummy = {};
+	struct ib_qp *qp;
+	int ret;
+
+	if (!dev->ops.create_qp)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	qp = rdma_zalloc_drv_obj_numa(dev, ib_qp);
+	if (!qp)
+		return ERR_PTR(-ENOMEM);
+
+	qp->device = dev;
+	qp->pd = pd;
+	qp->uobject = uobj;
+	qp->real_qp = qp;
+
+	qp->qp_type = attr->qp_type;
+	qp->rwq_ind_tbl = attr->rwq_ind_tbl;
+	qp->srq = attr->srq;
+	qp->event_handler = attr->event_handler;
+	qp->port = attr->port_num;
+	qp->qp_context = attr->qp_context;
+
+	spin_lock_init(&qp->mr_lock);
+	INIT_LIST_HEAD(&qp->rdma_mrs);
+	INIT_LIST_HEAD(&qp->sig_mrs);
+
+	rdma_restrack_new(&qp->res, RDMA_RESTRACK_QP);
+	WARN_ONCE(!udata && !caller, "Missing kernel QP owner");
+	rdma_restrack_set_name(&qp->res, udata ? NULL : caller);
+	ret = dev->ops.create_qp(qp, attr, udata);
+	if (ret)
+		goto err_create;
+
+	/*
+	 * TODO: The mlx4 internally overwrites send_cq and recv_cq.
+	 * Unfortunately, it is not an easy task to fix that driver.
+	 */
+	qp->send_cq = attr->send_cq;
+	qp->recv_cq = attr->recv_cq;
+
+	ret = ib_create_qp_security(qp, dev);
+	if (ret)
+		goto err_security;
+
+	rdma_restrack_add(&qp->res);
+	return qp;
+
+err_security:
+	qp->device->ops.destroy_qp(qp, udata ? &dummy : NULL);
+err_create:
+	rdma_restrack_put(&qp->res);
+	kfree(qp);
+	return ERR_PTR(ret);
+
+}
+
 /**
- * ib_create_named_qp - Creates a kernel QP associated with the specified protection
+ * ib_create_qp_user - Creates a QP associated with the specified protection
  *   domain.
+ * @dev: IB device
  * @pd: The protection domain associated with the QP.
- * @qp_init_attr: A list of initial attributes required to create the
+ * @attr: A list of initial attributes required to create the
  *   QP.  If QP creation succeeds, then the attributes are updated to
  *   the actual capabilities of the created QP.
+ * @udata: User data
+ * @uobj: uverbs obect
  * @caller: caller's build-time module name
- *
- * NOTE: for user qp use ib_create_qp_user with valid udata!
  */
-struct ib_qp *ib_create_named_qp(struct ib_pd *pd,
-				 struct ib_qp_init_attr *qp_init_attr,
-				 const char *caller)
+struct ib_qp *ib_create_qp_user(struct ib_device *dev, struct ib_pd *pd,
+				struct ib_qp_init_attr *attr,
+				struct ib_udata *udata,
+				struct ib_uqp_object *uobj, const char *caller)
 {
-	struct ib_device *device = pd ? pd->device : qp_init_attr->xrcd->device;
-	struct ib_qp *qp;
-	int ret;
+	struct ib_qp *qp, *xrc_qp;
 
-	if (qp_init_attr->rwq_ind_tbl &&
-	    (qp_init_attr->recv_cq ||
-	    qp_init_attr->srq || qp_init_attr->cap.max_recv_wr ||
-	    qp_init_attr->cap.max_recv_sge))
-		return ERR_PTR(-EINVAL);
+	if (attr->qp_type == IB_QPT_XRC_TGT)
+		qp = create_qp(dev, pd, attr, NULL, NULL, caller);
+	else
+		qp = create_qp(dev, pd, attr, udata, uobj, NULL);
+	if (attr->qp_type != IB_QPT_XRC_TGT || IS_ERR(qp))
+		return qp;
 
-	if ((qp_init_attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) &&
-	    !(device->attrs.device_cap_flags & IB_DEVICE_INTEGRITY_HANDOVER))
-		return ERR_PTR(-EINVAL);
+	xrc_qp = create_xrc_qp_user(qp, attr);
+	if (IS_ERR(xrc_qp)) {
+		ib_destroy_qp(qp);
+		return xrc_qp;
+	}
+
+	xrc_qp->uobject = uobj;
+	return xrc_qp;
+}
+EXPORT_SYMBOL(ib_create_qp_user);
+
+void ib_qp_usecnt_inc(struct ib_qp *qp)
+{
+	if (qp->pd)
+		atomic_inc(&qp->pd->usecnt);
+	if (qp->send_cq)
+		atomic_inc(&qp->send_cq->usecnt);
+	if (qp->recv_cq)
+		atomic_inc(&qp->recv_cq->usecnt);
+	if (qp->srq)
+		atomic_inc(&qp->srq->usecnt);
+	if (qp->rwq_ind_tbl)
+		atomic_inc(&qp->rwq_ind_tbl->usecnt);
+}
+EXPORT_SYMBOL(ib_qp_usecnt_inc);
+
+void ib_qp_usecnt_dec(struct ib_qp *qp)
+{
+	if (qp->rwq_ind_tbl)
+		atomic_dec(&qp->rwq_ind_tbl->usecnt);
+	if (qp->srq)
+		atomic_dec(&qp->srq->usecnt);
+	if (qp->recv_cq)
+		atomic_dec(&qp->recv_cq->usecnt);
+	if (qp->send_cq)
+		atomic_dec(&qp->send_cq->usecnt);
+	if (qp->pd)
+		atomic_dec(&qp->pd->usecnt);
+}
+EXPORT_SYMBOL(ib_qp_usecnt_dec);
+
+struct ib_qp *ib_create_qp_kernel(struct ib_pd *pd,
+				  struct ib_qp_init_attr *qp_init_attr,
+				  const char *caller)
+{
+	struct ib_device *device = pd->device;
+	struct ib_qp *qp;
+	int ret;
 
 	/*
 	 * If the callers is using the RDMA API calculate the resources
@@ -1237,47 +1346,11 @@ struct ib_qp *ib_create_named_qp(struct ib_pd *pd,
 	if (qp_init_attr->cap.max_rdma_ctxs)
 		rdma_rw_init_qp(device, qp_init_attr);
 
-	qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL, caller);
+	qp = create_qp(device, pd, qp_init_attr, NULL, NULL, caller);
 	if (IS_ERR(qp))
 		return qp;
 
-	ret = ib_create_qp_security(qp, device);
-	if (ret)
-		goto err;
-
-	if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) {
-		struct ib_qp *xrc_qp =
-			create_xrc_qp_user(qp, qp_init_attr);
-
-		if (IS_ERR(xrc_qp)) {
-			ret = PTR_ERR(xrc_qp);
-			goto err;
-		}
-		return xrc_qp;
-	}
-
-	qp->event_handler = qp_init_attr->event_handler;
-	qp->qp_context = qp_init_attr->qp_context;
-	if (qp_init_attr->qp_type == IB_QPT_XRC_INI) {
-		qp->recv_cq = NULL;
-		qp->srq = NULL;
-	} else {
-		qp->recv_cq = qp_init_attr->recv_cq;
-		if (qp_init_attr->recv_cq)
-			atomic_inc(&qp_init_attr->recv_cq->usecnt);
-		qp->srq = qp_init_attr->srq;
-		if (qp->srq)
-			atomic_inc(&qp_init_attr->srq->usecnt);
-	}
-
-	qp->send_cq = qp_init_attr->send_cq;
-	qp->xrcd    = NULL;
-
-	atomic_inc(&pd->usecnt);
-	if (qp_init_attr->send_cq)
-		atomic_inc(&qp_init_attr->send_cq->usecnt);
-	if (qp_init_attr->rwq_ind_tbl)
-		atomic_inc(&qp->rwq_ind_tbl->usecnt);
+	ib_qp_usecnt_inc(qp);
 
 	if (qp_init_attr->cap.max_rdma_ctxs) {
 		ret = rdma_rw_init_mrs(qp, qp_init_attr);
@@ -1303,7 +1376,7 @@ err:
 	return ERR_PTR(ret);
 
 }
-EXPORT_SYMBOL(ib_create_named_qp);
+EXPORT_SYMBOL(ib_create_qp_kernel);
 
 static const struct {
 	int			valid;
@@ -1935,10 +2008,6 @@ int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata)
 {
 	const struct ib_gid_attr *alt_path_sgid_attr = qp->alt_path_sgid_attr;
 	const struct ib_gid_attr *av_sgid_attr = qp->av_sgid_attr;
-	struct ib_pd *pd;
-	struct ib_cq *scq, *rcq;
-	struct ib_srq *srq;
-	struct ib_rwq_ind_table *ind_tbl;
 	struct ib_qp_security *sec;
 	int ret;
 
@@ -1950,11 +2019,6 @@ int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata)
 	if (qp->real_qp != qp)
 		return __ib_destroy_shared_qp(qp);
 
-	pd   = qp->pd;
-	scq  = qp->send_cq;
-	rcq  = qp->recv_cq;
-	srq  = qp->srq;
-	ind_tbl = qp->rwq_ind_tbl;
 	sec  = qp->qp_sec;
 	if (sec)
 		ib_destroy_qp_security_begin(sec);
@@ -1963,30 +2027,24 @@ int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata)
 		rdma_rw_cleanup_mrs(qp);
 
 	rdma_counter_unbind_qp(qp, true);
-	rdma_restrack_del(&qp->res);
 	ret = qp->device->ops.destroy_qp(qp, udata);
-	if (!ret) {
-		if (alt_path_sgid_attr)
-			rdma_put_gid_attr(alt_path_sgid_attr);
-		if (av_sgid_attr)
-			rdma_put_gid_attr(av_sgid_attr);
-		if (pd)
-			atomic_dec(&pd->usecnt);
-		if (scq)
-			atomic_dec(&scq->usecnt);
-		if (rcq)
-			atomic_dec(&rcq->usecnt);
-		if (srq)
-			atomic_dec(&srq->usecnt);
-		if (ind_tbl)
-			atomic_dec(&ind_tbl->usecnt);
-		if (sec)
-			ib_destroy_qp_security_end(sec);
-	} else {
+	if (ret) {
 		if (sec)
 			ib_destroy_qp_security_abort(sec);
+		return ret;
 	}
 
+	if (alt_path_sgid_attr)
+		rdma_put_gid_attr(alt_path_sgid_attr);
+	if (av_sgid_attr)
+		rdma_put_gid_attr(av_sgid_attr);
+
+	ib_qp_usecnt_dec(qp);
+	if (sec)
+		ib_destroy_qp_security_end(sec);
+
+	rdma_restrack_del(&qp->res);
+	kfree(qp);
 	return ret;
 }
 EXPORT_SYMBOL(ib_destroy_qp_user);
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index ea0054c60fbc..408dfbcc47b5 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -815,7 +815,7 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
 	if (ib_qp->qp_type == IB_QPT_GSI && rdev->gsi_ctx.gsi_sqp) {
 		rc = bnxt_re_destroy_gsi_sqp(qp);
 		if (rc)
-			goto sh_fail;
+			return rc;
 	}
 
 	mutex_lock(&rdev->qp_lock);
@@ -826,10 +826,7 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
 	ib_umem_release(qp->rumem);
 	ib_umem_release(qp->sumem);
 
-	kfree(qp);
 	return 0;
-sh_fail:
-	return rc;
 }
 
 static u8 __from_ib_qp_type(enum ib_qp_type type)
@@ -1312,7 +1309,7 @@ out:
 static int bnxt_re_create_shadow_gsi(struct bnxt_re_qp *qp,
 				     struct bnxt_re_pd *pd)
 {
-	struct bnxt_re_sqp_entries *sqp_tbl = NULL;
+	struct bnxt_re_sqp_entries *sqp_tbl;
 	struct bnxt_re_dev *rdev;
 	struct bnxt_re_qp *sqp;
 	struct bnxt_re_ah *sah;
@@ -1320,7 +1317,7 @@ static int bnxt_re_create_shadow_gsi(struct bnxt_re_qp *qp,
 
 	rdev = qp->rdev;
 	/* Create a shadow QP to handle the QP1 traffic */
-	sqp_tbl = kzalloc(sizeof(*sqp_tbl) * BNXT_RE_MAX_GSI_SQP_ENTRIES,
+	sqp_tbl = kcalloc(BNXT_RE_MAX_GSI_SQP_ENTRIES, sizeof(*sqp_tbl),
 			  GFP_KERNEL);
 	if (!sqp_tbl)
 		return -ENOMEM;
@@ -1402,27 +1399,22 @@ static bool bnxt_re_test_qp_limits(struct bnxt_re_dev *rdev,
 	return rc;
 }
 
-struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
-				struct ib_qp_init_attr *qp_init_attr,
-				struct ib_udata *udata)
+int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr,
+		      struct ib_udata *udata)
 {
+	struct ib_pd *ib_pd = ib_qp->pd;
 	struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
 	struct bnxt_re_dev *rdev = pd->rdev;
 	struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
-	struct bnxt_re_qp *qp;
+	struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
 	int rc;
 
 	rc = bnxt_re_test_qp_limits(rdev, qp_init_attr, dev_attr);
 	if (!rc) {
 		rc = -EINVAL;
-		goto exit;
+		goto fail;
 	}
 
-	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
-	if (!qp) {
-		rc = -ENOMEM;
-		goto exit;
-	}
 	qp->rdev = rdev;
 	rc = bnxt_re_init_qp_attr(qp, pd, qp_init_attr, udata);
 	if (rc)
@@ -1465,16 +1457,14 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
 	mutex_unlock(&rdev->qp_lock);
 	atomic_inc(&rdev->qp_count);
 
-	return &qp->ib_qp;
+	return 0;
 qp_destroy:
 	bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp);
 free_umem:
 	ib_umem_release(qp->rumem);
 	ib_umem_release(qp->sumem);
 fail:
-	kfree(qp);
-exit:
-	return ERR_PTR(rc);
+	return rc;
 }
 
 static u8 __from_ib_qp_state(enum ib_qp_state state)
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index d68671cc6173..b5c6e0f4f877 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -78,9 +78,9 @@ struct bnxt_re_srq {
 };
 
 struct bnxt_re_qp {
+	struct ib_qp		ib_qp;
 	struct list_head	list;
 	struct bnxt_re_dev	*rdev;
-	struct ib_qp		ib_qp;
 	spinlock_t		sq_lock;	/* protect sq */
 	spinlock_t		rq_lock;	/* protect rq */
 	struct bnxt_qplib_qp	qplib_qp;
@@ -179,9 +179,8 @@ int bnxt_re_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
 int bnxt_re_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
 int bnxt_re_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *recv_wr,
 			  const struct ib_recv_wr **bad_recv_wr);
-struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd,
-				struct ib_qp_init_attr *qp_init_attr,
-				struct ib_udata *udata);
+int bnxt_re_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr,
+		      struct ib_udata *udata);
 int bnxt_re_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
 		      int qp_attr_mask, struct ib_udata *udata);
 int bnxt_re_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 4678bd6ec7d6..66268e41b470 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -711,6 +711,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
 	INIT_RDMA_OBJ_SIZE(ib_ah, bnxt_re_ah, ib_ah),
 	INIT_RDMA_OBJ_SIZE(ib_cq, bnxt_re_cq, ib_cq),
 	INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd),
+	INIT_RDMA_OBJ_SIZE(ib_qp, bnxt_re_qp, ib_qp),
 	INIT_RDMA_OBJ_SIZE(ib_srq, bnxt_re_srq, ib_srq),
 	INIT_RDMA_OBJ_SIZE(ib_ucontext, bnxt_re_ucontext, ib_uctx),
 };
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index ac5f581aff4c..12f33467c672 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -990,9 +990,8 @@ int c4iw_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata);
 int c4iw_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *attrs,
 		    struct ib_udata *udata);
 int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata);
-struct ib_qp *c4iw_create_qp(struct ib_pd *pd,
-			     struct ib_qp_init_attr *attrs,
-			     struct ib_udata *udata);
+int c4iw_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs,
+		   struct ib_udata *udata);
 int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 				 int attr_mask, struct ib_udata *udata);
 int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 881d515eb15a..e7337662aff8 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -499,6 +499,7 @@ static const struct ib_device_ops c4iw_dev_ops = {
 	INIT_RDMA_OBJ_SIZE(ib_cq, c4iw_cq, ibcq),
 	INIT_RDMA_OBJ_SIZE(ib_mw, c4iw_mw, ibmw),
 	INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_qp, c4iw_qp, ibqp),
 	INIT_RDMA_OBJ_SIZE(ib_srq, c4iw_srq, ibsrq),
 	INIT_RDMA_OBJ_SIZE(ib_ucontext, c4iw_ucontext, ibucontext),
 };
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index a81fa7a56edb..d20b4ef2c853 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -2103,16 +2103,15 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
 		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !qhp->srq);
 
 	c4iw_put_wr_wait(qhp->wr_waitp);
-
-	kfree(qhp);
 	return 0;
 }
 
-struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
-			     struct ib_udata *udata)
+int c4iw_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs,
+		   struct ib_udata *udata)
 {
+	struct ib_pd *pd = qp->pd;
 	struct c4iw_dev *rhp;
-	struct c4iw_qp *qhp;
+	struct c4iw_qp *qhp = to_c4iw_qp(qp);
 	struct c4iw_pd *php;
 	struct c4iw_cq *schp;
 	struct c4iw_cq *rchp;
@@ -2124,44 +2123,36 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 	struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm;
 	struct c4iw_mm_entry *rq_db_key_mm = NULL, *ma_sync_key_mm = NULL;
 
-	pr_debug("ib_pd %p\n", pd);
-
 	if (attrs->qp_type != IB_QPT_RC || attrs->create_flags)
-		return ERR_PTR(-EOPNOTSUPP);
+		return -EOPNOTSUPP;
 
 	php = to_c4iw_pd(pd);
 	rhp = php->rhp;
 	schp = get_chp(rhp, ((struct c4iw_cq *)attrs->send_cq)->cq.cqid);
 	rchp = get_chp(rhp, ((struct c4iw_cq *)attrs->recv_cq)->cq.cqid);
 	if (!schp || !rchp)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	if (!attrs->srq) {
 		if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size)
-			return ERR_PTR(-E2BIG);
+			return -E2BIG;
 		rqsize = attrs->cap.max_recv_wr + 1;
 		if (rqsize < 8)
 			rqsize = 8;
 	}
 
 	if (attrs->cap.max_send_wr > rhp->rdev.hw_queue.t4_max_sq_size)
-		return ERR_PTR(-E2BIG);
+		return -E2BIG;
 	sqsize = attrs->cap.max_send_wr + 1;
 	if (sqsize < 8)
 		sqsize = 8;
 
-	qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
-	if (!qhp)
-		return ERR_PTR(-ENOMEM);
-
 	qhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
-	if (!qhp->wr_waitp) {
-		ret = -ENOMEM;
-		goto err_free_qhp;
-	}
+	if (!qhp->wr_waitp)
+		return -ENOMEM;
 
 	qhp->wq.sq.size = sqsize;
 	qhp->wq.sq.memsize =
@@ -2339,7 +2330,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 		 qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize,
 		 attrs->cap.max_send_wr, qhp->wq.rq.qid, qhp->wq.rq.size,
 		 qhp->wq.rq.memsize, attrs->cap.max_recv_wr);
-	return &qhp->ibqp;
+	return 0;
 err_free_ma_sync_key:
 	kfree(ma_sync_key_mm);
 err_free_rq_db_key:
@@ -2359,9 +2350,7 @@ err_destroy_qp:
 		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !attrs->srq);
 err_free_wr_wait:
 	c4iw_put_wr_wait(qhp->wr_waitp);
-err_free_qhp:
-	kfree(qhp);
-	return ERR_PTR(ret);
+	return ret;
 }
 
 int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h
index 2b8ca099b381..87b1dadeb7fe 100644
--- a/drivers/infiniband/hw/efa/efa.h
+++ b/drivers/infiniband/hw/efa/efa.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
 /*
- * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
  */
 
 #ifndef _EFA_H_
@@ -27,8 +27,7 @@
 struct efa_irq {
 	irq_handler_t handler;
 	void *data;
-	int cpu;
-	u32 vector;
+	u32 irqn;
 	cpumask_t affinity_hint_mask;
 	char name[EFA_IRQNAME_SIZE];
 };
@@ -132,9 +131,8 @@ int efa_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
 int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
 int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
 int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
-struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
-			    struct ib_qp_init_attr *init_attr,
-			    struct ib_udata *udata);
+int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
+		  struct ib_udata *udata);
 int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
 int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		  struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
index be4a07bd268a..417dea5f90cf 100644
--- a/drivers/infiniband/hw/efa/efa_main.c
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -83,8 +83,7 @@ static int efa_request_mgmnt_irq(struct efa_dev *dev)
 	int err;
 
 	irq = &dev->admin_irq;
-	err = request_irq(irq->vector, irq->handler, 0, irq->name,
-			  irq->data);
+	err = request_irq(irq->irqn, irq->handler, 0, irq->name, irq->data);
 	if (err) {
 		dev_err(&dev->pdev->dev, "Failed to request admin irq (%d)\n",
 			err);
@@ -92,8 +91,8 @@ static int efa_request_mgmnt_irq(struct efa_dev *dev)
 	}
 
 	dev_dbg(&dev->pdev->dev, "Set affinity hint of mgmnt irq to %*pbl (irq vector: %d)\n",
-		nr_cpumask_bits, &irq->affinity_hint_mask, irq->vector);
-	irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
+		nr_cpumask_bits, &irq->affinity_hint_mask, irq->irqn);
+	irq_set_affinity_hint(irq->irqn, &irq->affinity_hint_mask);
 
 	return 0;
 }
@@ -106,15 +105,13 @@ static void efa_setup_mgmnt_irq(struct efa_dev *dev)
 		 "efa-mgmnt@pci:%s", pci_name(dev->pdev));
 	dev->admin_irq.handler = efa_intr_msix_mgmnt;
 	dev->admin_irq.data = dev;
-	dev->admin_irq.vector =
+	dev->admin_irq.irqn =
 		pci_irq_vector(dev->pdev, dev->admin_msix_vector_idx);
 	cpu = cpumask_first(cpu_online_mask);
-	dev->admin_irq.cpu = cpu;
 	cpumask_set_cpu(cpu,
 			&dev->admin_irq.affinity_hint_mask);
-	dev_info(&dev->pdev->dev, "Setup irq:0x%p vector:%d name:%s\n",
-		 &dev->admin_irq,
-		 dev->admin_irq.vector,
+	dev_info(&dev->pdev->dev, "Setup irq:%d name:%s\n",
+		 dev->admin_irq.irqn,
 		 dev->admin_irq.name);
 }
 
@@ -123,8 +120,8 @@ static void efa_free_mgmnt_irq(struct efa_dev *dev)
 	struct efa_irq *irq;
 
 	irq = &dev->admin_irq;
-	irq_set_affinity_hint(irq->vector, NULL);
-	free_irq(irq->vector, irq->data);
+	irq_set_affinity_hint(irq->irqn, NULL);
+	free_irq(irq->irqn, irq->data);
 }
 
 static int efa_set_mgmnt_irq(struct efa_dev *dev)
@@ -271,6 +268,7 @@ static const struct ib_device_ops efa_dev_ops = {
 	INIT_RDMA_OBJ_SIZE(ib_ah, efa_ah, ibah),
 	INIT_RDMA_OBJ_SIZE(ib_cq, efa_cq, ibcq),
 	INIT_RDMA_OBJ_SIZE(ib_pd, efa_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_qp, efa_qp, ibqp),
 	INIT_RDMA_OBJ_SIZE(ib_ucontext, efa_ucontext, ibucontext),
 };
 
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index be6d3ff0f1be..e5f9d90aad5e 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
- * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
  */
 
 #include <linux/vmalloc.h>
@@ -30,7 +30,21 @@ struct efa_user_mmap_entry {
 	u8 mmap_flag;
 };
 
-#define EFA_DEFINE_STATS(op) \
+#define EFA_DEFINE_DEVICE_STATS(op) \
+	op(EFA_SUBMITTED_CMDS, "submitted_cmds") \
+	op(EFA_COMPLETED_CMDS, "completed_cmds") \
+	op(EFA_CMDS_ERR, "cmds_err") \
+	op(EFA_NO_COMPLETION_CMDS, "no_completion_cmds") \
+	op(EFA_KEEP_ALIVE_RCVD, "keep_alive_rcvd") \
+	op(EFA_ALLOC_PD_ERR, "alloc_pd_err") \
+	op(EFA_CREATE_QP_ERR, "create_qp_err") \
+	op(EFA_CREATE_CQ_ERR, "create_cq_err") \
+	op(EFA_REG_MR_ERR, "reg_mr_err") \
+	op(EFA_ALLOC_UCONTEXT_ERR, "alloc_ucontext_err") \
+	op(EFA_CREATE_AH_ERR, "create_ah_err") \
+	op(EFA_MMAP_ERR, "mmap_err")
+
+#define EFA_DEFINE_PORT_STATS(op) \
 	op(EFA_TX_BYTES, "tx_bytes") \
 	op(EFA_TX_PKTS, "tx_pkts") \
 	op(EFA_RX_BYTES, "rx_bytes") \
@@ -44,28 +58,24 @@ struct efa_user_mmap_entry {
 	op(EFA_RDMA_READ_BYTES, "rdma_read_bytes") \
 	op(EFA_RDMA_READ_WR_ERR, "rdma_read_wr_err") \
 	op(EFA_RDMA_READ_RESP_BYTES, "rdma_read_resp_bytes") \
-	op(EFA_SUBMITTED_CMDS, "submitted_cmds") \
-	op(EFA_COMPLETED_CMDS, "completed_cmds") \
-	op(EFA_CMDS_ERR, "cmds_err") \
-	op(EFA_NO_COMPLETION_CMDS, "no_completion_cmds") \
-	op(EFA_KEEP_ALIVE_RCVD, "keep_alive_rcvd") \
-	op(EFA_ALLOC_PD_ERR, "alloc_pd_err") \
-	op(EFA_CREATE_QP_ERR, "create_qp_err") \
-	op(EFA_CREATE_CQ_ERR, "create_cq_err") \
-	op(EFA_REG_MR_ERR, "reg_mr_err") \
-	op(EFA_ALLOC_UCONTEXT_ERR, "alloc_ucontext_err") \
-	op(EFA_CREATE_AH_ERR, "create_ah_err") \
-	op(EFA_MMAP_ERR, "mmap_err")
 
 #define EFA_STATS_ENUM(ename, name) ename,
 #define EFA_STATS_STR(ename, name) [ename] = name,
 
-enum efa_hw_stats {
-	EFA_DEFINE_STATS(EFA_STATS_ENUM)
+enum efa_hw_device_stats {
+	EFA_DEFINE_DEVICE_STATS(EFA_STATS_ENUM)
 };
 
-static const char *const efa_stats_names[] = {
-	EFA_DEFINE_STATS(EFA_STATS_STR)
+static const char *const efa_device_stats_names[] = {
+	EFA_DEFINE_DEVICE_STATS(EFA_STATS_STR)
+};
+
+enum efa_hw_port_stats {
+	EFA_DEFINE_PORT_STATS(EFA_STATS_ENUM)
+};
+
+static const char *const efa_port_stats_names[] = {
+	EFA_DEFINE_PORT_STATS(EFA_STATS_STR)
 };
 
 #define EFA_CHUNK_PAYLOAD_SHIFT       12
@@ -440,7 +450,6 @@ int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 				qp->rq_size, DMA_TO_DEVICE);
 	}
 
-	kfree(qp);
 	return 0;
 }
 
@@ -599,17 +608,16 @@ static int efa_qp_validate_attr(struct efa_dev *dev,
 	return 0;
 }
 
-struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
-			    struct ib_qp_init_attr *init_attr,
-			    struct ib_udata *udata)
+int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
+		  struct ib_udata *udata)
 {
 	struct efa_com_create_qp_params create_qp_params = {};
 	struct efa_com_create_qp_result create_qp_resp;
-	struct efa_dev *dev = to_edev(ibpd->device);
+	struct efa_dev *dev = to_edev(ibqp->device);
 	struct efa_ibv_create_qp_resp resp = {};
 	struct efa_ibv_create_qp cmd = {};
+	struct efa_qp *qp = to_eqp(ibqp);
 	struct efa_ucontext *ucontext;
-	struct efa_qp *qp;
 	int err;
 
 	ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext,
@@ -654,14 +662,8 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
 		goto err_out;
 	}
 
-	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
-	if (!qp) {
-		err = -ENOMEM;
-		goto err_out;
-	}
-
 	create_qp_params.uarn = ucontext->uarn;
-	create_qp_params.pd = to_epd(ibpd)->pdn;
+	create_qp_params.pd = to_epd(ibqp->pd)->pdn;
 
 	if (init_attr->qp_type == IB_QPT_UD) {
 		create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_UD;
@@ -672,7 +674,7 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
 			  "Unsupported qp type %d driver qp type %d\n",
 			  init_attr->qp_type, cmd.driver_qp_type);
 		err = -EOPNOTSUPP;
-		goto err_free_qp;
+		goto err_out;
 	}
 
 	ibdev_dbg(&dev->ibdev, "Create QP: qp type %d driver qp type %#x\n",
@@ -690,7 +692,7 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
 						    qp->rq_size, DMA_TO_DEVICE);
 		if (!qp->rq_cpu_addr) {
 			err = -ENOMEM;
-			goto err_free_qp;
+			goto err_out;
 		}
 
 		ibdev_dbg(&dev->ibdev,
@@ -717,7 +719,6 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
 
 	qp->qp_handle = create_qp_resp.qp_handle;
 	qp->ibqp.qp_num = create_qp_resp.qp_num;
-	qp->ibqp.qp_type = init_attr->qp_type;
 	qp->max_send_wr = init_attr->cap.max_send_wr;
 	qp->max_recv_wr = init_attr->cap.max_recv_wr;
 	qp->max_send_sge = init_attr->cap.max_send_sge;
@@ -737,7 +738,7 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
 
 	ibdev_dbg(&dev->ibdev, "Created qp[%d]\n", qp->ibqp.qp_num);
 
-	return &qp->ibqp;
+	return 0;
 
 err_remove_mmap_entries:
 	efa_qp_user_mmap_entries_remove(qp);
@@ -747,11 +748,9 @@ err_free_mapped:
 	if (qp->rq_size)
 		efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr,
 				qp->rq_size, DMA_TO_DEVICE);
-err_free_qp:
-	kfree(qp);
 err_out:
 	atomic64_inc(&dev->stats.create_qp_err);
-	return ERR_PTR(err);
+	return err;
 }
 
 static const struct {
@@ -1904,33 +1903,53 @@ int efa_destroy_ah(struct ib_ah *ibah, u32 flags)
 	return 0;
 }
 
-struct rdma_hw_stats *efa_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
+struct rdma_hw_stats *efa_alloc_hw_port_stats(struct ib_device *ibdev,
+					      u32 port_num)
 {
-	return rdma_alloc_hw_stats_struct(efa_stats_names,
-					  ARRAY_SIZE(efa_stats_names),
+	return rdma_alloc_hw_stats_struct(efa_port_stats_names,
+					  ARRAY_SIZE(efa_port_stats_names),
 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
 
 struct rdma_hw_stats *efa_alloc_hw_device_stats(struct ib_device *ibdev)
 {
-	/*
-	 * It is probably a bug that efa reports its port stats as device
-	 * stats
-	 */
-	return efa_alloc_hw_port_stats(ibdev, 0);
+	return rdma_alloc_hw_stats_struct(efa_device_stats_names,
+					  ARRAY_SIZE(efa_device_stats_names),
+					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
 
-int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
-		     u32 port_num, int index)
+static int efa_fill_device_stats(struct efa_dev *dev,
+				 struct rdma_hw_stats *stats)
+{
+	struct efa_com_stats_admin *as = &dev->edev.aq.stats;
+	struct efa_stats *s = &dev->stats;
+
+	stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd);
+	stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd);
+	stats->value[EFA_CMDS_ERR] = atomic64_read(&as->cmd_err);
+	stats->value[EFA_NO_COMPLETION_CMDS] = atomic64_read(&as->no_completion);
+
+	stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd);
+	stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->alloc_pd_err);
+	stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->create_qp_err);
+	stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->create_cq_err);
+	stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->reg_mr_err);
+	stats->value[EFA_ALLOC_UCONTEXT_ERR] =
+		atomic64_read(&s->alloc_ucontext_err);
+	stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->create_ah_err);
+	stats->value[EFA_MMAP_ERR] = atomic64_read(&s->mmap_err);
+
+	return ARRAY_SIZE(efa_device_stats_names);
+}
+
+static int efa_fill_port_stats(struct efa_dev *dev, struct rdma_hw_stats *stats,
+			       u32 port_num)
 {
 	struct efa_com_get_stats_params params = {};
 	union efa_com_get_stats_result result;
-	struct efa_dev *dev = to_edev(ibdev);
 	struct efa_com_rdma_read_stats *rrs;
 	struct efa_com_messages_stats *ms;
 	struct efa_com_basic_stats *bs;
-	struct efa_com_stats_admin *as;
-	struct efa_stats *s;
 	int err;
 
 	params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL;
@@ -1969,24 +1988,16 @@ int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
 	stats->value[EFA_RDMA_READ_WR_ERR] = rrs->read_wr_err;
 	stats->value[EFA_RDMA_READ_RESP_BYTES] = rrs->read_resp_bytes;
 
-	as = &dev->edev.aq.stats;
-	stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd);
-	stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd);
-	stats->value[EFA_CMDS_ERR] = atomic64_read(&as->cmd_err);
-	stats->value[EFA_NO_COMPLETION_CMDS] = atomic64_read(&as->no_completion);
-
-	s = &dev->stats;
-	stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd);
-	stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->alloc_pd_err);
-	stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->create_qp_err);
-	stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->create_cq_err);
-	stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->reg_mr_err);
-	stats->value[EFA_ALLOC_UCONTEXT_ERR] =
-		atomic64_read(&s->alloc_ucontext_err);
-	stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->create_ah_err);
-	stats->value[EFA_MMAP_ERR] = atomic64_read(&s->mmap_err);
+	return ARRAY_SIZE(efa_port_stats_names);
+}
 
-	return ARRAY_SIZE(efa_stats_names);
+int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+		     u32 port_num, int index)
+{
+	if (port_num)
+		return efa_fill_port_stats(to_edev(ibdev), stats, port_num);
+	else
+		return efa_fill_device_stats(to_edev(ibdev), stats);
 }
 
 enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 16543f717527..98c813ba4304 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -1,49 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015 - 2020 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
+
 #include <linux/topology.h>
 #include <linux/cpumask.h>
 #include <linux/module.h>
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index f94ed5d7c7a3..00854f21787f 100644
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2015 - 2020 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
+
 #ifndef _HFI1_AFFINITY_H
 #define _HFI1_AFFINITY_H
 
diff --git a/drivers/infiniband/hw/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h
index 75d5d18da3da..df295f47b315 100644
--- a/drivers/infiniband/hw/hfi1/aspm.h
+++ b/drivers/infiniband/hw/hfi1/aspm.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2015-2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
+
 #ifndef _ASPM_H
 #define _ASPM_H
 
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index c97544638367..37273dc0c03c 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015 - 2020 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 /*
@@ -14414,7 +14372,7 @@ static void init_qos(struct hfi1_devdata *dd, struct rsm_map_table *rmt)
 	if (rmt->used + rmt_entries >= NUM_MAP_ENTRIES)
 		goto bail;
 
-	/* add qos entries to the the RSM map table */
+	/* add qos entries to the RSM map table */
 	for (i = 0, ctxt = FIRST_KERNEL_KCTXT; i < num_vls; i++) {
 		unsigned tctxt;
 
@@ -14893,7 +14851,7 @@ int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt)
 }
 
 /*
- * Start doing the clean up the the chip. Our clean up happens in multiple
+ * Start doing the clean up the chip. Our clean up happens in multiple
  * stages and this is just the first.
  */
 void hfi1_start_cleanup(struct hfi1_devdata *dd)
@@ -15336,7 +15294,7 @@ int hfi1_init_dd(struct hfi1_devdata *dd)
 	init_completion(&dd->user_comp);
 
 	/* The user refcount starts with one to inidicate an active device */
-	atomic_set(&dd->user_refcount, 1);
+	refcount_set(&dd->user_refcount, 1);
 
 	goto bail;
 
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index ac26649d4463..b2d53713da58 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -1,52 +1,10 @@
-#ifndef _CHIP_H
-#define _CHIP_H
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2015 - 2020 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
+#ifndef _CHIP_H
+#define _CHIP_H
 /*
  * This file contains all of the defines that is specific to the HFI chip
  */
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index fb3ec9bff7a2..95a8d530d554 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -1,53 +1,11 @@
-#ifndef DEF_CHIP_REG
-#define DEF_CHIP_REG
-
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
+#ifndef DEF_CHIP_REG
+#define DEF_CHIP_REG
+
 #define CORE		0x000000000000
 #define CCE			(CORE + 0x000000000000)
 #define ASIC		(CORE + 0x000000400000)
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index ff423e546b80..995991d9709d 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2015 - 2020 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #ifndef _COMMON_H
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index 2ced236e1553..22a3cdb940be 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -1,49 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015-2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
+
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/kernel.h>
@@ -1358,7 +1317,7 @@ static void _driver_stats_seq_stop(struct seq_file *s, void *v)
 {
 }
 
-static u64 hfi1_sps_ints(void)
+static void hfi1_sps_show_ints(struct seq_file *s)
 {
 	unsigned long index, flags;
 	struct hfi1_devdata *dd;
@@ -1369,24 +1328,19 @@ static u64 hfi1_sps_ints(void)
 		sps_ints += get_all_cpu_total(dd->int_counter);
 	}
 	xa_unlock_irqrestore(&hfi1_dev_table, flags);
-	return sps_ints;
+	seq_write(s, &sps_ints, sizeof(u64));
 }
 
 static int _driver_stats_seq_show(struct seq_file *s, void *v)
 {
 	loff_t *spos = v;
-	char *buffer;
 	u64 *stats = (u64 *)&hfi1_stats;
-	size_t sz = seq_get_buf(s, &buffer);
 
-	if (sz < sizeof(u64))
-		return SEQ_SKIP;
 	/* special case for interrupts */
 	if (*spos == 0)
-		*(u64 *)buffer = hfi1_sps_ints();
+		hfi1_sps_show_ints(s);
 	else
-		*(u64 *)buffer = stats[*spos];
-	seq_commit(s,  sizeof(u64));
+		seq_write(s, stats + *spos, sizeof(u64));
 	return 0;
 }
 
diff --git a/drivers/infiniband/hw/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h
index 57e582caa5eb..29a5a8de2c41 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.h
+++ b/drivers/infiniband/hw/hfi1/debugfs.h
@@ -1,52 +1,11 @@
-#ifndef _HFI1_DEBUGFS_H
-#define _HFI1_DEBUGFS_H
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2015, 2016, 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
+#ifndef _HFI1_DEBUGFS_H
+#define _HFI1_DEBUGFS_H
+
 struct hfi1_ibdev;
 
 #define DEBUGFS_SEQ_FILE_OPS(name) \
diff --git a/drivers/infiniband/hw/hfi1/device.c b/drivers/infiniband/hw/hfi1/device.c
index bbb6069dec2a..68a184c39941 100644
--- a/drivers/infiniband/hw/hfi1/device.c
+++ b/drivers/infiniband/hw/hfi1/device.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/cdev.h>
diff --git a/drivers/infiniband/hw/hfi1/device.h b/drivers/infiniband/hw/hfi1/device.h
index c3ec19cb0ac9..c371b5612b6b 100644
--- a/drivers/infiniband/hw/hfi1/device.h
+++ b/drivers/infiniband/hw/hfi1/device.h
@@ -1,52 +1,11 @@
-#ifndef _HFI1_DEVICE_H
-#define _HFI1_DEVICE_H
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
+#ifndef _HFI1_DEVICE_H
+#define _HFI1_DEVICE_H
+
 int hfi1_cdev_init(int minor, const char *name,
 		   const struct file_operations *fops,
 		   struct cdev *cdev, struct device **devp,
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index f88bb4af245f..de411884386b 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015-2020 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/spinlock.h>
diff --git a/drivers/infiniband/hw/hfi1/efivar.c b/drivers/infiniband/hw/hfi1/efivar.c
index c22ab7b5163b..f275dd1abed8 100644
--- a/drivers/infiniband/hw/hfi1/efivar.c
+++ b/drivers/infiniband/hw/hfi1/efivar.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/ctype.h>
diff --git a/drivers/infiniband/hw/hfi1/efivar.h b/drivers/infiniband/hw/hfi1/efivar.h
index 94e9e70de568..5ebc2f07bbef 100644
--- a/drivers/infiniband/hw/hfi1/efivar.h
+++ b/drivers/infiniband/hw/hfi1/efivar.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
+
 #ifndef _HFI1_EFIVAR_H
 #define _HFI1_EFIVAR_H
 
diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c
index 1613af1c58d9..fbe958107457 100644
--- a/drivers/infiniband/hw/hfi1/eprom.c
+++ b/drivers/infiniband/hw/hfi1/eprom.c
@@ -1,49 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
+
 #include <linux/delay.h>
 #include "hfi.h"
 #include "common.h"
diff --git a/drivers/infiniband/hw/hfi1/eprom.h b/drivers/infiniband/hw/hfi1/eprom.h
index e774184f1643..772c516366ce 100644
--- a/drivers/infiniband/hw/hfi1/eprom.h
+++ b/drivers/infiniband/hw/hfi1/eprom.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 struct hfi1_devdata;
diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c
index a414214f6035..b86f697c7956 100644
--- a/drivers/infiniband/hw/hfi1/exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/exp_rcv.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include "exp_rcv.h"
diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.h b/drivers/infiniband/hw/hfi1/exp_rcv.h
index f25362015095..c6291bbf723c 100644
--- a/drivers/infiniband/hw/hfi1/exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/exp_rcv.h
@@ -1,52 +1,10 @@
-#ifndef _HFI1_EXP_RCV_H
-#define _HFI1_EXP_RCV_H
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
+#ifndef _HFI1_EXP_RCV_H
+#define _HFI1_EXP_RCV_H
 #include "hfi.h"
 
 #define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list))
diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c
index 0dfbcfb048ca..e2e4f9f6fae2 100644
--- a/drivers/infiniband/hw/hfi1/fault.c
+++ b/drivers/infiniband/hw/hfi1/fault.c
@@ -1,49 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
+
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/kernel.h>
diff --git a/drivers/infiniband/hw/hfi1/fault.h b/drivers/infiniband/hw/hfi1/fault.h
index a83382700a7c..7fe7f47219db 100644
--- a/drivers/infiniband/hw/hfi1/fault.h
+++ b/drivers/infiniband/hw/hfi1/fault.h
@@ -1,51 +1,11 @@
-#ifndef _HFI1_FAULT_H
-#define _HFI1_FAULT_H
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
+
+#ifndef _HFI1_FAULT_H
+#define _HFI1_FAULT_H
+
 #include <linux/fault-inject.h>
 #include <linux/dcache.h>
 #include <linux/bitops.h>
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 955c3637980e..1783a6ea5427 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -1,50 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2020 Cornelis Networks, Inc.
  * Copyright(c) 2015-2020 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
+
 #include <linux/poll.h>
 #include <linux/cdev.h>
 #include <linux/vmalloc.h>
@@ -194,7 +153,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
 	if (!((dd->flags & HFI1_PRESENT) && dd->kregbase1))
 		return -EINVAL;
 
-	if (!atomic_inc_not_zero(&dd->user_refcount))
+	if (!refcount_inc_not_zero(&dd->user_refcount))
 		return -ENXIO;
 
 	/* The real work is performed later in assign_ctxt() */
@@ -213,7 +172,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
 nomem:
 	kfree(fd);
 	fp->private_data = NULL;
-	if (atomic_dec_and_test(&dd->user_refcount))
+	if (refcount_dec_and_test(&dd->user_refcount))
 		complete(&dd->user_comp);
 	return -ENOMEM;
 }
@@ -711,7 +670,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
 	deallocate_ctxt(uctxt);
 done:
 
-	if (atomic_dec_and_test(&dd->user_refcount))
+	if (refcount_dec_and_test(&dd->user_refcount))
 		complete(&dd->user_comp);
 
 	cleanup_srcu_struct(&fdata->pq_srcu);
diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index 2cf102b5abd4..31e63e245ea9 100644
--- a/drivers/infiniband/hw/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015 - 2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/firmware.h>
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 31664f43c27f..7fa9cd39254f 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1,53 +1,13 @@
-#ifndef _HFI1_KERNEL_H
-#define _HFI1_KERNEL_H
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2020 Cornelis Networks, Inc.
  * Copyright(c) 2015-2020 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
+#ifndef _HFI1_KERNEL_H
+#define _HFI1_KERNEL_H
+
+#include <linux/refcount.h>
 #include <linux/interrupt.h>
 #include <linux/pci.h>
 #include <linux/dma-mapping.h>
@@ -1384,7 +1344,7 @@ struct hfi1_devdata {
 	/* Number of verbs contexts which have disabled ASPM */
 	atomic_t aspm_disabled_cnt;
 	/* Keeps track of user space clients */
-	atomic_t user_refcount;
+	refcount_t user_refcount;
 	/* Used to wait for outstanding user space clients before dev removal */
 	struct completion user_comp;
 
@@ -2601,7 +2561,7 @@ static inline bool hfi1_get_hdr_type(u32 lid, struct rdma_ah_attr *attr)
 			HFI1_PKT_TYPE_16B : HFI1_PKT_TYPE_9B;
 
 	/*
-	 * Return a 16B header type if either the the destination
+	 * Return a 16B header type if either the destination
 	 * or source lid is extended.
 	 */
 	if (hfi1_get_packet_type(rdma_ah_get_dlid(attr)) == HFI1_PKT_TYPE_16B)
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 0986aa065418..e3679d076eaa 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015 - 2020 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/pci.h>
@@ -650,12 +608,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
 
 	ppd->pkeys[default_pkey_idx] = DEFAULT_P_KEY;
 	ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
-
-	if (loopback) {
-		dd_dev_err(dd, "Faking data partition 0x8001 in idx %u\n",
-			   !default_pkey_idx);
-		ppd->pkeys[!default_pkey_idx] = 0x8001;
-	}
+	ppd->pkeys[0] = 0x8001;
 
 	INIT_WORK(&ppd->link_vc_work, handle_verify_cap);
 	INIT_WORK(&ppd->link_up_work, handle_link_up);
@@ -1752,7 +1705,7 @@ static void wait_for_clients(struct hfi1_devdata *dd)
 	 * Remove the device init value and complete the device if there is
 	 * no clients or wait for active clients to finish.
 	 */
-	if (atomic_dec_and_test(&dd->user_refcount))
+	if (refcount_dec_and_test(&dd->user_refcount))
 		complete(&dd->user_comp);
 
 	wait_for_completion(&dd->user_comp);
diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c
index 5ba5c11459e7..70376e6dba33 100644
--- a/drivers/infiniband/hw/hfi1/intr.c
+++ b/drivers/infiniband/hw/hfi1/intr.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/pci.h>
diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h
index cda81a7843c2..4df0700cbaba 100644
--- a/drivers/infiniband/hw/hfi1/iowait.h
+++ b/drivers/infiniband/hw/hfi1/iowait.h
@@ -1,52 +1,11 @@
-#ifndef _HFI1_IOWAIT_H
-#define _HFI1_IOWAIT_H
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
+#ifndef _HFI1_IOWAIT_H
+#define _HFI1_IOWAIT_H
+
 #include <linux/list.h>
 #include <linux/workqueue.h>
 #include <linux/wait.h>
diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c
index 993f9838b6c8..e74ddbe46589 100644
--- a/drivers/infiniband/hw/hfi1/ipoib_tx.c
+++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c
@@ -644,10 +644,13 @@ static int hfi1_ipoib_sdma_sleep(struct sdma_engine *sde,
 			/* came from non-list submit */
 			list_add_tail(&txreq->list, &txq->tx_list);
 		if (list_empty(&txq->wait.list)) {
+			struct hfi1_ibport *ibp = &sde->ppd->ibport_data;
+
 			if (!atomic_xchg(&txq->no_desc, 1)) {
 				trace_hfi1_txq_queued(txq);
 				hfi1_ipoib_stop_txq(txq);
 			}
+			ibp->rvp.n_dmawait++;
 			iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
 		}
 
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 1fe5e702f31d..4146a2113a95 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015-2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/net.h>
diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h
index 0205d308ef5e..1d45a008fa7f 100644
--- a/drivers/infiniband/hw/hfi1/mad.h
+++ b/drivers/infiniband/hw/hfi1/mad.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2015 - 2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
+
 #ifndef _HFI1_MAD_H
 #define _HFI1_MAD_H
 
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index d213f65d4cdd..876cc78a22cc 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -1,50 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2020 Cornelis Networks, Inc.
  * Copyright(c) 2016 - 2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
+
 #include <linux/list.h>
 #include <linux/rculist.h>
 #include <linux/mmu_notifier.h>
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h
index 423aacc67e94..7417be2b9dc8 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.h
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.h
@@ -1,50 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2020 Cornelis Networks, Inc.
  * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
+
 #ifndef _HFI1_MMU_RB_H
 #define _HFI1_MMU_RB_H
 
diff --git a/drivers/infiniband/hw/hfi1/msix.c b/drivers/infiniband/hw/hfi1/msix.c
index 57a5f02ebc77..77d2ece9a9cb 100644
--- a/drivers/infiniband/hw/hfi1/msix.c
+++ b/drivers/infiniband/hw/hfi1/msix.c
@@ -1,49 +1,6 @@
 // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
 /*
  * Copyright(c) 2018 - 2020 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include "hfi.h"
diff --git a/drivers/infiniband/hw/hfi1/msix.h b/drivers/infiniband/hw/hfi1/msix.h
index e63e944bf0fc..9530ccb0a2ce 100644
--- a/drivers/infiniband/hw/hfi1/msix.h
+++ b/drivers/infiniband/hw/hfi1/msix.h
@@ -1,50 +1,8 @@
 /* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
 /*
  * Copyright(c) 2018 - 2020 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
+
 #ifndef _HFI1_MSIX_H
 #define _HFI1_MSIX_H
 
diff --git a/drivers/infiniband/hw/hfi1/opa_compat.h b/drivers/infiniband/hw/hfi1/opa_compat.h
index 774215b95df5..31570b0cfd18 100644
--- a/drivers/infiniband/hw/hfi1/opa_compat.h
+++ b/drivers/infiniband/hw/hfi1/opa_compat.h
@@ -1,52 +1,10 @@
-#ifndef _LINUX_H
-#define _LINUX_H
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
+#ifndef _LINUX_H
+#define _LINUX_H
 /*
  * This header file is for OPA-specific definitions which are
  * required by the HFI driver, and which aren't yet in the Linux
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 6f06e9920503..a0802332c8cb 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015 - 2019 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/pci.h>
@@ -92,25 +50,18 @@ int hfi1_pcie_init(struct hfi1_devdata *dd)
 		goto bail;
 	}
 
-	ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
 	if (ret) {
 		/*
 		 * If the 64 bit setup fails, try 32 bit.  Some systems
 		 * do not setup 64 bit maps on systems with 2GB or less
 		 * memory installed.
 		 */
-		ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
 		if (ret) {
 			dd_dev_err(dd, "Unable to set DMA mask: %d\n", ret);
 			goto bail;
 		}
-		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-	} else {
-		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-	}
-	if (ret) {
-		dd_dev_err(dd, "Unable to set DMA consistent mask: %d\n", ret);
-		goto bail;
 	}
 
 	pci_set_master(pdev);
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index e276522104c6..489b436f19bb 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015-2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/delay.h>
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index 9e5f08d2b985..ea714008f261 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -1,52 +1,10 @@
-#ifndef _PIO_H
-#define _PIO_H
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2015-2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
+#ifndef _PIO_H
+#define _PIO_H
 /* send context types */
 #define SC_KERNEL 0
 #define SC_VL15   1
diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index 14bfd8287f4a..136f9a99e1e0 100644
--- a/drivers/infiniband/hw/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include "hfi.h"
diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index 4642d6ceb890..54cbd8f1a6c1 100644
--- a/drivers/infiniband/hw/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/firmware.h>
diff --git a/drivers/infiniband/hw/hfi1/platform.h b/drivers/infiniband/hw/hfi1/platform.h
index eed0aa9124fa..1d51dca1bc30 100644
--- a/drivers/infiniband/hw/hfi1/platform.h
+++ b/drivers/infiniband/hw/hfi1/platform.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
+
 #ifndef __PLATFORM_H
 #define __PLATFORM_H
 
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index e037df911512..6193d48b2c1f 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015 - 2020 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/err.h>
diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index b0d053d12129..cdf87bc6ad94 100644
--- a/drivers/infiniband/hw/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
@@ -1,52 +1,10 @@
-#ifndef _QP_H
-#define _QP_H
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
+#ifndef _QP_H
+#define _QP_H
 #include <linux/hash.h>
 #include <rdma/rdmavt_qp.h>
 #include "verbs.h"
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index 38f311f855b5..19d7887a4f10 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/delay.h>
diff --git a/drivers/infiniband/hw/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h
index 36cf52359848..8f14111eaa47 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.h
+++ b/drivers/infiniband/hw/hfi1/qsfp.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 /* QSFP support common definitions, for hfi driver */
 
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 0174b8ee9f00..acd2b273ea7d 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/io.h>
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index c3fa1814c6a8..b0151b7293f5 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/spinlock.h>
@@ -459,7 +417,7 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
  * send engine
  * @qp: a pointer to QP
  * @ps: a pointer to a structure with commonly lookup values for
- *      the the send engine progress
+ *      the send engine progress
  * @tid: true if it is the tid leg
  *
  * This routine checks if the time slice for the QP has expired
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index e83dc562629e..2b6c24b7b586 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/spinlock.h>
@@ -1860,7 +1818,7 @@ retry:
 
 	/*
 	 * The SDMA idle interrupt is not guaranteed to be ordered with respect
-	 * to updates to the the dma_head location in host memory. The head
+	 * to updates to the dma_head location in host memory. The head
 	 * value read might not be fully up to date. If there are pending
 	 * descriptors and the SDMA idle interrupt fired then read from the
 	 * CSR SDMA head instead to get the latest value from the hardware.
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index f57d55272dd2..d8170fcbfbdd 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -1,52 +1,11 @@
-#ifndef _HFI1_SDMA_H
-#define _HFI1_SDMA_H
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
+#ifndef _HFI1_SDMA_H
+#define _HFI1_SDMA_H
+
 #include <linux/types.h>
 #include <linux/list.h>
 #include <asm/byteorder.h>
diff --git a/drivers/infiniband/hw/hfi1/sdma_txreq.h b/drivers/infiniband/hw/hfi1/sdma_txreq.h
index 514a4784566b..e262fb5c5ec6 100644
--- a/drivers/infiniband/hw/hfi1/sdma_txreq.h
+++ b/drivers/infiniband/hw/hfi1/sdma_txreq.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #ifndef HFI1_SDMA_TXREQ_H
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index acfcbedebe0d..3b3407dc7c21 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -1,49 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015-2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
+
 #include <linux/ctype.h>
 #include <rdma/ib_sysfs.h>
 
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index 233ea48b72c8..2a7abf7a1f7f 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -605,7 +605,7 @@ static void __trigger_tid_waiter(struct rvt_qp *qp)
  * to this call via first_qp().
  *
  * If the qp trigger was already scheduled (!rval)
- * the the reference is dropped, otherwise the resume
+ * the reference is dropped, otherwise the resume
  * or the destroy cancel will dispatch the reference.
  */
 static void tid_rdma_schedule_tid_wakeup(struct rvt_qp *qp)
@@ -5174,7 +5174,7 @@ bail_no_tx:
 	priv->s_flags &= ~RVT_S_BUSY;
 	/*
 	 * If we didn't get a txreq, the QP will be woken up later to try
-	 * again, set the flags to the the wake up which work item to wake
+	 * again, set the flags to the wake up which work item to wake
 	 * up.
 	 * (A better algorithm should be found to do this and generalize the
 	 * sleep/wakeup flags.)
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index 715c81308b85..8302469582c6 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015 - 2020 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 #define CREATE_TRACE_POINTS
 #include "trace.h"
@@ -530,7 +488,7 @@ struct hfi1_ctxt_hist {
 	atomic_t data[255];
 };
 
-struct hfi1_ctxt_hist hist = {
+static struct hfi1_ctxt_hist hist = {
 	.count = ATOMIC_INIT(0)
 };
 
diff --git a/drivers/infiniband/hw/hfi1/trace.h b/drivers/infiniband/hw/hfi1/trace.h
index 1ce551864118..31e027c5a0c0 100644
--- a/drivers/infiniband/hw/hfi1/trace.h
+++ b/drivers/infiniband/hw/hfi1/trace.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype }
diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h
index d8c168dc3ea8..1858eaf33b18 100644
--- a/drivers/infiniband/hw/hfi1/trace_ctxts.h
+++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
 * Copyright(c) 2015 - 2020 Intel Corporation.
-*
-* This file is provided under a dual BSD/GPLv2 license.  When using or
-* redistributing this file, you may do so under either license.
-*
-* GPL LICENSE SUMMARY
-*
-* This program is free software; you can redistribute it and/or modify
-* it under the terms of version 2 of the GNU General Public License as
-* published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful, but
-* WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-* General Public License for more details.
-*
-* BSD LICENSE
-*
-* Redistribution and use in source and binary forms, with or without
-* modification, are permitted provided that the following conditions
-* are met:
-*
-*  - Redistributions of source code must retain the above copyright
-*    notice, this list of conditions and the following disclaimer.
-*  - Redistributions in binary form must reproduce the above copyright
-*    notice, this list of conditions and the following disclaimer in
-*    the documentation and/or other materials provided with the
-*    distribution.
-*  - Neither the name of Intel Corporation nor the names of its
-*    contributors may be used to endorse or promote products derived
-*    from this software without specific prior written permission.
-*
-* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*
 */
+
 #if !defined(__HFI1_TRACE_CTXTS_H) || defined(TRACE_HEADER_MULTI_READ)
 #define __HFI1_TRACE_CTXTS_H
 
diff --git a/drivers/infiniband/hw/hfi1/trace_dbg.h b/drivers/infiniband/hw/hfi1/trace_dbg.h
index de7a87392b8d..707f1053f0b7 100644
--- a/drivers/infiniband/hw/hfi1/trace_dbg.h
+++ b/drivers/infiniband/hw/hfi1/trace_dbg.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
 * Copyright(c) 2015 - 2018 Intel Corporation.
-*
-* This file is provided under a dual BSD/GPLv2 license.  When using or
-* redistributing this file, you may do so under either license.
-*
-* GPL LICENSE SUMMARY
-*
-* This program is free software; you can redistribute it and/or modify
-* it under the terms of version 2 of the GNU General Public License as
-* published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful, but
-* WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-* General Public License for more details.
-*
-* BSD LICENSE
-*
-* Redistribution and use in source and binary forms, with or without
-* modification, are permitted provided that the following conditions
-* are met:
-*
-*  - Redistributions of source code must retain the above copyright
-*    notice, this list of conditions and the following disclaimer.
-*  - Redistributions in binary form must reproduce the above copyright
-*    notice, this list of conditions and the following disclaimer in
-*    the documentation and/or other materials provided with the
-*    distribution.
-*  - Neither the name of Intel Corporation nor the names of its
-*    contributors may be used to endorse or promote products derived
-*    from this software without specific prior written permission.
-*
-* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*
 */
+
 #if !defined(__HFI1_TRACE_EXTRA_H) || defined(TRACE_HEADER_MULTI_READ)
 #define __HFI1_TRACE_EXTRA_H
 
diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
index 2f84290a88ca..b33f8f575f8a 100644
--- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
+++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2015 - 2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
+
 #if !defined(__HFI1_TRACE_IBHDRS_H) || defined(TRACE_HEADER_MULTI_READ)
 #define __HFI1_TRACE_IBHDRS_H
 
diff --git a/drivers/infiniband/hw/hfi1/trace_misc.h b/drivers/infiniband/hw/hfi1/trace_misc.h
index 93338988b922..742675fa7576 100644
--- a/drivers/infiniband/hw/hfi1/trace_misc.h
+++ b/drivers/infiniband/hw/hfi1/trace_misc.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
 * Copyright(c) 2015, 2016 Intel Corporation.
-*
-* This file is provided under a dual BSD/GPLv2 license.  When using or
-* redistributing this file, you may do so under either license.
-*
-* GPL LICENSE SUMMARY
-*
-* This program is free software; you can redistribute it and/or modify
-* it under the terms of version 2 of the GNU General Public License as
-* published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful, but
-* WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-* General Public License for more details.
-*
-* BSD LICENSE
-*
-* Redistribution and use in source and binary forms, with or without
-* modification, are permitted provided that the following conditions
-* are met:
-*
-*  - Redistributions of source code must retain the above copyright
-*    notice, this list of conditions and the following disclaimer.
-*  - Redistributions in binary form must reproduce the above copyright
-*    notice, this list of conditions and the following disclaimer in
-*    the documentation and/or other materials provided with the
-*    distribution.
-*  - Neither the name of Intel Corporation nor the names of its
-*    contributors may be used to endorse or promote products derived
-*    from this software without specific prior written permission.
-*
-* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*
 */
+
 #if !defined(__HFI1_TRACE_MISC_H) || defined(TRACE_HEADER_MULTI_READ)
 #define __HFI1_TRACE_MISC_H
 
diff --git a/drivers/infiniband/hw/hfi1/trace_mmu.h b/drivers/infiniband/hw/hfi1/trace_mmu.h
index 3b7abbc382c2..187e9244fe5e 100644
--- a/drivers/infiniband/hw/hfi1/trace_mmu.h
+++ b/drivers/infiniband/hw/hfi1/trace_mmu.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
+
 #if !defined(__HFI1_TRACE_MMU_H) || defined(TRACE_HEADER_MULTI_READ)
 #define __HFI1_TRACE_MMU_H
 
diff --git a/drivers/infiniband/hw/hfi1/trace_rc.h b/drivers/infiniband/hw/hfi1/trace_rc.h
index 5f49e1eeb211..7c3a1c77536d 100644
--- a/drivers/infiniband/hw/hfi1/trace_rc.h
+++ b/drivers/infiniband/hw/hfi1/trace_rc.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
 * Copyright(c) 2015, 2016, 2017 Intel Corporation.
-*
-* This file is provided under a dual BSD/GPLv2 license.  When using or
-* redistributing this file, you may do so under either license.
-*
-* GPL LICENSE SUMMARY
-*
-* This program is free software; you can redistribute it and/or modify
-* it under the terms of version 2 of the GNU General Public License as
-* published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful, but
-* WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-* General Public License for more details.
-*
-* BSD LICENSE
-*
-* Redistribution and use in source and binary forms, with or without
-* modification, are permitted provided that the following conditions
-* are met:
-*
-*  - Redistributions of source code must retain the above copyright
-*    notice, this list of conditions and the following disclaimer.
-*  - Redistributions in binary form must reproduce the above copyright
-*    notice, this list of conditions and the following disclaimer in
-*    the documentation and/or other materials provided with the
-*    distribution.
-*  - Neither the name of Intel Corporation nor the names of its
-*    contributors may be used to endorse or promote products derived
-*    from this software without specific prior written permission.
-*
-* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*
 */
+
 #if !defined(__HFI1_TRACE_RC_H) || defined(TRACE_HEADER_MULTI_READ)
 #define __HFI1_TRACE_RC_H
 
diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h
index 168079ed122c..0da22f9bc75e 100644
--- a/drivers/infiniband/hw/hfi1/trace_rx.h
+++ b/drivers/infiniband/hw/hfi1/trace_rx.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
+
 #if !defined(__HFI1_TRACE_RX_H) || defined(TRACE_HEADER_MULTI_READ)
 #define __HFI1_TRACE_RX_H
 
diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h
index f1922a7619fe..7318aa6150b5 100644
--- a/drivers/infiniband/hw/hfi1/trace_tx.h
+++ b/drivers/infiniband/hw/hfi1/trace_tx.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2015 - 2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 #if !defined(__HFI1_TRACE_TX_H) || defined(TRACE_HEADER_MULTI_READ)
 #define __HFI1_TRACE_TX_H
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index 5b0f536b34e0..4e9d6aa39305 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include "hfi.h"
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 6ecb984c85fa..b64b9d7e08f0 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015 - 2019 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/net.h>
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 58dcab2679d9..0c86e9d354f8 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -1,49 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2020 Cornelis Networks, Inc.
  * Copyright(c) 2015-2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 #include <asm/page.h>
 #include <linux/string.h>
@@ -177,8 +135,8 @@ static void unpin_rcv_pages(struct hfi1_filedata *fd,
 	struct mm_struct *mm;
 
 	if (mapped) {
-		pci_unmap_single(dd->pcidev, node->dma_addr,
-				 node->npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
+		dma_unmap_single(&dd->pcidev->dev, node->dma_addr,
+				 node->npages * PAGE_SIZE, DMA_FROM_DEVICE);
 		pages = &node->pages[idx];
 		mm = mm_from_tid_node(node);
 	} else {
@@ -739,9 +697,8 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
 	if (!node)
 		return -ENOMEM;
 
-	phys = pci_map_single(dd->pcidev,
-			      __va(page_to_phys(pages[0])),
-			      npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
+	phys = dma_map_single(&dd->pcidev->dev, __va(page_to_phys(pages[0])),
+			      npages * PAGE_SIZE, DMA_FROM_DEVICE);
 	if (dma_mapping_error(&dd->pcidev->dev, phys)) {
 		dd_dev_err(dd, "Failed to DMA map Exp Rcv pages 0x%llx\n",
 			   phys);
@@ -783,8 +740,8 @@ out_unmap:
 	hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
 		  node->rcventry, node->notifier.interval_tree.start,
 		  node->phys, ret);
-	pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
-			 PCI_DMA_FROMDEVICE);
+	dma_unmap_single(&dd->pcidev->dev, phys, npages * PAGE_SIZE,
+			 DMA_FROM_DEVICE);
 	kfree(node);
 	return -EFAULT;
 }
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
index d45c7b6988d4..8c53e416bf84 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
@@ -1,53 +1,12 @@
-#ifndef _HFI1_USER_EXP_RCV_H
-#define _HFI1_USER_EXP_RCV_H
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2020 - Cornelis Networks, Inc.
  * Copyright(c) 2015 - 2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
+#ifndef _HFI1_USER_EXP_RCV_H
+#define _HFI1_USER_EXP_RCV_H
+
 #include "hfi.h"
 #include "exp_rcv.h"
 
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index 3b505006c0a6..7bce963e2ae6 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015-2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/mm.h>
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index da5b2e37355a..5b11c8282744 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -1,50 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2020 - Cornelis Networks, Inc.
  * Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
+
 #include <linux/mm.h>
 #include <linux/types.h>
 #include <linux/device.h>
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index fabe58139906..ea56eb57e656 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -1,52 +1,11 @@
-#ifndef _HFI1_USER_SDMA_H
-#define _HFI1_USER_SDMA_H
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2020 - Cornelis Networks, Inc.
  * Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
+#ifndef _HFI1_USER_SDMA_H
+#define _HFI1_USER_SDMA_H
+
 #include <linux/device.h>
 #include <linux/wait.h>
 
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 9b198c35e1a1..26bea51869bf 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2015 - 2020 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <rdma/ib_mad.h>
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 420df17cd184..38565532d654 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #ifndef HFI1_VERBS_H
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c
index 8f766dd3f61c..cfecc81a27c7 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.c
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2016 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include "hfi.h"
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index 4bdfc7932376..2a7e0ae892e9 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2016 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #ifndef HFI1_VERBS_TXREQ_H
diff --git a/drivers/infiniband/hw/hfi1/vnic.h b/drivers/infiniband/hw/hfi1/vnic.h
index a7a450e2cf2c..34f03e7770be 100644
--- a/drivers/infiniband/hw/hfi1/vnic.h
+++ b/drivers/infiniband/hw/hfi1/vnic.h
@@ -1,52 +1,10 @@
-#ifndef _HFI1_VNIC_H
-#define _HFI1_VNIC_H
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2017 - 2020 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
+#ifndef _HFI1_VNIC_H
+#define _HFI1_VNIC_H
 #include <rdma/opa_vnic.h>
 #include "hfi.h"
 #include "sdma.h"
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index 7e79c0578ecf..3650fababf25 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2017 - 2020 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 /*
diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c
index 7d90b900131b..c3f0f8d877c3 100644
--- a/drivers/infiniband/hw/hfi1/vnic_sdma.c
+++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2017 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 /*
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index 1b02d3bc9bae..d4fa0fd52294 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -36,76 +36,6 @@
 #include "hns_roce_device.h"
 #include <rdma/ib_umem.h>
 
-int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj)
-{
-	int ret = 0;
-
-	spin_lock(&bitmap->lock);
-	*obj = find_next_zero_bit(bitmap->table, bitmap->max, bitmap->last);
-	if (*obj >= bitmap->max) {
-		bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
-			       & bitmap->mask;
-		*obj = find_first_zero_bit(bitmap->table, bitmap->max);
-	}
-
-	if (*obj < bitmap->max) {
-		set_bit(*obj, bitmap->table);
-		bitmap->last = (*obj + 1);
-		if (bitmap->last == bitmap->max)
-			bitmap->last = 0;
-		*obj |= bitmap->top;
-	} else {
-		ret = -EINVAL;
-	}
-
-	spin_unlock(&bitmap->lock);
-
-	return ret;
-}
-
-void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj)
-{
-	obj &= bitmap->max + bitmap->reserved_top - 1;
-
-	spin_lock(&bitmap->lock);
-	clear_bit(obj, bitmap->table);
-
-	bitmap->last = min(bitmap->last, obj);
-	bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
-		       & bitmap->mask;
-	spin_unlock(&bitmap->lock);
-}
-
-int hns_roce_bitmap_init(struct hns_roce_bitmap *bitmap, u32 num, u32 mask,
-			 u32 reserved_bot, u32 reserved_top)
-{
-	u32 i;
-
-	if (num != roundup_pow_of_two(num))
-		return -EINVAL;
-
-	bitmap->last = 0;
-	bitmap->top = 0;
-	bitmap->max = num - reserved_top;
-	bitmap->mask = mask;
-	bitmap->reserved_top = reserved_top;
-	spin_lock_init(&bitmap->lock);
-	bitmap->table = kcalloc(BITS_TO_LONGS(bitmap->max), sizeof(long),
-				GFP_KERNEL);
-	if (!bitmap->table)
-		return -ENOMEM;
-
-	for (i = 0; i < reserved_bot; ++i)
-		set_bit(i, bitmap->table);
-
-	return 0;
-}
-
-void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap)
-{
-	kfree(bitmap->table);
-}
-
 void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf)
 {
 	struct hns_roce_buf_list *trunks;
@@ -248,10 +178,10 @@ void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev)
 		ida_destroy(&hr_dev->xrcd_ida.ida);
 
 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
-		hns_roce_cleanup_srq_table(hr_dev);
+		ida_destroy(&hr_dev->srq_table.srq_ida.ida);
 	hns_roce_cleanup_qp_table(hr_dev);
 	hns_roce_cleanup_cq_table(hr_dev);
 	ida_destroy(&hr_dev->mr_table.mtpt_ida.ida);
 	ida_destroy(&hr_dev->pd_ida.ida);
-	hns_roce_cleanup_uar_table(hr_dev);
+	ida_destroy(&hr_dev->uar_ida.ida);
 }
diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c
index d40ea3d87260..751470c7a2ce 100644
--- a/drivers/infiniband/hw/hns/hns_roce_db.c
+++ b/drivers/infiniband/hw/hns/hns_roce_db.c
@@ -42,8 +42,8 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
 
 found:
 	offset = virt - page_addr;
-	db->dma = sg_dma_address(page->umem->sg_head.sgl) + offset;
-	db->virt_addr = sg_virt(page->umem->sg_head.sgl) + offset;
+	db->dma = sg_dma_address(page->umem->sgt_append.sgt.sgl) + offset;
+	db->virt_addr = sg_virt(page->umem->sgt_append.sgt.sgl) + offset;
 	db->u.user_page = page;
 	refcount_inc(&page->refcount);
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 991f65269fa6..9467c39e3d28 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -496,6 +496,12 @@ struct hns_roce_bank {
 	u32 next; /* Next ID to allocate. */
 };
 
+struct hns_roce_idx_table {
+	u32 *spare_idx;
+	u32 head;
+	u32 tail;
+};
+
 struct hns_roce_qp_table {
 	struct hns_roce_hem_table	qp_table;
 	struct hns_roce_hem_table	irrl_table;
@@ -504,6 +510,7 @@ struct hns_roce_qp_table {
 	struct mutex			scc_mutex;
 	struct hns_roce_bank bank[HNS_ROCE_QP_BANK_NUM];
 	struct mutex bank_mutex;
+	struct hns_roce_idx_table	idx_table;
 };
 
 struct hns_roce_cq_table {
@@ -514,7 +521,7 @@ struct hns_roce_cq_table {
 };
 
 struct hns_roce_srq_table {
-	struct hns_roce_bitmap		bitmap;
+	struct hns_roce_ida		srq_ida;
 	struct xarray			xa;
 	struct hns_roce_hem_table	table;
 };
@@ -963,7 +970,7 @@ struct hns_roce_dev {
 	struct hns_roce_cmdq	cmd;
 	struct hns_roce_ida pd_ida;
 	struct hns_roce_ida xrcd_ida;
-	struct hns_roce_uar_table uar_table;
+	struct hns_roce_ida uar_ida;
 	struct hns_roce_mr_table  mr_table;
 	struct hns_roce_cq_table  cq_table;
 	struct hns_roce_srq_table srq_table;
@@ -1118,10 +1125,8 @@ static inline u8 get_tclass(const struct ib_global_route *grh)
 	       grh->traffic_class >> DSCP_SHIFT : grh->traffic_class;
 }
 
-int hns_roce_init_uar_table(struct hns_roce_dev *dev);
+void hns_roce_init_uar_table(struct hns_roce_dev *dev);
 int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar);
-void hns_roce_uar_free(struct hns_roce_dev *dev, struct hns_roce_uar *uar);
-void hns_roce_cleanup_uar_table(struct hns_roce_dev *dev);
 
 int hns_roce_cmd_init(struct hns_roce_dev *hr_dev);
 void hns_roce_cmd_cleanup(struct hns_roce_dev *hr_dev);
@@ -1146,20 +1151,14 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
 void hns_roce_init_pd_table(struct hns_roce_dev *hr_dev);
 void hns_roce_init_mr_table(struct hns_roce_dev *hr_dev);
 void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev);
-void hns_roce_init_qp_table(struct hns_roce_dev *hr_dev);
-int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev);
+int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev);
+void hns_roce_init_srq_table(struct hns_roce_dev *hr_dev);
 void hns_roce_init_xrcd_table(struct hns_roce_dev *hr_dev);
 
 void hns_roce_cleanup_eq_table(struct hns_roce_dev *hr_dev);
 void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev);
 void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev);
-void hns_roce_cleanup_srq_table(struct hns_roce_dev *hr_dev);
 
-int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj);
-void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj);
-int hns_roce_bitmap_init(struct hns_roce_bitmap *bitmap, u32 num, u32 mask,
-			 u32 reserved_bot, u32 resetrved_top);
-void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap);
 void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev);
 
 int hns_roce_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
@@ -1216,9 +1215,8 @@ int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
 int hns_roce_alloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata);
 int hns_roce_dealloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata);
 
-struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd,
-				 struct ib_qp_init_attr *init_attr,
-				 struct ib_udata *udata);
+int hns_roce_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *init_attr,
+		       struct ib_udata *udata);
 int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		       int attr_mask, struct ib_udata *udata);
 void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index a3305d196675..e0f59b8d7d5d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -758,7 +758,7 @@ static struct hns_roce_qp *hns_roce_v1_create_lp_qp(struct hns_roce_dev *hr_dev,
 	init_attr.cap.max_recv_wr	= HNS_ROCE_MIN_WQE_NUM;
 	init_attr.cap.max_send_wr	= HNS_ROCE_MIN_WQE_NUM;
 
-	qp = hns_roce_create_qp(pd, &init_attr, NULL);
+	qp = ib_create_qp(pd, &init_attr);
 	if (IS_ERR(qp)) {
 		dev_err(dev, "Create loop qp for mr free failed!");
 		return NULL;
@@ -923,7 +923,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
 create_lp_qp_failed:
 	for (i -= 1; i >= 0; i--) {
 		hr_qp = free_mr->mr_free_qp[i];
-		if (hns_roce_v1_destroy_qp(&hr_qp->ibqp, NULL))
+		if (ib_destroy_qp(&hr_qp->ibqp))
 			dev_err(dev, "Destroy qp %d for mr free failed!\n", i);
 	}
 
@@ -953,7 +953,7 @@ static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev)
 		if (!hr_qp)
 			continue;
 
-		ret = hns_roce_v1_destroy_qp(&hr_qp->ibqp, NULL);
+		ret = ib_destroy_qp(&hr_qp->ibqp);
 		if (ret)
 			dev_err(dev, "Destroy qp %d for mr free failed(%d)!\n",
 				i, ret);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 594d4cef31b3..5b9953105752 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -1248,8 +1248,7 @@ static void hns_roce_cmq_setup_basic_desc(struct hns_roce_cmq_desc *desc,
 {
 	memset((void *)desc, 0, sizeof(struct hns_roce_cmq_desc));
 	desc->opcode = cpu_to_le16(opcode);
-	desc->flag =
-		cpu_to_le16(HNS_ROCE_CMD_FLAG_NO_INTR | HNS_ROCE_CMD_FLAG_IN);
+	desc->flag = cpu_to_le16(HNS_ROCE_CMD_FLAG_IN);
 	if (is_read)
 		desc->flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_WR);
 	else
@@ -1288,16 +1287,11 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
 	/* Write to hardware */
 	roce_write(hr_dev, ROCEE_TX_CMQ_PI_REG, csq->head);
 
-	/* If the command is sync, wait for the firmware to write back,
-	 * if multi descriptors to be sent, use the first one to check
-	 */
-	if (le16_to_cpu(desc->flag) & HNS_ROCE_CMD_FLAG_NO_INTR) {
-		do {
-			if (hns_roce_cmq_csq_done(hr_dev))
-				break;
-			udelay(1);
-		} while (++timeout < priv->cmq.tx_timeout);
-	}
+	do {
+		if (hns_roce_cmq_csq_done(hr_dev))
+			break;
+		udelay(1);
+	} while (++timeout < priv->cmq.tx_timeout);
 
 	if (hns_roce_cmq_csq_done(hr_dev)) {
 		for (ret = 0, i = 0; i < num; i++) {
@@ -1761,8 +1755,7 @@ static int __hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev,
 	if (ret)
 		return ret;
 
-	desc.flag =
-		cpu_to_le16(HNS_ROCE_CMD_FLAG_NO_INTR | HNS_ROCE_CMD_FLAG_IN);
+	desc.flag = cpu_to_le16(HNS_ROCE_CMD_FLAG_IN);
 	desc.flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR);
 	roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LPBK_S, 1);
 	roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S, 0);
@@ -2004,6 +1997,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
 	caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
 
 	if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+		caps->flags |= HNS_ROCE_CAP_FLAG_STASH;
 		caps->max_sq_inline = HNS_ROCE_V3_MAX_SQ_INLINE;
 	} else {
 		caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE;
@@ -4114,6 +4108,9 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
 	if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
 		hr_reg_enable(context, QPC_RQ_RECORD_EN);
 
+	if (hr_qp->en_flags & HNS_ROCE_QP_CAP_OWNER_DB)
+		hr_reg_enable(context, QPC_OWNER_MODE);
+
 	hr_reg_write(context, QPC_RQ_DB_RECORD_ADDR_L,
 		     lower_32_bits(hr_qp->rdb.dma) >> 1);
 	hr_reg_write(context, QPC_RQ_DB_RECORD_ADDR_H,
@@ -4146,8 +4143,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
 				   struct hns_roce_v2_qp_context *context,
 				   struct hns_roce_v2_qp_context *qpc_mask)
 {
-	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
-
 	/*
 	 * In v2 engine, software pass context and context mask to hardware
 	 * when modifying qp. If software need modify some fields in context,
@@ -4172,11 +4167,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
 		hr_reg_write(context, QPC_SRQN, to_hr_srq(ibqp->srq)->srqn);
 		hr_reg_clear(qpc_mask, QPC_SRQN);
 	}
-
-	if (attr_mask & IB_QP_DEST_QPN) {
-		hr_reg_write(context, QPC_DQPN, hr_qp->qpn);
-		hr_reg_clear(qpc_mask, QPC_DQPN);
-	}
 }
 
 static int config_qp_rq_buf(struct hns_roce_dev *hr_dev,
@@ -4486,9 +4476,6 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
 
 	hr_reg_clear(qpc_mask, QPC_CHECK_FLG);
 
-	hr_reg_write(context, QPC_LSN, 0x100);
-	hr_reg_clear(qpc_mask, QPC_LSN);
-
 	hr_reg_clear(qpc_mask, QPC_V2_IRRL_HEAD);
 
 	return 0;
@@ -4507,15 +4494,23 @@ static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
 {
 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+	u32 *spare_idx = hr_dev->qp_table.idx_table.spare_idx;
+	u32 *head =  &hr_dev->qp_table.idx_table.head;
+	u32 *tail =  &hr_dev->qp_table.idx_table.tail;
 	struct hns_roce_dip *hr_dip;
 	unsigned long flags;
 	int ret = 0;
 
 	spin_lock_irqsave(&hr_dev->dip_list_lock, flags);
 
+	spare_idx[*tail] = ibqp->qp_num;
+	*tail = (*tail == hr_dev->caps.num_qps - 1) ? 0 : (*tail + 1);
+
 	list_for_each_entry(hr_dip, &hr_dev->dip_list, node) {
-		if (!memcmp(grh->dgid.raw, hr_dip->dgid, 16))
+		if (!memcmp(grh->dgid.raw, hr_dip->dgid, 16)) {
+			*dip_idx = hr_dip->dip_idx;
 			goto out;
+		}
 	}
 
 	/* If no dgid is found, a new dip and a mapping between dgid and
@@ -4528,7 +4523,8 @@ static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
 	}
 
 	memcpy(hr_dip->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
-	hr_dip->dip_idx = *dip_idx = ibqp->qp_num;
+	hr_dip->dip_idx = *dip_idx = spare_idx[*head];
+	*head = (*head == hr_dev->caps.num_qps - 1) ? 0 : (*head + 1);
 	list_add_tail(&hr_dip->node, &hr_dev->dip_list);
 
 out:
@@ -5127,7 +5123,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 
 	qp_attr->rq_psn = hr_reg_read(&context, QPC_RX_REQ_EPSN);
 	qp_attr->sq_psn = (u32)hr_reg_read(&context, QPC_SQ_CUR_PSN);
-	qp_attr->dest_qp_num = (u8)hr_reg_read(&context, QPC_DQPN);
+	qp_attr->dest_qp_num = hr_reg_read(&context, QPC_DQPN);
 	qp_attr->qp_access_flags =
 		((hr_reg_read(&context, QPC_RRE)) << V2_QP_RRE_S) |
 		((hr_reg_read(&context, QPC_RWE)) << V2_QP_RWE_S) |
@@ -5224,7 +5220,6 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
 
 		if (send_cq && send_cq != recv_cq)
 			__hns_roce_v2_cq_clean(send_cq, hr_qp->qpn, NULL);
-
 	}
 
 	hns_roce_qp_remove(hr_dev, hr_qp);
@@ -6118,35 +6113,32 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
 
 		ret = hns_roce_v2_create_eq(hr_dev, eq, eq_cmd);
 		if (ret) {
-			dev_err(dev, "eq create failed.\n");
+			dev_err(dev, "failed to create eq.\n");
 			goto err_create_eq_fail;
 		}
 	}
 
-	/* enable irq */
-	hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_ENABLE);
+	hr_dev->irq_workq = alloc_ordered_workqueue("hns_roce_irq_workq", 0);
+	if (!hr_dev->irq_workq) {
+		dev_err(dev, "failed to create irq workqueue.\n");
+		ret = -ENOMEM;
+		goto err_create_eq_fail;
+	}
 
-	ret = __hns_roce_request_irq(hr_dev, irq_num, comp_num,
-				     aeq_num, other_num);
+	ret = __hns_roce_request_irq(hr_dev, irq_num, comp_num, aeq_num,
+				     other_num);
 	if (ret) {
-		dev_err(dev, "Request irq failed.\n");
+		dev_err(dev, "failed to request irq.\n");
 		goto err_request_irq_fail;
 	}
 
-	hr_dev->irq_workq = alloc_ordered_workqueue("hns_roce_irq_workq", 0);
-	if (!hr_dev->irq_workq) {
-		dev_err(dev, "Create irq workqueue failed!\n");
-		ret = -ENOMEM;
-		goto err_create_wq_fail;
-	}
+	/* enable irq */
+	hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_ENABLE);
 
 	return 0;
 
-err_create_wq_fail:
-	__hns_roce_free_irq(hr_dev);
-
 err_request_irq_fail:
-	hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_DISABLE);
+	destroy_workqueue(hr_dev->irq_workq);
 
 err_create_eq_fail:
 	for (i -= 1; i >= 0; i--)
@@ -6367,7 +6359,6 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
 
 	handle->rinfo.instance_state = HNS_ROCE_STATE_INITED;
 
-
 	return 0;
 
 reset_chk_err:
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index b8a09d411e2e..4d904d5e82be 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -129,19 +129,13 @@
 
 #define HNS_ROCE_V2_TABLE_CHUNK_SIZE		(1 << 18)
 
-#define HNS_ROCE_CMD_FLAG_IN_VALID_SHIFT	0
-#define HNS_ROCE_CMD_FLAG_OUT_VALID_SHIFT	1
-#define HNS_ROCE_CMD_FLAG_NEXT_SHIFT		2
-#define HNS_ROCE_CMD_FLAG_WR_OR_RD_SHIFT	3
-#define HNS_ROCE_CMD_FLAG_NO_INTR_SHIFT		4
-#define HNS_ROCE_CMD_FLAG_ERR_INTR_SHIFT	5
-
-#define HNS_ROCE_CMD_FLAG_IN		BIT(HNS_ROCE_CMD_FLAG_IN_VALID_SHIFT)
-#define HNS_ROCE_CMD_FLAG_OUT		BIT(HNS_ROCE_CMD_FLAG_OUT_VALID_SHIFT)
-#define HNS_ROCE_CMD_FLAG_NEXT		BIT(HNS_ROCE_CMD_FLAG_NEXT_SHIFT)
-#define HNS_ROCE_CMD_FLAG_WR		BIT(HNS_ROCE_CMD_FLAG_WR_OR_RD_SHIFT)
-#define HNS_ROCE_CMD_FLAG_NO_INTR	BIT(HNS_ROCE_CMD_FLAG_NO_INTR_SHIFT)
-#define HNS_ROCE_CMD_FLAG_ERR_INTR	BIT(HNS_ROCE_CMD_FLAG_ERR_INTR_SHIFT)
+enum {
+	HNS_ROCE_CMD_FLAG_IN = BIT(0),
+	HNS_ROCE_CMD_FLAG_OUT = BIT(1),
+	HNS_ROCE_CMD_FLAG_NEXT = BIT(2),
+	HNS_ROCE_CMD_FLAG_WR = BIT(3),
+	HNS_ROCE_CMD_FLAG_ERR_INTR = BIT(5),
+};
 
 #define HNS_ROCE_CMQ_DESC_NUM_S		3
 
@@ -1413,7 +1407,6 @@ struct hns_roce_cmq_desc {
 			__le32 rsv[4];
 		} func_info;
 	};
-
 };
 
 struct hns_roce_v2_cmq_ring {
@@ -1447,7 +1440,7 @@ struct hns_roce_v2_priv {
 
 struct hns_roce_dip {
 	u8 dgid[GID_LEN_V2];
-	u8 dip_idx;
+	u32 dip_idx;
 	struct list_head node;	/* all dips are on a list */
 };
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index cc6eab14a222..5d39bd08582a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -325,7 +325,7 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
 	return 0;
 
 error_fail_copy_to_udata:
-	hns_roce_uar_free(hr_dev, &context->uar);
+	ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx);
 
 error_fail_uar_alloc:
 	return ret;
@@ -334,8 +334,9 @@ error_fail_uar_alloc:
 static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
 {
 	struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext);
+	struct hns_roce_dev *hr_dev = to_hr_dev(ibcontext->device);
 
-	hns_roce_uar_free(to_hr_dev(ibcontext->device), &context->uar);
+	ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx);
 }
 
 static int hns_roce_mmap(struct ib_ucontext *context,
@@ -454,6 +455,7 @@ static const struct ib_device_ops hns_roce_dev_ops = {
 	INIT_RDMA_OBJ_SIZE(ib_ah, hns_roce_ah, ibah),
 	INIT_RDMA_OBJ_SIZE(ib_cq, hns_roce_cq, ib_cq),
 	INIT_RDMA_OBJ_SIZE(ib_pd, hns_roce_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_qp, hns_roce_qp, ibqp),
 	INIT_RDMA_OBJ_SIZE(ib_ucontext, hns_roce_ucontext, ibucontext),
 };
 
@@ -736,11 +738,7 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
 		mutex_init(&hr_dev->pgdir_mutex);
 	}
 
-	ret = hns_roce_init_uar_table(hr_dev);
-	if (ret) {
-		dev_err(dev, "Failed to initialize uar table. aborting\n");
-		return ret;
-	}
+	hns_roce_init_uar_table(hr_dev);
 
 	ret = hns_roce_uar_alloc(hr_dev, &hr_dev->priv_uar);
 	if (ret) {
@@ -748,6 +746,12 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
 		goto err_uar_table_free;
 	}
 
+	ret = hns_roce_init_qp_table(hr_dev);
+	if (ret) {
+		dev_err(dev, "Failed to init qp_table.\n");
+		goto err_uar_table_free;
+	}
+
 	hns_roce_init_pd_table(hr_dev);
 
 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
@@ -757,32 +761,14 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
 
 	hns_roce_init_cq_table(hr_dev);
 
-	hns_roce_init_qp_table(hr_dev);
-
 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
-		ret = hns_roce_init_srq_table(hr_dev);
-		if (ret) {
-			dev_err(dev,
-				"Failed to init share receive queue table.\n");
-			goto err_qp_table_free;
-		}
+		hns_roce_init_srq_table(hr_dev);
 	}
 
 	return 0;
 
-err_qp_table_free:
-	hns_roce_cleanup_qp_table(hr_dev);
-	hns_roce_cleanup_cq_table(hr_dev);
-	ida_destroy(&hr_dev->mr_table.mtpt_ida.ida);
-
-	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
-		ida_destroy(&hr_dev->xrcd_ida.ida);
-
-	ida_destroy(&hr_dev->pd_ida.ida);
-	hns_roce_uar_free(hr_dev, &hr_dev->priv_uar);
-
 err_uar_table_free:
-	hns_roce_cleanup_uar_table(hr_dev);
+	ida_destroy(&hr_dev->uar_ida.ida);
 	return ret;
 }
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 006c84bb3f9f..7089ac780291 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -352,7 +352,9 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start,
 free_cmd_mbox:
 	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
 
-	return ERR_PTR(ret);
+	if (ret)
+		return ERR_PTR(ret);
+	return NULL;
 }
 
 int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index ea5663630985..81ffad77ae42 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -85,13 +85,18 @@ int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
 
 int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
 {
+	struct hns_roce_ida *uar_ida = &hr_dev->uar_ida;
 	struct resource *res;
-	int ret;
+	int id;
 
 	/* Using bitmap to manager UAR index */
-	ret = hns_roce_bitmap_alloc(&hr_dev->uar_table.bitmap, &uar->logic_idx);
-	if (ret)
+	id = ida_alloc_range(&uar_ida->ida, uar_ida->min, uar_ida->max,
+			     GFP_KERNEL);
+	if (id < 0) {
+		ibdev_err(&hr_dev->ib_dev, "failed to alloc uar id(%d).\n", id);
 		return -ENOMEM;
+	}
+	uar->logic_idx = (unsigned long)id;
 
 	if (uar->logic_idx > 0 && hr_dev->caps.phy_num_uars > 1)
 		uar->index = (uar->logic_idx - 1) %
@@ -102,6 +107,7 @@ int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
 	if (!dev_is_pci(hr_dev->dev)) {
 		res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0);
 		if (!res) {
+			ida_free(&uar_ida->ida, id);
 			dev_err(&hr_dev->pdev->dev, "memory resource not found!\n");
 			return -EINVAL;
 		}
@@ -114,22 +120,13 @@ int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
 	return 0;
 }
 
-void hns_roce_uar_free(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
+void hns_roce_init_uar_table(struct hns_roce_dev *hr_dev)
 {
-	hns_roce_bitmap_free(&hr_dev->uar_table.bitmap, uar->logic_idx);
-}
+	struct hns_roce_ida *uar_ida = &hr_dev->uar_ida;
 
-int hns_roce_init_uar_table(struct hns_roce_dev *hr_dev)
-{
-	return hns_roce_bitmap_init(&hr_dev->uar_table.bitmap,
-				    hr_dev->caps.num_uars,
-				    hr_dev->caps.num_uars - 1,
-				    hr_dev->caps.reserved_uars, 0);
-}
-
-void hns_roce_cleanup_uar_table(struct hns_roce_dev *hr_dev)
-{
-	hns_roce_bitmap_cleanup(&hr_dev->uar_table.bitmap);
+	ida_init(&uar_ida->ida);
+	uar_ida->max = hr_dev->caps.num_uars - 1;
+	uar_ida->min = hr_dev->caps.reserved_uars;
 }
 
 static int hns_roce_xrcd_alloc(struct hns_roce_dev *hr_dev, u32 *xrcdn)
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index b101b7e578f2..9af4509894e6 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -715,7 +715,6 @@ static int alloc_rq_inline_buf(struct hns_roce_qp *hr_qp,
 	/* allocate recv inline buf */
 	wqe_list = kcalloc(wqe_cnt, sizeof(struct hns_roce_rinl_wqe),
 			   GFP_KERNEL);
-
 	if (!wqe_list)
 		goto err;
 
@@ -823,77 +822,104 @@ static inline bool kernel_qp_has_rdb(struct hns_roce_dev *hr_dev,
 		hns_roce_qp_has_rq(init_attr));
 }
 
+static int alloc_user_qp_db(struct hns_roce_dev *hr_dev,
+			    struct hns_roce_qp *hr_qp,
+			    struct ib_qp_init_attr *init_attr,
+			    struct ib_udata *udata,
+			    struct hns_roce_ib_create_qp *ucmd,
+			    struct hns_roce_ib_create_qp_resp *resp)
+{
+	struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata,
+		struct hns_roce_ucontext, ibucontext);
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	int ret;
+
+	if (user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd)) {
+		ret = hns_roce_db_map_user(uctx, ucmd->sdb_addr, &hr_qp->sdb);
+		if (ret) {
+			ibdev_err(ibdev,
+				  "failed to map user SQ doorbell, ret = %d.\n",
+				  ret);
+			goto err_out;
+		}
+		hr_qp->en_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB;
+	}
+
+	if (user_qp_has_rdb(hr_dev, init_attr, udata, resp)) {
+		ret = hns_roce_db_map_user(uctx, ucmd->db_addr, &hr_qp->rdb);
+		if (ret) {
+			ibdev_err(ibdev,
+				  "failed to map user RQ doorbell, ret = %d.\n",
+				  ret);
+			goto err_sdb;
+		}
+		hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB;
+	}
+
+	return 0;
+
+err_sdb:
+	if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
+		hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
+err_out:
+	return ret;
+}
+
+static int alloc_kernel_qp_db(struct hns_roce_dev *hr_dev,
+			      struct hns_roce_qp *hr_qp,
+			      struct ib_qp_init_attr *init_attr)
+{
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	int ret;
+
+	if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+		hr_qp->sq.db_reg = hr_dev->mem_base +
+				   HNS_ROCE_DWQE_SIZE * hr_qp->qpn;
+	else
+		hr_qp->sq.db_reg = hr_dev->reg_base + hr_dev->sdb_offset +
+				   DB_REG_OFFSET * hr_dev->priv_uar.index;
+
+	hr_qp->rq.db_reg = hr_dev->reg_base + hr_dev->odb_offset +
+			   DB_REG_OFFSET * hr_dev->priv_uar.index;
+
+	if (kernel_qp_has_rdb(hr_dev, init_attr)) {
+		ret = hns_roce_alloc_db(hr_dev, &hr_qp->rdb, 0);
+		if (ret) {
+			ibdev_err(ibdev,
+				  "failed to alloc kernel RQ doorbell, ret = %d.\n",
+				  ret);
+			return ret;
+		}
+		*hr_qp->rdb.db_record = 0;
+		hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB;
+	}
+
+	return 0;
+}
+
 static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 		       struct ib_qp_init_attr *init_attr,
 		       struct ib_udata *udata,
 		       struct hns_roce_ib_create_qp *ucmd,
 		       struct hns_roce_ib_create_qp_resp *resp)
 {
-	struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(
-		udata, struct hns_roce_ucontext, ibucontext);
-	struct ib_device *ibdev = &hr_dev->ib_dev;
 	int ret;
 
 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SDI_MODE)
 		hr_qp->en_flags |= HNS_ROCE_QP_CAP_OWNER_DB;
 
 	if (udata) {
-		if (user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd)) {
-			ret = hns_roce_db_map_user(uctx, ucmd->sdb_addr,
-						   &hr_qp->sdb);
-			if (ret) {
-				ibdev_err(ibdev,
-					  "failed to map user SQ doorbell, ret = %d.\n",
-					  ret);
-				goto err_out;
-			}
-			hr_qp->en_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB;
-			resp->cap_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB;
-		}
-
-		if (user_qp_has_rdb(hr_dev, init_attr, udata, resp)) {
-			ret = hns_roce_db_map_user(uctx, ucmd->db_addr,
-						   &hr_qp->rdb);
-			if (ret) {
-				ibdev_err(ibdev,
-					  "failed to map user RQ doorbell, ret = %d.\n",
-					  ret);
-				goto err_sdb;
-			}
-			hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB;
-			resp->cap_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB;
-		}
+		ret = alloc_user_qp_db(hr_dev, hr_qp, init_attr, udata, ucmd,
+				       resp);
+		if (ret)
+			return ret;
 	} else {
-		if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
-			hr_qp->sq.db_reg = hr_dev->mem_base +
-					   HNS_ROCE_DWQE_SIZE * hr_qp->qpn;
-		else
-			hr_qp->sq.db_reg =
-				hr_dev->reg_base + hr_dev->sdb_offset +
-				DB_REG_OFFSET * hr_dev->priv_uar.index;
-
-		hr_qp->rq.db_reg = hr_dev->reg_base + hr_dev->odb_offset +
-				   DB_REG_OFFSET * hr_dev->priv_uar.index;
-
-		if (kernel_qp_has_rdb(hr_dev, init_attr)) {
-			ret = hns_roce_alloc_db(hr_dev, &hr_qp->rdb, 0);
-			if (ret) {
-				ibdev_err(ibdev,
-					  "failed to alloc kernel RQ doorbell, ret = %d.\n",
-					  ret);
-				goto err_out;
-			}
-			*hr_qp->rdb.db_record = 0;
-			hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB;
-		}
+		ret = alloc_kernel_qp_db(hr_dev, hr_qp, init_attr);
+		if (ret)
+			return ret;
 	}
 
 	return 0;
-err_sdb:
-	if (udata && hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
-		hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
-err_out:
-	return ret;
 }
 
 static void free_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
@@ -959,8 +985,6 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 	struct ib_device *ibdev = &hr_dev->ib_dev;
 	int ret;
 
-	hr_qp->ibqp.qp_type = init_attr->qp_type;
-
 	if (init_attr->cap.max_inline_data > hr_dev->caps.max_sq_inline)
 		init_attr->cap.max_inline_data = hr_dev->caps.max_sq_inline;
 
@@ -1073,6 +1097,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 	}
 
 	if (udata) {
+		resp.cap_flags = hr_qp->en_flags;
 		ret = ib_copy_to_udata(udata, &resp,
 				       min(udata->outlen, sizeof(resp)));
 		if (ret) {
@@ -1121,8 +1146,6 @@ void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 	free_qp_buf(hr_dev, hr_qp);
 	free_kernel_wrid(hr_qp);
 	free_qp_db(hr_dev, hr_qp, udata);
-
-	kfree(hr_qp);
 }
 
 static int check_qp_type(struct hns_roce_dev *hr_dev, enum ib_qp_type type,
@@ -1154,31 +1177,21 @@ out:
 	return -EOPNOTSUPP;
 }
 
-struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
-				 struct ib_qp_init_attr *init_attr,
-				 struct ib_udata *udata)
+int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr,
+		       struct ib_udata *udata)
 {
-	struct ib_device *ibdev = pd ? pd->device : init_attr->xrcd->device;
+	struct ib_device *ibdev = qp->device;
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
-	struct hns_roce_qp *hr_qp;
+	struct hns_roce_qp *hr_qp = to_hr_qp(qp);
+	struct ib_pd *pd = qp->pd;
 	int ret;
 
 	ret = check_qp_type(hr_dev, init_attr->qp_type, !!udata);
 	if (ret)
-		return ERR_PTR(ret);
-
-	hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
-	if (!hr_qp)
-		return ERR_PTR(-ENOMEM);
-
-	if (init_attr->qp_type == IB_QPT_XRC_INI)
-		init_attr->recv_cq = NULL;
+		return ret;
 
-	if (init_attr->qp_type == IB_QPT_XRC_TGT) {
+	if (init_attr->qp_type == IB_QPT_XRC_TGT)
 		hr_qp->xrcdn = to_hr_xrcd(init_attr->xrcd)->xrcdn;
-		init_attr->recv_cq = NULL;
-		init_attr->send_cq = NULL;
-	}
 
 	if (init_attr->qp_type == IB_QPT_GSI) {
 		hr_qp->port = init_attr->port_num - 1;
@@ -1186,15 +1199,11 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
 	}
 
 	ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, hr_qp);
-	if (ret) {
+	if (ret)
 		ibdev_err(ibdev, "Create QP type 0x%x failed(%d)\n",
 			  init_attr->qp_type, ret);
 
-		kfree(hr_qp);
-		return ERR_PTR(ret);
-	}
-
-	return &hr_qp->ibqp;
+	return ret;
 }
 
 int to_hr_qp_type(int qp_type)
@@ -1321,17 +1330,8 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	if (ret)
 		goto out;
 
-	if (cur_state == new_state && cur_state == IB_QPS_RESET) {
-		if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) {
-			ret = -EPERM;
-			ibdev_err(&hr_dev->ib_dev,
-				  "RST2RST state is not supported\n");
-		} else {
-			ret = 0;
-		}
-
+	if (cur_state == new_state && cur_state == IB_QPS_RESET)
 		goto out;
-	}
 
 	ret = hr_dev->hw->modify_qp(ibqp, attr, attr_mask, cur_state,
 				    new_state);
@@ -1429,12 +1429,17 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq,
 	return cur + nreq >= hr_wq->wqe_cnt;
 }
 
-void hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
+int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
 {
 	struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
 	unsigned int reserved_from_bot;
 	unsigned int i;
 
+	qp_table->idx_table.spare_idx = kcalloc(hr_dev->caps.num_qps,
+					sizeof(u32), GFP_KERNEL);
+	if (!qp_table->idx_table.spare_idx)
+		return -ENOMEM;
+
 	mutex_init(&qp_table->scc_mutex);
 	mutex_init(&qp_table->bank_mutex);
 	xa_init(&hr_dev->qp_table_xa);
@@ -1452,6 +1457,8 @@ void hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
 					       HNS_ROCE_QP_BANK_NUM - 1;
 		hr_dev->qp_table.bank[i].next = hr_dev->qp_table.bank[i].min;
 	}
+
+	return 0;
 }
 
 void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev)
@@ -1460,4 +1467,5 @@ void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev)
 
 	for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++)
 		ida_destroy(&hr_dev->qp_table.bank[i].ida);
+	kfree(hr_dev->qp_table.idx_table.spare_idx);
 }
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 6f2992f443fa..6eee9deadd12 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -80,15 +80,19 @@ static int hns_roce_hw_destroy_srq(struct hns_roce_dev *dev,
 static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
 {
 	struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+	struct hns_roce_ida *srq_ida = &hr_dev->srq_table.srq_ida;
 	struct ib_device *ibdev = &hr_dev->ib_dev;
 	struct hns_roce_cmd_mailbox *mailbox;
 	int ret;
+	int id;
 
-	ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn);
-	if (ret) {
-		ibdev_err(ibdev, "failed to alloc SRQ number.\n");
+	id = ida_alloc_range(&srq_ida->ida, srq_ida->min, srq_ida->max,
+			     GFP_KERNEL);
+	if (id < 0) {
+		ibdev_err(ibdev, "failed to alloc srq(%d).\n", id);
 		return -ENOMEM;
 	}
+	srq->srqn = (unsigned long)id;
 
 	ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn);
 	if (ret) {
@@ -132,7 +136,7 @@ err_xa:
 err_put:
 	hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
 err_out:
-	hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn);
+	ida_free(&srq_ida->ida, id);
 
 	return ret;
 }
@@ -154,7 +158,7 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
 	wait_for_completion(&srq->free);
 
 	hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
-	hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn);
+	ida_free(&srq_table->srq_ida.ida, (int)srq->srqn);
 }
 
 static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
@@ -440,18 +444,14 @@ int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
 	return 0;
 }
 
-int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev)
+void hns_roce_init_srq_table(struct hns_roce_dev *hr_dev)
 {
 	struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+	struct hns_roce_ida *srq_ida = &srq_table->srq_ida;
 
 	xa_init(&srq_table->xa);
 
-	return hns_roce_bitmap_init(&srq_table->bitmap, hr_dev->caps.num_srqs,
-				    hr_dev->caps.num_srqs - 1,
-				    hr_dev->caps.reserved_srqs, 0);
-}
-
-void hns_roce_cleanup_srq_table(struct hns_roce_dev *hr_dev)
-{
-	hns_roce_bitmap_cleanup(&hr_dev->srq_table.bitmap);
+	ida_init(&srq_ida->ida);
+	srq_ida->max = hr_dev->caps.num_srqs - 1;
+	srq_ida->min = hr_dev->caps.reserved_srqs;
 }
diff --git a/drivers/infiniband/hw/irdma/protos.h b/drivers/infiniband/hw/irdma/protos.h
index e3f5173706fe..78f598fdbccf 100644
--- a/drivers/infiniband/hw/irdma/protos.h
+++ b/drivers/infiniband/hw/irdma/protos.h
@@ -71,8 +71,6 @@ void irdma_qp_rem_qos(struct irdma_sc_qp *qp);
 struct irdma_sc_qp *irdma_get_qp_from_list(struct list_head *head,
 					   struct irdma_sc_qp *qp);
 void irdma_reinitialize_ieq(struct irdma_sc_vsi *vsi);
-u16 irdma_alloc_ws_node_id(struct irdma_sc_dev *dev);
-void irdma_free_ws_node_id(struct irdma_sc_dev *dev, u16 node_id);
 /* terminate functions*/
 void irdma_terminate_send_fin(struct irdma_sc_qp *qp);
 
diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c
index 5bbe44e54f9a..e94470991fe0 100644
--- a/drivers/infiniband/hw/irdma/utils.c
+++ b/drivers/infiniband/hw/irdma/utils.c
@@ -1141,10 +1141,7 @@ void irdma_free_qp_rsrc(struct irdma_qp *iwqp)
 			  iwqp->kqp.dma_mem.va, iwqp->kqp.dma_mem.pa);
 	iwqp->kqp.dma_mem.va = NULL;
 	kfree(iwqp->kqp.sq_wrid_mem);
-	iwqp->kqp.sq_wrid_mem = NULL;
 	kfree(iwqp->kqp.rq_wrid_mem);
-	iwqp->kqp.rq_wrid_mem = NULL;
-	kfree(iwqp);
 }
 
 /**
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index 717147ed0519..4fc323402073 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -790,18 +790,19 @@ static int irdma_validate_qp_attrs(struct ib_qp_init_attr *init_attr,
 
 /**
  * irdma_create_qp - create qp
- * @ibpd: ptr of pd
+ * @ibqp: ptr of qp
  * @init_attr: attributes for qp
  * @udata: user data for create qp
  */
-static struct ib_qp *irdma_create_qp(struct ib_pd *ibpd,
-				     struct ib_qp_init_attr *init_attr,
-				     struct ib_udata *udata)
+static int irdma_create_qp(struct ib_qp *ibqp,
+			   struct ib_qp_init_attr *init_attr,
+			   struct ib_udata *udata)
 {
+	struct ib_pd *ibpd = ibqp->pd;
 	struct irdma_pd *iwpd = to_iwpd(ibpd);
 	struct irdma_device *iwdev = to_iwdev(ibpd->device);
 	struct irdma_pci_f *rf = iwdev->rf;
-	struct irdma_qp *iwqp;
+	struct irdma_qp *iwqp = to_iwqp(ibqp);
 	struct irdma_create_qp_req req;
 	struct irdma_create_qp_resp uresp = {};
 	u32 qp_num = 0;
@@ -818,7 +819,7 @@ static struct ib_qp *irdma_create_qp(struct ib_pd *ibpd,
 
 	err_code = irdma_validate_qp_attrs(init_attr, iwdev);
 	if (err_code)
-		return ERR_PTR(err_code);
+		return err_code;
 
 	sq_size = init_attr->cap.max_send_wr;
 	rq_size = init_attr->cap.max_recv_wr;
@@ -831,10 +832,6 @@ static struct ib_qp *irdma_create_qp(struct ib_pd *ibpd,
 	init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge;
 	init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data;
 
-	iwqp = kzalloc(sizeof(*iwqp), GFP_KERNEL);
-	if (!iwqp)
-		return ERR_PTR(-ENOMEM);
-
 	qp = &iwqp->sc_qp;
 	qp->qp_uk.back_qp = iwqp;
 	qp->qp_uk.lock = &iwqp->lock;
@@ -847,10 +844,8 @@ static struct ib_qp *irdma_create_qp(struct ib_pd *ibpd,
 						 iwqp->q2_ctx_mem.size,
 						 &iwqp->q2_ctx_mem.pa,
 						 GFP_KERNEL);
-	if (!iwqp->q2_ctx_mem.va) {
-		err_code = -ENOMEM;
-		goto error;
-	}
+	if (!iwqp->q2_ctx_mem.va)
+		return -ENOMEM;
 
 	init_info.q2 = iwqp->q2_ctx_mem.va;
 	init_info.q2_pa = iwqp->q2_ctx_mem.pa;
@@ -999,17 +994,16 @@ static struct ib_qp *irdma_create_qp(struct ib_pd *ibpd,
 		if (err_code) {
 			ibdev_dbg(&iwdev->ibdev, "VERBS: copy_to_udata failed\n");
 			irdma_destroy_qp(&iwqp->ibqp, udata);
-			return ERR_PTR(err_code);
+			return err_code;
 		}
 	}
 
 	init_completion(&iwqp->free_qp);
-	return &iwqp->ibqp;
+	return 0;
 
 error:
 	irdma_free_qp_rsrc(iwqp);
-
-	return ERR_PTR(err_code);
+	return err_code;
 }
 
 static int irdma_get_ib_acc_flags(struct irdma_qp *iwqp)
@@ -2235,7 +2229,7 @@ static void irdma_copy_user_pgaddrs(struct irdma_mr *iwmr, u64 *pbl,
 	pinfo = (level == PBLE_LEVEL_1) ? NULL : palloc->level2.leaf;
 
 	if (iwmr->type == IRDMA_MEMREG_TYPE_QP)
-		iwpbl->qp_mr.sq_page = sg_page(region->sg_head.sgl);
+		iwpbl->qp_mr.sq_page = sg_page(region->sgt_append.sgt.sgl);
 
 	rdma_umem_for_each_dma_block(region, &biter, iwmr->page_size) {
 		*pbl = rdma_block_iter_dma_address(&biter);
@@ -4404,6 +4398,7 @@ static const struct ib_device_ops irdma_dev_ops = {
 	INIT_RDMA_OBJ_SIZE(ib_ah, irdma_ah, ibah),
 	INIT_RDMA_OBJ_SIZE(ib_cq, irdma_cq, ibcq),
 	INIT_RDMA_OBJ_SIZE(ib_mw, irdma_mr, ibmw),
+	INIT_RDMA_OBJ_SIZE(ib_qp, irdma_qp, ibqp),
 };
 
 /**
diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c
index d41f03ccb0e1..9bbd695a9fd5 100644
--- a/drivers/infiniband/hw/mlx4/doorbell.c
+++ b/drivers/infiniband/hw/mlx4/doorbell.c
@@ -75,7 +75,8 @@ int mlx4_ib_db_map_user(struct ib_udata *udata, unsigned long virt,
 	list_add(&page->list, &context->db_page_list);
 
 found:
-	db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK);
+	db->dma = sg_dma_address(page->umem->sgt_append.sgt.sgl) +
+		  (virt & ~PAGE_MASK);
 	db->u.user_page = page;
 	++page->refcnt;
 
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index ae4c91b612ce..f367f4a4abff 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2577,6 +2577,7 @@ static const struct ib_device_ops mlx4_ib_dev_ops = {
 	INIT_RDMA_OBJ_SIZE(ib_ah, mlx4_ib_ah, ibah),
 	INIT_RDMA_OBJ_SIZE(ib_cq, mlx4_ib_cq, ibcq),
 	INIT_RDMA_OBJ_SIZE(ib_pd, mlx4_ib_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_qp, mlx4_ib_qp, ibqp),
 	INIT_RDMA_OBJ_SIZE(ib_srq, mlx4_ib_srq, ibsrq),
 	INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx4_ib_ucontext, ibucontext),
 };
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index e856cf23a0a1..c60f6e9ac640 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -792,9 +792,8 @@ void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index);
 int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
 			  const struct ib_recv_wr **bad_wr);
 
-struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
-				struct ib_qp_init_attr *init_attr,
-				struct ib_udata *udata);
+int mlx4_ib_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr,
+		      struct ib_udata *udata);
 int mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata);
 void mlx4_ib_drain_sq(struct ib_qp *qp);
 void mlx4_ib_drain_rq(struct ib_qp *qp);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 50becc0e4b62..04a67b481608 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -200,7 +200,7 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
 	mtt_shift = mtt->page_shift;
 	mtt_size = 1ULL << mtt_shift;
 
-	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
+	for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) {
 		if (cur_start_addr + len == sg_dma_address(sg)) {
 			/* still the same block */
 			len += sg_dma_len(sg);
@@ -273,7 +273,7 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
 
 	*num_of_mtts = ib_umem_num_dma_blocks(umem, PAGE_SIZE);
 
-	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
+	for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) {
 		/*
 		 * Initialization - save the first chunk start as the
 		 * current_block_start - block means contiguous pages.
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 4a2ef7daaded..8662f462e2a5 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1578,24 +1578,19 @@ static int _mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp,
 	return 0;
 }
 
-struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
-				struct ib_qp_init_attr *init_attr,
-				struct ib_udata *udata) {
-	struct ib_device *device = pd ? pd->device : init_attr->xrcd->device;
+int mlx4_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
+		      struct ib_udata *udata)
+{
+	struct ib_device *device = ibqp->device;
 	struct mlx4_ib_dev *dev = to_mdev(device);
-	struct mlx4_ib_qp *qp;
+	struct mlx4_ib_qp *qp = to_mqp(ibqp);
+	struct ib_pd *pd = ibqp->pd;
 	int ret;
 
-	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
-	if (!qp)
-		return ERR_PTR(-ENOMEM);
-
 	mutex_init(&qp->mutex);
 	ret = _mlx4_ib_create_qp(pd, qp, init_attr, udata);
-	if (ret) {
-		kfree(qp);
-		return ERR_PTR(ret);
-	}
+	if (ret)
+		return ret;
 
 	if (init_attr->qp_type == IB_QPT_GSI &&
 	    !(init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI)) {
@@ -1618,7 +1613,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 			init_attr->create_flags &= ~MLX4_IB_QP_CREATE_ROCE_V2_GSI;
 		}
 	}
-	return &qp->ibqp;
+	return 0;
 }
 
 static int _mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
@@ -1646,8 +1641,6 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
 	}
 
 	kfree(mqp->sqp);
-	kfree(mqp);
-
 	return 0;
 }
 
diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c
index 9ca2e61807ec..6398e2f48579 100644
--- a/drivers/infiniband/hw/mlx5/doorbell.c
+++ b/drivers/infiniband/hw/mlx5/doorbell.c
@@ -78,7 +78,8 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
 	list_add(&page->list, &context->db_page_list);
 
 found:
-	db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK);
+	db->dma = sg_dma_address(page->umem->sgt_append.sgt.sgl) +
+		  (virt & ~PAGE_MASK);
 	db->u.user_page = page;
 	++page->refcnt;
 
diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c
index 7fcad9135276..3ad8f637c589 100644
--- a/drivers/infiniband/hw/mlx5/gsi.c
+++ b/drivers/infiniband/hw/mlx5/gsi.c
@@ -116,8 +116,6 @@ int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp,
 		goto err_free_tx;
 	}
 
-	mutex_lock(&dev->devr.mutex);
-
 	if (dev->devr.ports[port_num - 1].gsi) {
 		mlx5_ib_warn(dev, "GSI QP already exists on port %d\n",
 			     port_num);
@@ -147,35 +145,20 @@ int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp,
 		hw_init_attr.cap.max_inline_data = 0;
 	}
 
-	gsi->rx_qp = mlx5_ib_create_qp(pd, &hw_init_attr, NULL);
+	gsi->rx_qp = ib_create_qp(pd, &hw_init_attr);
 	if (IS_ERR(gsi->rx_qp)) {
 		mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n",
 			     PTR_ERR(gsi->rx_qp));
 		ret = PTR_ERR(gsi->rx_qp);
 		goto err_destroy_cq;
 	}
-	gsi->rx_qp->device = pd->device;
-	gsi->rx_qp->pd = pd;
-	gsi->rx_qp->real_qp = gsi->rx_qp;
-
-	gsi->rx_qp->qp_type = hw_init_attr.qp_type;
-	gsi->rx_qp->send_cq = hw_init_attr.send_cq;
-	gsi->rx_qp->recv_cq = hw_init_attr.recv_cq;
-	gsi->rx_qp->event_handler = hw_init_attr.event_handler;
-	spin_lock_init(&gsi->rx_qp->mr_lock);
-	INIT_LIST_HEAD(&gsi->rx_qp->rdma_mrs);
-	INIT_LIST_HEAD(&gsi->rx_qp->sig_mrs);
 
 	dev->devr.ports[attr->port_num - 1].gsi = gsi;
-
-	mutex_unlock(&dev->devr.mutex);
-
 	return 0;
 
 err_destroy_cq:
 	ib_free_cq(gsi->cq);
 err_free_wrs:
-	mutex_unlock(&dev->devr.mutex);
 	kfree(gsi->outstanding_wrs);
 err_free_tx:
 	kfree(gsi->tx_qps);
@@ -190,16 +173,13 @@ int mlx5_ib_destroy_gsi(struct mlx5_ib_qp *mqp)
 	int qp_index;
 	int ret;
 
-	mutex_lock(&dev->devr.mutex);
-	ret = mlx5_ib_destroy_qp(gsi->rx_qp, NULL);
+	ret = ib_destroy_qp(gsi->rx_qp);
 	if (ret) {
 		mlx5_ib_warn(dev, "unable to destroy hardware GSI QP. error %d\n",
 			     ret);
-		mutex_unlock(&dev->devr.mutex);
 		return ret;
 	}
 	dev->devr.ports[port_num - 1].gsi = NULL;
-	mutex_unlock(&dev->devr.mutex);
 	gsi->rx_qp = NULL;
 
 	for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) {
@@ -213,8 +193,6 @@ int mlx5_ib_destroy_gsi(struct mlx5_ib_qp *mqp)
 
 	kfree(gsi->outstanding_wrs);
 	kfree(gsi->tx_qps);
-	kfree(mqp);
-
 	return 0;
 }
 
@@ -339,23 +317,13 @@ err_destroy_qp:
 	WARN_ON_ONCE(qp);
 }
 
-static void setup_qps(struct mlx5_ib_gsi_qp *gsi)
-{
-	struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
-	u16 qp_index;
-
-	mutex_lock(&dev->devr.mutex);
-	for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index)
-		setup_qp(gsi, qp_index);
-	mutex_unlock(&dev->devr.mutex);
-}
-
 int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
 			  int attr_mask)
 {
 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
 	struct mlx5_ib_qp *mqp = to_mqp(qp);
 	struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
+	u16 qp_index;
 	int ret;
 
 	mlx5_ib_dbg(dev, "modifying GSI QP to state %d\n", attr->qp_state);
@@ -366,8 +334,11 @@ int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
 		return ret;
 	}
 
-	if (to_mqp(gsi->rx_qp)->state == IB_QPS_RTS)
-		setup_qps(gsi);
+	if (to_mqp(gsi->rx_qp)->state != IB_QPS_RTS)
+		return 0;
+
+	for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index)
+		setup_qp(gsi, qp_index);
 	return 0;
 }
 
@@ -511,8 +482,8 @@ int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr,
 
 void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi)
 {
-	if (!gsi)
-		return;
+	u16 qp_index;
 
-	setup_qps(gsi);
+	for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index)
+		setup_qp(gsi, qp_index);
 }
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 466f0a521940..8664bcf6d3f5 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1184,6 +1184,16 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 				MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP;
 	}
 
+	if (offsetofend(typeof(resp), dci_streams_caps) <= uhw_outlen) {
+		resp.response_length += sizeof(resp.dci_streams_caps);
+
+		resp.dci_streams_caps.max_log_num_concurent =
+			MLX5_CAP_GEN(mdev, log_max_dci_stream_channels);
+
+		resp.dci_streams_caps.max_log_num_errored =
+			MLX5_CAP_GEN(mdev, log_max_dci_errored_streams);
+	}
+
 	if (uhw_outlen) {
 		err = ib_copy_to_udata(uhw, &resp, resp.response_length);
 
@@ -2501,6 +2511,13 @@ static void pkey_change_handler(struct work_struct *work)
 		container_of(work, struct mlx5_ib_port_resources,
 			     pkey_change_work);
 
+	if (!ports->gsi)
+		/*
+		 * We got this event before device was fully configured
+		 * and MAD registration code wasn't called/finished yet.
+		 */
+		return;
+
 	mlx5_ib_gsi_pkey_change(ports->gsi);
 }
 
@@ -2795,33 +2812,16 @@ static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
 	if (!MLX5_CAP_GEN(dev->mdev, xrc))
 		return -EOPNOTSUPP;
 
-	mutex_init(&devr->mutex);
-
-	devr->p0 = rdma_zalloc_drv_obj(ibdev, ib_pd);
-	if (!devr->p0)
-		return -ENOMEM;
-
-	devr->p0->device  = ibdev;
-	devr->p0->uobject = NULL;
-	atomic_set(&devr->p0->usecnt, 0);
-
-	ret = mlx5_ib_alloc_pd(devr->p0, NULL);
-	if (ret)
-		goto error0;
+	devr->p0 = ib_alloc_pd(ibdev, 0);
+	if (IS_ERR(devr->p0))
+		return PTR_ERR(devr->p0);
 
-	devr->c0 = rdma_zalloc_drv_obj(ibdev, ib_cq);
-	if (!devr->c0) {
-		ret = -ENOMEM;
+	devr->c0 = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr);
+	if (IS_ERR(devr->c0)) {
+		ret = PTR_ERR(devr->c0);
 		goto error1;
 	}
 
-	devr->c0->device = &dev->ib_dev;
-	atomic_set(&devr->c0->usecnt, 0);
-
-	ret = mlx5_ib_create_cq(devr->c0, &cq_attr, NULL);
-	if (ret)
-		goto err_create_cq;
-
 	ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn0, 0);
 	if (ret)
 		goto error2;
@@ -2836,45 +2836,22 @@ static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
 	attr.srq_type = IB_SRQT_XRC;
 	attr.ext.cq = devr->c0;
 
-	devr->s0 = rdma_zalloc_drv_obj(ibdev, ib_srq);
-	if (!devr->s0) {
-		ret = -ENOMEM;
-		goto error4;
-	}
-
-	devr->s0->device	= &dev->ib_dev;
-	devr->s0->pd		= devr->p0;
-	devr->s0->srq_type      = IB_SRQT_XRC;
-	devr->s0->ext.cq	= devr->c0;
-	ret = mlx5_ib_create_srq(devr->s0, &attr, NULL);
-	if (ret)
+	devr->s0 = ib_create_srq(devr->p0, &attr);
+	if (IS_ERR(devr->s0)) {
+		ret = PTR_ERR(devr->s0);
 		goto err_create;
-
-	atomic_inc(&devr->s0->ext.cq->usecnt);
-	atomic_inc(&devr->p0->usecnt);
-	atomic_set(&devr->s0->usecnt, 0);
+	}
 
 	memset(&attr, 0, sizeof(attr));
 	attr.attr.max_sge = 1;
 	attr.attr.max_wr = 1;
 	attr.srq_type = IB_SRQT_BASIC;
-	devr->s1 = rdma_zalloc_drv_obj(ibdev, ib_srq);
-	if (!devr->s1) {
-		ret = -ENOMEM;
-		goto error5;
-	}
-
-	devr->s1->device	= &dev->ib_dev;
-	devr->s1->pd		= devr->p0;
-	devr->s1->srq_type      = IB_SRQT_BASIC;
-	devr->s1->ext.cq	= devr->c0;
 
-	ret = mlx5_ib_create_srq(devr->s1, &attr, NULL);
-	if (ret)
+	devr->s1 = ib_create_srq(devr->p0, &attr);
+	if (IS_ERR(devr->s1)) {
+		ret = PTR_ERR(devr->s1);
 		goto error6;
-
-	atomic_inc(&devr->p0->usecnt);
-	atomic_set(&devr->s1->usecnt, 0);
+	}
 
 	for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
 		INIT_WORK(&devr->ports[port].pkey_change_work,
@@ -2883,23 +2860,15 @@ static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
 	return 0;
 
 error6:
-	kfree(devr->s1);
-error5:
-	mlx5_ib_destroy_srq(devr->s0, NULL);
+	ib_destroy_srq(devr->s0);
 err_create:
-	kfree(devr->s0);
-error4:
 	mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0);
 error3:
 	mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0);
 error2:
-	mlx5_ib_destroy_cq(devr->c0, NULL);
-err_create_cq:
-	kfree(devr->c0);
+	ib_destroy_cq(devr->c0);
 error1:
-	mlx5_ib_dealloc_pd(devr->p0, NULL);
-error0:
-	kfree(devr->p0);
+	ib_dealloc_pd(devr->p0);
 	return ret;
 }
 
@@ -2908,20 +2877,21 @@ static void mlx5_ib_dev_res_cleanup(struct mlx5_ib_dev *dev)
 	struct mlx5_ib_resources *devr = &dev->devr;
 	int port;
 
-	mlx5_ib_destroy_srq(devr->s1, NULL);
-	kfree(devr->s1);
-	mlx5_ib_destroy_srq(devr->s0, NULL);
-	kfree(devr->s0);
-	mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0);
-	mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0);
-	mlx5_ib_destroy_cq(devr->c0, NULL);
-	kfree(devr->c0);
-	mlx5_ib_dealloc_pd(devr->p0, NULL);
-	kfree(devr->p0);
-
-	/* Make sure no change P_Key work items are still executing */
+	/*
+	 * Make sure no change P_Key work items are still executing.
+	 *
+	 * At this stage, the mlx5_ib_event should be unregistered
+	 * and it ensures that no new works are added.
+	 */
 	for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
 		cancel_work_sync(&devr->ports[port].pkey_change_work);
+
+	ib_destroy_srq(devr->s1);
+	ib_destroy_srq(devr->s0);
+	mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0);
+	mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0);
+	ib_destroy_cq(devr->c0);
+	ib_dealloc_pd(devr->p0);
 }
 
 static u32 get_core_cap_flags(struct ib_device *ibdev,
@@ -3799,6 +3769,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
 	INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
 	INIT_RDMA_OBJ_SIZE(ib_cq, mlx5_ib_cq, ibcq),
 	INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_qp, mlx5_ib_qp, ibqp),
 	INIT_RDMA_OBJ_SIZE(ib_srq, mlx5_ib_srq, ibsrq),
 	INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext),
 };
@@ -4061,7 +4032,7 @@ static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
 		mlx5_ib_warn(dev, "mr cache cleanup failed\n");
 
 	if (dev->umrc.qp)
-		mlx5_ib_destroy_qp(dev->umrc.qp, NULL);
+		ib_destroy_qp(dev->umrc.qp);
 	if (dev->umrc.cq)
 		ib_free_cq(dev->umrc.cq);
 	if (dev->umrc.pd)
@@ -4114,23 +4085,17 @@ static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
 	init_attr->cap.max_send_sge = 1;
 	init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
 	init_attr->port_num = 1;
-	qp = mlx5_ib_create_qp(pd, init_attr, NULL);
+	qp = ib_create_qp(pd, init_attr);
 	if (IS_ERR(qp)) {
 		mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
 		ret = PTR_ERR(qp);
 		goto error_3;
 	}
-	qp->device     = &dev->ib_dev;
-	qp->real_qp    = qp;
-	qp->uobject    = NULL;
-	qp->qp_type    = MLX5_IB_QPT_REG_UMR;
-	qp->send_cq    = init_attr->send_cq;
-	qp->recv_cq    = init_attr->recv_cq;
 
 	attr->qp_state = IB_QPS_INIT;
 	attr->port_num = 1;
-	ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
-				IB_QP_PORT, NULL);
+	ret = ib_modify_qp(qp, attr,
+			   IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT);
 	if (ret) {
 		mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
 		goto error_4;
@@ -4140,7 +4105,7 @@ static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
 	attr->qp_state = IB_QPS_RTR;
 	attr->path_mtu = IB_MTU_256;
 
-	ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
+	ret = ib_modify_qp(qp, attr, IB_QP_STATE);
 	if (ret) {
 		mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
 		goto error_4;
@@ -4148,7 +4113,7 @@ static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
 
 	memset(attr, 0, sizeof(*attr));
 	attr->qp_state = IB_QPS_RTS;
-	ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
+	ret = ib_modify_qp(qp, attr, IB_QP_STATE);
 	if (ret) {
 		mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
 		goto error_4;
@@ -4171,7 +4136,7 @@ static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
 	return 0;
 
 error_4:
-	mlx5_ib_destroy_qp(qp, NULL);
+	ib_destroy_qp(qp);
 	dev->umrc.qp = NULL;
 
 error_3:
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 585fb00bdce8..bf20a388eabe 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -795,8 +795,6 @@ struct mlx5_ib_resources {
 	struct ib_srq	*s0;
 	struct ib_srq	*s1;
 	struct mlx5_ib_port_resources ports[2];
-	/* Protects changes to the port resources */
-	struct mutex	mutex;
 };
 
 struct mlx5_ib_counters {
@@ -1221,9 +1219,8 @@ int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
 			  const struct ib_recv_wr **bad_wr);
 int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp);
 void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp);
-struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
-				struct ib_qp_init_attr *init_attr,
-				struct ib_udata *udata);
+int mlx5_ib_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr,
+		      struct ib_udata *udata);
 int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		      int attr_mask, struct ib_udata *udata);
 int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 3f1c5a4f158b..3be36ebbf67a 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -995,7 +995,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
 static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
 {
 	const size_t xlt_chunk_align =
-		MLX5_UMR_MTT_ALIGNMENT / sizeof(ent_size);
+		MLX5_UMR_MTT_ALIGNMENT / ent_size;
 	size_t size;
 	void *res = NULL;
 
@@ -1024,7 +1024,7 @@ static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
 
 	if (size > MLX5_SPARE_UMR_CHUNK) {
 		size = MLX5_SPARE_UMR_CHUNK;
-		*nents = get_order(size) / ent_size;
+		*nents = size / ent_size;
 		res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
 					       get_order(size));
 		if (res)
@@ -1226,7 +1226,8 @@ int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
 	orig_sg_length = sg.length;
 
 	cur_mtt = mtt;
-	rdma_for_each_block (mr->umem->sg_head.sgl, &biter, mr->umem->nmap,
+	rdma_for_each_block (mr->umem->sgt_append.sgt.sgl, &biter,
+			     mr->umem->sgt_append.sgt.nents,
 			     BIT(mr->page_shift)) {
 		if (cur_mtt == (void *)mtt + sg.length) {
 			dma_sync_single_for_device(ddev, sg.addr, sg.length,
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index a77db29f8391..b2fca110346c 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1906,7 +1906,6 @@ static int get_atomic_mode(struct mlx5_ib_dev *dev,
 static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 			     struct mlx5_create_qp_params *params)
 {
-	struct mlx5_ib_create_qp *ucmd = params->ucmd;
 	struct ib_qp_init_attr *attr = params->attr;
 	u32 uidx = params->uidx;
 	struct mlx5_ib_resources *devr = &dev->devr;
@@ -1926,8 +1925,6 @@ static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	if (!in)
 		return -ENOMEM;
 
-	if (MLX5_CAP_GEN(mdev, ece_support) && ucmd)
-		MLX5_SET(create_qp_in, in, ece, ucmd->ece_options);
 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
 
 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_XRC);
@@ -1982,6 +1979,167 @@ static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	return 0;
 }
 
+static int create_dci(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+		      struct mlx5_ib_qp *qp,
+		      struct mlx5_create_qp_params *params)
+{
+	struct ib_qp_init_attr *init_attr = params->attr;
+	struct mlx5_ib_create_qp *ucmd = params->ucmd;
+	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
+	struct ib_udata *udata = params->udata;
+	u32 uidx = params->uidx;
+	struct mlx5_ib_resources *devr = &dev->devr;
+	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
+	struct mlx5_core_dev *mdev = dev->mdev;
+	struct mlx5_ib_cq *send_cq;
+	struct mlx5_ib_cq *recv_cq;
+	unsigned long flags;
+	struct mlx5_ib_qp_base *base;
+	int ts_format;
+	int mlx5_st;
+	void *qpc;
+	u32 *in;
+	int err;
+
+	spin_lock_init(&qp->sq.lock);
+	spin_lock_init(&qp->rq.lock);
+
+	mlx5_st = to_mlx5_st(qp->type);
+	if (mlx5_st < 0)
+		return -EINVAL;
+
+	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+		qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
+
+	base = &qp->trans_qp.base;
+
+	qp->has_rq = qp_has_rq(init_attr);
+	err = set_rq_size(dev, &init_attr->cap, qp->has_rq, qp, ucmd);
+	if (err) {
+		mlx5_ib_dbg(dev, "err %d\n", err);
+		return err;
+	}
+
+	if (ucmd->rq_wqe_shift != qp->rq.wqe_shift ||
+	    ucmd->rq_wqe_count != qp->rq.wqe_cnt)
+		return -EINVAL;
+
+	if (ucmd->sq_wqe_count > (1 << MLX5_CAP_GEN(mdev, log_max_qp_sz)))
+		return -EINVAL;
+
+	ts_format = get_qp_ts_format(dev, to_mcq(init_attr->send_cq),
+				     to_mcq(init_attr->recv_cq));
+
+	if (ts_format < 0)
+		return ts_format;
+
+	err = _create_user_qp(dev, pd, qp, udata, init_attr, &in, &params->resp,
+			      &inlen, base, ucmd);
+	if (err)
+		return err;
+
+	if (MLX5_CAP_GEN(mdev, ece_support))
+		MLX5_SET(create_qp_in, in, ece, ucmd->ece_options);
+	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+
+	MLX5_SET(qpc, qpc, st, mlx5_st);
+	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+	MLX5_SET(qpc, qpc, pd, to_mpd(pd)->pdn);
+
+	if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE)
+		MLX5_SET(qpc, qpc, wq_signature, 1);
+
+	if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL)
+		MLX5_SET(qpc, qpc, cd_master, 1);
+	if (qp->flags & IB_QP_CREATE_MANAGED_SEND)
+		MLX5_SET(qpc, qpc, cd_slave_send, 1);
+	if (qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE)
+		configure_requester_scat_cqe(dev, qp, init_attr, qpc);
+
+	if (qp->rq.wqe_cnt) {
+		MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4);
+		MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt));
+	}
+
+	if (qp->flags_en & MLX5_QP_FLAG_DCI_STREAM) {
+		MLX5_SET(qpc, qpc, log_num_dci_stream_channels,
+			 ucmd->dci_streams.log_num_concurent);
+		MLX5_SET(qpc, qpc, log_num_dci_errored_streams,
+			 ucmd->dci_streams.log_num_errored);
+	}
+
+	MLX5_SET(qpc, qpc, ts_format, ts_format);
+	MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr));
+
+	MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt));
+
+	/* Set default resources */
+	if (init_attr->srq) {
+		MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0);
+		MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
+			 to_msrq(init_attr->srq)->msrq.srqn);
+	} else {
+		MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
+		MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
+			 to_msrq(devr->s1)->msrq.srqn);
+	}
+
+	if (init_attr->send_cq)
+		MLX5_SET(qpc, qpc, cqn_snd,
+			 to_mcq(init_attr->send_cq)->mcq.cqn);
+
+	if (init_attr->recv_cq)
+		MLX5_SET(qpc, qpc, cqn_rcv,
+			 to_mcq(init_attr->recv_cq)->mcq.cqn);
+
+	MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
+
+	/* 0xffffff means we ask to work with cqe version 0 */
+	if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
+		MLX5_SET(qpc, qpc, user_index, uidx);
+
+	if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) {
+		MLX5_SET(qpc, qpc, end_padding_mode,
+			 MLX5_WQ_END_PAD_MODE_ALIGN);
+		/* Special case to clean flag */
+		qp->flags &= ~IB_QP_CREATE_PCI_WRITE_END_PADDING;
+	}
+
+	err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out);
+
+	kvfree(in);
+	if (err)
+		goto err_create;
+
+	base->container_mibqp = qp;
+	base->mqp.event = mlx5_ib_qp_event;
+	if (MLX5_CAP_GEN(mdev, ece_support))
+		params->resp.ece_options = MLX5_GET(create_qp_out, out, ece);
+
+	get_cqs(qp->type, init_attr->send_cq, init_attr->recv_cq,
+		&send_cq, &recv_cq);
+	spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+	mlx5_ib_lock_cqs(send_cq, recv_cq);
+	/* Maintain device to QPs access, needed for further handling via reset
+	 * flow
+	 */
+	list_add_tail(&qp->qps_list, &dev->qp_list);
+	/* Maintain CQ to QPs access, needed for further handling via reset flow
+	 */
+	if (send_cq)
+		list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
+	if (recv_cq)
+		list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
+	mlx5_ib_unlock_cqs(send_cq, recv_cq);
+	spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
+
+	return 0;
+
+err_create:
+	destroy_qp(dev, qp, base, udata);
+	return err;
+}
+
 static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 			  struct mlx5_ib_qp *qp,
 			  struct mlx5_create_qp_params *params)
@@ -2512,7 +2670,6 @@ static int create_dct(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 	}
 
 	qp->state = IB_QPS_RESET;
-	rdma_restrack_no_track(&qp->ibqp.res);
 	return 0;
 }
 
@@ -2653,6 +2810,9 @@ static int process_vendor_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 
 	process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TYPE_DCI, true, qp);
 	process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TYPE_DCT, true, qp);
+	process_vendor_flag(dev, &flags, MLX5_QP_FLAG_DCI_STREAM,
+			    MLX5_CAP_GEN(mdev, log_max_dci_stream_channels),
+			    qp);
 
 	process_vendor_flag(dev, &flags, MLX5_QP_FLAG_SIGNATURE, true, qp);
 	process_vendor_flag(dev, &flags, MLX5_QP_FLAG_SCATTER_CQE,
@@ -2847,6 +3007,10 @@ static int create_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 	switch (qp->type) {
 	case MLX5_IB_QPT_DCT:
 		err = create_dct(dev, pd, qp, params);
+		rdma_restrack_no_track(&qp->ibqp.res);
+		break;
+	case MLX5_IB_QPT_DCI:
+		err = create_dci(dev, pd, qp, params);
 		break;
 	case IB_QPT_XRC_TGT:
 		err = create_xrc_tgt_qp(dev, qp, params);
@@ -2854,6 +3018,10 @@ static int create_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 	case IB_QPT_GSI:
 		err = mlx5_ib_create_gsi(pd, qp, params->attr);
 		break;
+	case MLX5_IB_QPT_HW_GSI:
+	case MLX5_IB_QPT_REG_UMR:
+		rdma_restrack_no_track(&qp->ibqp.res);
+		fallthrough;
 	default:
 		if (params->udata)
 			err = create_user_qp(dev, pd, qp, params);
@@ -2942,7 +3110,6 @@ static int mlx5_ib_destroy_dct(struct mlx5_ib_qp *mqp)
 	}
 
 	kfree(mqp->dct.in);
-	kfree(mqp);
 	return 0;
 }
 
@@ -2980,25 +3147,23 @@ static int check_ucmd_data(struct mlx5_ib_dev *dev,
 	return ret ? 0 : -EINVAL;
 }
 
-struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr,
-				struct ib_udata *udata)
+int mlx5_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
+		      struct ib_udata *udata)
 {
 	struct mlx5_create_qp_params params = {};
-	struct mlx5_ib_dev *dev;
-	struct mlx5_ib_qp *qp;
+	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+	struct mlx5_ib_qp *qp = to_mqp(ibqp);
+	struct ib_pd *pd = ibqp->pd;
 	enum ib_qp_type type;
 	int err;
 
-	dev = pd ? to_mdev(pd->device) :
-		   to_mdev(to_mxrcd(attr->xrcd)->ibxrcd.device);
-
 	err = check_qp_type(dev, attr, &type);
 	if (err)
-		return ERR_PTR(err);
+		return err;
 
 	err = check_valid_flow(dev, pd, attr, udata);
 	if (err)
-		return ERR_PTR(err);
+		return err;
 
 	params.udata = udata;
 	params.uidx = MLX5_IB_DEFAULT_UIDX;
@@ -3008,49 +3173,43 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr,
 	if (udata) {
 		err = process_udata_size(dev, &params);
 		if (err)
-			return ERR_PTR(err);
+			return err;
 
 		err = check_ucmd_data(dev, &params);
 		if (err)
-			return ERR_PTR(err);
+			return err;
 
 		params.ucmd = kzalloc(params.ucmd_size, GFP_KERNEL);
 		if (!params.ucmd)
-			return ERR_PTR(-ENOMEM);
+			return -ENOMEM;
 
 		err = ib_copy_from_udata(params.ucmd, udata, params.inlen);
 		if (err)
 			goto free_ucmd;
 	}
 
-	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
-	if (!qp) {
-		err = -ENOMEM;
-		goto free_ucmd;
-	}
-
 	mutex_init(&qp->mutex);
 	qp->type = type;
 	if (udata) {
 		err = process_vendor_flags(dev, qp, params.ucmd, attr);
 		if (err)
-			goto free_qp;
+			goto free_ucmd;
 
 		err = get_qp_uidx(qp, &params);
 		if (err)
-			goto free_qp;
+			goto free_ucmd;
 	}
 	err = process_create_flags(dev, qp, attr);
 	if (err)
-		goto free_qp;
+		goto free_ucmd;
 
 	err = check_qp_attr(dev, qp, attr);
 	if (err)
-		goto free_qp;
+		goto free_ucmd;
 
 	err = create_qp(dev, pd, qp, &params);
 	if (err)
-		goto free_qp;
+		goto free_ucmd;
 
 	kfree(params.ucmd);
 	params.ucmd = NULL;
@@ -3065,7 +3224,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr,
 	if (err)
 		goto destroy_qp;
 
-	return &qp->ibqp;
+	return 0;
 
 destroy_qp:
 	switch (qp->type) {
@@ -3076,22 +3235,12 @@ destroy_qp:
 		mlx5_ib_destroy_gsi(qp);
 		break;
 	default:
-		/*
-		 * These lines below are temp solution till QP allocation
-		 * will be moved to be under IB/core responsiblity.
-		 */
-		qp->ibqp.send_cq = attr->send_cq;
-		qp->ibqp.recv_cq = attr->recv_cq;
-		qp->ibqp.pd = pd;
 		destroy_qp_common(dev, qp, udata);
 	}
 
-	qp = NULL;
-free_qp:
-	kfree(qp);
 free_ucmd:
 	kfree(params.ucmd);
-	return ERR_PTR(err);
+	return err;
 }
 
 int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
@@ -3106,9 +3255,6 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
 		return mlx5_ib_destroy_dct(mqp);
 
 	destroy_qp_common(dev, mqp, udata);
-
-	kfree(mqp);
-
 	return 0;
 }
 
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 2cdf686203c1..97287c544da8 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -617,9 +617,9 @@ static void mthca_free_eq(struct mthca_dev *dev,
 
 	mthca_free_mr(dev, &eq->mr);
 	for (i = 0; i < npages; ++i)
-		pci_free_consistent(dev->pdev, PAGE_SIZE,
-				    eq->page_list[i].buf,
-				    dma_unmap_addr(&eq->page_list[i], mapping));
+		dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+				  eq->page_list[i].buf,
+				  dma_unmap_addr(&eq->page_list[i], mapping));
 
 	kfree(eq->page_list);
 	mthca_free_mailbox(dev, mailbox);
@@ -739,17 +739,18 @@ int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
 	dev->eq_table.icm_page = alloc_page(GFP_HIGHUSER);
 	if (!dev->eq_table.icm_page)
 		return -ENOMEM;
-	dev->eq_table.icm_dma  = pci_map_page(dev->pdev, dev->eq_table.icm_page, 0,
-					      PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-	if (pci_dma_mapping_error(dev->pdev, dev->eq_table.icm_dma)) {
+	dev->eq_table.icm_dma =
+		dma_map_page(&dev->pdev->dev, dev->eq_table.icm_page, 0,
+			     PAGE_SIZE, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(&dev->pdev->dev, dev->eq_table.icm_dma)) {
 		__free_page(dev->eq_table.icm_page);
 		return -ENOMEM;
 	}
 
 	ret = mthca_MAP_ICM_page(dev, dev->eq_table.icm_dma, icm_virt);
 	if (ret) {
-		pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
-			       PCI_DMA_BIDIRECTIONAL);
+		dma_unmap_page(&dev->pdev->dev, dev->eq_table.icm_dma,
+			       PAGE_SIZE, DMA_BIDIRECTIONAL);
 		__free_page(dev->eq_table.icm_page);
 	}
 
@@ -759,8 +760,8 @@ int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
 void mthca_unmap_eq_icm(struct mthca_dev *dev)
 {
 	mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, 1);
-	pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
-		       PCI_DMA_BIDIRECTIONAL);
+	dma_unmap_page(&dev->pdev->dev, dev->eq_table.icm_dma, PAGE_SIZE,
+		       DMA_BIDIRECTIONAL);
 	__free_page(dev->eq_table.icm_page);
 }
 
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index fe9654a7af71..f507c4cd46d3 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -937,26 +937,15 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
 
 	pci_set_master(pdev);
 
-	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
 	if (err) {
 		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
-		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
 		if (err) {
 			dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
 			goto err_free_res;
 		}
 	}
-	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-	if (err) {
-		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit "
-			 "consistent PCI DMA mask.\n");
-		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-		if (err) {
-			dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
-				"aborting.\n");
-			goto err_free_res;
-		}
-	}
 
 	/* We can handle large RDMA requests, so allow larger segments. */
 	dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index fa808582b08b..f2734a5c5f26 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -66,8 +66,8 @@ static void mthca_free_icm_pages(struct mthca_dev *dev, struct mthca_icm_chunk *
 	int i;
 
 	if (chunk->nsg > 0)
-		pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,
-			     PCI_DMA_BIDIRECTIONAL);
+		dma_unmap_sg(&dev->pdev->dev, chunk->mem, chunk->npages,
+			     DMA_BIDIRECTIONAL);
 
 	for (i = 0; i < chunk->npages; ++i)
 		__free_pages(sg_page(&chunk->mem[i]),
@@ -184,9 +184,10 @@ struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
 			if (coherent)
 				++chunk->nsg;
 			else if (chunk->npages == MTHCA_ICM_CHUNK_LEN) {
-				chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
-							chunk->npages,
-							PCI_DMA_BIDIRECTIONAL);
+				chunk->nsg =
+					dma_map_sg(&dev->pdev->dev, chunk->mem,
+						   chunk->npages,
+						   DMA_BIDIRECTIONAL);
 
 				if (chunk->nsg <= 0)
 					goto fail;
@@ -204,9 +205,8 @@ struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
 	}
 
 	if (!coherent && chunk) {
-		chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
-					chunk->npages,
-					PCI_DMA_BIDIRECTIONAL);
+		chunk->nsg = dma_map_sg(&dev->pdev->dev, chunk->mem,
+					chunk->npages, DMA_BIDIRECTIONAL);
 
 		if (chunk->nsg <= 0)
 			goto fail;
@@ -480,7 +480,8 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
 	sg_set_page(&db_tab->page[i].mem, pages[0], MTHCA_ICM_PAGE_SIZE,
 			uaddr & ~PAGE_MASK);
 
-	ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
+	ret = dma_map_sg(&dev->pdev->dev, &db_tab->page[i].mem, 1,
+			 DMA_TO_DEVICE);
 	if (ret < 0) {
 		unpin_user_page(pages[0]);
 		goto out;
@@ -489,7 +490,8 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
 	ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem),
 				 mthca_uarc_virt(dev, uar, i));
 	if (ret) {
-		pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
+		dma_unmap_sg(&dev->pdev->dev, &db_tab->page[i].mem, 1,
+			     DMA_TO_DEVICE);
 		unpin_user_page(sg_page(&db_tab->page[i].mem));
 		goto out;
 	}
@@ -555,7 +557,8 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
 	for (i = 0; i < dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE; ++i) {
 		if (db_tab->page[i].uvirt) {
 			mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1);
-			pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
+			dma_unmap_sg(&dev->pdev->dev, &db_tab->page[i].mem, 1,
+				     DMA_TO_DEVICE);
 			unpin_user_page(sg_page(&db_tab->page[i].mem));
 		}
 	}
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index adf4fcf0fee4..ceee23ebc0f2 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -459,52 +459,45 @@ static int mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
 	return 0;
 }
 
-static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
-				     struct ib_qp_init_attr *init_attr,
-				     struct ib_udata *udata)
+static int mthca_create_qp(struct ib_qp *ibqp,
+			   struct ib_qp_init_attr *init_attr,
+			   struct ib_udata *udata)
 {
 	struct mthca_ucontext *context = rdma_udata_to_drv_context(
 		udata, struct mthca_ucontext, ibucontext);
 	struct mthca_create_qp ucmd;
-	struct mthca_qp *qp;
+	struct mthca_qp *qp = to_mqp(ibqp);
+	struct mthca_dev *dev = to_mdev(ibqp->device);
 	int err;
 
 	if (init_attr->create_flags)
-		return ERR_PTR(-EOPNOTSUPP);
+		return -EOPNOTSUPP;
 
 	switch (init_attr->qp_type) {
 	case IB_QPT_RC:
 	case IB_QPT_UC:
 	case IB_QPT_UD:
 	{
-		qp = kzalloc(sizeof(*qp), GFP_KERNEL);
-		if (!qp)
-			return ERR_PTR(-ENOMEM);
-
 		if (udata) {
-			if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
-				kfree(qp);
-				return ERR_PTR(-EFAULT);
-			}
+			if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
+				return -EFAULT;
 
-			err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
+			err = mthca_map_user_db(dev, &context->uar,
 						context->db_tab,
-						ucmd.sq_db_index, ucmd.sq_db_page);
-			if (err) {
-				kfree(qp);
-				return ERR_PTR(err);
-			}
+						ucmd.sq_db_index,
+						ucmd.sq_db_page);
+			if (err)
+				return err;
 
-			err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
+			err = mthca_map_user_db(dev, &context->uar,
 						context->db_tab,
-						ucmd.rq_db_index, ucmd.rq_db_page);
+						ucmd.rq_db_index,
+						ucmd.rq_db_page);
 			if (err) {
-				mthca_unmap_user_db(to_mdev(pd->device),
-						    &context->uar,
+				mthca_unmap_user_db(dev, &context->uar,
 						    context->db_tab,
 						    ucmd.sq_db_index);
-				kfree(qp);
-				return ERR_PTR(err);
+				return err;
 			}
 
 			qp->mr.ibmr.lkey = ucmd.lkey;
@@ -512,20 +505,16 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
 			qp->rq.db_index  = ucmd.rq_db_index;
 		}
 
-		err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd),
+		err = mthca_alloc_qp(dev, to_mpd(ibqp->pd),
 				     to_mcq(init_attr->send_cq),
 				     to_mcq(init_attr->recv_cq),
 				     init_attr->qp_type, init_attr->sq_sig_type,
 				     &init_attr->cap, qp, udata);
 
 		if (err && udata) {
-			mthca_unmap_user_db(to_mdev(pd->device),
-					    &context->uar,
-					    context->db_tab,
+			mthca_unmap_user_db(dev, &context->uar, context->db_tab,
 					    ucmd.sq_db_index);
-			mthca_unmap_user_db(to_mdev(pd->device),
-					    &context->uar,
-					    context->db_tab,
+			mthca_unmap_user_db(dev, &context->uar, context->db_tab,
 					    ucmd.rq_db_index);
 		}
 
@@ -535,34 +524,28 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
 	case IB_QPT_SMI:
 	case IB_QPT_GSI:
 	{
-		qp = kzalloc(sizeof(*qp), GFP_KERNEL);
-		if (!qp)
-			return ERR_PTR(-ENOMEM);
 		qp->sqp = kzalloc(sizeof(struct mthca_sqp), GFP_KERNEL);
-		if (!qp->sqp) {
-			kfree(qp);
-			return ERR_PTR(-ENOMEM);
-		}
+		if (!qp->sqp)
+			return -ENOMEM;
 
 		qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
 
-		err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd),
+		err = mthca_alloc_sqp(dev, to_mpd(ibqp->pd),
 				      to_mcq(init_attr->send_cq),
 				      to_mcq(init_attr->recv_cq),
 				      init_attr->sq_sig_type, &init_attr->cap,
-				      qp->ibqp.qp_num, init_attr->port_num,
-				      qp, udata);
+				      qp->ibqp.qp_num, init_attr->port_num, qp,
+				      udata);
 		break;
 	}
 	default:
 		/* Don't support raw QPs */
-		return ERR_PTR(-EOPNOTSUPP);
+		return -EOPNOTSUPP;
 	}
 
 	if (err) {
 		kfree(qp->sqp);
-		kfree(qp);
-		return ERR_PTR(err);
+		return err;
 	}
 
 	init_attr->cap.max_send_wr     = qp->sq.max;
@@ -571,7 +554,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
 	init_attr->cap.max_recv_sge    = qp->rq.max_gs;
 	init_attr->cap.max_inline_data = qp->max_inline_data;
 
-	return &qp->ibqp;
+	return 0;
 }
 
 static int mthca_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
@@ -594,7 +577,6 @@ static int mthca_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
 	}
 	mthca_free_qp(to_mdev(qp->device), to_mqp(qp));
 	kfree(to_mqp(qp)->sqp);
-	kfree(to_mqp(qp));
 	return 0;
 }
 
@@ -1121,6 +1103,7 @@ static const struct ib_device_ops mthca_dev_ops = {
 	INIT_RDMA_OBJ_SIZE(ib_ah, mthca_ah, ibah),
 	INIT_RDMA_OBJ_SIZE(ib_cq, mthca_cq, ibcq),
 	INIT_RDMA_OBJ_SIZE(ib_pd, mthca_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_qp, mthca_qp, ibqp),
 	INIT_RDMA_OBJ_SIZE(ib_ucontext, mthca_ucontext, ibucontext),
 };
 
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index f329db0c591f..7abf6cf1e937 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -185,6 +185,7 @@ static const struct ib_device_ops ocrdma_dev_ops = {
 	INIT_RDMA_OBJ_SIZE(ib_ah, ocrdma_ah, ibah),
 	INIT_RDMA_OBJ_SIZE(ib_cq, ocrdma_cq, ibcq),
 	INIT_RDMA_OBJ_SIZE(ib_pd, ocrdma_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_qp, ocrdma_qp, ibqp),
 	INIT_RDMA_OBJ_SIZE(ib_ucontext, ocrdma_ucontext, ibucontext),
 };
 
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 58619ce64d0d..735123d0e9ec 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -1288,19 +1288,19 @@ static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev,
 	}
 }
 
-struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd,
-			       struct ib_qp_init_attr *attrs,
-			       struct ib_udata *udata)
+int ocrdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
+		     struct ib_udata *udata)
 {
 	int status;
+	struct ib_pd *ibpd = ibqp->pd;
 	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
-	struct ocrdma_qp *qp;
-	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
+	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
+	struct ocrdma_dev *dev = get_ocrdma_dev(ibqp->device);
 	struct ocrdma_create_qp_ureq ureq;
 	u16 dpp_credit_lmt, dpp_offset;
 
 	if (attrs->create_flags)
-		return ERR_PTR(-EOPNOTSUPP);
+		return -EOPNOTSUPP;
 
 	status = ocrdma_check_qp_params(ibpd, dev, attrs, udata);
 	if (status)
@@ -1309,12 +1309,7 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd,
 	memset(&ureq, 0, sizeof(ureq));
 	if (udata) {
 		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
-			return ERR_PTR(-EFAULT);
-	}
-	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
-	if (!qp) {
-		status = -ENOMEM;
-		goto gen_err;
+			return -EFAULT;
 	}
 	ocrdma_set_qp_init_params(qp, pd, attrs);
 	if (udata == NULL)
@@ -1349,7 +1344,7 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd,
 	ocrdma_store_gsi_qp_cq(dev, attrs);
 	qp->ibqp.qp_num = qp->id;
 	mutex_unlock(&dev->dev_lock);
-	return &qp->ibqp;
+	return 0;
 
 cpy_err:
 	ocrdma_del_qpn_map(dev, qp);
@@ -1359,10 +1354,9 @@ mbx_err:
 	mutex_unlock(&dev->dev_lock);
 	kfree(qp->wqe_wr_id_tbl);
 	kfree(qp->rqe_wr_id_tbl);
-	kfree(qp);
 	pr_err("%s(%d) error=%d\n", __func__, dev->id, status);
 gen_err:
-	return ERR_PTR(status);
+	return status;
 }
 
 int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
@@ -1731,7 +1725,6 @@ int ocrdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 
 	kfree(qp->wqe_wr_id_tbl);
 	kfree(qp->rqe_wr_id_tbl);
-	kfree(qp);
 	return 0;
 }
 
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index b1c5fad81603..b73d742a520c 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -75,9 +75,8 @@ int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 int ocrdma_resize_cq(struct ib_cq *, int cqe, struct ib_udata *);
 int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
 
-struct ib_qp *ocrdma_create_qp(struct ib_pd *,
-			       struct ib_qp_init_attr *attrs,
-			       struct ib_udata *);
+int ocrdma_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs,
+		     struct ib_udata *udata);
 int _ocrdma_modify_qp(struct ib_qp *, struct ib_qp_attr *attr,
 		      int attr_mask);
 int ocrdma_modify_qp(struct ib_qp *, struct ib_qp_attr *attr,
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index de98e0604f91..755930be01b8 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -233,6 +233,7 @@ static const struct ib_device_ops qedr_dev_ops = {
 	INIT_RDMA_OBJ_SIZE(ib_ah, qedr_ah, ibah),
 	INIT_RDMA_OBJ_SIZE(ib_cq, qedr_cq, ibcq),
 	INIT_RDMA_OBJ_SIZE(ib_pd, qedr_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_qp, qedr_qp, ibqp),
 	INIT_RDMA_OBJ_SIZE(ib_srq, qedr_srq, ibsrq),
 	INIT_RDMA_OBJ_SIZE(ib_xrcd, qedr_xrcd, ibxrcd),
 	INIT_RDMA_OBJ_SIZE(ib_ucontext, qedr_ucontext, ibucontext),
diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
index 13e5e6bbec99..05307c1488b8 100644
--- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
@@ -319,20 +319,19 @@ err1:
 	return rc;
 }
 
-struct ib_qp *qedr_create_gsi_qp(struct qedr_dev *dev,
-				 struct ib_qp_init_attr *attrs,
-				 struct qedr_qp *qp)
+int qedr_create_gsi_qp(struct qedr_dev *dev, struct ib_qp_init_attr *attrs,
+		       struct qedr_qp *qp)
 {
 	int rc;
 
 	rc = qedr_check_gsi_qp_attrs(dev, attrs);
 	if (rc)
-		return ERR_PTR(rc);
+		return rc;
 
 	rc = qedr_ll2_start(dev, attrs, qp);
 	if (rc) {
 		DP_ERR(dev, "create gsi qp: failed on ll2 start. rc=%d\n", rc);
-		return ERR_PTR(rc);
+		return rc;
 	}
 
 	/* create QP */
@@ -359,7 +358,7 @@ struct ib_qp *qedr_create_gsi_qp(struct qedr_dev *dev,
 
 	DP_DEBUG(dev, QEDR_MSG_GSI, "created GSI QP %p\n", qp);
 
-	return &qp->ibqp;
+	return 0;
 
 err:
 	kfree(qp->rqe_wr_id);
@@ -368,7 +367,7 @@ err:
 	if (rc)
 		DP_ERR(dev, "create gsi qp: failed destroy on create\n");
 
-	return ERR_PTR(-ENOMEM);
+	return -ENOMEM;
 }
 
 int qedr_destroy_gsi_qp(struct qedr_dev *dev)
diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.h b/drivers/infiniband/hw/qedr/qedr_roce_cm.h
index d46dcd3f6424..f3432f035ec6 100644
--- a/drivers/infiniband/hw/qedr/qedr_roce_cm.h
+++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.h
@@ -50,9 +50,8 @@ int qedr_gsi_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
 		       const struct ib_recv_wr **bad_wr);
 int qedr_gsi_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 		       const struct ib_send_wr **bad_wr);
-struct ib_qp *qedr_create_gsi_qp(struct qedr_dev *dev,
-				 struct ib_qp_init_attr *attrs,
-				 struct qedr_qp *qp);
+int qedr_create_gsi_qp(struct qedr_dev *dev, struct ib_qp_init_attr *attrs,
+		       struct qedr_qp *qp);
 void qedr_store_gsi_qp_cq(struct qedr_dev *dev,
 			  struct qedr_qp *qp, struct ib_qp_init_attr *attrs);
 int qedr_destroy_gsi_qp(struct qedr_dev *dev);
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index fdc47ef7d861..3fbf172dbbef 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -1339,6 +1339,15 @@ static int qedr_copy_qp_uresp(struct qedr_dev *dev,
 	return rc;
 }
 
+static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
+{
+	qed_chain_reset(&qph->pbl);
+	qph->prod = 0;
+	qph->cons = 0;
+	qph->wqe_cons = 0;
+	qph->db_data.data.value = cpu_to_le16(0);
+}
+
 static void qedr_set_common_qp_params(struct qedr_dev *dev,
 				      struct qedr_qp *qp,
 				      struct qedr_pd *pd,
@@ -1354,9 +1363,13 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev,
 	qp->qp_type = attrs->qp_type;
 	qp->max_inline_data = attrs->cap.max_inline_data;
 	qp->state = QED_ROCE_QP_STATE_RESET;
+
+	qp->prev_wqe_size = 0;
+
 	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
 	qp->dev = dev;
 	if (qedr_qp_has_sq(qp)) {
+		qedr_reset_qp_hwq_info(&qp->sq);
 		qp->sq.max_sges = attrs->cap.max_send_sge;
 		qp->sq_cq = get_qedr_cq(attrs->send_cq);
 		DP_DEBUG(dev, QEDR_MSG_QP,
@@ -1368,6 +1381,7 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev,
 		qp->srq = get_qedr_srq(attrs->srq);
 
 	if (qedr_qp_has_rq(qp)) {
+		qedr_reset_qp_hwq_info(&qp->rq);
 		qp->rq_cq = get_qedr_cq(attrs->recv_cq);
 		qp->rq.max_sges = attrs->cap.max_recv_sge;
 		DP_DEBUG(dev, QEDR_MSG_QP,
@@ -1481,7 +1495,7 @@ static int qedr_init_srq_user_params(struct ib_udata *udata,
 		return PTR_ERR(srq->prod_umem);
 	}
 
-	sg = srq->prod_umem->sg_head.sgl;
+	sg = srq->prod_umem->sgt_append.sgt.sgl;
 	srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg);
 
 	return 0;
@@ -2239,34 +2253,30 @@ static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
 	return 0;
 }
 
-struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
-			     struct ib_qp_init_attr *attrs,
-			     struct ib_udata *udata)
+int qedr_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
+		   struct ib_udata *udata)
 {
 	struct qedr_xrcd *xrcd = NULL;
-	struct qedr_pd *pd = NULL;
-	struct qedr_dev *dev;
-	struct qedr_qp *qp;
-	struct ib_qp *ibqp;
+	struct ib_pd *ibpd = ibqp->pd;
+	struct qedr_pd *pd = get_qedr_pd(ibpd);
+	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
+	struct qedr_qp *qp = get_qedr_qp(ibqp);
 	int rc = 0;
 
 	if (attrs->create_flags)
-		return ERR_PTR(-EOPNOTSUPP);
+		return -EOPNOTSUPP;
 
-	if (attrs->qp_type == IB_QPT_XRC_TGT) {
+	if (attrs->qp_type == IB_QPT_XRC_TGT)
 		xrcd = get_qedr_xrcd(attrs->xrcd);
-		dev = get_qedr_dev(xrcd->ibxrcd.device);
-	} else {
+	else
 		pd = get_qedr_pd(ibpd);
-		dev = get_qedr_dev(ibpd->device);
-	}
 
 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
 		 udata ? "user library" : "kernel", pd);
 
 	rc = qedr_check_qp_attrs(ibpd, dev, attrs, udata);
 	if (rc)
-		return ERR_PTR(rc);
+		return rc;
 
 	DP_DEBUG(dev, QEDR_MSG_QP,
 		 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
@@ -2276,20 +2286,10 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
 		 get_qedr_cq(attrs->recv_cq),
 		 attrs->recv_cq ? get_qedr_cq(attrs->recv_cq)->icid : 0);
 
-	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
-	if (!qp) {
-		DP_ERR(dev, "create qp: failed allocating memory\n");
-		return ERR_PTR(-ENOMEM);
-	}
-
 	qedr_set_common_qp_params(dev, qp, pd, attrs);
 
-	if (attrs->qp_type == IB_QPT_GSI) {
-		ibqp = qedr_create_gsi_qp(dev, attrs, qp);
-		if (IS_ERR(ibqp))
-			kfree(qp);
-		return ibqp;
-	}
+	if (attrs->qp_type == IB_QPT_GSI)
+		return qedr_create_gsi_qp(dev, attrs, qp);
 
 	if (udata || xrcd)
 		rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
@@ -2297,7 +2297,7 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
 		rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
 
 	if (rc)
-		goto out_free_qp;
+		return rc;
 
 	qp->ibqp.qp_num = qp->qp_id;
 
@@ -2307,14 +2307,11 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
 			goto out_free_qp_resources;
 	}
 
-	return &qp->ibqp;
+	return 0;
 
 out_free_qp_resources:
 	qedr_free_qp_resources(dev, qp, udata);
-out_free_qp:
-	kfree(qp);
-
-	return ERR_PTR(-EFAULT);
+	return -EFAULT;
 }
 
 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
@@ -2359,15 +2356,6 @@ static enum qed_roce_qp_state qedr_get_state_from_ibqp(
 	}
 }
 
-static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
-{
-	qed_chain_reset(&qph->pbl);
-	qph->prod = 0;
-	qph->cons = 0;
-	qph->wqe_cons = 0;
-	qph->db_data.data.value = cpu_to_le16(0);
-}
-
 static int qedr_update_qp_state(struct qedr_dev *dev,
 				struct qedr_qp *qp,
 				enum qed_roce_qp_state cur_state,
@@ -2382,9 +2370,6 @@ static int qedr_update_qp_state(struct qedr_dev *dev,
 	case QED_ROCE_QP_STATE_RESET:
 		switch (new_state) {
 		case QED_ROCE_QP_STATE_INIT:
-			qp->prev_wqe_size = 0;
-			qedr_reset_qp_hwq_info(&qp->sq);
-			qedr_reset_qp_hwq_info(&qp->rq);
 			break;
 		default:
 			status = -EINVAL;
@@ -2874,8 +2859,6 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 
 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
 		qedr_iw_qp_rem_ref(&qp->ibqp);
-	else
-		kfree(qp);
 
 	return 0;
 }
@@ -2996,7 +2979,11 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
 
 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
 	if (rc) {
-		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
+		if (rc == -EINVAL)
+			DP_ERR(dev, "Out of MR resources\n");
+		else
+			DP_ERR(dev, "roce alloc tid returned error %d\n", rc);
+
 		goto err1;
 	}
 
@@ -3091,7 +3078,11 @@ static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
 
 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
 	if (rc) {
-		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
+		if (rc == -EINVAL)
+			DP_ERR(dev, "Out of MR resources\n");
+		else
+			DP_ERR(dev, "roce alloc tid returned error %d\n", rc);
+
 		goto err0;
 	}
 
@@ -3221,7 +3212,11 @@ struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
 
 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
 	if (rc) {
-		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
+		if (rc == -EINVAL)
+			DP_ERR(dev, "Out of MR resources\n");
+		else
+			DP_ERR(dev, "roce alloc tid returned error %d\n", rc);
+
 		goto err1;
 	}
 
@@ -3915,12 +3910,6 @@ int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
 
 	spin_lock_irqsave(&qp->q_lock, flags);
 
-	if (qp->state == QED_ROCE_QP_STATE_RESET) {
-		spin_unlock_irqrestore(&qp->q_lock, flags);
-		*bad_wr = wr;
-		return -EINVAL;
-	}
-
 	while (wr) {
 		int i;
 
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 34ad47515861..031687dafc61 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -56,8 +56,8 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 int qedr_resize_cq(struct ib_cq *, int cqe, struct ib_udata *);
 int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
-struct ib_qp *qedr_create_qp(struct ib_pd *, struct ib_qp_init_attr *attrs,
-			     struct ib_udata *);
+int qedr_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs,
+		   struct ib_udata *udata);
 int qedr_modify_qp(struct ib_qp *, struct ib_qp_attr *attr,
 		   int attr_mask, struct ib_udata *udata);
 int qedr_query_qp(struct ib_qp *, struct ib_qp_attr *qp_attr,
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index c60e79d214a1..63854f4b6524 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -429,8 +429,8 @@ cleanup:
 				dd->f_put_tid(dd, &tidbase[tid],
 					      RCVHQ_RCV_TYPE_EXPECTED,
 					      dd->tidinvalid);
-				pci_unmap_page(dd->pcidev, phys, PAGE_SIZE,
-					       PCI_DMA_FROMDEVICE);
+				dma_unmap_page(&dd->pcidev->dev, phys,
+					       PAGE_SIZE, DMA_FROM_DEVICE);
 				dd->pageshadow[ctxttid + tid] = NULL;
 			}
 		}
@@ -544,8 +544,8 @@ static int qib_tid_free(struct qib_ctxtdata *rcd, unsigned subctxt,
 			 */
 			dd->f_put_tid(dd, &tidbase[tid],
 				      RCVHQ_RCV_TYPE_EXPECTED, dd->tidinvalid);
-			pci_unmap_page(dd->pcidev, phys, PAGE_SIZE,
-				       PCI_DMA_FROMDEVICE);
+			dma_unmap_page(&dd->pcidev->dev, phys, PAGE_SIZE,
+				       DMA_FROM_DEVICE);
 			qib_release_user_pages(&p, 1);
 		}
 	}
@@ -1781,8 +1781,8 @@ static void unlock_expected_tids(struct qib_ctxtdata *rcd)
 		phys = dd->physshadow[i];
 		dd->physshadow[i] = dd->tidinvalid;
 		dd->pageshadow[i] = NULL;
-		pci_unmap_page(dd->pcidev, phys, PAGE_SIZE,
-			       PCI_DMA_FROMDEVICE);
+		dma_unmap_page(&dd->pcidev->dev, phys, PAGE_SIZE,
+			       DMA_FROM_DEVICE);
 		qib_release_user_pages(&p, 1);
 		cnt++;
 	}
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index b5a78576c48b..d1a72e89e297 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -1335,8 +1335,8 @@ static void cleanup_device_data(struct qib_devdata *dd)
 			for (i = ctxt_tidbase; i < maxtid; i++) {
 				if (!tmpp[i])
 					continue;
-				pci_unmap_page(dd->pcidev, tmpd[i],
-					       PAGE_SIZE, PCI_DMA_FROMDEVICE);
+				dma_unmap_page(&dd->pcidev->dev, tmpd[i],
+					       PAGE_SIZE, DMA_FROM_DEVICE);
 				qib_release_user_pages(&tmpp[i], 1);
 				tmpp[i] = NULL;
 			}
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index d57e49de6650..452e2355d24e 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -403,9 +403,11 @@ static ssize_t diagc_attr_store(struct ib_device *ibdev, u32 port_num,
 }
 
 #define QIB_DIAGC_ATTR(N)                                                      \
+	static_assert(&((struct qib_ibport *)0)->rvp.n_##N != (u64 *)NULL);    \
 	static struct qib_diagc_attr qib_diagc_attr_##N = {                    \
 		.attr = __ATTR(N, 0664, diagc_attr_show, diagc_attr_store),    \
-		.counter = &((struct qib_ibport *)0)->rvp.n_##N - (u64 *)0,    \
+		.counter =                                                     \
+			offsetof(struct qib_ibport, rvp.n_##N) / sizeof(u64)   \
 	}
 
 QIB_DIAGC_ATTR(rc_resends);
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index 5d6cf7427431..f4b5f05058e4 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -60,15 +60,15 @@ int qib_map_page(struct pci_dev *hwdev, struct page *page, dma_addr_t *daddr)
 {
 	dma_addr_t phys;
 
-	phys = pci_map_page(hwdev, page, 0, PAGE_SIZE, PCI_DMA_FROMDEVICE);
-	if (pci_dma_mapping_error(hwdev, phys))
+	phys = dma_map_page(&hwdev->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+	if (dma_mapping_error(&hwdev->dev, phys))
 		return -ENOMEM;
 
 	if (!phys) {
-		pci_unmap_page(hwdev, phys, PAGE_SIZE, PCI_DMA_FROMDEVICE);
-		phys = pci_map_page(hwdev, page, 0, PAGE_SIZE,
-				    PCI_DMA_FROMDEVICE);
-		if (pci_dma_mapping_error(hwdev, phys))
+		dma_unmap_page(&hwdev->dev, phys, PAGE_SIZE, DMA_FROM_DEVICE);
+		phys = dma_map_page(&hwdev->dev, page, 0, PAGE_SIZE,
+				    DMA_FROM_DEVICE);
+		if (dma_mapping_error(&hwdev->dev, phys))
 			return -ENOMEM;
 		/*
 		 * FIXME: If we get 0 again, we should keep this page,
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index c49f9e19d926..228e9a36dad0 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -360,6 +360,7 @@ static const struct ib_device_ops usnic_dev_ops = {
 	.reg_user_mr = usnic_ib_reg_mr,
 	INIT_RDMA_OBJ_SIZE(ib_pd, usnic_ib_pd, ibpd),
 	INIT_RDMA_OBJ_SIZE(ib_cq, usnic_ib_cq, ibcq),
+	INIT_RDMA_OBJ_SIZE(ib_qp, usnic_ib_qp_grp, ibqp),
 	INIT_RDMA_OBJ_SIZE(ib_ucontext, usnic_ib_ucontext, ibucontext),
 };
 
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
index 0cdb156e165e..3b60fa9cb58d 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
@@ -665,13 +665,12 @@ static int qp_grp_id_from_flow(struct usnic_ib_qp_grp_flow *qp_flow,
 	return 0;
 }
 
-struct usnic_ib_qp_grp *
-usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf,
-			struct usnic_ib_pd *pd,
-			struct usnic_vnic_res_spec *res_spec,
-			struct usnic_transport_spec *transport_spec)
+int usnic_ib_qp_grp_create(struct usnic_ib_qp_grp *qp_grp,
+			   struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf,
+			   struct usnic_ib_pd *pd,
+			   struct usnic_vnic_res_spec *res_spec,
+			   struct usnic_transport_spec *transport_spec)
 {
-	struct usnic_ib_qp_grp *qp_grp;
 	int err;
 	enum usnic_transport_type transport = transport_spec->trans_type;
 	struct usnic_ib_qp_grp_flow *qp_flow;
@@ -684,20 +683,15 @@ usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf,
 		usnic_err("Spec does not meet minimum req for transport %d\n",
 				transport);
 		log_spec(res_spec);
-		return ERR_PTR(err);
+		return err;
 	}
 
-	qp_grp = kzalloc(sizeof(*qp_grp), GFP_ATOMIC);
-	if (!qp_grp)
-		return NULL;
-
 	qp_grp->res_chunk_list = alloc_res_chunk_list(vf->vnic, res_spec,
 							qp_grp);
-	if (IS_ERR_OR_NULL(qp_grp->res_chunk_list)) {
-		err = qp_grp->res_chunk_list ?
-				PTR_ERR(qp_grp->res_chunk_list) : -ENOMEM;
-		goto out_free_qp_grp;
-	}
+	if (IS_ERR_OR_NULL(qp_grp->res_chunk_list))
+		return qp_grp->res_chunk_list ?
+				     PTR_ERR(qp_grp->res_chunk_list) :
+				     -ENOMEM;
 
 	err = qp_grp_and_vf_bind(vf, pd, qp_grp);
 	if (err)
@@ -724,7 +718,7 @@ usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf,
 
 	usnic_ib_sysfs_qpn_add(qp_grp);
 
-	return qp_grp;
+	return 0;
 
 out_release_flow:
 	release_and_remove_flow(qp_flow);
@@ -732,10 +726,7 @@ out_qp_grp_vf_unbind:
 	qp_grp_and_vf_unbind(qp_grp);
 out_free_res:
 	free_qp_grp_res(qp_grp->res_chunk_list);
-out_free_qp_grp:
-	kfree(qp_grp);
-
-	return ERR_PTR(err);
+	return err;
 }
 
 void usnic_ib_qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp)
@@ -748,7 +739,6 @@ void usnic_ib_qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp)
 	usnic_ib_sysfs_qpn_remove(qp_grp);
 	qp_grp_and_vf_unbind(qp_grp);
 	free_qp_grp_res(qp_grp->res_chunk_list);
-	kfree(qp_grp);
 }
 
 struct usnic_vnic_res_chunk*
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h
index a8a2314c9531..62e732be6736 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h
@@ -89,11 +89,11 @@ extern const struct usnic_vnic_res_spec min_transport_spec[USNIC_TRANSPORT_MAX];
 const char *usnic_ib_qp_grp_state_to_string(enum ib_qp_state state);
 int usnic_ib_qp_grp_dump_hdr(char *buf, int buf_sz);
 int usnic_ib_qp_grp_dump_rows(void *obj, char *buf, int buf_sz);
-struct usnic_ib_qp_grp *
-usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf,
-			struct usnic_ib_pd *pd,
-			struct usnic_vnic_res_spec *res_spec,
-			struct usnic_transport_spec *trans_spec);
+int usnic_ib_qp_grp_create(struct usnic_ib_qp_grp *qp,
+			   struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf,
+			   struct usnic_ib_pd *pd,
+			   struct usnic_vnic_res_spec *res_spec,
+			   struct usnic_transport_spec *trans_spec);
 void usnic_ib_qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp);
 int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp,
 				enum ib_qp_state new_state,
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 57d210ca855a..06a4e9d4545d 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -168,30 +168,31 @@ static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp,
 	return 0;
 }
 
-static struct usnic_ib_qp_grp*
-find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
-				struct usnic_ib_pd *pd,
-				struct usnic_transport_spec *trans_spec,
-				struct usnic_vnic_res_spec *res_spec)
+static int
+find_free_vf_and_create_qp_grp(struct ib_qp *qp,
+			       struct usnic_transport_spec *trans_spec,
+			       struct usnic_vnic_res_spec *res_spec)
 {
+	struct usnic_ib_dev *us_ibdev = to_usdev(qp->device);
+	struct usnic_ib_pd *pd = to_upd(qp->pd);
 	struct usnic_ib_vf *vf;
 	struct usnic_vnic *vnic;
-	struct usnic_ib_qp_grp *qp_grp;
+	struct usnic_ib_qp_grp *qp_grp = to_uqp_grp(qp);
 	struct device *dev, **dev_list;
-	int i;
+	int i, ret;
 
 	BUG_ON(!mutex_is_locked(&us_ibdev->usdev_lock));
 
 	if (list_empty(&us_ibdev->vf_dev_list)) {
 		usnic_info("No vfs to allocate\n");
-		return NULL;
+		return -ENOMEM;
 	}
 
 	if (usnic_ib_share_vf) {
 		/* Try to find resouces on a used vf which is in pd */
 		dev_list = usnic_uiom_get_dev_list(pd->umem_pd);
 		if (IS_ERR(dev_list))
-			return ERR_CAST(dev_list);
+			return PTR_ERR(dev_list);
 		for (i = 0; dev_list[i]; i++) {
 			dev = dev_list[i];
 			vf = dev_get_drvdata(dev);
@@ -202,10 +203,10 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
 						dev_name(&us_ibdev->ib_dev.dev),
 						pci_name(usnic_vnic_get_pdev(
 									vnic)));
-				qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev,
-								vf, pd,
-								res_spec,
-								trans_spec);
+				ret = usnic_ib_qp_grp_create(qp_grp,
+							     us_ibdev->ufdev,
+							     vf, pd, res_spec,
+							     trans_spec);
 
 				spin_unlock(&vf->lock);
 				goto qp_grp_check;
@@ -223,9 +224,9 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
 		vnic = vf->vnic;
 		if (vf->qp_grp_ref_cnt == 0 &&
 		    usnic_vnic_check_room(vnic, res_spec) == 0) {
-			qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev, vf,
-							pd, res_spec,
-							trans_spec);
+			ret = usnic_ib_qp_grp_create(qp_grp, us_ibdev->ufdev,
+						     vf, pd, res_spec,
+						     trans_spec);
 
 			spin_unlock(&vf->lock);
 			goto qp_grp_check;
@@ -235,16 +236,15 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
 
 	usnic_info("No free qp grp found on %s\n",
 		   dev_name(&us_ibdev->ib_dev.dev));
-	return ERR_PTR(-ENOMEM);
+	return -ENOMEM;
 
 qp_grp_check:
-	if (IS_ERR_OR_NULL(qp_grp)) {
+	if (ret) {
 		usnic_err("Failed to allocate qp_grp\n");
 		if (usnic_ib_share_vf)
 			usnic_uiom_free_dev_list(dev_list);
-		return ERR_PTR(qp_grp ? PTR_ERR(qp_grp) : -ENOMEM);
 	}
-	return qp_grp;
+	return ret;
 }
 
 static void qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp)
@@ -458,13 +458,12 @@ int usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
 	return 0;
 }
 
-struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
-					struct ib_qp_init_attr *init_attr,
-					struct ib_udata *udata)
+int usnic_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
+		       struct ib_udata *udata)
 {
 	int err;
 	struct usnic_ib_dev *us_ibdev;
-	struct usnic_ib_qp_grp *qp_grp;
+	struct usnic_ib_qp_grp *qp_grp = to_uqp_grp(ibqp);
 	struct usnic_ib_ucontext *ucontext = rdma_udata_to_drv_context(
 		udata, struct usnic_ib_ucontext, ibucontext);
 	int cq_cnt;
@@ -474,29 +473,29 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
 
 	usnic_dbg("\n");
 
-	us_ibdev = to_usdev(pd->device);
+	us_ibdev = to_usdev(ibqp->device);
 
 	if (init_attr->create_flags)
-		return ERR_PTR(-EOPNOTSUPP);
+		return -EOPNOTSUPP;
 
 	err = ib_copy_from_udata(&cmd, udata, sizeof(cmd));
 	if (err) {
 		usnic_err("%s: cannot copy udata for create_qp\n",
 			  dev_name(&us_ibdev->ib_dev.dev));
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 	err = create_qp_validate_user_data(cmd);
 	if (err) {
 		usnic_err("%s: Failed to validate user data\n",
 			  dev_name(&us_ibdev->ib_dev.dev));
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 	if (init_attr->qp_type != IB_QPT_UD) {
 		usnic_err("%s asked to make a non-UD QP: %d\n",
 			  dev_name(&us_ibdev->ib_dev.dev), init_attr->qp_type);
-		return ERR_PTR(-EOPNOTSUPP);
+		return -EOPNOTSUPP;
 	}
 
 	trans_spec = cmd.spec;
@@ -504,13 +503,9 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
 	cq_cnt = (init_attr->send_cq == init_attr->recv_cq) ? 1 : 2;
 	res_spec = min_transport_spec[trans_spec.trans_type];
 	usnic_vnic_res_spec_update(&res_spec, USNIC_VNIC_RES_TYPE_CQ, cq_cnt);
-	qp_grp = find_free_vf_and_create_qp_grp(us_ibdev, to_upd(pd),
-						&trans_spec,
-						&res_spec);
-	if (IS_ERR_OR_NULL(qp_grp)) {
-		err = qp_grp ? PTR_ERR(qp_grp) : -ENOMEM;
+	err = find_free_vf_and_create_qp_grp(ibqp, &trans_spec, &res_spec);
+	if (err)
 		goto out_release_mutex;
-	}
 
 	err = usnic_ib_fill_create_qp_resp(qp_grp, udata);
 	if (err) {
@@ -522,13 +517,13 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
 	list_add_tail(&qp_grp->link, &ucontext->qp_grp_list);
 	usnic_ib_log_vf(qp_grp->vf);
 	mutex_unlock(&us_ibdev->usdev_lock);
-	return &qp_grp->ibqp;
+	return 0;
 
 out_release_qp_grp:
 	qp_grp_destroy(qp_grp);
 out_release_mutex:
 	mutex_unlock(&us_ibdev->usdev_lock);
-	return ERR_PTR(err);
+	return err;
 }
 
 int usnic_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
index 6b82d0f2d184..6ca9ee0dddbe 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -50,9 +50,8 @@ int usnic_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
 				union ib_gid *gid);
 int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
 int usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
-					struct ib_qp_init_attr *init_attr,
-					struct ib_udata *udata);
+int usnic_ib_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr,
+		       struct ib_udata *udata);
 int usnic_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata);
 int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 				int attr_mask, struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 8ed8bc24c69f..105f3a155939 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -185,6 +185,7 @@ static const struct ib_device_ops pvrdma_dev_ops = {
 	INIT_RDMA_OBJ_SIZE(ib_ah, pvrdma_ah, ibah),
 	INIT_RDMA_OBJ_SIZE(ib_cq, pvrdma_cq, ibcq),
 	INIT_RDMA_OBJ_SIZE(ib_pd, pvrdma_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_qp, pvrdma_qp, ibqp),
 	INIT_RDMA_OBJ_SIZE(ib_ucontext, pvrdma_ucontext, ibucontext),
 };
 
@@ -810,18 +811,10 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
 	}
 
 	/* Enable 64-Bit DMA */
-	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
-		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) != 0) {
+		ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
 		if (ret != 0) {
-			dev_err(&pdev->dev,
-				"pci_set_consistent_dma_mask failed\n");
-			goto err_free_resource;
-		}
-	} else {
-		ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-		if (ret != 0) {
-			dev_err(&pdev->dev,
-				"pci_set_dma_mask failed\n");
+			dev_err(&pdev->dev, "dma_set_mask failed\n");
 			goto err_free_resource;
 		}
 	}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index 67769b715126..f83cd4a9d992 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -182,18 +182,17 @@ static int pvrdma_set_sq_size(struct pvrdma_dev *dev, struct ib_qp_cap *req_cap,
 
 /**
  * pvrdma_create_qp - create queue pair
- * @pd: protection domain
+ * @ibqp: queue pair
  * @init_attr: queue pair attributes
  * @udata: user data
  *
- * @return: the ib_qp pointer on success, otherwise returns an errno.
+ * @return: the 0 on success, otherwise returns an errno.
  */
-struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
-			       struct ib_qp_init_attr *init_attr,
-			       struct ib_udata *udata)
+int pvrdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
+		     struct ib_udata *udata)
 {
-	struct pvrdma_qp *qp = NULL;
-	struct pvrdma_dev *dev = to_vdev(pd->device);
+	struct pvrdma_qp *qp = to_vqp(ibqp);
+	struct pvrdma_dev *dev = to_vdev(ibqp->device);
 	union pvrdma_cmd_req req;
 	union pvrdma_cmd_resp rsp;
 	struct pvrdma_cmd_create_qp *cmd = &req.create_qp;
@@ -209,7 +208,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
 		dev_warn(&dev->pdev->dev,
 			 "invalid create queuepair flags %#x\n",
 			 init_attr->create_flags);
-		return ERR_PTR(-EOPNOTSUPP);
+		return -EOPNOTSUPP;
 	}
 
 	if (init_attr->qp_type != IB_QPT_RC &&
@@ -217,22 +216,22 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
 	    init_attr->qp_type != IB_QPT_GSI) {
 		dev_warn(&dev->pdev->dev, "queuepair type %d not supported\n",
 			 init_attr->qp_type);
-		return ERR_PTR(-EOPNOTSUPP);
+		return -EOPNOTSUPP;
 	}
 
 	if (is_srq && !dev->dsr->caps.max_srq) {
 		dev_warn(&dev->pdev->dev,
 			 "SRQs not supported by device\n");
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 	if (!atomic_add_unless(&dev->num_qps, 1, dev->dsr->caps.max_qp))
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 
 	switch (init_attr->qp_type) {
 	case IB_QPT_GSI:
 		if (init_attr->port_num == 0 ||
-		    init_attr->port_num > pd->device->phys_port_cnt) {
+		    init_attr->port_num > ibqp->device->phys_port_cnt) {
 			dev_warn(&dev->pdev->dev, "invalid queuepair attrs\n");
 			ret = -EINVAL;
 			goto err_qp;
@@ -240,12 +239,6 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
 		fallthrough;
 	case IB_QPT_RC:
 	case IB_QPT_UD:
-		qp = kzalloc(sizeof(*qp), GFP_KERNEL);
-		if (!qp) {
-			ret = -ENOMEM;
-			goto err_qp;
-		}
-
 		spin_lock_init(&qp->sq.lock);
 		spin_lock_init(&qp->rq.lock);
 		mutex_init(&qp->mutex);
@@ -275,9 +268,9 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
 
 			if (!is_srq) {
 				/* set qp->sq.wqe_cnt, shift, buf_size.. */
-				qp->rumem =
-					ib_umem_get(pd->device, ucmd.rbuf_addr,
-						    ucmd.rbuf_size, 0);
+				qp->rumem = ib_umem_get(ibqp->device,
+							ucmd.rbuf_addr,
+							ucmd.rbuf_size, 0);
 				if (IS_ERR(qp->rumem)) {
 					ret = PTR_ERR(qp->rumem);
 					goto err_qp;
@@ -288,7 +281,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
 				qp->srq = to_vsrq(init_attr->srq);
 			}
 
-			qp->sumem = ib_umem_get(pd->device, ucmd.sbuf_addr,
+			qp->sumem = ib_umem_get(ibqp->device, ucmd.sbuf_addr,
 						ucmd.sbuf_size, 0);
 			if (IS_ERR(qp->sumem)) {
 				if (!is_srq)
@@ -306,12 +299,12 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
 				qp->npages_recv = 0;
 			qp->npages = qp->npages_send + qp->npages_recv;
 		} else {
-			ret = pvrdma_set_sq_size(to_vdev(pd->device),
+			ret = pvrdma_set_sq_size(to_vdev(ibqp->device),
 						 &init_attr->cap, qp);
 			if (ret)
 				goto err_qp;
 
-			ret = pvrdma_set_rq_size(to_vdev(pd->device),
+			ret = pvrdma_set_rq_size(to_vdev(ibqp->device),
 						 &init_attr->cap, qp);
 			if (ret)
 				goto err_qp;
@@ -362,7 +355,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
 
 	memset(cmd, 0, sizeof(*cmd));
 	cmd->hdr.cmd = PVRDMA_CMD_CREATE_QP;
-	cmd->pd_handle = to_vpd(pd)->pd_handle;
+	cmd->pd_handle = to_vpd(ibqp->pd)->pd_handle;
 	cmd->send_cq_handle = to_vcq(init_attr->send_cq)->cq_handle;
 	cmd->recv_cq_handle = to_vcq(init_attr->recv_cq)->cq_handle;
 	if (is_srq)
@@ -418,11 +411,11 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
 			dev_warn(&dev->pdev->dev,
 				 "failed to copy back udata\n");
 			__pvrdma_destroy_qp(dev, qp);
-			return ERR_PTR(-EINVAL);
+			return -EINVAL;
 		}
 	}
 
-	return &qp->ibqp;
+	return 0;
 
 err_pdir:
 	pvrdma_page_dir_cleanup(dev, &qp->pdir);
@@ -430,10 +423,8 @@ err_umem:
 	ib_umem_release(qp->rumem);
 	ib_umem_release(qp->sumem);
 err_qp:
-	kfree(qp);
 	atomic_dec(&dev->num_qps);
-
-	return ERR_PTR(ret);
+	return ret;
 }
 
 static void _pvrdma_free_qp(struct pvrdma_qp *qp)
@@ -454,8 +445,6 @@ static void _pvrdma_free_qp(struct pvrdma_qp *qp)
 
 	pvrdma_page_dir_cleanup(dev, &qp->pdir);
 
-	kfree(qp);
-
 	atomic_dec(&dev->num_qps);
 }
 
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
index 544b94d97c3a..78807b23d831 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
@@ -390,9 +390,8 @@ int pvrdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 int pvrdma_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
 int pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
 
-struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
-			       struct ib_qp_init_attr *init_attr,
-			       struct ib_udata *udata);
+int pvrdma_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr,
+		     struct ib_udata *udata);
 int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		     int attr_mask, struct ib_udata *udata);
 int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c
index a3e5b368c5e7..63999239ed9e 100644
--- a/drivers/infiniband/sw/rdmavt/ah.c
+++ b/drivers/infiniband/sw/rdmavt/ah.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2016 - 2019 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/slab.h>
diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h
index 5a85edd06491..c11fdf637d64 100644
--- a/drivers/infiniband/sw/rdmavt/ah.h
+++ b/drivers/infiniband/sw/rdmavt/ah.h
@@ -1,53 +1,11 @@
-#ifndef DEF_RVTAH_H
-#define DEF_RVTAH_H
-
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
+#ifndef DEF_RVTAH_H
+#define DEF_RVTAH_H
+
 #include <rdma/rdma_vt.h>
 
 int rvt_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index 5138afca067f..9fe4dcaa049a 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2016 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/slab.h>
diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h
index feb01e7ee004..b0a948ec760b 100644
--- a/drivers/infiniband/sw/rdmavt/cq.h
+++ b/drivers/infiniband/sw/rdmavt/cq.h
@@ -1,53 +1,11 @@
-#ifndef DEF_RVTCQ_H
-#define DEF_RVTCQ_H
-
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2016 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
+#ifndef DEF_RVTCQ_H
+#define DEF_RVTCQ_H
+
 #include <rdma/rdma_vt.h>
 #include <rdma/rdmavt_cq.h>
 
diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c
index 207bc0ed96ff..98a8fe3b04ef 100644
--- a/drivers/infiniband/sw/rdmavt/mad.c
+++ b/drivers/infiniband/sw/rdmavt/mad.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <rdma/ib_mad.h>
diff --git a/drivers/infiniband/sw/rdmavt/mad.h b/drivers/infiniband/sw/rdmavt/mad.h
index 1eae5efea4be..368be29eab37 100644
--- a/drivers/infiniband/sw/rdmavt/mad.h
+++ b/drivers/infiniband/sw/rdmavt/mad.h
@@ -1,53 +1,11 @@
-#ifndef DEF_RVTMAD_H
-#define DEF_RVTMAD_H
-
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
+#ifndef DEF_RVTMAD_H
+#define DEF_RVTMAD_H
+
 #include <rdma/rdma_vt.h>
 
 int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num,
diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c
index 951abac13dbb..a123874e1ca7 100644
--- a/drivers/infiniband/sw/rdmavt/mcast.c
+++ b/drivers/infiniband/sw/rdmavt/mcast.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/slab.h>
diff --git a/drivers/infiniband/sw/rdmavt/mcast.h b/drivers/infiniband/sw/rdmavt/mcast.h
index 29f579267608..b96d86f9625b 100644
--- a/drivers/infiniband/sw/rdmavt/mcast.h
+++ b/drivers/infiniband/sw/rdmavt/mcast.h
@@ -1,53 +1,11 @@
-#ifndef DEF_RVTMCAST_H
-#define DEF_RVTMCAST_H
-
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
+#ifndef DEF_RVTMCAST_H
+#define DEF_RVTMCAST_H
+
 #include <rdma/rdma_vt.h>
 
 void rvt_driver_mcast_init(struct rvt_dev_info *rdi);
diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c
index f5d0e33cf3d7..4d2238f3f3c8 100644
--- a/drivers/infiniband/sw/rdmavt/mmap.c
+++ b/drivers/infiniband/sw/rdmavt/mmap.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/slab.h>
diff --git a/drivers/infiniband/sw/rdmavt/mmap.h b/drivers/infiniband/sw/rdmavt/mmap.h
index 02466c40bc1e..7e92cf28e071 100644
--- a/drivers/infiniband/sw/rdmavt/mmap.h
+++ b/drivers/infiniband/sw/rdmavt/mmap.h
@@ -1,53 +1,11 @@
-#ifndef DEF_RDMAVTMMAP_H
-#define DEF_RDMAVTMMAP_H
-
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
+#ifndef DEF_RDMAVTMMAP_H
+#define DEF_RDMAVTMMAP_H
+
 #include <rdma/rdma_vt.h>
 
 void rvt_mmap_init(struct rvt_dev_info *rdi);
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 34b7af6ab9c2..8a1f2e285180 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/slab.h>
@@ -410,7 +368,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	mr->mr.page_shift = PAGE_SHIFT;
 	m = 0;
 	n = 0;
-	for_each_sg_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
+	for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
 		void *vaddr;
 
 		vaddr = page_address(sg_page_iter_page(&sg_iter));
diff --git a/drivers/infiniband/sw/rdmavt/mr.h b/drivers/infiniband/sw/rdmavt/mr.h
index b3aba359401b..d17f1400b5f6 100644
--- a/drivers/infiniband/sw/rdmavt/mr.h
+++ b/drivers/infiniband/sw/rdmavt/mr.h
@@ -1,53 +1,11 @@
-#ifndef DEF_RVTMR_H
-#define DEF_RVTMR_H
-
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
+#ifndef DEF_RVTMR_H
+#define DEF_RVTMR_H
+
 #include <rdma/rdma_vt.h>
 
 struct rvt_mr {
diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c
index 01b7abf91520..ae62071969fa 100644
--- a/drivers/infiniband/sw/rdmavt/pd.c
+++ b/drivers/infiniband/sw/rdmavt/pd.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/slab.h>
diff --git a/drivers/infiniband/sw/rdmavt/pd.h b/drivers/infiniband/sw/rdmavt/pd.h
index 06a6a38beedc..42a0ef3b7da3 100644
--- a/drivers/infiniband/sw/rdmavt/pd.h
+++ b/drivers/infiniband/sw/rdmavt/pd.h
@@ -1,53 +1,11 @@
-#ifndef DEF_RDMAVTPD_H
-#define DEF_RDMAVTPD_H
-
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
+#ifndef DEF_RDMAVTPD_H
+#define DEF_RDMAVTPD_H
+
 #include <rdma/rdma_vt.h>
 
 int rvt_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index e9f3d356b361..49bdd78ac664 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2016 - 2020 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/hash.h>
@@ -1058,7 +1016,7 @@ static int alloc_ud_wq_attr(struct rvt_qp *qp, int node)
 
 /**
  * rvt_create_qp - create a queue pair for a device
- * @ibpd: the protection domain who's device we create the queue pair for
+ * @ibqp: the queue pair
  * @init_attr: the attributes of the queue pair
  * @udata: user data for libibverbs.so
  *
@@ -1066,47 +1024,45 @@ static int alloc_ud_wq_attr(struct rvt_qp *qp, int node)
  * unique idea of what queue pair numbers mean. For instance there is a reserved
  * range for PSM.
  *
- * Return: the queue pair on success, otherwise returns an errno.
+ * Return: 0 on success, otherwise returns an errno.
  *
  * Called by the ib_create_qp() core verbs function.
  */
-struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
-			    struct ib_qp_init_attr *init_attr,
-			    struct ib_udata *udata)
+int rvt_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
+		  struct ib_udata *udata)
 {
-	struct rvt_qp *qp;
-	int err;
+	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+	int ret = -ENOMEM;
 	struct rvt_swqe *swq = NULL;
 	size_t sz;
-	size_t sg_list_sz;
-	struct ib_qp *ret = ERR_PTR(-ENOMEM);
-	struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device);
+	size_t sg_list_sz = 0;
+	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
 	void *priv = NULL;
 	size_t sqsize;
 	u8 exclude_prefix = 0;
 
 	if (!rdi)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	if (init_attr->create_flags & ~IB_QP_CREATE_NETDEV_USE)
-		return ERR_PTR(-EOPNOTSUPP);
+		return -EOPNOTSUPP;
 
 	if (init_attr->cap.max_send_sge > rdi->dparms.props.max_send_sge ||
 	    init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	/* Check receive queue parameters if no SRQ is specified. */
 	if (!init_attr->srq) {
 		if (init_attr->cap.max_recv_sge >
 		    rdi->dparms.props.max_recv_sge ||
 		    init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr)
-			return ERR_PTR(-EINVAL);
+			return -EINVAL;
 
 		if (init_attr->cap.max_send_sge +
 		    init_attr->cap.max_send_wr +
 		    init_attr->cap.max_recv_sge +
 		    init_attr->cap.max_recv_wr == 0)
-			return ERR_PTR(-EINVAL);
+			return -EINVAL;
 	}
 	sqsize =
 		init_attr->cap.max_send_wr + 1 +
@@ -1115,8 +1071,8 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 	case IB_QPT_SMI:
 	case IB_QPT_GSI:
 		if (init_attr->port_num == 0 ||
-		    init_attr->port_num > ibpd->device->phys_port_cnt)
-			return ERR_PTR(-EINVAL);
+		    init_attr->port_num > ibqp->device->phys_port_cnt)
+			return -EINVAL;
 		fallthrough;
 	case IB_QPT_UC:
 	case IB_QPT_RC:
@@ -1124,10 +1080,8 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 		sz = struct_size(swq, sg_list, init_attr->cap.max_send_sge);
 		swq = vzalloc_node(array_size(sz, sqsize), rdi->dparms.node);
 		if (!swq)
-			return ERR_PTR(-ENOMEM);
+			return -ENOMEM;
 
-		sz = sizeof(*qp);
-		sg_list_sz = 0;
 		if (init_attr->srq) {
 			struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq);
 
@@ -1137,10 +1091,10 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 		} else if (init_attr->cap.max_recv_sge > 1)
 			sg_list_sz = sizeof(*qp->r_sg_list) *
 				(init_attr->cap.max_recv_sge - 1);
-		qp = kzalloc_node(sz + sg_list_sz, GFP_KERNEL,
-				  rdi->dparms.node);
-		if (!qp)
-			goto bail_swq;
+		qp->r_sg_list =
+			kzalloc_node(sg_list_sz, GFP_KERNEL, rdi->dparms.node);
+		if (!qp->r_sg_list)
+			goto bail_qp;
 		qp->allowed_ops = get_allowed_ops(init_attr->qp_type);
 
 		RCU_INIT_POINTER(qp->next, NULL);
@@ -1165,7 +1119,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 		 */
 		priv = rdi->driver_f.qp_priv_alloc(rdi, qp);
 		if (IS_ERR(priv)) {
-			ret = priv;
+			ret = PTR_ERR(priv);
 			goto bail_qp;
 		}
 		qp->priv = priv;
@@ -1179,12 +1133,10 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
 			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
 				sizeof(struct rvt_rwqe);
-			err = rvt_alloc_rq(&qp->r_rq, qp->r_rq.size * sz,
+			ret = rvt_alloc_rq(&qp->r_rq, qp->r_rq.size * sz,
 					   rdi->dparms.node, udata);
-			if (err) {
-				ret = ERR_PTR(err);
+			if (ret)
 				goto bail_driver_priv;
-			}
 		}
 
 		/*
@@ -1205,40 +1157,35 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 		qp->s_max_sge = init_attr->cap.max_send_sge;
 		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
 			qp->s_flags = RVT_S_SIGNAL_REQ_WR;
-		err = alloc_ud_wq_attr(qp, rdi->dparms.node);
-		if (err) {
-			ret = (ERR_PTR(err));
+		ret = alloc_ud_wq_attr(qp, rdi->dparms.node);
+		if (ret)
 			goto bail_rq_rvt;
-		}
 
 		if (init_attr->create_flags & IB_QP_CREATE_NETDEV_USE)
 			exclude_prefix = RVT_AIP_QP_PREFIX;
 
-		err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table,
+		ret = alloc_qpn(rdi, &rdi->qp_dev->qpn_table,
 				init_attr->qp_type,
 				init_attr->port_num,
 				exclude_prefix);
-		if (err < 0) {
-			ret = ERR_PTR(err);
+		if (ret < 0)
 			goto bail_rq_wq;
-		}
-		qp->ibqp.qp_num = err;
+
+		qp->ibqp.qp_num = ret;
 		if (init_attr->create_flags & IB_QP_CREATE_NETDEV_USE)
 			qp->ibqp.qp_num |= RVT_AIP_QP_BASE;
 		qp->port_num = init_attr->port_num;
 		rvt_init_qp(rdi, qp, init_attr->qp_type);
 		if (rdi->driver_f.qp_priv_init) {
-			err = rdi->driver_f.qp_priv_init(rdi, qp, init_attr);
-			if (err) {
-				ret = ERR_PTR(err);
+			ret = rdi->driver_f.qp_priv_init(rdi, qp, init_attr);
+			if (ret)
 				goto bail_rq_wq;
-			}
 		}
 		break;
 
 	default:
 		/* Don't support raw QPs */
-		return ERR_PTR(-EOPNOTSUPP);
+		return -EOPNOTSUPP;
 	}
 
 	init_attr->cap.max_inline_data = 0;
@@ -1251,28 +1198,24 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 		if (!qp->r_rq.wq) {
 			__u64 offset = 0;
 
-			err = ib_copy_to_udata(udata, &offset,
+			ret = ib_copy_to_udata(udata, &offset,
 					       sizeof(offset));
-			if (err) {
-				ret = ERR_PTR(err);
+			if (ret)
 				goto bail_qpn;
-			}
 		} else {
 			u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz;
 
 			qp->ip = rvt_create_mmap_info(rdi, s, udata,
 						      qp->r_rq.wq);
 			if (IS_ERR(qp->ip)) {
-				ret = ERR_CAST(qp->ip);
+				ret = PTR_ERR(qp->ip);
 				goto bail_qpn;
 			}
 
-			err = ib_copy_to_udata(udata, &qp->ip->offset,
+			ret = ib_copy_to_udata(udata, &qp->ip->offset,
 					       sizeof(qp->ip->offset));
-			if (err) {
-				ret = ERR_PTR(err);
+			if (ret)
 				goto bail_ip;
-			}
 		}
 		qp->pid = current->pid;
 	}
@@ -1280,7 +1223,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 	spin_lock(&rdi->n_qps_lock);
 	if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) {
 		spin_unlock(&rdi->n_qps_lock);
-		ret = ERR_PTR(-ENOMEM);
+		ret = ENOMEM;
 		goto bail_ip;
 	}
 
@@ -1306,9 +1249,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 		spin_unlock_irq(&rdi->pending_lock);
 	}
 
-	ret = &qp->ibqp;
-
-	return ret;
+	return 0;
 
 bail_ip:
 	if (qp->ip)
@@ -1328,11 +1269,8 @@ bail_driver_priv:
 
 bail_qp:
 	kfree(qp->s_ack_queue);
-	kfree(qp);
-
-bail_swq:
+	kfree(qp->r_sg_list);
 	vfree(swq);
-
 	return ret;
 }
 
@@ -1762,11 +1700,11 @@ int rvt_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 	kvfree(qp->r_rq.kwq);
 	rdi->driver_f.qp_priv_free(rdi, qp);
 	kfree(qp->s_ack_queue);
+	kfree(qp->r_sg_list);
 	rdma_destroy_ah_attr(&qp->remote_ah_attr);
 	rdma_destroy_ah_attr(&qp->alt_ah_attr);
 	free_ud_wq_attr(qp);
 	vfree(qp->s_wq);
-	kfree(qp);
 	return 0;
 }
 
diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h
index 2cdba1283bf6..bd04be80723c 100644
--- a/drivers/infiniband/sw/rdmavt/qp.h
+++ b/drivers/infiniband/sw/rdmavt/qp.h
@@ -1,60 +1,17 @@
-#ifndef DEF_RVTQP_H
-#define DEF_RVTQP_H
-
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
+#ifndef DEF_RVTQP_H
+#define DEF_RVTQP_H
+
 #include <rdma/rdmavt_qp.h>
 
 int rvt_driver_qp_init(struct rvt_dev_info *rdi);
 void rvt_qp_exit(struct rvt_dev_info *rdi);
-struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
-			    struct ib_qp_init_attr *init_attr,
-			    struct ib_udata *udata);
+int rvt_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
+		  struct ib_udata *udata);
 int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		  int attr_mask, struct ib_udata *udata);
 int rvt_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
diff --git a/drivers/infiniband/sw/rdmavt/rc.c b/drivers/infiniband/sw/rdmavt/rc.c
index c58735f4c94a..4e5d4a27633c 100644
--- a/drivers/infiniband/sw/rdmavt/rc.c
+++ b/drivers/infiniband/sw/rdmavt/rc.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <rdma/rdmavt_qp.h>
diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c
index 2a7c2f12d372..14d196bde2a1 100644
--- a/drivers/infiniband/sw/rdmavt/srq.c
+++ b/drivers/infiniband/sw/rdmavt/srq.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/err.h>
diff --git a/drivers/infiniband/sw/rdmavt/srq.h b/drivers/infiniband/sw/rdmavt/srq.h
index d5a1a053b1b9..7d17372cd269 100644
--- a/drivers/infiniband/sw/rdmavt/srq.h
+++ b/drivers/infiniband/sw/rdmavt/srq.h
@@ -1,53 +1,11 @@
-#ifndef DEF_RVTSRQ_H
-#define DEF_RVTSRQ_H
-
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
+#ifndef DEF_RVTSRQ_H
+#define DEF_RVTSRQ_H
+
 #include <rdma/rdma_vt.h>
 void rvt_driver_srq_init(struct rvt_dev_info *rdi);
 int rvt_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *srq_init_attr,
diff --git a/drivers/infiniband/sw/rdmavt/trace.c b/drivers/infiniband/sw/rdmavt/trace.c
index d593285a349c..01704b8dd683 100644
--- a/drivers/infiniband/sw/rdmavt/trace.c
+++ b/drivers/infiniband/sw/rdmavt/trace.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #define CREATE_TRACE_POINTS
diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h
index 36ddbd291ee0..30eb4a72ea7d 100644
--- a/drivers/infiniband/sw/rdmavt/trace.h
+++ b/drivers/infiniband/sw/rdmavt/trace.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2016, 2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #define RDI_DEV_ENTRY(rdi)   __string(dev, rvt_get_ibdev_name(rdi))
diff --git a/drivers/infiniband/sw/rdmavt/trace_cq.h b/drivers/infiniband/sw/rdmavt/trace_cq.h
index 91bc192cee5e..30dd1d9bae26 100644
--- a/drivers/infiniband/sw/rdmavt/trace_cq.h
+++ b/drivers/infiniband/sw/rdmavt/trace_cq.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2016 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 #if !defined(__RVT_TRACE_CQ_H) || defined(TRACE_HEADER_MULTI_READ)
 #define __RVT_TRACE_CQ_H
diff --git a/drivers/infiniband/sw/rdmavt/trace_mr.h b/drivers/infiniband/sw/rdmavt/trace_mr.h
index c5b675ca4fa0..1de7012000cb 100644
--- a/drivers/infiniband/sw/rdmavt/trace_mr.h
+++ b/drivers/infiniband/sw/rdmavt/trace_mr.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 #if !defined(__RVT_TRACE_MR_H) || defined(TRACE_HEADER_MULTI_READ)
 #define __RVT_TRACE_MR_H
diff --git a/drivers/infiniband/sw/rdmavt/trace_qp.h b/drivers/infiniband/sw/rdmavt/trace_qp.h
index 800cec8bb3c7..c28c81fcb32a 100644
--- a/drivers/infiniband/sw/rdmavt/trace_qp.h
+++ b/drivers/infiniband/sw/rdmavt/trace_qp.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 #if !defined(__RVT_TRACE_QP_H) || defined(TRACE_HEADER_MULTI_READ)
 #define __RVT_TRACE_QP_H
diff --git a/drivers/infiniband/sw/rdmavt/trace_rc.h b/drivers/infiniband/sw/rdmavt/trace_rc.h
index 9de52e138025..833bf778b05d 100644
--- a/drivers/infiniband/sw/rdmavt/trace_rc.h
+++ b/drivers/infiniband/sw/rdmavt/trace_rc.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 #if !defined(__RVT_TRACE_RC_H) || defined(TRACE_HEADER_MULTI_READ)
 #define __RVT_TRACE_RC_H
diff --git a/drivers/infiniband/sw/rdmavt/trace_rvt.h b/drivers/infiniband/sw/rdmavt/trace_rvt.h
index 746f33461d9a..9df6b0b8263b 100644
--- a/drivers/infiniband/sw/rdmavt/trace_rvt.h
+++ b/drivers/infiniband/sw/rdmavt/trace_rvt.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 #if !defined(__RVT_TRACE_RVT_H) || defined(TRACE_HEADER_MULTI_READ)
 #define __RVT_TRACE_RVT_H
diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h
index cb96be0f8f19..ff7d39a30768 100644
--- a/drivers/infiniband/sw/rdmavt/trace_tx.h
+++ b/drivers/infiniband/sw/rdmavt/trace_tx.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 #if !defined(__RVT_TRACE_TX_H) || defined(TRACE_HEADER_MULTI_READ)
 #define __RVT_TRACE_TX_H
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index ac17209816cd..59481ae39505 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
 /*
  * Copyright(c) 2016 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/module.h>
@@ -131,6 +89,13 @@ static int rvt_query_device(struct ib_device *ibdev,
 	return 0;
 }
 
+static int rvt_get_numa_node(struct ib_device *ibdev)
+{
+	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+
+	return rdi->dparms.node;
+}
+
 static int rvt_modify_device(struct ib_device *device,
 			     int device_modify_mask,
 			     struct ib_device_modify *device_modify)
@@ -380,6 +345,7 @@ static const struct ib_device_ops rvt_dev_ops = {
 	.destroy_srq = rvt_destroy_srq,
 	.detach_mcast = rvt_detach_mcast,
 	.get_dma_mr = rvt_get_dma_mr,
+	.get_numa_node = rvt_get_numa_node,
 	.get_port_immutable = rvt_get_port_immutable,
 	.map_mr_sg = rvt_map_mr_sg,
 	.mmap = rvt_mmap,
@@ -406,6 +372,7 @@ static const struct ib_device_ops rvt_dev_ops = {
 	INIT_RDMA_OBJ_SIZE(ib_ah, rvt_ah, ibah),
 	INIT_RDMA_OBJ_SIZE(ib_cq, rvt_cq, ibcq),
 	INIT_RDMA_OBJ_SIZE(ib_pd, rvt_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_qp, rvt_qp, ibqp),
 	INIT_RDMA_OBJ_SIZE(ib_srq, rvt_srq, ibsrq),
 	INIT_RDMA_OBJ_SIZE(ib_ucontext, rvt_ucontext, ibucontext),
 };
diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h
index c0fed6510f0b..461574e3f6a5 100644
--- a/drivers/infiniband/sw/rdmavt/vt.h
+++ b/drivers/infiniband/sw/rdmavt/vt.h
@@ -1,53 +1,11 @@
-#ifndef DEF_RDMAVT_H
-#define DEF_RDMAVT_H
-
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
 /*
  * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
+#ifndef DEF_RDMAVT_H
+#define DEF_RDMAVT_H
+
 #include <rdma/rdma_vt.h>
 #include <linux/pci.h>
 #include "pd.h"
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index 623fd17df02d..1bb3fb618bf5 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -14,7 +14,6 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
-#include <linux/crc32.h>
 
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_user_verbs.h>
@@ -42,27 +41,6 @@
 
 extern bool rxe_initialized;
 
-static inline u32 rxe_crc32(struct rxe_dev *rxe,
-			    u32 crc, void *next, size_t len)
-{
-	u32 retval;
-	int err;
-
-	SHASH_DESC_ON_STACK(shash, rxe->tfm);
-
-	shash->tfm = rxe->tfm;
-	*(u32 *)shash_desc_ctx(shash) = crc;
-	err = crypto_shash_update(shash, next, len);
-	if (unlikely(err)) {
-		pr_warn_ratelimited("failed crc calculation, err: %d\n", err);
-		return crc32_le(crc, next, len);
-	}
-
-	retval = *(u32 *)shash_desc_ctx(shash);
-	barrier_data(shash_desc_ctx(shash));
-	return retval;
-}
-
 void rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);
 
 int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name);
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 58ad9c2644f3..d2d802c776fd 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -349,7 +349,7 @@ static inline enum comp_state do_read(struct rxe_qp *qp,
 
 	ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE,
 			&wqe->dma, payload_addr(pkt),
-			payload_size(pkt), RXE_TO_MR_OBJ, NULL);
+			payload_size(pkt), RXE_TO_MR_OBJ);
 	if (ret) {
 		wqe->status = IB_WC_LOC_PROT_ERR;
 		return COMPST_ERROR;
@@ -371,7 +371,7 @@ static inline enum comp_state do_atomic(struct rxe_qp *qp,
 
 	ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE,
 			&wqe->dma, &atomic_orig,
-			sizeof(u64), RXE_TO_MR_OBJ, NULL);
+			sizeof(u64), RXE_TO_MR_OBJ);
 	if (ret) {
 		wqe->status = IB_WC_LOC_PROT_ERR;
 		return COMPST_ERROR;
diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c
index 66b2aad54bb7..e03af3012590 100644
--- a/drivers/infiniband/sw/rxe/rxe_icrc.c
+++ b/drivers/infiniband/sw/rxe/rxe_icrc.c
@@ -4,18 +4,79 @@
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
  */
 
+#include <linux/crc32.h>
+
 #include "rxe.h"
 #include "rxe_loc.h"
 
-/* Compute a partial ICRC for all the IB transport headers. */
-u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb)
+/**
+ * rxe_icrc_init() - Initialize crypto function for computing crc32
+ * @rxe: rdma_rxe device object
+ *
+ * Return: 0 on success else an error
+ */
+int rxe_icrc_init(struct rxe_dev *rxe)
+{
+	struct crypto_shash *tfm;
+
+	tfm = crypto_alloc_shash("crc32", 0, 0);
+	if (IS_ERR(tfm)) {
+		pr_warn("failed to init crc32 algorithm err:%ld\n",
+			       PTR_ERR(tfm));
+		return PTR_ERR(tfm);
+	}
+
+	rxe->tfm = tfm;
+
+	return 0;
+}
+
+/**
+ * rxe_crc32() - Compute cumulative crc32 for a contiguous segment
+ * @rxe: rdma_rxe device object
+ * @crc: starting crc32 value from previous segments
+ * @next: starting address of current segment
+ * @len: length of current segment
+ *
+ * Return: the cumulative crc32 checksum
+ */
+static __be32 rxe_crc32(struct rxe_dev *rxe, __be32 crc, void *next, size_t len)
+{
+	__be32 icrc;
+	int err;
+
+	SHASH_DESC_ON_STACK(shash, rxe->tfm);
+
+	shash->tfm = rxe->tfm;
+	*(__be32 *)shash_desc_ctx(shash) = crc;
+	err = crypto_shash_update(shash, next, len);
+	if (unlikely(err)) {
+		pr_warn_ratelimited("failed crc calculation, err: %d\n", err);
+		return (__force __be32)crc32_le((__force u32)crc, next, len);
+	}
+
+	icrc = *(__be32 *)shash_desc_ctx(shash);
+	barrier_data(shash_desc_ctx(shash));
+
+	return icrc;
+}
+
+/**
+ * rxe_icrc_hdr() - Compute the partial ICRC for the network and transport
+ *		  headers of a packet.
+ * @skb: packet buffer
+ * @pkt: packet information
+ *
+ * Return: the partial ICRC
+ */
+static __be32 rxe_icrc_hdr(struct sk_buff *skb, struct rxe_pkt_info *pkt)
 {
 	unsigned int bth_offset = 0;
 	struct iphdr *ip4h = NULL;
 	struct ipv6hdr *ip6h = NULL;
 	struct udphdr *udph;
 	struct rxe_bth *bth;
-	int crc;
+	__be32 crc;
 	int length;
 	int hdr_size = sizeof(struct udphdr) +
 		(skb->protocol == htons(ETH_P_IP) ?
@@ -30,7 +91,7 @@ u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb)
 	/* This seed is the result of computing a CRC with a seed of
 	 * 0xfffffff and 8 bytes of 0xff representing a masked LRH.
 	 */
-	crc = 0xdebb20e3;
+	crc = (__force __be32)0xdebb20e3;
 
 	if (skb->protocol == htons(ETH_P_IP)) { /* IPv4 */
 		memcpy(pshdr, ip_hdr(skb), hdr_size);
@@ -67,3 +128,58 @@ u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb)
 			rxe_opcode[pkt->opcode].length - RXE_BTH_BYTES);
 	return crc;
 }
+
+/**
+ * rxe_icrc_check() - Compute ICRC for a packet and compare to the ICRC
+ *		      delivered in the packet.
+ * @skb: packet buffer
+ * @pkt: packet information
+ *
+ * Return: 0 if the values match else an error
+ */
+int rxe_icrc_check(struct sk_buff *skb, struct rxe_pkt_info *pkt)
+{
+	__be32 *icrcp;
+	__be32 pkt_icrc;
+	__be32 icrc;
+
+	icrcp = (__be32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE);
+	pkt_icrc = *icrcp;
+
+	icrc = rxe_icrc_hdr(skb, pkt);
+	icrc = rxe_crc32(pkt->rxe, icrc, (u8 *)payload_addr(pkt),
+				payload_size(pkt) + bth_pad(pkt));
+	icrc = ~icrc;
+
+	if (unlikely(icrc != pkt_icrc)) {
+		if (skb->protocol == htons(ETH_P_IPV6))
+			pr_warn_ratelimited("bad ICRC from %pI6c\n",
+					    &ipv6_hdr(skb)->saddr);
+		else if (skb->protocol == htons(ETH_P_IP))
+			pr_warn_ratelimited("bad ICRC from %pI4\n",
+					    &ip_hdr(skb)->saddr);
+		else
+			pr_warn_ratelimited("bad ICRC from unknown\n");
+
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * rxe_icrc_generate() - compute ICRC for a packet.
+ * @skb: packet buffer
+ * @pkt: packet information
+ */
+void rxe_icrc_generate(struct sk_buff *skb, struct rxe_pkt_info *pkt)
+{
+	__be32 *icrcp;
+	__be32 icrc;
+
+	icrcp = (__be32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE);
+	icrc = rxe_icrc_hdr(skb, pkt);
+	icrc = rxe_crc32(pkt->rxe, icrc, (u8 *)payload_addr(pkt),
+				payload_size(pkt) + bth_pad(pkt));
+	*icrcp = ~icrc;
+}
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 1ddb20855dee..f0c954575bde 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -77,10 +77,9 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 		     int access, struct rxe_mr *mr);
 int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr);
 int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
-		enum rxe_mr_copy_dir dir, u32 *crcp);
-int copy_data(struct rxe_pd *pd, int access,
-	      struct rxe_dma_info *dma, void *addr, int length,
-	      enum rxe_mr_copy_dir dir, u32 *crcp);
+		enum rxe_mr_copy_dir dir);
+int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
+	      void *addr, int length, enum rxe_mr_copy_dir dir);
 void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length);
 struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
 			 enum rxe_mr_lookup_type type);
@@ -99,11 +98,11 @@ struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey);
 void rxe_mw_cleanup(struct rxe_pool_entry *arg);
 
 /* rxe_net.c */
-void rxe_loopback(struct sk_buff *skb);
-int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb);
 struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
 				int paylen, struct rxe_pkt_info *pkt);
-int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc);
+int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb);
+int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
+		    struct sk_buff *skb);
 const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num);
 int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid);
 int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid);
@@ -193,7 +192,10 @@ int rxe_completer(void *arg);
 int rxe_requester(void *arg);
 int rxe_responder(void *arg);
 
-u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb);
+/* rxe_icrc.c */
+int rxe_icrc_init(struct rxe_dev *rxe);
+int rxe_icrc_check(struct sk_buff *skb, struct rxe_pkt_info *pkt);
+void rxe_icrc_generate(struct sk_buff *skb, struct rxe_pkt_info *pkt);
 
 void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb);
 
@@ -204,47 +206,4 @@ static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp)
 	return rxe_wr_opcode_info[opcode].mask[qp->ibqp.qp_type];
 }
 
-static inline int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
-				  struct sk_buff *skb)
-{
-	int err;
-	int is_request = pkt->mask & RXE_REQ_MASK;
-	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
-
-	if ((is_request && (qp->req.state != QP_STATE_READY)) ||
-	    (!is_request && (qp->resp.state != QP_STATE_READY))) {
-		pr_info("Packet dropped. QP is not in ready state\n");
-		goto drop;
-	}
-
-	if (pkt->mask & RXE_LOOPBACK_MASK) {
-		memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt));
-		rxe_loopback(skb);
-		err = 0;
-	} else {
-		err = rxe_send(pkt, skb);
-	}
-
-	if (err) {
-		rxe->xmit_errors++;
-		rxe_counter_inc(rxe, RXE_CNT_SEND_ERR);
-		return err;
-	}
-
-	if ((qp_type(qp) != IB_QPT_RC) &&
-	    (pkt->mask & RXE_END_MASK)) {
-		pkt->wqe->state = wqe_state_done;
-		rxe_run_task(&qp->comp.task, 1);
-	}
-
-	rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS);
-	goto done;
-
-drop:
-	kfree_skb(skb);
-	err = 0;
-done:
-	return err;
-}
-
 #endif /* RXE_LOC_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index be4bcb420fab..5890a8246216 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -123,7 +123,6 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 		goto err_out;
 	}
 
-	mr->umem = umem;
 	num_buf = ib_umem_num_pages(umem);
 
 	rxe_mr_init(access, mr);
@@ -143,7 +142,7 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 	if (length > 0) {
 		buf = map[0]->buf;
 
-		for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
+		for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
 			if (num_buf >= RXE_BUF_PER_MAP) {
 				map++;
 				buf = map[0]->buf;
@@ -286,11 +285,10 @@ out:
 }
 
 /* copy data from a range (vaddr, vaddr+length-1) to or from
- * a mr object starting at iova. Compute incremental value of
- * crc32 if crcp is not zero. caller must hold a reference to mr
+ * a mr object starting at iova.
  */
 int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
-		enum rxe_mr_copy_dir dir, u32 *crcp)
+		enum rxe_mr_copy_dir dir)
 {
 	int			err;
 	int			bytes;
@@ -300,7 +298,6 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 	int			m;
 	int			i;
 	size_t			offset;
-	u32			crc = crcp ? (*crcp) : 0;
 
 	if (length == 0)
 		return 0;
@@ -314,10 +311,6 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 
 		memcpy(dest, src, length);
 
-		if (crcp)
-			*crcp = rxe_crc32(to_rdev(mr->ibmr.device), *crcp, dest,
-					  length);
-
 		return 0;
 	}
 
@@ -348,10 +341,6 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 
 		memcpy(dest, src, bytes);
 
-		if (crcp)
-			crc = rxe_crc32(to_rdev(mr->ibmr.device), crc, dest,
-					bytes);
-
 		length	-= bytes;
 		addr	+= bytes;
 
@@ -366,9 +355,6 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 		}
 	}
 
-	if (crcp)
-		*crcp = crc;
-
 	return 0;
 
 err1:
@@ -384,8 +370,7 @@ int copy_data(
 	struct rxe_dma_info	*dma,
 	void			*addr,
 	int			length,
-	enum rxe_mr_copy_dir	dir,
-	u32			*crcp)
+	enum rxe_mr_copy_dir	dir)
 {
 	int			bytes;
 	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
@@ -446,7 +431,7 @@ int copy_data(
 		if (bytes > 0) {
 			iova = sge->addr + offset;
 
-			err = rxe_mr_copy(mr, iova, addr, bytes, dir, crcp);
+			err = rxe_mr_copy(mr, iova, addr, bytes, dir);
 			if (err)
 				goto err2;
 
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 5ac27f28ace1..2cb810cb890a 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -344,7 +344,7 @@ static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb)
 	return 0;
 }
 
-int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc)
+int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb)
 {
 	int err = 0;
 
@@ -353,8 +353,6 @@ int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc)
 	else if (skb->protocol == htons(ETH_P_IPV6))
 		err = prepare6(pkt, skb);
 
-	*crc = rxe_icrc_hdr(pkt, skb);
-
 	if (ether_addr_equal(skb->dev->dev_addr, rxe_get_av(pkt)->dmac))
 		pkt->mask |= RXE_LOOPBACK_MASK;
 
@@ -374,7 +372,7 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb)
 	rxe_drop_ref(qp);
 }
 
-int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb)
+static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
 {
 	int err;
 
@@ -407,19 +405,64 @@ int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb)
 /* fix up a send packet to match the packets
  * received from UDP before looping them back
  */
-void rxe_loopback(struct sk_buff *skb)
+static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt)
 {
-	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
+	memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt));
 
 	if (skb->protocol == htons(ETH_P_IP))
 		skb_pull(skb, sizeof(struct iphdr));
 	else
 		skb_pull(skb, sizeof(struct ipv6hdr));
 
-	if (WARN_ON(!ib_device_try_get(&pkt->rxe->ib_dev)))
+	if (WARN_ON(!ib_device_try_get(&pkt->rxe->ib_dev))) {
 		kfree_skb(skb);
+		return -EIO;
+	}
+
+	rxe_rcv(skb);
+
+	return 0;
+}
+
+int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
+		    struct sk_buff *skb)
+{
+	int err;
+	int is_request = pkt->mask & RXE_REQ_MASK;
+	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+
+	if ((is_request && (qp->req.state != QP_STATE_READY)) ||
+	    (!is_request && (qp->resp.state != QP_STATE_READY))) {
+		pr_info("Packet dropped. QP is not in ready state\n");
+		goto drop;
+	}
+
+	rxe_icrc_generate(skb, pkt);
+
+	if (pkt->mask & RXE_LOOPBACK_MASK)
+		err = rxe_loopback(skb, pkt);
 	else
-		rxe_rcv(skb);
+		err = rxe_send(skb, pkt);
+	if (err) {
+		rxe->xmit_errors++;
+		rxe_counter_inc(rxe, RXE_CNT_SEND_ERR);
+		return err;
+	}
+
+	if ((qp_type(qp) != IB_QPT_RC) &&
+	    (pkt->mask & RXE_END_MASK)) {
+		pkt->wqe->state = wqe_state_done;
+		rxe_run_task(&qp->comp.task, 1);
+	}
+
+	rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS);
+	goto done;
+
+drop:
+	kfree_skb(skb);
+	err = 0;
+done:
+	return err;
 }
 
 struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 0b8e7c6255a2..ffa8420b4765 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -41,7 +41,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
 		.size		= sizeof(struct rxe_qp),
 		.elem_offset	= offsetof(struct rxe_qp, pelem),
 		.cleanup	= rxe_qp_cleanup,
-		.flags		= RXE_POOL_INDEX,
+		.flags		= RXE_POOL_INDEX | RXE_POOL_NO_ALLOC,
 		.min_index	= RXE_MIN_QP_INDEX,
 		.max_index	= RXE_MAX_QP_INDEX,
 	},
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 7a49e27da23a..6a6cc1fa90e4 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -361,8 +361,6 @@ void rxe_rcv(struct sk_buff *skb)
 	int err;
 	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
 	struct rxe_dev *rxe = pkt->rxe;
-	__be32 *icrcp;
-	u32 calc_icrc, pack_icrc;
 
 	if (unlikely(skb->len < RXE_BTH_BYTES))
 		goto drop;
@@ -384,26 +382,9 @@ void rxe_rcv(struct sk_buff *skb)
 	if (unlikely(err))
 		goto drop;
 
-	/* Verify ICRC */
-	icrcp = (__be32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE);
-	pack_icrc = be32_to_cpu(*icrcp);
-
-	calc_icrc = rxe_icrc_hdr(pkt, skb);
-	calc_icrc = rxe_crc32(rxe, calc_icrc, (u8 *)payload_addr(pkt),
-			      payload_size(pkt) + bth_pad(pkt));
-	calc_icrc = (__force u32)cpu_to_be32(~calc_icrc);
-	if (unlikely(calc_icrc != pack_icrc)) {
-		if (skb->protocol == htons(ETH_P_IPV6))
-			pr_warn_ratelimited("bad ICRC from %pI6c\n",
-					    &ipv6_hdr(skb)->saddr);
-		else if (skb->protocol == htons(ETH_P_IP))
-			pr_warn_ratelimited("bad ICRC from %pI4\n",
-					    &ip_hdr(skb)->saddr);
-		else
-			pr_warn_ratelimited("bad ICRC from unknown\n");
-
+	err = rxe_icrc_check(skb, pkt);
+	if (unlikely(err))
 		goto drop;
-	}
 
 	rxe_counter_inc(rxe, RXE_CNT_RCVD_PKTS);
 
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index c57699cc6578..3894197a82f6 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -466,12 +466,9 @@ static int finish_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 		       struct rxe_pkt_info *pkt, struct sk_buff *skb,
 		       int paylen)
 {
-	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
-	u32 crc = 0;
-	u32 *p;
 	int err;
 
-	err = rxe_prepare(pkt, skb, &crc);
+	err = rxe_prepare(pkt, skb);
 	if (err)
 		return err;
 
@@ -479,7 +476,6 @@ static int finish_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 		if (wqe->wr.send_flags & IB_SEND_INLINE) {
 			u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset];
 
-			crc = rxe_crc32(rxe, crc, tmp, paylen);
 			memcpy(payload_addr(pkt), tmp, paylen);
 
 			wqe->dma.resid -= paylen;
@@ -487,8 +483,7 @@ static int finish_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 		} else {
 			err = copy_data(qp->pd, 0, &wqe->dma,
 					payload_addr(pkt), paylen,
-					RXE_FROM_MR_OBJ,
-					&crc);
+					RXE_FROM_MR_OBJ);
 			if (err)
 				return err;
 		}
@@ -496,12 +491,8 @@ static int finish_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 			u8 *pad = payload_addr(pkt) + paylen;
 
 			memset(pad, 0, bth_pad(pkt));
-			crc = rxe_crc32(rxe, crc, pad, bth_pad(pkt));
 		}
 	}
-	p = payload_addr(pkt) + paylen + bth_pad(pkt);
-
-	*p = ~crc;
 
 	return 0;
 }
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 360ec67cb9e1..5501227ddc65 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -536,7 +536,7 @@ static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
 	int err;
 
 	err = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
-			data_addr, data_len, RXE_TO_MR_OBJ, NULL);
+			data_addr, data_len, RXE_TO_MR_OBJ);
 	if (unlikely(err))
 		return (err == -ENOSPC) ? RESPST_ERR_LENGTH
 					: RESPST_ERR_MALFORMED_WQE;
@@ -552,7 +552,7 @@ static enum resp_states write_data_in(struct rxe_qp *qp,
 	int data_len = payload_size(pkt);
 
 	err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset,
-			  payload_addr(pkt), data_len, RXE_TO_MR_OBJ, NULL);
+			  payload_addr(pkt), data_len, RXE_TO_MR_OBJ);
 	if (err) {
 		rc = RESPST_ERR_RKEY_VIOLATION;
 		goto out;
@@ -613,13 +613,10 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
 					  int opcode,
 					  int payload,
 					  u32 psn,
-					  u8 syndrome,
-					  u32 *crcp)
+					  u8 syndrome)
 {
 	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 	struct sk_buff *skb;
-	u32 crc = 0;
-	u32 *p;
 	int paylen;
 	int pad;
 	int err;
@@ -651,20 +648,12 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
 	if (ack->mask & RXE_ATMACK_MASK)
 		atmack_set_orig(ack, qp->resp.atomic_orig);
 
-	err = rxe_prepare(ack, skb, &crc);
+	err = rxe_prepare(ack, skb);
 	if (err) {
 		kfree_skb(skb);
 		return NULL;
 	}
 
-	if (crcp) {
-		/* CRC computation will be continued by the caller */
-		*crcp = crc;
-	} else {
-		p = payload_addr(ack) + payload + bth_pad(ack);
-		*p = ~crc;
-	}
-
 	return skb;
 }
 
@@ -682,8 +671,6 @@ static enum resp_states read_reply(struct rxe_qp *qp,
 	int opcode;
 	int err;
 	struct resp_res *res = qp->resp.res;
-	u32 icrc;
-	u32 *p;
 
 	if (!res) {
 		/* This is the first time we process that request. Get a
@@ -742,24 +729,20 @@ static enum resp_states read_reply(struct rxe_qp *qp,
 	payload = min_t(int, res->read.resid, mtu);
 
 	skb = prepare_ack_packet(qp, req_pkt, &ack_pkt, opcode, payload,
-				 res->cur_psn, AETH_ACK_UNLIMITED, &icrc);
+				 res->cur_psn, AETH_ACK_UNLIMITED);
 	if (!skb)
 		return RESPST_ERR_RNR;
 
 	err = rxe_mr_copy(res->read.mr, res->read.va, payload_addr(&ack_pkt),
-			  payload, RXE_FROM_MR_OBJ, &icrc);
+			  payload, RXE_FROM_MR_OBJ);
 	if (err)
 		pr_err("Failed copying memory\n");
 
 	if (bth_pad(&ack_pkt)) {
-		struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 		u8 *pad = payload_addr(&ack_pkt) + payload;
 
 		memset(pad, 0, bth_pad(&ack_pkt));
-		icrc = rxe_crc32(rxe, icrc, pad, bth_pad(&ack_pkt));
 	}
-	p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt);
-	*p = ~icrc;
 
 	err = rxe_xmit_packet(qp, &ack_pkt, skb);
 	if (err) {
@@ -984,7 +967,7 @@ static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
 	struct sk_buff *skb;
 
 	skb = prepare_ack_packet(qp, pkt, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE,
-				 0, psn, syndrome, NULL);
+				 0, psn, syndrome);
 	if (!skb) {
 		err = -ENOMEM;
 		goto err1;
@@ -1008,7 +991,7 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
 
 	skb = prepare_ack_packet(qp, pkt, &ack_pkt,
 				 IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, 0, pkt->psn,
-				 syndrome, NULL);
+				 syndrome);
 	if (!skb) {
 		rc = -ENOMEM;
 		goto out;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index c223959ac174..267b5a9c345d 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -391,59 +391,52 @@ static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
 	return err;
 }
 
-static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
-				   struct ib_qp_init_attr *init,
-				   struct ib_udata *udata)
+static int rxe_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init,
+			 struct ib_udata *udata)
 {
 	int err;
-	struct rxe_dev *rxe = to_rdev(ibpd->device);
-	struct rxe_pd *pd = to_rpd(ibpd);
-	struct rxe_qp *qp;
+	struct rxe_dev *rxe = to_rdev(ibqp->device);
+	struct rxe_pd *pd = to_rpd(ibqp->pd);
+	struct rxe_qp *qp = to_rqp(ibqp);
 	struct rxe_create_qp_resp __user *uresp = NULL;
 
 	if (udata) {
 		if (udata->outlen < sizeof(*uresp))
-			return ERR_PTR(-EINVAL);
+			return -EINVAL;
 		uresp = udata->outbuf;
 	}
 
 	if (init->create_flags)
-		return ERR_PTR(-EOPNOTSUPP);
+		return -EOPNOTSUPP;
 
 	err = rxe_qp_chk_init(rxe, init);
 	if (err)
-		goto err1;
-
-	qp = rxe_alloc(&rxe->qp_pool);
-	if (!qp) {
-		err = -ENOMEM;
-		goto err1;
-	}
+		return err;
 
 	if (udata) {
-		if (udata->inlen) {
-			err = -EINVAL;
-			goto err2;
-		}
+		if (udata->inlen)
+			return -EINVAL;
+
 		qp->is_user = true;
 	} else {
 		qp->is_user = false;
 	}
 
-	rxe_add_index(qp);
+	err = rxe_add_to_pool(&rxe->qp_pool, qp);
+	if (err)
+		return err;
 
-	err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibpd, udata);
+	rxe_add_index(qp);
+	err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibqp->pd, udata);
 	if (err)
-		goto err3;
+		goto qp_init;
 
-	return &qp->ibqp;
+	return 0;
 
-err3:
+qp_init:
 	rxe_drop_index(qp);
-err2:
 	rxe_drop_ref(qp);
-err1:
-	return ERR_PTR(err);
+	return err;
 }
 
 static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
@@ -1145,6 +1138,7 @@ static const struct ib_device_ops rxe_dev_ops = {
 	INIT_RDMA_OBJ_SIZE(ib_ah, rxe_ah, ibah),
 	INIT_RDMA_OBJ_SIZE(ib_cq, rxe_cq, ibcq),
 	INIT_RDMA_OBJ_SIZE(ib_pd, rxe_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_qp, rxe_qp, ibqp),
 	INIT_RDMA_OBJ_SIZE(ib_srq, rxe_srq, ibsrq),
 	INIT_RDMA_OBJ_SIZE(ib_ucontext, rxe_ucontext, ibuc),
 	INIT_RDMA_OBJ_SIZE(ib_mw, rxe_mw, ibmw),
@@ -1154,7 +1148,6 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
 {
 	int err;
 	struct ib_device *dev = &rxe->ib_dev;
-	struct crypto_shash *tfm;
 
 	strscpy(dev->node_desc, "rxe", sizeof(dev->node_desc));
 
@@ -1173,13 +1166,9 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
 	if (err)
 		return err;
 
-	tfm = crypto_alloc_shash("crc32", 0, 0);
-	if (IS_ERR(tfm)) {
-		pr_err("failed to allocate crc algorithm err:%ld\n",
-		       PTR_ERR(tfm));
-		return PTR_ERR(tfm);
-	}
-	rxe->tfm = tfm;
+	err = rxe_icrc_init(rxe);
+	if (err)
+		return err;
 
 	err = ib_register_device(dev, ibdev_name, NULL);
 	if (err)
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 959a3260fcab..ac2a2148027f 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -210,8 +210,8 @@ struct rxe_resp_info {
 };
 
 struct rxe_qp {
-	struct rxe_pool_entry	pelem;
 	struct ib_qp		ibqp;
+	struct rxe_pool_entry	pelem;
 	struct ib_qp_attr	attr;
 	unsigned int		valid;
 	unsigned int		mtu;
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index cf55326f2ab4..9093e6a80b26 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -297,6 +297,7 @@ static const struct ib_device_ops siw_device_ops = {
 
 	INIT_RDMA_OBJ_SIZE(ib_cq, siw_cq, base_cq),
 	INIT_RDMA_OBJ_SIZE(ib_pd, siw_pd, base_pd),
+	INIT_RDMA_OBJ_SIZE(ib_qp, siw_qp, base_qp),
 	INIT_RDMA_OBJ_SIZE(ib_srq, siw_srq, base_srq),
 	INIT_RDMA_OBJ_SIZE(ib_ucontext, siw_ucontext, base_ucontext),
 };
diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c
index ddb2e66f9f13..7e01f2438afc 100644
--- a/drivers/infiniband/sw/siw/siw_qp.c
+++ b/drivers/infiniband/sw/siw/siw_qp.c
@@ -1344,6 +1344,4 @@ void siw_free_qp(struct kref *ref)
 	siw_put_tx_cpu(qp->tx_cpu);
 
 	atomic_dec(&sdev->num_qp);
-	siw_dbg_qp(qp, "free QP\n");
-	kfree_rcu(qp, rcu);
 }
diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c
index 7989c4043db4..1f4e60257700 100644
--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
@@ -76,7 +76,7 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr)
 			if (unlikely(!p))
 				return -EFAULT;
 
-			buffer = kmap(p);
+			buffer = kmap_local_page(p);
 
 			if (likely(PAGE_SIZE - off >= bytes)) {
 				memcpy(paddr, buffer + off, bytes);
@@ -84,7 +84,7 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr)
 				unsigned long part = bytes - (PAGE_SIZE - off);
 
 				memcpy(paddr, buffer + off, part);
-				kunmap(p);
+				kunmap_local(buffer);
 
 				if (!mem->is_pbl)
 					p = siw_get_upage(mem->umem,
@@ -96,10 +96,10 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr)
 				if (unlikely(!p))
 					return -EFAULT;
 
-				buffer = kmap(p);
+				buffer = kmap_local_page(p);
 				memcpy(paddr + part, buffer, bytes - part);
 			}
-			kunmap(p);
+			kunmap_local(buffer);
 		}
 	}
 	return (int)bytes;
@@ -396,13 +396,20 @@ static int siw_0copy_tx(struct socket *s, struct page **page,
 
 #define MAX_TRAILER (MPA_CRC_SIZE + 4)
 
-static void siw_unmap_pages(struct page **pp, unsigned long kmap_mask)
+static void siw_unmap_pages(struct kvec *iov, unsigned long kmap_mask, int len)
 {
-	while (kmap_mask) {
-		if (kmap_mask & BIT(0))
-			kunmap(*pp);
-		pp++;
-		kmap_mask >>= 1;
+	int i;
+
+	/*
+	 * Work backwards through the array to honor the kmap_local_page()
+	 * ordering requirements.
+	 */
+	for (i = (len-1); i >= 0; i--) {
+		if (kmap_mask & BIT(i)) {
+			unsigned long addr = (unsigned long)iov[i].iov_base;
+
+			kunmap_local((void *)(addr & PAGE_MASK));
+		}
 	}
 }
 
@@ -485,6 +492,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
 
 		while (sge_len) {
 			size_t plen = min((int)PAGE_SIZE - fp_off, sge_len);
+			void *kaddr;
 
 			if (!is_kva) {
 				struct page *p;
@@ -497,7 +505,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
 					p = siw_get_upage(mem->umem,
 							  sge->laddr + sge_off);
 				if (unlikely(!p)) {
-					siw_unmap_pages(page_array, kmap_mask);
+					siw_unmap_pages(iov, kmap_mask, seg);
 					wqe->processed -= c_tx->bytes_unsent;
 					rv = -EFAULT;
 					goto done_crc;
@@ -505,11 +513,12 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
 				page_array[seg] = p;
 
 				if (!c_tx->use_sendpage) {
-					iov[seg].iov_base = kmap(p) + fp_off;
-					iov[seg].iov_len = plen;
+					void *kaddr = kmap_local_page(p);
 
 					/* Remember for later kunmap() */
 					kmap_mask |= BIT(seg);
+					iov[seg].iov_base = kaddr + fp_off;
+					iov[seg].iov_len = plen;
 
 					if (do_crc)
 						crypto_shash_update(
@@ -517,10 +526,11 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
 							iov[seg].iov_base,
 							plen);
 				} else if (do_crc) {
+					kaddr = kmap_local_page(p);
 					crypto_shash_update(c_tx->mpa_crc_hd,
-							    kmap(p) + fp_off,
+							    kaddr + fp_off,
 							    plen);
-					kunmap(p);
+					kunmap_local(kaddr);
 				}
 			} else {
 				u64 va = sge->laddr + sge_off;
@@ -540,7 +550,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
 
 			if (++seg > (int)MAX_ARRAY) {
 				siw_dbg_qp(tx_qp(c_tx), "to many fragments\n");
-				siw_unmap_pages(page_array, kmap_mask);
+				siw_unmap_pages(iov, kmap_mask, seg-1);
 				wqe->processed -= c_tx->bytes_unsent;
 				rv = -EMSGSIZE;
 				goto done_crc;
@@ -591,7 +601,7 @@ sge_done:
 	} else {
 		rv = kernel_sendmsg(s, &msg, iov, seg + 1,
 				    hdr_len + data_len + trl_len);
-		siw_unmap_pages(page_array, kmap_mask);
+		siw_unmap_pages(iov, kmap_mask, seg);
 	}
 	if (rv < (int)hdr_len) {
 		/* Not even complete hdr pushed or negative rv */
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 3f175f220a22..1b36350601fa 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -285,16 +285,16 @@ siw_mmap_entry_insert(struct siw_ucontext *uctx,
  *
  * Create QP of requested size on given device.
  *
- * @pd:		Protection Domain
+ * @qp:		Queue pait
  * @attrs:	Initial QP attributes.
  * @udata:	used to provide QP ID, SQ and RQ size back to user.
  */
 
-struct ib_qp *siw_create_qp(struct ib_pd *pd,
-			    struct ib_qp_init_attr *attrs,
-			    struct ib_udata *udata)
+int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
+		  struct ib_udata *udata)
 {
-	struct siw_qp *qp = NULL;
+	struct ib_pd *pd = ibqp->pd;
+	struct siw_qp *qp = to_siw_qp(ibqp);
 	struct ib_device *base_dev = pd->device;
 	struct siw_device *sdev = to_siw_dev(base_dev);
 	struct siw_ucontext *uctx =
@@ -307,17 +307,16 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
 	siw_dbg(base_dev, "create new QP\n");
 
 	if (attrs->create_flags)
-		return ERR_PTR(-EOPNOTSUPP);
+		return -EOPNOTSUPP;
 
 	if (atomic_inc_return(&sdev->num_qp) > SIW_MAX_QP) {
 		siw_dbg(base_dev, "too many QP's\n");
-		rv = -ENOMEM;
-		goto err_out;
+		return -ENOMEM;
 	}
 	if (attrs->qp_type != IB_QPT_RC) {
 		siw_dbg(base_dev, "only RC QP's supported\n");
 		rv = -EOPNOTSUPP;
-		goto err_out;
+		goto err_atomic;
 	}
 	if ((attrs->cap.max_send_wr > SIW_MAX_QP_WR) ||
 	    (attrs->cap.max_recv_wr > SIW_MAX_QP_WR) ||
@@ -325,13 +324,13 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
 	    (attrs->cap.max_recv_sge > SIW_MAX_SGE)) {
 		siw_dbg(base_dev, "QP size error\n");
 		rv = -EINVAL;
-		goto err_out;
+		goto err_atomic;
 	}
 	if (attrs->cap.max_inline_data > SIW_MAX_INLINE) {
 		siw_dbg(base_dev, "max inline send: %d > %d\n",
 			attrs->cap.max_inline_data, (int)SIW_MAX_INLINE);
 		rv = -EINVAL;
-		goto err_out;
+		goto err_atomic;
 	}
 	/*
 	 * NOTE: we allow for zero element SQ and RQ WQE's SGL's
@@ -340,19 +339,15 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
 	if (attrs->cap.max_send_wr + attrs->cap.max_recv_wr == 0) {
 		siw_dbg(base_dev, "QP must have send or receive queue\n");
 		rv = -EINVAL;
-		goto err_out;
+		goto err_atomic;
 	}
 
 	if (!attrs->send_cq || (!attrs->recv_cq && !attrs->srq)) {
 		siw_dbg(base_dev, "send CQ or receive CQ invalid\n");
 		rv = -EINVAL;
-		goto err_out;
-	}
-	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
-	if (!qp) {
-		rv = -ENOMEM;
-		goto err_out;
+		goto err_atomic;
 	}
+
 	init_rwsem(&qp->state_lock);
 	spin_lock_init(&qp->sq_lock);
 	spin_lock_init(&qp->rq_lock);
@@ -360,7 +355,7 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
 
 	rv = siw_qp_add(sdev, qp);
 	if (rv)
-		goto err_out;
+		goto err_atomic;
 
 	num_sqe = attrs->cap.max_send_wr;
 	num_rqe = attrs->cap.max_recv_wr;
@@ -482,23 +477,20 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
 	list_add_tail(&qp->devq, &sdev->qp_list);
 	spin_unlock_irqrestore(&sdev->lock, flags);
 
-	return &qp->base_qp;
+	return 0;
 
 err_out_xa:
 	xa_erase(&sdev->qp_xa, qp_id(qp));
-err_out:
-	if (qp) {
-		if (uctx) {
-			rdma_user_mmap_entry_remove(qp->sq_entry);
-			rdma_user_mmap_entry_remove(qp->rq_entry);
-		}
-		vfree(qp->sendq);
-		vfree(qp->recvq);
-		kfree(qp);
+	if (uctx) {
+		rdma_user_mmap_entry_remove(qp->sq_entry);
+		rdma_user_mmap_entry_remove(qp->rq_entry);
 	}
-	atomic_dec(&sdev->num_qp);
+	vfree(qp->sendq);
+	vfree(qp->recvq);
 
-	return ERR_PTR(rv);
+err_atomic:
+	atomic_dec(&sdev->num_qp);
+	return rv;
 }
 
 /*
diff --git a/drivers/infiniband/sw/siw/siw_verbs.h b/drivers/infiniband/sw/siw/siw_verbs.h
index 67ac08886a70..09964234f8d3 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.h
+++ b/drivers/infiniband/sw/siw/siw_verbs.h
@@ -50,9 +50,8 @@ int siw_query_gid(struct ib_device *base_dev, u32 port, int idx,
 		  union ib_gid *gid);
 int siw_alloc_pd(struct ib_pd *base_pd, struct ib_udata *udata);
 int siw_dealloc_pd(struct ib_pd *base_pd, struct ib_udata *udata);
-struct ib_qp *siw_create_qp(struct ib_pd *base_pd,
-			    struct ib_qp_init_attr *attr,
-			    struct ib_udata *udata);
+int siw_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attr,
+		  struct ib_udata *udata);
 int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr,
 		 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
 int siw_verbs_modify_qp(struct ib_qp *base_qp, struct ib_qp_attr *attr,
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index afec40da9b58..9776b755d848 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -159,7 +159,7 @@ iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_domain *domain)
 {
 	domain->sig_type = IB_SIG_TYPE_T10_DIF;
 	domain->sig.dif.pi_interval = scsi_prot_interval(sc);
-	domain->sig.dif.ref_tag = t10_pi_ref_tag(sc->request);
+	domain->sig.dif.ref_tag = t10_pi_ref_tag(scsi_cmd_to_rq(sc));
 	/*
 	 * At the moment we hard code those, but in the future
 	 * we will take them from sc.
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index b44cbb8e84eb..b566f7cb7797 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -949,7 +949,7 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
 			sector_t sector_off = mr_status.sig_err.sig_err_offset;
 
 			sector_div(sector_off, sector_size + 8);
-			*sector = scsi_get_lba(iser_task->sc) + sector_off;
+			*sector = scsi_get_sector(iser_task->sc) + sector_off;
 
 			iser_err("PI error found type %d at sector %llx "
 			       "expected %x vs actual %x\n",
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c
index 26bbe5d6dff5..5e780bdd763d 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c
@@ -20,7 +20,7 @@ void rtrs_clt_update_wc_stats(struct rtrs_clt_con *con)
 
 	cpu = raw_smp_processor_id();
 	s = this_cpu_ptr(stats->pcpu_stats);
-	if (unlikely(con->cpu != cpu)) {
+	if (con->cpu != cpu) {
 		s->cpu_migr.to++;
 
 		/* Careful here, override s pointer */
@@ -180,7 +180,7 @@ void rtrs_clt_update_all_stats(struct rtrs_clt_io_req *req, int dir)
 
 	len = req->usr_len + req->data_len;
 	rtrs_clt_update_rdma_stats(stats, len, dir);
-	if (sess->clt->mp_policy == MP_POLICY_MIN_INFLIGHT)
+	if (req->mp_policy == MP_POLICY_MIN_INFLIGHT)
 		atomic_inc(&stats->inflight);
 }
 
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index f2c40e50f25e..bc8824b4ee0d 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -75,9 +75,9 @@ __rtrs_get_permit(struct rtrs_clt *clt, enum rtrs_clt_con_type con_type)
 	 */
 	do {
 		bit = find_first_zero_bit(clt->permits_map, max_depth);
-		if (unlikely(bit >= max_depth))
+		if (bit >= max_depth)
 			return NULL;
-	} while (unlikely(test_and_set_bit_lock(bit, clt->permits_map)));
+	} while (test_and_set_bit_lock(bit, clt->permits_map));
 
 	permit = get_permit(clt, bit);
 	WARN_ON(permit->mem_id != bit);
@@ -115,14 +115,14 @@ struct rtrs_permit *rtrs_clt_get_permit(struct rtrs_clt *clt,
 	DEFINE_WAIT(wait);
 
 	permit = __rtrs_get_permit(clt, con_type);
-	if (likely(permit) || !can_wait)
+	if (permit || !can_wait)
 		return permit;
 
 	do {
 		prepare_to_wait(&clt->permits_wait, &wait,
 				TASK_UNINTERRUPTIBLE);
 		permit = __rtrs_get_permit(clt, con_type);
-		if (likely(permit))
+		if (permit)
 			break;
 
 		io_schedule();
@@ -175,7 +175,7 @@ struct rtrs_clt_con *rtrs_permit_to_clt_con(struct rtrs_clt_sess *sess,
 {
 	int id = 0;
 
-	if (likely(permit->con_type == RTRS_IO_CON))
+	if (permit->con_type == RTRS_IO_CON)
 		id = (permit->cpu_id % (sess->s.irq_con_num - 1)) + 1;
 
 	return to_clt_con(sess->s.con[id]);
@@ -329,7 +329,7 @@ static void rtrs_clt_fast_reg_done(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct rtrs_clt_con *con = to_clt_con(wc->qp->qp_context);
 
-	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+	if (wc->status != IB_WC_SUCCESS) {
 		rtrs_err(con->c.sess, "Failed IB_WR_REG_MR: %s\n",
 			  ib_wc_status_msg(wc->status));
 		rtrs_rdma_error_recovery(con);
@@ -349,13 +349,13 @@ static void rtrs_clt_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc)
 		container_of(wc->wr_cqe, typeof(*req), inv_cqe);
 	struct rtrs_clt_con *con = to_clt_con(wc->qp->qp_context);
 
-	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+	if (wc->status != IB_WC_SUCCESS) {
 		rtrs_err(con->c.sess, "Failed IB_WR_LOCAL_INV: %s\n",
 			  ib_wc_status_msg(wc->status));
 		rtrs_rdma_error_recovery(con);
 	}
 	req->need_inv = false;
-	if (likely(req->need_inv_comp))
+	if (req->need_inv_comp)
 		complete(&req->inv_comp);
 	else
 		/* Complete request from INV callback */
@@ -390,7 +390,7 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno,
 	sess = to_clt_sess(con->c.sess);
 
 	if (req->sg_cnt) {
-		if (unlikely(req->dir == DMA_FROM_DEVICE && req->need_inv)) {
+		if (req->dir == DMA_FROM_DEVICE && req->need_inv) {
 			/*
 			 * We are here to invalidate read requests
 			 * ourselves.  In normal scenario server should
@@ -405,7 +405,7 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno,
 			 *        should do that ourselves.
 			 */
 
-			if (likely(can_wait)) {
+			if (can_wait) {
 				req->need_inv_comp = true;
 			} else {
 				/* This should be IO path, so always notify */
@@ -416,10 +416,10 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno,
 
 			refcount_inc(&req->ref);
 			err = rtrs_inv_rkey(req);
-			if (unlikely(err)) {
+			if (err) {
 				rtrs_err(con->c.sess, "Send INV WR key=%#x: %d\n",
 					  req->mr->rkey, err);
-			} else if (likely(can_wait)) {
+			} else if (can_wait) {
 				wait_for_completion(&req->inv_comp);
 			} else {
 				/*
@@ -438,7 +438,7 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno,
 	}
 	if (!refcount_dec_and_test(&req->ref))
 		return;
-	if (sess->clt->mp_policy == MP_POLICY_MIN_INFLIGHT)
+	if (req->mp_policy == MP_POLICY_MIN_INFLIGHT)
 		atomic_dec(&sess->stats->inflight);
 
 	req->in_use = false;
@@ -463,7 +463,7 @@ static int rtrs_post_send_rdma(struct rtrs_clt_con *con,
 	enum ib_send_flags flags;
 	struct ib_sge sge;
 
-	if (unlikely(!req->sg_size)) {
+	if (!req->sg_size) {
 		rtrs_wrn(con->c.sess,
 			 "Doing RDMA Write failed, no data supplied\n");
 		return -EINVAL;
@@ -478,7 +478,7 @@ static int rtrs_post_send_rdma(struct rtrs_clt_con *con,
 	 * From time to time we have to post signalled sends,
 	 * or send queue will fill up and only QP reset can help.
 	 */
-	flags = atomic_inc_return(&con->io_cnt) % sess->queue_depth ?
+	flags = atomic_inc_return(&con->c.wr_cnt) % sess->s.signal_interval ?
 			0 : IB_SEND_SIGNALED;
 
 	ib_dma_sync_single_for_device(sess->s.dev->ib_dev, req->iu->dma_addr,
@@ -513,7 +513,7 @@ static void rtrs_clt_recv_done(struct rtrs_clt_con *con, struct ib_wc *wc)
 	iu = container_of(wc->wr_cqe, struct rtrs_iu,
 			  cqe);
 	err = rtrs_iu_post_recv(&con->c, iu);
-	if (unlikely(err)) {
+	if (err) {
 		rtrs_err(con->c.sess, "post iu failed %d\n", err);
 		rtrs_rdma_error_recovery(con);
 	}
@@ -533,7 +533,7 @@ static void rtrs_clt_rkey_rsp_done(struct rtrs_clt_con *con, struct ib_wc *wc)
 
 	iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe);
 
-	if (unlikely(wc->byte_len < sizeof(*msg))) {
+	if (wc->byte_len < sizeof(*msg)) {
 		rtrs_err(con->c.sess, "rkey response is malformed: size %d\n",
 			  wc->byte_len);
 		goto out;
@@ -541,7 +541,7 @@ static void rtrs_clt_rkey_rsp_done(struct rtrs_clt_con *con, struct ib_wc *wc)
 	ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, iu->dma_addr,
 				   iu->size, DMA_FROM_DEVICE);
 	msg = iu->buf;
-	if (unlikely(le16_to_cpu(msg->type) != RTRS_MSG_RKEY_RSP)) {
+	if (le16_to_cpu(msg->type) != RTRS_MSG_RKEY_RSP) {
 		rtrs_err(sess->clt, "rkey response is malformed: type %d\n",
 			  le16_to_cpu(msg->type));
 		goto out;
@@ -551,8 +551,8 @@ static void rtrs_clt_rkey_rsp_done(struct rtrs_clt_con *con, struct ib_wc *wc)
 		goto out;
 
 	rtrs_from_imm(be32_to_cpu(wc->ex.imm_data), &imm_type, &imm_payload);
-	if (likely(imm_type == RTRS_IO_RSP_IMM ||
-		   imm_type == RTRS_IO_RSP_W_INV_IMM)) {
+	if (imm_type == RTRS_IO_RSP_IMM ||
+	    imm_type == RTRS_IO_RSP_W_INV_IMM) {
 		u32 msg_id;
 
 		w_inval = (imm_type == RTRS_IO_RSP_W_INV_IMM);
@@ -605,7 +605,7 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
 	bool w_inval = false;
 	int err;
 
-	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+	if (wc->status != IB_WC_SUCCESS) {
 		if (wc->status != IB_WC_WR_FLUSH_ERR) {
 			rtrs_err(sess->clt, "RDMA failed: %s\n",
 				  ib_wc_status_msg(wc->status));
@@ -625,8 +625,8 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
 			return;
 		rtrs_from_imm(be32_to_cpu(wc->ex.imm_data),
 			       &imm_type, &imm_payload);
-		if (likely(imm_type == RTRS_IO_RSP_IMM ||
-			   imm_type == RTRS_IO_RSP_W_INV_IMM)) {
+		if (imm_type == RTRS_IO_RSP_IMM ||
+		    imm_type == RTRS_IO_RSP_W_INV_IMM) {
 			u32 msg_id;
 
 			w_inval = (imm_type == RTRS_IO_RSP_W_INV_IMM);
@@ -657,7 +657,7 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
 			err = rtrs_post_recv_empty_x2(&con->c, &io_comp_cqe);
 		else
 			err = rtrs_post_recv_empty(&con->c, &io_comp_cqe);
-		if (unlikely(err)) {
+		if (err) {
 			rtrs_err(con->c.sess, "rtrs_post_recv_empty(): %d\n",
 				  err);
 			rtrs_rdma_error_recovery(con);
@@ -680,6 +680,7 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
 	case IB_WC_RDMA_WRITE:
 		/*
 		 * post_send() RDMA write completions of IO reqs (read/write)
+		 * and hb.
 		 */
 		break;
 
@@ -702,7 +703,7 @@ static int post_recv_io(struct rtrs_clt_con *con, size_t q_size)
 		} else {
 			err = rtrs_post_recv_empty(&con->c, &io_comp_cqe);
 		}
-		if (unlikely(err))
+		if (err)
 			return err;
 	}
 
@@ -727,7 +728,7 @@ static int post_recv_sess(struct rtrs_clt_sess *sess)
 		q_size *= 2;
 
 		err = post_recv_io(to_clt_con(sess->s.con[cid]), q_size);
-		if (unlikely(err)) {
+		if (err) {
 			rtrs_err(sess->clt, "post_recv_io(), err: %d\n", err);
 			return err;
 		}
@@ -788,7 +789,7 @@ static struct rtrs_clt_sess *get_next_path_rr(struct path_it *it)
 
 	ppcpu_path = this_cpu_ptr(clt->pcpu_path);
 	path = rcu_dereference(*ppcpu_path);
-	if (unlikely(!path))
+	if (!path)
 		path = list_first_or_null_rcu(&clt->paths_list,
 					      typeof(*path), s.entry);
 	else
@@ -819,10 +820,10 @@ static struct rtrs_clt_sess *get_next_path_min_inflight(struct path_it *it)
 	int inflight;
 
 	list_for_each_entry_rcu(sess, &clt->paths_list, s.entry) {
-		if (unlikely(READ_ONCE(sess->state) != RTRS_CLT_CONNECTED))
+		if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED)
 			continue;
 
-		if (unlikely(!list_empty(raw_cpu_ptr(sess->mp_skip_entry))))
+		if (!list_empty(raw_cpu_ptr(sess->mp_skip_entry)))
 			continue;
 
 		inflight = atomic_read(&sess->stats->inflight);
@@ -870,10 +871,10 @@ static struct rtrs_clt_sess *get_next_path_min_latency(struct path_it *it)
 	ktime_t latency;
 
 	list_for_each_entry_rcu(sess, &clt->paths_list, s.entry) {
-		if (unlikely(READ_ONCE(sess->state) != RTRS_CLT_CONNECTED))
+		if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED)
 			continue;
 
-		if (unlikely(!list_empty(raw_cpu_ptr(sess->mp_skip_entry))))
+		if (!list_empty(raw_cpu_ptr(sess->mp_skip_entry)))
 			continue;
 
 		latency = sess->s.hb_cur_latency;
@@ -963,6 +964,7 @@ static void rtrs_clt_init_req(struct rtrs_clt_io_req *req,
 	req->need_inv_comp = false;
 	req->inv_errno = 0;
 	refcount_set(&req->ref, 1);
+	req->mp_policy = sess->clt->mp_policy;
 
 	iov_iter_kvec(&iter, READ, vec, 1, usr_len);
 	len = _copy_from_iter(req->iu->buf, usr_len, &iter);
@@ -1043,7 +1045,7 @@ static int rtrs_post_rdma_write_sg(struct rtrs_clt_con *con,
 	 * From time to time we have to post signalled sends,
 	 * or send queue will fill up and only QP reset can help.
 	 */
-	flags = atomic_inc_return(&con->io_cnt) % sess->queue_depth ?
+	flags = atomic_inc_return(&con->c.wr_cnt) % sess->s.signal_interval ?
 			0 : IB_SEND_SIGNALED;
 
 	ib_dma_sync_single_for_device(sess->s.dev->ib_dev, req->iu->dma_addr,
@@ -1062,7 +1064,7 @@ static int rtrs_map_sg_fr(struct rtrs_clt_io_req *req, size_t count)
 	nr = ib_map_mr_sg(req->mr, req->sglist, count, NULL, SZ_4K);
 	if (nr < 0)
 		return nr;
-	if (unlikely(nr < req->sg_cnt))
+	if (nr < req->sg_cnt)
 		return -EINVAL;
 	ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));
 
@@ -1086,7 +1088,7 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req)
 
 	const size_t tsize = sizeof(*msg) + req->data_len + req->usr_len;
 
-	if (unlikely(tsize > sess->chunk_size)) {
+	if (tsize > sess->chunk_size) {
 		rtrs_wrn(s, "Write request failed, size too big %zu > %d\n",
 			  tsize, sess->chunk_size);
 		return -EMSGSIZE;
@@ -1094,7 +1096,7 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req)
 	if (req->sg_cnt) {
 		count = ib_dma_map_sg(sess->s.dev->ib_dev, req->sglist,
 				      req->sg_cnt, req->dir);
-		if (unlikely(!count)) {
+		if (!count) {
 			rtrs_wrn(s, "Write request failed, map failed\n");
 			return -EINVAL;
 		}
@@ -1148,12 +1150,12 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req)
 	ret = rtrs_post_rdma_write_sg(req->con, req, rbuf, fr_en,
 				      req->usr_len + sizeof(*msg),
 				      imm, wr, &inv_wr);
-	if (unlikely(ret)) {
+	if (ret) {
 		rtrs_err_rl(s,
 			    "Write request failed: error=%d path=%s [%s:%u]\n",
 			    ret, kobject_name(&sess->kobj), sess->hca_name,
 			    sess->hca_port);
-		if (sess->clt->mp_policy == MP_POLICY_MIN_INFLIGHT)
+		if (req->mp_policy == MP_POLICY_MIN_INFLIGHT)
 			atomic_dec(&sess->stats->inflight);
 		if (req->sg_cnt)
 			ib_dma_unmap_sg(sess->s.dev->ib_dev, req->sglist,
@@ -1179,7 +1181,7 @@ static int rtrs_clt_read_req(struct rtrs_clt_io_req *req)
 
 	const size_t tsize = sizeof(*msg) + req->data_len + req->usr_len;
 
-	if (unlikely(tsize > sess->chunk_size)) {
+	if (tsize > sess->chunk_size) {
 		rtrs_wrn(s,
 			  "Read request failed, message size is %zu, bigger than CHUNK_SIZE %d\n",
 			  tsize, sess->chunk_size);
@@ -1189,7 +1191,7 @@ static int rtrs_clt_read_req(struct rtrs_clt_io_req *req)
 	if (req->sg_cnt) {
 		count = ib_dma_map_sg(dev->ib_dev, req->sglist, req->sg_cnt,
 				      req->dir);
-		if (unlikely(!count)) {
+		if (!count) {
 			rtrs_wrn(s,
 				  "Read request failed, dma map failed\n");
 			return -EINVAL;
@@ -1254,12 +1256,12 @@ static int rtrs_clt_read_req(struct rtrs_clt_io_req *req)
 
 	ret = rtrs_post_send_rdma(req->con, req, &sess->rbufs[buf_id],
 				   req->data_len, imm, wr);
-	if (unlikely(ret)) {
+	if (ret) {
 		rtrs_err_rl(s,
 			    "Read request failed: error=%d path=%s [%s:%u]\n",
 			    ret, kobject_name(&sess->kobj), sess->hca_name,
 			    sess->hca_port);
-		if (sess->clt->mp_policy == MP_POLICY_MIN_INFLIGHT)
+		if (req->mp_policy == MP_POLICY_MIN_INFLIGHT)
 			atomic_dec(&sess->stats->inflight);
 		req->need_inv = false;
 		if (req->sg_cnt)
@@ -1287,15 +1289,14 @@ static int rtrs_clt_failover_req(struct rtrs_clt *clt,
 	for (path_it_init(&it, clt);
 	     (alive_sess = it.next_path(&it)) && it.i < it.clt->paths_num;
 	     it.i++) {
-		if (unlikely(READ_ONCE(alive_sess->state) !=
-			     RTRS_CLT_CONNECTED))
+		if (READ_ONCE(alive_sess->state) != RTRS_CLT_CONNECTED)
 			continue;
 		req = rtrs_clt_get_copy_req(alive_sess, fail_req);
 		if (req->dir == DMA_TO_DEVICE)
 			err = rtrs_clt_write_req(req);
 		else
 			err = rtrs_clt_read_req(req);
-		if (unlikely(err)) {
+		if (err) {
 			req->in_use = false;
 			continue;
 		}
@@ -1330,7 +1331,7 @@ static void fail_all_outstanding_reqs(struct rtrs_clt_sess *sess)
 		complete_rdma_req(req, -ECONNABORTED, false, true);
 
 		err = rtrs_clt_failover_req(clt, req);
-		if (unlikely(err))
+		if (err)
 			/* Failover failed, notify anyway */
 			req->conf(req->priv, err);
 	}
@@ -1601,7 +1602,8 @@ static int create_con(struct rtrs_clt_sess *sess, unsigned int cid)
 	con->cpu  = (cid ? cid - 1 : 0) % nr_cpu_ids;
 	con->c.cid = cid;
 	con->c.sess = &sess->s;
-	atomic_set(&con->io_cnt, 0);
+	/* Align with srv, init as 1 */
+	atomic_set(&con->c.wr_cnt, 1);
 	mutex_init(&con->con_mutex);
 
 	sess->s.con[cid] = &con->c;
@@ -1678,6 +1680,7 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
 			      sess->queue_depth * 3 + 1);
 		max_send_sge = 2;
 	}
+	atomic_set(&con->c.sq_wr_avail, max_send_wr);
 	cq_num = max_send_wr + max_recv_wr;
 	/* alloc iu to recv new rkey reply when server reports flags set */
 	if (sess->flags & RTRS_MSG_NEW_RKEY_F || con->c.cid == 0) {
@@ -1841,13 +1844,14 @@ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con,
 		}
 
 		if (!sess->rbufs) {
-			kfree(sess->rbufs);
 			sess->rbufs = kcalloc(queue_depth, sizeof(*sess->rbufs),
 					      GFP_KERNEL);
 			if (!sess->rbufs)
 				return -ENOMEM;
 		}
 		sess->queue_depth = queue_depth;
+		sess->s.signal_interval = min_not_zero(queue_depth,
+						(unsigned short) SERVICE_CON_QUEUE_DEPTH);
 		sess->max_hdr_size = le32_to_cpu(msg->max_hdr_size);
 		sess->max_io_size = le32_to_cpu(msg->max_io_size);
 		sess->flags = le32_to_cpu(msg->flags);
@@ -1958,7 +1962,7 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id,
 		break;
 	case RDMA_CM_EVENT_ESTABLISHED:
 		cm_err = rtrs_rdma_conn_established(con, ev);
-		if (likely(!cm_err)) {
+		if (!cm_err) {
 			/*
 			 * Report success and wake up. Here we abuse state_wq,
 			 * i.e. wake up without state change, but we set cm_err.
@@ -2377,7 +2381,7 @@ static void rtrs_clt_info_req_done(struct ib_cq *cq, struct ib_wc *wc)
 	iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe);
 	rtrs_iu_free(iu, sess->s.dev->ib_dev, 1);
 
-	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+	if (wc->status != IB_WC_SUCCESS) {
 		rtrs_err(sess->clt, "Sess info request send failed: %s\n",
 			  ib_wc_status_msg(wc->status));
 		rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL);
@@ -2394,7 +2398,7 @@ static int process_info_rsp(struct rtrs_clt_sess *sess,
 	int i, sgi;
 
 	sg_cnt = le16_to_cpu(msg->sg_cnt);
-	if (unlikely(!sg_cnt || (sess->queue_depth % sg_cnt))) {
+	if (!sg_cnt || (sess->queue_depth % sg_cnt)) {
 		rtrs_err(sess->clt, "Incorrect sg_cnt %d, is not multiple\n",
 			  sg_cnt);
 		return -EINVAL;
@@ -2404,9 +2408,8 @@ static int process_info_rsp(struct rtrs_clt_sess *sess,
 	 * Check if IB immediate data size is enough to hold the mem_id and
 	 * the offset inside the memory chunk.
 	 */
-	if (unlikely((ilog2(sg_cnt - 1) + 1) +
-		     (ilog2(sess->chunk_size - 1) + 1) >
-		     MAX_IMM_PAYL_BITS)) {
+	if ((ilog2(sg_cnt - 1) + 1) + (ilog2(sess->chunk_size - 1) + 1) >
+	    MAX_IMM_PAYL_BITS) {
 		rtrs_err(sess->clt,
 			  "RDMA immediate size (%db) not enough to encode %d buffers of size %dB\n",
 			  MAX_IMM_PAYL_BITS, sg_cnt, sess->chunk_size);
@@ -2424,7 +2427,7 @@ static int process_info_rsp(struct rtrs_clt_sess *sess,
 
 		total_len += len;
 
-		if (unlikely(!len || (len % sess->chunk_size))) {
+		if (!len || (len % sess->chunk_size)) {
 			rtrs_err(sess->clt, "Incorrect [%d].len %d\n", sgi,
 				  len);
 			return -EINVAL;
@@ -2438,11 +2441,11 @@ static int process_info_rsp(struct rtrs_clt_sess *sess,
 		}
 	}
 	/* Sanity check */
-	if (unlikely(sgi != sg_cnt || i != sess->queue_depth)) {
+	if (sgi != sg_cnt || i != sess->queue_depth) {
 		rtrs_err(sess->clt, "Incorrect sg vector, not fully mapped\n");
 		return -EINVAL;
 	}
-	if (unlikely(total_len != sess->chunk_size * sess->queue_depth)) {
+	if (total_len != sess->chunk_size * sess->queue_depth) {
 		rtrs_err(sess->clt, "Incorrect total_len %d\n", total_len);
 		return -EINVAL;
 	}
@@ -2464,14 +2467,14 @@ static void rtrs_clt_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc)
 
 	WARN_ON(con->c.cid);
 	iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe);
-	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+	if (wc->status != IB_WC_SUCCESS) {
 		rtrs_err(sess->clt, "Sess info response recv failed: %s\n",
 			  ib_wc_status_msg(wc->status));
 		goto out;
 	}
 	WARN_ON(wc->opcode != IB_WC_RECV);
 
-	if (unlikely(wc->byte_len < sizeof(*msg))) {
+	if (wc->byte_len < sizeof(*msg)) {
 		rtrs_err(sess->clt, "Sess info response is malformed: size %d\n",
 			  wc->byte_len);
 		goto out;
@@ -2479,24 +2482,24 @@ static void rtrs_clt_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc)
 	ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, iu->dma_addr,
 				   iu->size, DMA_FROM_DEVICE);
 	msg = iu->buf;
-	if (unlikely(le16_to_cpu(msg->type) != RTRS_MSG_INFO_RSP)) {
+	if (le16_to_cpu(msg->type) != RTRS_MSG_INFO_RSP) {
 		rtrs_err(sess->clt, "Sess info response is malformed: type %d\n",
 			  le16_to_cpu(msg->type));
 		goto out;
 	}
 	rx_sz  = sizeof(*msg);
 	rx_sz += sizeof(msg->desc[0]) * le16_to_cpu(msg->sg_cnt);
-	if (unlikely(wc->byte_len < rx_sz)) {
+	if (wc->byte_len < rx_sz) {
 		rtrs_err(sess->clt, "Sess info response is malformed: size %d\n",
 			  wc->byte_len);
 		goto out;
 	}
 	err = process_info_rsp(sess, msg);
-	if (unlikely(err))
+	if (err)
 		goto out;
 
 	err = post_recv_sess(sess);
-	if (unlikely(err))
+	if (err)
 		goto out;
 
 	state = RTRS_CLT_CONNECTED;
@@ -2523,13 +2526,13 @@ static int rtrs_send_sess_info(struct rtrs_clt_sess *sess)
 			       rtrs_clt_info_req_done);
 	rx_iu = rtrs_iu_alloc(1, rx_sz, GFP_KERNEL, sess->s.dev->ib_dev,
 			       DMA_FROM_DEVICE, rtrs_clt_info_rsp_done);
-	if (unlikely(!tx_iu || !rx_iu)) {
+	if (!tx_iu || !rx_iu) {
 		err = -ENOMEM;
 		goto out;
 	}
 	/* Prepare for getting info response */
 	err = rtrs_iu_post_recv(&usr_con->c, rx_iu);
-	if (unlikely(err)) {
+	if (err) {
 		rtrs_err(sess->clt, "rtrs_iu_post_recv(), err: %d\n", err);
 		goto out;
 	}
@@ -2544,7 +2547,7 @@ static int rtrs_send_sess_info(struct rtrs_clt_sess *sess)
 
 	/* Send info request */
 	err = rtrs_iu_post_send(&usr_con->c, tx_iu, sizeof(*msg), NULL);
-	if (unlikely(err)) {
+	if (err) {
 		rtrs_err(sess->clt, "rtrs_iu_post_send(), err: %d\n", err);
 		goto out;
 	}
@@ -2555,7 +2558,7 @@ static int rtrs_send_sess_info(struct rtrs_clt_sess *sess)
 					 sess->state != RTRS_CLT_CONNECTING,
 					 msecs_to_jiffies(
 						 RTRS_CONNECT_TIMEOUT_MS));
-	if (unlikely(READ_ONCE(sess->state) != RTRS_CLT_CONNECTED)) {
+	if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED) {
 		if (READ_ONCE(sess->state) == RTRS_CLT_CONNECTING_ERR)
 			err = -ECONNRESET;
 		else
@@ -2567,7 +2570,7 @@ out:
 		rtrs_iu_free(tx_iu, sess->s.dev->ib_dev, 1);
 	if (rx_iu)
 		rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1);
-	if (unlikely(err))
+	if (err)
 		/* If we've never taken async path because of malloc problems */
 		rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL);
 
@@ -2915,7 +2918,7 @@ int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_sess *sess,
 							&old_state);
 	} while (!changed && old_state != RTRS_CLT_DEAD);
 
-	if (likely(changed)) {
+	if (changed) {
 		rtrs_clt_remove_path_from_arr(sess);
 		rtrs_clt_destroy_sess_files(sess, sysfs_self);
 		kobject_put(&sess->kobj);
@@ -2987,10 +2990,10 @@ int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops,
 	rcu_read_lock();
 	for (path_it_init(&it, clt);
 	     (sess = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) {
-		if (unlikely(READ_ONCE(sess->state) != RTRS_CLT_CONNECTED))
+		if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED)
 			continue;
 
-		if (unlikely(usr_len + hdr_len > sess->max_hdr_size)) {
+		if (usr_len + hdr_len > sess->max_hdr_size) {
 			rtrs_wrn_rl(sess->clt,
 				     "%s request failed, user message size is %zu and header length %zu, but max size is %u\n",
 				     dir == READ ? "Read" : "Write",
@@ -3005,7 +3008,7 @@ int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops,
 			err = rtrs_clt_read_req(req);
 		else
 			err = rtrs_clt_write_req(req);
-		if (unlikely(err)) {
+		if (err) {
 			req->in_use = false;
 			continue;
 		}
@@ -3078,6 +3081,18 @@ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt,
 	if (IS_ERR(sess))
 		return PTR_ERR(sess);
 
+	mutex_lock(&clt->paths_mutex);
+	if (clt->paths_num == 0) {
+		/*
+		 * When all the paths are removed for a session,
+		 * the addition of the first path is like a new session for
+		 * the storage server
+		 */
+		sess->for_new_clt = 1;
+	}
+
+	mutex_unlock(&clt->paths_mutex);
+
 	/*
 	 * It is totally safe to add path in CONNECTING state: coming
 	 * IO will never grab it.  Also it is very important to add
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h
index e276a2dfcf7c..9dc819885ec7 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h
@@ -74,7 +74,6 @@ struct rtrs_clt_con {
 	u32			queue_num;
 	unsigned int		cpu;
 	struct mutex		con_mutex;
-	atomic_t		io_cnt;
 	int			cm_err;
 };
 
@@ -102,6 +101,7 @@ struct rtrs_clt_io_req {
 	unsigned int		usr_len;
 	void			*priv;
 	bool			in_use;
+	enum rtrs_mp_policy     mp_policy;
 	struct rtrs_clt_con	*con;
 	struct rtrs_sg_desc	*desc;
 	struct ib_sge		*sge;
@@ -230,10 +230,7 @@ int rtrs_clt_stats_migration_cnt_to_str(struct rtrs_clt_stats *stats, char *buf,
 					 size_t len);
 int rtrs_clt_reset_reconnects_stat(struct rtrs_clt_stats *stats, bool enable);
 int rtrs_clt_stats_reconnects_to_str(struct rtrs_clt_stats *stats, char *buf,
-				      size_t len);
-int rtrs_clt_reset_wc_comp_stats(struct rtrs_clt_stats *stats, bool enable);
-int rtrs_clt_stats_wc_completion_to_str(struct rtrs_clt_stats *stats, char *buf,
-					 size_t len);
+				     size_t len);
 int rtrs_clt_reset_rdma_stats(struct rtrs_clt_stats *stats, bool enable);
 ssize_t rtrs_clt_stats_rdma_to_str(struct rtrs_clt_stats *stats,
 				    char *page, size_t len);
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
index 36f184a3b676..d12ddfa50747 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
@@ -96,6 +96,8 @@ struct rtrs_con {
 	struct rdma_cm_id	*cm_id;
 	unsigned int		cid;
 	int                     nr_cqe;
+	atomic_t		wr_cnt;
+	atomic_t		sq_wr_avail;
 };
 
 struct rtrs_sess {
@@ -108,6 +110,7 @@ struct rtrs_sess {
 	unsigned int		con_num;
 	unsigned int		irq_con_num;
 	unsigned int		recon_cnt;
+	unsigned int		signal_interval;
 	struct rtrs_ib_dev	*dev;
 	int			dev_ref;
 	struct ib_cqe		*hb_cqe;
@@ -309,9 +312,6 @@ int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu,
 				struct ib_send_wr *tail);
 
 int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe);
-int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe,
-				   u32 imm_data, enum ib_send_flags flags,
-				   struct ib_send_wr *head);
 
 int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con,
 		      u32 max_send_sge, int cq_vector, int nr_cqe,
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
index 3df290086169..716ef7b23558 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
@@ -183,7 +183,7 @@ static void rtrs_srv_reg_mr_done(struct ib_cq *cq, struct ib_wc *wc)
 	struct rtrs_sess *s = con->c.sess;
 	struct rtrs_srv_sess *sess = to_srv_sess(s);
 
-	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+	if (wc->status != IB_WC_SUCCESS) {
 		rtrs_err(s, "REG MR failed: %s\n",
 			  ib_wc_status_msg(wc->status));
 		close_sess(sess);
@@ -201,7 +201,6 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
 	struct rtrs_srv_sess *sess = to_srv_sess(s);
 	dma_addr_t dma_addr = sess->dma_addr[id->msg_id];
 	struct rtrs_srv_mr *srv_mr;
-	struct rtrs_srv *srv = sess->srv;
 	struct ib_send_wr inv_wr;
 	struct ib_rdma_wr imm_wr;
 	struct ib_rdma_wr *wr = NULL;
@@ -216,7 +215,7 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
 
 	sg_cnt = le16_to_cpu(id->rd_msg->sg_cnt);
 	need_inval = le16_to_cpu(id->rd_msg->flags) & RTRS_MSG_NEED_INVAL_F;
-	if (unlikely(sg_cnt != 1))
+	if (sg_cnt != 1)
 		return -EINVAL;
 
 	offset = 0;
@@ -229,7 +228,7 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
 	/* WR will fail with length error
 	 * if this is 0
 	 */
-	if (unlikely(plist->length == 0)) {
+	if (plist->length == 0) {
 		rtrs_err(s, "Invalid RDMA-Write sg list length 0\n");
 		return -EINVAL;
 	}
@@ -269,7 +268,7 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
 	 * From time to time we have to post signaled sends,
 	 * or send queue will fill up and only QP reset can help.
 	 */
-	flags = (atomic_inc_return(&id->con->wr_cnt) % srv->queue_depth) ?
+	flags = (atomic_inc_return(&id->con->c.wr_cnt) % s->signal_interval) ?
 		0 : IB_SEND_SIGNALED;
 
 	if (need_inval) {
@@ -322,7 +321,7 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
 				      offset, DMA_BIDIRECTIONAL);
 
 	err = ib_post_send(id->con->c.qp, &id->tx_wr.wr, NULL);
-	if (unlikely(err))
+	if (err)
 		rtrs_err(s,
 			  "Posting RDMA-Write-Request to QP failed, err: %d\n",
 			  err);
@@ -347,7 +346,6 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id,
 	struct ib_send_wr inv_wr, *wr = NULL;
 	struct ib_rdma_wr imm_wr;
 	struct ib_reg_wr rwr;
-	struct rtrs_srv *srv = sess->srv;
 	struct rtrs_srv_mr *srv_mr;
 	bool need_inval = false;
 	enum ib_send_flags flags;
@@ -363,7 +361,7 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id,
 		sg_cnt = le16_to_cpu(rd_msg->sg_cnt);
 
 		if (need_inval) {
-			if (likely(sg_cnt)) {
+			if (sg_cnt) {
 				inv_wr.wr_cqe   = &io_comp_cqe;
 				inv_wr.sg_list = NULL;
 				inv_wr.num_sge = 0;
@@ -396,7 +394,7 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id,
 	 * From time to time we have to post signalled sends,
 	 * or send queue will fill up and only QP reset can help.
 	 */
-	flags = (atomic_inc_return(&con->wr_cnt) % srv->queue_depth) ?
+	flags = (atomic_inc_return(&con->c.wr_cnt) % s->signal_interval) ?
 		0 : IB_SEND_SIGNALED;
 	imm = rtrs_to_io_rsp_imm(id->msg_id, errno, need_inval);
 	imm_wr.wr.next = NULL;
@@ -439,7 +437,7 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id,
 	imm_wr.wr.ex.imm_data = cpu_to_be32(imm);
 
 	err = ib_post_send(id->con->c.qp, wr, NULL);
-	if (unlikely(err))
+	if (err)
 		rtrs_err_rl(s, "Posting RDMA-Reply to QP failed, err: %d\n",
 			     err);
 
@@ -496,7 +494,7 @@ bool rtrs_srv_resp_rdma(struct rtrs_srv_op *id, int status)
 
 	id->status = status;
 
-	if (unlikely(sess->state != RTRS_SRV_CONNECTED)) {
+	if (sess->state != RTRS_SRV_CONNECTED) {
 		rtrs_err_rl(s,
 			    "Sending I/O response failed,  session %s is disconnected, sess state %s\n",
 			    kobject_name(&sess->kobj),
@@ -508,12 +506,11 @@ bool rtrs_srv_resp_rdma(struct rtrs_srv_op *id, int status)
 
 		ib_update_fast_reg_key(mr->mr, ib_inc_rkey(mr->mr->rkey));
 	}
-	if (unlikely(atomic_sub_return(1,
-				       &con->sq_wr_avail) < 0)) {
+	if (atomic_sub_return(1, &con->c.sq_wr_avail) < 0) {
 		rtrs_err(s, "IB send queue full: sess=%s cid=%d\n",
 			 kobject_name(&sess->kobj),
 			 con->c.cid);
-		atomic_add(1, &con->sq_wr_avail);
+		atomic_add(1, &con->c.sq_wr_avail);
 		spin_lock(&con->rsp_wr_wait_lock);
 		list_add_tail(&id->wait_list, &con->rsp_wr_wait_list);
 		spin_unlock(&con->rsp_wr_wait_lock);
@@ -525,7 +522,7 @@ bool rtrs_srv_resp_rdma(struct rtrs_srv_op *id, int status)
 	else
 		err = rdma_write_sg(id);
 
-	if (unlikely(err)) {
+	if (err) {
 		rtrs_err_rl(s, "IO response failed: %d: sess=%s\n", err,
 			    kobject_name(&sess->kobj));
 		close_sess(sess);
@@ -712,7 +709,7 @@ static void rtrs_srv_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc)
 	iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe);
 	rtrs_iu_free(iu, sess->s.dev->ib_dev, 1);
 
-	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+	if (wc->status != IB_WC_SUCCESS) {
 		rtrs_err(s, "Sess info response send failed: %s\n",
 			  ib_wc_status_msg(wc->status));
 		close_sess(sess);
@@ -801,7 +798,7 @@ static int process_info_req(struct rtrs_srv_con *con,
 	size_t tx_sz;
 
 	err = post_recv_sess(sess);
-	if (unlikely(err)) {
+	if (err) {
 		rtrs_err(s, "post_recv_sess(), err: %d\n", err);
 		return err;
 	}
@@ -814,14 +811,14 @@ static int process_info_req(struct rtrs_srv_con *con,
 	strscpy(sess->s.sessname, msg->sessname, sizeof(sess->s.sessname));
 
 	rwr = kcalloc(sess->mrs_num, sizeof(*rwr), GFP_KERNEL);
-	if (unlikely(!rwr))
+	if (!rwr)
 		return -ENOMEM;
 
 	tx_sz  = sizeof(*rsp);
 	tx_sz += sizeof(rsp->desc[0]) * sess->mrs_num;
 	tx_iu = rtrs_iu_alloc(1, tx_sz, GFP_KERNEL, sess->s.dev->ib_dev,
 			       DMA_TO_DEVICE, rtrs_srv_info_rsp_done);
-	if (unlikely(!tx_iu)) {
+	if (!tx_iu) {
 		err = -ENOMEM;
 		goto rwr_free;
 	}
@@ -853,7 +850,7 @@ static int process_info_req(struct rtrs_srv_con *con,
 	}
 
 	err = rtrs_srv_create_sess_files(sess);
-	if (unlikely(err))
+	if (err)
 		goto iu_free;
 	kobject_get(&sess->kobj);
 	get_device(&sess->srv->dev);
@@ -873,7 +870,7 @@ static int process_info_req(struct rtrs_srv_con *con,
 
 	/* Send info response */
 	err = rtrs_iu_post_send(&con->c, tx_iu, tx_sz, reg_wr);
-	if (unlikely(err)) {
+	if (err) {
 		rtrs_err(s, "rtrs_iu_post_send(), err: %d\n", err);
 iu_free:
 		rtrs_iu_free(tx_iu, sess->s.dev->ib_dev, 1);
@@ -896,14 +893,14 @@ static void rtrs_srv_info_req_done(struct ib_cq *cq, struct ib_wc *wc)
 	WARN_ON(con->c.cid);
 
 	iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe);
-	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+	if (wc->status != IB_WC_SUCCESS) {
 		rtrs_err(s, "Sess info request receive failed: %s\n",
 			  ib_wc_status_msg(wc->status));
 		goto close;
 	}
 	WARN_ON(wc->opcode != IB_WC_RECV);
 
-	if (unlikely(wc->byte_len < sizeof(*msg))) {
+	if (wc->byte_len < sizeof(*msg)) {
 		rtrs_err(s, "Sess info request is malformed: size %d\n",
 			  wc->byte_len);
 		goto close;
@@ -911,13 +908,13 @@ static void rtrs_srv_info_req_done(struct ib_cq *cq, struct ib_wc *wc)
 	ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, iu->dma_addr,
 				   iu->size, DMA_FROM_DEVICE);
 	msg = iu->buf;
-	if (unlikely(le16_to_cpu(msg->type) != RTRS_MSG_INFO_REQ)) {
+	if (le16_to_cpu(msg->type) != RTRS_MSG_INFO_REQ) {
 		rtrs_err(s, "Sess info request is malformed: type %d\n",
 			  le16_to_cpu(msg->type));
 		goto close;
 	}
 	err = process_info_req(con, msg);
-	if (unlikely(err))
+	if (err)
 		goto close;
 
 out:
@@ -938,11 +935,11 @@ static int post_recv_info_req(struct rtrs_srv_con *con)
 	rx_iu = rtrs_iu_alloc(1, sizeof(struct rtrs_msg_info_req),
 			       GFP_KERNEL, sess->s.dev->ib_dev,
 			       DMA_FROM_DEVICE, rtrs_srv_info_req_done);
-	if (unlikely(!rx_iu))
+	if (!rx_iu)
 		return -ENOMEM;
 	/* Prepare for getting info response */
 	err = rtrs_iu_post_recv(&con->c, rx_iu);
-	if (unlikely(err)) {
+	if (err) {
 		rtrs_err(s, "rtrs_iu_post_recv(), err: %d\n", err);
 		rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1);
 		return err;
@@ -957,7 +954,7 @@ static int post_recv_io(struct rtrs_srv_con *con, size_t q_size)
 
 	for (i = 0; i < q_size; i++) {
 		err = rtrs_post_recv_empty(&con->c, &io_comp_cqe);
-		if (unlikely(err))
+		if (err)
 			return err;
 	}
 
@@ -978,7 +975,7 @@ static int post_recv_sess(struct rtrs_srv_sess *sess)
 			q_size = srv->queue_depth;
 
 		err = post_recv_io(to_srv_con(sess->s.con[cid]), q_size);
-		if (unlikely(err)) {
+		if (err) {
 			rtrs_err(s, "post_recv_io(), err: %d\n", err);
 			return err;
 		}
@@ -1001,13 +998,13 @@ static void process_read(struct rtrs_srv_con *con,
 	void *data;
 	int ret;
 
-	if (unlikely(sess->state != RTRS_SRV_CONNECTED)) {
+	if (sess->state != RTRS_SRV_CONNECTED) {
 		rtrs_err_rl(s,
 			     "Processing read request failed,  session is disconnected, sess state %s\n",
 			     rtrs_srv_state_str(sess->state));
 		return;
 	}
-	if (unlikely(msg->sg_cnt != 1 && msg->sg_cnt != 0)) {
+	if (msg->sg_cnt != 1 && msg->sg_cnt != 0) {
 		rtrs_err_rl(s,
 			    "Processing read request failed, invalid message\n");
 		return;
@@ -1025,7 +1022,7 @@ static void process_read(struct rtrs_srv_con *con,
 	ret = ctx->ops.rdma_ev(srv->priv, id, READ, data, data_len,
 			   data + data_len, usr_len);
 
-	if (unlikely(ret)) {
+	if (ret) {
 		rtrs_err_rl(s,
 			     "Processing read request failed, user module cb reported for msg_id %d, err: %d\n",
 			     buf_id, ret);
@@ -1059,7 +1056,7 @@ static void process_write(struct rtrs_srv_con *con,
 	void *data;
 	int ret;
 
-	if (unlikely(sess->state != RTRS_SRV_CONNECTED)) {
+	if (sess->state != RTRS_SRV_CONNECTED) {
 		rtrs_err_rl(s,
 			     "Processing write request failed,  session is disconnected, sess state %s\n",
 			     rtrs_srv_state_str(sess->state));
@@ -1076,8 +1073,8 @@ static void process_write(struct rtrs_srv_con *con,
 	data_len = off - usr_len;
 	data = page_address(srv->chunks[buf_id]);
 	ret = ctx->ops.rdma_ev(srv->priv, id, WRITE, data, data_len,
-			   data + data_len, usr_len);
-	if (unlikely(ret)) {
+			       data + data_len, usr_len);
+	if (ret) {
 		rtrs_err_rl(s,
 			     "Processing write request failed, user module callback reports err: %d\n",
 			     ret);
@@ -1141,7 +1138,7 @@ static void rtrs_srv_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc)
 	u32 msg_id, off;
 	void *data;
 
-	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+	if (wc->status != IB_WC_SUCCESS) {
 		rtrs_err(s, "Failed IB_WR_LOCAL_INV: %s\n",
 			  ib_wc_status_msg(wc->status));
 		close_sess(sess);
@@ -1198,7 +1195,7 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
 	u32 imm_type, imm_payload;
 	int err;
 
-	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+	if (wc->status != IB_WC_SUCCESS) {
 		if (wc->status != IB_WC_WR_FLUSH_ERR) {
 			rtrs_err(s,
 				  "%s (wr_cqe: %p, type: %d, vendor_err: 0x%x, len: %u)\n",
@@ -1218,21 +1215,20 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
 		if (WARN_ON(wc->wr_cqe != &io_comp_cqe))
 			return;
 		err = rtrs_post_recv_empty(&con->c, &io_comp_cqe);
-		if (unlikely(err)) {
+		if (err) {
 			rtrs_err(s, "rtrs_post_recv(), err: %d\n", err);
 			close_sess(sess);
 			break;
 		}
 		rtrs_from_imm(be32_to_cpu(wc->ex.imm_data),
 			       &imm_type, &imm_payload);
-		if (likely(imm_type == RTRS_IO_REQ_IMM)) {
+		if (imm_type == RTRS_IO_REQ_IMM) {
 			u32 msg_id, off;
 			void *data;
 
 			msg_id = imm_payload >> sess->mem_bits;
 			off = imm_payload & ((1 << sess->mem_bits) - 1);
-			if (unlikely(msg_id >= srv->queue_depth ||
-				     off >= max_chunk_size)) {
+			if (msg_id >= srv->queue_depth || off >= max_chunk_size) {
 				rtrs_err(s, "Wrong msg_id %u, off %u\n",
 					  msg_id, off);
 				close_sess(sess);
@@ -1244,7 +1240,7 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
 				mr->msg_off = off;
 				mr->msg_id = msg_id;
 				err = rtrs_srv_inv_rkey(con, mr);
-				if (unlikely(err)) {
+				if (err) {
 					rtrs_err(s, "rtrs_post_recv(), err: %d\n",
 						  err);
 					close_sess(sess);
@@ -1268,10 +1264,11 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
 	case IB_WC_SEND:
 		/*
 		 * post_send() RDMA write completions of IO reqs (read/write)
+		 * and hb.
 		 */
-		atomic_add(srv->queue_depth, &con->sq_wr_avail);
+		atomic_add(s->signal_interval, &con->c.sq_wr_avail);
 
-		if (unlikely(!list_empty_careful(&con->rsp_wr_wait_list)))
+		if (!list_empty_careful(&con->rsp_wr_wait_list))
 			rtrs_rdma_process_wr_wait_list(con);
 
 		break;
@@ -1648,7 +1645,7 @@ static int create_con(struct rtrs_srv_sess *sess,
 	con->c.cm_id = cm_id;
 	con->c.sess = &sess->s;
 	con->c.cid = cid;
-	atomic_set(&con->wr_cnt, 1);
+	atomic_set(&con->c.wr_cnt, 1);
 	wr_limit = sess->s.dev->ib_dev->attrs.max_qp_wr;
 
 	if (con->c.cid == 0) {
@@ -1659,6 +1656,8 @@ static int create_con(struct rtrs_srv_sess *sess,
 		max_send_wr = min_t(int, wr_limit,
 				    SERVICE_CON_QUEUE_DEPTH * 2 + 2);
 		max_recv_wr = max_send_wr;
+		s->signal_interval = min_not_zero(srv->queue_depth,
+						  (size_t)SERVICE_CON_QUEUE_DEPTH);
 	} else {
 		/* when always_invlaidate enalbed, we need linv+rinv+mr+imm */
 		if (always_invalidate)
@@ -1679,7 +1678,7 @@ static int create_con(struct rtrs_srv_sess *sess,
 		 */
 	}
 	cq_num = max_send_wr + max_recv_wr;
-	atomic_set(&con->sq_wr_avail, max_send_wr);
+	atomic_set(&con->c.sq_wr_avail, max_send_wr);
 	cq_vector = rtrs_srv_get_next_cq_vector(sess);
 
 	/* TODO: SOFTIRQ can be faster, but be careful with softirq context */
@@ -1894,7 +1893,7 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id,
 	err = create_con(sess, cm_id, cid);
 	if (err) {
 		rtrs_err((&sess->s), "create_con(), error %d\n", err);
-		(void)rtrs_rdma_do_reject(cm_id, err);
+		rtrs_rdma_do_reject(cm_id, err);
 		/*
 		 * Since session has other connections we follow normal way
 		 * through workqueue, but still return an error to tell cma.c
@@ -1905,7 +1904,7 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id,
 	err = rtrs_rdma_do_accept(sess, cm_id);
 	if (err) {
 		rtrs_err((&sess->s), "rtrs_rdma_do_accept(), error %d\n", err);
-		(void)rtrs_rdma_do_reject(cm_id, err);
+		rtrs_rdma_do_reject(cm_id, err);
 		/*
 		 * Since current connection was successfully added to the
 		 * session we follow normal way through workqueue to close the
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.h b/drivers/infiniband/ulp/rtrs/rtrs-srv.h
index f8da2e3f0bda..9d8d2a91a235 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.h
@@ -42,8 +42,6 @@ struct rtrs_srv_stats {
 
 struct rtrs_srv_con {
 	struct rtrs_con		c;
-	atomic_t		wr_cnt;
-	atomic_t		sq_wr_avail;
 	struct list_head	rsp_wr_wait_list;
 	spinlock_t		rsp_wr_wait_lock;
 };
@@ -140,10 +138,6 @@ static inline void rtrs_srv_update_rdma_stats(struct rtrs_srv_stats *s,
 int rtrs_srv_reset_rdma_stats(struct rtrs_srv_stats *stats, bool enable);
 ssize_t rtrs_srv_stats_rdma_to_str(struct rtrs_srv_stats *stats,
 				    char *page, size_t len);
-int rtrs_srv_reset_wc_completion_stats(struct rtrs_srv_stats *stats,
-					bool enable);
-int rtrs_srv_stats_wc_completion_to_str(struct rtrs_srv_stats *stats, char *buf,
-					 size_t len);
 int rtrs_srv_reset_all_stats(struct rtrs_srv_stats *stats, bool enable);
 ssize_t rtrs_srv_reset_all_help(struct rtrs_srv_stats *stats,
 				 char *page, size_t len);
diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c
index 61919ebd92b2..ca542e477d38 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs.c
@@ -182,22 +182,28 @@ int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu,
 }
 EXPORT_SYMBOL_GPL(rtrs_iu_post_rdma_write_imm);
 
-int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe,
-				    u32 imm_data, enum ib_send_flags flags,
-				    struct ib_send_wr *head)
+static int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con,
+					  struct ib_cqe *cqe,
+					  u32 imm_data,
+					  struct ib_send_wr *head)
 {
 	struct ib_rdma_wr wr;
+	struct rtrs_sess *sess = con->sess;
+	enum ib_send_flags sflags;
+
+	atomic_dec_if_positive(&con->sq_wr_avail);
+	sflags = (atomic_inc_return(&con->wr_cnt) % sess->signal_interval) ?
+		0 : IB_SEND_SIGNALED;
 
 	wr = (struct ib_rdma_wr) {
 		.wr.wr_cqe	= cqe,
-		.wr.send_flags	= flags,
+		.wr.send_flags	= sflags,
 		.wr.opcode	= IB_WR_RDMA_WRITE_WITH_IMM,
 		.wr.ex.imm_data	= cpu_to_be32(imm_data),
 	};
 
 	return rtrs_post_send(con->qp, head, &wr.wr, NULL);
 }
-EXPORT_SYMBOL_GPL(rtrs_post_rdma_write_imm_empty);
 
 static void qp_event_handler(struct ib_event *ev, void *ctx)
 {
@@ -314,8 +320,9 @@ void rtrs_send_hb_ack(struct rtrs_sess *sess)
 
 	imm = rtrs_to_imm(RTRS_HB_ACK_IMM, 0);
 	err = rtrs_post_rdma_write_imm_empty(usr_con, sess->hb_cqe, imm,
-					     0, NULL);
+					     NULL);
 	if (err) {
+		rtrs_err(sess, "send HB ACK failed, errno: %d\n", err);
 		sess->hb_err_handler(usr_con);
 		return;
 	}
@@ -333,6 +340,7 @@ static void hb_work(struct work_struct *work)
 	usr_con = sess->con[0];
 
 	if (sess->hb_missed_cnt > sess->hb_missed_max) {
+		rtrs_err(sess, "HB missed max reached.\n");
 		sess->hb_err_handler(usr_con);
 		return;
 	}
@@ -346,8 +354,9 @@ static void hb_work(struct work_struct *work)
 
 	imm = rtrs_to_imm(RTRS_HB_MSG_IMM, 0);
 	err = rtrs_post_rdma_write_imm_empty(usr_con, sess->hb_cqe, imm,
-					     0, NULL);
+					     NULL);
 	if (err) {
+		rtrs_err(sess, "HB send failed, errno: %d\n", err);
 		sess->hb_err_handler(usr_con);
 		return;
 	}
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 8d5cf5eb5778..71eda91e810c 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -1280,7 +1280,7 @@ static bool srp_terminate_cmd(struct scsi_cmnd *scmnd, void *context_ptr,
 {
 	struct srp_terminate_context *context = context_ptr;
 	struct srp_target_port *target = context->srp_target;
-	u32 tag = blk_mq_unique_tag(scmnd->request);
+	u32 tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmnd));
 	struct srp_rdma_ch *ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
 	struct srp_request *req = scsi_cmd_priv(scmnd);
 
@@ -2152,6 +2152,7 @@ static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
 
 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
 {
+	struct request *rq = scsi_cmd_to_rq(scmnd);
 	struct srp_target_port *target = host_to_target(shost);
 	struct srp_rdma_ch *ch;
 	struct srp_request *req = scsi_cmd_priv(scmnd);
@@ -2166,8 +2167,8 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
 	if (unlikely(scmnd->result))
 		goto err;
 
-	WARN_ON_ONCE(scmnd->request->tag < 0);
-	tag = blk_mq_unique_tag(scmnd->request);
+	WARN_ON_ONCE(rq->tag < 0);
+	tag = blk_mq_unique_tag(rq);
 	ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
 
 	spin_lock_irqsave(&ch->lock, flags);
@@ -2791,7 +2792,7 @@ static int srp_abort(struct scsi_cmnd *scmnd)
 
 	if (!req)
 		return SUCCESS;
-	tag = blk_mq_unique_tag(scmnd->request);
+	tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmnd));
 	ch_idx = blk_mq_unique_tag_to_hwq(tag);
 	if (WARN_ON_ONCE(ch_idx >= target->ch_count))
 		return SUCCESS;
diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c
index f798922a4598..882c3c8ba399 100644
--- a/drivers/input/joystick/analog.c
+++ b/drivers/input/joystick/analog.c
@@ -28,10 +28,6 @@ MODULE_AUTHOR("Vojtech Pavlik <vojtech@ucw.cz>");
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
 
-static bool use_ktime = true;
-module_param(use_ktime, bool, 0400);
-MODULE_PARM_DESC(use_ktime, "Use ktime for measuring I/O speed");
-
 /*
  * Option parsing.
  */
@@ -110,7 +106,6 @@ struct analog_port {
 	char cooked;
 	int bads;
 	int reads;
-	int speed;
 	int loop;
 	int fuzz;
 	int axes[4];
@@ -120,66 +115,6 @@ struct analog_port {
 };
 
 /*
- * Time macros.
- */
-
-#ifdef __i386__
-
-#include <linux/i8253.h>
-
-#define GET_TIME(x)	do { if (boot_cpu_has(X86_FEATURE_TSC)) x = (unsigned int)rdtsc(); else x = get_time_pit(); } while (0)
-#define DELTA(x,y)	(boot_cpu_has(X86_FEATURE_TSC) ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? PIT_TICK_RATE / HZ : 0)))
-#define TIME_NAME	(boot_cpu_has(X86_FEATURE_TSC)?"TSC":"PIT")
-static unsigned int get_time_pit(void)
-{
-        unsigned long flags;
-        unsigned int count;
-
-        raw_spin_lock_irqsave(&i8253_lock, flags);
-        outb_p(0x00, 0x43);
-        count = inb_p(0x40);
-        count |= inb_p(0x40) << 8;
-        raw_spin_unlock_irqrestore(&i8253_lock, flags);
-
-        return count;
-}
-#elif defined(__x86_64__)
-#define GET_TIME(x)	do { x = (unsigned int)rdtsc(); } while (0)
-#define DELTA(x,y)	((y)-(x))
-#define TIME_NAME	"TSC"
-#elif defined(__alpha__) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_PPC) || defined(CONFIG_RISCV)
-#define GET_TIME(x)	do { x = get_cycles(); } while (0)
-#define DELTA(x,y)	((y)-(x))
-#define TIME_NAME	"get_cycles"
-#else
-#define FAKE_TIME
-static unsigned long analog_faketime = 0;
-#define GET_TIME(x)     do { x = analog_faketime++; } while(0)
-#define DELTA(x,y)	((y)-(x))
-#define TIME_NAME	"Unreliable"
-#warning Precise timer not defined for this architecture.
-#endif
-
-static inline u64 get_time(void)
-{
-	if (use_ktime) {
-		return ktime_get_ns();
-	} else {
-		unsigned int x;
-		GET_TIME(x);
-		return x;
-	}
-}
-
-static inline unsigned int delta(u64 x, u64 y)
-{
-	if (use_ktime)
-		return y - x;
-	else
-		return DELTA((unsigned int)x, (unsigned int)y);
-}
-
-/*
  * analog_decode() decodes analog joystick data and reports input events.
  */
 
@@ -234,18 +169,18 @@ static void analog_decode(struct analog *analog, int *axes, int *initial, int bu
 static int analog_cooked_read(struct analog_port *port)
 {
 	struct gameport *gameport = port->gameport;
-	u64 time[4], start, loop, now;
+	ktime_t time[4], start, loop, now;
 	unsigned int loopout, timeout;
 	unsigned char data[4], this, last;
 	unsigned long flags;
 	int i, j;
 
 	loopout = (ANALOG_LOOP_TIME * port->loop) / 1000;
-	timeout = ANALOG_MAX_TIME * port->speed;
+	timeout = ANALOG_MAX_TIME * NSEC_PER_MSEC;
 
 	local_irq_save(flags);
 	gameport_trigger(gameport);
-	now = get_time();
+	now = ktime_get();
 	local_irq_restore(flags);
 
 	start = now;
@@ -258,16 +193,16 @@ static int analog_cooked_read(struct analog_port *port)
 
 		local_irq_disable();
 		this = gameport_read(gameport) & port->mask;
-		now = get_time();
+		now = ktime_get();
 		local_irq_restore(flags);
 
-		if ((last ^ this) && (delta(loop, now) < loopout)) {
+		if ((last ^ this) && (ktime_sub(now, loop) < loopout)) {
 			data[i] = last ^ this;
 			time[i] = now;
 			i++;
 		}
 
-	} while (this && (i < 4) && (delta(start, now) < timeout));
+	} while (this && (i < 4) && (ktime_sub(now, start) < timeout));
 
 	this <<= 4;
 
@@ -275,7 +210,7 @@ static int analog_cooked_read(struct analog_port *port)
 		this |= data[i];
 		for (j = 0; j < 4; j++)
 			if (data[i] & (1 << j))
-				port->axes[j] = (delta(start, time[i]) << ANALOG_FUZZ_BITS) / port->loop;
+				port->axes[j] = ((u32)ktime_sub(time[i], start) << ANALOG_FUZZ_BITS) / port->loop;
 	}
 
 	return -(this != port->mask);
@@ -375,38 +310,22 @@ static void analog_calibrate_timer(struct analog_port *port)
 {
 	struct gameport *gameport = port->gameport;
 	unsigned int i, t, tx;
-	u64 t1, t2, t3;
+	ktime_t t1, t2, t3;
 	unsigned long flags;
 
-	if (use_ktime) {
-		port->speed = 1000000;
-	} else {
-		local_irq_save(flags);
-		t1 = get_time();
-#ifdef FAKE_TIME
-		analog_faketime += 830;
-#endif
-		mdelay(1);
-		t2 = get_time();
-		t3 = get_time();
-		local_irq_restore(flags);
-
-		port->speed = delta(t1, t2) - delta(t2, t3);
-	}
-
 	tx = ~0;
 
 	for (i = 0; i < 50; i++) {
 		local_irq_save(flags);
-		t1 = get_time();
+		t1 = ktime_get();
 		for (t = 0; t < 50; t++) {
 			gameport_read(gameport);
-			t2 = get_time();
+			t2 = ktime_get();
 		}
-		t3 = get_time();
+		t3 = ktime_get();
 		local_irq_restore(flags);
 		udelay(i);
-		t = delta(t1, t2) - delta(t2, t3);
+		t = ktime_sub(t2, t1) - ktime_sub(t3, t2);
 		if (t < tx) tx = t;
 	}
 
@@ -611,7 +530,7 @@ static int analog_init_port(struct gameport *gameport, struct gameport_driver *d
 		t = gameport_read(gameport);
 		msleep(ANALOG_MAX_TIME);
 		port->mask = (gameport_read(gameport) ^ t) & t & 0xf;
-		port->fuzz = (port->speed * ANALOG_FUZZ_MAGIC) / port->loop / 1000 + ANALOG_FUZZ_BITS;
+		port->fuzz = (NSEC_PER_MSEC * ANALOG_FUZZ_MAGIC) / port->loop / 1000 + ANALOG_FUZZ_BITS;
 
 		for (i = 0; i < ANALOG_INIT_RETRIES; i++) {
 			if (!analog_cooked_read(port))
diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index 40a070a2e7f5..e75650e98c9e 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -210,7 +210,7 @@ config KEYBOARD_LKKBD
 	select SERIO
 	help
 	  Say Y here if you want to use a LK201 or LK401 style serial
-	  keyboard. This keyboard is also useable on PCs if you attach
+	  keyboard. This keyboard is also usable on PCs if you attach
 	  it with the inputattach program. The connector pinout is
 	  described within lkkbd.c.
 
diff --git a/drivers/input/keyboard/adc-keys.c b/drivers/input/keyboard/adc-keys.c
index 6d5be48d1b3d..bf72ab8df817 100644
--- a/drivers/input/keyboard/adc-keys.c
+++ b/drivers/input/keyboard/adc-keys.c
@@ -193,7 +193,7 @@ static const struct of_device_id adc_keys_of_match[] = {
 MODULE_DEVICE_TABLE(of, adc_keys_of_match);
 #endif
 
-static struct platform_driver __refdata adc_keys_driver = {
+static struct platform_driver adc_keys_driver = {
 	.driver = {
 		.name = "adc_keys",
 		.of_match_table = of_match_ptr(adc_keys_of_match),
diff --git a/drivers/input/keyboard/adp5588-keys.c b/drivers/input/keyboard/adp5588-keys.c
index 90a59b973d00..1592da4de336 100644
--- a/drivers/input/keyboard/adp5588-keys.c
+++ b/drivers/input/keyboard/adp5588-keys.c
@@ -17,7 +17,7 @@
 #include <linux/platform_device.h>
 #include <linux/input.h>
 #include <linux/i2c.h>
-#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
 #include <linux/slab.h>
 
 #include <linux/platform_data/adp5588.h>
diff --git a/drivers/input/keyboard/adp5589-keys.c b/drivers/input/keyboard/adp5589-keys.c
index 654e0476406b..bdd264459a97 100644
--- a/drivers/input/keyboard/adp5589-keys.c
+++ b/drivers/input/keyboard/adp5589-keys.c
@@ -18,7 +18,7 @@
 #include <linux/platform_device.h>
 #include <linux/input.h>
 #include <linux/i2c.h>
-#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
 #include <linux/slab.h>
 
 #include <linux/input/adp5589.h>
diff --git a/drivers/input/keyboard/ep93xx_keypad.c b/drivers/input/keyboard/ep93xx_keypad.c
index c8194333d612..e0e931e796fa 100644
--- a/drivers/input/keyboard/ep93xx_keypad.c
+++ b/drivers/input/keyboard/ep93xx_keypad.c
@@ -157,7 +157,7 @@ static int ep93xx_keypad_open(struct input_dev *pdev)
 
 	if (!keypad->enabled) {
 		ep93xx_keypad_config(keypad);
-		clk_enable(keypad->clk);
+		clk_prepare_enable(keypad->clk);
 		keypad->enabled = true;
 	}
 
@@ -169,7 +169,7 @@ static void ep93xx_keypad_close(struct input_dev *pdev)
 	struct ep93xx_keypad *keypad = input_get_drvdata(pdev);
 
 	if (keypad->enabled) {
-		clk_disable(keypad->clk);
+		clk_disable_unprepare(keypad->clk);
 		keypad->enabled = false;
 	}
 }
diff --git a/drivers/input/keyboard/hilkbd.c b/drivers/input/keyboard/hilkbd.c
index 62ccfebf2f60..c1a4d5055de6 100644
--- a/drivers/input/keyboard/hilkbd.c
+++ b/drivers/input/keyboard/hilkbd.c
@@ -316,11 +316,9 @@ static int __init hil_probe_chip(struct parisc_device *dev)
 	return hil_keyb_init();
 }
 
-static int __exit hil_remove_chip(struct parisc_device *dev)
+static void __exit hil_remove_chip(struct parisc_device *dev)
 {
 	hil_keyb_exit();
-
-	return 0;
 }
 
 static const struct parisc_device_id hil_tbl[] __initconst = {
diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig
index 498cde376981..dd5227cf8696 100644
--- a/drivers/input/misc/Kconfig
+++ b/drivers/input/misc/Kconfig
@@ -309,18 +309,6 @@ config INPUT_GPIO_VIBRA
 	  To compile this driver as a module, choose M here: the module will be
 	  called gpio-vibra.
 
-config INPUT_IXP4XX_BEEPER
-	tristate "IXP4XX Beeper support"
-	depends on ARCH_IXP4XX
-	help
-	  If you say yes here, you can connect a beeper to the
-	  ixp4xx gpio pins. This is used by the LinkSys NSLU2.
-
-	  If unsure, say Y.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called ixp4xx-beeper.
-
 config INPUT_COBALT_BTNS
 	tristate "Cobalt button interface"
 	depends on MIPS_COBALT
@@ -811,16 +799,6 @@ config INPUT_XEN_KBDDEV_FRONTEND
 	  To compile this driver as a module, choose M here: the
 	  module will be called xen-kbdfront.
 
-config INPUT_SIRFSOC_ONKEY
-	tristate "CSR SiRFSoC power on/off/suspend key support"
-	depends on ARCH_SIRF && OF
-	default y
-	help
-	  Say Y here if you want to support for the SiRFSoC power on/off/suspend key
-	  in Linux, after you press the onkey, system will suspend.
-
-	  If unsure, say N.
-
 config INPUT_IDEAPAD_SLIDEBAR
 	tristate "IdeaPad Laptop Slidebar"
 	depends on INPUT
diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile
index f593beed7e05..b92c53a6b5ae 100644
--- a/drivers/input/misc/Makefile
+++ b/drivers/input/misc/Makefile
@@ -44,7 +44,6 @@ obj-$(CONFIG_HP_SDC_RTC)		+= hp_sdc_rtc.o
 obj-$(CONFIG_INPUT_IMS_PCU)		+= ims-pcu.o
 obj-$(CONFIG_INPUT_IQS269A)		+= iqs269a.o
 obj-$(CONFIG_INPUT_IQS626A)		+= iqs626a.o
-obj-$(CONFIG_INPUT_IXP4XX_BEEPER)	+= ixp4xx-beeper.o
 obj-$(CONFIG_INPUT_KEYSPAN_REMOTE)	+= keyspan_remote.o
 obj-$(CONFIG_INPUT_KXTJ9)		+= kxtj9.o
 obj-$(CONFIG_INPUT_M68K_BEEP)		+= m68kspkr.o
@@ -74,7 +73,6 @@ obj-$(CONFIG_INPUT_GPIO_ROTARY_ENCODER)	+= rotary_encoder.o
 obj-$(CONFIG_INPUT_RK805_PWRKEY)	+= rk805-pwrkey.o
 obj-$(CONFIG_INPUT_SC27XX_VIBRA)	+= sc27xx-vibra.o
 obj-$(CONFIG_INPUT_SGI_BTNS)		+= sgi_btns.o
-obj-$(CONFIG_INPUT_SIRFSOC_ONKEY)	+= sirfsoc-onkey.o
 obj-$(CONFIG_INPUT_SOC_BUTTON_ARRAY)	+= soc_button_array.o
 obj-$(CONFIG_INPUT_SPARCSPKR)		+= sparcspkr.o
 obj-$(CONFIG_INPUT_STPMIC1_ONKEY)  	+= stpmic1_onkey.o
diff --git a/drivers/input/misc/ixp4xx-beeper.c b/drivers/input/misc/ixp4xx-beeper.c
deleted file mode 100644
index 05018d0c97c7..000000000000
--- a/drivers/input/misc/ixp4xx-beeper.c
+++ /dev/null
@@ -1,183 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Generic IXP4xx beeper driver
- *
- * Copyright (C) 2005 Tower Technologies
- *
- * based on nslu2-io.c
- *  Copyright (C) 2004 Karen Spearel
- *
- * Author: Alessandro Zummo <a.zummo@towertech.it>
- * Maintainers: http://www.nslu2-linux.org/
- */
-
-#include <linux/module.h>
-#include <linux/input.h>
-#include <linux/delay.h>
-#include <linux/platform_device.h>
-#include <linux/interrupt.h>
-#include <linux/gpio.h>
-#include <mach/hardware.h>
-
-MODULE_AUTHOR("Alessandro Zummo <a.zummo@towertech.it>");
-MODULE_DESCRIPTION("ixp4xx beeper driver");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:ixp4xx-beeper");
-
-static DEFINE_SPINLOCK(beep_lock);
-
-static int ixp4xx_timer2_irq;
-
-static void ixp4xx_spkr_control(unsigned int pin, unsigned int count)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&beep_lock, flags);
-
-	if (count) {
-		gpio_direction_output(pin, 0);
-		*IXP4XX_OSRT2 = (count & ~IXP4XX_OST_RELOAD_MASK) | IXP4XX_OST_ENABLE;
-	} else {
-		gpio_direction_output(pin, 1);
-		gpio_direction_input(pin);
-		*IXP4XX_OSRT2 = 0;
-	}
-
-	spin_unlock_irqrestore(&beep_lock, flags);
-}
-
-static int ixp4xx_spkr_event(struct input_dev *dev, unsigned int type, unsigned int code, int value)
-{
-	unsigned int pin = (unsigned int) input_get_drvdata(dev);
-	unsigned int count = 0;
-
-	if (type != EV_SND)
-		return -1;
-
-	switch (code) {
-		case SND_BELL:
-			if (value)
-				value = 1000;
-		case SND_TONE:
-			break;
-		default:
-			return -1;
-	}
-
-	if (value > 20 && value < 32767)
-		count = (ixp4xx_timer_freq / (value * 4)) - 1;
-
-	ixp4xx_spkr_control(pin, count);
-
-	return 0;
-}
-
-static irqreturn_t ixp4xx_spkr_interrupt(int irq, void *dev_id)
-{
-	unsigned int pin = (unsigned int) dev_id;
-
-	/* clear interrupt */
-	*IXP4XX_OSST = IXP4XX_OSST_TIMER_2_PEND;
-
-	/* flip the beeper output */
-	gpio_set_value(pin, !gpio_get_value(pin));
-
-	return IRQ_HANDLED;
-}
-
-static int ixp4xx_spkr_probe(struct platform_device *dev)
-{
-	struct input_dev *input_dev;
-	int irq;
-	int err;
-
-	input_dev = input_allocate_device();
-	if (!input_dev)
-		return -ENOMEM;
-
-	input_set_drvdata(input_dev, (void *) dev->id);
-
-	input_dev->name = "ixp4xx beeper";
-	input_dev->phys = "ixp4xx/gpio";
-	input_dev->id.bustype = BUS_HOST;
-	input_dev->id.vendor  = 0x001f;
-	input_dev->id.product = 0x0001;
-	input_dev->id.version = 0x0100;
-	input_dev->dev.parent = &dev->dev;
-
-	input_dev->evbit[0] = BIT_MASK(EV_SND);
-	input_dev->sndbit[0] = BIT_MASK(SND_BELL) | BIT_MASK(SND_TONE);
-	input_dev->event = ixp4xx_spkr_event;
-
-	irq = platform_get_irq(dev, 0);
-	if (irq < 0) {
-		err = irq;
-		goto err_free_device;
-	}
-
-	err = gpio_request(dev->id, "ixp4-beeper");
-	if (err)
-		goto err_free_device;
-
-	err = request_irq(irq, &ixp4xx_spkr_interrupt,
-			  IRQF_NO_SUSPEND, "ixp4xx-beeper",
-			  (void *) dev->id);
-	if (err)
-		goto err_free_gpio;
-	ixp4xx_timer2_irq = irq;
-
-	err = input_register_device(input_dev);
-	if (err)
-		goto err_free_irq;
-
-	platform_set_drvdata(dev, input_dev);
-
-	return 0;
-
- err_free_irq:
-	free_irq(irq, (void *)dev->id);
- err_free_gpio:
-	gpio_free(dev->id);
- err_free_device:
-	input_free_device(input_dev);
-
-	return err;
-}
-
-static int ixp4xx_spkr_remove(struct platform_device *dev)
-{
-	struct input_dev *input_dev = platform_get_drvdata(dev);
-	unsigned int pin = (unsigned int) input_get_drvdata(input_dev);
-
-	input_unregister_device(input_dev);
-
-	/* turn the speaker off */
-	disable_irq(ixp4xx_timer2_irq);
-	ixp4xx_spkr_control(pin, 0);
-
-	free_irq(ixp4xx_timer2_irq, (void *)dev->id);
-	gpio_free(dev->id);
-
-	return 0;
-}
-
-static void ixp4xx_spkr_shutdown(struct platform_device *dev)
-{
-	struct input_dev *input_dev = platform_get_drvdata(dev);
-	unsigned int pin = (unsigned int) input_get_drvdata(input_dev);
-
-	/* turn off the speaker */
-	disable_irq(ixp4xx_timer2_irq);
-	ixp4xx_spkr_control(pin, 0);
-}
-
-static struct platform_driver ixp4xx_spkr_platform_driver = {
-	.driver		= {
-		.name	= "ixp4xx-beeper",
-	},
-	.probe		= ixp4xx_spkr_probe,
-	.remove		= ixp4xx_spkr_remove,
-	.shutdown	= ixp4xx_spkr_shutdown,
-};
-module_platform_driver(ixp4xx_spkr_platform_driver);
-
diff --git a/drivers/input/misc/pm8941-pwrkey.c b/drivers/input/misc/pm8941-pwrkey.c
index 10e3fc0eac6e..33609603245d 100644
--- a/drivers/input/misc/pm8941-pwrkey.c
+++ b/drivers/input/misc/pm8941-pwrkey.c
@@ -284,7 +284,7 @@ static int pm8941_pwrkey_probe(struct platform_device *pdev)
 	}
 
 	if (pwrkey->data->supports_ps_hold_poff_config) {
-		pwrkey->reboot_notifier.notifier_call = pm8941_reboot_notify,
+		pwrkey->reboot_notifier.notifier_call = pm8941_reboot_notify;
 		error = register_reboot_notifier(&pwrkey->reboot_notifier);
 		if (error) {
 			dev_err(&pdev->dev, "failed to register reboot notifier: %d\n",
diff --git a/drivers/input/misc/sirfsoc-onkey.c b/drivers/input/misc/sirfsoc-onkey.c
deleted file mode 100644
index 7982bf8fb839..000000000000
--- a/drivers/input/misc/sirfsoc-onkey.c
+++ /dev/null
@@ -1,207 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Power key driver for SiRF PrimaII
- *
- * Copyright (c) 2013 - 2014 Cambridge Silicon Radio Limited, a CSR plc group
- * company.
- */
-
-#include <linux/module.h>
-#include <linux/interrupt.h>
-#include <linux/delay.h>
-#include <linux/platform_device.h>
-#include <linux/input.h>
-#include <linux/rtc/sirfsoc_rtciobrg.h>
-#include <linux/of.h>
-#include <linux/workqueue.h>
-
-struct sirfsoc_pwrc_drvdata {
-	u32			pwrc_base;
-	struct input_dev	*input;
-	struct delayed_work	work;
-};
-
-#define PWRC_ON_KEY_BIT			(1 << 0)
-
-#define PWRC_INT_STATUS			0xc
-#define PWRC_INT_MASK			0x10
-#define PWRC_PIN_STATUS			0x14
-#define PWRC_KEY_DETECT_UP_TIME		20	/* ms*/
-
-static int sirfsoc_pwrc_is_on_key_down(struct sirfsoc_pwrc_drvdata *pwrcdrv)
-{
-	u32 state = sirfsoc_rtc_iobrg_readl(pwrcdrv->pwrc_base +
-							PWRC_PIN_STATUS);
-	return !(state & PWRC_ON_KEY_BIT); /* ON_KEY is active low */
-}
-
-static void sirfsoc_pwrc_report_event(struct work_struct *work)
-{
-	struct sirfsoc_pwrc_drvdata *pwrcdrv =
-		container_of(work, struct sirfsoc_pwrc_drvdata, work.work);
-
-	if (sirfsoc_pwrc_is_on_key_down(pwrcdrv)) {
-		schedule_delayed_work(&pwrcdrv->work,
-			msecs_to_jiffies(PWRC_KEY_DETECT_UP_TIME));
-	} else {
-		input_event(pwrcdrv->input, EV_KEY, KEY_POWER, 0);
-		input_sync(pwrcdrv->input);
-	}
-}
-
-static irqreturn_t sirfsoc_pwrc_isr(int irq, void *dev_id)
-{
-	struct sirfsoc_pwrc_drvdata *pwrcdrv = dev_id;
-	u32 int_status;
-
-	int_status = sirfsoc_rtc_iobrg_readl(pwrcdrv->pwrc_base +
-							PWRC_INT_STATUS);
-	sirfsoc_rtc_iobrg_writel(int_status & ~PWRC_ON_KEY_BIT,
-				 pwrcdrv->pwrc_base + PWRC_INT_STATUS);
-
-	input_event(pwrcdrv->input, EV_KEY, KEY_POWER, 1);
-	input_sync(pwrcdrv->input);
-	schedule_delayed_work(&pwrcdrv->work,
-			      msecs_to_jiffies(PWRC_KEY_DETECT_UP_TIME));
-
-	return IRQ_HANDLED;
-}
-
-static void sirfsoc_pwrc_toggle_interrupts(struct sirfsoc_pwrc_drvdata *pwrcdrv,
-					   bool enable)
-{
-	u32 int_mask;
-
-	int_mask = sirfsoc_rtc_iobrg_readl(pwrcdrv->pwrc_base + PWRC_INT_MASK);
-	if (enable)
-		int_mask |= PWRC_ON_KEY_BIT;
-	else
-		int_mask &= ~PWRC_ON_KEY_BIT;
-	sirfsoc_rtc_iobrg_writel(int_mask, pwrcdrv->pwrc_base + PWRC_INT_MASK);
-}
-
-static int sirfsoc_pwrc_open(struct input_dev *input)
-{
-	struct sirfsoc_pwrc_drvdata *pwrcdrv = input_get_drvdata(input);
-
-	sirfsoc_pwrc_toggle_interrupts(pwrcdrv, true);
-
-	return 0;
-}
-
-static void sirfsoc_pwrc_close(struct input_dev *input)
-{
-	struct sirfsoc_pwrc_drvdata *pwrcdrv = input_get_drvdata(input);
-
-	sirfsoc_pwrc_toggle_interrupts(pwrcdrv, false);
-	cancel_delayed_work_sync(&pwrcdrv->work);
-}
-
-static const struct of_device_id sirfsoc_pwrc_of_match[] = {
-	{ .compatible = "sirf,prima2-pwrc" },
-	{},
-};
-MODULE_DEVICE_TABLE(of, sirfsoc_pwrc_of_match);
-
-static int sirfsoc_pwrc_probe(struct platform_device *pdev)
-{
-	struct device_node *np = pdev->dev.of_node;
-	struct sirfsoc_pwrc_drvdata *pwrcdrv;
-	int irq;
-	int error;
-
-	pwrcdrv = devm_kzalloc(&pdev->dev, sizeof(struct sirfsoc_pwrc_drvdata),
-			       GFP_KERNEL);
-	if (!pwrcdrv) {
-		dev_info(&pdev->dev, "Not enough memory for the device data\n");
-		return -ENOMEM;
-	}
-
-	/*
-	 * We can't use of_iomap because pwrc is not mapped in memory,
-	 * the so-called base address is only offset in rtciobrg
-	 */
-	error = of_property_read_u32(np, "reg", &pwrcdrv->pwrc_base);
-	if (error) {
-		dev_err(&pdev->dev,
-			"unable to find base address of pwrc node in dtb\n");
-		return error;
-	}
-
-	pwrcdrv->input = devm_input_allocate_device(&pdev->dev);
-	if (!pwrcdrv->input)
-		return -ENOMEM;
-
-	pwrcdrv->input->name = "sirfsoc pwrckey";
-	pwrcdrv->input->phys = "pwrc/input0";
-	pwrcdrv->input->evbit[0] = BIT_MASK(EV_KEY);
-	input_set_capability(pwrcdrv->input, EV_KEY, KEY_POWER);
-
-	INIT_DELAYED_WORK(&pwrcdrv->work, sirfsoc_pwrc_report_event);
-
-	pwrcdrv->input->open = sirfsoc_pwrc_open;
-	pwrcdrv->input->close = sirfsoc_pwrc_close;
-
-	input_set_drvdata(pwrcdrv->input, pwrcdrv);
-
-	/* Make sure the device is quiesced */
-	sirfsoc_pwrc_toggle_interrupts(pwrcdrv, false);
-
-	irq = platform_get_irq(pdev, 0);
-	error = devm_request_irq(&pdev->dev, irq,
-				 sirfsoc_pwrc_isr, 0,
-				 "sirfsoc_pwrc_int", pwrcdrv);
-	if (error) {
-		dev_err(&pdev->dev, "unable to claim irq %d, error: %d\n",
-			irq, error);
-		return error;
-	}
-
-	error = input_register_device(pwrcdrv->input);
-	if (error) {
-		dev_err(&pdev->dev,
-			"unable to register input device, error: %d\n",
-			error);
-		return error;
-	}
-
-	dev_set_drvdata(&pdev->dev, pwrcdrv);
-	device_init_wakeup(&pdev->dev, 1);
-
-	return 0;
-}
-
-static int __maybe_unused sirfsoc_pwrc_resume(struct device *dev)
-{
-	struct sirfsoc_pwrc_drvdata *pwrcdrv = dev_get_drvdata(dev);
-	struct input_dev *input = pwrcdrv->input;
-
-	/*
-	 * Do not mask pwrc interrupt as we want pwrc work as a wakeup source
-	 * if users touch X_ONKEY_B, see arch/arm/mach-prima2/pm.c
-	 */
-	mutex_lock(&input->mutex);
-	if (input_device_enabled(input))
-		sirfsoc_pwrc_toggle_interrupts(pwrcdrv, true);
-	mutex_unlock(&input->mutex);
-
-	return 0;
-}
-
-static SIMPLE_DEV_PM_OPS(sirfsoc_pwrc_pm_ops, NULL, sirfsoc_pwrc_resume);
-
-static struct platform_driver sirfsoc_pwrc_driver = {
-	.probe		= sirfsoc_pwrc_probe,
-	.driver		= {
-		.name	= "sirfsoc-pwrc",
-		.pm	= &sirfsoc_pwrc_pm_ops,
-		.of_match_table = sirfsoc_pwrc_of_match,
-	}
-};
-
-module_platform_driver(sirfsoc_pwrc_driver);
-
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Binghua Duan <Binghua.Duan@csr.com>, Xianglong Du <Xianglong.Du@csr.com>");
-MODULE_DESCRIPTION("CSR Prima2 PWRC Driver");
-MODULE_ALIAS("platform:sirfsoc-pwrc");
diff --git a/drivers/input/mouse/elan_i2c.h b/drivers/input/mouse/elan_i2c.h
index dc4a240f4489..3c84deefa327 100644
--- a/drivers/input/mouse/elan_i2c.h
+++ b/drivers/input/mouse/elan_i2c.h
@@ -55,8 +55,9 @@
 #define ETP_FW_PAGE_SIZE_512	512
 #define ETP_FW_SIGNATURE_SIZE	6
 
-#define ETP_PRODUCT_ID_DELBIN	0x00C2
+#define ETP_PRODUCT_ID_WHITEBOX	0x00B8
 #define ETP_PRODUCT_ID_VOXEL	0x00BF
+#define ETP_PRODUCT_ID_DELBIN	0x00C2
 #define ETP_PRODUCT_ID_MAGPIE	0x0120
 #define ETP_PRODUCT_ID_BOBBA	0x0121
 
diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
index dad22c1ea6a0..47af62c12267 100644
--- a/drivers/input/mouse/elan_i2c_core.c
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -105,6 +105,7 @@ static u32 elan_i2c_lookup_quirks(u16 ic_type, u16 product_id)
 		u32 quirks;
 	} elan_i2c_quirks[] = {
 		{ 0x0D, ETP_PRODUCT_ID_DELBIN, ETP_QUIRK_QUICK_WAKEUP },
+		{ 0x0D, ETP_PRODUCT_ID_WHITEBOX, ETP_QUIRK_QUICK_WAKEUP },
 		{ 0x10, ETP_PRODUCT_ID_VOXEL, ETP_QUIRK_QUICK_WAKEUP },
 		{ 0x14, ETP_PRODUCT_ID_MAGPIE, ETP_QUIRK_QUICK_WAKEUP },
 		{ 0x14, ETP_PRODUCT_ID_BOBBA, ETP_QUIRK_QUICK_WAKEUP },
diff --git a/drivers/input/serio/gscps2.c b/drivers/input/serio/gscps2.c
index 2f9775de3c5b..a9065c6ab550 100644
--- a/drivers/input/serio/gscps2.c
+++ b/drivers/input/serio/gscps2.c
@@ -411,7 +411,7 @@ fail_nomem:
  * @return: success/error report
  */
 
-static int __exit gscps2_remove(struct parisc_device *dev)
+static void __exit gscps2_remove(struct parisc_device *dev)
 {
 	struct gscps2port *ps2port = dev_get_drvdata(&dev->dev);
 
@@ -425,7 +425,6 @@ static int __exit gscps2_remove(struct parisc_device *dev)
 #endif
 	dev_set_drvdata(&dev->dev, NULL);
 	kfree(ps2port);
-	return 0;
 }
 
 
diff --git a/drivers/input/serio/parkbd.c b/drivers/input/serio/parkbd.c
index 3ac57a91ede4..51b68501896c 100644
--- a/drivers/input/serio/parkbd.c
+++ b/drivers/input/serio/parkbd.c
@@ -220,16 +220,4 @@ static struct parport_driver parkbd_parport_driver = {
 	.detach = parkbd_detach,
 	.devmodel = true,
 };
-
-static int __init parkbd_init(void)
-{
-	return parport_register_driver(&parkbd_parport_driver);
-}
-
-static void __exit parkbd_exit(void)
-{
-	parport_unregister_driver(&parkbd_parport_driver);
-}
-
-module_init(parkbd_init);
-module_exit(parkbd_exit);
+module_parport_driver(parkbd_parport_driver);
diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index ad454cd2855a..d4e74738c5a8 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -932,7 +932,7 @@ config TOUCHSCREEN_USB_COMPOSITE
 	  - JASTEC USB Touch Controller/DigiTech DTR-02U
 	  - Zytronic controllers
 	  - Elo TouchSystems 2700 IntelliTouch
-	  - EasyTouch USB Touch Controller from Data Modul
+	  - EasyTouch USB Touch Controller from Data Module
 	  - e2i (Mimo monitors)
 
 	  Have a look at <http://linux.chapter7.ch/touchkit/> for
diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c
index 263de3bfb6cd..bb2e1cbffba7 100644
--- a/drivers/input/touchscreen/edt-ft5x06.c
+++ b/drivers/input/touchscreen/edt-ft5x06.c
@@ -899,6 +899,7 @@ static int edt_ft5x06_ts_identify(struct i2c_client *client,
 		 * the identification registers.
 		 */
 		switch (rdbuf[0]) {
+		case 0x11:   /* EDT EP0110M09 */
 		case 0x35:   /* EDT EP0350M09 */
 		case 0x43:   /* EDT EP0430M09 */
 		case 0x50:   /* EDT EP0500M09 */
diff --git a/drivers/input/touchscreen/mms114.c b/drivers/input/touchscreen/mms114.c
index 0efd1a1bb192..9fa3b0e421be 100644
--- a/drivers/input/touchscreen/mms114.c
+++ b/drivers/input/touchscreen/mms114.c
@@ -54,6 +54,7 @@
 
 enum mms_type {
 	TYPE_MMS114	= 114,
+	TYPE_MMS134S	= 134,
 	TYPE_MMS136	= 136,
 	TYPE_MMS152	= 152,
 	TYPE_MMS345L	= 345,
@@ -212,7 +213,7 @@ static irqreturn_t mms114_interrupt(int irq, void *dev_id)
 		goto out;
 
 	/* MMS136 has slightly different event size */
-	if (data->type == TYPE_MMS136)
+	if (data->type == TYPE_MMS134S || data->type == TYPE_MMS136)
 		touch_size = packet_size / MMS136_EVENT_SIZE;
 	else
 		touch_size = packet_size / MMS114_EVENT_SIZE;
@@ -281,6 +282,7 @@ static int mms114_get_version(struct mms114_data *data)
 		break;
 
 	case TYPE_MMS114:
+	case TYPE_MMS134S:
 	case TYPE_MMS136:
 		error = __mms114_read_reg(data, MMS114_TSP_REV, 6, buf);
 		if (error)
@@ -304,8 +306,9 @@ static int mms114_setup_regs(struct mms114_data *data)
 	if (error < 0)
 		return error;
 
-	/* Only MMS114 and MMS136 have configuration and power on registers */
-	if (data->type != TYPE_MMS114 && data->type != TYPE_MMS136)
+	/* MMS114, MMS134S and MMS136 have configuration and power on registers */
+	if (data->type != TYPE_MMS114 && data->type != TYPE_MMS134S &&
+	    data->type != TYPE_MMS136)
 		return 0;
 
 	error = mms114_set_active(data, true);
@@ -487,7 +490,8 @@ static int mms114_probe(struct i2c_client *client,
 				     0, data->props.max_y, 0, 0);
 	}
 
-	if (data->type == TYPE_MMS114 || data->type == TYPE_MMS136) {
+	if (data->type == TYPE_MMS114 || data->type == TYPE_MMS134S ||
+	    data->type == TYPE_MMS136) {
 		/*
 		 * The firmware handles movement and pressure fuzz, so
 		 * don't duplicate that in software.
@@ -612,6 +616,9 @@ static const struct of_device_id mms114_dt_match[] = {
 		.compatible = "melfas,mms114",
 		.data = (void *)TYPE_MMS114,
 	}, {
+		.compatible = "melfas,mms134s",
+		.data = (void *)TYPE_MMS134S,
+	}, {
 		.compatible = "melfas,mms136",
 		.data = (void *)TYPE_MMS136,
 	}, {
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index f61516c17589..124c41adeca1 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -79,16 +79,57 @@ config IOMMU_DEBUGFS
 	  debug/iommu directory, and then populate a subdirectory with
 	  entries as required.
 
-config IOMMU_DEFAULT_PASSTHROUGH
-	bool "IOMMU passthrough by default"
+choice
+	prompt "IOMMU default domain type"
 	depends on IOMMU_API
+	default IOMMU_DEFAULT_DMA_LAZY if X86 || IA64
+	default IOMMU_DEFAULT_DMA_STRICT
 	help
-	  Enable passthrough by default, removing the need to pass in
-	  iommu.passthrough=on or iommu=pt through command line. If this
-	  is enabled, you can still disable with iommu.passthrough=off
-	  or iommu=nopt depending on the architecture.
+	  Choose the type of IOMMU domain used to manage DMA API usage by
+	  device drivers. The options here typically represent different
+	  levels of tradeoff between robustness/security and performance,
+	  depending on the IOMMU driver. Not all IOMMUs support all options.
+	  This choice can be overridden at boot via the command line, and for
+	  some devices also at runtime via sysfs.
 
-	  If unsure, say N here.
+	  If unsure, keep the default.
+
+config IOMMU_DEFAULT_DMA_STRICT
+	bool "Translated - Strict"
+	help
+	  Trusted devices use translation to restrict their access to only
+	  DMA-mapped pages, with strict TLB invalidation on unmap. Equivalent
+	  to passing "iommu.passthrough=0 iommu.strict=1" on the command line.
+
+	  Untrusted devices always use this mode, with an additional layer of
+	  bounce-buffering such that they cannot gain access to any unrelated
+	  data within a mapped page.
+
+config IOMMU_DEFAULT_DMA_LAZY
+	bool "Translated - Lazy"
+	help
+	  Trusted devices use translation to restrict their access to only
+	  DMA-mapped pages, but with "lazy" batched TLB invalidation. This
+	  mode allows higher performance with some IOMMUs due to reduced TLB
+	  flushing, but at the cost of reduced isolation since devices may be
+	  able to access memory for some time after it has been unmapped.
+	  Equivalent to passing "iommu.passthrough=0 iommu.strict=0" on the
+	  command line.
+
+	  If this mode is not supported by the IOMMU driver, the effective
+	  runtime default will fall back to IOMMU_DEFAULT_DMA_STRICT.
+
+config IOMMU_DEFAULT_PASSTHROUGH
+	bool "Passthrough"
+	help
+	  Trusted devices are identity-mapped, giving them unrestricted access
+	  to memory with minimal performance overhead. Equivalent to passing
+	  "iommu.passthrough=1" (historically "iommu=pt") on the command line.
+
+	  If this mode is not supported by the IOMMU driver, the effective
+	  runtime default will fall back to IOMMU_DEFAULT_DMA_STRICT.
+
+endchoice
 
 config OF_IOMMU
 	def_bool y
@@ -249,6 +290,20 @@ config SPAPR_TCE_IOMMU
 	  Enables bits of IOMMU API required by VFIO. The iommu_ops
 	  is not implemented as it is not necessary for VFIO.
 
+config APPLE_DART
+	tristate "Apple DART IOMMU Support"
+	depends on ARCH_APPLE || (COMPILE_TEST && !GENERIC_ATOMIC64)
+	select IOMMU_API
+	select IOMMU_IO_PGTABLE_LPAE
+	default ARCH_APPLE
+	help
+	  Support for Apple DART (Device Address Resolution Table) IOMMUs
+	  found in Apple ARM SoCs like the M1.
+	  This IOMMU is required for most peripherals using DMA to access
+	  the main memory.
+
+	  Say Y here if you are using an Apple SoC.
+
 # ARM IOMMU support
 config ARM_SMMU
 	tristate "ARM Ltd. System MMU (SMMU) Support"
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index c0fb0ba88143..bc7f730edbb0 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -29,3 +29,4 @@ obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
 obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
 obj-$(CONFIG_IOMMU_SVA_LIB) += iommu-sva-lib.o io-pgfault.o
 obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o
+obj-$(CONFIG_APPLE_DART) += apple-dart.o
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 94c1a7a9876d..8dbe61e2b3c1 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -779,12 +779,6 @@ extern u16 amd_iommu_last_bdf;
 /* allocation bitmap for domain ids */
 extern unsigned long *amd_iommu_pd_alloc_bitmap;
 
-/*
- * If true, the addresses will be flushed on unmap time, not when
- * they are reused
- */
-extern bool amd_iommu_unmap_flush;
-
 /* Smallest max PASID supported by any IOMMU in the system */
 extern u32 amd_iommu_max_pasid;
 
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 46280e6e1535..2a822b229bd0 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -161,7 +161,6 @@ u16 amd_iommu_last_bdf;			/* largest PCI device id we have
 					   to handle */
 LIST_HEAD(amd_iommu_unity_map);		/* a list of required unity mappings
 					   we find in ACPI */
-bool amd_iommu_unmap_flush;		/* if true, flush on every unmap */
 
 LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
 					   system */
@@ -298,6 +297,22 @@ int amd_iommu_get_num_iommus(void)
 	return amd_iommus_present;
 }
 
+#ifdef CONFIG_IRQ_REMAP
+static bool check_feature_on_all_iommus(u64 mask)
+{
+	bool ret = false;
+	struct amd_iommu *iommu;
+
+	for_each_iommu(iommu) {
+		ret = iommu_feature(iommu, mask);
+		if (!ret)
+			return false;
+	}
+
+	return true;
+}
+#endif
+
 /*
  * For IVHD type 0x11/0x40, EFR is also available via IVHD.
  * Default to IVHD EFR since it is available sooner
@@ -814,9 +829,9 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu)
 	return 0;
 }
 
-#ifdef CONFIG_IRQ_REMAP
 static int iommu_init_ga_log(struct amd_iommu *iommu)
 {
+#ifdef CONFIG_IRQ_REMAP
 	u64 entry;
 
 	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
@@ -846,25 +861,9 @@ static int iommu_init_ga_log(struct amd_iommu *iommu)
 err_out:
 	free_ga_log(iommu);
 	return -EINVAL;
-}
-#endif /* CONFIG_IRQ_REMAP */
-
-static int iommu_init_ga(struct amd_iommu *iommu)
-{
-	int ret = 0;
-
-#ifdef CONFIG_IRQ_REMAP
-	/* Note: We have already checked GASup from IVRS table.
-	 *       Now, we need to make sure that GAMSup is set.
-	 */
-	if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
-	    !iommu_feature(iommu, FEATURE_GAM_VAPIC))
-		amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
-
-	ret = iommu_init_ga_log(iommu);
+#else
+	return 0;
 #endif /* CONFIG_IRQ_REMAP */
-
-	return ret;
 }
 
 static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
@@ -1846,12 +1845,15 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
 	if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu))
 		return -ENOMEM;
 
-	ret = iommu_init_ga(iommu);
+	ret = iommu_init_ga_log(iommu);
 	if (ret)
 		return ret;
 
-	if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
+	if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) {
+		pr_info("Using strict mode due to virtualization\n");
+		iommu_set_dma_strict();
 		amd_iommu_np_cache = true;
+	}
 
 	init_iommu_perf_ctr(iommu);
 
@@ -2477,6 +2479,14 @@ static void early_enable_iommus(void)
 	}
 
 #ifdef CONFIG_IRQ_REMAP
+	/*
+	 * Note: We have already checked GASup from IVRS table.
+	 *       Now, we need to make sure that GAMSup is set.
+	 */
+	if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
+	    !check_feature_on_all_iommus(FEATURE_GAM_VAPIC))
+		amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
+
 	if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
 		amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP);
 #endif
@@ -3098,8 +3108,10 @@ static int __init parse_amd_iommu_intr(char *str)
 static int __init parse_amd_iommu_options(char *str)
 {
 	for (; *str; ++str) {
-		if (strncmp(str, "fullflush", 9) == 0)
-			amd_iommu_unmap_flush = true;
+		if (strncmp(str, "fullflush", 9) == 0) {
+			pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n");
+			iommu_set_dma_strict();
+		}
 		if (strncmp(str, "force_enable", 12) == 0)
 			amd_iommu_force_enable = true;
 		if (strncmp(str, "off", 3) == 0)
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index bb0ee5c9fde7..182c93a43efd 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -493,9 +493,6 @@ static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned lo
 	unsigned long offset_mask, pte_pgsize;
 	u64 *pte, __pte;
 
-	if (pgtable->mode == PAGE_MODE_NONE)
-		return iova;
-
 	pte = fetch_pte(pgtable, iova, &pte_pgsize);
 
 	if (!pte || !IOMMU_PTE_PRESENT(*pte))
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 811a49a95d04..1722bb161841 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -425,9 +425,11 @@ static void amd_iommu_report_rmp_hw_error(volatile u32 *event)
 	if (pdev)
 		dev_data = dev_iommu_priv_get(&pdev->dev);
 
-	if (dev_data && __ratelimit(&dev_data->rs)) {
-		pci_err(pdev, "Event logged [RMP_HW_ERROR vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n",
-			vmg_tag, spa, flags);
+	if (dev_data) {
+		if (__ratelimit(&dev_data->rs)) {
+			pci_err(pdev, "Event logged [RMP_HW_ERROR vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n",
+				vmg_tag, spa, flags);
+		}
 	} else {
 		pr_err_ratelimited("Event logged [RMP_HW_ERROR device=%02x:%02x.%x, vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n",
 			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
@@ -456,9 +458,11 @@ static void amd_iommu_report_rmp_fault(volatile u32 *event)
 	if (pdev)
 		dev_data = dev_iommu_priv_get(&pdev->dev);
 
-	if (dev_data && __ratelimit(&dev_data->rs)) {
-		pci_err(pdev, "Event logged [RMP_PAGE_FAULT vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n",
-			vmg_tag, gpa, flags_rmp, flags);
+	if (dev_data) {
+		if (__ratelimit(&dev_data->rs)) {
+			pci_err(pdev, "Event logged [RMP_PAGE_FAULT vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n",
+				vmg_tag, gpa, flags_rmp, flags);
+		}
 	} else {
 		pr_err_ratelimited("Event logged [RMP_PAGE_FAULT device=%02x:%02x.%x, vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n",
 			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
@@ -480,11 +484,13 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
 	if (pdev)
 		dev_data = dev_iommu_priv_get(&pdev->dev);
 
-	if (dev_data && __ratelimit(&dev_data->rs)) {
-		pci_err(pdev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n",
-			domain_id, address, flags);
-	} else if (printk_ratelimit()) {
-		pr_err("Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n",
+	if (dev_data) {
+		if (__ratelimit(&dev_data->rs)) {
+			pci_err(pdev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n",
+				domain_id, address, flags);
+		}
+	} else {
+		pr_err_ratelimited("Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n",
 			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
 			domain_id, address, flags);
 	}
@@ -1261,15 +1267,52 @@ static void __domain_flush_pages(struct protection_domain *domain,
 }
 
 static void domain_flush_pages(struct protection_domain *domain,
-			       u64 address, size_t size)
+			       u64 address, size_t size, int pde)
 {
-	__domain_flush_pages(domain, address, size, 0);
+	if (likely(!amd_iommu_np_cache)) {
+		__domain_flush_pages(domain, address, size, pde);
+		return;
+	}
+
+	/*
+	 * When NpCache is on, we infer that we run in a VM and use a vIOMMU.
+	 * In such setups it is best to avoid flushes of ranges which are not
+	 * naturally aligned, since it would lead to flushes of unmodified
+	 * PTEs. Such flushes would require the hypervisor to do more work than
+	 * necessary. Therefore, perform repeated flushes of aligned ranges
+	 * until you cover the range. Each iteration flushes the smaller
+	 * between the natural alignment of the address that we flush and the
+	 * greatest naturally aligned region that fits in the range.
+	 */
+	while (size != 0) {
+		int addr_alignment = __ffs(address);
+		int size_alignment = __fls(size);
+		int min_alignment;
+		size_t flush_size;
+
+		/*
+		 * size is always non-zero, but address might be zero, causing
+		 * addr_alignment to be negative. As the casting of the
+		 * argument in __ffs(address) to long might trim the high bits
+		 * of the address on x86-32, cast to long when doing the check.
+		 */
+		if (likely((unsigned long)address != 0))
+			min_alignment = min(addr_alignment, size_alignment);
+		else
+			min_alignment = size_alignment;
+
+		flush_size = 1ul << min_alignment;
+
+		__domain_flush_pages(domain, address, flush_size, pde);
+		address += flush_size;
+		size -= flush_size;
+	}
 }
 
 /* Flush the whole IO/TLB for a given protection domain - including PDE */
 void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain)
 {
-	__domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
+	domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
 }
 
 void amd_iommu_domain_flush_complete(struct protection_domain *domain)
@@ -1296,7 +1339,7 @@ static void domain_flush_np_cache(struct protection_domain *domain,
 		unsigned long flags;
 
 		spin_lock_irqsave(&domain->lock, flags);
-		domain_flush_pages(domain, iova, size);
+		domain_flush_pages(domain, iova, size, 1);
 		amd_iommu_domain_flush_complete(domain);
 		spin_unlock_irqrestore(&domain->lock, flags);
 	}
@@ -1707,14 +1750,9 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev)
 
 static void amd_iommu_probe_finalize(struct device *dev)
 {
-	struct iommu_domain *domain;
-
 	/* Domains are initialized for this device - have a look what we ended up with */
-	domain = iommu_get_domain_for_dev(dev);
-	if (domain->type == IOMMU_DOMAIN_DMA)
-		iommu_setup_dma_ops(dev, 0, U64_MAX);
-	else
-		set_dma_ops(dev, NULL);
+	set_dma_ops(dev, NULL);
+	iommu_setup_dma_ops(dev, 0, U64_MAX);
 }
 
 static void amd_iommu_release_device(struct device *dev)
@@ -1775,12 +1813,6 @@ void amd_iommu_domain_update(struct protection_domain *domain)
 static void __init amd_iommu_init_dma_ops(void)
 {
 	swiotlb = (iommu_default_passthrough() || sme_me_mask) ? 1 : 0;
-
-	if (amd_iommu_unmap_flush)
-		pr_info("IO/TLB flush on unmap enabled\n");
-	else
-		pr_info("Lazy IO/TLB flushing enabled\n");
-	iommu_set_dma_strict(amd_iommu_unmap_flush);
 }
 
 int __init amd_iommu_init_api(void)
@@ -1924,16 +1956,7 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
 	domain->domain.geometry.aperture_end   = ~0ULL;
 	domain->domain.geometry.force_aperture = true;
 
-	if (type == IOMMU_DOMAIN_DMA &&
-	    iommu_get_dma_cookie(&domain->domain) == -ENOMEM)
-		goto free_domain;
-
 	return &domain->domain;
-
-free_domain:
-	protection_domain_free(domain);
-
-	return NULL;
 }
 
 static void amd_iommu_domain_free(struct iommu_domain *dom)
@@ -1950,9 +1973,6 @@ static void amd_iommu_domain_free(struct iommu_domain *dom)
 	if (!dom)
 		return;
 
-	if (dom->type == IOMMU_DOMAIN_DMA)
-		iommu_put_dma_cookie(&domain->domain);
-
 	if (domain->flags & PD_IOMMUV2_MASK)
 		free_gcr3_table(domain);
 
@@ -2022,6 +2042,16 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
 	return ret;
 }
 
+static void amd_iommu_iotlb_sync_map(struct iommu_domain *dom,
+				     unsigned long iova, size_t size)
+{
+	struct protection_domain *domain = to_pdomain(dom);
+	struct io_pgtable_ops *ops = &domain->iop.iop.ops;
+
+	if (ops->map)
+		domain_flush_np_cache(domain, iova, size);
+}
+
 static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
 			 phys_addr_t paddr, size_t page_size, int iommu_prot,
 			 gfp_t gfp)
@@ -2040,26 +2070,50 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
 	if (iommu_prot & IOMMU_WRITE)
 		prot |= IOMMU_PROT_IW;
 
-	if (ops->map) {
+	if (ops->map)
 		ret = ops->map(ops, iova, paddr, page_size, prot, gfp);
-		domain_flush_np_cache(domain, iova, page_size);
-	}
 
 	return ret;
 }
 
+static void amd_iommu_iotlb_gather_add_page(struct iommu_domain *domain,
+					    struct iommu_iotlb_gather *gather,
+					    unsigned long iova, size_t size)
+{
+	/*
+	 * AMD's IOMMU can flush as many pages as necessary in a single flush.
+	 * Unless we run in a virtual machine, which can be inferred according
+	 * to whether "non-present cache" is on, it is probably best to prefer
+	 * (potentially) too extensive TLB flushing (i.e., more misses) over
+	 * mutliple TLB flushes (i.e., more flushes). For virtual machines the
+	 * hypervisor needs to synchronize the host IOMMU PTEs with those of
+	 * the guest, and the trade-off is different: unnecessary TLB flushes
+	 * should be avoided.
+	 */
+	if (amd_iommu_np_cache &&
+	    iommu_iotlb_gather_is_disjoint(gather, iova, size))
+		iommu_iotlb_sync(domain, gather);
+
+	iommu_iotlb_gather_add_range(gather, iova, size);
+}
+
 static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
 			      size_t page_size,
 			      struct iommu_iotlb_gather *gather)
 {
 	struct protection_domain *domain = to_pdomain(dom);
 	struct io_pgtable_ops *ops = &domain->iop.iop.ops;
+	size_t r;
 
 	if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
 	    (domain->iop.mode == PAGE_MODE_NONE))
 		return 0;
 
-	return (ops->unmap) ? ops->unmap(ops, iova, page_size, gather) : 0;
+	r = (ops->unmap) ? ops->unmap(ops, iova, page_size, gather) : 0;
+
+	amd_iommu_iotlb_gather_add_page(dom, gather, iova, page_size);
+
+	return r;
 }
 
 static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
@@ -2162,7 +2216,13 @@ static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
 static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
 				 struct iommu_iotlb_gather *gather)
 {
-	amd_iommu_flush_iotlb_all(domain);
+	struct protection_domain *dom = to_pdomain(domain);
+	unsigned long flags;
+
+	spin_lock_irqsave(&dom->lock, flags);
+	domain_flush_pages(dom, gather->start, gather->end - gather->start, 1);
+	amd_iommu_domain_flush_complete(dom);
+	spin_unlock_irqrestore(&dom->lock, flags);
 }
 
 static int amd_iommu_def_domain_type(struct device *dev)
@@ -2191,6 +2251,7 @@ const struct iommu_ops amd_iommu_ops = {
 	.attach_dev = amd_iommu_attach_device,
 	.detach_dev = amd_iommu_detach_device,
 	.map = amd_iommu_map,
+	.iotlb_sync_map	= amd_iommu_iotlb_sync_map,
 	.unmap = amd_iommu_unmap,
 	.iova_to_phys = amd_iommu_iova_to_phys,
 	.probe_device = amd_iommu_probe_device,
diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c
index f8d4ad421e07..a9e568276c99 100644
--- a/drivers/iommu/amd/iommu_v2.c
+++ b/drivers/iommu/amd/iommu_v2.c
@@ -6,6 +6,7 @@
 
 #define pr_fmt(fmt)     "AMD-Vi: " fmt
 
+#include <linux/refcount.h>
 #include <linux/mmu_notifier.h>
 #include <linux/amd-iommu.h>
 #include <linux/mm_types.h>
@@ -33,7 +34,7 @@ struct pri_queue {
 
 struct pasid_state {
 	struct list_head list;			/* For global state-list */
-	atomic_t count;				/* Reference count */
+	refcount_t count;				/* Reference count */
 	unsigned mmu_notifier_count;		/* Counting nested mmu_notifier
 						   calls */
 	struct mm_struct *mm;			/* mm_struct for the faults */
@@ -242,7 +243,7 @@ static struct pasid_state *get_pasid_state(struct device_state *dev_state,
 
 	ret = *ptr;
 	if (ret)
-		atomic_inc(&ret->count);
+		refcount_inc(&ret->count);
 
 out_unlock:
 	spin_unlock_irqrestore(&dev_state->lock, flags);
@@ -257,14 +258,14 @@ static void free_pasid_state(struct pasid_state *pasid_state)
 
 static void put_pasid_state(struct pasid_state *pasid_state)
 {
-	if (atomic_dec_and_test(&pasid_state->count))
+	if (refcount_dec_and_test(&pasid_state->count))
 		wake_up(&pasid_state->wq);
 }
 
 static void put_pasid_state_wait(struct pasid_state *pasid_state)
 {
-	atomic_dec(&pasid_state->count);
-	wait_event(pasid_state->wq, !atomic_read(&pasid_state->count));
+	refcount_dec(&pasid_state->count);
+	wait_event(pasid_state->wq, !refcount_read(&pasid_state->count));
 	free_pasid_state(pasid_state);
 }
 
@@ -624,7 +625,7 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid,
 		goto out;
 
 
-	atomic_set(&pasid_state->count, 1);
+	refcount_set(&pasid_state->count, 1);
 	init_waitqueue_head(&pasid_state->wq);
 	spin_lock_init(&pasid_state->lock);
 
diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c
new file mode 100644
index 000000000000..559db9259e65
--- /dev/null
+++ b/drivers/iommu/apple-dart.c
@@ -0,0 +1,923 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Apple DART (Device Address Resolution Table) IOMMU driver
+ *
+ * Copyright (C) 2021 The Asahi Linux Contributors
+ *
+ * Based on arm/arm-smmu/arm-ssmu.c and arm/arm-smmu-v3/arm-smmu-v3.c
+ *  Copyright (C) 2013 ARM Limited
+ *  Copyright (C) 2015 ARM Limited
+ * and on exynos-iommu.c
+ *  Copyright (c) 2011,2016 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/atomic.h>
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/dev_printk.h>
+#include <linux/dma-iommu.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io-pgtable.h>
+#include <linux/iommu.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_iommu.h>
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/swab.h>
+#include <linux/types.h>
+
+#define DART_MAX_STREAMS 16
+#define DART_MAX_TTBR 4
+#define MAX_DARTS_PER_DEVICE 2
+
+#define DART_STREAM_ALL 0xffff
+
+#define DART_PARAMS1 0x00
+#define DART_PARAMS_PAGE_SHIFT GENMASK(27, 24)
+
+#define DART_PARAMS2 0x04
+#define DART_PARAMS_BYPASS_SUPPORT BIT(0)
+
+#define DART_STREAM_COMMAND 0x20
+#define DART_STREAM_COMMAND_BUSY BIT(2)
+#define DART_STREAM_COMMAND_INVALIDATE BIT(20)
+
+#define DART_STREAM_SELECT 0x34
+
+#define DART_ERROR 0x40
+#define DART_ERROR_STREAM GENMASK(27, 24)
+#define DART_ERROR_CODE GENMASK(11, 0)
+#define DART_ERROR_FLAG BIT(31)
+
+#define DART_ERROR_READ_FAULT BIT(4)
+#define DART_ERROR_WRITE_FAULT BIT(3)
+#define DART_ERROR_NO_PTE BIT(2)
+#define DART_ERROR_NO_PMD BIT(1)
+#define DART_ERROR_NO_TTBR BIT(0)
+
+#define DART_CONFIG 0x60
+#define DART_CONFIG_LOCK BIT(15)
+
+#define DART_STREAM_COMMAND_BUSY_TIMEOUT 100
+
+#define DART_ERROR_ADDR_HI 0x54
+#define DART_ERROR_ADDR_LO 0x50
+
+#define DART_TCR(sid) (0x100 + 4 * (sid))
+#define DART_TCR_TRANSLATE_ENABLE BIT(7)
+#define DART_TCR_BYPASS0_ENABLE BIT(8)
+#define DART_TCR_BYPASS1_ENABLE BIT(12)
+
+#define DART_TTBR(sid, idx) (0x200 + 16 * (sid) + 4 * (idx))
+#define DART_TTBR_VALID BIT(31)
+#define DART_TTBR_SHIFT 12
+
+/*
+ * Private structure associated with each DART device.
+ *
+ * @dev: device struct
+ * @regs: mapped MMIO region
+ * @irq: interrupt number, can be shared with other DARTs
+ * @clks: clocks associated with this DART
+ * @num_clks: number of @clks
+ * @lock: lock for hardware operations involving this dart
+ * @pgsize: pagesize supported by this DART
+ * @supports_bypass: indicates if this DART supports bypass mode
+ * @force_bypass: force bypass mode due to pagesize mismatch?
+ * @sid2group: maps stream ids to iommu_groups
+ * @iommu: iommu core device
+ */
+struct apple_dart {
+	struct device *dev;
+
+	void __iomem *regs;
+
+	int irq;
+	struct clk_bulk_data *clks;
+	int num_clks;
+
+	spinlock_t lock;
+
+	u32 pgsize;
+	u32 supports_bypass : 1;
+	u32 force_bypass : 1;
+
+	struct iommu_group *sid2group[DART_MAX_STREAMS];
+	struct iommu_device iommu;
+};
+
+/*
+ * Convenience struct to identify streams.
+ *
+ * The normal variant is used inside apple_dart_master_cfg which isn't written
+ * to concurrently.
+ * The atomic variant is used inside apple_dart_domain where we have to guard
+ * against races from potential parallel calls to attach/detach_device.
+ * Note that even inside the atomic variant the apple_dart pointer is not
+ * protected: This pointer is initialized once under the domain init mutex
+ * and never changed again afterwards. Devices with different dart pointers
+ * cannot be attached to the same domain.
+ *
+ * @dart dart pointer
+ * @sid stream id bitmap
+ */
+struct apple_dart_stream_map {
+	struct apple_dart *dart;
+	unsigned long sidmap;
+};
+struct apple_dart_atomic_stream_map {
+	struct apple_dart *dart;
+	atomic64_t sidmap;
+};
+
+/*
+ * This structure is attached to each iommu domain handled by a DART.
+ *
+ * @pgtbl_ops: pagetable ops allocated by io-pgtable
+ * @finalized: true if the domain has been completely initialized
+ * @init_lock: protects domain initialization
+ * @stream_maps: streams attached to this domain (valid for DMA/UNMANAGED only)
+ * @domain: core iommu domain pointer
+ */
+struct apple_dart_domain {
+	struct io_pgtable_ops *pgtbl_ops;
+
+	bool finalized;
+	struct mutex init_lock;
+	struct apple_dart_atomic_stream_map stream_maps[MAX_DARTS_PER_DEVICE];
+
+	struct iommu_domain domain;
+};
+
+/*
+ * This structure is attached to devices with dev_iommu_priv_set() on of_xlate
+ * and contains a list of streams bound to this device.
+ * So far the worst case seen is a single device with two streams
+ * from different darts, such that this simple static array is enough.
+ *
+ * @streams: streams for this device
+ */
+struct apple_dart_master_cfg {
+	struct apple_dart_stream_map stream_maps[MAX_DARTS_PER_DEVICE];
+};
+
+/*
+ * Helper macro to iterate over apple_dart_master_cfg.stream_maps and
+ * apple_dart_domain.stream_maps
+ *
+ * @i int used as loop variable
+ * @base pointer to base struct (apple_dart_master_cfg or apple_dart_domain)
+ * @stream pointer to the apple_dart_streams struct for each loop iteration
+ */
+#define for_each_stream_map(i, base, stream_map)                               \
+	for (i = 0, stream_map = &(base)->stream_maps[0];                      \
+	     i < MAX_DARTS_PER_DEVICE && stream_map->dart;                     \
+	     stream_map = &(base)->stream_maps[++i])
+
+static struct platform_driver apple_dart_driver;
+static const struct iommu_ops apple_dart_iommu_ops;
+static const struct iommu_flush_ops apple_dart_tlb_ops;
+
+static struct apple_dart_domain *to_dart_domain(struct iommu_domain *dom)
+{
+	return container_of(dom, struct apple_dart_domain, domain);
+}
+
+static void
+apple_dart_hw_enable_translation(struct apple_dart_stream_map *stream_map)
+{
+	int sid;
+
+	for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS)
+		writel(DART_TCR_TRANSLATE_ENABLE,
+		       stream_map->dart->regs + DART_TCR(sid));
+}
+
+static void apple_dart_hw_disable_dma(struct apple_dart_stream_map *stream_map)
+{
+	int sid;
+
+	for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS)
+		writel(0, stream_map->dart->regs + DART_TCR(sid));
+}
+
+static void
+apple_dart_hw_enable_bypass(struct apple_dart_stream_map *stream_map)
+{
+	int sid;
+
+	WARN_ON(!stream_map->dart->supports_bypass);
+	for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS)
+		writel(DART_TCR_BYPASS0_ENABLE | DART_TCR_BYPASS1_ENABLE,
+		       stream_map->dart->regs + DART_TCR(sid));
+}
+
+static void apple_dart_hw_set_ttbr(struct apple_dart_stream_map *stream_map,
+				   u8 idx, phys_addr_t paddr)
+{
+	int sid;
+
+	WARN_ON(paddr & ((1 << DART_TTBR_SHIFT) - 1));
+	for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS)
+		writel(DART_TTBR_VALID | (paddr >> DART_TTBR_SHIFT),
+		       stream_map->dart->regs + DART_TTBR(sid, idx));
+}
+
+static void apple_dart_hw_clear_ttbr(struct apple_dart_stream_map *stream_map,
+				     u8 idx)
+{
+	int sid;
+
+	for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS)
+		writel(0, stream_map->dart->regs + DART_TTBR(sid, idx));
+}
+
+static void
+apple_dart_hw_clear_all_ttbrs(struct apple_dart_stream_map *stream_map)
+{
+	int i;
+
+	for (i = 0; i < DART_MAX_TTBR; ++i)
+		apple_dart_hw_clear_ttbr(stream_map, i);
+}
+
+static int
+apple_dart_hw_stream_command(struct apple_dart_stream_map *stream_map,
+			     u32 command)
+{
+	unsigned long flags;
+	int ret;
+	u32 command_reg;
+
+	spin_lock_irqsave(&stream_map->dart->lock, flags);
+
+	writel(stream_map->sidmap, stream_map->dart->regs + DART_STREAM_SELECT);
+	writel(command, stream_map->dart->regs + DART_STREAM_COMMAND);
+
+	ret = readl_poll_timeout_atomic(
+		stream_map->dart->regs + DART_STREAM_COMMAND, command_reg,
+		!(command_reg & DART_STREAM_COMMAND_BUSY), 1,
+		DART_STREAM_COMMAND_BUSY_TIMEOUT);
+
+	spin_unlock_irqrestore(&stream_map->dart->lock, flags);
+
+	if (ret) {
+		dev_err(stream_map->dart->dev,
+			"busy bit did not clear after command %x for streams %lx\n",
+			command, stream_map->sidmap);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int
+apple_dart_hw_invalidate_tlb(struct apple_dart_stream_map *stream_map)
+{
+	return apple_dart_hw_stream_command(stream_map,
+					    DART_STREAM_COMMAND_INVALIDATE);
+}
+
+static int apple_dart_hw_reset(struct apple_dart *dart)
+{
+	u32 config;
+	struct apple_dart_stream_map stream_map;
+
+	config = readl(dart->regs + DART_CONFIG);
+	if (config & DART_CONFIG_LOCK) {
+		dev_err(dart->dev, "DART is locked down until reboot: %08x\n",
+			config);
+		return -EINVAL;
+	}
+
+	stream_map.dart = dart;
+	stream_map.sidmap = DART_STREAM_ALL;
+	apple_dart_hw_disable_dma(&stream_map);
+	apple_dart_hw_clear_all_ttbrs(&stream_map);
+
+	/* clear any pending errors before the interrupt is unmasked */
+	writel(readl(dart->regs + DART_ERROR), dart->regs + DART_ERROR);
+
+	return apple_dart_hw_invalidate_tlb(&stream_map);
+}
+
+static void apple_dart_domain_flush_tlb(struct apple_dart_domain *domain)
+{
+	int i;
+	struct apple_dart_atomic_stream_map *domain_stream_map;
+	struct apple_dart_stream_map stream_map;
+
+	for_each_stream_map(i, domain, domain_stream_map) {
+		stream_map.dart = domain_stream_map->dart;
+		stream_map.sidmap = atomic64_read(&domain_stream_map->sidmap);
+		apple_dart_hw_invalidate_tlb(&stream_map);
+	}
+}
+
+static void apple_dart_flush_iotlb_all(struct iommu_domain *domain)
+{
+	apple_dart_domain_flush_tlb(to_dart_domain(domain));
+}
+
+static void apple_dart_iotlb_sync(struct iommu_domain *domain,
+				  struct iommu_iotlb_gather *gather)
+{
+	apple_dart_domain_flush_tlb(to_dart_domain(domain));
+}
+
+static void apple_dart_iotlb_sync_map(struct iommu_domain *domain,
+				      unsigned long iova, size_t size)
+{
+	apple_dart_domain_flush_tlb(to_dart_domain(domain));
+}
+
+static void apple_dart_tlb_flush_all(void *cookie)
+{
+	apple_dart_domain_flush_tlb(cookie);
+}
+
+static void apple_dart_tlb_flush_walk(unsigned long iova, size_t size,
+				      size_t granule, void *cookie)
+{
+	apple_dart_domain_flush_tlb(cookie);
+}
+
+static const struct iommu_flush_ops apple_dart_tlb_ops = {
+	.tlb_flush_all = apple_dart_tlb_flush_all,
+	.tlb_flush_walk = apple_dart_tlb_flush_walk,
+};
+
+static phys_addr_t apple_dart_iova_to_phys(struct iommu_domain *domain,
+					   dma_addr_t iova)
+{
+	struct apple_dart_domain *dart_domain = to_dart_domain(domain);
+	struct io_pgtable_ops *ops = dart_domain->pgtbl_ops;
+
+	if (!ops)
+		return 0;
+
+	return ops->iova_to_phys(ops, iova);
+}
+
+static int apple_dart_map_pages(struct iommu_domain *domain, unsigned long iova,
+				phys_addr_t paddr, size_t pgsize,
+				size_t pgcount, int prot, gfp_t gfp,
+				size_t *mapped)
+{
+	struct apple_dart_domain *dart_domain = to_dart_domain(domain);
+	struct io_pgtable_ops *ops = dart_domain->pgtbl_ops;
+
+	if (!ops)
+		return -ENODEV;
+
+	return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp,
+			      mapped);
+}
+
+static size_t apple_dart_unmap_pages(struct iommu_domain *domain,
+				     unsigned long iova, size_t pgsize,
+				     size_t pgcount,
+				     struct iommu_iotlb_gather *gather)
+{
+	struct apple_dart_domain *dart_domain = to_dart_domain(domain);
+	struct io_pgtable_ops *ops = dart_domain->pgtbl_ops;
+
+	return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
+}
+
+static void
+apple_dart_setup_translation(struct apple_dart_domain *domain,
+			     struct apple_dart_stream_map *stream_map)
+{
+	int i;
+	struct io_pgtable_cfg *pgtbl_cfg =
+		&io_pgtable_ops_to_pgtable(domain->pgtbl_ops)->cfg;
+
+	for (i = 0; i < pgtbl_cfg->apple_dart_cfg.n_ttbrs; ++i)
+		apple_dart_hw_set_ttbr(stream_map, i,
+				       pgtbl_cfg->apple_dart_cfg.ttbr[i]);
+	for (; i < DART_MAX_TTBR; ++i)
+		apple_dart_hw_clear_ttbr(stream_map, i);
+
+	apple_dart_hw_enable_translation(stream_map);
+	apple_dart_hw_invalidate_tlb(stream_map);
+}
+
+static int apple_dart_finalize_domain(struct iommu_domain *domain,
+				      struct apple_dart_master_cfg *cfg)
+{
+	struct apple_dart_domain *dart_domain = to_dart_domain(domain);
+	struct apple_dart *dart = cfg->stream_maps[0].dart;
+	struct io_pgtable_cfg pgtbl_cfg;
+	int ret = 0;
+	int i;
+
+	mutex_lock(&dart_domain->init_lock);
+
+	if (dart_domain->finalized)
+		goto done;
+
+	for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) {
+		dart_domain->stream_maps[i].dart = cfg->stream_maps[i].dart;
+		atomic64_set(&dart_domain->stream_maps[i].sidmap,
+			     cfg->stream_maps[i].sidmap);
+	}
+
+	pgtbl_cfg = (struct io_pgtable_cfg){
+		.pgsize_bitmap = dart->pgsize,
+		.ias = 32,
+		.oas = 36,
+		.coherent_walk = 1,
+		.tlb = &apple_dart_tlb_ops,
+		.iommu_dev = dart->dev,
+	};
+
+	dart_domain->pgtbl_ops =
+		alloc_io_pgtable_ops(APPLE_DART, &pgtbl_cfg, domain);
+	if (!dart_domain->pgtbl_ops) {
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
+	domain->geometry.aperture_start = 0;
+	domain->geometry.aperture_end = DMA_BIT_MASK(32);
+	domain->geometry.force_aperture = true;
+
+	dart_domain->finalized = true;
+
+done:
+	mutex_unlock(&dart_domain->init_lock);
+	return ret;
+}
+
+static int
+apple_dart_mod_streams(struct apple_dart_atomic_stream_map *domain_maps,
+		       struct apple_dart_stream_map *master_maps,
+		       bool add_streams)
+{
+	int i;
+
+	for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) {
+		if (domain_maps[i].dart != master_maps[i].dart)
+			return -EINVAL;
+	}
+
+	for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) {
+		if (!domain_maps[i].dart)
+			break;
+		if (add_streams)
+			atomic64_or(master_maps[i].sidmap,
+				    &domain_maps[i].sidmap);
+		else
+			atomic64_and(~master_maps[i].sidmap,
+				     &domain_maps[i].sidmap);
+	}
+
+	return 0;
+}
+
+static int apple_dart_domain_add_streams(struct apple_dart_domain *domain,
+					 struct apple_dart_master_cfg *cfg)
+{
+	return apple_dart_mod_streams(domain->stream_maps, cfg->stream_maps,
+				      true);
+}
+
+static int apple_dart_domain_remove_streams(struct apple_dart_domain *domain,
+					    struct apple_dart_master_cfg *cfg)
+{
+	return apple_dart_mod_streams(domain->stream_maps, cfg->stream_maps,
+				      false);
+}
+
+static int apple_dart_attach_dev(struct iommu_domain *domain,
+				 struct device *dev)
+{
+	int ret, i;
+	struct apple_dart_stream_map *stream_map;
+	struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
+	struct apple_dart_domain *dart_domain = to_dart_domain(domain);
+
+	if (cfg->stream_maps[0].dart->force_bypass &&
+	    domain->type != IOMMU_DOMAIN_IDENTITY)
+		return -EINVAL;
+	if (!cfg->stream_maps[0].dart->supports_bypass &&
+	    domain->type == IOMMU_DOMAIN_IDENTITY)
+		return -EINVAL;
+
+	ret = apple_dart_finalize_domain(domain, cfg);
+	if (ret)
+		return ret;
+
+	switch (domain->type) {
+	case IOMMU_DOMAIN_DMA:
+	case IOMMU_DOMAIN_UNMANAGED:
+		ret = apple_dart_domain_add_streams(dart_domain, cfg);
+		if (ret)
+			return ret;
+
+		for_each_stream_map(i, cfg, stream_map)
+			apple_dart_setup_translation(dart_domain, stream_map);
+		break;
+	case IOMMU_DOMAIN_BLOCKED:
+		for_each_stream_map(i, cfg, stream_map)
+			apple_dart_hw_disable_dma(stream_map);
+		break;
+	case IOMMU_DOMAIN_IDENTITY:
+		for_each_stream_map(i, cfg, stream_map)
+			apple_dart_hw_enable_bypass(stream_map);
+		break;
+	}
+
+	return ret;
+}
+
+static void apple_dart_detach_dev(struct iommu_domain *domain,
+				  struct device *dev)
+{
+	int i;
+	struct apple_dart_stream_map *stream_map;
+	struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
+	struct apple_dart_domain *dart_domain = to_dart_domain(domain);
+
+	for_each_stream_map(i, cfg, stream_map)
+		apple_dart_hw_disable_dma(stream_map);
+
+	if (domain->type == IOMMU_DOMAIN_DMA ||
+	    domain->type == IOMMU_DOMAIN_UNMANAGED)
+		apple_dart_domain_remove_streams(dart_domain, cfg);
+}
+
+static struct iommu_device *apple_dart_probe_device(struct device *dev)
+{
+	struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
+	struct apple_dart_stream_map *stream_map;
+	int i;
+
+	if (!cfg)
+		return ERR_PTR(-ENODEV);
+
+	for_each_stream_map(i, cfg, stream_map)
+		device_link_add(
+			dev, stream_map->dart->dev,
+			DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
+
+	return &cfg->stream_maps[0].dart->iommu;
+}
+
+static void apple_dart_release_device(struct device *dev)
+{
+	struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
+
+	if (!cfg)
+		return;
+
+	dev_iommu_priv_set(dev, NULL);
+	kfree(cfg);
+}
+
+static struct iommu_domain *apple_dart_domain_alloc(unsigned int type)
+{
+	struct apple_dart_domain *dart_domain;
+
+	if (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_UNMANAGED &&
+	    type != IOMMU_DOMAIN_IDENTITY && type != IOMMU_DOMAIN_BLOCKED)
+		return NULL;
+
+	dart_domain = kzalloc(sizeof(*dart_domain), GFP_KERNEL);
+	if (!dart_domain)
+		return NULL;
+
+	iommu_get_dma_cookie(&dart_domain->domain);
+	mutex_init(&dart_domain->init_lock);
+
+	/* no need to allocate pgtbl_ops or do any other finalization steps */
+	if (type == IOMMU_DOMAIN_IDENTITY || type == IOMMU_DOMAIN_BLOCKED)
+		dart_domain->finalized = true;
+
+	return &dart_domain->domain;
+}
+
+static void apple_dart_domain_free(struct iommu_domain *domain)
+{
+	struct apple_dart_domain *dart_domain = to_dart_domain(domain);
+
+	if (dart_domain->pgtbl_ops)
+		free_io_pgtable_ops(dart_domain->pgtbl_ops);
+
+	kfree(dart_domain);
+}
+
+static int apple_dart_of_xlate(struct device *dev, struct of_phandle_args *args)
+{
+	struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
+	struct platform_device *iommu_pdev = of_find_device_by_node(args->np);
+	struct apple_dart *dart = platform_get_drvdata(iommu_pdev);
+	struct apple_dart *cfg_dart;
+	int i, sid;
+
+	if (args->args_count != 1)
+		return -EINVAL;
+	sid = args->args[0];
+
+	if (!cfg)
+		cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+	if (!cfg)
+		return -ENOMEM;
+	dev_iommu_priv_set(dev, cfg);
+
+	cfg_dart = cfg->stream_maps[0].dart;
+	if (cfg_dart) {
+		if (cfg_dart->supports_bypass != dart->supports_bypass)
+			return -EINVAL;
+		if (cfg_dart->force_bypass != dart->force_bypass)
+			return -EINVAL;
+		if (cfg_dart->pgsize != dart->pgsize)
+			return -EINVAL;
+	}
+
+	for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) {
+		if (cfg->stream_maps[i].dart == dart) {
+			cfg->stream_maps[i].sidmap |= 1 << sid;
+			return 0;
+		}
+	}
+	for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) {
+		if (!cfg->stream_maps[i].dart) {
+			cfg->stream_maps[i].dart = dart;
+			cfg->stream_maps[i].sidmap = 1 << sid;
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
+static struct iommu_group *apple_dart_device_group(struct device *dev)
+{
+	static DEFINE_MUTEX(lock);
+	int i, sid;
+	struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
+	struct apple_dart_stream_map *stream_map;
+	struct iommu_group *group = NULL;
+	struct iommu_group *res = ERR_PTR(-EINVAL);
+
+	mutex_lock(&lock);
+
+	for_each_stream_map(i, cfg, stream_map) {
+		for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS) {
+			struct iommu_group *stream_group =
+				stream_map->dart->sid2group[sid];
+
+			if (group && group != stream_group) {
+				res = ERR_PTR(-EINVAL);
+				goto out;
+			}
+
+			group = stream_group;
+		}
+	}
+
+	if (group) {
+		res = iommu_group_ref_get(group);
+		goto out;
+	}
+
+#ifdef CONFIG_PCI
+	if (dev_is_pci(dev))
+		group = pci_device_group(dev);
+	else
+#endif
+		group = generic_device_group(dev);
+
+	for_each_stream_map(i, cfg, stream_map)
+		for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS)
+			stream_map->dart->sid2group[sid] = group;
+
+	res = group;
+
+out:
+	mutex_unlock(&lock);
+	return res;
+}
+
+static int apple_dart_def_domain_type(struct device *dev)
+{
+	struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
+
+	if (cfg->stream_maps[0].dart->force_bypass)
+		return IOMMU_DOMAIN_IDENTITY;
+	if (!cfg->stream_maps[0].dart->supports_bypass)
+		return IOMMU_DOMAIN_DMA;
+
+	return 0;
+}
+
+static const struct iommu_ops apple_dart_iommu_ops = {
+	.domain_alloc = apple_dart_domain_alloc,
+	.domain_free = apple_dart_domain_free,
+	.attach_dev = apple_dart_attach_dev,
+	.detach_dev = apple_dart_detach_dev,
+	.map_pages = apple_dart_map_pages,
+	.unmap_pages = apple_dart_unmap_pages,
+	.flush_iotlb_all = apple_dart_flush_iotlb_all,
+	.iotlb_sync = apple_dart_iotlb_sync,
+	.iotlb_sync_map = apple_dart_iotlb_sync_map,
+	.iova_to_phys = apple_dart_iova_to_phys,
+	.probe_device = apple_dart_probe_device,
+	.release_device = apple_dart_release_device,
+	.device_group = apple_dart_device_group,
+	.of_xlate = apple_dart_of_xlate,
+	.def_domain_type = apple_dart_def_domain_type,
+	.pgsize_bitmap = -1UL, /* Restricted during dart probe */
+};
+
+static irqreturn_t apple_dart_irq(int irq, void *dev)
+{
+	struct apple_dart *dart = dev;
+	const char *fault_name = NULL;
+	u32 error = readl(dart->regs + DART_ERROR);
+	u32 error_code = FIELD_GET(DART_ERROR_CODE, error);
+	u32 addr_lo = readl(dart->regs + DART_ERROR_ADDR_LO);
+	u32 addr_hi = readl(dart->regs + DART_ERROR_ADDR_HI);
+	u64 addr = addr_lo | (((u64)addr_hi) << 32);
+	u8 stream_idx = FIELD_GET(DART_ERROR_STREAM, error);
+
+	if (!(error & DART_ERROR_FLAG))
+		return IRQ_NONE;
+
+	/* there should only be a single bit set but let's use == to be sure */
+	if (error_code == DART_ERROR_READ_FAULT)
+		fault_name = "READ FAULT";
+	else if (error_code == DART_ERROR_WRITE_FAULT)
+		fault_name = "WRITE FAULT";
+	else if (error_code == DART_ERROR_NO_PTE)
+		fault_name = "NO PTE FOR IOVA";
+	else if (error_code == DART_ERROR_NO_PMD)
+		fault_name = "NO PMD FOR IOVA";
+	else if (error_code == DART_ERROR_NO_TTBR)
+		fault_name = "NO TTBR FOR IOVA";
+	else
+		fault_name = "unknown";
+
+	dev_err_ratelimited(
+		dart->dev,
+		"translation fault: status:0x%x stream:%d code:0x%x (%s) at 0x%llx",
+		error, stream_idx, error_code, fault_name, addr);
+
+	writel(error, dart->regs + DART_ERROR);
+	return IRQ_HANDLED;
+}
+
+static int apple_dart_set_bus_ops(const struct iommu_ops *ops)
+{
+	int ret;
+
+	if (!iommu_present(&platform_bus_type)) {
+		ret = bus_set_iommu(&platform_bus_type, ops);
+		if (ret)
+			return ret;
+	}
+#ifdef CONFIG_PCI
+	if (!iommu_present(&pci_bus_type)) {
+		ret = bus_set_iommu(&pci_bus_type, ops);
+		if (ret) {
+			bus_set_iommu(&platform_bus_type, NULL);
+			return ret;
+		}
+	}
+#endif
+	return 0;
+}
+
+static int apple_dart_probe(struct platform_device *pdev)
+{
+	int ret;
+	u32 dart_params[2];
+	struct resource *res;
+	struct apple_dart *dart;
+	struct device *dev = &pdev->dev;
+
+	dart = devm_kzalloc(dev, sizeof(*dart), GFP_KERNEL);
+	if (!dart)
+		return -ENOMEM;
+
+	dart->dev = dev;
+	spin_lock_init(&dart->lock);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (resource_size(res) < 0x4000) {
+		dev_err(dev, "MMIO region too small (%pr)\n", res);
+		return -EINVAL;
+	}
+
+	dart->regs = devm_ioremap_resource(dev, res);
+	if (IS_ERR(dart->regs))
+		return PTR_ERR(dart->regs);
+
+	dart->irq = platform_get_irq(pdev, 0);
+	if (dart->irq < 0)
+		return -ENODEV;
+
+	ret = devm_clk_bulk_get_all(dev, &dart->clks);
+	if (ret < 0)
+		return ret;
+	dart->num_clks = ret;
+
+	ret = clk_bulk_prepare_enable(dart->num_clks, dart->clks);
+	if (ret)
+		return ret;
+
+	ret = apple_dart_hw_reset(dart);
+	if (ret)
+		goto err_clk_disable;
+
+	dart_params[0] = readl(dart->regs + DART_PARAMS1);
+	dart_params[1] = readl(dart->regs + DART_PARAMS2);
+	dart->pgsize = 1 << FIELD_GET(DART_PARAMS_PAGE_SHIFT, dart_params[0]);
+	dart->supports_bypass = dart_params[1] & DART_PARAMS_BYPASS_SUPPORT;
+	dart->force_bypass = dart->pgsize > PAGE_SIZE;
+
+	ret = request_irq(dart->irq, apple_dart_irq, IRQF_SHARED,
+			  "apple-dart fault handler", dart);
+	if (ret)
+		goto err_clk_disable;
+
+	platform_set_drvdata(pdev, dart);
+
+	ret = apple_dart_set_bus_ops(&apple_dart_iommu_ops);
+	if (ret)
+		goto err_free_irq;
+
+	ret = iommu_device_sysfs_add(&dart->iommu, dev, NULL, "apple-dart.%s",
+				     dev_name(&pdev->dev));
+	if (ret)
+		goto err_remove_bus_ops;
+
+	ret = iommu_device_register(&dart->iommu, &apple_dart_iommu_ops, dev);
+	if (ret)
+		goto err_sysfs_remove;
+
+	dev_info(
+		&pdev->dev,
+		"DART [pagesize %x, bypass support: %d, bypass forced: %d] initialized\n",
+		dart->pgsize, dart->supports_bypass, dart->force_bypass);
+	return 0;
+
+err_sysfs_remove:
+	iommu_device_sysfs_remove(&dart->iommu);
+err_remove_bus_ops:
+	apple_dart_set_bus_ops(NULL);
+err_free_irq:
+	free_irq(dart->irq, dart);
+err_clk_disable:
+	clk_bulk_disable_unprepare(dart->num_clks, dart->clks);
+
+	return ret;
+}
+
+static int apple_dart_remove(struct platform_device *pdev)
+{
+	struct apple_dart *dart = platform_get_drvdata(pdev);
+
+	apple_dart_hw_reset(dart);
+	free_irq(dart->irq, dart);
+	apple_dart_set_bus_ops(NULL);
+
+	iommu_device_unregister(&dart->iommu);
+	iommu_device_sysfs_remove(&dart->iommu);
+
+	clk_bulk_disable_unprepare(dart->num_clks, dart->clks);
+
+	return 0;
+}
+
+static const struct of_device_id apple_dart_of_match[] = {
+	{ .compatible = "apple,t8103-dart", .data = NULL },
+	{},
+};
+MODULE_DEVICE_TABLE(of, apple_dart_of_match);
+
+static struct platform_driver apple_dart_driver = {
+	.driver	= {
+		.name			= "apple-dart",
+		.of_match_table		= apple_dart_of_match,
+		.suppress_bind_attrs    = true,
+	},
+	.probe	= apple_dart_probe,
+	.remove	= apple_dart_remove,
+};
+
+module_platform_driver(apple_dart_driver);
+
+MODULE_DESCRIPTION("IOMMU API for Apple's DART");
+MODULE_AUTHOR("Sven Peter <sven@svenpeter.dev>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 235f9bdaeaf2..a388e318f86e 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -335,10 +335,14 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 	return 0;
 }
 
+static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
+{
+	return &smmu->cmdq;
+}
+
 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
-					 u32 prod)
+					 struct arm_smmu_queue *q, u32 prod)
 {
-	struct arm_smmu_queue *q = &smmu->cmdq.q;
 	struct arm_smmu_cmdq_ent ent = {
 		.opcode = CMDQ_OP_CMD_SYNC,
 	};
@@ -355,7 +359,8 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
 	arm_smmu_cmdq_build_cmd(cmd, &ent);
 }
 
-static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
+static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
+				     struct arm_smmu_queue *q)
 {
 	static const char * const cerror_str[] = {
 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
@@ -366,7 +371,6 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 
 	int i;
 	u64 cmd[CMDQ_ENT_DWORDS];
-	struct arm_smmu_queue *q = &smmu->cmdq.q;
 	u32 cons = readl_relaxed(q->cons_reg);
 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
 	struct arm_smmu_cmdq_ent cmd_sync = {
@@ -413,6 +417,11 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
 }
 
+static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
+{
+	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
+}
+
 /*
  * Command queue locking.
  * This is a form of bastardised rwlock with the following major changes:
@@ -579,7 +588,7 @@ static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
 {
 	unsigned long flags;
 	struct arm_smmu_queue_poll qp;
-	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
+	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
 	int ret = 0;
 
 	/*
@@ -595,7 +604,7 @@ static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
 
 	queue_poll_init(smmu, &qp);
 	do {
-		llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
+		llq->val = READ_ONCE(cmdq->q.llq.val);
 		if (!queue_full(llq))
 			break;
 
@@ -614,7 +623,7 @@ static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
 {
 	int ret = 0;
 	struct arm_smmu_queue_poll qp;
-	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
+	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
 
 	queue_poll_init(smmu, &qp);
@@ -637,12 +646,12 @@ static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
 					       struct arm_smmu_ll_queue *llq)
 {
 	struct arm_smmu_queue_poll qp;
-	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
+	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
 	u32 prod = llq->prod;
 	int ret = 0;
 
 	queue_poll_init(smmu, &qp);
-	llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
+	llq->val = READ_ONCE(cmdq->q.llq.val);
 	do {
 		if (queue_consumed(llq, prod))
 			break;
@@ -732,12 +741,12 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
 	u32 prod;
 	unsigned long flags;
 	bool owner;
-	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
-	struct arm_smmu_ll_queue llq = {
-		.max_n_shift = cmdq->q.llq.max_n_shift,
-	}, head = llq;
+	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
+	struct arm_smmu_ll_queue llq, head;
 	int ret = 0;
 
+	llq.max_n_shift = cmdq->q.llq.max_n_shift;
+
 	/* 1. Allocate some space in the queue */
 	local_irq_save(flags);
 	llq.val = READ_ONCE(cmdq->q.llq.val);
@@ -772,7 +781,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
 	if (sync) {
 		prod = queue_inc_prod_n(&llq, n);
-		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
+		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
 
 		/*
@@ -845,8 +854,9 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
 	return ret;
 }
 
-static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
-				   struct arm_smmu_cmdq_ent *ent)
+static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
+				     struct arm_smmu_cmdq_ent *ent,
+				     bool sync)
 {
 	u64 cmd[CMDQ_ENT_DWORDS];
 
@@ -856,12 +866,19 @@ static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
 		return -EINVAL;
 	}
 
-	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
+	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
+}
+
+static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
+				   struct arm_smmu_cmdq_ent *ent)
+{
+	return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
 }
 
-static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
+static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
+					     struct arm_smmu_cmdq_ent *ent)
 {
-	return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
+	return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
 }
 
 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
@@ -929,8 +946,7 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
 		.tlbi.asid = asid,
 	};
 
-	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-	arm_smmu_cmdq_issue_sync(smmu);
+	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
 }
 
 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
@@ -939,7 +955,7 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
 	size_t i;
 	unsigned long flags;
 	struct arm_smmu_master *master;
-	struct arm_smmu_cmdq_batch cmds = {};
+	struct arm_smmu_cmdq_batch cmds;
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 	struct arm_smmu_cmdq_ent cmd = {
 		.opcode	= CMDQ_OP_CFGI_CD,
@@ -949,6 +965,8 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
 		},
 	};
 
+	cmds.num = 0;
+
 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
 		for (i = 0; i < master->num_streams; i++) {
@@ -1211,8 +1229,7 @@ static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
 		},
 	};
 
-	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-	arm_smmu_cmdq_issue_sync(smmu);
+	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
 }
 
 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
@@ -1747,15 +1764,16 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
 {
 	int i;
 	struct arm_smmu_cmdq_ent cmd;
+	struct arm_smmu_cmdq_batch cmds = {};
 
 	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
 
 	for (i = 0; i < master->num_streams; i++) {
 		cmd.atc.sid = master->streams[i].id;
-		arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
+		arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
 	}
 
-	return arm_smmu_cmdq_issue_sync(master->smmu);
+	return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
 }
 
 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
@@ -1765,7 +1783,7 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
 	unsigned long flags;
 	struct arm_smmu_cmdq_ent cmd;
 	struct arm_smmu_master *master;
-	struct arm_smmu_cmdq_batch cmds = {};
+	struct arm_smmu_cmdq_batch cmds;
 
 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
 		return 0;
@@ -1789,6 +1807,8 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
 
 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
 
+	cmds.num = 0;
+
 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
 		if (!master->ats_enabled)
@@ -1823,8 +1843,7 @@ static void arm_smmu_tlb_inv_context(void *cookie)
 	} else {
 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
-		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-		arm_smmu_cmdq_issue_sync(smmu);
+		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
 	}
 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
 }
@@ -1837,7 +1856,7 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 	unsigned long end = iova + size, num_pages = 0, tg = 0;
 	size_t inv_range = granule;
-	struct arm_smmu_cmdq_batch cmds = {};
+	struct arm_smmu_cmdq_batch cmds;
 
 	if (!size)
 		return;
@@ -1855,6 +1874,8 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
 		num_pages = size >> tg;
 	}
 
+	cmds.num = 0;
+
 	while (iova < end) {
 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
 			/*
@@ -1972,6 +1993,7 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
 
 	if (type != IOMMU_DOMAIN_UNMANAGED &&
 	    type != IOMMU_DOMAIN_DMA &&
+	    type != IOMMU_DOMAIN_DMA_FQ &&
 	    type != IOMMU_DOMAIN_IDENTITY)
 		return NULL;
 
@@ -1984,12 +2006,6 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
 	if (!smmu_domain)
 		return NULL;
 
-	if (type == IOMMU_DOMAIN_DMA &&
-	    iommu_get_dma_cookie(&smmu_domain->domain)) {
-		kfree(smmu_domain);
-		return NULL;
-	}
-
 	mutex_init(&smmu_domain->init_mutex);
 	INIT_LIST_HEAD(&smmu_domain->devices);
 	spin_lock_init(&smmu_domain->devices_lock);
@@ -2021,7 +2037,6 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 
-	iommu_put_dma_cookie(domain);
 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
 
 	/* Free the CD and ASID, if we allocated them */
@@ -2181,9 +2196,6 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain,
 		.iommu_dev	= smmu->dev,
 	};
 
-	if (!iommu_get_dma_strict(domain))
-		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
-
 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
 	if (!pgtbl_ops)
 		return -ENOMEM;
@@ -2439,19 +2451,21 @@ out_unlock:
 	return ret;
 }
 
-static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
-			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
+			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
+			      int prot, gfp_t gfp, size_t *mapped)
 {
 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
 
 	if (!ops)
 		return -ENODEV;
 
-	return ops->map(ops, iova, paddr, size, prot, gfp);
+	return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
 }
 
-static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
-			     size_t size, struct iommu_iotlb_gather *gather)
+static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
+				   size_t pgsize, size_t pgcount,
+				   struct iommu_iotlb_gather *gather)
 {
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
@@ -2459,7 +2473,7 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
 	if (!ops)
 		return 0;
 
-	return ops->unmap(ops, iova, size, gather);
+	return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
 }
 
 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
@@ -2488,9 +2502,6 @@ arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
 {
 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
 
-	if (domain->type == IOMMU_DOMAIN_IDENTITY)
-		return iova;
-
 	if (!ops)
 		return 0;
 
@@ -2825,8 +2836,8 @@ static struct iommu_ops arm_smmu_ops = {
 	.domain_alloc		= arm_smmu_domain_alloc,
 	.domain_free		= arm_smmu_domain_free,
 	.attach_dev		= arm_smmu_attach_dev,
-	.map			= arm_smmu_map,
-	.unmap			= arm_smmu_unmap,
+	.map_pages		= arm_smmu_map_pages,
+	.unmap_pages		= arm_smmu_unmap_pages,
 	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
 	.iotlb_sync		= arm_smmu_iotlb_sync,
 	.iova_to_phys		= arm_smmu_iova_to_phys,
@@ -3338,18 +3349,16 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 
 	/* Invalidate any cached configuration */
 	cmd.opcode = CMDQ_OP_CFGI_ALL;
-	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-	arm_smmu_cmdq_issue_sync(smmu);
+	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
 
 	/* Invalidate any stale TLB entries */
 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
-		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
 	}
 
 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
-	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-	arm_smmu_cmdq_issue_sync(smmu);
+	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
 
 	/* Event queue */
 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 9b9d13ec5a88..55690af1b25d 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -193,6 +193,8 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
 {
 	struct adreno_smmu_priv *priv;
 
+	smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
+
 	/* Only enable split pagetables for the GPU device (SID 0) */
 	if (!qcom_adreno_smmu_is_gpu_device(dev))
 		return 0;
@@ -235,6 +237,14 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
 	{ }
 };
 
+static int qcom_smmu_init_context(struct arm_smmu_domain *smmu_domain,
+		struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
+{
+	smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
+
+	return 0;
+}
+
 static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu)
 {
 	unsigned int last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1);
@@ -358,6 +368,7 @@ static int qcom_smmu500_reset(struct arm_smmu_device *smmu)
 }
 
 static const struct arm_smmu_impl qcom_smmu_impl = {
+	.init_context = qcom_smmu_init_context,
 	.cfg_probe = qcom_smmu_cfg_probe,
 	.def_domain_type = qcom_smmu_def_domain_type,
 	.reset = qcom_smmu500_reset,
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index f22dbeb1e510..4bc75c4ce402 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -327,9 +327,16 @@ static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
 static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
 				     size_t granule, void *cookie)
 {
-	arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
-				  ARM_SMMU_CB_S1_TLBIVA);
-	arm_smmu_tlb_sync_context(cookie);
+	struct arm_smmu_domain *smmu_domain = cookie;
+	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+
+	if (cfg->flush_walk_prefer_tlbiasid) {
+		arm_smmu_tlb_inv_context_s1(cookie);
+	} else {
+		arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
+					  ARM_SMMU_CB_S1_TLBIVA);
+		arm_smmu_tlb_sync_context(cookie);
+	}
 }
 
 static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
@@ -765,9 +772,6 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 		.iommu_dev	= smmu->dev,
 	};
 
-	if (!iommu_get_dma_strict(domain))
-		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
-
 	if (smmu->impl && smmu->impl->init_context) {
 		ret = smmu->impl->init_context(smmu_domain, &pgtbl_cfg, dev);
 		if (ret)
@@ -868,10 +872,11 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
 {
 	struct arm_smmu_domain *smmu_domain;
 
-	if (type != IOMMU_DOMAIN_UNMANAGED &&
-	    type != IOMMU_DOMAIN_DMA &&
-	    type != IOMMU_DOMAIN_IDENTITY)
-		return NULL;
+	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_IDENTITY) {
+		if (using_legacy_binding ||
+		    (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_DMA_FQ))
+			return NULL;
+	}
 	/*
 	 * Allocate the domain and initialise some of its data structures.
 	 * We can't really do anything meaningful until we've added a
@@ -881,12 +886,6 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
 	if (!smmu_domain)
 		return NULL;
 
-	if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
-	    iommu_get_dma_cookie(&smmu_domain->domain))) {
-		kfree(smmu_domain);
-		return NULL;
-	}
-
 	mutex_init(&smmu_domain->init_mutex);
 	spin_lock_init(&smmu_domain->cb_lock);
 
@@ -901,7 +900,6 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
 	 * Free the domain resources. We assume that all devices have
 	 * already been detached.
 	 */
-	iommu_put_dma_cookie(domain);
 	arm_smmu_destroy_domain_context(domain);
 	kfree(smmu_domain);
 }
@@ -1198,8 +1196,9 @@ rpm_put:
 	return ret;
 }
 
-static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
-			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
+			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
+			      int prot, gfp_t gfp, size_t *mapped)
 {
 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
@@ -1209,14 +1208,15 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
 		return -ENODEV;
 
 	arm_smmu_rpm_get(smmu);
-	ret = ops->map(ops, iova, paddr, size, prot, gfp);
+	ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
 	arm_smmu_rpm_put(smmu);
 
 	return ret;
 }
 
-static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
-			     size_t size, struct iommu_iotlb_gather *gather)
+static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
+				   size_t pgsize, size_t pgcount,
+				   struct iommu_iotlb_gather *iotlb_gather)
 {
 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
@@ -1226,7 +1226,7 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
 		return 0;
 
 	arm_smmu_rpm_get(smmu);
-	ret = ops->unmap(ops, iova, size, gather);
+	ret = ops->unmap_pages(ops, iova, pgsize, pgcount, iotlb_gather);
 	arm_smmu_rpm_put(smmu);
 
 	return ret;
@@ -1320,9 +1320,6 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
 
-	if (domain->type == IOMMU_DOMAIN_IDENTITY)
-		return iova;
-
 	if (!ops)
 		return 0;
 
@@ -1478,16 +1475,21 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev)
 	struct iommu_group *group = NULL;
 	int i, idx;
 
+	mutex_lock(&smmu->stream_map_mutex);
 	for_each_cfg_sme(cfg, fwspec, i, idx) {
 		if (group && smmu->s2crs[idx].group &&
-		    group != smmu->s2crs[idx].group)
+		    group != smmu->s2crs[idx].group) {
+			mutex_unlock(&smmu->stream_map_mutex);
 			return ERR_PTR(-EINVAL);
+		}
 
 		group = smmu->s2crs[idx].group;
 	}
 
-	if (group)
+	if (group) {
+		mutex_unlock(&smmu->stream_map_mutex);
 		return iommu_group_ref_get(group);
+	}
 
 	if (dev_is_pci(dev))
 		group = pci_device_group(dev);
@@ -1501,6 +1503,7 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev)
 		for_each_cfg_sme(cfg, fwspec, i, idx)
 			smmu->s2crs[idx].group = group;
 
+	mutex_unlock(&smmu->stream_map_mutex);
 	return group;
 }
 
@@ -1582,8 +1585,8 @@ static struct iommu_ops arm_smmu_ops = {
 	.domain_alloc		= arm_smmu_domain_alloc,
 	.domain_free		= arm_smmu_domain_free,
 	.attach_dev		= arm_smmu_attach_dev,
-	.map			= arm_smmu_map,
-	.unmap			= arm_smmu_unmap,
+	.map_pages		= arm_smmu_map_pages,
+	.unmap_pages		= arm_smmu_unmap_pages,
 	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
 	.iotlb_sync		= arm_smmu_iotlb_sync,
 	.iova_to_phys		= arm_smmu_iova_to_phys,
@@ -2281,18 +2284,38 @@ static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
 
 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
 {
+	int ret;
+	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
+
+	ret = clk_bulk_prepare(smmu->num_clks, smmu->clks);
+	if (ret)
+		return ret;
+
 	if (pm_runtime_suspended(dev))
 		return 0;
 
-	return arm_smmu_runtime_resume(dev);
+	ret = arm_smmu_runtime_resume(dev);
+	if (ret)
+		clk_bulk_unprepare(smmu->num_clks, smmu->clks);
+
+	return ret;
 }
 
 static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
 {
+	int ret = 0;
+	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
+
 	if (pm_runtime_suspended(dev))
-		return 0;
+		goto clk_unprepare;
+
+	ret = arm_smmu_runtime_suspend(dev);
+	if (ret)
+		return ret;
 
-	return arm_smmu_runtime_suspend(dev);
+clk_unprepare:
+	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
+	return ret;
 }
 
 static const struct dev_pm_ops arm_smmu_pm_ops = {
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index a50271595960..432de2f742c3 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -346,6 +346,7 @@ struct arm_smmu_cfg {
 	};
 	enum arm_smmu_cbar_type		cbar;
 	enum arm_smmu_context_fmt	fmt;
+	bool				flush_walk_prefer_tlbiasid;
 };
 #define ARM_SMMU_INVALID_IRPTNDX	0xff
 
diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
index 021cf8f65ffc..b91874cb6cf3 100644
--- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c
+++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
@@ -10,7 +10,6 @@
 #include <linux/bitfield.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
-#include <linux/dma-iommu.h>
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
@@ -335,12 +334,6 @@ static struct iommu_domain *qcom_iommu_domain_alloc(unsigned type)
 	if (!qcom_domain)
 		return NULL;
 
-	if (type == IOMMU_DOMAIN_DMA &&
-	    iommu_get_dma_cookie(&qcom_domain->domain)) {
-		kfree(qcom_domain);
-		return NULL;
-	}
-
 	mutex_init(&qcom_domain->init_mutex);
 	spin_lock_init(&qcom_domain->pgtbl_lock);
 
@@ -351,8 +344,6 @@ static void qcom_iommu_domain_free(struct iommu_domain *domain)
 {
 	struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
 
-	iommu_put_dma_cookie(domain);
-
 	if (qcom_domain->iommu) {
 		/*
 		 * NOTE: unmap can be called after client device is powered
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 6f0df629353f..896bea04c347 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -317,6 +317,30 @@ static bool dev_is_untrusted(struct device *dev)
 	return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
 }
 
+/* sysfs updates are serialised by the mutex of the group owning @domain */
+int iommu_dma_init_fq(struct iommu_domain *domain)
+{
+	struct iommu_dma_cookie *cookie = domain->iova_cookie;
+	int ret;
+
+	if (cookie->fq_domain)
+		return 0;
+
+	ret = init_iova_flush_queue(&cookie->iovad, iommu_dma_flush_iotlb_all,
+				    iommu_dma_entry_dtor);
+	if (ret) {
+		pr_warn("iova flush queue initialization failed\n");
+		return ret;
+	}
+	/*
+	 * Prevent incomplete iovad->fq being observable. Pairs with path from
+	 * __iommu_dma_unmap() through iommu_dma_free_iova() to queue_iova()
+	 */
+	smp_wmb();
+	WRITE_ONCE(cookie->fq_domain, domain);
+	return 0;
+}
+
 /**
  * iommu_dma_init_domain - Initialise a DMA mapping domain
  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
@@ -370,17 +394,9 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 
 	init_iova_domain(iovad, 1UL << order, base_pfn);
 
-	if (!cookie->fq_domain && (!dev || !dev_is_untrusted(dev)) &&
-	    domain->ops->flush_iotlb_all && !iommu_get_dma_strict(domain)) {
-		if (init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all,
-					  iommu_dma_entry_dtor))
-			pr_warn("iova flush queue initialization failed\n");
-		else
-			cookie->fq_domain = domain;
-	}
-
-	if (!dev)
-		return 0;
+	/* If the FQ fails we can simply fall back to strict mode */
+	if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
+		domain->type = IOMMU_DOMAIN_DMA;
 
 	return iova_reserve_iommu_regions(dev, domain);
 }
@@ -455,17 +471,17 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
 }
 
 static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
-		dma_addr_t iova, size_t size, struct page *freelist)
+		dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather)
 {
 	struct iova_domain *iovad = &cookie->iovad;
 
 	/* The MSI case is only ever cleaning up its most recent allocation */
 	if (cookie->type == IOMMU_DMA_MSI_COOKIE)
 		cookie->msi_iova -= size;
-	else if (cookie->fq_domain)	/* non-strict mode */
+	else if (gather && gather->queued)
 		queue_iova(iovad, iova_pfn(iovad, iova),
 				size >> iova_shift(iovad),
-				(unsigned long)freelist);
+				(unsigned long)gather->freelist);
 	else
 		free_iova_fast(iovad, iova_pfn(iovad, iova),
 				size >> iova_shift(iovad));
@@ -484,13 +500,14 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
 	dma_addr -= iova_off;
 	size = iova_align(iovad, size + iova_off);
 	iommu_iotlb_gather_init(&iotlb_gather);
+	iotlb_gather.queued = READ_ONCE(cookie->fq_domain);
 
 	unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather);
 	WARN_ON(unmapped != size);
 
-	if (!cookie->fq_domain)
+	if (!iotlb_gather.queued)
 		iommu_iotlb_sync(domain, &iotlb_gather);
-	iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist);
+	iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather);
 }
 
 static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
@@ -506,7 +523,7 @@ static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
 
 	__iommu_dma_unmap(dev, dma_addr, size);
 
-	if (unlikely(is_swiotlb_buffer(phys)))
+	if (unlikely(is_swiotlb_buffer(dev, phys)))
 		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
 }
 
@@ -577,7 +594,7 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
 	}
 
 	iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
-	if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
+	if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys))
 		swiotlb_tbl_unmap_single(dev, phys, org_size, dir, attrs);
 	return iova;
 }
@@ -784,7 +801,7 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev,
 	if (!dev_is_dma_coherent(dev))
 		arch_sync_dma_for_cpu(phys, size, dir);
 
-	if (is_swiotlb_buffer(phys))
+	if (is_swiotlb_buffer(dev, phys))
 		swiotlb_sync_single_for_cpu(dev, phys, size, dir);
 }
 
@@ -797,7 +814,7 @@ static void iommu_dma_sync_single_for_device(struct device *dev,
 		return;
 
 	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
-	if (is_swiotlb_buffer(phys))
+	if (is_swiotlb_buffer(dev, phys))
 		swiotlb_sync_single_for_device(dev, phys, size, dir);
 
 	if (!dev_is_dma_coherent(dev))
@@ -818,7 +835,7 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
 		if (!dev_is_dma_coherent(dev))
 			arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
 
-		if (is_swiotlb_buffer(sg_phys(sg)))
+		if (is_swiotlb_buffer(dev, sg_phys(sg)))
 			swiotlb_sync_single_for_cpu(dev, sg_phys(sg),
 						    sg->length, dir);
 	}
@@ -835,7 +852,7 @@ static void iommu_dma_sync_sg_for_device(struct device *dev,
 		return;
 
 	for_each_sg(sgl, sg, nelems, i) {
-		if (is_swiotlb_buffer(sg_phys(sg)))
+		if (is_swiotlb_buffer(dev, sg_phys(sg)))
 			swiotlb_sync_single_for_device(dev, sg_phys(sg),
 						       sg->length, dir);
 
@@ -973,7 +990,7 @@ static int iommu_dma_map_sg_swiotlb(struct device *dev, struct scatterlist *sg,
 
 out_unmap:
 	iommu_dma_unmap_sg_swiotlb(dev, sg, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
-	return 0;
+	return -EIO;
 }
 
 /*
@@ -994,11 +1011,13 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
 	dma_addr_t iova;
 	size_t iova_len = 0;
 	unsigned long mask = dma_get_seg_boundary(dev);
+	ssize_t ret;
 	int i;
 
-	if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
-	    iommu_deferred_attach(dev, domain))
-		return 0;
+	if (static_branch_unlikely(&iommu_deferred_attach_enabled)) {
+		ret = iommu_deferred_attach(dev, domain);
+		goto out;
+	}
 
 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
 		iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
@@ -1046,14 +1065,17 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
 	}
 
 	iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev);
-	if (!iova)
+	if (!iova) {
+		ret = -ENOMEM;
 		goto out_restore_sg;
+	}
 
 	/*
 	 * We'll leave any physical concatenation to the IOMMU driver's
 	 * implementation - it knows better than we do.
 	 */
-	if (iommu_map_sg_atomic(domain, iova, sg, nents, prot) < iova_len)
+	ret = iommu_map_sg_atomic(domain, iova, sg, nents, prot);
+	if (ret < iova_len)
 		goto out_free_iova;
 
 	return __finalise_sg(dev, sg, nents, iova);
@@ -1062,7 +1084,10 @@ out_free_iova:
 	iommu_dma_free_iova(cookie, iova, iova_len, NULL);
 out_restore_sg:
 	__invalidate_sg(sg, nents);
-	return 0;
+out:
+	if (ret != -ENOMEM)
+		return -EINVAL;
+	return ret;
 }
 
 static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
@@ -1322,7 +1347,7 @@ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
 	 * The IOMMU core code allocates the default DMA domain, which the
 	 * underlying IOMMU driver needs to support via the dma-iommu layer.
 	 */
-	if (domain->type == IOMMU_DOMAIN_DMA) {
+	if (iommu_is_dma_domain(domain)) {
 		if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev))
 			goto out_err;
 		dev->dma_ops = &iommu_dma_ops;
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index d0fbf1d10e18..939ffa768986 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -21,7 +21,6 @@
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/slab.h>
-#include <linux/dma-iommu.h>
 
 typedef u32 sysmmu_iova_t;
 typedef u32 sysmmu_pte_t;
@@ -735,20 +734,16 @@ static struct iommu_domain *exynos_iommu_domain_alloc(unsigned type)
 	/* Check if correct PTE offsets are initialized */
 	BUG_ON(PG_ENT_SHIFT < 0 || !dma_dev);
 
+	if (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_UNMANAGED)
+		return NULL;
+
 	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
 	if (!domain)
 		return NULL;
 
-	if (type == IOMMU_DOMAIN_DMA) {
-		if (iommu_get_dma_cookie(&domain->domain) != 0)
-			goto err_pgtable;
-	} else if (type != IOMMU_DOMAIN_UNMANAGED) {
-		goto err_pgtable;
-	}
-
 	domain->pgtable = (sysmmu_pte_t *)__get_free_pages(GFP_KERNEL, 2);
 	if (!domain->pgtable)
-		goto err_dma_cookie;
+		goto err_pgtable;
 
 	domain->lv2entcnt = (short *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
 	if (!domain->lv2entcnt)
@@ -779,9 +774,6 @@ err_lv2ent:
 	free_pages((unsigned long)domain->lv2entcnt, 1);
 err_counter:
 	free_pages((unsigned long)domain->pgtable, 2);
-err_dma_cookie:
-	if (type == IOMMU_DOMAIN_DMA)
-		iommu_put_dma_cookie(&domain->domain);
 err_pgtable:
 	kfree(domain);
 	return NULL;
@@ -809,9 +801,6 @@ static void exynos_iommu_domain_free(struct iommu_domain *iommu_domain)
 
 	spin_unlock_irqrestore(&domain->lock, flags);
 
-	if (iommu_domain->type == IOMMU_DOMAIN_DMA)
-		iommu_put_dma_cookie(iommu_domain);
-
 	dma_unmap_single(dma_dev, virt_to_phys(domain->pgtable), LV1TABLE_SIZE,
 			 DMA_TO_DEVICE);
 
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 43ebd8af11c5..0ddb77115be7 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -25,9 +25,11 @@ config INTEL_IOMMU
 	  and include PCI device scope covered by these DMA
 	  remapping devices.
 
+if INTEL_IOMMU
+
 config INTEL_IOMMU_DEBUGFS
 	bool "Export Intel IOMMU internals in Debugfs"
-	depends on INTEL_IOMMU && IOMMU_DEBUGFS
+	depends on IOMMU_DEBUGFS
 	select DMAR_PERF
 	help
 	  !!!WARNING!!!
@@ -41,7 +43,7 @@ config INTEL_IOMMU_DEBUGFS
 
 config INTEL_IOMMU_SVM
 	bool "Support for Shared Virtual Memory with Intel IOMMU"
-	depends on INTEL_IOMMU && X86_64
+	depends on X86_64
 	select PCI_PASID
 	select PCI_PRI
 	select MMU_NOTIFIER
@@ -53,9 +55,8 @@ config INTEL_IOMMU_SVM
 	  means of a Process Address Space ID (PASID).
 
 config INTEL_IOMMU_DEFAULT_ON
-	def_bool y
-	prompt "Enable Intel DMA Remapping Devices by default"
-	depends on INTEL_IOMMU
+	bool "Enable Intel DMA Remapping Devices by default"
+	default y
 	help
 	  Selecting this option will enable a DMAR device at boot time if
 	  one is found. If this option is not selected, DMAR support can
@@ -63,7 +64,7 @@ config INTEL_IOMMU_DEFAULT_ON
 
 config INTEL_IOMMU_BROKEN_GFX_WA
 	bool "Workaround broken graphics drivers (going away soon)"
-	depends on INTEL_IOMMU && BROKEN && X86
+	depends on BROKEN && X86
 	help
 	  Current Graphics drivers tend to use physical address
 	  for DMA and avoid using DMA APIs. Setting this config
@@ -74,7 +75,7 @@ config INTEL_IOMMU_BROKEN_GFX_WA
 
 config INTEL_IOMMU_FLOPPY_WA
 	def_bool y
-	depends on INTEL_IOMMU && X86
+	depends on X86
 	help
 	  Floppy disk drivers are known to bypass DMA API calls
 	  thereby failing to work when IOMMU is enabled. This
@@ -83,7 +84,7 @@ config INTEL_IOMMU_FLOPPY_WA
 
 config INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
 	bool "Enable Intel IOMMU scalable mode by default"
-	depends on INTEL_IOMMU
+	default y
 	help
 	  Selecting this option will enable by default the scalable mode if
 	  hardware presents the capability. The scalable mode is defined in
@@ -92,3 +93,5 @@ config INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
 	  is not selected, scalable mode support could also be enabled by
 	  passing intel_iommu=sm_on to the kernel. If not sure, please use
 	  the default value.
+
+endif # INTEL_IOMMU
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index d66f79acd14d..0ec5514c9980 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -149,8 +149,6 @@ dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
 	} else {
 		info = kzalloc(size, GFP_KERNEL);
 		if (!info) {
-			pr_warn("Out of memory when allocating notify_info "
-				"for %s.\n", pci_name(dev));
 			if (dmar_dev_scope_status == 0)
 				dmar_dev_scope_status = -ENOMEM;
 			return NULL;
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index dd22fc7d5176..d75f59ae28e6 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -33,6 +33,7 @@
 #include <linux/iommu.h>
 #include <linux/dma-iommu.h>
 #include <linux/intel-iommu.h>
+#include <linux/intel-svm.h>
 #include <linux/syscore_ops.h>
 #include <linux/tboot.h>
 #include <linux/dmi.h>
@@ -85,24 +86,6 @@
 #define LEVEL_STRIDE		(9)
 #define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
 
-/*
- * This bitmap is used to advertise the page sizes our hardware support
- * to the IOMMU core, which will then use this information to split
- * physically contiguous memory regions it is mapping into page sizes
- * that we support.
- *
- * Traditionally the IOMMU core just handed us the mappings directly,
- * after making sure the size is an order of a 4KiB page and that the
- * mapping has natural alignment.
- *
- * To retain this behavior, we currently advertise that we support
- * all page sizes that are an order of 4KiB.
- *
- * If at some point we'd like to utilize the IOMMU core's new behavior,
- * we could change this to advertise the real page sizes we support.
- */
-#define INTEL_IOMMU_PGSIZES	(~0xFFFUL)
-
 static inline int agaw_to_level(int agaw)
 {
 	return agaw + 2;
@@ -345,23 +328,13 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
 					    dma_addr_t iova);
 
-#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
-int dmar_disabled = 0;
-#else
-int dmar_disabled = 1;
-#endif /* CONFIG_INTEL_IOMMU_DEFAULT_ON */
-
-#ifdef CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
-int intel_iommu_sm = 1;
-#else
-int intel_iommu_sm;
-#endif /* CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON */
+int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
+int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON);
 
 int intel_iommu_enabled = 0;
 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
 
 static int dmar_map_gfx = 1;
-static int intel_iommu_strict;
 static int intel_iommu_superpage = 1;
 static int iommu_identity_mapping;
 static int iommu_skip_te_disable;
@@ -454,14 +427,17 @@ static int __init intel_iommu_setup(char *str)
 			pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
 			iommu_dma_forcedac = true;
 		} else if (!strncmp(str, "strict", 6)) {
-			pr_info("Disable batched IOTLB flush\n");
-			intel_iommu_strict = 1;
+			pr_warn("intel_iommu=strict deprecated; use iommu.strict=1 instead\n");
+			iommu_set_dma_strict();
 		} else if (!strncmp(str, "sp_off", 6)) {
 			pr_info("Disable supported super page\n");
 			intel_iommu_superpage = 0;
 		} else if (!strncmp(str, "sm_on", 5)) {
-			pr_info("Intel-IOMMU: scalable mode supported\n");
+			pr_info("Enable scalable mode if hardware supports\n");
 			intel_iommu_sm = 1;
+		} else if (!strncmp(str, "sm_off", 6)) {
+			pr_info("Scalable mode is disallowed\n");
+			intel_iommu_sm = 0;
 		} else if (!strncmp(str, "tboot_noforce", 13)) {
 			pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
 			intel_iommu_tboot_noforce = 1;
@@ -601,7 +577,7 @@ struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
 	int iommu_id;
 
 	/* si_domain and vm domain should not get here. */
-	if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA))
+	if (WARN_ON(!iommu_is_dma_domain(&domain->domain)))
 		return NULL;
 
 	for_each_domain_iommu(iommu_id, domain)
@@ -736,6 +712,23 @@ static int domain_update_device_node(struct dmar_domain *domain)
 
 static void domain_update_iotlb(struct dmar_domain *domain);
 
+/* Return the super pagesize bitmap if supported. */
+static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
+{
+	unsigned long bitmap = 0;
+
+	/*
+	 * 1-level super page supports page size of 2MiB, 2-level super page
+	 * supports page size of both 2MiB and 1GiB.
+	 */
+	if (domain->iommu_superpage == 1)
+		bitmap |= SZ_2M;
+	else if (domain->iommu_superpage == 2)
+		bitmap |= SZ_2M | SZ_1G;
+
+	return bitmap;
+}
+
 /* Some capabilities may be different across iommus */
 static void domain_update_iommu_cap(struct dmar_domain *domain)
 {
@@ -762,6 +755,7 @@ static void domain_update_iommu_cap(struct dmar_domain *domain)
 	else
 		domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
 
+	domain->domain.pgsize_bitmap |= domain_super_pgsize_bitmap(domain);
 	domain_update_iotlb(domain);
 }
 
@@ -1035,7 +1029,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
 			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
 			if (domain_use_first_level(domain)) {
 				pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
-				if (domain->domain.type == IOMMU_DOMAIN_DMA)
+				if (iommu_is_dma_domain(&domain->domain))
 					pteval |= DMA_FL_PTE_ACCESS;
 			}
 			if (cmpxchg64(&pte->val, 0ULL, pteval))
@@ -1548,7 +1542,7 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info)
 
 	if (info->pri_supported &&
 	    (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1)  &&
-	    !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
+	    !pci_reset_pri(pdev) && !pci_enable_pri(pdev, PRQ_DEPTH))
 		info->pri_enabled = 1;
 #endif
 	if (info->ats_supported && pci_ats_page_aligned(pdev) &&
@@ -1780,11 +1774,8 @@ static int iommu_init_domains(struct intel_iommu *iommu)
 	spin_lock_init(&iommu->lock);
 
 	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
-	if (!iommu->domain_ids) {
-		pr_err("%s: Allocating domain id array failed\n",
-		       iommu->name);
+	if (!iommu->domain_ids)
 		return -ENOMEM;
-	}
 
 	size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
 	iommu->domains = kzalloc(size, GFP_KERNEL);
@@ -1980,10 +1971,6 @@ static void domain_exit(struct dmar_domain *domain)
 	/* Remove associated devices and clear attached or cached domains */
 	domain_remove_dev_info(domain);
 
-	/* destroy iovas */
-	if (domain->domain.type == IOMMU_DOMAIN_DMA)
-		iommu_put_dma_cookie(&domain->domain);
-
 	if (domain->pgd) {
 		struct page *freelist;
 
@@ -2334,9 +2321,9 @@ static int
 __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 		 unsigned long phys_pfn, unsigned long nr_pages, int prot)
 {
+	struct dma_pte *first_pte = NULL, *pte = NULL;
 	unsigned int largepage_lvl = 0;
 	unsigned long lvl_pages = 0;
-	struct dma_pte *pte = NULL;
 	phys_addr_t pteval;
 	u64 attr;
 
@@ -2348,13 +2335,9 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 	attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
 	attr |= DMA_FL_PTE_PRESENT;
 	if (domain_use_first_level(domain)) {
-		attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
-
-		if (domain->domain.type == IOMMU_DOMAIN_DMA) {
-			attr |= DMA_FL_PTE_ACCESS;
-			if (prot & DMA_PTE_WRITE)
-				attr |= DMA_FL_PTE_DIRTY;
-		}
+		attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
+		if (prot & DMA_PTE_WRITE)
+			attr |= DMA_FL_PTE_DIRTY;
 	}
 
 	pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
@@ -2369,6 +2352,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 			pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
 			if (!pte)
 				return -ENOMEM;
+			first_pte = pte;
+
 			/* It is large page*/
 			if (largepage_lvl > 1) {
 				unsigned long end_pfn;
@@ -2416,14 +2401,14 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 		 * recalculate 'pte' and switch back to smaller pages for the
 		 * end of the mapping, if the trailing size is not enough to
 		 * use another superpage (i.e. nr_pages < lvl_pages).
-		 *
-		 * We leave clflush for the leaf pte changes to iotlb_sync_map()
-		 * callback.
 		 */
 		pte++;
 		if (!nr_pages || first_pte_in_page(pte) ||
-		    (largepage_lvl > 1 && nr_pages < lvl_pages))
+		    (largepage_lvl > 1 && nr_pages < lvl_pages)) {
+			domain_flush_cache(domain, first_pte,
+					   (void *)pte - (void *)first_pte);
 			pte = NULL;
+		}
 	}
 
 	return 0;
@@ -3227,7 +3212,6 @@ static int __init init_dmars(void)
 	g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
 			GFP_KERNEL);
 	if (!g_iommus) {
-		pr_err("Allocating global iommu array failed\n");
 		ret = -ENOMEM;
 		goto error;
 	}
@@ -4393,9 +4377,9 @@ int __init intel_iommu_init(void)
 		 * is likely to be much lower than the overhead of synchronizing
 		 * the virtual and physical IOMMU page-tables.
 		 */
-		if (!intel_iommu_strict && cap_caching_mode(iommu->cap)) {
-			pr_warn("IOMMU batching is disabled due to virtualization");
-			intel_iommu_strict = 1;
+		if (cap_caching_mode(iommu->cap)) {
+			pr_info_once("IOMMU batching disallowed due to virtualization\n");
+			iommu_set_dma_strict();
 		}
 		iommu_device_sysfs_add(&iommu->iommu, NULL,
 				       intel_iommu_groups,
@@ -4404,7 +4388,6 @@ int __init intel_iommu_init(void)
 	}
 	up_read(&dmar_global_lock);
 
-	iommu_set_dma_strict(intel_iommu_strict);
 	bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
 	if (si_domain && !hw_pass_through)
 		register_memory_notifier(&intel_iommu_memory_nb);
@@ -4532,6 +4515,7 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
 
 	switch (type) {
 	case IOMMU_DOMAIN_DMA:
+	case IOMMU_DOMAIN_DMA_FQ:
 	case IOMMU_DOMAIN_UNMANAGED:
 		dmar_domain = alloc_domain(0);
 		if (!dmar_domain) {
@@ -4544,10 +4528,6 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
 			return NULL;
 		}
 
-		if (type == IOMMU_DOMAIN_DMA &&
-		    iommu_get_dma_cookie(&dmar_domain->domain))
-			return NULL;
-
 		domain = &dmar_domain->domain;
 		domain->geometry.aperture_start = 0;
 		domain->geometry.aperture_end   =
@@ -5067,6 +5047,28 @@ static int intel_iommu_map(struct iommu_domain *domain,
 				hpa >> VTD_PAGE_SHIFT, size, prot);
 }
 
+static int intel_iommu_map_pages(struct iommu_domain *domain,
+				 unsigned long iova, phys_addr_t paddr,
+				 size_t pgsize, size_t pgcount,
+				 int prot, gfp_t gfp, size_t *mapped)
+{
+	unsigned long pgshift = __ffs(pgsize);
+	size_t size = pgcount << pgshift;
+	int ret;
+
+	if (pgsize != SZ_4K && pgsize != SZ_2M && pgsize != SZ_1G)
+		return -EINVAL;
+
+	if (!IS_ALIGNED(iova | paddr, pgsize))
+		return -EINVAL;
+
+	ret = intel_iommu_map(domain, iova, paddr, size, prot, gfp);
+	if (!ret && mapped)
+		*mapped = size;
+
+	return ret;
+}
+
 static size_t intel_iommu_unmap(struct iommu_domain *domain,
 				unsigned long iova, size_t size,
 				struct iommu_iotlb_gather *gather)
@@ -5096,6 +5098,17 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
 	return size;
 }
 
+static size_t intel_iommu_unmap_pages(struct iommu_domain *domain,
+				      unsigned long iova,
+				      size_t pgsize, size_t pgcount,
+				      struct iommu_iotlb_gather *gather)
+{
+	unsigned long pgshift = __ffs(pgsize);
+	size_t size = pgcount << pgshift;
+
+	return intel_iommu_unmap(domain, iova, size, gather);
+}
+
 static void intel_iommu_tlb_sync(struct iommu_domain *domain,
 				 struct iommu_iotlb_gather *gather)
 {
@@ -5172,12 +5185,8 @@ static void intel_iommu_release_device(struct device *dev)
 
 static void intel_iommu_probe_finalize(struct device *dev)
 {
-	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
-
-	if (domain && domain->type == IOMMU_DOMAIN_DMA)
-		iommu_setup_dma_ops(dev, 0, U64_MAX);
-	else
-		set_dma_ops(dev, NULL);
+	set_dma_ops(dev, NULL);
+	iommu_setup_dma_ops(dev, 0, U64_MAX);
 }
 
 static void intel_iommu_get_resv_regions(struct device *device,
@@ -5532,39 +5541,6 @@ static bool risky_device(struct pci_dev *pdev)
 	return false;
 }
 
-static void clflush_sync_map(struct dmar_domain *domain, unsigned long clf_pfn,
-			     unsigned long clf_pages)
-{
-	struct dma_pte *first_pte = NULL, *pte = NULL;
-	unsigned long lvl_pages = 0;
-	int level = 0;
-
-	while (clf_pages > 0) {
-		if (!pte) {
-			level = 0;
-			pte = pfn_to_dma_pte(domain, clf_pfn, &level);
-			if (WARN_ON(!pte))
-				return;
-			first_pte = pte;
-			lvl_pages = lvl_to_nr_pages(level);
-		}
-
-		if (WARN_ON(!lvl_pages || clf_pages < lvl_pages))
-			return;
-
-		clf_pages -= lvl_pages;
-		clf_pfn += lvl_pages;
-		pte++;
-
-		if (!clf_pages || first_pte_in_page(pte) ||
-		    (level > 1 && clf_pages < lvl_pages)) {
-			domain_flush_cache(domain, first_pte,
-					   (void *)pte - (void *)first_pte);
-			pte = NULL;
-		}
-	}
-}
-
 static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
 				       unsigned long iova, size_t size)
 {
@@ -5574,9 +5550,6 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
 	struct intel_iommu *iommu;
 	int iommu_id;
 
-	if (!dmar_domain->iommu_coherency)
-		clflush_sync_map(dmar_domain, pfn, pages);
-
 	for_each_domain_iommu(iommu_id, dmar_domain) {
 		iommu = g_iommus[iommu_id];
 		__mapping_notify_one(iommu, dmar_domain, pfn, pages);
@@ -5593,9 +5566,9 @@ const struct iommu_ops intel_iommu_ops = {
 	.aux_attach_dev		= intel_iommu_aux_attach_device,
 	.aux_detach_dev		= intel_iommu_aux_detach_device,
 	.aux_get_pasid		= intel_iommu_aux_get_pasid,
-	.map			= intel_iommu_map,
+	.map_pages		= intel_iommu_map_pages,
+	.unmap_pages		= intel_iommu_unmap_pages,
 	.iotlb_sync_map		= intel_iommu_iotlb_sync_map,
-	.unmap			= intel_iommu_unmap,
 	.flush_iotlb_all        = intel_flush_iotlb_all,
 	.iotlb_sync		= intel_iommu_tlb_sync,
 	.iova_to_phys		= intel_iommu_iova_to_phys,
@@ -5611,7 +5584,7 @@ const struct iommu_ops intel_iommu_ops = {
 	.dev_disable_feat	= intel_iommu_dev_disable_feat,
 	.is_attach_deferred	= intel_iommu_is_attach_deferred,
 	.def_domain_type	= device_def_domain_type,
-	.pgsize_bitmap		= INTEL_IOMMU_PGSIZES,
+	.pgsize_bitmap		= SZ_4K,
 #ifdef CONFIG_INTEL_IOMMU_SVM
 	.cache_invalidate	= intel_iommu_sva_invalidate,
 	.sva_bind_gpasid	= intel_svm_bind_gpasid,
@@ -5714,8 +5687,8 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
 	} else if (dmar_map_gfx) {
 		/* we have to ensure the gfx device is idle before we flush */
 		pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
-		intel_iommu_strict = 1;
-       }
+		iommu_set_dma_strict();
+	}
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 9ec374e17469..07c390aed1fe 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -517,7 +517,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
 	if (WARN_ON(!pte))
 		return;
 
-	if (!(pte->val[0] & PASID_PTE_PRESENT))
+	if (!pasid_pte_is_present(pte))
 		return;
 
 	did = pasid_get_domain_id(pte);
@@ -540,6 +540,10 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
 		devtlb_invalidation_with_pasid(iommu, dev, pasid);
 }
 
+/*
+ * This function flushes cache for a newly setup pasid table entry.
+ * Caller of it should not modify the in-use pasid table entries.
+ */
 static void pasid_flush_caches(struct intel_iommu *iommu,
 				struct pasid_entry *pte,
 			       u32 pasid, u16 did)
@@ -591,6 +595,10 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
 	if (WARN_ON(!pte))
 		return -EINVAL;
 
+	/* Caller must ensure PASID entry is not in use. */
+	if (pasid_pte_is_present(pte))
+		return -EBUSY;
+
 	pasid_clear_entry(pte);
 
 	/* Setup the first level page table pointer: */
@@ -690,6 +698,10 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
 		return -ENODEV;
 	}
 
+	/* Caller must ensure PASID entry is not in use. */
+	if (pasid_pte_is_present(pte))
+		return -EBUSY;
+
 	pasid_clear_entry(pte);
 	pasid_set_domain_id(pte, did);
 	pasid_set_slptr(pte, pgd_val);
@@ -729,6 +741,10 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
 		return -ENODEV;
 	}
 
+	/* Caller must ensure PASID entry is not in use. */
+	if (pasid_pte_is_present(pte))
+		return -EBUSY;
+
 	pasid_clear_entry(pte);
 	pasid_set_domain_id(pte, did);
 	pasid_set_address_width(pte, iommu->agaw);
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index c11bc8b833b8..d5552e2c160d 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -28,12 +28,12 @@
 #define VCMD_CMD_ALLOC			0x1
 #define VCMD_CMD_FREE			0x2
 #define VCMD_VRSP_IP			0x1
-#define VCMD_VRSP_SC(e)			(((e) >> 1) & 0x3)
+#define VCMD_VRSP_SC(e)			(((e) & 0xff) >> 1)
 #define VCMD_VRSP_SC_SUCCESS		0
-#define VCMD_VRSP_SC_NO_PASID_AVAIL	2
-#define VCMD_VRSP_SC_INVALID_PASID	2
-#define VCMD_VRSP_RESULT_PASID(e)	(((e) >> 8) & 0xfffff)
-#define VCMD_CMD_OPERAND(e)		((e) << 8)
+#define VCMD_VRSP_SC_NO_PASID_AVAIL	16
+#define VCMD_VRSP_SC_INVALID_PASID	16
+#define VCMD_VRSP_RESULT_PASID(e)	(((e) >> 16) & 0xfffff)
+#define VCMD_CMD_OPERAND(e)		((e) << 16)
 /*
  * Domain ID reserved for pasid entries programmed for first-level
  * only and pass-through transfer modes.
diff --git a/drivers/iommu/intel/perf.c b/drivers/iommu/intel/perf.c
index 73b7ec705552..0e8e03252d92 100644
--- a/drivers/iommu/intel/perf.c
+++ b/drivers/iommu/intel/perf.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/**
+/*
  * perf.c - performance monitor
  *
  * Copyright (C) 2021 Intel Corporation
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 4b9b3f35ba0e..0c228787704f 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -31,8 +31,6 @@ static irqreturn_t prq_event_thread(int irq, void *d);
 static void intel_svm_drain_prq(struct device *dev, u32 pasid);
 #define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)
 
-#define PRQ_ORDER 0
-
 static DEFINE_XARRAY_ALLOC(pasid_private_array);
 static int pasid_private_add(ioasid_t pasid, void *priv)
 {
@@ -516,9 +514,6 @@ static void load_pasid(struct mm_struct *mm, u32 pasid)
 {
 	mutex_lock(&mm->context.lock);
 
-	/* Synchronize with READ_ONCE in update_pasid(). */
-	smp_store_release(&mm->pasid, pasid);
-
 	/* Update PASID MSR on all CPUs running the mm's tasks. */
 	on_each_cpu_mask(mm_cpumask(mm), _load_pasid, NULL, true);
 
@@ -725,8 +720,6 @@ struct page_req_dsc {
 	u64 priv_data[2];
 };
 
-#define PRQ_RING_MASK	((0x1000 << PRQ_ORDER) - 0x20)
-
 static bool is_canonical_address(u64 addr)
 {
 	int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
@@ -796,7 +789,19 @@ prq_retry:
 		goto prq_retry;
 	}
 
+	/*
+	 * A work in IO page fault workqueue may try to lock pasid_mutex now.
+	 * Holding pasid_mutex while waiting in iopf_queue_flush_dev() for
+	 * all works in the workqueue to finish may cause deadlock.
+	 *
+	 * It's unnecessary to hold pasid_mutex in iopf_queue_flush_dev().
+	 * Unlock it to allow the works to be handled while waiting for
+	 * them to finish.
+	 */
+	lockdep_assert_held(&pasid_mutex);
+	mutex_unlock(&pasid_mutex);
 	iopf_queue_flush_dev(dev);
+	mutex_lock(&pasid_mutex);
 
 	/*
 	 * Perform steps described in VT-d spec CH7.10 to drain page
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index d4004bcf333a..bfb6acb651e5 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -519,11 +519,12 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova,
 	return __arm_v7s_map(data, iova, paddr, size, prot, lvl + 1, cptep, gfp);
 }
 
-static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
-			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+static int arm_v7s_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
+			     phys_addr_t paddr, size_t pgsize, size_t pgcount,
+			     int prot, gfp_t gfp, size_t *mapped)
 {
 	struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
-	int ret;
+	int ret = -EINVAL;
 
 	if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) ||
 		    paddr >= (1ULL << data->iop.cfg.oas)))
@@ -533,7 +534,17 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
 	if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
 		return 0;
 
-	ret = __arm_v7s_map(data, iova, paddr, size, prot, 1, data->pgd, gfp);
+	while (pgcount--) {
+		ret = __arm_v7s_map(data, iova, paddr, pgsize, prot, 1, data->pgd,
+				    gfp);
+		if (ret)
+			break;
+
+		iova += pgsize;
+		paddr += pgsize;
+		if (mapped)
+			*mapped += pgsize;
+	}
 	/*
 	 * Synchronise all PTE updates for the new mapping before there's
 	 * a chance for anything to kick off a table walk for the new iova.
@@ -543,6 +554,12 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
 	return ret;
 }
 
+static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
+		       phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+{
+	return arm_v7s_map_pages(ops, iova, paddr, size, 1, prot, gfp, NULL);
+}
+
 static void arm_v7s_free_pgtable(struct io_pgtable *iop)
 {
 	struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop);
@@ -683,14 +700,7 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
 						ARM_V7S_BLOCK_SIZE(lvl + 1));
 				ptep = iopte_deref(pte[i], lvl, data);
 				__arm_v7s_free_table(ptep, lvl + 1, data);
-			} else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
-				/*
-				 * Order the PTE update against queueing the IOVA, to
-				 * guarantee that a flush callback from a different CPU
-				 * has observed it before the TLBIALL can be issued.
-				 */
-				smp_wmb();
-			} else {
+			} else if (!iommu_iotlb_gather_queued(gather)) {
 				io_pgtable_tlb_add_page(iop, gather, iova, blk_size);
 			}
 			iova += blk_size;
@@ -710,15 +720,32 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
 	return __arm_v7s_unmap(data, gather, iova, size, lvl + 1, ptep);
 }
 
-static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
-			    size_t size, struct iommu_iotlb_gather *gather)
+static size_t arm_v7s_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova,
+				  size_t pgsize, size_t pgcount,
+				  struct iommu_iotlb_gather *gather)
 {
 	struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
+	size_t unmapped = 0, ret;
 
 	if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias)))
 		return 0;
 
-	return __arm_v7s_unmap(data, gather, iova, size, 1, data->pgd);
+	while (pgcount--) {
+		ret = __arm_v7s_unmap(data, gather, iova, pgsize, 1, data->pgd);
+		if (!ret)
+			break;
+
+		unmapped += pgsize;
+		iova += pgsize;
+	}
+
+	return unmapped;
+}
+
+static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
+			    size_t size, struct iommu_iotlb_gather *gather)
+{
+	return arm_v7s_unmap_pages(ops, iova, size, 1, gather);
 }
 
 static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
@@ -757,8 +784,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
 
 	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
 			    IO_PGTABLE_QUIRK_NO_PERMS |
-			    IO_PGTABLE_QUIRK_ARM_MTK_EXT |
-			    IO_PGTABLE_QUIRK_NON_STRICT))
+			    IO_PGTABLE_QUIRK_ARM_MTK_EXT))
 		return NULL;
 
 	/* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */
@@ -780,7 +806,9 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
 
 	data->iop.ops = (struct io_pgtable_ops) {
 		.map		= arm_v7s_map,
+		.map_pages	= arm_v7s_map_pages,
 		.unmap		= arm_v7s_unmap,
+		.unmap_pages	= arm_v7s_unmap_pages,
 		.iova_to_phys	= arm_v7s_iova_to_phys,
 	};
 
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 87def58e79b5..dd9e47189d0d 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -46,6 +46,9 @@
 #define ARM_LPAE_PGD_SIZE(d)						\
 	(sizeof(arm_lpae_iopte) << (d)->pgd_bits)
 
+#define ARM_LPAE_PTES_PER_TABLE(d)					\
+	(ARM_LPAE_GRANULE(d) >> ilog2(sizeof(arm_lpae_iopte)))
+
 /*
  * Calculate the index at level l used to map virtual address a using the
  * pagetable in d.
@@ -127,6 +130,9 @@
 #define ARM_MALI_LPAE_MEMATTR_IMP_DEF	0x88ULL
 #define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL
 
+#define APPLE_DART_PTE_PROT_NO_WRITE (1<<7)
+#define APPLE_DART_PTE_PROT_NO_READ (1<<8)
+
 /* IOPTE accessors */
 #define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))
 
@@ -232,70 +238,77 @@ static void __arm_lpae_free_pages(void *pages, size_t size,
 	free_pages((unsigned long)pages, get_order(size));
 }
 
-static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep,
+static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries,
 				struct io_pgtable_cfg *cfg)
 {
 	dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep),
-				   sizeof(*ptep), DMA_TO_DEVICE);
+				   sizeof(*ptep) * num_entries, DMA_TO_DEVICE);
 }
 
-static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
-			       struct io_pgtable_cfg *cfg)
+static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg)
 {
-	*ptep = pte;
+
+	*ptep = 0;
 
 	if (!cfg->coherent_walk)
-		__arm_lpae_sync_pte(ptep, cfg);
+		__arm_lpae_sync_pte(ptep, 1, cfg);
 }
 
 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 			       struct iommu_iotlb_gather *gather,
-			       unsigned long iova, size_t size, int lvl,
-			       arm_lpae_iopte *ptep);
+			       unsigned long iova, size_t size, size_t pgcount,
+			       int lvl, arm_lpae_iopte *ptep);
 
 static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
 				phys_addr_t paddr, arm_lpae_iopte prot,
-				int lvl, arm_lpae_iopte *ptep)
+				int lvl, int num_entries, arm_lpae_iopte *ptep)
 {
 	arm_lpae_iopte pte = prot;
+	struct io_pgtable_cfg *cfg = &data->iop.cfg;
+	size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
+	int i;
 
 	if (data->iop.fmt != ARM_MALI_LPAE && lvl == ARM_LPAE_MAX_LEVELS - 1)
 		pte |= ARM_LPAE_PTE_TYPE_PAGE;
 	else
 		pte |= ARM_LPAE_PTE_TYPE_BLOCK;
 
-	pte |= paddr_to_iopte(paddr, data);
+	for (i = 0; i < num_entries; i++)
+		ptep[i] = pte | paddr_to_iopte(paddr + i * sz, data);
 
-	__arm_lpae_set_pte(ptep, pte, &data->iop.cfg);
+	if (!cfg->coherent_walk)
+		__arm_lpae_sync_pte(ptep, num_entries, cfg);
 }
 
 static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
 			     unsigned long iova, phys_addr_t paddr,
-			     arm_lpae_iopte prot, int lvl,
+			     arm_lpae_iopte prot, int lvl, int num_entries,
 			     arm_lpae_iopte *ptep)
 {
-	arm_lpae_iopte pte = *ptep;
-
-	if (iopte_leaf(pte, lvl, data->iop.fmt)) {
-		/* We require an unmap first */
-		WARN_ON(!selftest_running);
-		return -EEXIST;
-	} else if (iopte_type(pte) == ARM_LPAE_PTE_TYPE_TABLE) {
-		/*
-		 * We need to unmap and free the old table before
-		 * overwriting it with a block entry.
-		 */
-		arm_lpae_iopte *tblp;
-		size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
-
-		tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
-		if (__arm_lpae_unmap(data, NULL, iova, sz, lvl, tblp) != sz) {
-			WARN_ON(1);
-			return -EINVAL;
+	int i;
+
+	for (i = 0; i < num_entries; i++)
+		if (iopte_leaf(ptep[i], lvl, data->iop.fmt)) {
+			/* We require an unmap first */
+			WARN_ON(!selftest_running);
+			return -EEXIST;
+		} else if (iopte_type(ptep[i]) == ARM_LPAE_PTE_TYPE_TABLE) {
+			/*
+			 * We need to unmap and free the old table before
+			 * overwriting it with a block entry.
+			 */
+			arm_lpae_iopte *tblp;
+			size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
+
+			tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
+			if (__arm_lpae_unmap(data, NULL, iova + i * sz, sz, 1,
+					     lvl, tblp) != sz) {
+				WARN_ON(1);
+				return -EINVAL;
+			}
 		}
-	}
 
-	__arm_lpae_init_pte(data, paddr, prot, lvl, ptep);
+	__arm_lpae_init_pte(data, paddr, prot, lvl, num_entries, ptep);
 	return 0;
 }
 
@@ -323,7 +336,7 @@ static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
 		return old;
 
 	/* Even if it's not ours, there's no point waiting; just kick it */
-	__arm_lpae_sync_pte(ptep, cfg);
+	__arm_lpae_sync_pte(ptep, 1, cfg);
 	if (old == curr)
 		WRITE_ONCE(*ptep, new | ARM_LPAE_PTE_SW_SYNC);
 
@@ -331,20 +344,30 @@ static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
 }
 
 static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
-			  phys_addr_t paddr, size_t size, arm_lpae_iopte prot,
-			  int lvl, arm_lpae_iopte *ptep, gfp_t gfp)
+			  phys_addr_t paddr, size_t size, size_t pgcount,
+			  arm_lpae_iopte prot, int lvl, arm_lpae_iopte *ptep,
+			  gfp_t gfp, size_t *mapped)
 {
 	arm_lpae_iopte *cptep, pte;
 	size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
 	size_t tblsz = ARM_LPAE_GRANULE(data);
 	struct io_pgtable_cfg *cfg = &data->iop.cfg;
+	int ret = 0, num_entries, max_entries, map_idx_start;
 
 	/* Find our entry at the current level */
-	ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
+	map_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
+	ptep += map_idx_start;
 
 	/* If we can install a leaf entry at this level, then do so */
-	if (size == block_size)
-		return arm_lpae_init_pte(data, iova, paddr, prot, lvl, ptep);
+	if (size == block_size) {
+		max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start;
+		num_entries = min_t(int, pgcount, max_entries);
+		ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, num_entries, ptep);
+		if (!ret && mapped)
+			*mapped += num_entries * size;
+
+		return ret;
+	}
 
 	/* We can't allocate tables at the final level */
 	if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1))
@@ -361,7 +384,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
 		if (pte)
 			__arm_lpae_free_pages(cptep, tblsz, cfg);
 	} else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) {
-		__arm_lpae_sync_pte(ptep, cfg);
+		__arm_lpae_sync_pte(ptep, 1, cfg);
 	}
 
 	if (pte && !iopte_leaf(pte, lvl, data->iop.fmt)) {
@@ -373,7 +396,8 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
 	}
 
 	/* Rinse, repeat */
-	return __arm_lpae_map(data, iova, paddr, size, prot, lvl + 1, cptep, gfp);
+	return __arm_lpae_map(data, iova, paddr, size, pgcount, prot, lvl + 1,
+			      cptep, gfp, mapped);
 }
 
 static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
@@ -381,6 +405,15 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
 {
 	arm_lpae_iopte pte;
 
+	if (data->iop.fmt == APPLE_DART) {
+		pte = 0;
+		if (!(prot & IOMMU_WRITE))
+			pte |= APPLE_DART_PTE_PROT_NO_WRITE;
+		if (!(prot & IOMMU_READ))
+			pte |= APPLE_DART_PTE_PROT_NO_READ;
+		return pte;
+	}
+
 	if (data->iop.fmt == ARM_64_LPAE_S1 ||
 	    data->iop.fmt == ARM_32_LPAE_S1) {
 		pte = ARM_LPAE_PTE_nG;
@@ -440,8 +473,9 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
 	return pte;
 }
 
-static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
-			phys_addr_t paddr, size_t size, int iommu_prot, gfp_t gfp)
+static int arm_lpae_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
+			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
+			      int iommu_prot, gfp_t gfp, size_t *mapped)
 {
 	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
 	struct io_pgtable_cfg *cfg = &data->iop.cfg;
@@ -450,7 +484,7 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
 	arm_lpae_iopte prot;
 	long iaext = (s64)iova >> cfg->ias;
 
-	if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size))
+	if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize))
 		return -EINVAL;
 
 	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
@@ -463,7 +497,8 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
 		return 0;
 
 	prot = arm_lpae_prot_to_pte(data, iommu_prot);
-	ret = __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep, gfp);
+	ret = __arm_lpae_map(data, iova, paddr, pgsize, pgcount, prot, lvl,
+			     ptep, gfp, mapped);
 	/*
 	 * Synchronise all PTE updates for the new mapping before there's
 	 * a chance for anything to kick off a table walk for the new iova.
@@ -473,6 +508,13 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
 	return ret;
 }
 
+static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
+			phys_addr_t paddr, size_t size, int iommu_prot, gfp_t gfp)
+{
+	return arm_lpae_map_pages(ops, iova, paddr, size, 1, iommu_prot, gfp,
+				  NULL);
+}
+
 static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
 				    arm_lpae_iopte *ptep)
 {
@@ -516,14 +558,15 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 				       struct iommu_iotlb_gather *gather,
 				       unsigned long iova, size_t size,
 				       arm_lpae_iopte blk_pte, int lvl,
-				       arm_lpae_iopte *ptep)
+				       arm_lpae_iopte *ptep, size_t pgcount)
 {
 	struct io_pgtable_cfg *cfg = &data->iop.cfg;
 	arm_lpae_iopte pte, *tablep;
 	phys_addr_t blk_paddr;
 	size_t tablesz = ARM_LPAE_GRANULE(data);
 	size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
-	int i, unmap_idx = -1;
+	int ptes_per_table = ARM_LPAE_PTES_PER_TABLE(data);
+	int i, unmap_idx_start = -1, num_entries = 0, max_entries;
 
 	if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
 		return 0;
@@ -532,18 +575,21 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 	if (!tablep)
 		return 0; /* Bytes unmapped */
 
-	if (size == split_sz)
-		unmap_idx = ARM_LPAE_LVL_IDX(iova, lvl, data);
+	if (size == split_sz) {
+		unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
+		max_entries = ptes_per_table - unmap_idx_start;
+		num_entries = min_t(int, pgcount, max_entries);
+	}
 
 	blk_paddr = iopte_to_paddr(blk_pte, data);
 	pte = iopte_prot(blk_pte);
 
-	for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz) {
+	for (i = 0; i < ptes_per_table; i++, blk_paddr += split_sz) {
 		/* Unmap! */
-		if (i == unmap_idx)
+		if (i >= unmap_idx_start && i < (unmap_idx_start + num_entries))
 			continue;
 
-		__arm_lpae_init_pte(data, blk_paddr, pte, lvl, &tablep[i]);
+		__arm_lpae_init_pte(data, blk_paddr, pte, lvl, 1, &tablep[i]);
 	}
 
 	pte = arm_lpae_install_table(tablep, ptep, blk_pte, cfg);
@@ -558,76 +604,85 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 			return 0;
 
 		tablep = iopte_deref(pte, data);
-	} else if (unmap_idx >= 0) {
-		io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
-		return size;
+	} else if (unmap_idx_start >= 0) {
+		for (i = 0; i < num_entries; i++)
+			io_pgtable_tlb_add_page(&data->iop, gather, iova + i * size, size);
+
+		return num_entries * size;
 	}
 
-	return __arm_lpae_unmap(data, gather, iova, size, lvl, tablep);
+	return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl, tablep);
 }
 
 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 			       struct iommu_iotlb_gather *gather,
-			       unsigned long iova, size_t size, int lvl,
-			       arm_lpae_iopte *ptep)
+			       unsigned long iova, size_t size, size_t pgcount,
+			       int lvl, arm_lpae_iopte *ptep)
 {
 	arm_lpae_iopte pte;
 	struct io_pgtable *iop = &data->iop;
+	int i = 0, num_entries, max_entries, unmap_idx_start;
 
 	/* Something went horribly wrong and we ran out of page table */
 	if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
 		return 0;
 
-	ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
+	unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
+	ptep += unmap_idx_start;
 	pte = READ_ONCE(*ptep);
 	if (WARN_ON(!pte))
 		return 0;
 
 	/* If the size matches this level, we're in the right place */
 	if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
-		__arm_lpae_set_pte(ptep, 0, &iop->cfg);
-
-		if (!iopte_leaf(pte, lvl, iop->fmt)) {
-			/* Also flush any partial walks */
-			io_pgtable_tlb_flush_walk(iop, iova, size,
-						  ARM_LPAE_GRANULE(data));
-			ptep = iopte_deref(pte, data);
-			__arm_lpae_free_pgtable(data, lvl + 1, ptep);
-		} else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
-			/*
-			 * Order the PTE update against queueing the IOVA, to
-			 * guarantee that a flush callback from a different CPU
-			 * has observed it before the TLBIALL can be issued.
-			 */
-			smp_wmb();
-		} else {
-			io_pgtable_tlb_add_page(iop, gather, iova, size);
+		max_entries = ARM_LPAE_PTES_PER_TABLE(data) - unmap_idx_start;
+		num_entries = min_t(int, pgcount, max_entries);
+
+		while (i < num_entries) {
+			pte = READ_ONCE(*ptep);
+			if (WARN_ON(!pte))
+				break;
+
+			__arm_lpae_clear_pte(ptep, &iop->cfg);
+
+			if (!iopte_leaf(pte, lvl, iop->fmt)) {
+				/* Also flush any partial walks */
+				io_pgtable_tlb_flush_walk(iop, iova + i * size, size,
+							  ARM_LPAE_GRANULE(data));
+				__arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
+			} else if (!iommu_iotlb_gather_queued(gather)) {
+				io_pgtable_tlb_add_page(iop, gather, iova + i * size, size);
+			}
+
+			ptep++;
+			i++;
 		}
 
-		return size;
+		return i * size;
 	} else if (iopte_leaf(pte, lvl, iop->fmt)) {
 		/*
 		 * Insert a table at the next level to map the old region,
 		 * minus the part we want to unmap
 		 */
 		return arm_lpae_split_blk_unmap(data, gather, iova, size, pte,
-						lvl + 1, ptep);
+						lvl + 1, ptep, pgcount);
 	}
 
 	/* Keep on walkin' */
 	ptep = iopte_deref(pte, data);
-	return __arm_lpae_unmap(data, gather, iova, size, lvl + 1, ptep);
+	return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl + 1, ptep);
 }
 
-static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
-			     size_t size, struct iommu_iotlb_gather *gather)
+static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova,
+				   size_t pgsize, size_t pgcount,
+				   struct iommu_iotlb_gather *gather)
 {
 	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
 	struct io_pgtable_cfg *cfg = &data->iop.cfg;
 	arm_lpae_iopte *ptep = data->pgd;
 	long iaext = (s64)iova >> cfg->ias;
 
-	if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size))
+	if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize || !pgcount))
 		return 0;
 
 	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
@@ -635,7 +690,14 @@ static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
 	if (WARN_ON(iaext))
 		return 0;
 
-	return __arm_lpae_unmap(data, gather, iova, size, data->start_level, ptep);
+	return __arm_lpae_unmap(data, gather, iova, pgsize, pgcount,
+				data->start_level, ptep);
+}
+
+static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
+			     size_t size, struct iommu_iotlb_gather *gather)
+{
+	return arm_lpae_unmap_pages(ops, iova, size, 1, gather);
 }
 
 static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
@@ -750,7 +812,9 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
 
 	data->iop.ops = (struct io_pgtable_ops) {
 		.map		= arm_lpae_map,
+		.map_pages	= arm_lpae_map_pages,
 		.unmap		= arm_lpae_unmap,
+		.unmap_pages	= arm_lpae_unmap_pages,
 		.iova_to_phys	= arm_lpae_iova_to_phys,
 	};
 
@@ -766,7 +830,6 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
 	bool tg1;
 
 	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
-			    IO_PGTABLE_QUIRK_NON_STRICT |
 			    IO_PGTABLE_QUIRK_ARM_TTBR1 |
 			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
 		return NULL;
@@ -870,7 +933,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
 	typeof(&cfg->arm_lpae_s2_cfg.vtcr) vtcr = &cfg->arm_lpae_s2_cfg.vtcr;
 
 	/* The NS quirk doesn't apply at stage 2 */
-	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NON_STRICT))
+	if (cfg->quirks)
 		return NULL;
 
 	data = arm_lpae_alloc_pgtable(cfg);
@@ -1043,6 +1106,52 @@ out_free_data:
 	return NULL;
 }
 
+static struct io_pgtable *
+apple_dart_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
+{
+	struct arm_lpae_io_pgtable *data;
+	int i;
+
+	if (cfg->oas > 36)
+		return NULL;
+
+	data = arm_lpae_alloc_pgtable(cfg);
+	if (!data)
+		return NULL;
+
+	/*
+	 * The table format itself always uses two levels, but the total VA
+	 * space is mapped by four separate tables, making the MMIO registers
+	 * an effective "level 1". For simplicity, though, we treat this
+	 * equivalently to LPAE stage 2 concatenation at level 2, with the
+	 * additional TTBRs each just pointing at consecutive pages.
+	 */
+	if (data->start_level < 1)
+		goto out_free_data;
+	if (data->start_level == 1 && data->pgd_bits > 2)
+		goto out_free_data;
+	if (data->start_level > 1)
+		data->pgd_bits = 0;
+	data->start_level = 2;
+	cfg->apple_dart_cfg.n_ttbrs = 1 << data->pgd_bits;
+	data->pgd_bits += data->bits_per_level;
+
+	data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), GFP_KERNEL,
+					   cfg);
+	if (!data->pgd)
+		goto out_free_data;
+
+	for (i = 0; i < cfg->apple_dart_cfg.n_ttbrs; ++i)
+		cfg->apple_dart_cfg.ttbr[i] =
+			virt_to_phys(data->pgd + i * ARM_LPAE_GRANULE(data));
+
+	return &data->iop;
+
+out_free_data:
+	kfree(data);
+	return NULL;
+}
+
 struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = {
 	.alloc	= arm_64_lpae_alloc_pgtable_s1,
 	.free	= arm_lpae_free_pgtable,
@@ -1068,6 +1177,11 @@ struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns = {
 	.free	= arm_lpae_free_pgtable,
 };
 
+struct io_pgtable_init_fns io_pgtable_apple_dart_init_fns = {
+	.alloc	= apple_dart_alloc_pgtable,
+	.free	= arm_lpae_free_pgtable,
+};
+
 #ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST
 
 static struct io_pgtable_cfg *cfg_cookie __initdata;
diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c
index 6e9917ce980f..f4bfcef98297 100644
--- a/drivers/iommu/io-pgtable.c
+++ b/drivers/iommu/io-pgtable.c
@@ -20,6 +20,7 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = {
 	[ARM_64_LPAE_S1] = &io_pgtable_arm_64_lpae_s1_init_fns,
 	[ARM_64_LPAE_S2] = &io_pgtable_arm_64_lpae_s2_init_fns,
 	[ARM_MALI_LPAE] = &io_pgtable_arm_mali_lpae_init_fns,
+	[APPLE_DART] = &io_pgtable_apple_dart_init_fns,
 #endif
 #ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S
 	[ARM_V7S] = &io_pgtable_arm_v7s_init_fns,
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 63f0af10c403..3303d707bab4 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -7,7 +7,9 @@
 #define pr_fmt(fmt)    "iommu: " fmt
 
 #include <linux/device.h>
+#include <linux/dma-iommu.h>
 #include <linux/kernel.h>
+#include <linux/bits.h>
 #include <linux/bug.h>
 #include <linux/types.h>
 #include <linux/init.h>
@@ -29,7 +31,7 @@ static struct kset *iommu_group_kset;
 static DEFINE_IDA(iommu_group_ida);
 
 static unsigned int iommu_def_domain_type __read_mostly;
-static bool iommu_dma_strict __read_mostly = true;
+static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT);
 static u32 iommu_cmd_line __read_mostly;
 
 struct iommu_group {
@@ -113,6 +115,7 @@ static const char *iommu_domain_type_str(unsigned int t)
 	case IOMMU_DOMAIN_UNMANAGED:
 		return "Unmanaged";
 	case IOMMU_DOMAIN_DMA:
+	case IOMMU_DOMAIN_DMA_FQ:
 		return "Translated";
 	default:
 		return "Unknown";
@@ -133,11 +136,20 @@ static int __init iommu_subsys_init(void)
 		}
 	}
 
+	if (!iommu_default_passthrough() && !iommu_dma_strict)
+		iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ;
+
 	pr_info("Default domain type: %s %s\n",
 		iommu_domain_type_str(iommu_def_domain_type),
 		(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ?
 			"(set via kernel command line)" : "");
 
+	if (!iommu_default_passthrough())
+		pr_info("DMA domain TLB invalidation policy: %s mode %s\n",
+			iommu_dma_strict ? "strict" : "lazy",
+			(iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
+				"(set via kernel command line)" : "");
+
 	return 0;
 }
 subsys_initcall(iommu_subsys_init);
@@ -273,7 +285,9 @@ int iommu_probe_device(struct device *dev)
 	 * support default domains, so the return value is not yet
 	 * checked.
 	 */
+	mutex_lock(&group->mutex);
 	iommu_alloc_default_domain(group, dev);
+	mutex_unlock(&group->mutex);
 
 	if (group->default_domain) {
 		ret = __iommu_attach_device(group->default_domain, dev);
@@ -344,20 +358,12 @@ static int __init iommu_dma_setup(char *str)
 }
 early_param("iommu.strict", iommu_dma_setup);
 
-void iommu_set_dma_strict(bool strict)
-{
-	if (strict || !(iommu_cmd_line & IOMMU_CMD_LINE_STRICT))
-		iommu_dma_strict = strict;
-}
-
-bool iommu_get_dma_strict(struct iommu_domain *domain)
+void iommu_set_dma_strict(void)
 {
-	/* only allow lazy flushing for DMA domains */
-	if (domain->type == IOMMU_DOMAIN_DMA)
-		return iommu_dma_strict;
-	return true;
+	iommu_dma_strict = true;
+	if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ)
+		iommu_def_domain_type = IOMMU_DOMAIN_DMA;
 }
-EXPORT_SYMBOL_GPL(iommu_get_dma_strict);
 
 static ssize_t iommu_group_attr_show(struct kobject *kobj,
 				     struct attribute *__attr, char *buf)
@@ -546,6 +552,9 @@ static ssize_t iommu_group_show_type(struct iommu_group *group,
 		case IOMMU_DOMAIN_DMA:
 			type = "DMA\n";
 			break;
+		case IOMMU_DOMAIN_DMA_FQ:
+			type = "DMA-FQ\n";
+			break;
 		}
 	}
 	mutex_unlock(&group->mutex);
@@ -759,7 +768,7 @@ static int iommu_create_device_direct_mappings(struct iommu_group *group,
 	unsigned long pg_size;
 	int ret = 0;
 
-	if (!domain || domain->type != IOMMU_DOMAIN_DMA)
+	if (!domain || !iommu_is_dma_domain(domain))
 		return 0;
 
 	BUG_ON(!domain->pgsize_bitmap);
@@ -1944,6 +1953,11 @@ static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
 	/* Assume all sizes by default; the driver may override this later */
 	domain->pgsize_bitmap  = bus->iommu_ops->pgsize_bitmap;
 
+	/* Temporarily avoid -EEXIST while drivers still get their own cookies */
+	if (iommu_is_dma_domain(domain) && !domain->iova_cookie && iommu_get_dma_cookie(domain)) {
+		iommu_domain_free(domain);
+		domain = NULL;
+	}
 	return domain;
 }
 
@@ -1955,6 +1969,7 @@ EXPORT_SYMBOL_GPL(iommu_domain_alloc);
 
 void iommu_domain_free(struct iommu_domain *domain)
 {
+	iommu_put_dma_cookie(domain);
 	domain->ops->domain_free(domain);
 }
 EXPORT_SYMBOL_GPL(iommu_domain_free);
@@ -2370,45 +2385,94 @@ EXPORT_SYMBOL_GPL(iommu_detach_group);
 
 phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
 {
-	if (unlikely(domain->ops->iova_to_phys == NULL))
+	if (domain->type == IOMMU_DOMAIN_IDENTITY)
+		return iova;
+
+	if (domain->type == IOMMU_DOMAIN_BLOCKED)
 		return 0;
 
 	return domain->ops->iova_to_phys(domain, iova);
 }
 EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
 
-static size_t iommu_pgsize(struct iommu_domain *domain,
-			   unsigned long addr_merge, size_t size)
+static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
+			   phys_addr_t paddr, size_t size, size_t *count)
 {
-	unsigned int pgsize_idx;
-	size_t pgsize;
+	unsigned int pgsize_idx, pgsize_idx_next;
+	unsigned long pgsizes;
+	size_t offset, pgsize, pgsize_next;
+	unsigned long addr_merge = paddr | iova;
 
-	/* Max page size that still fits into 'size' */
-	pgsize_idx = __fls(size);
+	/* Page sizes supported by the hardware and small enough for @size */
+	pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0);
 
-	/* need to consider alignment requirements ? */
-	if (likely(addr_merge)) {
-		/* Max page size allowed by address */
-		unsigned int align_pgsize_idx = __ffs(addr_merge);
-		pgsize_idx = min(pgsize_idx, align_pgsize_idx);
-	}
+	/* Constrain the page sizes further based on the maximum alignment */
+	if (likely(addr_merge))
+		pgsizes &= GENMASK(__ffs(addr_merge), 0);
 
-	/* build a mask of acceptable page sizes */
-	pgsize = (1UL << (pgsize_idx + 1)) - 1;
+	/* Make sure we have at least one suitable page size */
+	BUG_ON(!pgsizes);
 
-	/* throw away page sizes not supported by the hardware */
-	pgsize &= domain->pgsize_bitmap;
+	/* Pick the biggest page size remaining */
+	pgsize_idx = __fls(pgsizes);
+	pgsize = BIT(pgsize_idx);
+	if (!count)
+		return pgsize;
 
-	/* make sure we're still sane */
-	BUG_ON(!pgsize);
+	/* Find the next biggest support page size, if it exists */
+	pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0);
+	if (!pgsizes)
+		goto out_set_count;
 
-	/* pick the biggest page */
-	pgsize_idx = __fls(pgsize);
-	pgsize = 1UL << pgsize_idx;
+	pgsize_idx_next = __ffs(pgsizes);
+	pgsize_next = BIT(pgsize_idx_next);
 
+	/*
+	 * There's no point trying a bigger page size unless the virtual
+	 * and physical addresses are similarly offset within the larger page.
+	 */
+	if ((iova ^ paddr) & (pgsize_next - 1))
+		goto out_set_count;
+
+	/* Calculate the offset to the next page size alignment boundary */
+	offset = pgsize_next - (addr_merge & (pgsize_next - 1));
+
+	/*
+	 * If size is big enough to accommodate the larger page, reduce
+	 * the number of smaller pages.
+	 */
+	if (offset + pgsize_next <= size)
+		size = offset;
+
+out_set_count:
+	*count = size >> pgsize_idx;
 	return pgsize;
 }
 
+static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova,
+			     phys_addr_t paddr, size_t size, int prot,
+			     gfp_t gfp, size_t *mapped)
+{
+	const struct iommu_ops *ops = domain->ops;
+	size_t pgsize, count;
+	int ret;
+
+	pgsize = iommu_pgsize(domain, iova, paddr, size, &count);
+
+	pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n",
+		 iova, &paddr, pgsize, count);
+
+	if (ops->map_pages) {
+		ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot,
+				     gfp, mapped);
+	} else {
+		ret = ops->map(domain, iova, paddr, pgsize, prot, gfp);
+		*mapped = ret ? 0 : pgsize;
+	}
+
+	return ret;
+}
+
 static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
 		       phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
 {
@@ -2419,7 +2483,7 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
 	phys_addr_t orig_paddr = paddr;
 	int ret = 0;
 
-	if (unlikely(ops->map == NULL ||
+	if (unlikely(!(ops->map || ops->map_pages) ||
 		     domain->pgsize_bitmap == 0UL))
 		return -ENODEV;
 
@@ -2443,18 +2507,21 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
 	pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
 
 	while (size) {
-		size_t pgsize = iommu_pgsize(domain, iova | paddr, size);
+		size_t mapped = 0;
 
-		pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
-			 iova, &paddr, pgsize);
-		ret = ops->map(domain, iova, paddr, pgsize, prot, gfp);
+		ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp,
+					&mapped);
+		/*
+		 * Some pages may have been mapped, even if an error occurred,
+		 * so we should account for those so they can be unmapped.
+		 */
+		size -= mapped;
 
 		if (ret)
 			break;
 
-		iova += pgsize;
-		paddr += pgsize;
-		size -= pgsize;
+		iova += mapped;
+		paddr += mapped;
 	}
 
 	/* unroll mapping in case something went wrong */
@@ -2494,6 +2561,19 @@ int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova,
 }
 EXPORT_SYMBOL_GPL(iommu_map_atomic);
 
+static size_t __iommu_unmap_pages(struct iommu_domain *domain,
+				  unsigned long iova, size_t size,
+				  struct iommu_iotlb_gather *iotlb_gather)
+{
+	const struct iommu_ops *ops = domain->ops;
+	size_t pgsize, count;
+
+	pgsize = iommu_pgsize(domain, iova, iova, size, &count);
+	return ops->unmap_pages ?
+	       ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) :
+	       ops->unmap(domain, iova, pgsize, iotlb_gather);
+}
+
 static size_t __iommu_unmap(struct iommu_domain *domain,
 			    unsigned long iova, size_t size,
 			    struct iommu_iotlb_gather *iotlb_gather)
@@ -2503,7 +2583,7 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 	unsigned long orig_iova = iova;
 	unsigned int min_pagesz;
 
-	if (unlikely(ops->unmap == NULL ||
+	if (unlikely(!(ops->unmap || ops->unmap_pages) ||
 		     domain->pgsize_bitmap == 0UL))
 		return 0;
 
@@ -2531,9 +2611,9 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 	 * or we hit an area that isn't mapped.
 	 */
 	while (unmapped < size) {
-		size_t pgsize = iommu_pgsize(domain, iova, size - unmapped);
-
-		unmapped_page = ops->unmap(domain, iova, pgsize, iotlb_gather);
+		unmapped_page = __iommu_unmap_pages(domain, iova,
+						    size - unmapped,
+						    iotlb_gather);
 		if (!unmapped_page)
 			break;
 
@@ -2570,9 +2650,9 @@ size_t iommu_unmap_fast(struct iommu_domain *domain,
 }
 EXPORT_SYMBOL_GPL(iommu_unmap_fast);
 
-static size_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
-			     struct scatterlist *sg, unsigned int nents, int prot,
-			     gfp_t gfp)
+static ssize_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
+		struct scatterlist *sg, unsigned int nents, int prot,
+		gfp_t gfp)
 {
 	const struct iommu_ops *ops = domain->ops;
 	size_t len = 0, mapped = 0;
@@ -2613,19 +2693,18 @@ out_err:
 	/* undo mappings already done */
 	iommu_unmap(domain, iova, mapped);
 
-	return 0;
-
+	return ret;
 }
 
-size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
-		    struct scatterlist *sg, unsigned int nents, int prot)
+ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
+		     struct scatterlist *sg, unsigned int nents, int prot)
 {
 	might_sleep();
 	return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_KERNEL);
 }
 EXPORT_SYMBOL_GPL(iommu_map_sg);
 
-size_t iommu_map_sg_atomic(struct iommu_domain *domain, unsigned long iova,
+ssize_t iommu_map_sg_atomic(struct iommu_domain *domain, unsigned long iova,
 		    struct scatterlist *sg, unsigned int nents, int prot)
 {
 	return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC);
@@ -3129,6 +3208,14 @@ static int iommu_change_dev_def_domain(struct iommu_group *group,
 		goto out;
 	}
 
+	/* We can bring up a flush queue without tearing down the domain */
+	if (type == IOMMU_DOMAIN_DMA_FQ && prev_dom->type == IOMMU_DOMAIN_DMA) {
+		ret = iommu_dma_init_fq(prev_dom);
+		if (!ret)
+			prev_dom->type = IOMMU_DOMAIN_DMA_FQ;
+		goto out;
+	}
+
 	/* Sets group->default_domain to the newly allocated domain */
 	ret = iommu_group_alloc_default_domain(dev->bus, group, type);
 	if (ret)
@@ -3169,9 +3256,9 @@ out:
 }
 
 /*
- * Changing the default domain through sysfs requires the users to ubind the
- * drivers from the devices in the iommu group. Return failure if this doesn't
- * meet.
+ * Changing the default domain through sysfs requires the users to unbind the
+ * drivers from the devices in the iommu group, except for a DMA -> DMA-FQ
+ * transition. Return failure if this isn't met.
  *
  * We need to consider the race between this and the device release path.
  * device_lock(dev) is used here to guarantee that the device release path
@@ -3194,6 +3281,8 @@ static ssize_t iommu_group_store_type(struct iommu_group *group,
 		req_type = IOMMU_DOMAIN_IDENTITY;
 	else if (sysfs_streq(buf, "DMA"))
 		req_type = IOMMU_DOMAIN_DMA;
+	else if (sysfs_streq(buf, "DMA-FQ"))
+		req_type = IOMMU_DOMAIN_DMA_FQ;
 	else if (sysfs_streq(buf, "auto"))
 		req_type = 0;
 	else
@@ -3245,7 +3334,8 @@ static ssize_t iommu_group_store_type(struct iommu_group *group,
 
 	/* Check if the device in the group still has a driver bound to it */
 	device_lock(dev);
-	if (device_is_bound(dev)) {
+	if (device_is_bound(dev) && !(req_type == IOMMU_DOMAIN_DMA_FQ &&
+	    group->default_domain->type == IOMMU_DOMAIN_DMA)) {
 		pr_err_ratelimited("Device is still bound to driver\n");
 		ret = -EBUSY;
 		goto out;
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index b6cf5f16123b..9e8bc802ac05 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -121,8 +121,6 @@ int init_iova_flush_queue(struct iova_domain *iovad,
 		spin_lock_init(&fq->lock);
 	}
 
-	smp_wmb();
-
 	iovad->fq = queue;
 
 	timer_setup(&iovad->fq_timer, fq_flush_timeout, 0);
@@ -521,6 +519,7 @@ retry:
 
 	return new_iova->pfn_lo;
 }
+EXPORT_SYMBOL_GPL(alloc_iova_fast);
 
 /**
  * free_iova_fast - free iova pfn range into rcache
@@ -538,6 +537,7 @@ free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
 
 	free_iova(iovad, pfn);
 }
+EXPORT_SYMBOL_GPL(free_iova_fast);
 
 #define fq_ring_for_each(i, fq) \
 	for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)
@@ -633,10 +633,20 @@ void queue_iova(struct iova_domain *iovad,
 		unsigned long pfn, unsigned long pages,
 		unsigned long data)
 {
-	struct iova_fq *fq = raw_cpu_ptr(iovad->fq);
+	struct iova_fq *fq;
 	unsigned long flags;
 	unsigned idx;
 
+	/*
+	 * Order against the IOMMU driver's pagetable update from unmapping
+	 * @pte, to guarantee that iova_domain_flush() observes that if called
+	 * from a different CPU before we release the lock below. Full barrier
+	 * so it also pairs with iommu_dma_init_fq() to avoid seeing partially
+	 * written fq state here.
+	 */
+	smp_mb();
+
+	fq = raw_cpu_ptr(iovad->fq);
 	spin_lock_irqsave(&fq->lock, flags);
 
 	/*
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 51ea6f00db2f..d38ff29a76e8 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -8,7 +8,6 @@
 
 #include <linux/bitmap.h>
 #include <linux/delay.h>
-#include <linux/dma-iommu.h>
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/export.h>
@@ -564,10 +563,13 @@ static irqreturn_t ipmmu_irq(int irq, void *dev)
  * IOMMU Operations
  */
 
-static struct iommu_domain *__ipmmu_domain_alloc(unsigned type)
+static struct iommu_domain *ipmmu_domain_alloc(unsigned type)
 {
 	struct ipmmu_vmsa_domain *domain;
 
+	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
+		return NULL;
+
 	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
 	if (!domain)
 		return NULL;
@@ -577,27 +579,6 @@ static struct iommu_domain *__ipmmu_domain_alloc(unsigned type)
 	return &domain->io_domain;
 }
 
-static struct iommu_domain *ipmmu_domain_alloc(unsigned type)
-{
-	struct iommu_domain *io_domain = NULL;
-
-	switch (type) {
-	case IOMMU_DOMAIN_UNMANAGED:
-		io_domain = __ipmmu_domain_alloc(type);
-		break;
-
-	case IOMMU_DOMAIN_DMA:
-		io_domain = __ipmmu_domain_alloc(type);
-		if (io_domain && iommu_get_dma_cookie(io_domain)) {
-			kfree(io_domain);
-			io_domain = NULL;
-		}
-		break;
-	}
-
-	return io_domain;
-}
-
 static void ipmmu_domain_free(struct iommu_domain *io_domain)
 {
 	struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
@@ -606,7 +587,6 @@ static void ipmmu_domain_free(struct iommu_domain *io_domain)
 	 * Free the domain resources. We assume that all devices have already
 	 * been detached.
 	 */
-	iommu_put_dma_cookie(io_domain);
 	ipmmu_domain_destroy_context(domain);
 	free_io_pgtable_ops(domain->iop);
 	kfree(domain);
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 6f7c69688ce2..d837adfd1da5 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -9,7 +9,6 @@
 #include <linux/component.h>
 #include <linux/device.h>
 #include <linux/dma-direct.h>
-#include <linux/dma-iommu.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
@@ -441,17 +440,11 @@ static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type)
 	if (!dom)
 		return NULL;
 
-	if (iommu_get_dma_cookie(&dom->domain)) {
-		kfree(dom);
-		return NULL;
-	}
-
 	return &dom->domain;
 }
 
 static void mtk_iommu_domain_free(struct iommu_domain *domain)
 {
-	iommu_put_dma_cookie(domain);
 	kfree(to_mtk_domain(domain));
 }
 
@@ -520,12 +513,8 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain,
 			      struct iommu_iotlb_gather *gather)
 {
 	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
-	unsigned long end = iova + size - 1;
 
-	if (gather->start > iova)
-		gather->start = iova;
-	if (gather->end < end)
-		gather->end = end;
+	iommu_iotlb_gather_add_range(gather, iova, size);
 	return dom->iop->unmap(dom->iop, iova, size, gather);
 }
 
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index 778e66f5f1aa..be22fcf988ce 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -13,7 +13,6 @@
 #include <linux/component.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
-#include <linux/dma-iommu.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 9febfb7f3025..5cb260820eda 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -10,7 +10,6 @@
 #include <linux/compiler.h>
 #include <linux/delay.h>
 #include <linux/device.h>
-#include <linux/dma-iommu.h>
 #include <linux/dma-mapping.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
@@ -1074,10 +1073,6 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type)
 	if (!rk_domain)
 		return NULL;
 
-	if (type == IOMMU_DOMAIN_DMA &&
-	    iommu_get_dma_cookie(&rk_domain->domain))
-		goto err_free_domain;
-
 	/*
 	 * rk32xx iommus use a 2 level pagetable.
 	 * Each level1 (dt) and level2 (pt) table has 1024 4-byte entries.
@@ -1085,7 +1080,7 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type)
 	 */
 	rk_domain->dt = (u32 *)get_zeroed_page(GFP_KERNEL | GFP_DMA32);
 	if (!rk_domain->dt)
-		goto err_put_cookie;
+		goto err_free_domain;
 
 	rk_domain->dt_dma = dma_map_single(dma_dev, rk_domain->dt,
 					   SPAGE_SIZE, DMA_TO_DEVICE);
@@ -1106,9 +1101,6 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type)
 
 err_free_dt:
 	free_page((unsigned long)rk_domain->dt);
-err_put_cookie:
-	if (type == IOMMU_DOMAIN_DMA)
-		iommu_put_dma_cookie(&rk_domain->domain);
 err_free_domain:
 	kfree(rk_domain);
 
@@ -1137,8 +1129,6 @@ static void rk_iommu_domain_free(struct iommu_domain *domain)
 			 SPAGE_SIZE, DMA_TO_DEVICE);
 	free_page((unsigned long)rk_domain->dt);
 
-	if (domain->type == IOMMU_DOMAIN_DMA)
-		iommu_put_dma_cookie(&rk_domain->domain);
 	kfree(rk_domain);
 }
 
diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c
index 73dfd9946312..27ac818b0354 100644
--- a/drivers/iommu/sprd-iommu.c
+++ b/drivers/iommu/sprd-iommu.c
@@ -8,7 +8,6 @@
 
 #include <linux/clk.h>
 #include <linux/device.h>
-#include <linux/dma-iommu.h>
 #include <linux/dma-mapping.h>
 #include <linux/errno.h>
 #include <linux/iommu.h>
@@ -144,11 +143,6 @@ static struct iommu_domain *sprd_iommu_domain_alloc(unsigned int domain_type)
 	if (!dom)
 		return NULL;
 
-	if (iommu_get_dma_cookie(&dom->domain)) {
-		kfree(dom);
-		return NULL;
-	}
-
 	spin_lock_init(&dom->pgtlock);
 
 	dom->domain.geometry.aperture_start = 0;
@@ -161,7 +155,6 @@ static void sprd_iommu_domain_free(struct iommu_domain *domain)
 {
 	struct sprd_iommu_domain *dom = to_sprd_domain(domain);
 
-	iommu_put_dma_cookie(domain);
 	kfree(dom);
 }
 
diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
index 181bb1c3437c..92997021e188 100644
--- a/drivers/iommu/sun50i-iommu.c
+++ b/drivers/iommu/sun50i-iommu.c
@@ -7,7 +7,6 @@
 #include <linux/clk.h>
 #include <linux/device.h>
 #include <linux/dma-direction.h>
-#include <linux/dma-iommu.h>
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/errno.h>
@@ -610,14 +609,10 @@ static struct iommu_domain *sun50i_iommu_domain_alloc(unsigned type)
 	if (!sun50i_domain)
 		return NULL;
 
-	if (type == IOMMU_DOMAIN_DMA &&
-	    iommu_get_dma_cookie(&sun50i_domain->domain))
-		goto err_free_domain;
-
 	sun50i_domain->dt = (u32 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 						    get_order(DT_SIZE));
 	if (!sun50i_domain->dt)
-		goto err_put_cookie;
+		goto err_free_domain;
 
 	refcount_set(&sun50i_domain->refcnt, 1);
 
@@ -627,10 +622,6 @@ static struct iommu_domain *sun50i_iommu_domain_alloc(unsigned type)
 
 	return &sun50i_domain->domain;
 
-err_put_cookie:
-	if (type == IOMMU_DOMAIN_DMA)
-		iommu_put_dma_cookie(&sun50i_domain->domain);
-
 err_free_domain:
 	kfree(sun50i_domain);
 
@@ -644,8 +635,6 @@ static void sun50i_iommu_domain_free(struct iommu_domain *domain)
 	free_pages((unsigned long)sun50i_domain->dt, get_order(DT_SIZE));
 	sun50i_domain->dt = NULL;
 
-	iommu_put_dma_cookie(domain);
-
 	kfree(sun50i_domain);
 }
 
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 6abdcab7273b..80930ce04a16 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -598,12 +598,6 @@ static struct iommu_domain *viommu_domain_alloc(unsigned type)
 	spin_lock_init(&vdomain->mappings_lock);
 	vdomain->mappings = RB_ROOT_CACHED;
 
-	if (type == IOMMU_DOMAIN_DMA &&
-	    iommu_get_dma_cookie(&vdomain->domain)) {
-		kfree(vdomain);
-		return NULL;
-	}
-
 	return &vdomain->domain;
 }
 
@@ -643,8 +637,6 @@ static void viommu_domain_free(struct iommu_domain *domain)
 {
 	struct viommu_domain *vdomain = to_viommu_domain(domain);
 
-	iommu_put_dma_cookie(domain);
-
 	/* Free all remaining mappings (size 2^64) */
 	viommu_del_mappings(vdomain, 0, 0);
 
diff --git a/drivers/isdn/capi/capiutil.c b/drivers/isdn/capi/capiutil.c
index f26bf3c66d7e..d7ae42edc4a8 100644
--- a/drivers/isdn/capi/capiutil.c
+++ b/drivers/isdn/capi/capiutil.c
@@ -379,7 +379,7 @@ static char *pnames[] =
 	/*2f */ "Useruserdata"
 };
 
-#include <stdarg.h>
+#include <linux/stdarg.h>
 
 /*-------------------------------------------------------*/
 static _cdebbuf *bufprint(_cdebbuf *cdb, char *fmt, ...)
diff --git a/drivers/macintosh/macio-adb.c b/drivers/macintosh/macio-adb.c
index d4759db002c6..dc634c2932fd 100644
--- a/drivers/macintosh/macio-adb.c
+++ b/drivers/macintosh/macio-adb.c
@@ -2,7 +2,6 @@
 /*
  * Driver for the ADB controller in the Mac I/O (Hydra) chip.
  */
-#include <stdarg.h>
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
diff --git a/drivers/macintosh/via-cuda.c b/drivers/macintosh/via-cuda.c
index 3581abfb0c6a..cd267392289c 100644
--- a/drivers/macintosh/via-cuda.c
+++ b/drivers/macintosh/via-cuda.c
@@ -9,7 +9,7 @@
  *
  * Copyright (C) 1996 Paul Mackerras.
  */
-#include <stdarg.h>
+#include <linux/stdarg.h>
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
diff --git a/drivers/macintosh/via-macii.c b/drivers/macintosh/via-macii.c
index 060e03f2264b..db9270da5b8e 100644
--- a/drivers/macintosh/via-macii.c
+++ b/drivers/macintosh/via-macii.c
@@ -23,8 +23,6 @@
  * Apple's "ADB Analyzer" bus sniffer is invaluable:
  *   ftp://ftp.apple.com/developer/Tool_Chest/Devices_-_Hardware/Apple_Desktop_Bus/
  */
-
-#include <stdarg.h>
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index 4bdd4c45e7a7..4b98bc26a94b 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -18,7 +18,7 @@
  *    a sleep or a freq. switch
  *
  */
-#include <stdarg.h>
+#include <linux/stdarg.h>
 #include <linux/mutex.h>
 #include <linux/types.h>
 #include <linux/errno.h>
diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig
index b4b780ea2ac8..c9fc06c7e685 100644
--- a/drivers/mailbox/Kconfig
+++ b/drivers/mailbox/Kconfig
@@ -264,7 +264,7 @@ config SPRD_MBOX
 	  you want to build the Spreatrum mailbox controller driver.
 
 config QCOM_IPCC
-	bool "Qualcomm Technologies, Inc. IPCC driver"
+	tristate "Qualcomm Technologies, Inc. IPCC driver"
 	depends on ARCH_QCOM || COMPILE_TEST
 	help
 	  Qualcomm Technologies, Inc. Inter-Processor Communication Controller
diff --git a/drivers/mailbox/mailbox-sti.c b/drivers/mailbox/mailbox-sti.c
index 2baf69a0b81c..ab3a6ab762d3 100644
--- a/drivers/mailbox/mailbox-sti.c
+++ b/drivers/mailbox/mailbox-sti.c
@@ -36,12 +36,7 @@
 #define MBOX_BASE(mdev, inst)   ((mdev)->base + ((inst) * 4))
 
 /**
- * STi Mailbox device data
- *
- * An IP Mailbox is currently composed of 4 instances
- * Each instance is currently composed of 32 channels
- * This means that we have 128 channels per Mailbox
- * A channel an be used for TX or RX
+ * struct sti_mbox_device - STi Mailbox device data
  *
  * @dev:	Device to which it is attached
  * @mbox:	Representation of a communication channel controller
@@ -49,6 +44,11 @@
  * @name:	Name of the mailbox
  * @enabled:	Local copy of enabled channels
  * @lock:	Mutex protecting enabled status
+ *
+ * An IP Mailbox is currently composed of 4 instances
+ * Each instance is currently composed of 32 channels
+ * This means that we have 128 channels per Mailbox
+ * A channel an be used for TX or RX
  */
 struct sti_mbox_device {
 	struct device		*dev;
@@ -60,7 +60,7 @@ struct sti_mbox_device {
 };
 
 /**
- * STi Mailbox platform specific configuration
+ * struct sti_mbox_pdata - STi Mailbox platform specific configuration
  *
  * @num_inst:	Maximum number of instances in one HW Mailbox
  * @num_chan:	Maximum number of channel per instance
@@ -71,7 +71,7 @@ struct sti_mbox_pdata {
 };
 
 /**
- * STi Mailbox allocated channel information
+ * struct sti_channel - STi Mailbox allocated channel information
  *
  * @mdev:	Pointer to parent Mailbox device
  * @instance:	Instance number channel resides in
diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c
index 67a42b514429..64175a893312 100644
--- a/drivers/mailbox/mtk-cmdq-mailbox.c
+++ b/drivers/mailbox/mtk-cmdq-mailbox.c
@@ -19,6 +19,7 @@
 
 #define CMDQ_OP_CODE_MASK		(0xff << CMDQ_OP_CODE_SHIFT)
 #define CMDQ_NUM_CMD(t)			(t->cmd_buf_size / CMDQ_INST_SIZE)
+#define CMDQ_GCE_NUM_MAX		(2)
 
 #define CMDQ_CURR_IRQ_STATUS		0x10
 #define CMDQ_SYNC_TOKEN_UPDATE		0x68
@@ -36,6 +37,8 @@
 #define CMDQ_THR_WAIT_TOKEN		0x30
 #define CMDQ_THR_PRIORITY		0x40
 
+#define GCE_GCTL_VALUE			0x48
+
 #define CMDQ_THR_ACTIVE_SLOT_CYCLES	0x3200
 #define CMDQ_THR_ENABLED		0x1
 #define CMDQ_THR_DISABLED		0x0
@@ -73,14 +76,18 @@ struct cmdq {
 	u32			thread_nr;
 	u32			irq_mask;
 	struct cmdq_thread	*thread;
-	struct clk		*clock;
+	struct clk_bulk_data	clocks[CMDQ_GCE_NUM_MAX];
 	bool			suspended;
 	u8			shift_pa;
+	bool			control_by_sw;
+	u32			gce_num;
 };
 
 struct gce_plat {
 	u32 thread_nr;
 	u8 shift;
+	bool control_by_sw;
+	u32 gce_num;
 };
 
 u8 cmdq_get_shift_pa(struct mbox_chan *chan)
@@ -120,11 +127,13 @@ static void cmdq_init(struct cmdq *cmdq)
 {
 	int i;
 
-	WARN_ON(clk_enable(cmdq->clock) < 0);
+	WARN_ON(clk_bulk_enable(cmdq->gce_num, cmdq->clocks));
+	if (cmdq->control_by_sw)
+		writel(0x7, cmdq->base + GCE_GCTL_VALUE);
 	writel(CMDQ_THR_ACTIVE_SLOT_CYCLES, cmdq->base + CMDQ_THR_SLOT_CYCLES);
 	for (i = 0; i <= CMDQ_MAX_EVENT; i++)
 		writel(i, cmdq->base + CMDQ_SYNC_TOKEN_UPDATE);
-	clk_disable(cmdq->clock);
+	clk_bulk_disable(cmdq->gce_num, cmdq->clocks);
 }
 
 static int cmdq_thread_reset(struct cmdq *cmdq, struct cmdq_thread *thread)
@@ -168,7 +177,8 @@ static void cmdq_task_insert_into_thread(struct cmdq_task *task)
 	dma_sync_single_for_cpu(dev, prev_task->pa_base,
 				prev_task->pkt->cmd_buf_size, DMA_TO_DEVICE);
 	prev_task_base[CMDQ_NUM_CMD(prev_task->pkt) - 1] =
-		(u64)CMDQ_JUMP_BY_PA << 32 | task->pa_base;
+		(u64)CMDQ_JUMP_BY_PA << 32 |
+		(task->pa_base >> task->cmdq->shift_pa);
 	dma_sync_single_for_device(dev, prev_task->pa_base,
 				   prev_task->pkt->cmd_buf_size, DMA_TO_DEVICE);
 
@@ -262,7 +272,7 @@ static void cmdq_thread_irq_handler(struct cmdq *cmdq,
 
 	if (list_empty(&thread->task_busy_list)) {
 		cmdq_thread_disable(cmdq, thread);
-		clk_disable(cmdq->clock);
+		clk_bulk_disable(cmdq->gce_num, cmdq->clocks);
 	}
 }
 
@@ -307,7 +317,7 @@ static int cmdq_suspend(struct device *dev)
 	if (task_running)
 		dev_warn(dev, "exist running task(s) in suspend\n");
 
-	clk_unprepare(cmdq->clock);
+	clk_bulk_unprepare(cmdq->gce_num, cmdq->clocks);
 
 	return 0;
 }
@@ -316,7 +326,7 @@ static int cmdq_resume(struct device *dev)
 {
 	struct cmdq *cmdq = dev_get_drvdata(dev);
 
-	WARN_ON(clk_prepare(cmdq->clock) < 0);
+	WARN_ON(clk_bulk_prepare(cmdq->gce_num, cmdq->clocks));
 	cmdq->suspended = false;
 	return 0;
 }
@@ -325,8 +335,7 @@ static int cmdq_remove(struct platform_device *pdev)
 {
 	struct cmdq *cmdq = platform_get_drvdata(pdev);
 
-	clk_unprepare(cmdq->clock);
-
+	clk_bulk_unprepare(cmdq->gce_num, cmdq->clocks);
 	return 0;
 }
 
@@ -352,7 +361,8 @@ static int cmdq_mbox_send_data(struct mbox_chan *chan, void *data)
 	task->pkt = pkt;
 
 	if (list_empty(&thread->task_busy_list)) {
-		WARN_ON(clk_enable(cmdq->clock) < 0);
+		WARN_ON(clk_bulk_enable(cmdq->gce_num, cmdq->clocks));
+
 		/*
 		 * The thread reset will clear thread related register to 0,
 		 * including pc, end, priority, irq, suspend and enable. Thus
@@ -424,7 +434,8 @@ static void cmdq_mbox_shutdown(struct mbox_chan *chan)
 	}
 
 	cmdq_thread_disable(cmdq, thread);
-	clk_disable(cmdq->clock);
+	clk_bulk_disable(cmdq->gce_num, cmdq->clocks);
+
 done:
 	/*
 	 * The thread->task_busy_list empty means thread already disable. The
@@ -469,7 +480,7 @@ static int cmdq_mbox_flush(struct mbox_chan *chan, unsigned long timeout)
 
 	cmdq_thread_resume(thread);
 	cmdq_thread_disable(cmdq, thread);
-	clk_disable(cmdq->clock);
+	clk_bulk_disable(cmdq->gce_num, cmdq->clocks);
 
 out:
 	spin_unlock_irqrestore(&thread->chan->lock, flags);
@@ -518,6 +529,10 @@ static int cmdq_probe(struct platform_device *pdev)
 	struct cmdq *cmdq;
 	int err, i;
 	struct gce_plat *plat_data;
+	struct device_node *phandle = dev->of_node;
+	struct device_node *node;
+	int alias_id = 0;
+	char clk_name[4] = "gce";
 
 	cmdq = devm_kzalloc(dev, sizeof(*cmdq), GFP_KERNEL);
 	if (!cmdq)
@@ -540,6 +555,8 @@ static int cmdq_probe(struct platform_device *pdev)
 
 	cmdq->thread_nr = plat_data->thread_nr;
 	cmdq->shift_pa = plat_data->shift;
+	cmdq->control_by_sw = plat_data->control_by_sw;
+	cmdq->gce_num = plat_data->gce_num;
 	cmdq->irq_mask = GENMASK(cmdq->thread_nr - 1, 0);
 	err = devm_request_irq(dev, cmdq->irq, cmdq_irq_handler, IRQF_SHARED,
 			       "mtk_cmdq", cmdq);
@@ -551,10 +568,28 @@ static int cmdq_probe(struct platform_device *pdev)
 	dev_dbg(dev, "cmdq device: addr:0x%p, va:0x%p, irq:%d\n",
 		dev, cmdq->base, cmdq->irq);
 
-	cmdq->clock = devm_clk_get(dev, "gce");
-	if (IS_ERR(cmdq->clock)) {
-		dev_err(dev, "failed to get gce clk\n");
-		return PTR_ERR(cmdq->clock);
+	if (cmdq->gce_num > 1) {
+		for_each_child_of_node(phandle->parent, node) {
+			char clk_id[8];
+
+			alias_id = of_alias_get_id(node, clk_name);
+			if (alias_id < cmdq->gce_num) {
+				snprintf(clk_id, sizeof(clk_id), "%s%d", clk_name, alias_id);
+				cmdq->clocks[alias_id].id = clk_id;
+				cmdq->clocks[alias_id].clk = of_clk_get(node, 0);
+				if (IS_ERR(cmdq->clocks[alias_id].clk)) {
+					dev_err(dev, "failed to get gce clk: %d\n", alias_id);
+					return PTR_ERR(cmdq->clocks[alias_id].clk);
+				}
+			}
+		}
+	} else {
+		cmdq->clocks[alias_id].id = clk_name;
+		cmdq->clocks[alias_id].clk = devm_clk_get(&pdev->dev, clk_name);
+		if (IS_ERR(cmdq->clocks[alias_id].clk)) {
+			dev_err(dev, "failed to get gce clk\n");
+			return PTR_ERR(cmdq->clocks[alias_id].clk);
+		}
 	}
 
 	cmdq->mbox.dev = dev;
@@ -590,7 +625,8 @@ static int cmdq_probe(struct platform_device *pdev)
 	}
 
 	platform_set_drvdata(pdev, cmdq);
-	WARN_ON(clk_prepare(cmdq->clock) < 0);
+
+	WARN_ON(clk_bulk_prepare(cmdq->gce_num, cmdq->clocks));
 
 	cmdq_init(cmdq);
 
@@ -602,14 +638,47 @@ static const struct dev_pm_ops cmdq_pm_ops = {
 	.resume = cmdq_resume,
 };
 
-static const struct gce_plat gce_plat_v2 = {.thread_nr = 16};
-static const struct gce_plat gce_plat_v3 = {.thread_nr = 24};
-static const struct gce_plat gce_plat_v4 = {.thread_nr = 24, .shift = 3};
+static const struct gce_plat gce_plat_v2 = {
+	.thread_nr = 16,
+	.shift = 0,
+	.control_by_sw = false,
+	.gce_num = 1
+};
+
+static const struct gce_plat gce_plat_v3 = {
+	.thread_nr = 24,
+	.shift = 0,
+	.control_by_sw = false,
+	.gce_num = 1
+};
+
+static const struct gce_plat gce_plat_v4 = {
+	.thread_nr = 24,
+	.shift = 3,
+	.control_by_sw = false,
+	.gce_num = 1
+};
+
+static const struct gce_plat gce_plat_v5 = {
+	.thread_nr = 24,
+	.shift = 3,
+	.control_by_sw = true,
+	.gce_num = 2
+};
+
+static const struct gce_plat gce_plat_v6 = {
+	.thread_nr = 24,
+	.shift = 3,
+	.control_by_sw = false,
+	.gce_num = 2
+};
 
 static const struct of_device_id cmdq_of_ids[] = {
 	{.compatible = "mediatek,mt8173-gce", .data = (void *)&gce_plat_v2},
 	{.compatible = "mediatek,mt8183-gce", .data = (void *)&gce_plat_v3},
 	{.compatible = "mediatek,mt6779-gce", .data = (void *)&gce_plat_v4},
+	{.compatible = "mediatek,mt8192-gce", .data = (void *)&gce_plat_v5},
+	{.compatible = "mediatek,mt8195-gce", .data = (void *)&gce_plat_v6},
 	{}
 };
 
diff --git a/drivers/mailbox/qcom-apcs-ipc-mailbox.c b/drivers/mailbox/qcom-apcs-ipc-mailbox.c
index 03bdc96dc457..82ccfaf14b24 100644
--- a/drivers/mailbox/qcom-apcs-ipc-mailbox.c
+++ b/drivers/mailbox/qcom-apcs-ipc-mailbox.c
@@ -163,6 +163,7 @@ static const struct of_device_id qcom_apcs_ipc_of_match[] = {
 	{ .compatible = "qcom,ipq8074-apcs-apps-global", .data = &ipq8074_apcs_data },
 	{ .compatible = "qcom,msm8916-apcs-kpss-global", .data = &msm8916_apcs_data },
 	{ .compatible = "qcom,msm8939-apcs-kpss-global", .data = &msm8916_apcs_data },
+	{ .compatible = "qcom,msm8953-apcs-kpss-global", .data = &msm8994_apcs_data },
 	{ .compatible = "qcom,msm8994-apcs-kpss-global", .data = &msm8994_apcs_data },
 	{ .compatible = "qcom,msm8996-apcs-hmss-global", .data = &msm8996_apcs_data },
 	{ .compatible = "qcom,msm8998-apcs-hmss-global", .data = &msm8998_apcs_data },
@@ -173,6 +174,7 @@ static const struct of_device_id qcom_apcs_ipc_of_match[] = {
 	{ .compatible = "qcom,sdm845-apss-shared", .data = &apps_shared_apcs_data },
 	{ .compatible = "qcom,sm6125-apcs-hmss-global", .data = &sm6125_apcs_data },
 	{ .compatible = "qcom,sm8150-apss-shared", .data = &apps_shared_apcs_data },
+	{ .compatible = "qcom,sm6115-apcs-hmss-global", .data = &sdm660_apcs_data },
 	{ .compatible = "qcom,sdx55-apcs-gcc", .data = &sdx55_apcs_data },
 	{}
 };
diff --git a/drivers/mailbox/qcom-ipcc.c b/drivers/mailbox/qcom-ipcc.c
index 584700cd1585..f1d4f4679b17 100644
--- a/drivers/mailbox/qcom-ipcc.c
+++ b/drivers/mailbox/qcom-ipcc.c
@@ -277,6 +277,7 @@ static struct platform_driver qcom_ipcc_driver = {
 	.driver = {
 		.name = "qcom-ipcc",
 		.of_match_table = qcom_ipcc_of_match,
+		.suppress_bind_attrs = true,
 	},
 };
 
diff --git a/drivers/md/dm-ima.c b/drivers/md/dm-ima.c
index 3fd69ab12a8e..2c5edfbd7711 100644
--- a/drivers/md/dm-ima.c
+++ b/drivers/md/dm-ima.c
@@ -136,7 +136,8 @@ static void dm_ima_measure_data(const char *event_name, const void *buf, size_t
 	if (noio)
 		noio_flag = memalloc_noio_save();
 
-	ima_measure_critical_data(DM_NAME, event_name, buf, buf_len, false);
+	ima_measure_critical_data(DM_NAME, event_name, buf, buf_len,
+				  false, NULL, 0);
 
 	if (noio)
 		memalloc_noio_restore(noio_flag);
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index b03eabc1ed7c..2111daaacaba 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -809,14 +809,9 @@ EXPORT_SYMBOL_GPL(dm_table_set_type);
 int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
 			sector_t start, sector_t len, void *data)
 {
-	int blocksize = *(int *) data, id;
-	bool rc;
+	int blocksize = *(int *) data;
 
-	id = dax_read_lock();
-	rc = !dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len);
-	dax_read_unlock(id);
-
-	return rc;
+	return !dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len);
 }
 
 /* Check devices support synchronous DAX */
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 84e9145b1714..a011d09cb0fa 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -654,7 +654,7 @@ static int open_table_device(struct table_device *td, dev_t dev,
 	}
 
 	td->dm_dev.bdev = bdev;
-	td->dm_dev.dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+	td->dm_dev.dax_dev = fs_dax_get_by_bdev(bdev);
 	return 0;
 }
 
diff --git a/drivers/media/i2c/ov02a10.c b/drivers/media/i2c/ov02a10.c
index a3ce5500d355..0f08c05333ea 100644
--- a/drivers/media/i2c/ov02a10.c
+++ b/drivers/media/i2c/ov02a10.c
@@ -9,6 +9,7 @@
 #include <linux/module.h>
 #include <linux/pm_runtime.h>
 #include <linux/regulator/consumer.h>
+#include <linux/units.h>
 #include <media/media-entity.h>
 #include <media/v4l2-async.h>
 #include <media/v4l2-ctrls.h>
@@ -64,7 +65,6 @@
 /* Test pattern control */
 #define OV02A10_REG_TEST_PATTERN			0xb6
 
-#define HZ_PER_MHZ					1000000L
 #define OV02A10_LINK_FREQ_390MHZ			(390 * HZ_PER_MHZ)
 #define OV02A10_ECLK_FREQ				(24 * HZ_PER_MHZ)
 
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 01bb42f0ca0b..ca0edab91aeb 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -2199,5 +2199,33 @@ config MFD_INTEL_M10_BMC
 	  additional drivers must be enabled in order to use the functionality
 	  of the device.
 
+config MFD_RSMU_I2C
+	tristate "Renesas Synchronization Management Unit with I2C"
+	depends on I2C && OF
+	select MFD_CORE
+	select REGMAP_I2C
+	help
+	  Support for the Renesas Synchronization Management Unit, such as
+	  Clockmatrix and 82P33XXX series. This option supports I2C as
+	  the control interface.
+
+	  This driver provides common support for accessing the device.
+	  Additional drivers must be enabled in order to use the functionality
+	  of the device.
+
+config MFD_RSMU_SPI
+	tristate "Renesas Synchronization Management Unit with SPI"
+	depends on SPI && OF
+	select MFD_CORE
+	select REGMAP_SPI
+	help
+	  Support for the Renesas Synchronization Management Unit, such as
+	  Clockmatrix and 82P33XXX series. This option supports SPI as
+	  the control interface.
+
+	  This driver provides common support for accessing the device.
+	  Additional drivers must be enabled in order to use the functionality
+	  of the device.
+
 endmenu
 endif
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 570b9ffb34d0..2ba6646e874c 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -273,3 +273,8 @@ obj-$(CONFIG_MFD_INTEL_M10_BMC)   += intel-m10-bmc.o
 
 obj-$(CONFIG_MFD_ATC260X)	+= atc260x-core.o
 obj-$(CONFIG_MFD_ATC260X_I2C)	+= atc260x-i2c.o
+
+rsmu-i2c-objs			:= rsmu_core.o rsmu_i2c.o
+rsmu-spi-objs			:= rsmu_core.o rsmu_spi.o
+obj-$(CONFIG_MFD_RSMU_I2C)	+= rsmu-i2c.o
+obj-$(CONFIG_MFD_RSMU_SPI)	+= rsmu-spi.o
diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index 30489670ea52..cca0aac26148 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c
@@ -485,7 +485,7 @@ static int ab8500_handle_hierarchical_line(struct ab8500 *ab8500,
 		if (line == AB8540_INT_GPIO43F || line == AB8540_INT_GPIO44F)
 			line += 1;
 
-		handle_nested_irq(irq_create_mapping(ab8500->domain, line));
+		handle_nested_irq(irq_find_mapping(ab8500->domain, line));
 	}
 
 	return 0;
diff --git a/drivers/mfd/axp20x.c b/drivers/mfd/axp20x.c
index 4145a38b3890..8161a5dc68e8 100644
--- a/drivers/mfd/axp20x.c
+++ b/drivers/mfd/axp20x.c
@@ -125,12 +125,13 @@ static const struct regmap_range axp288_writeable_ranges[] = {
 
 static const struct regmap_range axp288_volatile_ranges[] = {
 	regmap_reg_range(AXP20X_PWR_INPUT_STATUS, AXP288_POWER_REASON),
+	regmap_reg_range(AXP22X_PWR_OUT_CTRL1, AXP22X_ALDO3_V_OUT),
 	regmap_reg_range(AXP288_BC_GLOBAL, AXP288_BC_GLOBAL),
 	regmap_reg_range(AXP288_BC_DET_STAT, AXP20X_VBUS_IPSOUT_MGMT),
 	regmap_reg_range(AXP20X_CHRG_BAK_CTRL, AXP20X_CHRG_BAK_CTRL),
 	regmap_reg_range(AXP20X_IRQ1_EN, AXP20X_IPSOUT_V_HIGH_L),
 	regmap_reg_range(AXP20X_TIMER_CTRL, AXP20X_TIMER_CTRL),
-	regmap_reg_range(AXP22X_GPIO_STATE, AXP22X_GPIO_STATE),
+	regmap_reg_range(AXP20X_GPIO1_CTRL, AXP22X_GPIO_STATE),
 	regmap_reg_range(AXP288_RT_BATT_V_H, AXP288_RT_BATT_V_L),
 	regmap_reg_range(AXP20X_FG_RES, AXP288_FG_CC_CAP_REG),
 };
@@ -699,6 +700,18 @@ static const struct resource axp288_charger_resources[] = {
 	DEFINE_RES_IRQ(AXP288_IRQ_CBTO),
 };
 
+static const char * const axp288_fuel_gauge_suppliers[] = { "axp288_charger" };
+
+static const struct property_entry axp288_fuel_gauge_properties[] = {
+	PROPERTY_ENTRY_STRING_ARRAY("supplied-from", axp288_fuel_gauge_suppliers),
+	{ }
+};
+
+static const struct software_node axp288_fuel_gauge_sw_node = {
+	.name = "axp288_fuel_gauge",
+	.properties = axp288_fuel_gauge_properties,
+};
+
 static const struct mfd_cell axp288_cells[] = {
 	{
 		.name		= "axp288_adc",
@@ -716,6 +729,7 @@ static const struct mfd_cell axp288_cells[] = {
 		.name		= "axp288_fuel_gauge",
 		.num_resources	= ARRAY_SIZE(axp288_fuel_gauge_resources),
 		.resources	= axp288_fuel_gauge_resources,
+		.swnode		= &axp288_fuel_gauge_sw_node,
 	}, {
 		.name		= "axp221-pek",
 		.num_resources	= ARRAY_SIZE(axp288_power_button_resources),
diff --git a/drivers/mfd/dbx500-prcmu-regs.h b/drivers/mfd/db8500-prcmu-regs.h
index 75fd1069372c..75fd1069372c 100644
--- a/drivers/mfd/dbx500-prcmu-regs.h
+++ b/drivers/mfd/db8500-prcmu-regs.h
diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c
index 287da20f1231..c1d3e7c116cf 100644
--- a/drivers/mfd/db8500-prcmu.c
+++ b/drivers/mfd/db8500-prcmu.c
@@ -37,7 +37,7 @@
 #include <linux/regulator/db8500-prcmu.h>
 #include <linux/regulator/machine.h>
 #include <linux/platform_data/ux500_wdt.h>
-#include "dbx500-prcmu-regs.h"
+#include "db8500-prcmu-regs.h"
 
 /* Index of different voltages to be used when accessing AVSData */
 #define PRCM_AVS_BASE		0x2FC
@@ -1622,22 +1622,20 @@ static long round_clock_rate(u8 clock, unsigned long rate)
 }
 
 static const unsigned long db8500_armss_freqs[] = {
-	200000000,
-	400000000,
-	800000000,
+	199680000,
+	399360000,
+	798720000,
 	998400000
 };
 
 /* The DB8520 has slightly higher ARMSS max frequency */
 static const unsigned long db8520_armss_freqs[] = {
-	200000000,
-	400000000,
-	800000000,
+	199680000,
+	399360000,
+	798720000,
 	1152000000
 };
 
-
-
 static long round_armss_rate(unsigned long rate)
 {
 	unsigned long freq = 0;
@@ -2567,14 +2565,16 @@ static char *fw_project_name(u32 project)
 		return "U8500 C4";
 	case PRCMU_FW_PROJECT_U9500_MBL:
 		return "U9500 MBL";
-	case PRCMU_FW_PROJECT_U8500_MBL:
-		return "U8500 MBL";
+	case PRCMU_FW_PROJECT_U8500_SSG1:
+		return "U8500 Samsung 1";
 	case PRCMU_FW_PROJECT_U8500_MBL2:
 		return "U8500 MBL2";
 	case PRCMU_FW_PROJECT_U8520:
 		return "U8520 MBL";
 	case PRCMU_FW_PROJECT_U8420:
 		return "U8420";
+	case PRCMU_FW_PROJECT_U8500_SSG2:
+		return "U8500 Samsung 2";
 	case PRCMU_FW_PROJECT_U8420_SYSCLK:
 		return "U8420-sysclk";
 	case PRCMU_FW_PROJECT_U9540:
@@ -2951,14 +2951,13 @@ static const struct mfd_cell common_prcmu_devs[] = {
 		.pdata_size = sizeof(db8500_wdt_pdata),
 		.id = -1,
 	},
+	MFD_CELL_NAME("db8500-cpuidle"),
 };
 
 static const struct mfd_cell db8500_prcmu_devs[] = {
 	MFD_CELL_OF("db8500-prcmu-regulators", NULL,
 		    &db8500_regulators, sizeof(db8500_regulators), 0,
 		    "stericsson,db8500-prcmu-regulator"),
-	MFD_CELL_OF("cpuidle-dbx500",
-		    NULL, NULL, 0, 0, "stericsson,cpuidle-dbx500"),
 	MFD_CELL_OF("db8500-thermal",
 		    NULL, NULL, 0, 0, "stericsson,db8500-thermal"),
 };
diff --git a/drivers/mfd/intel-lpss-acpi.c b/drivers/mfd/intel-lpss-acpi.c
index 1f396039d58f..3f1d976eb67c 100644
--- a/drivers/mfd/intel-lpss-acpi.c
+++ b/drivers/mfd/intel-lpss-acpi.c
@@ -89,6 +89,11 @@ static const struct intel_lpss_platform_info apl_i2c_info = {
 	.swnode = &apl_i2c_node,
 };
 
+static const struct intel_lpss_platform_info cnl_i2c_info = {
+	.clk_rate = 216000000,
+	.swnode = &spt_i2c_node,
+};
+
 static const struct acpi_device_id intel_lpss_acpi_ids[] = {
 	/* SPT */
 	{ "INT3440", (kernel_ulong_t)&spt_info },
@@ -102,6 +107,19 @@ static const struct acpi_device_id intel_lpss_acpi_ids[] = {
 	{ "INT3448", (kernel_ulong_t)&spt_uart_info },
 	{ "INT3449", (kernel_ulong_t)&spt_uart_info },
 	{ "INT344A", (kernel_ulong_t)&spt_uart_info },
+	/* CNL */
+	{ "INT34B0", (kernel_ulong_t)&spt_info },
+	{ "INT34B1", (kernel_ulong_t)&spt_info },
+	{ "INT34B2", (kernel_ulong_t)&cnl_i2c_info },
+	{ "INT34B3", (kernel_ulong_t)&cnl_i2c_info },
+	{ "INT34B4", (kernel_ulong_t)&cnl_i2c_info },
+	{ "INT34B5", (kernel_ulong_t)&cnl_i2c_info },
+	{ "INT34B6", (kernel_ulong_t)&cnl_i2c_info },
+	{ "INT34B7", (kernel_ulong_t)&cnl_i2c_info },
+	{ "INT34B8", (kernel_ulong_t)&spt_uart_info },
+	{ "INT34B9", (kernel_ulong_t)&spt_uart_info },
+	{ "INT34BA", (kernel_ulong_t)&spt_uart_info },
+	{ "INT34BC", (kernel_ulong_t)&spt_info },
 	/* BXT */
 	{ "80860AAC", (kernel_ulong_t)&bxt_i2c_info },
 	{ "80860ABC", (kernel_ulong_t)&bxt_info },
diff --git a/drivers/mfd/intel-lpss.c b/drivers/mfd/intel-lpss.c
index a9bf10bee796..0e15afc39f54 100644
--- a/drivers/mfd/intel-lpss.c
+++ b/drivers/mfd/intel-lpss.c
@@ -301,7 +301,8 @@ static int intel_lpss_register_clock_divider(struct intel_lpss *lpss,
 
 	snprintf(name, sizeof(name), "%s-div", devname);
 	tmp = clk_register_fractional_divider(NULL, name, __clk_get_name(tmp),
-					      0, lpss->priv, 1, 15, 16, 15, 0,
+					      CLK_FRAC_DIVIDER_POWER_OF_TWO_PS,
+					      lpss->priv, 1, 15, 16, 15, 0,
 					      NULL);
 	if (IS_ERR(tmp))
 		return PTR_ERR(tmp);
diff --git a/drivers/mfd/intel-m10-bmc.c b/drivers/mfd/intel-m10-bmc.c
index 1a9bfb7f48cd..8db3bcf5fccc 100644
--- a/drivers/mfd/intel-m10-bmc.c
+++ b/drivers/mfd/intel-m10-bmc.c
@@ -15,7 +15,8 @@
 
 enum m10bmc_type {
 	M10_N3000,
-	M10_D5005
+	M10_D5005,
+	M10_N5010,
 };
 
 static struct mfd_cell m10bmc_d5005_subdevs[] = {
@@ -28,6 +29,10 @@ static struct mfd_cell m10bmc_pacn3000_subdevs[] = {
 	{ .name = "n3000bmc-secure" },
 };
 
+static struct mfd_cell m10bmc_n5010_subdevs[] = {
+	{ .name = "n5010bmc-hwmon" },
+};
+
 static const struct regmap_range m10bmc_regmap_range[] = {
 	regmap_reg_range(M10BMC_LEGACY_BUILD_VER, M10BMC_LEGACY_BUILD_VER),
 	regmap_reg_range(M10BMC_SYS_BASE, M10BMC_SYS_END),
@@ -192,6 +197,10 @@ static int intel_m10_bmc_spi_probe(struct spi_device *spi)
 		cells = m10bmc_d5005_subdevs;
 		n_cell = ARRAY_SIZE(m10bmc_d5005_subdevs);
 		break;
+	case M10_N5010:
+		cells = m10bmc_n5010_subdevs;
+		n_cell = ARRAY_SIZE(m10bmc_n5010_subdevs);
+		break;
 	default:
 		return -ENODEV;
 	}
@@ -207,6 +216,7 @@ static int intel_m10_bmc_spi_probe(struct spi_device *spi)
 static const struct spi_device_id m10bmc_spi_id[] = {
 	{ "m10-n3000", M10_N3000 },
 	{ "m10-d5005", M10_D5005 },
+	{ "m10-n5010", M10_N5010 },
 	{ }
 };
 MODULE_DEVICE_TABLE(spi, m10bmc_spi_id);
diff --git a/drivers/mfd/intel_quark_i2c_gpio.c b/drivers/mfd/intel_quark_i2c_gpio.c
index 01935ae4e9e1..9b9c76bd067b 100644
--- a/drivers/mfd/intel_quark_i2c_gpio.c
+++ b/drivers/mfd/intel_quark_i2c_gpio.c
@@ -17,7 +17,6 @@
 #include <linux/clk-provider.h>
 #include <linux/dmi.h>
 #include <linux/i2c.h>
-#include <linux/platform_data/gpio-dwapb.h>
 #include <linux/property.h>
 
 /* PCI BAR for register base address */
@@ -28,15 +27,6 @@
 #define MFD_ACPI_MATCH_GPIO	0ULL
 #define MFD_ACPI_MATCH_I2C	1ULL
 
-/* The base GPIO number under GPIOLIB framework */
-#define INTEL_QUARK_MFD_GPIO_BASE	8
-
-/* The default number of South-Cluster GPIO on Quark. */
-#define INTEL_QUARK_MFD_NGPIO		8
-
-/* The DesignWare GPIO ports on Quark. */
-#define INTEL_QUARK_GPIO_NPORTS	1
-
 #define INTEL_QUARK_IORES_MEM	0
 #define INTEL_QUARK_IORES_IRQ	1
 
@@ -111,12 +101,38 @@ static struct resource intel_quark_gpio_res[] = {
 	[INTEL_QUARK_IORES_MEM] = {
 		.flags = IORESOURCE_MEM,
 	},
+	[INTEL_QUARK_IORES_IRQ] = {
+		.flags = IORESOURCE_IRQ,
+	},
 };
 
 static struct mfd_cell_acpi_match intel_quark_acpi_match_gpio = {
 	.adr = MFD_ACPI_MATCH_GPIO,
 };
 
+static const struct software_node intel_quark_gpio_controller_node = {
+	.name = "intel-quark-gpio-controller",
+};
+
+static const struct property_entry intel_quark_gpio_portA_properties[] = {
+	PROPERTY_ENTRY_U32("reg", 0),
+	PROPERTY_ENTRY_U32("snps,nr-gpios", 8),
+	PROPERTY_ENTRY_U32("gpio-base", 8),
+	{ }
+};
+
+static const struct software_node intel_quark_gpio_portA_node = {
+	.name = "portA",
+	.parent = &intel_quark_gpio_controller_node,
+	.properties = intel_quark_gpio_portA_properties,
+};
+
+static const struct software_node *intel_quark_gpio_node_group[] = {
+	&intel_quark_gpio_controller_node,
+	&intel_quark_gpio_portA_node,
+	NULL
+};
+
 static struct mfd_cell intel_quark_mfd_cells[] = {
 	[MFD_I2C_BAR] = {
 		.id = MFD_I2C_BAR,
@@ -203,35 +219,19 @@ static int intel_quark_gpio_setup(struct pci_dev *pdev)
 {
 	struct mfd_cell *cell = &intel_quark_mfd_cells[MFD_GPIO_BAR];
 	struct resource *res = intel_quark_gpio_res;
-	struct dwapb_platform_data *pdata;
-	struct device *dev = &pdev->dev;
+	int ret;
 
 	res[INTEL_QUARK_IORES_MEM].start = pci_resource_start(pdev, MFD_GPIO_BAR);
 	res[INTEL_QUARK_IORES_MEM].end = pci_resource_end(pdev, MFD_GPIO_BAR);
 
-	pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata)
-		return -ENOMEM;
-
-	/* For intel quark x1000, it has only one port: portA */
-	pdata->nports = INTEL_QUARK_GPIO_NPORTS;
-	pdata->properties = devm_kcalloc(dev, pdata->nports,
-					 sizeof(*pdata->properties),
-					 GFP_KERNEL);
-	if (!pdata->properties)
-		return -ENOMEM;
-
-	/* Set the properties for portA */
-	pdata->properties->fwnode	= NULL;
-	pdata->properties->idx		= 0;
-	pdata->properties->ngpio	= INTEL_QUARK_MFD_NGPIO;
-	pdata->properties->gpio_base	= INTEL_QUARK_MFD_GPIO_BASE;
-	pdata->properties->irq[0]	= pci_irq_vector(pdev, 0);
-	pdata->properties->irq_shared	= true;
+	res[INTEL_QUARK_IORES_IRQ].start = pci_irq_vector(pdev, 0);
+	res[INTEL_QUARK_IORES_IRQ].end = pci_irq_vector(pdev, 0);
 
-	cell->platform_data = pdata;
-	cell->pdata_size = sizeof(*pdata);
+	ret = software_node_register_node_group(intel_quark_gpio_node_group);
+	if (ret)
+		return ret;
 
+	cell->swnode = &intel_quark_gpio_controller_node;
 	return 0;
 }
 
@@ -274,10 +274,12 @@ static int intel_quark_mfd_probe(struct pci_dev *pdev,
 			      ARRAY_SIZE(intel_quark_mfd_cells), NULL, 0,
 			      NULL);
 	if (ret)
-		goto err_free_irq_vectors;
+		goto err_unregister_gpio_node_group;
 
 	return 0;
 
+err_unregister_gpio_node_group:
+	software_node_unregister_node_group(intel_quark_gpio_node_group);
 err_free_irq_vectors:
 	pci_free_irq_vectors(pdev);
 err_unregister_i2c_clk:
@@ -288,6 +290,7 @@ err_unregister_i2c_clk:
 static void intel_quark_mfd_remove(struct pci_dev *pdev)
 {
 	mfd_remove_devices(&pdev->dev);
+	software_node_unregister_node_group(intel_quark_gpio_node_group);
 	pci_free_irq_vectors(pdev);
 	intel_quark_unregister_i2c_clk(&pdev->dev);
 }
diff --git a/drivers/mfd/lpc_ich.c b/drivers/mfd/lpc_ich.c
index 3bbb29a7e7a5..f10e53187f67 100644
--- a/drivers/mfd/lpc_ich.c
+++ b/drivers/mfd/lpc_ich.c
@@ -489,6 +489,7 @@ static struct lpc_ich_info lpc_chipset_info[] = {
 	[LPC_DH89XXCC] = {
 		.name = "DH89xxCC",
 		.iTCO_version = 2,
+		.gpio_version = ICH_V5_GPIO,
 	},
 	[LPC_PPT] = {
 		.name = "Panther Point",
diff --git a/drivers/mfd/lpc_sch.c b/drivers/mfd/lpc_sch.c
index 428a526cbe86..9ab9adce06fd 100644
--- a/drivers/mfd/lpc_sch.c
+++ b/drivers/mfd/lpc_sch.c
@@ -22,7 +22,7 @@
 #define SMBASE		0x40
 #define SMBUS_IO_SIZE	64
 
-#define GPIOBASE	0x44
+#define GPIO_BASE	0x44
 #define GPIO_IO_SIZE	64
 #define GPIO_IO_SIZE_CENTERTON	128
 
@@ -145,7 +145,7 @@ static int lpc_sch_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	if (ret == 0)
 		cells++;
 
-	ret = lpc_sch_populate_cell(dev, GPIOBASE, "sch_gpio",
+	ret = lpc_sch_populate_cell(dev, GPIO_BASE, "sch_gpio",
 				    info->io_size_gpio,
 				    id->device, &lpc_sch_cells[cells]);
 	if (ret < 0)
diff --git a/drivers/mfd/mt6360-core.c b/drivers/mfd/mt6360-core.c
index e628953548ce..6eaa6775b888 100644
--- a/drivers/mfd/mt6360-core.c
+++ b/drivers/mfd/mt6360-core.c
@@ -319,18 +319,18 @@ static const struct resource mt6360_regulator_resources[] = {
 	DEFINE_RES_IRQ_NAMED(MT6360_BUCK2_OC_EVT, "buck2_oc_evt"),
 	DEFINE_RES_IRQ_NAMED(MT6360_BUCK2_OV_EVT, "buck2_ov_evt"),
 	DEFINE_RES_IRQ_NAMED(MT6360_BUCK2_UV_EVT, "buck2_uv_evt"),
-	DEFINE_RES_IRQ_NAMED(MT6360_LDO6_OC_EVT, "ldo6_oc_evt"),
-	DEFINE_RES_IRQ_NAMED(MT6360_LDO7_OC_EVT, "ldo7_oc_evt"),
-	DEFINE_RES_IRQ_NAMED(MT6360_LDO6_PGB_EVT, "ldo6_pgb_evt"),
-	DEFINE_RES_IRQ_NAMED(MT6360_LDO7_PGB_EVT, "ldo7_pgb_evt"),
 	DEFINE_RES_IRQ_NAMED(MT6360_LDO1_OC_EVT, "ldo1_oc_evt"),
 	DEFINE_RES_IRQ_NAMED(MT6360_LDO2_OC_EVT, "ldo2_oc_evt"),
 	DEFINE_RES_IRQ_NAMED(MT6360_LDO3_OC_EVT, "ldo3_oc_evt"),
 	DEFINE_RES_IRQ_NAMED(MT6360_LDO5_OC_EVT, "ldo5_oc_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_LDO6_OC_EVT, "ldo6_oc_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_LDO7_OC_EVT, "ldo7_oc_evt"),
 	DEFINE_RES_IRQ_NAMED(MT6360_LDO1_PGB_EVT, "ldo1_pgb_evt"),
 	DEFINE_RES_IRQ_NAMED(MT6360_LDO2_PGB_EVT, "ldo2_pgb_evt"),
 	DEFINE_RES_IRQ_NAMED(MT6360_LDO3_PGB_EVT, "ldo3_pgb_evt"),
 	DEFINE_RES_IRQ_NAMED(MT6360_LDO5_PGB_EVT, "ldo5_pgb_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_LDO6_PGB_EVT, "ldo6_pgb_evt"),
+	DEFINE_RES_IRQ_NAMED(MT6360_LDO7_PGB_EVT, "ldo7_pgb_evt"),
 };
 
 static const struct mfd_cell mt6360_devs[] = {
diff --git a/drivers/mfd/rsmu.h b/drivers/mfd/rsmu.h
new file mode 100644
index 000000000000..bb88597d189f
--- /dev/null
+++ b/drivers/mfd/rsmu.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Renesas Synchronization Management Unit (SMU) devices.
+ *
+ * Copyright (C) 2021 Integrated Device Technology, Inc., a Renesas Company.
+ */
+
+#ifndef __RSMU_MFD_H
+#define __RSMU_MFD_H
+
+#include <linux/mfd/rsmu.h>
+
+int rsmu_core_init(struct rsmu_ddata *rsmu);
+void rsmu_core_exit(struct rsmu_ddata *rsmu);
+
+#endif /* __RSMU_MFD_H */
diff --git a/drivers/mfd/rsmu_core.c b/drivers/mfd/rsmu_core.c
new file mode 100644
index 000000000000..29437fd0bd5b
--- /dev/null
+++ b/drivers/mfd/rsmu_core.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Core driver for Renesas Synchronization Management Unit (SMU) devices.
+ *
+ * Copyright (C) 2021 Integrated Device Technology, Inc., a Renesas Company.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/rsmu.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+#include "rsmu.h"
+
+enum {
+	RSMU_PHC = 0,
+	RSMU_CDEV = 1,
+	RSMU_N_DEVS = 2,
+};
+
+static struct mfd_cell rsmu_cm_devs[] = {
+	[RSMU_PHC] = {
+		.name = "8a3400x-phc",
+	},
+	[RSMU_CDEV] = {
+		.name = "8a3400x-cdev",
+	},
+};
+
+static struct mfd_cell rsmu_sabre_devs[] = {
+	[RSMU_PHC] = {
+		.name = "82p33x1x-phc",
+	},
+	[RSMU_CDEV] = {
+		.name = "82p33x1x-cdev",
+	},
+};
+
+static struct mfd_cell rsmu_sl_devs[] = {
+	[RSMU_PHC] = {
+		.name = "8v19n85x-phc",
+	},
+	[RSMU_CDEV] = {
+		.name = "8v19n85x-cdev",
+	},
+};
+
+int rsmu_core_init(struct rsmu_ddata *rsmu)
+{
+	struct mfd_cell *cells;
+	int ret;
+
+	switch (rsmu->type) {
+	case RSMU_CM:
+		cells = rsmu_cm_devs;
+		break;
+	case RSMU_SABRE:
+		cells = rsmu_sabre_devs;
+		break;
+	case RSMU_SL:
+		cells = rsmu_sl_devs;
+		break;
+	default:
+		dev_err(rsmu->dev, "Unsupported RSMU device type: %d\n", rsmu->type);
+		return -ENODEV;
+	}
+
+	mutex_init(&rsmu->lock);
+
+	ret = devm_mfd_add_devices(rsmu->dev, PLATFORM_DEVID_AUTO, cells,
+				   RSMU_N_DEVS, NULL, 0, NULL);
+	if (ret < 0)
+		dev_err(rsmu->dev, "Failed to register sub-devices: %d\n", ret);
+
+	return ret;
+}
+
+void rsmu_core_exit(struct rsmu_ddata *rsmu)
+{
+	mutex_destroy(&rsmu->lock);
+}
+
+MODULE_DESCRIPTION("Renesas SMU core driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/rsmu_i2c.c b/drivers/mfd/rsmu_i2c.c
new file mode 100644
index 000000000000..dc001c9791c1
--- /dev/null
+++ b/drivers/mfd/rsmu_i2c.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * I2C driver for Renesas Synchronization Management Unit (SMU) devices.
+ *
+ * Copyright (C) 2021 Integrated Device Technology, Inc., a Renesas Company.
+ */
+
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/rsmu.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+#include "rsmu.h"
+
+/*
+ * 16-bit register address: the lower 8 bits of the register address come
+ * from the offset addr byte and the upper 8 bits come from the page register.
+ */
+#define	RSMU_CM_PAGE_ADDR		0xFD
+#define	RSMU_CM_PAGE_WINDOW		256
+
+/*
+ * 15-bit register address: the lower 7 bits of the register address come
+ * from the offset addr byte and the upper 8 bits come from the page register.
+ */
+#define	RSMU_SABRE_PAGE_ADDR		0x7F
+#define	RSMU_SABRE_PAGE_WINDOW		128
+
+static const struct regmap_range_cfg rsmu_cm_range_cfg[] = {
+	{
+		.range_min = 0,
+		.range_max = 0xD000,
+		.selector_reg = RSMU_CM_PAGE_ADDR,
+		.selector_mask = 0xFF,
+		.selector_shift = 0,
+		.window_start = 0,
+		.window_len = RSMU_CM_PAGE_WINDOW,
+	}
+};
+
+static const struct regmap_range_cfg rsmu_sabre_range_cfg[] = {
+	{
+		.range_min = 0,
+		.range_max = 0x400,
+		.selector_reg = RSMU_SABRE_PAGE_ADDR,
+		.selector_mask = 0xFF,
+		.selector_shift = 0,
+		.window_start = 0,
+		.window_len = RSMU_SABRE_PAGE_WINDOW,
+	}
+};
+
+static bool rsmu_cm_volatile_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case RSMU_CM_PAGE_ADDR:
+		return false;
+	default:
+		return true;
+	}
+}
+
+static bool rsmu_sabre_volatile_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case RSMU_SABRE_PAGE_ADDR:
+		return false;
+	default:
+		return true;
+	}
+}
+
+static const struct regmap_config rsmu_cm_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.max_register = 0xD000,
+	.ranges = rsmu_cm_range_cfg,
+	.num_ranges = ARRAY_SIZE(rsmu_cm_range_cfg),
+	.volatile_reg = rsmu_cm_volatile_reg,
+	.cache_type = REGCACHE_RBTREE,
+	.can_multi_write = true,
+};
+
+static const struct regmap_config rsmu_sabre_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.max_register = 0x400,
+	.ranges = rsmu_sabre_range_cfg,
+	.num_ranges = ARRAY_SIZE(rsmu_sabre_range_cfg),
+	.volatile_reg = rsmu_sabre_volatile_reg,
+	.cache_type = REGCACHE_RBTREE,
+	.can_multi_write = true,
+};
+
+static const struct regmap_config rsmu_sl_regmap_config = {
+	.reg_bits = 16,
+	.val_bits = 8,
+	.reg_format_endian = REGMAP_ENDIAN_BIG,
+	.max_register = 0x339,
+	.cache_type = REGCACHE_NONE,
+	.can_multi_write = true,
+};
+
+static int rsmu_i2c_probe(struct i2c_client *client,
+			  const struct i2c_device_id *id)
+{
+	const struct regmap_config *cfg;
+	struct rsmu_ddata *rsmu;
+	int ret;
+
+	rsmu = devm_kzalloc(&client->dev, sizeof(*rsmu), GFP_KERNEL);
+	if (!rsmu)
+		return -ENOMEM;
+
+	i2c_set_clientdata(client, rsmu);
+
+	rsmu->dev = &client->dev;
+	rsmu->type = (enum rsmu_type)id->driver_data;
+
+	switch (rsmu->type) {
+	case RSMU_CM:
+		cfg = &rsmu_cm_regmap_config;
+		break;
+	case RSMU_SABRE:
+		cfg = &rsmu_sabre_regmap_config;
+		break;
+	case RSMU_SL:
+		cfg = &rsmu_sl_regmap_config;
+		break;
+	default:
+		dev_err(rsmu->dev, "Unsupported RSMU device type: %d\n", rsmu->type);
+		return -ENODEV;
+	}
+	rsmu->regmap = devm_regmap_init_i2c(client, cfg);
+	if (IS_ERR(rsmu->regmap)) {
+		ret = PTR_ERR(rsmu->regmap);
+		dev_err(rsmu->dev, "Failed to allocate register map: %d\n", ret);
+		return ret;
+	}
+
+	return rsmu_core_init(rsmu);
+}
+
+static int rsmu_i2c_remove(struct i2c_client *client)
+{
+	struct rsmu_ddata *rsmu = i2c_get_clientdata(client);
+
+	rsmu_core_exit(rsmu);
+
+	return 0;
+}
+
+static const struct i2c_device_id rsmu_i2c_id[] = {
+	{ "8a34000",  RSMU_CM },
+	{ "8a34001",  RSMU_CM },
+	{ "82p33810", RSMU_SABRE },
+	{ "82p33811", RSMU_SABRE },
+	{ "8v19n850", RSMU_SL },
+	{ "8v19n851", RSMU_SL },
+	{}
+};
+MODULE_DEVICE_TABLE(i2c, rsmu_i2c_id);
+
+static const struct of_device_id rsmu_i2c_of_match[] = {
+	{ .compatible = "idt,8a34000",  .data = (void *)RSMU_CM },
+	{ .compatible = "idt,8a34001",  .data = (void *)RSMU_CM },
+	{ .compatible = "idt,82p33810", .data = (void *)RSMU_SABRE },
+	{ .compatible = "idt,82p33811", .data = (void *)RSMU_SABRE },
+	{ .compatible = "idt,8v19n850", .data = (void *)RSMU_SL },
+	{ .compatible = "idt,8v19n851", .data = (void *)RSMU_SL },
+	{}
+};
+MODULE_DEVICE_TABLE(of, rsmu_i2c_of_match);
+
+static struct i2c_driver rsmu_i2c_driver = {
+	.driver = {
+		.name = "rsmu-i2c",
+		.of_match_table = of_match_ptr(rsmu_i2c_of_match),
+	},
+	.probe = rsmu_i2c_probe,
+	.remove	= rsmu_i2c_remove,
+	.id_table = rsmu_i2c_id,
+};
+
+static int __init rsmu_i2c_init(void)
+{
+	return i2c_add_driver(&rsmu_i2c_driver);
+}
+subsys_initcall(rsmu_i2c_init);
+
+static void __exit rsmu_i2c_exit(void)
+{
+	i2c_del_driver(&rsmu_i2c_driver);
+}
+module_exit(rsmu_i2c_exit);
+
+MODULE_DESCRIPTION("Renesas SMU I2C driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/rsmu_spi.c b/drivers/mfd/rsmu_spi.c
new file mode 100644
index 000000000000..fec2b4ec477c
--- /dev/null
+++ b/drivers/mfd/rsmu_spi.c
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * SPI driver for Renesas Synchronization Management Unit (SMU) devices.
+ *
+ * Copyright (C) 2021 Integrated Device Technology, Inc., a Renesas Company.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/rsmu.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/spi/spi.h>
+
+#include "rsmu.h"
+
+#define	RSMU_CM_PAGE_ADDR		0x7C
+#define	RSMU_SABRE_PAGE_ADDR		0x7F
+#define	RSMU_HIGHER_ADDR_MASK		0xFF80
+#define	RSMU_HIGHER_ADDR_SHIFT		7
+#define	RSMU_LOWER_ADDR_MASK		0x7F
+
+static int rsmu_read_device(struct rsmu_ddata *rsmu, u8 reg, u8 *buf, u16 bytes)
+{
+	struct spi_device *client = to_spi_device(rsmu->dev);
+	struct spi_transfer xfer = {0};
+	struct spi_message msg;
+	u8 cmd[256] = {0};
+	u8 rsp[256] = {0};
+	int ret;
+
+	cmd[0] = reg | 0x80;
+	xfer.rx_buf = rsp;
+	xfer.len = bytes + 1;
+	xfer.tx_buf = cmd;
+	xfer.bits_per_word = client->bits_per_word;
+	xfer.speed_hz = client->max_speed_hz;
+
+	spi_message_init(&msg);
+	spi_message_add_tail(&xfer, &msg);
+
+	/*
+	 * 4-wire SPI is a shift register, so for every byte you send,
+	 * you get one back at the same time. Example read from 0xC024,
+	 * which has value of 0x2D
+	 *
+	 * MOSI:
+	 *       7C 00 C0 #Set page register
+	 *       A4 00    #MSB is set, so this is read command
+	 * MISO:
+	 *       XX 2D    #XX is a dummy byte from sending A4 and we
+	 *                 need to throw it away
+	 */
+	ret = spi_sync(client, &msg);
+	if (ret >= 0)
+		memcpy(buf, &rsp[1], xfer.len-1);
+
+	return ret;
+}
+
+static int rsmu_write_device(struct rsmu_ddata *rsmu, u8 reg, u8 *buf, u16 bytes)
+{
+	struct spi_device *client = to_spi_device(rsmu->dev);
+	struct spi_transfer xfer = {0};
+	struct spi_message msg;
+	u8 cmd[256] = {0};
+
+	cmd[0] = reg;
+	memcpy(&cmd[1], buf, bytes);
+
+	xfer.len = bytes + 1;
+	xfer.tx_buf = cmd;
+	xfer.bits_per_word = client->bits_per_word;
+	xfer.speed_hz = client->max_speed_hz;
+	spi_message_init(&msg);
+	spi_message_add_tail(&xfer, &msg);
+
+	return  spi_sync(client, &msg);
+}
+
+/*
+ * 1-byte (1B) offset addressing:
+ * 16-bit register address: the lower 7 bits of the register address come
+ * from the offset addr byte and the upper 9 bits come from the page register.
+ */
+static int rsmu_write_page_register(struct rsmu_ddata *rsmu, u16 reg)
+{
+	u8 page_reg;
+	u8 buf[2];
+	u16 bytes;
+	u16 page;
+	int err;
+
+	switch (rsmu->type) {
+	case RSMU_CM:
+		page_reg = RSMU_CM_PAGE_ADDR;
+		page = reg & RSMU_HIGHER_ADDR_MASK;
+		buf[0] = (u8)(page & 0xff);
+		buf[1] = (u8)((page >> 8) & 0xff);
+		bytes = 2;
+		break;
+	case RSMU_SABRE:
+		page_reg = RSMU_SABRE_PAGE_ADDR;
+		page = reg >> RSMU_HIGHER_ADDR_SHIFT;
+		buf[0] = (u8)(page & 0xff);
+		bytes = 1;
+		break;
+	default:
+		dev_err(rsmu->dev, "Unsupported RSMU device type: %d\n", rsmu->type);
+		return -ENODEV;
+	}
+
+	/* Simply return if we are on the same page */
+	if (rsmu->page == page)
+		return 0;
+
+	err = rsmu_write_device(rsmu, page_reg, buf, bytes);
+	if (err)
+		dev_err(rsmu->dev, "Failed to set page offset 0x%x\n", page);
+	else
+		/* Remember the last page */
+		rsmu->page = page;
+
+	return err;
+}
+
+static int rsmu_reg_read(void *context, unsigned int reg, unsigned int *val)
+{
+	struct rsmu_ddata *rsmu = spi_get_drvdata((struct spi_device *)context);
+	u8 addr = (u8)(reg & RSMU_LOWER_ADDR_MASK);
+	int err;
+
+	err = rsmu_write_page_register(rsmu, reg);
+	if (err)
+		return err;
+
+	err = rsmu_read_device(rsmu, addr, (u8 *)val, 1);
+	if (err)
+		dev_err(rsmu->dev, "Failed to read offset address 0x%x\n", addr);
+
+	return err;
+}
+
+static int rsmu_reg_write(void *context, unsigned int reg, unsigned int val)
+{
+	struct rsmu_ddata *rsmu = spi_get_drvdata((struct spi_device *)context);
+	u8 addr = (u8)(reg & RSMU_LOWER_ADDR_MASK);
+	u8 data = (u8)val;
+	int err;
+
+	err = rsmu_write_page_register(rsmu, reg);
+	if (err)
+		return err;
+
+	err = rsmu_write_device(rsmu, addr, &data, 1);
+	if (err)
+		dev_err(rsmu->dev,
+			"Failed to write offset address 0x%x\n", addr);
+
+	return err;
+}
+
+static const struct regmap_config rsmu_cm_regmap_config = {
+	.reg_bits = 16,
+	.val_bits = 8,
+	.max_register = 0xD000,
+	.reg_read = rsmu_reg_read,
+	.reg_write = rsmu_reg_write,
+	.cache_type = REGCACHE_NONE,
+};
+
+static const struct regmap_config rsmu_sabre_regmap_config = {
+	.reg_bits = 16,
+	.val_bits = 8,
+	.max_register = 0x400,
+	.reg_read = rsmu_reg_read,
+	.reg_write = rsmu_reg_write,
+	.cache_type = REGCACHE_NONE,
+};
+
+static int rsmu_spi_probe(struct spi_device *client)
+{
+	const struct spi_device_id *id = spi_get_device_id(client);
+	const struct regmap_config *cfg;
+	struct rsmu_ddata *rsmu;
+	int ret;
+
+	rsmu = devm_kzalloc(&client->dev, sizeof(*rsmu), GFP_KERNEL);
+	if (!rsmu)
+		return -ENOMEM;
+
+	spi_set_drvdata(client, rsmu);
+
+	rsmu->dev = &client->dev;
+	rsmu->type = (enum rsmu_type)id->driver_data;
+
+	/* Initialize regmap */
+	switch (rsmu->type) {
+	case RSMU_CM:
+		cfg = &rsmu_cm_regmap_config;
+		break;
+	case RSMU_SABRE:
+		cfg = &rsmu_sabre_regmap_config;
+		break;
+	default:
+		dev_err(rsmu->dev, "Unsupported RSMU device type: %d\n", rsmu->type);
+		return -ENODEV;
+	}
+
+	rsmu->regmap = devm_regmap_init(&client->dev, NULL, client, cfg);
+	if (IS_ERR(rsmu->regmap)) {
+		ret = PTR_ERR(rsmu->regmap);
+		dev_err(rsmu->dev, "Failed to allocate register map: %d\n", ret);
+		return ret;
+	}
+
+	return rsmu_core_init(rsmu);
+}
+
+static int rsmu_spi_remove(struct spi_device *client)
+{
+	struct rsmu_ddata *rsmu = spi_get_drvdata(client);
+
+	rsmu_core_exit(rsmu);
+
+	return 0;
+}
+
+static const struct spi_device_id rsmu_spi_id[] = {
+	{ "8a34000",  RSMU_CM },
+	{ "8a34001",  RSMU_CM },
+	{ "82p33810", RSMU_SABRE },
+	{ "82p33811", RSMU_SABRE },
+	{}
+};
+MODULE_DEVICE_TABLE(spi, rsmu_spi_id);
+
+static const struct of_device_id rsmu_spi_of_match[] = {
+	{ .compatible = "idt,8a34000",  .data = (void *)RSMU_CM },
+	{ .compatible = "idt,8a34001",  .data = (void *)RSMU_CM },
+	{ .compatible = "idt,82p33810", .data = (void *)RSMU_SABRE },
+	{ .compatible = "idt,82p33811", .data = (void *)RSMU_SABRE },
+	{}
+};
+MODULE_DEVICE_TABLE(of, rsmu_spi_of_match);
+
+static struct spi_driver rsmu_spi_driver = {
+	.driver = {
+		.name = "rsmu-spi",
+		.of_match_table = of_match_ptr(rsmu_spi_of_match),
+	},
+	.probe = rsmu_spi_probe,
+	.remove	= rsmu_spi_remove,
+	.id_table = rsmu_spi_id,
+};
+
+static int __init rsmu_spi_init(void)
+{
+	return spi_register_driver(&rsmu_spi_driver);
+}
+subsys_initcall(rsmu_spi_init);
+
+static void __exit rsmu_spi_exit(void)
+{
+	spi_unregister_driver(&rsmu_spi_driver);
+}
+module_exit(rsmu_spi_exit);
+
+MODULE_DESCRIPTION("Renesas SMU SPI driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/simple-mfd-i2c.c b/drivers/mfd/simple-mfd-i2c.c
index 87f684cff9a1..51536691ad9d 100644
--- a/drivers/mfd/simple-mfd-i2c.c
+++ b/drivers/mfd/simple-mfd-i2c.c
@@ -2,39 +2,64 @@
 /*
  * Simple MFD - I2C
  *
+ * Author(s):
+ * 	Michael Walle <michael@walle.cc>
+ * 	Lee Jones <lee.jones@linaro.org>
+ *
  * This driver creates a single register map with the intention for it to be
  * shared by all sub-devices.  Children can use their parent's device structure
  * (dev.parent) in order to reference it.
  *
  * Once the register map has been successfully initialised, any sub-devices
- * represented by child nodes in Device Tree will be subsequently registered.
+ * represented by child nodes in Device Tree or via the MFD cells in this file
+ * will be subsequently registered.
  */
 
 #include <linux/i2c.h>
 #include <linux/kernel.h>
+#include <linux/mfd/core.h>
 #include <linux/module.h>
 #include <linux/of_platform.h>
 #include <linux/regmap.h>
 
-static const struct regmap_config simple_regmap_config = {
+#include "simple-mfd-i2c.h"
+
+static const struct regmap_config regmap_config_8r_8v = {
 	.reg_bits = 8,
 	.val_bits = 8,
 };
 
 static int simple_mfd_i2c_probe(struct i2c_client *i2c)
 {
-	const struct regmap_config *config;
+	const struct simple_mfd_data *simple_mfd_data;
+	const struct regmap_config *regmap_config;
 	struct regmap *regmap;
+	int ret;
+
+	simple_mfd_data = device_get_match_data(&i2c->dev);
 
-	config = device_get_match_data(&i2c->dev);
-	if (!config)
-		config = &simple_regmap_config;
+	/* If no regmap_config is specified, use the default 8reg and 8val bits */
+	if (!simple_mfd_data || !simple_mfd_data->regmap_config)
+		regmap_config = &regmap_config_8r_8v;
+	else
+		regmap_config = simple_mfd_data->regmap_config;
 
-	regmap = devm_regmap_init_i2c(i2c, config);
+	regmap = devm_regmap_init_i2c(i2c, regmap_config);
 	if (IS_ERR(regmap))
 		return PTR_ERR(regmap);
 
-	return devm_of_platform_populate(&i2c->dev);
+	/* If no MFD cells are spedified, use register the DT child nodes instead */
+	if (!simple_mfd_data || !simple_mfd_data->mfd_cell)
+		return devm_of_platform_populate(&i2c->dev);
+
+	ret = devm_mfd_add_devices(&i2c->dev, PLATFORM_DEVID_AUTO,
+				   simple_mfd_data->mfd_cell,
+				   simple_mfd_data->mfd_cell_size,
+				   NULL, 0, NULL);
+	if (ret)
+		dev_err(&i2c->dev, "Failed to add child devices\n");
+
+	return ret;
 }
 
 static const struct of_device_id simple_mfd_i2c_of_match[] = {
diff --git a/drivers/mfd/simple-mfd-i2c.h b/drivers/mfd/simple-mfd-i2c.h
new file mode 100644
index 000000000000..7cb2bdd347d9
--- /dev/null
+++ b/drivers/mfd/simple-mfd-i2c.h
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Simple MFD - I2C
+ *
+ * Author: Lee Jones <lee.jones@linaro.org>
+ *
+ * This driver creates a single register map with the intention for it to be
+ * shared by all sub-devices.  Children can use their parent's device structure
+ * (dev.parent) in order to reference it.
+ *
+ * This driver creates a single register map with the intention for it to be
+ * shared by all sub-devices.  Children can use their parent's device structure
+ * (dev.parent) in order to reference it.
+ *
+ * Once the register map has been successfully initialised, any sub-devices
+ * represented by child nodes in Device Tree or via the MFD cells in the
+ * associated C file will be subsequently registered.
+ */
+
+#ifndef __MFD_SIMPLE_MFD_I2C_H
+#define __MFD_SIMPLE_MFD_I2C_H
+
+#include <linux/mfd/core.h>
+#include <linux/regmap.h>
+
+struct simple_mfd_data {
+	const struct regmap_config *regmap_config;
+	const struct mfd_cell *mfd_cell;
+	size_t mfd_cell_size;
+};
+
+#endif /* __MFD_SIMPLE_MFD_I2C_H */
diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c
index 1dd39483e7c1..58d09c615e67 100644
--- a/drivers/mfd/stmpe.c
+++ b/drivers/mfd/stmpe.c
@@ -1095,7 +1095,7 @@ static irqreturn_t stmpe_irq(int irq, void *data)
 
 	if (variant->id_val == STMPE801_ID ||
 	    variant->id_val == STMPE1600_ID) {
-		int base = irq_create_mapping(stmpe->domain, 0);
+		int base = irq_find_mapping(stmpe->domain, 0);
 
 		handle_nested_irq(base);
 		return IRQ_HANDLED;
@@ -1123,7 +1123,7 @@ static irqreturn_t stmpe_irq(int irq, void *data)
 		while (status) {
 			int bit = __ffs(status);
 			int line = bank * 8 + bit;
-			int nestedirq = irq_create_mapping(stmpe->domain, line);
+			int nestedirq = irq_find_mapping(stmpe->domain, line);
 
 			handle_nested_irq(nestedirq);
 			status &= ~(1 << bit);
diff --git a/drivers/mfd/syscon.c b/drivers/mfd/syscon.c
index 765c0210cb52..191fdb87c424 100644
--- a/drivers/mfd/syscon.c
+++ b/drivers/mfd/syscon.c
@@ -60,7 +60,7 @@ static struct syscon *of_syscon_register(struct device_node *np, bool check_clk)
 		goto err_map;
 	}
 
-	base = ioremap(res.start, resource_size(&res));
+	base = of_iomap(np, 0);
 	if (!base) {
 		ret = -ENOMEM;
 		goto err_map;
diff --git a/drivers/mfd/tc3589x.c b/drivers/mfd/tc3589x.c
index 7614f8fe0e91..13583cdb93b6 100644
--- a/drivers/mfd/tc3589x.c
+++ b/drivers/mfd/tc3589x.c
@@ -187,7 +187,7 @@ again:
 
 	while (status) {
 		int bit = __ffs(status);
-		int virq = irq_create_mapping(tc3589x->domain, bit);
+		int virq = irq_find_mapping(tc3589x->domain, bit);
 
 		handle_nested_irq(virq);
 		status &= ~(1 << bit);
diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c
index 0e6e25308190..55adc379f94b 100644
--- a/drivers/mfd/ti_am335x_tscadc.c
+++ b/drivers/mfd/ti_am335x_tscadc.c
@@ -175,10 +175,9 @@ static	int ti_tscadc_probe(struct platform_device *pdev)
 	tscadc->dev = &pdev->dev;
 
 	err = platform_get_irq(pdev, 0);
-	if (err < 0) {
-		dev_err(&pdev->dev, "no irq ID is specified.\n");
+	if (err < 0)
 		goto ret;
-	} else
+	else
 		tscadc->irq = err;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
diff --git a/drivers/mfd/tps65086.c b/drivers/mfd/tps65086.c
index 341466ef20cc..3bd5728844a0 100644
--- a/drivers/mfd/tps65086.c
+++ b/drivers/mfd/tps65086.c
@@ -24,6 +24,7 @@
 static const struct mfd_cell tps65086_cells[] = {
 	{ .name = "tps65086-regulator", },
 	{ .name = "tps65086-gpio", },
+	{ .name = "tps65086-reset", },
 };
 
 static const struct regmap_range tps65086_yes_ranges[] = {
@@ -100,29 +101,30 @@ static int tps65086_probe(struct i2c_client *client,
 		 (char)((version & TPS65086_DEVICEID_OTP_MASK) >> 4) + 'A',
 		 (version & TPS65086_DEVICEID_REV_MASK) >> 6);
 
-	ret = regmap_add_irq_chip(tps->regmap, tps->irq, IRQF_ONESHOT, 0,
-				  &tps65086_irq_chip, &tps->irq_data);
-	if (ret) {
-		dev_err(tps->dev, "Failed to register IRQ chip\n");
-		return ret;
+	if (tps->irq > 0) {
+		ret = regmap_add_irq_chip(tps->regmap, tps->irq, IRQF_ONESHOT, 0,
+					  &tps65086_irq_chip, &tps->irq_data);
+		if (ret) {
+			dev_err(tps->dev, "Failed to register IRQ chip\n");
+			return ret;
+		}
 	}
 
 	ret = mfd_add_devices(tps->dev, PLATFORM_DEVID_AUTO, tps65086_cells,
 			      ARRAY_SIZE(tps65086_cells), NULL, 0,
 			      regmap_irq_get_domain(tps->irq_data));
-	if (ret) {
+	if (ret && tps->irq > 0)
 		regmap_del_irq_chip(tps->irq, tps->irq_data);
-		return ret;
-	}
 
-	return 0;
+	return ret;
 }
 
 static int tps65086_remove(struct i2c_client *client)
 {
 	struct tps65086 *tps = i2c_get_clientdata(client);
 
-	regmap_del_irq_chip(tps->irq, tps->irq_data);
+	if (tps->irq > 0)
+		regmap_del_irq_chip(tps->irq, tps->irq_data);
 
 	return 0;
 }
diff --git a/drivers/mfd/tqmx86.c b/drivers/mfd/tqmx86.c
index ddddf08b6a4c..7ae906ff8e35 100644
--- a/drivers/mfd/tqmx86.c
+++ b/drivers/mfd/tqmx86.c
@@ -35,7 +35,11 @@
 #define TQMX86_REG_BOARD_ID_E39x	7
 #define TQMX86_REG_BOARD_ID_70EB	8
 #define TQMX86_REG_BOARD_ID_80UC	9
-#define TQMX86_REG_BOARD_ID_90UC	10
+#define TQMX86_REG_BOARD_ID_110EB	11
+#define TQMX86_REG_BOARD_ID_E40M	12
+#define TQMX86_REG_BOARD_ID_E40S	13
+#define TQMX86_REG_BOARD_ID_E40C1	14
+#define TQMX86_REG_BOARD_ID_E40C2	15
 #define TQMX86_REG_BOARD_REV	0x21
 #define TQMX86_REG_IO_EXT_INT	0x26
 #define TQMX86_REG_IO_EXT_INT_NONE		0
@@ -77,7 +81,7 @@ static struct i2c_board_info tqmx86_i2c_devices[] = {
 	},
 };
 
-static struct ocores_i2c_platform_data ocores_platfom_data = {
+static struct ocores_i2c_platform_data ocores_platform_data = {
 	.num_devices = ARRAY_SIZE(tqmx86_i2c_devices),
 	.devices = tqmx86_i2c_devices,
 };
@@ -85,8 +89,8 @@ static struct ocores_i2c_platform_data ocores_platfom_data = {
 static const struct mfd_cell tqmx86_i2c_soft_dev[] = {
 	{
 		.name = "ocores-i2c",
-		.platform_data = &ocores_platfom_data,
-		.pdata_size = sizeof(ocores_platfom_data),
+		.platform_data = &ocores_platform_data,
+		.pdata_size = sizeof(ocores_platform_data),
 		.resources = tqmx_i2c_soft_resources,
 		.num_resources = ARRAY_SIZE(tqmx_i2c_soft_resources),
 	},
@@ -128,21 +132,33 @@ static const char *tqmx86_board_id_to_name(u8 board_id)
 		return "TQMx70EB";
 	case TQMX86_REG_BOARD_ID_80UC:
 		return "TQMx80UC";
-	case TQMX86_REG_BOARD_ID_90UC:
-		return "TQMx90UC";
+	case TQMX86_REG_BOARD_ID_110EB:
+		return "TQMx110EB";
+	case TQMX86_REG_BOARD_ID_E40M:
+		return "TQMxE40M";
+	case TQMX86_REG_BOARD_ID_E40S:
+		return "TQMxE40S";
+	case TQMX86_REG_BOARD_ID_E40C1:
+		return "TQMxE40C1";
+	case TQMX86_REG_BOARD_ID_E40C2:
+		return "TQMxE40C2";
 	default:
 		return "Unknown";
 	}
 }
 
-static int tqmx86_board_id_to_clk_rate(u8 board_id)
+static int tqmx86_board_id_to_clk_rate(struct device *dev, u8 board_id)
 {
 	switch (board_id) {
 	case TQMX86_REG_BOARD_ID_50UC:
 	case TQMX86_REG_BOARD_ID_60EB:
 	case TQMX86_REG_BOARD_ID_70EB:
 	case TQMX86_REG_BOARD_ID_80UC:
-	case TQMX86_REG_BOARD_ID_90UC:
+	case TQMX86_REG_BOARD_ID_110EB:
+	case TQMX86_REG_BOARD_ID_E40M:
+	case TQMX86_REG_BOARD_ID_E40S:
+	case TQMX86_REG_BOARD_ID_E40C1:
+	case TQMX86_REG_BOARD_ID_E40C2:
 		return 24000;
 	case TQMX86_REG_BOARD_ID_E39M:
 	case TQMX86_REG_BOARD_ID_E39C:
@@ -152,7 +168,9 @@ static int tqmx86_board_id_to_clk_rate(u8 board_id)
 	case TQMX86_REG_BOARD_ID_E38C:
 		return 33000;
 	default:
-		return 0;
+		dev_warn(dev, "unknown board %d, assuming 24MHz LPC clock\n",
+			 board_id);
+		return 24000;
 	}
 }
 
@@ -209,9 +227,11 @@ static int tqmx86_probe(struct platform_device *pdev)
 
 		/* Assumes the IRQ resource is first. */
 		tqmx_gpio_resources[0].start = gpio_irq;
+	} else {
+		tqmx_gpio_resources[0].flags = 0;
 	}
 
-	ocores_platfom_data.clock_khz = tqmx86_board_id_to_clk_rate(board_id);
+	ocores_platform_data.clock_khz = tqmx86_board_id_to_clk_rate(dev, board_id);
 
 	if (i2c_det == TQMX86_REG_I2C_DETECT_SOFT) {
 		err = devm_mfd_add_devices(dev, PLATFORM_DEVID_NONE,
@@ -253,6 +273,14 @@ static const struct dmi_system_id tqmx86_dmi_table[] __initconst = {
 		},
 		.callback = tqmx86_create_platform_device,
 	},
+	{
+		.ident = "TQMX86",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "TQ-Systems"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "TQMx"),
+		},
+		.callback = tqmx86_create_platform_device,
+	},
 	{}
 };
 MODULE_DEVICE_TABLE(dmi, tqmx86_dmi_table);
diff --git a/drivers/mfd/wm8994-irq.c b/drivers/mfd/wm8994-irq.c
index 6c3a619e2628..651a028bc519 100644
--- a/drivers/mfd/wm8994-irq.c
+++ b/drivers/mfd/wm8994-irq.c
@@ -154,7 +154,7 @@ static irqreturn_t wm8994_edge_irq(int irq, void *data)
 	struct wm8994 *wm8994 = data;
 
 	while (gpio_get_value_cansleep(wm8994->pdata.irq_gpio))
-		handle_nested_irq(irq_create_mapping(wm8994->edge_irq, 0));
+		handle_nested_irq(irq_find_mapping(wm8994->edge_irq, 0));
 
 	return IRQ_HANDLED;
 }
diff --git a/drivers/misc/habanalabs/common/Makefile b/drivers/misc/habanalabs/common/Makefile
index 5d8b48288cf4..6ebe3c7001ff 100644
--- a/drivers/misc/habanalabs/common/Makefile
+++ b/drivers/misc/habanalabs/common/Makefile
@@ -10,4 +10,5 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
 		common/asid.o common/habanalabs_ioctl.o \
 		common/command_buffer.o common/hw_queue.o common/irq.o \
 		common/sysfs.o common/hwmon.o common/memory.o \
-		common/command_submission.o common/firmware_if.o
+		common/command_submission.o common/firmware_if.o \
+		common/state_dump.o
diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c
index 719168c980a4..8132a84698d5 100644
--- a/drivers/misc/habanalabs/common/command_buffer.c
+++ b/drivers/misc/habanalabs/common/command_buffer.c
@@ -314,8 +314,6 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
 
 	spin_lock(&mgr->cb_lock);
 	rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC);
-	if (rc < 0)
-		rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_KERNEL);
 	spin_unlock(&mgr->cb_lock);
 
 	if (rc < 0) {
@@ -552,7 +550,7 @@ int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
 
 	vma->vm_private_data = cb;
 
-	rc = hdev->asic_funcs->cb_mmap(hdev, vma, cb->kernel_address,
+	rc = hdev->asic_funcs->mmap(hdev, vma, cb->kernel_address,
 					cb->bus_address, cb->size);
 	if (rc) {
 		spin_lock(&cb->lock);
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 80c60fb41bbc..7b0516cf808b 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -38,7 +38,11 @@ static void hl_sob_reset(struct kref *ref)
 							kref);
 	struct hl_device *hdev = hw_sob->hdev;
 
+	dev_dbg(hdev->dev, "reset sob id %u\n", hw_sob->sob_id);
+
 	hdev->asic_funcs->reset_sob(hdev, hw_sob);
+
+	hw_sob->need_reset = false;
 }
 
 void hl_sob_reset_error(struct kref *ref)
@@ -52,6 +56,24 @@ void hl_sob_reset_error(struct kref *ref)
 		hw_sob->q_idx, hw_sob->sob_id);
 }
 
+void hw_sob_put(struct hl_hw_sob *hw_sob)
+{
+	if (hw_sob)
+		kref_put(&hw_sob->kref, hl_sob_reset);
+}
+
+static void hw_sob_put_err(struct hl_hw_sob *hw_sob)
+{
+	if (hw_sob)
+		kref_put(&hw_sob->kref, hl_sob_reset_error);
+}
+
+void hw_sob_get(struct hl_hw_sob *hw_sob)
+{
+	if (hw_sob)
+		kref_get(&hw_sob->kref);
+}
+
 /**
  * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
  * @sob_base: sob base id
@@ -84,76 +106,29 @@ int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
 	return 0;
 }
 
-static void sob_reset_work(struct work_struct *work)
-{
-	struct hl_cs_compl *hl_cs_cmpl =
-		container_of(work, struct hl_cs_compl, sob_reset_work);
-	struct hl_device *hdev = hl_cs_cmpl->hdev;
-
-	/*
-	 * A signal CS can get completion while the corresponding wait
-	 * for signal CS is on its way to the PQ. The wait for signal CS
-	 * will get stuck if the signal CS incremented the SOB to its
-	 * max value and there are no pending (submitted) waits on this
-	 * SOB.
-	 * We do the following to void this situation:
-	 * 1. The wait for signal CS must get a ref for the signal CS as
-	 *    soon as possible in cs_ioctl_signal_wait() and put it
-	 *    before being submitted to the PQ but after it incremented
-	 *    the SOB refcnt in init_signal_wait_cs().
-	 * 2. Signal/Wait for signal CS will decrement the SOB refcnt
-	 *    here.
-	 * These two measures guarantee that the wait for signal CS will
-	 * reset the SOB upon completion rather than the signal CS and
-	 * hence the above scenario is avoided.
-	 */
-	kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
-
-	if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
-		hdev->asic_funcs->reset_sob_group(hdev,
-				hl_cs_cmpl->sob_group);
-
-	kfree(hl_cs_cmpl);
-}
-
 static void hl_fence_release(struct kref *kref)
 {
 	struct hl_fence *fence =
 		container_of(kref, struct hl_fence, refcount);
 	struct hl_cs_compl *hl_cs_cmpl =
 		container_of(fence, struct hl_cs_compl, base_fence);
-	struct hl_device *hdev = hl_cs_cmpl->hdev;
 
-	/* EBUSY means the CS was never submitted and hence we don't have
-	 * an attached hw_sob object that we should handle here
-	 */
-	if (fence->error == -EBUSY)
-		goto free;
-
-	if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
-		(hl_cs_cmpl->type == CS_TYPE_WAIT) ||
-		(hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)) {
-
-		dev_dbg(hdev->dev,
-			"CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
-			hl_cs_cmpl->cs_seq,
-			hl_cs_cmpl->type,
-			hl_cs_cmpl->hw_sob->sob_id,
-			hl_cs_cmpl->sob_val);
-
-		queue_work(hdev->sob_reset_wq, &hl_cs_cmpl->sob_reset_work);
-
-		return;
-	}
-
-free:
 	kfree(hl_cs_cmpl);
 }
 
 void hl_fence_put(struct hl_fence *fence)
 {
-	if (fence)
-		kref_put(&fence->refcount, hl_fence_release);
+	if (IS_ERR_OR_NULL(fence))
+		return;
+	kref_put(&fence->refcount, hl_fence_release);
+}
+
+void hl_fences_put(struct hl_fence **fence, int len)
+{
+	int i;
+
+	for (i = 0; i < len; i++, fence++)
+		hl_fence_put(*fence);
 }
 
 void hl_fence_get(struct hl_fence *fence)
@@ -473,11 +448,139 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
 	spin_unlock(&hdev->cs_mirror_lock);
 }
 
+/*
+ * force_complete_multi_cs - complete all contexts that wait on multi-CS
+ *
+ * @hdev: pointer to habanalabs device structure
+ */
+static void force_complete_multi_cs(struct hl_device *hdev)
+{
+	int i;
+
+	for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
+		struct multi_cs_completion *mcs_compl;
+
+		mcs_compl = &hdev->multi_cs_completion[i];
+
+		spin_lock(&mcs_compl->lock);
+
+		if (!mcs_compl->used) {
+			spin_unlock(&mcs_compl->lock);
+			continue;
+		}
+
+		/* when calling force complete no context should be waiting on
+		 * multi-cS.
+		 * We are calling the function as a protection for such case
+		 * to free any pending context and print error message
+		 */
+		dev_err(hdev->dev,
+				"multi-CS completion context %d still waiting when calling force completion\n",
+				i);
+		complete_all(&mcs_compl->completion);
+		spin_unlock(&mcs_compl->lock);
+	}
+}
+
+/*
+ * complete_multi_cs - complete all waiting entities on multi-CS
+ *
+ * @hdev: pointer to habanalabs device structure
+ * @cs: CS structure
+ * The function signals a waiting entity that has an overlapping stream masters
+ * with the completed CS.
+ * For example:
+ * - a completed CS worked on stream master QID 4, multi CS completion
+ *   is actively waiting on stream master QIDs 3, 5. don't send signal as no
+ *   common stream master QID
+ * - a completed CS worked on stream master QID 4, multi CS completion
+ *   is actively waiting on stream master QIDs 3, 4. send signal as stream
+ *   master QID 4 is common
+ */
+static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
+{
+	struct hl_fence *fence = cs->fence;
+	int i;
+
+	/* in case of multi CS check for completion only for the first CS */
+	if (cs->staged_cs && !cs->staged_first)
+		return;
+
+	for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
+		struct multi_cs_completion *mcs_compl;
+
+		mcs_compl = &hdev->multi_cs_completion[i];
+		if (!mcs_compl->used)
+			continue;
+
+		spin_lock(&mcs_compl->lock);
+
+		/*
+		 * complete if:
+		 * 1. still waiting for completion
+		 * 2. the completed CS has at least one overlapping stream
+		 *    master with the stream masters in the completion
+		 */
+		if (mcs_compl->used &&
+				(fence->stream_master_qid_map &
+					mcs_compl->stream_master_qid_map)) {
+			/* extract the timestamp only of first completed CS */
+			if (!mcs_compl->timestamp)
+				mcs_compl->timestamp =
+						ktime_to_ns(fence->timestamp);
+			complete_all(&mcs_compl->completion);
+		}
+
+		spin_unlock(&mcs_compl->lock);
+	}
+}
+
+static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
+					struct hl_cs *cs,
+					struct hl_cs_compl *hl_cs_cmpl)
+{
+	/* Skip this handler if the cs wasn't submitted, to avoid putting
+	 * the hw_sob twice, since this case already handled at this point,
+	 * also skip if the hw_sob pointer wasn't set.
+	 */
+	if (!hl_cs_cmpl->hw_sob || !cs->submitted)
+		return;
+
+	spin_lock(&hl_cs_cmpl->lock);
+
+	/*
+	 * we get refcount upon reservation of signals or signal/wait cs for the
+	 * hw_sob object, and need to put it when the first staged cs
+	 * (which cotains the encaps signals) or cs signal/wait is completed.
+	 */
+	if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
+			(hl_cs_cmpl->type == CS_TYPE_WAIT) ||
+			(hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) ||
+			(!!hl_cs_cmpl->encaps_signals)) {
+		dev_dbg(hdev->dev,
+				"CS 0x%llx type %d finished, sob_id: %d, sob_val: %u\n",
+				hl_cs_cmpl->cs_seq,
+				hl_cs_cmpl->type,
+				hl_cs_cmpl->hw_sob->sob_id,
+				hl_cs_cmpl->sob_val);
+
+		hw_sob_put(hl_cs_cmpl->hw_sob);
+
+		if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
+			hdev->asic_funcs->reset_sob_group(hdev,
+					hl_cs_cmpl->sob_group);
+	}
+
+	spin_unlock(&hl_cs_cmpl->lock);
+}
+
 static void cs_do_release(struct kref *ref)
 {
 	struct hl_cs *cs = container_of(ref, struct hl_cs, refcount);
 	struct hl_device *hdev = cs->ctx->hdev;
 	struct hl_cs_job *job, *tmp;
+	struct hl_cs_compl *hl_cs_cmpl =
+			container_of(cs->fence, struct hl_cs_compl, base_fence);
 
 	cs->completed = true;
 
@@ -493,8 +596,9 @@ static void cs_do_release(struct kref *ref)
 		complete_job(hdev, job);
 
 	if (!cs->submitted) {
-		/* In case the wait for signal CS was submitted, the put occurs
-		 * in init_signal_wait_cs() or collective_wait_init_cs()
+		/*
+		 * In case the wait for signal CS was submitted, the fence put
+		 * occurs in init_signal_wait_cs() or collective_wait_init_cs()
 		 * right before hanging on the PQ.
 		 */
 		if (cs->type == CS_TYPE_WAIT ||
@@ -535,8 +639,20 @@ static void cs_do_release(struct kref *ref)
 			list_del(&cs->staged_cs_node);
 			spin_unlock(&hdev->cs_mirror_lock);
 		}
+
+		/* decrement refcount to handle when first staged cs
+		 * with encaps signals is completed.
+		 */
+		if (hl_cs_cmpl->encaps_signals)
+			kref_put(&hl_cs_cmpl->encaps_sig_hdl->refcount,
+						hl_encaps_handle_do_release);
 	}
 
+	if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT)
+			&& cs->encaps_signals)
+		kref_put(&cs->encaps_sig_hdl->refcount,
+					hl_encaps_handle_do_release);
+
 out:
 	/* Must be called before hl_ctx_put because inside we use ctx to get
 	 * the device
@@ -566,6 +682,10 @@ out:
 	if (cs->timestamp)
 		cs->fence->timestamp = ktime_get();
 	complete_all(&cs->fence->completion);
+	complete_multi_cs(hdev, cs);
+
+	cs_release_sob_reset_handler(hdev, cs, hl_cs_cmpl);
+
 	hl_fence_put(cs->fence);
 
 	kfree(cs->jobs_in_queue_cnt);
@@ -621,6 +741,10 @@ static void cs_timedout(struct work_struct *work)
 		break;
 	}
 
+	rc = hl_state_dump(hdev);
+	if (rc)
+		dev_err(hdev->dev, "Error during system state dump %d\n", rc);
+
 	cs_put(cs);
 
 	if (likely(!skip_reset_on_timeout)) {
@@ -661,6 +785,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 	cs->completed = false;
 	cs->type = cs_type;
 	cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
+	cs->encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);
 	cs->timeout_jiffies = timeout;
 	cs->skip_reset_on_timeout =
 		hdev->skip_reset_on_timeout ||
@@ -671,9 +796,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 	kref_init(&cs->refcount);
 	spin_lock_init(&cs->job_lock);
 
-	cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
+	cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
 	if (!cs_cmpl)
-		cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_KERNEL);
+		cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_KERNEL);
 
 	if (!cs_cmpl) {
 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
@@ -698,7 +823,6 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 	cs_cmpl->hdev = hdev;
 	cs_cmpl->type = cs->type;
 	spin_lock_init(&cs_cmpl->lock);
-	INIT_WORK(&cs_cmpl->sob_reset_work, sob_reset_work);
 	cs->fence = &cs_cmpl->base_fence;
 
 	spin_lock(&ctx->cs_lock);
@@ -791,31 +915,22 @@ void hl_cs_rollback_all(struct hl_device *hdev)
 		cs_rollback(hdev, cs);
 		cs_put(cs);
 	}
-}
-
-void hl_pending_cb_list_flush(struct hl_ctx *ctx)
-{
-	struct hl_pending_cb *pending_cb, *tmp;
 
-	list_for_each_entry_safe(pending_cb, tmp,
-			&ctx->pending_cb_list, cb_node) {
-		list_del(&pending_cb->cb_node);
-		hl_cb_put(pending_cb->cb);
-		kfree(pending_cb);
-	}
+	force_complete_multi_cs(hdev);
 }
 
 static void
 wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
 {
 	struct hl_user_pending_interrupt *pend;
+	unsigned long flags;
 
-	spin_lock(&interrupt->wait_list_lock);
+	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
 	list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) {
 		pend->fence.error = -EIO;
 		complete_all(&pend->fence.completion);
 	}
-	spin_unlock(&interrupt->wait_list_lock);
+	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 }
 
 void hl_release_pending_user_interrupts(struct hl_device *hdev)
@@ -981,6 +1096,10 @@ static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
 		return CS_TYPE_WAIT;
 	else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT)
 		return CS_TYPE_COLLECTIVE_WAIT;
+	else if (cs_type_flags & HL_CS_FLAGS_RESERVE_SIGNALS_ONLY)
+		return CS_RESERVE_SIGNALS;
+	else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY)
+		return CS_UNRESERVE_SIGNALS;
 	else
 		return CS_TYPE_DEFAULT;
 }
@@ -1081,7 +1200,8 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev,
 }
 
 static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
-				u64 sequence, u32 flags)
+				u64 sequence, u32 flags,
+				u32 encaps_signal_handle)
 {
 	if (!(flags & HL_CS_FLAGS_STAGED_SUBMISSION))
 		return 0;
@@ -1093,6 +1213,9 @@ static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
 		/* Staged CS sequence is the first CS sequence */
 		INIT_LIST_HEAD(&cs->staged_cs_node);
 		cs->staged_sequence = cs->sequence;
+
+		if (cs->encaps_signals)
+			cs->encaps_sig_hdl_id = encaps_signal_handle;
 	} else {
 		/* User sequence will be validated in 'hl_hw_queue_schedule_cs'
 		 * under the cs_mirror_lock
@@ -1108,9 +1231,20 @@ static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
 	return 0;
 }
 
+static u32 get_stream_master_qid_mask(struct hl_device *hdev, u32 qid)
+{
+	int i;
+
+	for (i = 0; i < hdev->stream_master_qid_arr_size; i++)
+		if (qid == hdev->stream_master_qid_arr[i])
+			return BIT(i);
+
+	return 0;
+}
+
 static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 				u32 num_chunks, u64 *cs_seq, u32 flags,
-				u32 timeout)
+				u32 encaps_signals_handle, u32 timeout)
 {
 	bool staged_mid, int_queues_only = true;
 	struct hl_device *hdev = hpriv->hdev;
@@ -1121,6 +1255,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 	struct hl_cs *cs;
 	struct hl_cb *cb;
 	u64 user_sequence;
+	u8 stream_master_qid_map = 0;
 	int rc, i;
 
 	cntr = &hdev->aggregated_cs_counters;
@@ -1148,7 +1283,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 
 	hl_debugfs_add_cs(cs);
 
-	rc = cs_staged_submission(hdev, cs, user_sequence, flags);
+	rc = cs_staged_submission(hdev, cs, user_sequence, flags,
+						encaps_signals_handle);
 	if (rc)
 		goto free_cs_object;
 
@@ -1179,9 +1315,20 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 			cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
 		}
 
-		if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW)
+		if (queue_type == QUEUE_TYPE_EXT ||
+						queue_type == QUEUE_TYPE_HW) {
 			int_queues_only = false;
 
+			/*
+			 * store which stream are being used for external/HW
+			 * queues of this CS
+			 */
+			if (hdev->supports_wait_for_multi_cs)
+				stream_master_qid_map |=
+					get_stream_master_qid_mask(hdev,
+							chunk->queue_index);
+		}
+
 		job = hl_cs_allocate_job(hdev, queue_type,
 						is_kernel_allocated_cb);
 		if (!job) {
@@ -1242,6 +1389,13 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 		goto free_cs_object;
 	}
 
+	/*
+	 * store the (external/HW queues) streams used by the CS in the
+	 * fence object for multi-CS completion
+	 */
+	if (hdev->supports_wait_for_multi_cs)
+		cs->fence->stream_master_qid_map = stream_master_qid_map;
+
 	rc = hl_hw_queue_schedule_cs(cs);
 	if (rc) {
 		if (rc != -EAGAIN)
@@ -1270,130 +1424,6 @@ out:
 	return rc;
 }
 
-static int pending_cb_create_job(struct hl_device *hdev, struct hl_ctx *ctx,
-		struct hl_cs *cs, struct hl_cb *cb, u32 size, u32 hw_queue_id)
-{
-	struct hw_queue_properties *hw_queue_prop;
-	struct hl_cs_counters_atomic *cntr;
-	struct hl_cs_job *job;
-
-	hw_queue_prop = &hdev->asic_prop.hw_queues_props[hw_queue_id];
-	cntr = &hdev->aggregated_cs_counters;
-
-	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
-	if (!job) {
-		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
-		atomic64_inc(&cntr->out_of_mem_drop_cnt);
-		dev_err(hdev->dev, "Failed to allocate a new job\n");
-		return -ENOMEM;
-	}
-
-	job->id = 0;
-	job->cs = cs;
-	job->user_cb = cb;
-	atomic_inc(&job->user_cb->cs_cnt);
-	job->user_cb_size = size;
-	job->hw_queue_id = hw_queue_id;
-	job->patched_cb = job->user_cb;
-	job->job_cb_size = job->user_cb_size;
-
-	/* increment refcount as for external queues we get completion */
-	cs_get(cs);
-
-	cs->jobs_in_queue_cnt[job->hw_queue_id]++;
-
-	list_add_tail(&job->cs_node, &cs->job_list);
-
-	hl_debugfs_add_job(hdev, job);
-
-	return 0;
-}
-
-static int hl_submit_pending_cb(struct hl_fpriv *hpriv)
-{
-	struct hl_device *hdev = hpriv->hdev;
-	struct hl_ctx *ctx = hpriv->ctx;
-	struct hl_pending_cb *pending_cb, *tmp;
-	struct list_head local_cb_list;
-	struct hl_cs *cs;
-	struct hl_cb *cb;
-	u32 hw_queue_id;
-	u32 cb_size;
-	int process_list, rc = 0;
-
-	if (list_empty(&ctx->pending_cb_list))
-		return 0;
-
-	process_list = atomic_cmpxchg(&ctx->thread_pending_cb_token, 1, 0);
-
-	/* Only a single thread is allowed to process the list */
-	if (!process_list)
-		return 0;
-
-	if (list_empty(&ctx->pending_cb_list))
-		goto free_pending_cb_token;
-
-	/* move all list elements to a local list */
-	INIT_LIST_HEAD(&local_cb_list);
-	spin_lock(&ctx->pending_cb_lock);
-	list_for_each_entry_safe(pending_cb, tmp, &ctx->pending_cb_list,
-								cb_node)
-		list_move_tail(&pending_cb->cb_node, &local_cb_list);
-	spin_unlock(&ctx->pending_cb_lock);
-
-	rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, ULLONG_MAX, &cs, 0,
-				hdev->timeout_jiffies);
-	if (rc)
-		goto add_list_elements;
-
-	hl_debugfs_add_cs(cs);
-
-	/* Iterate through pending cb list, create jobs and add to CS */
-	list_for_each_entry(pending_cb, &local_cb_list, cb_node) {
-		cb = pending_cb->cb;
-		cb_size = pending_cb->cb_size;
-		hw_queue_id = pending_cb->hw_queue_id;
-
-		rc = pending_cb_create_job(hdev, ctx, cs, cb, cb_size,
-								hw_queue_id);
-		if (rc)
-			goto free_cs_object;
-	}
-
-	rc = hl_hw_queue_schedule_cs(cs);
-	if (rc) {
-		if (rc != -EAGAIN)
-			dev_err(hdev->dev,
-				"Failed to submit CS %d.%llu (%d)\n",
-				ctx->asid, cs->sequence, rc);
-		goto free_cs_object;
-	}
-
-	/* pending cb was scheduled successfully */
-	list_for_each_entry_safe(pending_cb, tmp, &local_cb_list, cb_node) {
-		list_del(&pending_cb->cb_node);
-		kfree(pending_cb);
-	}
-
-	cs_put(cs);
-
-	goto free_pending_cb_token;
-
-free_cs_object:
-	cs_rollback(hdev, cs);
-	cs_put(cs);
-add_list_elements:
-	spin_lock(&ctx->pending_cb_lock);
-	list_for_each_entry_safe_reverse(pending_cb, tmp, &local_cb_list,
-								cb_node)
-		list_move(&pending_cb->cb_node, &ctx->pending_cb_list);
-	spin_unlock(&ctx->pending_cb_lock);
-free_pending_cb_token:
-	atomic_set(&ctx->thread_pending_cb_token, 1);
-
-	return rc;
-}
-
 static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
 				u64 *cs_seq)
 {
@@ -1443,7 +1473,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
 			rc = 0;
 		} else {
 			rc = cs_ioctl_default(hpriv, chunks, num_chunks,
-					cs_seq, 0, hdev->timeout_jiffies);
+					cs_seq, 0, 0, hdev->timeout_jiffies);
 		}
 
 		mutex_unlock(&hpriv->restore_phase_mutex);
@@ -1501,10 +1531,17 @@ out:
  * hl_cs_signal_sob_wraparound_handler: handle SOB value wrapaound case.
  * if the SOB value reaches the max value move to the other SOB reserved
  * to the queue.
+ * @hdev: pointer to device structure
+ * @q_idx: stream queue index
+ * @hw_sob: the H/W SOB used in this signal CS.
+ * @count: signals count
+ * @encaps_sig: tells whether it's reservation for encaps signals or not.
+ *
  * Note that this function must be called while hw_queues_lock is taken.
  */
 int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
-			struct hl_hw_sob **hw_sob, u32 count)
+			struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig)
+
 {
 	struct hl_sync_stream_properties *prop;
 	struct hl_hw_sob *sob = *hw_sob, *other_sob;
@@ -1512,7 +1549,7 @@ int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
 
 	prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
 
-	kref_get(&sob->kref);
+	hw_sob_get(sob);
 
 	/* check for wraparound */
 	if (prop->next_sob_val + count >= HL_MAX_SOB_VAL) {
@@ -1522,7 +1559,7 @@ int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
 		 * just incremented the refcount right before calling this
 		 * function.
 		 */
-		kref_put(&sob->kref, hl_sob_reset_error);
+		hw_sob_put_err(sob);
 
 		/*
 		 * check the other sob value, if it still in use then fail
@@ -1537,12 +1574,42 @@ int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
 			return -EINVAL;
 		}
 
-		prop->next_sob_val = 1;
+		/*
+		 * next_sob_val always points to the next available signal
+		 * in the sob, so in encaps signals it will be the next one
+		 * after reserving the required amount.
+		 */
+		if (encaps_sig)
+			prop->next_sob_val = count + 1;
+		else
+			prop->next_sob_val = count;
 
 		/* only two SOBs are currently in use */
 		prop->curr_sob_offset = other_sob_offset;
 		*hw_sob = other_sob;
 
+		/*
+		 * check if other_sob needs reset, then do it before using it
+		 * for the reservation or the next signal cs.
+		 * we do it here, and for both encaps and regular signal cs
+		 * cases in order to avoid possible races of two kref_put
+		 * of the sob which can occur at the same time if we move the
+		 * sob reset(kref_put) to cs_do_release function.
+		 * in addition, if we have combination of cs signal and
+		 * encaps, and at the point we need to reset the sob there was
+		 * no more reservations and only signal cs keep coming,
+		 * in such case we need signal_cs to put the refcount and
+		 * reset the sob.
+		 */
+		if (other_sob->need_reset)
+			hw_sob_put(other_sob);
+
+		if (encaps_sig) {
+			/* set reset indication for the sob */
+			sob->need_reset = true;
+			hw_sob_get(other_sob);
+		}
+
 		dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
 				prop->curr_sob_offset, q_idx);
 	} else {
@@ -1553,12 +1620,18 @@ int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
 }
 
 static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
-		struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx)
+		struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx,
+		bool encaps_signals)
 {
 	u64 *signal_seq_arr = NULL;
 	u32 size_to_copy, signal_seq_arr_len;
 	int rc = 0;
 
+	if (encaps_signals) {
+		*signal_seq = chunk->encaps_signal_seq;
+		return 0;
+	}
+
 	signal_seq_arr_len = chunk->num_signal_seq_arr;
 
 	/* currently only one signal seq is supported */
@@ -1583,7 +1656,7 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
 		return -ENOMEM;
 	}
 
-	size_to_copy = chunk->num_signal_seq_arr * sizeof(*signal_seq_arr);
+	size_to_copy = signal_seq_arr_len * sizeof(*signal_seq_arr);
 	if (copy_from_user(signal_seq_arr,
 				u64_to_user_ptr(chunk->signal_seq_arr),
 				size_to_copy)) {
@@ -1605,8 +1678,8 @@ out:
 }
 
 static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
-		struct hl_ctx *ctx, struct hl_cs *cs, enum hl_queue_type q_type,
-		u32 q_idx)
+		struct hl_ctx *ctx, struct hl_cs *cs,
+		enum hl_queue_type q_type, u32 q_idx, u32 encaps_signal_offset)
 {
 	struct hl_cs_counters_atomic *cntr;
 	struct hl_cs_job *job;
@@ -1644,6 +1717,9 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
 	job->user_cb_size = cb_size;
 	job->hw_queue_id = q_idx;
 
+	if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT)
+			&& cs->encaps_signals)
+		job->encaps_sig_wait_offset = encaps_signal_offset;
 	/*
 	 * No need in parsing, user CB is the patched CB.
 	 * We call hl_cb_destroy() out of two reasons - we don't need the CB in
@@ -1666,11 +1742,196 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
 	return 0;
 }
 
+static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv,
+				u32 q_idx, u32 count,
+				u32 *handle_id, u32 *sob_addr,
+				u32 *signals_count)
+{
+	struct hw_queue_properties *hw_queue_prop;
+	struct hl_sync_stream_properties *prop;
+	struct hl_device *hdev = hpriv->hdev;
+	struct hl_cs_encaps_sig_handle *handle;
+	struct hl_encaps_signals_mgr *mgr;
+	struct hl_hw_sob *hw_sob;
+	int hdl_id;
+	int rc = 0;
+
+	if (count >= HL_MAX_SOB_VAL) {
+		dev_err(hdev->dev, "signals count(%u) exceeds the max SOB value\n",
+						count);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	if (q_idx >= hdev->asic_prop.max_queues) {
+		dev_err(hdev->dev, "Queue index %d is invalid\n",
+			q_idx);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
+
+	if (!hw_queue_prop->supports_sync_stream) {
+		dev_err(hdev->dev,
+			"Queue index %d does not support sync stream operations\n",
+									q_idx);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
+
+	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+	if (!handle) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	handle->count = count;
+	mgr = &hpriv->ctx->sig_mgr;
+
+	spin_lock(&mgr->lock);
+	hdl_id = idr_alloc(&mgr->handles, handle, 1, 0, GFP_ATOMIC);
+	spin_unlock(&mgr->lock);
+
+	if (hdl_id < 0) {
+		dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n");
+		rc = -EINVAL;
+		goto out;
+	}
+
+	handle->id = hdl_id;
+	handle->q_idx = q_idx;
+	handle->hdev = hdev;
+	kref_init(&handle->refcount);
+
+	hdev->asic_funcs->hw_queues_lock(hdev);
+
+	hw_sob = &prop->hw_sob[prop->curr_sob_offset];
+
+	/*
+	 * Increment the SOB value by count by user request
+	 * to reserve those signals
+	 * check if the signals amount to reserve is not exceeding the max sob
+	 * value, if yes then switch sob.
+	 */
+	rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, count,
+								true);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to switch SOB\n");
+		hdev->asic_funcs->hw_queues_unlock(hdev);
+		rc = -EINVAL;
+		goto remove_idr;
+	}
+	/* set the hw_sob to the handle after calling the sob wraparound handler
+	 * since sob could have changed.
+	 */
+	handle->hw_sob = hw_sob;
+
+	/* store the current sob value for unreserve validity check, and
+	 * signal offset support
+	 */
+	handle->pre_sob_val = prop->next_sob_val - handle->count;
+
+	*signals_count = prop->next_sob_val;
+	hdev->asic_funcs->hw_queues_unlock(hdev);
+
+	*sob_addr = handle->hw_sob->sob_addr;
+	*handle_id = hdl_id;
+
+	dev_dbg(hdev->dev,
+		"Signals reserved, sob_id: %d, sob addr: 0x%x, last sob_val: %u, q_idx: %d, hdl_id: %d\n",
+			hw_sob->sob_id, handle->hw_sob->sob_addr,
+			prop->next_sob_val - 1, q_idx, hdl_id);
+	goto out;
+
+remove_idr:
+	spin_lock(&mgr->lock);
+	idr_remove(&mgr->handles, hdl_id);
+	spin_unlock(&mgr->lock);
+
+	kfree(handle);
+out:
+	return rc;
+}
+
+static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id)
+{
+	struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
+	struct hl_sync_stream_properties *prop;
+	struct hl_device *hdev = hpriv->hdev;
+	struct hl_encaps_signals_mgr *mgr;
+	struct hl_hw_sob *hw_sob;
+	u32 q_idx, sob_addr;
+	int rc = 0;
+
+	mgr = &hpriv->ctx->sig_mgr;
+
+	spin_lock(&mgr->lock);
+	encaps_sig_hdl = idr_find(&mgr->handles, handle_id);
+	if (encaps_sig_hdl) {
+		dev_dbg(hdev->dev, "unreserve signals, handle: %u, SOB:0x%x, count: %u\n",
+				handle_id, encaps_sig_hdl->hw_sob->sob_addr,
+					encaps_sig_hdl->count);
+
+		hdev->asic_funcs->hw_queues_lock(hdev);
+
+		q_idx = encaps_sig_hdl->q_idx;
+		prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
+		hw_sob = &prop->hw_sob[prop->curr_sob_offset];
+		sob_addr = hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id);
+
+		/* Check if sob_val got out of sync due to other
+		 * signal submission requests which were handled
+		 * between the reserve-unreserve calls or SOB switch
+		 * upon reaching SOB max value.
+		 */
+		if (encaps_sig_hdl->pre_sob_val + encaps_sig_hdl->count
+				!= prop->next_sob_val ||
+				sob_addr != encaps_sig_hdl->hw_sob->sob_addr) {
+			dev_err(hdev->dev, "Cannot unreserve signals, SOB val ran out of sync, expected: %u, actual val: %u\n",
+				encaps_sig_hdl->pre_sob_val,
+				(prop->next_sob_val - encaps_sig_hdl->count));
+
+			hdev->asic_funcs->hw_queues_unlock(hdev);
+			rc = -EINVAL;
+			goto out;
+		}
+
+		/*
+		 * Decrement the SOB value by count by user request
+		 * to unreserve those signals
+		 */
+		prop->next_sob_val -= encaps_sig_hdl->count;
+
+		hdev->asic_funcs->hw_queues_unlock(hdev);
+
+		hw_sob_put(hw_sob);
+
+		/* Release the id and free allocated memory of the handle */
+		idr_remove(&mgr->handles, handle_id);
+		kfree(encaps_sig_hdl);
+	} else {
+		rc = -EINVAL;
+		dev_err(hdev->dev, "failed to unreserve signals, cannot find handler\n");
+	}
+out:
+	spin_unlock(&mgr->lock);
+
+	return rc;
+}
+
 static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 				void __user *chunks, u32 num_chunks,
 				u64 *cs_seq, u32 flags, u32 timeout)
 {
+	struct hl_cs_encaps_sig_handle *encaps_sig_hdl = NULL;
+	bool handle_found = false, is_wait_cs = false,
+			wait_cs_submitted = false,
+			cs_encaps_signals = false;
 	struct hl_cs_chunk *cs_chunk_array, *chunk;
+	bool staged_cs_with_encaps_signals = false;
 	struct hw_queue_properties *hw_queue_prop;
 	struct hl_device *hdev = hpriv->hdev;
 	struct hl_cs_compl *sig_waitcs_cmpl;
@@ -1730,11 +1991,58 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 		collective_engine_id = chunk->collective_engine_id;
 	}
 
-	if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) {
-		rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq, ctx);
+	is_wait_cs = !!(cs_type == CS_TYPE_WAIT ||
+			cs_type == CS_TYPE_COLLECTIVE_WAIT);
+
+	cs_encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);
+
+	if (is_wait_cs) {
+		rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq,
+				ctx, cs_encaps_signals);
 		if (rc)
 			goto free_cs_chunk_array;
 
+		if (cs_encaps_signals) {
+			/* check if cs sequence has encapsulated
+			 * signals handle
+			 */
+			struct idr *idp;
+			u32 id;
+
+			spin_lock(&ctx->sig_mgr.lock);
+			idp = &ctx->sig_mgr.handles;
+			idr_for_each_entry(idp, encaps_sig_hdl, id) {
+				if (encaps_sig_hdl->cs_seq == signal_seq) {
+					handle_found = true;
+					/* get refcount to protect removing
+					 * this handle from idr, needed when
+					 * multiple wait cs are used with offset
+					 * to wait on reserved encaps signals.
+					 */
+					kref_get(&encaps_sig_hdl->refcount);
+					break;
+				}
+			}
+			spin_unlock(&ctx->sig_mgr.lock);
+
+			if (!handle_found) {
+				dev_err(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n",
+						signal_seq);
+				rc = -EINVAL;
+				goto free_cs_chunk_array;
+			}
+
+			/* validate also the signal offset value */
+			if (chunk->encaps_signal_offset >
+					encaps_sig_hdl->count) {
+				dev_err(hdev->dev, "offset(%u) value exceed max reserved signals count(%u)!\n",
+						chunk->encaps_signal_offset,
+						encaps_sig_hdl->count);
+				rc = -EINVAL;
+				goto free_cs_chunk_array;
+			}
+		}
+
 		sig_fence = hl_ctx_get_fence(ctx, signal_seq);
 		if (IS_ERR(sig_fence)) {
 			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
@@ -1755,11 +2063,16 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 		sig_waitcs_cmpl =
 			container_of(sig_fence, struct hl_cs_compl, base_fence);
 
-		if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) {
+		staged_cs_with_encaps_signals = !!
+				(sig_waitcs_cmpl->type == CS_TYPE_DEFAULT &&
+				(flags & HL_CS_FLAGS_ENCAP_SIGNALS));
+
+		if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL &&
+				!staged_cs_with_encaps_signals) {
 			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
 			atomic64_inc(&cntr->validation_drop_cnt);
 			dev_err(hdev->dev,
-				"CS seq 0x%llx is not of a signal CS\n",
+				"CS seq 0x%llx is not of a signal/encaps-signal CS\n",
 				signal_seq);
 			hl_fence_put(sig_fence);
 			rc = -EINVAL;
@@ -1776,18 +2089,27 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 
 	rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout);
 	if (rc) {
-		if (cs_type == CS_TYPE_WAIT ||
-			cs_type == CS_TYPE_COLLECTIVE_WAIT)
+		if (is_wait_cs)
 			hl_fence_put(sig_fence);
+
 		goto free_cs_chunk_array;
 	}
 
 	/*
 	 * Save the signal CS fence for later initialization right before
 	 * hanging the wait CS on the queue.
+	 * for encaps signals case, we save the cs sequence and handle pointer
+	 * for later initialization.
 	 */
-	if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT)
+	if (is_wait_cs) {
 		cs->signal_fence = sig_fence;
+		/* store the handle pointer, so we don't have to
+		 * look for it again, later on the flow
+		 * when we need to set SOB info in hw_queue.
+		 */
+		if (cs->encaps_signals)
+			cs->encaps_sig_hdl = encaps_sig_hdl;
+	}
 
 	hl_debugfs_add_cs(cs);
 
@@ -1795,10 +2117,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 
 	if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL)
 		rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type,
-				q_idx);
+				q_idx, chunk->encaps_signal_offset);
 	else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
 		rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
-				cs, q_idx, collective_engine_id);
+				cs, q_idx, collective_engine_id,
+				chunk->encaps_signal_offset);
 	else {
 		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
 		atomic64_inc(&cntr->validation_drop_cnt);
@@ -1810,7 +2133,13 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 
 	rc = hl_hw_queue_schedule_cs(cs);
 	if (rc) {
-		if (rc != -EAGAIN)
+		/* In case wait cs failed here, it means the signal cs
+		 * already completed. we want to free all it's related objects
+		 * but we don't want to fail the ioctl.
+		 */
+		if (is_wait_cs)
+			rc = 0;
+		else if (rc != -EAGAIN)
 			dev_err(hdev->dev,
 				"Failed to submit CS %d.%llu to H/W queues, error %d\n",
 				ctx->asid, cs->sequence, rc);
@@ -1818,6 +2147,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	}
 
 	rc = HL_CS_STATUS_SUCCESS;
+	if (is_wait_cs)
+		wait_cs_submitted = true;
 	goto put_cs;
 
 free_cs_object:
@@ -1828,6 +2159,10 @@ put_cs:
 	/* We finished with the CS in this function, so put the ref */
 	cs_put(cs);
 free_cs_chunk_array:
+	if (!wait_cs_submitted && cs_encaps_signals && handle_found &&
+							is_wait_cs)
+		kref_put(&encaps_sig_hdl->refcount,
+				hl_encaps_handle_do_release);
 	kfree(cs_chunk_array);
 out:
 	return rc;
@@ -1836,10 +2171,11 @@ out:
 int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 {
 	union hl_cs_args *args = data;
-	enum hl_cs_type cs_type;
+	enum hl_cs_type cs_type = 0;
 	u64 cs_seq = ULONG_MAX;
 	void __user *chunks;
-	u32 num_chunks, flags, timeout;
+	u32 num_chunks, flags, timeout,
+		signals_count = 0, sob_addr = 0, handle_id = 0;
 	int rc;
 
 	rc = hl_cs_sanity_checks(hpriv, args);
@@ -1850,10 +2186,6 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 	if (rc)
 		goto out;
 
-	rc = hl_submit_pending_cb(hpriv);
-	if (rc)
-		goto out;
-
 	cs_type = hl_cs_get_cs_type(args->in.cs_flags &
 					~HL_CS_FLAGS_FORCE_RESTORE);
 	chunks = (void __user *) (uintptr_t) args->in.chunks_execute;
@@ -1876,80 +2208,448 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 		rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
 					&cs_seq, args->in.cs_flags, timeout);
 		break;
+	case CS_RESERVE_SIGNALS:
+		rc = cs_ioctl_reserve_signals(hpriv,
+					args->in.encaps_signals_q_idx,
+					args->in.encaps_signals_count,
+					&handle_id, &sob_addr, &signals_count);
+		break;
+	case CS_UNRESERVE_SIGNALS:
+		rc = cs_ioctl_unreserve_signals(hpriv,
+					args->in.encaps_sig_handle_id);
+		break;
 	default:
 		rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
-						args->in.cs_flags, timeout);
+						args->in.cs_flags,
+						args->in.encaps_sig_handle_id,
+						timeout);
 		break;
 	}
-
 out:
 	if (rc != -EAGAIN) {
 		memset(args, 0, sizeof(*args));
+
+		if (cs_type == CS_RESERVE_SIGNALS) {
+			args->out.handle_id = handle_id;
+			args->out.sob_base_addr_offset = sob_addr;
+			args->out.count = signals_count;
+		} else {
+			args->out.seq = cs_seq;
+		}
 		args->out.status = rc;
-		args->out.seq = cs_seq;
 	}
 
 	return rc;
 }
 
+static int hl_wait_for_fence(struct hl_ctx *ctx, u64 seq, struct hl_fence *fence,
+				enum hl_cs_wait_status *status, u64 timeout_us,
+				s64 *timestamp)
+{
+	struct hl_device *hdev = ctx->hdev;
+	long completion_rc;
+	int rc = 0;
+
+	if (IS_ERR(fence)) {
+		rc = PTR_ERR(fence);
+		if (rc == -EINVAL)
+			dev_notice_ratelimited(hdev->dev,
+				"Can't wait on CS %llu because current CS is at seq %llu\n",
+				seq, ctx->cs_sequence);
+		return rc;
+	}
+
+	if (!fence) {
+		dev_dbg(hdev->dev,
+			"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
+				seq, ctx->cs_sequence);
+
+		*status = CS_WAIT_STATUS_GONE;
+		return 0;
+	}
+
+	if (!timeout_us) {
+		completion_rc = completion_done(&fence->completion);
+	} else {
+		unsigned long timeout;
+
+		timeout = (timeout_us == MAX_SCHEDULE_TIMEOUT) ?
+				timeout_us : usecs_to_jiffies(timeout_us);
+		completion_rc =
+			wait_for_completion_interruptible_timeout(
+				&fence->completion, timeout);
+	}
+
+	if (completion_rc > 0) {
+		*status = CS_WAIT_STATUS_COMPLETED;
+		if (timestamp)
+			*timestamp = ktime_to_ns(fence->timestamp);
+	} else {
+		*status = CS_WAIT_STATUS_BUSY;
+	}
+
+	if (fence->error == -ETIMEDOUT)
+		rc = -ETIMEDOUT;
+	else if (fence->error == -EIO)
+		rc = -EIO;
+
+	return rc;
+}
+
+/*
+ * hl_cs_poll_fences - iterate CS fences to check for CS completion
+ *
+ * @mcs_data: multi-CS internal data
+ *
+ * @return 0 on success, otherwise non 0 error code
+ *
+ * The function iterates on all CS sequence in the list and set bit in
+ * completion_bitmap for each completed CS.
+ * while iterating, the function can extracts the stream map to be later
+ * used by the waiting function.
+ * this function shall be called after taking context ref
+ */
+static int hl_cs_poll_fences(struct multi_cs_data *mcs_data)
+{
+	struct hl_fence **fence_ptr = mcs_data->fence_arr;
+	struct hl_device *hdev = mcs_data->ctx->hdev;
+	int i, rc, arr_len = mcs_data->arr_len;
+	u64 *seq_arr = mcs_data->seq_arr;
+	ktime_t max_ktime, first_cs_time;
+	enum hl_cs_wait_status status;
+
+	memset(fence_ptr, 0, arr_len * sizeof(*fence_ptr));
+
+	/* get all fences under the same lock */
+	rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len);
+	if (rc)
+		return rc;
+
+	/*
+	 * set to maximum time to verify timestamp is valid: if at the end
+	 * this value is maintained- no timestamp was updated
+	 */
+	max_ktime = ktime_set(KTIME_SEC_MAX, 0);
+	first_cs_time = max_ktime;
+
+	for (i = 0; i < arr_len; i++, fence_ptr++) {
+		struct hl_fence *fence = *fence_ptr;
+
+		/*
+		 * function won't sleep as it is called with timeout 0 (i.e.
+		 * poll the fence)
+		 */
+		rc = hl_wait_for_fence(mcs_data->ctx, seq_arr[i], fence,
+						&status, 0, NULL);
+		if (rc) {
+			dev_err(hdev->dev,
+				"wait_for_fence error :%d for CS seq %llu\n",
+								rc, seq_arr[i]);
+			break;
+		}
+
+		mcs_data->stream_master_qid_map |= fence->stream_master_qid_map;
+
+		if (status == CS_WAIT_STATUS_BUSY)
+			continue;
+
+		mcs_data->completion_bitmap |= BIT(i);
+
+		/*
+		 * best effort to extract timestamp. few notes:
+		 * - if even single fence is gone we cannot extract timestamp
+		 *   (as fence not exist anymore)
+		 * - for all completed CSs we take the earliest timestamp.
+		 *   for this we have to validate that:
+		 *       1. given timestamp was indeed set
+		 *       2. the timestamp is earliest of all timestamps so far
+		 */
+
+		if (status == CS_WAIT_STATUS_GONE) {
+			mcs_data->update_ts = false;
+			mcs_data->gone_cs = true;
+		} else if (mcs_data->update_ts &&
+			(ktime_compare(fence->timestamp,
+						ktime_set(0, 0)) > 0) &&
+			(ktime_compare(fence->timestamp, first_cs_time) < 0)) {
+			first_cs_time = fence->timestamp;
+		}
+	}
+
+	hl_fences_put(mcs_data->fence_arr, arr_len);
+
+	if (mcs_data->update_ts &&
+			(ktime_compare(first_cs_time, max_ktime) != 0))
+		mcs_data->timestamp = ktime_to_ns(first_cs_time);
+
+	return rc;
+}
+
 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 				u64 timeout_us, u64 seq,
 				enum hl_cs_wait_status *status, s64 *timestamp)
 {
 	struct hl_fence *fence;
-	unsigned long timeout;
 	int rc = 0;
-	long completion_rc;
 
 	if (timestamp)
 		*timestamp = 0;
 
-	if (timeout_us == MAX_SCHEDULE_TIMEOUT)
-		timeout = timeout_us;
-	else
-		timeout = usecs_to_jiffies(timeout_us);
-
 	hl_ctx_get(hdev, ctx);
 
 	fence = hl_ctx_get_fence(ctx, seq);
-	if (IS_ERR(fence)) {
-		rc = PTR_ERR(fence);
-		if (rc == -EINVAL)
-			dev_notice_ratelimited(hdev->dev,
-				"Can't wait on CS %llu because current CS is at seq %llu\n",
-				seq, ctx->cs_sequence);
-	} else if (fence) {
-		if (!timeout_us)
-			completion_rc = completion_done(&fence->completion);
-		else
-			completion_rc =
-				wait_for_completion_interruptible_timeout(
-					&fence->completion, timeout);
 
-		if (completion_rc > 0) {
-			*status = CS_WAIT_STATUS_COMPLETED;
-			if (timestamp)
-				*timestamp = ktime_to_ns(fence->timestamp);
-		} else {
-			*status = CS_WAIT_STATUS_BUSY;
+	rc = hl_wait_for_fence(ctx, seq, fence, status, timeout_us, timestamp);
+	hl_fence_put(fence);
+	hl_ctx_put(ctx);
+
+	return rc;
+}
+
+/*
+ * hl_wait_multi_cs_completion_init - init completion structure
+ *
+ * @hdev: pointer to habanalabs device structure
+ * @stream_master_bitmap: stream master QIDs map, set bit indicates stream
+ *                        master QID to wait on
+ *
+ * @return valid completion struct pointer on success, otherwise error pointer
+ *
+ * up to MULTI_CS_MAX_USER_CTX calls can be done concurrently to the driver.
+ * the function gets the first available completion (by marking it "used")
+ * and initialize its values.
+ */
+static struct multi_cs_completion *hl_wait_multi_cs_completion_init(
+							struct hl_device *hdev,
+							u8 stream_master_bitmap)
+{
+	struct multi_cs_completion *mcs_compl;
+	int i;
+
+	/* find free multi_cs completion structure */
+	for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
+		mcs_compl = &hdev->multi_cs_completion[i];
+		spin_lock(&mcs_compl->lock);
+		if (!mcs_compl->used) {
+			mcs_compl->used = 1;
+			mcs_compl->timestamp = 0;
+			mcs_compl->stream_master_qid_map = stream_master_bitmap;
+			reinit_completion(&mcs_compl->completion);
+			spin_unlock(&mcs_compl->lock);
+			break;
 		}
+		spin_unlock(&mcs_compl->lock);
+	}
 
-		if (fence->error == -ETIMEDOUT)
-			rc = -ETIMEDOUT;
-		else if (fence->error == -EIO)
-			rc = -EIO;
+	if (i == MULTI_CS_MAX_USER_CTX) {
+		dev_err(hdev->dev,
+				"no available multi-CS completion structure\n");
+		return ERR_PTR(-ENOMEM);
+	}
+	return mcs_compl;
+}
 
-		hl_fence_put(fence);
-	} else {
-		dev_dbg(hdev->dev,
-			"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
-			seq, ctx->cs_sequence);
-		*status = CS_WAIT_STATUS_GONE;
+/*
+ * hl_wait_multi_cs_completion_fini - return completion structure and set as
+ *                                    unused
+ *
+ * @mcs_compl: pointer to the completion structure
+ */
+static void hl_wait_multi_cs_completion_fini(
+					struct multi_cs_completion *mcs_compl)
+{
+	/*
+	 * free completion structure, do it under lock to be in-sync with the
+	 * thread that signals completion
+	 */
+	spin_lock(&mcs_compl->lock);
+	mcs_compl->used = 0;
+	spin_unlock(&mcs_compl->lock);
+}
+
+/*
+ * hl_wait_multi_cs_completion - wait for first CS to complete
+ *
+ * @mcs_data: multi-CS internal data
+ *
+ * @return 0 on success, otherwise non 0 error code
+ */
+static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data)
+{
+	struct hl_device *hdev = mcs_data->ctx->hdev;
+	struct multi_cs_completion *mcs_compl;
+	long completion_rc;
+
+	mcs_compl = hl_wait_multi_cs_completion_init(hdev,
+					mcs_data->stream_master_qid_map);
+	if (IS_ERR(mcs_compl))
+		return PTR_ERR(mcs_compl);
+
+	completion_rc = wait_for_completion_interruptible_timeout(
+					&mcs_compl->completion,
+					usecs_to_jiffies(mcs_data->timeout_us));
+
+	/* update timestamp */
+	if (completion_rc > 0)
+		mcs_data->timestamp = mcs_compl->timestamp;
+
+	hl_wait_multi_cs_completion_fini(mcs_compl);
+
+	mcs_data->wait_status = completion_rc;
+
+	return 0;
+}
+
+/*
+ * hl_multi_cs_completion_init - init array of multi-CS completion structures
+ *
+ * @hdev: pointer to habanalabs device structure
+ */
+void hl_multi_cs_completion_init(struct hl_device *hdev)
+{
+	struct multi_cs_completion *mcs_cmpl;
+	int i;
+
+	for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
+		mcs_cmpl = &hdev->multi_cs_completion[i];
+		mcs_cmpl->used = 0;
+		spin_lock_init(&mcs_cmpl->lock);
+		init_completion(&mcs_cmpl->completion);
+	}
+}
+
+/*
+ * hl_multi_cs_wait_ioctl - implementation of the multi-CS wait ioctl
+ *
+ * @hpriv: pointer to the private data of the fd
+ * @data: pointer to multi-CS wait ioctl in/out args
+ *
+ */
+static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
+{
+	struct hl_device *hdev = hpriv->hdev;
+	struct multi_cs_data mcs_data = {0};
+	union hl_wait_cs_args *args = data;
+	struct hl_ctx *ctx = hpriv->ctx;
+	struct hl_fence **fence_arr;
+	void __user *seq_arr;
+	u32 size_to_copy;
+	u64 *cs_seq_arr;
+	u8 seq_arr_len;
+	int rc;
+
+	if (!hdev->supports_wait_for_multi_cs) {
+		dev_err(hdev->dev, "Wait for multi CS is not supported\n");
+		return -EPERM;
+	}
+
+	seq_arr_len = args->in.seq_arr_len;
+
+	if (seq_arr_len > HL_WAIT_MULTI_CS_LIST_MAX_LEN) {
+		dev_err(hdev->dev, "Can wait only up to %d CSs, input sequence is of length %u\n",
+				HL_WAIT_MULTI_CS_LIST_MAX_LEN, seq_arr_len);
+		return -EINVAL;
+	}
+
+	/* allocate memory for sequence array */
+	cs_seq_arr =
+		kmalloc_array(seq_arr_len, sizeof(*cs_seq_arr), GFP_KERNEL);
+	if (!cs_seq_arr)
+		return -ENOMEM;
+
+	/* copy CS sequence array from user */
+	seq_arr = (void __user *) (uintptr_t) args->in.seq;
+	size_to_copy = seq_arr_len * sizeof(*cs_seq_arr);
+	if (copy_from_user(cs_seq_arr, seq_arr, size_to_copy)) {
+		dev_err(hdev->dev, "Failed to copy multi-cs sequence array from user\n");
+		rc = -EFAULT;
+		goto free_seq_arr;
+	}
+
+	/* allocate array for the fences */
+	fence_arr = kmalloc_array(seq_arr_len, sizeof(*fence_arr), GFP_KERNEL);
+	if (!fence_arr) {
+		rc = -ENOMEM;
+		goto free_seq_arr;
+	}
+
+	/* initialize the multi-CS internal data */
+	mcs_data.ctx = ctx;
+	mcs_data.seq_arr = cs_seq_arr;
+	mcs_data.fence_arr = fence_arr;
+	mcs_data.arr_len = seq_arr_len;
+
+	hl_ctx_get(hdev, ctx);
+
+	/* poll all CS fences, extract timestamp */
+	mcs_data.update_ts = true;
+	rc = hl_cs_poll_fences(&mcs_data);
+	/*
+	 * skip wait for CS completion when one of the below is true:
+	 * - an error on the poll function
+	 * - one or more CS in the list completed
+	 * - the user called ioctl with timeout 0
+	 */
+	if (rc || mcs_data.completion_bitmap || !args->in.timeout_us)
+		goto put_ctx;
+
+	/* wait (with timeout) for the first CS to be completed */
+	mcs_data.timeout_us = args->in.timeout_us;
+	rc = hl_wait_multi_cs_completion(&mcs_data);
+	if (rc)
+		goto put_ctx;
+
+	if (mcs_data.wait_status > 0) {
+		/*
+		 * poll fences once again to update the CS map.
+		 * no timestamp should be updated this time.
+		 */
+		mcs_data.update_ts = false;
+		rc = hl_cs_poll_fences(&mcs_data);
+
+		/*
+		 * if hl_wait_multi_cs_completion returned before timeout (i.e.
+		 * it got a completion) we expect to see at least one CS
+		 * completed after the poll function.
+		 */
+		if (!mcs_data.completion_bitmap) {
+			dev_err(hdev->dev, "Multi-CS got completion on wait but no CS completed\n");
+			rc = -EFAULT;
+		}
 	}
 
+put_ctx:
 	hl_ctx_put(ctx);
+	kfree(fence_arr);
 
-	return rc;
+free_seq_arr:
+	kfree(cs_seq_arr);
+
+	/* update output args */
+	memset(args, 0, sizeof(*args));
+	if (rc)
+		return rc;
+
+	if (mcs_data.completion_bitmap) {
+		args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
+		args->out.cs_completion_map = mcs_data.completion_bitmap;
+
+		/* if timestamp not 0- it's valid */
+		if (mcs_data.timestamp) {
+			args->out.timestamp_nsec = mcs_data.timestamp;
+			args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
+		}
+
+		/* update if some CS was gone */
+		if (mcs_data.timestamp)
+			args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
+	} else if (mcs_data.wait_status == -ERESTARTSYS) {
+		args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
+	} else {
+		args->out.status = HL_WAIT_CS_STATUS_BUSY;
+	}
+
+	return 0;
 }
 
 static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
@@ -2015,9 +2715,9 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 {
 	struct hl_user_pending_interrupt *pend;
 	struct hl_user_interrupt *interrupt;
-	unsigned long timeout;
-	long completion_rc;
+	unsigned long timeout, flags;
 	u32 completion_value;
+	long completion_rc;
 	int rc = 0;
 
 	if (timeout_us == U32_MAX)
@@ -2040,17 +2740,10 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 	else
 		interrupt = &hdev->user_interrupt[interrupt_offset];
 
-	spin_lock(&interrupt->wait_list_lock);
-	if (!hl_device_operational(hdev, NULL)) {
-		rc = -EPERM;
-		goto unlock_and_free_fence;
-	}
-
 	if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
-		dev_err(hdev->dev,
-			"Failed to copy completion value from user\n");
+		dev_err(hdev->dev, "Failed to copy completion value from user\n");
 		rc = -EFAULT;
-		goto unlock_and_free_fence;
+		goto free_fence;
 	}
 
 	if (completion_value >= target_value)
@@ -2059,48 +2752,57 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 		*status = CS_WAIT_STATUS_BUSY;
 
 	if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED))
-		goto unlock_and_free_fence;
+		goto free_fence;
 
 	/* Add pending user interrupt to relevant list for the interrupt
 	 * handler to monitor
 	 */
+	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
 	list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
-	spin_unlock(&interrupt->wait_list_lock);
+	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
 wait_again:
 	/* Wait for interrupt handler to signal completion */
-	completion_rc =
-		wait_for_completion_interruptible_timeout(
-				&pend->fence.completion, timeout);
+	completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
+										timeout);
 
 	/* If timeout did not expire we need to perform the comparison.
 	 * If comparison fails, keep waiting until timeout expires
 	 */
 	if (completion_rc > 0) {
-		if (copy_from_user(&completion_value,
-				u64_to_user_ptr(user_address), 4)) {
-			dev_err(hdev->dev,
-				"Failed to copy completion value from user\n");
+		if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
+			dev_err(hdev->dev, "Failed to copy completion value from user\n");
 			rc = -EFAULT;
+
 			goto remove_pending_user_interrupt;
 		}
 
 		if (completion_value >= target_value) {
 			*status = CS_WAIT_STATUS_COMPLETED;
 		} else {
+			spin_lock_irqsave(&interrupt->wait_list_lock, flags);
+			reinit_completion(&pend->fence.completion);
 			timeout = completion_rc;
+
+			spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 			goto wait_again;
 		}
+	} else if (completion_rc == -ERESTARTSYS) {
+		dev_err_ratelimited(hdev->dev,
+			"user process got signal while waiting for interrupt ID %d\n",
+			interrupt->interrupt_id);
+		*status = HL_WAIT_CS_STATUS_INTERRUPTED;
+		rc = -EINTR;
 	} else {
 		*status = CS_WAIT_STATUS_BUSY;
 	}
 
 remove_pending_user_interrupt:
-	spin_lock(&interrupt->wait_list_lock);
+	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
 	list_del(&pend->wait_list_node);
+	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
-unlock_and_free_fence:
-	spin_unlock(&interrupt->wait_list_lock);
+free_fence:
 	kfree(pend);
 	hl_ctx_put(ctx);
 
@@ -2148,8 +2850,9 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 	memset(args, 0, sizeof(*args));
 
 	if (rc) {
-		dev_err_ratelimited(hdev->dev,
-			"interrupt_wait_ioctl failed (%d)\n", rc);
+		if (rc != -EINTR)
+			dev_err_ratelimited(hdev->dev,
+				"interrupt_wait_ioctl failed (%d)\n", rc);
 
 		return rc;
 	}
@@ -2173,8 +2876,16 @@ int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 	u32 flags = args->in.flags;
 	int rc;
 
+	/* If the device is not operational, no point in waiting for any command submission or
+	 * user interrupt
+	 */
+	if (!hl_device_operational(hpriv->hdev, NULL))
+		return -EPERM;
+
 	if (flags & HL_WAIT_CS_FLAGS_INTERRUPT)
 		rc = hl_interrupt_wait_ioctl(hpriv, data);
+	else if (flags & HL_WAIT_CS_FLAGS_MULTI_CS)
+		rc = hl_multi_cs_wait_ioctl(hpriv, data);
 	else
 		rc = hl_cs_wait_ioctl(hpriv, data);
 
diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c
index 19b6b045219e..22978303ad63 100644
--- a/drivers/misc/habanalabs/common/context.c
+++ b/drivers/misc/habanalabs/common/context.c
@@ -9,16 +9,70 @@
 
 #include <linux/slab.h>
 
+void hl_encaps_handle_do_release(struct kref *ref)
+{
+	struct hl_cs_encaps_sig_handle *handle =
+		container_of(ref, struct hl_cs_encaps_sig_handle, refcount);
+	struct hl_ctx *ctx = handle->hdev->compute_ctx;
+	struct hl_encaps_signals_mgr *mgr = &ctx->sig_mgr;
+
+	spin_lock(&mgr->lock);
+	idr_remove(&mgr->handles, handle->id);
+	spin_unlock(&mgr->lock);
+
+	kfree(handle);
+}
+
+static void hl_encaps_handle_do_release_sob(struct kref *ref)
+{
+	struct hl_cs_encaps_sig_handle *handle =
+		container_of(ref, struct hl_cs_encaps_sig_handle, refcount);
+	struct hl_ctx *ctx = handle->hdev->compute_ctx;
+	struct hl_encaps_signals_mgr *mgr = &ctx->sig_mgr;
+
+	/* if we're here, then there was a signals reservation but cs with
+	 * encaps signals wasn't submitted, so need to put refcount
+	 * to hw_sob taken at the reservation.
+	 */
+	hw_sob_put(handle->hw_sob);
+
+	spin_lock(&mgr->lock);
+	idr_remove(&mgr->handles, handle->id);
+	spin_unlock(&mgr->lock);
+
+	kfree(handle);
+}
+
+static void hl_encaps_sig_mgr_init(struct hl_encaps_signals_mgr *mgr)
+{
+	spin_lock_init(&mgr->lock);
+	idr_init(&mgr->handles);
+}
+
+static void hl_encaps_sig_mgr_fini(struct hl_device *hdev,
+			struct hl_encaps_signals_mgr *mgr)
+{
+	struct hl_cs_encaps_sig_handle *handle;
+	struct idr *idp;
+	u32 id;
+
+	idp = &mgr->handles;
+
+	if (!idr_is_empty(idp)) {
+		dev_warn(hdev->dev, "device released while some encaps signals handles are still allocated\n");
+		idr_for_each_entry(idp, handle, id)
+			kref_put(&handle->refcount,
+					hl_encaps_handle_do_release_sob);
+	}
+
+	idr_destroy(&mgr->handles);
+}
+
 static void hl_ctx_fini(struct hl_ctx *ctx)
 {
 	struct hl_device *hdev = ctx->hdev;
 	int i;
 
-	/* Release all allocated pending cb's, those cb's were never
-	 * scheduled so it is safe to release them here
-	 */
-	hl_pending_cb_list_flush(ctx);
-
 	/* Release all allocated HW block mapped list entries and destroy
 	 * the mutex.
 	 */
@@ -53,6 +107,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
 		hl_cb_va_pool_fini(ctx);
 		hl_vm_ctx_fini(ctx);
 		hl_asid_free(hdev, ctx->asid);
+		hl_encaps_sig_mgr_fini(hdev, &ctx->sig_mgr);
 
 		/* Scrub both SRAM and DRAM */
 		hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
@@ -130,9 +185,6 @@ void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
 {
 	if (kref_put(&ctx->refcount, hl_ctx_do_release) == 1)
 		return;
-
-	dev_warn(hdev->dev,
-		"user process released device but its command submissions are still executing\n");
 }
 
 int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
@@ -144,11 +196,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 	kref_init(&ctx->refcount);
 
 	ctx->cs_sequence = 1;
-	INIT_LIST_HEAD(&ctx->pending_cb_list);
-	spin_lock_init(&ctx->pending_cb_lock);
 	spin_lock_init(&ctx->cs_lock);
 	atomic_set(&ctx->thread_ctx_switch_token, 1);
-	atomic_set(&ctx->thread_pending_cb_token, 1);
 	ctx->thread_ctx_switch_wait_token = 0;
 	ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
 				sizeof(struct hl_fence *),
@@ -200,6 +249,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 			goto err_cb_va_pool_fini;
 		}
 
+		hl_encaps_sig_mgr_init(&ctx->sig_mgr);
+
 		dev_dbg(hdev->dev, "create user context %d\n", ctx->asid);
 	}
 
@@ -229,25 +280,40 @@ int hl_ctx_put(struct hl_ctx *ctx)
 	return kref_put(&ctx->refcount, hl_ctx_do_release);
 }
 
-struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
+/*
+ * hl_ctx_get_fence_locked - get CS fence under CS lock
+ *
+ * @ctx: pointer to the context structure.
+ * @seq: CS sequences number
+ *
+ * @return valid fence pointer on success, NULL if fence is gone, otherwise
+ *         error pointer.
+ *
+ * NOTE: this function shall be called with cs_lock locked
+ */
+static struct hl_fence *hl_ctx_get_fence_locked(struct hl_ctx *ctx, u64 seq)
 {
 	struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop;
 	struct hl_fence *fence;
 
-	spin_lock(&ctx->cs_lock);
-
-	if (seq >= ctx->cs_sequence) {
-		spin_unlock(&ctx->cs_lock);
+	if (seq >= ctx->cs_sequence)
 		return ERR_PTR(-EINVAL);
-	}
 
-	if (seq + asic_prop->max_pending_cs < ctx->cs_sequence) {
-		spin_unlock(&ctx->cs_lock);
+	if (seq + asic_prop->max_pending_cs < ctx->cs_sequence)
 		return NULL;
-	}
 
 	fence = ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)];
 	hl_fence_get(fence);
+	return fence;
+}
+
+struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
+{
+	struct hl_fence *fence;
+
+	spin_lock(&ctx->cs_lock);
+
+	fence = hl_ctx_get_fence_locked(ctx, seq);
 
 	spin_unlock(&ctx->cs_lock);
 
@@ -255,6 +321,46 @@ struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
 }
 
 /*
+ * hl_ctx_get_fences - get multiple CS fences under the same CS lock
+ *
+ * @ctx: pointer to the context structure.
+ * @seq_arr: array of CS sequences to wait for
+ * @fence: fence array to store the CS fences
+ * @arr_len: length of seq_arr and fence_arr
+ *
+ * @return 0 on success, otherwise non 0 error code
+ */
+int hl_ctx_get_fences(struct hl_ctx *ctx, u64 *seq_arr,
+				struct hl_fence **fence, u32 arr_len)
+{
+	struct hl_fence **fence_arr_base = fence;
+	int i, rc = 0;
+
+	spin_lock(&ctx->cs_lock);
+
+	for (i = 0; i < arr_len; i++, fence++) {
+		u64 seq = seq_arr[i];
+
+		*fence = hl_ctx_get_fence_locked(ctx, seq);
+
+		if (IS_ERR(*fence)) {
+			dev_err(ctx->hdev->dev,
+				"Failed to get fence for CS with seq 0x%llx\n",
+					seq);
+			rc = PTR_ERR(*fence);
+			break;
+		}
+	}
+
+	spin_unlock(&ctx->cs_lock);
+
+	if (rc)
+		hl_fences_put(fence_arr_base, i);
+
+	return rc;
+}
+
+/*
  * hl_ctx_mgr_init - initialize the context manager
  *
  * @mgr: pointer to context manager structure
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index 703d79fb6f3f..985f1f3dbd20 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -209,12 +209,12 @@ static int userptr_show(struct seq_file *s, void *data)
 		if (first) {
 			first = false;
 			seq_puts(s, "\n");
-			seq_puts(s, " user virtual address     size             dma dir\n");
+			seq_puts(s, " pid      user virtual address     size             dma dir\n");
 			seq_puts(s, "----------------------------------------------------------\n");
 		}
-		seq_printf(s,
-			"    0x%-14llx      %-10u    %-30s\n",
-			userptr->addr, userptr->size, dma_dir[userptr->dir]);
+		seq_printf(s, " %-7d  0x%-14llx      %-10llu    %-30s\n",
+				userptr->pid, userptr->addr, userptr->size,
+				dma_dir[userptr->dir]);
 	}
 
 	spin_unlock(&dev_entry->userptr_spinlock);
@@ -235,7 +235,7 @@ static int vm_show(struct seq_file *s, void *data)
 	struct hl_vm_hash_node *hnode;
 	struct hl_userptr *userptr;
 	struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
-	enum vm_type_t *vm_type;
+	enum vm_type *vm_type;
 	bool once = true;
 	u64 j;
 	int i;
@@ -261,7 +261,7 @@ static int vm_show(struct seq_file *s, void *data)
 			if (*vm_type == VM_TYPE_USERPTR) {
 				userptr = hnode->ptr;
 				seq_printf(s,
-					"    0x%-14llx      %-10u\n",
+					"    0x%-14llx      %-10llu\n",
 					hnode->vaddr, userptr->size);
 			} else {
 				phys_pg_pack = hnode->ptr;
@@ -320,6 +320,77 @@ static int vm_show(struct seq_file *s, void *data)
 	return 0;
 }
 
+static int userptr_lookup_show(struct seq_file *s, void *data)
+{
+	struct hl_debugfs_entry *entry = s->private;
+	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+	struct scatterlist *sg;
+	struct hl_userptr *userptr;
+	bool first = true;
+	u64 total_npages, npages, sg_start, sg_end;
+	dma_addr_t dma_addr;
+	int i;
+
+	spin_lock(&dev_entry->userptr_spinlock);
+
+	list_for_each_entry(userptr, &dev_entry->userptr_list, debugfs_list) {
+		if (dev_entry->userptr_lookup >= userptr->addr &&
+		dev_entry->userptr_lookup < userptr->addr + userptr->size) {
+			total_npages = 0;
+			for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents,
+					i) {
+				npages = hl_get_sg_info(sg, &dma_addr);
+				sg_start = userptr->addr +
+					total_npages * PAGE_SIZE;
+				sg_end = userptr->addr +
+					(total_npages + npages) * PAGE_SIZE;
+
+				if (dev_entry->userptr_lookup >= sg_start &&
+				    dev_entry->userptr_lookup < sg_end) {
+					dma_addr += (dev_entry->userptr_lookup -
+							sg_start);
+					if (first) {
+						first = false;
+						seq_puts(s, "\n");
+						seq_puts(s, " user virtual address         dma address       pid        region start     region size\n");
+						seq_puts(s, "---------------------------------------------------------------------------------------\n");
+					}
+					seq_printf(s, " 0x%-18llx  0x%-16llx  %-8u  0x%-16llx %-12llu\n",
+						dev_entry->userptr_lookup,
+						(u64)dma_addr, userptr->pid,
+						userptr->addr, userptr->size);
+				}
+				total_npages += npages;
+			}
+		}
+	}
+
+	spin_unlock(&dev_entry->userptr_spinlock);
+
+	if (!first)
+		seq_puts(s, "\n");
+
+	return 0;
+}
+
+static ssize_t userptr_lookup_write(struct file *file, const char __user *buf,
+		size_t count, loff_t *f_pos)
+{
+	struct seq_file *s = file->private_data;
+	struct hl_debugfs_entry *entry = s->private;
+	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+	ssize_t rc;
+	u64 value;
+
+	rc = kstrtoull_from_user(buf, count, 16, &value);
+	if (rc)
+		return rc;
+
+	dev_entry->userptr_lookup = value;
+
+	return count;
+}
+
 static int mmu_show(struct seq_file *s, void *data)
 {
 	struct hl_debugfs_entry *entry = s->private;
@@ -349,7 +420,7 @@ static int mmu_show(struct seq_file *s, void *data)
 		return 0;
 	}
 
-	phys_addr = hops_info.hop_info[hops_info.used_hops - 1].hop_pte_val;
+	hl_mmu_va_to_pa(ctx, virt_addr, &phys_addr);
 
 	if (hops_info.scrambled_vaddr &&
 		(dev_entry->mmu_addr != hops_info.scrambled_vaddr))
@@ -491,11 +562,10 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size,
 	struct hl_vm_phys_pg_pack *phys_pg_pack;
 	struct hl_ctx *ctx = hdev->compute_ctx;
 	struct hl_vm_hash_node *hnode;
+	u64 end_address, range_size;
 	struct hl_userptr *userptr;
-	enum vm_type_t *vm_type;
+	enum vm_type *vm_type;
 	bool valid = false;
-	u64 end_address;
-	u32 range_size;
 	int i, rc = 0;
 
 	if (!ctx) {
@@ -1043,6 +1113,60 @@ static ssize_t hl_security_violations_read(struct file *f, char __user *buf,
 	return 0;
 }
 
+static ssize_t hl_state_dump_read(struct file *f, char __user *buf,
+					size_t count, loff_t *ppos)
+{
+	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+	ssize_t rc;
+
+	down_read(&entry->state_dump_sem);
+	if (!entry->state_dump[entry->state_dump_head])
+		rc = 0;
+	else
+		rc = simple_read_from_buffer(
+			buf, count, ppos,
+			entry->state_dump[entry->state_dump_head],
+			strlen(entry->state_dump[entry->state_dump_head]));
+	up_read(&entry->state_dump_sem);
+
+	return rc;
+}
+
+static ssize_t hl_state_dump_write(struct file *f, const char __user *buf,
+					size_t count, loff_t *ppos)
+{
+	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+	struct hl_device *hdev = entry->hdev;
+	ssize_t rc;
+	u32 size;
+	int i;
+
+	rc = kstrtouint_from_user(buf, count, 10, &size);
+	if (rc)
+		return rc;
+
+	if (size <= 0 || size >= ARRAY_SIZE(entry->state_dump)) {
+		dev_err(hdev->dev, "Invalid number of dumps to skip\n");
+		return -EINVAL;
+	}
+
+	if (entry->state_dump[entry->state_dump_head]) {
+		down_write(&entry->state_dump_sem);
+		for (i = 0; i < size; ++i) {
+			vfree(entry->state_dump[entry->state_dump_head]);
+			entry->state_dump[entry->state_dump_head] = NULL;
+			if (entry->state_dump_head > 0)
+				entry->state_dump_head--;
+			else
+				entry->state_dump_head =
+					ARRAY_SIZE(entry->state_dump) - 1;
+		}
+		up_write(&entry->state_dump_sem);
+	}
+
+	return count;
+}
+
 static const struct file_operations hl_data32b_fops = {
 	.owner = THIS_MODULE,
 	.read = hl_data_read32,
@@ -1110,12 +1234,19 @@ static const struct file_operations hl_security_violations_fops = {
 	.read = hl_security_violations_read
 };
 
+static const struct file_operations hl_state_dump_fops = {
+	.owner = THIS_MODULE,
+	.read = hl_state_dump_read,
+	.write = hl_state_dump_write
+};
+
 static const struct hl_info_list hl_debugfs_list[] = {
 	{"command_buffers", command_buffers_show, NULL},
 	{"command_submission", command_submission_show, NULL},
 	{"command_submission_jobs", command_submission_jobs_show, NULL},
 	{"userptr", userptr_show, NULL},
 	{"vm", vm_show, NULL},
+	{"userptr_lookup", userptr_lookup_show, userptr_lookup_write},
 	{"mmu", mmu_show, mmu_asid_va_write},
 	{"engines", engines_show, NULL}
 };
@@ -1172,6 +1303,7 @@ void hl_debugfs_add_device(struct hl_device *hdev)
 	INIT_LIST_HEAD(&dev_entry->userptr_list);
 	INIT_LIST_HEAD(&dev_entry->ctx_mem_hash_list);
 	mutex_init(&dev_entry->file_mutex);
+	init_rwsem(&dev_entry->state_dump_sem);
 	spin_lock_init(&dev_entry->cb_spinlock);
 	spin_lock_init(&dev_entry->cs_spinlock);
 	spin_lock_init(&dev_entry->cs_job_spinlock);
@@ -1283,6 +1415,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
 				dev_entry->root,
 				&hdev->skip_reset_on_timeout);
 
+	debugfs_create_file("state_dump",
+				0600,
+				dev_entry->root,
+				dev_entry,
+				&hl_state_dump_fops);
+
 	for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
 		debugfs_create_file(hl_debugfs_list[i].name,
 					0444,
@@ -1297,6 +1435,7 @@ void hl_debugfs_add_device(struct hl_device *hdev)
 void hl_debugfs_remove_device(struct hl_device *hdev)
 {
 	struct hl_dbg_device_entry *entry = &hdev->hl_debugfs;
+	int i;
 
 	debugfs_remove_recursive(entry->root);
 
@@ -1304,6 +1443,9 @@ void hl_debugfs_remove_device(struct hl_device *hdev)
 
 	vfree(entry->blob_desc.data);
 
+	for (i = 0; i < ARRAY_SIZE(entry->state_dump); ++i)
+		vfree(entry->state_dump[i]);
+
 	kfree(entry->entry_arr);
 }
 
@@ -1416,6 +1558,28 @@ void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)
 	spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
 }
 
+/**
+ * hl_debugfs_set_state_dump - register state dump making it accessible via
+ *                             debugfs
+ * @hdev: pointer to the device structure
+ * @data: the actual dump data
+ * @length: the length of the data
+ */
+void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
+					unsigned long length)
+{
+	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
+
+	down_write(&dev_entry->state_dump_sem);
+
+	dev_entry->state_dump_head = (dev_entry->state_dump_head + 1) %
+					ARRAY_SIZE(dev_entry->state_dump);
+	vfree(dev_entry->state_dump[dev_entry->state_dump_head]);
+	dev_entry->state_dump[dev_entry->state_dump_head] = data;
+
+	up_write(&dev_entry->state_dump_sem);
+}
+
 void __init hl_debugfs_init(void)
 {
 	hl_debug_root = debugfs_create_dir("habanalabs", NULL);
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index ff4cbde289c0..97c7c86580e6 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -7,11 +7,11 @@
 
 #define pr_fmt(fmt)			"habanalabs: " fmt
 
+#include <uapi/misc/habanalabs.h>
 #include "habanalabs.h"
 
 #include <linux/pci.h>
 #include <linux/hwmon.h>
-#include <uapi/misc/habanalabs.h>
 
 enum hl_device_status hl_device_status(struct hl_device *hdev)
 {
@@ -23,6 +23,8 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
 		status = HL_DEVICE_STATUS_NEEDS_RESET;
 	else if (hdev->disabled)
 		status = HL_DEVICE_STATUS_MALFUNCTION;
+	else if (!hdev->init_done)
+		status = HL_DEVICE_STATUS_IN_DEVICE_CREATION;
 	else
 		status = HL_DEVICE_STATUS_OPERATIONAL;
 
@@ -44,6 +46,7 @@ bool hl_device_operational(struct hl_device *hdev,
 	case HL_DEVICE_STATUS_NEEDS_RESET:
 		return false;
 	case HL_DEVICE_STATUS_OPERATIONAL:
+	case HL_DEVICE_STATUS_IN_DEVICE_CREATION:
 	default:
 		return true;
 	}
@@ -129,8 +132,8 @@ static int hl_device_release(struct inode *inode, struct file *filp)
 	hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
 
 	if (!hl_hpriv_put(hpriv))
-		dev_warn(hdev->dev,
-			"Device is still in use because there are live CS and/or memory mappings\n");
+		dev_notice(hdev->dev,
+			"User process closed FD but device still in use\n");
 
 	hdev->last_open_session_duration_jif =
 		jiffies - hdev->last_successful_open_jif;
@@ -308,9 +311,15 @@ static void device_hard_reset_pending(struct work_struct *work)
 		container_of(work, struct hl_device_reset_work,
 				reset_work.work);
 	struct hl_device *hdev = device_reset_work->hdev;
+	u32 flags;
 	int rc;
 
-	rc = hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FROM_RESET_THREAD);
+	flags = HL_RESET_HARD | HL_RESET_FROM_RESET_THREAD;
+
+	if (device_reset_work->fw_reset)
+		flags |= HL_RESET_FW;
+
+	rc = hl_device_reset(hdev, flags);
 	if ((rc == -EBUSY) && !hdev->device_fini_pending) {
 		dev_info(hdev->dev,
 			"Could not reset device. will try again in %u seconds",
@@ -682,6 +691,44 @@ out:
 	return rc;
 }
 
+static void take_release_locks(struct hl_device *hdev)
+{
+	/* Flush anyone that is inside the critical section of enqueue
+	 * jobs to the H/W
+	 */
+	hdev->asic_funcs->hw_queues_lock(hdev);
+	hdev->asic_funcs->hw_queues_unlock(hdev);
+
+	/* Flush processes that are sending message to CPU */
+	mutex_lock(&hdev->send_cpu_message_lock);
+	mutex_unlock(&hdev->send_cpu_message_lock);
+
+	/* Flush anyone that is inside device open */
+	mutex_lock(&hdev->fpriv_list_lock);
+	mutex_unlock(&hdev->fpriv_list_lock);
+}
+
+static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset)
+{
+	if (hard_reset)
+		device_late_fini(hdev);
+
+	/*
+	 * Halt the engines and disable interrupts so we won't get any more
+	 * completions from H/W and we won't have any accesses from the
+	 * H/W to the host machine
+	 */
+	hdev->asic_funcs->halt_engines(hdev, hard_reset, fw_reset);
+
+	/* Go over all the queues, release all CS and their jobs */
+	hl_cs_rollback_all(hdev);
+
+	/* Release all pending user interrupts, each pending user interrupt
+	 * holds a reference to user context
+	 */
+	hl_release_pending_user_interrupts(hdev);
+}
+
 /*
  * hl_device_suspend - initiate device suspend
  *
@@ -707,16 +754,7 @@ int hl_device_suspend(struct hl_device *hdev)
 	/* This blocks all other stuff that is not blocked by in_reset */
 	hdev->disabled = true;
 
-	/*
-	 * Flush anyone that is inside the critical section of enqueue
-	 * jobs to the H/W
-	 */
-	hdev->asic_funcs->hw_queues_lock(hdev);
-	hdev->asic_funcs->hw_queues_unlock(hdev);
-
-	/* Flush processes that are sending message to CPU */
-	mutex_lock(&hdev->send_cpu_message_lock);
-	mutex_unlock(&hdev->send_cpu_message_lock);
+	take_release_locks(hdev);
 
 	rc = hdev->asic_funcs->suspend(hdev);
 	if (rc)
@@ -819,6 +857,11 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout)
 			usleep_range(1000, 10000);
 
 			put_task_struct(task);
+		} else {
+			dev_warn(hdev->dev,
+				"Can't get task struct for PID so giving up on killing process\n");
+			mutex_unlock(&hdev->fpriv_list_lock);
+			return -ETIME;
 		}
 	}
 
@@ -885,7 +928,7 @@ static void device_disable_open_processes(struct hl_device *hdev)
 int hl_device_reset(struct hl_device *hdev, u32 flags)
 {
 	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
-	bool hard_reset, from_hard_reset_thread, hard_instead_soft = false;
+	bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false;
 	int i, rc;
 
 	if (!hdev->init_done) {
@@ -894,8 +937,9 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 		return 0;
 	}
 
-	hard_reset = (flags & HL_RESET_HARD) != 0;
-	from_hard_reset_thread = (flags & HL_RESET_FROM_RESET_THREAD) != 0;
+	hard_reset = !!(flags & HL_RESET_HARD);
+	from_hard_reset_thread = !!(flags & HL_RESET_FROM_RESET_THREAD);
+	fw_reset = !!(flags & HL_RESET_FW);
 
 	if (!hard_reset && !hdev->supports_soft_reset) {
 		hard_instead_soft = true;
@@ -947,11 +991,13 @@ do_reset:
 		else
 			hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
 
-		/*
-		 * if reset is due to heartbeat, device CPU is no responsive in
-		 * which case no point sending PCI disable message to it
+		/* If reset is due to heartbeat, device CPU is no responsive in
+		 * which case no point sending PCI disable message to it.
+		 *
+		 * If F/W is performing the reset, no need to send it a message to disable
+		 * PCI access
 		 */
-		if (hard_reset && !(flags & HL_RESET_HEARTBEAT)) {
+		if (hard_reset && !(flags & (HL_RESET_HEARTBEAT | HL_RESET_FW))) {
 			/* Disable PCI access from device F/W so he won't send
 			 * us additional interrupts. We disable MSI/MSI-X at
 			 * the halt_engines function and we can't have the F/W
@@ -970,15 +1016,7 @@ do_reset:
 		/* This also blocks future CS/VM/JOB completion operations */
 		hdev->disabled = true;
 
-		/* Flush anyone that is inside the critical section of enqueue
-		 * jobs to the H/W
-		 */
-		hdev->asic_funcs->hw_queues_lock(hdev);
-		hdev->asic_funcs->hw_queues_unlock(hdev);
-
-		/* Flush anyone that is inside device open */
-		mutex_lock(&hdev->fpriv_list_lock);
-		mutex_unlock(&hdev->fpriv_list_lock);
+		take_release_locks(hdev);
 
 		dev_err(hdev->dev, "Going to RESET device!\n");
 	}
@@ -989,6 +1027,8 @@ again:
 
 		hdev->process_kill_trial_cnt = 0;
 
+		hdev->device_reset_work.fw_reset = fw_reset;
+
 		/*
 		 * Because the reset function can't run from heartbeat work,
 		 * we need to call the reset function from a dedicated work.
@@ -999,31 +1039,7 @@ again:
 		return 0;
 	}
 
-	if (hard_reset) {
-		device_late_fini(hdev);
-
-		/*
-		 * Now that the heartbeat thread is closed, flush processes
-		 * which are sending messages to CPU
-		 */
-		mutex_lock(&hdev->send_cpu_message_lock);
-		mutex_unlock(&hdev->send_cpu_message_lock);
-	}
-
-	/*
-	 * Halt the engines and disable interrupts so we won't get any more
-	 * completions from H/W and we won't have any accesses from the
-	 * H/W to the host machine
-	 */
-	hdev->asic_funcs->halt_engines(hdev, hard_reset);
-
-	/* Go over all the queues, release all CS and their jobs */
-	hl_cs_rollback_all(hdev);
-
-	/* Release all pending user interrupts, each pending user interrupt
-	 * holds a reference to user context
-	 */
-	hl_release_pending_user_interrupts(hdev);
+	cleanup_resources(hdev, hard_reset, fw_reset);
 
 kill_processes:
 	if (hard_reset) {
@@ -1057,12 +1073,15 @@ kill_processes:
 	}
 
 	/* Reset the H/W. It will be in idle state after this returns */
-	hdev->asic_funcs->hw_fini(hdev, hard_reset);
+	hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset);
 
 	if (hard_reset) {
+		hdev->fw_loader.linux_loaded = false;
+
 		/* Release kernel context */
 		if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1)
 			hdev->kernel_ctx = NULL;
+
 		hl_vm_fini(hdev);
 		hl_mmu_fini(hdev);
 		hl_eq_reset(hdev, &hdev->event_queue);
@@ -1292,6 +1311,10 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 	if (rc)
 		goto user_interrupts_fini;
 
+
+	/* initialize completion structure for multi CS wait */
+	hl_multi_cs_completion_init(hdev);
+
 	/*
 	 * Initialize the H/W queues. Must be done before hw_init, because
 	 * there the addresses of the kernel queue are being written to the
@@ -1361,6 +1384,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 
 	hdev->compute_ctx = NULL;
 
+	hdev->asic_funcs->state_dump_init(hdev);
+
 	hl_debugfs_add_device(hdev);
 
 	/* debugfs nodes are created in hl_ctx_init so it must be called after
@@ -1567,31 +1592,13 @@ void hl_device_fini(struct hl_device *hdev)
 	/* Mark device as disabled */
 	hdev->disabled = true;
 
-	/* Flush anyone that is inside the critical section of enqueue
-	 * jobs to the H/W
-	 */
-	hdev->asic_funcs->hw_queues_lock(hdev);
-	hdev->asic_funcs->hw_queues_unlock(hdev);
-
-	/* Flush anyone that is inside device open */
-	mutex_lock(&hdev->fpriv_list_lock);
-	mutex_unlock(&hdev->fpriv_list_lock);
+	take_release_locks(hdev);
 
 	hdev->hard_reset_pending = true;
 
 	hl_hwmon_fini(hdev);
 
-	device_late_fini(hdev);
-
-	/*
-	 * Halt the engines and disable interrupts so we won't get any more
-	 * completions from H/W and we won't have any accesses from the
-	 * H/W to the host machine
-	 */
-	hdev->asic_funcs->halt_engines(hdev, true);
-
-	/* Go over all the queues, release all CS and their jobs */
-	hl_cs_rollback_all(hdev);
+	cleanup_resources(hdev, true, false);
 
 	/* Kill processes here after CS rollback. This is because the process
 	 * can't really exit until all its CSs are done, which is what we
@@ -1610,7 +1617,9 @@ void hl_device_fini(struct hl_device *hdev)
 	hl_cb_pool_fini(hdev);
 
 	/* Reset the H/W. It will be in idle state after this returns */
-	hdev->asic_funcs->hw_fini(hdev, true);
+	hdev->asic_funcs->hw_fini(hdev, true, false);
+
+	hdev->fw_loader.linux_loaded = false;
 
 	/* Release kernel context */
 	if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 2e4d04ec6b53..8d2568c63f19 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 /*
- * Copyright 2016-2019 HabanaLabs, Ltd.
+ * Copyright 2016-2021 HabanaLabs, Ltd.
  * All Rights Reserved.
  */
 
@@ -240,11 +240,15 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
 	/* set fence to a non valid value */
 	pkt->fence = cpu_to_le32(UINT_MAX);
 
-	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr);
-	if (rc) {
-		dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
-		goto out;
-	}
+	/*
+	 * The CPU queue is a synchronous queue with an effective depth of
+	 * a single entry (although it is allocated with room for multiple
+	 * entries). We lock on it using 'send_cpu_message_lock' which
+	 * serializes accesses to the CPU queue.
+	 * Which means that we don't need to lock the access to the entire H/W
+	 * queues module when submitting a JOB to the CPU queue.
+	 */
+	hl_hw_queue_submit_bd(hdev, queue, 0, len, pkt_dma_addr);
 
 	if (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN)
 		expected_ack_val = queue->pi;
@@ -663,17 +667,15 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev,
 	hdev->event_queue.check_eqe_index = false;
 
 	/* Read FW application security bits again */
-	if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid) {
-		hdev->asic_prop.fw_app_cpu_boot_dev_sts0 =
-						RREG32(sts_boot_dev_sts0_reg);
-		if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
+	if (prop->fw_cpu_boot_dev_sts0_valid) {
+		prop->fw_app_cpu_boot_dev_sts0 = RREG32(sts_boot_dev_sts0_reg);
+		if (prop->fw_app_cpu_boot_dev_sts0 &
 				CPU_BOOT_DEV_STS0_EQ_INDEX_EN)
 			hdev->event_queue.check_eqe_index = true;
 	}
 
-	if (hdev->asic_prop.fw_cpu_boot_dev_sts1_valid)
-		hdev->asic_prop.fw_app_cpu_boot_dev_sts1 =
-						RREG32(sts_boot_dev_sts1_reg);
+	if (prop->fw_cpu_boot_dev_sts1_valid)
+		prop->fw_app_cpu_boot_dev_sts1 = RREG32(sts_boot_dev_sts1_reg);
 
 out:
 	hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
@@ -1008,6 +1010,11 @@ void hl_fw_ask_halt_machine_without_linux(struct hl_device *hdev)
 	} else {
 		WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_GOTO_WFE);
 		msleep(static_loader->cpu_reset_wait_msec);
+
+		/* Must clear this register in order to prevent preboot
+		 * from reading WFE after reboot
+		 */
+		WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_NA);
 	}
 
 	hdev->device_cpu_is_halted = true;
@@ -1055,6 +1062,10 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
 		dev_err(hdev->dev,
 			"Device boot progress - Thermal Sensor initialization failed\n");
 		break;
+	case CPU_BOOT_STATUS_SECURITY_READY:
+		dev_err(hdev->dev,
+			"Device boot progress - Stuck in preboot after security initialization\n");
+		break;
 	default:
 		dev_err(hdev->dev,
 			"Device boot progress - Invalid status code %d\n",
@@ -1238,11 +1249,6 @@ static void hl_fw_preboot_update_state(struct hl_device *hdev)
 	 *               b. Check whether hard reset is done by boot cpu
 	 * 3. FW application - a. Fetch fw application security status
 	 *                     b. Check whether hard reset is done by fw app
-	 *
-	 * Preboot:
-	 * Check security status bit (CPU_BOOT_DEV_STS0_ENABLED). If set, then-
-	 * check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN)
-	 * If set, then mark GIC controller to be disabled.
 	 */
 	prop->hard_reset_done_by_fw =
 		!!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN);
@@ -1953,8 +1959,8 @@ static void hl_fw_dynamic_update_linux_interrupt_if(struct hl_device *hdev)
 	if (!hdev->asic_prop.gic_interrupts_enable &&
 			!(hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
 				CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN)) {
-		dyn_regs->gic_host_halt_irq = dyn_regs->gic_host_irq_ctrl;
-		dyn_regs->gic_host_ints_irq = dyn_regs->gic_host_irq_ctrl;
+		dyn_regs->gic_host_halt_irq = dyn_regs->gic_host_pi_upd_irq;
+		dyn_regs->gic_host_ints_irq = dyn_regs->gic_host_pi_upd_irq;
 
 		dev_warn(hdev->dev,
 			"Using a single interrupt interface towards cpucp");
@@ -2122,8 +2128,7 @@ static void hl_fw_linux_update_state(struct hl_device *hdev,
 
 	/* Read FW application security bits */
 	if (prop->fw_cpu_boot_dev_sts0_valid) {
-		prop->fw_app_cpu_boot_dev_sts0 =
-				RREG32(cpu_boot_dev_sts0_reg);
+		prop->fw_app_cpu_boot_dev_sts0 = RREG32(cpu_boot_dev_sts0_reg);
 
 		if (prop->fw_app_cpu_boot_dev_sts0 &
 				CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
@@ -2143,8 +2148,7 @@ static void hl_fw_linux_update_state(struct hl_device *hdev,
 	}
 
 	if (prop->fw_cpu_boot_dev_sts1_valid) {
-		prop->fw_app_cpu_boot_dev_sts1 =
-				RREG32(cpu_boot_dev_sts1_reg);
+		prop->fw_app_cpu_boot_dev_sts1 = RREG32(cpu_boot_dev_sts1_reg);
 
 		dev_dbg(hdev->dev,
 			"Firmware application CPU status1 %#x\n",
@@ -2235,6 +2239,10 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
 	dev_info(hdev->dev,
 		"Loading firmware to device, may take some time...\n");
 
+	/*
+	 * In this stage, "cpu_dyn_regs" contains only LKD's hard coded values!
+	 * It will be updated from FW after hl_fw_dynamic_request_descriptor().
+	 */
 	dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs;
 
 	rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_RST_STATE,
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 6b3cdd7e068a..bebebcb163ee 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -20,6 +20,7 @@
 #include <linux/scatterlist.h>
 #include <linux/hashtable.h>
 #include <linux/debugfs.h>
+#include <linux/rwsem.h>
 #include <linux/bitfield.h>
 #include <linux/genalloc.h>
 #include <linux/sched/signal.h>
@@ -65,6 +66,11 @@
 
 #define HL_COMMON_USER_INTERRUPT_ID	0xFFF
 
+#define HL_STATE_DUMP_HIST_LEN		5
+
+#define OBJ_NAMES_HASH_TABLE_BITS	7 /* 1 << 7 buckets */
+#define SYNC_TO_ENGINE_HASH_TABLE_BITS	7 /* 1 << 7 buckets */
+
 /* Memory */
 #define MEM_HASH_TABLE_BITS		7 /* 1 << 7 buckets */
 
@@ -122,12 +128,17 @@ enum hl_mmu_page_table_location {
  *
  * - HL_RESET_DEVICE_RELEASE
  *       Set if reset is due to device release
+ *
+ * - HL_RESET_FW
+ *       F/W will perform the reset. No need to ask it to reset the device. This is relevant
+ *       only when running with secured f/w
  */
 #define HL_RESET_HARD			(1 << 0)
 #define HL_RESET_FROM_RESET_THREAD	(1 << 1)
 #define HL_RESET_HEARTBEAT		(1 << 2)
 #define HL_RESET_TDR			(1 << 3)
 #define HL_RESET_DEVICE_RELEASE		(1 << 4)
+#define HL_RESET_FW			(1 << 5)
 
 #define HL_MAX_SOBS_PER_MONITOR	8
 
@@ -236,7 +247,9 @@ enum hl_cs_type {
 	CS_TYPE_DEFAULT,
 	CS_TYPE_SIGNAL,
 	CS_TYPE_WAIT,
-	CS_TYPE_COLLECTIVE_WAIT
+	CS_TYPE_COLLECTIVE_WAIT,
+	CS_RESERVE_SIGNALS,
+	CS_UNRESERVE_SIGNALS
 };
 
 /*
@@ -281,13 +294,17 @@ enum queue_cb_alloc_flags {
  * @hdev: habanalabs device structure.
  * @kref: refcount of this SOB. The SOB will reset once the refcount is zero.
  * @sob_id: id of this SOB.
+ * @sob_addr: the sob offset from the base address.
  * @q_idx: the H/W queue that uses this SOB.
+ * @need_reset: reset indication set when switching to the other sob.
  */
 struct hl_hw_sob {
 	struct hl_device	*hdev;
 	struct kref		kref;
 	u32			sob_id;
+	u32			sob_addr;
 	u32			q_idx;
+	bool			need_reset;
 };
 
 enum hl_collective_mode {
@@ -317,11 +334,11 @@ struct hw_queue_properties {
 };
 
 /**
- * enum vm_type_t - virtual memory mapping request information.
+ * enum vm_type - virtual memory mapping request information.
  * @VM_TYPE_USERPTR: mapping of user memory to device virtual address.
  * @VM_TYPE_PHYS_PACK: mapping of DRAM memory to device virtual address.
  */
-enum vm_type_t {
+enum vm_type {
 	VM_TYPE_USERPTR = 0x1,
 	VM_TYPE_PHYS_PACK = 0x2
 };
@@ -382,6 +399,16 @@ struct hl_mmu_properties {
 };
 
 /**
+ * struct hl_hints_range - hint addresses reserved va range.
+ * @start_addr: start address of the va range.
+ * @end_addr: end address of the va range.
+ */
+struct hl_hints_range {
+	u64 start_addr;
+	u64 end_addr;
+};
+
+/**
  * struct asic_fixed_properties - ASIC specific immutable properties.
  * @hw_queues_props: H/W queues properties.
  * @cpucp_info: received various information from CPU-CP regarding the H/W, e.g.
@@ -392,6 +419,10 @@ struct hl_mmu_properties {
  * @pmmu: PCI (host) MMU address translation properties.
  * @pmmu_huge: PCI (host) MMU address translation properties for memory
  *              allocated with huge pages.
+ * @hints_dram_reserved_va_range: dram hint addresses reserved range.
+ * @hints_host_reserved_va_range: host hint addresses reserved range.
+ * @hints_host_hpage_reserved_va_range: host huge page hint addresses reserved
+ *                                      range.
  * @sram_base_address: SRAM physical start address.
  * @sram_end_address: SRAM physical end address.
  * @sram_user_base_address - SRAM physical start address for user access.
@@ -412,6 +443,10 @@ struct hl_mmu_properties {
  *                    to the device's MMU.
  * @cb_va_end_addr: virtual end address of command buffers which are mapped to
  *                  the device's MMU.
+ * @dram_hints_align_mask: dram va hint addresses alignment mask which is used
+ *                  for hints validity check.
+ * device_dma_offset_for_host_access: the offset to add to host DMA addresses
+ *                                    to enable the device to access them.
  * @mmu_pgt_size: MMU page tables total size.
  * @mmu_pte_size: PTE size in MMU page tables.
  * @mmu_hop_table_size: MMU hop table size.
@@ -459,6 +494,8 @@ struct hl_mmu_properties {
  *                                       reserved for the user
  * @first_available_cq: first available CQ for the user.
  * @user_interrupt_count: number of user interrupts.
+ * @server_type: Server type that the ASIC is currently installed in.
+ *               The value is according to enum hl_server_type in uapi file.
  * @tpc_enabled_mask: which TPCs are enabled.
  * @completion_queues_count: number of completion queues.
  * @fw_security_enabled: true if security measures are enabled in firmware,
@@ -470,6 +507,7 @@ struct hl_mmu_properties {
  * @dram_supports_virtual_memory: is there an MMU towards the DRAM
  * @hard_reset_done_by_fw: true if firmware is handling hard reset flow
  * @num_functional_hbms: number of functional HBMs in each DCORE.
+ * @hints_range_reservation: device support hint addresses range reservation.
  * @iatu_done_by_fw: true if iATU configuration is being done by FW.
  * @dynamic_fw_load: is dynamic FW load is supported.
  * @gic_interrupts_enable: true if FW is not blocking GIC controller,
@@ -483,6 +521,9 @@ struct asic_fixed_properties {
 	struct hl_mmu_properties	dmmu;
 	struct hl_mmu_properties	pmmu;
 	struct hl_mmu_properties	pmmu_huge;
+	struct hl_hints_range		hints_dram_reserved_va_range;
+	struct hl_hints_range		hints_host_reserved_va_range;
+	struct hl_hints_range		hints_host_hpage_reserved_va_range;
 	u64				sram_base_address;
 	u64				sram_end_address;
 	u64				sram_user_base_address;
@@ -500,6 +541,8 @@ struct asic_fixed_properties {
 	u64				mmu_dram_default_page_addr;
 	u64				cb_va_start_addr;
 	u64				cb_va_end_addr;
+	u64				dram_hints_align_mask;
+	u64				device_dma_offset_for_host_access;
 	u32				mmu_pgt_size;
 	u32				mmu_pte_size;
 	u32				mmu_hop_table_size;
@@ -534,6 +577,7 @@ struct asic_fixed_properties {
 	u16				first_available_user_msix_interrupt;
 	u16				first_available_cq[HL_MAX_DCORES];
 	u16				user_interrupt_count;
+	u16				server_type;
 	u8				tpc_enabled_mask;
 	u8				completion_queues_count;
 	u8				fw_security_enabled;
@@ -542,6 +586,7 @@ struct asic_fixed_properties {
 	u8				dram_supports_virtual_memory;
 	u8				hard_reset_done_by_fw;
 	u8				num_functional_hbms;
+	u8				hints_range_reservation;
 	u8				iatu_done_by_fw;
 	u8				dynamic_fw_load;
 	u8				gic_interrupts_enable;
@@ -552,40 +597,45 @@ struct asic_fixed_properties {
  * @completion: fence is implemented using completion
  * @refcount: refcount for this fence
  * @cs_sequence: sequence of the corresponding command submission
+ * @stream_master_qid_map: streams masters QID bitmap to represent all streams
+ *                         masters QIDs that multi cs is waiting on
  * @error: mark this fence with error
  * @timestamp: timestamp upon completion
- *
  */
 struct hl_fence {
 	struct completion	completion;
 	struct kref		refcount;
 	u64			cs_sequence;
+	u32			stream_master_qid_map;
 	int			error;
 	ktime_t			timestamp;
 };
 
 /**
  * struct hl_cs_compl - command submission completion object.
- * @sob_reset_work: workqueue object to run SOB reset flow.
  * @base_fence: hl fence object.
  * @lock: spinlock to protect fence.
  * @hdev: habanalabs device structure.
  * @hw_sob: the H/W SOB used in this signal/wait CS.
+ * @encaps_sig_hdl: encaps signals hanlder.
  * @cs_seq: command submission sequence number.
  * @type: type of the CS - signal/wait.
  * @sob_val: the SOB value that is used in this signal/wait CS.
  * @sob_group: the SOB group that is used in this collective wait CS.
+ * @encaps_signals: indication whether it's a completion object of cs with
+ * encaps signals or not.
  */
 struct hl_cs_compl {
-	struct work_struct	sob_reset_work;
 	struct hl_fence		base_fence;
 	spinlock_t		lock;
 	struct hl_device	*hdev;
 	struct hl_hw_sob	*hw_sob;
+	struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
 	u64			cs_seq;
 	enum hl_cs_type		type;
 	u16			sob_val;
 	u16			sob_group;
+	bool			encaps_signals;
 };
 
 /*
@@ -698,6 +748,17 @@ struct hl_sync_stream_properties {
 };
 
 /**
+ * struct hl_encaps_signals_mgr - describes sync stream encapsulated signals
+ * handlers manager
+ * @lock: protects handles.
+ * @handles: an idr to hold all encapsulated signals handles.
+ */
+struct hl_encaps_signals_mgr {
+	spinlock_t		lock;
+	struct idr		handles;
+};
+
+/**
  * struct hl_hw_queue - describes a H/W transport queue.
  * @shadow_queue: pointer to a shadow queue that holds pointers to jobs.
  * @sync_stream_prop: sync stream queue properties
@@ -875,7 +936,7 @@ struct pci_mem_region {
 	u64 region_base;
 	u64 region_size;
 	u64 bar_size;
-	u32 offset_in_bar;
+	u64 offset_in_bar;
 	u8 bar_id;
 	u8 used;
 };
@@ -996,7 +1057,7 @@ struct fw_load_mgr {
  *                hw_fini and before CS rollback.
  * @suspend: handles IP specific H/W or SW changes for suspend.
  * @resume: handles IP specific H/W or SW changes for resume.
- * @cb_mmap: maps a CB.
+ * @mmap: maps a memory.
  * @ring_doorbell: increment PI on a given QMAN.
  * @pqe_write: Write the PQ entry to the PQ. This is ASIC-specific
  *             function because the PQs are located in different memory areas
@@ -1101,6 +1162,10 @@ struct fw_load_mgr {
  *                         generic f/w compatible PLL Indexes
  * @init_firmware_loader: initialize data for FW loader.
  * @init_cpu_scrambler_dram: Enable CPU specific DRAM scrambling
+ * @state_dump_init: initialize constants required for state dump
+ * @get_sob_addr: get SOB base address offset.
+ * @set_pci_memory_regions: setting properties of PCI memory regions
+ * @get_stream_master_qid_arr: get pointer to stream masters QID array
  */
 struct hl_asic_funcs {
 	int (*early_init)(struct hl_device *hdev);
@@ -1110,11 +1175,11 @@ struct hl_asic_funcs {
 	int (*sw_init)(struct hl_device *hdev);
 	int (*sw_fini)(struct hl_device *hdev);
 	int (*hw_init)(struct hl_device *hdev);
-	void (*hw_fini)(struct hl_device *hdev, bool hard_reset);
-	void (*halt_engines)(struct hl_device *hdev, bool hard_reset);
+	void (*hw_fini)(struct hl_device *hdev, bool hard_reset, bool fw_reset);
+	void (*halt_engines)(struct hl_device *hdev, bool hard_reset, bool fw_reset);
 	int (*suspend)(struct hl_device *hdev);
 	int (*resume)(struct hl_device *hdev);
-	int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
+	int (*mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
 			void *cpu_addr, dma_addr_t dma_addr, size_t size);
 	void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
 	void (*pqe_write)(struct hl_device *hdev, __le64 *pqe,
@@ -1210,10 +1275,11 @@ struct hl_asic_funcs {
 	void (*reset_sob_group)(struct hl_device *hdev, u16 sob_group);
 	void (*set_dma_mask_from_fw)(struct hl_device *hdev);
 	u64 (*get_device_time)(struct hl_device *hdev);
-	void (*collective_wait_init_cs)(struct hl_cs *cs);
+	int (*collective_wait_init_cs)(struct hl_cs *cs);
 	int (*collective_wait_create_jobs)(struct hl_device *hdev,
-			struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
-			u32 collective_engine_id);
+			struct hl_ctx *ctx, struct hl_cs *cs,
+			u32 wait_queue_id, u32 collective_engine_id,
+			u32 encaps_signal_offset);
 	u64 (*scramble_addr)(struct hl_device *hdev, u64 addr);
 	u64 (*descramble_addr)(struct hl_device *hdev, u64 addr);
 	void (*ack_protection_bits_errors)(struct hl_device *hdev);
@@ -1226,6 +1292,10 @@ struct hl_asic_funcs {
 	int (*map_pll_idx_to_fw_idx)(u32 pll_idx);
 	void (*init_firmware_loader)(struct hl_device *hdev);
 	void (*init_cpu_scrambler_dram)(struct hl_device *hdev);
+	void (*state_dump_init)(struct hl_device *hdev);
+	u32 (*get_sob_addr)(struct hl_device *hdev, u32 sob_id);
+	void (*set_pci_memory_regions)(struct hl_device *hdev);
+	u32* (*get_stream_master_qid_arr)(void);
 };
 
 
@@ -1283,20 +1353,6 @@ struct hl_cs_counters_atomic {
 };
 
 /**
- * struct hl_pending_cb - pending command buffer structure
- * @cb_node: cb node in pending cb list
- * @cb: command buffer to send in next submission
- * @cb_size: command buffer size
- * @hw_queue_id: destination queue id
- */
-struct hl_pending_cb {
-	struct list_head	cb_node;
-	struct hl_cb		*cb;
-	u32			cb_size;
-	u32			hw_queue_id;
-};
-
-/**
  * struct hl_ctx - user/kernel context.
  * @mem_hash: holds mapping from virtual address to virtual memory area
  *		descriptor (hl_vm_phys_pg_list or hl_userptr).
@@ -1312,28 +1368,21 @@ struct hl_pending_cb {
  *            MMU hash or walking the PGT requires talking this lock.
  * @hw_block_list_lock: protects the HW block memory list.
  * @debugfs_list: node in debugfs list of contexts.
- * pending_cb_list: list of pending command buffers waiting to be sent upon
- *                  next user command submission context.
  * @hw_block_mem_list: list of HW block virtual mapped addresses.
  * @cs_counters: context command submission counters.
  * @cb_va_pool: device VA pool for command buffers which are mapped to the
  *              device's MMU.
+ * @sig_mgr: encaps signals handle manager.
  * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
  *			to user so user could inquire about CS. It is used as
  *			index to cs_pending array.
  * @dram_default_hops: array that holds all hops addresses needed for default
  *                     DRAM mapping.
- * @pending_cb_lock: spinlock to protect pending cb list
  * @cs_lock: spinlock to protect cs_sequence.
  * @dram_phys_mem: amount of used physical DRAM memory by this context.
  * @thread_ctx_switch_token: token to prevent multiple threads of the same
  *				context	from running the context switch phase.
  *				Only a single thread should run it.
- * @thread_pending_cb_token: token to prevent multiple threads from processing
- *				the pending CB list. Only a single thread should
- *				process the list since it is protected by a
- *				spinlock and we don't want to halt the entire
- *				command submission sequence.
  * @thread_ctx_switch_wait_token: token to prevent the threads that didn't run
  *				the context switch phase from moving to their
  *				execution phase before the context switch phase
@@ -1353,17 +1402,15 @@ struct hl_ctx {
 	struct mutex			mmu_lock;
 	struct mutex			hw_block_list_lock;
 	struct list_head		debugfs_list;
-	struct list_head		pending_cb_list;
 	struct list_head		hw_block_mem_list;
 	struct hl_cs_counters_atomic	cs_counters;
 	struct gen_pool			*cb_va_pool;
+	struct hl_encaps_signals_mgr	sig_mgr;
 	u64				cs_sequence;
 	u64				*dram_default_hops;
-	spinlock_t			pending_cb_lock;
 	spinlock_t			cs_lock;
 	atomic64_t			dram_phys_mem;
 	atomic_t			thread_ctx_switch_token;
-	atomic_t			thread_pending_cb_token;
 	u32				thread_ctx_switch_wait_token;
 	u32				asid;
 	u32				handle;
@@ -1394,20 +1441,22 @@ struct hl_ctx_mgr {
  * @sgt: pointer to the scatter-gather table that holds the pages.
  * @dir: for DMA unmapping, the direction must be supplied, so save it.
  * @debugfs_list: node in debugfs list of command submissions.
+ * @pid: the pid of the user process owning the memory
  * @addr: user-space virtual address of the start of the memory area.
  * @size: size of the memory area to pin & map.
  * @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise.
  */
 struct hl_userptr {
-	enum vm_type_t		vm_type; /* must be first */
+	enum vm_type		vm_type; /* must be first */
 	struct list_head	job_node;
 	struct page		**pages;
 	unsigned int		npages;
 	struct sg_table		*sgt;
 	enum dma_data_direction dir;
 	struct list_head	debugfs_list;
+	pid_t			pid;
 	u64			addr;
-	u32			size;
+	u64			size;
 	u8			dma_mapped;
 };
 
@@ -1426,12 +1475,14 @@ struct hl_userptr {
  * @mirror_node : node in device mirror list of command submissions.
  * @staged_cs_node: node in the staged cs list.
  * @debugfs_list: node in debugfs list of command submissions.
+ * @encaps_sig_hdl: holds the encaps signals handle.
  * @sequence: the sequence number of this CS.
  * @staged_sequence: the sequence of the staged submission this CS is part of,
  *                   relevant only if staged_cs is set.
  * @timeout_jiffies: cs timeout in jiffies.
  * @submission_time_jiffies: submission time of the cs
  * @type: CS_TYPE_*.
+ * @encaps_sig_hdl_id: encaps signals handle id, set for the first staged cs.
  * @submitted: true if CS was submitted to H/W.
  * @completed: true if CS was completed by device.
  * @timedout : true if CS was timedout.
@@ -1445,6 +1496,7 @@ struct hl_userptr {
  * @staged_cs: true if this CS is part of a staged submission.
  * @skip_reset_on_timeout: true if we shall not reset the device in case
  *                         timeout occurs (debug scenario).
+ * @encaps_signals: true if this CS has encaps reserved signals.
  */
 struct hl_cs {
 	u16			*jobs_in_queue_cnt;
@@ -1459,11 +1511,13 @@ struct hl_cs {
 	struct list_head	mirror_node;
 	struct list_head	staged_cs_node;
 	struct list_head	debugfs_list;
+	struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
 	u64			sequence;
 	u64			staged_sequence;
 	u64			timeout_jiffies;
 	u64			submission_time_jiffies;
 	enum hl_cs_type		type;
+	u32			encaps_sig_hdl_id;
 	u8			submitted;
 	u8			completed;
 	u8			timedout;
@@ -1474,6 +1528,7 @@ struct hl_cs {
 	u8			staged_first;
 	u8			staged_cs;
 	u8			skip_reset_on_timeout;
+	u8			encaps_signals;
 };
 
 /**
@@ -1493,6 +1548,8 @@ struct hl_cs {
  * @hw_queue_id: the id of the H/W queue this job is submitted to.
  * @user_cb_size: the actual size of the CB we got from the user.
  * @job_cb_size: the actual size of the CB that we put on the queue.
+ * @encaps_sig_wait_offset: encapsulated signals offset, which allow user
+ *                          to wait on part of the reserved signals.
  * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
  *                          handle to a kernel-allocated CB object, false
  *                          otherwise (SRAM/DRAM/host address).
@@ -1517,6 +1574,7 @@ struct hl_cs_job {
 	u32			hw_queue_id;
 	u32			user_cb_size;
 	u32			job_cb_size;
+	u32			encaps_sig_wait_offset;
 	u8			is_kernel_allocated_cb;
 	u8			contains_dma_pkt;
 };
@@ -1613,7 +1671,7 @@ struct hl_vm_hw_block_list_node {
  * @created_from_userptr: is product of host virtual address.
  */
 struct hl_vm_phys_pg_pack {
-	enum vm_type_t		vm_type; /* must be first */
+	enum vm_type		vm_type; /* must be first */
 	u64			*pages;
 	u64			npages;
 	u64			total_size;
@@ -1759,9 +1817,13 @@ struct hl_debugfs_entry {
  * @ctx_mem_hash_list: list of available contexts with MMU mappings.
  * @ctx_mem_hash_spinlock: protects cb_list.
  * @blob_desc: descriptor of blob
+ * @state_dump: data of the system states in case of a bad cs.
+ * @state_dump_sem: protects state_dump.
  * @addr: next address to read/write from/to in read/write32.
  * @mmu_addr: next virtual address to translate to physical address in mmu_show.
+ * @userptr_lookup: the target user ptr to look up for on demand.
  * @mmu_asid: ASID to use while translating in mmu_show.
+ * @state_dump_head: index of the latest state dump
  * @i2c_bus: generic u8 debugfs file for bus value to use in i2c_data_read.
  * @i2c_addr: generic u8 debugfs file for address value to use in i2c_data_read.
  * @i2c_reg: generic u8 debugfs file for register value to use in i2c_data_read.
@@ -1783,14 +1845,149 @@ struct hl_dbg_device_entry {
 	struct list_head		ctx_mem_hash_list;
 	spinlock_t			ctx_mem_hash_spinlock;
 	struct debugfs_blob_wrapper	blob_desc;
+	char				*state_dump[HL_STATE_DUMP_HIST_LEN];
+	struct rw_semaphore		state_dump_sem;
 	u64				addr;
 	u64				mmu_addr;
+	u64				userptr_lookup;
 	u32				mmu_asid;
+	u32				state_dump_head;
 	u8				i2c_bus;
 	u8				i2c_addr;
 	u8				i2c_reg;
 };
 
+/**
+ * struct hl_hw_obj_name_entry - single hw object name, member of
+ * hl_state_dump_specs
+ * @node: link to the containing hash table
+ * @name: hw object name
+ * @id: object identifier
+ */
+struct hl_hw_obj_name_entry {
+	struct hlist_node	node;
+	const char		*name;
+	u32			id;
+};
+
+enum hl_state_dump_specs_props {
+	SP_SYNC_OBJ_BASE_ADDR,
+	SP_NEXT_SYNC_OBJ_ADDR,
+	SP_SYNC_OBJ_AMOUNT,
+	SP_MON_OBJ_WR_ADDR_LOW,
+	SP_MON_OBJ_WR_ADDR_HIGH,
+	SP_MON_OBJ_WR_DATA,
+	SP_MON_OBJ_ARM_DATA,
+	SP_MON_OBJ_STATUS,
+	SP_MONITORS_AMOUNT,
+	SP_TPC0_CMDQ,
+	SP_TPC0_CFG_SO,
+	SP_NEXT_TPC,
+	SP_MME_CMDQ,
+	SP_MME_CFG_SO,
+	SP_NEXT_MME,
+	SP_DMA_CMDQ,
+	SP_DMA_CFG_SO,
+	SP_DMA_QUEUES_OFFSET,
+	SP_NUM_OF_MME_ENGINES,
+	SP_SUB_MME_ENG_NUM,
+	SP_NUM_OF_DMA_ENGINES,
+	SP_NUM_OF_TPC_ENGINES,
+	SP_ENGINE_NUM_OF_QUEUES,
+	SP_ENGINE_NUM_OF_STREAMS,
+	SP_ENGINE_NUM_OF_FENCES,
+	SP_FENCE0_CNT_OFFSET,
+	SP_FENCE0_RDATA_OFFSET,
+	SP_CP_STS_OFFSET,
+	SP_NUM_CORES,
+
+	SP_MAX
+};
+
+enum hl_sync_engine_type {
+	ENGINE_TPC,
+	ENGINE_DMA,
+	ENGINE_MME,
+};
+
+/**
+ * struct hl_mon_state_dump - represents a state dump of a single monitor
+ * @id: monitor id
+ * @wr_addr_low: address monitor will write to, low bits
+ * @wr_addr_high: address monitor will write to, high bits
+ * @wr_data: data monitor will write
+ * @arm_data: register value containing monitor configuration
+ * @status: monitor status
+ */
+struct hl_mon_state_dump {
+	u32		id;
+	u32		wr_addr_low;
+	u32		wr_addr_high;
+	u32		wr_data;
+	u32		arm_data;
+	u32		status;
+};
+
+/**
+ * struct hl_sync_to_engine_map_entry - sync object id to engine mapping entry
+ * @engine_type: type of the engine
+ * @engine_id: id of the engine
+ * @sync_id: id of the sync object
+ */
+struct hl_sync_to_engine_map_entry {
+	struct hlist_node		node;
+	enum hl_sync_engine_type	engine_type;
+	u32				engine_id;
+	u32				sync_id;
+};
+
+/**
+ * struct hl_sync_to_engine_map - maps sync object id to associated engine id
+ * @tb: hash table containing the mapping, each element is of type
+ *      struct hl_sync_to_engine_map_entry
+ */
+struct hl_sync_to_engine_map {
+	DECLARE_HASHTABLE(tb, SYNC_TO_ENGINE_HASH_TABLE_BITS);
+};
+
+/**
+ * struct hl_state_dump_specs_funcs - virtual functions used by the state dump
+ * @gen_sync_to_engine_map: generate a hash map from sync obj id to its engine
+ * @print_single_monitor: format monitor data as string
+ * @monitor_valid: return true if given monitor dump is valid
+ * @print_fences_single_engine: format fences data as string
+ */
+struct hl_state_dump_specs_funcs {
+	int (*gen_sync_to_engine_map)(struct hl_device *hdev,
+				struct hl_sync_to_engine_map *map);
+	int (*print_single_monitor)(char **buf, size_t *size, size_t *offset,
+				    struct hl_device *hdev,
+				    struct hl_mon_state_dump *mon);
+	int (*monitor_valid)(struct hl_mon_state_dump *mon);
+	int (*print_fences_single_engine)(struct hl_device *hdev,
+					u64 base_offset,
+					u64 status_base_offset,
+					enum hl_sync_engine_type engine_type,
+					u32 engine_id, char **buf,
+					size_t *size, size_t *offset);
+};
+
+/**
+ * struct hl_state_dump_specs - defines ASIC known hw objects names
+ * @so_id_to_str_tb: sync objects names index table
+ * @monitor_id_to_str_tb: monitors names index table
+ * @funcs: virtual functions used for state dump
+ * @sync_namager_names: readable names for sync manager if available (ex: N_E)
+ * @props: pointer to a per asic const props array required for state dump
+ */
+struct hl_state_dump_specs {
+	DECLARE_HASHTABLE(so_id_to_str_tb, OBJ_NAMES_HASH_TABLE_BITS);
+	DECLARE_HASHTABLE(monitor_id_to_str_tb, OBJ_NAMES_HASH_TABLE_BITS);
+	struct hl_state_dump_specs_funcs	funcs;
+	const char * const			*sync_namager_names;
+	s64					*props;
+};
+
 
 /*
  * DEVICES
@@ -1798,7 +1995,7 @@ struct hl_dbg_device_entry {
 
 #define HL_STR_MAX	32
 
-#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_NEEDS_RESET + 1)
+#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_LAST + 1)
 
 /* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe
  * x16 cards. In extreme cases, there are hosts that can accommodate 16 cards.
@@ -1946,11 +2143,13 @@ struct hwmon_chip_info;
  * @wq: work queue for device reset procedure.
  * @reset_work: reset work to be done.
  * @hdev: habanalabs device structure.
+ * @fw_reset: whether f/w will do the reset without us sending them a message to do it.
  */
 struct hl_device_reset_work {
 	struct workqueue_struct		*wq;
 	struct delayed_work		reset_work;
 	struct hl_device		*hdev;
+	bool				fw_reset;
 };
 
 /**
@@ -2065,6 +2264,58 @@ struct hl_mmu_funcs {
 };
 
 /**
+ * number of user contexts allowed to call wait_for_multi_cs ioctl in
+ * parallel
+ */
+#define MULTI_CS_MAX_USER_CTX	2
+
+/**
+ * struct multi_cs_completion - multi CS wait completion.
+ * @completion: completion of any of the CS in the list
+ * @lock: spinlock for the completion structure
+ * @timestamp: timestamp for the multi-CS completion
+ * @stream_master_qid_map: bitmap of all stream masters on which the multi-CS
+ *                        is waiting
+ * @used: 1 if in use, otherwise 0
+ */
+struct multi_cs_completion {
+	struct completion	completion;
+	spinlock_t		lock;
+	s64			timestamp;
+	u32			stream_master_qid_map;
+	u8			used;
+};
+
+/**
+ * struct multi_cs_data - internal data for multi CS call
+ * @ctx: pointer to the context structure
+ * @fence_arr: array of fences of all CSs
+ * @seq_arr: array of CS sequence numbers
+ * @timeout_us: timeout in usec for waiting for CS to complete
+ * @timestamp: timestamp of first completed CS
+ * @wait_status: wait for CS status
+ * @completion_bitmap: bitmap of completed CSs (1- completed, otherwise 0)
+ * @stream_master_qid_map: bitmap of all stream master QIDs on which the
+ *                         multi-CS is waiting
+ * @arr_len: fence_arr and seq_arr array length
+ * @gone_cs: indication of gone CS (1- there was gone CS, otherwise 0)
+ * @update_ts: update timestamp. 1- update the timestamp, otherwise 0.
+ */
+struct multi_cs_data {
+	struct hl_ctx	*ctx;
+	struct hl_fence	**fence_arr;
+	u64		*seq_arr;
+	s64		timeout_us;
+	s64		timestamp;
+	long		wait_status;
+	u32		completion_bitmap;
+	u32		stream_master_qid_map;
+	u8		arr_len;
+	u8		gone_cs;
+	u8		update_ts;
+};
+
+/**
  * struct hl_device - habanalabs device structure.
  * @pdev: pointer to PCI device, can be NULL in case of simulator device.
  * @pcie_bar_phys: array of available PCIe bars physical addresses.
@@ -2129,6 +2380,8 @@ struct hl_mmu_funcs {
  * @mmu_func: device-related MMU functions.
  * @fw_loader: FW loader manager.
  * @pci_mem_region: array of memory regions in the PCI
+ * @state_dump_specs: constants and dictionaries needed to dump system state.
+ * @multi_cs_completion: array of multi-CS completion.
  * @dram_used_mem: current DRAM memory consumption.
  * @timeout_jiffies: device CS timeout value.
  * @max_power: the max power of the device, as configured by the sysadmin. This
@@ -2205,6 +2458,7 @@ struct hl_mmu_funcs {
  *                        halted. We can't halt it again because the COMMS
  *                        protocol will throw an error. Relevant only for
  *                        cases where Linux was not loaded to device CPU
+ * @supports_wait_for_multi_cs: true if wait for multi CS is supported
  */
 struct hl_device {
 	struct pci_dev			*pdev;
@@ -2273,6 +2527,11 @@ struct hl_device {
 
 	struct pci_mem_region		pci_mem_region[PCI_REGION_NUMBER];
 
+	struct hl_state_dump_specs	state_dump_specs;
+
+	struct multi_cs_completion	multi_cs_completion[
+							MULTI_CS_MAX_USER_CTX];
+	u32				*stream_master_qid_arr;
 	atomic64_t			dram_used_mem;
 	u64				timeout_jiffies;
 	u64				max_power;
@@ -2322,6 +2581,8 @@ struct hl_device {
 	u8				curr_reset_cause;
 	u8				skip_reset_on_timeout;
 	u8				device_cpu_is_halted;
+	u8				supports_wait_for_multi_cs;
+	u8				stream_master_qid_arr_size;
 
 	/* Parameters for bring-up */
 	u64				nic_ports_mask;
@@ -2343,6 +2604,29 @@ struct hl_device {
 };
 
 
+/**
+ * struct hl_cs_encaps_sig_handle - encapsulated signals handle structure
+ * @refcount: refcount used to protect removing this id when several
+ *            wait cs are used to wait of the reserved encaps signals.
+ * @hdev: pointer to habanalabs device structure.
+ * @hw_sob: pointer to  H/W SOB used in the reservation.
+ * @cs_seq: staged cs sequence which contains encapsulated signals
+ * @id: idr handler id to be used to fetch the handler info
+ * @q_idx: stream queue index
+ * @pre_sob_val: current SOB value before reservation
+ * @count: signals number
+ */
+struct hl_cs_encaps_sig_handle {
+	struct kref refcount;
+	struct hl_device *hdev;
+	struct hl_hw_sob *hw_sob;
+	u64  cs_seq;
+	u32  id;
+	u32  q_idx;
+	u32  pre_sob_val;
+	u32  count;
+};
+
 /*
  * IOCTLs
  */
@@ -2373,6 +2657,23 @@ struct hl_ioctl_desc {
  */
 
 /**
+ * hl_get_sg_info() - get number of pages and the DMA address from SG list.
+ * @sg: the SG list.
+ * @dma_addr: pointer to DMA address to return.
+ *
+ * Calculate the number of consecutive pages described by the SG list. Take the
+ * offset of the address in the first page, add to it the length and round it up
+ * to the number of needed pages.
+ */
+static inline u32 hl_get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
+{
+	*dma_addr = sg_dma_address(sg);
+
+	return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) +
+			(PAGE_SIZE - 1)) >> PAGE_SHIFT;
+}
+
+/**
  * hl_mem_area_inside_range() - Checks whether address+size are inside a range.
  * @address: The start address of the area we want to validate.
  * @size: The size in bytes of the area we want to validate.
@@ -2436,7 +2737,9 @@ void destroy_hdev(struct hl_device *hdev);
 int hl_hw_queues_create(struct hl_device *hdev);
 void hl_hw_queues_destroy(struct hl_device *hdev);
 int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
-				u32 cb_size, u64 cb_ptr);
+		u32 cb_size, u64 cb_ptr);
+void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
+		u32 ctl, u32 len, u64 ptr);
 int hl_hw_queue_schedule_cs(struct hl_cs *cs);
 u32 hl_hw_queue_add_ptr(u32 ptr, u16 val);
 void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id);
@@ -2470,6 +2773,8 @@ void hl_ctx_do_release(struct kref *ref);
 void hl_ctx_get(struct hl_device *hdev,	struct hl_ctx *ctx);
 int hl_ctx_put(struct hl_ctx *ctx);
 struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
+int hl_ctx_get_fences(struct hl_ctx *ctx, u64 *seq_arr,
+				struct hl_fence **fence, u32 arr_len);
 void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr);
 void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr);
 
@@ -2511,18 +2816,19 @@ int hl_cb_va_pool_init(struct hl_ctx *ctx);
 void hl_cb_va_pool_fini(struct hl_ctx *ctx);
 
 void hl_cs_rollback_all(struct hl_device *hdev);
-void hl_pending_cb_list_flush(struct hl_ctx *ctx);
 struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
 		enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
 void hl_sob_reset_error(struct kref *ref);
 int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask);
 void hl_fence_put(struct hl_fence *fence);
+void hl_fences_put(struct hl_fence **fence, int len);
 void hl_fence_get(struct hl_fence *fence);
 void cs_get(struct hl_cs *cs);
 bool cs_needs_completion(struct hl_cs *cs);
 bool cs_needs_timeout(struct hl_cs *cs);
 bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs);
 struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq);
+void hl_multi_cs_completion_init(struct hl_device *hdev);
 
 void goya_set_asic_funcs(struct hl_device *hdev);
 void gaudi_set_asic_funcs(struct hl_device *hdev);
@@ -2650,9 +2956,25 @@ int hl_set_voltage(struct hl_device *hdev,
 			int sensor_index, u32 attr, long value);
 int hl_set_current(struct hl_device *hdev,
 			int sensor_index, u32 attr, long value);
+void hw_sob_get(struct hl_hw_sob *hw_sob);
+void hw_sob_put(struct hl_hw_sob *hw_sob);
+void hl_encaps_handle_do_release(struct kref *ref);
+void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
+			struct hl_cs *cs, struct hl_cs_job *job,
+			struct hl_cs_compl *cs_cmpl);
 void hl_release_pending_user_interrupts(struct hl_device *hdev);
 int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
-			struct hl_hw_sob **hw_sob, u32 count);
+			struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig);
+
+int hl_state_dump(struct hl_device *hdev);
+const char *hl_state_dump_get_sync_name(struct hl_device *hdev, u32 sync_id);
+const char *hl_state_dump_get_monitor_name(struct hl_device *hdev,
+					struct hl_mon_state_dump *mon);
+void hl_state_dump_free_sync_to_engine_map(struct hl_sync_to_engine_map *map);
+__printf(4, 5) int hl_snprintf_resize(char **buf, size_t *size, size_t *offset,
+					const char *format, ...);
+char *hl_format_as_binary(char *buf, size_t buf_len, u32 n);
+const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type);
 
 #ifdef CONFIG_DEBUG_FS
 
@@ -2673,6 +2995,8 @@ void hl_debugfs_remove_userptr(struct hl_device *hdev,
 				struct hl_userptr *userptr);
 void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
 void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
+void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
+					unsigned long length);
 
 #else
 
@@ -2746,6 +3070,11 @@ static inline void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev,
 {
 }
 
+static inline void hl_debugfs_set_state_dump(struct hl_device *hdev,
+					char *data, unsigned long length)
+{
+}
+
 #endif
 
 /* IOCTLs */
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 4194cda2d04c..a75e4fceb9d8 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -141,7 +141,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
 	hl_cb_mgr_init(&hpriv->cb_mgr);
 	hl_ctx_mgr_init(&hpriv->ctx_mgr);
 
-	hpriv->taskpid = find_get_pid(current->pid);
+	hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
 
 	mutex_lock(&hdev->fpriv_list_lock);
 
@@ -194,7 +194,6 @@ int hl_device_open(struct inode *inode, struct file *filp)
 
 out_err:
 	mutex_unlock(&hdev->fpriv_list_lock);
-
 	hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
 	hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
 	filp->private_data = NULL;
@@ -318,12 +317,16 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
 		hdev->asic_prop.fw_security_enabled = false;
 
 	/* Assign status description string */
-	strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION],
-					"disabled", HL_STR_MAX);
+	strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL],
+					"operational", HL_STR_MAX);
 	strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET],
 					"in reset", HL_STR_MAX);
+	strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION],
+					"disabled", HL_STR_MAX);
 	strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET],
 					"needs reset", HL_STR_MAX);
+	strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
+					"in device creation", HL_STR_MAX);
 
 	hdev->major = hl_major;
 	hdev->reset_on_lockup = reset_on_lockup;
@@ -532,7 +535,7 @@ hl_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state)
 		result = PCI_ERS_RESULT_NONE;
 	}
 
-	hdev->asic_funcs->halt_engines(hdev, true);
+	hdev->asic_funcs->halt_engines(hdev, true, false);
 
 	return result;
 }
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index f4dda7b4acdd..86c3257d9ae1 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -94,6 +94,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
 
 	hw_ip.first_available_interrupt_id =
 			prop->first_available_user_msix_interrupt;
+	hw_ip.server_type = prop->server_type;
+
 	return copy_to_user(out, &hw_ip,
 		min((size_t) size, sizeof(hw_ip))) ? -EFAULT : 0;
 }
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index bcabfdbf1e01..76b7de8f1406 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -65,7 +65,7 @@ void hl_hw_queue_update_ci(struct hl_cs *cs)
 }
 
 /*
- * ext_and_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a
+ * hl_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a
  *                                H/W queue.
  * @hdev: pointer to habanalabs device structure
  * @q: pointer to habanalabs queue structure
@@ -80,8 +80,8 @@ void hl_hw_queue_update_ci(struct hl_cs *cs)
  * This function must be called when the scheduler mutex is taken
  *
  */
-static void ext_and_hw_queue_submit_bd(struct hl_device *hdev,
-			struct hl_hw_queue *q, u32 ctl, u32 len, u64 ptr)
+void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
+		u32 ctl, u32 len, u64 ptr)
 {
 	struct hl_bd *bd;
 
@@ -222,8 +222,8 @@ static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q,
  * @cb_size: size of CB
  * @cb_ptr: pointer to CB location
  *
- * This function sends a single CB, that must NOT generate a completion entry
- *
+ * This function sends a single CB, that must NOT generate a completion entry.
+ * Sending CPU messages can be done instead via 'hl_hw_queue_submit_bd()'
  */
 int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
 				u32 cb_size, u64 cb_ptr)
@@ -231,16 +231,7 @@ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
 	struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
 	int rc = 0;
 
-	/*
-	 * The CPU queue is a synchronous queue with an effective depth of
-	 * a single entry (although it is allocated with room for multiple
-	 * entries). Therefore, there is a different lock, called
-	 * send_cpu_message_lock, that serializes accesses to the CPU queue.
-	 * As a result, we don't need to lock the access to the entire H/W
-	 * queues module when submitting a JOB to the CPU queue
-	 */
-	if (q->queue_type != QUEUE_TYPE_CPU)
-		hdev->asic_funcs->hw_queues_lock(hdev);
+	hdev->asic_funcs->hw_queues_lock(hdev);
 
 	if (hdev->disabled) {
 		rc = -EPERM;
@@ -258,11 +249,10 @@ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
 			goto out;
 	}
 
-	ext_and_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);
+	hl_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);
 
 out:
-	if (q->queue_type != QUEUE_TYPE_CPU)
-		hdev->asic_funcs->hw_queues_unlock(hdev);
+	hdev->asic_funcs->hw_queues_unlock(hdev);
 
 	return rc;
 }
@@ -328,7 +318,7 @@ static void ext_queue_schedule_job(struct hl_cs_job *job)
 	cq->pi = hl_cq_inc_ptr(cq->pi);
 
 submit_bd:
-	ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
+	hl_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
 }
 
 /*
@@ -407,7 +397,7 @@ static void hw_queue_schedule_job(struct hl_cs_job *job)
 	else
 		ptr = (u64) (uintptr_t) job->user_cb;
 
-	ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
+	hl_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
 }
 
 static int init_signal_cs(struct hl_device *hdev,
@@ -426,8 +416,9 @@ static int init_signal_cs(struct hl_device *hdev,
 	cs_cmpl->sob_val = prop->next_sob_val;
 
 	dev_dbg(hdev->dev,
-		"generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
-		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);
+		"generate signal CB, sob_id: %d, sob val: %u, q_idx: %d, seq: %llu\n",
+		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx,
+		cs_cmpl->cs_seq);
 
 	/* we set an EB since we must make sure all oeprations are done
 	 * when sending the signal
@@ -435,17 +426,37 @@ static int init_signal_cs(struct hl_device *hdev,
 	hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
 				cs_cmpl->hw_sob->sob_id, 0, true);
 
-	rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, 1);
+	rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, 1,
+								false);
 
 	return rc;
 }
 
-static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
+void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
+			struct hl_cs *cs, struct hl_cs_job *job,
+			struct hl_cs_compl *cs_cmpl)
+{
+	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
+
+	cs_cmpl->hw_sob = handle->hw_sob;
+
+	/* Note that encaps_sig_wait_offset was validated earlier in the flow
+	 * for offset value which exceeds the max reserved signal count.
+	 * always decrement 1 of the offset since when the user
+	 * set offset 1 for example he mean to wait only for the first
+	 * signal only, which will be pre_sob_val, and if he set offset 2
+	 * then the value required is (pre_sob_val + 1) and so on...
+	 */
+	cs_cmpl->sob_val = handle->pre_sob_val +
+			(job->encaps_sig_wait_offset - 1);
+}
+
+static int init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
 		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
 {
-	struct hl_cs_compl *signal_cs_cmpl;
-	struct hl_sync_stream_properties *prop;
 	struct hl_gen_wait_properties wait_prop;
+	struct hl_sync_stream_properties *prop;
+	struct hl_cs_compl *signal_cs_cmpl;
 	u32 q_idx;
 
 	q_idx = job->hw_queue_id;
@@ -455,14 +466,51 @@ static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
 					struct hl_cs_compl,
 					base_fence);
 
-	/* copy the SOB id and value of the signal CS */
-	cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
-	cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
+	if (cs->encaps_signals) {
+		/* use the encaps signal handle stored earlier in the flow
+		 * and set the SOB information from the encaps
+		 * signals handle
+		 */
+		hl_hw_queue_encaps_sig_set_sob_info(hdev, cs, job, cs_cmpl);
+
+		dev_dbg(hdev->dev, "Wait for encaps signals handle, qidx(%u), CS sequence(%llu), sob val: 0x%x, offset: %u\n",
+				cs->encaps_sig_hdl->q_idx,
+				cs->encaps_sig_hdl->cs_seq,
+				cs_cmpl->sob_val,
+				job->encaps_sig_wait_offset);
+	} else {
+		/* Copy the SOB id and value of the signal CS */
+		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
+		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
+	}
+
+	/* check again if the signal cs already completed.
+	 * if yes then don't send any wait cs since the hw_sob
+	 * could be in reset already. if signal is not completed
+	 * then get refcount to hw_sob to prevent resetting the sob
+	 * while wait cs is not submitted.
+	 * note that this check is protected by two locks,
+	 * hw queue lock and completion object lock,
+	 * and the same completion object lock also protects
+	 * the hw_sob reset handler function.
+	 * The hw_queue lock prevent out of sync of hw_sob
+	 * refcount value, changed by signal/wait flows.
+	 */
+	spin_lock(&signal_cs_cmpl->lock);
+
+	if (completion_done(&cs->signal_fence->completion)) {
+		spin_unlock(&signal_cs_cmpl->lock);
+		return -EINVAL;
+	}
+
+	kref_get(&cs_cmpl->hw_sob->kref);
+
+	spin_unlock(&signal_cs_cmpl->lock);
 
 	dev_dbg(hdev->dev,
-		"generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n",
+		"generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d, seq: %llu\n",
 		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
-		prop->base_mon_id, q_idx);
+		prop->base_mon_id, q_idx, cs->sequence);
 
 	wait_prop.data = (void *) job->patched_cb;
 	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
@@ -471,17 +519,14 @@ static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
 	wait_prop.mon_id = prop->base_mon_id;
 	wait_prop.q_idx = q_idx;
 	wait_prop.size = 0;
+
 	hdev->asic_funcs->gen_wait_cb(hdev, &wait_prop);
 
-	kref_get(&cs_cmpl->hw_sob->kref);
-	/*
-	 * Must put the signal fence after the SOB refcnt increment so
-	 * the SOB refcnt won't turn 0 and reset the SOB before the
-	 * wait CS was submitted.
-	 */
 	mb();
 	hl_fence_put(cs->signal_fence);
 	cs->signal_fence = NULL;
+
+	return 0;
 }
 
 /*
@@ -506,7 +551,60 @@ static int init_signal_wait_cs(struct hl_cs *cs)
 	if (cs->type & CS_TYPE_SIGNAL)
 		rc = init_signal_cs(hdev, job, cs_cmpl);
 	else if (cs->type & CS_TYPE_WAIT)
-		init_wait_cs(hdev, cs, job, cs_cmpl);
+		rc = init_wait_cs(hdev, cs, job, cs_cmpl);
+
+	return rc;
+}
+
+static int encaps_sig_first_staged_cs_handler
+			(struct hl_device *hdev, struct hl_cs *cs)
+{
+	struct hl_cs_compl *cs_cmpl =
+			container_of(cs->fence,
+					struct hl_cs_compl, base_fence);
+	struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
+	struct hl_encaps_signals_mgr *mgr;
+	int rc = 0;
+
+	mgr = &hdev->compute_ctx->sig_mgr;
+
+	spin_lock(&mgr->lock);
+	encaps_sig_hdl = idr_find(&mgr->handles, cs->encaps_sig_hdl_id);
+	if (encaps_sig_hdl) {
+		/*
+		 * Set handler CS sequence,
+		 * the CS which contains the encapsulated signals.
+		 */
+		encaps_sig_hdl->cs_seq = cs->sequence;
+		/* store the handle and set encaps signal indication,
+		 * to be used later in cs_do_release to put the last
+		 * reference to encaps signals handlers.
+		 */
+		cs_cmpl->encaps_signals = true;
+		cs_cmpl->encaps_sig_hdl = encaps_sig_hdl;
+
+		/* set hw_sob pointer in completion object
+		 * since it's used in cs_do_release flow to put
+		 * refcount to sob
+		 */
+		cs_cmpl->hw_sob = encaps_sig_hdl->hw_sob;
+		cs_cmpl->sob_val = encaps_sig_hdl->pre_sob_val +
+						encaps_sig_hdl->count;
+
+		dev_dbg(hdev->dev, "CS seq (%llu) added to encaps signal handler id (%u), count(%u), qidx(%u), sob(%u), val(%u)\n",
+				cs->sequence, encaps_sig_hdl->id,
+				encaps_sig_hdl->count,
+				encaps_sig_hdl->q_idx,
+				cs_cmpl->hw_sob->sob_id,
+				cs_cmpl->sob_val);
+
+	} else {
+		dev_err(hdev->dev, "encaps handle id(%u) wasn't found!\n",
+				cs->encaps_sig_hdl_id);
+		rc = -EINVAL;
+	}
+
+	spin_unlock(&mgr->lock);
 
 	return rc;
 }
@@ -581,14 +679,21 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 
 	if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT)) {
 		rc = init_signal_wait_cs(cs);
-		if (rc) {
-			dev_err(hdev->dev, "Failed to submit signal cs\n");
+		if (rc)
 			goto unroll_cq_resv;
-		}
-	} else if (cs->type == CS_TYPE_COLLECTIVE_WAIT)
-		hdev->asic_funcs->collective_wait_init_cs(cs);
+	} else if (cs->type == CS_TYPE_COLLECTIVE_WAIT) {
+		rc = hdev->asic_funcs->collective_wait_init_cs(cs);
+		if (rc)
+			goto unroll_cq_resv;
+	}
 
 
+	if (cs->encaps_signals && cs->staged_first) {
+		rc = encaps_sig_first_staged_cs_handler(hdev, cs);
+		if (rc)
+			goto unroll_cq_resv;
+	}
+
 	spin_lock(&hdev->cs_mirror_lock);
 
 	/* Verify staged CS exists and add to the staged list */
@@ -613,6 +718,11 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 		}
 
 		list_add_tail(&cs->staged_cs_node, &staged_cs->staged_cs_node);
+
+		/* update stream map of the first CS */
+		if (hdev->supports_wait_for_multi_cs)
+			staged_cs->fence->stream_master_qid_map |=
+					cs->fence->stream_master_qid_map;
 	}
 
 	list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list);
@@ -834,6 +944,8 @@ static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
 		hw_sob = &sync_stream_prop->hw_sob[sob];
 		hw_sob->hdev = hdev;
 		hw_sob->sob_id = sync_stream_prop->base_sob_id + sob;
+		hw_sob->sob_addr =
+			hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id);
 		hw_sob->q_idx = q_idx;
 		kref_init(&hw_sob->kref);
 	}
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index af339ce1ab4f..33986933aa9e 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -124,7 +124,7 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
 
 	spin_lock(&vm->idr_lock);
 	handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0,
-				GFP_KERNEL);
+				GFP_ATOMIC);
 	spin_unlock(&vm->idr_lock);
 
 	if (handle < 0) {
@@ -529,6 +529,33 @@ static inline int add_va_block(struct hl_device *hdev,
 }
 
 /**
+ * is_hint_crossing_range() - check if hint address crossing specified reserved
+ * range.
+ */
+static inline bool is_hint_crossing_range(enum hl_va_range_type range_type,
+		u64 start_addr, u32 size, struct asic_fixed_properties *prop) {
+	bool range_cross;
+
+	if (range_type == HL_VA_RANGE_TYPE_DRAM)
+		range_cross =
+			hl_mem_area_crosses_range(start_addr, size,
+			prop->hints_dram_reserved_va_range.start_addr,
+			prop->hints_dram_reserved_va_range.end_addr);
+	else if (range_type == HL_VA_RANGE_TYPE_HOST)
+		range_cross =
+			hl_mem_area_crosses_range(start_addr,	size,
+			prop->hints_host_reserved_va_range.start_addr,
+			prop->hints_host_reserved_va_range.end_addr);
+	else
+		range_cross =
+			hl_mem_area_crosses_range(start_addr, size,
+			prop->hints_host_hpage_reserved_va_range.start_addr,
+			prop->hints_host_hpage_reserved_va_range.end_addr);
+
+	return range_cross;
+}
+
+/**
  * get_va_block() - get a virtual block for the given size and alignment.
  *
  * @hdev: pointer to the habanalabs device structure.
@@ -536,6 +563,8 @@ static inline int add_va_block(struct hl_device *hdev,
  * @size: requested block size.
  * @hint_addr: hint for requested address by the user.
  * @va_block_align: required alignment of the virtual block start address.
+ * @range_type: va range type (host, dram)
+ * @flags: additional memory flags, currently only uses HL_MEM_FORCE_HINT
  *
  * This function does the following:
  * - Iterate on the virtual block list to find a suitable virtual block for the
@@ -545,13 +574,19 @@ static inline int add_va_block(struct hl_device *hdev,
  */
 static u64 get_va_block(struct hl_device *hdev,
 				struct hl_va_range *va_range,
-				u64 size, u64 hint_addr, u32 va_block_align)
+				u64 size, u64 hint_addr, u32 va_block_align,
+				enum hl_va_range_type range_type,
+				u32 flags)
 {
 	struct hl_vm_va_block *va_block, *new_va_block = NULL;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	u64 tmp_hint_addr, valid_start, valid_size, prev_start, prev_end,
-		align_mask, reserved_valid_start = 0, reserved_valid_size = 0;
+		align_mask, reserved_valid_start = 0, reserved_valid_size = 0,
+		dram_hint_mask = prop->dram_hints_align_mask;
 	bool add_prev = false;
 	bool is_align_pow_2  = is_power_of_2(va_range->page_size);
+	bool is_hint_dram_addr = hl_is_dram_va(hdev, hint_addr);
+	bool force_hint = flags & HL_MEM_FORCE_HINT;
 
 	if (is_align_pow_2)
 		align_mask = ~((u64)va_block_align - 1);
@@ -564,12 +599,20 @@ static u64 get_va_block(struct hl_device *hdev,
 		size = DIV_ROUND_UP_ULL(size, va_range->page_size) *
 							va_range->page_size;
 
-	tmp_hint_addr = hint_addr;
+	tmp_hint_addr = hint_addr & ~dram_hint_mask;
 
 	/* Check if we need to ignore hint address */
 	if ((is_align_pow_2 && (hint_addr & (va_block_align - 1))) ||
-			(!is_align_pow_2 &&
-				do_div(tmp_hint_addr, va_range->page_size))) {
+		(!is_align_pow_2 && is_hint_dram_addr &&
+			do_div(tmp_hint_addr, va_range->page_size))) {
+
+		if (force_hint) {
+			/* Hint must be respected, so here we just fail */
+			dev_err(hdev->dev,
+				"Hint address 0x%llx is not page aligned - cannot be respected\n",
+				hint_addr);
+			return 0;
+		}
 
 		dev_dbg(hdev->dev,
 			"Hint address 0x%llx will be ignored because it is not aligned\n",
@@ -596,6 +639,16 @@ static u64 get_va_block(struct hl_device *hdev,
 		if (valid_size < size)
 			continue;
 
+		/*
+		 * In case hint address is 0, and arc_hints_range_reservation
+		 * property enabled, then avoid allocating va blocks from the
+		 * range reserved for hint addresses
+		 */
+		if (prop->hints_range_reservation && !hint_addr)
+			if (is_hint_crossing_range(range_type, valid_start,
+					size, prop))
+				continue;
+
 		/* Pick the minimal length block which has the required size */
 		if (!new_va_block || (valid_size < reserved_valid_size)) {
 			new_va_block = va_block;
@@ -618,6 +671,17 @@ static u64 get_va_block(struct hl_device *hdev,
 		goto out;
 	}
 
+	if (force_hint && reserved_valid_start != hint_addr) {
+		/* Hint address must be respected. If we are here - this means
+		 * we could not respect it.
+		 */
+		dev_err(hdev->dev,
+			"Hint address 0x%llx could not be respected\n",
+			hint_addr);
+		reserved_valid_start = 0;
+		goto out;
+	}
+
 	/*
 	 * Check if there is some leftover range due to reserving the new
 	 * va block, then return it to the main virtual addresses list.
@@ -670,7 +734,8 @@ u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
 		enum hl_va_range_type type, u32 size, u32 alignment)
 {
 	return get_va_block(hdev, ctx->va_range[type], size, 0,
-			max(alignment, ctx->va_range[type]->page_size));
+			max(alignment, ctx->va_range[type]->page_size),
+			type, 0);
 }
 
 /**
@@ -732,28 +797,15 @@ int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
 }
 
 /**
- * get_sg_info() - get number of pages and the DMA address from SG list.
- * @sg: the SG list.
- * @dma_addr: pointer to DMA address to return.
- *
- * Calculate the number of consecutive pages described by the SG list. Take the
- * offset of the address in the first page, add to it the length and round it up
- * to the number of needed pages.
- */
-static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
-{
-	*dma_addr = sg_dma_address(sg);
-
-	return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) +
-			(PAGE_SIZE - 1)) >> PAGE_SHIFT;
-}
-
-/**
  * init_phys_pg_pack_from_userptr() - initialize physical page pack from host
  *                                    memory
  * @ctx: pointer to the context structure.
  * @userptr: userptr to initialize from.
  * @pphys_pg_pack: result pointer.
+ * @force_regular_page: tell the function to ignore huge page optimization,
+ *                      even if possible. Needed for cases where the device VA
+ *                      is allocated before we know the composition of the
+ *                      physical pages
  *
  * This function does the following:
  * - Pin the physical pages related to the given virtual block.
@@ -762,17 +814,18 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
  */
 static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
 				struct hl_userptr *userptr,
-				struct hl_vm_phys_pg_pack **pphys_pg_pack)
+				struct hl_vm_phys_pg_pack **pphys_pg_pack,
+				bool force_regular_page)
 {
+	u32 npages, page_size = PAGE_SIZE,
+		huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size;
+	u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
 	struct hl_vm_phys_pg_pack *phys_pg_pack;
+	bool first = true, is_huge_page_opt;
+	u64 page_mask, total_npages;
 	struct scatterlist *sg;
 	dma_addr_t dma_addr;
-	u64 page_mask, total_npages;
-	u32 npages, page_size = PAGE_SIZE,
-		huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size;
-	bool first = true, is_huge_page_opt = true;
 	int rc, i, j;
-	u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
 
 	phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
 	if (!phys_pg_pack)
@@ -783,6 +836,8 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
 	phys_pg_pack->asid = ctx->asid;
 	atomic_set(&phys_pg_pack->mapping_cnt, 1);
 
+	is_huge_page_opt = (force_regular_page ? false : true);
+
 	/* Only if all dma_addrs are aligned to 2MB and their
 	 * sizes is at least 2MB, we can use huge page mapping.
 	 * We limit the 2MB optimization to this condition,
@@ -791,7 +846,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
 	 */
 	total_npages = 0;
 	for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
-		npages = get_sg_info(sg, &dma_addr);
+		npages = hl_get_sg_info(sg, &dma_addr);
 
 		total_npages += npages;
 
@@ -820,7 +875,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
 
 	j = 0;
 	for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
-		npages = get_sg_info(sg, &dma_addr);
+		npages = hl_get_sg_info(sg, &dma_addr);
 
 		/* align down to physical page size and save the offset */
 		if (first) {
@@ -1001,11 +1056,12 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 	struct hl_userptr *userptr = NULL;
 	struct hl_vm_hash_node *hnode;
 	struct hl_va_range *va_range;
-	enum vm_type_t *vm_type;
+	enum vm_type *vm_type;
 	u64 ret_vaddr, hint_addr;
 	u32 handle = 0, va_block_align;
 	int rc;
 	bool is_userptr = args->flags & HL_MEM_USERPTR;
+	enum hl_va_range_type va_range_type = 0;
 
 	/* Assume failure */
 	*device_addr = 0;
@@ -1023,7 +1079,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 		}
 
 		rc = init_phys_pg_pack_from_userptr(ctx, userptr,
-				&phys_pg_pack);
+				&phys_pg_pack, false);
 		if (rc) {
 			dev_err(hdev->dev,
 				"unable to init page pack for vaddr 0x%llx\n",
@@ -1031,14 +1087,14 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 			goto init_page_pack_err;
 		}
 
-		vm_type = (enum vm_type_t *) userptr;
+		vm_type = (enum vm_type *) userptr;
 		hint_addr = args->map_host.hint_addr;
 		handle = phys_pg_pack->handle;
 
 		/* get required alignment */
 		if (phys_pg_pack->page_size == page_size) {
 			va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST];
-
+			va_range_type = HL_VA_RANGE_TYPE_HOST;
 			/*
 			 * huge page alignment may be needed in case of regular
 			 * page mapping, depending on the host VA alignment
@@ -1053,6 +1109,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 			 * mapping
 			 */
 			va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE];
+			va_range_type = HL_VA_RANGE_TYPE_HOST_HUGE;
 			va_block_align = huge_page_size;
 		}
 	} else {
@@ -1072,12 +1129,13 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 
 		spin_unlock(&vm->idr_lock);
 
-		vm_type = (enum vm_type_t *) phys_pg_pack;
+		vm_type = (enum vm_type *) phys_pg_pack;
 
 		hint_addr = args->map_device.hint_addr;
 
 		/* DRAM VA alignment is the same as the MMU page size */
 		va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM];
+		va_range_type = HL_VA_RANGE_TYPE_DRAM;
 		va_block_align = hdev->asic_prop.dmmu.page_size;
 	}
 
@@ -1100,8 +1158,23 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 		goto hnode_err;
 	}
 
+	if (hint_addr && phys_pg_pack->offset) {
+		if (args->flags & HL_MEM_FORCE_HINT) {
+			/* Fail if hint must be respected but it can't be */
+			dev_err(hdev->dev,
+				"Hint address 0x%llx cannot be respected because source memory is not aligned 0x%x\n",
+				hint_addr, phys_pg_pack->offset);
+			rc = -EINVAL;
+			goto va_block_err;
+		}
+		dev_dbg(hdev->dev,
+			"Hint address 0x%llx will be ignored because source memory is not aligned 0x%x\n",
+			hint_addr, phys_pg_pack->offset);
+	}
+
 	ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
-					hint_addr, va_block_align);
+					hint_addr, va_block_align,
+					va_range_type, args->flags);
 	if (!ret_vaddr) {
 		dev_err(hdev->dev, "no available va block for handle %u\n",
 				handle);
@@ -1181,17 +1254,19 @@ init_page_pack_err:
 static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 				bool ctx_free)
 {
-	struct hl_device *hdev = ctx->hdev;
-	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
+	u64 vaddr = args->unmap.device_virt_addr;
 	struct hl_vm_hash_node *hnode = NULL;
+	struct asic_fixed_properties *prop;
+	struct hl_device *hdev = ctx->hdev;
 	struct hl_userptr *userptr = NULL;
 	struct hl_va_range *va_range;
-	u64 vaddr = args->unmap.device_virt_addr;
-	enum vm_type_t *vm_type;
+	enum vm_type *vm_type;
 	bool is_userptr;
 	int rc = 0;
 
+	prop = &hdev->asic_prop;
+
 	/* protect from double entrance */
 	mutex_lock(&ctx->mem_hash_lock);
 	hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr)
@@ -1214,8 +1289,9 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 	if (*vm_type == VM_TYPE_USERPTR) {
 		is_userptr = true;
 		userptr = hnode->ptr;
-		rc = init_phys_pg_pack_from_userptr(ctx, userptr,
-							&phys_pg_pack);
+
+		rc = init_phys_pg_pack_from_userptr(ctx, userptr, &phys_pg_pack,
+							false);
 		if (rc) {
 			dev_err(hdev->dev,
 				"unable to init page pack for vaddr 0x%llx\n",
@@ -1299,7 +1375,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 	kfree(hnode);
 
 	if (is_userptr) {
-		rc = free_phys_pg_pack(hdev, phys_pg_pack);
+		free_phys_pg_pack(hdev, phys_pg_pack);
 		dma_unmap_host_va(hdev, userptr);
 	}
 
@@ -1669,6 +1745,7 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
 		return -EINVAL;
 	}
 
+	userptr->pid = current->pid;
 	userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_KERNEL);
 	if (!userptr->sgt)
 		return -ENOMEM;
@@ -2033,7 +2110,7 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
 	 * another side effect error
 	 */
 	if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash))
-		dev_notice(hdev->dev,
+		dev_dbg(hdev->dev,
 			"user released device without removing its memory mappings\n");
 
 	hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) {
diff --git a/drivers/misc/habanalabs/common/mmu/mmu_v1.c b/drivers/misc/habanalabs/common/mmu/mmu_v1.c
index c5e93ff32586..0f536f79dd9c 100644
--- a/drivers/misc/habanalabs/common/mmu/mmu_v1.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu_v1.c
@@ -470,13 +470,13 @@ static void hl_mmu_v1_fini(struct hl_device *hdev)
 	if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.hr.mmu_shadow_hop0)) {
 		kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
 		gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
-	}
 
-	/* Make sure that if we arrive here again without init was called we
-	 * won't cause kernel panic. This can happen for example if we fail
-	 * during hard reset code at certain points
-	 */
-	hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
+		/* Make sure that if we arrive here again without init was
+		 * called we won't cause kernel panic. This can happen for
+		 * example if we fail during hard reset code at certain points
+		 */
+		hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
+	}
 }
 
 /**
diff --git a/drivers/misc/habanalabs/common/pci/pci.c b/drivers/misc/habanalabs/common/pci/pci.c
index d5bedf5ba011..0b5366cc84fd 100644
--- a/drivers/misc/habanalabs/common/pci/pci.c
+++ b/drivers/misc/habanalabs/common/pci/pci.c
@@ -436,6 +436,8 @@ int hl_pci_init(struct hl_device *hdev)
 		goto unmap_pci_bars;
 	}
 
+	dma_set_max_seg_size(&pdev->dev, U32_MAX);
+
 	return 0;
 
 unmap_pci_bars:
diff --git a/drivers/misc/habanalabs/common/state_dump.c b/drivers/misc/habanalabs/common/state_dump.c
new file mode 100644
index 000000000000..74726907c95e
--- /dev/null
+++ b/drivers/misc/habanalabs/common/state_dump.c
@@ -0,0 +1,718 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2021 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include <linux/vmalloc.h>
+#include <uapi/misc/habanalabs.h>
+#include "habanalabs.h"
+
+/**
+ * hl_format_as_binary - helper function, format an integer as binary
+ *                       using supplied scratch buffer
+ * @buf: the buffer to use
+ * @buf_len: buffer capacity
+ * @n: number to format
+ *
+ * Returns pointer to buffer
+ */
+char *hl_format_as_binary(char *buf, size_t buf_len, u32 n)
+{
+	int i;
+	u32 bit;
+	bool leading0 = true;
+	char *wrptr = buf;
+
+	if (buf_len > 0 && buf_len < 3) {
+		*wrptr = '\0';
+		return buf;
+	}
+
+	wrptr[0] = '0';
+	wrptr[1] = 'b';
+	wrptr += 2;
+	/* Remove 3 characters from length for '0b' and '\0' termination */
+	buf_len -= 3;
+
+	for (i = 0; i < sizeof(n) * BITS_PER_BYTE && buf_len; ++i, n <<= 1) {
+		/* Writing bit calculation in one line would cause a false
+		 * positive static code analysis error, so splitting.
+		 */
+		bit = n & (1 << (sizeof(n) * BITS_PER_BYTE - 1));
+		bit = !!bit;
+		leading0 &= !bit;
+		if (!leading0) {
+			*wrptr = '0' + bit;
+			++wrptr;
+		}
+	}
+
+	*wrptr = '\0';
+
+	return buf;
+}
+
+/**
+ * resize_to_fit - helper function, resize buffer to fit given amount of data
+ * @buf: destination buffer double pointer
+ * @size: pointer to the size container
+ * @desired_size: size the buffer must contain
+ *
+ * Returns 0 on success or error code on failure.
+ * On success, the size of buffer is at least desired_size. Buffer is allocated
+ * via vmalloc and must be freed with vfree.
+ */
+static int resize_to_fit(char **buf, size_t *size, size_t desired_size)
+{
+	char *resized_buf;
+	size_t new_size;
+
+	if (*size >= desired_size)
+		return 0;
+
+	/* Not enough space to print all, have to resize */
+	new_size = max_t(size_t, PAGE_SIZE, round_up(desired_size, PAGE_SIZE));
+	resized_buf = vmalloc(new_size);
+	if (!resized_buf)
+		return -ENOMEM;
+	memcpy(resized_buf, *buf, *size);
+	vfree(*buf);
+	*buf = resized_buf;
+	*size = new_size;
+
+	return 1;
+}
+
+/**
+ * hl_snprintf_resize() - print formatted data to buffer, resize as needed
+ * @buf: buffer double pointer, to be written to and resized, must be either
+ *       NULL or allocated with vmalloc.
+ * @size: current size of the buffer
+ * @offset: current offset to write to
+ * @format: format of the data
+ *
+ * This function will write formatted data into the buffer. If buffer is not
+ * large enough, it will be resized using vmalloc. Size may be modified if the
+ * buffer was resized, offset will be advanced by the number of bytes written
+ * not including the terminating character
+ *
+ * Returns 0 on success or error code on failure
+ *
+ * Note that the buffer has to be manually released using vfree.
+ */
+int hl_snprintf_resize(char **buf, size_t *size, size_t *offset,
+			   const char *format, ...)
+{
+	va_list args;
+	size_t length;
+	int rc;
+
+	if (*buf == NULL && (*size != 0 || *offset != 0))
+		return -EINVAL;
+
+	va_start(args, format);
+	length = vsnprintf(*buf + *offset, *size - *offset, format, args);
+	va_end(args);
+
+	rc = resize_to_fit(buf, size, *offset + length + 1);
+	if (rc < 0)
+		return rc;
+	else if (rc > 0) {
+		/* Resize was needed, write again */
+		va_start(args, format);
+		length = vsnprintf(*buf + *offset, *size - *offset, format,
+				   args);
+		va_end(args);
+	}
+
+	*offset += length;
+
+	return 0;
+}
+
+/**
+ * hl_sync_engine_to_string - convert engine type enum to string literal
+ * @engine_type: engine type (TPC/MME/DMA)
+ *
+ * Return the resolved string literal
+ */
+const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type)
+{
+	switch (engine_type) {
+	case ENGINE_DMA:
+		return "DMA";
+	case ENGINE_MME:
+		return "MME";
+	case ENGINE_TPC:
+		return "TPC";
+	}
+	return "Invalid Engine Type";
+}
+
+/**
+ * hl_print_resize_sync_engine - helper function, format engine name and ID
+ * using hl_snprintf_resize
+ * @buf: destination buffer double pointer to be used with hl_snprintf_resize
+ * @size: pointer to the size container
+ * @offset: pointer to the offset container
+ * @engine_type: engine type (TPC/MME/DMA)
+ * @engine_id: engine numerical id
+ *
+ * Returns 0 on success or error code on failure
+ */
+static int hl_print_resize_sync_engine(char **buf, size_t *size, size_t *offset,
+				enum hl_sync_engine_type engine_type,
+				u32 engine_id)
+{
+	return hl_snprintf_resize(buf, size, offset, "%s%u",
+			hl_sync_engine_to_string(engine_type), engine_id);
+}
+
+/**
+ * hl_state_dump_get_sync_name - transform sync object id to name if available
+ * @hdev: pointer to the device
+ * @sync_id: sync object id
+ *
+ * Returns a name literal or NULL if not resolved.
+ * Note: returning NULL shall not be considered as a failure, as not all
+ * sync objects are named.
+ */
+const char *hl_state_dump_get_sync_name(struct hl_device *hdev, u32 sync_id)
+{
+	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+	struct hl_hw_obj_name_entry *entry;
+
+	hash_for_each_possible(sds->so_id_to_str_tb, entry,
+				node, sync_id)
+		if (sync_id == entry->id)
+			return entry->name;
+
+	return NULL;
+}
+
+/**
+ * hl_state_dump_get_monitor_name - transform monitor object dump to monitor
+ * name if available
+ * @hdev: pointer to the device
+ * @mon: monitor state dump
+ *
+ * Returns a name literal or NULL if not resolved.
+ * Note: returning NULL shall not be considered as a failure, as not all
+ * monitors are named.
+ */
+const char *hl_state_dump_get_monitor_name(struct hl_device *hdev,
+					struct hl_mon_state_dump *mon)
+{
+	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+	struct hl_hw_obj_name_entry *entry;
+
+	hash_for_each_possible(sds->monitor_id_to_str_tb,
+				entry, node, mon->id)
+		if (mon->id == entry->id)
+			return entry->name;
+
+	return NULL;
+}
+
+/**
+ * hl_state_dump_free_sync_to_engine_map - free sync object to engine map
+ * @map: sync object to engine map
+ *
+ * Note: generic free implementation, the allocation is implemented per ASIC.
+ */
+void hl_state_dump_free_sync_to_engine_map(struct hl_sync_to_engine_map *map)
+{
+	struct hl_sync_to_engine_map_entry *entry;
+	struct hlist_node *tmp_node;
+	int i;
+
+	hash_for_each_safe(map->tb, i, tmp_node, entry, node) {
+		hash_del(&entry->node);
+		kfree(entry);
+	}
+}
+
+/**
+ * hl_state_dump_get_sync_to_engine - transform sync_id to
+ * hl_sync_to_engine_map_entry if available for current id
+ * @map: sync object to engine map
+ * @sync_id: sync object id
+ *
+ * Returns the translation entry if found or NULL if not.
+ * Note, returned NULL shall not be considered as a failure as the map
+ * does not cover all possible, it is a best effort sync ids.
+ */
+static struct hl_sync_to_engine_map_entry *
+hl_state_dump_get_sync_to_engine(struct hl_sync_to_engine_map *map, u32 sync_id)
+{
+	struct hl_sync_to_engine_map_entry *entry;
+
+	hash_for_each_possible(map->tb, entry, node, sync_id)
+		if (entry->sync_id == sync_id)
+			return entry;
+	return NULL;
+}
+
+/**
+ * hl_state_dump_read_sync_objects - read sync objects array
+ * @hdev: pointer to the device
+ * @index: sync manager block index starting with E_N
+ *
+ * Returns array of size SP_SYNC_OBJ_AMOUNT on success or NULL on failure
+ */
+static u32 *hl_state_dump_read_sync_objects(struct hl_device *hdev, u32 index)
+{
+	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+	u32 *sync_objects;
+	s64 base_addr; /* Base addr can be negative */
+	int i;
+
+	base_addr = sds->props[SP_SYNC_OBJ_BASE_ADDR] +
+			sds->props[SP_NEXT_SYNC_OBJ_ADDR] * index;
+
+	sync_objects = vmalloc(sds->props[SP_SYNC_OBJ_AMOUNT] * sizeof(u32));
+	if (!sync_objects)
+		return NULL;
+
+	for (i = 0; i < sds->props[SP_SYNC_OBJ_AMOUNT]; ++i)
+		sync_objects[i] = RREG32(base_addr + i * sizeof(u32));
+
+	return sync_objects;
+}
+
+/**
+ * hl_state_dump_free_sync_objects - free sync objects array allocated by
+ * hl_state_dump_read_sync_objects
+ * @sync_objects: sync objects array
+ */
+static void hl_state_dump_free_sync_objects(u32 *sync_objects)
+{
+	vfree(sync_objects);
+}
+
+
+/**
+ * hl_state_dump_print_syncs_single_block - print active sync objects on a
+ * single block
+ * @hdev: pointer to the device
+ * @index: sync manager block index starting with E_N
+ * @buf: destination buffer double pointer to be used with hl_snprintf_resize
+ * @size: pointer to the size container
+ * @offset: pointer to the offset container
+ * @map: sync engines names map
+ *
+ * Returns 0 on success or error code on failure
+ */
+static int
+hl_state_dump_print_syncs_single_block(struct hl_device *hdev, u32 index,
+				char **buf, size_t *size, size_t *offset,
+				struct hl_sync_to_engine_map *map)
+{
+	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+	const char *sync_name;
+	u32 *sync_objects = NULL;
+	int rc = 0, i;
+
+	if (sds->sync_namager_names) {
+		rc = hl_snprintf_resize(
+			buf, size, offset, "%s\n",
+			sds->sync_namager_names[index]);
+		if (rc)
+			goto out;
+	}
+
+	sync_objects = hl_state_dump_read_sync_objects(hdev, index);
+	if (!sync_objects) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	for (i = 0; i < sds->props[SP_SYNC_OBJ_AMOUNT]; ++i) {
+		struct hl_sync_to_engine_map_entry *entry;
+		u64 sync_object_addr;
+
+		if (!sync_objects[i])
+			continue;
+
+		sync_object_addr = sds->props[SP_SYNC_OBJ_BASE_ADDR] +
+				sds->props[SP_NEXT_SYNC_OBJ_ADDR] * index +
+				i * sizeof(u32);
+
+		rc = hl_snprintf_resize(buf, size, offset, "sync id: %u", i);
+		if (rc)
+			goto free_sync_objects;
+		sync_name = hl_state_dump_get_sync_name(hdev, i);
+		if (sync_name) {
+			rc = hl_snprintf_resize(buf, size, offset, " %s",
+						sync_name);
+			if (rc)
+				goto free_sync_objects;
+		}
+		rc = hl_snprintf_resize(buf, size, offset, ", value: %u",
+					sync_objects[i]);
+		if (rc)
+			goto free_sync_objects;
+
+		/* Append engine string */
+		entry = hl_state_dump_get_sync_to_engine(map,
+			(u32)sync_object_addr);
+		if (entry) {
+			rc = hl_snprintf_resize(buf, size, offset,
+						", Engine: ");
+			if (rc)
+				goto free_sync_objects;
+			rc = hl_print_resize_sync_engine(buf, size, offset,
+						entry->engine_type,
+						entry->engine_id);
+			if (rc)
+				goto free_sync_objects;
+		}
+
+		rc = hl_snprintf_resize(buf, size, offset, "\n");
+		if (rc)
+			goto free_sync_objects;
+	}
+
+free_sync_objects:
+	hl_state_dump_free_sync_objects(sync_objects);
+out:
+	return rc;
+}
+
+/**
+ * hl_state_dump_print_syncs - print active sync objects
+ * @hdev: pointer to the device
+ * @buf: destination buffer double pointer to be used with hl_snprintf_resize
+ * @size: pointer to the size container
+ * @offset: pointer to the offset container
+ *
+ * Returns 0 on success or error code on failure
+ */
+static int hl_state_dump_print_syncs(struct hl_device *hdev,
+					char **buf, size_t *size,
+					size_t *offset)
+
+{
+	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+	struct hl_sync_to_engine_map *map;
+	u32 index;
+	int rc = 0;
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (!map)
+		return -ENOMEM;
+
+	rc = sds->funcs.gen_sync_to_engine_map(hdev, map);
+	if (rc)
+		goto free_map_mem;
+
+	rc = hl_snprintf_resize(buf, size, offset, "Non zero sync objects:\n");
+	if (rc)
+		goto out;
+
+	if (sds->sync_namager_names) {
+		for (index = 0; sds->sync_namager_names[index]; ++index) {
+			rc = hl_state_dump_print_syncs_single_block(
+				hdev, index, buf, size, offset, map);
+			if (rc)
+				goto out;
+		}
+	} else {
+		for (index = 0; index < sds->props[SP_NUM_CORES]; ++index) {
+			rc = hl_state_dump_print_syncs_single_block(
+				hdev, index, buf, size, offset, map);
+			if (rc)
+				goto out;
+		}
+	}
+
+out:
+	hl_state_dump_free_sync_to_engine_map(map);
+free_map_mem:
+	kfree(map);
+
+	return rc;
+}
+
+/**
+ * hl_state_dump_alloc_read_sm_block_monitors - read monitors for a specific
+ * block
+ * @hdev: pointer to the device
+ * @index: sync manager block index starting with E_N
+ *
+ * Returns an array of monitor data of size SP_MONITORS_AMOUNT or NULL
+ * on error
+ */
+static struct hl_mon_state_dump *
+hl_state_dump_alloc_read_sm_block_monitors(struct hl_device *hdev, u32 index)
+{
+	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+	struct hl_mon_state_dump *monitors;
+	s64 base_addr; /* Base addr can be negative */
+	int i;
+
+	monitors = vmalloc(sds->props[SP_MONITORS_AMOUNT] *
+			   sizeof(struct hl_mon_state_dump));
+	if (!monitors)
+		return NULL;
+
+	base_addr = sds->props[SP_NEXT_SYNC_OBJ_ADDR] * index;
+
+	for (i = 0; i < sds->props[SP_MONITORS_AMOUNT]; ++i) {
+		monitors[i].id = i;
+		monitors[i].wr_addr_low =
+			RREG32(base_addr + sds->props[SP_MON_OBJ_WR_ADDR_LOW] +
+				i * sizeof(u32));
+
+		monitors[i].wr_addr_high =
+			RREG32(base_addr + sds->props[SP_MON_OBJ_WR_ADDR_HIGH] +
+				i * sizeof(u32));
+
+		monitors[i].wr_data =
+			RREG32(base_addr + sds->props[SP_MON_OBJ_WR_DATA] +
+				i * sizeof(u32));
+
+		monitors[i].arm_data =
+			RREG32(base_addr + sds->props[SP_MON_OBJ_ARM_DATA] +
+				i * sizeof(u32));
+
+		monitors[i].status =
+			RREG32(base_addr + sds->props[SP_MON_OBJ_STATUS] +
+				i * sizeof(u32));
+	}
+
+	return monitors;
+}
+
+/**
+ * hl_state_dump_free_monitors - free the monitors structure
+ * @monitors: monitors array created with
+ *            hl_state_dump_alloc_read_sm_block_monitors
+ */
+static void hl_state_dump_free_monitors(struct hl_mon_state_dump *monitors)
+{
+	vfree(monitors);
+}
+
+/**
+ * hl_state_dump_print_monitors_single_block - print active monitors on a
+ * single block
+ * @hdev: pointer to the device
+ * @index: sync manager block index starting with E_N
+ * @buf: destination buffer double pointer to be used with hl_snprintf_resize
+ * @size: pointer to the size container
+ * @offset: pointer to the offset container
+ *
+ * Returns 0 on success or error code on failure
+ */
+static int hl_state_dump_print_monitors_single_block(struct hl_device *hdev,
+						u32 index,
+						char **buf, size_t *size,
+						size_t *offset)
+{
+	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+	struct hl_mon_state_dump *monitors = NULL;
+	int rc = 0, i;
+
+	if (sds->sync_namager_names) {
+		rc = hl_snprintf_resize(
+			buf, size, offset, "%s\n",
+			sds->sync_namager_names[index]);
+		if (rc)
+			goto out;
+	}
+
+	monitors = hl_state_dump_alloc_read_sm_block_monitors(hdev, index);
+	if (!monitors) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	for (i = 0; i < sds->props[SP_MONITORS_AMOUNT]; ++i) {
+		if (!(sds->funcs.monitor_valid(&monitors[i])))
+			continue;
+
+		/* Monitor is valid, dump it */
+		rc = sds->funcs.print_single_monitor(buf, size, offset, hdev,
+							&monitors[i]);
+		if (rc)
+			goto free_monitors;
+
+		hl_snprintf_resize(buf, size, offset, "\n");
+	}
+
+free_monitors:
+	hl_state_dump_free_monitors(monitors);
+out:
+	return rc;
+}
+
+/**
+ * hl_state_dump_print_monitors - print active monitors
+ * @hdev: pointer to the device
+ * @buf: destination buffer double pointer to be used with hl_snprintf_resize
+ * @size: pointer to the size container
+ * @offset: pointer to the offset container
+ *
+ * Returns 0 on success or error code on failure
+ */
+static int hl_state_dump_print_monitors(struct hl_device *hdev,
+					char **buf, size_t *size,
+					size_t *offset)
+{
+	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+	u32 index;
+	int rc = 0;
+
+	rc = hl_snprintf_resize(buf, size, offset,
+		"Valid (armed) monitor objects:\n");
+	if (rc)
+		goto out;
+
+	if (sds->sync_namager_names) {
+		for (index = 0; sds->sync_namager_names[index]; ++index) {
+			rc = hl_state_dump_print_monitors_single_block(
+				hdev, index, buf, size, offset);
+			if (rc)
+				goto out;
+		}
+	} else {
+		for (index = 0; index < sds->props[SP_NUM_CORES]; ++index) {
+			rc = hl_state_dump_print_monitors_single_block(
+				hdev, index, buf, size, offset);
+			if (rc)
+				goto out;
+		}
+	}
+
+out:
+	return rc;
+}
+
+/**
+ * hl_state_dump_print_engine_fences - print active fences for a specific
+ * engine
+ * @hdev: pointer to the device
+ * @engine_type: engine type to use
+ * @buf: destination buffer double pointer to be used with hl_snprintf_resize
+ * @size: pointer to the size container
+ * @offset: pointer to the offset container
+ */
+static int
+hl_state_dump_print_engine_fences(struct hl_device *hdev,
+				  enum hl_sync_engine_type engine_type,
+				  char **buf, size_t *size, size_t *offset)
+{
+	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+	int rc = 0, i, n_fences;
+	u64 base_addr, next_fence;
+
+	switch (engine_type) {
+	case ENGINE_TPC:
+		n_fences = sds->props[SP_NUM_OF_TPC_ENGINES];
+		base_addr = sds->props[SP_TPC0_CMDQ];
+		next_fence = sds->props[SP_NEXT_TPC];
+		break;
+	case ENGINE_MME:
+		n_fences = sds->props[SP_NUM_OF_MME_ENGINES];
+		base_addr = sds->props[SP_MME_CMDQ];
+		next_fence = sds->props[SP_NEXT_MME];
+		break;
+	case ENGINE_DMA:
+		n_fences = sds->props[SP_NUM_OF_DMA_ENGINES];
+		base_addr = sds->props[SP_DMA_CMDQ];
+		next_fence = sds->props[SP_DMA_QUEUES_OFFSET];
+		break;
+	default:
+		return -EINVAL;
+	}
+	for (i = 0; i < n_fences; ++i) {
+		rc = sds->funcs.print_fences_single_engine(
+			hdev,
+			base_addr + next_fence * i +
+				sds->props[SP_FENCE0_CNT_OFFSET],
+			base_addr + next_fence * i +
+				sds->props[SP_CP_STS_OFFSET],
+			engine_type, i, buf, size, offset);
+		if (rc)
+			goto out;
+	}
+out:
+	return rc;
+}
+
+/**
+ * hl_state_dump_print_fences - print active fences
+ * @hdev: pointer to the device
+ * @buf: destination buffer double pointer to be used with hl_snprintf_resize
+ * @size: pointer to the size container
+ * @offset: pointer to the offset container
+ */
+static int hl_state_dump_print_fences(struct hl_device *hdev, char **buf,
+				      size_t *size, size_t *offset)
+{
+	int rc = 0;
+
+	rc = hl_snprintf_resize(buf, size, offset, "Valid (armed) fences:\n");
+	if (rc)
+		goto out;
+
+	rc = hl_state_dump_print_engine_fences(hdev, ENGINE_TPC, buf, size, offset);
+	if (rc)
+		goto out;
+
+	rc = hl_state_dump_print_engine_fences(hdev, ENGINE_MME, buf, size, offset);
+	if (rc)
+		goto out;
+
+	rc = hl_state_dump_print_engine_fences(hdev, ENGINE_DMA, buf, size, offset);
+	if (rc)
+		goto out;
+
+out:
+	return rc;
+}
+
+/**
+ * hl_state_dump() - dump system state
+ * @hdev: pointer to device structure
+ */
+int hl_state_dump(struct hl_device *hdev)
+{
+	char *buf = NULL;
+	size_t offset = 0, size = 0;
+	int rc;
+
+	rc = hl_snprintf_resize(&buf, &size, &offset,
+				"Timestamp taken on: %llu\n\n",
+				ktime_to_ns(ktime_get()));
+	if (rc)
+		goto err;
+
+	rc = hl_state_dump_print_syncs(hdev, &buf, &size, &offset);
+	if (rc)
+		goto err;
+
+	hl_snprintf_resize(&buf, &size, &offset, "\n");
+
+	rc = hl_state_dump_print_monitors(hdev, &buf, &size, &offset);
+	if (rc)
+		goto err;
+
+	hl_snprintf_resize(&buf, &size, &offset, "\n");
+
+	rc = hl_state_dump_print_fences(hdev, &buf, &size, &offset);
+	if (rc)
+		goto err;
+
+	hl_snprintf_resize(&buf, &size, &offset, "\n");
+
+	hl_debugfs_set_state_dump(hdev, buf, size);
+
+	return 0;
+err:
+	vfree(buf);
+	return rc;
+}
diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c
index db72df282ef8..34f9f2779962 100644
--- a/drivers/misc/habanalabs/common/sysfs.c
+++ b/drivers/misc/habanalabs/common/sysfs.c
@@ -9,8 +9,7 @@
 
 #include <linux/pci.h>
 
-long hl_get_frequency(struct hl_device *hdev, u32 pll_index,
-								bool curr)
+long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
 {
 	struct cpucp_packet pkt;
 	u32 used_pll_idx;
@@ -44,8 +43,7 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index,
 	return (long) result;
 }
 
-void hl_set_frequency(struct hl_device *hdev, u32 pll_index,
-								u64 freq)
+void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
 {
 	struct cpucp_packet pkt;
 	u32 used_pll_idx;
@@ -285,16 +283,12 @@ static ssize_t status_show(struct device *dev, struct device_attribute *attr,
 				char *buf)
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
-	char *str;
+	char str[HL_STR_MAX];
 
-	if (atomic_read(&hdev->in_reset))
-		str = "In reset";
-	else if (hdev->disabled)
-		str = "Malfunction";
-	else if (hdev->needs_reset)
-		str = "Needs Reset";
-	else
-		str = "Operational";
+	strscpy(str, hdev->status[hl_device_status(hdev)], HL_STR_MAX);
+
+	/* use uppercase for backward compatibility */
+	str[0] = 'A' + (str[0] - 'a');
 
 	return sprintf(buf, "%s\n", str);
 }
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index aa8a0ca5aca2..383865be3c2c 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -76,7 +76,7 @@
 #define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
-#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
+#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */
 
@@ -106,6 +106,21 @@
 
 #define GAUDI_PLL_MAX 10
 
+#define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")
+
+#define MONITOR_SOB_STRING_SIZE		256
+
+static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
+	GAUDI_QUEUE_ID_DMA_0_0,
+	GAUDI_QUEUE_ID_DMA_0_1,
+	GAUDI_QUEUE_ID_DMA_0_2,
+	GAUDI_QUEUE_ID_DMA_0_3,
+	GAUDI_QUEUE_ID_DMA_1_0,
+	GAUDI_QUEUE_ID_DMA_1_1,
+	GAUDI_QUEUE_ID_DMA_1_2,
+	GAUDI_QUEUE_ID_DMA_1_3
+};
+
 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
 		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
 		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
@@ -348,6 +363,97 @@ static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
 };
 
+static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
+	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
+	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
+	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
+	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
+	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
+	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
+	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
+	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
+	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
+	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
+	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
+	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
+	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
+	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
+	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
+	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
+	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
+	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
+	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
+	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
+	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
+	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
+	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
+	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
+	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
+	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
+	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
+};
+
+static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
+	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
+	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEADBACK_RESET" },
+	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
+	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
+	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
+	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
+	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
+	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
+	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
+	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
+	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
+};
+
+static s64 gaudi_state_dump_specs_props[] = {
+	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
+	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
+	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
+	[SP_MON_OBJ_WR_ADDR_LOW] =
+		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
+	[SP_MON_OBJ_WR_ADDR_HIGH] =
+		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
+	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
+	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
+	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
+	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
+	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
+	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
+	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
+	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
+	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
+	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
+	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
+	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
+	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
+	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
+	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
+	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
+	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
+	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
+	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
+	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
+	[SP_FENCE0_CNT_OFFSET] =
+		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
+	[SP_FENCE0_RDATA_OFFSET] =
+		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
+	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
+	[SP_NUM_CORES] = 1,
+};
+
+/* The order here is opposite to the order of the indexing in the h/w.
+ * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
+ */
+static const char * const gaudi_sync_manager_names[] = {
+	"SYNC_MGR_E_N",
+	"SYNC_MGR_W_N",
+	"SYNC_MGR_E_S",
+	"SYNC_MGR_W_S",
+	NULL
+};
+
 struct ecc_info_extract_params {
 	u64 block_address;
 	u32 num_memories;
@@ -363,8 +469,6 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
 					u32 size, u64 val);
 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
 					u32 num_regs, u32 val);
-static int gaudi_schedule_register_memset(struct hl_device *hdev,
-		u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val);
 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
 				u32 tpc_id);
 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
@@ -375,7 +479,6 @@ static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
 				u32 size, bool eb);
 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
 				struct hl_gen_wait_properties *prop);
-
 static inline enum hl_collective_mode
 get_collective_mode(struct hl_device *hdev, u32 queue_id)
 {
@@ -403,7 +506,11 @@ static inline void set_default_power_values(struct hl_device *hdev)
 
 	if (hdev->card_type == cpucp_card_type_pmc) {
 		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
-		prop->dc_power_default = DC_POWER_DEFAULT_PMC;
+
+		if (prop->fw_security_enabled)
+			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
+		else
+			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
 	} else {
 		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
 		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
@@ -450,6 +557,7 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
 						get_collective_mode(hdev, i);
 	}
 
+	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
 	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
 	prop->collective_first_sob = 0;
 	prop->collective_first_mon = 0;
@@ -551,6 +659,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
 	prop->hard_reset_done_by_fw = false;
 	prop->gic_interrupts_enable = true;
 
+	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
+
 	return 0;
 }
 
@@ -723,14 +833,14 @@ pci_init:
 					GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
 	if (rc) {
 		if (hdev->reset_on_preboot_fail)
-			hdev->asic_funcs->hw_fini(hdev, true);
+			hdev->asic_funcs->hw_fini(hdev, true, false);
 		goto pci_fini;
 	}
 
 	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
 		dev_info(hdev->dev,
 			"H/W state is dirty, must reset before initializing\n");
-		hdev->asic_funcs->hw_fini(hdev, true);
+		hdev->asic_funcs->hw_fini(hdev, true, false);
 	}
 
 	return 0;
@@ -974,17 +1084,11 @@ static void gaudi_sob_group_hw_reset(struct kref *ref)
 	struct gaudi_hw_sob_group *hw_sob_group =
 		container_of(ref, struct gaudi_hw_sob_group, kref);
 	struct hl_device *hdev = hw_sob_group->hdev;
-	u64 base_addr;
-	int rc;
+	int i;
 
-	base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
-			hw_sob_group->base_sob_id * 4;
-	rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id,
-			base_addr, NUMBER_OF_SOBS_IN_GRP, 0);
-	if (rc)
-		dev_err(hdev->dev,
-			"failed resetting sob group - sob base %u, count %u",
-			hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP);
+	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
+		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
+			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
 
 	kref_init(&hw_sob_group->kref);
 }
@@ -1121,6 +1225,20 @@ static void gaudi_collective_slave_init_job(struct hl_device *hdev,
 	queue_id = job->hw_queue_id;
 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
 
+	if (job->cs->encaps_signals) {
+		/* use the encaps signal handle store earlier in the flow
+		 * and set the SOB information from the encaps
+		 * signals handle
+		 */
+		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
+						cs_cmpl);
+
+		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
+				job->cs->sequence,
+				cs_cmpl->hw_sob->sob_id,
+				cs_cmpl->sob_val);
+	}
+
 	/* Add to wait CBs using slave monitor */
 	wait_prop.data = (void *) job->user_cb;
 	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
@@ -1131,7 +1249,7 @@ static void gaudi_collective_slave_init_job(struct hl_device *hdev,
 	wait_prop.size = cb_size;
 
 	dev_dbg(hdev->dev,
-		"Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
+		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
 		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
 		prop->collective_slave_mon_id, queue_id);
 
@@ -1145,7 +1263,7 @@ static void gaudi_collective_slave_init_job(struct hl_device *hdev,
 			prop->collective_sob_id, cb_size, false);
 }
 
-static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
+static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
 {
 	struct hl_cs_compl *signal_cs_cmpl =
 		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
@@ -1163,9 +1281,37 @@ static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
 	gaudi = hdev->asic_specific;
 	cprop = &gaudi->collective_props;
 
-	/* copy the SOB id and value of the signal CS */
-	cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
-	cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
+	/* In encaps signals case the SOB info will be retrieved from
+	 * the handle in gaudi_collective_slave_init_job.
+	 */
+	if (!cs->encaps_signals) {
+		/* copy the SOB id and value of the signal CS */
+		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
+		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
+	}
+
+	/* check again if the signal cs already completed.
+	 * if yes then don't send any wait cs since the hw_sob
+	 * could be in reset already. if signal is not completed
+	 * then get refcount to hw_sob to prevent resetting the sob
+	 * while wait cs is not submitted.
+	 * note that this check is protected by two locks,
+	 * hw queue lock and completion object lock,
+	 * and the same completion object lock also protects
+	 * the hw_sob reset handler function.
+	 * The hw_queue lock prevent out of sync of hw_sob
+	 * refcount value, changed by signal/wait flows.
+	 */
+	spin_lock(&signal_cs_cmpl->lock);
+
+	if (completion_done(&cs->signal_fence->completion)) {
+		spin_unlock(&signal_cs_cmpl->lock);
+		return -EINVAL;
+	}
+	/* Increment kref since all slave queues are now waiting on it */
+	kref_get(&cs_cmpl->hw_sob->kref);
+
+	spin_unlock(&signal_cs_cmpl->lock);
 
 	/* Calculate the stream from collective master queue (1st job) */
 	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
@@ -1210,21 +1356,17 @@ static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
 				cprop->curr_sob_group_idx[stream], stream);
 	}
 
-	/* Increment kref since all slave queues are now waiting on it */
-	kref_get(&cs_cmpl->hw_sob->kref);
-	/*
-	 * Must put the signal fence after the SOB refcnt increment so
-	 * the SOB refcnt won't turn 0 and reset the SOB before the
-	 * wait CS was submitted.
-	 */
 	mb();
 	hl_fence_put(cs->signal_fence);
 	cs->signal_fence = NULL;
+
+	return 0;
 }
 
 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
 		struct hl_ctx *ctx, struct hl_cs *cs,
-		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
+		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
+		u32 encaps_signal_offset)
 {
 	struct hw_queue_properties *hw_queue_prop;
 	struct hl_cs_counters_atomic *cntr;
@@ -1284,6 +1426,13 @@ static int gaudi_collective_wait_create_job(struct hl_device *hdev,
 	job->user_cb_size = cb_size;
 	job->hw_queue_id = queue_id;
 
+	/* since its guaranteed to have only one chunk in the collective wait
+	 * cs, we can use this chunk to set the encapsulated signal offset
+	 * in the jobs.
+	 */
+	if (cs->encaps_signals)
+		job->encaps_sig_wait_offset = encaps_signal_offset;
+
 	/*
 	 * No need in parsing, user CB is the patched CB.
 	 * We call hl_cb_destroy() out of two reasons - we don't need
@@ -1312,8 +1461,9 @@ static int gaudi_collective_wait_create_job(struct hl_device *hdev,
 }
 
 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
-		struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
-		u32 collective_engine_id)
+		struct hl_ctx *ctx, struct hl_cs *cs,
+		u32 wait_queue_id, u32 collective_engine_id,
+		u32 encaps_signal_offset)
 {
 	struct gaudi_device *gaudi = hdev->asic_specific;
 	struct hw_queue_properties *hw_queue_prop;
@@ -1363,7 +1513,8 @@ static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
 		if (i == 0) {
 			queue_id = wait_queue_id;
 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
-				HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
+				HL_COLLECTIVE_MASTER, queue_id,
+				wait_queue_id, encaps_signal_offset);
 		} else {
 			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
 				if (gaudi->hw_cap_initialized &
@@ -1383,7 +1534,8 @@ static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
 			}
 
 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
-				HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
+				HL_COLLECTIVE_SLAVE, queue_id,
+				wait_queue_id, encaps_signal_offset);
 		}
 
 		if (rc)
@@ -1431,6 +1583,11 @@ static int gaudi_late_init(struct hl_device *hdev)
 		return rc;
 	}
 
+	/* Scrub both SRAM and DRAM */
+	rc = hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
+	if (rc)
+		goto disable_pci_access;
+
 	rc = gaudi_fetch_psoc_frequency(hdev);
 	if (rc) {
 		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
@@ -1455,6 +1612,11 @@ static int gaudi_late_init(struct hl_device *hdev)
 		goto disable_pci_access;
 	}
 
+	/* We only support a single ASID for the user, so for the sake of optimization, just
+	 * initialize the ASID one time during device initialization with the fixed value of 1
+	 */
+	gaudi_mmu_prepare(hdev, 1);
+
 	return 0;
 
 disable_pci_access:
@@ -1720,8 +1882,12 @@ static int gaudi_sw_init(struct hl_device *hdev)
 	hdev->supports_sync_stream = true;
 	hdev->supports_coresight = true;
 	hdev->supports_staged_submission = true;
+	hdev->supports_wait_for_multi_cs = true;
 
-	gaudi_set_pci_memory_regions(hdev);
+	hdev->asic_funcs->set_pci_memory_regions(hdev);
+	hdev->stream_master_qid_arr =
+				hdev->asic_funcs->get_stream_master_qid_arr();
+	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
 
 	return 0;
 
@@ -2523,7 +2689,7 @@ static void gaudi_init_golden_registers(struct hl_device *hdev)
 				tpc_id < TPC_NUMBER_OF_ENGINES;
 				tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
 		/* Mask all arithmetic interrupts from TPC */
-		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
+		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
 		/* Set 16 cache lines */
 		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
 				ICACHE_FETCH_LINE_NUM, 2);
@@ -3670,7 +3836,7 @@ static void gaudi_disable_timestamp(struct hl_device *hdev)
 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
 }
 
-static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
+static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
 {
 	u32 wait_timeout_ms;
 
@@ -3682,6 +3848,9 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
 	else
 		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
 
+	if (fw_reset)
+		goto skip_engines;
+
 	gaudi_stop_nic_qmans(hdev);
 	gaudi_stop_mme_qmans(hdev);
 	gaudi_stop_tpc_qmans(hdev);
@@ -3707,6 +3876,7 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
 
 	gaudi_disable_timestamp(hdev);
 
+skip_engines:
 	gaudi_disable_msi(hdev);
 }
 
@@ -3739,6 +3909,9 @@ static int gaudi_mmu_init(struct hl_device *hdev)
 	WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
 	WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
 
+	/* mem cache invalidation */
+	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
+
 	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
 
 	WREG32(mmMMU_UP_MMU_ENABLE, 1);
@@ -4071,7 +4244,7 @@ disable_queues:
 	return rc;
 }
 
-static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
+static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
 {
 	struct cpu_dyn_regs *dyn_regs =
 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
@@ -4092,6 +4265,14 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
 		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
 	}
 
+	if (fw_reset) {
+		dev_info(hdev->dev,
+			"Firmware performs HARD reset, going to wait %dms\n",
+			reset_timeout_ms);
+
+		goto skip_reset;
+	}
+
 	driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
 					!hdev->asic_prop.hard_reset_done_by_fw);
 
@@ -4168,6 +4349,7 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
 			reset_timeout_ms);
 	}
 
+skip_reset:
 	/*
 	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
 	 * itself is in reset. Need to wait until the reset is deasserted
@@ -4212,7 +4394,7 @@ static int gaudi_resume(struct hl_device *hdev)
 	return gaudi_init_iatu(hdev);
 }
 
-static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
+static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
 			void *cpu_addr, dma_addr_t dma_addr, size_t size)
 {
 	int rc;
@@ -4621,8 +4803,8 @@ static int gaudi_hbm_scrubbing(struct hl_device *hdev)
 				"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
 				cur_addr, cur_addr + chunk_size);
 
-			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0);
-			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0);
+			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf);
+			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf);
 			WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
 						lower_32_bits(cur_addr));
 			WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
@@ -5796,78 +5978,6 @@ release_cb:
 	return rc;
 }
 
-static int gaudi_schedule_register_memset(struct hl_device *hdev,
-		u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
-{
-	struct hl_ctx *ctx;
-	struct hl_pending_cb *pending_cb;
-	struct packet_msg_long *pkt;
-	u32 cb_size, ctl;
-	struct hl_cb *cb;
-	int i, rc;
-
-	mutex_lock(&hdev->fpriv_list_lock);
-	ctx = hdev->compute_ctx;
-
-	/* If no compute context available or context is going down
-	 * memset registers directly
-	 */
-	if (!ctx || kref_read(&ctx->refcount) == 0) {
-		rc = gaudi_memset_registers(hdev, reg_base, num_regs, val);
-		mutex_unlock(&hdev->fpriv_list_lock);
-		return rc;
-	}
-
-	mutex_unlock(&hdev->fpriv_list_lock);
-
-	cb_size = (sizeof(*pkt) * num_regs) +
-			sizeof(struct packet_msg_prot) * 2;
-
-	if (cb_size > SZ_2M) {
-		dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);
-		return -ENOMEM;
-	}
-
-	pending_cb = kzalloc(sizeof(*pending_cb), GFP_KERNEL);
-	if (!pending_cb)
-		return -ENOMEM;
-
-	cb = hl_cb_kernel_create(hdev, cb_size, false);
-	if (!cb) {
-		kfree(pending_cb);
-		return -EFAULT;
-	}
-
-	pkt = cb->kernel_address;
-
-	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
-	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
-	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
-	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
-	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
-
-	for (i = 0; i < num_regs ; i++, pkt++) {
-		pkt->ctl = cpu_to_le32(ctl);
-		pkt->value = cpu_to_le32(val);
-		pkt->addr = cpu_to_le64(reg_base + (i * 4));
-	}
-
-	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
-
-	pending_cb->cb = cb;
-	pending_cb->cb_size = cb_size;
-	/* The queue ID MUST be an external queue ID. Otherwise, we will
-	 * have undefined behavior
-	 */
-	pending_cb->hw_queue_id = hw_queue_id;
-
-	spin_lock(&ctx->pending_cb_lock);
-	list_add_tail(&pending_cb->cb_node, &ctx->pending_cb_list);
-	spin_unlock(&ctx->pending_cb_lock);
-
-	return 0;
-}
-
 static int gaudi_restore_sm_registers(struct hl_device *hdev)
 {
 	u64 base_addr;
@@ -6013,7 +6123,7 @@ static int gaudi_restore_user_registers(struct hl_device *hdev)
 
 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
 {
-	return gaudi_restore_user_registers(hdev);
+	return 0;
 }
 
 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
@@ -6723,6 +6833,9 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
 				asid);
 	}
 
+	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
+	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
+
 	hdev->asic_funcs->set_clock_gating(hdev);
 
 	mutex_unlock(&gaudi->clk_gate_mutex);
@@ -6772,7 +6885,8 @@ static int gaudi_send_job_on_qman0(struct hl_device *hdev,
 
 	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
 
-	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
+	WREG32(mmDMA0_CORE_PROT + dma_offset,
+			BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
 
 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
 					job->job_cb_size, cb->bus_address);
@@ -6793,8 +6907,7 @@ static int gaudi_send_job_on_qman0(struct hl_device *hdev,
 	}
 
 free_fence_ptr:
-	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
-			~BIT(DMA0_CORE_PROT_VAL_SHIFT));
+	WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
 
 	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
 					fence_dma_addr);
@@ -7168,7 +7281,7 @@ static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream
 
 	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
 	size = RREG32(cq_tsize);
-	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
+	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
 							stream, cq_ptr, size);
 }
 
@@ -7224,7 +7337,7 @@ static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
 
 		addr = le64_to_cpu(bd->ptr);
 
-		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
+		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
 							stream, ci, addr, len);
 
 		/* get previous ci, wrap if needed */
@@ -7326,24 +7439,30 @@ static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
 {
 	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
 
+	/* Flip the bits as the enum is ordered in the opposite way */
+	index = (index ^ 0x3) & 0x3;
+
 	switch (sei_data->sei_cause) {
 	case SM_SEI_SO_OVERFLOW:
-		dev_err(hdev->dev,
-			"SM %u SEI Error: SO %u overflow/underflow",
-			index, le32_to_cpu(sei_data->sei_log));
+		dev_err_ratelimited(hdev->dev,
+			"%s SEI Error: SOB Group %u overflow/underflow",
+			gaudi_sync_manager_names[index],
+			le32_to_cpu(sei_data->sei_log));
 		break;
 	case SM_SEI_LBW_4B_UNALIGNED:
-		dev_err(hdev->dev,
-			"SM %u SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
-			index, le32_to_cpu(sei_data->sei_log));
+		dev_err_ratelimited(hdev->dev,
+			"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
+			gaudi_sync_manager_names[index],
+			le32_to_cpu(sei_data->sei_log));
 		break;
 	case SM_SEI_AXI_RESPONSE_ERR:
-		dev_err(hdev->dev,
-			"SM %u SEI Error: AXI ID %u response error",
-			index, le32_to_cpu(sei_data->sei_log));
+		dev_err_ratelimited(hdev->dev,
+			"%s SEI Error: AXI ID %u response error",
+			gaudi_sync_manager_names[index],
+			le32_to_cpu(sei_data->sei_log));
 		break;
 	default:
-		dev_err(hdev->dev, "Unknown SM SEI cause %u",
+		dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
 				le32_to_cpu(sei_data->sei_log));
 		break;
 	}
@@ -7358,6 +7477,11 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
 	bool extract_info_from_fw;
 	int rc;
 
+	if (hdev->asic_prop.fw_security_enabled) {
+		extract_info_from_fw = true;
+		goto extract_ecc_info;
+	}
+
 	switch (event_type) {
 	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
@@ -7430,6 +7554,7 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
 		return;
 	}
 
+extract_ecc_info:
 	if (extract_info_from_fw) {
 		ecc_address = le64_to_cpu(ecc_data->ecc_address);
 		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
@@ -7806,8 +7931,15 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
 	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
 	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
 			>> EQ_CTL_EVENT_TYPE_SHIFT);
-	u8 cause;
 	bool reset_required;
+	u8 cause;
+	int rc;
+
+	if (event_type >= GAUDI_EVENT_SIZE) {
+		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
+				event_type, GAUDI_EVENT_SIZE - 1);
+		return;
+	}
 
 	gaudi->events_stat[event_type]++;
 	gaudi->events_stat_aggregate[event_type]++;
@@ -7880,10 +8012,10 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
 					tpc_dec_event_to_tpc_id(event_type),
 					"AXI_SLV_DEC_Error");
 		if (reset_required) {
-			dev_err(hdev->dev, "hard reset required due to %s\n",
+			dev_err(hdev->dev, "reset required due to %s\n",
 				gaudi_irq_map_table[event_type].name);
 
-			goto reset_device;
+			hl_device_reset(hdev, 0);
 		} else {
 			hl_fw_unmask_irq(hdev, event_type);
 		}
@@ -7902,10 +8034,10 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
 					tpc_krn_event_to_tpc_id(event_type),
 					"KRN_ERR");
 		if (reset_required) {
-			dev_err(hdev->dev, "hard reset required due to %s\n",
+			dev_err(hdev->dev, "reset required due to %s\n",
 				gaudi_irq_map_table[event_type].name);
 
-			goto reset_device;
+			hl_device_reset(hdev, 0);
 		} else {
 			hl_fw_unmask_irq(hdev, event_type);
 		}
@@ -7993,6 +8125,10 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
 		gaudi_print_irq_info(hdev, event_type, false);
 		gaudi_print_sm_sei_info(hdev, event_type,
 					&eq_entry->sm_sei_data);
+		rc = hl_state_dump(hdev);
+		if (rc)
+			dev_err(hdev->dev,
+				"Error during system state dump %d\n", rc);
 		hl_fw_unmask_irq(hdev, event_type);
 		break;
 
@@ -8031,7 +8167,9 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
 	return;
 
 reset_device:
-	if (hdev->hard_reset_on_fw_events)
+	if (hdev->asic_prop.fw_security_enabled)
+		hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW);
+	else if (hdev->hard_reset_on_fw_events)
 		hl_device_reset(hdev, HL_RESET_HARD);
 	else
 		hl_fw_unmask_irq(hdev, event_type);
@@ -8563,11 +8701,20 @@ static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
 
 static int gaudi_ctx_init(struct hl_ctx *ctx)
 {
+	int rc;
+
 	if (ctx->asid == HL_KERNEL_ASID_ID)
 		return 0;
 
-	gaudi_mmu_prepare(ctx->hdev, ctx->asid);
-	return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
+	rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
+	if (rc)
+		return rc;
+
+	rc = gaudi_restore_user_registers(ctx->hdev);
+	if (rc)
+		gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
+
+	return rc;
 }
 
 static void gaudi_ctx_fini(struct hl_ctx *ctx)
@@ -8596,6 +8743,11 @@ static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
 			sizeof(struct packet_msg_prot) * 2;
 }
 
+static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
+{
+	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
+}
+
 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
 				u32 size, bool eb)
 {
@@ -8902,16 +9054,12 @@ static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
 {
 	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
-	int rc;
 
 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
 		hw_sob->sob_id);
 
-	rc = gaudi_schedule_register_memset(hdev, hw_sob->q_idx,
-			CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
-			hw_sob->sob_id * 4, 1, 0);
-	if (rc)
-		dev_err(hdev->dev, "failed resetting sob %u", hw_sob->sob_id);
+	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
+			hw_sob->sob_id * 4, 0);
 
 	kref_init(&hw_sob->kref);
 }
@@ -8977,6 +9125,280 @@ static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
 	}
 }
 
+static int gaudi_add_sync_to_engine_map_entry(
+	struct hl_sync_to_engine_map *map, u32 reg_value,
+	enum hl_sync_engine_type engine_type, u32 engine_id)
+{
+	struct hl_sync_to_engine_map_entry *entry;
+
+	/* Reg value represents a partial address of sync object,
+	 * it is used as unique identifier. For this we need to
+	 * clear the cutoff cfg base bits from the value.
+	 */
+	if (reg_value == 0 || reg_value == 0xffffffff)
+		return 0;
+	reg_value -= (u32)CFG_BASE;
+
+	/* create a new hash entry */
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+	entry->engine_type = engine_type;
+	entry->engine_id = engine_id;
+	entry->sync_id = reg_value;
+	hash_add(map->tb, &entry->node, reg_value);
+
+	return 0;
+}
+
+static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
+				struct hl_sync_to_engine_map *map)
+{
+	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+	struct gaudi_device *gaudi = hdev->asic_specific;
+	int i, j, rc;
+	u32 reg_value;
+
+	/* Iterate over TPC engines */
+	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
+		/* TPC registered must be accessed with clock gating disabled */
+		mutex_lock(&gaudi->clk_gate_mutex);
+		hdev->asic_funcs->disable_clock_gating(hdev);
+
+		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
+					sds->props[SP_NEXT_TPC] * i);
+
+		/* We can reenable clock_gating */
+		hdev->asic_funcs->set_clock_gating(hdev);
+		mutex_unlock(&gaudi->clk_gate_mutex);
+
+		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
+							ENGINE_TPC, i);
+		if (rc)
+			goto free_sync_to_engine_map;
+	}
+
+	/* Iterate over MME engines */
+	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
+		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
+			/* MME registered must be accessed with clock gating
+			 * disabled
+			 */
+			mutex_lock(&gaudi->clk_gate_mutex);
+			hdev->asic_funcs->disable_clock_gating(hdev);
+
+			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
+						sds->props[SP_NEXT_MME] * i +
+						j * sizeof(u32));
+
+			/* We can reenable clock_gating */
+			hdev->asic_funcs->set_clock_gating(hdev);
+			mutex_unlock(&gaudi->clk_gate_mutex);
+
+			rc = gaudi_add_sync_to_engine_map_entry(
+				map, reg_value, ENGINE_MME,
+				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
+			if (rc)
+				goto free_sync_to_engine_map;
+		}
+	}
+
+	/* Iterate over DMA engines */
+	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
+		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
+					sds->props[SP_DMA_QUEUES_OFFSET] * i);
+		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
+							ENGINE_DMA, i);
+		if (rc)
+			goto free_sync_to_engine_map;
+	}
+
+	return 0;
+
+free_sync_to_engine_map:
+	hl_state_dump_free_sync_to_engine_map(map);
+
+	return rc;
+}
+
+static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
+{
+	return FIELD_GET(
+		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
+		mon->status);
+}
+
+static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
+{
+	const size_t max_write = 10;
+	u32 gid, mask, sob;
+	int i, offset;
+
+	/* Sync object ID is calculated as follows:
+	 * (8 * group_id + cleared bits in mask)
+	 */
+	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
+			mon->arm_data);
+	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
+			mon->arm_data);
+
+	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
+		max_write; mask >>= 1, i++) {
+		if (!(mask & 1)) {
+			sob = gid * MONITOR_MAX_SOBS + i;
+
+			if (offset > 0)
+				offset += snprintf(sobs + offset, max_write,
+							", ");
+
+			offset += snprintf(sobs + offset, max_write, "%u", sob);
+		}
+	}
+}
+
+static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
+				struct hl_device *hdev,
+				struct hl_mon_state_dump *mon)
+{
+	const char *name;
+	char scratch_buf1[BIN_REG_STRING_SIZE],
+		scratch_buf2[BIN_REG_STRING_SIZE];
+	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
+
+	name = hl_state_dump_get_monitor_name(hdev, mon);
+	if (!name)
+		name = "";
+
+	gaudi_fill_sobs_from_mon(monitored_sobs, mon);
+
+	return hl_snprintf_resize(
+		buf, size, offset,
+		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
+		mon->id, name,
+		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
+				mon->arm_data),
+		hl_format_as_binary(
+			scratch_buf1, sizeof(scratch_buf1),
+			FIELD_GET(
+				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
+				mon->arm_data)),
+		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
+				mon->arm_data),
+		mon->wr_data,
+		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
+		hl_format_as_binary(
+			scratch_buf2, sizeof(scratch_buf2),
+			FIELD_GET(
+				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
+				mon->status)),
+		monitored_sobs);
+}
+
+
+static int gaudi_print_fences_single_engine(
+	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
+	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
+	size_t *size, size_t *offset)
+{
+	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+	int rc = -ENOMEM, i;
+	u32 *statuses, *fences;
+
+	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
+			sizeof(*statuses), GFP_KERNEL);
+	if (!statuses)
+		goto out;
+
+	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
+				sds->props[SP_ENGINE_NUM_OF_QUEUES],
+			 sizeof(*fences), GFP_KERNEL);
+	if (!fences)
+		goto free_status;
+
+	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
+		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
+
+	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
+				sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
+		fences[i] = RREG32(base_offset + i * sizeof(u32));
+
+	/* The actual print */
+	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
+		u32 fence_id;
+		u64 fence_cnt, fence_rdata;
+		const char *engine_name;
+
+		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
+			statuses[i]))
+			continue;
+
+		fence_id =
+			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
+		fence_cnt = base_offset + CFG_BASE +
+			sizeof(u32) *
+			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
+		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
+				sds->props[SP_FENCE0_RDATA_OFFSET];
+		engine_name = hl_sync_engine_to_string(engine_type);
+
+		rc = hl_snprintf_resize(
+			buf, size, offset,
+			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
+			engine_name, engine_id,
+			i, fence_id,
+			fence_cnt, engine_name, engine_id, fence_id, i,
+			fence_rdata, engine_name, engine_id, fence_id, i,
+			fences[fence_id],
+			statuses[i]);
+		if (rc)
+			goto free_fences;
+	}
+
+	rc = 0;
+
+free_fences:
+	kfree(fences);
+free_status:
+	kfree(statuses);
+out:
+	return rc;
+}
+
+
+static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
+	.monitor_valid = gaudi_monitor_valid,
+	.print_single_monitor = gaudi_print_single_monitor,
+	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
+	.print_fences_single_engine = gaudi_print_fences_single_engine,
+};
+
+static void gaudi_state_dump_init(struct hl_device *hdev)
+{
+	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
+		hash_add(sds->so_id_to_str_tb,
+			&gaudi_so_id_to_str[i].node,
+			gaudi_so_id_to_str[i].id);
+
+	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
+		hash_add(sds->monitor_id_to_str_tb,
+			&gaudi_monitor_id_to_str[i].node,
+			gaudi_monitor_id_to_str[i].id);
+
+	sds->props = gaudi_state_dump_specs_props;
+
+	sds->sync_namager_names = gaudi_sync_manager_names;
+
+	sds->funcs = gaudi_state_dump_funcs;
+}
+
+static u32 *gaudi_get_stream_master_qid_arr(void)
+{
+	return gaudi_stream_master;
+}
+
 static const struct hl_asic_funcs gaudi_funcs = {
 	.early_init = gaudi_early_init,
 	.early_fini = gaudi_early_fini,
@@ -8989,7 +9411,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.halt_engines = gaudi_halt_engines,
 	.suspend = gaudi_suspend,
 	.resume = gaudi_resume,
-	.cb_mmap = gaudi_cb_mmap,
+	.mmap = gaudi_mmap,
 	.ring_doorbell = gaudi_ring_doorbell,
 	.pqe_write = gaudi_pqe_write,
 	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
@@ -9062,7 +9484,11 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.enable_events_from_fw = gaudi_enable_events_from_fw,
 	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
 	.init_firmware_loader = gaudi_init_firmware_loader,
-	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm
+	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
+	.state_dump_init = gaudi_state_dump_init,
+	.get_sob_addr = gaudi_get_sob_addr,
+	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
+	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr
 };
 
 /**
diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h
index 957bf3720f70..bbbf1c343e75 100644
--- a/drivers/misc/habanalabs/gaudi/gaudiP.h
+++ b/drivers/misc/habanalabs/gaudi/gaudiP.h
@@ -36,6 +36,8 @@
 #define NUMBER_OF_INTERRUPTS		(NUMBER_OF_CMPLT_QUEUES + \
 						NUMBER_OF_CPU_HW_QUEUES)
 
+#define GAUDI_STREAM_MASTER_ARR_SIZE	8
+
 #if (NUMBER_OF_INTERRUPTS > GAUDI_MSI_ENTRIES)
 #error "Number of MSI interrupts must be smaller or equal to GAUDI_MSI_ENTRIES"
 #endif
@@ -50,6 +52,8 @@
 #define DC_POWER_DEFAULT_PCI		60000		/* 60W */
 #define DC_POWER_DEFAULT_PMC		60000		/* 60W */
 
+#define DC_POWER_DEFAULT_PMC_SEC	97000		/* 97W */
+
 #define GAUDI_CPU_TIMEOUT_USEC		30000000	/* 30s */
 
 #define TPC_ENABLED_MASK		0xFF
@@ -62,7 +66,7 @@
 
 #define DMA_MAX_TRANSFER_SIZE		U32_MAX
 
-#define GAUDI_DEFAULT_CARD_NAME		"HL2000"
+#define GAUDI_DEFAULT_CARD_NAME		"HL205"
 
 #define GAUDI_MAX_PENDING_CS		SZ_16K
 
@@ -117,6 +121,7 @@
 	(((mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_511 - \
 	mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0) + 4) >> 2)
 
+#define MONITOR_MAX_SOBS	8
 
 /* DRAM Memory Map */
 
@@ -200,6 +205,18 @@
 #define HW_CAP_TPC_MASK		GENMASK(31, 24)
 #define HW_CAP_TPC_SHIFT	24
 
+#define NEXT_SYNC_OBJ_ADDR_INTERVAL \
+	(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 - \
+	 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0)
+#define NUM_OF_MME_ENGINES			2
+#define NUM_OF_MME_SUB_ENGINES		2
+#define NUM_OF_TPC_ENGINES			8
+#define NUM_OF_DMA_ENGINES			8
+#define NUM_OF_QUEUES				5
+#define NUM_OF_STREAMS				4
+#define NUM_OF_FENCES				4
+
+
 #define GAUDI_CPU_PCI_MSB_ADDR(addr)	(((addr) & GENMASK_ULL(49, 39)) >> 39)
 #define GAUDI_PCI_TO_CPU_ADDR(addr)			\
 	do {						\
diff --git a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
index c2a27ed1c4d1..5349c1be13f9 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
@@ -622,11 +622,6 @@ static int gaudi_config_etr(struct hl_device *hdev,
 			return -EINVAL;
 		}
 
-		gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER,
-						hdev->compute_ctx->asid);
-		gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER,
-						hdev->compute_ctx->asid);
-
 		msb = upper_32_bits(input->buffer_address) >> 8;
 		msb &= PSOC_GLOBAL_CONF_TRACE_ADDR_MSB_MASK;
 		WREG32(mmPSOC_GLOBAL_CONF_TRACE_ADDR, msb);
diff --git a/drivers/misc/habanalabs/gaudi/gaudi_security.c b/drivers/misc/habanalabs/gaudi/gaudi_security.c
index 0d3240f1f7d7..cb265c00cf73 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi_security.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi_security.c
@@ -9559,6 +9559,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
 	mask |= 1U << ((mmTPC0_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC0_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC0_CFG_TPC_STALL & 0x7F) >> 2);
+	mask |= 1U << ((mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC0_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC0_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC0_CFG_MSS_CONFIG & 0x7F) >> 2);
@@ -10013,6 +10014,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
 	mask |= 1U << ((mmTPC1_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC1_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC1_CFG_TPC_STALL & 0x7F) >> 2);
+	mask |= 1U << ((mmTPC1_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC1_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC1_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC1_CFG_MSS_CONFIG & 0x7F) >> 2);
@@ -10466,6 +10468,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
 	mask |= 1U << ((mmTPC2_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC2_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC2_CFG_TPC_STALL & 0x7F) >> 2);
+	mask |= 1U << ((mmTPC2_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC2_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC2_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC2_CFG_MSS_CONFIG & 0x7F) >> 2);
@@ -10919,6 +10922,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
 	mask |= 1U << ((mmTPC3_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC3_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC3_CFG_TPC_STALL & 0x7F) >> 2);
+	mask |= 1U << ((mmTPC3_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC3_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC3_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC3_CFG_MSS_CONFIG & 0x7F) >> 2);
@@ -11372,6 +11376,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
 	mask |= 1U << ((mmTPC4_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC4_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC4_CFG_TPC_STALL & 0x7F) >> 2);
+	mask |= 1U << ((mmTPC4_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC4_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC4_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC4_CFG_MSS_CONFIG & 0x7F) >> 2);
@@ -11825,6 +11830,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
 	mask |= 1U << ((mmTPC5_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC5_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC5_CFG_TPC_STALL & 0x7F) >> 2);
+	mask |= 1U << ((mmTPC5_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC5_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC5_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC5_CFG_MSS_CONFIG & 0x7F) >> 2);
@@ -12280,6 +12286,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
 	mask |= 1U << ((mmTPC6_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC6_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC6_CFG_TPC_STALL & 0x7F) >> 2);
+	mask |= 1U << ((mmTPC6_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC6_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC6_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC6_CFG_MSS_CONFIG & 0x7F) >> 2);
@@ -12735,6 +12742,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
 	mask |= 1U << ((mmTPC7_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC7_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC7_CFG_TPC_STALL & 0x7F) >> 2);
+	mask |= 1U << ((mmTPC7_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC7_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC7_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC7_CFG_MSS_CONFIG & 0x7F) >> 2);
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 755e08cf2ecc..031c1849da14 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -350,6 +350,8 @@ static u32 goya_all_events[] = {
 	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
 };
 
+static s64 goya_state_dump_specs_props[SP_MAX] = {0};
+
 static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
 static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
 static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
@@ -387,6 +389,7 @@ int goya_set_fixed_properties(struct hl_device *hdev)
 		prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_USER;
 	}
 
+	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
 	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
 
 	prop->dram_base_address = DRAM_PHYS_BASE;
@@ -466,6 +469,8 @@ int goya_set_fixed_properties(struct hl_device *hdev)
 	prop->hard_reset_done_by_fw = false;
 	prop->gic_interrupts_enable = true;
 
+	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
+
 	return 0;
 }
 
@@ -649,14 +654,14 @@ pci_init:
 					GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
 	if (rc) {
 		if (hdev->reset_on_preboot_fail)
-			hdev->asic_funcs->hw_fini(hdev, true);
+			hdev->asic_funcs->hw_fini(hdev, true, false);
 		goto pci_fini;
 	}
 
 	if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
 		dev_info(hdev->dev,
 			"H/W state is dirty, must reset before initializing\n");
-		hdev->asic_funcs->hw_fini(hdev, true);
+		hdev->asic_funcs->hw_fini(hdev, true, false);
 	}
 
 	if (!hdev->pldm) {
@@ -955,8 +960,9 @@ static int goya_sw_init(struct hl_device *hdev)
 	hdev->supports_coresight = true;
 	hdev->supports_soft_reset = true;
 	hdev->allow_external_soft_reset = true;
+	hdev->supports_wait_for_multi_cs = false;
 
-	goya_set_pci_memory_regions(hdev);
+	hdev->asic_funcs->set_pci_memory_regions(hdev);
 
 	return 0;
 
@@ -2374,7 +2380,7 @@ static void goya_disable_timestamp(struct hl_device *hdev)
 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
 }
 
-static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
+static void goya_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
 {
 	u32 wait_timeout_ms;
 
@@ -2493,6 +2499,7 @@ static void goya_init_firmware_loader(struct hl_device *hdev)
 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
 
 	/* fill common fields */
+	fw_loader->linux_loaded = false;
 	fw_loader->boot_fit_img.image_name = GOYA_BOOT_FIT_FILE;
 	fw_loader->linux_img.image_name = GOYA_LINUX_FW_FILE;
 	fw_loader->cpu_timeout = GOYA_CPU_TIMEOUT_USEC;
@@ -2696,14 +2703,7 @@ disable_queues:
 	return rc;
 }
 
-/*
- * goya_hw_fini - Goya hardware tear-down code
- *
- * @hdev: pointer to hl_device structure
- * @hard_reset: should we do hard reset to all engines or just reset the
- *              compute/dma engines
- */
-static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
+static void goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
 {
 	struct goya_device *goya = hdev->asic_specific;
 	u32 reset_timeout_ms, cpu_timeout_ms, status;
@@ -2796,7 +2796,7 @@ int goya_resume(struct hl_device *hdev)
 	return goya_init_iatu(hdev);
 }
 
-static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
+static int goya_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
 			void *cpu_addr, dma_addr_t dma_addr, size_t size)
 {
 	int rc;
@@ -4797,6 +4797,12 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
 				>> EQ_CTL_EVENT_TYPE_SHIFT);
 	struct goya_device *goya = hdev->asic_specific;
 
+	if (event_type >= GOYA_ASYNC_EVENT_ID_SIZE) {
+		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
+				event_type, GOYA_ASYNC_EVENT_ID_SIZE - 1);
+		return;
+	}
+
 	goya->events_stat[event_type]++;
 	goya->events_stat_aggregate[event_type]++;
 
@@ -5475,14 +5481,14 @@ u64 goya_get_device_time(struct hl_device *hdev)
 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
 }
 
-static void goya_collective_wait_init_cs(struct hl_cs *cs)
+static int goya_collective_wait_init_cs(struct hl_cs *cs)
 {
-
+	return 0;
 }
 
 static int goya_collective_wait_create_jobs(struct hl_device *hdev,
 		struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
-		u32 collective_engine_id)
+		u32 collective_engine_id, u32 encaps_signal_offset)
 {
 	return -EINVAL;
 }
@@ -5524,6 +5530,62 @@ static int goya_map_pll_idx_to_fw_idx(u32 pll_idx)
 	}
 }
 
+static int goya_gen_sync_to_engine_map(struct hl_device *hdev,
+				struct hl_sync_to_engine_map *map)
+{
+	/* Not implemented */
+	return 0;
+}
+
+static int goya_monitor_valid(struct hl_mon_state_dump *mon)
+{
+	/* Not implemented */
+	return 0;
+}
+
+static int goya_print_single_monitor(char **buf, size_t *size, size_t *offset,
+				struct hl_device *hdev,
+				struct hl_mon_state_dump *mon)
+{
+	/* Not implemented */
+	return 0;
+}
+
+
+static int goya_print_fences_single_engine(
+	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
+	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
+	size_t *size, size_t *offset)
+{
+	/* Not implemented */
+	return 0;
+}
+
+
+static struct hl_state_dump_specs_funcs goya_state_dump_funcs = {
+	.monitor_valid = goya_monitor_valid,
+	.print_single_monitor = goya_print_single_monitor,
+	.gen_sync_to_engine_map = goya_gen_sync_to_engine_map,
+	.print_fences_single_engine = goya_print_fences_single_engine,
+};
+
+static void goya_state_dump_init(struct hl_device *hdev)
+{
+	/* Not implemented */
+	hdev->state_dump_specs.props = goya_state_dump_specs_props;
+	hdev->state_dump_specs.funcs = goya_state_dump_funcs;
+}
+
+static u32 goya_get_sob_addr(struct hl_device *hdev, u32 sob_id)
+{
+	return 0;
+}
+
+static u32 *goya_get_stream_master_qid_arr(void)
+{
+	return NULL;
+}
+
 static const struct hl_asic_funcs goya_funcs = {
 	.early_init = goya_early_init,
 	.early_fini = goya_early_fini,
@@ -5536,7 +5598,7 @@ static const struct hl_asic_funcs goya_funcs = {
 	.halt_engines = goya_halt_engines,
 	.suspend = goya_suspend,
 	.resume = goya_resume,
-	.cb_mmap = goya_cb_mmap,
+	.mmap = goya_mmap,
 	.ring_doorbell = goya_ring_doorbell,
 	.pqe_write = goya_pqe_write,
 	.asic_dma_alloc_coherent = goya_dma_alloc_coherent,
@@ -5609,7 +5671,11 @@ static const struct hl_asic_funcs goya_funcs = {
 	.enable_events_from_fw = goya_enable_events_from_fw,
 	.map_pll_idx_to_fw_idx = goya_map_pll_idx_to_fw_idx,
 	.init_firmware_loader = goya_init_firmware_loader,
-	.init_cpu_scrambler_dram = goya_cpu_init_scrambler_dram
+	.init_cpu_scrambler_dram = goya_cpu_init_scrambler_dram,
+	.state_dump_init = goya_state_dump_init,
+	.get_sob_addr = &goya_get_sob_addr,
+	.set_pci_memory_regions = goya_set_pci_memory_regions,
+	.get_stream_master_qid_arr = goya_get_stream_master_qid_arr,
 };
 
 /*
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h
index 80b1d5a9d9f1..9ff6a448f0d4 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -98,6 +98,18 @@ struct hl_eq_fw_alive {
 	__u8 pad[7];
 };
 
+enum hl_pcie_addr_dec_cause {
+	PCIE_ADDR_DEC_HBW_ERR_RESP,
+	PCIE_ADDR_DEC_LBW_ERR_RESP,
+	PCIE_ADDR_DEC_TLP_BLOCKED_BY_RR
+};
+
+struct hl_eq_pcie_addr_dec_data {
+	/* enum hl_pcie_addr_dec_cause */
+	__u8 addr_dec_cause;
+	__u8 pad[7];
+};
+
 struct hl_eq_entry {
 	struct hl_eq_header hdr;
 	union {
@@ -106,6 +118,7 @@ struct hl_eq_entry {
 		struct hl_eq_sm_sei_data sm_sei_data;
 		struct cpucp_pkt_sync_err pkt_sync_err;
 		struct hl_eq_fw_alive fw_alive;
+		struct hl_eq_pcie_addr_dec_data pcie_addr_dec_data;
 		__le64 data[7];
 	};
 };
@@ -116,7 +129,7 @@ struct hl_eq_entry {
 #define EQ_CTL_READY_MASK		0x80000000
 
 #define EQ_CTL_EVENT_TYPE_SHIFT		16
-#define EQ_CTL_EVENT_TYPE_MASK		0x03FF0000
+#define EQ_CTL_EVENT_TYPE_MASK		0x0FFF0000
 
 #define EQ_CTL_INDEX_SHIFT		0
 #define EQ_CTL_INDEX_MASK		0x0000FFFF
@@ -300,7 +313,7 @@ enum pq_init_status {
  *       The packet's arguments specify the desired sensor and the field to
  *       set.
  *
- * CPUCP_PACKET_PCIE_THROUGHPUT_GET
+ * CPUCP_PACKET_PCIE_THROUGHPUT_GET -
  *       Get throughput of PCIe.
  *       The packet's arguments specify the transaction direction (TX/RX).
  *       The window measurement is 10[msec], and the return value is in KB/sec.
@@ -309,19 +322,19 @@ enum pq_init_status {
  *       Replay count measures number of "replay" events, which is basicly
  *       number of retries done by PCIe.
  *
- * CPUCP_PACKET_TOTAL_ENERGY_GET
+ * CPUCP_PACKET_TOTAL_ENERGY_GET -
  *       Total Energy is measurement of energy from the time FW Linux
  *       is loaded. It is calculated by multiplying the average power
  *       by time (passed from armcp start). The units are in MilliJouls.
  *
- * CPUCP_PACKET_PLL_INFO_GET
+ * CPUCP_PACKET_PLL_INFO_GET -
  *       Fetch frequencies of PLL from the required PLL IP.
  *       The packet's arguments specify the device PLL type
  *       Pll type is the PLL from device pll_index enum.
  *       The result is composed of 4 outputs, each is 16-bit
  *       frequency in MHz.
  *
- * CPUCP_PACKET_POWER_GET
+ * CPUCP_PACKET_POWER_GET -
  *       Fetch the present power consumption of the device (Current * Voltage).
  *
  * CPUCP_PACKET_NIC_PFC_SET -
@@ -345,6 +358,24 @@ enum pq_init_status {
  * CPUCP_PACKET_MSI_INFO_SET -
  *       set the index number for each supported msi type going from
  *       host to device
+ *
+ * CPUCP_PACKET_NIC_XPCS91_REGS_GET -
+ *       Fetch the un/correctable counters values from the NIC MAC.
+ *
+ * CPUCP_PACKET_NIC_STAT_REGS_GET -
+ *       Fetch various NIC MAC counters from the NIC STAT.
+ *
+ * CPUCP_PACKET_NIC_STAT_REGS_CLR -
+ *       Clear the various NIC MAC counters in the NIC STAT.
+ *
+ * CPUCP_PACKET_NIC_STAT_REGS_ALL_GET -
+ *       Fetch all NIC MAC counters from the NIC STAT.
+ *
+ * CPUCP_PACKET_IS_IDLE_CHECK -
+ *       Check if the device is IDLE in regard to the DMA/compute engines
+ *       and QMANs. The f/w will return a bitmask where each bit represents
+ *       a different engine or QMAN according to enum cpucp_idle_mask.
+ *       The bit will be 1 if the engine is NOT idle.
  */
 
 enum cpucp_packet_id {
@@ -385,6 +416,11 @@ enum cpucp_packet_id {
 	CPUCP_PACKET_NIC_LPBK_SET,		/* internal */
 	CPUCP_PACKET_NIC_MAC_CFG,		/* internal */
 	CPUCP_PACKET_MSI_INFO_SET,		/* internal */
+	CPUCP_PACKET_NIC_XPCS91_REGS_GET,	/* internal */
+	CPUCP_PACKET_NIC_STAT_REGS_GET,		/* internal */
+	CPUCP_PACKET_NIC_STAT_REGS_CLR,		/* internal */
+	CPUCP_PACKET_NIC_STAT_REGS_ALL_GET,	/* internal */
+	CPUCP_PACKET_IS_IDLE_CHECK,		/* internal */
 };
 
 #define CPUCP_PACKET_FENCE_VAL	0xFE8CE7A5
@@ -414,6 +450,11 @@ enum cpucp_packet_id {
 #define CPUCP_PKT_VAL_LPBK_IN2_SHIFT	1
 #define CPUCP_PKT_VAL_LPBK_IN2_MASK	0x000000000000001Eull
 
+#define CPUCP_PKT_VAL_MAC_CNT_IN1_SHIFT	0
+#define CPUCP_PKT_VAL_MAC_CNT_IN1_MASK	0x0000000000000001ull
+#define CPUCP_PKT_VAL_MAC_CNT_IN2_SHIFT	1
+#define CPUCP_PKT_VAL_MAC_CNT_IN2_MASK	0x00000000FFFFFFFEull
+
 /* heartbeat status bits */
 #define CPUCP_PKT_HB_STATUS_EQ_FAULT_SHIFT		0
 #define CPUCP_PKT_HB_STATUS_EQ_FAULT_MASK		0x00000001
@@ -467,7 +508,8 @@ struct cpucp_packet {
 		__le32 status_mask;
 	};
 
-	__le32 reserved;
+	/* For NIC requests */
+	__le32 port_index;
 };
 
 struct cpucp_unmask_irq_arr_packet {
@@ -476,6 +518,12 @@ struct cpucp_unmask_irq_arr_packet {
 	__le32 irqs[0];
 };
 
+struct cpucp_nic_status_packet {
+	struct cpucp_packet cpucp_pkt;
+	__le32 length;
+	__le32 data[0];
+};
+
 struct cpucp_array_data_packet {
 	struct cpucp_packet cpucp_pkt;
 	__le32 length;
@@ -595,6 +643,18 @@ enum pll_index {
 	PLL_MAX
 };
 
+enum rl_index {
+	TPC_RL = 0,
+	MME_RL,
+};
+
+enum pvt_index {
+	PVT_SW,
+	PVT_SE,
+	PVT_NW,
+	PVT_NE
+};
+
 /* Event Queue Packets */
 
 struct eq_generic_event {
@@ -700,6 +760,15 @@ struct cpucp_mac_addr {
 	__u8 mac_addr[ETH_ALEN];
 };
 
+enum cpucp_serdes_type {
+	TYPE_1_SERDES_TYPE,
+	TYPE_2_SERDES_TYPE,
+	HLS1_SERDES_TYPE,
+	HLS1H_SERDES_TYPE,
+	UNKNOWN_SERDES_TYPE,
+	MAX_NUM_SERDES_TYPE = UNKNOWN_SERDES_TYPE
+};
+
 struct cpucp_nic_info {
 	struct cpucp_mac_addr mac_addrs[CPUCP_MAX_NICS];
 	__le64 link_mask[CPUCP_NIC_MASK_ARR_LEN];
@@ -708,6 +777,40 @@ struct cpucp_nic_info {
 	__le64 link_ext_mask[CPUCP_NIC_MASK_ARR_LEN];
 	__u8 qsfp_eeprom[CPUCP_NIC_QSFP_EEPROM_MAX_LEN];
 	__le64 auto_neg_mask[CPUCP_NIC_MASK_ARR_LEN];
+	__le16 serdes_type; /* enum cpucp_serdes_type */
+	__u8 reserved[6];
+};
+
+/*
+ * struct cpucp_nic_status - describes the status of a NIC port.
+ * @port: NIC port index.
+ * @bad_format_cnt: e.g. CRC.
+ * @responder_out_of_sequence_psn_cnt: e.g NAK.
+ * @high_ber_reinit_cnt: link reinit due to high BER.
+ * @correctable_err_cnt: e.g. bit-flip.
+ * @uncorrectable_err_cnt: e.g. MAC errors.
+ * @retraining_cnt: re-training counter.
+ * @up: is port up.
+ * @pcs_link: has PCS link.
+ * @phy_ready: is PHY ready.
+ * @auto_neg: is Autoneg enabled.
+ * @timeout_retransmission_cnt: timeout retransmission events
+ * @high_ber_cnt: high ber events
+ */
+struct cpucp_nic_status {
+	__le32 port;
+	__le32 bad_format_cnt;
+	__le32 responder_out_of_sequence_psn_cnt;
+	__le32 high_ber_reinit;
+	__le32 correctable_err_cnt;
+	__le32 uncorrectable_err_cnt;
+	__le32 retraining_cnt;
+	__u8 up;
+	__u8 pcs_link;
+	__u8 phy_ready;
+	__u8 auto_neg;
+	__le32 timeout_retransmission_cnt;
+	__le32 high_ber_cnt;
 };
 
 #endif /* CPUCP_IF_H */
diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h
index fa8a5ad2d438..3099653234e4 100644
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -78,6 +78,26 @@
  * CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL	Device is unusable and customer support
  *					should be contacted.
  *
+ * CPU_BOOT_ERR0_ARC0_HALT_ACK_NOT_RCVD	HALT ACK from ARC0 is not received
+ *					within specified retries after issuing
+ *					HALT request. ARC0 appears to be in bad
+ *					reset.
+ *
+ * CPU_BOOT_ERR0_ARC1_HALT_ACK_NOT_RCVD	HALT ACK from ARC1 is not received
+ *					within specified retries after issuing
+ *					HALT request. ARC1 appears to be in bad
+ *					reset.
+ *
+ * CPU_BOOT_ERR0_ARC0_RUN_ACK_NOT_RCVD	RUN ACK from ARC0 is not received
+ *					within specified timeout after issuing
+ *					RUN request. ARC0 appears to be in bad
+ *					reset.
+ *
+ * CPU_BOOT_ERR0_ARC1_RUN_ACK_NOT_RCVD	RUN ACK from ARC1 is not received
+ *					within specified timeout after issuing
+ *					RUN request. ARC1 appears to be in bad
+ *					reset.
+ *
  * CPU_BOOT_ERR0_ENABLED		Error registers enabled.
  *					This is a main indication that the
  *					running FW populates the error
@@ -98,6 +118,10 @@
 #define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL		(1 << 11)
 #define CPU_BOOT_ERR0_PLL_FAIL			(1 << 12)
 #define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL	(1 << 13)
+#define CPU_BOOT_ERR0_ARC0_HALT_ACK_NOT_RCVD	(1 << 14)
+#define CPU_BOOT_ERR0_ARC1_HALT_ACK_NOT_RCVD	(1 << 15)
+#define CPU_BOOT_ERR0_ARC0_RUN_ACK_NOT_RCVD	(1 << 16)
+#define CPU_BOOT_ERR0_ARC1_RUN_ACK_NOT_RCVD	(1 << 17)
 #define CPU_BOOT_ERR0_ENABLED			(1 << 31)
 #define CPU_BOOT_ERR1_ENABLED			(1 << 31)
 
@@ -186,6 +210,10 @@
  *					configured and is ready for use.
  *					Initialized in: ppboot
  *
+ * CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN	NIC MAC channels init is done by FW and
+ *					any access to them is done via the FW.
+ *					Initialized in: linux
+ *
  * CPU_BOOT_DEV_STS0_DYN_PLL_EN		Dynamic PLL configuration is enabled.
  *					FW sends to host a bitmap of supported
  *					PLLs.
@@ -209,6 +237,21 @@
  *					prevent IRQs overriding each other.
  *					Initialized in: linux
  *
+ * CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN
+ *					NIC STAT and XPCS91 access is restricted
+ *					and is done via FW only.
+ *					Initialized in: linux
+ *
+ * CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN
+ *					NIC STAT get all is supported.
+ *					Initialized in: linux
+ *
+ * CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN
+ *					F/W checks if the device is idle by reading defined set
+ *					of registers. It returns a bitmask of all the engines,
+ *					where a bit is set if the engine is not idle.
+ *					Initialized in: linux
+ *
  * CPU_BOOT_DEV_STS0_ENABLED		Device status register enabled.
  *					This is a main indication that the
  *					running FW populates the device status
@@ -236,10 +279,14 @@
 #define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN			(1 << 15)
 #define CPU_BOOT_DEV_STS0_FW_LD_COM_EN			(1 << 16)
 #define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN		(1 << 17)
+#define CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN			(1 << 18)
 #define CPU_BOOT_DEV_STS0_DYN_PLL_EN			(1 << 19)
 #define CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN		(1 << 20)
 #define CPU_BOOT_DEV_STS0_EQ_INDEX_EN			(1 << 21)
 #define CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN		(1 << 22)
+#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN		(1 << 23)
+#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN		(1 << 24)
+#define CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN		(1 << 25)
 #define CPU_BOOT_DEV_STS0_ENABLED			(1 << 31)
 #define CPU_BOOT_DEV_STS1_ENABLED			(1 << 31)
 
@@ -313,10 +360,7 @@ struct cpu_dyn_regs {
 	__le32 hw_state;
 	__le32 kmd_msg_to_cpu;
 	__le32 cpu_cmd_status_to_host;
-	union {
-		__le32 gic_host_irq_ctrl;
-		__le32 gic_host_pi_upd_irq;
-	};
+	__le32 gic_host_pi_upd_irq;
 	__le32 gic_tpc_qm_irq_ctrl;
 	__le32 gic_mme_qm_irq_ctrl;
 	__le32 gic_dma_qm_irq_ctrl;
@@ -324,7 +368,9 @@ struct cpu_dyn_regs {
 	__le32 gic_dma_core_irq_ctrl;
 	__le32 gic_host_halt_irq;
 	__le32 gic_host_ints_irq;
-	__le32 reserved1[24];		/* reserve for future use */
+	__le32 gic_host_soft_rst_irq;
+	__le32 gic_rot_qm_irq_ctrl;
+	__le32 reserved1[22];		/* reserve for future use */
 };
 
 /* TODO: remove the desc magic after the code is updated to use message */
@@ -462,6 +508,11 @@ struct lkd_fw_comms_msg {
  *				Do not wait for BMC response.
  *
  * COMMS_LOW_PLL_OPP		Initialize PLLs for low OPP.
+ *
+ * COMMS_PREP_DESC_ELBI		Same as COMMS_PREP_DESC only that the memory
+ *				space is allocated in a ELBI access only
+ *				address range.
+ *
  */
 enum comms_cmd {
 	COMMS_NOOP = 0,
@@ -474,6 +525,7 @@ enum comms_cmd {
 	COMMS_GOTO_WFE = 7,
 	COMMS_SKIP_BMC = 8,
 	COMMS_LOW_PLL_OPP = 9,
+	COMMS_PREP_DESC_ELBI = 10,
 	COMMS_INVLD_LAST
 };
 
diff --git a/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h b/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h
index 5bb54b34a8ae..ffdfbd9b3220 100644
--- a/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h
+++ b/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h
@@ -126,6 +126,9 @@
 #define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1                     0x4F2004
 #define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_2047                  0x4F3FFC
 #define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0               0x4F4000
+#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0               0x4F4800
+#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0                0x4F5000
+#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0                     0x4F5800
 #define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0                  0x4F6000
 #define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_511                0x4F67FC
 
diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h b/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h
index 9aea7e996654..acc85d3ed98b 100644
--- a/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h
@@ -449,4 +449,21 @@ enum axi_id {
 #define PCIE_AUX_FLR_CTRL_HW_CTRL_MASK                               0x1
 #define PCIE_AUX_FLR_CTRL_INT_MASK_MASK                              0x2
 
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_SHIFT        0
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK         0x1
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_SHIFT      1
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK       0x1FE
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_SHIFT             0
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK              0xFF
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_SHIFT            8
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK             0xFF00
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOP_SHIFT             16
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOP_MASK              0x10000
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_SHIFT             17
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK              0xFFFE0000
+#define TPC0_QM_CP_STS_0_FENCE_ID_SHIFT                              20
+#define TPC0_QM_CP_STS_0_FENCE_ID_MASK                               0x300000
+#define TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_SHIFT                     22
+#define TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK                      0x400000
+
 #endif /* GAUDI_MASKS_H_ */
diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_reg_map.h b/drivers/misc/habanalabs/include/gaudi/gaudi_reg_map.h
index d95d4162ae2c..b9bd5a7f71eb 100644
--- a/drivers/misc/habanalabs/include/gaudi/gaudi_reg_map.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi_reg_map.h
@@ -12,8 +12,6 @@
  * PSOC scratch-pad registers
  */
 #define mmHW_STATE			mmPSOC_GLOBAL_CONF_SCRATCHPAD_0
-/* TODO: remove mmGIC_HOST_IRQ_CTRL_POLL_REG */
-#define mmGIC_HOST_IRQ_CTRL_POLL_REG	mmPSOC_GLOBAL_CONF_SCRATCHPAD_1
 #define mmGIC_HOST_PI_UPD_IRQ_POLL_REG	mmPSOC_GLOBAL_CONF_SCRATCHPAD_1
 #define mmGIC_TPC_QM_IRQ_CTRL_POLL_REG	mmPSOC_GLOBAL_CONF_SCRATCHPAD_2
 #define mmGIC_MME_QM_IRQ_CTRL_POLL_REG	mmPSOC_GLOBAL_CONF_SCRATCHPAD_3
diff --git a/drivers/misc/lis3lv02d/lis3lv02d.h b/drivers/misc/lis3lv02d/lis3lv02d.h
index 7ac788fae1b8..c394c0b08519 100644
--- a/drivers/misc/lis3lv02d/lis3lv02d.h
+++ b/drivers/misc/lis3lv02d/lis3lv02d.h
@@ -271,7 +271,6 @@ struct lis3lv02d {
 	int			regs_size;
 	u8                      *reg_cache;
 	bool			regs_stored;
-	bool			init_required;
 	u8                      odr_mask;  /* ODR bit mask */
 	u8			whoami;    /* indicates measurement precision */
 	s16 (*read_data) (struct lis3lv02d *lis3, int reg);
diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
index 95b1c6800a22..fe6fd34b8caf 100644
--- a/drivers/misc/lkdtm/core.c
+++ b/drivers/misc/lkdtm/core.c
@@ -26,6 +26,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/debugfs.h>
+#include <linux/utsname.h>
 
 #define DEFAULT_COUNT 10
 
@@ -210,6 +211,8 @@ module_param(cpoint_count, int, 0644);
 MODULE_PARM_DESC(cpoint_count, " Crash Point Count, number of times the "\
 				"crash point is to be hit to trigger action");
 
+/* For test debug reporting. */
+char *lkdtm_kernel_info;
 
 /* Return the crashtype number or NULL if the name is invalid */
 static const struct crashtype *find_crashtype(const char *name)
@@ -490,6 +493,11 @@ static int __init lkdtm_module_init(void)
 	crash_count = cpoint_count;
 #endif
 
+	/* Common initialization. */
+	lkdtm_kernel_info = kasprintf(GFP_KERNEL, "kernel (%s %s)",
+				      init_uts_ns.name.release,
+				      init_uts_ns.name.machine);
+
 	/* Handle test-specific initialization. */
 	lkdtm_bugs_init(&recur_count);
 	lkdtm_perms_init();
@@ -538,6 +546,8 @@ static void __exit lkdtm_module_exit(void)
 	if (lkdtm_kprobe != NULL)
 		unregister_kprobe(lkdtm_kprobe);
 
+	kfree(lkdtm_kernel_info);
+
 	pr_info("Crash point unregistered\n");
 }
 
diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
index d7d64d9765eb..c212a253edde 100644
--- a/drivers/misc/lkdtm/lkdtm.h
+++ b/drivers/misc/lkdtm/lkdtm.h
@@ -5,17 +5,17 @@
 #define pr_fmt(fmt) "lkdtm: " fmt
 
 #include <linux/kernel.h>
-#include <generated/compile.h>
-#include <generated/utsrelease.h>
 
-#define LKDTM_KERNEL "kernel (" UTS_RELEASE " " UTS_MACHINE ")"
+extern char *lkdtm_kernel_info;
 
 #define pr_expected_config(kconfig)				\
 {								\
 	if (IS_ENABLED(kconfig)) 				\
-		pr_err("Unexpected! This " LKDTM_KERNEL " was built with " #kconfig "=y\n"); \
+		pr_err("Unexpected! This %s was built with " #kconfig "=y\n", \
+			lkdtm_kernel_info);			\
 	else							\
-		pr_warn("This is probably expected, since this " LKDTM_KERNEL " was built *without* " #kconfig "=y\n"); \
+		pr_warn("This is probably expected, since this %s was built *without* " #kconfig "=y\n", \
+			lkdtm_kernel_info);			\
 }
 
 #ifndef MODULE
@@ -25,24 +25,30 @@ int lkdtm_check_bool_cmdline(const char *param);
 	if (IS_ENABLED(kconfig)) {				\
 		switch (lkdtm_check_bool_cmdline(param)) {	\
 		case 0:						\
-			pr_warn("This is probably expected, since this " LKDTM_KERNEL " was built with " #kconfig "=y but booted with '" param "=N'\n"); \
+			pr_warn("This is probably expected, since this %s was built with " #kconfig "=y but booted with '" param "=N'\n", \
+				lkdtm_kernel_info);		\
 			break;					\
 		case 1:						\
-			pr_err("Unexpected! This " LKDTM_KERNEL " was built with " #kconfig "=y and booted with '" param "=Y'\n"); \
+			pr_err("Unexpected! This %s was built with " #kconfig "=y and booted with '" param "=Y'\n", \
+				lkdtm_kernel_info);		\
 			break;					\
 		default:					\
-			pr_err("Unexpected! This " LKDTM_KERNEL " was built with " #kconfig "=y (and booted without '" param "' specified)\n"); \
+			pr_err("Unexpected! This %s was built with " #kconfig "=y (and booted without '" param "' specified)\n", \
+				lkdtm_kernel_info);		\
 		}						\
 	} else {						\
 		switch (lkdtm_check_bool_cmdline(param)) {	\
 		case 0:						\
-			pr_warn("This is probably expected, as this " LKDTM_KERNEL " was built *without* " #kconfig "=y and booted with '" param "=N'\n"); \
+			pr_warn("This is probably expected, as this %s was built *without* " #kconfig "=y and booted with '" param "=N'\n", \
+				lkdtm_kernel_info);		\
 			break;					\
 		case 1:						\
-			pr_err("Unexpected! This " LKDTM_KERNEL " was built *without* " #kconfig "=y but booted with '" param "=Y'\n"); \
+			pr_err("Unexpected! This %s was built *without* " #kconfig "=y but booted with '" param "=Y'\n", \
+				lkdtm_kernel_info);		\
 			break;					\
 		default:					\
-			pr_err("This is probably expected, since this " LKDTM_KERNEL " was built *without* " #kconfig "=y (and booted without '" param "' specified)\n"); \
+			pr_err("This is probably expected, since this %s was built *without* " #kconfig "=y (and booted without '" param "' specified)\n", \
+				lkdtm_kernel_info);		\
 			break;					\
 		}						\
 	}							\
diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c
index d1137a95ad02..2ed7e3aaff3a 100644
--- a/drivers/misc/pci_endpoint_test.c
+++ b/drivers/misc/pci_endpoint_test.c
@@ -69,6 +69,8 @@
 #define FLAG_USE_DMA				BIT(0)
 
 #define PCI_DEVICE_ID_TI_AM654			0xb00c
+#define PCI_DEVICE_ID_TI_J7200			0xb00f
+#define PCI_DEVICE_ID_TI_AM64			0xb010
 #define PCI_DEVICE_ID_LS1088A			0x80c0
 
 #define is_am654_pci_dev(pdev)		\
@@ -970,6 +972,12 @@ static const struct pci_device_id pci_endpoint_test_tbl[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_J721E),
 	  .driver_data = (kernel_ulong_t)&j721e_data,
 	},
+	{ PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_J7200),
+	  .driver_data = (kernel_ulong_t)&j721e_data,
+	},
+	{ PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_AM64),
+	  .driver_data = (kernel_ulong_t)&j721e_data,
+	},
 	{ }
 };
 MODULE_DEVICE_TABLE(pci, pci_endpoint_test_tbl);
@@ -979,6 +987,7 @@ static struct pci_driver pci_endpoint_test_driver = {
 	.id_table	= pci_endpoint_test_tbl,
 	.probe		= pci_endpoint_test_probe,
 	.remove		= pci_endpoint_test_remove,
+	.sriov_configure = pci_sriov_configure_simple,
 };
 module_pci_driver(pci_endpoint_test_driver);
 
diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c
index cb1a64a5c256..80a2c270d502 100644
--- a/drivers/mmc/host/jz4740_mmc.c
+++ b/drivers/mmc/host/jz4740_mmc.c
@@ -578,10 +578,6 @@ static bool jz4740_mmc_read_data(struct jz4740_mmc_host *host,
 			}
 		}
 		data->bytes_xfered += miter->length;
-
-		/* This can go away once MIPS implements
-		 * flush_kernel_dcache_page */
-		flush_dcache_page(miter->page);
 	}
 	sg_miter_stop(miter);
 
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index a1bcde3395a6..f4c8e1a61f53 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -941,7 +941,7 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd,
 
 		/* discard mappings */
 		if (direction == DMA_FROM_DEVICE)
-			flush_kernel_dcache_page(sg_page(sg));
+			flush_dcache_page(sg_page(sg));
 		kunmap(sg_page(sg));
 		if (dma_dev)
 			dma_unmap_page(dma_dev, dma_addr, PAGE_SIZE, dir);
diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index 8bab6f8718a9..796a2eccbef0 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -45,10 +45,9 @@ config MTD_BLOCK
 	  on RAM chips in this manner. This block device is a user of MTD
 	  devices performing that function.
 
-	  At the moment, it is also required for the Journalling Flash File
-	  System(s) to obtain a handle on the MTD device when it's mounted
-	  (although JFFS and JFFS2 don't actually use any of the functionality
-	  of the mtdblock device).
+	  Note that mounting a JFFS2 filesystem doesn't require using mtdblock.
+	  It's possible to mount a rootfs using the MTD device on the "root="
+	  bootargs as "root=mtd2" or "root=mtd:name_of_device".
 
 	  Later, it may be extended to perform read/erase/modify/write cycles
 	  on flash chips to emulate a smaller block size. Needless to say,
@@ -70,6 +69,9 @@ config MTD_BLOCK_RO
 	  You do not need this option for use with the DiskOnChip devices. For
 	  those, enable NFTL support (CONFIG_NFTL) instead.
 
+comment "Note that in some cases UBI block is preferred. See MTD_UBI_BLOCK."
+	depends on MTD_BLOCK || MTD_BLOCK_RO
+
 config FTL
 	tristate "FTL (Flash Translation Layer) support"
 	depends on BLOCK
diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c
index 9b33c082179d..f655d2905270 100644
--- a/drivers/mtd/ftl.c
+++ b/drivers/mtd/ftl.c
@@ -1029,7 +1029,7 @@ static void ftl_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
 
 		partition->mbd.tr = tr;
 		partition->mbd.devnum = -1;
-		if (!add_mtd_blktrans_dev((void *)partition))
+		if (!add_mtd_blktrans_dev(&partition->mbd))
 			return;
 	}
 
diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index 6650acbc961e..aaa164b977fe 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -127,29 +127,6 @@ config MTD_PHYSMAP_GPIO_ADDR
 	  Extend the physmap driver to allow flashes to be partially
 	  physically addressed and assisted by GPIOs.
 
-config MTD_PMC_MSP_EVM
-	tristate "CFI Flash device mapped on PMC-Sierra MSP"
-	depends on PMC_MSP && MTD_CFI
-	help
-	  This provides a 'mapping' driver which supports the way
-	  in which user-programmable flash chips are connected on the
-	  PMC-Sierra MSP eval/demo boards.
-
-choice
-	prompt "Maximum mappable memory available for flash IO"
-	depends on MTD_PMC_MSP_EVM
-	default MSP_FLASH_MAP_LIMIT_32M
-
-config MSP_FLASH_MAP_LIMIT_32M
-	bool "32M"
-
-endchoice
-
-config MSP_FLASH_MAP_LIMIT
-	hex
-	default "0x02000000"
-	depends on MSP_FLASH_MAP_LIMIT_32M
-
 config MTD_SUN_UFLASH
 	tristate "Sun Microsystems userflash support"
 	depends on SPARC && MTD_CFI && PCI
diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile
index 79f018cf412f..11fea9c8d561 100644
--- a/drivers/mtd/maps/Makefile
+++ b/drivers/mtd/maps/Makefile
@@ -25,7 +25,6 @@ physmap-objs-$(CONFIG_MTD_PHYSMAP_IXP4XX) += physmap-ixp4xx.o
 physmap-objs			:= $(physmap-objs-y)
 obj-$(CONFIG_MTD_PHYSMAP)	+= physmap.o
 obj-$(CONFIG_MTD_PISMO)		+= pismo.o
-obj-$(CONFIG_MTD_PMC_MSP_EVM)   += pmcmsp-flash.o
 obj-$(CONFIG_MTD_PCMCIA)	+= pcmciamtd.o
 obj-$(CONFIG_MTD_SA1100)	+= sa1100-flash.o
 obj-$(CONFIG_MTD_SBC_GXX)	+= sbc_gxx.o
diff --git a/drivers/mtd/maps/pmcmsp-flash.c b/drivers/mtd/maps/pmcmsp-flash.c
deleted file mode 100644
index 2051f28ddac6..000000000000
--- a/drivers/mtd/maps/pmcmsp-flash.c
+++ /dev/null
@@ -1,227 +0,0 @@
-/*
- * Mapping of a custom board with both AMD CFI and JEDEC flash in partitions.
- * Config with both CFI and JEDEC device support.
- *
- * Basically physmap.c with the addition of partitions and
- * an array of mapping info to accommodate more than one flash type per board.
- *
- * Copyright 2005-2007 PMC-Sierra, Inc.
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
- *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
- *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
- *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
- *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *  You should have received a copy of the  GNU General Public License along
- *  with this program; if not, write  to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/map.h>
-#include <linux/mtd/partitions.h>
-
-#include <asm/io.h>
-
-#include <msp_prom.h>
-#include <msp_regs.h>
-
-
-static struct mtd_info **msp_flash;
-static struct mtd_partition **msp_parts;
-static struct map_info *msp_maps;
-static int fcnt;
-
-#define DEBUG_MARKER printk(KERN_NOTICE "%s[%d]\n", __func__, __LINE__)
-
-static int __init init_msp_flash(void)
-{
-	int i, j, ret = -ENOMEM;
-	int offset, coff;
-	char *env;
-	int pcnt;
-	char flash_name[] = "flash0";
-	char part_name[] = "flash0_0";
-	unsigned addr, size;
-
-	/* If ELB is disabled by "ful-mux" mode, we can't get at flash */
-	if ((*DEV_ID_REG & DEV_ID_SINGLE_PC) &&
-	    (*ELB_1PC_EN_REG & SINGLE_PCCARD)) {
-		printk(KERN_NOTICE "Single PC Card mode: no flash access\n");
-		return -ENXIO;
-	}
-
-	/* examine the prom environment for flash devices */
-	for (fcnt = 0; (env = prom_getenv(flash_name)); fcnt++)
-		flash_name[5] = '0' + fcnt + 1;
-
-	if (fcnt < 1)
-		return -ENXIO;
-
-	printk(KERN_NOTICE "Found %d PMC flash devices\n", fcnt);
-
-	msp_flash = kcalloc(fcnt, sizeof(*msp_flash), GFP_KERNEL);
-	if (!msp_flash)
-		return -ENOMEM;
-
-	msp_parts = kcalloc(fcnt, sizeof(*msp_parts), GFP_KERNEL);
-	if (!msp_parts)
-		goto free_msp_flash;
-
-	msp_maps = kcalloc(fcnt, sizeof(*msp_maps), GFP_KERNEL);
-	if (!msp_maps)
-		goto free_msp_parts;
-
-	/* loop over the flash devices, initializing each */
-	for (i = 0; i < fcnt; i++) {
-		/* examine the prom environment for flash partititions */
-		part_name[5] = '0' + i;
-		part_name[7] = '0';
-		for (pcnt = 0; (env = prom_getenv(part_name)); pcnt++)
-			part_name[7] = '0' + pcnt + 1;
-
-		if (pcnt == 0) {
-			printk(KERN_NOTICE "Skipping flash device %d "
-				"(no partitions defined)\n", i);
-			continue;
-		}
-
-		msp_parts[i] = kcalloc(pcnt, sizeof(struct mtd_partition),
-				       GFP_KERNEL);
-		if (!msp_parts[i])
-			goto cleanup_loop;
-
-		/* now initialize the devices proper */
-		flash_name[5] = '0' + i;
-		env = prom_getenv(flash_name);
-
-		if (sscanf(env, "%x:%x", &addr, &size) < 2) {
-			ret = -ENXIO;
-			kfree(msp_parts[i]);
-			goto cleanup_loop;
-		}
-		addr = CPHYSADDR(addr);
-
-		printk(KERN_NOTICE
-			"MSP flash device \"%s\": 0x%08x at 0x%08x\n",
-			flash_name, size, addr);
-		/* This must matchs the actual size of the flash chip */
-		msp_maps[i].size = size;
-		msp_maps[i].phys = addr;
-
-		/*
-		 * Platforms have a specific limit of the size of memory
-		 * which may be mapped for flash:
-		 */
-		if (size > CONFIG_MSP_FLASH_MAP_LIMIT)
-			size = CONFIG_MSP_FLASH_MAP_LIMIT;
-
-		msp_maps[i].virt = ioremap(addr, size);
-		if (msp_maps[i].virt == NULL) {
-			ret = -ENXIO;
-			kfree(msp_parts[i]);
-			goto cleanup_loop;
-		}
-
-		msp_maps[i].bankwidth = 1;
-		msp_maps[i].name = kstrndup(flash_name, 7, GFP_KERNEL);
-		if (!msp_maps[i].name) {
-			iounmap(msp_maps[i].virt);
-			kfree(msp_parts[i]);
-			goto cleanup_loop;
-		}
-
-		for (j = 0; j < pcnt; j++) {
-			part_name[5] = '0' + i;
-			part_name[7] = '0' + j;
-
-			env = prom_getenv(part_name);
-
-			if (sscanf(env, "%x:%x:%n", &offset, &size,
-						&coff) < 2) {
-				ret = -ENXIO;
-				kfree(msp_maps[i].name);
-				iounmap(msp_maps[i].virt);
-				kfree(msp_parts[i]);
-				goto cleanup_loop;
-			}
-
-			msp_parts[i][j].size = size;
-			msp_parts[i][j].offset = offset;
-			msp_parts[i][j].name = env + coff;
-		}
-
-		/* now probe and add the device */
-		simple_map_init(&msp_maps[i]);
-		msp_flash[i] = do_map_probe("cfi_probe", &msp_maps[i]);
-		if (msp_flash[i]) {
-			msp_flash[i]->owner = THIS_MODULE;
-			mtd_device_register(msp_flash[i], msp_parts[i], pcnt);
-		} else {
-			printk(KERN_ERR "map probe failed for flash\n");
-			ret = -ENXIO;
-			kfree(msp_maps[i].name);
-			iounmap(msp_maps[i].virt);
-			kfree(msp_parts[i]);
-			goto cleanup_loop;
-		}
-	}
-
-	return 0;
-
-cleanup_loop:
-	while (i--) {
-		mtd_device_unregister(msp_flash[i]);
-		map_destroy(msp_flash[i]);
-		kfree(msp_maps[i].name);
-		iounmap(msp_maps[i].virt);
-		kfree(msp_parts[i]);
-	}
-	kfree(msp_maps);
-free_msp_parts:
-	kfree(msp_parts);
-free_msp_flash:
-	kfree(msp_flash);
-	return ret;
-}
-
-static void __exit cleanup_msp_flash(void)
-{
-	int i;
-
-	for (i = 0; i < fcnt; i++) {
-		mtd_device_unregister(msp_flash[i]);
-		map_destroy(msp_flash[i]);
-		iounmap((void *)msp_maps[i].virt);
-
-		/* free the memory */
-		kfree(msp_maps[i].name);
-		kfree(msp_parts[i]);
-	}
-
-	kfree(msp_flash);
-	kfree(msp_parts);
-	kfree(msp_maps);
-}
-
-MODULE_AUTHOR("PMC-Sierra, Inc");
-MODULE_DESCRIPTION("MTD map driver for PMC-Sierra MSP boards");
-MODULE_LICENSE("GPL");
-
-module_init(init_msp_flash);
-module_exit(cleanup_msp_flash);
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 44bea3f65060..b8ae1ec14e17 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -23,7 +23,6 @@
 #include "mtdcore.h"
 
 static LIST_HEAD(blktrans_majors);
-static DEFINE_MUTEX(blktrans_ref_mutex);
 
 static void blktrans_dev_release(struct kref *kref)
 {
@@ -37,26 +36,9 @@ static void blktrans_dev_release(struct kref *kref)
 	kfree(dev);
 }
 
-static struct mtd_blktrans_dev *blktrans_dev_get(struct gendisk *disk)
-{
-	struct mtd_blktrans_dev *dev;
-
-	mutex_lock(&blktrans_ref_mutex);
-	dev = disk->private_data;
-
-	if (!dev)
-		goto unlock;
-	kref_get(&dev->ref);
-unlock:
-	mutex_unlock(&blktrans_ref_mutex);
-	return dev;
-}
-
 static void blktrans_dev_put(struct mtd_blktrans_dev *dev)
 {
-	mutex_lock(&blktrans_ref_mutex);
 	kref_put(&dev->ref, blktrans_dev_release);
-	mutex_unlock(&blktrans_ref_mutex);
 }
 
 
@@ -201,19 +183,16 @@ static blk_status_t mtd_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 static int blktrans_open(struct block_device *bdev, fmode_t mode)
 {
-	struct mtd_blktrans_dev *dev = blktrans_dev_get(bdev->bd_disk);
+	struct mtd_blktrans_dev *dev = bdev->bd_disk->private_data;
 	int ret = 0;
 
-	if (!dev)
-		return -ERESTARTSYS; /* FIXME: busy loop! -arnd*/
+	kref_get(&dev->ref);
 
-	mutex_lock(&mtd_table_mutex);
 	mutex_lock(&dev->lock);
 
 	if (dev->open)
 		goto unlock;
 
-	kref_get(&dev->ref);
 	__module_get(dev->tr->owner);
 
 	if (!dev->mtd)
@@ -233,8 +212,6 @@ static int blktrans_open(struct block_device *bdev, fmode_t mode)
 unlock:
 	dev->open++;
 	mutex_unlock(&dev->lock);
-	mutex_unlock(&mtd_table_mutex);
-	blktrans_dev_put(dev);
 	return ret;
 
 error_release:
@@ -242,27 +219,20 @@ error_release:
 		dev->tr->release(dev);
 error_put:
 	module_put(dev->tr->owner);
-	kref_put(&dev->ref, blktrans_dev_release);
 	mutex_unlock(&dev->lock);
-	mutex_unlock(&mtd_table_mutex);
 	blktrans_dev_put(dev);
 	return ret;
 }
 
 static void blktrans_release(struct gendisk *disk, fmode_t mode)
 {
-	struct mtd_blktrans_dev *dev = blktrans_dev_get(disk);
-
-	if (!dev)
-		return;
+	struct mtd_blktrans_dev *dev = disk->private_data;
 
-	mutex_lock(&mtd_table_mutex);
 	mutex_lock(&dev->lock);
 
 	if (--dev->open)
 		goto unlock;
 
-	kref_put(&dev->ref, blktrans_dev_release);
 	module_put(dev->tr->owner);
 
 	if (dev->mtd) {
@@ -272,18 +242,14 @@ static void blktrans_release(struct gendisk *disk, fmode_t mode)
 	}
 unlock:
 	mutex_unlock(&dev->lock);
-	mutex_unlock(&mtd_table_mutex);
 	blktrans_dev_put(dev);
 }
 
 static int blktrans_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
-	struct mtd_blktrans_dev *dev = blktrans_dev_get(bdev->bd_disk);
+	struct mtd_blktrans_dev *dev = bdev->bd_disk->private_data;
 	int ret = -ENXIO;
 
-	if (!dev)
-		return ret;
-
 	mutex_lock(&dev->lock);
 
 	if (!dev->mtd)
@@ -292,7 +258,6 @@ static int blktrans_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	ret = dev->tr->getgeo ? dev->tr->getgeo(dev, geo) : -ENOTTY;
 unlock:
 	mutex_unlock(&dev->lock);
-	blktrans_dev_put(dev);
 	return ret;
 }
 
@@ -315,12 +280,8 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 	struct gendisk *gd;
 	int ret;
 
-	if (mutex_trylock(&mtd_table_mutex)) {
-		mutex_unlock(&mtd_table_mutex);
-		BUG();
-	}
+	lockdep_assert_held(&mtd_table_mutex);
 
-	mutex_lock(&blktrans_ref_mutex);
 	list_for_each_entry(d, &tr->devs, list) {
 		if (new->devnum == -1) {
 			/* Use first free number */
@@ -332,7 +293,6 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 			}
 		} else if (d->devnum == new->devnum) {
 			/* Required number taken */
-			mutex_unlock(&blktrans_ref_mutex);
 			return -EBUSY;
 		} else if (d->devnum > new->devnum) {
 			/* Required number was free */
@@ -350,14 +310,11 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 	 * minor numbers and that the disk naming code below can cope
 	 * with this number. */
 	if (new->devnum > (MINORMASK >> tr->part_bits) ||
-	    (tr->part_bits && new->devnum >= 27 * 26)) {
-		mutex_unlock(&blktrans_ref_mutex);
+	    (tr->part_bits && new->devnum >= 27 * 26))
 		return ret;
-	}
 
 	list_add_tail(&new->list, &tr->devs);
  added:
-	mutex_unlock(&blktrans_ref_mutex);
 
 	mutex_init(&new->lock);
 	kref_init(&new->ref);
@@ -449,10 +406,7 @@ int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old)
 {
 	unsigned long flags;
 
-	if (mutex_trylock(&mtd_table_mutex)) {
-		mutex_unlock(&mtd_table_mutex);
-		BUG();
-	}
+	lockdep_assert_held(&mtd_table_mutex);
 
 	if (old->disk_attributes)
 		sysfs_remove_group(&disk_to_dev(old->disk)->kobj,
diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c
index a80809543793..03e3de3a5d79 100644
--- a/drivers/mtd/mtdblock.c
+++ b/drivers/mtd/mtdblock.c
@@ -322,6 +322,10 @@ static void mtdblock_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
 	if (!(mtd->flags & MTD_WRITEABLE))
 		dev->mbd.readonly = 1;
 
+	if (mtd_type_is_nand(mtd))
+		pr_warn("%s: MTD device '%s' is NAND, please consider using UBI block devices instead.\n",
+			tr->name, mtd->name);
+
 	if (add_mtd_blktrans_dev(&dev->mbd))
 		kfree(dev);
 }
diff --git a/drivers/mtd/mtdblock_ro.c b/drivers/mtd/mtdblock_ro.c
index d92914f73d52..7c51952ce55d 100644
--- a/drivers/mtd/mtdblock_ro.c
+++ b/drivers/mtd/mtdblock_ro.c
@@ -46,6 +46,10 @@ static void mtdblock_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
 	dev->tr = tr;
 	dev->readonly = 1;
 
+	if (mtd_type_is_nand(mtd))
+		pr_warn("%s: MTD device '%s' is NAND, please consider using UBI block devices instead.\n",
+			tr->name, mtd->name);
+
 	if (add_mtd_blktrans_dev(dev))
 		kfree(dev);
 }
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 6e4d0017c0bd..f685a581df48 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -641,6 +641,7 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[],	/* subdevices to c
 	int i;
 	size_t size;
 	struct mtd_concat *concat;
+	struct mtd_info *subdev_master = NULL;
 	uint32_t max_erasesize, curr_erasesize;
 	int num_erase_region;
 	int max_writebufsize = 0;
@@ -679,18 +680,24 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[],	/* subdevices to c
 	concat->mtd.subpage_sft = subdev[0]->subpage_sft;
 	concat->mtd.oobsize = subdev[0]->oobsize;
 	concat->mtd.oobavail = subdev[0]->oobavail;
-	if (subdev[0]->_writev)
+
+	subdev_master = mtd_get_master(subdev[0]);
+	if (subdev_master->_writev)
 		concat->mtd._writev = concat_writev;
-	if (subdev[0]->_read_oob)
+	if (subdev_master->_read_oob)
 		concat->mtd._read_oob = concat_read_oob;
-	if (subdev[0]->_write_oob)
+	if (subdev_master->_write_oob)
 		concat->mtd._write_oob = concat_write_oob;
-	if (subdev[0]->_block_isbad)
+	if (subdev_master->_block_isbad)
 		concat->mtd._block_isbad = concat_block_isbad;
-	if (subdev[0]->_block_markbad)
+	if (subdev_master->_block_markbad)
 		concat->mtd._block_markbad = concat_block_markbad;
-	if (subdev[0]->_panic_write)
+	if (subdev_master->_panic_write)
 		concat->mtd._panic_write = concat_panic_write;
+	if (subdev_master->_read)
+		concat->mtd._read = concat_read;
+	if (subdev_master->_write)
+		concat->mtd._write = concat_write;
 
 	concat->mtd.ecc_stats.badblocks = subdev[0]->ecc_stats.badblocks;
 
@@ -721,14 +728,22 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[],	/* subdevices to c
 				    subdev[i]->flags & MTD_WRITEABLE;
 		}
 
+		subdev_master = mtd_get_master(subdev[i]);
 		concat->mtd.size += subdev[i]->size;
 		concat->mtd.ecc_stats.badblocks +=
 			subdev[i]->ecc_stats.badblocks;
 		if (concat->mtd.writesize   !=  subdev[i]->writesize ||
 		    concat->mtd.subpage_sft != subdev[i]->subpage_sft ||
 		    concat->mtd.oobsize    !=  subdev[i]->oobsize ||
-		    !concat->mtd._read_oob  != !subdev[i]->_read_oob ||
-		    !concat->mtd._write_oob != !subdev[i]->_write_oob) {
+		    !concat->mtd._read_oob  != !subdev_master->_read_oob ||
+		    !concat->mtd._write_oob != !subdev_master->_write_oob) {
+			/*
+			 * Check against subdev[i] for data members, because
+			 * subdev's attributes may be different from master
+			 * mtd device. Check against subdev's master mtd
+			 * device for callbacks, because the existence of
+			 * subdev's callbacks is decided by master mtd device.
+			 */
 			kfree(concat);
 			printk("Incompatible OOB or ECC data on \"%s\"\n",
 			       subdev[i]->name);
@@ -744,8 +759,6 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[],	/* subdevices to c
 	concat->mtd.name = name;
 
 	concat->mtd._erase = concat_erase;
-	concat->mtd._read = concat_read;
-	concat->mtd._write = concat_write;
 	concat->mtd._sync = concat_sync;
 	concat->mtd._lock = concat_lock;
 	concat->mtd._unlock = concat_unlock;
diff --git a/drivers/mtd/nand/raw/Kconfig b/drivers/mtd/nand/raw/Kconfig
index 630728de4b7c..67b7cb67c030 100644
--- a/drivers/mtd/nand/raw/Kconfig
+++ b/drivers/mtd/nand/raw/Kconfig
@@ -480,9 +480,9 @@ config MTD_NAND_RICOH
 	select MTD_SM_COMMON
 	help
 	  Enable support for Ricoh R5C852 xD card reader
-	  You also need to enable ether
+	  You also need to enable either
 	  NAND SSFDC (SmartMedia) read only translation layer' or new
-	  expermental, readwrite
+	  experimental, readwrite
 	  'SmartMedia/xD new translation layer'
 
 config MTD_NAND_DISKONCHIP
diff --git a/drivers/mtd/nand/raw/cafe_nand.c b/drivers/mtd/nand/raw/cafe_nand.c
index d0e8ffd55c22..9dbf031716a6 100644
--- a/drivers/mtd/nand/raw/cafe_nand.c
+++ b/drivers/mtd/nand/raw/cafe_nand.c
@@ -751,7 +751,7 @@ static int cafe_nand_probe(struct pci_dev *pdev,
 			  "CAFE NAND", mtd);
 	if (err) {
 		dev_warn(&pdev->dev, "Could not register IRQ %d\n", pdev->irq);
-		goto out_ior;
+		goto out_free_rs;
 	}
 
 	/* Disable master reset, enable NAND clock */
@@ -795,6 +795,8 @@ static int cafe_nand_probe(struct pci_dev *pdev,
 	/* Disable NAND IRQ in global IRQ mask register */
 	cafe_writel(cafe, ~1 & cafe_readl(cafe, GLOBAL_IRQ_MASK), GLOBAL_IRQ_MASK);
 	free_irq(pdev->irq, mtd);
+ out_free_rs:
+	free_rs(cafe->rs);
  out_ior:
 	pci_iounmap(pdev, cafe->mmio);
  out_free_mtd:
diff --git a/drivers/mtd/nand/raw/intel-nand-controller.c b/drivers/mtd/nand/raw/intel-nand-controller.c
index 8b49fd56cf96..b9784f3da7a1 100644
--- a/drivers/mtd/nand/raw/intel-nand-controller.c
+++ b/drivers/mtd/nand/raw/intel-nand-controller.c
@@ -20,6 +20,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/types.h>
+#include <linux/units.h>
 #include <asm/unaligned.h>
 
 #define EBU_CLC			0x000
@@ -102,7 +103,6 @@
 
 #define MAX_CS	2
 
-#define HZ_PER_MHZ	1000000L
 #define USEC_PER_SEC	1000000L
 
 struct ebu_nand_cs {
@@ -631,19 +631,26 @@ static int ebu_nand_probe(struct platform_device *pdev)
 	ebu_host->clk_rate = clk_get_rate(ebu_host->clk);
 
 	ebu_host->dma_tx = dma_request_chan(dev, "tx");
-	if (IS_ERR(ebu_host->dma_tx))
-		return dev_err_probe(dev, PTR_ERR(ebu_host->dma_tx),
-				     "failed to request DMA tx chan!.\n");
+	if (IS_ERR(ebu_host->dma_tx)) {
+		ret = dev_err_probe(dev, PTR_ERR(ebu_host->dma_tx),
+				    "failed to request DMA tx chan!.\n");
+		goto err_disable_unprepare_clk;
+	}
 
 	ebu_host->dma_rx = dma_request_chan(dev, "rx");
-	if (IS_ERR(ebu_host->dma_rx))
-		return dev_err_probe(dev, PTR_ERR(ebu_host->dma_rx),
-				     "failed to request DMA rx chan!.\n");
+	if (IS_ERR(ebu_host->dma_rx)) {
+		ret = dev_err_probe(dev, PTR_ERR(ebu_host->dma_rx),
+				    "failed to request DMA rx chan!.\n");
+		ebu_host->dma_rx = NULL;
+		goto err_cleanup_dma;
+	}
 
 	resname = devm_kasprintf(dev, GFP_KERNEL, "addr_sel%d", cs);
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, resname);
-	if (!res)
-		return -EINVAL;
+	if (!res) {
+		ret = -EINVAL;
+		goto err_cleanup_dma;
+	}
 	ebu_host->cs[cs].addr_sel = res->start;
 	writel(ebu_host->cs[cs].addr_sel | EBU_ADDR_MASK(5) | EBU_ADDR_SEL_REGEN,
 	       ebu_host->ebu + EBU_ADDR_SEL(cs));
@@ -653,7 +660,8 @@ static int ebu_nand_probe(struct platform_device *pdev)
 	mtd = nand_to_mtd(&ebu_host->chip);
 	if (!mtd->name) {
 		dev_err(ebu_host->dev, "NAND label property is mandatory\n");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err_cleanup_dma;
 	}
 
 	mtd->dev.parent = dev;
@@ -681,6 +689,7 @@ err_clean_nand:
 	nand_cleanup(&ebu_host->chip);
 err_cleanup_dma:
 	ebu_dma_cleanup(ebu_host);
+err_disable_unprepare_clk:
 	clk_disable_unprepare(ebu_host->clk);
 
 	return ret;
diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c
index 817bddccb775..ac3be92872d0 100644
--- a/drivers/mtd/nand/raw/meson_nand.c
+++ b/drivers/mtd/nand/raw/meson_nand.c
@@ -580,7 +580,7 @@ static int meson_nfc_rw_cmd_prepare_and_execute(struct nand_chip *nand,
 	u32 *addrs = nfc->cmdfifo.rw.addrs;
 	u32 cs = nfc->param.chip_select;
 	u32 cmd0, cmd_num, row_start;
-	int ret = 0, i;
+	int i;
 
 	cmd_num = sizeof(struct nand_rw_cmd) / sizeof(int);
 
@@ -620,7 +620,7 @@ static int meson_nfc_rw_cmd_prepare_and_execute(struct nand_chip *nand,
 		meson_nfc_cmd_idle(nfc, nfc->timing.tadl);
 	}
 
-	return ret;
+	return 0;
 }
 
 static int meson_nfc_write_page_sub(struct nand_chip *nand,
diff --git a/drivers/mtd/nand/raw/nand_bbt.c b/drivers/mtd/nand/raw/nand_bbt.c
index dced32a126d9..b7ad030225f8 100644
--- a/drivers/mtd/nand/raw/nand_bbt.c
+++ b/drivers/mtd/nand/raw/nand_bbt.c
@@ -447,6 +447,35 @@ static int scan_block_fast(struct nand_chip *this, struct nand_bbt_descr *bd,
 	return 0;
 }
 
+/* Check if a potential BBT block is marked as bad */
+static int bbt_block_checkbad(struct nand_chip *this, struct nand_bbt_descr *td,
+			      loff_t offs, uint8_t *buf)
+{
+	struct nand_bbt_descr *bd = this->badblock_pattern;
+
+	/*
+	 * No need to check for a bad BBT block if the BBM area overlaps with
+	 * the bad block table marker area in OOB since writing a BBM here
+	 * invalidates the bad block table marker anyway.
+	 */
+	if (!(td->options & NAND_BBT_NO_OOB) &&
+	    td->offs >= bd->offs && td->offs < bd->offs + bd->len)
+		return 0;
+
+	/*
+	 * There is no point in checking for a bad block marker if writing
+	 * such marker is not supported
+	 */
+	if (this->bbt_options & NAND_BBT_NO_OOB_BBM ||
+	    this->options & NAND_NO_BBM_QUIRK)
+		return 0;
+
+	if (scan_block_fast(this, bd, offs, buf) > 0)
+		return 1;
+
+	return 0;
+}
+
 /**
  * create_bbt - [GENERIC] Create a bad block table by scanning the device
  * @this: NAND chip object
@@ -560,6 +589,10 @@ static int search_bbt(struct nand_chip *this, uint8_t *buf,
 			int actblock = startblock + dir * block;
 			loff_t offs = (loff_t)actblock << this->bbt_erase_shift;
 
+			/* Check if block is marked bad */
+			if (bbt_block_checkbad(this, td, offs, buf))
+				continue;
+
 			/* Read first page */
 			scan_read(this, buf, offs, mtd->writesize, td);
 			if (!check_pattern(buf, scanlen, mtd->writesize, td)) {
diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c
index b1839eef5b65..b26d4947af02 100644
--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -911,7 +911,7 @@ static int omap_correct_data(struct nand_chip *chip, u_char *dat,
 }
 
 /**
- * omap_calcuate_ecc - Generate non-inverted ECC bytes.
+ * omap_calculate_ecc - Generate non-inverted ECC bytes.
  * @chip: NAND chip object
  * @dat: The pointer to data on which ecc is computed
  * @ecc_code: The ecc_code buffer
diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c
index 446ba8d43fbc..2c8685f1f2fa 100644
--- a/drivers/mtd/nand/spi/core.c
+++ b/drivers/mtd/nand/spi/core.c
@@ -288,6 +288,8 @@ static int spinand_ondie_ecc_prepare_io_req(struct nand_device *nand,
 	struct spinand_device *spinand = nand_to_spinand(nand);
 	bool enable = (req->mode != MTD_OPS_RAW);
 
+	memset(spinand->oobbuf, 0xff, nanddev_per_page_oobsize(nand));
+
 	/* Only enable or disable the engine */
 	return spinand_ecc_enable(spinand, enable);
 }
@@ -307,7 +309,7 @@ static int spinand_ondie_ecc_finish_io_req(struct nand_device *nand,
 	if (req->type == NAND_PAGE_WRITE)
 		return 0;
 
-	/* Finish a page write: check the status, report errors/bitflips */
+	/* Finish a page read: check the status, report errors/bitflips */
 	ret = spinand_check_ecc_status(spinand, engine_conf->status);
 	if (ret == -EBADMSG)
 		mtd->ecc_stats.failed++;
diff --git a/drivers/mtd/nand/spi/macronix.c b/drivers/mtd/nand/spi/macronix.c
index a9890350db02..3f31f1381a62 100644
--- a/drivers/mtd/nand/spi/macronix.c
+++ b/drivers/mtd/nand/spi/macronix.c
@@ -126,7 +126,7 @@ static const struct spinand_info macronix_spinand_table[] = {
 		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
 					      &write_cache_variants,
 					      &update_cache_variants),
-		     0,
+		     SPINAND_HAS_QE_BIT,
 		     SPINAND_ECCINFO(&mx35lfxge4ab_ooblayout,
 				     mx35lf1ge4ab_ecc_get_status)),
 	SPINAND_INFO("MX35LF4GE4AD",
@@ -136,7 +136,7 @@ static const struct spinand_info macronix_spinand_table[] = {
 		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
 					      &write_cache_variants,
 					      &update_cache_variants),
-		     0,
+		     SPINAND_HAS_QE_BIT,
 		     SPINAND_ECCINFO(&mx35lfxge4ab_ooblayout,
 				     mx35lf1ge4ab_ecc_get_status)),
 	SPINAND_INFO("MX35LF1G24AD",
@@ -146,16 +146,16 @@ static const struct spinand_info macronix_spinand_table[] = {
 		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
 					      &write_cache_variants,
 					      &update_cache_variants),
-		     0,
+		     SPINAND_HAS_QE_BIT,
 		     SPINAND_ECCINFO(&mx35lfxge4ab_ooblayout, NULL)),
 	SPINAND_INFO("MX35LF2G24AD",
 		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x24),
-		     NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 1, 1, 1),
+		     NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 2, 1, 1),
 		     NAND_ECCREQ(8, 512),
 		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
 					      &write_cache_variants,
 					      &update_cache_variants),
-		     0,
+		     SPINAND_HAS_QE_BIT,
 		     SPINAND_ECCINFO(&mx35lfxge4ab_ooblayout, NULL)),
 	SPINAND_INFO("MX35LF4G24AD",
 		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x35),
@@ -164,7 +164,7 @@ static const struct spinand_info macronix_spinand_table[] = {
 		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
 					      &write_cache_variants,
 					      &update_cache_variants),
-		     0,
+		     SPINAND_HAS_QE_BIT,
 		     SPINAND_ECCINFO(&mx35lfxge4ab_ooblayout, NULL)),
 	SPINAND_INFO("MX31LF1GE4BC",
 		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x1e),
@@ -173,7 +173,7 @@ static const struct spinand_info macronix_spinand_table[] = {
 		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
 					      &write_cache_variants,
 					      &update_cache_variants),
-		     0 /*SPINAND_HAS_QE_BIT*/,
+		     SPINAND_HAS_QE_BIT,
 		     SPINAND_ECCINFO(&mx35lfxge4ab_ooblayout,
 				     mx35lf1ge4ab_ecc_get_status)),
 	SPINAND_INFO("MX31UF1GE4BC",
@@ -183,7 +183,7 @@ static const struct spinand_info macronix_spinand_table[] = {
 		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
 					      &write_cache_variants,
 					      &update_cache_variants),
-		     0 /*SPINAND_HAS_QE_BIT*/,
+		     SPINAND_HAS_QE_BIT,
 		     SPINAND_ECCINFO(&mx35lfxge4ab_ooblayout,
 				     mx35lf1ge4ab_ecc_get_status)),
 
diff --git a/drivers/mtd/rfd_ftl.c b/drivers/mtd/rfd_ftl.c
index 6e0d5ce9b010..c546f8c5f24d 100644
--- a/drivers/mtd/rfd_ftl.c
+++ b/drivers/mtd/rfd_ftl.c
@@ -239,7 +239,7 @@ err:
 
 static int rfd_ftl_readsect(struct mtd_blktrans_dev *dev, u_long sector, char *buf)
 {
-	struct partition *part = (struct partition*)dev;
+	struct partition *part = container_of(dev, struct partition, mbd);
 	u_long addr;
 	size_t retlen;
 	int rc;
@@ -600,7 +600,7 @@ static int find_free_sector(const struct partition *part, const struct block *bl
 
 static int do_writesect(struct mtd_blktrans_dev *dev, u_long sector, char *buf, ulong *old_addr)
 {
-	struct partition *part = (struct partition*)dev;
+	struct partition *part = container_of(dev, struct partition, mbd);
 	struct block *block;
 	u_long addr;
 	int i;
@@ -666,7 +666,7 @@ err:
 
 static int rfd_ftl_writesect(struct mtd_blktrans_dev *dev, u_long sector, char *buf)
 {
-	struct partition *part = (struct partition*)dev;
+	struct partition *part = container_of(dev, struct partition, mbd);
 	u_long old_addr;
 	int i;
 	int rc = 0;
@@ -705,9 +705,37 @@ err:
 	return rc;
 }
 
+static int rfd_ftl_discardsect(struct mtd_blktrans_dev *dev,
+			       unsigned long sector, unsigned int nr_sects)
+{
+	struct partition *part = container_of(dev, struct partition, mbd);
+	u_long addr;
+	int rc;
+
+	while (nr_sects) {
+		if (sector >= part->sector_count)
+			return -EIO;
+
+		addr = part->sector_map[sector];
+
+		if (addr != -1) {
+			rc = mark_sector_deleted(part, addr);
+			if (rc)
+				return rc;
+
+			part->sector_map[sector] = -1;
+		}
+
+		sector++;
+		nr_sects--;
+	}
+
+	return 0;
+}
+
 static int rfd_ftl_getgeo(struct mtd_blktrans_dev *dev, struct hd_geometry *geo)
 {
-	struct partition *part = (struct partition*)dev;
+	struct partition *part = container_of(dev, struct partition, mbd);
 
 	geo->heads = 1;
 	geo->sectors = SECTORS_PER_TRACK;
@@ -720,7 +748,8 @@ static void rfd_ftl_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
 {
 	struct partition *part;
 
-	if (mtd->type != MTD_NORFLASH || mtd->size > UINT_MAX)
+	if ((mtd->type != MTD_NORFLASH && mtd->type != MTD_RAM) ||
+	    mtd->size > UINT_MAX)
 		return;
 
 	part = kzalloc(sizeof(struct partition), GFP_KERNEL);
@@ -754,7 +783,7 @@ static void rfd_ftl_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
 		printk(KERN_INFO PREFIX "name: '%s' type: %d flags %x\n",
 				mtd->name, mtd->type, mtd->flags);
 
-		if (!add_mtd_blktrans_dev((void*)part))
+		if (!add_mtd_blktrans_dev(&part->mbd))
 			return;
 	}
 out:
@@ -763,7 +792,7 @@ out:
 
 static void rfd_ftl_remove_dev(struct mtd_blktrans_dev *dev)
 {
-	struct partition *part = (struct partition*)dev;
+	struct partition *part = container_of(dev, struct partition, mbd);
 	int i;
 
 	for (i=0; i<part->total_blocks; i++) {
@@ -771,10 +800,10 @@ static void rfd_ftl_remove_dev(struct mtd_blktrans_dev *dev)
 			part->mbd.mtd->name, i, part->blocks[i].erases);
 	}
 
-	del_mtd_blktrans_dev(dev);
 	vfree(part->sector_map);
 	kfree(part->header_cache);
 	kfree(part->blocks);
+	del_mtd_blktrans_dev(&part->mbd);
 }
 
 static struct mtd_blktrans_ops rfd_ftl_tr = {
@@ -785,6 +814,7 @@ static struct mtd_blktrans_ops rfd_ftl_tr = {
 
 	.readsect	= rfd_ftl_readsect,
 	.writesect	= rfd_ftl_writesect,
+	.discard	= rfd_ftl_discardsect,
 	.getgeo		= rfd_ftl_getgeo,
 	.add_mtd	= rfd_ftl_add_mtd,
 	.remove_dev	= rfd_ftl_remove_dev,
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index a4a202b9a0a2..6006c2e8fa2b 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -96,7 +96,7 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker);
 static void ad_mux_machine(struct port *port, bool *update_slave_arr);
 static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
 static void ad_tx_machine(struct port *port);
-static void ad_periodic_machine(struct port *port, struct bond_params bond_params);
+static void ad_periodic_machine(struct port *port, struct bond_params *bond_params);
 static void ad_port_selection_logic(struct port *port, bool *update_slave_arr);
 static void ad_agg_selection_logic(struct aggregator *aggregator,
 				   bool *update_slave_arr);
@@ -1298,7 +1298,7 @@ static void ad_tx_machine(struct port *port)
  *
  * Turn ntt flag on priodically to perform periodic transmission of lacpdu's.
  */
-static void ad_periodic_machine(struct port *port, struct bond_params bond_params)
+static void ad_periodic_machine(struct port *port, struct bond_params *bond_params)
 {
 	periodic_states_t last_state;
 
@@ -1308,7 +1308,7 @@ static void ad_periodic_machine(struct port *port, struct bond_params bond_param
 	/* check if port was reinitialized */
 	if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) ||
 	    (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) ||
-	    !bond_params.lacp_active) {
+	    !bond_params->lacp_active) {
 		port->sm_periodic_state = AD_NO_PERIODIC;
 	}
 	/* check if state machine should change state */
@@ -2342,7 +2342,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
 		}
 
 		ad_rx_machine(NULL, port);
-		ad_periodic_machine(port, bond->params);
+		ad_periodic_machine(port, &bond->params);
 		ad_port_selection_logic(port, &update_slave_arr);
 		ad_mux_machine(port, &update_slave_arr);
 		ad_tx_machine(port);
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index b0966e733926..77dc79a7f574 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2169,7 +2169,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
 			res = -EOPNOTSUPP;
 			goto err_sysfs_del;
 		}
-	} else {
+	} else if (bond->xdp_prog) {
 		struct netdev_bpf xdp = {
 			.command = XDP_SETUP_PROG,
 			.flags   = 0,
@@ -2910,9 +2910,9 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
 			 * probe to generate any traffic (arp_validate=0)
 			 */
 			if (bond->params.arp_validate)
-				net_warn_ratelimited("%s: no route to arp_ip_target %pI4 and arp_validate is set\n",
-						     bond->dev->name,
-						     &targets[i]);
+				pr_warn_once("%s: no route to arp_ip_target %pI4 and arp_validate is set\n",
+					     bond->dev->name,
+					     &targets[i]);
 			bond_arp_send(slave, ARPOP_REQUEST, targets[i],
 				      0, tags);
 			continue;
@@ -5224,13 +5224,12 @@ static int bond_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 			bpf_prog_inc(prog);
 	}
 
-	if (old_prog)
-		bpf_prog_put(old_prog);
-
-	if (prog)
+	if (prog) {
 		static_branch_inc(&bpf_master_redirect_enabled_key);
-	else
+	} else if (old_prog) {
+		bpf_prog_put(old_prog);
 		static_branch_dec(&bpf_master_redirect_enabled_key);
+	}
 
 	return 0;
 
diff --git a/drivers/net/can/c_can/c_can_ethtool.c b/drivers/net/can/c_can/c_can_ethtool.c
index cd5f07fca2a5..377c7d2e7612 100644
--- a/drivers/net/can/c_can/c_can_ethtool.c
+++ b/drivers/net/can/c_can/c_can_ethtool.c
@@ -15,10 +15,8 @@ static void c_can_get_drvinfo(struct net_device *netdev,
 			      struct ethtool_drvinfo *info)
 {
 	struct c_can_priv *priv = netdev_priv(netdev);
-	struct platform_device *pdev = to_platform_device(priv->device);
-
 	strscpy(info->driver, "c_can", sizeof(info->driver));
-	strscpy(info->bus_info, pdev->name, sizeof(info->bus_info));
+	strscpy(info->bus_info, dev_name(priv->device), sizeof(info->bus_info));
 }
 
 static void c_can_get_ringparam(struct net_device *netdev,
diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c
index c47988d3674e..ff9d0f5ae0dd 100644
--- a/drivers/net/can/rcar/rcar_canfd.c
+++ b/drivers/net/can/rcar/rcar_canfd.c
@@ -2017,7 +2017,7 @@ static int __maybe_unused rcar_canfd_resume(struct device *dev)
 static SIMPLE_DEV_PM_OPS(rcar_canfd_pm_ops, rcar_canfd_suspend,
 			 rcar_canfd_resume);
 
-static const struct of_device_id rcar_canfd_of_table[] = {
+static const __maybe_unused struct of_device_id rcar_canfd_of_table[] = {
 	{ .compatible = "renesas,rcar-gen3-canfd", .data = (void *)RENESAS_RCAR_GEN3 },
 	{ .compatible = "renesas,rzg2l-canfd", .data = (void *)RENESAS_RZG2L },
 	{ }
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index bd1417a66cbf..604f54112665 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1144,7 +1144,7 @@ static void b53_force_link(struct b53_device *dev, int port, int link)
 	u8 reg, val, off;
 
 	/* Override the port settings */
-	if (port == dev->cpu_port) {
+	if (port == dev->imp_port) {
 		off = B53_PORT_OVERRIDE_CTRL;
 		val = PORT_OVERRIDE_EN;
 	} else {
@@ -1168,7 +1168,7 @@ static void b53_force_port_config(struct b53_device *dev, int port,
 	u8 reg, val, off;
 
 	/* Override the port settings */
-	if (port == dev->cpu_port) {
+	if (port == dev->imp_port) {
 		off = B53_PORT_OVERRIDE_CTRL;
 		val = PORT_OVERRIDE_EN;
 	} else {
@@ -1236,7 +1236,7 @@ static void b53_adjust_link(struct dsa_switch *ds, int port,
 	b53_force_link(dev, port, phydev->link);
 
 	if (is531x5(dev) && phy_interface_is_rgmii(phydev)) {
-		if (port == 8)
+		if (port == dev->imp_port)
 			off = B53_RGMII_CTRL_IMP;
 		else
 			off = B53_RGMII_CTRL_P(port);
@@ -2280,6 +2280,7 @@ struct b53_chip_data {
 	const char *dev_name;
 	u16 vlans;
 	u16 enabled_ports;
+	u8 imp_port;
 	u8 cpu_port;
 	u8 vta_regs[3];
 	u8 arl_bins;
@@ -2304,6 +2305,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.enabled_ports = 0x1f,
 		.arl_bins = 2,
 		.arl_buckets = 1024,
+		.imp_port = 5,
 		.cpu_port = B53_CPU_PORT_25,
 		.duplex_reg = B53_DUPLEX_STAT_FE,
 	},
@@ -2314,6 +2316,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.enabled_ports = 0x1f,
 		.arl_bins = 2,
 		.arl_buckets = 1024,
+		.imp_port = 5,
 		.cpu_port = B53_CPU_PORT_25,
 		.duplex_reg = B53_DUPLEX_STAT_FE,
 	},
@@ -2324,6 +2327,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.enabled_ports = 0x1f,
 		.arl_bins = 4,
 		.arl_buckets = 1024,
+		.imp_port = 8,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2337,6 +2341,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.enabled_ports = 0x1f,
 		.arl_bins = 4,
 		.arl_buckets = 1024,
+		.imp_port = 8,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2350,6 +2355,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.enabled_ports = 0x1f,
 		.arl_bins = 4,
 		.arl_buckets = 1024,
+		.imp_port = 8,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS_9798,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2363,6 +2369,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.enabled_ports = 0x7f,
 		.arl_bins = 4,
 		.arl_buckets = 1024,
+		.imp_port = 8,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS_9798,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2377,6 +2384,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.arl_bins = 4,
 		.arl_buckets = 1024,
 		.vta_regs = B53_VTA_REGS,
+		.imp_port = 8,
 		.cpu_port = B53_CPU_PORT,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
 		.jumbo_pm_reg = B53_JUMBO_PORT_MASK,
@@ -2389,6 +2397,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.enabled_ports = 0xff,
 		.arl_bins = 4,
 		.arl_buckets = 1024,
+		.imp_port = 8,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2402,6 +2411,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.enabled_ports = 0x1ff,
 		.arl_bins = 4,
 		.arl_buckets = 1024,
+		.imp_port = 8,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2415,6 +2425,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.enabled_ports = 0, /* pdata must provide them */
 		.arl_bins = 4,
 		.arl_buckets = 1024,
+		.imp_port = 8,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS_63XX,
 		.duplex_reg = B53_DUPLEX_STAT_63XX,
@@ -2428,6 +2439,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.enabled_ports = 0x1f,
 		.arl_bins = 4,
 		.arl_buckets = 1024,
+		.imp_port = 8,
 		.cpu_port = B53_CPU_PORT_25, /* TODO: auto detect */
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2441,6 +2453,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.enabled_ports = 0x1bf,
 		.arl_bins = 4,
 		.arl_buckets = 1024,
+		.imp_port = 8,
 		.cpu_port = B53_CPU_PORT_25, /* TODO: auto detect */
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2454,6 +2467,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.enabled_ports = 0x1bf,
 		.arl_bins = 4,
 		.arl_buckets = 1024,
+		.imp_port = 8,
 		.cpu_port = B53_CPU_PORT_25, /* TODO: auto detect */
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2467,6 +2481,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.enabled_ports = 0x1f,
 		.arl_bins = 4,
 		.arl_buckets = 1024,
+		.imp_port = 8,
 		.cpu_port = B53_CPU_PORT_25, /* TODO: auto detect */
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2480,6 +2495,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.enabled_ports = 0x1f,
 		.arl_bins = 4,
 		.arl_buckets = 1024,
+		.imp_port = 8,
 		.cpu_port = B53_CPU_PORT_25, /* TODO: auto detect */
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2493,6 +2509,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.enabled_ports = 0x1ff,
 		.arl_bins = 4,
 		.arl_buckets = 1024,
+		.imp_port = 8,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2506,6 +2523,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.enabled_ports = 0x103,
 		.arl_bins = 4,
 		.arl_buckets = 1024,
+		.imp_port = 8,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2520,6 +2538,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.enabled_ports = 0x1bf,
 		.arl_bins = 4,
 		.arl_buckets = 256,
+		.imp_port = 8,
 		.cpu_port = 8, /* TODO: ports 4, 5, 8 */
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2533,6 +2552,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.enabled_ports = 0x1ff,
 		.arl_bins = 4,
 		.arl_buckets = 1024,
+		.imp_port = 8,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2546,6 +2566,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.enabled_ports = 0x1ff,
 		.arl_bins = 4,
 		.arl_buckets = 256,
+		.imp_port = 8,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2571,6 +2592,7 @@ static int b53_switch_init(struct b53_device *dev)
 			dev->vta_regs[1] = chip->vta_regs[1];
 			dev->vta_regs[2] = chip->vta_regs[2];
 			dev->jumbo_pm_reg = chip->jumbo_pm_reg;
+			dev->imp_port = chip->imp_port;
 			dev->cpu_port = chip->cpu_port;
 			dev->num_vlans = chip->vlans;
 			dev->num_arl_bins = chip->arl_bins;
@@ -2612,9 +2634,10 @@ static int b53_switch_init(struct b53_device *dev)
 			dev->cpu_port = 5;
 	}
 
-	/* cpu port is always last */
-	dev->num_ports = dev->cpu_port + 1;
 	dev->enabled_ports |= BIT(dev->cpu_port);
+	dev->num_ports = fls(dev->enabled_ports);
+
+	dev->ds->num_ports = min_t(unsigned int, dev->num_ports, DSA_MAX_PORTS);
 
 	/* Include non standard CPU port built-in PHYs to be probed */
 	if (is539x(dev) || is531x5(dev)) {
@@ -2660,7 +2683,6 @@ struct b53_device *b53_switch_alloc(struct device *base,
 		return NULL;
 
 	ds->dev = base;
-	ds->num_ports = DSA_MAX_PORTS;
 
 	dev = devm_kzalloc(base, sizeof(*dev), GFP_KERNEL);
 	if (!dev)
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 9bf8319342b0..5d068acf7cf8 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -123,6 +123,7 @@ struct b53_device {
 
 	/* used ports mask */
 	u16 enabled_ports;
+	unsigned int imp_port;
 	unsigned int cpu_port;
 
 	/* connect specific data */
diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
index e78026ef6d8c..64d6dfa83122 100644
--- a/drivers/net/dsa/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq_gswip.c
@@ -843,7 +843,8 @@ static int gswip_setup(struct dsa_switch *ds)
 
 	gswip_switch_mask(priv, 0, GSWIP_MAC_CTRL_2_MLEN,
 			  GSWIP_MAC_CTRL_2p(cpu_port));
-	gswip_switch_w(priv, VLAN_ETH_FRAME_LEN + 8, GSWIP_MAC_FLEN);
+	gswip_switch_w(priv, VLAN_ETH_FRAME_LEN + 8 + ETH_FCS_LEN,
+		       GSWIP_MAC_FLEN);
 	gswip_switch_mask(priv, 0, GSWIP_BM_QUEUE_GCTRL_GL_MOD,
 			  GSWIP_BM_QUEUE_GCTRL);
 
diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index 17c16333a412..7b0ae9efc004 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -2786,7 +2786,7 @@ static void
 dump_tx_ring(struct net_device *dev)
 {
 	if (vortex_debug > 0) {
-	struct vortex_private *vp = netdev_priv(dev);
+		struct vortex_private *vp = netdev_priv(dev);
 		void __iomem *ioaddr = vp->ioaddr;
 
 		if (vp->full_bus_master_tx) {
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index a705e2615307..8c83973adca5 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -8038,9 +8038,9 @@ bnx2_get_pci_speed(struct bnx2 *bp)
 static void
 bnx2_read_vpd_fw_ver(struct bnx2 *bp)
 {
+	unsigned int len;
 	int rc, i, j;
 	u8 *data;
-	unsigned int block_end, rosize, len;
 
 #define BNX2_VPD_NVRAM_OFFSET	0x300
 #define BNX2_VPD_LEN		128
@@ -8057,38 +8057,21 @@ bnx2_read_vpd_fw_ver(struct bnx2 *bp)
 	for (i = 0; i < BNX2_VPD_LEN; i += 4)
 		swab32s((u32 *)&data[i]);
 
-	i = pci_vpd_find_tag(data, BNX2_VPD_LEN, PCI_VPD_LRDT_RO_DATA);
-	if (i < 0)
-		goto vpd_done;
-
-	rosize = pci_vpd_lrdt_size(&data[i]);
-	i += PCI_VPD_LRDT_TAG_SIZE;
-	block_end = i + rosize;
-
-	if (block_end > BNX2_VPD_LEN)
-		goto vpd_done;
-
-	j = pci_vpd_find_info_keyword(data, i, rosize,
-				      PCI_VPD_RO_KEYWORD_MFR_ID);
+	j = pci_vpd_find_ro_info_keyword(data, BNX2_VPD_LEN,
+					 PCI_VPD_RO_KEYWORD_MFR_ID, &len);
 	if (j < 0)
 		goto vpd_done;
 
-	len = pci_vpd_info_field_size(&data[j]);
-
-	j += PCI_VPD_INFO_FLD_HDR_SIZE;
-	if (j + len > block_end || len != 4 ||
-	    memcmp(&data[j], "1028", 4))
+	if (len != 4 || memcmp(&data[j], "1028", 4))
 		goto vpd_done;
 
-	j = pci_vpd_find_info_keyword(data, i, rosize,
-				      PCI_VPD_RO_KEYWORD_VENDOR0);
+	j = pci_vpd_find_ro_info_keyword(data, BNX2_VPD_LEN,
+					 PCI_VPD_RO_KEYWORD_VENDOR0,
+					 &len);
 	if (j < 0)
 		goto vpd_done;
 
-	len = pci_vpd_info_field_size(&data[j]);
-
-	j += PCI_VPD_INFO_FLD_HDR_SIZE;
-	if (j + len > block_end || len > BNX2_MAX_VER_SLEN)
+	if (len > BNX2_MAX_VER_SLEN)
 		goto vpd_done;
 
 	memcpy(bp->fw_version, &data[j], len);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index d04994840b87..e789430f407c 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -2407,7 +2407,6 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id,
 #define ETH_MAX_RX_CLIENTS_E2		ETH_MAX_RX_CLIENTS_E1H
 #endif
 
-#define BNX2X_VPD_LEN			128
 #define VENDOR_ID_LEN			4
 
 #define VF_ACQUIRE_THRESH		3
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 6d98134913cd..ae87296ae1ff 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -12189,86 +12189,35 @@ static int bnx2x_get_hwinfo(struct bnx2x *bp)
 
 static void bnx2x_read_fwinfo(struct bnx2x *bp)
 {
-	int cnt, i, block_end, rodi;
-	char vpd_start[BNX2X_VPD_LEN+1];
-	char str_id_reg[VENDOR_ID_LEN+1];
-	char str_id_cap[VENDOR_ID_LEN+1];
-	char *vpd_data;
-	char *vpd_extended_data = NULL;
-	u8 len;
-
-	cnt = pci_read_vpd(bp->pdev, 0, BNX2X_VPD_LEN, vpd_start);
-	memset(bp->fw_ver, 0, sizeof(bp->fw_ver));
-
-	if (cnt < BNX2X_VPD_LEN)
-		goto out_not_found;
-
-	/* VPD RO tag should be first tag after identifier string, hence
-	 * we should be able to find it in first BNX2X_VPD_LEN chars
-	 */
-	i = pci_vpd_find_tag(vpd_start, BNX2X_VPD_LEN, PCI_VPD_LRDT_RO_DATA);
-	if (i < 0)
-		goto out_not_found;
-
-	block_end = i + PCI_VPD_LRDT_TAG_SIZE +
-		    pci_vpd_lrdt_size(&vpd_start[i]);
-
-	i += PCI_VPD_LRDT_TAG_SIZE;
-
-	if (block_end > BNX2X_VPD_LEN) {
-		vpd_extended_data = kmalloc(block_end, GFP_KERNEL);
-		if (vpd_extended_data  == NULL)
-			goto out_not_found;
-
-		/* read rest of vpd image into vpd_extended_data */
-		memcpy(vpd_extended_data, vpd_start, BNX2X_VPD_LEN);
-		cnt = pci_read_vpd(bp->pdev, BNX2X_VPD_LEN,
-				   block_end - BNX2X_VPD_LEN,
-				   vpd_extended_data + BNX2X_VPD_LEN);
-		if (cnt < (block_end - BNX2X_VPD_LEN))
-			goto out_not_found;
-		vpd_data = vpd_extended_data;
-	} else
-		vpd_data = vpd_start;
+	char str_id[VENDOR_ID_LEN + 1];
+	unsigned int vpd_len, kw_len;
+	u8 *vpd_data;
+	int rodi;
 
-	/* now vpd_data holds full vpd content in both cases */
-
-	rodi = pci_vpd_find_info_keyword(vpd_data, i, block_end,
-				   PCI_VPD_RO_KEYWORD_MFR_ID);
-	if (rodi < 0)
-		goto out_not_found;
+	memset(bp->fw_ver, 0, sizeof(bp->fw_ver));
 
-	len = pci_vpd_info_field_size(&vpd_data[rodi]);
+	vpd_data = pci_vpd_alloc(bp->pdev, &vpd_len);
+	if (IS_ERR(vpd_data))
+		return;
 
-	if (len != VENDOR_ID_LEN)
+	rodi = pci_vpd_find_ro_info_keyword(vpd_data, vpd_len,
+					    PCI_VPD_RO_KEYWORD_MFR_ID, &kw_len);
+	if (rodi < 0 || kw_len != VENDOR_ID_LEN)
 		goto out_not_found;
 
-	rodi += PCI_VPD_INFO_FLD_HDR_SIZE;
-
 	/* vendor specific info */
-	snprintf(str_id_reg, VENDOR_ID_LEN + 1, "%04x", PCI_VENDOR_ID_DELL);
-	snprintf(str_id_cap, VENDOR_ID_LEN + 1, "%04X", PCI_VENDOR_ID_DELL);
-	if (!strncmp(str_id_reg, &vpd_data[rodi], VENDOR_ID_LEN) ||
-	    !strncmp(str_id_cap, &vpd_data[rodi], VENDOR_ID_LEN)) {
-
-		rodi = pci_vpd_find_info_keyword(vpd_data, i, block_end,
-						PCI_VPD_RO_KEYWORD_VENDOR0);
-		if (rodi >= 0) {
-			len = pci_vpd_info_field_size(&vpd_data[rodi]);
-
-			rodi += PCI_VPD_INFO_FLD_HDR_SIZE;
-
-			if (len < 32 && (len + rodi) <= BNX2X_VPD_LEN) {
-				memcpy(bp->fw_ver, &vpd_data[rodi], len);
-				bp->fw_ver[len] = ' ';
-			}
+	snprintf(str_id, VENDOR_ID_LEN + 1, "%04x", PCI_VENDOR_ID_DELL);
+	if (!strncasecmp(str_id, &vpd_data[rodi], VENDOR_ID_LEN)) {
+		rodi = pci_vpd_find_ro_info_keyword(vpd_data, vpd_len,
+						    PCI_VPD_RO_KEYWORD_VENDOR0,
+						    &kw_len);
+		if (rodi >= 0 && kw_len < sizeof(bp->fw_ver)) {
+			memcpy(bp->fw_ver, &vpd_data[rodi], kw_len);
+			bp->fw_ver[kw_len] = ' ';
 		}
-		kfree(vpd_extended_data);
-		return;
 	}
 out_not_found:
-	kfree(vpd_extended_data);
-	return;
+	kfree(vpd_data);
 }
 
 static void bnx2x_set_modes_bitmap(struct bnx2x *bp)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 627f85ee3922..ea0c45d33814 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -305,13 +305,15 @@ static bool bnxt_vf_pciid(enum board_idx idx)
 	writel(DB_CP_FLAGS | RING_CMP(idx), (db)->doorbell)
 
 #define BNXT_DB_NQ_P5(db, idx)						\
-	writeq((db)->db_key64 | DBR_TYPE_NQ | RING_CMP(idx), (db)->doorbell)
+	bnxt_writeq(bp, (db)->db_key64 | DBR_TYPE_NQ | RING_CMP(idx),	\
+		    (db)->doorbell)
 
 #define BNXT_DB_CQ_ARM(db, idx)						\
 	writel(DB_CP_REARM_FLAGS | RING_CMP(idx), (db)->doorbell)
 
 #define BNXT_DB_NQ_ARM_P5(db, idx)					\
-	writeq((db)->db_key64 | DBR_TYPE_NQ_ARM | RING_CMP(idx), (db)->doorbell)
+	bnxt_writeq(bp, (db)->db_key64 | DBR_TYPE_NQ_ARM | RING_CMP(idx),\
+		    (db)->doorbell)
 
 static void bnxt_db_nq(struct bnxt *bp, struct bnxt_db_info *db, u32 idx)
 {
@@ -332,8 +334,8 @@ static void bnxt_db_nq_arm(struct bnxt *bp, struct bnxt_db_info *db, u32 idx)
 static void bnxt_db_cq(struct bnxt *bp, struct bnxt_db_info *db, u32 idx)
 {
 	if (bp->flags & BNXT_FLAG_CHIP_P5)
-		writeq(db->db_key64 | DBR_TYPE_CQ_ARMALL | RING_CMP(idx),
-		       db->doorbell);
+		bnxt_writeq(bp, db->db_key64 | DBR_TYPE_CQ_ARMALL |
+			    RING_CMP(idx), db->doorbell);
 	else
 		BNXT_DB_CQ(db, idx);
 }
@@ -2200,25 +2202,34 @@ static int bnxt_async_event_process(struct bnxt *bp,
 		if (!fw_health)
 			goto async_event_process_exit;
 
-		fw_health->enabled = EVENT_DATA1_RECOVERY_ENABLED(data1);
-		fw_health->master = EVENT_DATA1_RECOVERY_MASTER_FUNC(data1);
-		if (!fw_health->enabled) {
+		if (!EVENT_DATA1_RECOVERY_ENABLED(data1)) {
+			fw_health->enabled = false;
 			netif_info(bp, drv, bp->dev,
 				   "Error recovery info: error recovery[0]\n");
 			break;
 		}
+		fw_health->master = EVENT_DATA1_RECOVERY_MASTER_FUNC(data1);
 		fw_health->tmr_multiplier =
 			DIV_ROUND_UP(fw_health->polling_dsecs * HZ,
 				     bp->current_interval * 10);
 		fw_health->tmr_counter = fw_health->tmr_multiplier;
-		fw_health->last_fw_heartbeat =
-			bnxt_fw_health_readl(bp, BNXT_FW_HEARTBEAT_REG);
-		fw_health->last_fw_reset_cnt =
-			bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG);
+		if (!fw_health->enabled) {
+			fw_health->last_fw_heartbeat =
+				bnxt_fw_health_readl(bp, BNXT_FW_HEARTBEAT_REG);
+			fw_health->last_fw_reset_cnt =
+				bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG);
+		}
 		netif_info(bp, drv, bp->dev,
 			   "Error recovery info: error recovery[1], master[%d], reset count[%u], health status: 0x%x\n",
 			   fw_health->master, fw_health->last_fw_reset_cnt,
 			   bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG));
+		if (!fw_health->enabled) {
+			/* Make sure tmr_counter is set and visible to
+			 * bnxt_health_check() before setting enabled to true.
+			 */
+			smp_wmb();
+			fw_health->enabled = true;
+		}
 		goto async_event_process_exit;
 	}
 	case ASYNC_EVENT_CMPL_EVENT_ID_DEBUG_NOTIFICATION:
@@ -2638,8 +2649,8 @@ static void __bnxt_poll_cqs_done(struct bnxt *bp, struct bnxt_napi *bnapi,
 
 		if (cpr2 && cpr2->had_work_done) {
 			db = &cpr2->cp_db;
-			writeq(db->db_key64 | dbr_type |
-			       RING_CMP(cpr2->cp_raw_cons), db->doorbell);
+			bnxt_writeq(bp, db->db_key64 | dbr_type |
+				    RING_CMP(cpr2->cp_raw_cons), db->doorbell);
 			cpr2->had_work_done = 0;
 		}
 	}
@@ -4639,6 +4650,13 @@ static int bnxt_hwrm_tunnel_dst_port_free(struct bnxt *bp, u8 tunnel_type)
 	struct hwrm_tunnel_dst_port_free_input *req;
 	int rc;
 
+	if (tunnel_type == TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN &&
+	    bp->vxlan_fw_dst_port_id == INVALID_HW_RING_ID)
+		return 0;
+	if (tunnel_type == TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE &&
+	    bp->nge_fw_dst_port_id == INVALID_HW_RING_ID)
+		return 0;
+
 	rc = hwrm_req_init(bp, req, HWRM_TUNNEL_DST_PORT_FREE);
 	if (rc)
 		return rc;
@@ -4648,10 +4666,12 @@ static int bnxt_hwrm_tunnel_dst_port_free(struct bnxt *bp, u8 tunnel_type)
 	switch (tunnel_type) {
 	case TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN:
 		req->tunnel_dst_port_id = cpu_to_le16(bp->vxlan_fw_dst_port_id);
+		bp->vxlan_port = 0;
 		bp->vxlan_fw_dst_port_id = INVALID_HW_RING_ID;
 		break;
 	case TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE:
 		req->tunnel_dst_port_id = cpu_to_le16(bp->nge_fw_dst_port_id);
+		bp->nge_port = 0;
 		bp->nge_fw_dst_port_id = INVALID_HW_RING_ID;
 		break;
 	default:
@@ -4689,10 +4709,12 @@ static int bnxt_hwrm_tunnel_dst_port_alloc(struct bnxt *bp, __be16 port,
 
 	switch (tunnel_type) {
 	case TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN:
+		bp->vxlan_port = port;
 		bp->vxlan_fw_dst_port_id =
 			le16_to_cpu(resp->tunnel_dst_port_id);
 		break;
 	case TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE:
+		bp->nge_port = port;
 		bp->nge_fw_dst_port_id = le16_to_cpu(resp->tunnel_dst_port_id);
 		break;
 	default:
@@ -8221,12 +8243,10 @@ static int bnxt_hwrm_port_qstats_ext(struct bnxt *bp, u8 flags)
 
 static void bnxt_hwrm_free_tunnel_ports(struct bnxt *bp)
 {
-	if (bp->vxlan_fw_dst_port_id != INVALID_HW_RING_ID)
-		bnxt_hwrm_tunnel_dst_port_free(
-			bp, TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN);
-	if (bp->nge_fw_dst_port_id != INVALID_HW_RING_ID)
-		bnxt_hwrm_tunnel_dst_port_free(
-			bp, TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE);
+	bnxt_hwrm_tunnel_dst_port_free(bp,
+		TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN);
+	bnxt_hwrm_tunnel_dst_port_free(bp,
+		TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE);
 }
 
 static int bnxt_set_tpa(struct bnxt *bp, bool set_tpa)
@@ -11247,6 +11267,8 @@ static void bnxt_fw_health_check(struct bnxt *bp)
 	if (!fw_health->enabled || test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
 		return;
 
+	/* Make sure it is enabled before checking the tmr_counter. */
+	smp_rmb();
 	if (fw_health->tmr_counter) {
 		fw_health->tmr_counter--;
 		return;
@@ -12625,13 +12647,10 @@ static int bnxt_udp_tunnel_sync(struct net_device *netdev, unsigned int table)
 	unsigned int cmd;
 
 	udp_tunnel_nic_get_port(netdev, table, 0, &ti);
-	if (ti.type == UDP_TUNNEL_TYPE_VXLAN) {
-		bp->vxlan_port = ti.port;
+	if (ti.type == UDP_TUNNEL_TYPE_VXLAN)
 		cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN;
-	} else {
-		bp->nge_port = ti.port;
+	else
 		cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE;
-	}
 
 	if (ti.port)
 		return bnxt_hwrm_tunnel_dst_port_alloc(bp, ti.port, cmd);
@@ -13081,66 +13100,35 @@ static int bnxt_init_mac_addr(struct bnxt *bp)
 	return rc;
 }
 
-#define BNXT_VPD_LEN	512
 static void bnxt_vpd_read_info(struct bnxt *bp)
 {
 	struct pci_dev *pdev = bp->pdev;
-	int i, len, pos, ro_size, size;
-	ssize_t vpd_size;
+	unsigned int vpd_size, kw_len;
+	int pos, size;
 	u8 *vpd_data;
 
-	vpd_data = kmalloc(BNXT_VPD_LEN, GFP_KERNEL);
-	if (!vpd_data)
+	vpd_data = pci_vpd_alloc(pdev, &vpd_size);
+	if (IS_ERR(vpd_data)) {
+		pci_warn(pdev, "Unable to read VPD\n");
 		return;
-
-	vpd_size = pci_read_vpd(pdev, 0, BNXT_VPD_LEN, vpd_data);
-	if (vpd_size <= 0) {
-		netdev_err(bp->dev, "Unable to read VPD\n");
-		goto exit;
-	}
-
-	i = pci_vpd_find_tag(vpd_data, vpd_size, PCI_VPD_LRDT_RO_DATA);
-	if (i < 0) {
-		netdev_err(bp->dev, "VPD READ-Only not found\n");
-		goto exit;
-	}
-
-	i = pci_vpd_find_tag(vpd_data, vpd_size, PCI_VPD_LRDT_RO_DATA);
-	if (i < 0) {
-		netdev_err(bp->dev, "VPD READ-Only not found\n");
-		goto exit;
 	}
 
-	ro_size = pci_vpd_lrdt_size(&vpd_data[i]);
-	i += PCI_VPD_LRDT_TAG_SIZE;
-	if (i + ro_size > vpd_size)
-		goto exit;
-
-	pos = pci_vpd_find_info_keyword(vpd_data, i, ro_size,
-					PCI_VPD_RO_KEYWORD_PARTNO);
+	pos = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size,
+					   PCI_VPD_RO_KEYWORD_PARTNO, &kw_len);
 	if (pos < 0)
 		goto read_sn;
 
-	len = pci_vpd_info_field_size(&vpd_data[pos]);
-	pos += PCI_VPD_INFO_FLD_HDR_SIZE;
-	if (len + pos > vpd_size)
-		goto read_sn;
-
-	size = min(len, BNXT_VPD_FLD_LEN - 1);
+	size = min_t(int, kw_len, BNXT_VPD_FLD_LEN - 1);
 	memcpy(bp->board_partno, &vpd_data[pos], size);
 
 read_sn:
-	pos = pci_vpd_find_info_keyword(vpd_data, i, ro_size,
-					PCI_VPD_RO_KEYWORD_SERIALNO);
+	pos = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size,
+					   PCI_VPD_RO_KEYWORD_SERIALNO,
+					   &kw_len);
 	if (pos < 0)
 		goto exit;
 
-	len = pci_vpd_info_field_size(&vpd_data[pos]);
-	pos += PCI_VPD_INFO_FLD_HDR_SIZE;
-	if (len + pos > vpd_size)
-		goto exit;
-
-	size = min(len, BNXT_VPD_FLD_LEN - 1);
+	size = min_t(int, kw_len, BNXT_VPD_FLD_LEN - 1);
 	memcpy(bp->board_serialno, &vpd_data[pos], size);
 exit:
 	kfree(vpd_data);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index a8212dcdad5f..ec046e7a2484 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -28,6 +28,7 @@
 #include <net/dst_metadata.h>
 #include <net/xdp.h>
 #include <linux/dim.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
 #ifdef CONFIG_TEE_BNXT_FW
 #include <linux/firmware/broadcom/tee_bnxt_fw.h>
 #endif
@@ -1981,7 +1982,7 @@ struct bnxt {
 	struct mutex		sriov_lock;
 #endif
 
-#ifndef writeq
+#if BITS_PER_LONG == 32
 	/* ensure atomic 64-bit doorbell writes on 32-bit systems. */
 	spinlock_t		db_lock;
 #endif
@@ -2110,24 +2111,36 @@ static inline u32 bnxt_tx_avail(struct bnxt *bp, struct bnxt_tx_ring_info *txr)
 		((txr->tx_prod - txr->tx_cons) & bp->tx_ring_mask);
 }
 
-#ifndef writeq
-#define writeq(val64, db)			\
-do {						\
-	spin_lock(&bp->db_lock);		\
-	writel((val64) & 0xffffffff, db);	\
-	writel((val64) >> 32, (db) + 4);	\
-	spin_unlock(&bp->db_lock);		\
-} while (0)
+static inline void bnxt_writeq(struct bnxt *bp, u64 val,
+			       volatile void __iomem *addr)
+{
+#if BITS_PER_LONG == 32
+	spin_lock(&bp->db_lock);
+	lo_hi_writeq(val, addr);
+	spin_unlock(&bp->db_lock);
+#else
+	writeq(val, addr);
+#endif
+}
 
-#define writeq_relaxed writeq
+static inline void bnxt_writeq_relaxed(struct bnxt *bp, u64 val,
+				       volatile void __iomem *addr)
+{
+#if BITS_PER_LONG == 32
+	spin_lock(&bp->db_lock);
+	lo_hi_writeq_relaxed(val, addr);
+	spin_unlock(&bp->db_lock);
+#else
+	writeq_relaxed(val, addr);
 #endif
+}
 
 /* For TX and RX ring doorbells with no ordering guarantee*/
 static inline void bnxt_db_write_relaxed(struct bnxt *bp,
 					 struct bnxt_db_info *db, u32 idx)
 {
 	if (bp->flags & BNXT_FLAG_CHIP_P5) {
-		writeq_relaxed(db->db_key64 | idx, db->doorbell);
+		bnxt_writeq_relaxed(bp, db->db_key64 | idx, db->doorbell);
 	} else {
 		u32 db_val = db->db_key32 | idx;
 
@@ -2142,7 +2155,7 @@ static inline void bnxt_db_write(struct bnxt *bp, struct bnxt_db_info *db,
 				 u32 idx)
 {
 	if (bp->flags & BNXT_FLAG_CHIP_P5) {
-		writeq(db->db_key64 | idx, db->doorbell);
+		bnxt_writeq(bp, db->db_key64 | idx, db->doorbell);
 	} else {
 		u32 db_val = db->db_key32 | idx;
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index 1423cc617d93..9576547df4ab 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -352,13 +352,16 @@ static void bnxt_copy_from_nvm_data(union devlink_param_value *dst,
 		dst->vu8 = (u8)val32;
 }
 
-static int bnxt_hwrm_get_nvm_cfg_ver(struct bnxt *bp,
-				     union devlink_param_value *nvm_cfg_ver)
+static int bnxt_hwrm_get_nvm_cfg_ver(struct bnxt *bp, u32 *nvm_cfg_ver)
 {
 	struct hwrm_nvm_get_variable_input *req;
+	u16 bytes = BNXT_NVM_CFG_VER_BYTES;
+	u16 bits = BNXT_NVM_CFG_VER_BITS;
+	union devlink_param_value ver;
 	union bnxt_nvm_data *data;
 	dma_addr_t data_dma_addr;
-	int rc;
+	int rc, i = 2;
+	u16 dim = 1;
 
 	rc = hwrm_req_init(bp, req, HWRM_NVM_GET_VARIABLE);
 	if (rc)
@@ -370,16 +373,34 @@ static int bnxt_hwrm_get_nvm_cfg_ver(struct bnxt *bp,
 		goto exit;
 	}
 
+	/* earlier devices present as an array of raw bytes */
+	if (!BNXT_CHIP_P5(bp)) {
+		dim = 0;
+		i = 0;
+		bits *= 3;  /* array of 3 version components */
+		bytes *= 4; /* copy whole word */
+	}
+
 	hwrm_req_hold(bp, req);
 	req->dest_data_addr = cpu_to_le64(data_dma_addr);
-	req->data_len = cpu_to_le16(BNXT_NVM_CFG_VER_BITS);
+	req->data_len = cpu_to_le16(bits);
 	req->option_num = cpu_to_le16(NVM_OFF_NVM_CFG_VER);
+	req->dimensions = cpu_to_le16(dim);
 
-	rc = hwrm_req_send_silent(bp, req);
-	if (!rc)
-		bnxt_copy_from_nvm_data(nvm_cfg_ver, data,
-					BNXT_NVM_CFG_VER_BITS,
-					BNXT_NVM_CFG_VER_BYTES);
+	while (i >= 0) {
+		req->index_0 = cpu_to_le16(i--);
+		rc = hwrm_req_send_silent(bp, req);
+		if (rc)
+			goto exit;
+		bnxt_copy_from_nvm_data(&ver, data, bits, bytes);
+
+		if (BNXT_CHIP_P5(bp)) {
+			*nvm_cfg_ver <<= 8;
+			*nvm_cfg_ver |= ver.vu8;
+		} else {
+			*nvm_cfg_ver = ver.vu32;
+		}
+	}
 
 exit:
 	hwrm_req_drop(bp, req);
@@ -416,12 +437,12 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
 {
 	struct hwrm_nvm_get_dev_info_output nvm_dev_info;
 	struct bnxt *bp = bnxt_get_bp_from_dl(dl);
-	union devlink_param_value nvm_cfg_ver;
 	struct hwrm_ver_get_output *ver_resp;
 	char mgmt_ver[FW_VER_STR_LEN];
 	char roce_ver[FW_VER_STR_LEN];
 	char ncsi_ver[FW_VER_STR_LEN];
 	char buf[32];
+	u32 ver = 0;
 	int rc;
 
 	rc = devlink_info_driver_name_put(req, DRV_MODULE_NAME);
@@ -456,7 +477,7 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
 		return rc;
 
 	ver_resp = &bp->ver_resp;
-	sprintf(buf, "%X", ver_resp->chip_rev);
+	sprintf(buf, "%c%d", 'A' + ver_resp->chip_rev, ver_resp->chip_metal);
 	rc = bnxt_dl_info_put(bp, req, BNXT_VERSION_FIXED,
 			      DEVLINK_INFO_VERSION_GENERIC_ASIC_REV, buf);
 	if (rc)
@@ -475,11 +496,9 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
 	if (rc)
 		return rc;
 
-	if (BNXT_PF(bp) && !bnxt_hwrm_get_nvm_cfg_ver(bp, &nvm_cfg_ver)) {
-		u32 ver = nvm_cfg_ver.vu32;
-
-		sprintf(buf, "%d.%d.%d", (ver >> 16) & 0xf, (ver >> 8) & 0xf,
-			ver & 0xf);
+	if (BNXT_PF(bp) && !bnxt_hwrm_get_nvm_cfg_ver(bp, &ver)) {
+		sprintf(buf, "%d.%d.%d", (ver >> 16) & 0xff, (ver >> 8) & 0xff,
+			ver & 0xff);
 		rc = bnxt_dl_info_put(bp, req, BNXT_VERSION_STORED,
 				      DEVLINK_INFO_VERSION_GENERIC_FW_PSID,
 				      buf);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
index d22cab5d6856..d889f240da2b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
@@ -40,8 +40,8 @@ static inline void bnxt_link_bp_to_dl(struct bnxt *bp, struct devlink *dl)
 #define NVM_OFF_ENABLE_SRIOV		401
 #define NVM_OFF_NVM_CFG_VER		602
 
-#define BNXT_NVM_CFG_VER_BITS		24
-#define BNXT_NVM_CFG_VER_BYTES		4
+#define BNXT_NVM_CFG_VER_BITS		8
+#define BNXT_NVM_CFG_VER_BYTES		1
 
 #define BNXT_MSIX_VEC_MAX	512
 #define BNXT_MSIX_VEC_MIN_MAX	128
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c
index acef61abe35d..bb7327b82d0b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c
@@ -145,11 +145,11 @@ void hwrm_req_timeout(struct bnxt *bp, void *req, unsigned int timeout)
  * @bp: The driver context.
  * @req: The request for which calls to hwrm_req_dma_slice() will have altered
  *	allocation flags.
- * @flags: A bitmask of GFP flags. These flags are passed to
- *	dma_alloc_coherent() whenever it is used to allocate backing memory
- *	for slices. Note that calls to hwrm_req_dma_slice() will not always
- *	result in new allocations, however, memory suballocated from the
- *	request buffer is already __GFP_ZERO.
+ * @gfp: A bitmask of GFP flags. These flags are passed to dma_alloc_coherent()
+ *	whenever it is used to allocate backing memory for slices. Note that
+ *	calls to hwrm_req_dma_slice() will not always result in new allocations,
+ *	however, memory suballocated from the request buffer is already
+ *	__GFP_ZERO.
  *
  * Sets the GFP allocation flags associated with the request for subsequent
  * calls to hwrm_req_dma_slice(). This can be useful for specifying __GFP_ZERO
@@ -698,8 +698,8 @@ int hwrm_req_send_silent(struct bnxt *bp, void *req)
  * @bp: The driver context.
  * @req: The request for which indirect data will be associated.
  * @size: The size of the allocation.
- * @dma: The bus address associated with the allocation. The HWRM API has no
- *	knowledge about the type of the request and so cannot infer how the
+ * @dma_handle: The bus address associated with the allocation. The HWRM API has
+ *	no knowledge about the type of the request and so cannot infer how the
  *	caller intends to use the indirect data. Thus, the caller is
  *	responsible for configuring the request object appropriately to
  *	point to the associated indirect memory. Note, DMA handle has the
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 8a238e349e02..5e0e0e70d801 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -12788,7 +12788,7 @@ static void tg3_get_ethtool_stats(struct net_device *dev,
 		memset(tmp_stats, 0, sizeof(struct tg3_ethtool_stats));
 }
 
-static __be32 *tg3_vpd_readblock(struct tg3 *tp, u32 *vpdlen)
+static __be32 *tg3_vpd_readblock(struct tg3 *tp, unsigned int *vpdlen)
 {
 	int i;
 	__be32 *buf;
@@ -12822,15 +12822,11 @@ static __be32 *tg3_vpd_readblock(struct tg3 *tp, u32 *vpdlen)
 			offset = TG3_NVM_VPD_OFF;
 			len = TG3_NVM_VPD_LEN;
 		}
-	} else {
-		len = TG3_NVM_PCI_VPD_MAX_LEN;
-	}
 
-	buf = kmalloc(len, GFP_KERNEL);
-	if (buf == NULL)
-		return NULL;
+		buf = kmalloc(len, GFP_KERNEL);
+		if (!buf)
+			return NULL;
 
-	if (magic == TG3_EEPROM_MAGIC) {
 		for (i = 0; i < len; i += 4) {
 			/* The data is in little-endian format in NVRAM.
 			 * Use the big-endian read routines to preserve
@@ -12841,12 +12837,9 @@ static __be32 *tg3_vpd_readblock(struct tg3 *tp, u32 *vpdlen)
 		}
 		*vpdlen = len;
 	} else {
-		ssize_t cnt;
-
-		cnt = pci_read_vpd(tp->pdev, 0, len, (u8 *)buf);
-		if (cnt < 0)
-			goto error;
-		*vpdlen = cnt;
+		buf = pci_vpd_alloc(tp->pdev, vpdlen);
+		if (IS_ERR(buf))
+			return NULL;
 	}
 
 	return buf;
@@ -12868,9 +12861,10 @@ error:
 
 static int tg3_test_nvram(struct tg3 *tp)
 {
-	u32 csum, magic, len;
+	u32 csum, magic;
 	__be32 *buf;
 	int i, j, k, err = 0, size;
+	unsigned int len;
 
 	if (tg3_flag(tp, NO_NVRAM))
 		return 0;
@@ -13013,33 +13007,10 @@ static int tg3_test_nvram(struct tg3 *tp)
 	if (!buf)
 		return -ENOMEM;
 
-	i = pci_vpd_find_tag((u8 *)buf, len, PCI_VPD_LRDT_RO_DATA);
-	if (i > 0) {
-		j = pci_vpd_lrdt_size(&((u8 *)buf)[i]);
-		if (j < 0)
-			goto out;
-
-		if (i + PCI_VPD_LRDT_TAG_SIZE + j > len)
-			goto out;
-
-		i += PCI_VPD_LRDT_TAG_SIZE;
-		j = pci_vpd_find_info_keyword((u8 *)buf, i, j,
-					      PCI_VPD_RO_KEYWORD_CHKSUM);
-		if (j > 0) {
-			u8 csum8 = 0;
-
-			j += PCI_VPD_INFO_FLD_HDR_SIZE;
-
-			for (i = 0; i <= j; i++)
-				csum8 += ((u8 *)buf)[i];
-
-			if (csum8)
-				goto out;
-		}
-	}
-
-	err = 0;
-
+	err = pci_vpd_check_csum(buf, len);
+	/* go on if no checksum found */
+	if (err == 1)
+		err = 0;
 out:
 	kfree(buf);
 	return err;
@@ -15624,64 +15595,36 @@ skip_phy_reset:
 static void tg3_read_vpd(struct tg3 *tp)
 {
 	u8 *vpd_data;
-	unsigned int block_end, rosize, len;
-	u32 vpdlen;
-	int j, i = 0;
+	unsigned int len, vpdlen;
+	int i;
 
 	vpd_data = (u8 *)tg3_vpd_readblock(tp, &vpdlen);
 	if (!vpd_data)
 		goto out_no_vpd;
 
-	i = pci_vpd_find_tag(vpd_data, vpdlen, PCI_VPD_LRDT_RO_DATA);
+	i = pci_vpd_find_ro_info_keyword(vpd_data, vpdlen,
+					 PCI_VPD_RO_KEYWORD_MFR_ID, &len);
 	if (i < 0)
-		goto out_not_found;
-
-	rosize = pci_vpd_lrdt_size(&vpd_data[i]);
-	block_end = i + PCI_VPD_LRDT_TAG_SIZE + rosize;
-	i += PCI_VPD_LRDT_TAG_SIZE;
+		goto partno;
 
-	if (block_end > vpdlen)
-		goto out_not_found;
-
-	j = pci_vpd_find_info_keyword(vpd_data, i, rosize,
-				      PCI_VPD_RO_KEYWORD_MFR_ID);
-	if (j > 0) {
-		len = pci_vpd_info_field_size(&vpd_data[j]);
-
-		j += PCI_VPD_INFO_FLD_HDR_SIZE;
-		if (j + len > block_end || len != 4 ||
-		    memcmp(&vpd_data[j], "1028", 4))
-			goto partno;
-
-		j = pci_vpd_find_info_keyword(vpd_data, i, rosize,
-					      PCI_VPD_RO_KEYWORD_VENDOR0);
-		if (j < 0)
-			goto partno;
+	if (len != 4 || memcmp(vpd_data + i, "1028", 4))
+		goto partno;
 
-		len = pci_vpd_info_field_size(&vpd_data[j]);
-
-		j += PCI_VPD_INFO_FLD_HDR_SIZE;
-		if (j + len > block_end)
-			goto partno;
+	i = pci_vpd_find_ro_info_keyword(vpd_data, vpdlen,
+					 PCI_VPD_RO_KEYWORD_VENDOR0, &len);
+	if (i < 0)
+		goto partno;
 
-		if (len >= sizeof(tp->fw_ver))
-			len = sizeof(tp->fw_ver) - 1;
-		memset(tp->fw_ver, 0, sizeof(tp->fw_ver));
-		snprintf(tp->fw_ver, sizeof(tp->fw_ver), "%.*s bc ", len,
-			 &vpd_data[j]);
-	}
+	memset(tp->fw_ver, 0, sizeof(tp->fw_ver));
+	snprintf(tp->fw_ver, sizeof(tp->fw_ver), "%.*s bc ", len, vpd_data + i);
 
 partno:
-	i = pci_vpd_find_info_keyword(vpd_data, i, rosize,
-				      PCI_VPD_RO_KEYWORD_PARTNO);
+	i = pci_vpd_find_ro_info_keyword(vpd_data, vpdlen,
+					 PCI_VPD_RO_KEYWORD_PARTNO, &len);
 	if (i < 0)
 		goto out_not_found;
 
-	len = pci_vpd_info_field_size(&vpd_data[i]);
-
-	i += PCI_VPD_INFO_FLD_HDR_SIZE;
-	if (len > TG3_BPN_SIZE ||
-	    (len + i) > vpdlen)
+	if (len > TG3_BPN_SIZE)
 		goto out_not_found;
 
 	memcpy(tp->board_part_number, &vpd_data[i], len);
diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h
index 46ec4fdfd16a..1000c894064f 100644
--- a/drivers/net/ethernet/broadcom/tg3.h
+++ b/drivers/net/ethernet/broadcom/tg3.h
@@ -2101,7 +2101,6 @@
 /* Hardware Legacy NVRAM layout */
 #define TG3_NVM_VPD_OFF			0x100
 #define TG3_NVM_VPD_LEN			256
-#define TG3_NVM_PCI_VPD_MAX_LEN		512
 
 /* Hardware Selfboot NVRAM layout */
 #define TG3_NVM_HWSB_CFG1		0x00000004
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index c6fe0f2a4d0e..f6396ac64006 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -526,7 +526,7 @@ static void octeon_destroy_resources(struct octeon_device *oct)
 			oct->irq_name_storage = NULL;
 		}
 		/* Soft reset the octeon device before exiting */
-		if (oct->pci_dev->reset_fn)
+		if (!pcie_reset_flr(oct->pci_dev, PCI_RESET_PROBE))
 			octeon_pci_flr(oct);
 		else
 			cn23xx_vf_ask_pf_to_do_flr(oct);
diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
index 73c016166f06..d246eee4b6d5 100644
--- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
+++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
@@ -1111,6 +1111,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (!adapter->registered_device_map) {
 		pr_err("%s: could not register any net devices\n",
 		       pci_name(pdev));
+		err = -EINVAL;
 		goto out_release_adapter_res;
 	}
 
diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c
index e21a2e691382..c3afec1041f8 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c
@@ -3301,6 +3301,9 @@ void t3_sge_stop(struct adapter *adap)
 
 	t3_sge_stop_dma(adap);
 
+	/* workqueues aren't initialized otherwise */
+	if (!(adap->flags & FULL_INIT_DONE))
+		return;
 	for (i = 0; i < SGE_QSETS; ++i) {
 		struct sge_qset *qs = &adap->sge.qs[i];
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 9058f09f921e..ecea3cdd30b3 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -84,7 +84,6 @@ extern struct mutex uld_mutex;
 enum {
 	MAX_NPORTS	= 4,     /* max # of ports */
 	SERNUM_LEN	= 24,    /* Serial # length */
-	EC_LEN		= 16,    /* E/C length */
 	ID_LEN		= 16,    /* ID length */
 	PN_LEN		= 16,    /* Part Number length */
 	MACADDR_LEN	= 12,    /* MAC Address length */
@@ -391,7 +390,6 @@ struct tp_params {
 
 struct vpd_params {
 	unsigned int cclk;
-	u8 ec[EC_LEN + 1];
 	u8 sn[SERNUM_LEN + 1];
 	u8 id[ID_LEN + 1];
 	u8 pn[PN_LEN + 1];
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 6606fb8b3e42..64144b6171d7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -2743,10 +2743,9 @@ int t4_seeprom_wp(struct adapter *adapter, bool enable)
  */
 int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p)
 {
-	int i, ret = 0, addr;
-	int ec, sn, pn, na;
-	u8 *vpd, csum, base_val = 0;
-	unsigned int vpdr_len, kw_offset, id_len;
+	unsigned int id_len, pn_len, sn_len, na_len;
+	int id, sn, pn, na, addr, ret = 0;
+	u8 *vpd, base_val = 0;
 
 	vpd = vmalloc(VPD_LEN);
 	if (!vpd)
@@ -2765,74 +2764,52 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p)
 	if (ret < 0)
 		goto out;
 
-	if (vpd[0] != PCI_VPD_LRDT_ID_STRING) {
-		dev_err(adapter->pdev_dev, "missing VPD ID string\n");
-		ret = -EINVAL;
+	ret = pci_vpd_find_id_string(vpd, VPD_LEN, &id_len);
+	if (ret < 0)
 		goto out;
-	}
+	id = ret;
 
-	id_len = pci_vpd_lrdt_size(vpd);
-	if (id_len > ID_LEN)
-		id_len = ID_LEN;
-
-	i = pci_vpd_find_tag(vpd, VPD_LEN, PCI_VPD_LRDT_RO_DATA);
-	if (i < 0) {
-		dev_err(adapter->pdev_dev, "missing VPD-R section\n");
+	ret = pci_vpd_check_csum(vpd, VPD_LEN);
+	if (ret) {
+		dev_err(adapter->pdev_dev, "VPD checksum incorrect or missing\n");
 		ret = -EINVAL;
 		goto out;
 	}
 
-	vpdr_len = pci_vpd_lrdt_size(&vpd[i]);
-	kw_offset = i + PCI_VPD_LRDT_TAG_SIZE;
-	if (vpdr_len + kw_offset > VPD_LEN) {
-		dev_err(adapter->pdev_dev, "bad VPD-R length %u\n", vpdr_len);
-		ret = -EINVAL;
+	ret = pci_vpd_find_ro_info_keyword(vpd, VPD_LEN,
+					   PCI_VPD_RO_KEYWORD_SERIALNO, &sn_len);
+	if (ret < 0)
 		goto out;
-	}
-
-#define FIND_VPD_KW(var, name) do { \
-	var = pci_vpd_find_info_keyword(vpd, kw_offset, vpdr_len, name); \
-	if (var < 0) { \
-		dev_err(adapter->pdev_dev, "missing VPD keyword " name "\n"); \
-		ret = -EINVAL; \
-		goto out; \
-	} \
-	var += PCI_VPD_INFO_FLD_HDR_SIZE; \
-} while (0)
-
-	FIND_VPD_KW(i, "RV");
-	for (csum = 0; i >= 0; i--)
-		csum += vpd[i];
+	sn = ret;
 
-	if (csum) {
-		dev_err(adapter->pdev_dev,
-			"corrupted VPD EEPROM, actual csum %u\n", csum);
-		ret = -EINVAL;
+	ret = pci_vpd_find_ro_info_keyword(vpd, VPD_LEN,
+					   PCI_VPD_RO_KEYWORD_PARTNO, &pn_len);
+	if (ret < 0)
 		goto out;
-	}
+	pn = ret;
 
-	FIND_VPD_KW(ec, "EC");
-	FIND_VPD_KW(sn, "SN");
-	FIND_VPD_KW(pn, "PN");
-	FIND_VPD_KW(na, "NA");
-#undef FIND_VPD_KW
+	ret = pci_vpd_find_ro_info_keyword(vpd, VPD_LEN, "NA", &na_len);
+	if (ret < 0)
+		goto out;
+	na = ret;
 
-	memcpy(p->id, vpd + PCI_VPD_LRDT_TAG_SIZE, id_len);
+	memcpy(p->id, vpd + id, min_t(int, id_len, ID_LEN));
 	strim(p->id);
-	memcpy(p->ec, vpd + ec, EC_LEN);
-	strim(p->ec);
-	i = pci_vpd_info_field_size(vpd + sn - PCI_VPD_INFO_FLD_HDR_SIZE);
-	memcpy(p->sn, vpd + sn, min(i, SERNUM_LEN));
+	memcpy(p->sn, vpd + sn, min_t(int, sn_len, SERNUM_LEN));
 	strim(p->sn);
-	i = pci_vpd_info_field_size(vpd + pn - PCI_VPD_INFO_FLD_HDR_SIZE);
-	memcpy(p->pn, vpd + pn, min(i, PN_LEN));
+	memcpy(p->pn, vpd + pn, min_t(int, pn_len, PN_LEN));
 	strim(p->pn);
-	memcpy(p->na, vpd + na, min(i, MACADDR_LEN));
+	memcpy(p->na, vpd + na, min_t(int, na_len, MACADDR_LEN));
 	strim((char *)p->na);
 
 out:
 	vfree(vpd);
-	return ret < 0 ? ret : 0;
+	if (ret < 0) {
+		dev_err(adapter->pdev_dev, "error reading VPD\n");
+		return ret;
+	}
+
+	return 0;
 }
 
 /**
diff --git a/drivers/net/ethernet/cirrus/Kconfig b/drivers/net/ethernet/cirrus/Kconfig
index dac1764ba740..5bdf731d9503 100644
--- a/drivers/net/ethernet/cirrus/Kconfig
+++ b/drivers/net/ethernet/cirrus/Kconfig
@@ -38,7 +38,7 @@ config CS89x0_ISA
 
 config CS89x0_PLATFORM
 	tristate "CS89x0 platform driver support"
-	depends on ARM || COMPILE_TEST
+	depends on ARM || (COMPILE_TEST && !PPC)
 	select CS89x0
 	help
 	  Say Y to compile the cs89x0 platform driver. This makes this driver
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index 474c6d1664e7..ac9b69513332 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -362,7 +362,7 @@ static void hclge_set_default_capability(struct hclge_dev *hdev)
 	}
 }
 
-const struct hclge_caps_bit_map hclge_cmd_caps_bit_map0[] = {
+static const struct hclge_caps_bit_map hclge_cmd_caps_bit_map0[] = {
 	{HCLGE_CAP_UDP_GSO_B, HNAE3_DEV_SUPPORT_UDP_GSO_B},
 	{HCLGE_CAP_PTP_B, HNAE3_DEV_SUPPORT_PTP_B},
 	{HCLGE_CAP_INT_QL_B, HNAE3_DEV_SUPPORT_INT_QL_B},
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
index 59772b0e9531..f89bfb352adf 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
@@ -342,7 +342,7 @@ static void hclgevf_set_default_capability(struct hclgevf_dev *hdev)
 	set_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps);
 }
 
-const struct hclgevf_caps_bit_map hclgevf_cmd_caps_bit_map0[] = {
+static const struct hclgevf_caps_bit_map hclgevf_cmd_caps_bit_map0[] = {
 	{HCLGEVF_CAP_UDP_GSO_B, HNAE3_DEV_SUPPORT_UDP_GSO_B},
 	{HCLGEVF_CAP_INT_QL_B, HNAE3_DEV_SUPPORT_INT_QL_B},
 	{HCLGEVF_CAP_TQP_TXRX_INDEP_B, HNAE3_DEV_SUPPORT_TQP_TXRX_INDEP_B},
diff --git a/drivers/net/ethernet/i825xx/lasi_82596.c b/drivers/net/ethernet/i825xx/lasi_82596.c
index 96c6f4f36904..48e001881c75 100644
--- a/drivers/net/ethernet/i825xx/lasi_82596.c
+++ b/drivers/net/ethernet/i825xx/lasi_82596.c
@@ -196,7 +196,7 @@ out_free_netdev:
 	return retval;
 }
 
-static int __exit lan_remove_chip(struct parisc_device *pdev)
+static void __exit lan_remove_chip(struct parisc_device *pdev)
 {
 	struct net_device *dev = parisc_get_drvdata(pdev);
 	struct i596_private *lp = netdev_priv(dev);
@@ -205,7 +205,6 @@ static int __exit lan_remove_chip(struct parisc_device *pdev)
 	dma_free_noncoherent(&pdev->dev, sizeof(struct i596_private), lp->dma,
 		       lp->dma_addr, DMA_BIDIRECTIONAL);
 	free_netdev (dev);
-	return 0;
 }
 
 static const struct parisc_device_id lan_tbl[] __initconst = {
diff --git a/drivers/net/ethernet/i825xx/sun3_82586.c b/drivers/net/ethernet/i825xx/sun3_82586.c
index 893e0ddcb611..0696f723228a 100644
--- a/drivers/net/ethernet/i825xx/sun3_82586.c
+++ b/drivers/net/ethernet/i825xx/sun3_82586.c
@@ -314,7 +314,7 @@ static int __init sun3_82586_probe(void)
 	err = register_netdev(dev);
 	if (err)
 		goto out2;
-	return dev;
+	return 0;
 
 out2:
 	release_region(ioaddr, SUN3_82586_TOTAL_SIZE);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
index 7f3d01059e19..34a089b71e55 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -1487,7 +1487,7 @@ static int cgx_lmac_init(struct cgx *cgx)
 				MAX_DMAC_ENTRIES_PER_CGX / cgx->lmac_count;
 		err = rvu_alloc_bitmap(&lmac->mac_to_index_bmap);
 		if (err)
-			return err;
+			goto err_name_free;
 
 		/* Reserve first entry for default MAC address */
 		set_bit(0, lmac->mac_to_index_bmap.bmap);
@@ -1497,7 +1497,7 @@ static int cgx_lmac_init(struct cgx *cgx)
 		spin_lock_init(&lmac->event_cb_lock);
 		err = cgx_configure_interrupt(cgx, lmac, lmac->lmac_id, false);
 		if (err)
-			goto err_irq;
+			goto err_bitmap_free;
 
 		/* Add reference */
 		cgx->lmac_idmap[lmac->lmac_id] = lmac;
@@ -1507,7 +1507,9 @@ static int cgx_lmac_init(struct cgx *cgx)
 
 	return cgx_lmac_verify_fwi_version(cgx);
 
-err_irq:
+err_bitmap_free:
+	rvu_free_bitmap(&lmac->mac_to_index_bmap);
+err_name_free:
 	kfree(lmac->name);
 err_lmac_free:
 	kfree(lmac);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index ce647e037f4d..35836903b7fb 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -92,7 +92,8 @@ static void rvu_setup_hw_capabilities(struct rvu *rvu)
  */
 int rvu_poll_reg(struct rvu *rvu, u64 block, u64 offset, u64 mask, bool zero)
 {
-	unsigned long timeout = jiffies + usecs_to_jiffies(10000);
+	unsigned long timeout = jiffies + usecs_to_jiffies(20000);
+	bool twice = false;
 	void __iomem *reg;
 	u64 reg_val;
 
@@ -107,6 +108,15 @@ again:
 		usleep_range(1, 5);
 		goto again;
 	}
+	/* In scenarios where CPU is scheduled out before checking
+	 * 'time_before' (above) and gets scheduled in such that
+	 * jiffies are beyond timeout value, then check again if HW is
+	 * done with the operation in the meantime.
+	 */
+	if (!twice) {
+		twice = true;
+		goto again;
+	}
 	return -EBUSY;
 }
 
@@ -201,6 +211,11 @@ int rvu_alloc_bitmap(struct rsrc_bmap *rsrc)
 	return 0;
 }
 
+void rvu_free_bitmap(struct rsrc_bmap *rsrc)
+{
+	kfree(rsrc->bmap);
+}
+
 /* Get block LF's HW index from a PF_FUNC's block slot number */
 int rvu_get_lf(struct rvu *rvu, struct rvu_block *block, u16 pcifunc, u16 slot)
 {
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index d38e5c980c30..1d9411232f1d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -638,6 +638,7 @@ static inline bool is_rvu_fwdata_valid(struct rvu *rvu)
 }
 
 int rvu_alloc_bitmap(struct rsrc_bmap *rsrc);
+void rvu_free_bitmap(struct rsrc_bmap *rsrc);
 int rvu_alloc_rsrc(struct rsrc_bmap *rsrc);
 void rvu_free_rsrc(struct rsrc_bmap *rsrc, int id);
 bool is_rsrc_free(struct rsrc_bmap *rsrc, int id);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
index 3cc76f14d2fd..95f21dfdba48 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
@@ -27,7 +27,8 @@ int cn10k_lmtst_init(struct otx2_nic *pfvf)
 {
 
 	struct lmtst_tbl_setup_req *req;
-	int qcount, err;
+	struct otx2_lmt_info *lmt_info;
+	int err, cpu;
 
 	if (!test_bit(CN10K_LMTST, &pfvf->hw.cap_flag)) {
 		pfvf->hw_ops = &otx2_hw_ops;
@@ -35,15 +36,9 @@ int cn10k_lmtst_init(struct otx2_nic *pfvf)
 	}
 
 	pfvf->hw_ops = &cn10k_hw_ops;
-	qcount = pfvf->hw.max_queues;
-	/* LMTST lines allocation
-	 * qcount = num_online_cpus();
-	 * NPA = TX + RX + XDP.
-	 * NIX = TX * 32 (For Burst SQE flush).
-	 */
-	pfvf->tot_lmt_lines = (qcount * 3) + (qcount * 32);
-	pfvf->npa_lmt_lines = qcount * 3;
-	pfvf->nix_lmt_size =  LMT_BURST_SIZE * LMT_LINE_SIZE;
+	/* Total LMTLINES = num_online_cpus() * 32 (For Burst flush).*/
+	pfvf->tot_lmt_lines = (num_online_cpus() * LMT_BURST_SIZE);
+	pfvf->hw.lmt_info = alloc_percpu(struct otx2_lmt_info);
 
 	mutex_lock(&pfvf->mbox.lock);
 	req = otx2_mbox_alloc_msg_lmtst_tbl_setup(&pfvf->mbox);
@@ -66,6 +61,13 @@ int cn10k_lmtst_init(struct otx2_nic *pfvf)
 	err = otx2_sync_mbox_msg(&pfvf->mbox);
 	mutex_unlock(&pfvf->mbox.lock);
 
+	for_each_possible_cpu(cpu) {
+		lmt_info = per_cpu_ptr(pfvf->hw.lmt_info, cpu);
+		lmt_info->lmt_addr = ((u64)pfvf->hw.lmt_base +
+				      (cpu * LMT_BURST_SIZE * LMT_LINE_SIZE));
+		lmt_info->lmt_id = cpu * LMT_BURST_SIZE;
+	}
+
 	return 0;
 }
 EXPORT_SYMBOL(cn10k_lmtst_init);
@@ -74,13 +76,6 @@ int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura)
 {
 	struct nix_cn10k_aq_enq_req *aq;
 	struct otx2_nic *pfvf = dev;
-	struct otx2_snd_queue *sq;
-
-	sq = &pfvf->qset.sq[qidx];
-	sq->lmt_addr = (u64 *)((u64)pfvf->hw.nix_lmt_base +
-			       (qidx * pfvf->nix_lmt_size));
-
-	sq->lmt_id = pfvf->npa_lmt_lines + (qidx * LMT_BURST_SIZE);
 
 	/* Get memory to put this msg */
 	aq = otx2_mbox_alloc_msg_nix_cn10k_aq_enq(&pfvf->mbox);
@@ -125,8 +120,7 @@ void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq)
 		if (otx2_alloc_buffer(pfvf, cq, &bufptr)) {
 			if (num_ptrs--)
 				__cn10k_aura_freeptr(pfvf, cq->cq_idx, ptrs,
-						     num_ptrs,
-						     cq->rbpool->lmt_addr);
+						     num_ptrs);
 			break;
 		}
 		cq->pool_ptrs--;
@@ -134,8 +128,7 @@ void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq)
 		num_ptrs++;
 		if (num_ptrs == NPA_MAX_BURST || cq->pool_ptrs == 0) {
 			__cn10k_aura_freeptr(pfvf, cq->cq_idx, ptrs,
-					     num_ptrs,
-					     cq->rbpool->lmt_addr);
+					     num_ptrs);
 			num_ptrs = 1;
 		}
 	}
@@ -143,20 +136,23 @@ void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq)
 
 void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx)
 {
+	struct otx2_lmt_info *lmt_info;
+	struct otx2_nic *pfvf = dev;
 	u64 val = 0, tar_addr = 0;
 
+	lmt_info = per_cpu_ptr(pfvf->hw.lmt_info, smp_processor_id());
 	/* FIXME: val[0:10] LMT_ID.
 	 * [12:15] no of LMTST - 1 in the burst.
 	 * [19:63] data size of each LMTST in the burst except first.
 	 */
-	val = (sq->lmt_id & 0x7FF);
+	val = (lmt_info->lmt_id & 0x7FF);
 	/* Target address for LMTST flush tells HW how many 128bit
 	 * words are present.
 	 * tar_addr[6:4] size of first LMTST - 1 in units of 128b.
 	 */
 	tar_addr |= sq->io_addr | (((size / 16) - 1) & 0x7) << 4;
 	dma_wmb();
-	memcpy(sq->lmt_addr, sq->sqe_base, size);
+	memcpy((u64 *)lmt_info->lmt_addr, sq->sqe_base, size);
 	cn10k_lmt_flush(val, tar_addr);
 
 	sq->head++;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index ce25c2744435..78df173e6df2 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -1230,11 +1230,6 @@ static int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id,
 
 	pool->rbsize = buf_size;
 
-	/* Set LMTST addr for NPA batch free */
-	if (test_bit(CN10K_LMTST, &pfvf->hw.cap_flag))
-		pool->lmt_addr = (__force u64 *)((u64)pfvf->hw.npa_lmt_base +
-						 (pool_id * LMT_LINE_SIZE));
-
 	/* Initialize this pool's context via AF */
 	aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
 	if (!aq) {
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index 48227cec06ee..a51ecd771d07 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -53,6 +53,10 @@ enum arua_mapped_qtypes {
 /* Send skid of 2000 packets required for CQ size of 4K CQEs. */
 #define SEND_CQ_SKID	2000
 
+struct otx2_lmt_info {
+	u64 lmt_addr;
+	u16 lmt_id;
+};
 /* RSS configuration */
 struct otx2_rss_ctx {
 	u8  ind_tbl[MAX_RSS_INDIR_TBL_SIZE];
@@ -224,8 +228,7 @@ struct otx2_hw {
 #define LMT_LINE_SIZE		128
 #define LMT_BURST_SIZE		32 /* 32 LMTST lines for burst SQE flush */
 	u64			*lmt_base;
-	u64			*npa_lmt_base;
-	u64			*nix_lmt_base;
+	struct otx2_lmt_info	__percpu *lmt_info;
 };
 
 enum vfperm {
@@ -407,17 +410,18 @@ static inline bool is_96xx_B0(struct pci_dev *pdev)
  */
 #define PCI_REVISION_ID_96XX		0x00
 #define PCI_REVISION_ID_95XX		0x10
-#define PCI_REVISION_ID_LOKI		0x20
+#define PCI_REVISION_ID_95XXN		0x20
 #define PCI_REVISION_ID_98XX		0x30
 #define PCI_REVISION_ID_95XXMM		0x40
+#define PCI_REVISION_ID_95XXO		0xE0
 
 static inline bool is_dev_otx2(struct pci_dev *pdev)
 {
 	u8 midr = pdev->revision & 0xF0;
 
 	return (midr == PCI_REVISION_ID_96XX || midr == PCI_REVISION_ID_95XX ||
-		midr == PCI_REVISION_ID_LOKI || midr == PCI_REVISION_ID_98XX ||
-		midr == PCI_REVISION_ID_95XXMM);
+		midr == PCI_REVISION_ID_95XXN || midr == PCI_REVISION_ID_98XX ||
+		midr == PCI_REVISION_ID_95XXMM || midr == PCI_REVISION_ID_95XXO);
 }
 
 static inline void otx2_setup_dev_hw_settings(struct otx2_nic *pfvf)
@@ -562,15 +566,16 @@ static inline u64 otx2_atomic64_add(u64 incr, u64 *ptr)
 #endif
 
 static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura,
-					u64 *ptrs, u64 num_ptrs,
-					u64 *lmt_addr)
+					u64 *ptrs, u64 num_ptrs)
 {
+	struct otx2_lmt_info *lmt_info;
 	u64 size = 0, count_eot = 0;
 	u64 tar_addr, val = 0;
 
+	lmt_info = per_cpu_ptr(pfvf->hw.lmt_info, smp_processor_id());
 	tar_addr = (__force u64)otx2_get_regaddr(pfvf, NPA_LF_AURA_BATCH_FREE0);
 	/* LMTID is same as AURA Id */
-	val = (aura & 0x7FF) | BIT_ULL(63);
+	val = (lmt_info->lmt_id & 0x7FF) | BIT_ULL(63);
 	/* Set if [127:64] of last 128bit word has a valid pointer */
 	count_eot = (num_ptrs % 2) ? 0ULL : 1ULL;
 	/* Set AURA ID to free pointer */
@@ -586,7 +591,7 @@ static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura,
 			size++;
 		tar_addr |=  ((size - 1) & 0x7) << 4;
 	}
-	memcpy(lmt_addr, ptrs, sizeof(u64) * num_ptrs);
+	memcpy((u64 *)lmt_info->lmt_addr, ptrs, sizeof(u64) * num_ptrs);
 	/* Perform LMTST flush */
 	cn10k_lmt_flush(val, tar_addr);
 }
@@ -594,12 +599,11 @@ static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura,
 static inline void cn10k_aura_freeptr(void *dev, int aura, u64 buf)
 {
 	struct otx2_nic *pfvf = dev;
-	struct otx2_pool *pool;
 	u64 ptrs[2];
 
-	pool = &pfvf->qset.pool[aura];
 	ptrs[1] = buf;
-	__cn10k_aura_freeptr(pfvf, aura, ptrs, 2, pool->lmt_addr);
+	/* Free only one buffer at time during init and teardown */
+	__cn10k_aura_freeptr(pfvf, aura, ptrs, 2);
 }
 
 /* Alloc pointer from pool/aura */
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
index 799486c72177..dbfa3bc39e34 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
@@ -16,8 +16,8 @@
 #include "otx2_common.h"
 #include "otx2_ptp.h"
 
-#define DRV_NAME	"octeontx2-nicpf"
-#define DRV_VF_NAME	"octeontx2-nicvf"
+#define DRV_NAME	"rvu-nicpf"
+#define DRV_VF_NAME	"rvu-nicvf"
 
 struct otx2_stat {
 	char name[ETH_GSTRING_LEN];
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index 2f2e8a3d7924..53df7fff92c4 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -1533,14 +1533,6 @@ int otx2_open(struct net_device *netdev)
 	if (!qset->rq)
 		goto err_free_mem;
 
-	if (test_bit(CN10K_LMTST, &pf->hw.cap_flag)) {
-		/* Reserve LMT lines for NPA AURA batch free */
-		pf->hw.npa_lmt_base = pf->hw.lmt_base;
-		/* Reserve LMT lines for NIX TX */
-		pf->hw.nix_lmt_base = (u64 *)((u64)pf->hw.npa_lmt_base +
-				      (pf->npa_lmt_lines * LMT_LINE_SIZE));
-	}
-
 	err = otx2_init_hw_resources(pf);
 	if (err)
 		goto err_free_mem;
@@ -2668,6 +2660,8 @@ err_del_mcam_entries:
 err_ptp_destroy:
 	otx2_ptp_destroy(pf);
 err_detach_rsrc:
+	if (pf->hw.lmt_info)
+		free_percpu(pf->hw.lmt_info);
 	if (test_bit(CN10K_LMTST, &pf->hw.cap_flag))
 		qmem_free(pf->dev, pf->dync_lmt);
 	otx2_detach_resources(&pf->mbox);
@@ -2811,6 +2805,8 @@ static void otx2_remove(struct pci_dev *pdev)
 	otx2_mcam_flow_del(pf);
 	otx2_shutdown_tc(pf);
 	otx2_detach_resources(&pf->mbox);
+	if (pf->hw.lmt_info)
+		free_percpu(pf->hw.lmt_info);
 	if (test_bit(CN10K_LMTST, &pf->hw.cap_flag))
 		qmem_free(pf->dev, pf->dync_lmt);
 	otx2_disable_mbox_intr(pf);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
index 869de5f59e73..3ff1ad79c001 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
@@ -80,7 +80,6 @@ struct otx2_snd_queue {
 	u16			num_sqbs;
 	u16			sqe_thresh;
 	u8			sqe_per_sqb;
-	u32			lmt_id;
 	u64			 io_addr;
 	u64			*aura_fc_addr;
 	u64			*lmt_addr;
@@ -111,7 +110,6 @@ struct otx2_cq_poll {
 struct otx2_pool {
 	struct qmem		*stack;
 	struct qmem		*fc_addr;
-	u64			*lmt_addr;
 	u16			rbsize;
 };
 
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
index e91b4874a57f..3de1a03839e2 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
@@ -582,7 +582,10 @@ static int ionic_set_ringparam(struct net_device *netdev,
 
 	qparam.ntxq_descs = ring->tx_pending;
 	qparam.nrxq_descs = ring->rx_pending;
+
+	mutex_lock(&lif->queue_lock);
 	err = ionic_reconfigure_queues(lif, &qparam);
+	mutex_unlock(&lif->queue_lock);
 	if (err)
 		netdev_info(netdev, "Ring reconfiguration failed, changes canceled: %d\n", err);
 
@@ -679,7 +682,9 @@ static int ionic_set_channels(struct net_device *netdev,
 		return 0;
 	}
 
+	mutex_lock(&lif->queue_lock);
 	err = ionic_reconfigure_queues(lif, &qparam);
+	mutex_unlock(&lif->queue_lock);
 	if (err)
 		netdev_info(netdev, "Queue reconfiguration failed, changes canceled: %d\n", err);
 
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 23c9e196a784..381966e8f557 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -1715,7 +1715,6 @@ static int ionic_set_mac_address(struct net_device *netdev, void *sa)
 static void ionic_stop_queues_reconfig(struct ionic_lif *lif)
 {
 	/* Stop and clean the queues before reconfiguration */
-	mutex_lock(&lif->queue_lock);
 	netif_device_detach(lif->netdev);
 	ionic_stop_queues(lif);
 	ionic_txrx_deinit(lif);
@@ -1734,8 +1733,7 @@ static int ionic_start_queues_reconfig(struct ionic_lif *lif)
 	 * DOWN and UP to try to reset and clear the issue.
 	 */
 	err = ionic_txrx_init(lif);
-	mutex_unlock(&lif->queue_lock);
-	ionic_link_status_check_request(lif, CAN_SLEEP);
+	ionic_link_status_check_request(lif, CAN_NOT_SLEEP);
 	netif_device_attach(lif->netdev);
 
 	return err;
@@ -1765,9 +1763,13 @@ static int ionic_change_mtu(struct net_device *netdev, int new_mtu)
 		return 0;
 	}
 
+	mutex_lock(&lif->queue_lock);
 	ionic_stop_queues_reconfig(lif);
 	netdev->mtu = new_mtu;
-	return ionic_start_queues_reconfig(lif);
+	err = ionic_start_queues_reconfig(lif);
+	mutex_unlock(&lif->queue_lock);
+
+	return err;
 }
 
 static void ionic_tx_timeout_work(struct work_struct *ws)
@@ -1783,8 +1785,10 @@ static void ionic_tx_timeout_work(struct work_struct *ws)
 	if (!netif_running(lif->netdev))
 		return;
 
+	mutex_lock(&lif->queue_lock);
 	ionic_stop_queues_reconfig(lif);
 	ionic_start_queues_reconfig(lif);
+	mutex_unlock(&lif->queue_lock);
 }
 
 static void ionic_tx_timeout(struct net_device *netdev, unsigned int txqueue)
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c
index 7e3a5634c161..25ecfcfa1281 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c
@@ -318,7 +318,7 @@ void ionic_rx_filter_sync(struct ionic_lif *lif)
 			if (f->state == IONIC_FILTER_STATE_NEW ||
 			    f->state == IONIC_FILTER_STATE_OLD) {
 				sync_item = devm_kzalloc(dev, sizeof(*sync_item),
-							 GFP_KERNEL);
+							 GFP_ATOMIC);
 				if (!sync_item)
 					goto loop_out;
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index 5a0a3cbcc1c1..cb0f2a3a1ac9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -2226,8 +2226,8 @@ qed_cxt_dynamic_ilt_alloc(struct qed_hwfn *p_hwfn,
 		p_blk = &p_cli->pf_blks[CDUT_SEG_BLK(QED_CXT_ROCE_TID_SEG)];
 		break;
 	default:
-		DP_NOTICE(p_hwfn, "-EINVALID elem type = %d", elem_type);
-		return -EINVAL;
+		DP_NOTICE(p_hwfn, "-EOPNOTSUPP elem type = %d", elem_type);
+		return -EOPNOTSUPP;
 	}
 
 	/* Calculate line in ilt */
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c
index 3d61a767a8a3..09f20c794754 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c
@@ -437,7 +437,6 @@ int qlcnic_pinit_from_rom(struct qlcnic_adapter *adapter)
 	QLCWR32(adapter, QLCNIC_CRB_PEG_NET_4 + 0x3c, 1);
 	msleep(20);
 
-	qlcnic_rom_unlock(adapter);
 	/* big hammer don't reset CAM block on reset */
 	QLCWR32(adapter, QLCNIC_ROMUSB_GLB_SW_RESET, 0xfeffffff);
 
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c b/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c
index 79e50079ed03..f72e13b83869 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c
@@ -100,7 +100,7 @@ static void emac_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 
 	case ETH_SS_STATS:
 		for (i = 0; i < EMAC_STATS_LEN; i++) {
-			strlcpy(data, emac_ethtool_stat_strings[i],
+			strscpy(data, emac_ethtool_stat_strings[i],
 				ETH_GSTRING_LEN);
 			data += ETH_GSTRING_LEN;
 		}
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 6c8ba916d1a6..1374faa229a2 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -2533,6 +2533,7 @@ static netdev_tx_t sh_eth_start_xmit(struct sk_buff *skb,
 	else
 		txdesc->status |= cpu_to_le32(TD_TACT);
 
+	wmb(); /* cur_tx must be incremented after TACT bit was set */
 	mdp->cur_tx++;
 
 	if (!(sh_eth_read(ndev, EDTRR) & mdp->cd->edtrr_trns))
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index a295e2621cf3..43ef4f529028 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -900,74 +900,36 @@ static void efx_pci_remove(struct pci_dev *pci_dev)
 
 /* NIC VPD information
  * Called during probe to display the part number of the
- * installed NIC.  VPD is potentially very large but this should
- * always appear within the first 512 bytes.
+ * installed NIC.
  */
-#define SFC_VPD_LEN 512
 static void efx_probe_vpd_strings(struct efx_nic *efx)
 {
 	struct pci_dev *dev = efx->pci_dev;
-	char vpd_data[SFC_VPD_LEN];
-	ssize_t vpd_size;
-	int ro_start, ro_size, i, j;
-
-	/* Get the vpd data from the device */
-	vpd_size = pci_read_vpd(dev, 0, sizeof(vpd_data), vpd_data);
-	if (vpd_size <= 0) {
-		netif_err(efx, drv, efx->net_dev, "Unable to read VPD\n");
-		return;
-	}
-
-	/* Get the Read only section */
-	ro_start = pci_vpd_find_tag(vpd_data, vpd_size, PCI_VPD_LRDT_RO_DATA);
-	if (ro_start < 0) {
-		netif_err(efx, drv, efx->net_dev, "VPD Read-only not found\n");
-		return;
-	}
-
-	ro_size = pci_vpd_lrdt_size(&vpd_data[ro_start]);
-	j = ro_size;
-	i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
-	if (i + j > vpd_size)
-		j = vpd_size - i;
-
-	/* Get the Part number */
-	i = pci_vpd_find_info_keyword(vpd_data, i, j, "PN");
-	if (i < 0) {
-		netif_err(efx, drv, efx->net_dev, "Part number not found\n");
-		return;
-	}
+	unsigned int vpd_size, kw_len;
+	u8 *vpd_data;
+	int start;
 
-	j = pci_vpd_info_field_size(&vpd_data[i]);
-	i += PCI_VPD_INFO_FLD_HDR_SIZE;
-	if (i + j > vpd_size) {
-		netif_err(efx, drv, efx->net_dev, "Incomplete part number\n");
+	vpd_data = pci_vpd_alloc(dev, &vpd_size);
+	if (IS_ERR(vpd_data)) {
+		pci_warn(dev, "Unable to read VPD\n");
 		return;
 	}
 
-	netif_info(efx, drv, efx->net_dev,
-		   "Part Number : %.*s\n", j, &vpd_data[i]);
-
-	i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
-	j = ro_size;
-	i = pci_vpd_find_info_keyword(vpd_data, i, j, "SN");
-	if (i < 0) {
-		netif_err(efx, drv, efx->net_dev, "Serial number not found\n");
-		return;
-	}
-
-	j = pci_vpd_info_field_size(&vpd_data[i]);
-	i += PCI_VPD_INFO_FLD_HDR_SIZE;
-	if (i + j > vpd_size) {
-		netif_err(efx, drv, efx->net_dev, "Incomplete serial number\n");
-		return;
-	}
+	start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size,
+					     PCI_VPD_RO_KEYWORD_PARTNO, &kw_len);
+	if (start < 0)
+		pci_err(dev, "Part number not found or incomplete\n");
+	else
+		pci_info(dev, "Part Number : %.*s\n", kw_len, vpd_data + start);
 
-	efx->vpd_sn = kmalloc(j + 1, GFP_KERNEL);
-	if (!efx->vpd_sn)
-		return;
+	start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size,
+					     PCI_VPD_RO_KEYWORD_SERIALNO, &kw_len);
+	if (start < 0)
+		pci_err(dev, "Serial number not found or incomplete\n");
+	else
+		efx->vpd_sn = kmemdup_nul(vpd_data + start, kw_len, GFP_KERNEL);
 
-	snprintf(efx->vpd_sn, j + 1, "%s", &vpd_data[i]);
+	kfree(vpd_data);
 }
 
 
diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c
index c177ea0f301e..423bdf81200f 100644
--- a/drivers/net/ethernet/sfc/falcon/efx.c
+++ b/drivers/net/ethernet/sfc/falcon/efx.c
@@ -2780,75 +2780,36 @@ static void ef4_pci_remove(struct pci_dev *pci_dev)
 };
 
 /* NIC VPD information
- * Called during probe to display the part number of the
- * installed NIC.  VPD is potentially very large but this should
- * always appear within the first 512 bytes.
+ * Called during probe to display the part number of the installed NIC.
  */
-#define SFC_VPD_LEN 512
 static void ef4_probe_vpd_strings(struct ef4_nic *efx)
 {
 	struct pci_dev *dev = efx->pci_dev;
-	char vpd_data[SFC_VPD_LEN];
-	ssize_t vpd_size;
-	int ro_start, ro_size, i, j;
-
-	/* Get the vpd data from the device */
-	vpd_size = pci_read_vpd(dev, 0, sizeof(vpd_data), vpd_data);
-	if (vpd_size <= 0) {
-		netif_err(efx, drv, efx->net_dev, "Unable to read VPD\n");
-		return;
-	}
-
-	/* Get the Read only section */
-	ro_start = pci_vpd_find_tag(vpd_data, vpd_size, PCI_VPD_LRDT_RO_DATA);
-	if (ro_start < 0) {
-		netif_err(efx, drv, efx->net_dev, "VPD Read-only not found\n");
-		return;
-	}
-
-	ro_size = pci_vpd_lrdt_size(&vpd_data[ro_start]);
-	j = ro_size;
-	i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
-	if (i + j > vpd_size)
-		j = vpd_size - i;
-
-	/* Get the Part number */
-	i = pci_vpd_find_info_keyword(vpd_data, i, j, "PN");
-	if (i < 0) {
-		netif_err(efx, drv, efx->net_dev, "Part number not found\n");
-		return;
-	}
+	unsigned int vpd_size, kw_len;
+	u8 *vpd_data;
+	int start;
 
-	j = pci_vpd_info_field_size(&vpd_data[i]);
-	i += PCI_VPD_INFO_FLD_HDR_SIZE;
-	if (i + j > vpd_size) {
-		netif_err(efx, drv, efx->net_dev, "Incomplete part number\n");
+	vpd_data = pci_vpd_alloc(dev, &vpd_size);
+	if (IS_ERR(vpd_data)) {
+		pci_warn(dev, "Unable to read VPD\n");
 		return;
 	}
 
-	netif_info(efx, drv, efx->net_dev,
-		   "Part Number : %.*s\n", j, &vpd_data[i]);
-
-	i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
-	j = ro_size;
-	i = pci_vpd_find_info_keyword(vpd_data, i, j, "SN");
-	if (i < 0) {
-		netif_err(efx, drv, efx->net_dev, "Serial number not found\n");
-		return;
-	}
-
-	j = pci_vpd_info_field_size(&vpd_data[i]);
-	i += PCI_VPD_INFO_FLD_HDR_SIZE;
-	if (i + j > vpd_size) {
-		netif_err(efx, drv, efx->net_dev, "Incomplete serial number\n");
-		return;
-	}
+	start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size,
+					     PCI_VPD_RO_KEYWORD_PARTNO, &kw_len);
+	if (start < 0)
+		pci_warn(dev, "Part number not found or incomplete\n");
+	else
+		pci_info(dev, "Part Number : %.*s\n", kw_len, vpd_data + start);
 
-	efx->vpd_sn = kmalloc(j + 1, GFP_KERNEL);
-	if (!efx->vpd_sn)
-		return;
+	start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size,
+					     PCI_VPD_RO_KEYWORD_SERIALNO, &kw_len);
+	if (start < 0)
+		pci_warn(dev, "Serial number not found or incomplete\n");
+	else
+		efx->vpd_sn = kmemdup_nul(vpd_data + start, kw_len, GFP_KERNEL);
 
-	snprintf(efx->vpd_sn, j + 1, "%s", &vpd_data[i]);
+	kfree(vpd_data);
 }
 
 
diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c
index 22cdbf12c823..b008b4e8a2a5 100644
--- a/drivers/net/ethernet/smsc/smc911x.c
+++ b/drivers/net/ethernet/smsc/smc911x.c
@@ -1550,7 +1550,7 @@ static int smc911x_ethtool_getregslen(struct net_device *dev)
 }
 
 static void smc911x_ethtool_getregs(struct net_device *dev,
-										 struct ethtool_regs* regs, void *buf)
+				    struct ethtool_regs *regs, void *buf)
 {
 	struct smc911x_local *lp = netdev_priv(dev);
 	unsigned long flags;
@@ -1600,7 +1600,7 @@ static int smc911x_ethtool_wait_eeprom_ready(struct net_device *dev)
 }
 
 static inline int smc911x_ethtool_write_eeprom_cmd(struct net_device *dev,
-													int cmd, int addr)
+						   int cmd, int addr)
 {
 	struct smc911x_local *lp = netdev_priv(dev);
 	int ret;
@@ -1614,7 +1614,7 @@ static inline int smc911x_ethtool_write_eeprom_cmd(struct net_device *dev,
 }
 
 static inline int smc911x_ethtool_read_eeprom_byte(struct net_device *dev,
-													u8 *data)
+						   u8 *data)
 {
 	struct smc911x_local *lp = netdev_priv(dev);
 	int ret;
@@ -1626,7 +1626,7 @@ static inline int smc911x_ethtool_read_eeprom_byte(struct net_device *dev,
 }
 
 static inline int smc911x_ethtool_write_eeprom_byte(struct net_device *dev,
-													 u8 data)
+						    u8 data)
 {
 	struct smc911x_local *lp = netdev_priv(dev);
 	int ret;
@@ -1638,7 +1638,7 @@ static inline int smc911x_ethtool_write_eeprom_byte(struct net_device *dev,
 }
 
 static int smc911x_ethtool_geteeprom(struct net_device *dev,
-									  struct ethtool_eeprom *eeprom, u8 *data)
+				     struct ethtool_eeprom *eeprom, u8 *data)
 {
 	u8 eebuf[SMC911X_EEPROM_LEN];
 	int i, ret;
@@ -1654,7 +1654,7 @@ static int smc911x_ethtool_geteeprom(struct net_device *dev,
 }
 
 static int smc911x_ethtool_seteeprom(struct net_device *dev,
-									   struct ethtool_eeprom *eeprom, u8 *data)
+				     struct ethtool_eeprom *eeprom, u8 *data)
 {
 	int i, ret;
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
index 4c9a37dd0d3f..ecf759ee1c9f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
@@ -109,8 +109,10 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id
 		plat->bus_id = pci_dev_id(pdev);
 
 	phy_mode = device_get_phy_mode(&pdev->dev);
-	if (phy_mode < 0)
+	if (phy_mode < 0) {
 		dev_err(&pdev->dev, "phy_mode not found\n");
+		return phy_mode;
+	}
 
 	plat->phy_interface = phy_mode;
 	plat->interface = PHY_INTERFACE_MODE_GMII;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index ed0cd3920171..ece02b35a6ce 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -5347,7 +5347,7 @@ static int stmmac_napi_poll_rxtx(struct napi_struct *napi, int budget)
 	struct stmmac_channel *ch =
 		container_of(napi, struct stmmac_channel, rxtx_napi);
 	struct stmmac_priv *priv = ch->priv_data;
-	int rx_done, tx_done;
+	int rx_done, tx_done, rxtx_done;
 	u32 chan = ch->index;
 
 	priv->xstats.napi_poll++;
@@ -5357,14 +5357,16 @@ static int stmmac_napi_poll_rxtx(struct napi_struct *napi, int budget)
 
 	rx_done = stmmac_rx_zc(priv, budget, chan);
 
+	rxtx_done = max(tx_done, rx_done);
+
 	/* If either TX or RX work is not complete, return budget
 	 * and keep pooling
 	 */
-	if (tx_done >= budget || rx_done >= budget)
+	if (rxtx_done >= budget)
 		return budget;
 
 	/* all work done, exit the polling mode */
-	if (napi_complete_done(napi, rx_done)) {
+	if (napi_complete_done(napi, rxtx_done)) {
 		unsigned long flags;
 
 		spin_lock_irqsave(&ch->lock, flags);
@@ -5375,7 +5377,7 @@ static int stmmac_napi_poll_rxtx(struct napi_struct *napi, int budget)
 		spin_unlock_irqrestore(&ch->lock, flags);
 	}
 
-	return min(rx_done, budget - 1);
+	return min(rxtx_done, budget - 1);
 }
 
 /**
@@ -7121,8 +7123,6 @@ int stmmac_suspend(struct device *dev)
 	if (!ndev || !netif_running(ndev))
 		return 0;
 
-	phylink_mac_change(priv->phylink, false);
-
 	mutex_lock(&priv->lock);
 
 	netif_device_detach(ndev);
@@ -7148,14 +7148,6 @@ int stmmac_suspend(struct device *dev)
 		stmmac_pmt(priv, priv->hw, priv->wolopts);
 		priv->irq_wake = 1;
 	} else {
-		mutex_unlock(&priv->lock);
-		rtnl_lock();
-		if (device_may_wakeup(priv->device))
-			phylink_speed_down(priv->phylink, false);
-		phylink_stop(priv->phylink);
-		rtnl_unlock();
-		mutex_lock(&priv->lock);
-
 		stmmac_mac_set(priv, priv->ioaddr, false);
 		pinctrl_pm_select_sleep_state(priv->device);
 		/* Disable clock in case of PWM is off */
@@ -7169,6 +7161,16 @@ int stmmac_suspend(struct device *dev)
 
 	mutex_unlock(&priv->lock);
 
+	rtnl_lock();
+	if (device_may_wakeup(priv->device) && priv->plat->pmt) {
+		phylink_suspend(priv->phylink, true);
+	} else {
+		if (device_may_wakeup(priv->device))
+			phylink_speed_down(priv->phylink, false);
+		phylink_suspend(priv->phylink, false);
+	}
+	rtnl_unlock();
+
 	if (priv->dma_cap.fpesel) {
 		/* Disable FPE */
 		stmmac_fpe_configure(priv, priv->ioaddr,
@@ -7259,13 +7261,15 @@ int stmmac_resume(struct device *dev)
 			return ret;
 	}
 
-	if (!device_may_wakeup(priv->device) || !priv->plat->pmt) {
-		rtnl_lock();
-		phylink_start(priv->phylink);
-		/* We may have called phylink_speed_down before */
-		phylink_speed_up(priv->phylink);
-		rtnl_unlock();
+	rtnl_lock();
+	if (device_may_wakeup(priv->device) && priv->plat->pmt) {
+		phylink_resume(priv->phylink);
+	} else {
+		phylink_resume(priv->phylink);
+		if (device_may_wakeup(priv->device))
+			phylink_speed_up(priv->phylink);
 	}
+	rtnl_unlock();
 
 	rtnl_lock();
 	mutex_lock(&priv->lock);
@@ -7286,8 +7290,6 @@ int stmmac_resume(struct device *dev)
 	mutex_unlock(&priv->lock);
 	rtnl_unlock();
 
-	phylink_mac_change(priv->phylink, true);
-
 	netif_device_attach(ndev);
 
 	return 0;
diff --git a/drivers/net/ethernet/xscale/ptp_ixp46x.c b/drivers/net/ethernet/xscale/ptp_ixp46x.c
index ecece21315c3..39234852e01b 100644
--- a/drivers/net/ethernet/xscale/ptp_ixp46x.c
+++ b/drivers/net/ethernet/xscale/ptp_ixp46x.c
@@ -16,7 +16,6 @@
 #include <linux/ptp_clock_kernel.h>
 #include <linux/platform_device.h>
 #include <linux/soc/ixp4xx/cpu.h>
-#include <linux/module.h>
 #include <mach/ixp4xx-regs.h>
 
 #include "ixp46x_ts.h"
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 2cdf9f989dec..a1464b764d4d 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -33,6 +33,7 @@
 enum {
 	PHYLINK_DISABLE_STOPPED,
 	PHYLINK_DISABLE_LINK,
+	PHYLINK_DISABLE_MAC_WOL,
 };
 
 /**
@@ -1282,6 +1283,9 @@ EXPORT_SYMBOL_GPL(phylink_start);
  * network device driver's &struct net_device_ops ndo_stop() method.  The
  * network device's carrier state should not be changed prior to calling this
  * function.
+ *
+ * This will synchronously bring down the link if the link is not already
+ * down (in other words, it will trigger a mac_link_down() method call.)
  */
 void phylink_stop(struct phylink *pl)
 {
@@ -1302,6 +1306,84 @@ void phylink_stop(struct phylink *pl)
 EXPORT_SYMBOL_GPL(phylink_stop);
 
 /**
+ * phylink_suspend() - handle a network device suspend event
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ * @mac_wol: true if the MAC needs to receive packets for Wake-on-Lan
+ *
+ * Handle a network device suspend event. There are several cases:
+ * - If Wake-on-Lan is not active, we can bring down the link between
+ *   the MAC and PHY by calling phylink_stop().
+ * - If Wake-on-Lan is active, and being handled only by the PHY, we
+ *   can also bring down the link between the MAC and PHY.
+ * - If Wake-on-Lan is active, but being handled by the MAC, the MAC
+ *   still needs to receive packets, so we can not bring the link down.
+ */
+void phylink_suspend(struct phylink *pl, bool mac_wol)
+{
+	ASSERT_RTNL();
+
+	if (mac_wol && (!pl->netdev || pl->netdev->wol_enabled)) {
+		/* Wake-on-Lan enabled, MAC handling */
+		mutex_lock(&pl->state_mutex);
+
+		/* Stop the resolver bringing the link up */
+		__set_bit(PHYLINK_DISABLE_MAC_WOL, &pl->phylink_disable_state);
+
+		/* Disable the carrier, to prevent transmit timeouts,
+		 * but one would hope all packets have been sent. This
+		 * also means phylink_resolve() will do nothing.
+		 */
+		netif_carrier_off(pl->netdev);
+
+		/* We do not call mac_link_down() here as we want the
+		 * link to remain up to receive the WoL packets.
+		 */
+		mutex_unlock(&pl->state_mutex);
+	} else {
+		phylink_stop(pl);
+	}
+}
+EXPORT_SYMBOL_GPL(phylink_suspend);
+
+/**
+ * phylink_resume() - handle a network device resume event
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ *
+ * Undo the effects of phylink_suspend(), returning the link to an
+ * operational state.
+ */
+void phylink_resume(struct phylink *pl)
+{
+	ASSERT_RTNL();
+
+	if (test_bit(PHYLINK_DISABLE_MAC_WOL, &pl->phylink_disable_state)) {
+		/* Wake-on-Lan enabled, MAC handling */
+
+		/* Call mac_link_down() so we keep the overall state balanced.
+		 * Do this under the state_mutex lock for consistency. This
+		 * will cause a "Link Down" message to be printed during
+		 * resume, which is harmless - the true link state will be
+		 * printed when we run a resolve.
+		 */
+		mutex_lock(&pl->state_mutex);
+		phylink_link_down(pl);
+		mutex_unlock(&pl->state_mutex);
+
+		/* Re-apply the link parameters so that all the settings get
+		 * restored to the MAC.
+		 */
+		phylink_mac_initial_config(pl, true);
+
+		/* Re-enable and re-resolve the link parameters */
+		clear_bit(PHYLINK_DISABLE_MAC_WOL, &pl->phylink_disable_state);
+		phylink_run_resolve(pl);
+	} else {
+		phylink_start(pl);
+	}
+}
+EXPORT_SYMBOL_GPL(phylink_resume);
+
+/**
  * phylink_ethtool_get_wol() - get the wake on lan parameters for the PHY
  * @pl: a pointer to a &struct phylink returned from phylink_create()
  * @wol: a pointer to &struct ethtool_wolinfo to hold the read parameters
diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index 4c4ab7b38d78..82bb5ed94c48 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -654,6 +654,11 @@ static const struct usb_device_id mbim_devs[] = {
 	  .driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle,
 	},
 
+	/* Telit LN920 */
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x1bc7, 0x1061, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
+	  .driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle,
+	},
+
 	/* default entry */
 	{ USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
 	  .driver_info = (unsigned long)&cdc_mbim_info_zlp,
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 7dc1ef3f93c3..a57251ba5991 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -2535,13 +2535,17 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface,
 	if (!hso_net->mux_bulk_tx_buf)
 		goto err_free_tx_urb;
 
-	add_net_device(hso_dev);
+	result = add_net_device(hso_dev);
+	if (result) {
+		dev_err(&interface->dev, "Failed to add net device\n");
+		goto err_free_tx_buf;
+	}
 
 	/* registering our net device */
 	result = register_netdev(net);
 	if (result) {
 		dev_err(&interface->dev, "Failed to register device\n");
-		goto err_free_tx_buf;
+		goto err_rmv_ndev;
 	}
 
 	hso_log_port(hso_dev);
@@ -2550,8 +2554,9 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface,
 
 	return hso_dev;
 
-err_free_tx_buf:
+err_rmv_ndev:
 	remove_net_device(hso_dev);
+err_free_tx_buf:
 	kfree(hso_net->mux_bulk_tx_buf);
 err_free_tx_urb:
 	usb_free_urb(hso_net->mux_bulk_tx_urb);
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 6a2e4f884b12..33ada2c59952 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1354,6 +1354,7 @@ static const struct usb_device_id products[] = {
 	{QMI_QUIRK_SET_DTR(0x1bc7, 0x1031, 3)}, /* Telit LE910C1-EUX */
 	{QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)},	/* Telit LE922A */
 	{QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)},	/* Telit FN980 */
+	{QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)},	/* Telit LN920 */
 	{QMI_FIXED_INTF(0x1bc7, 0x1100, 3)},	/* Telit ME910 */
 	{QMI_FIXED_INTF(0x1bc7, 0x1101, 3)},	/* Telit ME910 dual modem */
 	{QMI_FIXED_INTF(0x1bc7, 0x1200, 5)},	/* Telit LE920 */
diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index 52d1d391f4c6..d8231cc821ae 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -9,7 +9,7 @@
 #include "iwl-prph.h"
 
 /* Highest firmware API version supported */
-#define IWL_22000_UCODE_API_MAX	65
+#define IWL_22000_UCODE_API_MAX	66
 
 /* Lowest firmware API version supported */
 #define IWL_22000_UCODE_API_MIN	39
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c
index 314ed90c23dd..dde22bdc8703 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c
@@ -231,6 +231,7 @@ static int iwl_pnvm_get_from_fs(struct iwl_trans *trans, u8 **data, size_t *len)
 {
 	const struct firmware *pnvm;
 	char pnvm_name[MAX_PNVM_NAME];
+	size_t new_len;
 	int ret;
 
 	iwl_pnvm_get_fs_name(trans, pnvm_name, sizeof(pnvm_name));
@@ -242,11 +243,14 @@ static int iwl_pnvm_get_from_fs(struct iwl_trans *trans, u8 **data, size_t *len)
 		return ret;
 	}
 
+	new_len = pnvm->size;
 	*data = kmemdup(pnvm->data, pnvm->size, GFP_KERNEL);
+	release_firmware(pnvm);
+
 	if (!*data)
 		return -ENOMEM;
 
-	*len = pnvm->size;
+	*len = new_len;
 
 	return 0;
 }
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c
index a7c79d814aa4..c875bf35533c 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c
@@ -49,14 +49,14 @@ void *iwl_uefi_get_pnvm(struct iwl_trans *trans, size_t *len)
 	err = efivar_entry_get(pnvm_efivar, NULL, &package_size, data);
 	if (err) {
 		IWL_DEBUG_FW(trans,
-			     "PNVM UEFI variable not found %d (len %zd)\n",
+			     "PNVM UEFI variable not found %d (len %lu)\n",
 			     err, package_size);
 		kfree(data);
 		data = ERR_PTR(err);
 		goto out;
 	}
 
-	IWL_DEBUG_FW(trans, "Read PNVM from UEFI with size %zd\n", package_size);
+	IWL_DEBUG_FW(trans, "Read PNVM from UEFI with size %lu\n", package_size);
 	*len = package_size;
 
 out:
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index 6f60018feed1..77ea2d0a3091 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -686,6 +686,7 @@ static int iwl_mvm_start_get_nvm(struct iwl_mvm *mvm)
 {
 	int ret;
 
+	rtnl_lock();
 	mutex_lock(&mvm->mutex);
 
 	ret = iwl_run_init_mvm_ucode(mvm);
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index 8dc1b8eecb86..61b2797a34a8 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -558,6 +558,7 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
 	IWL_DEV_INFO(0xA0F0, 0x1652, killer1650i_2ax_cfg_qu_b0_hr_b0, NULL),
 	IWL_DEV_INFO(0xA0F0, 0x2074, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0xA0F0, 0x4070, iwl_ax201_cfg_qu_hr, NULL),
+	IWL_DEV_INFO(0xA0F0, 0x6074, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x02F0, 0x0070, iwl_ax201_cfg_quz_hr, NULL),
 	IWL_DEV_INFO(0x02F0, 0x0074, iwl_ax201_cfg_quz_hr, NULL),
 	IWL_DEV_INFO(0x02F0, 0x6074, iwl_ax201_cfg_quz_hr, NULL),
diff --git a/drivers/net/wireless/intersil/orinoco/hermes.c b/drivers/net/wireless/intersil/orinoco/hermes.c
index 6d4b7f64efcf..256946552742 100644
--- a/drivers/net/wireless/intersil/orinoco/hermes.c
+++ b/drivers/net/wireless/intersil/orinoco/hermes.c
@@ -79,7 +79,6 @@
 
 #undef HERMES_DEBUG
 #ifdef HERMES_DEBUG
-#include <stdarg.h>
 
 #define DEBUG(lvl, stuff...) if ((lvl) <= HERMES_DEBUG) DMSG(stuff)
 
diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.h b/drivers/net/wwan/iosm/iosm_ipc_imem.h
index 0d2f10e4cbc8..dc65b0712261 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_imem.h
+++ b/drivers/net/wwan/iosm/iosm_ipc_imem.h
@@ -7,7 +7,6 @@
 #define IOSM_IPC_IMEM_H
 
 #include <linux/skbuff.h>
-#include <stdbool.h>
 
 #include "iosm_ipc_mmio.h"
 #include "iosm_ipc_pcie.h"
diff --git a/drivers/net/wwan/iosm/iosm_ipc_mmio.c b/drivers/net/wwan/iosm/iosm_ipc_mmio.c
index 06c94b1720b6..09f94c123531 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_mmio.c
+++ b/drivers/net/wwan/iosm/iosm_ipc_mmio.c
@@ -69,7 +69,7 @@ void ipc_mmio_update_cp_capability(struct iosm_mmio *ipc_mmio)
 	unsigned int ver;
 
 	ver = ipc_mmio_get_cp_version(ipc_mmio);
-	cp_cap = readl(ipc_mmio->base + ipc_mmio->offset.cp_capability);
+	cp_cap = ioread32(ipc_mmio->base + ipc_mmio->offset.cp_capability);
 
 	ipc_mmio->has_mux_lite = (ver >= IOSM_CP_VERSION) &&
 				 !(cp_cap & DL_AGGR) && !(cp_cap & UL_AGGR);
@@ -150,8 +150,8 @@ enum ipc_mem_exec_stage ipc_mmio_get_exec_stage(struct iosm_mmio *ipc_mmio)
 	if (!ipc_mmio)
 		return IPC_MEM_EXEC_STAGE_INVALID;
 
-	return (enum ipc_mem_exec_stage)readl(ipc_mmio->base +
-					      ipc_mmio->offset.exec_stage);
+	return (enum ipc_mem_exec_stage)ioread32(ipc_mmio->base +
+						 ipc_mmio->offset.exec_stage);
 }
 
 void ipc_mmio_copy_chip_info(struct iosm_mmio *ipc_mmio, void *dest,
@@ -167,8 +167,8 @@ enum ipc_mem_device_ipc_state ipc_mmio_get_ipc_state(struct iosm_mmio *ipc_mmio)
 	if (!ipc_mmio)
 		return IPC_MEM_DEVICE_IPC_INVALID;
 
-	return (enum ipc_mem_device_ipc_state)
-		readl(ipc_mmio->base + ipc_mmio->offset.ipc_status);
+	return (enum ipc_mem_device_ipc_state)ioread32(ipc_mmio->base +
+						       ipc_mmio->offset.ipc_status);
 }
 
 enum rom_exit_code ipc_mmio_get_rom_exit_code(struct iosm_mmio *ipc_mmio)
@@ -176,8 +176,8 @@ enum rom_exit_code ipc_mmio_get_rom_exit_code(struct iosm_mmio *ipc_mmio)
 	if (!ipc_mmio)
 		return IMEM_ROM_EXIT_FAIL;
 
-	return (enum rom_exit_code)readl(ipc_mmio->base +
-					 ipc_mmio->offset.rom_exit_code);
+	return (enum rom_exit_code)ioread32(ipc_mmio->base +
+					    ipc_mmio->offset.rom_exit_code);
 }
 
 void ipc_mmio_config(struct iosm_mmio *ipc_mmio)
@@ -188,10 +188,10 @@ void ipc_mmio_config(struct iosm_mmio *ipc_mmio)
 	/* AP memory window (full window is open and active so that modem checks
 	 * each AP address) 0 means don't check on modem side.
 	 */
-	iowrite64_lo_hi(0, ipc_mmio->base + ipc_mmio->offset.ap_win_base);
-	iowrite64_lo_hi(0, ipc_mmio->base + ipc_mmio->offset.ap_win_end);
+	iowrite64(0, ipc_mmio->base + ipc_mmio->offset.ap_win_base);
+	iowrite64(0, ipc_mmio->base + ipc_mmio->offset.ap_win_end);
 
-	iowrite64_lo_hi(ipc_mmio->context_info_addr,
+	iowrite64(ipc_mmio->context_info_addr,
 			ipc_mmio->base + ipc_mmio->offset.context_info);
 }
 
@@ -201,8 +201,8 @@ void ipc_mmio_set_psi_addr_and_size(struct iosm_mmio *ipc_mmio, dma_addr_t addr,
 	if (!ipc_mmio)
 		return;
 
-	iowrite64_lo_hi(addr, ipc_mmio->base + ipc_mmio->offset.psi_address);
-	writel(size, ipc_mmio->base + ipc_mmio->offset.psi_size);
+	iowrite64(addr, ipc_mmio->base + ipc_mmio->offset.psi_address);
+	iowrite32(size, ipc_mmio->base + ipc_mmio->offset.psi_size);
 }
 
 void ipc_mmio_set_contex_info_addr(struct iosm_mmio *ipc_mmio, phys_addr_t addr)
@@ -218,6 +218,8 @@ void ipc_mmio_set_contex_info_addr(struct iosm_mmio *ipc_mmio, phys_addr_t addr)
 
 int ipc_mmio_get_cp_version(struct iosm_mmio *ipc_mmio)
 {
-	return ipc_mmio ? readl(ipc_mmio->base + ipc_mmio->offset.cp_version) :
-			  -EFAULT;
+	if (ipc_mmio)
+		return ioread32(ipc_mmio->base + ipc_mmio->offset.cp_version);
+
+	return -EFAULT;
 }
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
index 71428d8cbcfc..87847c380051 100644
--- a/drivers/ntb/hw/amd/ntb_hw_amd.c
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.c
@@ -1176,22 +1176,14 @@ static int amd_ntb_init_pci(struct amd_ntb_dev *ndev,
 
 	pci_set_master(pdev);
 
-	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
 	if (rc) {
-		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
 		if (rc)
 			goto err_dma_mask;
 		dev_warn(&pdev->dev, "Cannot DMA highmem\n");
 	}
 
-	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-	if (rc) {
-		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-		if (rc)
-			goto err_dma_mask;
-		dev_warn(&pdev->dev, "Cannot DMA consistent highmem\n");
-	}
-
 	ndev->self_mmio = pci_iomap(pdev, 0, 0);
 	if (!ndev->self_mmio) {
 		rc = -EIO;
diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c
index e7a4c2aa8baa..733557231ed0 100644
--- a/drivers/ntb/hw/idt/ntb_hw_idt.c
+++ b/drivers/ntb/hw/idt/ntb_hw_idt.c
@@ -2640,26 +2640,15 @@ static int idt_init_pci(struct idt_ntb_dev *ndev)
 	int ret;
 
 	/* Initialize the bit mask of PCI/NTB DMA */
-	ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
 	if (ret != 0) {
-		ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
 		if (ret != 0) {
 			dev_err(&pdev->dev, "Failed to set DMA bit mask\n");
 			return ret;
 		}
 		dev_warn(&pdev->dev, "Cannot set DMA highmem bit mask\n");
 	}
-	ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-	if (ret != 0) {
-		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-		if (ret != 0) {
-			dev_err(&pdev->dev,
-				"Failed to set consistent DMA bit mask\n");
-			return ret;
-		}
-		dev_warn(&pdev->dev,
-			"Cannot set consistent DMA highmem bit mask\n");
-	}
 
 	/*
 	 * Enable the device advanced error reporting. It's not critical to
diff --git a/drivers/ntb/hw/intel/ntb_hw_gen1.c b/drivers/ntb/hw/intel/ntb_hw_gen1.c
index 093dd20057b9..e5f14e20a9ff 100644
--- a/drivers/ntb/hw/intel/ntb_hw_gen1.c
+++ b/drivers/ntb/hw/intel/ntb_hw_gen1.c
@@ -1771,22 +1771,14 @@ static int intel_ntb_init_pci(struct intel_ntb_dev *ndev, struct pci_dev *pdev)
 
 	pci_set_master(pdev);
 
-	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
 	if (rc) {
-		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
 		if (rc)
 			goto err_dma_mask;
 		dev_warn(&pdev->dev, "Cannot DMA highmem\n");
 	}
 
-	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-	if (rc) {
-		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-		if (rc)
-			goto err_dma_mask;
-		dev_warn(&pdev->dev, "Cannot DMA consistent highmem\n");
-	}
-
 	ndev->self_mmio = pci_iomap(pdev, 0, 0);
 	if (!ndev->self_mmio) {
 		rc = -EIO;
diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.h b/drivers/ntb/hw/intel/ntb_hw_intel.h
index 05e2335c9596..b233d1c6ba2d 100644
--- a/drivers/ntb/hw/intel/ntb_hw_intel.h
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.h
@@ -43,9 +43,6 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Intel PCIe NTB Linux driver
- *
- * Contact Information:
- * Jon Mason <jon.mason@intel.com>
  */
 
 #ifndef NTB_HW_INTEL_H
diff --git a/drivers/ntb/test/ntb_msi_test.c b/drivers/ntb/test/ntb_msi_test.c
index 7095ecd6223a..4e18e08776c9 100644
--- a/drivers/ntb/test/ntb_msi_test.c
+++ b/drivers/ntb/test/ntb_msi_test.c
@@ -369,8 +369,10 @@ static int ntb_msit_probe(struct ntb_client *client, struct ntb_dev *ntb)
 	if (ret)
 		goto remove_dbgfs;
 
-	if (!nm->isr_ctx)
+	if (!nm->isr_ctx) {
+		ret = -ENOMEM;
 		goto remove_dbgfs;
+	}
 
 	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
 
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
index 89df1350fefd..65e1e5cf1b29 100644
--- a/drivers/ntb/test/ntb_perf.c
+++ b/drivers/ntb/test/ntb_perf.c
@@ -598,6 +598,7 @@ static int perf_setup_inbuf(struct perf_peer *peer)
 		return -ENOMEM;
 	}
 	if (!IS_ALIGNED(peer->inbuf_xlat, xlat_align)) {
+		ret = -EINVAL;
 		dev_err(&perf->ntb->dev, "Unaligned inbuf allocated\n");
 		goto err_free_inbuf;
 	}
diff --git a/drivers/ntb/test/ntb_pingpong.c b/drivers/ntb/test/ntb_pingpong.c
index 2164e8492772..8aeca7914050 100644
--- a/drivers/ntb/test/ntb_pingpong.c
+++ b/drivers/ntb/test/ntb_pingpong.c
@@ -187,7 +187,7 @@ static void pp_ping(struct pp_ctx *pp)
 
 static void pp_pong(struct pp_ctx *pp)
 {
-	u32 msg_data = -1, spad_data = -1;
+	u32 msg_data, spad_data;
 	int pidx = 0;
 
 	/* Read pong data */
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index 9251441fd8a3..7f473f9db300 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -346,29 +346,45 @@ static bool preamble_next(struct nvdimm_drvdata *ndd,
 			free, nslot);
 }
 
+static bool nsl_validate_checksum(struct nvdimm_drvdata *ndd,
+				  struct nd_namespace_label *nd_label)
+{
+	u64 sum, sum_save;
+
+	if (!namespace_label_has(ndd, checksum))
+		return true;
+
+	sum_save = nsl_get_checksum(ndd, nd_label);
+	nsl_set_checksum(ndd, nd_label, 0);
+	sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
+	nsl_set_checksum(ndd, nd_label, sum_save);
+	return sum == sum_save;
+}
+
+static void nsl_calculate_checksum(struct nvdimm_drvdata *ndd,
+				   struct nd_namespace_label *nd_label)
+{
+	u64 sum;
+
+	if (!namespace_label_has(ndd, checksum))
+		return;
+	nsl_set_checksum(ndd, nd_label, 0);
+	sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
+	nsl_set_checksum(ndd, nd_label, sum);
+}
+
 static bool slot_valid(struct nvdimm_drvdata *ndd,
 		struct nd_namespace_label *nd_label, u32 slot)
 {
+	bool valid;
+
 	/* check that we are written where we expect to be written */
-	if (slot != __le32_to_cpu(nd_label->slot))
+	if (slot != nsl_get_slot(ndd, nd_label))
 		return false;
-
-	/* check checksum */
-	if (namespace_label_has(ndd, checksum)) {
-		u64 sum, sum_save;
-
-		sum_save = __le64_to_cpu(nd_label->checksum);
-		nd_label->checksum = __cpu_to_le64(0);
-		sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
-		nd_label->checksum = __cpu_to_le64(sum_save);
-		if (sum != sum_save) {
-			dev_dbg(ndd->dev, "fail checksum. slot: %d expect: %#llx\n",
-				slot, sum);
-			return false;
-		}
-	}
-
-	return true;
+	valid = nsl_validate_checksum(ndd, nd_label);
+	if (!valid)
+		dev_dbg(ndd->dev, "fail checksum. slot: %d\n", slot);
+	return valid;
 }
 
 int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd)
@@ -395,13 +411,13 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd)
 			continue;
 
 		memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN);
-		flags = __le32_to_cpu(nd_label->flags);
+		flags = nsl_get_flags(ndd, nd_label);
 		if (test_bit(NDD_NOBLK, &nvdimm->flags))
 			flags &= ~NSLABEL_FLAG_LOCAL;
 		nd_label_gen_id(&label_id, label_uuid, flags);
 		res = nvdimm_allocate_dpa(ndd, &label_id,
-				__le64_to_cpu(nd_label->dpa),
-				__le64_to_cpu(nd_label->rawsize));
+					  nsl_get_dpa(ndd, nd_label),
+					  nsl_get_rawsize(ndd, nd_label));
 		nd_dbg_dpa(nd_region, ndd, res, "reserve\n");
 		if (!res)
 			return -EBUSY;
@@ -548,9 +564,9 @@ int nd_label_active_count(struct nvdimm_drvdata *ndd)
 		nd_label = to_label(ndd, slot);
 
 		if (!slot_valid(ndd, nd_label, slot)) {
-			u32 label_slot = __le32_to_cpu(nd_label->slot);
-			u64 size = __le64_to_cpu(nd_label->rawsize);
-			u64 dpa = __le64_to_cpu(nd_label->dpa);
+			u32 label_slot = nsl_get_slot(ndd, nd_label);
+			u64 size = nsl_get_rawsize(ndd, nd_label);
+			u64 dpa = nsl_get_dpa(ndd, nd_label);
 
 			dev_dbg(ndd->dev,
 				"slot%d invalid slot: %d dpa: %llx size: %llx\n",
@@ -708,7 +724,7 @@ static unsigned long nd_label_offset(struct nvdimm_drvdata *ndd,
 		- (unsigned long) to_namespace_index(ndd, 0);
 }
 
-enum nvdimm_claim_class to_nvdimm_cclass(guid_t *guid)
+static enum nvdimm_claim_class to_nvdimm_cclass(guid_t *guid)
 {
 	if (guid_equal(guid, &nvdimm_btt_guid))
 		return NVDIMM_CCLASS_BTT;
@@ -756,6 +772,45 @@ static void reap_victim(struct nd_mapping *nd_mapping,
 	victim->label = NULL;
 }
 
+static void nsl_set_type_guid(struct nvdimm_drvdata *ndd,
+			      struct nd_namespace_label *nd_label, guid_t *guid)
+{
+	if (namespace_label_has(ndd, type_guid))
+		guid_copy(&nd_label->type_guid, guid);
+}
+
+bool nsl_validate_type_guid(struct nvdimm_drvdata *ndd,
+			    struct nd_namespace_label *nd_label, guid_t *guid)
+{
+	if (!namespace_label_has(ndd, type_guid))
+		return true;
+	if (!guid_equal(&nd_label->type_guid, guid)) {
+		dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n", guid,
+			&nd_label->type_guid);
+		return false;
+	}
+	return true;
+}
+
+static void nsl_set_claim_class(struct nvdimm_drvdata *ndd,
+				struct nd_namespace_label *nd_label,
+				enum nvdimm_claim_class claim_class)
+{
+	if (!namespace_label_has(ndd, abstraction_guid))
+		return;
+	guid_copy(&nd_label->abstraction_guid,
+		  to_abstraction_guid(claim_class,
+				      &nd_label->abstraction_guid));
+}
+
+enum nvdimm_claim_class nsl_get_claim_class(struct nvdimm_drvdata *ndd,
+					    struct nd_namespace_label *nd_label)
+{
+	if (!namespace_label_has(ndd, abstraction_guid))
+		return NVDIMM_CCLASS_NONE;
+	return to_nvdimm_cclass(&nd_label->abstraction_guid);
+}
+
 static int __pmem_label_update(struct nd_region *nd_region,
 		struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm,
 		int pos, unsigned long flags)
@@ -797,29 +852,18 @@ static int __pmem_label_update(struct nd_region *nd_region,
 	nd_label = to_label(ndd, slot);
 	memset(nd_label, 0, sizeof_namespace_label(ndd));
 	memcpy(nd_label->uuid, nspm->uuid, NSLABEL_UUID_LEN);
-	if (nspm->alt_name)
-		memcpy(nd_label->name, nspm->alt_name, NSLABEL_NAME_LEN);
-	nd_label->flags = __cpu_to_le32(flags);
-	nd_label->nlabel = __cpu_to_le16(nd_region->ndr_mappings);
-	nd_label->position = __cpu_to_le16(pos);
-	nd_label->isetcookie = __cpu_to_le64(cookie);
-	nd_label->rawsize = __cpu_to_le64(resource_size(res));
-	nd_label->lbasize = __cpu_to_le64(nspm->lbasize);
-	nd_label->dpa = __cpu_to_le64(res->start);
-	nd_label->slot = __cpu_to_le32(slot);
-	if (namespace_label_has(ndd, type_guid))
-		guid_copy(&nd_label->type_guid, &nd_set->type_guid);
-	if (namespace_label_has(ndd, abstraction_guid))
-		guid_copy(&nd_label->abstraction_guid,
-				to_abstraction_guid(ndns->claim_class,
-					&nd_label->abstraction_guid));
-	if (namespace_label_has(ndd, checksum)) {
-		u64 sum;
-
-		nd_label->checksum = __cpu_to_le64(0);
-		sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
-		nd_label->checksum = __cpu_to_le64(sum);
-	}
+	nsl_set_name(ndd, nd_label, nspm->alt_name);
+	nsl_set_flags(ndd, nd_label, flags);
+	nsl_set_nlabel(ndd, nd_label, nd_region->ndr_mappings);
+	nsl_set_position(ndd, nd_label, pos);
+	nsl_set_isetcookie(ndd, nd_label, cookie);
+	nsl_set_rawsize(ndd, nd_label, resource_size(res));
+	nsl_set_lbasize(ndd, nd_label, nspm->lbasize);
+	nsl_set_dpa(ndd, nd_label, res->start);
+	nsl_set_slot(ndd, nd_label, slot);
+	nsl_set_type_guid(ndd, nd_label, &nd_set->type_guid);
+	nsl_set_claim_class(ndd, nd_label, ndns->claim_class);
+	nsl_calculate_checksum(ndd, nd_label);
 	nd_dbg_dpa(nd_region, ndd, res, "\n");
 
 	/* update label */
@@ -879,9 +923,9 @@ static struct resource *to_resource(struct nvdimm_drvdata *ndd,
 	struct resource *res;
 
 	for_each_dpa_resource(ndd, res) {
-		if (res->start != __le64_to_cpu(nd_label->dpa))
+		if (res->start != nsl_get_dpa(ndd, nd_label))
 			continue;
-		if (resource_size(res) != __le64_to_cpu(nd_label->rawsize))
+		if (resource_size(res) != nsl_get_rawsize(ndd, nd_label))
 			continue;
 		return res;
 	}
@@ -890,6 +934,59 @@ static struct resource *to_resource(struct nvdimm_drvdata *ndd,
 }
 
 /*
+ * Use the presence of the type_guid as a flag to determine isetcookie
+ * usage and nlabel + position policy for blk-aperture namespaces.
+ */
+static void nsl_set_blk_isetcookie(struct nvdimm_drvdata *ndd,
+				   struct nd_namespace_label *nd_label,
+				   u64 isetcookie)
+{
+	if (namespace_label_has(ndd, type_guid)) {
+		nsl_set_isetcookie(ndd, nd_label, isetcookie);
+		return;
+	}
+	nsl_set_isetcookie(ndd, nd_label, 0); /* N/A */
+}
+
+bool nsl_validate_blk_isetcookie(struct nvdimm_drvdata *ndd,
+				 struct nd_namespace_label *nd_label,
+				 u64 isetcookie)
+{
+	if (!namespace_label_has(ndd, type_guid))
+		return true;
+
+	if (nsl_get_isetcookie(ndd, nd_label) != isetcookie) {
+		dev_dbg(ndd->dev, "expect cookie %#llx got %#llx\n", isetcookie,
+			nsl_get_isetcookie(ndd, nd_label));
+		return false;
+	}
+
+	return true;
+}
+
+static void nsl_set_blk_nlabel(struct nvdimm_drvdata *ndd,
+			       struct nd_namespace_label *nd_label, int nlabel,
+			       bool first)
+{
+	if (!namespace_label_has(ndd, type_guid)) {
+		nsl_set_nlabel(ndd, nd_label, 0); /* N/A */
+		return;
+	}
+	nsl_set_nlabel(ndd, nd_label, first ? nlabel : 0xffff);
+}
+
+static void nsl_set_blk_position(struct nvdimm_drvdata *ndd,
+				 struct nd_namespace_label *nd_label,
+				 bool first)
+{
+	if (!namespace_label_has(ndd, type_guid)) {
+		nsl_set_position(ndd, nd_label, 0);
+		return;
+	}
+	nsl_set_position(ndd, nd_label, first ? 0 : 0xffff);
+}
+
+/*
  * 1/ Account all the labels that can be freed after this update
  * 2/ Allocate and write the label to the staging (next) index
  * 3/ Record the resources in the namespace device
@@ -1017,50 +1114,21 @@ static int __blk_label_update(struct nd_region *nd_region,
 		nd_label = to_label(ndd, slot);
 		memset(nd_label, 0, sizeof_namespace_label(ndd));
 		memcpy(nd_label->uuid, nsblk->uuid, NSLABEL_UUID_LEN);
-		if (nsblk->alt_name)
-			memcpy(nd_label->name, nsblk->alt_name,
-					NSLABEL_NAME_LEN);
-		nd_label->flags = __cpu_to_le32(NSLABEL_FLAG_LOCAL);
-
-		/*
-		 * Use the presence of the type_guid as a flag to
-		 * determine isetcookie usage and nlabel + position
-		 * policy for blk-aperture namespaces.
-		 */
-		if (namespace_label_has(ndd, type_guid)) {
-			if (i == min_dpa_idx) {
-				nd_label->nlabel = __cpu_to_le16(nsblk->num_resources);
-				nd_label->position = __cpu_to_le16(0);
-			} else {
-				nd_label->nlabel = __cpu_to_le16(0xffff);
-				nd_label->position = __cpu_to_le16(0xffff);
-			}
-			nd_label->isetcookie = __cpu_to_le64(nd_set->cookie2);
-		} else {
-			nd_label->nlabel = __cpu_to_le16(0); /* N/A */
-			nd_label->position = __cpu_to_le16(0); /* N/A */
-			nd_label->isetcookie = __cpu_to_le64(0); /* N/A */
-		}
-
-		nd_label->dpa = __cpu_to_le64(res->start);
-		nd_label->rawsize = __cpu_to_le64(resource_size(res));
-		nd_label->lbasize = __cpu_to_le64(nsblk->lbasize);
-		nd_label->slot = __cpu_to_le32(slot);
-		if (namespace_label_has(ndd, type_guid))
-			guid_copy(&nd_label->type_guid, &nd_set->type_guid);
-		if (namespace_label_has(ndd, abstraction_guid))
-			guid_copy(&nd_label->abstraction_guid,
-					to_abstraction_guid(ndns->claim_class,
-						&nd_label->abstraction_guid));
-
-		if (namespace_label_has(ndd, checksum)) {
-			u64 sum;
-
-			nd_label->checksum = __cpu_to_le64(0);
-			sum = nd_fletcher64(nd_label,
-					sizeof_namespace_label(ndd), 1);
-			nd_label->checksum = __cpu_to_le64(sum);
-		}
+		nsl_set_name(ndd, nd_label, nsblk->alt_name);
+		nsl_set_flags(ndd, nd_label, NSLABEL_FLAG_LOCAL);
+
+		nsl_set_blk_nlabel(ndd, nd_label, nsblk->num_resources,
+				   i == min_dpa_idx);
+		nsl_set_blk_position(ndd, nd_label, i == min_dpa_idx);
+		nsl_set_blk_isetcookie(ndd, nd_label, nd_set->cookie2);
+
+		nsl_set_dpa(ndd, nd_label, res->start);
+		nsl_set_rawsize(ndd, nd_label, resource_size(res));
+		nsl_set_lbasize(ndd, nd_label, nsblk->lbasize);
+		nsl_set_slot(ndd, nd_label, slot);
+		nsl_set_type_guid(ndd, nd_label, &nd_set->type_guid);
+		nsl_set_claim_class(ndd, nd_label, ndns->claim_class);
+		nsl_calculate_checksum(ndd, nd_label);
 
 		/* update label */
 		offset = nd_label_offset(ndd, nd_label);
diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h
index 956b6d1bd8cc..31f94fad7b92 100644
--- a/drivers/nvdimm/label.h
+++ b/drivers/nvdimm/label.h
@@ -135,7 +135,6 @@ struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n);
 u32 nd_label_alloc_slot(struct nvdimm_drvdata *ndd);
 bool nd_label_free_slot(struct nvdimm_drvdata *ndd, u32 slot);
 u32 nd_label_nfree(struct nvdimm_drvdata *ndd);
-enum nvdimm_claim_class to_nvdimm_cclass(guid_t *guid);
 struct nd_region;
 struct nd_namespace_pmem;
 struct nd_namespace_blk;
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 745478213ff2..4cec171c934d 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -1235,7 +1235,7 @@ static int namespace_update_uuid(struct nd_region *nd_region,
 			if (!nd_label)
 				continue;
 			nd_label_gen_id(&label_id, nd_label->uuid,
-					__le32_to_cpu(nd_label->flags));
+					nsl_get_flags(ndd, nd_label));
 			if (strcmp(old_label_id.id, label_id.id) == 0)
 				set_bit(ND_LABEL_REAP, &label_ent->flags);
 		}
@@ -1847,28 +1847,21 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
 		list_for_each_entry(label_ent, &nd_mapping->labels, list) {
 			struct nd_namespace_label *nd_label = label_ent->label;
 			u16 position, nlabel;
-			u64 isetcookie;
 
 			if (!nd_label)
 				continue;
-			isetcookie = __le64_to_cpu(nd_label->isetcookie);
-			position = __le16_to_cpu(nd_label->position);
-			nlabel = __le16_to_cpu(nd_label->nlabel);
+			position = nsl_get_position(ndd, nd_label);
+			nlabel = nsl_get_nlabel(ndd, nd_label);
 
-			if (isetcookie != cookie)
+			if (!nsl_validate_isetcookie(ndd, nd_label, cookie))
 				continue;
 
 			if (memcmp(nd_label->uuid, uuid, NSLABEL_UUID_LEN) != 0)
 				continue;
 
-			if (namespace_label_has(ndd, type_guid)
-					&& !guid_equal(&nd_set->type_guid,
-						&nd_label->type_guid)) {
-				dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n",
-						&nd_set->type_guid,
-						&nd_label->type_guid);
+			if (!nsl_validate_type_guid(ndd, nd_label,
+						    &nd_set->type_guid))
 				continue;
-			}
 
 			if (found_uuid) {
 				dev_dbg(ndd->dev, "duplicate entry for uuid\n");
@@ -1923,8 +1916,8 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
 		 */
 		hw_start = nd_mapping->start;
 		hw_end = hw_start + nd_mapping->size;
-		pmem_start = __le64_to_cpu(nd_label->dpa);
-		pmem_end = pmem_start + __le64_to_cpu(nd_label->rawsize);
+		pmem_start = nsl_get_dpa(ndd, nd_label);
+		pmem_end = pmem_start + nsl_get_rawsize(ndd, nd_label);
 		if (pmem_start >= hw_start && pmem_start < hw_end
 				&& pmem_end <= hw_end && pmem_end > hw_start)
 			/* pass */;
@@ -1947,14 +1940,16 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
  * @nd_label: target pmem namespace label to evaluate
  */
 static struct device *create_namespace_pmem(struct nd_region *nd_region,
-		struct nd_namespace_index *nsindex,
-		struct nd_namespace_label *nd_label)
+					    struct nd_mapping *nd_mapping,
+					    struct nd_namespace_label *nd_label)
 {
+	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+	struct nd_namespace_index *nsindex =
+		to_namespace_index(ndd, ndd->ns_current);
 	u64 cookie = nd_region_interleave_set_cookie(nd_region, nsindex);
 	u64 altcookie = nd_region_interleave_set_altcookie(nd_region);
 	struct nd_label_ent *label_ent;
 	struct nd_namespace_pmem *nspm;
-	struct nd_mapping *nd_mapping;
 	resource_size_t size = 0;
 	struct resource *res;
 	struct device *dev;
@@ -1966,10 +1961,10 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
 		return ERR_PTR(-ENXIO);
 	}
 
-	if (__le64_to_cpu(nd_label->isetcookie) != cookie) {
+	if (!nsl_validate_isetcookie(ndd, nd_label, cookie)) {
 		dev_dbg(&nd_region->dev, "invalid cookie in label: %pUb\n",
 				nd_label->uuid);
-		if (__le64_to_cpu(nd_label->isetcookie) != altcookie)
+		if (!nsl_validate_isetcookie(ndd, nd_label, altcookie))
 			return ERR_PTR(-EAGAIN);
 
 		dev_dbg(&nd_region->dev, "valid altcookie in label: %pUb\n",
@@ -2037,20 +2032,18 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
 			continue;
 		}
 
-		size += __le64_to_cpu(label0->rawsize);
-		if (__le16_to_cpu(label0->position) != 0)
+		ndd = to_ndd(nd_mapping);
+		size += nsl_get_rawsize(ndd, label0);
+		if (nsl_get_position(ndd, label0) != 0)
 			continue;
 		WARN_ON(nspm->alt_name || nspm->uuid);
-		nspm->alt_name = kmemdup((void __force *) label0->name,
-				NSLABEL_NAME_LEN, GFP_KERNEL);
+		nspm->alt_name = kmemdup(nsl_ref_name(ndd, label0),
+					 NSLABEL_NAME_LEN, GFP_KERNEL);
 		nspm->uuid = kmemdup((void __force *) label0->uuid,
 				NSLABEL_UUID_LEN, GFP_KERNEL);
-		nspm->lbasize = __le64_to_cpu(label0->lbasize);
-		ndd = to_ndd(nd_mapping);
-		if (namespace_label_has(ndd, abstraction_guid))
-			nspm->nsio.common.claim_class
-				= to_nvdimm_cclass(&label0->abstraction_guid);
-
+		nspm->lbasize = nsl_get_lbasize(ndd, label0);
+		nspm->nsio.common.claim_class =
+			nsl_get_claim_class(ndd, label0);
 	}
 
 	if (!nspm->alt_name || !nspm->uuid) {
@@ -2237,7 +2230,7 @@ static int add_namespace_resource(struct nd_region *nd_region,
 		if (is_namespace_blk(devs[i])) {
 			res = nsblk_add_resource(nd_region, ndd,
 					to_nd_namespace_blk(devs[i]),
-					__le64_to_cpu(nd_label->dpa));
+					nsl_get_dpa(ndd, nd_label));
 			if (!res)
 				return -ENXIO;
 			nd_dbg_dpa(nd_region, ndd, res, "%d assign\n", count);
@@ -2265,21 +2258,10 @@ static struct device *create_namespace_blk(struct nd_region *nd_region,
 	struct device *dev = NULL;
 	struct resource *res;
 
-	if (namespace_label_has(ndd, type_guid)) {
-		if (!guid_equal(&nd_set->type_guid, &nd_label->type_guid)) {
-			dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n",
-					&nd_set->type_guid,
-					&nd_label->type_guid);
-			return ERR_PTR(-EAGAIN);
-		}
-
-		if (nd_label->isetcookie != __cpu_to_le64(nd_set->cookie2)) {
-			dev_dbg(ndd->dev, "expect cookie %#llx got %#llx\n",
-					nd_set->cookie2,
-					__le64_to_cpu(nd_label->isetcookie));
-			return ERR_PTR(-EAGAIN);
-		}
-	}
+	if (!nsl_validate_type_guid(ndd, nd_label, &nd_set->type_guid))
+		return ERR_PTR(-EAGAIN);
+	if (!nsl_validate_blk_isetcookie(ndd, nd_label, nd_set->cookie2))
+		return ERR_PTR(-EAGAIN);
 
 	nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
 	if (!nsblk)
@@ -2288,23 +2270,19 @@ static struct device *create_namespace_blk(struct nd_region *nd_region,
 	dev->type = &namespace_blk_device_type;
 	dev->parent = &nd_region->dev;
 	nsblk->id = -1;
-	nsblk->lbasize = __le64_to_cpu(nd_label->lbasize);
-	nsblk->uuid = kmemdup(nd_label->uuid, NSLABEL_UUID_LEN,
-			GFP_KERNEL);
-	if (namespace_label_has(ndd, abstraction_guid))
-		nsblk->common.claim_class
-			= to_nvdimm_cclass(&nd_label->abstraction_guid);
+	nsblk->lbasize = nsl_get_lbasize(ndd, nd_label);
+	nsblk->uuid = kmemdup(nd_label->uuid, NSLABEL_UUID_LEN, GFP_KERNEL);
+	nsblk->common.claim_class = nsl_get_claim_class(ndd, nd_label);
 	if (!nsblk->uuid)
 		goto blk_err;
-	memcpy(name, nd_label->name, NSLABEL_NAME_LEN);
+	nsl_get_name(ndd, nd_label, name);
 	if (name[0]) {
-		nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN,
-				GFP_KERNEL);
+		nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN, GFP_KERNEL);
 		if (!nsblk->alt_name)
 			goto blk_err;
 	}
 	res = nsblk_add_resource(nd_region, ndd, nsblk,
-			__le64_to_cpu(nd_label->dpa));
+			nsl_get_dpa(ndd, nd_label));
 	if (!res)
 		goto blk_err;
 	nd_dbg_dpa(nd_region, ndd, res, "%d: assign\n", count);
@@ -2345,6 +2323,7 @@ static struct device **scan_labels(struct nd_region *nd_region)
 	struct device *dev, **devs = NULL;
 	struct nd_label_ent *label_ent, *e;
 	struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
 	resource_size_t map_end = nd_mapping->start + nd_mapping->size - 1;
 
 	/* "safe" because create_namespace_pmem() might list_move() label_ent */
@@ -2355,7 +2334,7 @@ static struct device **scan_labels(struct nd_region *nd_region)
 
 		if (!nd_label)
 			continue;
-		flags = __le32_to_cpu(nd_label->flags);
+		flags = nsl_get_flags(ndd, nd_label);
 		if (is_nd_blk(&nd_region->dev)
 				== !!(flags & NSLABEL_FLAG_LOCAL))
 			/* pass, region matches label type */;
@@ -2363,9 +2342,9 @@ static struct device **scan_labels(struct nd_region *nd_region)
 			continue;
 
 		/* skip labels that describe extents outside of the region */
-		if (__le64_to_cpu(nd_label->dpa) < nd_mapping->start ||
-		    __le64_to_cpu(nd_label->dpa) > map_end)
-				continue;
+		if (nsl_get_dpa(ndd, nd_label) < nd_mapping->start ||
+		    nsl_get_dpa(ndd, nd_label) > map_end)
+			continue;
 
 		i = add_namespace_resource(nd_region, nd_label, devs, count);
 		if (i < 0)
@@ -2381,13 +2360,9 @@ static struct device **scan_labels(struct nd_region *nd_region)
 
 		if (is_nd_blk(&nd_region->dev))
 			dev = create_namespace_blk(nd_region, nd_label, count);
-		else {
-			struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
-			struct nd_namespace_index *nsindex;
-
-			nsindex = to_namespace_index(ndd, ndd->ns_current);
-			dev = create_namespace_pmem(nd_region, nsindex, nd_label);
-		}
+		else
+			dev = create_namespace_pmem(nd_region, nd_mapping,
+						    nd_label);
 
 		if (IS_ERR(dev)) {
 			switch (PTR_ERR(dev)) {
@@ -2571,10 +2546,10 @@ static int init_active_labels(struct nd_region *nd_region)
 				break;
 			label = nd_label_active(ndd, j);
 			if (test_bit(NDD_NOBLK, &nvdimm->flags)) {
-				u32 flags = __le32_to_cpu(label->flags);
+				u32 flags = nsl_get_flags(ndd, label);
 
 				flags &= ~NSLABEL_FLAG_LOCAL;
-				label->flags = __cpu_to_le32(flags);
+				nsl_set_flags(ndd, label, flags);
 			}
 			label_ent->label = label;
 
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 696b55556d4d..5467ebbb4a6b 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -35,6 +35,156 @@ struct nvdimm_drvdata {
 	struct kref kref;
 };
 
+static inline const u8 *nsl_ref_name(struct nvdimm_drvdata *ndd,
+				     struct nd_namespace_label *nd_label)
+{
+	return nd_label->name;
+}
+
+static inline u8 *nsl_get_name(struct nvdimm_drvdata *ndd,
+			       struct nd_namespace_label *nd_label, u8 *name)
+{
+	return memcpy(name, nd_label->name, NSLABEL_NAME_LEN);
+}
+
+static inline u8 *nsl_set_name(struct nvdimm_drvdata *ndd,
+			       struct nd_namespace_label *nd_label, u8 *name)
+{
+	if (!name)
+		return NULL;
+	return memcpy(nd_label->name, name, NSLABEL_NAME_LEN);
+}
+
+static inline u32 nsl_get_slot(struct nvdimm_drvdata *ndd,
+			       struct nd_namespace_label *nd_label)
+{
+	return __le32_to_cpu(nd_label->slot);
+}
+
+static inline void nsl_set_slot(struct nvdimm_drvdata *ndd,
+				struct nd_namespace_label *nd_label, u32 slot)
+{
+	nd_label->slot = __cpu_to_le32(slot);
+}
+
+static inline u64 nsl_get_checksum(struct nvdimm_drvdata *ndd,
+				   struct nd_namespace_label *nd_label)
+{
+	return __le64_to_cpu(nd_label->checksum);
+}
+
+static inline void nsl_set_checksum(struct nvdimm_drvdata *ndd,
+				    struct nd_namespace_label *nd_label,
+				    u64 checksum)
+{
+	nd_label->checksum = __cpu_to_le64(checksum);
+}
+
+static inline u32 nsl_get_flags(struct nvdimm_drvdata *ndd,
+				struct nd_namespace_label *nd_label)
+{
+	return __le32_to_cpu(nd_label->flags);
+}
+
+static inline void nsl_set_flags(struct nvdimm_drvdata *ndd,
+				 struct nd_namespace_label *nd_label, u32 flags)
+{
+	nd_label->flags = __cpu_to_le32(flags);
+}
+
+static inline u64 nsl_get_dpa(struct nvdimm_drvdata *ndd,
+			      struct nd_namespace_label *nd_label)
+{
+	return __le64_to_cpu(nd_label->dpa);
+}
+
+static inline void nsl_set_dpa(struct nvdimm_drvdata *ndd,
+			       struct nd_namespace_label *nd_label, u64 dpa)
+{
+	nd_label->dpa = __cpu_to_le64(dpa);
+}
+
+static inline u64 nsl_get_rawsize(struct nvdimm_drvdata *ndd,
+				  struct nd_namespace_label *nd_label)
+{
+	return __le64_to_cpu(nd_label->rawsize);
+}
+
+static inline void nsl_set_rawsize(struct nvdimm_drvdata *ndd,
+				   struct nd_namespace_label *nd_label,
+				   u64 rawsize)
+{
+	nd_label->rawsize = __cpu_to_le64(rawsize);
+}
+
+static inline u64 nsl_get_isetcookie(struct nvdimm_drvdata *ndd,
+				     struct nd_namespace_label *nd_label)
+{
+	return __le64_to_cpu(nd_label->isetcookie);
+}
+
+static inline void nsl_set_isetcookie(struct nvdimm_drvdata *ndd,
+				      struct nd_namespace_label *nd_label,
+				      u64 isetcookie)
+{
+	nd_label->isetcookie = __cpu_to_le64(isetcookie);
+}
+
+static inline bool nsl_validate_isetcookie(struct nvdimm_drvdata *ndd,
+					   struct nd_namespace_label *nd_label,
+					   u64 cookie)
+{
+	return cookie == __le64_to_cpu(nd_label->isetcookie);
+}
+
+static inline u16 nsl_get_position(struct nvdimm_drvdata *ndd,
+				   struct nd_namespace_label *nd_label)
+{
+	return __le16_to_cpu(nd_label->position);
+}
+
+static inline void nsl_set_position(struct nvdimm_drvdata *ndd,
+				    struct nd_namespace_label *nd_label,
+				    u16 position)
+{
+	nd_label->position = __cpu_to_le16(position);
+}
+
+
+static inline u16 nsl_get_nlabel(struct nvdimm_drvdata *ndd,
+				 struct nd_namespace_label *nd_label)
+{
+	return __le16_to_cpu(nd_label->nlabel);
+}
+
+static inline void nsl_set_nlabel(struct nvdimm_drvdata *ndd,
+				  struct nd_namespace_label *nd_label,
+				  u16 nlabel)
+{
+	nd_label->nlabel = __cpu_to_le16(nlabel);
+}
+
+static inline u64 nsl_get_lbasize(struct nvdimm_drvdata *ndd,
+				  struct nd_namespace_label *nd_label)
+{
+	return __le64_to_cpu(nd_label->lbasize);
+}
+
+static inline void nsl_set_lbasize(struct nvdimm_drvdata *ndd,
+				   struct nd_namespace_label *nd_label,
+				   u64 lbasize)
+{
+	nd_label->lbasize = __cpu_to_le64(lbasize);
+}
+
+bool nsl_validate_blk_isetcookie(struct nvdimm_drvdata *ndd,
+				 struct nd_namespace_label *nd_label,
+				 u64 isetcookie);
+bool nsl_validate_type_guid(struct nvdimm_drvdata *ndd,
+			    struct nd_namespace_label *nd_label, guid_t *guid);
+enum nvdimm_claim_class nsl_get_claim_class(struct nvdimm_drvdata *ndd,
+					    struct nd_namespace_label *nd_label);
+
 struct nd_region_data {
 	int ns_count;
 	int ns_active;
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 1e0615b8565e..72de88ff0d30 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -450,11 +450,11 @@ static int pmem_attach_disk(struct device *dev,
 		pmem->pfn_flags |= PFN_MAP;
 		bb_range = pmem->pgmap.range;
 	} else {
+		addr = devm_memremap(dev, pmem->phys_addr,
+				pmem->size, ARCH_MEMREMAP_PMEM);
 		if (devm_add_action_or_reset(dev, pmem_release_queue,
 					&pmem->pgmap))
 			return -ENOMEM;
-		addr = devm_memremap(dev, pmem->phys_addr,
-				pmem->size, ARCH_MEMREMAP_PMEM);
 		bb_range.start =  res->start;
 		bb_range.end = res->end;
 	}
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 8679a108f571..7efb31b87f37 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -116,6 +116,8 @@ static struct class *nvme_ns_chr_class;
 static void nvme_put_subsystem(struct nvme_subsystem *subsys);
 static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
 					   unsigned nsid);
+static void nvme_update_keep_alive(struct nvme_ctrl *ctrl,
+				   struct nvme_command *cmd);
 
 /*
  * Prepare a queue for teardown.
@@ -1152,7 +1154,8 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	return effects;
 }
 
-static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
+static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects,
+			      struct nvme_command *cmd, int status)
 {
 	if (effects & NVME_CMD_EFFECTS_CSE_MASK) {
 		nvme_unfreeze(ctrl);
@@ -1167,6 +1170,26 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
 		nvme_queue_scan(ctrl);
 		flush_work(&ctrl->scan_work);
 	}
+
+	switch (cmd->common.opcode) {
+	case nvme_admin_set_features:
+		switch (le32_to_cpu(cmd->common.cdw10) & 0xFF) {
+		case NVME_FEAT_KATO:
+			/*
+			 * Keep alive commands interval on the host should be
+			 * updated when KATO is modified by Set Features
+			 * commands.
+			 */
+			if (!status)
+				nvme_update_keep_alive(ctrl, cmd);
+			break;
+		default:
+			break;
+		}
+		break;
+	default:
+		break;
+	}
 }
 
 int nvme_execute_passthru_rq(struct request *rq)
@@ -1181,7 +1204,7 @@ int nvme_execute_passthru_rq(struct request *rq)
 	effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode);
 	ret = nvme_execute_rq(disk, rq, false);
 	if (effects) /* nothing to be done for zero cmd effects */
-		nvme_passthru_end(ctrl, effects);
+		nvme_passthru_end(ctrl, effects, cmd, ret);
 
 	return ret;
 }
@@ -1269,6 +1292,21 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_stop_keep_alive);
 
+static void nvme_update_keep_alive(struct nvme_ctrl *ctrl,
+				   struct nvme_command *cmd)
+{
+	unsigned int new_kato =
+		DIV_ROUND_UP(le32_to_cpu(cmd->common.cdw11), 1000);
+
+	dev_info(ctrl->device,
+		 "keep alive interval updated from %u ms to %u ms\n",
+		 ctrl->kato * 1000 / 2, new_kato * 1000 / 2);
+
+	nvme_stop_keep_alive(ctrl);
+	ctrl->kato = new_kato;
+	nvme_start_keep_alive(ctrl);
+}
+
 /*
  * In NVMe 1.0 the CNS field was just a binary controller or namespace
  * flag, thus sending any new CNS opcodes has a big chance of not working.
@@ -1302,11 +1340,6 @@ static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
 	return error;
 }
 
-static bool nvme_multi_css(struct nvme_ctrl *ctrl)
-{
-	return (ctrl->ctrl_config & NVME_CC_CSS_MASK) == NVME_CC_CSS_CSI;
-}
-
 static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids,
 		struct nvme_ns_id_desc *cur, bool *csi_seen)
 {
@@ -1874,6 +1907,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
 			goto out_unfreeze;
 	}
 
+	set_bit(NVME_NS_READY, &ns->flags);
 	blk_mq_unfreeze_queue(ns->disk->queue);
 
 	if (blk_queue_is_zoned(ns->queue)) {
@@ -1885,6 +1919,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
 	if (nvme_ns_head_multipath(ns->head)) {
 		blk_mq_freeze_queue(ns->head->disk->queue);
 		nvme_update_disk_info(ns->head->disk, ns, id);
+		nvme_mpath_revalidate_paths(ns);
 		blk_stack_limits(&ns->head->disk->queue->limits,
 				 &ns->queue->limits, 0);
 		disk_update_readahead(ns->head->disk);
@@ -3763,7 +3798,9 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 
 	nvme_get_ctrl(ctrl);
 
-	device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups);
+	if (device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups))
+		goto out_cleanup_ns_from_list;
+
 	if (!nvme_ns_head_multipath(ns->head))
 		nvme_add_ns_cdev(ns);
 
@@ -3773,6 +3810,11 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 
 	return;
 
+ out_cleanup_ns_from_list:
+	nvme_put_ctrl(ctrl);
+	down_write(&ctrl->namespaces_rwsem);
+	list_del_init(&ns->list);
+	up_write(&ctrl->namespaces_rwsem);
  out_unlink_ns:
 	mutex_lock(&ctrl->subsys->lock);
 	list_del_rcu(&ns->siblings);
@@ -3795,6 +3837,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 	if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
 		return;
 
+	clear_bit(NVME_NS_READY, &ns->flags);
 	set_capacity(ns->disk, 0);
 	nvme_fault_inject_fini(&ns->fault_inject);
 
@@ -3802,9 +3845,12 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 	list_del_rcu(&ns->siblings);
 	mutex_unlock(&ns->ctrl->subsys->lock);
 
-	synchronize_rcu(); /* guarantee not available in head->list */
-	nvme_mpath_clear_current_path(ns);
-	synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */
+	/* guarantee not available in head->list */
+	synchronize_rcu();
+
+	/* wait for concurrent submissions */
+	if (nvme_mpath_clear_current_path(ns))
+		synchronize_srcu(&ns->head->srcu);
 
 	if (!nvme_ns_head_multipath(ns->head))
 		nvme_cdev_del(&ns->cdev, &ns->cdev_device);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 37ce3e8b1db2..5d7bc58a27bd 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -147,6 +147,21 @@ void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
 	mutex_unlock(&ctrl->scan_lock);
 }
 
+void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
+{
+	struct nvme_ns_head *head = ns->head;
+	sector_t capacity = get_capacity(head->disk);
+	int node;
+
+	list_for_each_entry_rcu(ns, &head->list, siblings) {
+		if (capacity != get_capacity(ns->disk))
+			clear_bit(NVME_NS_READY, &ns->flags);
+	}
+
+	for_each_node(node)
+		rcu_assign_pointer(head->current_path[node], NULL);
+}
+
 static bool nvme_path_is_disabled(struct nvme_ns *ns)
 {
 	/*
@@ -158,7 +173,7 @@ static bool nvme_path_is_disabled(struct nvme_ns *ns)
 	    ns->ctrl->state != NVME_CTRL_DELETING)
 		return true;
 	if (test_bit(NVME_NS_ANA_PENDING, &ns->flags) ||
-	    test_bit(NVME_NS_REMOVING, &ns->flags))
+	    !test_bit(NVME_NS_READY, &ns->flags))
 		return true;
 	return false;
 }
@@ -465,6 +480,8 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
 			ctrl->subsys->instance, head->instance);
 
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, head->disk->queue);
+	blk_queue_flag_set(QUEUE_FLAG_NOWAIT, head->disk->queue);
+
 	/* set to a default value of 512 until the disk is validated */
 	blk_queue_logical_block_size(head->disk->queue, 512);
 	blk_set_stacking_limits(&head->disk->queue->limits);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a2e1f298b217..9871c0c9374c 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -456,6 +456,7 @@ struct nvme_ns {
 #define NVME_NS_DEAD     	1
 #define NVME_NS_ANA_PENDING	2
 #define NVME_NS_FORCE_RO	3
+#define NVME_NS_READY		4
 
 	struct cdev		cdev;
 	struct device		cdev_device;
@@ -748,6 +749,7 @@ void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl);
 void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
 void nvme_mpath_stop(struct nvme_ctrl *ctrl);
 bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
+void nvme_mpath_revalidate_paths(struct nvme_ns *ns);
 void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
 void nvme_mpath_shutdown_disk(struct nvme_ns_head *head);
 
@@ -795,6 +797,9 @@ static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
 {
 	return false;
 }
+static inline void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
+{
+}
 static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
 {
 }
@@ -887,4 +892,9 @@ struct nvme_ctrl *nvme_ctrl_from_file(struct file *file);
 struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid);
 void nvme_put_ns(struct nvme_ns *ns);
 
+static inline bool nvme_multi_css(struct nvme_ctrl *ctrl)
+{
+	return (ctrl->ctrl_config & NVME_CC_CSS_MASK) == NVME_CC_CSS_CSI;
+}
+
 #endif /* _NVME_H */
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 645025620154..e2ab12f3f51c 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -45,6 +45,7 @@ struct nvme_tcp_request {
 	u32			pdu_len;
 	u32			pdu_sent;
 	u16			ttag;
+	__le16			status;
 	struct list_head	entry;
 	struct llist_node	lentry;
 	__le32			ddgst;
@@ -485,6 +486,7 @@ static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
 static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
 		struct nvme_completion *cqe)
 {
+	struct nvme_tcp_request *req;
 	struct request *rq;
 
 	rq = nvme_find_rq(nvme_tcp_tagset(queue), cqe->command_id);
@@ -496,7 +498,11 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
 		return -EINVAL;
 	}
 
-	if (!nvme_try_complete_req(rq, cqe->status, cqe->result))
+	req = blk_mq_rq_to_pdu(rq);
+	if (req->status == cpu_to_le16(NVME_SC_SUCCESS))
+		req->status = cqe->status;
+
+	if (!nvme_try_complete_req(rq, req->status, cqe->result))
 		nvme_complete_rq(rq);
 	queue->nr_cqe++;
 
@@ -758,7 +764,8 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
 			queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
 		} else {
 			if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
-				nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
+				nvme_tcp_end_request(rq,
+						le16_to_cpu(req->status));
 				queue->nr_cqe++;
 			}
 			nvme_tcp_init_recv_ctx(queue);
@@ -788,18 +795,24 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
 		return 0;
 
 	if (queue->recv_ddgst != queue->exp_ddgst) {
+		struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
+					pdu->command_id);
+		struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
+
+		req->status = cpu_to_le16(NVME_SC_DATA_XFER_ERROR);
+
 		dev_err(queue->ctrl->ctrl.device,
 			"data digest error: recv %#x expected %#x\n",
 			le32_to_cpu(queue->recv_ddgst),
 			le32_to_cpu(queue->exp_ddgst));
-		return -EIO;
 	}
 
 	if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
 		struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
 					pdu->command_id);
+		struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
 
-		nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
+		nvme_tcp_end_request(rq, le16_to_cpu(req->status));
 		queue->nr_cqe++;
 	}
 
@@ -2293,6 +2306,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
 		return ret;
 
 	req->state = NVME_TCP_SEND_CMD_PDU;
+	req->status = cpu_to_le16(NVME_SC_SUCCESS);
 	req->offset = 0;
 	req->data_sent = 0;
 	req->pdu_len = 0;
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 0cb98f2bbc8c..aa6d84d8848e 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -1015,7 +1015,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
 	if (unlikely(ret))
 		return ret;
 
-	if (nvmet_req_passthru_ctrl(req))
+	if (nvmet_is_passthru_req(req))
 		return nvmet_parse_passthru_admin_cmd(req);
 
 	switch (cmd->common.opcode) {
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 273555127188..d784f3c200b4 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -1028,7 +1028,7 @@ nvmet_subsys_attr_version_store_locked(struct nvmet_subsys *subsys,
 	}
 
 	/* passthru subsystems use the underlying controller's version */
-	if (nvmet_passthru_ctrl(subsys))
+	if (nvmet_is_passthru_subsys(subsys))
 		return -EINVAL;
 
 	ret = sscanf(page, "%d.%d.%d\n", &major, &minor, &tertiary);
@@ -1067,7 +1067,8 @@ static ssize_t nvmet_subsys_attr_serial_show(struct config_item *item,
 {
 	struct nvmet_subsys *subsys = to_subsys(item);
 
-	return snprintf(page, PAGE_SIZE, "%s\n", subsys->serial);
+	return snprintf(page, PAGE_SIZE, "%*s\n",
+			NVMET_SN_MAX_SIZE, subsys->serial);
 }
 
 static ssize_t
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 66d05eecc2a9..b8425fa34300 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -553,7 +553,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
 	mutex_lock(&subsys->lock);
 	ret = 0;
 
-	if (nvmet_passthru_ctrl(subsys)) {
+	if (nvmet_is_passthru_subsys(subsys)) {
 		pr_info("cannot enable both passthru and regular namespaces for a single subsystem");
 		goto out_unlock;
 	}
@@ -869,7 +869,7 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
 	if (unlikely(ret))
 		return ret;
 
-	if (nvmet_req_passthru_ctrl(req))
+	if (nvmet_is_passthru_req(req))
 		return nvmet_parse_passthru_io_cmd(req);
 
 	ret = nvmet_req_find_ns(req);
@@ -1206,6 +1206,9 @@ static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
 	ctrl->cap |= (15ULL << 24);
 	/* maximum queue entries supported: */
 	ctrl->cap |= NVMET_QUEUE_SIZE - 1;
+
+	if (nvmet_is_passthru_subsys(ctrl->subsys))
+		nvmet_passthrough_override_cap(ctrl);
 }
 
 struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn,
@@ -1363,8 +1366,6 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
 		goto out_put_subsystem;
 	mutex_init(&ctrl->lock);
 
-	nvmet_init_cap(ctrl);
-
 	ctrl->port = req->port;
 
 	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
@@ -1378,6 +1379,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
 
 	kref_init(&ctrl->ref);
 	ctrl->subsys = subsys;
+	nvmet_init_cap(ctrl);
 	WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL);
 
 	ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES,
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 06dd3d537f07..7143c7fa7464 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -582,7 +582,7 @@ int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys);
 void nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys);
 u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req);
 u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req);
-static inline struct nvme_ctrl *nvmet_passthru_ctrl(struct nvmet_subsys *subsys)
+static inline bool nvmet_is_passthru_subsys(struct nvmet_subsys *subsys)
 {
 	return subsys->passthru_ctrl;
 }
@@ -601,18 +601,19 @@ static inline u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req)
 {
 	return 0;
 }
-static inline struct nvme_ctrl *nvmet_passthru_ctrl(struct nvmet_subsys *subsys)
+static inline bool nvmet_is_passthru_subsys(struct nvmet_subsys *subsys)
 {
 	return NULL;
 }
 #endif /* CONFIG_NVME_TARGET_PASSTHRU */
 
-static inline struct nvme_ctrl *
-nvmet_req_passthru_ctrl(struct nvmet_req *req)
+static inline bool nvmet_is_passthru_req(struct nvmet_req *req)
 {
-	return nvmet_passthru_ctrl(nvmet_req_subsys(req));
+	return nvmet_is_passthru_subsys(nvmet_req_subsys(req));
 }
 
+void nvmet_passthrough_override_cap(struct nvmet_ctrl *ctrl);
+
 u16 errno_to_nvme_status(struct nvmet_req *req, int errno);
 u16 nvmet_report_invalid_opcode(struct nvmet_req *req);
 
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index 225cd1ffbe45..f0efb3537989 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -20,6 +20,16 @@ MODULE_IMPORT_NS(NVME_TARGET_PASSTHRU);
  */
 static DEFINE_XARRAY(passthru_subsystems);
 
+void nvmet_passthrough_override_cap(struct nvmet_ctrl *ctrl)
+{
+	/*
+	 * Multiple command set support can only be declared if the underlying
+	 * controller actually supports it.
+	 */
+	if (!nvme_multi_css(ctrl->subsys->passthru_ctrl))
+		ctrl->cap &= ~(1ULL << 43);
+}
+
 static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
 {
 	struct nvmet_ctrl *ctrl = req->sq->ctrl;
@@ -218,7 +228,7 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
 
 static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
 {
-	struct nvme_ctrl *ctrl = nvmet_req_passthru_ctrl(req);
+	struct nvme_ctrl *ctrl = nvmet_req_subsys(req)->passthru_ctrl;
 	struct request_queue *q = ctrl->admin_q;
 	struct nvme_ns *ns = NULL;
 	struct request *rq = NULL;
@@ -299,7 +309,7 @@ out:
  */
 static void nvmet_passthru_set_host_behaviour(struct nvmet_req *req)
 {
-	struct nvme_ctrl *ctrl = nvmet_req_passthru_ctrl(req);
+	struct nvme_ctrl *ctrl = nvmet_req_subsys(req)->passthru_ctrl;
 	struct nvme_feat_host_behavior *host;
 	u16 status = NVME_SC_INTERNAL;
 	int ret;
diff --git a/drivers/of/device.c b/drivers/of/device.c
index c5a9473a5fb1..5b043ee30824 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -5,6 +5,7 @@
 #include <linux/of_device.h>
 #include <linux/of_address.h>
 #include <linux/of_iommu.h>
+#include <linux/of_reserved_mem.h>
 #include <linux/dma-direct.h> /* for bus_dma_region */
 #include <linux/dma-map-ops.h>
 #include <linux/init.h>
@@ -52,6 +53,42 @@ int of_device_add(struct platform_device *ofdev)
 	return device_add(&ofdev->dev);
 }
 
+static void
+of_dma_set_restricted_buffer(struct device *dev, struct device_node *np)
+{
+	struct device_node *node, *of_node = dev->of_node;
+	int count, i;
+
+	if (!IS_ENABLED(CONFIG_DMA_RESTRICTED_POOL))
+		return;
+
+	count = of_property_count_elems_of_size(of_node, "memory-region",
+						sizeof(u32));
+	/*
+	 * If dev->of_node doesn't exist or doesn't contain memory-region, try
+	 * the OF node having DMA configuration.
+	 */
+	if (count <= 0) {
+		of_node = np;
+		count = of_property_count_elems_of_size(
+			of_node, "memory-region", sizeof(u32));
+	}
+
+	for (i = 0; i < count; i++) {
+		node = of_parse_phandle(of_node, "memory-region", i);
+		/*
+		 * There might be multiple memory regions, but only one
+		 * restricted-dma-pool region is allowed.
+		 */
+		if (of_device_is_compatible(node, "restricted-dma-pool") &&
+		    of_device_is_available(node))
+			break;
+	}
+
+	if (i != count && of_reserved_mem_device_init_by_idx(dev, of_node, i))
+		dev_warn(dev, "failed to initialise \"restricted-dma-pool\" memory node\n");
+}
+
 /**
  * of_dma_configure_id - Setup DMA configuration
  * @dev:	Device to apply DMA configuration
@@ -165,6 +202,9 @@ int of_dma_configure_id(struct device *dev, struct device_node *np,
 
 	arch_setup_dma_ops(dev, dma_start, size, iommu, coherent);
 
+	if (!iommu)
+		of_dma_set_restricted_buffer(dev, np);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(of_dma_configure_id);
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index fd3964d24224..59c1390cdf42 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -33,18 +33,22 @@ static int __init early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
 	phys_addr_t *res_base)
 {
 	phys_addr_t base;
+	int err = 0;
 
 	end = !end ? MEMBLOCK_ALLOC_ANYWHERE : end;
 	align = !align ? SMP_CACHE_BYTES : align;
-	base = memblock_find_in_range(start, end, size, align);
+	base = memblock_phys_alloc_range(size, align, start, end);
 	if (!base)
 		return -ENOMEM;
 
 	*res_base = base;
-	if (nomap)
-		return memblock_mark_nomap(base, size);
+	if (nomap) {
+		err = memblock_mark_nomap(base, size);
+		if (err)
+			memblock_free(base, size);
+	}
 
-	return memblock_reserve(base, size);
+	return err;
 }
 
 /*
diff --git a/drivers/of/property.c b/drivers/of/property.c
index 0c0dc2e369c0..3fd74bb34819 100644
--- a/drivers/of/property.c
+++ b/drivers/of/property.c
@@ -1444,6 +1444,9 @@ static int of_fwnode_add_links(struct fwnode_handle *fwnode)
 	struct property *p;
 	struct device_node *con_np = to_of_node(fwnode);
 
+	if (IS_ENABLED(CONFIG_X86))
+		return 0;
+
 	if (!con_np)
 		return -EINVAL;
 
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index b5f9ee81a46c..059566f54429 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -111,29 +111,29 @@
 #define CMD_TLB_PURGE        33         /* IO_COMMAND to Purge I/O TLB entry */
 
 struct ioa_registers {
-        /* Runway Supervisory Set */
-        int32_t    unused1[12];
-        uint32_t   io_command;             /* Offset 12 */
-        uint32_t   io_status;              /* Offset 13 */
-        uint32_t   io_control;             /* Offset 14 */
-        int32_t    unused2[1];
-
-        /* Runway Auxiliary Register Set */
-        uint32_t   io_err_resp;            /* Offset  0 */
-        uint32_t   io_err_info;            /* Offset  1 */
-        uint32_t   io_err_req;             /* Offset  2 */
-        uint32_t   io_err_resp_hi;         /* Offset  3 */
-        uint32_t   io_tlb_entry_m;         /* Offset  4 */
-        uint32_t   io_tlb_entry_l;         /* Offset  5 */
-        uint32_t   unused3[1];
-        uint32_t   io_pdir_base;           /* Offset  7 */
-        uint32_t   io_io_low_hv;           /* Offset  8 */
-        uint32_t   io_io_high_hv;          /* Offset  9 */
-        uint32_t   unused4[1];
-        uint32_t   io_chain_id_mask;       /* Offset 11 */
-        uint32_t   unused5[2];
-        uint32_t   io_io_low;              /* Offset 14 */
-        uint32_t   io_io_high;             /* Offset 15 */
+	/* Runway Supervisory Set */
+	int32_t    unused1[12];
+	uint32_t   io_command;             /* Offset 12 */
+	uint32_t   io_status;              /* Offset 13 */
+	uint32_t   io_control;             /* Offset 14 */
+	int32_t    unused2[1];
+
+	/* Runway Auxiliary Register Set */
+	uint32_t   io_err_resp;            /* Offset  0 */
+	uint32_t   io_err_info;            /* Offset  1 */
+	uint32_t   io_err_req;             /* Offset  2 */
+	uint32_t   io_err_resp_hi;         /* Offset  3 */
+	uint32_t   io_tlb_entry_m;         /* Offset  4 */
+	uint32_t   io_tlb_entry_l;         /* Offset  5 */
+	uint32_t   unused3[1];
+	uint32_t   io_pdir_base;           /* Offset  7 */
+	uint32_t   io_io_low_hv;           /* Offset  8 */
+	uint32_t   io_io_high_hv;          /* Offset  9 */
+	uint32_t   unused4[1];
+	uint32_t   io_chain_id_mask;       /* Offset 11 */
+	uint32_t   unused5[2];
+	uint32_t   io_io_low;              /* Offset 14 */
+	uint32_t   io_io_high;             /* Offset 15 */
 };
 
 /*
@@ -198,7 +198,7 @@ struct ioa_registers {
 ** In order for a Runway address to reside within GSC+ extended address space:
 **	Runway Address [0:7]    must identically compare to 8'b11111111
 **	Runway Address [8:11]   must be equal to IO_IO_LOW(_HV)[16:19]
-** 	Runway Address [12:23]  must be greater than or equal to
+**	Runway Address [12:23]  must be greater than or equal to
 **	           IO_IO_LOW(_HV)[20:31] and less than IO_IO_HIGH(_HV)[20:31].
 **	Runway Address [24:39]  is not used in the comparison.
 **
@@ -226,10 +226,10 @@ struct ioc {
 	struct ioa_registers __iomem *ioc_regs;  /* I/O MMU base address */
 	u8  *res_map;	                /* resource map, bit == pdir entry */
 	u64 *pdir_base;	                /* physical base address */
-	u32 pdir_size; 			/* bytes, function of IOV Space size */
-	u32 res_hint;	                /* next available IOVP - 
+	u32 pdir_size;			/* bytes, function of IOV Space size */
+	u32 res_hint;			/* next available IOVP -
 					   circular search */
-	u32 res_size;		    	/* size of resource map in bytes */
+	u32 res_size;			/* size of resource map in bytes */
 	spinlock_t res_lock;
 
 #ifdef CCIO_COLLECT_STATS
@@ -249,7 +249,7 @@ struct ioc {
 	unsigned short cujo20_bug;
 
 	/* STUFF We don't need in performance path */
-	u32 chainid_shift; 		/* specify bit location of chain_id */
+	u32 chainid_shift;		/* specify bit location of chain_id */
 	struct ioc *next;		/* Linked list of discovered iocs */
 	const char *name;		/* device name from firmware */
 	unsigned int hw_path;           /* the hardware path this ioc is associatd with */
@@ -293,7 +293,7 @@ static int ioc_count;
 ** cause the kernel to panic anyhow.
 */
 #define CCIO_SEARCH_LOOP(ioc, res_idx, mask, size)  \
-       for(; res_ptr < res_end; ++res_ptr) { \
+	for (; res_ptr < res_end; ++res_ptr) { \
 		int ret;\
 		unsigned int idx;\
 		idx = (unsigned int)((unsigned long)res_ptr - (unsigned long)ioc->res_map); \
@@ -309,9 +309,9 @@ static int ioc_count;
 #define CCIO_FIND_FREE_MAPPING(ioa, res_idx, mask, size) \
        u##size *res_ptr = (u##size *)&((ioc)->res_map[ioa->res_hint & ~((size >> 3) - 1)]); \
        u##size *res_end = (u##size *)&(ioc)->res_map[ioa->res_size]; \
-       CCIO_SEARCH_LOOP(ioc, res_idx, mask, size); \
-       res_ptr = (u##size *)&(ioc)->res_map[0]; \
-       CCIO_SEARCH_LOOP(ioa, res_idx, mask, size);
+	CCIO_SEARCH_LOOP(ioc, res_idx, mask, size); \
+	res_ptr = (u##size *)&(ioc)->res_map[0]; \
+	CCIO_SEARCH_LOOP(ioa, res_idx, mask, size);
 
 /*
 ** Find available bit in this ioa's resource map.
@@ -348,9 +348,9 @@ ccio_alloc_range(struct ioc *ioc, struct device *dev, size_t size)
 	
 	BUG_ON(pages_needed == 0);
 	BUG_ON((pages_needed * IOVP_SIZE) > DMA_CHUNK_SIZE);
-     
-	DBG_RES("%s() size: %d pages_needed %d\n", 
-		__func__, size, pages_needed);
+
+	DBG_RES("%s() size: %d pages_needed %d\n",
+			__func__, size, pages_needed);
 
 	/*
 	** "seek and ye shall find"...praying never hurts either...
@@ -416,7 +416,7 @@ resource_found:
 #define CCIO_FREE_MAPPINGS(ioc, res_idx, mask, size) \
         u##size *res_ptr = (u##size *)&((ioc)->res_map[res_idx]); \
         BUG_ON((*res_ptr & mask) != mask); \
-        *res_ptr &= ~(mask);
+	*res_ptr &= ~(mask);
 
 /**
  * ccio_free_range - Free pages from the ioc's resource map.
@@ -518,9 +518,9 @@ typedef unsigned long space_t;
 ** when it passes in BIDIRECTIONAL flag.
 */
 static u32 hint_lookup[] = {
-	[PCI_DMA_BIDIRECTIONAL]	= HINT_STOP_MOST | HINT_SAFE_DMA | IOPDIR_VALID,
-	[PCI_DMA_TODEVICE]	= HINT_STOP_MOST | HINT_PREFETCH | IOPDIR_VALID,
-	[PCI_DMA_FROMDEVICE]	= HINT_STOP_MOST | IOPDIR_VALID,
+	[DMA_BIDIRECTIONAL]	= HINT_STOP_MOST | HINT_SAFE_DMA | IOPDIR_VALID,
+	[DMA_TO_DEVICE]		= HINT_STOP_MOST | HINT_PREFETCH | IOPDIR_VALID,
+	[DMA_FROM_DEVICE]	= HINT_STOP_MOST | IOPDIR_VALID,
 };
 
 /**
@@ -845,7 +845,7 @@ static void *
 ccio_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag,
 		unsigned long attrs)
 {
-      void *ret;
+	void *ret;
 #if 0
 /* GRANT Need to establish hierarchy for non-PCI devs as well
 ** and then provide matching gsc_map_xxx() functions for them as well.
@@ -856,11 +856,11 @@ ccio_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag,
 		return 0;
 	}
 #endif
-        ret = (void *) __get_free_pages(flag, get_order(size));
+	ret = (void *) __get_free_pages(flag, get_order(size));
 
 	if (ret) {
 		memset(ret, 0, size);
-		*dma_handle = ccio_map_single(dev, ret, size, PCI_DMA_BIDIRECTIONAL);
+		*dma_handle = ccio_map_single(dev, ret, size, DMA_BIDIRECTIONAL);
 	}
 
 	return ret;
@@ -918,7 +918,7 @@ ccio_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
 	BUG_ON(!dev);
 	ioc = GET_IOC(dev);
 	if (!ioc)
-		return 0;
+		return -EINVAL;
 	
 	DBG_RUN_SG("%s() START %d entries\n", __func__, nents);
 
@@ -1022,8 +1022,8 @@ static const struct dma_map_ops ccio_ops = {
 	.free =			ccio_free,
 	.map_page =		ccio_map_page,
 	.unmap_page =		ccio_unmap_page,
-	.map_sg = 		ccio_map_sg,
-	.unmap_sg = 		ccio_unmap_sg,
+	.map_sg =		ccio_map_sg,
+	.unmap_sg =		ccio_unmap_sg,
 	.get_sgtable =		dma_common_get_sgtable,
 	.alloc_pages =		dma_common_alloc_pages,
 	.free_pages =		dma_common_free_pages,
@@ -1080,7 +1080,7 @@ static int ccio_proc_info(struct seq_file *m, void *p)
 		max = ioc->usingle_pages - ioc->usg_pages;
 		seq_printf(m, "pci_unmap_single: %8ld calls  %8ld pages (avg %d/1000)\n",
 			   min, max, (int)((max * 1000)/min));
- 
+
 		seq_printf(m, "pci_map_sg()    : %8ld calls  %8ld pages (avg %d/1000)\n",
 			   ioc->msg_calls, ioc->msg_pages,
 			   (int)((ioc->msg_pages * 1000)/ioc->msg_calls));
@@ -1169,7 +1169,7 @@ void __init ccio_cujo20_fixup(struct parisc_device *cujo, u32 iovp)
 	idx = PDIR_INDEX(iovp) >> 3;
 
 	while (idx < ioc->res_size) {
- 		res_ptr[idx] |= 0xff;
+		res_ptr[idx] |= 0xff;
 		idx += PDIR_INDEX(CUJO_20_STEP) >> 3;
 	}
 }
@@ -1297,7 +1297,7 @@ ccio_ioc_init(struct ioc *ioc)
 	DBG_INIT(" base %p\n", ioc->pdir_base);
 
 	/* resource map size dictated by pdir_size */
- 	ioc->res_size = (ioc->pdir_size / sizeof(u64)) >> 3;
+	ioc->res_size = (ioc->pdir_size / sizeof(u64)) >> 3;
 	DBG_INIT("%s() res_size 0x%x\n", __func__, ioc->res_size);
 	
 	ioc->res_map = (u8 *)__get_free_pages(GFP_KERNEL, 
diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c
index 889d7ce282eb..952a92504df6 100644
--- a/drivers/parisc/dino.c
+++ b/drivers/parisc/dino.c
@@ -156,15 +156,6 @@ static inline struct dino_device *DINO_DEV(struct pci_hba_data *hba)
 	return container_of(hba, struct dino_device, hba);
 }
 
-/* Check if PCI device is behind a Card-mode Dino. */
-static int pci_dev_is_behind_card_dino(struct pci_dev *dev)
-{
-	struct dino_device *dino_dev;
-
-	dino_dev = DINO_DEV(parisc_walk_tree(dev->bus->bridge));
-	return is_card_dino(&dino_dev->hba.dev->id);
-}
-
 /*
  * Dino Configuration Space Accessor Functions
  */
@@ -447,6 +438,15 @@ static void quirk_cirrus_cardbus(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_CIRRUS, PCI_DEVICE_ID_CIRRUS_6832, quirk_cirrus_cardbus );
 
 #ifdef CONFIG_TULIP
+/* Check if PCI device is behind a Card-mode Dino. */
+static int pci_dev_is_behind_card_dino(struct pci_dev *dev)
+{
+	struct dino_device *dino_dev;
+
+	dino_dev = DINO_DEV(parisc_walk_tree(dev->bus->bridge));
+	return is_card_dino(&dino_dev->hba.dev->id);
+}
+
 static void pci_fixup_tulip(struct pci_dev *dev)
 {
 	if (!pci_dev_is_behind_card_dino(dev))
diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c
index 36c6613f7a36..cf91cb024be3 100644
--- a/drivers/parisc/led.c
+++ b/drivers/parisc/led.c
@@ -250,14 +250,14 @@ static int __init led_create_procfs(void)
 
 	if (!lcd_no_led_support)
 	{
-		ent = proc_create_data("led", S_IRUGO|S_IWUSR, proc_pdc_root,
+		ent = proc_create_data("led", 0644, proc_pdc_root,
 					&led_proc_ops, (void *)LED_NOLCD); /* LED */
 		if (!ent) return -1;
 	}
 
 	if (led_type == LED_HASLCD)
 	{
-		ent = proc_create_data("lcd", S_IRUGO|S_IWUSR, proc_pdc_root,
+		ent = proc_create_data("lcd", 0644, proc_pdc_root,
 					&led_proc_ops, (void *)LED_HASLCD); /* LCD */
 		if (!ent) return -1;
 	}
diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
index dce4cdf786cd..e60690d38d67 100644
--- a/drivers/parisc/sba_iommu.c
+++ b/drivers/parisc/sba_iommu.c
@@ -947,7 +947,7 @@ sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
 
 	ioc = GET_IOC(dev);
 	if (!ioc)
-		return 0;
+		return -EINVAL;
 
 	/* Fast path single entry scatterlists. */
 	if (nents == 1) {
diff --git a/drivers/parport/parport_gsc.c b/drivers/parport/parport_gsc.c
index 1e43b3f399a8..0dcc497b0449 100644
--- a/drivers/parport/parport_gsc.c
+++ b/drivers/parport/parport_gsc.c
@@ -378,7 +378,7 @@ static int __init parport_init_chip(struct parisc_device *dev)
 	return 0;
 }
 
-static int __exit parport_remove_chip(struct parisc_device *dev)
+static void __exit parport_remove_chip(struct parisc_device *dev)
 {
 	struct parport *p = dev_get_drvdata(&dev->dev);
 	if (p) {
@@ -390,14 +390,12 @@ static int __exit parport_remove_chip(struct parisc_device *dev)
 		if (p->irq != PARPORT_IRQ_NONE)
 			free_irq(p->irq, p);
 		if (priv->dma_buf)
-			pci_free_consistent(priv->dev, PAGE_SIZE,
-					    priv->dma_buf,
-					    priv->dma_handle);
+			dma_free_coherent(&priv->dev->dev, PAGE_SIZE,
+					  priv->dma_buf, priv->dma_handle);
 		kfree (p->private_data);
 		parport_put_port(p);
 		kfree (ops); /* hope no-one cached it */
 	}
-	return 0;
 }
 
 static const struct parisc_device_id parport_tbl[] __initconst = {
diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index 6d7d64939f82..c967ad6e2626 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -376,7 +376,7 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
 	if (WARN_ON(pdev->pasid_enabled))
 		return -EBUSY;
 
-	if (!pdev->eetlp_prefix_path)
+	if (!pdev->eetlp_prefix_path && !pdev->pasid_no_tlp)
 		return -EINVAL;
 
 	if (!pasid)
diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig
index 5e1e3796efa4..326f7d13024f 100644
--- a/drivers/pci/controller/Kconfig
+++ b/drivers/pci/controller/Kconfig
@@ -40,6 +40,7 @@ config PCI_FTPCI100
 config PCI_IXP4XX
 	bool "Intel IXP4xx PCI controller"
 	depends on ARM && OF
+	depends on ARCH_IXP4XX || COMPILE_TEST
 	default ARCH_IXP4XX
 	help
 	  Say Y here if you want support for the PCI host controller found
diff --git a/drivers/pci/controller/cadence/pci-j721e.c b/drivers/pci/controller/cadence/pci-j721e.c
index 35e61048e133..ffb176d288cd 100644
--- a/drivers/pci/controller/cadence/pci-j721e.c
+++ b/drivers/pci/controller/cadence/pci-j721e.c
@@ -27,6 +27,7 @@
 #define STATUS_REG_SYS_2	0x508
 #define STATUS_CLR_REG_SYS_2	0x708
 #define LINK_DOWN		BIT(1)
+#define J7200_LINK_DOWN		BIT(10)
 
 #define J721E_PCIE_USER_CMD_STATUS	0x4
 #define LINK_TRAINING_ENABLE		BIT(0)
@@ -57,6 +58,7 @@ struct j721e_pcie {
 	struct cdns_pcie	*cdns_pcie;
 	void __iomem		*user_cfg_base;
 	void __iomem		*intd_cfg_base;
+	u32			linkdown_irq_regfield;
 };
 
 enum j721e_pcie_mode {
@@ -66,7 +68,10 @@ enum j721e_pcie_mode {
 
 struct j721e_pcie_data {
 	enum j721e_pcie_mode	mode;
-	bool quirk_retrain_flag;
+	unsigned int		quirk_retrain_flag:1;
+	unsigned int		quirk_detect_quiet_flag:1;
+	u32			linkdown_irq_regfield;
+	unsigned int		byte_access_allowed:1;
 };
 
 static inline u32 j721e_pcie_user_readl(struct j721e_pcie *pcie, u32 offset)
@@ -98,12 +103,12 @@ static irqreturn_t j721e_pcie_link_irq_handler(int irq, void *priv)
 	u32 reg;
 
 	reg = j721e_pcie_intd_readl(pcie, STATUS_REG_SYS_2);
-	if (!(reg & LINK_DOWN))
+	if (!(reg & pcie->linkdown_irq_regfield))
 		return IRQ_NONE;
 
 	dev_err(dev, "LINK DOWN!\n");
 
-	j721e_pcie_intd_writel(pcie, STATUS_CLR_REG_SYS_2, LINK_DOWN);
+	j721e_pcie_intd_writel(pcie, STATUS_CLR_REG_SYS_2, pcie->linkdown_irq_regfield);
 	return IRQ_HANDLED;
 }
 
@@ -112,7 +117,7 @@ static void j721e_pcie_config_link_irq(struct j721e_pcie *pcie)
 	u32 reg;
 
 	reg = j721e_pcie_intd_readl(pcie, ENABLE_REG_SYS_2);
-	reg |= LINK_DOWN;
+	reg |= pcie->linkdown_irq_regfield;
 	j721e_pcie_intd_writel(pcie, ENABLE_REG_SYS_2, reg);
 }
 
@@ -284,10 +289,36 @@ static struct pci_ops cdns_ti_pcie_host_ops = {
 static const struct j721e_pcie_data j721e_pcie_rc_data = {
 	.mode = PCI_MODE_RC,
 	.quirk_retrain_flag = true,
+	.byte_access_allowed = false,
+	.linkdown_irq_regfield = LINK_DOWN,
 };
 
 static const struct j721e_pcie_data j721e_pcie_ep_data = {
 	.mode = PCI_MODE_EP,
+	.linkdown_irq_regfield = LINK_DOWN,
+};
+
+static const struct j721e_pcie_data j7200_pcie_rc_data = {
+	.mode = PCI_MODE_RC,
+	.quirk_detect_quiet_flag = true,
+	.linkdown_irq_regfield = J7200_LINK_DOWN,
+	.byte_access_allowed = true,
+};
+
+static const struct j721e_pcie_data j7200_pcie_ep_data = {
+	.mode = PCI_MODE_EP,
+	.quirk_detect_quiet_flag = true,
+};
+
+static const struct j721e_pcie_data am64_pcie_rc_data = {
+	.mode = PCI_MODE_RC,
+	.linkdown_irq_regfield = J7200_LINK_DOWN,
+	.byte_access_allowed = true,
+};
+
+static const struct j721e_pcie_data am64_pcie_ep_data = {
+	.mode = PCI_MODE_EP,
+	.linkdown_irq_regfield = J7200_LINK_DOWN,
 };
 
 static const struct of_device_id of_j721e_pcie_match[] = {
@@ -299,6 +330,22 @@ static const struct of_device_id of_j721e_pcie_match[] = {
 		.compatible = "ti,j721e-pcie-ep",
 		.data = &j721e_pcie_ep_data,
 	},
+	{
+		.compatible = "ti,j7200-pcie-host",
+		.data = &j7200_pcie_rc_data,
+	},
+	{
+		.compatible = "ti,j7200-pcie-ep",
+		.data = &j7200_pcie_ep_data,
+	},
+	{
+		.compatible = "ti,am64-pcie-host",
+		.data = &am64_pcie_rc_data,
+	},
+	{
+		.compatible = "ti,am64-pcie-ep",
+		.data = &am64_pcie_ep_data,
+	},
 	{},
 };
 
@@ -332,6 +379,7 @@ static int j721e_pcie_probe(struct platform_device *pdev)
 
 	pcie->dev = dev;
 	pcie->mode = mode;
+	pcie->linkdown_irq_regfield = data->linkdown_irq_regfield;
 
 	base = devm_platform_ioremap_resource_byname(pdev, "intd_cfg");
 	if (IS_ERR(base))
@@ -391,9 +439,11 @@ static int j721e_pcie_probe(struct platform_device *pdev)
 			goto err_get_sync;
 		}
 
-		bridge->ops = &cdns_ti_pcie_host_ops;
+		if (!data->byte_access_allowed)
+			bridge->ops = &cdns_ti_pcie_host_ops;
 		rc = pci_host_bridge_priv(bridge);
 		rc->quirk_retrain_flag = data->quirk_retrain_flag;
+		rc->quirk_detect_quiet_flag = data->quirk_detect_quiet_flag;
 
 		cdns_pcie = &rc->pcie;
 		cdns_pcie->dev = dev;
@@ -459,6 +509,7 @@ static int j721e_pcie_probe(struct platform_device *pdev)
 			ret = -ENOMEM;
 			goto err_get_sync;
 		}
+		ep->quirk_detect_quiet_flag = data->quirk_detect_quiet_flag;
 
 		cdns_pcie = &ep->pcie;
 		cdns_pcie->dev = dev;
diff --git a/drivers/pci/controller/cadence/pcie-cadence-ep.c b/drivers/pci/controller/cadence/pcie-cadence-ep.c
index 897cdde02bd8..88e05b9c2e5b 100644
--- a/drivers/pci/controller/cadence/pcie-cadence-ep.c
+++ b/drivers/pci/controller/cadence/pcie-cadence-ep.c
@@ -16,11 +16,37 @@
 #define CDNS_PCIE_EP_IRQ_PCI_ADDR_NONE		0x1
 #define CDNS_PCIE_EP_IRQ_PCI_ADDR_LEGACY	0x3
 
-static int cdns_pcie_ep_write_header(struct pci_epc *epc, u8 fn,
+static u8 cdns_pcie_get_fn_from_vfn(struct cdns_pcie *pcie, u8 fn, u8 vfn)
+{
+	u32 cap = CDNS_PCIE_EP_FUNC_SRIOV_CAP_OFFSET;
+	u32 first_vf_offset, stride;
+
+	if (vfn == 0)
+		return fn;
+
+	first_vf_offset = cdns_pcie_ep_fn_readw(pcie, fn, cap + PCI_SRIOV_VF_OFFSET);
+	stride = cdns_pcie_ep_fn_readw(pcie, fn, cap +  PCI_SRIOV_VF_STRIDE);
+	fn = fn + first_vf_offset + ((vfn - 1) * stride);
+
+	return fn;
+}
+
+static int cdns_pcie_ep_write_header(struct pci_epc *epc, u8 fn, u8 vfn,
 				     struct pci_epf_header *hdr)
 {
 	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
+	u32 cap = CDNS_PCIE_EP_FUNC_SRIOV_CAP_OFFSET;
 	struct cdns_pcie *pcie = &ep->pcie;
+	u32 reg;
+
+	if (vfn > 1) {
+		dev_err(&epc->dev, "Only Virtual Function #1 has deviceID\n");
+		return -EINVAL;
+	} else if (vfn == 1) {
+		reg = cap + PCI_SRIOV_VF_DID;
+		cdns_pcie_ep_fn_writew(pcie, fn, reg, hdr->deviceid);
+		return 0;
+	}
 
 	cdns_pcie_ep_fn_writew(pcie, fn, PCI_DEVICE_ID, hdr->deviceid);
 	cdns_pcie_ep_fn_writeb(pcie, fn, PCI_REVISION_ID, hdr->revid);
@@ -47,7 +73,7 @@ static int cdns_pcie_ep_write_header(struct pci_epc *epc, u8 fn,
 	return 0;
 }
 
-static int cdns_pcie_ep_set_bar(struct pci_epc *epc, u8 fn,
+static int cdns_pcie_ep_set_bar(struct pci_epc *epc, u8 fn, u8 vfn,
 				struct pci_epf_bar *epf_bar)
 {
 	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
@@ -92,32 +118,36 @@ static int cdns_pcie_ep_set_bar(struct pci_epc *epc, u8 fn,
 
 	addr0 = lower_32_bits(bar_phys);
 	addr1 = upper_32_bits(bar_phys);
+
+	if (vfn == 1)
+		reg = CDNS_PCIE_LM_EP_VFUNC_BAR_CFG(bar, fn);
+	else
+		reg = CDNS_PCIE_LM_EP_FUNC_BAR_CFG(bar, fn);
+	b = (bar < BAR_4) ? bar : bar - BAR_4;
+
+	if (vfn == 0 || vfn == 1) {
+		cfg = cdns_pcie_readl(pcie, reg);
+		cfg &= ~(CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_APERTURE_MASK(b) |
+			 CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_CTRL_MASK(b));
+		cfg |= (CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_APERTURE(b, aperture) |
+			CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_CTRL(b, ctrl));
+		cdns_pcie_writel(pcie, reg, cfg);
+	}
+
+	fn = cdns_pcie_get_fn_from_vfn(pcie, fn, vfn);
 	cdns_pcie_writel(pcie, CDNS_PCIE_AT_IB_EP_FUNC_BAR_ADDR0(fn, bar),
 			 addr0);
 	cdns_pcie_writel(pcie, CDNS_PCIE_AT_IB_EP_FUNC_BAR_ADDR1(fn, bar),
 			 addr1);
 
-	if (bar < BAR_4) {
-		reg = CDNS_PCIE_LM_EP_FUNC_BAR_CFG0(fn);
-		b = bar;
-	} else {
-		reg = CDNS_PCIE_LM_EP_FUNC_BAR_CFG1(fn);
-		b = bar - BAR_4;
-	}
-
-	cfg = cdns_pcie_readl(pcie, reg);
-	cfg &= ~(CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_APERTURE_MASK(b) |
-		 CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_CTRL_MASK(b));
-	cfg |= (CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_APERTURE(b, aperture) |
-		CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_CTRL(b, ctrl));
-	cdns_pcie_writel(pcie, reg, cfg);
-
+	if (vfn > 0)
+		epf = &epf->epf[vfn - 1];
 	epf->epf_bar[bar] = epf_bar;
 
 	return 0;
 }
 
-static void cdns_pcie_ep_clear_bar(struct pci_epc *epc, u8 fn,
+static void cdns_pcie_ep_clear_bar(struct pci_epc *epc, u8 fn, u8 vfn,
 				   struct pci_epf_bar *epf_bar)
 {
 	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
@@ -126,29 +156,32 @@ static void cdns_pcie_ep_clear_bar(struct pci_epc *epc, u8 fn,
 	enum pci_barno bar = epf_bar->barno;
 	u32 reg, cfg, b, ctrl;
 
-	if (bar < BAR_4) {
-		reg = CDNS_PCIE_LM_EP_FUNC_BAR_CFG0(fn);
-		b = bar;
-	} else {
-		reg = CDNS_PCIE_LM_EP_FUNC_BAR_CFG1(fn);
-		b = bar - BAR_4;
+	if (vfn == 1)
+		reg = CDNS_PCIE_LM_EP_VFUNC_BAR_CFG(bar, fn);
+	else
+		reg = CDNS_PCIE_LM_EP_FUNC_BAR_CFG(bar, fn);
+	b = (bar < BAR_4) ? bar : bar - BAR_4;
+
+	if (vfn == 0 || vfn == 1) {
+		ctrl = CDNS_PCIE_LM_BAR_CFG_CTRL_DISABLED;
+		cfg = cdns_pcie_readl(pcie, reg);
+		cfg &= ~(CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_APERTURE_MASK(b) |
+			 CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_CTRL_MASK(b));
+		cfg |= CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_CTRL(b, ctrl);
+		cdns_pcie_writel(pcie, reg, cfg);
 	}
 
-	ctrl = CDNS_PCIE_LM_BAR_CFG_CTRL_DISABLED;
-	cfg = cdns_pcie_readl(pcie, reg);
-	cfg &= ~(CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_APERTURE_MASK(b) |
-		 CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_CTRL_MASK(b));
-	cfg |= CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_CTRL(b, ctrl);
-	cdns_pcie_writel(pcie, reg, cfg);
-
+	fn = cdns_pcie_get_fn_from_vfn(pcie, fn, vfn);
 	cdns_pcie_writel(pcie, CDNS_PCIE_AT_IB_EP_FUNC_BAR_ADDR0(fn, bar), 0);
 	cdns_pcie_writel(pcie, CDNS_PCIE_AT_IB_EP_FUNC_BAR_ADDR1(fn, bar), 0);
 
+	if (vfn > 0)
+		epf = &epf->epf[vfn - 1];
 	epf->epf_bar[bar] = NULL;
 }
 
-static int cdns_pcie_ep_map_addr(struct pci_epc *epc, u8 fn, phys_addr_t addr,
-				 u64 pci_addr, size_t size)
+static int cdns_pcie_ep_map_addr(struct pci_epc *epc, u8 fn, u8 vfn,
+				 phys_addr_t addr, u64 pci_addr, size_t size)
 {
 	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
 	struct cdns_pcie *pcie = &ep->pcie;
@@ -161,6 +194,7 @@ static int cdns_pcie_ep_map_addr(struct pci_epc *epc, u8 fn, phys_addr_t addr,
 		return -EINVAL;
 	}
 
+	fn = cdns_pcie_get_fn_from_vfn(pcie, fn, vfn);
 	cdns_pcie_set_outbound_region(pcie, 0, fn, r, false, addr, pci_addr, size);
 
 	set_bit(r, &ep->ob_region_map);
@@ -169,7 +203,7 @@ static int cdns_pcie_ep_map_addr(struct pci_epc *epc, u8 fn, phys_addr_t addr,
 	return 0;
 }
 
-static void cdns_pcie_ep_unmap_addr(struct pci_epc *epc, u8 fn,
+static void cdns_pcie_ep_unmap_addr(struct pci_epc *epc, u8 fn, u8 vfn,
 				    phys_addr_t addr)
 {
 	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
@@ -189,13 +223,15 @@ static void cdns_pcie_ep_unmap_addr(struct pci_epc *epc, u8 fn,
 	clear_bit(r, &ep->ob_region_map);
 }
 
-static int cdns_pcie_ep_set_msi(struct pci_epc *epc, u8 fn, u8 mmc)
+static int cdns_pcie_ep_set_msi(struct pci_epc *epc, u8 fn, u8 vfn, u8 mmc)
 {
 	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
 	struct cdns_pcie *pcie = &ep->pcie;
 	u32 cap = CDNS_PCIE_EP_FUNC_MSI_CAP_OFFSET;
 	u16 flags;
 
+	fn = cdns_pcie_get_fn_from_vfn(pcie, fn, vfn);
+
 	/*
 	 * Set the Multiple Message Capable bitfield into the Message Control
 	 * register.
@@ -209,13 +245,15 @@ static int cdns_pcie_ep_set_msi(struct pci_epc *epc, u8 fn, u8 mmc)
 	return 0;
 }
 
-static int cdns_pcie_ep_get_msi(struct pci_epc *epc, u8 fn)
+static int cdns_pcie_ep_get_msi(struct pci_epc *epc, u8 fn, u8 vfn)
 {
 	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
 	struct cdns_pcie *pcie = &ep->pcie;
 	u32 cap = CDNS_PCIE_EP_FUNC_MSI_CAP_OFFSET;
 	u16 flags, mme;
 
+	fn = cdns_pcie_get_fn_from_vfn(pcie, fn, vfn);
+
 	/* Validate that the MSI feature is actually enabled. */
 	flags = cdns_pcie_ep_fn_readw(pcie, fn, cap + PCI_MSI_FLAGS);
 	if (!(flags & PCI_MSI_FLAGS_ENABLE))
@@ -230,13 +268,15 @@ static int cdns_pcie_ep_get_msi(struct pci_epc *epc, u8 fn)
 	return mme;
 }
 
-static int cdns_pcie_ep_get_msix(struct pci_epc *epc, u8 func_no)
+static int cdns_pcie_ep_get_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no)
 {
 	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
 	struct cdns_pcie *pcie = &ep->pcie;
 	u32 cap = CDNS_PCIE_EP_FUNC_MSIX_CAP_OFFSET;
 	u32 val, reg;
 
+	func_no = cdns_pcie_get_fn_from_vfn(pcie, func_no, vfunc_no);
+
 	reg = cap + PCI_MSIX_FLAGS;
 	val = cdns_pcie_ep_fn_readw(pcie, func_no, reg);
 	if (!(val & PCI_MSIX_FLAGS_ENABLE))
@@ -247,14 +287,17 @@ static int cdns_pcie_ep_get_msix(struct pci_epc *epc, u8 func_no)
 	return val;
 }
 
-static int cdns_pcie_ep_set_msix(struct pci_epc *epc, u8 fn, u16 interrupts,
-				 enum pci_barno bir, u32 offset)
+static int cdns_pcie_ep_set_msix(struct pci_epc *epc, u8 fn, u8 vfn,
+				 u16 interrupts, enum pci_barno bir,
+				 u32 offset)
 {
 	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
 	struct cdns_pcie *pcie = &ep->pcie;
 	u32 cap = CDNS_PCIE_EP_FUNC_MSIX_CAP_OFFSET;
 	u32 val, reg;
 
+	fn = cdns_pcie_get_fn_from_vfn(pcie, fn, vfn);
+
 	reg = cap + PCI_MSIX_FLAGS;
 	val = cdns_pcie_ep_fn_readw(pcie, fn, reg);
 	val &= ~PCI_MSIX_FLAGS_QSIZE;
@@ -274,8 +317,8 @@ static int cdns_pcie_ep_set_msix(struct pci_epc *epc, u8 fn, u16 interrupts,
 	return 0;
 }
 
-static void cdns_pcie_ep_assert_intx(struct cdns_pcie_ep *ep, u8 fn,
-				     u8 intx, bool is_asserted)
+static void cdns_pcie_ep_assert_intx(struct cdns_pcie_ep *ep, u8 fn, u8 intx,
+				     bool is_asserted)
 {
 	struct cdns_pcie *pcie = &ep->pcie;
 	unsigned long flags;
@@ -317,7 +360,8 @@ static void cdns_pcie_ep_assert_intx(struct cdns_pcie_ep *ep, u8 fn,
 	writel(0, ep->irq_cpu_addr + offset);
 }
 
-static int cdns_pcie_ep_send_legacy_irq(struct cdns_pcie_ep *ep, u8 fn, u8 intx)
+static int cdns_pcie_ep_send_legacy_irq(struct cdns_pcie_ep *ep, u8 fn, u8 vfn,
+					u8 intx)
 {
 	u16 cmd;
 
@@ -334,7 +378,7 @@ static int cdns_pcie_ep_send_legacy_irq(struct cdns_pcie_ep *ep, u8 fn, u8 intx)
 	return 0;
 }
 
-static int cdns_pcie_ep_send_msi_irq(struct cdns_pcie_ep *ep, u8 fn,
+static int cdns_pcie_ep_send_msi_irq(struct cdns_pcie_ep *ep, u8 fn, u8 vfn,
 				     u8 interrupt_num)
 {
 	struct cdns_pcie *pcie = &ep->pcie;
@@ -343,6 +387,8 @@ static int cdns_pcie_ep_send_msi_irq(struct cdns_pcie_ep *ep, u8 fn,
 	u8 msi_count;
 	u64 pci_addr, pci_addr_mask = 0xff;
 
+	fn = cdns_pcie_get_fn_from_vfn(pcie, fn, vfn);
+
 	/* Check whether the MSI feature has been enabled by the PCI host. */
 	flags = cdns_pcie_ep_fn_readw(pcie, fn, cap + PCI_MSI_FLAGS);
 	if (!(flags & PCI_MSI_FLAGS_ENABLE))
@@ -382,7 +428,7 @@ static int cdns_pcie_ep_send_msi_irq(struct cdns_pcie_ep *ep, u8 fn,
 	return 0;
 }
 
-static int cdns_pcie_ep_map_msi_irq(struct pci_epc *epc, u8 fn,
+static int cdns_pcie_ep_map_msi_irq(struct pci_epc *epc, u8 fn, u8 vfn,
 				    phys_addr_t addr, u8 interrupt_num,
 				    u32 entry_size, u32 *msi_data,
 				    u32 *msi_addr_offset)
@@ -396,6 +442,8 @@ static int cdns_pcie_ep_map_msi_irq(struct pci_epc *epc, u8 fn,
 	int ret;
 	int i;
 
+	fn = cdns_pcie_get_fn_from_vfn(pcie, fn, vfn);
+
 	/* Check whether the MSI feature has been enabled by the PCI host. */
 	flags = cdns_pcie_ep_fn_readw(pcie, fn, cap + PCI_MSI_FLAGS);
 	if (!(flags & PCI_MSI_FLAGS_ENABLE))
@@ -419,7 +467,7 @@ static int cdns_pcie_ep_map_msi_irq(struct pci_epc *epc, u8 fn,
 	pci_addr &= GENMASK_ULL(63, 2);
 
 	for (i = 0; i < interrupt_num; i++) {
-		ret = cdns_pcie_ep_map_addr(epc, fn, addr,
+		ret = cdns_pcie_ep_map_addr(epc, fn, vfn, addr,
 					    pci_addr & ~pci_addr_mask,
 					    entry_size);
 		if (ret)
@@ -433,7 +481,7 @@ static int cdns_pcie_ep_map_msi_irq(struct pci_epc *epc, u8 fn,
 	return 0;
 }
 
-static int cdns_pcie_ep_send_msix_irq(struct cdns_pcie_ep *ep, u8 fn,
+static int cdns_pcie_ep_send_msix_irq(struct cdns_pcie_ep *ep, u8 fn, u8 vfn,
 				      u16 interrupt_num)
 {
 	u32 cap = CDNS_PCIE_EP_FUNC_MSIX_CAP_OFFSET;
@@ -446,6 +494,12 @@ static int cdns_pcie_ep_send_msix_irq(struct cdns_pcie_ep *ep, u8 fn,
 	u16 flags;
 	u8 bir;
 
+	epf = &ep->epf[fn];
+	if (vfn > 0)
+		epf = &epf->epf[vfn - 1];
+
+	fn = cdns_pcie_get_fn_from_vfn(pcie, fn, vfn);
+
 	/* Check whether the MSI-X feature has been enabled by the PCI host. */
 	flags = cdns_pcie_ep_fn_readw(pcie, fn, cap + PCI_MSIX_FLAGS);
 	if (!(flags & PCI_MSIX_FLAGS_ENABLE))
@@ -456,7 +510,6 @@ static int cdns_pcie_ep_send_msix_irq(struct cdns_pcie_ep *ep, u8 fn,
 	bir = tbl_offset & PCI_MSIX_TABLE_BIR;
 	tbl_offset &= PCI_MSIX_TABLE_OFFSET;
 
-	epf = &ep->epf[fn];
 	msix_tbl = epf->epf_bar[bir]->addr + tbl_offset;
 	msg_addr = msix_tbl[(interrupt_num - 1)].msg_addr;
 	msg_data = msix_tbl[(interrupt_num - 1)].msg_data;
@@ -478,21 +531,27 @@ static int cdns_pcie_ep_send_msix_irq(struct cdns_pcie_ep *ep, u8 fn,
 	return 0;
 }
 
-static int cdns_pcie_ep_raise_irq(struct pci_epc *epc, u8 fn,
+static int cdns_pcie_ep_raise_irq(struct pci_epc *epc, u8 fn, u8 vfn,
 				  enum pci_epc_irq_type type,
 				  u16 interrupt_num)
 {
 	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
+	struct cdns_pcie *pcie = &ep->pcie;
+	struct device *dev = pcie->dev;
 
 	switch (type) {
 	case PCI_EPC_IRQ_LEGACY:
-		return cdns_pcie_ep_send_legacy_irq(ep, fn, 0);
+		if (vfn > 0) {
+			dev_err(dev, "Cannot raise legacy interrupts for VF\n");
+			return -EINVAL;
+		}
+		return cdns_pcie_ep_send_legacy_irq(ep, fn, vfn, 0);
 
 	case PCI_EPC_IRQ_MSI:
-		return cdns_pcie_ep_send_msi_irq(ep, fn, interrupt_num);
+		return cdns_pcie_ep_send_msi_irq(ep, fn, vfn, interrupt_num);
 
 	case PCI_EPC_IRQ_MSIX:
-		return cdns_pcie_ep_send_msix_irq(ep, fn, interrupt_num);
+		return cdns_pcie_ep_send_msix_irq(ep, fn, vfn, interrupt_num);
 
 	default:
 		break;
@@ -523,6 +582,13 @@ static int cdns_pcie_ep_start(struct pci_epc *epc)
 	return 0;
 }
 
+static const struct pci_epc_features cdns_pcie_epc_vf_features = {
+	.linkup_notifier = false,
+	.msi_capable = true,
+	.msix_capable = true,
+	.align = 65536,
+};
+
 static const struct pci_epc_features cdns_pcie_epc_features = {
 	.linkup_notifier = false,
 	.msi_capable = true,
@@ -531,9 +597,12 @@ static const struct pci_epc_features cdns_pcie_epc_features = {
 };
 
 static const struct pci_epc_features*
-cdns_pcie_ep_get_features(struct pci_epc *epc, u8 func_no)
+cdns_pcie_ep_get_features(struct pci_epc *epc, u8 func_no, u8 vfunc_no)
 {
-	return &cdns_pcie_epc_features;
+	if (!vfunc_no)
+		return &cdns_pcie_epc_features;
+
+	return &cdns_pcie_epc_vf_features;
 }
 
 static const struct pci_epc_ops cdns_pcie_epc_ops = {
@@ -559,9 +628,11 @@ int cdns_pcie_ep_setup(struct cdns_pcie_ep *ep)
 	struct platform_device *pdev = to_platform_device(dev);
 	struct device_node *np = dev->of_node;
 	struct cdns_pcie *pcie = &ep->pcie;
+	struct cdns_pcie_epf *epf;
 	struct resource *res;
 	struct pci_epc *epc;
 	int ret;
+	int i;
 
 	pcie->is_rc = false;
 
@@ -606,6 +677,25 @@ int cdns_pcie_ep_setup(struct cdns_pcie_ep *ep)
 	if (!ep->epf)
 		return -ENOMEM;
 
+	epc->max_vfs = devm_kcalloc(dev, epc->max_functions,
+				    sizeof(*epc->max_vfs), GFP_KERNEL);
+	if (!epc->max_vfs)
+		return -ENOMEM;
+
+	ret = of_property_read_u8_array(np, "max-virtual-functions",
+					epc->max_vfs, epc->max_functions);
+	if (ret == 0) {
+		for (i = 0; i < epc->max_functions; i++) {
+			epf = &ep->epf[i];
+			if (epc->max_vfs[i] == 0)
+				continue;
+			epf->epf = devm_kcalloc(dev, epc->max_vfs[i],
+						sizeof(*ep->epf), GFP_KERNEL);
+			if (!epf->epf)
+				return -ENOMEM;
+		}
+	}
+
 	ret = pci_epc_mem_init(epc, pcie->mem_res->start,
 			       resource_size(pcie->mem_res), PAGE_SIZE);
 	if (ret < 0) {
@@ -623,6 +713,10 @@ int cdns_pcie_ep_setup(struct cdns_pcie_ep *ep)
 	ep->irq_pci_addr = CDNS_PCIE_EP_IRQ_PCI_ADDR_NONE;
 	/* Reserve region 0 for IRQs */
 	set_bit(0, &ep->ob_region_map);
+
+	if (ep->quirk_detect_quiet_flag)
+		cdns_pcie_detect_quiet_min_delay_set(&ep->pcie);
+
 	spin_lock_init(&ep->lock);
 
 	return 0;
diff --git a/drivers/pci/controller/cadence/pcie-cadence-host.c b/drivers/pci/controller/cadence/pcie-cadence-host.c
index ae1c55503513..fb96d37a135c 100644
--- a/drivers/pci/controller/cadence/pcie-cadence-host.c
+++ b/drivers/pci/controller/cadence/pcie-cadence-host.c
@@ -498,6 +498,9 @@ int cdns_pcie_host_setup(struct cdns_pcie_rc *rc)
 		return PTR_ERR(rc->cfg_base);
 	rc->cfg_res = res;
 
+	if (rc->quirk_detect_quiet_flag)
+		cdns_pcie_detect_quiet_min_delay_set(&rc->pcie);
+
 	ret = cdns_pcie_start_link(pcie);
 	if (ret) {
 		dev_err(dev, "Failed to start link\n");
diff --git a/drivers/pci/controller/cadence/pcie-cadence.c b/drivers/pci/controller/cadence/pcie-cadence.c
index 3c3646502d05..52767f26048f 100644
--- a/drivers/pci/controller/cadence/pcie-cadence.c
+++ b/drivers/pci/controller/cadence/pcie-cadence.c
@@ -7,6 +7,22 @@
 
 #include "pcie-cadence.h"
 
+void cdns_pcie_detect_quiet_min_delay_set(struct cdns_pcie *pcie)
+{
+	u32 delay = 0x3;
+	u32 ltssm_control_cap;
+
+	/*
+	 * Set the LTSSM Detect Quiet state min. delay to 2ms.
+	 */
+	ltssm_control_cap = cdns_pcie_readl(pcie, CDNS_PCIE_LTSSM_CONTROL_CAP);
+	ltssm_control_cap = ((ltssm_control_cap &
+			    ~CDNS_PCIE_DETECT_QUIET_MIN_DELAY_MASK) |
+			    CDNS_PCIE_DETECT_QUIET_MIN_DELAY(delay));
+
+	cdns_pcie_writel(pcie, CDNS_PCIE_LTSSM_CONTROL_CAP, ltssm_control_cap);
+}
+
 void cdns_pcie_set_outbound_region(struct cdns_pcie *pcie, u8 busnr, u8 fn,
 				   u32 r, bool is_io,
 				   u64 cpu_addr, u64 pci_addr, size_t size)
diff --git a/drivers/pci/controller/cadence/pcie-cadence.h b/drivers/pci/controller/cadence/pcie-cadence.h
index 30db2d68c17a..262421e5d917 100644
--- a/drivers/pci/controller/cadence/pcie-cadence.h
+++ b/drivers/pci/controller/cadence/pcie-cadence.h
@@ -8,6 +8,7 @@
 
 #include <linux/kernel.h>
 #include <linux/pci.h>
+#include <linux/pci-epf.h>
 #include <linux/phy/phy.h>
 
 /* Parameters for the waiting for link up routine */
@@ -46,10 +47,18 @@
 #define  CDNS_PCIE_LM_EP_ID_BUS_SHIFT	8
 
 /* Endpoint Function f BAR b Configuration Registers */
+#define CDNS_PCIE_LM_EP_FUNC_BAR_CFG(bar, fn) \
+	(((bar) < BAR_4) ? CDNS_PCIE_LM_EP_FUNC_BAR_CFG0(fn) : CDNS_PCIE_LM_EP_FUNC_BAR_CFG1(fn))
 #define CDNS_PCIE_LM_EP_FUNC_BAR_CFG0(fn) \
 	(CDNS_PCIE_LM_BASE + 0x0240 + (fn) * 0x0008)
 #define CDNS_PCIE_LM_EP_FUNC_BAR_CFG1(fn) \
 	(CDNS_PCIE_LM_BASE + 0x0244 + (fn) * 0x0008)
+#define CDNS_PCIE_LM_EP_VFUNC_BAR_CFG(bar, fn) \
+	(((bar) < BAR_4) ? CDNS_PCIE_LM_EP_VFUNC_BAR_CFG0(fn) : CDNS_PCIE_LM_EP_VFUNC_BAR_CFG1(fn))
+#define CDNS_PCIE_LM_EP_VFUNC_BAR_CFG0(fn) \
+	(CDNS_PCIE_LM_BASE + 0x0280 + (fn) * 0x0008)
+#define CDNS_PCIE_LM_EP_VFUNC_BAR_CFG1(fn) \
+	(CDNS_PCIE_LM_BASE + 0x0284 + (fn) * 0x0008)
 #define  CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_APERTURE_MASK(b) \
 	(GENMASK(4, 0) << ((b) * 8))
 #define  CDNS_PCIE_LM_EP_FUNC_BAR_CFG_BAR_APERTURE(b, a) \
@@ -114,6 +123,7 @@
 
 #define CDNS_PCIE_EP_FUNC_MSI_CAP_OFFSET	0x90
 #define CDNS_PCIE_EP_FUNC_MSIX_CAP_OFFSET	0xb0
+#define CDNS_PCIE_EP_FUNC_SRIOV_CAP_OFFSET	0x200
 
 /*
  * Root Port Registers (PCI configuration space for the root port function)
@@ -189,6 +199,14 @@
 /* AXI link down register */
 #define CDNS_PCIE_AT_LINKDOWN (CDNS_PCIE_AT_BASE + 0x0824)
 
+/* LTSSM Capabilities register */
+#define CDNS_PCIE_LTSSM_CONTROL_CAP             (CDNS_PCIE_LM_BASE + 0x0054)
+#define  CDNS_PCIE_DETECT_QUIET_MIN_DELAY_MASK  GENMASK(2, 1)
+#define  CDNS_PCIE_DETECT_QUIET_MIN_DELAY_SHIFT 1
+#define  CDNS_PCIE_DETECT_QUIET_MIN_DELAY(delay) \
+	 (((delay) << CDNS_PCIE_DETECT_QUIET_MIN_DELAY_SHIFT) & \
+	 CDNS_PCIE_DETECT_QUIET_MIN_DELAY_MASK)
+
 enum cdns_pcie_rp_bar {
 	RP_BAR_UNDEFINED = -1,
 	RP_BAR0,
@@ -295,6 +313,7 @@ struct cdns_pcie {
  * @avail_ib_bar: Satus of RP_BAR0, RP_BAR1 and	RP_NO_BAR if it's free or
  *                available
  * @quirk_retrain_flag: Retrain link as quirk for PCIe Gen2
+ * @quirk_detect_quiet_flag: LTSSM Detect Quiet min delay set as quirk
  */
 struct cdns_pcie_rc {
 	struct cdns_pcie	pcie;
@@ -303,14 +322,17 @@ struct cdns_pcie_rc {
 	u32			vendor_id;
 	u32			device_id;
 	bool			avail_ib_bar[CDNS_PCIE_RP_MAX_IB];
-	bool                    quirk_retrain_flag;
+	unsigned int		quirk_retrain_flag:1;
+	unsigned int		quirk_detect_quiet_flag:1;
 };
 
 /**
  * struct cdns_pcie_epf - Structure to hold info about endpoint function
+ * @epf: Info about virtual functions attached to the physical function
  * @epf_bar: reference to the pci_epf_bar for the six Base Address Registers
  */
 struct cdns_pcie_epf {
+	struct cdns_pcie_epf *epf;
 	struct pci_epf_bar *epf_bar[PCI_STD_NUM_BARS];
 };
 
@@ -334,6 +356,7 @@ struct cdns_pcie_epf {
  *        registers fields (RMW) accessible by both remote RC and EP to
  *        minimize time between read and write
  * @epf: Structure to hold info about endpoint function
+ * @quirk_detect_quiet_flag: LTSSM Detect Quiet min delay set as quirk
  */
 struct cdns_pcie_ep {
 	struct cdns_pcie	pcie;
@@ -348,6 +371,7 @@ struct cdns_pcie_ep {
 	/* protect writing to PCI_STATUS while raising legacy interrupts */
 	spinlock_t		lock;
 	struct cdns_pcie_epf	*epf;
+	unsigned int		quirk_detect_quiet_flag:1;
 };
 
 
@@ -508,6 +532,9 @@ static inline int cdns_pcie_ep_setup(struct cdns_pcie_ep *ep)
 	return 0;
 }
 #endif
+
+void cdns_pcie_detect_quiet_min_delay_set(struct cdns_pcie *pcie);
+
 void cdns_pcie_set_outbound_region(struct cdns_pcie *pcie, u8 busnr, u8 fn,
 				   u32 r, bool is_io,
 				   u64 cpu_addr, u64 pci_addr, size_t size);
diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig
index 423d35872ce4..76c0a63a3f64 100644
--- a/drivers/pci/controller/dwc/Kconfig
+++ b/drivers/pci/controller/dwc/Kconfig
@@ -214,6 +214,17 @@ config PCIE_ARTPEC6_EP
 	  Enables support for the PCIe controller in the ARTPEC-6 SoC to work in
 	  endpoint mode. This uses the DesignWare core.
 
+config PCIE_ROCKCHIP_DW_HOST
+	bool "Rockchip DesignWare PCIe controller"
+	select PCIE_DW
+	select PCIE_DW_HOST
+	depends on PCI_MSI_IRQ_DOMAIN
+	depends on ARCH_ROCKCHIP || COMPILE_TEST
+	depends on OF
+	help
+	  Enables support for the DesignWare PCIe controller in the
+	  Rockchip SoC except RK3399.
+
 config PCIE_INTEL_GW
 	bool "Intel Gateway PCIe host controller support"
 	depends on OF && (X86 || COMPILE_TEST)
@@ -225,6 +236,34 @@ config PCIE_INTEL_GW
 	  The PCIe controller uses the DesignWare core plus Intel-specific
 	  hardware wrappers.
 
+config PCIE_KEEMBAY
+	bool
+
+config PCIE_KEEMBAY_HOST
+	bool "Intel Keem Bay PCIe controller - Host mode"
+	depends on ARCH_KEEMBAY || COMPILE_TEST
+	depends on PCI && PCI_MSI_IRQ_DOMAIN
+	select PCIE_DW_HOST
+	select PCIE_KEEMBAY
+	help
+	  Say 'Y' here to enable support for the PCIe controller in Keem Bay
+	  to work in host mode.
+	  The PCIe controller is based on DesignWare Hardware and uses
+	  DesignWare core functions.
+
+config PCIE_KEEMBAY_EP
+	bool "Intel Keem Bay PCIe controller - Endpoint mode"
+	depends on ARCH_KEEMBAY || COMPILE_TEST
+	depends on PCI && PCI_MSI_IRQ_DOMAIN
+	depends on PCI_ENDPOINT
+	select PCIE_DW_EP
+	select PCIE_KEEMBAY
+	help
+	  Say 'Y' here to enable support for the PCIe controller in Keem Bay
+	  to work in endpoint mode.
+	  The PCIe controller is based on DesignWare Hardware and uses
+	  DesignWare core functions.
+
 config PCIE_KIRIN
 	depends on OF && (ARM64 || COMPILE_TEST)
 	bool "HiSilicon Kirin series SoCs PCIe controllers"
@@ -286,6 +325,15 @@ config PCIE_TEGRA194_EP
 	  in order to enable device-specific features PCIE_TEGRA194_EP must be
 	  selected. This uses the DesignWare core.
 
+config PCIE_VISCONTI_HOST
+	bool "Toshiba Visconti PCIe controllers"
+	depends on ARCH_VISCONTI || COMPILE_TEST
+	depends on PCI_MSI_IRQ_DOMAIN
+	select PCIE_DW_HOST
+	help
+	  Say Y here if you want PCIe controller support on Toshiba Visconti SoC.
+	  This driver supports TMPV7708 SoC.
+
 config PCIE_UNIPHIER
 	bool "Socionext UniPhier PCIe host controllers"
 	depends on ARCH_UNIPHIER || COMPILE_TEST
diff --git a/drivers/pci/controller/dwc/Makefile b/drivers/pci/controller/dwc/Makefile
index 9e6ce0dc2f53..73244409792c 100644
--- a/drivers/pci/controller/dwc/Makefile
+++ b/drivers/pci/controller/dwc/Makefile
@@ -14,13 +14,16 @@ obj-$(CONFIG_PCI_LAYERSCAPE_EP) += pci-layerscape-ep.o
 obj-$(CONFIG_PCIE_QCOM) += pcie-qcom.o
 obj-$(CONFIG_PCIE_ARMADA_8K) += pcie-armada8k.o
 obj-$(CONFIG_PCIE_ARTPEC6) += pcie-artpec6.o
+obj-$(CONFIG_PCIE_ROCKCHIP_DW_HOST) += pcie-dw-rockchip.o
 obj-$(CONFIG_PCIE_INTEL_GW) += pcie-intel-gw.o
+obj-$(CONFIG_PCIE_KEEMBAY) += pcie-keembay.o
 obj-$(CONFIG_PCIE_KIRIN) += pcie-kirin.o
 obj-$(CONFIG_PCIE_HISI_STB) += pcie-histb.o
 obj-$(CONFIG_PCI_MESON) += pci-meson.o
 obj-$(CONFIG_PCIE_TEGRA194) += pcie-tegra194.o
 obj-$(CONFIG_PCIE_UNIPHIER) += pcie-uniphier.o
 obj-$(CONFIG_PCIE_UNIPHIER_EP) += pcie-uniphier-ep.o
+obj-$(CONFIG_PCIE_VISCONTI_HOST) += pcie-visconti.o
 
 # The following drivers are for devices that use the generic ACPI
 # pci_root.c driver but don't support standard ECAM config access.
diff --git a/drivers/pci/controller/dwc/pci-dra7xx.c b/drivers/pci/controller/dwc/pci-dra7xx.c
index 047cfbdc1330..fbbb78f6885e 100644
--- a/drivers/pci/controller/dwc/pci-dra7xx.c
+++ b/drivers/pci/controller/dwc/pci-dra7xx.c
@@ -204,7 +204,7 @@ static int dra7xx_pcie_handle_msi(struct pcie_port *pp, int index)
 {
 	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
 	unsigned long val;
-	int pos, irq;
+	int pos;
 
 	val = dw_pcie_readl_dbi(pci, PCIE_MSI_INTR0_STATUS +
 				   (index * MSI_REG_CTRL_BLOCK_SIZE));
@@ -213,9 +213,8 @@ static int dra7xx_pcie_handle_msi(struct pcie_port *pp, int index)
 
 	pos = find_next_bit(&val, MAX_MSI_IRQS_PER_CTRL, 0);
 	while (pos != MAX_MSI_IRQS_PER_CTRL) {
-		irq = irq_find_mapping(pp->irq_domain,
-				       (index * MAX_MSI_IRQS_PER_CTRL) + pos);
-		generic_handle_irq(irq);
+		generic_handle_domain_irq(pp->irq_domain,
+					  (index * MAX_MSI_IRQS_PER_CTRL) + pos);
 		pos++;
 		pos = find_next_bit(&val, MAX_MSI_IRQS_PER_CTRL, pos);
 	}
@@ -257,7 +256,7 @@ static void dra7xx_pcie_msi_irq_handler(struct irq_desc *desc)
 	struct dw_pcie *pci;
 	struct pcie_port *pp;
 	unsigned long reg;
-	u32 virq, bit;
+	u32 bit;
 
 	chained_irq_enter(chip, desc);
 
@@ -276,11 +275,8 @@ static void dra7xx_pcie_msi_irq_handler(struct irq_desc *desc)
 	case INTB:
 	case INTC:
 	case INTD:
-		for_each_set_bit(bit, &reg, PCI_NUM_INTX) {
-			virq = irq_find_mapping(dra7xx->irq_domain, bit);
-			if (virq)
-				generic_handle_irq(virq);
-		}
+		for_each_set_bit(bit, &reg, PCI_NUM_INTX)
+			generic_handle_domain_irq(dra7xx->irq_domain, bit);
 		break;
 	}
 
diff --git a/drivers/pci/controller/dwc/pci-keystone.c b/drivers/pci/controller/dwc/pci-keystone.c
index bde3b2824e89..865258d8c53c 100644
--- a/drivers/pci/controller/dwc/pci-keystone.c
+++ b/drivers/pci/controller/dwc/pci-keystone.c
@@ -259,14 +259,12 @@ static void ks_pcie_handle_legacy_irq(struct keystone_pcie *ks_pcie,
 	struct dw_pcie *pci = ks_pcie->pci;
 	struct device *dev = pci->dev;
 	u32 pending;
-	int virq;
 
 	pending = ks_pcie_app_readl(ks_pcie, IRQ_STATUS(offset));
 
 	if (BIT(0) & pending) {
-		virq = irq_linear_revmap(ks_pcie->legacy_irq_domain, offset);
-		dev_dbg(dev, ": irq: irq_offset %d, virq %d\n", offset, virq);
-		generic_handle_irq(virq);
+		dev_dbg(dev, ": irq: irq_offset %d", offset);
+		generic_handle_domain_irq(ks_pcie->legacy_irq_domain, offset);
 	}
 
 	/* EOI the INTx interrupt */
@@ -579,7 +577,7 @@ static void ks_pcie_msi_irq_handler(struct irq_desc *desc)
 	struct pcie_port *pp = &pci->pp;
 	struct device *dev = pci->dev;
 	struct irq_chip *chip = irq_desc_get_chip(desc);
-	u32 vector, virq, reg, pos;
+	u32 vector, reg, pos;
 
 	dev_dbg(dev, "%s, irq %d\n", __func__, irq);
 
@@ -600,10 +598,8 @@ static void ks_pcie_msi_irq_handler(struct irq_desc *desc)
 			continue;
 
 		vector = offset + (pos << 3);
-		virq = irq_linear_revmap(pp->irq_domain, vector);
-		dev_dbg(dev, "irq: bit %d, vector %d, virq %d\n", pos, vector,
-			virq);
-		generic_handle_irq(virq);
+		dev_dbg(dev, "irq: bit %d, vector %d\n", pos, vector);
+		generic_handle_domain_irq(pp->irq_domain, vector);
 	}
 
 	chained_irq_exit(chip, desc);
diff --git a/drivers/pci/controller/dwc/pcie-artpec6.c b/drivers/pci/controller/dwc/pcie-artpec6.c
index 597c282f586c..c91fc1954432 100644
--- a/drivers/pci/controller/dwc/pcie-artpec6.c
+++ b/drivers/pci/controller/dwc/pcie-artpec6.c
@@ -384,6 +384,7 @@ static int artpec6_pcie_probe(struct platform_device *pdev)
 	const struct artpec_pcie_of_data *data;
 	enum artpec_pcie_variants variant;
 	enum dw_pcie_device_mode mode;
+	u32 val;
 
 	match = of_match_device(artpec6_pcie_of_match, dev);
 	if (!match)
@@ -432,9 +433,7 @@ static int artpec6_pcie_probe(struct platform_device *pdev)
 		if (ret < 0)
 			return ret;
 		break;
-	case DW_PCIE_EP_TYPE: {
-		u32 val;
-
+	case DW_PCIE_EP_TYPE:
 		if (!IS_ENABLED(CONFIG_PCIE_ARTPEC6_EP))
 			return -ENODEV;
 
@@ -445,8 +444,6 @@ static int artpec6_pcie_probe(struct platform_device *pdev)
 		pci->ep.ops = &pcie_ep_ops;
 
 		return dw_pcie_ep_init(&pci->ep);
-		break;
-	}
 	default:
 		dev_err(dev, "INVALID device type %d\n", artpec6_pcie->mode);
 	}
diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c
index 8d028a88b375..998b698f4085 100644
--- a/drivers/pci/controller/dwc/pcie-designware-ep.c
+++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
@@ -125,7 +125,7 @@ static u8 dw_pcie_ep_find_capability(struct dw_pcie_ep *ep, u8 func_no, u8 cap)
 	return __dw_pcie_ep_find_next_cap(ep, func_no, next_cap_ptr, cap);
 }
 
-static int dw_pcie_ep_write_header(struct pci_epc *epc, u8 func_no,
+static int dw_pcie_ep_write_header(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 				   struct pci_epf_header *hdr)
 {
 	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
@@ -202,7 +202,7 @@ static int dw_pcie_ep_outbound_atu(struct dw_pcie_ep *ep, u8 func_no,
 	return 0;
 }
 
-static void dw_pcie_ep_clear_bar(struct pci_epc *epc, u8 func_no,
+static void dw_pcie_ep_clear_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 				 struct pci_epf_bar *epf_bar)
 {
 	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
@@ -217,7 +217,7 @@ static void dw_pcie_ep_clear_bar(struct pci_epc *epc, u8 func_no,
 	ep->epf_bar[bar] = NULL;
 }
 
-static int dw_pcie_ep_set_bar(struct pci_epc *epc, u8 func_no,
+static int dw_pcie_ep_set_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 			      struct pci_epf_bar *epf_bar)
 {
 	int ret;
@@ -276,7 +276,7 @@ static int dw_pcie_find_index(struct dw_pcie_ep *ep, phys_addr_t addr,
 	return -EINVAL;
 }
 
-static void dw_pcie_ep_unmap_addr(struct pci_epc *epc, u8 func_no,
+static void dw_pcie_ep_unmap_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 				  phys_addr_t addr)
 {
 	int ret;
@@ -292,9 +292,8 @@ static void dw_pcie_ep_unmap_addr(struct pci_epc *epc, u8 func_no,
 	clear_bit(atu_index, ep->ob_window_map);
 }
 
-static int dw_pcie_ep_map_addr(struct pci_epc *epc, u8 func_no,
-			       phys_addr_t addr,
-			       u64 pci_addr, size_t size)
+static int dw_pcie_ep_map_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+			       phys_addr_t addr, u64 pci_addr, size_t size)
 {
 	int ret;
 	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
@@ -309,7 +308,7 @@ static int dw_pcie_ep_map_addr(struct pci_epc *epc, u8 func_no,
 	return 0;
 }
 
-static int dw_pcie_ep_get_msi(struct pci_epc *epc, u8 func_no)
+static int dw_pcie_ep_get_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no)
 {
 	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
 	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
@@ -333,7 +332,8 @@ static int dw_pcie_ep_get_msi(struct pci_epc *epc, u8 func_no)
 	return val;
 }
 
-static int dw_pcie_ep_set_msi(struct pci_epc *epc, u8 func_no, u8 interrupts)
+static int dw_pcie_ep_set_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+			      u8 interrupts)
 {
 	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
 	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
@@ -358,7 +358,7 @@ static int dw_pcie_ep_set_msi(struct pci_epc *epc, u8 func_no, u8 interrupts)
 	return 0;
 }
 
-static int dw_pcie_ep_get_msix(struct pci_epc *epc, u8 func_no)
+static int dw_pcie_ep_get_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no)
 {
 	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
 	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
@@ -382,8 +382,8 @@ static int dw_pcie_ep_get_msix(struct pci_epc *epc, u8 func_no)
 	return val;
 }
 
-static int dw_pcie_ep_set_msix(struct pci_epc *epc, u8 func_no, u16 interrupts,
-			       enum pci_barno bir, u32 offset)
+static int dw_pcie_ep_set_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+			       u16 interrupts, enum pci_barno bir, u32 offset)
 {
 	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
 	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
@@ -418,7 +418,7 @@ static int dw_pcie_ep_set_msix(struct pci_epc *epc, u8 func_no, u16 interrupts,
 	return 0;
 }
 
-static int dw_pcie_ep_raise_irq(struct pci_epc *epc, u8 func_no,
+static int dw_pcie_ep_raise_irq(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 				enum pci_epc_irq_type type, u16 interrupt_num)
 {
 	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
@@ -450,7 +450,7 @@ static int dw_pcie_ep_start(struct pci_epc *epc)
 }
 
 static const struct pci_epc_features*
-dw_pcie_ep_get_features(struct pci_epc *epc, u8 func_no)
+dw_pcie_ep_get_features(struct pci_epc *epc, u8 func_no, u8 vfunc_no)
 {
 	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
 
@@ -525,14 +525,14 @@ int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep, u8 func_no,
 	aligned_offset = msg_addr_lower & (epc->mem->window.page_size - 1);
 	msg_addr = ((u64)msg_addr_upper) << 32 |
 			(msg_addr_lower & ~aligned_offset);
-	ret = dw_pcie_ep_map_addr(epc, func_no, ep->msi_mem_phys, msg_addr,
+	ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr,
 				  epc->mem->window.page_size);
 	if (ret)
 		return ret;
 
 	writel(msg_data | (interrupt_num - 1), ep->msi_mem + aligned_offset);
 
-	dw_pcie_ep_unmap_addr(epc, func_no, ep->msi_mem_phys);
+	dw_pcie_ep_unmap_addr(epc, func_no, 0, ep->msi_mem_phys);
 
 	return 0;
 }
@@ -593,14 +593,14 @@ int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no,
 	}
 
 	aligned_offset = msg_addr & (epc->mem->window.page_size - 1);
-	ret = dw_pcie_ep_map_addr(epc, func_no, ep->msi_mem_phys,  msg_addr,
+	ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr,
 				  epc->mem->window.page_size);
 	if (ret)
 		return ret;
 
 	writel(msg_data, ep->msi_mem + aligned_offset);
 
-	dw_pcie_ep_unmap_addr(epc, func_no, ep->msi_mem_phys);
+	dw_pcie_ep_unmap_addr(epc, func_no, 0, ep->msi_mem_phys);
 
 	return 0;
 }
diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c
index a608ae1fad57..d1d9b8344ec9 100644
--- a/drivers/pci/controller/dwc/pcie-designware-host.c
+++ b/drivers/pci/controller/dwc/pcie-designware-host.c
@@ -55,7 +55,7 @@ static struct msi_domain_info dw_pcie_msi_domain_info = {
 /* MSI int handler */
 irqreturn_t dw_handle_msi_irq(struct pcie_port *pp)
 {
-	int i, pos, irq;
+	int i, pos;
 	unsigned long val;
 	u32 status, num_ctrls;
 	irqreturn_t ret = IRQ_NONE;
@@ -74,10 +74,9 @@ irqreturn_t dw_handle_msi_irq(struct pcie_port *pp)
 		pos = 0;
 		while ((pos = find_next_bit(&val, MAX_MSI_IRQS_PER_CTRL,
 					    pos)) != MAX_MSI_IRQS_PER_CTRL) {
-			irq = irq_find_mapping(pp->irq_domain,
-					       (i * MAX_MSI_IRQS_PER_CTRL) +
-					       pos);
-			generic_handle_irq(irq);
+			generic_handle_domain_irq(pp->irq_domain,
+						  (i * MAX_MSI_IRQS_PER_CTRL) +
+						  pos);
 			pos++;
 		}
 	}
diff --git a/drivers/pci/controller/dwc/pcie-designware-plat.c b/drivers/pci/controller/dwc/pcie-designware-plat.c
index 9b397c807261..8851eb161a0e 100644
--- a/drivers/pci/controller/dwc/pcie-designware-plat.c
+++ b/drivers/pci/controller/dwc/pcie-designware-plat.c
@@ -164,7 +164,6 @@ static int dw_plat_pcie_probe(struct platform_device *pdev)
 
 		pci->ep.ops = &pcie_ep_ops;
 		return dw_pcie_ep_init(&pci->ep);
-		break;
 	default:
 		dev_err(dev, "INVALID device type %d\n", dw_plat_pcie->mode);
 	}
diff --git a/drivers/pci/controller/dwc/pcie-dw-rockchip.c b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
new file mode 100644
index 000000000000..c9b341e55cbb
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe host controller driver for Rockchip SoCs.
+ *
+ * Copyright (C) 2021 Rockchip Electronics Co., Ltd.
+ *		http://www.rock-chips.com
+ *
+ * Author: Simon Xue <xxm@rock-chips.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/gpio/consumer.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+
+#include "pcie-designware.h"
+
+/*
+ * The upper 16 bits of PCIE_CLIENT_CONFIG are a write
+ * mask for the lower 16 bits.
+ */
+#define HIWORD_UPDATE(mask, val) (((mask) << 16) | (val))
+#define HIWORD_UPDATE_BIT(val)	HIWORD_UPDATE(val, val)
+
+#define to_rockchip_pcie(x) dev_get_drvdata((x)->dev)
+
+#define PCIE_CLIENT_RC_MODE		HIWORD_UPDATE_BIT(0x40)
+#define PCIE_CLIENT_ENABLE_LTSSM	HIWORD_UPDATE_BIT(0xc)
+#define PCIE_SMLH_LINKUP		BIT(16)
+#define PCIE_RDLH_LINKUP		BIT(17)
+#define PCIE_LINKUP			(PCIE_SMLH_LINKUP | PCIE_RDLH_LINKUP)
+#define PCIE_L0S_ENTRY			0x11
+#define PCIE_CLIENT_GENERAL_CONTROL	0x0
+#define PCIE_CLIENT_GENERAL_DEBUG	0x104
+#define PCIE_CLIENT_HOT_RESET_CTRL      0x180
+#define PCIE_CLIENT_LTSSM_STATUS	0x300
+#define PCIE_LTSSM_ENABLE_ENHANCE       BIT(4)
+#define PCIE_LTSSM_STATUS_MASK		GENMASK(5, 0)
+
+struct rockchip_pcie {
+	struct dw_pcie			pci;
+	void __iomem			*apb_base;
+	struct phy			*phy;
+	struct clk_bulk_data		*clks;
+	unsigned int			clk_cnt;
+	struct reset_control		*rst;
+	struct gpio_desc		*rst_gpio;
+	struct regulator                *vpcie3v3;
+};
+
+static int rockchip_pcie_readl_apb(struct rockchip_pcie *rockchip,
+					     u32 reg)
+{
+	return readl_relaxed(rockchip->apb_base + reg);
+}
+
+static void rockchip_pcie_writel_apb(struct rockchip_pcie *rockchip,
+						u32 val, u32 reg)
+{
+	writel_relaxed(val, rockchip->apb_base + reg);
+}
+
+static void rockchip_pcie_enable_ltssm(struct rockchip_pcie *rockchip)
+{
+	rockchip_pcie_writel_apb(rockchip, PCIE_CLIENT_ENABLE_LTSSM,
+				 PCIE_CLIENT_GENERAL_CONTROL);
+}
+
+static int rockchip_pcie_link_up(struct dw_pcie *pci)
+{
+	struct rockchip_pcie *rockchip = to_rockchip_pcie(pci);
+	u32 val = rockchip_pcie_readl_apb(rockchip, PCIE_CLIENT_LTSSM_STATUS);
+
+	if ((val & PCIE_LINKUP) == PCIE_LINKUP &&
+	    (val & PCIE_LTSSM_STATUS_MASK) == PCIE_L0S_ENTRY)
+		return 1;
+
+	return 0;
+}
+
+static int rockchip_pcie_start_link(struct dw_pcie *pci)
+{
+	struct rockchip_pcie *rockchip = to_rockchip_pcie(pci);
+
+	/* Reset device */
+	gpiod_set_value_cansleep(rockchip->rst_gpio, 0);
+
+	rockchip_pcie_enable_ltssm(rockchip);
+
+	/*
+	 * PCIe requires the refclk to be stable for 100µs prior to releasing
+	 * PERST. See table 2-4 in section 2.6.2 AC Specifications of the PCI
+	 * Express Card Electromechanical Specification, 1.1. However, we don't
+	 * know if the refclk is coming from RC's PHY or external OSC. If it's
+	 * from RC, so enabling LTSSM is the just right place to release #PERST.
+	 * We need more extra time as before, rather than setting just
+	 * 100us as we don't know how long should the device need to reset.
+	 */
+	msleep(100);
+	gpiod_set_value_cansleep(rockchip->rst_gpio, 1);
+
+	return 0;
+}
+
+static int rockchip_pcie_host_init(struct pcie_port *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct rockchip_pcie *rockchip = to_rockchip_pcie(pci);
+	u32 val = HIWORD_UPDATE_BIT(PCIE_LTSSM_ENABLE_ENHANCE);
+
+	/* LTSSM enable control mode */
+	rockchip_pcie_writel_apb(rockchip, val, PCIE_CLIENT_HOT_RESET_CTRL);
+
+	rockchip_pcie_writel_apb(rockchip, PCIE_CLIENT_RC_MODE,
+				 PCIE_CLIENT_GENERAL_CONTROL);
+
+	return 0;
+}
+
+static const struct dw_pcie_host_ops rockchip_pcie_host_ops = {
+	.host_init = rockchip_pcie_host_init,
+};
+
+static int rockchip_pcie_clk_init(struct rockchip_pcie *rockchip)
+{
+	struct device *dev = rockchip->pci.dev;
+	int ret;
+
+	ret = devm_clk_bulk_get_all(dev, &rockchip->clks);
+	if (ret < 0)
+		return ret;
+
+	rockchip->clk_cnt = ret;
+
+	return clk_bulk_prepare_enable(rockchip->clk_cnt, rockchip->clks);
+}
+
+static int rockchip_pcie_resource_get(struct platform_device *pdev,
+				      struct rockchip_pcie *rockchip)
+{
+	rockchip->apb_base = devm_platform_ioremap_resource_byname(pdev, "apb");
+	if (IS_ERR(rockchip->apb_base))
+		return PTR_ERR(rockchip->apb_base);
+
+	rockchip->rst_gpio = devm_gpiod_get_optional(&pdev->dev, "reset",
+						     GPIOD_OUT_HIGH);
+	if (IS_ERR(rockchip->rst_gpio))
+		return PTR_ERR(rockchip->rst_gpio);
+
+	return 0;
+}
+
+static int rockchip_pcie_phy_init(struct rockchip_pcie *rockchip)
+{
+	struct device *dev = rockchip->pci.dev;
+	int ret;
+
+	rockchip->phy = devm_phy_get(dev, "pcie-phy");
+	if (IS_ERR(rockchip->phy))
+		return dev_err_probe(dev, PTR_ERR(rockchip->phy),
+				     "missing PHY\n");
+
+	ret = phy_init(rockchip->phy);
+	if (ret < 0)
+		return ret;
+
+	ret = phy_power_on(rockchip->phy);
+	if (ret)
+		phy_exit(rockchip->phy);
+
+	return ret;
+}
+
+static void rockchip_pcie_phy_deinit(struct rockchip_pcie *rockchip)
+{
+	phy_exit(rockchip->phy);
+	phy_power_off(rockchip->phy);
+}
+
+static int rockchip_pcie_reset_control_release(struct rockchip_pcie *rockchip)
+{
+	struct device *dev = rockchip->pci.dev;
+
+	rockchip->rst = devm_reset_control_array_get_exclusive(dev);
+	if (IS_ERR(rockchip->rst))
+		return dev_err_probe(dev, PTR_ERR(rockchip->rst),
+				     "failed to get reset lines\n");
+
+	return reset_control_deassert(rockchip->rst);
+}
+
+static const struct dw_pcie_ops dw_pcie_ops = {
+	.link_up = rockchip_pcie_link_up,
+	.start_link = rockchip_pcie_start_link,
+};
+
+static int rockchip_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct rockchip_pcie *rockchip;
+	struct pcie_port *pp;
+	int ret;
+
+	rockchip = devm_kzalloc(dev, sizeof(*rockchip), GFP_KERNEL);
+	if (!rockchip)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, rockchip);
+
+	rockchip->pci.dev = dev;
+	rockchip->pci.ops = &dw_pcie_ops;
+
+	pp = &rockchip->pci.pp;
+	pp->ops = &rockchip_pcie_host_ops;
+
+	ret = rockchip_pcie_resource_get(pdev, rockchip);
+	if (ret)
+		return ret;
+
+	/* DON'T MOVE ME: must be enable before PHY init */
+	rockchip->vpcie3v3 = devm_regulator_get_optional(dev, "vpcie3v3");
+	if (IS_ERR(rockchip->vpcie3v3)) {
+		if (PTR_ERR(rockchip->vpcie3v3) != -ENODEV)
+			return dev_err_probe(dev, PTR_ERR(rockchip->vpcie3v3),
+					"failed to get vpcie3v3 regulator\n");
+		rockchip->vpcie3v3 = NULL;
+	} else {
+		ret = regulator_enable(rockchip->vpcie3v3);
+		if (ret) {
+			dev_err(dev, "failed to enable vpcie3v3 regulator\n");
+			return ret;
+		}
+	}
+
+	ret = rockchip_pcie_phy_init(rockchip);
+	if (ret)
+		goto disable_regulator;
+
+	ret = rockchip_pcie_reset_control_release(rockchip);
+	if (ret)
+		goto deinit_phy;
+
+	ret = rockchip_pcie_clk_init(rockchip);
+	if (ret)
+		goto deinit_phy;
+
+	ret = dw_pcie_host_init(pp);
+	if (!ret)
+		return 0;
+
+	clk_bulk_disable_unprepare(rockchip->clk_cnt, rockchip->clks);
+deinit_phy:
+	rockchip_pcie_phy_deinit(rockchip);
+disable_regulator:
+	if (rockchip->vpcie3v3)
+		regulator_disable(rockchip->vpcie3v3);
+
+	return ret;
+}
+
+static const struct of_device_id rockchip_pcie_of_match[] = {
+	{ .compatible = "rockchip,rk3568-pcie", },
+	{},
+};
+
+static struct platform_driver rockchip_pcie_driver = {
+	.driver = {
+		.name	= "rockchip-dw-pcie",
+		.of_match_table = rockchip_pcie_of_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe = rockchip_pcie_probe,
+};
+builtin_platform_driver(rockchip_pcie_driver);
diff --git a/drivers/pci/controller/dwc/pcie-keembay.c b/drivers/pci/controller/dwc/pcie-keembay.c
new file mode 100644
index 000000000000..1ac29a6eef22
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-keembay.c
@@ -0,0 +1,460 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PCIe controller driver for Intel Keem Bay
+ * Copyright (C) 2020 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/gpio/consumer.h>
+#include <linux/init.h>
+#include <linux/iopoll.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+
+#include "pcie-designware.h"
+
+/* PCIE_REGS_APB_SLV Registers */
+#define PCIE_REGS_PCIE_CFG		0x0004
+#define  PCIE_DEVICE_TYPE		BIT(8)
+#define  PCIE_RSTN			BIT(0)
+#define PCIE_REGS_PCIE_APP_CNTRL	0x0008
+#define  APP_LTSSM_ENABLE		BIT(0)
+#define PCIE_REGS_INTERRUPT_ENABLE	0x0028
+#define  MSI_CTRL_INT_EN		BIT(8)
+#define  EDMA_INT_EN			GENMASK(7, 0)
+#define PCIE_REGS_INTERRUPT_STATUS	0x002c
+#define  MSI_CTRL_INT			BIT(8)
+#define PCIE_REGS_PCIE_SII_PM_STATE	0x00b0
+#define  SMLH_LINK_UP			BIT(19)
+#define  RDLH_LINK_UP			BIT(8)
+#define  PCIE_REGS_PCIE_SII_LINK_UP	(SMLH_LINK_UP | RDLH_LINK_UP)
+#define PCIE_REGS_PCIE_PHY_CNTL		0x0164
+#define  PHY0_SRAM_BYPASS		BIT(8)
+#define PCIE_REGS_PCIE_PHY_STAT		0x0168
+#define  PHY0_MPLLA_STATE		BIT(1)
+#define PCIE_REGS_LJPLL_STA		0x016c
+#define  LJPLL_LOCK			BIT(0)
+#define PCIE_REGS_LJPLL_CNTRL_0		0x0170
+#define  LJPLL_EN			BIT(29)
+#define  LJPLL_FOUT_EN			GENMASK(24, 21)
+#define PCIE_REGS_LJPLL_CNTRL_2		0x0178
+#define  LJPLL_REF_DIV			GENMASK(17, 12)
+#define  LJPLL_FB_DIV			GENMASK(11, 0)
+#define PCIE_REGS_LJPLL_CNTRL_3		0x017c
+#define  LJPLL_POST_DIV3A		GENMASK(24, 22)
+#define  LJPLL_POST_DIV2A		GENMASK(18, 16)
+
+#define PERST_DELAY_US		1000
+#define AUX_CLK_RATE_HZ		24000000
+
+struct keembay_pcie {
+	struct dw_pcie		pci;
+	void __iomem		*apb_base;
+	enum dw_pcie_device_mode mode;
+
+	struct clk		*clk_master;
+	struct clk		*clk_aux;
+	struct gpio_desc	*reset;
+};
+
+struct keembay_pcie_of_data {
+	enum dw_pcie_device_mode mode;
+};
+
+static void keembay_ep_reset_assert(struct keembay_pcie *pcie)
+{
+	gpiod_set_value_cansleep(pcie->reset, 1);
+	usleep_range(PERST_DELAY_US, PERST_DELAY_US + 500);
+}
+
+static void keembay_ep_reset_deassert(struct keembay_pcie *pcie)
+{
+	/*
+	 * Ensure that PERST# is asserted for a minimum of 100ms.
+	 *
+	 * For more details, refer to PCI Express Card Electromechanical
+	 * Specification Revision 1.1, Table-2.4.
+	 */
+	msleep(100);
+
+	gpiod_set_value_cansleep(pcie->reset, 0);
+	usleep_range(PERST_DELAY_US, PERST_DELAY_US + 500);
+}
+
+static void keembay_pcie_ltssm_set(struct keembay_pcie *pcie, bool enable)
+{
+	u32 val;
+
+	val = readl(pcie->apb_base + PCIE_REGS_PCIE_APP_CNTRL);
+	if (enable)
+		val |= APP_LTSSM_ENABLE;
+	else
+		val &= ~APP_LTSSM_ENABLE;
+	writel(val, pcie->apb_base + PCIE_REGS_PCIE_APP_CNTRL);
+}
+
+static int keembay_pcie_link_up(struct dw_pcie *pci)
+{
+	struct keembay_pcie *pcie = dev_get_drvdata(pci->dev);
+	u32 val;
+
+	val = readl(pcie->apb_base + PCIE_REGS_PCIE_SII_PM_STATE);
+
+	return (val & PCIE_REGS_PCIE_SII_LINK_UP) == PCIE_REGS_PCIE_SII_LINK_UP;
+}
+
+static int keembay_pcie_start_link(struct dw_pcie *pci)
+{
+	struct keembay_pcie *pcie = dev_get_drvdata(pci->dev);
+	u32 val;
+	int ret;
+
+	if (pcie->mode == DW_PCIE_EP_TYPE)
+		return 0;
+
+	keembay_pcie_ltssm_set(pcie, false);
+
+	ret = readl_poll_timeout(pcie->apb_base + PCIE_REGS_PCIE_PHY_STAT,
+				 val, val & PHY0_MPLLA_STATE, 20,
+				 500 * USEC_PER_MSEC);
+	if (ret) {
+		dev_err(pci->dev, "MPLLA is not locked\n");
+		return ret;
+	}
+
+	keembay_pcie_ltssm_set(pcie, true);
+
+	return 0;
+}
+
+static void keembay_pcie_stop_link(struct dw_pcie *pci)
+{
+	struct keembay_pcie *pcie = dev_get_drvdata(pci->dev);
+
+	keembay_pcie_ltssm_set(pcie, false);
+}
+
+static const struct dw_pcie_ops keembay_pcie_ops = {
+	.link_up	= keembay_pcie_link_up,
+	.start_link	= keembay_pcie_start_link,
+	.stop_link	= keembay_pcie_stop_link,
+};
+
+static inline struct clk *keembay_pcie_probe_clock(struct device *dev,
+						   const char *id, u64 rate)
+{
+	struct clk *clk;
+	int ret;
+
+	clk = devm_clk_get(dev, id);
+	if (IS_ERR(clk))
+		return clk;
+
+	if (rate) {
+		ret = clk_set_rate(clk, rate);
+		if (ret)
+			return ERR_PTR(ret);
+	}
+
+	ret = clk_prepare_enable(clk);
+	if (ret)
+		return ERR_PTR(ret);
+
+	ret = devm_add_action_or_reset(dev,
+				       (void(*)(void *))clk_disable_unprepare,
+				       clk);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return clk;
+}
+
+static int keembay_pcie_probe_clocks(struct keembay_pcie *pcie)
+{
+	struct dw_pcie *pci = &pcie->pci;
+	struct device *dev = pci->dev;
+
+	pcie->clk_master = keembay_pcie_probe_clock(dev, "master", 0);
+	if (IS_ERR(pcie->clk_master))
+		return dev_err_probe(dev, PTR_ERR(pcie->clk_master),
+				     "Failed to enable master clock");
+
+	pcie->clk_aux = keembay_pcie_probe_clock(dev, "aux", AUX_CLK_RATE_HZ);
+	if (IS_ERR(pcie->clk_aux))
+		return dev_err_probe(dev, PTR_ERR(pcie->clk_aux),
+				     "Failed to enable auxiliary clock");
+
+	return 0;
+}
+
+/*
+ * Initialize the internal PCIe PLL in Host mode.
+ * See the following sections in Keem Bay data book,
+ * (1) 6.4.6.1 PCIe Subsystem Example Initialization,
+ * (2) 6.8 PCIe Low Jitter PLL for Ref Clk Generation.
+ */
+static int keembay_pcie_pll_init(struct keembay_pcie *pcie)
+{
+	struct dw_pcie *pci = &pcie->pci;
+	u32 val;
+	int ret;
+
+	val = FIELD_PREP(LJPLL_REF_DIV, 0) | FIELD_PREP(LJPLL_FB_DIV, 0x32);
+	writel(val, pcie->apb_base + PCIE_REGS_LJPLL_CNTRL_2);
+
+	val = FIELD_PREP(LJPLL_POST_DIV3A, 0x2) |
+		FIELD_PREP(LJPLL_POST_DIV2A, 0x2);
+	writel(val, pcie->apb_base + PCIE_REGS_LJPLL_CNTRL_3);
+
+	val = FIELD_PREP(LJPLL_EN, 0x1) | FIELD_PREP(LJPLL_FOUT_EN, 0xc);
+	writel(val, pcie->apb_base + PCIE_REGS_LJPLL_CNTRL_0);
+
+	ret = readl_poll_timeout(pcie->apb_base + PCIE_REGS_LJPLL_STA,
+				 val, val & LJPLL_LOCK, 20,
+				 500 * USEC_PER_MSEC);
+	if (ret)
+		dev_err(pci->dev, "Low jitter PLL is not locked\n");
+
+	return ret;
+}
+
+static void keembay_pcie_msi_irq_handler(struct irq_desc *desc)
+{
+	struct keembay_pcie *pcie = irq_desc_get_handler_data(desc);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	u32 val, mask, status;
+	struct pcie_port *pp;
+
+	/*
+	 * Keem Bay PCIe Controller provides an additional IP logic on top of
+	 * standard DWC IP to clear MSI IRQ by writing '1' to the respective
+	 * bit of the status register.
+	 *
+	 * So, a chained irq handler is defined to handle this additional
+	 * IP logic.
+	 */
+
+	chained_irq_enter(chip, desc);
+
+	pp = &pcie->pci.pp;
+	val = readl(pcie->apb_base + PCIE_REGS_INTERRUPT_STATUS);
+	mask = readl(pcie->apb_base + PCIE_REGS_INTERRUPT_ENABLE);
+
+	status = val & mask;
+
+	if (status & MSI_CTRL_INT) {
+		dw_handle_msi_irq(pp);
+		writel(status, pcie->apb_base + PCIE_REGS_INTERRUPT_STATUS);
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+static int keembay_pcie_setup_msi_irq(struct keembay_pcie *pcie)
+{
+	struct dw_pcie *pci = &pcie->pci;
+	struct device *dev = pci->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	int irq;
+
+	irq = platform_get_irq_byname(pdev, "pcie");
+	if (irq < 0)
+		return irq;
+
+	irq_set_chained_handler_and_data(irq, keembay_pcie_msi_irq_handler,
+					 pcie);
+
+	return 0;
+}
+
+static void keembay_pcie_ep_init(struct dw_pcie_ep *ep)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	struct keembay_pcie *pcie = dev_get_drvdata(pci->dev);
+
+	writel(EDMA_INT_EN, pcie->apb_base + PCIE_REGS_INTERRUPT_ENABLE);
+}
+
+static int keembay_pcie_ep_raise_irq(struct dw_pcie_ep *ep, u8 func_no,
+				     enum pci_epc_irq_type type,
+				     u16 interrupt_num)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+
+	switch (type) {
+	case PCI_EPC_IRQ_LEGACY:
+		/* Legacy interrupts are not supported in Keem Bay */
+		dev_err(pci->dev, "Legacy IRQ is not supported\n");
+		return -EINVAL;
+	case PCI_EPC_IRQ_MSI:
+		return dw_pcie_ep_raise_msi_irq(ep, func_no, interrupt_num);
+	case PCI_EPC_IRQ_MSIX:
+		return dw_pcie_ep_raise_msix_irq(ep, func_no, interrupt_num);
+	default:
+		dev_err(pci->dev, "Unknown IRQ type %d\n", type);
+		return -EINVAL;
+	}
+}
+
+static const struct pci_epc_features keembay_pcie_epc_features = {
+	.linkup_notifier	= false,
+	.msi_capable		= true,
+	.msix_capable		= true,
+	.reserved_bar		= BIT(BAR_1) | BIT(BAR_3) | BIT(BAR_5),
+	.bar_fixed_64bit	= BIT(BAR_0) | BIT(BAR_2) | BIT(BAR_4),
+	.align			= SZ_16K,
+};
+
+static const struct pci_epc_features *
+keembay_pcie_get_features(struct dw_pcie_ep *ep)
+{
+	return &keembay_pcie_epc_features;
+}
+
+static const struct dw_pcie_ep_ops keembay_pcie_ep_ops = {
+	.ep_init	= keembay_pcie_ep_init,
+	.raise_irq	= keembay_pcie_ep_raise_irq,
+	.get_features	= keembay_pcie_get_features,
+};
+
+static const struct dw_pcie_host_ops keembay_pcie_host_ops = {
+};
+
+static int keembay_pcie_add_pcie_port(struct keembay_pcie *pcie,
+				      struct platform_device *pdev)
+{
+	struct dw_pcie *pci = &pcie->pci;
+	struct pcie_port *pp = &pci->pp;
+	struct device *dev = &pdev->dev;
+	u32 val;
+	int ret;
+
+	pp->ops = &keembay_pcie_host_ops;
+	pp->msi_irq = -ENODEV;
+
+	ret = keembay_pcie_setup_msi_irq(pcie);
+	if (ret)
+		return ret;
+
+	pcie->reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH);
+	if (IS_ERR(pcie->reset))
+		return PTR_ERR(pcie->reset);
+
+	ret = keembay_pcie_probe_clocks(pcie);
+	if (ret)
+		return ret;
+
+	val = readl(pcie->apb_base + PCIE_REGS_PCIE_PHY_CNTL);
+	val |= PHY0_SRAM_BYPASS;
+	writel(val, pcie->apb_base + PCIE_REGS_PCIE_PHY_CNTL);
+
+	writel(PCIE_DEVICE_TYPE, pcie->apb_base + PCIE_REGS_PCIE_CFG);
+
+	ret = keembay_pcie_pll_init(pcie);
+	if (ret)
+		return ret;
+
+	val = readl(pcie->apb_base + PCIE_REGS_PCIE_CFG);
+	writel(val | PCIE_RSTN, pcie->apb_base + PCIE_REGS_PCIE_CFG);
+	keembay_ep_reset_deassert(pcie);
+
+	ret = dw_pcie_host_init(pp);
+	if (ret) {
+		keembay_ep_reset_assert(pcie);
+		dev_err(dev, "Failed to initialize host: %d\n", ret);
+		return ret;
+	}
+
+	val = readl(pcie->apb_base + PCIE_REGS_INTERRUPT_ENABLE);
+	if (IS_ENABLED(CONFIG_PCI_MSI))
+		val |= MSI_CTRL_INT_EN;
+	writel(val, pcie->apb_base + PCIE_REGS_INTERRUPT_ENABLE);
+
+	return 0;
+}
+
+static int keembay_pcie_probe(struct platform_device *pdev)
+{
+	const struct keembay_pcie_of_data *data;
+	struct device *dev = &pdev->dev;
+	struct keembay_pcie *pcie;
+	struct dw_pcie *pci;
+	enum dw_pcie_device_mode mode;
+
+	data = device_get_match_data(dev);
+	if (!data)
+		return -ENODEV;
+
+	mode = (enum dw_pcie_device_mode)data->mode;
+
+	pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+	if (!pcie)
+		return -ENOMEM;
+
+	pci = &pcie->pci;
+	pci->dev = dev;
+	pci->ops = &keembay_pcie_ops;
+
+	pcie->mode = mode;
+
+	pcie->apb_base = devm_platform_ioremap_resource_byname(pdev, "apb");
+	if (IS_ERR(pcie->apb_base))
+		return PTR_ERR(pcie->apb_base);
+
+	platform_set_drvdata(pdev, pcie);
+
+	switch (pcie->mode) {
+	case DW_PCIE_RC_TYPE:
+		if (!IS_ENABLED(CONFIG_PCIE_KEEMBAY_HOST))
+			return -ENODEV;
+
+		return keembay_pcie_add_pcie_port(pcie, pdev);
+	case DW_PCIE_EP_TYPE:
+		if (!IS_ENABLED(CONFIG_PCIE_KEEMBAY_EP))
+			return -ENODEV;
+
+		pci->ep.ops = &keembay_pcie_ep_ops;
+		return dw_pcie_ep_init(&pci->ep);
+	default:
+		dev_err(dev, "Invalid device type %d\n", pcie->mode);
+		return -ENODEV;
+	}
+}
+
+static const struct keembay_pcie_of_data keembay_pcie_rc_of_data = {
+	.mode = DW_PCIE_RC_TYPE,
+};
+
+static const struct keembay_pcie_of_data keembay_pcie_ep_of_data = {
+	.mode = DW_PCIE_EP_TYPE,
+};
+
+static const struct of_device_id keembay_pcie_of_match[] = {
+	{
+		.compatible = "intel,keembay-pcie",
+		.data = &keembay_pcie_rc_of_data,
+	},
+	{
+		.compatible = "intel,keembay-pcie-ep",
+		.data = &keembay_pcie_ep_of_data,
+	},
+	{}
+};
+
+static struct platform_driver keembay_pcie_driver = {
+	.driver = {
+		.name = "keembay-pcie",
+		.of_match_table = keembay_pcie_of_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe  = keembay_pcie_probe,
+};
+builtin_platform_driver(keembay_pcie_driver);
diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c b/drivers/pci/controller/dwc/pcie-tegra194.c
index 3ec7b29d5dc7..904976913081 100644
--- a/drivers/pci/controller/dwc/pcie-tegra194.c
+++ b/drivers/pci/controller/dwc/pcie-tegra194.c
@@ -497,19 +497,19 @@ static irqreturn_t tegra_pcie_ep_hard_irq(int irq, void *arg)
 	struct tegra_pcie_dw *pcie = arg;
 	struct dw_pcie_ep *ep = &pcie->pci.ep;
 	int spurious = 1;
-	u32 val, tmp;
+	u32 status_l0, status_l1, link_status;
 
-	val = appl_readl(pcie, APPL_INTR_STATUS_L0);
-	if (val & APPL_INTR_STATUS_L0_LINK_STATE_INT) {
-		val = appl_readl(pcie, APPL_INTR_STATUS_L1_0_0);
-		appl_writel(pcie, val, APPL_INTR_STATUS_L1_0_0);
+	status_l0 = appl_readl(pcie, APPL_INTR_STATUS_L0);
+	if (status_l0 & APPL_INTR_STATUS_L0_LINK_STATE_INT) {
+		status_l1 = appl_readl(pcie, APPL_INTR_STATUS_L1_0_0);
+		appl_writel(pcie, status_l1, APPL_INTR_STATUS_L1_0_0);
 
-		if (val & APPL_INTR_STATUS_L1_0_0_HOT_RESET_DONE)
+		if (status_l1 & APPL_INTR_STATUS_L1_0_0_HOT_RESET_DONE)
 			pex_ep_event_hot_rst_done(pcie);
 
-		if (val & APPL_INTR_STATUS_L1_0_0_RDLH_LINK_UP_CHGED) {
-			tmp = appl_readl(pcie, APPL_LINK_STATUS);
-			if (tmp & APPL_LINK_STATUS_RDLH_LINK_UP) {
+		if (status_l1 & APPL_INTR_STATUS_L1_0_0_RDLH_LINK_UP_CHGED) {
+			link_status = appl_readl(pcie, APPL_LINK_STATUS);
+			if (link_status & APPL_LINK_STATUS_RDLH_LINK_UP) {
 				dev_dbg(pcie->dev, "Link is up with Host\n");
 				dw_pcie_ep_linkup(ep);
 			}
@@ -518,11 +518,11 @@ static irqreturn_t tegra_pcie_ep_hard_irq(int irq, void *arg)
 		spurious = 0;
 	}
 
-	if (val & APPL_INTR_STATUS_L0_PCI_CMD_EN_INT) {
-		val = appl_readl(pcie, APPL_INTR_STATUS_L1_15);
-		appl_writel(pcie, val, APPL_INTR_STATUS_L1_15);
+	if (status_l0 & APPL_INTR_STATUS_L0_PCI_CMD_EN_INT) {
+		status_l1 = appl_readl(pcie, APPL_INTR_STATUS_L1_15);
+		appl_writel(pcie, status_l1, APPL_INTR_STATUS_L1_15);
 
-		if (val & APPL_INTR_STATUS_L1_15_CFG_BME_CHGED)
+		if (status_l1 & APPL_INTR_STATUS_L1_15_CFG_BME_CHGED)
 			return IRQ_WAKE_THREAD;
 
 		spurious = 0;
@@ -530,8 +530,8 @@ static irqreturn_t tegra_pcie_ep_hard_irq(int irq, void *arg)
 
 	if (spurious) {
 		dev_warn(pcie->dev, "Random interrupt (STATUS = 0x%08X)\n",
-			 val);
-		appl_writel(pcie, val, APPL_INTR_STATUS_L0);
+			 status_l0);
+		appl_writel(pcie, status_l0, APPL_INTR_STATUS_L0);
 	}
 
 	return IRQ_HANDLED;
@@ -1493,6 +1493,16 @@ static void tegra_pcie_dw_pme_turnoff(struct tegra_pcie_dw *pcie)
 		return;
 	}
 
+	/*
+	 * PCIe controller exits from L2 only if reset is applied, so
+	 * controller doesn't handle interrupts. But in cases where
+	 * L2 entry fails, PERST# is asserted which can trigger surprise
+	 * link down AER. However this function call happens in
+	 * suspend_noirq(), so AER interrupt will not be processed.
+	 * Disable all interrupts to avoid such a scenario.
+	 */
+	appl_writel(pcie, 0x0, APPL_INTR_EN_L0_0);
+
 	if (tegra_pcie_try_link_l2(pcie)) {
 		dev_info(pcie->dev, "Link didn't transition to L2 state\n");
 		/*
@@ -1763,7 +1773,7 @@ static void pex_ep_event_pex_rst_deassert(struct tegra_pcie_dw *pcie)
 	val = (ep->msi_mem_phys & MSIX_ADDR_MATCH_LOW_OFF_MASK);
 	val |= MSIX_ADDR_MATCH_LOW_OFF_EN;
 	dw_pcie_writel_dbi(pci, MSIX_ADDR_MATCH_LOW_OFF, val);
-	val = (lower_32_bits(ep->msi_mem_phys) & MSIX_ADDR_MATCH_HIGH_OFF_MASK);
+	val = (upper_32_bits(ep->msi_mem_phys) & MSIX_ADDR_MATCH_HIGH_OFF_MASK);
 	dw_pcie_writel_dbi(pci, MSIX_ADDR_MATCH_HIGH_OFF, val);
 
 	ret = dw_pcie_ep_init_complete(ep);
@@ -1935,13 +1945,6 @@ static int tegra_pcie_config_ep(struct tegra_pcie_dw *pcie,
 		return ret;
 	}
 
-	name = devm_kasprintf(dev, GFP_KERNEL, "tegra_pcie_%u_ep_work",
-			      pcie->cid);
-	if (!name) {
-		dev_err(dev, "Failed to create PCIe EP work thread string\n");
-		return -ENOMEM;
-	}
-
 	pm_runtime_enable(dev);
 
 	ret = dw_pcie_ep_init(ep);
@@ -2236,6 +2239,11 @@ static int tegra_pcie_dw_resume_early(struct device *dev)
 	struct tegra_pcie_dw *pcie = dev_get_drvdata(dev);
 	u32 val;
 
+	if (pcie->mode == DW_PCIE_EP_TYPE) {
+		dev_err(dev, "Suspend is not supported in EP mode");
+		return -ENOTSUPP;
+	}
+
 	if (!pcie->link_state)
 		return 0;
 
diff --git a/drivers/pci/controller/dwc/pcie-uniphier.c b/drivers/pci/controller/dwc/pcie-uniphier.c
index 7e8bad326770..d842fd018129 100644
--- a/drivers/pci/controller/dwc/pcie-uniphier.c
+++ b/drivers/pci/controller/dwc/pcie-uniphier.c
@@ -235,7 +235,7 @@ static void uniphier_pcie_irq_handler(struct irq_desc *desc)
 	struct uniphier_pcie_priv *priv = to_uniphier_pcie(pci);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	unsigned long reg;
-	u32 val, bit, virq;
+	u32 val, bit;
 
 	/* INT for debug */
 	val = readl(priv->base + PCL_RCV_INT);
@@ -257,10 +257,8 @@ static void uniphier_pcie_irq_handler(struct irq_desc *desc)
 	val = readl(priv->base + PCL_RCV_INTX);
 	reg = FIELD_GET(PCL_RCV_INTX_ALL_STATUS, val);
 
-	for_each_set_bit(bit, &reg, PCI_NUM_INTX) {
-		virq = irq_linear_revmap(priv->legacy_irq_domain, bit);
-		generic_handle_irq(virq);
-	}
+	for_each_set_bit(bit, &reg, PCI_NUM_INTX)
+		generic_handle_domain_irq(priv->legacy_irq_domain, bit);
 
 	chained_irq_exit(chip, desc);
 }
diff --git a/drivers/pci/controller/dwc/pcie-visconti.c b/drivers/pci/controller/dwc/pcie-visconti.c
new file mode 100644
index 000000000000..a88eab6829bb
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-visconti.c
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DWC PCIe RC driver for Toshiba Visconti ARM SoC
+ *
+ * Copyright (C) 2021 Toshiba Electronic Device & Storage Corporation
+ * Copyright (C) 2021 TOSHIBA CORPORATION
+ *
+ * Nobuhiro Iwamatsu <nobuhiro1.iwamatsu@toshiba.co.jp>
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/types.h>
+
+#include "pcie-designware.h"
+#include "../../pci.h"
+
+struct visconti_pcie {
+	struct dw_pcie pci;
+	void __iomem *ulreg_base;
+	void __iomem *smu_base;
+	void __iomem *mpu_base;
+	struct clk *refclk;
+	struct clk *coreclk;
+	struct clk *auxclk;
+};
+
+#define PCIE_UL_REG_S_PCIE_MODE		0x00F4
+#define  PCIE_UL_REG_S_PCIE_MODE_EP	0x00
+#define  PCIE_UL_REG_S_PCIE_MODE_RC	0x04
+
+#define PCIE_UL_REG_S_PERSTN_CTRL	0x00F8
+#define  PCIE_UL_IOM_PCIE_PERSTN_I_EN	BIT(3)
+#define  PCIE_UL_DIRECT_PERSTN_EN	BIT(2)
+#define  PCIE_UL_PERSTN_OUT		BIT(1)
+#define  PCIE_UL_DIRECT_PERSTN		BIT(0)
+#define  PCIE_UL_REG_S_PERSTN_CTRL_INIT	(PCIE_UL_IOM_PCIE_PERSTN_I_EN | \
+					 PCIE_UL_DIRECT_PERSTN_EN | \
+					 PCIE_UL_DIRECT_PERSTN)
+
+#define PCIE_UL_REG_S_PHY_INIT_02	0x0104
+#define  PCIE_UL_PHY0_SRAM_EXT_LD_DONE	BIT(0)
+
+#define PCIE_UL_REG_S_PHY_INIT_03	0x0108
+#define  PCIE_UL_PHY0_SRAM_INIT_DONE	BIT(0)
+
+#define PCIE_UL_REG_S_INT_EVENT_MASK1	0x0138
+#define  PCIE_UL_CFG_PME_INT		BIT(0)
+#define  PCIE_UL_CFG_LINK_EQ_REQ_INT	BIT(1)
+#define  PCIE_UL_EDMA_INT0		BIT(2)
+#define  PCIE_UL_EDMA_INT1		BIT(3)
+#define  PCIE_UL_EDMA_INT2		BIT(4)
+#define  PCIE_UL_EDMA_INT3		BIT(5)
+#define  PCIE_UL_S_INT_EVENT_MASK1_ALL  (PCIE_UL_CFG_PME_INT | \
+					 PCIE_UL_CFG_LINK_EQ_REQ_INT | \
+					 PCIE_UL_EDMA_INT0 | \
+					 PCIE_UL_EDMA_INT1 | \
+					 PCIE_UL_EDMA_INT2 | \
+					 PCIE_UL_EDMA_INT3)
+
+#define PCIE_UL_REG_S_SB_MON		0x0198
+#define PCIE_UL_REG_S_SIG_MON		0x019C
+#define  PCIE_UL_CORE_RST_N_MON		BIT(0)
+
+#define PCIE_UL_REG_V_SII_DBG_00	0x0844
+#define PCIE_UL_REG_V_SII_GEN_CTRL_01	0x0860
+#define  PCIE_UL_APP_LTSSM_ENABLE	BIT(0)
+
+#define PCIE_UL_REG_V_PHY_ST_00		0x0864
+#define  PCIE_UL_SMLH_LINK_UP		BIT(0)
+
+#define PCIE_UL_REG_V_PHY_ST_02		0x0868
+#define  PCIE_UL_S_DETECT_ACT		0x01
+#define  PCIE_UL_S_L0			0x11
+
+#define PISMU_CKON_PCIE			0x0038
+#define  PISMU_CKON_PCIE_AUX_CLK	BIT(1)
+#define  PISMU_CKON_PCIE_MSTR_ACLK	BIT(0)
+
+#define PISMU_RSOFF_PCIE		0x0538
+#define  PISMU_RSOFF_PCIE_ULREG_RST_N	BIT(1)
+#define  PISMU_RSOFF_PCIE_PWR_UP_RST_N	BIT(0)
+
+#define PCIE_MPU_REG_MP_EN		0x0
+#define  MPU_MP_EN_DISABLE		BIT(0)
+
+/* Access registers in PCIe ulreg */
+static void visconti_ulreg_writel(struct visconti_pcie *pcie, u32 val, u32 reg)
+{
+	writel_relaxed(val, pcie->ulreg_base + reg);
+}
+
+static u32 visconti_ulreg_readl(struct visconti_pcie *pcie, u32 reg)
+{
+	return readl_relaxed(pcie->ulreg_base + reg);
+}
+
+/* Access registers in PCIe smu */
+static void visconti_smu_writel(struct visconti_pcie *pcie, u32 val, u32 reg)
+{
+	writel_relaxed(val, pcie->smu_base + reg);
+}
+
+/* Access registers in PCIe mpu */
+static void visconti_mpu_writel(struct visconti_pcie *pcie, u32 val, u32 reg)
+{
+	writel_relaxed(val, pcie->mpu_base + reg);
+}
+
+static u32 visconti_mpu_readl(struct visconti_pcie *pcie, u32 reg)
+{
+	return readl_relaxed(pcie->mpu_base + reg);
+}
+
+static int visconti_pcie_link_up(struct dw_pcie *pci)
+{
+	struct visconti_pcie *pcie = dev_get_drvdata(pci->dev);
+	void __iomem *addr = pcie->ulreg_base;
+	u32 val = readl_relaxed(addr + PCIE_UL_REG_V_PHY_ST_02);
+
+	return !!(val & PCIE_UL_S_L0);
+}
+
+static int visconti_pcie_start_link(struct dw_pcie *pci)
+{
+	struct visconti_pcie *pcie = dev_get_drvdata(pci->dev);
+	void __iomem *addr = pcie->ulreg_base;
+	u32 val;
+	int ret;
+
+	visconti_ulreg_writel(pcie, PCIE_UL_APP_LTSSM_ENABLE,
+			      PCIE_UL_REG_V_SII_GEN_CTRL_01);
+
+	ret = readl_relaxed_poll_timeout(addr + PCIE_UL_REG_V_PHY_ST_02,
+					 val, (val & PCIE_UL_S_L0),
+					 90000, 100000);
+	if (ret)
+		return ret;
+
+	visconti_ulreg_writel(pcie, PCIE_UL_S_INT_EVENT_MASK1_ALL,
+			      PCIE_UL_REG_S_INT_EVENT_MASK1);
+
+	if (dw_pcie_link_up(pci)) {
+		val = visconti_mpu_readl(pcie, PCIE_MPU_REG_MP_EN);
+		visconti_mpu_writel(pcie, val & ~MPU_MP_EN_DISABLE,
+				    PCIE_MPU_REG_MP_EN);
+	}
+
+	return 0;
+}
+
+static void visconti_pcie_stop_link(struct dw_pcie *pci)
+{
+	struct visconti_pcie *pcie = dev_get_drvdata(pci->dev);
+	u32 val;
+
+	val = visconti_ulreg_readl(pcie, PCIE_UL_REG_V_SII_GEN_CTRL_01);
+	val &= ~PCIE_UL_APP_LTSSM_ENABLE;
+	visconti_ulreg_writel(pcie, val, PCIE_UL_REG_V_SII_GEN_CTRL_01);
+
+	val = visconti_mpu_readl(pcie, PCIE_MPU_REG_MP_EN);
+	visconti_mpu_writel(pcie, val | MPU_MP_EN_DISABLE, PCIE_MPU_REG_MP_EN);
+}
+
+/*
+ * In this SoC specification, the CPU bus outputs the offset value from
+ * 0x40000000 to the PCIe bus, so 0x40000000 is subtracted from the CPU
+ * bus address. This 0x40000000 is also based on io_base from DT.
+ */
+static u64 visconti_pcie_cpu_addr_fixup(struct dw_pcie *pci, u64 cpu_addr)
+{
+	struct pcie_port *pp = &pci->pp;
+
+	return cpu_addr & ~pp->io_base;
+}
+
+static const struct dw_pcie_ops dw_pcie_ops = {
+	.cpu_addr_fixup = visconti_pcie_cpu_addr_fixup,
+	.link_up = visconti_pcie_link_up,
+	.start_link = visconti_pcie_start_link,
+	.stop_link = visconti_pcie_stop_link,
+};
+
+static int visconti_pcie_host_init(struct pcie_port *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct visconti_pcie *pcie = dev_get_drvdata(pci->dev);
+	void __iomem *addr;
+	int err;
+	u32 val;
+
+	visconti_smu_writel(pcie,
+			    PISMU_CKON_PCIE_AUX_CLK | PISMU_CKON_PCIE_MSTR_ACLK,
+			    PISMU_CKON_PCIE);
+	ndelay(250);
+
+	visconti_smu_writel(pcie, PISMU_RSOFF_PCIE_ULREG_RST_N,
+			    PISMU_RSOFF_PCIE);
+	visconti_ulreg_writel(pcie, PCIE_UL_REG_S_PCIE_MODE_RC,
+			      PCIE_UL_REG_S_PCIE_MODE);
+
+	val = PCIE_UL_REG_S_PERSTN_CTRL_INIT;
+	visconti_ulreg_writel(pcie, val, PCIE_UL_REG_S_PERSTN_CTRL);
+	udelay(100);
+
+	val |= PCIE_UL_PERSTN_OUT;
+	visconti_ulreg_writel(pcie, val, PCIE_UL_REG_S_PERSTN_CTRL);
+	udelay(100);
+
+	visconti_smu_writel(pcie, PISMU_RSOFF_PCIE_PWR_UP_RST_N,
+			    PISMU_RSOFF_PCIE);
+
+	addr = pcie->ulreg_base + PCIE_UL_REG_S_PHY_INIT_03;
+	err = readl_relaxed_poll_timeout(addr, val,
+					 (val & PCIE_UL_PHY0_SRAM_INIT_DONE),
+					 100, 1000);
+	if (err)
+		return err;
+
+	visconti_ulreg_writel(pcie, PCIE_UL_PHY0_SRAM_EXT_LD_DONE,
+			      PCIE_UL_REG_S_PHY_INIT_02);
+
+	addr = pcie->ulreg_base + PCIE_UL_REG_S_SIG_MON;
+	return readl_relaxed_poll_timeout(addr, val,
+					  (val & PCIE_UL_CORE_RST_N_MON), 100,
+					  1000);
+}
+
+static const struct dw_pcie_host_ops visconti_pcie_host_ops = {
+	.host_init = visconti_pcie_host_init,
+};
+
+static int visconti_get_resources(struct platform_device *pdev,
+				  struct visconti_pcie *pcie)
+{
+	struct device *dev = &pdev->dev;
+
+	pcie->ulreg_base = devm_platform_ioremap_resource_byname(pdev, "ulreg");
+	if (IS_ERR(pcie->ulreg_base))
+		return PTR_ERR(pcie->ulreg_base);
+
+	pcie->smu_base = devm_platform_ioremap_resource_byname(pdev, "smu");
+	if (IS_ERR(pcie->smu_base))
+		return PTR_ERR(pcie->smu_base);
+
+	pcie->mpu_base = devm_platform_ioremap_resource_byname(pdev, "mpu");
+	if (IS_ERR(pcie->mpu_base))
+		return PTR_ERR(pcie->mpu_base);
+
+	pcie->refclk = devm_clk_get(dev, "ref");
+	if (IS_ERR(pcie->refclk))
+		return dev_err_probe(dev, PTR_ERR(pcie->refclk),
+				     "Failed to get ref clock\n");
+
+	pcie->coreclk = devm_clk_get(dev, "core");
+	if (IS_ERR(pcie->coreclk))
+		return dev_err_probe(dev, PTR_ERR(pcie->coreclk),
+				     "Failed to get core clock\n");
+
+	pcie->auxclk = devm_clk_get(dev, "aux");
+	if (IS_ERR(pcie->auxclk))
+		return dev_err_probe(dev, PTR_ERR(pcie->auxclk),
+				     "Failed to get aux clock\n");
+
+	return 0;
+}
+
+static int visconti_add_pcie_port(struct visconti_pcie *pcie,
+				  struct platform_device *pdev)
+{
+	struct dw_pcie *pci = &pcie->pci;
+	struct pcie_port *pp = &pci->pp;
+	struct device *dev = &pdev->dev;
+
+	pp->irq = platform_get_irq_byname(pdev, "intr");
+	if (pp->irq < 0) {
+		dev_err(dev, "Interrupt intr is missing");
+		return pp->irq;
+	}
+
+	pp->ops = &visconti_pcie_host_ops;
+
+	return dw_pcie_host_init(pp);
+}
+
+static int visconti_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct visconti_pcie *pcie;
+	struct dw_pcie *pci;
+	int ret;
+
+	pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+	if (!pcie)
+		return -ENOMEM;
+
+	pci = &pcie->pci;
+	pci->dev = dev;
+	pci->ops = &dw_pcie_ops;
+
+	ret = visconti_get_resources(pdev, pcie);
+	if (ret)
+		return ret;
+
+	platform_set_drvdata(pdev, pcie);
+
+	return visconti_add_pcie_port(pcie, pdev);
+}
+
+static const struct of_device_id visconti_pcie_match[] = {
+	{ .compatible = "toshiba,visconti-pcie" },
+	{},
+};
+
+static struct platform_driver visconti_pcie_driver = {
+	.probe = visconti_pcie_probe,
+	.driver = {
+		.name = "visconti-pcie",
+		.of_match_table = visconti_pcie_match,
+		.suppress_bind_attrs = true,
+	},
+};
+builtin_platform_driver(visconti_pcie_driver);
diff --git a/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c b/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c
index c637de3a389b..f3547aa60140 100644
--- a/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c
+++ b/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c
@@ -92,7 +92,7 @@ static void mobiveil_pcie_isr(struct irq_desc *desc)
 	u32 msi_data, msi_addr_lo, msi_addr_hi;
 	u32 intr_status, msi_status;
 	unsigned long shifted_status;
-	u32 bit, virq, val, mask;
+	u32 bit, val, mask;
 
 	/*
 	 * The core provides a single interrupt for both INTx/MSI messages.
@@ -114,11 +114,10 @@ static void mobiveil_pcie_isr(struct irq_desc *desc)
 		shifted_status >>= PAB_INTX_START;
 		do {
 			for_each_set_bit(bit, &shifted_status, PCI_NUM_INTX) {
-				virq = irq_find_mapping(rp->intx_domain,
-							bit + 1);
-				if (virq)
-					generic_handle_irq(virq);
-				else
+				int ret;
+				ret = generic_handle_domain_irq(rp->intx_domain,
+								bit + 1);
+				if (ret)
 					dev_err_ratelimited(dev, "unexpected IRQ, INT%d\n",
 							    bit);
 
@@ -155,9 +154,7 @@ static void mobiveil_pcie_isr(struct irq_desc *desc)
 		dev_dbg(dev, "MSI registers, data: %08x, addr: %08x:%08x\n",
 			msi_data, msi_addr_hi, msi_addr_lo);
 
-		virq = irq_find_mapping(msi->dev_domain, msi_data);
-		if (virq)
-			generic_handle_irq(virq);
+		generic_handle_domain_irq(msi->dev_domain, msi_data);
 
 		msi_status = readl_relaxed(pcie->apb_csr_base +
 					   MSI_STATUS_OFFSET);
diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c
index c95ebe808f92..596ebcfcc82d 100644
--- a/drivers/pci/controller/pci-aardvark.c
+++ b/drivers/pci/controller/pci-aardvark.c
@@ -58,6 +58,7 @@
 #define   PIO_COMPLETION_STATUS_CRS		2
 #define   PIO_COMPLETION_STATUS_CA		4
 #define   PIO_NON_POSTED_REQ			BIT(10)
+#define   PIO_ERR_STATUS			BIT(11)
 #define PIO_ADDR_LS				(PIO_BASE_ADDR + 0x8)
 #define PIO_ADDR_MS				(PIO_BASE_ADDR + 0xc)
 #define PIO_WR_DATA				(PIO_BASE_ADDR + 0x10)
@@ -118,6 +119,46 @@
 #define PCIE_MSI_MASK_REG			(CONTROL_BASE_ADDR + 0x5C)
 #define PCIE_MSI_PAYLOAD_REG			(CONTROL_BASE_ADDR + 0x9C)
 
+/* PCIe window configuration */
+#define OB_WIN_BASE_ADDR			0x4c00
+#define OB_WIN_BLOCK_SIZE			0x20
+#define OB_WIN_COUNT				8
+#define OB_WIN_REG_ADDR(win, offset)		(OB_WIN_BASE_ADDR + \
+						 OB_WIN_BLOCK_SIZE * (win) + \
+						 (offset))
+#define OB_WIN_MATCH_LS(win)			OB_WIN_REG_ADDR(win, 0x00)
+#define     OB_WIN_ENABLE			BIT(0)
+#define OB_WIN_MATCH_MS(win)			OB_WIN_REG_ADDR(win, 0x04)
+#define OB_WIN_REMAP_LS(win)			OB_WIN_REG_ADDR(win, 0x08)
+#define OB_WIN_REMAP_MS(win)			OB_WIN_REG_ADDR(win, 0x0c)
+#define OB_WIN_MASK_LS(win)			OB_WIN_REG_ADDR(win, 0x10)
+#define OB_WIN_MASK_MS(win)			OB_WIN_REG_ADDR(win, 0x14)
+#define OB_WIN_ACTIONS(win)			OB_WIN_REG_ADDR(win, 0x18)
+#define OB_WIN_DEFAULT_ACTIONS			(OB_WIN_ACTIONS(OB_WIN_COUNT-1) + 0x4)
+#define     OB_WIN_FUNC_NUM_MASK		GENMASK(31, 24)
+#define     OB_WIN_FUNC_NUM_SHIFT		24
+#define     OB_WIN_FUNC_NUM_ENABLE		BIT(23)
+#define     OB_WIN_BUS_NUM_BITS_MASK		GENMASK(22, 20)
+#define     OB_WIN_BUS_NUM_BITS_SHIFT		20
+#define     OB_WIN_MSG_CODE_ENABLE		BIT(22)
+#define     OB_WIN_MSG_CODE_MASK		GENMASK(21, 14)
+#define     OB_WIN_MSG_CODE_SHIFT		14
+#define     OB_WIN_MSG_PAYLOAD_LEN		BIT(12)
+#define     OB_WIN_ATTR_ENABLE			BIT(11)
+#define     OB_WIN_ATTR_TC_MASK			GENMASK(10, 8)
+#define     OB_WIN_ATTR_TC_SHIFT		8
+#define     OB_WIN_ATTR_RELAXED			BIT(7)
+#define     OB_WIN_ATTR_NOSNOOP			BIT(6)
+#define     OB_WIN_ATTR_POISON			BIT(5)
+#define     OB_WIN_ATTR_IDO			BIT(4)
+#define     OB_WIN_TYPE_MASK			GENMASK(3, 0)
+#define     OB_WIN_TYPE_SHIFT			0
+#define     OB_WIN_TYPE_MEM			0x0
+#define     OB_WIN_TYPE_IO			0x4
+#define     OB_WIN_TYPE_CONFIG_TYPE0		0x8
+#define     OB_WIN_TYPE_CONFIG_TYPE1		0x9
+#define     OB_WIN_TYPE_MSG			0xc
+
 /* LMI registers base address and register offsets */
 #define LMI_BASE_ADDR				0x6000
 #define CFG_REG					(LMI_BASE_ADDR + 0x0)
@@ -166,7 +207,7 @@
 #define PCIE_CONFIG_WR_TYPE0			0xa
 #define PCIE_CONFIG_WR_TYPE1			0xb
 
-#define PIO_RETRY_CNT			500
+#define PIO_RETRY_CNT			750000 /* 1.5 s */
 #define PIO_RETRY_DELAY			2 /* 2 us*/
 
 #define LINK_WAIT_MAX_RETRIES		10
@@ -177,11 +218,21 @@
 
 #define MSI_IRQ_NUM			32
 
+#define CFG_RD_CRS_VAL			0xffff0001
+
 struct advk_pcie {
 	struct platform_device *pdev;
 	void __iomem *base;
+	struct {
+		phys_addr_t match;
+		phys_addr_t remap;
+		phys_addr_t mask;
+		u32 actions;
+	} wins[OB_WIN_COUNT];
+	u8 wins_count;
 	struct irq_domain *irq_domain;
 	struct irq_chip irq_chip;
+	raw_spinlock_t irq_lock;
 	struct irq_domain *msi_domain;
 	struct irq_domain *msi_inner_domain;
 	struct irq_chip msi_bottom_irq_chip;
@@ -366,9 +417,39 @@ err:
 	dev_err(dev, "link never came up\n");
 }
 
+/*
+ * Set PCIe address window register which could be used for memory
+ * mapping.
+ */
+static void advk_pcie_set_ob_win(struct advk_pcie *pcie, u8 win_num,
+				 phys_addr_t match, phys_addr_t remap,
+				 phys_addr_t mask, u32 actions)
+{
+	advk_writel(pcie, OB_WIN_ENABLE |
+			  lower_32_bits(match), OB_WIN_MATCH_LS(win_num));
+	advk_writel(pcie, upper_32_bits(match), OB_WIN_MATCH_MS(win_num));
+	advk_writel(pcie, lower_32_bits(remap), OB_WIN_REMAP_LS(win_num));
+	advk_writel(pcie, upper_32_bits(remap), OB_WIN_REMAP_MS(win_num));
+	advk_writel(pcie, lower_32_bits(mask), OB_WIN_MASK_LS(win_num));
+	advk_writel(pcie, upper_32_bits(mask), OB_WIN_MASK_MS(win_num));
+	advk_writel(pcie, actions, OB_WIN_ACTIONS(win_num));
+}
+
+static void advk_pcie_disable_ob_win(struct advk_pcie *pcie, u8 win_num)
+{
+	advk_writel(pcie, 0, OB_WIN_MATCH_LS(win_num));
+	advk_writel(pcie, 0, OB_WIN_MATCH_MS(win_num));
+	advk_writel(pcie, 0, OB_WIN_REMAP_LS(win_num));
+	advk_writel(pcie, 0, OB_WIN_REMAP_MS(win_num));
+	advk_writel(pcie, 0, OB_WIN_MASK_LS(win_num));
+	advk_writel(pcie, 0, OB_WIN_MASK_MS(win_num));
+	advk_writel(pcie, 0, OB_WIN_ACTIONS(win_num));
+}
+
 static void advk_pcie_setup_hw(struct advk_pcie *pcie)
 {
 	u32 reg;
+	int i;
 
 	/* Enable TX */
 	reg = advk_readl(pcie, PCIE_CORE_REF_CLK_REG);
@@ -447,15 +528,51 @@ static void advk_pcie_setup_hw(struct advk_pcie *pcie)
 	reg = PCIE_IRQ_ALL_MASK & (~PCIE_IRQ_ENABLE_INTS_MASK);
 	advk_writel(pcie, reg, HOST_CTRL_INT_MASK_REG);
 
+	/*
+	 * Enable AXI address window location generation:
+	 * When it is enabled, the default outbound window
+	 * configurations (Default User Field: 0xD0074CFC)
+	 * are used to transparent address translation for
+	 * the outbound transactions. Thus, PCIe address
+	 * windows are not required for transparent memory
+	 * access when default outbound window configuration
+	 * is set for memory access.
+	 */
 	reg = advk_readl(pcie, PCIE_CORE_CTRL2_REG);
 	reg |= PCIE_CORE_CTRL2_OB_WIN_ENABLE;
 	advk_writel(pcie, reg, PCIE_CORE_CTRL2_REG);
 
-	/* Bypass the address window mapping for PIO */
+	/*
+	 * Set memory access in Default User Field so it
+	 * is not required to configure PCIe address for
+	 * transparent memory access.
+	 */
+	advk_writel(pcie, OB_WIN_TYPE_MEM, OB_WIN_DEFAULT_ACTIONS);
+
+	/*
+	 * Bypass the address window mapping for PIO:
+	 * Since PIO access already contains all required
+	 * info over AXI interface by PIO registers, the
+	 * address window is not required.
+	 */
 	reg = advk_readl(pcie, PIO_CTRL);
 	reg |= PIO_CTRL_ADDR_WIN_DISABLE;
 	advk_writel(pcie, reg, PIO_CTRL);
 
+	/*
+	 * Configure PCIe address windows for non-memory or
+	 * non-transparent access as by default PCIe uses
+	 * transparent memory access.
+	 */
+	for (i = 0; i < pcie->wins_count; i++)
+		advk_pcie_set_ob_win(pcie, i,
+				     pcie->wins[i].match, pcie->wins[i].remap,
+				     pcie->wins[i].mask, pcie->wins[i].actions);
+
+	/* Disable remaining PCIe outbound windows */
+	for (i = pcie->wins_count; i < OB_WIN_COUNT; i++)
+		advk_pcie_disable_ob_win(pcie, i);
+
 	advk_pcie_train_link(pcie);
 
 	/*
@@ -472,7 +589,7 @@ static void advk_pcie_setup_hw(struct advk_pcie *pcie)
 	advk_writel(pcie, reg, PCIE_CORE_CMD_STATUS_REG);
 }
 
-static void advk_pcie_check_pio_status(struct advk_pcie *pcie)
+static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u32 *val)
 {
 	struct device *dev = &pcie->pdev->dev;
 	u32 reg;
@@ -483,14 +600,70 @@ static void advk_pcie_check_pio_status(struct advk_pcie *pcie)
 	status = (reg & PIO_COMPLETION_STATUS_MASK) >>
 		PIO_COMPLETION_STATUS_SHIFT;
 
-	if (!status)
-		return;
-
+	/*
+	 * According to HW spec, the PIO status check sequence as below:
+	 * 1) even if COMPLETION_STATUS(bit9:7) indicates successful,
+	 *    it still needs to check Error Status(bit11), only when this bit
+	 *    indicates no error happen, the operation is successful.
+	 * 2) value Unsupported Request(1) of COMPLETION_STATUS(bit9:7) only
+	 *    means a PIO write error, and for PIO read it is successful with
+	 *    a read value of 0xFFFFFFFF.
+	 * 3) value Completion Retry Status(CRS) of COMPLETION_STATUS(bit9:7)
+	 *    only means a PIO write error, and for PIO read it is successful
+	 *    with a read value of 0xFFFF0001.
+	 * 4) value Completer Abort (CA) of COMPLETION_STATUS(bit9:7) means
+	 *    error for both PIO read and PIO write operation.
+	 * 5) other errors are indicated as 'unknown'.
+	 */
 	switch (status) {
+	case PIO_COMPLETION_STATUS_OK:
+		if (reg & PIO_ERR_STATUS) {
+			strcomp_status = "COMP_ERR";
+			break;
+		}
+		/* Get the read result */
+		if (val)
+			*val = advk_readl(pcie, PIO_RD_DATA);
+		/* No error */
+		strcomp_status = NULL;
+		break;
 	case PIO_COMPLETION_STATUS_UR:
 		strcomp_status = "UR";
 		break;
 	case PIO_COMPLETION_STATUS_CRS:
+		if (allow_crs && val) {
+			/* PCIe r4.0, sec 2.3.2, says:
+			 * If CRS Software Visibility is enabled:
+			 * For a Configuration Read Request that includes both
+			 * bytes of the Vendor ID field of a device Function's
+			 * Configuration Space Header, the Root Complex must
+			 * complete the Request to the host by returning a
+			 * read-data value of 0001h for the Vendor ID field and
+			 * all '1's for any additional bytes included in the
+			 * request.
+			 *
+			 * So CRS in this case is not an error status.
+			 */
+			*val = CFG_RD_CRS_VAL;
+			strcomp_status = NULL;
+			break;
+		}
+		/* PCIe r4.0, sec 2.3.2, says:
+		 * If CRS Software Visibility is not enabled, the Root Complex
+		 * must re-issue the Configuration Request as a new Request.
+		 * If CRS Software Visibility is enabled: For a Configuration
+		 * Write Request or for any other Configuration Read Request,
+		 * the Root Complex must re-issue the Configuration Request as
+		 * a new Request.
+		 * A Root Complex implementation may choose to limit the number
+		 * of Configuration Request/CRS Completion Status loops before
+		 * determining that something is wrong with the target of the
+		 * Request and taking appropriate action, e.g., complete the
+		 * Request to the host as a failed transaction.
+		 *
+		 * To simplify implementation do not re-issue the Configuration
+		 * Request and complete the Request as a failed transaction.
+		 */
 		strcomp_status = "CRS";
 		break;
 	case PIO_COMPLETION_STATUS_CA:
@@ -501,6 +674,9 @@ static void advk_pcie_check_pio_status(struct advk_pcie *pcie)
 		break;
 	}
 
+	if (!strcomp_status)
+		return 0;
+
 	if (reg & PIO_NON_POSTED_REQ)
 		str_posted = "Non-posted";
 	else
@@ -508,6 +684,8 @@ static void advk_pcie_check_pio_status(struct advk_pcie *pcie)
 
 	dev_err(dev, "%s PIO Response Status: %s, %#x @ %#x\n",
 		str_posted, strcomp_status, reg, advk_readl(pcie, PIO_ADDR_LS));
+
+	return -EFAULT;
 }
 
 static int advk_pcie_wait_pio(struct advk_pcie *pcie)
@@ -545,6 +723,7 @@ advk_pci_bridge_emul_pcie_conf_read(struct pci_bridge_emul *bridge,
 	case PCI_EXP_RTCTL: {
 		u32 val = advk_readl(pcie, PCIE_ISR0_MASK_REG);
 		*value = (val & PCIE_MSG_PM_PME_MASK) ? 0 : PCI_EXP_RTCTL_PMEIE;
+		*value |= PCI_EXP_RTCAP_CRSVIS << 16;
 		return PCI_BRIDGE_EMUL_HANDLED;
 	}
 
@@ -626,6 +805,7 @@ static struct pci_bridge_emul_ops advk_pci_bridge_emul_ops = {
 static int advk_sw_pci_bridge_init(struct advk_pcie *pcie)
 {
 	struct pci_bridge_emul *bridge = &pcie->bridge;
+	int ret;
 
 	bridge->conf.vendor =
 		cpu_to_le16(advk_readl(pcie, PCIE_CORE_DEV_ID_REG) & 0xffff);
@@ -649,7 +829,15 @@ static int advk_sw_pci_bridge_init(struct advk_pcie *pcie)
 	bridge->data = pcie;
 	bridge->ops = &advk_pci_bridge_emul_ops;
 
-	return pci_bridge_emul_init(bridge, 0);
+	/* PCIe config space can be initialized after pci_bridge_emul_init() */
+	ret = pci_bridge_emul_init(bridge, 0);
+	if (ret < 0)
+		return ret;
+
+	/* Indicates supports for Completion Retry Status */
+	bridge->pcie_conf.rootcap = cpu_to_le16(PCI_EXP_RTCAP_CRSVIS);
+
+	return 0;
 }
 
 static bool advk_pcie_valid_device(struct advk_pcie *pcie, struct pci_bus *bus,
@@ -701,6 +889,7 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
 			     int where, int size, u32 *val)
 {
 	struct advk_pcie *pcie = bus->sysdata;
+	bool allow_crs;
 	u32 reg;
 	int ret;
 
@@ -713,7 +902,24 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
 		return pci_bridge_emul_conf_read(&pcie->bridge, where,
 						 size, val);
 
+	/*
+	 * Completion Retry Status is possible to return only when reading all
+	 * 4 bytes from PCI_VENDOR_ID and PCI_DEVICE_ID registers at once and
+	 * CRSSVE flag on Root Bridge is enabled.
+	 */
+	allow_crs = (where == PCI_VENDOR_ID) && (size == 4) &&
+		    (le16_to_cpu(pcie->bridge.pcie_conf.rootctl) &
+		     PCI_EXP_RTCTL_CRSSVE);
+
 	if (advk_pcie_pio_is_running(pcie)) {
+		/*
+		 * If it is possible return Completion Retry Status so caller
+		 * tries to issue the request again instead of failing.
+		 */
+		if (allow_crs) {
+			*val = CFG_RD_CRS_VAL;
+			return PCIBIOS_SUCCESSFUL;
+		}
 		*val = 0xffffffff;
 		return PCIBIOS_SET_FAILED;
 	}
@@ -741,14 +947,25 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
 
 	ret = advk_pcie_wait_pio(pcie);
 	if (ret < 0) {
+		/*
+		 * If it is possible return Completion Retry Status so caller
+		 * tries to issue the request again instead of failing.
+		 */
+		if (allow_crs) {
+			*val = CFG_RD_CRS_VAL;
+			return PCIBIOS_SUCCESSFUL;
+		}
 		*val = 0xffffffff;
 		return PCIBIOS_SET_FAILED;
 	}
 
-	advk_pcie_check_pio_status(pcie);
+	/* Check PIO status and get the read result */
+	ret = advk_pcie_check_pio_status(pcie, allow_crs, val);
+	if (ret < 0) {
+		*val = 0xffffffff;
+		return PCIBIOS_SET_FAILED;
+	}
 
-	/* Get the read result */
-	*val = advk_readl(pcie, PIO_RD_DATA);
 	if (size == 1)
 		*val = (*val >> (8 * (where & 3))) & 0xff;
 	else if (size == 2)
@@ -812,7 +1029,9 @@ static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
 	if (ret < 0)
 		return PCIBIOS_SET_FAILED;
 
-	advk_pcie_check_pio_status(pcie);
+	ret = advk_pcie_check_pio_status(pcie, false, NULL);
+	if (ret < 0)
+		return PCIBIOS_SET_FAILED;
 
 	return PCIBIOS_SUCCESSFUL;
 }
@@ -886,22 +1105,28 @@ static void advk_pcie_irq_mask(struct irq_data *d)
 {
 	struct advk_pcie *pcie = d->domain->host_data;
 	irq_hw_number_t hwirq = irqd_to_hwirq(d);
+	unsigned long flags;
 	u32 mask;
 
+	raw_spin_lock_irqsave(&pcie->irq_lock, flags);
 	mask = advk_readl(pcie, PCIE_ISR1_MASK_REG);
 	mask |= PCIE_ISR1_INTX_ASSERT(hwirq);
 	advk_writel(pcie, mask, PCIE_ISR1_MASK_REG);
+	raw_spin_unlock_irqrestore(&pcie->irq_lock, flags);
 }
 
 static void advk_pcie_irq_unmask(struct irq_data *d)
 {
 	struct advk_pcie *pcie = d->domain->host_data;
 	irq_hw_number_t hwirq = irqd_to_hwirq(d);
+	unsigned long flags;
 	u32 mask;
 
+	raw_spin_lock_irqsave(&pcie->irq_lock, flags);
 	mask = advk_readl(pcie, PCIE_ISR1_MASK_REG);
 	mask &= ~PCIE_ISR1_INTX_ASSERT(hwirq);
 	advk_writel(pcie, mask, PCIE_ISR1_MASK_REG);
+	raw_spin_unlock_irqrestore(&pcie->irq_lock, flags);
 }
 
 static int advk_pcie_irq_map(struct irq_domain *h,
@@ -985,6 +1210,8 @@ static int advk_pcie_init_irq_domain(struct advk_pcie *pcie)
 	struct irq_chip *irq_chip;
 	int ret = 0;
 
+	raw_spin_lock_init(&pcie->irq_lock);
+
 	pcie_intc_node =  of_get_next_child(node, NULL);
 	if (!pcie_intc_node) {
 		dev_err(dev, "No PCIe Intc node found\n");
@@ -1049,7 +1276,7 @@ static void advk_pcie_handle_int(struct advk_pcie *pcie)
 {
 	u32 isr0_val, isr0_mask, isr0_status;
 	u32 isr1_val, isr1_mask, isr1_status;
-	int i, virq;
+	int i;
 
 	isr0_val = advk_readl(pcie, PCIE_ISR0_REG);
 	isr0_mask = advk_readl(pcie, PCIE_ISR0_MASK_REG);
@@ -1077,8 +1304,7 @@ static void advk_pcie_handle_int(struct advk_pcie *pcie)
 		advk_writel(pcie, PCIE_ISR1_INTX_ASSERT(i),
 			    PCIE_ISR1_REG);
 
-		virq = irq_find_mapping(pcie->irq_domain, i);
-		generic_handle_irq(virq);
+		generic_handle_domain_irq(pcie->irq_domain, i);
 	}
 }
 
@@ -1162,6 +1388,7 @@ static int advk_pcie_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct advk_pcie *pcie;
 	struct pci_host_bridge *bridge;
+	struct resource_entry *entry;
 	int ret, irq;
 
 	bridge = devm_pci_alloc_host_bridge(dev, sizeof(struct advk_pcie));
@@ -1172,6 +1399,80 @@ static int advk_pcie_probe(struct platform_device *pdev)
 	pcie->pdev = pdev;
 	platform_set_drvdata(pdev, pcie);
 
+	resource_list_for_each_entry(entry, &bridge->windows) {
+		resource_size_t start = entry->res->start;
+		resource_size_t size = resource_size(entry->res);
+		unsigned long type = resource_type(entry->res);
+		u64 win_size;
+
+		/*
+		 * Aardvark hardware allows to configure also PCIe window
+		 * for config type 0 and type 1 mapping, but driver uses
+		 * only PIO for issuing configuration transfers which does
+		 * not use PCIe window configuration.
+		 */
+		if (type != IORESOURCE_MEM && type != IORESOURCE_MEM_64 &&
+		    type != IORESOURCE_IO)
+			continue;
+
+		/*
+		 * Skip transparent memory resources. Default outbound access
+		 * configuration is set to transparent memory access so it
+		 * does not need window configuration.
+		 */
+		if ((type == IORESOURCE_MEM || type == IORESOURCE_MEM_64) &&
+		    entry->offset == 0)
+			continue;
+
+		/*
+		 * The n-th PCIe window is configured by tuple (match, remap, mask)
+		 * and an access to address A uses this window if A matches the
+		 * match with given mask.
+		 * So every PCIe window size must be a power of two and every start
+		 * address must be aligned to window size. Minimal size is 64 KiB
+		 * because lower 16 bits of mask must be zero. Remapped address
+		 * may have set only bits from the mask.
+		 */
+		while (pcie->wins_count < OB_WIN_COUNT && size > 0) {
+			/* Calculate the largest aligned window size */
+			win_size = (1ULL << (fls64(size)-1)) |
+				   (start ? (1ULL << __ffs64(start)) : 0);
+			win_size = 1ULL << __ffs64(win_size);
+			if (win_size < 0x10000)
+				break;
+
+			dev_dbg(dev,
+				"Configuring PCIe window %d: [0x%llx-0x%llx] as %lu\n",
+				pcie->wins_count, (unsigned long long)start,
+				(unsigned long long)start + win_size, type);
+
+			if (type == IORESOURCE_IO) {
+				pcie->wins[pcie->wins_count].actions = OB_WIN_TYPE_IO;
+				pcie->wins[pcie->wins_count].match = pci_pio_to_address(start);
+			} else {
+				pcie->wins[pcie->wins_count].actions = OB_WIN_TYPE_MEM;
+				pcie->wins[pcie->wins_count].match = start;
+			}
+			pcie->wins[pcie->wins_count].remap = start - entry->offset;
+			pcie->wins[pcie->wins_count].mask = ~(win_size - 1);
+
+			if (pcie->wins[pcie->wins_count].remap & (win_size - 1))
+				break;
+
+			start += win_size;
+			size -= win_size;
+			pcie->wins_count++;
+		}
+
+		if (size > 0) {
+			dev_err(&pcie->pdev->dev,
+				"Invalid PCIe region [0x%llx-0x%llx]\n",
+				(unsigned long long)entry->res->start,
+				(unsigned long long)entry->res->end + 1);
+			return -EINVAL;
+		}
+	}
+
 	pcie->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(pcie->base))
 		return PTR_ERR(pcie->base);
@@ -1252,6 +1553,7 @@ static int advk_pcie_remove(struct platform_device *pdev)
 {
 	struct advk_pcie *pcie = platform_get_drvdata(pdev);
 	struct pci_host_bridge *bridge = pci_host_bridge_from_priv(pcie);
+	int i;
 
 	pci_lock_rescan_remove();
 	pci_stop_root_bus(bridge->bus);
@@ -1261,6 +1563,10 @@ static int advk_pcie_remove(struct platform_device *pdev)
 	advk_pcie_remove_msi_irq_domain(pcie);
 	advk_pcie_remove_irq_domain(pcie);
 
+	/* Disable outbound address windows mapping */
+	for (i = 0; i < OB_WIN_COUNT; i++)
+		advk_pcie_disable_ob_win(pcie, i);
+
 	return 0;
 }
 
diff --git a/drivers/pci/controller/pci-ftpci100.c b/drivers/pci/controller/pci-ftpci100.c
index aefef1986201..88980a44461d 100644
--- a/drivers/pci/controller/pci-ftpci100.c
+++ b/drivers/pci/controller/pci-ftpci100.c
@@ -314,7 +314,7 @@ static void faraday_pci_irq_handler(struct irq_desc *desc)
 	for (i = 0; i < 4; i++) {
 		if ((irq_stat & BIT(i)) == 0)
 			continue;
-		generic_handle_irq(irq_find_mapping(p->irqdomain, i));
+		generic_handle_domain_irq(p->irqdomain, i);
 	}
 
 	chained_irq_exit(irqchip, desc);
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index a53bd8728d0d..eaec915ffe62 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -40,6 +40,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
+#include <linux/pci-ecam.h>
 #include <linux/delay.h>
 #include <linux/semaphore.h>
 #include <linux/irqdomain.h>
@@ -64,6 +65,7 @@ enum pci_protocol_version_t {
 	PCI_PROTOCOL_VERSION_1_1 = PCI_MAKE_VERSION(1, 1),	/* Win10 */
 	PCI_PROTOCOL_VERSION_1_2 = PCI_MAKE_VERSION(1, 2),	/* RS1 */
 	PCI_PROTOCOL_VERSION_1_3 = PCI_MAKE_VERSION(1, 3),	/* Vibranium */
+	PCI_PROTOCOL_VERSION_1_4 = PCI_MAKE_VERSION(1, 4),	/* WS2022 */
 };
 
 #define CPU_AFFINITY_ALL	-1ULL
@@ -73,6 +75,7 @@ enum pci_protocol_version_t {
  * first.
  */
 static enum pci_protocol_version_t pci_protocol_versions[] = {
+	PCI_PROTOCOL_VERSION_1_4,
 	PCI_PROTOCOL_VERSION_1_3,
 	PCI_PROTOCOL_VERSION_1_2,
 	PCI_PROTOCOL_VERSION_1_1,
@@ -122,6 +125,8 @@ enum pci_message_type {
 	PCI_CREATE_INTERRUPT_MESSAGE2	= PCI_MESSAGE_BASE + 0x17,
 	PCI_DELETE_INTERRUPT_MESSAGE2	= PCI_MESSAGE_BASE + 0x18, /* unused */
 	PCI_BUS_RELATIONS2		= PCI_MESSAGE_BASE + 0x19,
+	PCI_RESOURCES_ASSIGNED3         = PCI_MESSAGE_BASE + 0x1A,
+	PCI_CREATE_INTERRUPT_MESSAGE3   = PCI_MESSAGE_BASE + 0x1B,
 	PCI_MESSAGE_MAXIMUM
 };
 
@@ -235,6 +240,21 @@ struct hv_msi_desc2 {
 	u16	processor_array[32];
 } __packed;
 
+/*
+ * struct hv_msi_desc3 - 1.3 version of hv_msi_desc
+ *	Everything is the same as in 'hv_msi_desc2' except that the size of the
+ *	'vector' field is larger to support bigger vector values. For ex: LPI
+ *	vectors on ARM.
+ */
+struct hv_msi_desc3 {
+	u32	vector;
+	u8	delivery_mode;
+	u8	reserved;
+	u16	vector_count;
+	u16	processor_count;
+	u16	processor_array[32];
+} __packed;
+
 /**
  * struct tran_int_desc
  * @reserved:		unused, padding
@@ -383,6 +403,12 @@ struct pci_create_interrupt2 {
 	struct hv_msi_desc2 int_desc;
 } __packed;
 
+struct pci_create_interrupt3 {
+	struct pci_message message_type;
+	union win_slot_encoding wslot;
+	struct hv_msi_desc3 int_desc;
+} __packed;
+
 struct pci_delete_interrupt {
 	struct pci_message message_type;
 	union win_slot_encoding wslot;
@@ -448,7 +474,13 @@ enum hv_pcibus_state {
 };
 
 struct hv_pcibus_device {
+#ifdef CONFIG_X86
 	struct pci_sysdata sysdata;
+#elif defined(CONFIG_ARM64)
+	struct pci_config_window sysdata;
+#endif
+	struct pci_host_bridge *bridge;
+	struct fwnode_handle *fwnode;
 	/* Protocol version negotiated with the host */
 	enum pci_protocol_version_t protocol_version;
 	enum hv_pcibus_state state;
@@ -464,8 +496,6 @@ struct hv_pcibus_device {
 	spinlock_t device_list_lock;	/* Protect lists below */
 	void __iomem *cfg_addr;
 
-	struct list_head resources_for_children;
-
 	struct list_head children;
 	struct list_head dr_list;
 
@@ -1328,6 +1358,15 @@ static u32 hv_compose_msi_req_v1(
 	return sizeof(*int_pkt);
 }
 
+/*
+ * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten
+ * by subsequent retarget in hv_irq_unmask().
+ */
+static int hv_compose_msi_req_get_cpu(struct cpumask *affinity)
+{
+	return cpumask_first_and(affinity, cpu_online_mask);
+}
+
 static u32 hv_compose_msi_req_v2(
 	struct pci_create_interrupt2 *int_pkt, struct cpumask *affinity,
 	u32 slot, u8 vector)
@@ -1339,12 +1378,27 @@ static u32 hv_compose_msi_req_v2(
 	int_pkt->int_desc.vector = vector;
 	int_pkt->int_desc.vector_count = 1;
 	int_pkt->int_desc.delivery_mode = APIC_DELIVERY_MODE_FIXED;
+	cpu = hv_compose_msi_req_get_cpu(affinity);
+	int_pkt->int_desc.processor_array[0] =
+		hv_cpu_number_to_vp_number(cpu);
+	int_pkt->int_desc.processor_count = 1;
 
-	/*
-	 * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten
-	 * by subsequent retarget in hv_irq_unmask().
-	 */
-	cpu = cpumask_first_and(affinity, cpu_online_mask);
+	return sizeof(*int_pkt);
+}
+
+static u32 hv_compose_msi_req_v3(
+	struct pci_create_interrupt3 *int_pkt, struct cpumask *affinity,
+	u32 slot, u32 vector)
+{
+	int cpu;
+
+	int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE3;
+	int_pkt->wslot.slot = slot;
+	int_pkt->int_desc.vector = vector;
+	int_pkt->int_desc.reserved = 0;
+	int_pkt->int_desc.vector_count = 1;
+	int_pkt->int_desc.delivery_mode = APIC_DELIVERY_MODE_FIXED;
+	cpu = hv_compose_msi_req_get_cpu(affinity);
 	int_pkt->int_desc.processor_array[0] =
 		hv_cpu_number_to_vp_number(cpu);
 	int_pkt->int_desc.processor_count = 1;
@@ -1379,6 +1433,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 		union {
 			struct pci_create_interrupt v1;
 			struct pci_create_interrupt2 v2;
+			struct pci_create_interrupt3 v3;
 		} int_pkts;
 	} __packed ctxt;
 
@@ -1426,6 +1481,13 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 					cfg->vector);
 		break;
 
+	case PCI_PROTOCOL_VERSION_1_4:
+		size = hv_compose_msi_req_v3(&ctxt.int_pkts.v3,
+					dest,
+					hpdev->desc.win_slot.slot,
+					cfg->vector);
+		break;
+
 	default:
 		/* As we only negotiate protocol versions known to this driver,
 		 * this path should never hit. However, this is it not a hot
@@ -1566,7 +1628,7 @@ static int hv_pcie_init_irq_domain(struct hv_pcibus_device *hbus)
 	hbus->msi_info.handler = handle_edge_irq;
 	hbus->msi_info.handler_name = "edge";
 	hbus->msi_info.data = hbus;
-	hbus->irq_domain = pci_msi_create_irq_domain(hbus->sysdata.fwnode,
+	hbus->irq_domain = pci_msi_create_irq_domain(hbus->fwnode,
 						     &hbus->msi_info,
 						     x86_vector_domain);
 	if (!hbus->irq_domain) {
@@ -1575,6 +1637,8 @@ static int hv_pcie_init_irq_domain(struct hv_pcibus_device *hbus)
 		return -ENODEV;
 	}
 
+	dev_set_msi_domain(&hbus->bridge->dev, hbus->irq_domain);
+
 	return 0;
 }
 
@@ -1797,7 +1861,7 @@ static void hv_pci_assign_slots(struct hv_pcibus_device *hbus)
 
 		slot_nr = PCI_SLOT(wslot_to_devfn(hpdev->desc.win_slot.slot));
 		snprintf(name, SLOT_NAME_SIZE, "%u", hpdev->desc.ser);
-		hpdev->pci_slot = pci_create_slot(hbus->pci_bus, slot_nr,
+		hpdev->pci_slot = pci_create_slot(hbus->bridge->bus, slot_nr,
 					  name, NULL);
 		if (IS_ERR(hpdev->pci_slot)) {
 			pr_warn("pci_create slot %s failed\n", name);
@@ -1827,7 +1891,7 @@ static void hv_pci_remove_slots(struct hv_pcibus_device *hbus)
 static void hv_pci_assign_numa_node(struct hv_pcibus_device *hbus)
 {
 	struct pci_dev *dev;
-	struct pci_bus *bus = hbus->pci_bus;
+	struct pci_bus *bus = hbus->bridge->bus;
 	struct hv_pci_dev *hv_dev;
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
@@ -1850,21 +1914,22 @@ static void hv_pci_assign_numa_node(struct hv_pcibus_device *hbus)
  */
 static int create_root_hv_pci_bus(struct hv_pcibus_device *hbus)
 {
-	/* Register the device */
-	hbus->pci_bus = pci_create_root_bus(&hbus->hdev->device,
-					    0, /* bus number is always zero */
-					    &hv_pcifront_ops,
-					    &hbus->sysdata,
-					    &hbus->resources_for_children);
-	if (!hbus->pci_bus)
-		return -ENODEV;
+	int error;
+	struct pci_host_bridge *bridge = hbus->bridge;
+
+	bridge->dev.parent = &hbus->hdev->device;
+	bridge->sysdata = &hbus->sysdata;
+	bridge->ops = &hv_pcifront_ops;
+
+	error = pci_scan_root_bus_bridge(bridge);
+	if (error)
+		return error;
 
 	pci_lock_rescan_remove();
-	pci_scan_child_bus(hbus->pci_bus);
 	hv_pci_assign_numa_node(hbus);
-	pci_bus_assign_resources(hbus->pci_bus);
+	pci_bus_assign_resources(bridge->bus);
 	hv_pci_assign_slots(hbus);
-	pci_bus_add_devices(hbus->pci_bus);
+	pci_bus_add_devices(bridge->bus);
 	pci_unlock_rescan_remove();
 	hbus->state = hv_pcibus_installed;
 	return 0;
@@ -2127,7 +2192,7 @@ static void pci_devices_present_work(struct work_struct *work)
 		 * because there may have been changes.
 		 */
 		pci_lock_rescan_remove();
-		pci_scan_child_bus(hbus->pci_bus);
+		pci_scan_child_bus(hbus->bridge->bus);
 		hv_pci_assign_numa_node(hbus);
 		hv_pci_assign_slots(hbus);
 		pci_unlock_rescan_remove();
@@ -2295,11 +2360,11 @@ static void hv_eject_device_work(struct work_struct *work)
 	/*
 	 * Ejection can come before or after the PCI bus has been set up, so
 	 * attempt to find it and tear down the bus state, if it exists.  This
-	 * must be done without constructs like pci_domain_nr(hbus->pci_bus)
-	 * because hbus->pci_bus may not exist yet.
+	 * must be done without constructs like pci_domain_nr(hbus->bridge->bus)
+	 * because hbus->bridge->bus may not exist yet.
 	 */
 	wslot = wslot_to_devfn(hpdev->desc.win_slot.slot);
-	pdev = pci_get_domain_bus_and_slot(hbus->sysdata.domain, 0, wslot);
+	pdev = pci_get_domain_bus_and_slot(hbus->bridge->domain_nr, 0, wslot);
 	if (pdev) {
 		pci_lock_rescan_remove();
 		pci_stop_and_remove_bus_device(pdev);
@@ -2662,8 +2727,7 @@ static int hv_pci_allocate_bridge_windows(struct hv_pcibus_device *hbus)
 		/* Modify this resource to become a bridge window. */
 		hbus->low_mmio_res->flags |= IORESOURCE_WINDOW;
 		hbus->low_mmio_res->flags &= ~IORESOURCE_BUSY;
-		pci_add_resource(&hbus->resources_for_children,
-				 hbus->low_mmio_res);
+		pci_add_resource(&hbus->bridge->windows, hbus->low_mmio_res);
 	}
 
 	if (hbus->high_mmio_space) {
@@ -2682,8 +2746,7 @@ static int hv_pci_allocate_bridge_windows(struct hv_pcibus_device *hbus)
 		/* Modify this resource to become a bridge window. */
 		hbus->high_mmio_res->flags |= IORESOURCE_WINDOW;
 		hbus->high_mmio_res->flags &= ~IORESOURCE_BUSY;
-		pci_add_resource(&hbus->resources_for_children,
-				 hbus->high_mmio_res);
+		pci_add_resource(&hbus->bridge->windows, hbus->high_mmio_res);
 	}
 
 	return 0;
@@ -3002,6 +3065,7 @@ static void hv_put_dom_num(u16 dom)
 static int hv_pci_probe(struct hv_device *hdev,
 			const struct hv_vmbus_device_id *dev_id)
 {
+	struct pci_host_bridge *bridge;
 	struct hv_pcibus_device *hbus;
 	u16 dom_req, dom;
 	char *name;
@@ -3014,6 +3078,10 @@ static int hv_pci_probe(struct hv_device *hdev,
 	 */
 	BUILD_BUG_ON(sizeof(*hbus) > HV_HYP_PAGE_SIZE);
 
+	bridge = devm_pci_alloc_host_bridge(&hdev->device, 0);
+	if (!bridge)
+		return -ENOMEM;
+
 	/*
 	 * With the recent 59bb47985c1d ("mm, sl[aou]b: guarantee natural
 	 * alignment for kmalloc(power-of-two)"), kzalloc() is able to allocate
@@ -3035,6 +3103,8 @@ static int hv_pci_probe(struct hv_device *hdev,
 	hbus = kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
 	if (!hbus)
 		return -ENOMEM;
+
+	hbus->bridge = bridge;
 	hbus->state = hv_pcibus_init;
 	hbus->wslot_res_allocated = -1;
 
@@ -3066,17 +3136,19 @@ static int hv_pci_probe(struct hv_device *hdev,
 			 "PCI dom# 0x%hx has collision, using 0x%hx",
 			 dom_req, dom);
 
+	hbus->bridge->domain_nr = dom;
+#ifdef CONFIG_X86
 	hbus->sysdata.domain = dom;
+#endif
 
 	hbus->hdev = hdev;
 	INIT_LIST_HEAD(&hbus->children);
 	INIT_LIST_HEAD(&hbus->dr_list);
-	INIT_LIST_HEAD(&hbus->resources_for_children);
 	spin_lock_init(&hbus->config_lock);
 	spin_lock_init(&hbus->device_list_lock);
 	spin_lock_init(&hbus->retarget_msi_interrupt_lock);
 	hbus->wq = alloc_ordered_workqueue("hv_pci_%x", 0,
-					   hbus->sysdata.domain);
+					   hbus->bridge->domain_nr);
 	if (!hbus->wq) {
 		ret = -ENOMEM;
 		goto free_dom;
@@ -3113,9 +3185,9 @@ static int hv_pci_probe(struct hv_device *hdev,
 		goto unmap;
 	}
 
-	hbus->sysdata.fwnode = irq_domain_alloc_named_fwnode(name);
+	hbus->fwnode = irq_domain_alloc_named_fwnode(name);
 	kfree(name);
-	if (!hbus->sysdata.fwnode) {
+	if (!hbus->fwnode) {
 		ret = -ENOMEM;
 		goto unmap;
 	}
@@ -3193,7 +3265,7 @@ exit_d0:
 free_irq_domain:
 	irq_domain_remove(hbus->irq_domain);
 free_fwnode:
-	irq_domain_free_fwnode(hbus->sysdata.fwnode);
+	irq_domain_free_fwnode(hbus->fwnode);
 unmap:
 	iounmap(hbus->cfg_addr);
 free_config:
@@ -3203,7 +3275,7 @@ close:
 destroy_wq:
 	destroy_workqueue(hbus->wq);
 free_dom:
-	hv_put_dom_num(hbus->sysdata.domain);
+	hv_put_dom_num(hbus->bridge->domain_nr);
 free_bus:
 	kfree(hbus);
 	return ret;
@@ -3295,9 +3367,9 @@ static int hv_pci_remove(struct hv_device *hdev)
 
 		/* Remove the bus from PCI's point of view. */
 		pci_lock_rescan_remove();
-		pci_stop_root_bus(hbus->pci_bus);
+		pci_stop_root_bus(hbus->bridge->bus);
 		hv_pci_remove_slots(hbus);
-		pci_remove_root_bus(hbus->pci_bus);
+		pci_remove_root_bus(hbus->bridge->bus);
 		pci_unlock_rescan_remove();
 	}
 
@@ -3307,12 +3379,11 @@ static int hv_pci_remove(struct hv_device *hdev)
 
 	iounmap(hbus->cfg_addr);
 	hv_free_config_window(hbus);
-	pci_free_resource_list(&hbus->resources_for_children);
 	hv_pci_free_bridge_windows(hbus);
 	irq_domain_remove(hbus->irq_domain);
-	irq_domain_free_fwnode(hbus->sysdata.fwnode);
+	irq_domain_free_fwnode(hbus->fwnode);
 
-	hv_put_dom_num(hbus->sysdata.domain);
+	hv_put_dom_num(hbus->bridge->domain_nr);
 
 	kfree(hbus);
 	return ret;
@@ -3390,7 +3461,7 @@ static int hv_pci_restore_msi_msg(struct pci_dev *pdev, void *arg)
  */
 static void hv_pci_restore_msi_state(struct hv_pcibus_device *hbus)
 {
-	pci_walk_bus(hbus->pci_bus, hv_pci_restore_msi_msg, NULL);
+	pci_walk_bus(hbus->bridge->bus, hv_pci_restore_msi_msg, NULL);
 }
 
 static int hv_pci_resume(struct hv_device *hdev)
diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c
index c979229a6d0d..cb0aa65d6934 100644
--- a/drivers/pci/controller/pci-tegra.c
+++ b/drivers/pci/controller/pci-tegra.c
@@ -372,11 +372,6 @@ struct tegra_pcie_port {
 	struct gpio_desc *reset_gpio;
 };
 
-struct tegra_pcie_bus {
-	struct list_head list;
-	unsigned int nr;
-};
-
 static inline void afi_writel(struct tegra_pcie *pcie, u32 value,
 			      unsigned long offset)
 {
@@ -764,7 +759,7 @@ static int tegra_pcie_map_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
 
 static irqreturn_t tegra_pcie_isr(int irq, void *arg)
 {
-	const char *err_msg[] = {
+	static const char * const err_msg[] = {
 		"Unknown",
 		"AXI slave error",
 		"AXI decode error",
@@ -1553,12 +1548,10 @@ static void tegra_pcie_msi_irq(struct irq_desc *desc)
 		while (reg) {
 			unsigned int offset = find_first_bit(&reg, 32);
 			unsigned int index = i * 32 + offset;
-			unsigned int irq;
+			int ret;
 
-			irq = irq_find_mapping(msi->domain->parent, index);
-			if (irq) {
-				generic_handle_irq(irq);
-			} else {
+			ret = generic_handle_domain_irq(msi->domain->parent, index);
+			if (ret) {
 				/*
 				 * that's weird who triggered this?
 				 * just clear it
@@ -2193,13 +2186,15 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie)
 		rp->np = port;
 
 		rp->base = devm_pci_remap_cfg_resource(dev, &rp->regs);
-		if (IS_ERR(rp->base))
-			return PTR_ERR(rp->base);
+		if (IS_ERR(rp->base)) {
+			err = PTR_ERR(rp->base);
+			goto err_node_put;
+		}
 
 		label = devm_kasprintf(dev, GFP_KERNEL, "pex-reset-%u", index);
 		if (!label) {
-			dev_err(dev, "failed to create reset GPIO label\n");
-			return -ENOMEM;
+			err = -ENOMEM;
+			goto err_node_put;
 		}
 
 		/*
@@ -2217,7 +2212,8 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie)
 			} else {
 				dev_err(dev, "failed to get reset GPIO: %ld\n",
 					PTR_ERR(rp->reset_gpio));
-				return PTR_ERR(rp->reset_gpio);
+				err = PTR_ERR(rp->reset_gpio);
+				goto err_node_put;
 			}
 		}
 
@@ -2548,7 +2544,7 @@ static void *tegra_pcie_ports_seq_start(struct seq_file *s, loff_t *pos)
 	if (list_empty(&pcie->ports))
 		return NULL;
 
-	seq_printf(s, "Index  Status\n");
+	seq_puts(s, "Index  Status\n");
 
 	return seq_list_start(&pcie->ports, *pos);
 }
@@ -2585,16 +2581,16 @@ static int tegra_pcie_ports_seq_show(struct seq_file *s, void *v)
 	seq_printf(s, "%2u     ", port->index);
 
 	if (up)
-		seq_printf(s, "up");
+		seq_puts(s, "up");
 
 	if (active) {
 		if (up)
-			seq_printf(s, ", ");
+			seq_puts(s, ", ");
 
-		seq_printf(s, "active");
+		seq_puts(s, "active");
 	}
 
-	seq_printf(s, "\n");
+	seq_puts(s, "\n");
 	return 0;
 }
 
diff --git a/drivers/pci/controller/pci-xgene-msi.c b/drivers/pci/controller/pci-xgene-msi.c
index 1c34c897a7e2..b7a8e062fcc5 100644
--- a/drivers/pci/controller/pci-xgene-msi.c
+++ b/drivers/pci/controller/pci-xgene-msi.c
@@ -291,8 +291,7 @@ static void xgene_msi_isr(struct irq_desc *desc)
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct xgene_msi_group *msi_groups;
 	struct xgene_msi *xgene_msi;
-	unsigned int virq;
-	int msir_index, msir_val, hw_irq;
+	int msir_index, msir_val, hw_irq, ret;
 	u32 intr_index, grp_select, msi_grp;
 
 	chained_irq_enter(chip, desc);
@@ -330,10 +329,8 @@ static void xgene_msi_isr(struct irq_desc *desc)
 			 * CPU0
 			 */
 			hw_irq = hwirq_to_canonical_hwirq(hw_irq);
-			virq = irq_find_mapping(xgene_msi->inner_domain, hw_irq);
-			WARN_ON(!virq);
-			if (virq != 0)
-				generic_handle_irq(virq);
+			ret = generic_handle_domain_irq(xgene_msi->inner_domain, hw_irq);
+			WARN_ON_ONCE(ret);
 			msir_val &= ~(1 << intr_index);
 		}
 		grp_select &= ~(1 << msir_index);
@@ -451,7 +448,6 @@ static int xgene_msi_probe(struct platform_device *pdev)
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	xgene_msi->msi_regs = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(xgene_msi->msi_regs)) {
-		dev_err(&pdev->dev, "no reg space\n");
 		rc = PTR_ERR(xgene_msi->msi_regs);
 		goto error;
 	}
diff --git a/drivers/pci/controller/pcie-altera-msi.c b/drivers/pci/controller/pcie-altera-msi.c
index 98aa1dccc6e6..7b1d3ebc34ec 100644
--- a/drivers/pci/controller/pcie-altera-msi.c
+++ b/drivers/pci/controller/pcie-altera-msi.c
@@ -55,7 +55,7 @@ static void altera_msi_isr(struct irq_desc *desc)
 	struct altera_msi *msi;
 	unsigned long status;
 	u32 bit;
-	u32 virq;
+	int ret;
 
 	chained_irq_enter(chip, desc);
 	msi = irq_desc_get_handler_data(desc);
@@ -65,11 +65,9 @@ static void altera_msi_isr(struct irq_desc *desc)
 			/* Dummy read from vector to clear the interrupt */
 			readl_relaxed(msi->vector_base + (bit * sizeof(u32)));
 
-			virq = irq_find_mapping(msi->inner_domain, bit);
-			if (virq)
-				generic_handle_irq(virq);
-			else
-				dev_err(&msi->pdev->dev, "unexpected MSI\n");
+			ret = generic_handle_domain_irq(msi->inner_domain, bit);
+			if (ret)
+				dev_err_ratelimited(&msi->pdev->dev, "unexpected MSI\n");
 		}
 	}
 
diff --git a/drivers/pci/controller/pcie-altera.c b/drivers/pci/controller/pcie-altera.c
index 523bd928b380..2513e9363236 100644
--- a/drivers/pci/controller/pcie-altera.c
+++ b/drivers/pci/controller/pcie-altera.c
@@ -646,7 +646,7 @@ static void altera_pcie_isr(struct irq_desc *desc)
 	struct device *dev;
 	unsigned long status;
 	u32 bit;
-	u32 virq;
+	int ret;
 
 	chained_irq_enter(chip, desc);
 	pcie = irq_desc_get_handler_data(desc);
@@ -658,11 +658,9 @@ static void altera_pcie_isr(struct irq_desc *desc)
 			/* clear interrupts */
 			cra_writel(pcie, 1 << bit, P2A_INT_STATUS);
 
-			virq = irq_find_mapping(pcie->irq_domain, bit);
-			if (virq)
-				generic_handle_irq(virq);
-			else
-				dev_err(dev, "unexpected IRQ, INT%d\n", bit);
+			ret = generic_handle_domain_irq(pcie->irq_domain, bit);
+			if (ret)
+				dev_err_ratelimited(dev, "unexpected IRQ, INT%d\n", bit);
 		}
 	}
 
diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c
index 08bc788d9422..cc30215f5a43 100644
--- a/drivers/pci/controller/pcie-brcmstb.c
+++ b/drivers/pci/controller/pcie-brcmstb.c
@@ -476,7 +476,7 @@ static struct msi_domain_info brcm_msi_domain_info = {
 static void brcm_pcie_msi_isr(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
-	unsigned long status, virq;
+	unsigned long status;
 	struct brcm_msi *msi;
 	struct device *dev;
 	u32 bit;
@@ -489,10 +489,9 @@ static void brcm_pcie_msi_isr(struct irq_desc *desc)
 	status >>= msi->legacy_shift;
 
 	for_each_set_bit(bit, &status, msi->nr) {
-		virq = irq_find_mapping(msi->inner_domain, bit);
-		if (virq)
-			generic_handle_irq(virq);
-		else
+		int ret;
+		ret = generic_handle_domain_irq(msi->inner_domain, bit);
+		if (ret)
 			dev_dbg(dev, "unexpected MSI\n");
 	}
 
diff --git a/drivers/pci/controller/pcie-iproc-bcma.c b/drivers/pci/controller/pcie-iproc-bcma.c
index 56b8ee7bf330..f918c713afb0 100644
--- a/drivers/pci/controller/pcie-iproc-bcma.c
+++ b/drivers/pci/controller/pcie-iproc-bcma.c
@@ -35,7 +35,6 @@ static int iproc_pcie_bcma_probe(struct bcma_device *bdev)
 {
 	struct device *dev = &bdev->dev;
 	struct iproc_pcie *pcie;
-	LIST_HEAD(resources);
 	struct pci_host_bridge *bridge;
 	int ret;
 
@@ -60,19 +59,16 @@ static int iproc_pcie_bcma_probe(struct bcma_device *bdev)
 	pcie->mem.end = bdev->addr_s[0] + SZ_128M - 1;
 	pcie->mem.name = "PCIe MEM space";
 	pcie->mem.flags = IORESOURCE_MEM;
-	pci_add_resource(&resources, &pcie->mem);
+	pci_add_resource(&bridge->windows, &pcie->mem);
+	ret = devm_request_pci_bus_resources(dev, &bridge->windows);
+	if (ret)
+		return ret;
 
 	pcie->map_irq = iproc_pcie_bcma_map_irq;
 
-	ret = iproc_pcie_setup(pcie, &resources);
-	if (ret) {
-		dev_err(dev, "PCIe controller setup failed\n");
-		pci_free_resource_list(&resources);
-		return ret;
-	}
-
 	bcma_set_drvdata(bdev, pcie);
-	return 0;
+
+	return iproc_pcie_setup(pcie, &bridge->windows);
 }
 
 static void iproc_pcie_bcma_remove(struct bcma_device *bdev)
diff --git a/drivers/pci/controller/pcie-iproc-msi.c b/drivers/pci/controller/pcie-iproc-msi.c
index 35a82124a126..757b7fbcdc59 100644
--- a/drivers/pci/controller/pcie-iproc-msi.c
+++ b/drivers/pci/controller/pcie-iproc-msi.c
@@ -326,7 +326,6 @@ static void iproc_msi_handler(struct irq_desc *desc)
 	struct iproc_msi *msi;
 	u32 eq, head, tail, nr_events;
 	unsigned long hwirq;
-	int virq;
 
 	chained_irq_enter(chip, desc);
 
@@ -362,8 +361,7 @@ static void iproc_msi_handler(struct irq_desc *desc)
 		/* process all outstanding events */
 		while (nr_events--) {
 			hwirq = decode_msi_hwirq(msi, eq, head);
-			virq = irq_find_mapping(msi->inner_domain, hwirq);
-			generic_handle_irq(virq);
+			generic_handle_domain_irq(msi->inner_domain, hwirq);
 
 			head++;
 			head %= EQ_LEN;
diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c b/drivers/pci/controller/pcie-mediatek-gen3.c
index f3aeb8d4eaca..17c59b0d6978 100644
--- a/drivers/pci/controller/pcie-mediatek-gen3.c
+++ b/drivers/pci/controller/pcie-mediatek-gen3.c
@@ -645,7 +645,6 @@ static void mtk_pcie_msi_handler(struct mtk_pcie_port *port, int set_idx)
 {
 	struct mtk_msi_set *msi_set = &port->msi_sets[set_idx];
 	unsigned long msi_enable, msi_status;
-	unsigned int virq;
 	irq_hw_number_t bit, hwirq;
 
 	msi_enable = readl_relaxed(msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET);
@@ -659,8 +658,7 @@ static void mtk_pcie_msi_handler(struct mtk_pcie_port *port, int set_idx)
 
 		for_each_set_bit(bit, &msi_status, PCIE_MSI_IRQS_PER_SET) {
 			hwirq = bit + set_idx * PCIE_MSI_IRQS_PER_SET;
-			virq = irq_find_mapping(port->msi_bottom_domain, hwirq);
-			generic_handle_irq(virq);
+			generic_handle_domain_irq(port->msi_bottom_domain, hwirq);
 		}
 	} while (true);
 }
@@ -670,18 +668,15 @@ static void mtk_pcie_irq_handler(struct irq_desc *desc)
 	struct mtk_pcie_port *port = irq_desc_get_handler_data(desc);
 	struct irq_chip *irqchip = irq_desc_get_chip(desc);
 	unsigned long status;
-	unsigned int virq;
 	irq_hw_number_t irq_bit = PCIE_INTX_SHIFT;
 
 	chained_irq_enter(irqchip, desc);
 
 	status = readl_relaxed(port->base + PCIE_INT_STATUS_REG);
 	for_each_set_bit_from(irq_bit, &status, PCI_NUM_INTX +
-			      PCIE_INTX_SHIFT) {
-		virq = irq_find_mapping(port->intx_domain,
-					irq_bit - PCIE_INTX_SHIFT);
-		generic_handle_irq(virq);
-	}
+			      PCIE_INTX_SHIFT)
+		generic_handle_domain_irq(port->intx_domain,
+					  irq_bit - PCIE_INTX_SHIFT);
 
 	irq_bit = PCIE_MSI_SHIFT;
 	for_each_set_bit_from(irq_bit, &status, PCIE_MSI_SET_NUM +
diff --git a/drivers/pci/controller/pcie-mediatek.c b/drivers/pci/controller/pcie-mediatek.c
index 25bee693834f..2f3f974977a3 100644
--- a/drivers/pci/controller/pcie-mediatek.c
+++ b/drivers/pci/controller/pcie-mediatek.c
@@ -14,6 +14,7 @@
 #include <linux/irqchip/chained_irq.h>
 #include <linux/irqdomain.h>
 #include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
 #include <linux/msi.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
@@ -23,6 +24,7 @@
 #include <linux/phy/phy.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
+#include <linux/regmap.h>
 #include <linux/reset.h>
 
 #include "../pci.h"
@@ -207,6 +209,7 @@ struct mtk_pcie_port {
  * struct mtk_pcie - PCIe host information
  * @dev: pointer to PCIe device
  * @base: IO mapped register base
+ * @cfg: IO mapped register map for PCIe config
  * @free_ck: free-run reference clock
  * @mem: non-prefetchable memory resource
  * @ports: pointer to PCIe port information
@@ -215,6 +218,7 @@ struct mtk_pcie_port {
 struct mtk_pcie {
 	struct device *dev;
 	void __iomem *base;
+	struct regmap *cfg;
 	struct clk *free_ck;
 
 	struct list_head ports;
@@ -602,7 +606,6 @@ static void mtk_pcie_intr_handler(struct irq_desc *desc)
 	struct mtk_pcie_port *port = irq_desc_get_handler_data(desc);
 	struct irq_chip *irqchip = irq_desc_get_chip(desc);
 	unsigned long status;
-	u32 virq;
 	u32 bit = INTX_SHIFT;
 
 	chained_irq_enter(irqchip, desc);
@@ -612,9 +615,8 @@ static void mtk_pcie_intr_handler(struct irq_desc *desc)
 		for_each_set_bit_from(bit, &status, PCI_NUM_INTX + INTX_SHIFT) {
 			/* Clear the INTx */
 			writel(1 << bit, port->base + PCIE_INT_STATUS);
-			virq = irq_find_mapping(port->irq_domain,
-						bit - INTX_SHIFT);
-			generic_handle_irq(virq);
+			generic_handle_domain_irq(port->irq_domain,
+						  bit - INTX_SHIFT);
 		}
 	}
 
@@ -623,10 +625,8 @@ static void mtk_pcie_intr_handler(struct irq_desc *desc)
 			unsigned long imsi_status;
 
 			while ((imsi_status = readl(port->base + PCIE_IMSI_STATUS))) {
-				for_each_set_bit(bit, &imsi_status, MTK_MSI_IRQS_NUM) {
-					virq = irq_find_mapping(port->inner_domain, bit);
-					generic_handle_irq(virq);
-				}
+				for_each_set_bit(bit, &imsi_status, MTK_MSI_IRQS_NUM)
+					generic_handle_domain_irq(port->inner_domain, bit);
 			}
 			/* Clear MSI interrupt status */
 			writel(MSI_STATUS, port->base + PCIE_INT_STATUS);
@@ -650,7 +650,11 @@ static int mtk_pcie_setup_irq(struct mtk_pcie_port *port,
 		return err;
 	}
 
-	port->irq = platform_get_irq(pdev, port->slot);
+	if (of_find_property(dev->of_node, "interrupt-names", NULL))
+		port->irq = platform_get_irq_byname(pdev, "pcie_irq");
+	else
+		port->irq = platform_get_irq(pdev, port->slot);
+
 	if (port->irq < 0)
 		return port->irq;
 
@@ -682,6 +686,10 @@ static int mtk_pcie_startup_port_v2(struct mtk_pcie_port *port)
 		val |= PCIE_CSR_LTSSM_EN(port->slot) |
 		       PCIE_CSR_ASPM_L1_EN(port->slot);
 		writel(val, pcie->base + PCIE_SYS_CFG_V2);
+	} else if (pcie->cfg) {
+		val = PCIE_CSR_LTSSM_EN(port->slot) |
+		      PCIE_CSR_ASPM_L1_EN(port->slot);
+		regmap_update_bits(pcie->cfg, PCIE_SYS_CFG_V2, val, val);
 	}
 
 	/* Assert all reset signals */
@@ -985,6 +993,7 @@ static int mtk_pcie_subsys_powerup(struct mtk_pcie *pcie)
 	struct device *dev = pcie->dev;
 	struct platform_device *pdev = to_platform_device(dev);
 	struct resource *regs;
+	struct device_node *cfg_node;
 	int err;
 
 	/* get shared registers, which are optional */
@@ -995,6 +1004,14 @@ static int mtk_pcie_subsys_powerup(struct mtk_pcie *pcie)
 			return PTR_ERR(pcie->base);
 	}
 
+	cfg_node = of_find_compatible_node(NULL, NULL,
+					   "mediatek,generic-pciecfg");
+	if (cfg_node) {
+		pcie->cfg = syscon_node_to_regmap(cfg_node);
+		if (IS_ERR(pcie->cfg))
+			return PTR_ERR(pcie->cfg);
+	}
+
 	pcie->free_ck = devm_clk_get(dev, "free_ck");
 	if (IS_ERR(pcie->free_ck)) {
 		if (PTR_ERR(pcie->free_ck) == -EPROBE_DEFER)
@@ -1027,22 +1044,27 @@ static int mtk_pcie_setup(struct mtk_pcie *pcie)
 	struct device *dev = pcie->dev;
 	struct device_node *node = dev->of_node, *child;
 	struct mtk_pcie_port *port, *tmp;
-	int err;
+	int err, slot;
+
+	slot = of_get_pci_domain_nr(dev->of_node);
+	if (slot < 0) {
+		for_each_available_child_of_node(node, child) {
+			err = of_pci_get_devfn(child);
+			if (err < 0) {
+				dev_err(dev, "failed to get devfn: %d\n", err);
+				goto error_put_node;
+			}
 
-	for_each_available_child_of_node(node, child) {
-		int slot;
+			slot = PCI_SLOT(err);
 
-		err = of_pci_get_devfn(child);
-		if (err < 0) {
-			dev_err(dev, "failed to parse devfn: %d\n", err);
-			goto error_put_node;
+			err = mtk_pcie_parse_port(pcie, child, slot);
+			if (err)
+				goto error_put_node;
 		}
-
-		slot = PCI_SLOT(err);
-
-		err = mtk_pcie_parse_port(pcie, child, slot);
+	} else {
+		err = mtk_pcie_parse_port(pcie, node, slot);
 		if (err)
-			goto error_put_node;
+			return err;
 	}
 
 	err = mtk_pcie_subsys_powerup(pcie);
diff --git a/drivers/pci/controller/pcie-microchip-host.c b/drivers/pci/controller/pcie-microchip-host.c
index fdab8202ae5d..329f930d17aa 100644
--- a/drivers/pci/controller/pcie-microchip-host.c
+++ b/drivers/pci/controller/pcie-microchip-host.c
@@ -412,16 +412,14 @@ static void mc_handle_msi(struct irq_desc *desc)
 		port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
 	unsigned long status;
 	u32 bit;
-	u32 virq;
+	int ret;
 
 	status = readl_relaxed(bridge_base_addr + ISTATUS_LOCAL);
 	if (status & PM_MSI_INT_MSI_MASK) {
 		status = readl_relaxed(bridge_base_addr + ISTATUS_MSI);
 		for_each_set_bit(bit, &status, msi->num_vectors) {
-			virq = irq_find_mapping(msi->dev_domain, bit);
-			if (virq)
-				generic_handle_irq(virq);
-			else
+			ret = generic_handle_domain_irq(msi->dev_domain, bit);
+			if (ret)
 				dev_err_ratelimited(dev, "bad MSI IRQ %d\n",
 						    bit);
 		}
@@ -570,17 +568,15 @@ static void mc_handle_intx(struct irq_desc *desc)
 		port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
 	unsigned long status;
 	u32 bit;
-	u32 virq;
+	int ret;
 
 	status = readl_relaxed(bridge_base_addr + ISTATUS_LOCAL);
 	if (status & PM_MSI_INT_INTX_MASK) {
 		status &= PM_MSI_INT_INTX_MASK;
 		status >>= PM_MSI_INT_INTX_SHIFT;
 		for_each_set_bit(bit, &status, PCI_NUM_INTX) {
-			virq = irq_find_mapping(port->intx_domain, bit);
-			if (virq)
-				generic_handle_irq(virq);
-			else
+			ret = generic_handle_domain_irq(port->intx_domain, bit);
+			if (ret)
 				dev_err_ratelimited(dev, "bad INTx IRQ %d\n",
 						    bit);
 		}
@@ -745,7 +741,7 @@ static void mc_handle_event(struct irq_desc *desc)
 	events = get_events(port);
 
 	for_each_set_bit(bit, &events, NUM_EVENTS)
-		generic_handle_irq(irq_find_mapping(port->event_domain, bit));
+		generic_handle_domain_irq(port->event_domain, bit);
 
 	chained_irq_exit(chip, desc);
 }
diff --git a/drivers/pci/controller/pcie-rcar-ep.c b/drivers/pci/controller/pcie-rcar-ep.c
index b4a288e24aaf..aa1cf24a5a72 100644
--- a/drivers/pci/controller/pcie-rcar-ep.c
+++ b/drivers/pci/controller/pcie-rcar-ep.c
@@ -159,7 +159,7 @@ static int rcar_pcie_ep_get_pdata(struct rcar_pcie_endpoint *ep,
 	return 0;
 }
 
-static int rcar_pcie_ep_write_header(struct pci_epc *epc, u8 fn,
+static int rcar_pcie_ep_write_header(struct pci_epc *epc, u8 fn, u8 vfn,
 				     struct pci_epf_header *hdr)
 {
 	struct rcar_pcie_endpoint *ep = epc_get_drvdata(epc);
@@ -195,7 +195,7 @@ static int rcar_pcie_ep_write_header(struct pci_epc *epc, u8 fn,
 	return 0;
 }
 
-static int rcar_pcie_ep_set_bar(struct pci_epc *epc, u8 func_no,
+static int rcar_pcie_ep_set_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 				struct pci_epf_bar *epf_bar)
 {
 	int flags = epf_bar->flags | LAR_ENABLE | LAM_64BIT;
@@ -246,7 +246,7 @@ static int rcar_pcie_ep_set_bar(struct pci_epc *epc, u8 func_no,
 	return 0;
 }
 
-static void rcar_pcie_ep_clear_bar(struct pci_epc *epc, u8 fn,
+static void rcar_pcie_ep_clear_bar(struct pci_epc *epc, u8 fn, u8 vfn,
 				   struct pci_epf_bar *epf_bar)
 {
 	struct rcar_pcie_endpoint *ep = epc_get_drvdata(epc);
@@ -259,7 +259,8 @@ static void rcar_pcie_ep_clear_bar(struct pci_epc *epc, u8 fn,
 	clear_bit(atu_index + 1, ep->ib_window_map);
 }
 
-static int rcar_pcie_ep_set_msi(struct pci_epc *epc, u8 fn, u8 interrupts)
+static int rcar_pcie_ep_set_msi(struct pci_epc *epc, u8 fn, u8 vfn,
+				u8 interrupts)
 {
 	struct rcar_pcie_endpoint *ep = epc_get_drvdata(epc);
 	struct rcar_pcie *pcie = &ep->pcie;
@@ -272,7 +273,7 @@ static int rcar_pcie_ep_set_msi(struct pci_epc *epc, u8 fn, u8 interrupts)
 	return 0;
 }
 
-static int rcar_pcie_ep_get_msi(struct pci_epc *epc, u8 fn)
+static int rcar_pcie_ep_get_msi(struct pci_epc *epc, u8 fn, u8 vfn)
 {
 	struct rcar_pcie_endpoint *ep = epc_get_drvdata(epc);
 	struct rcar_pcie *pcie = &ep->pcie;
@@ -285,7 +286,7 @@ static int rcar_pcie_ep_get_msi(struct pci_epc *epc, u8 fn)
 	return ((flags & MSICAP0_MMESE_MASK) >> MSICAP0_MMESE_OFFSET);
 }
 
-static int rcar_pcie_ep_map_addr(struct pci_epc *epc, u8 fn,
+static int rcar_pcie_ep_map_addr(struct pci_epc *epc, u8 fn, u8 vfn,
 				 phys_addr_t addr, u64 pci_addr, size_t size)
 {
 	struct rcar_pcie_endpoint *ep = epc_get_drvdata(epc);
@@ -322,7 +323,7 @@ static int rcar_pcie_ep_map_addr(struct pci_epc *epc, u8 fn,
 	return 0;
 }
 
-static void rcar_pcie_ep_unmap_addr(struct pci_epc *epc, u8 fn,
+static void rcar_pcie_ep_unmap_addr(struct pci_epc *epc, u8 fn, u8 vfn,
 				    phys_addr_t addr)
 {
 	struct rcar_pcie_endpoint *ep = epc_get_drvdata(epc);
@@ -403,7 +404,7 @@ static int rcar_pcie_ep_assert_msi(struct rcar_pcie *pcie,
 	return 0;
 }
 
-static int rcar_pcie_ep_raise_irq(struct pci_epc *epc, u8 fn,
+static int rcar_pcie_ep_raise_irq(struct pci_epc *epc, u8 fn, u8 vfn,
 				  enum pci_epc_irq_type type,
 				  u16 interrupt_num)
 {
@@ -451,7 +452,7 @@ static const struct pci_epc_features rcar_pcie_epc_features = {
 };
 
 static const struct pci_epc_features*
-rcar_pcie_ep_get_features(struct pci_epc *epc, u8 func_no)
+rcar_pcie_ep_get_features(struct pci_epc *epc, u8 func_no, u8 vfunc_no)
 {
 	return &rcar_pcie_epc_features;
 }
@@ -492,9 +493,9 @@ static int rcar_pcie_ep_probe(struct platform_device *pdev)
 	pcie->dev = dev;
 
 	pm_runtime_enable(dev);
-	err = pm_runtime_get_sync(dev);
+	err = pm_runtime_resume_and_get(dev);
 	if (err < 0) {
-		dev_err(dev, "pm_runtime_get_sync failed\n");
+		dev_err(dev, "pm_runtime_resume_and_get failed\n");
 		goto err_pm_disable;
 	}
 
diff --git a/drivers/pci/controller/pcie-rcar-host.c b/drivers/pci/controller/pcie-rcar-host.c
index 765cf2b45e24..8f3131844e77 100644
--- a/drivers/pci/controller/pcie-rcar-host.c
+++ b/drivers/pci/controller/pcie-rcar-host.c
@@ -13,12 +13,14 @@
 
 #include <linux/bitops.h>
 #include <linux/clk.h>
+#include <linux/clk-provider.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/irqdomain.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/iopoll.h>
 #include <linux/msi.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
@@ -41,6 +43,21 @@ struct rcar_msi {
 	int irq2;
 };
 
+#ifdef CONFIG_ARM
+/*
+ * Here we keep a static copy of the remapped PCIe controller address.
+ * This is only used on aarch32 systems, all of which have one single
+ * PCIe controller, to provide quick access to the PCIe controller in
+ * the L1 link state fixup function, called from the ARM fault handler.
+ */
+static void __iomem *pcie_base;
+/*
+ * Static copy of bus clock pointer, so we can check whether the clock
+ * is enabled or not.
+ */
+static struct clk *pcie_bus_clk;
+#endif
+
 /* Structure representing the PCIe interface */
 struct rcar_pcie_host {
 	struct rcar_pcie	pcie;
@@ -486,12 +503,10 @@ static irqreturn_t rcar_pcie_msi_irq(int irq, void *data)
 
 	while (reg) {
 		unsigned int index = find_first_bit(&reg, 32);
-		unsigned int msi_irq;
+		int ret;
 
-		msi_irq = irq_find_mapping(msi->domain->parent, index);
-		if (msi_irq) {
-			generic_handle_irq(msi_irq);
-		} else {
+		ret = generic_handle_domain_irq(msi->domain->parent, index);
+		if (ret) {
 			/* Unknown MSI, just clear it */
 			dev_dbg(dev, "unexpected MSI\n");
 			rcar_pci_write_reg(pcie, BIT(index), PCIEMSIFR);
@@ -776,6 +791,12 @@ static int rcar_pcie_get_resources(struct rcar_pcie_host *host)
 	}
 	host->msi.irq2 = i;
 
+#ifdef CONFIG_ARM
+	/* Cache static copy for L1 link state fixup hook on aarch32 */
+	pcie_base = pcie->base;
+	pcie_bus_clk = host->bus_clk;
+#endif
+
 	return 0;
 
 err_irq2:
@@ -1031,4 +1052,67 @@ static struct platform_driver rcar_pcie_driver = {
 	},
 	.probe = rcar_pcie_probe,
 };
+
+#ifdef CONFIG_ARM
+static DEFINE_SPINLOCK(pmsr_lock);
+static int rcar_pcie_aarch32_abort_handler(unsigned long addr,
+		unsigned int fsr, struct pt_regs *regs)
+{
+	unsigned long flags;
+	u32 pmsr, val;
+	int ret = 0;
+
+	spin_lock_irqsave(&pmsr_lock, flags);
+
+	if (!pcie_base || !__clk_is_enabled(pcie_bus_clk)) {
+		ret = 1;
+		goto unlock_exit;
+	}
+
+	pmsr = readl(pcie_base + PMSR);
+
+	/*
+	 * Test if the PCIe controller received PM_ENTER_L1 DLLP and
+	 * the PCIe controller is not in L1 link state. If true, apply
+	 * fix, which will put the controller into L1 link state, from
+	 * which it can return to L0s/L0 on its own.
+	 */
+	if ((pmsr & PMEL1RX) && ((pmsr & PMSTATE) != PMSTATE_L1)) {
+		writel(L1IATN, pcie_base + PMCTLR);
+		ret = readl_poll_timeout_atomic(pcie_base + PMSR, val,
+						val & L1FAEG, 10, 1000);
+		WARN(ret, "Timeout waiting for L1 link state, ret=%d\n", ret);
+		writel(L1FAEG | PMEL1RX, pcie_base + PMSR);
+	}
+
+unlock_exit:
+	spin_unlock_irqrestore(&pmsr_lock, flags);
+	return ret;
+}
+
+static const struct of_device_id rcar_pcie_abort_handler_of_match[] __initconst = {
+	{ .compatible = "renesas,pcie-r8a7779" },
+	{ .compatible = "renesas,pcie-r8a7790" },
+	{ .compatible = "renesas,pcie-r8a7791" },
+	{ .compatible = "renesas,pcie-rcar-gen2" },
+	{},
+};
+
+static int __init rcar_pcie_init(void)
+{
+	if (of_find_matching_node(NULL, rcar_pcie_abort_handler_of_match)) {
+#ifdef CONFIG_ARM_LPAE
+		hook_fault_code(17, rcar_pcie_aarch32_abort_handler, SIGBUS, 0,
+				"asynchronous external abort");
+#else
+		hook_fault_code(22, rcar_pcie_aarch32_abort_handler, SIGBUS, 0,
+				"imprecise external abort");
+#endif
+	}
+
+	return platform_driver_register(&rcar_pcie_driver);
+}
+device_initcall(rcar_pcie_init);
+#else
 builtin_platform_driver(rcar_pcie_driver);
+#endif
diff --git a/drivers/pci/controller/pcie-rcar.h b/drivers/pci/controller/pcie-rcar.h
index d4c698b5f821..9bb125db85c6 100644
--- a/drivers/pci/controller/pcie-rcar.h
+++ b/drivers/pci/controller/pcie-rcar.h
@@ -85,6 +85,13 @@
 #define  LTSMDIS		BIT(31)
 #define  MACCTLR_INIT_VAL	(LTSMDIS | MACCTLR_NFTS_MASK)
 #define PMSR			0x01105c
+#define  L1FAEG			BIT(31)
+#define  PMEL1RX		BIT(23)
+#define  PMSTATE		GENMASK(18, 16)
+#define  PMSTATE_L1		(3 << 16)
+#define PMCTLR			0x011060
+#define  L1IATN			BIT(31)
+
 #define MACS2R			0x011078
 #define MACCGSPSETR		0x011084
 #define  SPCNGRSN		BIT(31)
diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c
index 7631dc3961c1..5fb9ce6e536e 100644
--- a/drivers/pci/controller/pcie-rockchip-ep.c
+++ b/drivers/pci/controller/pcie-rockchip-ep.c
@@ -122,7 +122,7 @@ static void rockchip_pcie_prog_ep_ob_atu(struct rockchip_pcie *rockchip, u8 fn,
 			    ROCKCHIP_PCIE_AT_OB_REGION_CPU_ADDR1(r));
 }
 
-static int rockchip_pcie_ep_write_header(struct pci_epc *epc, u8 fn,
+static int rockchip_pcie_ep_write_header(struct pci_epc *epc, u8 fn, u8 vfn,
 					 struct pci_epf_header *hdr)
 {
 	struct rockchip_pcie_ep *ep = epc_get_drvdata(epc);
@@ -159,7 +159,7 @@ static int rockchip_pcie_ep_write_header(struct pci_epc *epc, u8 fn,
 	return 0;
 }
 
-static int rockchip_pcie_ep_set_bar(struct pci_epc *epc, u8 fn,
+static int rockchip_pcie_ep_set_bar(struct pci_epc *epc, u8 fn, u8 vfn,
 				    struct pci_epf_bar *epf_bar)
 {
 	struct rockchip_pcie_ep *ep = epc_get_drvdata(epc);
@@ -227,7 +227,7 @@ static int rockchip_pcie_ep_set_bar(struct pci_epc *epc, u8 fn,
 	return 0;
 }
 
-static void rockchip_pcie_ep_clear_bar(struct pci_epc *epc, u8 fn,
+static void rockchip_pcie_ep_clear_bar(struct pci_epc *epc, u8 fn, u8 vfn,
 				       struct pci_epf_bar *epf_bar)
 {
 	struct rockchip_pcie_ep *ep = epc_get_drvdata(epc);
@@ -256,7 +256,7 @@ static void rockchip_pcie_ep_clear_bar(struct pci_epc *epc, u8 fn,
 			    ROCKCHIP_PCIE_AT_IB_EP_FUNC_BAR_ADDR1(fn, bar));
 }
 
-static int rockchip_pcie_ep_map_addr(struct pci_epc *epc, u8 fn,
+static int rockchip_pcie_ep_map_addr(struct pci_epc *epc, u8 fn, u8 vfn,
 				     phys_addr_t addr, u64 pci_addr,
 				     size_t size)
 {
@@ -284,7 +284,7 @@ static int rockchip_pcie_ep_map_addr(struct pci_epc *epc, u8 fn,
 	return 0;
 }
 
-static void rockchip_pcie_ep_unmap_addr(struct pci_epc *epc, u8 fn,
+static void rockchip_pcie_ep_unmap_addr(struct pci_epc *epc, u8 fn, u8 vfn,
 					phys_addr_t addr)
 {
 	struct rockchip_pcie_ep *ep = epc_get_drvdata(epc);
@@ -308,7 +308,7 @@ static void rockchip_pcie_ep_unmap_addr(struct pci_epc *epc, u8 fn,
 	clear_bit(r, &ep->ob_region_map);
 }
 
-static int rockchip_pcie_ep_set_msi(struct pci_epc *epc, u8 fn,
+static int rockchip_pcie_ep_set_msi(struct pci_epc *epc, u8 fn, u8 vfn,
 				    u8 multi_msg_cap)
 {
 	struct rockchip_pcie_ep *ep = epc_get_drvdata(epc);
@@ -329,7 +329,7 @@ static int rockchip_pcie_ep_set_msi(struct pci_epc *epc, u8 fn,
 	return 0;
 }
 
-static int rockchip_pcie_ep_get_msi(struct pci_epc *epc, u8 fn)
+static int rockchip_pcie_ep_get_msi(struct pci_epc *epc, u8 fn, u8 vfn)
 {
 	struct rockchip_pcie_ep *ep = epc_get_drvdata(epc);
 	struct rockchip_pcie *rockchip = &ep->rockchip;
@@ -471,7 +471,7 @@ static int rockchip_pcie_ep_send_msi_irq(struct rockchip_pcie_ep *ep, u8 fn,
 	return 0;
 }
 
-static int rockchip_pcie_ep_raise_irq(struct pci_epc *epc, u8 fn,
+static int rockchip_pcie_ep_raise_irq(struct pci_epc *epc, u8 fn, u8 vfn,
 				      enum pci_epc_irq_type type,
 				      u16 interrupt_num)
 {
@@ -510,7 +510,7 @@ static const struct pci_epc_features rockchip_pcie_epc_features = {
 };
 
 static const struct pci_epc_features*
-rockchip_pcie_ep_get_features(struct pci_epc *epc, u8 func_no)
+rockchip_pcie_ep_get_features(struct pci_epc *epc, u8 func_no, u8 vfunc_no)
 {
 	return &rockchip_pcie_epc_features;
 }
diff --git a/drivers/pci/controller/pcie-rockchip-host.c b/drivers/pci/controller/pcie-rockchip-host.c
index 78d04ac29cd5..c52316d0bfd2 100644
--- a/drivers/pci/controller/pcie-rockchip-host.c
+++ b/drivers/pci/controller/pcie-rockchip-host.c
@@ -517,7 +517,7 @@ static void rockchip_pcie_legacy_int_handler(struct irq_desc *desc)
 	struct device *dev = rockchip->dev;
 	u32 reg;
 	u32 hwirq;
-	u32 virq;
+	int ret;
 
 	chained_irq_enter(chip, desc);
 
@@ -528,10 +528,8 @@ static void rockchip_pcie_legacy_int_handler(struct irq_desc *desc)
 		hwirq = ffs(reg) - 1;
 		reg &= ~BIT(hwirq);
 
-		virq = irq_find_mapping(rockchip->irq_domain, hwirq);
-		if (virq)
-			generic_handle_irq(virq);
-		else
+		ret = generic_handle_domain_irq(rockchip->irq_domain, hwirq);
+		if (ret)
 			dev_err(dev, "unexpected IRQ, INT%d\n", hwirq);
 	}
 
diff --git a/drivers/pci/controller/pcie-xilinx-cpm.c b/drivers/pci/controller/pcie-xilinx-cpm.c
index 67937facd90c..95426df03200 100644
--- a/drivers/pci/controller/pcie-xilinx-cpm.c
+++ b/drivers/pci/controller/pcie-xilinx-cpm.c
@@ -222,7 +222,7 @@ static void xilinx_cpm_pcie_intx_flow(struct irq_desc *desc)
 			pcie_read(port, XILINX_CPM_PCIE_REG_IDRN));
 
 	for_each_set_bit(i, &val, PCI_NUM_INTX)
-		generic_handle_irq(irq_find_mapping(port->intx_domain, i));
+		generic_handle_domain_irq(port->intx_domain, i);
 
 	chained_irq_exit(chip, desc);
 }
@@ -282,7 +282,7 @@ static void xilinx_cpm_pcie_event_flow(struct irq_desc *desc)
 	val =  pcie_read(port, XILINX_CPM_PCIE_REG_IDR);
 	val &= pcie_read(port, XILINX_CPM_PCIE_REG_IMR);
 	for_each_set_bit(i, &val, 32)
-		generic_handle_irq(irq_find_mapping(port->cpm_domain, i));
+		generic_handle_domain_irq(port->cpm_domain, i);
 	pcie_write(port, val, XILINX_CPM_PCIE_REG_IDR);
 
 	/*
diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c b/drivers/pci/controller/pcie-xilinx-nwl.c
index 8689311c5ef6..a72b4f9a2b00 100644
--- a/drivers/pci/controller/pcie-xilinx-nwl.c
+++ b/drivers/pci/controller/pcie-xilinx-nwl.c
@@ -6,6 +6,7 @@
  * (C) Copyright 2014 - 2015, Xilinx, Inc.
  */
 
+#include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
@@ -169,6 +170,7 @@ struct nwl_pcie {
 	u8 last_busno;
 	struct nwl_msi msi;
 	struct irq_domain *legacy_irq_domain;
+	struct clk *clk;
 	raw_spinlock_t leg_mask_lock;
 };
 
@@ -318,18 +320,14 @@ static void nwl_pcie_leg_handler(struct irq_desc *desc)
 	struct nwl_pcie *pcie;
 	unsigned long status;
 	u32 bit;
-	u32 virq;
 
 	chained_irq_enter(chip, desc);
 	pcie = irq_desc_get_handler_data(desc);
 
 	while ((status = nwl_bridge_readl(pcie, MSGF_LEG_STATUS) &
 				MSGF_LEG_SR_MASKALL) != 0) {
-		for_each_set_bit(bit, &status, PCI_NUM_INTX) {
-			virq = irq_find_mapping(pcie->legacy_irq_domain, bit);
-			if (virq)
-				generic_handle_irq(virq);
-		}
+		for_each_set_bit(bit, &status, PCI_NUM_INTX)
+			generic_handle_domain_irq(pcie->legacy_irq_domain, bit);
 	}
 
 	chained_irq_exit(chip, desc);
@@ -340,16 +338,13 @@ static void nwl_pcie_handle_msi_irq(struct nwl_pcie *pcie, u32 status_reg)
 	struct nwl_msi *msi;
 	unsigned long status;
 	u32 bit;
-	u32 virq;
 
 	msi = &pcie->msi;
 
 	while ((status = nwl_bridge_readl(pcie, status_reg)) != 0) {
 		for_each_set_bit(bit, &status, 32) {
 			nwl_bridge_writel(pcie, 1 << bit, status_reg);
-			virq = irq_find_mapping(msi->dev_domain, bit);
-			if (virq)
-				generic_handle_irq(virq);
+			generic_handle_domain_irq(msi->dev_domain, bit);
 		}
 	}
 }
@@ -823,6 +818,16 @@ static int nwl_pcie_probe(struct platform_device *pdev)
 		return err;
 	}
 
+	pcie->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(pcie->clk))
+		return PTR_ERR(pcie->clk);
+
+	err = clk_prepare_enable(pcie->clk);
+	if (err) {
+		dev_err(dev, "can't enable PCIe ref clock\n");
+		return err;
+	}
+
 	err = nwl_pcie_bridge_init(pcie);
 	if (err) {
 		dev_err(dev, "HW Initialization failed\n");
diff --git a/drivers/pci/controller/pcie-xilinx.c b/drivers/pci/controller/pcie-xilinx.c
index 14001febf59a..aa9bdcebc838 100644
--- a/drivers/pci/controller/pcie-xilinx.c
+++ b/drivers/pci/controller/pcie-xilinx.c
@@ -385,7 +385,7 @@ static irqreturn_t xilinx_pcie_intr_handler(int irq, void *data)
 	}
 
 	if (status & (XILINX_PCIE_INTR_INTX | XILINX_PCIE_INTR_MSI)) {
-		unsigned int irq;
+		struct irq_domain *domain;
 
 		val = pcie_read(port, XILINX_PCIE_REG_RPIFR1);
 
@@ -399,19 +399,18 @@ static irqreturn_t xilinx_pcie_intr_handler(int irq, void *data)
 		if (val & XILINX_PCIE_RPIFR1_MSI_INTR) {
 			val = pcie_read(port, XILINX_PCIE_REG_RPIFR2) &
 				XILINX_PCIE_RPIFR2_MSG_DATA;
-			irq = irq_find_mapping(port->msi_domain->parent, val);
+			domain = port->msi_domain->parent;
 		} else {
 			val = (val & XILINX_PCIE_RPIFR1_INTR_MASK) >>
 				XILINX_PCIE_RPIFR1_INTR_SHIFT;
-			irq = irq_find_mapping(port->leg_domain, val);
+			domain = port->leg_domain;
 		}
 
 		/* Clear interrupt FIFO register 1 */
 		pcie_write(port, XILINX_PCIE_RPIFR1_ALL_MASK,
 			   XILINX_PCIE_REG_RPIFR1);
 
-		if (irq)
-			generic_handle_irq(irq);
+		generic_handle_domain_irq(domain, val);
 	}
 
 	if (status & XILINX_PCIE_INTR_SLV_UNSUPP)
diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
index e3fcdfec58b3..a5987e52700e 100644
--- a/drivers/pci/controller/vmd.c
+++ b/drivers/pci/controller/vmd.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/msi.h>
 #include <linux/pci.h>
+#include <linux/pci-acpi.h>
 #include <linux/pci-ecam.h>
 #include <linux/srcu.h>
 #include <linux/rculist.h>
@@ -447,6 +448,56 @@ static struct pci_ops vmd_ops = {
 	.write		= vmd_pci_write,
 };
 
+#ifdef CONFIG_ACPI
+static struct acpi_device *vmd_acpi_find_companion(struct pci_dev *pci_dev)
+{
+	struct pci_host_bridge *bridge;
+	u32 busnr, addr;
+
+	if (pci_dev->bus->ops != &vmd_ops)
+		return NULL;
+
+	bridge = pci_find_host_bridge(pci_dev->bus);
+	busnr = pci_dev->bus->number - bridge->bus->number;
+	/*
+	 * The address computation below is only applicable to relative bus
+	 * numbers below 32.
+	 */
+	if (busnr > 31)
+		return NULL;
+
+	addr = (busnr << 24) | ((u32)pci_dev->devfn << 16) | 0x8000FFFFU;
+
+	dev_dbg(&pci_dev->dev, "Looking for ACPI companion (address 0x%x)\n",
+		addr);
+
+	return acpi_find_child_device(ACPI_COMPANION(bridge->dev.parent), addr,
+				      false);
+}
+
+static bool hook_installed;
+
+static void vmd_acpi_begin(void)
+{
+	if (pci_acpi_set_companion_lookup_hook(vmd_acpi_find_companion))
+		return;
+
+	hook_installed = true;
+}
+
+static void vmd_acpi_end(void)
+{
+	if (!hook_installed)
+		return;
+
+	pci_acpi_clear_companion_lookup_hook();
+	hook_installed = false;
+}
+#else
+static inline void vmd_acpi_begin(void) { }
+static inline void vmd_acpi_end(void) { }
+#endif /* CONFIG_ACPI */
+
 static void vmd_attach_resources(struct vmd_dev *vmd)
 {
 	vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1];
@@ -747,6 +798,8 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
 	if (vmd->irq_domain)
 		dev_set_msi_domain(&vmd->bus->dev, vmd->irq_domain);
 
+	vmd_acpi_begin();
+
 	pci_scan_child_bus(vmd->bus);
 	pci_assign_unassigned_bus_resources(vmd->bus);
 
@@ -760,6 +813,8 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
 
 	pci_bus_add_devices(vmd->bus);
 
+	vmd_acpi_end();
+
 	WARN(sysfs_create_link(&vmd->dev->dev.kobj, &vmd->bus->dev.kobj,
 			       "domain"), "Can't create symlink to domain\n");
 	return 0;
diff --git a/drivers/pci/endpoint/functions/pci-epf-ntb.c b/drivers/pci/endpoint/functions/pci-epf-ntb.c
index bce274d02dcf..8b4756159f15 100644
--- a/drivers/pci/endpoint/functions/pci-epf-ntb.c
+++ b/drivers/pci/endpoint/functions/pci-epf-ntb.c
@@ -87,6 +87,7 @@ struct epf_ntb {
 
 struct epf_ntb_epc {
 	u8 func_no;
+	u8 vfunc_no;
 	bool linkup;
 	bool is_msix;
 	int msix_bar;
@@ -143,14 +144,15 @@ static int epf_ntb_link_up(struct epf_ntb *ntb, bool link_up)
 	struct epf_ntb_epc *ntb_epc;
 	struct epf_ntb_ctrl *ctrl;
 	struct pci_epc *epc;
+	u8 func_no, vfunc_no;
 	bool is_msix;
-	u8 func_no;
 	int ret;
 
 	for (type = PRIMARY_INTERFACE; type <= SECONDARY_INTERFACE; type++) {
 		ntb_epc = ntb->epc[type];
 		epc = ntb_epc->epc;
 		func_no = ntb_epc->func_no;
+		vfunc_no = ntb_epc->vfunc_no;
 		is_msix = ntb_epc->is_msix;
 		ctrl = ntb_epc->reg;
 		if (link_up)
@@ -158,7 +160,7 @@ static int epf_ntb_link_up(struct epf_ntb *ntb, bool link_up)
 		else
 			ctrl->link_status &= ~LINK_STATUS_UP;
 		irq_type = is_msix ? PCI_EPC_IRQ_MSIX : PCI_EPC_IRQ_MSI;
-		ret = pci_epc_raise_irq(epc, func_no, irq_type, 1);
+		ret = pci_epc_raise_irq(epc, func_no, vfunc_no, irq_type, 1);
 		if (ret) {
 			dev_err(&epc->dev,
 				"%s intf: Failed to raise Link Up IRQ\n",
@@ -238,10 +240,10 @@ static int epf_ntb_configure_mw(struct epf_ntb *ntb,
 	enum pci_barno peer_barno;
 	struct epf_ntb_ctrl *ctrl;
 	phys_addr_t phys_addr;
+	u8 func_no, vfunc_no;
 	struct pci_epc *epc;
 	u64 addr, size;
 	int ret = 0;
-	u8 func_no;
 
 	ntb_epc = ntb->epc[type];
 	epc = ntb_epc->epc;
@@ -267,8 +269,9 @@ static int epf_ntb_configure_mw(struct epf_ntb *ntb,
 	}
 
 	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
 
-	ret = pci_epc_map_addr(epc, func_no, phys_addr, addr, size);
+	ret = pci_epc_map_addr(epc, func_no, vfunc_no, phys_addr, addr, size);
 	if (ret)
 		dev_err(&epc->dev,
 			"%s intf: Failed to map memory window %d address\n",
@@ -296,8 +299,8 @@ static void epf_ntb_teardown_mw(struct epf_ntb *ntb,
 	enum pci_barno peer_barno;
 	struct epf_ntb_ctrl *ctrl;
 	phys_addr_t phys_addr;
+	u8 func_no, vfunc_no;
 	struct pci_epc *epc;
-	u8 func_no;
 
 	ntb_epc = ntb->epc[type];
 	epc = ntb_epc->epc;
@@ -311,8 +314,9 @@ static void epf_ntb_teardown_mw(struct epf_ntb *ntb,
 	if (mw + NTB_MW_OFFSET == BAR_DB_MW1)
 		phys_addr += ctrl->mw1_offset;
 	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
 
-	pci_epc_unmap_addr(epc, func_no, phys_addr);
+	pci_epc_unmap_addr(epc, func_no, vfunc_no, phys_addr);
 }
 
 /**
@@ -385,8 +389,8 @@ static int epf_ntb_configure_msi(struct epf_ntb *ntb,
 	struct epf_ntb_ctrl *peer_ctrl;
 	enum pci_barno peer_barno;
 	phys_addr_t phys_addr;
+	u8 func_no, vfunc_no;
 	struct pci_epc *epc;
-	u8 func_no;
 	int ret, i;
 
 	ntb_epc = ntb->epc[type];
@@ -400,8 +404,9 @@ static int epf_ntb_configure_msi(struct epf_ntb *ntb,
 
 	phys_addr = peer_epf_bar->phys_addr;
 	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
 
-	ret = pci_epc_map_msi_irq(epc, func_no, phys_addr, db_count,
+	ret = pci_epc_map_msi_irq(epc, func_no, vfunc_no, phys_addr, db_count,
 				  db_entry_size, &db_data, &db_offset);
 	if (ret) {
 		dev_err(&epc->dev, "%s intf: Failed to map MSI IRQ\n",
@@ -491,10 +496,10 @@ static int epf_ntb_configure_msix(struct epf_ntb *ntb,
 	u32 db_entry_size, msg_data;
 	enum pci_barno peer_barno;
 	phys_addr_t phys_addr;
+	u8 func_no, vfunc_no;
 	struct pci_epc *epc;
 	size_t align;
 	u64 msg_addr;
-	u8 func_no;
 	int ret, i;
 
 	ntb_epc = ntb->epc[type];
@@ -512,12 +517,13 @@ static int epf_ntb_configure_msix(struct epf_ntb *ntb,
 	align = epc_features->align;
 
 	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
 	db_entry_size = peer_ctrl->db_entry_size;
 
 	for (i = 0; i < db_count; i++) {
 		msg_addr = ALIGN_DOWN(msix_tbl[i].msg_addr, align);
 		msg_data = msix_tbl[i].msg_data;
-		ret = pci_epc_map_addr(epc, func_no, phys_addr, msg_addr,
+		ret = pci_epc_map_addr(epc, func_no, vfunc_no, phys_addr, msg_addr,
 				       db_entry_size);
 		if (ret) {
 			dev_err(&epc->dev,
@@ -586,8 +592,8 @@ epf_ntb_teardown_db(struct epf_ntb *ntb, enum pci_epc_interface_type type)
 	struct pci_epf_bar *peer_epf_bar;
 	enum pci_barno peer_barno;
 	phys_addr_t phys_addr;
+	u8 func_no, vfunc_no;
 	struct pci_epc *epc;
-	u8 func_no;
 
 	ntb_epc = ntb->epc[type];
 	epc = ntb_epc->epc;
@@ -597,8 +603,9 @@ epf_ntb_teardown_db(struct epf_ntb *ntb, enum pci_epc_interface_type type)
 	peer_epf_bar = &peer_ntb_epc->epf_bar[peer_barno];
 	phys_addr = peer_epf_bar->phys_addr;
 	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
 
-	pci_epc_unmap_addr(epc, func_no, phys_addr);
+	pci_epc_unmap_addr(epc, func_no, vfunc_no, phys_addr);
 }
 
 /**
@@ -728,14 +735,15 @@ static void epf_ntb_peer_spad_bar_clear(struct epf_ntb_epc *ntb_epc)
 {
 	struct pci_epf_bar *epf_bar;
 	enum pci_barno barno;
+	u8 func_no, vfunc_no;
 	struct pci_epc *epc;
-	u8 func_no;
 
 	epc = ntb_epc->epc;
 	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
 	barno = ntb_epc->epf_ntb_bar[BAR_PEER_SPAD];
 	epf_bar = &ntb_epc->epf_bar[barno];
-	pci_epc_clear_bar(epc, func_no, epf_bar);
+	pci_epc_clear_bar(epc, func_no, vfunc_no, epf_bar);
 }
 
 /**
@@ -775,9 +783,9 @@ static int epf_ntb_peer_spad_bar_set(struct epf_ntb *ntb,
 	struct pci_epf_bar *peer_epf_bar, *epf_bar;
 	enum pci_barno peer_barno, barno;
 	u32 peer_spad_offset;
+	u8 func_no, vfunc_no;
 	struct pci_epc *epc;
 	struct device *dev;
-	u8 func_no;
 	int ret;
 
 	dev = &ntb->epf->dev;
@@ -790,6 +798,7 @@ static int epf_ntb_peer_spad_bar_set(struct epf_ntb *ntb,
 	barno = ntb_epc->epf_ntb_bar[BAR_PEER_SPAD];
 	epf_bar = &ntb_epc->epf_bar[barno];
 	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
 	epc = ntb_epc->epc;
 
 	peer_spad_offset = peer_ntb_epc->reg->spad_offset;
@@ -798,7 +807,7 @@ static int epf_ntb_peer_spad_bar_set(struct epf_ntb *ntb,
 	epf_bar->barno = barno;
 	epf_bar->flags = PCI_BASE_ADDRESS_MEM_TYPE_32;
 
-	ret = pci_epc_set_bar(epc, func_no, epf_bar);
+	ret = pci_epc_set_bar(epc, func_no, vfunc_no, epf_bar);
 	if (ret) {
 		dev_err(dev, "%s intf: peer SPAD BAR set failed\n",
 			pci_epc_interface_string(type));
@@ -842,14 +851,15 @@ static void epf_ntb_config_sspad_bar_clear(struct epf_ntb_epc *ntb_epc)
 {
 	struct pci_epf_bar *epf_bar;
 	enum pci_barno barno;
+	u8 func_no, vfunc_no;
 	struct pci_epc *epc;
-	u8 func_no;
 
 	epc = ntb_epc->epc;
 	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
 	barno = ntb_epc->epf_ntb_bar[BAR_CONFIG];
 	epf_bar = &ntb_epc->epf_bar[barno];
-	pci_epc_clear_bar(epc, func_no, epf_bar);
+	pci_epc_clear_bar(epc, func_no, vfunc_no, epf_bar);
 }
 
 /**
@@ -886,10 +896,10 @@ static int epf_ntb_config_sspad_bar_set(struct epf_ntb_epc *ntb_epc)
 {
 	struct pci_epf_bar *epf_bar;
 	enum pci_barno barno;
+	u8 func_no, vfunc_no;
 	struct epf_ntb *ntb;
 	struct pci_epc *epc;
 	struct device *dev;
-	u8 func_no;
 	int ret;
 
 	ntb = ntb_epc->epf_ntb;
@@ -897,10 +907,11 @@ static int epf_ntb_config_sspad_bar_set(struct epf_ntb_epc *ntb_epc)
 
 	epc = ntb_epc->epc;
 	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
 	barno = ntb_epc->epf_ntb_bar[BAR_CONFIG];
 	epf_bar = &ntb_epc->epf_bar[barno];
 
-	ret = pci_epc_set_bar(epc, func_no, epf_bar);
+	ret = pci_epc_set_bar(epc, func_no, vfunc_no, epf_bar);
 	if (ret) {
 		dev_err(dev, "%s inft: Config/Status/SPAD BAR set failed\n",
 			pci_epc_interface_string(ntb_epc->type));
@@ -1214,17 +1225,18 @@ static void epf_ntb_db_mw_bar_clear(struct epf_ntb_epc *ntb_epc)
 	struct pci_epf_bar *epf_bar;
 	enum epf_ntb_bar bar;
 	enum pci_barno barno;
+	u8 func_no, vfunc_no;
 	struct pci_epc *epc;
-	u8 func_no;
 
 	epc = ntb_epc->epc;
 
 	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
 
 	for (bar = BAR_DB_MW1; bar < BAR_MW4; bar++) {
 		barno = ntb_epc->epf_ntb_bar[bar];
 		epf_bar = &ntb_epc->epf_bar[barno];
-		pci_epc_clear_bar(epc, func_no, epf_bar);
+		pci_epc_clear_bar(epc, func_no, vfunc_no, epf_bar);
 	}
 }
 
@@ -1263,10 +1275,10 @@ static int epf_ntb_configure_interrupt(struct epf_ntb *ntb,
 	const struct pci_epc_features *epc_features;
 	bool msix_capable, msi_capable;
 	struct epf_ntb_epc *ntb_epc;
+	u8 func_no, vfunc_no;
 	struct pci_epc *epc;
 	struct device *dev;
 	u32 db_count;
-	u8 func_no;
 	int ret;
 
 	ntb_epc = ntb->epc[type];
@@ -1282,6 +1294,7 @@ static int epf_ntb_configure_interrupt(struct epf_ntb *ntb,
 	}
 
 	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
 
 	db_count = ntb->db_count;
 	if (db_count > MAX_DB_COUNT) {
@@ -1293,7 +1306,7 @@ static int epf_ntb_configure_interrupt(struct epf_ntb *ntb,
 	epc = ntb_epc->epc;
 
 	if (msi_capable) {
-		ret = pci_epc_set_msi(epc, func_no, db_count);
+		ret = pci_epc_set_msi(epc, func_no, vfunc_no, db_count);
 		if (ret) {
 			dev_err(dev, "%s intf: MSI configuration failed\n",
 				pci_epc_interface_string(type));
@@ -1302,7 +1315,7 @@ static int epf_ntb_configure_interrupt(struct epf_ntb *ntb,
 	}
 
 	if (msix_capable) {
-		ret = pci_epc_set_msix(epc, func_no, db_count,
+		ret = pci_epc_set_msix(epc, func_no, vfunc_no, db_count,
 				       ntb_epc->msix_bar,
 				       ntb_epc->msix_table_offset);
 		if (ret) {
@@ -1423,11 +1436,11 @@ static int epf_ntb_db_mw_bar_init(struct epf_ntb *ntb,
 	u32 num_mws, db_count;
 	enum epf_ntb_bar bar;
 	enum pci_barno barno;
+	u8 func_no, vfunc_no;
 	struct pci_epc *epc;
 	struct device *dev;
 	size_t align;
 	int ret, i;
-	u8 func_no;
 	u64 size;
 
 	ntb_epc = ntb->epc[type];
@@ -1437,6 +1450,7 @@ static int epf_ntb_db_mw_bar_init(struct epf_ntb *ntb,
 	epc_features = ntb_epc->epc_features;
 	align = epc_features->align;
 	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
 	epc = ntb_epc->epc;
 	num_mws = ntb->num_mws;
 	db_count = ntb->db_count;
@@ -1464,7 +1478,7 @@ static int epf_ntb_db_mw_bar_init(struct epf_ntb *ntb,
 		barno = ntb_epc->epf_ntb_bar[bar];
 		epf_bar = &ntb_epc->epf_bar[barno];
 
-		ret = pci_epc_set_bar(epc, func_no, epf_bar);
+		ret = pci_epc_set_bar(epc, func_no, vfunc_no, epf_bar);
 		if (ret) {
 			dev_err(dev, "%s intf: DoorBell BAR set failed\n",
 				pci_epc_interface_string(type));
@@ -1536,9 +1550,9 @@ static int epf_ntb_epc_create_interface(struct epf_ntb *ntb,
 	const struct pci_epc_features *epc_features;
 	struct pci_epf_bar *epf_bar;
 	struct epf_ntb_epc *ntb_epc;
+	u8 func_no, vfunc_no;
 	struct pci_epf *epf;
 	struct device *dev;
-	u8 func_no;
 
 	dev = &ntb->epf->dev;
 
@@ -1547,6 +1561,7 @@ static int epf_ntb_epc_create_interface(struct epf_ntb *ntb,
 		return -ENOMEM;
 
 	epf = ntb->epf;
+	vfunc_no = epf->vfunc_no;
 	if (type == PRIMARY_INTERFACE) {
 		func_no = epf->func_no;
 		epf_bar = epf->bar;
@@ -1558,11 +1573,12 @@ static int epf_ntb_epc_create_interface(struct epf_ntb *ntb,
 	ntb_epc->linkup = false;
 	ntb_epc->epc = epc;
 	ntb_epc->func_no = func_no;
+	ntb_epc->vfunc_no = vfunc_no;
 	ntb_epc->type = type;
 	ntb_epc->epf_bar = epf_bar;
 	ntb_epc->epf_ntb = ntb;
 
-	epc_features = pci_epc_get_features(epc, func_no);
+	epc_features = pci_epc_get_features(epc, func_no, vfunc_no);
 	if (!epc_features)
 		return -EINVAL;
 	ntb_epc->epc_features = epc_features;
@@ -1702,10 +1718,10 @@ static int epf_ntb_epc_init_interface(struct epf_ntb *ntb,
 				      enum pci_epc_interface_type type)
 {
 	struct epf_ntb_epc *ntb_epc;
+	u8 func_no, vfunc_no;
 	struct pci_epc *epc;
 	struct pci_epf *epf;
 	struct device *dev;
-	u8 func_no;
 	int ret;
 
 	ntb_epc = ntb->epc[type];
@@ -1713,6 +1729,7 @@ static int epf_ntb_epc_init_interface(struct epf_ntb *ntb,
 	dev = &epf->dev;
 	epc = ntb_epc->epc;
 	func_no = ntb_epc->func_no;
+	vfunc_no = ntb_epc->vfunc_no;
 
 	ret = epf_ntb_config_sspad_bar_set(ntb->epc[type]);
 	if (ret) {
@@ -1742,11 +1759,13 @@ static int epf_ntb_epc_init_interface(struct epf_ntb *ntb,
 		goto err_db_mw_bar_init;
 	}
 
-	ret = pci_epc_write_header(epc, func_no, epf->header);
-	if (ret) {
-		dev_err(dev, "%s intf: Configuration header write failed\n",
-			pci_epc_interface_string(type));
-		goto err_write_header;
+	if (vfunc_no <= 1) {
+		ret = pci_epc_write_header(epc, func_no, vfunc_no, epf->header);
+		if (ret) {
+			dev_err(dev, "%s intf: Configuration header write failed\n",
+				pci_epc_interface_string(type));
+			goto err_write_header;
+		}
 	}
 
 	INIT_DELAYED_WORK(&ntb->epc[type]->cmd_handler, epf_ntb_cmd_handler);
diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c
index d2708ca4bece..90d84d3bc868 100644
--- a/drivers/pci/endpoint/functions/pci-epf-test.c
+++ b/drivers/pci/endpoint/functions/pci-epf-test.c
@@ -247,8 +247,8 @@ static int pci_epf_test_copy(struct pci_epf_test *epf_test)
 		goto err;
 	}
 
-	ret = pci_epc_map_addr(epc, epf->func_no, src_phys_addr, reg->src_addr,
-			       reg->size);
+	ret = pci_epc_map_addr(epc, epf->func_no, epf->vfunc_no, src_phys_addr,
+			       reg->src_addr, reg->size);
 	if (ret) {
 		dev_err(dev, "Failed to map source address\n");
 		reg->status = STATUS_SRC_ADDR_INVALID;
@@ -263,8 +263,8 @@ static int pci_epf_test_copy(struct pci_epf_test *epf_test)
 		goto err_src_map_addr;
 	}
 
-	ret = pci_epc_map_addr(epc, epf->func_no, dst_phys_addr, reg->dst_addr,
-			       reg->size);
+	ret = pci_epc_map_addr(epc, epf->func_no, epf->vfunc_no, dst_phys_addr,
+			       reg->dst_addr, reg->size);
 	if (ret) {
 		dev_err(dev, "Failed to map destination address\n");
 		reg->status = STATUS_DST_ADDR_INVALID;
@@ -291,13 +291,13 @@ static int pci_epf_test_copy(struct pci_epf_test *epf_test)
 	pci_epf_test_print_rate("COPY", reg->size, &start, &end, use_dma);
 
 err_map_addr:
-	pci_epc_unmap_addr(epc, epf->func_no, dst_phys_addr);
+	pci_epc_unmap_addr(epc, epf->func_no, epf->vfunc_no, dst_phys_addr);
 
 err_dst_addr:
 	pci_epc_mem_free_addr(epc, dst_phys_addr, dst_addr, reg->size);
 
 err_src_map_addr:
-	pci_epc_unmap_addr(epc, epf->func_no, src_phys_addr);
+	pci_epc_unmap_addr(epc, epf->func_no, epf->vfunc_no, src_phys_addr);
 
 err_src_addr:
 	pci_epc_mem_free_addr(epc, src_phys_addr, src_addr, reg->size);
@@ -331,8 +331,8 @@ static int pci_epf_test_read(struct pci_epf_test *epf_test)
 		goto err;
 	}
 
-	ret = pci_epc_map_addr(epc, epf->func_no, phys_addr, reg->src_addr,
-			       reg->size);
+	ret = pci_epc_map_addr(epc, epf->func_no, epf->vfunc_no, phys_addr,
+			       reg->src_addr, reg->size);
 	if (ret) {
 		dev_err(dev, "Failed to map address\n");
 		reg->status = STATUS_SRC_ADDR_INVALID;
@@ -386,7 +386,7 @@ err_dma_map:
 	kfree(buf);
 
 err_map_addr:
-	pci_epc_unmap_addr(epc, epf->func_no, phys_addr);
+	pci_epc_unmap_addr(epc, epf->func_no, epf->vfunc_no, phys_addr);
 
 err_addr:
 	pci_epc_mem_free_addr(epc, phys_addr, src_addr, reg->size);
@@ -419,8 +419,8 @@ static int pci_epf_test_write(struct pci_epf_test *epf_test)
 		goto err;
 	}
 
-	ret = pci_epc_map_addr(epc, epf->func_no, phys_addr, reg->dst_addr,
-			       reg->size);
+	ret = pci_epc_map_addr(epc, epf->func_no, epf->vfunc_no, phys_addr,
+			       reg->dst_addr, reg->size);
 	if (ret) {
 		dev_err(dev, "Failed to map address\n");
 		reg->status = STATUS_DST_ADDR_INVALID;
@@ -479,7 +479,7 @@ err_dma_map:
 	kfree(buf);
 
 err_map_addr:
-	pci_epc_unmap_addr(epc, epf->func_no, phys_addr);
+	pci_epc_unmap_addr(epc, epf->func_no, epf->vfunc_no, phys_addr);
 
 err_addr:
 	pci_epc_mem_free_addr(epc, phys_addr, dst_addr, reg->size);
@@ -501,13 +501,16 @@ static void pci_epf_test_raise_irq(struct pci_epf_test *epf_test, u8 irq_type,
 
 	switch (irq_type) {
 	case IRQ_TYPE_LEGACY:
-		pci_epc_raise_irq(epc, epf->func_no, PCI_EPC_IRQ_LEGACY, 0);
+		pci_epc_raise_irq(epc, epf->func_no, epf->vfunc_no,
+				  PCI_EPC_IRQ_LEGACY, 0);
 		break;
 	case IRQ_TYPE_MSI:
-		pci_epc_raise_irq(epc, epf->func_no, PCI_EPC_IRQ_MSI, irq);
+		pci_epc_raise_irq(epc, epf->func_no, epf->vfunc_no,
+				  PCI_EPC_IRQ_MSI, irq);
 		break;
 	case IRQ_TYPE_MSIX:
-		pci_epc_raise_irq(epc, epf->func_no, PCI_EPC_IRQ_MSIX, irq);
+		pci_epc_raise_irq(epc, epf->func_no, epf->vfunc_no,
+				  PCI_EPC_IRQ_MSIX, irq);
 		break;
 	default:
 		dev_err(dev, "Failed to raise IRQ, unknown type\n");
@@ -542,7 +545,8 @@ static void pci_epf_test_cmd_handler(struct work_struct *work)
 
 	if (command & COMMAND_RAISE_LEGACY_IRQ) {
 		reg->status = STATUS_IRQ_RAISED;
-		pci_epc_raise_irq(epc, epf->func_no, PCI_EPC_IRQ_LEGACY, 0);
+		pci_epc_raise_irq(epc, epf->func_no, epf->vfunc_no,
+				  PCI_EPC_IRQ_LEGACY, 0);
 		goto reset_handler;
 	}
 
@@ -580,22 +584,22 @@ static void pci_epf_test_cmd_handler(struct work_struct *work)
 	}
 
 	if (command & COMMAND_RAISE_MSI_IRQ) {
-		count = pci_epc_get_msi(epc, epf->func_no);
+		count = pci_epc_get_msi(epc, epf->func_no, epf->vfunc_no);
 		if (reg->irq_number > count || count <= 0)
 			goto reset_handler;
 		reg->status = STATUS_IRQ_RAISED;
-		pci_epc_raise_irq(epc, epf->func_no, PCI_EPC_IRQ_MSI,
-				  reg->irq_number);
+		pci_epc_raise_irq(epc, epf->func_no, epf->vfunc_no,
+				  PCI_EPC_IRQ_MSI, reg->irq_number);
 		goto reset_handler;
 	}
 
 	if (command & COMMAND_RAISE_MSIX_IRQ) {
-		count = pci_epc_get_msix(epc, epf->func_no);
+		count = pci_epc_get_msix(epc, epf->func_no, epf->vfunc_no);
 		if (reg->irq_number > count || count <= 0)
 			goto reset_handler;
 		reg->status = STATUS_IRQ_RAISED;
-		pci_epc_raise_irq(epc, epf->func_no, PCI_EPC_IRQ_MSIX,
-				  reg->irq_number);
+		pci_epc_raise_irq(epc, epf->func_no, epf->vfunc_no,
+				  PCI_EPC_IRQ_MSIX, reg->irq_number);
 		goto reset_handler;
 	}
 
@@ -618,7 +622,8 @@ static void pci_epf_test_unbind(struct pci_epf *epf)
 		epf_bar = &epf->bar[bar];
 
 		if (epf_test->reg[bar]) {
-			pci_epc_clear_bar(epc, epf->func_no, epf_bar);
+			pci_epc_clear_bar(epc, epf->func_no, epf->vfunc_no,
+					  epf_bar);
 			pci_epf_free_space(epf, epf_test->reg[bar], bar,
 					   PRIMARY_INTERFACE);
 		}
@@ -650,7 +655,8 @@ static int pci_epf_test_set_bar(struct pci_epf *epf)
 		if (!!(epc_features->reserved_bar & (1 << bar)))
 			continue;
 
-		ret = pci_epc_set_bar(epc, epf->func_no, epf_bar);
+		ret = pci_epc_set_bar(epc, epf->func_no, epf->vfunc_no,
+				      epf_bar);
 		if (ret) {
 			pci_epf_free_space(epf, epf_test->reg[bar], bar,
 					   PRIMARY_INTERFACE);
@@ -674,16 +680,18 @@ static int pci_epf_test_core_init(struct pci_epf *epf)
 	bool msi_capable = true;
 	int ret;
 
-	epc_features = pci_epc_get_features(epc, epf->func_no);
+	epc_features = pci_epc_get_features(epc, epf->func_no, epf->vfunc_no);
 	if (epc_features) {
 		msix_capable = epc_features->msix_capable;
 		msi_capable = epc_features->msi_capable;
 	}
 
-	ret = pci_epc_write_header(epc, epf->func_no, header);
-	if (ret) {
-		dev_err(dev, "Configuration header write failed\n");
-		return ret;
+	if (epf->vfunc_no <= 1) {
+		ret = pci_epc_write_header(epc, epf->func_no, epf->vfunc_no, header);
+		if (ret) {
+			dev_err(dev, "Configuration header write failed\n");
+			return ret;
+		}
 	}
 
 	ret = pci_epf_test_set_bar(epf);
@@ -691,7 +699,8 @@ static int pci_epf_test_core_init(struct pci_epf *epf)
 		return ret;
 
 	if (msi_capable) {
-		ret = pci_epc_set_msi(epc, epf->func_no, epf->msi_interrupts);
+		ret = pci_epc_set_msi(epc, epf->func_no, epf->vfunc_no,
+				      epf->msi_interrupts);
 		if (ret) {
 			dev_err(dev, "MSI configuration failed\n");
 			return ret;
@@ -699,7 +708,8 @@ static int pci_epf_test_core_init(struct pci_epf *epf)
 	}
 
 	if (msix_capable) {
-		ret = pci_epc_set_msix(epc, epf->func_no, epf->msix_interrupts,
+		ret = pci_epc_set_msix(epc, epf->func_no, epf->vfunc_no,
+				       epf->msix_interrupts,
 				       epf_test->test_reg_bar,
 				       epf_test->msix_table_offset);
 		if (ret) {
@@ -832,7 +842,7 @@ static int pci_epf_test_bind(struct pci_epf *epf)
 	if (WARN_ON_ONCE(!epc))
 		return -EINVAL;
 
-	epc_features = pci_epc_get_features(epc, epf->func_no);
+	epc_features = pci_epc_get_features(epc, epf->func_no, epf->vfunc_no);
 	if (!epc_features) {
 		dev_err(&epf->dev, "epc_features not implemented\n");
 		return -EOPNOTSUPP;
diff --git a/drivers/pci/endpoint/pci-ep-cfs.c b/drivers/pci/endpoint/pci-ep-cfs.c
index f3a8b833b479..999911801877 100644
--- a/drivers/pci/endpoint/pci-ep-cfs.c
+++ b/drivers/pci/endpoint/pci-ep-cfs.c
@@ -475,6 +475,28 @@ static struct configfs_attribute *pci_epf_attrs[] = {
 	NULL,
 };
 
+static int pci_epf_vepf_link(struct config_item *epf_pf_item,
+			     struct config_item *epf_vf_item)
+{
+	struct pci_epf_group *epf_vf_group = to_pci_epf_group(epf_vf_item);
+	struct pci_epf_group *epf_pf_group = to_pci_epf_group(epf_pf_item);
+	struct pci_epf *epf_pf = epf_pf_group->epf;
+	struct pci_epf *epf_vf = epf_vf_group->epf;
+
+	return pci_epf_add_vepf(epf_pf, epf_vf);
+}
+
+static void pci_epf_vepf_unlink(struct config_item *epf_pf_item,
+				struct config_item *epf_vf_item)
+{
+	struct pci_epf_group *epf_vf_group = to_pci_epf_group(epf_vf_item);
+	struct pci_epf_group *epf_pf_group = to_pci_epf_group(epf_pf_item);
+	struct pci_epf *epf_pf = epf_pf_group->epf;
+	struct pci_epf *epf_vf = epf_vf_group->epf;
+
+	pci_epf_remove_vepf(epf_pf, epf_vf);
+}
+
 static void pci_epf_release(struct config_item *item)
 {
 	struct pci_epf_group *epf_group = to_pci_epf_group(item);
@@ -487,6 +509,8 @@ static void pci_epf_release(struct config_item *item)
 }
 
 static struct configfs_item_operations pci_epf_ops = {
+	.allow_link		= pci_epf_vepf_link,
+	.drop_link		= pci_epf_vepf_unlink,
 	.release		= pci_epf_release,
 };
 
diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c
index adec9bee72cf..ecbb0fb3b653 100644
--- a/drivers/pci/endpoint/pci-epc-core.c
+++ b/drivers/pci/endpoint/pci-epc-core.c
@@ -137,24 +137,29 @@ EXPORT_SYMBOL_GPL(pci_epc_get_next_free_bar);
  * @epc: the features supported by *this* EPC device will be returned
  * @func_no: the features supported by the EPC device specific to the
  *	     endpoint function with func_no will be returned
+ * @vfunc_no: the features supported by the EPC device specific to the
+ *	     virtual endpoint function with vfunc_no will be returned
  *
  * Invoke to get the features provided by the EPC which may be
  * specific to an endpoint function. Returns pci_epc_features on success
  * and NULL for any failures.
  */
 const struct pci_epc_features *pci_epc_get_features(struct pci_epc *epc,
-						    u8 func_no)
+						    u8 func_no, u8 vfunc_no)
 {
 	const struct pci_epc_features *epc_features;
 
 	if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions)
 		return NULL;
 
+	if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+		return NULL;
+
 	if (!epc->ops->get_features)
 		return NULL;
 
 	mutex_lock(&epc->lock);
-	epc_features = epc->ops->get_features(epc, func_no);
+	epc_features = epc->ops->get_features(epc, func_no, vfunc_no);
 	mutex_unlock(&epc->lock);
 
 	return epc_features;
@@ -205,13 +210,14 @@ EXPORT_SYMBOL_GPL(pci_epc_start);
 /**
  * pci_epc_raise_irq() - interrupt the host system
  * @epc: the EPC device which has to interrupt the host
- * @func_no: the endpoint function number in the EPC device
+ * @func_no: the physical endpoint function number in the EPC device
+ * @vfunc_no: the virtual endpoint function number in the physical function
  * @type: specify the type of interrupt; legacy, MSI or MSI-X
  * @interrupt_num: the MSI or MSI-X interrupt number
  *
  * Invoke to raise an legacy, MSI or MSI-X interrupt
  */
-int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no,
+int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 		      enum pci_epc_irq_type type, u16 interrupt_num)
 {
 	int ret;
@@ -219,11 +225,14 @@ int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no,
 	if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions)
 		return -EINVAL;
 
+	if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+		return -EINVAL;
+
 	if (!epc->ops->raise_irq)
 		return 0;
 
 	mutex_lock(&epc->lock);
-	ret = epc->ops->raise_irq(epc, func_no, type, interrupt_num);
+	ret = epc->ops->raise_irq(epc, func_no, vfunc_no, type, interrupt_num);
 	mutex_unlock(&epc->lock);
 
 	return ret;
@@ -235,6 +244,7 @@ EXPORT_SYMBOL_GPL(pci_epc_raise_irq);
  *                         MSI data
  * @epc: the EPC device which has the MSI capability
  * @func_no: the physical endpoint function number in the EPC device
+ * @vfunc_no: the virtual endpoint function number in the physical function
  * @phys_addr: the physical address of the outbound region
  * @interrupt_num: the MSI interrupt number
  * @entry_size: Size of Outbound address region for each interrupt
@@ -250,21 +260,25 @@ EXPORT_SYMBOL_GPL(pci_epc_raise_irq);
  * physical address (in outbound region) of the other interface to ring
  * doorbell.
  */
-int pci_epc_map_msi_irq(struct pci_epc *epc, u8 func_no, phys_addr_t phys_addr,
-			u8 interrupt_num, u32 entry_size, u32 *msi_data,
-			u32 *msi_addr_offset)
+int pci_epc_map_msi_irq(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+			phys_addr_t phys_addr, u8 interrupt_num, u32 entry_size,
+			u32 *msi_data, u32 *msi_addr_offset)
 {
 	int ret;
 
 	if (IS_ERR_OR_NULL(epc))
 		return -EINVAL;
 
+	if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+		return -EINVAL;
+
 	if (!epc->ops->map_msi_irq)
 		return -EINVAL;
 
 	mutex_lock(&epc->lock);
-	ret = epc->ops->map_msi_irq(epc, func_no, phys_addr, interrupt_num,
-				    entry_size, msi_data, msi_addr_offset);
+	ret = epc->ops->map_msi_irq(epc, func_no, vfunc_no, phys_addr,
+				    interrupt_num, entry_size, msi_data,
+				    msi_addr_offset);
 	mutex_unlock(&epc->lock);
 
 	return ret;
@@ -274,22 +288,26 @@ EXPORT_SYMBOL_GPL(pci_epc_map_msi_irq);
 /**
  * pci_epc_get_msi() - get the number of MSI interrupt numbers allocated
  * @epc: the EPC device to which MSI interrupts was requested
- * @func_no: the endpoint function number in the EPC device
+ * @func_no: the physical endpoint function number in the EPC device
+ * @vfunc_no: the virtual endpoint function number in the physical function
  *
  * Invoke to get the number of MSI interrupts allocated by the RC
  */
-int pci_epc_get_msi(struct pci_epc *epc, u8 func_no)
+int pci_epc_get_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no)
 {
 	int interrupt;
 
 	if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions)
 		return 0;
 
+	if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+		return 0;
+
 	if (!epc->ops->get_msi)
 		return 0;
 
 	mutex_lock(&epc->lock);
-	interrupt = epc->ops->get_msi(epc, func_no);
+	interrupt = epc->ops->get_msi(epc, func_no, vfunc_no);
 	mutex_unlock(&epc->lock);
 
 	if (interrupt < 0)
@@ -304,12 +322,13 @@ EXPORT_SYMBOL_GPL(pci_epc_get_msi);
 /**
  * pci_epc_set_msi() - set the number of MSI interrupt numbers required
  * @epc: the EPC device on which MSI has to be configured
- * @func_no: the endpoint function number in the EPC device
+ * @func_no: the physical endpoint function number in the EPC device
+ * @vfunc_no: the virtual endpoint function number in the physical function
  * @interrupts: number of MSI interrupts required by the EPF
  *
  * Invoke to set the required number of MSI interrupts.
  */
-int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 interrupts)
+int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no, u8 interrupts)
 {
 	int ret;
 	u8 encode_int;
@@ -318,13 +337,16 @@ int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 interrupts)
 	    interrupts > 32)
 		return -EINVAL;
 
+	if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+		return -EINVAL;
+
 	if (!epc->ops->set_msi)
 		return 0;
 
 	encode_int = order_base_2(interrupts);
 
 	mutex_lock(&epc->lock);
-	ret = epc->ops->set_msi(epc, func_no, encode_int);
+	ret = epc->ops->set_msi(epc, func_no, vfunc_no, encode_int);
 	mutex_unlock(&epc->lock);
 
 	return ret;
@@ -334,22 +356,26 @@ EXPORT_SYMBOL_GPL(pci_epc_set_msi);
 /**
  * pci_epc_get_msix() - get the number of MSI-X interrupt numbers allocated
  * @epc: the EPC device to which MSI-X interrupts was requested
- * @func_no: the endpoint function number in the EPC device
+ * @func_no: the physical endpoint function number in the EPC device
+ * @vfunc_no: the virtual endpoint function number in the physical function
  *
  * Invoke to get the number of MSI-X interrupts allocated by the RC
  */
-int pci_epc_get_msix(struct pci_epc *epc, u8 func_no)
+int pci_epc_get_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no)
 {
 	int interrupt;
 
 	if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions)
 		return 0;
 
+	if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+		return 0;
+
 	if (!epc->ops->get_msix)
 		return 0;
 
 	mutex_lock(&epc->lock);
-	interrupt = epc->ops->get_msix(epc, func_no);
+	interrupt = epc->ops->get_msix(epc, func_no, vfunc_no);
 	mutex_unlock(&epc->lock);
 
 	if (interrupt < 0)
@@ -362,15 +388,16 @@ EXPORT_SYMBOL_GPL(pci_epc_get_msix);
 /**
  * pci_epc_set_msix() - set the number of MSI-X interrupt numbers required
  * @epc: the EPC device on which MSI-X has to be configured
- * @func_no: the endpoint function number in the EPC device
+ * @func_no: the physical endpoint function number in the EPC device
+ * @vfunc_no: the virtual endpoint function number in the physical function
  * @interrupts: number of MSI-X interrupts required by the EPF
  * @bir: BAR where the MSI-X table resides
  * @offset: Offset pointing to the start of MSI-X table
  *
  * Invoke to set the required number of MSI-X interrupts.
  */
-int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u16 interrupts,
-		     enum pci_barno bir, u32 offset)
+int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+		     u16 interrupts, enum pci_barno bir, u32 offset)
 {
 	int ret;
 
@@ -378,11 +405,15 @@ int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u16 interrupts,
 	    interrupts < 1 || interrupts > 2048)
 		return -EINVAL;
 
+	if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+		return -EINVAL;
+
 	if (!epc->ops->set_msix)
 		return 0;
 
 	mutex_lock(&epc->lock);
-	ret = epc->ops->set_msix(epc, func_no, interrupts - 1, bir, offset);
+	ret = epc->ops->set_msix(epc, func_no, vfunc_no, interrupts - 1, bir,
+				 offset);
 	mutex_unlock(&epc->lock);
 
 	return ret;
@@ -392,22 +423,26 @@ EXPORT_SYMBOL_GPL(pci_epc_set_msix);
 /**
  * pci_epc_unmap_addr() - unmap CPU address from PCI address
  * @epc: the EPC device on which address is allocated
- * @func_no: the endpoint function number in the EPC device
+ * @func_no: the physical endpoint function number in the EPC device
+ * @vfunc_no: the virtual endpoint function number in the physical function
  * @phys_addr: physical address of the local system
  *
  * Invoke to unmap the CPU address from PCI address.
  */
-void pci_epc_unmap_addr(struct pci_epc *epc, u8 func_no,
+void pci_epc_unmap_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 			phys_addr_t phys_addr)
 {
 	if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions)
 		return;
 
+	if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+		return;
+
 	if (!epc->ops->unmap_addr)
 		return;
 
 	mutex_lock(&epc->lock);
-	epc->ops->unmap_addr(epc, func_no, phys_addr);
+	epc->ops->unmap_addr(epc, func_no, vfunc_no, phys_addr);
 	mutex_unlock(&epc->lock);
 }
 EXPORT_SYMBOL_GPL(pci_epc_unmap_addr);
@@ -415,14 +450,15 @@ EXPORT_SYMBOL_GPL(pci_epc_unmap_addr);
 /**
  * pci_epc_map_addr() - map CPU address to PCI address
  * @epc: the EPC device on which address is allocated
- * @func_no: the endpoint function number in the EPC device
+ * @func_no: the physical endpoint function number in the EPC device
+ * @vfunc_no: the virtual endpoint function number in the physical function
  * @phys_addr: physical address of the local system
  * @pci_addr: PCI address to which the physical address should be mapped
  * @size: the size of the allocation
  *
  * Invoke to map CPU address with PCI address.
  */
-int pci_epc_map_addr(struct pci_epc *epc, u8 func_no,
+int pci_epc_map_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 		     phys_addr_t phys_addr, u64 pci_addr, size_t size)
 {
 	int ret;
@@ -430,11 +466,15 @@ int pci_epc_map_addr(struct pci_epc *epc, u8 func_no,
 	if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions)
 		return -EINVAL;
 
+	if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+		return -EINVAL;
+
 	if (!epc->ops->map_addr)
 		return 0;
 
 	mutex_lock(&epc->lock);
-	ret = epc->ops->map_addr(epc, func_no, phys_addr, pci_addr, size);
+	ret = epc->ops->map_addr(epc, func_no, vfunc_no, phys_addr, pci_addr,
+				 size);
 	mutex_unlock(&epc->lock);
 
 	return ret;
@@ -444,12 +484,13 @@ EXPORT_SYMBOL_GPL(pci_epc_map_addr);
 /**
  * pci_epc_clear_bar() - reset the BAR
  * @epc: the EPC device for which the BAR has to be cleared
- * @func_no: the endpoint function number in the EPC device
+ * @func_no: the physical endpoint function number in the EPC device
+ * @vfunc_no: the virtual endpoint function number in the physical function
  * @epf_bar: the struct epf_bar that contains the BAR information
  *
  * Invoke to reset the BAR of the endpoint device.
  */
-void pci_epc_clear_bar(struct pci_epc *epc, u8 func_no,
+void pci_epc_clear_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 		       struct pci_epf_bar *epf_bar)
 {
 	if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions ||
@@ -457,11 +498,14 @@ void pci_epc_clear_bar(struct pci_epc *epc, u8 func_no,
 	     epf_bar->flags & PCI_BASE_ADDRESS_MEM_TYPE_64))
 		return;
 
+	if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+		return;
+
 	if (!epc->ops->clear_bar)
 		return;
 
 	mutex_lock(&epc->lock);
-	epc->ops->clear_bar(epc, func_no, epf_bar);
+	epc->ops->clear_bar(epc, func_no, vfunc_no, epf_bar);
 	mutex_unlock(&epc->lock);
 }
 EXPORT_SYMBOL_GPL(pci_epc_clear_bar);
@@ -469,12 +513,13 @@ EXPORT_SYMBOL_GPL(pci_epc_clear_bar);
 /**
  * pci_epc_set_bar() - configure BAR in order for host to assign PCI addr space
  * @epc: the EPC device on which BAR has to be configured
- * @func_no: the endpoint function number in the EPC device
+ * @func_no: the physical endpoint function number in the EPC device
+ * @vfunc_no: the virtual endpoint function number in the physical function
  * @epf_bar: the struct epf_bar that contains the BAR information
  *
  * Invoke to configure the BAR of the endpoint device.
  */
-int pci_epc_set_bar(struct pci_epc *epc, u8 func_no,
+int pci_epc_set_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 		    struct pci_epf_bar *epf_bar)
 {
 	int ret;
@@ -489,11 +534,14 @@ int pci_epc_set_bar(struct pci_epc *epc, u8 func_no,
 	     !(flags & PCI_BASE_ADDRESS_MEM_TYPE_64)))
 		return -EINVAL;
 
+	if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+		return -EINVAL;
+
 	if (!epc->ops->set_bar)
 		return 0;
 
 	mutex_lock(&epc->lock);
-	ret = epc->ops->set_bar(epc, func_no, epf_bar);
+	ret = epc->ops->set_bar(epc, func_no, vfunc_no, epf_bar);
 	mutex_unlock(&epc->lock);
 
 	return ret;
@@ -503,7 +551,8 @@ EXPORT_SYMBOL_GPL(pci_epc_set_bar);
 /**
  * pci_epc_write_header() - write standard configuration header
  * @epc: the EPC device to which the configuration header should be written
- * @func_no: the endpoint function number in the EPC device
+ * @func_no: the physical endpoint function number in the EPC device
+ * @vfunc_no: the virtual endpoint function number in the physical function
  * @header: standard configuration header fields
  *
  * Invoke to write the configuration header to the endpoint controller. Every
@@ -511,7 +560,7 @@ EXPORT_SYMBOL_GPL(pci_epc_set_bar);
  * configuration header would be written. The callback function should write
  * the header fields to this dedicated location.
  */
-int pci_epc_write_header(struct pci_epc *epc, u8 func_no,
+int pci_epc_write_header(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 			 struct pci_epf_header *header)
 {
 	int ret;
@@ -519,11 +568,18 @@ int pci_epc_write_header(struct pci_epc *epc, u8 func_no,
 	if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions)
 		return -EINVAL;
 
+	if (vfunc_no > 0 && (!epc->max_vfs || vfunc_no > epc->max_vfs[func_no]))
+		return -EINVAL;
+
+	/* Only Virtual Function #1 has deviceID */
+	if (vfunc_no > 1)
+		return -EINVAL;
+
 	if (!epc->ops->write_header)
 		return 0;
 
 	mutex_lock(&epc->lock);
-	ret = epc->ops->write_header(epc, func_no, header);
+	ret = epc->ops->write_header(epc, func_no, vfunc_no, header);
 	mutex_unlock(&epc->lock);
 
 	return ret;
@@ -548,7 +604,7 @@ int pci_epc_add_epf(struct pci_epc *epc, struct pci_epf *epf,
 	u32 func_no;
 	int ret = 0;
 
-	if (IS_ERR_OR_NULL(epc))
+	if (IS_ERR_OR_NULL(epc) || epf->is_vf)
 		return -EINVAL;
 
 	if (type == PRIMARY_INTERFACE && epf->epc)
diff --git a/drivers/pci/endpoint/pci-epf-core.c b/drivers/pci/endpoint/pci-epf-core.c
index 502eb79cd551..8aea16380870 100644
--- a/drivers/pci/endpoint/pci-epf-core.c
+++ b/drivers/pci/endpoint/pci-epf-core.c
@@ -62,13 +62,20 @@ EXPORT_SYMBOL_GPL(pci_epf_type_add_cfs);
  */
 void pci_epf_unbind(struct pci_epf *epf)
 {
+	struct pci_epf *epf_vf;
+
 	if (!epf->driver) {
 		dev_WARN(&epf->dev, "epf device not bound to driver\n");
 		return;
 	}
 
 	mutex_lock(&epf->lock);
-	epf->driver->ops->unbind(epf);
+	list_for_each_entry(epf_vf, &epf->pci_vepf, list) {
+		if (epf_vf->is_bound)
+			epf_vf->driver->ops->unbind(epf_vf);
+	}
+	if (epf->is_bound)
+		epf->driver->ops->unbind(epf);
 	mutex_unlock(&epf->lock);
 	module_put(epf->driver->owner);
 }
@@ -83,10 +90,14 @@ EXPORT_SYMBOL_GPL(pci_epf_unbind);
  */
 int pci_epf_bind(struct pci_epf *epf)
 {
+	struct device *dev = &epf->dev;
+	struct pci_epf *epf_vf;
+	u8 func_no, vfunc_no;
+	struct pci_epc *epc;
 	int ret;
 
 	if (!epf->driver) {
-		dev_WARN(&epf->dev, "epf device not bound to driver\n");
+		dev_WARN(dev, "epf device not bound to driver\n");
 		return -EINVAL;
 	}
 
@@ -94,14 +105,141 @@ int pci_epf_bind(struct pci_epf *epf)
 		return -EAGAIN;
 
 	mutex_lock(&epf->lock);
+	list_for_each_entry(epf_vf, &epf->pci_vepf, list) {
+		vfunc_no = epf_vf->vfunc_no;
+
+		if (vfunc_no < 1) {
+			dev_err(dev, "Invalid virtual function number\n");
+			ret = -EINVAL;
+			goto ret;
+		}
+
+		epc = epf->epc;
+		func_no = epf->func_no;
+		if (!IS_ERR_OR_NULL(epc)) {
+			if (!epc->max_vfs) {
+				dev_err(dev, "No support for virt function\n");
+				ret = -EINVAL;
+				goto ret;
+			}
+
+			if (vfunc_no > epc->max_vfs[func_no]) {
+				dev_err(dev, "PF%d: Exceeds max vfunc number\n",
+					func_no);
+				ret = -EINVAL;
+				goto ret;
+			}
+		}
+
+		epc = epf->sec_epc;
+		func_no = epf->sec_epc_func_no;
+		if (!IS_ERR_OR_NULL(epc)) {
+			if (!epc->max_vfs) {
+				dev_err(dev, "No support for virt function\n");
+				ret = -EINVAL;
+				goto ret;
+			}
+
+			if (vfunc_no > epc->max_vfs[func_no]) {
+				dev_err(dev, "PF%d: Exceeds max vfunc number\n",
+					func_no);
+				ret = -EINVAL;
+				goto ret;
+			}
+		}
+
+		epf_vf->func_no = epf->func_no;
+		epf_vf->sec_epc_func_no = epf->sec_epc_func_no;
+		epf_vf->epc = epf->epc;
+		epf_vf->sec_epc = epf->sec_epc;
+		ret = epf_vf->driver->ops->bind(epf_vf);
+		if (ret)
+			goto ret;
+		epf_vf->is_bound = true;
+	}
+
 	ret = epf->driver->ops->bind(epf);
+	if (ret)
+		goto ret;
+	epf->is_bound = true;
+
+	mutex_unlock(&epf->lock);
+	return 0;
+
+ret:
 	mutex_unlock(&epf->lock);
+	pci_epf_unbind(epf);
 
 	return ret;
 }
 EXPORT_SYMBOL_GPL(pci_epf_bind);
 
 /**
+ * pci_epf_add_vepf() - associate virtual EP function to physical EP function
+ * @epf_pf: the physical EP function to which the virtual EP function should be
+ *   associated
+ * @epf_vf: the virtual EP function to be added
+ *
+ * A physical endpoint function can be associated with multiple virtual
+ * endpoint functions. Invoke pci_epf_add_epf() to add a virtual PCI endpoint
+ * function to a physical PCI endpoint function.
+ */
+int pci_epf_add_vepf(struct pci_epf *epf_pf, struct pci_epf *epf_vf)
+{
+	u32 vfunc_no;
+
+	if (IS_ERR_OR_NULL(epf_pf) || IS_ERR_OR_NULL(epf_vf))
+		return -EINVAL;
+
+	if (epf_pf->epc || epf_vf->epc || epf_vf->epf_pf)
+		return -EBUSY;
+
+	if (epf_pf->sec_epc || epf_vf->sec_epc)
+		return -EBUSY;
+
+	mutex_lock(&epf_pf->lock);
+	vfunc_no = find_first_zero_bit(&epf_pf->vfunction_num_map,
+				       BITS_PER_LONG);
+	if (vfunc_no >= BITS_PER_LONG) {
+		mutex_unlock(&epf_pf->lock);
+		return -EINVAL;
+	}
+
+	set_bit(vfunc_no, &epf_pf->vfunction_num_map);
+	epf_vf->vfunc_no = vfunc_no;
+
+	epf_vf->epf_pf = epf_pf;
+	epf_vf->is_vf = true;
+
+	list_add_tail(&epf_vf->list, &epf_pf->pci_vepf);
+	mutex_unlock(&epf_pf->lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_epf_add_vepf);
+
+/**
+ * pci_epf_remove_vepf() - remove virtual EP function from physical EP function
+ * @epf_pf: the physical EP function from which the virtual EP function should
+ *   be removed
+ * @epf_vf: the virtual EP function to be removed
+ *
+ * Invoke to remove a virtual endpoint function from the physcial endpoint
+ * function.
+ */
+void pci_epf_remove_vepf(struct pci_epf *epf_pf, struct pci_epf *epf_vf)
+{
+	if (IS_ERR_OR_NULL(epf_pf) || IS_ERR_OR_NULL(epf_vf))
+		return;
+
+	mutex_lock(&epf_pf->lock);
+	clear_bit(epf_vf->vfunc_no, &epf_pf->vfunction_num_map);
+	list_del(&epf_vf->list);
+	mutex_unlock(&epf_pf->lock);
+}
+EXPORT_SYMBOL_GPL(pci_epf_remove_vepf);
+
+/**
  * pci_epf_free_space() - free the allocated PCI EPF register space
  * @epf: the EPF device from whom to free the memory
  * @addr: the virtual address of the PCI EPF register space
@@ -317,6 +455,10 @@ struct pci_epf *pci_epf_create(const char *name)
 		return ERR_PTR(-ENOMEM);
 	}
 
+	/* VFs are numbered starting with 1. So set BIT(0) by default */
+	epf->vfunction_num_map = 1;
+	INIT_LIST_HEAD(&epf->pci_vepf);
+
 	dev = &epf->dev;
 	device_initialize(dev);
 	dev->bus = &pci_epf_bus_type;
diff --git a/drivers/pci/host-bridge.c b/drivers/pci/host-bridge.c
index e01d53f5b32f..afa50b446567 100644
--- a/drivers/pci/host-bridge.c
+++ b/drivers/pci/host-bridge.c
@@ -23,6 +23,7 @@ struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus)
 
 	return to_pci_host_bridge(root_bus->bridge);
 }
+EXPORT_SYMBOL_GPL(pci_find_host_bridge);
 
 struct device *pci_get_host_bridge_device(struct pci_dev *dev)
 {
diff --git a/drivers/pci/hotplug/TODO b/drivers/pci/hotplug/TODO
index a32070be5adf..cc6194aa24c1 100644
--- a/drivers/pci/hotplug/TODO
+++ b/drivers/pci/hotplug/TODO
@@ -40,9 +40,6 @@ ibmphp:
 
 * The return value of pci_hp_register() is not checked.
 
-* iounmap(io_mem) is called in the error path of ebda_rsrc_controller()
-  and once more in the error path of its caller ibmphp_access_ebda().
-
 * The various slot data structures are difficult to follow and need to be
   simplified.  A lot of functions are too large and too complex, they need
   to be broken up into smaller, manageable pieces.  Negative examples are
diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c
index 11a2661dc062..7fb75401ad8a 100644
--- a/drivers/pci/hotplug/ibmphp_ebda.c
+++ b/drivers/pci/hotplug/ibmphp_ebda.c
@@ -714,8 +714,7 @@ static int __init ebda_rsrc_controller(void)
 		/* init hpc structure */
 		hpc_ptr = alloc_ebda_hpc(slot_num, bus_num);
 		if (!hpc_ptr) {
-			rc = -ENOMEM;
-			goto error_no_hpc;
+			return -ENOMEM;
 		}
 		hpc_ptr->ctlr_id = ctlr_id;
 		hpc_ptr->ctlr_relative_id = ctlr;
@@ -910,8 +909,6 @@ error:
 	kfree(tmp_slot);
 error_no_slot:
 	free_ebda_hpc(hpc_ptr);
-error_no_hpc:
-	iounmap(io_mem);
 	return rc;
 }
 
diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index d4a930881054..69fd401691be 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -184,7 +184,7 @@ void pciehp_release_ctrl(struct controller *ctrl);
 
 int pciehp_sysfs_enable_slot(struct hotplug_slot *hotplug_slot);
 int pciehp_sysfs_disable_slot(struct hotplug_slot *hotplug_slot);
-int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, int probe);
+int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, bool probe);
 int pciehp_get_attention_status(struct hotplug_slot *hotplug_slot, u8 *status);
 int pciehp_set_raw_indicator_status(struct hotplug_slot *h_slot, u8 status);
 int pciehp_get_raw_indicator_status(struct hotplug_slot *h_slot, u8 *status);
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 9d06939736c0..3024d7e85e6a 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -870,7 +870,7 @@ void pcie_disable_interrupt(struct controller *ctrl)
  * momentarily, if we see that they could interfere. Also, clear any spurious
  * events after.
  */
-int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, int probe)
+int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, bool probe)
 {
 	struct controller *ctrl = to_ctrl(hotplug_slot);
 	struct pci_dev *pdev = ctrl_dev(ctrl);
diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c
index 04565162a449..f4c2e6e01be0 100644
--- a/drivers/pci/hotplug/pnv_php.c
+++ b/drivers/pci/hotplug/pnv_php.c
@@ -526,7 +526,7 @@ scan:
 	return 0;
 }
 
-static int pnv_php_reset_slot(struct hotplug_slot *slot, int probe)
+static int pnv_php_reset_slot(struct hotplug_slot *slot, bool probe)
 {
 	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
 	struct pci_dev *bridge = php_slot->pdev;
diff --git a/drivers/pci/of.c b/drivers/pci/of.c
index a143b02b2dcd..d84381ce82b5 100644
--- a/drivers/pci/of.c
+++ b/drivers/pci/of.c
@@ -310,7 +310,7 @@ static int devm_of_pci_get_host_bridge_resources(struct device *dev,
 	/* Check for ranges property */
 	err = of_pci_range_parser_init(&parser, dev_node);
 	if (err)
-		goto failed;
+		return 0;
 
 	dev_dbg(dev, "Parsing ranges property...\n");
 	for_each_of_pci_range(&parser, &range) {
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 36bc23e21759..a1b1e2a01632 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -17,6 +17,7 @@
 #include <linux/pci-acpi.h>
 #include <linux/pm_runtime.h>
 #include <linux/pm_qos.h>
+#include <linux/rwsem.h>
 #include "pci.h"
 
 /*
@@ -934,58 +935,77 @@ static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev)
 
 static struct acpi_device *acpi_pci_find_companion(struct device *dev);
 
+void pci_set_acpi_fwnode(struct pci_dev *dev)
+{
+	if (!ACPI_COMPANION(&dev->dev) && !pci_dev_is_added(dev))
+		ACPI_COMPANION_SET(&dev->dev,
+				   acpi_pci_find_companion(&dev->dev));
+}
+
+/**
+ * pci_dev_acpi_reset - do a function level reset using _RST method
+ * @dev: device to reset
+ * @probe: if true, return 0 if device supports _RST
+ */
+int pci_dev_acpi_reset(struct pci_dev *dev, bool probe)
+{
+	acpi_handle handle = ACPI_HANDLE(&dev->dev);
+
+	if (!handle || !acpi_has_method(handle, "_RST"))
+		return -ENOTTY;
+
+	if (probe)
+		return 0;
+
+	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_RST", NULL, NULL))) {
+		pci_warn(dev, "ACPI _RST failed\n");
+		return -ENOTTY;
+	}
+
+	return 0;
+}
+
+static bool acpi_pci_power_manageable(struct pci_dev *dev)
+{
+	struct acpi_device *adev = ACPI_COMPANION(&dev->dev);
+
+	if (!adev)
+		return false;
+	return acpi_device_power_manageable(adev);
+}
+
 static bool acpi_pci_bridge_d3(struct pci_dev *dev)
 {
-	const struct fwnode_handle *fwnode;
+	const union acpi_object *obj;
 	struct acpi_device *adev;
-	struct pci_dev *root;
-	u8 val;
+	struct pci_dev *rpdev;
 
 	if (!dev->is_hotplug_bridge)
 		return false;
 
 	/* Assume D3 support if the bridge is power-manageable by ACPI. */
-	adev = ACPI_COMPANION(&dev->dev);
-	if (!adev && !pci_dev_is_added(dev)) {
-		adev = acpi_pci_find_companion(&dev->dev);
-		ACPI_COMPANION_SET(&dev->dev, adev);
-	}
-
-	if (adev && acpi_device_power_manageable(adev))
+	if (acpi_pci_power_manageable(dev))
 		return true;
 
 	/*
-	 * Look for a special _DSD property for the root port and if it
-	 * is set we know the hierarchy behind it supports D3 just fine.
+	 * The ACPI firmware will provide the device-specific properties through
+	 * _DSD configuration object. Look for the 'HotPlugSupportInD3' property
+	 * for the root port and if it is set we know the hierarchy behind it
+	 * supports D3 just fine.
 	 */
-	root = pcie_find_root_port(dev);
-	if (!root)
+	rpdev = pcie_find_root_port(dev);
+	if (!rpdev)
 		return false;
 
-	adev = ACPI_COMPANION(&root->dev);
-	if (root == dev) {
-		/*
-		 * It is possible that the ACPI companion is not yet bound
-		 * for the root port so look it up manually here.
-		 */
-		if (!adev && !pci_dev_is_added(root))
-			adev = acpi_pci_find_companion(&root->dev);
-	}
-
+	adev = ACPI_COMPANION(&rpdev->dev);
 	if (!adev)
 		return false;
 
-	fwnode = acpi_fwnode_handle(adev);
-	if (fwnode_property_read_u8(fwnode, "HotPlugSupportInD3", &val))
+	if (acpi_dev_get_property(adev, "HotPlugSupportInD3",
+				   ACPI_TYPE_INTEGER, &obj) < 0)
 		return false;
 
-	return val == 1;
-}
-
-static bool acpi_pci_power_manageable(struct pci_dev *dev)
-{
-	struct acpi_device *adev = ACPI_COMPANION(&dev->dev);
-	return adev ? acpi_device_power_manageable(adev) : false;
+	return obj->integer.value == 1;
 }
 
 static int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state)
@@ -1159,6 +1179,69 @@ void acpi_pci_remove_bus(struct pci_bus *bus)
 }
 
 /* ACPI bus type */
+
+
+static DECLARE_RWSEM(pci_acpi_companion_lookup_sem);
+static struct acpi_device *(*pci_acpi_find_companion_hook)(struct pci_dev *);
+
+/**
+ * pci_acpi_set_companion_lookup_hook - Set ACPI companion lookup callback.
+ * @func: ACPI companion lookup callback pointer or NULL.
+ *
+ * Set a special ACPI companion lookup callback for PCI devices whose companion
+ * objects in the ACPI namespace have _ADR with non-standard bus-device-function
+ * encodings.
+ *
+ * Return 0 on success or a negative error code on failure (in which case no
+ * changes are made).
+ *
+ * The caller is responsible for the appropriate ordering of the invocations of
+ * this function with respect to the enumeration of the PCI devices needing the
+ * callback installed by it.
+ */
+int pci_acpi_set_companion_lookup_hook(struct acpi_device *(*func)(struct pci_dev *))
+{
+	int ret;
+
+	if (!func)
+		return -EINVAL;
+
+	down_write(&pci_acpi_companion_lookup_sem);
+
+	if (pci_acpi_find_companion_hook) {
+		ret = -EBUSY;
+	} else {
+		pci_acpi_find_companion_hook = func;
+		ret = 0;
+	}
+
+	up_write(&pci_acpi_companion_lookup_sem);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pci_acpi_set_companion_lookup_hook);
+
+/**
+ * pci_acpi_clear_companion_lookup_hook - Clear ACPI companion lookup callback.
+ *
+ * Clear the special ACPI companion lookup callback previously set by
+ * pci_acpi_set_companion_lookup_hook().  Block until the last running instance
+ * of the callback returns before clearing it.
+ *
+ * The caller is responsible for the appropriate ordering of the invocations of
+ * this function with respect to the enumeration of the PCI devices needing the
+ * callback cleared by it.
+ */
+void pci_acpi_clear_companion_lookup_hook(void)
+{
+	down_write(&pci_acpi_companion_lookup_sem);
+
+	pci_acpi_find_companion_hook = NULL;
+
+	up_write(&pci_acpi_companion_lookup_sem);
+}
+EXPORT_SYMBOL_GPL(pci_acpi_clear_companion_lookup_hook);
+
 static struct acpi_device *acpi_pci_find_companion(struct device *dev)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
@@ -1166,6 +1249,16 @@ static struct acpi_device *acpi_pci_find_companion(struct device *dev)
 	bool check_children;
 	u64 addr;
 
+	down_read(&pci_acpi_companion_lookup_sem);
+
+	adev = pci_acpi_find_companion_hook ?
+		pci_acpi_find_companion_hook(pci_dev) : NULL;
+
+	up_read(&pci_acpi_companion_lookup_sem);
+
+	if (adev)
+		return adev;
+
 	check_children = pci_is_bridge(pci_dev);
 	/* Please ref to ACPI spec for the syntax of _ADR */
 	addr = (PCI_SLOT(pci_dev->devfn) << 16) | PCI_FUNC(pci_dev->devfn);
diff --git a/drivers/pci/pci-bridge-emul.h b/drivers/pci/pci-bridge-emul.h
index b31883022a8e..49bbd37ee318 100644
--- a/drivers/pci/pci-bridge-emul.h
+++ b/drivers/pci/pci-bridge-emul.h
@@ -54,7 +54,7 @@ struct pci_bridge_emul_pcie_conf {
 	__le16 slotctl;
 	__le16 slotsta;
 	__le16 rootctl;
-	__le16 rsvd;
+	__le16 rootcap;
 	__le32 rootsta;
 	__le32 devcap2;
 	__le16 devctl2;
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index a0615395500a..2761ab86490d 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -136,7 +136,7 @@ static const struct pci_device_id *pci_match_device(struct pci_driver *drv,
 						    struct pci_dev *dev)
 {
 	struct pci_dynid *dynid;
-	const struct pci_device_id *found_id = NULL;
+	const struct pci_device_id *found_id = NULL, *ids;
 
 	/* When driver_override is set, only bind to the matching driver */
 	if (dev->driver_override && strcmp(dev->driver_override, drv->name))
@@ -152,14 +152,28 @@ static const struct pci_device_id *pci_match_device(struct pci_driver *drv,
 	}
 	spin_unlock(&drv->dynids.lock);
 
-	if (!found_id)
-		found_id = pci_match_id(drv->id_table, dev);
+	if (found_id)
+		return found_id;
 
-	/* driver_override will always match, send a dummy id */
-	if (!found_id && dev->driver_override)
-		found_id = &pci_device_id_any;
+	for (ids = drv->id_table; (found_id = pci_match_id(ids, dev));
+	     ids = found_id + 1) {
+		/*
+		 * The match table is split based on driver_override.
+		 * In case override_only was set, enforce driver_override
+		 * matching.
+		 */
+		if (found_id->override_only) {
+			if (dev->driver_override)
+				return found_id;
+		} else {
+			return found_id;
+		}
+	}
 
-	return found_id;
+	/* driver_override will always match, send a dummy id */
+	if (dev->driver_override)
+		return &pci_device_id_any;
+	return NULL;
 }
 
 /**
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index b70f61fbcd4b..7fb5cd17cc98 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -1367,7 +1367,7 @@ static umode_t pci_dev_reset_attr_is_visible(struct kobject *kobj,
 {
 	struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
 
-	if (!pdev->reset_fn)
+	if (!pci_reset_supported(pdev))
 		return 0;
 
 	return a->mode;
@@ -1491,6 +1491,7 @@ const struct attribute_group *pci_dev_groups[] = {
 	&pci_dev_config_attr_group,
 	&pci_dev_rom_attr_group,
 	&pci_dev_reset_attr_group,
+	&pci_dev_reset_method_attr_group,
 	&pci_dev_vpd_attr_group,
 #ifdef CONFIG_DMI
 	&pci_dev_smbios_attr_group,
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index a5e6759c407b..ce2ab62b64cf 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -31,6 +31,7 @@
 #include <linux/vmalloc.h>
 #include <asm/dma.h>
 #include <linux/aer.h>
+#include <linux/bitfield.h>
 #include "pci.h"
 
 DEFINE_MUTEX(pci_slot_mutex);
@@ -72,6 +73,11 @@ static void pci_dev_d3_sleep(struct pci_dev *dev)
 		msleep(delay);
 }
 
+bool pci_reset_supported(struct pci_dev *dev)
+{
+	return dev->reset_methods[0] != 0;
+}
+
 #ifdef CONFIG_PCI_DOMAINS
 int pci_domains_supported = 1;
 #endif
@@ -206,32 +212,36 @@ int pci_status_get_and_clear_errors(struct pci_dev *pdev)
 EXPORT_SYMBOL_GPL(pci_status_get_and_clear_errors);
 
 #ifdef CONFIG_HAS_IOMEM
-void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar)
+static void __iomem *__pci_ioremap_resource(struct pci_dev *pdev, int bar,
+					    bool write_combine)
 {
 	struct resource *res = &pdev->resource[bar];
+	resource_size_t start = res->start;
+	resource_size_t size = resource_size(res);
 
 	/*
 	 * Make sure the BAR is actually a memory resource, not an IO resource
 	 */
 	if (res->flags & IORESOURCE_UNSET || !(res->flags & IORESOURCE_MEM)) {
-		pci_warn(pdev, "can't ioremap BAR %d: %pR\n", bar, res);
+		pci_err(pdev, "can't ioremap BAR %d: %pR\n", bar, res);
 		return NULL;
 	}
-	return ioremap(res->start, resource_size(res));
+
+	if (write_combine)
+		return ioremap_wc(start, size);
+
+	return ioremap(start, size);
+}
+
+void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar)
+{
+	return __pci_ioremap_resource(pdev, bar, false);
 }
 EXPORT_SYMBOL_GPL(pci_ioremap_bar);
 
 void __iomem *pci_ioremap_wc_bar(struct pci_dev *pdev, int bar)
 {
-	/*
-	 * Make sure the BAR is actually a memory resource, not an IO resource
-	 */
-	if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) {
-		WARN_ON(1);
-		return NULL;
-	}
-	return ioremap_wc(pci_resource_start(pdev, bar),
-			  pci_resource_len(pdev, bar));
+	return __pci_ioremap_resource(pdev, bar, true);
 }
 EXPORT_SYMBOL_GPL(pci_ioremap_wc_bar);
 #endif
@@ -265,7 +275,7 @@ static int pci_dev_str_match_path(struct pci_dev *dev, const char *path,
 
 	*endptr = strchrnul(path, ';');
 
-	wpath = kmemdup_nul(path, *endptr - path, GFP_KERNEL);
+	wpath = kmemdup_nul(path, *endptr - path, GFP_ATOMIC);
 	if (!wpath)
 		return -ENOMEM;
 
@@ -915,8 +925,8 @@ static void pci_std_enable_acs(struct pci_dev *dev)
 	/* Upstream Forwarding */
 	ctrl |= (cap & PCI_ACS_UF);
 
-	/* Enable Translation Blocking for external devices */
-	if (dev->external_facing || dev->untrusted)
+	/* Enable Translation Blocking for external devices and noats */
+	if (pci_ats_disabled() || dev->external_facing || dev->untrusted)
 		ctrl |= (cap & PCI_ACS_TB);
 
 	pci_write_config_word(dev, pos + PCI_ACS_CTRL, ctrl);
@@ -4629,31 +4639,11 @@ int pci_wait_for_pending_transaction(struct pci_dev *dev)
 EXPORT_SYMBOL(pci_wait_for_pending_transaction);
 
 /**
- * pcie_has_flr - check if a device supports function level resets
- * @dev: device to check
- *
- * Returns true if the device advertises support for PCIe function level
- * resets.
- */
-bool pcie_has_flr(struct pci_dev *dev)
-{
-	u32 cap;
-
-	if (dev->dev_flags & PCI_DEV_FLAGS_NO_FLR_RESET)
-		return false;
-
-	pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &cap);
-	return cap & PCI_EXP_DEVCAP_FLR;
-}
-EXPORT_SYMBOL_GPL(pcie_has_flr);
-
-/**
  * pcie_flr - initiate a PCIe function level reset
  * @dev: device to reset
  *
- * Initiate a function level reset on @dev.  The caller should ensure the
- * device supports FLR before calling this function, e.g. by using the
- * pcie_has_flr() helper.
+ * Initiate a function level reset unconditionally on @dev without
+ * checking any flags and DEVCAP
  */
 int pcie_flr(struct pci_dev *dev)
 {
@@ -4676,7 +4666,29 @@ int pcie_flr(struct pci_dev *dev)
 }
 EXPORT_SYMBOL_GPL(pcie_flr);
 
-static int pci_af_flr(struct pci_dev *dev, int probe)
+/**
+ * pcie_reset_flr - initiate a PCIe function level reset
+ * @dev: device to reset
+ * @probe: if true, return 0 if device can be reset this way
+ *
+ * Initiate a function level reset on @dev.
+ */
+int pcie_reset_flr(struct pci_dev *dev, bool probe)
+{
+	if (dev->dev_flags & PCI_DEV_FLAGS_NO_FLR_RESET)
+		return -ENOTTY;
+
+	if (!(dev->devcap & PCI_EXP_DEVCAP_FLR))
+		return -ENOTTY;
+
+	if (probe)
+		return 0;
+
+	return pcie_flr(dev);
+}
+EXPORT_SYMBOL_GPL(pcie_reset_flr);
+
+static int pci_af_flr(struct pci_dev *dev, bool probe)
 {
 	int pos;
 	u8 cap;
@@ -4723,7 +4735,7 @@ static int pci_af_flr(struct pci_dev *dev, int probe)
 /**
  * pci_pm_reset - Put device into PCI_D3 and back into PCI_D0.
  * @dev: Device to reset.
- * @probe: If set, only check if the device can be reset this way.
+ * @probe: if true, return 0 if the device can be reset this way.
  *
  * If @dev supports native PCI PM and its PCI_PM_CTRL_NO_SOFT_RESET flag is
  * unset, it will be reinitialized internally when going from PCI_D3hot to
@@ -4735,7 +4747,7 @@ static int pci_af_flr(struct pci_dev *dev, int probe)
  * by default (i.e. unless the @dev's d3hot_delay field has a different value).
  * Moreover, only devices in D0 can be reset by this function.
  */
-static int pci_pm_reset(struct pci_dev *dev, int probe)
+static int pci_pm_reset(struct pci_dev *dev, bool probe)
 {
 	u16 csr;
 
@@ -4995,7 +5007,7 @@ int pci_bridge_secondary_bus_reset(struct pci_dev *dev)
 }
 EXPORT_SYMBOL_GPL(pci_bridge_secondary_bus_reset);
 
-static int pci_parent_bus_reset(struct pci_dev *dev, int probe)
+static int pci_parent_bus_reset(struct pci_dev *dev, bool probe)
 {
 	struct pci_dev *pdev;
 
@@ -5013,7 +5025,7 @@ static int pci_parent_bus_reset(struct pci_dev *dev, int probe)
 	return pci_bridge_secondary_bus_reset(dev->bus->self);
 }
 
-static int pci_reset_hotplug_slot(struct hotplug_slot *hotplug, int probe)
+static int pci_reset_hotplug_slot(struct hotplug_slot *hotplug, bool probe)
 {
 	int rc = -ENOTTY;
 
@@ -5028,7 +5040,7 @@ static int pci_reset_hotplug_slot(struct hotplug_slot *hotplug, int probe)
 	return rc;
 }
 
-static int pci_dev_reset_slot_function(struct pci_dev *dev, int probe)
+static int pci_dev_reset_slot_function(struct pci_dev *dev, bool probe)
 {
 	if (dev->multifunction || dev->subordinate || !dev->slot ||
 	    dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET)
@@ -5037,7 +5049,7 @@ static int pci_dev_reset_slot_function(struct pci_dev *dev, int probe)
 	return pci_reset_hotplug_slot(dev->slot->hotplug, probe);
 }
 
-static int pci_reset_bus_function(struct pci_dev *dev, int probe)
+static int pci_reset_bus_function(struct pci_dev *dev, bool probe)
 {
 	int rc;
 
@@ -5121,6 +5133,139 @@ static void pci_dev_restore(struct pci_dev *dev)
 		err_handler->reset_done(dev);
 }
 
+/* dev->reset_methods[] is a 0-terminated list of indices into this array */
+static const struct pci_reset_fn_method pci_reset_fn_methods[] = {
+	{ },
+	{ pci_dev_specific_reset, .name = "device_specific" },
+	{ pci_dev_acpi_reset, .name = "acpi" },
+	{ pcie_reset_flr, .name = "flr" },
+	{ pci_af_flr, .name = "af_flr" },
+	{ pci_pm_reset, .name = "pm" },
+	{ pci_reset_bus_function, .name = "bus" },
+};
+
+static ssize_t reset_method_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	ssize_t len = 0;
+	int i, m;
+
+	for (i = 0; i < PCI_NUM_RESET_METHODS; i++) {
+		m = pdev->reset_methods[i];
+		if (!m)
+			break;
+
+		len += sysfs_emit_at(buf, len, "%s%s", len ? " " : "",
+				     pci_reset_fn_methods[m].name);
+	}
+
+	if (len)
+		len += sysfs_emit_at(buf, len, "\n");
+
+	return len;
+}
+
+static int reset_method_lookup(const char *name)
+{
+	int m;
+
+	for (m = 1; m < PCI_NUM_RESET_METHODS; m++) {
+		if (sysfs_streq(name, pci_reset_fn_methods[m].name))
+			return m;
+	}
+
+	return 0;	/* not found */
+}
+
+static ssize_t reset_method_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	char *options, *name;
+	int m, n;
+	u8 reset_methods[PCI_NUM_RESET_METHODS] = { 0 };
+
+	if (sysfs_streq(buf, "")) {
+		pdev->reset_methods[0] = 0;
+		pci_warn(pdev, "All device reset methods disabled by user");
+		return count;
+	}
+
+	if (sysfs_streq(buf, "default")) {
+		pci_init_reset_methods(pdev);
+		return count;
+	}
+
+	options = kstrndup(buf, count, GFP_KERNEL);
+	if (!options)
+		return -ENOMEM;
+
+	n = 0;
+	while ((name = strsep(&options, " ")) != NULL) {
+		if (sysfs_streq(name, ""))
+			continue;
+
+		name = strim(name);
+
+		m = reset_method_lookup(name);
+		if (!m) {
+			pci_err(pdev, "Invalid reset method '%s'", name);
+			goto error;
+		}
+
+		if (pci_reset_fn_methods[m].reset_fn(pdev, PCI_RESET_PROBE)) {
+			pci_err(pdev, "Unsupported reset method '%s'", name);
+			goto error;
+		}
+
+		if (n == PCI_NUM_RESET_METHODS - 1) {
+			pci_err(pdev, "Too many reset methods\n");
+			goto error;
+		}
+
+		reset_methods[n++] = m;
+	}
+
+	reset_methods[n] = 0;
+
+	/* Warn if dev-specific supported but not highest priority */
+	if (pci_reset_fn_methods[1].reset_fn(pdev, PCI_RESET_PROBE) == 0 &&
+	    reset_methods[0] != 1)
+		pci_warn(pdev, "Device-specific reset disabled/de-prioritized by user");
+	memcpy(pdev->reset_methods, reset_methods, sizeof(pdev->reset_methods));
+	kfree(options);
+	return count;
+
+error:
+	/* Leave previous methods unchanged */
+	kfree(options);
+	return -EINVAL;
+}
+static DEVICE_ATTR_RW(reset_method);
+
+static struct attribute *pci_dev_reset_method_attrs[] = {
+	&dev_attr_reset_method.attr,
+	NULL,
+};
+
+static umode_t pci_dev_reset_method_attr_is_visible(struct kobject *kobj,
+						    struct attribute *a, int n)
+{
+	struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
+
+	if (!pci_reset_supported(pdev))
+		return 0;
+
+	return a->mode;
+}
+
+const struct attribute_group pci_dev_reset_method_attr_group = {
+	.attrs = pci_dev_reset_method_attrs,
+	.is_visible = pci_dev_reset_method_attr_is_visible,
+};
+
 /**
  * __pci_reset_function_locked - reset a PCI device function while holding
  * the @dev mutex lock.
@@ -5143,66 +5288,64 @@ static void pci_dev_restore(struct pci_dev *dev)
  */
 int __pci_reset_function_locked(struct pci_dev *dev)
 {
-	int rc;
+	int i, m, rc = -ENOTTY;
 
 	might_sleep();
 
 	/*
-	 * A reset method returns -ENOTTY if it doesn't support this device
-	 * and we should try the next method.
+	 * A reset method returns -ENOTTY if it doesn't support this device and
+	 * we should try the next method.
 	 *
-	 * If it returns 0 (success), we're finished.  If it returns any
-	 * other error, we're also finished: this indicates that further
-	 * reset mechanisms might be broken on the device.
+	 * If it returns 0 (success), we're finished.  If it returns any other
+	 * error, we're also finished: this indicates that further reset
+	 * mechanisms might be broken on the device.
 	 */
-	rc = pci_dev_specific_reset(dev, 0);
-	if (rc != -ENOTTY)
-		return rc;
-	if (pcie_has_flr(dev)) {
-		rc = pcie_flr(dev);
+	for (i = 0; i < PCI_NUM_RESET_METHODS; i++) {
+		m = dev->reset_methods[i];
+		if (!m)
+			return -ENOTTY;
+
+		rc = pci_reset_fn_methods[m].reset_fn(dev, PCI_RESET_DO_RESET);
+		if (!rc)
+			return 0;
 		if (rc != -ENOTTY)
 			return rc;
 	}
-	rc = pci_af_flr(dev, 0);
-	if (rc != -ENOTTY)
-		return rc;
-	rc = pci_pm_reset(dev, 0);
-	if (rc != -ENOTTY)
-		return rc;
-	return pci_reset_bus_function(dev, 0);
+
+	return -ENOTTY;
 }
 EXPORT_SYMBOL_GPL(__pci_reset_function_locked);
 
 /**
- * pci_probe_reset_function - check whether the device can be safely reset
- * @dev: PCI device to reset
+ * pci_init_reset_methods - check whether device can be safely reset
+ * and store supported reset mechanisms.
+ * @dev: PCI device to check for reset mechanisms
  *
  * Some devices allow an individual function to be reset without affecting
- * other functions in the same device.  The PCI device must be responsive
- * to PCI config space in order to use this function.
+ * other functions in the same device.  The PCI device must be in D0-D3hot
+ * state.
  *
- * Returns 0 if the device function can be reset or negative if the
- * device doesn't support resetting a single function.
+ * Stores reset mechanisms supported by device in reset_methods byte array
+ * which is a member of struct pci_dev.
  */
-int pci_probe_reset_function(struct pci_dev *dev)
+void pci_init_reset_methods(struct pci_dev *dev)
 {
-	int rc;
+	int m, i, rc;
+
+	BUILD_BUG_ON(ARRAY_SIZE(pci_reset_fn_methods) != PCI_NUM_RESET_METHODS);
 
 	might_sleep();
 
-	rc = pci_dev_specific_reset(dev, 1);
-	if (rc != -ENOTTY)
-		return rc;
-	if (pcie_has_flr(dev))
-		return 0;
-	rc = pci_af_flr(dev, 1);
-	if (rc != -ENOTTY)
-		return rc;
-	rc = pci_pm_reset(dev, 1);
-	if (rc != -ENOTTY)
-		return rc;
+	i = 0;
+	for (m = 1; m < PCI_NUM_RESET_METHODS; m++) {
+		rc = pci_reset_fn_methods[m].reset_fn(dev, PCI_RESET_PROBE);
+		if (!rc)
+			dev->reset_methods[i++] = m;
+		else if (rc != -ENOTTY)
+			break;
+	}
 
-	return pci_reset_bus_function(dev, 1);
+	dev->reset_methods[i] = 0;
 }
 
 /**
@@ -5225,7 +5368,7 @@ int pci_reset_function(struct pci_dev *dev)
 {
 	int rc;
 
-	if (!dev->reset_fn)
+	if (!pci_reset_supported(dev))
 		return -ENOTTY;
 
 	pci_dev_lock(dev);
@@ -5261,7 +5404,7 @@ int pci_reset_function_locked(struct pci_dev *dev)
 {
 	int rc;
 
-	if (!dev->reset_fn)
+	if (!pci_reset_supported(dev))
 		return -ENOTTY;
 
 	pci_dev_save_and_disable(dev);
@@ -5284,7 +5427,7 @@ int pci_try_reset_function(struct pci_dev *dev)
 {
 	int rc;
 
-	if (!dev->reset_fn)
+	if (!pci_reset_supported(dev))
 		return -ENOTTY;
 
 	if (!pci_dev_trylock(dev))
@@ -5512,7 +5655,7 @@ static void pci_slot_restore_locked(struct pci_slot *slot)
 	}
 }
 
-static int pci_slot_reset(struct pci_slot *slot, int probe)
+static int pci_slot_reset(struct pci_slot *slot, bool probe)
 {
 	int rc;
 
@@ -5540,7 +5683,7 @@ static int pci_slot_reset(struct pci_slot *slot, int probe)
  */
 int pci_probe_reset_slot(struct pci_slot *slot)
 {
-	return pci_slot_reset(slot, 1);
+	return pci_slot_reset(slot, PCI_RESET_PROBE);
 }
 EXPORT_SYMBOL_GPL(pci_probe_reset_slot);
 
@@ -5563,14 +5706,14 @@ static int __pci_reset_slot(struct pci_slot *slot)
 {
 	int rc;
 
-	rc = pci_slot_reset(slot, 1);
+	rc = pci_slot_reset(slot, PCI_RESET_PROBE);
 	if (rc)
 		return rc;
 
 	if (pci_slot_trylock(slot)) {
 		pci_slot_save_and_disable_locked(slot);
 		might_sleep();
-		rc = pci_reset_hotplug_slot(slot->hotplug, 0);
+		rc = pci_reset_hotplug_slot(slot->hotplug, PCI_RESET_DO_RESET);
 		pci_slot_restore_locked(slot);
 		pci_slot_unlock(slot);
 	} else
@@ -5579,7 +5722,7 @@ static int __pci_reset_slot(struct pci_slot *slot)
 	return rc;
 }
 
-static int pci_bus_reset(struct pci_bus *bus, int probe)
+static int pci_bus_reset(struct pci_bus *bus, bool probe)
 {
 	int ret;
 
@@ -5625,14 +5768,14 @@ int pci_bus_error_reset(struct pci_dev *bridge)
 			goto bus_reset;
 
 	list_for_each_entry(slot, &bus->slots, list)
-		if (pci_slot_reset(slot, 0))
+		if (pci_slot_reset(slot, PCI_RESET_DO_RESET))
 			goto bus_reset;
 
 	mutex_unlock(&pci_slot_mutex);
 	return 0;
 bus_reset:
 	mutex_unlock(&pci_slot_mutex);
-	return pci_bus_reset(bridge->subordinate, 0);
+	return pci_bus_reset(bridge->subordinate, PCI_RESET_DO_RESET);
 }
 
 /**
@@ -5643,7 +5786,7 @@ bus_reset:
  */
 int pci_probe_reset_bus(struct pci_bus *bus)
 {
-	return pci_bus_reset(bus, 1);
+	return pci_bus_reset(bus, PCI_RESET_PROBE);
 }
 EXPORT_SYMBOL_GPL(pci_probe_reset_bus);
 
@@ -5657,7 +5800,7 @@ static int __pci_reset_bus(struct pci_bus *bus)
 {
 	int rc;
 
-	rc = pci_bus_reset(bus, 1);
+	rc = pci_bus_reset(bus, PCI_RESET_PROBE);
 	if (rc)
 		return rc;
 
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 2f52110cac97..1cce56c2aea0 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -33,10 +33,32 @@ enum pci_mmap_api {
 int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vmai,
 		  enum pci_mmap_api mmap_api);
 
-int pci_probe_reset_function(struct pci_dev *dev);
+bool pci_reset_supported(struct pci_dev *dev);
+void pci_init_reset_methods(struct pci_dev *dev);
 int pci_bridge_secondary_bus_reset(struct pci_dev *dev);
 int pci_bus_error_reset(struct pci_dev *dev);
 
+struct pci_cap_saved_data {
+	u16		cap_nr;
+	bool		cap_extended;
+	unsigned int	size;
+	u32		data[];
+};
+
+struct pci_cap_saved_state {
+	struct hlist_node		next;
+	struct pci_cap_saved_data	cap;
+};
+
+void pci_allocate_cap_save_buffers(struct pci_dev *dev);
+void pci_free_cap_save_buffers(struct pci_dev *dev);
+int pci_add_cap_save_buffer(struct pci_dev *dev, char cap, unsigned int size);
+int pci_add_ext_cap_save_buffer(struct pci_dev *dev,
+				u16 cap, unsigned int size);
+struct pci_cap_saved_state *pci_find_saved_cap(struct pci_dev *dev, char cap);
+struct pci_cap_saved_state *pci_find_saved_ext_cap(struct pci_dev *dev,
+						   u16 cap);
+
 #define PCI_PM_D2_DELAY         200	/* usec; see PCIe r4.0, sec 5.9.1 */
 #define PCI_PM_D3HOT_WAIT       10	/* msec */
 #define PCI_PM_D3COLD_WAIT      100	/* msec */
@@ -100,8 +122,6 @@ void pci_pm_init(struct pci_dev *dev);
 void pci_ea_init(struct pci_dev *dev);
 void pci_msi_init(struct pci_dev *dev);
 void pci_msix_init(struct pci_dev *dev);
-void pci_allocate_cap_save_buffers(struct pci_dev *dev);
-void pci_free_cap_save_buffers(struct pci_dev *dev);
 bool pci_bridge_d3_possible(struct pci_dev *dev);
 void pci_bridge_d3_update(struct pci_dev *dev);
 void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev);
@@ -604,13 +624,18 @@ static inline void pci_ptm_init(struct pci_dev *dev) { }
 struct pci_dev_reset_methods {
 	u16 vendor;
 	u16 device;
-	int (*reset)(struct pci_dev *dev, int probe);
+	int (*reset)(struct pci_dev *dev, bool probe);
+};
+
+struct pci_reset_fn_method {
+	int (*reset_fn)(struct pci_dev *pdev, bool probe);
+	char *name;
 };
 
 #ifdef CONFIG_PCI_QUIRKS
-int pci_dev_specific_reset(struct pci_dev *dev, int probe);
+int pci_dev_specific_reset(struct pci_dev *dev, bool probe);
 #else
-static inline int pci_dev_specific_reset(struct pci_dev *dev, int probe)
+static inline int pci_dev_specific_reset(struct pci_dev *dev, bool probe)
 {
 	return -ENOTTY;
 }
@@ -698,7 +723,15 @@ static inline int pci_aer_raw_clear_status(struct pci_dev *dev) { return -EINVAL
 #ifdef CONFIG_ACPI
 int pci_acpi_program_hp_params(struct pci_dev *dev);
 extern const struct attribute_group pci_dev_acpi_attr_group;
+void pci_set_acpi_fwnode(struct pci_dev *dev);
+int pci_dev_acpi_reset(struct pci_dev *dev, bool probe);
 #else
+static inline int pci_dev_acpi_reset(struct pci_dev *dev, bool probe)
+{
+	return -ENOTTY;
+}
+
+static inline void pci_set_acpi_fwnode(struct pci_dev *dev) {}
 static inline int pci_acpi_program_hp_params(struct pci_dev *dev)
 {
 	return -ENODEV;
@@ -709,4 +742,6 @@ static inline int pci_acpi_program_hp_params(struct pci_dev *dev)
 extern const struct attribute_group aspm_ctrl_attr_group;
 #endif
 
+extern const struct attribute_group pci_dev_reset_method_attr_group;
+
 #endif /* DRIVERS_PCI_H */
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index df4ba9b384c2..9784fdcf3006 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -1407,13 +1407,11 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
 	}
 
 	if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END) {
-		if (pcie_has_flr(dev)) {
-			rc = pcie_flr(dev);
-			pci_info(dev, "has been reset (%d)\n", rc);
-		} else {
-			pci_info(dev, "not reset (no FLR support)\n");
-			rc = -ENOTTY;
-		}
+		rc = pcie_reset_flr(dev, PCI_RESET_DO_RESET);
+		if (!rc)
+			pci_info(dev, "has been reset\n");
+		else
+			pci_info(dev, "not reset (no FLR support: %d)\n", rc);
 	} else {
 		rc = pci_bus_error_reset(dev);
 		pci_info(dev, "%s Port link has been reset (%d)\n",
diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index e1fed6649c41..3ee63968deaa 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -257,8 +257,13 @@ static int get_port_device_capability(struct pci_dev *dev)
 		services |= PCIE_PORT_SERVICE_DPC;
 
 	if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM ||
-	    pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT)
-		services |= PCIE_PORT_SERVICE_BWNOTIF;
+	    pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) {
+		u32 linkcap;
+
+		pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &linkcap);
+		if (linkcap & PCI_EXP_LNKCAP_LBNC)
+			services |= PCIE_PORT_SERVICE_BWNOTIF;
+	}
 
 	return services;
 }
diff --git a/drivers/pci/pcie/ptm.c b/drivers/pci/pcie/ptm.c
index 8a4ad974c5ac..368a254e3124 100644
--- a/drivers/pci/pcie/ptm.c
+++ b/drivers/pci/pcie/ptm.c
@@ -60,10 +60,8 @@ void pci_save_ptm_state(struct pci_dev *dev)
 		return;
 
 	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_PTM);
-	if (!save_state) {
-		pci_err(dev, "no suspend buffer for PTM\n");
+	if (!save_state)
 		return;
-	}
 
 	cap = (u16 *)&save_state->cap.data[0];
 	pci_read_config_word(dev, ptm + PCI_PTM_CTRL, cap);
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 79177ac37880..d9fc02a71baa 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -19,6 +19,7 @@
 #include <linux/hypervisor.h>
 #include <linux/irqdomain.h>
 #include <linux/pm_runtime.h>
+#include <linux/bitfield.h>
 #include "pci.h"
 
 #define CARDBUS_LATENCY_TIMER	176	/* secondary latency timer */
@@ -594,6 +595,7 @@ static void pci_init_host_bridge(struct pci_host_bridge *bridge)
 	bridge->native_pme = 1;
 	bridge->native_ltr = 1;
 	bridge->native_dpc = 1;
+	bridge->domain_nr = PCI_DOMAIN_NR_NOT_SET;
 
 	device_initialize(&bridge->dev);
 }
@@ -828,11 +830,15 @@ static struct irq_domain *pci_host_bridge_msi_domain(struct pci_bus *bus)
 {
 	struct irq_domain *d;
 
+	/* If the host bridge driver sets a MSI domain of the bridge, use it */
+	d = dev_get_msi_domain(bus->bridge);
+
 	/*
 	 * Any firmware interface that can resolve the msi_domain
 	 * should be called from here.
 	 */
-	d = pci_host_bridge_of_msi_domain(bus);
+	if (!d)
+		d = pci_host_bridge_of_msi_domain(bus);
 	if (!d)
 		d = pci_host_bridge_acpi_msi_domain(bus);
 
@@ -898,7 +904,10 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
 	bus->ops = bridge->ops;
 	bus->number = bus->busn_res.start = bridge->busnr;
 #ifdef CONFIG_PCI_DOMAINS_GENERIC
-	bus->domain_nr = pci_bus_find_domain_nr(bus, parent);
+	if (bridge->domain_nr == PCI_DOMAIN_NR_NOT_SET)
+		bus->domain_nr = pci_bus_find_domain_nr(bus, parent);
+	else
+		bus->domain_nr = bridge->domain_nr;
 #endif
 
 	b = pci_find_bus(pci_domain_nr(bus), bridge->busnr);
@@ -1498,8 +1507,8 @@ void set_pcie_port_type(struct pci_dev *pdev)
 	pdev->pcie_cap = pos;
 	pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16);
 	pdev->pcie_flags_reg = reg16;
-	pci_read_config_word(pdev, pos + PCI_EXP_DEVCAP, &reg16);
-	pdev->pcie_mpss = reg16 & PCI_EXP_DEVCAP_PAYLOAD;
+	pci_read_config_dword(pdev, pos + PCI_EXP_DEVCAP, &pdev->devcap);
+	pdev->pcie_mpss = FIELD_GET(PCI_EXP_DEVCAP_PAYLOAD, pdev->devcap);
 
 	parent = pci_upstream_bridge(pdev);
 	if (!parent)
@@ -1809,6 +1818,9 @@ int pci_setup_device(struct pci_dev *dev)
 	dev->error_state = pci_channel_io_normal;
 	set_pcie_port_type(dev);
 
+	pci_set_of_node(dev);
+	pci_set_acpi_fwnode(dev);
+
 	pci_dev_assign_slot(dev);
 
 	/*
@@ -1946,6 +1958,7 @@ int pci_setup_device(struct pci_dev *dev)
 	default:				    /* unknown header */
 		pci_err(dev, "unknown header type %02x, ignoring device\n",
 			dev->hdr_type);
+		pci_release_of_node(dev);
 		return -EIO;
 
 	bad:
@@ -2225,7 +2238,6 @@ static void pci_release_capabilities(struct pci_dev *dev)
 {
 	pci_aer_exit(dev);
 	pci_rcec_exit(dev);
-	pci_vpd_release(dev);
 	pci_iov_release(dev);
 	pci_free_cap_save_buffers(dev);
 }
@@ -2374,10 +2386,7 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
 	dev->vendor = l & 0xffff;
 	dev->device = (l >> 16) & 0xffff;
 
-	pci_set_of_node(dev);
-
 	if (pci_setup_device(dev)) {
-		pci_release_of_node(dev);
 		pci_bus_put(dev->bus);
 		kfree(dev);
 		return NULL;
@@ -2428,9 +2437,7 @@ static void pci_init_capabilities(struct pci_dev *dev)
 	pci_rcec_init(dev);		/* Root Complex Event Collector */
 
 	pcie_report_downtraining(dev);
-
-	if (pci_probe_reset_function(dev) == 0)
-		dev->reset_fn = 1;
+	pci_init_reset_methods(dev);
 }
 
 /*
diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c
index d32fbfc93ea9..cb18f8a13ab6 100644
--- a/drivers/pci/proc.c
+++ b/drivers/pci/proc.c
@@ -83,6 +83,7 @@ static ssize_t proc_bus_pci_read(struct file *file, char __user *buf,
 		buf += 4;
 		pos += 4;
 		cnt -= 4;
+		cond_resched();
 	}
 
 	if (cnt >= 2) {
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index ab3de1551b50..e5089af8ad90 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -1822,6 +1822,45 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_E7525_MCH,	quir
 DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_HUAWEI, 0x1610, PCI_CLASS_BRIDGE_PCI, 8, quirk_pcie_mch);
 
 /*
+ * HiSilicon KunPeng920 and KunPeng930 have devices appear as PCI but are
+ * actually on the AMBA bus. These fake PCI devices can support SVA via
+ * SMMU stall feature, by setting dma-can-stall for ACPI platforms.
+ *
+ * Normally stalling must not be enabled for PCI devices, since it would
+ * break the PCI requirement for free-flowing writes and may lead to
+ * deadlock.  We expect PCI devices to support ATS and PRI if they want to
+ * be fault-tolerant, so there's no ACPI binding to describe anything else,
+ * even when a "PCI" device turns out to be a regular old SoC device
+ * dressed up as a RCiEP and normal rules don't apply.
+ */
+static void quirk_huawei_pcie_sva(struct pci_dev *pdev)
+{
+	struct property_entry properties[] = {
+		PROPERTY_ENTRY_BOOL("dma-can-stall"),
+		{},
+	};
+
+	if (pdev->revision != 0x21 && pdev->revision != 0x30)
+		return;
+
+	pdev->pasid_no_tlp = 1;
+
+	/*
+	 * Set the dma-can-stall property on ACPI platforms. Device tree
+	 * can set it directly.
+	 */
+	if (!pdev->dev.of_node &&
+	    device_add_properties(&pdev->dev, properties))
+		pci_warn(pdev, "could not add stall property");
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0xa250, quirk_huawei_pcie_sva);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0xa251, quirk_huawei_pcie_sva);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0xa255, quirk_huawei_pcie_sva);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0xa256, quirk_huawei_pcie_sva);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0xa258, quirk_huawei_pcie_sva);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0xa259, quirk_huawei_pcie_sva);
+
+/*
  * It's possible for the MSI to get corrupted if SHPC and ACPI are used
  * together on certain PXH-based systems.
  */
@@ -3235,12 +3274,13 @@ static void fixup_mpss_256(struct pci_dev *dev)
 {
 	dev->pcie_mpss = 1; /* 256 bytes */
 }
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SOLARFLARE,
-			 PCI_DEVICE_ID_SOLARFLARE_SFC4000A_0, fixup_mpss_256);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SOLARFLARE,
-			 PCI_DEVICE_ID_SOLARFLARE_SFC4000A_1, fixup_mpss_256);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SOLARFLARE,
-			 PCI_DEVICE_ID_SOLARFLARE_SFC4000B, fixup_mpss_256);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SOLARFLARE,
+			PCI_DEVICE_ID_SOLARFLARE_SFC4000A_0, fixup_mpss_256);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SOLARFLARE,
+			PCI_DEVICE_ID_SOLARFLARE_SFC4000A_1, fixup_mpss_256);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SOLARFLARE,
+			PCI_DEVICE_ID_SOLARFLARE_SFC4000B, fixup_mpss_256);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ASMEDIA, 0x0612, fixup_mpss_256);
 
 /*
  * Intel 5000 and 5100 Memory controllers have an erratum with read completion
@@ -3703,7 +3743,7 @@ DECLARE_PCI_FIXUP_SUSPEND_LATE(PCI_VENDOR_ID_INTEL,
  * reset a single function if other methods (e.g. FLR, PM D0->D3) are
  * not available.
  */
-static int reset_intel_82599_sfp_virtfn(struct pci_dev *dev, int probe)
+static int reset_intel_82599_sfp_virtfn(struct pci_dev *dev, bool probe)
 {
 	/*
 	 * http://www.intel.com/content/dam/doc/datasheet/82599-10-gbe-controller-datasheet.pdf
@@ -3725,7 +3765,7 @@ static int reset_intel_82599_sfp_virtfn(struct pci_dev *dev, int probe)
 #define NSDE_PWR_STATE		0xd0100
 #define IGD_OPERATION_TIMEOUT	10000     /* set timeout 10 seconds */
 
-static int reset_ivb_igd(struct pci_dev *dev, int probe)
+static int reset_ivb_igd(struct pci_dev *dev, bool probe)
 {
 	void __iomem *mmio_base;
 	unsigned long timeout;
@@ -3768,7 +3808,7 @@ reset_complete:
 }
 
 /* Device-specific reset method for Chelsio T4-based adapters */
-static int reset_chelsio_generic_dev(struct pci_dev *dev, int probe)
+static int reset_chelsio_generic_dev(struct pci_dev *dev, bool probe)
 {
 	u16 old_command;
 	u16 msix_flags;
@@ -3846,14 +3886,14 @@ static int reset_chelsio_generic_dev(struct pci_dev *dev, int probe)
  *    Chapter 3: NVMe control registers
  *    Chapter 7.3: Reset behavior
  */
-static int nvme_disable_and_flr(struct pci_dev *dev, int probe)
+static int nvme_disable_and_flr(struct pci_dev *dev, bool probe)
 {
 	void __iomem *bar;
 	u16 cmd;
 	u32 cfg;
 
 	if (dev->class != PCI_CLASS_STORAGE_EXPRESS ||
-	    !pcie_has_flr(dev) || !pci_resource_start(dev, 0))
+	    pcie_reset_flr(dev, PCI_RESET_PROBE) || !pci_resource_start(dev, 0))
 		return -ENOTTY;
 
 	if (probe)
@@ -3920,15 +3960,12 @@ static int nvme_disable_and_flr(struct pci_dev *dev, int probe)
  * device too soon after FLR.  A 250ms delay after FLR has heuristically
  * proven to produce reliably working results for device assignment cases.
  */
-static int delay_250ms_after_flr(struct pci_dev *dev, int probe)
+static int delay_250ms_after_flr(struct pci_dev *dev, bool probe)
 {
-	if (!pcie_has_flr(dev))
-		return -ENOTTY;
-
 	if (probe)
-		return 0;
+		return pcie_reset_flr(dev, PCI_RESET_PROBE);
 
-	pcie_flr(dev);
+	pcie_reset_flr(dev, PCI_RESET_DO_RESET);
 
 	msleep(250);
 
@@ -3943,7 +3980,7 @@ static int delay_250ms_after_flr(struct pci_dev *dev, int probe)
 #define HINIC_OPERATION_TIMEOUT     15000	/* 15 seconds */
 
 /* Device-specific reset method for Huawei Intelligent NIC virtual functions */
-static int reset_hinic_vf_dev(struct pci_dev *pdev, int probe)
+static int reset_hinic_vf_dev(struct pci_dev *pdev, bool probe)
 {
 	unsigned long timeout;
 	void __iomem *bar;
@@ -4020,7 +4057,7 @@ static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
  * because when a host assigns a device to a guest VM, the host may need
  * to reset the device but probably doesn't have a driver for it.
  */
-int pci_dev_specific_reset(struct pci_dev *dev, int probe)
+int pci_dev_specific_reset(struct pci_dev *dev, bool probe)
 {
 	const struct pci_dev_reset_methods *i;
 
@@ -4615,6 +4652,18 @@ static int pci_quirk_qcom_rp_acs(struct pci_dev *dev, u16 acs_flags)
 		PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF);
 }
 
+/*
+ * Each of these NXP Root Ports is in a Root Complex with a unique segment
+ * number and does provide isolation features to disable peer transactions
+ * and validate bus numbers in requests, but does not provide an ACS
+ * capability.
+ */
+static int pci_quirk_nxp_rp_acs(struct pci_dev *dev, u16 acs_flags)
+{
+	return pci_acs_ctrl_enabled(acs_flags,
+		PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF);
+}
+
 static int pci_quirk_al_acs(struct pci_dev *dev, u16 acs_flags)
 {
 	if (pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT)
@@ -4841,6 +4890,10 @@ static const struct pci_dev_acs_enabled {
 	{ 0x10df, 0x720, pci_quirk_mf_endpoint_acs }, /* Emulex Skyhawk-R */
 	/* Cavium ThunderX */
 	{ PCI_VENDOR_ID_CAVIUM, PCI_ANY_ID, pci_quirk_cavium_acs },
+	/* Cavium multi-function devices */
+	{ PCI_VENDOR_ID_CAVIUM, 0xA026, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_CAVIUM, 0xA059, pci_quirk_mf_endpoint_acs },
+	{ PCI_VENDOR_ID_CAVIUM, 0xA060, pci_quirk_mf_endpoint_acs },
 	/* APM X-Gene */
 	{ PCI_VENDOR_ID_AMCC, 0xE004, pci_quirk_xgene_acs },
 	/* Ampere Computing */
@@ -4861,6 +4914,39 @@ static const struct pci_dev_acs_enabled {
 	{ PCI_VENDOR_ID_ZHAOXIN, 0x3038, pci_quirk_mf_endpoint_acs },
 	{ PCI_VENDOR_ID_ZHAOXIN, 0x3104, pci_quirk_mf_endpoint_acs },
 	{ PCI_VENDOR_ID_ZHAOXIN, 0x9083, pci_quirk_mf_endpoint_acs },
+	/* NXP root ports, xx=16, 12, or 08 cores */
+	/* LX2xx0A : without security features + CAN-FD */
+	{ PCI_VENDOR_ID_NXP, 0x8d81, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8da1, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8d83, pci_quirk_nxp_rp_acs },
+	/* LX2xx0C : security features + CAN-FD */
+	{ PCI_VENDOR_ID_NXP, 0x8d80, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8da0, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8d82, pci_quirk_nxp_rp_acs },
+	/* LX2xx0E : security features + CAN */
+	{ PCI_VENDOR_ID_NXP, 0x8d90, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8db0, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8d92, pci_quirk_nxp_rp_acs },
+	/* LX2xx0N : without security features + CAN */
+	{ PCI_VENDOR_ID_NXP, 0x8d91, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8db1, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8d93, pci_quirk_nxp_rp_acs },
+	/* LX2xx2A : without security features + CAN-FD */
+	{ PCI_VENDOR_ID_NXP, 0x8d89, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8da9, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8d8b, pci_quirk_nxp_rp_acs },
+	/* LX2xx2C : security features + CAN-FD */
+	{ PCI_VENDOR_ID_NXP, 0x8d88, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8da8, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8d8a, pci_quirk_nxp_rp_acs },
+	/* LX2xx2E : security features + CAN */
+	{ PCI_VENDOR_ID_NXP, 0x8d98, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8db8, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8d9a, pci_quirk_nxp_rp_acs },
+	/* LX2xx2N : without security features + CAN */
+	{ PCI_VENDOR_ID_NXP, 0x8d99, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8db9, pci_quirk_nxp_rp_acs },
+	{ PCI_VENDOR_ID_NXP, 0x8d9b, pci_quirk_nxp_rp_acs },
 	/* Zhaoxin Root/Downstream Ports */
 	{ PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs },
 	{ 0 }
@@ -5032,7 +5118,7 @@ static int pci_quirk_enable_intel_spt_pch_acs(struct pci_dev *dev)
 	ctrl |= (cap & PCI_ACS_CR);
 	ctrl |= (cap & PCI_ACS_UF);
 
-	if (dev->external_facing || dev->untrusted)
+	if (pci_ats_disabled() || dev->external_facing || dev->untrusted)
 		ctrl |= (cap & PCI_ACS_TB);
 
 	pci_write_config_dword(dev, pos + INTEL_SPT_ACS_CTRL, ctrl);
@@ -5630,7 +5716,7 @@ static void quirk_reset_lenovo_thinkpad_p50_nvgpu(struct pci_dev *pdev)
 
 	if (pdev->subsystem_vendor != PCI_VENDOR_ID_LENOVO ||
 	    pdev->subsystem_device != 0x222e ||
-	    !pdev->reset_fn)
+	    !pci_reset_supported(pdev))
 		return;
 
 	if (pci_enable_device_mem(pdev))
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index dd12c2fcc7dc..4c54c75050dc 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -19,7 +19,6 @@ static void pci_stop_dev(struct pci_dev *dev)
 	pci_pme_active(dev, false);
 
 	if (pci_dev_is_added(dev)) {
-		dev->reset_fn = 0;
 
 		device_release_driver(&dev->dev);
 		pci_proc_detach_device(dev);
diff --git a/drivers/pci/syscall.c b/drivers/pci/syscall.c
index 8b003c890b87..61a6fe3cde21 100644
--- a/drivers/pci/syscall.c
+++ b/drivers/pci/syscall.c
@@ -19,11 +19,12 @@ SYSCALL_DEFINE5(pciconfig_read, unsigned long, bus, unsigned long, dfn,
 	u8 byte;
 	u16 word;
 	u32 dword;
-	long err;
-	int cfg_ret;
+	int err, cfg_ret;
 
+	err = -EPERM;
+	dev = NULL;
 	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
+		goto error;
 
 	err = -ENODEV;
 	dev = pci_get_domain_bus_and_slot(0, bus, dfn);
diff --git a/drivers/pci/vpd.c b/drivers/pci/vpd.c
index 26bf7c877de5..25557b272a4f 100644
--- a/drivers/pci/vpd.c
+++ b/drivers/pci/vpd.c
@@ -9,116 +9,94 @@
 #include <linux/delay.h>
 #include <linux/export.h>
 #include <linux/sched/signal.h>
+#include <asm/unaligned.h>
 #include "pci.h"
 
-/* VPD access through PCI 2.2+ VPD capability */
+#define PCI_VPD_LRDT_TAG_SIZE		3
+#define PCI_VPD_SRDT_LEN_MASK		0x07
+#define PCI_VPD_SRDT_TAG_SIZE		1
+#define PCI_VPD_STIN_END		0x0f
+#define PCI_VPD_INFO_FLD_HDR_SIZE	3
 
-struct pci_vpd_ops {
-	ssize_t (*read)(struct pci_dev *dev, loff_t pos, size_t count, void *buf);
-	ssize_t (*write)(struct pci_dev *dev, loff_t pos, size_t count, const void *buf);
-};
+static u16 pci_vpd_lrdt_size(const u8 *lrdt)
+{
+	return get_unaligned_le16(lrdt + 1);
+}
 
-struct pci_vpd {
-	const struct pci_vpd_ops *ops;
-	struct mutex	lock;
-	unsigned int	len;
-	u16		flag;
-	u8		cap;
-	unsigned int	busy:1;
-	unsigned int	valid:1;
-};
+static u8 pci_vpd_srdt_tag(const u8 *srdt)
+{
+	return *srdt >> 3;
+}
 
-static struct pci_dev *pci_get_func0_dev(struct pci_dev *dev)
+static u8 pci_vpd_srdt_size(const u8 *srdt)
 {
-	return pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
+	return *srdt & PCI_VPD_SRDT_LEN_MASK;
 }
 
-/**
- * pci_read_vpd - Read one entry from Vital Product Data
- * @dev:	pci device struct
- * @pos:	offset in vpd space
- * @count:	number of bytes to read
- * @buf:	pointer to where to store result
- */
-ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf)
+static u8 pci_vpd_info_field_size(const u8 *info_field)
 {
-	if (!dev->vpd || !dev->vpd->ops)
-		return -ENODEV;
-	return dev->vpd->ops->read(dev, pos, count, buf);
+	return info_field[2];
 }
-EXPORT_SYMBOL(pci_read_vpd);
 
-/**
- * pci_write_vpd - Write entry to Vital Product Data
- * @dev:	pci device struct
- * @pos:	offset in vpd space
- * @count:	number of bytes to write
- * @buf:	buffer containing write data
- */
-ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf)
+/* VPD access through PCI 2.2+ VPD capability */
+
+static struct pci_dev *pci_get_func0_dev(struct pci_dev *dev)
 {
-	if (!dev->vpd || !dev->vpd->ops)
-		return -ENODEV;
-	return dev->vpd->ops->write(dev, pos, count, buf);
+	return pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
 }
-EXPORT_SYMBOL(pci_write_vpd);
 
-#define PCI_VPD_MAX_SIZE (PCI_VPD_ADDR_MASK + 1)
+#define PCI_VPD_MAX_SIZE	(PCI_VPD_ADDR_MASK + 1)
+#define PCI_VPD_SZ_INVALID	UINT_MAX
 
 /**
  * pci_vpd_size - determine actual size of Vital Product Data
  * @dev:	pci device struct
- * @old_size:	current assumed size, also maximum allowed size
  */
-static size_t pci_vpd_size(struct pci_dev *dev, size_t old_size)
+static size_t pci_vpd_size(struct pci_dev *dev)
 {
-	size_t off = 0;
-	unsigned char header[1+2];	/* 1 byte tag, 2 bytes length */
+	size_t off = 0, size;
+	unsigned char tag, header[1+2];	/* 1 byte tag, 2 bytes length */
 
-	while (off < old_size && pci_read_vpd(dev, off, 1, header) == 1) {
-		unsigned char tag;
+	/* Otherwise the following reads would fail. */
+	dev->vpd.len = PCI_VPD_MAX_SIZE;
 
-		if (!header[0] && !off) {
-			pci_info(dev, "Invalid VPD tag 00, assume missing optional VPD EPROM\n");
-			return 0;
-		}
+	while (pci_read_vpd(dev, off, 1, header) == 1) {
+		size = 0;
+
+		if (off == 0 && (header[0] == 0x00 || header[0] == 0xff))
+			goto error;
 
 		if (header[0] & PCI_VPD_LRDT) {
 			/* Large Resource Data Type Tag */
-			tag = pci_vpd_lrdt_tag(header);
-			/* Only read length from known tag items */
-			if ((tag == PCI_VPD_LTIN_ID_STRING) ||
-			    (tag == PCI_VPD_LTIN_RO_DATA) ||
-			    (tag == PCI_VPD_LTIN_RW_DATA)) {
-				if (pci_read_vpd(dev, off+1, 2,
-						 &header[1]) != 2) {
-					pci_warn(dev, "invalid large VPD tag %02x size at offset %zu",
-						 tag, off + 1);
-					return 0;
-				}
-				off += PCI_VPD_LRDT_TAG_SIZE +
-					pci_vpd_lrdt_size(header);
+			if (pci_read_vpd(dev, off + 1, 2, &header[1]) != 2) {
+				pci_warn(dev, "failed VPD read at offset %zu\n",
+					 off + 1);
+				return off ?: PCI_VPD_SZ_INVALID;
 			}
+			size = pci_vpd_lrdt_size(header);
+			if (off + size > PCI_VPD_MAX_SIZE)
+				goto error;
+
+			off += PCI_VPD_LRDT_TAG_SIZE + size;
 		} else {
 			/* Short Resource Data Type Tag */
-			off += PCI_VPD_SRDT_TAG_SIZE +
-				pci_vpd_srdt_size(header);
 			tag = pci_vpd_srdt_tag(header);
-		}
-
-		if (tag == PCI_VPD_STIN_END)	/* End tag descriptor */
-			return off;
+			size = pci_vpd_srdt_size(header);
+			if (off + size > PCI_VPD_MAX_SIZE)
+				goto error;
 
-		if ((tag != PCI_VPD_LTIN_ID_STRING) &&
-		    (tag != PCI_VPD_LTIN_RO_DATA) &&
-		    (tag != PCI_VPD_LTIN_RW_DATA)) {
-			pci_warn(dev, "invalid %s VPD tag %02x at offset %zu",
-				 (header[0] & PCI_VPD_LRDT) ? "large" : "short",
-				 tag, off);
-			return 0;
+			off += PCI_VPD_SRDT_TAG_SIZE + size;
+			if (tag == PCI_VPD_STIN_END)	/* End tag descriptor */
+				return off;
 		}
 	}
-	return 0;
+	return off;
+
+error:
+	pci_info(dev, "invalid VPD tag %#04x (size %zu) at offset %zu%s\n",
+		 header[0], size, off, off == 0 ?
+		 "; assume missing optional EEPROM" : "");
+	return off ?: PCI_VPD_SZ_INVALID;
 }
 
 /*
@@ -126,33 +104,26 @@ static size_t pci_vpd_size(struct pci_dev *dev, size_t old_size)
  * This code has to spin since there is no other notification from the PCI
  * hardware. Since the VPD is often implemented by serial attachment to an
  * EEPROM, it may take many milliseconds to complete.
+ * @set: if true wait for flag to be set, else wait for it to be cleared
  *
  * Returns 0 on success, negative values indicate error.
  */
-static int pci_vpd_wait(struct pci_dev *dev)
+static int pci_vpd_wait(struct pci_dev *dev, bool set)
 {
-	struct pci_vpd *vpd = dev->vpd;
+	struct pci_vpd *vpd = &dev->vpd;
 	unsigned long timeout = jiffies + msecs_to_jiffies(125);
 	unsigned long max_sleep = 16;
 	u16 status;
 	int ret;
 
-	if (!vpd->busy)
-		return 0;
-
 	do {
 		ret = pci_user_read_config_word(dev, vpd->cap + PCI_VPD_ADDR,
 						&status);
 		if (ret < 0)
 			return ret;
 
-		if ((status & PCI_VPD_ADDR_F) == vpd->flag) {
-			vpd->busy = 0;
+		if (!!(status & PCI_VPD_ADDR_F) == set)
 			return 0;
-		}
-
-		if (fatal_signal_pending(current))
-			return -EINTR;
 
 		if (time_after(jiffies, timeout))
 			break;
@@ -169,22 +140,17 @@ static int pci_vpd_wait(struct pci_dev *dev)
 static ssize_t pci_vpd_read(struct pci_dev *dev, loff_t pos, size_t count,
 			    void *arg)
 {
-	struct pci_vpd *vpd = dev->vpd;
-	int ret;
+	struct pci_vpd *vpd = &dev->vpd;
+	int ret = 0;
 	loff_t end = pos + count;
 	u8 *buf = arg;
 
+	if (!vpd->cap)
+		return -ENODEV;
+
 	if (pos < 0)
 		return -EINVAL;
 
-	if (!vpd->valid) {
-		vpd->valid = 1;
-		vpd->len = pci_vpd_size(dev, vpd->len);
-	}
-
-	if (vpd->len == 0)
-		return -EIO;
-
 	if (pos > vpd->len)
 		return 0;
 
@@ -196,21 +162,20 @@ static ssize_t pci_vpd_read(struct pci_dev *dev, loff_t pos, size_t count,
 	if (mutex_lock_killable(&vpd->lock))
 		return -EINTR;
 
-	ret = pci_vpd_wait(dev);
-	if (ret < 0)
-		goto out;
-
 	while (pos < end) {
 		u32 val;
 		unsigned int i, skip;
 
+		if (fatal_signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+
 		ret = pci_user_write_config_word(dev, vpd->cap + PCI_VPD_ADDR,
 						 pos & ~3);
 		if (ret < 0)
 			break;
-		vpd->busy = 1;
-		vpd->flag = PCI_VPD_ADDR_F;
-		ret = pci_vpd_wait(dev);
+		ret = pci_vpd_wait(dev, true);
 		if (ret < 0)
 			break;
 
@@ -228,7 +193,7 @@ static ssize_t pci_vpd_read(struct pci_dev *dev, loff_t pos, size_t count,
 			val >>= 8;
 		}
 	}
-out:
+
 	mutex_unlock(&vpd->lock);
 	return ret ? ret : count;
 }
@@ -236,41 +201,26 @@ out:
 static ssize_t pci_vpd_write(struct pci_dev *dev, loff_t pos, size_t count,
 			     const void *arg)
 {
-	struct pci_vpd *vpd = dev->vpd;
+	struct pci_vpd *vpd = &dev->vpd;
 	const u8 *buf = arg;
 	loff_t end = pos + count;
 	int ret = 0;
 
+	if (!vpd->cap)
+		return -ENODEV;
+
 	if (pos < 0 || (pos & 3) || (count & 3))
 		return -EINVAL;
 
-	if (!vpd->valid) {
-		vpd->valid = 1;
-		vpd->len = pci_vpd_size(dev, vpd->len);
-	}
-
-	if (vpd->len == 0)
-		return -EIO;
-
 	if (end > vpd->len)
 		return -EINVAL;
 
 	if (mutex_lock_killable(&vpd->lock))
 		return -EINTR;
 
-	ret = pci_vpd_wait(dev);
-	if (ret < 0)
-		goto out;
-
 	while (pos < end) {
-		u32 val;
-
-		val = *buf++;
-		val |= *buf++ << 8;
-		val |= *buf++ << 16;
-		val |= *buf++ << 24;
-
-		ret = pci_user_write_config_dword(dev, vpd->cap + PCI_VPD_DATA, val);
+		ret = pci_user_write_config_dword(dev, vpd->cap + PCI_VPD_DATA,
+						  get_unaligned_le32(buf));
 		if (ret < 0)
 			break;
 		ret = pci_user_write_config_word(dev, vpd->cap + PCI_VPD_ADDR,
@@ -278,85 +228,28 @@ static ssize_t pci_vpd_write(struct pci_dev *dev, loff_t pos, size_t count,
 		if (ret < 0)
 			break;
 
-		vpd->busy = 1;
-		vpd->flag = 0;
-		ret = pci_vpd_wait(dev);
+		ret = pci_vpd_wait(dev, false);
 		if (ret < 0)
 			break;
 
+		buf += sizeof(u32);
 		pos += sizeof(u32);
 	}
-out:
+
 	mutex_unlock(&vpd->lock);
 	return ret ? ret : count;
 }
 
-static const struct pci_vpd_ops pci_vpd_ops = {
-	.read = pci_vpd_read,
-	.write = pci_vpd_write,
-};
-
-static ssize_t pci_vpd_f0_read(struct pci_dev *dev, loff_t pos, size_t count,
-			       void *arg)
-{
-	struct pci_dev *tdev = pci_get_func0_dev(dev);
-	ssize_t ret;
-
-	if (!tdev)
-		return -ENODEV;
-
-	ret = pci_read_vpd(tdev, pos, count, arg);
-	pci_dev_put(tdev);
-	return ret;
-}
-
-static ssize_t pci_vpd_f0_write(struct pci_dev *dev, loff_t pos, size_t count,
-				const void *arg)
-{
-	struct pci_dev *tdev = pci_get_func0_dev(dev);
-	ssize_t ret;
-
-	if (!tdev)
-		return -ENODEV;
-
-	ret = pci_write_vpd(tdev, pos, count, arg);
-	pci_dev_put(tdev);
-	return ret;
-}
-
-static const struct pci_vpd_ops pci_vpd_f0_ops = {
-	.read = pci_vpd_f0_read,
-	.write = pci_vpd_f0_write,
-};
-
 void pci_vpd_init(struct pci_dev *dev)
 {
-	struct pci_vpd *vpd;
-	u8 cap;
+	dev->vpd.cap = pci_find_capability(dev, PCI_CAP_ID_VPD);
+	mutex_init(&dev->vpd.lock);
 
-	cap = pci_find_capability(dev, PCI_CAP_ID_VPD);
-	if (!cap)
-		return;
-
-	vpd = kzalloc(sizeof(*vpd), GFP_ATOMIC);
-	if (!vpd)
-		return;
-
-	vpd->len = PCI_VPD_MAX_SIZE;
-	if (dev->dev_flags & PCI_DEV_FLAGS_VPD_REF_F0)
-		vpd->ops = &pci_vpd_f0_ops;
-	else
-		vpd->ops = &pci_vpd_ops;
-	mutex_init(&vpd->lock);
-	vpd->cap = cap;
-	vpd->busy = 0;
-	vpd->valid = 0;
-	dev->vpd = vpd;
-}
+	if (!dev->vpd.len)
+		dev->vpd.len = pci_vpd_size(dev);
 
-void pci_vpd_release(struct pci_dev *dev)
-{
-	kfree(dev->vpd);
+	if (dev->vpd.len == PCI_VPD_SZ_INVALID)
+		dev->vpd.cap = 0;
 }
 
 static ssize_t vpd_read(struct file *filp, struct kobject *kobj,
@@ -388,7 +281,7 @@ static umode_t vpd_attr_is_visible(struct kobject *kobj,
 {
 	struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
 
-	if (!pdev->vpd)
+	if (!pdev->vpd.cap)
 		return 0;
 
 	return a->attr.mode;
@@ -399,23 +292,63 @@ const struct attribute_group pci_dev_vpd_attr_group = {
 	.is_bin_visible = vpd_attr_is_visible,
 };
 
-int pci_vpd_find_tag(const u8 *buf, unsigned int len, u8 rdt)
+void *pci_vpd_alloc(struct pci_dev *dev, unsigned int *size)
+{
+	unsigned int len = dev->vpd.len;
+	void *buf;
+	int cnt;
+
+	if (!dev->vpd.cap)
+		return ERR_PTR(-ENODEV);
+
+	buf = kmalloc(len, GFP_KERNEL);
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
+
+	cnt = pci_read_vpd(dev, 0, len, buf);
+	if (cnt != len) {
+		kfree(buf);
+		return ERR_PTR(-EIO);
+	}
+
+	if (size)
+		*size = len;
+
+	return buf;
+}
+EXPORT_SYMBOL_GPL(pci_vpd_alloc);
+
+static int pci_vpd_find_tag(const u8 *buf, unsigned int len, u8 rdt, unsigned int *size)
 {
 	int i = 0;
 
 	/* look for LRDT tags only, end tag is the only SRDT tag */
 	while (i + PCI_VPD_LRDT_TAG_SIZE <= len && buf[i] & PCI_VPD_LRDT) {
-		if (buf[i] == rdt)
+		unsigned int lrdt_len = pci_vpd_lrdt_size(buf + i);
+		u8 tag = buf[i];
+
+		i += PCI_VPD_LRDT_TAG_SIZE;
+		if (tag == rdt) {
+			if (i + lrdt_len > len)
+				lrdt_len = len - i;
+			if (size)
+				*size = lrdt_len;
 			return i;
+		}
 
-		i += PCI_VPD_LRDT_TAG_SIZE + pci_vpd_lrdt_size(buf + i);
+		i += lrdt_len;
 	}
 
 	return -ENOENT;
 }
-EXPORT_SYMBOL_GPL(pci_vpd_find_tag);
 
-int pci_vpd_find_info_keyword(const u8 *buf, unsigned int off,
+int pci_vpd_find_id_string(const u8 *buf, unsigned int len, unsigned int *size)
+{
+	return pci_vpd_find_tag(buf, len, PCI_VPD_LRDT_ID_STRING, size);
+}
+EXPORT_SYMBOL_GPL(pci_vpd_find_id_string);
+
+static int pci_vpd_find_info_keyword(const u8 *buf, unsigned int off,
 			      unsigned int len, const char *kw)
 {
 	int i;
@@ -431,7 +364,106 @@ int pci_vpd_find_info_keyword(const u8 *buf, unsigned int off,
 
 	return -ENOENT;
 }
-EXPORT_SYMBOL_GPL(pci_vpd_find_info_keyword);
+
+/**
+ * pci_read_vpd - Read one entry from Vital Product Data
+ * @dev:	PCI device struct
+ * @pos:	offset in VPD space
+ * @count:	number of bytes to read
+ * @buf:	pointer to where to store result
+ */
+ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf)
+{
+	ssize_t ret;
+
+	if (dev->dev_flags & PCI_DEV_FLAGS_VPD_REF_F0) {
+		dev = pci_get_func0_dev(dev);
+		if (!dev)
+			return -ENODEV;
+
+		ret = pci_vpd_read(dev, pos, count, buf);
+		pci_dev_put(dev);
+		return ret;
+	}
+
+	return pci_vpd_read(dev, pos, count, buf);
+}
+EXPORT_SYMBOL(pci_read_vpd);
+
+/**
+ * pci_write_vpd - Write entry to Vital Product Data
+ * @dev:	PCI device struct
+ * @pos:	offset in VPD space
+ * @count:	number of bytes to write
+ * @buf:	buffer containing write data
+ */
+ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf)
+{
+	ssize_t ret;
+
+	if (dev->dev_flags & PCI_DEV_FLAGS_VPD_REF_F0) {
+		dev = pci_get_func0_dev(dev);
+		if (!dev)
+			return -ENODEV;
+
+		ret = pci_vpd_write(dev, pos, count, buf);
+		pci_dev_put(dev);
+		return ret;
+	}
+
+	return pci_vpd_write(dev, pos, count, buf);
+}
+EXPORT_SYMBOL(pci_write_vpd);
+
+int pci_vpd_find_ro_info_keyword(const void *buf, unsigned int len,
+				 const char *kw, unsigned int *size)
+{
+	int ro_start, infokw_start;
+	unsigned int ro_len, infokw_size;
+
+	ro_start = pci_vpd_find_tag(buf, len, PCI_VPD_LRDT_RO_DATA, &ro_len);
+	if (ro_start < 0)
+		return ro_start;
+
+	infokw_start = pci_vpd_find_info_keyword(buf, ro_start, ro_len, kw);
+	if (infokw_start < 0)
+		return infokw_start;
+
+	infokw_size = pci_vpd_info_field_size(buf + infokw_start);
+	infokw_start += PCI_VPD_INFO_FLD_HDR_SIZE;
+
+	if (infokw_start + infokw_size > len)
+		return -EINVAL;
+
+	if (size)
+		*size = infokw_size;
+
+	return infokw_start;
+}
+EXPORT_SYMBOL_GPL(pci_vpd_find_ro_info_keyword);
+
+int pci_vpd_check_csum(const void *buf, unsigned int len)
+{
+	const u8 *vpd = buf;
+	unsigned int size;
+	u8 csum = 0;
+	int rv_start;
+
+	rv_start = pci_vpd_find_ro_info_keyword(buf, len, PCI_VPD_RO_KEYWORD_CHKSUM, &size);
+	if (rv_start == -ENOENT) /* no checksum in VPD */
+		return 1;
+	else if (rv_start < 0)
+		return rv_start;
+
+	if (!size)
+		return -EINVAL;
+
+	while (rv_start >= 0)
+		csum += vpd[rv_start--];
+
+	return csum ? -EILSEQ : 0;
+}
+EXPORT_SYMBOL_GPL(pci_vpd_check_csum);
 
 #ifdef CONFIG_PCI_QUIRKS
 /*
@@ -450,7 +482,7 @@ static void quirk_f0_vpd_link(struct pci_dev *dev)
 	if (!f0)
 		return;
 
-	if (f0->vpd && dev->class == f0->class &&
+	if (f0->vpd.cap && dev->class == f0->class &&
 	    dev->vendor == f0->vendor && dev->device == f0->device)
 		dev->dev_flags |= PCI_DEV_FLAGS_VPD_REF_F0;
 
@@ -468,41 +500,27 @@ DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, PCI_ANY_ID,
  */
 static void quirk_blacklist_vpd(struct pci_dev *dev)
 {
-	if (dev->vpd) {
-		dev->vpd->len = 0;
-		pci_warn(dev, FW_BUG "disabling VPD access (can't determine size of non-standard VPD format)\n");
-	}
+	dev->vpd.len = PCI_VPD_SZ_INVALID;
+	pci_warn(dev, FW_BUG "disabling VPD access (can't determine size of non-standard VPD format)\n");
 }
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0060, quirk_blacklist_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x007c, quirk_blacklist_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0413, quirk_blacklist_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0078, quirk_blacklist_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0079, quirk_blacklist_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0073, quirk_blacklist_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0071, quirk_blacklist_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005b, quirk_blacklist_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x002f, quirk_blacklist_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005d, quirk_blacklist_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005f, quirk_blacklist_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, PCI_ANY_ID,
-		quirk_blacklist_vpd);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LSI_LOGIC, 0x0060, quirk_blacklist_vpd);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LSI_LOGIC, 0x007c, quirk_blacklist_vpd);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LSI_LOGIC, 0x0413, quirk_blacklist_vpd);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LSI_LOGIC, 0x0078, quirk_blacklist_vpd);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LSI_LOGIC, 0x0079, quirk_blacklist_vpd);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LSI_LOGIC, 0x0073, quirk_blacklist_vpd);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LSI_LOGIC, 0x0071, quirk_blacklist_vpd);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LSI_LOGIC, 0x005b, quirk_blacklist_vpd);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LSI_LOGIC, 0x002f, quirk_blacklist_vpd);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LSI_LOGIC, 0x005d, quirk_blacklist_vpd);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LSI_LOGIC, 0x005f, quirk_blacklist_vpd);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATTANSIC, PCI_ANY_ID, quirk_blacklist_vpd);
 /*
  * The Amazon Annapurna Labs 0x0031 device id is reused for other non Root Port
  * device types, so the quirk is registered for the PCI_CLASS_BRIDGE_PCI class.
  */
-DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS, 0x0031,
-			      PCI_CLASS_BRIDGE_PCI, 8, quirk_blacklist_vpd);
-
-static void pci_vpd_set_size(struct pci_dev *dev, size_t len)
-{
-	struct pci_vpd *vpd = dev->vpd;
-
-	if (!vpd || len == 0 || len > PCI_VPD_MAX_SIZE)
-		return;
-
-	vpd->valid = 1;
-	vpd->len = len;
-}
+DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS, 0x0031,
+			       PCI_CLASS_BRIDGE_PCI, 8, quirk_blacklist_vpd);
 
 static void quirk_chelsio_extend_vpd(struct pci_dev *dev)
 {
@@ -522,12 +540,12 @@ static void quirk_chelsio_extend_vpd(struct pci_dev *dev)
 	 * limits.
 	 */
 	if (chip == 0x0 && prod >= 0x20)
-		pci_vpd_set_size(dev, 8192);
+		dev->vpd.len = 8192;
 	else if (chip >= 0x4 && func < 0x8)
-		pci_vpd_set_size(dev, 2048);
+		dev->vpd.len = 2048;
 }
 
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID,
-			quirk_chelsio_extend_vpd);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID,
+			 quirk_chelsio_extend_vpd);
 
 #endif
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index b7a8f3a1921f..2156c632524d 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -115,7 +115,7 @@ static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
 	struct xen_pci_op *active_op = &pdev->sh_info->op;
 	unsigned long irq_flags;
 	evtchn_port_t port = pdev->evtchn;
-	unsigned irq = pdev->irq;
+	unsigned int irq = pdev->irq;
 	s64 ns, ns_timeout;
 
 	spin_lock_irqsave(&pdev->sh_info_lock, irq_flags);
@@ -153,10 +153,10 @@ static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
 	}
 
 	/*
-	* We might lose backend service request since we
-	* reuse same evtchn with pci_conf backend response. So re-schedule
-	* aer pcifront service.
-	*/
+	 * We might lose backend service request since we
+	 * reuse same evtchn with pci_conf backend response. So re-schedule
+	 * aer pcifront service.
+	 */
 	if (test_bit(_XEN_PCIB_active,
 			(unsigned long *)&pdev->sh_info->flags)) {
 		dev_err(&pdev->xdev->dev,
@@ -414,7 +414,8 @@ static int pcifront_scan_bus(struct pcifront_device *pdev,
 	struct pci_dev *d;
 	unsigned int devfn;
 
-	/* Scan the bus for functions and add.
+	/*
+	 * Scan the bus for functions and add.
 	 * We omit handling of PCI bridge attachment because pciback prevents
 	 * bridges from being exported.
 	 */
@@ -492,8 +493,10 @@ static int pcifront_scan_root(struct pcifront_device *pdev,
 
 	list_add(&bus_entry->list, &pdev->root_buses);
 
-	/* pci_scan_root_bus skips devices which do not have a
-	* devfn==0. The pcifront_scan_bus enumerates all devfn. */
+	/*
+	 * pci_scan_root_bus skips devices which do not have a
+	 * devfn==0. The pcifront_scan_bus enumerates all devfn.
+	 */
 	err = pcifront_scan_bus(pdev, domain, bus, b);
 
 	/* Claim resources before going "live" with our devices */
@@ -651,8 +654,10 @@ static void pcifront_do_aer(struct work_struct *data)
 	pci_channel_state_t state =
 		(pci_channel_state_t)pdev->sh_info->aer_op.err;
 
-	/*If a pci_conf op is in progress,
-		we have to wait until it is done before service aer op*/
+	/*
+	 * If a pci_conf op is in progress, we have to wait until it is done
+	 * before service aer op
+	 */
 	dev_dbg(&pdev->xdev->dev,
 		"pcifront service aer bus %x devfn %x\n",
 		pdev->sh_info->aer_op.bus, pdev->sh_info->aer_op.devfn);
@@ -676,6 +681,7 @@ static void pcifront_do_aer(struct work_struct *data)
 static irqreturn_t pcifront_handler_aer(int irq, void *dev)
 {
 	struct pcifront_device *pdev = dev;
+
 	schedule_pcifront_aer_op(pdev);
 	return IRQ_HANDLED;
 }
@@ -693,7 +699,7 @@ static int pcifront_connect_and_init_dma(struct pcifront_device *pdev)
 
 	spin_unlock(&pcifront_dev_lock);
 
-	if (!err && !is_swiotlb_active()) {
+	if (!err && !is_swiotlb_active(&pdev->xdev->dev)) {
 		err = pci_xen_swiotlb_init_late();
 		if (err)
 			dev_err(&pdev->xdev->dev, "Could not setup SWIOTLB!\n");
@@ -1027,6 +1033,7 @@ static int pcifront_detach_devices(struct pcifront_device *pdev)
 	/* Find devices being detached and remove them. */
 	for (i = 0; i < num_devs; i++) {
 		int l, state;
+
 		l = snprintf(str, sizeof(str), "state-%d", i);
 		if (unlikely(l >= (sizeof(str) - 1))) {
 			err = -ENOMEM;
@@ -1078,7 +1085,7 @@ out:
 	return err;
 }
 
-static void __ref pcifront_backend_changed(struct xenbus_device *xdev,
+static void pcifront_backend_changed(struct xenbus_device *xdev,
 						  enum xenbus_state be_state)
 {
 	struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev);
@@ -1137,6 +1144,7 @@ out:
 static int pcifront_xenbus_remove(struct xenbus_device *xdev)
 {
 	struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev);
+
 	if (pdev)
 		free_pdev(pdev);
 
diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig
index 7dd35f1b9cc5..82b63e60c5a2 100644
--- a/drivers/phy/Kconfig
+++ b/drivers/phy/Kconfig
@@ -37,7 +37,7 @@ config PHY_LPC18XX_USB_OTG
 
 config PHY_PISTACHIO_USB
 	tristate "IMG Pistachio USB2.0 PHY driver"
-	depends on MACH_PISTACHIO
+	depends on MIPS || COMPILE_TEST
 	select GENERIC_PHY
 	help
 	  Enable this to support the USB2.0 PHY on the IMG Pistachio SoC.
diff --git a/drivers/phy/st/phy-stm32-usbphyc.c b/drivers/phy/st/phy-stm32-usbphyc.c
index 3e491dfb2525..937a14fa7448 100644
--- a/drivers/phy/st/phy-stm32-usbphyc.c
+++ b/drivers/phy/st/phy-stm32-usbphyc.c
@@ -15,6 +15,7 @@
 #include <linux/of_platform.h>
 #include <linux/phy/phy.h>
 #include <linux/reset.h>
+#include <linux/units.h>
 
 #define STM32_USBPHYC_PLL	0x0
 #define STM32_USBPHYC_MISC	0x8
@@ -47,7 +48,6 @@
 #define PLL_FVCO_MHZ		2880
 #define PLL_INFF_MIN_RATE_HZ	19200000
 #define PLL_INFF_MAX_RATE_HZ	38400000
-#define HZ_PER_MHZ		1000000L
 
 struct pll_params {
 	u8 ndiv;
diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig
index f38f12801f18..31921108e456 100644
--- a/drivers/pinctrl/Kconfig
+++ b/drivers/pinctrl/Kconfig
@@ -248,12 +248,15 @@ config PINCTRL_SX150X
 	  - 16 bits: sx1509q, sx1506q
 
 config PINCTRL_PISTACHIO
-	def_bool y if MACH_PISTACHIO
+	bool "IMG Pistachio SoC pinctrl driver"
+	depends on OF && (MIPS || COMPILE_TEST)
 	depends on GPIOLIB
 	select PINMUX
 	select GENERIC_PINCONF
 	select GPIOLIB_IRQCHIP
 	select OF_GPIO
+    help
+	  This support pinctrl and gpio driver for IMG Pistachio SoC.
 
 config PINCTRL_ST
 	bool
@@ -404,6 +407,25 @@ config PINCTRL_K210
 	  Add support for the Canaan Kendryte K210 RISC-V SOC Field
 	  Programmable IO Array (FPIOA) controller.
 
+config PINCTRL_KEEMBAY
+	tristate "Pinctrl driver for Intel Keem Bay SoC"
+	depends on ARCH_KEEMBAY || (ARM64 && COMPILE_TEST)
+	depends on HAS_IOMEM
+	select PINMUX
+	select PINCONF
+	select GENERIC_PINCONF
+	select GENERIC_PINCTRL_GROUPS
+	select GENERIC_PINMUX_FUNCTIONS
+	select GPIOLIB
+	select GPIOLIB_IRQCHIP
+	select GPIO_GENERIC
+	help
+	  This selects pin control driver for the Intel Keembay SoC.
+	  It provides pin config functions such as pullup, pulldown,
+	  interrupt, drive strength, sec lock, schmitt trigger, slew
+	  rate control and direction control. This module will be
+	  called as pinctrl-keembay.
+
 source "drivers/pinctrl/actions/Kconfig"
 source "drivers/pinctrl/aspeed/Kconfig"
 source "drivers/pinctrl/bcm/Kconfig"
diff --git a/drivers/pinctrl/Makefile b/drivers/pinctrl/Makefile
index 5ef5334a797f..200073bcc2c1 100644
--- a/drivers/pinctrl/Makefile
+++ b/drivers/pinctrl/Makefile
@@ -47,6 +47,7 @@ obj-$(CONFIG_PINCTRL_OCELOT)	+= pinctrl-ocelot.o
 obj-$(CONFIG_PINCTRL_MICROCHIP_SGPIO)	+= pinctrl-microchip-sgpio.o
 obj-$(CONFIG_PINCTRL_EQUILIBRIUM)   += pinctrl-equilibrium.o
 obj-$(CONFIG_PINCTRL_K210)	+= pinctrl-k210.o
+obj-$(CONFIG_PINCTRL_KEEMBAY)	+= pinctrl-keembay.o
 
 obj-y				+= actions/
 obj-$(CONFIG_ARCH_ASPEED)	+= aspeed/
diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed.c b/drivers/pinctrl/aspeed/pinctrl-aspeed.c
index 9bbfe5c14b36..c94e24aadf92 100644
--- a/drivers/pinctrl/aspeed/pinctrl-aspeed.c
+++ b/drivers/pinctrl/aspeed/pinctrl-aspeed.c
@@ -133,8 +133,8 @@ static int aspeed_disable_sig(struct aspeed_pinmux_data *ctx,
 }
 
 /**
- * Search for the signal expression needed to enable the pin's signal for the
- * requested function.
+ * aspeed_find_expr_by_name - Search for the signal expression needed to
+ * enable the pin's signal for the requested function.
  *
  * @exprs: List of signal expressions (haystack)
  * @name: The name of the requested function (needle)
diff --git a/drivers/pinctrl/aspeed/pinmux-aspeed.c b/drivers/pinctrl/aspeed/pinmux-aspeed.c
index 894e2efd3be7..4aa46383c2c5 100644
--- a/drivers/pinctrl/aspeed/pinmux-aspeed.c
+++ b/drivers/pinctrl/aspeed/pinmux-aspeed.c
@@ -59,7 +59,8 @@ int aspeed_sig_desc_eval(const struct aspeed_sig_desc *desc,
 }
 
 /**
- * Query the enabled or disabled state for a mux function's signal on a pin
+ * aspeed_sig_expr_eval - Query the enabled or disabled state for a
+ * mux function's signal on a pin
  *
  * @ctx: The driver context for the pinctrl IP
  * @expr: An expression controlling the signal for a mux function on a pin
diff --git a/drivers/pinctrl/aspeed/pinmux-aspeed.h b/drivers/pinctrl/aspeed/pinmux-aspeed.h
index b69ba6b360a2..4d7548686f39 100644
--- a/drivers/pinctrl/aspeed/pinmux-aspeed.h
+++ b/drivers/pinctrl/aspeed/pinmux-aspeed.h
@@ -5,7 +5,6 @@
 #define ASPEED_PINMUX_H
 
 #include <linux/regmap.h>
-#include <stdbool.h>
 
 /*
  * The ASPEED SoCs provide typically more than 200 pins for GPIO and other
diff --git a/drivers/pinctrl/bcm/pinctrl-bcm2835.c b/drivers/pinctrl/bcm/pinctrl-bcm2835.c
index 8b34d2c308c7..6e6fefeb21ea 100644
--- a/drivers/pinctrl/bcm/pinctrl-bcm2835.c
+++ b/drivers/pinctrl/bcm/pinctrl-bcm2835.c
@@ -416,8 +416,7 @@ static void bcm2835_gpio_irq_handler(struct irq_desc *desc)
 		}
 	}
 	/* This should not happen, every IRQ has a bank */
-	if (i == BCM2835_NUM_IRQS)
-		BUG();
+	BUG_ON(i == BCM2835_NUM_IRQS);
 
 	chained_irq_enter(host_chip, desc);
 
diff --git a/drivers/pinctrl/freescale/Kconfig b/drivers/pinctrl/freescale/Kconfig
index f294336430cc..21fa21c6547b 100644
--- a/drivers/pinctrl/freescale/Kconfig
+++ b/drivers/pinctrl/freescale/Kconfig
@@ -166,6 +166,13 @@ config PINCTRL_IMX8DXL
 	help
 	  Say Y here to enable the imx8dxl pinctrl driver
 
+config PINCTRL_IMX8ULP
+	tristate "IMX8ULP pinctrl driver"
+	depends on ARCH_MXC
+	select PINCTRL_IMX
+	help
+	  Say Y here to enable the imx8ulp pinctrl driver
+
 config PINCTRL_VF610
 	bool "Freescale Vybrid VF610 pinctrl driver"
 	depends on SOC_VF610
diff --git a/drivers/pinctrl/freescale/Makefile b/drivers/pinctrl/freescale/Makefile
index e476cb671037..c44930b1b362 100644
--- a/drivers/pinctrl/freescale/Makefile
+++ b/drivers/pinctrl/freescale/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_PINCTRL_IMX8MQ)	+= pinctrl-imx8mq.o
 obj-$(CONFIG_PINCTRL_IMX8QM)	+= pinctrl-imx8qm.o
 obj-$(CONFIG_PINCTRL_IMX8QXP)	+= pinctrl-imx8qxp.o
 obj-$(CONFIG_PINCTRL_IMX8DXL)	+= pinctrl-imx8dxl.o
+obj-$(CONFIG_PINCTRL_IMX8ULP)	+= pinctrl-imx8ulp.o
 obj-$(CONFIG_PINCTRL_VF610)	+= pinctrl-vf610.o
 obj-$(CONFIG_PINCTRL_MXS)	+= pinctrl-mxs.o
 obj-$(CONFIG_PINCTRL_IMX23)	+= pinctrl-imx23.o
diff --git a/drivers/pinctrl/freescale/pinctrl-imx8dxl.c b/drivers/pinctrl/freescale/pinctrl-imx8dxl.c
index 041455c13d0d..f947b1d0d1aa 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx8dxl.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx8dxl.c
@@ -155,7 +155,7 @@ static const struct pinctrl_pin_desc imx8dxl_pinctrl_pads[] = {
 };
 
 
-static struct imx_pinctrl_soc_info imx8dxl_pinctrl_info = {
+static const struct imx_pinctrl_soc_info imx8dxl_pinctrl_info = {
 	.pins = imx8dxl_pinctrl_pads,
 	.npins = ARRAY_SIZE(imx8dxl_pinctrl_pads),
 	.flags = IMX_USE_SCU,
diff --git a/drivers/pinctrl/freescale/pinctrl-imx8mn.c b/drivers/pinctrl/freescale/pinctrl-imx8mn.c
index 448a79eb4568..dbf89cfba477 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx8mn.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx8mn.c
@@ -317,7 +317,7 @@ static const struct pinctrl_pin_desc imx8mn_pinctrl_pads[] = {
 	IMX_PINCTRL_PIN(MX8MN_IOMUXC_UART4_TXD),
 };
 
-static struct imx_pinctrl_soc_info imx8mn_pinctrl_info = {
+static const struct imx_pinctrl_soc_info imx8mn_pinctrl_info = {
 	.pins = imx8mn_pinctrl_pads,
 	.npins = ARRAY_SIZE(imx8mn_pinctrl_pads),
 	.gpr_compatible = "fsl,imx8mn-iomuxc-gpr",
diff --git a/drivers/pinctrl/freescale/pinctrl-imx8qxp.c b/drivers/pinctrl/freescale/pinctrl-imx8qxp.c
index 4f97813ba8b7..0a0acc0038d0 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx8qxp.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx8qxp.c
@@ -194,7 +194,7 @@ static const struct pinctrl_pin_desc imx8qxp_pinctrl_pads[] = {
 	IMX_PINCTRL_PIN(IMX8QXP_COMP_CTL_GPIO_1V8_3V3_QSPI0B),
 };
 
-static struct imx_pinctrl_soc_info imx8qxp_pinctrl_info = {
+static const struct imx_pinctrl_soc_info imx8qxp_pinctrl_info = {
 	.pins = imx8qxp_pinctrl_pads,
 	.npins = ARRAY_SIZE(imx8qxp_pinctrl_pads),
 	.flags = IMX_USE_SCU,
diff --git a/drivers/pinctrl/freescale/pinctrl-imx8ulp.c b/drivers/pinctrl/freescale/pinctrl-imx8ulp.c
new file mode 100644
index 000000000000..f8572597a54e
--- /dev/null
+++ b/drivers/pinctrl/freescale/pinctrl-imx8ulp.c
@@ -0,0 +1,278 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2021 NXP
+ */
+
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/pinctrl/pinctrl.h>
+
+#include "pinctrl-imx.h"
+
+enum imx8ulp_pads {
+	IMX8ULP_PAD_PTD0 = 0,
+	IMX8ULP_PAD_PTD1,
+	IMX8ULP_PAD_PTD2,
+	IMX8ULP_PAD_PTD3,
+	IMX8ULP_PAD_PTD4,
+	IMX8ULP_PAD_PTD5,
+	IMX8ULP_PAD_PTD6,
+	IMX8ULP_PAD_PTD7,
+	IMX8ULP_PAD_PTD8,
+	IMX8ULP_PAD_PTD9,
+	IMX8ULP_PAD_PTD10,
+	IMX8ULP_PAD_PTD11,
+	IMX8ULP_PAD_PTD12,
+	IMX8ULP_PAD_PTD13,
+	IMX8ULP_PAD_PTD14,
+	IMX8ULP_PAD_PTD15,
+	IMX8ULP_PAD_PTD16,
+	IMX8ULP_PAD_PTD17,
+	IMX8ULP_PAD_PTD18,
+	IMX8ULP_PAD_PTD19,
+	IMX8ULP_PAD_PTD20,
+	IMX8ULP_PAD_PTD21,
+	IMX8ULP_PAD_PTD22,
+	IMX8ULP_PAD_PTD23,
+	IMX8ULP_PAD_RESERVE0,
+	IMX8ULP_PAD_RESERVE1,
+	IMX8ULP_PAD_RESERVE2,
+	IMX8ULP_PAD_RESERVE3,
+	IMX8ULP_PAD_RESERVE4,
+	IMX8ULP_PAD_RESERVE5,
+	IMX8ULP_PAD_RESERVE6,
+	IMX8ULP_PAD_RESERVE7,
+	IMX8ULP_PAD_PTE0,
+	IMX8ULP_PAD_PTE1,
+	IMX8ULP_PAD_PTE2,
+	IMX8ULP_PAD_PTE3,
+	IMX8ULP_PAD_PTE4,
+	IMX8ULP_PAD_PTE5,
+	IMX8ULP_PAD_PTE6,
+	IMX8ULP_PAD_PTE7,
+	IMX8ULP_PAD_PTE8,
+	IMX8ULP_PAD_PTE9,
+	IMX8ULP_PAD_PTE10,
+	IMX8ULP_PAD_PTE11,
+	IMX8ULP_PAD_PTE12,
+	IMX8ULP_PAD_PTE13,
+	IMX8ULP_PAD_PTE14,
+	IMX8ULP_PAD_PTE15,
+	IMX8ULP_PAD_PTE16,
+	IMX8ULP_PAD_PTE17,
+	IMX8ULP_PAD_PTE18,
+	IMX8ULP_PAD_PTE19,
+	IMX8ULP_PAD_PTE20,
+	IMX8ULP_PAD_PTE21,
+	IMX8ULP_PAD_PTE22,
+	IMX8ULP_PAD_PTE23,
+	IMX8ULP_PAD_RESERVE8,
+	IMX8ULP_PAD_RESERVE9,
+	IMX8ULP_PAD_RESERVE10,
+	IMX8ULP_PAD_RESERVE11,
+	IMX8ULP_PAD_RESERVE12,
+	IMX8ULP_PAD_RESERVE13,
+	IMX8ULP_PAD_RESERVE14,
+	IMX8ULP_PAD_RESERVE15,
+	IMX8ULP_PAD_PTF0,
+	IMX8ULP_PAD_PTF1,
+	IMX8ULP_PAD_PTF2,
+	IMX8ULP_PAD_PTF3,
+	IMX8ULP_PAD_PTF4,
+	IMX8ULP_PAD_PTF5,
+	IMX8ULP_PAD_PTF6,
+	IMX8ULP_PAD_PTF7,
+	IMX8ULP_PAD_PTF8,
+	IMX8ULP_PAD_PTF9,
+	IMX8ULP_PAD_PTF10,
+	IMX8ULP_PAD_PTF11,
+	IMX8ULP_PAD_PTF12,
+	IMX8ULP_PAD_PTF13,
+	IMX8ULP_PAD_PTF14,
+	IMX8ULP_PAD_PTF15,
+	IMX8ULP_PAD_PTF16,
+	IMX8ULP_PAD_PTF17,
+	IMX8ULP_PAD_PTF18,
+	IMX8ULP_PAD_PTF19,
+	IMX8ULP_PAD_PTF20,
+	IMX8ULP_PAD_PTF21,
+	IMX8ULP_PAD_PTF22,
+	IMX8ULP_PAD_PTF23,
+	IMX8ULP_PAD_PTF24,
+	IMX8ULP_PAD_PTF25,
+	IMX8ULP_PAD_PTF26,
+	IMX8ULP_PAD_PTF27,
+	IMX8ULP_PAD_PTF28,
+	IMX8ULP_PAD_PTF29,
+	IMX8ULP_PAD_PTF30,
+	IMX8ULP_PAD_PTF31,
+};
+
+/* Pad names for the pinmux subsystem */
+static const struct pinctrl_pin_desc imx8ulp_pinctrl_pads[] = {
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTD0),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTD1),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTD2),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTD3),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTD4),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTD5),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTD6),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTD7),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTD8),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTD9),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTD10),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTD11),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTD12),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTD13),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTD14),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTD15),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTD16),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTD17),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTD18),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTD19),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTD20),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTD21),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTD22),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTD23),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_RESERVE0),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_RESERVE1),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_RESERVE2),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_RESERVE3),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_RESERVE4),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_RESERVE5),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_RESERVE6),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_RESERVE7),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTE0),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTE1),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTE2),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTE3),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTE4),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTE5),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTE6),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTE7),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTE8),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTE9),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTE10),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTE11),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTE12),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTE13),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTE14),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTE15),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTE16),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTE17),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTE18),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTE19),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTE20),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTE21),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTE22),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTE23),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_RESERVE8),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_RESERVE9),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_RESERVE10),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_RESERVE11),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_RESERVE12),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_RESERVE13),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_RESERVE14),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_RESERVE15),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF0),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF1),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF2),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF3),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF4),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF5),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF6),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF7),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF8),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF9),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF10),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF11),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF12),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF13),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF14),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF15),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF16),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF17),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF18),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF19),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF20),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF21),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF22),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF23),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF24),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF25),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF26),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF27),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF28),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF29),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF30),
+	IMX_PINCTRL_PIN(IMX8ULP_PAD_PTF31),
+};
+
+#define BM_OBE_ENABLED		BIT(17)
+#define BM_IBE_ENABLED		BIT(16)
+#define BM_MUX_MODE		0xf00
+#define BP_MUX_MODE		8
+
+static int imx8ulp_pmx_gpio_set_direction(struct pinctrl_dev *pctldev,
+					  struct pinctrl_gpio_range *range,
+					  unsigned offset, bool input)
+{
+	struct imx_pinctrl *ipctl = pinctrl_dev_get_drvdata(pctldev);
+	const struct imx_pin_reg *pin_reg;
+	u32 reg;
+
+	pin_reg = &ipctl->pin_regs[offset];
+	if (pin_reg->mux_reg == -1)
+		return -EINVAL;
+
+	reg = readl(ipctl->base + pin_reg->mux_reg);
+	if (input)
+		reg = (reg & ~BM_OBE_ENABLED) | BM_IBE_ENABLED;
+	else
+		reg = (reg & ~BM_IBE_ENABLED) | BM_OBE_ENABLED;
+	writel(reg, ipctl->base + pin_reg->mux_reg);
+
+	return 0;
+}
+
+static const struct imx_pinctrl_soc_info imx8ulp_pinctrl_info = {
+	.pins = imx8ulp_pinctrl_pads,
+	.npins = ARRAY_SIZE(imx8ulp_pinctrl_pads),
+	.flags = ZERO_OFFSET_VALID | SHARE_MUX_CONF_REG,
+	.gpio_set_direction = imx8ulp_pmx_gpio_set_direction,
+	.mux_mask = BM_MUX_MODE,
+	.mux_shift = BP_MUX_MODE,
+};
+
+static const struct of_device_id imx8ulp_pinctrl_of_match[] = {
+	{ .compatible = "fsl,imx8ulp-iomuxc1", },
+	{ /* sentinel */ }
+};
+
+static int imx8ulp_pinctrl_probe(struct platform_device *pdev)
+{
+	return imx_pinctrl_probe(pdev, &imx8ulp_pinctrl_info);
+}
+
+static struct platform_driver imx8ulp_pinctrl_driver = {
+	.driver = {
+		.name = "imx8ulp-pinctrl",
+		.of_match_table = imx8ulp_pinctrl_of_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe = imx8ulp_pinctrl_probe,
+};
+
+static int __init imx8ulp_pinctrl_init(void)
+{
+	return platform_driver_register(&imx8ulp_pinctrl_driver);
+}
+arch_initcall(imx8ulp_pinctrl_init);
+
+MODULE_AUTHOR("Jacky Bai <ping.bai@nxp.com>");
+MODULE_DESCRIPTION("NXP i.MX8ULP pinctrl driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8365.c b/drivers/pinctrl/mediatek/pinctrl-mt8365.c
index 22c33c3cb581..79b1fee5a1eb 100644
--- a/drivers/pinctrl/mediatek/pinctrl-mt8365.c
+++ b/drivers/pinctrl/mediatek/pinctrl-mt8365.c
@@ -485,7 +485,6 @@ static struct platform_driver mtk_pinctrl_driver = {
 	.probe = mtk_pinctrl_probe,
 	.driver = {
 		.name = "mediatek-mt8365-pinctrl",
-		.owner = THIS_MODULE,
 		.of_match_table = mt8365_pctrl_match,
 		.pm = &mtk_eint_pm_ops,
 	},
diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c
index 5a68e242f6b3..5cb018f98800 100644
--- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c
+++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c
@@ -167,10 +167,14 @@ static struct armada_37xx_pin_group armada_37xx_nb_groups[] = {
 	PIN_GRP_GPIO("jtag", 20, 5, BIT(0), "jtag"),
 	PIN_GRP_GPIO("sdio0", 8, 3, BIT(1), "sdio"),
 	PIN_GRP_GPIO("emmc_nb", 27, 9, BIT(2), "emmc"),
-	PIN_GRP_GPIO("pwm0", 11, 1, BIT(3), "pwm"),
-	PIN_GRP_GPIO("pwm1", 12, 1, BIT(4), "pwm"),
-	PIN_GRP_GPIO("pwm2", 13, 1, BIT(5), "pwm"),
-	PIN_GRP_GPIO("pwm3", 14, 1, BIT(6), "pwm"),
+	PIN_GRP_GPIO_3("pwm0", 11, 1, BIT(3) | BIT(20), 0, BIT(20), BIT(3),
+		       "pwm", "led"),
+	PIN_GRP_GPIO_3("pwm1", 12, 1, BIT(4) | BIT(21), 0, BIT(21), BIT(4),
+		       "pwm", "led"),
+	PIN_GRP_GPIO_3("pwm2", 13, 1, BIT(5) | BIT(22), 0, BIT(22), BIT(5),
+		       "pwm", "led"),
+	PIN_GRP_GPIO_3("pwm3", 14, 1, BIT(6) | BIT(23), 0, BIT(23), BIT(6),
+		       "pwm", "led"),
 	PIN_GRP_GPIO("pmic1", 7, 1, BIT(7), "pmic"),
 	PIN_GRP_GPIO("pmic0", 6, 1, BIT(8), "pmic"),
 	PIN_GRP_GPIO("i2c2", 2, 2, BIT(9), "i2c"),
@@ -184,10 +188,6 @@ static struct armada_37xx_pin_group armada_37xx_nb_groups[] = {
 	PIN_GRP_EXTRA("uart2", 9, 2, BIT(1) | BIT(13) | BIT(14) | BIT(19),
 		      BIT(1) | BIT(13) | BIT(14), BIT(1) | BIT(19),
 		      18, 2, "gpio", "uart"),
-	PIN_GRP_GPIO_2("led0_od", 11, 1, BIT(20), BIT(20), 0, "led"),
-	PIN_GRP_GPIO_2("led1_od", 12, 1, BIT(21), BIT(21), 0, "led"),
-	PIN_GRP_GPIO_2("led2_od", 13, 1, BIT(22), BIT(22), 0, "led"),
-	PIN_GRP_GPIO_2("led3_od", 14, 1, BIT(23), BIT(23), 0, "led"),
 };
 
 static struct armada_37xx_pin_group armada_37xx_sb_groups[] = {
diff --git a/drivers/pinctrl/pinctrl-ingenic.c b/drivers/pinctrl/pinctrl-ingenic.c
index ce9cc719c395..2712f51eb238 100644
--- a/drivers/pinctrl/pinctrl-ingenic.c
+++ b/drivers/pinctrl/pinctrl-ingenic.c
@@ -104,6 +104,7 @@ enum jz_version {
 	ID_X1500,
 	ID_X1830,
 	ID_X2000,
+	ID_X2100,
 };
 
 struct ingenic_chip_info {
@@ -589,6 +590,18 @@ static int jz4755_uart0_data_pins[] = { 0x7c, 0x7d, };
 static int jz4755_uart0_hwflow_pins[] = { 0x7e, 0x7f, };
 static int jz4755_uart1_data_pins[] = { 0x97, 0x99, };
 static int jz4755_uart2_data_pins[] = { 0x9f, };
+static int jz4755_ssi_dt_b_pins[] = { 0x3b, };
+static int jz4755_ssi_dt_f_pins[] = { 0xa1, };
+static int jz4755_ssi_dr_b_pins[] = { 0x3c, };
+static int jz4755_ssi_dr_f_pins[] = { 0xa2, };
+static int jz4755_ssi_clk_b_pins[] = { 0x3a, };
+static int jz4755_ssi_clk_f_pins[] = { 0xa0, };
+static int jz4755_ssi_gpc_b_pins[] = { 0x3e, };
+static int jz4755_ssi_gpc_f_pins[] = { 0xa4, };
+static int jz4755_ssi_ce0_b_pins[] = { 0x3d, };
+static int jz4755_ssi_ce0_f_pins[] = { 0xa3, };
+static int jz4755_ssi_ce1_b_pins[] = { 0x3f, };
+static int jz4755_ssi_ce1_f_pins[] = { 0xa5, };
 static int jz4755_mmc0_1bit_pins[] = { 0x2f, 0x50, 0x5c, };
 static int jz4755_mmc0_4bit_pins[] = { 0x5d, 0x5b, 0x51, };
 static int jz4755_mmc1_1bit_pins[] = { 0x3a, 0x3d, 0x3c, };
@@ -630,6 +643,18 @@ static const struct group_desc jz4755_groups[] = {
 	INGENIC_PIN_GROUP("uart0-hwflow", jz4755_uart0_hwflow, 0),
 	INGENIC_PIN_GROUP("uart1-data", jz4755_uart1_data, 0),
 	INGENIC_PIN_GROUP("uart2-data", jz4755_uart2_data, 1),
+	INGENIC_PIN_GROUP("ssi-dt-b", jz4755_ssi_dt_b, 0),
+	INGENIC_PIN_GROUP("ssi-dt-f", jz4755_ssi_dt_f, 0),
+	INGENIC_PIN_GROUP("ssi-dr-b", jz4755_ssi_dr_b, 0),
+	INGENIC_PIN_GROUP("ssi-dr-f", jz4755_ssi_dr_f, 0),
+	INGENIC_PIN_GROUP("ssi-clk-b", jz4755_ssi_clk_b, 0),
+	INGENIC_PIN_GROUP("ssi-clk-f", jz4755_ssi_clk_f, 0),
+	INGENIC_PIN_GROUP("ssi-gpc-b", jz4755_ssi_gpc_b, 0),
+	INGENIC_PIN_GROUP("ssi-gpc-f", jz4755_ssi_gpc_f, 0),
+	INGENIC_PIN_GROUP("ssi-ce0-b", jz4755_ssi_ce0_b, 0),
+	INGENIC_PIN_GROUP("ssi-ce0-f", jz4755_ssi_ce0_f, 0),
+	INGENIC_PIN_GROUP("ssi-ce1-b", jz4755_ssi_ce1_b, 0),
+	INGENIC_PIN_GROUP("ssi-ce1-f", jz4755_ssi_ce1_f, 0),
 	INGENIC_PIN_GROUP_FUNCS("mmc0-1bit", jz4755_mmc0_1bit,
 				jz4755_mmc0_1bit_funcs),
 	INGENIC_PIN_GROUP_FUNCS("mmc0-4bit", jz4755_mmc0_4bit,
@@ -661,6 +686,14 @@ static const struct group_desc jz4755_groups[] = {
 static const char *jz4755_uart0_groups[] = { "uart0-data", "uart0-hwflow", };
 static const char *jz4755_uart1_groups[] = { "uart1-data", };
 static const char *jz4755_uart2_groups[] = { "uart2-data", };
+static const char *jz4755_ssi_groups[] = {
+	"ssi-dt-b", "ssi-dt-f",
+	"ssi-dr-b", "ssi-dr-f",
+	"ssi-clk-b", "ssi-clk-f",
+	"ssi-gpc-b", "ssi-gpc-f",
+	"ssi-ce0-b", "ssi-ce0-f",
+	"ssi-ce1-b", "ssi-ce1-f",
+};
 static const char *jz4755_mmc0_groups[] = { "mmc0-1bit", "mmc0-4bit", };
 static const char *jz4755_mmc1_groups[] = { "mmc0-1bit", "mmc0-4bit", };
 static const char *jz4755_i2c_groups[] = { "i2c-data", };
@@ -683,6 +716,7 @@ static const struct function_desc jz4755_functions[] = {
 	{ "uart0", jz4755_uart0_groups, ARRAY_SIZE(jz4755_uart0_groups), },
 	{ "uart1", jz4755_uart1_groups, ARRAY_SIZE(jz4755_uart1_groups), },
 	{ "uart2", jz4755_uart2_groups, ARRAY_SIZE(jz4755_uart2_groups), },
+	{ "ssi", jz4755_ssi_groups, ARRAY_SIZE(jz4755_ssi_groups), },
 	{ "mmc0", jz4755_mmc0_groups, ARRAY_SIZE(jz4755_mmc0_groups), },
 	{ "mmc1", jz4755_mmc1_groups, ARRAY_SIZE(jz4755_mmc1_groups), },
 	{ "i2c", jz4755_i2c_groups, ARRAY_SIZE(jz4755_i2c_groups), },
@@ -710,7 +744,7 @@ static const struct ingenic_chip_info jz4755_chip_info = {
 };
 
 static const u32 jz4760_pull_ups[6] = {
-	0xffffffff, 0xfffcf3ff, 0xffffffff, 0xffffcfff, 0xfffffb7c, 0xfffff00f,
+	0xffffffff, 0xfffcf3ff, 0xffffffff, 0xffffcfff, 0xfffffb7c, 0x0000000f,
 };
 
 static const u32 jz4760_pull_downs[6] = {
@@ -725,6 +759,58 @@ static int jz4760_uart2_data_pins[] = { 0x5c, 0x5e, };
 static int jz4760_uart2_hwflow_pins[] = { 0x5d, 0x5f, };
 static int jz4760_uart3_data_pins[] = { 0x6c, 0x85, };
 static int jz4760_uart3_hwflow_pins[] = { 0x88, 0x89, };
+static int jz4760_ssi0_dt_a_pins[] = { 0x15, };
+static int jz4760_ssi0_dt_b_pins[] = { 0x35, };
+static int jz4760_ssi0_dt_d_pins[] = { 0x75, };
+static int jz4760_ssi0_dt_e_pins[] = { 0x91, };
+static int jz4760_ssi0_dr_a_pins[] = { 0x14, };
+static int jz4760_ssi0_dr_b_pins[] = { 0x34, };
+static int jz4760_ssi0_dr_d_pins[] = { 0x74, };
+static int jz4760_ssi0_dr_e_pins[] = { 0x8e, };
+static int jz4760_ssi0_clk_a_pins[] = { 0x12, };
+static int jz4760_ssi0_clk_b_pins[] = { 0x3c, };
+static int jz4760_ssi0_clk_d_pins[] = { 0x78, };
+static int jz4760_ssi0_clk_e_pins[] = { 0x8f, };
+static int jz4760_ssi0_gpc_b_pins[] = { 0x3e, };
+static int jz4760_ssi0_gpc_d_pins[] = { 0x76, };
+static int jz4760_ssi0_gpc_e_pins[] = { 0x93, };
+static int jz4760_ssi0_ce0_a_pins[] = { 0x13, };
+static int jz4760_ssi0_ce0_b_pins[] = { 0x3d, };
+static int jz4760_ssi0_ce0_d_pins[] = { 0x79, };
+static int jz4760_ssi0_ce0_e_pins[] = { 0x90, };
+static int jz4760_ssi0_ce1_b_pins[] = { 0x3f, };
+static int jz4760_ssi0_ce1_d_pins[] = { 0x77, };
+static int jz4760_ssi0_ce1_e_pins[] = { 0x92, };
+static int jz4760_ssi1_dt_b_9_pins[] = { 0x29, };
+static int jz4760_ssi1_dt_b_21_pins[] = { 0x35, };
+static int jz4760_ssi1_dt_d_12_pins[] = { 0x6c, };
+static int jz4760_ssi1_dt_d_21_pins[] = { 0x75, };
+static int jz4760_ssi1_dt_e_pins[] = { 0x91, };
+static int jz4760_ssi1_dt_f_pins[] = { 0xa3, };
+static int jz4760_ssi1_dr_b_6_pins[] = { 0x26, };
+static int jz4760_ssi1_dr_b_20_pins[] = { 0x34, };
+static int jz4760_ssi1_dr_d_13_pins[] = { 0x6d, };
+static int jz4760_ssi1_dr_d_20_pins[] = { 0x74, };
+static int jz4760_ssi1_dr_e_pins[] = { 0x8e, };
+static int jz4760_ssi1_dr_f_pins[] = { 0xa0, };
+static int jz4760_ssi1_clk_b_7_pins[] = { 0x27, };
+static int jz4760_ssi1_clk_b_28_pins[] = { 0x3c, };
+static int jz4760_ssi1_clk_d_pins[] = { 0x78, };
+static int jz4760_ssi1_clk_e_7_pins[] = { 0x87, };
+static int jz4760_ssi1_clk_e_15_pins[] = { 0x8f, };
+static int jz4760_ssi1_clk_f_pins[] = { 0xa2, };
+static int jz4760_ssi1_gpc_b_pins[] = { 0x3e, };
+static int jz4760_ssi1_gpc_d_pins[] = { 0x76, };
+static int jz4760_ssi1_gpc_e_pins[] = { 0x93, };
+static int jz4760_ssi1_ce0_b_8_pins[] = { 0x28, };
+static int jz4760_ssi1_ce0_b_29_pins[] = { 0x3d, };
+static int jz4760_ssi1_ce0_d_pins[] = { 0x79, };
+static int jz4760_ssi1_ce0_e_6_pins[] = { 0x86, };
+static int jz4760_ssi1_ce0_e_16_pins[] = { 0x90, };
+static int jz4760_ssi1_ce0_f_pins[] = { 0xa1, };
+static int jz4760_ssi1_ce1_b_pins[] = { 0x3f, };
+static int jz4760_ssi1_ce1_d_pins[] = { 0x77, };
+static int jz4760_ssi1_ce1_e_pins[] = { 0x92, };
 static int jz4760_mmc0_1bit_a_pins[] = { 0x12, 0x13, 0x14, };
 static int jz4760_mmc0_4bit_a_pins[] = { 0x15, 0x16, 0x17, };
 static int jz4760_mmc0_1bit_e_pins[] = { 0x9c, 0x9d, 0x94, };
@@ -801,6 +887,58 @@ static const struct group_desc jz4760_groups[] = {
 	INGENIC_PIN_GROUP_FUNCS("uart3-data", jz4760_uart3_data,
 				jz4760_uart3_data_funcs),
 	INGENIC_PIN_GROUP("uart3-hwflow", jz4760_uart3_hwflow, 0),
+	INGENIC_PIN_GROUP("ssi0-dt-a", jz4760_ssi0_dt_a, 2),
+	INGENIC_PIN_GROUP("ssi0-dt-b", jz4760_ssi0_dt_b, 1),
+	INGENIC_PIN_GROUP("ssi0-dt-d", jz4760_ssi0_dt_d, 1),
+	INGENIC_PIN_GROUP("ssi0-dt-e", jz4760_ssi0_dt_e, 0),
+	INGENIC_PIN_GROUP("ssi0-dr-a", jz4760_ssi0_dr_a, 1),
+	INGENIC_PIN_GROUP("ssi0-dr-b", jz4760_ssi0_dr_b, 1),
+	INGENIC_PIN_GROUP("ssi0-dr-d", jz4760_ssi0_dr_d, 1),
+	INGENIC_PIN_GROUP("ssi0-dr-e", jz4760_ssi0_dr_e, 0),
+	INGENIC_PIN_GROUP("ssi0-clk-a", jz4760_ssi0_clk_a, 2),
+	INGENIC_PIN_GROUP("ssi0-clk-b", jz4760_ssi0_clk_b, 1),
+	INGENIC_PIN_GROUP("ssi0-clk-d", jz4760_ssi0_clk_d, 1),
+	INGENIC_PIN_GROUP("ssi0-clk-e", jz4760_ssi0_clk_e, 0),
+	INGENIC_PIN_GROUP("ssi0-gpc-b", jz4760_ssi0_gpc_b, 1),
+	INGENIC_PIN_GROUP("ssi0-gpc-d", jz4760_ssi0_gpc_d, 1),
+	INGENIC_PIN_GROUP("ssi0-gpc-e", jz4760_ssi0_gpc_e, 0),
+	INGENIC_PIN_GROUP("ssi0-ce0-a", jz4760_ssi0_ce0_a, 2),
+	INGENIC_PIN_GROUP("ssi0-ce0-b", jz4760_ssi0_ce0_b, 1),
+	INGENIC_PIN_GROUP("ssi0-ce0-d", jz4760_ssi0_ce0_d, 1),
+	INGENIC_PIN_GROUP("ssi0-ce0-e", jz4760_ssi0_ce0_e, 0),
+	INGENIC_PIN_GROUP("ssi0-ce1-b", jz4760_ssi0_ce1_b, 1),
+	INGENIC_PIN_GROUP("ssi0-ce1-d", jz4760_ssi0_ce1_d, 1),
+	INGENIC_PIN_GROUP("ssi0-ce1-e", jz4760_ssi0_ce1_e, 0),
+	INGENIC_PIN_GROUP("ssi1-dt-b-9", jz4760_ssi1_dt_b_9, 2),
+	INGENIC_PIN_GROUP("ssi1-dt-b-21", jz4760_ssi1_dt_b_21, 2),
+	INGENIC_PIN_GROUP("ssi1-dt-d-12", jz4760_ssi1_dt_d_12, 2),
+	INGENIC_PIN_GROUP("ssi1-dt-d-21", jz4760_ssi1_dt_d_21, 2),
+	INGENIC_PIN_GROUP("ssi1-dt-e", jz4760_ssi1_dt_e, 1),
+	INGENIC_PIN_GROUP("ssi1-dt-f", jz4760_ssi1_dt_f, 2),
+	INGENIC_PIN_GROUP("ssi1-dr-b-6", jz4760_ssi1_dr_b_6, 2),
+	INGENIC_PIN_GROUP("ssi1-dr-b-20", jz4760_ssi1_dr_b_20, 2),
+	INGENIC_PIN_GROUP("ssi1-dr-d-13", jz4760_ssi1_dr_d_13, 2),
+	INGENIC_PIN_GROUP("ssi1-dr-d-20", jz4760_ssi1_dr_d_20, 2),
+	INGENIC_PIN_GROUP("ssi1-dr-e", jz4760_ssi1_dr_e, 1),
+	INGENIC_PIN_GROUP("ssi1-dr-f", jz4760_ssi1_dr_f, 2),
+	INGENIC_PIN_GROUP("ssi1-clk-b-7", jz4760_ssi1_clk_b_7, 2),
+	INGENIC_PIN_GROUP("ssi1-clk-b-28", jz4760_ssi1_clk_b_28, 2),
+	INGENIC_PIN_GROUP("ssi1-clk-d", jz4760_ssi1_clk_d, 2),
+	INGENIC_PIN_GROUP("ssi1-clk-e-7", jz4760_ssi1_clk_e_7, 2),
+	INGENIC_PIN_GROUP("ssi1-clk-e-15", jz4760_ssi1_clk_e_15, 1),
+	INGENIC_PIN_GROUP("ssi1-clk-f", jz4760_ssi1_clk_f, 2),
+	INGENIC_PIN_GROUP("ssi1-gpc-b", jz4760_ssi1_gpc_b, 2),
+	INGENIC_PIN_GROUP("ssi1-gpc-d", jz4760_ssi1_gpc_d, 2),
+	INGENIC_PIN_GROUP("ssi1-gpc-e", jz4760_ssi1_gpc_e, 1),
+	INGENIC_PIN_GROUP("ssi1-ce0-b-8", jz4760_ssi1_ce0_b_8, 2),
+	INGENIC_PIN_GROUP("ssi1-ce0-b-29", jz4760_ssi1_ce0_b_29, 2),
+	INGENIC_PIN_GROUP("ssi1-ce0-d", jz4760_ssi1_ce0_d, 2),
+	INGENIC_PIN_GROUP("ssi1-ce0-e-6", jz4760_ssi1_ce0_e_6, 2),
+	INGENIC_PIN_GROUP("ssi1-ce0-e-16", jz4760_ssi1_ce0_e_16, 1),
+	INGENIC_PIN_GROUP("ssi1-ce0-f", jz4760_ssi1_ce0_f, 2),
+	INGENIC_PIN_GROUP("ssi1-ce1-b", jz4760_ssi1_ce1_b, 2),
+	INGENIC_PIN_GROUP("ssi1-ce1-d", jz4760_ssi1_ce1_d, 2),
+	INGENIC_PIN_GROUP("ssi1-ce1-e", jz4760_ssi1_ce1_e, 1),
 	INGENIC_PIN_GROUP_FUNCS("mmc0-1bit-a", jz4760_mmc0_1bit_a,
 				jz4760_mmc0_1bit_a_funcs),
 	INGENIC_PIN_GROUP("mmc0-4bit-a", jz4760_mmc0_4bit_a, 1),
@@ -854,6 +992,22 @@ static const char *jz4760_uart0_groups[] = { "uart0-data", "uart0-hwflow", };
 static const char *jz4760_uart1_groups[] = { "uart1-data", "uart1-hwflow", };
 static const char *jz4760_uart2_groups[] = { "uart2-data", "uart2-hwflow", };
 static const char *jz4760_uart3_groups[] = { "uart3-data", "uart3-hwflow", };
+static const char *jz4760_ssi0_groups[] = {
+	"ssi0-dt-a", "ssi0-dt-b", "ssi0-dt-d", "ssi0-dt-e",
+	"ssi0-dr-a", "ssi0-dr-b", "ssi0-dr-d", "ssi0-dr-e",
+	"ssi0-clk-a", "ssi0-clk-b", "ssi0-clk-d", "ssi0-clk-e",
+	"ssi0-gpc-b", "ssi0-gpc-d", "ssi0-gpc-e",
+	"ssi0-ce0-a", "ssi0-ce0-b", "ssi0-ce0-d", "ssi0-ce0-e",
+	"ssi0-ce1-b", "ssi0-ce1-d", "ssi0-ce1-e",
+};
+static const char *jz4760_ssi1_groups[] = {
+	"ssi1-dt-b-9", "ssi1-dt-b-21", "ssi1-dt-d-12", "ssi1-dt-d-21", "ssi1-dt-e", "ssi1-dt-f",
+	"ssi1-dr-b-6", "ssi1-dr-b-20", "ssi1-dr-d-13", "ssi1-dr-d-20", "ssi1-dr-e", "ssi1-dr-f",
+	"ssi1-clk-b-7", "ssi1-clk-b-28", "ssi1-clk-d", "ssi1-clk-e-7", "ssi1-clk-e-15", "ssi1-clk-f",
+	"ssi1-gpc-b", "ssi1-gpc-d", "ssi1-gpc-e",
+	"ssi1-ce0-b-8", "ssi1-ce0-b-29", "ssi1-ce0-d", "ssi1-ce0-e-6", "ssi1-ce0-e-16", "ssi1-ce0-f",
+	"ssi1-ce1-b", "ssi1-ce1-d", "ssi1-ce1-e",
+};
 static const char *jz4760_mmc0_groups[] = {
 	"mmc0-1bit-a", "mmc0-4bit-a",
 	"mmc0-1bit-e", "mmc0-4bit-e", "mmc0-8bit-e",
@@ -898,6 +1052,8 @@ static const struct function_desc jz4760_functions[] = {
 	{ "uart1", jz4760_uart1_groups, ARRAY_SIZE(jz4760_uart1_groups), },
 	{ "uart2", jz4760_uart2_groups, ARRAY_SIZE(jz4760_uart2_groups), },
 	{ "uart3", jz4760_uart3_groups, ARRAY_SIZE(jz4760_uart3_groups), },
+	{ "ssi0", jz4760_ssi0_groups, ARRAY_SIZE(jz4760_ssi0_groups), },
+	{ "ssi1", jz4760_ssi1_groups, ARRAY_SIZE(jz4760_ssi1_groups), },
 	{ "mmc0", jz4760_mmc0_groups, ARRAY_SIZE(jz4760_mmc0_groups), },
 	{ "mmc1", jz4760_mmc1_groups, ARRAY_SIZE(jz4760_mmc1_groups), },
 	{ "mmc2", jz4760_mmc2_groups, ARRAY_SIZE(jz4760_mmc2_groups), },
@@ -936,11 +1092,11 @@ static const struct ingenic_chip_info jz4760_chip_info = {
 };
 
 static const u32 jz4770_pull_ups[6] = {
-	0x3fffffff, 0xfff0030c, 0xffffffff, 0xffff4fff, 0xfffffb7c, 0xffa7f00f,
+	0x3fffffff, 0xfff0f3fc, 0xffffffff, 0xffff4fff, 0xfffffb7c, 0x0024f00f,
 };
 
 static const u32 jz4770_pull_downs[6] = {
-	0x00000000, 0x000f0c03, 0x00000000, 0x0000b000, 0x00000483, 0x00580ff0,
+	0x00000000, 0x000f0c03, 0x00000000, 0x0000b000, 0x00000483, 0x005b0ff0,
 };
 
 static int jz4770_uart0_data_pins[] = { 0xa0, 0xa3, };
@@ -1827,7 +1983,9 @@ static int x1000_uart1_data_d_pins[] = { 0x62, 0x63, };
 static int x1000_uart1_hwflow_pins[] = { 0x64, 0x65, };
 static int x1000_uart2_data_a_pins[] = { 0x02, 0x03, };
 static int x1000_uart2_data_d_pins[] = { 0x65, 0x64, };
-static int x1000_sfc_pins[] = { 0x1d, 0x1c, 0x1e, 0x1f, 0x1a, 0x1b, };
+static int x1000_sfc_data_pins[] = { 0x1d, 0x1c, 0x1e, 0x1f, };
+static int x1000_sfc_clk_pins[] = { 0x1a, };
+static int x1000_sfc_ce_pins[] = { 0x1b, };
 static int x1000_ssi_dt_a_22_pins[] = { 0x16, };
 static int x1000_ssi_dt_a_29_pins[] = { 0x1d, };
 static int x1000_ssi_dt_d_pins[] = { 0x62, };
@@ -1871,8 +2029,8 @@ static int x1000_i2s_data_tx_pins[] = { 0x24, };
 static int x1000_i2s_data_rx_pins[] = { 0x23, };
 static int x1000_i2s_clk_txrx_pins[] = { 0x21, 0x22, };
 static int x1000_i2s_sysclk_pins[] = { 0x20, };
-static int x1000_dmic0_pins[] = { 0x35, 0x36, };
-static int x1000_dmic1_pins[] = { 0x25, };
+static int x1000_dmic_if0_pins[] = { 0x35, 0x36, };
+static int x1000_dmic_if1_pins[] = { 0x25, };
 static int x1000_cim_pins[] = {
 	0x08, 0x09, 0x0a, 0x0b,
 	0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c,
@@ -1901,7 +2059,9 @@ static const struct group_desc x1000_groups[] = {
 	INGENIC_PIN_GROUP("uart1-hwflow", x1000_uart1_hwflow, 1),
 	INGENIC_PIN_GROUP("uart2-data-a", x1000_uart2_data_a, 2),
 	INGENIC_PIN_GROUP("uart2-data-d", x1000_uart2_data_d, 0),
-	INGENIC_PIN_GROUP("sfc", x1000_sfc, 1),
+	INGENIC_PIN_GROUP("sfc-data", x1000_sfc_data, 1),
+	INGENIC_PIN_GROUP("sfc-clk", x1000_sfc_clk, 1),
+	INGENIC_PIN_GROUP("sfc-ce", x1000_sfc_ce, 1),
 	INGENIC_PIN_GROUP("ssi-dt-a-22", x1000_ssi_dt_a_22, 2),
 	INGENIC_PIN_GROUP("ssi-dt-a-29", x1000_ssi_dt_a_29, 2),
 	INGENIC_PIN_GROUP("ssi-dt-d", x1000_ssi_dt_d, 0),
@@ -1938,8 +2098,8 @@ static const struct group_desc x1000_groups[] = {
 	INGENIC_PIN_GROUP("i2s-data-rx", x1000_i2s_data_rx, 1),
 	INGENIC_PIN_GROUP("i2s-clk-txrx", x1000_i2s_clk_txrx, 1),
 	INGENIC_PIN_GROUP("i2s-sysclk", x1000_i2s_sysclk, 1),
-	INGENIC_PIN_GROUP("dmic0", x1000_dmic0, 0),
-	INGENIC_PIN_GROUP("dmic1", x1000_dmic1, 1),
+	INGENIC_PIN_GROUP("dmic-if0", x1000_dmic_if0, 0),
+	INGENIC_PIN_GROUP("dmic-if1", x1000_dmic_if1, 1),
 	INGENIC_PIN_GROUP("cim-data", x1000_cim, 2),
 	INGENIC_PIN_GROUP("lcd-8bit", x1000_lcd_8bit, 1),
 	INGENIC_PIN_GROUP("lcd-16bit", x1000_lcd_16bit, 1),
@@ -1956,7 +2116,7 @@ static const char *x1000_uart1_groups[] = {
 	"uart1-data-a", "uart1-data-d", "uart1-hwflow",
 };
 static const char *x1000_uart2_groups[] = { "uart2-data-a", "uart2-data-d", };
-static const char *x1000_sfc_groups[] = { "sfc", };
+static const char *x1000_sfc_groups[] = { "sfc-data", "sfc-clk", "sfc-ce", };
 static const char *x1000_ssi_groups[] = {
 	"ssi-dt-a-22", "ssi-dt-a-29", "ssi-dt-d",
 	"ssi-dr-a-23", "ssi-dr-a-28", "ssi-dr-d",
@@ -1983,7 +2143,7 @@ static const char *x1000_i2c2_groups[] = { "i2c2-data", };
 static const char *x1000_i2s_groups[] = {
 	"i2s-data-tx", "i2s-data-rx", "i2s-clk-txrx", "i2s-sysclk",
 };
-static const char *x1000_dmic_groups[] = { "dmic0", "dmic1", };
+static const char *x1000_dmic_groups[] = { "dmic-if0", "dmic-if1", };
 static const char *x1000_cim_groups[] = { "cim-data", };
 static const char *x1000_lcd_groups[] = { "lcd-8bit", "lcd-16bit", };
 static const char *x1000_pwm0_groups[] = { "pwm0", };
@@ -2048,8 +2208,8 @@ static int x1500_i2s_data_tx_pins[] = { 0x24, };
 static int x1500_i2s_data_rx_pins[] = { 0x23, };
 static int x1500_i2s_clk_txrx_pins[] = { 0x21, 0x22, };
 static int x1500_i2s_sysclk_pins[] = { 0x20, };
-static int x1500_dmic0_pins[] = { 0x35, 0x36, };
-static int x1500_dmic1_pins[] = { 0x25, };
+static int x1500_dmic_if0_pins[] = { 0x35, 0x36, };
+static int x1500_dmic_if1_pins[] = { 0x25, };
 static int x1500_cim_pins[] = {
 	0x08, 0x09, 0x0a, 0x0b,
 	0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c,
@@ -2068,7 +2228,9 @@ static const struct group_desc x1500_groups[] = {
 	INGENIC_PIN_GROUP("uart1-hwflow", x1500_uart1_hwflow, 1),
 	INGENIC_PIN_GROUP("uart2-data-a", x1500_uart2_data_a, 2),
 	INGENIC_PIN_GROUP("uart2-data-d", x1500_uart2_data_d, 0),
-	INGENIC_PIN_GROUP("sfc", x1000_sfc, 1),
+	INGENIC_PIN_GROUP("sfc-data", x1000_sfc_data, 1),
+	INGENIC_PIN_GROUP("sfc-clk", x1000_sfc_clk, 1),
+	INGENIC_PIN_GROUP("sfc-ce", x1000_sfc_ce, 1),
 	INGENIC_PIN_GROUP("mmc-1bit", x1500_mmc_1bit, 1),
 	INGENIC_PIN_GROUP("mmc-4bit", x1500_mmc_4bit, 1),
 	INGENIC_PIN_GROUP("i2c0-data", x1500_i2c0, 0),
@@ -2079,8 +2241,8 @@ static const struct group_desc x1500_groups[] = {
 	INGENIC_PIN_GROUP("i2s-data-rx", x1500_i2s_data_rx, 1),
 	INGENIC_PIN_GROUP("i2s-clk-txrx", x1500_i2s_clk_txrx, 1),
 	INGENIC_PIN_GROUP("i2s-sysclk", x1500_i2s_sysclk, 1),
-	INGENIC_PIN_GROUP("dmic0", x1500_dmic0, 0),
-	INGENIC_PIN_GROUP("dmic1", x1500_dmic1, 1),
+	INGENIC_PIN_GROUP("dmic-if0", x1500_dmic_if0, 0),
+	INGENIC_PIN_GROUP("dmic-if1", x1500_dmic_if1, 1),
 	INGENIC_PIN_GROUP("cim-data", x1500_cim, 2),
 	INGENIC_PIN_GROUP("pwm0", x1500_pwm_pwm0, 0),
 	INGENIC_PIN_GROUP("pwm1", x1500_pwm_pwm1, 1),
@@ -2101,7 +2263,7 @@ static const char *x1500_i2c2_groups[] = { "i2c2-data", };
 static const char *x1500_i2s_groups[] = {
 	"i2s-data-tx", "i2s-data-rx", "i2s-clk-txrx", "i2s-sysclk",
 };
-static const char *x1500_dmic_groups[] = { "dmic0", "dmic1", };
+static const char *x1500_dmic_groups[] = { "dmic-if0", "dmic-if1", };
 static const char *x1500_cim_groups[] = { "cim-data", };
 static const char *x1500_pwm0_groups[] = { "pwm0", };
 static const char *x1500_pwm1_groups[] = { "pwm1", };
@@ -2151,7 +2313,9 @@ static const u32 x1830_pull_downs[4] = {
 static int x1830_uart0_data_pins[] = { 0x33, 0x36, };
 static int x1830_uart0_hwflow_pins[] = { 0x34, 0x35, };
 static int x1830_uart1_data_pins[] = { 0x38, 0x37, };
-static int x1830_sfc_pins[] = { 0x17, 0x18, 0x1a, 0x19, 0x1b, 0x1c, };
+static int x1830_sfc_data_pins[] = { 0x17, 0x18, 0x1a, 0x19, };
+static int x1830_sfc_clk_pins[] = { 0x1b, };
+static int x1830_sfc_ce_pins[] = { 0x1c, };
 static int x1830_ssi0_dt_pins[] = { 0x4c, };
 static int x1830_ssi0_dr_pins[] = { 0x4b, };
 static int x1830_ssi0_clk_pins[] = { 0x4f, };
@@ -2182,8 +2346,8 @@ static int x1830_i2s_data_rx_pins[] = { 0x54, };
 static int x1830_i2s_clk_txrx_pins[] = { 0x58, 0x52, };
 static int x1830_i2s_clk_rx_pins[] = { 0x56, 0x55, };
 static int x1830_i2s_sysclk_pins[] = { 0x57, };
-static int x1830_dmic0_pins[] = { 0x48, 0x59, };
-static int x1830_dmic1_pins[] = { 0x5a, };
+static int x1830_dmic_if0_pins[] = { 0x48, 0x59, };
+static int x1830_dmic_if1_pins[] = { 0x5a, };
 static int x1830_lcd_tft_8bit_pins[] = {
 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
 	0x68, 0x73, 0x72, 0x69,
@@ -2223,7 +2387,9 @@ static const struct group_desc x1830_groups[] = {
 	INGENIC_PIN_GROUP("uart0-data", x1830_uart0_data, 0),
 	INGENIC_PIN_GROUP("uart0-hwflow", x1830_uart0_hwflow, 0),
 	INGENIC_PIN_GROUP("uart1-data", x1830_uart1_data, 0),
-	INGENIC_PIN_GROUP("sfc", x1830_sfc, 1),
+	INGENIC_PIN_GROUP("sfc-data", x1830_sfc_data, 1),
+	INGENIC_PIN_GROUP("sfc-clk", x1830_sfc_clk, 1),
+	INGENIC_PIN_GROUP("sfc-ce", x1830_sfc_ce, 1),
 	INGENIC_PIN_GROUP("ssi0-dt", x1830_ssi0_dt, 0),
 	INGENIC_PIN_GROUP("ssi0-dr", x1830_ssi0_dr, 0),
 	INGENIC_PIN_GROUP("ssi0-clk", x1830_ssi0_clk, 0),
@@ -2254,8 +2420,8 @@ static const struct group_desc x1830_groups[] = {
 	INGENIC_PIN_GROUP("i2s-clk-txrx", x1830_i2s_clk_txrx, 0),
 	INGENIC_PIN_GROUP("i2s-clk-rx", x1830_i2s_clk_rx, 0),
 	INGENIC_PIN_GROUP("i2s-sysclk", x1830_i2s_sysclk, 0),
-	INGENIC_PIN_GROUP("dmic0", x1830_dmic0, 2),
-	INGENIC_PIN_GROUP("dmic1", x1830_dmic1, 2),
+	INGENIC_PIN_GROUP("dmic-if0", x1830_dmic_if0, 2),
+	INGENIC_PIN_GROUP("dmic-if1", x1830_dmic_if1, 2),
 	INGENIC_PIN_GROUP("lcd-tft-8bit", x1830_lcd_tft_8bit, 0),
 	INGENIC_PIN_GROUP("lcd-tft-24bit", x1830_lcd_tft_24bit, 0),
 	INGENIC_PIN_GROUP("lcd-slcd-8bit", x1830_lcd_slcd_8bit, 1),
@@ -2281,7 +2447,7 @@ static const struct group_desc x1830_groups[] = {
 
 static const char *x1830_uart0_groups[] = { "uart0-data", "uart0-hwflow", };
 static const char *x1830_uart1_groups[] = { "uart1-data", };
-static const char *x1830_sfc_groups[] = { "sfc", };
+static const char *x1830_sfc_groups[] = { "sfc-data", "sfc-clk", "sfc-ce", };
 static const char *x1830_ssi0_groups[] = {
 	"ssi0-dt", "ssi0-dr", "ssi0-clk", "ssi0-gpc", "ssi0-ce0", "ssi0-ce1",
 };
@@ -2301,7 +2467,7 @@ static const char *x1830_i2c2_groups[] = { "i2c2-data", };
 static const char *x1830_i2s_groups[] = {
 	"i2s-data-tx", "i2s-data-rx", "i2s-clk-txrx", "i2s-clk-rx", "i2s-sysclk",
 };
-static const char *x1830_dmic_groups[] = { "dmic0", "dmic1", };
+static const char *x1830_dmic_groups[] = { "dmic-if0", "dmic-if1", };
 static const char *x1830_lcd_groups[] = {
 	"lcd-tft-8bit", "lcd-tft-24bit", "lcd-slcd-8bit", "lcd-slcd-16bit",
 };
@@ -2381,17 +2547,21 @@ static int x2000_uart7_data_a_pins[] = { 0x08, 0x09, };
 static int x2000_uart7_data_c_pins[] = { 0x41, 0x42, };
 static int x2000_uart8_data_pins[] = { 0x3c, 0x3d, };
 static int x2000_uart9_data_pins[] = { 0x3e, 0x3f, };
-static int x2000_sfc0_d_pins[] = { 0x73, 0x74, 0x75, 0x76, 0x71, 0x72, };
-static int x2000_sfc0_e_pins[] = { 0x92, 0x93, 0x94, 0x95, 0x90, 0x91, };
-static int x2000_sfc1_pins[] = { 0x77, 0x78, 0x79, 0x7a, };
+static int x2000_sfc_data_if0_d_pins[] = { 0x73, 0x74, 0x75, 0x76, };
+static int x2000_sfc_data_if0_e_pins[] = { 0x92, 0x93, 0x94, 0x95, };
+static int x2000_sfc_data_if1_pins[] = { 0x77, 0x78, 0x79, 0x7a, };
+static int x2000_sfc_clk_d_pins[] = { 0x71, };
+static int x2000_sfc_clk_e_pins[] = { 0x90, };
+static int x2000_sfc_ce_d_pins[] = { 0x72, };
+static int x2000_sfc_ce_e_pins[] = { 0x91, };
 static int x2000_ssi0_dt_b_pins[] = { 0x3e, };
 static int x2000_ssi0_dt_d_pins[] = { 0x69, };
 static int x2000_ssi0_dr_b_pins[] = { 0x3d, };
 static int x2000_ssi0_dr_d_pins[] = { 0x6a, };
 static int x2000_ssi0_clk_b_pins[] = { 0x3f, };
 static int x2000_ssi0_clk_d_pins[] = { 0x68, };
-static int x2000_ssi0_ce0_b_pins[] = { 0x3c, };
-static int x2000_ssi0_ce0_d_pins[] = { 0x6d, };
+static int x2000_ssi0_ce_b_pins[] = { 0x3c, };
+static int x2000_ssi0_ce_d_pins[] = { 0x6d, };
 static int x2000_ssi1_dt_c_pins[] = { 0x4b, };
 static int x2000_ssi1_dt_d_pins[] = { 0x72, };
 static int x2000_ssi1_dt_e_pins[] = { 0x91, };
@@ -2401,9 +2571,9 @@ static int x2000_ssi1_dr_e_pins[] = { 0x92, };
 static int x2000_ssi1_clk_c_pins[] = { 0x4c, };
 static int x2000_ssi1_clk_d_pins[] = { 0x71, };
 static int x2000_ssi1_clk_e_pins[] = { 0x90, };
-static int x2000_ssi1_ce0_c_pins[] = { 0x49, };
-static int x2000_ssi1_ce0_d_pins[] = { 0x76, };
-static int x2000_ssi1_ce0_e_pins[] = { 0x95, };
+static int x2000_ssi1_ce_c_pins[] = { 0x49, };
+static int x2000_ssi1_ce_d_pins[] = { 0x76, };
+static int x2000_ssi1_ce_e_pins[] = { 0x95, };
 static int x2000_mmc0_1bit_pins[] = { 0x71, 0x72, 0x73, };
 static int x2000_mmc0_4bit_pins[] = { 0x74, 0x75, 0x75, };
 static int x2000_mmc0_8bit_pins[] = { 0x77, 0x78, 0x79, 0x7a, };
@@ -2455,10 +2625,10 @@ static int x2000_i2s3_data_tx2_pins[] = { 0x05, };
 static int x2000_i2s3_data_tx3_pins[] = { 0x06, };
 static int x2000_i2s3_clk_tx_pins[] = { 0x10, 0x02, };
 static int x2000_i2s3_sysclk_tx_pins[] = { 0x00, };
-static int x2000_dmic0_pins[] = { 0x54, 0x55, };
-static int x2000_dmic1_pins[] = { 0x56, };
-static int x2000_dmic2_pins[] = { 0x57, };
-static int x2000_dmic3_pins[] = { 0x58, };
+static int x2000_dmic_if0_pins[] = { 0x54, 0x55, };
+static int x2000_dmic_if1_pins[] = { 0x56, };
+static int x2000_dmic_if2_pins[] = { 0x57, };
+static int x2000_dmic_if3_pins[] = { 0x58, };
 static int x2000_cim_8bit_pins[] = {
 	0x0e, 0x0c, 0x0d, 0x4f,
 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
@@ -2545,17 +2715,21 @@ static const struct group_desc x2000_groups[] = {
 	INGENIC_PIN_GROUP("uart7-data-c", x2000_uart7_data_c, 3),
 	INGENIC_PIN_GROUP("uart8-data", x2000_uart8_data, 3),
 	INGENIC_PIN_GROUP("uart9-data", x2000_uart9_data, 3),
-	INGENIC_PIN_GROUP("sfc0-d", x2000_sfc0_d, 1),
-	INGENIC_PIN_GROUP("sfc0-e", x2000_sfc0_e, 0),
-	INGENIC_PIN_GROUP("sfc1", x2000_sfc1, 1),
+	INGENIC_PIN_GROUP("sfc-data-if0-d", x2000_sfc_data_if0_d, 1),
+	INGENIC_PIN_GROUP("sfc-data-if0-e", x2000_sfc_data_if0_e, 0),
+	INGENIC_PIN_GROUP("sfc-data-if1", x2000_sfc_data_if1, 1),
+	INGENIC_PIN_GROUP("sfc-clk-d", x2000_sfc_clk_d, 1),
+	INGENIC_PIN_GROUP("sfc-clk-e", x2000_sfc_clk_e, 0),
+	INGENIC_PIN_GROUP("sfc-ce-d", x2000_sfc_ce_d, 1),
+	INGENIC_PIN_GROUP("sfc-ce-e", x2000_sfc_ce_e, 0),
 	INGENIC_PIN_GROUP("ssi0-dt-b", x2000_ssi0_dt_b, 1),
 	INGENIC_PIN_GROUP("ssi0-dt-d", x2000_ssi0_dt_d, 1),
 	INGENIC_PIN_GROUP("ssi0-dr-b", x2000_ssi0_dr_b, 1),
 	INGENIC_PIN_GROUP("ssi0-dr-d", x2000_ssi0_dr_d, 1),
 	INGENIC_PIN_GROUP("ssi0-clk-b", x2000_ssi0_clk_b, 1),
 	INGENIC_PIN_GROUP("ssi0-clk-d", x2000_ssi0_clk_d, 1),
-	INGENIC_PIN_GROUP("ssi0-ce0-b", x2000_ssi0_ce0_b, 1),
-	INGENIC_PIN_GROUP("ssi0-ce0-d", x2000_ssi0_ce0_d, 1),
+	INGENIC_PIN_GROUP("ssi0-ce-b", x2000_ssi0_ce_b, 1),
+	INGENIC_PIN_GROUP("ssi0-ce-d", x2000_ssi0_ce_d, 1),
 	INGENIC_PIN_GROUP("ssi1-dt-c", x2000_ssi1_dt_c, 2),
 	INGENIC_PIN_GROUP("ssi1-dt-d", x2000_ssi1_dt_d, 2),
 	INGENIC_PIN_GROUP("ssi1-dt-e", x2000_ssi1_dt_e, 1),
@@ -2565,9 +2739,9 @@ static const struct group_desc x2000_groups[] = {
 	INGENIC_PIN_GROUP("ssi1-clk-c", x2000_ssi1_clk_c, 2),
 	INGENIC_PIN_GROUP("ssi1-clk-d", x2000_ssi1_clk_d, 2),
 	INGENIC_PIN_GROUP("ssi1-clk-e", x2000_ssi1_clk_e, 1),
-	INGENIC_PIN_GROUP("ssi1-ce0-c", x2000_ssi1_ce0_c, 2),
-	INGENIC_PIN_GROUP("ssi1-ce0-d", x2000_ssi1_ce0_d, 2),
-	INGENIC_PIN_GROUP("ssi1-ce0-e", x2000_ssi1_ce0_e, 1),
+	INGENIC_PIN_GROUP("ssi1-ce-c", x2000_ssi1_ce_c, 2),
+	INGENIC_PIN_GROUP("ssi1-ce-d", x2000_ssi1_ce_d, 2),
+	INGENIC_PIN_GROUP("ssi1-ce-e", x2000_ssi1_ce_e, 1),
 	INGENIC_PIN_GROUP("mmc0-1bit", x2000_mmc0_1bit, 0),
 	INGENIC_PIN_GROUP("mmc0-4bit", x2000_mmc0_4bit, 0),
 	INGENIC_PIN_GROUP("mmc0-8bit", x2000_mmc0_8bit, 0),
@@ -2612,10 +2786,10 @@ static const struct group_desc x2000_groups[] = {
 	INGENIC_PIN_GROUP("i2s3-data-tx3", x2000_i2s3_data_tx3, 2),
 	INGENIC_PIN_GROUP("i2s3-clk-tx", x2000_i2s3_clk_tx, 2),
 	INGENIC_PIN_GROUP("i2s3-sysclk-tx", x2000_i2s3_sysclk_tx, 2),
-	INGENIC_PIN_GROUP("dmic0", x2000_dmic0, 0),
-	INGENIC_PIN_GROUP("dmic1", x2000_dmic1, 0),
-	INGENIC_PIN_GROUP("dmic2", x2000_dmic2, 0),
-	INGENIC_PIN_GROUP("dmic3", x2000_dmic3, 0),
+	INGENIC_PIN_GROUP("dmic-if0", x2000_dmic_if0, 0),
+	INGENIC_PIN_GROUP("dmic-if1", x2000_dmic_if1, 0),
+	INGENIC_PIN_GROUP("dmic-if2", x2000_dmic_if2, 0),
+	INGENIC_PIN_GROUP("dmic-if3", x2000_dmic_if3, 0),
 	INGENIC_PIN_GROUP_FUNCS("cim-data-8bit", x2000_cim_8bit,
 				x2000_cim_8bit_funcs),
 	INGENIC_PIN_GROUP("cim-data-12bit", x2000_cim_12bit, 0),
@@ -2670,18 +2844,21 @@ static const char *x2000_uart6_groups[] = { "uart6-data-a", "uart6-data-c", };
 static const char *x2000_uart7_groups[] = { "uart7-data-a", "uart7-data-c", };
 static const char *x2000_uart8_groups[] = { "uart8-data", };
 static const char *x2000_uart9_groups[] = { "uart9-data", };
-static const char *x2000_sfc_groups[] = { "sfc0-d", "sfc0-e", "sfc1", };
+static const char *x2000_sfc_groups[] = {
+	"sfc-data-if0-d", "sfc-data-if0-e", "sfc-data-if1",
+	"sfc-clk-d", "sfc-clk-e", "sfc-ce-d", "sfc-ce-e",
+};
 static const char *x2000_ssi0_groups[] = {
 	"ssi0-dt-b", "ssi0-dt-d",
 	"ssi0-dr-b", "ssi0-dr-d",
 	"ssi0-clk-b", "ssi0-clk-d",
-	"ssi0-ce0-b", "ssi0-ce0-d",
+	"ssi0-ce-b", "ssi0-ce-d",
 };
 static const char *x2000_ssi1_groups[] = {
 	"ssi1-dt-c", "ssi1-dt-d", "ssi1-dt-e",
 	"ssi1-dr-c", "ssi1-dr-d", "ssi1-dr-e",
 	"ssi1-clk-c", "ssi1-clk-d", "ssi1-clk-e",
-	"ssi1-ce0-c", "ssi1-ce0-d", "ssi1-ce0-e",
+	"ssi1-ce-c", "ssi1-ce-d", "ssi1-ce-e",
 };
 static const char *x2000_mmc0_groups[] = { "mmc0-1bit", "mmc0-4bit", "mmc0-8bit", };
 static const char *x2000_mmc1_groups[] = { "mmc1-1bit", "mmc1-4bit", };
@@ -2711,7 +2888,9 @@ static const char *x2000_i2s3_groups[] = {
 	"i2s3-data-tx0", "i2s3-data-tx1", "i2s3-data-tx2", "i2s3-data-tx3",
 	"i2s3-clk-tx", "i2s3-sysclk-tx",
 };
-static const char *x2000_dmic_groups[] = { "dmic0", "dmic1", "dmic2", "dmic3", };
+static const char *x2000_dmic_groups[] = {
+	"dmic-if0", "dmic-if1", "dmic-if2", "dmic-if3",
+};
 static const char *x2000_cim_groups[] = { "cim-data-8bit", "cim-data-12bit", };
 static const char *x2000_lcd_groups[] = {
 	"lcd-tft-8bit", "lcd-tft-16bit", "lcd-tft-18bit", "lcd-tft-24bit",
@@ -2802,6 +2981,216 @@ static const struct ingenic_chip_info x2000_chip_info = {
 	.pull_downs = x2000_pull_downs,
 };
 
+static const u32 x2100_pull_ups[5] = {
+	0x0003ffff, 0xffffffff, 0x1ff0ffff, 0xc7fe3f3f, 0x0fbf003f,
+};
+
+static const u32 x2100_pull_downs[5] = {
+	0x0003ffff, 0xffffffff, 0x1ff0ffff, 0x00000000, 0x0fbf003f,
+};
+
+static int x2100_mac_pins[] = {
+	0x4b, 0x47, 0x46, 0x4a, 0x43, 0x42, 0x4c, 0x4d, 0x4f, 0x41,
+};
+
+static const struct group_desc x2100_groups[] = {
+	INGENIC_PIN_GROUP("uart0-data", x2000_uart0_data, 2),
+	INGENIC_PIN_GROUP("uart0-hwflow", x2000_uart0_hwflow, 2),
+	INGENIC_PIN_GROUP("uart1-data", x2000_uart1_data, 1),
+	INGENIC_PIN_GROUP("uart1-hwflow", x2000_uart1_hwflow, 1),
+	INGENIC_PIN_GROUP("uart2-data", x2000_uart2_data, 0),
+	INGENIC_PIN_GROUP("uart3-data-c", x2000_uart3_data_c, 0),
+	INGENIC_PIN_GROUP("uart3-data-d", x2000_uart3_data_d, 1),
+	INGENIC_PIN_GROUP("uart3-hwflow-c", x2000_uart3_hwflow_c, 0),
+	INGENIC_PIN_GROUP("uart3-hwflow-d", x2000_uart3_hwflow_d, 1),
+	INGENIC_PIN_GROUP("uart4-data-a", x2000_uart4_data_a, 1),
+	INGENIC_PIN_GROUP("uart4-data-c", x2000_uart4_data_c, 3),
+	INGENIC_PIN_GROUP("uart4-hwflow-a", x2000_uart4_hwflow_a, 1),
+	INGENIC_PIN_GROUP("uart4-hwflow-c", x2000_uart4_hwflow_c, 3),
+	INGENIC_PIN_GROUP("uart5-data-a", x2000_uart5_data_a, 1),
+	INGENIC_PIN_GROUP("uart5-data-c", x2000_uart5_data_c, 3),
+	INGENIC_PIN_GROUP("uart6-data-a", x2000_uart6_data_a, 1),
+	INGENIC_PIN_GROUP("uart6-data-c", x2000_uart6_data_c, 3),
+	INGENIC_PIN_GROUP("uart7-data-a", x2000_uart7_data_a, 1),
+	INGENIC_PIN_GROUP("uart7-data-c", x2000_uart7_data_c, 3),
+	INGENIC_PIN_GROUP("uart8-data", x2000_uart8_data, 3),
+	INGENIC_PIN_GROUP("uart9-data", x2000_uart9_data, 3),
+	INGENIC_PIN_GROUP("sfc-data-if0-d", x2000_sfc_data_if0_d, 1),
+	INGENIC_PIN_GROUP("sfc-data-if0-e", x2000_sfc_data_if0_e, 0),
+	INGENIC_PIN_GROUP("sfc-data-if1", x2000_sfc_data_if1, 1),
+	INGENIC_PIN_GROUP("sfc-clk-d", x2000_sfc_clk_d, 1),
+	INGENIC_PIN_GROUP("sfc-clk-e", x2000_sfc_clk_e, 0),
+	INGENIC_PIN_GROUP("sfc-ce-d", x2000_sfc_ce_d, 1),
+	INGENIC_PIN_GROUP("sfc-ce-e", x2000_sfc_ce_e, 0),
+	INGENIC_PIN_GROUP("ssi0-dt-b", x2000_ssi0_dt_b, 1),
+	INGENIC_PIN_GROUP("ssi0-dt-d", x2000_ssi0_dt_d, 1),
+	INGENIC_PIN_GROUP("ssi0-dr-b", x2000_ssi0_dr_b, 1),
+	INGENIC_PIN_GROUP("ssi0-dr-d", x2000_ssi0_dr_d, 1),
+	INGENIC_PIN_GROUP("ssi0-clk-b", x2000_ssi0_clk_b, 1),
+	INGENIC_PIN_GROUP("ssi0-clk-d", x2000_ssi0_clk_d, 1),
+	INGENIC_PIN_GROUP("ssi0-ce-b", x2000_ssi0_ce_b, 1),
+	INGENIC_PIN_GROUP("ssi0-ce-d", x2000_ssi0_ce_d, 1),
+	INGENIC_PIN_GROUP("ssi1-dt-c", x2000_ssi1_dt_c, 2),
+	INGENIC_PIN_GROUP("ssi1-dt-d", x2000_ssi1_dt_d, 2),
+	INGENIC_PIN_GROUP("ssi1-dt-e", x2000_ssi1_dt_e, 1),
+	INGENIC_PIN_GROUP("ssi1-dr-c", x2000_ssi1_dr_c, 2),
+	INGENIC_PIN_GROUP("ssi1-dr-d", x2000_ssi1_dr_d, 2),
+	INGENIC_PIN_GROUP("ssi1-dr-e", x2000_ssi1_dr_e, 1),
+	INGENIC_PIN_GROUP("ssi1-clk-c", x2000_ssi1_clk_c, 2),
+	INGENIC_PIN_GROUP("ssi1-clk-d", x2000_ssi1_clk_d, 2),
+	INGENIC_PIN_GROUP("ssi1-clk-e", x2000_ssi1_clk_e, 1),
+	INGENIC_PIN_GROUP("ssi1-ce-c", x2000_ssi1_ce_c, 2),
+	INGENIC_PIN_GROUP("ssi1-ce-d", x2000_ssi1_ce_d, 2),
+	INGENIC_PIN_GROUP("ssi1-ce-e", x2000_ssi1_ce_e, 1),
+	INGENIC_PIN_GROUP("mmc0-1bit", x2000_mmc0_1bit, 0),
+	INGENIC_PIN_GROUP("mmc0-4bit", x2000_mmc0_4bit, 0),
+	INGENIC_PIN_GROUP("mmc0-8bit", x2000_mmc0_8bit, 0),
+	INGENIC_PIN_GROUP("mmc1-1bit", x2000_mmc1_1bit, 0),
+	INGENIC_PIN_GROUP("mmc1-4bit", x2000_mmc1_4bit, 0),
+	INGENIC_PIN_GROUP("mmc2-1bit", x2000_mmc2_1bit, 0),
+	INGENIC_PIN_GROUP("mmc2-4bit", x2000_mmc2_4bit, 0),
+	INGENIC_PIN_GROUP("emc-8bit-data", x2000_emc_8bit_data, 0),
+	INGENIC_PIN_GROUP("emc-16bit-data", x2000_emc_16bit_data, 0),
+	INGENIC_PIN_GROUP("emc-addr", x2000_emc_addr, 0),
+	INGENIC_PIN_GROUP("emc-rd-we", x2000_emc_rd_we, 0),
+	INGENIC_PIN_GROUP("emc-wait", x2000_emc_wait, 0),
+	INGENIC_PIN_GROUP("emc-cs1", x2000_emc_cs1, 3),
+	INGENIC_PIN_GROUP("emc-cs2", x2000_emc_cs2, 3),
+	INGENIC_PIN_GROUP("i2c0-data", x2000_i2c0, 3),
+	INGENIC_PIN_GROUP("i2c1-data-c", x2000_i2c1_c, 2),
+	INGENIC_PIN_GROUP("i2c1-data-d", x2000_i2c1_d, 1),
+	INGENIC_PIN_GROUP("i2c2-data-b", x2000_i2c2_b, 2),
+	INGENIC_PIN_GROUP("i2c2-data-d", x2000_i2c2_d, 2),
+	INGENIC_PIN_GROUP("i2c2-data-e", x2000_i2c2_e, 1),
+	INGENIC_PIN_GROUP("i2c3-data-a", x2000_i2c3_a, 0),
+	INGENIC_PIN_GROUP("i2c3-data-d", x2000_i2c3_d, 1),
+	INGENIC_PIN_GROUP("i2c4-data-c", x2000_i2c4_c, 1),
+	INGENIC_PIN_GROUP("i2c4-data-d", x2000_i2c4_d, 2),
+	INGENIC_PIN_GROUP("i2c5-data-c", x2000_i2c5_c, 1),
+	INGENIC_PIN_GROUP("i2c5-data-d", x2000_i2c5_d, 1),
+	INGENIC_PIN_GROUP("i2s1-data-tx", x2000_i2s1_data_tx, 2),
+	INGENIC_PIN_GROUP("i2s1-data-rx", x2000_i2s1_data_rx, 2),
+	INGENIC_PIN_GROUP("i2s1-clk-tx", x2000_i2s1_clk_tx, 2),
+	INGENIC_PIN_GROUP("i2s1-clk-rx", x2000_i2s1_clk_rx, 2),
+	INGENIC_PIN_GROUP("i2s1-sysclk-tx", x2000_i2s1_sysclk_tx, 2),
+	INGENIC_PIN_GROUP("i2s1-sysclk-rx", x2000_i2s1_sysclk_rx, 2),
+	INGENIC_PIN_GROUP("i2s2-data-rx0", x2000_i2s2_data_rx0, 2),
+	INGENIC_PIN_GROUP("i2s2-data-rx1", x2000_i2s2_data_rx1, 2),
+	INGENIC_PIN_GROUP("i2s2-data-rx2", x2000_i2s2_data_rx2, 2),
+	INGENIC_PIN_GROUP("i2s2-data-rx3", x2000_i2s2_data_rx3, 2),
+	INGENIC_PIN_GROUP("i2s2-clk-rx", x2000_i2s2_clk_rx, 2),
+	INGENIC_PIN_GROUP("i2s2-sysclk-rx", x2000_i2s2_sysclk_rx, 2),
+	INGENIC_PIN_GROUP("i2s3-data-tx0", x2000_i2s3_data_tx0, 2),
+	INGENIC_PIN_GROUP("i2s3-data-tx1", x2000_i2s3_data_tx1, 2),
+	INGENIC_PIN_GROUP("i2s3-data-tx2", x2000_i2s3_data_tx2, 2),
+	INGENIC_PIN_GROUP("i2s3-data-tx3", x2000_i2s3_data_tx3, 2),
+	INGENIC_PIN_GROUP("i2s3-clk-tx", x2000_i2s3_clk_tx, 2),
+	INGENIC_PIN_GROUP("i2s3-sysclk-tx", x2000_i2s3_sysclk_tx, 2),
+	INGENIC_PIN_GROUP("dmic-if0", x2000_dmic_if0, 0),
+	INGENIC_PIN_GROUP("dmic-if1", x2000_dmic_if1, 0),
+	INGENIC_PIN_GROUP("dmic-if2", x2000_dmic_if2, 0),
+	INGENIC_PIN_GROUP("dmic-if3", x2000_dmic_if3, 0),
+	INGENIC_PIN_GROUP_FUNCS("cim-data-8bit", x2000_cim_8bit,
+				x2000_cim_8bit_funcs),
+	INGENIC_PIN_GROUP("cim-data-12bit", x2000_cim_12bit, 0),
+	INGENIC_PIN_GROUP("lcd-tft-8bit", x2000_lcd_tft_8bit, 1),
+	INGENIC_PIN_GROUP("lcd-tft-16bit", x2000_lcd_tft_16bit, 1),
+	INGENIC_PIN_GROUP("lcd-tft-18bit", x2000_lcd_tft_18bit, 1),
+	INGENIC_PIN_GROUP("lcd-tft-24bit", x2000_lcd_tft_24bit, 1),
+	INGENIC_PIN_GROUP("lcd-slcd-8bit", x2000_lcd_slcd_8bit, 2),
+	INGENIC_PIN_GROUP("lcd-slcd-16bit", x2000_lcd_tft_16bit, 2),
+	INGENIC_PIN_GROUP("pwm0-c", x2000_pwm_pwm0_c, 0),
+	INGENIC_PIN_GROUP("pwm0-d", x2000_pwm_pwm0_d, 2),
+	INGENIC_PIN_GROUP("pwm1-c", x2000_pwm_pwm1_c, 0),
+	INGENIC_PIN_GROUP("pwm1-d", x2000_pwm_pwm1_d, 2),
+	INGENIC_PIN_GROUP("pwm2-c", x2000_pwm_pwm2_c, 0),
+	INGENIC_PIN_GROUP("pwm2-e", x2000_pwm_pwm2_e, 1),
+	INGENIC_PIN_GROUP("pwm3-c", x2000_pwm_pwm3_c, 0),
+	INGENIC_PIN_GROUP("pwm3-e", x2000_pwm_pwm3_e, 1),
+	INGENIC_PIN_GROUP("pwm4-c", x2000_pwm_pwm4_c, 0),
+	INGENIC_PIN_GROUP("pwm4-e", x2000_pwm_pwm4_e, 1),
+	INGENIC_PIN_GROUP("pwm5-c", x2000_pwm_pwm5_c, 0),
+	INGENIC_PIN_GROUP("pwm5-e", x2000_pwm_pwm5_e, 1),
+	INGENIC_PIN_GROUP("pwm6-c", x2000_pwm_pwm6_c, 0),
+	INGENIC_PIN_GROUP("pwm6-e", x2000_pwm_pwm6_e, 1),
+	INGENIC_PIN_GROUP("pwm7-c", x2000_pwm_pwm7_c, 0),
+	INGENIC_PIN_GROUP("pwm7-e", x2000_pwm_pwm7_e, 1),
+	INGENIC_PIN_GROUP("pwm8", x2000_pwm_pwm8, 0),
+	INGENIC_PIN_GROUP("pwm9", x2000_pwm_pwm9, 0),
+	INGENIC_PIN_GROUP("pwm10", x2000_pwm_pwm10, 0),
+	INGENIC_PIN_GROUP("pwm11", x2000_pwm_pwm11, 0),
+	INGENIC_PIN_GROUP("pwm12", x2000_pwm_pwm12, 0),
+	INGENIC_PIN_GROUP("pwm13", x2000_pwm_pwm13, 0),
+	INGENIC_PIN_GROUP("pwm14", x2000_pwm_pwm14, 0),
+	INGENIC_PIN_GROUP("pwm15", x2000_pwm_pwm15, 0),
+	INGENIC_PIN_GROUP("mac", x2100_mac, 1),
+};
+
+static const char *x2100_mac_groups[] = { "mac", };
+
+static const struct function_desc x2100_functions[] = {
+	{ "uart0", x2000_uart0_groups, ARRAY_SIZE(x2000_uart0_groups), },
+	{ "uart1", x2000_uart1_groups, ARRAY_SIZE(x2000_uart1_groups), },
+	{ "uart2", x2000_uart2_groups, ARRAY_SIZE(x2000_uart2_groups), },
+	{ "uart3", x2000_uart3_groups, ARRAY_SIZE(x2000_uart3_groups), },
+	{ "uart4", x2000_uart4_groups, ARRAY_SIZE(x2000_uart4_groups), },
+	{ "uart5", x2000_uart5_groups, ARRAY_SIZE(x2000_uart5_groups), },
+	{ "uart6", x2000_uart6_groups, ARRAY_SIZE(x2000_uart6_groups), },
+	{ "uart7", x2000_uart7_groups, ARRAY_SIZE(x2000_uart7_groups), },
+	{ "uart8", x2000_uart8_groups, ARRAY_SIZE(x2000_uart8_groups), },
+	{ "uart9", x2000_uart9_groups, ARRAY_SIZE(x2000_uart9_groups), },
+	{ "sfc", x2000_sfc_groups, ARRAY_SIZE(x2000_sfc_groups), },
+	{ "ssi0", x2000_ssi0_groups, ARRAY_SIZE(x2000_ssi0_groups), },
+	{ "ssi1", x2000_ssi1_groups, ARRAY_SIZE(x2000_ssi1_groups), },
+	{ "mmc0", x2000_mmc0_groups, ARRAY_SIZE(x2000_mmc0_groups), },
+	{ "mmc1", x2000_mmc1_groups, ARRAY_SIZE(x2000_mmc1_groups), },
+	{ "mmc2", x2000_mmc2_groups, ARRAY_SIZE(x2000_mmc2_groups), },
+	{ "emc", x2000_emc_groups, ARRAY_SIZE(x2000_emc_groups), },
+	{ "emc-cs1", x2000_cs1_groups, ARRAY_SIZE(x2000_cs1_groups), },
+	{ "emc-cs2", x2000_cs2_groups, ARRAY_SIZE(x2000_cs2_groups), },
+	{ "i2c0", x2000_i2c0_groups, ARRAY_SIZE(x2000_i2c0_groups), },
+	{ "i2c1", x2000_i2c1_groups, ARRAY_SIZE(x2000_i2c1_groups), },
+	{ "i2c2", x2000_i2c2_groups, ARRAY_SIZE(x2000_i2c2_groups), },
+	{ "i2c3", x2000_i2c3_groups, ARRAY_SIZE(x2000_i2c3_groups), },
+	{ "i2c4", x2000_i2c4_groups, ARRAY_SIZE(x2000_i2c4_groups), },
+	{ "i2c5", x2000_i2c5_groups, ARRAY_SIZE(x2000_i2c5_groups), },
+	{ "i2s1", x2000_i2s1_groups, ARRAY_SIZE(x2000_i2s1_groups), },
+	{ "i2s2", x2000_i2s2_groups, ARRAY_SIZE(x2000_i2s2_groups), },
+	{ "i2s3", x2000_i2s3_groups, ARRAY_SIZE(x2000_i2s3_groups), },
+	{ "dmic", x2000_dmic_groups, ARRAY_SIZE(x2000_dmic_groups), },
+	{ "cim", x2000_cim_groups, ARRAY_SIZE(x2000_cim_groups), },
+	{ "lcd", x2000_lcd_groups, ARRAY_SIZE(x2000_lcd_groups), },
+	{ "pwm0", x2000_pwm0_groups, ARRAY_SIZE(x2000_pwm0_groups), },
+	{ "pwm1", x2000_pwm1_groups, ARRAY_SIZE(x2000_pwm1_groups), },
+	{ "pwm2", x2000_pwm2_groups, ARRAY_SIZE(x2000_pwm2_groups), },
+	{ "pwm3", x2000_pwm3_groups, ARRAY_SIZE(x2000_pwm3_groups), },
+	{ "pwm4", x2000_pwm4_groups, ARRAY_SIZE(x2000_pwm4_groups), },
+	{ "pwm5", x2000_pwm5_groups, ARRAY_SIZE(x2000_pwm5_groups), },
+	{ "pwm6", x2000_pwm6_groups, ARRAY_SIZE(x2000_pwm6_groups), },
+	{ "pwm7", x2000_pwm7_groups, ARRAY_SIZE(x2000_pwm7_groups), },
+	{ "pwm8", x2000_pwm8_groups, ARRAY_SIZE(x2000_pwm8_groups), },
+	{ "pwm9", x2000_pwm9_groups, ARRAY_SIZE(x2000_pwm9_groups), },
+	{ "pwm10", x2000_pwm10_groups, ARRAY_SIZE(x2000_pwm10_groups), },
+	{ "pwm11", x2000_pwm11_groups, ARRAY_SIZE(x2000_pwm11_groups), },
+	{ "pwm12", x2000_pwm12_groups, ARRAY_SIZE(x2000_pwm12_groups), },
+	{ "pwm13", x2000_pwm13_groups, ARRAY_SIZE(x2000_pwm13_groups), },
+	{ "pwm14", x2000_pwm14_groups, ARRAY_SIZE(x2000_pwm14_groups), },
+	{ "pwm15", x2000_pwm15_groups, ARRAY_SIZE(x2000_pwm15_groups), },
+	{ "mac", x2100_mac_groups, ARRAY_SIZE(x2100_mac_groups), },
+};
+
+static const struct ingenic_chip_info x2100_chip_info = {
+	.num_chips = 5,
+	.reg_offset = 0x100,
+	.version = ID_X2100,
+	.groups = x2100_groups,
+	.num_groups = ARRAY_SIZE(x2100_groups),
+	.functions = x2100_functions,
+	.num_functions = ARRAY_SIZE(x2100_functions),
+	.pull_ups = x2100_pull_ups,
+	.pull_downs = x2100_pull_downs,
+};
+
 static u32 ingenic_gpio_read_reg(struct ingenic_gpio_chip *jzgc, u8 reg)
 {
 	unsigned int val;
@@ -3441,17 +3830,17 @@ static void ingenic_set_bias(struct ingenic_pinctrl *jzpc,
 {
 	if (jzpc->info->version >= ID_X2000) {
 		switch (bias) {
-		case PIN_CONFIG_BIAS_PULL_UP:
+		case GPIO_PULL_UP:
 			ingenic_config_pin(jzpc, pin, X2000_GPIO_PEPD, false);
 			ingenic_config_pin(jzpc, pin, X2000_GPIO_PEPU, true);
 			break;
 
-		case PIN_CONFIG_BIAS_PULL_DOWN:
+		case GPIO_PULL_DOWN:
 			ingenic_config_pin(jzpc, pin, X2000_GPIO_PEPU, false);
 			ingenic_config_pin(jzpc, pin, X2000_GPIO_PEPD, true);
 			break;
 
-		case PIN_CONFIG_BIAS_DISABLE:
+		case GPIO_PULL_DIS:
 		default:
 			ingenic_config_pin(jzpc, pin, X2000_GPIO_PEPU, false);
 			ingenic_config_pin(jzpc, pin, X2000_GPIO_PEPD, false);
@@ -3654,19 +4043,20 @@ static const struct regmap_config ingenic_pinctrl_regmap_config = {
 	.reg_stride = 4,
 };
 
-static const struct of_device_id ingenic_gpio_of_match[] __initconst = {
-	{ .compatible = "ingenic,jz4730-gpio", },
-	{ .compatible = "ingenic,jz4740-gpio", },
-	{ .compatible = "ingenic,jz4725b-gpio", },
-	{ .compatible = "ingenic,jz4750-gpio", },
-	{ .compatible = "ingenic,jz4755-gpio", },
-	{ .compatible = "ingenic,jz4760-gpio", },
-	{ .compatible = "ingenic,jz4770-gpio", },
-	{ .compatible = "ingenic,jz4775-gpio", },
-	{ .compatible = "ingenic,jz4780-gpio", },
-	{ .compatible = "ingenic,x1000-gpio", },
-	{ .compatible = "ingenic,x1830-gpio", },
-	{ .compatible = "ingenic,x2000-gpio", },
+static const struct of_device_id ingenic_gpio_of_matches[] __initconst = {
+	{ .compatible = "ingenic,jz4730-gpio" },
+	{ .compatible = "ingenic,jz4740-gpio" },
+	{ .compatible = "ingenic,jz4725b-gpio" },
+	{ .compatible = "ingenic,jz4750-gpio" },
+	{ .compatible = "ingenic,jz4755-gpio" },
+	{ .compatible = "ingenic,jz4760-gpio" },
+	{ .compatible = "ingenic,jz4770-gpio" },
+	{ .compatible = "ingenic,jz4775-gpio" },
+	{ .compatible = "ingenic,jz4780-gpio" },
+	{ .compatible = "ingenic,x1000-gpio" },
+	{ .compatible = "ingenic,x1830-gpio" },
+	{ .compatible = "ingenic,x2000-gpio" },
+	{ .compatible = "ingenic,x2100-gpio" },
 	{},
 };
 
@@ -3759,6 +4149,7 @@ static int __init ingenic_pinctrl_probe(struct platform_device *pdev)
 	void __iomem *base;
 	const struct ingenic_chip_info *chip_info;
 	struct device_node *node;
+	struct regmap_config regmap_config;
 	unsigned int i;
 	int err;
 
@@ -3776,8 +4167,10 @@ static int __init ingenic_pinctrl_probe(struct platform_device *pdev)
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 
-	jzpc->map = devm_regmap_init_mmio(dev, base,
-			&ingenic_pinctrl_regmap_config);
+	regmap_config = ingenic_pinctrl_regmap_config;
+	regmap_config.max_register = chip_info->num_chips * chip_info->reg_offset;
+
+	jzpc->map = devm_regmap_init_mmio(dev, base, &regmap_config);
 	if (IS_ERR(jzpc->map)) {
 		dev_err(dev, "Failed to create regmap\n");
 		return PTR_ERR(jzpc->map);
@@ -3843,7 +4236,7 @@ static int __init ingenic_pinctrl_probe(struct platform_device *pdev)
 	dev_set_drvdata(dev, jzpc->map);
 
 	for_each_child_of_node(dev->of_node, node) {
-		if (of_match_node(ingenic_gpio_of_match, node)) {
+		if (of_match_node(ingenic_gpio_of_matches, node)) {
 			err = ingenic_gpio_probe(jzpc, node);
 			if (err) {
 				of_node_put(node);
@@ -3857,7 +4250,7 @@ static int __init ingenic_pinctrl_probe(struct platform_device *pdev)
 
 #define IF_ENABLED(cfg, ptr)	PTR_IF(IS_ENABLED(cfg), (ptr))
 
-static const struct of_device_id ingenic_pinctrl_of_match[] = {
+static const struct of_device_id ingenic_pinctrl_of_matches[] = {
 	{
 		.compatible = "ingenic,jz4730-pinctrl",
 		.data = IF_ENABLED(CONFIG_MACH_JZ4730, &jz4730_chip_info)
@@ -3922,13 +4315,17 @@ static const struct of_device_id ingenic_pinctrl_of_match[] = {
 		.compatible = "ingenic,x2000e-pinctrl",
 		.data = IF_ENABLED(CONFIG_MACH_X2000, &x2000_chip_info)
 	},
+	{
+		.compatible = "ingenic,x2100-pinctrl",
+		.data = IF_ENABLED(CONFIG_MACH_X2100, &x2100_chip_info)
+	},
 	{ /* sentinel */ },
 };
 
 static struct platform_driver ingenic_pinctrl_driver = {
 	.driver = {
 		.name = "pinctrl-ingenic",
-		.of_match_table = ingenic_pinctrl_of_match,
+		.of_match_table = ingenic_pinctrl_of_matches,
 	},
 };
 
diff --git a/drivers/pinctrl/pinctrl-keembay.c b/drivers/pinctrl/pinctrl-keembay.c
new file mode 100644
index 000000000000..2bce563d5b8b
--- /dev/null
+++ b/drivers/pinctrl/pinctrl-keembay.c
@@ -0,0 +1,1731 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2020 Intel Corporation */
+
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/gpio/driver.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+
+#include <linux/pinctrl/pinconf.h>
+#include <linux/pinctrl/pinconf-generic.h>
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/pinctrl/pinmux.h>
+
+#include <linux/platform_device.h>
+
+#include "core.h"
+#include "pinmux.h"
+
+/* GPIO data registers' offsets */
+#define KEEMBAY_GPIO_DATA_OUT		0x000
+#define KEEMBAY_GPIO_DATA_IN		0x020
+#define KEEMBAY_GPIO_DATA_IN_RAW	0x040
+#define KEEMBAY_GPIO_DATA_HIGH		0x060
+#define KEEMBAY_GPIO_DATA_LOW		0x080
+
+/* GPIO Interrupt and mode registers' offsets */
+#define KEEMBAY_GPIO_INT_CFG		0x000
+#define KEEMBAY_GPIO_MODE		0x070
+
+/* GPIO mode register bit fields */
+#define KEEMBAY_GPIO_MODE_PULLUP_MASK	GENMASK(13, 12)
+#define KEEMBAY_GPIO_MODE_DRIVE_MASK	GENMASK(8, 7)
+#define KEEMBAY_GPIO_MODE_INV_MASK	GENMASK(5, 4)
+#define KEEMBAY_GPIO_MODE_SELECT_MASK	GENMASK(2, 0)
+#define KEEMBAY_GPIO_MODE_DIR_OVR	BIT(15)
+#define KEEMBAY_GPIO_MODE_REN		BIT(11)
+#define KEEMBAY_GPIO_MODE_SCHMITT_EN	BIT(10)
+#define KEEMBAY_GPIO_MODE_SLEW_RATE	BIT(9)
+#define KEEMBAY_GPIO_IRQ_ENABLE		BIT(7)
+#define KEEMBAY_GPIO_MODE_DIR		BIT(3)
+#define KEEMBAY_GPIO_MODE_DEFAULT	0x7
+#define KEEMBAY_GPIO_MODE_INV_VAL	0x3
+
+#define KEEMBAY_GPIO_DISABLE		0
+#define KEEMBAY_GPIO_PULL_UP		1
+#define KEEMBAY_GPIO_PULL_DOWN		2
+#define KEEMBAY_GPIO_BUS_HOLD		3
+#define KEEMBAY_GPIO_NUM_IRQ		8
+#define KEEMBAY_GPIO_MAX_PER_IRQ	4
+#define KEEMBAY_GPIO_MAX_PER_REG	32
+#define KEEMBAY_GPIO_MIN_STRENGTH	2
+#define KEEMBAY_GPIO_MAX_STRENGTH	12
+#define KEEMBAY_GPIO_SENSE_LOW		(IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_EDGE_FALLING)
+
+/* GPIO reg address calculation */
+#define KEEMBAY_GPIO_REG_OFFSET(pin)	((pin) * 4)
+
+/**
+ * struct keembay_mux_desc - Mux properties of each GPIO pin
+ * @mode: Pin mode when operating in this function
+ * @name: Pin function name
+ */
+struct keembay_mux_desc {
+	u8 mode;
+	const char *name;
+};
+
+#define KEEMBAY_PIN_DESC(pin_number, pin_name, ...) {	\
+	.number = pin_number,				\
+	.name = pin_name,				\
+	.drv_data = &(struct keembay_mux_desc[]) {	\
+		    __VA_ARGS__, { } },			\
+}							\
+
+#define KEEMBAY_MUX(pin_mode, pin_function) {		\
+	.mode = pin_mode,				\
+	.name = pin_function,				\
+}							\
+
+/**
+ * struct keembay_gpio_irq - Config of each GPIO Interrupt sources
+ * @source: Interrupt source number (0 - 7)
+ * @line: Actual Interrupt line number
+ * @pins: Array of GPIO pins using this Interrupt line
+ * @trigger: Interrupt trigger type for this line
+ * @num_share: Number of pins currently using this Interrupt line
+ */
+struct keembay_gpio_irq {
+	unsigned int source;
+	unsigned int line;
+	unsigned int pins[KEEMBAY_GPIO_MAX_PER_IRQ];
+	unsigned int trigger;
+	unsigned int num_share;
+};
+
+/**
+ * struct keembay_pinctrl - Intel Keembay pinctrl structure
+ * @pctrl: Pointer to the pin controller device
+ * @base0: First register base address
+ * @base1: Second register base address
+ * @dev: Pointer to the device structure
+ * @chip: GPIO chip used by this pin controller
+ * @soc: Pin control configuration data based on SoC
+ * @lock: Spinlock to protect various gpio config register access
+ * @ngroups: Number of pin groups available
+ * @nfuncs: Number of pin functions available
+ * @npins: Number of GPIO pins available
+ * @irq: Store Interrupt source
+ * @max_gpios_level_type: Store max level trigger type
+ * @max_gpios_edge_type: Store max edge trigger type
+ */
+struct keembay_pinctrl {
+	struct pinctrl_dev *pctrl;
+	void __iomem *base0;
+	void __iomem *base1;
+	struct device *dev;
+	struct gpio_chip chip;
+	const struct keembay_pin_soc *soc;
+	raw_spinlock_t lock;
+	unsigned int ngroups;
+	unsigned int nfuncs;
+	unsigned int npins;
+	struct keembay_gpio_irq irq[KEEMBAY_GPIO_NUM_IRQ];
+	int max_gpios_level_type;
+	int max_gpios_edge_type;
+};
+
+/**
+ * struct keembay_pin_soc - Pin control config data based on SoC
+ * @pins: Pin description structure
+ */
+struct keembay_pin_soc {
+	const struct pinctrl_pin_desc *pins;
+};
+
+static const struct pinctrl_pin_desc keembay_pins[] = {
+	KEEMBAY_PIN_DESC(0, "GPIO0",
+			 KEEMBAY_MUX(0x0, "I2S0_M0"),
+			 KEEMBAY_MUX(0x1, "SD0_M1"),
+			 KEEMBAY_MUX(0x2, "SLVDS0_M2"),
+			 KEEMBAY_MUX(0x3, "I2C0_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "ETH_M5"),
+			 KEEMBAY_MUX(0x6, "LCD_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(1, "GPIO1",
+			 KEEMBAY_MUX(0x0, "I2S0_M0"),
+			 KEEMBAY_MUX(0x1, "SD0_M1"),
+			 KEEMBAY_MUX(0x2, "SLVDS0_M2"),
+			 KEEMBAY_MUX(0x3, "I2C0_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "ETH_M5"),
+			 KEEMBAY_MUX(0x6, "LCD_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(2, "GPIO2",
+			 KEEMBAY_MUX(0x0, "I2S0_M0"),
+			 KEEMBAY_MUX(0x1, "I2S0_M1"),
+			 KEEMBAY_MUX(0x2, "SLVDS0_M2"),
+			 KEEMBAY_MUX(0x3, "I2C1_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "ETH_M5"),
+			 KEEMBAY_MUX(0x6, "LCD_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(3, "GPIO3",
+			 KEEMBAY_MUX(0x0, "I2S0_M0"),
+			 KEEMBAY_MUX(0x1, "I2S0_M1"),
+			 KEEMBAY_MUX(0x2, "SLVDS0_M2"),
+			 KEEMBAY_MUX(0x3, "I2C1_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "ETH_M5"),
+			 KEEMBAY_MUX(0x6, "LCD_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(4, "GPIO4",
+			 KEEMBAY_MUX(0x0, "I2S0_M0"),
+			 KEEMBAY_MUX(0x1, "I2S0_M1"),
+			 KEEMBAY_MUX(0x2, "SLVDS0_M2"),
+			 KEEMBAY_MUX(0x3, "I2C2_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "ETH_M5"),
+			 KEEMBAY_MUX(0x6, "LCD_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(5, "GPIO5",
+			 KEEMBAY_MUX(0x0, "I2S0_M0"),
+			 KEEMBAY_MUX(0x1, "I2S0_M1"),
+			 KEEMBAY_MUX(0x2, "SLVDS0_M2"),
+			 KEEMBAY_MUX(0x3, "I2C2_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "ETH_M5"),
+			 KEEMBAY_MUX(0x6, "LCD_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(6, "GPIO6",
+			 KEEMBAY_MUX(0x0, "I2S1_M0"),
+			 KEEMBAY_MUX(0x1, "SD0_M1"),
+			 KEEMBAY_MUX(0x2, "SLVDS0_M2"),
+			 KEEMBAY_MUX(0x3, "I2C3_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "ETH_M5"),
+			 KEEMBAY_MUX(0x6, "LCD_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(7, "GPIO7",
+			 KEEMBAY_MUX(0x0, "I2S1_M0"),
+			 KEEMBAY_MUX(0x1, "SD0_M1"),
+			 KEEMBAY_MUX(0x2, "SLVDS0_M2"),
+			 KEEMBAY_MUX(0x3, "I2C3_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "ETH_M5"),
+			 KEEMBAY_MUX(0x6, "LCD_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(8, "GPIO8",
+			 KEEMBAY_MUX(0x0, "I2S1_M0"),
+			 KEEMBAY_MUX(0x1, "I2S1_M1"),
+			 KEEMBAY_MUX(0x2, "SLVDS0_M2"),
+			 KEEMBAY_MUX(0x3, "UART0_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "ETH_M5"),
+			 KEEMBAY_MUX(0x6, "LCD_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(9, "GPIO9",
+			 KEEMBAY_MUX(0x0, "I2S1_M0"),
+			 KEEMBAY_MUX(0x1, "I2S1_M1"),
+			 KEEMBAY_MUX(0x2, "PWM_M2"),
+			 KEEMBAY_MUX(0x3, "UART0_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "ETH_M5"),
+			 KEEMBAY_MUX(0x6, "LCD_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(10, "GPIO10",
+			 KEEMBAY_MUX(0x0, "I2S2_M0"),
+			 KEEMBAY_MUX(0x1, "SD0_M1"),
+			 KEEMBAY_MUX(0x2, "PWM_M2"),
+			 KEEMBAY_MUX(0x3, "UART0_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "ETH_M5"),
+			 KEEMBAY_MUX(0x6, "LCD_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(11, "GPIO11",
+			 KEEMBAY_MUX(0x0, "I2S2_M0"),
+			 KEEMBAY_MUX(0x1, "SD0_M1"),
+			 KEEMBAY_MUX(0x2, "PWM_M2"),
+			 KEEMBAY_MUX(0x3, "UART0_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "ETH_M5"),
+			 KEEMBAY_MUX(0x6, "LCD_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(12, "GPIO12",
+			 KEEMBAY_MUX(0x0, "I2S2_M0"),
+			 KEEMBAY_MUX(0x1, "I2S2_M1"),
+			 KEEMBAY_MUX(0x2, "PWM_M2"),
+			 KEEMBAY_MUX(0x3, "SPI0_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "ETH_M5"),
+			 KEEMBAY_MUX(0x6, "LCD_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(13, "GPIO13",
+			 KEEMBAY_MUX(0x0, "I2S2_M0"),
+			 KEEMBAY_MUX(0x1, "I2S2_M1"),
+			 KEEMBAY_MUX(0x2, "PWM_M2"),
+			 KEEMBAY_MUX(0x3, "SPI0_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "ETH_M5"),
+			 KEEMBAY_MUX(0x6, "LCD_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(14, "GPIO14",
+			 KEEMBAY_MUX(0x0, "UART0_M0"),
+			 KEEMBAY_MUX(0x1, "I2S3_M1"),
+			 KEEMBAY_MUX(0x2, "PWM_M2"),
+			 KEEMBAY_MUX(0x3, "SD1_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "ETH_M5"),
+			 KEEMBAY_MUX(0x6, "LCD_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(15, "GPIO15",
+			 KEEMBAY_MUX(0x0, "UART0_M0"),
+			 KEEMBAY_MUX(0x1, "I2S3_M1"),
+			 KEEMBAY_MUX(0x2, "UART0_M2"),
+			 KEEMBAY_MUX(0x3, "SD1_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "SPI1_M5"),
+			 KEEMBAY_MUX(0x6, "LCD_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(16, "GPIO16",
+			 KEEMBAY_MUX(0x0, "UART0_M0"),
+			 KEEMBAY_MUX(0x1, "I2S3_M1"),
+			 KEEMBAY_MUX(0x2, "UART0_M2"),
+			 KEEMBAY_MUX(0x3, "SD1_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "SPI1_M5"),
+			 KEEMBAY_MUX(0x6, "LCD_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(17, "GPIO17",
+			 KEEMBAY_MUX(0x0, "UART0_M0"),
+			 KEEMBAY_MUX(0x1, "I2S3_M1"),
+			 KEEMBAY_MUX(0x2, "I2S3_M2"),
+			 KEEMBAY_MUX(0x3, "SD1_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "SPI1_M5"),
+			 KEEMBAY_MUX(0x6, "LCD_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(18, "GPIO18",
+			 KEEMBAY_MUX(0x0, "UART1_M0"),
+			 KEEMBAY_MUX(0x1, "SPI0_M1"),
+			 KEEMBAY_MUX(0x2, "I2S3_M2"),
+			 KEEMBAY_MUX(0x3, "SD1_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "SPI1_M5"),
+			 KEEMBAY_MUX(0x6, "LCD_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(19, "GPIO19",
+			 KEEMBAY_MUX(0x0, "UART1_M0"),
+			 KEEMBAY_MUX(0x1, "LCD_M1"),
+			 KEEMBAY_MUX(0x2, "DEBUG_M2"),
+			 KEEMBAY_MUX(0x3, "SD1_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "SPI1_M5"),
+			 KEEMBAY_MUX(0x6, "LCD_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(20, "GPIO20",
+			 KEEMBAY_MUX(0x0, "UART1_M0"),
+			 KEEMBAY_MUX(0x1, "LCD_M1"),
+			 KEEMBAY_MUX(0x2, "DEBUG_M2"),
+			 KEEMBAY_MUX(0x3, "CPR_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "SPI1_M5"),
+			 KEEMBAY_MUX(0x6, "SLVDS0_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(21, "GPIO21",
+			 KEEMBAY_MUX(0x0, "UART1_M0"),
+			 KEEMBAY_MUX(0x1, "LCD_M1"),
+			 KEEMBAY_MUX(0x2, "DEBUG_M2"),
+			 KEEMBAY_MUX(0x3, "CPR_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "I3C0_M5"),
+			 KEEMBAY_MUX(0x6, "SLVDS0_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(22, "GPIO22",
+			 KEEMBAY_MUX(0x0, "I2C0_M0"),
+			 KEEMBAY_MUX(0x1, "UART2_M1"),
+			 KEEMBAY_MUX(0x2, "DEBUG_M2"),
+			 KEEMBAY_MUX(0x3, "CPR_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "I3C0_M5"),
+			 KEEMBAY_MUX(0x6, "SLVDS0_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(23, "GPIO23",
+			 KEEMBAY_MUX(0x0, "I2C0_M0"),
+			 KEEMBAY_MUX(0x1, "UART2_M1"),
+			 KEEMBAY_MUX(0x2, "DEBUG_M2"),
+			 KEEMBAY_MUX(0x3, "CPR_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "I3C1_M5"),
+			 KEEMBAY_MUX(0x6, "SLVDS0_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(24, "GPIO24",
+			 KEEMBAY_MUX(0x0, "I2C1_M0"),
+			 KEEMBAY_MUX(0x1, "UART2_M1"),
+			 KEEMBAY_MUX(0x2, "DEBUG_M2"),
+			 KEEMBAY_MUX(0x3, "CPR_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "I3C1_M5"),
+			 KEEMBAY_MUX(0x6, "SLVDS0_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(25, "GPIO25",
+			 KEEMBAY_MUX(0x0, "I2C1_M0"),
+			 KEEMBAY_MUX(0x1, "UART2_M1"),
+			 KEEMBAY_MUX(0x2, "SPI0_M2"),
+			 KEEMBAY_MUX(0x3, "CPR_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "I3C2_M5"),
+			 KEEMBAY_MUX(0x6, "SLVDS0_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(26, "GPIO26",
+			 KEEMBAY_MUX(0x0, "SPI0_M0"),
+			 KEEMBAY_MUX(0x1, "I2C2_M1"),
+			 KEEMBAY_MUX(0x2, "UART0_M2"),
+			 KEEMBAY_MUX(0x3, "DSU_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "I3C2_M5"),
+			 KEEMBAY_MUX(0x6, "SLVDS0_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(27, "GPIO27",
+			 KEEMBAY_MUX(0x0, "SPI0_M0"),
+			 KEEMBAY_MUX(0x1, "I2C2_M1"),
+			 KEEMBAY_MUX(0x2, "UART0_M2"),
+			 KEEMBAY_MUX(0x3, "DSU_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "I3C0_M5"),
+			 KEEMBAY_MUX(0x6, "SLVDS0_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(28, "GPIO28",
+			 KEEMBAY_MUX(0x0, "SPI0_M0"),
+			 KEEMBAY_MUX(0x1, "I2C3_M1"),
+			 KEEMBAY_MUX(0x2, "UART0_M2"),
+			 KEEMBAY_MUX(0x3, "PWM_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "I3C1_M5"),
+			 KEEMBAY_MUX(0x6, "SLVDS0_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(29, "GPIO29",
+			 KEEMBAY_MUX(0x0, "SPI0_M0"),
+			 KEEMBAY_MUX(0x1, "I2C3_M1"),
+			 KEEMBAY_MUX(0x2, "UART0_M2"),
+			 KEEMBAY_MUX(0x3, "PWM_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "I3C2_M5"),
+			 KEEMBAY_MUX(0x6, "SLVDS1_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(30, "GPIO30",
+			 KEEMBAY_MUX(0x0, "SPI0_M0"),
+			 KEEMBAY_MUX(0x1, "I2S0_M1"),
+			 KEEMBAY_MUX(0x2, "I2C4_M2"),
+			 KEEMBAY_MUX(0x3, "PWM_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "SLVDS1_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(31, "GPIO31",
+			 KEEMBAY_MUX(0x0, "SPI0_M0"),
+			 KEEMBAY_MUX(0x1, "I2S0_M1"),
+			 KEEMBAY_MUX(0x2, "I2C4_M2"),
+			 KEEMBAY_MUX(0x3, "PWM_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "UART1_M5"),
+			 KEEMBAY_MUX(0x6, "SLVDS1_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(32, "GPIO32",
+			 KEEMBAY_MUX(0x0, "SD0_M0"),
+			 KEEMBAY_MUX(0x1, "SPI0_M1"),
+			 KEEMBAY_MUX(0x2, "UART1_M2"),
+			 KEEMBAY_MUX(0x3, "PWM_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "PCIE_M5"),
+			 KEEMBAY_MUX(0x6, "SLVDS1_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(33, "GPIO33",
+			 KEEMBAY_MUX(0x0, "SD0_M0"),
+			 KEEMBAY_MUX(0x1, "SPI0_M1"),
+			 KEEMBAY_MUX(0x2, "UART1_M2"),
+			 KEEMBAY_MUX(0x3, "PWM_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "PCIE_M5"),
+			 KEEMBAY_MUX(0x6, "SLVDS1_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(34, "GPIO34",
+			 KEEMBAY_MUX(0x0, "SD0_M0"),
+			 KEEMBAY_MUX(0x1, "SPI0_M1"),
+			 KEEMBAY_MUX(0x2, "I2C0_M2"),
+			 KEEMBAY_MUX(0x3, "UART1_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "I2S0_M5"),
+			 KEEMBAY_MUX(0x6, "SLVDS1_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(35, "GPIO35",
+			 KEEMBAY_MUX(0x0, "SD0_M0"),
+			 KEEMBAY_MUX(0x1, "PCIE_M1"),
+			 KEEMBAY_MUX(0x2, "I2C0_M2"),
+			 KEEMBAY_MUX(0x3, "UART1_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "I2S0_M5"),
+			 KEEMBAY_MUX(0x6, "SLVDS1_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(36, "GPIO36",
+			 KEEMBAY_MUX(0x0, "SD0_M0"),
+			 KEEMBAY_MUX(0x1, "SPI3_M1"),
+			 KEEMBAY_MUX(0x2, "I2C1_M2"),
+			 KEEMBAY_MUX(0x3, "DEBUG_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "I2S0_M5"),
+			 KEEMBAY_MUX(0x6, "SLVDS1_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(37, "GPIO37",
+			 KEEMBAY_MUX(0x0, "SD0_M0"),
+			 KEEMBAY_MUX(0x1, "SPI3_M1"),
+			 KEEMBAY_MUX(0x2, "I2C1_M2"),
+			 KEEMBAY_MUX(0x3, "DEBUG_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "I2S0_M5"),
+			 KEEMBAY_MUX(0x6, "SLVDS1_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(38, "GPIO38",
+			 KEEMBAY_MUX(0x0, "I3C1_M0"),
+			 KEEMBAY_MUX(0x1, "SPI3_M1"),
+			 KEEMBAY_MUX(0x2, "UART3_M2"),
+			 KEEMBAY_MUX(0x3, "DEBUG_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "I2C2_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(39, "GPIO39",
+			 KEEMBAY_MUX(0x0, "I3C1_M0"),
+			 KEEMBAY_MUX(0x1, "SPI3_M1"),
+			 KEEMBAY_MUX(0x2, "UART3_M2"),
+			 KEEMBAY_MUX(0x3, "DEBUG_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "I2C2_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(40, "GPIO40",
+			 KEEMBAY_MUX(0x0, "I2S2_M0"),
+			 KEEMBAY_MUX(0x1, "SPI3_M1"),
+			 KEEMBAY_MUX(0x2, "UART3_M2"),
+			 KEEMBAY_MUX(0x3, "DEBUG_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "I2C3_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(41, "GPIO41",
+			 KEEMBAY_MUX(0x0, "ETH_M0"),
+			 KEEMBAY_MUX(0x1, "SPI3_M1"),
+			 KEEMBAY_MUX(0x2, "SPI3_M2"),
+			 KEEMBAY_MUX(0x3, "DEBUG_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "I2C3_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(42, "GPIO42",
+			 KEEMBAY_MUX(0x0, "ETH_M0"),
+			 KEEMBAY_MUX(0x1, "SD1_M1"),
+			 KEEMBAY_MUX(0x2, "SPI3_M2"),
+			 KEEMBAY_MUX(0x3, "CPR_M3"),
+			 KEEMBAY_MUX(0x4, "CAM_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "I2C4_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(43, "GPIO43",
+			 KEEMBAY_MUX(0x0, "ETH_M0"),
+			 KEEMBAY_MUX(0x1, "SD1_M1"),
+			 KEEMBAY_MUX(0x2, "SPI3_M2"),
+			 KEEMBAY_MUX(0x3, "CPR_M3"),
+			 KEEMBAY_MUX(0x4, "I2S0_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "I2C4_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(44, "GPIO44",
+			 KEEMBAY_MUX(0x0, "ETH_M0"),
+			 KEEMBAY_MUX(0x1, "SD1_M1"),
+			 KEEMBAY_MUX(0x2, "SPI0_M2"),
+			 KEEMBAY_MUX(0x3, "CPR_M3"),
+			 KEEMBAY_MUX(0x4, "I2S0_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "CAM_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(45, "GPIO45",
+			 KEEMBAY_MUX(0x0, "ETH_M0"),
+			 KEEMBAY_MUX(0x1, "SD1_M1"),
+			 KEEMBAY_MUX(0x2, "SPI0_M2"),
+			 KEEMBAY_MUX(0x3, "CPR_M3"),
+			 KEEMBAY_MUX(0x4, "I2S0_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "CAM_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(46, "GPIO46",
+			 KEEMBAY_MUX(0x0, "ETH_M0"),
+			 KEEMBAY_MUX(0x1, "SD1_M1"),
+			 KEEMBAY_MUX(0x2, "SPI0_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "I2S0_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "CAM_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(47, "GPIO47",
+			 KEEMBAY_MUX(0x0, "ETH_M0"),
+			 KEEMBAY_MUX(0x1, "SD1_M1"),
+			 KEEMBAY_MUX(0x2, "SPI0_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "I2S0_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "CAM_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(48, "GPIO48",
+			 KEEMBAY_MUX(0x0, "ETH_M0"),
+			 KEEMBAY_MUX(0x1, "SPI2_M1"),
+			 KEEMBAY_MUX(0x2, "UART2_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "I2S0_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "CAM_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(49, "GPIO49",
+			 KEEMBAY_MUX(0x0, "ETH_M0"),
+			 KEEMBAY_MUX(0x1, "SPI2_M1"),
+			 KEEMBAY_MUX(0x2, "UART2_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "I2S1_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "CAM_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(50, "GPIO50",
+			 KEEMBAY_MUX(0x0, "ETH_M0"),
+			 KEEMBAY_MUX(0x1, "SPI2_M1"),
+			 KEEMBAY_MUX(0x2, "UART2_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "I2S1_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "CAM_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(51, "GPIO51",
+			 KEEMBAY_MUX(0x0, "ETH_M0"),
+			 KEEMBAY_MUX(0x1, "SPI2_M1"),
+			 KEEMBAY_MUX(0x2, "UART2_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "I2S1_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "CAM_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(52, "GPIO52",
+			 KEEMBAY_MUX(0x0, "ETH_M0"),
+			 KEEMBAY_MUX(0x1, "SPI2_M1"),
+			 KEEMBAY_MUX(0x2, "SD0_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "I2S1_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "CAM_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(53, "GPIO53",
+			 KEEMBAY_MUX(0x0, "ETH_M0"),
+			 KEEMBAY_MUX(0x1, "SPI2_M1"),
+			 KEEMBAY_MUX(0x2, "SD0_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "I2S2_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "CAM_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(54, "GPIO54",
+			 KEEMBAY_MUX(0x0, "ETH_M0"),
+			 KEEMBAY_MUX(0x1, "SPI2_M1"),
+			 KEEMBAY_MUX(0x2, "SD0_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "I2S2_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "CAM_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(55, "GPIO55",
+			 KEEMBAY_MUX(0x0, "ETH_M0"),
+			 KEEMBAY_MUX(0x1, "SPI2_M1"),
+			 KEEMBAY_MUX(0x2, "SD1_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "I2S2_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "CAM_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(56, "GPIO56",
+			 KEEMBAY_MUX(0x0, "ETH_M0"),
+			 KEEMBAY_MUX(0x1, "SPI2_M1"),
+			 KEEMBAY_MUX(0x2, "SD1_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "I2S2_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "CAM_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(57, "GPIO57",
+			 KEEMBAY_MUX(0x0, "SPI1_M0"),
+			 KEEMBAY_MUX(0x1, "I2S1_M1"),
+			 KEEMBAY_MUX(0x2, "SD1_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "UART0_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "CAM_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(58, "GPIO58",
+			 KEEMBAY_MUX(0x0, "SPI1_M0"),
+			 KEEMBAY_MUX(0x1, "ETH_M1"),
+			 KEEMBAY_MUX(0x2, "SD0_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "UART0_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "CAM_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(59, "GPIO59",
+			 KEEMBAY_MUX(0x0, "SPI1_M0"),
+			 KEEMBAY_MUX(0x1, "ETH_M1"),
+			 KEEMBAY_MUX(0x2, "SD0_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "UART0_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "CAM_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(60, "GPIO60",
+			 KEEMBAY_MUX(0x0, "SPI1_M0"),
+			 KEEMBAY_MUX(0x1, "ETH_M1"),
+			 KEEMBAY_MUX(0x2, "I3C1_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "UART0_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "CAM_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(61, "GPIO61",
+			 KEEMBAY_MUX(0x0, "SPI1_M0"),
+			 KEEMBAY_MUX(0x1, "ETH_M1"),
+			 KEEMBAY_MUX(0x2, "SD0_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "UART1_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "CAM_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(62, "GPIO62",
+			 KEEMBAY_MUX(0x0, "SPI1_M0"),
+			 KEEMBAY_MUX(0x1, "ETH_M1"),
+			 KEEMBAY_MUX(0x2, "SD1_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "UART1_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "CAM_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(63, "GPIO63",
+			 KEEMBAY_MUX(0x0, "I2S1_M0"),
+			 KEEMBAY_MUX(0x1, "SPI1_M1"),
+			 KEEMBAY_MUX(0x2, "SD1_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "UART1_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "CAM_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(64, "GPIO64",
+			 KEEMBAY_MUX(0x0, "I2S2_M0"),
+			 KEEMBAY_MUX(0x1, "SPI1_M1"),
+			 KEEMBAY_MUX(0x2, "ETH_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "UART1_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "CAM_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(65, "GPIO65",
+			 KEEMBAY_MUX(0x0, "I3C0_M0"),
+			 KEEMBAY_MUX(0x1, "SPI1_M1"),
+			 KEEMBAY_MUX(0x2, "SD1_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "SPI0_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "CAM_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(66, "GPIO66",
+			 KEEMBAY_MUX(0x0, "I3C0_M0"),
+			 KEEMBAY_MUX(0x1, "ETH_M1"),
+			 KEEMBAY_MUX(0x2, "I2C0_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "SPI0_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "CAM_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(67, "GPIO67",
+			 KEEMBAY_MUX(0x0, "I3C1_M0"),
+			 KEEMBAY_MUX(0x1, "ETH_M1"),
+			 KEEMBAY_MUX(0x2, "I2C0_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "SPI0_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "I2S3_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(68, "GPIO68",
+			 KEEMBAY_MUX(0x0, "I3C1_M0"),
+			 KEEMBAY_MUX(0x1, "ETH_M1"),
+			 KEEMBAY_MUX(0x2, "I2C1_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "SPI0_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "I2S3_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(69, "GPIO69",
+			 KEEMBAY_MUX(0x0, "I3C2_M0"),
+			 KEEMBAY_MUX(0x1, "ETH_M1"),
+			 KEEMBAY_MUX(0x2, "I2C1_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "SPI0_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "I2S3_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(70, "GPIO70",
+			 KEEMBAY_MUX(0x0, "I3C2_M0"),
+			 KEEMBAY_MUX(0x1, "ETH_M1"),
+			 KEEMBAY_MUX(0x2, "SPI0_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "SD0_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "I2S3_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(71, "GPIO71",
+			 KEEMBAY_MUX(0x0, "I3C0_M0"),
+			 KEEMBAY_MUX(0x1, "ETH_M1"),
+			 KEEMBAY_MUX(0x2, "SLVDS1_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "SD0_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "I2S3_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(72, "GPIO72",
+			 KEEMBAY_MUX(0x0, "I3C1_M0"),
+			 KEEMBAY_MUX(0x1, "ETH_M1"),
+			 KEEMBAY_MUX(0x2, "SLVDS1_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "SD0_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "UART2_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(73, "GPIO73",
+			 KEEMBAY_MUX(0x0, "I3C2_M0"),
+			 KEEMBAY_MUX(0x1, "ETH_M1"),
+			 KEEMBAY_MUX(0x2, "SLVDS1_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "SD0_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "UART2_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(74, "GPIO74",
+			 KEEMBAY_MUX(0x0, "I3C0_M0"),
+			 KEEMBAY_MUX(0x1, "ETH_M1"),
+			 KEEMBAY_MUX(0x2, "SLVDS1_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "SD0_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "UART2_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(75, "GPIO75",
+			 KEEMBAY_MUX(0x0, "I3C0_M0"),
+			 KEEMBAY_MUX(0x1, "ETH_M1"),
+			 KEEMBAY_MUX(0x2, "SLVDS1_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "SD0_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "UART2_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(76, "GPIO76",
+			 KEEMBAY_MUX(0x0, "I2C2_M0"),
+			 KEEMBAY_MUX(0x1, "I3C0_M1"),
+			 KEEMBAY_MUX(0x2, "SLVDS1_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "ETH_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "UART3_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(77, "GPIO77",
+			 KEEMBAY_MUX(0x0, "PCIE_M0"),
+			 KEEMBAY_MUX(0x1, "I3C1_M1"),
+			 KEEMBAY_MUX(0x2, "SLVDS1_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "I3C2_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "UART3_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(78, "GPIO78",
+			 KEEMBAY_MUX(0x0, "PCIE_M0"),
+			 KEEMBAY_MUX(0x1, "I3C2_M1"),
+			 KEEMBAY_MUX(0x2, "SLVDS1_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "I3C2_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "UART3_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+	KEEMBAY_PIN_DESC(79, "GPIO79",
+			 KEEMBAY_MUX(0x0, "PCIE_M0"),
+			 KEEMBAY_MUX(0x1, "I2C2_M1"),
+			 KEEMBAY_MUX(0x2, "SLVDS1_M2"),
+			 KEEMBAY_MUX(0x3, "TPIU_M3"),
+			 KEEMBAY_MUX(0x4, "I3C2_M4"),
+			 KEEMBAY_MUX(0x5, "LCD_M5"),
+			 KEEMBAY_MUX(0x6, "UART3_M6"),
+			 KEEMBAY_MUX(0x7, "GPIO_M7")),
+};
+
+static inline u32 keembay_read_reg(void __iomem *base, unsigned int pin)
+{
+	return readl(base + KEEMBAY_GPIO_REG_OFFSET(pin));
+}
+
+static inline u32 keembay_read_gpio_reg(void __iomem *base, unsigned int pin)
+{
+	return keembay_read_reg(base, pin / KEEMBAY_GPIO_MAX_PER_REG);
+}
+
+static inline u32 keembay_read_pin(void __iomem *base, unsigned int pin)
+{
+	u32 val = keembay_read_gpio_reg(base, pin);
+
+	return !!(val & BIT(pin % KEEMBAY_GPIO_MAX_PER_REG));
+}
+
+static inline void keembay_write_reg(u32 val, void __iomem *base, unsigned int pin)
+{
+	writel(val, base + KEEMBAY_GPIO_REG_OFFSET(pin));
+}
+
+static inline void keembay_write_gpio_reg(u32 val, void __iomem *base, unsigned int pin)
+{
+	keembay_write_reg(val, base, pin / KEEMBAY_GPIO_MAX_PER_REG);
+}
+
+static void keembay_gpio_invert(struct keembay_pinctrl *kpc, unsigned int pin)
+{
+	unsigned int val = keembay_read_reg(kpc->base1 + KEEMBAY_GPIO_MODE, pin);
+
+	/*
+	 * This IP doesn't support the falling edge and low level interrupt
+	 * trigger. Invert API is used to mimic the falling edge and low
+	 * level support
+	 */
+
+	val |= FIELD_PREP(KEEMBAY_GPIO_MODE_INV_MASK, KEEMBAY_GPIO_MODE_INV_VAL);
+	keembay_write_reg(val, kpc->base1 + KEEMBAY_GPIO_MODE, pin);
+}
+
+static void keembay_gpio_restore_default(struct keembay_pinctrl *kpc, unsigned int pin)
+{
+	unsigned int val = keembay_read_reg(kpc->base1 + KEEMBAY_GPIO_MODE, pin);
+
+	val &= FIELD_PREP(KEEMBAY_GPIO_MODE_INV_MASK, 0);
+	keembay_write_reg(val, kpc->base1 + KEEMBAY_GPIO_MODE, pin);
+}
+
+static int keembay_request_gpio(struct pinctrl_dev *pctldev,
+				struct pinctrl_gpio_range *range, unsigned int pin)
+{
+	struct keembay_pinctrl *kpc = pinctrl_dev_get_drvdata(pctldev);
+	unsigned int val;
+
+	if (pin >= kpc->npins)
+		return -EINVAL;
+
+	val = keembay_read_reg(kpc->base1 + KEEMBAY_GPIO_MODE, pin);
+	val = FIELD_GET(KEEMBAY_GPIO_MODE_SELECT_MASK, val);
+
+	/* As per Pin Mux Map, Modes 0 to 6 are for peripherals */
+	if (val != KEEMBAY_GPIO_MODE_DEFAULT)
+		return -EBUSY;
+
+	return 0;
+}
+
+static int keembay_set_mux(struct pinctrl_dev *pctldev, unsigned int fun_sel,
+			   unsigned int grp_sel)
+{
+	struct keembay_pinctrl *kpc = pinctrl_dev_get_drvdata(pctldev);
+	struct function_desc *func;
+	struct group_desc *grp;
+	unsigned int val;
+	u8 pin_mode;
+	int pin;
+
+	grp = pinctrl_generic_get_group(pctldev, grp_sel);
+	if (!grp)
+		return -EINVAL;
+
+	func = pinmux_generic_get_function(pctldev, fun_sel);
+	if (!func)
+		return -EINVAL;
+
+	/* Change modes for pins in the selected group */
+	pin = *grp->pins;
+	pin_mode = *(u8 *)(func->data);
+
+	val = keembay_read_reg(kpc->base1 + KEEMBAY_GPIO_MODE, pin);
+	val = u32_replace_bits(val, pin_mode, KEEMBAY_GPIO_MODE_SELECT_MASK);
+	keembay_write_reg(val, kpc->base1 + KEEMBAY_GPIO_MODE, pin);
+
+	return 0;
+}
+
+static u32 keembay_pinconf_get_pull(struct keembay_pinctrl *kpc, unsigned int pin)
+{
+	unsigned int val = keembay_read_reg(kpc->base1 + KEEMBAY_GPIO_MODE, pin);
+
+	return FIELD_GET(KEEMBAY_GPIO_MODE_PULLUP_MASK, val);
+}
+
+static int keembay_pinconf_set_pull(struct keembay_pinctrl *kpc, unsigned int pin,
+				    unsigned int pull)
+{
+	unsigned int val = keembay_read_reg(kpc->base1 + KEEMBAY_GPIO_MODE, pin);
+
+	val = u32_replace_bits(val, pull, KEEMBAY_GPIO_MODE_PULLUP_MASK);
+	keembay_write_reg(val, kpc->base1 + KEEMBAY_GPIO_MODE, pin);
+
+	return 0;
+}
+
+static int keembay_pinconf_get_drive(struct keembay_pinctrl *kpc, unsigned int pin)
+{
+	unsigned int val = keembay_read_reg(kpc->base1 + KEEMBAY_GPIO_MODE, pin);
+
+	val = FIELD_GET(KEEMBAY_GPIO_MODE_DRIVE_MASK, val) * 4;
+	if (val)
+		return val;
+
+	return KEEMBAY_GPIO_MIN_STRENGTH;
+}
+
+static int keembay_pinconf_set_drive(struct keembay_pinctrl *kpc, unsigned int pin,
+				     unsigned int drive)
+{
+	unsigned int val = keembay_read_reg(kpc->base1 + KEEMBAY_GPIO_MODE, pin);
+	unsigned int strength = clamp_val(drive, KEEMBAY_GPIO_MIN_STRENGTH,
+				 KEEMBAY_GPIO_MAX_STRENGTH) / 4;
+
+	val = u32_replace_bits(val, strength, KEEMBAY_GPIO_MODE_DRIVE_MASK);
+	keembay_write_reg(val, kpc->base1 + KEEMBAY_GPIO_MODE, pin);
+
+	return 0;
+}
+
+static int keembay_pinconf_get_slew_rate(struct keembay_pinctrl *kpc, unsigned int pin)
+{
+	unsigned int val = keembay_read_reg(kpc->base1 + KEEMBAY_GPIO_MODE, pin);
+
+	return !!(val & KEEMBAY_GPIO_MODE_SLEW_RATE);
+}
+
+static int keembay_pinconf_set_slew_rate(struct keembay_pinctrl *kpc, unsigned int pin,
+					 unsigned int slew_rate)
+{
+	unsigned int val = keembay_read_reg(kpc->base1 + KEEMBAY_GPIO_MODE, pin);
+
+	if (slew_rate)
+		val |= KEEMBAY_GPIO_MODE_SLEW_RATE;
+	else
+		val &= ~KEEMBAY_GPIO_MODE_SLEW_RATE;
+
+	keembay_write_reg(val, kpc->base1 + KEEMBAY_GPIO_MODE, pin);
+
+	return 0;
+}
+
+static int keembay_pinconf_get_schmitt(struct keembay_pinctrl *kpc, unsigned int pin)
+{
+	unsigned int val = keembay_read_reg(kpc->base1 + KEEMBAY_GPIO_MODE, pin);
+
+	return !!(val & KEEMBAY_GPIO_MODE_SCHMITT_EN);
+}
+
+static int keembay_pinconf_set_schmitt(struct keembay_pinctrl *kpc, unsigned int pin,
+				       unsigned int schmitt_en)
+{
+	unsigned int val = keembay_read_reg(kpc->base1 + KEEMBAY_GPIO_MODE, pin);
+
+	if (schmitt_en)
+		val |= KEEMBAY_GPIO_MODE_SCHMITT_EN;
+	else
+		val &= ~KEEMBAY_GPIO_MODE_SCHMITT_EN;
+
+	keembay_write_reg(val, kpc->base1 + KEEMBAY_GPIO_MODE, pin);
+
+	return 0;
+}
+
+static int keembay_pinconf_get(struct pinctrl_dev *pctldev, unsigned int pin,
+			       unsigned long *cfg)
+{
+	struct keembay_pinctrl *kpc = pinctrl_dev_get_drvdata(pctldev);
+	unsigned int param = pinconf_to_config_param(*cfg);
+	unsigned int val;
+
+	if (pin >= kpc->npins)
+		return -EINVAL;
+
+	switch (param) {
+	case PIN_CONFIG_BIAS_DISABLE:
+		if (keembay_pinconf_get_pull(kpc, pin) != KEEMBAY_GPIO_DISABLE)
+			return -EINVAL;
+		break;
+
+	case PIN_CONFIG_BIAS_PULL_UP:
+		if (keembay_pinconf_get_pull(kpc, pin) != KEEMBAY_GPIO_PULL_UP)
+			return -EINVAL;
+		break;
+
+	case PIN_CONFIG_BIAS_PULL_DOWN:
+		if (keembay_pinconf_get_pull(kpc, pin) != KEEMBAY_GPIO_PULL_DOWN)
+			return -EINVAL;
+		break;
+
+	case PIN_CONFIG_BIAS_BUS_HOLD:
+		if (keembay_pinconf_get_pull(kpc, pin) != KEEMBAY_GPIO_BUS_HOLD)
+			return -EINVAL;
+		break;
+
+	case PIN_CONFIG_INPUT_SCHMITT_ENABLE:
+		if (!keembay_pinconf_get_schmitt(kpc, pin))
+			return -EINVAL;
+		break;
+
+	case PIN_CONFIG_SLEW_RATE:
+		val = keembay_pinconf_get_slew_rate(kpc, pin);
+		*cfg = pinconf_to_config_packed(param, val);
+		break;
+
+	case PIN_CONFIG_DRIVE_STRENGTH:
+		val = keembay_pinconf_get_drive(kpc, pin);
+		*cfg = pinconf_to_config_packed(param, val);
+		break;
+
+	default:
+		return -ENOTSUPP;
+	}
+
+	return 0;
+}
+
+static int keembay_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
+			       unsigned long *cfg, unsigned int num_configs)
+{
+	struct keembay_pinctrl *kpc = pinctrl_dev_get_drvdata(pctldev);
+	enum pin_config_param param;
+	unsigned int arg, i;
+	int ret = 0;
+
+	if (pin >= kpc->npins)
+		return -EINVAL;
+
+	for (i = 0; i < num_configs; i++) {
+		param = pinconf_to_config_param(cfg[i]);
+		arg = pinconf_to_config_argument(cfg[i]);
+
+		switch (param) {
+		case PIN_CONFIG_BIAS_DISABLE:
+			ret = keembay_pinconf_set_pull(kpc, pin, KEEMBAY_GPIO_DISABLE);
+			break;
+
+		case PIN_CONFIG_BIAS_PULL_UP:
+			ret = keembay_pinconf_set_pull(kpc, pin, KEEMBAY_GPIO_PULL_UP);
+			break;
+
+		case PIN_CONFIG_BIAS_PULL_DOWN:
+			ret = keembay_pinconf_set_pull(kpc, pin, KEEMBAY_GPIO_PULL_DOWN);
+			break;
+
+		case PIN_CONFIG_BIAS_BUS_HOLD:
+			ret = keembay_pinconf_set_pull(kpc, pin, KEEMBAY_GPIO_BUS_HOLD);
+			break;
+
+		case PIN_CONFIG_INPUT_SCHMITT_ENABLE:
+			ret = keembay_pinconf_set_schmitt(kpc, pin, arg);
+			break;
+
+		case PIN_CONFIG_SLEW_RATE:
+			ret = keembay_pinconf_set_slew_rate(kpc, pin, arg);
+			break;
+
+		case PIN_CONFIG_DRIVE_STRENGTH:
+			ret = keembay_pinconf_set_drive(kpc, pin, arg);
+			break;
+
+		default:
+			return -ENOTSUPP;
+		}
+		if (ret)
+			return ret;
+	}
+	return ret;
+}
+
+static const struct pinctrl_ops keembay_pctlops = {
+	.get_groups_count	= pinctrl_generic_get_group_count,
+	.get_group_name		= pinctrl_generic_get_group_name,
+	.get_group_pins		= pinctrl_generic_get_group_pins,
+	.dt_node_to_map		= pinconf_generic_dt_node_to_map_all,
+	.dt_free_map		= pinconf_generic_dt_free_map,
+};
+
+static const struct pinmux_ops keembay_pmxops = {
+	.get_functions_count	= pinmux_generic_get_function_count,
+	.get_function_name	= pinmux_generic_get_function_name,
+	.get_function_groups	= pinmux_generic_get_function_groups,
+	.gpio_request_enable	= keembay_request_gpio,
+	.set_mux		= keembay_set_mux,
+};
+
+static const struct pinconf_ops keembay_confops = {
+	.is_generic	= true,
+	.pin_config_get	= keembay_pinconf_get,
+	.pin_config_set	= keembay_pinconf_set,
+};
+
+static struct pinctrl_desc keembay_pinctrl_desc = {
+	.name		= "keembay-pinmux",
+	.pctlops	= &keembay_pctlops,
+	.pmxops		= &keembay_pmxops,
+	.confops	= &keembay_confops,
+	.owner		= THIS_MODULE,
+};
+
+static int keembay_gpio_get(struct gpio_chip *gc, unsigned int pin)
+{
+	struct keembay_pinctrl *kpc = gpiochip_get_data(gc);
+	unsigned int val, offset;
+
+	val = keembay_read_reg(kpc->base1 + KEEMBAY_GPIO_MODE, pin);
+	offset = (val & KEEMBAY_GPIO_MODE_DIR) ? KEEMBAY_GPIO_DATA_IN : KEEMBAY_GPIO_DATA_OUT;
+
+	return keembay_read_pin(kpc->base0 + offset, pin);
+}
+
+static void keembay_gpio_set(struct gpio_chip *gc, unsigned int pin, int val)
+{
+	struct keembay_pinctrl *kpc = gpiochip_get_data(gc);
+	unsigned int reg_val;
+
+	reg_val = keembay_read_gpio_reg(kpc->base0 + KEEMBAY_GPIO_DATA_OUT, pin);
+	if (val)
+		keembay_write_gpio_reg(reg_val | BIT(pin % KEEMBAY_GPIO_MAX_PER_REG),
+				       kpc->base0 + KEEMBAY_GPIO_DATA_HIGH, pin);
+	else
+		keembay_write_gpio_reg(~reg_val | BIT(pin % KEEMBAY_GPIO_MAX_PER_REG),
+				       kpc->base0 + KEEMBAY_GPIO_DATA_LOW, pin);
+}
+
+static int keembay_gpio_get_direction(struct gpio_chip *gc, unsigned int pin)
+{
+	struct keembay_pinctrl *kpc = gpiochip_get_data(gc);
+	unsigned int val = keembay_read_reg(kpc->base1 + KEEMBAY_GPIO_MODE, pin);
+
+	return !!(val & KEEMBAY_GPIO_MODE_DIR);
+}
+
+static int keembay_gpio_set_direction_in(struct gpio_chip *gc, unsigned int pin)
+{
+	struct keembay_pinctrl *kpc = gpiochip_get_data(gc);
+	unsigned int val;
+
+	val = keembay_read_reg(kpc->base1 + KEEMBAY_GPIO_MODE, pin);
+	val |= KEEMBAY_GPIO_MODE_DIR;
+	keembay_write_reg(val, kpc->base1 + KEEMBAY_GPIO_MODE, pin);
+
+	return 0;
+}
+
+static int keembay_gpio_set_direction_out(struct gpio_chip *gc,
+					  unsigned int pin, int value)
+{
+	struct keembay_pinctrl *kpc = gpiochip_get_data(gc);
+	unsigned int val;
+
+	val = keembay_read_reg(kpc->base1 + KEEMBAY_GPIO_MODE, pin);
+	val &= ~KEEMBAY_GPIO_MODE_DIR;
+	keembay_write_reg(val, kpc->base1 + KEEMBAY_GPIO_MODE, pin);
+	keembay_gpio_set(gc, pin, value);
+
+	return 0;
+}
+
+static void keembay_gpio_irq_handler(struct irq_desc *desc)
+{
+	struct gpio_chip *gc = irq_desc_get_handler_data(desc);
+	unsigned int kmb_irq = irq_desc_get_irq(desc);
+	unsigned long reg, clump = 0, bit = 0;
+	struct irq_chip *parent_chip;
+	struct keembay_pinctrl *kpc;
+	unsigned int src, pin, val;
+
+	/* Identify GPIO interrupt number from GIC interrupt number */
+	for (src = 0; src < KEEMBAY_GPIO_NUM_IRQ; src++) {
+		if (kmb_irq == gc->irq.parents[src])
+			break;
+	}
+
+	if (src == KEEMBAY_GPIO_NUM_IRQ)
+		return;
+
+	parent_chip = irq_desc_get_chip(desc);
+	kpc = gpiochip_get_data(gc);
+
+	chained_irq_enter(parent_chip, desc);
+	reg = keembay_read_reg(kpc->base1 + KEEMBAY_GPIO_INT_CFG, src);
+
+	/*
+	 * Each Interrupt line can be shared by up to 4 GPIO pins. Enable bit
+	 * and input values were checked to identify the source of the
+	 * Interrupt. The checked enable bit positions are 7, 15, 23 and 31.
+	 */
+	for_each_set_clump8(bit, clump, &reg, BITS_PER_TYPE(typeof(reg))) {
+		pin = clump & ~KEEMBAY_GPIO_IRQ_ENABLE;
+		val = keembay_read_pin(kpc->base0 + KEEMBAY_GPIO_DATA_IN, pin);
+		kmb_irq = irq_linear_revmap(gc->irq.domain, pin);
+
+		/* Checks if the interrupt is enabled */
+		if (val && (clump & KEEMBAY_GPIO_IRQ_ENABLE))
+			generic_handle_irq(kmb_irq);
+	}
+	chained_irq_exit(parent_chip, desc);
+}
+
+static void keembay_gpio_clear_irq(struct irq_data *data, unsigned long pos,
+				   u32 src, irq_hw_number_t pin)
+{
+	struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
+	struct keembay_pinctrl *kpc = gpiochip_get_data(gc);
+	unsigned long trig = irqd_get_trigger_type(data);
+	struct keembay_gpio_irq *irq = &kpc->irq[src];
+	unsigned long val;
+
+	/* Check if the value of pos/KEEMBAY_GPIO_NUM_IRQ is in valid range. */
+	if ((pos / KEEMBAY_GPIO_NUM_IRQ) >= KEEMBAY_GPIO_MAX_PER_IRQ)
+		return;
+
+	/* Retains val register as it handles other interrupts as well. */
+	val = keembay_read_reg(kpc->base1 + KEEMBAY_GPIO_INT_CFG, src);
+
+	bitmap_set_value8(&val, 0, pos);
+	keembay_write_reg(val, kpc->base1 + KEEMBAY_GPIO_INT_CFG, src);
+
+	irq->num_share--;
+	irq->pins[pos / KEEMBAY_GPIO_NUM_IRQ] = 0;
+
+	if (trig & IRQ_TYPE_LEVEL_MASK)
+		keembay_gpio_restore_default(kpc, pin);
+
+	if (irq->trigger == IRQ_TYPE_LEVEL_HIGH)
+		kpc->max_gpios_level_type++;
+	else if (irq->trigger == IRQ_TYPE_EDGE_RISING)
+		kpc->max_gpios_edge_type++;
+}
+
+static int keembay_find_free_slot(struct keembay_pinctrl *kpc, unsigned int src)
+{
+	unsigned long val = keembay_read_reg(kpc->base1 + KEEMBAY_GPIO_INT_CFG, src);
+
+	return bitmap_find_free_region(&val, KEEMBAY_GPIO_MAX_PER_REG, 3) / KEEMBAY_GPIO_NUM_IRQ;
+}
+
+static int keembay_find_free_src(struct keembay_pinctrl *kpc, unsigned int trig)
+{
+	int src, type = 0;
+
+	if (trig & IRQ_TYPE_LEVEL_MASK)
+		type = IRQ_TYPE_LEVEL_HIGH;
+	else if (trig & IRQ_TYPE_EDGE_BOTH)
+		type = IRQ_TYPE_EDGE_RISING;
+
+	for (src = 0; src < KEEMBAY_GPIO_NUM_IRQ; src++) {
+		if (kpc->irq[src].trigger != type)
+			continue;
+
+		if (!keembay_read_reg(kpc->base1 + KEEMBAY_GPIO_INT_CFG, src) ||
+		    kpc->irq[src].num_share < KEEMBAY_GPIO_MAX_PER_IRQ)
+			return src;
+	}
+
+	return -EBUSY;
+}
+
+static void keembay_gpio_set_irq(struct keembay_pinctrl *kpc, int src,
+				 int slot, irq_hw_number_t pin)
+{
+	unsigned long val = pin | KEEMBAY_GPIO_IRQ_ENABLE;
+	struct keembay_gpio_irq *irq = &kpc->irq[src];
+	unsigned long flags, reg;
+
+	raw_spin_lock_irqsave(&kpc->lock, flags);
+	reg = keembay_read_reg(kpc->base1 + KEEMBAY_GPIO_INT_CFG, src);
+	bitmap_set_value8(&reg, val, slot * 8);
+	keembay_write_reg(reg, kpc->base1 + KEEMBAY_GPIO_INT_CFG, src);
+	raw_spin_unlock_irqrestore(&kpc->lock, flags);
+
+	if (irq->trigger == IRQ_TYPE_LEVEL_HIGH)
+		kpc->max_gpios_level_type--;
+	else if (irq->trigger == IRQ_TYPE_EDGE_RISING)
+		kpc->max_gpios_edge_type--;
+
+	irq->source = src;
+	irq->pins[slot] = pin;
+	irq->num_share++;
+}
+
+static void keembay_gpio_irq_enable(struct irq_data *data)
+{
+	struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
+	struct keembay_pinctrl *kpc = gpiochip_get_data(gc);
+	unsigned int trig = irqd_get_trigger_type(data);
+	irq_hw_number_t pin = irqd_to_hwirq(data);
+	int src, slot;
+
+	/* Check which Interrupt source and slot is available */
+	src = keembay_find_free_src(kpc, trig);
+	slot = keembay_find_free_slot(kpc, src);
+
+	if (src < 0 || slot < 0)
+		return;
+
+	if (trig & KEEMBAY_GPIO_SENSE_LOW)
+		keembay_gpio_invert(kpc, pin);
+
+	keembay_gpio_set_irq(kpc, src, slot, pin);
+}
+
+static void keembay_gpio_irq_ack(struct irq_data *data)
+{
+	/*
+	 * The keembay_gpio_irq_ack function is needed to handle_edge_irq.
+	 * IRQ ack is not possible from the SOC perspective. The IP by itself
+	 * is used for handling interrupts which do not come in short-time and
+	 * not used as protocol or communication interrupts. All the interrupts
+	 * are threaded IRQ interrupts. But this function is expected to be
+	 * present as the gpio IP is registered with irq framework. Otherwise
+	 * handle_edge_irq() fails.
+	 */
+}
+
+static void keembay_gpio_irq_disable(struct irq_data *data)
+{
+	struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
+	struct keembay_pinctrl *kpc = gpiochip_get_data(gc);
+	irq_hw_number_t pin = irqd_to_hwirq(data);
+	unsigned long reg, clump = 0, pos = 0;
+	unsigned int src;
+
+	for (src = 0; src < KEEMBAY_GPIO_NUM_IRQ; src++) {
+		reg = keembay_read_reg(kpc->base1 + KEEMBAY_GPIO_INT_CFG, src);
+		for_each_set_clump8(pos, clump, &reg, BITS_PER_TYPE(typeof(reg))) {
+			if ((clump & ~KEEMBAY_GPIO_IRQ_ENABLE) == pin) {
+				keembay_gpio_clear_irq(data, pos, src, pin);
+				return;
+			}
+		}
+	}
+}
+
+static int keembay_gpio_irq_set_type(struct irq_data *data, unsigned int type)
+{
+	struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
+	struct keembay_pinctrl *kpc = gpiochip_get_data(gc);
+
+	/* Change EDGE_BOTH as EDGE_RISING in order to claim the IRQ for power button */
+	if (!kpc->max_gpios_edge_type && (type & IRQ_TYPE_EDGE_BOTH))
+		type = IRQ_TYPE_EDGE_RISING;
+
+	if (!kpc->max_gpios_level_type && (type & IRQ_TYPE_LEVEL_MASK))
+		type = IRQ_TYPE_NONE;
+
+	if (type & IRQ_TYPE_EDGE_BOTH)
+		irq_set_handler_locked(data, handle_edge_irq);
+	else if (type & IRQ_TYPE_LEVEL_MASK)
+		irq_set_handler_locked(data, handle_level_irq);
+	else
+		return -EINVAL;
+
+	return 0;
+}
+
+static int keembay_gpio_add_pin_ranges(struct gpio_chip *chip)
+{
+	struct keembay_pinctrl *kpc = gpiochip_get_data(chip);
+	int ret;
+
+	ret = gpiochip_add_pin_range(chip, dev_name(kpc->dev), 0, 0, chip->ngpio);
+	if (ret)
+		dev_err_probe(kpc->dev, ret, "failed to add GPIO pin range\n");
+	return ret;
+}
+
+static struct irq_chip keembay_gpio_irqchip = {
+	.name = "keembay-gpio",
+	.irq_enable = keembay_gpio_irq_enable,
+	.irq_disable = keembay_gpio_irq_disable,
+	.irq_set_type = keembay_gpio_irq_set_type,
+	.irq_ack = keembay_gpio_irq_ack,
+};
+
+static int keembay_gpiochip_probe(struct keembay_pinctrl *kpc,
+				  struct platform_device *pdev)
+{
+	unsigned int i, level_line = 0, edge_line = 0;
+	struct gpio_chip *gc = &kpc->chip;
+	struct gpio_irq_chip *girq;
+
+	/* Setup GPIO IRQ chip */
+	girq			= &kpc->chip.irq;
+	girq->chip		= &keembay_gpio_irqchip;
+	girq->parent_handler	= keembay_gpio_irq_handler;
+	girq->num_parents	= KEEMBAY_GPIO_NUM_IRQ;
+	girq->parents		= devm_kcalloc(kpc->dev, girq->num_parents,
+					       sizeof(*girq->parents), GFP_KERNEL);
+
+	if (!girq->parents)
+		return -ENOMEM;
+
+	/* Setup GPIO chip */
+	gc->label		= dev_name(kpc->dev);
+	gc->parent		= kpc->dev;
+	gc->request		= gpiochip_generic_request;
+	gc->free		= gpiochip_generic_free;
+	gc->get_direction	= keembay_gpio_get_direction;
+	gc->direction_input	= keembay_gpio_set_direction_in;
+	gc->direction_output	= keembay_gpio_set_direction_out;
+	gc->get			= keembay_gpio_get;
+	gc->set			= keembay_gpio_set;
+	gc->set_config		= gpiochip_generic_config;
+	gc->base		= -1;
+	gc->ngpio		= kpc->npins;
+	gc->add_pin_ranges	= keembay_gpio_add_pin_ranges;
+
+	for (i = 0; i < KEEMBAY_GPIO_NUM_IRQ; i++) {
+		struct keembay_gpio_irq *kmb_irq = &kpc->irq[i];
+		int irq;
+
+		irq = platform_get_irq_optional(pdev, i);
+		if (irq <= 0)
+			continue;
+
+		girq->parents[i]	= irq;
+		kmb_irq->line	= girq->parents[i];
+		kmb_irq->source	= i;
+		kmb_irq->trigger	= irq_get_trigger_type(girq->parents[i]);
+		kmb_irq->num_share	= 0;
+
+		if (kmb_irq->trigger == IRQ_TYPE_LEVEL_HIGH)
+			level_line++;
+		else
+			edge_line++;
+	}
+
+	kpc->max_gpios_level_type = level_line * KEEMBAY_GPIO_MAX_PER_IRQ;
+	kpc->max_gpios_edge_type = edge_line * KEEMBAY_GPIO_MAX_PER_IRQ;
+
+	girq->default_type = IRQ_TYPE_NONE;
+	girq->handler = handle_bad_irq;
+
+	return devm_gpiochip_add_data(kpc->dev, gc, kpc);
+}
+
+static int keembay_build_groups(struct keembay_pinctrl *kpc)
+{
+	struct group_desc *grp;
+	unsigned int i;
+
+	kpc->ngroups = kpc->npins;
+	grp = devm_kcalloc(kpc->dev, kpc->ngroups, sizeof(*grp), GFP_KERNEL);
+	if (!grp)
+		return -ENOMEM;
+
+	/* Each pin is categorised as one group */
+	for (i = 0; i < kpc->ngroups; i++) {
+		const struct pinctrl_pin_desc *pdesc = keembay_pins + i;
+		struct group_desc *kmb_grp = grp + i;
+
+		kmb_grp->name = pdesc->name;
+		kmb_grp->pins = (int *)&pdesc->number;
+		pinctrl_generic_add_group(kpc->pctrl, kmb_grp->name,
+					  kmb_grp->pins, 1, NULL);
+	}
+
+	return 0;
+}
+
+static int keembay_pinctrl_reg(struct keembay_pinctrl *kpc,  struct device *dev)
+{
+	int ret;
+
+	keembay_pinctrl_desc.pins = keembay_pins;
+	ret = of_property_read_u32(dev->of_node, "ngpios", &kpc->npins);
+	if (ret < 0)
+		return ret;
+	keembay_pinctrl_desc.npins = kpc->npins;
+
+	kpc->pctrl = devm_pinctrl_register(kpc->dev, &keembay_pinctrl_desc, kpc);
+
+	return PTR_ERR_OR_ZERO(kpc->pctrl);
+}
+
+static int keembay_add_functions(struct keembay_pinctrl *kpc,
+				 struct function_desc *function)
+{
+	unsigned int i;
+
+	/* Assign the groups for each function */
+	for (i = 0; i < kpc->npins; i++) {
+		const struct pinctrl_pin_desc *pdesc = keembay_pins + i;
+		struct keembay_mux_desc *mux = pdesc->drv_data;
+
+		while (mux->name) {
+			struct function_desc *func;
+			const char **grp;
+			size_t grp_size;
+			u32 j, grp_num;
+
+			for (j = 0; j < kpc->nfuncs; j++) {
+				if (!strcmp(mux->name, function[j].name))
+					break;
+			}
+
+			if (j == kpc->nfuncs)
+				return -EINVAL;
+
+			func = function + j;
+			grp_num = func->num_group_names;
+			grp_size = sizeof(*func->group_names);
+
+			if (!func->group_names) {
+				func->group_names = devm_kcalloc(kpc->dev,
+								 grp_num,
+								 grp_size,
+								 GFP_KERNEL);
+				if (!func->group_names)
+					return -ENOMEM;
+			}
+
+			grp = func->group_names;
+			while (*grp)
+				grp++;
+
+			*grp = pdesc->name;
+			mux++;
+		}
+	}
+
+	/* Add all functions */
+	for (i = 0; i < kpc->nfuncs; i++) {
+		pinmux_generic_add_function(kpc->pctrl,
+					    function[i].name,
+					    function[i].group_names,
+					    function[i].num_group_names,
+					    function[i].data);
+	}
+
+	return 0;
+}
+
+static int keembay_build_functions(struct keembay_pinctrl *kpc)
+{
+	struct function_desc *keembay_funcs, *new_funcs;
+	int i;
+
+	/* Allocate total number of functions */
+	kpc->nfuncs = 0;
+	keembay_funcs = kcalloc(kpc->npins * 8, sizeof(*keembay_funcs), GFP_KERNEL);
+	if (!keembay_funcs)
+		return -ENOMEM;
+
+	/* Find total number of functions and each's properties */
+	for (i = 0; i < kpc->npins; i++) {
+		const struct pinctrl_pin_desc *pdesc = keembay_pins + i;
+		struct keembay_mux_desc *mux = pdesc->drv_data;
+
+		while (mux->name) {
+			struct function_desc *fdesc = keembay_funcs;
+
+			while (fdesc->name) {
+				if (!strcmp(mux->name, fdesc->name)) {
+					fdesc->num_group_names++;
+					break;
+				}
+
+				fdesc++;
+			}
+
+			if (!fdesc->name) {
+				fdesc->name = mux->name;
+				fdesc->num_group_names = 1;
+				fdesc->data = &mux->mode;
+				kpc->nfuncs++;
+			}
+
+			mux++;
+		}
+	}
+
+	/* Reallocate memory based on actual number of functions */
+	new_funcs = krealloc(keembay_funcs, kpc->nfuncs * sizeof(*new_funcs), GFP_KERNEL);
+	if (!new_funcs) {
+		kfree(keembay_funcs);
+		return -ENOMEM;
+	}
+
+	return keembay_add_functions(kpc, new_funcs);
+}
+
+static const struct keembay_pin_soc keembay_data = {
+	.pins    = keembay_pins,
+};
+
+static const struct of_device_id keembay_pinctrl_match[] = {
+	{ .compatible = "intel,keembay-pinctrl", .data = &keembay_data },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, keembay_pinctrl_match);
+
+static int keembay_pinctrl_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct keembay_pinctrl *kpc;
+	int ret;
+
+	kpc = devm_kzalloc(dev, sizeof(*kpc), GFP_KERNEL);
+	if (!kpc)
+		return -ENOMEM;
+
+	kpc->dev = dev;
+	kpc->soc = device_get_match_data(dev);
+
+	kpc->base0 = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(kpc->base0))
+		return PTR_ERR(kpc->base0);
+
+	kpc->base1 = devm_platform_ioremap_resource(pdev, 1);
+	if (IS_ERR(kpc->base1))
+		return PTR_ERR(kpc->base1);
+
+	raw_spin_lock_init(&kpc->lock);
+
+	ret = keembay_pinctrl_reg(kpc, dev);
+	if (ret)
+		return ret;
+
+	ret = keembay_build_groups(kpc);
+	if (ret)
+		return ret;
+
+	ret = keembay_build_functions(kpc);
+	if (ret)
+		return ret;
+
+	ret = keembay_gpiochip_probe(kpc, pdev);
+	if (ret)
+		return ret;
+
+	platform_set_drvdata(pdev, kpc);
+
+	return 0;
+}
+
+static struct platform_driver keembay_pinctrl_driver = {
+	.probe = keembay_pinctrl_probe,
+	.driver = {
+		.name = "keembay-pinctrl",
+		.of_match_table = keembay_pinctrl_match,
+	},
+};
+module_platform_driver(keembay_pinctrl_driver);
+
+MODULE_AUTHOR("Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>");
+MODULE_AUTHOR("Vijayakannan Ayyathurai <vijayakannan.ayyathurai@intel.com>");
+MODULE_AUTHOR("Lakshmi Sowjanya D <lakshmi.sowjanya.d@intel.com>");
+MODULE_DESCRIPTION("Intel Keem Bay SoC pinctrl/GPIO driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
index aa6e72214609..67bec7ea0f8b 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -1115,7 +1115,7 @@ static int pcs_parse_bits_in_pinctrl_entry(struct pcs_device *pcs,
 {
 	const char *name = "pinctrl-single,bits";
 	struct pcs_func_vals *vals;
-	int rows, *pins, found = 0, res = -ENOMEM, i, fsel, gsel;
+	int rows, *pins, found = 0, res = -ENOMEM, i, fsel;
 	int npins_in_row;
 	struct pcs_function *function = NULL;
 
@@ -1125,6 +1125,11 @@ static int pcs_parse_bits_in_pinctrl_entry(struct pcs_device *pcs,
 		return -EINVAL;
 	}
 
+	if (PCS_HAS_PINCONF) {
+		dev_err(pcs->dev, "pinconf not supported\n");
+		return -ENOTSUPP;
+	}
+
 	npins_in_row = pcs->width / pcs->bits_per_pin;
 
 	vals = devm_kzalloc(pcs->dev,
@@ -1212,29 +1217,19 @@ static int pcs_parse_bits_in_pinctrl_entry(struct pcs_device *pcs,
 		goto free_pins;
 	}
 
-	gsel = pinctrl_generic_add_group(pcs->pctl, np->name, pins, found, pcs);
-	if (gsel < 0) {
-		res = gsel;
+	res = pinctrl_generic_add_group(pcs->pctl, np->name, pins, found, pcs);
+	if (res < 0)
 		goto free_function;
-	}
 
 	(*map)->type = PIN_MAP_TYPE_MUX_GROUP;
 	(*map)->data.mux.group = np->name;
 	(*map)->data.mux.function = np->name;
 
-	if (PCS_HAS_PINCONF) {
-		dev_err(pcs->dev, "pinconf not supported\n");
-		goto free_pingroups;
-	}
-
 	*num_maps = 1;
 	mutex_unlock(&pcs->mutex);
 
 	return 0;
 
-free_pingroups:
-	pinctrl_generic_remove_group(pcs->pctl, gsel);
-	*num_maps = 1;
 free_function:
 	pinmux_generic_remove_function(pcs->pctl, fsel);
 free_pins:
diff --git a/drivers/pinctrl/pinctrl-stmfx.c b/drivers/pinctrl/pinctrl-stmfx.c
index 008c83107a3c..5fa2488fae87 100644
--- a/drivers/pinctrl/pinctrl-stmfx.c
+++ b/drivers/pinctrl/pinctrl-stmfx.c
@@ -566,7 +566,7 @@ static irqreturn_t stmfx_pinctrl_irq_thread_fn(int irq, void *dev_id)
 	u8 pending[NR_GPIO_REGS];
 	u8 src[NR_GPIO_REGS] = {0, 0, 0};
 	unsigned long n, status;
-	int ret;
+	int i, ret;
 
 	ret = regmap_bulk_read(pctl->stmfx->map, STMFX_REG_IRQ_GPI_PENDING,
 			       &pending, NR_GPIO_REGS);
@@ -576,7 +576,9 @@ static irqreturn_t stmfx_pinctrl_irq_thread_fn(int irq, void *dev_id)
 	regmap_bulk_write(pctl->stmfx->map, STMFX_REG_IRQ_GPI_SRC,
 			  src, NR_GPIO_REGS);
 
-	status = *(unsigned long *)pending;
+	BUILD_BUG_ON(NR_GPIO_REGS > sizeof(status));
+	for (i = 0, status = 0; i < NR_GPIO_REGS; i++)
+		status |= (unsigned long)pending[i] << (i * 8);
 	for_each_set_bit(n, &status, gc->ngpio) {
 		handle_nested_irq(irq_find_mapping(gc->irq.domain, n));
 		stmfx_pinctrl_irq_toggle_trigger(pctl, n);
diff --git a/drivers/pinctrl/pinctrl-zynq.c b/drivers/pinctrl/pinctrl-zynq.c
index 5fb924a2eedd..a96af8a76a7a 100644
--- a/drivers/pinctrl/pinctrl-zynq.c
+++ b/drivers/pinctrl/pinctrl-zynq.c
@@ -1028,6 +1028,7 @@ static int zynq_pinconf_cfg_get(struct pinctrl_dev *pctldev,
 		break;
 	}
 	case PIN_CONFIG_IOSTANDARD:
+	case PIN_CONFIG_POWER_SOURCE:
 		arg = zynq_pinconf_iostd_get(reg);
 		break;
 	default:
@@ -1078,6 +1079,7 @@ static int zynq_pinconf_cfg_set(struct pinctrl_dev *pctldev,
 
 			break;
 		case PIN_CONFIG_IOSTANDARD:
+		case PIN_CONFIG_POWER_SOURCE:
 			if (arg <= zynq_iostd_min || arg >= zynq_iostd_max) {
 				dev_warn(pctldev->dev,
 					 "unsupported IO standard '%u'\n",
diff --git a/drivers/pinctrl/pinctrl-zynqmp.c b/drivers/pinctrl/pinctrl-zynqmp.c
index bbde676b7313..e14012209992 100644
--- a/drivers/pinctrl/pinctrl-zynqmp.c
+++ b/drivers/pinctrl/pinctrl-zynqmp.c
@@ -866,15 +866,6 @@ static int zynqmp_pinctrl_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static int zynqmp_pinctrl_remove(struct platform_device *pdev)
-{
-	struct zynqmp_pinctrl *pctrl = platform_get_drvdata(pdev);
-
-	pinctrl_unregister(pctrl->pctrl);
-
-	return 0;
-}
-
 static const struct of_device_id zynqmp_pinctrl_of_match[] = {
 	{ .compatible = "xlnx,zynqmp-pinctrl" },
 	{ }
@@ -887,7 +878,6 @@ static struct platform_driver zynqmp_pinctrl_driver = {
 		.of_match_table = zynqmp_pinctrl_of_match,
 	},
 	.probe = zynqmp_pinctrl_probe,
-	.remove = zynqmp_pinctrl_remove,
 };
 module_platform_driver(zynqmp_pinctrl_driver);
 
diff --git a/drivers/pinctrl/qcom/Kconfig b/drivers/pinctrl/qcom/Kconfig
index cad4e60df618..32ea2a8ec02b 100644
--- a/drivers/pinctrl/qcom/Kconfig
+++ b/drivers/pinctrl/qcom/Kconfig
@@ -88,6 +88,14 @@ config PINCTRL_MSM8960
 	  This is the pinctrl, pinmux, pinconf and gpiolib driver for the
 	  Qualcomm TLMM block found in the Qualcomm 8960 platform.
 
+config PINCTRL_MDM9607
+	tristate "Qualcomm 9607 pin controller driver"
+	depends on GPIOLIB && OF
+	depends on PINCTRL_MSM
+	help
+	  This is the pinctrl, pinmux, pinconf and gpiolib driver for the
+	  Qualcomm TLMM block found in the Qualcomm 9607 platform.
+
 config PINCTRL_MDM9615
 	tristate "Qualcomm 9615 pin controller driver"
 	depends on OF
@@ -256,6 +264,15 @@ config PINCTRL_SDX55
 	 Qualcomm Technologies Inc TLMM block found on the Qualcomm
 	 Technologies Inc SDX55 platform.
 
+config PINCTRL_SM6115
+	tristate "Qualcomm Technologies Inc SM6115,SM4250 pin controller driver"
+	depends on GPIOLIB && OF
+	depends on PINCTRL_MSM
+	help
+	 This is the pinctrl, pinmux, pinconf and gpiolib driver for the
+	 Qualcomm Technologies Inc TLMM block found on the Qualcomm
+	 Technologies Inc SM6115 and SM4250 platforms.
+
 config PINCTRL_SM6125
 	tristate "Qualcomm Technologies Inc SM6125 pin controller driver"
 	depends on OF
diff --git a/drivers/pinctrl/qcom/Makefile b/drivers/pinctrl/qcom/Makefile
index d696fe2789bb..7a12e8cd2fba 100644
--- a/drivers/pinctrl/qcom/Makefile
+++ b/drivers/pinctrl/qcom/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_PINCTRL_MSM8996)   += pinctrl-msm8996.o
 obj-$(CONFIG_PINCTRL_MSM8998)   += pinctrl-msm8998.o
 obj-$(CONFIG_PINCTRL_QCS404)	+= pinctrl-qcs404.o
 obj-$(CONFIG_PINCTRL_QDF2XXX)	+= pinctrl-qdf2xxx.o
+obj-$(CONFIG_PINCTRL_MDM9607)	+= pinctrl-mdm9607.o
 obj-$(CONFIG_PINCTRL_MDM9615)	+= pinctrl-mdm9615.o
 obj-$(CONFIG_PINCTRL_QCOM_SPMI_PMIC) += pinctrl-spmi-gpio.o
 obj-$(CONFIG_PINCTRL_QCOM_SPMI_PMIC) += pinctrl-spmi-mpp.o
@@ -30,6 +31,7 @@ obj-$(CONFIG_PINCTRL_SC8180X)	+= pinctrl-sc8180x.o
 obj-$(CONFIG_PINCTRL_SDM660)   += pinctrl-sdm660.o
 obj-$(CONFIG_PINCTRL_SDM845) += pinctrl-sdm845.o
 obj-$(CONFIG_PINCTRL_SDX55) += pinctrl-sdx55.o
+obj-$(CONFIG_PINCTRL_SM6115) += pinctrl-sm6115.o
 obj-$(CONFIG_PINCTRL_SM6125) += pinctrl-sm6125.o
 obj-$(CONFIG_PINCTRL_SM8150) += pinctrl-sm8150.o
 obj-$(CONFIG_PINCTRL_SM8250) += pinctrl-sm8250.o
diff --git a/drivers/pinctrl/qcom/pinctrl-mdm9607.c b/drivers/pinctrl/qcom/pinctrl-mdm9607.c
new file mode 100644
index 000000000000..d622b3df0fe7
--- /dev/null
+++ b/drivers/pinctrl/qcom/pinctrl-mdm9607.c
@@ -0,0 +1,1087 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, Konrad Dybcio <konrad.dybcio@somainline.org>
+ *
+ * based on pinctrl-msm8916.c
+ */
+
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pinctrl/pinctrl.h>
+
+#include "pinctrl-msm.h"
+
+static const struct pinctrl_pin_desc mdm9607_pins[] = {
+	PINCTRL_PIN(0, "GPIO_0"),
+	PINCTRL_PIN(1, "GPIO_1"),
+	PINCTRL_PIN(2, "GPIO_2"),
+	PINCTRL_PIN(3, "GPIO_3"),
+	PINCTRL_PIN(4, "GPIO_4"),
+	PINCTRL_PIN(5, "GPIO_5"),
+	PINCTRL_PIN(6, "GPIO_6"),
+	PINCTRL_PIN(7, "GPIO_7"),
+	PINCTRL_PIN(8, "GPIO_8"),
+	PINCTRL_PIN(9, "GPIO_9"),
+	PINCTRL_PIN(10, "GPIO_10"),
+	PINCTRL_PIN(11, "GPIO_11"),
+	PINCTRL_PIN(12, "GPIO_12"),
+	PINCTRL_PIN(13, "GPIO_13"),
+	PINCTRL_PIN(14, "GPIO_14"),
+	PINCTRL_PIN(15, "GPIO_15"),
+	PINCTRL_PIN(16, "GPIO_16"),
+	PINCTRL_PIN(17, "GPIO_17"),
+	PINCTRL_PIN(18, "GPIO_18"),
+	PINCTRL_PIN(19, "GPIO_19"),
+	PINCTRL_PIN(20, "GPIO_20"),
+	PINCTRL_PIN(21, "GPIO_21"),
+	PINCTRL_PIN(22, "GPIO_22"),
+	PINCTRL_PIN(23, "GPIO_23"),
+	PINCTRL_PIN(24, "GPIO_24"),
+	PINCTRL_PIN(25, "GPIO_25"),
+	PINCTRL_PIN(26, "GPIO_26"),
+	PINCTRL_PIN(27, "GPIO_27"),
+	PINCTRL_PIN(28, "GPIO_28"),
+	PINCTRL_PIN(29, "GPIO_29"),
+	PINCTRL_PIN(30, "GPIO_30"),
+	PINCTRL_PIN(31, "GPIO_31"),
+	PINCTRL_PIN(32, "GPIO_32"),
+	PINCTRL_PIN(33, "GPIO_33"),
+	PINCTRL_PIN(34, "GPIO_34"),
+	PINCTRL_PIN(35, "GPIO_35"),
+	PINCTRL_PIN(36, "GPIO_36"),
+	PINCTRL_PIN(37, "GPIO_37"),
+	PINCTRL_PIN(38, "GPIO_38"),
+	PINCTRL_PIN(39, "GPIO_39"),
+	PINCTRL_PIN(40, "GPIO_40"),
+	PINCTRL_PIN(41, "GPIO_41"),
+	PINCTRL_PIN(42, "GPIO_42"),
+	PINCTRL_PIN(43, "GPIO_43"),
+	PINCTRL_PIN(44, "GPIO_44"),
+	PINCTRL_PIN(45, "GPIO_45"),
+	PINCTRL_PIN(46, "GPIO_46"),
+	PINCTRL_PIN(47, "GPIO_47"),
+	PINCTRL_PIN(48, "GPIO_48"),
+	PINCTRL_PIN(49, "GPIO_49"),
+	PINCTRL_PIN(50, "GPIO_50"),
+	PINCTRL_PIN(51, "GPIO_51"),
+	PINCTRL_PIN(52, "GPIO_52"),
+	PINCTRL_PIN(53, "GPIO_53"),
+	PINCTRL_PIN(54, "GPIO_54"),
+	PINCTRL_PIN(55, "GPIO_55"),
+	PINCTRL_PIN(56, "GPIO_56"),
+	PINCTRL_PIN(57, "GPIO_57"),
+	PINCTRL_PIN(58, "GPIO_58"),
+	PINCTRL_PIN(59, "GPIO_59"),
+	PINCTRL_PIN(60, "GPIO_60"),
+	PINCTRL_PIN(61, "GPIO_61"),
+	PINCTRL_PIN(62, "GPIO_62"),
+	PINCTRL_PIN(63, "GPIO_63"),
+	PINCTRL_PIN(64, "GPIO_64"),
+	PINCTRL_PIN(65, "GPIO_65"),
+	PINCTRL_PIN(66, "GPIO_66"),
+	PINCTRL_PIN(67, "GPIO_67"),
+	PINCTRL_PIN(68, "GPIO_68"),
+	PINCTRL_PIN(69, "GPIO_69"),
+	PINCTRL_PIN(70, "GPIO_70"),
+	PINCTRL_PIN(71, "GPIO_71"),
+	PINCTRL_PIN(72, "GPIO_72"),
+	PINCTRL_PIN(73, "GPIO_73"),
+	PINCTRL_PIN(74, "GPIO_74"),
+	PINCTRL_PIN(75, "GPIO_75"),
+	PINCTRL_PIN(76, "GPIO_76"),
+	PINCTRL_PIN(77, "GPIO_77"),
+	PINCTRL_PIN(78, "GPIO_78"),
+	PINCTRL_PIN(79, "GPIO_79"),
+	PINCTRL_PIN(80, "SDC1_CLK"),
+	PINCTRL_PIN(81, "SDC1_CMD"),
+	PINCTRL_PIN(82, "SDC1_DATA"),
+	PINCTRL_PIN(83, "SDC2_CLK"),
+	PINCTRL_PIN(84, "SDC2_CMD"),
+	PINCTRL_PIN(85, "SDC2_DATA"),
+	PINCTRL_PIN(86, "QDSD_CLK"),
+	PINCTRL_PIN(87, "QDSD_CMD"),
+	PINCTRL_PIN(88, "QDSD_DATA0"),
+	PINCTRL_PIN(89, "QDSD_DATA1"),
+	PINCTRL_PIN(90, "QDSD_DATA2"),
+	PINCTRL_PIN(91, "QDSD_DATA3"),
+};
+
+#define DECLARE_MSM_GPIO_PINS(pin)	\
+	static const unsigned int gpio##pin##_pins[] = { pin }
+
+DECLARE_MSM_GPIO_PINS(0);
+DECLARE_MSM_GPIO_PINS(1);
+DECLARE_MSM_GPIO_PINS(2);
+DECLARE_MSM_GPIO_PINS(3);
+DECLARE_MSM_GPIO_PINS(4);
+DECLARE_MSM_GPIO_PINS(5);
+DECLARE_MSM_GPIO_PINS(6);
+DECLARE_MSM_GPIO_PINS(7);
+DECLARE_MSM_GPIO_PINS(8);
+DECLARE_MSM_GPIO_PINS(9);
+DECLARE_MSM_GPIO_PINS(10);
+DECLARE_MSM_GPIO_PINS(11);
+DECLARE_MSM_GPIO_PINS(12);
+DECLARE_MSM_GPIO_PINS(13);
+DECLARE_MSM_GPIO_PINS(14);
+DECLARE_MSM_GPIO_PINS(15);
+DECLARE_MSM_GPIO_PINS(16);
+DECLARE_MSM_GPIO_PINS(17);
+DECLARE_MSM_GPIO_PINS(18);
+DECLARE_MSM_GPIO_PINS(19);
+DECLARE_MSM_GPIO_PINS(20);
+DECLARE_MSM_GPIO_PINS(21);
+DECLARE_MSM_GPIO_PINS(22);
+DECLARE_MSM_GPIO_PINS(23);
+DECLARE_MSM_GPIO_PINS(24);
+DECLARE_MSM_GPIO_PINS(25);
+DECLARE_MSM_GPIO_PINS(26);
+DECLARE_MSM_GPIO_PINS(27);
+DECLARE_MSM_GPIO_PINS(28);
+DECLARE_MSM_GPIO_PINS(29);
+DECLARE_MSM_GPIO_PINS(30);
+DECLARE_MSM_GPIO_PINS(31);
+DECLARE_MSM_GPIO_PINS(32);
+DECLARE_MSM_GPIO_PINS(33);
+DECLARE_MSM_GPIO_PINS(34);
+DECLARE_MSM_GPIO_PINS(35);
+DECLARE_MSM_GPIO_PINS(36);
+DECLARE_MSM_GPIO_PINS(37);
+DECLARE_MSM_GPIO_PINS(38);
+DECLARE_MSM_GPIO_PINS(39);
+DECLARE_MSM_GPIO_PINS(40);
+DECLARE_MSM_GPIO_PINS(41);
+DECLARE_MSM_GPIO_PINS(42);
+DECLARE_MSM_GPIO_PINS(43);
+DECLARE_MSM_GPIO_PINS(44);
+DECLARE_MSM_GPIO_PINS(45);
+DECLARE_MSM_GPIO_PINS(46);
+DECLARE_MSM_GPIO_PINS(47);
+DECLARE_MSM_GPIO_PINS(48);
+DECLARE_MSM_GPIO_PINS(49);
+DECLARE_MSM_GPIO_PINS(50);
+DECLARE_MSM_GPIO_PINS(51);
+DECLARE_MSM_GPIO_PINS(52);
+DECLARE_MSM_GPIO_PINS(53);
+DECLARE_MSM_GPIO_PINS(54);
+DECLARE_MSM_GPIO_PINS(55);
+DECLARE_MSM_GPIO_PINS(56);
+DECLARE_MSM_GPIO_PINS(57);
+DECLARE_MSM_GPIO_PINS(58);
+DECLARE_MSM_GPIO_PINS(59);
+DECLARE_MSM_GPIO_PINS(60);
+DECLARE_MSM_GPIO_PINS(61);
+DECLARE_MSM_GPIO_PINS(62);
+DECLARE_MSM_GPIO_PINS(63);
+DECLARE_MSM_GPIO_PINS(64);
+DECLARE_MSM_GPIO_PINS(65);
+DECLARE_MSM_GPIO_PINS(66);
+DECLARE_MSM_GPIO_PINS(67);
+DECLARE_MSM_GPIO_PINS(68);
+DECLARE_MSM_GPIO_PINS(69);
+DECLARE_MSM_GPIO_PINS(70);
+DECLARE_MSM_GPIO_PINS(71);
+DECLARE_MSM_GPIO_PINS(72);
+DECLARE_MSM_GPIO_PINS(73);
+DECLARE_MSM_GPIO_PINS(74);
+DECLARE_MSM_GPIO_PINS(75);
+DECLARE_MSM_GPIO_PINS(76);
+DECLARE_MSM_GPIO_PINS(77);
+DECLARE_MSM_GPIO_PINS(78);
+DECLARE_MSM_GPIO_PINS(79);
+
+static const unsigned int sdc1_clk_pins[] = { 80 };
+static const unsigned int sdc1_cmd_pins[] = { 81 };
+static const unsigned int sdc1_data_pins[] = { 82 };
+static const unsigned int sdc2_clk_pins[] = { 83 };
+static const unsigned int sdc2_cmd_pins[] = { 84 };
+static const unsigned int sdc2_data_pins[] = { 85 };
+static const unsigned int qdsd_clk_pins[] = { 86 };
+static const unsigned int qdsd_cmd_pins[] = { 87 };
+static const unsigned int qdsd_data0_pins[] = { 88 };
+static const unsigned int qdsd_data1_pins[] = { 89 };
+static const unsigned int qdsd_data2_pins[] = { 90 };
+static const unsigned int qdsd_data3_pins[] = { 91 };
+
+#define FUNCTION(fname)			                \
+	[msm_mux_##fname] = {		                \
+		.name = #fname,				\
+		.groups = fname##_groups,               \
+		.ngroups = ARRAY_SIZE(fname##_groups),	\
+	}
+
+#define PINGROUP(id, f1, f2, f3, f4, f5, f6, f7, f8, f9)	\
+	{							\
+		.name = "gpio" #id,				\
+		.pins = gpio##id##_pins,			\
+		.npins = ARRAY_SIZE(gpio##id##_pins),		\
+		.funcs = (int[]){				\
+			msm_mux_gpio,				\
+			msm_mux_##f1,				\
+			msm_mux_##f2,				\
+			msm_mux_##f3,				\
+			msm_mux_##f4,				\
+			msm_mux_##f5,				\
+			msm_mux_##f6,				\
+			msm_mux_##f7,				\
+			msm_mux_##f8,				\
+			msm_mux_##f9				\
+		},					\
+		.nfuncs = 10,				\
+		.ctl_reg = 0x1000 * id,		\
+		.io_reg = 0x4 + 0x1000 * id,			\
+		.intr_cfg_reg = 0x8 + 0x1000 * id,		\
+		.intr_status_reg = 0xc + 0x1000 * id,		\
+		.intr_target_reg = 0x8 + 0x1000 * id,		\
+		.mux_bit = 2,					\
+		.pull_bit = 0,					\
+		.drv_bit = 6,					\
+		.oe_bit = 9,					\
+		.in_bit = 0,					\
+		.out_bit = 1,					\
+		.intr_enable_bit = 0,				\
+		.intr_status_bit = 0,				\
+		.intr_target_bit = 5,				\
+		.intr_target_kpss_val = 4,			\
+		.intr_raw_status_bit = 4,			\
+		.intr_polarity_bit = 1,				\
+		.intr_detection_bit = 2,			\
+		.intr_detection_width = 2,			\
+	}
+
+#define SDC_PINGROUP(pg_name, ctl, pull, drv)	\
+	{					        \
+		.name = #pg_name,			\
+		.pins = pg_name##_pins,			\
+		.npins = ARRAY_SIZE(pg_name##_pins),	\
+		.ctl_reg = ctl,				\
+		.io_reg = 0,				\
+		.intr_cfg_reg = 0,			\
+		.intr_status_reg = 0,			\
+		.intr_target_reg = 0,			\
+		.mux_bit = -1,				\
+		.pull_bit = pull,			\
+		.drv_bit = drv,				\
+		.oe_bit = -1,				\
+		.in_bit = -1,				\
+		.out_bit = -1,				\
+		.intr_enable_bit = -1,			\
+		.intr_status_bit = -1,			\
+		.intr_target_bit = -1,			\
+		.intr_target_kpss_val = -1,		\
+		.intr_raw_status_bit = -1,		\
+		.intr_polarity_bit = -1,		\
+		.intr_detection_bit = -1,		\
+		.intr_detection_width = -1,		\
+	}
+
+enum mdm9607_functions {
+	msm_mux_adsp_ext,
+	msm_mux_atest_bbrx0,
+	msm_mux_atest_bbrx1,
+	msm_mux_atest_char,
+	msm_mux_atest_char0,
+	msm_mux_atest_char1,
+	msm_mux_atest_char2,
+	msm_mux_atest_char3,
+	msm_mux_atest_combodac_to_gpio_native,
+	msm_mux_atest_gpsadc_dtest0_native,
+	msm_mux_atest_gpsadc_dtest1_native,
+	msm_mux_atest_tsens,
+	msm_mux_backlight_en_b,
+	msm_mux_bimc_dte0,
+	msm_mux_bimc_dte1,
+	msm_mux_blsp1_spi,
+	msm_mux_blsp2_spi,
+	msm_mux_blsp3_spi,
+	msm_mux_blsp_i2c1,
+	msm_mux_blsp_i2c2,
+	msm_mux_blsp_i2c3,
+	msm_mux_blsp_i2c4,
+	msm_mux_blsp_i2c5,
+	msm_mux_blsp_i2c6,
+	msm_mux_blsp_spi1,
+	msm_mux_blsp_spi2,
+	msm_mux_blsp_spi3,
+	msm_mux_blsp_spi4,
+	msm_mux_blsp_spi5,
+	msm_mux_blsp_spi6,
+	msm_mux_blsp_uart1,
+	msm_mux_blsp_uart2,
+	msm_mux_blsp_uart3,
+	msm_mux_blsp_uart4,
+	msm_mux_blsp_uart5,
+	msm_mux_blsp_uart6,
+	msm_mux_blsp_uim1,
+	msm_mux_blsp_uim2,
+	msm_mux_codec_int,
+	msm_mux_codec_rst,
+	msm_mux_coex_uart,
+	msm_mux_cri_trng,
+	msm_mux_cri_trng0,
+	msm_mux_cri_trng1,
+	msm_mux_dbg_out,
+	msm_mux_ebi0_wrcdc,
+	msm_mux_ebi2_a,
+	msm_mux_ebi2_a_d_8_b,
+	msm_mux_ebi2_lcd,
+	msm_mux_ebi2_lcd_cs_n_b,
+	msm_mux_ebi2_lcd_te_b,
+	msm_mux_eth_irq,
+	msm_mux_eth_rst,
+	msm_mux_gcc_gp1_clk_a,
+	msm_mux_gcc_gp1_clk_b,
+	msm_mux_gcc_gp2_clk_a,
+	msm_mux_gcc_gp2_clk_b,
+	msm_mux_gcc_gp3_clk_a,
+	msm_mux_gcc_gp3_clk_b,
+	msm_mux_gcc_plltest,
+	msm_mux_gcc_tlmm,
+	msm_mux_gmac_mdio,
+	msm_mux_gpio,
+	msm_mux_gsm0_tx,
+	msm_mux_lcd_rst,
+	msm_mux_ldo_en,
+	msm_mux_ldo_update,
+	msm_mux_m_voc,
+	msm_mux_modem_tsync,
+	msm_mux_nav_ptp_pps_in_a,
+	msm_mux_nav_ptp_pps_in_b,
+	msm_mux_nav_tsync_out_a,
+	msm_mux_nav_tsync_out_b,
+	msm_mux_pa_indicator,
+	msm_mux_pbs0,
+	msm_mux_pbs1,
+	msm_mux_pbs2,
+	msm_mux_pri_mi2s_data0_a,
+	msm_mux_pri_mi2s_data1_a,
+	msm_mux_pri_mi2s_mclk_a,
+	msm_mux_pri_mi2s_sck_a,
+	msm_mux_pri_mi2s_ws_a,
+	msm_mux_prng_rosc,
+	msm_mux_ptp_pps_out_a,
+	msm_mux_ptp_pps_out_b,
+	msm_mux_pwr_crypto_enabled_a,
+	msm_mux_pwr_crypto_enabled_b,
+	msm_mux_pwr_modem_enabled_a,
+	msm_mux_pwr_modem_enabled_b,
+	msm_mux_pwr_nav_enabled_a,
+	msm_mux_pwr_nav_enabled_b,
+	msm_mux_qdss_cti_trig_in_a0,
+	msm_mux_qdss_cti_trig_in_a1,
+	msm_mux_qdss_cti_trig_in_b0,
+	msm_mux_qdss_cti_trig_in_b1,
+	msm_mux_qdss_cti_trig_out_a0,
+	msm_mux_qdss_cti_trig_out_a1,
+	msm_mux_qdss_cti_trig_out_b0,
+	msm_mux_qdss_cti_trig_out_b1,
+	msm_mux_qdss_traceclk_a,
+	msm_mux_qdss_traceclk_b,
+	msm_mux_qdss_tracectl_a,
+	msm_mux_qdss_tracectl_b,
+	msm_mux_qdss_tracedata_a,
+	msm_mux_qdss_tracedata_b,
+	msm_mux_rcm_marker1,
+	msm_mux_rcm_marker2,
+	msm_mux_sd_write,
+	msm_mux_sec_mi2s,
+	msm_mux_sensor_en,
+	msm_mux_sensor_int2,
+	msm_mux_sensor_int3,
+	msm_mux_sensor_rst,
+	msm_mux_ssbi1,
+	msm_mux_ssbi2,
+	msm_mux_touch_rst,
+	msm_mux_ts_int,
+	msm_mux_uim1_clk,
+	msm_mux_uim1_data,
+	msm_mux_uim1_present,
+	msm_mux_uim1_reset,
+	msm_mux_uim2_clk,
+	msm_mux_uim2_data,
+	msm_mux_uim2_present,
+	msm_mux_uim2_reset,
+	msm_mux_uim_batt,
+	msm_mux_wlan_en1,
+	msm_mux__,
+};
+
+static const char * const gpio_groups[] = {
+	"gpio0", "gpio1", "gpio2", "gpio3", "gpio4", "gpio5", "gpio6", "gpio7",
+	"gpio8", "gpio9", "gpio10", "gpio11", "gpio12", "gpio13", "gpio14",
+	"gpio15", "gpio16", "gpio17", "gpio18", "gpio19", "gpio20", "gpio21",
+	"gpio22", "gpio23", "gpio24", "gpio25", "gpio26", "gpio27", "gpio28",
+	"gpio29", "gpio30", "gpio31", "gpio32", "gpio33", "gpio34", "gpio35",
+	"gpio36", "gpio37", "gpio38", "gpio39", "gpio40", "gpio41", "gpio42",
+	"gpio43", "gpio44", "gpio45", "gpio46", "gpio47", "gpio48", "gpio49",
+	"gpio50", "gpio51", "gpio52", "gpio53", "gpio54", "gpio55", "gpio56",
+	"gpio57", "gpio58", "gpio59", "gpio60", "gpio61", "gpio62", "gpio63",
+	"gpio64", "gpio65", "gpio66", "gpio67", "gpio68", "gpio69", "gpio70",
+	"gpio71", "gpio72", "gpio73", "gpio74", "gpio75", "gpio76", "gpio77",
+	"gpio78", "gpio79",
+};
+static const char * const blsp_spi3_groups[] = {
+	"gpio0", "gpio1", "gpio2", "gpio3",
+};
+static const char * const blsp_uart3_groups[] = {
+	"gpio0", "gpio1", "gpio2", "gpio3",
+};
+static const char * const qdss_tracedata_a_groups[] = {
+	"gpio0", "gpio1", "gpio4", "gpio5", "gpio20", "gpio21", "gpio22",
+	"gpio23", "gpio24", "gpio25", "gpio26", "gpio75", "gpio76", "gpio77",
+	"gpio78", "gpio79",
+};
+static const char * const bimc_dte1_groups[] = {
+	"gpio1", "gpio24",
+};
+static const char * const blsp_i2c3_groups[] = {
+	"gpio2", "gpio3",
+};
+static const char * const qdss_traceclk_a_groups[] = {
+	"gpio2",
+};
+static const char * const bimc_dte0_groups[] = {
+	"gpio2", "gpio15",
+};
+static const char * const qdss_cti_trig_in_a1_groups[] = {
+	"gpio3",
+};
+static const char * const blsp_spi2_groups[] = {
+	"gpio4", "gpio5", "gpio6", "gpio7",
+};
+static const char * const blsp_uart2_groups[] = {
+	"gpio4", "gpio5", "gpio6", "gpio7",
+};
+static const char * const blsp_uim2_groups[] = {
+	"gpio4", "gpio5",
+};
+static const char * const blsp_i2c2_groups[] = {
+	"gpio6", "gpio7",
+};
+static const char * const qdss_tracectl_a_groups[] = {
+	"gpio6",
+};
+static const char * const sensor_int2_groups[] = {
+	"gpio8",
+};
+static const char * const blsp_spi5_groups[] = {
+	"gpio8", "gpio9", "gpio10", "gpio11",
+};
+static const char * const blsp_uart5_groups[] = {
+	"gpio8", "gpio9", "gpio10", "gpio11",
+};
+static const char * const ebi2_lcd_groups[] = {
+	"gpio8", "gpio11", "gpio74", "gpio78",
+};
+static const char * const m_voc_groups[] = {
+	"gpio8", "gpio78",
+};
+static const char * const sensor_int3_groups[] = {
+	"gpio9",
+};
+static const char * const sensor_en_groups[] = {
+	"gpio10",
+};
+static const char * const blsp_i2c5_groups[] = {
+	"gpio10", "gpio11",
+};
+static const char * const ebi2_a_groups[] = {
+	"gpio10",
+};
+static const char * const qdss_tracedata_b_groups[] = {
+	"gpio10", "gpio39", "gpio40", "gpio41", "gpio42", "gpio43", "gpio46",
+	"gpio47", "gpio48", "gpio51", "gpio52", "gpio53", "gpio54", "gpio55",
+	"gpio58", "gpio59",
+};
+static const char * const sensor_rst_groups[] = {
+	"gpio11",
+};
+static const char * const blsp2_spi_groups[] = {
+	"gpio11", "gpio13", "gpio77",
+};
+static const char * const blsp_spi1_groups[] = {
+	"gpio12", "gpio13", "gpio14", "gpio15",
+};
+static const char * const blsp_uart1_groups[] = {
+	"gpio12", "gpio13", "gpio14", "gpio15",
+};
+static const char * const blsp_uim1_groups[] = {
+	"gpio12", "gpio13",
+};
+static const char * const blsp3_spi_groups[] = {
+	"gpio12", "gpio26", "gpio76",
+};
+static const char * const gcc_gp2_clk_b_groups[] = {
+	"gpio12",
+};
+static const char * const gcc_gp3_clk_b_groups[] = {
+	"gpio13",
+};
+static const char * const blsp_i2c1_groups[] = {
+	"gpio14", "gpio15",
+};
+static const char * const gcc_gp1_clk_b_groups[] = {
+	"gpio14",
+};
+static const char * const blsp_spi4_groups[] = {
+	"gpio16", "gpio17", "gpio18", "gpio19",
+};
+static const char * const blsp_uart4_groups[] = {
+	"gpio16", "gpio17", "gpio18", "gpio19",
+};
+static const char * const rcm_marker1_groups[] = {
+	"gpio18",
+};
+static const char * const blsp_i2c4_groups[] = {
+	"gpio18", "gpio19",
+};
+static const char * const qdss_cti_trig_out_a1_groups[] = {
+	"gpio18",
+};
+static const char * const rcm_marker2_groups[] = {
+	"gpio19",
+};
+static const char * const qdss_cti_trig_out_a0_groups[] = {
+	"gpio19",
+};
+static const char * const blsp_spi6_groups[] = {
+	"gpio20", "gpio21", "gpio22", "gpio23",
+};
+static const char * const blsp_uart6_groups[] = {
+	"gpio20", "gpio21", "gpio22", "gpio23",
+};
+static const char * const pri_mi2s_ws_a_groups[] = {
+	"gpio20",
+};
+static const char * const ebi2_lcd_te_b_groups[] = {
+	"gpio20",
+};
+static const char * const blsp1_spi_groups[] = {
+	"gpio20", "gpio21", "gpio78",
+};
+static const char * const backlight_en_b_groups[] = {
+	"gpio21",
+};
+static const char * const pri_mi2s_data0_a_groups[] = {
+	"gpio21",
+};
+static const char * const pri_mi2s_data1_a_groups[] = {
+	"gpio22",
+};
+static const char * const blsp_i2c6_groups[] = {
+	"gpio22", "gpio23",
+};
+static const char * const ebi2_a_d_8_b_groups[] = {
+	"gpio22",
+};
+static const char * const pri_mi2s_sck_a_groups[] = {
+	"gpio23",
+};
+static const char * const ebi2_lcd_cs_n_b_groups[] = {
+	"gpio23",
+};
+static const char * const touch_rst_groups[] = {
+	"gpio24",
+};
+static const char * const pri_mi2s_mclk_a_groups[] = {
+	"gpio24",
+};
+static const char * const pwr_nav_enabled_a_groups[] = {
+	"gpio24",
+};
+static const char * const ts_int_groups[] = {
+	"gpio25",
+};
+static const char * const sd_write_groups[] = {
+	"gpio25",
+};
+static const char * const pwr_crypto_enabled_a_groups[] = {
+	"gpio25",
+};
+static const char * const codec_rst_groups[] = {
+	"gpio26",
+};
+static const char * const adsp_ext_groups[] = {
+	"gpio26",
+};
+static const char * const atest_combodac_to_gpio_native_groups[] = {
+	"gpio26", "gpio27", "gpio28", "gpio29", "gpio30", "gpio31", "gpio32",
+	"gpio33", "gpio34", "gpio35", "gpio41", "gpio45", "gpio49", "gpio50",
+	"gpio51", "gpio52", "gpio54", "gpio55", "gpio57", "gpio59",
+};
+static const char * const uim2_data_groups[] = {
+	"gpio27",
+};
+static const char * const gmac_mdio_groups[] = {
+	"gpio27", "gpio28",
+};
+static const char * const gcc_gp1_clk_a_groups[] = {
+	"gpio27",
+};
+static const char * const uim2_clk_groups[] = {
+	"gpio28",
+};
+static const char * const gcc_gp2_clk_a_groups[] = {
+	"gpio28",
+};
+static const char * const eth_irq_groups[] = {
+	"gpio29",
+};
+static const char * const uim2_reset_groups[] = {
+	"gpio29",
+};
+static const char * const gcc_gp3_clk_a_groups[] = {
+	"gpio29",
+};
+static const char * const eth_rst_groups[] = {
+	"gpio30",
+};
+static const char * const uim2_present_groups[] = {
+	"gpio30",
+};
+static const char * const prng_rosc_groups[] = {
+	"gpio30",
+};
+static const char * const uim1_data_groups[] = {
+	"gpio31",
+};
+static const char * const uim1_clk_groups[] = {
+	"gpio32",
+};
+static const char * const uim1_reset_groups[] = {
+	"gpio33",
+};
+static const char * const uim1_present_groups[] = {
+	"gpio34",
+};
+static const char * const gcc_plltest_groups[] = {
+	"gpio34", "gpio35",
+};
+static const char * const uim_batt_groups[] = {
+	"gpio35",
+};
+static const char * const coex_uart_groups[] = {
+	"gpio36", "gpio37",
+};
+static const char * const codec_int_groups[] = {
+	"gpio38",
+};
+static const char * const qdss_cti_trig_in_a0_groups[] = {
+	"gpio38",
+};
+static const char * const atest_bbrx1_groups[] = {
+	"gpio39",
+};
+static const char * const cri_trng0_groups[] = {
+	"gpio40",
+};
+static const char * const atest_bbrx0_groups[] = {
+	"gpio40",
+};
+static const char * const cri_trng_groups[] = {
+	"gpio42",
+};
+static const char * const qdss_cti_trig_in_b0_groups[] = {
+	"gpio44",
+};
+static const char * const atest_gpsadc_dtest0_native_groups[] = {
+	"gpio44",
+};
+static const char * const qdss_cti_trig_out_b0_groups[] = {
+	"gpio45",
+};
+static const char * const qdss_tracectl_b_groups[] = {
+	"gpio49",
+};
+static const char * const qdss_traceclk_b_groups[] = {
+	"gpio50",
+};
+static const char * const pa_indicator_groups[] = {
+	"gpio51",
+};
+static const char * const modem_tsync_groups[] = {
+	"gpio53",
+};
+static const char * const nav_tsync_out_a_groups[] = {
+	"gpio53",
+};
+static const char * const nav_ptp_pps_in_a_groups[] = {
+	"gpio53",
+};
+static const char * const ptp_pps_out_a_groups[] = {
+	"gpio53",
+};
+static const char * const gsm0_tx_groups[] = {
+	"gpio55",
+};
+static const char * const qdss_cti_trig_in_b1_groups[] = {
+	"gpio56",
+};
+static const char * const cri_trng1_groups[] = {
+	"gpio57",
+};
+static const char * const qdss_cti_trig_out_b1_groups[] = {
+	"gpio57",
+};
+static const char * const ssbi1_groups[] = {
+	"gpio58",
+};
+static const char * const atest_gpsadc_dtest1_native_groups[] = {
+	"gpio58",
+};
+static const char * const ssbi2_groups[] = {
+	"gpio59",
+};
+static const char * const atest_char3_groups[] = {
+	"gpio60",
+};
+static const char * const atest_char2_groups[] = {
+	"gpio61",
+};
+static const char * const atest_char1_groups[] = {
+	"gpio62",
+};
+static const char * const atest_char0_groups[] = {
+	"gpio63",
+};
+static const char * const atest_char_groups[] = {
+	"gpio64",
+};
+static const char * const ebi0_wrcdc_groups[] = {
+	"gpio70",
+};
+static const char * const ldo_update_groups[] = {
+	"gpio72",
+};
+static const char * const gcc_tlmm_groups[] = {
+	"gpio72",
+};
+static const char * const ldo_en_groups[] = {
+	"gpio73",
+};
+static const char * const dbg_out_groups[] = {
+	"gpio73",
+};
+static const char * const atest_tsens_groups[] = {
+	"gpio73",
+};
+static const char * const lcd_rst_groups[] = {
+	"gpio74",
+};
+static const char * const wlan_en1_groups[] = {
+	"gpio75",
+};
+static const char * const nav_tsync_out_b_groups[] = {
+	"gpio75",
+};
+static const char * const nav_ptp_pps_in_b_groups[] = {
+	"gpio75",
+};
+static const char * const ptp_pps_out_b_groups[] = {
+	"gpio75",
+};
+static const char * const pbs0_groups[] = {
+	"gpio76",
+};
+static const char * const sec_mi2s_groups[] = {
+	"gpio76", "gpio77", "gpio78", "gpio79",
+};
+static const char * const pwr_modem_enabled_a_groups[] = {
+	"gpio76",
+};
+static const char * const pbs1_groups[] = {
+	"gpio77",
+};
+static const char * const pwr_modem_enabled_b_groups[] = {
+	"gpio77",
+};
+static const char * const pbs2_groups[] = {
+	"gpio78",
+};
+static const char * const pwr_nav_enabled_b_groups[] = {
+	"gpio78",
+};
+static const char * const pwr_crypto_enabled_b_groups[] = {
+	"gpio79",
+};
+
+static const struct msm_function mdm9607_functions[] = {
+	FUNCTION(adsp_ext),
+	FUNCTION(atest_bbrx0),
+	FUNCTION(atest_bbrx1),
+	FUNCTION(atest_char),
+	FUNCTION(atest_char0),
+	FUNCTION(atest_char1),
+	FUNCTION(atest_char2),
+	FUNCTION(atest_char3),
+	FUNCTION(atest_combodac_to_gpio_native),
+	FUNCTION(atest_gpsadc_dtest0_native),
+	FUNCTION(atest_gpsadc_dtest1_native),
+	FUNCTION(atest_tsens),
+	FUNCTION(backlight_en_b),
+	FUNCTION(bimc_dte0),
+	FUNCTION(bimc_dte1),
+	FUNCTION(blsp1_spi),
+	FUNCTION(blsp2_spi),
+	FUNCTION(blsp3_spi),
+	FUNCTION(blsp_i2c1),
+	FUNCTION(blsp_i2c2),
+	FUNCTION(blsp_i2c3),
+	FUNCTION(blsp_i2c4),
+	FUNCTION(blsp_i2c5),
+	FUNCTION(blsp_i2c6),
+	FUNCTION(blsp_spi1),
+	FUNCTION(blsp_spi2),
+	FUNCTION(blsp_spi3),
+	FUNCTION(blsp_spi4),
+	FUNCTION(blsp_spi5),
+	FUNCTION(blsp_spi6),
+	FUNCTION(blsp_uart1),
+	FUNCTION(blsp_uart2),
+	FUNCTION(blsp_uart3),
+	FUNCTION(blsp_uart4),
+	FUNCTION(blsp_uart5),
+	FUNCTION(blsp_uart6),
+	FUNCTION(blsp_uim1),
+	FUNCTION(blsp_uim2),
+	FUNCTION(codec_int),
+	FUNCTION(codec_rst),
+	FUNCTION(coex_uart),
+	FUNCTION(cri_trng),
+	FUNCTION(cri_trng0),
+	FUNCTION(cri_trng1),
+	FUNCTION(dbg_out),
+	FUNCTION(ebi0_wrcdc),
+	FUNCTION(ebi2_a),
+	FUNCTION(ebi2_a_d_8_b),
+	FUNCTION(ebi2_lcd),
+	FUNCTION(ebi2_lcd_cs_n_b),
+	FUNCTION(ebi2_lcd_te_b),
+	FUNCTION(eth_irq),
+	FUNCTION(eth_rst),
+	FUNCTION(gcc_gp1_clk_a),
+	FUNCTION(gcc_gp1_clk_b),
+	FUNCTION(gcc_gp2_clk_a),
+	FUNCTION(gcc_gp2_clk_b),
+	FUNCTION(gcc_gp3_clk_a),
+	FUNCTION(gcc_gp3_clk_b),
+	FUNCTION(gcc_plltest),
+	FUNCTION(gcc_tlmm),
+	FUNCTION(gmac_mdio),
+	FUNCTION(gpio),
+	FUNCTION(gsm0_tx),
+	FUNCTION(lcd_rst),
+	FUNCTION(ldo_en),
+	FUNCTION(ldo_update),
+	FUNCTION(m_voc),
+	FUNCTION(modem_tsync),
+	FUNCTION(nav_ptp_pps_in_a),
+	FUNCTION(nav_ptp_pps_in_b),
+	FUNCTION(nav_tsync_out_a),
+	FUNCTION(nav_tsync_out_b),
+	FUNCTION(pa_indicator),
+	FUNCTION(pbs0),
+	FUNCTION(pbs1),
+	FUNCTION(pbs2),
+	FUNCTION(pri_mi2s_data0_a),
+	FUNCTION(pri_mi2s_data1_a),
+	FUNCTION(pri_mi2s_mclk_a),
+	FUNCTION(pri_mi2s_sck_a),
+	FUNCTION(pri_mi2s_ws_a),
+	FUNCTION(prng_rosc),
+	FUNCTION(ptp_pps_out_a),
+	FUNCTION(ptp_pps_out_b),
+	FUNCTION(pwr_crypto_enabled_a),
+	FUNCTION(pwr_crypto_enabled_b),
+	FUNCTION(pwr_modem_enabled_a),
+	FUNCTION(pwr_modem_enabled_b),
+	FUNCTION(pwr_nav_enabled_a),
+	FUNCTION(pwr_nav_enabled_b),
+	FUNCTION(qdss_cti_trig_in_a0),
+	FUNCTION(qdss_cti_trig_in_a1),
+	FUNCTION(qdss_cti_trig_in_b0),
+	FUNCTION(qdss_cti_trig_in_b1),
+	FUNCTION(qdss_cti_trig_out_a0),
+	FUNCTION(qdss_cti_trig_out_a1),
+	FUNCTION(qdss_cti_trig_out_b0),
+	FUNCTION(qdss_cti_trig_out_b1),
+	FUNCTION(qdss_traceclk_a),
+	FUNCTION(qdss_traceclk_b),
+	FUNCTION(qdss_tracectl_a),
+	FUNCTION(qdss_tracectl_b),
+	FUNCTION(qdss_tracedata_a),
+	FUNCTION(qdss_tracedata_b),
+	FUNCTION(rcm_marker1),
+	FUNCTION(rcm_marker2),
+	FUNCTION(sd_write),
+	FUNCTION(sec_mi2s),
+	FUNCTION(sensor_en),
+	FUNCTION(sensor_int2),
+	FUNCTION(sensor_int3),
+	FUNCTION(sensor_rst),
+	FUNCTION(ssbi1),
+	FUNCTION(ssbi2),
+	FUNCTION(touch_rst),
+	FUNCTION(ts_int),
+	FUNCTION(uim1_clk),
+	FUNCTION(uim1_data),
+	FUNCTION(uim1_present),
+	FUNCTION(uim1_reset),
+	FUNCTION(uim2_clk),
+	FUNCTION(uim2_data),
+	FUNCTION(uim2_present),
+	FUNCTION(uim2_reset),
+	FUNCTION(uim_batt),
+	FUNCTION(wlan_en1)
+};
+
+static const struct msm_pingroup mdm9607_groups[] = {
+	PINGROUP(0, blsp_uart3, blsp_spi3, _, _, _, _, _, qdss_tracedata_a, _),
+	PINGROUP(1, blsp_uart3, blsp_spi3, _, _, _, _, _, qdss_tracedata_a, bimc_dte1),
+	PINGROUP(2, blsp_uart3, blsp_i2c3, blsp_spi3, _, _, _, _, _, qdss_traceclk_a),
+	PINGROUP(3, blsp_uart3, blsp_i2c3, blsp_spi3, _, _, _, _, _, _),
+	PINGROUP(4, blsp_spi2, blsp_uart2, blsp_uim2, _, _, _, _, qdss_tracedata_a, _),
+	PINGROUP(5, blsp_spi2, blsp_uart2, blsp_uim2, _, _, _, _, qdss_tracedata_a, _),
+	PINGROUP(6, blsp_spi2, blsp_uart2, blsp_i2c2, _, _, _, _, _, _),
+	PINGROUP(7, blsp_spi2, blsp_uart2, blsp_i2c2, _, _, _, _, _, _),
+	PINGROUP(8, blsp_spi5, blsp_uart5, ebi2_lcd, m_voc, _, _, _, _, _),
+	PINGROUP(9, blsp_spi5, blsp_uart5, _, _, _, _, _, _, _),
+	PINGROUP(10, blsp_spi5, blsp_i2c5, blsp_uart5, ebi2_a, _, _, qdss_tracedata_b, _, _),
+	PINGROUP(11, blsp_spi5, blsp_i2c5, blsp_uart5, blsp2_spi, ebi2_lcd, _, _, _, _),
+	PINGROUP(12, blsp_spi1, blsp_uart1, blsp_uim1, blsp3_spi, gcc_gp2_clk_b, _, _, _, _),
+	PINGROUP(13, blsp_spi1, blsp_uart1, blsp_uim1, blsp2_spi, gcc_gp3_clk_b, _, _, _, _),
+	PINGROUP(14, blsp_spi1, blsp_uart1, blsp_i2c1, gcc_gp1_clk_b, _, _, _, _, _),
+	PINGROUP(15, blsp_spi1, blsp_uart1, blsp_i2c1, _, _, _, _, _, _),
+	PINGROUP(16, blsp_spi4, blsp_uart4, _, _, _, _, _, _, _),
+	PINGROUP(17, blsp_spi4, blsp_uart4, _, _, _, _, _, _, _),
+	PINGROUP(18, blsp_spi4, blsp_uart4, blsp_i2c4, _, _, _, _, _, _),
+	PINGROUP(19, blsp_spi4, blsp_uart4, blsp_i2c4, _, _, _, _, _, _),
+	PINGROUP(20, blsp_spi6, blsp_uart6, pri_mi2s_ws_a, ebi2_lcd_te_b, blsp1_spi, _, _, _,
+		 qdss_tracedata_a),
+	PINGROUP(21, blsp_spi6, blsp_uart6, pri_mi2s_data0_a, blsp1_spi, _, _, _, _, _),
+	PINGROUP(22, blsp_spi6, blsp_uart6, pri_mi2s_data1_a, blsp_i2c6, ebi2_a_d_8_b, _, _, _, _),
+	PINGROUP(23, blsp_spi6, blsp_uart6, pri_mi2s_sck_a, blsp_i2c6, ebi2_lcd_cs_n_b, _, _, _, _),
+	PINGROUP(24, pri_mi2s_mclk_a, _, pwr_nav_enabled_a, _, _, _, _, qdss_tracedata_a,
+		 bimc_dte1),
+	PINGROUP(25, sd_write, _, pwr_crypto_enabled_a, _, _, _, _, qdss_tracedata_a, _),
+	PINGROUP(26, blsp3_spi, adsp_ext, _, qdss_tracedata_a, _, atest_combodac_to_gpio_native, _,
+		 _, _),
+	PINGROUP(27, uim2_data, gmac_mdio, gcc_gp1_clk_a, _, _, atest_combodac_to_gpio_native, _, _,
+		 _),
+	PINGROUP(28, uim2_clk, gmac_mdio, gcc_gp2_clk_a, _, _, atest_combodac_to_gpio_native, _, _,
+		 _),
+	PINGROUP(29, uim2_reset, gcc_gp3_clk_a, _, _, atest_combodac_to_gpio_native, _, _, _, _),
+	PINGROUP(30, uim2_present, prng_rosc, _, _, atest_combodac_to_gpio_native, _, _, _, _),
+	PINGROUP(31, uim1_data, _, _, atest_combodac_to_gpio_native, _, _, _, _, _),
+	PINGROUP(32, uim1_clk, _, _, atest_combodac_to_gpio_native, _, _, _, _, _),
+	PINGROUP(33, uim1_reset, _, _, atest_combodac_to_gpio_native, _, _, _, _, _),
+	PINGROUP(34, uim1_present, gcc_plltest, _, _, atest_combodac_to_gpio_native, _, _, _, _),
+	PINGROUP(35, uim_batt, gcc_plltest, _, atest_combodac_to_gpio_native, _, _, _, _, _),
+	PINGROUP(36, coex_uart, _, _, _, _, _, _, _, _),
+	PINGROUP(37, coex_uart, _, _, _, _, _, _, _, _),
+	PINGROUP(38, _, _, _, qdss_cti_trig_in_a0, _, _, _, _, _),
+	PINGROUP(39, _, _, _, qdss_tracedata_b, _, atest_bbrx1, _, _, _),
+	PINGROUP(40, _, cri_trng0, _, _, _, _, qdss_tracedata_b, _, atest_bbrx0),
+	PINGROUP(41, _, _, _, _, _, qdss_tracedata_b, _, atest_combodac_to_gpio_native, _),
+	PINGROUP(42, _, cri_trng, _, _, qdss_tracedata_b, _, _, _, _),
+	PINGROUP(43, _, _, _, _, qdss_tracedata_b, _, _, _, _),
+	PINGROUP(44, _, _, qdss_cti_trig_in_b0, _, atest_gpsadc_dtest0_native, _, _, _, _),
+	PINGROUP(45, _, _, qdss_cti_trig_out_b0, _, atest_combodac_to_gpio_native, _, _, _, _),
+	PINGROUP(46, _, _, qdss_tracedata_b, _, _, _, _, _, _),
+	PINGROUP(47, _, _, qdss_tracedata_b, _, _, _, _, _, _),
+	PINGROUP(48, _, _, qdss_tracedata_b, _, _, _, _, _, _),
+	PINGROUP(49, _, _, qdss_tracectl_b, _, atest_combodac_to_gpio_native, _, _, _, _),
+	PINGROUP(50, _, _, qdss_traceclk_b, _, atest_combodac_to_gpio_native, _, _, _, _),
+	PINGROUP(51, _, pa_indicator, _, qdss_tracedata_b, _, atest_combodac_to_gpio_native, _, _,
+		 _),
+	PINGROUP(52, _, _, _, qdss_tracedata_b, _, atest_combodac_to_gpio_native, _, _, _),
+	PINGROUP(53, _, modem_tsync, nav_tsync_out_a, nav_ptp_pps_in_a, ptp_pps_out_a,
+		 qdss_tracedata_b, _, _, _),
+	PINGROUP(54, _, qdss_tracedata_b, _, atest_combodac_to_gpio_native, _, _, _, _, _),
+	PINGROUP(55, gsm0_tx, _, qdss_tracedata_b, _, atest_combodac_to_gpio_native, _, _, _, _),
+	PINGROUP(56, _, _, qdss_cti_trig_in_b1, _, _, _, _, _, _),
+	PINGROUP(57, _, cri_trng1, _, qdss_cti_trig_out_b1, _, atest_combodac_to_gpio_native, _, _,
+		 _),
+	PINGROUP(58, _, ssbi1, _, qdss_tracedata_b, _, atest_gpsadc_dtest1_native, _, _, _),
+	PINGROUP(59, _, ssbi2, _, qdss_tracedata_b, _, atest_combodac_to_gpio_native, _, _, _),
+	PINGROUP(60, atest_char3, _, _, _, _, _, _, _, _),
+	PINGROUP(61, atest_char2, _, _, _, _, _, _, _, _),
+	PINGROUP(62, atest_char1, _, _, _, _, _, _, _, _),
+	PINGROUP(63, atest_char0, _, _, _, _, _, _, _, _),
+	PINGROUP(64, atest_char, _, _, _, _, _, _, _, _),
+	PINGROUP(65, _, _, _, _, _, _, _, _, _),
+	PINGROUP(66, _, _, _, _, _, _, _, _, _),
+	PINGROUP(67, _, _, _, _, _, _, _, _, _),
+	PINGROUP(68, _, _, _, _, _, _, _, _, _),
+	PINGROUP(69, _, _, _, _, _, _, _, _, _),
+	PINGROUP(70, _, _, ebi0_wrcdc, _, _, _, _, _, _),
+	PINGROUP(71, _, _, _, _, _, _, _, _, _),
+	PINGROUP(72, ldo_update, _, gcc_tlmm, _, _, _, _, _, _),
+	PINGROUP(73, ldo_en, dbg_out, _, _, _, atest_tsens, _, _, _),
+	PINGROUP(74, ebi2_lcd, _, _, _, _, _, _, _, _),
+	PINGROUP(75, nav_tsync_out_b, nav_ptp_pps_in_b, ptp_pps_out_b, _, qdss_tracedata_a, _, _, _,
+		 _),
+	PINGROUP(76, pbs0, sec_mi2s, blsp3_spi, pwr_modem_enabled_a, _, qdss_tracedata_a, _, _, _),
+	PINGROUP(77, pbs1, sec_mi2s, blsp2_spi, pwr_modem_enabled_b, _, qdss_tracedata_a, _, _, _),
+	PINGROUP(78, pbs2, sec_mi2s, blsp1_spi, ebi2_lcd, m_voc, pwr_nav_enabled_b, _,
+		 qdss_tracedata_a, _),
+	PINGROUP(79, sec_mi2s, _, pwr_crypto_enabled_b, _, qdss_tracedata_a, _, _, _, _),
+	SDC_PINGROUP(sdc1_clk, 0x10a000, 13, 6),
+	SDC_PINGROUP(sdc1_cmd, 0x10a000, 11, 3),
+	SDC_PINGROUP(sdc1_data, 0x10a000, 9, 0),
+	SDC_PINGROUP(sdc2_clk, 0x109000, 14, 6),
+	SDC_PINGROUP(sdc2_cmd, 0x109000, 11, 3),
+	SDC_PINGROUP(sdc2_data, 0x109000, 9, 0),
+	SDC_PINGROUP(qdsd_clk, 0x19c000, 3, 0),
+	SDC_PINGROUP(qdsd_cmd, 0x19c000, 8, 5),
+	SDC_PINGROUP(qdsd_data0, 0x19c000, 13, 10),
+	SDC_PINGROUP(qdsd_data1, 0x19c000, 18, 15),
+	SDC_PINGROUP(qdsd_data2, 0x19c000, 23, 20),
+	SDC_PINGROUP(qdsd_data3, 0x19c000, 28, 25),
+};
+
+static const struct msm_pinctrl_soc_data mdm9607_pinctrl = {
+	.pins = mdm9607_pins,
+	.npins = ARRAY_SIZE(mdm9607_pins),
+	.functions = mdm9607_functions,
+	.nfunctions = ARRAY_SIZE(mdm9607_functions),
+	.groups = mdm9607_groups,
+	.ngroups = ARRAY_SIZE(mdm9607_groups),
+	.ngpios = 80,
+};
+
+static int mdm9607_pinctrl_probe(struct platform_device *pdev)
+{
+	return msm_pinctrl_probe(pdev, &mdm9607_pinctrl);
+}
+
+static const struct of_device_id mdm9607_pinctrl_of_match[] = {
+	{ .compatible = "qcom,mdm9607-tlmm", },
+	{ }
+};
+
+static struct platform_driver mdm9607_pinctrl_driver = {
+	.driver = {
+		.name = "mdm9607-pinctrl",
+		.of_match_table = mdm9607_pinctrl_of_match,
+	},
+	.probe = mdm9607_pinctrl_probe,
+	.remove = msm_pinctrl_remove,
+};
+
+static int __init mdm9607_pinctrl_init(void)
+{
+	return platform_driver_register(&mdm9607_pinctrl_driver);
+}
+arch_initcall(mdm9607_pinctrl_init);
+
+static void __exit mdm9607_pinctrl_exit(void)
+{
+	platform_driver_unregister(&mdm9607_pinctrl_driver);
+}
+module_exit(mdm9607_pinctrl_exit);
+
+MODULE_DESCRIPTION("Qualcomm mdm9607 pinctrl driver");
+MODULE_LICENSE("GPL v2");
+MODULE_DEVICE_TABLE(of, mdm9607_pinctrl_of_match);
diff --git a/drivers/pinctrl/qcom/pinctrl-sm6115.c b/drivers/pinctrl/qcom/pinctrl-sm6115.c
new file mode 100644
index 000000000000..b3a0161ca377
--- /dev/null
+++ b/drivers/pinctrl/qcom/pinctrl-sm6115.c
@@ -0,0 +1,923 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2019, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pinctrl/pinctrl.h>
+
+#include "pinctrl-msm.h"
+
+static const char * const sm6115_tiles[] = {
+	"south",
+	"east",
+	"west"
+};
+
+enum {
+	SOUTH,
+	EAST,
+	WEST
+};
+
+#define FUNCTION(fname)					\
+	[msm_mux_##fname] = {				\
+		.name = #fname,				\
+		.groups = fname##_groups,		\
+		.ngroups = ARRAY_SIZE(fname##_groups),	\
+	}
+
+#define PINGROUP(id, _tile, f1, f2, f3, f4, f5, f6, f7, f8, f9)	\
+	{						\
+		.name = "gpio" #id,			\
+		.pins = gpio##id##_pins,		\
+		.npins = (unsigned int)ARRAY_SIZE(gpio##id##_pins),	\
+		.funcs = (int[]){			\
+			msm_mux_gpio, /* gpio mode */	\
+			msm_mux_##f1,			\
+			msm_mux_##f2,			\
+			msm_mux_##f3,			\
+			msm_mux_##f4,			\
+			msm_mux_##f5,			\
+			msm_mux_##f6,			\
+			msm_mux_##f7,			\
+			msm_mux_##f8,			\
+			msm_mux_##f9			\
+		},					\
+		.nfuncs = 10,				\
+		.ctl_reg = 0x1000 * id,		\
+		.io_reg = 0x4 + 0x1000 * id,		\
+		.intr_cfg_reg = 0x8 + 0x1000 * id,	\
+		.intr_status_reg = 0xc + 0x1000 * id,	\
+		.intr_target_reg = 0x8 + 0x1000 * id,	\
+		.tile = _tile,			\
+		.mux_bit = 2,			\
+		.pull_bit = 0,			\
+		.drv_bit = 6,			\
+		.oe_bit = 9,			\
+		.in_bit = 0,			\
+		.out_bit = 1,			\
+		.intr_enable_bit = 0,		\
+		.intr_status_bit = 0,		\
+		.intr_target_bit = 5,		\
+		.intr_target_kpss_val = 3,	\
+		.intr_raw_status_bit = 4,	\
+		.intr_polarity_bit = 1,		\
+		.intr_detection_bit = 2,	\
+		.intr_detection_width = 2,	\
+	}
+
+#define SDC_QDSD_PINGROUP(pg_name, _tile, ctl, pull, drv)	\
+	{						\
+		.name = #pg_name,			\
+		.pins = pg_name##_pins,			\
+		.npins = (unsigned int)ARRAY_SIZE(pg_name##_pins),	\
+		.ctl_reg = ctl,				\
+		.io_reg = 0,				\
+		.intr_cfg_reg = 0,			\
+		.intr_status_reg = 0,			\
+		.intr_target_reg = 0,			\
+		.tile = _tile,				\
+		.mux_bit = -1,				\
+		.pull_bit = pull,			\
+		.drv_bit = drv,				\
+		.oe_bit = -1,				\
+		.in_bit = -1,				\
+		.out_bit = -1,				\
+		.intr_enable_bit = -1,			\
+		.intr_status_bit = -1,			\
+		.intr_target_bit = -1,			\
+		.intr_raw_status_bit = -1,		\
+		.intr_polarity_bit = -1,		\
+		.intr_detection_bit = -1,		\
+		.intr_detection_width = -1,		\
+	}
+
+#define UFS_RESET(pg_name, offset)			\
+	{						\
+		.name = #pg_name,			\
+		.pins = pg_name##_pins,			\
+		.npins = (unsigned int)ARRAY_SIZE(pg_name##_pins),	\
+		.ctl_reg = offset,			\
+		.io_reg = offset + 0x4,			\
+		.intr_cfg_reg = 0,			\
+		.intr_status_reg = 0,			\
+		.intr_target_reg = 0,			\
+		.tile = WEST,				\
+		.mux_bit = -1,				\
+		.pull_bit = 3,				\
+		.drv_bit = 0,				\
+		.oe_bit = -1,				\
+		.in_bit = -1,				\
+		.out_bit = 0,				\
+		.intr_enable_bit = -1,			\
+		.intr_status_bit = -1,			\
+		.intr_target_bit = -1,			\
+		.intr_raw_status_bit = -1,		\
+		.intr_polarity_bit = -1,		\
+		.intr_detection_bit = -1,		\
+		.intr_detection_width = -1,		\
+	}
+static const struct pinctrl_pin_desc sm6115_pins[] = {
+	PINCTRL_PIN(0, "GPIO_0"),
+	PINCTRL_PIN(1, "GPIO_1"),
+	PINCTRL_PIN(2, "GPIO_2"),
+	PINCTRL_PIN(3, "GPIO_3"),
+	PINCTRL_PIN(4, "GPIO_4"),
+	PINCTRL_PIN(5, "GPIO_5"),
+	PINCTRL_PIN(6, "GPIO_6"),
+	PINCTRL_PIN(7, "GPIO_7"),
+	PINCTRL_PIN(8, "GPIO_8"),
+	PINCTRL_PIN(9, "GPIO_9"),
+	PINCTRL_PIN(10, "GPIO_10"),
+	PINCTRL_PIN(11, "GPIO_11"),
+	PINCTRL_PIN(12, "GPIO_12"),
+	PINCTRL_PIN(13, "GPIO_13"),
+	PINCTRL_PIN(14, "GPIO_14"),
+	PINCTRL_PIN(15, "GPIO_15"),
+	PINCTRL_PIN(16, "GPIO_16"),
+	PINCTRL_PIN(17, "GPIO_17"),
+	PINCTRL_PIN(18, "GPIO_18"),
+	PINCTRL_PIN(19, "GPIO_19"),
+	PINCTRL_PIN(20, "GPIO_20"),
+	PINCTRL_PIN(21, "GPIO_21"),
+	PINCTRL_PIN(22, "GPIO_22"),
+	PINCTRL_PIN(23, "GPIO_23"),
+	PINCTRL_PIN(24, "GPIO_24"),
+	PINCTRL_PIN(25, "GPIO_25"),
+	PINCTRL_PIN(26, "GPIO_26"),
+	PINCTRL_PIN(27, "GPIO_27"),
+	PINCTRL_PIN(28, "GPIO_28"),
+	PINCTRL_PIN(29, "GPIO_29"),
+	PINCTRL_PIN(30, "GPIO_30"),
+	PINCTRL_PIN(31, "GPIO_31"),
+	PINCTRL_PIN(32, "GPIO_32"),
+	PINCTRL_PIN(33, "GPIO_33"),
+	PINCTRL_PIN(34, "GPIO_34"),
+	PINCTRL_PIN(35, "GPIO_35"),
+	PINCTRL_PIN(36, "GPIO_36"),
+	PINCTRL_PIN(37, "GPIO_37"),
+	PINCTRL_PIN(38, "GPIO_38"),
+	PINCTRL_PIN(39, "GPIO_39"),
+	PINCTRL_PIN(40, "GPIO_40"),
+	PINCTRL_PIN(41, "GPIO_41"),
+	PINCTRL_PIN(42, "GPIO_42"),
+	PINCTRL_PIN(43, "GPIO_43"),
+	PINCTRL_PIN(44, "GPIO_44"),
+	PINCTRL_PIN(45, "GPIO_45"),
+	PINCTRL_PIN(46, "GPIO_46"),
+	PINCTRL_PIN(47, "GPIO_47"),
+	PINCTRL_PIN(48, "GPIO_48"),
+	PINCTRL_PIN(49, "GPIO_49"),
+	PINCTRL_PIN(50, "GPIO_50"),
+	PINCTRL_PIN(51, "GPIO_51"),
+	PINCTRL_PIN(52, "GPIO_52"),
+	PINCTRL_PIN(53, "GPIO_53"),
+	PINCTRL_PIN(54, "GPIO_54"),
+	PINCTRL_PIN(55, "GPIO_55"),
+	PINCTRL_PIN(56, "GPIO_56"),
+	PINCTRL_PIN(57, "GPIO_57"),
+	PINCTRL_PIN(58, "GPIO_58"),
+	PINCTRL_PIN(59, "GPIO_59"),
+	PINCTRL_PIN(60, "GPIO_60"),
+	PINCTRL_PIN(61, "GPIO_61"),
+	PINCTRL_PIN(62, "GPIO_62"),
+	PINCTRL_PIN(63, "GPIO_63"),
+	PINCTRL_PIN(64, "GPIO_64"),
+	PINCTRL_PIN(65, "GPIO_65"),
+	PINCTRL_PIN(66, "GPIO_66"),
+	PINCTRL_PIN(67, "GPIO_67"),
+	PINCTRL_PIN(68, "GPIO_68"),
+	PINCTRL_PIN(69, "GPIO_69"),
+	PINCTRL_PIN(70, "GPIO_70"),
+	PINCTRL_PIN(71, "GPIO_71"),
+	PINCTRL_PIN(72, "GPIO_72"),
+	PINCTRL_PIN(73, "GPIO_73"),
+	PINCTRL_PIN(74, "GPIO_74"),
+	PINCTRL_PIN(75, "GPIO_75"),
+	PINCTRL_PIN(76, "GPIO_76"),
+	PINCTRL_PIN(77, "GPIO_77"),
+	PINCTRL_PIN(78, "GPIO_78"),
+	PINCTRL_PIN(79, "GPIO_79"),
+	PINCTRL_PIN(80, "GPIO_80"),
+	PINCTRL_PIN(81, "GPIO_81"),
+	PINCTRL_PIN(82, "GPIO_82"),
+	PINCTRL_PIN(83, "GPIO_83"),
+	PINCTRL_PIN(84, "GPIO_84"),
+	PINCTRL_PIN(85, "GPIO_85"),
+	PINCTRL_PIN(86, "GPIO_86"),
+	PINCTRL_PIN(87, "GPIO_87"),
+	PINCTRL_PIN(88, "GPIO_88"),
+	PINCTRL_PIN(89, "GPIO_89"),
+	PINCTRL_PIN(90, "GPIO_90"),
+	PINCTRL_PIN(91, "GPIO_91"),
+	PINCTRL_PIN(92, "GPIO_92"),
+	PINCTRL_PIN(93, "GPIO_93"),
+	PINCTRL_PIN(94, "GPIO_94"),
+	PINCTRL_PIN(95, "GPIO_95"),
+	PINCTRL_PIN(96, "GPIO_96"),
+	PINCTRL_PIN(97, "GPIO_97"),
+	PINCTRL_PIN(98, "GPIO_98"),
+	PINCTRL_PIN(99, "GPIO_99"),
+	PINCTRL_PIN(100, "GPIO_100"),
+	PINCTRL_PIN(101, "GPIO_101"),
+	PINCTRL_PIN(102, "GPIO_102"),
+	PINCTRL_PIN(103, "GPIO_103"),
+	PINCTRL_PIN(104, "GPIO_104"),
+	PINCTRL_PIN(105, "GPIO_105"),
+	PINCTRL_PIN(106, "GPIO_106"),
+	PINCTRL_PIN(107, "GPIO_107"),
+	PINCTRL_PIN(108, "GPIO_108"),
+	PINCTRL_PIN(109, "GPIO_109"),
+	PINCTRL_PIN(110, "GPIO_110"),
+	PINCTRL_PIN(111, "GPIO_111"),
+	PINCTRL_PIN(112, "GPIO_112"),
+	PINCTRL_PIN(113, "UFS_RESET"),
+	PINCTRL_PIN(114, "SDC1_RCLK"),
+	PINCTRL_PIN(115, "SDC1_CLK"),
+	PINCTRL_PIN(116, "SDC1_CMD"),
+	PINCTRL_PIN(117, "SDC1_DATA"),
+	PINCTRL_PIN(118, "SDC2_CLK"),
+	PINCTRL_PIN(119, "SDC2_CMD"),
+	PINCTRL_PIN(120, "SDC2_DATA"),
+};
+
+#define DECLARE_MSM_GPIO_PINS(pin) \
+	static const unsigned int gpio##pin##_pins[] = { pin }
+DECLARE_MSM_GPIO_PINS(0);
+DECLARE_MSM_GPIO_PINS(1);
+DECLARE_MSM_GPIO_PINS(2);
+DECLARE_MSM_GPIO_PINS(3);
+DECLARE_MSM_GPIO_PINS(4);
+DECLARE_MSM_GPIO_PINS(5);
+DECLARE_MSM_GPIO_PINS(6);
+DECLARE_MSM_GPIO_PINS(7);
+DECLARE_MSM_GPIO_PINS(8);
+DECLARE_MSM_GPIO_PINS(9);
+DECLARE_MSM_GPIO_PINS(10);
+DECLARE_MSM_GPIO_PINS(11);
+DECLARE_MSM_GPIO_PINS(12);
+DECLARE_MSM_GPIO_PINS(13);
+DECLARE_MSM_GPIO_PINS(14);
+DECLARE_MSM_GPIO_PINS(15);
+DECLARE_MSM_GPIO_PINS(16);
+DECLARE_MSM_GPIO_PINS(17);
+DECLARE_MSM_GPIO_PINS(18);
+DECLARE_MSM_GPIO_PINS(19);
+DECLARE_MSM_GPIO_PINS(20);
+DECLARE_MSM_GPIO_PINS(21);
+DECLARE_MSM_GPIO_PINS(22);
+DECLARE_MSM_GPIO_PINS(23);
+DECLARE_MSM_GPIO_PINS(24);
+DECLARE_MSM_GPIO_PINS(25);
+DECLARE_MSM_GPIO_PINS(26);
+DECLARE_MSM_GPIO_PINS(27);
+DECLARE_MSM_GPIO_PINS(28);
+DECLARE_MSM_GPIO_PINS(29);
+DECLARE_MSM_GPIO_PINS(30);
+DECLARE_MSM_GPIO_PINS(31);
+DECLARE_MSM_GPIO_PINS(32);
+DECLARE_MSM_GPIO_PINS(33);
+DECLARE_MSM_GPIO_PINS(34);
+DECLARE_MSM_GPIO_PINS(35);
+DECLARE_MSM_GPIO_PINS(36);
+DECLARE_MSM_GPIO_PINS(37);
+DECLARE_MSM_GPIO_PINS(38);
+DECLARE_MSM_GPIO_PINS(39);
+DECLARE_MSM_GPIO_PINS(40);
+DECLARE_MSM_GPIO_PINS(41);
+DECLARE_MSM_GPIO_PINS(42);
+DECLARE_MSM_GPIO_PINS(43);
+DECLARE_MSM_GPIO_PINS(44);
+DECLARE_MSM_GPIO_PINS(45);
+DECLARE_MSM_GPIO_PINS(46);
+DECLARE_MSM_GPIO_PINS(47);
+DECLARE_MSM_GPIO_PINS(48);
+DECLARE_MSM_GPIO_PINS(49);
+DECLARE_MSM_GPIO_PINS(50);
+DECLARE_MSM_GPIO_PINS(51);
+DECLARE_MSM_GPIO_PINS(52);
+DECLARE_MSM_GPIO_PINS(53);
+DECLARE_MSM_GPIO_PINS(54);
+DECLARE_MSM_GPIO_PINS(55);
+DECLARE_MSM_GPIO_PINS(56);
+DECLARE_MSM_GPIO_PINS(57);
+DECLARE_MSM_GPIO_PINS(58);
+DECLARE_MSM_GPIO_PINS(59);
+DECLARE_MSM_GPIO_PINS(60);
+DECLARE_MSM_GPIO_PINS(61);
+DECLARE_MSM_GPIO_PINS(62);
+DECLARE_MSM_GPIO_PINS(63);
+DECLARE_MSM_GPIO_PINS(64);
+DECLARE_MSM_GPIO_PINS(65);
+DECLARE_MSM_GPIO_PINS(66);
+DECLARE_MSM_GPIO_PINS(67);
+DECLARE_MSM_GPIO_PINS(68);
+DECLARE_MSM_GPIO_PINS(69);
+DECLARE_MSM_GPIO_PINS(70);
+DECLARE_MSM_GPIO_PINS(71);
+DECLARE_MSM_GPIO_PINS(72);
+DECLARE_MSM_GPIO_PINS(73);
+DECLARE_MSM_GPIO_PINS(74);
+DECLARE_MSM_GPIO_PINS(75);
+DECLARE_MSM_GPIO_PINS(76);
+DECLARE_MSM_GPIO_PINS(77);
+DECLARE_MSM_GPIO_PINS(78);
+DECLARE_MSM_GPIO_PINS(79);
+DECLARE_MSM_GPIO_PINS(80);
+DECLARE_MSM_GPIO_PINS(81);
+DECLARE_MSM_GPIO_PINS(82);
+DECLARE_MSM_GPIO_PINS(83);
+DECLARE_MSM_GPIO_PINS(84);
+DECLARE_MSM_GPIO_PINS(85);
+DECLARE_MSM_GPIO_PINS(86);
+DECLARE_MSM_GPIO_PINS(87);
+DECLARE_MSM_GPIO_PINS(88);
+DECLARE_MSM_GPIO_PINS(89);
+DECLARE_MSM_GPIO_PINS(90);
+DECLARE_MSM_GPIO_PINS(91);
+DECLARE_MSM_GPIO_PINS(92);
+DECLARE_MSM_GPIO_PINS(93);
+DECLARE_MSM_GPIO_PINS(94);
+DECLARE_MSM_GPIO_PINS(95);
+DECLARE_MSM_GPIO_PINS(96);
+DECLARE_MSM_GPIO_PINS(97);
+DECLARE_MSM_GPIO_PINS(98);
+DECLARE_MSM_GPIO_PINS(99);
+DECLARE_MSM_GPIO_PINS(100);
+DECLARE_MSM_GPIO_PINS(101);
+DECLARE_MSM_GPIO_PINS(102);
+DECLARE_MSM_GPIO_PINS(103);
+DECLARE_MSM_GPIO_PINS(104);
+DECLARE_MSM_GPIO_PINS(105);
+DECLARE_MSM_GPIO_PINS(106);
+DECLARE_MSM_GPIO_PINS(107);
+DECLARE_MSM_GPIO_PINS(108);
+DECLARE_MSM_GPIO_PINS(109);
+DECLARE_MSM_GPIO_PINS(110);
+DECLARE_MSM_GPIO_PINS(111);
+DECLARE_MSM_GPIO_PINS(112);
+
+static const unsigned int ufs_reset_pins[] = { 113 };
+static const unsigned int sdc1_rclk_pins[] = { 114 };
+static const unsigned int sdc1_clk_pins[] = { 115 };
+static const unsigned int sdc1_cmd_pins[] = { 116 };
+static const unsigned int sdc1_data_pins[] = { 117 };
+static const unsigned int sdc2_clk_pins[] = { 118 };
+static const unsigned int sdc2_cmd_pins[] = { 119 };
+static const unsigned int sdc2_data_pins[] = { 120 };
+
+enum sm6115_functions {
+	msm_mux_adsp_ext,
+	msm_mux_agera_pll,
+	msm_mux_atest,
+	msm_mux_cam_mclk,
+	msm_mux_cci_async,
+	msm_mux_cci_i2c,
+	msm_mux_cci_timer,
+	msm_mux_cri_trng,
+	msm_mux_dac_calib,
+	msm_mux_dbg_out,
+	msm_mux_ddr_bist,
+	msm_mux_ddr_pxi0,
+	msm_mux_ddr_pxi1,
+	msm_mux_ddr_pxi2,
+	msm_mux_ddr_pxi3,
+	msm_mux_gcc_gp1,
+	msm_mux_gcc_gp2,
+	msm_mux_gcc_gp3,
+	msm_mux_gpio,
+	msm_mux_gp_pdm0,
+	msm_mux_gp_pdm1,
+	msm_mux_gp_pdm2,
+	msm_mux_gsm0_tx,
+	msm_mux_gsm1_tx,
+	msm_mux_jitter_bist,
+	msm_mux_mdp_vsync,
+	msm_mux_mdp_vsync_out_0,
+	msm_mux_mdp_vsync_out_1,
+	msm_mux_mpm_pwr,
+	msm_mux_mss_lte,
+	msm_mux_m_voc,
+	msm_mux_nav_gpio,
+	msm_mux_pa_indicator,
+	msm_mux_pbs,
+	msm_mux_pbs_out,
+	msm_mux_phase_flag,
+	msm_mux_pll_bist,
+	msm_mux_pll_bypassnl,
+	msm_mux_pll_reset,
+	msm_mux_prng_rosc,
+	msm_mux_qdss_cti,
+	msm_mux_qdss_gpio,
+	msm_mux_qup0,
+	msm_mux_qup1,
+	msm_mux_qup2,
+	msm_mux_qup3,
+	msm_mux_qup4,
+	msm_mux_qup5,
+	msm_mux_sdc1_tb,
+	msm_mux_sdc2_tb,
+	msm_mux_sd_write,
+	msm_mux_ssbi_wtr1,
+	msm_mux_tgu,
+	msm_mux_tsense_pwm,
+	msm_mux_uim1_clk,
+	msm_mux_uim1_data,
+	msm_mux_uim1_present,
+	msm_mux_uim1_reset,
+	msm_mux_uim2_clk,
+	msm_mux_uim2_data,
+	msm_mux_uim2_present,
+	msm_mux_uim2_reset,
+	msm_mux_usb_phy,
+	msm_mux_vfr_1,
+	msm_mux_vsense_trigger,
+	msm_mux_wlan1_adc0,
+	msm_mux_wlan1_adc1,
+	msm_mux__,
+};
+
+static const char * const qup0_groups[] = {
+	"gpio0", "gpio1", "gpio2", "gpio3", "gpio82", "gpio86",
+};
+static const char * const gpio_groups[] = {
+	"gpio0", "gpio1", "gpio2", "gpio3", "gpio4", "gpio5", "gpio6", "gpio7",
+	"gpio8", "gpio9", "gpio10", "gpio11", "gpio12", "gpio13", "gpio14",
+	"gpio15", "gpio16", "gpio17", "gpio18", "gpio19", "gpio20", "gpio21",
+	"gpio22", "gpio23", "gpio24", "gpio25", "gpio26", "gpio27", "gpio28",
+	"gpio29", "gpio30", "gpio31", "gpio32", "gpio33", "gpio34", "gpio35",
+	"gpio36", "gpio37", "gpio38", "gpio39", "gpio40", "gpio41", "gpio42",
+	"gpio43", "gpio44", "gpio45", "gpio46", "gpio47", "gpio48", "gpio49",
+	"gpio50", "gpio51", "gpio52", "gpio53", "gpio54", "gpio55", "gpio56",
+	"gpio57", "gpio58", "gpio59", "gpio60", "gpio61", "gpio62", "gpio63",
+	"gpio64", "gpio65", "gpio66", "gpio67", "gpio68", "gpio69", "gpio70",
+	"gpio71", "gpio72", "gpio73", "gpio74", "gpio75", "gpio76", "gpio77",
+	"gpio78", "gpio79", "gpio80", "gpio81", "gpio82", "gpio83", "gpio84",
+	"gpio85", "gpio86", "gpio87", "gpio88", "gpio89", "gpio90", "gpio91",
+	"gpio92", "gpio93", "gpio94", "gpio95", "gpio96", "gpio97", "gpio98",
+	"gpio99", "gpio100", "gpio101", "gpio102", "gpio103", "gpio104",
+	"gpio105", "gpio106", "gpio107", "gpio108", "gpio109", "gpio110",
+	"gpio111", "gpio112",
+};
+static const char * const ddr_bist_groups[] = {
+	"gpio0", "gpio1", "gpio2", "gpio3",
+};
+static const char * const phase_flag_groups[] = {
+	"gpio0", "gpio1", "gpio2", "gpio3", "gpio4", "gpio5", "gpio6",
+	"gpio14", "gpio15", "gpio16", "gpio17", "gpio22", "gpio23", "gpio24",
+	"gpio25", "gpio26", "gpio29", "gpio30", "gpio31", "gpio32", "gpio33",
+	"gpio35", "gpio36", "gpio43", "gpio44", "gpio45", "gpio63", "gpio64",
+	"gpio102", "gpio103", "gpio104", "gpio105",
+};
+static const char * const qdss_gpio_groups[] = {
+	"gpio0", "gpio1", "gpio2", "gpio3", "gpio8", "gpio9", "gpio10",
+	"gpio11", "gpio14", "gpio15", "gpio16", "gpio17", "gpio18", "gpio19",
+	"gpio20", "gpio21", "gpio22", "gpio23", "gpio24", "gpio25", "gpio26",
+	"gpio47", "gpio48", "gpio69", "gpio70", "gpio87", "gpio90", "gpio91",
+	"gpio94", "gpio95", "gpio104", "gpio105", "gpio106", "gpio107",
+	"gpio109", "gpio110",
+};
+static const char * const atest_groups[] = {
+	"gpio0", "gpio1", "gpio2", "gpio3", "gpio4", "gpio5", "gpio6",
+	"gpio22", "gpio23", "gpio24", "gpio25", "gpio26", "gpio29", "gpio30",
+	"gpio31", "gpio32", "gpio33", "gpio86", "gpio87", "gpio88", "gpio89",
+	"gpio100", "gpio101",
+};
+static const char * const mpm_pwr_groups[] = {
+	"gpio1",
+};
+static const char * const m_voc_groups[] = {
+	"gpio0",
+};
+static const char * const dac_calib_groups[] = {
+	"gpio2", "gpio3", "gpio4", "gpio5", "gpio6", "gpio14", "gpio15",
+	"gpio16", "gpio17", "gpio22", "gpio23", "gpio24", "gpio25", "gpio26",
+	"gpio29", "gpio30", "gpio31", "gpio32", "gpio33", "gpio80", "gpio81",
+	"gpio82", "gpio102", "gpio103", "gpio104", "gpio105"
+};
+static const char * const qup1_groups[] = {
+	"gpio4", "gpio5", "gpio69", "gpio70",
+};
+static const char * const cri_trng_groups[] = {
+	"gpio4", "gpio5", "gpio18",
+};
+static const char * const qup2_groups[] = {
+	"gpio6", "gpio7", "gpio71", "gpio80",
+};
+static const char * const qup3_groups[] = {
+	"gpio8", "gpio9", "gpio10", "gpio11",
+};
+static const char * const pbs_out_groups[] = {
+	"gpio8", "gpio9", "gpio52",
+};
+static const char * const pll_bist_groups[] = {
+	"gpio8", "gpio9",
+};
+static const char * const tsense_pwm_groups[] = {
+	"gpio8",
+};
+static const char * const agera_pll_groups[] = {
+	"gpio10", "gpio11",
+};
+static const char * const pbs_groups[] = {
+	"gpio10", "gpio11", "gpio18", "gpio19", "gpio20", "gpio21", "gpio22",
+	"gpio23", "gpio24", "gpio25", "gpio26", "gpio47", "gpio48", "gpio87",
+	"gpio90", "gpio91",
+};
+static const char * const qup4_groups[] = {
+	"gpio12", "gpio13", "gpio96", "gpio97",
+};
+static const char * const tgu_groups[] = {
+	"gpio12", "gpio13", "gpio14", "gpio15",
+};
+static const char * const qup5_groups[] = {
+	"gpio14", "gpio15", "gpio16", "gpio17",
+};
+static const char * const sdc2_tb_groups[] = {
+	"gpio18",
+};
+static const char * const sdc1_tb_groups[] = {
+	"gpio19",
+};
+static const char * const cam_mclk_groups[] = {
+	"gpio20", "gpio21", "gpio27", "gpio28",
+};
+static const char * const adsp_ext_groups[] = {
+	"gpio21",
+};
+static const char * const cci_i2c_groups[] = {
+	"gpio22", "gpio23", "gpio29", "gpio30",
+};
+static const char * const prng_rosc_groups[] = {
+	"gpio22", "gpio23",
+};
+static const char * const cci_timer_groups[] = {
+	"gpio24", "gpio25", "gpio28", "gpio32",
+};
+static const char * const gcc_gp1_groups[] = {
+	"gpio24", "gpio86",
+};
+static const char * const cci_async_groups[] = {
+	"gpio25",
+};
+static const char * const vsense_trigger_groups[] = {
+	"gpio26",
+};
+static const char * const qdss_cti_groups[] = {
+	"gpio27", "gpio28", "gpio72", "gpio73", "gpio96", "gpio97",
+};
+static const char * const gp_pdm0_groups[] = {
+	"gpio31", "gpio95",
+};
+static const char * const gp_pdm1_groups[] = {
+	"gpio32", "gpio96",
+};
+static const char * const gp_pdm2_groups[] = {
+	"gpio33", "gpio97",
+};
+static const char * const nav_gpio_groups[] = {
+	"gpio42", "gpio47", "gpio52", "gpio95", "gpio96", "gpio97", "gpio106",
+	"gpio107", "gpio108",
+};
+static const char * const vfr_1_groups[] = {
+	"gpio48",
+};
+static const char * const pa_indicator_groups[] = {
+	"gpio49",
+};
+static const char * const gsm1_tx_groups[] = {
+	"gpio53",
+};
+static const char * const ssbi_wtr1_groups[] = {
+	"gpio59", "gpio60",
+};
+static const char * const pll_bypassnl_groups[] = {
+	"gpio62",
+};
+static const char * const pll_reset_groups[] = {
+	"gpio63",
+};
+static const char * const ddr_pxi0_groups[] = {
+	"gpio63", "gpio64",
+};
+static const char * const gsm0_tx_groups[] = {
+	"gpio64",
+};
+static const char * const gcc_gp2_groups[] = {
+	"gpio69", "gpio107",
+};
+static const char * const ddr_pxi1_groups[] = {
+	"gpio69", "gpio70",
+};
+static const char * const gcc_gp3_groups[] = {
+	"gpio70", "gpio106",
+};
+static const char * const dbg_out_groups[] = {
+	"gpio71",
+};
+static const char * const uim2_data_groups[] = {
+	"gpio72",
+};
+static const char * const uim2_clk_groups[] = {
+	"gpio73",
+};
+static const char * const uim2_reset_groups[] = {
+	"gpio74",
+};
+static const char * const uim2_present_groups[] = {
+	"gpio75",
+};
+static const char * const uim1_data_groups[] = {
+	"gpio76",
+};
+static const char * const uim1_clk_groups[] = {
+	"gpio77",
+};
+static const char * const uim1_reset_groups[] = {
+	"gpio78",
+};
+static const char * const uim1_present_groups[] = {
+	"gpio79",
+};
+static const char * const mdp_vsync_groups[] = {
+	"gpio81", "gpio96", "gpio97",
+};
+static const char * const mdp_vsync_out_0_groups[] = {
+	"gpio81",
+};
+static const char * const mdp_vsync_out_1_groups[] = {
+	"gpio81",
+};
+static const char * const usb_phy_groups[] = {
+	"gpio89",
+};
+static const char * const mss_lte_groups[] = {
+	"gpio90", "gpio91",
+};
+static const char * const wlan1_adc0_groups[] = {
+	"gpio94",
+};
+static const char * const wlan1_adc1_groups[] = {
+	"gpio95",
+};
+static const char * const sd_write_groups[] = {
+	"gpio96",
+};
+static const char * const jitter_bist_groups[] = {
+	"gpio96", "gpio97",
+};
+static const char * const ddr_pxi2_groups[] = {
+	"gpio102", "gpio103",
+};
+static const char * const ddr_pxi3_groups[] = {
+	"gpio104", "gpio105",
+};
+
+static const struct msm_function sm6115_functions[] = {
+	FUNCTION(adsp_ext),
+	FUNCTION(agera_pll),
+	FUNCTION(atest),
+	FUNCTION(cam_mclk),
+	FUNCTION(cci_async),
+	FUNCTION(cci_i2c),
+	FUNCTION(cci_timer),
+	FUNCTION(cri_trng),
+	FUNCTION(dac_calib),
+	FUNCTION(dbg_out),
+	FUNCTION(ddr_bist),
+	FUNCTION(ddr_pxi0),
+	FUNCTION(ddr_pxi1),
+	FUNCTION(ddr_pxi2),
+	FUNCTION(ddr_pxi3),
+	FUNCTION(gcc_gp1),
+	FUNCTION(gcc_gp2),
+	FUNCTION(gcc_gp3),
+	FUNCTION(gpio),
+	FUNCTION(gp_pdm0),
+	FUNCTION(gp_pdm1),
+	FUNCTION(gp_pdm2),
+	FUNCTION(gsm0_tx),
+	FUNCTION(gsm1_tx),
+	FUNCTION(jitter_bist),
+	FUNCTION(mdp_vsync),
+	FUNCTION(mdp_vsync_out_0),
+	FUNCTION(mdp_vsync_out_1),
+	FUNCTION(mpm_pwr),
+	FUNCTION(mss_lte),
+	FUNCTION(m_voc),
+	FUNCTION(nav_gpio),
+	FUNCTION(pa_indicator),
+	FUNCTION(pbs),
+	FUNCTION(pbs_out),
+	FUNCTION(phase_flag),
+	FUNCTION(pll_bist),
+	FUNCTION(pll_bypassnl),
+	FUNCTION(pll_reset),
+	FUNCTION(prng_rosc),
+	FUNCTION(qdss_cti),
+	FUNCTION(qdss_gpio),
+	FUNCTION(qup0),
+	FUNCTION(qup1),
+	FUNCTION(qup2),
+	FUNCTION(qup3),
+	FUNCTION(qup4),
+	FUNCTION(qup5),
+	FUNCTION(sdc1_tb),
+	FUNCTION(sdc2_tb),
+	FUNCTION(sd_write),
+	FUNCTION(ssbi_wtr1),
+	FUNCTION(tgu),
+	FUNCTION(tsense_pwm),
+	FUNCTION(uim1_clk),
+	FUNCTION(uim1_data),
+	FUNCTION(uim1_present),
+	FUNCTION(uim1_reset),
+	FUNCTION(uim2_clk),
+	FUNCTION(uim2_data),
+	FUNCTION(uim2_present),
+	FUNCTION(uim2_reset),
+	FUNCTION(usb_phy),
+	FUNCTION(vfr_1),
+	FUNCTION(vsense_trigger),
+	FUNCTION(wlan1_adc0),
+	FUNCTION(wlan1_adc1),
+};
+
+/* Every pin is maintained as a single group, and missing or non-existing pin
+ * would be maintained as dummy group to synchronize pin group index with
+ * pin descriptor registered with pinctrl core.
+ * Clients would not be able to request these dummy pin groups.
+ */
+static const struct msm_pingroup sm6115_groups[] = {
+	[0] = PINGROUP(0, WEST, qup0, m_voc, ddr_bist, _, phase_flag, qdss_gpio, atest, _, _),
+	[1] = PINGROUP(1, WEST, qup0, mpm_pwr, ddr_bist, _, phase_flag, qdss_gpio, atest, _, _),
+	[2] = PINGROUP(2, WEST, qup0, ddr_bist, _, phase_flag, qdss_gpio, dac_calib, atest, _, _),
+	[3] = PINGROUP(3, WEST, qup0, ddr_bist, _, phase_flag, qdss_gpio, dac_calib, atest, _, _),
+	[4] = PINGROUP(4, WEST, qup1, cri_trng, _, phase_flag, dac_calib, atest, _, _, _),
+	[5] = PINGROUP(5, WEST, qup1, cri_trng, _, phase_flag, dac_calib, atest, _, _, _),
+	[6] = PINGROUP(6, WEST, qup2, _, phase_flag, dac_calib, atest, _, _, _, _),
+	[7] = PINGROUP(7, WEST, qup2, _, _, _, _, _, _, _, _),
+	[8] = PINGROUP(8, EAST, qup3, pbs_out, pll_bist, _, qdss_gpio, _, tsense_pwm, _, _),
+	[9] = PINGROUP(9, EAST, qup3, pbs_out, pll_bist, _, qdss_gpio, _, _, _, _),
+	[10] = PINGROUP(10, EAST, qup3, agera_pll, _, pbs, qdss_gpio, _, _, _, _),
+	[11] = PINGROUP(11, EAST, qup3, agera_pll, _, pbs, qdss_gpio, _, _, _, _),
+	[12] = PINGROUP(12, WEST, qup4, tgu, _, _, _, _, _, _, _),
+	[13] = PINGROUP(13, WEST, qup4, tgu, _, _, _, _, _, _, _),
+	[14] = PINGROUP(14, WEST, qup5, tgu, _, phase_flag, qdss_gpio, dac_calib, _, _, _),
+	[15] = PINGROUP(15, WEST, qup5, tgu, _, phase_flag, qdss_gpio, dac_calib, _, _, _),
+	[16] = PINGROUP(16, WEST, qup5, _, phase_flag, qdss_gpio, dac_calib, _, _, _, _),
+	[17] = PINGROUP(17, WEST, qup5, _, phase_flag, qdss_gpio, dac_calib, _, _, _, _),
+	[18] = PINGROUP(18, EAST, sdc2_tb, cri_trng, pbs, qdss_gpio, _, _, _, _, _),
+	[19] = PINGROUP(19, EAST, sdc1_tb, pbs, qdss_gpio, _, _, _, _, _, _),
+	[20] = PINGROUP(20, EAST, cam_mclk, pbs, qdss_gpio, _, _, _, _, _, _),
+	[21] = PINGROUP(21, EAST, cam_mclk, adsp_ext, pbs, qdss_gpio, _, _, _, _, _),
+	[22] = PINGROUP(22, EAST, cci_i2c, prng_rosc, _, pbs, phase_flag, qdss_gpio, dac_calib, atest, _),
+	[23] = PINGROUP(23, EAST, cci_i2c, prng_rosc, _, pbs, phase_flag, qdss_gpio, dac_calib, atest, _),
+	[24] = PINGROUP(24, EAST, cci_timer, gcc_gp1, _, pbs, phase_flag, qdss_gpio, dac_calib, atest, _),
+	[25] = PINGROUP(25, EAST, cci_async, cci_timer, _, pbs, phase_flag, qdss_gpio, dac_calib, atest, _),
+	[26] = PINGROUP(26, EAST, _, pbs, phase_flag, qdss_gpio, dac_calib, atest, vsense_trigger, _, _),
+	[27] = PINGROUP(27, EAST, cam_mclk, qdss_cti, _, _, _, _, _, _, _),
+	[28] = PINGROUP(28, EAST, cam_mclk, cci_timer, qdss_cti, _, _, _, _, _, _),
+	[29] = PINGROUP(29, EAST, cci_i2c, _, phase_flag, dac_calib, atest, _, _, _, _),
+	[30] = PINGROUP(30, EAST, cci_i2c, _, phase_flag, dac_calib, atest, _, _, _, _),
+	[31] = PINGROUP(31, EAST, gp_pdm0, _, phase_flag, dac_calib, atest, _, _, _, _),
+	[32] = PINGROUP(32, EAST, cci_timer, gp_pdm1, _, phase_flag, dac_calib, atest, _, _, _),
+	[33] = PINGROUP(33, EAST, gp_pdm2, _, phase_flag, dac_calib, atest, _, _, _, _),
+	[34] = PINGROUP(34, EAST, _, _, _, _, _, _, _, _, _),
+	[35] = PINGROUP(35, EAST, _, phase_flag, _, _, _, _, _, _, _),
+	[36] = PINGROUP(36, EAST, _, phase_flag, _, _, _, _, _, _, _),
+	[37] = PINGROUP(37, EAST, _, _, _, _, _, _, _, _, _),
+	[38] = PINGROUP(38, EAST, _, _, _, _, _, _, _, _, _),
+	[39] = PINGROUP(39, EAST, _, _, _, _, _, _, _, _, _),
+	[40] = PINGROUP(40, EAST, _, _, _, _, _, _, _, _, _),
+	[41] = PINGROUP(41, EAST, _, _, _, _, _, _, _, _, _),
+	[42] = PINGROUP(42, EAST, _, nav_gpio, _, _, _, _, _, _, _),
+	[43] = PINGROUP(43, EAST, _, _, phase_flag, _, _, _, _, _, _),
+	[44] = PINGROUP(44, EAST, _, _, phase_flag, _, _, _, _, _, _),
+	[45] = PINGROUP(45, EAST, _, _, phase_flag, _, _, _, _, _, _),
+	[46] = PINGROUP(46, EAST, _, _, _, _, _, _, _, _, _),
+	[47] = PINGROUP(47, EAST, _, nav_gpio, pbs, qdss_gpio, _, _, _, _, _),
+	[48] = PINGROUP(48, EAST, _, vfr_1, _, pbs, qdss_gpio, _, _, _, _),
+	[49] = PINGROUP(49, EAST, _, pa_indicator, _, _, _, _, _, _, _),
+	[50] = PINGROUP(50, EAST, _, _, _, _, _, _, _, _, _),
+	[51] = PINGROUP(51, EAST, _, _, _, _, _, _, _, _, _),
+	[52] = PINGROUP(52, EAST, _, nav_gpio, pbs_out, _, _, _, _, _, _),
+	[53] = PINGROUP(53, EAST, _, gsm1_tx, _, _, _, _, _, _, _),
+	[54] = PINGROUP(54, EAST, _, _, _, _, _, _, _, _, _),
+	[55] = PINGROUP(55, EAST, _, _, _, _, _, _, _, _, _),
+	[56] = PINGROUP(56, EAST, _, _, _, _, _, _, _, _, _),
+	[57] = PINGROUP(57, EAST, _, _, _, _, _, _, _, _, _),
+	[58] = PINGROUP(58, EAST, _, _, _, _, _, _, _, _, _),
+	[59] = PINGROUP(59, EAST, _, ssbi_wtr1, _, _, _, _, _, _, _),
+	[60] = PINGROUP(60, EAST, _, ssbi_wtr1, _, _, _, _, _, _, _),
+	[61] = PINGROUP(61, EAST, _, _, _, _, _, _, _, _, _),
+	[62] = PINGROUP(62, EAST, _, pll_bypassnl, _, _, _, _, _, _, _),
+	[63] = PINGROUP(63, EAST, pll_reset, _, phase_flag, ddr_pxi0, _, _, _, _, _),
+	[64] = PINGROUP(64, EAST, gsm0_tx, _, phase_flag, ddr_pxi0, _, _, _, _, _),
+	[65] = PINGROUP(65, WEST, _, _, _, _, _, _, _, _, _),
+	[66] = PINGROUP(66, WEST, _, _, _, _, _, _, _, _, _),
+	[67] = PINGROUP(67, WEST, _, _, _, _, _, _, _, _, _),
+	[68] = PINGROUP(68, WEST, _, _, _, _, _, _, _, _, _),
+	[69] = PINGROUP(69, WEST, qup1, gcc_gp2, qdss_gpio, ddr_pxi1, _, _, _, _, _),
+	[70] = PINGROUP(70, WEST, qup1, gcc_gp3, qdss_gpio, ddr_pxi1, _, _, _, _, _),
+	[71] = PINGROUP(71, WEST, qup2, dbg_out, _, _, _, _, _, _, _),
+	[72] = PINGROUP(72, SOUTH, uim2_data, qdss_cti, _, _, _, _, _, _, _),
+	[73] = PINGROUP(73, SOUTH, uim2_clk, _, qdss_cti, _, _, _, _, _, _),
+	[74] = PINGROUP(74, SOUTH, uim2_reset, _, _, _, _, _, _, _, _),
+	[75] = PINGROUP(75, SOUTH, uim2_present, _, _, _, _, _, _, _, _),
+	[76] = PINGROUP(76, SOUTH, uim1_data, _, _, _, _, _, _, _, _),
+	[77] = PINGROUP(77, SOUTH, uim1_clk, _, _, _, _, _, _, _, _),
+	[78] = PINGROUP(78, SOUTH, uim1_reset, _, _, _, _, _, _, _, _),
+	[79] = PINGROUP(79, SOUTH, uim1_present, _, _, _, _, _, _, _, _),
+	[80] = PINGROUP(80, WEST, qup2, dac_calib, _, _, _, _, _, _, _),
+	[81] = PINGROUP(81, WEST, mdp_vsync_out_0, mdp_vsync_out_1, mdp_vsync, dac_calib, _, _, _, _, _),
+	[82] = PINGROUP(82, WEST, qup0, dac_calib, _, _, _, _, _, _, _),
+	[83] = PINGROUP(83, WEST, _, _, _, _, _, _, _, _, _),
+	[84] = PINGROUP(84, WEST, _, _, _, _, _, _, _, _, _),
+	[85] = PINGROUP(85, WEST, _, _, _, _, _, _, _, _, _),
+	[86] = PINGROUP(86, WEST, qup0, gcc_gp1, atest, _, _, _, _, _, _),
+	[87] = PINGROUP(87, EAST, pbs, qdss_gpio, _, _, _, _, _, _, _),
+	[88] = PINGROUP(88, EAST, _, _, _, _, _, _, _, _, _),
+	[89] = PINGROUP(89, WEST, usb_phy, atest, _, _, _, _, _, _, _),
+	[90] = PINGROUP(90, EAST, mss_lte, pbs, qdss_gpio, _, _, _, _, _, _),
+	[91] = PINGROUP(91, EAST, mss_lte, pbs, qdss_gpio, _, _, _, _, _, _),
+	[92] = PINGROUP(92, WEST, _, _, _, _, _, _, _, _, _),
+	[93] = PINGROUP(93, WEST, _, _, _, _, _, _, _, _, _),
+	[94] = PINGROUP(94, WEST, _, qdss_gpio, wlan1_adc0, _, _, _, _, _, _),
+	[95] = PINGROUP(95, WEST, nav_gpio, gp_pdm0, qdss_gpio, wlan1_adc1, _, _, _, _, _),
+	[96] = PINGROUP(96, WEST, qup4, nav_gpio, mdp_vsync, gp_pdm1, sd_write, jitter_bist, qdss_cti, qdss_cti, _),
+	[97] = PINGROUP(97, WEST, qup4, nav_gpio, mdp_vsync, gp_pdm2, jitter_bist, qdss_cti, qdss_cti, _, _),
+	[98] = PINGROUP(98, SOUTH, _, _, _, _, _, _, _, _, _),
+	[99] = PINGROUP(99, SOUTH, _, _, _, _, _, _, _, _, _),
+	[100] = PINGROUP(100, SOUTH, atest, _, _, _, _, _, _, _, _),
+	[101] = PINGROUP(101, SOUTH, atest, _, _, _, _, _, _, _, _),
+	[102] = PINGROUP(102, SOUTH, _, phase_flag, dac_calib, ddr_pxi2, _, _, _, _, _),
+	[103] = PINGROUP(103, SOUTH, _, phase_flag, dac_calib, ddr_pxi2, _, _, _, _, _),
+	[104] = PINGROUP(104, SOUTH, _, phase_flag, qdss_gpio, dac_calib, ddr_pxi3, _, _, _, _),
+	[105] = PINGROUP(105, SOUTH, _, phase_flag, qdss_gpio, dac_calib, ddr_pxi3, _, _, _, _),
+	[106] = PINGROUP(106, SOUTH, nav_gpio, gcc_gp3, qdss_gpio, _, _, _, _, _, _),
+	[107] = PINGROUP(107, SOUTH, nav_gpio, gcc_gp2, qdss_gpio, _, _, _, _, _, _),
+	[108] = PINGROUP(108, SOUTH, nav_gpio, _, _, _, _, _, _, _, _),
+	[109] = PINGROUP(109, SOUTH, _, qdss_gpio, _, _, _, _, _, _, _),
+	[110] = PINGROUP(110, SOUTH, _, qdss_gpio, _, _, _, _, _, _, _),
+	[111] = PINGROUP(111, SOUTH, _, _, _, _, _, _, _, _, _),
+	[112] = PINGROUP(112, SOUTH, _, _, _, _, _, _, _, _, _),
+	[113] = UFS_RESET(ufs_reset, 0x78000),
+	[114] = SDC_QDSD_PINGROUP(sdc1_rclk, WEST, 0x75000, 15, 0),
+	[115] = SDC_QDSD_PINGROUP(sdc1_clk, WEST, 0x75000, 13, 6),
+	[116] = SDC_QDSD_PINGROUP(sdc1_cmd, WEST, 0x75000, 11, 3),
+	[117] = SDC_QDSD_PINGROUP(sdc1_data, WEST, 0x75000, 9, 0),
+	[118] = SDC_QDSD_PINGROUP(sdc2_clk, SOUTH, 0x73000, 14, 6),
+	[119] = SDC_QDSD_PINGROUP(sdc2_cmd, SOUTH, 0x73000, 11, 3),
+	[120] = SDC_QDSD_PINGROUP(sdc2_data, SOUTH, 0x73000, 9, 0),
+};
+
+static const struct msm_pinctrl_soc_data sm6115_tlmm = {
+	.pins = sm6115_pins,
+	.npins = ARRAY_SIZE(sm6115_pins),
+	.functions = sm6115_functions,
+	.nfunctions = ARRAY_SIZE(sm6115_functions),
+	.groups = sm6115_groups,
+	.ngroups = ARRAY_SIZE(sm6115_groups),
+	.ngpios = 114,
+	.tiles = sm6115_tiles,
+	.ntiles = ARRAY_SIZE(sm6115_tiles),
+};
+
+static int sm6115_tlmm_probe(struct platform_device *pdev)
+{
+	return msm_pinctrl_probe(pdev, &sm6115_tlmm);
+}
+
+static const struct of_device_id sm6115_tlmm_of_match[] = {
+	{ .compatible = "qcom,sm6115-tlmm", },
+	{ }
+};
+
+static struct platform_driver sm6115_tlmm_driver = {
+	.driver = {
+		.name = "sm6115-tlmm",
+		.of_match_table = sm6115_tlmm_of_match,
+	},
+	.probe = sm6115_tlmm_probe,
+	.remove = msm_pinctrl_remove,
+};
+
+static int __init sm6115_tlmm_init(void)
+{
+	return platform_driver_register(&sm6115_tlmm_driver);
+}
+arch_initcall(sm6115_tlmm_init);
+
+static void __exit sm6115_tlmm_exit(void)
+{
+	platform_driver_unregister(&sm6115_tlmm_driver);
+}
+module_exit(sm6115_tlmm_exit);
+
+MODULE_DESCRIPTION("QTI sm6115 tlmm driver");
+MODULE_LICENSE("GPL v2");
+MODULE_DEVICE_TABLE(of, sm6115_tlmm_of_match);
diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
index a89d24a040af..98bf0e2a2a8d 100644
--- a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
+++ b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
@@ -1104,39 +1104,42 @@ static int pmic_gpio_remove(struct platform_device *pdev)
 }
 
 static const struct of_device_id pmic_gpio_of_match[] = {
-	{ .compatible = "qcom,pm8005-gpio", .data = (void *) 4 },
-	{ .compatible = "qcom,pm8916-gpio", .data = (void *) 4 },
-	{ .compatible = "qcom,pm8941-gpio", .data = (void *) 36 },
-	/* pm8950 has 8 GPIOs with holes on 3 */
-	{ .compatible = "qcom,pm8950-gpio", .data = (void *) 8 },
-	{ .compatible = "qcom,pmi8950-gpio", .data = (void *) 2 },
-	{ .compatible = "qcom,pm8994-gpio", .data = (void *) 22 },
-	{ .compatible = "qcom,pmi8994-gpio", .data = (void *) 10 },
-	{ .compatible = "qcom,pm8998-gpio", .data = (void *) 26 },
-	{ .compatible = "qcom,pmi8998-gpio", .data = (void *) 14 },
-	{ .compatible = "qcom,pma8084-gpio", .data = (void *) 22 },
-	/* pms405 has 12 GPIOs with holes on 1, 9, and 10 */
-	{ .compatible = "qcom,pms405-gpio", .data = (void *) 12 },
 	/* pm660 has 13 GPIOs with holes on 1, 5, 6, 7, 8 and 10 */
 	{ .compatible = "qcom,pm660-gpio", .data = (void *) 13 },
 	/* pm660l has 12 GPIOs with holes on 1, 2, 10, 11 and 12 */
 	{ .compatible = "qcom,pm660l-gpio", .data = (void *) 12 },
+	{ .compatible = "qcom,pm6150-gpio", .data = (void *) 10 },
+	{ .compatible = "qcom,pm6150l-gpio", .data = (void *) 12 },
+	{ .compatible = "qcom,pm7325-gpio", .data = (void *) 10 },
+	{ .compatible = "qcom,pm8005-gpio", .data = (void *) 4 },
+	{ .compatible = "qcom,pm8008-gpio", .data = (void *) 2 },
 	/* pm8150 has 10 GPIOs with holes on 2, 5, 7 and 8 */
 	{ .compatible = "qcom,pm8150-gpio", .data = (void *) 10 },
+	{ .compatible = "qcom,pmc8180-gpio", .data = (void *) 10 },
 	/* pm8150b has 12 GPIOs with holes on 3, r and 7 */
 	{ .compatible = "qcom,pm8150b-gpio", .data = (void *) 12 },
 	/* pm8150l has 12 GPIOs with holes on 7 */
 	{ .compatible = "qcom,pm8150l-gpio", .data = (void *) 12 },
+	{ .compatible = "qcom,pmc8180c-gpio", .data = (void *) 12 },
 	{ .compatible = "qcom,pm8350-gpio", .data = (void *) 10 },
 	{ .compatible = "qcom,pm8350b-gpio", .data = (void *) 8 },
 	{ .compatible = "qcom,pm8350c-gpio", .data = (void *) 9 },
+	{ .compatible = "qcom,pm8916-gpio", .data = (void *) 4 },
+	{ .compatible = "qcom,pm8941-gpio", .data = (void *) 36 },
+	/* pm8950 has 8 GPIOs with holes on 3 */
+	{ .compatible = "qcom,pm8950-gpio", .data = (void *) 8 },
+	{ .compatible = "qcom,pm8994-gpio", .data = (void *) 22 },
+	{ .compatible = "qcom,pm8998-gpio", .data = (void *) 26 },
+	{ .compatible = "qcom,pma8084-gpio", .data = (void *) 22 },
+	{ .compatible = "qcom,pmi8950-gpio", .data = (void *) 2 },
+	{ .compatible = "qcom,pmi8994-gpio", .data = (void *) 10 },
+	{ .compatible = "qcom,pmi8998-gpio", .data = (void *) 14 },
 	{ .compatible = "qcom,pmk8350-gpio", .data = (void *) 4 },
-	{ .compatible = "qcom,pm7325-gpio", .data = (void *) 10 },
+	{ .compatible = "qcom,pmm8155au-gpio", .data = (void *) 10 },
 	{ .compatible = "qcom,pmr735a-gpio", .data = (void *) 4 },
 	{ .compatible = "qcom,pmr735b-gpio", .data = (void *) 4 },
-	{ .compatible = "qcom,pm6150-gpio", .data = (void *) 10 },
-	{ .compatible = "qcom,pm6150l-gpio", .data = (void *) 12 },
-	{ .compatible = "qcom,pm8008-gpio", .data = (void *) 2 },
+	/* pms405 has 12 GPIOs with holes on 1, 9, and 10 */
+	{ .compatible = "qcom,pms405-gpio", .data = (void *) 12 },
 	/* pmx55 has 11 GPIOs with holes on 3, 7, 10, 11 */
 	{ .compatible = "qcom,pmx55-gpio", .data = (void *) 11 },
 	{ },
diff --git a/drivers/pinctrl/renesas/Kconfig b/drivers/pinctrl/renesas/Kconfig
index 4b84a744ae87..9a72999084b3 100644
--- a/drivers/pinctrl/renesas/Kconfig
+++ b/drivers/pinctrl/renesas/Kconfig
@@ -37,6 +37,7 @@ config PINCTRL_RENESAS
 	select PINCTRL_PFC_R8A77990 if ARCH_R8A77990
 	select PINCTRL_PFC_R8A77995 if ARCH_R8A77995
 	select PINCTRL_PFC_R8A779A0 if ARCH_R8A779A0
+	select PINCTRL_RZG2L if ARCH_R9A07G044
 	select PINCTRL_PFC_SH7203 if CPU_SUBTYPE_SH7203
 	select PINCTRL_PFC_SH7264 if CPU_SUBTYPE_SH7264
 	select PINCTRL_PFC_SH7269 if CPU_SUBTYPE_SH7269
@@ -176,6 +177,16 @@ config PINCTRL_RZA2
 	help
 	  This selects GPIO and pinctrl driver for Renesas RZ/A2 platforms.
 
+config PINCTRL_RZG2L
+	bool "pin control support for RZ/G2L" if COMPILE_TEST
+	depends on OF
+	select GPIOLIB
+	select GENERIC_PINCTRL_GROUPS
+	select GENERIC_PINMUX_FUNCTIONS
+	select GENERIC_PINCONF
+	help
+	  This selects GPIO and pinctrl driver for Renesas RZ/G2L platforms.
+
 config PINCTRL_PFC_R8A77470
 	bool "pin control support for RZ/G1C" if COMPILE_TEST
 	select PINCTRL_SH_PFC
diff --git a/drivers/pinctrl/renesas/Makefile b/drivers/pinctrl/renesas/Makefile
index 353563228dc2..7d9238a9ef57 100644
--- a/drivers/pinctrl/renesas/Makefile
+++ b/drivers/pinctrl/renesas/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_PINCTRL_PFC_SHX3)		+= pfc-shx3.o
 
 obj-$(CONFIG_PINCTRL_RZA1)	+= pinctrl-rza1.o
 obj-$(CONFIG_PINCTRL_RZA2)	+= pinctrl-rza2.o
+obj-$(CONFIG_PINCTRL_RZG2L)	+= pinctrl-rzg2l.o
 obj-$(CONFIG_PINCTRL_RZN1)	+= pinctrl-rzn1.o
 
 ifeq ($(CONFIG_COMPILE_TEST),y)
diff --git a/drivers/pinctrl/renesas/core.c b/drivers/pinctrl/renesas/core.c
index 5ccc49b387f1..f2ab02225837 100644
--- a/drivers/pinctrl/renesas/core.c
+++ b/drivers/pinctrl/renesas/core.c
@@ -571,17 +571,21 @@ static const struct of_device_id sh_pfc_of_table[] = {
 		.data = &r8a7794_pinmux_info,
 	},
 #endif
-/* Both r8a7795 entries must be present to make sanity checks work */
-#ifdef CONFIG_PINCTRL_PFC_R8A77950
+/*
+ * Both r8a7795 entries must be present to make sanity checks work, but only
+ * the first entry is actually used.
+ * R-Car H3 ES1.x is matched using soc_device_match() instead.
+ */
+#ifdef CONFIG_PINCTRL_PFC_R8A77951
 	{
 		.compatible = "renesas,pfc-r8a7795",
-		.data = &r8a77950_pinmux_info,
+		.data = &r8a77951_pinmux_info,
 	},
 #endif
-#ifdef CONFIG_PINCTRL_PFC_R8A77951
+#ifdef CONFIG_PINCTRL_PFC_R8A77950
 	{
 		.compatible = "renesas,pfc-r8a7795",
-		.data = &r8a77951_pinmux_info,
+		.data = &r8a77950_pinmux_info,
 	},
 #endif
 #ifdef CONFIG_PINCTRL_PFC_R8A77960
@@ -1085,26 +1089,20 @@ static inline void sh_pfc_check_driver(struct platform_driver *pdrv) {}
 #ifdef CONFIG_OF
 static const void *sh_pfc_quirk_match(void)
 {
-#if defined(CONFIG_PINCTRL_PFC_R8A77950) || \
-    defined(CONFIG_PINCTRL_PFC_R8A77951)
+#ifdef CONFIG_PINCTRL_PFC_R8A77950
 	const struct soc_device_attribute *match;
 	static const struct soc_device_attribute quirks[] = {
 		{
 			.soc_id = "r8a7795", .revision = "ES1.*",
 			.data = &r8a77950_pinmux_info,
 		},
-		{
-			.soc_id = "r8a7795",
-			.data = &r8a77951_pinmux_info,
-		},
-
 		{ /* sentinel */ }
 	};
 
 	match = soc_device_match(quirks);
 	if (match)
-		return match->data ?: ERR_PTR(-ENODEV);
-#endif /* CONFIG_PINCTRL_PFC_R8A77950 || CONFIG_PINCTRL_PFC_R8A77951 */
+		return match->data;
+#endif /* CONFIG_PINCTRL_PFC_R8A77950 */
 
 	return NULL;
 }
@@ -1119,9 +1117,6 @@ static int sh_pfc_probe(struct platform_device *pdev)
 #ifdef CONFIG_OF
 	if (pdev->dev.of_node) {
 		info = sh_pfc_quirk_match();
-		if (IS_ERR(info))
-			return PTR_ERR(info);
-
 		if (!info)
 			info = of_device_get_match_data(&pdev->dev);
 	} else
diff --git a/drivers/pinctrl/renesas/pfc-r8a77995.c b/drivers/pinctrl/renesas/pfc-r8a77995.c
index b479f87a3b23..c56e1e4c13b3 100644
--- a/drivers/pinctrl/renesas/pfc-r8a77995.c
+++ b/drivers/pinctrl/renesas/pfc-r8a77995.c
@@ -14,16 +14,27 @@
 #include <linux/errno.h>
 #include <linux/kernel.h>
 
+#include "core.h"
 #include "sh_pfc.h"
 
-#define CPU_ALL_GP(fn, sfx)			\
-		PORT_GP_9(0,  fn, sfx),		\
-		PORT_GP_32(1, fn, sfx),		\
-		PORT_GP_32(2, fn, sfx),		\
-		PORT_GP_CFG_10(3,  fn, sfx, SH_PFC_PIN_CFG_IO_VOLTAGE),	\
-		PORT_GP_32(4, fn, sfx),		\
-		PORT_GP_21(5, fn, sfx),		\
-		PORT_GP_14(6, fn, sfx)
+#define CPU_ALL_GP(fn, sfx)						\
+	PORT_GP_CFG_9(0,  fn, sfx, SH_PFC_PIN_CFG_PULL_UP_DOWN),	\
+	PORT_GP_CFG_32(1, fn, sfx, SH_PFC_PIN_CFG_PULL_UP_DOWN),	\
+	PORT_GP_CFG_32(2, fn, sfx, SH_PFC_PIN_CFG_PULL_UP_DOWN),	\
+	PORT_GP_CFG_10(3, fn, sfx, SH_PFC_PIN_CFG_IO_VOLTAGE | SH_PFC_PIN_CFG_PULL_UP_DOWN),	\
+	PORT_GP_CFG_32(4, fn, sfx, SH_PFC_PIN_CFG_PULL_UP_DOWN),	\
+	PORT_GP_CFG_21(5, fn, sfx, SH_PFC_PIN_CFG_PULL_UP_DOWN),	\
+	PORT_GP_CFG_14(6, fn, sfx, SH_PFC_PIN_CFG_PULL_UP_DOWN)
+
+#define CPU_ALL_NOGP(fn)						\
+	PIN_NOGP_CFG(DU_DOTCLKIN0, "DU_DOTCLKIN0", fn, SH_PFC_PIN_CFG_PULL_DOWN),	\
+	PIN_NOGP_CFG(FSCLKST_N, "FSCLKST#", fn, SH_PFC_PIN_CFG_PULL_UP_DOWN),	\
+	PIN_NOGP_CFG(MLB_REF, "MLB_REF", fn, SH_PFC_PIN_CFG_PULL_UP_DOWN),	\
+	PIN_NOGP_CFG(PRESETOUT_N, "PRESETOUT#", fn, SH_PFC_PIN_CFG_PULL_UP_DOWN),	\
+	PIN_NOGP_CFG(TCK, "TCK", fn, SH_PFC_PIN_CFG_PULL_UP),		\
+	PIN_NOGP_CFG(TDI, "TDI", fn, SH_PFC_PIN_CFG_PULL_UP),		\
+	PIN_NOGP_CFG(TMS, "TMS", fn, SH_PFC_PIN_CFG_PULL_UP),		\
+	PIN_NOGP_CFG(TRST_N, "TRST#", fn, SH_PFC_PIN_CFG_PULL_UP)
 
 /*
  * F_() : just information
@@ -930,8 +941,17 @@ static const u16 pinmux_data[] = {
 	PINMUX_IPSR_GPSR(IP13_7_4,	TPU0TO3_A),
 };
 
+/*
+ * Pins not associated with a GPIO port.
+ */
+enum {
+	GP_ASSIGN_LAST(),
+	NOGP_ALL(),
+};
+
 static const struct sh_pfc_pin pinmux_pins[] = {
 	PINMUX_GPIO_GP_ALL(),
+	PINMUX_NOGP_ALL(),
 };
 
 /* - AUDIO CLOCK ------------------------------------------------------------- */
@@ -2834,6 +2854,214 @@ static int r8a77995_pin_to_pocctrl(struct sh_pfc *pfc, unsigned int pin, u32 *po
 	return bit;
 }
 
+static const struct pinmux_bias_reg pinmux_bias_regs[] = {
+	{ PINMUX_BIAS_REG("PUEN0", 0xe6060400, "PUD0", 0xe6060440) {
+		[ 0] = RCAR_GP_PIN(1, 9),	/* DU_DG1 */
+		[ 1] = RCAR_GP_PIN(1, 8),	/* DU_DG0 */
+		[ 2] = RCAR_GP_PIN(1, 7),	/* DU_DB7 */
+		[ 3] = RCAR_GP_PIN(1, 6),	/* DU_DB6 */
+		[ 4] = RCAR_GP_PIN(1, 5),	/* DU_DB5 */
+		[ 5] = RCAR_GP_PIN(1, 4),	/* DU_DB4 */
+		[ 6] = RCAR_GP_PIN(1, 3),	/* DU_DB3 */
+		[ 7] = RCAR_GP_PIN(1, 2),	/* DU_DB2 */
+		[ 8] = RCAR_GP_PIN(1, 1),	/* DU_DB1 */
+		[ 9] = RCAR_GP_PIN(1, 0),	/* DU_DB0 */
+		[10] = PIN_MLB_REF,		/* MLB_REF */
+		[11] = RCAR_GP_PIN(0, 8),	/* MLB_SIG */
+		[12] = RCAR_GP_PIN(0, 7),	/* MLB_DAT */
+		[13] = RCAR_GP_PIN(0, 6),	/* MLB_CLK */
+		[14] = RCAR_GP_PIN(0, 5),	/* MSIOF2_RXD */
+		[15] = RCAR_GP_PIN(0, 4),	/* MSIOF2_TXD */
+		[16] = RCAR_GP_PIN(0, 3),	/* MSIOF2_SCK */
+		[17] = RCAR_GP_PIN(0, 2),	/* IRQ0_A */
+		[18] = RCAR_GP_PIN(0, 1),	/* USB0_OVC */
+		[19] = RCAR_GP_PIN(0, 0),	/* USB0_PWEN */
+		[20] = PIN_PRESETOUT_N,		/* PRESETOUT# */
+		[21] = PIN_DU_DOTCLKIN0,	/* DU_DOTCLKIN0 */
+		[22] = PIN_FSCLKST_N,		/* FSCLKST# */
+		[23] = SH_PFC_PIN_NONE,
+		[24] = SH_PFC_PIN_NONE,
+		[25] = SH_PFC_PIN_NONE,
+		[26] = SH_PFC_PIN_NONE,
+		[27] = SH_PFC_PIN_NONE,
+		[28] = PIN_TDI,			/* TDI */
+		[29] = PIN_TMS,			/* TMS */
+		[30] = PIN_TCK,			/* TCK */
+		[31] = PIN_TRST_N,		/* TRST# */
+	} },
+	{ PINMUX_BIAS_REG("PUEN1", 0xe6060404, "PUD1", 0xe6060444) {
+		[ 0] = RCAR_GP_PIN(2, 9),	/* VI4_DATA8 */
+		[ 1] = RCAR_GP_PIN(2, 8),	/* VI4_DATA7 */
+		[ 2] = RCAR_GP_PIN(2, 7),	/* VI4_DATA6 */
+		[ 3] = RCAR_GP_PIN(2, 6),	/* VI4_DATA5 */
+		[ 4] = RCAR_GP_PIN(2, 5),	/* VI4_DATA4 */
+		[ 5] = RCAR_GP_PIN(2, 4),	/* VI4_DATA3 */
+		[ 6] = RCAR_GP_PIN(2, 3),	/* VI4_DATA2 */
+		[ 7] = RCAR_GP_PIN(2, 2),	/* VI4_DATA1 */
+		[ 8] = RCAR_GP_PIN(2, 1),	/* VI4_DATA0 */
+		[ 9] = RCAR_GP_PIN(2, 0),	/* VI4_CLK */
+		[10] = RCAR_GP_PIN(1, 31),	/* QPOLB */
+		[11] = RCAR_GP_PIN(1, 30),	/* QPOLA */
+		[12] = RCAR_GP_PIN(1, 29),	/* DU_CDE */
+		[13] = RCAR_GP_PIN(1, 28),	/* DU_DISP/CDE */
+		[14] = RCAR_GP_PIN(1, 27),	/* DU_DISP */
+		[15] = RCAR_GP_PIN(1, 26),	/* DU_VSYNC */
+		[16] = RCAR_GP_PIN(1, 25),	/* DU_HSYNC */
+		[17] = RCAR_GP_PIN(1, 24),	/* DU_DOTCLKOUT0 */
+		[18] = RCAR_GP_PIN(1, 23),	/* DU_DR7 */
+		[19] = RCAR_GP_PIN(1, 22),	/* DU_DR6 */
+		[20] = RCAR_GP_PIN(1, 21),	/* DU_DR5 */
+		[21] = RCAR_GP_PIN(1, 20),	/* DU_DR4 */
+		[22] = RCAR_GP_PIN(1, 19),	/* DU_DR3 */
+		[23] = RCAR_GP_PIN(1, 18),	/* DU_DR2 */
+		[24] = RCAR_GP_PIN(1, 17),	/* DU_DR1 */
+		[25] = RCAR_GP_PIN(1, 16),	/* DU_DR0 */
+		[26] = RCAR_GP_PIN(1, 15),	/* DU_DG7 */
+		[27] = RCAR_GP_PIN(1, 14),	/* DU_DG6 */
+		[28] = RCAR_GP_PIN(1, 13),	/* DU_DG5 */
+		[29] = RCAR_GP_PIN(1, 12),	/* DU_DG4 */
+		[30] = RCAR_GP_PIN(1, 11),	/* DU_DG3 */
+		[31] = RCAR_GP_PIN(1, 10),	/* DU_DG2 */
+	} },
+	{ PINMUX_BIAS_REG("PUEN2", 0xe6060408, "PUD2", 0xe6060448) {
+		[ 0] = RCAR_GP_PIN(3, 8),	/* NFDATA6 */
+		[ 1] = RCAR_GP_PIN(3, 7),	/* NFDATA5 */
+		[ 2] = RCAR_GP_PIN(3, 6),	/* NFDATA4 */
+		[ 3] = RCAR_GP_PIN(3, 5),	/* NFDATA3 */
+		[ 4] = RCAR_GP_PIN(3, 4),	/* NFDATA2 */
+		[ 5] = RCAR_GP_PIN(3, 3),	/* NFDATA1 */
+		[ 6] = RCAR_GP_PIN(3, 2),	/* NFDATA0 */
+		[ 7] = RCAR_GP_PIN(3, 1),	/* NFWE# (PUEN) / NFRE# (PUD) */
+		[ 8] = RCAR_GP_PIN(3, 0),	/* NFRE# (PUEN) / NFWE# (PUD) */
+		[ 9] = RCAR_GP_PIN(4, 0),	/* NFRB# */
+		[10] = RCAR_GP_PIN(2, 31),	/* NFCE# */
+		[11] = RCAR_GP_PIN(2, 30),	/* NFCLE */
+		[12] = RCAR_GP_PIN(2, 29),	/* NFALE */
+		[13] = RCAR_GP_PIN(2, 28),	/* VI4_CLKENB */
+		[14] = RCAR_GP_PIN(2, 27),	/* VI4_FIELD */
+		[15] = RCAR_GP_PIN(2, 26),	/* VI4_HSYNC# */
+		[16] = RCAR_GP_PIN(2, 25),	/* VI4_VSYNC# */
+		[17] = RCAR_GP_PIN(2, 24),	/* VI4_DATA23 */
+		[18] = RCAR_GP_PIN(2, 23),	/* VI4_DATA22 */
+		[19] = RCAR_GP_PIN(2, 22),	/* VI4_DATA21 */
+		[20] = RCAR_GP_PIN(2, 21),	/* VI4_DATA20 */
+		[21] = RCAR_GP_PIN(2, 20),	/* VI4_DATA19 */
+		[22] = RCAR_GP_PIN(2, 19),	/* VI4_DATA18 */
+		[23] = RCAR_GP_PIN(2, 18),	/* VI4_DATA17 */
+		[24] = RCAR_GP_PIN(2, 17),	/* VI4_DATA16 */
+		[25] = RCAR_GP_PIN(2, 16),	/* VI4_DATA15 */
+		[26] = RCAR_GP_PIN(2, 15),	/* VI4_DATA14 */
+		[27] = RCAR_GP_PIN(2, 14),	/* VI4_DATA13 */
+		[28] = RCAR_GP_PIN(2, 13),	/* VI4_DATA12 */
+		[29] = RCAR_GP_PIN(2, 12),	/* VI4_DATA11 */
+		[30] = RCAR_GP_PIN(2, 11),	/* VI4_DATA10 */
+		[31] = RCAR_GP_PIN(2, 10),	/* VI4_DATA9 */
+	} },
+	{ PINMUX_BIAS_REG("PUEN3", 0xe606040c, "PUD3", 0xe606044c) {
+		[ 0] = RCAR_GP_PIN(4, 31),	/* CAN0_RX_A */
+		[ 1] = RCAR_GP_PIN(5, 2),	/* CAN_CLK */
+		[ 2] = RCAR_GP_PIN(5, 1),	/* TPU0TO1_A */
+		[ 3] = RCAR_GP_PIN(5, 0),	/* TPU0TO0_A */
+		[ 4] = RCAR_GP_PIN(4, 27),	/* TX2 */
+		[ 5] = RCAR_GP_PIN(4, 26),	/* RX2 */
+		[ 6] = RCAR_GP_PIN(4, 25),	/* SCK2 */
+		[ 7] = RCAR_GP_PIN(4, 24),	/* TX1_A */
+		[ 8] = RCAR_GP_PIN(4, 23),	/* RX1_A */
+		[ 9] = RCAR_GP_PIN(4, 22),	/* SCK1_A */
+		[10] = RCAR_GP_PIN(4, 21),	/* TX0_A */
+		[11] = RCAR_GP_PIN(4, 20),	/* RX0_A */
+		[12] = RCAR_GP_PIN(4, 19),	/* SCK0_A */
+		[13] = RCAR_GP_PIN(4, 18),	/* MSIOF1_RXD */
+		[14] = RCAR_GP_PIN(4, 17),	/* MSIOF1_TXD */
+		[15] = RCAR_GP_PIN(4, 16),	/* MSIOF1_SCK */
+		[16] = RCAR_GP_PIN(4, 15),	/* MSIOF0_RXD */
+		[17] = RCAR_GP_PIN(4, 14),	/* MSIOF0_TXD */
+		[18] = RCAR_GP_PIN(4, 13),	/* MSIOF0_SYNC */
+		[19] = RCAR_GP_PIN(4, 12),	/* MSIOF0_SCK */
+		[20] = RCAR_GP_PIN(4, 11),	/* SDA1 */
+		[21] = RCAR_GP_PIN(4, 10),	/* SCL1 */
+		[22] = RCAR_GP_PIN(4, 9),	/* SDA0 */
+		[23] = RCAR_GP_PIN(4, 8),	/* SCL0 */
+		[24] = RCAR_GP_PIN(4, 7),	/* SSI_WS4_A */
+		[25] = RCAR_GP_PIN(4, 6),	/* SSI_SDATA4_A */
+		[26] = RCAR_GP_PIN(4, 5),	/* SSI_SCK4_A */
+		[27] = RCAR_GP_PIN(4, 4),	/* SSI_WS34 */
+		[28] = RCAR_GP_PIN(4, 3),	/* SSI_SDATA3 */
+		[29] = RCAR_GP_PIN(4, 2),	/* SSI_SCK34 */
+		[30] = RCAR_GP_PIN(4, 1),	/* AUDIO_CLKA */
+		[31] = RCAR_GP_PIN(3, 9),	/* NFDATA7 */
+	} },
+	{ PINMUX_BIAS_REG("PUEN4", 0xe6060410, "PUD4", 0xe6060450) {
+		[ 0] = RCAR_GP_PIN(6, 10),	/* QSPI1_IO3 */
+		[ 1] = RCAR_GP_PIN(6, 9),	/* QSPI1_IO2 */
+		[ 2] = RCAR_GP_PIN(6, 8),	/* QSPI1_MISO_IO1 */
+		[ 3] = RCAR_GP_PIN(6, 7),	/* QSPI1_MOSI_IO0 */
+		[ 4] = RCAR_GP_PIN(6, 6),	/* QSPI1_SPCLK */
+		[ 5] = RCAR_GP_PIN(6, 5),	/* QSPI0_SSL */
+		[ 6] = RCAR_GP_PIN(6, 4),	/* QSPI0_IO3 */
+		[ 7] = RCAR_GP_PIN(6, 3),	/* QSPI0_IO2 */
+		[ 8] = RCAR_GP_PIN(6, 2),	/* QSPI0_MISO_IO1 */
+		[ 9] = RCAR_GP_PIN(6, 1),	/* QSPI0_MOSI_IO0 */
+		[10] = RCAR_GP_PIN(6, 0),	/* QSPI0_SPCLK */
+		[11] = RCAR_GP_PIN(5, 20),	/* AVB0_LINK */
+		[12] = RCAR_GP_PIN(5, 19),	/* AVB0_PHY_INT */
+		[13] = RCAR_GP_PIN(5, 18),	/* AVB0_MAGIC */
+		[14] = RCAR_GP_PIN(5, 17),	/* AVB0_MDC */
+		[15] = RCAR_GP_PIN(5, 16),	/* AVB0_MDIO */
+		[16] = RCAR_GP_PIN(5, 15),	/* AVB0_TXCREFCLK */
+		[17] = RCAR_GP_PIN(5, 14),	/* AVB0_TD3 */
+		[18] = RCAR_GP_PIN(5, 13),	/* AVB0_TD2 */
+		[19] = RCAR_GP_PIN(5, 12),	/* AVB0_TD1 */
+		[20] = RCAR_GP_PIN(5, 11),	/* AVB0_TD0 */
+		[21] = RCAR_GP_PIN(5, 10),	/* AVB0_TXC */
+		[22] = RCAR_GP_PIN(5, 9),	/* AVB0_TX_CTL */
+		[23] = RCAR_GP_PIN(5, 8),	/* AVB0_RD3 */
+		[24] = RCAR_GP_PIN(5, 7),	/* AVB0_RD2 */
+		[25] = RCAR_GP_PIN(5, 6),	/* AVB0_RD1 */
+		[26] = RCAR_GP_PIN(5, 5),	/* AVB0_RD0 */
+		[27] = RCAR_GP_PIN(5, 4),	/* AVB0_RXC */
+		[28] = RCAR_GP_PIN(5, 3),	/* AVB0_RX_CTL */
+		[29] = RCAR_GP_PIN(4, 30),	/* CAN1_TX_A */
+		[30] = RCAR_GP_PIN(4, 29),	/* CAN1_RX_A */
+		[31] = RCAR_GP_PIN(4, 28),	/* CAN0_TX_A */
+	} },
+	{ PINMUX_BIAS_REG("PUEN5", 0xe6060414, "PUD4", 0xe6060454) {
+		[ 0] = SH_PFC_PIN_NONE,
+		[ 1] = SH_PFC_PIN_NONE,
+		[ 2] = SH_PFC_PIN_NONE,
+		[ 3] = SH_PFC_PIN_NONE,
+		[ 4] = SH_PFC_PIN_NONE,
+		[ 5] = SH_PFC_PIN_NONE,
+		[ 6] = SH_PFC_PIN_NONE,
+		[ 7] = SH_PFC_PIN_NONE,
+		[ 8] = SH_PFC_PIN_NONE,
+		[ 9] = SH_PFC_PIN_NONE,
+		[10] = SH_PFC_PIN_NONE,
+		[11] = SH_PFC_PIN_NONE,
+		[12] = SH_PFC_PIN_NONE,
+		[13] = SH_PFC_PIN_NONE,
+		[14] = SH_PFC_PIN_NONE,
+		[15] = SH_PFC_PIN_NONE,
+		[16] = SH_PFC_PIN_NONE,
+		[17] = SH_PFC_PIN_NONE,
+		[18] = SH_PFC_PIN_NONE,
+		[19] = SH_PFC_PIN_NONE,
+		[20] = SH_PFC_PIN_NONE,
+		[21] = SH_PFC_PIN_NONE,
+		[22] = SH_PFC_PIN_NONE,
+		[23] = SH_PFC_PIN_NONE,
+		[24] = SH_PFC_PIN_NONE,
+		[25] = SH_PFC_PIN_NONE,
+		[26] = SH_PFC_PIN_NONE,
+		[27] = SH_PFC_PIN_NONE,
+		[28] = SH_PFC_PIN_NONE,
+		[29] = RCAR_GP_PIN(6, 13),	/* RPC_INT# */
+		[30] = RCAR_GP_PIN(6, 12),	/* RPC_RESET# */
+		[31] = RCAR_GP_PIN(6, 11),	/* QSPI1_SSL */
+	} },
+	{ /* sentinel */ }
+};
+
 enum ioctrl_regs {
 	TDSELCTRL,
 };
@@ -2843,8 +3071,83 @@ static const struct pinmux_ioctrl_reg pinmux_ioctrl_regs[] = {
 	{ /* sentinel */ },
 };
 
+static const struct pinmux_bias_reg *
+r8a77995_pin_to_bias_reg(const struct sh_pfc *pfc, unsigned int pin,
+			 unsigned int *puen_bit, unsigned int *pud_bit)
+{
+	const struct pinmux_bias_reg *reg;
+	unsigned int bit;
+
+	reg = rcar_pin_to_bias_reg(pfc, pin, &bit);
+	if (!reg)
+		return reg;
+
+	*puen_bit = bit;
+
+	/* NFWE# and NFRE# use different bit positions in PUD2 */
+	switch (pin) {
+	case RCAR_GP_PIN(3, 0):	/* NFRE# */
+		*pud_bit = 7;
+		break;
+
+	case RCAR_GP_PIN(3, 1):	/* NFWE# */
+		*pud_bit = 8;
+		break;
+
+	default:
+		*pud_bit = bit;
+		break;
+	}
+
+	return reg;
+}
+
+static unsigned int r8a77995_pinmux_get_bias(struct sh_pfc *pfc,
+					     unsigned int pin)
+{
+	const struct pinmux_bias_reg *reg;
+	unsigned int puen_bit, pud_bit;
+
+	reg = r8a77995_pin_to_bias_reg(pfc, pin, &puen_bit, &pud_bit);
+	if (!reg)
+		return PIN_CONFIG_BIAS_DISABLE;
+
+	if (!(sh_pfc_read(pfc, reg->puen) & BIT(puen_bit)))
+		return PIN_CONFIG_BIAS_DISABLE;
+	else if (sh_pfc_read(pfc, reg->pud) & BIT(pud_bit))
+		return PIN_CONFIG_BIAS_PULL_UP;
+	else
+		return PIN_CONFIG_BIAS_PULL_DOWN;
+}
+
+static void r8a77995_pinmux_set_bias(struct sh_pfc *pfc, unsigned int pin,
+				     unsigned int bias)
+{
+	const struct pinmux_bias_reg *reg;
+	unsigned int puen_bit, pud_bit;
+	u32 enable, updown;
+
+	reg = r8a77995_pin_to_bias_reg(pfc, pin, &puen_bit, &pud_bit);
+	if (!reg)
+		return;
+
+	enable = sh_pfc_read(pfc, reg->puen) & ~BIT(puen_bit);
+	if (bias != PIN_CONFIG_BIAS_DISABLE) {
+		enable |= BIT(puen_bit);
+
+		updown = sh_pfc_read(pfc, reg->pud) & ~BIT(pud_bit);
+		if (bias == PIN_CONFIG_BIAS_PULL_UP)
+			updown |= BIT(pud_bit);
+
+		sh_pfc_write(pfc, reg->pud, updown);
+	}
+	sh_pfc_write(pfc, reg->puen, enable);
+}
+
 static const struct sh_pfc_soc_operations r8a77995_pinmux_ops = {
 	.pin_to_pocctrl = r8a77995_pin_to_pocctrl,
+	.get_bias = r8a77995_pinmux_get_bias,
+	.set_bias = r8a77995_pinmux_set_bias,
 };
 
 const struct sh_pfc_soc_info r8a77995_pinmux_info = {
@@ -2862,6 +3165,7 @@ const struct sh_pfc_soc_info r8a77995_pinmux_info = {
 	.nr_functions = ARRAY_SIZE(pinmux_functions),
 
 	.cfg_regs = pinmux_config_regs,
+	.bias_regs = pinmux_bias_regs,
 	.ioctrl_regs = pinmux_ioctrl_regs,
 
 	.pinmux_data = pinmux_data,
diff --git a/drivers/pinctrl/renesas/pinctrl-rzg2l.c b/drivers/pinctrl/renesas/pinctrl-rzg2l.c
new file mode 100644
index 000000000000..dbf2f521bb27
--- /dev/null
+++ b/drivers/pinctrl/renesas/pinctrl-rzg2l.c
@@ -0,0 +1,1175 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Renesas RZ/G2L Pin Control and GPIO driver core
+ *
+ * Copyright (C) 2021 Renesas Electronics Corporation.
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/gpio/driver.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/pinctrl/pinconf-generic.h>
+#include <linux/pinctrl/pinconf.h>
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/pinctrl/pinmux.h>
+#include <linux/spinlock.h>
+
+#include <dt-bindings/pinctrl/rzg2l-pinctrl.h>
+
+#include "../core.h"
+#include "../pinconf.h"
+#include "../pinmux.h"
+
+#define DRV_NAME	"pinctrl-rzg2l"
+
+/*
+ * Use 16 lower bits [15:0] for pin identifier
+ * Use 16 higher bits [31:16] for pin mux function
+ */
+#define MUX_PIN_ID_MASK		GENMASK(15, 0)
+#define MUX_FUNC_MASK		GENMASK(31, 16)
+#define MUX_FUNC_OFFS		16
+#define MUX_FUNC(pinconf)	(((pinconf) & MUX_FUNC_MASK) >> MUX_FUNC_OFFS)
+
+/* PIN capabilities */
+#define PIN_CFG_IOLH			BIT(0)
+#define PIN_CFG_SR			BIT(1)
+#define PIN_CFG_IEN			BIT(2)
+#define PIN_CFG_PUPD			BIT(3)
+#define PIN_CFG_IOLH_SD0		BIT(4)
+#define PIN_CFG_IOLH_SD1		BIT(5)
+#define PIN_CFG_IOLH_QSPI		BIT(6)
+#define PIN_CFG_IOLH_ETH0		BIT(7)
+#define PIN_CFG_IOLH_ETH1		BIT(8)
+#define PIN_CFG_FILONOFF		BIT(9)
+#define PIN_CFG_FILNUM			BIT(10)
+#define PIN_CFG_FILCLKSEL		BIT(11)
+
+#define RZG2L_MPXED_PIN_FUNCS		(PIN_CFG_IOLH | \
+					 PIN_CFG_SR | \
+					 PIN_CFG_PUPD | \
+					 PIN_CFG_FILONOFF | \
+					 PIN_CFG_FILNUM | \
+					 PIN_CFG_FILCLKSEL)
+
+#define RZG2L_MPXED_ETH_PIN_FUNCS(x)	((x) | \
+					 PIN_CFG_FILONOFF | \
+					 PIN_CFG_FILNUM | \
+					 PIN_CFG_FILCLKSEL)
+
+/*
+ * n indicates number of pins in the port, a is the register index
+ * and f is pin configuration capabilities supported.
+ */
+#define RZG2L_GPIO_PORT_PACK(n, a, f)	(((n) << 28) | ((a) << 20) | (f))
+#define RZG2L_GPIO_PORT_GET_PINCNT(x)	(((x) & GENMASK(30, 28)) >> 28)
+#define RZG2L_GPIO_PORT_GET_INDEX(x)	(((x) & GENMASK(26, 20)) >> 20)
+#define RZG2L_GPIO_PORT_GET_CFGS(x)	((x) & GENMASK(19, 0))
+
+/*
+ * BIT(31) indicates dedicated pin, p is the register index while
+ * referencing to SR/IEN/IOLH/FILxx registers, b is the register bits
+ * (b * 8) and f is the pin configuration capabilities supported.
+ */
+#define RZG2L_SINGLE_PIN		BIT(31)
+#define RZG2L_SINGLE_PIN_PACK(p, b, f)	(RZG2L_SINGLE_PIN | \
+					 ((p) << 24) | ((b) << 20) | (f))
+#define RZG2L_SINGLE_PIN_GET_PORT(x)	(((x) & GENMASK(30, 24)) >> 24)
+#define RZG2L_SINGLE_PIN_GET_BIT(x)	(((x) & GENMASK(22, 20)) >> 20)
+#define RZG2L_SINGLE_PIN_GET_CFGS(x)	((x) & GENMASK(19, 0))
+
+#define P(n)			(0x0000 + 0x10 + (n))
+#define PM(n)			(0x0100 + 0x20 + (n) * 2)
+#define PMC(n)			(0x0200 + 0x10 + (n))
+#define PFC(n)			(0x0400 + 0x40 + (n) * 4)
+#define PIN(n)			(0x0800 + 0x10 + (n))
+#define IEN(n)			(0x1800 + (n) * 8)
+#define PWPR			(0x3014)
+#define SD_CH(n)		(0x3000 + (n) * 4)
+#define QSPI			(0x3008)
+
+#define PVDD_1800		1	/* I/O domain voltage <= 1.8V */
+#define PVDD_3300		0	/* I/O domain voltage >= 3.3V */
+
+#define PWPR_B0WI		BIT(7)	/* Bit Write Disable */
+#define PWPR_PFCWE		BIT(6)	/* PFC Register Write Enable */
+
+#define PM_MASK			0x03
+#define PVDD_MASK		0x01
+#define PFC_MASK		0x07
+#define IEN_MASK		0x01
+
+#define PM_INPUT		0x1
+#define PM_OUTPUT		0x2
+
+#define RZG2L_PIN_ID_TO_PORT(id)	((id) / RZG2L_PINS_PER_PORT)
+#define RZG2L_PIN_ID_TO_PIN(id)		((id) % RZG2L_PINS_PER_PORT)
+
+struct rzg2l_dedicated_configs {
+	const char *name;
+	u32 config;
+};
+
+struct rzg2l_pinctrl_data {
+	const char * const *port_pins;
+	const u32 *port_pin_configs;
+	struct rzg2l_dedicated_configs *dedicated_pins;
+	unsigned int n_port_pins;
+	unsigned int n_dedicated_pins;
+};
+
+struct rzg2l_pinctrl {
+	struct pinctrl_dev		*pctl;
+	struct pinctrl_desc		desc;
+	struct pinctrl_pin_desc		*pins;
+
+	const struct rzg2l_pinctrl_data	*data;
+	void __iomem			*base;
+	struct device			*dev;
+	struct clk			*clk;
+
+	struct gpio_chip		gpio_chip;
+	struct pinctrl_gpio_range	gpio_range;
+
+	spinlock_t			lock;
+};
+
+static void rzg2l_pinctrl_set_pfc_mode(struct rzg2l_pinctrl *pctrl,
+				       u8 port, u8 pin, u8 func)
+{
+	unsigned long flags;
+	u32 reg;
+
+	spin_lock_irqsave(&pctrl->lock, flags);
+
+	/* Set pin to 'Non-use (Hi-Z input protection)'  */
+	reg = readw(pctrl->base + PM(port));
+	reg &= ~(PM_MASK << (pin * 2));
+	writew(reg, pctrl->base + PM(port));
+
+	/* Temporarily switch to GPIO mode with PMC register */
+	reg = readb(pctrl->base + PMC(port));
+	writeb(reg & ~BIT(pin), pctrl->base + PMC(port));
+
+	/* Set the PWPR register to allow PFC register to write */
+	writel(0x0, pctrl->base + PWPR);        /* B0WI=0, PFCWE=0 */
+	writel(PWPR_PFCWE, pctrl->base + PWPR);  /* B0WI=0, PFCWE=1 */
+
+	/* Select Pin function mode with PFC register */
+	reg = readl(pctrl->base + PFC(port));
+	reg &= ~(PFC_MASK << (pin * 4));
+	writel(reg | (func << (pin * 4)), pctrl->base + PFC(port));
+
+	/* Set the PWPR register to be write-protected */
+	writel(0x0, pctrl->base + PWPR);        /* B0WI=0, PFCWE=0 */
+	writel(PWPR_B0WI, pctrl->base + PWPR);  /* B0WI=1, PFCWE=0 */
+
+	/* Switch to Peripheral pin function with PMC register */
+	reg = readb(pctrl->base + PMC(port));
+	writeb(reg | BIT(pin), pctrl->base + PMC(port));
+
+	spin_unlock_irqrestore(&pctrl->lock, flags);
+};
+
+static int rzg2l_pinctrl_set_mux(struct pinctrl_dev *pctldev,
+				 unsigned int func_selector,
+				 unsigned int group_selector)
+{
+	struct rzg2l_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev);
+	struct function_desc *func;
+	unsigned int i, *psel_val;
+	struct group_desc *group;
+	int *pins;
+
+	func = pinmux_generic_get_function(pctldev, func_selector);
+	if (!func)
+		return -EINVAL;
+	group = pinctrl_generic_get_group(pctldev, group_selector);
+	if (!group)
+		return -EINVAL;
+
+	psel_val = func->data;
+	pins = group->pins;
+
+	for (i = 0; i < group->num_pins; i++) {
+		dev_dbg(pctrl->dev, "port:%u pin: %u PSEL:%u\n",
+			RZG2L_PIN_ID_TO_PORT(pins[i]), RZG2L_PIN_ID_TO_PIN(pins[i]),
+			psel_val[i]);
+		rzg2l_pinctrl_set_pfc_mode(pctrl, RZG2L_PIN_ID_TO_PORT(pins[i]),
+					   RZG2L_PIN_ID_TO_PIN(pins[i]), psel_val[i]);
+	}
+
+	return 0;
+};
+
+static int rzg2l_map_add_config(struct pinctrl_map *map,
+				const char *group_or_pin,
+				enum pinctrl_map_type type,
+				unsigned long *configs,
+				unsigned int num_configs)
+{
+	unsigned long *cfgs;
+
+	cfgs = kmemdup(configs, num_configs * sizeof(*cfgs),
+		       GFP_KERNEL);
+	if (!cfgs)
+		return -ENOMEM;
+
+	map->type = type;
+	map->data.configs.group_or_pin = group_or_pin;
+	map->data.configs.configs = cfgs;
+	map->data.configs.num_configs = num_configs;
+
+	return 0;
+}
+
+static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev,
+				   struct device_node *np,
+				   struct pinctrl_map **map,
+				   unsigned int *num_maps,
+				   unsigned int *index)
+{
+	struct rzg2l_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev);
+	struct pinctrl_map *maps = *map;
+	unsigned int nmaps = *num_maps;
+	unsigned long *configs = NULL;
+	unsigned int *pins, *psel_val;
+	unsigned int num_pinmux = 0;
+	unsigned int idx = *index;
+	unsigned int num_pins, i;
+	unsigned int num_configs;
+	struct property *pinmux;
+	struct property *prop;
+	int ret, gsel, fsel;
+	const char **pin_fn;
+	const char *pin;
+
+	pinmux = of_find_property(np, "pinmux", NULL);
+	if (pinmux)
+		num_pinmux = pinmux->length / sizeof(u32);
+
+	ret = of_property_count_strings(np, "pins");
+	if (ret == -EINVAL) {
+		num_pins = 0;
+	} else if (ret < 0) {
+		dev_err(pctrl->dev, "Invalid pins list in DT\n");
+		return ret;
+	} else {
+		num_pins = ret;
+	}
+
+	if (!num_pinmux && !num_pins)
+		return 0;
+
+	if (num_pinmux && num_pins) {
+		dev_err(pctrl->dev,
+			"DT node must contain either a pinmux or pins and not both\n");
+		return -EINVAL;
+	}
+
+	ret = pinconf_generic_parse_dt_config(np, NULL, &configs, &num_configs);
+	if (ret < 0)
+		return ret;
+
+	if (num_pins && !num_configs) {
+		dev_err(pctrl->dev, "DT node must contain a config\n");
+		ret = -ENODEV;
+		goto done;
+	}
+
+	if (num_pinmux)
+		nmaps += 1;
+
+	if (num_pins)
+		nmaps += num_pins;
+
+	maps = krealloc_array(maps, nmaps, sizeof(*maps), GFP_KERNEL);
+	if (!maps) {
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	*map = maps;
+	*num_maps = nmaps;
+	if (num_pins) {
+		of_property_for_each_string(np, "pins", prop, pin) {
+			ret = rzg2l_map_add_config(&maps[idx], pin,
+						   PIN_MAP_TYPE_CONFIGS_PIN,
+						   configs, num_configs);
+			if (ret < 0)
+				goto done;
+
+			idx++;
+		}
+		ret = 0;
+		goto done;
+	}
+
+	pins = devm_kcalloc(pctrl->dev, num_pinmux, sizeof(*pins), GFP_KERNEL);
+	psel_val = devm_kcalloc(pctrl->dev, num_pinmux, sizeof(*psel_val),
+				GFP_KERNEL);
+	pin_fn = devm_kzalloc(pctrl->dev, sizeof(*pin_fn), GFP_KERNEL);
+	if (!pins || !psel_val || !pin_fn) {
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	/* Collect pin locations and mux settings from DT properties */
+	for (i = 0; i < num_pinmux; ++i) {
+		u32 value;
+
+		ret = of_property_read_u32_index(np, "pinmux", i, &value);
+		if (ret)
+			goto done;
+		pins[i] = value & MUX_PIN_ID_MASK;
+		psel_val[i] = MUX_FUNC(value);
+	}
+
+	/* Register a single pin group listing all the pins we read from DT */
+	gsel = pinctrl_generic_add_group(pctldev, np->name, pins, num_pinmux, NULL);
+	if (gsel < 0) {
+		ret = gsel;
+		goto done;
+	}
+
+	/*
+	 * Register a single group function where the 'data' is an array PSEL
+	 * register values read from DT.
+	 */
+	pin_fn[0] = np->name;
+	fsel = pinmux_generic_add_function(pctldev, np->name, pin_fn, 1,
+					   psel_val);
+	if (fsel < 0) {
+		ret = fsel;
+		goto remove_group;
+	}
+
+	maps[idx].type = PIN_MAP_TYPE_MUX_GROUP;
+	maps[idx].data.mux.group = np->name;
+	maps[idx].data.mux.function = np->name;
+	idx++;
+
+	dev_dbg(pctrl->dev, "Parsed %pOF with %d pins\n", np, num_pinmux);
+	ret = 0;
+	goto done;
+
+remove_group:
+	pinctrl_generic_remove_group(pctldev, gsel);
+done:
+	*index = idx;
+	kfree(configs);
+	return ret;
+}
+
+static void rzg2l_dt_free_map(struct pinctrl_dev *pctldev,
+			      struct pinctrl_map *map,
+			      unsigned int num_maps)
+{
+	unsigned int i;
+
+	if (!map)
+		return;
+
+	for (i = 0; i < num_maps; ++i) {
+		if (map[i].type == PIN_MAP_TYPE_CONFIGS_GROUP ||
+		    map[i].type == PIN_MAP_TYPE_CONFIGS_PIN)
+			kfree(map[i].data.configs.configs);
+	}
+	kfree(map);
+}
+
+static int rzg2l_dt_node_to_map(struct pinctrl_dev *pctldev,
+				struct device_node *np,
+				struct pinctrl_map **map,
+				unsigned int *num_maps)
+{
+	struct rzg2l_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev);
+	struct device_node *child;
+	unsigned int index;
+	int ret;
+
+	*map = NULL;
+	*num_maps = 0;
+	index = 0;
+
+	for_each_child_of_node(np, child) {
+		ret = rzg2l_dt_subnode_to_map(pctldev, child, map,
+					      num_maps, &index);
+		if (ret < 0) {
+			of_node_put(child);
+			goto done;
+		}
+	}
+
+	if (*num_maps == 0) {
+		ret = rzg2l_dt_subnode_to_map(pctldev, np, map,
+					      num_maps, &index);
+		if (ret < 0)
+			goto done;
+	}
+
+	if (*num_maps)
+		return 0;
+
+	dev_err(pctrl->dev, "no mapping found in node %pOF\n", np);
+	ret = -EINVAL;
+
+done:
+	if (ret < 0)
+		rzg2l_dt_free_map(pctldev, *map, *num_maps);
+
+	return ret;
+}
+
+static int rzg2l_pinctrl_pinconf_get(struct pinctrl_dev *pctldev,
+				     unsigned int _pin,
+				     unsigned long *config)
+{
+	struct rzg2l_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev);
+	enum pin_config_param param = pinconf_to_config_param(*config);
+	const struct pinctrl_pin_desc *pin = &pctrl->desc.pins[_pin];
+	unsigned int *pin_data = pin->drv_data;
+	unsigned int arg = 0;
+	unsigned long flags;
+	void __iomem *addr;
+	u32 port = 0, reg;
+	u32 cfg = 0;
+	u8 bit = 0;
+
+	if (!pin_data)
+		return -EINVAL;
+
+	if (*pin_data & RZG2L_SINGLE_PIN) {
+		port = RZG2L_SINGLE_PIN_GET_PORT(*pin_data);
+		cfg = RZG2L_SINGLE_PIN_GET_CFGS(*pin_data);
+		bit = RZG2L_SINGLE_PIN_GET_BIT(*pin_data);
+	}
+
+	switch (param) {
+	case PIN_CONFIG_INPUT_ENABLE:
+		if (!(cfg & PIN_CFG_IEN))
+			return -EINVAL;
+		spin_lock_irqsave(&pctrl->lock, flags);
+		/* handle _L/_H for 32-bit register read/write */
+		addr = pctrl->base + IEN(port);
+		if (bit >= 4) {
+			bit -= 4;
+			addr += 4;
+		}
+
+		reg = readl(addr) & (IEN_MASK << (bit * 8));
+		arg = (reg >> (bit * 8)) & 0x1;
+		spin_unlock_irqrestore(&pctrl->lock, flags);
+		break;
+
+	case PIN_CONFIG_POWER_SOURCE: {
+		u32 pwr_reg = 0x0;
+
+		if (cfg & PIN_CFG_IOLH_SD0)
+			pwr_reg = SD_CH(0);
+		else if (cfg & PIN_CFG_IOLH_SD1)
+			pwr_reg = SD_CH(1);
+		else if (cfg & PIN_CFG_IOLH_QSPI)
+			pwr_reg = QSPI;
+		else
+			return -EINVAL;
+
+		spin_lock_irqsave(&pctrl->lock, flags);
+		addr = pctrl->base + pwr_reg;
+		arg = (readl(addr) & PVDD_MASK) ? 1800 : 3300;
+		spin_unlock_irqrestore(&pctrl->lock, flags);
+		break;
+	}
+
+	default:
+		return -ENOTSUPP;
+	}
+
+	*config = pinconf_to_config_packed(param, arg);
+
+	return 0;
+};
+
+static int rzg2l_pinctrl_pinconf_set(struct pinctrl_dev *pctldev,
+				     unsigned int _pin,
+				     unsigned long *_configs,
+				     unsigned int num_configs)
+{
+	struct rzg2l_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev);
+	const struct pinctrl_pin_desc *pin = &pctrl->desc.pins[_pin];
+	unsigned int *pin_data = pin->drv_data;
+	enum pin_config_param param;
+	unsigned long flags;
+	void __iomem *addr;
+	u32 port = 0, reg;
+	unsigned int i;
+	u32 cfg = 0;
+	u8 bit = 0;
+
+	if (!pin_data)
+		return -EINVAL;
+
+	if (*pin_data & RZG2L_SINGLE_PIN) {
+		port = RZG2L_SINGLE_PIN_GET_PORT(*pin_data);
+		cfg = RZG2L_SINGLE_PIN_GET_CFGS(*pin_data);
+		bit = RZG2L_SINGLE_PIN_GET_BIT(*pin_data);
+	}
+
+	for (i = 0; i < num_configs; i++) {
+		param = pinconf_to_config_param(_configs[i]);
+		switch (param) {
+		case PIN_CONFIG_INPUT_ENABLE: {
+			unsigned int arg =
+					pinconf_to_config_argument(_configs[i]);
+
+			if (!(cfg & PIN_CFG_IEN))
+				return -EINVAL;
+
+			/* handle _L/_H for 32-bit register read/write */
+			addr = pctrl->base + IEN(port);
+			if (bit >= 4) {
+				bit -= 4;
+				addr += 4;
+			}
+
+			spin_lock_irqsave(&pctrl->lock, flags);
+			reg = readl(addr) & ~(IEN_MASK << (bit * 8));
+			writel(reg | (arg << (bit * 8)), addr);
+			spin_unlock_irqrestore(&pctrl->lock, flags);
+			break;
+		}
+
+		case PIN_CONFIG_POWER_SOURCE: {
+			unsigned int mV = pinconf_to_config_argument(_configs[i]);
+			u32 pwr_reg = 0x0;
+
+			if (mV != 1800 && mV != 3300)
+				return -EINVAL;
+
+			if (cfg & PIN_CFG_IOLH_SD0)
+				pwr_reg = SD_CH(0);
+			else if (cfg & PIN_CFG_IOLH_SD1)
+				pwr_reg = SD_CH(1);
+			else if (cfg & PIN_CFG_IOLH_QSPI)
+				pwr_reg = QSPI;
+			else
+				return -EINVAL;
+
+			addr = pctrl->base + pwr_reg;
+			spin_lock_irqsave(&pctrl->lock, flags);
+			writel((mV == 1800) ? PVDD_1800 : PVDD_3300, addr);
+			spin_unlock_irqrestore(&pctrl->lock, flags);
+			break;
+		}
+		default:
+			return -EOPNOTSUPP;
+		}
+	}
+
+	return 0;
+}
+
+static int rzg2l_pinctrl_pinconf_group_set(struct pinctrl_dev *pctldev,
+					   unsigned int group,
+					   unsigned long *configs,
+					   unsigned int num_configs)
+{
+	const unsigned int *pins;
+	unsigned int i, npins;
+	int ret;
+
+	ret = pinctrl_generic_get_group_pins(pctldev, group, &pins, &npins);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < npins; i++) {
+		ret = rzg2l_pinctrl_pinconf_set(pctldev, pins[i], configs,
+						num_configs);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+};
+
+static int rzg2l_pinctrl_pinconf_group_get(struct pinctrl_dev *pctldev,
+					   unsigned int group,
+					   unsigned long *config)
+{
+	const unsigned int *pins;
+	unsigned int i, npins, prev_config = 0;
+	int ret;
+
+	ret = pinctrl_generic_get_group_pins(pctldev, group, &pins, &npins);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < npins; i++) {
+		ret = rzg2l_pinctrl_pinconf_get(pctldev, pins[i], config);
+		if (ret)
+			return ret;
+
+		/* Check config matching between to pin  */
+		if (i && prev_config != *config)
+			return -EOPNOTSUPP;
+
+		prev_config = *config;
+	}
+
+	return 0;
+};
+
+static const struct pinctrl_ops rzg2l_pinctrl_pctlops = {
+	.get_groups_count = pinctrl_generic_get_group_count,
+	.get_group_name = pinctrl_generic_get_group_name,
+	.get_group_pins = pinctrl_generic_get_group_pins,
+	.dt_node_to_map = rzg2l_dt_node_to_map,
+	.dt_free_map = rzg2l_dt_free_map,
+};
+
+static const struct pinmux_ops rzg2l_pinctrl_pmxops = {
+	.get_functions_count = pinmux_generic_get_function_count,
+	.get_function_name = pinmux_generic_get_function_name,
+	.get_function_groups = pinmux_generic_get_function_groups,
+	.set_mux = rzg2l_pinctrl_set_mux,
+	.strict = true,
+};
+
+static const struct pinconf_ops rzg2l_pinctrl_confops = {
+	.is_generic = true,
+	.pin_config_get = rzg2l_pinctrl_pinconf_get,
+	.pin_config_set = rzg2l_pinctrl_pinconf_set,
+	.pin_config_group_set = rzg2l_pinctrl_pinconf_group_set,
+	.pin_config_group_get = rzg2l_pinctrl_pinconf_group_get,
+	.pin_config_config_dbg_show = pinconf_generic_dump_config,
+};
+
+static int rzg2l_gpio_request(struct gpio_chip *chip, unsigned int offset)
+{
+	struct rzg2l_pinctrl *pctrl = gpiochip_get_data(chip);
+	u32 port = RZG2L_PIN_ID_TO_PORT(offset);
+	u8 bit = RZG2L_PIN_ID_TO_PIN(offset);
+	unsigned long flags;
+	u8 reg8;
+	int ret;
+
+	ret = pinctrl_gpio_request(chip->base + offset);
+	if (ret)
+		return ret;
+
+	spin_lock_irqsave(&pctrl->lock, flags);
+
+	/* Select GPIO mode in PMC Register */
+	reg8 = readb(pctrl->base + PMC(port));
+	reg8 &= ~BIT(bit);
+	writeb(reg8, pctrl->base + PMC(port));
+
+	spin_unlock_irqrestore(&pctrl->lock, flags);
+
+	return 0;
+}
+
+static void rzg2l_gpio_set_direction(struct rzg2l_pinctrl *pctrl, u32 port,
+				     u8 bit, bool output)
+{
+	unsigned long flags;
+	u16 reg16;
+
+	spin_lock_irqsave(&pctrl->lock, flags);
+
+	reg16 = readw(pctrl->base + PM(port));
+	reg16 &= ~(PM_MASK << (bit * 2));
+
+	reg16 |= (output ? PM_OUTPUT : PM_INPUT) << (bit * 2);
+	writew(reg16, pctrl->base + PM(port));
+
+	spin_unlock_irqrestore(&pctrl->lock, flags);
+}
+
+static int rzg2l_gpio_get_direction(struct gpio_chip *chip, unsigned int offset)
+{
+	struct rzg2l_pinctrl *pctrl = gpiochip_get_data(chip);
+	u32 port = RZG2L_PIN_ID_TO_PORT(offset);
+	u8 bit = RZG2L_PIN_ID_TO_PIN(offset);
+
+	if (!(readb(pctrl->base + PMC(port)) & BIT(bit))) {
+		u16 reg16;
+
+		reg16 = readw(pctrl->base + PM(port));
+		reg16 = (reg16 >> (bit * 2)) & PM_MASK;
+		if (reg16 == PM_OUTPUT)
+			return GPIO_LINE_DIRECTION_OUT;
+	}
+
+	return GPIO_LINE_DIRECTION_IN;
+}
+
+static int rzg2l_gpio_direction_input(struct gpio_chip *chip,
+				      unsigned int offset)
+{
+	struct rzg2l_pinctrl *pctrl = gpiochip_get_data(chip);
+	u32 port = RZG2L_PIN_ID_TO_PORT(offset);
+	u8 bit = RZG2L_PIN_ID_TO_PIN(offset);
+
+	rzg2l_gpio_set_direction(pctrl, port, bit, false);
+
+	return 0;
+}
+
+static void rzg2l_gpio_set(struct gpio_chip *chip, unsigned int offset,
+			   int value)
+{
+	struct rzg2l_pinctrl *pctrl = gpiochip_get_data(chip);
+	u32 port = RZG2L_PIN_ID_TO_PORT(offset);
+	u8 bit = RZG2L_PIN_ID_TO_PIN(offset);
+	unsigned long flags;
+	u8 reg8;
+
+	spin_lock_irqsave(&pctrl->lock, flags);
+
+	reg8 = readb(pctrl->base + P(port));
+
+	if (value)
+		writeb(reg8 | BIT(bit), pctrl->base + P(port));
+	else
+		writeb(reg8 & ~BIT(bit), pctrl->base + P(port));
+
+	spin_unlock_irqrestore(&pctrl->lock, flags);
+}
+
+static int rzg2l_gpio_direction_output(struct gpio_chip *chip,
+				       unsigned int offset, int value)
+{
+	struct rzg2l_pinctrl *pctrl = gpiochip_get_data(chip);
+	u32 port = RZG2L_PIN_ID_TO_PORT(offset);
+	u8 bit = RZG2L_PIN_ID_TO_PIN(offset);
+
+	rzg2l_gpio_set(chip, offset, value);
+	rzg2l_gpio_set_direction(pctrl, port, bit, true);
+
+	return 0;
+}
+
+static int rzg2l_gpio_get(struct gpio_chip *chip, unsigned int offset)
+{
+	struct rzg2l_pinctrl *pctrl = gpiochip_get_data(chip);
+	u32 port = RZG2L_PIN_ID_TO_PORT(offset);
+	u8 bit = RZG2L_PIN_ID_TO_PIN(offset);
+	u16 reg16;
+
+	reg16 = readw(pctrl->base + PM(port));
+	reg16 = (reg16 >> (bit * 2)) & PM_MASK;
+
+	if (reg16 == PM_INPUT)
+		return !!(readb(pctrl->base + PIN(port)) & BIT(bit));
+	else if (reg16 == PM_OUTPUT)
+		return !!(readb(pctrl->base + P(port)) & BIT(bit));
+	else
+		return -EINVAL;
+}
+
+static void rzg2l_gpio_free(struct gpio_chip *chip, unsigned int offset)
+{
+	pinctrl_gpio_free(chip->base + offset);
+
+	/*
+	 * Set the GPIO as an input to ensure that the next GPIO request won't
+	 * drive the GPIO pin as an output.
+	 */
+	rzg2l_gpio_direction_input(chip, offset);
+}
+
+static const char * const rzg2l_gpio_names[] = {
+	"P0_0", "P0_1", "P0_2", "P0_3", "P0_4", "P0_5", "P0_6", "P0_7",
+	"P1_0", "P1_1", "P1_2", "P1_3", "P1_4", "P1_5", "P1_6", "P1_7",
+	"P2_0", "P2_1", "P2_2", "P2_3", "P2_4", "P2_5", "P2_6", "P2_7",
+	"P3_0", "P3_1", "P3_2", "P3_3", "P3_4", "P3_5", "P3_6", "P3_7",
+	"P4_0", "P4_1", "P4_2", "P4_3", "P4_4", "P4_5", "P4_6", "P4_7",
+	"P5_0", "P5_1", "P5_2", "P5_3", "P5_4", "P5_5", "P5_6", "P5_7",
+	"P6_0", "P6_1", "P6_2", "P6_3", "P6_4", "P6_5", "P6_6", "P6_7",
+	"P7_0", "P7_1", "P7_2", "P7_3", "P7_4", "P7_5", "P7_6", "P7_7",
+	"P8_0", "P8_1", "P8_2", "P8_3", "P8_4", "P8_5", "P8_6", "P8_7",
+	"P9_0", "P9_1", "P9_2", "P9_3", "P9_4", "P9_5", "P9_6", "P9_7",
+	"P10_0", "P10_1", "P10_2", "P10_3", "P10_4", "P10_5", "P10_6", "P10_7",
+	"P11_0", "P11_1", "P11_2", "P11_3", "P11_4", "P11_5", "P11_6", "P11_7",
+	"P12_0", "P12_1", "P12_2", "P12_3", "P12_4", "P12_5", "P12_6", "P12_7",
+	"P13_0", "P13_1", "P13_2", "P13_3", "P13_4", "P13_5", "P13_6", "P13_7",
+	"P14_0", "P14_1", "P14_2", "P14_3", "P14_4", "P14_5", "P14_6", "P14_7",
+	"P15_0", "P15_1", "P15_2", "P15_3", "P15_4", "P15_5", "P15_6", "P15_7",
+	"P16_0", "P16_1", "P16_2", "P16_3", "P16_4", "P16_5", "P16_6", "P16_7",
+	"P17_0", "P17_1", "P17_2", "P17_3", "P17_4", "P17_5", "P17_6", "P17_7",
+	"P18_0", "P18_1", "P18_2", "P18_3", "P18_4", "P18_5", "P18_6", "P18_7",
+	"P19_0", "P19_1", "P19_2", "P19_3", "P19_4", "P19_5", "P19_6", "P19_7",
+	"P20_0", "P20_1", "P20_2", "P20_3", "P20_4", "P20_5", "P20_6", "P20_7",
+	"P21_0", "P21_1", "P21_2", "P21_3", "P21_4", "P21_5", "P21_6", "P21_7",
+	"P22_0", "P22_1", "P22_2", "P22_3", "P22_4", "P22_5", "P22_6", "P22_7",
+	"P23_0", "P23_1", "P23_2", "P23_3", "P23_4", "P23_5", "P23_6", "P23_7",
+	"P24_0", "P24_1", "P24_2", "P24_3", "P24_4", "P24_5", "P24_6", "P24_7",
+	"P25_0", "P25_1", "P25_2", "P25_3", "P25_4", "P25_5", "P25_6", "P25_7",
+	"P26_0", "P26_1", "P26_2", "P26_3", "P26_4", "P26_5", "P26_6", "P26_7",
+	"P27_0", "P27_1", "P27_2", "P27_3", "P27_4", "P27_5", "P27_6", "P27_7",
+	"P28_0", "P28_1", "P28_2", "P28_3", "P28_4", "P28_5", "P28_6", "P28_7",
+	"P29_0", "P29_1", "P29_2", "P29_3", "P29_4", "P29_5", "P29_6", "P29_7",
+	"P30_0", "P30_1", "P30_2", "P30_3", "P30_4", "P30_5", "P30_6", "P30_7",
+	"P31_0", "P31_1", "P31_2", "P31_3", "P31_4", "P31_5", "P31_6", "P31_7",
+	"P32_0", "P32_1", "P32_2", "P32_3", "P32_4", "P32_5", "P32_6", "P32_7",
+	"P33_0", "P33_1", "P33_2", "P33_3", "P33_4", "P33_5", "P33_6", "P33_7",
+	"P34_0", "P34_1", "P34_2", "P34_3", "P34_4", "P34_5", "P34_6", "P34_7",
+	"P35_0", "P35_1", "P35_2", "P35_3", "P35_4", "P35_5", "P35_6", "P35_7",
+	"P36_0", "P36_1", "P36_2", "P36_3", "P36_4", "P36_5", "P36_6", "P36_7",
+	"P37_0", "P37_1", "P37_2", "P37_3", "P37_4", "P37_5", "P37_6", "P37_7",
+	"P38_0", "P38_1", "P38_2", "P38_3", "P38_4", "P38_5", "P38_6", "P38_7",
+	"P39_0", "P39_1", "P39_2", "P39_3", "P39_4", "P39_5", "P39_6", "P39_7",
+	"P40_0", "P40_1", "P40_2", "P40_3", "P40_4", "P40_5", "P40_6", "P40_7",
+	"P41_0", "P41_1", "P41_2", "P41_3", "P41_4", "P41_5", "P41_6", "P41_7",
+	"P42_0", "P42_1", "P42_2", "P42_3", "P42_4", "P42_5", "P42_6", "P42_7",
+	"P43_0", "P43_1", "P43_2", "P43_3", "P43_4", "P43_5", "P43_6", "P43_7",
+	"P44_0", "P44_1", "P44_2", "P44_3", "P44_4", "P44_5", "P44_6", "P44_7",
+	"P45_0", "P45_1", "P45_2", "P45_3", "P45_4", "P45_5", "P45_6", "P45_7",
+	"P46_0", "P46_1", "P46_2", "P46_3", "P46_4", "P46_5", "P46_6", "P46_7",
+	"P47_0", "P47_1", "P47_2", "P47_3", "P47_4", "P47_5", "P47_6", "P47_7",
+	"P48_0", "P48_1", "P48_2", "P48_3", "P48_4", "P48_5", "P48_6", "P48_7",
+};
+
+static const u32 rzg2l_gpio_configs[] = {
+	RZG2L_GPIO_PORT_PACK(2, 0x10, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(2, 0x11, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(2, 0x12, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(2, 0x13, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(2, 0x14, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(3, 0x15, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(2, 0x16, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(3, 0x17, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(3, 0x18, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(2, 0x19, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(2, 0x1a, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(2, 0x1b, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(2, 0x1c, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(3, 0x1d, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(2, 0x1e, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(2, 0x1f, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(2, 0x20, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(3, 0x22, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(2, 0x22, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(2, 0x23, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(3, 0x24, RZG2L_MPXED_ETH_PIN_FUNCS(PIN_CFG_IOLH_ETH0)),
+	RZG2L_GPIO_PORT_PACK(2, 0x25, RZG2L_MPXED_ETH_PIN_FUNCS(PIN_CFG_IOLH_ETH0)),
+	RZG2L_GPIO_PORT_PACK(2, 0x26, RZG2L_MPXED_ETH_PIN_FUNCS(PIN_CFG_IOLH_ETH0)),
+	RZG2L_GPIO_PORT_PACK(2, 0x27, RZG2L_MPXED_ETH_PIN_FUNCS(PIN_CFG_IOLH_ETH0)),
+	RZG2L_GPIO_PORT_PACK(2, 0x28, RZG2L_MPXED_ETH_PIN_FUNCS(PIN_CFG_IOLH_ETH0)),
+	RZG2L_GPIO_PORT_PACK(2, 0x29, RZG2L_MPXED_ETH_PIN_FUNCS(PIN_CFG_IOLH_ETH0)),
+	RZG2L_GPIO_PORT_PACK(2, 0x2a, RZG2L_MPXED_ETH_PIN_FUNCS(PIN_CFG_IOLH_ETH0)),
+	RZG2L_GPIO_PORT_PACK(2, 0x2b, RZG2L_MPXED_ETH_PIN_FUNCS(PIN_CFG_IOLH_ETH0)),
+	RZG2L_GPIO_PORT_PACK(2, 0x2c, RZG2L_MPXED_ETH_PIN_FUNCS(PIN_CFG_IOLH_ETH0)),
+	RZG2L_GPIO_PORT_PACK(2, 0x2d, RZG2L_MPXED_ETH_PIN_FUNCS(PIN_CFG_IOLH_ETH1)),
+	RZG2L_GPIO_PORT_PACK(2, 0x2e, RZG2L_MPXED_ETH_PIN_FUNCS(PIN_CFG_IOLH_ETH1)),
+	RZG2L_GPIO_PORT_PACK(2, 0x2f, RZG2L_MPXED_ETH_PIN_FUNCS(PIN_CFG_IOLH_ETH1)),
+	RZG2L_GPIO_PORT_PACK(2, 0x30, RZG2L_MPXED_ETH_PIN_FUNCS(PIN_CFG_IOLH_ETH1)),
+	RZG2L_GPIO_PORT_PACK(2, 0x31, RZG2L_MPXED_ETH_PIN_FUNCS(PIN_CFG_IOLH_ETH1)),
+	RZG2L_GPIO_PORT_PACK(2, 0x32, RZG2L_MPXED_ETH_PIN_FUNCS(PIN_CFG_IOLH_ETH1)),
+	RZG2L_GPIO_PORT_PACK(2, 0x33, RZG2L_MPXED_ETH_PIN_FUNCS(PIN_CFG_IOLH_ETH1)),
+	RZG2L_GPIO_PORT_PACK(2, 0x34, RZG2L_MPXED_ETH_PIN_FUNCS(PIN_CFG_IOLH_ETH1)),
+	RZG2L_GPIO_PORT_PACK(3, 0x35, RZG2L_MPXED_ETH_PIN_FUNCS(PIN_CFG_IOLH_ETH1)),
+	RZG2L_GPIO_PORT_PACK(2, 0x36, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(3, 0x37, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(3, 0x38, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(2, 0x39, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(5, 0x3a, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(4, 0x3b, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(4, 0x3c, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(4, 0x3d, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(4, 0x3e, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(4, 0x3f, RZG2L_MPXED_PIN_FUNCS),
+	RZG2L_GPIO_PORT_PACK(5, 0x40, RZG2L_MPXED_PIN_FUNCS),
+};
+
+static  struct rzg2l_dedicated_configs rzg2l_dedicated_pins[] = {
+	{ "NMI", RZG2L_SINGLE_PIN_PACK(0x1, 0,
+	 (PIN_CFG_FILONOFF | PIN_CFG_FILNUM | PIN_CFG_FILCLKSEL)) },
+	{ "TMS/SWDIO", RZG2L_SINGLE_PIN_PACK(0x2, 0,
+	 (PIN_CFG_SR | PIN_CFG_IOLH | PIN_CFG_IEN)) },
+	{ "TDO", RZG2L_SINGLE_PIN_PACK(0x3, 0,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IEN)) },
+	{ "AUDIO_CLK1", RZG2L_SINGLE_PIN_PACK(0x4, 0, PIN_CFG_IEN) },
+	{ "AUDIO_CLK2", RZG2L_SINGLE_PIN_PACK(0x4, 1, PIN_CFG_IEN) },
+	{ "SD0_CLK", RZG2L_SINGLE_PIN_PACK(0x6, 0,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IOLH_SD0)) },
+	{ "SD0_CMD", RZG2L_SINGLE_PIN_PACK(0x6, 1,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IEN | PIN_CFG_IOLH_SD0)) },
+	{ "SD0_RST#", RZG2L_SINGLE_PIN_PACK(0x6, 2,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IOLH_SD0)) },
+	{ "SD0_DATA0", RZG2L_SINGLE_PIN_PACK(0x7, 0,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IEN | PIN_CFG_IOLH_SD0)) },
+	{ "SD0_DATA1", RZG2L_SINGLE_PIN_PACK(0x7, 1,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IEN | PIN_CFG_IOLH_SD0)) },
+	{ "SD0_DATA2", RZG2L_SINGLE_PIN_PACK(0x7, 2,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IEN | PIN_CFG_IOLH_SD0)) },
+	{ "SD0_DATA3", RZG2L_SINGLE_PIN_PACK(0x7, 3,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IEN | PIN_CFG_IOLH_SD0)) },
+	{ "SD0_DATA4", RZG2L_SINGLE_PIN_PACK(0x7, 4,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IEN | PIN_CFG_IOLH_SD0)) },
+	{ "SD0_DATA5", RZG2L_SINGLE_PIN_PACK(0x7, 5,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IEN | PIN_CFG_IOLH_SD0)) },
+	{ "SD0_DATA6", RZG2L_SINGLE_PIN_PACK(0x7, 6,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IEN | PIN_CFG_IOLH_SD0)) },
+	{ "SD0_DATA7", RZG2L_SINGLE_PIN_PACK(0x7, 7,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IEN | PIN_CFG_IOLH_SD0)) },
+	{ "SD1_CLK", RZG2L_SINGLE_PIN_PACK(0x8, 0,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IOLH_SD1))},
+	{ "SD1_CMD", RZG2L_SINGLE_PIN_PACK(0x8, 1,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IEN | PIN_CFG_IOLH_SD1)) },
+	{ "SD1_DATA0", RZG2L_SINGLE_PIN_PACK(0x9, 0,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IEN | PIN_CFG_IOLH_SD1)) },
+	{ "SD1_DATA1", RZG2L_SINGLE_PIN_PACK(0x9, 1,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IEN | PIN_CFG_IOLH_SD1)) },
+	{ "SD1_DATA2", RZG2L_SINGLE_PIN_PACK(0x9, 2,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IEN | PIN_CFG_IOLH_SD1)) },
+	{ "SD1_DATA3", RZG2L_SINGLE_PIN_PACK(0x9, 3,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IEN | PIN_CFG_IOLH_SD1)) },
+	{ "QSPI0_SPCLK", RZG2L_SINGLE_PIN_PACK(0xa, 0,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IOLH_QSPI)) },
+	{ "QSPI0_IO0", RZG2L_SINGLE_PIN_PACK(0xa, 1,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IOLH_QSPI)) },
+	{ "QSPI0_IO1", RZG2L_SINGLE_PIN_PACK(0xa, 2,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IOLH_QSPI)) },
+	{ "QSPI0_IO2", RZG2L_SINGLE_PIN_PACK(0xa, 3,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IOLH_QSPI)) },
+	{ "QSPI0_IO3", RZG2L_SINGLE_PIN_PACK(0xa, 4,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IOLH_QSPI)) },
+	{ "QSPI0_SSL", RZG2L_SINGLE_PIN_PACK(0xa, 5,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IOLH_QSPI)) },
+	{ "QSPI1_SPCLK", RZG2L_SINGLE_PIN_PACK(0xb, 0,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IOLH_QSPI)) },
+	{ "QSPI1_IO0", RZG2L_SINGLE_PIN_PACK(0xb, 1,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IOLH_QSPI)) },
+	{ "QSPI1_IO1", RZG2L_SINGLE_PIN_PACK(0xb, 2,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IOLH_QSPI)) },
+	{ "QSPI1_IO2", RZG2L_SINGLE_PIN_PACK(0xb, 3,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IOLH_QSPI)) },
+	{ "QSPI1_IO3", RZG2L_SINGLE_PIN_PACK(0xb, 4,
+	 (PIN_CFG_IOLH | PIN_CFG_SR  | PIN_CFG_IOLH_QSPI)) },
+	{ "QSPI1_SSL", RZG2L_SINGLE_PIN_PACK(0xb, 5,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IOLH_QSPI)) },
+	{ "QSPI_RESET#", RZG2L_SINGLE_PIN_PACK(0xc, 0,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IOLH_QSPI)) },
+	{ "QSPI_WP#", RZG2L_SINGLE_PIN_PACK(0xc, 1,
+	 (PIN_CFG_IOLH | PIN_CFG_SR | PIN_CFG_IOLH_QSPI)) },
+	{ "QSPI_INT#", RZG2L_SINGLE_PIN_PACK(0xc, 2, (PIN_CFG_SR | PIN_CFG_IOLH_QSPI)) },
+	{ "WDTOVF_PERROUT#", RZG2L_SINGLE_PIN_PACK(0xd, 0, (PIN_CFG_IOLH | PIN_CFG_SR)) },
+	{ "RIIC0_SDA", RZG2L_SINGLE_PIN_PACK(0xe, 0, PIN_CFG_IEN) },
+	{ "RIIC0_SCL", RZG2L_SINGLE_PIN_PACK(0xe, 1, PIN_CFG_IEN) },
+	{ "RIIC1_SDA", RZG2L_SINGLE_PIN_PACK(0xe, 2, PIN_CFG_IEN) },
+	{ "RIIC1_SCL", RZG2L_SINGLE_PIN_PACK(0xe, 3, PIN_CFG_IEN) },
+};
+
+static int rzg2l_gpio_register(struct rzg2l_pinctrl *pctrl)
+{
+	struct device_node *np = pctrl->dev->of_node;
+	struct gpio_chip *chip = &pctrl->gpio_chip;
+	const char *name = dev_name(pctrl->dev);
+	struct of_phandle_args of_args;
+	int ret;
+
+	ret = of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, 0, &of_args);
+	if (ret) {
+		dev_err(pctrl->dev, "Unable to parse gpio-ranges\n");
+		return ret;
+	}
+
+	if (of_args.args[0] != 0 || of_args.args[1] != 0 ||
+	    of_args.args[2] != ARRAY_SIZE(rzg2l_gpio_names)) {
+		dev_err(pctrl->dev, "gpio-ranges does not match selected SOC\n");
+		return -EINVAL;
+	}
+
+	chip->names = rzg2l_gpio_names;
+	chip->request = rzg2l_gpio_request;
+	chip->free = rzg2l_gpio_free;
+	chip->get_direction = rzg2l_gpio_get_direction;
+	chip->direction_input = rzg2l_gpio_direction_input;
+	chip->direction_output = rzg2l_gpio_direction_output;
+	chip->get = rzg2l_gpio_get;
+	chip->set = rzg2l_gpio_set;
+	chip->label = name;
+	chip->parent = pctrl->dev;
+	chip->owner = THIS_MODULE;
+	chip->base = -1;
+	chip->ngpio = of_args.args[2];
+
+	pctrl->gpio_range.id = 0;
+	pctrl->gpio_range.pin_base = 0;
+	pctrl->gpio_range.base = 0;
+	pctrl->gpio_range.npins = chip->ngpio;
+	pctrl->gpio_range.name = chip->label;
+	pctrl->gpio_range.gc = chip;
+	ret = devm_gpiochip_add_data(pctrl->dev, chip, pctrl);
+	if (ret) {
+		dev_err(pctrl->dev, "failed to add GPIO controller\n");
+		return ret;
+	}
+
+	dev_dbg(pctrl->dev, "Registered gpio controller\n");
+
+	return 0;
+}
+
+static int rzg2l_pinctrl_register(struct rzg2l_pinctrl *pctrl)
+{
+	struct pinctrl_pin_desc *pins;
+	unsigned int i, j;
+	u32 *pin_data;
+	int ret;
+
+	pctrl->desc.name = DRV_NAME;
+	pctrl->desc.npins = pctrl->data->n_port_pins + pctrl->data->n_dedicated_pins;
+	pctrl->desc.pctlops = &rzg2l_pinctrl_pctlops;
+	pctrl->desc.pmxops = &rzg2l_pinctrl_pmxops;
+	pctrl->desc.confops = &rzg2l_pinctrl_confops;
+	pctrl->desc.owner = THIS_MODULE;
+
+	pins = devm_kcalloc(pctrl->dev, pctrl->desc.npins, sizeof(*pins), GFP_KERNEL);
+	if (!pins)
+		return -ENOMEM;
+
+	pin_data = devm_kcalloc(pctrl->dev, pctrl->desc.npins,
+				sizeof(*pin_data), GFP_KERNEL);
+	if (!pin_data)
+		return -ENOMEM;
+
+	pctrl->pins = pins;
+	pctrl->desc.pins = pins;
+
+	for (i = 0, j = 0; i < pctrl->data->n_port_pins; i++) {
+		pins[i].number = i;
+		pins[i].name = pctrl->data->port_pins[i];
+		if (i && !(i % RZG2L_PINS_PER_PORT))
+			j++;
+		pin_data[i] = pctrl->data->port_pin_configs[j];
+		pins[i].drv_data = &pin_data[i];
+	}
+
+	for (i = 0; i < pctrl->data->n_dedicated_pins; i++) {
+		unsigned int index = pctrl->data->n_port_pins + i;
+
+		pins[index].number = index;
+		pins[index].name = pctrl->data->dedicated_pins[i].name;
+		pin_data[index] = pctrl->data->dedicated_pins[i].config;
+		pins[index].drv_data = &pin_data[index];
+	}
+
+	ret = devm_pinctrl_register_and_init(pctrl->dev, &pctrl->desc, pctrl,
+					     &pctrl->pctl);
+	if (ret) {
+		dev_err(pctrl->dev, "pinctrl registration failed\n");
+		return ret;
+	}
+
+	ret = pinctrl_enable(pctrl->pctl);
+	if (ret) {
+		dev_err(pctrl->dev, "pinctrl enable failed\n");
+		return ret;
+	}
+
+	ret = rzg2l_gpio_register(pctrl);
+	if (ret) {
+		dev_err(pctrl->dev, "failed to add GPIO chip: %i\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void rzg2l_pinctrl_clk_disable(void *data)
+{
+	clk_disable_unprepare(data);
+}
+
+static int rzg2l_pinctrl_probe(struct platform_device *pdev)
+{
+	struct rzg2l_pinctrl *pctrl;
+	int ret;
+
+	pctrl = devm_kzalloc(&pdev->dev, sizeof(*pctrl), GFP_KERNEL);
+	if (!pctrl)
+		return -ENOMEM;
+
+	pctrl->dev = &pdev->dev;
+
+	pctrl->data = of_device_get_match_data(&pdev->dev);
+	if (!pctrl->data)
+		return -EINVAL;
+
+	pctrl->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(pctrl->base))
+		return PTR_ERR(pctrl->base);
+
+	pctrl->clk = devm_clk_get(pctrl->dev, NULL);
+	if (IS_ERR(pctrl->clk)) {
+		ret = PTR_ERR(pctrl->clk);
+		dev_err(pctrl->dev, "failed to get GPIO clk : %i\n", ret);
+		return ret;
+	}
+
+	spin_lock_init(&pctrl->lock);
+
+	platform_set_drvdata(pdev, pctrl);
+
+	ret = clk_prepare_enable(pctrl->clk);
+	if (ret) {
+		dev_err(pctrl->dev, "failed to enable GPIO clk: %i\n", ret);
+		return ret;
+	}
+
+	ret = devm_add_action_or_reset(&pdev->dev, rzg2l_pinctrl_clk_disable,
+				       pctrl->clk);
+	if (ret) {
+		dev_err(pctrl->dev,
+			"failed to register GPIO clk disable action, %i\n",
+			ret);
+		return ret;
+	}
+
+	ret = rzg2l_pinctrl_register(pctrl);
+	if (ret)
+		return ret;
+
+	dev_info(pctrl->dev, "%s support registered\n", DRV_NAME);
+	return 0;
+}
+
+static struct rzg2l_pinctrl_data r9a07g044_data = {
+	.port_pins = rzg2l_gpio_names,
+	.port_pin_configs = rzg2l_gpio_configs,
+	.dedicated_pins = rzg2l_dedicated_pins,
+	.n_port_pins = ARRAY_SIZE(rzg2l_gpio_names),
+	.n_dedicated_pins = ARRAY_SIZE(rzg2l_dedicated_pins),
+};
+
+static const struct of_device_id rzg2l_pinctrl_of_table[] = {
+	{
+		.compatible = "renesas,r9a07g044-pinctrl",
+		.data = &r9a07g044_data,
+	},
+	{ /* sentinel */ }
+};
+
+static struct platform_driver rzg2l_pinctrl_driver = {
+	.driver = {
+		.name = DRV_NAME,
+		.of_match_table = of_match_ptr(rzg2l_pinctrl_of_table),
+	},
+	.probe = rzg2l_pinctrl_probe,
+};
+
+static int __init rzg2l_pinctrl_init(void)
+{
+	return platform_driver_register(&rzg2l_pinctrl_driver);
+}
+core_initcall(rzg2l_pinctrl_init);
+
+MODULE_AUTHOR("Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>");
+MODULE_DESCRIPTION("Pin and gpio controller driver for RZ/G2L family");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pinctrl/renesas/pinctrl.c b/drivers/pinctrl/renesas/pinctrl.c
index bb488af29862..f3eecb20c086 100644
--- a/drivers/pinctrl/renesas/pinctrl.c
+++ b/drivers/pinctrl/renesas/pinctrl.c
@@ -841,7 +841,7 @@ int sh_pfc_register_pinctrl(struct sh_pfc *pfc)
 	return pinctrl_enable(pmx->pctl);
 }
 
-static const struct pinmux_bias_reg *
+const struct pinmux_bias_reg *
 rcar_pin_to_bias_reg(const struct sh_pfc *pfc, unsigned int pin,
 		     unsigned int *bit)
 {
@@ -898,17 +898,17 @@ void rcar_pinmux_set_bias(struct sh_pfc *pfc, unsigned int pin,
 
 	if (reg->puen) {
 		enable = sh_pfc_read(pfc, reg->puen) & ~BIT(bit);
-		if (bias != PIN_CONFIG_BIAS_DISABLE)
+		if (bias != PIN_CONFIG_BIAS_DISABLE) {
 			enable |= BIT(bit);
 
-		if (reg->pud) {
-			updown = sh_pfc_read(pfc, reg->pud) & ~BIT(bit);
-			if (bias == PIN_CONFIG_BIAS_PULL_UP)
-				updown |= BIT(bit);
+			if (reg->pud) {
+				updown = sh_pfc_read(pfc, reg->pud) & ~BIT(bit);
+				if (bias == PIN_CONFIG_BIAS_PULL_UP)
+					updown |= BIT(bit);
 
-			sh_pfc_write(pfc, reg->pud, updown);
+				sh_pfc_write(pfc, reg->pud, updown);
+			}
 		}
-
 		sh_pfc_write(pfc, reg->puen, enable);
 	} else {
 		enable = sh_pfc_read(pfc, reg->pud) & ~BIT(bit);
diff --git a/drivers/pinctrl/renesas/sh_pfc.h b/drivers/pinctrl/renesas/sh_pfc.h
index 320898861c4b..2479b4fb9cf9 100644
--- a/drivers/pinctrl/renesas/sh_pfc.h
+++ b/drivers/pinctrl/renesas/sh_pfc.h
@@ -332,8 +332,8 @@ extern const struct sh_pfc_soc_info r8a7791_pinmux_info;
 extern const struct sh_pfc_soc_info r8a7792_pinmux_info;
 extern const struct sh_pfc_soc_info r8a7793_pinmux_info;
 extern const struct sh_pfc_soc_info r8a7794_pinmux_info;
-extern const struct sh_pfc_soc_info r8a77950_pinmux_info __weak;
-extern const struct sh_pfc_soc_info r8a77951_pinmux_info __weak;
+extern const struct sh_pfc_soc_info r8a77950_pinmux_info;
+extern const struct sh_pfc_soc_info r8a77951_pinmux_info;
 extern const struct sh_pfc_soc_info r8a77960_pinmux_info;
 extern const struct sh_pfc_soc_info r8a77961_pinmux_info;
 extern const struct sh_pfc_soc_info r8a77965_pinmux_info;
@@ -781,6 +781,9 @@ extern const struct sh_pfc_soc_info shx3_pinmux_info;
 /*
  * Bias helpers
  */
+const struct pinmux_bias_reg *
+rcar_pin_to_bias_reg(const struct sh_pfc *pfc, unsigned int pin,
+		     unsigned int *bit);
 unsigned int rcar_pinmux_get_bias(struct sh_pfc *pfc, unsigned int pin);
 void rcar_pinmux_set_bias(struct sh_pfc *pfc, unsigned int pin,
 			  unsigned int bias);
diff --git a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c
index b6e56422a700..fe5f6046fbd5 100644
--- a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c
+++ b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c
@@ -40,6 +40,24 @@ static const struct samsung_pin_bank_type exynos5433_bank_type_alive = {
 	.reg_offset = { 0x00, 0x04, 0x08, 0x0c, },
 };
 
+/*
+ * Bank type for non-alive type. Bit fields:
+ * CON: 4, DAT: 1, PUD: 4, DRV: 4, CONPDN: 2, PUDPDN: 4
+ */
+static const struct samsung_pin_bank_type exynos850_bank_type_off  = {
+	.fld_width = { 4, 1, 4, 4, 2, 4, },
+	.reg_offset = { 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, },
+};
+
+/*
+ * Bank type for alive type. Bit fields:
+ * CON: 4, DAT: 1, PUD: 4, DRV: 4
+ */
+static const struct samsung_pin_bank_type exynos850_bank_type_alive = {
+	.fld_width = { 4, 1, 4, 4, },
+	.reg_offset = { 0x00, 0x04, 0x08, 0x0c, },
+};
+
 /* Pad retention control code for accessing PMU regmap */
 static atomic_t exynos_shared_retention_refcnt;
 
@@ -422,3 +440,101 @@ const struct samsung_pinctrl_of_match_data exynos7_of_data __initconst = {
 	.ctrl		= exynos7_pin_ctrl,
 	.num_ctrl	= ARRAY_SIZE(exynos7_pin_ctrl),
 };
+
+/* pin banks of exynos850 pin-controller 0 (ALIVE) */
+static const struct samsung_pin_bank_data exynos850_pin_banks0[] __initconst = {
+	/* Must start with EINTG banks, ordered by EINT group number. */
+	EXYNOS850_PIN_BANK_EINTW(8, 0x000, "gpa0", 0x00),
+	EXYNOS850_PIN_BANK_EINTW(8, 0x020, "gpa1", 0x04),
+	EXYNOS850_PIN_BANK_EINTW(8, 0x040, "gpa2", 0x08),
+	EXYNOS850_PIN_BANK_EINTW(8, 0x060, "gpa3", 0x0c),
+	EXYNOS850_PIN_BANK_EINTW(4, 0x080, "gpa4", 0x10),
+	EXYNOS850_PIN_BANK_EINTN(3, 0x0a0, "gpq0"),
+};
+
+/* pin banks of exynos850 pin-controller 1 (CMGP) */
+static const struct samsung_pin_bank_data exynos850_pin_banks1[] __initconst = {
+	/* Must start with EINTG banks, ordered by EINT group number. */
+	EXYNOS850_PIN_BANK_EINTW(1, 0x000, "gpm0", 0x00),
+	EXYNOS850_PIN_BANK_EINTW(1, 0x020, "gpm1", 0x04),
+	EXYNOS850_PIN_BANK_EINTW(1, 0x040, "gpm2", 0x08),
+	EXYNOS850_PIN_BANK_EINTW(1, 0x060, "gpm3", 0x0c),
+	EXYNOS850_PIN_BANK_EINTW(1, 0x080, "gpm4", 0x10),
+	EXYNOS850_PIN_BANK_EINTW(1, 0x0a0, "gpm5", 0x14),
+	EXYNOS850_PIN_BANK_EINTW(1, 0x0c0, "gpm6", 0x18),
+	EXYNOS850_PIN_BANK_EINTW(1, 0x0e0, "gpm7", 0x1c),
+};
+
+/* pin banks of exynos850 pin-controller 2 (AUD) */
+static const struct samsung_pin_bank_data exynos850_pin_banks2[] __initconst = {
+	/* Must start with EINTG banks, ordered by EINT group number. */
+	EXYNOS850_PIN_BANK_EINTG(5, 0x000, "gpb0", 0x00),
+	EXYNOS850_PIN_BANK_EINTG(5, 0x020, "gpb1", 0x04),
+};
+
+/* pin banks of exynos850 pin-controller 3 (HSI) */
+static const struct samsung_pin_bank_data exynos850_pin_banks3[] __initconst = {
+	/* Must start with EINTG banks, ordered by EINT group number. */
+	EXYNOS850_PIN_BANK_EINTG(6, 0x000, "gpf2", 0x00),
+};
+
+/* pin banks of exynos850 pin-controller 4 (CORE) */
+static const struct samsung_pin_bank_data exynos850_pin_banks4[] __initconst = {
+	/* Must start with EINTG banks, ordered by EINT group number. */
+	EXYNOS850_PIN_BANK_EINTG(4, 0x000, "gpf0", 0x00),
+	EXYNOS850_PIN_BANK_EINTG(8, 0x020, "gpf1", 0x04),
+};
+
+/* pin banks of exynos850 pin-controller 5 (PERI) */
+static const struct samsung_pin_bank_data exynos850_pin_banks5[] __initconst = {
+	/* Must start with EINTG banks, ordered by EINT group number. */
+	EXYNOS850_PIN_BANK_EINTG(2, 0x000, "gpg0", 0x00),
+	EXYNOS850_PIN_BANK_EINTG(6, 0x020, "gpp0", 0x04),
+	EXYNOS850_PIN_BANK_EINTG(4, 0x040, "gpp1", 0x08),
+	EXYNOS850_PIN_BANK_EINTG(4, 0x060, "gpp2", 0x0c),
+	EXYNOS850_PIN_BANK_EINTG(8, 0x080, "gpg1", 0x10),
+	EXYNOS850_PIN_BANK_EINTG(8, 0x0a0, "gpg2", 0x14),
+	EXYNOS850_PIN_BANK_EINTG(1, 0x0c0, "gpg3", 0x18),
+	EXYNOS850_PIN_BANK_EINTG(3, 0x0e0, "gpc0", 0x1c),
+	EXYNOS850_PIN_BANK_EINTG(6, 0x100, "gpc1", 0x20),
+};
+
+static const struct samsung_pin_ctrl exynos850_pin_ctrl[] __initconst = {
+	{
+		/* pin-controller instance 0 ALIVE data */
+		.pin_banks	= exynos850_pin_banks0,
+		.nr_banks	= ARRAY_SIZE(exynos850_pin_banks0),
+		.eint_gpio_init = exynos_eint_gpio_init,
+		.eint_wkup_init = exynos_eint_wkup_init,
+	}, {
+		/* pin-controller instance 1 CMGP data */
+		.pin_banks	= exynos850_pin_banks1,
+		.nr_banks	= ARRAY_SIZE(exynos850_pin_banks1),
+		.eint_gpio_init = exynos_eint_gpio_init,
+		.eint_wkup_init = exynos_eint_wkup_init,
+	}, {
+		/* pin-controller instance 2 AUD data */
+		.pin_banks	= exynos850_pin_banks2,
+		.nr_banks	= ARRAY_SIZE(exynos850_pin_banks2),
+	}, {
+		/* pin-controller instance 3 HSI data */
+		.pin_banks	= exynos850_pin_banks3,
+		.nr_banks	= ARRAY_SIZE(exynos850_pin_banks3),
+		.eint_gpio_init = exynos_eint_gpio_init,
+	}, {
+		/* pin-controller instance 4 CORE data */
+		.pin_banks	= exynos850_pin_banks4,
+		.nr_banks	= ARRAY_SIZE(exynos850_pin_banks4),
+		.eint_gpio_init = exynos_eint_gpio_init,
+	}, {
+		/* pin-controller instance 5 PERI data */
+		.pin_banks	= exynos850_pin_banks5,
+		.nr_banks	= ARRAY_SIZE(exynos850_pin_banks5),
+		.eint_gpio_init = exynos_eint_gpio_init,
+	},
+};
+
+const struct samsung_pinctrl_of_match_data exynos850_of_data __initconst = {
+	.ctrl		= exynos850_pin_ctrl,
+	.num_ctrl	= ARRAY_SIZE(exynos850_pin_ctrl),
+};
diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.h b/drivers/pinctrl/samsung/pinctrl-exynos.h
index da1ec13697e7..bfad1ced8017 100644
--- a/drivers/pinctrl/samsung/pinctrl-exynos.h
+++ b/drivers/pinctrl/samsung/pinctrl-exynos.h
@@ -108,6 +108,35 @@
 		.pctl_res_idx   = pctl_idx,			\
 	}							\
 
+#define EXYNOS850_PIN_BANK_EINTN(pins, reg, id)			\
+	{							\
+		.type		= &exynos850_bank_type_alive,	\
+		.pctl_offset	= reg,				\
+		.nr_pins	= pins,				\
+		.eint_type	= EINT_TYPE_NONE,		\
+		.name		= id				\
+	}
+
+#define EXYNOS850_PIN_BANK_EINTG(pins, reg, id, offs)		\
+	{							\
+		.type		= &exynos850_bank_type_off,	\
+		.pctl_offset	= reg,				\
+		.nr_pins	= pins,				\
+		.eint_type	= EINT_TYPE_GPIO,		\
+		.eint_offset	= offs,				\
+		.name		= id				\
+	}
+
+#define EXYNOS850_PIN_BANK_EINTW(pins, reg, id, offs)		\
+	{							\
+		.type		= &exynos850_bank_type_alive,	\
+		.pctl_offset	= reg,				\
+		.nr_pins	= pins,				\
+		.eint_type	= EINT_TYPE_WKUP,		\
+		.eint_offset	= offs,				\
+		.name		= id				\
+	}
+
 /**
  * struct exynos_weint_data: irq specific data for all the wakeup interrupts
  * generated by the external wakeup interrupt controller.
diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.c b/drivers/pinctrl/samsung/pinctrl-samsung.c
index 376876bd6605..2a0fc63516f1 100644
--- a/drivers/pinctrl/samsung/pinctrl-samsung.c
+++ b/drivers/pinctrl/samsung/pinctrl-samsung.c
@@ -918,7 +918,7 @@ static int samsung_pinctrl_register(struct platform_device *pdev,
 		pin_bank->grange.pin_base = drvdata->pin_base
 						+ pin_bank->pin_base;
 		pin_bank->grange.base = pin_bank->grange.pin_base;
-		pin_bank->grange.npins = pin_bank->gpio_chip.ngpio;
+		pin_bank->grange.npins = pin_bank->nr_pins;
 		pin_bank->grange.gc = &pin_bank->gpio_chip;
 		pinctrl_add_gpio_range(drvdata->pctl_dev, &pin_bank->grange);
 	}
@@ -1264,6 +1264,8 @@ static const struct of_device_id samsung_pinctrl_dt_match[] = {
 		.data = &exynos5433_of_data },
 	{ .compatible = "samsung,exynos7-pinctrl",
 		.data = &exynos7_of_data },
+	{ .compatible = "samsung,exynos850-pinctrl",
+		.data = &exynos850_of_data },
 #endif
 #ifdef CONFIG_PINCTRL_S3C64XX
 	{ .compatible = "samsung,s3c64xx-pinctrl",
diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.h b/drivers/pinctrl/samsung/pinctrl-samsung.h
index de44f8ec330b..4c2149e9c544 100644
--- a/drivers/pinctrl/samsung/pinctrl-samsung.h
+++ b/drivers/pinctrl/samsung/pinctrl-samsung.h
@@ -339,6 +339,7 @@ extern const struct samsung_pinctrl_of_match_data exynos5410_of_data;
 extern const struct samsung_pinctrl_of_match_data exynos5420_of_data;
 extern const struct samsung_pinctrl_of_match_data exynos5433_of_data;
 extern const struct samsung_pinctrl_of_match_data exynos7_of_data;
+extern const struct samsung_pinctrl_of_match_data exynos850_of_data;
 extern const struct samsung_pinctrl_of_match_data s3c64xx_of_data;
 extern const struct samsung_pinctrl_of_match_data s3c2412_of_data;
 extern const struct samsung_pinctrl_of_match_data s3c2416_of_data;
diff --git a/drivers/pinctrl/stm32/Kconfig b/drivers/pinctrl/stm32/Kconfig
index f36f29113370..d532f3c6f670 100644
--- a/drivers/pinctrl/stm32/Kconfig
+++ b/drivers/pinctrl/stm32/Kconfig
@@ -40,6 +40,12 @@ config PINCTRL_STM32H743
 	default MACH_STM32H743
 	select PINCTRL_STM32
 
+config PINCTRL_STM32MP135
+	bool "STMicroelectronics STM32MP135 pin control" if COMPILE_TEST && !MACH_STM32MP13
+	depends on OF && HAS_IOMEM
+	default MACH_STM32MP13
+	select PINCTRL_STM32
+
 config PINCTRL_STM32MP157
 	bool "STMicroelectronics STM32MP157 pin control" if COMPILE_TEST && !MACH_STM32MP157
 	depends on OF && HAS_IOMEM
diff --git a/drivers/pinctrl/stm32/Makefile b/drivers/pinctrl/stm32/Makefile
index f7c56d4b941c..619629ee9944 100644
--- a/drivers/pinctrl/stm32/Makefile
+++ b/drivers/pinctrl/stm32/Makefile
@@ -8,4 +8,5 @@ obj-$(CONFIG_PINCTRL_STM32F469)	+= pinctrl-stm32f469.o
 obj-$(CONFIG_PINCTRL_STM32F746)	+= pinctrl-stm32f746.o
 obj-$(CONFIG_PINCTRL_STM32F769)	+= pinctrl-stm32f769.o
 obj-$(CONFIG_PINCTRL_STM32H743)	+= pinctrl-stm32h743.o
+obj-$(CONFIG_PINCTRL_STM32MP135) += pinctrl-stm32mp135.o
 obj-$(CONFIG_PINCTRL_STM32MP157) += pinctrl-stm32mp157.o
diff --git a/drivers/pinctrl/stm32/pinctrl-stm32mp135.c b/drivers/pinctrl/stm32/pinctrl-stm32mp135.c
new file mode 100644
index 000000000000..4ab03520c407
--- /dev/null
+++ b/drivers/pinctrl/stm32/pinctrl-stm32mp135.c
@@ -0,0 +1,1679 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) STMicroelectronics 2021 - All Rights Reserved
+ * Author: Alexandre Torgue <alexandre.torgue@foss.st.com> for STMicroelectronics.
+ */
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#include "pinctrl-stm32.h"
+
+static const struct stm32_desc_pin stm32mp135_pins[] = {
+	STM32_PIN(
+		PINCTRL_PIN(0, "PA0"),
+		STM32_FUNCTION(0, "GPIOA0"),
+		STM32_FUNCTION(2, "TIM2_CH1"),
+		STM32_FUNCTION(3, "TIM5_CH1"),
+		STM32_FUNCTION(4, "TIM8_ETR"),
+		STM32_FUNCTION(5, "TIM15_BKIN"),
+		STM32_FUNCTION(7, "SAI1_SD_B"),
+		STM32_FUNCTION(9, "UART5_TX"),
+		STM32_FUNCTION(12, "ETH1_MII_CRS"),
+		STM32_FUNCTION(13, "ETH2_MII_CRS"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(1, "PA1"),
+		STM32_FUNCTION(0, "GPIOA1"),
+		STM32_FUNCTION(2, "TIM2_CH2"),
+		STM32_FUNCTION(3, "TIM5_CH2"),
+		STM32_FUNCTION(4, "LPTIM3_OUT"),
+		STM32_FUNCTION(5, "TIM15_CH1N"),
+		STM32_FUNCTION(7, "DFSDM1_CKIN0"),
+		STM32_FUNCTION(8, "USART2_RTS USART2_DE"),
+		STM32_FUNCTION(12, "ETH1_MII_RX_CLK ETH1_RGMII_RX_CLK ETH1_RMII_REF_CLK"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(2, "PA2"),
+		STM32_FUNCTION(0, "GPIOA2"),
+		STM32_FUNCTION(2, "TIM2_CH3"),
+		STM32_FUNCTION(3, "TIM5_CH3"),
+		STM32_FUNCTION(4, "LPTIM4_OUT"),
+		STM32_FUNCTION(5, "TIM15_CH1"),
+		STM32_FUNCTION(8, "USART2_TX"),
+		STM32_FUNCTION(12, "ETH1_MDIO"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(3, "PA3"),
+		STM32_FUNCTION(0, "GPIOA3"),
+		STM32_FUNCTION(2, "TIM2_CH4"),
+		STM32_FUNCTION(3, "TIM5_CH4"),
+		STM32_FUNCTION(4, "LPTIM5_OUT"),
+		STM32_FUNCTION(5, "TIM15_CH2"),
+		STM32_FUNCTION(6, "SPI1_MOSI I2S1_SDO"),
+		STM32_FUNCTION(7, "SAI1_FS_B"),
+		STM32_FUNCTION(8, "USART2_RX"),
+		STM32_FUNCTION(12, "ETH1_MII_COL"),
+		STM32_FUNCTION(13, "ETH2_MII_COL"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(4, "PA4"),
+		STM32_FUNCTION(0, "GPIOA4"),
+		STM32_FUNCTION(3, "TIM5_ETR"),
+		STM32_FUNCTION(4, "USART2_CK"),
+		STM32_FUNCTION(5, "SAI1_SCK_B"),
+		STM32_FUNCTION(6, "SPI1_NSS I2S1_WS"),
+		STM32_FUNCTION(7, "DFSDM1_CKIN1"),
+		STM32_FUNCTION(11, "ETH1_PPS_OUT"),
+		STM32_FUNCTION(12, "ETH2_PPS_OUT"),
+		STM32_FUNCTION(13, "SAI1_SCK_A"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(5, "PA5"),
+		STM32_FUNCTION(0, "GPIOA5"),
+		STM32_FUNCTION(2, "TIM2_CH1 TIM2_ETR"),
+		STM32_FUNCTION(3, "USART2_CK"),
+		STM32_FUNCTION(4, "TIM8_CH1N"),
+		STM32_FUNCTION(5, "SAI1_D1"),
+		STM32_FUNCTION(6, "SPI1_NSS I2S1_WS"),
+		STM32_FUNCTION(7, "SAI1_SD_A"),
+		STM32_FUNCTION(11, "ETH1_PPS_OUT"),
+		STM32_FUNCTION(12, "ETH2_PPS_OUT"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(6, "PA6"),
+		STM32_FUNCTION(0, "GPIOA6"),
+		STM32_FUNCTION(2, "TIM1_BKIN"),
+		STM32_FUNCTION(3, "TIM3_CH1"),
+		STM32_FUNCTION(4, "TIM8_BKIN"),
+		STM32_FUNCTION(5, "SAI2_CK2"),
+		STM32_FUNCTION(6, "SPI1_MISO I2S1_SDI"),
+		STM32_FUNCTION(8, "USART1_CK"),
+		STM32_FUNCTION(9, "UART4_RTS UART4_DE"),
+		STM32_FUNCTION(10, "TIM13_CH1"),
+		STM32_FUNCTION(13, "SAI2_SCK_A"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(7, "PA7"),
+		STM32_FUNCTION(0, "GPIOA7"),
+		STM32_FUNCTION(2, "TIM1_CH1N"),
+		STM32_FUNCTION(3, "TIM3_CH2"),
+		STM32_FUNCTION(4, "TIM8_CH1N"),
+		STM32_FUNCTION(5, "SAI2_D1"),
+		STM32_FUNCTION(6, "SPI1_SCK I2S1_CK"),
+		STM32_FUNCTION(8, "USART1_CTS USART1_NSS"),
+		STM32_FUNCTION(10, "TIM14_CH1"),
+		STM32_FUNCTION(12, "ETH1_MII_RX_DV ETH1_RGMII_RX_CTL ETH1_RMII_CRS_DV"),
+		STM32_FUNCTION(13, "SAI2_SD_A"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(8, "PA8"),
+		STM32_FUNCTION(0, "GPIOA8"),
+		STM32_FUNCTION(1, "MCO1"),
+		STM32_FUNCTION(3, "SAI2_MCLK_A"),
+		STM32_FUNCTION(4, "TIM8_BKIN2"),
+		STM32_FUNCTION(5, "I2C4_SDA"),
+		STM32_FUNCTION(6, "SPI5_MISO"),
+		STM32_FUNCTION(7, "SAI2_CK1"),
+		STM32_FUNCTION(8, "USART1_CK"),
+		STM32_FUNCTION(9, "SPI2_MOSI I2S2_SDO"),
+		STM32_FUNCTION(11, "OTG_HS_SOF"),
+		STM32_FUNCTION(12, "ETH2_MII_RXD3 ETH2_RGMII_RXD3"),
+		STM32_FUNCTION(13, "FMC_A21"),
+		STM32_FUNCTION(15, "LCD_B7"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(9, "PA9"),
+		STM32_FUNCTION(0, "GPIOA9"),
+		STM32_FUNCTION(2, "TIM1_CH2"),
+		STM32_FUNCTION(5, "I2C3_SMBA"),
+		STM32_FUNCTION(7, "DFSDM1_DATIN0"),
+		STM32_FUNCTION(8, "USART1_TX"),
+		STM32_FUNCTION(9, "UART4_TX"),
+		STM32_FUNCTION(11, "FMC_NWAIT"),
+		STM32_FUNCTION(14, "DCMIPP_D0"),
+		STM32_FUNCTION(15, "LCD_R6"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(10, "PA10"),
+		STM32_FUNCTION(0, "GPIOA10"),
+		STM32_FUNCTION(2, "TIM1_CH3"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(11, "PA11"),
+		STM32_FUNCTION(0, "GPIOA11"),
+		STM32_FUNCTION(2, "TIM1_CH4"),
+		STM32_FUNCTION(5, "I2C5_SCL"),
+		STM32_FUNCTION(6, "SPI2_NSS I2S2_WS"),
+		STM32_FUNCTION(8, "USART1_CTS USART1_NSS"),
+		STM32_FUNCTION(11, "ETH2_MII_RXD1 ETH2_RGMII_RXD1 ETH2_RMII_RXD1"),
+		STM32_FUNCTION(12, "ETH1_CLK"),
+		STM32_FUNCTION(14, "ETH2_CLK"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(12, "PA12"),
+		STM32_FUNCTION(0, "GPIOA12"),
+		STM32_FUNCTION(2, "TIM1_ETR"),
+		STM32_FUNCTION(3, "SAI2_MCLK_A"),
+		STM32_FUNCTION(8, "USART1_RTS USART1_DE"),
+		STM32_FUNCTION(11, "TSC_G1_IO2"),
+		STM32_FUNCTION(12, "ETH2_MII_RX_DV ETH2_RGMII_RX_CTL ETH2_RMII_CRS_DV"),
+		STM32_FUNCTION(13, "FMC_A7"),
+		STM32_FUNCTION(14, "DCMIPP_D1"),
+		STM32_FUNCTION(15, "LCD_G6"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(13, "PA13"),
+		STM32_FUNCTION(0, "GPIOA13"),
+		STM32_FUNCTION(1, "DBTRGO"),
+		STM32_FUNCTION(2, "DBTRGI"),
+		STM32_FUNCTION(3, "MCO1"),
+		STM32_FUNCTION(9, "UART4_TX"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(14, "PA14"),
+		STM32_FUNCTION(0, "GPIOA14"),
+		STM32_FUNCTION(1, "DBTRGO"),
+		STM32_FUNCTION(2, "DBTRGI"),
+		STM32_FUNCTION(3, "MCO2"),
+		STM32_FUNCTION(11, "OTG_HS_SOF"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(15, "PA15"),
+		STM32_FUNCTION(0, "GPIOA15"),
+		STM32_FUNCTION(1, "TRACED5"),
+		STM32_FUNCTION(2, "TIM2_CH1"),
+		STM32_FUNCTION(6, "I2S4_MCK"),
+		STM32_FUNCTION(8, "UART4_RTS UART4_DE"),
+		STM32_FUNCTION(9, "UART4_RX"),
+		STM32_FUNCTION(10, "LCD_R0"),
+		STM32_FUNCTION(11, "TSC_G3_IO1"),
+		STM32_FUNCTION(12, "LCD_G7"),
+		STM32_FUNCTION(13, "FMC_A9"),
+		STM32_FUNCTION(14, "DCMIPP_D14"),
+		STM32_FUNCTION(15, "DCMIPP_D5"),
+		STM32_FUNCTION(16, "HDP5"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(16, "PB0"),
+		STM32_FUNCTION(0, "GPIOB0"),
+		STM32_FUNCTION(1, "DBTRGI"),
+		STM32_FUNCTION(2, "TIM1_CH2N"),
+		STM32_FUNCTION(3, "TIM3_CH3"),
+		STM32_FUNCTION(4, "TIM8_CH2N"),
+		STM32_FUNCTION(5, "USART1_RX"),
+		STM32_FUNCTION(6, "I2S1_MCK"),
+		STM32_FUNCTION(7, "SAI2_FS_A"),
+		STM32_FUNCTION(8, "USART1_CK"),
+		STM32_FUNCTION(9, "UART4_CTS"),
+		STM32_FUNCTION(11, "SAI2_D2"),
+		STM32_FUNCTION(12, "ETH1_MII_RXD2 ETH1_RGMII_RXD2"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(17, "PB1"),
+		STM32_FUNCTION(0, "GPIOB1"),
+		STM32_FUNCTION(2, "TIM1_CH3N"),
+		STM32_FUNCTION(3, "TIM3_CH4"),
+		STM32_FUNCTION(4, "TIM8_CH3N"),
+		STM32_FUNCTION(6, "SPI1_SCK I2S1_CK"),
+		STM32_FUNCTION(7, "DFSDM1_DATIN1"),
+		STM32_FUNCTION(8, "UART4_RX"),
+		STM32_FUNCTION(12, "ETH1_MII_RXD3 ETH1_RGMII_RXD3"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(18, "PB2"),
+		STM32_FUNCTION(0, "GPIOB2"),
+		STM32_FUNCTION(2, "RTC_OUT2"),
+		STM32_FUNCTION(3, "SAI1_D1"),
+		STM32_FUNCTION(6, "I2S_CKIN"),
+		STM32_FUNCTION(7, "SAI1_SD_A"),
+		STM32_FUNCTION(9, "UART4_RX"),
+		STM32_FUNCTION(10, "QUADSPI_BK1_NCS"),
+		STM32_FUNCTION(12, "ETH2_MDIO"),
+		STM32_FUNCTION(13, "FMC_A6"),
+		STM32_FUNCTION(15, "LCD_B4"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(19, "PB3"),
+		STM32_FUNCTION(0, "GPIOB3"),
+		STM32_FUNCTION(1, "TRACED2"),
+		STM32_FUNCTION(2, "TIM2_CH2"),
+		STM32_FUNCTION(5, "SAI2_CK1"),
+		STM32_FUNCTION(6, "SPI4_NSS I2S4_WS"),
+		STM32_FUNCTION(9, "SDMMC1_D123DIR"),
+		STM32_FUNCTION(11, "SDMMC2_D2"),
+		STM32_FUNCTION(12, "LCD_R6"),
+		STM32_FUNCTION(13, "SAI2_MCLK_A"),
+		STM32_FUNCTION(14, "UART7_RX"),
+		STM32_FUNCTION(15, "LCD_B2"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(20, "PB4"),
+		STM32_FUNCTION(0, "GPIOB4"),
+		STM32_FUNCTION(1, "TRACED14"),
+		STM32_FUNCTION(2, "TIM16_BKIN"),
+		STM32_FUNCTION(3, "TIM3_CH1"),
+		STM32_FUNCTION(5, "SAI2_CK2"),
+		STM32_FUNCTION(6, "SPI4_SCK I2S4_CK"),
+		STM32_FUNCTION(8, "USART3_CK"),
+		STM32_FUNCTION(11, "SDMMC2_D3"),
+		STM32_FUNCTION(12, "LCD_G1"),
+		STM32_FUNCTION(13, "SAI2_SCK_A"),
+		STM32_FUNCTION(14, "LCD_B6"),
+		STM32_FUNCTION(15, "LCD_R0"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(21, "PB5"),
+		STM32_FUNCTION(0, "GPIOB5"),
+		STM32_FUNCTION(1, "TRACED4"),
+		STM32_FUNCTION(2, "TIM17_BKIN"),
+		STM32_FUNCTION(3, "TIM3_CH2"),
+		STM32_FUNCTION(6, "SPI2_MISO I2S2_SDI"),
+		STM32_FUNCTION(7, "I2C4_SMBA"),
+		STM32_FUNCTION(9, "SDMMC1_CKIN"),
+		STM32_FUNCTION(10, "FDCAN2_RX"),
+		STM32_FUNCTION(12, "UART5_RX"),
+		STM32_FUNCTION(14, "LCD_B6"),
+		STM32_FUNCTION(15, "LCD_DE"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(22, "PB6"),
+		STM32_FUNCTION(0, "GPIOB6"),
+		STM32_FUNCTION(1, "TRACED6"),
+		STM32_FUNCTION(2, "TIM16_CH1N"),
+		STM32_FUNCTION(3, "TIM4_CH1"),
+		STM32_FUNCTION(4, "TIM8_CH1"),
+		STM32_FUNCTION(5, "USART1_TX"),
+		STM32_FUNCTION(7, "SAI1_CK2"),
+		STM32_FUNCTION(8, "LCD_B6"),
+		STM32_FUNCTION(10, "QUADSPI_BK1_NCS"),
+		STM32_FUNCTION(11, "TSC_G1_IO4"),
+		STM32_FUNCTION(12, "ETH2_MDIO"),
+		STM32_FUNCTION(13, "FMC_NE3"),
+		STM32_FUNCTION(14, "DCMIPP_D5"),
+		STM32_FUNCTION(15, "LCD_B7"),
+		STM32_FUNCTION(16, "HDP6"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(23, "PB7"),
+		STM32_FUNCTION(0, "GPIOB7"),
+		STM32_FUNCTION(2, "TIM17_CH1N"),
+		STM32_FUNCTION(3, "TIM4_CH2"),
+		STM32_FUNCTION(4, "TSC_SYNC"),
+		STM32_FUNCTION(6, "I2S4_CK"),
+		STM32_FUNCTION(7, "I2C4_SDA"),
+		STM32_FUNCTION(11, "FMC_NCE2"),
+		STM32_FUNCTION(13, "FMC_NL"),
+		STM32_FUNCTION(14, "DCMIPP_D13"),
+		STM32_FUNCTION(15, "DCMIPP_PIXCLK"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(24, "PB8"),
+		STM32_FUNCTION(0, "GPIOB8"),
+		STM32_FUNCTION(2, "TIM16_CH1"),
+		STM32_FUNCTION(3, "TIM4_CH3"),
+		STM32_FUNCTION(5, "I2C1_SCL"),
+		STM32_FUNCTION(6, "I2C3_SCL"),
+		STM32_FUNCTION(7, "DFSDM1_DATIN1"),
+		STM32_FUNCTION(9, "UART4_RX"),
+		STM32_FUNCTION(11, "SAI1_D1"),
+		STM32_FUNCTION(13, "FMC_D13 FMC_AD13"),
+		STM32_FUNCTION(14, "DCMIPP_D6"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(25, "PB9"),
+		STM32_FUNCTION(0, "GPIOB9"),
+		STM32_FUNCTION(1, "TRACED3"),
+		STM32_FUNCTION(3, "TIM4_CH4"),
+		STM32_FUNCTION(7, "I2C4_SDA"),
+		STM32_FUNCTION(10, "FDCAN1_TX"),
+		STM32_FUNCTION(11, "SDMMC2_D5"),
+		STM32_FUNCTION(12, "UART5_TX"),
+		STM32_FUNCTION(13, "SDMMC1_CDIR"),
+		STM32_FUNCTION(14, "LCD_DE"),
+		STM32_FUNCTION(15, "LCD_B1"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(26, "PB10"),
+		STM32_FUNCTION(0, "GPIOB10"),
+		STM32_FUNCTION(2, "TIM2_CH3"),
+		STM32_FUNCTION(4, "LPTIM2_IN1"),
+		STM32_FUNCTION(5, "I2C5_SMBA"),
+		STM32_FUNCTION(6, "SPI4_NSS I2S4_WS"),
+		STM32_FUNCTION(7, "SPI2_SCK I2S2_CK"),
+		STM32_FUNCTION(8, "USART3_TX"),
+		STM32_FUNCTION(15, "LCD_R3"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(27, "PB11"),
+		STM32_FUNCTION(0, "GPIOB11"),
+		STM32_FUNCTION(2, "TIM2_CH4"),
+		STM32_FUNCTION(4, "LPTIM1_OUT"),
+		STM32_FUNCTION(5, "I2C5_SMBA"),
+		STM32_FUNCTION(8, "USART3_RX"),
+		STM32_FUNCTION(12, "ETH1_MII_TX_EN ETH1_RGMII_TX_CTL ETH1_RMII_TX_EN"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(28, "PB12"),
+		STM32_FUNCTION(0, "GPIOB12"),
+		STM32_FUNCTION(1, "TRACED10"),
+		STM32_FUNCTION(5, "I2C2_SMBA"),
+		STM32_FUNCTION(7, "DFSDM1_DATIN1"),
+		STM32_FUNCTION(8, "UART7_RTS UART7_DE"),
+		STM32_FUNCTION(9, "USART3_RX"),
+		STM32_FUNCTION(12, "UART5_RX"),
+		STM32_FUNCTION(13, "SDMMC1_D5"),
+		STM32_FUNCTION(14, "LCD_R3"),
+		STM32_FUNCTION(15, "LCD_VSYNC"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(29, "PB13"),
+		STM32_FUNCTION(0, "GPIOB13"),
+		STM32_FUNCTION(1, "TRACECLK"),
+		STM32_FUNCTION(2, "TIM1_CH1N"),
+		STM32_FUNCTION(5, "LPTIM2_OUT"),
+		STM32_FUNCTION(6, "SPI2_NSS I2S2_WS"),
+		STM32_FUNCTION(7, "I2C4_SCL"),
+		STM32_FUNCTION(9, "SDMMC1_D123DIR"),
+		STM32_FUNCTION(10, "FDCAN2_TX"),
+		STM32_FUNCTION(12, "UART5_TX"),
+		STM32_FUNCTION(14, "LCD_CLK"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(30, "PB14"),
+		STM32_FUNCTION(0, "GPIOB14"),
+		STM32_FUNCTION(1, "TRACED0"),
+		STM32_FUNCTION(2, "TIM1_CH2N"),
+		STM32_FUNCTION(3, "TIM12_CH1"),
+		STM32_FUNCTION(4, "TIM8_CH2N"),
+		STM32_FUNCTION(5, "USART1_TX"),
+		STM32_FUNCTION(11, "SDMMC2_D0"),
+		STM32_FUNCTION(12, "SDMMC1_D4"),
+		STM32_FUNCTION(14, "LCD_R0"),
+		STM32_FUNCTION(15, "LCD_G5"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(31, "PB15"),
+		STM32_FUNCTION(0, "GPIOB15"),
+		STM32_FUNCTION(1, "RTC_REFIN"),
+		STM32_FUNCTION(2, "TIM1_CH3N"),
+		STM32_FUNCTION(3, "TIM12_CH2"),
+		STM32_FUNCTION(4, "TIM8_CH3N"),
+		STM32_FUNCTION(5, "SAI2_D2"),
+		STM32_FUNCTION(6, "SPI4_MOSI I2S4_SDO"),
+		STM32_FUNCTION(7, "DFSDM1_CKIN2"),
+		STM32_FUNCTION(8, "UART7_CTS"),
+		STM32_FUNCTION(9, "SDMMC1_CKIN"),
+		STM32_FUNCTION(11, "SDMMC2_D1"),
+		STM32_FUNCTION(13, "SAI2_FS_A"),
+		STM32_FUNCTION(14, "LCD_CLK"),
+		STM32_FUNCTION(15, "LCD_B0"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(32, "PC0"),
+		STM32_FUNCTION(0, "GPIOC0"),
+		STM32_FUNCTION(3, "SAI1_SCK_A"),
+		STM32_FUNCTION(5, "SAI1_CK2"),
+		STM32_FUNCTION(6, "I2S1_MCK"),
+		STM32_FUNCTION(7, "SPI1_MOSI I2S1_SDO"),
+		STM32_FUNCTION(8, "USART1_TX"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(33, "PC1"),
+		STM32_FUNCTION(0, "GPIOC1"),
+		STM32_FUNCTION(4, "DFSDM1_DATIN0"),
+		STM32_FUNCTION(7, "SAI1_D3"),
+		STM32_FUNCTION(11, "ETH1_MII_RX_DV ETH1_RMII_CRS_DV"),
+		STM32_FUNCTION(12, "ETH1_RGMII_GTX_CLK"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(34, "PC2"),
+		STM32_FUNCTION(0, "GPIOC2"),
+		STM32_FUNCTION(2, "SPI5_NSS"),
+		STM32_FUNCTION(6, "SPI1_NSS I2S1_WS"),
+		STM32_FUNCTION(7, "SAI2_MCLK_A"),
+		STM32_FUNCTION(8, "USART1_RTS USART1_DE"),
+		STM32_FUNCTION(11, "SAI2_CK1"),
+		STM32_FUNCTION(12, "ETH1_MII_TXD2 ETH1_RGMII_TXD2"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(35, "PC3"),
+		STM32_FUNCTION(0, "GPIOC3"),
+		STM32_FUNCTION(3, "SAI1_CK1"),
+		STM32_FUNCTION(4, "DFSDM1_CKOUT"),
+		STM32_FUNCTION(6, "SPI1_MISO I2S1_SDI"),
+		STM32_FUNCTION(7, "SPI1_SCK I2S1_CK"),
+		STM32_FUNCTION(9, "UART5_CTS"),
+		STM32_FUNCTION(11, "SAI1_MCLK_A"),
+		STM32_FUNCTION(12, "ETH1_MII_TX_CLK"),
+		STM32_FUNCTION(13, "ETH2_MII_TX_CLK"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(36, "PC4"),
+		STM32_FUNCTION(0, "GPIOC4"),
+		STM32_FUNCTION(3, "TIM3_ETR"),
+		STM32_FUNCTION(4, "DFSDM1_CKIN2"),
+		STM32_FUNCTION(5, "SAI1_D3"),
+		STM32_FUNCTION(6, "I2S1_MCK"),
+		STM32_FUNCTION(9, "UART5_RTS UART5_DE"),
+		STM32_FUNCTION(10, "SPDIFRX_IN2"),
+		STM32_FUNCTION(12, "ETH1_MII_RXD0 ETH1_RGMII_RXD0 ETH1_RMII_RXD0"),
+		STM32_FUNCTION(13, "SAI2_D3"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(37, "PC5"),
+		STM32_FUNCTION(0, "GPIOC5"),
+		STM32_FUNCTION(4, "DFSDM1_DATIN2"),
+		STM32_FUNCTION(5, "SAI2_D4"),
+		STM32_FUNCTION(6, "I2S_CKIN"),
+		STM32_FUNCTION(7, "SAI1_D4"),
+		STM32_FUNCTION(8, "USART2_CTS USART2_NSS"),
+		STM32_FUNCTION(10, "SPDIFRX_IN3"),
+		STM32_FUNCTION(12, "ETH1_MII_RXD1 ETH1_RGMII_RXD1 ETH1_RMII_RXD1"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(38, "PC6"),
+		STM32_FUNCTION(0, "GPIOC6"),
+		STM32_FUNCTION(1, "TRACED2"),
+		STM32_FUNCTION(3, "TIM3_CH1"),
+		STM32_FUNCTION(4, "TIM8_CH1"),
+		STM32_FUNCTION(5, "DFSDM1_DATIN0"),
+		STM32_FUNCTION(6, "I2S3_MCK"),
+		STM32_FUNCTION(8, "USART6_TX"),
+		STM32_FUNCTION(9, "SDMMC1_D6"),
+		STM32_FUNCTION(10, "SDMMC2_D0DIR"),
+		STM32_FUNCTION(11, "SDMMC2_D6"),
+		STM32_FUNCTION(12, "LCD_B1"),
+		STM32_FUNCTION(13, "FMC_A19"),
+		STM32_FUNCTION(14, "LCD_R6"),
+		STM32_FUNCTION(15, "LCD_HSYNC"),
+		STM32_FUNCTION(16, "HDP2"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(39, "PC7"),
+		STM32_FUNCTION(0, "GPIOC7"),
+		STM32_FUNCTION(1, "TRACED4"),
+		STM32_FUNCTION(3, "TIM3_CH2"),
+		STM32_FUNCTION(4, "TIM8_CH2"),
+		STM32_FUNCTION(7, "I2S2_MCK"),
+		STM32_FUNCTION(8, "USART6_RX"),
+		STM32_FUNCTION(9, "USART3_CTS"),
+		STM32_FUNCTION(10, "SDMMC2_CDIR"),
+		STM32_FUNCTION(11, "SDMMC2_D7"),
+		STM32_FUNCTION(12, "LCD_R1"),
+		STM32_FUNCTION(13, "SDMMC1_D7"),
+		STM32_FUNCTION(15, "LCD_G6"),
+		STM32_FUNCTION(16, "HDP4"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(40, "PC8"),
+		STM32_FUNCTION(0, "GPIOC8"),
+		STM32_FUNCTION(1, "TRACED0"),
+		STM32_FUNCTION(3, "TIM3_CH3"),
+		STM32_FUNCTION(4, "TIM8_CH3"),
+		STM32_FUNCTION(6, "SPI3_MISO I2S3_SDI"),
+		STM32_FUNCTION(8, "USART6_CK"),
+		STM32_FUNCTION(9, "USART3_CTS"),
+		STM32_FUNCTION(11, "SAI2_FS_B"),
+		STM32_FUNCTION(12, "UART5_RTS UART5_DE"),
+		STM32_FUNCTION(13, "SDMMC1_D0"),
+		STM32_FUNCTION(15, "LCD_G7"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(41, "PC9"),
+		STM32_FUNCTION(0, "GPIOC9"),
+		STM32_FUNCTION(1, "TRACED1"),
+		STM32_FUNCTION(3, "TIM3_CH4"),
+		STM32_FUNCTION(4, "TIM8_CH4"),
+		STM32_FUNCTION(8, "USART3_RTS"),
+		STM32_FUNCTION(9, "UART5_CTS"),
+		STM32_FUNCTION(10, "FDCAN1_TX"),
+		STM32_FUNCTION(13, "SDMMC1_D1"),
+		STM32_FUNCTION(15, "LCD_B4"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(42, "PC10"),
+		STM32_FUNCTION(0, "GPIOC10"),
+		STM32_FUNCTION(1, "TRACED2"),
+		STM32_FUNCTION(6, "I2C1_SCL"),
+		STM32_FUNCTION(7, "SPI3_SCK I2S3_CK"),
+		STM32_FUNCTION(8, "USART3_TX"),
+		STM32_FUNCTION(11, "SAI2_MCLK_B"),
+		STM32_FUNCTION(13, "SDMMC1_D2"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(43, "PC11"),
+		STM32_FUNCTION(0, "GPIOC11"),
+		STM32_FUNCTION(1, "TRACED3"),
+		STM32_FUNCTION(5, "I2C1_SDA"),
+		STM32_FUNCTION(7, "SPI3_MOSI I2S3_SDO"),
+		STM32_FUNCTION(8, "USART3_CK"),
+		STM32_FUNCTION(9, "UART5_RX"),
+		STM32_FUNCTION(11, "SAI2_SCK_B"),
+		STM32_FUNCTION(13, "SDMMC1_D3"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(44, "PC12"),
+		STM32_FUNCTION(0, "GPIOC12"),
+		STM32_FUNCTION(1, "TRACECLK"),
+		STM32_FUNCTION(9, "UART7_TX"),
+		STM32_FUNCTION(11, "SAI2_SD_B"),
+		STM32_FUNCTION(13, "SDMMC1_CK"),
+		STM32_FUNCTION(15, "LCD_DE"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(45, "PC13"),
+		STM32_FUNCTION(0, "GPIOC13"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(46, "PC14"),
+		STM32_FUNCTION(0, "GPIOC14"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(47, "PC15"),
+		STM32_FUNCTION(0, "GPIOC15"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(48, "PD0"),
+		STM32_FUNCTION(0, "GPIOD0"),
+		STM32_FUNCTION(3, "SAI1_MCLK_A"),
+		STM32_FUNCTION(7, "SAI1_CK1"),
+		STM32_FUNCTION(10, "FDCAN1_RX"),
+		STM32_FUNCTION(13, "FMC_D2 FMC_AD2"),
+		STM32_FUNCTION(14, "DCMIPP_D1"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(49, "PD1"),
+		STM32_FUNCTION(0, "GPIOD1"),
+		STM32_FUNCTION(5, "I2C5_SCL"),
+		STM32_FUNCTION(6, "SPI4_MOSI I2S4_SDO"),
+		STM32_FUNCTION(9, "UART4_TX"),
+		STM32_FUNCTION(10, "QUADSPI_BK1_NCS"),
+		STM32_FUNCTION(12, "LCD_B6"),
+		STM32_FUNCTION(13, "FMC_D3 FMC_AD3"),
+		STM32_FUNCTION(14, "DCMIPP_D13"),
+		STM32_FUNCTION(15, "LCD_G2"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(50, "PD2"),
+		STM32_FUNCTION(0, "GPIOD2"),
+		STM32_FUNCTION(1, "TRACED4"),
+		STM32_FUNCTION(3, "TIM3_ETR"),
+		STM32_FUNCTION(5, "I2C1_SMBA"),
+		STM32_FUNCTION(6, "SPI3_NSS I2S3_WS"),
+		STM32_FUNCTION(7, "SAI2_D1"),
+		STM32_FUNCTION(8, "USART3_RX"),
+		STM32_FUNCTION(13, "SDMMC1_CMD"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(51, "PD3"),
+		STM32_FUNCTION(0, "GPIOD3"),
+		STM32_FUNCTION(3, "TIM2_CH1"),
+		STM32_FUNCTION(4, "USART2_CTS USART2_NSS"),
+		STM32_FUNCTION(5, "DFSDM1_CKOUT"),
+		STM32_FUNCTION(6, "I2C1_SDA"),
+		STM32_FUNCTION(7, "SAI1_D3"),
+		STM32_FUNCTION(13, "FMC_CLK"),
+		STM32_FUNCTION(14, "DCMIPP_D5"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(52, "PD4"),
+		STM32_FUNCTION(0, "GPIOD4"),
+		STM32_FUNCTION(4, "USART2_RTS USART2_DE"),
+		STM32_FUNCTION(6, "SPI3_MISO I2S3_SDI"),
+		STM32_FUNCTION(7, "DFSDM1_CKIN0"),
+		STM32_FUNCTION(10, "QUADSPI_CLK"),
+		STM32_FUNCTION(12, "LCD_R1"),
+		STM32_FUNCTION(13, "FMC_NOE"),
+		STM32_FUNCTION(14, "LCD_R4"),
+		STM32_FUNCTION(15, "LCD_R6"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(53, "PD5"),
+		STM32_FUNCTION(0, "GPIOD5"),
+		STM32_FUNCTION(10, "QUADSPI_BK1_IO0"),
+		STM32_FUNCTION(13, "FMC_NWE"),
+		STM32_FUNCTION(14, "LCD_B0"),
+		STM32_FUNCTION(15, "LCD_G4"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(54, "PD6"),
+		STM32_FUNCTION(0, "GPIOD6"),
+		STM32_FUNCTION(2, "TIM16_CH1N"),
+		STM32_FUNCTION(3, "SAI1_D1"),
+		STM32_FUNCTION(7, "SAI1_SD_A"),
+		STM32_FUNCTION(9, "UART4_TX"),
+		STM32_FUNCTION(12, "TSC_G2_IO1"),
+		STM32_FUNCTION(14, "DCMIPP_D4"),
+		STM32_FUNCTION(15, "DCMIPP_D0"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(55, "PD7"),
+		STM32_FUNCTION(0, "GPIOD7"),
+		STM32_FUNCTION(1, "MCO1"),
+		STM32_FUNCTION(4, "USART2_CK"),
+		STM32_FUNCTION(5, "I2C2_SCL"),
+		STM32_FUNCTION(6, "I2C3_SDA"),
+		STM32_FUNCTION(10, "SPDIFRX_IN0"),
+		STM32_FUNCTION(11, "ETH1_MII_RX_CLK ETH1_RGMII_RX_CLK ETH1_RMII_REF_CLK"),
+		STM32_FUNCTION(12, "QUADSPI_BK1_IO2"),
+		STM32_FUNCTION(13, "FMC_NE1"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(56, "PD8"),
+		STM32_FUNCTION(0, "GPIOD8"),
+		STM32_FUNCTION(4, "USART2_TX"),
+		STM32_FUNCTION(6, "I2S4_WS"),
+		STM32_FUNCTION(8, "USART3_TX"),
+		STM32_FUNCTION(9, "UART4_RX"),
+		STM32_FUNCTION(11, "TSC_G1_IO3"),
+		STM32_FUNCTION(14, "DCMIPP_D9"),
+		STM32_FUNCTION(15, "DCMIPP_D3"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(57, "PD9"),
+		STM32_FUNCTION(0, "GPIOD9"),
+		STM32_FUNCTION(1, "TRACECLK"),
+		STM32_FUNCTION(4, "DFSDM1_DATIN3"),
+		STM32_FUNCTION(11, "SDMMC2_CDIR"),
+		STM32_FUNCTION(12, "LCD_B5"),
+		STM32_FUNCTION(13, "FMC_D14 FMC_AD14"),
+		STM32_FUNCTION(14, "LCD_CLK"),
+		STM32_FUNCTION(15, "LCD_B0"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(58, "PD10"),
+		STM32_FUNCTION(0, "GPIOD10"),
+		STM32_FUNCTION(1, "RTC_REFIN"),
+		STM32_FUNCTION(5, "I2C5_SMBA"),
+		STM32_FUNCTION(6, "SPI4_NSS I2S4_WS"),
+		STM32_FUNCTION(8, "USART3_CK"),
+		STM32_FUNCTION(10, "LCD_G5"),
+		STM32_FUNCTION(11, "TSC_G2_IO2"),
+		STM32_FUNCTION(12, "LCD_B7"),
+		STM32_FUNCTION(13, "FMC_D15 FMC_AD15"),
+		STM32_FUNCTION(14, "DCMIPP_VSYNC"),
+		STM32_FUNCTION(15, "LCD_B2"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(59, "PD11"),
+		STM32_FUNCTION(0, "GPIOD11"),
+		STM32_FUNCTION(4, "LPTIM2_IN2"),
+		STM32_FUNCTION(5, "I2C4_SMBA"),
+		STM32_FUNCTION(8, "USART3_CTS USART3_NSS"),
+		STM32_FUNCTION(9, "SPDIFRX_IN0"),
+		STM32_FUNCTION(10, "QUADSPI_BK1_IO2"),
+		STM32_FUNCTION(11, "ETH2_RGMII_CLK125"),
+		STM32_FUNCTION(12, "LCD_R7"),
+		STM32_FUNCTION(13, "FMC_CLE FMC_A16"),
+		STM32_FUNCTION(14, "UART7_RX"),
+		STM32_FUNCTION(15, "DCMIPP_D4"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(60, "PD12"),
+		STM32_FUNCTION(0, "GPIOD12"),
+		STM32_FUNCTION(2, "LPTIM1_IN1"),
+		STM32_FUNCTION(3, "TIM4_CH1"),
+		STM32_FUNCTION(6, "I2C1_SCL"),
+		STM32_FUNCTION(8, "USART3_RTS USART3_DE"),
+		STM32_FUNCTION(13, "FMC_ALE FMC_A17"),
+		STM32_FUNCTION(14, "DCMIPP_D6"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(61, "PD13"),
+		STM32_FUNCTION(0, "GPIOD13"),
+		STM32_FUNCTION(2, "LPTIM2_ETR"),
+		STM32_FUNCTION(3, "TIM4_CH2"),
+		STM32_FUNCTION(4, "TIM8_CH2"),
+		STM32_FUNCTION(5, "SAI1_CK1"),
+		STM32_FUNCTION(7, "SAI1_MCLK_A"),
+		STM32_FUNCTION(8, "USART1_RX"),
+		STM32_FUNCTION(10, "QUADSPI_BK1_IO3"),
+		STM32_FUNCTION(11, "TSC_G2_IO4"),
+		STM32_FUNCTION(12, "QUADSPI_BK2_IO2"),
+		STM32_FUNCTION(13, "FMC_A18"),
+		STM32_FUNCTION(15, "LCD_G4"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(62, "PD14"),
+		STM32_FUNCTION(0, "GPIOD14"),
+		STM32_FUNCTION(3, "TIM4_CH3"),
+		STM32_FUNCTION(5, "I2C3_SDA"),
+		STM32_FUNCTION(8, "USART1_RX"),
+		STM32_FUNCTION(9, "UART8_CTS"),
+		STM32_FUNCTION(13, "FMC_D0 FMC_AD0"),
+		STM32_FUNCTION(14, "DCMIPP_D8"),
+		STM32_FUNCTION(15, "LCD_R4"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(63, "PD15"),
+		STM32_FUNCTION(0, "GPIOD15"),
+		STM32_FUNCTION(2, "USART2_RX"),
+		STM32_FUNCTION(3, "TIM4_CH4"),
+		STM32_FUNCTION(4, "DFSDM1_DATIN2"),
+		STM32_FUNCTION(10, "QUADSPI_BK1_IO3"),
+		STM32_FUNCTION(13, "FMC_D1 FMC_AD1"),
+		STM32_FUNCTION(15, "LCD_B5"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(64, "PE0"),
+		STM32_FUNCTION(0, "GPIOE0"),
+		STM32_FUNCTION(7, "DCMIPP_D12"),
+		STM32_FUNCTION(9, "UART8_RX"),
+		STM32_FUNCTION(10, "FDCAN2_RX"),
+		STM32_FUNCTION(11, "TSC_G4_IO1"),
+		STM32_FUNCTION(12, "LCD_B1"),
+		STM32_FUNCTION(13, "FMC_A11"),
+		STM32_FUNCTION(14, "DCMIPP_D1"),
+		STM32_FUNCTION(15, "LCD_B5"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(65, "PE1"),
+		STM32_FUNCTION(0, "GPIOE1"),
+		STM32_FUNCTION(2, "LPTIM1_IN2"),
+		STM32_FUNCTION(4, "TSC_G2_IO3"),
+		STM32_FUNCTION(9, "UART8_TX"),
+		STM32_FUNCTION(10, "LCD_HSYNC"),
+		STM32_FUNCTION(12, "LCD_R4"),
+		STM32_FUNCTION(13, "FMC_NBL1"),
+		STM32_FUNCTION(14, "DCMIPP_D3"),
+		STM32_FUNCTION(15, "DCMIPP_D12"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(66, "PE2"),
+		STM32_FUNCTION(0, "GPIOE2"),
+		STM32_FUNCTION(1, "TRACECLK"),
+		STM32_FUNCTION(2, "TIM2_ETR"),
+		STM32_FUNCTION(4, "TSC_G5_IO1"),
+		STM32_FUNCTION(5, "I2C4_SCL"),
+		STM32_FUNCTION(6, "SPI5_MOSI"),
+		STM32_FUNCTION(7, "SAI1_FS_B"),
+		STM32_FUNCTION(8, "USART6_RTS USART6_DE"),
+		STM32_FUNCTION(10, "SPDIFRX_IN1"),
+		STM32_FUNCTION(11, "ETH2_MII_RXD1 ETH2_RGMII_RXD1 ETH2_RMII_RXD1"),
+		STM32_FUNCTION(13, "FMC_A23"),
+		STM32_FUNCTION(15, "LCD_R1"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(67, "PE3"),
+		STM32_FUNCTION(0, "GPIOE3"),
+		STM32_FUNCTION(1, "TRACED11"),
+		STM32_FUNCTION(3, "SAI2_D4"),
+		STM32_FUNCTION(5, "TIM15_BKIN"),
+		STM32_FUNCTION(6, "SPI4_MISO I2S4_SDI"),
+		STM32_FUNCTION(9, "USART3_RTS USART3_DE"),
+		STM32_FUNCTION(10, "FDCAN1_RX"),
+		STM32_FUNCTION(11, "SDMMC2_CK"),
+		STM32_FUNCTION(14, "LCD_R4"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(68, "PE4"),
+		STM32_FUNCTION(0, "GPIOE4"),
+		STM32_FUNCTION(2, "SPI5_MISO"),
+		STM32_FUNCTION(3, "SAI1_D2"),
+		STM32_FUNCTION(4, "DFSDM1_DATIN3"),
+		STM32_FUNCTION(5, "TIM15_CH1N"),
+		STM32_FUNCTION(6, "I2S_CKIN"),
+		STM32_FUNCTION(7, "SAI1_FS_A"),
+		STM32_FUNCTION(8, "UART7_RTS UART7_DE"),
+		STM32_FUNCTION(9, "UART8_TX"),
+		STM32_FUNCTION(10, "QUADSPI_BK2_NCS"),
+		STM32_FUNCTION(11, "FMC_NCE2"),
+		STM32_FUNCTION(12, "TSC_G1_IO1"),
+		STM32_FUNCTION(13, "FMC_A25"),
+		STM32_FUNCTION(14, "DCMIPP_D3"),
+		STM32_FUNCTION(15, "LCD_G7"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(69, "PE5"),
+		STM32_FUNCTION(0, "GPIOE5"),
+		STM32_FUNCTION(3, "SAI2_SCK_B"),
+		STM32_FUNCTION(4, "TIM8_CH3"),
+		STM32_FUNCTION(5, "TIM15_CH1"),
+		STM32_FUNCTION(9, "UART4_RX"),
+		STM32_FUNCTION(11, "ETH1_MII_TXD3 ETH1_RGMII_TXD3"),
+		STM32_FUNCTION(13, "FMC_NE1"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(70, "PE6"),
+		STM32_FUNCTION(0, "GPIOE6"),
+		STM32_FUNCTION(1, "MCO2"),
+		STM32_FUNCTION(2, "TIM1_BKIN2"),
+		STM32_FUNCTION(3, "SAI2_SCK_B"),
+		STM32_FUNCTION(5, "TIM15_CH2"),
+		STM32_FUNCTION(6, "I2C3_SMBA"),
+		STM32_FUNCTION(7, "SAI1_SCK_B"),
+		STM32_FUNCTION(9, "UART4_RTS UART4_DE"),
+		STM32_FUNCTION(12, "ETH2_MII_TXD3 ETH2_RGMII_TXD3"),
+		STM32_FUNCTION(13, "FMC_A22"),
+		STM32_FUNCTION(14, "DCMIPP_D7"),
+		STM32_FUNCTION(15, "LCD_G3"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(71, "PE7"),
+		STM32_FUNCTION(0, "GPIOE7"),
+		STM32_FUNCTION(2, "TIM1_ETR"),
+		STM32_FUNCTION(5, "LPTIM2_IN1"),
+		STM32_FUNCTION(9, "UART5_TX"),
+		STM32_FUNCTION(13, "FMC_D4 FMC_AD4"),
+		STM32_FUNCTION(14, "LCD_B3"),
+		STM32_FUNCTION(15, "LCD_R5"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(72, "PE8"),
+		STM32_FUNCTION(0, "GPIOE8"),
+		STM32_FUNCTION(2, "TIM1_CH1N"),
+		STM32_FUNCTION(4, "DFSDM1_CKIN2"),
+		STM32_FUNCTION(6, "I2C1_SDA"),
+		STM32_FUNCTION(8, "UART7_TX"),
+		STM32_FUNCTION(13, "FMC_D5 FMC_AD5"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(73, "PE9"),
+		STM32_FUNCTION(0, "GPIOE9"),
+		STM32_FUNCTION(2, "TIM1_CH1"),
+		STM32_FUNCTION(10, "QUADSPI_BK1_IO1"),
+		STM32_FUNCTION(12, "LCD_HSYNC"),
+		STM32_FUNCTION(13, "FMC_D6 FMC_AD6"),
+		STM32_FUNCTION(14, "DCMIPP_D7"),
+		STM32_FUNCTION(15, "LCD_R7"),
+		STM32_FUNCTION(16, "HDP3"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(74, "PE10"),
+		STM32_FUNCTION(0, "GPIOE10"),
+		STM32_FUNCTION(2, "TIM1_CH2N"),
+		STM32_FUNCTION(8, "UART7_RX"),
+		STM32_FUNCTION(10, "FDCAN1_TX"),
+		STM32_FUNCTION(13, "FMC_D7 FMC_AD7"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(75, "PE11"),
+		STM32_FUNCTION(0, "GPIOE11"),
+		STM32_FUNCTION(2, "TIM1_CH2"),
+		STM32_FUNCTION(3, "USART2_CTS USART2_NSS"),
+		STM32_FUNCTION(5, "SAI1_D2"),
+		STM32_FUNCTION(6, "SPI4_MOSI I2S4_SDO"),
+		STM32_FUNCTION(7, "SAI1_FS_A"),
+		STM32_FUNCTION(8, "USART6_CK"),
+		STM32_FUNCTION(10, "LCD_R0"),
+		STM32_FUNCTION(11, "ETH2_MII_TX_ER"),
+		STM32_FUNCTION(12, "ETH1_MII_TX_ER"),
+		STM32_FUNCTION(13, "FMC_D8 FMC_AD8"),
+		STM32_FUNCTION(14, "DCMIPP_D10"),
+		STM32_FUNCTION(15, "LCD_R5"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(76, "PE12"),
+		STM32_FUNCTION(0, "GPIOE12"),
+		STM32_FUNCTION(2, "TIM1_CH3N"),
+		STM32_FUNCTION(6, "SPI4_SCK I2S4_CK"),
+		STM32_FUNCTION(9, "UART8_RTS UART8_DE"),
+		STM32_FUNCTION(10, "LCD_VSYNC"),
+		STM32_FUNCTION(11, "TSC_G3_IO2"),
+		STM32_FUNCTION(12, "LCD_G4"),
+		STM32_FUNCTION(13, "FMC_D9 FMC_AD9"),
+		STM32_FUNCTION(14, "DCMIPP_D11"),
+		STM32_FUNCTION(15, "LCD_G6"),
+		STM32_FUNCTION(16, "HDP4"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(77, "PE13"),
+		STM32_FUNCTION(0, "GPIOE13"),
+		STM32_FUNCTION(2, "TIM1_CH3"),
+		STM32_FUNCTION(5, "I2C5_SDA"),
+		STM32_FUNCTION(6, "SPI4_MISO I2S4_SDI"),
+		STM32_FUNCTION(12, "LCD_B1"),
+		STM32_FUNCTION(13, "FMC_D10 FMC_AD10"),
+		STM32_FUNCTION(14, "DCMIPP_D4"),
+		STM32_FUNCTION(15, "LCD_R6"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(78, "PE14"),
+		STM32_FUNCTION(0, "GPIOE14"),
+		STM32_FUNCTION(2, "TIM1_BKIN"),
+		STM32_FUNCTION(5, "SAI1_D4"),
+		STM32_FUNCTION(9, "UART8_RTS UART8_DE"),
+		STM32_FUNCTION(10, "QUADSPI_BK1_NCS"),
+		STM32_FUNCTION(11, "QUADSPI_BK2_IO2"),
+		STM32_FUNCTION(13, "FMC_D11 FMC_AD11"),
+		STM32_FUNCTION(14, "DCMIPP_D7"),
+		STM32_FUNCTION(15, "LCD_G0"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(79, "PE15"),
+		STM32_FUNCTION(0, "GPIOE15"),
+		STM32_FUNCTION(2, "TIM2_ETR"),
+		STM32_FUNCTION(3, "TIM1_BKIN"),
+		STM32_FUNCTION(4, "USART2_CTS USART2_NSS"),
+		STM32_FUNCTION(7, "I2C4_SCL"),
+		STM32_FUNCTION(13, "FMC_D12 FMC_AD12"),
+		STM32_FUNCTION(14, "DCMIPP_D10"),
+		STM32_FUNCTION(15, "LCD_B7"),
+		STM32_FUNCTION(16, "HDP7"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(80, "PF0"),
+		STM32_FUNCTION(0, "GPIOF0"),
+		STM32_FUNCTION(1, "TRACED13"),
+		STM32_FUNCTION(4, "DFSDM1_CKOUT"),
+		STM32_FUNCTION(8, "USART3_CK"),
+		STM32_FUNCTION(11, "SDMMC2_D4"),
+		STM32_FUNCTION(13, "FMC_A0"),
+		STM32_FUNCTION(14, "LCD_R6"),
+		STM32_FUNCTION(15, "LCD_G0"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(81, "PF1"),
+		STM32_FUNCTION(0, "GPIOF1"),
+		STM32_FUNCTION(1, "TRACED7"),
+		STM32_FUNCTION(5, "I2C2_SDA"),
+		STM32_FUNCTION(6, "SPI3_MOSI I2S3_SDO"),
+		STM32_FUNCTION(13, "FMC_A1"),
+		STM32_FUNCTION(14, "LCD_B7"),
+		STM32_FUNCTION(15, "LCD_G1"),
+		STM32_FUNCTION(16, "HDP7"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(82, "PF2"),
+		STM32_FUNCTION(0, "GPIOF2"),
+		STM32_FUNCTION(1, "TRACED1"),
+		STM32_FUNCTION(5, "I2C2_SCL"),
+		STM32_FUNCTION(7, "DFSDM1_CKIN1"),
+		STM32_FUNCTION(8, "USART6_CK"),
+		STM32_FUNCTION(10, "SDMMC2_D0DIR"),
+		STM32_FUNCTION(12, "SDMMC1_D0DIR"),
+		STM32_FUNCTION(13, "FMC_A2"),
+		STM32_FUNCTION(14, "LCD_G4"),
+		STM32_FUNCTION(15, "LCD_B3"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(83, "PF3"),
+		STM32_FUNCTION(0, "GPIOF3"),
+		STM32_FUNCTION(4, "LPTIM2_IN2"),
+		STM32_FUNCTION(5, "I2C5_SDA"),
+		STM32_FUNCTION(6, "SPI4_MISO I2S4_SDI"),
+		STM32_FUNCTION(7, "SPI3_NSS I2S3_WS"),
+		STM32_FUNCTION(13, "FMC_A3"),
+		STM32_FUNCTION(15, "LCD_G3"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(84, "PF4"),
+		STM32_FUNCTION(0, "GPIOF4"),
+		STM32_FUNCTION(4, "USART2_RX"),
+		STM32_FUNCTION(11, "TSC_G3_IO3"),
+		STM32_FUNCTION(12, "ETH2_MII_RXD0 ETH2_RGMII_RXD0 ETH2_RMII_RXD0"),
+		STM32_FUNCTION(13, "FMC_A4"),
+		STM32_FUNCTION(14, "DCMIPP_D4"),
+		STM32_FUNCTION(15, "LCD_B6"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(85, "PF5"),
+		STM32_FUNCTION(0, "GPIOF5"),
+		STM32_FUNCTION(1, "TRACED12"),
+		STM32_FUNCTION(5, "DFSDM1_CKIN0"),
+		STM32_FUNCTION(6, "I2C1_SMBA"),
+		STM32_FUNCTION(10, "LCD_G0"),
+		STM32_FUNCTION(13, "FMC_A5"),
+		STM32_FUNCTION(14, "DCMIPP_D11"),
+		STM32_FUNCTION(15, "LCD_R5"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(86, "PF6"),
+		STM32_FUNCTION(0, "GPIOF6"),
+		STM32_FUNCTION(2, "TIM16_CH1"),
+		STM32_FUNCTION(6, "SPI5_NSS"),
+		STM32_FUNCTION(8, "UART7_RX"),
+		STM32_FUNCTION(10, "QUADSPI_BK1_IO2"),
+		STM32_FUNCTION(12, "ETH2_MII_TX_EN ETH2_RGMII_TX_CTL ETH2_RMII_TX_EN"),
+		STM32_FUNCTION(14, "LCD_R7"),
+		STM32_FUNCTION(15, "LCD_G4"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(87, "PF7"),
+		STM32_FUNCTION(0, "GPIOF7"),
+		STM32_FUNCTION(2, "TIM17_CH1"),
+		STM32_FUNCTION(8, "UART7_TX"),
+		STM32_FUNCTION(9, "UART4_CTS"),
+		STM32_FUNCTION(11, "ETH1_RGMII_CLK125"),
+		STM32_FUNCTION(12, "ETH2_MII_TXD0 ETH2_RGMII_TXD0 ETH2_RMII_TXD0"),
+		STM32_FUNCTION(13, "FMC_A18"),
+		STM32_FUNCTION(15, "LCD_G2"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(88, "PF8"),
+		STM32_FUNCTION(0, "GPIOF8"),
+		STM32_FUNCTION(2, "TIM16_CH1N"),
+		STM32_FUNCTION(3, "TIM4_CH3"),
+		STM32_FUNCTION(4, "TIM8_CH3"),
+		STM32_FUNCTION(7, "SAI1_SCK_B"),
+		STM32_FUNCTION(8, "USART6_TX"),
+		STM32_FUNCTION(10, "TIM13_CH1"),
+		STM32_FUNCTION(11, "QUADSPI_BK1_IO0"),
+		STM32_FUNCTION(14, "DCMIPP_D15"),
+		STM32_FUNCTION(15, "LCD_B3"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(89, "PF9"),
+		STM32_FUNCTION(0, "GPIOF9"),
+		STM32_FUNCTION(2, "TIM17_CH1N"),
+		STM32_FUNCTION(3, "TIM1_CH1"),
+		STM32_FUNCTION(4, "DFSDM1_CKIN3"),
+		STM32_FUNCTION(7, "SAI1_D4"),
+		STM32_FUNCTION(8, "UART7_CTS"),
+		STM32_FUNCTION(9, "UART8_RX"),
+		STM32_FUNCTION(10, "TIM14_CH1"),
+		STM32_FUNCTION(11, "QUADSPI_BK1_IO1"),
+		STM32_FUNCTION(12, "QUADSPI_BK2_IO3"),
+		STM32_FUNCTION(13, "FMC_A9"),
+		STM32_FUNCTION(15, "LCD_B6"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(90, "PF10"),
+		STM32_FUNCTION(0, "GPIOF10"),
+		STM32_FUNCTION(2, "TIM16_BKIN"),
+		STM32_FUNCTION(3, "SAI1_D3"),
+		STM32_FUNCTION(4, "TIM8_BKIN"),
+		STM32_FUNCTION(6, "SPI5_NSS"),
+		STM32_FUNCTION(8, "USART6_RTS USART6_DE"),
+		STM32_FUNCTION(9, "UART7_RTS UART7_DE"),
+		STM32_FUNCTION(10, "QUADSPI_CLK"),
+		STM32_FUNCTION(14, "DCMIPP_HSYNC"),
+		STM32_FUNCTION(15, "LCD_B5"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(91, "PF11"),
+		STM32_FUNCTION(0, "GPIOF11"),
+		STM32_FUNCTION(2, "USART2_TX"),
+		STM32_FUNCTION(3, "SAI1_D2"),
+		STM32_FUNCTION(4, "DFSDM1_CKIN3"),
+		STM32_FUNCTION(7, "SAI1_FS_A"),
+		STM32_FUNCTION(13, "ETH2_MII_RX_ER"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(92, "PF12"),
+		STM32_FUNCTION(0, "GPIOF12"),
+		STM32_FUNCTION(6, "SPI1_NSS I2S1_WS"),
+		STM32_FUNCTION(7, "SAI1_SD_A"),
+		STM32_FUNCTION(9, "UART4_TX"),
+		STM32_FUNCTION(11, "ETH1_MII_TX_ER"),
+		STM32_FUNCTION(12, "ETH1_RGMII_CLK125"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(93, "PF13"),
+		STM32_FUNCTION(0, "GPIOF13"),
+		STM32_FUNCTION(2, "TIM2_ETR"),
+		STM32_FUNCTION(3, "SAI1_MCLK_B"),
+		STM32_FUNCTION(7, "DFSDM1_DATIN3"),
+		STM32_FUNCTION(8, "USART2_TX"),
+		STM32_FUNCTION(9, "UART5_RX"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(94, "PF14"),
+		STM32_FUNCTION(0, "GPIOF14"),
+		STM32_FUNCTION(1, "JTCK SWCLK"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(95, "PF15"),
+		STM32_FUNCTION(0, "GPIOF15"),
+		STM32_FUNCTION(1, "JTMS SWDIO"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(96, "PG0"),
+		STM32_FUNCTION(0, "GPIOG0"),
+		STM32_FUNCTION(10, "FDCAN2_TX"),
+		STM32_FUNCTION(11, "TSC_G4_IO2"),
+		STM32_FUNCTION(13, "FMC_A10"),
+		STM32_FUNCTION(14, "DCMIPP_PIXCLK"),
+		STM32_FUNCTION(15, "LCD_G5"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(97, "PG1"),
+		STM32_FUNCTION(0, "GPIOG1"),
+		STM32_FUNCTION(2, "LPTIM1_ETR"),
+		STM32_FUNCTION(3, "TIM4_ETR"),
+		STM32_FUNCTION(4, "SAI2_FS_A"),
+		STM32_FUNCTION(5, "I2C2_SMBA"),
+		STM32_FUNCTION(6, "SPI2_MISO I2S2_SDI"),
+		STM32_FUNCTION(7, "SAI2_D2"),
+		STM32_FUNCTION(10, "FDCAN2_TX"),
+		STM32_FUNCTION(11, "ETH2_MII_TXD2 ETH2_RGMII_TXD2"),
+		STM32_FUNCTION(13, "FMC_NBL0"),
+		STM32_FUNCTION(15, "LCD_G7"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(98, "PG2"),
+		STM32_FUNCTION(0, "GPIOG2"),
+		STM32_FUNCTION(2, "MCO2"),
+		STM32_FUNCTION(4, "TIM8_BKIN"),
+		STM32_FUNCTION(11, "SAI2_MCLK_B"),
+		STM32_FUNCTION(12, "ETH1_MDC"),
+		STM32_FUNCTION(14, "DCMIPP_D1"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(99, "PG3"),
+		STM32_FUNCTION(0, "GPIOG3"),
+		STM32_FUNCTION(4, "TIM8_BKIN2"),
+		STM32_FUNCTION(5, "I2C2_SDA"),
+		STM32_FUNCTION(7, "SAI2_SD_B"),
+		STM32_FUNCTION(10, "FDCAN2_RX"),
+		STM32_FUNCTION(11, "ETH2_RGMII_GTX_CLK"),
+		STM32_FUNCTION(12, "ETH1_MDIO"),
+		STM32_FUNCTION(13, "FMC_A13"),
+		STM32_FUNCTION(14, "DCMIPP_D15"),
+		STM32_FUNCTION(15, "DCMIPP_D12"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(100, "PG4"),
+		STM32_FUNCTION(0, "GPIOG4"),
+		STM32_FUNCTION(1, "TRACED1"),
+		STM32_FUNCTION(2, "TIM1_BKIN2"),
+		STM32_FUNCTION(5, "DFSDM1_CKIN3"),
+		STM32_FUNCTION(9, "USART3_RX"),
+		STM32_FUNCTION(11, "SDMMC2_D123DIR"),
+		STM32_FUNCTION(12, "LCD_VSYNC"),
+		STM32_FUNCTION(13, "FMC_A14"),
+		STM32_FUNCTION(14, "DCMIPP_D8"),
+		STM32_FUNCTION(15, "DCMIPP_D13"),
+		STM32_FUNCTION(16, "HDP1"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(101, "PG5"),
+		STM32_FUNCTION(0, "GPIOG5"),
+		STM32_FUNCTION(2, "TIM17_CH1"),
+		STM32_FUNCTION(11, "ETH2_MDC"),
+		STM32_FUNCTION(12, "LCD_G4"),
+		STM32_FUNCTION(13, "FMC_A15"),
+		STM32_FUNCTION(14, "DCMIPP_VSYNC"),
+		STM32_FUNCTION(15, "DCMIPP_D3"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(102, "PG6"),
+		STM32_FUNCTION(0, "GPIOG6"),
+		STM32_FUNCTION(1, "TRACED3"),
+		STM32_FUNCTION(2, "TIM17_BKIN"),
+		STM32_FUNCTION(3, "TIM5_CH4"),
+		STM32_FUNCTION(4, "SAI2_D1"),
+		STM32_FUNCTION(5, "USART1_RX"),
+		STM32_FUNCTION(7, "SAI2_SD_A"),
+		STM32_FUNCTION(11, "SDMMC2_CMD"),
+		STM32_FUNCTION(12, "LCD_G0"),
+		STM32_FUNCTION(14, "LCD_DE"),
+		STM32_FUNCTION(15, "LCD_R7"),
+		STM32_FUNCTION(16, "HDP3"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(103, "PG7"),
+		STM32_FUNCTION(0, "GPIOG7"),
+		STM32_FUNCTION(1, "TRACED8"),
+		STM32_FUNCTION(2, "TIM1_ETR"),
+		STM32_FUNCTION(6, "SPI3_MISO I2S3_SDI"),
+		STM32_FUNCTION(9, "UART7_CTS"),
+		STM32_FUNCTION(11, "SDMMC2_CKIN"),
+		STM32_FUNCTION(12, "LCD_R1"),
+		STM32_FUNCTION(14, "LCD_R5"),
+		STM32_FUNCTION(15, "LCD_R2"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(104, "PG8"),
+		STM32_FUNCTION(0, "GPIOG8"),
+		STM32_FUNCTION(2, "TIM2_CH1"),
+		STM32_FUNCTION(4, "TIM8_ETR"),
+		STM32_FUNCTION(6, "SPI5_MISO"),
+		STM32_FUNCTION(7, "SAI1_MCLK_B"),
+		STM32_FUNCTION(8, "LCD_B1"),
+		STM32_FUNCTION(9, "USART3_RTS USART3_DE"),
+		STM32_FUNCTION(10, "SPDIFRX_IN2"),
+		STM32_FUNCTION(11, "QUADSPI_BK2_IO2"),
+		STM32_FUNCTION(12, "QUADSPI_BK1_IO3"),
+		STM32_FUNCTION(13, "FMC_NE2"),
+		STM32_FUNCTION(14, "ETH2_CLK"),
+		STM32_FUNCTION(15, "DCMIPP_D6"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(105, "PG9"),
+		STM32_FUNCTION(0, "GPIOG9"),
+		STM32_FUNCTION(1, "DBTRGO"),
+		STM32_FUNCTION(5, "I2C2_SDA"),
+		STM32_FUNCTION(8, "USART6_RX"),
+		STM32_FUNCTION(9, "SPDIFRX_IN3"),
+		STM32_FUNCTION(10, "FDCAN1_RX"),
+		STM32_FUNCTION(11, "FMC_NE2"),
+		STM32_FUNCTION(13, "FMC_NCE"),
+		STM32_FUNCTION(14, "DCMIPP_VSYNC"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(106, "PG10"),
+		STM32_FUNCTION(0, "GPIOG10"),
+		STM32_FUNCTION(6, "SPI5_SCK"),
+		STM32_FUNCTION(7, "SAI1_SD_B"),
+		STM32_FUNCTION(9, "UART8_CTS"),
+		STM32_FUNCTION(10, "FDCAN1_TX"),
+		STM32_FUNCTION(11, "QUADSPI_BK2_IO1"),
+		STM32_FUNCTION(13, "FMC_NE3"),
+		STM32_FUNCTION(14, "DCMIPP_D2"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(107, "PG11"),
+		STM32_FUNCTION(0, "GPIOG11"),
+		STM32_FUNCTION(5, "SAI2_D3"),
+		STM32_FUNCTION(6, "I2S2_MCK"),
+		STM32_FUNCTION(8, "USART3_TX"),
+		STM32_FUNCTION(9, "UART4_TX"),
+		STM32_FUNCTION(11, "ETH2_MII_TXD1 ETH2_RGMII_TXD1 ETH2_RMII_TXD1"),
+		STM32_FUNCTION(13, "FMC_A24"),
+		STM32_FUNCTION(14, "DCMIPP_D14"),
+		STM32_FUNCTION(15, "LCD_B2"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(108, "PG12"),
+		STM32_FUNCTION(0, "GPIOG12"),
+		STM32_FUNCTION(2, "LPTIM1_IN1"),
+		STM32_FUNCTION(4, "TSC_G5_IO2"),
+		STM32_FUNCTION(5, "SAI2_SCK_A"),
+		STM32_FUNCTION(7, "SAI2_CK2"),
+		STM32_FUNCTION(8, "USART6_RTS USART6_DE"),
+		STM32_FUNCTION(9, "USART3_CTS"),
+		STM32_FUNCTION(11, "ETH2_PHY_INTN"),
+		STM32_FUNCTION(12, "ETH1_PHY_INTN"),
+		STM32_FUNCTION(13, "ETH2_MII_RX_DV ETH2_RGMII_RX_CTL ETH2_RMII_CRS_DV"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(109, "PG13"),
+		STM32_FUNCTION(0, "GPIOG13"),
+		STM32_FUNCTION(2, "LPTIM1_OUT"),
+		STM32_FUNCTION(8, "USART6_CTS USART6_NSS"),
+		STM32_FUNCTION(12, "ETH1_MII_TXD0 ETH1_RGMII_TXD0 ETH1_RMII_TXD0"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(110, "PG14"),
+		STM32_FUNCTION(0, "GPIOG14"),
+		STM32_FUNCTION(2, "LPTIM1_ETR"),
+		STM32_FUNCTION(7, "SAI2_D1"),
+		STM32_FUNCTION(8, "USART6_TX"),
+		STM32_FUNCTION(11, "SAI2_SD_A"),
+		STM32_FUNCTION(12, "ETH1_MII_TXD1 ETH1_RGMII_TXD1 ETH1_RMII_TXD1"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(111, "PG15"),
+		STM32_FUNCTION(0, "GPIOG15"),
+		STM32_FUNCTION(8, "USART6_CTS USART6_NSS"),
+		STM32_FUNCTION(9, "UART7_CTS"),
+		STM32_FUNCTION(10, "QUADSPI_BK1_IO1"),
+		STM32_FUNCTION(11, "ETH2_PHY_INTN"),
+		STM32_FUNCTION(12, "LCD_B4"),
+		STM32_FUNCTION(14, "DCMIPP_D10"),
+		STM32_FUNCTION(15, "LCD_B3"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(112, "PH0"),
+		STM32_FUNCTION(0, "GPIOH0"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(113, "PH1"),
+		STM32_FUNCTION(0, "GPIOH1"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(114, "PH2"),
+		STM32_FUNCTION(0, "GPIOH2"),
+		STM32_FUNCTION(2, "LPTIM1_IN2"),
+		STM32_FUNCTION(4, "TSC_G4_IO3"),
+		STM32_FUNCTION(7, "DCMIPP_D9"),
+		STM32_FUNCTION(8, "LCD_G1"),
+		STM32_FUNCTION(9, "UART7_TX"),
+		STM32_FUNCTION(10, "QUADSPI_BK2_IO0"),
+		STM32_FUNCTION(11, "ETH2_MII_CRS"),
+		STM32_FUNCTION(12, "ETH1_MII_CRS"),
+		STM32_FUNCTION(13, "FMC_NE4"),
+		STM32_FUNCTION(14, "ETH2_RGMII_CLK125"),
+		STM32_FUNCTION(15, "LCD_B0"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(115, "PH3"),
+		STM32_FUNCTION(0, "GPIOH3"),
+		STM32_FUNCTION(5, "I2C3_SCL"),
+		STM32_FUNCTION(6, "SPI5_MOSI"),
+		STM32_FUNCTION(10, "QUADSPI_BK2_IO1"),
+		STM32_FUNCTION(11, "ETH1_MII_COL"),
+		STM32_FUNCTION(12, "LCD_R5"),
+		STM32_FUNCTION(13, "ETH2_MII_COL"),
+		STM32_FUNCTION(14, "QUADSPI_BK1_IO0"),
+		STM32_FUNCTION(15, "LCD_B4"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(116, "PH4"),
+		STM32_FUNCTION(0, "GPIOH4"),
+		STM32_FUNCTION(1, "JTDI"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(117, "PH5"),
+		STM32_FUNCTION(0, "GPIOH5"),
+		STM32_FUNCTION(1, "JTDO"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(118, "PH6"),
+		STM32_FUNCTION(0, "GPIOH6"),
+		STM32_FUNCTION(3, "TIM12_CH1"),
+		STM32_FUNCTION(4, "USART2_CK"),
+		STM32_FUNCTION(5, "I2C5_SDA"),
+		STM32_FUNCTION(6, "SPI2_SCK I2S2_CK"),
+		STM32_FUNCTION(10, "QUADSPI_BK1_IO2"),
+		STM32_FUNCTION(11, "ETH1_PHY_INTN"),
+		STM32_FUNCTION(12, "ETH1_MII_RX_ER"),
+		STM32_FUNCTION(13, "ETH2_MII_RXD2 ETH2_RGMII_RXD2"),
+		STM32_FUNCTION(14, "QUADSPI_BK1_NCS"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(119, "PH7"),
+		STM32_FUNCTION(0, "GPIOH7"),
+		STM32_FUNCTION(3, "SAI2_FS_B"),
+		STM32_FUNCTION(6, "I2C3_SDA"),
+		STM32_FUNCTION(7, "SPI5_SCK"),
+		STM32_FUNCTION(10, "QUADSPI_BK2_IO3"),
+		STM32_FUNCTION(11, "ETH2_MII_TX_CLK"),
+		STM32_FUNCTION(12, "ETH1_MII_TX_CLK"),
+		STM32_FUNCTION(14, "QUADSPI_BK1_IO3"),
+		STM32_FUNCTION(15, "LCD_B2"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(120, "PH8"),
+		STM32_FUNCTION(0, "GPIOH8"),
+		STM32_FUNCTION(1, "TRACED9"),
+		STM32_FUNCTION(3, "TIM5_ETR"),
+		STM32_FUNCTION(4, "USART2_RX"),
+		STM32_FUNCTION(5, "I2C3_SDA"),
+		STM32_FUNCTION(12, "LCD_R6"),
+		STM32_FUNCTION(13, "FMC_A8"),
+		STM32_FUNCTION(14, "DCMIPP_HSYNC"),
+		STM32_FUNCTION(15, "LCD_R2"),
+		STM32_FUNCTION(16, "HDP2"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(121, "PH9"),
+		STM32_FUNCTION(0, "GPIOH9"),
+		STM32_FUNCTION(2, "TIM1_CH4"),
+		STM32_FUNCTION(3, "TIM12_CH2"),
+		STM32_FUNCTION(4, "TSC_SYNC"),
+		STM32_FUNCTION(6, "SPI4_SCK I2S4_CK"),
+		STM32_FUNCTION(7, "DCMIPP_D13"),
+		STM32_FUNCTION(10, "LCD_B5"),
+		STM32_FUNCTION(12, "LCD_DE"),
+		STM32_FUNCTION(13, "FMC_A20"),
+		STM32_FUNCTION(14, "DCMIPP_D9"),
+		STM32_FUNCTION(15, "DCMIPP_D8"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(122, "PH10"),
+		STM32_FUNCTION(0, "GPIOH10"),
+		STM32_FUNCTION(1, "TRACED0"),
+		STM32_FUNCTION(3, "TIM5_CH1"),
+		STM32_FUNCTION(4, "SAI2_D3"),
+		STM32_FUNCTION(5, "DFSDM1_DATIN2"),
+		STM32_FUNCTION(6, "I2S3_MCK"),
+		STM32_FUNCTION(7, "SPI2_MOSI I2S2_SDO"),
+		STM32_FUNCTION(8, "USART3_CTS USART3_NSS"),
+		STM32_FUNCTION(9, "SDMMC1_D4"),
+		STM32_FUNCTION(14, "LCD_HSYNC"),
+		STM32_FUNCTION(15, "LCD_R2"),
+		STM32_FUNCTION(16, "HDP0"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(123, "PH11"),
+		STM32_FUNCTION(0, "GPIOH11"),
+		STM32_FUNCTION(2, "SPI5_NSS"),
+		STM32_FUNCTION(3, "TIM5_CH2"),
+		STM32_FUNCTION(4, "SAI2_SD_A"),
+		STM32_FUNCTION(6, "SPI2_NSS I2S2_WS"),
+		STM32_FUNCTION(7, "I2C4_SCL"),
+		STM32_FUNCTION(8, "USART6_RX"),
+		STM32_FUNCTION(10, "QUADSPI_BK2_IO0"),
+		STM32_FUNCTION(12, "ETH2_MII_RX_CLK ETH2_RGMII_RX_CLK ETH2_RMII_REF_CLK"),
+		STM32_FUNCTION(13, "FMC_A12"),
+		STM32_FUNCTION(15, "LCD_G6"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(124, "PH12"),
+		STM32_FUNCTION(0, "GPIOH12"),
+		STM32_FUNCTION(2, "USART2_TX"),
+		STM32_FUNCTION(3, "TIM5_CH3"),
+		STM32_FUNCTION(4, "DFSDM1_CKIN1"),
+		STM32_FUNCTION(5, "I2C3_SCL"),
+		STM32_FUNCTION(6, "SPI5_MOSI"),
+		STM32_FUNCTION(7, "SAI1_SCK_A"),
+		STM32_FUNCTION(10, "QUADSPI_BK2_IO2"),
+		STM32_FUNCTION(11, "SAI1_CK2"),
+		STM32_FUNCTION(12, "ETH1_MII_CRS"),
+		STM32_FUNCTION(13, "FMC_A6"),
+		STM32_FUNCTION(14, "DCMIPP_D3"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(125, "PH13"),
+		STM32_FUNCTION(0, "GPIOH13"),
+		STM32_FUNCTION(1, "TRACED15"),
+		STM32_FUNCTION(3, "USART2_CK"),
+		STM32_FUNCTION(4, "TIM8_CH1N"),
+		STM32_FUNCTION(5, "I2C5_SCL"),
+		STM32_FUNCTION(7, "SPI3_SCK I2S3_CK"),
+		STM32_FUNCTION(9, "UART4_TX"),
+		STM32_FUNCTION(14, "LCD_G3"),
+		STM32_FUNCTION(15, "LCD_G2"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(126, "PH14"),
+		STM32_FUNCTION(0, "GPIOH14"),
+		STM32_FUNCTION(4, "DFSDM1_DATIN2"),
+		STM32_FUNCTION(5, "I2C3_SDA"),
+		STM32_FUNCTION(7, "DCMIPP_D8"),
+		STM32_FUNCTION(9, "UART4_RX"),
+		STM32_FUNCTION(12, "LCD_B4"),
+		STM32_FUNCTION(14, "DCMIPP_D2"),
+		STM32_FUNCTION(15, "DCMIPP_PIXCLK"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(128, "PI0"),
+		STM32_FUNCTION(0, "GPIOI0"),
+		STM32_FUNCTION(9, "SPDIFRX_IN0"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(129, "PI1"),
+		STM32_FUNCTION(0, "GPIOI1"),
+		STM32_FUNCTION(9, "SPDIFRX_IN1"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(130, "PI2"),
+		STM32_FUNCTION(0, "GPIOI2"),
+		STM32_FUNCTION(9, "SPDIFRX_IN2"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(131, "PI3"),
+		STM32_FUNCTION(0, "GPIOI3"),
+		STM32_FUNCTION(9, "SPDIFRX_IN3"),
+		STM32_FUNCTION(12, "ETH1_MII_RX_ER"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(132, "PI4"),
+		STM32_FUNCTION(0, "GPIOI4"),
+		STM32_FUNCTION(1, "BOOT0"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(133, "PI5"),
+		STM32_FUNCTION(0, "GPIOI5"),
+		STM32_FUNCTION(1, "BOOT1"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(134, "PI6"),
+		STM32_FUNCTION(0, "GPIOI6"),
+		STM32_FUNCTION(1, "BOOT2"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+	STM32_PIN(
+		PINCTRL_PIN(135, "PI7"),
+		STM32_FUNCTION(0, "GPIOI7"),
+		STM32_FUNCTION(17, "ANALOG")
+	),
+};
+
+static struct stm32_pinctrl_match_data stm32mp135_match_data = {
+	.pins = stm32mp135_pins,
+	.npins = ARRAY_SIZE(stm32mp135_pins),
+};
+
+static const struct of_device_id stm32mp135_pctrl_match[] = {
+	{
+		.compatible = "st,stm32mp135-pinctrl",
+		.data = &stm32mp135_match_data,
+	},
+	{ }
+};
+
+static const struct dev_pm_ops stm32_pinctrl_dev_pm_ops = {
+	 SET_LATE_SYSTEM_SLEEP_PM_OPS(NULL, stm32_pinctrl_resume)
+};
+
+static struct platform_driver stm32mp135_pinctrl_driver = {
+	.probe = stm32_pctl_probe,
+	.driver = {
+		.name = "stm32mp135-pinctrl",
+		.of_match_table = stm32mp135_pctrl_match,
+		.pm = &stm32_pinctrl_dev_pm_ops,
+	},
+};
+
+static int __init stm32mp135_pinctrl_init(void)
+{
+	return platform_driver_register(&stm32mp135_pinctrl_driver);
+}
+arch_initcall(stm32mp135_pinctrl_init);
diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index 41baccba033f..f901d2e43166 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -20,7 +20,7 @@ obj-$(CONFIG_CROS_EC_CHARDEV)		+= cros_ec_chardev.o
 obj-$(CONFIG_CROS_EC_LIGHTBAR)		+= cros_ec_lightbar.o
 obj-$(CONFIG_CROS_EC_VBC)		+= cros_ec_vbc.o
 obj-$(CONFIG_CROS_EC_DEBUGFS)		+= cros_ec_debugfs.o
-cros-ec-sensorhub-objs			:= cros_ec_sensorhub.o cros_ec_sensorhub_ring.o
+cros-ec-sensorhub-objs			:= cros_ec_sensorhub.o cros_ec_sensorhub_ring.o cros_ec_trace.o
 obj-$(CONFIG_CROS_EC_SENSORHUB)		+= cros-ec-sensorhub.o
 obj-$(CONFIG_CROS_EC_SYSFS)		+= cros_ec_sysfs.o
 obj-$(CONFIG_CROS_USBPD_LOGGER)		+= cros_usbpd_logger.o
diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c
index aa7f7aa77297..a7404d69b2d3 100644
--- a/drivers/platform/chrome/cros_ec_proto.c
+++ b/drivers/platform/chrome/cros_ec_proto.c
@@ -279,6 +279,15 @@ static int cros_ec_host_command_proto_query(struct cros_ec_device *ec_dev,
 	msg->insize = sizeof(struct ec_response_get_protocol_info);
 
 	ret = send_command(ec_dev, msg);
+	/*
+	 * Send command once again when timeout occurred.
+	 * Fingerprint MCU (FPMCU) is restarted during system boot which
+	 * introduces small window in which FPMCU won't respond for any
+	 * messages sent by kernel. There is no need to wait before next
+	 * attempt because we waited at least EC_MSG_DEADLINE_MS.
+	 */
+	if (ret == -ETIMEDOUT)
+		ret = send_command(ec_dev, msg);
 
 	if (ret < 0) {
 		dev_dbg(ec_dev->dev,
diff --git a/drivers/platform/chrome/cros_ec_sensorhub_ring.c b/drivers/platform/chrome/cros_ec_sensorhub_ring.c
index 8921f24e83ba..98e37080f760 100644
--- a/drivers/platform/chrome/cros_ec_sensorhub_ring.c
+++ b/drivers/platform/chrome/cros_ec_sensorhub_ring.c
@@ -17,6 +17,8 @@
 #include <linux/sort.h>
 #include <linux/slab.h>
 
+#include "cros_ec_trace.h"
+
 /* Precision of fixed point for the m values from the filter */
 #define M_PRECISION BIT(23)
 
@@ -291,6 +293,7 @@ cros_ec_sensor_ring_ts_filter_update(struct cros_ec_sensors_ts_filter_state
 		state->median_m = 0;
 		state->median_error = 0;
 	}
+	trace_cros_ec_sensorhub_filter(state, dx, dy);
 }
 
 /**
@@ -427,6 +430,11 @@ cros_ec_sensor_ring_process_event(struct cros_ec_sensorhub *sensorhub,
 			if (new_timestamp - *current_timestamp > 0)
 				*current_timestamp = new_timestamp;
 		}
+		trace_cros_ec_sensorhub_timestamp(in->timestamp,
+						  fifo_info->timestamp,
+						  fifo_timestamp,
+						  *current_timestamp,
+						  now);
 	}
 
 	if (in->flags & MOTIONSENSE_SENSOR_FLAG_ODR) {
@@ -460,6 +468,12 @@ cros_ec_sensor_ring_process_event(struct cros_ec_sensorhub *sensorhub,
 
 	/* Regular sample */
 	out->sensor_id = in->sensor_num;
+	trace_cros_ec_sensorhub_data(in->sensor_num,
+				     fifo_info->timestamp,
+				     fifo_timestamp,
+				     *current_timestamp,
+				     now);
+
 	if (*current_timestamp - now > 0) {
 		/*
 		 * This fix is needed to overcome the timestamp filter putting
diff --git a/drivers/platform/chrome/cros_ec_trace.h b/drivers/platform/chrome/cros_ec_trace.h
index f744b21bc655..7e7cfc98657a 100644
--- a/drivers/platform/chrome/cros_ec_trace.h
+++ b/drivers/platform/chrome/cros_ec_trace.h
@@ -15,6 +15,7 @@
 #include <linux/types.h>
 #include <linux/platform_data/cros_ec_commands.h>
 #include <linux/platform_data/cros_ec_proto.h>
+#include <linux/platform_data/cros_ec_sensorhub.h>
 
 #include <linux/tracepoint.h>
 
@@ -70,6 +71,99 @@ TRACE_EVENT(cros_ec_request_done,
 		  __entry->retval)
 );
 
+TRACE_EVENT(cros_ec_sensorhub_timestamp,
+	    TP_PROTO(u32 ec_sample_timestamp, u32 ec_fifo_timestamp, s64 fifo_timestamp,
+		     s64 current_timestamp, s64 current_time),
+	TP_ARGS(ec_sample_timestamp, ec_fifo_timestamp, fifo_timestamp, current_timestamp,
+		current_time),
+	TP_STRUCT__entry(
+		__field(u32, ec_sample_timestamp)
+		__field(u32, ec_fifo_timestamp)
+		__field(s64, fifo_timestamp)
+		__field(s64, current_timestamp)
+		__field(s64, current_time)
+		__field(s64, delta)
+	),
+	TP_fast_assign(
+		__entry->ec_sample_timestamp = ec_sample_timestamp;
+		__entry->ec_fifo_timestamp = ec_fifo_timestamp;
+		__entry->fifo_timestamp = fifo_timestamp;
+		__entry->current_timestamp = current_timestamp;
+		__entry->current_time = current_time;
+		__entry->delta = current_timestamp - current_time;
+	),
+	TP_printk("ec_ts: %9u, ec_fifo_ts: %9u, fifo_ts: %12lld, curr_ts: %12lld, curr_time: %12lld, delta %12lld",
+		  __entry->ec_sample_timestamp,
+		__entry->ec_fifo_timestamp,
+		__entry->fifo_timestamp,
+		__entry->current_timestamp,
+		__entry->current_time,
+		__entry->delta
+	)
+);
+
+TRACE_EVENT(cros_ec_sensorhub_data,
+	    TP_PROTO(u32 ec_sensor_num, u32 ec_fifo_timestamp, s64 fifo_timestamp,
+		     s64 current_timestamp, s64 current_time),
+	TP_ARGS(ec_sensor_num, ec_fifo_timestamp, fifo_timestamp, current_timestamp, current_time),
+	TP_STRUCT__entry(
+		__field(u32, ec_sensor_num)
+		__field(u32, ec_fifo_timestamp)
+		__field(s64, fifo_timestamp)
+		__field(s64, current_timestamp)
+		__field(s64, current_time)
+		__field(s64, delta)
+	),
+	TP_fast_assign(
+		__entry->ec_sensor_num = ec_sensor_num;
+		__entry->ec_fifo_timestamp = ec_fifo_timestamp;
+		__entry->fifo_timestamp = fifo_timestamp;
+		__entry->current_timestamp = current_timestamp;
+		__entry->current_time = current_time;
+		__entry->delta = current_timestamp - current_time;
+	),
+	TP_printk("ec_num: %4u, ec_fifo_ts: %9u, fifo_ts: %12lld, curr_ts: %12lld, curr_time: %12lld, delta %12lld",
+		  __entry->ec_sensor_num,
+		__entry->ec_fifo_timestamp,
+		__entry->fifo_timestamp,
+		__entry->current_timestamp,
+		__entry->current_time,
+		__entry->delta
+	)
+);
+
+TRACE_EVENT(cros_ec_sensorhub_filter,
+	    TP_PROTO(struct cros_ec_sensors_ts_filter_state *state, s64 dx, s64 dy),
+	TP_ARGS(state, dx, dy),
+	TP_STRUCT__entry(
+		__field(s64, dx)
+		__field(s64, dy)
+		__field(s64, median_m)
+		__field(s64, median_error)
+		__field(s64, history_len)
+		__field(s64, x)
+		__field(s64, y)
+	),
+	TP_fast_assign(
+		__entry->dx = dx;
+		__entry->dy = dy;
+		__entry->median_m = state->median_m;
+		__entry->median_error = state->median_error;
+		__entry->history_len = state->history_len;
+		__entry->x = state->x_offset;
+		__entry->y = state->y_offset;
+	),
+	TP_printk("dx: %12lld. dy: %12lld median_m: %12lld median_error: %12lld len: %lld x: %12lld y: %12lld",
+		  __entry->dx,
+		__entry->dy,
+		__entry->median_m,
+		__entry->median_error,
+		__entry->history_len,
+		__entry->x,
+		__entry->y
+	)
+);
+
 
 #endif /* _CROS_EC_TRACE_H_ */
 
diff --git a/drivers/platform/chrome/cros_ec_typec.c b/drivers/platform/chrome/cros_ec_typec.c
index 27c068c4c38d..262a891eded3 100644
--- a/drivers/platform/chrome/cros_ec_typec.c
+++ b/drivers/platform/chrome/cros_ec_typec.c
@@ -1054,24 +1054,6 @@ static int cros_typec_get_cmd_version(struct cros_typec_data *typec)
 	return 0;
 }
 
-/* Check the EC feature flags to see if TYPEC_* features are supported. */
-static int cros_typec_feature_supported(struct cros_typec_data *typec, enum ec_feature_code feature)
-{
-	struct ec_response_get_features resp = {};
-	int ret;
-
-	ret = cros_typec_ec_command(typec, 0, EC_CMD_GET_FEATURES, NULL, 0,
-				    &resp, sizeof(resp));
-	if (ret < 0) {
-		dev_warn(typec->dev,
-			 "Failed to get features, assuming typec feature=%d unsupported.\n",
-			 feature);
-		return 0;
-	}
-
-	return resp.flags[feature / 32] & EC_FEATURE_MASK_1(feature);
-}
-
 static void cros_typec_port_work(struct work_struct *work)
 {
 	struct cros_typec_data *typec = container_of(work, struct cros_typec_data, port_work);
@@ -1113,6 +1095,7 @@ MODULE_DEVICE_TABLE(of, cros_typec_of_match);
 
 static int cros_typec_probe(struct platform_device *pdev)
 {
+	struct cros_ec_dev *ec_dev = NULL;
 	struct device *dev = &pdev->dev;
 	struct cros_typec_data *typec;
 	struct ec_response_usb_pd_ports resp;
@@ -1132,10 +1115,10 @@ static int cros_typec_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	typec->typec_cmd_supported = !!cros_typec_feature_supported(typec,
-					EC_FEATURE_TYPEC_CMD);
-	typec->needs_mux_ack = !!cros_typec_feature_supported(typec,
-					EC_FEATURE_TYPEC_MUX_REQUIRE_AP_ACK);
+	ec_dev = dev_get_drvdata(&typec->ec->ec->dev);
+	typec->typec_cmd_supported = !!cros_ec_check_features(ec_dev, EC_FEATURE_TYPEC_CMD);
+	typec->needs_mux_ack = !!cros_ec_check_features(ec_dev,
+							EC_FEATURE_TYPEC_MUX_REQUIRE_AP_ACK);
 
 	ret = cros_typec_ec_command(typec, 0, EC_CMD_USB_PD_PORTS, NULL, 0,
 				    &resp, sizeof(resp));
diff --git a/drivers/platform/mellanox/mlxbf-pmc.c b/drivers/platform/mellanox/mlxbf-pmc.c
index 35883984251f..04bc3b50aa7a 100644
--- a/drivers/platform/mellanox/mlxbf-pmc.c
+++ b/drivers/platform/mellanox/mlxbf-pmc.c
@@ -79,7 +79,8 @@
 #define MLXBF_PMC_L3C_PERF_CNT_HIGH_VAL GENMASK(24, 0)
 
 /**
- * Structure to hold attribute and block info for each sysfs entry
+ * struct mlxbf_pmc_attribute - Structure to hold attribute and block info
+ * for each sysfs entry
  * @dev_attr: Device attribute struct
  * @index: index to identify counter number within a block
  * @nr: block number to which the sysfs belongs
@@ -91,7 +92,7 @@ struct mlxbf_pmc_attribute {
 };
 
 /**
- * Structure to hold info for each HW block
+ * struct mlxbf_pmc_block_info - Structure to hold info for each HW block
  *
  * @mmio_base: The VA at which the PMC block is mapped
  * @blk_size: Size of each mapped region
@@ -102,7 +103,7 @@ struct mlxbf_pmc_attribute {
  * @attr_event_list: Attributes for "event_list" sysfs files
  * @attr_enable: Attributes for "enable" sysfs files
  * @block_attr: All attributes needed for the block
- * @blcok_attr_grp: Attribute group for the block
+ * @block_attr_grp: Attribute group for the block
  */
 struct mlxbf_pmc_block_info {
 	void __iomem *mmio_base;
@@ -118,7 +119,7 @@ struct mlxbf_pmc_block_info {
 };
 
 /**
- * Structure to hold PMC context info
+ * struct mlxbf_pmc_context - Structure to hold PMC context info
  *
  * @pdev: The kernel structure representing the device
  * @total_blocks: Total number of blocks
@@ -127,7 +128,7 @@ struct mlxbf_pmc_block_info {
  * @block_name: Block name
  * @block:  Block info
  * @groups:  Attribute groups from each block
- * @sv_sreg_support: Whether SMCs are used to access performance registers
+ * @svc_sreg_support: Whether SMCs are used to access performance registers
  * @sreg_tbl_perf: Secure register access table number
  * @event_set: Event set to use
  */
@@ -145,7 +146,7 @@ struct mlxbf_pmc_context {
 };
 
 /**
- * Structure to hold supported events for each block
+ * struct mlxbf_pmc_events - Structure to hold supported events for each block
  * @evt_num: Event number used to program counters
  * @evt_name: Name of the event
  */
diff --git a/drivers/platform/surface/aggregator/Makefile b/drivers/platform/surface/aggregator/Makefile
index c8498c41e758..c0d550eda5cd 100644
--- a/drivers/platform/surface/aggregator/Makefile
+++ b/drivers/platform/surface/aggregator/Makefile
@@ -6,12 +6,9 @@ CFLAGS_core.o = -I$(src)
 
 obj-$(CONFIG_SURFACE_AGGREGATOR) += surface_aggregator.o
 
-surface_aggregator-objs := core.o
-surface_aggregator-objs += ssh_parser.o
-surface_aggregator-objs += ssh_packet_layer.o
-surface_aggregator-objs += ssh_request_layer.o
-surface_aggregator-objs += controller.o
-
-ifeq ($(CONFIG_SURFACE_AGGREGATOR_BUS),y)
-surface_aggregator-objs += bus.o
-endif
+surface_aggregator-y := core.o
+surface_aggregator-y += ssh_parser.o
+surface_aggregator-y += ssh_packet_layer.o
+surface_aggregator-y += ssh_request_layer.o
+surface_aggregator-$(CONFIG_SURFACE_AGGREGATOR_BUS) += bus.o
+surface_aggregator-y += controller.o
diff --git a/drivers/platform/surface/surface3_power.c b/drivers/platform/surface/surface3_power.c
index dea82aa1abd4..90c1568ea4e0 100644
--- a/drivers/platform/surface/surface3_power.c
+++ b/drivers/platform/surface/surface3_power.c
@@ -384,13 +384,7 @@ mshw0011_space_handler(u32 function, acpi_physical_address command,
 	if (ACPI_FAILURE(ret))
 		return ret;
 
-	if (!value64 || ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS) {
-		ret = AE_BAD_PARAMETER;
-		goto err;
-	}
-
-	sb = &ares->data.i2c_serial_bus;
-	if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_I2C) {
+	if (!value64 || !i2c_acpi_get_i2c_resource(ares, &sb)) {
 		ret = AE_BAD_PARAMETER;
 		goto err;
 	}
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index d12db6c316ea..e21ea3d23e6f 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -77,28 +77,6 @@ config UV_SYSFS
 	  To compile this driver as a module, choose M here: the module will
 	  be called uv_sysfs.
 
-config INTEL_WMI_SBL_FW_UPDATE
-	tristate "Intel WMI Slim Bootloader firmware update signaling driver"
-	depends on ACPI_WMI
-	help
-	  Say Y here if you want to be able to use the WMI interface to signal
-	  Slim Bootloader to trigger update on next reboot.
-
-	  To compile this driver as a module, choose M here: the module will
-	  be called intel-wmi-sbl-fw-update.
-
-config INTEL_WMI_THUNDERBOLT
-	tristate "Intel WMI thunderbolt force power driver"
-	depends on ACPI_WMI
-	help
-	  Say Y here if you want to be able to use the WMI interface on select
-	  systems to force the power control of Intel Thunderbolt controllers.
-	  This is useful for updating the firmware when devices are not plugged
-	  into the controller.
-
-	  To compile this driver as a module, choose M here: the module will
-	  be called intel-wmi-thunderbolt.
-
 config MXM_WMI
        tristate "WMI support for MXM Laptop Graphics"
        depends on ACPI_WMI
@@ -281,6 +259,7 @@ config ASUS_WMI
 	select INPUT_SPARSEKMAP
 	select LEDS_CLASS
 	select NEW_LEDS
+	select ACPI_PLATFORM_PROFILE
 	help
 	  Say Y here if you have a WMI aware Asus laptop (like Eee PCs or new
 	  Asus Notebooks).
@@ -302,6 +281,19 @@ config ASUS_NB_WMI
 	  If you have an ACPI-WMI compatible Asus Notebook, say Y or M
 	  here.
 
+config MERAKI_MX100
+	tristate "Cisco Meraki MX100 Platform Driver"
+	depends on GPIOLIB
+	depends on GPIO_ICH
+	depends on LEDS_CLASS
+	select LEDS_GPIO
+	help
+	  This driver provides support for the front button and LEDs on
+	  the Cisco Meraki MX100 (Tinkerbell) 1U appliance.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called meraki-mx100.
+
 config EEEPC_LAPTOP
 	tristate "Eee PC Hotkey Driver"
 	depends on ACPI
@@ -654,105 +646,6 @@ config THINKPAD_LMI
 
 source "drivers/platform/x86/intel/Kconfig"
 
-config INTEL_ATOMISP2_LED
-	tristate "Intel AtomISP2 camera LED driver"
-	depends on GPIOLIB && LEDS_GPIO
-	help
-	  Many Bay Trail and Cherry Trail devices come with a camera attached
-	  to Intel's Image Signal Processor. Linux currently does not have a
-	  driver for these, so they do not work as a camera. Some of these
-	  camera's have a LED which is controlled through a GPIO.
-
-	  Some of these devices have a firmware issue where the LED gets turned
-	  on at boot. This driver will turn the LED off at boot and also allows
-	  controlling the LED (repurposing it) through the sysfs LED interface.
-
-	  Which GPIO is attached to the LED is usually not described in the
-	  ACPI tables, so this driver contains per-system info about the GPIO
-	  inside the driver, this means that this driver only works on systems
-	  the driver knows about.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called intel_atomisp2_led.
-
-config INTEL_ATOMISP2_PM
-	tristate "Intel AtomISP2 dummy / power-management driver"
-	depends on PCI && IOSF_MBI && PM
-	depends on !INTEL_ATOMISP
-	help
-	  Power-management driver for Intel's Image Signal Processor found on
-	  Bay Trail and Cherry Trail devices. This dummy driver's sole purpose
-	  is to turn the ISP off (put it in D3) to save power and to allow
-	  entering of S0ix modes.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called intel_atomisp2_pm.
-
-config INTEL_HID_EVENT
-	tristate "INTEL HID Event"
-	depends on ACPI
-	depends on INPUT
-	depends on I2C
-	select INPUT_SPARSEKMAP
-	help
-	  This driver provides support for the Intel HID Event hotkey interface.
-	  Some laptops require this driver for hotkey support.
-
-	  To compile this driver as a module, choose M here: the module will
-	  be called intel_hid.
-
-config INTEL_INT0002_VGPIO
-	tristate "Intel ACPI INT0002 Virtual GPIO driver"
-	depends on GPIOLIB && ACPI && PM_SLEEP
-	select GPIOLIB_IRQCHIP
-	help
-	  Some peripherals on Bay Trail and Cherry Trail platforms signal a
-	  Power Management Event (PME) to the Power Management Controller (PMC)
-	  to wakeup the system. When this happens software needs to explicitly
-	  clear the PME bus 0 status bit in the GPE0a_STS register to avoid an
-	  IRQ storm on IRQ 9.
-
-	  This is modelled in ACPI through the INT0002 ACPI device, which is
-	  called a "Virtual GPIO controller" in ACPI because it defines the
-	  event handler to call when the PME triggers through _AEI and _L02
-	  methods as would be done for a real GPIO interrupt in ACPI.
-
-	  To compile this driver as a module, choose M here: the module will
-	  be called intel_int0002_vgpio.
-
-config INTEL_MENLOW
-	tristate "Thermal Management driver for Intel menlow platform"
-	depends on ACPI_THERMAL
-	select THERMAL
-	help
-	  ACPI thermal management enhancement driver on
-	  Intel Menlow platform.
-
-	  If unsure, say N.
-
-config INTEL_OAKTRAIL
-	tristate "Intel Oaktrail Platform Extras"
-	depends on ACPI
-	depends on ACPI_VIDEO || ACPI_VIDEO = n
-	depends on RFKILL && BACKLIGHT_CLASS_DEVICE && ACPI
-	help
-	  Intel Oaktrail platform need this driver to provide interfaces to
-	  enable/disable the Camera, WiFi, BT etc. devices. If in doubt, say Y
-	  here; it will only load on supported platforms.
-
-config INTEL_VBTN
-	tristate "INTEL VIRTUAL BUTTON"
-	depends on ACPI
-	depends on INPUT
-	depends on I2C
-	select INPUT_SPARSEKMAP
-	help
-	  This driver provides support for the Intel Virtual Button interface.
-	  Some laptops require this driver for power button support.
-
-	  To compile this driver as a module, choose M here: the module will
-	  be called intel_vbtn.
-
 config MSI_LAPTOP
 	tristate "MSI Laptop Extras"
 	depends on ACPI
@@ -1106,150 +999,6 @@ config INTEL_IPS
 	  functionality.  If in doubt, say Y here; it will only load on
 	  supported platforms.
 
-config INTEL_RST
-        tristate "Intel Rapid Start Technology Driver"
-	depends on ACPI
-	help
-	  This driver provides support for modifying parameters on systems
-	  equipped with Intel's Rapid Start Technology. When put in an ACPI
-	  sleep state, these devices will wake after either a configured
-	  timeout or when the system battery reaches a critical state,
-	  automatically copying memory contents to disk. On resume, the
-	  firmware will copy the memory contents back to RAM and resume the OS
-	  as usual.
-
-config INTEL_SMARTCONNECT
-        tristate "Intel Smart Connect disabling driver"
-	depends on ACPI
-	help
-	  Intel Smart Connect is a technology intended to permit devices to
-	  update state by resuming for a short period of time at regular
-	  intervals. If a user enables this functionality under Windows and
-	  then reboots into Linux, the system may remain configured to resume
-	  on suspend. In the absence of any userspace to support it, the system
-	  will then remain awake until something triggers another suspend.
-
-	  This driver checks to determine whether the device has Intel Smart
-	  Connect enabled, and if so disables it.
-
-source "drivers/platform/x86/intel_speed_select_if/Kconfig"
-
-config INTEL_TURBO_MAX_3
-	bool "Intel Turbo Boost Max Technology 3.0 enumeration driver"
-	depends on X86_64 && SCHED_MC_PRIO
-	help
-	  This driver reads maximum performance ratio of each CPU and set up
-	  the scheduler priority metrics. In this way scheduler can prefer
-	  CPU with higher performance to schedule tasks.
-	  This driver is only required when the system is not using Hardware
-	  P-States (HWP). In HWP mode, priority can be read from ACPI tables.
-
-config INTEL_UNCORE_FREQ_CONTROL
-	tristate "Intel Uncore frequency control driver"
-	depends on X86_64
-	help
-	  This driver allows control of uncore frequency limits on
-	  supported server platforms.
-	  Uncore frequency controls RING/LLC (last-level cache) clocks.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called intel-uncore-frequency.
-
-config INTEL_BXTWC_PMIC_TMU
-	tristate "Intel BXT Whiskey Cove TMU Driver"
-	depends on REGMAP
-	depends on MFD_INTEL_PMC_BXT
-	depends on INTEL_SOC_PMIC_BXTWC
-	help
-	  Select this driver to use Intel BXT Whiskey Cove PMIC TMU feature.
-	  This driver enables the alarm wakeup functionality in the TMU unit
-	  of Whiskey Cove PMIC.
-
-config INTEL_CHTDC_TI_PWRBTN
-	tristate "Intel Cherry Trail Dollar Cove TI power button driver"
-	depends on INTEL_SOC_PMIC_CHTDC_TI
-	depends on INPUT
-	help
-	  This option adds a power button driver driver for Dollar Cove TI
-	  PMIC on Intel Cherry Trail devices.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called intel_chtdc_ti_pwrbtn.
-
-config INTEL_MRFLD_PWRBTN
-	tristate "Intel Merrifield Basin Cove power button driver"
-	depends on INTEL_SOC_PMIC_MRFLD
-	depends on INPUT
-	help
-	  This option adds a power button driver for Basin Cove PMIC
-	  on Intel Merrifield devices.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called intel_mrfld_pwrbtn.
-
-config INTEL_PMC_CORE
-	tristate "Intel PMC Core driver"
-	depends on PCI
-	depends on ACPI
-	help
-	  The Intel Platform Controller Hub for Intel Core SoCs provides access
-	  to Power Management Controller registers via various interfaces. This
-	  driver can utilize debugging capabilities and supported features as
-	  exposed by the Power Management Controller. It also may perform some
-	  tasks in the PMC in order to enable transition into the SLPS0 state.
-	  It should be selected on all Intel platforms supported by the driver.
-
-	  Supported features:
-		- SLP_S0_RESIDENCY counter
-		- PCH IP Power Gating status
-		- LTR Ignore / LTR Show
-		- MPHY/PLL gating status (Sunrisepoint PCH only)
-		- SLPS0 Debug registers (Cannonlake/Icelake PCH)
-		- Low Power Mode registers (Tigerlake and beyond)
-		- PMC quirks as needed to enable SLPS0/S0ix
-
-config INTEL_PMT_CLASS
-	tristate
-	help
-	  The Intel Platform Monitoring Technology (PMT) class driver provides
-	  the basic sysfs interface and file hierarchy used by PMT devices.
-
-	  For more information, see:
-	  <file:Documentation/ABI/testing/sysfs-class-intel_pmt>
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called intel_pmt_class.
-
-config INTEL_PMT_TELEMETRY
-	tristate "Intel Platform Monitoring Technology (PMT) Telemetry driver"
-	depends on MFD_INTEL_PMT
-	select INTEL_PMT_CLASS
-	help
-	  The Intel Platform Monitory Technology (PMT) Telemetry driver provides
-	  access to hardware telemetry metrics on devices that support the
-	  feature.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called intel_pmt_telemetry.
-
-config INTEL_PMT_CRASHLOG
-	tristate "Intel Platform Monitoring Technology (PMT) Crashlog driver"
-	depends on MFD_INTEL_PMT
-	select INTEL_PMT_CLASS
-	help
-	  The Intel Platform Monitoring Technology (PMT) crashlog driver provides
-	  access to hardware crashlog capabilities on devices that support the
-	  feature.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called intel_pmt_crashlog.
-
-config INTEL_PUNIT_IPC
-	tristate "Intel P-Unit IPC Driver"
-	help
-	  This driver provides support for Intel P-Unit Mailbox IPC mechanism,
-	  which is used to bridge the communications between kernel and P-Unit.
-
 config INTEL_SCU_IPC
 	bool
 
@@ -1297,18 +1046,6 @@ config INTEL_SCU_IPC_UTIL
 	  low level access for debug work and updating the firmware. Say
 	  N unless you will be doing this on an Intel MID platform.
 
-config INTEL_TELEMETRY
-	tristate "Intel SoC Telemetry Driver"
-	depends on X86_64
-	depends on MFD_INTEL_PMC_BXT
-	depends on INTEL_PUNIT_IPC
-	help
-	  This driver provides interfaces to configure and use
-	  telemetry for INTEL SoC from APL onwards. It is also
-	  used to get various SoC events and parameters
-	  directly via debugfs files. Various tools may use
-	  this interface for SoC state monitoring.
-
 endif # X86_PLATFORM_DEVICES
 
 config PMC_ATOM
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index 7ee369aab10d..69690e26bb6d 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -10,8 +10,6 @@ obj-$(CONFIG_WMI_BMOF)		+= wmi-bmof.o
 
 # WMI drivers
 obj-$(CONFIG_HUAWEI_WMI)		+= huawei-wmi.o
-obj-$(CONFIG_INTEL_WMI_SBL_FW_UPDATE)	+= intel-wmi-sbl-fw-update.o
-obj-$(CONFIG_INTEL_WMI_THUNDERBOLT)	+= intel-wmi-thunderbolt.o
 obj-$(CONFIG_MXM_WMI)			+= mxm-wmi.o
 obj-$(CONFIG_PEAQ_WMI)			+= peaq-wmi.o
 obj-$(CONFIG_XIAOMI_WMI)		+= xiaomi-wmi.o
@@ -39,6 +37,9 @@ obj-$(CONFIG_ASUS_NB_WMI)	+= asus-nb-wmi.o
 obj-$(CONFIG_EEEPC_LAPTOP)	+= eeepc-laptop.o
 obj-$(CONFIG_EEEPC_WMI)		+= eeepc-wmi.o
 
+# Cisco/Meraki
+obj-$(CONFIG_MERAKI_MX100)	+= meraki-mx100.o
+
 # Dell
 obj-$(CONFIG_X86_PLATFORM_DRIVERS_DELL)		+= dell/
 
@@ -68,14 +69,6 @@ obj-$(CONFIG_THINKPAD_LMI)	+= think-lmi.o
 # Intel
 obj-$(CONFIG_X86_PLATFORM_DRIVERS_INTEL)		+= intel/
 
-obj-$(CONFIG_INTEL_ATOMISP2_LED)	+= intel_atomisp2_led.o
-obj-$(CONFIG_INTEL_ATOMISP2_PM)		+= intel_atomisp2_pm.o
-obj-$(CONFIG_INTEL_HID_EVENT)		+= intel-hid.o
-obj-$(CONFIG_INTEL_INT0002_VGPIO)	+= intel_int0002_vgpio.o
-obj-$(CONFIG_INTEL_MENLOW)		+= intel_menlow.o
-obj-$(CONFIG_INTEL_OAKTRAIL)		+= intel_oaktrail.o
-obj-$(CONFIG_INTEL_VBTN)		+= intel-vbtn.o
-
 # MSI
 obj-$(CONFIG_MSI_LAPTOP)	+= msi-laptop.o
 obj-$(CONFIG_MSI_WMI)		+= msi-wmi.o
@@ -118,27 +111,11 @@ obj-$(CONFIG_WIRELESS_HOTKEY)		+= wireless-hotkey.o
 
 # Intel uncore drivers
 obj-$(CONFIG_INTEL_IPS)				+= intel_ips.o
-obj-$(CONFIG_INTEL_RST)				+= intel-rst.o
-obj-$(CONFIG_INTEL_SMARTCONNECT)		+= intel-smartconnect.o
-obj-$(CONFIG_INTEL_SPEED_SELECT_INTERFACE)	+= intel_speed_select_if/
-obj-$(CONFIG_INTEL_TURBO_MAX_3)			+= intel_turbo_max_3.o
-obj-$(CONFIG_INTEL_UNCORE_FREQ_CONTROL)		+= intel-uncore-frequency.o
 
 # Intel PMIC / PMC / P-Unit devices
-obj-$(CONFIG_INTEL_BXTWC_PMIC_TMU)	+= intel_bxtwc_tmu.o
-obj-$(CONFIG_INTEL_CHTDC_TI_PWRBTN)	+= intel_chtdc_ti_pwrbtn.o
-obj-$(CONFIG_INTEL_MRFLD_PWRBTN)	+= intel_mrfld_pwrbtn.o
-obj-$(CONFIG_INTEL_PMC_CORE)		+= intel_pmc_core.o intel_pmc_core_pltdrv.o
-obj-$(CONFIG_INTEL_PMT_CLASS)		+= intel_pmt_class.o
-obj-$(CONFIG_INTEL_PMT_TELEMETRY)	+= intel_pmt_telemetry.o
-obj-$(CONFIG_INTEL_PMT_CRASHLOG)	+= intel_pmt_crashlog.o
-obj-$(CONFIG_INTEL_PUNIT_IPC)		+= intel_punit_ipc.o
 obj-$(CONFIG_INTEL_SCU_IPC)		+= intel_scu_ipc.o
 obj-$(CONFIG_INTEL_SCU_PCI)		+= intel_scu_pcidrv.o
 obj-$(CONFIG_INTEL_SCU_PLATFORM)	+= intel_scu_pltdrv.o
 obj-$(CONFIG_INTEL_SCU_WDT)		+= intel_scu_wdt.o
 obj-$(CONFIG_INTEL_SCU_IPC_UTIL)	+= intel_scu_ipcutil.o
-obj-$(CONFIG_INTEL_TELEMETRY)		+= intel_telemetry_core.o \
-					   intel_telemetry_pltdrv.o \
-					   intel_telemetry_debugfs.o
 obj-$(CONFIG_PMC_ATOM)			+= pmc_atom.o
diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
index 85db9403cc14..694b45ed06a2 100644
--- a/drivers/platform/x86/acer-wmi.c
+++ b/drivers/platform/x86/acer-wmi.c
@@ -60,6 +60,11 @@ MODULE_LICENSE("GPL");
 #define ACER_WMID_GET_THREEG_METHODID		10
 #define ACER_WMID_SET_THREEG_METHODID		11
 
+#define ACER_WMID_SET_GAMING_LED_METHODID 2
+#define ACER_WMID_GET_GAMING_LED_METHODID 4
+#define ACER_WMID_SET_GAMING_FAN_BEHAVIOR 14
+#define ACER_WMID_SET_GAMING_MISC_SETTING_METHODID 22
+
 /*
  * Acer ACPI method GUIDs
  */
@@ -68,6 +73,7 @@ MODULE_LICENSE("GPL");
 #define WMID_GUID1		"6AF4F258-B401-42FD-BE91-3D4AC2D7C0D3"
 #define WMID_GUID2		"95764E09-FB56-4E83-B31A-37761F60994A"
 #define WMID_GUID3		"61EF69EA-865C-4BC3-A502-A0DEBA0CB531"
+#define WMID_GUID4		"7A4DDFE7-5B5D-40B4-8595-4408E0CC7F56"
 
 /*
  * Acer ACPI event GUIDs
@@ -81,6 +87,7 @@ MODULE_ALIAS("wmi:676AA15E-6A47-4D9F-A2CC-1E6D18D14026");
 enum acer_wmi_event_ids {
 	WMID_HOTKEY_EVENT = 0x1,
 	WMID_ACCEL_OR_KBD_DOCK_EVENT = 0x5,
+	WMID_GAMING_TURBO_KEY_EVENT = 0x7,
 };
 
 static const struct key_entry acer_wmi_keymap[] __initconst = {
@@ -215,6 +222,9 @@ struct hotkey_function_type_aa {
 #define ACER_CAP_THREEG			BIT(4)
 #define ACER_CAP_SET_FUNCTION_MODE	BIT(5)
 #define ACER_CAP_KBD_DOCK		BIT(6)
+#define ACER_CAP_TURBO_OC     BIT(7)
+#define ACER_CAP_TURBO_LED     BIT(8)
+#define ACER_CAP_TURBO_FAN     BIT(9)
 
 /*
  * Interface type flags
@@ -301,6 +311,9 @@ struct quirk_entry {
 	u8 mailled;
 	s8 brightness;
 	u8 bluetooth;
+	u8 turbo;
+	u8 cpu_fans;
+	u8 gpu_fans;
 };
 
 static struct quirk_entry *quirks;
@@ -312,6 +325,10 @@ static void __init set_quirks(void)
 
 	if (quirks->brightness)
 		interface->capability |= ACER_CAP_BRIGHTNESS;
+
+	if (quirks->turbo)
+		interface->capability |= ACER_CAP_TURBO_OC | ACER_CAP_TURBO_LED
+					 | ACER_CAP_TURBO_FAN;
 }
 
 static int __init dmi_matched(const struct dmi_system_id *dmi)
@@ -340,6 +357,12 @@ static struct quirk_entry quirk_acer_travelmate_2490 = {
 	.mailled = 1,
 };
 
+static struct quirk_entry quirk_acer_predator_ph315_53 = {
+	.turbo = 1,
+	.cpu_fans = 1,
+	.gpu_fans = 1,
+};
+
 /* This AMW0 laptop has no bluetooth */
 static struct quirk_entry quirk_medion_md_98300 = {
 	.wireless = 1,
@@ -508,6 +531,15 @@ static const struct dmi_system_id acer_quirks[] __initconst = {
 		.driver_data = &quirk_acer_travelmate_2490,
 	},
 	{
+		.callback = dmi_matched,
+		.ident = "Acer Predator PH315-53",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Predator PH315-53"),
+		},
+		.driver_data = &quirk_acer_predator_ph315_53,
+	},
+	{
 		.callback = set_force_caps,
 		.ident = "Acer Aspire Switch 10E SW3-016",
 		.matches = {
@@ -1345,6 +1377,114 @@ static struct wmi_interface wmid_v2_interface = {
 };
 
 /*
+ * WMID Gaming interface
+ */
+
+static acpi_status
+WMI_gaming_execute_u64(u32 method_id, u64 in, u64 *out)
+{
+	struct acpi_buffer input = { (acpi_size) sizeof(u64), (void *)(&in) };
+	struct acpi_buffer result = { ACPI_ALLOCATE_BUFFER, NULL };
+	union acpi_object *obj;
+	u32 tmp = 0;
+	acpi_status status;
+
+	status = wmi_evaluate_method(WMID_GUID4, 0, method_id, &input, &result);
+
+	if (ACPI_FAILURE(status))
+		return status;
+	obj = (union acpi_object *) result.pointer;
+
+	if (obj) {
+		if (obj->type == ACPI_TYPE_BUFFER) {
+			if (obj->buffer.length == sizeof(u32))
+				tmp = *((u32 *) obj->buffer.pointer);
+			else if (obj->buffer.length == sizeof(u64))
+				tmp = *((u64 *) obj->buffer.pointer);
+		} else if (obj->type == ACPI_TYPE_INTEGER) {
+			tmp = (u64) obj->integer.value;
+		}
+	}
+
+	if (out)
+		*out = tmp;
+
+	kfree(result.pointer);
+
+	return status;
+}
+
+static acpi_status WMID_gaming_set_u64(u64 value, u32 cap)
+{
+	u32 method_id = 0;
+
+	if (!(interface->capability & cap))
+		return AE_BAD_PARAMETER;
+
+	switch (cap) {
+	case ACER_CAP_TURBO_LED:
+		method_id = ACER_WMID_SET_GAMING_LED_METHODID;
+		break;
+	case ACER_CAP_TURBO_FAN:
+		method_id = ACER_WMID_SET_GAMING_FAN_BEHAVIOR;
+		break;
+	case ACER_CAP_TURBO_OC:
+		method_id = ACER_WMID_SET_GAMING_MISC_SETTING_METHODID;
+		break;
+	default:
+		return AE_BAD_PARAMETER;
+	}
+
+	return WMI_gaming_execute_u64(method_id, value, NULL);
+}
+
+static acpi_status WMID_gaming_get_u64(u64 *value, u32 cap)
+{
+	acpi_status status;
+	u64 result;
+	u64 input;
+	u32 method_id;
+
+	if (!(interface->capability & cap))
+		return AE_BAD_PARAMETER;
+
+	switch (cap) {
+	case ACER_CAP_TURBO_LED:
+		method_id = ACER_WMID_GET_GAMING_LED_METHODID;
+		input = 0x1;
+		break;
+	default:
+		return AE_BAD_PARAMETER;
+	}
+	status = WMI_gaming_execute_u64(method_id, input, &result);
+	if (ACPI_SUCCESS(status))
+		*value = (u64) result;
+
+	return status;
+}
+
+static void WMID_gaming_set_fan_mode(u8 fan_mode)
+{
+	/* fan_mode = 1 is used for auto, fan_mode = 2 used for turbo*/
+	u64 gpu_fan_config1 = 0, gpu_fan_config2 = 0;
+	int i;
+
+	if (quirks->cpu_fans > 0)
+		gpu_fan_config2 |= 1;
+	for (i = 0; i < (quirks->cpu_fans + quirks->gpu_fans); ++i)
+		gpu_fan_config2 |= 1 << (i + 1);
+	for (i = 0; i < quirks->gpu_fans; ++i)
+		gpu_fan_config2 |= 1 << (i + 3);
+	if (quirks->cpu_fans > 0)
+		gpu_fan_config1 |= fan_mode;
+	for (i = 0; i < (quirks->cpu_fans + quirks->gpu_fans); ++i)
+		gpu_fan_config1 |= fan_mode << (2 * i + 2);
+	for (i = 0; i < quirks->gpu_fans; ++i)
+		gpu_fan_config1 |= fan_mode << (2 * i + 6);
+	WMID_gaming_set_u64(gpu_fan_config2 | gpu_fan_config1 << 16, ACER_CAP_TURBO_FAN);
+}
+
+/*
  * Generic Device (interface-independent)
  */
 
@@ -1576,6 +1716,41 @@ static int acer_gsensor_event(void)
 }
 
 /*
+ *  Predator series turbo button
+ */
+static int acer_toggle_turbo(void)
+{
+	u64 turbo_led_state;
+
+	/* Get current state from turbo button */
+	if (ACPI_FAILURE(WMID_gaming_get_u64(&turbo_led_state, ACER_CAP_TURBO_LED)))
+		return -1;
+
+	if (turbo_led_state) {
+		/* Turn off turbo led */
+		WMID_gaming_set_u64(0x1, ACER_CAP_TURBO_LED);
+
+		/* Set FAN mode to auto */
+		WMID_gaming_set_fan_mode(0x1);
+
+		/* Set OC to normal */
+		WMID_gaming_set_u64(0x5, ACER_CAP_TURBO_OC);
+		WMID_gaming_set_u64(0x7, ACER_CAP_TURBO_OC);
+	} else {
+		/* Turn on turbo led */
+		WMID_gaming_set_u64(0x10001, ACER_CAP_TURBO_LED);
+
+		/* Set FAN mode to turbo */
+		WMID_gaming_set_fan_mode(0x2);
+
+		/* Set OC to turbo mode */
+		WMID_gaming_set_u64(0x205, ACER_CAP_TURBO_OC);
+		WMID_gaming_set_u64(0x207, ACER_CAP_TURBO_OC);
+	}
+	return turbo_led_state;
+}
+
+/*
  * Switch series keyboard dock status
  */
 static int acer_kbd_dock_state_to_sw_tablet_mode(u8 kbd_dock_state)
@@ -1872,6 +2047,10 @@ static void acer_wmi_notify(u32 value, void *context)
 		acer_gsensor_event();
 		acer_kbd_dock_event(&return_value);
 		break;
+	case WMID_GAMING_TURBO_KEY_EVENT:
+		if (return_value.key_num == 0x4)
+			acer_toggle_turbo();
+		break;
 	default:
 		pr_warn("Unknown function number - %d - %d\n",
 			return_value.function, return_value.key_num);
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index ebaeb7bb80f5..e14fb5fa7324 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -26,6 +26,7 @@
 #include <linux/rfkill.h>
 #include <linux/pci.h>
 #include <linux/pci_hotplug.h>
+#include <linux/platform_profile.h>
 #include <linux/power_supply.h>
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
@@ -210,12 +211,24 @@ struct asus_wmi {
 	u8 fan_boost_mode_mask;
 	u8 fan_boost_mode;
 
+	bool egpu_enable_available; // 0 = enable
+	bool egpu_enable;
+
+	bool dgpu_disable_available;
+	bool dgpu_disable;
+
 	bool throttle_thermal_policy_available;
 	u8 throttle_thermal_policy_mode;
 
+	struct platform_profile_handler platform_profile_handler;
+	bool platform_profile_support;
+
 	// The RSOC controls the maximum charging percentage.
 	bool battery_rsoc_available;
 
+	bool panel_overdrive_available;
+	bool panel_overdrive;
+
 	struct hotplug_slot hotplug_slot;
 	struct mutex hotplug_lock;
 	struct mutex wmi_lock;
@@ -424,6 +437,181 @@ static void lid_flip_tablet_mode_get_state(struct asus_wmi *asus)
 	}
 }
 
+/* dGPU ********************************************************************/
+static int dgpu_disable_check_present(struct asus_wmi *asus)
+{
+	u32 result;
+	int err;
+
+	asus->dgpu_disable_available = false;
+
+	err = asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_DGPU, &result);
+	if (err) {
+		if (err == -ENODEV)
+			return 0;
+		return err;
+	}
+
+	if (result & ASUS_WMI_DSTS_PRESENCE_BIT) {
+		asus->dgpu_disable_available = true;
+		asus->dgpu_disable = result & ASUS_WMI_DSTS_STATUS_BIT;
+	}
+
+	return 0;
+}
+
+static int dgpu_disable_write(struct asus_wmi *asus)
+{
+	u32 retval;
+	u8 value;
+	int err;
+
+	/* Don't rely on type conversion */
+	value = asus->dgpu_disable ? 1 : 0;
+
+	err = asus_wmi_set_devstate(ASUS_WMI_DEVID_DGPU, value, &retval);
+	if (err) {
+		pr_warn("Failed to set dgpu disable: %d\n", err);
+		return err;
+	}
+
+	if (retval > 1) {
+		pr_warn("Failed to set dgpu disable (retval): 0x%x\n", retval);
+		return -EIO;
+	}
+
+	sysfs_notify(&asus->platform_device->dev.kobj, NULL, "dgpu_disable");
+
+	return 0;
+}
+
+static ssize_t dgpu_disable_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct asus_wmi *asus = dev_get_drvdata(dev);
+	u8 mode = asus->dgpu_disable;
+
+	return sysfs_emit(buf, "%d\n", mode);
+}
+
+/*
+ * A user may be required to store the value twice, typcial store first, then
+ * rescan PCI bus to activate power, then store a second time to save correctly.
+ * The reason for this is that an extra code path in the ACPI is enabled when
+ * the device and bus are powered.
+ */
+static ssize_t dgpu_disable_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	bool disable;
+	int result;
+
+	struct asus_wmi *asus = dev_get_drvdata(dev);
+
+	result = kstrtobool(buf, &disable);
+	if (result)
+		return result;
+
+	asus->dgpu_disable = disable;
+
+	result = dgpu_disable_write(asus);
+	if (result)
+		return result;
+
+	return count;
+}
+
+static DEVICE_ATTR_RW(dgpu_disable);
+
+/* eGPU ********************************************************************/
+static int egpu_enable_check_present(struct asus_wmi *asus)
+{
+	u32 result;
+	int err;
+
+	asus->egpu_enable_available = false;
+
+	err = asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_EGPU, &result);
+	if (err) {
+		if (err == -ENODEV)
+			return 0;
+		return err;
+	}
+
+	if (result & ASUS_WMI_DSTS_PRESENCE_BIT) {
+		asus->egpu_enable_available = true;
+		asus->egpu_enable = result & ASUS_WMI_DSTS_STATUS_BIT;
+	}
+
+	return 0;
+}
+
+static int egpu_enable_write(struct asus_wmi *asus)
+{
+	u32 retval;
+	u8 value;
+	int err;
+
+	/* Don't rely on type conversion */
+	value = asus->egpu_enable ? 1 : 0;
+
+	err = asus_wmi_set_devstate(ASUS_WMI_DEVID_EGPU, value, &retval);
+
+	if (err) {
+		pr_warn("Failed to set egpu disable: %d\n", err);
+		return err;
+	}
+
+	if (retval > 1) {
+		pr_warn("Failed to set egpu disable (retval): 0x%x\n", retval);
+		return -EIO;
+	}
+
+	sysfs_notify(&asus->platform_device->dev.kobj, NULL, "egpu_enable");
+
+	return 0;
+}
+
+static ssize_t egpu_enable_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct asus_wmi *asus = dev_get_drvdata(dev);
+	bool mode = asus->egpu_enable;
+
+	return sysfs_emit(buf, "%d\n", mode);
+}
+
+/* The ACPI call to enable the eGPU also disables the internal dGPU */
+static ssize_t egpu_enable_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	bool enable;
+	int result;
+
+	struct asus_wmi *asus = dev_get_drvdata(dev);
+
+	result = kstrtobool(buf, &enable);
+	if (result)
+		return result;
+
+	asus->egpu_enable = enable;
+
+	result = egpu_enable_write(asus);
+	if (result)
+		return result;
+
+	/* Ensure that the kernel status of dgpu is updated */
+	result = dgpu_disable_check_present(asus);
+	if (result)
+		return result;
+
+	return count;
+}
+
+static DEVICE_ATTR_RW(egpu_enable);
+
 /* Battery ********************************************************************/
 
 /* The battery maximum charging percentage */
@@ -1221,6 +1409,87 @@ exit:
 	return result;
 }
 
+/* Panel Overdrive ************************************************************/
+static int panel_od_check_present(struct asus_wmi *asus)
+{
+	u32 result;
+	int err;
+
+	asus->panel_overdrive_available = false;
+
+	err = asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_PANEL_OD, &result);
+	if (err) {
+		if (err == -ENODEV)
+			return 0;
+		return err;
+	}
+
+	if (result & ASUS_WMI_DSTS_PRESENCE_BIT) {
+		asus->panel_overdrive_available = true;
+		asus->panel_overdrive = result & ASUS_WMI_DSTS_STATUS_BIT;
+	}
+
+	return 0;
+}
+
+static int panel_od_write(struct asus_wmi *asus)
+{
+	u32 retval;
+	u8 value;
+	int err;
+
+	/* Don't rely on type conversion */
+	value = asus->panel_overdrive ? 1 : 0;
+
+	err = asus_wmi_set_devstate(ASUS_WMI_DEVID_PANEL_OD, value, &retval);
+
+	if (err) {
+		pr_warn("Failed to set panel overdrive: %d\n", err);
+		return err;
+	}
+
+	if (retval > 1) {
+		pr_warn("Failed to set panel overdrive (retval): 0x%x\n", retval);
+		return -EIO;
+	}
+
+	sysfs_notify(&asus->platform_device->dev.kobj, NULL, "panel_od");
+
+	return 0;
+}
+
+static ssize_t panel_od_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct asus_wmi *asus = dev_get_drvdata(dev);
+
+	return sysfs_emit(buf, "%d\n", asus->panel_overdrive);
+}
+
+static ssize_t panel_od_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	bool overdrive;
+	int result;
+
+	struct asus_wmi *asus = dev_get_drvdata(dev);
+
+	result = kstrtobool(buf, &overdrive);
+	if (result)
+		return result;
+
+	asus->panel_overdrive = overdrive;
+	result = panel_od_write(asus);
+
+	if (result)
+		return result;
+
+	return count;
+}
+
+static DEVICE_ATTR_RW(panel_od);
+
 /* Quirks *********************************************************************/
 
 static void asus_wmi_set_xusb2pr(struct asus_wmi *asus)
@@ -1838,12 +2107,23 @@ static int throttle_thermal_policy_set_default(struct asus_wmi *asus)
 static int throttle_thermal_policy_switch_next(struct asus_wmi *asus)
 {
 	u8 new_mode = asus->throttle_thermal_policy_mode + 1;
+	int err;
 
 	if (new_mode > ASUS_THROTTLE_THERMAL_POLICY_SILENT)
 		new_mode = ASUS_THROTTLE_THERMAL_POLICY_DEFAULT;
 
 	asus->throttle_thermal_policy_mode = new_mode;
-	return throttle_thermal_policy_write(asus);
+	err = throttle_thermal_policy_write(asus);
+	if (err)
+		return err;
+
+	/*
+	 * Ensure that platform_profile updates userspace with the change to ensure
+	 * that platform_profile and throttle_thermal_policy_mode are in sync.
+	 */
+	platform_profile_notify();
+
+	return 0;
 }
 
 static ssize_t throttle_thermal_policy_show(struct device *dev,
@@ -1859,9 +2139,10 @@ static ssize_t throttle_thermal_policy_store(struct device *dev,
 				    struct device_attribute *attr,
 				    const char *buf, size_t count)
 {
-	int result;
-	u8 new_mode;
 	struct asus_wmi *asus = dev_get_drvdata(dev);
+	u8 new_mode;
+	int result;
+	int err;
 
 	result = kstrtou8(buf, 10, &new_mode);
 	if (result < 0)
@@ -1871,7 +2152,15 @@ static ssize_t throttle_thermal_policy_store(struct device *dev,
 		return -EINVAL;
 
 	asus->throttle_thermal_policy_mode = new_mode;
-	throttle_thermal_policy_write(asus);
+	err = throttle_thermal_policy_write(asus);
+	if (err)
+		return err;
+
+	/*
+	 * Ensure that platform_profile updates userspace with the change to ensure
+	 * that platform_profile and throttle_thermal_policy_mode are in sync.
+	 */
+	platform_profile_notify();
 
 	return count;
 }
@@ -1879,6 +2168,91 @@ static ssize_t throttle_thermal_policy_store(struct device *dev,
 // Throttle thermal policy: 0 - default, 1 - overboost, 2 - silent
 static DEVICE_ATTR_RW(throttle_thermal_policy);
 
+/* Platform profile ***********************************************************/
+static int platform_profile_get(struct platform_profile_handler *pprof,
+				enum platform_profile_option *profile)
+{
+	struct asus_wmi *asus;
+	int tp;
+
+	asus = container_of(pprof, struct asus_wmi, platform_profile_handler);
+
+	tp = asus->throttle_thermal_policy_mode;
+
+	switch (tp) {
+	case ASUS_THROTTLE_THERMAL_POLICY_DEFAULT:
+		*profile = PLATFORM_PROFILE_BALANCED;
+		break;
+	case ASUS_THROTTLE_THERMAL_POLICY_OVERBOOST:
+		*profile = PLATFORM_PROFILE_PERFORMANCE;
+		break;
+	case ASUS_THROTTLE_THERMAL_POLICY_SILENT:
+		*profile = PLATFORM_PROFILE_QUIET;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int platform_profile_set(struct platform_profile_handler *pprof,
+				enum platform_profile_option profile)
+{
+	struct asus_wmi *asus;
+	int tp;
+
+	asus = container_of(pprof, struct asus_wmi, platform_profile_handler);
+
+	switch (profile) {
+	case PLATFORM_PROFILE_PERFORMANCE:
+		tp = ASUS_THROTTLE_THERMAL_POLICY_OVERBOOST;
+		break;
+	case PLATFORM_PROFILE_BALANCED:
+		tp = ASUS_THROTTLE_THERMAL_POLICY_DEFAULT;
+		break;
+	case PLATFORM_PROFILE_QUIET:
+		tp = ASUS_THROTTLE_THERMAL_POLICY_SILENT;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	asus->throttle_thermal_policy_mode = tp;
+	return throttle_thermal_policy_write(asus);
+}
+
+static int platform_profile_setup(struct asus_wmi *asus)
+{
+	struct device *dev = &asus->platform_device->dev;
+	int err;
+
+	/*
+	 * Not an error if a component platform_profile relies on is unavailable
+	 * so early return, skipping the setup of platform_profile.
+	 */
+	if (!asus->throttle_thermal_policy_available)
+		return 0;
+
+	dev_info(dev, "Using throttle_thermal_policy for platform_profile support\n");
+
+	asus->platform_profile_handler.profile_get = platform_profile_get;
+	asus->platform_profile_handler.profile_set = platform_profile_set;
+
+	set_bit(PLATFORM_PROFILE_QUIET, asus->platform_profile_handler.choices);
+	set_bit(PLATFORM_PROFILE_BALANCED,
+		asus->platform_profile_handler.choices);
+	set_bit(PLATFORM_PROFILE_PERFORMANCE,
+		asus->platform_profile_handler.choices);
+
+	err = platform_profile_register(&asus->platform_profile_handler);
+	if (err)
+		return err;
+
+	asus->platform_profile_support = true;
+	return 0;
+}
+
 /* Backlight ******************************************************************/
 
 static int read_backlight_power(struct asus_wmi *asus)
@@ -2328,10 +2702,13 @@ static struct attribute *platform_attributes[] = {
 	&dev_attr_camera.attr,
 	&dev_attr_cardr.attr,
 	&dev_attr_touchpad.attr,
+	&dev_attr_egpu_enable.attr,
+	&dev_attr_dgpu_disable.attr,
 	&dev_attr_lid_resume.attr,
 	&dev_attr_als_enable.attr,
 	&dev_attr_fan_boost_mode.attr,
 	&dev_attr_throttle_thermal_policy.attr,
+	&dev_attr_panel_od.attr,
 	NULL
 };
 
@@ -2353,10 +2730,16 @@ static umode_t asus_sysfs_is_visible(struct kobject *kobj,
 		devid = ASUS_WMI_DEVID_LID_RESUME;
 	else if (attr == &dev_attr_als_enable.attr)
 		devid = ASUS_WMI_DEVID_ALS_ENABLE;
+	else if (attr == &dev_attr_egpu_enable.attr)
+		ok = asus->egpu_enable_available;
+	else if (attr == &dev_attr_dgpu_disable.attr)
+		ok = asus->dgpu_disable_available;
 	else if (attr == &dev_attr_fan_boost_mode.attr)
 		ok = asus->fan_boost_mode_available;
 	else if (attr == &dev_attr_throttle_thermal_policy.attr)
 		ok = asus->throttle_thermal_policy_available;
+	else if (attr == &dev_attr_panel_od.attr)
+		ok = asus->panel_overdrive_available;
 
 	if (devid != -1)
 		ok = !(asus_wmi_get_devstate_simple(asus, devid) < 0);
@@ -2612,6 +2995,14 @@ static int asus_wmi_add(struct platform_device *pdev)
 	if (err)
 		goto fail_platform;
 
+	err = egpu_enable_check_present(asus);
+	if (err)
+		goto fail_egpu_enable;
+
+	err = dgpu_disable_check_present(asus);
+	if (err)
+		goto fail_dgpu_disable;
+
 	err = fan_boost_mode_check_present(asus);
 	if (err)
 		goto fail_fan_boost_mode;
@@ -2622,6 +3013,14 @@ static int asus_wmi_add(struct platform_device *pdev)
 	else
 		throttle_thermal_policy_set_default(asus);
 
+	err = platform_profile_setup(asus);
+	if (err)
+		goto fail_platform_profile_setup;
+
+	err = panel_od_check_present(asus);
+	if (err)
+		goto fail_panel_od;
+
 	err = asus_wmi_sysfs_init(asus->platform_device);
 	if (err)
 		goto fail_sysfs;
@@ -2707,8 +3106,14 @@ fail_input:
 	asus_wmi_sysfs_exit(asus->platform_device);
 fail_sysfs:
 fail_throttle_thermal_policy:
+fail_platform_profile_setup:
+	if (asus->platform_profile_support)
+		platform_profile_remove();
 fail_fan_boost_mode:
+fail_egpu_enable:
+fail_dgpu_disable:
 fail_platform:
+fail_panel_od:
 	kfree(asus);
 	return err;
 }
@@ -2728,6 +3133,9 @@ static int asus_wmi_remove(struct platform_device *device)
 	asus_fan_set_auto(asus);
 	asus_wmi_battery_exit(asus);
 
+	if (asus->platform_profile_support)
+		platform_profile_remove();
+
 	kfree(asus);
 	return 0;
 }
diff --git a/drivers/platform/x86/dell/Kconfig b/drivers/platform/x86/dell/Kconfig
index 9e7314d90bea..821aba31821c 100644
--- a/drivers/platform/x86/dell/Kconfig
+++ b/drivers/platform/x86/dell/Kconfig
@@ -140,7 +140,7 @@ config DELL_SMBIOS_SMM
 config DELL_SMO8800
 	tristate "Dell Latitude freefall driver (ACPI SMO88XX)"
 	default m
-	depends on ACPI
+	depends on ACPI || COMPILE_TEST
 	help
 	  Say Y here if you want to support SMO88XX freefall devices
 	  on Dell Latitude laptops.
diff --git a/drivers/platform/x86/dell/dcdbas.c b/drivers/platform/x86/dell/dcdbas.c
index 28447c180be8..5e63d6225048 100644
--- a/drivers/platform/x86/dell/dcdbas.c
+++ b/drivers/platform/x86/dell/dcdbas.c
@@ -278,9 +278,9 @@ int dcdbas_smi_request(struct smi_cmd *smi_cmd)
 	}
 
 	/* SMI requires CPU 0 */
-	get_online_cpus();
+	cpus_read_lock();
 	ret = smp_call_on_cpu(0, raise_smi, smi_cmd, true);
-	put_online_cpus();
+	cpus_read_unlock();
 
 	return ret;
 }
diff --git a/drivers/platform/x86/dell/dell-smbios-smm.c b/drivers/platform/x86/dell/dell-smbios-smm.c
index 97c52a839a3e..320c032418ac 100644
--- a/drivers/platform/x86/dell/dell-smbios-smm.c
+++ b/drivers/platform/x86/dell/dell-smbios-smm.c
@@ -24,37 +24,6 @@ static struct calling_interface_buffer *buffer;
 static struct platform_device *platform_device;
 static DEFINE_MUTEX(smm_mutex);
 
-static const struct dmi_system_id dell_device_table[] __initconst = {
-	{
-		.ident = "Dell laptop",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-			DMI_MATCH(DMI_CHASSIS_TYPE, "8"),
-		},
-	},
-	{
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-			DMI_MATCH(DMI_CHASSIS_TYPE, "9"), /*Laptop*/
-		},
-	},
-	{
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-			DMI_MATCH(DMI_CHASSIS_TYPE, "10"), /*Notebook*/
-		},
-	},
-	{
-		.ident = "Dell Computer Corporation",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
-			DMI_MATCH(DMI_CHASSIS_TYPE, "8"),
-		},
-	},
-	{ }
-};
-MODULE_DEVICE_TABLE(dmi, dell_device_table);
-
 static void parse_da_table(const struct dmi_header *dm)
 {
 	struct calling_interface_structure *table =
diff --git a/drivers/platform/x86/dell/dell-smbios-wmi.c b/drivers/platform/x86/dell/dell-smbios-wmi.c
index 33f823772733..931cc50136de 100644
--- a/drivers/platform/x86/dell/dell-smbios-wmi.c
+++ b/drivers/platform/x86/dell/dell-smbios-wmi.c
@@ -69,9 +69,10 @@ static int run_smbios_call(struct wmi_device *wdev)
 		if (obj->type == ACPI_TYPE_INTEGER)
 			dev_dbg(&wdev->dev, "SMBIOS call failed: %llu\n",
 				obj->integer.value);
+		kfree(output.pointer);
 		return -EIO;
 	}
-	memcpy(&priv->buf->std, obj->buffer.pointer, obj->buffer.length);
+	memcpy(input.pointer, obj->buffer.pointer, obj->buffer.length);
 	dev_dbg(&wdev->dev, "result: [%08x,%08x,%08x,%08x]\n",
 		priv->buf->std.output[0], priv->buf->std.output[1],
 		priv->buf->std.output[2], priv->buf->std.output[3]);
diff --git a/drivers/platform/x86/dell/dell-smo8800.c b/drivers/platform/x86/dell/dell-smo8800.c
index 5d9304a7de1b..3385e852104c 100644
--- a/drivers/platform/x86/dell/dell-smo8800.c
+++ b/drivers/platform/x86/dell/dell-smo8800.c
@@ -10,13 +10,14 @@
 
 #define DRIVER_NAME "smo8800"
 
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/acpi.h>
+#include <linux/fs.h>
 #include <linux/interrupt.h>
+#include <linux/kernel.h>
 #include <linux/miscdevice.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
 #include <linux/uaccess.h>
-#include <linux/fs.h>
 
 struct smo8800_device {
 	u32 irq;                     /* acpi device irq */
@@ -44,37 +45,6 @@ static irqreturn_t smo8800_interrupt_thread(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static acpi_status smo8800_get_resource(struct acpi_resource *resource,
-					void *context)
-{
-	struct acpi_resource_extended_irq *irq;
-
-	if (resource->type != ACPI_RESOURCE_TYPE_EXTENDED_IRQ)
-		return AE_OK;
-
-	irq = &resource->data.extended_irq;
-	if (!irq || !irq->interrupt_count)
-		return AE_OK;
-
-	*((u32 *)context) = irq->interrupts[0];
-	return AE_CTRL_TERMINATE;
-}
-
-static u32 smo8800_get_irq(struct acpi_device *device)
-{
-	u32 irq = 0;
-	acpi_status status;
-
-	status = acpi_walk_resources(device->handle, METHOD_NAME__CRS,
-				     smo8800_get_resource, &irq);
-	if (ACPI_FAILURE(status)) {
-		dev_err(&device->dev, "acpi_walk_resources failed\n");
-		return 0;
-	}
-
-	return irq;
-}
-
 static ssize_t smo8800_misc_read(struct file *file, char __user *buf,
 				 size_t count, loff_t *pos)
 {
@@ -136,7 +106,7 @@ static const struct file_operations smo8800_misc_fops = {
 	.release = smo8800_misc_release,
 };
 
-static int smo8800_add(struct acpi_device *device)
+static int smo8800_probe(struct platform_device *device)
 {
 	int err;
 	struct smo8800_device *smo8800;
@@ -160,14 +130,12 @@ static int smo8800_add(struct acpi_device *device)
 		return err;
 	}
 
-	device->driver_data = smo8800;
+	platform_set_drvdata(device, smo8800);
 
-	smo8800->irq = smo8800_get_irq(device);
-	if (!smo8800->irq) {
-		dev_err(&device->dev, "failed to obtain IRQ\n");
-		err = -EINVAL;
+	err = platform_get_irq(device, 0);
+	if (err < 0)
 		goto error;
-	}
+	smo8800->irq = err;
 
 	err = request_threaded_irq(smo8800->irq, smo8800_interrupt_quick,
 				   smo8800_interrupt_thread,
@@ -189,9 +157,9 @@ error:
 	return err;
 }
 
-static int smo8800_remove(struct acpi_device *device)
+static int smo8800_remove(struct platform_device *device)
 {
-	struct smo8800_device *smo8800 = device->driver_data;
+	struct smo8800_device *smo8800 = platform_get_drvdata(device);
 
 	free_irq(smo8800->irq, smo8800);
 	misc_deregister(&smo8800->miscdev);
@@ -211,21 +179,17 @@ static const struct acpi_device_id smo8800_ids[] = {
 	{ "SMO8831", 0 },
 	{ "", 0 },
 };
-
 MODULE_DEVICE_TABLE(acpi, smo8800_ids);
 
-static struct acpi_driver smo8800_driver = {
-	.name = DRIVER_NAME,
-	.class = "Latitude",
-	.ids = smo8800_ids,
-	.ops = {
-		.add = smo8800_add,
-		.remove = smo8800_remove,
+static struct platform_driver smo8800_driver = {
+	.probe = smo8800_probe,
+	.remove = smo8800_remove,
+	.driver = {
+		.name = DRIVER_NAME,
+		.acpi_match_table = smo8800_ids,
 	},
-	.owner = THIS_MODULE,
 };
-
-module_acpi_driver(smo8800_driver);
+module_platform_driver(smo8800_driver);
 
 MODULE_DESCRIPTION("Dell Latitude freefall driver (ACPI SMO88XX)");
 MODULE_LICENSE("GPL");
diff --git a/drivers/platform/x86/dual_accel_detect.h b/drivers/platform/x86/dual_accel_detect.h
index a9eae17cc43d..72e9624331c8 100644
--- a/drivers/platform/x86/dual_accel_detect.h
+++ b/drivers/platform/x86/dual_accel_detect.h
@@ -17,30 +17,6 @@
 #include <linux/acpi.h>
 #include <linux/i2c.h>
 
-static int dual_accel_i2c_resource_count(struct acpi_resource *ares, void *data)
-{
-	struct acpi_resource_i2c_serialbus *sb;
-	int *count = data;
-
-	if (i2c_acpi_get_i2c_resource(ares, &sb))
-		*count = *count + 1;
-
-	return 1;
-}
-
-static int dual_accel_i2c_client_count(struct acpi_device *adev)
-{
-	int ret, count = 0;
-	LIST_HEAD(r);
-
-	ret = acpi_dev_get_resources(adev, &r, dual_accel_i2c_resource_count, &count);
-	if (ret < 0)
-		return ret;
-
-	acpi_dev_free_resource_list(&r);
-	return count;
-}
-
 static bool dual_accel_detect_bosc0200(void)
 {
 	struct acpi_device *adev;
@@ -50,7 +26,7 @@ static bool dual_accel_detect_bosc0200(void)
 	if (!adev)
 		return false;
 
-	count = dual_accel_i2c_client_count(adev);
+	count = i2c_acpi_client_count(adev);
 
 	acpi_dev_put(adev);
 
diff --git a/drivers/platform/x86/hp_accel.c b/drivers/platform/x86/hp_accel.c
index 8c0867bda828..cc53f725c041 100644
--- a/drivers/platform/x86/hp_accel.c
+++ b/drivers/platform/x86/hp_accel.c
@@ -28,9 +28,6 @@
 #include <linux/serio.h>
 #include "../../misc/lis3lv02d/lis3lv02d.h"
 
-#define DRIVER_NAME     "hp_accel"
-#define ACPI_MDPS_CLASS "accelerometer"
-
 /* Delayed LEDs infrastructure ------------------------------------ */
 
 /* Special LED class that can defer work */
@@ -78,23 +75,14 @@ static const struct acpi_device_id lis3lv02d_device_ids[] = {
 };
 MODULE_DEVICE_TABLE(acpi, lis3lv02d_device_ids);
 
-
 /**
- * lis3lv02d_acpi_init - ACPI _INI method: initialize the device.
+ * lis3lv02d_acpi_init - initialize the device for ACPI
  * @lis3: pointer to the device struct
  *
  * Returns 0 on success.
  */
 static int lis3lv02d_acpi_init(struct lis3lv02d *lis3)
 {
-	struct acpi_device *dev = lis3->bus_priv;
-	if (!lis3->init_required)
-		return 0;
-
-	if (acpi_evaluate_object(dev->handle, METHOD_NAME__INI,
-				 NULL, NULL) != AE_OK)
-		return -EINVAL;
-
 	return 0;
 }
 
@@ -278,30 +266,6 @@ static struct delayed_led_classdev hpled_led = {
 	.set_brightness = hpled_set,
 };
 
-static acpi_status
-lis3lv02d_get_resource(struct acpi_resource *resource, void *context)
-{
-	if (resource->type == ACPI_RESOURCE_TYPE_EXTENDED_IRQ) {
-		struct acpi_resource_extended_irq *irq;
-		u32 *device_irq = context;
-
-		irq = &resource->data.extended_irq;
-		*device_irq = irq->interrupts[0];
-	}
-
-	return AE_OK;
-}
-
-static void lis3lv02d_enum_resources(struct acpi_device *device)
-{
-	acpi_status status;
-
-	status = acpi_walk_resources(device->handle, METHOD_NAME__CRS,
-					lis3lv02d_get_resource, &lis3_dev.irq);
-	if (ACPI_FAILURE(status))
-		printk(KERN_DEBUG DRIVER_NAME ": Error getting resources\n");
-}
-
 static bool hp_accel_i8042_filter(unsigned char data, unsigned char str,
 				  struct serio *port)
 {
@@ -331,23 +295,19 @@ static bool hp_accel_i8042_filter(unsigned char data, unsigned char str,
 	return false;
 }
 
-static int lis3lv02d_add(struct acpi_device *device)
+static int lis3lv02d_probe(struct platform_device *device)
 {
 	int ret;
 
-	if (!device)
-		return -EINVAL;
-
-	lis3_dev.bus_priv = device;
+	lis3_dev.bus_priv = ACPI_COMPANION(&device->dev);
 	lis3_dev.init = lis3lv02d_acpi_init;
 	lis3_dev.read = lis3lv02d_acpi_read;
 	lis3_dev.write = lis3lv02d_acpi_write;
-	strcpy(acpi_device_name(device), DRIVER_NAME);
-	strcpy(acpi_device_class(device), ACPI_MDPS_CLASS);
-	device->driver_data = &lis3_dev;
 
 	/* obtain IRQ number of our device from ACPI */
-	lis3lv02d_enum_resources(device);
+	ret = platform_get_irq_optional(device, 0);
+	if (ret > 0)
+		lis3_dev.irq = ret;
 
 	/* If possible use a "standard" axes order */
 	if (lis3_dev.ac.x && lis3_dev.ac.y && lis3_dev.ac.z) {
@@ -359,7 +319,6 @@ static int lis3lv02d_add(struct acpi_device *device)
 	}
 
 	/* call the core layer do its init */
-	lis3_dev.init_required = true;
 	ret = lis3lv02d_init_device(&lis3_dev);
 	if (ret)
 		return ret;
@@ -381,11 +340,8 @@ static int lis3lv02d_add(struct acpi_device *device)
 	return ret;
 }
 
-static int lis3lv02d_remove(struct acpi_device *device)
+static int lis3lv02d_remove(struct platform_device *device)
 {
-	if (!device)
-		return -EINVAL;
-
 	i8042_remove_filter(hp_accel_i8042_filter);
 	lis3lv02d_joystick_disable(&lis3_dev);
 	lis3lv02d_poweroff(&lis3_dev);
@@ -396,7 +352,6 @@ static int lis3lv02d_remove(struct acpi_device *device)
 	return lis3lv02d_remove_fs(&lis3_dev);
 }
 
-
 #ifdef CONFIG_PM_SLEEP
 static int lis3lv02d_suspend(struct device *dev)
 {
@@ -407,14 +362,12 @@ static int lis3lv02d_suspend(struct device *dev)
 
 static int lis3lv02d_resume(struct device *dev)
 {
-	lis3_dev.init_required = false;
 	lis3lv02d_poweron(&lis3_dev);
 	return 0;
 }
 
 static int lis3lv02d_restore(struct device *dev)
 {
-	lis3_dev.init_required = true;
 	lis3lv02d_poweron(&lis3_dev);
 	return 0;
 }
@@ -434,17 +387,16 @@ static const struct dev_pm_ops hp_accel_pm = {
 #endif
 
 /* For the HP MDPS aka 3D Driveguard */
-static struct acpi_driver lis3lv02d_driver = {
-	.name  = DRIVER_NAME,
-	.class = ACPI_MDPS_CLASS,
-	.ids   = lis3lv02d_device_ids,
-	.ops = {
-		.add     = lis3lv02d_add,
-		.remove  = lis3lv02d_remove,
+static struct platform_driver lis3lv02d_driver = {
+	.probe	= lis3lv02d_probe,
+	.remove	= lis3lv02d_remove,
+	.driver	= {
+		.name	= "hp_accel",
+		.pm	= HP_ACCEL_PM,
+		.acpi_match_table = lis3lv02d_device_ids,
 	},
-	.drv.pm = HP_ACCEL_PM,
 };
-module_acpi_driver(lis3lv02d_driver);
+module_platform_driver(lis3lv02d_driver);
 
 MODULE_DESCRIPTION("Glue between LIS3LV02Dx and HP ACPI BIOS and support for disk protection LED.");
 MODULE_AUTHOR("Yan Burman, Eric Piel, Pavel Machek");
diff --git a/drivers/platform/x86/i2c-multi-instantiate.c b/drivers/platform/x86/i2c-multi-instantiate.c
index 2cce82579d09..a50153ecd560 100644
--- a/drivers/platform/x86/i2c-multi-instantiate.c
+++ b/drivers/platform/x86/i2c-multi-instantiate.c
@@ -32,31 +32,6 @@ struct i2c_multi_inst_data {
 	struct i2c_client *clients[];
 };
 
-static int i2c_multi_inst_count(struct acpi_resource *ares, void *data)
-{
-	struct acpi_resource_i2c_serialbus *sb;
-	int *count = data;
-
-	if (i2c_acpi_get_i2c_resource(ares, &sb))
-		*count = *count + 1;
-
-	return 1;
-}
-
-static int i2c_multi_inst_count_resources(struct acpi_device *adev)
-{
-	LIST_HEAD(r);
-	int count = 0;
-	int ret;
-
-	ret = acpi_dev_get_resources(adev, &r, i2c_multi_inst_count, &count);
-	if (ret < 0)
-		return ret;
-
-	acpi_dev_free_resource_list(&r);
-	return count;
-}
-
 static int i2c_multi_inst_probe(struct platform_device *pdev)
 {
 	struct i2c_multi_inst_data *multi;
@@ -76,7 +51,7 @@ static int i2c_multi_inst_probe(struct platform_device *pdev)
 	adev = ACPI_COMPANION(dev);
 
 	/* Count number of clients to instantiate */
-	ret = i2c_multi_inst_count_resources(adev);
+	ret = i2c_acpi_client_count(adev);
 	if (ret < 0)
 		return ret;
 
diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index 784326bd72f0..e7a1299e3776 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -41,6 +41,7 @@
 static const char *const ideapad_wmi_fnesc_events[] = {
 	"26CAB2E5-5CF1-46AE-AAC3-4A12B6BA50E6", /* Yoga 3 */
 	"56322276-8493-4CE8-A783-98C991274F5E", /* Yoga 700 */
+	"8FC0DE0C-B4E4-43FD-B0F3-8871711C1294", /* Legion 5 */
 };
 #endif
 
@@ -1459,11 +1460,19 @@ static void ideapad_acpi_notify(acpi_handle handle, u32 event, void *data)
 static void ideapad_wmi_notify(u32 value, void *context)
 {
 	struct ideapad_private *priv = context;
+	unsigned long result;
 
 	switch (value) {
 	case 128:
 		ideapad_input_report(priv, value);
 		break;
+	case 208:
+		if (!eval_hals(priv->adev->handle, &result)) {
+			bool state = test_bit(HALS_FNLOCK_STATE_BIT, &result);
+
+			exec_sals(priv->adev->handle, state ? SALS_FNLOCK_ON : SALS_FNLOCK_OFF);
+		}
+		break;
 	default:
 		dev_info(&priv->platform_device->dev,
 			 "Unknown WMI event: %u\n", value);
diff --git a/drivers/platform/x86/intel/Kconfig b/drivers/platform/x86/intel/Kconfig
index f2eef337eb98..0b21468e1bd0 100644
--- a/drivers/platform/x86/intel/Kconfig
+++ b/drivers/platform/x86/intel/Kconfig
@@ -16,7 +16,156 @@ menuconfig X86_PLATFORM_DRIVERS_INTEL
 
 if X86_PLATFORM_DRIVERS_INTEL
 
+source "drivers/platform/x86/intel/atomisp2/Kconfig"
+source "drivers/platform/x86/intel/int1092/Kconfig"
 source "drivers/platform/x86/intel/int33fe/Kconfig"
 source "drivers/platform/x86/intel/int3472/Kconfig"
+source "drivers/platform/x86/intel/pmc/Kconfig"
+source "drivers/platform/x86/intel/pmt/Kconfig"
+source "drivers/platform/x86/intel/speed_select_if/Kconfig"
+source "drivers/platform/x86/intel/telemetry/Kconfig"
+source "drivers/platform/x86/intel/wmi/Kconfig"
+
+config INTEL_HID_EVENT
+	tristate "Intel HID Event"
+	depends on ACPI
+	depends on INPUT
+	depends on I2C
+	select INPUT_SPARSEKMAP
+	help
+	  This driver provides support for the Intel HID Event hotkey interface.
+	  Some laptops require this driver for hotkey support.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called intel_hid.
+
+config INTEL_VBTN
+	tristate "Intel Virtual Button"
+	depends on ACPI
+	depends on INPUT
+	depends on I2C
+	select INPUT_SPARSEKMAP
+	help
+	  This driver provides support for the Intel Virtual Button interface.
+	  Some laptops require this driver for power button support.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called intel_vbtn.
+
+config INTEL_INT0002_VGPIO
+	tristate "Intel ACPI INT0002 Virtual GPIO driver"
+	depends on GPIOLIB && ACPI && PM_SLEEP
+	select GPIOLIB_IRQCHIP
+	help
+	  Some peripherals on Bay Trail and Cherry Trail platforms signal a
+	  Power Management Event (PME) to the Power Management Controller (PMC)
+	  to wakeup the system. When this happens software needs to explicitly
+	  clear the PME bus 0 status bit in the GPE0a_STS register to avoid an
+	  IRQ storm on IRQ 9.
+
+	  This is modelled in ACPI through the INT0002 ACPI device, which is
+	  called a "Virtual GPIO controller" in ACPI because it defines the
+	  event handler to call when the PME triggers through _AEI and _L02
+	  methods as would be done for a real GPIO interrupt in ACPI.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called intel_int0002_vgpio.
+
+config INTEL_OAKTRAIL
+	tristate "Intel Oaktrail Platform Extras"
+	depends on ACPI
+	depends on ACPI_VIDEO || ACPI_VIDEO=n
+	depends on RFKILL && BACKLIGHT_CLASS_DEVICE && ACPI
+	help
+	  Intel Oaktrail platform need this driver to provide interfaces to
+	  enable/disable the Camera, WiFi, BT etc. devices. If in doubt, say Y
+	  here; it will only load on supported platforms.
+
+config INTEL_BXTWC_PMIC_TMU
+	tristate "Intel Broxton Whiskey Cove TMU Driver"
+	depends on INTEL_SOC_PMIC_BXTWC
+	depends on MFD_INTEL_PMC_BXT
+	select REGMAP
+	help
+	  Select this driver to use Intel Broxton Whiskey Cove PMIC TMU feature.
+	  This driver enables the alarm wakeup functionality in the TMU unit of
+	  Whiskey Cove PMIC.
+
+config INTEL_CHTDC_TI_PWRBTN
+	tristate "Intel Cherry Trail Dollar Cove TI power button driver"
+	depends on INTEL_SOC_PMIC_CHTDC_TI
+	depends on INPUT
+	help
+	  This option adds a power button driver for Dollar Cove TI
+	  PMIC on Intel Cherry Trail devices.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called intel_chtdc_ti_pwrbtn.
+
+config INTEL_MRFLD_PWRBTN
+	tristate "Intel Merrifield Basin Cove power button driver"
+	depends on INTEL_SOC_PMIC_MRFLD
+	depends on INPUT
+	help
+	  This option adds a power button driver for Basin Cove PMIC
+	  on Intel Merrifield devices.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called intel_mrfld_pwrbtn.
+
+config INTEL_PUNIT_IPC
+	tristate "Intel P-Unit IPC Driver"
+	help
+	  This driver provides support for Intel P-Unit Mailbox IPC mechanism,
+	  which is used to bridge the communications between kernel and P-Unit.
+
+config INTEL_RST
+	tristate "Intel Rapid Start Technology Driver"
+	depends on ACPI
+	help
+	  This driver provides support for modifying parameters on systems
+	  equipped with Intel's Rapid Start Technology. When put in an ACPI
+	  sleep state, these devices will wake after either a configured
+	  timeout or when the system battery reaches a critical state,
+	  automatically copying memory contents to disk. On resume, the
+	  firmware will copy the memory contents back to RAM and resume the OS
+	  as usual.
+
+config INTEL_SMARTCONNECT
+	tristate "Intel Smart Connect disabling driver"
+	depends on ACPI
+	help
+	  Intel Smart Connect is a technology intended to permit devices to
+	  update state by resuming for a short period of time at regular
+	  intervals. If a user enables this functionality under Windows and
+	  then reboots into Linux, the system may remain configured to resume
+	  on suspend. In the absence of any userspace to support it, the system
+	  will then remain awake until something triggers another suspend.
+
+	  This driver checks to determine whether the device has Intel Smart
+	  Connect enabled, and if so disables it.
+
+config INTEL_TURBO_MAX_3
+	bool "Intel Turbo Boost Max Technology 3.0 enumeration driver"
+	depends on X86_64 && SCHED_MC_PRIO
+	help
+	  This driver reads maximum performance ratio of each CPU and set up
+	  the scheduler priority metrics. In this way scheduler can prefer
+	  CPU with higher performance to schedule tasks.
+
+	  This driver is only required when the system is not using Hardware
+	  P-States (HWP). In HWP mode, priority can be read from ACPI tables.
+
+config INTEL_UNCORE_FREQ_CONTROL
+	tristate "Intel Uncore frequency control driver"
+	depends on X86_64
+	help
+	  This driver allows control of Uncore frequency limits on
+	  supported server platforms.
+
+	  Uncore frequency controls RING/LLC (last-level cache) clocks.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called intel-uncore-frequency.
 
 endif # X86_PLATFORM_DRIVERS_INTEL
diff --git a/drivers/platform/x86/intel/Makefile b/drivers/platform/x86/intel/Makefile
index 0653055942d5..8b3a3f7bab49 100644
--- a/drivers/platform/x86/intel/Makefile
+++ b/drivers/platform/x86/intel/Makefile
@@ -4,5 +4,44 @@
 # Intel x86 Platform-Specific Drivers
 #
 
+obj-$(CONFIG_INTEL_ATOMISP2_PDX86)	+= atomisp2/
+obj-$(CONFIG_INTEL_SAR_INT1092)		+= int1092/
 obj-$(CONFIG_INTEL_CHT_INT33FE)		+= int33fe/
 obj-$(CONFIG_INTEL_SKL_INT3472)		+= int3472/
+obj-$(CONFIG_INTEL_PMC_CORE)		+= pmc/
+obj-$(CONFIG_INTEL_PMT_CLASS)		+= pmt/
+obj-$(CONFIG_INTEL_SPEED_SELECT_INTERFACE) += speed_select_if/
+obj-$(CONFIG_INTEL_TELEMETRY)		+= telemetry/
+obj-$(CONFIG_INTEL_WMI)			+= wmi/
+
+# Intel input drivers
+intel-hid-y				:= hid.o
+obj-$(CONFIG_INTEL_HID_EVENT)		+= intel-hid.o
+intel-vbtn-y				:= vbtn.o
+obj-$(CONFIG_INTEL_VBTN)		+= intel-vbtn.o
+
+# Intel miscellaneous drivers
+intel_int0002_vgpio-y			:= int0002_vgpio.o
+obj-$(CONFIG_INTEL_INT0002_VGPIO)	+= intel_int0002_vgpio.o
+intel_oaktrail-y			:= oaktrail.o
+obj-$(CONFIG_INTEL_OAKTRAIL)		+= intel_oaktrail.o
+
+# Intel PMIC / PMC / P-Unit drivers
+intel_bxtwc_tmu-y			:= bxtwc_tmu.o
+obj-$(CONFIG_INTEL_BXTWC_PMIC_TMU)	+= intel_bxtwc_tmu.o
+intel_chtdc_ti_pwrbtn-y			:= chtdc_ti_pwrbtn.o
+obj-$(CONFIG_INTEL_CHTDC_TI_PWRBTN)	+= intel_chtdc_ti_pwrbtn.o
+intel_mrfld_pwrbtn-y			:= mrfld_pwrbtn.o
+obj-$(CONFIG_INTEL_MRFLD_PWRBTN)	+= intel_mrfld_pwrbtn.o
+intel_punit_ipc-y			:= punit_ipc.o
+obj-$(CONFIG_INTEL_PUNIT_IPC)		+= intel_punit_ipc.o
+
+# Intel Uncore drivers
+intel-rst-y				:= rst.o
+obj-$(CONFIG_INTEL_RST)			+= intel-rst.o
+intel-smartconnect-y			:= smartconnect.o
+obj-$(CONFIG_INTEL_SMARTCONNECT)	+= intel-smartconnect.o
+intel_turbo_max_3-y			:= turbo_max_3.o
+obj-$(CONFIG_INTEL_TURBO_MAX_3)		+= intel_turbo_max_3.o
+intel-uncore-frequency-y		:= uncore-frequency.o
+obj-$(CONFIG_INTEL_UNCORE_FREQ_CONTROL)	+= intel-uncore-frequency.o
diff --git a/drivers/platform/x86/intel/atomisp2/Kconfig b/drivers/platform/x86/intel/atomisp2/Kconfig
new file mode 100644
index 000000000000..35dd2be9d2a1
--- /dev/null
+++ b/drivers/platform/x86/intel/atomisp2/Kconfig
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Intel x86 Platform Specific Drivers
+#
+
+config INTEL_ATOMISP2_PDX86
+	bool
+
+config INTEL_ATOMISP2_LED
+	tristate "Intel AtomISP v2 camera LED driver"
+	depends on GPIOLIB && LEDS_GPIO
+	select INTEL_ATOMISP2_PDX86
+	help
+	  Many Bay Trail and Cherry Trail devices come with a camera attached
+	  to Intel's Image Signal Processor. Linux currently does not have a
+	  driver for these, so they do not work as a camera. Some of these
+	  camera's have a LED which is controlled through a GPIO.
+
+	  Some of these devices have a firmware issue where the LED gets turned
+	  on at boot. This driver will turn the LED off at boot and also allows
+	  controlling the LED (repurposing it) through the sysfs LED interface.
+
+	  Which GPIO is attached to the LED is usually not described in the
+	  ACPI tables, so this driver contains per-system info about the GPIO
+	  inside the driver, this means that this driver only works on systems
+	  the driver knows about.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called intel_atomisp2_led.
+
+config INTEL_ATOMISP2_PM
+	tristate "Intel AtomISP v2 dummy / power-management driver"
+	depends on PCI && IOSF_MBI && PM
+	depends on !INTEL_ATOMISP
+	select INTEL_ATOMISP2_PDX86
+	help
+	  Power-management driver for Intel's Image Signal Processor found on
+	  Bay Trail and Cherry Trail devices. This dummy driver's sole purpose
+	  is to turn the ISP off (put it in D3) to save power and to allow
+	  entering of S0ix modes.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called intel_atomisp2_pm.
diff --git a/drivers/platform/x86/intel/atomisp2/Makefile b/drivers/platform/x86/intel/atomisp2/Makefile
new file mode 100644
index 000000000000..96b1e877d1f1
--- /dev/null
+++ b/drivers/platform/x86/intel/atomisp2/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Intel x86 Platform Specific Drivers
+#
+
+intel_atomisp2_led-y			:= led.o
+obj-$(CONFIG_INTEL_ATOMISP2_LED)	+= intel_atomisp2_led.o
+intel_atomisp2_pm-y			+= pm.o
+obj-$(CONFIG_INTEL_ATOMISP2_PM)		+= intel_atomisp2_pm.o
diff --git a/drivers/platform/x86/intel_atomisp2_led.c b/drivers/platform/x86/intel/atomisp2/led.c
index 5935dfca166f..5935dfca166f 100644
--- a/drivers/platform/x86/intel_atomisp2_led.c
+++ b/drivers/platform/x86/intel/atomisp2/led.c
diff --git a/drivers/platform/x86/intel_atomisp2_pm.c b/drivers/platform/x86/intel/atomisp2/pm.c
index 805fc0d8515c..805fc0d8515c 100644
--- a/drivers/platform/x86/intel_atomisp2_pm.c
+++ b/drivers/platform/x86/intel/atomisp2/pm.c
diff --git a/drivers/platform/x86/intel_bxtwc_tmu.c b/drivers/platform/x86/intel/bxtwc_tmu.c
index 7ccf583649e6..7ccf583649e6 100644
--- a/drivers/platform/x86/intel_bxtwc_tmu.c
+++ b/drivers/platform/x86/intel/bxtwc_tmu.c
diff --git a/drivers/platform/x86/intel_chtdc_ti_pwrbtn.c b/drivers/platform/x86/intel/chtdc_ti_pwrbtn.c
index 9606a994af22..9606a994af22 100644
--- a/drivers/platform/x86/intel_chtdc_ti_pwrbtn.c
+++ b/drivers/platform/x86/intel/chtdc_ti_pwrbtn.c
diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel/hid.c
index 2e4e97a626a5..a33a5826e81a 100644
--- a/drivers/platform/x86/intel-hid.c
+++ b/drivers/platform/x86/intel/hid.c
@@ -14,7 +14,7 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/suspend.h>
-#include "dual_accel_detect.h"
+#include "../dual_accel_detect.h"
 
 /* When NOT in tablet mode, VGBS returns with the flag 0x40 */
 #define TABLET_MODE_FLAG BIT(6)
diff --git a/drivers/platform/x86/intel_int0002_vgpio.c b/drivers/platform/x86/intel/int0002_vgpio.c
index 569342aa8926..569342aa8926 100644
--- a/drivers/platform/x86/intel_int0002_vgpio.c
+++ b/drivers/platform/x86/intel/int0002_vgpio.c
diff --git a/drivers/platform/x86/intel/int1092/Kconfig b/drivers/platform/x86/intel/int1092/Kconfig
new file mode 100644
index 000000000000..2e9a177241aa
--- /dev/null
+++ b/drivers/platform/x86/intel/int1092/Kconfig
@@ -0,0 +1,14 @@
+config INTEL_SAR_INT1092
+	tristate "Intel Specific Absorption Rate Driver"
+	depends on ACPI
+	help
+	  This driver helps to limit the exposure of human body to RF frequency by
+	  providing information to userspace application that will inform the Intel
+	  M.2 modem to regulate the RF power based on SAR data obtained from the
+	  sensors captured in the BIOS. ACPI interface exposes this data from the BIOS
+	  to SAR driver. The front end application in userspace will interact with SAR
+	  driver to obtain information like the device mode, Antenna index, baseband index,
+	  SAR table index and use available communication like MBIM interface to enable
+	  data communication to modem for RF power regulation. Enable this config when
+	  given platform needs to support "Dynamic SAR" configuration for a modem available
+	  on the platform.
diff --git a/drivers/platform/x86/intel/int1092/Makefile b/drivers/platform/x86/intel/int1092/Makefile
new file mode 100644
index 000000000000..4ab94e541de3
--- /dev/null
+++ b/drivers/platform/x86/intel/int1092/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_INTEL_SAR_INT1092)		+= intel_sar.o
diff --git a/drivers/platform/x86/intel/int1092/intel_sar.c b/drivers/platform/x86/intel/int1092/intel_sar.c
new file mode 100644
index 000000000000..379560fe5df9
--- /dev/null
+++ b/drivers/platform/x86/intel/int1092/intel_sar.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, Intel Corporation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/kobject.h>
+#include <linux/platform_device.h>
+#include <linux/sysfs.h>
+#include "intel_sar.h"
+
+/**
+ * get_int_value: Retrieve integer values from ACPI Object
+ * @obj: acpi_object pointer which has the integer value
+ * @out: output pointer will get integer value
+ *
+ * Function is used to retrieve integer value from acpi object.
+ *
+ * Return:
+ * * 0 on success
+ * * -EIO if there is an issue in acpi_object passed.
+ */
+static int get_int_value(union acpi_object *obj, int *out)
+{
+	if (!obj || obj->type != ACPI_TYPE_INTEGER)
+		return -EIO;
+	*out = (int)obj->integer.value;
+	return 0;
+}
+
+/**
+ * update_sar_data: sar data is updated based on regulatory mode
+ * @context: pointer to driver context structure
+ *
+ * sar_data is updated based on regulatory value
+ * context->reg_value will never exceed MAX_REGULATORY
+ */
+static void update_sar_data(struct wwan_sar_context *context)
+{
+	struct wwan_device_mode_configuration *config =
+		&context->config_data[context->reg_value];
+
+	if (config->device_mode_info &&
+	    context->sar_data.device_mode < config->total_dev_mode) {
+		struct wwan_device_mode_info *dev_mode =
+			&config->device_mode_info[context->sar_data.device_mode];
+
+		context->sar_data.antennatable_index = dev_mode->antennatable_index;
+		context->sar_data.bandtable_index = dev_mode->bandtable_index;
+		context->sar_data.sartable_index = dev_mode->sartable_index;
+	}
+}
+
+/**
+ * parse_package: parse acpi package for retrieving SAR information
+ * @context: pointer to driver context structure
+ * @item : acpi_object pointer
+ *
+ * Given acpi_object is iterated to retrieve information for each device mode.
+ * If a given package corresponding to a specific device mode is faulty, it is
+ * skipped and the specific entry in context structure will have the default value
+ * of zero. Decoding of subsequent device modes is realized by having "continue"
+ * statements in the for loop on encountering error in parsing given device mode.
+ *
+ * Return:
+ * AE_OK if success
+ * AE_ERROR on error
+ */
+static acpi_status parse_package(struct wwan_sar_context *context, union acpi_object *item)
+{
+	struct wwan_device_mode_configuration *data;
+	int value, itr, reg;
+	union acpi_object *num;
+
+	num = &item->package.elements[0];
+	if (get_int_value(num, &value) || value < 0 || value >= MAX_REGULATORY)
+		return AE_ERROR;
+
+	reg = value;
+
+	data = &context->config_data[reg];
+	if (data->total_dev_mode > MAX_DEV_MODES ||	data->total_dev_mode == 0 ||
+	    item->package.count <= data->total_dev_mode)
+		return AE_ERROR;
+
+	data->device_mode_info = kmalloc_array(data->total_dev_mode,
+					       sizeof(struct wwan_device_mode_info), GFP_KERNEL);
+	if (!data->device_mode_info)
+		return AE_ERROR;
+
+	for (itr = 0; itr < data->total_dev_mode; itr++) {
+		struct wwan_device_mode_info temp = { 0 };
+
+		num = &item->package.elements[itr + 1];
+		if (num->type != ACPI_TYPE_PACKAGE || num->package.count < TOTAL_DATA)
+			continue;
+		if (get_int_value(&num->package.elements[0], &temp.device_mode))
+			continue;
+		if (get_int_value(&num->package.elements[1], &temp.bandtable_index))
+			continue;
+		if (get_int_value(&num->package.elements[2], &temp.antennatable_index))
+			continue;
+		if (get_int_value(&num->package.elements[3], &temp.sartable_index))
+			continue;
+		data->device_mode_info[itr] = temp;
+	}
+	return AE_OK;
+}
+
+/**
+ * sar_get_device_mode: Extraction of information from BIOS via DSM calls
+ * @device: ACPI device for which to retrieve the data
+ *
+ * Retrieve the current device mode information from the BIOS.
+ *
+ * Return:
+ * AE_OK on success
+ * AE_ERROR on error
+ */
+static acpi_status sar_get_device_mode(struct platform_device *device)
+{
+	struct wwan_sar_context *context = dev_get_drvdata(&device->dev);
+	acpi_status status = AE_OK;
+	union acpi_object *out;
+	u32 rev = 0;
+	int value;
+
+	out = acpi_evaluate_dsm(context->handle, &context->guid, rev,
+				COMMAND_ID_DEV_MODE, NULL);
+	if (get_int_value(out, &value)) {
+		dev_err(&device->dev, "DSM cmd:%d Failed to retrieve value\n", COMMAND_ID_DEV_MODE);
+		status = AE_ERROR;
+		goto dev_mode_error;
+	}
+	context->sar_data.device_mode = value;
+	update_sar_data(context);
+	sysfs_notify(&device->dev.kobj, NULL, SYSFS_DATANAME);
+
+dev_mode_error:
+	ACPI_FREE(out);
+	return status;
+}
+
+static const struct acpi_device_id sar_device_ids[] = {
+	{ "INTC1092", 0},
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, sar_device_ids);
+
+static ssize_t intc_data_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct wwan_sar_context *context = dev_get_drvdata(dev);
+
+	return sysfs_emit(buf, "%d %d %d %d\n", context->sar_data.device_mode,
+		      context->sar_data.bandtable_index,
+		      context->sar_data.antennatable_index,
+		      context->sar_data.sartable_index);
+}
+static DEVICE_ATTR_RO(intc_data);
+
+static ssize_t intc_reg_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct wwan_sar_context *context = dev_get_drvdata(dev);
+
+	return sysfs_emit(buf, "%d\n", context->reg_value);
+}
+
+static ssize_t intc_reg_store(struct device *dev, struct device_attribute *attr,
+			      const char *buf, size_t count)
+{
+	struct wwan_sar_context *context = dev_get_drvdata(dev);
+	unsigned int value;
+	int read;
+
+	if (!count)
+		return -EINVAL;
+	read = kstrtouint(buf, 10, &value);
+	if (read < 0)
+		return read;
+	if (value >= MAX_REGULATORY)
+		return -EOVERFLOW;
+	context->reg_value = value;
+	update_sar_data(context);
+	sysfs_notify(&dev->kobj, NULL, SYSFS_DATANAME);
+	return count;
+}
+static DEVICE_ATTR_RW(intc_reg);
+
+static struct attribute *intcsar_attrs[] = {
+	&dev_attr_intc_data.attr,
+	&dev_attr_intc_reg.attr,
+	NULL
+};
+
+static struct attribute_group intcsar_group = {
+	.attrs = intcsar_attrs,
+};
+
+static void sar_notify(acpi_handle handle, u32 event, void *data)
+{
+	struct platform_device *device = data;
+
+	if (event == SAR_EVENT) {
+		if (sar_get_device_mode(device) != AE_OK)
+			dev_err(&device->dev, "sar_get_device_mode error");
+	}
+}
+
+static void sar_get_data(int reg, struct wwan_sar_context *context)
+{
+	union acpi_object *out, req;
+	u32 rev = 0;
+
+	req.type = ACPI_TYPE_INTEGER;
+	req.integer.value = reg;
+	out = acpi_evaluate_dsm(context->handle, &context->guid, rev,
+				COMMAND_ID_CONFIG_TABLE, &req);
+	if (!out)
+		return;
+	if (out->type == ACPI_TYPE_PACKAGE && out->package.count >= 3 &&
+	    out->package.elements[0].type == ACPI_TYPE_INTEGER &&
+	    out->package.elements[1].type == ACPI_TYPE_INTEGER &&
+	    out->package.elements[2].type == ACPI_TYPE_PACKAGE &&
+	    out->package.elements[2].package.count > 0) {
+		context->config_data[reg].version = out->package.elements[0].integer.value;
+		context->config_data[reg].total_dev_mode =
+			out->package.elements[1].integer.value;
+		if (context->config_data[reg].total_dev_mode <= 0 ||
+		    context->config_data[reg].total_dev_mode > MAX_DEV_MODES) {
+			ACPI_FREE(out);
+			return;
+		}
+		parse_package(context, &out->package.elements[2]);
+	}
+	ACPI_FREE(out);
+}
+
+static int sar_probe(struct platform_device *device)
+{
+	struct wwan_sar_context *context;
+	int reg;
+	int result;
+
+	context = kzalloc(sizeof(*context), GFP_KERNEL);
+	if (!context)
+		return -ENOMEM;
+
+	context->sar_device = device;
+	context->handle = ACPI_HANDLE(&device->dev);
+	dev_set_drvdata(&device->dev, context);
+
+	result = guid_parse(SAR_DSM_UUID, &context->guid);
+	if (result) {
+		dev_err(&device->dev, "SAR UUID parse error: %d\n", result);
+		goto r_free;
+	}
+
+	for (reg = 0; reg < MAX_REGULATORY; reg++)
+		sar_get_data(reg, context);
+
+	if (sar_get_device_mode(device) != AE_OK) {
+		dev_err(&device->dev, "Failed to get device mode\n");
+		result = -EIO;
+		goto r_free;
+	}
+
+	result = sysfs_create_group(&device->dev.kobj, &intcsar_group);
+	if (result) {
+		dev_err(&device->dev, "sysfs creation failed\n");
+		goto r_free;
+	}
+
+	if (acpi_install_notify_handler(ACPI_HANDLE(&device->dev), ACPI_DEVICE_NOTIFY,
+					sar_notify, (void *)device) != AE_OK) {
+		dev_err(&device->dev, "Failed acpi_install_notify_handler\n");
+		result = -EIO;
+		goto r_sys;
+	}
+	return 0;
+
+r_sys:
+	sysfs_remove_group(&device->dev.kobj, &intcsar_group);
+r_free:
+	kfree(context);
+	return result;
+}
+
+static int sar_remove(struct platform_device *device)
+{
+	struct wwan_sar_context *context = dev_get_drvdata(&device->dev);
+	int reg;
+
+	acpi_remove_notify_handler(ACPI_HANDLE(&device->dev),
+				   ACPI_DEVICE_NOTIFY, sar_notify);
+	sysfs_remove_group(&device->dev.kobj, &intcsar_group);
+	for (reg = 0; reg < MAX_REGULATORY; reg++)
+		kfree(context->config_data[reg].device_mode_info);
+
+	kfree(context);
+	return 0;
+}
+
+static struct platform_driver sar_driver = {
+	.probe = sar_probe,
+	.remove = sar_remove,
+	.driver = {
+		.name = DRVNAME,
+		.owner = THIS_MODULE,
+		.acpi_match_table = ACPI_PTR(sar_device_ids)
+	}
+};
+module_platform_driver(sar_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Platform device driver for INTEL MODEM BIOS SAR");
+MODULE_AUTHOR("Shravan S <s.shravan@intel.com>");
diff --git a/drivers/platform/x86/intel/int1092/intel_sar.h b/drivers/platform/x86/intel/int1092/intel_sar.h
new file mode 100644
index 000000000000..b5310510b84c
--- /dev/null
+++ b/drivers/platform/x86/intel/int1092/intel_sar.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021, Intel Corporation.
+ */
+#ifndef INTEL_SAR_H
+#define INTEL_SAR_H
+
+#define COMMAND_ID_DEV_MODE 1
+#define COMMAND_ID_CONFIG_TABLE 2
+#define DRVNAME "intc_sar"
+#define MAX_DEV_MODES 50
+#define MAX_REGULATORY 3
+#define SAR_DSM_UUID "82737E72-3A33-4C45-A9C7-57C0411A5F13"
+#define SAR_EVENT 0x80
+#define SYSFS_DATANAME "intc_data"
+#define TOTAL_DATA 4
+
+/**
+ * Structure wwan_device_mode_info - device mode information
+ * Holds the data that needs to be passed to userspace.
+ * The data is updated from the BIOS sensor information.
+ * @device_mode: Specific mode of the device
+ * @bandtable_index: Index of RF band
+ * @antennatable_index: Index of antenna
+ * @sartable_index: Index of SAR
+ */
+struct wwan_device_mode_info {
+	int device_mode;
+	int bandtable_index;
+	int antennatable_index;
+	int sartable_index;
+};
+
+/**
+ * Structure wwan_device_mode_configuration - device configuration
+ * Holds the data that is configured and obtained on probe event.
+ * The data is updated from the BIOS sensor information.
+ * @version: Mode configuration version
+ * @total_dev_mode: Total number of device modes
+ * @device_mode_info: pointer to structure wwan_device_mode_info
+ */
+struct wwan_device_mode_configuration {
+	int version;
+	int total_dev_mode;
+	struct wwan_device_mode_info *device_mode_info;
+};
+
+/**
+ * Structure wwan_supported_info - userspace datastore
+ * Holds the data that is obtained from userspace
+ * The data is updated from the userspace and send value back in the
+ * structure format that is mentioned here.
+ * @reg_mode_needed: regulatory mode set by user for tests
+ * @bios_table_revision: Version of SAR table
+ * @num_supported_modes: Total supported modes based on reg_mode
+ */
+struct wwan_supported_info {
+	int reg_mode_needed;
+	int bios_table_revision;
+	int num_supported_modes;
+};
+
+/**
+ * Structure wwan_sar_context - context of SAR
+ * Holds the complete context as long as the driver is in existence
+ * The context holds instance of the data used for different cases.
+ * @guid: Group id
+ * @handle: store acpi handle
+ * @reg_value: regulatory value
+ * Regulatory 0: FCC, 1: CE, 2: ISED
+ * @sar_device: platform_device type
+ * @sar_kobject: kobject for sysfs
+ * @supported_data: wwan_supported_info struct
+ * @sar_data: wwan_device_mode_info struct
+ * @config_data: wwan_device_mode_configuration array struct
+ */
+struct wwan_sar_context {
+	guid_t guid;
+	acpi_handle handle;
+	int reg_value;
+	struct platform_device *sar_device;
+	struct wwan_supported_info supported_data;
+	struct wwan_device_mode_info sar_data;
+	struct wwan_device_mode_configuration config_data[MAX_REGULATORY];
+};
+#endif /* INTEL_SAR_H */
diff --git a/drivers/platform/x86/intel/int33fe/Makefile b/drivers/platform/x86/intel/int33fe/Makefile
index cc11183ce179..9456e3b37f6f 100644
--- a/drivers/platform/x86/intel/int33fe/Makefile
+++ b/drivers/platform/x86/intel/int33fe/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_INTEL_CHT_INT33FE)		+= intel_cht_int33fe.o
-intel_cht_int33fe-objs			:= intel_cht_int33fe_common.o \
+intel_cht_int33fe-y			:= intel_cht_int33fe_common.o \
 					   intel_cht_int33fe_typec.o \
 					   intel_cht_int33fe_microb.o
diff --git a/drivers/platform/x86/intel/int33fe/intel_cht_int33fe_common.c b/drivers/platform/x86/intel/int33fe/intel_cht_int33fe_common.c
index 251ed9bac789..463222521e61 100644
--- a/drivers/platform/x86/intel/int33fe/intel_cht_int33fe_common.c
+++ b/drivers/platform/x86/intel/int33fe/intel_cht_int33fe_common.c
@@ -16,33 +16,6 @@
 
 #define EXPECTED_PTYPE		4
 
-static int cht_int33fe_i2c_res_filter(struct acpi_resource *ares, void *data)
-{
-	struct acpi_resource_i2c_serialbus *sb;
-	int *count = data;
-
-	if (i2c_acpi_get_i2c_resource(ares, &sb))
-		(*count)++;
-
-	return 1;
-}
-
-static int cht_int33fe_count_i2c_clients(struct device *dev)
-{
-	struct acpi_device *adev = ACPI_COMPANION(dev);
-	LIST_HEAD(resource_list);
-	int count = 0;
-	int ret;
-
-	ret = acpi_dev_get_resources(adev, &resource_list,
-				     cht_int33fe_i2c_res_filter, &count);
-	acpi_dev_free_resource_list(&resource_list);
-	if (ret < 0)
-		return ret;
-
-	return count;
-}
-
 static int cht_int33fe_check_hw_type(struct device *dev)
 {
 	unsigned long long ptyp;
@@ -69,7 +42,7 @@ static int cht_int33fe_check_hw_type(struct device *dev)
 		return -ENODEV;
 	}
 
-	ret = cht_int33fe_count_i2c_clients(dev);
+	ret = i2c_acpi_client_count(ACPI_COMPANION(dev));
 	if (ret < 0)
 		return ret;
 
diff --git a/drivers/platform/x86/intel/int3472/Makefile b/drivers/platform/x86/intel/int3472/Makefile
index 48bd97f0a04e..2362e04db18d 100644
--- a/drivers/platform/x86/intel/int3472/Makefile
+++ b/drivers/platform/x86/intel/int3472/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_INTEL_SKL_INT3472)		+= intel_skl_int3472.o
-intel_skl_int3472-objs			:= intel_skl_int3472_common.o \
+intel_skl_int3472-y			:= intel_skl_int3472_common.o \
 					   intel_skl_int3472_discrete.o \
 					   intel_skl_int3472_tps68470.o \
 					   intel_skl_int3472_clk_and_regulator.o
diff --git a/drivers/platform/x86/intel_mrfld_pwrbtn.c b/drivers/platform/x86/intel/mrfld_pwrbtn.c
index d58fea51747e..d58fea51747e 100644
--- a/drivers/platform/x86/intel_mrfld_pwrbtn.c
+++ b/drivers/platform/x86/intel/mrfld_pwrbtn.c
diff --git a/drivers/platform/x86/intel_oaktrail.c b/drivers/platform/x86/intel/oaktrail.c
index 1a09a75bd16d..1a09a75bd16d 100644
--- a/drivers/platform/x86/intel_oaktrail.c
+++ b/drivers/platform/x86/intel/oaktrail.c
diff --git a/drivers/platform/x86/intel/pmc/Kconfig b/drivers/platform/x86/intel/pmc/Kconfig
new file mode 100644
index 000000000000..b526597e4deb
--- /dev/null
+++ b/drivers/platform/x86/intel/pmc/Kconfig
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Intel x86 Platform-Specific Drivers
+#
+
+config INTEL_PMC_CORE
+	tristate "Intel PMC Core driver"
+	depends on PCI
+	depends on ACPI
+	help
+	  The Intel Platform Controller Hub for Intel Core SoCs provides access
+	  to Power Management Controller registers via various interfaces. This
+	  driver can utilize debugging capabilities and supported features as
+	  exposed by the Power Management Controller. It also may perform some
+	  tasks in the PMC in order to enable transition into the SLPS0 state.
+	  It should be selected on all Intel platforms supported by the driver.
+
+	  Supported features:
+		- SLP_S0_RESIDENCY counter
+		- PCH IP Power Gating status
+		- LTR Ignore / LTR Show
+		- MPHY/PLL gating status (Sunrisepoint PCH only)
+		- SLPS0 Debug registers (Cannonlake/Icelake PCH)
+		- Low Power Mode registers (Tigerlake and beyond)
+		- PMC quirks as needed to enable SLPS0/S0ix
diff --git a/drivers/platform/x86/intel/pmc/Makefile b/drivers/platform/x86/intel/pmc/Makefile
new file mode 100644
index 000000000000..8966fcdc0e1d
--- /dev/null
+++ b/drivers/platform/x86/intel/pmc/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Intel x86 Platform-Specific Drivers
+#
+
+intel_pmc_core-y			:= core.o
+obj-$(CONFIG_INTEL_PMC_CORE)		+= intel_pmc_core.o
+intel_pmc_core_pltdrv-y			:= pltdrv.o
+obj-$(CONFIG_INTEL_PMC_CORE)		+= intel_pmc_core_pltdrv.o
diff --git a/drivers/platform/x86/intel_pmc_core.c b/drivers/platform/x86/intel/pmc/core.c
index b0e486a6bdfb..ac19fcc9abbf 100644
--- a/drivers/platform/x86/intel_pmc_core.c
+++ b/drivers/platform/x86/intel/pmc/core.c
@@ -31,7 +31,7 @@
 #include <asm/msr.h>
 #include <asm/tsc.h>
 
-#include "intel_pmc_core.h"
+#include "core.h"
 
 #define ACPI_S0IX_DSM_UUID		"57a6512e-3979-4e9d-9708-ff13b2508972"
 #define ACPI_GET_LOW_MODE_REGISTERS	1
@@ -645,6 +645,306 @@ free_acpi_obj:
 	ACPI_FREE(out_obj);
 }
 
+/* Alder Lake: PGD PFET Enable Ack Status Register(s) bitmap */
+static const struct pmc_bit_map adl_pfear_map[] = {
+	{"SPI/eSPI",		BIT(2)},
+	{"XHCI",		BIT(3)},
+	{"SPA",			BIT(4)},
+	{"SPB",			BIT(5)},
+	{"SPC",			BIT(6)},
+	{"GBE",			BIT(7)},
+
+	{"SATA",		BIT(0)},
+	{"HDA_PGD0",		BIT(1)},
+	{"HDA_PGD1",		BIT(2)},
+	{"HDA_PGD2",		BIT(3)},
+	{"HDA_PGD3",		BIT(4)},
+	{"SPD",			BIT(5)},
+	{"LPSS",		BIT(6)},
+
+	{"SMB",			BIT(0)},
+	{"ISH",			BIT(1)},
+	{"ITH",			BIT(3)},
+
+	{"XDCI",		BIT(1)},
+	{"DCI",			BIT(2)},
+	{"CSE",			BIT(3)},
+	{"CSME_KVM",		BIT(4)},
+	{"CSME_PMT",		BIT(5)},
+	{"CSME_CLINK",		BIT(6)},
+	{"CSME_PTIO",		BIT(7)},
+
+	{"CSME_USBR",		BIT(0)},
+	{"CSME_SUSRAM",		BIT(1)},
+	{"CSME_SMT1",		BIT(2)},
+	{"CSME_SMS2",		BIT(4)},
+	{"CSME_SMS1",		BIT(5)},
+	{"CSME_RTC",		BIT(6)},
+	{"CSME_PSF",		BIT(7)},
+
+	{"CNVI",		BIT(3)},
+
+	{"HDA_PGD4",		BIT(2)},
+	{"HDA_PGD5",		BIT(3)},
+	{"HDA_PGD6",		BIT(4)},
+	{}
+};
+
+static const struct pmc_bit_map *ext_adl_pfear_map[] = {
+	/*
+	 * Check intel_pmc_core_ids[] users of cnp_reg_map for
+	 * a list of core SoCs using this.
+	 */
+	adl_pfear_map,
+	NULL
+};
+
+static const struct pmc_bit_map adl_ltr_show_map[] = {
+	{"SOUTHPORT_A",		CNP_PMC_LTR_SPA},
+	{"SOUTHPORT_B",		CNP_PMC_LTR_SPB},
+	{"SATA",		CNP_PMC_LTR_SATA},
+	{"GIGABIT_ETHERNET",	CNP_PMC_LTR_GBE},
+	{"XHCI",		CNP_PMC_LTR_XHCI},
+	{"SOUTHPORT_F",		ADL_PMC_LTR_SPF},
+	{"ME",			CNP_PMC_LTR_ME},
+	/* EVA is Enterprise Value Add, doesn't really exist on PCH */
+	{"SATA1",		CNP_PMC_LTR_EVA},
+	{"SOUTHPORT_C",		CNP_PMC_LTR_SPC},
+	{"HD_AUDIO",		CNP_PMC_LTR_AZ},
+	{"CNV",			CNP_PMC_LTR_CNV},
+	{"LPSS",		CNP_PMC_LTR_LPSS},
+	{"SOUTHPORT_D",		CNP_PMC_LTR_SPD},
+	{"SOUTHPORT_E",		CNP_PMC_LTR_SPE},
+	{"SATA2",		CNP_PMC_LTR_CAM},
+	{"ESPI",		CNP_PMC_LTR_ESPI},
+	{"SCC",			CNP_PMC_LTR_SCC},
+	{"ISH",			CNP_PMC_LTR_ISH},
+	{"UFSX2",		CNP_PMC_LTR_UFSX2},
+	{"EMMC",		CNP_PMC_LTR_EMMC},
+	/*
+	 * Check intel_pmc_core_ids[] users of cnp_reg_map for
+	 * a list of core SoCs using this.
+	 */
+	{"WIGIG",		ICL_PMC_LTR_WIGIG},
+	{"THC0",		TGL_PMC_LTR_THC0},
+	{"THC1",		TGL_PMC_LTR_THC1},
+	{"SOUTHPORT_G",		CNP_PMC_LTR_RESERVED},
+
+	/* Below two cannot be used for LTR_IGNORE */
+	{"CURRENT_PLATFORM",	CNP_PMC_LTR_CUR_PLT},
+	{"AGGREGATED_SYSTEM",	CNP_PMC_LTR_CUR_ASLT},
+	{}
+};
+
+static const struct pmc_bit_map adl_clocksource_status_map[] = {
+	{"CLKPART1_OFF_STS",			BIT(0)},
+	{"CLKPART2_OFF_STS",			BIT(1)},
+	{"CLKPART3_OFF_STS",			BIT(2)},
+	{"CLKPART4_OFF_STS",			BIT(3)},
+	{"CLKPART5_OFF_STS",			BIT(4)},
+	{"CLKPART6_OFF_STS",			BIT(5)},
+	{"CLKPART7_OFF_STS",			BIT(6)},
+	{"CLKPART8_OFF_STS",			BIT(7)},
+	{"PCIE0PLL_OFF_STS",			BIT(10)},
+	{"PCIE1PLL_OFF_STS",			BIT(11)},
+	{"PCIE2PLL_OFF_STS",			BIT(12)},
+	{"PCIE3PLL_OFF_STS",			BIT(13)},
+	{"PCIE4PLL_OFF_STS",			BIT(14)},
+	{"PCIE5PLL_OFF_STS",			BIT(15)},
+	{"PCIE6PLL_OFF_STS",			BIT(16)},
+	{"USB2PLL_OFF_STS",			BIT(18)},
+	{"OCPLL_OFF_STS",			BIT(22)},
+	{"AUDIOPLL_OFF_STS",			BIT(23)},
+	{"GBEPLL_OFF_STS",			BIT(24)},
+	{"Fast_XTAL_Osc_OFF_STS",		BIT(25)},
+	{"AC_Ring_Osc_OFF_STS",			BIT(26)},
+	{"MC_Ring_Osc_OFF_STS",			BIT(27)},
+	{"SATAPLL_OFF_STS",			BIT(29)},
+	{"USB3PLL_OFF_STS",			BIT(31)},
+	{}
+};
+
+static const struct pmc_bit_map adl_power_gating_status_0_map[] = {
+	{"PMC_PGD0_PG_STS",			BIT(0)},
+	{"DMI_PGD0_PG_STS",			BIT(1)},
+	{"ESPISPI_PGD0_PG_STS",			BIT(2)},
+	{"XHCI_PGD0_PG_STS",			BIT(3)},
+	{"SPA_PGD0_PG_STS",			BIT(4)},
+	{"SPB_PGD0_PG_STS",			BIT(5)},
+	{"SPC_PGD0_PG_STS",			BIT(6)},
+	{"GBE_PGD0_PG_STS",			BIT(7)},
+	{"SATA_PGD0_PG_STS",			BIT(8)},
+	{"DSP_PGD0_PG_STS",			BIT(9)},
+	{"DSP_PGD1_PG_STS",			BIT(10)},
+	{"DSP_PGD2_PG_STS",			BIT(11)},
+	{"DSP_PGD3_PG_STS",			BIT(12)},
+	{"SPD_PGD0_PG_STS",			BIT(13)},
+	{"LPSS_PGD0_PG_STS",			BIT(14)},
+	{"SMB_PGD0_PG_STS",			BIT(16)},
+	{"ISH_PGD0_PG_STS",			BIT(17)},
+	{"NPK_PGD0_PG_STS",			BIT(19)},
+	{"PECI_PGD0_PG_STS",			BIT(21)},
+	{"XDCI_PGD0_PG_STS",			BIT(25)},
+	{"EXI_PGD0_PG_STS",			BIT(26)},
+	{"CSE_PGD0_PG_STS",			BIT(27)},
+	{"KVMCC_PGD0_PG_STS",			BIT(28)},
+	{"PMT_PGD0_PG_STS",			BIT(29)},
+	{"CLINK_PGD0_PG_STS",			BIT(30)},
+	{"PTIO_PGD0_PG_STS",			BIT(31)},
+	{}
+};
+
+static const struct pmc_bit_map adl_power_gating_status_1_map[] = {
+	{"USBR0_PGD0_PG_STS",			BIT(0)},
+	{"SMT1_PGD0_PG_STS",			BIT(2)},
+	{"CSMERTC_PGD0_PG_STS",			BIT(6)},
+	{"CSMEPSF_PGD0_PG_STS",			BIT(7)},
+	{"CNVI_PGD0_PG_STS",			BIT(19)},
+	{"DSP_PGD4_PG_STS",			BIT(26)},
+	{"SPG_PGD0_PG_STS",			BIT(27)},
+	{"SPE_PGD0_PG_STS",			BIT(28)},
+	{}
+};
+
+static const struct pmc_bit_map adl_power_gating_status_2_map[] = {
+	{"THC0_PGD0_PG_STS",			BIT(7)},
+	{"THC1_PGD0_PG_STS",			BIT(8)},
+	{"SPF_PGD0_PG_STS",			BIT(14)},
+	{}
+};
+
+static const struct pmc_bit_map adl_d3_status_0_map[] = {
+	{"ISH_D3_STS",				BIT(2)},
+	{"LPSS_D3_STS",				BIT(3)},
+	{"XDCI_D3_STS",				BIT(4)},
+	{"XHCI_D3_STS",				BIT(5)},
+	{"SPA_D3_STS",				BIT(12)},
+	{"SPB_D3_STS",				BIT(13)},
+	{"SPC_D3_STS",				BIT(14)},
+	{"SPD_D3_STS",				BIT(15)},
+	{"SPE_D3_STS",				BIT(16)},
+	{"DSP_D3_STS",				BIT(19)},
+	{"SATA_D3_STS",				BIT(20)},
+	{"DMI_D3_STS",				BIT(22)},
+	{}
+};
+
+static const struct pmc_bit_map adl_d3_status_1_map[] = {
+	{"GBE_D3_STS",				BIT(19)},
+	{"CNVI_D3_STS",				BIT(27)},
+	{}
+};
+
+static const struct pmc_bit_map adl_d3_status_2_map[] = {
+	{"CSMERTC_D3_STS",			BIT(1)},
+	{"CSE_D3_STS",				BIT(4)},
+	{"KVMCC_D3_STS",			BIT(5)},
+	{"USBR0_D3_STS",			BIT(6)},
+	{"SMT1_D3_STS",				BIT(8)},
+	{"PTIO_D3_STS",				BIT(16)},
+	{"PMT_D3_STS",				BIT(17)},
+	{}
+};
+
+static const struct pmc_bit_map adl_d3_status_3_map[] = {
+	{"THC0_D3_STS",				BIT(14)},
+	{"THC1_D3_STS",				BIT(15)},
+	{}
+};
+
+static const struct pmc_bit_map adl_vnn_req_status_0_map[] = {
+	{"ISH_VNN_REQ_STS",			BIT(2)},
+	{"ESPISPI_VNN_REQ_STS",			BIT(18)},
+	{"DSP_VNN_REQ_STS",			BIT(19)},
+	{}
+};
+
+static const struct pmc_bit_map adl_vnn_req_status_1_map[] = {
+	{"NPK_VNN_REQ_STS",			BIT(4)},
+	{"EXI_VNN_REQ_STS",			BIT(9)},
+	{"GBE_VNN_REQ_STS",			BIT(19)},
+	{"SMB_VNN_REQ_STS",			BIT(25)},
+	{"CNVI_VNN_REQ_STS",			BIT(27)},
+	{}
+};
+
+static const struct pmc_bit_map adl_vnn_req_status_2_map[] = {
+	{"CSMERTC_VNN_REQ_STS",			BIT(1)},
+	{"CSE_VNN_REQ_STS",			BIT(4)},
+	{"SMT1_VNN_REQ_STS",			BIT(8)},
+	{"CLINK_VNN_REQ_STS",			BIT(14)},
+	{"GPIOCOM4_VNN_REQ_STS",		BIT(20)},
+	{"GPIOCOM3_VNN_REQ_STS",		BIT(21)},
+	{"GPIOCOM2_VNN_REQ_STS",		BIT(22)},
+	{"GPIOCOM1_VNN_REQ_STS",		BIT(23)},
+	{"GPIOCOM0_VNN_REQ_STS",		BIT(24)},
+	{}
+};
+
+static const struct pmc_bit_map adl_vnn_req_status_3_map[] = {
+	{"GPIOCOM5_VNN_REQ_STS",		BIT(11)},
+	{}
+};
+
+static const struct pmc_bit_map adl_vnn_misc_status_map[] = {
+	{"CPU_C10_REQ_STS",			BIT(0)},
+	{"PCIe_LPM_En_REQ_STS",			BIT(3)},
+	{"ITH_REQ_STS",				BIT(5)},
+	{"CNVI_REQ_STS",			BIT(6)},
+	{"ISH_REQ_STS",				BIT(7)},
+	{"USB2_SUS_PG_Sys_REQ_STS",		BIT(10)},
+	{"PCIe_Clk_REQ_STS",			BIT(12)},
+	{"MPHY_Core_DL_REQ_STS",		BIT(16)},
+	{"Break-even_En_REQ_STS",		BIT(17)},
+	{"MPHY_SUS_REQ_STS",			BIT(22)},
+	{"xDCI_attached_REQ_STS",		BIT(24)},
+	{}
+};
+
+static const struct pmc_bit_map *adl_lpm_maps[] = {
+	adl_clocksource_status_map,
+	adl_power_gating_status_0_map,
+	adl_power_gating_status_1_map,
+	adl_power_gating_status_2_map,
+	adl_d3_status_0_map,
+	adl_d3_status_1_map,
+	adl_d3_status_2_map,
+	adl_d3_status_3_map,
+	adl_vnn_req_status_0_map,
+	adl_vnn_req_status_1_map,
+	adl_vnn_req_status_2_map,
+	adl_vnn_req_status_3_map,
+	adl_vnn_misc_status_map,
+	tgl_signal_status_map,
+	NULL
+};
+
+static const struct pmc_reg_map adl_reg_map = {
+	.pfear_sts = ext_adl_pfear_map,
+	.slp_s0_offset = ADL_PMC_SLP_S0_RES_COUNTER_OFFSET,
+	.slp_s0_res_counter_step = TGL_PMC_SLP_S0_RES_COUNTER_STEP,
+	.ltr_show_sts = adl_ltr_show_map,
+	.msr_sts = msr_map,
+	.ltr_ignore_offset = CNP_PMC_LTR_IGNORE_OFFSET,
+	.regmap_length = CNP_PMC_MMIO_REG_LEN,
+	.ppfear0_offset = CNP_PMC_HOST_PPFEAR0A,
+	.ppfear_buckets = CNP_PPFEAR_NUM_ENTRIES,
+	.pm_cfg_offset = CNP_PMC_PM_CFG_OFFSET,
+	.pm_read_disable_bit = CNP_PMC_READ_DISABLE_BIT,
+	.ltr_ignore_max = ADL_NUM_IP_IGN_ALLOWED,
+	.lpm_num_modes = ADL_LPM_NUM_MODES,
+	.lpm_num_maps = ADL_LPM_NUM_MAPS,
+	.lpm_res_counter_step_x2 = TGL_PMC_LPM_RES_COUNTER_STEP_X2,
+	.etr3_offset = ETR3_OFFSET,
+	.lpm_sts_latch_en_offset = ADL_LPM_STATUS_LATCH_EN_OFFSET,
+	.lpm_priority_offset = ADL_LPM_PRI_OFFSET,
+	.lpm_en_offset = ADL_LPM_EN_OFFSET,
+	.lpm_residency_offset = ADL_LPM_RESIDENCY_OFFSET,
+	.lpm_sts = adl_lpm_maps,
+	.lpm_status_offset = ADL_LPM_STATUS_OFFSET,
+	.lpm_live_status_offset = ADL_LPM_LIVE_STATUS_OFFSET,
+};
+
 static inline u32 pmc_core_reg_read(struct pmc_dev *pmcdev, int reg_offset)
 {
 	return readl(pmcdev->regbase + reg_offset);
@@ -1449,9 +1749,42 @@ static int pmc_core_pkgc_show(struct seq_file *s, void *unused)
 }
 DEFINE_SHOW_ATTRIBUTE(pmc_core_pkgc);
 
-static void pmc_core_get_low_power_modes(struct pmc_dev *pmcdev)
+static bool pmc_core_pri_verify(u32 lpm_pri, u8 *mode_order)
 {
-	u8 lpm_priority[LPM_MAX_NUM_MODES];
+	int i, j;
+
+	if (!lpm_pri)
+		return false;
+	/*
+	 * Each byte contains the priority level for 2 modes (7:4 and 3:0).
+	 * In a 32 bit register this allows for describing 8 modes. Store the
+	 * levels and look for values out of range.
+	 */
+	for (i = 0; i < 8; i++) {
+		int level = lpm_pri & GENMASK(3, 0);
+
+		if (level >= LPM_MAX_NUM_MODES)
+			return false;
+
+		mode_order[i] = level;
+		lpm_pri >>= 4;
+	}
+
+	/* Check that we have unique values */
+	for (i = 0; i < LPM_MAX_NUM_MODES - 1; i++)
+		for (j = i + 1; j < LPM_MAX_NUM_MODES; j++)
+			if (mode_order[i] == mode_order[j])
+				return false;
+
+	return true;
+}
+
+static void pmc_core_get_low_power_modes(struct platform_device *pdev)
+{
+	struct pmc_dev *pmcdev = platform_get_drvdata(pdev);
+	u8 pri_order[LPM_MAX_NUM_MODES] = LPM_DEFAULT_PRI;
+	u8 mode_order[LPM_MAX_NUM_MODES];
+	u32 lpm_pri;
 	u32 lpm_en;
 	int mode, i, p;
 
@@ -1462,24 +1795,28 @@ static void pmc_core_get_low_power_modes(struct pmc_dev *pmcdev)
 	lpm_en = pmc_core_reg_read(pmcdev, pmcdev->map->lpm_en_offset);
 	pmcdev->num_lpm_modes = hweight32(lpm_en);
 
-	/* Each byte contains information for 2 modes (7:4 and 3:0) */
-	for (mode = 0; mode < LPM_MAX_NUM_MODES; mode += 2) {
-		u8 priority = pmc_core_reg_read_byte(pmcdev,
-				pmcdev->map->lpm_priority_offset + (mode / 2));
-		int pri0 = GENMASK(3, 0) & priority;
-		int pri1 = (GENMASK(7, 4) & priority) >> 4;
+	/* Read 32 bit LPM_PRI register */
+	lpm_pri = pmc_core_reg_read(pmcdev, pmcdev->map->lpm_priority_offset);
 
-		lpm_priority[pri0] = mode;
-		lpm_priority[pri1] = mode + 1;
-	}
 
 	/*
-	 * Loop though all modes from lowest to highest priority,
+	 * If lpm_pri value passes verification, then override the default
+	 * modes here. Otherwise stick with the default.
+	 */
+	if (pmc_core_pri_verify(lpm_pri, mode_order))
+		/* Get list of modes in priority order */
+		for (mode = 0; mode < LPM_MAX_NUM_MODES; mode++)
+			pri_order[mode_order[mode]] = mode;
+	else
+		dev_warn(&pdev->dev, "Assuming a default substate order for this platform\n");
+
+	/*
+	 * Loop through all modes from lowest to highest priority,
 	 * and capture all enabled modes in order
 	 */
 	i = 0;
 	for (p = LPM_MAX_NUM_MODES - 1; p >= 0; p--) {
-		int mode = lpm_priority[p];
+		int mode = pri_order[p];
 
 		if (!(BIT(mode) & lpm_en))
 			continue;
@@ -1574,6 +1911,7 @@ static const struct x86_cpu_id intel_pmc_core_ids[] = {
 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L,	&icl_reg_map),
 	X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,		&tgl_reg_map),
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,		&tgl_reg_map),
+	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,		&adl_reg_map),
 	{}
 };
 
@@ -1675,17 +2013,17 @@ static int pmc_core_probe(struct platform_device *pdev)
 	mutex_init(&pmcdev->lock);
 
 	pmcdev->pmc_xram_read_bit = pmc_core_check_read_lock_bit(pmcdev);
-	pmc_core_get_low_power_modes(pmcdev);
+	pmc_core_get_low_power_modes(pdev);
 	pmc_core_do_dmi_quirks(pmcdev);
 
 	if (pmcdev->map == &tgl_reg_map)
 		pmc_core_get_tgl_lpm_reqs(pdev);
 
 	/*
-	 * On TGL, due to a hardware limitation, the GBE LTR blocks PC10 when
-	 * a cable is attached. Tell the PMC to ignore it.
+	 * On TGL and ADL, due to a hardware limitation, the GBE LTR blocks PC10
+	 * when a cable is attached. Tell the PMC to ignore it.
 	 */
-	if (pmcdev->map == &tgl_reg_map) {
+	if (pmcdev->map == &tgl_reg_map || pmcdev->map == &adl_reg_map) {
 		dev_dbg(&pdev->dev, "ignoring GBE LTR\n");
 		pmc_core_send_ltr_ignore(pmcdev, 3);
 	}
diff --git a/drivers/platform/x86/intel_pmc_core.h b/drivers/platform/x86/intel/pmc/core.h
index e8dae9c6c45f..a46d3b53bf61 100644
--- a/drivers/platform/x86/intel_pmc_core.h
+++ b/drivers/platform/x86/intel/pmc/core.h
@@ -188,6 +188,8 @@ enum ppfear_regs {
 #define ICL_PMC_SLP_S0_RES_COUNTER_STEP		0x64
 
 #define LPM_MAX_NUM_MODES			8
+#define LPM_DEFAULT_PRI				{ 7, 6, 2, 5, 4, 1, 3, 0 }
+
 #define GET_X2_COUNTER(v)			((v) >> 1)
 #define LPM_STS_LATCH_MODE			BIT(31)
 
@@ -197,6 +199,10 @@ enum ppfear_regs {
 #define TGL_NUM_IP_IGN_ALLOWED			23
 #define TGL_PMC_LPM_RES_COUNTER_STEP_X2		61	/* 30.5us * 2 */
 
+#define ADL_PMC_LTR_SPF				0x1C00
+#define ADL_NUM_IP_IGN_ALLOWED			23
+#define ADL_PMC_SLP_S0_RES_COUNTER_OFFSET	0x1098
+
 /*
  * Tigerlake Power Management Controller register offsets
  */
@@ -218,6 +224,18 @@ enum ppfear_regs {
 /* Extended Test Mode Register LPM bits (TGL and later */
 #define ETR3_CLEAR_LPM_EVENTS			BIT(28)
 
+/* Alder Lake Power Management Controller register offsets */
+#define ADL_LPM_EN_OFFSET			0x179C
+#define ADL_LPM_RESIDENCY_OFFSET		0x17A4
+#define ADL_LPM_NUM_MODES			2
+#define ADL_LPM_NUM_MAPS			14
+
+/* Alder Lake Low Power Mode debug registers */
+#define ADL_LPM_STATUS_OFFSET			0x170C
+#define ADL_LPM_PRI_OFFSET			0x17A0
+#define ADL_LPM_STATUS_LATCH_EN_OFFSET		0x1704
+#define ADL_LPM_LIVE_STATUS_OFFSET		0x1764
+
 const char *pmc_lpm_modes[] = {
 	"S0i2.0",
 	"S0i2.1",
@@ -277,6 +295,7 @@ struct pmc_reg_map {
 	const u32 pm_vric1_offset;
 	/* Low Power Mode registers */
 	const int lpm_num_maps;
+	const int lpm_num_modes;
 	const int lpm_res_counter_step_x2;
 	const u32 lpm_sts_latch_en_offset;
 	const u32 lpm_en_offset;
diff --git a/drivers/platform/x86/intel_pmc_core_pltdrv.c b/drivers/platform/x86/intel/pmc/pltdrv.c
index 73797680b895..73797680b895 100644
--- a/drivers/platform/x86/intel_pmc_core_pltdrv.c
+++ b/drivers/platform/x86/intel/pmc/pltdrv.c
diff --git a/drivers/platform/x86/intel/pmt/Kconfig b/drivers/platform/x86/intel/pmt/Kconfig
new file mode 100644
index 000000000000..d630f883a717
--- /dev/null
+++ b/drivers/platform/x86/intel/pmt/Kconfig
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Intel Platform Monitoring Technology drivers
+#
+
+config INTEL_PMT_CLASS
+	tristate
+	help
+	  The Intel Platform Monitoring Technology (PMT) class driver provides
+	  the basic sysfs interface and file hierarchy used by PMT devices.
+
+	  For more information, see:
+	  <file:Documentation/ABI/testing/sysfs-class-intel_pmt>
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called intel_pmt_class.
+
+config INTEL_PMT_TELEMETRY
+	tristate "Intel Platform Monitoring Technology (PMT) Telemetry driver"
+	depends on MFD_INTEL_PMT
+	select INTEL_PMT_CLASS
+	help
+	  The Intel Platform Monitory Technology (PMT) Telemetry driver provides
+	  access to hardware telemetry metrics on devices that support the
+	  feature.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called intel_pmt_telemetry.
+
+config INTEL_PMT_CRASHLOG
+	tristate "Intel Platform Monitoring Technology (PMT) Crashlog driver"
+	depends on MFD_INTEL_PMT
+	select INTEL_PMT_CLASS
+	help
+	  The Intel Platform Monitoring Technology (PMT) crashlog driver provides
+	  access to hardware crashlog capabilities on devices that support the
+	  feature.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called intel_pmt_crashlog.
diff --git a/drivers/platform/x86/intel/pmt/Makefile b/drivers/platform/x86/intel/pmt/Makefile
new file mode 100644
index 000000000000..279e158c7c23
--- /dev/null
+++ b/drivers/platform/x86/intel/pmt/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for linux/drivers/platform/x86/intel/pmt
+# Intel Platform Monitoring Technology Drivers
+#
+
+obj-$(CONFIG_INTEL_PMT_CLASS)		+= pmt_class.o
+pmt_class-y				:= class.o
+obj-$(CONFIG_INTEL_PMT_TELEMETRY)	+= pmt_telemetry.o
+pmt_telemetry-y				:= telemetry.o
+obj-$(CONFIG_INTEL_PMT_CRASHLOG)	+= pmt_crashlog.o
+pmt_crashlog-y				:= crashlog.o
diff --git a/drivers/platform/x86/intel_pmt_class.c b/drivers/platform/x86/intel/pmt/class.c
index c86ff15b1ed5..659b1073033c 100644
--- a/drivers/platform/x86/intel_pmt_class.c
+++ b/drivers/platform/x86/intel/pmt/class.c
@@ -13,7 +13,7 @@
 #include <linux/mm.h>
 #include <linux/pci.h>
 
-#include "intel_pmt_class.h"
+#include "class.h"
 
 #define PMT_XA_START		0
 #define PMT_XA_MAX		INT_MAX
diff --git a/drivers/platform/x86/intel_pmt_class.h b/drivers/platform/x86/intel/pmt/class.h
index 1337019c2873..1337019c2873 100644
--- a/drivers/platform/x86/intel_pmt_class.h
+++ b/drivers/platform/x86/intel/pmt/class.h
diff --git a/drivers/platform/x86/intel_pmt_crashlog.c b/drivers/platform/x86/intel/pmt/crashlog.c
index 56963ceb6345..1c1021f04d3c 100644
--- a/drivers/platform/x86/intel_pmt_crashlog.c
+++ b/drivers/platform/x86/intel/pmt/crashlog.c
@@ -15,7 +15,7 @@
 #include <linux/uaccess.h>
 #include <linux/overflow.h>
 
-#include "intel_pmt_class.h"
+#include "class.h"
 
 #define DRV_NAME		"pmt_crashlog"
 
diff --git a/drivers/platform/x86/intel_pmt_telemetry.c b/drivers/platform/x86/intel/pmt/telemetry.c
index 9b95ef050457..38d52651c572 100644
--- a/drivers/platform/x86/intel_pmt_telemetry.c
+++ b/drivers/platform/x86/intel/pmt/telemetry.c
@@ -15,7 +15,7 @@
 #include <linux/uaccess.h>
 #include <linux/overflow.h>
 
-#include "intel_pmt_class.h"
+#include "class.h"
 
 #define TELEM_DEV_NAME		"pmt_telemetry"
 
@@ -61,6 +61,14 @@ static int pmt_telem_header_decode(struct intel_pmt_entry *entry,
 	/* Size is measured in DWORDS, but accessor returns bytes */
 	header->size = TELEM_SIZE(readl(disc_table));
 
+	/*
+	 * Some devices may expose non-functioning entries that are
+	 * reserved for future use. They have zero size. Do not fail
+	 * probe for these. Just ignore them.
+	 */
+	if (header->size == 0)
+		return 1;
+
 	return 0;
 }
 
diff --git a/drivers/platform/x86/intel_punit_ipc.c b/drivers/platform/x86/intel/punit_ipc.c
index f58b8543f6ac..f58b8543f6ac 100644
--- a/drivers/platform/x86/intel_punit_ipc.c
+++ b/drivers/platform/x86/intel/punit_ipc.c
diff --git a/drivers/platform/x86/intel-rst.c b/drivers/platform/x86/intel/rst.c
index 3b81cb896fed..3b81cb896fed 100644
--- a/drivers/platform/x86/intel-rst.c
+++ b/drivers/platform/x86/intel/rst.c
diff --git a/drivers/platform/x86/intel-smartconnect.c b/drivers/platform/x86/intel/smartconnect.c
index 64c2dc93472f..64c2dc93472f 100644
--- a/drivers/platform/x86/intel-smartconnect.c
+++ b/drivers/platform/x86/intel/smartconnect.c
diff --git a/drivers/platform/x86/intel_speed_select_if/Kconfig b/drivers/platform/x86/intel/speed_select_if/Kconfig
index ce3e3dc076d2..ce3e3dc076d2 100644
--- a/drivers/platform/x86/intel_speed_select_if/Kconfig
+++ b/drivers/platform/x86/intel/speed_select_if/Kconfig
diff --git a/drivers/platform/x86/intel_speed_select_if/Makefile b/drivers/platform/x86/intel/speed_select_if/Makefile
index 856076206f35..856076206f35 100644
--- a/drivers/platform/x86/intel_speed_select_if/Makefile
+++ b/drivers/platform/x86/intel/speed_select_if/Makefile
diff --git a/drivers/platform/x86/intel_speed_select_if/isst_if_common.c b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
index 6f0cc679c8e5..c9a85eb2e860 100644
--- a/drivers/platform/x86/intel_speed_select_if/isst_if_common.c
+++ b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
@@ -265,9 +265,9 @@ static int isst_if_get_platform_info(void __user *argp)
 {
 	struct isst_if_platform_info info;
 
-	info.api_version = ISST_IF_API_VERSION,
-	info.driver_version = ISST_IF_DRIVER_VERSION,
-	info.max_cmds_per_ioctl = ISST_IF_CMD_LIMIT,
+	info.api_version = ISST_IF_API_VERSION;
+	info.driver_version = ISST_IF_DRIVER_VERSION;
+	info.max_cmds_per_ioctl = ISST_IF_CMD_LIMIT;
 	info.mbox_supported = punit_callbacks[ISST_IF_DEV_MBOX].registered;
 	info.mmio_supported = punit_callbacks[ISST_IF_DEV_MMIO].registered;
 
@@ -379,6 +379,8 @@ static int isst_if_cpu_online(unsigned int cpu)
 	u64 data;
 	int ret;
 
+	isst_cpu_info[cpu].numa_node = cpu_to_node(cpu);
+
 	ret = rdmsrl_safe(MSR_CPU_BUS_NUMBER, &data);
 	if (ret) {
 		/* This is not a fatal error on MSR mailbox only I/F */
@@ -397,7 +399,6 @@ static int isst_if_cpu_online(unsigned int cpu)
 		return ret;
 	}
 	isst_cpu_info[cpu].punit_cpu_id = data;
-	isst_cpu_info[cpu].numa_node = cpu_to_node(cpu);
 
 	isst_restore_msr_local(cpu);
 
diff --git a/drivers/platform/x86/intel_speed_select_if/isst_if_common.h b/drivers/platform/x86/intel/speed_select_if/isst_if_common.h
index fdecdae248d7..fdecdae248d7 100644
--- a/drivers/platform/x86/intel_speed_select_if/isst_if_common.h
+++ b/drivers/platform/x86/intel/speed_select_if/isst_if_common.h
diff --git a/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_msr.c b/drivers/platform/x86/intel/speed_select_if/isst_if_mbox_msr.c
index 1b6eab071068..1b6eab071068 100644
--- a/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_msr.c
+++ b/drivers/platform/x86/intel/speed_select_if/isst_if_mbox_msr.c
diff --git a/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c b/drivers/platform/x86/intel/speed_select_if/isst_if_mbox_pci.c
index df1fc6c719f3..df1fc6c719f3 100644
--- a/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c
+++ b/drivers/platform/x86/intel/speed_select_if/isst_if_mbox_pci.c
diff --git a/drivers/platform/x86/intel_speed_select_if/isst_if_mmio.c b/drivers/platform/x86/intel/speed_select_if/isst_if_mmio.c
index ff49025ec085..ff49025ec085 100644
--- a/drivers/platform/x86/intel_speed_select_if/isst_if_mmio.c
+++ b/drivers/platform/x86/intel/speed_select_if/isst_if_mmio.c
diff --git a/drivers/platform/x86/intel/telemetry/Kconfig b/drivers/platform/x86/intel/telemetry/Kconfig
new file mode 100644
index 000000000000..da887bd03731
--- /dev/null
+++ b/drivers/platform/x86/intel/telemetry/Kconfig
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Intel x86 Platform Specific Drivers
+#
+
+config INTEL_TELEMETRY
+	tristate "Intel SoC Telemetry driver"
+	depends on X86_64
+	depends on MFD_INTEL_PMC_BXT
+	depends on INTEL_PUNIT_IPC
+	help
+	  This driver provides interfaces to configure and use
+	  telemetry for Intel SoC from Apollo Lake onwards.
+	  It is also used to get various SoC events and parameters
+	  directly via debugfs files. Various tools may use
+	  this interface for SoC state monitoring.
diff --git a/drivers/platform/x86/intel/telemetry/Makefile b/drivers/platform/x86/intel/telemetry/Makefile
new file mode 100644
index 000000000000..bfdba5b6c59a
--- /dev/null
+++ b/drivers/platform/x86/intel/telemetry/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Intel x86 Platform Specific Drivers
+#
+
+intel_telemetry_core-y			:= core.o
+obj-$(CONFIG_INTEL_TELEMETRY)		+= intel_telemetry_core.o
+intel_telemetry_pltdrv-y		:= pltdrv.o
+obj-$(CONFIG_INTEL_TELEMETRY)		+= intel_telemetry_pltdrv.o
+intel_telemetry_debugfs-y		:= debugfs.o
+obj-$(CONFIG_INTEL_TELEMETRY)		+= intel_telemetry_debugfs.o
diff --git a/drivers/platform/x86/intel_telemetry_core.c b/drivers/platform/x86/intel/telemetry/core.c
index fdf55b5d6948..fdf55b5d6948 100644
--- a/drivers/platform/x86/intel_telemetry_core.c
+++ b/drivers/platform/x86/intel/telemetry/core.c
diff --git a/drivers/platform/x86/intel_telemetry_debugfs.c b/drivers/platform/x86/intel/telemetry/debugfs.c
index 1d4d0fbfd63c..1d4d0fbfd63c 100644
--- a/drivers/platform/x86/intel_telemetry_debugfs.c
+++ b/drivers/platform/x86/intel/telemetry/debugfs.c
diff --git a/drivers/platform/x86/intel_telemetry_pltdrv.c b/drivers/platform/x86/intel/telemetry/pltdrv.c
index 405dea87de6b..405dea87de6b 100644
--- a/drivers/platform/x86/intel_telemetry_pltdrv.c
+++ b/drivers/platform/x86/intel/telemetry/pltdrv.c
diff --git a/drivers/platform/x86/intel_turbo_max_3.c b/drivers/platform/x86/intel/turbo_max_3.c
index 892140b62898..892140b62898 100644
--- a/drivers/platform/x86/intel_turbo_max_3.c
+++ b/drivers/platform/x86/intel/turbo_max_3.c
diff --git a/drivers/platform/x86/intel-uncore-frequency.c b/drivers/platform/x86/intel/uncore-frequency.c
index 3ee4c5c8a64f..3ee4c5c8a64f 100644
--- a/drivers/platform/x86/intel-uncore-frequency.c
+++ b/drivers/platform/x86/intel/uncore-frequency.c
diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel/vbtn.c
index 309166431063..15f013af9e62 100644
--- a/drivers/platform/x86/intel-vbtn.c
+++ b/drivers/platform/x86/intel/vbtn.c
@@ -14,7 +14,7 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/suspend.h>
-#include "dual_accel_detect.h"
+#include "../dual_accel_detect.h"
 
 /* Returned when NOT in tablet mode on some HP Stream x360 11 models */
 #define VGBS_TABLET_MODE_FLAG_ALT	0x10
diff --git a/drivers/platform/x86/intel/wmi/Kconfig b/drivers/platform/x86/intel/wmi/Kconfig
new file mode 100644
index 000000000000..8e159f712179
--- /dev/null
+++ b/drivers/platform/x86/intel/wmi/Kconfig
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Intel x86 Platform Specific Drivers
+#
+
+config INTEL_WMI
+	bool
+
+config INTEL_WMI_SBL_FW_UPDATE
+	tristate "Intel WMI Slim Bootloader firmware update signaling driver"
+	depends on ACPI_WMI
+	select INTEL_WMI
+	help
+	  Say Y here if you want to be able to use the WMI interface to signal
+	  Slim Bootloader to trigger update on next reboot.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called intel-wmi-sbl-fw-update.
+
+config INTEL_WMI_THUNDERBOLT
+	tristate "Intel WMI thunderbolt force power driver"
+	depends on ACPI_WMI
+	select INTEL_WMI
+	help
+	  Say Y here if you want to be able to use the WMI interface on select
+	  systems to force the power control of Intel Thunderbolt controllers.
+	  This is useful for updating the firmware when devices are not plugged
+	  into the controller.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called intel-wmi-thunderbolt.
diff --git a/drivers/platform/x86/intel/wmi/Makefile b/drivers/platform/x86/intel/wmi/Makefile
new file mode 100644
index 000000000000..c2d56d25dea0
--- /dev/null
+++ b/drivers/platform/x86/intel/wmi/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Intel x86 Platform Specific Drivers
+#
+
+intel-wmi-sbl-fw-update-y				:= sbl-fw-update.o
+obj-$(CONFIG_INTEL_WMI_SBL_FW_UPDATE)	+= intel-wmi-sbl-fw-update.o
+intel-wmi-thunderbolt-y					:= thunderbolt.o
+obj-$(CONFIG_INTEL_WMI_THUNDERBOLT)	+= intel-wmi-thunderbolt.o
diff --git a/drivers/platform/x86/intel-wmi-sbl-fw-update.c b/drivers/platform/x86/intel/wmi/sbl-fw-update.c
index 3c86e0108a24..3c86e0108a24 100644
--- a/drivers/platform/x86/intel-wmi-sbl-fw-update.c
+++ b/drivers/platform/x86/intel/wmi/sbl-fw-update.c
diff --git a/drivers/platform/x86/intel-wmi-thunderbolt.c b/drivers/platform/x86/intel/wmi/thunderbolt.c
index 4ae87060d18b..4ae87060d18b 100644
--- a/drivers/platform/x86/intel-wmi-thunderbolt.c
+++ b/drivers/platform/x86/intel/wmi/thunderbolt.c
diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c
index 9171a46a9e3f..bfa0cc20750d 100644
--- a/drivers/platform/x86/intel_scu_ipc.c
+++ b/drivers/platform/x86/intel_scu_ipc.c
@@ -457,7 +457,7 @@ int intel_scu_ipc_dev_simple_command(struct intel_scu_ipc_dev *scu, int cmd,
 EXPORT_SYMBOL(intel_scu_ipc_dev_simple_command);
 
 /**
- * intel_scu_ipc_command_with_size() - Command with data
+ * intel_scu_ipc_dev_command_with_size() - Command with data
  * @scu: Optional SCU IPC instance
  * @cmd: Command
  * @sub: Sub type
diff --git a/drivers/platform/x86/lg-laptop.c b/drivers/platform/x86/lg-laptop.c
index 20145b539335..3e520d5bca07 100644
--- a/drivers/platform/x86/lg-laptop.c
+++ b/drivers/platform/x86/lg-laptop.c
@@ -8,6 +8,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/acpi.h>
+#include <linux/dmi.h>
 #include <linux/input.h>
 #include <linux/input/sparse-keymap.h>
 #include <linux/kernel.h>
@@ -16,11 +17,12 @@
 #include <linux/platform_device.h>
 #include <linux/types.h>
 
-#define LED_DEVICE(_name, max) struct led_classdev _name = { \
+#define LED_DEVICE(_name, max, flag) struct led_classdev _name = { \
 	.name           = __stringify(_name),   \
 	.max_brightness = max,                  \
 	.brightness_set = _name##_set,          \
 	.brightness_get = _name##_get,          \
+	.flags = flag,                          \
 }
 
 MODULE_AUTHOR("Matan Ziv-Av");
@@ -69,9 +71,13 @@ static u32 inited;
 #define INIT_INPUT_ACPI         0x04
 #define INIT_SPARSE_KEYMAP      0x80
 
+static int battery_limit_use_wmbb;
+static struct led_classdev kbd_backlight;
+static enum led_brightness get_kbd_backlight_level(void);
+
 static const struct key_entry wmi_keymap[] = {
 	{KE_KEY, 0x70, {KEY_F15} },	 /* LG control panel (F1) */
-	{KE_KEY, 0x74, {KEY_F13} },	 /* Touchpad toggle (F5) */
+	{KE_KEY, 0x74, {KEY_F21} },	 /* Touchpad toggle (F5) */
 	{KE_KEY, 0xf020000, {KEY_F14} }, /* Read mode (F9) */
 	{KE_KEY, 0x10000000, {KEY_F16} },/* Keyboard backlight (F8) - pressing
 					  * this key both sends an event and
@@ -214,10 +220,16 @@ static void wmi_notify(u32 value, void *context)
 		int eventcode = obj->integer.value;
 		struct key_entry *key;
 
-		key =
-		    sparse_keymap_entry_from_scancode(wmi_input_dev, eventcode);
-		if (key && key->type == KE_KEY)
-			sparse_keymap_report_entry(wmi_input_dev, key, 1, true);
+		if (eventcode == 0x10000000) {
+			led_classdev_notify_brightness_hw_changed(
+				&kbd_backlight, get_kbd_backlight_level());
+		} else {
+			key = sparse_keymap_entry_from_scancode(
+				wmi_input_dev, eventcode);
+			if (key && key->type == KE_KEY)
+				sparse_keymap_report_entry(wmi_input_dev,
+							   key, 1, true);
+		}
 	}
 
 	pr_debug("Type: %i    Eventcode: 0x%llx\n", obj->type,
@@ -461,7 +473,10 @@ static ssize_t battery_care_limit_store(struct device *dev,
 	if (value == 100 || value == 80) {
 		union acpi_object *r;
 
-		r = lg_wmab(WM_BATT_LIMIT, WM_SET, value);
+		if (battery_limit_use_wmbb)
+			r = lg_wmbb(WMBB_BATT_LIMIT, WM_SET, value);
+		else
+			r = lg_wmab(WM_BATT_LIMIT, WM_SET, value);
 		if (!r)
 			return -EIO;
 
@@ -479,16 +494,29 @@ static ssize_t battery_care_limit_show(struct device *dev,
 	unsigned int status;
 	union acpi_object *r;
 
-	r = lg_wmab(WM_BATT_LIMIT, WM_GET, 0);
-	if (!r)
-		return -EIO;
+	if (battery_limit_use_wmbb) {
+		r = lg_wmbb(WMBB_BATT_LIMIT, WM_GET, 0);
+		if (!r)
+			return -EIO;
 
-	if (r->type != ACPI_TYPE_INTEGER) {
-		kfree(r);
-		return -EIO;
-	}
+		if (r->type != ACPI_TYPE_BUFFER) {
+			kfree(r);
+			return -EIO;
+		}
 
-	status = r->integer.value;
+		status = r->buffer.pointer[0x10];
+	} else {
+		r = lg_wmab(WM_BATT_LIMIT, WM_GET, 0);
+		if (!r)
+			return -EIO;
+
+		if (r->type != ACPI_TYPE_INTEGER) {
+			kfree(r);
+			return -EIO;
+		}
+
+		status = r->integer.value;
+	}
 	kfree(r);
 	if (status != 80 && status != 100)
 		status = 0;
@@ -529,7 +557,7 @@ static enum led_brightness tpad_led_get(struct led_classdev *cdev)
 	return ggov(GOV_TLED) > 0 ? LED_ON : LED_OFF;
 }
 
-static LED_DEVICE(tpad_led, 1);
+static LED_DEVICE(tpad_led, 1, 0);
 
 static void kbd_backlight_set(struct led_classdev *cdev,
 			      enum led_brightness brightness)
@@ -546,7 +574,7 @@ static void kbd_backlight_set(struct led_classdev *cdev,
 	kfree(r);
 }
 
-static enum led_brightness kbd_backlight_get(struct led_classdev *cdev)
+static enum led_brightness get_kbd_backlight_level(void)
 {
 	union acpi_object *r;
 	int val;
@@ -577,7 +605,12 @@ static enum led_brightness kbd_backlight_get(struct led_classdev *cdev)
 	return val;
 }
 
-static LED_DEVICE(kbd_backlight, 255);
+static enum led_brightness kbd_backlight_get(struct led_classdev *cdev)
+{
+	return get_kbd_backlight_level();
+}
+
+static LED_DEVICE(kbd_backlight, 255, LED_BRIGHT_HW_CHANGED);
 
 static void wmi_input_destroy(void)
 {
@@ -602,6 +635,8 @@ static struct platform_driver pf_driver = {
 static int acpi_add(struct acpi_device *device)
 {
 	int ret;
+	const char *product;
+	int year = 2017;
 
 	if (pf_device)
 		return 0;
@@ -619,6 +654,42 @@ static int acpi_add(struct acpi_device *device)
 		pr_err("unable to register platform device\n");
 		goto out_platform_registered;
 	}
+	product = dmi_get_system_info(DMI_PRODUCT_NAME);
+	if (strlen(product) > 4)
+		switch (product[4]) {
+		case '5':
+		case '6':
+			year = 2016;
+			break;
+		case '7':
+			year = 2017;
+			break;
+		case '8':
+			year = 2018;
+			break;
+		case '9':
+			year = 2019;
+			break;
+		case '0':
+			if (strlen(product) > 5)
+				switch (product[5]) {
+				case 'N':
+					year = 2020;
+					break;
+				case 'P':
+					year = 2021;
+					break;
+				default:
+					year = 2022;
+				}
+			break;
+		default:
+			year = 2019;
+		}
+	pr_info("product: %s  year: %d\n", product, year);
+
+	if (year >= 2019)
+		battery_limit_use_wmbb = 1;
 
 	ret = sysfs_create_group(&pf_device->dev.kobj, &dev_attribute_group);
 	if (ret)
diff --git a/drivers/platform/x86/meraki-mx100.c b/drivers/platform/x86/meraki-mx100.c
new file mode 100644
index 000000000000..3751ed36a980
--- /dev/null
+++ b/drivers/platform/x86/meraki-mx100.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Cisco Meraki MX100 (Tinkerbell) board platform driver
+ *
+ * Based off of arch/x86/platform/meraki/tink.c from the
+ * Meraki GPL release meraki-firmware-sources-r23-20150601
+ *
+ * Format inspired by platform/x86/pcengines-apuv2.c
+ *
+ * Copyright (C) 2021 Chris Blake <chrisrblake93@gmail.com>
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/dmi.h>
+#include <linux/err.h>
+#include <linux/gpio_keys.h>
+#include <linux/gpio/machine.h>
+#include <linux/input.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/leds.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#define TINK_GPIO_DRIVER_NAME "gpio_ich"
+
+/* LEDs */
+static const struct gpio_led tink_leds[] = {
+	{
+		.name = "mx100:green:internet",
+		.default_trigger = "default-on",
+	},
+	{
+		.name = "mx100:green:lan2",
+	},
+	{
+		.name = "mx100:green:lan3",
+	},
+	{
+		.name = "mx100:green:lan4",
+	},
+	{
+		.name = "mx100:green:lan5",
+	},
+	{
+		.name = "mx100:green:lan6",
+	},
+	{
+		.name = "mx100:green:lan7",
+	},
+	{
+		.name = "mx100:green:lan8",
+	},
+	{
+		.name = "mx100:green:lan9",
+	},
+	{
+		.name = "mx100:green:lan10",
+	},
+	{
+		.name = "mx100:green:lan11",
+	},
+	{
+		.name = "mx100:green:ha",
+	},
+	{
+		.name = "mx100:orange:ha",
+	},
+	{
+		.name = "mx100:green:usb",
+	},
+	{
+		.name = "mx100:orange:usb",
+	},
+};
+
+static const struct gpio_led_platform_data tink_leds_pdata = {
+	.num_leds	= ARRAY_SIZE(tink_leds),
+	.leds		= tink_leds,
+};
+
+static struct gpiod_lookup_table tink_leds_table = {
+	.dev_id = "leds-gpio",
+	.table = {
+		GPIO_LOOKUP_IDX(TINK_GPIO_DRIVER_NAME, 11,
+				NULL, 0, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX(TINK_GPIO_DRIVER_NAME, 18,
+				NULL, 1, GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP_IDX(TINK_GPIO_DRIVER_NAME, 20,
+				NULL, 2, GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP_IDX(TINK_GPIO_DRIVER_NAME, 22,
+				NULL, 3, GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP_IDX(TINK_GPIO_DRIVER_NAME, 23,
+				NULL, 4, GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP_IDX(TINK_GPIO_DRIVER_NAME, 32,
+				NULL, 5, GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP_IDX(TINK_GPIO_DRIVER_NAME, 34,
+				NULL, 6, GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP_IDX(TINK_GPIO_DRIVER_NAME, 35,
+				NULL, 7, GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP_IDX(TINK_GPIO_DRIVER_NAME, 36,
+				NULL, 8, GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP_IDX(TINK_GPIO_DRIVER_NAME, 37,
+				NULL, 9, GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP_IDX(TINK_GPIO_DRIVER_NAME, 48,
+				NULL, 10, GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP_IDX(TINK_GPIO_DRIVER_NAME, 16,
+				NULL, 11, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX(TINK_GPIO_DRIVER_NAME, 7,
+				NULL, 12, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX(TINK_GPIO_DRIVER_NAME, 21,
+				NULL, 13, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX(TINK_GPIO_DRIVER_NAME, 19,
+				NULL, 14, GPIO_ACTIVE_LOW),
+		{} /* Terminating entry */
+	}
+};
+
+/* Reset Button */
+static struct gpio_keys_button tink_buttons[] = {
+	{
+		.desc			= "Reset",
+		.type			= EV_KEY,
+		.code			= KEY_RESTART,
+		.active_low             = 1,
+		.debounce_interval      = 100,
+	},
+};
+
+static const struct gpio_keys_platform_data tink_buttons_pdata = {
+	.buttons	= tink_buttons,
+	.nbuttons	= ARRAY_SIZE(tink_buttons),
+	.poll_interval  = 20,
+	.rep		= 0,
+	.name		= "mx100-keys",
+};
+
+static struct gpiod_lookup_table tink_keys_table = {
+	.dev_id = "gpio-keys-polled",
+	.table = {
+		GPIO_LOOKUP_IDX(TINK_GPIO_DRIVER_NAME, 60,
+				NULL, 0, GPIO_ACTIVE_LOW),
+		{} /* Terminating entry */
+	}
+};
+
+/* Board setup */
+static const struct dmi_system_id tink_systems[] __initconst = {
+	{
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Cisco"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "MX100-HW"),
+		},
+	},
+	{} /* Terminating entry */
+};
+MODULE_DEVICE_TABLE(dmi, tink_systems);
+
+static struct platform_device *tink_leds_pdev;
+static struct platform_device *tink_keys_pdev;
+
+static struct platform_device * __init tink_create_dev(
+	const char *name, const void *pdata, size_t sz)
+{
+	struct platform_device *pdev;
+
+	pdev = platform_device_register_data(NULL,
+		name, PLATFORM_DEVID_NONE, pdata, sz);
+	if (IS_ERR(pdev))
+		pr_err("failed registering %s: %ld\n", name, PTR_ERR(pdev));
+
+	return pdev;
+}
+
+static int __init tink_board_init(void)
+{
+	int ret;
+
+	if (!dmi_first_match(tink_systems))
+		return -ENODEV;
+
+	/*
+	 * We need to make sure that GPIO60 isn't set to native mode as is default since it's our
+	 * Reset Button. To do this, write to GPIO_USE_SEL2 to have GPIO60 set to GPIO mode.
+	 * This is documented on page 1609 of the PCH datasheet, order number 327879-005US
+	 */
+	outl(inl(0x530) | BIT(28), 0x530);
+
+	gpiod_add_lookup_table(&tink_leds_table);
+	gpiod_add_lookup_table(&tink_keys_table);
+
+	tink_leds_pdev = tink_create_dev("leds-gpio",
+		&tink_leds_pdata, sizeof(tink_leds_pdata));
+	if (IS_ERR(tink_leds_pdev)) {
+		ret = PTR_ERR(tink_leds_pdev);
+		goto err;
+	}
+
+	tink_keys_pdev = tink_create_dev("gpio-keys-polled",
+		&tink_buttons_pdata, sizeof(tink_buttons_pdata));
+	if (IS_ERR(tink_keys_pdev)) {
+		ret = PTR_ERR(tink_keys_pdev);
+		platform_device_unregister(tink_leds_pdev);
+		goto err;
+	}
+
+	return 0;
+
+err:
+	gpiod_remove_lookup_table(&tink_keys_table);
+	gpiod_remove_lookup_table(&tink_leds_table);
+	return ret;
+}
+module_init(tink_board_init);
+
+static void __exit tink_board_exit(void)
+{
+	platform_device_unregister(tink_keys_pdev);
+	platform_device_unregister(tink_leds_pdev);
+	gpiod_remove_lookup_table(&tink_keys_table);
+	gpiod_remove_lookup_table(&tink_leds_table);
+}
+module_exit(tink_board_exit);
+
+MODULE_AUTHOR("Chris Blake <chrisrblake93@gmail.com>");
+MODULE_DESCRIPTION("Cisco Meraki MX100 Platform Driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:meraki-mx100");
diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
index 6cfed4427fb0..9472aae72df2 100644
--- a/drivers/platform/x86/think-lmi.c
+++ b/drivers/platform/x86/think-lmi.c
@@ -20,6 +20,10 @@
 #include "firmware_attributes_class.h"
 #include "think-lmi.h"
 
+static bool debug_support;
+module_param(debug_support, bool, 0444);
+MODULE_PARM_DESC(debug_support, "Enable debug command support");
+
 /*
  * Name:
  *  Lenovo_BiosSetting
@@ -116,6 +120,14 @@
  */
 #define LENOVO_GET_BIOS_SELECTIONS_GUID	"7364651A-132F-4FE7-ADAA-40C6C7EE2E3B"
 
+/*
+ * Name:
+ *  Lenovo_DebugCmdGUID
+ * Description
+ *  Debug entry GUID method for entering debug commands to the BIOS
+ */
+#define LENOVO_DEBUG_CMD_GUID "7FF47003-3B6C-4E5E-A227-E979824A85D1"
+
 #define TLMI_POP_PWD (1 << 0)
 #define TLMI_PAP_PWD (1 << 1)
 #define to_tlmi_pwd_setting(kobj)  container_of(kobj, struct tlmi_pwd_setting, kobj)
@@ -660,6 +672,64 @@ static ssize_t pending_reboot_show(struct kobject *kobj, struct kobj_attribute *
 
 static struct kobj_attribute pending_reboot = __ATTR_RO(pending_reboot);
 
+/* ---- Debug interface--------------------------------------------------------- */
+static ssize_t debug_cmd_store(struct kobject *kobj, struct kobj_attribute *attr,
+				const char *buf, size_t count)
+{
+	char *set_str = NULL, *new_setting = NULL;
+	char *auth_str = NULL;
+	char *p;
+	int ret;
+
+	if (!tlmi_priv.can_debug_cmd)
+		return -EOPNOTSUPP;
+
+	new_setting = kstrdup(buf, GFP_KERNEL);
+	if (!new_setting)
+		return -ENOMEM;
+
+	/* Strip out CR if one is present */
+	p = strchrnul(new_setting, '\n');
+	*p = '\0';
+
+	if (tlmi_priv.pwd_admin->valid && tlmi_priv.pwd_admin->password[0]) {
+		auth_str = kasprintf(GFP_KERNEL, "%s,%s,%s;",
+				tlmi_priv.pwd_admin->password,
+				encoding_options[tlmi_priv.pwd_admin->encoding],
+				tlmi_priv.pwd_admin->kbdlang);
+		if (!auth_str) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+
+	if (auth_str)
+		set_str = kasprintf(GFP_KERNEL, "%s,%s", new_setting, auth_str);
+	else
+		set_str = kasprintf(GFP_KERNEL, "%s;", new_setting);
+	if (!set_str) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = tlmi_simple_call(LENOVO_DEBUG_CMD_GUID, set_str);
+	if (ret)
+		goto out;
+
+	if (!ret && !tlmi_priv.pending_changes) {
+		tlmi_priv.pending_changes = true;
+		/* let userland know it may need to check reboot pending again */
+		kobject_uevent(&tlmi_priv.class_dev->kobj, KOBJ_CHANGE);
+	}
+out:
+	kfree(auth_str);
+	kfree(set_str);
+	kfree(new_setting);
+	return ret ?: count;
+}
+
+static struct kobj_attribute debug_cmd = __ATTR_WO(debug_cmd);
+
 /* ---- Initialisation --------------------------------------------------------- */
 static void tlmi_release_attr(void)
 {
@@ -673,6 +743,8 @@ static void tlmi_release_attr(void)
 		}
 	}
 	sysfs_remove_file(&tlmi_priv.attribute_kset->kobj, &pending_reboot.attr);
+	if (tlmi_priv.can_debug_cmd && debug_support)
+		sysfs_remove_file(&tlmi_priv.attribute_kset->kobj, &debug_cmd.attr);
 	kset_unregister(tlmi_priv.attribute_kset);
 
 	/* Authentication structures */
@@ -737,6 +809,11 @@ static int tlmi_sysfs_init(void)
 	if (ret)
 		goto fail_create_attr;
 
+	if (tlmi_priv.can_debug_cmd && debug_support) {
+		ret = sysfs_create_file(&tlmi_priv.attribute_kset->kobj, &debug_cmd.attr);
+		if (ret)
+			goto fail_create_attr;
+	}
 	/* Create authentication entries */
 	tlmi_priv.authentication_kset = kset_create_and_add("authentication", NULL,
 								&tlmi_priv.class_dev->kobj);
@@ -793,6 +870,9 @@ static int tlmi_analyze(void)
 	if (wmi_has_guid(LENOVO_BIOS_PASSWORD_SETTINGS_GUID))
 		tlmi_priv.can_get_password_settings = true;
 
+	if (wmi_has_guid(LENOVO_DEBUG_CMD_GUID))
+		tlmi_priv.can_debug_cmd = true;
+
 	/*
 	 * Try to find the number of valid settings of this machine
 	 * and use it to create sysfs attributes.
diff --git a/drivers/platform/x86/think-lmi.h b/drivers/platform/x86/think-lmi.h
index eb598846628a..f8e26823075f 100644
--- a/drivers/platform/x86/think-lmi.h
+++ b/drivers/platform/x86/think-lmi.h
@@ -61,6 +61,7 @@ struct think_lmi {
 	bool can_set_bios_password;
 	bool can_get_password_settings;
 	bool pending_changes;
+	bool can_debug_cmd;
 
 	struct tlmi_attr_setting *setting[TLMI_SETTINGS_COUNT];
 	struct device *class_dev;
diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig
index c76adedd58c9..aa29841bbb79 100644
--- a/drivers/pwm/Kconfig
+++ b/drivers/pwm/Kconfig
@@ -272,7 +272,7 @@ config PWM_IQS620A
 
 config PWM_JZ4740
 	tristate "Ingenic JZ47xx PWM support"
-	depends on MIPS
+	depends on MIPS || COMPILE_TEST
 	depends on COMMON_CLK
 	select MFD_SYSCON
 	help
@@ -284,7 +284,8 @@ config PWM_JZ4740
 
 config PWM_KEEMBAY
 	tristate "Intel Keem Bay PWM driver"
-	depends on ARCH_KEEMBAY || (ARM64 && COMPILE_TEST)
+	depends on ARCH_KEEMBAY || COMPILE_TEST
+	depends on COMMON_CLK && HAS_IOMEM
 	help
 	  The platform driver for Intel Keem Bay PWM controller.
 
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index 35e894f4a379..4527f09a5c50 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -304,7 +304,7 @@ EXPORT_SYMBOL_GPL(pwmchip_add);
  *
  * Returns: 0 on success or a negative error code on failure.
  */
-int pwmchip_remove(struct pwm_chip *chip)
+void pwmchip_remove(struct pwm_chip *chip)
 {
 	pwmchip_sysfs_unexport(chip);
 
@@ -318,8 +318,6 @@ int pwmchip_remove(struct pwm_chip *chip)
 	free_pwms(chip);
 
 	mutex_unlock(&pwm_lock);
-
-	return 0;
 }
 EXPORT_SYMBOL_GPL(pwmchip_remove);
 
diff --git a/drivers/pwm/pwm-ab8500.c b/drivers/pwm/pwm-ab8500.c
index e2a26d9da25b..ad37bc46f272 100644
--- a/drivers/pwm/pwm-ab8500.c
+++ b/drivers/pwm/pwm-ab8500.c
@@ -22,14 +22,21 @@
 
 struct ab8500_pwm_chip {
 	struct pwm_chip chip;
+	unsigned int hwid;
 };
 
+static struct ab8500_pwm_chip *ab8500_pwm_from_chip(struct pwm_chip *chip)
+{
+	return container_of(chip, struct ab8500_pwm_chip, chip);
+}
+
 static int ab8500_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 			    const struct pwm_state *state)
 {
 	int ret;
 	u8 reg;
 	unsigned int higher_val, lower_val;
+	struct ab8500_pwm_chip *ab8500 = ab8500_pwm_from_chip(chip);
 
 	if (state->polarity != PWM_POLARITY_NORMAL)
 		return -EINVAL;
@@ -37,7 +44,7 @@ static int ab8500_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 	if (!state->enabled) {
 		ret = abx500_mask_and_set_register_interruptible(chip->dev,
 					AB8500_MISC, AB8500_PWM_OUT_CTRL7_REG,
-					1 << (chip->base - 1), 0);
+					1 << ab8500->hwid, 0);
 
 		if (ret < 0)
 			dev_err(chip->dev, "%s: Failed to disable PWM, Error %d\n",
@@ -56,7 +63,7 @@ static int ab8500_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 	 */
 	higher_val = ((state->duty_cycle & 0x0300) >> 8);
 
-	reg = AB8500_PWM_OUT_CTRL1_REG + ((chip->base - 1) * 2);
+	reg = AB8500_PWM_OUT_CTRL1_REG + (ab8500->hwid * 2);
 
 	ret = abx500_set_register_interruptible(chip->dev, AB8500_MISC,
 			reg, (u8)lower_val);
@@ -70,7 +77,7 @@ static int ab8500_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 
 	ret = abx500_mask_and_set_register_interruptible(chip->dev,
 				AB8500_MISC, AB8500_PWM_OUT_CTRL7_REG,
-				1 << (chip->base - 1), 1 << (chip->base - 1));
+				1 << ab8500->hwid, 1 << ab8500->hwid);
 	if (ret < 0)
 		dev_err(chip->dev, "%s: Failed to enable PWM, Error %d\n",
 							pwm->label, ret);
@@ -88,6 +95,9 @@ static int ab8500_pwm_probe(struct platform_device *pdev)
 	struct ab8500_pwm_chip *ab8500;
 	int err;
 
+	if (pdev->id < 1 || pdev->id > 31)
+		return dev_err_probe(&pdev->dev, EINVAL, "Invalid device id %d\n", pdev->id);
+
 	/*
 	 * Nothing to be done in probe, this is required to get the
 	 * device which is required for ab8500 read and write
@@ -99,27 +109,13 @@ static int ab8500_pwm_probe(struct platform_device *pdev)
 	ab8500->chip.dev = &pdev->dev;
 	ab8500->chip.ops = &ab8500_pwm_ops;
 	ab8500->chip.npwm = 1;
+	ab8500->hwid = pdev->id - 1;
 
-	err = pwmchip_add(&ab8500->chip);
+	err = devm_pwmchip_add(&pdev->dev, &ab8500->chip);
 	if (err < 0)
 		return dev_err_probe(&pdev->dev, err, "Failed to add pwm chip\n");
 
 	dev_dbg(&pdev->dev, "pwm probe successful\n");
-	platform_set_drvdata(pdev, ab8500);
-
-	return 0;
-}
-
-static int ab8500_pwm_remove(struct platform_device *pdev)
-{
-	struct ab8500_pwm_chip *ab8500 = platform_get_drvdata(pdev);
-	int err;
-
-	err = pwmchip_remove(&ab8500->chip);
-	if (err < 0)
-		return err;
-
-	dev_dbg(&pdev->dev, "pwm driver removed\n");
 
 	return 0;
 }
@@ -129,7 +125,6 @@ static struct platform_driver ab8500_pwm_driver = {
 		.name = "ab8500-pwm",
 	},
 	.probe = ab8500_pwm_probe,
-	.remove = ab8500_pwm_remove,
 };
 module_platform_driver(ab8500_pwm_driver);
 
diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c
index 4459325d3650..a43b2babc809 100644
--- a/drivers/pwm/pwm-atmel-hlcdc.c
+++ b/drivers/pwm/pwm-atmel-hlcdc.c
@@ -281,11 +281,8 @@ static int atmel_hlcdc_pwm_probe(struct platform_device *pdev)
 static int atmel_hlcdc_pwm_remove(struct platform_device *pdev)
 {
 	struct atmel_hlcdc_pwm *chip = platform_get_drvdata(pdev);
-	int ret;
 
-	ret = pwmchip_remove(&chip->chip);
-	if (ret)
-		return ret;
+	pwmchip_remove(&chip->chip);
 
 	clk_disable_unprepare(chip->hlcdc->periph_clk);
 
diff --git a/drivers/pwm/pwm-atmel-tcb.c b/drivers/pwm/pwm-atmel-tcb.c
index bf398f21484d..36f7ea381838 100644
--- a/drivers/pwm/pwm-atmel-tcb.c
+++ b/drivers/pwm/pwm-atmel-tcb.c
@@ -503,11 +503,8 @@ err_slow_clk:
 static int atmel_tcb_pwm_remove(struct platform_device *pdev)
 {
 	struct atmel_tcb_pwm_chip *tcbpwm = platform_get_drvdata(pdev);
-	int err;
 
-	err = pwmchip_remove(&tcbpwm->chip);
-	if (err < 0)
-		return err;
+	pwmchip_remove(&tcbpwm->chip);
 
 	clk_disable_unprepare(tcbpwm->slow_clk);
 	clk_put(tcbpwm->slow_clk);
diff --git a/drivers/pwm/pwm-atmel.c b/drivers/pwm/pwm-atmel.c
index a8162bae3e8a..e748604403cc 100644
--- a/drivers/pwm/pwm-atmel.c
+++ b/drivers/pwm/pwm-atmel.c
@@ -84,9 +84,19 @@ struct atmel_pwm_chip {
 	void __iomem *base;
 	const struct atmel_pwm_data *data;
 
-	unsigned int updated_pwms;
-	/* ISR is cleared when read, ensure only one thread does that */
-	struct mutex isr_lock;
+	/*
+	 * The hardware supports a mechanism to update a channel's duty cycle at
+	 * the end of the currently running period. When such an update is
+	 * pending we delay disabling the PWM until the new configuration is
+	 * active because otherwise pmw_config(duty_cycle=0); pwm_disable();
+	 * might not result in an inactive output.
+	 * This bitmask tracks for which channels an update is pending in
+	 * hardware.
+	 */
+	u32 update_pending;
+
+	/* Protects .update_pending */
+	spinlock_t lock;
 };
 
 static inline struct atmel_pwm_chip *to_atmel_pwm_chip(struct pwm_chip *chip)
@@ -123,6 +133,64 @@ static inline void atmel_pwm_ch_writel(struct atmel_pwm_chip *chip,
 	atmel_pwm_writel(chip, base + offset, val);
 }
 
+static void atmel_pwm_update_pending(struct atmel_pwm_chip *chip)
+{
+	/*
+	 * Each channel that has its bit in ISR set started a new period since
+	 * ISR was cleared and so there is no more update pending.  Note that
+	 * reading ISR clears it, so this needs to handle all channels to not
+	 * loose information.
+	 */
+	u32 isr = atmel_pwm_readl(chip, PWM_ISR);
+
+	chip->update_pending &= ~isr;
+}
+
+static void atmel_pwm_set_pending(struct atmel_pwm_chip *chip, unsigned int ch)
+{
+	spin_lock(&chip->lock);
+
+	/*
+	 * Clear pending flags in hardware because otherwise there might still
+	 * be a stale flag in ISR.
+	 */
+	atmel_pwm_update_pending(chip);
+
+	chip->update_pending |= (1 << ch);
+
+	spin_unlock(&chip->lock);
+}
+
+static int atmel_pwm_test_pending(struct atmel_pwm_chip *chip, unsigned int ch)
+{
+	int ret = 0;
+
+	spin_lock(&chip->lock);
+
+	if (chip->update_pending & (1 << ch)) {
+		atmel_pwm_update_pending(chip);
+
+		if (chip->update_pending & (1 << ch))
+			ret = 1;
+	}
+
+	spin_unlock(&chip->lock);
+
+	return ret;
+}
+
+static int atmel_pwm_wait_nonpending(struct atmel_pwm_chip *chip, unsigned int ch)
+{
+	unsigned long timeout = jiffies + 2 * HZ;
+	int ret;
+
+	while ((ret = atmel_pwm_test_pending(chip, ch)) &&
+	       time_before(jiffies, timeout))
+		usleep_range(10, 100);
+
+	return ret ? -ETIMEDOUT : 0;
+}
+
 static int atmel_pwm_calculate_cprd_and_pres(struct pwm_chip *chip,
 					     unsigned long clkrate,
 					     const struct pwm_state *state,
@@ -185,6 +253,7 @@ static void atmel_pwm_update_cdty(struct pwm_chip *chip, struct pwm_device *pwm,
 
 	atmel_pwm_ch_writel(atmel_pwm, pwm->hwpwm,
 			    atmel_pwm->data->regs.duty_upd, cdty);
+	atmel_pwm_set_pending(atmel_pwm, pwm->hwpwm);
 }
 
 static void atmel_pwm_set_cprd_cdty(struct pwm_chip *chip,
@@ -205,20 +274,8 @@ static void atmel_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm,
 	struct atmel_pwm_chip *atmel_pwm = to_atmel_pwm_chip(chip);
 	unsigned long timeout = jiffies + 2 * HZ;
 
-	/*
-	 * Wait for at least a complete period to have passed before disabling a
-	 * channel to be sure that CDTY has been updated
-	 */
-	mutex_lock(&atmel_pwm->isr_lock);
-	atmel_pwm->updated_pwms |= atmel_pwm_readl(atmel_pwm, PWM_ISR);
-
-	while (!(atmel_pwm->updated_pwms & (1 << pwm->hwpwm)) &&
-	       time_before(jiffies, timeout)) {
-		usleep_range(10, 100);
-		atmel_pwm->updated_pwms |= atmel_pwm_readl(atmel_pwm, PWM_ISR);
-	}
+	atmel_pwm_wait_nonpending(atmel_pwm, pwm->hwpwm);
 
-	mutex_unlock(&atmel_pwm->isr_lock);
 	atmel_pwm_writel(atmel_pwm, PWM_DIS, 1 << pwm->hwpwm);
 
 	/*
@@ -292,10 +349,6 @@ static int atmel_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 			val |= PWM_CMR_CPOL;
 		atmel_pwm_ch_writel(atmel_pwm, pwm->hwpwm, PWM_CMR, val);
 		atmel_pwm_set_cprd_cdty(chip, pwm, cprd, cdty);
-		mutex_lock(&atmel_pwm->isr_lock);
-		atmel_pwm->updated_pwms |= atmel_pwm_readl(atmel_pwm, PWM_ISR);
-		atmel_pwm->updated_pwms &= ~(1 << pwm->hwpwm);
-		mutex_unlock(&atmel_pwm->isr_lock);
 		atmel_pwm_writel(atmel_pwm, PWM_ENA, 1 << pwm->hwpwm);
 	} else if (cstate.enabled) {
 		atmel_pwm_disable(chip, pwm, true);
@@ -326,6 +379,9 @@ static void atmel_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
 		tmp <<= pres;
 		state->period = DIV64_U64_ROUND_UP(tmp, rate);
 
+		/* Wait for an updated duty_cycle queued in hardware */
+		atmel_pwm_wait_nonpending(atmel_pwm, pwm->hwpwm);
+
 		cdty = atmel_pwm_ch_readl(atmel_pwm, pwm->hwpwm,
 					  atmel_pwm->data->regs.duty);
 		tmp = (u64)(cprd - cdty) * NSEC_PER_SEC;
@@ -416,9 +472,10 @@ static int atmel_pwm_probe(struct platform_device *pdev)
 	if (!atmel_pwm)
 		return -ENOMEM;
 
-	mutex_init(&atmel_pwm->isr_lock);
 	atmel_pwm->data = of_device_get_match_data(&pdev->dev);
-	atmel_pwm->updated_pwms = 0;
+
+	atmel_pwm->update_pending = 0;
+	spin_lock_init(&atmel_pwm->lock);
 
 	atmel_pwm->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(atmel_pwm->base))
@@ -460,7 +517,6 @@ static int atmel_pwm_remove(struct platform_device *pdev)
 	pwmchip_remove(&atmel_pwm->chip);
 
 	clk_unprepare(atmel_pwm->clk);
-	mutex_destroy(&atmel_pwm->isr_lock);
 
 	return 0;
 }
diff --git a/drivers/pwm/pwm-bcm-kona.c b/drivers/pwm/pwm-bcm-kona.c
index 8c85c66ea5c9..64148f5f81d0 100644
--- a/drivers/pwm/pwm-bcm-kona.c
+++ b/drivers/pwm/pwm-bcm-kona.c
@@ -267,8 +267,6 @@ static int kona_pwmc_probe(struct platform_device *pdev)
 	if (kp == NULL)
 		return -ENOMEM;
 
-	platform_set_drvdata(pdev, kp);
-
 	kp->chip.dev = &pdev->dev;
 	kp->chip.ops = &kona_pwm_ops;
 	kp->chip.npwm = 6;
@@ -298,20 +296,13 @@ static int kona_pwmc_probe(struct platform_device *pdev)
 
 	clk_disable_unprepare(kp->clk);
 
-	ret = pwmchip_add(&kp->chip);
+	ret = devm_pwmchip_add(&pdev->dev, &kp->chip);
 	if (ret < 0)
 		dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret);
 
 	return ret;
 }
 
-static int kona_pwmc_remove(struct platform_device *pdev)
-{
-	struct kona_pwmc *kp = platform_get_drvdata(pdev);
-
-	return pwmchip_remove(&kp->chip);
-}
-
 static const struct of_device_id bcm_kona_pwmc_dt[] = {
 	{ .compatible = "brcm,kona-pwm" },
 	{ },
@@ -324,7 +315,6 @@ static struct platform_driver kona_pwmc_driver = {
 		.of_match_table = bcm_kona_pwmc_dt,
 	},
 	.probe = kona_pwmc_probe,
-	.remove = kona_pwmc_remove,
 };
 module_platform_driver(kona_pwmc_driver);
 
diff --git a/drivers/pwm/pwm-brcmstb.c b/drivers/pwm/pwm-brcmstb.c
index 8b1d1e7aa856..3b529f82b97c 100644
--- a/drivers/pwm/pwm-brcmstb.c
+++ b/drivers/pwm/pwm-brcmstb.c
@@ -282,12 +282,11 @@ out_clk:
 static int brcmstb_pwm_remove(struct platform_device *pdev)
 {
 	struct brcmstb_pwm *p = platform_get_drvdata(pdev);
-	int ret;
 
-	ret = pwmchip_remove(&p->chip);
+	pwmchip_remove(&p->chip);
 	clk_disable_unprepare(p->clk);
 
-	return ret;
+	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/pwm/pwm-cros-ec.c b/drivers/pwm/pwm-cros-ec.c
index 9fffb566af5f..5e29d9c682c3 100644
--- a/drivers/pwm/pwm-cros-ec.c
+++ b/drivers/pwm/pwm-cros-ec.c
@@ -280,7 +280,9 @@ static int cros_ec_pwm_remove(struct platform_device *dev)
 	struct cros_ec_pwm_device *ec_pwm = platform_get_drvdata(dev);
 	struct pwm_chip *chip = &ec_pwm->chip;
 
-	return pwmchip_remove(chip);
+	pwmchip_remove(chip);
+
+	return 0;
 }
 
 #ifdef CONFIG_OF
diff --git a/drivers/pwm/pwm-ep93xx.c b/drivers/pwm/pwm-ep93xx.c
index fc3cb7d669c6..c45a75e65c86 100644
--- a/drivers/pwm/pwm-ep93xx.c
+++ b/drivers/pwm/pwm-ep93xx.c
@@ -183,27 +183,18 @@ static int ep93xx_pwm_probe(struct platform_device *pdev)
 	ep93xx_pwm->chip.ops = &ep93xx_pwm_ops;
 	ep93xx_pwm->chip.npwm = 1;
 
-	ret = pwmchip_add(&ep93xx_pwm->chip);
+	ret = devm_pwmchip_add(&pdev->dev, &ep93xx_pwm->chip);
 	if (ret < 0)
 		return ret;
 
-	platform_set_drvdata(pdev, ep93xx_pwm);
 	return 0;
 }
 
-static int ep93xx_pwm_remove(struct platform_device *pdev)
-{
-	struct ep93xx_pwm *ep93xx_pwm = platform_get_drvdata(pdev);
-
-	return pwmchip_remove(&ep93xx_pwm->chip);
-}
-
 static struct platform_driver ep93xx_pwm_driver = {
 	.driver = {
 		.name = "ep93xx-pwm",
 	},
 	.probe = ep93xx_pwm_probe,
-	.remove = ep93xx_pwm_remove,
 };
 module_platform_driver(ep93xx_pwm_driver);
 
diff --git a/drivers/pwm/pwm-fsl-ftm.c b/drivers/pwm/pwm-fsl-ftm.c
index 96ccd772280c..0247757f9a72 100644
--- a/drivers/pwm/pwm-fsl-ftm.c
+++ b/drivers/pwm/pwm-fsl-ftm.c
@@ -453,7 +453,7 @@ static int fsl_pwm_probe(struct platform_device *pdev)
 	fpc->chip.ops = &fsl_pwm_ops;
 	fpc->chip.npwm = 8;
 
-	ret = pwmchip_add(&fpc->chip);
+	ret = devm_pwmchip_add(&pdev->dev, &fpc->chip);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret);
 		return ret;
@@ -464,13 +464,6 @@ static int fsl_pwm_probe(struct platform_device *pdev)
 	return fsl_pwm_init(fpc);
 }
 
-static int fsl_pwm_remove(struct platform_device *pdev)
-{
-	struct fsl_pwm_chip *fpc = platform_get_drvdata(pdev);
-
-	return pwmchip_remove(&fpc->chip);
-}
-
 #ifdef CONFIG_PM_SLEEP
 static int fsl_pwm_suspend(struct device *dev)
 {
@@ -552,7 +545,6 @@ static struct platform_driver fsl_pwm_driver = {
 		.pm = &fsl_pwm_pm_ops,
 	},
 	.probe = fsl_pwm_probe,
-	.remove = fsl_pwm_remove,
 };
 module_platform_driver(fsl_pwm_driver);
 
diff --git a/drivers/pwm/pwm-hibvt.c b/drivers/pwm/pwm-hibvt.c
index 4a6e9ad3c0ff..333f1b18ff4e 100644
--- a/drivers/pwm/pwm-hibvt.c
+++ b/drivers/pwm/pwm-hibvt.c
@@ -248,13 +248,15 @@ static int hibvt_pwm_remove(struct platform_device *pdev)
 
 	pwm_chip = platform_get_drvdata(pdev);
 
+	pwmchip_remove(&pwm_chip->chip);
+
 	reset_control_assert(pwm_chip->rstc);
 	msleep(30);
 	reset_control_deassert(pwm_chip->rstc);
 
 	clk_disable_unprepare(pwm_chip->clk);
 
-	return pwmchip_remove(&pwm_chip->chip);
+	return 0;
 }
 
 static const struct of_device_id hibvt_pwm_of_match[] = {
diff --git a/drivers/pwm/pwm-img.c b/drivers/pwm/pwm-img.c
index 11b16ecc4f96..f97f82548293 100644
--- a/drivers/pwm/pwm-img.c
+++ b/drivers/pwm/pwm-img.c
@@ -326,28 +326,14 @@ err_pm_disable:
 static int img_pwm_remove(struct platform_device *pdev)
 {
 	struct img_pwm_chip *pwm_chip = platform_get_drvdata(pdev);
-	u32 val;
-	unsigned int i;
-	int ret;
-
-	ret = pm_runtime_get_sync(&pdev->dev);
-	if (ret < 0) {
-		pm_runtime_put(&pdev->dev);
-		return ret;
-	}
-
-	for (i = 0; i < pwm_chip->chip.npwm; i++) {
-		val = img_pwm_readl(pwm_chip, PWM_CTRL_CFG);
-		val &= ~BIT(i);
-		img_pwm_writel(pwm_chip, PWM_CTRL_CFG, val);
-	}
 
-	pm_runtime_put(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 	if (!pm_runtime_status_suspended(&pdev->dev))
 		img_pwm_runtime_suspend(&pdev->dev);
 
-	return pwmchip_remove(&pwm_chip->chip);
+	pwmchip_remove(&pwm_chip->chip);
+
+	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/pwm/pwm-imx-tpm.c b/drivers/pwm/pwm-imx-tpm.c
index dbb50493abdd..e5e7b7c339a8 100644
--- a/drivers/pwm/pwm-imx-tpm.c
+++ b/drivers/pwm/pwm-imx-tpm.c
@@ -382,11 +382,12 @@ static int pwm_imx_tpm_probe(struct platform_device *pdev)
 static int pwm_imx_tpm_remove(struct platform_device *pdev)
 {
 	struct imx_tpm_pwm_chip *tpm = platform_get_drvdata(pdev);
-	int ret = pwmchip_remove(&tpm->chip);
+
+	pwmchip_remove(&tpm->chip);
 
 	clk_disable_unprepare(tpm->clk);
 
-	return ret;
+	return 0;
 }
 
 static int __maybe_unused pwm_imx_tpm_suspend(struct device *dev)
diff --git a/drivers/pwm/pwm-imx27.c b/drivers/pwm/pwm-imx27.c
index f6588a96fbd9..ea91a2f81a9f 100644
--- a/drivers/pwm/pwm-imx27.c
+++ b/drivers/pwm/pwm-imx27.c
@@ -313,8 +313,6 @@ static int pwm_imx27_probe(struct platform_device *pdev)
 	if (imx == NULL)
 		return -ENOMEM;
 
-	platform_set_drvdata(pdev, imx);
-
 	imx->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
 	if (IS_ERR(imx->clk_ipg))
 		return dev_err_probe(&pdev->dev, PTR_ERR(imx->clk_ipg),
@@ -342,16 +340,7 @@ static int pwm_imx27_probe(struct platform_device *pdev)
 	if (!(pwmcr & MX3_PWMCR_EN))
 		pwm_imx27_clk_disable_unprepare(imx);
 
-	return pwmchip_add(&imx->chip);
-}
-
-static int pwm_imx27_remove(struct platform_device *pdev)
-{
-	struct pwm_imx27_chip *imx;
-
-	imx = platform_get_drvdata(pdev);
-
-	return pwmchip_remove(&imx->chip);
+	return devm_pwmchip_add(&pdev->dev, &imx->chip);
 }
 
 static struct platform_driver imx_pwm_driver = {
@@ -360,7 +349,6 @@ static struct platform_driver imx_pwm_driver = {
 		.of_match_table = pwm_imx27_dt_ids,
 	},
 	.probe = pwm_imx27_probe,
-	.remove = pwm_imx27_remove,
 };
 module_platform_driver(imx_pwm_driver);
 
diff --git a/drivers/pwm/pwm-intel-lgm.c b/drivers/pwm/pwm-intel-lgm.c
index 015f5eba09a1..b66c35074087 100644
--- a/drivers/pwm/pwm-intel-lgm.c
+++ b/drivers/pwm/pwm-intel-lgm.c
@@ -176,8 +176,6 @@ static int lgm_pwm_probe(struct platform_device *pdev)
 	if (!pc)
 		return -ENOMEM;
 
-	platform_set_drvdata(pdev, pc);
-
 	io_base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(io_base))
 		return PTR_ERR(io_base);
@@ -210,20 +208,13 @@ static int lgm_pwm_probe(struct platform_device *pdev)
 
 	lgm_pwm_init(pc);
 
-	ret = pwmchip_add(&pc->chip);
+	ret = devm_pwmchip_add(dev, &pc->chip);
 	if (ret < 0)
 		return dev_err_probe(dev, ret, "failed to add PWM chip\n");
 
 	return 0;
 }
 
-static int lgm_pwm_remove(struct platform_device *pdev)
-{
-	struct lgm_pwm_chip *pc = platform_get_drvdata(pdev);
-
-	return pwmchip_remove(&pc->chip);
-}
-
 static const struct of_device_id lgm_pwm_of_match[] = {
 	{ .compatible = "intel,lgm-pwm" },
 	{ }
@@ -236,7 +227,6 @@ static struct platform_driver lgm_pwm_driver = {
 		.of_match_table = lgm_pwm_of_match,
 	},
 	.probe = lgm_pwm_probe,
-	.remove = lgm_pwm_remove,
 };
 module_platform_driver(lgm_pwm_driver);
 
diff --git a/drivers/pwm/pwm-iqs620a.c b/drivers/pwm/pwm-iqs620a.c
index 6c6e26d18329..54bd95a5cab0 100644
--- a/drivers/pwm/pwm-iqs620a.c
+++ b/drivers/pwm/pwm-iqs620a.c
@@ -189,7 +189,6 @@ static int iqs620_pwm_probe(struct platform_device *pdev)
 	if (!iqs620_pwm)
 		return -ENOMEM;
 
-	platform_set_drvdata(pdev, iqs620_pwm);
 	iqs620_pwm->iqs62x = iqs62x;
 
 	ret = regmap_read(iqs62x->regmap, IQS620_PWR_SETTINGS, &val);
@@ -224,31 +223,18 @@ static int iqs620_pwm_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	ret = pwmchip_add(&iqs620_pwm->chip);
+	ret = devm_pwmchip_add(&pdev->dev, &iqs620_pwm->chip);
 	if (ret)
 		dev_err(&pdev->dev, "Failed to add device: %d\n", ret);
 
 	return ret;
 }
 
-static int iqs620_pwm_remove(struct platform_device *pdev)
-{
-	struct iqs620_pwm_private *iqs620_pwm = platform_get_drvdata(pdev);
-	int ret;
-
-	ret = pwmchip_remove(&iqs620_pwm->chip);
-	if (ret)
-		dev_err(&pdev->dev, "Failed to remove device: %d\n", ret);
-
-	return ret;
-}
-
 static struct platform_driver iqs620_pwm_platform_driver = {
 	.driver = {
 		.name = "iqs620a-pwm",
 	},
 	.probe = iqs620_pwm_probe,
-	.remove = iqs620_pwm_remove,
 };
 module_platform_driver(iqs620_pwm_platform_driver);
 
diff --git a/drivers/pwm/pwm-jz4740.c b/drivers/pwm/pwm-jz4740.c
index 990e7904c7f1..23dc1fb770e2 100644
--- a/drivers/pwm/pwm-jz4740.c
+++ b/drivers/pwm/pwm-jz4740.c
@@ -245,16 +245,7 @@ static int jz4740_pwm_probe(struct platform_device *pdev)
 	jz4740->chip.ops = &jz4740_pwm_ops;
 	jz4740->chip.npwm = info->num_pwms;
 
-	platform_set_drvdata(pdev, jz4740);
-
-	return pwmchip_add(&jz4740->chip);
-}
-
-static int jz4740_pwm_remove(struct platform_device *pdev)
-{
-	struct jz4740_pwm_chip *jz4740 = platform_get_drvdata(pdev);
-
-	return pwmchip_remove(&jz4740->chip);
+	return devm_pwmchip_add(dev, &jz4740->chip);
 }
 
 static const struct soc_info __maybe_unused jz4740_soc_info = {
@@ -280,7 +271,6 @@ static struct platform_driver jz4740_pwm_driver = {
 		.of_match_table = of_match_ptr(jz4740_pwm_dt_ids),
 	},
 	.probe = jz4740_pwm_probe,
-	.remove = jz4740_pwm_remove,
 };
 module_platform_driver(jz4740_pwm_driver);
 
diff --git a/drivers/pwm/pwm-keembay.c b/drivers/pwm/pwm-keembay.c
index 521a825c8ba0..733811b05721 100644
--- a/drivers/pwm/pwm-keembay.c
+++ b/drivers/pwm/pwm-keembay.c
@@ -207,22 +207,13 @@ static int keembay_pwm_probe(struct platform_device *pdev)
 	priv->chip.ops = &keembay_pwm_ops;
 	priv->chip.npwm = KMB_TOTAL_PWM_CHANNELS;
 
-	ret = pwmchip_add(&priv->chip);
+	ret = devm_pwmchip_add(dev, &priv->chip);
 	if (ret)
 		return dev_err_probe(dev, ret, "Failed to add PWM chip\n");
 
-	platform_set_drvdata(pdev, priv);
-
 	return 0;
 }
 
-static int keembay_pwm_remove(struct platform_device *pdev)
-{
-	struct keembay_pwm *priv = platform_get_drvdata(pdev);
-
-	return pwmchip_remove(&priv->chip);
-}
-
 static const struct of_device_id keembay_pwm_of_match[] = {
 	{ .compatible = "intel,keembay-pwm" },
 	{ }
@@ -231,7 +222,6 @@ MODULE_DEVICE_TABLE(of, keembay_pwm_of_match);
 
 static struct platform_driver keembay_pwm_driver = {
 	.probe	= keembay_pwm_probe,
-	.remove	= keembay_pwm_remove,
 	.driver	= {
 		.name = "pwm-keembay",
 		.of_match_table = keembay_pwm_of_match,
diff --git a/drivers/pwm/pwm-lp3943.c b/drivers/pwm/pwm-lp3943.c
index 7551253ada32..ea17d446a627 100644
--- a/drivers/pwm/pwm-lp3943.c
+++ b/drivers/pwm/pwm-lp3943.c
@@ -276,16 +276,7 @@ static int lp3943_pwm_probe(struct platform_device *pdev)
 	lp3943_pwm->chip.ops = &lp3943_pwm_ops;
 	lp3943_pwm->chip.npwm = LP3943_NUM_PWMS;
 
-	platform_set_drvdata(pdev, lp3943_pwm);
-
-	return pwmchip_add(&lp3943_pwm->chip);
-}
-
-static int lp3943_pwm_remove(struct platform_device *pdev)
-{
-	struct lp3943_pwm *lp3943_pwm = platform_get_drvdata(pdev);
-
-	return pwmchip_remove(&lp3943_pwm->chip);
+	return devm_pwmchip_add(&pdev->dev, &lp3943_pwm->chip);
 }
 
 #ifdef CONFIG_OF
@@ -298,7 +289,6 @@ MODULE_DEVICE_TABLE(of, lp3943_pwm_of_match);
 
 static struct platform_driver lp3943_pwm_driver = {
 	.probe = lp3943_pwm_probe,
-	.remove = lp3943_pwm_remove,
 	.driver = {
 		.name = "lp3943-pwm",
 		.of_match_table = of_match_ptr(lp3943_pwm_of_match),
diff --git a/drivers/pwm/pwm-lpc32xx.c b/drivers/pwm/pwm-lpc32xx.c
index 2834a0f001d3..ddeab5687cb8 100644
--- a/drivers/pwm/pwm-lpc32xx.c
+++ b/drivers/pwm/pwm-lpc32xx.c
@@ -117,29 +117,20 @@ static int lpc32xx_pwm_probe(struct platform_device *pdev)
 	lpc32xx->chip.ops = &lpc32xx_pwm_ops;
 	lpc32xx->chip.npwm = 1;
 
-	ret = pwmchip_add(&lpc32xx->chip);
-	if (ret < 0) {
-		dev_err(&pdev->dev, "failed to add PWM chip, error %d\n", ret);
-		return ret;
-	}
-
-	/* When PWM is disable, configure the output to the default value */
+	/* If PWM is disabled, configure the output to the default value */
 	val = readl(lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2));
 	val &= ~PWM_PIN_LEVEL;
 	writel(val, lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2));
 
-	platform_set_drvdata(pdev, lpc32xx);
+	ret = devm_pwmchip_add(&pdev->dev, &lpc32xx->chip);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "failed to add PWM chip, error %d\n", ret);
+		return ret;
+	}
 
 	return 0;
 }
 
-static int lpc32xx_pwm_remove(struct platform_device *pdev)
-{
-	struct lpc32xx_pwm_chip *lpc32xx = platform_get_drvdata(pdev);
-
-	return pwmchip_remove(&lpc32xx->chip);
-}
-
 static const struct of_device_id lpc32xx_pwm_dt_ids[] = {
 	{ .compatible = "nxp,lpc3220-pwm", },
 	{ /* sentinel */ }
@@ -152,7 +143,6 @@ static struct platform_driver lpc32xx_pwm_driver = {
 		.of_match_table = lpc32xx_pwm_dt_ids,
 	},
 	.probe = lpc32xx_pwm_probe,
-	.remove = lpc32xx_pwm_remove,
 };
 module_platform_driver(lpc32xx_pwm_driver);
 
diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c
index b4a31060bcd7..0d4dd80e9f07 100644
--- a/drivers/pwm/pwm-mediatek.c
+++ b/drivers/pwm/pwm-mediatek.c
@@ -253,13 +253,11 @@ static int pwm_mediatek_probe(struct platform_device *pdev)
 		}
 	}
 
-	platform_set_drvdata(pdev, pc);
-
 	pc->chip.dev = &pdev->dev;
 	pc->chip.ops = &pwm_mediatek_ops;
 	pc->chip.npwm = pc->soc->num_pwms;
 
-	ret = pwmchip_add(&pc->chip);
+	ret = devm_pwmchip_add(&pdev->dev, &pc->chip);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret);
 		return ret;
@@ -268,13 +266,6 @@ static int pwm_mediatek_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int pwm_mediatek_remove(struct platform_device *pdev)
-{
-	struct pwm_mediatek_chip *pc = platform_get_drvdata(pdev);
-
-	return pwmchip_remove(&pc->chip);
-}
-
 static const struct pwm_mediatek_of_data mt2712_pwm_data = {
 	.num_pwms = 8,
 	.pwm45_fixup = false,
@@ -335,7 +326,6 @@ static struct platform_driver pwm_mediatek_driver = {
 		.of_match_table = pwm_mediatek_of_match,
 	},
 	.probe = pwm_mediatek_probe,
-	.remove = pwm_mediatek_remove,
 };
 module_platform_driver(pwm_mediatek_driver);
 
diff --git a/drivers/pwm/pwm-mtk-disp.c b/drivers/pwm/pwm-mtk-disp.c
index 9b3ba401a3db..c605013e4114 100644
--- a/drivers/pwm/pwm-mtk-disp.c
+++ b/drivers/pwm/pwm-mtk-disp.c
@@ -5,6 +5,7 @@
  * Author: YH Huang <yh.huang@mediatek.com>
  */
 
+#include <linux/bitfield.h>
 #include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/io.h>
@@ -47,6 +48,7 @@ struct mtk_disp_pwm {
 	struct clk *clk_main;
 	struct clk *clk_mm;
 	void __iomem *base;
+	bool enabled;
 };
 
 static inline struct mtk_disp_pwm *to_mtk_disp_pwm(struct pwm_chip *chip)
@@ -66,14 +68,47 @@ static void mtk_disp_pwm_update_bits(struct mtk_disp_pwm *mdp, u32 offset,
 	writel(value, address);
 }
 
-static int mtk_disp_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
-			       int duty_ns, int period_ns)
+static int mtk_disp_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+			      const struct pwm_state *state)
 {
 	struct mtk_disp_pwm *mdp = to_mtk_disp_pwm(chip);
 	u32 clk_div, period, high_width, value;
 	u64 div, rate;
 	int err;
 
+	if (state->polarity != PWM_POLARITY_NORMAL)
+		return -EINVAL;
+
+	if (!state->enabled) {
+		mtk_disp_pwm_update_bits(mdp, DISP_PWM_EN, mdp->data->enable_mask,
+					 0x0);
+
+		if (mdp->enabled) {
+			clk_disable_unprepare(mdp->clk_mm);
+			clk_disable_unprepare(mdp->clk_main);
+		}
+
+		mdp->enabled = false;
+		return 0;
+	}
+
+	if (!mdp->enabled) {
+		err = clk_prepare_enable(mdp->clk_main);
+		if (err < 0) {
+			dev_err(chip->dev, "Can't enable mdp->clk_main: %pe\n",
+				ERR_PTR(err));
+			return err;
+		}
+
+		err = clk_prepare_enable(mdp->clk_mm);
+		if (err < 0) {
+			dev_err(chip->dev, "Can't enable mdp->clk_mm: %pe\n",
+				ERR_PTR(err));
+			clk_disable_unprepare(mdp->clk_main);
+			return err;
+		}
+	}
+
 	/*
 	 * Find period, high_width and clk_div to suit duty_ns and period_ns.
 	 * Calculate proper div value to keep period value in the bound.
@@ -85,29 +120,24 @@ static int mtk_disp_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 	 * high_width = (PWM_CLK_RATE * duty_ns) / (10^9 * (clk_div + 1))
 	 */
 	rate = clk_get_rate(mdp->clk_main);
-	clk_div = div_u64(rate * period_ns, NSEC_PER_SEC) >>
+	clk_div = mul_u64_u64_div_u64(state->period, rate, NSEC_PER_SEC) >>
 			  PWM_PERIOD_BIT_WIDTH;
-	if (clk_div > PWM_CLKDIV_MAX)
+	if (clk_div > PWM_CLKDIV_MAX) {
+		if (!mdp->enabled) {
+			clk_disable_unprepare(mdp->clk_mm);
+			clk_disable_unprepare(mdp->clk_main);
+		}
 		return -EINVAL;
+	}
 
 	div = NSEC_PER_SEC * (clk_div + 1);
-	period = div64_u64(rate * period_ns, div);
+	period = mul_u64_u64_div_u64(state->period, rate, div);
 	if (period > 0)
 		period--;
 
-	high_width = div64_u64(rate * duty_ns, div);
+	high_width = mul_u64_u64_div_u64(state->duty_cycle, rate, div);
 	value = period | (high_width << PWM_HIGH_WIDTH_SHIFT);
 
-	err = clk_enable(mdp->clk_main);
-	if (err < 0)
-		return err;
-
-	err = clk_enable(mdp->clk_mm);
-	if (err < 0) {
-		clk_disable(mdp->clk_main);
-		return err;
-	}
-
 	mtk_disp_pwm_update_bits(mdp, mdp->data->con0,
 				 PWM_CLKDIV_MASK,
 				 clk_div << PWM_CLKDIV_SHIFT);
@@ -122,50 +152,70 @@ static int mtk_disp_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 		mtk_disp_pwm_update_bits(mdp, mdp->data->commit,
 					 mdp->data->commit_mask,
 					 0x0);
+	} else {
+		/*
+		 * For MT2701, disable double buffer before writing register
+		 * and select manual mode and use PWM_PERIOD/PWM_HIGH_WIDTH.
+		 */
+		mtk_disp_pwm_update_bits(mdp, mdp->data->bls_debug,
+					 mdp->data->bls_debug_mask,
+					 mdp->data->bls_debug_mask);
+		mtk_disp_pwm_update_bits(mdp, mdp->data->con0,
+					 mdp->data->con0_sel,
+					 mdp->data->con0_sel);
 	}
 
-	clk_disable(mdp->clk_mm);
-	clk_disable(mdp->clk_main);
+	mtk_disp_pwm_update_bits(mdp, DISP_PWM_EN, mdp->data->enable_mask,
+				 mdp->data->enable_mask);
+	mdp->enabled = true;
 
 	return 0;
 }
 
-static int mtk_disp_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
+static void mtk_disp_pwm_get_state(struct pwm_chip *chip,
+				   struct pwm_device *pwm,
+				   struct pwm_state *state)
 {
 	struct mtk_disp_pwm *mdp = to_mtk_disp_pwm(chip);
+	u64 rate, period, high_width;
+	u32 clk_div, con0, con1;
 	int err;
 
-	err = clk_enable(mdp->clk_main);
-	if (err < 0)
-		return err;
-
-	err = clk_enable(mdp->clk_mm);
+	err = clk_prepare_enable(mdp->clk_main);
 	if (err < 0) {
-		clk_disable(mdp->clk_main);
-		return err;
+		dev_err(chip->dev, "Can't enable mdp->clk_main: %pe\n", ERR_PTR(err));
+		return;
 	}
 
-	mtk_disp_pwm_update_bits(mdp, DISP_PWM_EN, mdp->data->enable_mask,
-				 mdp->data->enable_mask);
-
-	return 0;
-}
-
-static void mtk_disp_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
-{
-	struct mtk_disp_pwm *mdp = to_mtk_disp_pwm(chip);
-
-	mtk_disp_pwm_update_bits(mdp, DISP_PWM_EN, mdp->data->enable_mask,
-				 0x0);
+	err = clk_prepare_enable(mdp->clk_mm);
+	if (err < 0) {
+		dev_err(chip->dev, "Can't enable mdp->clk_mm: %pe\n", ERR_PTR(err));
+		clk_disable_unprepare(mdp->clk_main);
+		return;
+	}
 
-	clk_disable(mdp->clk_mm);
-	clk_disable(mdp->clk_main);
+	rate = clk_get_rate(mdp->clk_main);
+	con0 = readl(mdp->base + mdp->data->con0);
+	con1 = readl(mdp->base + mdp->data->con1);
+	state->enabled = !!(con0 & BIT(0));
+	clk_div = FIELD_GET(PWM_CLKDIV_MASK, con0);
+	period = FIELD_GET(PWM_PERIOD_MASK, con1);
+	/*
+	 * period has 12 bits, clk_div 11 and NSEC_PER_SEC has 30,
+	 * so period * (clk_div + 1) * NSEC_PER_SEC doesn't overflow.
+	 */
+	state->period = DIV64_U64_ROUND_UP(period * (clk_div + 1) * NSEC_PER_SEC, rate);
+	high_width = FIELD_GET(PWM_HIGH_WIDTH_MASK, con1);
+	state->duty_cycle = DIV64_U64_ROUND_UP(high_width * (clk_div + 1) * NSEC_PER_SEC,
+					       rate);
+	state->polarity = PWM_POLARITY_NORMAL;
+	clk_disable_unprepare(mdp->clk_mm);
+	clk_disable_unprepare(mdp->clk_main);
 }
 
 static const struct pwm_ops mtk_disp_pwm_ops = {
-	.config = mtk_disp_pwm_config,
-	.enable = mtk_disp_pwm_enable,
-	.disable = mtk_disp_pwm_disable,
+	.apply = mtk_disp_pwm_apply,
+	.get_state = mtk_disp_pwm_get_state,
 	.owner = THIS_MODULE,
 };
 
@@ -192,58 +242,28 @@ static int mtk_disp_pwm_probe(struct platform_device *pdev)
 	if (IS_ERR(mdp->clk_mm))
 		return PTR_ERR(mdp->clk_mm);
 
-	ret = clk_prepare(mdp->clk_main);
-	if (ret < 0)
-		return ret;
-
-	ret = clk_prepare(mdp->clk_mm);
-	if (ret < 0)
-		goto disable_clk_main;
-
 	mdp->chip.dev = &pdev->dev;
 	mdp->chip.ops = &mtk_disp_pwm_ops;
 	mdp->chip.npwm = 1;
 
 	ret = pwmchip_add(&mdp->chip);
 	if (ret < 0) {
-		dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret);
-		goto disable_clk_mm;
+		dev_err(&pdev->dev, "pwmchip_add() failed: %pe\n", ERR_PTR(ret));
+		return ret;
 	}
 
 	platform_set_drvdata(pdev, mdp);
 
-	/*
-	 * For MT2701, disable double buffer before writing register
-	 * and select manual mode and use PWM_PERIOD/PWM_HIGH_WIDTH.
-	 */
-	if (!mdp->data->has_commit) {
-		mtk_disp_pwm_update_bits(mdp, mdp->data->bls_debug,
-					 mdp->data->bls_debug_mask,
-					 mdp->data->bls_debug_mask);
-		mtk_disp_pwm_update_bits(mdp, mdp->data->con0,
-					 mdp->data->con0_sel,
-					 mdp->data->con0_sel);
-	}
-
 	return 0;
-
-disable_clk_mm:
-	clk_unprepare(mdp->clk_mm);
-disable_clk_main:
-	clk_unprepare(mdp->clk_main);
-	return ret;
 }
 
 static int mtk_disp_pwm_remove(struct platform_device *pdev)
 {
 	struct mtk_disp_pwm *mdp = platform_get_drvdata(pdev);
-	int ret;
 
-	ret = pwmchip_remove(&mdp->chip);
-	clk_unprepare(mdp->clk_mm);
-	clk_unprepare(mdp->clk_main);
+	pwmchip_remove(&mdp->chip);
 
-	return ret;
+	return 0;
 }
 
 static const struct mtk_pwm_data mt2701_pwm_data = {
diff --git a/drivers/pwm/pwm-mxs.c b/drivers/pwm/pwm-mxs.c
index a22180803bd7..766dbc58dad8 100644
--- a/drivers/pwm/pwm-mxs.c
+++ b/drivers/pwm/pwm-mxs.c
@@ -145,30 +145,18 @@ static int mxs_pwm_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	ret = pwmchip_add(&mxs->chip);
+	/* FIXME: Only do this if the PWM isn't already running */
+	ret = stmp_reset_block(mxs->base);
+	if (ret)
+		return dev_err_probe(&pdev->dev, ret, "failed to reset PWM\n");
+
+	ret = devm_pwmchip_add(&pdev->dev, &mxs->chip);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "failed to add pwm chip %d\n", ret);
 		return ret;
 	}
 
-	platform_set_drvdata(pdev, mxs);
-
-	ret = stmp_reset_block(mxs->base);
-	if (ret)
-		goto pwm_remove;
-
 	return 0;
-
-pwm_remove:
-	pwmchip_remove(&mxs->chip);
-	return ret;
-}
-
-static int mxs_pwm_remove(struct platform_device *pdev)
-{
-	struct mxs_pwm_chip *mxs = platform_get_drvdata(pdev);
-
-	return pwmchip_remove(&mxs->chip);
 }
 
 static const struct of_device_id mxs_pwm_dt_ids[] = {
@@ -183,7 +171,6 @@ static struct platform_driver mxs_pwm_driver = {
 		.of_match_table = mxs_pwm_dt_ids,
 	},
 	.probe = mxs_pwm_probe,
-	.remove = mxs_pwm_remove,
 };
 module_platform_driver(mxs_pwm_driver);
 
diff --git a/drivers/pwm/pwm-ntxec.c b/drivers/pwm/pwm-ntxec.c
index 50c454c553c4..ab63b081df53 100644
--- a/drivers/pwm/pwm-ntxec.c
+++ b/drivers/pwm/pwm-ntxec.c
@@ -150,23 +150,12 @@ static int ntxec_pwm_probe(struct platform_device *pdev)
 	priv->ec = ec;
 	priv->dev = &pdev->dev;
 
-	platform_set_drvdata(pdev, priv);
-
 	chip = &priv->chip;
 	chip->dev = &pdev->dev;
 	chip->ops = &ntxec_pwm_ops;
-	chip->base = -1;
 	chip->npwm = 1;
 
-	return pwmchip_add(chip);
-}
-
-static int ntxec_pwm_remove(struct platform_device *pdev)
-{
-	struct ntxec_pwm *priv = platform_get_drvdata(pdev);
-	struct pwm_chip *chip = &priv->chip;
-
-	return pwmchip_remove(chip);
+	return devm_pwmchip_add(&pdev->dev, chip);
 }
 
 static struct platform_driver ntxec_pwm_driver = {
@@ -174,7 +163,6 @@ static struct platform_driver ntxec_pwm_driver = {
 		.name = "ntxec-pwm",
 	},
 	.probe = ntxec_pwm_probe,
-	.remove = ntxec_pwm_remove,
 };
 module_platform_driver(ntxec_pwm_driver);
 
diff --git a/drivers/pwm/pwm-omap-dmtimer.c b/drivers/pwm/pwm-omap-dmtimer.c
index 507a2d945b90..fa800fcf31d4 100644
--- a/drivers/pwm/pwm-omap-dmtimer.c
+++ b/drivers/pwm/pwm-omap-dmtimer.c
@@ -444,11 +444,8 @@ err_find_timer_pdev:
 static int pwm_omap_dmtimer_remove(struct platform_device *pdev)
 {
 	struct pwm_omap_dmtimer_chip *omap = platform_get_drvdata(pdev);
-	int ret;
 
-	ret = pwmchip_remove(&omap->chip);
-	if (ret)
-		return ret;
+	pwmchip_remove(&omap->chip);
 
 	if (pm_runtime_active(&omap->dm_timer_pdev->dev))
 		omap->pdata->stop(omap->dm_timer);
diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c
index 42ed770b432c..c56001a790d0 100644
--- a/drivers/pwm/pwm-pca9685.c
+++ b/drivers/pwm/pwm-pca9685.c
@@ -601,11 +601,8 @@ static int pca9685_pwm_probe(struct i2c_client *client,
 static int pca9685_pwm_remove(struct i2c_client *client)
 {
 	struct pca9685 *pca = i2c_get_clientdata(client);
-	int ret;
 
-	ret = pwmchip_remove(&pca->chip);
-	if (ret)
-		return ret;
+	pwmchip_remove(&pca->chip);
 
 	if (!pm_runtime_enabled(&client->dev)) {
 		/* Put chip in sleep state if runtime PM is disabled */
diff --git a/drivers/pwm/pwm-pxa.c b/drivers/pwm/pwm-pxa.c
index e091a528e33c..a9efdcf839ae 100644
--- a/drivers/pwm/pwm-pxa.c
+++ b/drivers/pwm/pwm-pxa.c
@@ -195,32 +195,21 @@ static int pwm_probe(struct platform_device *pdev)
 	if (IS_ERR(pc->mmio_base))
 		return PTR_ERR(pc->mmio_base);
 
-	ret = pwmchip_add(&pc->chip);
+	ret = devm_pwmchip_add(&pdev->dev, &pc->chip);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret);
 		return ret;
 	}
 
-	platform_set_drvdata(pdev, pc);
 	return 0;
 }
 
-static int pwm_remove(struct platform_device *pdev)
-{
-	struct pxa_pwm_chip *pc;
-
-	pc = platform_get_drvdata(pdev);
-
-	return pwmchip_remove(&pc->chip);
-}
-
 static struct platform_driver pwm_driver = {
 	.driver		= {
 		.name	= "pxa25x-pwm",
 		.of_match_table = pwm_of_match,
 	},
 	.probe		= pwm_probe,
-	.remove		= pwm_remove,
 	.id_table	= pwm_id_table,
 };
 
diff --git a/drivers/pwm/pwm-raspberrypi-poe.c b/drivers/pwm/pwm-raspberrypi-poe.c
index 043fc32e8be8..579a15240e0a 100644
--- a/drivers/pwm/pwm-raspberrypi-poe.c
+++ b/drivers/pwm/pwm-raspberrypi-poe.c
@@ -166,8 +166,6 @@ static int raspberrypi_pwm_probe(struct platform_device *pdev)
 	rpipwm->chip.base = -1;
 	rpipwm->chip.npwm = RASPBERRYPI_FIRMWARE_PWM_NUM;
 
-	platform_set_drvdata(pdev, rpipwm);
-
 	ret = raspberrypi_pwm_get_property(rpipwm->firmware, RPI_PWM_CUR_DUTY_REG,
 					   &rpipwm->duty_cycle);
 	if (ret) {
@@ -175,14 +173,7 @@ static int raspberrypi_pwm_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	return pwmchip_add(&rpipwm->chip);
-}
-
-static int raspberrypi_pwm_remove(struct platform_device *pdev)
-{
-	struct raspberrypi_pwm *rpipwm = platform_get_drvdata(pdev);
-
-	return pwmchip_remove(&rpipwm->chip);
+	return devm_pwmchip_add(dev, &rpipwm->chip);
 }
 
 static const struct of_device_id raspberrypi_pwm_of_match[] = {
@@ -197,7 +188,6 @@ static struct platform_driver raspberrypi_pwm_driver = {
 		.of_match_table = raspberrypi_pwm_of_match,
 	},
 	.probe = raspberrypi_pwm_probe,
-	.remove = raspberrypi_pwm_remove,
 };
 module_platform_driver(raspberrypi_pwm_driver);
 
diff --git a/drivers/pwm/pwm-rcar.c b/drivers/pwm/pwm-rcar.c
index 9daca0c772c7..b437192380e2 100644
--- a/drivers/pwm/pwm-rcar.c
+++ b/drivers/pwm/pwm-rcar.c
@@ -241,13 +241,12 @@ static int rcar_pwm_probe(struct platform_device *pdev)
 static int rcar_pwm_remove(struct platform_device *pdev)
 {
 	struct rcar_pwm_chip *rcar_pwm = platform_get_drvdata(pdev);
-	int ret;
 
-	ret = pwmchip_remove(&rcar_pwm->chip);
+	pwmchip_remove(&rcar_pwm->chip);
 
 	pm_runtime_disable(&pdev->dev);
 
-	return ret;
+	return 0;
 }
 
 static const struct of_device_id rcar_pwm_of_table[] = {
diff --git a/drivers/pwm/pwm-renesas-tpu.c b/drivers/pwm/pwm-renesas-tpu.c
index b853e7942605..4381df90a527 100644
--- a/drivers/pwm/pwm-renesas-tpu.c
+++ b/drivers/pwm/pwm-renesas-tpu.c
@@ -425,13 +425,12 @@ static int tpu_probe(struct platform_device *pdev)
 static int tpu_remove(struct platform_device *pdev)
 {
 	struct tpu_device *tpu = platform_get_drvdata(pdev);
-	int ret;
 
-	ret = pwmchip_remove(&tpu->chip);
+	pwmchip_remove(&tpu->chip);
 
 	pm_runtime_disable(&pdev->dev);
 
-	return ret;
+	return 0;
 }
 
 #ifdef CONFIG_OF
diff --git a/drivers/pwm/pwm-rockchip.c b/drivers/pwm/pwm-rockchip.c
index cbe900877724..f3647b317152 100644
--- a/drivers/pwm/pwm-rockchip.c
+++ b/drivers/pwm/pwm-rockchip.c
@@ -384,24 +384,12 @@ static int rockchip_pwm_remove(struct platform_device *pdev)
 {
 	struct rockchip_pwm_chip *pc = platform_get_drvdata(pdev);
 
-	/*
-	 * Disable the PWM clk before unpreparing it if the PWM device is still
-	 * running. This should only happen when the last PWM user left it
-	 * enabled, or when nobody requested a PWM that was previously enabled
-	 * by the bootloader.
-	 *
-	 * FIXME: Maybe the core should disable all PWM devices in
-	 * pwmchip_remove(). In this case we'd only have to call
-	 * clk_unprepare() after pwmchip_remove().
-	 *
-	 */
-	if (pwm_is_enabled(pc->chip.pwms))
-		clk_disable(pc->clk);
+	pwmchip_remove(&pc->chip);
 
 	clk_unprepare(pc->pclk);
 	clk_unprepare(pc->clk);
 
-	return pwmchip_remove(&pc->chip);
+	return 0;
 }
 
 static struct platform_driver rockchip_pwm_driver = {
diff --git a/drivers/pwm/pwm-samsung.c b/drivers/pwm/pwm-samsung.c
index f6c528f02d43..dd94c4312a0c 100644
--- a/drivers/pwm/pwm-samsung.c
+++ b/drivers/pwm/pwm-samsung.c
@@ -580,11 +580,8 @@ static int pwm_samsung_probe(struct platform_device *pdev)
 static int pwm_samsung_remove(struct platform_device *pdev)
 {
 	struct samsung_pwm_chip *chip = platform_get_drvdata(pdev);
-	int ret;
 
-	ret = pwmchip_remove(&chip->chip);
-	if (ret < 0)
-		return ret;
+	pwmchip_remove(&chip->chip);
 
 	clk_disable_unprepare(chip->base_clk);
 
diff --git a/drivers/pwm/pwm-sifive.c b/drivers/pwm/pwm-sifive.c
index 420edc4aa94a..253c4a17d255 100644
--- a/drivers/pwm/pwm-sifive.c
+++ b/drivers/pwm/pwm-sifive.c
@@ -291,7 +291,7 @@ static int pwm_sifive_remove(struct platform_device *dev)
 	struct pwm_sifive_ddata *ddata = platform_get_drvdata(dev);
 	bool is_enabled = false;
 	struct pwm_device *pwm;
-	int ret, ch;
+	int ch;
 
 	for (ch = 0; ch < ddata->chip.npwm; ch++) {
 		pwm = &ddata->chip.pwms[ch];
@@ -304,10 +304,10 @@ static int pwm_sifive_remove(struct platform_device *dev)
 		clk_disable(ddata->clk);
 
 	clk_disable_unprepare(ddata->clk);
-	ret = pwmchip_remove(&ddata->chip);
+	pwmchip_remove(&ddata->chip);
 	clk_notifier_unregister(ddata->clk, &ddata->notifier);
 
-	return ret;
+	return 0;
 }
 
 static const struct of_device_id pwm_sifive_of_match[] = {
diff --git a/drivers/pwm/pwm-sl28cpld.c b/drivers/pwm/pwm-sl28cpld.c
index 7a69c1a0c060..589aeaaa6ac8 100644
--- a/drivers/pwm/pwm-sl28cpld.c
+++ b/drivers/pwm/pwm-sl28cpld.c
@@ -231,9 +231,7 @@ static int sl28cpld_pwm_probe(struct platform_device *pdev)
 	chip->ops = &sl28cpld_pwm_ops;
 	chip->npwm = 1;
 
-	platform_set_drvdata(pdev, priv);
-
-	ret = pwmchip_add(&priv->pwm_chip);
+	ret = devm_pwmchip_add(&pdev->dev, &priv->pwm_chip);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to add PWM chip (%pe)",
 			ERR_PTR(ret));
@@ -243,13 +241,6 @@ static int sl28cpld_pwm_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int sl28cpld_pwm_remove(struct platform_device *pdev)
-{
-	struct sl28cpld_pwm *priv = platform_get_drvdata(pdev);
-
-	return pwmchip_remove(&priv->pwm_chip);
-}
-
 static const struct of_device_id sl28cpld_pwm_of_match[] = {
 	{ .compatible = "kontron,sl28cpld-pwm" },
 	{}
@@ -258,7 +249,6 @@ MODULE_DEVICE_TABLE(of, sl28cpld_pwm_of_match);
 
 static struct platform_driver sl28cpld_pwm_driver = {
 	.probe = sl28cpld_pwm_probe,
-	.remove	= sl28cpld_pwm_remove,
 	.driver = {
 		.name = "sl28cpld-pwm",
 		.of_match_table = sl28cpld_pwm_of_match,
diff --git a/drivers/pwm/pwm-stm32-lp.c b/drivers/pwm/pwm-stm32-lp.c
index 93dd03618465..3115abb3f52a 100644
--- a/drivers/pwm/pwm-stm32-lp.c
+++ b/drivers/pwm/pwm-stm32-lp.c
@@ -209,7 +209,7 @@ static int stm32_pwm_lp_probe(struct platform_device *pdev)
 	priv->chip.ops = &stm32_pwm_lp_ops;
 	priv->chip.npwm = 1;
 
-	ret = pwmchip_add(&priv->chip);
+	ret = devm_pwmchip_add(&pdev->dev, &priv->chip);
 	if (ret < 0)
 		return ret;
 
@@ -218,15 +218,6 @@ static int stm32_pwm_lp_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int stm32_pwm_lp_remove(struct platform_device *pdev)
-{
-	struct stm32_pwm_lp *priv = platform_get_drvdata(pdev);
-
-	pwm_disable(&priv->chip.pwms[0]);
-
-	return pwmchip_remove(&priv->chip);
-}
-
 static int __maybe_unused stm32_pwm_lp_suspend(struct device *dev)
 {
 	struct stm32_pwm_lp *priv = dev_get_drvdata(dev);
@@ -258,7 +249,6 @@ MODULE_DEVICE_TABLE(of, stm32_pwm_lp_of_match);
 
 static struct platform_driver stm32_pwm_lp_driver = {
 	.probe	= stm32_pwm_lp_probe,
-	.remove	= stm32_pwm_lp_remove,
 	.driver	= {
 		.name = "stm32-pwm-lp",
 		.of_match_table = of_match_ptr(stm32_pwm_lp_of_match),
diff --git a/drivers/pwm/pwm-sun4i.c b/drivers/pwm/pwm-sun4i.c
index c952604e91f3..91ca67651abd 100644
--- a/drivers/pwm/pwm-sun4i.c
+++ b/drivers/pwm/pwm-sun4i.c
@@ -484,11 +484,8 @@ err_bus:
 static int sun4i_pwm_remove(struct platform_device *pdev)
 {
 	struct sun4i_pwm_chip *pwm = platform_get_drvdata(pdev);
-	int ret;
 
-	ret = pwmchip_remove(&pwm->chip);
-	if (ret)
-		return ret;
+	pwmchip_remove(&pwm->chip);
 
 	clk_disable_unprepare(pwm->bus_clk);
 	reset_control_assert(pwm->rst);
diff --git a/drivers/pwm/pwm-tiecap.c b/drivers/pwm/pwm-tiecap.c
index 35eb19a5a0d1..4701f0c9b921 100644
--- a/drivers/pwm/pwm-tiecap.c
+++ b/drivers/pwm/pwm-tiecap.c
@@ -253,7 +253,7 @@ static int ecap_pwm_probe(struct platform_device *pdev)
 	if (IS_ERR(pc->mmio_base))
 		return PTR_ERR(pc->mmio_base);
 
-	ret = pwmchip_add(&pc->chip);
+	ret = devm_pwmchip_add(&pdev->dev, &pc->chip);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret);
 		return ret;
@@ -267,11 +267,9 @@ static int ecap_pwm_probe(struct platform_device *pdev)
 
 static int ecap_pwm_remove(struct platform_device *pdev)
 {
-	struct ecap_pwm_chip *pc = platform_get_drvdata(pdev);
-
 	pm_runtime_disable(&pdev->dev);
 
-	return pwmchip_remove(&pc->chip);
+	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/pwm/pwm-tiehrpwm.c b/drivers/pwm/pwm-tiehrpwm.c
index 17909fa53211..5b723a48c5f1 100644
--- a/drivers/pwm/pwm-tiehrpwm.c
+++ b/drivers/pwm/pwm-tiehrpwm.c
@@ -485,11 +485,13 @@ static int ehrpwm_pwm_remove(struct platform_device *pdev)
 {
 	struct ehrpwm_pwm_chip *pc = platform_get_drvdata(pdev);
 
+	pwmchip_remove(&pc->chip);
+
 	clk_unprepare(pc->tbclk);
 
 	pm_runtime_disable(&pdev->dev);
 
-	return pwmchip_remove(&pc->chip);
+	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/pwm/pwm-twl-led.c b/drivers/pwm/pwm-twl-led.c
index 6c8df5f4e87d..49d9f7a78012 100644
--- a/drivers/pwm/pwm-twl-led.c
+++ b/drivers/pwm/pwm-twl-led.c
@@ -276,7 +276,6 @@ static const struct pwm_ops twl6030_pwmled_ops = {
 static int twl_pwmled_probe(struct platform_device *pdev)
 {
 	struct twl_pwmled_chip *twl;
-	int ret;
 
 	twl = devm_kzalloc(&pdev->dev, sizeof(*twl), GFP_KERNEL);
 	if (!twl)
@@ -294,20 +293,7 @@ static int twl_pwmled_probe(struct platform_device *pdev)
 
 	mutex_init(&twl->mutex);
 
-	ret = pwmchip_add(&twl->chip);
-	if (ret < 0)
-		return ret;
-
-	platform_set_drvdata(pdev, twl);
-
-	return 0;
-}
-
-static int twl_pwmled_remove(struct platform_device *pdev)
-{
-	struct twl_pwmled_chip *twl = platform_get_drvdata(pdev);
-
-	return pwmchip_remove(&twl->chip);
+	return devm_pwmchip_add(&pdev->dev, &twl->chip);
 }
 
 #ifdef CONFIG_OF
@@ -325,7 +311,6 @@ static struct platform_driver twl_pwmled_driver = {
 		.of_match_table = of_match_ptr(twl_pwmled_of_match),
 	},
 	.probe = twl_pwmled_probe,
-	.remove = twl_pwmled_remove,
 };
 module_platform_driver(twl_pwmled_driver);
 
diff --git a/drivers/pwm/pwm-twl.c b/drivers/pwm/pwm-twl.c
index e83a826bf621..203194f2c92e 100644
--- a/drivers/pwm/pwm-twl.c
+++ b/drivers/pwm/pwm-twl.c
@@ -298,7 +298,6 @@ static const struct pwm_ops twl6030_pwm_ops = {
 static int twl_pwm_probe(struct platform_device *pdev)
 {
 	struct twl_pwm_chip *twl;
-	int ret;
 
 	twl = devm_kzalloc(&pdev->dev, sizeof(*twl), GFP_KERNEL);
 	if (!twl)
@@ -314,20 +313,7 @@ static int twl_pwm_probe(struct platform_device *pdev)
 
 	mutex_init(&twl->mutex);
 
-	ret = pwmchip_add(&twl->chip);
-	if (ret < 0)
-		return ret;
-
-	platform_set_drvdata(pdev, twl);
-
-	return 0;
-}
-
-static int twl_pwm_remove(struct platform_device *pdev)
-{
-	struct twl_pwm_chip *twl = platform_get_drvdata(pdev);
-
-	return pwmchip_remove(&twl->chip);
+	return devm_pwmchip_add(&pdev->dev, &twl->chip);
 }
 
 #ifdef CONFIG_OF
@@ -345,7 +331,6 @@ static struct platform_driver twl_pwm_driver = {
 		.of_match_table = of_match_ptr(twl_pwm_of_match),
 	},
 	.probe = twl_pwm_probe,
-	.remove = twl_pwm_remove,
 };
 module_platform_driver(twl_pwm_driver);
 
diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c
index a79bee901e9b..401b1ec90785 100644
--- a/drivers/remoteproc/qcom_q6v5_pas.c
+++ b/drivers/remoteproc/qcom_q6v5_pas.c
@@ -833,6 +833,7 @@ static const struct of_device_id adsp_of_match[] = {
 	{ .compatible = "qcom,sc8180x-adsp-pas", .data = &sm8150_adsp_resource},
 	{ .compatible = "qcom,sc8180x-cdsp-pas", .data = &sm8150_cdsp_resource},
 	{ .compatible = "qcom,sc8180x-mpss-pas", .data = &sc8180x_mpss_resource},
+	{ .compatible = "qcom,sdm660-adsp-pas", .data = &adsp_resource_init},
 	{ .compatible = "qcom,sdm845-adsp-pas", .data = &adsp_resource_init},
 	{ .compatible = "qcom,sdm845-cdsp-pas", .data = &cdsp_resource_init},
 	{ .compatible = "qcom,sdx55-mpss-pas", .data = &sdx55_mpss_resource},
diff --git a/drivers/remoteproc/qcom_wcnss.c b/drivers/remoteproc/qcom_wcnss.c
index f1cbc6b2edbb..ebadc6c08e11 100644
--- a/drivers/remoteproc/qcom_wcnss.c
+++ b/drivers/remoteproc/qcom_wcnss.c
@@ -142,18 +142,6 @@ static const struct wcnss_data pronto_v2_data = {
 	.num_vregs = 1,
 };
 
-void qcom_wcnss_assign_iris(struct qcom_wcnss *wcnss,
-			    struct qcom_iris *iris,
-			    bool use_48mhz_xo)
-{
-	mutex_lock(&wcnss->iris_lock);
-
-	wcnss->iris = iris;
-	wcnss->use_48mhz_xo = use_48mhz_xo;
-
-	mutex_unlock(&wcnss->iris_lock);
-}
-
 static int wcnss_load(struct rproc *rproc, const struct firmware *fw)
 {
 	struct qcom_wcnss *wcnss = (struct qcom_wcnss *)rproc->priv;
@@ -639,12 +627,20 @@ static int wcnss_probe(struct platform_device *pdev)
 		goto detach_pds;
 	}
 
+	wcnss->iris = qcom_iris_probe(&pdev->dev, &wcnss->use_48mhz_xo);
+	if (IS_ERR(wcnss->iris)) {
+		ret = PTR_ERR(wcnss->iris);
+		goto detach_pds;
+	}
+
 	ret = rproc_add(rproc);
 	if (ret)
-		goto detach_pds;
+		goto remove_iris;
 
-	return of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev);
+	return 0;
 
+remove_iris:
+	qcom_iris_remove(wcnss->iris);
 detach_pds:
 	wcnss_release_pds(wcnss);
 free_rproc:
@@ -657,7 +653,7 @@ static int wcnss_remove(struct platform_device *pdev)
 {
 	struct qcom_wcnss *wcnss = platform_get_drvdata(pdev);
 
-	of_platform_depopulate(&pdev->dev);
+	qcom_iris_remove(wcnss->iris);
 
 	rproc_del(wcnss->rproc);
 
@@ -686,28 +682,7 @@ static struct platform_driver wcnss_driver = {
 	},
 };
 
-static int __init wcnss_init(void)
-{
-	int ret;
-
-	ret = platform_driver_register(&wcnss_driver);
-	if (ret)
-		return ret;
-
-	ret = platform_driver_register(&qcom_iris_driver);
-	if (ret)
-		platform_driver_unregister(&wcnss_driver);
-
-	return ret;
-}
-module_init(wcnss_init);
-
-static void __exit wcnss_exit(void)
-{
-	platform_driver_unregister(&qcom_iris_driver);
-	platform_driver_unregister(&wcnss_driver);
-}
-module_exit(wcnss_exit);
+module_platform_driver(wcnss_driver);
 
 MODULE_DESCRIPTION("Qualcomm Peripheral Image Loader for Wireless Subsystem");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/remoteproc/qcom_wcnss.h b/drivers/remoteproc/qcom_wcnss.h
index 62c8682d0a92..6d01ee6afa7f 100644
--- a/drivers/remoteproc/qcom_wcnss.h
+++ b/drivers/remoteproc/qcom_wcnss.h
@@ -17,9 +17,9 @@ struct wcnss_vreg_info {
 	bool super_turbo;
 };
 
+struct qcom_iris *qcom_iris_probe(struct device *parent, bool *use_48mhz_xo);
+void qcom_iris_remove(struct qcom_iris *iris);
 int qcom_iris_enable(struct qcom_iris *iris);
 void qcom_iris_disable(struct qcom_iris *iris);
 
-void qcom_wcnss_assign_iris(struct qcom_wcnss *wcnss, struct qcom_iris *iris, bool use_48mhz_xo);
-
 #endif
diff --git a/drivers/remoteproc/qcom_wcnss_iris.c b/drivers/remoteproc/qcom_wcnss_iris.c
index 169acd305ae3..09720ddddc85 100644
--- a/drivers/remoteproc/qcom_wcnss_iris.c
+++ b/drivers/remoteproc/qcom_wcnss_iris.c
@@ -17,7 +17,7 @@
 #include "qcom_wcnss.h"
 
 struct qcom_iris {
-	struct device *dev;
+	struct device dev;
 
 	struct clk *xo_clk;
 
@@ -75,7 +75,7 @@ int qcom_iris_enable(struct qcom_iris *iris)
 
 	ret = clk_prepare_enable(iris->xo_clk);
 	if (ret) {
-		dev_err(iris->dev, "failed to enable xo clk\n");
+		dev_err(&iris->dev, "failed to enable xo clk\n");
 		goto disable_regulators;
 	}
 
@@ -93,43 +93,90 @@ void qcom_iris_disable(struct qcom_iris *iris)
 	regulator_bulk_disable(iris->num_vregs, iris->vregs);
 }
 
-static int qcom_iris_probe(struct platform_device *pdev)
+static const struct of_device_id iris_of_match[] = {
+	{ .compatible = "qcom,wcn3620", .data = &wcn3620_data },
+	{ .compatible = "qcom,wcn3660", .data = &wcn3660_data },
+	{ .compatible = "qcom,wcn3660b", .data = &wcn3680_data },
+	{ .compatible = "qcom,wcn3680", .data = &wcn3680_data },
+	{}
+};
+
+static void qcom_iris_release(struct device *dev)
+{
+	struct qcom_iris *iris = container_of(dev, struct qcom_iris, dev);
+
+	of_node_put(iris->dev.of_node);
+	kfree(iris);
+}
+
+struct qcom_iris *qcom_iris_probe(struct device *parent, bool *use_48mhz_xo)
 {
+	const struct of_device_id *match;
 	const struct iris_data *data;
-	struct qcom_wcnss *wcnss;
+	struct device_node *of_node;
 	struct qcom_iris *iris;
 	int ret;
 	int i;
 
-	iris = devm_kzalloc(&pdev->dev, sizeof(struct qcom_iris), GFP_KERNEL);
-	if (!iris)
-		return -ENOMEM;
+	of_node = of_get_child_by_name(parent->of_node, "iris");
+	if (!of_node) {
+		dev_err(parent, "No child node \"iris\" found\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	iris = kzalloc(sizeof(*iris), GFP_KERNEL);
+	if (!iris) {
+		of_node_put(of_node);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	device_initialize(&iris->dev);
+	iris->dev.parent = parent;
+	iris->dev.release = qcom_iris_release;
+	iris->dev.of_node = of_node;
+
+	dev_set_name(&iris->dev, "%s.iris", dev_name(parent));
+
+	ret = device_add(&iris->dev);
+	if (ret) {
+		put_device(&iris->dev);
+		return ERR_PTR(ret);
+	}
+
+	match = of_match_device(iris_of_match, &iris->dev);
+	if (!match) {
+		dev_err(&iris->dev, "no matching compatible for iris\n");
+		ret = -EINVAL;
+		goto err_device_del;
+	}
 
-	data = of_device_get_match_data(&pdev->dev);
-	wcnss = dev_get_drvdata(pdev->dev.parent);
+	data = match->data;
 
-	iris->xo_clk = devm_clk_get(&pdev->dev, "xo");
+	iris->xo_clk = devm_clk_get(&iris->dev, "xo");
 	if (IS_ERR(iris->xo_clk)) {
-		if (PTR_ERR(iris->xo_clk) != -EPROBE_DEFER)
-			dev_err(&pdev->dev, "failed to acquire xo clk\n");
-		return PTR_ERR(iris->xo_clk);
+		ret = PTR_ERR(iris->xo_clk);
+		if (ret != -EPROBE_DEFER)
+			dev_err(&iris->dev, "failed to acquire xo clk\n");
+		goto err_device_del;
 	}
 
 	iris->num_vregs = data->num_vregs;
-	iris->vregs = devm_kcalloc(&pdev->dev,
+	iris->vregs = devm_kcalloc(&iris->dev,
 				   iris->num_vregs,
 				   sizeof(struct regulator_bulk_data),
 				   GFP_KERNEL);
-	if (!iris->vregs)
-		return -ENOMEM;
+	if (!iris->vregs) {
+		ret = -ENOMEM;
+		goto err_device_del;
+	}
 
 	for (i = 0; i < iris->num_vregs; i++)
 		iris->vregs[i].supply = data->vregs[i].name;
 
-	ret = devm_regulator_bulk_get(&pdev->dev, iris->num_vregs, iris->vregs);
+	ret = devm_regulator_bulk_get(&iris->dev, iris->num_vregs, iris->vregs);
 	if (ret) {
-		dev_err(&pdev->dev, "failed to get regulators\n");
-		return ret;
+		dev_err(&iris->dev, "failed to get regulators\n");
+		goto err_device_del;
 	}
 
 	for (i = 0; i < iris->num_vregs; i++) {
@@ -143,34 +190,17 @@ static int qcom_iris_probe(struct platform_device *pdev)
 					   data->vregs[i].load_uA);
 	}
 
-	qcom_wcnss_assign_iris(wcnss, iris, data->use_48mhz_xo);
-
-	return 0;
-}
+	*use_48mhz_xo = data->use_48mhz_xo;
 
-static int qcom_iris_remove(struct platform_device *pdev)
-{
-	struct qcom_wcnss *wcnss = dev_get_drvdata(pdev->dev.parent);
+	return iris;
 
-	qcom_wcnss_assign_iris(wcnss, NULL, false);
+err_device_del:
+	device_del(&iris->dev);
 
-	return 0;
+	return ERR_PTR(ret);
 }
 
-static const struct of_device_id iris_of_match[] = {
-	{ .compatible = "qcom,wcn3620", .data = &wcn3620_data },
-	{ .compatible = "qcom,wcn3660", .data = &wcn3660_data },
-	{ .compatible = "qcom,wcn3660b", .data = &wcn3680_data },
-	{ .compatible = "qcom,wcn3680", .data = &wcn3680_data },
-	{}
-};
-MODULE_DEVICE_TABLE(of, iris_of_match);
-
-struct platform_driver qcom_iris_driver = {
-	.probe = qcom_iris_probe,
-	.remove = qcom_iris_remove,
-	.driver = {
-		.name = "qcom-iris",
-		.of_match_table = iris_of_match,
-	},
-};
+void qcom_iris_remove(struct qcom_iris *iris)
+{
+	device_del(&iris->dev);
+}
diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 7de5905d276a..502b6604b757 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -2750,8 +2750,8 @@ void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type)
 	dev_err(&rproc->dev, "crash detected in %s: type %s\n",
 		rproc->name, rproc_crash_to_string(type));
 
-	/* create a new task to handle the error */
-	schedule_work(&rproc->crash_handler);
+	/* Have a worker handle the error; ensure system is not suspended */
+	queue_work(system_freezable_wq, &rproc->crash_handler);
 }
 EXPORT_SYMBOL(rproc_report_crash);
 
diff --git a/drivers/remoteproc/remoteproc_elf_helpers.h b/drivers/remoteproc/remoteproc_elf_helpers.h
index 26404e68e17a..e6de53a5000c 100644
--- a/drivers/remoteproc/remoteproc_elf_helpers.h
+++ b/drivers/remoteproc/remoteproc_elf_helpers.h
@@ -15,7 +15,7 @@
  * fw_elf_get_class - Get elf class
  * @fw: the ELF firmware image
  *
- * Note that we use and elf32_hdr to access the class since the start of the
+ * Note that we use elf32_hdr to access the class since the start of the
  * struct is the same for both elf class
  *
  * Return: elf class of the firmware
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 12153d5801ce..e1bc5214494e 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -75,6 +75,15 @@ config RTC_DEBUG
 	  Say yes here to enable debugging support in the RTC framework
 	  and individual RTC drivers.
 
+config RTC_LIB_KUNIT_TEST
+	tristate "KUnit test for RTC lib functions" if !KUNIT_ALL_TESTS
+	depends on KUNIT
+	default KUNIT_ALL_TESTS
+	help
+	  Enable this option to test RTC library functions.
+
+	  If unsure, say N.
+
 config RTC_NVMEM
 	bool "RTC non volatile storage support"
 	select NVMEM
@@ -624,6 +633,7 @@ config RTC_DRV_FM3130
 
 config RTC_DRV_RX8010
 	tristate "Epson RX8010SJ"
+	select REGMAP_I2C
 	help
 	  If you say yes here you get support for the Epson RX8010SJ RTC
 	  chip.
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 2dd0dd956b0e..5ceeafe4d5b2 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -15,6 +15,8 @@ rtc-core-$(CONFIG_RTC_INTF_DEV)		+= dev.o
 rtc-core-$(CONFIG_RTC_INTF_PROC)	+= proc.o
 rtc-core-$(CONFIG_RTC_INTF_SYSFS)	+= sysfs.o
 
+obj-$(CONFIG_RTC_LIB_KUNIT_TEST)	+= lib_test.o
+
 # Keep the list ordered.
 
 obj-$(CONFIG_RTC_DRV_88PM80X)	+= rtc-88pm80x.o
diff --git a/drivers/rtc/lib.c b/drivers/rtc/lib.c
index 23284580df97..fe361652727a 100644
--- a/drivers/rtc/lib.c
+++ b/drivers/rtc/lib.c
@@ -6,6 +6,8 @@
  * Author: Alessandro Zummo <a.zummo@towertech.it>
  *
  * based on arch/arm/common/rtctime.c and other bits
+ *
+ * Author: Cassio Neri <cassio.neri@gmail.com> (rtc_time64_to_tm)
  */
 
 #include <linux/export.h>
@@ -22,8 +24,6 @@ static const unsigned short rtc_ydays[2][13] = {
 	{ 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
 };
 
-#define LEAPS_THRU_END_OF(y) ((y) / 4 - (y) / 100 + (y) / 400)
-
 /*
  * The number of days in the month.
  */
@@ -42,42 +42,95 @@ int rtc_year_days(unsigned int day, unsigned int month, unsigned int year)
 }
 EXPORT_SYMBOL(rtc_year_days);
 
-/*
- * rtc_time64_to_tm - Converts time64_t to rtc_time.
- * Convert seconds since 01-01-1970 00:00:00 to Gregorian date.
+/**
+ * rtc_time64_to_tm - converts time64_t to rtc_time.
+ *
+ * @time:	The number of seconds since 01-01-1970 00:00:00.
+ *		(Must be positive.)
+ * @tm:		Pointer to the struct rtc_time.
  */
 void rtc_time64_to_tm(time64_t time, struct rtc_time *tm)
 {
-	unsigned int month, year, secs;
+	unsigned int secs;
 	int days;
 
+	u64 u64tmp;
+	u32 u32tmp, udays, century, day_of_century, year_of_century, year,
+		day_of_year, month, day;
+	bool is_Jan_or_Feb, is_leap_year;
+
 	/* time must be positive */
 	days = div_s64_rem(time, 86400, &secs);
 
 	/* day of the week, 1970-01-01 was a Thursday */
 	tm->tm_wday = (days + 4) % 7;
 
-	year = 1970 + days / 365;
-	days -= (year - 1970) * 365
-		+ LEAPS_THRU_END_OF(year - 1)
-		- LEAPS_THRU_END_OF(1970 - 1);
-	while (days < 0) {
-		year -= 1;
-		days += 365 + is_leap_year(year);
-	}
-	tm->tm_year = year - 1900;
-	tm->tm_yday = days + 1;
-
-	for (month = 0; month < 11; month++) {
-		int newdays;
-
-		newdays = days - rtc_month_days(month, year);
-		if (newdays < 0)
-			break;
-		days = newdays;
-	}
-	tm->tm_mon = month;
-	tm->tm_mday = days + 1;
+	/*
+	 * The following algorithm is, basically, Proposition 6.3 of Neri
+	 * and Schneider [1]. In a few words: it works on the computational
+	 * (fictitious) calendar where the year starts in March, month = 2
+	 * (*), and finishes in February, month = 13. This calendar is
+	 * mathematically convenient because the day of the year does not
+	 * depend on whether the year is leap or not. For instance:
+	 *
+	 * March 1st		0-th day of the year;
+	 * ...
+	 * April 1st		31-st day of the year;
+	 * ...
+	 * January 1st		306-th day of the year; (Important!)
+	 * ...
+	 * February 28th	364-th day of the year;
+	 * February 29th	365-th day of the year (if it exists).
+	 *
+	 * After having worked out the date in the computational calendar
+	 * (using just arithmetics) it's easy to convert it to the
+	 * corresponding date in the Gregorian calendar.
+	 *
+	 * [1] "Euclidean Affine Functions and Applications to Calendar
+	 * Algorithms". https://arxiv.org/abs/2102.06959
+	 *
+	 * (*) The numbering of months follows rtc_time more closely and
+	 * thus, is slightly different from [1].
+	 */
+
+	udays		= ((u32) days) + 719468;
+
+	u32tmp		= 4 * udays + 3;
+	century		= u32tmp / 146097;
+	day_of_century	= u32tmp % 146097 / 4;
+
+	u32tmp		= 4 * day_of_century + 3;
+	u64tmp		= 2939745ULL * u32tmp;
+	year_of_century	= upper_32_bits(u64tmp);
+	day_of_year	= lower_32_bits(u64tmp) / 2939745 / 4;
+
+	year		= 100 * century + year_of_century;
+	is_leap_year	= year_of_century != 0 ?
+		year_of_century % 4 == 0 : century % 4 == 0;
+
+	u32tmp		= 2141 * day_of_year + 132377;
+	month		= u32tmp >> 16;
+	day		= ((u16) u32tmp) / 2141;
+
+	/*
+	 * Recall that January 01 is the 306-th day of the year in the
+	 * computational (not Gregorian) calendar.
+	 */
+	is_Jan_or_Feb	= day_of_year >= 306;
+
+	/* Converts to the Gregorian calendar. */
+	year		= year + is_Jan_or_Feb;
+	month		= is_Jan_or_Feb ? month - 12 : month;
+	day		= day + 1;
+
+	day_of_year	= is_Jan_or_Feb ?
+		day_of_year - 306 : day_of_year + 31 + 28 + is_leap_year;
+
+	/* Converts to rtc_time's format. */
+	tm->tm_year	= (int) (year - 1900);
+	tm->tm_mon	= (int) month;
+	tm->tm_mday	= (int) day;
+	tm->tm_yday	= (int) day_of_year + 1;
 
 	tm->tm_hour = secs / 3600;
 	secs -= tm->tm_hour * 3600;
diff --git a/drivers/rtc/lib_test.c b/drivers/rtc/lib_test.c
new file mode 100644
index 000000000000..d5caf36c56cd
--- /dev/null
+++ b/drivers/rtc/lib_test.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: LGPL-2.1+
+
+#include <kunit/test.h>
+#include <linux/rtc.h>
+
+/*
+ * Advance a date by one day.
+ */
+static void advance_date(int *year, int *month, int *mday, int *yday)
+{
+	if (*mday != rtc_month_days(*month - 1, *year)) {
+		++*mday;
+		++*yday;
+		return;
+	}
+
+	*mday = 1;
+	if (*month != 12) {
+		++*month;
+		++*yday;
+		return;
+	}
+
+	*month = 1;
+	*yday  = 1;
+	++*year;
+}
+
+/*
+ * Checks every day in a 160000 years interval starting on 1970-01-01
+ * against the expected result.
+ */
+static void rtc_time64_to_tm_test_date_range(struct kunit *test)
+{
+	/*
+	 * 160000 years	= (160000 / 400) * 400 years
+	 *		= (160000 / 400) * 146097 days
+	 *		= (160000 / 400) * 146097 * 86400 seconds
+	 */
+	time64_t total_secs = ((time64_t) 160000) / 400 * 146097 * 86400;
+
+	int year	= 1970;
+	int month	= 1;
+	int mday	= 1;
+	int yday	= 1;
+
+	struct rtc_time result;
+	time64_t secs;
+	s64 days;
+
+	for (secs = 0; secs <= total_secs; secs += 86400) {
+
+		rtc_time64_to_tm(secs, &result);
+
+		days = div_s64(secs, 86400);
+
+		#define FAIL_MSG "%d/%02d/%02d (%2d) : %ld", \
+			year, month, mday, yday, days
+
+		KUNIT_ASSERT_EQ_MSG(test, year - 1900, result.tm_year, FAIL_MSG);
+		KUNIT_ASSERT_EQ_MSG(test, month - 1, result.tm_mon, FAIL_MSG);
+		KUNIT_ASSERT_EQ_MSG(test, mday, result.tm_mday, FAIL_MSG);
+		KUNIT_ASSERT_EQ_MSG(test, yday, result.tm_yday, FAIL_MSG);
+
+		advance_date(&year, &month, &mday, &yday);
+	}
+}
+
+static struct kunit_case rtc_lib_test_cases[] = {
+	KUNIT_CASE(rtc_time64_to_tm_test_date_range),
+	{}
+};
+
+static struct kunit_suite rtc_lib_test_suite = {
+	.name = "rtc_lib_test_cases",
+	.test_cases = rtc_lib_test_cases,
+};
+
+kunit_test_suite(rtc_lib_test_suite);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 670fd8a2970e..eb15067a605e 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -229,19 +229,13 @@ static int cmos_read_time(struct device *dev, struct rtc_time *t)
 	if (!pm_trace_rtc_valid())
 		return -EIO;
 
-	/* REVISIT:  if the clock has a "century" register, use
-	 * that instead of the heuristic in mc146818_get_time().
-	 * That'll make Y3K compatility (year > 2070) easy!
-	 */
 	mc146818_get_time(t);
 	return 0;
 }
 
 static int cmos_set_time(struct device *dev, struct rtc_time *t)
 {
-	/* REVISIT:  set the "century" register if available
-	 *
-	 * NOTE: this ignores the issue whereby updating the seconds
+	/* NOTE: this ignores the issue whereby updating the seconds
 	 * takes effect exactly 500ms after we write the register.
 	 * (Also queueing and other delays before we get this far.)
 	 */
diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c
index c914091819ba..d38aaf08108c 100644
--- a/drivers/rtc/rtc-rx8025.c
+++ b/drivers/rtc/rtc-rx8025.c
@@ -60,14 +60,23 @@
 #define RX8025_ADJ_DATA_MAX	62
 #define RX8025_ADJ_DATA_MIN	-62
 
+enum rx_model {
+	model_rx_unknown,
+	model_rx_8025,
+	model_rx_8035,
+	model_last
+};
+
 static const struct i2c_device_id rx8025_id[] = {
-	{ "rx8025", 0 },
+	{ "rx8025", model_rx_8025 },
+	{ "rx8035", model_rx_8035 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, rx8025_id);
 
 struct rx8025_data {
 	struct rtc_device *rtc;
+	enum rx_model model;
 	u8 ctrl1;
 };
 
@@ -100,10 +109,26 @@ static s32 rx8025_write_regs(const struct i2c_client *client,
 					      length, values);
 }
 
+static int rx8025_is_osc_stopped(enum rx_model model, int ctrl2)
+{
+	int xstp = ctrl2 & RX8025_BIT_CTRL2_XST;
+	/* XSTP bit has different polarity on RX-8025 vs RX-8035.
+	 * RX-8025: 0 == oscillator stopped
+	 * RX-8035: 1 == oscillator stopped
+	 */
+
+	if (model == model_rx_8025)
+		xstp = !xstp;
+
+	return xstp;
+}
+
 static int rx8025_check_validity(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
+	struct rx8025_data *drvdata = dev_get_drvdata(dev);
 	int ctrl2;
+	int xstp;
 
 	ctrl2 = rx8025_read_reg(client, RX8025_REG_CTRL2);
 	if (ctrl2 < 0)
@@ -117,7 +142,8 @@ static int rx8025_check_validity(struct device *dev)
 		return -EINVAL;
 	}
 
-	if (!(ctrl2 & RX8025_BIT_CTRL2_XST)) {
+	xstp = rx8025_is_osc_stopped(drvdata->model, ctrl2);
+	if (xstp) {
 		dev_warn(dev, "crystal stopped, date is invalid\n");
 		return -EINVAL;
 	}
@@ -127,6 +153,7 @@ static int rx8025_check_validity(struct device *dev)
 
 static int rx8025_reset_validity(struct i2c_client *client)
 {
+	struct rx8025_data *drvdata = i2c_get_clientdata(client);
 	int ctrl2 = rx8025_read_reg(client, RX8025_REG_CTRL2);
 
 	if (ctrl2 < 0)
@@ -134,22 +161,28 @@ static int rx8025_reset_validity(struct i2c_client *client)
 
 	ctrl2 &= ~(RX8025_BIT_CTRL2_PON | RX8025_BIT_CTRL2_VDET);
 
+	if (drvdata->model == model_rx_8025)
+		ctrl2 |= RX8025_BIT_CTRL2_XST;
+	else
+		ctrl2 &= ~(RX8025_BIT_CTRL2_XST);
+
 	return rx8025_write_reg(client, RX8025_REG_CTRL2,
-				ctrl2 | RX8025_BIT_CTRL2_XST);
+				ctrl2);
 }
 
 static irqreturn_t rx8025_handle_irq(int irq, void *dev_id)
 {
 	struct i2c_client *client = dev_id;
 	struct rx8025_data *rx8025 = i2c_get_clientdata(client);
-	int status;
+	int status, xstp;
 
 	rtc_lock(rx8025->rtc);
 	status = rx8025_read_reg(client, RX8025_REG_CTRL2);
 	if (status < 0)
 		goto out;
 
-	if (!(status & RX8025_BIT_CTRL2_XST))
+	xstp = rx8025_is_osc_stopped(rx8025->model, status);
+	if (xstp)
 		dev_warn(&client->dev, "Oscillation stop was detected,"
 			 "you may have to readjust the clock\n");
 
@@ -519,6 +552,9 @@ static int rx8025_probe(struct i2c_client *client,
 
 	i2c_set_clientdata(client, rx8025);
 
+	if (id)
+		rx8025->model = id->driver_data;
+
 	err = rx8025_init_client(client);
 	if (err)
 		return err;
diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c
index 6b56f8eacba6..fb9c6b709e13 100644
--- a/drivers/rtc/rtc-s5m.c
+++ b/drivers/rtc/rtc-s5m.c
@@ -204,15 +204,9 @@ static int s5m8767_tm_to_data(struct rtc_time *tm, u8 *data)
 	data[RTC_WEEKDAY] = 1 << tm->tm_wday;
 	data[RTC_DATE] = tm->tm_mday;
 	data[RTC_MONTH] = tm->tm_mon + 1;
-	data[RTC_YEAR1] = tm->tm_year > 100 ? (tm->tm_year - 100) : 0;
+	data[RTC_YEAR1] = tm->tm_year - 100;
 
-	if (tm->tm_year < 100) {
-		pr_err("RTC cannot handle the year %d\n",
-		       1900 + tm->tm_year);
-		return -EINVAL;
-	} else {
-		return 0;
-	}
+	return 0;
 }
 
 /*
@@ -786,29 +780,35 @@ static int s5m_rtc_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	device_init_wakeup(&pdev->dev, 1);
-
-	info->rtc_dev = devm_rtc_device_register(&pdev->dev, "s5m-rtc",
-						 &s5m_rtc_ops, THIS_MODULE);
-
+	info->rtc_dev = devm_rtc_allocate_device(&pdev->dev);
 	if (IS_ERR(info->rtc_dev))
 		return PTR_ERR(info->rtc_dev);
 
-	if (!info->irq) {
-		dev_info(&pdev->dev, "Alarm IRQ not available\n");
-		return 0;
+	info->rtc_dev->ops = &s5m_rtc_ops;
+
+	if (info->device_type == S5M8763X) {
+		info->rtc_dev->range_min = RTC_TIMESTAMP_BEGIN_0000;
+		info->rtc_dev->range_max = RTC_TIMESTAMP_END_9999;
+	} else {
+		info->rtc_dev->range_min = RTC_TIMESTAMP_BEGIN_2000;
+		info->rtc_dev->range_max = RTC_TIMESTAMP_END_2099;
 	}
 
-	ret = devm_request_threaded_irq(&pdev->dev, info->irq, NULL,
-					s5m_rtc_alarm_irq, 0, "rtc-alarm0",
-					info);
-	if (ret < 0) {
-		dev_err(&pdev->dev, "Failed to request alarm IRQ: %d: %d\n",
-			info->irq, ret);
-		return ret;
+	if (!info->irq) {
+		clear_bit(RTC_FEATURE_ALARM, info->rtc_dev->features);
+	} else {
+		ret = devm_request_threaded_irq(&pdev->dev, info->irq, NULL,
+						s5m_rtc_alarm_irq, 0, "rtc-alarm0",
+						info);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "Failed to request alarm IRQ: %d: %d\n",
+				info->irq, ret);
+			return ret;
+		}
+		device_init_wakeup(&pdev->dev, 1);
 	}
 
-	return 0;
+	return devm_rtc_register_device(info->rtc_dev);
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/rtc/rtc-tps65910.c b/drivers/rtc/rtc-tps65910.c
index bc89c62ccb9b..75e4c2d777b9 100644
--- a/drivers/rtc/rtc-tps65910.c
+++ b/drivers/rtc/rtc-tps65910.c
@@ -467,6 +467,6 @@ static struct platform_driver tps65910_rtc_driver = {
 };
 
 module_platform_driver(tps65910_rtc_driver);
-MODULE_ALIAS("platform:rtc-tps65910");
+MODULE_ALIAS("platform:tps65910-rtc");
 MODULE_AUTHOR("Venu Byravarasu <vbyravarasu@nvidia.com>");
 MODULE_LICENSE("GPL");
diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig
index 376f1efbbb86..d0416dbd0cd8 100644
--- a/drivers/s390/block/Kconfig
+++ b/drivers/s390/block/Kconfig
@@ -2,17 +2,6 @@
 comment "S/390 block device drivers"
 	depends on S390 && BLOCK
 
-config BLK_DEV_XPRAM
-	def_tristate m
-	prompt "XPRAM disk support"
-	depends on S390 && BLOCK
-	help
-	  Select this option if you want to use your expanded storage on S/390
-	  or zSeries as a disk.  This is useful as a _fast_ swap device if you
-	  want to access more than 2G of memory when running in 31 bit mode.
-	  This option is also available as a module which will be called
-	  xpram.  If unsure, say "N".
-
 config DCSSBLK
 	def_tristate m
 	select FS_DAX_LIMITED
diff --git a/drivers/s390/block/Makefile b/drivers/s390/block/Makefile
index 60c85cff556f..a0a54d2f063f 100644
--- a/drivers/s390/block/Makefile
+++ b/drivers/s390/block/Makefile
@@ -16,7 +16,6 @@ obj-$(CONFIG_DASD) += dasd_mod.o
 obj-$(CONFIG_DASD_DIAG) += dasd_diag_mod.o
 obj-$(CONFIG_DASD_ECKD) += dasd_eckd_mod.o
 obj-$(CONFIG_DASD_FBA)  += dasd_fba_mod.o
-obj-$(CONFIG_BLK_DEV_XPRAM) += xpram.o
 obj-$(CONFIG_DCSSBLK) += dcssblk.o
 
 scm_block-objs := scm_drv.o scm_blk.o
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
deleted file mode 100644
index ce98fab4d43c..000000000000
--- a/drivers/s390/block/xpram.c
+++ /dev/null
@@ -1,416 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Xpram.c -- the S/390 expanded memory RAM-disk
- *           
- * significant parts of this code are based on
- * the sbull device driver presented in
- * A. Rubini: Linux Device Drivers
- *
- * Author of XPRAM specific coding: Reinhard Buendgen
- *                                  buendgen@de.ibm.com
- * Rewrite for 2.5: Martin Schwidefsky <schwidefsky@de.ibm.com>
- *
- * External interfaces:
- *   Interfaces to linux kernel
- *        xpram_setup: read kernel parameters
- *   Device specific file operations
- *        xpram_iotcl
- *        xpram_open
- *
- * "ad-hoc" partitioning:
- *    the expanded memory can be partitioned among several devices 
- *    (with different minors). The partitioning set up can be
- *    set by kernel or module parameters (int devs & int sizes[])
- *
- * Potential future improvements:
- *   generic hard disk support to replace ad-hoc partitioning
- */
-
-#define KMSG_COMPONENT "xpram"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/ctype.h>  /* isdigit, isxdigit */
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/blkdev.h>
-#include <linux/blkpg.h>
-#include <linux/hdreg.h>  /* HDIO_GETGEO */
-#include <linux/device.h>
-#include <linux/bio.h>
-#include <linux/gfp.h>
-#include <linux/uaccess.h>
-
-#define XPRAM_NAME	"xpram"
-#define XPRAM_DEVS	1	/* one partition */
-#define XPRAM_MAX_DEVS	32	/* maximal number of devices (partitions) */
-
-typedef struct {
-	unsigned int	size;		/* size of xpram segment in pages */
-	unsigned int	offset;		/* start page of xpram segment */
-} xpram_device_t;
-
-static xpram_device_t xpram_devices[XPRAM_MAX_DEVS];
-static unsigned int xpram_sizes[XPRAM_MAX_DEVS];
-static struct gendisk *xpram_disks[XPRAM_MAX_DEVS];
-static unsigned int xpram_pages;
-static int xpram_devs;
-
-/*
- * Parameter parsing functions.
- */
-static int devs = XPRAM_DEVS;
-static char *sizes[XPRAM_MAX_DEVS];
-
-module_param(devs, int, 0);
-module_param_array(sizes, charp, NULL, 0);
-
-MODULE_PARM_DESC(devs, "number of devices (\"partitions\"), " \
-		 "the default is " __MODULE_STRING(XPRAM_DEVS) "\n");
-MODULE_PARM_DESC(sizes, "list of device (partition) sizes " \
-		 "the defaults are 0s \n" \
-		 "All devices with size 0 equally partition the "
-		 "remaining space on the expanded strorage not "
-		 "claimed by explicit sizes\n");
-MODULE_LICENSE("GPL");
-
-/*
- * Copy expanded memory page (4kB) into main memory                  
- * Arguments                                                         
- *           page_addr:    address of target page                    
- *           xpage_index:  index of expandeded memory page           
- * Return value                                                      
- *           0:            if operation succeeds
- *           -EIO:         if pgin failed
- *           -ENXIO:       if xpram has vanished
- */
-static int xpram_page_in (unsigned long page_addr, unsigned int xpage_index)
-{
-	int cc = 2;	/* return unused cc 2 if pgin traps */
-
-	asm volatile(
-		"	.insn	rre,0xb22e0000,%1,%2\n"  /* pgin %1,%2 */
-		"0:	ipm	%0\n"
-		"	srl	%0,28\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "+d" (cc) : "a" (__pa(page_addr)), "d" (xpage_index) : "cc");
-	if (cc == 3)
-		return -ENXIO;
-	if (cc == 2)
-		return -ENXIO;
-	if (cc == 1)
-		return -EIO;
-	return 0;
-}
-
-/*
- * Copy a 4kB page of main memory to an expanded memory page          
- * Arguments                                                          
- *           page_addr:    address of source page                     
- *           xpage_index:  index of expandeded memory page            
- * Return value                                                       
- *           0:            if operation succeeds
- *           -EIO:         if pgout failed
- *           -ENXIO:       if xpram has vanished
- */
-static long xpram_page_out (unsigned long page_addr, unsigned int xpage_index)
-{
-	int cc = 2;	/* return unused cc 2 if pgin traps */
-
-	asm volatile(
-		"	.insn	rre,0xb22f0000,%1,%2\n"  /* pgout %1,%2 */
-		"0:	ipm	%0\n"
-		"	srl	%0,28\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "+d" (cc) : "a" (__pa(page_addr)), "d" (xpage_index) : "cc");
-	if (cc == 3)
-		return -ENXIO;
-	if (cc == 2)
-		return -ENXIO;
-	if (cc == 1)
-		return -EIO;
-	return 0;
-}
-
-/*
- * Check if xpram is available.
- */
-static int __init xpram_present(void)
-{
-	unsigned long mem_page;
-	int rc;
-
-	mem_page = (unsigned long) __get_free_page(GFP_KERNEL);
-	if (!mem_page)
-		return -ENOMEM;
-	rc = xpram_page_in(mem_page, 0);
-	free_page(mem_page);
-	return rc ? -ENXIO : 0;
-}
-
-/*
- * Return index of the last available xpram page.
- */
-static unsigned long __init xpram_highest_page_index(void)
-{
-	unsigned int page_index, add_bit;
-	unsigned long mem_page;
-
-	mem_page = (unsigned long) __get_free_page(GFP_KERNEL);
-	if (!mem_page)
-		return 0;
-
-	page_index = 0;
-	add_bit = 1ULL << (sizeof(unsigned int)*8 - 1);
-	while (add_bit > 0) {
-		if (xpram_page_in(mem_page, page_index | add_bit) == 0)
-			page_index |= add_bit;
-		add_bit >>= 1;
-	}
-
-	free_page (mem_page);
-
-	return page_index;
-}
-
-/*
- * Block device make request function.
- */
-static blk_qc_t xpram_submit_bio(struct bio *bio)
-{
-	xpram_device_t *xdev = bio->bi_bdev->bd_disk->private_data;
-	struct bio_vec bvec;
-	struct bvec_iter iter;
-	unsigned int index;
-	unsigned long page_addr;
-	unsigned long bytes;
-
-	blk_queue_split(&bio);
-
-	if ((bio->bi_iter.bi_sector & 7) != 0 ||
-	    (bio->bi_iter.bi_size & 4095) != 0)
-		/* Request is not page-aligned. */
-		goto fail;
-	if ((bio->bi_iter.bi_size >> 12) > xdev->size)
-		/* Request size is no page-aligned. */
-		goto fail;
-	if ((bio->bi_iter.bi_sector >> 3) > 0xffffffffU - xdev->offset)
-		goto fail;
-	index = (bio->bi_iter.bi_sector >> 3) + xdev->offset;
-	bio_for_each_segment(bvec, bio, iter) {
-		page_addr = (unsigned long)
-			kmap(bvec.bv_page) + bvec.bv_offset;
-		bytes = bvec.bv_len;
-		if ((page_addr & 4095) != 0 || (bytes & 4095) != 0)
-			/* More paranoia. */
-			goto fail;
-		while (bytes > 0) {
-			if (bio_data_dir(bio) == READ) {
-				if (xpram_page_in(page_addr, index) != 0)
-					goto fail;
-			} else {
-				if (xpram_page_out(page_addr, index) != 0)
-					goto fail;
-			}
-			page_addr += 4096;
-			bytes -= 4096;
-			index++;
-		}
-	}
-	bio_endio(bio);
-	return BLK_QC_T_NONE;
-fail:
-	bio_io_error(bio);
-	return BLK_QC_T_NONE;
-}
-
-static int xpram_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
-	unsigned long size;
-
-	/*
-	 * get geometry: we have to fake one...  trim the size to a
-	 * multiple of 64 (32k): tell we have 16 sectors, 4 heads,
-	 * whatever cylinders. Tell also that data starts at sector. 4.
-	 */
-	size = (xpram_pages * 8) & ~0x3f;
-	geo->cylinders = size >> 6;
-	geo->heads = 4;
-	geo->sectors = 16;
-	geo->start = 4;
-	return 0;
-}
-
-static const struct block_device_operations xpram_devops =
-{
-	.owner	= THIS_MODULE,
-	.submit_bio = xpram_submit_bio,
-	.getgeo	= xpram_getgeo,
-};
-
-/*
- * Setup xpram_sizes array.
- */
-static int __init xpram_setup_sizes(unsigned long pages)
-{
-	unsigned long mem_needed;
-	unsigned long mem_auto;
-	unsigned long long size;
-	char *sizes_end;
-	int mem_auto_no;
-	int i;
-
-	/* Check number of devices. */
-	if (devs <= 0 || devs > XPRAM_MAX_DEVS) {
-		pr_err("%d is not a valid number of XPRAM devices\n",devs);
-		return -EINVAL;
-	}
-	xpram_devs = devs;
-
-	/*
-	 * Copy sizes array to xpram_sizes and align partition
-	 * sizes to page boundary.
-	 */
-	mem_needed = 0;
-	mem_auto_no = 0;
-	for (i = 0; i < xpram_devs; i++) {
-		if (sizes[i]) {
-			size = simple_strtoull(sizes[i], &sizes_end, 0);
-			switch (*sizes_end) {
-			case 'g':
-			case 'G':
-				size <<= 20;
-				break;
-			case 'm':
-			case 'M':
-				size <<= 10;
-			}
-			xpram_sizes[i] = (size + 3) & -4UL;
-		}
-		if (xpram_sizes[i])
-			mem_needed += xpram_sizes[i];
-		else
-			mem_auto_no++;
-	}
-	
-	pr_info("  number of devices (partitions): %d \n", xpram_devs);
-	for (i = 0; i < xpram_devs; i++) {
-		if (xpram_sizes[i])
-			pr_info("  size of partition %d: %u kB\n",
-				i, xpram_sizes[i]);
-		else
-			pr_info("  size of partition %d to be set "
-				"automatically\n",i);
-	}
-	pr_info("  memory needed (for sized partitions): %lu kB\n",
-		mem_needed);
-	pr_info("  partitions to be sized automatically: %d\n",
-		mem_auto_no);
-
-	if (mem_needed > pages * 4) {
-		pr_err("Not enough expanded memory available\n");
-		return -EINVAL;
-	}
-
-	/*
-	 * partitioning:
-	 * xpram_sizes[i] != 0; partition i has size xpram_sizes[i] kB
-	 * else:             ; all partitions with zero xpram_sizes[i]
-	 *                     partition equally the remaining space
-	 */
-	if (mem_auto_no) {
-		mem_auto = ((pages - mem_needed / 4) / mem_auto_no) * 4;
-		pr_info("  automatically determined "
-			"partition size: %lu kB\n", mem_auto);
-		for (i = 0; i < xpram_devs; i++)
-			if (xpram_sizes[i] == 0)
-				xpram_sizes[i] = mem_auto;
-	}
-	return 0;
-}
-
-static int __init xpram_setup_blkdev(void)
-{
-	unsigned long offset;
-	int i, rc = -ENOMEM;
-
-	for (i = 0; i < xpram_devs; i++) {
-		xpram_disks[i] = blk_alloc_disk(NUMA_NO_NODE);
-		if (!xpram_disks[i])
-			goto out;
-		blk_queue_flag_set(QUEUE_FLAG_NONROT, xpram_disks[i]->queue);
-		blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM,
-				xpram_disks[i]->queue);
-		blk_queue_logical_block_size(xpram_disks[i]->queue, 4096);
-	}
-
-	/*
-	 * Register xpram major.
-	 */
-	rc = register_blkdev(XPRAM_MAJOR, XPRAM_NAME);
-	if (rc < 0)
-		goto out;
-
-	/*
-	 * Setup device structures.
-	 */
-	offset = 0;
-	for (i = 0; i < xpram_devs; i++) {
-		struct gendisk *disk = xpram_disks[i];
-
-		xpram_devices[i].size = xpram_sizes[i] / 4;
-		xpram_devices[i].offset = offset;
-		offset += xpram_devices[i].size;
-		disk->major = XPRAM_MAJOR;
-		disk->first_minor = i;
-		disk->minors = 1;
-		disk->fops = &xpram_devops;
-		disk->private_data = &xpram_devices[i];
-		sprintf(disk->disk_name, "slram%d", i);
-		set_capacity(disk, xpram_sizes[i] << 1);
-		add_disk(disk);
-	}
-
-	return 0;
-out:
-	while (i--)
-		blk_cleanup_disk(xpram_disks[i]);
-	return rc;
-}
-
-/*
- * Finally, the init/exit functions.
- */
-static void __exit xpram_exit(void)
-{
-	int i;
-	for (i = 0; i < xpram_devs; i++) {
-		del_gendisk(xpram_disks[i]);
-		blk_cleanup_disk(xpram_disks[i]);
-	}
-	unregister_blkdev(XPRAM_MAJOR, XPRAM_NAME);
-}
-
-static int __init xpram_init(void)
-{
-	int rc;
-
-	/* Find out size of expanded memory. */
-	if (xpram_present() != 0) {
-		pr_err("No expanded memory available\n");
-		return -ENODEV;
-	}
-	xpram_pages = xpram_highest_page_index() + 1;
-	pr_info("  %u pages expanded memory found (%lu KB).\n",
-		xpram_pages, (unsigned long) xpram_pages*4);
-	rc = xpram_setup_sizes(xpram_pages);
-	if (rc)
-		return rc;
-	return xpram_setup_blkdev();
-}
-
-module_init(xpram_init);
-module_exit(xpram_exit);
diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c
index 87cdbace1453..e4592890f20a 100644
--- a/drivers/s390/char/con3270.c
+++ b/drivers/s390/char/con3270.c
@@ -292,13 +292,15 @@ con3270_update(struct timer_list *t)
  * Read tasklet.
  */
 static void
-con3270_read_tasklet(struct raw3270_request *rrq)
+con3270_read_tasklet(unsigned long data)
 {
 	static char kreset_data = TW_KR;
+	struct raw3270_request *rrq;
 	struct con3270 *cp;
 	unsigned long flags;
 	int nr_up, deactivate;
 
+	rrq = (struct raw3270_request *)data;
 	cp = (struct con3270 *) rrq->view;
 	spin_lock_irqsave(&cp->view.lock, flags);
 	nr_up = cp->nr_up;
@@ -625,8 +627,7 @@ con3270_init(void)
 	INIT_LIST_HEAD(&condev->lines);
 	INIT_LIST_HEAD(&condev->update);
 	timer_setup(&condev->timer, con3270_update, 0);
-	tasklet_init(&condev->readlet, 
-		     (void (*)(unsigned long)) con3270_read_tasklet,
+	tasklet_init(&condev->readlet, con3270_read_tasklet,
 		     (unsigned long) condev->read);
 
 	raw3270_add_view(&condev->view, &con3270_fn, 1, RAW3270_VIEW_LOCK_IRQ);
diff --git a/drivers/s390/char/ctrlchar.c b/drivers/s390/char/ctrlchar.c
index e1686a69a68e..6f2b64040078 100644
--- a/drivers/s390/char/ctrlchar.c
+++ b/drivers/s390/char/ctrlchar.c
@@ -34,12 +34,13 @@ void schedule_sysrq_work(struct sysrq_work *sw)
 
 
 /**
- * Check for special chars at start of input.
+ * ctrlchar_handle - check for special chars at start of input
  *
- * @param buf Console input buffer.
- * @param len Length of valid data in buffer.
- * @param tty The tty struct for this console.
- * @return CTRLCHAR_NONE, if nothing matched,
+ * @buf: console input buffer
+ * @len: length of valid data in buffer
+ * @tty: the tty struct for this console
+ *
+ * Return: CTRLCHAR_NONE, if nothing matched,
  *         CTRLCHAR_SYSRQ, if sysrq was encountered
  *         otherwise char to be inserted logically or'ed
  *         with CTRLCHAR_CTRL
diff --git a/drivers/s390/char/hmcdrv_ftp.c b/drivers/s390/char/hmcdrv_ftp.c
index 37ee8f698c3b..02b6f394aec2 100644
--- a/drivers/s390/char/hmcdrv_ftp.c
+++ b/drivers/s390/char/hmcdrv_ftp.c
@@ -26,7 +26,7 @@
  * struct hmcdrv_ftp_ops - HMC drive FTP operations
  * @startup: startup function
  * @shutdown: shutdown function
- * @cmd: FTP transfer function
+ * @transfer: FTP transfer function
  */
 struct hmcdrv_ftp_ops {
 	int (*startup)(void);
diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c
index b4b84e3e0949..2cf7fe131ece 100644
--- a/drivers/s390/char/sclp.c
+++ b/drivers/s390/char/sclp.c
@@ -28,7 +28,7 @@
 #define SCLP_HEADER		"sclp: "
 
 struct sclp_trace_entry {
-	char id[4];
+	char id[4] __nonstring;
 	u32 a;
 	u64 b;
 };
diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c
index 4dd2eb634856..f3c656975e05 100644
--- a/drivers/s390/cio/blacklist.c
+++ b/drivers/s390/cio/blacklist.c
@@ -262,7 +262,10 @@ static int blacklist_parse_proc_parameters(char *buf)
 
 	if (strcmp("free", parm) == 0) {
 		rc = blacklist_parse_parameters(buf, free, 0);
-		css_schedule_eval_all_unreg(0);
+		/* There could be subchannels without proper devices connected.
+		 * evaluate all the entries
+		 */
+		css_schedule_eval_all();
 	} else if (strcmp("add", parm) == 0)
 		rc = blacklist_parse_parameters(buf, add, 0);
 	else if (strcmp("purge", parm) == 0)
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index adf33b653d87..8d14569823d7 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -867,19 +867,6 @@ out_err:
 		wake_up(&ccw_device_init_wq);
 }
 
-static void ccw_device_call_sch_unregister(struct ccw_device *cdev)
-{
-	struct subchannel *sch;
-
-	/* Get subchannel reference for local processing. */
-	if (!get_device(cdev->dev.parent))
-		return;
-	sch = to_subchannel(cdev->dev.parent);
-	css_sch_device_unregister(sch);
-	/* Release subchannel reference for local processing. */
-	put_device(&sch->dev);
-}
-
 /*
  * subchannel recognition done. Called from the state machine.
  */
@@ -1857,10 +1844,10 @@ static void ccw_device_todo(struct work_struct *work)
 			css_schedule_eval(sch->schid);
 		fallthrough;
 	case CDEV_TODO_UNREG:
-		if (sch_is_pseudo_sch(sch))
-			ccw_device_unregister(cdev);
-		else
-			ccw_device_call_sch_unregister(cdev);
+		spin_lock_irq(sch->lock);
+		sch_set_cdev(sch, NULL);
+		spin_unlock_irq(sch->lock);
+		ccw_device_unregister(cdev);
 		break;
 	default:
 		break;
diff --git a/drivers/s390/cio/device_id.c b/drivers/s390/cio/device_id.c
index 740996d0dc8c..7835a87a60b5 100644
--- a/drivers/s390/cio/device_id.c
+++ b/drivers/s390/cio/device_id.c
@@ -91,7 +91,7 @@ static int diag210_to_senseid(struct senseid *senseid, struct diag210 *diag)
 }
 
 /**
- * diag_get_dev_info - retrieve device information via diag 0x210
+ * diag210_get_dev_info - retrieve device information via diag 0x210
  * @cdev: ccw device
  *
  * Returns zero on success, non-zero otherwise.
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index c57d2a7f0919..7f540ad0b568 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -159,7 +159,7 @@ static int vfio_ccw_mdev_remove(struct mdev_device *mdev)
 	return 0;
 }
 
-static int vfio_ccw_mdev_open(struct mdev_device *mdev)
+static int vfio_ccw_mdev_open_device(struct mdev_device *mdev)
 {
 	struct vfio_ccw_private *private =
 		dev_get_drvdata(mdev_parent_dev(mdev));
@@ -194,7 +194,7 @@ out_unregister:
 	return ret;
 }
 
-static void vfio_ccw_mdev_release(struct mdev_device *mdev)
+static void vfio_ccw_mdev_close_device(struct mdev_device *mdev)
 {
 	struct vfio_ccw_private *private =
 		dev_get_drvdata(mdev_parent_dev(mdev));
@@ -638,8 +638,8 @@ static const struct mdev_parent_ops vfio_ccw_mdev_ops = {
 	.supported_type_groups  = mdev_type_groups,
 	.create			= vfio_ccw_mdev_create,
 	.remove			= vfio_ccw_mdev_remove,
-	.open			= vfio_ccw_mdev_open,
-	.release		= vfio_ccw_mdev_release,
+	.open_device		= vfio_ccw_mdev_open_device,
+	.close_device		= vfio_ccw_mdev_close_device,
 	.read			= vfio_ccw_mdev_read,
 	.write			= vfio_ccw_mdev_write,
 	.ioctl			= vfio_ccw_mdev_ioctl,
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 67f145589f58..118939a7729a 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -24,8 +24,9 @@
 #define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough"
 #define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device"
 
-static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev);
+static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev);
 static struct vfio_ap_queue *vfio_ap_find_queue(int apqn);
+static const struct vfio_device_ops vfio_ap_matrix_dev_ops;
 
 static int match_apqn(struct device *dev, const void *data)
 {
@@ -295,15 +296,6 @@ static int handle_pqap(struct kvm_vcpu *vcpu)
 	matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook,
 				   struct ap_matrix_mdev, pqap_hook);
 
-	/*
-	 * If the KVM pointer is in the process of being set, wait until the
-	 * process has completed.
-	 */
-	wait_event_cmd(matrix_mdev->wait_for_kvm,
-		       !matrix_mdev->kvm_busy,
-		       mutex_unlock(&matrix_dev->lock),
-		       mutex_lock(&matrix_dev->lock));
-
 	/* If the there is no guest using the mdev, there is nothing to do */
 	if (!matrix_mdev->kvm)
 		goto out_unlock;
@@ -336,45 +328,57 @@ static void vfio_ap_matrix_init(struct ap_config_info *info,
 	matrix->adm_max = info->apxa ? info->Nd : 15;
 }
 
-static int vfio_ap_mdev_create(struct mdev_device *mdev)
+static int vfio_ap_mdev_probe(struct mdev_device *mdev)
 {
 	struct ap_matrix_mdev *matrix_mdev;
+	int ret;
 
 	if ((atomic_dec_if_positive(&matrix_dev->available_instances) < 0))
 		return -EPERM;
 
 	matrix_mdev = kzalloc(sizeof(*matrix_mdev), GFP_KERNEL);
 	if (!matrix_mdev) {
-		atomic_inc(&matrix_dev->available_instances);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto err_dec_available;
 	}
+	vfio_init_group_dev(&matrix_mdev->vdev, &mdev->dev,
+			    &vfio_ap_matrix_dev_ops);
 
 	matrix_mdev->mdev = mdev;
 	vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
-	init_waitqueue_head(&matrix_mdev->wait_for_kvm);
-	mdev_set_drvdata(mdev, matrix_mdev);
-	matrix_mdev->pqap_hook.hook = handle_pqap;
-	matrix_mdev->pqap_hook.owner = THIS_MODULE;
+	matrix_mdev->pqap_hook = handle_pqap;
 	mutex_lock(&matrix_dev->lock);
 	list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
 	mutex_unlock(&matrix_dev->lock);
 
+	ret = vfio_register_group_dev(&matrix_mdev->vdev);
+	if (ret)
+		goto err_list;
+	dev_set_drvdata(&mdev->dev, matrix_mdev);
 	return 0;
+
+err_list:
+	mutex_lock(&matrix_dev->lock);
+	list_del(&matrix_mdev->node);
+	mutex_unlock(&matrix_dev->lock);
+	kfree(matrix_mdev);
+err_dec_available:
+	atomic_inc(&matrix_dev->available_instances);
+	return ret;
 }
 
-static int vfio_ap_mdev_remove(struct mdev_device *mdev)
+static void vfio_ap_mdev_remove(struct mdev_device *mdev)
 {
-	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(&mdev->dev);
+
+	vfio_unregister_group_dev(&matrix_mdev->vdev);
 
 	mutex_lock(&matrix_dev->lock);
-	vfio_ap_mdev_reset_queues(mdev);
+	vfio_ap_mdev_reset_queues(matrix_mdev);
 	list_del(&matrix_mdev->node);
 	kfree(matrix_mdev);
-	mdev_set_drvdata(mdev, NULL);
 	atomic_inc(&matrix_dev->available_instances);
 	mutex_unlock(&matrix_dev->lock);
-
-	return 0;
 }
 
 static ssize_t name_show(struct mdev_type *mtype,
@@ -614,16 +618,12 @@ static ssize_t assign_adapter_store(struct device *dev,
 {
 	int ret;
 	unsigned long apid;
-	struct mdev_device *mdev = mdev_from_dev(dev);
-	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
 
 	mutex_lock(&matrix_dev->lock);
 
-	/*
-	 * If the KVM pointer is in flux or the guest is running, disallow
-	 * un-assignment of adapter
-	 */
-	if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
+	/* If the KVM guest is running, disallow assignment of adapter */
+	if (matrix_mdev->kvm) {
 		ret = -EBUSY;
 		goto done;
 	}
@@ -685,16 +685,12 @@ static ssize_t unassign_adapter_store(struct device *dev,
 {
 	int ret;
 	unsigned long apid;
-	struct mdev_device *mdev = mdev_from_dev(dev);
-	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
 
 	mutex_lock(&matrix_dev->lock);
 
-	/*
-	 * If the KVM pointer is in flux or the guest is running, disallow
-	 * un-assignment of adapter
-	 */
-	if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
+	/* If the KVM guest is running, disallow unassignment of adapter */
+	if (matrix_mdev->kvm) {
 		ret = -EBUSY;
 		goto done;
 	}
@@ -773,17 +769,13 @@ static ssize_t assign_domain_store(struct device *dev,
 {
 	int ret;
 	unsigned long apqi;
-	struct mdev_device *mdev = mdev_from_dev(dev);
-	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
 	unsigned long max_apqi = matrix_mdev->matrix.aqm_max;
 
 	mutex_lock(&matrix_dev->lock);
 
-	/*
-	 * If the KVM pointer is in flux or the guest is running, disallow
-	 * assignment of domain
-	 */
-	if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
+	/* If the KVM guest is running, disallow assignment of domain */
+	if (matrix_mdev->kvm) {
 		ret = -EBUSY;
 		goto done;
 	}
@@ -840,16 +832,12 @@ static ssize_t unassign_domain_store(struct device *dev,
 {
 	int ret;
 	unsigned long apqi;
-	struct mdev_device *mdev = mdev_from_dev(dev);
-	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
 
 	mutex_lock(&matrix_dev->lock);
 
-	/*
-	 * If the KVM pointer is in flux or the guest is running, disallow
-	 * un-assignment of domain
-	 */
-	if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
+	/* If the KVM guest is running, disallow unassignment of domain */
+	if (matrix_mdev->kvm) {
 		ret = -EBUSY;
 		goto done;
 	}
@@ -893,16 +881,12 @@ static ssize_t assign_control_domain_store(struct device *dev,
 {
 	int ret;
 	unsigned long id;
-	struct mdev_device *mdev = mdev_from_dev(dev);
-	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
 
 	mutex_lock(&matrix_dev->lock);
 
-	/*
-	 * If the KVM pointer is in flux or the guest is running, disallow
-	 * assignment of control domain.
-	 */
-	if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
+	/* If the KVM guest is running, disallow assignment of control domain */
+	if (matrix_mdev->kvm) {
 		ret = -EBUSY;
 		goto done;
 	}
@@ -949,17 +933,13 @@ static ssize_t unassign_control_domain_store(struct device *dev,
 {
 	int ret;
 	unsigned long domid;
-	struct mdev_device *mdev = mdev_from_dev(dev);
-	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
 	unsigned long max_domid =  matrix_mdev->matrix.adm_max;
 
 	mutex_lock(&matrix_dev->lock);
 
-	/*
-	 * If the KVM pointer is in flux or the guest is running, disallow
-	 * un-assignment of control domain.
-	 */
-	if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
+	/* If a KVM guest is running, disallow unassignment of control domain */
+	if (matrix_mdev->kvm) {
 		ret = -EBUSY;
 		goto done;
 	}
@@ -988,8 +968,7 @@ static ssize_t control_domains_show(struct device *dev,
 	int nchars = 0;
 	int n;
 	char *bufpos = buf;
-	struct mdev_device *mdev = mdev_from_dev(dev);
-	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
 	unsigned long max_domid = matrix_mdev->matrix.adm_max;
 
 	mutex_lock(&matrix_dev->lock);
@@ -1007,8 +986,7 @@ static DEVICE_ATTR_RO(control_domains);
 static ssize_t matrix_show(struct device *dev, struct device_attribute *attr,
 			   char *buf)
 {
-	struct mdev_device *mdev = mdev_from_dev(dev);
-	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
 	char *bufpos = buf;
 	unsigned long apid;
 	unsigned long apqi;
@@ -1098,23 +1076,30 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
 	struct ap_matrix_mdev *m;
 
 	if (kvm->arch.crypto.crycbd) {
+		down_write(&kvm->arch.crypto.pqap_hook_rwsem);
+		kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
+		up_write(&kvm->arch.crypto.pqap_hook_rwsem);
+
+		mutex_lock(&kvm->lock);
+		mutex_lock(&matrix_dev->lock);
+
 		list_for_each_entry(m, &matrix_dev->mdev_list, node) {
-			if (m != matrix_mdev && m->kvm == kvm)
+			if (m != matrix_mdev && m->kvm == kvm) {
+				mutex_unlock(&kvm->lock);
+				mutex_unlock(&matrix_dev->lock);
 				return -EPERM;
+			}
 		}
 
 		kvm_get_kvm(kvm);
-		matrix_mdev->kvm_busy = true;
-		mutex_unlock(&matrix_dev->lock);
+		matrix_mdev->kvm = kvm;
 		kvm_arch_crypto_set_masks(kvm,
 					  matrix_mdev->matrix.apm,
 					  matrix_mdev->matrix.aqm,
 					  matrix_mdev->matrix.adm);
-		mutex_lock(&matrix_dev->lock);
-		kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
-		matrix_mdev->kvm = kvm;
-		matrix_mdev->kvm_busy = false;
-		wake_up_all(&matrix_mdev->wait_for_kvm);
+
+		mutex_unlock(&kvm->lock);
+		mutex_unlock(&matrix_dev->lock);
 	}
 
 	return 0;
@@ -1163,28 +1148,24 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
  * certain circumstances, will result in a circular lock dependency if this is
  * done under the @matrix_mdev->lock.
  */
-static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
+static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev,
+				   struct kvm *kvm)
 {
-	/*
-	 * If the KVM pointer is in the process of being set, wait until the
-	 * process has completed.
-	 */
-	wait_event_cmd(matrix_mdev->wait_for_kvm,
-		       !matrix_mdev->kvm_busy,
-		       mutex_unlock(&matrix_dev->lock),
-		       mutex_lock(&matrix_dev->lock));
+	if (kvm && kvm->arch.crypto.crycbd) {
+		down_write(&kvm->arch.crypto.pqap_hook_rwsem);
+		kvm->arch.crypto.pqap_hook = NULL;
+		up_write(&kvm->arch.crypto.pqap_hook_rwsem);
 
-	if (matrix_mdev->kvm) {
-		matrix_mdev->kvm_busy = true;
-		mutex_unlock(&matrix_dev->lock);
-		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
+		mutex_lock(&kvm->lock);
 		mutex_lock(&matrix_dev->lock);
-		vfio_ap_mdev_reset_queues(matrix_mdev->mdev);
-		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
-		kvm_put_kvm(matrix_mdev->kvm);
+
+		kvm_arch_crypto_clear_masks(kvm);
+		vfio_ap_mdev_reset_queues(matrix_mdev);
+		kvm_put_kvm(kvm);
 		matrix_mdev->kvm = NULL;
-		matrix_mdev->kvm_busy = false;
-		wake_up_all(&matrix_mdev->wait_for_kvm);
+
+		mutex_unlock(&kvm->lock);
+		mutex_unlock(&matrix_dev->lock);
 	}
 }
 
@@ -1197,16 +1178,13 @@ static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
 	if (action != VFIO_GROUP_NOTIFY_SET_KVM)
 		return NOTIFY_OK;
 
-	mutex_lock(&matrix_dev->lock);
 	matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
 
 	if (!data)
-		vfio_ap_mdev_unset_kvm(matrix_mdev);
+		vfio_ap_mdev_unset_kvm(matrix_mdev, matrix_mdev->kvm);
 	else if (vfio_ap_mdev_set_kvm(matrix_mdev, data))
 		notify_rc = NOTIFY_DONE;
 
-	mutex_unlock(&matrix_dev->lock);
-
 	return notify_rc;
 }
 
@@ -1276,13 +1254,12 @@ free_resources:
 	return ret;
 }
 
-static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev)
+static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev)
 {
 	int ret;
 	int rc = 0;
 	unsigned long apid, apqi;
 	struct vfio_ap_queue *q;
-	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
 
 	for_each_set_bit_inv(apid, matrix_mdev->matrix.apm,
 			     matrix_mdev->matrix.apm_max + 1) {
@@ -1303,52 +1280,45 @@ static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev)
 	return rc;
 }
 
-static int vfio_ap_mdev_open(struct mdev_device *mdev)
+static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
 {
-	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+	struct ap_matrix_mdev *matrix_mdev =
+		container_of(vdev, struct ap_matrix_mdev, vdev);
 	unsigned long events;
 	int ret;
 
-
-	if (!try_module_get(THIS_MODULE))
-		return -ENODEV;
-
 	matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier;
 	events = VFIO_GROUP_NOTIFY_SET_KVM;
 
-	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
+	ret = vfio_register_notifier(vdev->dev, VFIO_GROUP_NOTIFY,
 				     &events, &matrix_mdev->group_notifier);
-	if (ret) {
-		module_put(THIS_MODULE);
+	if (ret)
 		return ret;
-	}
 
 	matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
 	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
-	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
+	ret = vfio_register_notifier(vdev->dev, VFIO_IOMMU_NOTIFY,
 				     &events, &matrix_mdev->iommu_notifier);
-	if (!ret)
-		return ret;
+	if (ret)
+		goto out_unregister_group;
+	return 0;
 
-	vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
+out_unregister_group:
+	vfio_unregister_notifier(vdev->dev, VFIO_GROUP_NOTIFY,
 				 &matrix_mdev->group_notifier);
-	module_put(THIS_MODULE);
 	return ret;
 }
 
-static void vfio_ap_mdev_release(struct mdev_device *mdev)
+static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
 {
-	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
-
-	mutex_lock(&matrix_dev->lock);
-	vfio_ap_mdev_unset_kvm(matrix_mdev);
-	mutex_unlock(&matrix_dev->lock);
+	struct ap_matrix_mdev *matrix_mdev =
+		container_of(vdev, struct ap_matrix_mdev, vdev);
 
-	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
+	vfio_unregister_notifier(vdev->dev, VFIO_IOMMU_NOTIFY,
 				 &matrix_mdev->iommu_notifier);
-	vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
+	vfio_unregister_notifier(vdev->dev, VFIO_GROUP_NOTIFY,
 				 &matrix_mdev->group_notifier);
-	module_put(THIS_MODULE);
+	vfio_ap_mdev_unset_kvm(matrix_mdev, matrix_mdev->kvm);
 }
 
 static int vfio_ap_mdev_get_device_info(unsigned long arg)
@@ -1371,11 +1341,12 @@ static int vfio_ap_mdev_get_device_info(unsigned long arg)
 	return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
 }
 
-static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev,
+static ssize_t vfio_ap_mdev_ioctl(struct vfio_device *vdev,
 				    unsigned int cmd, unsigned long arg)
 {
+	struct ap_matrix_mdev *matrix_mdev =
+		container_of(vdev, struct ap_matrix_mdev, vdev);
 	int ret;
-	struct ap_matrix_mdev *matrix_mdev;
 
 	mutex_lock(&matrix_dev->lock);
 	switch (cmd) {
@@ -1383,22 +1354,7 @@ static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev,
 		ret = vfio_ap_mdev_get_device_info(arg);
 		break;
 	case VFIO_DEVICE_RESET:
-		matrix_mdev = mdev_get_drvdata(mdev);
-		if (WARN(!matrix_mdev, "Driver data missing from mdev!!")) {
-			ret = -EINVAL;
-			break;
-		}
-
-		/*
-		 * If the KVM pointer is in the process of being set, wait until
-		 * the process has completed.
-		 */
-		wait_event_cmd(matrix_mdev->wait_for_kvm,
-			       !matrix_mdev->kvm_busy,
-			       mutex_unlock(&matrix_dev->lock),
-			       mutex_lock(&matrix_dev->lock));
-
-		ret = vfio_ap_mdev_reset_queues(mdev);
+		ret = vfio_ap_mdev_reset_queues(matrix_mdev);
 		break;
 	default:
 		ret = -EOPNOTSUPP;
@@ -1409,25 +1365,51 @@ static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev,
 	return ret;
 }
 
+static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {
+	.open_device = vfio_ap_mdev_open_device,
+	.close_device = vfio_ap_mdev_close_device,
+	.ioctl = vfio_ap_mdev_ioctl,
+};
+
+static struct mdev_driver vfio_ap_matrix_driver = {
+	.driver = {
+		.name = "vfio_ap_mdev",
+		.owner = THIS_MODULE,
+		.mod_name = KBUILD_MODNAME,
+		.dev_groups = vfio_ap_mdev_attr_groups,
+	},
+	.probe = vfio_ap_mdev_probe,
+	.remove = vfio_ap_mdev_remove,
+};
+
 static const struct mdev_parent_ops vfio_ap_matrix_ops = {
 	.owner			= THIS_MODULE,
+	.device_driver		= &vfio_ap_matrix_driver,
 	.supported_type_groups	= vfio_ap_mdev_type_groups,
-	.mdev_attr_groups	= vfio_ap_mdev_attr_groups,
-	.create			= vfio_ap_mdev_create,
-	.remove			= vfio_ap_mdev_remove,
-	.open			= vfio_ap_mdev_open,
-	.release		= vfio_ap_mdev_release,
-	.ioctl			= vfio_ap_mdev_ioctl,
 };
 
 int vfio_ap_mdev_register(void)
 {
+	int ret;
+
 	atomic_set(&matrix_dev->available_instances, MAX_ZDEV_ENTRIES_EXT);
 
-	return mdev_register_device(&matrix_dev->device, &vfio_ap_matrix_ops);
+	ret = mdev_register_driver(&vfio_ap_matrix_driver);
+	if (ret)
+		return ret;
+
+	ret = mdev_register_device(&matrix_dev->device, &vfio_ap_matrix_ops);
+	if (ret)
+		goto err_driver;
+	return 0;
+
+err_driver:
+	mdev_unregister_driver(&vfio_ap_matrix_driver);
+	return ret;
 }
 
 void vfio_ap_mdev_unregister(void)
 {
 	mdev_unregister_device(&matrix_dev->device);
+	mdev_unregister_driver(&vfio_ap_matrix_driver);
 }
diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
index f82a6396acae..77760e2b546f 100644
--- a/drivers/s390/crypto/vfio_ap_private.h
+++ b/drivers/s390/crypto/vfio_ap_private.h
@@ -18,6 +18,7 @@
 #include <linux/delay.h>
 #include <linux/mutex.h>
 #include <linux/kvm_host.h>
+#include <linux/vfio.h>
 
 #include "ap_bus.h"
 
@@ -79,14 +80,13 @@ struct ap_matrix {
  * @kvm:	the struct holding guest's state
  */
 struct ap_matrix_mdev {
+	struct vfio_device vdev;
 	struct list_head node;
 	struct ap_matrix matrix;
 	struct notifier_block group_notifier;
 	struct notifier_block iommu_notifier;
-	bool kvm_busy;
-	wait_queue_head_t wait_for_kvm;
 	struct kvm *kvm;
-	struct kvm_s390_module_hook pqap_hook;
+	crypto_hook pqap_hook;
 	struct mdev_device *mdev;
 };
 
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index fa0cb8633040..356318746dd1 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -71,7 +71,7 @@ static LIST_HEAD(zcrypt_ops_list);
 /* Zcrypt related debug feature stuff. */
 debug_info_t *zcrypt_dbf_info;
 
-/**
+/*
  * Process a rescan of the transport layer.
  *
  * Returns 1, if the rescan has been processed, otherwise 0.
@@ -462,7 +462,7 @@ static void zcdn_destroy_all(void)
 
 #endif
 
-/**
+/*
  * zcrypt_read (): Not supported beyond zcrypt 1.3.1.
  *
  * This function is not supported beyond zcrypt 1.3.1.
@@ -473,7 +473,7 @@ static ssize_t zcrypt_read(struct file *filp, char __user *buf,
 	return -EPERM;
 }
 
-/**
+/*
  * zcrypt_write(): Not allowed.
  *
  * Write is is not allowed
@@ -484,7 +484,7 @@ static ssize_t zcrypt_write(struct file *filp, const char __user *buf,
 	return -EPERM;
 }
 
-/**
+/*
  * zcrypt_open(): Count number of users.
  *
  * Device open function to count number of users.
@@ -512,7 +512,7 @@ static int zcrypt_open(struct inode *inode, struct file *filp)
 	return stream_open(inode, filp);
 }
 
-/**
+/*
  * zcrypt_release(): Count number of users.
  *
  * Device close function to count number of users.
@@ -2153,7 +2153,7 @@ static void zcdn_exit(void)
 
 #endif
 
-/**
+/*
  * zcrypt_api_init(): Module initialization.
  *
  * The module initialization code.
@@ -2191,7 +2191,7 @@ out:
 	return rc;
 }
 
-/**
+/*
  * zcrypt_api_exit(): Module termination.
  *
  * The module termination code.
diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c
index fa8293d37006..2bd49950ba81 100644
--- a/drivers/s390/crypto/zcrypt_cex2a.c
+++ b/drivers/s390/crypto/zcrypt_cex2a.c
@@ -65,7 +65,7 @@ static struct ap_device_id zcrypt_cex2a_queue_ids[] = {
 
 MODULE_DEVICE_TABLE(ap, zcrypt_cex2a_queue_ids);
 
-/**
+/*
  * Probe function for CEX2A card devices. It always accepts the AP device
  * since the bus_match already checked the card type.
  * @ap_dev: pointer to the AP device.
@@ -124,7 +124,7 @@ static int zcrypt_cex2a_card_probe(struct ap_device *ap_dev)
 	return rc;
 }
 
-/**
+/*
  * This is called to remove the CEX2A card driver information
  * if an AP card device is removed.
  */
@@ -142,7 +142,7 @@ static struct ap_driver zcrypt_cex2a_card_driver = {
 	.flags = AP_DRIVER_FLAG_DEFAULT,
 };
 
-/**
+/*
  * Probe function for CEX2A queue devices. It always accepts the AP device
  * since the bus_match already checked the queue type.
  * @ap_dev: pointer to the AP device.
@@ -183,7 +183,7 @@ static int zcrypt_cex2a_queue_probe(struct ap_device *ap_dev)
 	return rc;
 }
 
-/**
+/*
  * This is called to remove the CEX2A queue driver information
  * if an AP queue device is removed.
  */
diff --git a/drivers/s390/crypto/zcrypt_cex2c.c b/drivers/s390/crypto/zcrypt_cex2c.c
index a0b9f1153e12..6360fdd06160 100644
--- a/drivers/s390/crypto/zcrypt_cex2c.c
+++ b/drivers/s390/crypto/zcrypt_cex2c.c
@@ -171,7 +171,7 @@ static const struct attribute_group cca_queue_attr_grp = {
 	.attrs = cca_queue_attrs,
 };
 
-/**
+/*
  * Large random number detection function. Its sends a message to a CEX2C/CEX3C
  * card to find out if large random numbers are supported.
  * @ap_dev: pointer to the AP device.
@@ -237,7 +237,7 @@ out_free:
 	return rc;
 }
 
-/**
+/*
  * Probe function for CEX2C/CEX3C card devices. It always accepts the
  * AP device since the bus_match already checked the hardware type.
  * @ap_dev: pointer to the AP card device.
@@ -303,7 +303,7 @@ static int zcrypt_cex2c_card_probe(struct ap_device *ap_dev)
 	return rc;
 }
 
-/**
+/*
  * This is called to remove the CEX2C/CEX3C card driver information
  * if an AP card device is removed.
  */
@@ -325,7 +325,7 @@ static struct ap_driver zcrypt_cex2c_card_driver = {
 	.flags = AP_DRIVER_FLAG_DEFAULT,
 };
 
-/**
+/*
  * Probe function for CEX2C/CEX3C queue devices. It always accepts the
  * AP device since the bus_match already checked the hardware type.
  * @ap_dev: pointer to the AP card device.
@@ -376,7 +376,7 @@ static int zcrypt_cex2c_queue_probe(struct ap_device *ap_dev)
 	return rc;
 }
 
-/**
+/*
  * This is called to remove the CEX2C/CEX3C queue driver information
  * if an AP queue device is removed.
  */
diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c
index 1f7ec54142e1..06024bbe9a58 100644
--- a/drivers/s390/crypto/zcrypt_cex4.c
+++ b/drivers/s390/crypto/zcrypt_cex4.c
@@ -394,7 +394,7 @@ static const struct attribute_group ep11_queue_attr_grp = {
 	.attrs = ep11_queue_attrs,
 };
 
-/**
+/*
  * Probe function for CEX4/CEX5/CEX6/CEX7 card device. It always
  * accepts the AP device since the bus_match already checked
  * the hardware type.
@@ -562,7 +562,7 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev)
 	return rc;
 }
 
-/**
+/*
  * This is called to remove the CEX4/CEX5/CEX6/CEX7 card driver
  * information if an AP card device is removed.
  */
@@ -586,7 +586,7 @@ static struct ap_driver zcrypt_cex4_card_driver = {
 	.flags = AP_DRIVER_FLAG_DEFAULT,
 };
 
-/**
+/*
  * Probe function for CEX4/CEX5/CEX6/CEX7 queue device. It always
  * accepts the AP device since the bus_match already checked
  * the hardware type.
@@ -652,7 +652,7 @@ static int zcrypt_cex4_queue_probe(struct ap_device *ap_dev)
 	return rc;
 }
 
-/**
+/*
  * This is called to remove the CEX4/CEX5/CEX6/CEX7 queue driver
  * information if an AP queue device is removed.
  */
diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c
index 99405472824d..99937f3e1d49 100644
--- a/drivers/s390/crypto/zcrypt_msgtype50.c
+++ b/drivers/s390/crypto/zcrypt_msgtype50.c
@@ -39,7 +39,7 @@ MODULE_DESCRIPTION("Cryptographic Accelerator (message type 50), " \
 		   "Copyright IBM Corp. 2001, 2012");
 MODULE_LICENSE("GPL");
 
-/**
+/*
  * The type 50 message family is associated with a CEXxA cards.
  *
  * The four members of the family are described below.
@@ -136,7 +136,7 @@ struct type50_crb3_msg {
 	unsigned char	message[512];
 } __packed;
 
-/**
+/*
  * The type 80 response family is associated with a CEXxA cards.
  *
  * Note that all unsigned char arrays are right-justified and left-padded
@@ -188,7 +188,7 @@ unsigned int get_rsa_crt_fc(struct ica_rsa_modexpo_crt *crt, int *fcode)
 	return 0;
 }
 
-/**
+/*
  * Convert a ICAMEX message to a type50 MEX message.
  *
  * @zq: crypto queue pointer
@@ -255,7 +255,7 @@ static int ICAMEX_msg_to_type50MEX_msg(struct zcrypt_queue *zq,
 	return 0;
 }
 
-/**
+/*
  * Convert a ICACRT message to a type50 CRT message.
  *
  * @zq: crypto queue pointer
@@ -346,7 +346,7 @@ static int ICACRT_msg_to_type50CRT_msg(struct zcrypt_queue *zq,
 	return 0;
 }
 
-/**
+/*
  * Copy results from a type 80 reply message back to user space.
  *
  * @zq: crypto device pointer
@@ -418,7 +418,7 @@ static int convert_response_cex2a(struct zcrypt_queue *zq,
 	}
 }
 
-/**
+/*
  * This function is called from the AP bus code after a crypto request
  * "msg" has finished with the reply message "reply".
  * It is called from tasklet context.
@@ -457,7 +457,7 @@ out:
 
 static atomic_t zcrypt_step = ATOMIC_INIT(0);
 
-/**
+/*
  * The request distributor calls this function if it picked the CEXxA
  * device to handle a modexpo request.
  * @zq: pointer to zcrypt_queue structure that identifies the
@@ -502,7 +502,7 @@ out:
 	return rc;
 }
 
-/**
+/*
  * The request distributor calls this function if it picked the CEXxA
  * device to handle a modexpo_crt request.
  * @zq: pointer to zcrypt_queue structure that identifies the
@@ -547,7 +547,7 @@ out:
 	return rc;
 }
 
-/**
+/*
  * The crypto operations for message type 50.
  */
 static struct zcrypt_ops zcrypt_msgtype50_ops = {
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c
index 752c6398fcd6..bc5a8c31ba73 100644
--- a/drivers/s390/crypto/zcrypt_msgtype6.c
+++ b/drivers/s390/crypto/zcrypt_msgtype6.c
@@ -44,7 +44,7 @@ MODULE_DESCRIPTION("Cryptographic Coprocessor (message type 6), " \
 		   "Copyright IBM Corp. 2001, 2012");
 MODULE_LICENSE("GPL");
 
-/**
+/*
  * CPRB
  *	  Note that all shorts, ints and longs are little-endian.
  *	  All pointer fields are 32-bits long, and mean nothing
@@ -107,7 +107,7 @@ struct function_and_rules_block {
 	unsigned char only_rule[8];
 } __packed;
 
-/**
+/*
  * The following is used to initialize the CPRBX passed to the CEXxC/CEXxP
  * card in a type6 message. The 3 fields that must be filled in at execution
  * time are  req_parml, rpl_parml and usage_domain.
@@ -236,7 +236,7 @@ int speed_idx_ep11(int req_type)
 }
 
 
-/**
+/*
  * Convert a ICAMEX message to a type6 MEX message.
  *
  * @zq: crypto device pointer
@@ -305,7 +305,7 @@ static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_queue *zq,
 	return 0;
 }
 
-/**
+/*
  * Convert a ICACRT message to a type6 CRT message.
  *
  * @zq: crypto device pointer
@@ -374,7 +374,7 @@ static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_queue *zq,
 	return 0;
 }
 
-/**
+/*
  * Convert a XCRB message to a type6 CPRB message.
  *
  * @zq: crypto device pointer
@@ -571,7 +571,7 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(bool userspace, struct ap_message *ap
 	return 0;
 }
 
-/**
+/*
  * Copy results from a type 86 ICA reply message back to user space.
  *
  * @zq: crypto device pointer
@@ -697,7 +697,7 @@ static int convert_type86_ica(struct zcrypt_queue *zq,
 	return 0;
 }
 
-/**
+/*
  * Copy results from a type 86 XCRB reply message back to user space.
  *
  * @zq: crypto device pointer
@@ -728,7 +728,7 @@ static int convert_type86_xcrb(bool userspace, struct zcrypt_queue *zq,
 	return 0;
 }
 
-/**
+/*
  * Copy results from a type 86 EP11 XCRB reply message back to user space.
  *
  * @zq: crypto device pointer
@@ -911,7 +911,7 @@ static int convert_response_rng(struct zcrypt_queue *zq,
 	}
 }
 
-/**
+/*
  * This function is called from the AP bus code after a crypto request
  * "msg" has finished with the reply message "reply".
  * It is called from tasklet context.
@@ -966,7 +966,7 @@ out:
 	complete(&(resp_type->work));
 }
 
-/**
+/*
  * This function is called from the AP bus code after a crypto request
  * "msg" has finished with the reply message "reply".
  * It is called from tasklet context.
@@ -1015,7 +1015,7 @@ out:
 
 static atomic_t zcrypt_step = ATOMIC_INIT(0);
 
-/**
+/*
  * The request distributor calls this function if it picked the CEXxC
  * device to handle a modexpo request.
  * @zq: pointer to zcrypt_queue structure that identifies the
@@ -1063,7 +1063,7 @@ out_free:
 	return rc;
 }
 
-/**
+/*
  * The request distributor calls this function if it picked the CEXxC
  * device to handle a modexpo_crt request.
  * @zq: pointer to zcrypt_queue structure that identifies the
@@ -1112,7 +1112,7 @@ out_free:
 	return rc;
 }
 
-/**
+/*
  * Fetch function code from cprb.
  * Extracting the fc requires to copy the cprb from userspace.
  * So this function allocates memory and needs an ap_msg prepared
@@ -1140,7 +1140,7 @@ unsigned int get_cprb_fc(bool userspace, struct ica_xcRB *xcRB,
 	return XCRB_msg_to_type6CPRB_msgX(userspace, ap_msg, xcRB, func_code, dom);
 }
 
-/**
+/*
  * The request distributor calls this function if it picked the CEXxC
  * device to handle a send_cprb request.
  * @zq: pointer to zcrypt_queue structure that identifies the
@@ -1170,7 +1170,7 @@ out:
 	return rc;
 }
 
-/**
+/*
  * Fetch function code from ep11 cprb.
  * Extracting the fc requires to copy the ep11 cprb from userspace.
  * So this function allocates memory and needs an ap_msg prepared
@@ -1198,7 +1198,7 @@ unsigned int get_ep11cprb_fc(bool userspace, struct ep11_urb *xcrb,
 	return xcrb_msg_to_type6_ep11cprb_msgx(userspace, ap_msg, xcrb, func_code);
 }
 
-/**
+/*
  * The request distributor calls this function if it picked the CEX4P
  * device to handle a send_ep11_cprb request.
  * @zq: pointer to zcrypt_queue structure that identifies the
@@ -1228,7 +1228,7 @@ static long zcrypt_msgtype6_send_ep11_cprb(bool userspace, struct zcrypt_queue *
 	} __packed * payload_hdr = NULL;
 
 
-	/**
+	/*
 	 * The target domain field within the cprb body/payload block will be
 	 * replaced by the usage domain for non-management commands only.
 	 * Therefore we check the first bit of the 'flags' parameter for
@@ -1299,7 +1299,7 @@ unsigned int get_rng_fc(struct ap_message *ap_msg, int *func_code,
 	return 0;
 }
 
-/**
+/*
  * The request distributor calls this function if it picked the CEXxC
  * device to generate random data.
  * @zq: pointer to zcrypt_queue structure that identifies the
@@ -1339,7 +1339,7 @@ out:
 	return rc;
 }
 
-/**
+/*
  * The crypto operations for a CEXxC card.
  */
 static struct zcrypt_ops zcrypt_msgtype6_norng_ops = {
diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c
index ca473b368905..cbc3b62cd9e5 100644
--- a/drivers/s390/scsi/zfcp_dbf.c
+++ b/drivers/s390/scsi/zfcp_dbf.c
@@ -766,7 +766,7 @@ static void zfcp_dbf_unregister(struct zfcp_dbf *dbf)
 }
 
 /**
- * zfcp_adapter_debug_register - registers debug feature for an adapter
+ * zfcp_dbf_adapter_register - registers debug feature for an adapter
  * @adapter: pointer to adapter for which debug features should be registered
  * return: -ENOMEM on error, 0 otherwise
  */
@@ -824,7 +824,7 @@ err_out:
 }
 
 /**
- * zfcp_adapter_debug_unregister - unregisters debug feature for an adapter
+ * zfcp_dbf_adapter_unregister - unregisters debug feature for an adapter
  * @adapter: pointer to adapter for which debug features should be unregistered
  */
 void zfcp_dbf_adapter_unregister(struct zfcp_adapter *adapter)
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index 2e4804ef2fb9..c1f979296c1a 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -2275,7 +2275,7 @@ static void zfcp_fsf_close_lun_handler(struct zfcp_fsf_req *req)
 }
 
 /**
- * zfcp_fsf_close_LUN - close LUN
+ * zfcp_fsf_close_lun - close LUN
  * @erp_action: pointer to erp_action triggering the "close LUN"
  * Returns: 0 on success, error otherwise
  */
@@ -2377,7 +2377,7 @@ static void zfcp_fsf_req_trace(struct zfcp_fsf_req *req, struct scsi_cmnd *scsi)
 		}
 	}
 
-	blk_add_driver_data(scsi->request, &blktrc, sizeof(blktrc));
+	blk_add_driver_data(scsi_cmd_to_rq(scsi), &blktrc, sizeof(blktrc));
 }
 
 /**
@@ -2599,8 +2599,8 @@ int zfcp_fsf_fcp_cmnd(struct scsi_cmnd *scsi_cmnd)
 	io->fcp_cmnd_length = FCP_CMND_LEN;
 
 	if (scsi_get_prot_op(scsi_cmnd) != SCSI_PROT_NORMAL) {
-		io->data_block_length = scsi_cmnd->device->sector_size;
-		io->ref_tag_value = scsi_get_lba(scsi_cmnd) & 0xFFFFFFFF;
+		io->data_block_length = scsi_prot_interval(scsi_cmnd);
+		io->ref_tag_value = scsi_prot_ref_tag(scsi_cmnd);
 	}
 
 	if (zfcp_fsf_set_data_dir(scsi_cmnd, &io->data_direction))
diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c
index 8f19bed6384e..6a2720105138 100644
--- a/drivers/s390/scsi/zfcp_qdio.c
+++ b/drivers/s390/scsi/zfcp_qdio.c
@@ -384,7 +384,7 @@ free_req_q:
 }
 
 /**
- * zfcp_close_qdio - close qdio queues for an adapter
+ * zfcp_qdio_close - close qdio queues for an adapter
  * @qdio: pointer to structure zfcp_qdio
  */
 void zfcp_qdio_close(struct zfcp_qdio *qdio)
diff --git a/drivers/s390/scsi/zfcp_unit.c b/drivers/s390/scsi/zfcp_unit.c
index 59333f0257a8..60f2a04f0869 100644
--- a/drivers/s390/scsi/zfcp_unit.c
+++ b/drivers/s390/scsi/zfcp_unit.c
@@ -111,9 +111,9 @@ static void zfcp_unit_release(struct device *dev)
 }
 
 /**
- * zfcp_unit_enqueue - enqueue unit to unit list of a port.
+ * zfcp_unit_add - add unit to unit list of a port.
  * @port: pointer to port where unit is added
- * @fcp_lun: FCP LUN of unit to be enqueued
+ * @fcp_lun: FCP LUN of unit to be added
  * Returns: 0 success
  *
  * Sets up some unit internal structures and creates sysfs entry.
diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c
index 1c6b4e672687..a12e3525977d 100644
--- a/drivers/scsi/53c700.c
+++ b/drivers/scsi/53c700.c
@@ -1823,7 +1823,7 @@ NCR_700_queuecommand_lck(struct scsi_cmnd *SCp, void (*done)(struct scsi_cmnd *)
 
 	if ((hostdata->tag_negotiated & (1<<scmd_id(SCp))) &&
 	    SCp->device->simple_tags) {
-		slot->tag = SCp->request->tag;
+		slot->tag = scsi_cmd_to_rq(SCp)->tag;
 		CDEBUG(KERN_DEBUG, SCp, "sending out tag %d, slot %p\n",
 		       slot->tag, slot);
 	} else {
diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c
index adddcd589941..40088dcb98cd 100644
--- a/drivers/scsi/BusLogic.c
+++ b/drivers/scsi/BusLogic.c
@@ -1711,7 +1711,7 @@ static bool __init blogic_reportconfig(struct blogic_adapter *adapter)
 	if (adapter->adapter_bus_type != BLOGIC_PCI_BUS) {
 		blogic_info("  DMA Channel: None, ", adapter);
 		if (adapter->bios_addr > 0)
-			blogic_info("BIOS Address: 0x%lX, ", adapter,
+			blogic_info("BIOS Address: 0x%X, ", adapter,
 					adapter->bios_addr);
 		else
 			blogic_info("BIOS Address: None, ", adapter);
@@ -3436,7 +3436,7 @@ static void blogic_msg(enum blogic_msglevel msglevel, char *fmt,
 	int len = 0;
 
 	va_start(args, adapter);
-	len = vsprintf(buf, fmt, args);
+	len = vscnprintf(buf, sizeof(buf), fmt, args);
 	va_end(args);
 	if (msglevel == BLOGIC_ANNOUNCE_LEVEL) {
 		static int msglines = 0;
@@ -3451,7 +3451,7 @@ static void blogic_msg(enum blogic_msglevel msglevel, char *fmt,
 			if (buf[0] != '\n' || len > 1)
 				printk("%sscsi%d: %s", blogic_msglevelmap[msglevel], adapter->host_no, buf);
 		} else
-			printk("%s", buf);
+			pr_cont("%s", buf);
 	} else {
 		if (begin) {
 			if (adapter != NULL && adapter->adapter_initd)
@@ -3459,7 +3459,7 @@ static void blogic_msg(enum blogic_msglevel msglevel, char *fmt,
 			else
 				printk("%s%s", blogic_msglevelmap[msglevel], buf);
 		} else
-			printk("%s", buf);
+			pr_cont("%s", buf);
 	}
 	begin = (buf[len - 1] == '\n');
 }
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 8f44d433e06e..6e3a04107bb6 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -14,12 +14,16 @@ config RAID_ATTRS
 	help
 	  Provides RAID
 
+config SCSI_COMMON
+	tristate
+
 config SCSI
 	tristate "SCSI device support"
 	depends on BLOCK
 	select SCSI_DMA if HAS_DMA
 	select SG_POOL
-	select BLK_SCSI_REQUEST
+	select SCSI_COMMON
+	select BLK_DEV_BSG_COMMON if BLK_DEV_BSG
 	help
 	  If you want to use a SCSI hard disk, SCSI tape drive, SCSI CD-ROM or
 	  any other SCSI device under Linux, say Y and make sure that you know
@@ -140,6 +144,18 @@ config CHR_DEV_SG
 
 	  If unsure, say N.
 
+config BLK_DEV_BSG
+	bool "/dev/bsg support (SG v4)"
+	depends on SCSI
+	default y
+	help
+	  Saying Y here will enable generic SG (SCSI generic) v4 support
+	  for any SCSI device.
+
+	  This option is required by UDEV to access device serial numbers, etc.
+
+	  If unsure, say Y.
+
 config CHR_DEV_SCH
 	tristate "SCSI media changer support"
 	depends on SCSI
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index 1748d1ec1338..19814c26c908 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -20,7 +20,7 @@ CFLAGS_aha152x.o =   -DAHA152X_STAT -DAUTOCONF
 obj-$(CONFIG_PCMCIA)		+= pcmcia/
 
 obj-$(CONFIG_SCSI)		+= scsi_mod.o
-obj-$(CONFIG_BLK_SCSI_REQUEST)	+= scsi_common.o
+obj-$(CONFIG_SCSI_COMMON)	+= scsi_common.o
 
 obj-$(CONFIG_RAID_ATTRS)	+= raid_class.o
 
@@ -168,6 +168,7 @@ scsi_mod-$(CONFIG_BLK_DEBUG_FS)	+= scsi_debugfs.o
 scsi_mod-y			+= scsi_trace.o scsi_logging.o
 scsi_mod-$(CONFIG_PM)		+= scsi_pm.o
 scsi_mod-$(CONFIG_SCSI_DH)	+= scsi_dh.o
+scsi_mod-$(CONFIG_BLK_DEV_BSG)	+= scsi_bsg.o
 
 hv_storvsc-y			:= storvsc_drv.o
 
@@ -183,7 +184,7 @@ CFLAGS_ncr53c8xx.o	:= $(ncr53c8xx-flags-y) $(ncr53c8xx-flags-m)
 zalon7xx-objs	:= zalon.o ncr53c8xx.o
 
 # Files generated that shall be removed upon make clean
-clean-files :=	53c700_d.h 53c700_u.h scsi_devinfo_tbl.c
+clean-files :=	53c700_d.h 53c700_u.h
 
 $(obj)/53c700.o: $(obj)/53c700_d.h
 
@@ -192,9 +193,11 @@ $(obj)/scsi_sysfs.o: $(obj)/scsi_devinfo_tbl.c
 quiet_cmd_bflags = GEN     $@
 	cmd_bflags = sed -n 's/.*define *BLIST_\([A-Z0-9_]*\) *.*/BLIST_FLAG_NAME(\1),/p' $< > $@
 
-$(obj)/scsi_devinfo_tbl.c: include/scsi/scsi_devinfo.h
+$(obj)/scsi_devinfo_tbl.c: include/scsi/scsi_devinfo.h FORCE
 	$(call if_changed,bflags)
 
+targets +=  scsi_devinfo_tbl.c
+
 # If you want to play with the firmware, uncomment
 # GENERATE_FIRMWARE := 1
 
diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c
index 3baadd068768..a85589a2a8af 100644
--- a/drivers/scsi/NCR5380.c
+++ b/drivers/scsi/NCR5380.c
@@ -778,7 +778,7 @@ static void NCR5380_dma_complete(struct Scsi_Host *instance)
 	}
 
 #ifdef CONFIG_SUN3
-	if ((sun3scsi_dma_finish(rq_data_dir(hostdata->connected->request)))) {
+	if (sun3scsi_dma_finish(hostdata->connected->sc_data_direction)) {
 		pr_err("scsi%d: overrun in UDC counter -- not prepared to deal with this!\n",
 		       instance->host_no);
 		BUG();
@@ -1710,7 +1710,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
 				count = sun3scsi_dma_xfer_len(hostdata, cmd);
 
 				if (count > 0) {
-					if (rq_data_dir(cmd->request))
+					if (cmd->sc_data_direction == DMA_TO_DEVICE)
 						sun3scsi_dma_send_setup(hostdata,
 						                        cmd->SCp.ptr, count);
 					else
@@ -2158,7 +2158,7 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
 		count = sun3scsi_dma_xfer_len(hostdata, tmp);
 
 		if (count > 0) {
-			if (rq_data_dir(tmp->request))
+			if (tmp->sc_data_direction == DMA_TO_DEVICE)
 				sun3scsi_dma_send_setup(hostdata,
 				                        tmp->SCp.ptr, count);
 			else
diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index 46b8dffce2dd..c2d6f0a9e0b1 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -25,7 +25,6 @@
 #include <linux/completion.h>
 #include <linux/blkdev.h>
 #include <linux/uaccess.h>
-#include <linux/highmem.h> /* For flush_kernel_dcache_page */
 #include <linux/module.h>
 
 #include <asm/unaligned.h>
@@ -1505,7 +1504,7 @@ static struct aac_srb * aac_scsi_common(struct fib * fib, struct scsi_cmnd * cmd
 	srbcmd->id       = cpu_to_le32(scmd_id(cmd));
 	srbcmd->lun      = cpu_to_le32(cmd->device->lun);
 	srbcmd->flags    = cpu_to_le32(flag);
-	timeout = cmd->request->timeout/HZ;
+	timeout = scsi_cmd_to_rq(cmd)->timeout / HZ;
 	if (timeout == 0)
 		timeout = (dev->sa_firmware ? AAC_SA_TIMEOUT : AAC_ARC_TIMEOUT);
 	srbcmd->timeout  = cpu_to_le32(timeout);  // timeout in seconds
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index 54eb4d41bc2c..deb32c9f4b3e 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -224,7 +224,7 @@ struct fib *aac_fib_alloc_tag(struct aac_dev *dev, struct scsi_cmnd *scmd)
 {
 	struct fib *fibptr;
 
-	fibptr = &dev->fibs[scmd->request->tag];
+	fibptr = &dev->fibs[scsi_cmd_to_rq(scmd)->tag];
 	/*
 	 *	Null out fields that depend on being zero at the start of
 	 *	each I/O
diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c
index f3377e2ef5fb..ffb391967573 100644
--- a/drivers/scsi/advansys.c
+++ b/drivers/scsi/advansys.c
@@ -7423,7 +7423,7 @@ static int asc_build_req(struct asc_board *boardp, struct scsi_cmnd *scp,
 	 * Set the srb_tag to the command tag + 1, as
 	 * srb_tag '0' is used internally by the chip.
 	 */
-	srb_tag = scp->request->tag + 1;
+	srb_tag = scsi_cmd_to_rq(scp)->tag + 1;
 	asc_scsi_q->q2.srb_tag = srb_tag;
 
 	/*
@@ -7637,7 +7637,7 @@ static int
 adv_build_req(struct asc_board *boardp, struct scsi_cmnd *scp,
 	      adv_req_t **adv_reqpp)
 {
-	u32 srb_tag = scp->request->tag;
+	u32 srb_tag = scsi_cmd_to_rq(scp)->tag;
 	adv_req_t *reqp;
 	ADV_SCSI_REQ_Q *scsiqp;
 	int ret;
diff --git a/drivers/scsi/aha1542.c b/drivers/scsi/aha1542.c
index 1210e61afb18..584a59522038 100644
--- a/drivers/scsi/aha1542.c
+++ b/drivers/scsi/aha1542.c
@@ -262,11 +262,12 @@ static void aha1542_free_cmd(struct scsi_cmnd *cmd)
 	struct aha1542_cmd *acmd = scsi_cmd_priv(cmd);
 
 	if (cmd->sc_data_direction == DMA_FROM_DEVICE) {
+		struct request *rq = scsi_cmd_to_rq(cmd);
 		void *buf = acmd->data_buffer;
 		struct req_iterator iter;
 		struct bio_vec bv;
 
-		rq_for_each_segment(bv, cmd->request, iter) {
+		rq_for_each_segment(bv, rq, iter) {
 			memcpy_to_page(bv.bv_page, bv.bv_offset, buf,
 				       bv.bv_len);
 			buf += bv.bv_len;
@@ -447,11 +448,12 @@ static int aha1542_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
 #endif
 
 	if (cmd->sc_data_direction == DMA_TO_DEVICE) {
+		struct request *rq = scsi_cmd_to_rq(cmd);
 		void *buf = acmd->data_buffer;
 		struct req_iterator iter;
 		struct bio_vec bv;
 
-		rq_for_each_segment(bv, cmd->request, iter) {
+		rq_for_each_segment(bv, rq, iter) {
 			memcpy_from_page(buf, bv.bv_page, bv.bv_offset,
 					 bv.bv_len);
 			buf += bv.bv_len;
diff --git a/drivers/scsi/be2iscsi/be_mgmt.c b/drivers/scsi/be2iscsi/be_mgmt.c
index 462717bbb5b7..4e899ec1477d 100644
--- a/drivers/scsi/be2iscsi/be_mgmt.c
+++ b/drivers/scsi/be2iscsi/be_mgmt.c
@@ -235,8 +235,7 @@ static int beiscsi_exec_nemb_cmd(struct beiscsi_hba *phba,
 	wrb = alloc_mcc_wrb(phba, &tag);
 	if (!wrb) {
 		mutex_unlock(&ctrl->mbox_lock);
-		rc = -ENOMEM;
-		goto free_cmd;
+		return -ENOMEM;
 	}
 
 	sge = nonembedded_sgl(wrb);
@@ -269,24 +268,6 @@ static int beiscsi_exec_nemb_cmd(struct beiscsi_hba *phba,
 	/* copy the response, if any */
 	if (resp_buf)
 		memcpy(resp_buf, nonemb_cmd->va, resp_buf_len);
-	/**
-	 * This is special case of NTWK_GET_IF_INFO where the size of
-	 * response is not known. beiscsi_if_get_info checks the return
-	 * value to free DMA buffer.
-	 */
-	if (rc == -EAGAIN)
-		return rc;
-
-	/**
-	 * If FW is busy that is driver timed out, DMA buffer is saved with
-	 * the tag, only when the cmd completes this buffer is freed.
-	 */
-	if (rc == -EBUSY)
-		return rc;
-
-free_cmd:
-	dma_free_coherent(&ctrl->pdev->dev, nonemb_cmd->size,
-			    nonemb_cmd->va, nonemb_cmd->dma);
 	return rc;
 }
 
@@ -309,6 +290,19 @@ static int beiscsi_prep_nemb_cmd(struct beiscsi_hba *phba,
 	return 0;
 }
 
+static void beiscsi_free_nemb_cmd(struct beiscsi_hba *phba,
+				  struct be_dma_mem *cmd, int rc)
+{
+	/*
+	 * If FW is busy the DMA buffer is saved with the tag. When the cmd
+	 * completes this buffer is freed.
+	 */
+	if (rc == -EBUSY)
+		return;
+
+	dma_free_coherent(&phba->ctrl.pdev->dev, cmd->size, cmd->va, cmd->dma);
+}
+
 static void __beiscsi_eq_delay_compl(struct beiscsi_hba *phba, unsigned int tag)
 {
 	struct be_dma_mem *tag_mem;
@@ -344,8 +338,16 @@ int beiscsi_modify_eq_delay(struct beiscsi_hba *phba,
 				cpu_to_le32(set_eqd[i].delay_multiplier);
 	}
 
-	return beiscsi_exec_nemb_cmd(phba, &nonemb_cmd,
-				     __beiscsi_eq_delay_compl, NULL, 0);
+	rc = beiscsi_exec_nemb_cmd(phba, &nonemb_cmd, __beiscsi_eq_delay_compl,
+				   NULL, 0);
+	if (rc) {
+		/*
+		 * Only free on failure. Async cmds are handled like -EBUSY
+		 * where it's handled for us.
+		 */
+		beiscsi_free_nemb_cmd(phba, &nonemb_cmd, rc);
+	}
+	return rc;
 }
 
 /**
@@ -372,6 +374,7 @@ int beiscsi_get_initiator_name(struct beiscsi_hba *phba, char *name, bool cfg)
 		req->hdr.version = 1;
 	rc = beiscsi_exec_nemb_cmd(phba, &nonemb_cmd, NULL,
 				   &resp, sizeof(resp));
+	beiscsi_free_nemb_cmd(phba, &nonemb_cmd, rc);
 	if (rc) {
 		beiscsi_log(phba, KERN_ERR,
 			    BEISCSI_LOG_CONFIG | BEISCSI_LOG_MBOX,
@@ -449,7 +452,9 @@ static int beiscsi_if_mod_gw(struct beiscsi_hba *phba,
 	req->ip_addr.ip_type = ip_type;
 	memcpy(req->ip_addr.addr, gw,
 	       (ip_type < BEISCSI_IP_TYPE_V6) ? IP_V4_LEN : IP_V6_LEN);
-	return beiscsi_exec_nemb_cmd(phba, &nonemb_cmd, NULL, NULL, 0);
+	rt_val = beiscsi_exec_nemb_cmd(phba, &nonemb_cmd, NULL, NULL, 0);
+	beiscsi_free_nemb_cmd(phba, &nonemb_cmd, rt_val);
+	return rt_val;
 }
 
 int beiscsi_if_set_gw(struct beiscsi_hba *phba, u32 ip_type, u8 *gw)
@@ -499,8 +504,10 @@ int beiscsi_if_get_gw(struct beiscsi_hba *phba, u32 ip_type,
 	req = nonemb_cmd.va;
 	req->ip_type = ip_type;
 
-	return beiscsi_exec_nemb_cmd(phba, &nonemb_cmd, NULL,
-				     resp, sizeof(*resp));
+	rc = beiscsi_exec_nemb_cmd(phba, &nonemb_cmd, NULL, resp,
+				   sizeof(*resp));
+	beiscsi_free_nemb_cmd(phba, &nonemb_cmd, rc);
+	return rc;
 }
 
 static int
@@ -537,6 +544,7 @@ beiscsi_if_clr_ip(struct beiscsi_hba *phba,
 			    "BG_%d : failed to clear IP: rc %d status %d\n",
 			    rc, req->ip_params.ip_record.status);
 	}
+	beiscsi_free_nemb_cmd(phba, &nonemb_cmd, rc);
 	return rc;
 }
 
@@ -581,6 +589,7 @@ beiscsi_if_set_ip(struct beiscsi_hba *phba, u8 *ip,
 		if (req->ip_params.ip_record.status)
 			rc = -EINVAL;
 	}
+	beiscsi_free_nemb_cmd(phba, &nonemb_cmd, rc);
 	return rc;
 }
 
@@ -608,6 +617,7 @@ int beiscsi_if_en_static(struct beiscsi_hba *phba, u32 ip_type,
 		reldhcp->interface_hndl = phba->interface_handle;
 		reldhcp->ip_type = ip_type;
 		rc = beiscsi_exec_nemb_cmd(phba, &nonemb_cmd, NULL, NULL, 0);
+		beiscsi_free_nemb_cmd(phba, &nonemb_cmd, rc);
 		if (rc < 0) {
 			beiscsi_log(phba, KERN_WARNING, BEISCSI_LOG_CONFIG,
 				    "BG_%d : failed to release existing DHCP: %d\n",
@@ -689,7 +699,7 @@ int beiscsi_if_en_dhcp(struct beiscsi_hba *phba, u32 ip_type)
 	dhcpreq->interface_hndl = phba->interface_handle;
 	dhcpreq->ip_type = ip_type;
 	rc = beiscsi_exec_nemb_cmd(phba, &nonemb_cmd, NULL, NULL, 0);
-
+	beiscsi_free_nemb_cmd(phba, &nonemb_cmd, rc);
 exit:
 	kfree(if_info);
 	return rc;
@@ -762,11 +772,8 @@ int beiscsi_if_get_info(struct beiscsi_hba *phba, int ip_type,
 				    BEISCSI_LOG_INIT | BEISCSI_LOG_CONFIG,
 				    "BG_%d : Memory Allocation Failure\n");
 
-				/* Free the DMA memory for the IOCTL issuing */
-				dma_free_coherent(&phba->ctrl.pdev->dev,
-						    nonemb_cmd.size,
-						    nonemb_cmd.va,
-						    nonemb_cmd.dma);
+				beiscsi_free_nemb_cmd(phba, &nonemb_cmd,
+						      -ENOMEM);
 				return -ENOMEM;
 		}
 
@@ -781,15 +788,13 @@ int beiscsi_if_get_info(struct beiscsi_hba *phba, int ip_type,
 				      nonemb_cmd.va)->actual_resp_len;
 			ioctl_size += sizeof(struct be_cmd_req_hdr);
 
-			/* Free the previous allocated DMA memory */
-			dma_free_coherent(&phba->ctrl.pdev->dev, nonemb_cmd.size,
-					    nonemb_cmd.va,
-					    nonemb_cmd.dma);
-
+			beiscsi_free_nemb_cmd(phba, &nonemb_cmd, rc);
 			/* Free the virtual memory */
 			kfree(*if_info);
-		} else
+		} else {
+			beiscsi_free_nemb_cmd(phba, &nonemb_cmd, rc);
 			break;
+		}
 	} while (true);
 	return rc;
 }
@@ -806,8 +811,9 @@ int mgmt_get_nic_conf(struct beiscsi_hba *phba,
 	if (rc)
 		return rc;
 
-	return beiscsi_exec_nemb_cmd(phba, &nonemb_cmd, NULL,
-				     nic, sizeof(*nic));
+	rc = beiscsi_exec_nemb_cmd(phba, &nonemb_cmd, NULL, nic, sizeof(*nic));
+	beiscsi_free_nemb_cmd(phba, &nonemb_cmd, rc);
+	return rc;
 }
 
 static void beiscsi_boot_process_compl(struct beiscsi_hba *phba,
diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c
index 43e8a1dafec0..5521469ce678 100644
--- a/drivers/scsi/bnx2i/bnx2i_hwi.c
+++ b/drivers/scsi/bnx2i/bnx2i_hwi.c
@@ -1918,7 +1918,7 @@ static int bnx2i_queue_scsi_cmd_resp(struct iscsi_session *session,
 
 	spin_unlock(&session->back_lock);
 
-	p = &per_cpu(bnx2i_percpu, blk_mq_rq_cpu(sc->request));
+	p = &per_cpu(bnx2i_percpu, blk_mq_rq_cpu(scsi_cmd_to_rq(sc)));
 	spin_lock(&p->p_work_lock);
 	if (unlikely(!p->iothread)) {
 		rc = -EINVAL;
diff --git a/drivers/scsi/ch.c b/drivers/scsi/ch.c
index fc7197abfcdf..27012908b586 100644
--- a/drivers/scsi/ch.c
+++ b/drivers/scsi/ch.c
@@ -618,6 +618,12 @@ ch_checkrange(scsi_changer *ch, unsigned int type, unsigned int unit)
 	return 0;
 }
 
+struct changer_element_status32 {
+	int		ces_type;
+	compat_uptr_t	ces_data;
+};
+#define CHIOGSTATUS32  _IOW('c', 8, struct changer_element_status32)
+
 static long ch_ioctl(struct file *file,
 		    unsigned int cmd, unsigned long arg)
 {
@@ -748,7 +754,20 @@ static long ch_ioctl(struct file *file,
 
 		return ch_gstatus(ch, ces.ces_type, ces.ces_data);
 	}
+#ifdef CONFIG_COMPAT
+	case CHIOGSTATUS32:
+	{
+		struct changer_element_status32 ces32;
 
+		if (copy_from_user(&ces32, argp, sizeof(ces32)))
+			return -EFAULT;
+		if (ces32.ces_type < 0 || ces32.ces_type >= CH_TYPES)
+			return -EINVAL;
+
+		return ch_gstatus(ch, ces32.ces_type,
+				  compat_ptr(ces32.ces_data));
+	}
+#endif
 	case CHIOGELEM:
 	{
 		struct changer_get_element cge;
@@ -858,59 +877,11 @@ static long ch_ioctl(struct file *file,
 	}
 
 	default:
-		return scsi_ioctl(ch->device, cmd, argp);
+		return scsi_ioctl(ch->device, NULL, file->f_mode, cmd, argp);
 
 	}
 }
 
-#ifdef CONFIG_COMPAT
-
-struct changer_element_status32 {
-	int		ces_type;
-	compat_uptr_t	ces_data;
-};
-#define CHIOGSTATUS32  _IOW('c', 8,struct changer_element_status32)
-
-static long ch_ioctl_compat(struct file * file,
-			    unsigned int cmd, unsigned long arg)
-{
-	scsi_changer *ch = file->private_data;
-	int retval = scsi_ioctl_block_when_processing_errors(ch->device, cmd,
-							file->f_flags & O_NDELAY);
-	if (retval)
-		return retval;
-
-	switch (cmd) {
-	case CHIOGPARAMS:
-	case CHIOGVPARAMS:
-	case CHIOPOSITION:
-	case CHIOMOVE:
-	case CHIOEXCHANGE:
-	case CHIOGELEM:
-	case CHIOINITELEM:
-	case CHIOSVOLTAG:
-		/* compatible */
-		return ch_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
-	case CHIOGSTATUS32:
-	{
-		struct changer_element_status32 ces32;
-		unsigned char __user *data;
-
-		if (copy_from_user(&ces32, (void __user *)arg, sizeof (ces32)))
-			return -EFAULT;
-		if (ces32.ces_type < 0 || ces32.ces_type >= CH_TYPES)
-			return -EINVAL;
-
-		data = compat_ptr(ces32.ces_data);
-		return ch_gstatus(ch, ces32.ces_type, data);
-	}
-	default:
-		return scsi_compat_ioctl(ch->device, cmd, compat_ptr(arg));
-
-	}
-}
-#endif
-
 /* ------------------------------------------------------------------------ */
 
 static int ch_probe(struct device *dev)
@@ -1015,9 +986,7 @@ static const struct file_operations changer_fops = {
 	.open		= ch_open,
 	.release	= ch_release,
 	.unlocked_ioctl	= ch_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= ch_ioctl_compat,
-#endif
+	.compat_ioctl	= compat_ptr_ioctl,
 	.llseek		= noop_llseek,
 };
 
diff --git a/drivers/scsi/csiostor/csio_scsi.c b/drivers/scsi/csiostor/csio_scsi.c
index 56b9ad0a1ca0..3b2eb6ce1fcf 100644
--- a/drivers/scsi/csiostor/csio_scsi.c
+++ b/drivers/scsi/csiostor/csio_scsi.c
@@ -1786,7 +1786,7 @@ csio_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmnd)
 	struct csio_scsi_qset *sqset;
 	struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device));
 
-	sqset = &hw->sqset[ln->portid][blk_mq_rq_cpu(cmnd->request)];
+	sqset = &hw->sqset[ln->portid][blk_mq_rq_cpu(scsi_cmd_to_rq(cmnd))];
 
 	nr = fc_remote_port_chkready(rport);
 	if (nr) {
@@ -1989,13 +1989,13 @@ inval_scmnd:
 		csio_info(hw,
 			"Aborted SCSI command to (%d:%llu) tag %u\n",
 			cmnd->device->id, cmnd->device->lun,
-			cmnd->request->tag);
+			scsi_cmd_to_rq(cmnd)->tag);
 		return SUCCESS;
 	} else {
 		csio_info(hw,
 			"Failed to abort SCSI command, (%d:%llu) tag %u\n",
 			cmnd->device->id, cmnd->device->lun,
-			cmnd->request->tag);
+			scsi_cmd_to_rq(cmnd)->tag);
 		return FAILED;
 	}
 }
diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
index 222593bc2afe..b2730e859df8 100644
--- a/drivers/scsi/cxlflash/main.c
+++ b/drivers/scsi/cxlflash/main.c
@@ -433,7 +433,7 @@ static u32 cmd_to_target_hwq(struct Scsi_Host *host, struct scsi_cmnd *scp,
 		hwq = afu->hwq_rr_count++ % afu->num_hwqs;
 		break;
 	case HWQ_MODE_TAG:
-		tag = blk_mq_unique_tag(scp->request);
+		tag = blk_mq_unique_tag(scsi_cmd_to_rq(scp));
 		hwq = blk_mq_unique_tag_to_hwq(tag);
 		break;
 	case HWQ_MODE_CPU:
@@ -1629,8 +1629,8 @@ static int read_vpd(struct cxlflash_cfg *cfg, u64 wwpn[])
 {
 	struct device *dev = &cfg->dev->dev;
 	struct pci_dev *pdev = cfg->dev;
-	int rc = 0;
-	int ro_start, ro_size, i, j, k;
+	int i, k, rc = 0;
+	unsigned int kw_size;
 	ssize_t vpd_size;
 	char vpd_data[CXLFLASH_VPD_LEN];
 	char tmp_buf[WWPN_BUF_LEN] = { 0 };
@@ -1648,24 +1648,6 @@ static int read_vpd(struct cxlflash_cfg *cfg, u64 wwpn[])
 		goto out;
 	}
 
-	/* Get the read only section offset */
-	ro_start = pci_vpd_find_tag(vpd_data, vpd_size, PCI_VPD_LRDT_RO_DATA);
-	if (unlikely(ro_start < 0)) {
-		dev_err(dev, "%s: VPD Read-only data not found\n", __func__);
-		rc = -ENODEV;
-		goto out;
-	}
-
-	/* Get the read only section size, cap when extends beyond read VPD */
-	ro_size = pci_vpd_lrdt_size(&vpd_data[ro_start]);
-	j = ro_size;
-	i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
-	if (unlikely((i + j) > vpd_size)) {
-		dev_dbg(dev, "%s: Might need to read more VPD (%d > %ld)\n",
-			__func__, (i + j), vpd_size);
-		ro_size = vpd_size - i;
-	}
-
 	/*
 	 * Find the offset of the WWPN tag within the read only
 	 * VPD data and validate the found field (partials are
@@ -1681,11 +1663,9 @@ static int read_vpd(struct cxlflash_cfg *cfg, u64 wwpn[])
 	 * ports programmed and operate in an undefined state.
 	 */
 	for (k = 0; k < cfg->num_fc_ports; k++) {
-		j = ro_size;
-		i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
-
-		i = pci_vpd_find_info_keyword(vpd_data, i, j, wwpn_vpd_tags[k]);
-		if (i < 0) {
+		i = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size,
+						 wwpn_vpd_tags[k], &kw_size);
+		if (i == -ENOENT) {
 			if (wwpn_vpd_required)
 				dev_err(dev, "%s: Port %d WWPN not found\n",
 					__func__, k);
@@ -1693,9 +1673,7 @@ static int read_vpd(struct cxlflash_cfg *cfg, u64 wwpn[])
 			continue;
 		}
 
-		j = pci_vpd_info_field_size(&vpd_data[i]);
-		i += PCI_VPD_INFO_FLD_HDR_SIZE;
-		if (unlikely((i + j > vpd_size) || (j != WWPN_LEN))) {
+		if (i < 0 || kw_size != WWPN_LEN) {
 			dev_err(dev, "%s: Port %d WWPN incomplete or bad VPD\n",
 				__func__, k);
 			rc = -ENODEV;
diff --git a/drivers/scsi/dpt_i2o.c b/drivers/scsi/dpt_i2o.c
index a18a4a08f049..7af96d14c9bc 100644
--- a/drivers/scsi/dpt_i2o.c
+++ b/drivers/scsi/dpt_i2o.c
@@ -652,7 +652,7 @@ static int adpt_abort(struct scsi_cmnd * cmd)
 	msg[2] = 0;
 	msg[3]= 0;
 	/* Add 1 to avoid firmware treating it as invalid command */
-	msg[4] = cmd->request->tag + 1;
+	msg[4] = scsi_cmd_to_rq(cmd)->tag + 1;
 	if (pHba->host)
 		spin_lock_irq(pHba->host->host_lock);
 	rcode = adpt_i2o_post_wait(pHba, msg, sizeof(msg), FOREVER);
@@ -2236,7 +2236,7 @@ static s32 adpt_scsi_to_i2o(adpt_hba* pHba, struct scsi_cmnd* cmd, struct adpt_d
 	msg[1] = ((0xff<<24)|(HOST_TID<<12)|d->tid);
 	msg[2] = 0;
 	/* Add 1 to avoid firmware treating it as invalid command */
-	msg[3] = cmd->request->tag + 1;
+	msg[3] = scsi_cmd_to_rq(cmd)->tag + 1;
 	// Our cards use the transaction context as the tag for queueing
 	// Adaptec/DPT Private stuff 
 	msg[4] = I2O_CMD_SCSI_EXEC|(DPT_ORGANIZATION_ID<<16);
diff --git a/drivers/scsi/elx/efct/efct_driver.h b/drivers/scsi/elx/efct/efct_driver.h
index dab8eac4f243..0e3c931db7c2 100644
--- a/drivers/scsi/elx/efct/efct_driver.h
+++ b/drivers/scsi/elx/efct/efct_driver.h
@@ -10,7 +10,6 @@
 /***************************************************************************
  * OS specific includes
  */
-#include <stdarg.h>
 #include <linux/module.h>
 #include <linux/debugfs.h>
 #include <linux/firmware.h>
diff --git a/drivers/scsi/elx/efct/efct_lio.c b/drivers/scsi/elx/efct/efct_lio.c
index e0d798d6baee..bb3b460dc0bc 100644
--- a/drivers/scsi/elx/efct/efct_lio.c
+++ b/drivers/scsi/elx/efct/efct_lio.c
@@ -780,7 +780,7 @@ efct_lio_npiv_make_nport(struct target_fabric_configfs *tf,
 {
 	struct efct_lio_vport *lio_vport;
 	struct efct *efct;
-	int ret = -1;
+	int ret;
 	u64 p_wwpn, npiv_wwpn, npiv_wwnn;
 	char *p, *pbuf, tmp[128];
 	struct efct_lio_vport_list_t *vport_list;
diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index 762cc8bd2653..f8afbfb468dc 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -107,7 +107,7 @@ static void fnic_cleanup_io(struct fnic *fnic);
 static inline spinlock_t *fnic_io_lock_hash(struct fnic *fnic,
 					    struct scsi_cmnd *sc)
 {
-	u32 hash = sc->request->tag & (FNIC_IO_LOCKS - 1);
+	u32 hash = scsi_cmd_to_rq(sc)->tag & (FNIC_IO_LOCKS - 1);
 
 	return &fnic->io_req_lock[hash];
 }
@@ -390,7 +390,7 @@ static inline int fnic_queue_wq_copy_desc(struct fnic *fnic,
 	    (rp->flags & FC_RP_FLAGS_RETRY))
 		exch_flags |= FCPIO_ICMND_SRFLAG_RETRY;
 
-	fnic_queue_wq_copy_desc_icmnd_16(wq, sc->request->tag,
+	fnic_queue_wq_copy_desc_icmnd_16(wq, scsi_cmd_to_rq(sc)->tag,
 					 0, exch_flags, io_req->sgl_cnt,
 					 SCSI_SENSE_BUFFERSIZE,
 					 io_req->sgl_list_pa,
@@ -422,6 +422,7 @@ static inline int fnic_queue_wq_copy_desc(struct fnic *fnic,
  */
 static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 {
+	const int tag = scsi_cmd_to_rq(sc)->tag;
 	struct fc_lport *lp = shost_priv(sc->device->host);
 	struct fc_rport *rport;
 	struct fnic_io_req *io_req = NULL;
@@ -511,8 +512,7 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_
 	sg_count = scsi_dma_map(sc);
 	if (sg_count < 0) {
 		FNIC_TRACE(fnic_queuecommand, sc->device->host->host_no,
-			  sc->request->tag, sc, 0, sc->cmnd[0],
-			  sg_count, CMD_STATE(sc));
+			  tag, sc, 0, sc->cmnd[0], sg_count, CMD_STATE(sc));
 		mempool_free(io_req, fnic->io_req_pool);
 		goto out;
 	}
@@ -571,7 +571,7 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_
 		 * refetch the pointer under the lock.
 		 */
 		FNIC_TRACE(fnic_queuecommand, sc->device->host->host_no,
-			  sc->request->tag, sc, 0, 0, 0,
+			  tag, sc, 0, 0, 0,
 			  (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc)));
 		io_req = (struct fnic_io_req *)CMD_SP(sc);
 		CMD_SP(sc) = NULL;
@@ -603,8 +603,7 @@ out:
 			sc->cmnd[5]);
 
 	FNIC_TRACE(fnic_queuecommand, sc->device->host->host_no,
-		  sc->request->tag, sc, io_req,
-		  sg_count, cmd_trace,
+		  tag, sc, io_req, sg_count, cmd_trace,
 		  (((u64)CMD_FLAGS(sc) >> 32) | CMD_STATE(sc)));
 
 	/* if only we issued IO, will we have the io lock */
@@ -1364,6 +1363,7 @@ int fnic_wq_copy_cmpl_handler(struct fnic *fnic, int copy_work_to_do)
 static bool fnic_cleanup_io_iter(struct scsi_cmnd *sc, void *data,
 				 bool reserved)
 {
+	const int tag = scsi_cmd_to_rq(sc)->tag;
 	struct fnic *fnic = data;
 	struct fnic_io_req *io_req;
 	unsigned long flags = 0;
@@ -1371,7 +1371,7 @@ static bool fnic_cleanup_io_iter(struct scsi_cmnd *sc, void *data,
 	unsigned long start_time = 0;
 	struct fnic_stats *fnic_stats = &fnic->fnic_stats;
 
-	io_lock = fnic_io_lock_tag(fnic, sc->request->tag);
+	io_lock = fnic_io_lock_tag(fnic, tag);
 	spin_lock_irqsave(io_lock, flags);
 
 	io_req = (struct fnic_io_req *)CMD_SP(sc);
@@ -1413,7 +1413,7 @@ cleanup_scsi_cmd:
 	sc->result = DID_TRANSPORT_DISRUPTED << 16;
 	FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
 		      "fnic_cleanup_io: tag:0x%x : sc:0x%p duration = %lu DID_TRANSPORT_DISRUPTED\n",
-		      sc->request->tag, sc, (jiffies - start_time));
+		      tag, sc, jiffies - start_time);
 
 	if (atomic64_read(&fnic->io_cmpl_skip))
 		atomic64_dec(&fnic->io_cmpl_skip);
@@ -1425,10 +1425,10 @@ cleanup_scsi_cmd:
 		if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED))
 			shost_printk(KERN_ERR, fnic->lport->host,
 				     "Calling done for IO not issued to fw: tag:0x%x sc:0x%p\n",
-				     sc->request->tag, sc);
+				     tag, sc);
 
 		FNIC_TRACE(fnic_cleanup_io,
-			   sc->device->host->host_no, sc->request->tag, sc,
+			   sc->device->host->host_no, tag, sc,
 			   jiffies_to_msecs(jiffies - start_time),
 			   0, ((u64)sc->cmnd[0] << 32 |
 			       (u64)sc->cmnd[2] << 24 |
@@ -1566,7 +1566,7 @@ static bool fnic_rport_abort_io_iter(struct scsi_cmnd *sc, void *data,
 {
 	struct fnic_rport_abort_io_iter_data *iter_data = data;
 	struct fnic *fnic = iter_data->fnic;
-	int abt_tag = sc->request->tag;
+	int abt_tag = scsi_cmd_to_rq(sc)->tag;
 	struct fnic_io_req *io_req;
 	spinlock_t *io_lock;
 	unsigned long flags;
@@ -1727,6 +1727,7 @@ void fnic_terminate_rport_io(struct fc_rport *rport)
  */
 int fnic_abort_cmd(struct scsi_cmnd *sc)
 {
+	struct request *const rq = scsi_cmd_to_rq(sc);
 	struct fc_lport *lp;
 	struct fnic *fnic;
 	struct fnic_io_req *io_req = NULL;
@@ -1741,7 +1742,7 @@ int fnic_abort_cmd(struct scsi_cmnd *sc)
 	struct abort_stats *abts_stats;
 	struct terminate_stats *term_stats;
 	enum fnic_ioreq_state old_ioreq_state;
-	int tag;
+	const int tag = rq->tag;
 	unsigned long abt_issued_time;
 	DECLARE_COMPLETION_ONSTACK(tm_done);
 
@@ -1757,7 +1758,6 @@ int fnic_abort_cmd(struct scsi_cmnd *sc)
 	term_stats = &fnic->fnic_stats.term_stats;
 
 	rport = starget_to_rport(scsi_target(sc->device));
-	tag = sc->request->tag;
 	FNIC_SCSI_DBG(KERN_DEBUG,
 		fnic->lport->host,
 		"Abort Cmd called FCID 0x%x, LUN 0x%llx TAG %x flags %x\n",
@@ -1842,8 +1842,8 @@ int fnic_abort_cmd(struct scsi_cmnd *sc)
 	/* Now queue the abort command to firmware */
 	int_to_scsilun(sc->device->lun, &fc_lun);
 
-	if (fnic_queue_abort_io_req(fnic, sc->request->tag, task_req,
-				    fc_lun.scsi_lun, io_req)) {
+	if (fnic_queue_abort_io_req(fnic, tag, task_req, fc_lun.scsi_lun,
+				    io_req)) {
 		spin_lock_irqsave(io_lock, flags);
 		if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
 			CMD_STATE(sc) = old_ioreq_state;
@@ -1943,8 +1943,7 @@ int fnic_abort_cmd(struct scsi_cmnd *sc)
 	}
 
 fnic_abort_cmd_end:
-	FNIC_TRACE(fnic_abort_cmd, sc->device->host->host_no,
-		  sc->request->tag, sc,
+	FNIC_TRACE(fnic_abort_cmd, sc->device->host->host_no, tag, sc,
 		  jiffies_to_msecs(jiffies - start_time),
 		  0, ((u64)sc->cmnd[0] << 32 |
 		  (u64)sc->cmnd[2] << 24 | (u64)sc->cmnd[3] << 16 |
@@ -1994,7 +1993,7 @@ static inline int fnic_queue_dr_io_req(struct fnic *fnic,
 	/* fill in the lun info */
 	int_to_scsilun(sc->device->lun, &fc_lun);
 
-	fnic_queue_wq_copy_desc_itmf(wq, sc->request->tag | FNIC_TAG_DEV_RST,
+	fnic_queue_wq_copy_desc_itmf(wq, scsi_cmd_to_rq(sc)->tag | FNIC_TAG_DEV_RST,
 				     0, FCPIO_ITMF_LUN_RESET, SCSI_NO_TAG,
 				     fc_lun.scsi_lun, io_req->port_id,
 				     fnic->config.ra_tov, fnic->config.ed_tov);
@@ -2025,7 +2024,7 @@ static bool fnic_pending_aborts_iter(struct scsi_cmnd *sc,
 	struct fnic_pending_aborts_iter_data *iter_data = data;
 	struct fnic *fnic = iter_data->fnic;
 	struct scsi_device *lun_dev = iter_data->lun_dev;
-	int abt_tag = sc->request->tag;
+	int abt_tag = scsi_cmd_to_rq(sc)->tag;
 	struct fnic_io_req *io_req;
 	spinlock_t *io_lock;
 	unsigned long flags;
@@ -2206,14 +2205,15 @@ clean_pending_aborts_end:
 static inline int
 fnic_scsi_host_start_tag(struct fnic *fnic, struct scsi_cmnd *sc)
 {
-	struct request_queue *q = sc->request->q;
+	struct request *rq = scsi_cmd_to_rq(sc);
+	struct request_queue *q = rq->q;
 	struct request *dummy;
 
 	dummy = blk_mq_alloc_request(q, REQ_OP_WRITE, BLK_MQ_REQ_NOWAIT);
 	if (IS_ERR(dummy))
 		return SCSI_NO_TAG;
 
-	sc->tag = sc->request->tag = dummy->tag;
+	rq->tag = dummy->tag;
 	sc->host_scribble = (unsigned char *)dummy;
 
 	return dummy->tag;
@@ -2238,6 +2238,7 @@ fnic_scsi_host_end_tag(struct fnic *fnic, struct scsi_cmnd *sc)
  */
 int fnic_device_reset(struct scsi_cmnd *sc)
 {
+	struct request *rq = scsi_cmd_to_rq(sc);
 	struct fc_lport *lp;
 	struct fnic *fnic;
 	struct fnic_io_req *io_req = NULL;
@@ -2250,7 +2251,7 @@ int fnic_device_reset(struct scsi_cmnd *sc)
 	struct scsi_lun fc_lun;
 	struct fnic_stats *fnic_stats;
 	struct reset_stats *reset_stats;
-	int tag = 0;
+	int tag = rq->tag;
 	DECLARE_COMPLETION_ONSTACK(tm_done);
 	int tag_gen_flag = 0;   /*to track tags allocated by fnic driver*/
 	bool new_sc = 0;
@@ -2284,7 +2285,6 @@ int fnic_device_reset(struct scsi_cmnd *sc)
 	CMD_FLAGS(sc) = FNIC_DEVICE_RESET;
 	/* Allocate tag if not present */
 
-	tag = sc->request->tag;
 	if (unlikely(tag < 0)) {
 		/*
 		 * Really should fix the midlayer to pass in a proper
@@ -2458,8 +2458,7 @@ fnic_device_reset_clean:
 	}
 
 fnic_device_reset_end:
-	FNIC_TRACE(fnic_device_reset, sc->device->host->host_no,
-		  sc->request->tag, sc,
+	FNIC_TRACE(fnic_device_reset, sc->device->host->host_no, rq->tag, sc,
 		  jiffies_to_msecs(jiffies - start_time),
 		  0, ((u64)sc->cmnd[0] << 32 |
 		  (u64)sc->cmnd[2] << 24 | (u64)sc->cmnd[3] << 16 |
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index 3a903e8e0384..9515c45affa5 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -185,7 +185,7 @@ static int hisi_sas_slot_index_alloc(struct hisi_hba *hisi_hba,
 	void *bitmap = hisi_hba->slot_index_tags;
 
 	if (scsi_cmnd)
-		return scsi_cmnd->request->tag;
+		return scsi_cmd_to_rq(scsi_cmnd)->tag;
 
 	spin_lock(&hisi_hba->lock);
 	index = find_next_zero_bit(bitmap, hisi_hba->slot_index_count,
@@ -449,7 +449,7 @@ static int hisi_sas_task_prep(struct sas_task *task,
 		unsigned int dq_index;
 		u32 blk_tag;
 
-		blk_tag = blk_mq_unique_tag(scmd->request);
+		blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
 		dq_index = blk_mq_unique_tag_to_hwq(blk_tag);
 		*dq_pointer = dq = &hisi_hba->dq[dq_index];
 	} else {
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
index a4885d03afe2..3ab669dc806f 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
@@ -1153,7 +1153,7 @@ static void fill_prot_v3_hw(struct scsi_cmnd *scsi_cmnd,
 {
 	unsigned char prot_op = scsi_get_prot_op(scsi_cmnd);
 	unsigned int interval = scsi_prot_interval(scsi_cmnd);
-	u32 lbrt_chk_val = t10_pi_ref_tag(scsi_cmnd->request);
+	u32 lbrt_chk_val = t10_pi_ref_tag(scsi_cmd_to_rq(scsi_cmnd));
 
 	switch (prot_op) {
 	case SCSI_PROT_READ_INSERT:
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index f135a10f582b..3faa87fa296a 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -5686,7 +5686,7 @@ static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
 	/* Get the ptr to our adapter structure out of cmd->host. */
 	h = sdev_to_hba(cmd->device);
 
-	BUG_ON(cmd->request->tag < 0);
+	BUG_ON(scsi_cmd_to_rq(cmd)->tag < 0);
 
 	dev = cmd->device->hostdata;
 	if (!dev) {
@@ -5729,7 +5729,7 @@ static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
 	 *       and is therefore a brand-new command.
 	 */
 	if (likely(cmd->retries == 0 &&
-			!blk_rq_is_passthrough(cmd->request) &&
+			!blk_rq_is_passthrough(scsi_cmd_to_rq(cmd)) &&
 			h->acciopath_status)) {
 		/* Submit with the retry_pending flag unset. */
 		rc = hpsa_ioaccel_submit(h, c, cmd, false);
@@ -5894,7 +5894,7 @@ static int hpsa_scsi_add_host(struct ctlr_info *h)
  */
 static int hpsa_get_cmd_index(struct scsi_cmnd *scmd)
 {
-	int idx = scmd->request->tag;
+	int idx = scsi_cmd_to_rq(scmd)->tag;
 
 	if (idx < 0)
 		return idx;
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 935b01ee44b7..1f1586ad48fe 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -1926,7 +1926,7 @@ static int ibmvfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
 	struct ibmvfc_cmd *vfc_cmd;
 	struct ibmvfc_fcp_cmd_iu *iu;
 	struct ibmvfc_event *evt;
-	u32 tag_and_hwq = blk_mq_unique_tag(cmnd->request);
+	u32 tag_and_hwq = blk_mq_unique_tag(scsi_cmd_to_rq(cmnd));
 	u16 hwq = blk_mq_unique_tag_to_hwq(tag_and_hwq);
 	u16 scsi_channel;
 	int rc;
@@ -1956,7 +1956,7 @@ static int ibmvfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
 	memcpy(iu->cdb, cmnd->cmnd, cmnd->cmd_len);
 
 	if (cmnd->flags & SCMD_TAGGED) {
-		vfc_cmd->task_tag = cpu_to_be64(cmnd->tag);
+		vfc_cmd->task_tag = cpu_to_be64(scsi_cmd_to_rq(cmnd)->tag);
 		iu->pri_task_attr = IBMVFC_SIMPLE_TASK;
 	}
 
@@ -3292,14 +3292,18 @@ static int ibmvfc_scan_finished(struct Scsi_Host *shost, unsigned long time)
 	int done = 0;
 
 	spin_lock_irqsave(shost->host_lock, flags);
-	if (time >= (init_timeout * HZ)) {
+	if (!vhost->scan_timeout)
+		done = 1;
+	else if (time >= (vhost->scan_timeout * HZ)) {
 		dev_info(vhost->dev, "Scan taking longer than %d seconds, "
-			 "continuing initialization\n", init_timeout);
+			 "continuing initialization\n", vhost->scan_timeout);
 		done = 1;
 	}
 
-	if (vhost->scan_complete)
+	if (vhost->scan_complete) {
+		vhost->scan_timeout = init_timeout;
 		done = 1;
+	}
 	spin_unlock_irqrestore(shost->host_lock, flags);
 	return done;
 }
@@ -6084,6 +6088,7 @@ static int ibmvfc_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	vhost->client_scsi_channels = min(shost->nr_hw_queues, nr_scsi_channels);
 	vhost->using_channels = 0;
 	vhost->do_enquiry = 1;
+	vhost->scan_timeout = 0;
 
 	strcpy(vhost->partition_name, "UNKNOWN");
 	init_waitqueue_head(&vhost->work_wait_q);
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
index 92fb889d7eb0..3718406e0988 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.h
+++ b/drivers/scsi/ibmvscsi/ibmvfc.h
@@ -876,6 +876,7 @@ struct ibmvfc_host {
 	int reinit;
 	int delay_init;
 	int scan_complete;
+	int scan_timeout;
 	int logged_in;
 	int mq_enabled;
 	int using_channels;
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index e6a3eaaa57d9..50df7dd9cb91 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -1072,7 +1072,7 @@ static int ibmvscsi_queuecommand_lck(struct scsi_cmnd *cmnd,
 	init_event_struct(evt_struct,
 			  handle_cmd_rsp,
 			  VIOSRP_SRP_FORMAT,
-			  cmnd->request->timeout/HZ);
+			  scsi_cmd_to_rq(cmnd)->timeout / HZ);
 
 	evt_struct->cmnd = cmnd;
 	evt_struct->cmnd_done = done;
diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c
index 8b33c9871484..cdd94fb2aab7 100644
--- a/drivers/scsi/ips.c
+++ b/drivers/scsi/ips.c
@@ -3735,7 +3735,7 @@ ips_send_cmd(ips_ha_t * ha, ips_scb_t * scb)
 		scb->cmd.dcdb.segment_4G = 0;
 		scb->cmd.dcdb.enhanced_sg = 0;
 
-		TimeOut = scb->scsi_cmd->request->timeout;
+		TimeOut = scsi_cmd_to_rq(scb->scsi_cmd)->timeout;
 
 		if (ha->subsys->param[4] & 0x00100000) {	/* If NEW Tape DCDB is Supported */
 			if (!scb->sg_len) {
diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c
index e1ff79464131..fcaa84a3c210 100644
--- a/drivers/scsi/isci/request.c
+++ b/drivers/scsi/isci/request.c
@@ -341,7 +341,7 @@ static void scu_ssp_ireq_dif_insert(struct isci_request *ireq, u8 type, u8 op)
 	tc->reserved_E8_0 = 0;
 
 	if ((type & SCSI_PROT_DIF_TYPE1) || (type & SCSI_PROT_DIF_TYPE2))
-		tc->ref_tag_seed_gen = scsi_get_lba(scmd) & 0xffffffff;
+		tc->ref_tag_seed_gen = scsi_prot_ref_tag(scmd);
 	else if (type & SCSI_PROT_DIF_TYPE3)
 		tc->ref_tag_seed_gen = 0;
 }
@@ -369,7 +369,7 @@ static void scu_ssp_ireq_dif_strip(struct isci_request *ireq, u8 type, u8 op)
 	tc->app_tag_gen = 0;
 
 	if ((type & SCSI_PROT_DIF_TYPE1) || (type & SCSI_PROT_DIF_TYPE2))
-		tc->ref_tag_seed_verify = scsi_get_lba(scmd) & 0xffffffff;
+		tc->ref_tag_seed_verify = scsi_prot_ref_tag(scmd);
 	else if (type & SCSI_PROT_DIF_TYPE3)
 		tc->ref_tag_seed_verify = 0;
 
diff --git a/drivers/scsi/lasi700.c b/drivers/scsi/lasi700.c
index 6d14a7a94d0b..86fe19e0468d 100644
--- a/drivers/scsi/lasi700.c
+++ b/drivers/scsi/lasi700.c
@@ -134,7 +134,7 @@ lasi700_probe(struct parisc_device *dev)
 	return -ENODEV;
 }
 
-static int __exit
+static void __exit
 lasi700_driver_remove(struct parisc_device *dev)
 {
 	struct Scsi_Host *host = dev_get_drvdata(&dev->dev);
@@ -146,8 +146,6 @@ lasi700_driver_remove(struct parisc_device *dev)
 	free_irq(host->irq, host);
 	iounmap(hostdata->base);
 	kfree(hostdata);
-
-	return 0;
 }
 
 static struct parisc_driver lasi700_driver __refdata = {
diff --git a/drivers/scsi/libsas/Kconfig b/drivers/scsi/libsas/Kconfig
index 052ee3a26f6e..c640535d1ac0 100644
--- a/drivers/scsi/libsas/Kconfig
+++ b/drivers/scsi/libsas/Kconfig
@@ -10,7 +10,6 @@ config SCSI_SAS_LIBSAS
 	tristate "SAS Domain Transport Attributes"
 	depends on SCSI
 	select SCSI_SAS_ATTRS
-	select BLK_DEV_BSGLIB
 	help
 	  This provides transport specific helpers for SAS drivers which
 	  use the domain device construct (like the aic94xxx).
diff --git a/drivers/scsi/libsas/Makefile b/drivers/scsi/libsas/Makefile
index e63a54f5ab8c..9dc32736cf21 100644
--- a/drivers/scsi/libsas/Makefile
+++ b/drivers/scsi/libsas/Makefile
@@ -18,4 +18,4 @@ libsas-y +=  sas_init.o     \
 libsas-$(CONFIG_SCSI_SAS_ATA) +=	sas_ata.o
 libsas-$(CONFIG_SCSI_SAS_HOST_SMP) +=	sas_host_smp.o
 
-ccflags-y := -DDEBUG
+ccflags-y := -DDEBUG -I$(srctree)/drivers/scsi
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 4aa1fda95f35..a315715b3622 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -20,8 +20,8 @@
 #include <scsi/scsi.h>
 #include <scsi/scsi_transport.h>
 #include <scsi/scsi_transport_sas.h>
-#include "../scsi_sas_internal.h"
-#include "../scsi_transport_api.h"
+#include "scsi_sas_internal.h"
+#include "scsi_transport_api.h"
 #include <scsi/scsi_eh.h>
 
 static enum ata_completion_errors sas_to_ata_err(struct task_status_struct *ts)
@@ -596,7 +596,7 @@ void sas_ata_task_abort(struct sas_task *task)
 
 	/* Bounce SCSI-initiated commands to the SCSI EH */
 	if (qc->scsicmd) {
-		blk_abort_request(qc->scsicmd->request);
+		blk_abort_request(scsi_cmd_to_rq(qc->scsicmd));
 		return;
 	}
 
diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
index dd205414e505..12e1e36d7c04 100644
--- a/drivers/scsi/libsas/sas_discover.c
+++ b/drivers/scsi/libsas/sas_discover.c
@@ -16,7 +16,7 @@
 #include <scsi/scsi_transport.h>
 #include <scsi/scsi_transport_sas.h>
 #include <scsi/sas_ata.h>
-#include "../scsi_sas_internal.h"
+#include "scsi_sas_internal.h"
 
 /* ---------- Basic task processing for discovery purposes ---------- */
 
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index e00688540219..c2150a818423 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -18,7 +18,7 @@
 #include <scsi/sas_ata.h>
 #include <scsi/scsi_transport.h>
 #include <scsi/scsi_transport_sas.h>
-#include "../scsi_sas_internal.h"
+#include "scsi_sas_internal.h"
 
 static int sas_discover_expander(struct domain_device *dev);
 static int sas_configure_routing(struct domain_device *dev, u8 *sas_addr);
diff --git a/drivers/scsi/libsas/sas_host_smp.c b/drivers/scsi/libsas/sas_host_smp.c
index eca2a6bf3601..32cdc969b736 100644
--- a/drivers/scsi/libsas/sas_host_smp.c
+++ b/drivers/scsi/libsas/sas_host_smp.c
@@ -14,7 +14,7 @@
 
 #include <scsi/scsi_transport.h>
 #include <scsi/scsi_transport_sas.h>
-#include "../scsi_sas_internal.h"
+#include "scsi_sas_internal.h"
 
 static void sas_host_smp_discover(struct sas_ha_struct *sas_ha, u8 *resp_data,
 				  u8 phy_id)
diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c
index 2b0f98ca6ec3..80592f53017a 100644
--- a/drivers/scsi/libsas/sas_init.c
+++ b/drivers/scsi/libsas/sas_init.c
@@ -19,7 +19,7 @@
 
 #include "sas_internal.h"
 
-#include "../scsi_sas_internal.h"
+#include "scsi_sas_internal.h"
 
 static struct kmem_cache *sas_task_cache;
 static struct kmem_cache *sas_event_cache;
diff --git a/drivers/scsi/libsas/sas_phy.c b/drivers/scsi/libsas/sas_phy.c
index 4ca4b1f30bd0..a0d592d11dfb 100644
--- a/drivers/scsi/libsas/sas_phy.c
+++ b/drivers/scsi/libsas/sas_phy.c
@@ -10,7 +10,7 @@
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_transport.h>
 #include <scsi/scsi_transport_sas.h>
-#include "../scsi_sas_internal.h"
+#include "scsi_sas_internal.h"
 
 /* ---------- Phy events ---------- */
 
diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c
index e3d03d744713..67b429dcf1ff 100644
--- a/drivers/scsi/libsas/sas_port.c
+++ b/drivers/scsi/libsas/sas_port.c
@@ -10,7 +10,7 @@
 
 #include <scsi/scsi_transport.h>
 #include <scsi/scsi_transport_sas.h>
-#include "../scsi_sas_internal.h"
+#include "scsi_sas_internal.h"
 
 static bool phy_is_wideport_member(struct asd_sas_port *port, struct asd_sas_phy *phy)
 {
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index ee44a0d7730b..08ffb8788290 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -22,9 +22,9 @@
 #include <scsi/scsi_transport.h>
 #include <scsi/scsi_transport_sas.h>
 #include <scsi/sas_ata.h>
-#include "../scsi_sas_internal.h"
-#include "../scsi_transport_api.h"
-#include "../scsi_priv.h"
+#include "scsi_sas_internal.h"
+#include "scsi_transport_api.h"
+#include "scsi_priv.h"
 
 #include <linux/err.h>
 #include <linux/blkdev.h>
@@ -908,7 +908,7 @@ void sas_task_abort(struct sas_task *task)
 	if (dev_is_sata(task->dev))
 		sas_ata_task_abort(task);
 	else
-		blk_abort_request(sc->request);
+		blk_abort_request(scsi_cmd_to_rq(sc));
 }
 
 int sas_slave_alloc(struct scsi_device *sdev)
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 17028861234b..befeb7c34290 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -114,6 +114,12 @@ struct lpfc_sli2_slim;
 #define LPFC_MBX_NO_WAIT	0
 #define LPFC_MBX_WAIT		1
 
+#define LPFC_CFG_PARAM_MAGIC_NUM 0xFEAA0005
+#define LPFC_PORT_CFG_NAME "/cfg/port.cfg"
+
+#define lpfc_rangecheck(val, min, max) \
+	((uint)(val) >= (uint)(min) && (val) <= (max))
+
 enum lpfc_polling_flags {
 	ENABLE_FCP_RING_POLLING = 0x1,
 	DISABLE_FCP_RING_INT    = 0x2
@@ -403,6 +409,160 @@ struct lpfc_trunk_link  {
 				     link3;
 };
 
+/* Format of congestion module parameters */
+struct lpfc_cgn_param {
+	uint32_t cgn_param_magic;
+	uint8_t  cgn_param_version;	/* version 1 */
+	uint8_t  cgn_param_mode;	/* 0=off 1=managed 2=monitor only */
+#define LPFC_CFG_OFF		0
+#define LPFC_CFG_MANAGED	1
+#define LPFC_CFG_MONITOR	2
+	uint8_t  cgn_rsvd1;
+	uint8_t  cgn_rsvd2;
+	uint8_t  cgn_param_level0;
+	uint8_t  cgn_param_level1;
+	uint8_t  cgn_param_level2;
+	uint8_t  byte11;
+	uint8_t  byte12;
+	uint8_t  byte13;
+	uint8_t  byte14;
+	uint8_t  byte15;
+};
+
+/* Max number of days of congestion data */
+#define LPFC_MAX_CGN_DAYS 10
+
+/* Format of congestion buffer info
+ * This structure defines memory thats allocated and registered with
+ * the HBA firmware. When adding or removing fields from this structure
+ * the alignment must match the HBA firmware.
+ */
+
+struct lpfc_cgn_info {
+	/* Header */
+	__le16   cgn_info_size;		/* is sizeof(struct lpfc_cgn_info) */
+	uint8_t  cgn_info_version;	/* represents format of structure */
+#define LPFC_CGN_INFO_V1	1
+#define LPFC_CGN_INFO_V2	2
+#define LPFC_CGN_INFO_V3	3
+	uint8_t  cgn_info_mode;		/* 0=off 1=managed 2=monitor only */
+	uint8_t  cgn_info_detect;
+	uint8_t  cgn_info_action;
+	uint8_t  cgn_info_level0;
+	uint8_t  cgn_info_level1;
+	uint8_t  cgn_info_level2;
+
+	/* Start Time */
+	uint8_t  cgn_info_month;
+	uint8_t  cgn_info_day;
+	uint8_t  cgn_info_year;
+	uint8_t  cgn_info_hour;
+	uint8_t  cgn_info_minute;
+	uint8_t  cgn_info_second;
+
+	/* minute / hours / daily indices */
+	uint8_t  cgn_index_minute;
+	uint8_t  cgn_index_hour;
+	uint8_t  cgn_index_day;
+
+	__le16   cgn_warn_freq;
+	__le16   cgn_alarm_freq;
+	__le16   cgn_lunq;
+	uint8_t  cgn_pad1[8];
+
+	/* Driver Information */
+	__le16   cgn_drvr_min[60];
+	__le32   cgn_drvr_hr[24];
+	__le32   cgn_drvr_day[LPFC_MAX_CGN_DAYS];
+
+	/* Congestion Warnings */
+	__le16   cgn_warn_min[60];
+	__le32   cgn_warn_hr[24];
+	__le32   cgn_warn_day[LPFC_MAX_CGN_DAYS];
+
+	/* Latency Information */
+	__le32   cgn_latency_min[60];
+	__le32   cgn_latency_hr[24];
+	__le32   cgn_latency_day[LPFC_MAX_CGN_DAYS];
+
+	/* Bandwidth Information */
+	__le16   cgn_bw_min[60];
+	__le16   cgn_bw_hr[24];
+	__le16   cgn_bw_day[LPFC_MAX_CGN_DAYS];
+
+	/* Congestion Alarms */
+	__le16   cgn_alarm_min[60];
+	__le32   cgn_alarm_hr[24];
+	__le32   cgn_alarm_day[LPFC_MAX_CGN_DAYS];
+
+	/* Start of congestion statistics */
+	uint8_t  cgn_stat_npm;		/* Notifications per minute */
+
+	/* Start Time */
+	uint8_t  cgn_stat_month;
+	uint8_t  cgn_stat_day;
+	uint8_t  cgn_stat_year;
+	uint8_t  cgn_stat_hour;
+	uint8_t  cgn_stat_minute;
+	uint8_t  cgn_pad2[2];
+
+	__le32   cgn_notification;
+	__le32   cgn_peer_notification;
+	__le32   link_integ_notification;
+	__le32   delivery_notification;
+
+	uint8_t  cgn_stat_cgn_month; /* Last congestion notification FPIN */
+	uint8_t  cgn_stat_cgn_day;
+	uint8_t  cgn_stat_cgn_year;
+	uint8_t  cgn_stat_cgn_hour;
+	uint8_t  cgn_stat_cgn_min;
+	uint8_t  cgn_stat_cgn_sec;
+
+	uint8_t  cgn_stat_peer_month; /* Last peer congestion FPIN */
+	uint8_t  cgn_stat_peer_day;
+	uint8_t  cgn_stat_peer_year;
+	uint8_t  cgn_stat_peer_hour;
+	uint8_t  cgn_stat_peer_min;
+	uint8_t  cgn_stat_peer_sec;
+
+	uint8_t  cgn_stat_lnk_month; /* Last link integrity FPIN */
+	uint8_t  cgn_stat_lnk_day;
+	uint8_t  cgn_stat_lnk_year;
+	uint8_t  cgn_stat_lnk_hour;
+	uint8_t  cgn_stat_lnk_min;
+	uint8_t  cgn_stat_lnk_sec;
+
+	uint8_t  cgn_stat_del_month; /* Last delivery notification FPIN */
+	uint8_t  cgn_stat_del_day;
+	uint8_t  cgn_stat_del_year;
+	uint8_t  cgn_stat_del_hour;
+	uint8_t  cgn_stat_del_min;
+	uint8_t  cgn_stat_del_sec;
+#define LPFC_CGN_STAT_SIZE	48
+#define LPFC_CGN_DATA_SIZE	(sizeof(struct lpfc_cgn_info) -  \
+				LPFC_CGN_STAT_SIZE - sizeof(uint32_t))
+
+	__le32   cgn_info_crc;
+#define LPFC_CGN_CRC32_MAGIC_NUMBER	0x1EDC6F41
+#define LPFC_CGN_CRC32_SEED		0xFFFFFFFF
+};
+
+#define LPFC_CGN_INFO_SZ	(sizeof(struct lpfc_cgn_info) -  \
+				sizeof(uint32_t))
+
+struct lpfc_cgn_stat {
+	atomic64_t total_bytes;
+	atomic64_t rcv_bytes;
+	atomic64_t rx_latency;
+#define LPFC_CGN_NOT_SENT	0xFFFFFFFFFFFFFFFFLL
+	atomic_t rx_io_cnt;
+};
+
+struct lpfc_cgn_acqe_stat {
+	atomic64_t alarm;
+	atomic64_t warn;
+};
+
 struct lpfc_vport {
 	struct lpfc_hba *phba;
 	struct list_head listentry;
@@ -869,7 +1029,10 @@ struct lpfc_hba {
 					 * capability
 					 */
 #define HBA_FLOGI_ISSUED	0x100000 /* FLOGI was issued */
+#define HBA_CGN_RSVD1		0x200000 /* Reserved CGN flag */
+#define HBA_CGN_DAY_WRAP	0x400000 /* HBA Congestion info day wraps */
 #define HBA_DEFER_FLOGI		0x800000 /* Defer FLOGI till read_sparm cmpl */
+#define HBA_SETUP		0x1000000 /* Signifies HBA setup is completed */
 #define HBA_NEEDS_CFG_PORT	0x2000000 /* SLI3 - needs a CONFIG_PORT mbox */
 #define HBA_HBEAT_INP		0x4000000 /* mbox HBEAT is in progress */
 #define HBA_HBEAT_TMO		0x8000000 /* HBEAT initiated after timeout */
@@ -922,7 +1085,6 @@ struct lpfc_hba {
 	uint8_t  wwpn[8];
 	uint32_t RandomData[7];
 	uint8_t  fcp_embed_io;
-	uint8_t  nvme_support;	/* Firmware supports NVME */
 	uint8_t  nvmet_support;	/* driver supports NVMET */
 #define LPFC_NVMET_MAX_PORTS	32
 	uint8_t  mds_diags_support;
@@ -1121,6 +1283,7 @@ struct lpfc_hba {
 	uint32_t total_iocbq_bufs;
 	struct list_head active_rrq_list;
 	spinlock_t hbalock;
+	struct work_struct  unblock_request_work; /* SCSI layer unblock IOs */
 
 	/* dma_mem_pools */
 	struct dma_pool *lpfc_sg_dma_buf_pool;
@@ -1194,6 +1357,8 @@ struct lpfc_hba {
 #ifdef LPFC_HDWQ_LOCK_STAT
 	struct dentry *debug_lockstat;
 #endif
+	struct dentry *debug_cgn_buffer;
+	struct dentry *debug_rx_monitor;
 	struct dentry *debug_ras_log;
 	atomic_t nvmeio_trc_cnt;
 	uint32_t nvmeio_trc_size;
@@ -1344,6 +1509,76 @@ struct lpfc_hba {
 	uint64_t ktime_seg10_min;
 	uint64_t ktime_seg10_max;
 #endif
+	/* CMF objects */
+	struct lpfc_cgn_stat __percpu *cmf_stat;
+	uint32_t cmf_interval_rate;  /* timer interval limit in ms */
+	uint32_t cmf_timer_cnt;
+#define LPFC_CMF_INTERVAL 90
+	uint64_t cmf_link_byte_count;
+	uint64_t cmf_max_line_rate;
+	uint64_t cmf_max_bytes_per_interval;
+	uint64_t cmf_last_sync_bw;
+#define  LPFC_CMF_BLK_SIZE 512
+	struct hrtimer cmf_timer;
+	atomic_t cmf_bw_wait;
+	atomic_t cmf_busy;
+	atomic_t cmf_stop_io;      /* To block request and stop IO's */
+	uint32_t cmf_active_mode;
+	uint32_t cmf_info_per_interval;
+#define LPFC_MAX_CMF_INFO 32
+	struct timespec64 cmf_latency;  /* Interval congestion timestamp */
+	uint32_t cmf_last_ts;   /* Interval congestion time (ms) */
+	uint32_t cmf_active_info;
+
+	/* Signal / FPIN handling for Congestion Mgmt */
+	u8 cgn_reg_fpin;           /* Negotiated value from RDF */
+	u8 cgn_init_reg_fpin;      /* Initial value from READ_CONFIG */
+#define LPFC_CGN_FPIN_NONE	0x0
+#define LPFC_CGN_FPIN_WARN	0x1
+#define LPFC_CGN_FPIN_ALARM	0x2
+#define LPFC_CGN_FPIN_BOTH	(LPFC_CGN_FPIN_WARN | LPFC_CGN_FPIN_ALARM)
+
+	u8 cgn_reg_signal;          /* Negotiated value from EDC */
+	u8 cgn_init_reg_signal;     /* Initial value from READ_CONFIG */
+		/* cgn_reg_signal and cgn_init_reg_signal use
+		 * enum fc_edc_cg_signal_cap_types
+		 */
+	u16 cgn_fpin_frequency;
+#define LPFC_FPIN_INIT_FREQ	0xffff
+	u32 cgn_sig_freq;
+	u32 cgn_acqe_cnt;
+
+	/* RX monitor handling for CMF */
+	struct rxtable_entry *rxtable;  /* RX_monitor information */
+	atomic_t rxtable_idx_head;
+#define LPFC_RXMONITOR_TABLE_IN_USE     (LPFC_MAX_RXMONITOR_ENTRY + 73)
+	atomic_t rxtable_idx_tail;
+	atomic_t rx_max_read_cnt;       /* Maximum read bytes */
+	uint64_t rx_block_cnt;
+
+	/* Congestion parameters from flash */
+	struct lpfc_cgn_param cgn_p;
+
+	/* Statistics counter for ACQE cgn alarms and warnings */
+	struct lpfc_cgn_acqe_stat cgn_acqe_stat;
+
+	/* Congestion buffer information */
+	struct lpfc_dmabuf *cgn_i;      /* Congestion Info buffer */
+	atomic_t cgn_fabric_warn_cnt;   /* Total warning cgn events for info */
+	atomic_t cgn_fabric_alarm_cnt;  /* Total alarm cgn events for info */
+	atomic_t cgn_sync_warn_cnt;     /* Total warning events for SYNC wqe */
+	atomic_t cgn_sync_alarm_cnt;    /* Total alarm events for SYNC wqe */
+	atomic_t cgn_driver_evt_cnt;    /* Total driver cgn events for fmw */
+	atomic_t cgn_latency_evt_cnt;
+	struct timespec64 cgn_daily_ts;
+	atomic64_t cgn_latency_evt;     /* Avg latency per minute */
+	unsigned long cgn_evt_timestamp;
+#define LPFC_CGN_TIMER_TO_MIN   60000 /* ms in a minute */
+	uint32_t cgn_evt_minute;
+#define LPFC_SEC_MIN		60
+#define LPFC_MIN_HOUR		60
+#define LPFC_HOUR_DAY		24
+#define LPFC_MIN_DAY		(LPFC_MIN_HOUR * LPFC_HOUR_DAY)
 
 	struct hlist_node cpuhp;	/* used for cpuhp per hba callback */
 	struct timer_list cpuhp_poll_timer;
@@ -1364,6 +1599,22 @@ struct lpfc_hba {
 	struct dbg_log_ent dbg_log[DBG_LOG_SZ];
 };
 
+#define LPFC_MAX_RXMONITOR_ENTRY	800
+#define LPFC_MAX_RXMONITOR_DUMP		32
+struct rxtable_entry {
+	uint64_t total_bytes;   /* Total no of read bytes requested */
+	uint64_t rcv_bytes;     /* Total no of read bytes completed */
+	uint64_t avg_io_size;
+	uint64_t avg_io_latency;/* Average io latency in microseconds */
+	uint64_t max_read_cnt;  /* Maximum read bytes */
+	uint64_t max_bytes_per_interval;
+	uint32_t cmf_busy;
+	uint32_t cmf_info;      /* CMF_SYNC_WQE info */
+	uint32_t io_cnt;
+	uint32_t timer_utilization;
+	uint32_t timer_interval;
+};
+
 static inline struct Scsi_Host *
 lpfc_shost_from_vport(struct lpfc_vport *vport)
 {
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index eb88aaaf36eb..b35bf70a8c0d 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -57,6 +57,8 @@
 #define LPFC_MIN_DEVLOSS_TMO	1
 #define LPFC_MAX_DEVLOSS_TMO	255
 
+#define LPFC_MAX_INFO_TMP_LEN	100
+#define LPFC_INFO_MORE_STR	"\nCould be more info...\n"
 /*
  * Write key size should be multiple of 4. If write key is changed
  * make sure that library write key is also changed.
@@ -112,6 +114,186 @@ lpfc_jedec_to_ascii(int incr, char hdw[])
 	return;
 }
 
+static ssize_t
+lpfc_cmf_info_show(struct device *dev, struct device_attribute *attr,
+		   char *buf)
+{
+	struct Scsi_Host  *shost = class_to_shost(dev);
+	struct lpfc_vport *vport = (struct lpfc_vport *)shost->hostdata;
+	struct lpfc_hba   *phba = vport->phba;
+	struct lpfc_cgn_info *cp = NULL;
+	struct lpfc_cgn_stat *cgs;
+	int  len = 0;
+	int cpu;
+	u64 rcv, total;
+	char tmp[LPFC_MAX_INFO_TMP_LEN] = {0};
+
+	if (phba->cgn_i)
+		cp = (struct lpfc_cgn_info *)phba->cgn_i->virt;
+
+	scnprintf(tmp, sizeof(tmp),
+		  "Congestion Mgmt Info: E2Eattr %d Ver %d "
+		  "CMF %d cnt %d\n",
+		  phba->sli4_hba.pc_sli4_params.mi_ver,
+		  cp ? cp->cgn_info_version : 0,
+		  phba->sli4_hba.pc_sli4_params.cmf, phba->cmf_timer_cnt);
+
+	if (strlcat(buf, tmp, PAGE_SIZE) >= PAGE_SIZE)
+		goto buffer_done;
+
+	if (!phba->sli4_hba.pc_sli4_params.cmf)
+		goto buffer_done;
+
+	switch (phba->cgn_init_reg_signal) {
+	case EDC_CG_SIG_WARN_ONLY:
+		scnprintf(tmp, sizeof(tmp),
+			  "Register: Init:  Signal:WARN  ");
+		break;
+	case EDC_CG_SIG_WARN_ALARM:
+		scnprintf(tmp, sizeof(tmp),
+			  "Register: Init:  Signal:WARN|ALARM  ");
+		break;
+	default:
+		scnprintf(tmp, sizeof(tmp),
+			  "Register: Init:  Signal:NONE  ");
+		break;
+	}
+	if (strlcat(buf, tmp, PAGE_SIZE) >= PAGE_SIZE)
+		goto buffer_done;
+
+	switch (phba->cgn_init_reg_fpin) {
+	case LPFC_CGN_FPIN_WARN:
+		scnprintf(tmp, sizeof(tmp),
+			  "FPIN:WARN\n");
+		break;
+	case LPFC_CGN_FPIN_ALARM:
+		scnprintf(tmp, sizeof(tmp),
+			  "FPIN:ALARM\n");
+		break;
+	case LPFC_CGN_FPIN_BOTH:
+		scnprintf(tmp, sizeof(tmp),
+			  "FPIN:WARN|ALARM\n");
+		break;
+	default:
+		scnprintf(tmp, sizeof(tmp),
+			  "FPIN:NONE\n");
+		break;
+	}
+	if (strlcat(buf, tmp, PAGE_SIZE) >= PAGE_SIZE)
+		goto buffer_done;
+
+	switch (phba->cgn_reg_signal) {
+	case EDC_CG_SIG_WARN_ONLY:
+		scnprintf(tmp, sizeof(tmp),
+			  "       Current:  Signal:WARN  ");
+		break;
+	case EDC_CG_SIG_WARN_ALARM:
+		scnprintf(tmp, sizeof(tmp),
+			  "       Current:  Signal:WARN|ALARM  ");
+		break;
+	default:
+		scnprintf(tmp, sizeof(tmp),
+			  "       Current:  Signal:NONE  ");
+		break;
+	}
+	if (strlcat(buf, tmp, PAGE_SIZE) >= PAGE_SIZE)
+		goto buffer_done;
+
+	switch (phba->cgn_reg_fpin) {
+	case LPFC_CGN_FPIN_WARN:
+		scnprintf(tmp, sizeof(tmp),
+			  "FPIN:WARN  ACQEcnt:%d\n", phba->cgn_acqe_cnt);
+		break;
+	case LPFC_CGN_FPIN_ALARM:
+		scnprintf(tmp, sizeof(tmp),
+			  "FPIN:ALARM  ACQEcnt:%d\n", phba->cgn_acqe_cnt);
+		break;
+	case LPFC_CGN_FPIN_BOTH:
+		scnprintf(tmp, sizeof(tmp),
+			  "FPIN:WARN|ALARM  ACQEcnt:%d\n", phba->cgn_acqe_cnt);
+		break;
+	default:
+		scnprintf(tmp, sizeof(tmp),
+			  "FPIN:NONE  ACQEcnt:%d\n", phba->cgn_acqe_cnt);
+		break;
+	}
+	if (strlcat(buf, tmp, PAGE_SIZE) >= PAGE_SIZE)
+		goto buffer_done;
+
+	if (phba->cmf_active_mode != phba->cgn_p.cgn_param_mode) {
+		switch (phba->cmf_active_mode) {
+		case LPFC_CFG_OFF:
+			scnprintf(tmp, sizeof(tmp), "Active: Mode:Off\n");
+			break;
+		case LPFC_CFG_MANAGED:
+			scnprintf(tmp, sizeof(tmp), "Active: Mode:Managed\n");
+			break;
+		case LPFC_CFG_MONITOR:
+			scnprintf(tmp, sizeof(tmp), "Active: Mode:Monitor\n");
+			break;
+		default:
+			scnprintf(tmp, sizeof(tmp), "Active: Mode:Unknown\n");
+		}
+		if (strlcat(buf, tmp, PAGE_SIZE) >= PAGE_SIZE)
+			goto buffer_done;
+	}
+
+	switch (phba->cgn_p.cgn_param_mode) {
+	case LPFC_CFG_OFF:
+		scnprintf(tmp, sizeof(tmp), "Config: Mode:Off  ");
+		break;
+	case LPFC_CFG_MANAGED:
+		scnprintf(tmp, sizeof(tmp), "Config: Mode:Managed ");
+		break;
+	case LPFC_CFG_MONITOR:
+		scnprintf(tmp, sizeof(tmp), "Config: Mode:Monitor ");
+		break;
+	default:
+		scnprintf(tmp, sizeof(tmp), "Config: Mode:Unknown ");
+	}
+	if (strlcat(buf, tmp, PAGE_SIZE) >= PAGE_SIZE)
+		goto buffer_done;
+
+	total = 0;
+	rcv = 0;
+	for_each_present_cpu(cpu) {
+		cgs = per_cpu_ptr(phba->cmf_stat, cpu);
+		total += atomic64_read(&cgs->total_bytes);
+		rcv += atomic64_read(&cgs->rcv_bytes);
+	}
+
+	scnprintf(tmp, sizeof(tmp),
+		  "IObusy:%d Info:%d Bytes: Rcv:x%llx Total:x%llx\n",
+		  atomic_read(&phba->cmf_busy),
+		  phba->cmf_active_info, rcv, total);
+	if (strlcat(buf, tmp, PAGE_SIZE) >= PAGE_SIZE)
+		goto buffer_done;
+
+	scnprintf(tmp, sizeof(tmp),
+		  "Port_speed:%d  Link_byte_cnt:%ld  "
+		  "Max_byte_per_interval:%ld\n",
+		  lpfc_sli_port_speed_get(phba),
+		  (unsigned long)phba->cmf_link_byte_count,
+		  (unsigned long)phba->cmf_max_bytes_per_interval);
+	strlcat(buf, tmp, PAGE_SIZE);
+
+buffer_done:
+	len = strnlen(buf, PAGE_SIZE);
+
+	if (unlikely(len >= (PAGE_SIZE - 1))) {
+		lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+				"6312 Catching potential buffer "
+				"overflow > PAGE_SIZE = %lu bytes\n",
+				PAGE_SIZE);
+		strscpy(buf + PAGE_SIZE - 1 -
+			strnlen(LPFC_INFO_MORE_STR, PAGE_SIZE - 1),
+			LPFC_INFO_MORE_STR,
+			strnlen(LPFC_INFO_MORE_STR, PAGE_SIZE - 1)
+			+ 1);
+	}
+	return len;
+}
+
 /**
  * lpfc_drvr_version_show - Return the Emulex driver string with version number
  * @dev: class unused variable.
@@ -168,7 +350,7 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
 	char *statep;
 	int i;
 	int len = 0;
-	char tmp[LPFC_MAX_NVME_INFO_TMP_LEN] = {0};
+	char tmp[LPFC_MAX_INFO_TMP_LEN] = {0};
 
 	if (!(vport->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) {
 		len = scnprintf(buf, PAGE_SIZE, "NVME Disabled\n");
@@ -512,9 +694,9 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
 				"6314 Catching potential buffer "
 				"overflow > PAGE_SIZE = %lu bytes\n",
 				PAGE_SIZE);
-		strlcpy(buf + PAGE_SIZE - 1 - sizeof(LPFC_NVME_INFO_MORE_STR),
-			LPFC_NVME_INFO_MORE_STR,
-			sizeof(LPFC_NVME_INFO_MORE_STR) + 1);
+		strscpy(buf + PAGE_SIZE - 1 - sizeof(LPFC_INFO_MORE_STR),
+			LPFC_INFO_MORE_STR,
+			sizeof(LPFC_INFO_MORE_STR) + 1);
 	}
 
 	return len;
@@ -2248,11 +2430,6 @@ lpfc_sriov_hw_max_virtfn_show(struct device *dev,
 	return scnprintf(buf, PAGE_SIZE, "%d\n", max_nr_virtfn);
 }
 
-static inline bool lpfc_rangecheck(uint val, uint min, uint max)
-{
-	return val >= min && val <= max;
-}
-
 /**
  * lpfc_enable_bbcr_set: Sets an attribute value.
  * @phba: pointer the the adapter structure.
@@ -2641,6 +2818,7 @@ static DEVICE_ATTR_RO(lpfc_sriov_hw_max_virtfn);
 static DEVICE_ATTR(protocol, S_IRUGO, lpfc_sli4_protocol_show, NULL);
 static DEVICE_ATTR(lpfc_xlane_supported, S_IRUGO, lpfc_oas_supported_show,
 		   NULL);
+static DEVICE_ATTR(cmf_info, 0444, lpfc_cmf_info_show, NULL);
 
 static char *lpfc_soft_wwn_key = "C99G71SL8032A";
 #define WWN_SZ 8
@@ -4038,6 +4216,7 @@ lpfc_topology_store(struct device *dev, struct device_attribute *attr,
 	const char *val_buf = buf;
 	int err;
 	uint32_t prev_val;
+	u8 sli_family, if_type;
 
 	if (!strncmp(buf, "nolip ", strlen("nolip "))) {
 		nolip = 1;
@@ -4061,13 +4240,16 @@ lpfc_topology_store(struct device *dev, struct device_attribute *attr,
 		/*
 		 * The 'topology' is not a configurable parameter if :
 		 *   - persistent topology enabled
-		 *   - G7/G6 with no private loop support
+		 *   - ASIC_GEN_NUM >= 0xC, with no private loop support
 		 */
-
+		sli_family = bf_get(lpfc_sli_intf_sli_family,
+				    &phba->sli4_hba.sli_intf);
+		if_type = bf_get(lpfc_sli_intf_if_type,
+				 &phba->sli4_hba.sli_intf);
 		if ((phba->hba_flag & HBA_PERSISTENT_TOPO ||
-		     (!phba->sli4_hba.pc_sli4_params.pls &&
-		     (phba->pcidev->device == PCI_DEVICE_ID_LANCER_G6_FC ||
-		     phba->pcidev->device == PCI_DEVICE_ID_LANCER_G7_FC))) &&
+		    (!phba->sli4_hba.pc_sli4_params.pls &&
+		     (sli_family == LPFC_SLI_INTF_FAMILY_G6 ||
+		      if_type == LPFC_SLI_INTF_IF_TYPE_6))) &&
 		    val == 4) {
 			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
 				"3114 Loop mode not supported\n");
@@ -5412,9 +5594,9 @@ LPFC_VPORT_ATTR_R(fcp_class, 3, 2, 3,
 
 /*
 # lpfc_use_adisc: Use ADISC for FCP rediscovery instead of PLOGI. Value range
-# is [0,1]. Default value is 0.
+# is [0,1]. Default value is 1.
 */
-LPFC_VPORT_ATTR_RW(use_adisc, 0, 0, 1,
+LPFC_VPORT_ATTR_RW(use_adisc, 1, 0, 1,
 		   "Use ADISC on rediscovery to authenticate FCP devices");
 
 /*
@@ -6146,6 +6328,19 @@ LPFC_ATTR_RW(ras_fwlog_func, 0, 0, 7, "Firmware Logging Enabled on Function");
  */
 LPFC_BBCR_ATTR_RW(enable_bbcr, 1, 0, 1, "Enable BBC Recovery");
 
+/* Signaling module parameters */
+int lpfc_fabric_cgn_frequency = 100; /* 100 ms default */
+module_param(lpfc_fabric_cgn_frequency, int, 0444);
+MODULE_PARM_DESC(lpfc_fabric_cgn_frequency, "Congestion signaling fabric freq");
+
+int lpfc_acqe_cgn_frequency = 10; /* 10 sec default */
+module_param(lpfc_acqe_cgn_frequency, int, 0444);
+MODULE_PARM_DESC(lpfc_acqe_cgn_frequency, "Congestion signaling ACQE freq");
+
+int lpfc_use_cgn_signal = 1; /* 0 - only use FPINs, 1 - Use signals if avail  */
+module_param(lpfc_use_cgn_signal, int, 0444);
+MODULE_PARM_DESC(lpfc_use_cgn_signal, "Use Congestion signaling if available");
+
 /*
  * lpfc_enable_dpp: Enable DPP on G7
  *       0  = DPP on G7 disabled
@@ -6320,6 +6515,7 @@ struct device_attribute *lpfc_hba_attrs[] = {
 	&dev_attr_lpfc_enable_bbcr,
 	&dev_attr_lpfc_enable_dpp,
 	&dev_attr_lpfc_enable_mi,
+	&dev_attr_cmf_info,
 	&dev_attr_lpfc_max_vmid,
 	&dev_attr_lpfc_vmid_inactivity_timeout,
 	&dev_attr_lpfc_vmid_app_header,
@@ -6350,6 +6546,7 @@ struct device_attribute *lpfc_vport_attrs[] = {
 	&dev_attr_lpfc_max_scsicmpl_time,
 	&dev_attr_lpfc_stat_data_ctrl,
 	&dev_attr_lpfc_static_vport,
+	&dev_attr_cmf_info,
 	NULL,
 };
 
@@ -6741,6 +6938,9 @@ lpfc_get_host_speed(struct Scsi_Host *shost)
 		case LPFC_LINK_SPEED_128GHZ:
 			fc_host_speed(shost) = FC_PORTSPEED_128GBIT;
 			break;
+		case LPFC_LINK_SPEED_256GHZ:
+			fc_host_speed(shost) = FC_PORTSPEED_256GBIT;
+			break;
 		default:
 			fc_host_speed(shost) = FC_PORTSPEED_UNKNOWN;
 			break;
@@ -6908,6 +7108,9 @@ lpfc_get_stats(struct Scsi_Host *shost)
 	hs->invalid_crc_count = pmb->un.varRdLnk.crcCnt;
 	hs->error_frames = pmb->un.varRdLnk.crcCnt;
 
+	hs->cn_sig_warn = atomic64_read(&phba->cgn_acqe_stat.warn);
+	hs->cn_sig_alarm = atomic64_read(&phba->cgn_acqe_stat.alarm);
+
 	hs->link_failure_count -= lso->link_failure_count;
 	hs->loss_of_sync_count -= lso->loss_of_sync_count;
 	hs->loss_of_signal_count -= lso->loss_of_signal_count;
@@ -7019,6 +7222,12 @@ lpfc_reset_stats(struct Scsi_Host *shost)
 	else
 		lso->link_events = (phba->fc_eventTag >> 1);
 
+	atomic64_set(&phba->cgn_acqe_stat.warn, 0);
+	atomic64_set(&phba->cgn_acqe_stat.alarm, 0);
+
+	memset(&shost_to_fc_host(shost)->fpin_stats, 0,
+	       sizeof(shost_to_fc_host(shost)->fpin_stats));
+
 	psli->stats_start = ktime_get_seconds();
 
 	mempool_free(pmboxq, phba->mbox_mem_pool);
@@ -7452,6 +7661,12 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
 	lpfc_enable_dpp_init(phba, lpfc_enable_dpp);
 	lpfc_enable_mi_init(phba, lpfc_enable_mi);
 
+	phba->cgn_p.cgn_param_mode = LPFC_CFG_OFF;
+	phba->cmf_active_mode = LPFC_CFG_OFF;
+	if (lpfc_fabric_cgn_frequency > EDC_CG_SIGFREQ_CNT_MAX ||
+	   lpfc_fabric_cgn_frequency < EDC_CG_SIGFREQ_CNT_MIN)
+		lpfc_fabric_cgn_frequency = 100; /* 100 ms default */
+
 	if (phba->sli_rev != LPFC_SLI_REV4) {
 		/* NVME only supported on SLI4 */
 		phba->nvmet_support = 0;
diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c
index 38cfe1bc6a4d..fdf08cb57207 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.c
+++ b/drivers/scsi/lpfc/lpfc_bsg.c
@@ -5751,6 +5751,92 @@ job_error:
 
 }
 
+static int
+lpfc_get_cgnbuf_info(struct bsg_job *job)
+{
+	struct lpfc_vport *vport = shost_priv(fc_bsg_to_shost(job));
+	struct lpfc_hba *phba = vport->phba;
+	struct fc_bsg_request *bsg_request = job->request;
+	struct fc_bsg_reply *bsg_reply = job->reply;
+	struct get_cgnbuf_info_req *cgnbuf_req;
+	struct lpfc_cgn_info *cp;
+	uint8_t *cgn_buff;
+	int size, cinfosz;
+	int  rc = 0;
+
+	if (job->request_len < sizeof(struct fc_bsg_request) +
+	    sizeof(struct get_cgnbuf_info_req)) {
+		rc = -ENOMEM;
+		goto job_exit;
+	}
+
+	if (!phba->sli4_hba.pc_sli4_params.cmf) {
+		rc = -ENOENT;
+		goto job_exit;
+	}
+
+	if (!phba->cgn_i || !phba->cgn_i->virt) {
+		rc = -ENOENT;
+		goto job_exit;
+	}
+
+	cp = phba->cgn_i->virt;
+	if (cp->cgn_info_version < LPFC_CGN_INFO_V3) {
+		rc = -EPERM;
+		goto job_exit;
+	}
+
+	cgnbuf_req = (struct get_cgnbuf_info_req *)
+		bsg_request->rqst_data.h_vendor.vendor_cmd;
+
+	/* For reset or size == 0 */
+	bsg_reply->reply_payload_rcv_len = 0;
+
+	if (cgnbuf_req->reset == LPFC_BSG_CGN_RESET_STAT) {
+		lpfc_init_congestion_stat(phba);
+		goto job_exit;
+	}
+
+	/* We don't want to include the CRC at the end */
+	cinfosz = sizeof(struct lpfc_cgn_info) - sizeof(uint32_t);
+
+	size = cgnbuf_req->read_size;
+	if (!size)
+		goto job_exit;
+
+	if (size < cinfosz) {
+		/* Just copy back what we can */
+		cinfosz = size;
+		rc = -E2BIG;
+	}
+
+	/* Allocate memory to read congestion info */
+	cgn_buff = vmalloc(cinfosz);
+	if (!cgn_buff) {
+		rc = -ENOMEM;
+		goto job_exit;
+	}
+
+	memcpy(cgn_buff, cp, cinfosz);
+
+	bsg_reply->reply_payload_rcv_len =
+		sg_copy_from_buffer(job->reply_payload.sg_list,
+				    job->reply_payload.sg_cnt,
+				    cgn_buff, cinfosz);
+
+	vfree(cgn_buff);
+
+job_exit:
+	bsg_reply->result = rc;
+	if (!rc)
+		bsg_job_done(job, bsg_reply->result,
+			     bsg_reply->reply_payload_rcv_len);
+	else
+		lpfc_printf_log(phba, KERN_ERR, LOG_LIBDFC,
+				"2724 GET CGNBUF error: %d\n", rc);
+	return rc;
+}
+
 /**
  * lpfc_bsg_hst_vendor - process a vendor-specific fc_bsg_job
  * @job: fc_bsg_job to handle
@@ -5813,6 +5899,9 @@ lpfc_bsg_hst_vendor(struct bsg_job *job)
 	case LPFC_BSG_VENDOR_GET_TRUNK_INFO:
 		rc = lpfc_get_trunk_info(job);
 		break;
+	case LPFC_BSG_VENDOR_GET_CGNBUF_INFO:
+		rc = lpfc_get_cgnbuf_info(job);
+		break;
 	default:
 		rc = -EINVAL;
 		bsg_reply->reply_payload_rcv_len = 0;
diff --git a/drivers/scsi/lpfc/lpfc_bsg.h b/drivers/scsi/lpfc/lpfc_bsg.h
index 2dc71243775d..749d6c43cfce 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.h
+++ b/drivers/scsi/lpfc/lpfc_bsg.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2021 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2010-2015 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -43,6 +43,7 @@
 #define LPFC_BSG_VENDOR_RAS_GET_CONFIG		18
 #define LPFC_BSG_VENDOR_RAS_SET_CONFIG		19
 #define LPFC_BSG_VENDOR_GET_TRUNK_INFO		20
+#define LPFC_BSG_VENDOR_GET_CGNBUF_INFO		21
 
 struct set_ct_event {
 	uint32_t command;
@@ -386,6 +387,13 @@ struct get_trunk_info_req {
 	uint32_t command;
 };
 
+struct get_cgnbuf_info_req {
+	uint32_t command;
+	uint32_t read_size;
+	uint32_t reset;
+#define LPFC_BSG_CGN_RESET_STAT		1
+};
+
 /* driver only */
 #define SLI_CONFIG_NOT_HANDLED		0
 #define SLI_CONFIG_HANDLED		1
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index 737483c3f01d..c512f4199142 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -58,6 +58,8 @@ void lpfc_request_features(struct lpfc_hba *, struct lpfcMboxq *);
 int lpfc_sli4_mbox_rsrc_extent(struct lpfc_hba *, struct lpfcMboxq *,
 			   uint16_t, uint16_t, bool);
 int lpfc_get_sli4_parameters(struct lpfc_hba *, LPFC_MBOXQ_t *);
+int lpfc_reg_congestion_buf(struct lpfc_hba *phba);
+int lpfc_unreg_congestion_buf(struct lpfc_hba *phba);
 struct lpfc_vport *lpfc_find_vport_by_did(struct lpfc_hba *, uint32_t);
 void lpfc_cleanup_rcv_buffers(struct lpfc_vport *);
 void lpfc_rcv_seq_check_edtov(struct lpfc_vport *);
@@ -74,6 +76,20 @@ int lpfc_init_iocb_list(struct lpfc_hba *phba, int cnt);
 void lpfc_free_iocb_list(struct lpfc_hba *phba);
 int lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 			struct lpfc_queue *drq, int count, int idx);
+uint32_t lpfc_calc_cmf_latency(struct lpfc_hba *phba);
+void lpfc_cmf_signal_init(struct lpfc_hba *phba);
+void lpfc_cmf_start(struct lpfc_hba *phba);
+void lpfc_cmf_stop(struct lpfc_hba *phba);
+void lpfc_init_congestion_stat(struct lpfc_hba *phba);
+void lpfc_init_congestion_buf(struct lpfc_hba *phba);
+int lpfc_sli4_cgn_params_read(struct lpfc_hba *phba);
+uint32_t lpfc_cgn_calc_crc32(void *bufp, uint32_t sz, uint32_t seed);
+int lpfc_config_cgn_signal(struct lpfc_hba *phba);
+int lpfc_issue_cmf_sync_wqe(struct lpfc_hba *phba, u32 ms, u64 total);
+void lpfc_cgn_dump_rxmonitor(struct lpfc_hba *phba);
+void lpfc_cgn_update_stat(struct lpfc_hba *phba, uint32_t dtag);
+void lpfc_unblock_requests(struct lpfc_hba *phba);
+void lpfc_block_requests(struct lpfc_hba *phba);
 
 void lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_mbx_cmpl_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *);
@@ -87,6 +103,8 @@ void lpfc_unregister_vfi_cmpl(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_enqueue_node(struct lpfc_vport *, struct lpfc_nodelist *);
 void lpfc_dequeue_node(struct lpfc_vport *, struct lpfc_nodelist *);
 void lpfc_nlp_set_state(struct lpfc_vport *, struct lpfc_nodelist *, int);
+void lpfc_nlp_reg_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp);
+void lpfc_nlp_unreg_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp);
 void lpfc_drop_node(struct lpfc_vport *, struct lpfc_nodelist *);
 void lpfc_set_disctmo(struct lpfc_vport *);
 int  lpfc_can_disctmo(struct lpfc_vport *);
@@ -141,6 +159,8 @@ int lpfc_issue_els_scr(struct lpfc_vport *vport, uint8_t retry);
 int lpfc_issue_els_rscn(struct lpfc_vport *vport, uint8_t retry);
 int lpfc_issue_fabric_reglogin(struct lpfc_vport *);
 int lpfc_issue_els_rdf(struct lpfc_vport *vport, uint8_t retry);
+int lpfc_issue_els_edc(struct lpfc_vport *vport, uint8_t retry);
+void lpfc_els_rcv_fpin(struct lpfc_vport *vport, void *p, u32 fpin_length);
 int lpfc_els_free_iocb(struct lpfc_hba *, struct lpfc_iocbq *);
 int lpfc_ct_free_iocb(struct lpfc_hba *, struct lpfc_iocbq *);
 int lpfc_els_rsp_acc(struct lpfc_vport *, uint32_t, struct lpfc_iocbq *,
@@ -213,6 +233,9 @@ irqreturn_t lpfc_sli_fp_intr_handler(int, void *);
 irqreturn_t lpfc_sli4_intr_handler(int, void *);
 irqreturn_t lpfc_sli4_hba_intr_handler(int, void *);
 
+int lpfc_read_object(struct lpfc_hba *phba, char *s, uint32_t *datap,
+		     uint32_t len);
+
 void lpfc_sli4_cleanup_poll_list(struct lpfc_hba *phba);
 int lpfc_sli4_poll_eq(struct lpfc_queue *q, uint8_t path);
 void lpfc_sli4_poll_hbtimer(struct timer_list *t);
@@ -459,6 +482,9 @@ void lpfc_free_fast_evt(struct lpfc_hba *, struct lpfc_fast_path_event *);
 void lpfc_create_static_vport(struct lpfc_hba *);
 void lpfc_stop_hba_timers(struct lpfc_hba *);
 void lpfc_stop_port(struct lpfc_hba *);
+int lpfc_update_cmf_cmd(struct lpfc_hba *phba, uint32_t sz);
+int lpfc_update_cmf_cmpl(struct lpfc_hba *phba, uint64_t val, uint32_t sz,
+			 struct Scsi_Host *shost);
 void __lpfc_sli4_stop_fcf_redisc_wait_timer(struct lpfc_hba *);
 void lpfc_sli4_stop_fcf_redisc_wait_timer(struct lpfc_hba *);
 void lpfc_parse_fcoe_conf(struct lpfc_hba *, uint8_t *, uint32_t);
@@ -605,6 +631,10 @@ extern int lpfc_enable_nvmet_cnt;
 extern unsigned long long lpfc_enable_nvmet[];
 extern int lpfc_no_hba_reset_cnt;
 extern unsigned long lpfc_no_hba_reset[];
+extern int lpfc_acqe_cgn_frequency;
+extern int lpfc_fabric_cgn_frequency;
+extern int lpfc_use_cgn_signal;
+
 extern union lpfc_wqe128 lpfc_iread_cmd_template;
 extern union lpfc_wqe128 lpfc_iwrite_cmd_template;
 extern union lpfc_wqe128 lpfc_icmnd_cmd_template;
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index 610b6dabb3b5..dfcb7d4bd7fa 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -2288,6 +2288,8 @@ lpfc_cmpl_ct_disc_fdmi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 			/* No retry on Vendor, RPA only done on physical port */
 			if (phba->link_flag & LS_CT_VEN_RPA) {
 				phba->link_flag &= ~LS_CT_VEN_RPA;
+				if (phba->cmf_active_mode == LPFC_CFG_OFF)
+					return;
 				lpfc_printf_log(phba, KERN_ERR,
 						LOG_DISCOVERY | LOG_ELS,
 						"6460 VEN FDMI RPA failure\n");
@@ -2332,24 +2334,29 @@ lpfc_cmpl_ct_disc_fdmi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 		break;
 	case SLI_MGMT_RPA:
 		if (vport->port_type == LPFC_PHYSICAL_PORT &&
-		    phba->cfg_enable_mi &&
-		    phba->sli4_hba.pc_sli4_params.mi_ver > LPFC_MIB1_SUPPORT) {
+		    phba->sli4_hba.pc_sli4_params.mi_ver) {
 			/* mi is only for the phyical port, no vports */
 			if (phba->link_flag & LS_CT_VEN_RPA) {
 				lpfc_printf_vlog(vport, KERN_INFO,
-						 LOG_DISCOVERY | LOG_ELS,
+						 LOG_DISCOVERY | LOG_ELS |
+						 LOG_CGN_MGMT,
 						 "6449 VEN RPA FDMI Success\n");
 				phba->link_flag &= ~LS_CT_VEN_RPA;
 				break;
 			}
 
+			lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+					"6210 Issue Vendor MI FDMI %x\n",
+					phba->sli4_hba.pc_sli4_params.mi_ver);
+
+			/* CGN is only for the physical port, no vports */
 			if (lpfc_fdmi_cmd(vport, ndlp, cmd,
 					  LPFC_FDMI_VENDOR_ATTR_mi) == 0)
 				phba->link_flag |= LS_CT_VEN_RPA;
 			lpfc_printf_log(phba, KERN_INFO,
 					LOG_DISCOVERY | LOG_ELS,
 					"6458 Send MI FDMI:%x Flag x%x\n",
-					phba->sli4_hba.pc_sli4_params.mi_value,
+					phba->sli4_hba.pc_sli4_params.mi_ver,
 					phba->link_flag);
 		} else {
 			lpfc_printf_log(phba, KERN_INFO,
@@ -2846,6 +2853,8 @@ lpfc_fdmi_port_attr_support_speed(struct lpfc_vport *vport,
 
 	ae->un.AttrInt = 0;
 	if (!(phba->hba_flag & HBA_FCOE_MODE)) {
+		if (phba->lmt & LMT_256Gb)
+			ae->un.AttrInt |= HBA_PORTSPEED_256GFC;
 		if (phba->lmt & LMT_128Gb)
 			ae->un.AttrInt |= HBA_PORTSPEED_128GFC;
 		if (phba->lmt & LMT_64Gb)
@@ -2927,6 +2936,9 @@ lpfc_fdmi_port_attr_speed(struct lpfc_vport *vport,
 		case LPFC_LINK_SPEED_128GHZ:
 			ae->un.AttrInt = HBA_PORTSPEED_128GFC;
 			break;
+		case LPFC_LINK_SPEED_256GHZ:
+			ae->un.AttrInt = HBA_PORTSPEED_256GFC;
+			break;
 		default:
 			ae->un.AttrInt = HBA_PORTSPEED_UNKNOWN;
 			break;
@@ -3343,7 +3355,7 @@ lpfc_fdmi_vendor_attr_mi(struct lpfc_vport *vport,
 	ae = (struct lpfc_fdmi_attr_entry *)&ad->AttrValue;
 	memset(ae, 0, 256);
 	sprintf(mibrevision, "ELXE2EM:%04d",
-		phba->sli4_hba.pc_sli4_params.mi_value);
+		phba->sli4_hba.pc_sli4_params.mi_ver);
 	strncpy(ae->un.AttrString, &mibrevision[0], sizeof(ae->un.AttrString));
 	len = strnlen(ae->un.AttrString, sizeof(ae->un.AttrString));
 	len += (len & 3) ? (4 - (len & 3)) : 4;
@@ -3884,9 +3896,8 @@ lpfc_cmpl_ct_cmd_vmid(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 /**
  * lpfc_vmid_cmd - Build and send a FDMI cmd to the specified NPort
  * @vport: pointer to a host virtual N_Port data structure.
- * @ndlp: ndlp to send FDMI cmd to (if NULL use FDMI_DID)
- * cmdcode: FDMI command to send
- * mask: Mask of HBA or PORT Attributes to send
+ * @cmdcode: application server command code to send
+ * @vmid: pointer to vmid info structure
  *
  * Builds and sends a FDMI command using the CT subsystem.
  */
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 6ff85ae57e79..bd6d459afce5 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -5429,6 +5429,180 @@ lpfc_idiag_extacc_read(struct file *file, char __user *buf, size_t nbytes,
 	return simple_read_from_buffer(buf, nbytes, ppos, pbuffer, len);
 }
 
+static int
+lpfc_cgn_buffer_open(struct inode *inode, struct file *file)
+{
+	struct lpfc_debug *debug;
+	int rc = -ENOMEM;
+
+	debug = kmalloc(sizeof(*debug), GFP_KERNEL);
+	if (!debug)
+		goto out;
+
+	debug->buffer = vmalloc(LPFC_CGN_BUF_SIZE);
+	if (!debug->buffer) {
+		kfree(debug);
+		goto out;
+	}
+
+	debug->i_private = inode->i_private;
+	file->private_data = debug;
+
+	rc = 0;
+out:
+	return rc;
+}
+
+static ssize_t
+lpfc_cgn_buffer_read(struct file *file, char __user *buf, size_t nbytes,
+		     loff_t *ppos)
+{
+	struct lpfc_debug *debug = file->private_data;
+	struct lpfc_hba *phba = (struct lpfc_hba *)debug->i_private;
+	char *buffer = debug->buffer;
+	uint32_t *ptr;
+	int cnt, len = 0;
+
+	if (!phba->sli4_hba.pc_sli4_params.mi_ver || !phba->cgn_i) {
+		len += scnprintf(buffer + len, LPFC_CGN_BUF_SIZE - len,
+				 "Congestion Mgmt is not supported\n");
+		goto out;
+	}
+	ptr = (uint32_t *)phba->cgn_i->virt;
+	len += scnprintf(buffer + len, LPFC_CGN_BUF_SIZE - len,
+			 "Congestion Buffer Header\n");
+	/* Dump the first 32 bytes */
+	cnt = 32;
+	len += scnprintf(buffer + len, LPFC_CGN_BUF_SIZE - len,
+			 "000: %08x %08x %08x %08x %08x %08x %08x %08x\n",
+			 *ptr, *(ptr + 1), *(ptr + 2), *(ptr + 3),
+			 *(ptr + 4), *(ptr + 5), *(ptr + 6), *(ptr + 7));
+	ptr += 8;
+	len += scnprintf(buffer + len, LPFC_CGN_BUF_SIZE - len,
+			 "Congestion Buffer Data\n");
+	while (cnt < sizeof(struct lpfc_cgn_info)) {
+		if (len > (LPFC_CGN_BUF_SIZE - LPFC_DEBUG_OUT_LINE_SZ)) {
+			len += scnprintf(buffer + len, LPFC_CGN_BUF_SIZE - len,
+					 "Truncated . . .\n");
+			break;
+		}
+		len += scnprintf(buffer + len, LPFC_CGN_BUF_SIZE - len,
+				 "%03x: %08x %08x %08x %08x "
+				 "%08x %08x %08x %08x\n",
+				 cnt, *ptr, *(ptr + 1), *(ptr + 2),
+				 *(ptr + 3), *(ptr + 4), *(ptr + 5),
+				 *(ptr + 6), *(ptr + 7));
+		cnt += 32;
+		ptr += 8;
+	}
+out:
+	return simple_read_from_buffer(buf, nbytes, ppos, buffer, len);
+}
+
+static int
+lpfc_cgn_buffer_release(struct inode *inode, struct file *file)
+{
+	struct lpfc_debug *debug = file->private_data;
+
+	vfree(debug->buffer);
+	kfree(debug);
+
+	return 0;
+}
+
+static int
+lpfc_rx_monitor_open(struct inode *inode, struct file *file)
+{
+	struct lpfc_rx_monitor_debug *debug;
+	int rc = -ENOMEM;
+
+	debug = kmalloc(sizeof(*debug), GFP_KERNEL);
+	if (!debug)
+		goto out;
+
+	debug->buffer = vmalloc(MAX_DEBUGFS_RX_TABLE_SIZE);
+	if (!debug->buffer) {
+		kfree(debug);
+		goto out;
+	}
+
+	debug->i_private = inode->i_private;
+	file->private_data = debug;
+
+	rc = 0;
+out:
+	return rc;
+}
+
+static ssize_t
+lpfc_rx_monitor_read(struct file *file, char __user *buf, size_t nbytes,
+		     loff_t *ppos)
+{
+	struct lpfc_rx_monitor_debug *debug = file->private_data;
+	struct lpfc_hba *phba = (struct lpfc_hba *)debug->i_private;
+	char *buffer = debug->buffer;
+	struct rxtable_entry *entry;
+	int i, len = 0, head, tail, last, start;
+
+	head = atomic_read(&phba->rxtable_idx_head);
+	while (head == LPFC_RXMONITOR_TABLE_IN_USE) {
+		/* Table is getting updated */
+		msleep(20);
+		head = atomic_read(&phba->rxtable_idx_head);
+	}
+
+	tail = atomic_xchg(&phba->rxtable_idx_tail, head);
+	if (!phba->rxtable || head == tail) {
+		len += scnprintf(buffer + len, MAX_DEBUGFS_RX_TABLE_SIZE - len,
+				"Rxtable is empty\n");
+		goto out;
+	}
+	last = (head > tail) ?  head : LPFC_MAX_RXMONITOR_ENTRY;
+	start = tail;
+
+	len += scnprintf(buffer + len, MAX_DEBUGFS_RX_TABLE_SIZE - len,
+			"        MaxBPI\t Total Data Cmd  Total Data Cmpl "
+			"  Latency(us)    Avg IO Size\tMax IO Size   IO cnt "
+			"Info BWutil(ms)\n");
+get_table:
+	for (i = start; i < last; i++) {
+		entry = &phba->rxtable[i];
+		len += scnprintf(buffer + len, MAX_DEBUGFS_RX_TABLE_SIZE - len,
+				"%3d:%12lld  %12lld\t%12lld\t"
+				"%8lldus\t%8lld\t%10lld "
+				"%8d   %2d %2d(%2d)\n",
+				i, entry->max_bytes_per_interval,
+				entry->total_bytes,
+				entry->rcv_bytes,
+				entry->avg_io_latency,
+				entry->avg_io_size,
+				entry->max_read_cnt,
+				entry->io_cnt,
+				entry->cmf_info,
+				entry->timer_utilization,
+				entry->timer_interval);
+	}
+
+	if (head != last) {
+		start = 0;
+		last = head;
+		goto get_table;
+	}
+out:
+	return simple_read_from_buffer(buf, nbytes, ppos, buffer, len);
+}
+
+static int
+lpfc_rx_monitor_release(struct inode *inode, struct file *file)
+{
+	struct lpfc_rx_monitor_debug *debug = file->private_data;
+
+	vfree(debug->buffer);
+	kfree(debug);
+
+	return 0;
+}
+
 #undef lpfc_debugfs_op_disc_trc
 static const struct file_operations lpfc_debugfs_op_disc_trc = {
 	.owner =        THIS_MODULE,
@@ -5657,6 +5831,23 @@ static const struct file_operations lpfc_idiag_op_extAcc = {
 	.write =        lpfc_idiag_extacc_write,
 	.release =      lpfc_idiag_cmd_release,
 };
+#undef lpfc_cgn_buffer_op
+static const struct file_operations lpfc_cgn_buffer_op = {
+	.owner =        THIS_MODULE,
+	.open =         lpfc_cgn_buffer_open,
+	.llseek =       lpfc_debugfs_lseek,
+	.read =         lpfc_cgn_buffer_read,
+	.release =      lpfc_cgn_buffer_release,
+};
+
+#undef lpfc_rx_monitor_op
+static const struct file_operations lpfc_rx_monitor_op = {
+	.owner =        THIS_MODULE,
+	.open =         lpfc_rx_monitor_open,
+	.llseek =       lpfc_debugfs_lseek,
+	.read =         lpfc_rx_monitor_read,
+	.release =      lpfc_rx_monitor_release,
+};
 #endif
 
 /* lpfc_idiag_mbxacc_dump_bsg_mbox - idiag debugfs dump bsg mailbox command
@@ -5907,6 +6098,32 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport)
 			goto debug_failed;
 		}
 
+		/* Congestion Info Buffer */
+		scnprintf(name, sizeof(name), "cgn_buffer");
+		phba->debug_cgn_buffer =
+			debugfs_create_file(name, S_IFREG | 0644,
+					    phba->hba_debugfs_root,
+					    phba, &lpfc_cgn_buffer_op);
+		if (!phba->debug_cgn_buffer) {
+			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
+					 "6527 Cannot create debugfs "
+					 "cgn_buffer\n");
+			goto debug_failed;
+		}
+
+		/* RX Monitor */
+		scnprintf(name, sizeof(name), "rx_monitor");
+		phba->debug_rx_monitor =
+			debugfs_create_file(name, S_IFREG | 0644,
+					    phba->hba_debugfs_root,
+					    phba, &lpfc_rx_monitor_op);
+		if (!phba->debug_rx_monitor) {
+			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
+					 "6528 Cannot create debugfs "
+					 "rx_monitor\n");
+			goto debug_failed;
+		}
+
 		/* RAS log */
 		snprintf(name, sizeof(name), "ras_log");
 		phba->debug_ras_log =
@@ -6335,6 +6552,12 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport)
 		debugfs_remove(phba->debug_hbqinfo); /* hbqinfo */
 		phba->debug_hbqinfo = NULL;
 
+		debugfs_remove(phba->debug_cgn_buffer);
+		phba->debug_cgn_buffer = NULL;
+
+		debugfs_remove(phba->debug_rx_monitor);
+		phba->debug_rx_monitor = NULL;
+
 		debugfs_remove(phba->debug_ras_log);
 		phba->debug_ras_log = NULL;
 
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h
index 7ab6d3b08698..a5bf71b34972 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.h
+++ b/drivers/scsi/lpfc/lpfc_debugfs.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2021 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2007-2011 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -52,6 +52,9 @@
 /* scsistat output buffer size */
 #define LPFC_SCSISTAT_SIZE 8192
 
+/* Congestion Info Buffer size */
+#define LPFC_CGN_BUF_SIZE 8192
+
 #define LPFC_DEBUG_OUT_LINE_SZ	80
 
 /*
@@ -279,6 +282,12 @@ struct lpfc_idiag {
 	void *ptr_private;
 };
 
+#define MAX_DEBUGFS_RX_TABLE_SIZE	(100 * LPFC_MAX_RXMONITOR_ENTRY)
+struct lpfc_rx_monitor_debug {
+	char *i_private;
+	char *buffer;
+};
+
 #else
 
 #define lpfc_nvmeio_data(phba, fmt, arg...) \
diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h
index 131374a61d7e..871b665bd72e 100644
--- a/drivers/scsi/lpfc/lpfc_disc.h
+++ b/drivers/scsi/lpfc/lpfc_disc.h
@@ -78,10 +78,11 @@ struct lpfc_node_rrqs {
 };
 
 enum lpfc_fc4_xpt_flags {
-	NLP_WAIT_FOR_UNREG = 0x1,
-	SCSI_XPT_REGD      = 0x2,
-	NVME_XPT_REGD      = 0x4,
-	NLP_XPT_HAS_HH     = 0x8,
+	NLP_XPT_REGD		= 0x1,
+	SCSI_XPT_REGD		= 0x2,
+	NVME_XPT_REGD		= 0x4,
+	NVME_XPT_UNREG_WAIT	= 0x8,
+	NLP_XPT_HAS_HH		= 0x10
 };
 
 struct lpfc_nodelist {
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index e481f5fe29d7..1254a575fd47 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -56,6 +56,9 @@ static int lpfc_issue_els_fdisc(struct lpfc_vport *vport,
 				struct lpfc_nodelist *ndlp, uint8_t retry);
 static int lpfc_issue_fabric_iocb(struct lpfc_hba *phba,
 				  struct lpfc_iocbq *iocb);
+static void lpfc_cmpl_els_edc(struct lpfc_hba *phba,
+			      struct lpfc_iocbq *cmdiocb,
+			      struct lpfc_iocbq *rspiocb);
 static void lpfc_cmpl_els_uvem(struct lpfc_hba *, struct lpfc_iocbq *,
 			       struct lpfc_iocbq *);
 
@@ -1664,6 +1667,12 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp,
 	if (!new_ndlp || (new_ndlp == ndlp))
 		return ndlp;
 
+	/*
+	 * Unregister from backend if not done yet. Could have been skipped
+	 * due to ADISC
+	 */
+	lpfc_nlp_unreg_node(vport, new_ndlp);
+
 	if (phba->sli_rev == LPFC_SLI_REV4) {
 		active_rrqs_xri_bitmap = mempool_alloc(phba->active_rrq_pool,
 						       GFP_KERNEL);
@@ -2025,9 +2034,7 @@ lpfc_cmpl_els_plogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 				 irsp->un.ulpWord[4]);
 
 		/* Do not call DSM for lpfc_els_abort'ed ELS cmds */
-		if (lpfc_error_lost_link(irsp))
-			goto check_plogi;
-		else
+		if (!lpfc_error_lost_link(irsp))
 			lpfc_disc_state_machine(vport, ndlp, cmdiocb,
 						NLP_EVT_CMPL_PLOGI);
 
@@ -2080,7 +2087,6 @@ lpfc_cmpl_els_plogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 					NLP_EVT_CMPL_PLOGI);
 	}
 
- check_plogi:
 	if (disc && vport->num_disc_nodes) {
 		/* Check to see if there are more PLOGIs to be sent */
 		lpfc_more_plogi(vport);
@@ -2607,6 +2613,14 @@ lpfc_adisc_done(struct lpfc_vport *vport)
 	if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) &&
 	    !(vport->fc_flag & FC_RSCN_MODE) &&
 	    (phba->sli_rev < LPFC_SLI_REV4)) {
+
+		/*
+		 * If link is down, clear_la and reg_vpi will be done after
+		 * flogi following a link up event
+		 */
+		if (!lpfc_is_link_up(phba))
+			return;
+
 		/* The ADISCs are complete.  Doesn't matter if they
 		 * succeeded or failed because the ADISC completion
 		 * routine guarantees to call the state machine and
@@ -2749,12 +2763,9 @@ lpfc_cmpl_els_adisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 				 "2755 ADISC failure DID:%06X Status:x%x/x%x\n",
 				 ndlp->nlp_DID, irsp->ulpStatus,
 				 irsp->un.ulpWord[4]);
-		/* Do not call DSM for lpfc_els_abort'ed ELS cmds */
-		if (lpfc_error_lost_link(irsp))
-			goto check_adisc;
-		else
-			lpfc_disc_state_machine(vport, ndlp, cmdiocb,
-						NLP_EVT_CMPL_ADISC);
+
+		lpfc_disc_state_machine(vport, ndlp, cmdiocb,
+				NLP_EVT_CMPL_ADISC);
 
 		/* As long as this node is not registered with the SCSI or NVMe
 		 * transport, it is no longer an active node. Otherwise
@@ -2772,7 +2783,6 @@ lpfc_cmpl_els_adisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 		lpfc_disc_state_machine(vport, ndlp, cmdiocb,
 					NLP_EVT_CMPL_ADISC);
 
- check_adisc:
 	/* Check to see if there are more ADISCs to be sent */
 	if (disc && vport->num_disc_nodes)
 		lpfc_more_adisc(vport);
@@ -3253,7 +3263,7 @@ lpfc_cmpl_els_disc_cmd(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 		irsp->ulpStatus, irsp->un.ulpWord[4],
 		irsp->un.elsreq64.remoteID);
 	/* ELS cmd tag <ulpIoTag> completes */
-	lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
+	lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS | LOG_CGN_MGMT,
 			 "0217 ELS cmd tag x%x completes Data: x%x x%x x%x "
 			 "x%x\n",
 			 irsp->ulpIoTag, irsp->ulpStatus,
@@ -3279,6 +3289,9 @@ lpfc_cmpl_els_disc_cmd(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 			case ELS_CMD_SCR:
 				lpfc_issue_els_scr(vport, cmdiocb->retry);
 				break;
+			case ELS_CMD_EDC:
+				lpfc_issue_els_edc(vport, cmdiocb->retry);
+				break;
 			case ELS_CMD_RDF:
 				cmdiocb->context1 = NULL; /* save ndlp refcnt */
 				lpfc_issue_els_rdf(vport, cmdiocb->retry);
@@ -3288,6 +3301,11 @@ lpfc_cmpl_els_disc_cmd(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 		}
 		phba->fc_stat.elsRetryExceeded++;
 	}
+	if (cmd == ELS_CMD_EDC) {
+		/* must be called before checking uplStatus and returning */
+		lpfc_cmpl_els_edc(phba, cmdiocb, rspiocb);
+		return;
+	}
 	if (irsp->ulpStatus) {
 		/* ELS discovery cmd completes with error */
 		lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS,
@@ -3312,11 +3330,14 @@ lpfc_cmpl_els_disc_cmd(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 
 		for (i = 0; i < ELS_RDF_REG_TAG_CNT &&
 			    i < be32_to_cpu(prdf->reg_d1.reg_desc.count); i++)
-			lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
-				 "4677 Fabric RDF Notification Grant Data: "
-				 "0x%08x\n",
-				 be32_to_cpu(
-					prdf->reg_d1.desc_tags[i]));
+			lpfc_printf_vlog(vport, KERN_INFO,
+					 LOG_ELS | LOG_CGN_MGMT,
+					 "4677 Fabric RDF Notification Grant "
+					 "Data: 0x%08x Reg: %x %x\n",
+					 be32_to_cpu(
+						prdf->reg_d1.desc_tags[i]),
+					 phba->cgn_reg_signal,
+					 phba->cgn_reg_fpin);
 	}
 
 out:
@@ -3375,6 +3396,7 @@ lpfc_issue_els_scr(struct lpfc_vport *vport, uint8_t retry)
 	if (phba->sli_rev == LPFC_SLI_REV4) {
 		rc = lpfc_reg_fab_ctrl_node(vport, ndlp);
 		if (rc) {
+			lpfc_els_free_iocb(phba, elsiocb);
 			lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE,
 					 "0937 %s: Failed to reg fc node, rc %d\n",
 					 __func__, rc);
@@ -3413,7 +3435,6 @@ lpfc_issue_els_scr(struct lpfc_vport *vport, uint8_t retry)
 		return 1;
 	}
 
-	/* Keep the ndlp just in case RDF is being sent */
 	return 0;
 }
 
@@ -3657,28 +3678,15 @@ lpfc_issue_els_rdf(struct lpfc_vport *vport, uint8_t retry)
 		lpfc_enqueue_node(vport, ndlp);
 	}
 
-	/* RDF ELS is not required on an NPIV VN_Port.  */
-	if (vport->port_type == LPFC_NPIV_PORT) {
-		lpfc_nlp_put(ndlp);
+	/* RDF ELS is not required on an NPIV VN_Port. */
+	if (vport->port_type == LPFC_NPIV_PORT)
 		return -EACCES;
-	}
 
 	elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp,
 				     ndlp->nlp_DID, ELS_CMD_RDF);
 	if (!elsiocb)
 		return -ENOMEM;
 
-	if (phba->sli_rev == LPFC_SLI_REV4 &&
-	    !(ndlp->nlp_flag & NLP_RPI_REGISTERED)) {
-		lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE,
-				 "0939 %s: FC_NODE x%x RPI x%x flag x%x "
-				 "ste x%x type x%x Not registered\n",
-				 __func__, ndlp->nlp_DID, ndlp->nlp_rpi,
-				 ndlp->nlp_flag, ndlp->nlp_state,
-				 ndlp->nlp_type);
-		return -ENODEV;
-	}
-
 	/* Configure the payload for the supported FPIN events. */
 	prdf = (struct lpfc_els_rdf_req *)
 		(((struct lpfc_dmabuf *)elsiocb->context2)->virt);
@@ -3695,10 +3703,12 @@ lpfc_issue_els_rdf(struct lpfc_vport *vport, uint8_t retry)
 	prdf->reg_d1.desc_tags[2] = cpu_to_be32(ELS_DTAG_PEER_CONGEST);
 	prdf->reg_d1.desc_tags[3] = cpu_to_be32(ELS_DTAG_CONGESTION);
 
-	lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
-			 "6444 Xmit RDF to remote NPORT x%x\n",
-			 ndlp->nlp_DID);
+	lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS | LOG_CGN_MGMT,
+			 "6444 Xmit RDF to remote NPORT x%x Reg: %x %x\n",
+			 ndlp->nlp_DID, phba->cgn_reg_signal,
+			 phba->cgn_reg_fpin);
 
+	phba->cgn_fpin_frequency = LPFC_FPIN_INIT_FREQ;
 	elsiocb->iocb_cmpl = lpfc_cmpl_els_disc_cmd;
 	elsiocb->context1 = lpfc_nlp_get(ndlp);
 	if (!elsiocb->context1) {
@@ -3739,7 +3749,7 @@ lpfc_els_rcv_rdf(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
 {
 	/* Send LS_ACC */
 	if (lpfc_els_rsp_acc(vport, ELS_CMD_RDF, cmdiocb, ndlp, NULL)) {
-		lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
+		lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS | LOG_CGN_MGMT,
 				 "1623 Failed to RDF_ACC from x%x for x%x\n",
 				 ndlp->nlp_DID, vport->fc_myDID);
 		return -EIO;
@@ -3747,7 +3757,7 @@ lpfc_els_rcv_rdf(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
 
 	/* Issue new RDF for reregistering */
 	if (lpfc_issue_els_rdf(vport, 0)) {
-		lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
+		lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS | LOG_CGN_MGMT,
 				 "2623 Failed to re register RDF for x%x\n",
 				 vport->fc_myDID);
 		return -EIO;
@@ -3757,6 +3767,448 @@ lpfc_els_rcv_rdf(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
 }
 
 /**
+ * lpfc_least_capable_settings - helper function for EDC rsp processing
+ * @phba: pointer to lpfc hba data structure.
+ * @pcgd: pointer to congestion detection descriptor in EDC rsp.
+ *
+ * This helper routine determines the least capable setting for
+ * congestion signals, signal freq, including scale, from the
+ * congestion detection descriptor in the EDC rsp.  The routine
+ * sets @phba values in preparation for a set_featues mailbox.
+ **/
+static void
+lpfc_least_capable_settings(struct lpfc_hba *phba,
+			    struct fc_diag_cg_sig_desc *pcgd)
+{
+	u32 rsp_sig_cap = 0, drv_sig_cap = 0;
+	u32 rsp_sig_freq_cyc = 0, rsp_sig_freq_scale = 0;
+	struct lpfc_cgn_info *cp;
+	u32 crc;
+	u16 sig_freq;
+
+	/* Get rsp signal and frequency capabilities.  */
+	rsp_sig_cap = be32_to_cpu(pcgd->xmt_signal_capability);
+	rsp_sig_freq_cyc = be16_to_cpu(pcgd->xmt_signal_frequency.count);
+	rsp_sig_freq_scale = be16_to_cpu(pcgd->xmt_signal_frequency.units);
+
+	/* If the Fport does not support signals. Set FPIN only */
+	if (rsp_sig_cap == EDC_CG_SIG_NOTSUPPORTED)
+		goto out_no_support;
+
+	/* Apply the xmt scale to the xmt cycle to get the correct frequency.
+	 * Adapter default is 100 millisSeconds.  Convert all xmt cycle values
+	 * to milliSeconds.
+	 */
+	switch (rsp_sig_freq_scale) {
+	case EDC_CG_SIGFREQ_SEC:
+		rsp_sig_freq_cyc *= MSEC_PER_SEC;
+		break;
+	case EDC_CG_SIGFREQ_MSEC:
+		rsp_sig_freq_cyc = 1;
+		break;
+	default:
+		goto out_no_support;
+	}
+
+	/* Convenient shorthand. */
+	drv_sig_cap = phba->cgn_reg_signal;
+
+	/* Choose the least capable frequency. */
+	if (rsp_sig_freq_cyc > phba->cgn_sig_freq)
+		phba->cgn_sig_freq = rsp_sig_freq_cyc;
+
+	/* Should be some common signals support. Settle on least capable
+	 * signal and adjust FPIN values. Initialize defaults to ease the
+	 * decision.
+	 */
+	phba->cgn_reg_fpin = LPFC_CGN_FPIN_WARN | LPFC_CGN_FPIN_ALARM;
+	phba->cgn_reg_signal = EDC_CG_SIG_NOTSUPPORTED;
+	if (rsp_sig_cap == EDC_CG_SIG_WARN_ONLY &&
+	    (drv_sig_cap == EDC_CG_SIG_WARN_ONLY ||
+	     drv_sig_cap == EDC_CG_SIG_WARN_ALARM)) {
+		phba->cgn_reg_signal = EDC_CG_SIG_WARN_ONLY;
+		phba->cgn_reg_fpin &= ~LPFC_CGN_FPIN_WARN;
+	}
+	if (rsp_sig_cap == EDC_CG_SIG_WARN_ALARM) {
+		if (drv_sig_cap == EDC_CG_SIG_WARN_ALARM) {
+			phba->cgn_reg_signal = EDC_CG_SIG_WARN_ALARM;
+			phba->cgn_reg_fpin = LPFC_CGN_FPIN_NONE;
+		}
+		if (drv_sig_cap == EDC_CG_SIG_WARN_ONLY) {
+			phba->cgn_reg_signal = EDC_CG_SIG_WARN_ONLY;
+			phba->cgn_reg_fpin &= ~LPFC_CGN_FPIN_WARN;
+		}
+	}
+
+	if (!phba->cgn_i)
+		return;
+
+	/* Update signal frequency in congestion info buffer */
+	cp = (struct lpfc_cgn_info *)phba->cgn_i->virt;
+
+	/* Frequency (in ms) Signal Warning/Signal Congestion Notifications
+	 * are received by the HBA
+	 */
+	sig_freq = phba->cgn_sig_freq;
+
+	if (phba->cgn_reg_signal == EDC_CG_SIG_WARN_ONLY)
+		cp->cgn_warn_freq = cpu_to_le16(sig_freq);
+	if (phba->cgn_reg_signal == EDC_CG_SIG_WARN_ALARM) {
+		cp->cgn_alarm_freq = cpu_to_le16(sig_freq);
+		cp->cgn_warn_freq = cpu_to_le16(sig_freq);
+	}
+	crc = lpfc_cgn_calc_crc32(cp, LPFC_CGN_INFO_SZ, LPFC_CGN_CRC32_SEED);
+	cp->cgn_info_crc = cpu_to_le32(crc);
+	return;
+
+out_no_support:
+	phba->cgn_reg_signal = EDC_CG_SIG_NOTSUPPORTED;
+	phba->cgn_sig_freq = 0;
+	phba->cgn_reg_fpin = LPFC_CGN_FPIN_ALARM | LPFC_CGN_FPIN_WARN;
+}
+
+DECLARE_ENUM2STR_LOOKUP(lpfc_get_tlv_dtag_nm, fc_ls_tlv_dtag,
+			FC_LS_TLV_DTAG_INIT);
+
+/**
+ * lpfc_cmpl_els_edc - Completion callback function for EDC
+ * @phba: pointer to lpfc hba data structure.
+ * @cmdiocb: pointer to lpfc command iocb data structure.
+ * @rspiocb: pointer to lpfc response iocb data structure.
+ *
+ * This routine is the completion callback function for issuing the Exchange
+ * Diagnostic Capabilities (EDC) command. The driver issues an EDC to
+ * notify the FPort of its Congestion and Link Fault capabilities.  This
+ * routine parses the FPort's response and decides on the least common
+ * values applicable to both FPort and NPort for Warnings and Alarms that
+ * are communicated via hardware signals.
+ **/
+static void
+lpfc_cmpl_els_edc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
+		  struct lpfc_iocbq *rspiocb)
+{
+	IOCB_t *irsp;
+	struct fc_els_edc_resp *edc_rsp;
+	struct fc_tlv_desc *tlv;
+	struct fc_diag_cg_sig_desc *pcgd;
+	struct fc_diag_lnkflt_desc *plnkflt;
+	struct lpfc_dmabuf *pcmd, *prsp;
+	const char *dtag_nm;
+	u32 *pdata, dtag;
+	int desc_cnt = 0, bytes_remain;
+	bool rcv_cap_desc = false;
+	struct lpfc_nodelist *ndlp;
+
+	irsp = &rspiocb->iocb;
+	ndlp = cmdiocb->context1;
+
+	lpfc_debugfs_disc_trc(phba->pport, LPFC_DISC_TRC_ELS_CMD,
+			      "EDC cmpl:    status:x%x/x%x did:x%x",
+			      irsp->ulpStatus, irsp->un.ulpWord[4],
+			      irsp->un.elsreq64.remoteID);
+
+	/* ELS cmd tag <ulpIoTag> completes */
+	lpfc_printf_log(phba, KERN_INFO, LOG_ELS | LOG_CGN_MGMT,
+			"4201 EDC cmd tag x%x completes Data: x%x x%x x%x\n",
+			irsp->ulpIoTag, irsp->ulpStatus,
+			irsp->un.ulpWord[4], irsp->ulpTimeout);
+
+	pcmd = (struct lpfc_dmabuf *)cmdiocb->context2;
+	if (!pcmd)
+		goto out;
+
+	pdata = (u32 *)pcmd->virt;
+	if (!pdata)
+		goto out;
+
+	/* Need to clear signal values, send features MB and RDF with FPIN. */
+	if (irsp->ulpStatus)
+		goto out;
+
+	prsp = list_get_first(&pcmd->list, struct lpfc_dmabuf, list);
+	if (!prsp)
+		goto out;
+
+	edc_rsp = prsp->virt;
+	if (!edc_rsp)
+		goto out;
+
+	/* ELS cmd tag <ulpIoTag> completes */
+	lpfc_printf_log(phba, KERN_INFO, LOG_ELS | LOG_CGN_MGMT,
+			"4676 Fabric EDC Rsp: "
+			"0x%02x, 0x%08x\n",
+			edc_rsp->acc_hdr.la_cmd,
+			be32_to_cpu(edc_rsp->desc_list_len));
+
+	/*
+	 * Payload length in bytes is the response descriptor list
+	 * length minus the 12 bytes of Link Service Request
+	 * Information descriptor in the reply.
+	 */
+	bytes_remain = be32_to_cpu(edc_rsp->desc_list_len) -
+				   sizeof(struct fc_els_lsri_desc);
+	if (bytes_remain <= 0)
+		goto out;
+
+	tlv = edc_rsp->desc;
+
+	/*
+	 * cycle through EDC diagnostic descriptors to find the
+	 * congestion signaling capability descriptor
+	 */
+	while (bytes_remain) {
+		if (bytes_remain < FC_TLV_DESC_HDR_SZ) {
+			lpfc_printf_log(phba, KERN_WARNING, LOG_CGN_MGMT,
+					"6461 Truncated TLV hdr on "
+					"Diagnostic descriptor[%d]\n",
+					desc_cnt);
+			goto out;
+		}
+
+		dtag = be32_to_cpu(tlv->desc_tag);
+		switch (dtag) {
+		case ELS_DTAG_LNK_FAULT_CAP:
+			if (bytes_remain < FC_TLV_DESC_SZ_FROM_LENGTH(tlv) ||
+			    FC_TLV_DESC_SZ_FROM_LENGTH(tlv) !=
+					sizeof(struct fc_diag_lnkflt_desc)) {
+				lpfc_printf_log(
+					phba, KERN_WARNING, LOG_CGN_MGMT,
+					"6462 Truncated Link Fault Diagnostic "
+					"descriptor[%d]: %d vs 0x%zx 0x%zx\n",
+					desc_cnt, bytes_remain,
+					FC_TLV_DESC_SZ_FROM_LENGTH(tlv),
+					sizeof(struct fc_diag_cg_sig_desc));
+				goto out;
+			}
+			plnkflt = (struct fc_diag_lnkflt_desc *)tlv;
+			lpfc_printf_log(
+				phba, KERN_INFO, LOG_ELS | LOG_CGN_MGMT,
+				"4617 Link Fault Desc Data: 0x%08x 0x%08x "
+				"0x%08x 0x%08x 0x%08x\n",
+				be32_to_cpu(plnkflt->desc_tag),
+				be32_to_cpu(plnkflt->desc_len),
+				be32_to_cpu(
+					plnkflt->degrade_activate_threshold),
+				be32_to_cpu(
+					plnkflt->degrade_deactivate_threshold),
+				be32_to_cpu(plnkflt->fec_degrade_interval));
+			break;
+		case ELS_DTAG_CG_SIGNAL_CAP:
+			if (bytes_remain < FC_TLV_DESC_SZ_FROM_LENGTH(tlv) ||
+			    FC_TLV_DESC_SZ_FROM_LENGTH(tlv) !=
+					sizeof(struct fc_diag_cg_sig_desc)) {
+				lpfc_printf_log(
+					phba, KERN_WARNING, LOG_CGN_MGMT,
+					"6463 Truncated Cgn Signal Diagnostic "
+					"descriptor[%d]: %d vs 0x%zx 0x%zx\n",
+					desc_cnt, bytes_remain,
+					FC_TLV_DESC_SZ_FROM_LENGTH(tlv),
+					sizeof(struct fc_diag_cg_sig_desc));
+				goto out;
+			}
+
+			pcgd = (struct fc_diag_cg_sig_desc *)tlv;
+			lpfc_printf_log(
+				phba, KERN_INFO, LOG_ELS | LOG_CGN_MGMT,
+				"4616 CGN Desc Data: 0x%08x 0x%08x "
+				"0x%08x 0x%04x 0x%04x 0x%08x 0x%04x 0x%04x\n",
+				be32_to_cpu(pcgd->desc_tag),
+				be32_to_cpu(pcgd->desc_len),
+				be32_to_cpu(pcgd->xmt_signal_capability),
+				be32_to_cpu(pcgd->xmt_signal_frequency.count),
+				be32_to_cpu(pcgd->xmt_signal_frequency.units),
+				be32_to_cpu(pcgd->rcv_signal_capability),
+				be32_to_cpu(pcgd->rcv_signal_frequency.count),
+				be32_to_cpu(pcgd->rcv_signal_frequency.units));
+
+			/* Compare driver and Fport capabilities and choose
+			 * least common.
+			 */
+			lpfc_least_capable_settings(phba, pcgd);
+			rcv_cap_desc = true;
+			break;
+		default:
+			dtag_nm = lpfc_get_tlv_dtag_nm(dtag);
+			lpfc_printf_log(phba, KERN_WARNING, LOG_CGN_MGMT,
+					"4919 unknown Diagnostic "
+					"Descriptor[%d]: tag x%x (%s)\n",
+					desc_cnt, dtag, dtag_nm);
+		}
+
+		bytes_remain -= FC_TLV_DESC_SZ_FROM_LENGTH(tlv);
+		tlv = fc_tlv_next_desc(tlv);
+		desc_cnt++;
+	}
+
+out:
+	if (!rcv_cap_desc) {
+		phba->cgn_reg_fpin = LPFC_CGN_FPIN_ALARM | LPFC_CGN_FPIN_WARN;
+		phba->cgn_reg_signal = EDC_CG_SIG_NOTSUPPORTED;
+		phba->cgn_sig_freq = 0;
+		lpfc_printf_log(phba, KERN_WARNING, LOG_ELS | LOG_CGN_MGMT,
+				"4202 EDC rsp error - sending RDF "
+				"for FPIN only.\n");
+	}
+
+	lpfc_config_cgn_signal(phba);
+
+	/* Check to see if link went down during discovery */
+	lpfc_els_chk_latt(phba->pport);
+	lpfc_debugfs_disc_trc(phba->pport, LPFC_DISC_TRC_ELS_CMD,
+			      "EDC Cmpl:     did:x%x refcnt %d",
+			      ndlp->nlp_DID, kref_read(&ndlp->kref), 0);
+	lpfc_els_free_iocb(phba, cmdiocb);
+	lpfc_nlp_put(ndlp);
+}
+
+static void
+lpfc_format_edc_cgn_desc(struct lpfc_hba *phba, struct fc_diag_cg_sig_desc *cgd)
+{
+	/* We are assuming cgd was zero'ed before calling this routine */
+
+	/* Configure the congestion detection capability */
+	cgd->desc_tag = cpu_to_be32(ELS_DTAG_CG_SIGNAL_CAP);
+
+	/* Descriptor len doesn't include the tag or len fields. */
+	cgd->desc_len = cpu_to_be32(
+		FC_TLV_DESC_LENGTH_FROM_SZ(struct fc_diag_cg_sig_desc));
+
+	/* xmt_signal_capability already set to EDC_CG_SIG_NOTSUPPORTED.
+	 * xmt_signal_frequency.count already set to 0.
+	 * xmt_signal_frequency.units already set to 0.
+	 */
+
+	if (phba->cmf_active_mode == LPFC_CFG_OFF) {
+		/* rcv_signal_capability already set to EDC_CG_SIG_NOTSUPPORTED.
+		 * rcv_signal_frequency.count already set to 0.
+		 * rcv_signal_frequency.units already set to 0.
+		 */
+		phba->cgn_sig_freq = 0;
+		return;
+	}
+	switch (phba->cgn_reg_signal) {
+	case EDC_CG_SIG_WARN_ONLY:
+		cgd->rcv_signal_capability = cpu_to_be32(EDC_CG_SIG_WARN_ONLY);
+		break;
+	case EDC_CG_SIG_WARN_ALARM:
+		cgd->rcv_signal_capability = cpu_to_be32(EDC_CG_SIG_WARN_ALARM);
+		break;
+	default:
+		/* rcv_signal_capability left 0 thus no support */
+		break;
+	}
+
+	/* We start negotiation with lpfc_fabric_cgn_frequency, after
+	 * the completion we settle on the higher frequency.
+	 */
+	cgd->rcv_signal_frequency.count =
+		cpu_to_be16(lpfc_fabric_cgn_frequency);
+	cgd->rcv_signal_frequency.units =
+		cpu_to_be16(EDC_CG_SIGFREQ_MSEC);
+}
+
+ /**
+  * lpfc_issue_els_edc - Exchange Diagnostic Capabilities with the fabric.
+  * @vport: pointer to a host virtual N_Port data structure.
+  * @retry: retry counter for the command iocb.
+  *
+  * This routine issues an ELS EDC to the F-Port Controller to communicate
+  * this N_Port's support of hardware signals in its Congestion
+  * Capabilities Descriptor.
+  *
+  * Note: This routine does not check if one or more signals are
+  * set in the cgn_reg_signal parameter.  The caller makes the
+  * decision to enforce cgn_reg_signal as nonzero or zero depending
+  * on the conditions.  During Fabric requests, the driver
+  * requires cgn_reg_signals to be nonzero.  But a dynamic request
+  * to set the congestion mode to OFF from Monitor or Manage
+  * would correctly issue an EDC with no signals enabled to
+  * turn off switch functionality and then update the FW.
+  *
+  * Return code
+  *   0 - Successfully issued edc command
+  *   1 - Failed to issue edc command
+  **/
+int
+lpfc_issue_els_edc(struct lpfc_vport *vport, uint8_t retry)
+{
+	struct lpfc_hba  *phba = vport->phba;
+	struct lpfc_iocbq *elsiocb;
+	struct lpfc_els_edc_req *edc_req;
+	struct fc_diag_cg_sig_desc *cgn_desc;
+	u16 cmdsize;
+	struct lpfc_nodelist *ndlp;
+	u8 *pcmd = NULL;
+	u32 edc_req_size, cgn_desc_size;
+	int rc;
+
+	if (vport->port_type == LPFC_NPIV_PORT)
+		return -EACCES;
+
+	ndlp = lpfc_findnode_did(vport, Fabric_DID);
+	if (!ndlp || ndlp->nlp_state != NLP_STE_UNMAPPED_NODE)
+		return -ENODEV;
+
+	/* If HBA doesn't support signals, drop into RDF */
+	if (!phba->cgn_init_reg_signal)
+		goto try_rdf;
+
+	edc_req_size = sizeof(struct fc_els_edc);
+	cgn_desc_size = sizeof(struct fc_diag_cg_sig_desc);
+	cmdsize = edc_req_size + cgn_desc_size;
+	elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp,
+				     ndlp->nlp_DID, ELS_CMD_EDC);
+	if (!elsiocb)
+		goto try_rdf;
+
+	/* Configure the payload for the supported Diagnostics capabilities. */
+	pcmd = (u8 *)(((struct lpfc_dmabuf *)elsiocb->context2)->virt);
+	memset(pcmd, 0, cmdsize);
+	edc_req = (struct lpfc_els_edc_req *)pcmd;
+	edc_req->edc.desc_len = cpu_to_be32(cgn_desc_size);
+	edc_req->edc.edc_cmd = ELS_EDC;
+
+	cgn_desc = &edc_req->cgn_desc;
+
+	lpfc_format_edc_cgn_desc(phba, cgn_desc);
+
+	phba->cgn_sig_freq = lpfc_fabric_cgn_frequency;
+
+	lpfc_printf_vlog(vport, KERN_INFO, LOG_CGN_MGMT,
+			 "4623 Xmit EDC to remote "
+			 "NPORT x%x reg_sig x%x reg_fpin:x%x\n",
+			 ndlp->nlp_DID, phba->cgn_reg_signal,
+			 phba->cgn_reg_fpin);
+
+	elsiocb->iocb_cmpl = lpfc_cmpl_els_disc_cmd;
+	elsiocb->context1 = lpfc_nlp_get(ndlp);
+	if (!elsiocb->context1) {
+		lpfc_els_free_iocb(phba, elsiocb);
+		return -EIO;
+	}
+
+	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
+			      "Issue EDC:     did:x%x refcnt %d",
+			      ndlp->nlp_DID, kref_read(&ndlp->kref), 0);
+	rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0);
+	if (rc == IOCB_ERROR) {
+		/* The additional lpfc_nlp_put will cause the following
+		 * lpfc_els_free_iocb routine to trigger the rlease of
+		 * the node.
+		 */
+		lpfc_els_free_iocb(phba, elsiocb);
+		lpfc_nlp_put(ndlp);
+		goto try_rdf;
+	}
+	return 0;
+try_rdf:
+	phba->cgn_reg_fpin = LPFC_CGN_FPIN_WARN | LPFC_CGN_FPIN_ALARM;
+	phba->cgn_reg_signal = EDC_CG_SIG_NOTSUPPORTED;
+	rc = lpfc_issue_els_rdf(vport, 0);
+	return rc;
+}
+
+/**
  * lpfc_cancel_retry_delay_tmo - Cancel the timer with delayed iocb-cmd retry
  * @vport: pointer to a host virtual N_Port data structure.
  * @nlp: pointer to a node-list data structure.
@@ -4378,7 +4830,7 @@ out_retry:
 			    (cmd == ELS_CMD_NVMEPRLI))
 				lpfc_nlp_set_state(vport, ndlp,
 					NLP_STE_PRLI_ISSUE);
-			else
+			else if (cmd != ELS_CMD_ADISC)
 				lpfc_nlp_set_state(vport, ndlp,
 					NLP_STE_NPR_NODE);
 			ndlp->nlp_last_elscmd = cmd;
@@ -4520,7 +4972,7 @@ lpfc_els_free_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *elsiocb)
 {
 	struct lpfc_dmabuf *buf_ptr, *buf_ptr1;
 
-	/* The I/O job is complete.  Clear the context1 data. */
+	/* The I/O iocb is complete.  Clear the context1 data. */
 	elsiocb->context1 = NULL;
 
 	/* context2  = cmd,  context2->next = rsp, context3 = bpl */
@@ -4612,6 +5064,15 @@ lpfc_cmpl_els_logo_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 		goto out;
 
 	if (ndlp->nlp_state == NLP_STE_NPR_NODE) {
+
+		/* If PLOGI is being retried, PLOGI completion will cleanup the
+		 * node. The NLP_NPR_2B_DISC flag needs to be retained to make
+		 * progress on nodes discovered from last RSCN.
+		 */
+		if ((ndlp->nlp_flag & NLP_DELAY_TMO) &&
+		    (ndlp->nlp_last_elscmd == ELS_CMD_PLOGI))
+			goto out;
+
 		/* NPort Recovery mode or node is just allocated */
 		if (!lpfc_nlp_not_used(ndlp)) {
 			/* A LOGO is completing and the node is in NPR state.
@@ -5158,6 +5619,86 @@ lpfc_els_rsp_reject(struct lpfc_vport *vport, uint32_t rejectError,
 	return 0;
 }
 
+ /**
+  * lpfc_issue_els_edc_rsp - Exchange Diagnostic Capabilities with the fabric.
+  * @vport: pointer to a host virtual N_Port data structure.
+  * @cmdiocb: pointer to the original lpfc command iocb data structure.
+  * @ndlp: NPort to where rsp is directed
+  *
+  * This routine issues an EDC ACC RSP to the F-Port Controller to communicate
+  * this N_Port's support of hardware signals in its Congestion
+  * Capabilities Descriptor.
+  *
+  * Return code
+  *   0 - Successfully issued edc rsp command
+  *   1 - Failed to issue edc rsp command
+  **/
+static int
+lpfc_issue_els_edc_rsp(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
+		       struct lpfc_nodelist *ndlp)
+{
+	struct lpfc_hba  *phba = vport->phba;
+	struct lpfc_els_edc_rsp *edc_rsp;
+	struct lpfc_iocbq *elsiocb;
+	IOCB_t *icmd, *cmd;
+	uint8_t *pcmd;
+	int cmdsize, rc;
+
+	cmdsize = sizeof(struct lpfc_els_edc_rsp);
+	elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, cmdiocb->retry,
+				     ndlp, ndlp->nlp_DID, ELS_CMD_ACC);
+	if (!elsiocb)
+		return 1;
+
+	icmd = &elsiocb->iocb;
+	cmd = &cmdiocb->iocb;
+	icmd->ulpContext = cmd->ulpContext;     /* Xri / rx_id */
+	icmd->unsli3.rcvsli3.ox_id = cmd->unsli3.rcvsli3.ox_id;
+	pcmd = (((struct lpfc_dmabuf *)elsiocb->context2)->virt);
+	memset(pcmd, 0, cmdsize);
+
+	edc_rsp = (struct lpfc_els_edc_rsp *)pcmd;
+	edc_rsp->edc_rsp.acc_hdr.la_cmd = ELS_LS_ACC;
+	edc_rsp->edc_rsp.desc_list_len = cpu_to_be32(
+		FC_TLV_DESC_LENGTH_FROM_SZ(struct lpfc_els_edc_rsp));
+	edc_rsp->edc_rsp.lsri.desc_tag = cpu_to_be32(ELS_DTAG_LS_REQ_INFO);
+	edc_rsp->edc_rsp.lsri.desc_len = cpu_to_be32(
+		FC_TLV_DESC_LENGTH_FROM_SZ(struct fc_els_lsri_desc));
+	edc_rsp->edc_rsp.lsri.rqst_w0.cmd = ELS_EDC;
+	lpfc_format_edc_cgn_desc(phba, &edc_rsp->cgn_desc);
+
+	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP,
+			      "Issue EDC ACC:      did:x%x flg:x%x refcnt %d",
+			      ndlp->nlp_DID, ndlp->nlp_flag,
+			      kref_read(&ndlp->kref));
+	elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
+
+	phba->fc_stat.elsXmitACC++;
+	elsiocb->context1 = lpfc_nlp_get(ndlp);
+	if (!elsiocb->context1) {
+		lpfc_els_free_iocb(phba, elsiocb);
+		return 1;
+	}
+
+	rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0);
+	if (rc == IOCB_ERROR) {
+		lpfc_els_free_iocb(phba, elsiocb);
+		lpfc_nlp_put(ndlp);
+		return 1;
+	}
+
+	/* Xmit ELS ACC response tag <ulpIoTag> */
+	lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
+			 "0152 Xmit EDC ACC response Status: x%x, IoTag: x%x, "
+			 "XRI: x%x, DID: x%x, nlp_flag: x%x nlp_state: x%x "
+			 "RPI: x%x, fc_flag x%x\n",
+			 rc, elsiocb->iotag, elsiocb->sli4_xritag,
+			 ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state,
+			 ndlp->nlp_rpi, vport->fc_flag);
+
+	return 0;
+}
+
 /**
  * lpfc_els_rsp_adisc_acc - Prepare and issue acc response to adisc iocb cmd
  * @vport: pointer to a virtual N_Port data structure.
@@ -5657,25 +6198,40 @@ lpfc_els_disc_adisc(struct lpfc_vport *vport)
 
 	/* go thru NPR nodes and issue any remaining ELS ADISCs */
 	list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) {
-		if (ndlp->nlp_state == NLP_STE_NPR_NODE &&
-		    (ndlp->nlp_flag & NLP_NPR_2B_DISC) != 0 &&
-		    (ndlp->nlp_flag & NLP_NPR_ADISC) != 0) {
-			spin_lock_irq(&ndlp->lock);
-			ndlp->nlp_flag &= ~NLP_NPR_ADISC;
-			spin_unlock_irq(&ndlp->lock);
-			ndlp->nlp_prev_state = ndlp->nlp_state;
-			lpfc_nlp_set_state(vport, ndlp, NLP_STE_ADISC_ISSUE);
-			lpfc_issue_els_adisc(vport, ndlp, 0);
-			sentadisc++;
-			vport->num_disc_nodes++;
-			if (vport->num_disc_nodes >=
-			    vport->cfg_discovery_threads) {
-				spin_lock_irq(shost->host_lock);
-				vport->fc_flag |= FC_NLP_MORE;
-				spin_unlock_irq(shost->host_lock);
-				break;
-			}
+
+		if (ndlp->nlp_state != NLP_STE_NPR_NODE ||
+		    !(ndlp->nlp_flag & NLP_NPR_ADISC))
+			continue;
+
+		spin_lock_irq(&ndlp->lock);
+		ndlp->nlp_flag &= ~NLP_NPR_ADISC;
+		spin_unlock_irq(&ndlp->lock);
+
+		if (!(ndlp->nlp_flag & NLP_NPR_2B_DISC)) {
+			/* This node was marked for ADISC but was not picked
+			 * for discovery. This is possible if the node was
+			 * missing in gidft response.
+			 *
+			 * At time of marking node for ADISC, we skipped unreg
+			 * from backend
+			 */
+			lpfc_nlp_unreg_node(vport, ndlp);
+			continue;
+		}
+
+		ndlp->nlp_prev_state = ndlp->nlp_state;
+		lpfc_nlp_set_state(vport, ndlp, NLP_STE_ADISC_ISSUE);
+		lpfc_issue_els_adisc(vport, ndlp, 0);
+		sentadisc++;
+		vport->num_disc_nodes++;
+		if (vport->num_disc_nodes >=
+				vport->cfg_discovery_threads) {
+			spin_lock_irq(shost->host_lock);
+			vport->fc_flag |= FC_NLP_MORE;
+			spin_unlock_irq(shost->host_lock);
+			break;
 		}
+
 	}
 	if (sentadisc == 0) {
 		spin_lock_irq(shost->host_lock);
@@ -6087,6 +6643,12 @@ lpfc_rdp_res_speed(struct fc_rdp_port_speed_desc *desc, struct lpfc_hba *phba)
 	case LPFC_LINK_SPEED_64GHZ:
 		rdp_speed = RDP_PS_64GB;
 		break;
+	case LPFC_LINK_SPEED_128GHZ:
+		rdp_speed = RDP_PS_128GB;
+		break;
+	case LPFC_LINK_SPEED_256GHZ:
+		rdp_speed = RDP_PS_256GB;
+		break;
 	default:
 		rdp_speed = RDP_PS_UNKNOWN;
 		break;
@@ -6094,6 +6656,8 @@ lpfc_rdp_res_speed(struct fc_rdp_port_speed_desc *desc, struct lpfc_hba *phba)
 
 	desc->info.port_speed.speed = cpu_to_be16(rdp_speed);
 
+	if (phba->lmt & LMT_256Gb)
+		rdp_cap |= RDP_PS_256GB;
 	if (phba->lmt & LMT_128Gb)
 		rdp_cap |= RDP_PS_128GB;
 	if (phba->lmt & LMT_64Gb)
@@ -6886,13 +7450,6 @@ lpfc_rscn_recovery_check(struct lpfc_vport *vport)
 			continue;
 		}
 
-		/* Check to see if we need to NVME rescan this target
-		 * remoteport.
-		 */
-		if (ndlp->nlp_fc4_type & NLP_FC4_NVME &&
-		    ndlp->nlp_type & (NLP_NVME_TARGET | NLP_NVME_DISCOVERY))
-			lpfc_nvme_rescan_port(vport, ndlp);
-
 		lpfc_disc_state_machine(vport, ndlp, NULL,
 					NLP_EVT_DEVICE_RECOVERY);
 		lpfc_cancel_retry_delay_tmo(vport, ndlp);
@@ -8212,6 +8769,125 @@ lpfc_els_rcv_fan(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
 }
 
 /**
+ * lpfc_els_rcv_edc - Process an unsolicited EDC iocb
+ * @vport: pointer to a host virtual N_Port data structure.
+ * @cmdiocb: pointer to lpfc command iocb data structure.
+ * @ndlp: pointer to a node-list data structure.
+ *
+ * Return code
+ *   0 - Successfully processed echo iocb (currently always return 0)
+ **/
+static int
+lpfc_els_rcv_edc(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
+		 struct lpfc_nodelist *ndlp)
+{
+	struct lpfc_hba  *phba = vport->phba;
+	struct fc_els_edc *edc_req;
+	struct fc_tlv_desc *tlv;
+	uint8_t *payload;
+	uint32_t *ptr, dtag;
+	const char *dtag_nm;
+	int desc_cnt = 0, bytes_remain;
+	bool rcv_cap_desc = false;
+
+	payload = ((struct lpfc_dmabuf *)cmdiocb->context2)->virt;
+
+	edc_req = (struct fc_els_edc *)payload;
+	bytes_remain = be32_to_cpu(edc_req->desc_len);
+
+	ptr = (uint32_t *)payload;
+	lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS | LOG_CGN_MGMT,
+			 "3319 Rcv EDC payload len %d: x%x x%x x%x\n",
+			 bytes_remain, be32_to_cpu(*ptr),
+			 be32_to_cpu(*(ptr + 1)), be32_to_cpu(*(ptr + 2)));
+
+	/* No signal support unless there is a congestion descriptor */
+	phba->cgn_reg_signal = EDC_CG_SIG_NOTSUPPORTED;
+	phba->cgn_sig_freq = 0;
+	phba->cgn_reg_fpin = LPFC_CGN_FPIN_ALARM | LPFC_CGN_FPIN_WARN;
+
+	if (bytes_remain <= 0)
+		goto out;
+
+	tlv = edc_req->desc;
+
+	/*
+	 * cycle through EDC diagnostic descriptors to find the
+	 * congestion signaling capability descriptor
+	 */
+	while (bytes_remain && !rcv_cap_desc) {
+		if (bytes_remain < FC_TLV_DESC_HDR_SZ) {
+			lpfc_printf_log(phba, KERN_WARNING, LOG_CGN_MGMT,
+					"6464 Truncated TLV hdr on "
+					"Diagnostic descriptor[%d]\n",
+					desc_cnt);
+			goto out;
+		}
+
+		dtag = be32_to_cpu(tlv->desc_tag);
+		switch (dtag) {
+		case ELS_DTAG_LNK_FAULT_CAP:
+			if (bytes_remain < FC_TLV_DESC_SZ_FROM_LENGTH(tlv) ||
+			    FC_TLV_DESC_SZ_FROM_LENGTH(tlv) !=
+				sizeof(struct fc_diag_lnkflt_desc)) {
+				lpfc_printf_log(
+					phba, KERN_WARNING, LOG_CGN_MGMT,
+					"6465 Truncated Link Fault Diagnostic "
+					"descriptor[%d]: %d vs 0x%zx 0x%zx\n",
+					desc_cnt, bytes_remain,
+					FC_TLV_DESC_SZ_FROM_LENGTH(tlv),
+					sizeof(struct fc_diag_cg_sig_desc));
+				goto out;
+			}
+			/* No action for Link Fault descriptor for now */
+			break;
+		case ELS_DTAG_CG_SIGNAL_CAP:
+			if (bytes_remain < FC_TLV_DESC_SZ_FROM_LENGTH(tlv) ||
+			    FC_TLV_DESC_SZ_FROM_LENGTH(tlv) !=
+				sizeof(struct fc_diag_cg_sig_desc)) {
+				lpfc_printf_log(
+					phba, KERN_WARNING, LOG_CGN_MGMT,
+					"6466 Truncated cgn signal Diagnostic "
+					"descriptor[%d]: %d vs 0x%zx 0x%zx\n",
+					desc_cnt, bytes_remain,
+					FC_TLV_DESC_SZ_FROM_LENGTH(tlv),
+					sizeof(struct fc_diag_cg_sig_desc));
+				goto out;
+			}
+
+			phba->cgn_reg_fpin = phba->cgn_init_reg_fpin;
+			phba->cgn_reg_signal = phba->cgn_init_reg_signal;
+
+			/* We start negotiation with lpfc_fabric_cgn_frequency.
+			 * When we process the EDC, we will settle on the
+			 * higher frequency.
+			 */
+			phba->cgn_sig_freq = lpfc_fabric_cgn_frequency;
+
+			lpfc_least_capable_settings(
+				phba, (struct fc_diag_cg_sig_desc *)tlv);
+			rcv_cap_desc = true;
+			break;
+		default:
+			dtag_nm = lpfc_get_tlv_dtag_nm(dtag);
+			lpfc_printf_log(phba, KERN_WARNING, LOG_CGN_MGMT,
+					"6467 unknown Diagnostic "
+					"Descriptor[%d]: tag x%x (%s)\n",
+					desc_cnt, dtag, dtag_nm);
+		}
+		bytes_remain -= FC_TLV_DESC_SZ_FROM_LENGTH(tlv);
+		tlv = fc_tlv_next_desc(tlv);
+		desc_cnt++;
+	}
+out:
+	/* Need to send back an ACC */
+	lpfc_issue_els_edc_rsp(vport, cmdiocb, ndlp);
+
+	lpfc_config_cgn_signal(phba);
+	return 0;
+}
+
+/**
  * lpfc_els_timeout - Handler funciton to the els timer
  * @t: timer context used to obtain the vport.
  *
@@ -8668,50 +9344,304 @@ lpfc_send_els_event(struct lpfc_vport *vport,
 }
 
 
-DECLARE_ENUM2STR_LOOKUP(lpfc_get_tlv_dtag_nm, fc_ls_tlv_dtag,
-			FC_LS_TLV_DTAG_INIT);
-
 DECLARE_ENUM2STR_LOOKUP(lpfc_get_fpin_li_event_nm, fc_fpin_li_event_types,
 			FC_FPIN_LI_EVT_TYPES_INIT);
 
+DECLARE_ENUM2STR_LOOKUP(lpfc_get_fpin_deli_event_nm, fc_fpin_deli_event_types,
+			FC_FPIN_DELI_EVT_TYPES_INIT);
+
+DECLARE_ENUM2STR_LOOKUP(lpfc_get_fpin_congn_event_nm, fc_fpin_congn_event_types,
+			FC_FPIN_CONGN_EVT_TYPES_INIT);
+
+DECLARE_ENUM2STR_LOOKUP(lpfc_get_fpin_congn_severity_nm,
+			fc_fpin_congn_severity_types,
+			FC_FPIN_CONGN_SEVERITY_INIT);
+
+
+/**
+ * lpfc_display_fpin_wwpn - Display WWPNs accessible by the attached port
+ * @phba: Pointer to phba object.
+ * @wwnlist: Pointer to list of WWPNs in FPIN payload
+ * @cnt: count of WWPNs in FPIN payload
+ *
+ * This routine is called by LI and PC descriptors.
+ * Limit the number of WWPNs displayed to 6 log messages, 6 per log message
+ */
+static void
+lpfc_display_fpin_wwpn(struct lpfc_hba *phba, __be64 *wwnlist, u32 cnt)
+{
+	char buf[LPFC_FPIN_WWPN_LINE_SZ];
+	__be64 wwn;
+	u64 wwpn;
+	int i, len;
+	int line = 0;
+	int wcnt = 0;
+	bool endit = false;
+
+	len = scnprintf(buf, LPFC_FPIN_WWPN_LINE_SZ, "Accessible WWPNs:");
+	for (i = 0; i < cnt; i++) {
+		/* Are we on the last WWPN */
+		if (i == (cnt - 1))
+			endit = true;
+
+		/* Extract the next WWPN from the payload */
+		wwn = *wwnlist++;
+		wwpn = be64_to_cpu(wwn);
+		len += scnprintf(buf + len, LPFC_FPIN_WWPN_LINE_SZ,
+				 " %016llx", wwpn);
+
+		/* Log a message if we are on the last WWPN
+		 * or if we hit the max allowed per message.
+		 */
+		wcnt++;
+		if (wcnt == LPFC_FPIN_WWPN_LINE_CNT || endit) {
+			buf[len] = 0;
+			lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+					"4686 %s\n", buf);
+
+			/* Check if we reached the last WWPN */
+			if (endit)
+				return;
+
+			/* Limit the number of log message displayed per FPIN */
+			line++;
+			if (line == LPFC_FPIN_WWPN_NUM_LINE) {
+				lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+						"4687 %d WWPNs Truncated\n",
+						cnt - i - 1);
+				return;
+			}
+
+			/* Start over with next log message */
+			wcnt = 0;
+			len = scnprintf(buf, LPFC_FPIN_WWPN_LINE_SZ,
+					"Additional WWPNs:");
+		}
+	}
+}
+
 /**
  * lpfc_els_rcv_fpin_li - Process an FPIN Link Integrity Event.
- * @vport: Pointer to vport object.
+ * @phba: Pointer to phba object.
  * @tlv:  Pointer to the Link Integrity Notification Descriptor.
  *
- * This function processes a link integrity FPIN event by
- * logging a message
+ * This function processes a Link Integrity FPIN event by logging a message.
  **/
 static void
-lpfc_els_rcv_fpin_li(struct lpfc_vport *vport, struct fc_tlv_desc *tlv)
+lpfc_els_rcv_fpin_li(struct lpfc_hba *phba, struct fc_tlv_desc *tlv)
 {
 	struct fc_fn_li_desc *li = (struct fc_fn_li_desc *)tlv;
 	const char *li_evt_str;
-	u32 li_evt;
+	u32 li_evt, cnt;
 
 	li_evt = be16_to_cpu(li->event_type);
 	li_evt_str = lpfc_get_fpin_li_event_nm(li_evt);
+	cnt = be32_to_cpu(li->pname_count);
 
-	lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
-			 "4680 FPIN Link Integrity %s (x%x) "
-			 "Detecting PN x%016llx Attached PN x%016llx "
-			 "Duration %d mSecs Count %d Port Cnt %d\n",
-			 li_evt_str, li_evt,
-			 be64_to_cpu(li->detecting_wwpn),
-			 be64_to_cpu(li->attached_wwpn),
-			 be32_to_cpu(li->event_threshold),
-			 be32_to_cpu(li->event_count),
-			 be32_to_cpu(li->pname_count));
+	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+			"4680 FPIN Link Integrity %s (x%x) "
+			"Detecting PN x%016llx Attached PN x%016llx "
+			"Duration %d mSecs Count %d Port Cnt %d\n",
+			li_evt_str, li_evt,
+			be64_to_cpu(li->detecting_wwpn),
+			be64_to_cpu(li->attached_wwpn),
+			be32_to_cpu(li->event_threshold),
+			be32_to_cpu(li->event_count), cnt);
+
+	lpfc_display_fpin_wwpn(phba, (__be64 *)&li->pname_list, cnt);
+}
+
+/**
+ * lpfc_els_rcv_fpin_del - Process an FPIN Delivery Event.
+ * @phba: Pointer to hba object.
+ * @tlv:  Pointer to the Delivery Notification Descriptor TLV
+ *
+ * This function processes a Delivery FPIN event by logging a message.
+ **/
+static void
+lpfc_els_rcv_fpin_del(struct lpfc_hba *phba, struct fc_tlv_desc *tlv)
+{
+	struct fc_fn_deli_desc *del = (struct fc_fn_deli_desc *)tlv;
+	const char *del_rsn_str;
+	u32 del_rsn;
+	__be32 *frame;
+
+	del_rsn = be16_to_cpu(del->deli_reason_code);
+	del_rsn_str = lpfc_get_fpin_deli_event_nm(del_rsn);
+
+	/* Skip over desc_tag/desc_len header to payload */
+	frame = (__be32 *)(del + 1);
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+			"4681 FPIN Delivery %s (x%x) "
+			"Detecting PN x%016llx Attached PN x%016llx "
+			"DiscHdr0  x%08x "
+			"DiscHdr1 x%08x DiscHdr2 x%08x DiscHdr3 x%08x "
+			"DiscHdr4 x%08x DiscHdr5 x%08x\n",
+			del_rsn_str, del_rsn,
+			be64_to_cpu(del->detecting_wwpn),
+			be64_to_cpu(del->attached_wwpn),
+			be32_to_cpu(frame[0]),
+			be32_to_cpu(frame[1]),
+			be32_to_cpu(frame[2]),
+			be32_to_cpu(frame[3]),
+			be32_to_cpu(frame[4]),
+			be32_to_cpu(frame[5]));
 }
 
+/**
+ * lpfc_els_rcv_fpin_peer_cgn - Process a FPIN Peer Congestion Event.
+ * @phba: Pointer to hba object.
+ * @tlv:  Pointer to the Peer Congestion Notification Descriptor TLV
+ *
+ * This function processes a Peer Congestion FPIN event by logging a message.
+ **/
 static void
-lpfc_els_rcv_fpin(struct lpfc_vport *vport, struct fc_els_fpin *fpin,
-		  u32 fpin_length)
+lpfc_els_rcv_fpin_peer_cgn(struct lpfc_hba *phba, struct fc_tlv_desc *tlv)
 {
-	struct fc_tlv_desc *tlv;
+	struct fc_fn_peer_congn_desc *pc = (struct fc_fn_peer_congn_desc *)tlv;
+	const char *pc_evt_str;
+	u32 pc_evt, cnt;
+
+	pc_evt = be16_to_cpu(pc->event_type);
+	pc_evt_str = lpfc_get_fpin_congn_event_nm(pc_evt);
+	cnt = be32_to_cpu(pc->pname_count);
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT | LOG_ELS,
+			"4684 FPIN Peer Congestion %s (x%x) "
+			"Duration %d mSecs "
+			"Detecting PN x%016llx Attached PN x%016llx "
+			"Impacted Port Cnt %d\n",
+			pc_evt_str, pc_evt,
+			be32_to_cpu(pc->event_period),
+			be64_to_cpu(pc->detecting_wwpn),
+			be64_to_cpu(pc->attached_wwpn),
+			cnt);
+
+	lpfc_display_fpin_wwpn(phba, (__be64 *)&pc->pname_list, cnt);
+}
+
+/**
+ * lpfc_els_rcv_fpin_cgn - Process an FPIN Congestion notification
+ * @phba: Pointer to hba object.
+ * @tlv:  Pointer to the Congestion Notification Descriptor TLV
+ *
+ * This function processes an FPIN Congestion Notifiction.  The notification
+ * could be an Alarm or Warning.  This routine feeds that data into driver's
+ * running congestion algorithm. It also processes the FPIN by
+ * logging a message. It returns 1 to indicate deliver this message
+ * to the upper layer or 0 to indicate don't deliver it.
+ **/
+static int
+lpfc_els_rcv_fpin_cgn(struct lpfc_hba *phba, struct fc_tlv_desc *tlv)
+{
+	struct lpfc_cgn_info *cp;
+	struct fc_fn_congn_desc *cgn = (struct fc_fn_congn_desc *)tlv;
+	const char *cgn_evt_str;
+	u32 cgn_evt;
+	const char *cgn_sev_str;
+	u32 cgn_sev;
+	uint16_t value;
+	u32 crc;
+	bool nm_log = false;
+	int rc = 1;
+
+	cgn_evt = be16_to_cpu(cgn->event_type);
+	cgn_evt_str = lpfc_get_fpin_congn_event_nm(cgn_evt);
+	cgn_sev = cgn->severity;
+	cgn_sev_str = lpfc_get_fpin_congn_severity_nm(cgn_sev);
+
+	/* The driver only takes action on a Credit Stall or Oversubscription
+	 * event type to engage the IO algorithm.  The driver prints an
+	 * unmaskable message only for Lost Credit and Credit Stall.
+	 * TODO: Still need to have definition of host action on clear,
+	 *       lost credit and device specific event types.
+	 */
+	switch (cgn_evt) {
+	case FPIN_CONGN_LOST_CREDIT:
+		nm_log = true;
+		break;
+	case FPIN_CONGN_CREDIT_STALL:
+		nm_log = true;
+		fallthrough;
+	case FPIN_CONGN_OVERSUBSCRIPTION:
+		if (cgn_evt == FPIN_CONGN_OVERSUBSCRIPTION)
+			nm_log = false;
+		switch (cgn_sev) {
+		case FPIN_CONGN_SEVERITY_ERROR:
+			/* Take action here for an Alarm event */
+			if (phba->cmf_active_mode != LPFC_CFG_OFF) {
+				if (phba->cgn_reg_fpin & LPFC_CGN_FPIN_ALARM) {
+					/* Track of alarm cnt for cgn_info */
+					atomic_inc(&phba->cgn_fabric_alarm_cnt);
+					/* Track of alarm cnt for SYNC_WQE */
+					atomic_inc(&phba->cgn_sync_alarm_cnt);
+				}
+				goto cleanup;
+			}
+			break;
+		case FPIN_CONGN_SEVERITY_WARNING:
+			/* Take action here for a Warning event */
+			if (phba->cmf_active_mode != LPFC_CFG_OFF) {
+				if (phba->cgn_reg_fpin & LPFC_CGN_FPIN_WARN) {
+					/* Track of warning cnt for cgn_info */
+					atomic_inc(&phba->cgn_fabric_warn_cnt);
+					/* Track of warning cnt for SYNC_WQE */
+					atomic_inc(&phba->cgn_sync_warn_cnt);
+				}
+cleanup:
+				/* Save frequency in ms */
+				phba->cgn_fpin_frequency =
+					be32_to_cpu(cgn->event_period);
+				value = phba->cgn_fpin_frequency;
+				if (phba->cgn_i) {
+					cp = (struct lpfc_cgn_info *)
+						phba->cgn_i->virt;
+					if (phba->cgn_reg_fpin &
+						LPFC_CGN_FPIN_ALARM)
+						cp->cgn_alarm_freq =
+							cpu_to_le16(value);
+					if (phba->cgn_reg_fpin &
+						LPFC_CGN_FPIN_WARN)
+						cp->cgn_warn_freq =
+							cpu_to_le16(value);
+					crc = lpfc_cgn_calc_crc32
+						(cp,
+						LPFC_CGN_INFO_SZ,
+						LPFC_CGN_CRC32_SEED);
+					cp->cgn_info_crc = cpu_to_le32(crc);
+				}
+
+				/* Don't deliver to upper layer since
+				 * driver took action on this tlv.
+				 */
+				rc = 0;
+			}
+			break;
+		}
+		break;
+	}
+
+	/* Change the log level to unmaskable for the following event types. */
+	lpfc_printf_log(phba, (nm_log ? KERN_WARNING : KERN_INFO),
+			LOG_CGN_MGMT | LOG_ELS,
+			"4683 FPIN CONGESTION %s type %s (x%x) Event "
+			"Duration %d mSecs\n",
+			cgn_sev_str, cgn_evt_str, cgn_evt,
+			be32_to_cpu(cgn->event_period));
+	return rc;
+}
+
+void
+lpfc_els_rcv_fpin(struct lpfc_vport *vport, void *p, u32 fpin_length)
+{
+	struct lpfc_hba *phba = vport->phba;
+	struct fc_els_fpin *fpin = (struct fc_els_fpin *)p;
+	struct fc_tlv_desc *tlv, *first_tlv, *current_tlv;
 	const char *dtag_nm;
-	uint32_t desc_cnt = 0, bytes_remain;
-	u32 dtag;
+	int desc_cnt = 0, bytes_remain, cnt;
+	u32 dtag, deliver = 0;
+	int len;
 
 	/* FPINs handled only if we are in the right discovery state */
 	if (vport->port_state < LPFC_DISC_AUTH)
@@ -8721,35 +9651,92 @@ lpfc_els_rcv_fpin(struct lpfc_vport *vport, struct fc_els_fpin *fpin,
 	if (fpin_length < sizeof(struct fc_els_fpin))
 		return;
 
+	/* Sanity check descriptor length. The desc_len value does not
+	 * include space for the ELS command and the desc_len fields.
+	 */
+	len = be32_to_cpu(fpin->desc_len);
+	if (fpin_length < len + sizeof(struct fc_els_fpin)) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_CGN_MGMT,
+				"4671 Bad ELS FPIN length %d: %d\n",
+				len, fpin_length);
+		return;
+	}
+
 	tlv = (struct fc_tlv_desc *)&fpin->fpin_desc[0];
+	first_tlv = tlv;
 	bytes_remain = fpin_length - offsetof(struct fc_els_fpin, fpin_desc);
 	bytes_remain = min_t(u32, bytes_remain, be32_to_cpu(fpin->desc_len));
 
-	/* process each descriptor */
+	/* process each descriptor separately */
 	while (bytes_remain >= FC_TLV_DESC_HDR_SZ &&
 	       bytes_remain >= FC_TLV_DESC_SZ_FROM_LENGTH(tlv)) {
-
 		dtag = be32_to_cpu(tlv->desc_tag);
 		switch (dtag) {
 		case ELS_DTAG_LNK_INTEGRITY:
-			lpfc_els_rcv_fpin_li(vport, tlv);
+			lpfc_els_rcv_fpin_li(phba, tlv);
+			deliver = 1;
+			break;
+		case ELS_DTAG_DELIVERY:
+			lpfc_els_rcv_fpin_del(phba, tlv);
+			deliver = 1;
+			break;
+		case ELS_DTAG_PEER_CONGEST:
+			lpfc_els_rcv_fpin_peer_cgn(phba, tlv);
+			deliver = 1;
+			break;
+		case ELS_DTAG_CONGESTION:
+			deliver = lpfc_els_rcv_fpin_cgn(phba, tlv);
 			break;
 		default:
 			dtag_nm = lpfc_get_tlv_dtag_nm(dtag);
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
-					 "4678  skipped FPIN descriptor[%d]: "
-					 "tag x%x (%s)\n",
-					 desc_cnt, dtag, dtag_nm);
-			break;
+			lpfc_printf_log(phba, KERN_WARNING, LOG_CGN_MGMT,
+					"4678 unknown FPIN descriptor[%d]: "
+					"tag x%x (%s)\n",
+					desc_cnt, dtag, dtag_nm);
+
+			/* If descriptor is bad, drop the rest of the data */
+			return;
 		}
+		lpfc_cgn_update_stat(phba, dtag);
+		cnt = be32_to_cpu(tlv->desc_len);
 
-		desc_cnt++;
+		/* Sanity check descriptor length. The desc_len value does not
+		 * include space for the desc_tag and the desc_len fields.
+		 */
+		len -= (cnt + sizeof(struct fc_tlv_desc));
+		if (len < 0) {
+			dtag_nm = lpfc_get_tlv_dtag_nm(dtag);
+			lpfc_printf_log(phba, KERN_WARNING, LOG_CGN_MGMT,
+					"4672 Bad FPIN descriptor TLV length "
+					"%d: %d %d %s\n",
+					cnt, len, fpin_length, dtag_nm);
+			return;
+		}
+
+		current_tlv = tlv;
 		bytes_remain -= FC_TLV_DESC_SZ_FROM_LENGTH(tlv);
 		tlv = fc_tlv_next_desc(tlv);
-	}
 
-	fc_host_fpin_rcv(lpfc_shost_from_vport(vport), fpin_length,
-			 (char *)fpin);
+		/* Format payload such that the FPIN delivered to the
+		 * upper layer is a single descriptor FPIN.
+		 */
+		if (desc_cnt)
+			memcpy(first_tlv, current_tlv,
+			       (cnt + sizeof(struct fc_els_fpin)));
+
+		/* Adjust the length so that it only reflects a
+		 * single descriptor FPIN.
+		 */
+		fpin_length = cnt + sizeof(struct fc_els_fpin);
+		fpin->desc_len = cpu_to_be32(fpin_length);
+		fpin_length += sizeof(struct fc_els_fpin); /* the entire FPIN */
+
+		/* Send every descriptor individually to the upper layer */
+		if (deliver)
+			fc_host_fpin_rcv(lpfc_shost_from_vport(vport),
+					 fpin_length, (char *)fpin);
+		desc_cnt++;
+	}
 }
 
 /**
@@ -8948,6 +9935,9 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 			break;
 		}
 		lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_LOGO);
+		if (newnode)
+			lpfc_disc_state_machine(vport, ndlp, NULL,
+					NLP_EVT_DEVICE_RM);
 		break;
 	case ELS_CMD_PRLO:
 		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
@@ -9137,6 +10127,9 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 
 		/* There are no replies, so no rjt codes */
 		break;
+	case ELS_CMD_EDC:
+		lpfc_els_rcv_edc(vport, elsiocb, ndlp);
+		break;
 	case ELS_CMD_RDF:
 		phba->fc_stat.elsRcvRDF++;
 		/* Accept RDF only from fabric controller */
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 7cc5920979f8..7195ca0275f9 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -3331,6 +3331,7 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la)
 		case LPFC_LINK_SPEED_32GHZ:
 		case LPFC_LINK_SPEED_64GHZ:
 		case LPFC_LINK_SPEED_128GHZ:
+		case LPFC_LINK_SPEED_256GHZ:
 			break;
 		default:
 			phba->fc_linkspeed = LPFC_LINK_SPEED_UNKNOWN;
@@ -3646,6 +3647,10 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 					phba->wait_4_mlo_maint_flg);
 		}
 		lpfc_mbx_process_link_up(phba, la);
+
+		if (phba->cmf_active_mode != LPFC_CFG_OFF)
+			lpfc_cmf_signal_init(phba);
+
 	} else if (attn_type == LPFC_ATT_LINK_DOWN ||
 		   attn_type == LPFC_ATT_UNEXP_WWPN) {
 		phba->fc_stat.LinkDown++;
@@ -4208,6 +4213,7 @@ lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 	struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(pmb->ctx_buf);
 	struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
 	struct lpfc_vport *vport = pmb->vport;
+	int rc;
 
 	pmb->ctx_buf = NULL;
 	pmb->ctx_ndlp = NULL;
@@ -4283,9 +4289,23 @@ out:
 		/* Issue SCR just before NameServer GID_FT Query */
 		lpfc_issue_els_scr(vport, 0);
 
-		if (!phba->cfg_enable_mi ||
-		    phba->sli4_hba.pc_sli4_params.mi_ver < LPFC_MIB3_SUPPORT)
+		/* Link was bounced or a Fabric LOGO occurred.  Start EDC
+		 * with initial FW values provided the congestion mode is
+		 * not off.  Note that signals may or may not be supported
+		 * by the adapter but FPIN is provided by default for 1
+		 * or both missing signals support.
+		 */
+		if (phba->cmf_active_mode != LPFC_CFG_OFF) {
+			phba->cgn_reg_fpin = phba->cgn_init_reg_fpin;
+			phba->cgn_reg_signal = phba->cgn_init_reg_signal;
+			rc = lpfc_issue_els_edc(vport, 0);
+			lpfc_printf_log(phba, KERN_INFO,
+					LOG_INIT | LOG_ELS | LOG_DISCOVERY,
+					"4220 EDC issue error x%x, Data: x%x\n",
+					rc, phba->cgn_init_reg_signal);
+		} else {
 			lpfc_issue_els_rdf(vport, 0);
+		}
 	}
 
 	vport->fc_ns_retry = 0;
@@ -4501,10 +4521,152 @@ lpfc_nlp_counters(struct lpfc_vport *vport, int state, int count)
 	spin_unlock_irqrestore(shost->host_lock, iflags);
 }
 
+/* Register a node with backend if not already done */
+void
+lpfc_nlp_reg_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
+{
+
+	unsigned long iflags;
+
+	spin_lock_irqsave(&ndlp->lock, iflags);
+	if (ndlp->fc4_xpt_flags & NLP_XPT_REGD) {
+		/* Already registered with backend, trigger rescan */
+		spin_unlock_irqrestore(&ndlp->lock, iflags);
+
+		if (ndlp->fc4_xpt_flags & NVME_XPT_REGD &&
+		    ndlp->nlp_type & (NLP_NVME_TARGET | NLP_NVME_DISCOVERY)) {
+			lpfc_nvme_rescan_port(vport, ndlp);
+		}
+		return;
+	}
+
+	ndlp->fc4_xpt_flags |= NLP_XPT_REGD;
+	spin_unlock_irqrestore(&ndlp->lock, iflags);
+
+	if (lpfc_valid_xpt_node(ndlp)) {
+		vport->phba->nport_event_cnt++;
+		/*
+		 * Tell the fc transport about the port, if we haven't
+		 * already. If we have, and it's a scsi entity, be
+		 */
+		lpfc_register_remote_port(vport, ndlp);
+	}
+
+	/* We are done if we do not have any NVME remote node */
+	if (!(ndlp->nlp_fc4_type & NLP_FC4_NVME))
+		return;
+
+	/* Notify the NVME transport of this new rport. */
+	if (vport->phba->sli_rev >= LPFC_SLI_REV4 &&
+			ndlp->nlp_fc4_type & NLP_FC4_NVME) {
+		if (vport->phba->nvmet_support == 0) {
+			/* Register this rport with the transport.
+			 * Only NVME Target Rports are registered with
+			 * the transport.
+			 */
+			if (ndlp->nlp_type & NLP_NVME_TARGET) {
+				vport->phba->nport_event_cnt++;
+				lpfc_nvme_register_port(vport, ndlp);
+			}
+		} else {
+			/* Just take an NDLP ref count since the
+			 * target does not register rports.
+			 */
+			lpfc_nlp_get(ndlp);
+		}
+	}
+}
+
+/* Unregister a node with backend if not already done */
+void
+lpfc_nlp_unreg_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
+{
+	unsigned long iflags;
+
+	spin_lock_irqsave(&ndlp->lock, iflags);
+	if (!(ndlp->fc4_xpt_flags & NLP_XPT_REGD)) {
+		spin_unlock_irqrestore(&ndlp->lock, iflags);
+		return;
+	}
+
+	ndlp->fc4_xpt_flags &= ~NLP_XPT_REGD;
+	spin_unlock_irqrestore(&ndlp->lock, iflags);
+
+	if (ndlp->rport &&
+	    ndlp->fc4_xpt_flags & SCSI_XPT_REGD) {
+		vport->phba->nport_event_cnt++;
+		lpfc_unregister_remote_port(ndlp);
+	}
+
+	if (ndlp->fc4_xpt_flags & NVME_XPT_REGD) {
+		vport->phba->nport_event_cnt++;
+		if (vport->phba->nvmet_support == 0) {
+			/* Start devloss if target. */
+			if (ndlp->nlp_type & NLP_NVME_TARGET)
+				lpfc_nvme_unregister_port(vport, ndlp);
+		} else {
+			/* NVMET has no upcall. */
+			lpfc_nlp_put(ndlp);
+		}
+	}
+
+}
+
+/*
+ * Adisc state change handling
+ */
+static void
+lpfc_handle_adisc_state(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+		int new_state)
+{
+	switch (new_state) {
+	/*
+	 * Any state to ADISC_ISSUE
+	 * Do nothing, adisc cmpl handling will trigger state changes
+	 */
+	case NLP_STE_ADISC_ISSUE:
+		break;
+
+	/*
+	 * ADISC_ISSUE to mapped states
+	 * Trigger a registration with backend, it will be nop if
+	 * already registered
+	 */
+	case NLP_STE_UNMAPPED_NODE:
+		ndlp->nlp_type |= NLP_FC_NODE;
+		fallthrough;
+	case NLP_STE_MAPPED_NODE:
+		ndlp->nlp_flag &= ~NLP_NODEV_REMOVE;
+		lpfc_nlp_reg_node(vport, ndlp);
+		break;
+
+	/*
+	 * ADISC_ISSUE to non-mapped states
+	 * We are moving from ADISC_ISSUE to a non-mapped state because
+	 * ADISC failed, we would have skipped unregistering with
+	 * backend, attempt it now
+	 */
+	case NLP_STE_NPR_NODE:
+		ndlp->nlp_flag &= ~NLP_RCV_PLOGI;
+		fallthrough;
+	default:
+		lpfc_nlp_unreg_node(vport, ndlp);
+		break;
+	}
+
+}
+
 static void
 lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 		       int old_state, int new_state)
 {
+	/* Trap ADISC changes here */
+	if (new_state == NLP_STE_ADISC_ISSUE ||
+	    old_state == NLP_STE_ADISC_ISSUE) {
+		lpfc_handle_adisc_state(vport, ndlp, new_state);
+		return;
+	}
+
 	if (new_state == NLP_STE_UNMAPPED_NODE) {
 		ndlp->nlp_flag &= ~NLP_NODEV_REMOVE;
 		ndlp->nlp_type |= NLP_FC_NODE;
@@ -4514,60 +4676,17 @@ lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 	if (new_state == NLP_STE_NPR_NODE)
 		ndlp->nlp_flag &= ~NLP_RCV_PLOGI;
 
-	/* FCP and NVME Transport interface */
+	/* Reg/Unreg for FCP and NVME Transport interface */
 	if ((old_state == NLP_STE_MAPPED_NODE ||
 	     old_state == NLP_STE_UNMAPPED_NODE)) {
-		if (ndlp->rport &&
-		    lpfc_valid_xpt_node(ndlp)) {
-			vport->phba->nport_event_cnt++;
-			lpfc_unregister_remote_port(ndlp);
-		}
-
-		if (ndlp->nlp_fc4_type & NLP_FC4_NVME) {
-			vport->phba->nport_event_cnt++;
-			if (vport->phba->nvmet_support == 0) {
-				/* Start devloss if target. */
-				if (ndlp->nlp_type & NLP_NVME_TARGET)
-					lpfc_nvme_unregister_port(vport, ndlp);
-			} else {
-				/* NVMET has no upcall. */
-				lpfc_nlp_put(ndlp);
-			}
-		}
+		/* For nodes marked for ADISC, Handle unreg in ADISC cmpl */
+		if (!(ndlp->nlp_flag & NLP_NPR_ADISC))
+			lpfc_nlp_unreg_node(vport, ndlp);
 	}
 
-	/* FCP and NVME Transport interfaces */
-
 	if (new_state ==  NLP_STE_MAPPED_NODE ||
-	    new_state == NLP_STE_UNMAPPED_NODE) {
-		if (lpfc_valid_xpt_node(ndlp)) {
-			vport->phba->nport_event_cnt++;
-			/*
-			 * Tell the fc transport about the port, if we haven't
-			 * already. If we have, and it's a scsi entity, be
-			 */
-			lpfc_register_remote_port(vport, ndlp);
-		}
-		/* Notify the NVME transport of this new rport. */
-		if (vport->phba->sli_rev >= LPFC_SLI_REV4 &&
-		    ndlp->nlp_fc4_type & NLP_FC4_NVME) {
-			if (vport->phba->nvmet_support == 0) {
-				/* Register this rport with the transport.
-				 * Only NVME Target Rports are registered with
-				 * the transport.
-				 */
-				if (ndlp->nlp_type & NLP_NVME_TARGET) {
-					vport->phba->nport_event_cnt++;
-					lpfc_nvme_register_port(vport, ndlp);
-				}
-			} else {
-				/* Just take an NDLP ref count since the
-				 * target does not register rports.
-				 */
-				lpfc_nlp_get(ndlp);
-			}
-		}
-	}
+	    new_state == NLP_STE_UNMAPPED_NODE)
+		lpfc_nlp_reg_node(vport, ndlp);
 
 	if ((new_state ==  NLP_STE_MAPPED_NODE) &&
 		(vport->stat_data_enabled)) {
diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h
index 4a5a85ed42ec..634f8fff7425 100644
--- a/drivers/scsi/lpfc/lpfc_hw.h
+++ b/drivers/scsi/lpfc/lpfc_hw.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2020 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2021 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -608,6 +608,7 @@ struct fc_vft_header {
 #define ELS_CMD_LIRR      0x7A000000
 #define ELS_CMD_LCB	  0x81000000
 #define ELS_CMD_FPIN	  0x16000000
+#define ELS_CMD_EDC	  0x17000000
 #define ELS_CMD_QFPA      0xB0000000
 #define ELS_CMD_UVEM      0xB1000000
 #else	/*  __LITTLE_ENDIAN_BITFIELD */
@@ -652,6 +653,7 @@ struct fc_vft_header {
 #define ELS_CMD_LIRR      0x7A
 #define ELS_CMD_LCB	  0x81
 #define ELS_CMD_FPIN	  ELS_FPIN
+#define ELS_CMD_EDC	  ELS_EDC
 #define ELS_CMD_QFPA      0xB0
 #define ELS_CMD_UVEM      0xB1
 #endif
@@ -1694,6 +1696,7 @@ struct lpfc_fdmi_reg_portattr {
 #define PCI_DEVICE_ID_LANCER_FCOE_VF 0xe268
 #define PCI_DEVICE_ID_LANCER_G6_FC  0xe300
 #define PCI_DEVICE_ID_LANCER_G7_FC  0xf400
+#define PCI_DEVICE_ID_LANCER_G7P_FC 0xf500
 #define PCI_DEVICE_ID_SAT_SMB       0xf011
 #define PCI_DEVICE_ID_SAT_MID       0xf015
 #define PCI_DEVICE_ID_RFLY          0xf095
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
index eb8c735a243b..79a4872c2edb 100644
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -20,6 +20,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
+#include <uapi/scsi/fc/fc_fs.h>
 #include <uapi/scsi/fc/fc_els.h>
 
 /* Macros to deal with bit fields. Each bit field must have 3 #defines
@@ -94,6 +95,9 @@ struct lpfc_sli_intf {
 #define LPFC_SLI_INTF_FAMILY_BE3	0x1
 #define LPFC_SLI_INTF_FAMILY_LNCR_A0	0xa
 #define LPFC_SLI_INTF_FAMILY_LNCR_B0	0xb
+#define LPFC_SLI_INTF_FAMILY_G6		0xc
+#define LPFC_SLI_INTF_FAMILY_G7		0xd
+#define LPFC_SLI_INTF_FAMILY_G7P	0xe
 #define lpfc_sli_intf_slirev_SHIFT		4
 #define lpfc_sli_intf_slirev_MASK		0x0000000F
 #define lpfc_sli_intf_slirev_WORD		word0
@@ -393,6 +397,12 @@ struct lpfc_wcqe_complete {
 #define lpfc_wcqe_c_ersp0_MASK		0x0000FFFF
 #define lpfc_wcqe_c_ersp0_WORD		word0
 	uint32_t total_data_placed;
+#define lpfc_wcqe_c_cmf_cg_SHIFT	31
+#define lpfc_wcqe_c_cmf_cg_MASK		0x00000001
+#define lpfc_wcqe_c_cmf_cg_WORD		total_data_placed
+#define lpfc_wcqe_c_cmf_bw_SHIFT	0
+#define lpfc_wcqe_c_cmf_bw_MASK		0x0FFFFFFF
+#define lpfc_wcqe_c_cmf_bw_WORD		total_data_placed
 	uint32_t parameter;
 #define lpfc_wcqe_c_bg_edir_SHIFT	5
 #define lpfc_wcqe_c_bg_edir_MASK	0x00000001
@@ -687,6 +697,7 @@ struct lpfc_register {
 #define lpfc_sliport_eqdelay_id_MASK	0xfff
 #define lpfc_sliport_eqdelay_id_WORD	word0
 #define LPFC_SEC_TO_USEC		1000000
+#define LPFC_SEC_TO_MSEC		1000
 
 /* The following Registers apply to SLI4 if_type 0 UCNAs. They typically
  * reside in BAR 2.
@@ -959,6 +970,12 @@ union lpfc_sli4_cfg_shdr {
 #define lpfc_mbox_hdr_add_status_SHIFT		8
 #define lpfc_mbox_hdr_add_status_MASK		0x000000FF
 #define lpfc_mbox_hdr_add_status_WORD		word7
+#define LPFC_ADD_STATUS_INCOMPAT_OBJ		0xA2
+#define lpfc_mbox_hdr_add_status_2_SHIFT	16
+#define lpfc_mbox_hdr_add_status_2_MASK		0x000000FF
+#define lpfc_mbox_hdr_add_status_2_WORD		word7
+#define LPFC_ADD_STATUS_2_INCOMPAT_FLASH	0x01
+#define LPFC_ADD_STATUS_2_INCORRECT_ASIC	0x02
 		uint32_t response_length;
 		uint32_t actual_response_length;
 	} response;
@@ -1015,6 +1032,7 @@ struct mbox_header {
 #define LPFC_MBOX_OPCODE_SET_HOST_DATA			0x5D
 #define LPFC_MBOX_OPCODE_SEND_ACTIVATION		0x73
 #define LPFC_MBOX_OPCODE_RESET_LICENSES			0x74
+#define LPFC_MBOX_OPCODE_REG_CONGESTION_BUF		0x8E
 #define LPFC_MBOX_OPCODE_GET_RSRC_EXTENT_INFO		0x9A
 #define LPFC_MBOX_OPCODE_GET_ALLOC_RSRC_EXTENT		0x9B
 #define LPFC_MBOX_OPCODE_ALLOC_RSRC_EXTENT		0x9C
@@ -1123,6 +1141,12 @@ struct lpfc_mbx_sge {
 	uint32_t length;
 };
 
+struct lpfc_mbx_host_buf {
+	uint32_t length;
+	uint32_t pa_lo;
+	uint32_t pa_hi;
+};
+
 struct lpfc_mbx_nembed_cmd {
 	struct lpfc_sli4_cfg_mhdr cfg_mhdr;
 #define LPFC_SLI4_MBX_SGE_MAX_PAGES	19
@@ -1133,6 +1157,31 @@ struct lpfc_mbx_nembed_sge_virt {
 	void *addr[LPFC_SLI4_MBX_SGE_MAX_PAGES];
 };
 
+#define LPFC_MBX_OBJECT_NAME_LEN_DW	26
+struct lpfc_mbx_read_object {  /* Version 0 */
+	struct mbox_header header;
+	union {
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_rd_object_rlen_SHIFT	0
+#define lpfc_mbx_rd_object_rlen_MASK	0x00FFFFFF
+#define lpfc_mbx_rd_object_rlen_WORD	word0
+			uint32_t rd_object_offset;
+			uint32_t rd_object_name[LPFC_MBX_OBJECT_NAME_LEN_DW];
+#define LPFC_OBJ_NAME_SZ 104   /* 26 x sizeof(uint32_t) is 104. */
+			uint32_t rd_object_cnt;
+			struct lpfc_mbx_host_buf rd_object_hbuf[4];
+		} request;
+		struct {
+			uint32_t rd_object_actual_rlen;
+			uint32_t word1;
+#define lpfc_mbx_rd_object_eof_SHIFT	31
+#define lpfc_mbx_rd_object_eof_MASK	0x1
+#define lpfc_mbx_rd_object_eof_WORD	word1
+		} response;
+	} u;
+};
+
 struct lpfc_mbx_eq_create {
 	struct mbox_header header;
 	union {
@@ -1555,7 +1604,7 @@ struct rq_context {
 #define lpfc_rq_context_hdr_size_WORD	word1
 	uint32_t word2;
 #define lpfc_rq_context_cq_id_SHIFT	16
-#define lpfc_rq_context_cq_id_MASK	0x000003FF
+#define lpfc_rq_context_cq_id_MASK	0x0000FFFF
 #define lpfc_rq_context_cq_id_WORD	word2
 #define lpfc_rq_context_buf_size_SHIFT	0
 #define lpfc_rq_context_buf_size_MASK	0x0000FFFF
@@ -2328,6 +2377,7 @@ struct lpfc_mbx_redisc_fcf_tbl {
 #define ADD_STATUS_OPERATION_ALREADY_ACTIVE		0x67
 #define ADD_STATUS_FW_NOT_SUPPORTED			0xEB
 #define ADD_STATUS_INVALID_REQUEST			0x4B
+#define ADD_STATUS_INVALID_OBJECT_NAME			0xA0
 #define ADD_STATUS_FW_DOWNLOAD_HW_DISABLED              0x58
 
 struct lpfc_mbx_sli4_config {
@@ -2803,6 +2853,12 @@ struct lpfc_mbx_read_config {
 #define lpfc_mbx_rd_conf_extnts_inuse_SHIFT	31
 #define lpfc_mbx_rd_conf_extnts_inuse_MASK	0x00000001
 #define lpfc_mbx_rd_conf_extnts_inuse_WORD	word1
+#define lpfc_mbx_rd_conf_wcs_SHIFT		28	/* warning signaling */
+#define lpfc_mbx_rd_conf_wcs_MASK		0x00000001
+#define lpfc_mbx_rd_conf_wcs_WORD		word1
+#define lpfc_mbx_rd_conf_acs_SHIFT		27	/* alarm signaling */
+#define lpfc_mbx_rd_conf_acs_MASK		0x00000001
+#define lpfc_mbx_rd_conf_acs_WORD		word1
 	uint32_t word2;
 #define lpfc_mbx_rd_conf_lnk_numb_SHIFT		0
 #define lpfc_mbx_rd_conf_lnk_numb_MASK		0x0000003F
@@ -3328,17 +3384,20 @@ struct lpfc_sli4_parameters {
 #define cfg_nosr_SHIFT				9
 #define cfg_nosr_MASK				0x00000001
 #define cfg_nosr_WORD				word19
-
 #define cfg_bv1s_SHIFT                          10
 #define cfg_bv1s_MASK                           0x00000001
 #define cfg_bv1s_WORD                           word19
-#define cfg_pvl_SHIFT				13
-#define cfg_pvl_MASK				0x00000001
-#define cfg_pvl_WORD				word19
 
 #define cfg_nsler_SHIFT                         12
 #define cfg_nsler_MASK                          0x00000001
 #define cfg_nsler_WORD                          word19
+#define cfg_pvl_SHIFT				13
+#define cfg_pvl_MASK				0x00000001
+#define cfg_pvl_WORD				word19
+
+#define cfg_pbde_SHIFT				20
+#define cfg_pbde_MASK				0x00000001
+#define cfg_pbde_WORD				word19
 
 	uint32_t word20;
 #define cfg_max_tow_xri_SHIFT			0
@@ -3346,12 +3405,13 @@ struct lpfc_sli4_parameters {
 #define cfg_max_tow_xri_WORD			word20
 
 	uint32_t word21;
-#define cfg_mib_bde_cnt_SHIFT			16
-#define cfg_mib_bde_cnt_MASK			0x000000ff
-#define cfg_mib_bde_cnt_WORD			word21
 #define cfg_mi_ver_SHIFT			0
 #define cfg_mi_ver_MASK				0x0000ffff
 #define cfg_mi_ver_WORD				word21
+#define cfg_cmf_SHIFT				24
+#define cfg_cmf_MASK				0x000000ff
+#define cfg_cmf_WORD				word21
+
 	uint32_t mib_size;
 	uint32_t word23;                        /* RESERVED */
 
@@ -3380,7 +3440,10 @@ struct lpfc_sli4_parameters {
 
 #define LPFC_SET_UE_RECOVERY		0x10
 #define LPFC_SET_MDS_DIAGS		0x12
+#define LPFC_SET_CGN_SIGNAL		0x1f
 #define LPFC_SET_DUAL_DUMP		0x1e
+#define LPFC_SET_ENABLE_MI		0x21
+#define LPFC_SET_ENABLE_CMF		0x24
 struct lpfc_mbx_set_feature {
 	struct mbox_header header;
 	uint32_t feature;
@@ -3395,6 +3458,9 @@ struct lpfc_mbx_set_feature {
 #define lpfc_mbx_set_feature_mds_deep_loopbk_SHIFT  1
 #define lpfc_mbx_set_feature_mds_deep_loopbk_MASK   0x00000001
 #define lpfc_mbx_set_feature_mds_deep_loopbk_WORD   word6
+#define lpfc_mbx_set_feature_CGN_warn_freq_SHIFT 0
+#define lpfc_mbx_set_feature_CGN_warn_freq_MASK  0x0000ffff
+#define lpfc_mbx_set_feature_CGN_warn_freq_WORD  word6
 #define lpfc_mbx_set_feature_dd_SHIFT		0
 #define lpfc_mbx_set_feature_dd_MASK		0x00000001
 #define lpfc_mbx_set_feature_dd_WORD		word6
@@ -3404,6 +3470,15 @@ struct lpfc_mbx_set_feature {
 #define LPFC_DISABLE_DUAL_DUMP		0
 #define LPFC_ENABLE_DUAL_DUMP		1
 #define LPFC_QUERY_OP_DUAL_DUMP		2
+#define lpfc_mbx_set_feature_cmf_SHIFT		0
+#define lpfc_mbx_set_feature_cmf_MASK		0x00000001
+#define lpfc_mbx_set_feature_cmf_WORD		word6
+#define lpfc_mbx_set_feature_mi_SHIFT		0
+#define lpfc_mbx_set_feature_mi_MASK		0x0000ffff
+#define lpfc_mbx_set_feature_mi_WORD		word6
+#define lpfc_mbx_set_feature_milunq_SHIFT	16
+#define lpfc_mbx_set_feature_milunq_MASK	0x0000ffff
+#define lpfc_mbx_set_feature_milunq_WORD	word6
 	uint32_t word7;
 #define lpfc_mbx_set_feature_UERP_SHIFT 0
 #define lpfc_mbx_set_feature_UERP_MASK  0x0000ffff
@@ -3411,16 +3486,51 @@ struct lpfc_mbx_set_feature {
 #define lpfc_mbx_set_feature_UESR_SHIFT 16
 #define lpfc_mbx_set_feature_UESR_MASK  0x0000ffff
 #define lpfc_mbx_set_feature_UESR_WORD  word7
+#define lpfc_mbx_set_feature_CGN_alarm_freq_SHIFT 0
+#define lpfc_mbx_set_feature_CGN_alarm_freq_MASK  0x0000ffff
+#define lpfc_mbx_set_feature_CGN_alarm_freq_WORD  word7
+	u32 word8;
+#define lpfc_mbx_set_feature_CGN_acqe_freq_SHIFT 0
+#define lpfc_mbx_set_feature_CGN_acqe_freq_MASK  0x000000ff
+#define lpfc_mbx_set_feature_CGN_acqe_freq_WORD  word8
 };
 
 
 #define LPFC_SET_HOST_OS_DRIVER_VERSION    0x2
+#define LPFC_SET_HOST_DATE_TIME		   0x4
+
+struct lpfc_mbx_set_host_date_time {
+	uint32_t word6;
+#define lpfc_mbx_set_host_month_WORD	word6
+#define lpfc_mbx_set_host_month_SHIFT	16
+#define lpfc_mbx_set_host_month_MASK	0xFF
+#define lpfc_mbx_set_host_day_WORD	word6
+#define lpfc_mbx_set_host_day_SHIFT	8
+#define lpfc_mbx_set_host_day_MASK	0xFF
+#define lpfc_mbx_set_host_year_WORD	word6
+#define lpfc_mbx_set_host_year_SHIFT	0
+#define lpfc_mbx_set_host_year_MASK	0xFF
+	uint32_t word7;
+#define lpfc_mbx_set_host_hour_WORD	word7
+#define lpfc_mbx_set_host_hour_SHIFT	16
+#define lpfc_mbx_set_host_hour_MASK	0xFF
+#define lpfc_mbx_set_host_min_WORD	word7
+#define lpfc_mbx_set_host_min_SHIFT	8
+#define lpfc_mbx_set_host_min_MASK	0xFF
+#define lpfc_mbx_set_host_sec_WORD	word7
+#define lpfc_mbx_set_host_sec_SHIFT     0
+#define lpfc_mbx_set_host_sec_MASK      0xFF
+};
+
 struct lpfc_mbx_set_host_data {
 #define LPFC_HOST_OS_DRIVER_VERSION_SIZE   48
 	struct mbox_header header;
 	uint32_t param_id;
 	uint32_t param_len;
-	uint8_t  data[LPFC_HOST_OS_DRIVER_VERSION_SIZE];
+	union {
+		uint8_t data[LPFC_HOST_OS_DRIVER_VERSION_SIZE];
+		struct  lpfc_mbx_set_host_date_time tm;
+	} un;
 };
 
 struct lpfc_mbx_set_trunk_mode {
@@ -3438,6 +3548,21 @@ struct lpfc_mbx_get_sli4_parameters {
 	struct lpfc_sli4_parameters sli4_parameters;
 };
 
+struct lpfc_mbx_reg_congestion_buf {
+	struct mbox_header header;
+	uint32_t word0;
+#define lpfc_mbx_reg_cgn_buf_type_WORD		word0
+#define lpfc_mbx_reg_cgn_buf_type_SHIFT		0
+#define lpfc_mbx_reg_cgn_buf_type_MASK		0xFF
+#define lpfc_mbx_reg_cgn_buf_cnt_WORD		word0
+#define lpfc_mbx_reg_cgn_buf_cnt_SHIFT		16
+#define lpfc_mbx_reg_cgn_buf_cnt_MASK		0xFF
+	uint32_t word1;
+	uint32_t length;
+	uint32_t addr_lo;
+	uint32_t addr_hi;
+};
+
 struct lpfc_rscr_desc_generic {
 #define LPFC_RSRC_DESC_WSIZE			22
 	uint32_t desc[LPFC_RSRC_DESC_WSIZE];
@@ -3603,6 +3728,9 @@ struct lpfc_controller_attribute {
 #define lpfc_cntl_attr_eprom_ver_hi_SHIFT	8
 #define lpfc_cntl_attr_eprom_ver_hi_MASK	0x000000ff
 #define lpfc_cntl_attr_eprom_ver_hi_WORD	word17
+#define lpfc_cntl_attr_flash_id_SHIFT		16
+#define lpfc_cntl_attr_flash_id_MASK		0x000000ff
+#define lpfc_cntl_attr_flash_id_WORD		word17
 	uint32_t mbx_da_struct_ver;
 	uint32_t ep_fw_da_struct_ver;
 	uint32_t ncsi_ver_str[3];
@@ -3744,6 +3872,7 @@ struct lpfc_mbx_get_port_name {
 #define MB_CEQ_STATUS_QUEUE_FLUSHING		0x4
 #define MB_CQE_STATUS_DMA_FAILED		0x5
 
+
 #define LPFC_MBX_WR_CONFIG_MAX_BDE		1
 struct lpfc_mbx_wr_object {
 	struct mbox_header header;
@@ -3760,7 +3889,7 @@ struct lpfc_mbx_wr_object {
 #define lpfc_wr_object_write_length_MASK	0x00FFFFFF
 #define lpfc_wr_object_write_length_WORD	word4
 			uint32_t write_offset;
-			uint32_t object_name[26];
+			uint32_t object_name[LPFC_MBX_OBJECT_NAME_LEN_DW];
 			uint32_t bde_count;
 			struct ulp_bde64 bde[LPFC_MBX_WR_CONFIG_MAX_BDE];
 		} request;
@@ -3809,6 +3938,7 @@ struct lpfc_mqe {
 		struct lpfc_mbx_unreg_fcfi unreg_fcfi;
 		struct lpfc_mbx_mq_create mq_create;
 		struct lpfc_mbx_mq_create_ext mq_create_ext;
+		struct lpfc_mbx_read_object read_object;
 		struct lpfc_mbx_eq_create eq_create;
 		struct lpfc_mbx_modify_eq_delay eq_delay;
 		struct lpfc_mbx_cq_create cq_create;
@@ -3834,6 +3964,7 @@ struct lpfc_mqe {
 		struct lpfc_mbx_query_fw_config query_fw_cfg;
 		struct lpfc_mbx_set_beacon_config beacon_config;
 		struct lpfc_mbx_get_sli4_parameters get_sli4_parameters;
+		struct lpfc_mbx_reg_congestion_buf reg_congestion_buf;
 		struct lpfc_mbx_set_link_diag_state link_diag_state;
 		struct lpfc_mbx_set_link_diag_loopback link_diag_loopback;
 		struct lpfc_mbx_run_link_diag_test link_diag_test;
@@ -3888,6 +4019,7 @@ struct lpfc_mcqe {
 #define LPFC_TRAILER_CODE_GRP5	0x5
 #define LPFC_TRAILER_CODE_FC	0x10
 #define LPFC_TRAILER_CODE_SLI	0x11
+#define LPFC_TRAILER_CODE_CMSTAT        0x13
 };
 
 struct lpfc_acqe_link {
@@ -4122,6 +4254,19 @@ struct lpfc_acqe_misconfigured_event {
 #define LPFC_SLI_EVENT_STATUS_UNCERTIFIED	0x05
 };
 
+struct lpfc_acqe_cgn_signal {
+	u32 word0;
+#define lpfc_warn_acqe_SHIFT		0
+#define lpfc_warn_acqe_MASK		0x7FFFFFFF
+#define lpfc_warn_acqe_WORD		word0
+#define lpfc_imm_acqe_SHIFT		31
+#define lpfc_imm_acqe_MASK		0x1
+#define lpfc_imm_acqe_WORD		word0
+	u32 alarm_cnt;
+	u32 word2;
+	u32 trailer;
+};
+
 struct lpfc_acqe_sli {
 	uint32_t event_data1;
 	uint32_t event_data2;
@@ -4134,8 +4279,10 @@ struct lpfc_acqe_sli {
 #define LPFC_SLI_EVENT_TYPE_DIAG_DUMP		0x5
 #define LPFC_SLI_EVENT_TYPE_MISCONFIGURED	0x9
 #define LPFC_SLI_EVENT_TYPE_REMOTE_DPORT	0xA
+#define LPFC_SLI_EVENT_TYPE_PORT_PARAMS_CHG	0xE
 #define LPFC_SLI_EVENT_TYPE_MISCONF_FAWWN	0xF
 #define LPFC_SLI_EVENT_TYPE_EEPROM_FAILURE	0x10
+#define LPFC_SLI_EVENT_TYPE_CGN_SIGNAL		0x11
 };
 
 /*
@@ -4543,6 +4690,69 @@ struct create_xri_wqe {
 #define T_REQUEST_TAG 3
 #define T_XRI_TAG 1
 
+struct cmf_sync_wqe {
+	uint32_t rsrvd[3];
+	uint32_t word3;
+#define	cmf_sync_interval_SHIFT	0
+#define	cmf_sync_interval_MASK	0x00000ffff
+#define	cmf_sync_interval_WORD	word3
+#define	cmf_sync_afpin_SHIFT	16
+#define	cmf_sync_afpin_MASK	0x000000001
+#define	cmf_sync_afpin_WORD	word3
+#define	cmf_sync_asig_SHIFT	17
+#define	cmf_sync_asig_MASK	0x000000001
+#define	cmf_sync_asig_WORD	word3
+#define	cmf_sync_op_SHIFT	20
+#define	cmf_sync_op_MASK	0x00000000f
+#define	cmf_sync_op_WORD	word3
+#define	cmf_sync_ver_SHIFT	24
+#define	cmf_sync_ver_MASK	0x0000000ff
+#define	cmf_sync_ver_WORD	word3
+#define LPFC_CMF_SYNC_VER	1
+	uint32_t event_tag;
+	uint32_t word5;
+#define	cmf_sync_wsigmax_SHIFT	0
+#define	cmf_sync_wsigmax_MASK	0x00000ffff
+#define	cmf_sync_wsigmax_WORD	word5
+#define	cmf_sync_wsigcnt_SHIFT	16
+#define	cmf_sync_wsigcnt_MASK	0x00000ffff
+#define	cmf_sync_wsigcnt_WORD	word5
+	uint32_t word6;
+	uint32_t word7;
+#define	cmf_sync_cmnd_SHIFT	8
+#define	cmf_sync_cmnd_MASK	0x0000000ff
+#define	cmf_sync_cmnd_WORD	word7
+	uint32_t word8;
+	uint32_t word9;
+#define	cmf_sync_reqtag_SHIFT	0
+#define	cmf_sync_reqtag_MASK	0x00000ffff
+#define	cmf_sync_reqtag_WORD	word9
+#define	cmf_sync_wfpinmax_SHIFT	16
+#define	cmf_sync_wfpinmax_MASK	0x0000000ff
+#define	cmf_sync_wfpinmax_WORD	word9
+#define	cmf_sync_wfpincnt_SHIFT	24
+#define	cmf_sync_wfpincnt_MASK	0x0000000ff
+#define	cmf_sync_wfpincnt_WORD	word9
+	uint32_t word10;
+#define cmf_sync_qosd_SHIFT	9
+#define cmf_sync_qosd_MASK	0x00000001
+#define cmf_sync_qosd_WORD	word10
+	uint32_t word11;
+#define cmf_sync_cmd_type_SHIFT	0
+#define cmf_sync_cmd_type_MASK	0x0000000f
+#define cmf_sync_cmd_type_WORD	word11
+#define cmf_sync_wqec_SHIFT	7
+#define cmf_sync_wqec_MASK	0x00000001
+#define cmf_sync_wqec_WORD	word11
+#define cmf_sync_cqid_SHIFT	16
+#define cmf_sync_cqid_MASK	0x0000ffff
+#define cmf_sync_cqid_WORD	word11
+	uint32_t read_bytes;
+	uint32_t word13;
+	uint32_t word14;
+	uint32_t word15;
+};
+
 struct abort_cmd_wqe {
 	uint32_t rsrvd[3];
 	uint32_t word3;
@@ -4672,6 +4882,7 @@ union lpfc_wqe {
 	struct fcp_iread64_wqe fcp_iread;
 	struct fcp_iwrite64_wqe fcp_iwrite;
 	struct abort_cmd_wqe abort_cmd;
+	struct cmf_sync_wqe cmf_sync;
 	struct create_xri_wqe create_xri;
 	struct xmit_bcast64_wqe xmit_bcast64;
 	struct xmit_seq64_wqe xmit_sequence;
@@ -4692,6 +4903,7 @@ union lpfc_wqe128 {
 	struct fcp_iread64_wqe fcp_iread;
 	struct fcp_iwrite64_wqe fcp_iwrite;
 	struct abort_cmd_wqe abort_cmd;
+	struct cmf_sync_wqe cmf_sync;
 	struct create_xri_wqe create_xri;
 	struct xmit_bcast64_wqe xmit_bcast64;
 	struct xmit_seq64_wqe xmit_sequence;
@@ -4707,6 +4919,7 @@ union lpfc_wqe128 {
 
 #define MAGIC_NUMBER_G6 0xFEAA0003
 #define MAGIC_NUMBER_G7 0xFEAA0005
+#define MAGIC_NUMBER_G7P 0xFEAA0020
 
 struct lpfc_grp_hdr {
 	uint32_t size;
@@ -4734,6 +4947,7 @@ struct lpfc_grp_hdr {
 #define FCP_COMMAND_TRSP	0x3
 #define FCP_COMMAND_TSEND	0x7
 #define OTHER_COMMAND		0x8
+#define CMF_SYNC_COMMAND	0xA
 #define ELS_COMMAND_NON_FIP	0xC
 #define ELS_COMMAND_FIP		0xD
 
@@ -4755,6 +4969,7 @@ struct lpfc_grp_hdr {
 #define CMD_FCP_TRECEIVE64_WQE  0xA1
 #define CMD_FCP_TRSP64_WQE      0xA3
 #define CMD_GEN_REQUEST64_WQE   0xC2
+#define CMD_CMF_SYNC_WQE	0xE8
 
 #define CMD_WQE_MASK            0xff
 
@@ -4762,3 +4977,43 @@ struct lpfc_grp_hdr {
 #define LPFC_FW_DUMP	1
 #define LPFC_FW_RESET	2
 #define LPFC_DV_RESET	3
+
+/* On some kernels, enum fc_ls_tlv_dtag does not have
+ * these 2 enums defined, on other kernels it does.
+ * To get aound this we need to add these 2 defines here.
+ */
+#ifndef ELS_DTAG_LNK_FAULT_CAP
+#define ELS_DTAG_LNK_FAULT_CAP        0x0001000D
+#endif
+#ifndef ELS_DTAG_CG_SIGNAL_CAP
+#define ELS_DTAG_CG_SIGNAL_CAP        0x0001000F
+#endif
+
+/*
+ * Initializer useful for decoding FPIN string table.
+ */
+#define FC_FPIN_CONGN_SEVERITY_INIT {				\
+	{ FPIN_CONGN_SEVERITY_WARNING,		"Warning" },	\
+	{ FPIN_CONGN_SEVERITY_ERROR,		"Alarm" },	\
+}
+
+/* EDC supports two descriptors.  When allocated, it is the
+ * size of this structure plus each supported descriptor.
+ */
+struct lpfc_els_edc_req {
+	struct fc_els_edc               edc;       /* hdr up to descriptors */
+	struct fc_diag_cg_sig_desc      cgn_desc;  /* 1st descriptor */
+};
+
+/* Minimum structure defines for the EDC response.
+ * Balance is in buffer.
+ */
+struct lpfc_els_edc_rsp {
+	struct fc_els_edc_resp          edc_rsp;   /* hdr up to descriptors */
+	struct fc_diag_cg_sig_desc      cgn_desc;  /* 1st descriptor */
+};
+
+/* Used for logging FPIN messages */
+#define LPFC_FPIN_WWPN_LINE_SZ  128
+#define LPFC_FPIN_WWPN_LINE_CNT 6
+#define LPFC_FPIN_WWPN_NUM_LINE 6
diff --git a/drivers/scsi/lpfc/lpfc_ids.h b/drivers/scsi/lpfc/lpfc_ids.h
index d48414e295a0..6a90e6e53d09 100644
--- a/drivers/scsi/lpfc/lpfc_ids.h
+++ b/drivers/scsi/lpfc/lpfc_ids.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2021 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -118,6 +118,8 @@ const struct pci_device_id lpfc_id_table[] = {
 		PCI_ANY_ID, PCI_ANY_ID, },
 	{PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_LANCER_G7_FC,
 		PCI_ANY_ID, PCI_ANY_ID, },
+	{PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_LANCER_G7P_FC,
+		PCI_ANY_ID, PCI_ANY_ID, },
 	{PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_SKYHAWK,
 		PCI_ANY_ID, PCI_ANY_ID, },
 	{PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_SKYHAWK_VF,
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index e29523a1b530..0ec322f0e3cb 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -93,6 +93,7 @@ static uint32_t lpfc_sli4_enable_intr(struct lpfc_hba *, uint32_t);
 static void lpfc_sli4_oas_verify(struct lpfc_hba *phba);
 static uint16_t lpfc_find_cpu_handle(struct lpfc_hba *, uint16_t, int);
 static void lpfc_setup_bg(struct lpfc_hba *, struct Scsi_Host *);
+static int lpfc_sli4_cgn_parm_chg_evt(struct lpfc_hba *);
 
 static struct scsi_transport_template *lpfc_transport_template = NULL;
 static struct scsi_transport_template *lpfc_vport_transport_template = NULL;
@@ -1243,7 +1244,8 @@ lpfc_idle_stat_delay_work(struct work_struct *work)
 		return;
 
 	if (phba->link_state == LPFC_HBA_ERROR ||
-	    phba->pport->fc_flag & FC_OFFLINE_MODE)
+	    phba->pport->fc_flag & FC_OFFLINE_MODE ||
+	    phba->cmf_active_mode != LPFC_CFG_OFF)
 		goto requeue;
 
 	for_each_present_cpu(i) {
@@ -1852,6 +1854,7 @@ lpfc_sli4_port_sta_fn_reset(struct lpfc_hba *phba, int mbx_action,
 {
 	int rc;
 	uint32_t intr_mode;
+	LPFC_MBOXQ_t *mboxq;
 
 	if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) >=
 	    LPFC_SLI_INTF_IF_TYPE_2) {
@@ -1871,11 +1874,19 @@ lpfc_sli4_port_sta_fn_reset(struct lpfc_hba *phba, int mbx_action,
 				"Recovery...\n");
 
 	/* If we are no wait, the HBA has been reset and is not
-	 * functional, thus we should clear LPFC_SLI_ACTIVE flag.
+	 * functional, thus we should clear
+	 * (LPFC_SLI_ACTIVE | LPFC_SLI_MBOX_ACTIVE) flags.
 	 */
 	if (mbx_action == LPFC_MBX_NO_WAIT) {
 		spin_lock_irq(&phba->hbalock);
 		phba->sli.sli_flag &= ~LPFC_SLI_ACTIVE;
+		if (phba->sli.mbox_active) {
+			mboxq = phba->sli.mbox_active;
+			mboxq->u.mb.mbxStatus = MBX_NOT_FINISHED;
+			__lpfc_mbox_cmpl_put(phba, mboxq);
+			phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+			phba->sli.mbox_active = NULL;
+		}
 		spin_unlock_irq(&phba->hbalock);
 	}
 
@@ -2590,6 +2601,9 @@ lpfc_get_hba_model_desc(struct lpfc_hba *phba, uint8_t *mdp, uint8_t *descp)
 	case PCI_DEVICE_ID_LANCER_G7_FC:
 		m = (typeof(m)){"LPe36000", "PCIe", "Fibre Channel Adapter"};
 		break;
+	case PCI_DEVICE_ID_LANCER_G7P_FC:
+		m = (typeof(m)){"LPe38000", "PCIe", "Fibre Channel Adapter"};
+		break;
 	case PCI_DEVICE_ID_SKYHAWK:
 	case PCI_DEVICE_ID_SKYHAWK_VF:
 		oneConnect = 1;
@@ -3008,6 +3022,123 @@ lpfc_sli4_stop_fcf_redisc_wait_timer(struct lpfc_hba *phba)
 }
 
 /**
+ * lpfc_cmf_stop - Stop CMF processing
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This is called when the link goes down or if CMF mode is turned OFF.
+ * It is also called when going offline or unloaded just before the
+ * congestion info buffer is unregistered.
+ **/
+void
+lpfc_cmf_stop(struct lpfc_hba *phba)
+{
+	int cpu;
+	struct lpfc_cgn_stat *cgs;
+
+	/* We only do something if CMF is enabled */
+	if (!phba->sli4_hba.pc_sli4_params.cmf)
+		return;
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+			"6221 Stop CMF / Cancel Timer\n");
+
+	/* Cancel the CMF timer */
+	hrtimer_cancel(&phba->cmf_timer);
+
+	/* Zero CMF counters */
+	atomic_set(&phba->cmf_busy, 0);
+	for_each_present_cpu(cpu) {
+		cgs = per_cpu_ptr(phba->cmf_stat, cpu);
+		atomic64_set(&cgs->total_bytes, 0);
+		atomic64_set(&cgs->rcv_bytes, 0);
+		atomic_set(&cgs->rx_io_cnt, 0);
+		atomic64_set(&cgs->rx_latency, 0);
+	}
+	atomic_set(&phba->cmf_bw_wait, 0);
+
+	/* Resume any blocked IO - Queue unblock on workqueue */
+	queue_work(phba->wq, &phba->unblock_request_work);
+}
+
+static inline uint64_t
+lpfc_get_max_line_rate(struct lpfc_hba *phba)
+{
+	uint64_t rate = lpfc_sli_port_speed_get(phba);
+
+	return ((((unsigned long)rate) * 1024 * 1024) / 10);
+}
+
+void
+lpfc_cmf_signal_init(struct lpfc_hba *phba)
+{
+	lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+			"6223 Signal CMF init\n");
+
+	/* Use the new fc_linkspeed to recalculate */
+	phba->cmf_interval_rate = LPFC_CMF_INTERVAL;
+	phba->cmf_max_line_rate = lpfc_get_max_line_rate(phba);
+	phba->cmf_link_byte_count = div_u64(phba->cmf_max_line_rate *
+					    phba->cmf_interval_rate, 1000);
+	phba->cmf_max_bytes_per_interval = phba->cmf_link_byte_count;
+
+	/* This is a signal to firmware to sync up CMF BW with link speed */
+	lpfc_issue_cmf_sync_wqe(phba, 0, 0);
+}
+
+/**
+ * lpfc_cmf_start - Start CMF processing
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This is called when the link comes up or if CMF mode is turned OFF
+ * to Monitor or Managed.
+ **/
+void
+lpfc_cmf_start(struct lpfc_hba *phba)
+{
+	struct lpfc_cgn_stat *cgs;
+	int cpu;
+
+	/* We only do something if CMF is enabled */
+	if (!phba->sli4_hba.pc_sli4_params.cmf ||
+	    phba->cmf_active_mode == LPFC_CFG_OFF)
+		return;
+
+	/* Reinitialize congestion buffer info */
+	lpfc_init_congestion_buf(phba);
+
+	atomic_set(&phba->cgn_fabric_warn_cnt, 0);
+	atomic_set(&phba->cgn_fabric_alarm_cnt, 0);
+	atomic_set(&phba->cgn_sync_alarm_cnt, 0);
+	atomic_set(&phba->cgn_sync_warn_cnt, 0);
+
+	atomic_set(&phba->cmf_busy, 0);
+	for_each_present_cpu(cpu) {
+		cgs = per_cpu_ptr(phba->cmf_stat, cpu);
+		atomic64_set(&cgs->total_bytes, 0);
+		atomic64_set(&cgs->rcv_bytes, 0);
+		atomic_set(&cgs->rx_io_cnt, 0);
+		atomic64_set(&cgs->rx_latency, 0);
+	}
+	phba->cmf_latency.tv_sec = 0;
+	phba->cmf_latency.tv_nsec = 0;
+
+	lpfc_cmf_signal_init(phba);
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+			"6222 Start CMF / Timer\n");
+
+	phba->cmf_timer_cnt = 0;
+	hrtimer_start(&phba->cmf_timer,
+		      ktime_set(0, LPFC_CMF_INTERVAL * 1000000),
+		      HRTIMER_MODE_REL);
+	/* Setup for latency check in IO cmpl routines */
+	ktime_get_real_ts64(&phba->cmf_latency);
+
+	atomic_set(&phba->cmf_bw_wait, 0);
+	atomic_set(&phba->cmf_stop_io, 0);
+}
+
+/**
  * lpfc_stop_hba_timers - Stop all the timers associated with an HBA
  * @phba: pointer to lpfc hba data structure.
  *
@@ -3541,6 +3672,8 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
 				spin_lock_irq(&ndlp->lock);
 				ndlp->nlp_flag &= ~NLP_NPR_ADISC;
 				spin_unlock_irq(&ndlp->lock);
+
+				lpfc_unreg_rpi(vports[i], ndlp);
 				/*
 				 * Whenever an SLI4 port goes offline, free the
 				 * RPI. Get a new RPI when the adapter port
@@ -3556,7 +3689,6 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
 					lpfc_sli4_free_rpi(phba, ndlp->nlp_rpi);
 					ndlp->nlp_rpi = LPFC_RPI_ALLOC_ERROR;
 				}
-				lpfc_unreg_rpi(vports[i], ndlp);
 
 				if (ndlp->nlp_type & NLP_FABRIC) {
 					lpfc_disc_state_machine(vports[i], ndlp,
@@ -4666,6 +4798,8 @@ static void lpfc_host_supported_speeds_set(struct Scsi_Host *shost)
 	if (phba->hba_flag & HBA_FCOE_MODE)
 		return;
 
+	if (phba->lmt & LMT_256Gb)
+		fc_host_supported_speeds(shost) |= FC_PORTSPEED_256GBIT;
 	if (phba->lmt & LMT_128Gb)
 		fc_host_supported_speeds(shost) |= FC_PORTSPEED_128GBIT;
 	if (phba->lmt & LMT_64Gb)
@@ -4845,7 +4979,7 @@ lpfc_sli4_fcf_redisc_wait_tmo(struct timer_list *t)
 
 /**
  * lpfc_vmid_poll - VMID timeout detection
- * @ptr: Map to lpfc_hba data structure pointer.
+ * @t: Timer context used to obtain the pointer to lpfc hba data structure.
  *
  * This routine is invoked when there is no I/O on by a VM for the specified
  * amount of time. When this situation is detected, the VMID has to be
@@ -5074,6 +5208,9 @@ lpfc_sli4_port_speed_parse(struct lpfc_hba *phba, uint32_t evt_code,
 		case LPFC_FC_LA_SPEED_128G:
 			port_speed = 128000;
 			break;
+		case LPFC_FC_LA_SPEED_256G:
+			port_speed = 256000;
+			break;
 		default:
 			port_speed = 0;
 		}
@@ -5267,6 +5404,645 @@ lpfc_async_link_speed_to_read_top(struct lpfc_hba *phba, uint8_t speed_code)
 	return port_speed;
 }
 
+void
+lpfc_cgn_dump_rxmonitor(struct lpfc_hba *phba)
+{
+	struct rxtable_entry *entry;
+	int cnt = 0, head, tail, last, start;
+
+	head = atomic_read(&phba->rxtable_idx_head);
+	tail = atomic_read(&phba->rxtable_idx_tail);
+	if (!phba->rxtable || head == tail) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_CGN_MGMT,
+				"4411 Rxtable is empty\n");
+		return;
+	}
+	last = tail;
+	start = head;
+
+	/* Display the last LPFC_MAX_RXMONITOR_DUMP entries from the rxtable */
+	while (start != last) {
+		if (start)
+			start--;
+		else
+			start = LPFC_MAX_RXMONITOR_ENTRY - 1;
+		entry = &phba->rxtable[start];
+		lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+				"4410 %02d: MBPI %lld Xmit %lld Cmpl %lld "
+				"Lat %lld ASz %lld Info %02d BWUtil %d "
+				"Int %d slot %d\n",
+				cnt, entry->max_bytes_per_interval,
+				entry->total_bytes, entry->rcv_bytes,
+				entry->avg_io_latency, entry->avg_io_size,
+				entry->cmf_info, entry->timer_utilization,
+				entry->timer_interval, start);
+		cnt++;
+		if (cnt >= LPFC_MAX_RXMONITOR_DUMP)
+			return;
+	}
+}
+
+/**
+ * lpfc_cgn_update_stat - Save data into congestion stats buffer
+ * @phba: pointer to lpfc hba data structure.
+ * @dtag: FPIN descriptor received
+ *
+ * Increment the FPIN received counter/time when it happens.
+ */
+void
+lpfc_cgn_update_stat(struct lpfc_hba *phba, uint32_t dtag)
+{
+	struct lpfc_cgn_info *cp;
+	struct tm broken;
+	struct timespec64 cur_time;
+	u32 cnt;
+	u16 value;
+
+	/* Make sure we have a congestion info buffer */
+	if (!phba->cgn_i)
+		return;
+	cp = (struct lpfc_cgn_info *)phba->cgn_i->virt;
+	ktime_get_real_ts64(&cur_time);
+	time64_to_tm(cur_time.tv_sec, 0, &broken);
+
+	/* Update congestion statistics */
+	switch (dtag) {
+	case ELS_DTAG_LNK_INTEGRITY:
+		cnt = le32_to_cpu(cp->link_integ_notification);
+		cnt++;
+		cp->link_integ_notification = cpu_to_le32(cnt);
+
+		cp->cgn_stat_lnk_month = broken.tm_mon + 1;
+		cp->cgn_stat_lnk_day = broken.tm_mday;
+		cp->cgn_stat_lnk_year = broken.tm_year - 100;
+		cp->cgn_stat_lnk_hour = broken.tm_hour;
+		cp->cgn_stat_lnk_min = broken.tm_min;
+		cp->cgn_stat_lnk_sec = broken.tm_sec;
+		break;
+	case ELS_DTAG_DELIVERY:
+		cnt = le32_to_cpu(cp->delivery_notification);
+		cnt++;
+		cp->delivery_notification = cpu_to_le32(cnt);
+
+		cp->cgn_stat_del_month = broken.tm_mon + 1;
+		cp->cgn_stat_del_day = broken.tm_mday;
+		cp->cgn_stat_del_year = broken.tm_year - 100;
+		cp->cgn_stat_del_hour = broken.tm_hour;
+		cp->cgn_stat_del_min = broken.tm_min;
+		cp->cgn_stat_del_sec = broken.tm_sec;
+		break;
+	case ELS_DTAG_PEER_CONGEST:
+		cnt = le32_to_cpu(cp->cgn_peer_notification);
+		cnt++;
+		cp->cgn_peer_notification = cpu_to_le32(cnt);
+
+		cp->cgn_stat_peer_month = broken.tm_mon + 1;
+		cp->cgn_stat_peer_day = broken.tm_mday;
+		cp->cgn_stat_peer_year = broken.tm_year - 100;
+		cp->cgn_stat_peer_hour = broken.tm_hour;
+		cp->cgn_stat_peer_min = broken.tm_min;
+		cp->cgn_stat_peer_sec = broken.tm_sec;
+		break;
+	case ELS_DTAG_CONGESTION:
+		cnt = le32_to_cpu(cp->cgn_notification);
+		cnt++;
+		cp->cgn_notification = cpu_to_le32(cnt);
+
+		cp->cgn_stat_cgn_month = broken.tm_mon + 1;
+		cp->cgn_stat_cgn_day = broken.tm_mday;
+		cp->cgn_stat_cgn_year = broken.tm_year - 100;
+		cp->cgn_stat_cgn_hour = broken.tm_hour;
+		cp->cgn_stat_cgn_min = broken.tm_min;
+		cp->cgn_stat_cgn_sec = broken.tm_sec;
+	}
+	if (phba->cgn_fpin_frequency &&
+	    phba->cgn_fpin_frequency != LPFC_FPIN_INIT_FREQ) {
+		value = LPFC_CGN_TIMER_TO_MIN / phba->cgn_fpin_frequency;
+		cp->cgn_stat_npm = cpu_to_le32(value);
+	}
+	value = lpfc_cgn_calc_crc32(cp, LPFC_CGN_INFO_SZ,
+				    LPFC_CGN_CRC32_SEED);
+	cp->cgn_info_crc = cpu_to_le32(value);
+}
+
+/**
+ * lpfc_cgn_save_evt_cnt - Save data into registered congestion buffer
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * Save the congestion event data every minute.
+ * On the hour collapse all the minute data into hour data. Every day
+ * collapse all the hour data into daily data. Separate driver
+ * and fabrc congestion event counters that will be saved out
+ * to the registered congestion buffer every minute.
+ */
+static void
+lpfc_cgn_save_evt_cnt(struct lpfc_hba *phba)
+{
+	struct lpfc_cgn_info *cp;
+	struct tm broken;
+	struct timespec64 cur_time;
+	uint32_t i, index;
+	uint16_t value, mvalue;
+	uint64_t bps;
+	uint32_t mbps;
+	uint32_t dvalue, wvalue, lvalue, avalue;
+	uint64_t latsum;
+	uint16_t *ptr;
+	uint32_t *lptr;
+	uint16_t *mptr;
+
+	/* Make sure we have a congestion info buffer */
+	if (!phba->cgn_i)
+		return;
+	cp = (struct lpfc_cgn_info *)phba->cgn_i->virt;
+
+	if (time_before(jiffies, phba->cgn_evt_timestamp))
+		return;
+	phba->cgn_evt_timestamp = jiffies +
+			msecs_to_jiffies(LPFC_CGN_TIMER_TO_MIN);
+	phba->cgn_evt_minute++;
+
+	/* We should get to this point in the routine on 1 minute intervals */
+
+	ktime_get_real_ts64(&cur_time);
+	time64_to_tm(cur_time.tv_sec, 0, &broken);
+
+	if (phba->cgn_fpin_frequency &&
+	    phba->cgn_fpin_frequency != LPFC_FPIN_INIT_FREQ) {
+		value = LPFC_CGN_TIMER_TO_MIN / phba->cgn_fpin_frequency;
+		cp->cgn_stat_npm = cpu_to_le32(value);
+	}
+
+	/* Read and clear the latency counters for this minute */
+	lvalue = atomic_read(&phba->cgn_latency_evt_cnt);
+	latsum = atomic64_read(&phba->cgn_latency_evt);
+	atomic_set(&phba->cgn_latency_evt_cnt, 0);
+	atomic64_set(&phba->cgn_latency_evt, 0);
+
+	/* We need to store MB/sec bandwidth in the congestion information.
+	 * block_cnt is count of 512 byte blocks for the entire minute,
+	 * bps will get bytes per sec before finally converting to MB/sec.
+	 */
+	bps = div_u64(phba->rx_block_cnt, LPFC_SEC_MIN) * 512;
+	phba->rx_block_cnt = 0;
+	mvalue = bps / (1024 * 1024); /* convert to MB/sec */
+
+	/* Every minute */
+	/* cgn parameters */
+	cp->cgn_info_mode = phba->cgn_p.cgn_param_mode;
+	cp->cgn_info_level0 = phba->cgn_p.cgn_param_level0;
+	cp->cgn_info_level1 = phba->cgn_p.cgn_param_level1;
+	cp->cgn_info_level2 = phba->cgn_p.cgn_param_level2;
+
+	/* Fill in default LUN qdepth */
+	value = (uint16_t)(phba->pport->cfg_lun_queue_depth);
+	cp->cgn_lunq = cpu_to_le16(value);
+
+	/* Record congestion buffer info - every minute
+	 * cgn_driver_evt_cnt (Driver events)
+	 * cgn_fabric_warn_cnt (Congestion Warnings)
+	 * cgn_latency_evt_cnt / cgn_latency_evt (IO Latency)
+	 * cgn_fabric_alarm_cnt (Congestion Alarms)
+	 */
+	index = ++cp->cgn_index_minute;
+	if (cp->cgn_index_minute == LPFC_MIN_HOUR) {
+		cp->cgn_index_minute = 0;
+		index = 0;
+	}
+
+	/* Get the number of driver events in this sample and reset counter */
+	dvalue = atomic_read(&phba->cgn_driver_evt_cnt);
+	atomic_set(&phba->cgn_driver_evt_cnt, 0);
+
+	/* Get the number of warning events - FPIN and Signal for this minute */
+	wvalue = 0;
+	if ((phba->cgn_reg_fpin & LPFC_CGN_FPIN_WARN) ||
+	    phba->cgn_reg_signal == EDC_CG_SIG_WARN_ONLY ||
+	    phba->cgn_reg_signal == EDC_CG_SIG_WARN_ALARM)
+		wvalue = atomic_read(&phba->cgn_fabric_warn_cnt);
+	atomic_set(&phba->cgn_fabric_warn_cnt, 0);
+
+	/* Get the number of alarm events - FPIN and Signal for this minute */
+	avalue = 0;
+	if ((phba->cgn_reg_fpin & LPFC_CGN_FPIN_ALARM) ||
+	    phba->cgn_reg_signal == EDC_CG_SIG_WARN_ALARM)
+		avalue = atomic_read(&phba->cgn_fabric_alarm_cnt);
+	atomic_set(&phba->cgn_fabric_alarm_cnt, 0);
+
+	/* Collect the driver, warning, alarm and latency counts for this
+	 * minute into the driver congestion buffer.
+	 */
+	ptr = &cp->cgn_drvr_min[index];
+	value = (uint16_t)dvalue;
+	*ptr = cpu_to_le16(value);
+
+	ptr = &cp->cgn_warn_min[index];
+	value = (uint16_t)wvalue;
+	*ptr = cpu_to_le16(value);
+
+	ptr = &cp->cgn_alarm_min[index];
+	value = (uint16_t)avalue;
+	*ptr = cpu_to_le16(value);
+
+	lptr = &cp->cgn_latency_min[index];
+	if (lvalue) {
+		lvalue = (uint32_t)div_u64(latsum, lvalue);
+		*lptr = cpu_to_le32(lvalue);
+	} else {
+		*lptr = 0;
+	}
+
+	/* Collect the bandwidth value into the driver's congesion buffer. */
+	mptr = &cp->cgn_bw_min[index];
+	*mptr = cpu_to_le16(mvalue);
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+			"2418 Congestion Info - minute (%d): %d %d %d %d %d\n",
+			index, dvalue, wvalue, *lptr, mvalue, avalue);
+
+	/* Every hour */
+	if ((phba->cgn_evt_minute % LPFC_MIN_HOUR) == 0) {
+		/* Record congestion buffer info - every hour
+		 * Collapse all minutes into an hour
+		 */
+		index = ++cp->cgn_index_hour;
+		if (cp->cgn_index_hour == LPFC_HOUR_DAY) {
+			cp->cgn_index_hour = 0;
+			index = 0;
+		}
+
+		dvalue = 0;
+		wvalue = 0;
+		lvalue = 0;
+		avalue = 0;
+		mvalue = 0;
+		mbps = 0;
+		for (i = 0; i < LPFC_MIN_HOUR; i++) {
+			dvalue += le16_to_cpu(cp->cgn_drvr_min[i]);
+			wvalue += le16_to_cpu(cp->cgn_warn_min[i]);
+			lvalue += le32_to_cpu(cp->cgn_latency_min[i]);
+			mbps += le16_to_cpu(cp->cgn_bw_min[i]);
+			avalue += le16_to_cpu(cp->cgn_alarm_min[i]);
+		}
+		if (lvalue)		/* Avg of latency averages */
+			lvalue /= LPFC_MIN_HOUR;
+		if (mbps)		/* Avg of Bandwidth averages */
+			mvalue = mbps / LPFC_MIN_HOUR;
+
+		lptr = &cp->cgn_drvr_hr[index];
+		*lptr = cpu_to_le32(dvalue);
+		lptr = &cp->cgn_warn_hr[index];
+		*lptr = cpu_to_le32(wvalue);
+		lptr = &cp->cgn_latency_hr[index];
+		*lptr = cpu_to_le32(lvalue);
+		mptr = &cp->cgn_bw_hr[index];
+		*mptr = cpu_to_le16(mvalue);
+		lptr = &cp->cgn_alarm_hr[index];
+		*lptr = cpu_to_le32(avalue);
+
+		lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+				"2419 Congestion Info - hour "
+				"(%d): %d %d %d %d %d\n",
+				index, dvalue, wvalue, lvalue, mvalue, avalue);
+	}
+
+	/* Every day */
+	if ((phba->cgn_evt_minute % LPFC_MIN_DAY) == 0) {
+		/* Record congestion buffer info - every hour
+		 * Collapse all hours into a day. Rotate days
+		 * after LPFC_MAX_CGN_DAYS.
+		 */
+		index = ++cp->cgn_index_day;
+		if (cp->cgn_index_day == LPFC_MAX_CGN_DAYS) {
+			cp->cgn_index_day = 0;
+			index = 0;
+		}
+
+		/* Anytime we overwrite daily index 0, after we wrap,
+		 * we will be overwriting the oldest day, so we must
+		 * update the congestion data start time for that day.
+		 * That start time should have previously been saved after
+		 * we wrote the last days worth of data.
+		 */
+		if ((phba->hba_flag & HBA_CGN_DAY_WRAP) && index == 0) {
+			time64_to_tm(phba->cgn_daily_ts.tv_sec, 0, &broken);
+
+			cp->cgn_info_month = broken.tm_mon + 1;
+			cp->cgn_info_day = broken.tm_mday;
+			cp->cgn_info_year = broken.tm_year - 100;
+			cp->cgn_info_hour = broken.tm_hour;
+			cp->cgn_info_minute = broken.tm_min;
+			cp->cgn_info_second = broken.tm_sec;
+
+			lpfc_printf_log
+				(phba, KERN_INFO, LOG_CGN_MGMT,
+				"2646 CGNInfo idx0 Start Time: "
+				"%d/%d/%d %d:%d:%d\n",
+				cp->cgn_info_day, cp->cgn_info_month,
+				cp->cgn_info_year, cp->cgn_info_hour,
+				cp->cgn_info_minute, cp->cgn_info_second);
+		}
+
+		dvalue = 0;
+		wvalue = 0;
+		lvalue = 0;
+		mvalue = 0;
+		mbps = 0;
+		avalue = 0;
+		for (i = 0; i < LPFC_HOUR_DAY; i++) {
+			dvalue += le32_to_cpu(cp->cgn_drvr_hr[i]);
+			wvalue += le32_to_cpu(cp->cgn_warn_hr[i]);
+			lvalue += le32_to_cpu(cp->cgn_latency_hr[i]);
+			mbps += le32_to_cpu(cp->cgn_bw_hr[i]);
+			avalue += le32_to_cpu(cp->cgn_alarm_hr[i]);
+		}
+		if (lvalue)		/* Avg of latency averages */
+			lvalue /= LPFC_HOUR_DAY;
+		if (mbps)		/* Avg of Bandwidth averages */
+			mvalue = mbps / LPFC_HOUR_DAY;
+
+		lptr = &cp->cgn_drvr_day[index];
+		*lptr = cpu_to_le32(dvalue);
+		lptr = &cp->cgn_warn_day[index];
+		*lptr = cpu_to_le32(wvalue);
+		lptr = &cp->cgn_latency_day[index];
+		*lptr = cpu_to_le32(lvalue);
+		mptr = &cp->cgn_bw_day[index];
+		*mptr = cpu_to_le16(mvalue);
+		lptr = &cp->cgn_alarm_day[index];
+		*lptr = cpu_to_le32(avalue);
+
+		lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+				"2420 Congestion Info - daily (%d): "
+				"%d %d %d %d %d\n",
+				index, dvalue, wvalue, lvalue, mvalue, avalue);
+
+		/* We just wrote LPFC_MAX_CGN_DAYS of data,
+		 * so we are wrapped on any data after this.
+		 * Save this as the start time for the next day.
+		 */
+		if (index == (LPFC_MAX_CGN_DAYS - 1)) {
+			phba->hba_flag |= HBA_CGN_DAY_WRAP;
+			ktime_get_real_ts64(&phba->cgn_daily_ts);
+		}
+	}
+
+	/* Use the frequency found in the last rcv'ed FPIN */
+	value = phba->cgn_fpin_frequency;
+	if (phba->cgn_reg_fpin & LPFC_CGN_FPIN_WARN)
+		cp->cgn_warn_freq = cpu_to_le16(value);
+	if (phba->cgn_reg_fpin & LPFC_CGN_FPIN_ALARM)
+		cp->cgn_alarm_freq = cpu_to_le16(value);
+
+	/* Frequency (in ms) Signal Warning/Signal Congestion Notifications
+	 * are received by the HBA
+	 */
+	value = phba->cgn_sig_freq;
+
+	if (phba->cgn_reg_signal == EDC_CG_SIG_WARN_ONLY ||
+	    phba->cgn_reg_signal == EDC_CG_SIG_WARN_ALARM)
+		cp->cgn_warn_freq = cpu_to_le16(value);
+	if (phba->cgn_reg_signal == EDC_CG_SIG_WARN_ALARM)
+		cp->cgn_alarm_freq = cpu_to_le16(value);
+
+	lvalue = lpfc_cgn_calc_crc32(cp, LPFC_CGN_INFO_SZ,
+				     LPFC_CGN_CRC32_SEED);
+	cp->cgn_info_crc = cpu_to_le32(lvalue);
+}
+
+/**
+ * lpfc_calc_cmf_latency - latency from start of rxate timer interval
+ * @phba: The Hba for which this call is being executed.
+ *
+ * The routine calculates the latency from the beginning of the CMF timer
+ * interval to the current point in time. It is called from IO completion
+ * when we exceed our Bandwidth limitation for the time interval.
+ */
+uint32_t
+lpfc_calc_cmf_latency(struct lpfc_hba *phba)
+{
+	struct timespec64 cmpl_time;
+	uint32_t msec = 0;
+
+	ktime_get_real_ts64(&cmpl_time);
+
+	/* This routine works on a ms granularity so sec and usec are
+	 * converted accordingly.
+	 */
+	if (cmpl_time.tv_sec == phba->cmf_latency.tv_sec) {
+		msec = (cmpl_time.tv_nsec - phba->cmf_latency.tv_nsec) /
+			NSEC_PER_MSEC;
+	} else {
+		if (cmpl_time.tv_nsec >= phba->cmf_latency.tv_nsec) {
+			msec = (cmpl_time.tv_sec -
+				phba->cmf_latency.tv_sec) * MSEC_PER_SEC;
+			msec += ((cmpl_time.tv_nsec -
+				  phba->cmf_latency.tv_nsec) / NSEC_PER_MSEC);
+		} else {
+			msec = (cmpl_time.tv_sec - phba->cmf_latency.tv_sec -
+				1) * MSEC_PER_SEC;
+			msec += (((NSEC_PER_SEC - phba->cmf_latency.tv_nsec) +
+				 cmpl_time.tv_nsec) / NSEC_PER_MSEC);
+		}
+	}
+	return msec;
+}
+
+/**
+ * lpfc_cmf_timer -  This is the timer function for one congestion
+ * rate interval.
+ * @timer: Pointer to the high resolution timer that expired
+ */
+static enum hrtimer_restart
+lpfc_cmf_timer(struct hrtimer *timer)
+{
+	struct lpfc_hba *phba = container_of(timer, struct lpfc_hba,
+					     cmf_timer);
+	struct rxtable_entry *entry;
+	uint32_t io_cnt;
+	uint32_t head, tail;
+	uint32_t busy, max_read;
+	uint64_t total, rcv, lat, mbpi;
+	int timer_interval = LPFC_CMF_INTERVAL;
+	uint32_t ms;
+	struct lpfc_cgn_stat *cgs;
+	int cpu;
+
+	/* Only restart the timer if congestion mgmt is on */
+	if (phba->cmf_active_mode == LPFC_CFG_OFF ||
+	    !phba->cmf_latency.tv_sec) {
+		lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+				"6224 CMF timer exit: %d %lld\n",
+				phba->cmf_active_mode,
+				(uint64_t)phba->cmf_latency.tv_sec);
+		return HRTIMER_NORESTART;
+	}
+
+	/* If pport is not ready yet, just exit and wait for
+	 * the next timer cycle to hit.
+	 */
+	if (!phba->pport)
+		goto skip;
+
+	/* Do not block SCSI IO while in the timer routine since
+	 * total_bytes will be cleared
+	 */
+	atomic_set(&phba->cmf_stop_io, 1);
+
+	/* First we need to calculate the actual ms between
+	 * the last timer interrupt and this one. We ask for
+	 * LPFC_CMF_INTERVAL, however the actual time may
+	 * vary depending on system overhead.
+	 */
+	ms = lpfc_calc_cmf_latency(phba);
+
+
+	/* Immediately after we calculate the time since the last
+	 * timer interrupt, set the start time for the next
+	 * interrupt
+	 */
+	ktime_get_real_ts64(&phba->cmf_latency);
+
+	phba->cmf_link_byte_count =
+		div_u64(phba->cmf_max_line_rate * LPFC_CMF_INTERVAL, 1000);
+
+	/* Collect all the stats from the prior timer interval */
+	total = 0;
+	io_cnt = 0;
+	lat = 0;
+	rcv = 0;
+	for_each_present_cpu(cpu) {
+		cgs = per_cpu_ptr(phba->cmf_stat, cpu);
+		total += atomic64_xchg(&cgs->total_bytes, 0);
+		io_cnt += atomic_xchg(&cgs->rx_io_cnt, 0);
+		lat += atomic64_xchg(&cgs->rx_latency, 0);
+		rcv += atomic64_xchg(&cgs->rcv_bytes, 0);
+	}
+
+	/* Before we issue another CMF_SYNC_WQE, retrieve the BW
+	 * returned from the last CMF_SYNC_WQE issued, from
+	 * cmf_last_sync_bw. This will be the target BW for
+	 * this next timer interval.
+	 */
+	if (phba->cmf_active_mode == LPFC_CFG_MANAGED &&
+	    phba->link_state != LPFC_LINK_DOWN &&
+	    phba->hba_flag & HBA_SETUP) {
+		mbpi = phba->cmf_last_sync_bw;
+		phba->cmf_last_sync_bw = 0;
+		lpfc_issue_cmf_sync_wqe(phba, LPFC_CMF_INTERVAL, total);
+	} else {
+		/* For Monitor mode or link down we want mbpi
+		 * to be the full link speed
+		 */
+		mbpi = phba->cmf_link_byte_count;
+	}
+	phba->cmf_timer_cnt++;
+
+	if (io_cnt) {
+		/* Update congestion info buffer latency in us */
+		atomic_add(io_cnt, &phba->cgn_latency_evt_cnt);
+		atomic64_add(lat, &phba->cgn_latency_evt);
+	}
+	busy = atomic_xchg(&phba->cmf_busy, 0);
+	max_read = atomic_xchg(&phba->rx_max_read_cnt, 0);
+
+	/* Calculate MBPI for the next timer interval */
+	if (mbpi) {
+		if (mbpi > phba->cmf_link_byte_count ||
+		    phba->cmf_active_mode == LPFC_CFG_MONITOR)
+			mbpi = phba->cmf_link_byte_count;
+
+		/* Change max_bytes_per_interval to what the prior
+		 * CMF_SYNC_WQE cmpl indicated.
+		 */
+		if (mbpi != phba->cmf_max_bytes_per_interval)
+			phba->cmf_max_bytes_per_interval = mbpi;
+	}
+
+	/* Save rxmonitor information for debug */
+	if (phba->rxtable) {
+		head = atomic_xchg(&phba->rxtable_idx_head,
+				   LPFC_RXMONITOR_TABLE_IN_USE);
+		entry = &phba->rxtable[head];
+		entry->total_bytes = total;
+		entry->rcv_bytes = rcv;
+		entry->cmf_busy = busy;
+		entry->cmf_info = phba->cmf_active_info;
+		if (io_cnt) {
+			entry->avg_io_latency = div_u64(lat, io_cnt);
+			entry->avg_io_size = div_u64(rcv, io_cnt);
+		} else {
+			entry->avg_io_latency = 0;
+			entry->avg_io_size = 0;
+		}
+		entry->max_read_cnt = max_read;
+		entry->io_cnt = io_cnt;
+		entry->max_bytes_per_interval = mbpi;
+		if (phba->cmf_active_mode == LPFC_CFG_MANAGED)
+			entry->timer_utilization = phba->cmf_last_ts;
+		else
+			entry->timer_utilization = ms;
+		entry->timer_interval = ms;
+		phba->cmf_last_ts = 0;
+
+		/* Increment rxtable index */
+		head = (head + 1) % LPFC_MAX_RXMONITOR_ENTRY;
+		tail = atomic_read(&phba->rxtable_idx_tail);
+		if (head == tail) {
+			tail = (tail + 1) % LPFC_MAX_RXMONITOR_ENTRY;
+			atomic_set(&phba->rxtable_idx_tail, tail);
+		}
+		atomic_set(&phba->rxtable_idx_head, head);
+	}
+
+	if (phba->cmf_active_mode == LPFC_CFG_MONITOR) {
+		/* If Monitor mode, check if we are oversubscribed
+		 * against the full line rate.
+		 */
+		if (mbpi && total > mbpi)
+			atomic_inc(&phba->cgn_driver_evt_cnt);
+	}
+	phba->rx_block_cnt += div_u64(rcv, 512);  /* save 512 byte block cnt */
+
+	/* Each minute save Fabric and Driver congestion information */
+	lpfc_cgn_save_evt_cnt(phba);
+
+	/* Since we need to call lpfc_cgn_save_evt_cnt every minute, on the
+	 * minute, adjust our next timer interval, if needed, to ensure a
+	 * 1 minute granularity when we get the next timer interrupt.
+	 */
+	if (time_after(jiffies + msecs_to_jiffies(LPFC_CMF_INTERVAL),
+		       phba->cgn_evt_timestamp)) {
+		timer_interval = jiffies_to_msecs(phba->cgn_evt_timestamp -
+						  jiffies);
+		if (timer_interval <= 0)
+			timer_interval = LPFC_CMF_INTERVAL;
+
+		/* If we adjust timer_interval, max_bytes_per_interval
+		 * needs to be adjusted as well.
+		 */
+		phba->cmf_link_byte_count = div_u64(phba->cmf_max_line_rate *
+						    timer_interval, 1000);
+		if (phba->cmf_active_mode == LPFC_CFG_MONITOR)
+			phba->cmf_max_bytes_per_interval =
+				phba->cmf_link_byte_count;
+	}
+
+	/* Since total_bytes has already been zero'ed, its okay to unblock
+	 * after max_bytes_per_interval is setup.
+	 */
+	if (atomic_xchg(&phba->cmf_bw_wait, 0))
+		queue_work(phba->wq, &phba->unblock_request_work);
+
+	/* SCSI IO is now unblocked */
+	atomic_set(&phba->cmf_stop_io, 0);
+
+skip:
+	hrtimer_forward_now(timer,
+			    ktime_set(0, timer_interval * NSEC_PER_MSEC));
+	return HRTIMER_RESTART;
+}
+
 #define trunk_link_status(__idx)\
 	bf_get(lpfc_acqe_fc_la_trunk_config_port##__idx, acqe_fc) ?\
 	       ((phba->trunk_link.link##__idx.state == LPFC_LINK_UP) ?\
@@ -5329,6 +6105,9 @@ lpfc_update_trunk_link_status(struct lpfc_hba *phba,
 			trunk_link_status(0), trunk_link_status(1),
 			trunk_link_status(2), trunk_link_status(3));
 
+	if (phba->cmf_active_mode != LPFC_CFG_OFF)
+		lpfc_cmf_signal_init(phba);
+
 	if (port_fault)
 		lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
 				"3202 trunk error:0x%x (%s) seen on port0:%s "
@@ -5510,9 +6289,10 @@ lpfc_sli4_async_sli_evt(struct lpfc_hba *phba, struct lpfc_acqe_sli *acqe_sli)
 	uint8_t operational = 0;
 	struct temp_event temp_event_data;
 	struct lpfc_acqe_misconfigured_event *misconfigured;
+	struct lpfc_acqe_cgn_signal *cgn_signal;
 	struct Scsi_Host  *shost;
 	struct lpfc_vport **vports;
-	int rc, i;
+	int rc, i, cnt;
 
 	evt_type = bf_get(lpfc_trailer_type, acqe_sli);
 
@@ -5668,6 +6448,10 @@ lpfc_sli4_async_sli_evt(struct lpfc_hba *phba, struct lpfc_acqe_sli *acqe_sli)
 				"Event Data1:x%08x Event Data2: x%08x\n",
 				acqe_sli->event_data1, acqe_sli->event_data2);
 		break;
+	case LPFC_SLI_EVENT_TYPE_PORT_PARAMS_CHG:
+		/* Call FW to obtain active parms */
+		lpfc_sli4_cgn_parm_chg_evt(phba);
+		break;
 	case LPFC_SLI_EVENT_TYPE_MISCONF_FAWWN:
 		/* Misconfigured WWN. Reports that the SLI Port is configured
 		 * to use FA-WWN, but the attached device doesn’t support it.
@@ -5685,6 +6469,40 @@ lpfc_sli4_async_sli_evt(struct lpfc_hba *phba, struct lpfc_acqe_sli *acqe_sli)
 			     "Event Data1: x%08x Event Data2: x%08x\n",
 			     acqe_sli->event_data1, acqe_sli->event_data2);
 		break;
+	case LPFC_SLI_EVENT_TYPE_CGN_SIGNAL:
+		if (phba->cmf_active_mode == LPFC_CFG_OFF)
+			break;
+		cgn_signal = (struct lpfc_acqe_cgn_signal *)
+					&acqe_sli->event_data1;
+		phba->cgn_acqe_cnt++;
+
+		cnt = bf_get(lpfc_warn_acqe, cgn_signal);
+		atomic64_add(cnt, &phba->cgn_acqe_stat.warn);
+		atomic64_add(cgn_signal->alarm_cnt, &phba->cgn_acqe_stat.alarm);
+
+		/* no threshold for CMF, even 1 signal will trigger an event */
+
+		/* Alarm overrides warning, so check that first */
+		if (cgn_signal->alarm_cnt) {
+			if (phba->cgn_reg_signal == EDC_CG_SIG_WARN_ALARM) {
+				/* Keep track of alarm cnt for cgn_info */
+				atomic_add(cgn_signal->alarm_cnt,
+					   &phba->cgn_fabric_alarm_cnt);
+				/* Keep track of alarm cnt for CMF_SYNC_WQE */
+				atomic_add(cgn_signal->alarm_cnt,
+					   &phba->cgn_sync_alarm_cnt);
+			}
+		} else if (cnt) {
+			/* signal action needs to be taken */
+			if (phba->cgn_reg_signal == EDC_CG_SIG_WARN_ONLY ||
+			    phba->cgn_reg_signal == EDC_CG_SIG_WARN_ALARM) {
+				/* Keep track of warning cnt for cgn_info */
+				atomic_add(cnt, &phba->cgn_fabric_warn_cnt);
+				/* Keep track of warning cnt for CMF_SYNC_WQE */
+				atomic_add(cnt, &phba->cgn_sync_warn_cnt);
+			}
+		}
+		break;
 	default:
 		lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
 				"3193 Unrecognized SLI event, type: 0x%x",
@@ -6060,6 +6878,276 @@ lpfc_sli4_async_grp5_evt(struct lpfc_hba *phba,
 }
 
 /**
+ * lpfc_sli4_async_cmstat_evt - Process the asynchronous cmstat event
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is to handle the SLI4 asynchronous cmstat event. A cmstat event
+ * is an asynchronous notification of a request to reset CM stats.
+ **/
+static void
+lpfc_sli4_async_cmstat_evt(struct lpfc_hba *phba)
+{
+	if (!phba->cgn_i)
+		return;
+	lpfc_init_congestion_stat(phba);
+}
+
+/**
+ * lpfc_cgn_params_val - Validate FW congestion parameters.
+ * @phba: pointer to lpfc hba data structure.
+ * @p_cfg_param: pointer to FW provided congestion parameters.
+ *
+ * This routine validates the congestion parameters passed
+ * by the FW to the driver via an ACQE event.
+ **/
+static void
+lpfc_cgn_params_val(struct lpfc_hba *phba, struct lpfc_cgn_param *p_cfg_param)
+{
+	spin_lock_irq(&phba->hbalock);
+
+	if (!lpfc_rangecheck(p_cfg_param->cgn_param_mode, LPFC_CFG_OFF,
+			     LPFC_CFG_MONITOR)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_CGN_MGMT,
+				"6225 CMF mode param out of range: %d\n",
+				 p_cfg_param->cgn_param_mode);
+		p_cfg_param->cgn_param_mode = LPFC_CFG_OFF;
+	}
+
+	spin_unlock_irq(&phba->hbalock);
+}
+
+/**
+ * lpfc_cgn_params_parse - Process a FW cong parm change event
+ * @phba: pointer to lpfc hba data structure.
+ * @p_cgn_param: pointer to a data buffer with the FW cong params.
+ * @len: the size of pdata in bytes.
+ *
+ * This routine validates the congestion management buffer signature
+ * from the FW, validates the contents and makes corrections for
+ * valid, in-range values.  If the signature magic is correct and
+ * after parameter validation, the contents are copied to the driver's
+ * @phba structure. If the magic is incorrect, an error message is
+ * logged.
+ **/
+static void
+lpfc_cgn_params_parse(struct lpfc_hba *phba,
+		      struct lpfc_cgn_param *p_cgn_param, uint32_t len)
+{
+	struct lpfc_cgn_info *cp;
+	uint32_t crc, oldmode;
+
+	/* Make sure the FW has encoded the correct magic number to
+	 * validate the congestion parameter in FW memory.
+	 */
+	if (p_cgn_param->cgn_param_magic == LPFC_CFG_PARAM_MAGIC_NUM) {
+		lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT | LOG_INIT,
+				"4668 FW cgn parm buffer data: "
+				"magic 0x%x version %d mode %d "
+				"level0 %d level1 %d "
+				"level2 %d byte13 %d "
+				"byte14 %d byte15 %d "
+				"byte11 %d byte12 %d activeMode %d\n",
+				p_cgn_param->cgn_param_magic,
+				p_cgn_param->cgn_param_version,
+				p_cgn_param->cgn_param_mode,
+				p_cgn_param->cgn_param_level0,
+				p_cgn_param->cgn_param_level1,
+				p_cgn_param->cgn_param_level2,
+				p_cgn_param->byte13,
+				p_cgn_param->byte14,
+				p_cgn_param->byte15,
+				p_cgn_param->byte11,
+				p_cgn_param->byte12,
+				phba->cmf_active_mode);
+
+		oldmode = phba->cmf_active_mode;
+
+		/* Any parameters out of range are corrected to defaults
+		 * by this routine.  No need to fail.
+		 */
+		lpfc_cgn_params_val(phba, p_cgn_param);
+
+		/* Parameters are verified, move them into driver storage */
+		spin_lock_irq(&phba->hbalock);
+		memcpy(&phba->cgn_p, p_cgn_param,
+		       sizeof(struct lpfc_cgn_param));
+
+		/* Update parameters in congestion info buffer now */
+		if (phba->cgn_i) {
+			cp = (struct lpfc_cgn_info *)phba->cgn_i->virt;
+			cp->cgn_info_mode = phba->cgn_p.cgn_param_mode;
+			cp->cgn_info_level0 = phba->cgn_p.cgn_param_level0;
+			cp->cgn_info_level1 = phba->cgn_p.cgn_param_level1;
+			cp->cgn_info_level2 = phba->cgn_p.cgn_param_level2;
+			crc = lpfc_cgn_calc_crc32(cp, LPFC_CGN_INFO_SZ,
+						  LPFC_CGN_CRC32_SEED);
+			cp->cgn_info_crc = cpu_to_le32(crc);
+		}
+		spin_unlock_irq(&phba->hbalock);
+
+		phba->cmf_active_mode = phba->cgn_p.cgn_param_mode;
+
+		switch (oldmode) {
+		case LPFC_CFG_OFF:
+			if (phba->cgn_p.cgn_param_mode != LPFC_CFG_OFF) {
+				/* Turning CMF on */
+				lpfc_cmf_start(phba);
+
+				if (phba->link_state >= LPFC_LINK_UP) {
+					phba->cgn_reg_fpin =
+						phba->cgn_init_reg_fpin;
+					phba->cgn_reg_signal =
+						phba->cgn_init_reg_signal;
+					lpfc_issue_els_edc(phba->pport, 0);
+				}
+			}
+			break;
+		case LPFC_CFG_MANAGED:
+			switch (phba->cgn_p.cgn_param_mode) {
+			case LPFC_CFG_OFF:
+				/* Turning CMF off */
+				lpfc_cmf_stop(phba);
+				if (phba->link_state >= LPFC_LINK_UP)
+					lpfc_issue_els_edc(phba->pport, 0);
+				break;
+			case LPFC_CFG_MONITOR:
+				lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+						"4661 Switch from MANAGED to "
+						"`MONITOR mode\n");
+				phba->cmf_max_bytes_per_interval =
+					phba->cmf_link_byte_count;
+
+				/* Resume blocked IO - unblock on workqueue */
+				queue_work(phba->wq,
+					   &phba->unblock_request_work);
+				break;
+			}
+			break;
+		case LPFC_CFG_MONITOR:
+			switch (phba->cgn_p.cgn_param_mode) {
+			case LPFC_CFG_OFF:
+				/* Turning CMF off */
+				lpfc_cmf_stop(phba);
+				if (phba->link_state >= LPFC_LINK_UP)
+					lpfc_issue_els_edc(phba->pport, 0);
+				break;
+			case LPFC_CFG_MANAGED:
+				lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+						"4662 Switch from MONITOR to "
+						"MANAGED mode\n");
+				lpfc_cmf_signal_init(phba);
+				break;
+			}
+			break;
+		}
+	} else {
+		lpfc_printf_log(phba, KERN_ERR, LOG_CGN_MGMT | LOG_INIT,
+				"4669 FW cgn parm buf wrong magic 0x%x "
+				"version %d\n", p_cgn_param->cgn_param_magic,
+				p_cgn_param->cgn_param_version);
+	}
+}
+
+/**
+ * lpfc_sli4_cgn_params_read - Read and Validate FW congestion parameters.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine issues a read_object mailbox command to
+ * get the congestion management parameters from the FW
+ * parses it and updates the driver maintained values.
+ *
+ * Returns
+ *  0     if the object was empty
+ *  -Eval if an error was encountered
+ *  Count if bytes were read from object
+ **/
+int
+lpfc_sli4_cgn_params_read(struct lpfc_hba *phba)
+{
+	int ret = 0;
+	struct lpfc_cgn_param *p_cgn_param = NULL;
+	u32 *pdata = NULL;
+	u32 len = 0;
+
+	/* Find out if the FW has a new set of congestion parameters. */
+	len = sizeof(struct lpfc_cgn_param);
+	pdata = kzalloc(len, GFP_KERNEL);
+	ret = lpfc_read_object(phba, (char *)LPFC_PORT_CFG_NAME,
+			       pdata, len);
+
+	/* 0 means no data.  A negative means error.  A positive means
+	 * bytes were copied.
+	 */
+	if (!ret) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_CGN_MGMT | LOG_INIT,
+				"4670 CGN RD OBJ returns no data\n");
+		goto rd_obj_err;
+	} else if (ret < 0) {
+		/* Some error.  Just exit and return it to the caller.*/
+		goto rd_obj_err;
+	}
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT | LOG_INIT,
+			"6234 READ CGN PARAMS Successful %d\n", len);
+
+	/* Parse data pointer over len and update the phba congestion
+	 * parameters with values passed back.  The receive rate values
+	 * may have been altered in FW, but take no action here.
+	 */
+	p_cgn_param = (struct lpfc_cgn_param *)pdata;
+	lpfc_cgn_params_parse(phba, p_cgn_param, len);
+
+ rd_obj_err:
+	kfree(pdata);
+	return ret;
+}
+
+/**
+ * lpfc_sli4_cgn_parm_chg_evt - Process a FW congestion param change event
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * The FW generated Async ACQE SLI event calls this routine when
+ * the event type is an SLI Internal Port Event and the Event Code
+ * indicates a change to the FW maintained congestion parameters.
+ *
+ * This routine executes a Read_Object mailbox call to obtain the
+ * current congestion parameters maintained in FW and corrects
+ * the driver's active congestion parameters.
+ *
+ * The acqe event is not passed because there is no further data
+ * required.
+ *
+ * Returns nonzero error if event processing encountered an error.
+ * Zero otherwise for success.
+ **/
+static int
+lpfc_sli4_cgn_parm_chg_evt(struct lpfc_hba *phba)
+{
+	int ret = 0;
+
+	if (!phba->sli4_hba.pc_sli4_params.cmf) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_CGN_MGMT | LOG_INIT,
+				"4664 Cgn Evt when E2E off. Drop event\n");
+		return -EACCES;
+	}
+
+	/* If the event is claiming an empty object, it's ok.  A write
+	 * could have cleared it.  Only error is a negative return
+	 * status.
+	 */
+	ret = lpfc_sli4_cgn_params_read(phba);
+	if (ret < 0) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_CGN_MGMT | LOG_INIT,
+				"4667 Error reading Cgn Params (%d)\n",
+				ret);
+	} else if (!ret) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_CGN_MGMT | LOG_INIT,
+				"4673 CGN Event empty object.\n");
+	}
+	return ret;
+}
+
+/**
  * lpfc_sli4_async_event_proc - Process all the pending asynchronous event
  * @phba: pointer to lpfc hba data structure.
  *
@@ -6107,6 +7195,9 @@ void lpfc_sli4_async_event_proc(struct lpfc_hba *phba)
 		case LPFC_TRAILER_CODE_SLI:
 			lpfc_sli4_async_sli_evt(phba, &cq_event->cqe.acqe_sli);
 			break;
+		case LPFC_TRAILER_CODE_CMSTAT:
+			lpfc_sli4_async_cmstat_evt(phba);
+			break;
 		default:
 			lpfc_printf_log(phba, KERN_ERR,
 					LOG_TRACE_EVENT,
@@ -6391,6 +7482,15 @@ lpfc_sli_probe_sriov_nr_virtfn(struct lpfc_hba *phba, int nr_vfn)
 	return rc;
 }
 
+static void
+lpfc_unblock_requests_work(struct work_struct *work)
+{
+	struct lpfc_hba *phba = container_of(work, struct lpfc_hba,
+					     unblock_request_work);
+
+	lpfc_unblock_requests(phba);
+}
+
 /**
  * lpfc_setup_driver_resource_phase1 - Phase1 etup driver internal resources.
  * @phba: pointer to lpfc hba data structure.
@@ -6466,7 +7566,7 @@ lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba)
 
 	INIT_DELAYED_WORK(&phba->idle_stat_delay_work,
 			  lpfc_idle_stat_delay_work);
-
+	INIT_WORK(&phba->unblock_request_work, lpfc_unblock_requests_work);
 	return 0;
 }
 
@@ -6697,6 +7797,10 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 	/* FCF rediscover timer */
 	timer_setup(&phba->fcf.redisc_wait, lpfc_sli4_fcf_redisc_wait_tmo, 0);
 
+	/* CMF congestion timer */
+	hrtimer_init(&phba->cmf_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	phba->cmf_timer.function = lpfc_cmf_timer;
+
 	/*
 	 * Control structure for handling external multi-buffer mailbox
 	 * command pass-through.
@@ -7145,6 +8249,14 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 	}
 #endif
 
+	phba->cmf_stat = alloc_percpu(struct lpfc_cgn_stat);
+	if (!phba->cmf_stat) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
+				"3331 Failed allocating per cpu cgn stats\n");
+		rc = -ENOMEM;
+		goto out_free_hba_hdwq_info;
+	}
+
 	/*
 	 * Enable sr-iov virtual functions if supported and configured
 	 * through the module parameter.
@@ -7164,6 +8276,8 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 
 	return 0;
 
+out_free_hba_hdwq_info:
+	free_percpu(phba->sli4_hba.c_stat);
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 out_free_hba_idle_stat:
 	kfree(phba->sli4_hba.idle_stat);
@@ -7211,6 +8325,7 @@ lpfc_sli4_driver_resource_unset(struct lpfc_hba *phba)
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 	free_percpu(phba->sli4_hba.c_stat);
 #endif
+	free_percpu(phba->cmf_stat);
 	kfree(phba->sli4_hba.idle_stat);
 
 	/* Free memory allocated for msi-x interrupt vector to CPU mapping */
@@ -8537,9 +9652,12 @@ lpfc_map_topology(struct lpfc_hba *phba, struct lpfc_mbx_read_config *rd_config)
 	}
 	/* FW supports persistent topology - override module parameter value */
 	phba->hba_flag |= HBA_PERSISTENT_TOPO;
-	switch (phba->pcidev->device) {
-	case PCI_DEVICE_ID_LANCER_G7_FC:
-	case PCI_DEVICE_ID_LANCER_G6_FC:
+
+	/* if ASIC_GEN_NUM >= 0xC) */
+	if ((bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) ==
+		    LPFC_SLI_INTF_IF_TYPE_6) ||
+	    (bf_get(lpfc_sli_intf_sli_family, &phba->sli4_hba.sli_intf) ==
+		    LPFC_SLI_INTF_FAMILY_G6)) {
 		if (!tf) {
 			phba->cfg_topology = ((pt == LINK_FLAGS_LOOP)
 					? FLAGS_TOPOLOGY_MODE_LOOP
@@ -8547,8 +9665,7 @@ lpfc_map_topology(struct lpfc_hba *phba, struct lpfc_mbx_read_config *rd_config)
 		} else {
 			phba->hba_flag &= ~HBA_PERSISTENT_TOPO;
 		}
-		break;
-	default:	/* G5 */
+	} else { /* G5 */
 		if (tf) {
 			/* If topology failover set - pt is '0' or '1' */
 			phba->cfg_topology = (pt ? FLAGS_TOPOLOGY_MODE_PT_LOOP :
@@ -8558,7 +9675,6 @@ lpfc_map_topology(struct lpfc_hba *phba, struct lpfc_mbx_read_config *rd_config)
 					? FLAGS_TOPOLOGY_MODE_PT_PT
 					: FLAGS_TOPOLOGY_MODE_LOOP);
 		}
-		break;
 	}
 	if (phba->hba_flag & HBA_PERSISTENT_TOPO) {
 		lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
@@ -8683,6 +9799,52 @@ lpfc_sli4_read_config(struct lpfc_hba *phba)
 		phba->max_vpi = (phba->sli4_hba.max_cfg_param.max_vpi > 0) ?
 				(phba->sli4_hba.max_cfg_param.max_vpi - 1) : 0;
 		phba->max_vports = phba->max_vpi;
+
+		/* Next decide on FPIN or Signal E2E CGN support
+		 * For congestion alarms and warnings valid combination are:
+		 * 1. FPIN alarms / FPIN warnings
+		 * 2. Signal alarms / Signal warnings
+		 * 3. FPIN alarms / Signal warnings
+		 * 4. Signal alarms / FPIN warnings
+		 *
+		 * Initialize the adapter frequency to 100 mSecs
+		 */
+		phba->cgn_reg_fpin = LPFC_CGN_FPIN_BOTH;
+		phba->cgn_reg_signal = EDC_CG_SIG_NOTSUPPORTED;
+		phba->cgn_sig_freq = lpfc_fabric_cgn_frequency;
+
+		if (lpfc_use_cgn_signal) {
+			if (bf_get(lpfc_mbx_rd_conf_wcs, rd_config)) {
+				phba->cgn_reg_signal = EDC_CG_SIG_WARN_ONLY;
+				phba->cgn_reg_fpin &= ~LPFC_CGN_FPIN_WARN;
+			}
+			if (bf_get(lpfc_mbx_rd_conf_acs, rd_config)) {
+				/* MUST support both alarm and warning
+				 * because EDC does not support alarm alone.
+				 */
+				if (phba->cgn_reg_signal !=
+				    EDC_CG_SIG_WARN_ONLY) {
+					/* Must support both or none */
+					phba->cgn_reg_fpin = LPFC_CGN_FPIN_BOTH;
+					phba->cgn_reg_signal =
+						EDC_CG_SIG_NOTSUPPORTED;
+				} else {
+					phba->cgn_reg_signal =
+						EDC_CG_SIG_WARN_ALARM;
+					phba->cgn_reg_fpin =
+						LPFC_CGN_FPIN_NONE;
+				}
+			}
+		}
+
+		/* Set the congestion initial signal and fpin values. */
+		phba->cgn_init_reg_fpin = phba->cgn_reg_fpin;
+		phba->cgn_init_reg_signal = phba->cgn_reg_signal;
+
+		lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+				"6446 READ_CONFIG reg_sig x%x reg_fpin:x%x\n",
+				phba->cgn_reg_signal, phba->cgn_reg_fpin);
+
 		lpfc_map_topology(phba, rd_config);
 		lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
 				"2003 cfg params Extents? %d "
@@ -12063,6 +13225,8 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba)
 	struct pci_dev *pdev = phba->pcidev;
 
 	lpfc_stop_hba_timers(phba);
+	hrtimer_cancel(&phba->cmf_timer);
+
 	if (phba->pport)
 		phba->sli4_hba.intr_enable = 0;
 
@@ -12133,6 +13297,240 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba)
 		phba->pport->work_port_events = 0;
 }
 
+static uint32_t
+lpfc_cgn_crc32(uint32_t crc, u8 byte)
+{
+	uint32_t msb = 0;
+	uint32_t bit;
+
+	for (bit = 0; bit < 8; bit++) {
+		msb = (crc >> 31) & 1;
+		crc <<= 1;
+
+		if (msb ^ (byte & 1)) {
+			crc ^= LPFC_CGN_CRC32_MAGIC_NUMBER;
+			crc |= 1;
+		}
+		byte >>= 1;
+	}
+	return crc;
+}
+
+static uint32_t
+lpfc_cgn_reverse_bits(uint32_t wd)
+{
+	uint32_t result = 0;
+	uint32_t i;
+
+	for (i = 0; i < 32; i++) {
+		result <<= 1;
+		result |= (1 & (wd >> i));
+	}
+	return result;
+}
+
+/*
+ * The routine corresponds with the algorithm the HBA firmware
+ * uses to validate the data integrity.
+ */
+uint32_t
+lpfc_cgn_calc_crc32(void *ptr, uint32_t byteLen, uint32_t crc)
+{
+	uint32_t  i;
+	uint32_t result;
+	uint8_t  *data = (uint8_t *)ptr;
+
+	for (i = 0; i < byteLen; ++i)
+		crc = lpfc_cgn_crc32(crc, data[i]);
+
+	result = ~lpfc_cgn_reverse_bits(crc);
+	return result;
+}
+
+void
+lpfc_init_congestion_buf(struct lpfc_hba *phba)
+{
+	struct lpfc_cgn_info *cp;
+	struct timespec64 cmpl_time;
+	struct tm broken;
+	uint16_t size;
+	uint32_t crc;
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+			"6235 INIT Congestion Buffer %p\n", phba->cgn_i);
+
+	if (!phba->cgn_i)
+		return;
+	cp = (struct lpfc_cgn_info *)phba->cgn_i->virt;
+
+	atomic_set(&phba->cgn_fabric_warn_cnt, 0);
+	atomic_set(&phba->cgn_fabric_alarm_cnt, 0);
+	atomic_set(&phba->cgn_sync_alarm_cnt, 0);
+	atomic_set(&phba->cgn_sync_warn_cnt, 0);
+
+	atomic64_set(&phba->cgn_acqe_stat.alarm, 0);
+	atomic64_set(&phba->cgn_acqe_stat.warn, 0);
+	atomic_set(&phba->cgn_driver_evt_cnt, 0);
+	atomic_set(&phba->cgn_latency_evt_cnt, 0);
+	atomic64_set(&phba->cgn_latency_evt, 0);
+	phba->cgn_evt_minute = 0;
+	phba->hba_flag &= ~HBA_CGN_DAY_WRAP;
+
+	memset(cp, 0xff, LPFC_CGN_DATA_SIZE);
+	cp->cgn_info_size = cpu_to_le16(LPFC_CGN_INFO_SZ);
+	cp->cgn_info_version = LPFC_CGN_INFO_V3;
+
+	/* cgn parameters */
+	cp->cgn_info_mode = phba->cgn_p.cgn_param_mode;
+	cp->cgn_info_level0 = phba->cgn_p.cgn_param_level0;
+	cp->cgn_info_level1 = phba->cgn_p.cgn_param_level1;
+	cp->cgn_info_level2 = phba->cgn_p.cgn_param_level2;
+
+	ktime_get_real_ts64(&cmpl_time);
+	time64_to_tm(cmpl_time.tv_sec, 0, &broken);
+
+	cp->cgn_info_month = broken.tm_mon + 1;
+	cp->cgn_info_day = broken.tm_mday;
+	cp->cgn_info_year = broken.tm_year - 100; /* relative to 2000 */
+	cp->cgn_info_hour = broken.tm_hour;
+	cp->cgn_info_minute = broken.tm_min;
+	cp->cgn_info_second = broken.tm_sec;
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT | LOG_INIT,
+			"2643 CGNInfo Init: Start Time "
+			"%d/%d/%d %d:%d:%d\n",
+			cp->cgn_info_day, cp->cgn_info_month,
+			cp->cgn_info_year, cp->cgn_info_hour,
+			cp->cgn_info_minute, cp->cgn_info_second);
+
+	/* Fill in default LUN qdepth */
+	if (phba->pport) {
+		size = (uint16_t)(phba->pport->cfg_lun_queue_depth);
+		cp->cgn_lunq = cpu_to_le16(size);
+	}
+
+	/* last used Index initialized to 0xff already */
+
+	cp->cgn_warn_freq = LPFC_FPIN_INIT_FREQ;
+	cp->cgn_alarm_freq = LPFC_FPIN_INIT_FREQ;
+	crc = lpfc_cgn_calc_crc32(cp, LPFC_CGN_INFO_SZ, LPFC_CGN_CRC32_SEED);
+	cp->cgn_info_crc = cpu_to_le32(crc);
+
+	phba->cgn_evt_timestamp = jiffies +
+		msecs_to_jiffies(LPFC_CGN_TIMER_TO_MIN);
+}
+
+void
+lpfc_init_congestion_stat(struct lpfc_hba *phba)
+{
+	struct lpfc_cgn_info *cp;
+	struct timespec64 cmpl_time;
+	struct tm broken;
+	uint32_t crc;
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+			"6236 INIT Congestion Stat %p\n", phba->cgn_i);
+
+	if (!phba->cgn_i)
+		return;
+
+	cp = (struct lpfc_cgn_info *)phba->cgn_i->virt;
+	memset(&cp->cgn_stat_npm, 0, LPFC_CGN_STAT_SIZE);
+
+	ktime_get_real_ts64(&cmpl_time);
+	time64_to_tm(cmpl_time.tv_sec, 0, &broken);
+
+	cp->cgn_stat_month = broken.tm_mon + 1;
+	cp->cgn_stat_day = broken.tm_mday;
+	cp->cgn_stat_year = broken.tm_year - 100; /* relative to 2000 */
+	cp->cgn_stat_hour = broken.tm_hour;
+	cp->cgn_stat_minute = broken.tm_min;
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT | LOG_INIT,
+			"2647 CGNstat Init: Start Time "
+			"%d/%d/%d %d:%d\n",
+			cp->cgn_stat_day, cp->cgn_stat_month,
+			cp->cgn_stat_year, cp->cgn_stat_hour,
+			cp->cgn_stat_minute);
+
+	crc = lpfc_cgn_calc_crc32(cp, LPFC_CGN_INFO_SZ, LPFC_CGN_CRC32_SEED);
+	cp->cgn_info_crc = cpu_to_le32(crc);
+}
+
+/**
+ * __lpfc_reg_congestion_buf - register congestion info buffer with HBA
+ * @phba: Pointer to hba context object.
+ * @reg: flag to determine register or unregister.
+ */
+static int
+__lpfc_reg_congestion_buf(struct lpfc_hba *phba, int reg)
+{
+	struct lpfc_mbx_reg_congestion_buf *reg_congestion_buf;
+	union  lpfc_sli4_cfg_shdr *shdr;
+	uint32_t shdr_status, shdr_add_status;
+	LPFC_MBOXQ_t *mboxq;
+	int length, rc;
+
+	if (!phba->cgn_i)
+		return -ENXIO;
+
+	mboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
+				"2641 REG_CONGESTION_BUF mbox allocation fail: "
+				"HBA state x%x reg %d\n",
+				phba->pport->port_state, reg);
+		return -ENOMEM;
+	}
+
+	length = (sizeof(struct lpfc_mbx_reg_congestion_buf) -
+		sizeof(struct lpfc_sli4_cfg_mhdr));
+	lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_COMMON,
+			 LPFC_MBOX_OPCODE_REG_CONGESTION_BUF, length,
+			 LPFC_SLI4_MBX_EMBED);
+	reg_congestion_buf = &mboxq->u.mqe.un.reg_congestion_buf;
+	bf_set(lpfc_mbx_reg_cgn_buf_type, reg_congestion_buf, 1);
+	if (reg > 0)
+		bf_set(lpfc_mbx_reg_cgn_buf_cnt, reg_congestion_buf, 1);
+	else
+		bf_set(lpfc_mbx_reg_cgn_buf_cnt, reg_congestion_buf, 0);
+	reg_congestion_buf->length = sizeof(struct lpfc_cgn_info);
+	reg_congestion_buf->addr_lo =
+		putPaddrLow(phba->cgn_i->phys);
+	reg_congestion_buf->addr_hi =
+		putPaddrHigh(phba->cgn_i->phys);
+
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+	shdr = (union lpfc_sli4_cfg_shdr *)
+		&mboxq->u.mqe.un.sli4_config.header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status,
+				 &shdr->response);
+	mempool_free(mboxq, phba->mbox_mem_pool);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2642 REG_CONGESTION_BUF mailbox "
+				"failed with status x%x add_status x%x,"
+				" mbx status x%x reg %d\n",
+				shdr_status, shdr_add_status, rc, reg);
+		return -ENXIO;
+	}
+	return 0;
+}
+
+int
+lpfc_unreg_congestion_buf(struct lpfc_hba *phba)
+{
+	lpfc_cmf_stop(phba);
+	return __lpfc_reg_congestion_buf(phba, 0);
+}
+
+int
+lpfc_reg_congestion_buf(struct lpfc_hba *phba)
+{
+	return __lpfc_reg_congestion_buf(phba, 1);
+}
+
 /**
  * lpfc_get_sli4_parameters - Get the SLI4 Config PARAMETERS.
  * @phba: Pointer to HBA context object.
@@ -12241,7 +13639,6 @@ lpfc_get_sli4_parameters(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
 					bf_get(cfg_xib, mbx_sli4_parameters),
 					phba->cfg_enable_fc4_type);
 fcponly:
-			phba->nvme_support = 0;
 			phba->nvmet_support = 0;
 			phba->cfg_nvmet_mrq = 0;
 			phba->cfg_nvme_seg_cnt = 0;
@@ -12259,9 +13656,10 @@ fcponly:
 	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
 		phba->cfg_sg_seg_cnt = LPFC_MAX_NVME_SEG_CNT;
 
-	/* Only embed PBDE for if_type 6, PBDE support requires xib be set */
-	if ((bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) !=
-	    LPFC_SLI_INTF_IF_TYPE_6) || (!bf_get(cfg_xib, mbx_sli4_parameters)))
+	/* Enable embedded Payload BDE if support is indicated */
+	if (bf_get(cfg_pbde, mbx_sli4_parameters))
+		phba->cfg_enable_pbde = 1;
+	else
 		phba->cfg_enable_pbde = 0;
 
 	/*
@@ -12299,7 +13697,7 @@ fcponly:
 			"6422 XIB %d PBDE %d: FCP %d NVME %d %d %d\n",
 			bf_get(cfg_xib, mbx_sli4_parameters),
 			phba->cfg_enable_pbde,
-			phba->fcp_embed_io, phba->nvme_support,
+			phba->fcp_embed_io, sli4_params->nvme,
 			phba->cfg_nvme_embed_cmd, phba->cfg_suppress_rsp);
 
 	if ((bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) ==
@@ -12331,21 +13729,6 @@ fcponly:
 	else
 		phba->nsler = 0;
 
-	/* Save PB info for use during HBA setup */
-	sli4_params->mi_ver = bf_get(cfg_mi_ver, mbx_sli4_parameters);
-	sli4_params->mib_bde_cnt = bf_get(cfg_mib_bde_cnt, mbx_sli4_parameters);
-	sli4_params->mib_size = mbx_sli4_parameters->mib_size;
-	sli4_params->mi_value = LPFC_DFLT_MIB_VAL;
-
-	/* Next we check for Vendor MIB support */
-	if (sli4_params->mi_ver && phba->cfg_enable_mi)
-		phba->cfg_fdmi_on = LPFC_FDMI_SUPPORT;
-
-	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-			"6461 MIB attr %d  enable %d  FDMI %d buf %d:%d\n",
-			sli4_params->mi_ver, phba->cfg_enable_mi,
-			sli4_params->mi_value, sli4_params->mib_bde_cnt,
-			sli4_params->mib_size);
 	return 0;
 }
 
@@ -12978,7 +14361,9 @@ lpfc_log_write_firmware_error(struct lpfc_hba *phba, uint32_t offset,
 	const struct firmware *fw)
 {
 	int rc;
+	u8 sli_family;
 
+	sli_family = bf_get(lpfc_sli_intf_sli_family, &phba->sli4_hba.sli_intf);
 	/* Three cases:  (1) FW was not supported on the detected adapter.
 	 * (2) FW update has been locked out administratively.
 	 * (3) Some other error during FW update.
@@ -12986,10 +14371,12 @@ lpfc_log_write_firmware_error(struct lpfc_hba *phba, uint32_t offset,
 	 * for admin diagnosis.
 	 */
 	if (offset == ADD_STATUS_FW_NOT_SUPPORTED ||
-	    (phba->pcidev->device == PCI_DEVICE_ID_LANCER_G6_FC &&
+	    (sli_family == LPFC_SLI_INTF_FAMILY_G6 &&
 	     magic_number != MAGIC_NUMBER_G6) ||
-	    (phba->pcidev->device == PCI_DEVICE_ID_LANCER_G7_FC &&
-	     magic_number != MAGIC_NUMBER_G7)) {
+	    (sli_family == LPFC_SLI_INTF_FAMILY_G7 &&
+	     magic_number != MAGIC_NUMBER_G7) ||
+	    (sli_family == LPFC_SLI_INTF_FAMILY_G7P &&
+	     magic_number != MAGIC_NUMBER_G7P)) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
 				"3030 This firmware version is not supported on"
 				" this HBA model. Device:%x Magic:%x Type:%x "
@@ -13377,6 +14764,8 @@ lpfc_pci_remove_one_s4(struct pci_dev *pdev)
 	spin_lock_irq(&phba->hbalock);
 	vport->load_flag |= FC_UNLOADING;
 	spin_unlock_irq(&phba->hbalock);
+	if (phba->cgn_i)
+		lpfc_unreg_congestion_buf(phba);
 
 	lpfc_free_sysfs_attr(vport);
 
@@ -14041,17 +15430,18 @@ lpfc_sli4_oas_verify(struct lpfc_hba *phba)
 void
 lpfc_sli4_ras_init(struct lpfc_hba *phba)
 {
-	switch (phba->pcidev->device) {
-	case PCI_DEVICE_ID_LANCER_G6_FC:
-	case PCI_DEVICE_ID_LANCER_G7_FC:
+	/* if ASIC_GEN_NUM >= 0xC) */
+	if ((bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) ==
+		    LPFC_SLI_INTF_IF_TYPE_6) ||
+	    (bf_get(lpfc_sli_intf_sli_family, &phba->sli4_hba.sli_intf) ==
+		    LPFC_SLI_INTF_FAMILY_G6)) {
 		phba->ras_fwlog.ras_hwsupport = true;
 		if (phba->cfg_ras_fwlog_func == PCI_FUNC(phba->pcidev->devfn) &&
 		    phba->cfg_ras_fwlog_buffsize)
 			phba->ras_fwlog.ras_enabled = true;
 		else
 			phba->ras_fwlog.ras_enabled = false;
-		break;
-	default:
+	} else {
 		phba->ras_fwlog.ras_hwsupport = false;
 	}
 }
@@ -14164,8 +15554,9 @@ void lpfc_dmp_dbg(struct lpfc_hba *phba)
 	unsigned int temp_idx;
 	int i;
 	int j = 0;
-	unsigned long rem_nsec;
-	struct lpfc_vport **vports;
+	unsigned long rem_nsec, iflags;
+	bool log_verbose = false;
+	struct lpfc_vport *port_iterator;
 
 	/* Don't dump messages if we explicitly set log_verbose for the
 	 * physical port or any vport.
@@ -14173,16 +15564,24 @@ void lpfc_dmp_dbg(struct lpfc_hba *phba)
 	if (phba->cfg_log_verbose)
 		return;
 
-	vports = lpfc_create_vport_work_array(phba);
-	if (vports != NULL) {
-		for (i = 0; i <= phba->max_vpi && vports[i] != NULL; i++) {
-			if (vports[i]->cfg_log_verbose) {
-				lpfc_destroy_vport_work_array(phba, vports);
+	spin_lock_irqsave(&phba->port_list_lock, iflags);
+	list_for_each_entry(port_iterator, &phba->port_list, listentry) {
+		if (port_iterator->load_flag & FC_UNLOADING)
+			continue;
+		if (scsi_host_get(lpfc_shost_from_vport(port_iterator))) {
+			if (port_iterator->cfg_log_verbose)
+				log_verbose = true;
+
+			scsi_host_put(lpfc_shost_from_vport(port_iterator));
+
+			if (log_verbose) {
+				spin_unlock_irqrestore(&phba->port_list_lock,
+						       iflags);
 				return;
 			}
 		}
 	}
-	lpfc_destroy_vport_work_array(phba, vports);
+	spin_unlock_irqrestore(&phba->port_list_lock, iflags);
 
 	if (atomic_cmpxchg(&phba->dbg_log_dmping, 0, 1) != 0)
 		return;
diff --git a/drivers/scsi/lpfc/lpfc_logmsg.h b/drivers/scsi/lpfc/lpfc_logmsg.h
index 5660a8729462..7d480c798794 100644
--- a/drivers/scsi/lpfc/lpfc_logmsg.h
+++ b/drivers/scsi/lpfc/lpfc_logmsg.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2021 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -44,6 +44,9 @@
 #define LOG_NVME_DISC   0x00200000      /* NVME Discovery/Connect events. */
 #define LOG_NVME_ABTS   0x00400000      /* NVME ABTS events. */
 #define LOG_NVME_IOERR  0x00800000      /* NVME IO Error events. */
+#define LOG_RSVD1	0x01000000	/* Reserved */
+#define LOG_RSVD2	0x02000000	/* Reserved */
+#define LOG_CGN_MGMT    0x04000000	/* Congestion Mgmt events */
 #define LOG_TRACE_EVENT 0x80000000	/* Dmp the DBG log on this err */
 #define LOG_ALL_MSG	0x7fffffff	/* LOG all messages */
 
diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c
index 84bc373190d8..6c754ee96bee 100644
--- a/drivers/scsi/lpfc/lpfc_mbox.c
+++ b/drivers/scsi/lpfc/lpfc_mbox.c
@@ -513,8 +513,9 @@ lpfc_init_link(struct lpfc_hba * phba,
 		break;
 	}
 
-	if ((phba->pcidev->device == PCI_DEVICE_ID_LANCER_G6_FC ||
-	     phba->pcidev->device == PCI_DEVICE_ID_LANCER_G7_FC) &&
+	/* Topology handling for ASIC_GEN_NUM 0xC and later */
+	if ((phba->sli4_hba.pc_sli4_params.sli_family == LPFC_SLI_INTF_FAMILY_G6 ||
+	     phba->sli4_hba.pc_sli4_params.if_type == LPFC_SLI_INTF_IF_TYPE_6) &&
 	    !(phba->sli4_hba.pc_sli4_params.pls) &&
 	    mb->un.varInitLnk.link_flags & FLAGS_TOPOLOGY_MODE_LOOP) {
 		mb->un.varInitLnk.link_flags = FLAGS_TOPOLOGY_MODE_PT_PT;
diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c
index be54fbf5146f..870e53b8f81d 100644
--- a/drivers/scsi/lpfc/lpfc_mem.c
+++ b/drivers/scsi/lpfc/lpfc_mem.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2021 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2014 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -335,6 +335,19 @@ lpfc_mem_free_all(struct lpfc_hba *phba)
 	dma_pool_destroy(phba->lpfc_cmd_rsp_buf_pool);
 	phba->lpfc_cmd_rsp_buf_pool = NULL;
 
+	/* Free Congestion Data buffer */
+	if (phba->cgn_i) {
+		dma_free_coherent(&phba->pcidev->dev,
+				  sizeof(struct lpfc_cgn_info),
+				  phba->cgn_i->virt, phba->cgn_i->phys);
+		kfree(phba->cgn_i);
+		phba->cgn_i = NULL;
+	}
+
+	/* Free RX table */
+	kfree(phba->rxtable);
+	phba->rxtable = NULL;
+
 	/* Free the iocb lookup array */
 	kfree(psli->iocbq_lookup);
 	psli->iocbq_lookup = NULL;
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index e12f83fb795c..27263f02ab9f 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -736,9 +736,13 @@ out:
 		 * is already in MAPPED or UNMAPPED state.  Catch this
 		 * condition and don't set the nlp_state again because
 		 * it causes an unnecessary transport unregister/register.
+		 *
+		 * Nodes marked for ADISC will move MAPPED or UNMAPPED state
+		 * after issuing ADISC
 		 */
 		if (ndlp->nlp_type & (NLP_FCP_TARGET | NLP_NVME_TARGET)) {
-			if (ndlp->nlp_state != NLP_STE_MAPPED_NODE)
+			if ((ndlp->nlp_state != NLP_STE_MAPPED_NODE) &&
+			    !(ndlp->nlp_flag & NLP_NPR_ADISC))
 				lpfc_nlp_set_state(vport, ndlp,
 						   NLP_STE_MAPPED_NODE);
 		}
@@ -863,6 +867,9 @@ lpfc_rcv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 		ndlp->nlp_last_elscmd = ELS_CMD_PLOGI;
 	}
 out:
+	/* Unregister from backend, could have been skipped due to ADISC */
+	lpfc_nlp_unreg_node(vport, ndlp);
+
 	ndlp->nlp_prev_state = ndlp->nlp_state;
 	lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
 
@@ -1677,9 +1684,6 @@ lpfc_cmpl_adisc_adisc_issue(struct lpfc_vport *vport,
 		spin_unlock_irq(&ndlp->lock);
 		ndlp->nlp_last_elscmd = ELS_CMD_PLOGI;
 
-		memset(&ndlp->nlp_nodename, 0, sizeof(struct lpfc_name));
-		memset(&ndlp->nlp_portname, 0, sizeof(struct lpfc_name));
-
 		ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE;
 		lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
 		lpfc_unreg_rpi(vport, ndlp);
@@ -2597,13 +2601,14 @@ lpfc_device_recov_mapped_node(struct lpfc_vport *vport,
 			      void *arg,
 			      uint32_t evt)
 {
+	lpfc_disc_set_adisc(vport, ndlp);
+
 	ndlp->nlp_prev_state = NLP_STE_MAPPED_NODE;
 	lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
 	spin_lock_irq(&ndlp->lock);
 	ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC);
 	ndlp->nlp_fc4_type &= ~(NLP_FC4_FCP | NLP_FC4_NVME);
 	spin_unlock_irq(&ndlp->lock);
-	lpfc_disc_set_adisc(vport, ndlp);
 	return ndlp->nlp_state;
 }
 
@@ -2645,14 +2650,13 @@ lpfc_rcv_prli_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 	lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL);
 
 	if (!(ndlp->nlp_flag & NLP_DELAY_TMO)) {
-		if (ndlp->nlp_flag & NLP_NPR_ADISC) {
-			spin_lock_irq(&ndlp->lock);
-			ndlp->nlp_flag &= ~NLP_NPR_ADISC;
-			ndlp->nlp_prev_state = NLP_STE_NPR_NODE;
-			spin_unlock_irq(&ndlp->lock);
-			lpfc_nlp_set_state(vport, ndlp, NLP_STE_ADISC_ISSUE);
-			lpfc_issue_els_adisc(vport, ndlp, 0);
-		} else {
+		/*
+		 * ADISC nodes will be handled in regular discovery path after
+		 * receiving response from NS.
+		 *
+		 * For other nodes, Send PLOGI to trigger an implicit LOGO.
+		 */
+		if (!(ndlp->nlp_flag & NLP_NPR_ADISC)) {
 			ndlp->nlp_prev_state = NLP_STE_NPR_NODE;
 			lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE);
 			lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0);
@@ -2685,12 +2689,13 @@ lpfc_rcv_padisc_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 	 */
 	if (!(ndlp->nlp_flag & NLP_DELAY_TMO) &&
 	    !(ndlp->nlp_flag & NLP_NPR_2B_DISC)) {
-		if (ndlp->nlp_flag & NLP_NPR_ADISC) {
-			ndlp->nlp_flag &= ~NLP_NPR_ADISC;
-			ndlp->nlp_prev_state = NLP_STE_NPR_NODE;
-			lpfc_nlp_set_state(vport, ndlp, NLP_STE_ADISC_ISSUE);
-			lpfc_issue_els_adisc(vport, ndlp, 0);
-		} else {
+		/*
+		 * ADISC nodes will be handled in regular discovery path after
+		 * receiving response from NS.
+		 *
+		 * For other nodes, Send PLOGI to trigger an implicit LOGO.
+		 */
+		if (!(ndlp->nlp_flag & NLP_NPR_ADISC)) {
 			ndlp->nlp_prev_state = NLP_STE_NPR_NODE;
 			lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE);
 			lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0);
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index bcc804cefd30..73a3568ff17e 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -216,8 +216,8 @@ lpfc_nvme_remoteport_delete(struct nvme_fc_remote_port *remoteport)
 	/* The register rebind might have occurred before the delete
 	 * downcall.  Guard against this race.
 	 */
-	if (ndlp->fc4_xpt_flags & NLP_WAIT_FOR_UNREG)
-		ndlp->fc4_xpt_flags &= ~(NLP_WAIT_FOR_UNREG | NVME_XPT_REGD);
+	if (ndlp->fc4_xpt_flags & NVME_XPT_UNREG_WAIT)
+		ndlp->fc4_xpt_flags &= ~(NVME_XPT_UNREG_WAIT | NVME_XPT_REGD);
 
 	spin_unlock_irq(&ndlp->lock);
 
@@ -931,6 +931,8 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
 	uint32_t code, status, idx;
 	uint16_t cid, sqhd, data;
 	uint32_t *ptr;
+	uint32_t lat;
+	bool call_done = false;
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 	int cpu;
 #endif
@@ -1135,10 +1137,21 @@ out_err:
 		freqpriv = nCmd->private;
 		freqpriv->nvme_buf = NULL;
 		lpfc_ncmd->nvmeCmd = NULL;
-		spin_unlock(&lpfc_ncmd->buf_lock);
+		call_done = true;
+	}
+	spin_unlock(&lpfc_ncmd->buf_lock);
+
+	/* Check if IO qualified for CMF */
+	if (phba->cmf_active_mode != LPFC_CFG_OFF &&
+	    nCmd->io_dir == NVMEFC_FCP_READ &&
+	    nCmd->payload_length) {
+		/* Used when calculating average latency */
+		lat = ktime_get_ns() - lpfc_ncmd->rx_cmd_start;
+		lpfc_update_cmf_cmpl(phba, lat, nCmd->payload_length, NULL);
+	}
+
+	if (call_done)
 		nCmd->done(nCmd);
-	} else
-		spin_unlock(&lpfc_ncmd->buf_lock);
 
 	/* Call release with XB=1 to queue the IO into the abort list. */
 	lpfc_release_nvme_buf(phba, lpfc_ncmd);
@@ -1212,6 +1225,10 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
 			/* Word 5 */
 			wqe->fcp_iread.rsrvd5 = 0;
 
+			/* For a CMF Managed port, iod must be zero'ed */
+			if (phba->cmf_active_mode == LPFC_CFG_MANAGED)
+				bf_set(wqe_iod, &wqe->fcp_iread.wqe_com,
+				       LPFC_WQE_IOD_NONE);
 			cstat->input_requests++;
 		}
 	} else {
@@ -1562,6 +1579,19 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
 			expedite = 1;
 	}
 
+	/* Check if IO qualifies for CMF */
+	if (phba->cmf_active_mode != LPFC_CFG_OFF &&
+	    pnvme_fcreq->io_dir == NVMEFC_FCP_READ &&
+	    pnvme_fcreq->payload_length) {
+		ret = lpfc_update_cmf_cmd(phba, pnvme_fcreq->payload_length);
+		if (ret) {
+			ret = -EBUSY;
+			goto out_fail;
+		}
+		/* Get start time for IO latency */
+		start = ktime_get_ns();
+	}
+
 	/* The node is shared with FCP IO, make sure the IO pending count does
 	 * not exceed the programmed depth.
 	 */
@@ -1576,7 +1606,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
 					 ndlp->cmd_qdepth);
 			atomic_inc(&lport->xmt_fcp_qdepth);
 			ret = -EBUSY;
-			goto out_fail;
+			goto out_fail1;
 		}
 	}
 
@@ -1596,7 +1626,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
 				 "idx %d DID %x\n",
 				 lpfc_queue_info->index, ndlp->nlp_DID);
 		ret = -EBUSY;
-		goto out_fail;
+		goto out_fail1;
 	}
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 	if (start) {
@@ -1606,6 +1636,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
 		lpfc_ncmd->ts_cmd_start = 0;
 	}
 #endif
+	lpfc_ncmd->rx_cmd_start = start;
 
 	/*
 	 * Store the data needed by the driver to issue, abort, and complete
@@ -1687,6 +1718,9 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
 	} else
 		cstat->control_requests--;
 	lpfc_release_nvme_buf(phba, lpfc_ncmd);
+ out_fail1:
+	lpfc_update_cmf_cmpl(phba, LPFC_CGN_NOT_SENT,
+			     pnvme_fcreq->payload_length, NULL);
  out_fail:
 	return ret;
 }
@@ -2324,7 +2358,7 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 		 * race that leaves the WAIT flag set.
 		 */
 		spin_lock_irq(&ndlp->lock);
-		ndlp->fc4_xpt_flags &= ~NLP_WAIT_FOR_UNREG;
+		ndlp->fc4_xpt_flags &= ~NVME_XPT_UNREG_WAIT;
 		ndlp->fc4_xpt_flags |= NVME_XPT_REGD;
 		spin_unlock_irq(&ndlp->lock);
 		rport = remote_port->private;
@@ -2336,7 +2370,7 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 			 */
 			spin_lock_irq(&ndlp->lock);
 			ndlp->nrport = NULL;
-			ndlp->fc4_xpt_flags &= ~NLP_WAIT_FOR_UNREG;
+			ndlp->fc4_xpt_flags &= ~NVME_XPT_UNREG_WAIT;
 			spin_unlock_irq(&ndlp->lock);
 			rport->ndlp = NULL;
 			rport->remoteport = NULL;
@@ -2488,7 +2522,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 		 * The transport will update it.
 		 */
 		spin_lock_irq(&vport->phba->hbalock);
-		ndlp->fc4_xpt_flags |= NLP_WAIT_FOR_UNREG;
+		ndlp->fc4_xpt_flags |= NVME_XPT_UNREG_WAIT;
 		spin_unlock_irq(&vport->phba->hbalock);
 
 		/* Don't let the host nvme transport keep sending keep-alives
diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h
index 69a5a844c69c..cc54ffb5c205 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.h
+++ b/drivers/scsi/lpfc/lpfc_nvme.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2020 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2021 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.  *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -34,11 +34,8 @@
 #define LPFC_NVME_FB_SHIFT		9
 #define LPFC_NVME_MAX_FB		(1 << 20)	/* 1M */
 
-#define LPFC_MAX_NVME_INFO_TMP_LEN	100
-#define LPFC_NVME_INFO_MORE_STR		"\nCould be more info...\n"
-
-#define lpfc_ndlp_get_nrport(ndlp)					\
-	((!ndlp->nrport || (ndlp->fc4_xpt_flags & NLP_WAIT_FOR_UNREG))	\
+#define lpfc_ndlp_get_nrport(ndlp)				\
+	((!ndlp->nrport || (ndlp->fc4_xpt_flags & NVME_XPT_UNREG_WAIT))\
 	? NULL : ndlp->nrport)
 
 struct lpfc_nvme_qhandle {
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index f2d9a3580887..6e3dd0b9bcfa 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -1797,19 +1797,22 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
 		if (ctxp->ctxbuf->sglq->sli4_xritag != xri)
 			continue;
 
-		spin_lock(&ctxp->ctxlock);
+		spin_unlock_irqrestore(&phba->sli4_hba.abts_nvmet_buf_list_lock,
+				       iflag);
+
+		spin_lock_irqsave(&ctxp->ctxlock, iflag);
 		/* Check if we already received a free context call
 		 * and we have completed processing an abort situation.
 		 */
 		if (ctxp->flag & LPFC_NVME_CTX_RLS &&
 		    !(ctxp->flag & LPFC_NVME_ABORT_OP)) {
+			spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
 			list_del_init(&ctxp->list);
+			spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
 			released = true;
 		}
 		ctxp->flag &= ~LPFC_NVME_XBUSY;
-		spin_unlock(&ctxp->ctxlock);
-		spin_unlock_irqrestore(&phba->sli4_hba.abts_nvmet_buf_list_lock,
-				       iflag);
+		spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
 
 		rrq_empty = list_empty(&phba->active_rrq_list);
 		ndlp = lpfc_findnode_did(phba->pport, ctxp->sid);
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 1b248c237be1..0fde1e874c7a 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -96,30 +96,6 @@ static void lpfc_vmid_update_entry(struct lpfc_vport *vport, struct scsi_cmnd
 static void lpfc_vmid_assign_cs_ctl(struct lpfc_vport *vport,
 				    struct lpfc_vmid *vmid);
 
-static inline unsigned
-lpfc_cmd_blksize(struct scsi_cmnd *sc)
-{
-	return sc->device->sector_size;
-}
-
-#define LPFC_CHECK_PROTECT_GUARD	1
-#define LPFC_CHECK_PROTECT_REF		2
-static inline unsigned
-lpfc_cmd_protect(struct scsi_cmnd *sc, int flag)
-{
-	return 1;
-}
-
-static inline unsigned
-lpfc_cmd_guard_csum(struct scsi_cmnd *sc)
-{
-	if (lpfc_prot_group_type(NULL, sc) == LPFC_PG_TYPE_NO_DIF)
-		return 0;
-	if (scsi_host_get_guard(sc->device->host) == SHOST_DIX_GUARD_IP)
-		return 1;
-	return 0;
-}
-
 /**
  * lpfc_sli4_set_rsp_sgl_last - Set the last bit in the response sge.
  * @phba: Pointer to HBA object.
@@ -683,7 +659,7 @@ lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
 
 	cpu = raw_smp_processor_id();
 	if (cmnd && phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ) {
-		tag = blk_mq_unique_tag(cmnd->request);
+		tag = blk_mq_unique_tag(scsi_cmd_to_rq(cmnd));
 		idx = blk_mq_unique_tag_to_hwq(tag);
 	} else {
 		idx = phba->sli4_hba.cpu_map[cpu].hdwq;
@@ -1046,13 +1022,13 @@ lpfc_bg_err_inject(struct lpfc_hba *phba, struct scsi_cmnd *sc,
 		return 0;
 
 	sgpe = scsi_prot_sglist(sc);
-	lba = t10_pi_ref_tag(sc->request);
+	lba = scsi_prot_ref_tag(sc);
 	if (lba == LPFC_INVALID_REFTAG)
 		return 0;
 
 	/* First check if we need to match the LBA */
 	if (phba->lpfc_injerr_lba != LPFC_INJERR_LBA_OFF) {
-		blksize = lpfc_cmd_blksize(sc);
+		blksize = scsi_prot_interval(sc);
 		numblks = (scsi_bufflen(sc) + blksize - 1) / blksize;
 
 		/* Make sure we have the right LBA if one is specified */
@@ -1441,7 +1417,7 @@ lpfc_sc_to_bg_opcodes(struct lpfc_hba *phba, struct scsi_cmnd *sc,
 {
 	uint8_t ret = 0;
 
-	if (lpfc_cmd_guard_csum(sc)) {
+	if (sc->prot_flags & SCSI_PROT_IP_CHECKSUM) {
 		switch (scsi_get_prot_op(sc)) {
 		case SCSI_PROT_READ_INSERT:
 		case SCSI_PROT_WRITE_STRIP:
@@ -1521,7 +1497,7 @@ lpfc_bg_err_opcodes(struct lpfc_hba *phba, struct scsi_cmnd *sc,
 {
 	uint8_t ret = 0;
 
-	if (lpfc_cmd_guard_csum(sc)) {
+	if (sc->prot_flags & SCSI_PROT_IP_CHECKSUM) {
 		switch (scsi_get_prot_op(sc)) {
 		case SCSI_PROT_READ_INSERT:
 		case SCSI_PROT_WRITE_STRIP:
@@ -1629,7 +1605,7 @@ lpfc_bg_setup_bpl(struct lpfc_hba *phba, struct scsi_cmnd *sc,
 		goto out;
 
 	/* extract some info from the scsi command for pde*/
-	reftag = t10_pi_ref_tag(sc->request);
+	reftag = scsi_prot_ref_tag(sc);
 	if (reftag == LPFC_INVALID_REFTAG)
 		goto out;
 
@@ -1668,12 +1644,12 @@ lpfc_bg_setup_bpl(struct lpfc_hba *phba, struct scsi_cmnd *sc,
 	 * protection data is automatically generated, not checked.
 	 */
 	if (datadir == DMA_FROM_DEVICE) {
-		if (lpfc_cmd_protect(sc, LPFC_CHECK_PROTECT_GUARD))
+		if (sc->prot_flags & SCSI_PROT_GUARD_CHECK)
 			bf_set(pde6_ce, pde6, checking);
 		else
 			bf_set(pde6_ce, pde6, 0);
 
-		if (lpfc_cmd_protect(sc, LPFC_CHECK_PROTECT_REF))
+		if (sc->prot_flags & SCSI_PROT_REF_CHECK)
 			bf_set(pde6_re, pde6, checking);
 		else
 			bf_set(pde6_re, pde6, 0);
@@ -1791,8 +1767,8 @@ lpfc_bg_setup_bpl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
 		goto out;
 
 	/* extract some info from the scsi command */
-	blksize = lpfc_cmd_blksize(sc);
-	reftag = t10_pi_ref_tag(sc->request);
+	blksize = scsi_prot_interval(sc);
+	reftag = scsi_prot_ref_tag(sc);
 	if (reftag == LPFC_INVALID_REFTAG)
 		goto out;
 
@@ -1832,12 +1808,12 @@ lpfc_bg_setup_bpl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
 		bf_set(pde6_optx, pde6, txop);
 		bf_set(pde6_oprx, pde6, rxop);
 
-		if (lpfc_cmd_protect(sc, LPFC_CHECK_PROTECT_GUARD))
+		if (sc->prot_flags & SCSI_PROT_GUARD_CHECK)
 			bf_set(pde6_ce, pde6, checking);
 		else
 			bf_set(pde6_ce, pde6, 0);
 
-		if (lpfc_cmd_protect(sc, LPFC_CHECK_PROTECT_REF))
+		if (sc->prot_flags & SCSI_PROT_REF_CHECK)
 			bf_set(pde6_re, pde6, checking);
 		else
 			bf_set(pde6_re, pde6, 0);
@@ -2023,7 +1999,7 @@ lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc,
 		goto out;
 
 	/* extract some info from the scsi command for pde*/
-	reftag = t10_pi_ref_tag(sc->request);
+	reftag = scsi_prot_ref_tag(sc);
 	if (reftag == LPFC_INVALID_REFTAG)
 		goto out;
 
@@ -2051,12 +2027,12 @@ lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc,
 	 * protection data is automatically generated, not checked.
 	 */
 	if (sc->sc_data_direction == DMA_FROM_DEVICE) {
-		if (lpfc_cmd_protect(sc, LPFC_CHECK_PROTECT_GUARD))
+		if (sc->prot_flags & SCSI_PROT_GUARD_CHECK)
 			bf_set(lpfc_sli4_sge_dif_ce, diseed, checking);
 		else
 			bf_set(lpfc_sli4_sge_dif_ce, diseed, 0);
 
-		if (lpfc_cmd_protect(sc, LPFC_CHECK_PROTECT_REF))
+		if (sc->prot_flags & SCSI_PROT_REF_CHECK)
 			bf_set(lpfc_sli4_sge_dif_re, diseed, checking);
 		else
 			bf_set(lpfc_sli4_sge_dif_re, diseed, 0);
@@ -2223,8 +2199,8 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
 		goto out;
 
 	/* extract some info from the scsi command */
-	blksize = lpfc_cmd_blksize(sc);
-	reftag = t10_pi_ref_tag(sc->request);
+	blksize = scsi_prot_interval(sc);
+	reftag = scsi_prot_ref_tag(sc);
 	if (reftag == LPFC_INVALID_REFTAG)
 		goto out;
 
@@ -2281,9 +2257,8 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
 		diseed->ref_tag = cpu_to_le32(reftag);
 		diseed->ref_tag_tran = diseed->ref_tag;
 
-		if (lpfc_cmd_protect(sc, LPFC_CHECK_PROTECT_GUARD)) {
+		if (sc->prot_flags & SCSI_PROT_GUARD_CHECK) {
 			bf_set(lpfc_sli4_sge_dif_ce, diseed, checking);
-
 		} else {
 			bf_set(lpfc_sli4_sge_dif_ce, diseed, 0);
 			/*
@@ -2300,7 +2275,7 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
 		}
 
 
-		if (lpfc_cmd_protect(sc, LPFC_CHECK_PROTECT_REF))
+		if (sc->prot_flags & SCSI_PROT_REF_CHECK)
 			bf_set(lpfc_sli4_sge_dif_re, diseed, checking);
 		else
 			bf_set(lpfc_sli4_sge_dif_re, diseed, 0);
@@ -2557,7 +2532,7 @@ lpfc_bg_scsi_adjust_dl(struct lpfc_hba *phba,
 	 * DIF (trailer) attached to it. Must ajust FCP data length
 	 * to account for the protection data.
 	 */
-	fcpdl += (fcpdl / lpfc_cmd_blksize(sc)) * 8;
+	fcpdl += (fcpdl / scsi_prot_interval(sc)) * 8;
 
 	return fcpdl;
 }
@@ -2811,14 +2786,14 @@ lpfc_calc_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
 		 * data length is a multiple of the blksize.
 		 */
 		sgde = scsi_sglist(cmd);
-		blksize = lpfc_cmd_blksize(cmd);
+		blksize = scsi_prot_interval(cmd);
 		data_src = (uint8_t *)sg_virt(sgde);
 		data_len = sgde->length;
 		if ((data_len & (blksize - 1)) == 0)
 			chk_guard = 1;
 
 		src = (struct scsi_dif_tuple *)sg_virt(sgpe);
-		start_ref_tag = t10_pi_ref_tag(cmd->request);
+		start_ref_tag = scsi_prot_ref_tag(cmd);
 		if (start_ref_tag == LPFC_INVALID_REFTAG)
 			goto out;
 		start_app_tag = src->app_tag;
@@ -2839,7 +2814,8 @@ lpfc_calc_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
 				/* First Guard Tag checking */
 				if (chk_guard) {
 					guard_tag = src->guard_tag;
-					if (lpfc_cmd_guard_csum(cmd))
+					if (cmd->prot_flags
+					    & SCSI_PROT_IP_CHECKSUM)
 						sum = lpfc_bg_csum(data_src,
 								   blksize);
 					else
@@ -2910,7 +2886,7 @@ out:
 		phba->bg_guard_err_cnt++;
 		lpfc_printf_log(phba, KERN_WARNING, LOG_FCP | LOG_BG,
 				"9069 BLKGRD: reftag %x grd_tag err %x != %x\n",
-				t10_pi_ref_tag(cmd->request),
+				scsi_prot_ref_tag(cmd),
 				sum, guard_tag);
 
 	} else if (err_type == BGS_REFTAG_ERR_MASK) {
@@ -2920,7 +2896,7 @@ out:
 		phba->bg_reftag_err_cnt++;
 		lpfc_printf_log(phba, KERN_WARNING, LOG_FCP | LOG_BG,
 				"9066 BLKGRD: reftag %x ref_tag err %x != %x\n",
-				t10_pi_ref_tag(cmd->request),
+				scsi_prot_ref_tag(cmd),
 				ref_tag, start_ref_tag);
 
 	} else if (err_type == BGS_APPTAG_ERR_MASK) {
@@ -2930,7 +2906,7 @@ out:
 		phba->bg_apptag_err_cnt++;
 		lpfc_printf_log(phba, KERN_WARNING, LOG_FCP | LOG_BG,
 				"9041 BLKGRD: reftag %x app_tag err %x != %x\n",
-				t10_pi_ref_tag(cmd->request),
+				scsi_prot_ref_tag(cmd),
 				app_tag, start_app_tag);
 	}
 }
@@ -2992,7 +2968,7 @@ lpfc_sli4_parse_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd,
 				" 0x%x lba 0x%llx blk cnt 0x%x "
 				"bgstat=x%x bghm=x%x\n", cmd->cmnd[0],
 				(unsigned long long)scsi_get_lba(cmd),
-				blk_rq_sectors(cmd->request), bgstat, bghm);
+				scsi_logical_block_count(cmd), bgstat, bghm);
 	}
 
 	if (lpfc_bgs_get_reftag_err(bgstat)) {
@@ -3007,7 +2983,7 @@ lpfc_sli4_parse_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd,
 				" 0x%x lba 0x%llx blk cnt 0x%x "
 				"bgstat=x%x bghm=x%x\n", cmd->cmnd[0],
 				(unsigned long long)scsi_get_lba(cmd),
-				blk_rq_sectors(cmd->request), bgstat, bghm);
+				scsi_logical_block_count(cmd), bgstat, bghm);
 	}
 
 	if (lpfc_bgs_get_apptag_err(bgstat)) {
@@ -3022,7 +2998,7 @@ lpfc_sli4_parse_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd,
 				" 0x%x lba 0x%llx blk cnt 0x%x "
 				"bgstat=x%x bghm=x%x\n", cmd->cmnd[0],
 				(unsigned long long)scsi_get_lba(cmd),
-				blk_rq_sectors(cmd->request), bgstat, bghm);
+				scsi_logical_block_count(cmd), bgstat, bghm);
 	}
 
 	if (lpfc_bgs_get_hi_water_mark_present(bgstat)) {
@@ -3066,9 +3042,9 @@ lpfc_sli4_parse_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd,
 				" 0x%x lba 0x%llx blk cnt 0x%x "
 				"bgstat=x%x bghm=x%x\n", cmd->cmnd[0],
 				(unsigned long long)scsi_get_lba(cmd),
-				blk_rq_sectors(cmd->request), bgstat, bghm);
+				scsi_logical_block_count(cmd), bgstat, bghm);
 
-		/* Calcuate what type of error it was */
+		/* Calculate what type of error it was */
 		lpfc_calc_bg_err(phba, lpfc_cmd);
 	}
 	return ret;
@@ -3103,8 +3079,8 @@ lpfc_parse_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd,
 				"9072 BLKGRD: Invalid BG Profile in cmd "
 				"0x%x reftag 0x%x blk cnt 0x%x "
 				"bgstat=x%x bghm=x%x\n", cmd->cmnd[0],
-				t10_pi_ref_tag(cmd->request),
-				blk_rq_sectors(cmd->request), bgstat, bghm);
+				scsi_prot_ref_tag(cmd),
+				scsi_logical_block_count(cmd), bgstat, bghm);
 		ret = (-1);
 		goto out;
 	}
@@ -3115,8 +3091,8 @@ lpfc_parse_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd,
 				"9073 BLKGRD: Invalid BG PDIF Block in cmd "
 				"0x%x reftag 0x%x blk cnt 0x%x "
 				"bgstat=x%x bghm=x%x\n", cmd->cmnd[0],
-				t10_pi_ref_tag(cmd->request),
-				blk_rq_sectors(cmd->request), bgstat, bghm);
+				scsi_prot_ref_tag(cmd),
+				scsi_logical_block_count(cmd), bgstat, bghm);
 		ret = (-1);
 		goto out;
 	}
@@ -3131,8 +3107,8 @@ lpfc_parse_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd,
 				"9055 BLKGRD: Guard Tag error in cmd "
 				"0x%x reftag 0x%x blk cnt 0x%x "
 				"bgstat=x%x bghm=x%x\n", cmd->cmnd[0],
-				t10_pi_ref_tag(cmd->request),
-				blk_rq_sectors(cmd->request), bgstat, bghm);
+				scsi_prot_ref_tag(cmd),
+				scsi_logical_block_count(cmd), bgstat, bghm);
 	}
 
 	if (lpfc_bgs_get_reftag_err(bgstat)) {
@@ -3146,8 +3122,8 @@ lpfc_parse_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd,
 				"9056 BLKGRD: Ref Tag error in cmd "
 				"0x%x reftag 0x%x blk cnt 0x%x "
 				"bgstat=x%x bghm=x%x\n", cmd->cmnd[0],
-				t10_pi_ref_tag(cmd->request),
-				blk_rq_sectors(cmd->request), bgstat, bghm);
+				scsi_prot_ref_tag(cmd),
+				scsi_logical_block_count(cmd), bgstat, bghm);
 	}
 
 	if (lpfc_bgs_get_apptag_err(bgstat)) {
@@ -3161,8 +3137,8 @@ lpfc_parse_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd,
 				"9061 BLKGRD: App Tag error in cmd "
 				"0x%x reftag 0x%x blk cnt 0x%x "
 				"bgstat=x%x bghm=x%x\n", cmd->cmnd[0],
-				t10_pi_ref_tag(cmd->request),
-				blk_rq_sectors(cmd->request), bgstat, bghm);
+				scsi_prot_ref_tag(cmd),
+				scsi_logical_block_count(cmd), bgstat, bghm);
 	}
 
 	if (lpfc_bgs_get_hi_water_mark_present(bgstat)) {
@@ -3205,10 +3181,10 @@ lpfc_parse_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd,
 				"9057 BLKGRD: Unknown error in cmd "
 				"0x%x reftag 0x%x blk cnt 0x%x "
 				"bgstat=x%x bghm=x%x\n", cmd->cmnd[0],
-				t10_pi_ref_tag(cmd->request),
-				blk_rq_sectors(cmd->request), bgstat, bghm);
+				scsi_prot_ref_tag(cmd),
+				scsi_logical_block_count(cmd), bgstat, bghm);
 
-		/* Calcuate what type of error it was */
+		/* Calculate what type of error it was */
 		lpfc_calc_bg_err(phba, lpfc_cmd);
 	}
 out:
@@ -3854,6 +3830,143 @@ lpfc_scsi_unprep_dma_buf(struct lpfc_hba *phba, struct lpfc_io_buf *psb)
 }
 
 /**
+ * lpfc_unblock_requests - allow further commands to be queued.
+ * @phba: pointer to phba object
+ *
+ * For single vport, just call scsi_unblock_requests on physical port.
+ * For multiple vports, send scsi_unblock_requests for all the vports.
+ */
+void
+lpfc_unblock_requests(struct lpfc_hba *phba)
+{
+	struct lpfc_vport **vports;
+	struct Scsi_Host  *shost;
+	int i;
+
+	if (phba->sli_rev == LPFC_SLI_REV4 &&
+	    !phba->sli4_hba.max_cfg_param.vpi_used) {
+		shost = lpfc_shost_from_vport(phba->pport);
+		scsi_unblock_requests(shost);
+		return;
+	}
+
+	vports = lpfc_create_vport_work_array(phba);
+	if (vports != NULL)
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
+			shost = lpfc_shost_from_vport(vports[i]);
+			scsi_unblock_requests(shost);
+		}
+	lpfc_destroy_vport_work_array(phba, vports);
+}
+
+/**
+ * lpfc_block_requests - prevent further commands from being queued.
+ * @phba: pointer to phba object
+ *
+ * For single vport, just call scsi_block_requests on physical port.
+ * For multiple vports, send scsi_block_requests for all the vports.
+ */
+void
+lpfc_block_requests(struct lpfc_hba *phba)
+{
+	struct lpfc_vport **vports;
+	struct Scsi_Host  *shost;
+	int i;
+
+	if (atomic_read(&phba->cmf_stop_io))
+		return;
+
+	if (phba->sli_rev == LPFC_SLI_REV4 &&
+	    !phba->sli4_hba.max_cfg_param.vpi_used) {
+		shost = lpfc_shost_from_vport(phba->pport);
+		scsi_block_requests(shost);
+		return;
+	}
+
+	vports = lpfc_create_vport_work_array(phba);
+	if (vports != NULL)
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
+			shost = lpfc_shost_from_vport(vports[i]);
+			scsi_block_requests(shost);
+		}
+	lpfc_destroy_vport_work_array(phba, vports);
+}
+
+/**
+ * lpfc_update_cmf_cmpl - Adjust CMF counters for IO completion
+ * @phba: The HBA for which this call is being executed.
+ * @time: The latency of the IO that completed (in ns)
+ * @size: The size of the IO that completed
+ * @shost: SCSI host the IO completed on (NULL for a NVME IO)
+ *
+ * The routine adjusts the various Burst and Bandwidth counters used in
+ * Congestion management and E2E. If time is set to LPFC_CGN_NOT_SENT,
+ * that means the IO was never issued to the HBA, so this routine is
+ * just being called to cleanup the counter from a previous
+ * lpfc_update_cmf_cmd call.
+ */
+int
+lpfc_update_cmf_cmpl(struct lpfc_hba *phba,
+		     uint64_t time, uint32_t size, struct Scsi_Host *shost)
+{
+	struct lpfc_cgn_stat *cgs;
+
+	if (time != LPFC_CGN_NOT_SENT) {
+		/* lat is ns coming in, save latency in us */
+		if (time < 1000)
+			time = 1;
+		else
+			time = div_u64(time + 500, 1000); /* round it */
+
+		cgs = this_cpu_ptr(phba->cmf_stat);
+		atomic64_add(size, &cgs->rcv_bytes);
+		atomic64_add(time, &cgs->rx_latency);
+		atomic_inc(&cgs->rx_io_cnt);
+	}
+	return 0;
+}
+
+/**
+ * lpfc_update_cmf_cmd - Adjust CMF counters for IO submission
+ * @phba: The HBA for which this call is being executed.
+ * @size: The size of the IO that will be issued
+ *
+ * The routine adjusts the various Burst and Bandwidth counters used in
+ * Congestion management and E2E.
+ */
+int
+lpfc_update_cmf_cmd(struct lpfc_hba *phba, uint32_t size)
+{
+	uint64_t total;
+	struct lpfc_cgn_stat *cgs;
+	int cpu;
+
+	/* At this point we are either LPFC_CFG_MANAGED or LPFC_CFG_MONITOR */
+	if (phba->cmf_active_mode == LPFC_CFG_MANAGED) {
+		total = 0;
+		for_each_present_cpu(cpu) {
+			cgs = per_cpu_ptr(phba->cmf_stat, cpu);
+			total += atomic64_read(&cgs->total_bytes);
+		}
+		if (total >= phba->cmf_max_bytes_per_interval) {
+			if (!atomic_xchg(&phba->cmf_bw_wait, 1)) {
+				lpfc_block_requests(phba);
+				phba->cmf_last_ts =
+					lpfc_calc_cmf_latency(phba);
+			}
+			atomic_inc(&phba->cmf_busy);
+			return -EBUSY;
+		}
+		if (size > atomic_read(&phba->rx_max_read_cnt))
+			atomic_set(&phba->rx_max_read_cnt, size);
+	}
+
+	cgs = this_cpu_ptr(phba->cmf_stat);
+	atomic64_add(size, &cgs->total_bytes);
+	return 0;
+}
+
+/**
  * lpfc_handle_fcp_err - FCP response handler
  * @vport: The virtual port for which this call is being executed.
  * @lpfc_cmd: Pointer to lpfc_io_buf data structure.
@@ -4063,6 +4176,7 @@ lpfc_fcp_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
 	u32 logit = LOG_FCP;
 	u32 status, idx;
 	unsigned long iflags = 0;
+	u32 lat;
 	u8 wait_xb_clr = 0;
 
 	/* Sanity check on return of outstanding command */
@@ -4351,10 +4465,21 @@ lpfc_fcp_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
 		lpfc_io_ktime(phba, lpfc_cmd);
 	}
 #endif
+	if (likely(!wait_xb_clr))
+		lpfc_cmd->pCmd = NULL;
+	spin_unlock(&lpfc_cmd->buf_lock);
+
+	/* Check if IO qualified for CMF */
+	if (phba->cmf_active_mode != LPFC_CFG_OFF &&
+	    cmd->sc_data_direction == DMA_FROM_DEVICE &&
+	    (scsi_sg_count(cmd))) {
+		/* Used when calculating average latency */
+		lat = ktime_get_ns() - lpfc_cmd->rx_cmd_start;
+		lpfc_update_cmf_cmpl(phba, lat, scsi_bufflen(cmd), shost);
+	}
+
 	if (wait_xb_clr)
 		goto out;
-	lpfc_cmd->pCmd = NULL;
-	spin_unlock(&lpfc_cmd->buf_lock);
 
 	/* The sdev is not guaranteed to be valid post scsi_done upcall. */
 	cmd->scsi_done(cmd);
@@ -4367,8 +4492,8 @@ lpfc_fcp_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
 	lpfc_cmd->cur_iocbq.iocb_flag &= ~LPFC_DRIVER_ABORTED;
 	if (lpfc_cmd->waitq)
 		wake_up(lpfc_cmd->waitq);
-out:
 	spin_unlock(&lpfc_cmd->buf_lock);
+out:
 	lpfc_release_scsi_buf(phba, lpfc_cmd);
 }
 
@@ -4775,6 +4900,11 @@ static int lpfc_scsi_prep_cmnd_buf_s4(struct lpfc_vport *vport,
 			fcp_cmnd->fcpCntl3 = READ_DATA;
 			if (hdwq)
 				hdwq->scsi_cstat.input_requests++;
+
+			/* For a CMF Managed port, iod must be zero'ed */
+			if (phba->cmf_active_mode == LPFC_CFG_MANAGED)
+				bf_set(wqe_iod, &wqe->fcp_iread.wqe_com,
+				       LPFC_WQE_IOD_NONE);
 		}
 	} else {
 		/* From the icmnd template, initialize words 4 - 11 */
@@ -5029,12 +5159,8 @@ lpfc_check_pci_resettable(struct lpfc_hba *phba)
 		}
 
 		/* Check for valid Emulex Device ID */
-		switch (ptr->device) {
-		case PCI_DEVICE_ID_LANCER_FC:
-		case PCI_DEVICE_ID_LANCER_G6_FC:
-		case PCI_DEVICE_ID_LANCER_G7_FC:
-			break;
-		default:
+		if (phba->sli_rev != LPFC_SLI_REV4 ||
+		    phba->hba_flag & HBA_FCOE_MODE) {
 			lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
 					"8347 Incapable PCI reset device: "
 					"0x%04x\n", ptr->device);
@@ -5423,13 +5549,9 @@ static int lpfc_vmid_get_appid(struct lpfc_vport *vport, char *uuid, struct
  */
 static char *lpfc_is_command_vm_io(struct scsi_cmnd *cmd)
 {
-	char *uuid = NULL;
+	struct bio *bio = scsi_cmd_to_rq(cmd)->bio;
 
-	if (cmd->request) {
-		if (cmd->request->bio)
-			uuid = blkcg_get_fc_appid(cmd->request->bio);
-	}
-	return uuid;
+	return bio ? blkcg_get_fc_appid(bio) : NULL;
 }
 
 /**
@@ -5462,7 +5584,7 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
 	if (phba->ktime_on)
 		start = ktime_get_ns();
 #endif
-
+	start = ktime_get_ns();
 	rdata = lpfc_rport_data_from_scsi_device(cmnd->device);
 
 	/* sanity check on references */
@@ -5493,7 +5615,18 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
 	 * transport is still transitioning.
 	 */
 	if (!ndlp)
-		goto out_tgt_busy;
+		goto out_tgt_busy1;
+
+	/* Check if IO qualifies for CMF */
+	if (phba->cmf_active_mode != LPFC_CFG_OFF &&
+	    cmnd->sc_data_direction == DMA_FROM_DEVICE &&
+	    (scsi_sg_count(cmnd))) {
+		/* Latency start time saved in rx_cmd_start later in routine */
+		err = lpfc_update_cmf_cmd(phba, scsi_bufflen(cmnd));
+		if (err)
+			goto out_tgt_busy1;
+	}
+
 	if (lpfc_ndlp_check_qdepth(phba, ndlp)) {
 		if (atomic_read(&ndlp->cmd_pending) >= ndlp->cmd_qdepth) {
 			lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP_ERROR,
@@ -5521,7 +5654,7 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
 					 ndlp->nlp_portname.u.wwn[5],
 					 ndlp->nlp_portname.u.wwn[6],
 					 ndlp->nlp_portname.u.wwn[7]);
-			goto out_tgt_busy;
+			goto out_tgt_busy2;
 		}
 	}
 
@@ -5534,6 +5667,7 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
 				 "IO busied\n");
 		goto out_host_busy;
 	}
+	lpfc_cmd->rx_cmd_start = start;
 
 	/*
 	 * Store the midlayer's command structure for the completion phase
@@ -5557,8 +5691,8 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
 					 "reftag x%x cnt %u pt %x\n",
 					 dif_op_str[scsi_get_prot_op(cmnd)],
 					 cmnd->cmnd[0],
-					 t10_pi_ref_tag(cmnd->request),
-					 blk_rq_sectors(cmnd->request),
+					 scsi_prot_ref_tag(cmnd),
+					 scsi_logical_block_count(cmnd),
 					 (cmnd->cmnd[1]>>5));
 		}
 		err = lpfc_bg_scsi_prep_dma_buf(phba, lpfc_cmd);
@@ -5569,8 +5703,8 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
 					 "9038 BLKGRD: rcvd PROT_NORMAL cmd: "
 					 "x%x reftag x%x cnt %u pt %x\n",
 					 cmnd->cmnd[0],
-					 t10_pi_ref_tag(cmnd->request),
-					 blk_rq_sectors(cmnd->request),
+					 scsi_prot_ref_tag(cmnd),
+					 scsi_logical_block_count(cmnd),
 					 (cmnd->cmnd[1]>>5));
 		}
 		err = lpfc_scsi_prep_dma_buf(phba, lpfc_cmd);
@@ -5641,8 +5775,7 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
 				   bf_get(wqe_tmo,
 				   &lpfc_cmd->cur_iocbq.wqe.generic.wqe_com) :
 				   lpfc_cmd->cur_iocbq.iocb.ulpTimeout,
-				   (uint32_t)
-				   (cmnd->request->timeout / 1000));
+				   (uint32_t)(scsi_cmd_to_rq(cmnd)->timeout / 1000));
 
 		goto out_host_busy_free_buf;
 	}
@@ -5678,13 +5811,20 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
  out_host_busy_release_buf:
 	lpfc_release_scsi_buf(phba, lpfc_cmd);
  out_host_busy:
+	lpfc_update_cmf_cmpl(phba, LPFC_CGN_NOT_SENT, scsi_bufflen(cmnd),
+			     shost);
 	return SCSI_MLQUEUE_HOST_BUSY;
 
- out_tgt_busy:
+ out_tgt_busy2:
+	lpfc_update_cmf_cmpl(phba, LPFC_CGN_NOT_SENT, scsi_bufflen(cmnd),
+			     shost);
+ out_tgt_busy1:
 	return SCSI_MLQUEUE_TARGET_BUSY;
 
  out_fail_command_release_buf:
 	lpfc_release_scsi_buf(phba, lpfc_cmd);
+	lpfc_update_cmf_cmpl(phba, LPFC_CGN_NOT_SENT, scsi_bufflen(cmnd),
+			     shost);
 
  out_fail_command:
 	cmnd->scsi_done(cmnd);
@@ -6273,6 +6413,7 @@ lpfc_target_reset_handler(struct scsi_cmnd *cmnd)
 	struct lpfc_scsi_event_header scsi_event;
 	int status;
 	u32 logit = LOG_FCP;
+	u32 dev_loss_tmo = vport->cfg_devloss_tmo;
 	unsigned long flags;
 	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waitq);
 
@@ -6314,39 +6455,44 @@ lpfc_target_reset_handler(struct scsi_cmnd *cmnd)
 
 	status = lpfc_send_taskmgmt(vport, cmnd, tgt_id, lun_id,
 					FCP_TARGET_RESET);
-	if (status != SUCCESS)
-		logit =  LOG_TRACE_EVENT;
-	spin_lock_irqsave(&pnode->lock, flags);
-	if (status != SUCCESS &&
-	    (!(pnode->upcall_flags & NLP_WAIT_FOR_LOGO)) &&
-	     !pnode->logo_waitq) {
-		pnode->logo_waitq = &waitq;
-		pnode->nlp_fcp_info &= ~NLP_FCP_2_DEVICE;
-		pnode->nlp_flag |= NLP_ISSUE_LOGO;
-		pnode->upcall_flags |= NLP_WAIT_FOR_LOGO;
-		spin_unlock_irqrestore(&pnode->lock, flags);
-		lpfc_unreg_rpi(vport, pnode);
-		wait_event_timeout(waitq,
-				   (!(pnode->upcall_flags & NLP_WAIT_FOR_LOGO)),
-				    msecs_to_jiffies(vport->cfg_devloss_tmo *
-				    1000));
-
-		if (pnode->upcall_flags & NLP_WAIT_FOR_LOGO) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
-				"0725 SCSI layer TGTRST failed & LOGO TMO "
-				" (%d, %llu) return x%x\n", tgt_id,
-				 lun_id, status);
-			spin_lock_irqsave(&pnode->lock, flags);
-			pnode->upcall_flags &= ~NLP_WAIT_FOR_LOGO;
+	if (status != SUCCESS) {
+		logit = LOG_TRACE_EVENT;
+
+		/* Issue LOGO, if no LOGO is outstanding */
+		spin_lock_irqsave(&pnode->lock, flags);
+		if (!(pnode->upcall_flags & NLP_WAIT_FOR_LOGO) &&
+		    !pnode->logo_waitq) {
+			pnode->logo_waitq = &waitq;
+			pnode->nlp_fcp_info &= ~NLP_FCP_2_DEVICE;
+			pnode->nlp_flag |= NLP_ISSUE_LOGO;
+			pnode->upcall_flags |= NLP_WAIT_FOR_LOGO;
+			spin_unlock_irqrestore(&pnode->lock, flags);
+			lpfc_unreg_rpi(vport, pnode);
+			wait_event_timeout(waitq,
+					   (!(pnode->upcall_flags &
+					      NLP_WAIT_FOR_LOGO)),
+					   msecs_to_jiffies(dev_loss_tmo *
+							    1000));
+
+			if (pnode->upcall_flags & NLP_WAIT_FOR_LOGO) {
+				lpfc_printf_vlog(vport, KERN_ERR, logit,
+						 "0725 SCSI layer TGTRST "
+						 "failed & LOGO TMO (%d, %llu) "
+						 "return x%x\n",
+						 tgt_id, lun_id, status);
+				spin_lock_irqsave(&pnode->lock, flags);
+				pnode->upcall_flags &= ~NLP_WAIT_FOR_LOGO;
+			} else {
+				spin_lock_irqsave(&pnode->lock, flags);
+			}
+			pnode->logo_waitq = NULL;
+			spin_unlock_irqrestore(&pnode->lock, flags);
+			status = SUCCESS;
+
 		} else {
-			spin_lock_irqsave(&pnode->lock, flags);
+			spin_unlock_irqrestore(&pnode->lock, flags);
+			status = FAILED;
 		}
-		pnode->logo_waitq = NULL;
-		spin_unlock_irqrestore(&pnode->lock, flags);
-		status = SUCCESS;
-	} else {
-		status = FAILED;
-		spin_unlock_irqrestore(&pnode->lock, flags);
 	}
 
 	lpfc_printf_vlog(vport, KERN_ERR, logit,
diff --git a/drivers/scsi/lpfc/lpfc_scsi.h b/drivers/scsi/lpfc/lpfc_scsi.h
index f76667b7da7b..3836d7f6a575 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.h
+++ b/drivers/scsi/lpfc/lpfc_scsi.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2021 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc and/or its subsidiaries.  *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -142,6 +142,10 @@ struct lpfc_scsicmd_bkt {
 #define FC_PORTSPEED_128GBIT	0x2000
 #endif
 
+#ifndef FC_PORTSPEED_256GBIT
+#define FC_PORTSPEED_256GBIT	0x4000
+#endif
+
 #define TXRDY_PAYLOAD_LEN	12
 
 /* For sysfs/debugfs tmp string max len */
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index f530d8fe7a8c..ffd8a140638c 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -1439,7 +1439,7 @@ out:
 	memset((char *)iocbq + start_clean, 0, sizeof(*iocbq) - start_clean);
 	iocbq->sli4_lxritag = NO_XRI;
 	iocbq->sli4_xritag = NO_XRI;
-	iocbq->iocb_flag &= ~(LPFC_IO_NVME | LPFC_IO_NVMET |
+	iocbq->iocb_flag &= ~(LPFC_IO_NVME | LPFC_IO_NVMET | LPFC_IO_CMF |
 			      LPFC_IO_NVME_LS);
 	list_add_tail(&iocbq->list, &phba->lpfc_iocb_list);
 }
@@ -1769,6 +1769,254 @@ lpfc_sli_ringtx_get(struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
 }
 
 /**
+ * lpfc_cmf_sync_cmpl - Process a CMF_SYNC_WQE cmpl
+ * @phba: Pointer to HBA context object.
+ * @cmdiocb: Pointer to driver command iocb object.
+ * @cmf_cmpl: Pointer to completed WCQE.
+ *
+ * This routine will inform the driver of any BW adjustments we need
+ * to make. These changes will be picked up during the next CMF
+ * timer interrupt. In addition, any BW changes will be logged
+ * with LOG_CGN_MGMT.
+ **/
+static void
+lpfc_cmf_sync_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
+		   struct lpfc_wcqe_complete *cmf_cmpl)
+{
+	union lpfc_wqe128 *wqe;
+	uint32_t status, info;
+	uint64_t bw, bwdif, slop;
+	uint64_t pcent, bwpcent;
+	int asig, afpin, sigcnt, fpincnt;
+	int wsigmax, wfpinmax, cg, tdp;
+	char *s;
+
+	/* First check for error */
+	status = bf_get(lpfc_wcqe_c_status, cmf_cmpl);
+	if (status) {
+		lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+				"6211 CMF_SYNC_WQE Error "
+				"req_tag x%x status x%x hwstatus x%x "
+				"tdatap x%x parm x%x\n",
+				bf_get(lpfc_wcqe_c_request_tag, cmf_cmpl),
+				bf_get(lpfc_wcqe_c_status, cmf_cmpl),
+				bf_get(lpfc_wcqe_c_hw_status, cmf_cmpl),
+				cmf_cmpl->total_data_placed,
+				cmf_cmpl->parameter);
+		goto out;
+	}
+
+	/* Gather congestion information on a successful cmpl */
+	info = cmf_cmpl->parameter;
+	phba->cmf_active_info = info;
+
+	/* See if firmware info count is valid or has changed */
+	if (info > LPFC_MAX_CMF_INFO || phba->cmf_info_per_interval == info)
+		info = 0;
+	else
+		phba->cmf_info_per_interval = info;
+
+	tdp = bf_get(lpfc_wcqe_c_cmf_bw, cmf_cmpl);
+	cg = bf_get(lpfc_wcqe_c_cmf_cg, cmf_cmpl);
+
+	/* Get BW requirement from firmware */
+	bw = (uint64_t)tdp * LPFC_CMF_BLK_SIZE;
+	if (!bw) {
+		lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+				"6212 CMF_SYNC_WQE x%x: NULL bw\n",
+				bf_get(lpfc_wcqe_c_request_tag, cmf_cmpl));
+		goto out;
+	}
+
+	/* Gather information needed for logging if a BW change is required */
+	wqe = &cmdiocb->wqe;
+	asig = bf_get(cmf_sync_asig, &wqe->cmf_sync);
+	afpin = bf_get(cmf_sync_afpin, &wqe->cmf_sync);
+	fpincnt = bf_get(cmf_sync_wfpincnt, &wqe->cmf_sync);
+	sigcnt = bf_get(cmf_sync_wsigcnt, &wqe->cmf_sync);
+	if (phba->cmf_max_bytes_per_interval != bw ||
+	    (asig || afpin || sigcnt || fpincnt)) {
+		/* Are we increasing or decreasing BW */
+		if (phba->cmf_max_bytes_per_interval <  bw) {
+			bwdif = bw - phba->cmf_max_bytes_per_interval;
+			s = "Increase";
+		} else {
+			bwdif = phba->cmf_max_bytes_per_interval - bw;
+			s = "Decrease";
+		}
+
+		/* What is the change percentage */
+		slop = div_u64(phba->cmf_link_byte_count, 200); /*For rounding*/
+		pcent = div64_u64(bwdif * 100 + slop,
+				  phba->cmf_link_byte_count);
+		bwpcent = div64_u64(bw * 100 + slop,
+				    phba->cmf_link_byte_count);
+		if (asig) {
+			lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+					"6237 BW Threshold %lld%% (%lld): "
+					"%lld%% %s: Signal Alarm: cg:%d "
+					"Info:%u\n",
+					bwpcent, bw, pcent, s, cg,
+					phba->cmf_active_info);
+		} else if (afpin) {
+			lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+					"6238 BW Threshold %lld%% (%lld): "
+					"%lld%% %s: FPIN Alarm: cg:%d "
+					"Info:%u\n",
+					bwpcent, bw, pcent, s, cg,
+					phba->cmf_active_info);
+		} else if (sigcnt) {
+			wsigmax = bf_get(cmf_sync_wsigmax, &wqe->cmf_sync);
+			lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+					"6239 BW Threshold %lld%% (%lld): "
+					"%lld%% %s: Signal Warning: "
+					"Cnt %d Max %d: cg:%d Info:%u\n",
+					bwpcent, bw, pcent, s, sigcnt,
+					wsigmax, cg, phba->cmf_active_info);
+		} else if (fpincnt) {
+			wfpinmax = bf_get(cmf_sync_wfpinmax, &wqe->cmf_sync);
+			lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+					"6240 BW Threshold %lld%% (%lld): "
+					"%lld%% %s: FPIN Warning: "
+					"Cnt %d Max %d: cg:%d Info:%u\n",
+					bwpcent, bw, pcent, s, fpincnt,
+					wfpinmax, cg, phba->cmf_active_info);
+		} else {
+			lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+					"6241 BW Threshold %lld%% (%lld): "
+					"CMF %lld%% %s: cg:%d Info:%u\n",
+					bwpcent, bw, pcent, s, cg,
+					phba->cmf_active_info);
+		}
+	} else if (info) {
+		lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+				"6246 Info Threshold %u\n", info);
+	}
+
+	/* Save BW change to be picked up during next timer interrupt */
+	phba->cmf_last_sync_bw = bw;
+out:
+	lpfc_sli_release_iocbq(phba, cmdiocb);
+}
+
+/**
+ * lpfc_issue_cmf_sync_wqe - Issue a CMF_SYNC_WQE
+ * @phba: Pointer to HBA context object.
+ * @ms:   ms to set in WQE interval, 0 means use init op
+ * @total: Total rcv bytes for this interval
+ *
+ * This routine is called every CMF timer interrupt. Its purpose is
+ * to issue a CMF_SYNC_WQE to the firmware to inform it of any events
+ * that may indicate we have congestion (FPINs or Signals). Upon
+ * completion, the firmware will indicate any BW restrictions the
+ * driver may need to take.
+ **/
+int
+lpfc_issue_cmf_sync_wqe(struct lpfc_hba *phba, u32 ms, u64 total)
+{
+	union lpfc_wqe128 *wqe;
+	struct lpfc_iocbq *sync_buf;
+	unsigned long iflags;
+	u32 ret_val;
+	u32 atot, wtot, max;
+
+	/* First address any alarm / warning activity */
+	atot = atomic_xchg(&phba->cgn_sync_alarm_cnt, 0);
+	wtot = atomic_xchg(&phba->cgn_sync_warn_cnt, 0);
+
+	/* ONLY Managed mode will send the CMF_SYNC_WQE to the HBA */
+	if (phba->cmf_active_mode != LPFC_CFG_MANAGED ||
+	    phba->link_state == LPFC_LINK_DOWN)
+		return 0;
+
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	sync_buf = __lpfc_sli_get_iocbq(phba);
+	if (!sync_buf) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_CGN_MGMT,
+				"6213 No available WQEs for CMF_SYNC_WQE\n");
+		ret_val = ENOMEM;
+		goto out_unlock;
+	}
+
+	wqe = &sync_buf->wqe;
+
+	/* WQEs are reused.  Clear stale data and set key fields to zero */
+	memset(wqe, 0, sizeof(*wqe));
+
+	/* If this is the very first CMF_SYNC_WQE, issue an init operation */
+	if (!ms) {
+		lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+				"6441 CMF Init %d - CMF_SYNC_WQE\n",
+				phba->fc_eventTag);
+		bf_set(cmf_sync_op, &wqe->cmf_sync, 1); /* 1=init */
+		bf_set(cmf_sync_interval, &wqe->cmf_sync, LPFC_CMF_INTERVAL);
+		goto initpath;
+	}
+
+	bf_set(cmf_sync_op, &wqe->cmf_sync, 0); /* 0=recalc */
+	bf_set(cmf_sync_interval, &wqe->cmf_sync, ms);
+
+	/* Check for alarms / warnings */
+	if (atot) {
+		if (phba->cgn_reg_signal == EDC_CG_SIG_WARN_ALARM) {
+			/* We hit an Signal alarm condition */
+			bf_set(cmf_sync_asig, &wqe->cmf_sync, 1);
+		} else {
+			/* We hit a FPIN alarm condition */
+			bf_set(cmf_sync_afpin, &wqe->cmf_sync, 1);
+		}
+	} else if (wtot) {
+		if (phba->cgn_reg_signal == EDC_CG_SIG_WARN_ONLY ||
+		    phba->cgn_reg_signal == EDC_CG_SIG_WARN_ALARM) {
+			/* We hit an Signal warning condition */
+			max = LPFC_SEC_TO_MSEC / lpfc_fabric_cgn_frequency *
+				lpfc_acqe_cgn_frequency;
+			bf_set(cmf_sync_wsigmax, &wqe->cmf_sync, max);
+			bf_set(cmf_sync_wsigcnt, &wqe->cmf_sync, wtot);
+		} else {
+			/* We hit a FPIN warning condition */
+			bf_set(cmf_sync_wfpinmax, &wqe->cmf_sync, 1);
+			bf_set(cmf_sync_wfpincnt, &wqe->cmf_sync, 1);
+		}
+	}
+
+	/* Update total read blocks during previous timer interval */
+	wqe->cmf_sync.read_bytes = (u32)(total / LPFC_CMF_BLK_SIZE);
+
+initpath:
+	bf_set(cmf_sync_ver, &wqe->cmf_sync, LPFC_CMF_SYNC_VER);
+	wqe->cmf_sync.event_tag = phba->fc_eventTag;
+	bf_set(cmf_sync_cmnd, &wqe->cmf_sync, CMD_CMF_SYNC_WQE);
+
+	/* Setup reqtag to match the wqe completion. */
+	bf_set(cmf_sync_reqtag, &wqe->cmf_sync, sync_buf->iotag);
+
+	bf_set(cmf_sync_qosd, &wqe->cmf_sync, 1);
+
+	bf_set(cmf_sync_cmd_type, &wqe->cmf_sync, CMF_SYNC_COMMAND);
+	bf_set(cmf_sync_wqec, &wqe->cmf_sync, 1);
+	bf_set(cmf_sync_cqid, &wqe->cmf_sync, LPFC_WQE_CQ_ID_DEFAULT);
+
+	sync_buf->vport = phba->pport;
+	sync_buf->wqe_cmpl = lpfc_cmf_sync_cmpl;
+	sync_buf->iocb_cmpl = NULL;
+	sync_buf->context1 = NULL;
+	sync_buf->context2 = NULL;
+	sync_buf->context3 = NULL;
+	sync_buf->sli4_xritag = NO_XRI;
+
+	sync_buf->iocb_flag |= LPFC_IO_CMF;
+	ret_val = lpfc_sli4_issue_wqe(phba, &phba->sli4_hba.hdwq[0], sync_buf);
+	if (ret_val)
+		lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+				"6214 Cannot issue CMF_SYNC_WQE: x%x\n",
+				ret_val);
+out_unlock:
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+	return ret_val;
+}
+
+/**
  * lpfc_sli_next_iocb_slot - Get next iocb slot in the ring
  * @phba: Pointer to HBA context object.
  * @pring: Pointer to driver SLI ring object.
@@ -4467,6 +4715,7 @@ lpfc_sli_brdready_s4(struct lpfc_hba *phba, uint32_t mask)
 	} else
 		phba->sli4_hba.intr_enable = 0;
 
+	phba->hba_flag &= ~HBA_SETUP;
 	return retval;
 }
 
@@ -4787,6 +5036,7 @@ lpfc_sli4_brdreset(struct lpfc_hba *phba)
 	phba->link_events = 0;
 	phba->pport->fc_myDID = 0;
 	phba->pport->fc_prevDID = 0;
+	phba->hba_flag &= ~HBA_SETUP;
 
 	spin_lock_irq(&phba->hbalock);
 	psli->sli_flag &= ~(LPFC_PROCESS_LA);
@@ -5674,16 +5924,20 @@ lpfc_sli4_get_ctl_attr(struct lpfc_hba *phba)
 		bf_get(lpfc_cntl_attr_lnk_type, cntl_attr);
 	phba->sli4_hba.lnk_info.lnk_no =
 		bf_get(lpfc_cntl_attr_lnk_numb, cntl_attr);
+	phba->sli4_hba.flash_id = bf_get(lpfc_cntl_attr_flash_id, cntl_attr);
+	phba->sli4_hba.asic_rev = bf_get(lpfc_cntl_attr_asic_rev, cntl_attr);
 
 	memset(phba->BIOSVersion, 0, sizeof(phba->BIOSVersion));
 	strlcat(phba->BIOSVersion, (char *)cntl_attr->bios_ver_str,
 		sizeof(phba->BIOSVersion));
 
 	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
-			"3086 lnk_type:%d, lnk_numb:%d, bios_ver:%s\n",
+			"3086 lnk_type:%d, lnk_numb:%d, bios_ver:%s, "
+			"flash_id: x%02x, asic_rev: x%02x\n",
 			phba->sli4_hba.lnk_info.lnk_tp,
 			phba->sli4_hba.lnk_info.lnk_no,
-			phba->BIOSVersion);
+			phba->BIOSVersion, phba->sli4_hba.flash_id,
+			phba->sli4_hba.asic_rev);
 out_free_mboxq:
 	if (bf_get(lpfc_mqe_command, &mboxq->u.mqe) == MBX_SLI4_CONFIG)
 		lpfc_sli4_mbox_cmd_free(phba, mboxq);
@@ -6413,6 +6667,7 @@ lpfc_set_features(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox,
 		  uint32_t feature)
 {
 	uint32_t len;
+	u32 sig_freq = 0;
 
 	len = sizeof(struct lpfc_mbx_set_feature) -
 		sizeof(struct lpfc_sli4_cfg_mhdr);
@@ -6435,6 +6690,35 @@ lpfc_set_features(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox,
 		mbox->u.mqe.un.set_feature.feature = LPFC_SET_MDS_DIAGS;
 		mbox->u.mqe.un.set_feature.param_len = 8;
 		break;
+	case LPFC_SET_CGN_SIGNAL:
+		if (phba->cmf_active_mode == LPFC_CFG_OFF)
+			sig_freq = 0;
+		else
+			sig_freq = phba->cgn_sig_freq;
+
+		if (phba->cgn_reg_signal == EDC_CG_SIG_WARN_ALARM) {
+			bf_set(lpfc_mbx_set_feature_CGN_alarm_freq,
+			       &mbox->u.mqe.un.set_feature, sig_freq);
+			bf_set(lpfc_mbx_set_feature_CGN_warn_freq,
+			       &mbox->u.mqe.un.set_feature, sig_freq);
+		}
+
+		if (phba->cgn_reg_signal == EDC_CG_SIG_WARN_ONLY)
+			bf_set(lpfc_mbx_set_feature_CGN_warn_freq,
+			       &mbox->u.mqe.un.set_feature, sig_freq);
+
+		if (phba->cmf_active_mode == LPFC_CFG_OFF ||
+		    phba->cgn_reg_signal == EDC_CG_SIG_NOTSUPPORTED)
+			sig_freq = 0;
+		else
+			sig_freq = lpfc_acqe_cgn_frequency;
+
+		bf_set(lpfc_mbx_set_feature_CGN_acqe_freq,
+		       &mbox->u.mqe.un.set_feature, sig_freq);
+
+		mbox->u.mqe.un.set_feature.feature = LPFC_SET_CGN_SIGNAL;
+		mbox->u.mqe.un.set_feature.param_len = 12;
+		break;
 	case LPFC_SET_DUAL_DUMP:
 		bf_set(lpfc_mbx_set_feature_dd,
 		       &mbox->u.mqe.un.set_feature, LPFC_ENABLE_DUAL_DUMP);
@@ -6443,8 +6727,22 @@ lpfc_set_features(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox,
 		mbox->u.mqe.un.set_feature.feature = LPFC_SET_DUAL_DUMP;
 		mbox->u.mqe.un.set_feature.param_len = 4;
 		break;
+	case LPFC_SET_ENABLE_MI:
+		mbox->u.mqe.un.set_feature.feature = LPFC_SET_ENABLE_MI;
+		mbox->u.mqe.un.set_feature.param_len = 4;
+		bf_set(lpfc_mbx_set_feature_milunq, &mbox->u.mqe.un.set_feature,
+		       phba->pport->cfg_lun_queue_depth);
+		bf_set(lpfc_mbx_set_feature_mi, &mbox->u.mqe.un.set_feature,
+		       phba->sli4_hba.pc_sli4_params.mi_ver);
+		break;
+	case LPFC_SET_ENABLE_CMF:
+		bf_set(lpfc_mbx_set_feature_dd, &mbox->u.mqe.un.set_feature, 1);
+		mbox->u.mqe.un.set_feature.feature = LPFC_SET_ENABLE_CMF;
+		mbox->u.mqe.un.set_feature.param_len = 4;
+		bf_set(lpfc_mbx_set_feature_cmf,
+		       &mbox->u.mqe.un.set_feature, 1);
+		break;
 	}
-
 	return;
 }
 
@@ -7365,7 +7663,7 @@ lpfc_set_host_data(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
 	mbox->u.mqe.un.set_host_data.param_id = LPFC_SET_HOST_OS_DRIVER_VERSION;
 	mbox->u.mqe.un.set_host_data.param_len =
 					LPFC_HOST_OS_DRIVER_VERSION_SIZE;
-	snprintf(mbox->u.mqe.un.set_host_data.data,
+	snprintf(mbox->u.mqe.un.set_host_data.un.data,
 		 LPFC_HOST_OS_DRIVER_VERSION_SIZE,
 		 "Linux %s v"LPFC_DRIVER_VERSION,
 		 (phba->hba_flag & HBA_FCOE_MODE) ? "FCoE" : "FC");
@@ -7433,6 +7731,91 @@ lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 	return 1;
 }
 
+static void
+lpfc_mbx_cmpl_cgn_set_ftrs(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+{
+	struct lpfc_vport *vport = pmb->vport;
+	union lpfc_sli4_cfg_shdr *shdr;
+	u32 shdr_status, shdr_add_status;
+	u32 sig, acqe;
+
+	/* Two outcomes. (1) Set featurs was successul and EDC negotiation
+	 * is done. (2) Mailbox failed and send FPIN support only.
+	 */
+	shdr = (union lpfc_sli4_cfg_shdr *)
+		&pmb->u.mqe.un.sli4_config.header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || pmb->u.mb.mbxStatus) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_CGN_MGMT,
+				"2516 CGN SET_FEATURE mbox failed with "
+				"status x%x add_status x%x, mbx status x%x "
+				"Reset Congestion to FPINs only\n",
+				shdr_status, shdr_add_status,
+				pmb->u.mb.mbxStatus);
+		/* If there is a mbox error, move on to RDF */
+		phba->cgn_reg_signal = EDC_CG_SIG_NOTSUPPORTED;
+		phba->cgn_reg_fpin = LPFC_CGN_FPIN_WARN | LPFC_CGN_FPIN_ALARM;
+		goto out;
+	}
+
+	/* Zero out Congestion Signal ACQE counter */
+	phba->cgn_acqe_cnt = 0;
+	atomic64_set(&phba->cgn_acqe_stat.warn, 0);
+	atomic64_set(&phba->cgn_acqe_stat.alarm, 0);
+
+	acqe = bf_get(lpfc_mbx_set_feature_CGN_acqe_freq,
+		      &pmb->u.mqe.un.set_feature);
+	sig = bf_get(lpfc_mbx_set_feature_CGN_warn_freq,
+		     &pmb->u.mqe.un.set_feature);
+	lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+			"4620 SET_FEATURES Success: Freq: %ds %dms "
+			" Reg: x%x x%x\n", acqe, sig,
+			phba->cgn_reg_signal, phba->cgn_reg_fpin);
+out:
+	mempool_free(pmb, phba->mbox_mem_pool);
+
+	/* Register for FPIN events from the fabric now that the
+	 * EDC common_set_features has completed.
+	 */
+	lpfc_issue_els_rdf(vport, 0);
+}
+
+int
+lpfc_config_cgn_signal(struct lpfc_hba *phba)
+{
+	LPFC_MBOXQ_t *mboxq;
+	u32 rc;
+
+	mboxq = (LPFC_MBOXQ_t *)mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq)
+		goto out_rdf;
+
+	lpfc_set_features(phba, mboxq, LPFC_SET_CGN_SIGNAL);
+	mboxq->vport = phba->pport;
+	mboxq->mbox_cmpl = lpfc_mbx_cmpl_cgn_set_ftrs;
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+			"4621 SET_FEATURES: FREQ sig x%x acqe x%x: "
+			"Reg: x%x x%x\n",
+			phba->cgn_sig_freq, lpfc_acqe_cgn_frequency,
+			phba->cgn_reg_signal, phba->cgn_reg_fpin);
+
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_NOWAIT);
+	if (rc == MBX_NOT_FINISHED)
+		goto out;
+	return 0;
+
+out:
+	mempool_free(mboxq, phba->mbox_mem_pool);
+out_rdf:
+	/* If there is a mbox error, move on to RDF */
+	phba->cgn_reg_fpin = LPFC_CGN_FPIN_WARN | LPFC_CGN_FPIN_ALARM;
+	phba->cgn_reg_signal = EDC_CG_SIG_NOTSUPPORTED;
+	lpfc_issue_els_rdf(phba->pport, 0);
+	return -EIO;
+}
+
 /**
  * lpfc_init_idle_stat_hb - Initialize idle_stat tracking
  * @phba: pointer to lpfc hba data structure.
@@ -7464,7 +7847,8 @@ static void lpfc_init_idle_stat_hb(struct lpfc_hba *phba)
 		idle_stat->prev_idle = get_cpu_idle_time(i, &wall, 1);
 		idle_stat->prev_wall = wall;
 
-		if (phba->nvmet_support)
+		if (phba->nvmet_support ||
+		    phba->cmf_active_mode != LPFC_CFG_OFF)
 			cq->poll_mode = LPFC_QUEUE_WORK;
 		else
 			cq->poll_mode = LPFC_IRQ_POLL;
@@ -7496,6 +7880,258 @@ static void lpfc_sli4_dip(struct lpfc_hba *phba)
 }
 
 /**
+ * lpfc_cmf_setup - Initialize idle_stat tracking
+ * @phba: Pointer to HBA context object.
+ *
+ * This is called from HBA setup during driver load or when the HBA
+ * comes online. this does all the initialization to support CMF and MI.
+ **/
+static int
+lpfc_cmf_setup(struct lpfc_hba *phba)
+{
+	LPFC_MBOXQ_t *mboxq;
+	struct lpfc_mqe *mqe;
+	struct lpfc_dmabuf *mp;
+	struct lpfc_pc_sli4_params *sli4_params;
+	struct lpfc_sli4_parameters *mbx_sli4_parameters;
+	int length;
+	int rc, cmf, mi_ver;
+
+	mboxq = (LPFC_MBOXQ_t *)mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq)
+		return -ENOMEM;
+	mqe = &mboxq->u.mqe;
+
+	/* Read the port's SLI4 Config Parameters */
+	length = (sizeof(struct lpfc_mbx_get_sli4_parameters) -
+		  sizeof(struct lpfc_sli4_cfg_mhdr));
+	lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_COMMON,
+			 LPFC_MBOX_OPCODE_GET_SLI4_PARAMETERS,
+			 length, LPFC_SLI4_MBX_EMBED);
+
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+	if (unlikely(rc)) {
+		mempool_free(mboxq, phba->mbox_mem_pool);
+		return rc;
+	}
+
+	/* Gather info on CMF and MI support */
+	sli4_params = &phba->sli4_hba.pc_sli4_params;
+	mbx_sli4_parameters = &mqe->un.get_sli4_parameters.sli4_parameters;
+	sli4_params->mi_ver = bf_get(cfg_mi_ver, mbx_sli4_parameters);
+	sli4_params->cmf = bf_get(cfg_cmf, mbx_sli4_parameters);
+
+	/* Are we forcing MI off via module parameter? */
+	if (!phba->cfg_enable_mi)
+		sli4_params->mi_ver = 0;
+
+	/* Always try to enable MI feature if we can */
+	if (sli4_params->mi_ver) {
+		lpfc_set_features(phba, mboxq, LPFC_SET_ENABLE_MI);
+		rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+		mi_ver = bf_get(lpfc_mbx_set_feature_mi,
+				 &mboxq->u.mqe.un.set_feature);
+
+		if (rc == MBX_SUCCESS) {
+			if (mi_ver) {
+				lpfc_printf_log(phba,
+						KERN_WARNING, LOG_CGN_MGMT,
+						"6215 MI is enabled\n");
+				sli4_params->mi_ver = mi_ver;
+			} else {
+				lpfc_printf_log(phba,
+						KERN_WARNING, LOG_CGN_MGMT,
+						"6338 MI is disabled\n");
+				sli4_params->mi_ver = 0;
+			}
+		} else {
+			/* mi_ver is already set from GET_SLI4_PARAMETERS */
+			lpfc_printf_log(phba, KERN_INFO,
+					LOG_CGN_MGMT | LOG_INIT,
+					"6245 Enable MI Mailbox x%x (x%x/x%x) "
+					"failed, rc:x%x mi:x%x\n",
+					bf_get(lpfc_mqe_command, &mboxq->u.mqe),
+					lpfc_sli_config_mbox_subsys_get
+						(phba, mboxq),
+					lpfc_sli_config_mbox_opcode_get
+						(phba, mboxq),
+					rc, sli4_params->mi_ver);
+		}
+	} else {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_CGN_MGMT,
+				"6217 MI is disabled\n");
+	}
+
+	/* Ensure FDMI is enabled for MI if enable_mi is set */
+	if (sli4_params->mi_ver)
+		phba->cfg_fdmi_on = LPFC_FDMI_SUPPORT;
+
+	/* Always try to enable CMF feature if we can */
+	if (sli4_params->cmf) {
+		lpfc_set_features(phba, mboxq, LPFC_SET_ENABLE_CMF);
+		rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+		cmf = bf_get(lpfc_mbx_set_feature_cmf,
+			     &mboxq->u.mqe.un.set_feature);
+		if (rc == MBX_SUCCESS && cmf) {
+			lpfc_printf_log(phba, KERN_WARNING, LOG_CGN_MGMT,
+					"6218 CMF is enabled: mode %d\n",
+					phba->cmf_active_mode);
+		} else {
+			lpfc_printf_log(phba, KERN_WARNING,
+					LOG_CGN_MGMT | LOG_INIT,
+					"6219 Enable CMF Mailbox x%x (x%x/x%x) "
+					"failed, rc:x%x dd:x%x\n",
+					bf_get(lpfc_mqe_command, &mboxq->u.mqe),
+					lpfc_sli_config_mbox_subsys_get
+						(phba, mboxq),
+					lpfc_sli_config_mbox_opcode_get
+						(phba, mboxq),
+					rc, cmf);
+			sli4_params->cmf = 0;
+			phba->cmf_active_mode = LPFC_CFG_OFF;
+			goto no_cmf;
+		}
+
+		/* Allocate Congestion Information Buffer */
+		if (!phba->cgn_i) {
+			mp = kmalloc(sizeof(*mp), GFP_KERNEL);
+			if (mp)
+				mp->virt = dma_alloc_coherent
+						(&phba->pcidev->dev,
+						sizeof(struct lpfc_cgn_info),
+						&mp->phys, GFP_KERNEL);
+			if (!mp || !mp->virt) {
+				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+						"2640 Failed to alloc memory "
+						"for Congestion Info\n");
+				kfree(mp);
+				sli4_params->cmf = 0;
+				phba->cmf_active_mode = LPFC_CFG_OFF;
+				goto no_cmf;
+			}
+			phba->cgn_i = mp;
+
+			/* initialize congestion buffer info */
+			lpfc_init_congestion_buf(phba);
+			lpfc_init_congestion_stat(phba);
+		}
+
+		rc = lpfc_sli4_cgn_params_read(phba);
+		if (rc < 0) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_CGN_MGMT | LOG_INIT,
+					"6242 Error reading Cgn Params (%d)\n",
+					rc);
+			/* Ensure CGN Mode is off */
+			sli4_params->cmf = 0;
+		} else if (!rc) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_CGN_MGMT | LOG_INIT,
+					"6243 CGN Event empty object.\n");
+			/* Ensure CGN Mode is off */
+			sli4_params->cmf = 0;
+		}
+	} else {
+no_cmf:
+		lpfc_printf_log(phba, KERN_WARNING, LOG_CGN_MGMT,
+				"6220 CMF is disabled\n");
+	}
+
+	/* Only register congestion buffer with firmware if BOTH
+	 * CMF and E2E are enabled.
+	 */
+	if (sli4_params->cmf && sli4_params->mi_ver) {
+		rc = lpfc_reg_congestion_buf(phba);
+		if (rc) {
+			dma_free_coherent(&phba->pcidev->dev,
+					  sizeof(struct lpfc_cgn_info),
+					  phba->cgn_i->virt, phba->cgn_i->phys);
+			kfree(phba->cgn_i);
+			phba->cgn_i = NULL;
+			/* Ensure CGN Mode is off */
+			phba->cmf_active_mode = LPFC_CFG_OFF;
+			return 0;
+		}
+	}
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"6470 Setup MI version %d CMF %d mode %d\n",
+			sli4_params->mi_ver, sli4_params->cmf,
+			phba->cmf_active_mode);
+
+	mempool_free(mboxq, phba->mbox_mem_pool);
+
+	/* Initialize atomic counters */
+	atomic_set(&phba->cgn_fabric_warn_cnt, 0);
+	atomic_set(&phba->cgn_fabric_alarm_cnt, 0);
+	atomic_set(&phba->cgn_sync_alarm_cnt, 0);
+	atomic_set(&phba->cgn_sync_warn_cnt, 0);
+	atomic_set(&phba->cgn_driver_evt_cnt, 0);
+	atomic_set(&phba->cgn_latency_evt_cnt, 0);
+	atomic64_set(&phba->cgn_latency_evt, 0);
+
+	phba->cmf_interval_rate = LPFC_CMF_INTERVAL;
+
+	/* Allocate RX Monitor Buffer */
+	if (!phba->rxtable) {
+		phba->rxtable = kmalloc_array(LPFC_MAX_RXMONITOR_ENTRY,
+					      sizeof(struct rxtable_entry),
+					      GFP_KERNEL);
+		if (!phba->rxtable) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"2644 Failed to alloc memory "
+					"for RX Monitor Buffer\n");
+			return -ENOMEM;
+		}
+	}
+	atomic_set(&phba->rxtable_idx_head, 0);
+	atomic_set(&phba->rxtable_idx_tail, 0);
+	return 0;
+}
+
+static int
+lpfc_set_host_tm(struct lpfc_hba *phba)
+{
+	LPFC_MBOXQ_t *mboxq;
+	uint32_t len, rc;
+	struct timespec64 cur_time;
+	struct tm broken;
+	uint32_t month, day, year;
+	uint32_t hour, minute, second;
+	struct lpfc_mbx_set_host_date_time *tm;
+
+	mboxq = (LPFC_MBOXQ_t *)mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq)
+		return -ENOMEM;
+
+	len = sizeof(struct lpfc_mbx_set_host_data) -
+		sizeof(struct lpfc_sli4_cfg_mhdr);
+	lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_COMMON,
+			 LPFC_MBOX_OPCODE_SET_HOST_DATA, len,
+			 LPFC_SLI4_MBX_EMBED);
+
+	mboxq->u.mqe.un.set_host_data.param_id = LPFC_SET_HOST_DATE_TIME;
+	mboxq->u.mqe.un.set_host_data.param_len =
+			sizeof(struct lpfc_mbx_set_host_date_time);
+	tm = &mboxq->u.mqe.un.set_host_data.un.tm;
+	ktime_get_real_ts64(&cur_time);
+	time64_to_tm(cur_time.tv_sec, 0, &broken);
+	month = broken.tm_mon + 1;
+	day = broken.tm_mday;
+	year = broken.tm_year - 100;
+	hour = broken.tm_hour;
+	minute = broken.tm_min;
+	second = broken.tm_sec;
+	bf_set(lpfc_mbx_set_host_month, tm, month);
+	bf_set(lpfc_mbx_set_host_day, tm, day);
+	bf_set(lpfc_mbx_set_host_year, tm, year);
+	bf_set(lpfc_mbx_set_host_hour, tm, hour);
+	bf_set(lpfc_mbx_set_host_min, tm, minute);
+	bf_set(lpfc_mbx_set_host_sec, tm, second);
+
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+	mempool_free(mboxq, phba->mbox_mem_pool);
+	return rc;
+}
+
+/**
  * lpfc_sli4_hba_setup - SLI4 device initialization PCI function
  * @phba: Pointer to HBA context object.
  *
@@ -7584,6 +8220,10 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 		goto out_free_mbox;
 	}
 
+	rc = lpfc_set_host_tm(phba);
+	lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_INIT,
+			"6468 Set host date / time: Status x%x:\n", rc);
+
 	/*
 	 * Continue initialization with default values even if driver failed
 	 * to read FCoE param config regions, only read parameters if the
@@ -8111,6 +8751,9 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 	/* Indicate device interrupt mode */
 	phba->sli4_hba.intr_enable = 1;
 
+	/* Setup CMF after HBA is initialized */
+	lpfc_cmf_setup(phba);
+
 	if (!(phba->hba_flag & HBA_FCOE_MODE) &&
 	    (phba->hba_flag & LINK_DISABLED)) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
@@ -8132,7 +8775,10 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 		}
 	}
 	mempool_free(mboxq, phba->mbox_mem_pool);
+
+	phba->hba_flag |= HBA_SETUP;
 	return rc;
+
 out_io_buff_free:
 	/* Free allocated IO Buffers */
 	lpfc_io_free(phba);
@@ -8790,8 +9436,11 @@ static int
 lpfc_sli4_async_mbox_block(struct lpfc_hba *phba)
 {
 	struct lpfc_sli *psli = &phba->sli;
+	LPFC_MBOXQ_t *mboxq;
 	int rc = 0;
 	unsigned long timeout = 0;
+	u32 sli_flag;
+	u8 cmd, subsys, opcode;
 
 	/* Mark the asynchronous mailbox command posting as blocked */
 	spin_lock_irq(&phba->hbalock);
@@ -8809,12 +9458,37 @@ lpfc_sli4_async_mbox_block(struct lpfc_hba *phba)
 	if (timeout)
 		lpfc_sli4_process_missed_mbox_completions(phba);
 
-	/* Wait for the outstnading mailbox command to complete */
+	/* Wait for the outstanding mailbox command to complete */
 	while (phba->sli.mbox_active) {
 		/* Check active mailbox complete status every 2ms */
 		msleep(2);
 		if (time_after(jiffies, timeout)) {
-			/* Timeout, marked the outstanding cmd not complete */
+			/* Timeout, mark the outstanding cmd not complete */
+
+			/* Sanity check sli.mbox_active has not completed or
+			 * cancelled from another context during last 2ms sleep,
+			 * so take hbalock to be sure before logging.
+			 */
+			spin_lock_irq(&phba->hbalock);
+			if (phba->sli.mbox_active) {
+				mboxq = phba->sli.mbox_active;
+				cmd = mboxq->u.mb.mbxCommand;
+				subsys = lpfc_sli_config_mbox_subsys_get(phba,
+									 mboxq);
+				opcode = lpfc_sli_config_mbox_opcode_get(phba,
+									 mboxq);
+				sli_flag = psli->sli_flag;
+				spin_unlock_irq(&phba->hbalock);
+				lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
+						"2352 Mailbox command x%x "
+						"(x%x/x%x) sli_flag x%x could "
+						"not complete\n",
+						cmd, subsys, opcode,
+						sli_flag);
+			} else {
+				spin_unlock_irq(&phba->hbalock);
+			}
+
 			rc = 1;
 			break;
 		}
@@ -9763,6 +10437,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 			if (pcmd && (*pcmd == ELS_CMD_FLOGI ||
 				*pcmd == ELS_CMD_SCR ||
 				*pcmd == ELS_CMD_RDF ||
+				*pcmd == ELS_CMD_EDC ||
 				*pcmd == ELS_CMD_RSCN_XMT ||
 				*pcmd == ELS_CMD_FDISC ||
 				*pcmd == ELS_CMD_LOGO ||
@@ -10097,8 +10772,6 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 		bf_set(wqe_ebde_cnt, &wqe->xmit_els_rsp.wqe_com, 0);
 		bf_set(wqe_rsp_temp_rpi, &wqe->xmit_els_rsp,
 		       phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]);
-		pcmd = (uint32_t *) (((struct lpfc_dmabuf *)
-					iocbq->context2)->virt);
 		if (phba->fc_topology == LPFC_TOPOLOGY_LOOP) {
 				bf_set(els_rsp64_sp, &wqe->xmit_els_rsp, 1);
 				bf_set(els_rsp64_sid, &wqe->xmit_els_rsp,
@@ -11619,6 +12292,7 @@ void
 lpfc_ignore_els_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 		     struct lpfc_iocbq *rspiocb)
 {
+	struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
 	IOCB_t *irsp = &rspiocb->iocb;
 
 	/* ELS cmd tag <ulpIoTag> completes */
@@ -11627,11 +12301,16 @@ lpfc_ignore_els_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 			"x%x x%x x%x\n",
 			irsp->ulpIoTag, irsp->ulpStatus,
 			irsp->un.ulpWord[4], irsp->ulpTimeout);
-	lpfc_nlp_put((struct lpfc_nodelist *)cmdiocb->context1);
+	/*
+	 * Deref the ndlp after free_iocb. sli_release_iocb will access the ndlp
+	 * if exchange is busy.
+	 */
 	if (cmdiocb->iocb.ulpCommand == CMD_GEN_REQUEST64_CR)
 		lpfc_ct_free_iocb(phba, cmdiocb);
 	else
 		lpfc_els_free_iocb(phba, cmdiocb);
+
+	lpfc_nlp_put(ndlp);
 }
 
 /**
@@ -14626,8 +15305,12 @@ static void lpfc_sli4_sched_cq_work(struct lpfc_hba *phba,
 
 	switch (cq->poll_mode) {
 	case LPFC_IRQ_POLL:
-		irq_poll_sched(&cq->iop);
-		break;
+		/* CGN mgmt is mutually exclusive from softirq processing */
+		if (phba->cmf_active_mode == LPFC_CFG_OFF) {
+			irq_poll_sched(&cq->iop);
+			break;
+		}
+		fallthrough;
 	case LPFC_QUEUE_WORK:
 	default:
 		if (is_kdump_kernel())
@@ -20021,6 +20704,91 @@ out:
 }
 
 /**
+ * lpfc_log_fw_write_cmpl - logs firmware write completion status
+ * @phba: pointer to lpfc hba data structure
+ * @shdr_status: wr_object rsp's status field
+ * @shdr_add_status: wr_object rsp's add_status field
+ * @shdr_add_status_2: wr_object rsp's add_status_2 field
+ * @shdr_change_status: wr_object rsp's change_status field
+ * @shdr_csf: wr_object rsp's csf bit
+ *
+ * This routine is intended to be called after a firmware write completes.
+ * It will log next action items to be performed by the user to instantiate
+ * the newly downloaded firmware or reason for incompatibility.
+ **/
+static void
+lpfc_log_fw_write_cmpl(struct lpfc_hba *phba, u32 shdr_status,
+		       u32 shdr_add_status, u32 shdr_add_status_2,
+		       u32 shdr_change_status, u32 shdr_csf)
+{
+	lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
+			"4198 %s: flash_id x%02x, asic_rev x%02x, "
+			"status x%02x, add_status x%02x, add_status_2 x%02x, "
+			"change_status x%02x, csf %01x\n", __func__,
+			phba->sli4_hba.flash_id, phba->sli4_hba.asic_rev,
+			shdr_status, shdr_add_status, shdr_add_status_2,
+			shdr_change_status, shdr_csf);
+
+	if (shdr_add_status == LPFC_ADD_STATUS_INCOMPAT_OBJ) {
+		switch (shdr_add_status_2) {
+		case LPFC_ADD_STATUS_2_INCOMPAT_FLASH:
+			lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX | LOG_SLI,
+					"4199 Firmware write failed: "
+					"image incompatible with flash x%02x\n",
+					phba->sli4_hba.flash_id);
+			break;
+		case LPFC_ADD_STATUS_2_INCORRECT_ASIC:
+			lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX | LOG_SLI,
+					"4200 Firmware write failed: "
+					"image incompatible with ASIC "
+					"architecture x%02x\n",
+					phba->sli4_hba.asic_rev);
+			break;
+		default:
+			lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX | LOG_SLI,
+					"4210 Firmware write failed: "
+					"add_status_2 x%02x\n",
+					shdr_add_status_2);
+			break;
+		}
+	} else if (!shdr_status && !shdr_add_status) {
+		if (shdr_change_status == LPFC_CHANGE_STATUS_FW_RESET ||
+		    shdr_change_status == LPFC_CHANGE_STATUS_PORT_MIGRATION) {
+			if (shdr_csf)
+				shdr_change_status =
+						   LPFC_CHANGE_STATUS_PCI_RESET;
+		}
+
+		switch (shdr_change_status) {
+		case (LPFC_CHANGE_STATUS_PHYS_DEV_RESET):
+			lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
+					"3198 Firmware write complete: System "
+					"reboot required to instantiate\n");
+			break;
+		case (LPFC_CHANGE_STATUS_FW_RESET):
+			lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
+					"3199 Firmware write complete: "
+					"Firmware reset required to "
+					"instantiate\n");
+			break;
+		case (LPFC_CHANGE_STATUS_PORT_MIGRATION):
+			lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
+					"3200 Firmware write complete: Port "
+					"Migration or PCI Reset required to "
+					"instantiate\n");
+			break;
+		case (LPFC_CHANGE_STATUS_PCI_RESET):
+			lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
+					"3201 Firmware write complete: PCI "
+					"Reset required to instantiate\n");
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+/**
  * lpfc_wr_object - write an object to the firmware
  * @phba: HBA structure that indicates port to create a queue on.
  * @dmabuf_list: list of dmabufs to write to the port.
@@ -20046,7 +20814,8 @@ lpfc_wr_object(struct lpfc_hba *phba, struct list_head *dmabuf_list,
 	struct lpfc_mbx_wr_object *wr_object;
 	LPFC_MBOXQ_t *mbox;
 	int rc = 0, i = 0;
-	uint32_t shdr_status, shdr_add_status, shdr_change_status, shdr_csf;
+	uint32_t shdr_status, shdr_add_status, shdr_add_status_2;
+	uint32_t shdr_change_status = 0, shdr_csf = 0;
 	uint32_t mbox_tmo;
 	struct lpfc_dmabuf *dmabuf;
 	uint32_t written = 0;
@@ -20100,58 +20869,36 @@ lpfc_wr_object(struct lpfc_hba *phba, struct list_head *dmabuf_list,
 			     &wr_object->header.cfg_shdr.response);
 	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status,
 				 &wr_object->header.cfg_shdr.response);
+	shdr_add_status_2 = bf_get(lpfc_mbox_hdr_add_status_2,
+				   &wr_object->header.cfg_shdr.response);
 	if (check_change_status) {
 		shdr_change_status = bf_get(lpfc_wr_object_change_status,
 					    &wr_object->u.response);
-
-		if (shdr_change_status == LPFC_CHANGE_STATUS_FW_RESET ||
-		    shdr_change_status == LPFC_CHANGE_STATUS_PORT_MIGRATION) {
-			shdr_csf = bf_get(lpfc_wr_object_csf,
-					  &wr_object->u.response);
-			if (shdr_csf)
-				shdr_change_status =
-						   LPFC_CHANGE_STATUS_PCI_RESET;
-		}
-
-		switch (shdr_change_status) {
-		case (LPFC_CHANGE_STATUS_PHYS_DEV_RESET):
-			lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-					"3198 Firmware write complete: System "
-					"reboot required to instantiate\n");
-			break;
-		case (LPFC_CHANGE_STATUS_FW_RESET):
-			lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-					"3199 Firmware write complete: Firmware"
-					" reset required to instantiate\n");
-			break;
-		case (LPFC_CHANGE_STATUS_PORT_MIGRATION):
-			lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-					"3200 Firmware write complete: Port "
-					"Migration or PCI Reset required to "
-					"instantiate\n");
-			break;
-		case (LPFC_CHANGE_STATUS_PCI_RESET):
-			lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-					"3201 Firmware write complete: PCI "
-					"Reset required to instantiate\n");
-			break;
-		default:
-			break;
-		}
+		shdr_csf = bf_get(lpfc_wr_object_csf,
+				  &wr_object->u.response);
 	}
+
 	if (!phba->sli4_hba.intr_enable)
 		mempool_free(mbox, phba->mbox_mem_pool);
 	else if (rc != MBX_TIMEOUT)
 		mempool_free(mbox, phba->mbox_mem_pool);
-	if (shdr_status || shdr_add_status || rc) {
+	if (shdr_status || shdr_add_status || shdr_add_status_2 || rc) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
 				"3025 Write Object mailbox failed with "
-				"status x%x add_status x%x, mbx status x%x\n",
-				shdr_status, shdr_add_status, rc);
+				"status x%x add_status x%x, add_status_2 x%x, "
+				"mbx status x%x\n",
+				shdr_status, shdr_add_status, shdr_add_status_2,
+				rc);
 		rc = -ENXIO;
 		*offset = shdr_add_status;
-	} else
+	} else {
 		*offset += wr_object->u.response.actual_write_length;
+	}
+
+	if (rc || check_change_status)
+		lpfc_log_fw_write_cmpl(phba, shdr_status, shdr_add_status,
+				       shdr_add_status_2, shdr_change_status,
+				       shdr_csf);
 	return rc;
 }
 
@@ -20543,8 +21290,7 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
 	}
 
 	/* NVME_FCREQ and NVME_ABTS requests */
-	if (pwqe->iocb_flag & LPFC_IO_NVME ||
-	    pwqe->iocb_flag & LPFC_IO_FCP) {
+	if (pwqe->iocb_flag & (LPFC_IO_NVME | LPFC_IO_FCP | LPFC_IO_CMF)) {
 		/* Get the IO distribution (hba_wqidx) for WQ assignment. */
 		wq = qp->io_wq;
 		pring = wq->pring;
@@ -21323,6 +22069,116 @@ struct lpfc_io_buf *lpfc_get_io_buf(struct lpfc_hba *phba,
 }
 
 /**
+ * lpfc_read_object - Retrieve object data from HBA
+ * @phba: The HBA for which this call is being executed.
+ * @rdobject: Pathname of object data we want to read.
+ * @datap: Pointer to where data will be copied to.
+ * @datasz: size of data area
+ *
+ * This routine is limited to object sizes of LPFC_BPL_SIZE (1024) or less.
+ * The data will be truncated if datasz is not large enough.
+ * Version 1 is not supported with Embedded mbox cmd, so we must use version 0.
+ * Returns the actual bytes read from the object.
+ */
+int
+lpfc_read_object(struct lpfc_hba *phba, char *rdobject, uint32_t *datap,
+		 uint32_t datasz)
+{
+	struct lpfc_mbx_read_object *read_object;
+	LPFC_MBOXQ_t *mbox;
+	int rc, length, eof, j, byte_cnt = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+	struct lpfc_dmabuf *pcmd;
+
+	/* sanity check on queue memory */
+	if (!datap)
+		return -ENODEV;
+
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+	length = (sizeof(struct lpfc_mbx_read_object) -
+		  sizeof(struct lpfc_sli4_cfg_mhdr));
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_COMMON,
+			 LPFC_MBOX_OPCODE_READ_OBJECT,
+			 length, LPFC_SLI4_MBX_EMBED);
+	read_object = &mbox->u.mqe.un.read_object;
+	shdr = (union lpfc_sli4_cfg_shdr *)&read_object->header.cfg_shdr;
+
+	bf_set(lpfc_mbox_hdr_version, &shdr->request, LPFC_Q_CREATE_VERSION_0);
+	bf_set(lpfc_mbx_rd_object_rlen, &read_object->u.request, datasz);
+	read_object->u.request.rd_object_offset = 0;
+	read_object->u.request.rd_object_cnt = 1;
+
+	memset((void *)read_object->u.request.rd_object_name, 0,
+	       LPFC_OBJ_NAME_SZ);
+	sprintf((uint8_t *)read_object->u.request.rd_object_name, rdobject);
+	for (j = 0; j < strlen(rdobject); j++)
+		read_object->u.request.rd_object_name[j] =
+			cpu_to_le32(read_object->u.request.rd_object_name[j]);
+
+	pcmd = kmalloc(sizeof(*pcmd), GFP_KERNEL);
+	if (pcmd)
+		pcmd->virt = lpfc_mbuf_alloc(phba, MEM_PRI, &pcmd->phys);
+	if (!pcmd || !pcmd->virt) {
+		kfree(pcmd);
+		mempool_free(mbox, phba->mbox_mem_pool);
+		return -ENOMEM;
+	}
+	memset((void *)pcmd->virt, 0, LPFC_BPL_SIZE);
+	read_object->u.request.rd_object_hbuf[0].pa_lo =
+		putPaddrLow(pcmd->phys);
+	read_object->u.request.rd_object_hbuf[0].pa_hi =
+		putPaddrHigh(pcmd->phys);
+	read_object->u.request.rd_object_hbuf[0].length = LPFC_BPL_SIZE;
+
+	mbox->vport = phba->pport;
+	mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+	mbox->ctx_buf = NULL;
+	mbox->ctx_ndlp = NULL;
+
+	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+
+	if (shdr_status == STATUS_FAILED &&
+	    shdr_add_status == ADD_STATUS_INVALID_OBJECT_NAME) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_CGN_MGMT,
+				"4674 No port cfg file in FW.\n");
+		byte_cnt = -ENOENT;
+	} else if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_CGN_MGMT,
+				"2625 READ_OBJECT mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		byte_cnt = -ENXIO;
+	} else {
+		/* Success */
+		length = read_object->u.response.rd_object_actual_rlen;
+		eof = bf_get(lpfc_mbx_rd_object_eof, &read_object->u.response);
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_CGN_MGMT,
+				"2626 READ_OBJECT Success len %d:%d, EOF %d\n",
+				length, datasz, eof);
+
+		/* Detect the port config file exists but is empty */
+		if (!length && eof) {
+			byte_cnt = 0;
+			goto exit;
+		}
+
+		byte_cnt = length;
+		lpfc_sli_pcimem_bcopy(pcmd->virt, datap, byte_cnt);
+	}
+
+ exit:
+	lpfc_mbuf_free(phba, pcmd->virt, pcmd->phys);
+	kfree(pcmd);
+	mempool_free(mbox, phba->mbox_mem_pool);
+	return byte_cnt;
+}
+
+/**
  * lpfc_get_sgl_per_hdwq - Get one SGL chunk from hdwq's pool
  * @phba: The HBA for which this call is being executed.
  * @lpfc_buf: IO buf structure to append the SGL chunk
diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h
index dde8eb9d796d..5161ccacea3e 100644
--- a/drivers/scsi/lpfc/lpfc_sli.h
+++ b/drivers/scsi/lpfc/lpfc_sli.h
@@ -107,6 +107,7 @@ struct lpfc_iocbq {
 #define LPFC_IO_NVME_LS		0x400000 /* NVME LS command */
 #define LPFC_IO_NVMET		0x800000 /* NVMET command */
 #define LPFC_IO_VMID            0x1000000 /* VMID tagged IO */
+#define LPFC_IO_CMF		0x4000000 /* CMF command */
 
 	uint32_t drvrTimeout;	/* driver timeout in seconds */
 	struct lpfc_vport *vport;/* virtual port pointer */
@@ -462,4 +463,5 @@ struct lpfc_io_buf {
 	uint64_t ts_isr_cmpl;
 	uint64_t ts_data_io;
 #endif
+	uint64_t rx_cmd_start;
 };
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index 26f19c95380f..99c5d1e4da5e 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2021 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2009-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -557,6 +557,7 @@ struct lpfc_pc_sli4_params {
 	uint16_t mi_value;
 #define LPFC_DFLT_MIB_VAL	2
 	uint8_t mib_bde_cnt;
+	uint8_t cmf;
 	uint8_t cqv;
 	uint8_t mqv;
 	uint8_t wqv;
@@ -978,6 +979,8 @@ struct lpfc_sli4_hba {
 #define lpfc_conf_trunk_port3_nd_WORD	conf_trunk
 #define lpfc_conf_trunk_port3_nd_SHIFT	7
 #define lpfc_conf_trunk_port3_nd_MASK	0x1
+	uint8_t flash_id;
+	uint8_t asic_rev;
 };
 
 enum lpfc_sge_type {
diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index 2d62fd2a9824..a7aba7833425 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -20,7 +20,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "12.8.0.10"
+#define LPFC_DRIVER_VERSION "14.0.0.1"
 #define LPFC_DRIVER_NAME		"lpfc"
 
 /* Used for SLI 2/3 */
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index ec10b2497310..e4298bf4a482 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -1451,10 +1451,10 @@ megasas_build_dcdb(struct megasas_instance *instance, struct scsi_cmnd *scp,
 	 * pthru timeout to the os layer timeout value.
 	 */
 	if (scp->device->type == TYPE_TAPE) {
-		if ((scp->request->timeout / HZ) > 0xFFFF)
+		if (scsi_cmd_to_rq(scp)->timeout / HZ > 0xFFFF)
 			pthru->timeout = cpu_to_le16(0xFFFF);
 		else
-			pthru->timeout = cpu_to_le16(scp->request->timeout / HZ);
+			pthru->timeout = cpu_to_le16(scsi_cmd_to_rq(scp)->timeout / HZ);
 	}
 
 	/*
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index 06399c026a8d..26d0cf9353dd 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -402,7 +402,7 @@ megasas_get_msix_index(struct megasas_instance *instance,
 			(mega_mod64(atomic64_add_return(1, &instance->total_io_count),
 				instance->msix_vectors));
 	} else if (instance->host->nr_hw_queues > 1) {
-		u32 tag = blk_mq_unique_tag(scmd->request);
+		u32 tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
 
 		cmd->request_desc->SCSIIO.MSIxIndex = blk_mq_unique_tag_to_hwq(tag) +
 			instance->low_latency_index_start;
@@ -3023,7 +3023,7 @@ static void megasas_build_ld_nonrw_fusion(struct megasas_instance *instance,
 		io_request->DevHandle = cpu_to_le16(device_id);
 		io_request->LUN[1] = scmd->device->lun;
 		pRAID_Context->timeout_value =
-			cpu_to_le16 (scmd->request->timeout / HZ);
+			cpu_to_le16(scsi_cmd_to_rq(scmd)->timeout / HZ);
 		cmd->request_desc->SCSIIO.RequestFlags =
 			(MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO <<
 			MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT);
@@ -3086,7 +3086,7 @@ megasas_build_syspd_fusion(struct megasas_instance *instance,
 
 	device_id = MEGASAS_DEV_INDEX(scmd);
 	pd_index = MEGASAS_PD_INDEX(scmd);
-	os_timeout_value = scmd->request->timeout / HZ;
+	os_timeout_value = scsi_cmd_to_rq(scmd)->timeout / HZ;
 	mr_device_priv_data = scmd->device->hostdata;
 	cmd->pd_interface = mr_device_priv_data->interface_type;
 
@@ -3381,7 +3381,7 @@ megasas_build_and_issue_cmd_fusion(struct megasas_instance *instance,
 		return SCSI_MLQUEUE_HOST_BUSY;
 	}
 
-	cmd = megasas_get_cmd_fusion(instance, scmd->request->tag);
+	cmd = megasas_get_cmd_fusion(instance, scsi_cmd_to_rq(scmd)->tag);
 
 	if (!cmd) {
 		atomic_dec(&instance->fw_outstanding);
@@ -3422,7 +3422,7 @@ megasas_build_and_issue_cmd_fusion(struct megasas_instance *instance,
 	 */
 	if (cmd->r1_alt_dev_handle != MR_DEVHANDLE_INVALID) {
 		r1_cmd = megasas_get_cmd_fusion(instance,
-				(scmd->request->tag + instance->max_fw_cmds));
+				scsi_cmd_to_rq(scmd)->tag + instance->max_fw_cmds);
 		megasas_prepare_secondRaid1_IO(instance, cmd, r1_cmd);
 	}
 
diff --git a/drivers/scsi/mpi3mr/mpi3mr.h b/drivers/scsi/mpi3mr/mpi3mr.h
index 6f5dc9e78553..9787b53a2b59 100644
--- a/drivers/scsi/mpi3mr/mpi3mr.h
+++ b/drivers/scsi/mpi3mr/mpi3mr.h
@@ -183,6 +183,20 @@ enum mpi3mr_iocstate {
 	MRIOC_STATE_UNRECOVERABLE,
 };
 
+/* Init type definitions */
+enum mpi3mr_init_type {
+	MPI3MR_IT_INIT = 0,
+	MPI3MR_IT_RESET,
+	MPI3MR_IT_RESUME,
+};
+
+/* Cleanup reason definitions */
+enum mpi3mr_cleanup_reason {
+	MPI3MR_COMPLETE_CLEANUP = 0,
+	MPI3MR_REINIT_FAILURE,
+	MPI3MR_SUSPEND,
+};
+
 /* Reset reason code definitions*/
 enum mpi3mr_reset_reason {
 	MPI3MR_RESET_FROM_BRINGUP = 1,
@@ -855,8 +869,8 @@ struct delayed_dev_rmhs_node {
 
 int mpi3mr_setup_resources(struct mpi3mr_ioc *mrioc);
 void mpi3mr_cleanup_resources(struct mpi3mr_ioc *mrioc);
-int mpi3mr_init_ioc(struct mpi3mr_ioc *mrioc, u8 re_init);
-void mpi3mr_cleanup_ioc(struct mpi3mr_ioc *mrioc, u8 re_init);
+int mpi3mr_init_ioc(struct mpi3mr_ioc *mrioc, u8 init_type);
+void mpi3mr_cleanup_ioc(struct mpi3mr_ioc *mrioc, u8 reason);
 int mpi3mr_issue_port_enable(struct mpi3mr_ioc *mrioc, u8 async);
 int mpi3mr_admin_request_post(struct mpi3mr_ioc *mrioc, void *admin_req,
 u16 admin_req_sz, u8 ignore_reset);
@@ -872,6 +886,7 @@ void *mpi3mr_get_reply_virt_addr(struct mpi3mr_ioc *mrioc,
 void mpi3mr_repost_sense_buf(struct mpi3mr_ioc *mrioc,
 				     u64 sense_buf_dma);
 
+void mpi3mr_memset_buffers(struct mpi3mr_ioc *mrioc);
 void mpi3mr_os_handle_events(struct mpi3mr_ioc *mrioc,
 			     struct mpi3_event_notification_reply *event_reply);
 void mpi3mr_process_op_reply_desc(struct mpi3mr_ioc *mrioc,
diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c
index 2dba2b0af166..4a8316c6bd41 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_fw.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c
@@ -3205,7 +3205,7 @@ out_failed:
 /**
  * mpi3mr_init_ioc - Initialize the controller
  * @mrioc: Adapter instance reference
- * @re_init: Flag to indicate is this fresh init or re-init
+ * @init_type: Flag to indicate is the init_type
  *
  * This the controller initialization routine, executed either
  * after soft reset or from pci probe callback.
@@ -3218,7 +3218,7 @@ out_failed:
  *
  * Return: 0 on success and non-zero on failure.
  */
-int mpi3mr_init_ioc(struct mpi3mr_ioc *mrioc, u8 re_init)
+int mpi3mr_init_ioc(struct mpi3mr_ioc *mrioc, u8 init_type)
 {
 	int retval = 0;
 	enum mpi3mr_iocstate ioc_state;
@@ -3229,7 +3229,7 @@ int mpi3mr_init_ioc(struct mpi3mr_ioc *mrioc, u8 re_init)
 
 	mrioc->irqpoll_sleep = MPI3MR_IRQ_POLL_SLEEP;
 	mrioc->change_count = 0;
-	if (!re_init) {
+	if (init_type == MPI3MR_IT_INIT) {
 		mrioc->cpu_count = num_online_cpus();
 		retval = mpi3mr_setup_resources(mrioc);
 		if (retval) {
@@ -3314,7 +3314,7 @@ int mpi3mr_init_ioc(struct mpi3mr_ioc *mrioc, u8 re_init)
 		goto out_failed;
 	}
 
-	if (!re_init) {
+	if (init_type != MPI3MR_IT_RESET) {
 		retval = mpi3mr_setup_isr(mrioc, 1);
 		if (retval) {
 			ioc_err(mrioc, "Failed to setup ISR error %d\n",
@@ -3332,7 +3332,7 @@ int mpi3mr_init_ioc(struct mpi3mr_ioc *mrioc, u8 re_init)
 	}
 
 	mpi3mr_process_factsdata(mrioc, &facts_data);
-	if (!re_init) {
+	if (init_type == MPI3MR_IT_INIT) {
 		retval = mpi3mr_check_reset_dma_mask(mrioc);
 		if (retval) {
 			ioc_err(mrioc, "Resetting dma mask failed %d\n",
@@ -3351,7 +3351,7 @@ int mpi3mr_init_ioc(struct mpi3mr_ioc *mrioc, u8 re_init)
 		goto out_failed;
 	}
 
-	if (!re_init) {
+	if (init_type == MPI3MR_IT_INIT) {
 		retval = mpi3mr_alloc_chain_bufs(mrioc);
 		if (retval) {
 			ioc_err(mrioc, "Failed to allocated chain buffers %d\n",
@@ -3374,7 +3374,7 @@ int mpi3mr_init_ioc(struct mpi3mr_ioc *mrioc, u8 re_init)
 	writel(mrioc->sbq_host_index,
 	    &mrioc->sysif_regs->sense_buffer_free_host_index);
 
-	if (!re_init)  {
+	if (init_type != MPI3MR_IT_RESET) {
 		retval = mpi3mr_setup_isr(mrioc, 0);
 		if (retval) {
 			ioc_err(mrioc, "Failed to re-setup ISR, error %d\n",
@@ -3390,7 +3390,7 @@ int mpi3mr_init_ioc(struct mpi3mr_ioc *mrioc, u8 re_init)
 		goto out_failed;
 	}
 
-	if (re_init &&
+	if ((init_type != MPI3MR_IT_INIT) &&
 	    (mrioc->shost->nr_hw_queues > mrioc->num_op_reply_q)) {
 		retval = -1;
 		ioc_err(mrioc,
@@ -3422,7 +3422,7 @@ int mpi3mr_init_ioc(struct mpi3mr_ioc *mrioc, u8 re_init)
 		goto out_failed;
 	}
 
-	if (re_init) {
+	if (init_type != MPI3MR_IT_INIT) {
 		ioc_info(mrioc, "Issuing Port Enable\n");
 		retval = mpi3mr_issue_port_enable(mrioc, 0);
 		if (retval) {
@@ -3434,7 +3434,10 @@ int mpi3mr_init_ioc(struct mpi3mr_ioc *mrioc, u8 re_init)
 	return retval;
 
 out_failed:
-	mpi3mr_cleanup_ioc(mrioc, re_init);
+	if (init_type == MPI3MR_IT_INIT)
+		mpi3mr_cleanup_ioc(mrioc, MPI3MR_COMPLETE_CLEANUP);
+	else
+		mpi3mr_cleanup_ioc(mrioc, MPI3MR_REINIT_FAILURE);
 out_nocleanup:
 	return retval;
 }
@@ -3495,7 +3498,7 @@ static void mpi3mr_memset_op_req_q_buffers(struct mpi3mr_ioc *mrioc, u16 qidx)
  *
  * Return: Nothing.
  */
-static void mpi3mr_memset_buffers(struct mpi3mr_ioc *mrioc)
+void mpi3mr_memset_buffers(struct mpi3mr_ioc *mrioc)
 {
 	u16 i;
 
@@ -3710,7 +3713,7 @@ static void mpi3mr_issue_ioc_shutdown(struct mpi3mr_ioc *mrioc)
 /**
  * mpi3mr_cleanup_ioc - Cleanup controller
  * @mrioc: Adapter instance reference
- * @re_init: Cleanup due to a reinit or not
+ * @reason: Cleanup reason
  *
  * controller cleanup handler, Message unit reset or soft reset
  * and shutdown notification is issued to the controller and the
@@ -3718,11 +3721,11 @@ static void mpi3mr_issue_ioc_shutdown(struct mpi3mr_ioc *mrioc)
  *
  * Return: Nothing.
  */
-void mpi3mr_cleanup_ioc(struct mpi3mr_ioc *mrioc, u8 re_init)
+void mpi3mr_cleanup_ioc(struct mpi3mr_ioc *mrioc, u8 reason)
 {
 	enum mpi3mr_iocstate ioc_state;
 
-	if (!re_init)
+	if (reason == MPI3MR_COMPLETE_CLEANUP)
 		mpi3mr_stop_watchdog(mrioc);
 
 	mpi3mr_ioc_disable_intr(mrioc);
@@ -3737,11 +3740,11 @@ void mpi3mr_cleanup_ioc(struct mpi3mr_ioc *mrioc, u8 re_init)
 			    MPI3_SYSIF_HOST_DIAG_RESET_ACTION_SOFT_RESET,
 			    MPI3MR_RESET_FROM_MUR_FAILURE);
 
-		if (!re_init)
+		if (reason != MPI3MR_REINIT_FAILURE)
 			mpi3mr_issue_ioc_shutdown(mrioc);
 	}
 
-	if (!re_init) {
+	if (reason == MPI3MR_COMPLETE_CLEANUP) {
 		mpi3mr_free_mem(mrioc);
 		mpi3mr_cleanup_resources(mrioc);
 	}
@@ -3923,7 +3926,7 @@ int mpi3mr_soft_reset_handler(struct mpi3mr_ioc *mrioc,
 	mpi3mr_flush_host_io(mrioc);
 	mpi3mr_invalidate_devhandles(mrioc);
 	mpi3mr_memset_buffers(mrioc);
-	retval = mpi3mr_init_ioc(mrioc, 1);
+	retval = mpi3mr_init_ioc(mrioc, MPI3MR_IT_RESET);
 	if (retval) {
 		pr_err(IOCNAME "reinit after soft reset failed: reason %d\n",
 		    mrioc->name, reset_reason);
diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c
index 24ac7ddec749..2197988333fe 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_os.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_os.c
@@ -50,7 +50,7 @@ static u16 mpi3mr_host_tag_for_scmd(struct mpi3mr_ioc *mrioc,
 	u32 unique_tag;
 	u16 host_tag, hw_queue;
 
-	unique_tag = blk_mq_unique_tag(scmd->request);
+	unique_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
 
 	hw_queue = blk_mq_unique_tag_to_hwq(unique_tag);
 	if (hw_queue >= mrioc->num_op_reply_q)
@@ -1963,7 +1963,6 @@ static void mpi3mr_setup_eedp(struct mpi3mr_ioc *mrioc,
 {
 	u16 eedp_flags = 0;
 	unsigned char prot_op = scsi_get_prot_op(scmd);
-	unsigned char prot_type = scsi_get_prot_type(scmd);
 
 	switch (prot_op) {
 	case SCSI_PROT_NORMAL:
@@ -1983,60 +1982,42 @@ static void mpi3mr_setup_eedp(struct mpi3mr_ioc *mrioc,
 		scsiio_req->msg_flags |= MPI3_SCSIIO_MSGFLAGS_METASGL_VALID;
 		break;
 	case SCSI_PROT_READ_PASS:
-		eedp_flags = MPI3_EEDPFLAGS_EEDP_OP_CHECK |
-		    MPI3_EEDPFLAGS_CHK_REF_TAG | MPI3_EEDPFLAGS_CHK_APP_TAG |
-		    MPI3_EEDPFLAGS_CHK_GUARD;
+		eedp_flags = MPI3_EEDPFLAGS_EEDP_OP_CHECK;
 		scsiio_req->msg_flags |= MPI3_SCSIIO_MSGFLAGS_METASGL_VALID;
 		break;
 	case SCSI_PROT_WRITE_PASS:
-		if (scsi_host_get_guard(scmd->device->host)
-		    & SHOST_DIX_GUARD_IP) {
-			eedp_flags = MPI3_EEDPFLAGS_EEDP_OP_CHECK_REGEN |
-			    MPI3_EEDPFLAGS_CHK_APP_TAG |
-			    MPI3_EEDPFLAGS_CHK_GUARD |
-			    MPI3_EEDPFLAGS_INCR_PRI_REF_TAG;
+		if (scmd->prot_flags & SCSI_PROT_IP_CHECKSUM) {
+			eedp_flags = MPI3_EEDPFLAGS_EEDP_OP_CHECK_REGEN;
 			scsiio_req->sgl[0].eedp.application_tag_translation_mask =
 			    0xffff;
-		} else {
-			eedp_flags = MPI3_EEDPFLAGS_EEDP_OP_CHECK |
-			    MPI3_EEDPFLAGS_CHK_REF_TAG |
-			    MPI3_EEDPFLAGS_CHK_APP_TAG |
-			    MPI3_EEDPFLAGS_CHK_GUARD;
-		}
+		} else
+			eedp_flags = MPI3_EEDPFLAGS_EEDP_OP_CHECK;
+
 		scsiio_req->msg_flags |= MPI3_SCSIIO_MSGFLAGS_METASGL_VALID;
 		break;
 	default:
 		return;
 	}
 
-	if (scsi_host_get_guard(scmd->device->host) & SHOST_DIX_GUARD_IP)
+	if (scmd->prot_flags & SCSI_PROT_GUARD_CHECK)
+		eedp_flags |= MPI3_EEDPFLAGS_CHK_GUARD;
+
+	if (scmd->prot_flags & SCSI_PROT_IP_CHECKSUM)
 		eedp_flags |= MPI3_EEDPFLAGS_HOST_GUARD_IP_CHKSUM;
 
-	switch (prot_type) {
-	case SCSI_PROT_DIF_TYPE0:
-		eedp_flags |= MPI3_EEDPFLAGS_INCR_PRI_REF_TAG;
+	if (scmd->prot_flags & SCSI_PROT_REF_CHECK) {
+		eedp_flags |= MPI3_EEDPFLAGS_CHK_REF_TAG |
+			MPI3_EEDPFLAGS_INCR_PRI_REF_TAG;
 		scsiio_req->cdb.eedp32.primary_reference_tag =
-		    cpu_to_be32(t10_pi_ref_tag(scmd->request));
-		break;
-	case SCSI_PROT_DIF_TYPE1:
-	case SCSI_PROT_DIF_TYPE2:
-		eedp_flags |= MPI3_EEDPFLAGS_INCR_PRI_REF_TAG |
-		    MPI3_EEDPFLAGS_ESC_MODE_APPTAG_DISABLE |
-		    MPI3_EEDPFLAGS_CHK_GUARD;
-		scsiio_req->cdb.eedp32.primary_reference_tag =
-		    cpu_to_be32(t10_pi_ref_tag(scmd->request));
-		break;
-	case SCSI_PROT_DIF_TYPE3:
-		eedp_flags |= MPI3_EEDPFLAGS_CHK_GUARD |
-		    MPI3_EEDPFLAGS_ESC_MODE_APPTAG_DISABLE;
-		break;
-
-	default:
-		scsiio_req->msg_flags &= ~(MPI3_SCSIIO_MSGFLAGS_METASGL_VALID);
-		return;
+			cpu_to_be32(scsi_prot_ref_tag(scmd));
 	}
 
-	switch (scmd->device->sector_size) {
+	if (scmd->prot_flags & SCSI_PROT_REF_INCREMENT)
+		eedp_flags |= MPI3_EEDPFLAGS_INCR_PRI_REF_TAG;
+
+	eedp_flags |= MPI3_EEDPFLAGS_ESC_MODE_APPTAG_DISABLE;
+
+	switch (scsi_prot_interval(scmd)) {
 	case 512:
 		scsiio_req->sgl[0].eedp.user_data_size = MPI3_EEDP_UDS_512;
 		break;
@@ -3451,7 +3432,7 @@ static int mpi3mr_qcmd(struct Scsi_Host *shost,
 	u16 dev_handle;
 	u16 host_tag;
 	u32 scsiio_flags = 0;
-	struct request *rq = scmd->request;
+	struct request *rq = scsi_cmd_to_rq(scmd);
 	int iprio_class;
 
 	sdev_priv_data = scmd->device->hostdata;
@@ -3795,7 +3776,7 @@ mpi3mr_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	}
 
 	mrioc->is_driver_loading = 1;
-	if (mpi3mr_init_ioc(mrioc, 0)) {
+	if (mpi3mr_init_ioc(mrioc, MPI3MR_IT_INIT)) {
 		ioc_err(mrioc, "failure at %s:%d/%s()!\n",
 		    __FILE__, __LINE__, __func__);
 		retval = -ENODEV;
@@ -3818,7 +3799,7 @@ mpi3mr_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	return retval;
 
 addhost_failed:
-	mpi3mr_cleanup_ioc(mrioc, 0);
+	mpi3mr_cleanup_ioc(mrioc, MPI3MR_COMPLETE_CLEANUP);
 out_iocinit_failed:
 	destroy_workqueue(mrioc->fwevt_worker_thread);
 out_fwevtthread_failed:
@@ -3870,7 +3851,7 @@ static void mpi3mr_remove(struct pci_dev *pdev)
 		mpi3mr_tgtdev_del_from_list(mrioc, tgtdev);
 		mpi3mr_tgtdev_put(tgtdev);
 	}
-	mpi3mr_cleanup_ioc(mrioc, 0);
+	mpi3mr_cleanup_ioc(mrioc, MPI3MR_COMPLETE_CLEANUP);
 
 	spin_lock(&mrioc_list_lock);
 	list_del(&mrioc->list);
@@ -3910,7 +3891,7 @@ static void mpi3mr_shutdown(struct pci_dev *pdev)
 	spin_unlock_irqrestore(&mrioc->fwevt_lock, flags);
 	if (wq)
 		destroy_workqueue(wq);
-	mpi3mr_cleanup_ioc(mrioc, 0);
+	mpi3mr_cleanup_ioc(mrioc, MPI3MR_COMPLETE_CLEANUP);
 }
 
 #ifdef CONFIG_PM
@@ -3940,7 +3921,7 @@ static int mpi3mr_suspend(struct pci_dev *pdev, pm_message_t state)
 	mpi3mr_cleanup_fwevt_list(mrioc);
 	scsi_block_requests(shost);
 	mpi3mr_stop_watchdog(mrioc);
-	mpi3mr_cleanup_ioc(mrioc, 1);
+	mpi3mr_cleanup_ioc(mrioc, MPI3MR_SUSPEND);
 
 	device_state = pci_choose_state(pdev, state);
 	ioc_info(mrioc, "pdev=0x%p, slot=%s, entering operating state [D%d]\n",
@@ -3988,7 +3969,8 @@ static int mpi3mr_resume(struct pci_dev *pdev)
 	}
 
 	mrioc->stop_drv_processing = 0;
-	mpi3mr_init_ioc(mrioc, 1);
+	mpi3mr_memset_buffers(mrioc);
+	mpi3mr_init_ioc(mrioc, MPI3MR_IT_RESUME);
 	scsi_unblock_requests(shost);
 	mpi3mr_start_watchdog(mrioc);
 
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index cf4a3a2c22ad..6c82435bc9cc 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -116,6 +116,14 @@ MODULE_PARM_DESC(perf_mode,
 	"\t\tdefault - default perf_mode is 'balanced'"
 	);
 
+static int poll_queues;
+module_param(poll_queues, int, 0444);
+MODULE_PARM_DESC(poll_queues, "Number of queues to be use for io_uring poll mode.\n\t\t"
+	"This parameter is effective only if host_tagset_enable=1. &\n\t\t"
+	"when poll_queues are enabled then &\n\t\t"
+	"perf_mode is set to latency mode. &\n\t\t"
+	);
+
 enum mpt3sas_perf_mode {
 	MPT_PERF_MODE_DEFAULT	= -1,
 	MPT_PERF_MODE_BALANCED	= 0,
@@ -709,6 +717,7 @@ _base_fault_reset_work(struct work_struct *work)
 		 * and this call is safe since dead ioc will never return any
 		 * command back from HW.
 		 */
+		mpt3sas_base_pause_mq_polling(ioc);
 		ioc->schedule_dead_ioc_flush_running_cmds(ioc);
 		/*
 		 * Set remove_host flag early since kernel thread will
@@ -744,6 +753,7 @@ _base_fault_reset_work(struct work_struct *work)
 			spin_unlock_irqrestore(
 			    &ioc->ioc_reset_in_progress_lock, flags);
 			mpt3sas_base_mask_interrupts(ioc);
+			mpt3sas_base_pause_mq_polling(ioc);
 			_base_clear_outstanding_commands(ioc);
 		}
 
@@ -1548,6 +1558,53 @@ _base_get_cb_idx(struct MPT3SAS_ADAPTER *ioc, u16 smid)
 }
 
 /**
+ * mpt3sas_base_pause_mq_polling - pause polling on the mq poll queues
+ *				when driver is flushing out the IOs.
+ * @ioc: per adapter object
+ *
+ * Pause polling on the mq poll (io uring) queues when driver is flushing
+ * out the IOs. Otherwise we may see the race condition of completing the same
+ * IO from two paths.
+ *
+ * Returns nothing.
+ */
+void
+mpt3sas_base_pause_mq_polling(struct MPT3SAS_ADAPTER *ioc)
+{
+	int iopoll_q_count =
+	    ioc->reply_queue_count - ioc->iopoll_q_start_index;
+	int qid;
+
+	for (qid = 0; qid < iopoll_q_count; qid++)
+		atomic_set(&ioc->io_uring_poll_queues[qid].pause, 1);
+
+	/*
+	 * wait for current poll to complete.
+	 */
+	for (qid = 0; qid < iopoll_q_count; qid++) {
+		while (atomic_read(&ioc->io_uring_poll_queues[qid].busy))
+			udelay(500);
+	}
+}
+
+/**
+ * mpt3sas_base_resume_mq_polling - Resume polling on mq poll queues.
+ * @ioc: per adapter object
+ *
+ * Returns nothing.
+ */
+void
+mpt3sas_base_resume_mq_polling(struct MPT3SAS_ADAPTER *ioc)
+{
+	int iopoll_q_count =
+	    ioc->reply_queue_count - ioc->iopoll_q_start_index;
+	int qid;
+
+	for (qid = 0; qid < iopoll_q_count; qid++)
+		atomic_set(&ioc->io_uring_poll_queues[qid].pause, 0);
+}
+
+/**
  * mpt3sas_base_mask_interrupts - disable interrupts
  * @ioc: per adapter object
  *
@@ -1722,7 +1779,8 @@ _base_process_reply_queue(struct adapter_reply_queue *reply_q)
 						 MPI2_RPHI_MSIX_INDEX_SHIFT),
 						&ioc->chip->ReplyPostHostIndex);
 			}
-			if (!reply_q->irq_poll_scheduled) {
+			if (!reply_q->is_iouring_poll_q &&
+			    !reply_q->irq_poll_scheduled) {
 				reply_q->irq_poll_scheduled = true;
 				irq_poll_sched(&reply_q->irqpoll);
 			}
@@ -1779,6 +1837,33 @@ _base_process_reply_queue(struct adapter_reply_queue *reply_q)
 }
 
 /**
+ * mpt3sas_blk_mq_poll - poll the blk mq poll queue
+ * @shost: Scsi_Host object
+ * @queue_num: hw ctx queue number
+ *
+ * Return number of entries that has been processed from poll queue.
+ */
+int mpt3sas_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num)
+{
+	struct MPT3SAS_ADAPTER *ioc =
+	    (struct MPT3SAS_ADAPTER *)shost->hostdata;
+	struct adapter_reply_queue *reply_q;
+	int num_entries = 0;
+	int qid = queue_num - ioc->iopoll_q_start_index;
+
+	if (atomic_read(&ioc->io_uring_poll_queues[qid].pause) ||
+	    !atomic_add_unless(&ioc->io_uring_poll_queues[qid].busy, 1, 1))
+		return 0;
+
+	reply_q = ioc->io_uring_poll_queues[qid].reply_q;
+
+	num_entries = _base_process_reply_queue(reply_q);
+	atomic_dec(&ioc->io_uring_poll_queues[qid].busy);
+
+	return num_entries;
+}
+
+/**
  * _base_interrupt - MPT adapter (IOC) specific interrupt handler.
  * @irq: irq number (not used)
  * @bus_id: bus identifier cookie == pointer to MPT_ADAPTER structure
@@ -1851,6 +1936,8 @@ _base_init_irqpolls(struct MPT3SAS_ADAPTER *ioc)
 		return;
 
 	list_for_each_entry_safe(reply_q, next, &ioc->reply_queue_list, list) {
+		if (reply_q->is_iouring_poll_q)
+			continue;
 		irq_poll_init(&reply_q->irqpoll,
 			ioc->hba_queue_depth/4, _base_irqpoll);
 		reply_q->irq_poll_scheduled = false;
@@ -1900,6 +1987,12 @@ mpt3sas_base_sync_reply_irqs(struct MPT3SAS_ADAPTER *ioc, u8 poll)
 		/* TMs are on msix_index == 0 */
 		if (reply_q->msix_index == 0)
 			continue;
+
+		if (reply_q->is_iouring_poll_q) {
+			_base_process_reply_queue(reply_q);
+			continue;
+		}
+
 		synchronize_irq(pci_irq_vector(ioc->pdev, reply_q->msix_index));
 		if (reply_q->irq_poll_scheduled) {
 			/* Calling irq_poll_disable will wait for any pending
@@ -2998,6 +3091,11 @@ mpt3sas_base_free_irq(struct MPT3SAS_ADAPTER *ioc)
 
 	list_for_each_entry_safe(reply_q, next, &ioc->reply_queue_list, list) {
 		list_del(&reply_q->list);
+		if (reply_q->is_iouring_poll_q) {
+			kfree(reply_q);
+			continue;
+		}
+
 		if (ioc->smp_affinity_enable)
 			irq_set_affinity_hint(pci_irq_vector(ioc->pdev,
 			    reply_q->msix_index), NULL);
@@ -3019,7 +3117,7 @@ _base_request_irq(struct MPT3SAS_ADAPTER *ioc, u8 index)
 {
 	struct pci_dev *pdev = ioc->pdev;
 	struct adapter_reply_queue *reply_q;
-	int r;
+	int r, qid;
 
 	reply_q =  kzalloc(sizeof(struct adapter_reply_queue), GFP_KERNEL);
 	if (!reply_q) {
@@ -3031,6 +3129,17 @@ _base_request_irq(struct MPT3SAS_ADAPTER *ioc, u8 index)
 	reply_q->msix_index = index;
 
 	atomic_set(&reply_q->busy, 0);
+
+	if (index >= ioc->iopoll_q_start_index) {
+		qid = index - ioc->iopoll_q_start_index;
+		snprintf(reply_q->name, MPT_NAME_LENGTH, "%s%d-mq-poll%d",
+		    ioc->driver_name, ioc->id, qid);
+		reply_q->is_iouring_poll_q = 1;
+		ioc->io_uring_poll_queues[qid].reply_q = reply_q;
+		goto out;
+	}
+
+
 	if (ioc->msix_enable)
 		snprintf(reply_q->name, MPT_NAME_LENGTH, "%s%d-msix%d",
 		    ioc->driver_name, ioc->id, index);
@@ -3045,7 +3154,7 @@ _base_request_irq(struct MPT3SAS_ADAPTER *ioc, u8 index)
 		kfree(reply_q);
 		return -EBUSY;
 	}
-
+out:
 	INIT_LIST_HEAD(&reply_q->list);
 	list_add_tail(&reply_q->list, &ioc->reply_queue_list);
 	return 0;
@@ -3066,6 +3175,8 @@ _base_assign_reply_queues(struct MPT3SAS_ADAPTER *ioc)
 	unsigned int cpu, nr_cpus, nr_msix, index = 0;
 	struct adapter_reply_queue *reply_q;
 	int local_numa_node;
+	int iopoll_q_count = ioc->reply_queue_count -
+	    ioc->iopoll_q_start_index;
 
 	if (!_base_is_controller_msix_enabled(ioc))
 		return;
@@ -3099,7 +3210,8 @@ _base_assign_reply_queues(struct MPT3SAS_ADAPTER *ioc)
 		list_for_each_entry(reply_q, &ioc->reply_queue_list, list) {
 			const cpumask_t *mask;
 
-			if (reply_q->msix_index < ioc->high_iops_queues)
+			if (reply_q->msix_index < ioc->high_iops_queues ||
+			    reply_q->msix_index >= ioc->iopoll_q_start_index)
 				continue;
 
 			mask = pci_irq_get_affinity(ioc->pdev,
@@ -3121,13 +3233,14 @@ _base_assign_reply_queues(struct MPT3SAS_ADAPTER *ioc)
 
 fall_back:
 	cpu = cpumask_first(cpu_online_mask);
-	nr_msix -= ioc->high_iops_queues;
+	nr_msix -= (ioc->high_iops_queues - iopoll_q_count);
 	index = 0;
 
 	list_for_each_entry(reply_q, &ioc->reply_queue_list, list) {
 		unsigned int i, group = nr_cpus / nr_msix;
 
-		if (reply_q->msix_index < ioc->high_iops_queues)
+		if (reply_q->msix_index < ioc->high_iops_queues ||
+		    reply_q->msix_index >= ioc->iopoll_q_start_index)
 			continue;
 
 		if (cpu >= nr_cpus)
@@ -3164,8 +3277,12 @@ _base_check_and_enable_high_iops_queues(struct MPT3SAS_ADAPTER *ioc,
 {
 	u16 lnksta, speed;
 
+	/*
+	 * Disable high iops queues if io uring poll queues are enabled.
+	 */
 	if (perf_mode == MPT_PERF_MODE_IOPS ||
-	    perf_mode == MPT_PERF_MODE_LATENCY) {
+	    perf_mode == MPT_PERF_MODE_LATENCY ||
+	    ioc->io_uring_poll_queues) {
 		ioc->high_iops_queues = 0;
 		return;
 	}
@@ -3202,6 +3319,7 @@ mpt3sas_base_disable_msix(struct MPT3SAS_ADAPTER *ioc)
 		return;
 	pci_free_irq_vectors(ioc->pdev);
 	ioc->msix_enable = 0;
+	kfree(ioc->io_uring_poll_queues);
 }
 
 /**
@@ -3215,18 +3333,24 @@ _base_alloc_irq_vectors(struct MPT3SAS_ADAPTER *ioc)
 	int i, irq_flags = PCI_IRQ_MSIX;
 	struct irq_affinity desc = { .pre_vectors = ioc->high_iops_queues };
 	struct irq_affinity *descp = &desc;
+	/*
+	 * Don't allocate msix vectors for poll_queues.
+	 * msix_vectors is always within a range of FW supported reply queue.
+	 */
+	int nr_msix_vectors = ioc->iopoll_q_start_index;
+
 
 	if (ioc->smp_affinity_enable)
-		irq_flags |= PCI_IRQ_AFFINITY;
+		irq_flags |= PCI_IRQ_AFFINITY | PCI_IRQ_ALL_TYPES;
 	else
 		descp = NULL;
 
-	ioc_info(ioc, " %d %d\n", ioc->high_iops_queues,
-	    ioc->reply_queue_count);
+	ioc_info(ioc, " %d %d %d\n", ioc->high_iops_queues,
+	    ioc->reply_queue_count, nr_msix_vectors);
 
 	i = pci_alloc_irq_vectors_affinity(ioc->pdev,
 	    ioc->high_iops_queues,
-	    ioc->reply_queue_count, irq_flags, descp);
+	    nr_msix_vectors, irq_flags, descp);
 
 	return i;
 }
@@ -3242,6 +3366,7 @@ _base_enable_msix(struct MPT3SAS_ADAPTER *ioc)
 	int r;
 	int i, local_max_msix_vectors;
 	u8 try_msix = 0;
+	int iopoll_q_count = 0;
 
 	ioc->msix_load_balance = false;
 
@@ -3257,22 +3382,16 @@ _base_enable_msix(struct MPT3SAS_ADAPTER *ioc)
 	ioc_info(ioc, "MSI-X vectors supported: %d\n", ioc->msix_vector_count);
 	pr_info("\t no of cores: %d, max_msix_vectors: %d\n",
 		ioc->cpu_count, max_msix_vectors);
-	if (ioc->is_aero_ioc)
-		_base_check_and_enable_high_iops_queues(ioc,
-			ioc->msix_vector_count);
+
 	ioc->reply_queue_count =
-		min_t(int, ioc->cpu_count + ioc->high_iops_queues,
-		ioc->msix_vector_count);
+		min_t(int, ioc->cpu_count, ioc->msix_vector_count);
 
 	if (!ioc->rdpq_array_enable && max_msix_vectors == -1)
 		local_max_msix_vectors = (reset_devices) ? 1 : 8;
 	else
 		local_max_msix_vectors = max_msix_vectors;
 
-	if (local_max_msix_vectors > 0)
-		ioc->reply_queue_count = min_t(int, local_max_msix_vectors,
-			ioc->reply_queue_count);
-	else if (local_max_msix_vectors == 0)
+	if (local_max_msix_vectors == 0)
 		goto try_ioapic;
 
 	/*
@@ -3293,14 +3412,77 @@ _base_enable_msix(struct MPT3SAS_ADAPTER *ioc)
 	if (ioc->msix_load_balance)
 		ioc->smp_affinity_enable = 0;
 
+	if (!ioc->smp_affinity_enable || ioc->reply_queue_count <= 1)
+		ioc->shost->host_tagset = 0;
+
+	/*
+	 * Enable io uring poll queues only if host_tagset is enabled.
+	 */
+	if (ioc->shost->host_tagset)
+		iopoll_q_count = poll_queues;
+
+	if (iopoll_q_count) {
+		ioc->io_uring_poll_queues = kcalloc(iopoll_q_count,
+		    sizeof(struct io_uring_poll_queue), GFP_KERNEL);
+		if (!ioc->io_uring_poll_queues)
+			iopoll_q_count = 0;
+	}
+
+	if (ioc->is_aero_ioc)
+		_base_check_and_enable_high_iops_queues(ioc,
+		    ioc->msix_vector_count);
+
+	/*
+	 * Add high iops queues count to reply queue count if high iops queues
+	 * are enabled.
+	 */
+	ioc->reply_queue_count = min_t(int,
+	    ioc->reply_queue_count + ioc->high_iops_queues,
+	    ioc->msix_vector_count);
+
+	/*
+	 * Adjust the reply queue count incase reply queue count
+	 * exceeds the user provided MSIx vectors count.
+	 */
+	if (local_max_msix_vectors > 0)
+		ioc->reply_queue_count = min_t(int, local_max_msix_vectors,
+		    ioc->reply_queue_count);
+	/*
+	 * Add io uring poll queues count to reply queues count
+	 * if io uring is enabled in driver.
+	 */
+	if (iopoll_q_count) {
+		if (ioc->reply_queue_count < (iopoll_q_count + MPT3_MIN_IRQS))
+			iopoll_q_count = 0;
+		ioc->reply_queue_count = min_t(int,
+		    ioc->reply_queue_count + iopoll_q_count,
+		    ioc->msix_vector_count);
+	}
+
+	/*
+	 * Starting index of io uring poll queues in reply queue list.
+	 */
+	ioc->iopoll_q_start_index =
+	    ioc->reply_queue_count - iopoll_q_count;
+
 	r = _base_alloc_irq_vectors(ioc);
 	if (r < 0) {
 		ioc_info(ioc, "pci_alloc_irq_vectors failed (r=%d) !!!\n", r);
 		goto try_ioapic;
 	}
 
+	/*
+	 * Adjust the reply queue count if the allocated
+	 * MSIx vectors is less then the requested number
+	 * of MSIx vectors.
+	 */
+	if (r < ioc->iopoll_q_start_index) {
+		ioc->reply_queue_count = r + iopoll_q_count;
+		ioc->iopoll_q_start_index =
+		    ioc->reply_queue_count - iopoll_q_count;
+	}
+
 	ioc->msix_enable = 1;
-	ioc->reply_queue_count = r;
 	for (i = 0; i < ioc->reply_queue_count; i++) {
 		r = _base_request_irq(ioc, i);
 		if (r) {
@@ -3320,6 +3502,7 @@ _base_enable_msix(struct MPT3SAS_ADAPTER *ioc)
 	ioc->high_iops_queues = 0;
 	ioc_info(ioc, "High IOPs queues : disabled\n");
 	ioc->reply_queue_count = 1;
+	ioc->iopoll_q_start_index = ioc->reply_queue_count - 0;
 	r = pci_alloc_irq_vectors(ioc->pdev, 1, 1, PCI_IRQ_LEGACY);
 	if (r < 0) {
 		dfailprintk(ioc,
@@ -3416,6 +3599,7 @@ mpt3sas_base_map_resources(struct MPT3SAS_ADAPTER *ioc)
 	u64 pio_chip = 0;
 	phys_addr_t chip_phys = 0;
 	struct adapter_reply_queue *reply_q;
+	int iopoll_q_count = 0;
 
 	dinitprintk(ioc, ioc_info(ioc, "%s\n", __func__));
 
@@ -3489,6 +3673,12 @@ mpt3sas_base_map_resources(struct MPT3SAS_ADAPTER *ioc)
 	if (r)
 		goto out_fail;
 
+	iopoll_q_count = ioc->reply_queue_count - ioc->iopoll_q_start_index;
+	for (i = 0; i < iopoll_q_count; i++) {
+		atomic_set(&ioc->io_uring_poll_queues[i].busy, 0);
+		atomic_set(&ioc->io_uring_poll_queues[i].pause, 0);
+	}
+
 	if (!ioc->is_driver_loading)
 		_base_init_irqpolls(ioc);
 	/* Use the Combined reply queue feature only for SAS3 C0 & higher
@@ -3530,11 +3720,18 @@ mpt3sas_base_map_resources(struct MPT3SAS_ADAPTER *ioc)
 			* 4)));
 	}
 
-	list_for_each_entry(reply_q, &ioc->reply_queue_list, list)
+	list_for_each_entry(reply_q, &ioc->reply_queue_list, list) {
+		if (reply_q->msix_index >= ioc->iopoll_q_start_index) {
+			pr_info("%s: enabled: index: %d\n",
+			    reply_q->name, reply_q->msix_index);
+			continue;
+		}
+
 		pr_info("%s: %s enabled: IRQ %d\n",
 			reply_q->name,
 			ioc->msix_enable ? "PCI-MSI-X" : "IO-APIC",
 			pci_irq_vector(ioc->pdev, reply_q->msix_index));
+	}
 
 	ioc_info(ioc, "iomem(%pap), mapped(0x%p), size(%d)\n",
 		 &chip_phys, ioc->chip, memap_sz);
@@ -3651,7 +3848,7 @@ _base_get_msix_index(struct MPT3SAS_ADAPTER *ioc,
 		    &ioc->total_io_cnt), ioc->reply_queue_count) : 0;
 
 	if (scmd && ioc->shost->nr_hw_queues > 1) {
-		u32 tag = blk_mq_unique_tag(scmd->request);
+		u32 tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
 
 		return blk_mq_unique_tag_to_hwq(tag) +
 			ioc->high_iops_queues;
@@ -3735,7 +3932,7 @@ mpt3sas_base_get_smid_scsiio(struct MPT3SAS_ADAPTER *ioc, u8 cb_idx,
 	u16 smid;
 	u32 tag, unique_tag;
 
-	unique_tag = blk_mq_unique_tag(scmd->request);
+	unique_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
 	tag = blk_mq_unique_tag_to_tag(unique_tag);
 
 	/*
@@ -5169,6 +5366,73 @@ _base_update_diag_trigger_pages(struct MPT3SAS_ADAPTER *ioc)
 }
 
 /**
+ * _base_assign_fw_reported_qd	- Get FW reported QD for SAS/SATA devices.
+ *				- On failure set default QD values.
+ * @ioc : per adapter object
+ *
+ * Returns 0 for success, non-zero for failure.
+ *
+ */
+static int _base_assign_fw_reported_qd(struct MPT3SAS_ADAPTER *ioc)
+{
+	Mpi2ConfigReply_t mpi_reply;
+	Mpi2SasIOUnitPage1_t *sas_iounit_pg1 = NULL;
+	Mpi26PCIeIOUnitPage1_t pcie_iounit_pg1;
+	int sz;
+	int rc = 0;
+
+	ioc->max_wideport_qd = MPT3SAS_SAS_QUEUE_DEPTH;
+	ioc->max_narrowport_qd = MPT3SAS_SAS_QUEUE_DEPTH;
+	ioc->max_sata_qd = MPT3SAS_SATA_QUEUE_DEPTH;
+	ioc->max_nvme_qd = MPT3SAS_NVME_QUEUE_DEPTH;
+	if (!ioc->is_gen35_ioc)
+		goto out;
+	/* sas iounit page 1 */
+	sz = offsetof(Mpi2SasIOUnitPage1_t, PhyData);
+	sas_iounit_pg1 = kzalloc(sz, GFP_KERNEL);
+	if (!sas_iounit_pg1) {
+		pr_err("%s: failure at %s:%d/%s()!\n",
+		    ioc->name, __FILE__, __LINE__, __func__);
+		return rc;
+	}
+	rc = mpt3sas_config_get_sas_iounit_pg1(ioc, &mpi_reply,
+	    sas_iounit_pg1, sz);
+	if (rc) {
+		pr_err("%s: failure at %s:%d/%s()!\n",
+		    ioc->name, __FILE__, __LINE__, __func__);
+		goto out;
+	}
+	ioc->max_wideport_qd =
+	    (le16_to_cpu(sas_iounit_pg1->SASWideMaxQueueDepth)) ?
+	    le16_to_cpu(sas_iounit_pg1->SASWideMaxQueueDepth) :
+	    MPT3SAS_SAS_QUEUE_DEPTH;
+	ioc->max_narrowport_qd =
+	    (le16_to_cpu(sas_iounit_pg1->SASNarrowMaxQueueDepth)) ?
+	    le16_to_cpu(sas_iounit_pg1->SASNarrowMaxQueueDepth) :
+	    MPT3SAS_SAS_QUEUE_DEPTH;
+	ioc->max_sata_qd = (sas_iounit_pg1->SATAMaxQDepth) ?
+	    sas_iounit_pg1->SATAMaxQDepth : MPT3SAS_SATA_QUEUE_DEPTH;
+	/* pcie iounit page 1 */
+	rc = mpt3sas_config_get_pcie_iounit_pg1(ioc, &mpi_reply,
+	    &pcie_iounit_pg1, sizeof(Mpi26PCIeIOUnitPage1_t));
+	if (rc) {
+		pr_err("%s: failure at %s:%d/%s()!\n",
+		    ioc->name, __FILE__, __LINE__, __func__);
+		goto out;
+	}
+	ioc->max_nvme_qd = (le16_to_cpu(pcie_iounit_pg1.NVMeMaxQueueDepth)) ?
+	    (le16_to_cpu(pcie_iounit_pg1.NVMeMaxQueueDepth)) :
+	    MPT3SAS_NVME_QUEUE_DEPTH;
+out:
+	dinitprintk(ioc, pr_err(
+	    "MaxWidePortQD: 0x%x MaxNarrowPortQD: 0x%x MaxSataQD: 0x%x MaxNvmeQD: 0x%x\n",
+	    ioc->max_wideport_qd, ioc->max_narrowport_qd,
+	    ioc->max_sata_qd, ioc->max_nvme_qd));
+	kfree(sas_iounit_pg1);
+	return rc;
+}
+
+/**
  * _base_static_config_pages - static start of day config pages
  * @ioc: per adapter object
  */
@@ -5237,6 +5501,9 @@ _base_static_config_pages(struct MPT3SAS_ADAPTER *ioc)
 			ioc_warn(ioc,
 			    "TimeSync Interval in Manuf page-11 is not enabled. Periodic Time-Sync will be disabled\n");
 	}
+	rc = _base_assign_fw_reported_qd(ioc);
+	if (rc)
+		return rc;
 	rc = mpt3sas_config_get_bios_pg2(ioc, &mpi_reply, &ioc->bios_pg2);
 	if (rc)
 		return rc;
@@ -8471,6 +8738,7 @@ mpt3sas_base_hard_reset_handler(struct MPT3SAS_ADAPTER *ioc,
 	_base_pre_reset_handler(ioc);
 	mpt3sas_wait_for_commands_to_complete(ioc);
 	mpt3sas_base_mask_interrupts(ioc);
+	mpt3sas_base_pause_mq_polling(ioc);
 	r = mpt3sas_base_make_ioc_ready(ioc, type);
 	if (r)
 		goto out;
@@ -8512,6 +8780,7 @@ mpt3sas_base_hard_reset_handler(struct MPT3SAS_ADAPTER *ioc,
 	spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags);
 	ioc->ioc_reset_count++;
 	mutex_unlock(&ioc->reset_in_progress_mutex);
+	mpt3sas_base_resume_mq_polling(ioc);
 
  out_unlocked:
 	if ((r == 0) && is_trigger) {
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h
index 0c6c3df0038d..f87c0911f66a 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.h
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.h
@@ -77,9 +77,9 @@
 #define MPT3SAS_DRIVER_NAME		"mpt3sas"
 #define MPT3SAS_AUTHOR "Avago Technologies <MPT-FusionLinux.pdl@avagotech.com>"
 #define MPT3SAS_DESCRIPTION	"LSI MPT Fusion SAS 3.0 Device Driver"
-#define MPT3SAS_DRIVER_VERSION		"37.101.00.00"
-#define MPT3SAS_MAJOR_VERSION		37
-#define MPT3SAS_MINOR_VERSION		101
+#define MPT3SAS_DRIVER_VERSION		"39.100.00.00"
+#define MPT3SAS_MAJOR_VERSION		39
+#define MPT3SAS_MINOR_VERSION		100
 #define MPT3SAS_BUILD_VERSION		0
 #define MPT3SAS_RELEASE_VERSION	00
 
@@ -354,6 +354,7 @@ struct mpt3sas_nvme_cmd {
 #define MPT3_SUP_REPLY_POST_HOST_INDEX_REG_COUNT_G3	12
 #define MPT3_SUP_REPLY_POST_HOST_INDEX_REG_COUNT_G35	16
 #define MPT3_SUP_REPLY_POST_HOST_INDEX_REG_OFFSET	(0x10)
+#define MPT3_MIN_IRQS					1
 
 /* OEM Identifiers */
 #define MFG10_OEM_ID_INVALID                   (0x00000000)
@@ -575,6 +576,7 @@ struct _sas_device {
 	u8	is_chassis_slot_valid;
 	u8	connector_name[5];
 	struct kref refcount;
+	u8	port_type;
 	struct hba_port *port;
 	struct sas_rphy *rphy;
 };
@@ -936,6 +938,8 @@ struct _event_ack_list {
  * @os_irq: irq number
  * @irqpoll: irq_poll object
  * @irq_poll_scheduled: Tells whether irq poll is scheduled or not
+ * @is_iouring_poll_q: Tells whether reply queues is assigned
+ *			to io uring poll queues or not
  * @list: this list
 */
 struct adapter_reply_queue {
@@ -949,9 +953,22 @@ struct adapter_reply_queue {
 	struct irq_poll         irqpoll;
 	bool			irq_poll_scheduled;
 	bool			irq_line_enable;
+	bool			is_iouring_poll_q;
 	struct list_head	list;
 };
 
+/**
+ * struct io_uring_poll_queue - the io uring poll queue structure
+ * @busy: Tells whether io uring poll queue is busy or not
+ * @pause: Tells whether IOs are paused on io uring poll queue or not
+ * @reply_q: reply queue mapped for io uring poll queue
+ */
+struct io_uring_poll_queue {
+	atomic_t	busy;
+	atomic_t	pause;
+	struct adapter_reply_queue *reply_q;
+};
+
 typedef void (*MPT_ADD_SGE)(void *paddr, u32 flags_length, dma_addr_t dma_addr);
 
 /* SAS3.0 support */
@@ -1176,6 +1193,8 @@ typedef void (*MPT3SAS_FLUSH_RUNNING_CMDS)(struct MPT3SAS_ADAPTER *ioc);
  * @schedule_dead_ioc_flush_running_cmds: callback to flush pending commands
  * @thresh_hold: Max number of reply descriptors processed
  *				before updating Host Index
+ * @iopoll_q_start_index: starting index of io uring poll queues
+ *				in reply queue list
  * @drv_internal_flags: Bit map internal to driver
  * @drv_support_bitmap: driver's supported feature bit map
  * @use_32bit_dma: Flag to use 32 bit consistent dma mask
@@ -1372,11 +1391,13 @@ struct MPT3SAS_ADAPTER {
 	bool            msix_load_balance;
 	u16		thresh_hold;
 	u8		high_iops_queues;
+	u8		iopoll_q_start_index;
 	u32             drv_internal_flags;
 	u32		drv_support_bitmap;
 	u32             dma_mask;
 	bool		enable_sdev_max_qd;
 	bool		use_32bit_dma;
+	struct io_uring_poll_queue *io_uring_poll_queues;
 
 	/* internal commands, callback index */
 	u8		scsi_io_cb_idx;
@@ -1423,6 +1444,10 @@ struct MPT3SAS_ADAPTER {
 	u8		tm_custom_handling;
 	u8		nvme_abort_timeout;
 	u16		max_shutdown_latency;
+	u16		max_wideport_qd;
+	u16		max_narrowport_qd;
+	u16		max_nvme_qd;
+	u8		max_sata_qd;
 
 	/* static config pages */
 	struct mpt3sas_facts facts;
@@ -1730,10 +1755,12 @@ do {	ioc_err(ioc, "In func: %s\n", __func__); \
 	status, mpi_request, sz); } while (0)
 
 int mpt3sas_wait_for_ioc(struct MPT3SAS_ADAPTER *ioc, int wait_count);
-int
-mpt3sas_base_make_ioc_ready(struct MPT3SAS_ADAPTER *ioc, enum reset_type type);
+int mpt3sas_base_make_ioc_ready(struct MPT3SAS_ADAPTER *ioc, enum reset_type type);
 void mpt3sas_base_free_irq(struct MPT3SAS_ADAPTER *ioc);
 void mpt3sas_base_disable_msix(struct MPT3SAS_ADAPTER *ioc);
+int mpt3sas_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num);
+void mpt3sas_base_pause_mq_polling(struct MPT3SAS_ADAPTER *ioc);
+void mpt3sas_base_resume_mq_polling(struct MPT3SAS_ADAPTER *ioc);
 
 /* scsih shared API */
 struct scsi_cmnd *mpt3sas_scsih_scsi_lookup_get(struct MPT3SAS_ADAPTER *ioc,
@@ -1829,6 +1856,9 @@ int mpt3sas_config_get_pcie_device_pg0(struct MPT3SAS_ADAPTER *ioc,
 int mpt3sas_config_get_pcie_device_pg2(struct MPT3SAS_ADAPTER *ioc,
 	Mpi2ConfigReply_t *mpi_reply, Mpi26PCIeDevicePage2_t *config_page,
 	u32 form, u32 handle);
+int mpt3sas_config_get_pcie_iounit_pg1(struct MPT3SAS_ADAPTER *ioc,
+	Mpi2ConfigReply_t *mpi_reply, Mpi26PCIeIOUnitPage1_t *config_page,
+	u16 sz);
 int mpt3sas_config_get_sas_iounit_pg0(struct MPT3SAS_ADAPTER *ioc,
 	Mpi2ConfigReply_t *mpi_reply, Mpi2SasIOUnitPage0_t *config_page,
 	u16 sz);
diff --git a/drivers/scsi/mpt3sas/mpt3sas_config.c b/drivers/scsi/mpt3sas/mpt3sas_config.c
index 83a5c2172ad4..0563078227de 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_config.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_config.c
@@ -1169,6 +1169,43 @@ out:
 }
 
 /**
+ * mpt3sas_config_get_pcie_iounit_pg1 - obtain pcie iounit page 1
+ * @ioc: per adapter object
+ * @mpi_reply: reply mf payload returned from firmware
+ * @config_page: contents of the config page
+ * @sz: size of buffer passed in config_page
+ * Context: sleep.
+ *
+ * Returns 0 for success, non-zero for failure.
+ */
+int
+mpt3sas_config_get_pcie_iounit_pg1(struct MPT3SAS_ADAPTER *ioc,
+	Mpi2ConfigReply_t *mpi_reply, Mpi26PCIeIOUnitPage1_t *config_page,
+	u16 sz)
+{
+	Mpi2ConfigRequest_t mpi_request;
+	int r;
+
+	memset(&mpi_request, 0, sizeof(Mpi2ConfigRequest_t));
+	mpi_request.Function = MPI2_FUNCTION_CONFIG;
+	mpi_request.Action = MPI2_CONFIG_ACTION_PAGE_HEADER;
+	mpi_request.Header.PageType = MPI2_CONFIG_PAGETYPE_EXTENDED;
+	mpi_request.ExtPageType = MPI2_CONFIG_EXTPAGETYPE_PCIE_IO_UNIT;
+	mpi_request.Header.PageVersion = MPI26_PCIEIOUNITPAGE1_PAGEVERSION;
+	mpi_request.Header.PageNumber = 1;
+	ioc->build_zero_len_sge_mpi(ioc, &mpi_request.PageBufferSGE);
+	r = _config_request(ioc, &mpi_request, mpi_reply,
+	    MPT3_CONFIG_PAGE_DEFAULT_TIMEOUT, NULL, 0);
+	if (r)
+		goto out;
+	mpi_request.Action = MPI2_CONFIG_ACTION_PAGE_READ_CURRENT;
+	r = _config_request(ioc, &mpi_request, mpi_reply,
+	    MPT3_CONFIG_PAGE_DEFAULT_TIMEOUT, config_page, sz);
+out:
+	return r;
+}
+
+/**
  * mpt3sas_config_get_pcie_device_pg2 - obtain pcie device page 2
  * @ioc: per adapter object
  * @mpi_reply: reply mf payload returned from firmware
diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
index b66140e4c370..770b241d7bb2 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
@@ -3820,9 +3820,10 @@ enable_sdev_max_qd_store(struct device *cdev,
 				}
 			} else if (sas_target_priv_data->flags &
 			    MPT_TARGET_FLAGS_PCIE_DEVICE)
-				qdepth = MPT3SAS_NVME_QUEUE_DEPTH;
+				qdepth = ioc->max_nvme_qd;
 			else
-				qdepth = MPT3SAS_SAS_QUEUE_DEPTH;
+				qdepth = (sas_target_priv_data->sas_dev->port_type > 1) ?
+				    ioc->max_wideport_qd : ioc->max_narrowport_qd;
 
 			mpt3sas_scsih_change_queue_depth(sdev, qdepth);
 		}
@@ -3919,6 +3920,24 @@ sas_device_handle_show(struct device *dev, struct device_attribute *attr,
 static DEVICE_ATTR_RO(sas_device_handle);
 
 /**
+ * sas_ncq_prio_supported_show - Indicate if device supports NCQ priority
+ * @dev: pointer to embedded device
+ * @attr: sas_ncq_prio_supported attribute descriptor
+ * @buf: the buffer returned
+ *
+ * A sysfs 'read-only' sdev attribute, only works with SATA
+ */
+static ssize_t
+sas_ncq_prio_supported_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct scsi_device *sdev = to_scsi_device(dev);
+
+	return sysfs_emit(buf, "%d\n", scsih_ncq_prio_supp(sdev));
+}
+static DEVICE_ATTR_RO(sas_ncq_prio_supported);
+
+/**
  * sas_ncq_prio_enable_show - send prioritized io commands to device
  * @dev: pointer to embedded device
  * @attr: ?
@@ -3960,6 +3979,7 @@ static DEVICE_ATTR_RW(sas_ncq_prio_enable);
 struct device_attribute *mpt3sas_dev_attrs[] = {
 	&dev_attr_sas_address,
 	&dev_attr_sas_device_handle,
+	&dev_attr_sas_ncq_prio_supported,
 	&dev_attr_sas_ncq_prio_enable,
 	NULL,
 };
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 8e64a6f14542..2f82b1e629af 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -1803,7 +1803,7 @@ scsih_change_queue_depth(struct scsi_device *sdev, int qdepth)
 	 * limit max device queue for SATA to 32 if enable_sdev_max_qd
 	 * is disabled.
 	 */
-	if (ioc->enable_sdev_max_qd)
+	if (ioc->enable_sdev_max_qd || ioc->is_gen35_ioc)
 		goto not_sata;
 
 	sas_device_priv_data = sdev->hostdata;
@@ -2657,7 +2657,7 @@ scsih_slave_configure(struct scsi_device *sdev)
 			return 1;
 		}
 
-		qdepth = MPT3SAS_NVME_QUEUE_DEPTH;
+		qdepth = ioc->max_nvme_qd;
 		ds = "NVMe";
 		sdev_printk(KERN_INFO, sdev,
 			"%s: handle(0x%04x), wwid(0x%016llx), port(%d)\n",
@@ -2709,7 +2709,8 @@ scsih_slave_configure(struct scsi_device *sdev)
 	sas_device->volume_handle = volume_handle;
 	sas_device->volume_wwid = volume_wwid;
 	if (sas_device->device_info & MPI2_SAS_DEVICE_INFO_SSP_TARGET) {
-		qdepth = MPT3SAS_SAS_QUEUE_DEPTH;
+		qdepth = (sas_device->port_type > 1) ?
+			ioc->max_wideport_qd : ioc->max_narrowport_qd;
 		ssp_target = 1;
 		if (sas_device->device_info &
 				MPI2_SAS_DEVICE_INFO_SEP) {
@@ -2721,7 +2722,7 @@ scsih_slave_configure(struct scsi_device *sdev)
 		} else
 			ds = "SSP";
 	} else {
-		qdepth = MPT3SAS_SATA_QUEUE_DEPTH;
+		qdepth = ioc->max_sata_qd;
 		if (sas_device->device_info & MPI2_SAS_DEVICE_INFO_STP_TARGET)
 			ds = "STP";
 		else if (sas_device->device_info &
@@ -3304,7 +3305,7 @@ scsih_abort(struct scsi_cmnd *scmd)
 	sdev_printk(KERN_INFO, scmd->device, "attempting task abort!"
 	    "scmd(0x%p), outstanding for %u ms & timeout %u ms\n",
 	    scmd, jiffies_to_msecs(jiffies - scmd->jiffies_at_alloc),
-	    (scmd->request->timeout / HZ) * 1000);
+	    (scsi_cmd_to_rq(scmd)->timeout / HZ) * 1000);
 	_scsih_tm_display_info(ioc, scmd);
 
 	sas_device_priv_data = scmd->device->hostdata;
@@ -5047,48 +5048,31 @@ _scsih_setup_eedp(struct MPT3SAS_ADAPTER *ioc, struct scsi_cmnd *scmd,
 	Mpi25SCSIIORequest_t *mpi_request)
 {
 	u16 eedp_flags;
-	unsigned char prot_op = scsi_get_prot_op(scmd);
-	unsigned char prot_type = scsi_get_prot_type(scmd);
 	Mpi25SCSIIORequest_t *mpi_request_3v =
 	   (Mpi25SCSIIORequest_t *)mpi_request;
 
-	if (prot_type == SCSI_PROT_DIF_TYPE0 || prot_op == SCSI_PROT_NORMAL)
-		return;
-
-	if (prot_op ==  SCSI_PROT_READ_STRIP)
+	switch (scsi_get_prot_op(scmd)) {
+	case SCSI_PROT_READ_STRIP:
 		eedp_flags = MPI2_SCSIIO_EEDPFLAGS_CHECK_REMOVE_OP;
-	else if (prot_op ==  SCSI_PROT_WRITE_INSERT)
+		break;
+	case SCSI_PROT_WRITE_INSERT:
 		eedp_flags = MPI2_SCSIIO_EEDPFLAGS_INSERT_OP;
-	else
+		break;
+	default:
 		return;
+	}
 
-	switch (prot_type) {
-	case SCSI_PROT_DIF_TYPE1:
-	case SCSI_PROT_DIF_TYPE2:
+	if (scmd->prot_flags & SCSI_PROT_GUARD_CHECK)
+		eedp_flags |= MPI2_SCSIIO_EEDPFLAGS_CHECK_GUARD;
 
-		/*
-		* enable ref/guard checking
-		* auto increment ref tag
-		*/
+	if (scmd->prot_flags & SCSI_PROT_REF_CHECK) {
 		eedp_flags |= MPI2_SCSIIO_EEDPFLAGS_INC_PRI_REFTAG |
-		    MPI2_SCSIIO_EEDPFLAGS_CHECK_REFTAG |
-		    MPI2_SCSIIO_EEDPFLAGS_CHECK_GUARD;
+			MPI2_SCSIIO_EEDPFLAGS_CHECK_REFTAG;
 		mpi_request->CDB.EEDP32.PrimaryReferenceTag =
-		    cpu_to_be32(t10_pi_ref_tag(scmd->request));
-		break;
-
-	case SCSI_PROT_DIF_TYPE3:
-
-		/*
-		* enable guard checking
-		*/
-		eedp_flags |= MPI2_SCSIIO_EEDPFLAGS_CHECK_GUARD;
-
-		break;
+			cpu_to_be32(scsi_prot_ref_tag(scmd));
 	}
 
-	mpi_request_3v->EEDPBlockSize =
-	    cpu_to_le16(scmd->device->sector_size);
+	mpi_request_3v->EEDPBlockSize = cpu_to_le16(scsi_prot_interval(scmd));
 
 	if (ioc->is_gen35_ioc)
 		eedp_flags |= MPI25_SCSIIO_EEDPFLAGS_APPTAG_DISABLE_MODE;
@@ -5141,7 +5125,7 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd)
 	struct MPT3SAS_DEVICE *sas_device_priv_data;
 	struct MPT3SAS_TARGET *sas_target_priv_data;
 	struct _raid_device *raid_device;
-	struct request *rq = scmd->request;
+	struct request *rq = scsi_cmd_to_rq(scmd);
 	int class;
 	Mpi25SCSIIORequest_t *mpi_request;
 	struct _pcie_device *pcie_device = NULL;
@@ -7371,6 +7355,10 @@ _scsih_add_device(struct MPT3SAS_ADAPTER *ioc, u16 handle, u8 phy_num,
 
 	/* get device name */
 	sas_device->device_name = le64_to_cpu(sas_device_pg0.DeviceName);
+	sas_device->port_type = sas_device_pg0.MaxPortConnections;
+	ioc_info(ioc,
+	    "handle(0x%0x) sas_address(0x%016llx) port_type(0x%0x)\n",
+	    handle, sas_device->sas_address, sas_device->port_type);
 
 	if (ioc->wait_for_discovery_to_complete)
 		_scsih_sas_device_init_add(ioc, sas_device);
@@ -9604,6 +9592,42 @@ _scsih_prep_device_scan(struct MPT3SAS_ADAPTER *ioc)
 }
 
 /**
+ * _scsih_update_device_qdepth - Update QD during Reset.
+ * @ioc: per adapter object
+ *
+ */
+static void
+_scsih_update_device_qdepth(struct MPT3SAS_ADAPTER *ioc)
+{
+	struct MPT3SAS_DEVICE *sas_device_priv_data;
+	struct MPT3SAS_TARGET *sas_target_priv_data;
+	struct _sas_device *sas_device;
+	struct scsi_device *sdev;
+	u16 qdepth;
+
+	ioc_info(ioc, "Update devices with firmware reported queue depth\n");
+	shost_for_each_device(sdev, ioc->shost) {
+		sas_device_priv_data = sdev->hostdata;
+		if (sas_device_priv_data && sas_device_priv_data->sas_target) {
+			sas_target_priv_data = sas_device_priv_data->sas_target;
+			sas_device = sas_device_priv_data->sas_target->sas_dev;
+			if (sas_target_priv_data->flags & MPT_TARGET_FLAGS_PCIE_DEVICE)
+				qdepth = ioc->max_nvme_qd;
+			else if (sas_device &&
+			    sas_device->device_info & MPI2_SAS_DEVICE_INFO_SSP_TARGET)
+				qdepth = (sas_device->port_type > 1) ?
+				    ioc->max_wideport_qd : ioc->max_narrowport_qd;
+			else if (sas_device &&
+			    sas_device->device_info & MPI2_SAS_DEVICE_INFO_SATA_DEVICE)
+				qdepth = ioc->max_sata_qd;
+			else
+				continue;
+			mpt3sas_scsih_change_queue_depth(sdev, qdepth);
+		}
+	}
+}
+
+/**
  * _scsih_mark_responding_sas_device - mark a sas_devices as responding
  * @ioc: per adapter object
  * @sas_device_pg0: SAS Device page 0
@@ -10654,6 +10678,8 @@ _mpt3sas_fw_work(struct MPT3SAS_ADAPTER *ioc, struct fw_event_work *fw_event)
 		_scsih_remove_unresponding_devices(ioc);
 		_scsih_del_dirty_vphy(ioc);
 		_scsih_del_dirty_port_entries(ioc);
+		if (ioc->is_gen35_ioc)
+			_scsih_update_device_qdepth(ioc);
 		_scsih_scan_for_devices_after_reset(ioc);
 		/*
 		 * If diag reset has occurred during the driver load
@@ -11178,8 +11204,10 @@ static void scsih_remove(struct pci_dev *pdev)
 
 	ioc->remove_host = 1;
 
-	if (!pci_device_is_present(pdev))
+	if (!pci_device_is_present(pdev)) {
+		mpt3sas_base_pause_mq_polling(ioc);
 		_scsih_flush_running_cmds(ioc);
+	}
 
 	_scsih_fw_event_cleanup_queue(ioc);
 
@@ -11274,8 +11302,10 @@ scsih_shutdown(struct pci_dev *pdev)
 
 	ioc->remove_host = 1;
 
-	if (!pci_device_is_present(pdev))
+	if (!pci_device_is_present(pdev)) {
+		mpt3sas_base_pause_mq_polling(ioc);
 		_scsih_flush_running_cmds(ioc);
+	}
 
 	_scsih_fw_event_cleanup_queue(ioc);
 
@@ -11785,12 +11815,41 @@ static int scsih_map_queues(struct Scsi_Host *shost)
 {
 	struct MPT3SAS_ADAPTER *ioc =
 	    (struct MPT3SAS_ADAPTER *)shost->hostdata;
+	struct blk_mq_queue_map *map;
+	int i, qoff, offset;
+	int nr_msix_vectors = ioc->iopoll_q_start_index;
+	int iopoll_q_count = ioc->reply_queue_count - nr_msix_vectors;
 
-	if (ioc->shost->nr_hw_queues == 1)
+	if (shost->nr_hw_queues == 1)
 		return 0;
 
-	return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT],
-	    ioc->pdev, ioc->high_iops_queues);
+	for (i = 0, qoff = 0; i < shost->nr_maps; i++) {
+		map = &shost->tag_set.map[i];
+		map->nr_queues = 0;
+		offset = 0;
+		if (i == HCTX_TYPE_DEFAULT) {
+			map->nr_queues =
+			    nr_msix_vectors - ioc->high_iops_queues;
+			offset = ioc->high_iops_queues;
+		} else if (i == HCTX_TYPE_POLL)
+			map->nr_queues = iopoll_q_count;
+
+		if (!map->nr_queues)
+			BUG_ON(i == HCTX_TYPE_DEFAULT);
+
+		/*
+		 * The poll queue(s) doesn't have an IRQ (and hence IRQ
+		 * affinity), so use the regular blk-mq cpu mapping
+		 */
+		map->queue_offset = qoff;
+		if (i != HCTX_TYPE_POLL)
+			blk_mq_pci_map_queues(map, ioc->pdev, offset);
+		else
+			blk_mq_map_queues(map);
+
+		qoff += map->nr_queues;
+	}
+	return 0;
 }
 
 /* shost template for SAS 2.0 HBA devices */
@@ -11861,6 +11920,7 @@ static struct scsi_host_template mpt3sas_driver_template = {
 	.track_queue_depth		= 1,
 	.cmd_size			= sizeof(struct scsiio_tracker),
 	.map_queues			= scsih_map_queues,
+	.mq_poll			= mpt3sas_blk_mq_poll,
 };
 
 /* raid transport support for SAS 3.0 HBA devices */
@@ -11957,6 +12017,7 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	struct Scsi_Host *shost = NULL;
 	int rv;
 	u16 hba_mpi_version;
+	int iopoll_q_count = 0;
 
 	/* Determine in which MPI version class this pci device belongs */
 	hba_mpi_version = _scsih_determine_hba_mpi_version(pdev);
@@ -12204,6 +12265,11 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto out_thread_fail;
 	}
 
+	shost->host_tagset = 0;
+
+	if (ioc->is_gen35_ioc && host_tagset_enable)
+		shost->host_tagset = 1;
+
 	ioc->is_driver_loading = 1;
 	if ((mpt3sas_base_attach(ioc))) {
 		ioc_err(ioc, "failure at %s:%d/%s()!\n",
@@ -12226,16 +12292,17 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	} else
 		ioc->hide_drives = 0;
 
-	shost->host_tagset = 0;
 	shost->nr_hw_queues = 1;
 
-	if (ioc->is_gen35_ioc && ioc->reply_queue_count > 1 &&
-	    host_tagset_enable && ioc->smp_affinity_enable) {
-
-		shost->host_tagset = 1;
+	if (shost->host_tagset) {
 		shost->nr_hw_queues =
 		    ioc->reply_queue_count - ioc->high_iops_queues;
 
+		iopoll_q_count =
+		    ioc->reply_queue_count - ioc->iopoll_q_start_index;
+
+		shost->nr_maps = iopoll_q_count ? 3 : 1;
+
 		dev_info(&ioc->pdev->dev,
 		    "Max SCSIIO MPT commands: %d shared with nr_hw_queues = %d\n",
 		    shost->can_queue, shost->nr_hw_queues);
@@ -12359,6 +12426,7 @@ scsih_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
 		/* Permanent error, prepare for device removal */
 		ioc->pci_error_recovery = 1;
 		mpt3sas_base_stop_watchdog(ioc);
+		mpt3sas_base_pause_mq_polling(ioc);
 		_scsih_flush_running_cmds(ioc);
 		return PCI_ERS_RESULT_DISCONNECT;
 	}
diff --git a/drivers/scsi/mvumi.c b/drivers/scsi/mvumi.c
index 6bb03d7a254d..4d251bf630a3 100644
--- a/drivers/scsi/mvumi.c
+++ b/drivers/scsi/mvumi.c
@@ -702,7 +702,7 @@ static int mvumi_host_reset(struct scsi_cmnd *scmd)
 	mhba = (struct mvumi_hba *) scmd->device->host->hostdata;
 
 	scmd_printk(KERN_NOTICE, scmd, "RESET -%u cmd=%x retries=%x\n",
-			scmd->request->tag, scmd->cmnd[0], scmd->retries);
+			scsi_cmd_to_rq(scmd)->tag, scmd->cmnd[0], scmd->retries);
 
 	return mhba->instancet->reset_host(mhba);
 }
diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c
index 542ed88ef90d..a4a88323e020 100644
--- a/drivers/scsi/myrb.c
+++ b/drivers/scsi/myrb.c
@@ -1263,6 +1263,7 @@ static int myrb_host_reset(struct scsi_cmnd *scmd)
 static int myrb_pthru_queuecommand(struct Scsi_Host *shost,
 		struct scsi_cmnd *scmd)
 {
+	struct request *rq = scsi_cmd_to_rq(scmd);
 	struct myrb_hba *cb = shost_priv(shost);
 	struct myrb_cmdblk *cmd_blk = scsi_cmd_priv(scmd);
 	union myrb_cmd_mbox *mbox = &cmd_blk->mbox;
@@ -1286,7 +1287,7 @@ static int myrb_pthru_queuecommand(struct Scsi_Host *shost,
 	}
 
 	mbox->type3.opcode = MYRB_CMD_DCDB;
-	mbox->type3.id = scmd->request->tag + 3;
+	mbox->type3.id = rq->tag + 3;
 	mbox->type3.addr = dcdb_addr;
 	dcdb->channel = sdev->channel;
 	dcdb->target = sdev->id;
@@ -1305,11 +1306,11 @@ static int myrb_pthru_queuecommand(struct Scsi_Host *shost,
 		break;
 	}
 	dcdb->early_status = false;
-	if (scmd->request->timeout <= 10)
+	if (rq->timeout <= 10)
 		dcdb->timeout = MYRB_DCDB_TMO_10_SECS;
-	else if (scmd->request->timeout <= 60)
+	else if (rq->timeout <= 60)
 		dcdb->timeout = MYRB_DCDB_TMO_60_SECS;
-	else if (scmd->request->timeout <= 600)
+	else if (rq->timeout <= 600)
 		dcdb->timeout = MYRB_DCDB_TMO_10_MINS;
 	else
 		dcdb->timeout = MYRB_DCDB_TMO_24_HRS;
@@ -1550,7 +1551,7 @@ static int myrb_ldev_queuecommand(struct Scsi_Host *shost,
 	}
 
 	myrb_reset_cmd(cmd_blk);
-	mbox->type5.id = scmd->request->tag + 3;
+	mbox->type5.id = scsi_cmd_to_rq(scmd)->tag + 3;
 	if (scmd->sc_data_direction == DMA_NONE)
 		goto submit;
 	nsge = scsi_dma_map(scmd);
diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c
index 26326af23dbc..07f274afd7e5 100644
--- a/drivers/scsi/myrs.c
+++ b/drivers/scsi/myrs.c
@@ -1582,6 +1582,7 @@ static void myrs_mode_sense(struct myrs_hba *cs, struct scsi_cmnd *scmd,
 static int myrs_queuecommand(struct Scsi_Host *shost,
 		struct scsi_cmnd *scmd)
 {
+	struct request *rq = scsi_cmd_to_rq(scmd);
 	struct myrs_hba *cs = shost_priv(shost);
 	struct myrs_cmdblk *cmd_blk = scsi_cmd_priv(scmd);
 	union myrs_cmd_mbox *mbox = &cmd_blk->mbox;
@@ -1628,7 +1629,7 @@ static int myrs_queuecommand(struct Scsi_Host *shost,
 		return SCSI_MLQUEUE_HOST_BUSY;
 	cmd_blk->sense_addr = sense_addr;
 
-	timeout = scmd->request->timeout;
+	timeout = rq->timeout;
 	if (scmd->cmd_len <= 10) {
 		if (scmd->device->channel >= cs->ctlr_info->physchan_present) {
 			struct myrs_ldev_info *ldev_info = sdev->hostdata;
@@ -1644,10 +1645,10 @@ static int myrs_queuecommand(struct Scsi_Host *shost,
 			mbox->SCSI_10.pdev.target = sdev->id;
 			mbox->SCSI_10.pdev.channel = sdev->channel;
 		}
-		mbox->SCSI_10.id = scmd->request->tag + 3;
+		mbox->SCSI_10.id = rq->tag + 3;
 		mbox->SCSI_10.control.dma_ctrl_to_host =
 			(scmd->sc_data_direction == DMA_FROM_DEVICE);
-		if (scmd->request->cmd_flags & REQ_FUA)
+		if (rq->cmd_flags & REQ_FUA)
 			mbox->SCSI_10.control.fua = true;
 		mbox->SCSI_10.dma_size = scsi_bufflen(scmd);
 		mbox->SCSI_10.sense_addr = cmd_blk->sense_addr;
@@ -1690,10 +1691,10 @@ static int myrs_queuecommand(struct Scsi_Host *shost,
 			mbox->SCSI_255.pdev.target = sdev->id;
 			mbox->SCSI_255.pdev.channel = sdev->channel;
 		}
-		mbox->SCSI_255.id = scmd->request->tag + 3;
+		mbox->SCSI_255.id = rq->tag + 3;
 		mbox->SCSI_255.control.dma_ctrl_to_host =
 			(scmd->sc_data_direction == DMA_FROM_DEVICE);
-		if (scmd->request->cmd_flags & REQ_FUA)
+		if (rq->cmd_flags & REQ_FUA)
 			mbox->SCSI_255.control.fua = true;
 		mbox->SCSI_255.dma_size = scsi_bufflen(scmd);
 		mbox->SCSI_255.sense_addr = cmd_blk->sense_addr;
diff --git a/drivers/scsi/ncr53c8xx.c b/drivers/scsi/ncr53c8xx.c
index c76e9f05d042..7a4f5d4dd670 100644
--- a/drivers/scsi/ncr53c8xx.c
+++ b/drivers/scsi/ncr53c8xx.c
@@ -1453,11 +1453,6 @@ struct head {
 #define  xerr_status   phys.xerr_st
 #define  nego_status   phys.nego_st
 
-#if 0
-#define  sync_status   phys.sync_st
-#define  wide_status   phys.wide_st
-#endif
-
 /*==========================================================
 **
 **      Declaration of structs:     Data structure block
@@ -1980,9 +1975,6 @@ static inline char *ncr_name (struct ncb *np)
 #define	RELOC_SOFTC	0x40000000
 #define	RELOC_LABEL	0x50000000
 #define	RELOC_REGISTER	0x60000000
-#if 0
-#define	RELOC_KVAR	0x70000000
-#endif
 #define	RELOC_LABELH	0x80000000
 #define	RELOC_MASK	0xf0000000
 
@@ -1991,21 +1983,7 @@ static inline char *ncr_name (struct ncb *np)
 #define PADDRH(label)   (RELOC_LABELH | offsetof(struct scripth, label))
 #define	RADDR(label)	(RELOC_REGISTER | REG(label))
 #define	FADDR(label,ofs)(RELOC_REGISTER | ((REG(label))+(ofs)))
-#if 0
-#define	KVAR(which)	(RELOC_KVAR | (which))
-#endif
 
-#if 0
-#define	SCRIPT_KVAR_JIFFIES	(0)
-#define	SCRIPT_KVAR_FIRST		SCRIPT_KVAR_JIFFIES
-#define	SCRIPT_KVAR_LAST		SCRIPT_KVAR_JIFFIES
-/*
- * Kernel variables referenced in the scripts.
- * THESE MUST ALL BE ALIGNED TO A 4-BYTE BOUNDARY.
- */
-static void *script_kvars[] __initdata =
-	{ (void *)&jiffies };
-#endif
 
 static	struct script script0 __initdata = {
 /*--------------------------< START >-----------------------*/ {
@@ -2162,11 +2140,6 @@ static	struct script script0 __initdata = {
 	SCR_COPY (1),
 		RADDR (scratcha),
 		NADDR (msgout),
-#if 0
-	SCR_COPY (1),
-		RADDR (scratcha),
-		NADDR (msgin),
-#endif
 	/*
 	**	Anticipate the COMMAND phase.
 	**	This is the normal case for initial selection.
@@ -4164,8 +4137,8 @@ static int ncr_queue_command (struct ncb *np, struct scsi_cmnd *cmd)
 	**
 	**----------------------------------------------------
 	*/
-	if (np->settle_time && cmd->request->timeout >= HZ) {
-		u_long tlimit = jiffies + cmd->request->timeout - HZ;
+	if (np->settle_time && scsi_cmd_to_rq(cmd)->timeout >= HZ) {
+		u_long tlimit = jiffies + scsi_cmd_to_rq(cmd)->timeout - HZ;
 		if (time_after(np->settle_time, tlimit))
 			np->settle_time = tlimit;
 	}
@@ -4378,10 +4351,6 @@ static int ncr_queue_command (struct ncb *np, struct scsi_cmnd *cmd)
 	cp->parity_status		= 0;
 
 	cp->xerr_status			= XE_OK;
-#if 0
-	cp->sync_status			= tp->sval;
-	cp->wide_status			= tp->wval;
-#endif
 
 	/*----------------------------------------------------
 	**
@@ -4553,12 +4522,8 @@ static void ncr_start_reset(struct ncb *np)
 **
 **==========================================================
 */
-static int ncr_reset_bus (struct ncb *np, struct scsi_cmnd *cmd, int sync_reset)
+static int ncr_reset_bus (struct ncb *np)
 {
-/*	struct scsi_device        *device    = cmd->device; */
-	struct ccb *cp;
-	int found;
-
 /*
  * Return immediately if reset is in progress.
  */
@@ -4573,24 +4538,6 @@ static int ncr_reset_bus (struct ncb *np, struct scsi_cmnd *cmd, int sync_reset)
  */
 	ncr_start_reset(np);
 /*
- * First, look in the wakeup list
- */
-	for (found=0, cp=np->ccb; cp; cp=cp->link_ccb) {
-		/*
-		**	look for the ccb of this command.
-		*/
-		if (cp->host_status == HS_IDLE) continue;
-		if (cp->cmd == cmd) {
-			found = 1;
-			break;
-		}
-	}
-/*
- * Then, look in the waiting list
- */
-	if (!found && retrieve_from_waiting_list(0, np, cmd))
-		found = 1;
-/*
  * Wake-up all awaiting commands with DID_RESET.
  */
 	reset_waiting_list(np);
@@ -4598,103 +4545,10 @@ static int ncr_reset_bus (struct ncb *np, struct scsi_cmnd *cmd, int sync_reset)
  * Wake-up all pending commands with HS_RESET -> DID_RESET.
  */
 	ncr_wakeup(np, HS_RESET);
-/*
- * If the involved command was not in a driver queue, and the 
- * scsi driver told us reset is synchronous, and the command is not 
- * currently in the waiting list, complete it with DID_RESET status,
- * in order to keep it alive.
- */
-	if (!found && sync_reset && !retrieve_from_waiting_list(0, np, cmd)) {
-		set_host_byte(cmd, DID_RESET);
-		ncr_queue_done_cmd(np, cmd);
-	}
 
 	return SUCCESS;
 }
 
-#if 0 /* unused and broken.. */
-/*==========================================================
-**
-**
-**	Abort an SCSI command.
-**	This is called from the generic SCSI driver.
-**
-**
-**==========================================================
-*/
-static int ncr_abort_command (struct ncb *np, struct scsi_cmnd *cmd)
-{
-/*	struct scsi_device        *device    = cmd->device; */
-	struct ccb *cp;
-	int found;
-	int retv;
-
-/*
- * First, look for the scsi command in the waiting list
- */
-	if (remove_from_waiting_list(np, cmd)) {
-		set_host_byte(cmd, DID_ABORT);
-		ncr_queue_done_cmd(np, cmd);
-		return SCSI_ABORT_SUCCESS;
-	}
-
-/*
- * Then, look in the wakeup list
- */
-	for (found=0, cp=np->ccb; cp; cp=cp->link_ccb) {
-		/*
-		**	look for the ccb of this command.
-		*/
-		if (cp->host_status == HS_IDLE) continue;
-		if (cp->cmd == cmd) {
-			found = 1;
-			break;
-		}
-	}
-
-	if (!found) {
-		return SCSI_ABORT_NOT_RUNNING;
-	}
-
-	if (np->settle_time) {
-		return SCSI_ABORT_SNOOZE;
-	}
-
-	/*
-	**	If the CCB is active, patch schedule jumps for the 
-	**	script to abort the command.
-	*/
-
-	switch(cp->host_status) {
-	case HS_BUSY:
-	case HS_NEGOTIATE:
-		printk ("%s: abort ccb=%p (cancel)\n", ncr_name (np), cp);
-			cp->start.schedule.l_paddr =
-				cpu_to_scr(NCB_SCRIPTH_PHYS (np, cancel));
-		retv = SCSI_ABORT_PENDING;
-		break;
-	case HS_DISCONNECT:
-		cp->restart.schedule.l_paddr =
-				cpu_to_scr(NCB_SCRIPTH_PHYS (np, abort));
-		retv = SCSI_ABORT_PENDING;
-		break;
-	default:
-		retv = SCSI_ABORT_NOT_RUNNING;
-		break;
-
-	}
-
-	/*
-	**      If there are no requests, the script
-	**      processor will sleep on SEL_WAIT_RESEL.
-	**      Let's wake it up, since it may have to work.
-	*/
-	OUTB (nc_istat, SIGP);
-
-	return retv;
-}
-#endif
-
 static void ncr_detach(struct ncb *np)
 {
 	struct ccb *cp;
@@ -5453,27 +5307,6 @@ static void ncr_getsync(struct ncb *np, u_char sfac, u_char *fakp, u_char *scntl
 	*/
 	fak = (kpc - 1) / div_10M[div] + 1;
 
-#if 0	/* This optimization does not seem very useful */
-
-	per = (fak * div_10M[div]) / clk;
-
-	/*
-	**	Why not to try the immediate lower divisor and to choose 
-	**	the one that allows the fastest output speed ?
-	**	We don't want input speed too much greater than output speed.
-	*/
-	if (div >= 1 && fak < 8) {
-		u_long fak2, per2;
-		fak2 = (kpc - 1) / div_10M[div-1] + 1;
-		per2 = (fak2 * div_10M[div-1]) / clk;
-		if (per2 < per && fak2 <= 8) {
-			fak = fak2;
-			per = per2;
-			--div;
-		}
-	}
-#endif
-
 	if (fak < 4) fak = 4;	/* Should never happen, too bad ... */
 
 	/*
@@ -5511,10 +5344,6 @@ static void ncr_set_sync_wide_status (struct ncb *np, u_char target)
 	for (cp = np->ccb; cp; cp = cp->link_ccb) {
 		if (!cp->cmd) continue;
 		if (scmd_id(cp->cmd) != target) continue;
-#if 0
-		cp->sync_status = tp->sval;
-		cp->wide_status = tp->wval;
-#endif
 		cp->phys.select.sel_scntl3 = tp->wval;
 		cp->phys.select.sel_sxfer  = tp->sval;
 	}
@@ -8125,7 +7954,7 @@ static int ncr53c8xx_bus_reset(struct scsi_cmnd *cmd)
 	 */
 
 	spin_lock_irqsave(&np->smp_lock, flags);
-	sts = ncr_reset_bus(np, cmd, 1);
+	sts = ncr_reset_bus(np);
 
 	done_list     = np->done_list;
 	np->done_list = NULL;
@@ -8136,30 +7965,6 @@ static int ncr53c8xx_bus_reset(struct scsi_cmnd *cmd)
 	return sts;
 }
 
-#if 0 /* unused and broken */
-static int ncr53c8xx_abort(struct scsi_cmnd *cmd)
-{
-	struct ncb *np = ((struct host_data *) cmd->device->host->hostdata)->ncb;
-	int sts;
-	unsigned long flags;
-	struct scsi_cmnd *done_list;
-
-	printk("ncr53c8xx_abort\n");
-
-	NCR_LOCK_NCB(np, flags);
-
-	sts = ncr_abort_command(np, cmd);
-out:
-	done_list     = np->done_list;
-	np->done_list = NULL;
-	NCR_UNLOCK_NCB(np, flags);
-
-	ncr_flush_done_cmds(done_list);
-
-	return sts;
-}
-#endif
-
 
 /*
 **	Scsi command waiting list management.
diff --git a/drivers/scsi/pcmcia/fdomain_cs.c b/drivers/scsi/pcmcia/fdomain_cs.c
index e42acf314d06..33df6a9ba9b5 100644
--- a/drivers/scsi/pcmcia/fdomain_cs.c
+++ b/drivers/scsi/pcmcia/fdomain_cs.c
@@ -45,8 +45,10 @@ static int fdomain_probe(struct pcmcia_device *link)
 		goto fail_disable;
 
 	if (!request_region(link->resource[0]->start, FDOMAIN_REGION_SIZE,
-			    "fdomain_cs"))
+			    "fdomain_cs")) {
+		ret = -EBUSY;
 		goto fail_disable;
+	}
 
 	sh = fdomain_create(link->resource[0]->start, link->irq, 7, &link->dev);
 	if (!sh) {
diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c
index 17c0f26e683a..63690508313b 100644
--- a/drivers/scsi/pm8001/pm8001_hwi.c
+++ b/drivers/scsi/pm8001/pm8001_hwi.c
@@ -1323,7 +1323,7 @@ int pm8001_mpi_build_cmd(struct pm8001_hba_info *pm8001_ha,
 	void *pMessage;
 	unsigned long flags;
 	int q_index = circularQ - pm8001_ha->inbnd_q_tbl;
-	int rv = -1;
+	int rv;
 
 	WARN_ON(q_index >= PM8001_MAX_INB_NUM);
 	spin_lock_irqsave(&circularQ->iq_lock, flags);
diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c
index 6b5b6a75ac88..3404782988d5 100644
--- a/drivers/scsi/qedf/qedf_io.c
+++ b/drivers/scsi/qedf/qedf_io.c
@@ -1162,13 +1162,7 @@ void qedf_scsi_completion(struct qedf_ctx *qedf, struct fcoe_cqe *cqe,
 		return;
 	}
 
-	if (!sc_cmd->request) {
-		QEDF_WARN(&(qedf->dbg_ctx), "sc_cmd->request is NULL, "
-		    "sc_cmd=%p.\n", sc_cmd);
-		return;
-	}
-
-	if (!sc_cmd->request->q) {
+	if (!scsi_cmd_to_rq(sc_cmd)->q) {
 		QEDF_WARN(&(qedf->dbg_ctx), "request->q is NULL so request "
 		   "is not valid, sc_cmd=%p.\n", sc_cmd);
 		return;
diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index 85f41abcb56c..42d0d941dba5 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -3004,7 +3004,7 @@ static int qedf_alloc_global_queues(struct qedf_ctx *qedf)
 {
 	u32 *list;
 	int i;
-	int status = 0, rc;
+	int status;
 	u32 *pbl;
 	dma_addr_t page;
 	int num_pages;
@@ -3016,7 +3016,7 @@ static int qedf_alloc_global_queues(struct qedf_ctx *qedf)
 	 */
 	if (!qedf->num_queues) {
 		QEDF_ERR(&(qedf->dbg_ctx), "No MSI-X vectors available!\n");
-		return 1;
+		return -ENOMEM;
 	}
 
 	/*
@@ -3024,7 +3024,7 @@ static int qedf_alloc_global_queues(struct qedf_ctx *qedf)
 	 * addresses of our queues
 	 */
 	if (!qedf->p_cpuq) {
-		status = 1;
+		status = -EINVAL;
 		QEDF_ERR(&qedf->dbg_ctx, "p_cpuq is NULL.\n");
 		goto mem_alloc_failure;
 	}
@@ -3040,8 +3040,8 @@ static int qedf_alloc_global_queues(struct qedf_ctx *qedf)
 		   "qedf->global_queues=%p.\n", qedf->global_queues);
 
 	/* Allocate DMA coherent buffers for BDQ */
-	rc = qedf_alloc_bdq(qedf);
-	if (rc) {
+	status = qedf_alloc_bdq(qedf);
+	if (status) {
 		QEDF_ERR(&qedf->dbg_ctx, "Unable to allocate bdq.\n");
 		goto mem_alloc_failure;
 	}
diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c
index 71333d3c5c86..d01cd829ef97 100644
--- a/drivers/scsi/qedi/qedi_fw.c
+++ b/drivers/scsi/qedi/qedi_fw.c
@@ -609,14 +609,7 @@ static void qedi_scsi_completion(struct qedi_ctx *qedi,
 		goto error;
 	}
 
-	if (!sc_cmd->request) {
-		QEDI_WARN(&qedi->dbg_ctx,
-			  "sc_cmd->request is NULL, sc_cmd=%p.\n",
-			  sc_cmd);
-		goto error;
-	}
-
-	if (!sc_cmd->request->q) {
+	if (!scsi_cmd_to_rq(sc_cmd)->q) {
 		QEDI_WARN(&qedi->dbg_ctx,
 			  "request->q is NULL so request is not valid, sc_cmd=%p.\n",
 			  sc_cmd);
@@ -936,17 +929,11 @@ exit_fp_process:
 
 static void qedi_ring_doorbell(struct qedi_conn *qedi_conn)
 {
-	struct iscsi_db_data dbell = { 0 };
-
-	dbell.agg_flags = 0;
+	qedi_conn->ep->db_data.sq_prod = qedi_conn->ep->fw_sq_prod_idx;
 
-	dbell.params |= DB_DEST_XCM << ISCSI_DB_DATA_DEST_SHIFT;
-	dbell.params |= DB_AGG_CMD_SET << ISCSI_DB_DATA_AGG_CMD_SHIFT;
-	dbell.params |=
-		   DQ_XCM_ISCSI_SQ_PROD_CMD << ISCSI_DB_DATA_AGG_VAL_SEL_SHIFT;
-
-	dbell.sq_prod = qedi_conn->ep->fw_sq_prod_idx;
-	writel(*(u32 *)&dbell, qedi_conn->ep->p_doorbell);
+	/* wmb - Make sure fw idx is coherent */
+	wmb();
+	writel(*(u32 *)&qedi_conn->ep->db_data, qedi_conn->ep->p_doorbell);
 
 	/* Make sure fw write idx is coherent, and include both memory barriers
 	 * as a failsafe as for some architectures the call is the same but on
diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c
index 97f83760da88..c5260429c637 100644
--- a/drivers/scsi/qedi/qedi_iscsi.c
+++ b/drivers/scsi/qedi/qedi_iscsi.c
@@ -499,8 +499,8 @@ static u16 qedi_calc_mss(u16 pmtu, u8 is_ipv6, u8 tcp_ts_en, u8 vlan_en)
 
 static int qedi_iscsi_offload_conn(struct qedi_endpoint *qedi_ep)
 {
-	struct qedi_ctx *qedi = qedi_ep->qedi;
 	struct qed_iscsi_params_offload *conn_info;
+	struct qedi_ctx *qedi = qedi_ep->qedi;
 	int rval;
 	int i;
 
@@ -577,10 +577,37 @@ static int qedi_iscsi_offload_conn(struct qedi_endpoint *qedi_ep)
 		  "Default cq index [%d], mss [%d]\n",
 		  conn_info->default_cq, conn_info->mss);
 
+	/* Prepare the doorbell parameters */
+	qedi_ep->db_data.agg_flags = 0;
+	qedi_ep->db_data.params = 0;
+	SET_FIELD(qedi_ep->db_data.params, ISCSI_DB_DATA_DEST, DB_DEST_XCM);
+	SET_FIELD(qedi_ep->db_data.params, ISCSI_DB_DATA_AGG_CMD,
+		  DB_AGG_CMD_MAX);
+	SET_FIELD(qedi_ep->db_data.params, ISCSI_DB_DATA_AGG_VAL_SEL,
+		  DQ_XCM_ISCSI_SQ_PROD_CMD);
+	SET_FIELD(qedi_ep->db_data.params, ISCSI_DB_DATA_BYPASS_EN, 1);
+
+	/* Register doorbell with doorbell recovery mechanism */
+	rval = qedi_ops->common->db_recovery_add(qedi->cdev,
+						 qedi_ep->p_doorbell,
+						 &qedi_ep->db_data,
+						 DB_REC_WIDTH_32B,
+						 DB_REC_KERNEL);
+	if (rval) {
+		kfree(conn_info);
+		return rval;
+	}
+
 	rval = qedi_ops->offload_conn(qedi->cdev, qedi_ep->handle, conn_info);
-	if (rval)
+	if (rval) {
+		/* delete doorbell from doorbell recovery mechanism */
+		rval = qedi_ops->common->db_recovery_del(qedi->cdev,
+							 qedi_ep->p_doorbell,
+							 &qedi_ep->db_data);
+
 		QEDI_ERR(&qedi->dbg_ctx, "offload_conn returned %d, ep=%p\n",
 			 rval, qedi_ep);
+	}
 
 	kfree(conn_info);
 	return rval;
@@ -1109,6 +1136,11 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep)
 	    test_bit(QEDI_IN_RECOVERY, &qedi->flags))
 		goto ep_release_conn;
 
+	/* Delete doorbell from doorbell recovery mechanism */
+	ret = qedi_ops->common->db_recovery_del(qedi->cdev,
+					       qedi_ep->p_doorbell,
+					       &qedi_ep->db_data);
+
 	ret = qedi_ops->destroy_conn(qedi->cdev, qedi_ep->handle, abrt_conn);
 	if (ret) {
 		QEDI_WARN(&qedi->dbg_ctx,
diff --git a/drivers/scsi/qedi/qedi_iscsi.h b/drivers/scsi/qedi/qedi_iscsi.h
index 758735209e15..a31c5de74754 100644
--- a/drivers/scsi/qedi/qedi_iscsi.h
+++ b/drivers/scsi/qedi/qedi_iscsi.h
@@ -80,6 +80,7 @@ struct qedi_endpoint {
 	u32 handle;
 	u32 fw_cid;
 	void __iomem *p_doorbell;
+	struct iscsi_db_data db_data;
 
 	/* Send queue management */
 	struct iscsi_wqe *sq;
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index 0b0acb827071..e6dc0b495a82 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -1621,7 +1621,7 @@ static int qedi_alloc_global_queues(struct qedi_ctx *qedi)
 {
 	u32 *list;
 	int i;
-	int status = 0, rc;
+	int status;
 	u32 *pbl;
 	dma_addr_t page;
 	int num_pages;
@@ -1632,14 +1632,14 @@ static int qedi_alloc_global_queues(struct qedi_ctx *qedi)
 	 */
 	if (!qedi->num_queues) {
 		QEDI_ERR(&qedi->dbg_ctx, "No MSI-X vectors available!\n");
-		return 1;
+		return -ENOMEM;
 	}
 
 	/* Make sure we allocated the PBL that will contain the physical
 	 * addresses of our queues
 	 */
 	if (!qedi->p_cpuq) {
-		status = 1;
+		status = -EINVAL;
 		goto mem_alloc_failure;
 	}
 
@@ -1654,13 +1654,13 @@ static int qedi_alloc_global_queues(struct qedi_ctx *qedi)
 		  "qedi->global_queues=%p.\n", qedi->global_queues);
 
 	/* Allocate DMA coherent buffers for BDQ */
-	rc = qedi_alloc_bdq(qedi);
-	if (rc)
+	status = qedi_alloc_bdq(qedi);
+	if (status)
 		goto mem_alloc_failure;
 
 	/* Allocate DMA coherent buffers for NVM_ISCSI_CFG */
-	rc = qedi_alloc_nvm_iscsi_cfg(qedi);
-	if (rc)
+	status = qedi_alloc_nvm_iscsi_cfg(qedi);
+	if (status)
 		goto mem_alloc_failure;
 
 	/* Allocate a CQ and an associated PBL for each MSI-X
diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c
index 928da90b79be..d0b4e063bfe1 100644
--- a/drivers/scsi/qla1280.c
+++ b/drivers/scsi/qla1280.c
@@ -490,7 +490,6 @@ __setup("qla1280=", qla1280_setup);
 #define	CMD_SNSLEN(Cmnd)	SCSI_SENSE_BUFFERSIZE
 #define	CMD_RESULT(Cmnd)	Cmnd->result
 #define	CMD_HANDLE(Cmnd)	Cmnd->host_scribble
-#define CMD_REQUEST(Cmnd)	Cmnd->request->cmd
 
 #define CMD_HOST(Cmnd)		Cmnd->device->host
 #define SCSI_BUS_32(Cmnd)	Cmnd->device->channel
@@ -2827,7 +2826,7 @@ qla1280_64bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp)
 	memset(((char *)pkt + 8), 0, (REQUEST_ENTRY_SIZE - 8));
 
 	/* Set ISP command timeout. */
-	pkt->timeout = cpu_to_le16(cmd->request->timeout/HZ);
+	pkt->timeout = cpu_to_le16(scsi_cmd_to_rq(cmd)->timeout / HZ);
 
 	/* Set device target ID and LUN */
 	pkt->lun = SCSI_LUN_32(cmd);
@@ -3082,7 +3081,7 @@ qla1280_32bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp)
 	memset(((char *)pkt + 8), 0, (REQUEST_ENTRY_SIZE - 8));
 
 	/* Set ISP command timeout. */
-	pkt->timeout = cpu_to_le16(cmd->request->timeout/HZ);
+	pkt->timeout = cpu_to_le16(scsi_cmd_to_rq(cmd)->timeout / HZ);
 
 	/* Set device target ID and LUN */
 	pkt->lun = SCSI_LUN_32(cmd);
@@ -3981,7 +3980,7 @@ __qla1280_print_scsi_cmd(struct scsi_cmnd *cmd)
 	   qla1280_dump_buffer(1, (char *)sg, (cmd->use_sg*sizeof(struct scatterlist)));
 	   } */
 	printk("  tag=%d, transfersize=0x%x \n",
-	       cmd->tag, cmd->transfersize);
+	       scsi_cmd_to_rq(cmd)->tag, cmd->transfersize);
 	printk("  SP=0x%p\n", CMD_SP(cmd));
 	printk(" underflow size = 0x%x, direction=0x%x\n",
 	       cmd->underflow, cmd->sc_data_direction);
diff --git a/drivers/scsi/qla2xxx/Makefile b/drivers/scsi/qla2xxx/Makefile
index 17d5bc1cc56b..cbc1303e761e 100644
--- a/drivers/scsi/qla2xxx/Makefile
+++ b/drivers/scsi/qla2xxx/Makefile
@@ -1,7 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 qla2xxx-y := qla_os.o qla_init.o qla_mbx.o qla_iocb.o qla_isr.o qla_gs.o \
 		qla_dbg.o qla_sup.o qla_attr.o qla_mid.o qla_dfs.o qla_bsg.o \
-		qla_nx.o qla_mr.o qla_nx2.o qla_target.o qla_tmpl.o qla_nvme.o
+		qla_nx.o qla_mr.o qla_nx2.o qla_target.o qla_tmpl.o qla_nvme.o \
+		qla_edif.o
 
 obj-$(CONFIG_SCSI_QLA_FC) += qla2xxx.o
 obj-$(CONFIG_TCM_QLA2XXX) += tcm_qla2xxx.o
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 3aa9869f6fae..d09776b77af2 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -1887,6 +1887,30 @@ qla2x00_port_speed_show(struct device *dev, struct device_attribute *attr,
 	return scnprintf(buf, PAGE_SIZE, "%s\n", spd[ha->link_data_rate]);
 }
 
+static ssize_t
+qla2x00_mpi_pause_store(struct device *dev,
+	struct device_attribute *attr, const char *buf, size_t count)
+{
+	scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+	int rval = 0;
+
+	if (sscanf(buf, "%d", &rval) != 1)
+		return -EINVAL;
+
+	ql_log(ql_log_warn, vha, 0x7089, "Pausing MPI...\n");
+
+	rval = qla83xx_wr_reg(vha, 0x002012d4, 0x30000001);
+
+	if (rval != QLA_SUCCESS) {
+		ql_log(ql_log_warn, vha, 0x708a, "Unable to pause MPI.\n");
+		count = 0;
+	}
+
+	return count;
+}
+
+static DEVICE_ATTR(mpi_pause, S_IWUSR, NULL, qla2x00_mpi_pause_store);
+
 /* ----- */
 
 static ssize_t
@@ -2435,6 +2459,7 @@ static DEVICE_ATTR(port_speed, 0644, qla2x00_port_speed_show,
     qla2x00_port_speed_store);
 static DEVICE_ATTR(port_no, 0444, qla2x00_port_no_show, NULL);
 static DEVICE_ATTR(fw_attr, 0444, qla2x00_fw_attr_show, NULL);
+static DEVICE_ATTR_RO(edif_doorbell);
 
 
 struct device_attribute *qla2x00_host_attrs[] = {
@@ -2480,6 +2505,8 @@ struct device_attribute *qla2x00_host_attrs[] = {
 	&dev_attr_port_no,
 	&dev_attr_fw_attr,
 	&dev_attr_dport_diagnostics,
+	&dev_attr_edif_doorbell,
+	&dev_attr_mpi_pause,
 	NULL, /* reserve for qlini_mode */
 	NULL, /* reserve for ql2xiniexchg */
 	NULL, /* reserve for ql2xexchoffld */
@@ -2706,12 +2733,14 @@ qla2x00_terminate_rport_io(struct fc_rport *rport)
 	 * final cleanup of firmware resources (PCBs and XCBs).
 	 */
 	if (fcport->loop_id != FC_NO_LOOP_ID) {
-		if (IS_FWI2_CAPABLE(fcport->vha->hw))
-			fcport->vha->hw->isp_ops->fabric_logout(fcport->vha,
-			    fcport->loop_id, fcport->d_id.b.domain,
-			    fcport->d_id.b.area, fcport->d_id.b.al_pa);
-		else
+		if (IS_FWI2_CAPABLE(fcport->vha->hw)) {
+			if (fcport->loop_id != FC_NO_LOOP_ID)
+				fcport->logout_on_delete = 1;
+
+			qlt_schedule_sess_for_deletion(fcport);
+		} else {
 			qla2x00_port_logout(fcport->vha, fcport);
+		}
 	}
 }
 
@@ -3107,6 +3136,9 @@ qla24xx_vport_delete(struct fc_vport *fc_vport)
 	qla2x00_wait_for_sess_deletion(vha);
 
 	qla_nvme_delete(vha);
+	qla_enode_stop(vha);
+	qla_edb_stop(vha);
+
 	vha->flags.delete_progress = 1;
 
 	qlt_remove_target(ha, vha);
diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c
index d42b2ad84049..4b5d28d89d69 100644
--- a/drivers/scsi/qla2xxx/qla_bsg.c
+++ b/drivers/scsi/qla2xxx/qla_bsg.c
@@ -25,6 +25,10 @@ void qla2x00_bsg_job_done(srb_t *sp, int res)
 	struct bsg_job *bsg_job = sp->u.bsg_job;
 	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
 
+	ql_dbg(ql_dbg_user, sp->vha, 0x7009,
+	    "%s: sp hdl %x, result=%x bsg ptr %p\n",
+	    __func__, sp->handle, res, bsg_job);
+
 	sp->free(sp);
 
 	bsg_reply->result = res;
@@ -53,11 +57,19 @@ void qla2x00_bsg_sp_free(srb_t *sp)
 			    bsg_job->reply_payload.sg_list,
 			    bsg_job->reply_payload.sg_cnt, DMA_FROM_DEVICE);
 	} else {
-		dma_unmap_sg(&ha->pdev->dev, bsg_job->request_payload.sg_list,
-		    bsg_job->request_payload.sg_cnt, DMA_TO_DEVICE);
 
-		dma_unmap_sg(&ha->pdev->dev, bsg_job->reply_payload.sg_list,
-		    bsg_job->reply_payload.sg_cnt, DMA_FROM_DEVICE);
+		if (sp->remap.remapped) {
+			dma_pool_free(ha->purex_dma_pool, sp->remap.rsp.buf,
+			    sp->remap.rsp.dma);
+			dma_pool_free(ha->purex_dma_pool, sp->remap.req.buf,
+			    sp->remap.req.dma);
+		} else {
+			dma_unmap_sg(&ha->pdev->dev, bsg_job->request_payload.sg_list,
+				bsg_job->request_payload.sg_cnt, DMA_TO_DEVICE);
+
+			dma_unmap_sg(&ha->pdev->dev, bsg_job->reply_payload.sg_list,
+				bsg_job->reply_payload.sg_cnt, DMA_FROM_DEVICE);
+		}
 	}
 
 	if (sp->type == SRB_CT_CMD ||
@@ -266,6 +278,7 @@ qla2x00_process_els(struct bsg_job *bsg_job)
 	int req_sg_cnt, rsp_sg_cnt;
 	int rval =  (DID_ERROR << 16);
 	uint16_t nextlid = 0;
+	uint32_t els_cmd = 0;
 
 	if (bsg_request->msgcode == FC_BSG_RPT_ELS) {
 		rport = fc_bsg_to_rport(bsg_job);
@@ -279,6 +292,9 @@ qla2x00_process_els(struct bsg_job *bsg_job)
 		vha = shost_priv(host);
 		ha = vha->hw;
 		type = "FC_BSG_HST_ELS_NOLOGIN";
+		els_cmd = bsg_request->rqst_data.h_els.command_code;
+		if (els_cmd == ELS_AUTH_ELS)
+			return qla_edif_process_els(vha, bsg_job);
 	}
 
 	if (!vha->flags.online) {
@@ -2768,10 +2784,13 @@ qla2x00_manage_host_port(struct bsg_job *bsg_job)
 }
 
 static int
-qla2x00_process_vendor_specific(struct bsg_job *bsg_job)
+qla2x00_process_vendor_specific(struct scsi_qla_host *vha, struct bsg_job *bsg_job)
 {
 	struct fc_bsg_request *bsg_request = bsg_job->request;
 
+	ql_dbg(ql_dbg_edif, vha, 0x911b, "%s FC_BSG_HST_VENDOR cmd[0]=0x%x\n",
+	    __func__, bsg_request->rqst_data.h_vendor.vendor_cmd[0]);
+
 	switch (bsg_request->rqst_data.h_vendor.vendor_cmd[0]) {
 	case QL_VND_LOOPBACK:
 		return qla2x00_process_loopback(bsg_job);
@@ -2840,6 +2859,9 @@ qla2x00_process_vendor_specific(struct bsg_job *bsg_job)
 	case QL_VND_DPORT_DIAGNOSTICS:
 		return qla2x00_do_dport_diagnostics(bsg_job);
 
+	case QL_VND_EDIF_MGMT:
+		return qla_edif_app_mgmt(bsg_job);
+
 	case QL_VND_SS_GET_FLASH_IMAGE_STATUS:
 		return qla2x00_get_flash_image_status(bsg_job);
 
@@ -2897,12 +2919,19 @@ qla24xx_bsg_request(struct bsg_job *bsg_job)
 		ql_dbg(ql_dbg_user, vha, 0x709f,
 		    "BSG: ISP abort active/needed -- cmd=%d.\n",
 		    bsg_request->msgcode);
+		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
 		return -EBUSY;
 	}
 
+	if (test_bit(PFLG_DRIVER_REMOVING, &vha->pci_flags)) {
+		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+		return -EIO;
+	}
+
 skip_chip_chk:
-	ql_dbg(ql_dbg_user, vha, 0x7000,
-	    "Entered %s msgcode=0x%x.\n", __func__, bsg_request->msgcode);
+	ql_dbg(ql_dbg_user + ql_dbg_verbose, vha, 0x7000,
+	    "Entered %s msgcode=0x%x. bsg ptr %px\n",
+	    __func__, bsg_request->msgcode, bsg_job);
 
 	switch (bsg_request->msgcode) {
 	case FC_BSG_RPT_ELS:
@@ -2913,7 +2942,7 @@ skip_chip_chk:
 		ret = qla2x00_process_ct(bsg_job);
 		break;
 	case FC_BSG_HST_VENDOR:
-		ret = qla2x00_process_vendor_specific(bsg_job);
+		ret = qla2x00_process_vendor_specific(vha, bsg_job);
 		break;
 	case FC_BSG_HST_ADD_RPORT:
 	case FC_BSG_HST_DEL_RPORT:
@@ -2922,6 +2951,10 @@ skip_chip_chk:
 		ql_log(ql_log_warn, vha, 0x705a, "Unsupported BSG request.\n");
 		break;
 	}
+
+	ql_dbg(ql_dbg_user + ql_dbg_verbose, vha, 0x7000,
+	    "%s done with return %x\n", __func__, ret);
+
 	return ret;
 }
 
@@ -2936,6 +2969,8 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job)
 	unsigned long flags;
 	struct req_que *req;
 
+	ql_log(ql_log_info, vha, 0x708b, "%s CMD timeout. bsg ptr %p.\n",
+	    __func__, bsg_job);
 	/* find the bsg job from the active list of commands */
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 	for (que = 0; que < ha->max_req_queues; que++) {
@@ -2945,27 +2980,26 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job)
 
 		for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) {
 			sp = req->outstanding_cmds[cnt];
-			if (sp) {
-				if (((sp->type == SRB_CT_CMD) ||
-					(sp->type == SRB_ELS_CMD_HST) ||
-					(sp->type == SRB_FXIOCB_BCMD))
-					&& (sp->u.bsg_job == bsg_job)) {
-					req->outstanding_cmds[cnt] = NULL;
-					spin_unlock_irqrestore(&ha->hardware_lock, flags);
-					if (ha->isp_ops->abort_command(sp)) {
-						ql_log(ql_log_warn, vha, 0x7089,
-						    "mbx abort_command "
-						    "failed.\n");
-						bsg_reply->result = -EIO;
-					} else {
-						ql_dbg(ql_dbg_user, vha, 0x708a,
-						    "mbx abort_command "
-						    "success.\n");
-						bsg_reply->result = 0;
-					}
-					spin_lock_irqsave(&ha->hardware_lock, flags);
-					goto done;
+			if (sp &&
+			    (sp->type == SRB_CT_CMD ||
+			     sp->type == SRB_ELS_CMD_HST ||
+			     sp->type == SRB_ELS_CMD_HST_NOLOGIN ||
+			     sp->type == SRB_FXIOCB_BCMD) &&
+			    sp->u.bsg_job == bsg_job) {
+				req->outstanding_cmds[cnt] = NULL;
+				spin_unlock_irqrestore(&ha->hardware_lock, flags);
+				if (ha->isp_ops->abort_command(sp)) {
+					ql_log(ql_log_warn, vha, 0x7089,
+					    "mbx abort_command failed.\n");
+					bsg_reply->result = -EIO;
+				} else {
+					ql_dbg(ql_dbg_user, vha, 0x708a,
+					    "mbx abort_command success.\n");
+					bsg_reply->result = 0;
 				}
+				spin_lock_irqsave(&ha->hardware_lock, flags);
+				goto done;
+
 			}
 		}
 	}
diff --git a/drivers/scsi/qla2xxx/qla_bsg.h b/drivers/scsi/qla2xxx/qla_bsg.h
index 0274e99e4a12..dd793cf8bc1e 100644
--- a/drivers/scsi/qla2xxx/qla_bsg.h
+++ b/drivers/scsi/qla2xxx/qla_bsg.h
@@ -31,6 +31,7 @@
 #define QL_VND_DPORT_DIAGNOSTICS	0x19
 #define QL_VND_GET_PRIV_STATS_EX	0x1A
 #define QL_VND_SS_GET_FLASH_IMAGE_STATUS	0x1E
+#define QL_VND_EDIF_MGMT                0X1F
 #define QL_VND_MANAGE_HOST_STATS	0x23
 #define QL_VND_GET_HOST_STATS		0x24
 #define QL_VND_GET_TGT_STATS		0x25
@@ -294,4 +295,6 @@ struct qla_active_regions {
 	uint8_t reserved[32];
 } __packed;
 
+#include "qla_edif_bsg.h"
+
 #endif
diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c
index f2d05592c1e2..25549a8a2d72 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.c
+++ b/drivers/scsi/qla2xxx/qla_dbg.c
@@ -12,8 +12,7 @@
  * ----------------------------------------------------------------------
  * | Module Init and Probe        |       0x0199       |                |
  * | Mailbox commands             |       0x1206       | 0x11a5-0x11ff	|
- * | Device Discovery             |       0x2134       | 0x210e-0x2116  |
- * |				  | 		       | 0x211a         |
+ * | Device Discovery             |       0x2134       | 0x210e-0x2115  |
  * |                              |                    | 0x211c-0x2128  |
  * |                              |                    | 0x212c-0x2134  |
  * | Queue Command and IO tracing |       0x3074       | 0x300b         |
diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h
index 9eb708e5e22e..f1f6c740bdcd 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.h
+++ b/drivers/scsi/qla2xxx/qla_dbg.h
@@ -367,6 +367,7 @@ ql_log_qp(uint32_t, struct qla_qpair *, int32_t, const char *fmt, ...);
 #define ql_dbg_tgt_mgt	0x00002000 /* Target mode management */
 #define ql_dbg_tgt_tmr	0x00001000 /* Target mode task management */
 #define ql_dbg_tgt_dif  0x00000800 /* Target mode dif */
+#define ql_dbg_edif	0x00000400 /* edif and purex debug */
 
 extern int qla27xx_dump_mpi_ram(struct qla_hw_data *, uint32_t, uint32_t *,
 	uint32_t, void **);
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 2f67ec1df3e6..be2eb75ee1a3 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -49,6 +49,28 @@ typedef struct {
 	uint8_t domain;
 } le_id_t;
 
+/*
+ * 24 bit port ID type definition.
+ */
+typedef union {
+	uint32_t b24 : 24;
+	struct {
+#ifdef __BIG_ENDIAN
+		uint8_t domain;
+		uint8_t area;
+		uint8_t al_pa;
+#elif defined(__LITTLE_ENDIAN)
+		uint8_t al_pa;
+		uint8_t area;
+		uint8_t domain;
+#else
+#error "__BIG_ENDIAN or __LITTLE_ENDIAN must be defined!"
+#endif
+		uint8_t rsvd_1;
+	} b;
+} port_id_t;
+#define INVALID_PORT_ID	0xFFFFFF
+
 #include "qla_bsg.h"
 #include "qla_dsd.h"
 #include "qla_nx.h"
@@ -319,6 +341,13 @@ struct name_list_extended {
 	u32			size;
 	u8			sent;
 };
+
+struct els_reject {
+	struct fc_els_ls_rjt *c;
+	dma_addr_t  cdma;
+	u16 size;
+};
+
 /*
  * Timeout timer counts in seconds
  */
@@ -345,6 +374,8 @@ struct name_list_extended {
 #define FW_MAX_EXCHANGES_CNT (32 * 1024)
 #define REDUCE_EXCHANGES_CNT  (8 * 1024)
 
+#define SET_DID_STATUS(stat_var, status) (stat_var = status << 16)
+
 struct req_que;
 struct qla_tgt_sess;
 
@@ -370,32 +401,10 @@ struct srb_cmd {
 #define SRB_CRC_CTX_DSD_VALID		BIT_5	/* DIF: dsd_list valid */
 #define SRB_WAKEUP_ON_COMP		BIT_6
 #define SRB_DIF_BUNDL_DMA_VALID		BIT_7   /* DIF: DMA list valid */
+#define SRB_EDIF_CLEANUP_DELETE		BIT_9
 
 /* To identify if a srb is of T10-CRC type. @sp => srb_t pointer */
 #define IS_PROT_IO(sp)	(sp->flags & SRB_CRC_CTX_DSD_VALID)
-
-/*
- * 24 bit port ID type definition.
- */
-typedef union {
-	uint32_t b24 : 24;
-
-	struct {
-#ifdef __BIG_ENDIAN
-		uint8_t domain;
-		uint8_t area;
-		uint8_t al_pa;
-#elif defined(__LITTLE_ENDIAN)
-		uint8_t al_pa;
-		uint8_t area;
-		uint8_t domain;
-#else
-#error "__BIG_ENDIAN or __LITTLE_ENDIAN must be defined!"
-#endif
-		uint8_t rsvd_1;
-	} b;
-} port_id_t;
-#define INVALID_PORT_ID	0xFFFFFF
 #define ISP_REG16_DISCONNECT 0xFFFF
 
 static inline le_id_t be_id_to_le(be_id_t id)
@@ -483,6 +492,7 @@ struct srb_iocb {
 #define SRB_LOGIN_SKIP_PRLI	BIT_2
 #define SRB_LOGIN_NVME_PRLI	BIT_3
 #define SRB_LOGIN_PRLI_ONLY	BIT_4
+#define SRB_LOGIN_FCSP		BIT_5
 			uint16_t data[2];
 			u32 iop[2];
 		} logio;
@@ -587,6 +597,10 @@ struct srb_iocb {
 			u16 cmd;
 			u16 vp_index;
 		} ctrlvp;
+		struct {
+			struct edif_sa_ctl	*sa_ctl;
+			struct qla_sa_update_frame sa_frame;
+		} sa_update;
 	} u;
 
 	struct timer_list timer;
@@ -617,6 +631,21 @@ struct srb_iocb {
 #define SRB_PRLI_CMD	21
 #define SRB_CTRL_VP	22
 #define SRB_PRLO_CMD	23
+#define SRB_SA_UPDATE	25
+#define SRB_ELS_CMD_HST_NOLOGIN 26
+#define SRB_SA_REPLACE	27
+
+struct qla_els_pt_arg {
+	u8 els_opcode;
+	u8 vp_idx;
+	__le16 nport_handle;
+	u16 control_flags;
+	__le32 rx_xchg_address;
+	port_id_t did;
+	u32 tx_len, tx_byte_count, rx_len, rx_byte_count;
+	dma_addr_t tx_addr, rx_addr;
+
+};
 
 enum {
 	TYPE_SRB,
@@ -630,6 +659,13 @@ struct iocb_resource {
 	u16 iocb_cnt;
 };
 
+struct bsg_cmd {
+	struct bsg_job *bsg_job;
+	union {
+		struct qla_els_pt_arg els_arg;
+	} u;
+};
+
 typedef struct srb {
 	/*
 	 * Do not move cmd_type field, it needs to
@@ -662,7 +698,21 @@ typedef struct srb {
 		struct srb_iocb iocb_cmd;
 		struct bsg_job *bsg_job;
 		struct srb_cmd scmd;
+		struct bsg_cmd bsg_cmd;
 	} u;
+	struct {
+		bool remapped;
+		struct {
+			dma_addr_t dma;
+			void *buf;
+			uint len;
+		} req;
+		struct {
+			dma_addr_t dma;
+			void *buf;
+			uint len;
+		} rsp;
+	} remap;
 	/*
 	 * Report completion status @res and call sp_put(@sp). @res is
 	 * an NVMe status code, a SCSI result (e.g. DID_OK << 16) or a
@@ -2294,6 +2344,7 @@ struct imm_ntfy_from_isp {
 			__le16	nport_handle;
 			uint16_t reserved_2;
 			__le16	flags;
+#define NOTIFY24XX_FLAGS_FCSP		BIT_5
 #define NOTIFY24XX_FLAGS_GLOBAL_TPRLO   BIT_1
 #define NOTIFY24XX_FLAGS_PUREX_IOCB     BIT_0
 			__le16	srr_rx_id;
@@ -2377,11 +2428,9 @@ struct mbx_24xx_entry {
  */
 typedef enum {
 	FCT_UNKNOWN,
-	FCT_RSCN,
-	FCT_SWITCH,
-	FCT_BROADCAST,
-	FCT_INITIATOR,
-	FCT_TARGET,
+	FCT_BROADCAST = 0x01,
+	FCT_INITIATOR = 0x02,
+	FCT_TARGET    = 0x04,
 	FCT_NVME_INITIATOR = 0x10,
 	FCT_NVME_TARGET = 0x20,
 	FCT_NVME_DISCOVERY = 0x40,
@@ -2424,6 +2473,7 @@ enum discovery_state {
 	DSC_LOGIN_COMPLETE,
 	DSC_ADISC,
 	DSC_DELETE_PEND,
+	DSC_LOGIN_AUTH_PEND,
 };
 
 enum login_state {	/* FW control Target side */
@@ -2467,6 +2517,8 @@ typedef struct fc_port {
 	unsigned int n2n_flag:1;
 	unsigned int explicit_logout:1;
 	unsigned int prli_pend_timer:1;
+	unsigned int do_prli_nvme:1;
+
 	uint8_t nvme_flag;
 
 	uint8_t node_name[WWN_SIZE];
@@ -2563,6 +2615,33 @@ typedef struct fc_port {
 	u64 tgt_short_link_down_cnt;
 	u64 tgt_link_down_time;
 	u64 dev_loss_tmo;
+	/*
+	 * EDIF parameters for encryption.
+	 */
+	struct {
+		uint32_t	enable:1;	/* device is edif enabled/req'd */
+		uint32_t	app_stop:2;
+		uint32_t	app_started:1;
+		uint32_t	aes_gmac:1;
+		uint32_t	app_sess_online:1;
+		uint32_t	tx_sa_set:1;
+		uint32_t	rx_sa_set:1;
+		uint32_t	tx_sa_pending:1;
+		uint32_t	rx_sa_pending:1;
+		uint32_t	tx_rekey_cnt;
+		uint32_t	rx_rekey_cnt;
+		uint64_t	tx_bytes;
+		uint64_t	rx_bytes;
+		uint8_t		auth_state;
+		uint16_t	authok:1;
+		uint16_t	rekey_cnt;
+		struct list_head edif_indx_list;
+		spinlock_t  indx_list_lock;
+
+		struct list_head tx_sa_list;
+		struct list_head rx_sa_list;
+		spinlock_t	sa_list_lock;
+	} edif;
 } fc_port_t;
 
 enum {
@@ -2604,7 +2683,8 @@ static const char * const port_dstate_str[] = {
 	"UPD_FCPORT",
 	"LOGIN_COMPLETE",
 	"ADISC",
-	"DELETE_PEND"
+	"DELETE_PEND",
+	"LOGIN_AUTH_PEND",
 };
 
 /*
@@ -2616,6 +2696,8 @@ static const char * const port_dstate_str[] = {
 #define FCF_ASYNC_SENT		BIT_3
 #define FCF_CONF_COMP_SUPPORTED BIT_4
 #define FCF_ASYNC_ACTIVE	BIT_5
+#define FCF_FCSP_DEVICE		BIT_6
+#define FCF_EDIF_DELETE		BIT_7
 
 /* No loop ID flag. */
 #define FC_NO_LOOP_ID		0x1000
@@ -2707,7 +2789,7 @@ static const char * const port_dstate_str[] = {
 /*
  * FDMI HBA attribute types.
  */
-#define FDMI1_HBA_ATTR_COUNT			9
+#define FDMI1_HBA_ATTR_COUNT			10
 #define FDMI2_HBA_ATTR_COUNT			17
 
 #define FDMI_HBA_NODE_NAME			0x1
@@ -3386,6 +3468,7 @@ enum qla_work_type {
 	QLA_EVT_SP_RETRY,
 	QLA_EVT_IIDMA,
 	QLA_EVT_ELS_PLOGI,
+	QLA_EVT_SA_REPLACE,
 };
 
 
@@ -3444,6 +3527,11 @@ struct qla_work_evt {
 			u8 fc4_type;
 			srb_t *sp;
 		} gpnft;
+		struct {
+			struct edif_sa_ctl	*sa_ctl;
+			fc_port_t *fcport;
+			uint16_t nport_handle;
+		} sa_update;
 	 } u;
 };
 
@@ -3845,7 +3933,6 @@ struct qlt_hw_data {
 	int num_act_qpairs;
 #define DEFAULT_NAQP 2
 	spinlock_t atio_lock ____cacheline_aligned;
-	struct btree_head32 host_map;
 };
 
 #define MAX_QFULL_CMDS_ALLOC	8192
@@ -3935,7 +4022,9 @@ struct qla_hw_data {
 		uint32_t	scm_supported_f:1;
 				/* Enabled in Driver */
 		uint32_t	scm_enabled:1;
-		uint32_t	max_req_queue_warned:1;
+		uint32_t	edif_hw:1;
+		uint32_t	edif_enabled:1;
+		uint32_t	n2n_fw_acc_sec:1;
 		uint32_t	plogi_template_valid:1;
 		uint32_t	port_isolated:1;
 	} flags;
@@ -4347,6 +4436,7 @@ struct qla_hw_data {
 	/* Cisco fabric attached */
 #define FW_ATTR_EXT0_SCM_CISCO		0x00002000
 #define FW_ATTR_EXT0_NVME2	BIT_13
+#define FW_ATTR_EXT0_EDIF	BIT_5
 	uint16_t	fw_attributes_ext[2];
 	uint32_t	fw_memory_size;
 	uint32_t	fw_transfer_size;
@@ -4619,8 +4709,24 @@ struct qla_hw_data {
 	struct qla_hw_data_stat stat;
 	pci_error_state_t pci_error_state;
 	u64 prev_cmd_cnt;
+	struct dma_pool *purex_dma_pool;
+	struct btree_head32 host_map;
+
+#define EDIF_NUM_SA_INDEX	512
+#define EDIF_TX_SA_INDEX_BASE	EDIF_NUM_SA_INDEX
+	void *edif_rx_sa_id_map;
+	void *edif_tx_sa_id_map;
+	spinlock_t sadb_fp_lock;
+
+	struct list_head sadb_tx_index_list;
+	struct list_head sadb_rx_index_list;
+	spinlock_t sadb_lock;	/* protects list */
+	struct els_reject elsrej;
+	u8 edif_post_stop_cnt_down;
 };
 
+#define RX_ELS_SIZE (roundup(sizeof(struct enode) + ELS_MAX_PAYLOAD, SMP_CACHE_BYTES))
+
 struct active_regions {
 	uint8_t global;
 	struct {
@@ -4659,6 +4765,8 @@ struct purex_item {
 	} iocb;
 };
 
+#include "qla_edif.h"
+
 #define SCM_FLAG_RDF_REJECT		0x00
 #define SCM_FLAG_RDF_COMPLETED		0x01
 
@@ -4888,6 +4996,8 @@ typedef struct scsi_qla_host {
 	u64 reset_cmd_err_cnt;
 	u64 link_down_time;
 	u64 short_link_down_cnt;
+	struct edif_dbell e_dbell;
+	struct pur_core pur_cinfo;
 } scsi_qla_host_t;
 
 struct qla27xx_image_status {
@@ -5058,6 +5168,9 @@ struct secure_flash_update_block_pk {
 #define QLA_BUSY			0x107
 #define QLA_ALREADY_REGISTERED		0x109
 #define QLA_OS_TIMER_EXPIRED		0x10a
+#define QLA_ERR_NO_QPAIR		0x10b
+#define QLA_ERR_NOT_FOUND		0x10c
+#define QLA_ERR_FROM_FW			0x10d
 
 #define NVRAM_DELAY()		udelay(10)
 
@@ -5088,6 +5201,43 @@ enum nexus_wait_type {
 	WAIT_LUN,
 };
 
+#define INVALID_EDIF_SA_INDEX	0xffff
+#define RX_DELETE_NO_EDIF_SA_INDEX	0xfffe
+
+#define QLA_SKIP_HANDLE QLA_TGT_SKIP_HANDLE
+
+/* edif hash element */
+struct edif_list_entry {
+	uint16_t handle;			/* nport_handle */
+	uint32_t update_sa_index;
+	uint32_t delete_sa_index;
+	uint32_t count;				/* counter for filtering sa_index */
+#define EDIF_ENTRY_FLAGS_CLEANUP	0x01	/* this index is being cleaned up */
+	uint32_t flags;				/* used by sadb cleanup code */
+	fc_port_t *fcport;			/* needed by rx delay timer function */
+	struct timer_list timer;		/* rx delay timer */
+	struct list_head next;
+};
+
+#define EDIF_TX_INDX_BASE 512
+#define EDIF_RX_INDX_BASE 0
+#define EDIF_RX_DELETE_FILTER_COUNT 3	/* delay queuing rx delete until this many */
+
+/* entry in the sa_index free pool */
+
+struct sa_index_pair {
+	uint16_t sa_index;
+	uint32_t spi;
+};
+
+/* edif sa_index data structure */
+struct edif_sa_index_entry {
+	struct sa_index_pair sa_pair[2];
+	fc_port_t *fcport;
+	uint16_t handle;
+	struct list_head next;
+};
+
 /* Refer to SNIA SFF 8247 */
 struct sff_8247_a0 {
 	u8 txid;	/* transceiver id */
@@ -5203,9 +5353,12 @@ struct sff_8247_a0 {
 #define NVME_FCP_TARGET(fcport) \
 	(FCP_TYPE(fcport) && NVME_TYPE(fcport)) \
 
+#define NVME_PRIORITY(ha, fcport) \
+	(NVME_FCP_TARGET(fcport) && \
+	 (ha->fc4_type_priority == FC4_PRIORITY_NVME))
+
 #define NVME_TARGET(ha, fcport) \
-	((NVME_FCP_TARGET(fcport) && \
-	(ha->fc4_type_priority == FC4_PRIORITY_NVME)) || \
+	(fcport->do_prli_nvme || \
 	NVME_ONLY_TARGET(fcport)) \
 
 #define PRLI_PHASE(_cls) \
diff --git a/drivers/scsi/qla2xxx/qla_edif.c b/drivers/scsi/qla2xxx/qla_edif.c
new file mode 100644
index 000000000000..ad746c62f0d4
--- /dev/null
+++ b/drivers/scsi/qla2xxx/qla_edif.c
@@ -0,0 +1,3461 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Marvell Fibre Channel HBA Driver
+ * Copyright (c)  2021     Marvell
+ */
+#include "qla_def.h"
+#include "qla_edif.h"
+
+#include <linux/kthread.h>
+#include <linux/vmalloc.h>
+#include <linux/delay.h>
+#include <scsi/scsi_tcq.h>
+
+static struct edif_sa_index_entry *qla_edif_sadb_find_sa_index_entry(uint16_t nport_handle,
+		struct list_head *sa_list);
+static uint16_t qla_edif_sadb_get_sa_index(fc_port_t *fcport,
+		struct qla_sa_update_frame *sa_frame);
+static int qla_edif_sadb_delete_sa_index(fc_port_t *fcport, uint16_t nport_handle,
+		uint16_t sa_index);
+static int qla_pur_get_pending(scsi_qla_host_t *, fc_port_t *, struct bsg_job *);
+
+struct edb_node {
+	struct  list_head	list;
+	uint32_t		ntype;
+	union {
+		port_id_t	plogi_did;
+		uint32_t	async;
+		port_id_t	els_sid;
+		struct edif_sa_update_aen	sa_aen;
+	} u;
+};
+
+static struct els_sub_cmd {
+	uint16_t cmd;
+	const char *str;
+} sc_str[] = {
+	{SEND_ELS, "send ELS"},
+	{SEND_ELS_REPLY, "send ELS Reply"},
+	{PULL_ELS, "retrieve ELS"},
+};
+
+const char *sc_to_str(uint16_t cmd)
+{
+	int i;
+	struct els_sub_cmd *e;
+
+	for (i = 0; i < ARRAY_SIZE(sc_str); i++) {
+		e = sc_str + i;
+		if (cmd == e->cmd)
+			return e->str;
+	}
+	return "unknown";
+}
+
+static struct edif_list_entry *qla_edif_list_find_sa_index(fc_port_t *fcport,
+		uint16_t handle)
+{
+	struct edif_list_entry *entry;
+	struct edif_list_entry *tentry;
+	struct list_head *indx_list = &fcport->edif.edif_indx_list;
+
+	list_for_each_entry_safe(entry, tentry, indx_list, next) {
+		if (entry->handle == handle)
+			return entry;
+	}
+	return NULL;
+}
+
+/* timeout called when no traffic and delayed rx sa_index delete */
+static void qla2x00_sa_replace_iocb_timeout(struct timer_list *t)
+{
+	struct edif_list_entry *edif_entry = from_timer(edif_entry, t, timer);
+	fc_port_t *fcport = edif_entry->fcport;
+	struct scsi_qla_host *vha = fcport->vha;
+	struct  edif_sa_ctl *sa_ctl;
+	uint16_t nport_handle;
+	unsigned long flags = 0;
+
+	ql_dbg(ql_dbg_edif, vha, 0x3069,
+	    "%s:  nport_handle 0x%x,  SA REPL Delay Timeout, %8phC portid=%06x\n",
+	    __func__, edif_entry->handle, fcport->port_name, fcport->d_id.b24);
+
+	/*
+	 * if delete_sa_index is valid then no one has serviced this
+	 * delayed delete
+	 */
+	spin_lock_irqsave(&fcport->edif.indx_list_lock, flags);
+
+	/*
+	 * delete_sa_index is invalidated when we find the new sa_index in
+	 * the incoming data stream.  If it is not invalidated then we are
+	 * still looking for the new sa_index because there is no I/O and we
+	 * need to just force the rx delete and move on.  Otherwise
+	 * we could get another rekey which will result in an error 66.
+	 */
+	if (edif_entry->delete_sa_index != INVALID_EDIF_SA_INDEX) {
+		uint16_t delete_sa_index = edif_entry->delete_sa_index;
+
+		edif_entry->delete_sa_index = INVALID_EDIF_SA_INDEX;
+		nport_handle = edif_entry->handle;
+		spin_unlock_irqrestore(&fcport->edif.indx_list_lock, flags);
+
+		sa_ctl = qla_edif_find_sa_ctl_by_index(fcport,
+		    delete_sa_index, 0);
+
+		if (sa_ctl) {
+			ql_dbg(ql_dbg_edif, vha, 0x3063,
+			    "%s: sa_ctl: %p, delete index %d, update index: %d, lid: 0x%x\n",
+			    __func__, sa_ctl, delete_sa_index, edif_entry->update_sa_index,
+			    nport_handle);
+
+			sa_ctl->flags = EDIF_SA_CTL_FLG_DEL;
+			set_bit(EDIF_SA_CTL_REPL, &sa_ctl->state);
+			qla_post_sa_replace_work(fcport->vha, fcport,
+			    nport_handle, sa_ctl);
+
+		} else {
+			ql_dbg(ql_dbg_edif, vha, 0x3063,
+			    "%s: sa_ctl not found for delete_sa_index: %d\n",
+			    __func__, edif_entry->delete_sa_index);
+		}
+	} else {
+		spin_unlock_irqrestore(&fcport->edif.indx_list_lock, flags);
+	}
+}
+
+/*
+ * create a new list entry for this nport handle and
+ * add an sa_update index to the list - called for sa_update
+ */
+static int qla_edif_list_add_sa_update_index(fc_port_t *fcport,
+		uint16_t sa_index, uint16_t handle)
+{
+	struct edif_list_entry *entry;
+	unsigned long flags = 0;
+
+	/* if the entry exists, then just update the sa_index */
+	entry = qla_edif_list_find_sa_index(fcport, handle);
+	if (entry) {
+		entry->update_sa_index = sa_index;
+		entry->count = 0;
+		return 0;
+	}
+
+	/*
+	 * This is the normal path - there should be no existing entry
+	 * when update is called.  The exception is at startup
+	 * when update is called for the first two sa_indexes
+	 * followed by a delete of the first sa_index
+	 */
+	entry = kzalloc((sizeof(struct edif_list_entry)), GFP_ATOMIC);
+	if (!entry)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&entry->next);
+	entry->handle = handle;
+	entry->update_sa_index = sa_index;
+	entry->delete_sa_index = INVALID_EDIF_SA_INDEX;
+	entry->count = 0;
+	entry->flags = 0;
+	timer_setup(&entry->timer, qla2x00_sa_replace_iocb_timeout, 0);
+	spin_lock_irqsave(&fcport->edif.indx_list_lock, flags);
+	list_add_tail(&entry->next, &fcport->edif.edif_indx_list);
+	spin_unlock_irqrestore(&fcport->edif.indx_list_lock, flags);
+	return 0;
+}
+
+/* remove an entry from the list */
+static void qla_edif_list_delete_sa_index(fc_port_t *fcport, struct edif_list_entry *entry)
+{
+	unsigned long flags = 0;
+
+	spin_lock_irqsave(&fcport->edif.indx_list_lock, flags);
+	list_del(&entry->next);
+	spin_unlock_irqrestore(&fcport->edif.indx_list_lock, flags);
+}
+
+int qla_post_sa_replace_work(struct scsi_qla_host *vha,
+	 fc_port_t *fcport, uint16_t nport_handle, struct edif_sa_ctl *sa_ctl)
+{
+	struct qla_work_evt *e;
+
+	e = qla2x00_alloc_work(vha, QLA_EVT_SA_REPLACE);
+	if (!e)
+		return QLA_FUNCTION_FAILED;
+
+	e->u.sa_update.fcport = fcport;
+	e->u.sa_update.sa_ctl = sa_ctl;
+	e->u.sa_update.nport_handle = nport_handle;
+	fcport->flags |= FCF_ASYNC_ACTIVE;
+	return qla2x00_post_work(vha, e);
+}
+
+static void
+qla_edif_sa_ctl_init(scsi_qla_host_t *vha, struct fc_port  *fcport)
+{
+	ql_dbg(ql_dbg_edif, vha, 0x2058,
+	    "Init SA_CTL List for fcport - nn %8phN pn %8phN portid=%06x.\n",
+	    fcport->node_name, fcport->port_name, fcport->d_id.b24);
+
+	fcport->edif.tx_rekey_cnt = 0;
+	fcport->edif.rx_rekey_cnt = 0;
+
+	fcport->edif.tx_bytes = 0;
+	fcport->edif.rx_bytes = 0;
+}
+
+static int qla_bsg_check(scsi_qla_host_t *vha, struct bsg_job *bsg_job,
+fc_port_t *fcport)
+{
+	struct extra_auth_els *p;
+	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
+	struct qla_bsg_auth_els_request *req =
+	    (struct qla_bsg_auth_els_request *)bsg_job->request;
+
+	if (!vha->hw->flags.edif_enabled) {
+		ql_dbg(ql_dbg_edif, vha, 0x9105,
+		    "%s edif not enabled\n", __func__);
+		goto done;
+	}
+	if (vha->e_dbell.db_flags != EDB_ACTIVE) {
+		ql_dbg(ql_dbg_edif, vha, 0x09102,
+		    "%s doorbell not enabled\n", __func__);
+		goto done;
+	}
+
+	p = &req->e;
+
+	/* Get response */
+	if (p->sub_cmd == PULL_ELS) {
+		struct qla_bsg_auth_els_reply *rpl =
+			(struct qla_bsg_auth_els_reply *)bsg_job->reply;
+
+		qla_pur_get_pending(vha, fcport, bsg_job);
+
+		ql_dbg(ql_dbg_edif, vha, 0x911d,
+			"%s %s %8phN sid=%x. xchg %x, nb=%xh bsg ptr %p\n",
+			__func__, sc_to_str(p->sub_cmd), fcport->port_name,
+			fcport->d_id.b24, rpl->rx_xchg_address,
+			rpl->r.reply_payload_rcv_len, bsg_job);
+
+		goto done;
+	}
+	return 0;
+
+done:
+
+	bsg_job_done(bsg_job, bsg_reply->result,
+			bsg_reply->reply_payload_rcv_len);
+	return -EIO;
+}
+
+fc_port_t *
+qla2x00_find_fcport_by_pid(scsi_qla_host_t *vha, port_id_t *id)
+{
+	fc_port_t *f, *tf;
+
+	f = NULL;
+	list_for_each_entry_safe(f, tf, &vha->vp_fcports, list) {
+		if ((f->flags & FCF_FCSP_DEVICE)) {
+			ql_dbg(ql_dbg_edif + ql_dbg_verbose, vha, 0x2058,
+			    "Found secure fcport - nn %8phN pn %8phN portid=0x%x, 0x%x.\n",
+			    f->node_name, f->port_name,
+			    f->d_id.b24, id->b24);
+			if (f->d_id.b24 == id->b24)
+				return f;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * qla_edif_app_check(): check for valid application id.
+ * @vha: host adapter pointer
+ * @appid: application id
+ * Return: false = fail, true = pass
+ */
+static bool
+qla_edif_app_check(scsi_qla_host_t *vha, struct app_id appid)
+{
+	/* check that the app is allow/known to the driver */
+
+	if (appid.app_vid == EDIF_APP_ID) {
+		ql_dbg(ql_dbg_edif + ql_dbg_verbose, vha, 0x911d, "%s app id ok\n", __func__);
+		return true;
+	}
+	ql_dbg(ql_dbg_edif, vha, 0x911d, "%s app id not ok (%x)",
+	    __func__, appid.app_vid);
+
+	return false;
+}
+
+static void qla_edif_reset_auth_wait(struct fc_port *fcport, int state,
+		int waitonly)
+{
+	int cnt, max_cnt = 200;
+	bool traced = false;
+
+	fcport->keep_nport_handle = 1;
+
+	if (!waitonly) {
+		qla2x00_set_fcport_disc_state(fcport, state);
+		qlt_schedule_sess_for_deletion(fcport);
+	} else {
+		qla2x00_set_fcport_disc_state(fcport, state);
+	}
+
+	ql_dbg(ql_dbg_edif, fcport->vha, 0xf086,
+		"%s: waiting for session, max_cnt=%u\n",
+		__func__, max_cnt);
+
+	cnt = 0;
+
+	if (waitonly) {
+		/* Marker wait min 10 msecs. */
+		msleep(50);
+		cnt += 50;
+	}
+	while (1) {
+		if (!traced) {
+			ql_dbg(ql_dbg_edif, fcport->vha, 0xf086,
+			    "%s: session sleep.\n",
+			    __func__);
+			traced = true;
+		}
+		msleep(20);
+		cnt++;
+		if (waitonly && (fcport->disc_state == state ||
+			fcport->disc_state == DSC_LOGIN_COMPLETE))
+			break;
+		if (fcport->disc_state == DSC_LOGIN_AUTH_PEND)
+			break;
+		if (cnt > max_cnt)
+			break;
+	}
+
+	if (!waitonly) {
+		ql_dbg(ql_dbg_edif, fcport->vha, 0xf086,
+		    "%s: waited for session - %8phC, loopid=%x portid=%06x fcport=%p state=%u, cnt=%u\n",
+		    __func__, fcport->port_name, fcport->loop_id,
+		    fcport->d_id.b24, fcport, fcport->disc_state, cnt);
+	} else {
+		ql_dbg(ql_dbg_edif, fcport->vha, 0xf086,
+		    "%s: waited ONLY for session - %8phC, loopid=%x portid=%06x fcport=%p state=%u, cnt=%u\n",
+		    __func__, fcport->port_name, fcport->loop_id,
+		    fcport->d_id.b24, fcport, fcport->disc_state, cnt);
+	}
+}
+
+static void
+qla_edif_free_sa_ctl(fc_port_t *fcport, struct edif_sa_ctl *sa_ctl,
+	int index)
+{
+	unsigned long flags = 0;
+
+	spin_lock_irqsave(&fcport->edif.sa_list_lock, flags);
+	list_del(&sa_ctl->next);
+	spin_unlock_irqrestore(&fcport->edif.sa_list_lock, flags);
+	if (index >= 512)
+		fcport->edif.tx_rekey_cnt--;
+	else
+		fcport->edif.rx_rekey_cnt--;
+	kfree(sa_ctl);
+}
+
+/* return an index to the freepool */
+static void qla_edif_add_sa_index_to_freepool(fc_port_t *fcport, int dir,
+		uint16_t sa_index)
+{
+	void *sa_id_map;
+	struct scsi_qla_host *vha = fcport->vha;
+	struct qla_hw_data *ha = vha->hw;
+	unsigned long flags = 0;
+	u16 lsa_index = sa_index;
+
+	ql_dbg(ql_dbg_edif + ql_dbg_verbose, vha, 0x3063,
+	    "%s: entry\n", __func__);
+
+	if (dir) {
+		sa_id_map = ha->edif_tx_sa_id_map;
+		lsa_index -= EDIF_TX_SA_INDEX_BASE;
+	} else {
+		sa_id_map = ha->edif_rx_sa_id_map;
+	}
+
+	spin_lock_irqsave(&ha->sadb_fp_lock, flags);
+	clear_bit(lsa_index, sa_id_map);
+	spin_unlock_irqrestore(&ha->sadb_fp_lock, flags);
+	ql_dbg(ql_dbg_edif, vha, 0x3063,
+	    "%s: index %d added to free pool\n", __func__, sa_index);
+}
+
+static void __qla2x00_release_all_sadb(struct scsi_qla_host *vha,
+	struct fc_port *fcport, struct edif_sa_index_entry *entry,
+	int pdir)
+{
+	struct edif_list_entry *edif_entry;
+	struct  edif_sa_ctl *sa_ctl;
+	int i, dir;
+	int key_cnt = 0;
+
+	for (i = 0; i < 2; i++) {
+		if (entry->sa_pair[i].sa_index == INVALID_EDIF_SA_INDEX)
+			continue;
+
+		if (fcport->loop_id != entry->handle) {
+			ql_dbg(ql_dbg_edif, vha, 0x3063,
+			    "%s: ** WARNING %d** entry handle: 0x%x, lid: 0x%x, sa_index: %d\n",
+			    __func__, i, entry->handle, fcport->loop_id,
+			    entry->sa_pair[i].sa_index);
+		}
+
+		/* release the sa_ctl */
+		sa_ctl = qla_edif_find_sa_ctl_by_index(fcport,
+				entry->sa_pair[i].sa_index, pdir);
+		if (sa_ctl &&
+		    qla_edif_find_sa_ctl_by_index(fcport, sa_ctl->index, pdir)) {
+			ql_dbg(ql_dbg_edif, vha, 0x3063,
+			    "%s: freeing sa_ctl for index %d\n", __func__, sa_ctl->index);
+			qla_edif_free_sa_ctl(fcport, sa_ctl, sa_ctl->index);
+		} else {
+			ql_dbg(ql_dbg_edif, vha, 0x3063,
+			    "%s: sa_ctl NOT freed, sa_ctl: %p\n", __func__, sa_ctl);
+		}
+
+		/* Release the index */
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+			"%s: freeing sa_index %d, nph: 0x%x\n",
+			__func__, entry->sa_pair[i].sa_index, entry->handle);
+
+		dir = (entry->sa_pair[i].sa_index <
+			EDIF_TX_SA_INDEX_BASE) ? 0 : 1;
+		qla_edif_add_sa_index_to_freepool(fcport, dir,
+			entry->sa_pair[i].sa_index);
+
+		/* Delete timer on RX */
+		if (pdir != SAU_FLG_TX) {
+			edif_entry =
+				qla_edif_list_find_sa_index(fcport, entry->handle);
+			if (edif_entry) {
+				ql_dbg(ql_dbg_edif, vha, 0x5033,
+				    "%s: remove edif_entry %p, update_sa_index: 0x%x, delete_sa_index: 0x%x\n",
+				    __func__, edif_entry, edif_entry->update_sa_index,
+				    edif_entry->delete_sa_index);
+				qla_edif_list_delete_sa_index(fcport, edif_entry);
+				/*
+				 * valid delete_sa_index indicates there is a rx
+				 * delayed delete queued
+				 */
+				if (edif_entry->delete_sa_index !=
+						INVALID_EDIF_SA_INDEX) {
+					del_timer(&edif_entry->timer);
+
+					/* build and send the aen */
+					fcport->edif.rx_sa_set = 1;
+					fcport->edif.rx_sa_pending = 0;
+					qla_edb_eventcreate(vha,
+							VND_CMD_AUTH_STATE_SAUPDATE_COMPL,
+							QL_VND_SA_STAT_SUCCESS,
+							QL_VND_RX_SA_KEY, fcport);
+				}
+				ql_dbg(ql_dbg_edif, vha, 0x5033,
+				    "%s: release edif_entry %p, update_sa_index: 0x%x, delete_sa_index: 0x%x\n",
+				    __func__, edif_entry, edif_entry->update_sa_index,
+				    edif_entry->delete_sa_index);
+
+				kfree(edif_entry);
+			}
+		}
+		key_cnt++;
+	}
+	ql_dbg(ql_dbg_edif, vha, 0x3063,
+	    "%s: %d %s keys released\n",
+	    __func__, key_cnt, pdir ? "tx" : "rx");
+}
+
+/* find an release all outstanding sadb sa_indicies */
+void qla2x00_release_all_sadb(struct scsi_qla_host *vha, struct fc_port *fcport)
+{
+	struct edif_sa_index_entry *entry, *tmp;
+	struct qla_hw_data *ha = vha->hw;
+	unsigned long flags;
+
+	ql_dbg(ql_dbg_edif + ql_dbg_verbose, vha, 0x3063,
+	    "%s: Starting...\n", __func__);
+
+	spin_lock_irqsave(&ha->sadb_lock, flags);
+
+	list_for_each_entry_safe(entry, tmp, &ha->sadb_rx_index_list, next) {
+		if (entry->fcport == fcport) {
+			list_del(&entry->next);
+			spin_unlock_irqrestore(&ha->sadb_lock, flags);
+			__qla2x00_release_all_sadb(vha, fcport, entry, 0);
+			kfree(entry);
+			spin_lock_irqsave(&ha->sadb_lock, flags);
+			break;
+		}
+	}
+
+	list_for_each_entry_safe(entry, tmp, &ha->sadb_tx_index_list, next) {
+		if (entry->fcport == fcport) {
+			list_del(&entry->next);
+			spin_unlock_irqrestore(&ha->sadb_lock, flags);
+
+			__qla2x00_release_all_sadb(vha, fcport, entry, SAU_FLG_TX);
+
+			kfree(entry);
+			spin_lock_irqsave(&ha->sadb_lock, flags);
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&ha->sadb_lock, flags);
+}
+
+/**
+ * qla_edif_app_start:  application has announce its present
+ * @vha: host adapter pointer
+ * @bsg_job: user request
+ *
+ * Set/activate doorbell.  Reset current sessions and re-login with
+ * secure flag.
+ */
+static int
+qla_edif_app_start(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+{
+	int32_t			rval = 0;
+	struct fc_bsg_reply	*bsg_reply = bsg_job->reply;
+	struct app_start	appstart;
+	struct app_start_reply	appreply;
+	struct fc_port  *fcport, *tf;
+
+	ql_dbg(ql_dbg_edif, vha, 0x911d, "%s app start\n", __func__);
+
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+	    bsg_job->request_payload.sg_cnt, &appstart,
+	    sizeof(struct app_start));
+
+	ql_dbg(ql_dbg_edif, vha, 0x911d, "%s app_vid=%x app_start_flags %x\n",
+	     __func__, appstart.app_info.app_vid, appstart.app_start_flags);
+
+	if (vha->e_dbell.db_flags != EDB_ACTIVE) {
+		/* mark doorbell as active since an app is now present */
+		vha->e_dbell.db_flags = EDB_ACTIVE;
+	} else {
+		ql_dbg(ql_dbg_edif, vha, 0x911e, "%s doorbell already active\n",
+		     __func__);
+	}
+
+	if (N2N_TOPO(vha->hw)) {
+		if (vha->hw->flags.n2n_fw_acc_sec)
+			set_bit(N2N_LINK_RESET, &vha->dpc_flags);
+		else
+			set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
+		qla2xxx_wake_dpc(vha);
+	} else {
+		list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) {
+			ql_dbg(ql_dbg_edif, vha, 0xf084,
+			       "%s: sess %p %8phC lid %#04x s_id %06x logout %d\n",
+			       __func__, fcport, fcport->port_name,
+			       fcport->loop_id, fcport->d_id.b24,
+			       fcport->logout_on_delete);
+
+			ql_dbg(ql_dbg_edif, vha, 0xf084,
+			       "keep %d els_logo %d disc state %d auth state %d stop state %d\n",
+			       fcport->keep_nport_handle,
+			       fcport->send_els_logo, fcport->disc_state,
+			       fcport->edif.auth_state, fcport->edif.app_stop);
+
+			if (atomic_read(&vha->loop_state) == LOOP_DOWN)
+				break;
+			if (!(fcport->flags & FCF_FCSP_DEVICE))
+				continue;
+
+			fcport->edif.app_started = 1;
+			if (fcport->edif.app_stop ||
+			    (fcport->disc_state != DSC_LOGIN_COMPLETE &&
+			     fcport->disc_state != DSC_LOGIN_PEND &&
+			     fcport->disc_state != DSC_DELETED)) {
+				/* no activity */
+				fcport->edif.app_stop = 0;
+
+				ql_dbg(ql_dbg_edif, vha, 0x911e,
+				       "%s wwpn %8phC calling qla_edif_reset_auth_wait\n",
+				       __func__, fcport->port_name);
+				fcport->edif.app_sess_online = 1;
+				qla_edif_reset_auth_wait(fcport, DSC_LOGIN_PEND, 0);
+			}
+			qla_edif_sa_ctl_init(vha, fcport);
+		}
+	}
+
+	if (vha->pur_cinfo.enode_flags != ENODE_ACTIVE) {
+		/* mark as active since an app is now present */
+		vha->pur_cinfo.enode_flags = ENODE_ACTIVE;
+	} else {
+		ql_dbg(ql_dbg_edif, vha, 0x911f, "%s enode already active\n",
+		     __func__);
+	}
+
+	appreply.host_support_edif = vha->hw->flags.edif_enabled;
+	appreply.edif_enode_active = vha->pur_cinfo.enode_flags;
+	appreply.edif_edb_active = vha->e_dbell.db_flags;
+
+	bsg_job->reply_len = sizeof(struct fc_bsg_reply) +
+	    sizeof(struct app_start_reply);
+
+	SET_DID_STATUS(bsg_reply->result, DID_OK);
+
+	sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+	    bsg_job->reply_payload.sg_cnt, &appreply,
+	    sizeof(struct app_start_reply));
+
+	ql_dbg(ql_dbg_edif, vha, 0x911d,
+	    "%s app start completed with 0x%x\n",
+	    __func__, rval);
+
+	return rval;
+}
+
+/**
+ * qla_edif_app_stop - app has announced it's exiting.
+ * @vha: host adapter pointer
+ * @bsg_job: user space command pointer
+ *
+ * Free any in flight messages, clear all doorbell events
+ * to application. Reject any message relate to security.
+ */
+static int
+qla_edif_app_stop(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+{
+	struct app_stop         appstop;
+	struct fc_bsg_reply     *bsg_reply = bsg_job->reply;
+	struct fc_port  *fcport, *tf;
+
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+	    bsg_job->request_payload.sg_cnt, &appstop,
+	    sizeof(struct app_stop));
+
+	ql_dbg(ql_dbg_edif, vha, 0x911d, "%s Stopping APP: app_vid=%x\n",
+	    __func__, appstop.app_info.app_vid);
+
+	/* Call db stop and enode stop functions */
+
+	/* if we leave this running short waits are operational < 16 secs */
+	qla_enode_stop(vha);        /* stop enode */
+	qla_edb_stop(vha);          /* stop db */
+
+	list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) {
+		if (!(fcport->flags & FCF_FCSP_DEVICE))
+			continue;
+
+		if (fcport->flags & FCF_FCSP_DEVICE) {
+			ql_dbg(ql_dbg_edif, vha, 0xf084,
+			    "%s: sess %p from port %8phC lid %#04x s_id %06x logout %d keep %d els_logo %d\n",
+			    __func__, fcport,
+			    fcport->port_name, fcport->loop_id, fcport->d_id.b24,
+			    fcport->logout_on_delete, fcport->keep_nport_handle,
+			    fcport->send_els_logo);
+
+			if (atomic_read(&vha->loop_state) == LOOP_DOWN)
+				break;
+
+			fcport->edif.app_stop = 1;
+			ql_dbg(ql_dbg_edif, vha, 0x911e,
+				"%s wwpn %8phC calling qla_edif_reset_auth_wait\n",
+				__func__, fcport->port_name);
+
+			fcport->send_els_logo = 1;
+			qlt_schedule_sess_for_deletion(fcport);
+
+			/* qla_edif_flush_sa_ctl_lists(fcport); */
+			fcport->edif.app_started = 0;
+		}
+	}
+
+	bsg_job->reply_len = sizeof(struct fc_bsg_reply);
+	SET_DID_STATUS(bsg_reply->result, DID_OK);
+
+	/* no return interface to app - it assumes we cleaned up ok */
+
+	return 0;
+}
+
+static int
+qla_edif_app_chk_sa_update(scsi_qla_host_t *vha, fc_port_t *fcport,
+		struct app_plogi_reply *appplogireply)
+{
+	int	ret = 0;
+
+	if (!(fcport->edif.rx_sa_set && fcport->edif.tx_sa_set)) {
+		ql_dbg(ql_dbg_edif, vha, 0x911e,
+		    "%s: wwpn %8phC Both SA indexes has not been SET TX %d, RX %d.\n",
+		    __func__, fcport->port_name, fcport->edif.tx_sa_set,
+		    fcport->edif.rx_sa_set);
+		appplogireply->prli_status = 0;
+		ret = 1;
+	} else  {
+		ql_dbg(ql_dbg_edif, vha, 0x911e,
+		    "%s wwpn %8phC Both SA(s) updated.\n", __func__,
+		    fcport->port_name);
+		fcport->edif.rx_sa_set = fcport->edif.tx_sa_set = 0;
+		fcport->edif.rx_sa_pending = fcport->edif.tx_sa_pending = 0;
+		appplogireply->prli_status = 1;
+	}
+	return ret;
+}
+
+/**
+ * qla_edif_app_authok - authentication by app succeeded.  Driver can proceed
+ *   with prli
+ * @vha: host adapter pointer
+ * @bsg_job: user request
+ */
+static int
+qla_edif_app_authok(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+{
+	int32_t			rval = 0;
+	struct auth_complete_cmd appplogiok;
+	struct app_plogi_reply	appplogireply = {0};
+	struct fc_bsg_reply	*bsg_reply = bsg_job->reply;
+	fc_port_t		*fcport = NULL;
+	port_id_t		portid = {0};
+
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+	    bsg_job->request_payload.sg_cnt, &appplogiok,
+	    sizeof(struct auth_complete_cmd));
+
+	switch (appplogiok.type) {
+	case PL_TYPE_WWPN:
+		fcport = qla2x00_find_fcport_by_wwpn(vha,
+		    appplogiok.u.wwpn, 0);
+		if (!fcport)
+			ql_dbg(ql_dbg_edif, vha, 0x911d,
+			    "%s wwpn lookup failed: %8phC\n",
+			    __func__, appplogiok.u.wwpn);
+		break;
+	case PL_TYPE_DID:
+		fcport = qla2x00_find_fcport_by_pid(vha, &appplogiok.u.d_id);
+		if (!fcport)
+			ql_dbg(ql_dbg_edif, vha, 0x911d,
+			    "%s d_id lookup failed: %x\n", __func__,
+			    portid.b24);
+		break;
+	default:
+		ql_dbg(ql_dbg_edif, vha, 0x911d,
+		    "%s undefined type: %x\n", __func__,
+		    appplogiok.type);
+		break;
+	}
+
+	if (!fcport) {
+		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+		goto errstate_exit;
+	}
+
+	/*
+	 * if port is online then this is a REKEY operation
+	 * Only do sa update checking
+	 */
+	if (atomic_read(&fcport->state) == FCS_ONLINE) {
+		ql_dbg(ql_dbg_edif, vha, 0x911d,
+		    "%s Skipping PRLI complete based on rekey\n", __func__);
+		appplogireply.prli_status = 1;
+		SET_DID_STATUS(bsg_reply->result, DID_OK);
+		qla_edif_app_chk_sa_update(vha, fcport, &appplogireply);
+		goto errstate_exit;
+	}
+
+	/* make sure in AUTH_PENDING or else reject */
+	if (fcport->disc_state != DSC_LOGIN_AUTH_PEND) {
+		ql_dbg(ql_dbg_edif, vha, 0x911e,
+		    "%s wwpn %8phC is not in auth pending state (%x)\n",
+		    __func__, fcport->port_name, fcport->disc_state);
+		SET_DID_STATUS(bsg_reply->result, DID_OK);
+		appplogireply.prli_status = 0;
+		goto errstate_exit;
+	}
+
+	SET_DID_STATUS(bsg_reply->result, DID_OK);
+	appplogireply.prli_status = 1;
+	fcport->edif.authok = 1;
+	if (!(fcport->edif.rx_sa_set && fcport->edif.tx_sa_set)) {
+		ql_dbg(ql_dbg_edif, vha, 0x911e,
+		    "%s: wwpn %8phC Both SA indexes has not been SET TX %d, RX %d.\n",
+		    __func__, fcport->port_name, fcport->edif.tx_sa_set,
+		    fcport->edif.rx_sa_set);
+		SET_DID_STATUS(bsg_reply->result, DID_OK);
+		appplogireply.prli_status = 0;
+		goto errstate_exit;
+
+	} else {
+		ql_dbg(ql_dbg_edif, vha, 0x911e,
+		    "%s wwpn %8phC Both SA(s) updated.\n", __func__,
+		    fcport->port_name);
+		fcport->edif.rx_sa_set = fcport->edif.tx_sa_set = 0;
+		fcport->edif.rx_sa_pending = fcport->edif.tx_sa_pending = 0;
+	}
+
+	if (qla_ini_mode_enabled(vha)) {
+		ql_dbg(ql_dbg_edif, vha, 0x911e,
+		    "%s AUTH complete - RESUME with prli for wwpn %8phC\n",
+		    __func__, fcport->port_name);
+		qla_edif_reset_auth_wait(fcport, DSC_LOGIN_PEND, 1);
+		qla24xx_post_prli_work(vha, fcport);
+	}
+
+errstate_exit:
+	bsg_job->reply_len = sizeof(struct fc_bsg_reply);
+	sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+	    bsg_job->reply_payload.sg_cnt, &appplogireply,
+	    sizeof(struct app_plogi_reply));
+
+	return rval;
+}
+
+/**
+ * qla_edif_app_authfail - authentication by app has failed.  Driver is given
+ *   notice to tear down current session.
+ * @vha: host adapter pointer
+ * @bsg_job: user request
+ */
+static int
+qla_edif_app_authfail(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+{
+	int32_t			rval = 0;
+	struct auth_complete_cmd appplogifail;
+	struct fc_bsg_reply	*bsg_reply = bsg_job->reply;
+	fc_port_t		*fcport = NULL;
+	port_id_t		portid = {0};
+
+	ql_dbg(ql_dbg_edif, vha, 0x911d, "%s app auth fail\n", __func__);
+
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+	    bsg_job->request_payload.sg_cnt, &appplogifail,
+	    sizeof(struct auth_complete_cmd));
+
+	/*
+	 * TODO: edif: app has failed this plogi. Inform driver to
+	 * take any action (if any).
+	 */
+	switch (appplogifail.type) {
+	case PL_TYPE_WWPN:
+		fcport = qla2x00_find_fcport_by_wwpn(vha,
+		    appplogifail.u.wwpn, 0);
+		SET_DID_STATUS(bsg_reply->result, DID_OK);
+		break;
+	case PL_TYPE_DID:
+		fcport = qla2x00_find_fcport_by_pid(vha, &appplogifail.u.d_id);
+		if (!fcport)
+			ql_dbg(ql_dbg_edif, vha, 0x911d,
+			    "%s d_id lookup failed: %x\n", __func__,
+			    portid.b24);
+		SET_DID_STATUS(bsg_reply->result, DID_OK);
+		break;
+	default:
+		ql_dbg(ql_dbg_edif, vha, 0x911e,
+		    "%s undefined type: %x\n", __func__,
+		    appplogifail.type);
+		bsg_job->reply_len = sizeof(struct fc_bsg_reply);
+		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+		rval = -1;
+		break;
+	}
+
+	ql_dbg(ql_dbg_edif, vha, 0x911d,
+	    "%s fcport is 0x%p\n", __func__, fcport);
+
+	if (fcport) {
+		/* set/reset edif values and flags */
+		ql_dbg(ql_dbg_edif, vha, 0x911e,
+		    "%s reset the auth process - %8phC, loopid=%x portid=%06x.\n",
+		    __func__, fcport->port_name, fcport->loop_id, fcport->d_id.b24);
+
+		if (qla_ini_mode_enabled(fcport->vha)) {
+			fcport->send_els_logo = 1;
+			qla_edif_reset_auth_wait(fcport, DSC_LOGIN_PEND, 0);
+		}
+	}
+
+	return rval;
+}
+
+/**
+ * qla_edif_app_getfcinfo - app would like to read session info (wwpn, nportid,
+ *   [initiator|target] mode.  It can specific session with specific nport id or
+ *   all sessions.
+ * @vha: host adapter pointer
+ * @bsg_job: user request pointer
+ */
+static int
+qla_edif_app_getfcinfo(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+{
+	int32_t			rval = 0;
+	int32_t			num_cnt;
+	struct fc_bsg_reply	*bsg_reply = bsg_job->reply;
+	struct app_pinfo_req	app_req;
+	struct app_pinfo_reply	*app_reply;
+	port_id_t		tdid;
+
+	ql_dbg(ql_dbg_edif, vha, 0x911d, "%s app get fcinfo\n", __func__);
+
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+	    bsg_job->request_payload.sg_cnt, &app_req,
+	    sizeof(struct app_pinfo_req));
+
+	num_cnt = app_req.num_ports;	/* num of ports alloc'd by app */
+
+	app_reply = kzalloc((sizeof(struct app_pinfo_reply) +
+	    sizeof(struct app_pinfo) * num_cnt), GFP_KERNEL);
+	if (!app_reply) {
+		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+		rval = -1;
+	} else {
+		struct fc_port	*fcport = NULL, *tf;
+		uint32_t	pcnt = 0;
+
+		list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) {
+			if (!(fcport->flags & FCF_FCSP_DEVICE))
+				continue;
+
+			tdid = app_req.remote_pid;
+
+			ql_dbg(ql_dbg_edif, vha, 0x2058,
+			    "APP request entry - portid=%06x.\n", tdid.b24);
+
+			/* Ran out of space */
+			if (pcnt > app_req.num_ports)
+				break;
+
+			if (tdid.b24 != 0 && tdid.b24 != fcport->d_id.b24)
+				continue;
+
+			app_reply->ports[pcnt].rekey_count =
+				fcport->edif.rekey_cnt;
+
+			app_reply->ports[pcnt].remote_type =
+				VND_CMD_RTYPE_UNKNOWN;
+			if (fcport->port_type & (FCT_NVME_TARGET | FCT_TARGET))
+				app_reply->ports[pcnt].remote_type |=
+					VND_CMD_RTYPE_TARGET;
+			if (fcport->port_type & (FCT_NVME_INITIATOR | FCT_INITIATOR))
+				app_reply->ports[pcnt].remote_type |=
+					VND_CMD_RTYPE_INITIATOR;
+
+			app_reply->ports[pcnt].remote_pid = fcport->d_id;
+
+			ql_dbg(ql_dbg_edif, vha, 0x2058,
+			    "Found FC_SP fcport - nn %8phN pn %8phN pcnt %d portid=%06x secure %d.\n",
+			    fcport->node_name, fcport->port_name, pcnt,
+			    fcport->d_id.b24, fcport->flags & FCF_FCSP_DEVICE);
+
+			switch (fcport->edif.auth_state) {
+			case VND_CMD_AUTH_STATE_ELS_RCVD:
+				if (fcport->disc_state == DSC_LOGIN_AUTH_PEND) {
+					fcport->edif.auth_state = VND_CMD_AUTH_STATE_NEEDED;
+					app_reply->ports[pcnt].auth_state =
+						VND_CMD_AUTH_STATE_NEEDED;
+				} else {
+					app_reply->ports[pcnt].auth_state =
+						VND_CMD_AUTH_STATE_ELS_RCVD;
+				}
+				break;
+			default:
+				app_reply->ports[pcnt].auth_state = fcport->edif.auth_state;
+				break;
+			}
+
+			memcpy(app_reply->ports[pcnt].remote_wwpn,
+			    fcport->port_name, 8);
+
+			app_reply->ports[pcnt].remote_state =
+				(atomic_read(&fcport->state) ==
+				    FCS_ONLINE ? 1 : 0);
+
+			pcnt++;
+
+			if (tdid.b24 != 0)
+				break;
+		}
+		app_reply->port_count = pcnt;
+		SET_DID_STATUS(bsg_reply->result, DID_OK);
+	}
+
+	sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+	    bsg_job->reply_payload.sg_cnt, app_reply,
+	    sizeof(struct app_pinfo_reply) + sizeof(struct app_pinfo) * num_cnt);
+
+	kfree(app_reply);
+
+	return rval;
+}
+
+/**
+ * qla_edif_app_getstats - app would like to read various statistics info
+ * @vha: host adapter pointer
+ * @bsg_job: user request
+ */
+static int32_t
+qla_edif_app_getstats(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+{
+	int32_t			rval = 0;
+	struct fc_bsg_reply	*bsg_reply = bsg_job->reply;
+	uint32_t ret_size, size;
+
+	struct app_sinfo_req	app_req;
+	struct app_stats_reply	*app_reply;
+
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+	    bsg_job->request_payload.sg_cnt, &app_req,
+	    sizeof(struct app_sinfo_req));
+	if (app_req.num_ports == 0) {
+		ql_dbg(ql_dbg_async, vha, 0x911d,
+		   "%s app did not indicate number of ports to return\n",
+		    __func__);
+		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+		rval = -1;
+	}
+
+	size = sizeof(struct app_stats_reply) +
+	    (sizeof(struct app_sinfo) * app_req.num_ports);
+
+	if (size > bsg_job->reply_payload.payload_len)
+		ret_size = bsg_job->reply_payload.payload_len;
+	else
+		ret_size = size;
+
+	app_reply = kzalloc(size, GFP_KERNEL);
+	if (!app_reply) {
+		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+		rval = -1;
+	} else {
+		struct fc_port	*fcport = NULL, *tf;
+		uint32_t	pcnt = 0;
+
+		list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) {
+			if (fcport->edif.enable) {
+				if (pcnt > app_req.num_ports)
+					break;
+
+				app_reply->elem[pcnt].rekey_count =
+				    fcport->edif.rekey_cnt;
+				app_reply->elem[pcnt].tx_bytes =
+				    fcport->edif.tx_bytes;
+				app_reply->elem[pcnt].rx_bytes =
+				    fcport->edif.rx_bytes;
+
+				memcpy(app_reply->elem[pcnt].remote_wwpn,
+				    fcport->port_name, 8);
+
+				pcnt++;
+			}
+		}
+		app_reply->elem_count = pcnt;
+		SET_DID_STATUS(bsg_reply->result, DID_OK);
+	}
+
+	bsg_reply->reply_payload_rcv_len =
+	    sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+	       bsg_job->reply_payload.sg_cnt, app_reply, ret_size);
+
+	kfree(app_reply);
+
+	return rval;
+}
+
+int32_t
+qla_edif_app_mgmt(struct bsg_job *bsg_job)
+{
+	struct fc_bsg_request	*bsg_request = bsg_job->request;
+	struct fc_bsg_reply	*bsg_reply = bsg_job->reply;
+	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
+	scsi_qla_host_t		*vha = shost_priv(host);
+	struct app_id		appcheck;
+	bool done = true;
+	int32_t         rval = 0;
+	uint32_t	vnd_sc = bsg_request->rqst_data.h_vendor.vendor_cmd[1];
+
+	ql_dbg(ql_dbg_edif, vha, 0x911d, "%s vnd subcmd=%x\n",
+	    __func__, vnd_sc);
+
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+	    bsg_job->request_payload.sg_cnt, &appcheck,
+	    sizeof(struct app_id));
+
+	if (!vha->hw->flags.edif_enabled ||
+		test_bit(VPORT_DELETE, &vha->dpc_flags)) {
+		ql_dbg(ql_dbg_edif, vha, 0x911d,
+		    "%s edif not enabled or vp delete. bsg ptr done %p. dpc_flags %lx\n",
+		    __func__, bsg_job, vha->dpc_flags);
+
+		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+		goto done;
+	}
+
+	if (!qla_edif_app_check(vha, appcheck)) {
+		ql_dbg(ql_dbg_edif, vha, 0x911d,
+		    "%s app checked failed.\n",
+		    __func__);
+
+		bsg_job->reply_len = sizeof(struct fc_bsg_reply);
+		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+		goto done;
+	}
+
+	switch (vnd_sc) {
+	case QL_VND_SC_SA_UPDATE:
+		done = false;
+		rval = qla24xx_sadb_update(bsg_job);
+		break;
+	case QL_VND_SC_APP_START:
+		rval = qla_edif_app_start(vha, bsg_job);
+		break;
+	case QL_VND_SC_APP_STOP:
+		rval = qla_edif_app_stop(vha, bsg_job);
+		break;
+	case QL_VND_SC_AUTH_OK:
+		rval = qla_edif_app_authok(vha, bsg_job);
+		break;
+	case QL_VND_SC_AUTH_FAIL:
+		rval = qla_edif_app_authfail(vha, bsg_job);
+		break;
+	case QL_VND_SC_GET_FCINFO:
+		rval = qla_edif_app_getfcinfo(vha, bsg_job);
+		break;
+	case QL_VND_SC_GET_STATS:
+		rval = qla_edif_app_getstats(vha, bsg_job);
+		break;
+	default:
+		ql_dbg(ql_dbg_edif, vha, 0x911d, "%s unknown cmd=%x\n",
+		    __func__,
+		    bsg_request->rqst_data.h_vendor.vendor_cmd[1]);
+		rval = EXT_STATUS_INVALID_PARAM;
+		bsg_job->reply_len = sizeof(struct fc_bsg_reply);
+		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+		break;
+	}
+
+done:
+	if (done) {
+		ql_dbg(ql_dbg_user, vha, 0x7009,
+		    "%s: %d  bsg ptr done %p\n", __func__, __LINE__, bsg_job);
+		bsg_job_done(bsg_job, bsg_reply->result,
+		    bsg_reply->reply_payload_rcv_len);
+	}
+
+	return rval;
+}
+
+static struct edif_sa_ctl *
+qla_edif_add_sa_ctl(fc_port_t *fcport, struct qla_sa_update_frame *sa_frame,
+	int dir)
+{
+	struct	edif_sa_ctl *sa_ctl;
+	struct qla_sa_update_frame *sap;
+	int	index = sa_frame->fast_sa_index;
+	unsigned long flags = 0;
+
+	sa_ctl = kzalloc(sizeof(*sa_ctl), GFP_KERNEL);
+	if (!sa_ctl) {
+		/* couldn't get space */
+		ql_dbg(ql_dbg_edif, fcport->vha, 0x9100,
+		    "unable to allocate SA CTL\n");
+		return NULL;
+	}
+
+	/*
+	 * need to allocate sa_index here and save it
+	 * in both sa_ctl->index and sa_frame->fast_sa_index;
+	 * If alloc fails then delete sa_ctl and return NULL
+	 */
+	INIT_LIST_HEAD(&sa_ctl->next);
+	sap = &sa_ctl->sa_frame;
+	*sap = *sa_frame;
+	sa_ctl->index = index;
+	sa_ctl->fcport = fcport;
+	sa_ctl->flags = 0;
+	sa_ctl->state = 0L;
+	ql_dbg(ql_dbg_edif, fcport->vha, 0x9100,
+	    "%s: Added sa_ctl %p, index %d, state 0x%lx\n",
+	    __func__, sa_ctl, sa_ctl->index, sa_ctl->state);
+	spin_lock_irqsave(&fcport->edif.sa_list_lock, flags);
+	if (dir == SAU_FLG_TX)
+		list_add_tail(&sa_ctl->next, &fcport->edif.tx_sa_list);
+	else
+		list_add_tail(&sa_ctl->next, &fcport->edif.rx_sa_list);
+	spin_unlock_irqrestore(&fcport->edif.sa_list_lock, flags);
+
+	return sa_ctl;
+}
+
+void
+qla_edif_flush_sa_ctl_lists(fc_port_t *fcport)
+{
+	struct edif_sa_ctl *sa_ctl, *tsa_ctl;
+	unsigned long flags = 0;
+
+	spin_lock_irqsave(&fcport->edif.sa_list_lock, flags);
+
+	list_for_each_entry_safe(sa_ctl, tsa_ctl, &fcport->edif.tx_sa_list,
+	    next) {
+		list_del(&sa_ctl->next);
+		kfree(sa_ctl);
+	}
+
+	list_for_each_entry_safe(sa_ctl, tsa_ctl, &fcport->edif.rx_sa_list,
+	    next) {
+		list_del(&sa_ctl->next);
+		kfree(sa_ctl);
+	}
+
+	spin_unlock_irqrestore(&fcport->edif.sa_list_lock, flags);
+}
+
+struct edif_sa_ctl *
+qla_edif_find_sa_ctl_by_index(fc_port_t *fcport, int index, int dir)
+{
+	struct edif_sa_ctl *sa_ctl, *tsa_ctl;
+	struct list_head *sa_list;
+
+	if (dir == SAU_FLG_TX)
+		sa_list = &fcport->edif.tx_sa_list;
+	else
+		sa_list = &fcport->edif.rx_sa_list;
+
+	list_for_each_entry_safe(sa_ctl, tsa_ctl, sa_list, next) {
+		if (test_bit(EDIF_SA_CTL_USED, &sa_ctl->state) &&
+		    sa_ctl->index == index)
+			return sa_ctl;
+	}
+	return NULL;
+}
+
+/* add the sa to the correct list */
+static int
+qla24xx_check_sadb_avail_slot(struct bsg_job *bsg_job, fc_port_t *fcport,
+	struct qla_sa_update_frame *sa_frame)
+{
+	struct edif_sa_ctl *sa_ctl = NULL;
+	int dir;
+	uint16_t sa_index;
+
+	dir = (sa_frame->flags & SAU_FLG_TX);
+
+	/* map the spi to an sa_index */
+	sa_index = qla_edif_sadb_get_sa_index(fcport, sa_frame);
+	if (sa_index == RX_DELETE_NO_EDIF_SA_INDEX) {
+		/* process rx delete */
+		ql_dbg(ql_dbg_edif, fcport->vha, 0x3063,
+		    "%s: rx delete for lid 0x%x, spi 0x%x, no entry found\n",
+		    __func__, fcport->loop_id, sa_frame->spi);
+
+		/* build and send the aen */
+		fcport->edif.rx_sa_set = 1;
+		fcport->edif.rx_sa_pending = 0;
+		qla_edb_eventcreate(fcport->vha,
+		    VND_CMD_AUTH_STATE_SAUPDATE_COMPL,
+		    QL_VND_SA_STAT_SUCCESS,
+		    QL_VND_RX_SA_KEY, fcport);
+
+		/* force a return of good bsg status; */
+		return RX_DELETE_NO_EDIF_SA_INDEX;
+	} else if (sa_index == INVALID_EDIF_SA_INDEX) {
+		ql_dbg(ql_dbg_edif, fcport->vha, 0x9100,
+		    "%s: Failed to get sa_index for spi 0x%x, dir: %d\n",
+		    __func__, sa_frame->spi, dir);
+		return INVALID_EDIF_SA_INDEX;
+	}
+
+	ql_dbg(ql_dbg_edif, fcport->vha, 0x9100,
+	    "%s: index %d allocated to spi 0x%x, dir: %d, nport_handle: 0x%x\n",
+	    __func__, sa_index, sa_frame->spi, dir, fcport->loop_id);
+
+	/* This is a local copy of sa_frame. */
+	sa_frame->fast_sa_index = sa_index;
+	/* create the sa_ctl */
+	sa_ctl = qla_edif_add_sa_ctl(fcport, sa_frame, dir);
+	if (!sa_ctl) {
+		ql_dbg(ql_dbg_edif, fcport->vha, 0x9100,
+		    "%s: Failed to add sa_ctl for spi 0x%x, dir: %d, sa_index: %d\n",
+		    __func__, sa_frame->spi, dir, sa_index);
+		return -1;
+	}
+
+	set_bit(EDIF_SA_CTL_USED, &sa_ctl->state);
+
+	if (dir == SAU_FLG_TX)
+		fcport->edif.tx_rekey_cnt++;
+	else
+		fcport->edif.rx_rekey_cnt++;
+
+	ql_dbg(ql_dbg_edif, fcport->vha, 0x9100,
+	    "%s: Found sa_ctl %p, index %d, state 0x%lx, tx_cnt %d, rx_cnt %d, nport_handle: 0x%x\n",
+	    __func__, sa_ctl, sa_ctl->index, sa_ctl->state,
+	    fcport->edif.tx_rekey_cnt,
+	    fcport->edif.rx_rekey_cnt, fcport->loop_id);
+
+	return 0;
+}
+
+#define QLA_SA_UPDATE_FLAGS_RX_KEY      0x0
+#define QLA_SA_UPDATE_FLAGS_TX_KEY      0x2
+
+int
+qla24xx_sadb_update(struct bsg_job *bsg_job)
+{
+	struct	fc_bsg_reply	*bsg_reply = bsg_job->reply;
+	struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
+	scsi_qla_host_t *vha = shost_priv(host);
+	fc_port_t		*fcport = NULL;
+	srb_t			*sp = NULL;
+	struct edif_list_entry *edif_entry = NULL;
+	int			found = 0;
+	int			rval = 0;
+	int result = 0;
+	struct qla_sa_update_frame sa_frame;
+	struct srb_iocb *iocb_cmd;
+
+	ql_dbg(ql_dbg_edif + ql_dbg_verbose, vha, 0x911d,
+	    "%s entered, vha: 0x%p\n", __func__, vha);
+
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+	    bsg_job->request_payload.sg_cnt, &sa_frame,
+	    sizeof(struct qla_sa_update_frame));
+
+	/* Check if host is online */
+	if (!vha->flags.online) {
+		ql_log(ql_log_warn, vha, 0x70a1, "Host is not online\n");
+		rval = -EIO;
+		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+		goto done;
+	}
+
+	if (vha->e_dbell.db_flags != EDB_ACTIVE) {
+		ql_log(ql_log_warn, vha, 0x70a1, "App not started\n");
+		rval = -EIO;
+		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+		goto done;
+	}
+
+	fcport = qla2x00_find_fcport_by_pid(vha, &sa_frame.port_id);
+	if (fcport) {
+		found = 1;
+		if (sa_frame.flags == QLA_SA_UPDATE_FLAGS_TX_KEY)
+			fcport->edif.tx_bytes = 0;
+		if (sa_frame.flags == QLA_SA_UPDATE_FLAGS_RX_KEY)
+			fcport->edif.rx_bytes = 0;
+	}
+
+	if (!found) {
+		ql_dbg(ql_dbg_edif, vha, 0x70a3, "Failed to find port= %06x\n",
+		    sa_frame.port_id.b24);
+		rval = -EINVAL;
+		SET_DID_STATUS(bsg_reply->result, DID_TARGET_FAILURE);
+		goto done;
+	}
+
+	/* make sure the nport_handle is valid */
+	if (fcport->loop_id == FC_NO_LOOP_ID) {
+		ql_dbg(ql_dbg_edif, vha, 0x70e1,
+		    "%s: %8phN lid=FC_NO_LOOP_ID, spi: 0x%x, DS %d, returning NO_CONNECT\n",
+		    __func__, fcport->port_name, sa_frame.spi,
+		    fcport->disc_state);
+		rval = -EINVAL;
+		SET_DID_STATUS(bsg_reply->result, DID_NO_CONNECT);
+		goto done;
+	}
+
+	/* allocate and queue an sa_ctl */
+	result = qla24xx_check_sadb_avail_slot(bsg_job, fcport, &sa_frame);
+
+	/* failure of bsg */
+	if (result == INVALID_EDIF_SA_INDEX) {
+		ql_dbg(ql_dbg_edif, vha, 0x70e1,
+		    "%s: %8phN, skipping update.\n",
+		    __func__, fcport->port_name);
+		rval = -EINVAL;
+		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+		goto done;
+
+	/* rx delete failure */
+	} else if (result == RX_DELETE_NO_EDIF_SA_INDEX) {
+		ql_dbg(ql_dbg_edif, vha, 0x70e1,
+		    "%s: %8phN, skipping rx delete.\n",
+		    __func__, fcport->port_name);
+		SET_DID_STATUS(bsg_reply->result, DID_OK);
+		goto done;
+	}
+
+	ql_dbg(ql_dbg_edif, vha, 0x70e1,
+	    "%s: %8phN, sa_index in sa_frame: %d flags %xh\n",
+	    __func__, fcport->port_name, sa_frame.fast_sa_index,
+	    sa_frame.flags);
+
+	/* looking for rx index and delete */
+	if (((sa_frame.flags & SAU_FLG_TX) == 0) &&
+	    (sa_frame.flags & SAU_FLG_INV)) {
+		uint16_t nport_handle = fcport->loop_id;
+		uint16_t sa_index = sa_frame.fast_sa_index;
+
+		/*
+		 * make sure we have an existing rx key, otherwise just process
+		 * this as a straight delete just like TX
+		 * This is NOT a normal case, it indicates an error recovery or key cleanup
+		 * by the ipsec code above us.
+		 */
+		edif_entry = qla_edif_list_find_sa_index(fcport, fcport->loop_id);
+		if (!edif_entry) {
+			ql_dbg(ql_dbg_edif, vha, 0x911d,
+			    "%s: WARNING: no active sa_index for nport_handle 0x%x, forcing delete for sa_index 0x%x\n",
+			    __func__, fcport->loop_id, sa_index);
+			goto force_rx_delete;
+		}
+
+		/*
+		 * if we have a forced delete for rx, remove the sa_index from the edif list
+		 * and proceed with normal delete.  The rx delay timer should not be running
+		 */
+		if ((sa_frame.flags & SAU_FLG_FORCE_DELETE) == SAU_FLG_FORCE_DELETE) {
+			qla_edif_list_delete_sa_index(fcport, edif_entry);
+			ql_dbg(ql_dbg_edif, vha, 0x911d,
+			    "%s: FORCE DELETE flag found for nport_handle 0x%x, sa_index 0x%x, forcing DELETE\n",
+			    __func__, fcport->loop_id, sa_index);
+			kfree(edif_entry);
+			goto force_rx_delete;
+		}
+
+		/*
+		 * delayed rx delete
+		 *
+		 * if delete_sa_index is not invalid then there is already
+		 * a delayed index in progress, return bsg bad status
+		 */
+		if (edif_entry->delete_sa_index != INVALID_EDIF_SA_INDEX) {
+			struct edif_sa_ctl *sa_ctl;
+
+			ql_dbg(ql_dbg_edif, vha, 0x911d,
+			    "%s: delete for lid 0x%x, delete_sa_index %d is pending\n",
+			    __func__, edif_entry->handle, edif_entry->delete_sa_index);
+
+			/* free up the sa_ctl that was allocated with the sa_index */
+			sa_ctl = qla_edif_find_sa_ctl_by_index(fcport, sa_index,
+			    (sa_frame.flags & SAU_FLG_TX));
+			if (sa_ctl) {
+				ql_dbg(ql_dbg_edif, vha, 0x3063,
+				    "%s: freeing sa_ctl for index %d\n",
+				    __func__, sa_ctl->index);
+				qla_edif_free_sa_ctl(fcport, sa_ctl, sa_ctl->index);
+			}
+
+			/* release the sa_index */
+			ql_dbg(ql_dbg_edif, vha, 0x3063,
+			    "%s: freeing sa_index %d, nph: 0x%x\n",
+			    __func__, sa_index, nport_handle);
+			qla_edif_sadb_delete_sa_index(fcport, nport_handle, sa_index);
+
+			rval = -EINVAL;
+			SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+			goto done;
+		}
+
+		fcport->edif.rekey_cnt++;
+
+		/* configure and start the rx delay timer */
+		edif_entry->fcport = fcport;
+		edif_entry->timer.expires = jiffies + RX_DELAY_DELETE_TIMEOUT * HZ;
+
+		ql_dbg(ql_dbg_edif, vha, 0x911d,
+		    "%s: adding timer, entry: %p, delete sa_index %d, lid 0x%x to edif_list\n",
+		    __func__, edif_entry, sa_index, nport_handle);
+
+		/*
+		 * Start the timer when we queue the delayed rx delete.
+		 * This is an activity timer that goes off if we have not
+		 * received packets with the new sa_index
+		 */
+		add_timer(&edif_entry->timer);
+
+		/*
+		 * sa_delete for rx key with an active rx key including this one
+		 * add the delete rx sa index to the hash so we can look for it
+		 * in the rsp queue.  Do this after making any changes to the
+		 * edif_entry as part of the rx delete.
+		 */
+
+		ql_dbg(ql_dbg_edif, vha, 0x911d,
+		    "%s: delete sa_index %d, lid 0x%x to edif_list. bsg done ptr %p\n",
+		    __func__, sa_index, nport_handle, bsg_job);
+
+		edif_entry->delete_sa_index = sa_index;
+
+		bsg_job->reply_len = sizeof(struct fc_bsg_reply);
+		bsg_reply->result = DID_OK << 16;
+
+		goto done;
+
+	/*
+	 * rx index and update
+	 * add the index to the list and continue with normal update
+	 */
+	} else if (((sa_frame.flags & SAU_FLG_TX) == 0) &&
+	    ((sa_frame.flags & SAU_FLG_INV) == 0)) {
+		/* sa_update for rx key */
+		uint32_t nport_handle = fcport->loop_id;
+		uint16_t sa_index = sa_frame.fast_sa_index;
+		int result;
+
+		/*
+		 * add the update rx sa index to the hash so we can look for it
+		 * in the rsp queue and continue normally
+		 */
+
+		ql_dbg(ql_dbg_edif, vha, 0x911d,
+		    "%s:  adding update sa_index %d, lid 0x%x to edif_list\n",
+		    __func__, sa_index, nport_handle);
+
+		result = qla_edif_list_add_sa_update_index(fcport, sa_index,
+		    nport_handle);
+		if (result) {
+			ql_dbg(ql_dbg_edif, vha, 0x911d,
+			    "%s: SA_UPDATE failed to add new sa index %d to list for lid 0x%x\n",
+			    __func__, sa_index, nport_handle);
+		}
+	}
+	if (sa_frame.flags & SAU_FLG_GMAC_MODE)
+		fcport->edif.aes_gmac = 1;
+	else
+		fcport->edif.aes_gmac = 0;
+
+force_rx_delete:
+	/*
+	 * sa_update for both rx and tx keys, sa_delete for tx key
+	 * immediately process the request
+	 */
+	sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
+	if (!sp) {
+		rval = -ENOMEM;
+		SET_DID_STATUS(bsg_reply->result, DID_IMM_RETRY);
+		goto done;
+	}
+
+	sp->type = SRB_SA_UPDATE;
+	sp->name = "bsg_sa_update";
+	sp->u.bsg_job = bsg_job;
+	/* sp->free = qla2x00_bsg_sp_free; */
+	sp->free = qla2x00_rel_sp;
+	sp->done = qla2x00_bsg_job_done;
+	iocb_cmd = &sp->u.iocb_cmd;
+	iocb_cmd->u.sa_update.sa_frame  = sa_frame;
+
+	rval = qla2x00_start_sp(sp);
+	if (rval != QLA_SUCCESS) {
+		ql_log(ql_dbg_edif, vha, 0x70e3,
+		    "qla2x00_start_sp failed=%d.\n", rval);
+
+		qla2x00_rel_sp(sp);
+		rval = -EIO;
+		SET_DID_STATUS(bsg_reply->result, DID_IMM_RETRY);
+		goto done;
+	}
+
+	ql_dbg(ql_dbg_edif, vha, 0x911d,
+	    "%s:  %s sent, hdl=%x, portid=%06x.\n",
+	    __func__, sp->name, sp->handle, fcport->d_id.b24);
+
+	fcport->edif.rekey_cnt++;
+	bsg_job->reply_len = sizeof(struct fc_bsg_reply);
+	SET_DID_STATUS(bsg_reply->result, DID_OK);
+
+	return 0;
+
+/*
+ * send back error status
+ */
+done:
+	bsg_job->reply_len = sizeof(struct fc_bsg_reply);
+	ql_dbg(ql_dbg_edif, vha, 0x911d,
+	    "%s:status: FAIL, result: 0x%x, bsg ptr done %p\n",
+	    __func__, bsg_reply->result, bsg_job);
+	bsg_job_done(bsg_job, bsg_reply->result,
+	    bsg_reply->reply_payload_rcv_len);
+
+	return 0;
+}
+
+static void
+qla_enode_free(scsi_qla_host_t *vha, struct enode *node)
+{
+	node->ntype = N_UNDEF;
+	kfree(node);
+}
+
+/**
+ * qla_enode_init - initialize enode structs & lock
+ * @vha: host adapter pointer
+ *
+ * should only be called when driver attaching
+ */
+void
+qla_enode_init(scsi_qla_host_t *vha)
+{
+	struct	qla_hw_data *ha = vha->hw;
+	char	name[32];
+
+	if (vha->pur_cinfo.enode_flags == ENODE_ACTIVE) {
+		/* list still active - error */
+		ql_dbg(ql_dbg_edif, vha, 0x09102, "%s enode still active\n",
+		    __func__);
+		return;
+	}
+
+	/* initialize lock which protects pur_core & init list */
+	spin_lock_init(&vha->pur_cinfo.pur_lock);
+	INIT_LIST_HEAD(&vha->pur_cinfo.head);
+
+	snprintf(name, sizeof(name), "%s_%d_purex", QLA2XXX_DRIVER_NAME,
+	    ha->pdev->device);
+}
+
+/**
+ * qla_enode_stop - stop and clear and enode data
+ * @vha: host adapter pointer
+ *
+ * called when app notified it is exiting
+ */
+void
+qla_enode_stop(scsi_qla_host_t *vha)
+{
+	unsigned long flags;
+	struct enode *node, *q;
+
+	if (vha->pur_cinfo.enode_flags != ENODE_ACTIVE) {
+		/* doorbell list not enabled */
+		ql_dbg(ql_dbg_edif, vha, 0x09102,
+		    "%s enode not active\n", __func__);
+		return;
+	}
+
+	/* grab lock so list doesn't move */
+	spin_lock_irqsave(&vha->pur_cinfo.pur_lock, flags);
+
+	vha->pur_cinfo.enode_flags &= ~ENODE_ACTIVE; /* mark it not active */
+
+	/* hopefully this is a null list at this point */
+	list_for_each_entry_safe(node, q, &vha->pur_cinfo.head, list) {
+		ql_dbg(ql_dbg_edif, vha, 0x910f,
+		    "%s freeing enode type=%x, cnt=%x\n", __func__, node->ntype,
+		    node->dinfo.nodecnt);
+		list_del_init(&node->list);
+		qla_enode_free(vha, node);
+	}
+	spin_unlock_irqrestore(&vha->pur_cinfo.pur_lock, flags);
+}
+
+/*
+ *  allocate enode struct and populate buffer
+ *  returns: enode pointer with buffers
+ *           NULL on error
+ */
+static struct enode *
+qla_enode_alloc(scsi_qla_host_t *vha, uint32_t ntype)
+{
+	struct enode		*node;
+	struct purexevent	*purex;
+
+	node = kzalloc(RX_ELS_SIZE, GFP_ATOMIC);
+	if (!node)
+		return NULL;
+
+	purex = &node->u.purexinfo;
+	purex->msgp = (u8 *)(node + 1);
+	purex->msgp_len = ELS_MAX_PAYLOAD;
+
+	node->ntype = ntype;
+	INIT_LIST_HEAD(&node->list);
+	return node;
+}
+
+static void
+qla_enode_add(scsi_qla_host_t *vha, struct enode *ptr)
+{
+	unsigned long flags;
+
+	ql_dbg(ql_dbg_edif + ql_dbg_verbose, vha, 0x9109,
+	    "%s add enode for type=%x, cnt=%x\n",
+	    __func__, ptr->ntype, ptr->dinfo.nodecnt);
+
+	spin_lock_irqsave(&vha->pur_cinfo.pur_lock, flags);
+	list_add_tail(&ptr->list, &vha->pur_cinfo.head);
+	spin_unlock_irqrestore(&vha->pur_cinfo.pur_lock, flags);
+
+	return;
+}
+
+static struct enode *
+qla_enode_find(scsi_qla_host_t *vha, uint32_t ntype, uint32_t p1, uint32_t p2)
+{
+	struct enode		*node_rtn = NULL;
+	struct enode		*list_node = NULL;
+	unsigned long		flags;
+	struct list_head	*pos, *q;
+	uint32_t		sid;
+	uint32_t		rw_flag;
+	struct purexevent	*purex;
+
+	/* secure the list from moving under us */
+	spin_lock_irqsave(&vha->pur_cinfo.pur_lock, flags);
+
+	list_for_each_safe(pos, q, &vha->pur_cinfo.head) {
+		list_node = list_entry(pos, struct enode, list);
+
+		/* node type determines what p1 and p2 are */
+		purex = &list_node->u.purexinfo;
+		sid = p1;
+		rw_flag = p2;
+
+		if (purex->pur_info.pur_sid.b24 == sid) {
+			if (purex->pur_info.pur_pend == 1 &&
+			    rw_flag == PUR_GET) {
+				/*
+				 * if the receive is in progress
+				 * and its a read/get then can't
+				 * transfer yet
+				 */
+				ql_dbg(ql_dbg_edif, vha, 0x9106,
+				    "%s purex xfer in progress for sid=%x\n",
+				    __func__, sid);
+			} else {
+				/* found it and its complete */
+				node_rtn = list_node;
+				list_del(pos);
+				break;
+			}
+		}
+	}
+
+	spin_unlock_irqrestore(&vha->pur_cinfo.pur_lock, flags);
+
+	return node_rtn;
+}
+
+/**
+ * qla_pur_get_pending - read/return authentication message sent
+ *  from remote port
+ * @vha: host adapter pointer
+ * @fcport: session pointer
+ * @bsg_job: user request where the message is copy to.
+ */
+static int
+qla_pur_get_pending(scsi_qla_host_t *vha, fc_port_t *fcport,
+	struct bsg_job *bsg_job)
+{
+	struct enode		*ptr;
+	struct purexevent	*purex;
+	struct qla_bsg_auth_els_reply *rpl =
+	    (struct qla_bsg_auth_els_reply *)bsg_job->reply;
+
+	bsg_job->reply_len = sizeof(*rpl);
+
+	ptr = qla_enode_find(vha, N_PUREX, fcport->d_id.b24, PUR_GET);
+	if (!ptr) {
+		ql_dbg(ql_dbg_edif, vha, 0x9111,
+		    "%s no enode data found for %8phN sid=%06x\n",
+		    __func__, fcport->port_name, fcport->d_id.b24);
+		SET_DID_STATUS(rpl->r.result, DID_IMM_RETRY);
+		return -EIO;
+	}
+
+	/*
+	 * enode is now off the linked list and is ours to deal with
+	 */
+	purex = &ptr->u.purexinfo;
+
+	/* Copy info back to caller */
+	rpl->rx_xchg_address = purex->pur_info.pur_rx_xchg_address;
+
+	SET_DID_STATUS(rpl->r.result, DID_OK);
+	rpl->r.reply_payload_rcv_len =
+	    sg_pcopy_from_buffer(bsg_job->reply_payload.sg_list,
+		bsg_job->reply_payload.sg_cnt, purex->msgp,
+		purex->pur_info.pur_bytes_rcvd, 0);
+
+	/* data copy / passback completed - destroy enode */
+	qla_enode_free(vha, ptr);
+
+	return 0;
+}
+
+/* it is assume qpair lock is held */
+static int
+qla_els_reject_iocb(scsi_qla_host_t *vha, struct qla_qpair *qp,
+	struct qla_els_pt_arg *a)
+{
+	struct els_entry_24xx *els_iocb;
+
+	els_iocb = __qla2x00_alloc_iocbs(qp, NULL);
+	if (!els_iocb) {
+		ql_log(ql_log_warn, vha, 0x700c,
+		    "qla2x00_alloc_iocbs failed.\n");
+		return QLA_FUNCTION_FAILED;
+	}
+
+	qla_els_pt_iocb(vha, els_iocb, a);
+
+	ql_dbg(ql_dbg_edif, vha, 0x0183,
+	    "Sending ELS reject...\n");
+	ql_dump_buffer(ql_dbg_edif + ql_dbg_verbose, vha, 0x0185,
+	    vha->hw->elsrej.c, sizeof(*vha->hw->elsrej.c));
+	/* flush iocb to mem before notifying hw doorbell */
+	wmb();
+	qla2x00_start_iocbs(vha, qp->req);
+	return 0;
+}
+
+void
+qla_edb_init(scsi_qla_host_t *vha)
+{
+	if (vha->e_dbell.db_flags == EDB_ACTIVE) {
+		/* list already init'd - error */
+		ql_dbg(ql_dbg_edif, vha, 0x09102,
+		    "edif db already initialized, cannot reinit\n");
+		return;
+	}
+
+	/* initialize lock which protects doorbell & init list */
+	spin_lock_init(&vha->e_dbell.db_lock);
+	INIT_LIST_HEAD(&vha->e_dbell.head);
+
+	/* create and initialize doorbell */
+	init_completion(&vha->e_dbell.dbell);
+}
+
+static void
+qla_edb_node_free(scsi_qla_host_t *vha, struct edb_node *node)
+{
+	/*
+	 * releases the space held by this edb node entry
+	 * this function does _not_ free the edb node itself
+	 * NB: the edb node entry passed should not be on any list
+	 *
+	 * currently for doorbell there's no additional cleanup
+	 * needed, but here as a placeholder for furture use.
+	 */
+
+	if (!node) {
+		ql_dbg(ql_dbg_edif, vha, 0x09122,
+		    "%s error - no valid node passed\n", __func__);
+		return;
+	}
+
+	node->ntype = N_UNDEF;
+}
+
+/* function called when app is stopping */
+
+void
+qla_edb_stop(scsi_qla_host_t *vha)
+{
+	unsigned long flags;
+	struct edb_node *node, *q;
+
+	if (vha->e_dbell.db_flags != EDB_ACTIVE) {
+		/* doorbell list not enabled */
+		ql_dbg(ql_dbg_edif, vha, 0x09102,
+		    "%s doorbell not enabled\n", __func__);
+		return;
+	}
+
+	/* grab lock so list doesn't move */
+	spin_lock_irqsave(&vha->e_dbell.db_lock, flags);
+
+	vha->e_dbell.db_flags &= ~EDB_ACTIVE; /* mark it not active */
+	/* hopefully this is a null list at this point */
+	list_for_each_entry_safe(node, q, &vha->e_dbell.head, list) {
+		ql_dbg(ql_dbg_edif, vha, 0x910f,
+		    "%s freeing edb_node type=%x\n",
+		    __func__, node->ntype);
+		qla_edb_node_free(vha, node);
+		list_del(&node->list);
+
+		kfree(node);
+	}
+	spin_unlock_irqrestore(&vha->e_dbell.db_lock, flags);
+
+	/* wake up doorbell waiters - they'll be dismissed with error code */
+	complete_all(&vha->e_dbell.dbell);
+}
+
+static struct edb_node *
+qla_edb_node_alloc(scsi_qla_host_t *vha, uint32_t ntype)
+{
+	struct edb_node	*node;
+
+	node = kzalloc(sizeof(*node), GFP_ATOMIC);
+	if (!node) {
+		/* couldn't get space */
+		ql_dbg(ql_dbg_edif, vha, 0x9100,
+		    "edb node unable to be allocated\n");
+		return NULL;
+	}
+
+	node->ntype = ntype;
+	INIT_LIST_HEAD(&node->list);
+	return node;
+}
+
+/* adds a already allocated enode to the linked list */
+static bool
+qla_edb_node_add(scsi_qla_host_t *vha, struct edb_node *ptr)
+{
+	unsigned long		flags;
+
+	if (vha->e_dbell.db_flags != EDB_ACTIVE) {
+		/* doorbell list not enabled */
+		ql_dbg(ql_dbg_edif, vha, 0x09102,
+		    "%s doorbell not enabled\n", __func__);
+		return false;
+	}
+
+	spin_lock_irqsave(&vha->e_dbell.db_lock, flags);
+	list_add_tail(&ptr->list, &vha->e_dbell.head);
+	spin_unlock_irqrestore(&vha->e_dbell.db_lock, flags);
+
+	/* ring doorbell for waiters */
+	complete(&vha->e_dbell.dbell);
+
+	return true;
+}
+
+/* adds event to doorbell list */
+void
+qla_edb_eventcreate(scsi_qla_host_t *vha, uint32_t dbtype,
+	uint32_t data, uint32_t data2, fc_port_t	*sfcport)
+{
+	struct edb_node	*edbnode;
+	fc_port_t *fcport = sfcport;
+	port_id_t id;
+
+	if (!vha->hw->flags.edif_enabled) {
+		/* edif not enabled */
+		return;
+	}
+
+	if (vha->e_dbell.db_flags != EDB_ACTIVE) {
+		if (fcport)
+			fcport->edif.auth_state = dbtype;
+		/* doorbell list not enabled */
+		ql_dbg(ql_dbg_edif, vha, 0x09102,
+		    "%s doorbell not enabled (type=%d\n", __func__, dbtype);
+		return;
+	}
+
+	edbnode = qla_edb_node_alloc(vha, dbtype);
+	if (!edbnode) {
+		ql_dbg(ql_dbg_edif, vha, 0x09102,
+		    "%s unable to alloc db node\n", __func__);
+		return;
+	}
+
+	if (!fcport) {
+		id.b.domain = (data >> 16) & 0xff;
+		id.b.area = (data >> 8) & 0xff;
+		id.b.al_pa = data & 0xff;
+		ql_dbg(ql_dbg_edif, vha, 0x09222,
+		    "%s: Arrived s_id: %06x\n", __func__,
+		    id.b24);
+		fcport = qla2x00_find_fcport_by_pid(vha, &id);
+		if (!fcport) {
+			ql_dbg(ql_dbg_edif, vha, 0x09102,
+			    "%s can't find fcport for sid= 0x%x - ignoring\n",
+			__func__, id.b24);
+			kfree(edbnode);
+			return;
+		}
+	}
+
+	/* populate the edb node */
+	switch (dbtype) {
+	case VND_CMD_AUTH_STATE_NEEDED:
+	case VND_CMD_AUTH_STATE_SESSION_SHUTDOWN:
+		edbnode->u.plogi_did.b24 = fcport->d_id.b24;
+		break;
+	case VND_CMD_AUTH_STATE_ELS_RCVD:
+		edbnode->u.els_sid.b24 = fcport->d_id.b24;
+		break;
+	case VND_CMD_AUTH_STATE_SAUPDATE_COMPL:
+		edbnode->u.sa_aen.port_id = fcport->d_id;
+		edbnode->u.sa_aen.status =  data;
+		edbnode->u.sa_aen.key_type =  data2;
+		break;
+	default:
+		ql_dbg(ql_dbg_edif, vha, 0x09102,
+			"%s unknown type: %x\n", __func__, dbtype);
+		qla_edb_node_free(vha, edbnode);
+		kfree(edbnode);
+		edbnode = NULL;
+		break;
+	}
+
+	if (edbnode && (!qla_edb_node_add(vha, edbnode))) {
+		ql_dbg(ql_dbg_edif, vha, 0x09102,
+		    "%s unable to add dbnode\n", __func__);
+		qla_edb_node_free(vha, edbnode);
+		kfree(edbnode);
+		return;
+	}
+	if (edbnode && fcport)
+		fcport->edif.auth_state = dbtype;
+	ql_dbg(ql_dbg_edif, vha, 0x09102,
+	    "%s Doorbell produced : type=%d %p\n", __func__, dbtype, edbnode);
+}
+
+static struct edb_node *
+qla_edb_getnext(scsi_qla_host_t *vha)
+{
+	unsigned long	flags;
+	struct edb_node	*edbnode = NULL;
+
+	spin_lock_irqsave(&vha->e_dbell.db_lock, flags);
+
+	/* db nodes are fifo - no qualifications done */
+	if (!list_empty(&vha->e_dbell.head)) {
+		edbnode = list_first_entry(&vha->e_dbell.head,
+		    struct edb_node, list);
+		list_del(&edbnode->list);
+	}
+
+	spin_unlock_irqrestore(&vha->e_dbell.db_lock, flags);
+
+	return edbnode;
+}
+
+void
+qla_edif_timer(scsi_qla_host_t *vha)
+{
+	struct qla_hw_data *ha = vha->hw;
+
+	if (!vha->vp_idx && N2N_TOPO(ha) && ha->flags.n2n_fw_acc_sec) {
+		if (vha->e_dbell.db_flags != EDB_ACTIVE &&
+		    ha->edif_post_stop_cnt_down) {
+			ha->edif_post_stop_cnt_down--;
+
+			/*
+			 * turn off auto 'Plogi Acc + secure=1' feature
+			 * Set Add FW option[3]
+			 * BIT_15, if.
+			 */
+			if (ha->edif_post_stop_cnt_down == 0) {
+				ql_dbg(ql_dbg_async, vha, 0x911d,
+				       "%s chip reset to turn off PLOGI ACC + secure\n",
+				       __func__);
+				set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
+			}
+		} else {
+			ha->edif_post_stop_cnt_down = 60;
+		}
+	}
+}
+
+/*
+ * app uses separate thread to read this. It'll wait until the doorbell
+ * is rung by the driver or the max wait time has expired
+ */
+ssize_t
+edif_doorbell_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+	struct edb_node	*dbnode = NULL;
+	struct edif_app_dbell *ap = (struct edif_app_dbell *)buf;
+	uint32_t dat_siz, buf_size, sz;
+
+	/* TODO: app currently hardcoded to 256. Will transition to bsg */
+	sz = 256;
+
+	/* stop new threads from waiting if we're not init'd */
+	if (vha->e_dbell.db_flags != EDB_ACTIVE) {
+		ql_dbg(ql_dbg_edif + ql_dbg_verbose, vha, 0x09122,
+		    "%s error - edif db not enabled\n", __func__);
+		return 0;
+	}
+
+	if (!vha->hw->flags.edif_enabled) {
+		/* edif not enabled */
+		ql_dbg(ql_dbg_edif + ql_dbg_verbose, vha, 0x09122,
+		    "%s error - edif not enabled\n", __func__);
+		return -1;
+	}
+
+	buf_size = 0;
+	while ((sz - buf_size) >= sizeof(struct edb_node)) {
+		/* remove the next item from the doorbell list */
+		dat_siz = 0;
+		dbnode = qla_edb_getnext(vha);
+		if (dbnode) {
+			ap->event_code = dbnode->ntype;
+			switch (dbnode->ntype) {
+			case VND_CMD_AUTH_STATE_SESSION_SHUTDOWN:
+			case VND_CMD_AUTH_STATE_NEEDED:
+				ap->port_id = dbnode->u.plogi_did;
+				dat_siz += sizeof(ap->port_id);
+				break;
+			case VND_CMD_AUTH_STATE_ELS_RCVD:
+				ap->port_id = dbnode->u.els_sid;
+				dat_siz += sizeof(ap->port_id);
+				break;
+			case VND_CMD_AUTH_STATE_SAUPDATE_COMPL:
+				ap->port_id = dbnode->u.sa_aen.port_id;
+				memcpy(ap->event_data, &dbnode->u,
+						sizeof(struct edif_sa_update_aen));
+				dat_siz += sizeof(struct edif_sa_update_aen);
+				break;
+			default:
+				/* unknown node type, rtn unknown ntype */
+				ap->event_code = VND_CMD_AUTH_STATE_UNDEF;
+				memcpy(ap->event_data, &dbnode->ntype, 4);
+				dat_siz += 4;
+				break;
+			}
+
+			ql_dbg(ql_dbg_edif, vha, 0x09102,
+				"%s Doorbell consumed : type=%d %p\n",
+				__func__, dbnode->ntype, dbnode);
+			/* we're done with the db node, so free it up */
+			qla_edb_node_free(vha, dbnode);
+			kfree(dbnode);
+		} else {
+			break;
+		}
+
+		ap->event_data_size = dat_siz;
+		/* 8bytes = ap->event_code + ap->event_data_size */
+		buf_size += dat_siz + 8;
+		ap = (struct edif_app_dbell *)(buf + buf_size);
+	}
+	return buf_size;
+}
+
+static void qla_noop_sp_done(srb_t *sp, int res)
+{
+	sp->free(sp);
+}
+
+/*
+ * Called from work queue
+ * build and send the sa_update iocb to delete an rx sa_index
+ */
+int
+qla24xx_issue_sa_replace_iocb(scsi_qla_host_t *vha, struct qla_work_evt *e)
+{
+	srb_t *sp;
+	fc_port_t	*fcport = NULL;
+	struct srb_iocb *iocb_cmd = NULL;
+	int rval = QLA_SUCCESS;
+	struct	edif_sa_ctl *sa_ctl = e->u.sa_update.sa_ctl;
+	uint16_t nport_handle = e->u.sa_update.nport_handle;
+
+	ql_dbg(ql_dbg_edif, vha, 0x70e6,
+	    "%s: starting,  sa_ctl: %p\n", __func__, sa_ctl);
+
+	if (!sa_ctl) {
+		ql_dbg(ql_dbg_edif, vha, 0x70e6,
+		    "sa_ctl allocation failed\n");
+		return -ENOMEM;
+	}
+
+	fcport = sa_ctl->fcport;
+
+	/* Alloc SRB structure */
+	sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
+	if (!sp) {
+		ql_dbg(ql_dbg_edif, vha, 0x70e6,
+		 "SRB allocation failed\n");
+		return -ENOMEM;
+	}
+
+	fcport->flags |= FCF_ASYNC_SENT;
+	iocb_cmd = &sp->u.iocb_cmd;
+	iocb_cmd->u.sa_update.sa_ctl = sa_ctl;
+
+	ql_dbg(ql_dbg_edif, vha, 0x3073,
+	    "Enter: SA REPL portid=%06x, sa_ctl %p, index %x, nport_handle: 0x%x\n",
+	    fcport->d_id.b24, sa_ctl, sa_ctl->index, nport_handle);
+	/*
+	 * if this is a sadb cleanup delete, mark it so the isr can
+	 * take the correct action
+	 */
+	if (sa_ctl->flags & EDIF_SA_CTL_FLG_CLEANUP_DEL) {
+		/* mark this srb as a cleanup delete */
+		sp->flags |= SRB_EDIF_CLEANUP_DELETE;
+		ql_dbg(ql_dbg_edif, vha, 0x70e6,
+		    "%s: sp 0x%p flagged as cleanup delete\n", __func__, sp);
+	}
+
+	sp->type = SRB_SA_REPLACE;
+	sp->name = "SA_REPLACE";
+	sp->fcport = fcport;
+	sp->free = qla2x00_rel_sp;
+	sp->done = qla_noop_sp_done;
+
+	rval = qla2x00_start_sp(sp);
+
+	if (rval != QLA_SUCCESS)
+		rval = QLA_FUNCTION_FAILED;
+
+	return rval;
+}
+
+void qla24xx_sa_update_iocb(srb_t *sp, struct sa_update_28xx *sa_update_iocb)
+{
+	int	itr = 0;
+	struct	scsi_qla_host		*vha = sp->vha;
+	struct	qla_sa_update_frame	*sa_frame =
+		&sp->u.iocb_cmd.u.sa_update.sa_frame;
+	u8 flags = 0;
+
+	switch (sa_frame->flags & (SAU_FLG_INV | SAU_FLG_TX)) {
+	case 0:
+		ql_dbg(ql_dbg_edif, vha, 0x911d,
+		    "%s: EDIF SA UPDATE RX IOCB  vha: 0x%p  index: %d\n",
+		    __func__, vha, sa_frame->fast_sa_index);
+		break;
+	case 1:
+		ql_dbg(ql_dbg_edif, vha, 0x911d,
+		    "%s: EDIF SA DELETE RX IOCB  vha: 0x%p  index: %d\n",
+		    __func__, vha, sa_frame->fast_sa_index);
+		flags |= SA_FLAG_INVALIDATE;
+		break;
+	case 2:
+		ql_dbg(ql_dbg_edif, vha, 0x911d,
+		    "%s: EDIF SA UPDATE TX IOCB  vha: 0x%p  index: %d\n",
+		    __func__, vha, sa_frame->fast_sa_index);
+		flags |= SA_FLAG_TX;
+		break;
+	case 3:
+		ql_dbg(ql_dbg_edif, vha, 0x911d,
+		    "%s: EDIF SA DELETE TX IOCB  vha: 0x%p  index: %d\n",
+		    __func__, vha, sa_frame->fast_sa_index);
+		flags |= SA_FLAG_TX | SA_FLAG_INVALIDATE;
+		break;
+	}
+
+	sa_update_iocb->entry_type = SA_UPDATE_IOCB_TYPE;
+	sa_update_iocb->entry_count = 1;
+	sa_update_iocb->sys_define = 0;
+	sa_update_iocb->entry_status = 0;
+	sa_update_iocb->handle = sp->handle;
+	sa_update_iocb->u.nport_handle = cpu_to_le16(sp->fcport->loop_id);
+	sa_update_iocb->vp_index = sp->fcport->vha->vp_idx;
+	sa_update_iocb->port_id[0] = sp->fcport->d_id.b.al_pa;
+	sa_update_iocb->port_id[1] = sp->fcport->d_id.b.area;
+	sa_update_iocb->port_id[2] = sp->fcport->d_id.b.domain;
+
+	sa_update_iocb->flags = flags;
+	sa_update_iocb->salt = cpu_to_le32(sa_frame->salt);
+	sa_update_iocb->spi = cpu_to_le32(sa_frame->spi);
+	sa_update_iocb->sa_index = cpu_to_le16(sa_frame->fast_sa_index);
+
+	sa_update_iocb->sa_control |= SA_CNTL_ENC_FCSP;
+	if (sp->fcport->edif.aes_gmac)
+		sa_update_iocb->sa_control |= SA_CNTL_AES_GMAC;
+
+	if (sa_frame->flags & SAU_FLG_KEY256) {
+		sa_update_iocb->sa_control |= SA_CNTL_KEY256;
+		for (itr = 0; itr < 32; itr++)
+			sa_update_iocb->sa_key[itr] = sa_frame->sa_key[itr];
+	} else {
+		sa_update_iocb->sa_control |= SA_CNTL_KEY128;
+		for (itr = 0; itr < 16; itr++)
+			sa_update_iocb->sa_key[itr] = sa_frame->sa_key[itr];
+	}
+
+	ql_dbg(ql_dbg_edif, vha, 0x921d,
+	    "%s SAU Port ID = %02x%02x%02x, flags=%xh, index=%u, ctl=%xh, SPI 0x%x flags 0x%x hdl=%x gmac %d\n",
+	    __func__, sa_update_iocb->port_id[2], sa_update_iocb->port_id[1],
+	    sa_update_iocb->port_id[0], sa_update_iocb->flags, sa_update_iocb->sa_index,
+	    sa_update_iocb->sa_control, sa_update_iocb->spi, sa_frame->flags, sp->handle,
+	    sp->fcport->edif.aes_gmac);
+
+	if (sa_frame->flags & SAU_FLG_TX)
+		sp->fcport->edif.tx_sa_pending = 1;
+	else
+		sp->fcport->edif.rx_sa_pending = 1;
+
+	sp->fcport->vha->qla_stats.control_requests++;
+}
+
+void
+qla24xx_sa_replace_iocb(srb_t *sp, struct sa_update_28xx *sa_update_iocb)
+{
+	struct	scsi_qla_host		*vha = sp->vha;
+	struct srb_iocb *srb_iocb = &sp->u.iocb_cmd;
+	struct	edif_sa_ctl		*sa_ctl = srb_iocb->u.sa_update.sa_ctl;
+	uint16_t nport_handle = sp->fcport->loop_id;
+
+	sa_update_iocb->entry_type = SA_UPDATE_IOCB_TYPE;
+	sa_update_iocb->entry_count = 1;
+	sa_update_iocb->sys_define = 0;
+	sa_update_iocb->entry_status = 0;
+	sa_update_iocb->handle = sp->handle;
+
+	sa_update_iocb->u.nport_handle = cpu_to_le16(nport_handle);
+
+	sa_update_iocb->vp_index = sp->fcport->vha->vp_idx;
+	sa_update_iocb->port_id[0] = sp->fcport->d_id.b.al_pa;
+	sa_update_iocb->port_id[1] = sp->fcport->d_id.b.area;
+	sa_update_iocb->port_id[2] = sp->fcport->d_id.b.domain;
+
+	/* Invalidate the index. salt, spi, control & key are ignore */
+	sa_update_iocb->flags = SA_FLAG_INVALIDATE;
+	sa_update_iocb->salt = 0;
+	sa_update_iocb->spi = 0;
+	sa_update_iocb->sa_index = cpu_to_le16(sa_ctl->index);
+	sa_update_iocb->sa_control = 0;
+
+	ql_dbg(ql_dbg_edif, vha, 0x921d,
+	    "%s SAU DELETE RX Port ID = %02x:%02x:%02x, lid %d flags=%xh, index=%u, hdl=%x\n",
+	    __func__, sa_update_iocb->port_id[2], sa_update_iocb->port_id[1],
+	    sa_update_iocb->port_id[0], nport_handle, sa_update_iocb->flags,
+	    sa_update_iocb->sa_index, sp->handle);
+
+	sp->fcport->vha->qla_stats.control_requests++;
+}
+
+void qla24xx_auth_els(scsi_qla_host_t *vha, void **pkt, struct rsp_que **rsp)
+{
+	struct purex_entry_24xx *p = *pkt;
+	struct enode		*ptr;
+	int		sid;
+	u16 totlen;
+	struct purexevent	*purex;
+	struct scsi_qla_host *host = NULL;
+	int rc;
+	struct fc_port *fcport;
+	struct qla_els_pt_arg a;
+	be_id_t beid;
+
+	memset(&a, 0, sizeof(a));
+
+	a.els_opcode = ELS_AUTH_ELS;
+	a.nport_handle = p->nport_handle;
+	a.rx_xchg_address = p->rx_xchg_addr;
+	a.did.b.domain = p->s_id[2];
+	a.did.b.area   = p->s_id[1];
+	a.did.b.al_pa  = p->s_id[0];
+	a.tx_byte_count = a.tx_len = sizeof(struct fc_els_ls_rjt);
+	a.tx_addr = vha->hw->elsrej.cdma;
+	a.vp_idx = vha->vp_idx;
+	a.control_flags = EPD_ELS_RJT;
+
+	sid = p->s_id[0] | (p->s_id[1] << 8) | (p->s_id[2] << 16);
+
+	totlen = (le16_to_cpu(p->frame_size) & 0x0fff) - PURX_ELS_HEADER_SIZE;
+	if (le16_to_cpu(p->status_flags) & 0x8000) {
+		totlen = le16_to_cpu(p->trunc_frame_size);
+		qla_els_reject_iocb(vha, (*rsp)->qpair, &a);
+		__qla_consume_iocb(vha, pkt, rsp);
+		return;
+	}
+
+	if (totlen > MAX_PAYLOAD) {
+		ql_dbg(ql_dbg_edif, vha, 0x0910d,
+		    "%s WARNING: verbose ELS frame received (totlen=%x)\n",
+		    __func__, totlen);
+		qla_els_reject_iocb(vha, (*rsp)->qpair, &a);
+		__qla_consume_iocb(vha, pkt, rsp);
+		return;
+	}
+
+	if (!vha->hw->flags.edif_enabled) {
+		/* edif support not enabled */
+		ql_dbg(ql_dbg_edif, vha, 0x910e, "%s edif not enabled\n",
+		    __func__);
+		qla_els_reject_iocb(vha, (*rsp)->qpair, &a);
+		__qla_consume_iocb(vha, pkt, rsp);
+		return;
+	}
+
+	ptr = qla_enode_alloc(vha, N_PUREX);
+	if (!ptr) {
+		ql_dbg(ql_dbg_edif, vha, 0x09109,
+		    "WARNING: enode alloc failed for sid=%x\n",
+		    sid);
+		qla_els_reject_iocb(vha, (*rsp)->qpair, &a);
+		__qla_consume_iocb(vha, pkt, rsp);
+		return;
+	}
+
+	purex = &ptr->u.purexinfo;
+	purex->pur_info.pur_sid = a.did;
+	purex->pur_info.pur_pend = 0;
+	purex->pur_info.pur_bytes_rcvd = totlen;
+	purex->pur_info.pur_rx_xchg_address = le32_to_cpu(p->rx_xchg_addr);
+	purex->pur_info.pur_nphdl = le16_to_cpu(p->nport_handle);
+	purex->pur_info.pur_did.b.domain =  p->d_id[2];
+	purex->pur_info.pur_did.b.area =  p->d_id[1];
+	purex->pur_info.pur_did.b.al_pa =  p->d_id[0];
+	purex->pur_info.vp_idx = p->vp_idx;
+
+	rc = __qla_copy_purex_to_buffer(vha, pkt, rsp, purex->msgp,
+		purex->msgp_len);
+	if (rc) {
+		qla_els_reject_iocb(vha, (*rsp)->qpair, &a);
+		qla_enode_free(vha, ptr);
+		return;
+	}
+	beid.al_pa = purex->pur_info.pur_did.b.al_pa;
+	beid.area   = purex->pur_info.pur_did.b.area;
+	beid.domain = purex->pur_info.pur_did.b.domain;
+	host = qla_find_host_by_d_id(vha, beid);
+	if (!host) {
+		ql_log(ql_log_fatal, vha, 0x508b,
+		    "%s Drop ELS due to unable to find host %06x\n",
+		    __func__, purex->pur_info.pur_did.b24);
+
+		qla_els_reject_iocb(vha, (*rsp)->qpair, &a);
+		qla_enode_free(vha, ptr);
+		return;
+	}
+
+	fcport = qla2x00_find_fcport_by_pid(host, &purex->pur_info.pur_sid);
+
+	if (host->e_dbell.db_flags != EDB_ACTIVE ||
+	    (fcport && EDIF_SESSION_DOWN(fcport))) {
+		ql_dbg(ql_dbg_edif, host, 0x0910c, "%s e_dbell.db_flags =%x %06x\n",
+		    __func__, host->e_dbell.db_flags,
+		    fcport ? fcport->d_id.b24 : 0);
+
+		qla_els_reject_iocb(host, (*rsp)->qpair, &a);
+		qla_enode_free(host, ptr);
+		return;
+	}
+
+	/* add the local enode to the list */
+	qla_enode_add(host, ptr);
+
+	ql_dbg(ql_dbg_edif, host, 0x0910c,
+	    "%s COMPLETE purex->pur_info.pur_bytes_rcvd =%xh s:%06x -> d:%06x xchg=%xh\n",
+	    __func__, purex->pur_info.pur_bytes_rcvd, purex->pur_info.pur_sid.b24,
+	    purex->pur_info.pur_did.b24, p->rx_xchg_addr);
+
+	qla_edb_eventcreate(host, VND_CMD_AUTH_STATE_ELS_RCVD, sid, 0, NULL);
+}
+
+static uint16_t  qla_edif_get_sa_index_from_freepool(fc_port_t *fcport, int dir)
+{
+	struct scsi_qla_host *vha = fcport->vha;
+	struct qla_hw_data *ha = vha->hw;
+	void *sa_id_map;
+	unsigned long flags = 0;
+	u16 sa_index;
+
+	ql_dbg(ql_dbg_edif + ql_dbg_verbose, vha, 0x3063,
+	    "%s: entry\n", __func__);
+
+	if (dir)
+		sa_id_map = ha->edif_tx_sa_id_map;
+	else
+		sa_id_map = ha->edif_rx_sa_id_map;
+
+	spin_lock_irqsave(&ha->sadb_fp_lock, flags);
+	sa_index = find_first_zero_bit(sa_id_map, EDIF_NUM_SA_INDEX);
+	if (sa_index >=  EDIF_NUM_SA_INDEX) {
+		spin_unlock_irqrestore(&ha->sadb_fp_lock, flags);
+		return INVALID_EDIF_SA_INDEX;
+	}
+	set_bit(sa_index, sa_id_map);
+	spin_unlock_irqrestore(&ha->sadb_fp_lock, flags);
+
+	if (dir)
+		sa_index += EDIF_TX_SA_INDEX_BASE;
+
+	ql_dbg(ql_dbg_edif, vha, 0x3063,
+	    "%s: index retrieved from free pool %d\n", __func__, sa_index);
+
+	return sa_index;
+}
+
+/* find an sadb entry for an nport_handle */
+static struct edif_sa_index_entry *
+qla_edif_sadb_find_sa_index_entry(uint16_t nport_handle,
+		struct list_head *sa_list)
+{
+	struct edif_sa_index_entry *entry;
+	struct edif_sa_index_entry *tentry;
+	struct list_head *indx_list = sa_list;
+
+	list_for_each_entry_safe(entry, tentry, indx_list, next) {
+		if (entry->handle == nport_handle)
+			return entry;
+	}
+	return NULL;
+}
+
+/* remove an sa_index from the nport_handle and return it to the free pool */
+static int qla_edif_sadb_delete_sa_index(fc_port_t *fcport, uint16_t nport_handle,
+		uint16_t sa_index)
+{
+	struct edif_sa_index_entry *entry;
+	struct list_head *sa_list;
+	int dir = (sa_index < EDIF_TX_SA_INDEX_BASE) ? 0 : 1;
+	int slot = 0;
+	int free_slot_count = 0;
+	scsi_qla_host_t *vha = fcport->vha;
+	struct qla_hw_data *ha = vha->hw;
+	unsigned long flags = 0;
+
+	ql_dbg(ql_dbg_edif, vha, 0x3063,
+	    "%s: entry\n", __func__);
+
+	if (dir)
+		sa_list = &ha->sadb_tx_index_list;
+	else
+		sa_list = &ha->sadb_rx_index_list;
+
+	entry = qla_edif_sadb_find_sa_index_entry(nport_handle, sa_list);
+	if (!entry) {
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: no entry found for nport_handle 0x%x\n",
+		    __func__, nport_handle);
+		return -1;
+	}
+
+	spin_lock_irqsave(&ha->sadb_lock, flags);
+	/*
+	 * each tx/rx direction has up to 2 sa indexes/slots. 1 slot for in flight traffic
+	 * the other is use at re-key time.
+	 */
+	for (slot = 0; slot < 2; slot++) {
+		if (entry->sa_pair[slot].sa_index == sa_index) {
+			entry->sa_pair[slot].sa_index = INVALID_EDIF_SA_INDEX;
+			entry->sa_pair[slot].spi = 0;
+			free_slot_count++;
+			qla_edif_add_sa_index_to_freepool(fcport, dir, sa_index);
+		} else if (entry->sa_pair[slot].sa_index == INVALID_EDIF_SA_INDEX) {
+			free_slot_count++;
+		}
+	}
+
+	if (free_slot_count == 2) {
+		list_del(&entry->next);
+		kfree(entry);
+	}
+	spin_unlock_irqrestore(&ha->sadb_lock, flags);
+
+	ql_dbg(ql_dbg_edif, vha, 0x3063,
+	    "%s: sa_index %d removed, free_slot_count: %d\n",
+	    __func__, sa_index, free_slot_count);
+
+	return 0;
+}
+
+void
+qla28xx_sa_update_iocb_entry(scsi_qla_host_t *v, struct req_que *req,
+	struct sa_update_28xx *pkt)
+{
+	const char *func = "SA_UPDATE_RESPONSE_IOCB";
+	srb_t *sp;
+	struct edif_sa_ctl *sa_ctl;
+	int old_sa_deleted = 1;
+	uint16_t nport_handle;
+	struct scsi_qla_host *vha;
+
+	sp = qla2x00_get_sp_from_handle(v, func, req, pkt);
+
+	if (!sp) {
+		ql_dbg(ql_dbg_edif, v, 0x3063,
+			"%s: no sp found for pkt\n", __func__);
+		return;
+	}
+	/* use sp->vha due to npiv */
+	vha = sp->vha;
+
+	switch (pkt->flags & (SA_FLAG_INVALIDATE | SA_FLAG_TX)) {
+	case 0:
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: EDIF SA UPDATE RX IOCB  vha: 0x%p  index: %d\n",
+		    __func__, vha, pkt->sa_index);
+		break;
+	case 1:
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: EDIF SA DELETE RX IOCB  vha: 0x%p  index: %d\n",
+		    __func__, vha, pkt->sa_index);
+		break;
+	case 2:
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: EDIF SA UPDATE TX IOCB  vha: 0x%p  index: %d\n",
+		    __func__, vha, pkt->sa_index);
+		break;
+	case 3:
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: EDIF SA DELETE TX IOCB  vha: 0x%p  index: %d\n",
+		    __func__, vha, pkt->sa_index);
+		break;
+	}
+
+	/*
+	 * dig the nport handle out of the iocb, fcport->loop_id can not be trusted
+	 * to be correct during cleanup sa_update iocbs.
+	 */
+	nport_handle = sp->fcport->loop_id;
+
+	ql_dbg(ql_dbg_edif, vha, 0x3063,
+	    "%s: %8phN comp status=%x old_sa_info=%x new_sa_info=%x lid %d, index=0x%x pkt_flags %xh hdl=%x\n",
+	    __func__, sp->fcport->port_name, pkt->u.comp_sts, pkt->old_sa_info, pkt->new_sa_info,
+	    nport_handle, pkt->sa_index, pkt->flags, sp->handle);
+
+	/* if rx delete, remove the timer */
+	if ((pkt->flags & (SA_FLAG_INVALIDATE | SA_FLAG_TX)) ==  SA_FLAG_INVALIDATE) {
+		struct edif_list_entry *edif_entry;
+
+		sp->fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
+
+		edif_entry = qla_edif_list_find_sa_index(sp->fcport, nport_handle);
+		if (edif_entry) {
+			ql_dbg(ql_dbg_edif, vha, 0x5033,
+			    "%s: removing edif_entry %p, new sa_index: 0x%x\n",
+			    __func__, edif_entry, pkt->sa_index);
+			qla_edif_list_delete_sa_index(sp->fcport, edif_entry);
+			del_timer(&edif_entry->timer);
+
+			ql_dbg(ql_dbg_edif, vha, 0x5033,
+			    "%s: releasing edif_entry %p, new sa_index: 0x%x\n",
+			    __func__, edif_entry, pkt->sa_index);
+
+			kfree(edif_entry);
+		}
+	}
+
+	/*
+	 * if this is a delete for either tx or rx, make sure it succeeded.
+	 * The new_sa_info field should be 0xffff on success
+	 */
+	if (pkt->flags & SA_FLAG_INVALIDATE)
+		old_sa_deleted = (le16_to_cpu(pkt->new_sa_info) == 0xffff) ? 1 : 0;
+
+	/* Process update and delete the same way */
+
+	/* If this is an sadb cleanup delete, bypass sending events to IPSEC */
+	if (sp->flags & SRB_EDIF_CLEANUP_DELETE) {
+		sp->fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: nph 0x%x, sa_index %d removed from fw\n",
+		    __func__, sp->fcport->loop_id, pkt->sa_index);
+
+	} else if ((pkt->entry_status == 0) && (pkt->u.comp_sts == 0) &&
+	    old_sa_deleted) {
+		/*
+		 * Note: Wa are only keeping track of latest SA,
+		 * so we know when we can start enableing encryption per I/O.
+		 * If all SA's get deleted, let FW reject the IOCB.
+
+		 * TODO: edif: don't set enabled here I think
+		 * TODO: edif: prli complete is where it should be set
+		 */
+		ql_dbg(ql_dbg_edif + ql_dbg_verbose, vha, 0x3063,
+			"SA(%x)updated for s_id %02x%02x%02x\n",
+			pkt->new_sa_info,
+			pkt->port_id[2], pkt->port_id[1], pkt->port_id[0]);
+		sp->fcport->edif.enable = 1;
+		if (pkt->flags & SA_FLAG_TX) {
+			sp->fcport->edif.tx_sa_set = 1;
+			sp->fcport->edif.tx_sa_pending = 0;
+			qla_edb_eventcreate(vha, VND_CMD_AUTH_STATE_SAUPDATE_COMPL,
+				QL_VND_SA_STAT_SUCCESS,
+				QL_VND_TX_SA_KEY, sp->fcport);
+		} else {
+			sp->fcport->edif.rx_sa_set = 1;
+			sp->fcport->edif.rx_sa_pending = 0;
+			qla_edb_eventcreate(vha, VND_CMD_AUTH_STATE_SAUPDATE_COMPL,
+				QL_VND_SA_STAT_SUCCESS,
+				QL_VND_RX_SA_KEY, sp->fcport);
+		}
+	} else {
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: %8phN SA update FAILED: sa_index: %d, new_sa_info %d, %02x%02x%02x\n",
+		    __func__, sp->fcport->port_name, pkt->sa_index, pkt->new_sa_info,
+		    pkt->port_id[2], pkt->port_id[1], pkt->port_id[0]);
+
+		if (pkt->flags & SA_FLAG_TX)
+			qla_edb_eventcreate(vha, VND_CMD_AUTH_STATE_SAUPDATE_COMPL,
+				(le16_to_cpu(pkt->u.comp_sts) << 16) | QL_VND_SA_STAT_FAILED,
+				QL_VND_TX_SA_KEY, sp->fcport);
+		else
+			qla_edb_eventcreate(vha, VND_CMD_AUTH_STATE_SAUPDATE_COMPL,
+				(le16_to_cpu(pkt->u.comp_sts) << 16) | QL_VND_SA_STAT_FAILED,
+				QL_VND_RX_SA_KEY, sp->fcport);
+	}
+
+	/* for delete, release sa_ctl, sa_index */
+	if (pkt->flags & SA_FLAG_INVALIDATE) {
+		/* release the sa_ctl */
+		sa_ctl = qla_edif_find_sa_ctl_by_index(sp->fcport,
+		    le16_to_cpu(pkt->sa_index), (pkt->flags & SA_FLAG_TX));
+		if (sa_ctl &&
+		    qla_edif_find_sa_ctl_by_index(sp->fcport, sa_ctl->index,
+			(pkt->flags & SA_FLAG_TX)) != NULL) {
+			ql_dbg(ql_dbg_edif + ql_dbg_verbose, vha, 0x3063,
+			    "%s: freeing sa_ctl for index %d\n",
+			    __func__, sa_ctl->index);
+			qla_edif_free_sa_ctl(sp->fcport, sa_ctl, sa_ctl->index);
+		} else {
+			ql_dbg(ql_dbg_edif, vha, 0x3063,
+			    "%s: sa_ctl NOT freed, sa_ctl: %p\n",
+			    __func__, sa_ctl);
+		}
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: freeing sa_index %d, nph: 0x%x\n",
+		    __func__, le16_to_cpu(pkt->sa_index), nport_handle);
+		qla_edif_sadb_delete_sa_index(sp->fcport, nport_handle,
+		    le16_to_cpu(pkt->sa_index));
+	/*
+	 * check for a failed sa_update and remove
+	 * the sadb entry.
+	 */
+	} else if (pkt->u.comp_sts) {
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: freeing sa_index %d, nph: 0x%x\n",
+		    __func__, pkt->sa_index, nport_handle);
+		qla_edif_sadb_delete_sa_index(sp->fcport, nport_handle,
+		    le16_to_cpu(pkt->sa_index));
+		switch (le16_to_cpu(pkt->u.comp_sts)) {
+		case CS_PORT_EDIF_UNAVAIL:
+		case CS_PORT_EDIF_LOGOUT:
+			qlt_schedule_sess_for_deletion(sp->fcport);
+			break;
+		default:
+			break;
+		}
+	}
+
+	sp->done(sp, 0);
+}
+
+/**
+ * qla28xx_start_scsi_edif() - Send a SCSI type 6 command to the ISP
+ * @sp: command to send to the ISP
+ *
+ * Return: non-zero if a failure occurred, else zero.
+ */
+int
+qla28xx_start_scsi_edif(srb_t *sp)
+{
+	int             nseg;
+	unsigned long   flags;
+	struct scsi_cmnd *cmd;
+	uint32_t        *clr_ptr;
+	uint32_t        index, i;
+	uint32_t        handle;
+	uint16_t        cnt;
+	int16_t        req_cnt;
+	uint16_t        tot_dsds;
+	__be32 *fcp_dl;
+	uint8_t additional_cdb_len;
+	struct ct6_dsd *ctx;
+	struct scsi_qla_host *vha = sp->vha;
+	struct qla_hw_data *ha = vha->hw;
+	struct cmd_type_6 *cmd_pkt;
+	struct dsd64	*cur_dsd;
+	uint8_t		avail_dsds = 0;
+	struct scatterlist *sg;
+	struct req_que *req = sp->qpair->req;
+	spinlock_t *lock = sp->qpair->qp_lock_ptr;
+
+	/* Setup device pointers. */
+	cmd = GET_CMD_SP(sp);
+
+	/* So we know we haven't pci_map'ed anything yet */
+	tot_dsds = 0;
+
+	/* Send marker if required */
+	if (vha->marker_needed != 0) {
+		if (qla2x00_marker(vha, sp->qpair, 0, 0, MK_SYNC_ALL) !=
+			QLA_SUCCESS) {
+			ql_log(ql_log_warn, vha, 0x300c,
+			    "qla2x00_marker failed for cmd=%p.\n", cmd);
+			return QLA_FUNCTION_FAILED;
+		}
+		vha->marker_needed = 0;
+	}
+
+	/* Acquire ring specific lock */
+	spin_lock_irqsave(lock, flags);
+
+	/* Check for room in outstanding command list. */
+	handle = req->current_outstanding_cmd;
+	for (index = 1; index < req->num_outstanding_cmds; index++) {
+		handle++;
+		if (handle == req->num_outstanding_cmds)
+			handle = 1;
+		if (!req->outstanding_cmds[handle])
+			break;
+	}
+	if (index == req->num_outstanding_cmds)
+		goto queuing_error;
+
+	/* Map the sg table so we have an accurate count of sg entries needed */
+	if (scsi_sg_count(cmd)) {
+		nseg = dma_map_sg(&ha->pdev->dev, scsi_sglist(cmd),
+		    scsi_sg_count(cmd), cmd->sc_data_direction);
+		if (unlikely(!nseg))
+			goto queuing_error;
+	} else {
+		nseg = 0;
+	}
+
+	tot_dsds = nseg;
+	req_cnt = qla24xx_calc_iocbs(vha, tot_dsds);
+	if (req->cnt < (req_cnt + 2)) {
+		cnt = IS_SHADOW_REG_CAPABLE(ha) ? *req->out_ptr :
+		    rd_reg_dword(req->req_q_out);
+		if (req->ring_index < cnt)
+			req->cnt = cnt - req->ring_index;
+		else
+			req->cnt = req->length -
+			    (req->ring_index - cnt);
+		if (req->cnt < (req_cnt + 2))
+			goto queuing_error;
+	}
+
+	ctx = sp->u.scmd.ct6_ctx =
+	    mempool_alloc(ha->ctx_mempool, GFP_ATOMIC);
+	if (!ctx) {
+		ql_log(ql_log_fatal, vha, 0x3010,
+		    "Failed to allocate ctx for cmd=%p.\n", cmd);
+		goto queuing_error;
+	}
+
+	memset(ctx, 0, sizeof(struct ct6_dsd));
+	ctx->fcp_cmnd = dma_pool_zalloc(ha->fcp_cmnd_dma_pool,
+	    GFP_ATOMIC, &ctx->fcp_cmnd_dma);
+	if (!ctx->fcp_cmnd) {
+		ql_log(ql_log_fatal, vha, 0x3011,
+		    "Failed to allocate fcp_cmnd for cmd=%p.\n", cmd);
+		goto queuing_error;
+	}
+
+	/* Initialize the DSD list and dma handle */
+	INIT_LIST_HEAD(&ctx->dsd_list);
+	ctx->dsd_use_cnt = 0;
+
+	if (cmd->cmd_len > 16) {
+		additional_cdb_len = cmd->cmd_len - 16;
+		if ((cmd->cmd_len % 4) != 0) {
+			/*
+			 * SCSI command bigger than 16 bytes must be
+			 * multiple of 4
+			 */
+			ql_log(ql_log_warn, vha, 0x3012,
+			    "scsi cmd len %d not multiple of 4 for cmd=%p.\n",
+			    cmd->cmd_len, cmd);
+			goto queuing_error_fcp_cmnd;
+		}
+		ctx->fcp_cmnd_len = 12 + cmd->cmd_len + 4;
+	} else {
+		additional_cdb_len = 0;
+		ctx->fcp_cmnd_len = 12 + 16 + 4;
+	}
+
+	cmd_pkt = (struct cmd_type_6 *)req->ring_ptr;
+	cmd_pkt->handle = make_handle(req->id, handle);
+
+	/*
+	 * Zero out remaining portion of packet.
+	 * tagged queuing modifier -- default is TSK_SIMPLE (0).
+	 */
+	clr_ptr = (uint32_t *)cmd_pkt + 2;
+	memset(clr_ptr, 0, REQUEST_ENTRY_SIZE - 8);
+	cmd_pkt->dseg_count = cpu_to_le16(tot_dsds);
+
+	/* No data transfer */
+	if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE) {
+		cmd_pkt->byte_count = cpu_to_le32(0);
+		goto no_dsds;
+	}
+
+	/* Set transfer direction */
+	if (cmd->sc_data_direction == DMA_TO_DEVICE) {
+		cmd_pkt->control_flags = cpu_to_le16(CF_WRITE_DATA);
+		vha->qla_stats.output_bytes += scsi_bufflen(cmd);
+		vha->qla_stats.output_requests++;
+		sp->fcport->edif.tx_bytes += scsi_bufflen(cmd);
+	} else if (cmd->sc_data_direction == DMA_FROM_DEVICE) {
+		cmd_pkt->control_flags = cpu_to_le16(CF_READ_DATA);
+		vha->qla_stats.input_bytes += scsi_bufflen(cmd);
+		vha->qla_stats.input_requests++;
+		sp->fcport->edif.rx_bytes += scsi_bufflen(cmd);
+	}
+
+	cmd_pkt->control_flags |= cpu_to_le16(CF_EN_EDIF);
+	cmd_pkt->control_flags &= ~(cpu_to_le16(CF_NEW_SA));
+
+	/* One DSD is available in the Command Type 6 IOCB */
+	avail_dsds = 1;
+	cur_dsd = &cmd_pkt->fcp_dsd;
+
+	/* Load data segments */
+	scsi_for_each_sg(cmd, sg, tot_dsds, i) {
+		dma_addr_t      sle_dma;
+		cont_a64_entry_t *cont_pkt;
+
+		/* Allocate additional continuation packets? */
+		if (avail_dsds == 0) {
+			/*
+			 * Five DSDs are available in the Continuation
+			 * Type 1 IOCB.
+			 */
+			cont_pkt = qla2x00_prep_cont_type1_iocb(vha, req);
+			cur_dsd = cont_pkt->dsd;
+			avail_dsds = 5;
+		}
+
+		sle_dma = sg_dma_address(sg);
+		put_unaligned_le64(sle_dma, &cur_dsd->address);
+		cur_dsd->length = cpu_to_le32(sg_dma_len(sg));
+		cur_dsd++;
+		avail_dsds--;
+	}
+
+no_dsds:
+	/* Set NPORT-ID and LUN number*/
+	cmd_pkt->nport_handle = cpu_to_le16(sp->fcport->loop_id);
+	cmd_pkt->port_id[0] = sp->fcport->d_id.b.al_pa;
+	cmd_pkt->port_id[1] = sp->fcport->d_id.b.area;
+	cmd_pkt->port_id[2] = sp->fcport->d_id.b.domain;
+	cmd_pkt->vp_index = sp->vha->vp_idx;
+
+	cmd_pkt->entry_type = COMMAND_TYPE_6;
+
+	/* Set total data segment count. */
+	cmd_pkt->entry_count = (uint8_t)req_cnt;
+
+	int_to_scsilun(cmd->device->lun, &cmd_pkt->lun);
+	host_to_fcp_swap((uint8_t *)&cmd_pkt->lun, sizeof(cmd_pkt->lun));
+
+	/* build FCP_CMND IU */
+	int_to_scsilun(cmd->device->lun, &ctx->fcp_cmnd->lun);
+	ctx->fcp_cmnd->additional_cdb_len = additional_cdb_len;
+
+	if (cmd->sc_data_direction == DMA_TO_DEVICE)
+		ctx->fcp_cmnd->additional_cdb_len |= 1;
+	else if (cmd->sc_data_direction == DMA_FROM_DEVICE)
+		ctx->fcp_cmnd->additional_cdb_len |= 2;
+
+	/* Populate the FCP_PRIO. */
+	if (ha->flags.fcp_prio_enabled)
+		ctx->fcp_cmnd->task_attribute |=
+		    sp->fcport->fcp_prio << 3;
+
+	memcpy(ctx->fcp_cmnd->cdb, cmd->cmnd, cmd->cmd_len);
+
+	fcp_dl = (__be32 *)(ctx->fcp_cmnd->cdb + 16 +
+	    additional_cdb_len);
+	*fcp_dl = htonl((uint32_t)scsi_bufflen(cmd));
+
+	cmd_pkt->fcp_cmnd_dseg_len = cpu_to_le16(ctx->fcp_cmnd_len);
+	put_unaligned_le64(ctx->fcp_cmnd_dma, &cmd_pkt->fcp_cmnd_dseg_address);
+
+	sp->flags |= SRB_FCP_CMND_DMA_VALID;
+	cmd_pkt->byte_count = cpu_to_le32((uint32_t)scsi_bufflen(cmd));
+	/* Set total data segment count. */
+	cmd_pkt->entry_count = (uint8_t)req_cnt;
+	cmd_pkt->entry_status = 0;
+
+	/* Build command packet. */
+	req->current_outstanding_cmd = handle;
+	req->outstanding_cmds[handle] = sp;
+	sp->handle = handle;
+	cmd->host_scribble = (unsigned char *)(unsigned long)handle;
+	req->cnt -= req_cnt;
+
+	/* Adjust ring index. */
+	wmb();
+	req->ring_index++;
+	if (req->ring_index == req->length) {
+		req->ring_index = 0;
+		req->ring_ptr = req->ring;
+	} else {
+		req->ring_ptr++;
+	}
+
+	sp->qpair->cmd_cnt++;
+	/* Set chip new ring index. */
+	wrt_reg_dword(req->req_q_in, req->ring_index);
+
+	spin_unlock_irqrestore(lock, flags);
+
+	return QLA_SUCCESS;
+
+queuing_error_fcp_cmnd:
+	dma_pool_free(ha->fcp_cmnd_dma_pool, ctx->fcp_cmnd, ctx->fcp_cmnd_dma);
+queuing_error:
+	if (tot_dsds)
+		scsi_dma_unmap(cmd);
+
+	if (sp->u.scmd.ct6_ctx) {
+		mempool_free(sp->u.scmd.ct6_ctx, ha->ctx_mempool);
+		sp->u.scmd.ct6_ctx = NULL;
+	}
+	spin_unlock_irqrestore(lock, flags);
+
+	return QLA_FUNCTION_FAILED;
+}
+
+/**********************************************
+ * edif update/delete sa_index list functions *
+ **********************************************/
+
+/* clear the edif_indx_list for this port */
+void qla_edif_list_del(fc_port_t *fcport)
+{
+	struct edif_list_entry *indx_lst;
+	struct edif_list_entry *tindx_lst;
+	struct list_head *indx_list = &fcport->edif.edif_indx_list;
+	unsigned long flags = 0;
+
+	spin_lock_irqsave(&fcport->edif.indx_list_lock, flags);
+	list_for_each_entry_safe(indx_lst, tindx_lst, indx_list, next) {
+		list_del(&indx_lst->next);
+		kfree(indx_lst);
+	}
+	spin_unlock_irqrestore(&fcport->edif.indx_list_lock, flags);
+}
+
+/******************
+ * SADB functions *
+ ******************/
+
+/* allocate/retrieve an sa_index for a given spi */
+static uint16_t qla_edif_sadb_get_sa_index(fc_port_t *fcport,
+		struct qla_sa_update_frame *sa_frame)
+{
+	struct edif_sa_index_entry *entry;
+	struct list_head *sa_list;
+	uint16_t sa_index;
+	int dir = sa_frame->flags & SAU_FLG_TX;
+	int slot = 0;
+	int free_slot = -1;
+	scsi_qla_host_t *vha = fcport->vha;
+	struct qla_hw_data *ha = vha->hw;
+	unsigned long flags = 0;
+	uint16_t nport_handle = fcport->loop_id;
+
+	ql_dbg(ql_dbg_edif, vha, 0x3063,
+	    "%s: entry  fc_port: %p, nport_handle: 0x%x\n",
+	    __func__, fcport, nport_handle);
+
+	if (dir)
+		sa_list = &ha->sadb_tx_index_list;
+	else
+		sa_list = &ha->sadb_rx_index_list;
+
+	entry = qla_edif_sadb_find_sa_index_entry(nport_handle, sa_list);
+	if (!entry) {
+		if ((sa_frame->flags & (SAU_FLG_TX | SAU_FLG_INV)) == SAU_FLG_INV) {
+			ql_dbg(ql_dbg_edif, vha, 0x3063,
+			    "%s: rx delete request with no entry\n", __func__);
+			return RX_DELETE_NO_EDIF_SA_INDEX;
+		}
+
+		/* if there is no entry for this nport, add one */
+		entry = kzalloc((sizeof(struct edif_sa_index_entry)), GFP_ATOMIC);
+		if (!entry)
+			return INVALID_EDIF_SA_INDEX;
+
+		sa_index = qla_edif_get_sa_index_from_freepool(fcport, dir);
+		if (sa_index == INVALID_EDIF_SA_INDEX) {
+			kfree(entry);
+			return INVALID_EDIF_SA_INDEX;
+		}
+
+		INIT_LIST_HEAD(&entry->next);
+		entry->handle = nport_handle;
+		entry->fcport = fcport;
+		entry->sa_pair[0].spi = sa_frame->spi;
+		entry->sa_pair[0].sa_index = sa_index;
+		entry->sa_pair[1].spi = 0;
+		entry->sa_pair[1].sa_index = INVALID_EDIF_SA_INDEX;
+		spin_lock_irqsave(&ha->sadb_lock, flags);
+		list_add_tail(&entry->next, sa_list);
+		spin_unlock_irqrestore(&ha->sadb_lock, flags);
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: Created new sadb entry for nport_handle 0x%x, spi 0x%x, returning sa_index %d\n",
+		    __func__, nport_handle, sa_frame->spi, sa_index);
+
+		return sa_index;
+	}
+
+	spin_lock_irqsave(&ha->sadb_lock, flags);
+
+	/* see if we already have an entry for this spi */
+	for (slot = 0; slot < 2; slot++) {
+		if (entry->sa_pair[slot].sa_index == INVALID_EDIF_SA_INDEX) {
+			free_slot = slot;
+		} else {
+			if (entry->sa_pair[slot].spi == sa_frame->spi) {
+				spin_unlock_irqrestore(&ha->sadb_lock, flags);
+				ql_dbg(ql_dbg_edif, vha, 0x3063,
+				    "%s: sadb slot %d entry for lid 0x%x, spi 0x%x found, sa_index %d\n",
+				    __func__, slot, entry->handle, sa_frame->spi,
+				    entry->sa_pair[slot].sa_index);
+				return entry->sa_pair[slot].sa_index;
+			}
+		}
+	}
+	spin_unlock_irqrestore(&ha->sadb_lock, flags);
+
+	/* both slots are used */
+	if (free_slot == -1) {
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: WARNING: No free slots in sadb for nport_handle 0x%x, spi: 0x%x\n",
+		    __func__, entry->handle, sa_frame->spi);
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: Slot 0  spi: 0x%x  sa_index: %d,  Slot 1  spi: 0x%x  sa_index: %d\n",
+		    __func__, entry->sa_pair[0].spi, entry->sa_pair[0].sa_index,
+		    entry->sa_pair[1].spi, entry->sa_pair[1].sa_index);
+
+		return INVALID_EDIF_SA_INDEX;
+	}
+
+	/* there is at least one free slot, use it */
+	sa_index = qla_edif_get_sa_index_from_freepool(fcport, dir);
+	if (sa_index == INVALID_EDIF_SA_INDEX) {
+		ql_dbg(ql_dbg_edif, fcport->vha, 0x3063,
+		    "%s: empty freepool!!\n", __func__);
+		return INVALID_EDIF_SA_INDEX;
+	}
+
+	spin_lock_irqsave(&ha->sadb_lock, flags);
+	entry->sa_pair[free_slot].spi = sa_frame->spi;
+	entry->sa_pair[free_slot].sa_index = sa_index;
+	spin_unlock_irqrestore(&ha->sadb_lock, flags);
+	ql_dbg(ql_dbg_edif, fcport->vha, 0x3063,
+	    "%s: sadb slot %d entry for nport_handle 0x%x, spi 0x%x added, returning sa_index %d\n",
+	    __func__, free_slot, entry->handle, sa_frame->spi, sa_index);
+
+	return sa_index;
+}
+
+/* release any sadb entries -- only done at teardown */
+void qla_edif_sadb_release(struct qla_hw_data *ha)
+{
+	struct list_head *pos;
+	struct list_head *tmp;
+	struct edif_sa_index_entry *entry;
+
+	list_for_each_safe(pos, tmp, &ha->sadb_rx_index_list) {
+		entry = list_entry(pos, struct edif_sa_index_entry, next);
+		list_del(&entry->next);
+		kfree(entry);
+	}
+
+	list_for_each_safe(pos, tmp, &ha->sadb_tx_index_list) {
+		entry = list_entry(pos, struct edif_sa_index_entry, next);
+		list_del(&entry->next);
+		kfree(entry);
+	}
+}
+
+/**************************
+ * sadb freepool functions
+ **************************/
+
+/* build the rx and tx sa_index free pools -- only done at fcport init */
+int qla_edif_sadb_build_free_pool(struct qla_hw_data *ha)
+{
+	ha->edif_tx_sa_id_map =
+	    kcalloc(BITS_TO_LONGS(EDIF_NUM_SA_INDEX), sizeof(long), GFP_KERNEL);
+
+	if (!ha->edif_tx_sa_id_map) {
+		ql_log_pci(ql_log_fatal, ha->pdev, 0x0009,
+		    "Unable to allocate memory for sadb tx.\n");
+		return -ENOMEM;
+	}
+
+	ha->edif_rx_sa_id_map =
+	    kcalloc(BITS_TO_LONGS(EDIF_NUM_SA_INDEX), sizeof(long), GFP_KERNEL);
+	if (!ha->edif_rx_sa_id_map) {
+		kfree(ha->edif_tx_sa_id_map);
+		ha->edif_tx_sa_id_map = NULL;
+		ql_log_pci(ql_log_fatal, ha->pdev, 0x0009,
+		    "Unable to allocate memory for sadb rx.\n");
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+/* release the free pool - only done during fcport teardown */
+void qla_edif_sadb_release_free_pool(struct qla_hw_data *ha)
+{
+	kfree(ha->edif_tx_sa_id_map);
+	ha->edif_tx_sa_id_map = NULL;
+	kfree(ha->edif_rx_sa_id_map);
+	ha->edif_rx_sa_id_map = NULL;
+}
+
+static void __chk_edif_rx_sa_delete_pending(scsi_qla_host_t *vha,
+		fc_port_t *fcport, uint32_t handle, uint16_t sa_index)
+{
+	struct edif_list_entry *edif_entry;
+	struct edif_sa_ctl *sa_ctl;
+	uint16_t delete_sa_index = INVALID_EDIF_SA_INDEX;
+	unsigned long flags = 0;
+	uint16_t nport_handle = fcport->loop_id;
+	uint16_t cached_nport_handle;
+
+	spin_lock_irqsave(&fcport->edif.indx_list_lock, flags);
+	edif_entry = qla_edif_list_find_sa_index(fcport, nport_handle);
+	if (!edif_entry) {
+		spin_unlock_irqrestore(&fcport->edif.indx_list_lock, flags);
+		return;		/* no pending delete for this handle */
+	}
+
+	/*
+	 * check for no pending delete for this index or iocb does not
+	 * match rx sa_index
+	 */
+	if (edif_entry->delete_sa_index == INVALID_EDIF_SA_INDEX ||
+	    edif_entry->update_sa_index != sa_index) {
+		spin_unlock_irqrestore(&fcport->edif.indx_list_lock, flags);
+		return;
+	}
+
+	/*
+	 * wait until we have seen at least EDIF_DELAY_COUNT transfers before
+	 * queueing RX delete
+	 */
+	if (edif_entry->count++ < EDIF_RX_DELETE_FILTER_COUNT) {
+		spin_unlock_irqrestore(&fcport->edif.indx_list_lock, flags);
+		return;
+	}
+
+	ql_dbg(ql_dbg_edif, vha, 0x5033,
+	    "%s: invalidating delete_sa_index,  update_sa_index: 0x%x sa_index: 0x%x, delete_sa_index: 0x%x\n",
+	    __func__, edif_entry->update_sa_index, sa_index, edif_entry->delete_sa_index);
+
+	delete_sa_index = edif_entry->delete_sa_index;
+	edif_entry->delete_sa_index = INVALID_EDIF_SA_INDEX;
+	cached_nport_handle = edif_entry->handle;
+	spin_unlock_irqrestore(&fcport->edif.indx_list_lock, flags);
+
+	/* sanity check on the nport handle */
+	if (nport_handle != cached_nport_handle) {
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: POST SA DELETE nport_handle mismatch: lid: 0x%x, edif_entry nph: 0x%x\n",
+		    __func__, nport_handle, cached_nport_handle);
+	}
+
+	/* find the sa_ctl for the delete and schedule the delete */
+	sa_ctl = qla_edif_find_sa_ctl_by_index(fcport, delete_sa_index, 0);
+	if (sa_ctl) {
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: POST SA DELETE sa_ctl: %p, index recvd %d\n",
+		    __func__, sa_ctl, sa_index);
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "delete index %d, update index: %d, nport handle: 0x%x, handle: 0x%x\n",
+		    delete_sa_index,
+		    edif_entry->update_sa_index, nport_handle, handle);
+
+		sa_ctl->flags = EDIF_SA_CTL_FLG_DEL;
+		set_bit(EDIF_SA_CTL_REPL, &sa_ctl->state);
+		qla_post_sa_replace_work(fcport->vha, fcport,
+		    nport_handle, sa_ctl);
+	} else {
+		ql_dbg(ql_dbg_edif, vha, 0x3063,
+		    "%s: POST SA DELETE sa_ctl not found for delete_sa_index: %d\n",
+		    __func__, delete_sa_index);
+	}
+}
+
+void qla_chk_edif_rx_sa_delete_pending(scsi_qla_host_t *vha,
+		srb_t *sp, struct sts_entry_24xx *sts24)
+{
+	fc_port_t *fcport = sp->fcport;
+	/* sa_index used by this iocb */
+	struct scsi_cmnd *cmd = GET_CMD_SP(sp);
+	uint32_t handle;
+
+	handle = (uint32_t)LSW(sts24->handle);
+
+	/* find out if this status iosb is for a scsi read */
+	if (cmd->sc_data_direction != DMA_FROM_DEVICE)
+		return;
+
+	return __chk_edif_rx_sa_delete_pending(vha, fcport, handle,
+	   le16_to_cpu(sts24->edif_sa_index));
+}
+
+void qlt_chk_edif_rx_sa_delete_pending(scsi_qla_host_t *vha, fc_port_t *fcport,
+		struct ctio7_from_24xx *pkt)
+{
+	__chk_edif_rx_sa_delete_pending(vha, fcport,
+	    pkt->handle, le16_to_cpu(pkt->edif_sa_index));
+}
+
+static void qla_parse_auth_els_ctl(struct srb *sp)
+{
+	struct qla_els_pt_arg *a = &sp->u.bsg_cmd.u.els_arg;
+	struct bsg_job *bsg_job = sp->u.bsg_cmd.bsg_job;
+	struct fc_bsg_request *request = bsg_job->request;
+	struct qla_bsg_auth_els_request *p =
+	    (struct qla_bsg_auth_els_request *)bsg_job->request;
+
+	a->tx_len = a->tx_byte_count = sp->remap.req.len;
+	a->tx_addr = sp->remap.req.dma;
+	a->rx_len = a->rx_byte_count = sp->remap.rsp.len;
+	a->rx_addr = sp->remap.rsp.dma;
+
+	if (p->e.sub_cmd == SEND_ELS_REPLY) {
+		a->control_flags = p->e.extra_control_flags << 13;
+		a->rx_xchg_address = cpu_to_le32(p->e.extra_rx_xchg_address);
+		if (p->e.extra_control_flags == BSG_CTL_FLAG_LS_ACC)
+			a->els_opcode = ELS_LS_ACC;
+		else if (p->e.extra_control_flags == BSG_CTL_FLAG_LS_RJT)
+			a->els_opcode = ELS_LS_RJT;
+	}
+	a->did = sp->fcport->d_id;
+	a->els_opcode =  request->rqst_data.h_els.command_code;
+	a->nport_handle = cpu_to_le16(sp->fcport->loop_id);
+	a->vp_idx = sp->vha->vp_idx;
+}
+
+int qla_edif_process_els(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+{
+	struct fc_bsg_request *bsg_request = bsg_job->request;
+	struct fc_bsg_reply *bsg_reply = bsg_job->reply;
+	fc_port_t *fcport = NULL;
+	struct qla_hw_data *ha = vha->hw;
+	srb_t *sp;
+	int rval =  (DID_ERROR << 16);
+	port_id_t d_id;
+	struct qla_bsg_auth_els_request *p =
+	    (struct qla_bsg_auth_els_request *)bsg_job->request;
+
+	d_id.b.al_pa = bsg_request->rqst_data.h_els.port_id[2];
+	d_id.b.area = bsg_request->rqst_data.h_els.port_id[1];
+	d_id.b.domain = bsg_request->rqst_data.h_els.port_id[0];
+
+	/* find matching d_id in fcport list */
+	fcport = qla2x00_find_fcport_by_pid(vha, &d_id);
+	if (!fcport) {
+		ql_dbg(ql_dbg_edif, vha, 0x911a,
+		    "%s fcport not find online portid=%06x.\n",
+		    __func__, d_id.b24);
+		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+		return -EIO;
+	}
+
+	if (qla_bsg_check(vha, bsg_job, fcport))
+		return 0;
+
+	if (fcport->loop_id == FC_NO_LOOP_ID) {
+		ql_dbg(ql_dbg_edif, vha, 0x910d,
+		    "%s ELS code %x, no loop id.\n", __func__,
+		    bsg_request->rqst_data.r_els.els_code);
+		SET_DID_STATUS(bsg_reply->result, DID_BAD_TARGET);
+		return -ENXIO;
+	}
+
+	if (!vha->flags.online) {
+		ql_log(ql_log_warn, vha, 0x7005, "Host not online.\n");
+		SET_DID_STATUS(bsg_reply->result, DID_BAD_TARGET);
+		rval = -EIO;
+		goto done;
+	}
+
+	/* pass through is supported only for ISP 4Gb or higher */
+	if (!IS_FWI2_CAPABLE(ha)) {
+		ql_dbg(ql_dbg_user, vha, 0x7001,
+		    "ELS passthru not supported for ISP23xx based adapters.\n");
+		SET_DID_STATUS(bsg_reply->result, DID_BAD_TARGET);
+		rval = -EPERM;
+		goto done;
+	}
+
+	sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
+	if (!sp) {
+		ql_dbg(ql_dbg_user, vha, 0x7004,
+		    "Failed get sp pid=%06x\n", fcport->d_id.b24);
+		rval = -ENOMEM;
+		SET_DID_STATUS(bsg_reply->result, DID_IMM_RETRY);
+		goto done;
+	}
+
+	sp->remap.req.len = bsg_job->request_payload.payload_len;
+	sp->remap.req.buf = dma_pool_alloc(ha->purex_dma_pool,
+	    GFP_KERNEL, &sp->remap.req.dma);
+	if (!sp->remap.req.buf) {
+		ql_dbg(ql_dbg_user, vha, 0x7005,
+		    "Failed allocate request dma len=%x\n",
+		    bsg_job->request_payload.payload_len);
+		rval = -ENOMEM;
+		SET_DID_STATUS(bsg_reply->result, DID_IMM_RETRY);
+		goto done_free_sp;
+	}
+
+	sp->remap.rsp.len = bsg_job->reply_payload.payload_len;
+	sp->remap.rsp.buf = dma_pool_alloc(ha->purex_dma_pool,
+	    GFP_KERNEL, &sp->remap.rsp.dma);
+	if (!sp->remap.rsp.buf) {
+		ql_dbg(ql_dbg_user, vha, 0x7006,
+		    "Failed allocate response dma len=%x\n",
+		    bsg_job->reply_payload.payload_len);
+		rval = -ENOMEM;
+		SET_DID_STATUS(bsg_reply->result, DID_IMM_RETRY);
+		goto done_free_remap_req;
+	}
+	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+	    bsg_job->request_payload.sg_cnt, sp->remap.req.buf,
+	    sp->remap.req.len);
+	sp->remap.remapped = true;
+
+	sp->type = SRB_ELS_CMD_HST_NOLOGIN;
+	sp->name = "SPCN_BSG_HST_NOLOGIN";
+	sp->u.bsg_cmd.bsg_job = bsg_job;
+	qla_parse_auth_els_ctl(sp);
+
+	sp->free = qla2x00_bsg_sp_free;
+	sp->done = qla2x00_bsg_job_done;
+
+	rval = qla2x00_start_sp(sp);
+
+	ql_dbg(ql_dbg_edif, vha, 0x700a,
+	    "%s %s %8phN xchg %x ctlflag %x hdl %x reqlen %xh bsg ptr %p\n",
+	    __func__, sc_to_str(p->e.sub_cmd), fcport->port_name,
+	    p->e.extra_rx_xchg_address, p->e.extra_control_flags,
+	    sp->handle, sp->remap.req.len, bsg_job);
+
+	if (rval != QLA_SUCCESS) {
+		ql_log(ql_log_warn, vha, 0x700e,
+		    "qla2x00_start_sp failed = %d\n", rval);
+		SET_DID_STATUS(bsg_reply->result, DID_IMM_RETRY);
+		rval = -EIO;
+		goto done_free_remap_rsp;
+	}
+	return rval;
+
+done_free_remap_rsp:
+	dma_pool_free(ha->purex_dma_pool, sp->remap.rsp.buf,
+	    sp->remap.rsp.dma);
+done_free_remap_req:
+	dma_pool_free(ha->purex_dma_pool, sp->remap.req.buf,
+	    sp->remap.req.dma);
+done_free_sp:
+	qla2x00_rel_sp(sp);
+
+done:
+	return rval;
+}
+
+void qla_edif_sess_down(struct scsi_qla_host *vha, struct fc_port *sess)
+{
+	if (sess->edif.app_sess_online && vha->e_dbell.db_flags & EDB_ACTIVE) {
+		ql_dbg(ql_dbg_disc, vha, 0xf09c,
+			"%s: sess %8phN send port_offline event\n",
+			__func__, sess->port_name);
+		sess->edif.app_sess_online = 0;
+		qla_edb_eventcreate(vha, VND_CMD_AUTH_STATE_SESSION_SHUTDOWN,
+		    sess->d_id.b24, 0, sess);
+		qla2x00_post_aen_work(vha, FCH_EVT_PORT_OFFLINE, sess->d_id.b24);
+	}
+}
diff --git a/drivers/scsi/qla2xxx/qla_edif.h b/drivers/scsi/qla2xxx/qla_edif.h
new file mode 100644
index 000000000000..9e8f28d0caa1
--- /dev/null
+++ b/drivers/scsi/qla2xxx/qla_edif.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Marvell Fibre Channel HBA Driver
+ * Copyright (c)  2021    Marvell
+ */
+#ifndef __QLA_EDIF_H
+#define __QLA_EDIF_H
+
+struct qla_scsi_host;
+#define EDIF_APP_ID 0x73730001
+
+#define EDIF_MAX_INDEX	2048
+struct edif_sa_ctl {
+	struct list_head next;
+	uint16_t	del_index;
+	uint16_t	index;
+	uint16_t	slot;
+	uint16_t	flags;
+#define	EDIF_SA_CTL_FLG_REPL		BIT_0
+#define	EDIF_SA_CTL_FLG_DEL		BIT_1
+#define EDIF_SA_CTL_FLG_CLEANUP_DEL BIT_4
+	// Invalidate Index bit and mirrors QLA_SA_UPDATE_FLAGS_DELETE
+	unsigned long   state;
+#define EDIF_SA_CTL_USED	1	/* Active Sa update  */
+#define EDIF_SA_CTL_PEND	2	/* Waiting for slot */
+#define EDIF_SA_CTL_REPL	3	/* Active Replace and Delete */
+#define EDIF_SA_CTL_DEL		4	/* Delete Pending */
+	struct fc_port	*fcport;
+	struct bsg_job *bsg_job;
+	struct qla_sa_update_frame sa_frame;
+};
+
+enum enode_flags_t {
+	ENODE_ACTIVE = 0x1,
+};
+
+struct pur_core {
+	enum enode_flags_t	enode_flags;
+	spinlock_t		pur_lock;
+	struct  list_head	head;
+};
+
+enum db_flags_t {
+	EDB_ACTIVE = 0x1,
+};
+
+struct edif_dbell {
+	enum db_flags_t		db_flags;
+	spinlock_t		db_lock;
+	struct  list_head	head;
+	struct	completion	dbell;
+};
+
+#define SA_UPDATE_IOCB_TYPE            0x71    /* Security Association Update IOCB entry */
+struct sa_update_28xx {
+	uint8_t entry_type;             /* Entry type. */
+	uint8_t entry_count;            /* Entry count. */
+	uint8_t sys_define;             /* System Defined. */
+	uint8_t entry_status;           /* Entry Status. */
+
+	uint32_t handle;                /* IOCB System handle. */
+
+	union {
+		__le16 nport_handle;  /* in: N_PORT handle. */
+		__le16 comp_sts;              /* out: completion status */
+#define CS_PORT_EDIF_UNAVAIL	0x28
+#define CS_PORT_EDIF_LOGOUT	0x29
+#define CS_PORT_EDIF_SUPP_NOT_RDY 0x64
+#define CS_PORT_EDIF_INV_REQ      0x66
+	} u;
+	uint8_t vp_index;
+	uint8_t reserved_1;
+	uint8_t port_id[3];
+	uint8_t flags;
+#define SA_FLAG_INVALIDATE BIT_0
+#define SA_FLAG_TX	   BIT_1 // 1=tx, 0=rx
+
+	uint8_t sa_key[32];     /* 256 bit key */
+	__le32 salt;
+	__le32 spi;
+	uint8_t sa_control;
+#define SA_CNTL_ENC_FCSP        (1 << 3)
+#define SA_CNTL_ENC_OPD         (2 << 3)
+#define SA_CNTL_ENC_MSK         (3 << 3)  // mask bits 4,3
+#define SA_CNTL_AES_GMAC	(1 << 2)
+#define SA_CNTL_KEY256          (2 << 0)
+#define SA_CNTL_KEY128          0
+
+	uint8_t reserved_2;
+	__le16 sa_index;   // reserve: bit 11-15
+	__le16 old_sa_info;
+	__le16 new_sa_info;
+};
+
+#define        NUM_ENTRIES     256
+#define        MAX_PAYLOAD     1024
+#define        PUR_GET         1
+
+struct dinfo {
+	int		nodecnt;
+	int		lstate;
+};
+
+struct pur_ninfo {
+	unsigned int	pur_pend:1;
+	port_id_t       pur_sid;
+	port_id_t	pur_did;
+	uint8_t		vp_idx;
+	short           pur_bytes_rcvd;
+	unsigned short  pur_nphdl;
+	unsigned int    pur_rx_xchg_address;
+};
+
+struct purexevent {
+	struct  pur_ninfo	pur_info;
+	unsigned char		*msgp;
+	u32			msgp_len;
+};
+
+#define	N_UNDEF		0
+#define	N_PUREX		1
+struct enode {
+	struct list_head	list;
+	struct dinfo		dinfo;
+	uint32_t		ntype;
+	union {
+		struct purexevent	purexinfo;
+	} u;
+};
+
+#define EDIF_SESSION_DOWN(_s) \
+	(qla_ini_mode_enabled(_s->vha) && (_s->disc_state == DSC_DELETE_PEND || \
+	 _s->disc_state == DSC_DELETED || \
+	 !_s->edif.app_sess_online))
+
+#endif	/* __QLA_EDIF_H */
diff --git a/drivers/scsi/qla2xxx/qla_edif_bsg.h b/drivers/scsi/qla2xxx/qla_edif_bsg.h
new file mode 100644
index 000000000000..58b718d35d19
--- /dev/null
+++ b/drivers/scsi/qla2xxx/qla_edif_bsg.h
@@ -0,0 +1,220 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Marvell Fibre Channel HBA Driver
+ * Copyright (C)  2018-	    Marvell
+ *
+ */
+#ifndef __QLA_EDIF_BSG_H
+#define __QLA_EDIF_BSG_H
+
+/* BSG Vendor specific commands */
+#define	ELS_MAX_PAYLOAD		1024
+#ifndef	WWN_SIZE
+#define WWN_SIZE		8
+#endif
+#define	VND_CMD_APP_RESERVED_SIZE	32
+
+enum auth_els_sub_cmd {
+	SEND_ELS = 0,
+	SEND_ELS_REPLY,
+	PULL_ELS,
+};
+
+struct extra_auth_els {
+	enum auth_els_sub_cmd sub_cmd;
+	uint32_t        extra_rx_xchg_address;
+	uint8_t         extra_control_flags;
+#define BSG_CTL_FLAG_INIT       0
+#define BSG_CTL_FLAG_LS_ACC     1
+#define BSG_CTL_FLAG_LS_RJT     2
+#define BSG_CTL_FLAG_TRM        3
+	uint8_t         extra_rsvd[3];
+} __packed;
+
+struct qla_bsg_auth_els_request {
+	struct fc_bsg_request r;
+	struct extra_auth_els e;
+};
+
+struct qla_bsg_auth_els_reply {
+	struct fc_bsg_reply r;
+	uint32_t rx_xchg_address;
+};
+
+struct app_id {
+	int		app_vid;
+	uint8_t		app_key[32];
+} __packed;
+
+struct app_start_reply {
+	uint32_t	host_support_edif;
+	uint32_t	edif_enode_active;
+	uint32_t	edif_edb_active;
+	uint32_t	reserved[VND_CMD_APP_RESERVED_SIZE];
+} __packed;
+
+struct app_start {
+	struct app_id	app_info;
+	uint32_t	prli_to;
+	uint32_t	key_shred;
+	uint8_t         app_start_flags;
+	uint8_t         reserved[VND_CMD_APP_RESERVED_SIZE - 1];
+} __packed;
+
+struct app_stop {
+	struct app_id	app_info;
+	char		buf[16];
+} __packed;
+
+struct app_plogi_reply {
+	uint32_t	prli_status;
+	uint8_t		reserved[VND_CMD_APP_RESERVED_SIZE];
+} __packed;
+
+#define	RECFG_TIME	1
+#define	RECFG_BYTES	2
+
+struct app_rekey_cfg {
+	struct app_id app_info;
+	uint8_t	 rekey_mode;
+	port_id_t d_id;
+	uint8_t	 force;
+	union {
+		int64_t bytes;
+		int64_t time;
+	} rky_units;
+
+	uint8_t		reserved[VND_CMD_APP_RESERVED_SIZE];
+} __packed;
+
+struct app_pinfo_req {
+	struct app_id app_info;
+	uint8_t	 num_ports;
+	port_id_t remote_pid;
+	uint8_t	 reserved[VND_CMD_APP_RESERVED_SIZE];
+} __packed;
+
+struct app_pinfo {
+	port_id_t remote_pid;
+	uint8_t	remote_wwpn[WWN_SIZE];
+	uint8_t	remote_type;
+#define	VND_CMD_RTYPE_UNKNOWN		0
+#define	VND_CMD_RTYPE_TARGET		1
+#define	VND_CMD_RTYPE_INITIATOR		2
+	uint8_t	remote_state;
+	uint8_t	auth_state;
+	uint8_t	rekey_mode;
+	int64_t	rekey_count;
+	int64_t	rekey_config_value;
+	int64_t	rekey_consumed_value;
+
+	uint8_t	reserved[VND_CMD_APP_RESERVED_SIZE];
+} __packed;
+
+/* AUTH States */
+#define	VND_CMD_AUTH_STATE_UNDEF	0
+#define	VND_CMD_AUTH_STATE_SESSION_SHUTDOWN	1
+#define	VND_CMD_AUTH_STATE_NEEDED	2
+#define	VND_CMD_AUTH_STATE_ELS_RCVD	3
+#define	VND_CMD_AUTH_STATE_SAUPDATE_COMPL 4
+
+struct app_pinfo_reply {
+	uint8_t		port_count;
+	uint8_t		reserved[VND_CMD_APP_RESERVED_SIZE];
+	struct app_pinfo ports[0];
+} __packed;
+
+struct app_sinfo_req {
+	struct app_id	app_info;
+	uint8_t		num_ports;
+	uint8_t		reserved[VND_CMD_APP_RESERVED_SIZE];
+} __packed;
+
+struct app_sinfo {
+	uint8_t	remote_wwpn[WWN_SIZE];
+	int64_t	rekey_count;
+	uint8_t	rekey_mode;
+	int64_t	tx_bytes;
+	int64_t	rx_bytes;
+} __packed;
+
+struct app_stats_reply {
+	uint8_t		elem_count;
+	struct app_sinfo elem[0];
+} __packed;
+
+struct qla_sa_update_frame {
+	struct app_id	app_info;
+	uint16_t	flags;
+#define SAU_FLG_INV		0x01	/* delete key */
+#define SAU_FLG_TX		0x02	/* 1=tx, 0 = rx */
+#define SAU_FLG_FORCE_DELETE	0x08
+#define SAU_FLG_GMAC_MODE	0x20	/*
+					 * GMAC mode is cleartext for the IO
+					 * (i.e. NULL encryption)
+					 */
+#define SAU_FLG_KEY128          0x40
+#define SAU_FLG_KEY256          0x80
+	uint16_t        fast_sa_index:10,
+			reserved:6;
+	uint32_t	salt;
+	uint32_t	spi;
+	uint8_t		sa_key[32];
+	uint8_t		node_name[WWN_SIZE];
+	uint8_t		port_name[WWN_SIZE];
+	port_id_t	port_id;
+} __packed;
+
+// used for edif mgmt bsg interface
+#define	QL_VND_SC_UNDEF		0
+#define	QL_VND_SC_SA_UPDATE	1
+#define	QL_VND_SC_APP_START	2
+#define	QL_VND_SC_APP_STOP	3
+#define	QL_VND_SC_AUTH_OK	4
+#define	QL_VND_SC_AUTH_FAIL	5
+#define	QL_VND_SC_REKEY_CONFIG	6
+#define	QL_VND_SC_GET_FCINFO	7
+#define	QL_VND_SC_GET_STATS	8
+
+/* Application interface data structure for rtn data */
+#define	EXT_DEF_EVENT_DATA_SIZE	64
+struct edif_app_dbell {
+	uint32_t	event_code;
+	uint32_t	event_data_size;
+	union  {
+		port_id_t	port_id;
+		uint8_t		event_data[EXT_DEF_EVENT_DATA_SIZE];
+	};
+} __packed;
+
+struct edif_sa_update_aen {
+	port_id_t port_id;
+	uint32_t key_type;	/* Tx (1) or RX (2) */
+	uint32_t status;	/* 0 succes,  1 failed, 2 timeout , 3 error */
+	uint8_t		reserved[16];
+} __packed;
+
+#define	QL_VND_SA_STAT_SUCCESS	0
+#define	QL_VND_SA_STAT_FAILED	1
+#define	QL_VND_SA_STAT_TIMEOUT	2
+#define	QL_VND_SA_STAT_ERROR	3
+
+#define	QL_VND_RX_SA_KEY	1
+#define	QL_VND_TX_SA_KEY	2
+
+/* App defines for plogi auth'd ok and plogi auth bad requests */
+struct auth_complete_cmd {
+	struct app_id app_info;
+#define PL_TYPE_WWPN    1
+#define PL_TYPE_DID     2
+	uint32_t    type;
+	union {
+		uint8_t  wwpn[WWN_SIZE];
+		port_id_t d_id;
+	} u;
+	uint32_t reserved[VND_CMD_APP_RESERVED_SIZE];
+} __packed;
+
+#define RX_DELAY_DELETE_TIMEOUT 20
+
+#endif	/* QLA_EDIF_BSG_H */
diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h
index 49df418030e4..073d06e88c58 100644
--- a/drivers/scsi/qla2xxx/qla_fw.h
+++ b/drivers/scsi/qla2xxx/qla_fw.h
@@ -82,10 +82,11 @@ struct port_database_24xx {
 	uint8_t port_name[WWN_SIZE];
 	uint8_t node_name[WWN_SIZE];
 
-	uint8_t reserved_3[4];
+	uint8_t reserved_3[2];
+	uint16_t nvme_first_burst_size;
 	uint16_t prli_nvme_svc_param_word_0;	/* Bits 15-0 of word 0 */
 	uint16_t prli_nvme_svc_param_word_3;	/* Bits 15-0 of word 3 */
-	uint16_t nvme_first_burst_size;
+	uint8_t secure_login;
 	uint8_t reserved_4[14];
 };
 
@@ -489,6 +490,9 @@ struct cmd_type_6 {
 	struct scsi_lun lun;		/* FCP LUN (BE). */
 
 	__le16	control_flags;		/* Control flags. */
+#define CF_NEW_SA			BIT_12
+#define CF_EN_EDIF			BIT_9
+#define CF_ADDITIONAL_PARAM_BLK		BIT_8
 #define CF_DIF_SEG_DESCR_ENABLE		BIT_3
 #define CF_DATA_SEG_DESCR_ENABLE	BIT_2
 #define CF_READ_DATA			BIT_1
@@ -611,6 +615,7 @@ struct sts_entry_24xx {
 	union {
 		__le16 reserved_1;
 		__le16	nvme_rsp_pyld_len;
+		__le16 edif_sa_index;	 /* edif sa_index used for initiator read data */
 	};
 
 	__le16	state_flags;		/* State flags. */
@@ -805,6 +810,7 @@ struct els_entry_24xx {
 #define EPD_RX_XCHG		(3 << 13)
 #define ECF_CLR_PASSTHRU_PEND	BIT_12
 #define ECF_INCL_FRAME_HDR	BIT_11
+#define ECF_SEC_LOGIN		BIT_3
 
 	union {
 		struct {
@@ -896,6 +902,7 @@ struct logio_entry_24xx {
 #define LCF_FCP2_OVERRIDE	BIT_9	/* Set/Reset word 3 of PRLI. */
 #define LCF_CLASS_2		BIT_8	/* Enable class 2 during PLOGI. */
 #define LCF_FREE_NPORT		BIT_7	/* Release NPORT handle after LOGO. */
+#define LCF_COMMON_FEAT		BIT_7	/* PLOGI - Set Common Features Field */
 #define LCF_EXPL_LOGO		BIT_6	/* Perform an explicit LOGO. */
 #define LCF_NVME_PRLI		BIT_6   /* Perform NVME FC4 PRLI */
 #define LCF_SKIP_PRLI		BIT_5	/* Skip PRLI after PLOGI. */
@@ -920,6 +927,8 @@ struct logio_entry_24xx {
 	uint8_t rsp_size;		/* Response size in 32bit words. */
 
 	__le32	io_parameter[11];	/* General I/O parameters. */
+#define LIO_COMM_FEAT_FCSP	BIT_21
+#define LIO_COMM_FEAT_CIO	BIT_31
 #define LSC_SCODE_NOLINK	0x01
 #define LSC_SCODE_NOIOCB	0x02
 #define LSC_SCODE_NOXCB		0x03
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index 2f867da822ae..1c3f055d41b8 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -12,6 +12,7 @@
  * Global Function Prototypes in qla_init.c source file.
  */
 extern int qla2x00_initialize_adapter(scsi_qla_host_t *);
+extern int qla24xx_post_prli_work(struct scsi_qla_host *vha, fc_port_t *fcport);
 
 extern int qla2100_pci_config(struct scsi_qla_host *);
 extern int qla2300_pci_config(struct scsi_qla_host *);
@@ -130,6 +131,18 @@ void qla24xx_free_purex_item(struct purex_item *item);
 extern bool qla24xx_risc_firmware_invalid(uint32_t *);
 void qla_init_iocb_limit(scsi_qla_host_t *);
 
+void qla_edif_list_del(fc_port_t *fcport);
+void qla_edif_sadb_release(struct qla_hw_data *ha);
+int qla_edif_sadb_build_free_pool(struct qla_hw_data *ha);
+void qla_edif_sadb_release_free_pool(struct qla_hw_data *ha);
+void qla_chk_edif_rx_sa_delete_pending(scsi_qla_host_t *vha,
+		srb_t *sp, struct sts_entry_24xx *sts24);
+void qlt_chk_edif_rx_sa_delete_pending(scsi_qla_host_t *vha, fc_port_t *fcport,
+		struct ctio7_from_24xx *ctio);
+void qla2x00_release_all_sadb(struct scsi_qla_host *vha, struct fc_port *fcport);
+int qla_edif_process_els(scsi_qla_host_t *vha, struct bsg_job *bsgjob);
+void qla_edif_sess_down(struct scsi_qla_host *vha, struct fc_port *sess);
+const char *sc_to_str(uint16_t cmd);
 
 /*
  * Global Data in qla_os.c source file.
@@ -175,6 +188,7 @@ extern int ql2xenablemsix;
 extern int qla2xuseresexchforels;
 extern int ql2xdifbundlinginternalbuffers;
 extern int ql2xfulldump_on_mpifail;
+extern int ql2xsecenable;
 extern int ql2xenforce_iocb_limit;
 extern int ql2xabts_wait_nvme;
 
@@ -236,6 +250,8 @@ void qla24xx_process_purex_rdp(struct scsi_qla_host *vha,
 			       struct purex_item *pkt);
 void qla_pci_set_eeh_busy(struct scsi_qla_host *);
 void qla_schedule_eeh_work(struct scsi_qla_host *);
+struct edif_sa_ctl *qla_edif_find_sa_ctl_by_index(fc_port_t *fcport,
+						  int index, int dir);
 
 /*
  * Global Functions in qla_mid.c source file.
@@ -280,7 +296,10 @@ extern int  qla2x00_vp_abort_isp(scsi_qla_host_t *);
 /*
  * Global Function Prototypes in qla_iocb.c source file.
  */
-
+void qla_els_pt_iocb(struct scsi_qla_host *vha,
+	struct els_entry_24xx *pkt, struct qla_els_pt_arg *a);
+cont_a64_entry_t *qla2x00_prep_cont_type1_iocb(scsi_qla_host_t *vha,
+		struct req_que *que);
 extern uint16_t qla2x00_calc_iocbs_32(uint16_t);
 extern uint16_t qla2x00_calc_iocbs_64(uint16_t);
 extern void qla2x00_build_scsi_iocbs_32(srb_t *, cmd_entry_t *, uint16_t);
@@ -310,6 +329,8 @@ extern int qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *, srb_t *,
 	struct dsd64 *, uint16_t, struct qla_tgt_cmd *);
 extern int qla24xx_get_one_block_sg(uint32_t, struct qla2_sgx *, uint32_t *);
 extern int qla24xx_configure_prot_mode(srb_t *, uint16_t *);
+extern int qla24xx_issue_sa_replace_iocb(scsi_qla_host_t *vha,
+	struct qla_work_evt *e);
 
 /*
  * Global Function Prototypes in qla_mbx.c source file.
@@ -578,6 +599,7 @@ qla2xxx_msix_rsp_q_hs(int irq, void *dev_id);
 fc_port_t *qla2x00_find_fcport_by_loopid(scsi_qla_host_t *, uint16_t);
 fc_port_t *qla2x00_find_fcport_by_wwpn(scsi_qla_host_t *, u8 *, u8);
 fc_port_t *qla2x00_find_fcport_by_nportid(scsi_qla_host_t *, port_id_t *, u8);
+void __qla_consume_iocb(struct scsi_qla_host *vha, void **pkt, struct rsp_que **rsp);
 
 /*
  * Global Function Prototypes in qla_sup.c source file.
@@ -640,6 +662,8 @@ extern int qla2xxx_get_vpd_field(scsi_qla_host_t *, char *, char *, size_t);
 
 extern void qla2xxx_flash_npiv_conf(scsi_qla_host_t *);
 extern int qla24xx_read_fcp_prio_cfg(scsi_qla_host_t *);
+int __qla_copy_purex_to_buffer(struct scsi_qla_host *vha, void **pkt,
+	struct rsp_que **rsp, u8 *buf, u32 buf_len);
 
 /*
  * Global Function Prototypes in qla_dbg.c source file.
@@ -879,6 +903,9 @@ extern int qla2x00_issue_iocb_timeout(scsi_qla_host_t *, void *,
 	dma_addr_t, size_t, uint32_t);
 extern int qla2x00_get_idma_speed(scsi_qla_host_t *, uint16_t,
 	uint16_t *, uint16_t *);
+extern int qla24xx_sadb_update(struct bsg_job *bsg_job);
+extern int qla_post_sa_replace_work(struct scsi_qla_host *vha,
+	 fc_port_t *fcport, uint16_t nport_handle, struct edif_sa_ctl *sa_ctl);
 
 /* 83xx related functions */
 void qla83xx_fw_dump(scsi_qla_host_t *vha);
@@ -923,6 +950,7 @@ extern int qla_set_exchoffld_mem_cfg(scsi_qla_host_t *);
 extern void qlt_handle_abts_recv(struct scsi_qla_host *, struct rsp_que *,
 	response_t *);
 
+struct scsi_qla_host *qla_find_host_by_d_id(struct scsi_qla_host *vha, be_id_t d_id);
 int qla24xx_async_notify_ack(scsi_qla_host_t *, fc_port_t *,
 	struct imm_ntfy_from_isp *, int);
 void qla24xx_do_nack_work(struct scsi_qla_host *, struct qla_work_evt *);
@@ -935,7 +963,7 @@ extern struct fc_port *qlt_find_sess_invalidate_other(scsi_qla_host_t *,
 void qla24xx_delete_sess_fn(struct work_struct *);
 void qlt_unknown_atio_work_fn(struct work_struct *);
 void qlt_update_host_map(struct scsi_qla_host *, port_id_t);
-void qlt_remove_target_resources(struct qla_hw_data *);
+void qla_remove_hostmap(struct qla_hw_data *ha);
 void qlt_clr_qp_table(struct scsi_qla_host *vha);
 void qlt_set_mode(struct scsi_qla_host *);
 int qla2x00_set_data_rate(scsi_qla_host_t *vha, uint16_t mode);
@@ -950,6 +978,25 @@ extern void qla_nvme_abort_process_comp_status
 
 /* nvme.c */
 void qla_nvme_unregister_remote_port(struct fc_port *fcport);
+
+/* qla_edif.c */
+fc_port_t *qla2x00_find_fcport_by_pid(scsi_qla_host_t *vha, port_id_t *id);
+void qla_edb_eventcreate(scsi_qla_host_t *vha, uint32_t dbtype, uint32_t data, uint32_t data2,
+		fc_port_t *fcport);
+void qla_edb_stop(scsi_qla_host_t *vha);
+ssize_t edif_doorbell_show(struct device *dev, struct device_attribute *attr, char *buf);
+int32_t qla_edif_app_mgmt(struct bsg_job *bsg_job);
+void qla_enode_init(scsi_qla_host_t *vha);
+void qla_enode_stop(scsi_qla_host_t *vha);
+void qla_edif_flush_sa_ctl_lists(fc_port_t *fcport);
+void qla_edb_init(scsi_qla_host_t *vha);
+void qla_edif_timer(scsi_qla_host_t *vha);
+int qla28xx_start_scsi_edif(srb_t *sp);
+void qla24xx_sa_update_iocb(srb_t *sp, struct sa_update_28xx *sa_update_iocb);
+void qla24xx_sa_replace_iocb(srb_t *sp, struct sa_update_28xx *sa_update_iocb);
+void qla24xx_auth_els(scsi_qla_host_t *vha, void **pkt, struct rsp_que **rsp);
+void qla28xx_sa_update_iocb_entry(scsi_qla_host_t *vha, struct req_que *req,
+		struct sa_update_28xx *pkt);
 void qla_handle_els_plogi_done(scsi_qla_host_t *vha, struct event_arg *ea);
 
 #define QLA2XX_HW_ERROR			BIT_0
diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c
index 5b6e04a91a18..ebc8fdb0b43d 100644
--- a/drivers/scsi/qla2xxx/qla_gs.c
+++ b/drivers/scsi/qla2xxx/qla_gs.c
@@ -632,7 +632,7 @@ static int qla_async_rftid(scsi_qla_host_t *vha, port_id_t *d_id)
 	ct_req->req.rft_id.port_id = port_id_to_be_id(vha->d_id);
 	ct_req->req.rft_id.fc4_types[2] = 0x01;		/* FCP-3 */
 
-	if (vha->flags.nvme_enabled)
+	if (vha->flags.nvme_enabled && qla_ini_mode_enabled(vha))
 		ct_req->req.rft_id.fc4_types[6] = 1;    /* NVMe type 28h */
 
 	sp->u.iocb_cmd.u.ctarg.req_size = RFT_ID_REQ_SIZE;
@@ -1730,8 +1730,6 @@ qla2x00_hba_attributes(scsi_qla_host_t *vha, void *entries,
 	size += alen;
 	ql_dbg(ql_dbg_disc, vha, 0x20a8,
 	    "FIRMWARE VERSION = %s.\n", eiter->a.fw_version);
-	if (callopt == CALLOPT_FDMI1)
-		goto done;
 	/* OS Name and Version */
 	eiter = entries + size;
 	eiter->type = cpu_to_be16(FDMI_HBA_OS_NAME_AND_VERSION);
@@ -1754,6 +1752,8 @@ qla2x00_hba_attributes(scsi_qla_host_t *vha, void *entries,
 	size += alen;
 	ql_dbg(ql_dbg_disc, vha, 0x20a9,
 	    "OS VERSION = %s.\n", eiter->a.os_version);
+	if (callopt == CALLOPT_FDMI1)
+		goto done;
 	/* MAX CT Payload Length */
 	eiter = entries + size;
 	eiter->type = cpu_to_be16(FDMI_HBA_MAXIMUM_CT_PAYLOAD_LENGTH);
@@ -2826,6 +2826,10 @@ void qla24xx_handle_gpsc_event(scsi_qla_host_t *vha, struct event_arg *ea)
 	if (fcport->disc_state == DSC_DELETE_PEND)
 		return;
 
+	/* We will figure-out what happen after AUTH completes */
+	if (fcport->disc_state == DSC_LOGIN_AUTH_PEND)
+		return;
+
 	if (ea->sp->gen2 != fcport->login_gen) {
 		/* target side must have changed it. */
 		ql_dbg(ql_dbg_disc, vha, 0x20d3,
@@ -3498,7 +3502,16 @@ void qla24xx_async_gnnft_done(scsi_qla_host_t *vha, srb_t *sp)
 				continue;
 			fcport->scan_state = QLA_FCPORT_FOUND;
 			fcport->last_rscn_gen = fcport->rscn_gen;
+			fcport->fc4_type = rp->fc4type;
 			found = true;
+
+			if (fcport->scan_needed) {
+				if (NVME_PRIORITY(vha->hw, fcport))
+					fcport->do_prli_nvme = 1;
+				else
+					fcport->do_prli_nvme = 0;
+			}
+
 			/*
 			 * If device was not a fabric device before.
 			 */
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index f8f471157109..1e4e3e83b5c7 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -34,7 +34,6 @@ static int qla2x00_restart_isp(scsi_qla_host_t *);
 static struct qla_chip_state_84xx *qla84xx_get_chip(struct scsi_qla_host *);
 static int qla84xx_init_chip(scsi_qla_host_t *);
 static int qla25xx_init_queues(struct qla_hw_data *);
-static int qla24xx_post_prli_work(struct scsi_qla_host*, fc_port_t *);
 static void qla24xx_handle_gpdb_event(scsi_qla_host_t *vha,
 				      struct event_arg *ea);
 static void qla24xx_handle_prli_done_event(struct scsi_qla_host *,
@@ -158,7 +157,7 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait)
 	sp = qla2xxx_get_qpair_sp(cmd_sp->vha, cmd_sp->qpair, cmd_sp->fcport,
 				  GFP_ATOMIC);
 	if (!sp)
-		return rval;
+		return QLA_MEMORY_ALLOC_FAILED;
 
 	abt_iocb = &sp->u.iocb_cmd;
 	sp->type = SRB_ABT_CMD;
@@ -191,7 +190,7 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait)
 	if (wait) {
 		wait_for_completion(&abt_iocb->u.abt.comp);
 		rval = abt_iocb->u.abt.comp_status == CS_COMPLETE ?
-			QLA_SUCCESS : QLA_FUNCTION_FAILED;
+			QLA_SUCCESS : QLA_ERR_FROM_FW;
 		sp->free(sp);
 	}
 
@@ -293,22 +292,6 @@ static void qla2x00_async_login_sp_done(srb_t *sp, int res)
 	sp->free(sp);
 }
 
-static inline bool
-fcport_is_smaller(fc_port_t *fcport)
-{
-	if (wwn_to_u64(fcport->port_name) <
-	    wwn_to_u64(fcport->vha->port_name))
-		return true;
-	else
-		return false;
-}
-
-static inline bool
-fcport_is_bigger(fc_port_t *fcport)
-{
-	return !fcport_is_smaller(fcport);
-}
-
 int
 qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport,
     uint16_t *data)
@@ -343,19 +326,28 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport,
 	qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
 
 	sp->done = qla2x00_async_login_sp_done;
-	if (N2N_TOPO(fcport->vha->hw) && fcport_is_bigger(fcport))
+	if (N2N_TOPO(fcport->vha->hw) && fcport_is_bigger(fcport)) {
 		lio->u.logio.flags |= SRB_LOGIN_PRLI_ONLY;
-	else
-		lio->u.logio.flags |= SRB_LOGIN_COND_PLOGI;
+	} else {
+		if (vha->hw->flags.edif_enabled &&
+		    vha->e_dbell.db_flags & EDB_ACTIVE) {
+			lio->u.logio.flags |=
+				(SRB_LOGIN_FCSP | SRB_LOGIN_SKIP_PRLI);
+			ql_dbg(ql_dbg_disc, vha, 0x2072,
+			    "Async-login: w/ FCSP %8phC hdl=%x, loopid=%x portid=%06x\n",
+			    fcport->port_name, sp->handle, fcport->loop_id, fcport->d_id.b24);
+		} else {
+			lio->u.logio.flags |= SRB_LOGIN_COND_PLOGI;
+		}
+	}
 
 	if (NVME_TARGET(vha->hw, fcport))
 		lio->u.logio.flags |= SRB_LOGIN_SKIP_PRLI;
 
-	ql_log(ql_log_warn, vha, 0x2072,
-	       "Async-login - %8phC hdl=%x, loopid=%x portid=%02x%02x%02x retries=%d.\n",
+	ql_dbg(ql_dbg_disc, vha, 0x2072,
+	       "Async-login - %8phC hdl=%x, loopid=%x portid=%06x retries=%d.\n",
 	       fcport->port_name, sp->handle, fcport->loop_id,
-	       fcport->d_id.b.domain, fcport->d_id.b.area, fcport->d_id.b.al_pa,
-	       fcport->login_retry);
+	       fcport->d_id.b24, fcport->login_retry);
 
 	rval = qla2x00_start_sp(sp);
 	if (rval != QLA_SUCCESS) {
@@ -378,7 +370,7 @@ static void qla2x00_async_logout_sp_done(srb_t *sp, int res)
 {
 	sp->fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
 	sp->fcport->login_gen++;
-	qlt_logo_completion_handler(sp->fcport, res);
+	qlt_logo_completion_handler(sp->fcport, sp->u.iocb_cmd.u.logio.data[0]);
 	sp->free(sp);
 }
 
@@ -404,10 +396,10 @@ qla2x00_async_logout(struct scsi_qla_host *vha, fc_port_t *fcport)
 	sp->done = qla2x00_async_logout_sp_done;
 
 	ql_dbg(ql_dbg_disc, vha, 0x2070,
-	    "Async-logout - hdl=%x loop-id=%x portid=%02x%02x%02x %8phC.\n",
+	    "Async-logout - hdl=%x loop-id=%x portid=%02x%02x%02x %8phC explicit %d.\n",
 	    sp->handle, fcport->loop_id, fcport->d_id.b.domain,
 		fcport->d_id.b.area, fcport->d_id.b.al_pa,
-		fcport->port_name);
+		fcport->port_name, fcport->explicit_logout);
 
 	rval = qla2x00_start_sp(sp);
 	if (rval != QLA_SUCCESS)
@@ -692,11 +684,11 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha,
 
 	fcport = ea->fcport;
 	ql_dbg(ql_dbg_disc, vha, 0xffff,
-	    "%s %8phC DS %d LS rc %d %d login %d|%d rscn %d|%d lid %d\n",
+	    "%s %8phC DS %d LS rc %d %d login %d|%d rscn %d|%d lid %d edif %d\n",
 	    __func__, fcport->port_name, fcport->disc_state,
 	    fcport->fw_login_state, ea->rc,
 	    fcport->login_gen, fcport->last_login_gen,
-	    fcport->rscn_gen, fcport->last_rscn_gen, vha->loop_id);
+	    fcport->rscn_gen, fcport->last_rscn_gen, vha->loop_id, fcport->edif.enable);
 
 	if (fcport->disc_state == DSC_DELETE_PEND)
 		return;
@@ -810,7 +802,7 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha,
 		default:
 			switch (current_login_state) {
 			case DSC_LS_PRLI_COMP:
-				ql_dbg(ql_dbg_disc + ql_dbg_verbose,
+				ql_dbg(ql_dbg_disc,
 				    vha, 0x20e4, "%s %d %8phC post gpdb\n",
 				    __func__, __LINE__, fcport->port_name);
 
@@ -822,6 +814,13 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha,
 				qla2x00_post_async_adisc_work(vha, fcport,
 				    data);
 				break;
+			case DSC_LS_PLOGI_COMP:
+				if (vha->hw->flags.edif_enabled) {
+					/* check to see if App support Secure */
+					qla24xx_post_gpdb_work(vha, fcport, 0);
+					break;
+				}
+				fallthrough;
 			case DSC_LS_PORT_UNAVAIL:
 			default:
 				if (fcport->loop_id == FC_NO_LOOP_ID) {
@@ -849,6 +848,7 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha,
 				 */
 				qla2x00_set_fcport_disc_state(fcport,
 				    DSC_DELETED);
+				set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
 				break;
 			case DSC_LS_PRLI_COMP:
 				if ((e->prli_svc_param_word_3[0] & BIT_4) == 0)
@@ -861,6 +861,12 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha,
 				    data);
 				break;
 			case DSC_LS_PLOGI_COMP:
+				if (vha->hw->flags.edif_enabled &&
+				    vha->e_dbell.db_flags & EDB_ACTIVE) {
+					/* check to see if App support secure or not */
+					qla24xx_post_gpdb_work(vha, fcport, 0);
+					break;
+				}
 				if (fcport_is_bigger(fcport)) {
 					/* local adapter is smaller */
 					if (fcport->loop_id != FC_NO_LOOP_ID)
@@ -1191,7 +1197,7 @@ done:
 	sp->free(sp);
 }
 
-static int qla24xx_post_prli_work(struct scsi_qla_host *vha, fc_port_t *fcport)
+int qla24xx_post_prli_work(struct scsi_qla_host *vha, fc_port_t *fcport)
 {
 	struct qla_work_evt *e;
 
@@ -1214,7 +1220,7 @@ static void qla2x00_async_prli_sp_done(srb_t *sp, int res)
 	struct event_arg ea;
 
 	ql_dbg(ql_dbg_disc, vha, 0x2129,
-	    "%s %8phC res %d \n", __func__,
+	    "%s %8phC res %x\n", __func__,
 	    sp->fcport->port_name, res);
 
 	sp->fcport->flags &= ~FCF_ASYNC_SENT;
@@ -1227,6 +1233,8 @@ static void qla2x00_async_prli_sp_done(srb_t *sp, int res)
 		ea.iop[0] = lio->u.logio.iop[0];
 		ea.iop[1] = lio->u.logio.iop[1];
 		ea.sp = sp;
+		if (res == QLA_OS_TIMER_EXPIRED)
+			ea.data[0] = QLA_OS_TIMER_EXPIRED;
 
 		qla24xx_handle_prli_done_event(vha, &ea);
 	}
@@ -1418,6 +1426,57 @@ void __qla24xx_handle_gpdb_event(scsi_qla_host_t *vha, struct event_arg *ea)
 	spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
 }
 
+static int	qla_chk_secure_login(scsi_qla_host_t	*vha, fc_port_t *fcport,
+	struct port_database_24xx *pd)
+{
+	int rc = 0;
+
+	if (pd->secure_login) {
+		ql_dbg(ql_dbg_disc, vha, 0x104d,
+		    "Secure Login established on %8phC\n",
+		    fcport->port_name);
+		fcport->flags |= FCF_FCSP_DEVICE;
+	} else {
+		ql_dbg(ql_dbg_disc, vha, 0x104d,
+		    "non-Secure Login %8phC",
+		    fcport->port_name);
+		fcport->flags &= ~FCF_FCSP_DEVICE;
+	}
+	if (vha->hw->flags.edif_enabled) {
+		if (fcport->flags & FCF_FCSP_DEVICE) {
+			qla2x00_set_fcport_disc_state(fcport, DSC_LOGIN_AUTH_PEND);
+			/* Start edif prli timer & ring doorbell for app */
+			fcport->edif.rx_sa_set = 0;
+			fcport->edif.tx_sa_set = 0;
+			fcport->edif.rx_sa_pending = 0;
+			fcport->edif.tx_sa_pending = 0;
+
+			qla2x00_post_aen_work(vha, FCH_EVT_PORT_ONLINE,
+			    fcport->d_id.b24);
+
+			if (vha->e_dbell.db_flags ==  EDB_ACTIVE) {
+				ql_dbg(ql_dbg_disc, vha, 0x20ef,
+				    "%s %d %8phC EDIF: post DB_AUTH: AUTH needed\n",
+				    __func__, __LINE__, fcport->port_name);
+				fcport->edif.app_started = 1;
+				fcport->edif.app_sess_online = 1;
+
+				qla_edb_eventcreate(vha, VND_CMD_AUTH_STATE_NEEDED,
+				    fcport->d_id.b24, 0, fcport);
+			}
+
+			rc = 1;
+		} else if (qla_ini_mode_enabled(vha) || qla_dual_mode_enabled(vha)) {
+			ql_dbg(ql_dbg_disc, vha, 0x2117,
+			    "%s %d %8phC post prli\n",
+			    __func__, __LINE__, fcport->port_name);
+			qla24xx_post_prli_work(vha, fcport);
+			rc = 1;
+		}
+	}
+	return rc;
+}
+
 static
 void qla24xx_handle_gpdb_event(scsi_qla_host_t *vha, struct event_arg *ea)
 {
@@ -1431,12 +1490,15 @@ void qla24xx_handle_gpdb_event(scsi_qla_host_t *vha, struct event_arg *ea)
 	fcport->flags &= ~FCF_ASYNC_SENT;
 
 	ql_dbg(ql_dbg_disc, vha, 0x20d2,
-	    "%s %8phC DS %d LS %d fc4_type %x rc %d\n", __func__,
+	    "%s %8phC DS %d LS %x fc4_type %x rc %x\n", __func__,
 	    fcport->port_name, fcport->disc_state, pd->current_login_state,
 	    fcport->fc4_type, ea->rc);
 
-	if (fcport->disc_state == DSC_DELETE_PEND)
+	if (fcport->disc_state == DSC_DELETE_PEND) {
+		ql_dbg(ql_dbg_disc, vha, 0x20d5, "%s %d %8phC\n",
+		       __func__, __LINE__, fcport->port_name);
 		return;
+	}
 
 	if (NVME_TARGET(vha->hw, fcport))
 		ls = pd->current_login_state >> 4;
@@ -1453,6 +1515,8 @@ void qla24xx_handle_gpdb_event(scsi_qla_host_t *vha, struct event_arg *ea)
 	} else if (ea->sp->gen1 != fcport->rscn_gen) {
 		qla_rscn_replay(fcport);
 		qlt_schedule_sess_for_deletion(fcport);
+		ql_dbg(ql_dbg_disc, vha, 0x20d5, "%s %d %8phC, ls %x\n",
+		       __func__, __LINE__, fcport->port_name, ls);
 		return;
 	}
 
@@ -1460,8 +1524,14 @@ void qla24xx_handle_gpdb_event(scsi_qla_host_t *vha, struct event_arg *ea)
 	case PDS_PRLI_COMPLETE:
 		__qla24xx_parse_gpdb(vha, fcport, pd);
 		break;
-	case PDS_PLOGI_PENDING:
 	case PDS_PLOGI_COMPLETE:
+		if (qla_chk_secure_login(vha, fcport, pd)) {
+			ql_dbg(ql_dbg_disc, vha, 0x20d5, "%s %d %8phC, ls %x\n",
+			       __func__, __LINE__, fcport->port_name, ls);
+			return;
+		}
+		fallthrough;
+	case PDS_PLOGI_PENDING:
 	case PDS_PRLI_PENDING:
 	case PDS_PRLI2_PENDING:
 		/* Set discovery state back to GNL to Relogin attempt */
@@ -1470,6 +1540,8 @@ void qla24xx_handle_gpdb_event(scsi_qla_host_t *vha, struct event_arg *ea)
 			qla2x00_set_fcport_disc_state(fcport, DSC_GNL);
 			set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
 		}
+		ql_dbg(ql_dbg_disc, vha, 0x20d5, "%s %d %8phC, ls %x\n",
+		       __func__, __LINE__, fcport->port_name, ls);
 		return;
 	case PDS_LOGO_PENDING:
 	case PDS_PORT_UNAVAILABLE:
@@ -1538,11 +1610,12 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport)
 	u16 sec;
 
 	ql_dbg(ql_dbg_disc, vha, 0x20d8,
-	    "%s %8phC DS %d LS %d P %d fl %x confl %p rscn %d|%d login %d lid %d scan %d\n",
+	    "%s %8phC DS %d LS %d P %d fl %x confl %p rscn %d|%d login %d lid %d scan %d fc4type %x\n",
 	    __func__, fcport->port_name, fcport->disc_state,
 	    fcport->fw_login_state, fcport->login_pause, fcport->flags,
 	    fcport->conflict, fcport->last_rscn_gen, fcport->rscn_gen,
-	    fcport->login_gen, fcport->loop_id, fcport->scan_state);
+	    fcport->login_gen, fcport->loop_id, fcport->scan_state,
+	    fcport->fc4_type);
 
 	if (fcport->scan_state != QLA_FCPORT_FOUND)
 		return 0;
@@ -1715,6 +1788,12 @@ void qla2x00_handle_rscn(scsi_qla_host_t *vha, struct event_arg *ea)
 
 	fcport = qla2x00_find_fcport_by_nportid(vha, &ea->id, 1);
 	if (fcport) {
+		if (fcport->flags & FCF_FCP2_DEVICE) {
+			ql_dbg(ql_dbg_disc, vha, 0x2115,
+			       "Delaying session delete for FCP2 portid=%06x %8phC ",
+			       fcport->d_id.b24, fcport->port_name);
+			return;
+		}
 		fcport->scan_needed = 1;
 		fcport->rscn_gen++;
 	}
@@ -1758,6 +1837,13 @@ void qla24xx_handle_relogin_event(scsi_qla_host_t *vha,
 void qla_handle_els_plogi_done(scsi_qla_host_t *vha,
 				      struct event_arg *ea)
 {
+	if (N2N_TOPO(vha->hw) && fcport_is_smaller(ea->fcport) &&
+	    vha->hw->flags.edif_enabled) {
+		/* check to see if App support Secure */
+		qla24xx_post_gpdb_work(vha, ea->fcport, 0);
+		return;
+	}
+
 	/* for pure Target Mode, PRLI will not be initiated */
 	if (vha->host->active_mode == MODE_TARGET)
 		return;
@@ -1902,7 +1988,7 @@ qla24xx_async_abort_command(srb_t *sp)
 
 	if (handle == req->num_outstanding_cmds) {
 		/* Command not found. */
-		return QLA_FUNCTION_FAILED;
+		return QLA_ERR_NOT_FOUND;
 	}
 	if (sp->type == SRB_FXIOCB_DCMD)
 		return qlafx00_fx_disc(vha, &vha->hw->mr.fcport,
@@ -1914,6 +2000,7 @@ qla24xx_async_abort_command(srb_t *sp)
 static void
 qla24xx_handle_prli_done_event(struct scsi_qla_host *vha, struct event_arg *ea)
 {
+	struct srb *sp;
 	WARN_ONCE(!qla2xxx_is_valid_mbs(ea->data[0]), "mbs: %#x\n",
 		  ea->data[0]);
 
@@ -1941,22 +2028,27 @@ qla24xx_handle_prli_done_event(struct scsi_qla_host *vha, struct event_arg *ea)
 			break;
 		}
 
+		sp = ea->sp;
 		ql_dbg(ql_dbg_disc, vha, 0x2118,
-		       "%s %d %8phC priority %s, fc4type %x\n",
+		       "%s %d %8phC priority %s, fc4type %x prev try %s\n",
 		       __func__, __LINE__, ea->fcport->port_name,
 		       vha->hw->fc4_type_priority == FC4_PRIORITY_FCP ?
-		       "FCP" : "NVMe", ea->fcport->fc4_type);
+		       "FCP" : "NVMe", ea->fcport->fc4_type,
+		       (sp->u.iocb_cmd.u.logio.flags & SRB_LOGIN_NVME_PRLI) ?
+			"NVME" : "FCP");
 
-		if (N2N_TOPO(vha->hw)) {
-			if (vha->hw->fc4_type_priority == FC4_PRIORITY_NVME) {
-				ea->fcport->fc4_type &= ~FS_FC4TYPE_NVME;
-				ea->fcport->fc4_type |= FS_FC4TYPE_FCP;
-			} else {
-				ea->fcport->fc4_type &= ~FS_FC4TYPE_FCP;
-				ea->fcport->fc4_type |= FS_FC4TYPE_NVME;
-			}
+		if (NVME_FCP_TARGET(ea->fcport)) {
+			if (sp->u.iocb_cmd.u.logio.flags & SRB_LOGIN_NVME_PRLI)
+				ea->fcport->do_prli_nvme = 0;
+			else
+				ea->fcport->do_prli_nvme = 1;
+		} else {
+			ea->fcport->do_prli_nvme = 0;
+		}
 
-			if (ea->fcport->n2n_link_reset_cnt < 3) {
+		if (N2N_TOPO(vha->hw)) {
+			if (ea->fcport->n2n_link_reset_cnt <
+			    vha->hw->login_retry_count) {
 				ea->fcport->n2n_link_reset_cnt++;
 				vha->relogin_jif = jiffies + 2 * HZ;
 				/*
@@ -1964,6 +2056,7 @@ qla24xx_handle_prli_done_event(struct scsi_qla_host *vha, struct event_arg *ea)
 				 * state machine
 				 */
 				set_bit(N2N_LINK_RESET, &vha->dpc_flags);
+				qla2xxx_wake_dpc(vha);
 			} else {
 				ql_log(ql_log_warn, vha, 0x2119,
 				       "%s %d %8phC Unable to reconnect\n",
@@ -1975,19 +2068,6 @@ qla24xx_handle_prli_done_event(struct scsi_qla_host *vha, struct event_arg *ea)
 			 * switch connect. login failed. Take connection down
 			 * and allow relogin to retrigger
 			 */
-			if (NVME_FCP_TARGET(ea->fcport)) {
-				ql_dbg(ql_dbg_disc, vha, 0x2118,
-				       "%s %d %8phC post %s prli\n",
-				       __func__, __LINE__,
-				       ea->fcport->port_name,
-				       (ea->fcport->fc4_type & FS_FC4TYPE_NVME)
-				       ? "NVMe" : "FCP");
-				if (vha->hw->fc4_type_priority == FC4_PRIORITY_NVME)
-					ea->fcport->fc4_type &= ~FS_FC4TYPE_NVME;
-				else
-					ea->fcport->fc4_type &= ~FS_FC4TYPE_FCP;
-			}
-
 			ea->fcport->flags &= ~FCF_ASYNC_SENT;
 			ea->fcport->keep_nport_handle = 0;
 			ea->fcport->logout_on_delete = 1;
@@ -2053,26 +2133,38 @@ qla24xx_handle_plogi_done_event(struct scsi_qla_host *vha, struct event_arg *ea)
 		 * force a relogin attempt via implicit LOGO, PLOGI, and PRLI
 		 * requests.
 		 */
-		if (NVME_TARGET(vha->hw, ea->fcport)) {
-			ql_dbg(ql_dbg_disc, vha, 0x2117,
-				"%s %d %8phC post prli\n",
-				__func__, __LINE__, ea->fcport->port_name);
-			qla24xx_post_prli_work(vha, ea->fcport);
-		} else {
-			ql_dbg(ql_dbg_disc, vha, 0x20ea,
-			    "%s %d %8phC LoopID 0x%x in use with %06x. post gpdb\n",
-			    __func__, __LINE__, ea->fcport->port_name,
-			    ea->fcport->loop_id, ea->fcport->d_id.b24);
-
+		if (vha->hw->flags.edif_enabled) {
 			set_bit(ea->fcport->loop_id, vha->hw->loop_id_map);
 			spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags);
 			ea->fcport->chip_reset = vha->hw->base_qpair->chip_reset;
 			ea->fcport->logout_on_delete = 1;
 			ea->fcport->send_els_logo = 0;
-			ea->fcport->fw_login_state = DSC_LS_PRLI_COMP;
+			ea->fcport->fw_login_state = DSC_LS_PLOGI_COMP;
 			spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
 
 			qla24xx_post_gpdb_work(vha, ea->fcport, 0);
+		} else {
+			if (NVME_TARGET(vha->hw, fcport)) {
+				ql_dbg(ql_dbg_disc, vha, 0x2117,
+				    "%s %d %8phC post prli\n",
+				    __func__, __LINE__, fcport->port_name);
+				qla24xx_post_prli_work(vha, fcport);
+			} else {
+				ql_dbg(ql_dbg_disc, vha, 0x20ea,
+				    "%s %d %8phC LoopID 0x%x in use with %06x. post gpdb\n",
+				    __func__, __LINE__, fcport->port_name,
+				    fcport->loop_id, fcport->d_id.b24);
+
+				set_bit(fcport->loop_id, vha->hw->loop_id_map);
+				spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags);
+				fcport->chip_reset = vha->hw->base_qpair->chip_reset;
+				fcport->logout_on_delete = 1;
+				fcport->send_els_logo = 0;
+				fcport->fw_login_state = DSC_LS_PRLI_COMP;
+				spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
+
+				qla24xx_post_gpdb_work(vha, fcport, 0);
+			}
 		}
 		break;
 	case MBS_COMMAND_ERROR:
@@ -3877,7 +3969,8 @@ enable_82xx_npiv:
 		}
 
 		/* Enable PUREX PASSTHRU */
-		if (ql2xrdpenable || ha->flags.scm_supported_f)
+		if (ql2xrdpenable || ha->flags.scm_supported_f ||
+		    ha->flags.edif_enabled)
 			qla25xx_set_els_cmds_supported(vha);
 	} else
 		goto failed;
@@ -4062,7 +4155,7 @@ qla24xx_update_fw_options(scsi_qla_host_t *vha)
 	}
 
 	/* Move PUREX, ABTS RX & RIDA to ATIOQ */
-	if (ql2xmvasynctoatio &&
+	if (ql2xmvasynctoatio && !ha->flags.edif_enabled &&
 	    (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha))) {
 		if (qla_tgt_mode_enabled(vha) ||
 		    qla_dual_mode_enabled(vha))
@@ -4081,16 +4174,30 @@ qla24xx_update_fw_options(scsi_qla_host_t *vha)
 		    qla_dual_mode_enabled(vha))
 			ha->fw_options[2] |= BIT_4;
 		else
-			ha->fw_options[2] &= ~BIT_4;
+			ha->fw_options[2] &= ~(BIT_4);
 
 		/* Reserve 1/2 of emergency exchanges for ELS.*/
 		if (qla2xuseresexchforels)
 			ha->fw_options[2] |= BIT_8;
 		else
 			ha->fw_options[2] &= ~BIT_8;
+
+		/*
+		 * N2N: set Secure=1 for PLOGI ACC and
+		 * fw shal not send PRLI after PLOGI Acc
+		 */
+		if (ha->flags.edif_enabled &&
+		    vha->e_dbell.db_flags & EDB_ACTIVE) {
+			ha->fw_options[3] |= BIT_15;
+			ha->flags.n2n_fw_acc_sec = 1;
+		} else {
+			ha->fw_options[3] &= ~BIT_15;
+			ha->flags.n2n_fw_acc_sec = 0;
+		}
 	}
 
-	if (ql2xrdpenable || ha->flags.scm_supported_f)
+	if (ql2xrdpenable || ha->flags.scm_supported_f ||
+	    ha->flags.edif_enabled)
 		ha->fw_options[1] |= ADD_FO1_ENABLE_PUREX_IOCB;
 
 	/* Enable Async 8130/8131 events -- transceiver insertion/removal */
@@ -4289,8 +4396,6 @@ qla2x00_init_rings(scsi_qla_host_t *vha)
 
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
-	ql_dbg(ql_dbg_init, vha, 0x00d1, "Issue init firmware.\n");
-
 	if (IS_QLAFX00(ha)) {
 		rval = qlafx00_init_firmware(vha, ha->init_cb_size);
 		goto next_check;
@@ -4299,6 +4404,12 @@ qla2x00_init_rings(scsi_qla_host_t *vha)
 	/* Update any ISP specific firmware options before initialization. */
 	ha->isp_ops->update_fw_options(vha);
 
+	ql_dbg(ql_dbg_init, vha, 0x00d1,
+	       "Issue init firmware FW opt 1-3= %08x %08x %08x.\n",
+	       le32_to_cpu(mid_init_cb->init_cb.firmware_options_1),
+	       le32_to_cpu(mid_init_cb->init_cb.firmware_options_2),
+	       le32_to_cpu(mid_init_cb->init_cb.firmware_options_3));
+
 	if (ha->flags.npiv_supported) {
 		if (ha->operating_mode == LOOP && !IS_CNA_CAPABLE(ha))
 			ha->max_npiv_vports = MIN_MULTI_ID_FABRIC - 1;
@@ -4531,11 +4642,11 @@ qla2x00_configure_hba(scsi_qla_host_t *vha)
 	/* initialize */
 	ha->min_external_loopid = SNS_FIRST_LOOP_ID;
 	ha->operating_mode = LOOP;
-	ha->switch_cap = 0;
 
 	switch (topo) {
 	case 0:
 		ql_dbg(ql_dbg_disc, vha, 0x200b, "HBA in NL topology.\n");
+		ha->switch_cap = 0;
 		ha->current_topology = ISP_CFG_NL;
 		strcpy(connect_type, "(Loop)");
 		break;
@@ -4549,6 +4660,7 @@ qla2x00_configure_hba(scsi_qla_host_t *vha)
 
 	case 2:
 		ql_dbg(ql_dbg_disc, vha, 0x200d, "HBA in N P2P topology.\n");
+		ha->switch_cap = 0;
 		ha->operating_mode = P2P;
 		ha->current_topology = ISP_CFG_N;
 		strcpy(connect_type, "(N_Port-to-N_Port)");
@@ -4565,6 +4677,7 @@ qla2x00_configure_hba(scsi_qla_host_t *vha)
 	default:
 		ql_dbg(ql_dbg_disc, vha, 0x200f,
 		    "HBA in unknown topology %x, using NL.\n", topo);
+		ha->switch_cap = 0;
 		ha->current_topology = ISP_CFG_NL;
 		strcpy(connect_type, "(Loop)");
 		break;
@@ -4577,7 +4690,10 @@ qla2x00_configure_hba(scsi_qla_host_t *vha)
 	id.b.al_pa = al_pa;
 	id.b.rsvd_1 = 0;
 	spin_lock_irqsave(&ha->hardware_lock, flags);
-	if (!(topo == 2 && ha->flags.n2n_bigger))
+	if (vha->hw->flags.edif_enabled) {
+		if (topo != 2)
+			qlt_update_host_map(vha, id);
+	} else if (!(topo == 2 && ha->flags.n2n_bigger))
 		qlt_update_host_map(vha, id);
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
@@ -5071,6 +5187,16 @@ qla2x00_alloc_fcport(scsi_qla_host_t *vha, gfp_t flags)
 	INIT_LIST_HEAD(&fcport->sess_cmd_list);
 	spin_lock_init(&fcport->sess_cmd_lock);
 
+	spin_lock_init(&fcport->edif.sa_list_lock);
+	INIT_LIST_HEAD(&fcport->edif.tx_sa_list);
+	INIT_LIST_HEAD(&fcport->edif.rx_sa_list);
+
+	if (vha->e_dbell.db_flags == EDB_ACTIVE)
+		fcport->edif.app_started = 1;
+
+	spin_lock_init(&fcport->edif.indx_list_lock);
+	INIT_LIST_HEAD(&fcport->edif.edif_indx_list);
+
 	return fcport;
 }
 
@@ -5084,8 +5210,13 @@ qla2x00_free_fcport(fc_port_t *fcport)
 
 		fcport->ct_desc.ct_sns = NULL;
 	}
+
+	qla_edif_flush_sa_ctl_lists(fcport);
 	list_del(&fcport->list);
 	qla2x00_clear_loop_id(fcport);
+
+	qla_edif_list_del(fcport);
+
 	kfree(fcport);
 }
 
@@ -5204,6 +5335,16 @@ qla2x00_configure_loop(scsi_qla_host_t *vha)
 			    "LOOP READY.\n");
 			ha->flags.fw_init_done = 1;
 
+			if (ha->flags.edif_enabled &&
+			    !(vha->e_dbell.db_flags & EDB_ACTIVE) &&
+			    N2N_TOPO(vha->hw)) {
+				/*
+				 * use port online to wake up app to get ready
+				 * for authentication
+				 */
+				qla2x00_post_aen_work(vha, FCH_EVT_PORT_ONLINE, 0);
+			}
+
 			/*
 			 * Process any ATIO queue entries that came in
 			 * while we weren't online.
@@ -5223,7 +5364,8 @@ qla2x00_configure_loop(scsi_qla_host_t *vha)
 		    "%s *** FAILED ***.\n", __func__);
 	} else {
 		ql_dbg(ql_dbg_disc, vha, 0x206b,
-		    "%s: exiting normally.\n", __func__);
+		    "%s: exiting normally. local port wwpn %8phN id %06x)\n",
+		    __func__, vha->port_name, vha->d_id.b24);
 	}
 
 	/* Restore state if a resync event occurred during processing */
@@ -5243,6 +5385,8 @@ static int qla2x00_configure_n2n_loop(scsi_qla_host_t *vha)
 	unsigned long flags;
 	fc_port_t *fcport;
 
+	ql_dbg(ql_dbg_disc, vha, 0x206a, "%s %d.\n", __func__, __LINE__);
+
 	if (test_and_clear_bit(N2N_LOGIN_NEEDED, &vha->dpc_flags))
 		set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
 
@@ -6459,13 +6603,13 @@ void
 qla2x00_update_fcports(scsi_qla_host_t *base_vha)
 {
 	fc_port_t *fcport;
-	struct scsi_qla_host *vha;
+	struct scsi_qla_host *vha, *tvp;
 	struct qla_hw_data *ha = base_vha->hw;
 	unsigned long flags;
 
 	spin_lock_irqsave(&ha->vport_slock, flags);
 	/* Go with deferred removal of rport references. */
-	list_for_each_entry(vha, &base_vha->hw->vp_list, list) {
+	list_for_each_entry_safe(vha, tvp, &base_vha->hw->vp_list, list) {
 		atomic_inc(&vha->vref_count);
 		list_for_each_entry(fcport, &vha->vp_fcports, list) {
 			if (fcport->drport &&
@@ -6810,7 +6954,8 @@ void
 qla2x00_quiesce_io(scsi_qla_host_t *vha)
 {
 	struct qla_hw_data *ha = vha->hw;
-	struct scsi_qla_host *vp;
+	struct scsi_qla_host *vp, *tvp;
+	unsigned long flags;
 
 	ql_dbg(ql_dbg_dpc, vha, 0x401d,
 	    "Quiescing I/O - ha=%p.\n", ha);
@@ -6819,8 +6964,18 @@ qla2x00_quiesce_io(scsi_qla_host_t *vha)
 	if (atomic_read(&vha->loop_state) != LOOP_DOWN) {
 		atomic_set(&vha->loop_state, LOOP_DOWN);
 		qla2x00_mark_all_devices_lost(vha);
-		list_for_each_entry(vp, &ha->vp_list, list)
+
+		spin_lock_irqsave(&ha->vport_slock, flags);
+		list_for_each_entry_safe(vp, tvp, &ha->vp_list, list) {
+			atomic_inc(&vp->vref_count);
+			spin_unlock_irqrestore(&ha->vport_slock, flags);
+
 			qla2x00_mark_all_devices_lost(vp);
+
+			spin_lock_irqsave(&ha->vport_slock, flags);
+			atomic_dec(&vp->vref_count);
+		}
+		spin_unlock_irqrestore(&ha->vport_slock, flags);
 	} else {
 		if (!atomic_read(&vha->loop_down_timer))
 			atomic_set(&vha->loop_down_timer,
@@ -6835,7 +6990,7 @@ void
 qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha)
 {
 	struct qla_hw_data *ha = vha->hw;
-	struct scsi_qla_host *vp;
+	struct scsi_qla_host *vp, *tvp;
 	unsigned long flags;
 	fc_port_t *fcport;
 	u16 i;
@@ -6903,7 +7058,7 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha)
 		qla2x00_mark_all_devices_lost(vha);
 
 		spin_lock_irqsave(&ha->vport_slock, flags);
-		list_for_each_entry(vp, &ha->vp_list, list) {
+		list_for_each_entry_safe(vp, tvp, &ha->vp_list, list) {
 			atomic_inc(&vp->vref_count);
 			spin_unlock_irqrestore(&ha->vport_slock, flags);
 
@@ -6925,7 +7080,7 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha)
 		fcport->scan_state = 0;
 	}
 	spin_lock_irqsave(&ha->vport_slock, flags);
-	list_for_each_entry(vp, &ha->vp_list, list) {
+	list_for_each_entry_safe(vp, tvp, &ha->vp_list, list) {
 		atomic_inc(&vp->vref_count);
 		spin_unlock_irqrestore(&ha->vport_slock, flags);
 
@@ -6969,7 +7124,7 @@ qla2x00_abort_isp(scsi_qla_host_t *vha)
 	int rval;
 	uint8_t        status = 0;
 	struct qla_hw_data *ha = vha->hw;
-	struct scsi_qla_host *vp;
+	struct scsi_qla_host *vp, *tvp;
 	struct req_que *req = ha->req_q_map[0];
 	unsigned long flags;
 
@@ -7125,7 +7280,7 @@ qla2x00_abort_isp(scsi_qla_host_t *vha)
 		ql_dbg(ql_dbg_taskm, vha, 0x8022, "%s succeeded.\n", __func__);
 		qla2x00_configure_hba(vha);
 		spin_lock_irqsave(&ha->vport_slock, flags);
-		list_for_each_entry(vp, &ha->vp_list, list) {
+		list_for_each_entry_safe(vp, tvp, &ha->vp_list, list) {
 			if (vp->vp_idx) {
 				atomic_inc(&vp->vref_count);
 				spin_unlock_irqrestore(&ha->vport_slock, flags);
@@ -8810,7 +8965,7 @@ qla82xx_restart_isp(scsi_qla_host_t *vha)
 {
 	int status, rval;
 	struct qla_hw_data *ha = vha->hw;
-	struct scsi_qla_host *vp;
+	struct scsi_qla_host *vp, *tvp;
 	unsigned long flags;
 
 	status = qla2x00_init_rings(vha);
@@ -8882,7 +9037,7 @@ qla82xx_restart_isp(scsi_qla_host_t *vha)
 		    "qla82xx_restart_isp succeeded.\n");
 
 		spin_lock_irqsave(&ha->vport_slock, flags);
-		list_for_each_entry(vp, &ha->vp_list, list) {
+		list_for_each_entry_safe(vp, tvp, &ha->vp_list, list) {
 			if (vp->vp_idx) {
 				atomic_inc(&vp->vref_count);
 				spin_unlock_irqrestore(&ha->vport_slock, flags);
diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h
index 82937c6bd9c4..5f3b7995cc8f 100644
--- a/drivers/scsi/qla2xxx/qla_inline.h
+++ b/drivers/scsi/qla2xxx/qla_inline.h
@@ -478,3 +478,19 @@ bool qla_pci_disconnected(struct scsi_qla_host *vha,
 	}
 	return ret;
 }
+
+static inline bool
+fcport_is_smaller(fc_port_t *fcport)
+{
+	if (wwn_to_u64(fcport->port_name) <
+		wwn_to_u64(fcport->vha->port_name))
+		return true;
+	else
+		return false;
+}
+
+static inline bool
+fcport_is_bigger(fc_port_t *fcport)
+{
+	return !fcport_is_smaller(fcport);
+}
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index d0ee843f6b04..9d4ad1d2b00a 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -118,7 +118,7 @@ qla2x00_prep_cont_type0_iocb(struct scsi_qla_host *vha)
  *
  * Returns a pointer to the continuation type 1 IOCB packet.
  */
-static inline cont_a64_entry_t *
+cont_a64_entry_t *
 qla2x00_prep_cont_type1_iocb(scsi_qla_host_t *vha, struct req_que *req)
 {
 	cont_a64_entry_t *cont_pkt;
@@ -145,7 +145,6 @@ inline int
 qla24xx_configure_prot_mode(srb_t *sp, uint16_t *fw_prot_opts)
 {
 	struct scsi_cmnd *cmd = GET_CMD_SP(sp);
-	uint8_t	guard = scsi_host_get_guard(cmd->device->host);
 
 	/* We always use DIFF Bundling for best performance */
 	*fw_prot_opts = 0;
@@ -166,7 +165,7 @@ qla24xx_configure_prot_mode(srb_t *sp, uint16_t *fw_prot_opts)
 		break;
 	case SCSI_PROT_READ_PASS:
 	case SCSI_PROT_WRITE_PASS:
-		if (guard & SHOST_DIX_GUARD_IP)
+		if (cmd->prot_flags & SCSI_PROT_IP_CHECKSUM)
 			*fw_prot_opts |= PO_MODE_DIF_TCP_CKSUM;
 		else
 			*fw_prot_opts |= PO_MODE_DIF_PASS;
@@ -176,6 +175,9 @@ qla24xx_configure_prot_mode(srb_t *sp, uint16_t *fw_prot_opts)
 		break;
 	}
 
+	if (!(cmd->prot_flags & SCSI_PROT_GUARD_CHECK))
+		*fw_prot_opts |= PO_DISABLE_GUARD_CHECK;
+
 	return scsi_prot_sg_count(cmd);
 }
 
@@ -772,74 +774,19 @@ qla24xx_set_t10dif_tags(srb_t *sp, struct fw_dif_context *pkt,
 {
 	struct scsi_cmnd *cmd = GET_CMD_SP(sp);
 
-	switch (scsi_get_prot_type(cmd)) {
-	case SCSI_PROT_DIF_TYPE0:
-		/*
-		 * No check for ql2xenablehba_err_chk, as it would be an
-		 * I/O error if hba tag generation is not done.
-		 */
-		pkt->ref_tag = cpu_to_le32((uint32_t)
-		    (0xffffffff & scsi_get_lba(cmd)));
-
-		if (!qla2x00_hba_err_chk_enabled(sp))
-			break;
+	pkt->ref_tag = cpu_to_le32(scsi_prot_ref_tag(cmd));
 
+	if (cmd->prot_flags & SCSI_PROT_REF_CHECK &&
+	    qla2x00_hba_err_chk_enabled(sp)) {
 		pkt->ref_tag_mask[0] = 0xff;
 		pkt->ref_tag_mask[1] = 0xff;
 		pkt->ref_tag_mask[2] = 0xff;
 		pkt->ref_tag_mask[3] = 0xff;
-		break;
-
-	/*
-	 * For TYPE 2 protection: 16 bit GUARD + 32 bit REF tag has to
-	 * match LBA in CDB + N
-	 */
-	case SCSI_PROT_DIF_TYPE2:
-		pkt->app_tag = cpu_to_le16(0);
-		pkt->app_tag_mask[0] = 0x0;
-		pkt->app_tag_mask[1] = 0x0;
-
-		pkt->ref_tag = cpu_to_le32((uint32_t)
-		    (0xffffffff & scsi_get_lba(cmd)));
-
-		if (!qla2x00_hba_err_chk_enabled(sp))
-			break;
-
-		/* enable ALL bytes of the ref tag */
-		pkt->ref_tag_mask[0] = 0xff;
-		pkt->ref_tag_mask[1] = 0xff;
-		pkt->ref_tag_mask[2] = 0xff;
-		pkt->ref_tag_mask[3] = 0xff;
-		break;
-
-	/* For Type 3 protection: 16 bit GUARD only */
-	case SCSI_PROT_DIF_TYPE3:
-		pkt->ref_tag_mask[0] = pkt->ref_tag_mask[1] =
-			pkt->ref_tag_mask[2] = pkt->ref_tag_mask[3] =
-								0x00;
-		break;
-
-	/*
-	 * For TYpe 1 protection: 16 bit GUARD tag, 32 bit REF tag, and
-	 * 16 bit app tag.
-	 */
-	case SCSI_PROT_DIF_TYPE1:
-		pkt->ref_tag = cpu_to_le32((uint32_t)
-		    (0xffffffff & scsi_get_lba(cmd)));
-		pkt->app_tag = cpu_to_le16(0);
-		pkt->app_tag_mask[0] = 0x0;
-		pkt->app_tag_mask[1] = 0x0;
-
-		if (!qla2x00_hba_err_chk_enabled(sp))
-			break;
-
-		/* enable ALL bytes of the ref tag */
-		pkt->ref_tag_mask[0] = 0xff;
-		pkt->ref_tag_mask[1] = 0xff;
-		pkt->ref_tag_mask[2] = 0xff;
-		pkt->ref_tag_mask[3] = 0xff;
-		break;
 	}
+
+	pkt->app_tag = cpu_to_le16(0);
+	pkt->app_tag_mask[0] = 0x0;
+	pkt->app_tag_mask[1] = 0x0;
 }
 
 int
@@ -905,7 +852,7 @@ qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *ha, srb_t *sp,
 	memset(&sgx, 0, sizeof(struct qla2_sgx));
 	if (sp) {
 		cmd = GET_CMD_SP(sp);
-		prot_int = cmd->device->sector_size;
+		prot_int = scsi_prot_interval(cmd);
 
 		sgx.tot_bytes = scsi_bufflen(cmd);
 		sgx.cur_sg = scsi_sglist(cmd);
@@ -1605,6 +1552,9 @@ qla24xx_start_scsi(srb_t *sp)
 	struct scsi_qla_host *vha = sp->vha;
 	struct qla_hw_data *ha = vha->hw;
 
+	if (sp->fcport->edif.enable  && (sp->fcport->flags & FCF_FCSP_DEVICE))
+		return qla28xx_start_scsi_edif(sp);
+
 	/* Setup device pointers. */
 	req = vha->req;
 	rsp = req->rsp;
@@ -1963,6 +1913,9 @@ qla2xxx_start_scsi_mq(srb_t *sp)
 	struct qla_hw_data *ha = vha->hw;
 	struct qla_qpair *qpair = sp->qpair;
 
+	if (sp->fcport->edif.enable && (sp->fcport->flags & FCF_FCSP_DEVICE))
+		return qla28xx_start_scsi_edif(sp);
+
 	/* Acquire qpair specific lock */
 	spin_lock_irqsave(&qpair->qp_lock, flags);
 
@@ -2466,6 +2419,12 @@ qla24xx_login_iocb(srb_t *sp, struct logio_entry_24xx *logio)
 			logio->control_flags |= cpu_to_le16(LCF_COND_PLOGI);
 		if (lio->u.logio.flags & SRB_LOGIN_SKIP_PRLI)
 			logio->control_flags |= cpu_to_le16(LCF_SKIP_PRLI);
+		if (lio->u.logio.flags & SRB_LOGIN_FCSP) {
+			logio->control_flags |=
+			    cpu_to_le16(LCF_COMMON_FEAT | LCF_SKIP_PRLI);
+			logio->io_parameter[0] =
+			    cpu_to_le32(LIO_COMM_FEAT_FCSP | LIO_COMM_FEAT_CIO);
+		}
 	}
 	logio->nport_handle = cpu_to_le16(sp->fcport->loop_id);
 	logio->port_id[0] = sp->fcport->d_id.b.al_pa;
@@ -2789,7 +2748,10 @@ qla24xx_els_logo_iocb(srb_t *sp, struct els_entry_24xx *els_iocb)
 	els_iocb->s_id[0] = vha->d_id.b.domain;
 
 	if (elsio->u.els_logo.els_cmd == ELS_DCMD_PLOGI) {
-		els_iocb->control_flags = 0;
+		if (vha->hw->flags.edif_enabled)
+			els_iocb->control_flags = cpu_to_le16(ECF_SEC_LOGIN);
+		else
+			els_iocb->control_flags = 0;
 		els_iocb->tx_byte_count = els_iocb->tx_len =
 			cpu_to_le32(sizeof(struct els_plogi_payload));
 		put_unaligned_le64(elsio->u.els_plogi.els_plogi_pyld_dma,
@@ -2806,7 +2768,6 @@ qla24xx_els_logo_iocb(srb_t *sp, struct els_entry_24xx *els_iocb)
 		    (uint8_t *)els_iocb,
 		    sizeof(*els_iocb));
 	} else {
-		els_iocb->control_flags = cpu_to_le16(1 << 13);
 		els_iocb->tx_byte_count =
 			cpu_to_le32(sizeof(struct els_logo_payload));
 		put_unaligned_le64(elsio->u.els_logo.els_logo_pyld_dma,
@@ -3030,7 +2991,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
 	qla2x00_set_fcport_disc_state(fcport, DSC_LOGIN_PEND);
 	elsio = &sp->u.iocb_cmd;
 	ql_dbg(ql_dbg_io, vha, 0x3073,
-	    "Enter: PLOGI portid=%06x\n", fcport->d_id.b24);
+	       "%s Enter: PLOGI portid=%06x\n", __func__, fcport->d_id.b24);
 
 	sp->type = SRB_ELS_DCMD;
 	sp->name = "ELS_DCMD";
@@ -3073,6 +3034,13 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
 	elsio->u.els_plogi.els_cmd = els_opcode;
 	elsio->u.els_plogi.els_plogi_pyld->opcode = els_opcode;
 
+	if (els_opcode == ELS_DCMD_PLOGI && vha->hw->flags.edif_enabled &&
+	    vha->e_dbell.db_flags & EDB_ACTIVE) {
+		struct fc_els_flogi *p = ptr;
+
+		p->fl_csp.sp_features |= cpu_to_be16(FC_SP_FT_SEC);
+	}
+
 	ql_dbg(ql_dbg_disc + ql_dbg_buffer, vha, 0x3073, "PLOGI buffer:\n");
 	ql_dump_buffer(ql_dbg_disc + ql_dbg_buffer, vha, 0x0109,
 	    (uint8_t *)elsio->u.els_plogi.els_plogi_pyld,
@@ -3106,6 +3074,43 @@ done:
 	return rval;
 }
 
+/* it is assume qpair lock is held */
+void qla_els_pt_iocb(struct scsi_qla_host *vha,
+	struct els_entry_24xx *els_iocb,
+	struct qla_els_pt_arg *a)
+{
+	els_iocb->entry_type = ELS_IOCB_TYPE;
+	els_iocb->entry_count = 1;
+	els_iocb->sys_define = 0;
+	els_iocb->entry_status = 0;
+	els_iocb->handle = QLA_SKIP_HANDLE;
+	els_iocb->nport_handle = a->nport_handle;
+	els_iocb->rx_xchg_address = a->rx_xchg_address;
+	els_iocb->tx_dsd_count = cpu_to_le16(1);
+	els_iocb->vp_index = a->vp_idx;
+	els_iocb->sof_type = EST_SOFI3;
+	els_iocb->rx_dsd_count = cpu_to_le16(0);
+	els_iocb->opcode = a->els_opcode;
+
+	els_iocb->d_id[0] = a->did.b.al_pa;
+	els_iocb->d_id[1] = a->did.b.area;
+	els_iocb->d_id[2] = a->did.b.domain;
+	/* For SID the byte order is different than DID */
+	els_iocb->s_id[1] = vha->d_id.b.al_pa;
+	els_iocb->s_id[2] = vha->d_id.b.area;
+	els_iocb->s_id[0] = vha->d_id.b.domain;
+
+	els_iocb->control_flags = cpu_to_le16(a->control_flags);
+
+	els_iocb->tx_byte_count = cpu_to_le32(a->tx_byte_count);
+	els_iocb->tx_len = cpu_to_le32(a->tx_len);
+	put_unaligned_le64(a->tx_addr, &els_iocb->tx_address);
+
+	els_iocb->rx_byte_count = cpu_to_le32(a->rx_byte_count);
+	els_iocb->rx_len = cpu_to_le32(a->rx_len);
+	put_unaligned_le64(a->rx_addr, &els_iocb->rx_address);
+}
+
 static void
 qla24xx_els_iocb(srb_t *sp, struct els_entry_24xx *els_iocb)
 {
@@ -3700,6 +3705,16 @@ static void qla2x00_send_notify_ack_iocb(srb_t *sp,
 	nack->u.isp24.srr_reject_code = 0;
 	nack->u.isp24.srr_reject_code_expl = 0;
 	nack->u.isp24.vp_index = ntfy->u.isp24.vp_index;
+
+	if (ntfy->u.isp24.status_subcode == ELS_PLOGI &&
+	    (le16_to_cpu(ntfy->u.isp24.flags) & NOTIFY24XX_FLAGS_FCSP) &&
+	    sp->vha->hw->flags.edif_enabled) {
+		ql_dbg(ql_dbg_disc, sp->vha, 0x3074,
+		    "%s PLOGI NACK sent with FC SECURITY bit, hdl=%x, loopid=%x, to pid %06x\n",
+		    sp->name, sp->handle, sp->fcport->loop_id,
+		    sp->fcport->d_id.b24);
+		nack->u.isp24.flags |= cpu_to_le16(NOTIFY_ACK_FLAGS_FCSP);
+	}
 }
 
 /*
@@ -3804,6 +3819,10 @@ qla2x00_start_sp(srb_t *sp)
 	case SRB_ELS_CMD_HST:
 		qla24xx_els_iocb(sp, pkt);
 		break;
+	case SRB_ELS_CMD_HST_NOLOGIN:
+		qla_els_pt_iocb(sp->vha, pkt,  &sp->u.bsg_cmd.u.els_arg);
+		((struct els_entry_24xx *)pkt)->handle = sp->handle;
+		break;
 	case SRB_CT_CMD:
 		IS_FWI2_CAPABLE(ha) ?
 		    qla24xx_ct_iocb(sp, pkt) :
@@ -3851,6 +3870,12 @@ qla2x00_start_sp(srb_t *sp)
 	case SRB_PRLO_CMD:
 		qla24xx_prlo_iocb(sp, pkt);
 		break;
+	case SRB_SA_UPDATE:
+		qla24xx_sa_update_iocb(sp, pkt);
+		break;
+	case SRB_SA_REPLACE:
+		qla24xx_sa_replace_iocb(sp, pkt);
+		break;
 	default:
 		break;
 	}
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index d9fb093a60a1..ece60267b971 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -170,6 +170,149 @@ qla24xx_process_abts(struct scsi_qla_host *vha, struct purex_item *pkt)
 }
 
 /**
+ * __qla_consume_iocb - this routine is used to tell fw driver has processed
+ *   or consumed the head IOCB along with the continuation IOCB's from the
+ *   provided respond queue.
+ * @vha: host adapter pointer
+ * @pkt: pointer to current packet.  On return, this pointer shall move
+ *       to the next packet.
+ * @rsp: respond queue pointer.
+ *
+ * it is assumed pkt is the head iocb, not the continuation iocbk
+ */
+void __qla_consume_iocb(struct scsi_qla_host *vha,
+	void **pkt, struct rsp_que **rsp)
+{
+	struct rsp_que *rsp_q = *rsp;
+	response_t *new_pkt;
+	uint16_t entry_count_remaining;
+	struct purex_entry_24xx *purex = *pkt;
+
+	entry_count_remaining = purex->entry_count;
+	while (entry_count_remaining > 0) {
+		new_pkt = rsp_q->ring_ptr;
+		*pkt = new_pkt;
+
+		rsp_q->ring_index++;
+		if (rsp_q->ring_index == rsp_q->length) {
+			rsp_q->ring_index = 0;
+			rsp_q->ring_ptr = rsp_q->ring;
+		} else {
+			rsp_q->ring_ptr++;
+		}
+
+		new_pkt->signature = RESPONSE_PROCESSED;
+		/* flush signature */
+		wmb();
+		--entry_count_remaining;
+	}
+}
+
+/**
+ * __qla_copy_purex_to_buffer - extract ELS payload from Purex IOCB
+ *    and save to provided buffer
+ * @vha: host adapter pointer
+ * @pkt: pointer Purex IOCB
+ * @rsp: respond queue
+ * @buf: extracted ELS payload copy here
+ * @buf_len: buffer length
+ */
+int __qla_copy_purex_to_buffer(struct scsi_qla_host *vha,
+	void **pkt, struct rsp_que **rsp, u8 *buf, u32 buf_len)
+{
+	struct purex_entry_24xx *purex = *pkt;
+	struct rsp_que *rsp_q = *rsp;
+	sts_cont_entry_t *new_pkt;
+	uint16_t no_bytes = 0, total_bytes = 0, pending_bytes = 0;
+	uint16_t buffer_copy_offset = 0;
+	uint16_t entry_count_remaining;
+	u16 tpad;
+
+	entry_count_remaining = purex->entry_count;
+	total_bytes = (le16_to_cpu(purex->frame_size) & 0x0FFF)
+		- PURX_ELS_HEADER_SIZE;
+
+	/*
+	 * end of payload may not end in 4bytes boundary.  Need to
+	 * round up / pad for room to swap, before saving data
+	 */
+	tpad = roundup(total_bytes, 4);
+
+	if (buf_len < tpad) {
+		ql_dbg(ql_dbg_async, vha, 0x5084,
+		    "%s buffer is too small %d < %d\n",
+		    __func__, buf_len, tpad);
+		__qla_consume_iocb(vha, pkt, rsp);
+		return -EIO;
+	}
+
+	pending_bytes = total_bytes = tpad;
+	no_bytes = (pending_bytes > sizeof(purex->els_frame_payload))  ?
+	    sizeof(purex->els_frame_payload) : pending_bytes;
+
+	memcpy(buf, &purex->els_frame_payload[0], no_bytes);
+	buffer_copy_offset += no_bytes;
+	pending_bytes -= no_bytes;
+	--entry_count_remaining;
+
+	((response_t *)purex)->signature = RESPONSE_PROCESSED;
+	/* flush signature */
+	wmb();
+
+	do {
+		while ((total_bytes > 0) && (entry_count_remaining > 0)) {
+			new_pkt = (sts_cont_entry_t *)rsp_q->ring_ptr;
+			*pkt = new_pkt;
+
+			if (new_pkt->entry_type != STATUS_CONT_TYPE) {
+				ql_log(ql_log_warn, vha, 0x507a,
+				    "Unexpected IOCB type, partial data 0x%x\n",
+				    buffer_copy_offset);
+				break;
+			}
+
+			rsp_q->ring_index++;
+			if (rsp_q->ring_index == rsp_q->length) {
+				rsp_q->ring_index = 0;
+				rsp_q->ring_ptr = rsp_q->ring;
+			} else {
+				rsp_q->ring_ptr++;
+			}
+			no_bytes = (pending_bytes > sizeof(new_pkt->data)) ?
+			    sizeof(new_pkt->data) : pending_bytes;
+			if ((buffer_copy_offset + no_bytes) <= total_bytes) {
+				memcpy((buf + buffer_copy_offset), new_pkt->data,
+				    no_bytes);
+				buffer_copy_offset += no_bytes;
+				pending_bytes -= no_bytes;
+				--entry_count_remaining;
+			} else {
+				ql_log(ql_log_warn, vha, 0x5044,
+				    "Attempt to copy more that we got, optimizing..%x\n",
+				    buffer_copy_offset);
+				memcpy((buf + buffer_copy_offset), new_pkt->data,
+				    total_bytes - buffer_copy_offset);
+			}
+
+			((response_t *)new_pkt)->signature = RESPONSE_PROCESSED;
+			/* flush signature */
+			wmb();
+		}
+
+		if (pending_bytes != 0 || entry_count_remaining != 0) {
+			ql_log(ql_log_fatal, vha, 0x508b,
+			    "Dropping partial Data, underrun bytes = 0x%x, entry cnts 0x%x\n",
+			    total_bytes, entry_count_remaining);
+			return -EIO;
+		}
+	} while (entry_count_remaining > 0);
+
+	be32_to_cpu_array((u32 *)buf, (__be32 *)buf, total_bytes >> 2);
+
+	return 0;
+}
+
+/**
  * qla2100_intr_handler() - Process interrupts for the ISP2100 and ISP2200.
  * @irq: interrupt number
  * @dev_id: SCSI driver HA context
@@ -505,7 +648,7 @@ const char *
 qla2x00_get_link_speed_str(struct qla_hw_data *ha, uint16_t speed)
 {
 	static const char *const link_speeds[] = {
-		"1", "2", "?", "4", "8", "16", "32", "10"
+		"1", "2", "?", "4", "8", "16", "32", "64", "10"
 	};
 #define	QLA_LAST_SPEED (ARRAY_SIZE(link_speeds) - 1)
 
@@ -1727,6 +1870,9 @@ qla2x00_get_sp_from_handle(scsi_qla_host_t *vha, const char *func,
 	srb_t *sp;
 	uint16_t index;
 
+	if (pkt->handle == QLA_SKIP_HANDLE)
+		return NULL;
+
 	index = LSW(pkt->handle);
 	if (index >= req->num_outstanding_cmds) {
 		ql_log(ql_log_warn, vha, 0x5031,
@@ -1971,7 +2117,7 @@ qla2x00_ct_entry(scsi_qla_host_t *vha, struct req_que *req,
 }
 
 static void
-qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req,
+qla24xx_els_ct_entry(scsi_qla_host_t *v, struct req_que *req,
     struct sts_entry_24xx *pkt, int iocb_type)
 {
 	struct els_sts_entry_24xx *ese = (struct els_sts_entry_24xx *)pkt;
@@ -1982,18 +2128,58 @@ qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req,
 	struct fc_bsg_reply *bsg_reply;
 	uint16_t comp_status;
 	uint32_t fw_status[3];
-	int res;
+	int res, logit = 1;
 	struct srb_iocb *els;
+	uint n;
+	scsi_qla_host_t *vha;
+	struct els_sts_entry_24xx *e = (struct els_sts_entry_24xx *)pkt;
 
-	sp = qla2x00_get_sp_from_handle(vha, func, req, pkt);
+	sp = qla2x00_get_sp_from_handle(v, func, req, pkt);
 	if (!sp)
 		return;
+	bsg_job = sp->u.bsg_job;
+	vha = sp->vha;
 
 	type = NULL;
+
+	comp_status = fw_status[0] = le16_to_cpu(pkt->comp_status);
+	fw_status[1] = le32_to_cpu(((struct els_sts_entry_24xx *)pkt)->error_subcode_1);
+	fw_status[2] = le32_to_cpu(((struct els_sts_entry_24xx *)pkt)->error_subcode_2);
+
 	switch (sp->type) {
 	case SRB_ELS_CMD_RPT:
 	case SRB_ELS_CMD_HST:
+		type = "rpt hst";
+		break;
+	case SRB_ELS_CMD_HST_NOLOGIN:
 		type = "els";
+		{
+			struct els_entry_24xx *els = (void *)pkt;
+			struct qla_bsg_auth_els_request *p =
+				(struct qla_bsg_auth_els_request *)bsg_job->request;
+
+			ql_dbg(ql_dbg_user, vha, 0x700f,
+			     "%s %s. portid=%02x%02x%02x status %x xchg %x bsg ptr %p\n",
+			     __func__, sc_to_str(p->e.sub_cmd),
+			     e->d_id[2], e->d_id[1], e->d_id[0],
+			     comp_status, p->e.extra_rx_xchg_address, bsg_job);
+
+			if (!(le16_to_cpu(els->control_flags) & ECF_PAYLOAD_DESCR_MASK)) {
+				if (sp->remap.remapped) {
+					n = sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+						bsg_job->reply_payload.sg_cnt,
+						sp->remap.rsp.buf,
+						sp->remap.rsp.len);
+					ql_dbg(ql_dbg_user + ql_dbg_verbose, vha, 0x700e,
+					   "%s: SG copied %x of %x\n",
+					   __func__, n, sp->remap.rsp.len);
+				} else {
+					ql_dbg(ql_dbg_user, vha, 0x700f,
+					   "%s: NOT REMAPPED (error)...!!!\n",
+					   __func__);
+				}
+			}
+		}
 		break;
 	case SRB_CT_CMD:
 		type = "ct pass-through";
@@ -2023,10 +2209,6 @@ qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req,
 		return;
 	}
 
-	comp_status = fw_status[0] = le16_to_cpu(pkt->comp_status);
-	fw_status[1] = le32_to_cpu(ese->error_subcode_1);
-	fw_status[2] = le32_to_cpu(ese->error_subcode_2);
-
 	if (iocb_type == ELS_IOCB_TYPE) {
 		els = &sp->u.iocb_cmd;
 		els->u.els_plogi.fw_status[0] = cpu_to_le32(fw_status[0]);
@@ -2040,15 +2222,53 @@ qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req,
 				res =  DID_OK << 16;
 				els->u.els_plogi.len = cpu_to_le16(le32_to_cpu(
 					ese->total_byte_count));
+
+				if (sp->remap.remapped &&
+				    ((u8 *)sp->remap.rsp.buf)[0] == ELS_LS_ACC) {
+					ql_dbg(ql_dbg_user, vha, 0x503f,
+					    "%s IOCB Done LS_ACC %02x%02x%02x -> %02x%02x%02x",
+					    __func__, e->s_id[0], e->s_id[2], e->s_id[1],
+					    e->d_id[2], e->d_id[1], e->d_id[0]);
+					logit = 0;
+				}
+
+			} else if (comp_status == CS_PORT_LOGGED_OUT) {
+				els->u.els_plogi.len = 0;
+				res = DID_IMM_RETRY << 16;
+				qlt_schedule_sess_for_deletion(sp->fcport);
 			} else {
 				els->u.els_plogi.len = 0;
 				res = DID_ERROR << 16;
 			}
+
+			if (logit) {
+				if (sp->remap.remapped &&
+				    ((u8 *)sp->remap.rsp.buf)[0] == ELS_LS_RJT) {
+					ql_dbg(ql_dbg_user, vha, 0x503f,
+					    "%s IOCB Done LS_RJT hdl=%x comp_status=0x%x\n",
+					    type, sp->handle, comp_status);
+
+					ql_dbg(ql_dbg_user, vha, 0x503f,
+					    "subcode 1=0x%x subcode 2=0x%x bytes=0x%x %02x%02x%02x -> %02x%02x%02x\n",
+					    fw_status[1], fw_status[2],
+					    le32_to_cpu(((struct els_sts_entry_24xx *)
+						pkt)->total_byte_count),
+					    e->s_id[0], e->s_id[2], e->s_id[1],
+					    e->d_id[2], e->d_id[1], e->d_id[0]);
+				} else {
+					ql_log(ql_log_info, vha, 0x503f,
+					    "%s IOCB Done hdl=%x comp_status=0x%x\n",
+					    type, sp->handle, comp_status);
+					ql_log(ql_log_info, vha, 0x503f,
+					    "subcode 1=0x%x subcode 2=0x%x bytes=0x%x %02x%02x%02x -> %02x%02x%02x\n",
+					    fw_status[1], fw_status[2],
+					    le32_to_cpu(((struct els_sts_entry_24xx *)
+						pkt)->total_byte_count),
+					    e->s_id[0], e->s_id[2], e->s_id[1],
+					    e->d_id[2], e->d_id[1], e->d_id[0]);
+				}
+			}
 		}
-		ql_dbg(ql_dbg_disc, vha, 0x503f,
-		    "ELS IOCB Done -%s hdl=%x comp_status=0x%x error subcode 1=0x%x error subcode 2=0x%x total_byte=0x%x\n",
-		    type, sp->handle, comp_status, fw_status[1], fw_status[2],
-		    le32_to_cpu(ese->total_byte_count));
 		goto els_ct_done;
 	}
 
@@ -2107,6 +2327,7 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req,
 	struct srb_iocb *lio;
 	uint16_t *data;
 	uint32_t iop[2];
+	int logit = 1;
 
 	sp = qla2x00_get_sp_from_handle(vha, func, req, logio);
 	if (!sp)
@@ -2153,6 +2374,10 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req,
 		if (sp->type != SRB_LOGIN_CMD)
 			goto logio_done;
 
+		lio->u.logio.iop[1] = le32_to_cpu(logio->io_parameter[5]);
+		if (le32_to_cpu(logio->io_parameter[5]) & LIO_COMM_FEAT_FCSP)
+			fcport->flags |= FCF_FCSP_DEVICE;
+
 		iop[0] = le32_to_cpu(logio->io_parameter[0]);
 		if (iop[0] & BIT_4) {
 			fcport->port_type = FCT_TARGET;
@@ -2180,9 +2405,11 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req,
 	case LSC_SCODE_PORTID_USED:
 		data[0] = MBS_PORT_ID_USED;
 		data[1] = LSW(iop[1]);
+		logit = 0;
 		break;
 	case LSC_SCODE_NPORT_USED:
 		data[0] = MBS_LOOP_ID_USED;
+		logit = 0;
 		break;
 	case LSC_SCODE_CMD_FAILED:
 		if (iop[1] == 0x0606) {
@@ -2215,12 +2442,20 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req,
 		break;
 	}
 
-	ql_log(ql_log_warn, sp->vha, 0x5037,
-	       "Async-%s failed: handle=%x pid=%06x wwpn=%8phC comp_status=%x iop0=%x iop1=%x\n",
-	       type, sp->handle, fcport->d_id.b24, fcport->port_name,
-	       le16_to_cpu(logio->comp_status),
-	       le32_to_cpu(logio->io_parameter[0]),
-	       le32_to_cpu(logio->io_parameter[1]));
+	if (logit)
+		ql_log(ql_log_warn, sp->vha, 0x5037, "Async-%s failed: "
+		       "handle=%x pid=%06x wwpn=%8phC comp_status=%x iop0=%x iop1=%x\n",
+		       type, sp->handle, fcport->d_id.b24, fcport->port_name,
+		       le16_to_cpu(logio->comp_status),
+		       le32_to_cpu(logio->io_parameter[0]),
+		       le32_to_cpu(logio->io_parameter[1]));
+	else
+		ql_dbg(ql_dbg_disc, sp->vha, 0x5037, "Async-%s failed: "
+		       "handle=%x pid=%06x wwpn=%8phC comp_status=%x iop0=%x iop1=%x\n",
+		       type, sp->handle, fcport->d_id.b24, fcport->port_name,
+		       le16_to_cpu(logio->comp_status),
+		       le32_to_cpu(logio->io_parameter[0]),
+		       le32_to_cpu(logio->io_parameter[1]));
 
 logio_done:
 	sp->done(sp, 0);
@@ -2417,6 +2652,15 @@ static void qla24xx_nvme_iocb_entry(scsi_qla_host_t *vha, struct req_que *req,
 	case CS_PORT_UNAVAILABLE:
 	case CS_PORT_LOGGED_OUT:
 		fcport->nvme_flag |= NVME_FLAG_RESETTING;
+		if (atomic_read(&fcport->state) == FCS_ONLINE) {
+			ql_dbg(ql_dbg_disc, fcport->vha, 0x3021,
+			       "Port to be marked lost on fcport=%06x, current "
+			       "port state= %s comp_status %x.\n",
+			       fcport->d_id.b24, port_state_str[FCS_ONLINE],
+			       comp_status);
+
+			qlt_schedule_sess_for_deletion(fcport);
+		}
 		fallthrough;
 	case CS_ABORTED:
 	case CS_PORT_BUSY:
@@ -2969,9 +3213,10 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
 		return;
 	}
 
+	/* Fast path completion. */
+	qla_chk_edif_rx_sa_delete_pending(vha, sp, sts24);
 	sp->qpair->cmd_completion_cnt++;
 
-	/* Fast path completion. */
 	if (comp_status == CS_COMPLETE && scsi_status == 0) {
 		qla2x00_process_completed_request(vha, req, handle);
 
@@ -3366,6 +3611,7 @@ qla2x00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, sts_entry_t *pkt)
 		}
 		break;
 
+	case SA_UPDATE_IOCB_TYPE:
 	case ABTS_RESP_24XX:
 	case CTIO_TYPE7:
 	case CTIO_CRC2:
@@ -3453,6 +3699,63 @@ void qla24xx_nvme_ls4_iocb(struct scsi_qla_host *vha,
 }
 
 /**
+ * qla_chk_cont_iocb_avail - check for all continuation iocbs are available
+ *   before iocb processing can start.
+ * @vha: host adapter pointer
+ * @rsp: respond queue
+ * @pkt: head iocb describing how many continuation iocb
+ * Return: 0 all iocbs has arrived, xx- all iocbs have not arrived.
+ */
+static int qla_chk_cont_iocb_avail(struct scsi_qla_host *vha,
+	struct rsp_que *rsp, response_t *pkt)
+{
+	int start_pkt_ring_index, end_pkt_ring_index, n_ring_index;
+	response_t *end_pkt;
+	int rc = 0;
+	u32 rsp_q_in;
+
+	if (pkt->entry_count == 1)
+		return rc;
+
+	/* ring_index was pre-increment. set it back to current pkt */
+	if (rsp->ring_index == 0)
+		start_pkt_ring_index = rsp->length - 1;
+	else
+		start_pkt_ring_index = rsp->ring_index - 1;
+
+	if ((start_pkt_ring_index + pkt->entry_count) >= rsp->length)
+		end_pkt_ring_index = start_pkt_ring_index + pkt->entry_count -
+			rsp->length - 1;
+	else
+		end_pkt_ring_index = start_pkt_ring_index + pkt->entry_count - 1;
+
+	end_pkt = rsp->ring + end_pkt_ring_index;
+
+	/*  next pkt = end_pkt + 1 */
+	n_ring_index = end_pkt_ring_index + 1;
+	if (n_ring_index >= rsp->length)
+		n_ring_index = 0;
+
+	rsp_q_in = rsp->qpair->use_shadow_reg ? *rsp->in_ptr :
+		rd_reg_dword(rsp->rsp_q_in);
+
+	/* rsp_q_in is either wrapped or pointing beyond endpkt */
+	if ((rsp_q_in < start_pkt_ring_index && rsp_q_in < n_ring_index) ||
+			rsp_q_in >= n_ring_index)
+		/* all IOCBs arrived. */
+		rc = 0;
+	else
+		rc = -EIO;
+
+	ql_dbg(ql_dbg_init + ql_dbg_verbose, vha, 0x5091,
+	    "%s - ring %p pkt %p end pkt %p entry count %#x rsp_q_in %d rc %d\n",
+	    __func__, rsp->ring, pkt, end_pkt, pkt->entry_count,
+	    rsp_q_in, rc);
+
+	return rc;
+}
+
+/**
  * qla24xx_process_response_queue() - Process response queue entries.
  * @vha: SCSI driver HA context
  * @rsp: response queue
@@ -3592,12 +3895,26 @@ process_err:
 						 qla27xx_process_purex_fpin);
 				break;
 
+			case ELS_AUTH_ELS:
+				if (qla_chk_cont_iocb_avail(vha, rsp, (response_t *)pkt)) {
+					ql_dbg(ql_dbg_init, vha, 0x5091,
+					    "Defer processing ELS opcode %#x...\n",
+					    purex_entry->els_frame_payload[3]);
+					return;
+				}
+				qla24xx_auth_els(vha, (void **)&pkt, &rsp);
+				break;
 			default:
 				ql_log(ql_log_warn, vha, 0x509c,
 				       "Discarding ELS Request opcode 0x%x\n",
 				       purex_entry->els_frame_payload[3]);
 			}
 			break;
+		case SA_UPDATE_IOCB_TYPE:
+			qla28xx_sa_update_iocb_entry(vha, rsp->req,
+				(struct sa_update_28xx *)pkt);
+			break;
+
 		default:
 			/* Type Not Supported. */
 			ql_dbg(ql_dbg_async, vha, 0x5042,
@@ -4201,6 +4518,8 @@ skip_msi:
 		ql_dbg(ql_dbg_init, vha, 0x0125,
 		    "INTa mode: Enabled.\n");
 		ha->flags.mr_intr_valid = 1;
+		/* Set max_qpair to 0, as MSI-X and MSI in not enabled */
+		ha->max_qpairs = 0;
 	}
 
 clear_risc_ints:
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 9f3ad8aa649c..7811c4952035 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -663,6 +663,7 @@ qla2x00_load_ram(scsi_qla_host_t *vha, dma_addr_t req_dma, uint32_t risc_addr,
 }
 
 #define	NVME_ENABLE_FLAG	BIT_3
+#define	EDIF_HW_SUPPORT		BIT_10
 
 /*
  * qla2x00_execute_fw
@@ -739,7 +740,7 @@ again:
 			mcp->mb[11] |= EXE_FW_FORCE_SEMAPHORE;
 
 		mcp->out_mb |= MBX_4 | MBX_3 | MBX_2 | MBX_1 | MBX_11;
-		mcp->in_mb |= MBX_3 | MBX_2 | MBX_1;
+		mcp->in_mb |= MBX_5 | MBX_3 | MBX_2 | MBX_1;
 	} else {
 		mcp->mb[1] = LSW(risc_addr);
 		mcp->out_mb |= MBX_1;
@@ -795,6 +796,12 @@ again:
 		}
 	}
 
+	if (IS_QLA28XX(ha) && (mcp->mb[5] & EDIF_HW_SUPPORT)) {
+		ha->flags.edif_hw = 1;
+		ql_log(ql_log_info, vha, 0xffff,
+		    "%s: edif HW\n", __func__);
+	}
+
 done:
 	ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1028,
 	    "Done %s.\n", __func__);
@@ -1130,6 +1137,13 @@ qla2x00_get_fw_version(scsi_qla_host_t *vha)
 			       ha->fw_attributes_ext[0]);
 			vha->flags.nvme2_enabled = 1;
 		}
+
+		if (IS_QLA28XX(ha) && ha->flags.edif_hw && ql2xsecenable &&
+		    (ha->fw_attributes_ext[0] & FW_ATTR_EXT0_EDIF)) {
+			ha->flags.edif_enabled = 1;
+			ql_log(ql_log_info, vha, 0xffff,
+			       "%s: edif is enabled\n", __func__);
+		}
 	}
 
 	if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
@@ -3231,7 +3245,7 @@ qla24xx_abort_command(srb_t *sp)
 	if (sp->qpair)
 		req = sp->qpair->req;
 	else
-		return QLA_FUNCTION_FAILED;
+		return QLA_ERR_NO_QPAIR;
 
 	if (ql2xasynctmfenable)
 		return qla24xx_async_abort_command(sp);
@@ -3244,7 +3258,7 @@ qla24xx_abort_command(srb_t *sp)
 	spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
 	if (handle == req->num_outstanding_cmds) {
 		/* Command not found. */
-		return QLA_FUNCTION_FAILED;
+		return QLA_ERR_NOT_FOUND;
 	}
 
 	abt = dma_pool_zalloc(ha->s_dma_pool, GFP_KERNEL, &abt_dma);
@@ -4035,6 +4049,10 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
 				fcport->scan_state = QLA_FCPORT_FOUND;
 				fcport->n2n_flag = 1;
 				fcport->keep_nport_handle = 1;
+				fcport->login_retry = vha->hw->login_retry_count;
+				fcport->fc4_type = FS_FC4TYPE_FCP;
+				if (vha->flags.nvme_enabled)
+					fcport->fc4_type |= FS_FC4TYPE_NVME;
 
 				if (wwn_to_u64(vha->port_name) >
 				    wwn_to_u64(fcport->port_name)) {
@@ -4172,6 +4190,16 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
 				rptid_entry->u.f2.remote_nport_id[1];
 			fcport->d_id.b.al_pa =
 				rptid_entry->u.f2.remote_nport_id[0];
+
+			/*
+			 * For the case where remote port sending PRLO, FW
+			 * sends up RIDA Format 2 as an indication of session
+			 * loss. In other word, FW state change from PRLI
+			 * complete back to PLOGI complete. Delete the
+			 * session and let relogin drive the reconnect.
+			 */
+			if (atomic_read(&fcport->state) == FCS_ONLINE)
+				qlt_schedule_sess_for_deletion(fcport);
 		}
 	}
 }
@@ -4946,7 +4974,7 @@ qla24xx_get_port_login_templ(scsi_qla_host_t *vha, dma_addr_t buf_dma,
 	return rval;
 }
 
-#define PUREX_CMD_COUNT	2
+#define PUREX_CMD_COUNT	4
 int
 qla25xx_set_els_cmds_supported(scsi_qla_host_t *vha)
 {
@@ -4954,6 +4982,7 @@ qla25xx_set_els_cmds_supported(scsi_qla_host_t *vha)
 	mbx_cmd_t mc;
 	mbx_cmd_t *mcp = &mc;
 	uint8_t *els_cmd_map;
+	uint8_t active_cnt = 0;
 	dma_addr_t els_cmd_map_dma;
 	uint8_t cmd_opcode[PUREX_CMD_COUNT];
 	uint8_t i, index, purex_bit;
@@ -4975,10 +5004,20 @@ qla25xx_set_els_cmds_supported(scsi_qla_host_t *vha)
 	}
 
 	/* List of Purex ELS */
-	cmd_opcode[0] = ELS_FPIN;
-	cmd_opcode[1] = ELS_RDP;
+	if (ql2xrdpenable) {
+		cmd_opcode[active_cnt] = ELS_RDP;
+		active_cnt++;
+	}
+	if (ha->flags.scm_supported_f) {
+		cmd_opcode[active_cnt] = ELS_FPIN;
+		active_cnt++;
+	}
+	if (ha->flags.edif_enabled) {
+		cmd_opcode[active_cnt] = ELS_AUTH_ELS;
+		active_cnt++;
+	}
 
-	for (i = 0; i < PUREX_CMD_COUNT; i++) {
+	for (i = 0; i < active_cnt; i++) {
 		index = cmd_opcode[i] / 8;
 		purex_bit = cmd_opcode[i] % 8;
 		els_cmd_map[index] |= 1 << purex_bit;
@@ -6588,6 +6627,12 @@ int __qla24xx_parse_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport,
 	fcport->d_id.b.al_pa = pd->port_id[2];
 	fcport->d_id.b.rsvd_1 = 0;
 
+	ql_dbg(ql_dbg_disc, vha, 0x2062,
+	     "%8phC SVC Param w3 %02x%02x",
+	     fcport->port_name,
+	     pd->prli_svc_param_word_3[1],
+	     pd->prli_svc_param_word_3[0]);
+
 	if (NVME_TARGET(vha->hw, fcport)) {
 		fcport->port_type = FCT_NVME;
 		if ((pd->prli_svc_param_word_3[0] & BIT_5) == 0)
diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c
index c7caf322f445..1c024055f8c5 100644
--- a/drivers/scsi/qla2xxx/qla_mid.c
+++ b/drivers/scsi/qla2xxx/qla_mid.c
@@ -65,7 +65,7 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha)
 	uint16_t vp_id;
 	struct qla_hw_data *ha = vha->hw;
 	unsigned long flags = 0;
-	u8 i;
+	u32 i, bailout;
 
 	mutex_lock(&ha->vport_lock);
 	/*
@@ -75,21 +75,29 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha)
 	 * ensures no active vp_list traversal while the vport is removed
 	 * from the queue)
 	 */
-	for (i = 0; i < 10; i++) {
-		if (wait_event_timeout(vha->vref_waitq,
-		    !atomic_read(&vha->vref_count), HZ) > 0)
+	bailout = 0;
+	for (i = 0; i < 500; i++) {
+		spin_lock_irqsave(&ha->vport_slock, flags);
+		if (atomic_read(&vha->vref_count) == 0) {
+			list_del(&vha->list);
+			qlt_update_vp_map(vha, RESET_VP_IDX);
+			bailout = 1;
+		}
+		spin_unlock_irqrestore(&ha->vport_slock, flags);
+
+		if (bailout)
 			break;
+		else
+			msleep(20);
 	}
-
-	spin_lock_irqsave(&ha->vport_slock, flags);
-	if (atomic_read(&vha->vref_count)) {
-		ql_dbg(ql_dbg_vport, vha, 0xfffa,
-		    "vha->vref_count=%u timeout\n", vha->vref_count.counter);
-		vha->vref_count = (atomic_t)ATOMIC_INIT(0);
+	if (!bailout) {
+		ql_log(ql_log_info, vha, 0xfffa,
+			"vha->vref_count=%u timeout\n", vha->vref_count.counter);
+		spin_lock_irqsave(&ha->vport_slock, flags);
+		list_del(&vha->list);
+		qlt_update_vp_map(vha, RESET_VP_IDX);
+		spin_unlock_irqrestore(&ha->vport_slock, flags);
 	}
-	list_del(&vha->list);
-	qlt_update_vp_map(vha, RESET_VP_IDX);
-	spin_unlock_irqrestore(&ha->vport_slock, flags);
 
 	vp_id = vha->vp_idx;
 	ha->num_vhosts--;
@@ -158,6 +166,10 @@ qla24xx_disable_vp(scsi_qla_host_t *vha)
 	int ret = QLA_SUCCESS;
 	fc_port_t *fcport;
 
+	if (vha->hw->flags.edif_enabled)
+		/* delete sessions and flush sa_indexes */
+		qla2x00_wait_for_sess_deletion(vha);
+
 	if (vha->hw->flags.fw_started)
 		ret = qla24xx_control_vp(vha, VCE_COMMAND_DISABLE_VPS_LOGO_ALL);
 
@@ -166,7 +178,8 @@ qla24xx_disable_vp(scsi_qla_host_t *vha)
 	list_for_each_entry(fcport, &vha->vp_fcports, list)
 		fcport->logout_on_delete = 0;
 
-	qla2x00_mark_all_devices_lost(vha);
+	if (!vha->hw->flags.edif_enabled)
+		qla2x00_wait_for_sess_deletion(vha);
 
 	/* Remove port id from vp target map */
 	spin_lock_irqsave(&vha->hw->hardware_lock, flags);
@@ -257,13 +270,13 @@ qla24xx_configure_vp(scsi_qla_host_t *vha)
 void
 qla2x00_alert_all_vps(struct rsp_que *rsp, uint16_t *mb)
 {
-	scsi_qla_host_t *vha;
+	scsi_qla_host_t *vha, *tvp;
 	struct qla_hw_data *ha = rsp->hw;
 	int i = 0;
 	unsigned long flags;
 
 	spin_lock_irqsave(&ha->vport_slock, flags);
-	list_for_each_entry(vha, &ha->vp_list, list) {
+	list_for_each_entry_safe(vha, tvp, &ha->vp_list, list) {
 		if (vha->vp_idx) {
 			if (test_bit(VPORT_DELETE, &vha->dpc_flags))
 				continue;
@@ -416,7 +429,7 @@ void
 qla2x00_do_dpc_all_vps(scsi_qla_host_t *vha)
 {
 	struct qla_hw_data *ha = vha->hw;
-	scsi_qla_host_t *vp;
+	scsi_qla_host_t *vp, *tvp;
 	unsigned long flags = 0;
 
 	if (vha->vp_idx)
@@ -430,7 +443,7 @@ qla2x00_do_dpc_all_vps(scsi_qla_host_t *vha)
 		return;
 
 	spin_lock_irqsave(&ha->vport_slock, flags);
-	list_for_each_entry(vp, &ha->vp_list, list) {
+	list_for_each_entry_safe(vp, tvp, &ha->vp_list, list) {
 		if (vp->vp_idx) {
 			atomic_inc(&vp->vref_count);
 			spin_unlock_irqrestore(&ha->vport_slock, flags);
diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c
index 3e5c70a1d969..1c5da2dbd6f9 100644
--- a/drivers/scsi/qla2xxx/qla_nvme.c
+++ b/drivers/scsi/qla2xxx/qla_nvme.c
@@ -91,8 +91,9 @@ static int qla_nvme_alloc_queue(struct nvme_fc_local_port *lport,
 	struct qla_hw_data *ha;
 	struct qla_qpair *qpair;
 
-	if (!qidx)
-		qidx++;
+	/* Map admin queue and 1st IO queue to index 0 */
+	if (qidx)
+		qidx--;
 
 	vha = (struct scsi_qla_host *)lport->private;
 	ha = vha->hw;
@@ -108,19 +109,24 @@ static int qla_nvme_alloc_queue(struct nvme_fc_local_port *lport,
 		return -EINVAL;
 	}
 
-	if (ha->queue_pair_map[qidx]) {
-		*handle = ha->queue_pair_map[qidx];
-		ql_log(ql_log_info, vha, 0x2121,
-		    "Returning existing qpair of %p for idx=%x\n",
-		    *handle, qidx);
-		return 0;
-	}
+	/* Use base qpair if max_qpairs is 0 */
+	if (!ha->max_qpairs) {
+		qpair = ha->base_qpair;
+	} else {
+		if (ha->queue_pair_map[qidx]) {
+			*handle = ha->queue_pair_map[qidx];
+			ql_log(ql_log_info, vha, 0x2121,
+			       "Returning existing qpair of %p for idx=%x\n",
+			       *handle, qidx);
+			return 0;
+		}
 
-	qpair = qla2xxx_create_qpair(vha, 5, vha->vp_idx, true);
-	if (qpair == NULL) {
-		ql_log(ql_log_warn, vha, 0x2122,
-		    "Failed to allocate qpair\n");
-		return -EINVAL;
+		qpair = qla2xxx_create_qpair(vha, 5, vha->vp_idx, true);
+		if (!qpair) {
+			ql_log(ql_log_warn, vha, 0x2122,
+			       "Failed to allocate qpair\n");
+			return -EINVAL;
+		}
 	}
 	*handle = qpair;
 
@@ -221,13 +227,13 @@ static void qla_nvme_abort_work(struct work_struct *work)
 	srb_t *sp = priv->sp;
 	fc_port_t *fcport = sp->fcport;
 	struct qla_hw_data *ha = fcport->vha->hw;
-	int rval;
+	int rval, abts_done_called = 1;
 
 	ql_dbg(ql_dbg_io, fcport->vha, 0xffff,
-	       "%s called for sp=%p, hndl=%x on fcport=%p deleted=%d\n",
-	       __func__, sp, sp->handle, fcport, fcport->deleted);
+	       "%s called for sp=%p, hndl=%x on fcport=%p desc=%p deleted=%d\n",
+	       __func__, sp, sp->handle, fcport, sp->u.iocb_cmd.u.nvme.desc, fcport->deleted);
 
-	if (!ha->flags.fw_started || fcport->deleted)
+	if (!ha->flags.fw_started || fcport->deleted == QLA_SESS_DELETED)
 		goto out;
 
 	if (ha->flags.host_shutting_down) {
@@ -246,11 +252,19 @@ static void qla_nvme_abort_work(struct work_struct *work)
 	    sp, sp->handle, fcport, rval);
 
 	/*
+	 * If async tmf is enabled, the abort callback is called only on
+	 * return codes QLA_SUCCESS and QLA_ERR_FROM_FW.
+	 */
+	if (ql2xasynctmfenable &&
+	    rval != QLA_SUCCESS && rval != QLA_ERR_FROM_FW)
+		abts_done_called = 0;
+
+	/*
 	 * Returned before decreasing kref so that I/O requests
 	 * are waited until ABTS complete. This kref is decreased
 	 * at qla24xx_abort_sp_done function.
 	 */
-	if (ql2xabts_wait_nvme && QLA_ABTS_WAIT_ENABLED(sp))
+	if (abts_done_called && ql2xabts_wait_nvme && QLA_ABTS_WAIT_ENABLED(sp))
 		return;
 out:
 	/* kref_get was done before work was schedule. */
@@ -463,6 +477,10 @@ static inline int qla2x00_start_nvme_mq(srb_t *sp)
 	} else if (fd->io_dir == 0) {
 		cmd_pkt->control_flags = 0;
 	}
+
+	if (sp->fcport->edif.enable && fd->io_dir != 0)
+		cmd_pkt->control_flags |= cpu_to_le16(CF_EN_EDIF);
+
 	/* Set BIT_13 of control flags for Async event */
 	if (vha->flags.nvme2_enabled &&
 	    cmd->sqe.common.opcode == nvme_admin_async_event) {
@@ -727,18 +745,9 @@ int qla_nvme_register_hba(struct scsi_qla_host *vha)
 
 	WARN_ON(vha->nvme_local_port);
 
-	if (ha->max_req_queues < 3) {
-		if (!ha->flags.max_req_queue_warned)
-			ql_log(ql_log_info, vha, 0x2120,
-			       "%s: Disabling FC-NVME due to lack of free queue pairs (%d).\n",
-			       __func__, ha->max_req_queues);
-		ha->flags.max_req_queue_warned = 1;
-		return ret;
-	}
-
 	qla_nvme_fc_transport.max_hw_queues =
 	    min((uint8_t)(qla_nvme_fc_transport.max_hw_queues),
-		(uint8_t)(ha->max_req_queues - 2));
+		(uint8_t)(ha->max_qpairs ? ha->max_qpairs : 1));
 
 	pinfo.node_name = wwn_to_u64(vha->node_name);
 	pinfo.port_name = wwn_to_u64(vha->port_name);
@@ -803,14 +812,14 @@ void qla_nvme_abort_process_comp_status(struct abort_entry_24xx *abt, srb_t *ori
 	case CS_PORT_LOGGED_OUT:
 	/* BA_RJT was received for the ABTS */
 	case CS_PORT_CONFIG_CHG:
-		ql_dbg(ql_dbg_async + ql_dbg_mbx, vha, 0xf09d,
+		ql_dbg(ql_dbg_async, vha, 0xf09d,
 		       "Abort I/O IOCB completed with error, comp_status=%x\n",
 		comp_status);
 		break;
 
 	/* BA_RJT was received for the ABTS */
 	case CS_REJECT_RECEIVED:
-		ql_dbg(ql_dbg_async + ql_dbg_mbx, vha, 0xf09e,
+		ql_dbg(ql_dbg_async, vha, 0xf09e,
 		       "BA_RJT was received for the ABTS rjt_vendorUnique = %u",
 			abt->fw.ba_rjt_vendorUnique);
 		ql_dbg(ql_dbg_async + ql_dbg_mbx, vha, 0xf09e,
@@ -819,18 +828,18 @@ void qla_nvme_abort_process_comp_status(struct abort_entry_24xx *abt, srb_t *ori
 		break;
 
 	case CS_COMPLETE:
-		ql_dbg(ql_dbg_async + ql_dbg_mbx, vha, 0xf09f,
+		ql_dbg(ql_dbg_async + ql_dbg_verbose, vha, 0xf09f,
 		       "IOCB request is completed successfully comp_status=%x\n",
 		comp_status);
 		break;
 
 	case CS_IOCB_ERROR:
-		ql_dbg(ql_dbg_async + ql_dbg_mbx, vha, 0xf0a0,
+		ql_dbg(ql_dbg_async, vha, 0xf0a0,
 		       "IOCB request is failed, comp_status=%x\n", comp_status);
 		break;
 
 	default:
-		ql_dbg(ql_dbg_async + ql_dbg_mbx, vha, 0xf0a1,
+		ql_dbg(ql_dbg_async, vha, 0xf0a1,
 		       "Invalid Abort IO IOCB Completion Status %x\n",
 		comp_status);
 		break;
diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c
index 615e44af1ca6..11aad97dfca8 100644
--- a/drivers/scsi/qla2xxx/qla_nx.c
+++ b/drivers/scsi/qla2xxx/qla_nx.c
@@ -2166,7 +2166,6 @@ qla82xx_poll(int irq, void *dev_id)
 	struct qla_hw_data *ha;
 	struct rsp_que *rsp;
 	struct device_reg_82xx __iomem *reg;
-	int status = 0;
 	uint32_t stat;
 	uint32_t host_int = 0;
 	uint16_t mb[8];
@@ -2195,7 +2194,6 @@ qla82xx_poll(int irq, void *dev_id)
 		case 0x10:
 		case 0x11:
 			qla82xx_mbx_completion(vha, MSW(stat));
-			status |= MBX_INTERRUPT;
 			break;
 		case 0x12:
 			mb[0] = MSW(stat);
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index cedd558f65eb..d2e40aaba734 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/blk-mq-pci.h>
 #include <linux/refcount.h>
+#include <linux/crash_dump.h>
 
 #include <scsi/scsi_tcq.h>
 #include <scsi/scsicam.h>
@@ -53,6 +54,11 @@ static struct kmem_cache *ctx_cachep;
  */
 uint ql_errlev = 0x8001;
 
+int ql2xsecenable;
+module_param(ql2xsecenable, int, S_IRUGO);
+MODULE_PARM_DESC(ql2xsecenable,
+	"Enable/disable security. 0(Default) - Security disabled. 1 - Security enabled.");
+
 static int ql2xenableclass2;
 module_param(ql2xenableclass2, int, S_IRUGO|S_IRUSR);
 MODULE_PARM_DESC(ql2xenableclass2,
@@ -849,7 +855,7 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 		uint16_t hwq;
 		struct qla_qpair *qpair = NULL;
 
-		tag = blk_mq_unique_tag(cmd->request);
+		tag = blk_mq_unique_tag(scsi_cmd_to_rq(cmd));
 		hwq = blk_mq_unique_tag_to_hwq(tag);
 		qpair = ha->queue_pair_map[hwq];
 
@@ -1120,12 +1126,28 @@ static inline int test_fcport_count(scsi_qla_host_t *vha)
 	struct qla_hw_data *ha = vha->hw;
 	unsigned long flags;
 	int res;
+	/* Return 0 = sleep, x=wake */
 
 	spin_lock_irqsave(&ha->tgt.sess_lock, flags);
 	ql_dbg(ql_dbg_init, vha, 0x00ec,
 	    "tgt %p, fcport_count=%d\n",
 	    vha, vha->fcport_count);
 	res = (vha->fcport_count == 0);
+	if  (res) {
+		struct fc_port *fcport;
+
+		list_for_each_entry(fcport, &vha->vp_fcports, list) {
+			if (fcport->deleted != QLA_SESS_DELETED) {
+				/* session(s) may not be fully logged in
+				 * (ie fcport_count=0), but session
+				 * deletion thread(s) may be inflight.
+				 */
+
+				res = 0;
+				break;
+			}
+		}
+	}
 	spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
 
 	return res;
@@ -1367,18 +1389,27 @@ static char *reset_errors[] = {
 };
 
 static int
-__qla2xxx_eh_generic_reset(char *name, enum nexus_wait_type type,
-    struct scsi_cmnd *cmd, int (*do_reset)(struct fc_port *, uint64_t, int))
+qla2xxx_eh_device_reset(struct scsi_cmnd *cmd)
 {
-	scsi_qla_host_t *vha = shost_priv(cmd->device->host);
-	fc_port_t *fcport = (struct fc_port *) cmd->device->hostdata;
+	struct scsi_device *sdev = cmd->device;
+	scsi_qla_host_t *vha = shost_priv(sdev->host);
+	struct fc_rport *rport = starget_to_rport(scsi_target(sdev));
+	fc_port_t *fcport = (struct fc_port *) sdev->hostdata;
+	struct qla_hw_data *ha = vha->hw;
 	int err;
 
+	if (qla2x00_isp_reg_stat(ha)) {
+		ql_log(ql_log_info, vha, 0x803e,
+		    "PCI/Register disconnect, exiting.\n");
+		qla_pci_set_eeh_busy(vha);
+		return FAILED;
+	}
+
 	if (!fcport) {
 		return FAILED;
 	}
 
-	err = fc_block_scsi_eh(cmd);
+	err = fc_block_rport(rport);
 	if (err != 0)
 		return err;
 
@@ -1386,8 +1417,8 @@ __qla2xxx_eh_generic_reset(char *name, enum nexus_wait_type type,
 		return SUCCESS;
 
 	ql_log(ql_log_info, vha, 0x8009,
-	    "%s RESET ISSUED nexus=%ld:%d:%llu cmd=%p.\n", name, vha->host_no,
-	    cmd->device->id, cmd->device->lun, cmd);
+	    "DEVICE RESET ISSUED nexus=%ld:%d:%llu cmd=%p.\n", vha->host_no,
+	    sdev->id, sdev->lun, cmd);
 
 	err = 0;
 	if (qla2x00_wait_for_hba_online(vha) != QLA_SUCCESS) {
@@ -1396,67 +1427,100 @@ __qla2xxx_eh_generic_reset(char *name, enum nexus_wait_type type,
 		goto eh_reset_failed;
 	}
 	err = 2;
-	if (do_reset(fcport, cmd->device->lun, 1)
+	if (ha->isp_ops->lun_reset(fcport, sdev->lun, 1)
 		!= QLA_SUCCESS) {
 		ql_log(ql_log_warn, vha, 0x800c,
 		    "do_reset failed for cmd=%p.\n", cmd);
 		goto eh_reset_failed;
 	}
 	err = 3;
-	if (qla2x00_eh_wait_for_pending_commands(vha, cmd->device->id,
-	    cmd->device->lun, type) != QLA_SUCCESS) {
+	if (qla2x00_eh_wait_for_pending_commands(vha, sdev->id,
+	    sdev->lun, WAIT_LUN) != QLA_SUCCESS) {
 		ql_log(ql_log_warn, vha, 0x800d,
 		    "wait for pending cmds failed for cmd=%p.\n", cmd);
 		goto eh_reset_failed;
 	}
 
 	ql_log(ql_log_info, vha, 0x800e,
-	    "%s RESET SUCCEEDED nexus:%ld:%d:%llu cmd=%p.\n", name,
-	    vha->host_no, cmd->device->id, cmd->device->lun, cmd);
+	    "DEVICE RESET SUCCEEDED nexus:%ld:%d:%llu cmd=%p.\n",
+	    vha->host_no, sdev->id, sdev->lun, cmd);
 
 	return SUCCESS;
 
 eh_reset_failed:
 	ql_log(ql_log_info, vha, 0x800f,
-	    "%s RESET FAILED: %s nexus=%ld:%d:%llu cmd=%p.\n", name,
-	    reset_errors[err], vha->host_no, cmd->device->id, cmd->device->lun,
+	    "DEVICE RESET FAILED: %s nexus=%ld:%d:%llu cmd=%p.\n",
+	    reset_errors[err], vha->host_no, sdev->id, sdev->lun,
 	    cmd);
 	vha->reset_cmd_err_cnt++;
 	return FAILED;
 }
 
 static int
-qla2xxx_eh_device_reset(struct scsi_cmnd *cmd)
+qla2xxx_eh_target_reset(struct scsi_cmnd *cmd)
 {
-	scsi_qla_host_t *vha = shost_priv(cmd->device->host);
+	struct scsi_device *sdev = cmd->device;
+	struct fc_rport *rport = starget_to_rport(scsi_target(sdev));
+	scsi_qla_host_t *vha = shost_priv(rport_to_shost(rport));
 	struct qla_hw_data *ha = vha->hw;
+	fc_port_t *fcport = *(fc_port_t **)rport->dd_data;
+	int err;
 
 	if (qla2x00_isp_reg_stat(ha)) {
-		ql_log(ql_log_info, vha, 0x803e,
+		ql_log(ql_log_info, vha, 0x803f,
 		    "PCI/Register disconnect, exiting.\n");
 		qla_pci_set_eeh_busy(vha);
 		return FAILED;
 	}
 
-	return __qla2xxx_eh_generic_reset("DEVICE", WAIT_LUN, cmd,
-	    ha->isp_ops->lun_reset);
-}
+	if (!fcport) {
+		return FAILED;
+	}
 
-static int
-qla2xxx_eh_target_reset(struct scsi_cmnd *cmd)
-{
-	scsi_qla_host_t *vha = shost_priv(cmd->device->host);
-	struct qla_hw_data *ha = vha->hw;
+	err = fc_block_rport(rport);
+	if (err != 0)
+		return err;
 
-	if (qla2x00_isp_reg_stat(ha)) {
-		ql_log(ql_log_info, vha, 0x803f,
-		    "PCI/Register disconnect, exiting.\n");
-		qla_pci_set_eeh_busy(vha);
-		return FAILED;
+	if (fcport->deleted)
+		return SUCCESS;
+
+	ql_log(ql_log_info, vha, 0x8009,
+	    "TARGET RESET ISSUED nexus=%ld:%d cmd=%p.\n", vha->host_no,
+	    sdev->id, cmd);
+
+	err = 0;
+	if (qla2x00_wait_for_hba_online(vha) != QLA_SUCCESS) {
+		ql_log(ql_log_warn, vha, 0x800a,
+		    "Wait for hba online failed for cmd=%p.\n", cmd);
+		goto eh_reset_failed;
 	}
+	err = 2;
+	if (ha->isp_ops->target_reset(fcport, 0, 0) != QLA_SUCCESS) {
+		ql_log(ql_log_warn, vha, 0x800c,
+		    "target_reset failed for cmd=%p.\n", cmd);
+		goto eh_reset_failed;
+	}
+	err = 3;
+	if (qla2x00_eh_wait_for_pending_commands(vha, sdev->id,
+	    0, WAIT_TARGET) != QLA_SUCCESS) {
+		ql_log(ql_log_warn, vha, 0x800d,
+		    "wait for pending cmds failed for cmd=%p.\n", cmd);
+		goto eh_reset_failed;
+	}
+
+	ql_log(ql_log_info, vha, 0x800e,
+	    "TARGET RESET SUCCEEDED nexus:%ld:%d cmd=%p.\n",
+	    vha->host_no, sdev->id, cmd);
+
+	return SUCCESS;
 
-	return __qla2xxx_eh_generic_reset("TARGET", WAIT_TARGET, cmd,
-	    ha->isp_ops->target_reset);
+eh_reset_failed:
+	ql_log(ql_log_info, vha, 0x800f,
+	    "TARGET RESET FAILED: %s nexus=%ld:%d:%llu cmd=%p.\n",
+	    reset_errors[err], vha->host_no, cmd->device->id, cmd->device->lun,
+	    cmd);
+	vha->reset_cmd_err_cnt++;
+	return FAILED;
 }
 
 /**************************************************************************
@@ -1478,7 +1542,6 @@ static int
 qla2xxx_eh_bus_reset(struct scsi_cmnd *cmd)
 {
 	scsi_qla_host_t *vha = shost_priv(cmd->device->host);
-	fc_port_t *fcport = (struct fc_port *) cmd->device->hostdata;
 	int ret = FAILED;
 	unsigned int id;
 	uint64_t lun;
@@ -1494,15 +1557,6 @@ qla2xxx_eh_bus_reset(struct scsi_cmnd *cmd)
 	id = cmd->device->id;
 	lun = cmd->device->lun;
 
-	if (!fcport) {
-		return ret;
-	}
-
-	ret = fc_block_scsi_eh(cmd);
-	if (ret != 0)
-		return ret;
-	ret = FAILED;
-
 	if (qla2x00_chip_is_down(vha))
 		return ret;
 
@@ -1742,7 +1796,7 @@ static void qla2x00_abort_srb(struct qla_qpair *qp, srb_t *sp, const int res,
 		}
 
 		spin_lock_irqsave(qp->qp_lock_ptr, *flags);
-		if (ret_cmd && blk_mq_request_started(cmd->request))
+		if (ret_cmd && blk_mq_request_started(scsi_cmd_to_rq(cmd)))
 			sp->done(sp, res);
 	} else {
 		sp->done(sp, res);
@@ -2818,6 +2872,11 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 			return ret;
 	}
 
+	if (is_kdump_kernel()) {
+		ql2xmqsupport = 0;
+		ql2xallocfwdump = 0;
+	}
+
 	/* This may fail but that's ok */
 	pci_enable_pcie_error_reporting(pdev);
 
@@ -2835,6 +2894,17 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	spin_lock_init(&ha->tgt.sess_lock);
 	spin_lock_init(&ha->tgt.atio_lock);
 
+	spin_lock_init(&ha->sadb_lock);
+	INIT_LIST_HEAD(&ha->sadb_tx_index_list);
+	INIT_LIST_HEAD(&ha->sadb_rx_index_list);
+
+	spin_lock_init(&ha->sadb_fp_lock);
+
+	if (qla_edif_sadb_build_free_pool(ha)) {
+		kfree(ha);
+		goto  disable_device;
+	}
+
 	atomic_set(&ha->nvme_active_aen_cnt, 0);
 
 	/* Clear our data area */
@@ -3033,8 +3103,8 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		ha->portnum = PCI_FUNC(ha->pdev->devfn);
 		ha->max_fibre_devices = MAX_FIBRE_DEVICES_2400;
 		ha->mbx_count = MAILBOX_REGISTER_COUNT;
-		req_length = REQUEST_ENTRY_CNT_24XX;
-		rsp_length = RESPONSE_ENTRY_CNT_2300;
+		req_length = REQUEST_ENTRY_CNT_83XX;
+		rsp_length = RESPONSE_ENTRY_CNT_83XX;
 		ha->tgt.atio_q_length = ATIO_ENTRY_CNT_24XX;
 		ha->max_loop_id = SNS_LAST_LOOP_ID_2300;
 		ha->init_cb_size = sizeof(struct mid_init_cb_81xx);
@@ -3460,6 +3530,8 @@ skip_dpc:
 	return 0;
 
 probe_failed:
+	qla_enode_stop(base_vha);
+	qla_edb_stop(base_vha);
 	if (base_vha->gnl.l) {
 		dma_free_coherent(&ha->pdev->dev, base_vha->gnl.size,
 				base_vha->gnl.l, base_vha->gnl.ldma);
@@ -3762,6 +3834,8 @@ qla2x00_remove_one(struct pci_dev *pdev)
 		base_vha->gnl.size, base_vha->gnl.l, base_vha->gnl.ldma);
 
 	base_vha->gnl.l = NULL;
+	qla_enode_stop(base_vha);
+	qla_edb_stop(base_vha);
 
 	vfree(base_vha->scan.l);
 
@@ -3795,7 +3869,6 @@ qla2x00_remove_one(struct pci_dev *pdev)
 	qla2x00_free_sysfs_attr(base_vha, true);
 
 	fc_remove_host(base_vha->host);
-	qlt_remove_target_resources(ha);
 
 	scsi_remove_host(base_vha->host);
 
@@ -3867,6 +3940,9 @@ qla2x00_free_device(scsi_qla_host_t *vha)
 
 	qla82xx_md_free(vha);
 
+	qla_edif_sadb_release_free_pool(ha);
+	qla_edif_sadb_release(ha);
+
 	qla2x00_free_queues(ha);
 }
 
@@ -3919,6 +3995,7 @@ void qla2x00_mark_device_lost(scsi_qla_host_t *vha, fc_port_t *fcport,
 		qla2x00_set_fcport_state(fcport, FCS_DEVICE_LOST);
 		qla2x00_schedule_rport_del(vha, fcport);
 	}
+
 	/*
 	 * We may need to retry the login, so don't change the state of the
 	 * port but do the retries.
@@ -3941,6 +4018,16 @@ qla2x00_mark_all_devices_lost(scsi_qla_host_t *vha)
 	    "Mark all dev lost\n");
 
 	list_for_each_entry(fcport, &vha->vp_fcports, list) {
+		if (fcport->loop_id != FC_NO_LOOP_ID &&
+		    (fcport->flags & FCF_FCP2_DEVICE) &&
+		    fcport->port_type == FCT_TARGET &&
+		    !qla2x00_reset_active(vha)) {
+			ql_dbg(ql_dbg_disc, vha, 0x211a,
+			       "Delaying session delete for FCP2 flags 0x%x port_type = 0x%x port_id=%06x %phC",
+			       fcport->flags, fcport->port_type,
+			       fcport->d_id.b24, fcport->port_name);
+			continue;
+		}
 		fcport->scan_state = 0;
 		qlt_schedule_sess_for_deletion(fcport);
 	}
@@ -3972,15 +4059,20 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len,
 	struct req_que **req, struct rsp_que **rsp)
 {
 	char	name[16];
+	int rc;
 
 	ha->init_cb = dma_alloc_coherent(&ha->pdev->dev, ha->init_cb_size,
 		&ha->init_cb_dma, GFP_KERNEL);
 	if (!ha->init_cb)
 		goto fail;
 
-	if (qlt_mem_alloc(ha) < 0)
+	rc = btree_init32(&ha->host_map);
+	if (rc)
 		goto fail_free_init_cb;
 
+	if (qlt_mem_alloc(ha) < 0)
+		goto fail_free_btree;
+
 	ha->gid_list = dma_alloc_coherent(&ha->pdev->dev,
 		qla2x00_gid_list_size(ha), &ha->gid_list_dma, GFP_KERNEL);
 	if (!ha->gid_list)
@@ -3990,7 +4082,7 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len,
 	if (!ha->srb_mempool)
 		goto fail_free_gid_list;
 
-	if (IS_P3P_TYPE(ha)) {
+	if (IS_P3P_TYPE(ha) || IS_QLA27XX(ha) || (ql2xsecenable && IS_QLA28XX(ha))) {
 		/* Allocate cache for CT6 Ctx. */
 		if (!ctx_cachep) {
 			ctx_cachep = kmem_cache_create("qla2xxx_ctx",
@@ -4024,7 +4116,7 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len,
 	    "init_cb=%p gid_list=%p, srb_mempool=%p s_dma_pool=%p.\n",
 	    ha->init_cb, ha->gid_list, ha->srb_mempool, ha->s_dma_pool);
 
-	if (IS_P3P_TYPE(ha) || ql2xenabledif) {
+	if (IS_P3P_TYPE(ha) || ql2xenabledif || (IS_QLA28XX(ha) && ql2xsecenable)) {
 		ha->dl_dma_pool = dma_pool_create(name, &ha->pdev->dev,
 			DSD_LIST_DMA_POOL_SIZE, 8, 0);
 		if (!ha->dl_dma_pool) {
@@ -4264,8 +4356,36 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len,
 		goto fail_flt_buffer;
 	}
 
+	/* allocate the purex dma pool */
+	ha->purex_dma_pool = dma_pool_create(name, &ha->pdev->dev,
+	    MAX_PAYLOAD, 8, 0);
+
+	if (!ha->purex_dma_pool) {
+		ql_dbg_pci(ql_dbg_init, ha->pdev, 0x011b,
+		    "Unable to allocate purex_dma_pool.\n");
+		goto fail_flt;
+	}
+
+	ha->elsrej.size = sizeof(struct fc_els_ls_rjt) + 16;
+	ha->elsrej.c = dma_alloc_coherent(&ha->pdev->dev,
+	    ha->elsrej.size, &ha->elsrej.cdma, GFP_KERNEL);
+
+	if (!ha->elsrej.c) {
+		ql_dbg_pci(ql_dbg_init, ha->pdev, 0xffff,
+		    "Alloc failed for els reject cmd.\n");
+		goto fail_elsrej;
+	}
+	ha->elsrej.c->er_cmd = ELS_LS_RJT;
+	ha->elsrej.c->er_reason = ELS_RJT_LOGIC;
+	ha->elsrej.c->er_explan = ELS_EXPL_UNAB_DATA;
 	return 0;
 
+fail_elsrej:
+	dma_pool_destroy(ha->purex_dma_pool);
+fail_flt:
+	dma_free_coherent(&ha->pdev->dev, SFP_DEV_SIZE,
+	    ha->flt, ha->flt_dma);
+
 fail_flt_buffer:
 	dma_free_coherent(&ha->pdev->dev, SFP_DEV_SIZE,
 	    ha->sfp_data, ha->sfp_data_dma);
@@ -4356,6 +4476,8 @@ fail_free_gid_list:
 	ha->gid_list_dma = 0;
 fail_free_tgt_mem:
 	qlt_mem_free(ha);
+fail_free_btree:
+	btree_destroy32(&ha->host_map);
 fail_free_init_cb:
 	dma_free_coherent(&ha->pdev->dev, ha->init_cb_size, ha->init_cb,
 	ha->init_cb_dma);
@@ -4772,10 +4894,21 @@ qla2x00_mem_free(struct qla_hw_data *ha)
 	ha->dif_bundl_pool = NULL;
 
 	qlt_mem_free(ha);
+	qla_remove_hostmap(ha);
 
 	if (ha->init_cb)
 		dma_free_coherent(&ha->pdev->dev, ha->init_cb_size,
 			ha->init_cb, ha->init_cb_dma);
+
+	dma_pool_destroy(ha->purex_dma_pool);
+	ha->purex_dma_pool = NULL;
+
+	if (ha->elsrej.c) {
+		dma_free_coherent(&ha->pdev->dev, ha->elsrej.size,
+		    ha->elsrej.c, ha->elsrej.cdma);
+		ha->elsrej.c = NULL;
+	}
+
 	ha->init_cb = NULL;
 	ha->init_cb_dma = 0;
 
@@ -4837,6 +4970,9 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht,
 	spin_lock_init(&vha->cmd_list_lock);
 	init_waitqueue_head(&vha->fcport_waitQ);
 	init_waitqueue_head(&vha->vref_waitq);
+	qla_enode_init(vha);
+	qla_edb_init(vha);
+
 
 	vha->gnl.size = sizeof(struct get_name_list_extended) *
 			(ha->max_loop_id + 1);
@@ -5080,6 +5216,11 @@ void qla24xx_create_new_sess(struct scsi_qla_host *vha, struct qla_work_evt *e)
 			    WWN_SIZE);
 
 			fcport->fc4_type = e->u.new_sess.fc4_type;
+			if (NVME_PRIORITY(vha->hw, fcport))
+				fcport->do_prli_nvme = 1;
+			else
+				fcport->do_prli_nvme = 0;
+
 			if (e->u.new_sess.fc4_type & FS_FCP_IS_N2N) {
 				fcport->dm_login_expire = jiffies +
 					QLA_N2N_WAIT_TIME * HZ;
@@ -5327,6 +5468,9 @@ qla2x00_do_work(struct scsi_qla_host *vha)
 			qla24xx_els_dcmd2_iocb(vha, ELS_DCMD_PLOGI,
 			    e->u.fcport.fcport, false);
 			break;
+		case QLA_EVT_SA_REPLACE:
+			qla24xx_issue_sa_replace_iocb(vha, e);
+			break;
 		}
 
 		if (rc == EAGAIN) {
@@ -5376,6 +5520,7 @@ void qla2x00_relogin(struct scsi_qla_host *vha)
 		if (atomic_read(&fcport->state) != FCS_ONLINE &&
 		    fcport->login_retry) {
 			if (fcport->scan_state != QLA_FCPORT_FOUND ||
+			    fcport->disc_state == DSC_LOGIN_AUTH_PEND ||
 			    fcport->disc_state == DSC_LOGIN_COMPLETE)
 				continue;
 
@@ -7234,6 +7379,10 @@ qla2x00_timer(struct timer_list *t)
 		}
 	}
 
+	/* check if edif running */
+	if (vha->hw->flags.edif_enabled)
+		qla_edif_timer(vha);
+
 	/* Process any deferred work. */
 	if (!list_empty(&vha->work_list)) {
 		unsigned long flags;
@@ -7430,7 +7579,7 @@ static void qla_pci_error_cleanup(scsi_qla_host_t *vha)
 	struct qla_hw_data *ha = vha->hw;
 	scsi_qla_host_t *base_vha = pci_get_drvdata(ha->pdev);
 	struct qla_qpair *qpair = NULL;
-	struct scsi_qla_host *vp;
+	struct scsi_qla_host *vp, *tvp;
 	fc_port_t *fcport;
 	int i;
 	unsigned long flags;
@@ -7461,7 +7610,7 @@ static void qla_pci_error_cleanup(scsi_qla_host_t *vha)
 	qla2x00_mark_all_devices_lost(vha);
 
 	spin_lock_irqsave(&ha->vport_slock, flags);
-	list_for_each_entry(vp, &ha->vp_list, list) {
+	list_for_each_entry_safe(vp, tvp, &ha->vp_list, list) {
 		atomic_inc(&vp->vref_count);
 		spin_unlock_irqrestore(&ha->vport_slock, flags);
 		qla2x00_mark_all_devices_lost(vp);
@@ -7475,7 +7624,7 @@ static void qla_pci_error_cleanup(scsi_qla_host_t *vha)
 		fcport->flags &= ~(FCF_LOGIN_NEEDED | FCF_ASYNC_SENT);
 
 	spin_lock_irqsave(&ha->vport_slock, flags);
-	list_for_each_entry(vp, &ha->vp_list, list) {
+	list_for_each_entry_safe(vp, tvp, &ha->vp_list, list) {
 		atomic_inc(&vp->vref_count);
 		spin_unlock_irqrestore(&ha->vport_slock, flags);
 		list_for_each_entry(fcport, &vp->vp_fcports, list)
@@ -7887,7 +8036,7 @@ qla2x00_module_init(void)
 	BUILD_BUG_ON(sizeof(struct cmd_type_7_fx00) != 64);
 	BUILD_BUG_ON(sizeof(struct cmd_type_crc_2) != 64);
 	BUILD_BUG_ON(sizeof(struct ct_entry_24xx) != 64);
-	BUILD_BUG_ON(sizeof(struct ct_fdmi1_hba_attributes) != 2344);
+	BUILD_BUG_ON(sizeof(struct ct_fdmi1_hba_attributes) != 2604);
 	BUILD_BUG_ON(sizeof(struct ct_fdmi2_hba_attributes) != 4424);
 	BUILD_BUG_ON(sizeof(struct ct_fdmi2_port_attributes) != 4164);
 	BUILD_BUG_ON(sizeof(struct ct_fdmi_hba_attr) != 260);
diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c
index 060c89237777..a0aeba69513d 100644
--- a/drivers/scsi/qla2xxx/qla_sup.c
+++ b/drivers/scsi/qla2xxx/qla_sup.c
@@ -2936,7 +2936,6 @@ qla28xx_write_flash_data(scsi_qla_host_t *vha, uint32_t *dwptr, uint32_t faddr,
 		liter += dburst - 1;
 		faddr += dburst - 1;
 		dwptr += dburst - 1;
-		continue;
 	}
 
 write_protect:
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index eb47140a899f..b3478ed9b12e 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -184,8 +184,7 @@ static inline int qlt_issue_marker(struct scsi_qla_host *vha, int vha_locked)
 	return QLA_SUCCESS;
 }
 
-static inline
-struct scsi_qla_host *qlt_find_host_by_d_id(struct scsi_qla_host *vha,
+struct scsi_qla_host *qla_find_host_by_d_id(struct scsi_qla_host *vha,
 					    be_id_t d_id)
 {
 	struct scsi_qla_host *host;
@@ -198,7 +197,7 @@ struct scsi_qla_host *qlt_find_host_by_d_id(struct scsi_qla_host *vha,
 
 	key = be_to_port_id(d_id).b24;
 
-	host = btree_lookup32(&vha->hw->tgt.host_map, key);
+	host = btree_lookup32(&vha->hw->host_map, key);
 	if (!host)
 		ql_dbg(ql_dbg_tgt_mgt + ql_dbg_verbose, vha, 0xf005,
 		    "Unable to find host %06x\n", key);
@@ -299,7 +298,7 @@ static void qlt_try_to_dequeue_unknown_atios(struct scsi_qla_host *vha,
 			goto abort;
 		}
 
-		host = qlt_find_host_by_d_id(vha, u->atio.u.isp24.fcp_hdr.d_id);
+		host = qla_find_host_by_d_id(vha, u->atio.u.isp24.fcp_hdr.d_id);
 		if (host != NULL) {
 			ql_dbg(ql_dbg_async + ql_dbg_verbose, vha, 0x502f,
 			    "Requeuing unknown ATIO_TYPE7 %p\n", u);
@@ -348,7 +347,7 @@ static bool qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha,
 	switch (atio->u.raw.entry_type) {
 	case ATIO_TYPE7:
 	{
-		struct scsi_qla_host *host = qlt_find_host_by_d_id(vha,
+		struct scsi_qla_host *host = qla_find_host_by_d_id(vha,
 		    atio->u.isp24.fcp_hdr.d_id);
 		if (unlikely(NULL == host)) {
 			ql_dbg(ql_dbg_tgt, vha, 0xe03e,
@@ -577,6 +576,18 @@ static void qla2x00_async_nack_sp_done(srb_t *sp, int res)
 		sp->fcport->logout_on_delete = 1;
 		sp->fcport->plogi_nack_done_deadline = jiffies + HZ;
 		sp->fcport->send_els_logo = 0;
+
+		if (sp->fcport->flags & FCF_FCSP_DEVICE) {
+			ql_dbg(ql_dbg_edif, vha, 0x20ef,
+			    "%s %8phC edif: PLOGI- AUTH WAIT\n", __func__,
+			    sp->fcport->port_name);
+			qla2x00_set_fcport_disc_state(sp->fcport,
+			    DSC_LOGIN_AUTH_PEND);
+			qla2x00_post_aen_work(vha, FCH_EVT_PORT_ONLINE,
+			    sp->fcport->d_id.b24);
+			qla_edb_eventcreate(vha, VND_CMD_AUTH_STATE_NEEDED, sp->fcport->d_id.b24,
+			    0, sp->fcport);
+		}
 		break;
 
 	case SRB_NACK_PRLI:
@@ -624,6 +635,9 @@ int qla24xx_async_notify_ack(scsi_qla_host_t *vha, fc_port_t *fcport,
 	case SRB_NACK_PLOGI:
 		fcport->fw_login_state = DSC_LS_PLOGI_PEND;
 		c = "PLOGI";
+		if (vha->hw->flags.edif_enabled &&
+		    (le16_to_cpu(ntfy->u.isp24.flags) & NOTIFY24XX_FLAGS_FCSP))
+			fcport->flags |= FCF_FCSP_DEVICE;
 		break;
 	case SRB_NACK_PRLI:
 		fcport->fw_login_state = DSC_LS_PRLI_PEND;
@@ -693,7 +707,12 @@ void qla24xx_do_nack_work(struct scsi_qla_host *vha, struct qla_work_evt *e)
 void qla24xx_delete_sess_fn(struct work_struct *work)
 {
 	fc_port_t *fcport = container_of(work, struct fc_port, del_work);
-	struct qla_hw_data *ha = fcport->vha->hw;
+	struct qla_hw_data *ha = NULL;
+
+	if (!fcport || !fcport->vha || !fcport->vha->hw)
+		return;
+
+	ha = fcport->vha->hw;
 
 	if (fcport->se_sess) {
 		ha->tgt.tgt_ops->shutdown_sess(fcport);
@@ -917,6 +936,11 @@ qlt_send_first_logo(struct scsi_qla_host *vha, qlt_port_logo_t *logo)
 	qlt_port_logo_t *tmp;
 	int res;
 
+	if (test_bit(PFLG_DRIVER_REMOVING, &vha->pci_flags)) {
+		res = 0;
+		goto out;
+	}
+
 	mutex_lock(&vha->vha_tgt.tgt_mutex);
 
 	list_for_each_entry(tmp, &vha->logo_list, list) {
@@ -937,6 +961,7 @@ qlt_send_first_logo(struct scsi_qla_host *vha, qlt_port_logo_t *logo)
 	list_del(&logo->list);
 	mutex_unlock(&vha->vha_tgt.tgt_mutex);
 
+out:
 	ql_dbg(ql_dbg_tgt_mgt, vha, 0xf098,
 	    "Finished LOGO to %02x:%02x:%02x, dropped %d cmds, res = %#x\n",
 	    logo->id.b.domain, logo->id.b.area, logo->id.b.al_pa,
@@ -965,6 +990,21 @@ void qlt_free_session_done(struct work_struct *work)
 		sess->send_els_logo);
 
 	if (!IS_SW_RESV_ADDR(sess->d_id)) {
+		if (ha->flags.edif_enabled &&
+		    (!own || own->iocb.u.isp24.status_subcode == ELS_PLOGI)) {
+			sess->edif.authok = 0;
+			if (!ha->flags.host_shutting_down) {
+				ql_dbg(ql_dbg_edif, vha, 0x911e,
+					"%s wwpn %8phC calling qla2x00_release_all_sadb\n",
+					__func__, sess->port_name);
+				qla2x00_release_all_sadb(vha, sess);
+			} else {
+				ql_dbg(ql_dbg_edif, vha, 0x911e,
+					"%s bypassing release_all_sadb\n",
+					__func__);
+			}
+			qla_edif_sess_down(vha, sess);
+		}
 		qla2x00_mark_device_lost(vha, sess, 0);
 
 		if (sess->send_els_logo) {
@@ -972,6 +1012,7 @@ void qlt_free_session_done(struct work_struct *work)
 
 			logo.id = sess->d_id;
 			logo.cmd_count = 0;
+			INIT_LIST_HEAD(&logo.list);
 			if (!own)
 				qlt_send_first_logo(vha, &logo);
 			sess->send_els_logo = 0;
@@ -982,6 +1023,7 @@ void qlt_free_session_done(struct work_struct *work)
 
 			if (!own ||
 			     (own->iocb.u.isp24.status_subcode == ELS_PLOGI)) {
+				sess->logout_completed = 0;
 				rc = qla2x00_post_async_logout_work(vha, sess,
 				    NULL);
 				if (rc != QLA_SUCCESS)
@@ -1278,8 +1320,8 @@ void qlt_schedule_sess_for_deletion(struct fc_port *sess)
 	qla24xx_chk_fcp_state(sess);
 
 	ql_dbg(ql_log_warn, sess->vha, 0xe001,
-	    "Scheduling sess %p for deletion %8phC\n",
-	    sess, sess->port_name);
+	    "Scheduling sess %p for deletion %8phC fc4_type %x\n",
+	    sess, sess->port_name, sess->fc4_type);
 
 	WARN_ON(!queue_work(sess->vha->hw->wq, &sess->del_work));
 }
@@ -1720,6 +1762,12 @@ static void qlt_send_notify_ack(struct qla_qpair *qpair,
 	nack->u.isp24.srr_reject_code_expl = srr_explan;
 	nack->u.isp24.vp_index = ntfy->u.isp24.vp_index;
 
+	/* TODO qualify this with EDIF enable */
+	if (ntfy->u.isp24.status_subcode == ELS_PLOGI &&
+	    (le16_to_cpu(ntfy->u.isp24.flags) & NOTIFY24XX_FLAGS_FCSP)) {
+		nack->u.isp24.flags |= cpu_to_le16(NOTIFY_ACK_FLAGS_FCSP);
+	}
+
 	ql_dbg(ql_dbg_tgt, vha, 0xe005,
 	    "qla_target(%d): Sending 24xx Notify Ack %d\n",
 	    vha->vp_idx, nack->u.isp24.status);
@@ -2571,6 +2619,7 @@ static int qlt_24xx_build_ctio_pkt(struct qla_qpair *qpair,
 	struct ctio7_to_24xx *pkt;
 	struct atio_from_isp *atio = &prm->cmd->atio;
 	uint16_t temp;
+	struct qla_tgt_cmd      *cmd = prm->cmd;
 
 	pkt = (struct ctio7_to_24xx *)qpair->req->ring_ptr;
 	prm->pkt = pkt;
@@ -2603,6 +2652,15 @@ static int qlt_24xx_build_ctio_pkt(struct qla_qpair *qpair,
 	pkt->u.status0.ox_id = cpu_to_le16(temp);
 	pkt->u.status0.relative_offset = cpu_to_le32(prm->cmd->offset);
 
+	if (cmd->edif) {
+		if (cmd->dma_data_direction == DMA_TO_DEVICE)
+			prm->cmd->sess->edif.rx_bytes += cmd->bufflen;
+		if (cmd->dma_data_direction == DMA_FROM_DEVICE)
+			prm->cmd->sess->edif.tx_bytes += cmd->bufflen;
+
+		pkt->u.status0.edif_flags |= EF_EN_EDIF;
+	}
+
 	return 0;
 }
 
@@ -3293,8 +3351,10 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type,
 			if (xmit_type & QLA_TGT_XMIT_STATUS) {
 				pkt->u.status0.scsi_status =
 				    cpu_to_le16(prm.rq_result);
-				pkt->u.status0.residual =
-				    cpu_to_le32(prm.residual);
+				if (!cmd->edif)
+					pkt->u.status0.residual =
+						cpu_to_le32(prm.residual);
+
 				pkt->u.status0.flags |= cpu_to_le16(
 				    CTIO7_FLAGS_SEND_STATUS);
 				if (qlt_need_explicit_conf(cmd, 0)) {
@@ -3941,6 +4001,12 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha,
 	if (cmd == NULL)
 		return;
 
+	if ((le16_to_cpu(((struct ctio7_from_24xx *)ctio)->flags) & CTIO7_FLAGS_DATA_OUT) &&
+	    cmd->sess) {
+		qlt_chk_edif_rx_sa_delete_pending(vha, cmd->sess,
+		    (struct ctio7_from_24xx *)ctio);
+	}
+
 	se_cmd = &cmd->se_cmd;
 	cmd->cmd_sent_to_fw = 0;
 
@@ -4011,6 +4077,16 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha,
 			qlt_handle_dif_error(qpair, cmd, ctio);
 			return;
 		}
+
+		case CTIO_FAST_AUTH_ERR:
+		case CTIO_FAST_INCOMP_PAD_LEN:
+		case CTIO_FAST_INVALID_REQ:
+		case CTIO_FAST_SPI_ERR:
+			ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05b,
+			    "qla_target(%d): CTIO with EDIF error status 0x%x received (state %x, se_cmd %p\n",
+			    vha->vp_idx, status, cmd->state, se_cmd);
+			break;
+
 		default:
 			ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05b,
 			    "qla_target(%d): CTIO with error status 0x%x received (state %x, se_cmd %p\n",
@@ -4312,6 +4388,7 @@ static struct qla_tgt_cmd *qlt_get_tag(scsi_qla_host_t *vha,
 	qlt_assign_qpair(vha, cmd);
 	cmd->reset_count = vha->hw->base_qpair->chip_reset;
 	cmd->vp_idx = vha->vp_idx;
+	cmd->edif = sess->edif.enable;
 
 	return cmd;
 }
@@ -4727,6 +4804,34 @@ static int qlt_handle_login(struct scsi_qla_host *vha,
 		goto out;
 	}
 
+	if (vha->hw->flags.edif_enabled &&
+	    !(vha->e_dbell.db_flags & EDB_ACTIVE) &&
+	    iocb->u.isp24.status_subcode == ELS_PLOGI &&
+	    !(le16_to_cpu(iocb->u.isp24.flags) & NOTIFY24XX_FLAGS_FCSP)) {
+		ql_dbg(ql_dbg_disc, vha, 0xffff,
+			"%s %d Term INOT due to app not available lid=%d, NportID %06X ",
+			__func__, __LINE__, loop_id, port_id.b24);
+		qlt_send_term_imm_notif(vha, iocb, 1);
+		goto out;
+	}
+
+	if (vha->hw->flags.edif_enabled) {
+		if (!(vha->e_dbell.db_flags & EDB_ACTIVE)) {
+			ql_dbg(ql_dbg_disc, vha, 0xffff,
+			       "%s %d Term INOT due to app not started lid=%d, NportID %06X ",
+			       __func__, __LINE__, loop_id, port_id.b24);
+			qlt_send_term_imm_notif(vha, iocb, 1);
+			goto out;
+		} else if (iocb->u.isp24.status_subcode == ELS_PLOGI &&
+			   !(le16_to_cpu(iocb->u.isp24.flags) & NOTIFY24XX_FLAGS_FCSP)) {
+			ql_dbg(ql_dbg_disc, vha, 0xffff,
+			       "%s %d Term INOT due to unsecure lid=%d, NportID %06X ",
+			       __func__, __LINE__, loop_id, port_id.b24);
+			qlt_send_term_imm_notif(vha, iocb, 1);
+			goto out;
+		}
+	}
+
 	pla = qlt_plogi_ack_find_add(vha, &port_id, iocb);
 	if (!pla) {
 		ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0xffff,
@@ -4792,6 +4897,20 @@ static int qlt_handle_login(struct scsi_qla_host *vha,
 	qlt_plogi_ack_link(vha, pla, sess, QLT_PLOGI_LINK_SAME_WWN);
 	sess->d_id = port_id;
 	sess->login_gen++;
+	sess->loop_id = loop_id;
+
+	if (iocb->u.isp24.status_subcode == ELS_PLOGI) {
+		/* remote port has assigned Port ID */
+		if (N2N_TOPO(vha->hw) && fcport_is_bigger(sess))
+			vha->d_id = sess->d_id;
+
+		ql_dbg(ql_dbg_disc, vha, 0xffff,
+		    "%s %8phC - send port online\n",
+		    __func__, sess->port_name);
+
+		qla2x00_post_aen_work(vha, FCH_EVT_PORT_ONLINE,
+		    sess->d_id.b24);
+	}
 
 	if (iocb->u.isp24.status_subcode == ELS_PRLI) {
 		sess->fw_login_state = DSC_LS_PRLI_PEND;
@@ -4904,6 +5023,16 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha,
 			sess = qla2x00_find_fcport_by_wwpn(vha,
 			    iocb->u.isp24.port_name, 1);
 
+			if (vha->hw->flags.edif_enabled && sess &&
+			    (!(sess->flags & FCF_FCSP_DEVICE) ||
+			     !sess->edif.authok)) {
+				ql_dbg(ql_dbg_disc, vha, 0xffff,
+				       "%s %d %8phC Term PRLI due to unauthorize PRLI\n",
+				       __func__, __LINE__, iocb->u.isp24.port_name);
+				qlt_send_term_imm_notif(vha, iocb, 1);
+				break;
+			}
+
 			if (sess && sess->plogi_link[QLT_PLOGI_LINK_SAME_WWN]) {
 				ql_dbg(ql_dbg_disc, vha, 0xffff,
 				    "%s %d %8phC Term PRLI due to PLOGI ACK not completed\n",
@@ -4952,6 +5081,16 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha,
 			bool delete = false;
 			int sec;
 
+			if (vha->hw->flags.edif_enabled && sess &&
+			    (!(sess->flags & FCF_FCSP_DEVICE) ||
+			     !sess->edif.authok)) {
+				ql_dbg(ql_dbg_disc, vha, 0xffff,
+				       "%s %d %8phC Term PRLI due to unauthorize prli\n",
+				       __func__, __LINE__, iocb->u.isp24.port_name);
+				qlt_send_term_imm_notif(vha, iocb, 1);
+				break;
+			}
+
 			spin_lock_irqsave(&tgt->ha->tgt.sess_lock, flags);
 			switch (sess->fw_login_state) {
 			case DSC_LS_PLOGI_PEND:
@@ -5141,7 +5280,8 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha,
 }
 
 /*
- * ha->hardware_lock supposed to be held on entry. Might drop it, then reaquire
+ * ha->hardware_lock supposed to be held on entry.
+ * Might drop it, then reacquire.
  */
 static void qlt_handle_imm_notify(struct scsi_qla_host *vha,
 	struct imm_ntfy_from_isp *iocb)
@@ -6444,15 +6584,15 @@ int qlt_remove_target(struct qla_hw_data *ha, struct scsi_qla_host *vha)
 	return 0;
 }
 
-void qlt_remove_target_resources(struct qla_hw_data *ha)
+void qla_remove_hostmap(struct qla_hw_data *ha)
 {
 	struct scsi_qla_host *node;
 	u32 key = 0;
 
-	btree_for_each_safe32(&ha->tgt.host_map, key, node)
-		btree_remove32(&ha->tgt.host_map, key);
+	btree_for_each_safe32(&ha->host_map, key, node)
+		btree_remove32(&ha->host_map, key);
 
-	btree_destroy32(&ha->tgt.host_map);
+	btree_destroy32(&ha->host_map);
 }
 
 static void qlt_lport_dump(struct scsi_qla_host *vha, u64 wwpn,
@@ -7080,8 +7220,7 @@ qlt_modify_vp_config(struct scsi_qla_host *vha,
 void
 qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha)
 {
-	int rc;
-
+	mutex_init(&base_vha->vha_tgt.tgt_mutex);
 	if (!QLA_TGT_MODE_ENABLED())
 		return;
 
@@ -7094,7 +7233,6 @@ qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha)
 		ISP_ATIO_Q_OUT(base_vha) = &ha->iobase->isp24.atio_q_out;
 	}
 
-	mutex_init(&base_vha->vha_tgt.tgt_mutex);
 	mutex_init(&base_vha->vha_tgt.tgt_host_action_mutex);
 
 	INIT_LIST_HEAD(&base_vha->unknown_atio_list);
@@ -7103,11 +7241,6 @@ qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha)
 
 	qlt_clear_mode(base_vha);
 
-	rc = btree_init32(&ha->tgt.host_map);
-	if (rc)
-		ql_log(ql_log_info, base_vha, 0xd03d,
-		    "Unable to initialize ha->host_map btree\n");
-
 	qlt_update_vp_map(base_vha, SET_VP_IDX);
 }
 
@@ -7228,21 +7361,20 @@ qlt_update_vp_map(struct scsi_qla_host *vha, int cmd)
 	u32 key;
 	int rc;
 
-	if (!QLA_TGT_MODE_ENABLED())
-		return;
-
 	key = vha->d_id.b24;
 
 	switch (cmd) {
 	case SET_VP_IDX:
+		if (!QLA_TGT_MODE_ENABLED())
+			return;
 		vha->hw->tgt.tgt_vp_map[vha->vp_idx].vha = vha;
 		break;
 	case SET_AL_PA:
-		slot = btree_lookup32(&vha->hw->tgt.host_map, key);
+		slot = btree_lookup32(&vha->hw->host_map, key);
 		if (!slot) {
 			ql_dbg(ql_dbg_tgt_mgt, vha, 0xf018,
 			    "Save vha in host_map %p %06x\n", vha, key);
-			rc = btree_insert32(&vha->hw->tgt.host_map,
+			rc = btree_insert32(&vha->hw->host_map,
 				key, vha, GFP_ATOMIC);
 			if (rc)
 				ql_log(ql_log_info, vha, 0xd03e,
@@ -7252,17 +7384,19 @@ qlt_update_vp_map(struct scsi_qla_host *vha, int cmd)
 		}
 		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf019,
 		    "replace existing vha in host_map %p %06x\n", vha, key);
-		btree_update32(&vha->hw->tgt.host_map, key, vha);
+		btree_update32(&vha->hw->host_map, key, vha);
 		break;
 	case RESET_VP_IDX:
+		if (!QLA_TGT_MODE_ENABLED())
+			return;
 		vha->hw->tgt.tgt_vp_map[vha->vp_idx].vha = NULL;
 		break;
 	case RESET_AL_PA:
 		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf01a,
 		   "clear vha in host_map %p %06x\n", vha, key);
-		slot = btree_lookup32(&vha->hw->tgt.host_map, key);
+		slot = btree_lookup32(&vha->hw->host_map, key);
 		if (slot)
-			btree_remove32(&vha->hw->tgt.host_map, key);
+			btree_remove32(&vha->hw->host_map, key);
 		vha->d_id.b24 = 0;
 		break;
 	}
diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h
index 01620f3eab39..156b950ca7e7 100644
--- a/drivers/scsi/qla2xxx/qla_target.h
+++ b/drivers/scsi/qla2xxx/qla_target.h
@@ -176,6 +176,7 @@ struct nack_to_isp {
 	uint8_t  reserved[2];
 	__le16	ox_id;
 } __packed;
+#define NOTIFY_ACK_FLAGS_FCSP		BIT_5
 #define NOTIFY_ACK_FLAGS_TERMINATE	BIT_3
 #define NOTIFY_ACK_SRR_FLAGS_ACCEPT	0
 #define NOTIFY_ACK_SRR_FLAGS_REJECT	1
@@ -238,6 +239,10 @@ struct ctio_to_2xxx {
 #define CTIO_PORT_LOGGED_OUT		0x29
 #define CTIO_PORT_CONF_CHANGED		0x2A
 #define CTIO_SRR_RECEIVED		0x45
+#define CTIO_FAST_AUTH_ERR		0x63
+#define CTIO_FAST_INCOMP_PAD_LEN	0x65
+#define CTIO_FAST_INVALID_REQ		0x66
+#define CTIO_FAST_SPI_ERR		0x67
 #endif
 
 #ifndef CTIO_RET_TYPE
@@ -408,7 +413,16 @@ struct ctio7_to_24xx {
 		struct {
 			__le16	reserved1;
 			__le16 flags;
-			__le32	residual;
+			union {
+				__le32	residual;
+				struct {
+					uint8_t rsvd1;
+					uint8_t edif_flags;
+#define EF_EN_EDIF	BIT_0
+#define EF_NEW_SA	BIT_1
+					uint16_t rsvd2;
+				};
+			};
 			__le16 ox_id;
 			__le16	scsi_status;
 			__le32	relative_offset;
@@ -446,7 +460,7 @@ struct ctio7_from_24xx {
 	uint8_t  vp_index;
 	uint8_t  reserved1[5];
 	__le32	exchange_address;
-	__le16	reserved2;
+	__le16	edif_sa_index;
 	__le16	flags;
 	__le32	residual;
 	__le16	ox_id;
@@ -875,6 +889,7 @@ struct qla_tgt_cmd {
 	unsigned int term_exchg:1;
 	unsigned int cmd_sent_to_fw:1;
 	unsigned int cmd_in_wq:1;
+	unsigned int edif:1;
 
 	/*
 	 * This variable may be set from outside the LIO and I/O completion
diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h
index da11829fa12d..055040cbef9b 100644
--- a/drivers/scsi/qla2xxx/qla_version.h
+++ b/drivers/scsi/qla2xxx/qla_version.h
@@ -6,9 +6,9 @@
 /*
  * Driver version
  */
-#define QLA2XXX_VERSION      "10.02.00.106-k"
+#define QLA2XXX_VERSION      "10.02.06.200-k"
 
 #define QLA_DRIVER_MAJOR_VER	10
 #define QLA_DRIVER_MINOR_VER	2
-#define QLA_DRIVER_PATCH_VER	0
-#define QLA_DRIVER_BETA_VER	106
+#define QLA_DRIVER_PATCH_VER	6
+#define QLA_DRIVER_BETA_VER	200
diff --git a/drivers/scsi/qla4xxx/ql4_init.c b/drivers/scsi/qla4xxx/ql4_init.c
index f786ac2f5548..301bc09c8365 100644
--- a/drivers/scsi/qla4xxx/ql4_init.c
+++ b/drivers/scsi/qla4xxx/ql4_init.c
@@ -119,8 +119,8 @@ int qla4xxx_init_rings(struct scsi_qla_host *ha)
 		 * the interrupt_handler to think there are responses to be
 		 * processed when there aren't.
 		 */
-		ha->shadow_regs->req_q_out = __constant_cpu_to_le32(0);
-		ha->shadow_regs->rsp_q_in = __constant_cpu_to_le32(0);
+		ha->shadow_regs->req_q_out = cpu_to_le32(0);
+		ha->shadow_regs->rsp_q_in = cpu_to_le32(0);
 		wmb();
 
 		writel(0, &ha->reg->req_q_in);
diff --git a/drivers/scsi/qla4xxx/ql4_iocb.c b/drivers/scsi/qla4xxx/ql4_iocb.c
index cbd1e6ffcd67..28eab07935ba 100644
--- a/drivers/scsi/qla4xxx/ql4_iocb.c
+++ b/drivers/scsi/qla4xxx/ql4_iocb.c
@@ -160,7 +160,7 @@ static void qla4xxx_build_scsi_iocbs(struct srb *srb,
 
 	if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE) {
 		/* No data being transferred */
-		cmd_entry->ttlByteCnt = __constant_cpu_to_le32(0);
+		cmd_entry->ttlByteCnt = cpu_to_le32(0);
 		return;
 	}
 
@@ -288,7 +288,7 @@ int qla4xxx_send_command_to_isp(struct scsi_qla_host *ha, struct srb * srb)
 	/* Acquire hardware specific lock */
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 
-	index = (uint32_t)cmd->request->tag;
+	index = scsi_cmd_to_rq(cmd)->tag;
 
 	/*
 	 * Check to see if adapter is online before placing request on
diff --git a/drivers/scsi/qla4xxx/ql4_mbx.c b/drivers/scsi/qla4xxx/ql4_mbx.c
index 187d78aa4f67..cd71074f3abe 100644
--- a/drivers/scsi/qla4xxx/ql4_mbx.c
+++ b/drivers/scsi/qla4xxx/ql4_mbx.c
@@ -645,8 +645,8 @@ int qla4xxx_initialize_fw_cb(struct scsi_qla_host * ha)
 	/* Fill in the request and response queue information. */
 	init_fw_cb->rqq_consumer_idx = cpu_to_le16(ha->request_out);
 	init_fw_cb->compq_producer_idx = cpu_to_le16(ha->response_in);
-	init_fw_cb->rqq_len = __constant_cpu_to_le16(REQUEST_QUEUE_DEPTH);
-	init_fw_cb->compq_len = __constant_cpu_to_le16(RESPONSE_QUEUE_DEPTH);
+	init_fw_cb->rqq_len = cpu_to_le16(REQUEST_QUEUE_DEPTH);
+	init_fw_cb->compq_len = cpu_to_le16(RESPONSE_QUEUE_DEPTH);
 	init_fw_cb->rqq_addr_lo = cpu_to_le32(LSDW(ha->request_dma));
 	init_fw_cb->rqq_addr_hi = cpu_to_le32(MSDW(ha->request_dma));
 	init_fw_cb->compq_addr_lo = cpu_to_le32(LSDW(ha->response_dma));
@@ -656,20 +656,20 @@ int qla4xxx_initialize_fw_cb(struct scsi_qla_host * ha)
 
 	/* Set up required options. */
 	init_fw_cb->fw_options |=
-		__constant_cpu_to_le16(FWOPT_SESSION_MODE |
-				       FWOPT_INITIATOR_MODE);
+		cpu_to_le16(FWOPT_SESSION_MODE |
+			    FWOPT_INITIATOR_MODE);
 
 	if (is_qla80XX(ha))
 		init_fw_cb->fw_options |=
-		    __constant_cpu_to_le16(FWOPT_ENABLE_CRBDB);
+		    cpu_to_le16(FWOPT_ENABLE_CRBDB);
 
-	init_fw_cb->fw_options &= __constant_cpu_to_le16(~FWOPT_TARGET_MODE);
+	init_fw_cb->fw_options &= cpu_to_le16(~FWOPT_TARGET_MODE);
 
 	init_fw_cb->add_fw_options = 0;
 	init_fw_cb->add_fw_options |=
-			__constant_cpu_to_le16(ADFWOPT_SERIALIZE_TASK_MGMT);
+			cpu_to_le16(ADFWOPT_SERIALIZE_TASK_MGMT);
 	init_fw_cb->add_fw_options |=
-			__constant_cpu_to_le16(ADFWOPT_AUTOCONN_DISABLE);
+			cpu_to_le16(ADFWOPT_AUTOCONN_DISABLE);
 
 	if (qla4xxx_set_ifcb(ha, &mbox_cmd[0], &mbox_sts[0], init_fw_cb_dma)
 		!= QLA_SUCCESS) {
@@ -1613,7 +1613,7 @@ int qla4xxx_get_chap(struct scsi_qla_host *ha, char *username, char *password,
 
 	strlcpy(password, chap_table->secret, QL4_CHAP_MAX_SECRET_LEN);
 	strlcpy(username, chap_table->name, QL4_CHAP_MAX_NAME_LEN);
-	chap_table->cookie = __constant_cpu_to_le16(CHAP_VALID_COOKIE);
+	chap_table->cookie = cpu_to_le16(CHAP_VALID_COOKIE);
 
 exit_get_chap:
 	dma_pool_free(ha->chap_dma_pool, chap_table, chap_dma);
@@ -1655,7 +1655,7 @@ int qla4xxx_set_chap(struct scsi_qla_host *ha, char *username, char *password,
 	chap_table->secret_len = strlen(password);
 	strncpy(chap_table->secret, password, MAX_CHAP_SECRET_LEN - 1);
 	strncpy(chap_table->name, username, MAX_CHAP_NAME_LEN - 1);
-	chap_table->cookie = __constant_cpu_to_le16(CHAP_VALID_COOKIE);
+	chap_table->cookie = cpu_to_le16(CHAP_VALID_COOKIE);
 
 	if (is_qla40XX(ha)) {
 		chap_size = MAX_CHAP_ENTRIES_40XX * sizeof(*chap_table);
@@ -1721,7 +1721,7 @@ int qla4xxx_get_uni_chap_at_index(struct scsi_qla_host *ha, char *username,
 
 	mutex_lock(&ha->chap_sem);
 	chap_table = (struct ql4_chap_table *)ha->chap_list + chap_index;
-	if (chap_table->cookie != __constant_cpu_to_le16(CHAP_VALID_COOKIE)) {
+	if (chap_table->cookie != cpu_to_le16(CHAP_VALID_COOKIE)) {
 		rval = QLA_ERROR;
 		goto exit_unlock_uni_chap;
 	}
@@ -1784,7 +1784,7 @@ int qla4xxx_get_chap_index(struct scsi_qla_host *ha, char *username,
 	for (i = 0; i < max_chap_entries; i++) {
 		chap_table = (struct ql4_chap_table *)ha->chap_list + i;
 		if (chap_table->cookie !=
-		    __constant_cpu_to_le16(CHAP_VALID_COOKIE)) {
+		    cpu_to_le16(CHAP_VALID_COOKIE)) {
 			if (i > MAX_RESRV_CHAP_IDX && free_index == -1)
 				free_index = i;
 			continue;
@@ -2105,18 +2105,18 @@ int qla4xxx_set_param_ddbentry(struct scsi_qla_host *ha,
 
 	if (conn->max_recv_dlength)
 		fw_ddb_entry->iscsi_max_rcv_data_seg_len =
-		  __constant_cpu_to_le16((conn->max_recv_dlength / BYTE_UNITS));
+		  cpu_to_le16((conn->max_recv_dlength / BYTE_UNITS));
 
 	if (sess->max_r2t)
 		fw_ddb_entry->iscsi_max_outsnd_r2t = cpu_to_le16(sess->max_r2t);
 
 	if (sess->first_burst)
 		fw_ddb_entry->iscsi_first_burst_len =
-		       __constant_cpu_to_le16((sess->first_burst / BYTE_UNITS));
+		       cpu_to_le16((sess->first_burst / BYTE_UNITS));
 
 	if (sess->max_burst)
 		fw_ddb_entry->iscsi_max_burst_len =
-			__constant_cpu_to_le16((sess->max_burst / BYTE_UNITS));
+			cpu_to_le16((sess->max_burst / BYTE_UNITS));
 
 	if (sess->time2wait)
 		fw_ddb_entry->iscsi_def_time2wait =
diff --git a/drivers/scsi/qla4xxx/ql4_nx.c b/drivers/scsi/qla4xxx/ql4_nx.c
index 66a487795c53..47adff9f0506 100644
--- a/drivers/scsi/qla4xxx/ql4_nx.c
+++ b/drivers/scsi/qla4xxx/ql4_nx.c
@@ -3658,7 +3658,7 @@ qla4_82xx_read_flash_data(struct scsi_qla_host *ha, uint32_t *dwptr,
 			    "Do ROM fast read failed\n");
 			goto done_read;
 		}
-		dwptr[i] = __constant_cpu_to_le32(val);
+		dwptr[i] = cpu_to_le32(val);
 	}
 
 done_read:
@@ -3721,9 +3721,9 @@ qla4_8xxx_get_flt_info(struct scsi_qla_host *ha, uint32_t flt_addr)
 			goto no_flash_data;
 	}
 
-	if (*wptr == __constant_cpu_to_le16(0xffff))
+	if (*wptr == cpu_to_le16(0xffff))
 		goto no_flash_data;
-	if (flt->version != __constant_cpu_to_le16(1)) {
+	if (flt->version != cpu_to_le16(1)) {
 		DEBUG2(ql4_printk(KERN_INFO, ha, "Unsupported FLT detected: "
 			"version=0x%x length=0x%x checksum=0x%x.\n",
 			le16_to_cpu(flt->version), le16_to_cpu(flt->length),
@@ -3826,7 +3826,7 @@ qla4_82xx_get_fdt_info(struct scsi_qla_host *ha)
 	qla4_82xx_read_optrom_data(ha, (uint8_t *)ha->request_ring,
 	    hw->flt_region_fdt << 2, OPTROM_BURST_SIZE);
 
-	if (*wptr == __constant_cpu_to_le16(0xffff))
+	if (*wptr == cpu_to_le16(0xffff))
 		goto no_flash_data;
 
 	if (fdt->sig[0] != 'Q' || fdt->sig[1] != 'L' || fdt->sig[2] != 'I' ||
@@ -3883,7 +3883,7 @@ qla4_82xx_get_idc_param(struct scsi_qla_host *ha)
 	qla4_82xx_read_optrom_data(ha, (uint8_t *)ha->request_ring,
 			QLA82XX_IDC_PARAM_ADDR , 8);
 
-	if (*wptr == __constant_cpu_to_le32(0xffffffff)) {
+	if (*wptr == cpu_to_le32(0xffffffff)) {
 		ha->nx_dev_init_timeout = ROM_DEV_INIT_TIMEOUT;
 		ha->nx_reset_timeout = ROM_DRV_RESET_ACK_TIMEOUT;
 	} else {
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 6ee7ea4c27e0..f1ea65c6e5f5 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -702,7 +702,7 @@ static int qla4xxx_get_chap_by_index(struct scsi_qla_host *ha,
 
 	*chap_entry = (struct ql4_chap_table *)ha->chap_list + chap_index;
 	if ((*chap_entry)->cookie !=
-	     __constant_cpu_to_le16(CHAP_VALID_COOKIE)) {
+	     cpu_to_le16(CHAP_VALID_COOKIE)) {
 		*chap_entry = NULL;
 	} else {
 		rval = QLA_SUCCESS;
@@ -745,7 +745,7 @@ static int qla4xxx_find_free_chap_index(struct scsi_qla_host *ha,
 		chap_table = (struct ql4_chap_table *)ha->chap_list + i;
 
 		if ((chap_table->cookie !=
-		    __constant_cpu_to_le16(CHAP_VALID_COOKIE)) &&
+		    cpu_to_le16(CHAP_VALID_COOKIE)) &&
 		   (i > MAX_RESRV_CHAP_IDX)) {
 				free_index = i;
 				break;
@@ -794,7 +794,7 @@ static int qla4xxx_get_chap_list(struct Scsi_Host *shost, uint16_t chap_tbl_idx,
 	for (i = chap_tbl_idx; i < max_chap_entries; i++) {
 		chap_table = (struct ql4_chap_table *)ha->chap_list + i;
 		if (chap_table->cookie !=
-		    __constant_cpu_to_le16(CHAP_VALID_COOKIE))
+		    cpu_to_le16(CHAP_VALID_COOKIE))
 			continue;
 
 		chap_rec->chap_tbl_idx = i;
@@ -923,7 +923,7 @@ static int qla4xxx_delete_chap(struct Scsi_Host *shost, uint16_t chap_tbl_idx)
 		goto exit_delete_chap;
 	}
 
-	chap_table->cookie = __constant_cpu_to_le16(0xFFFF);
+	chap_table->cookie = cpu_to_le16(0xFFFF);
 
 	offset = FLASH_CHAP_OFFSET |
 			(chap_tbl_idx * sizeof(struct ql4_chap_table));
@@ -6043,7 +6043,7 @@ static int qla4xxx_get_bidi_chap(struct scsi_qla_host *ha, char *username,
 	for (i = 0; i < max_chap_entries; i++) {
 		chap_table = (struct ql4_chap_table *)ha->chap_list + i;
 		if (chap_table->cookie !=
-		    __constant_cpu_to_le16(CHAP_VALID_COOKIE)) {
+		    cpu_to_le16(CHAP_VALID_COOKIE)) {
 			continue;
 		}
 
@@ -9282,7 +9282,7 @@ static int qla4xxx_eh_device_reset(struct scsi_cmnd *cmd)
 	DEBUG2(printk(KERN_INFO
 		      "scsi%ld: DEVICE_RESET cmd=%p jiffies = 0x%lx, to=%x,"
 		      "dpc_flags=%lx, status=%x allowed=%d\n", ha->host_no,
-		      cmd, jiffies, cmd->request->timeout / HZ,
+		      cmd, jiffies, scsi_cmd_to_rq(cmd)->timeout / HZ,
 		      ha->dpc_flags, cmd->result, cmd->allowed));
 
 	rval = qla4xxx_isp_check_reg(ha);
@@ -9349,7 +9349,7 @@ static int qla4xxx_eh_target_reset(struct scsi_cmnd *cmd)
 	DEBUG2(printk(KERN_INFO
 		      "scsi%ld: TARGET_DEVICE_RESET cmd=%p jiffies = 0x%lx, "
 		      "to=%x,dpc_flags=%lx, status=%x allowed=%d\n",
-		      ha->host_no, cmd, jiffies, cmd->request->timeout / HZ,
+		      ha->host_no, cmd, jiffies, scsi_cmd_to_rq(cmd)->timeout / HZ,
 		      ha->dpc_flags, cmd->result, cmd->allowed));
 
 	rval = qla4xxx_isp_check_reg(ha);
diff --git a/drivers/scsi/qlogicpti.c b/drivers/scsi/qlogicpti.c
index d84e218d32cb..8e7e833a36cc 100644
--- a/drivers/scsi/qlogicpti.c
+++ b/drivers/scsi/qlogicpti.c
@@ -890,7 +890,7 @@ static inline void cmd_frob(struct Command_Entry *cmd, struct scsi_cmnd *Cmnd,
 		cmd->control_flags |= CFLAG_WRITE;
 	else
 		cmd->control_flags |= CFLAG_READ;
-	cmd->time_out = Cmnd->request->timeout/HZ;
+	cmd->time_out = scsi_cmd_to_rq(Cmnd)->timeout / HZ;
 	memcpy(cmd->cdb, Cmnd->cmnd, Cmnd->cmd_len);
 }
 
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index d26025cf5de3..b241f9e3885c 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -190,7 +190,7 @@ void scsi_finish_command(struct scsi_cmnd *cmd)
 				"(result %x)\n", cmd->result));
 
 	good_bytes = scsi_bufflen(cmd);
-	if (!blk_rq_is_passthrough(cmd->request)) {
+	if (!blk_rq_is_passthrough(scsi_cmd_to_rq(cmd))) {
 		int old_good_bytes = good_bytes;
 		drv = scsi_cmd_to_driver(cmd);
 		if (drv->done)
diff --git a/drivers/scsi/scsi_bsg.c b/drivers/scsi/scsi_bsg.c
new file mode 100644
index 000000000000..81c3853a2a80
--- /dev/null
+++ b/drivers/scsi/scsi_bsg.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bsg.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_ioctl.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+#include <scsi/sg.h>
+#include "scsi_priv.h"
+
+#define uptr64(val) ((void __user *)(uintptr_t)(val))
+
+static int scsi_bsg_sg_io_fn(struct request_queue *q, struct sg_io_v4 *hdr,
+		fmode_t mode, unsigned int timeout)
+{
+	struct scsi_request *sreq;
+	struct request *rq;
+	struct bio *bio;
+	int ret;
+
+	if (hdr->protocol != BSG_PROTOCOL_SCSI  ||
+	    hdr->subprotocol != BSG_SUB_PROTOCOL_SCSI_CMD)
+		return -EINVAL;
+	if (hdr->dout_xfer_len && hdr->din_xfer_len) {
+		pr_warn_once("BIDI support in bsg has been removed.\n");
+		return -EOPNOTSUPP;
+	}
+
+	rq = blk_get_request(q, hdr->dout_xfer_len ?
+			     REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+	rq->timeout = timeout;
+
+	ret = -ENOMEM;
+	sreq = scsi_req(rq);
+	sreq->cmd_len = hdr->request_len;
+	if (sreq->cmd_len > BLK_MAX_CDB) {
+		sreq->cmd = kzalloc(sreq->cmd_len, GFP_KERNEL);
+		if (!sreq->cmd)
+			goto out_put_request;
+	}
+
+	ret = -EFAULT;
+	if (copy_from_user(sreq->cmd, uptr64(hdr->request), sreq->cmd_len))
+		goto out_free_cmd;
+	ret = -EPERM;
+	if (!scsi_cmd_allowed(sreq->cmd, mode))
+		goto out_free_cmd;
+
+	ret = 0;
+	if (hdr->dout_xfer_len) {
+		ret = blk_rq_map_user(rq->q, rq, NULL, uptr64(hdr->dout_xferp),
+				hdr->dout_xfer_len, GFP_KERNEL);
+	} else if (hdr->din_xfer_len) {
+		ret = blk_rq_map_user(rq->q, rq, NULL, uptr64(hdr->din_xferp),
+				hdr->din_xfer_len, GFP_KERNEL);
+	}
+
+	if (ret)
+		goto out_free_cmd;
+
+	bio = rq->bio;
+	blk_execute_rq(NULL, rq, !(hdr->flags & BSG_FLAG_Q_AT_TAIL));
+
+	/*
+	 * fill in all the output members
+	 */
+	hdr->device_status = sreq->result & 0xff;
+	hdr->transport_status = host_byte(sreq->result);
+	hdr->driver_status = 0;
+	if (scsi_status_is_check_condition(sreq->result))
+		hdr->driver_status = DRIVER_SENSE;
+	hdr->info = 0;
+	if (hdr->device_status || hdr->transport_status || hdr->driver_status)
+		hdr->info |= SG_INFO_CHECK;
+	hdr->response_len = 0;
+
+	if (sreq->sense_len && hdr->response) {
+		int len = min_t(unsigned int, hdr->max_response_len,
+					sreq->sense_len);
+
+		if (copy_to_user(uptr64(hdr->response), sreq->sense, len))
+			ret = -EFAULT;
+		else
+			hdr->response_len = len;
+	}
+
+	if (rq_data_dir(rq) == READ)
+		hdr->din_resid = sreq->resid_len;
+	else
+		hdr->dout_resid = sreq->resid_len;
+
+	blk_rq_unmap_user(bio);
+
+out_free_cmd:
+	scsi_req_free_cmd(scsi_req(rq));
+out_put_request:
+	blk_put_request(rq);
+	return ret;
+}
+
+struct bsg_device *scsi_bsg_register_queue(struct scsi_device *sdev)
+{
+	return bsg_register_queue(sdev->request_queue, &sdev->sdev_gendev,
+			dev_name(&sdev->sdev_gendev), scsi_bsg_sg_io_fn);
+}
diff --git a/drivers/scsi/scsi_common.c b/drivers/scsi/scsi_common.c
index 90349498f686..6e50e81a8216 100644
--- a/drivers/scsi/scsi_common.c
+++ b/drivers/scsi/scsi_common.c
@@ -7,9 +7,18 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/errno.h>
+#include <linux/module.h>
 #include <asm/unaligned.h>
 #include <scsi/scsi_common.h>
 
+MODULE_LICENSE("GPL v2");
+
+/* Command group 3 is reserved and should never be used.  */
+const unsigned char scsi_command_size_tbl[8] = {
+	6, 10, 10, 12, 16, 12, 10, 10
+};
+EXPORT_SYMBOL(scsi_command_size_tbl);
+
 /* NB: These are exposed through /proc/scsi/scsi and form part of the ABI.
  * You may not alter any existing entry (although adding new ones is
  * encouraged once assigned by ANSI/INCITS T10).
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 58f69366bdcc..66f507469a31 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -3076,6 +3076,7 @@ static void dif_copy_prot(struct scsi_cmnd *scp, sector_t sector,
 static int prot_verify_read(struct scsi_cmnd *scp, sector_t start_sec,
 			    unsigned int sectors, u32 ei_lba)
 {
+	int ret = 0;
 	unsigned int i;
 	sector_t sector;
 	struct sdeb_store_info *sip = devip2sip((struct sdebug_dev_info *)
@@ -3083,26 +3084,33 @@ static int prot_verify_read(struct scsi_cmnd *scp, sector_t start_sec,
 	struct t10_pi_tuple *sdt;
 
 	for (i = 0; i < sectors; i++, ei_lba++) {
-		int ret;
-
 		sector = start_sec + i;
 		sdt = dif_store(sip, sector);
 
 		if (sdt->app_tag == cpu_to_be16(0xffff))
 			continue;
 
-		ret = dif_verify(sdt, lba2fake_store(sip, sector), sector,
-				 ei_lba);
-		if (ret) {
-			dif_errors++;
-			return ret;
+		/*
+		 * Because scsi_debug acts as both initiator and
+		 * target we proceed to verify the PI even if
+		 * RDPROTECT=3. This is done so the "initiator" knows
+		 * which type of error to return. Otherwise we would
+		 * have to iterate over the PI twice.
+		 */
+		if (scp->cmnd[1] >> 5) { /* RDPROTECT */
+			ret = dif_verify(sdt, lba2fake_store(sip, sector),
+					 sector, ei_lba);
+			if (ret) {
+				dif_errors++;
+				break;
+			}
 		}
 	}
 
 	dif_copy_prot(scp, start_sec, sectors, true);
 	dix_reads++;
 
-	return 0;
+	return ret;
 }
 
 static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
@@ -3196,12 +3204,29 @@ static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
 
 	/* DIX + T10 DIF */
 	if (unlikely(sdebug_dix && scsi_prot_sg_count(scp))) {
-		int prot_ret = prot_verify_read(scp, lba, num, ei_lba);
-
-		if (prot_ret) {
-			read_unlock(macc_lckp);
-			mk_sense_buffer(scp, ABORTED_COMMAND, 0x10, prot_ret);
-			return illegal_condition_result;
+		switch (prot_verify_read(scp, lba, num, ei_lba)) {
+		case 1: /* Guard tag error */
+			if (cmd[1] >> 5 != 3) { /* RDPROTECT != 3 */
+				read_unlock(macc_lckp);
+				mk_sense_buffer(scp, ABORTED_COMMAND, 0x10, 1);
+				return check_condition_result;
+			} else if (scp->prot_flags & SCSI_PROT_GUARD_CHECK) {
+				read_unlock(macc_lckp);
+				mk_sense_buffer(scp, ILLEGAL_REQUEST, 0x10, 1);
+				return illegal_condition_result;
+			}
+			break;
+		case 3: /* Reference tag error */
+			if (cmd[1] >> 5 != 3) { /* RDPROTECT != 3 */
+				read_unlock(macc_lckp);
+				mk_sense_buffer(scp, ABORTED_COMMAND, 0x10, 3);
+				return check_condition_result;
+			} else if (scp->prot_flags & SCSI_PROT_REF_CHECK) {
+				read_unlock(macc_lckp);
+				mk_sense_buffer(scp, ILLEGAL_REQUEST, 0x10, 3);
+				return illegal_condition_result;
+			}
+			break;
 		}
 	}
 
@@ -3232,28 +3257,6 @@ static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
 	return 0;
 }
 
-static void dump_sector(unsigned char *buf, int len)
-{
-	int i, j, n;
-
-	pr_err(">>> Sector Dump <<<\n");
-	for (i = 0 ; i < len ; i += 16) {
-		char b[128];
-
-		for (j = 0, n = 0; j < 16; j++) {
-			unsigned char c = buf[i+j];
-
-			if (c >= 0x20 && c < 0x7e)
-				n += scnprintf(b + n, sizeof(b) - n,
-					       " %c ", buf[i+j]);
-			else
-				n += scnprintf(b + n, sizeof(b) - n,
-					       "%02x ", buf[i+j]);
-		}
-		pr_err("%04d: %s\n", i, b);
-	}
-}
-
 static int prot_verify_write(struct scsi_cmnd *SCpnt, sector_t start_sec,
 			     unsigned int sectors, u32 ei_lba)
 {
@@ -3299,10 +3302,10 @@ static int prot_verify_write(struct scsi_cmnd *SCpnt, sector_t start_sec,
 			sdt = piter.addr + ppage_offset;
 			daddr = diter.addr + dpage_offset;
 
-			ret = dif_verify(sdt, daddr, sector, ei_lba);
-			if (ret) {
-				dump_sector(daddr, sdebug_sector_size);
-				goto out;
+			if (SCpnt->cmnd[1] >> 5 != 3) { /* WRPROTECT */
+				ret = dif_verify(sdt, daddr, sector, ei_lba);
+				if (ret)
+					goto out;
 			}
 
 			sector++;
@@ -3480,12 +3483,29 @@ static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
 
 	/* DIX + T10 DIF */
 	if (unlikely(sdebug_dix && scsi_prot_sg_count(scp))) {
-		int prot_ret = prot_verify_write(scp, lba, num, ei_lba);
-
-		if (prot_ret) {
-			write_unlock(macc_lckp);
-			mk_sense_buffer(scp, ILLEGAL_REQUEST, 0x10, prot_ret);
-			return illegal_condition_result;
+		switch (prot_verify_write(scp, lba, num, ei_lba)) {
+		case 1: /* Guard tag error */
+			if (scp->prot_flags & SCSI_PROT_GUARD_CHECK) {
+				write_unlock(macc_lckp);
+				mk_sense_buffer(scp, ILLEGAL_REQUEST, 0x10, 1);
+				return illegal_condition_result;
+			} else if (scp->cmnd[1] >> 5 != 3) { /* WRPROTECT != 3 */
+				write_unlock(macc_lckp);
+				mk_sense_buffer(scp, ABORTED_COMMAND, 0x10, 1);
+				return check_condition_result;
+			}
+			break;
+		case 3: /* Reference tag error */
+			if (scp->prot_flags & SCSI_PROT_REF_CHECK) {
+				write_unlock(macc_lckp);
+				mk_sense_buffer(scp, ILLEGAL_REQUEST, 0x10, 3);
+				return illegal_condition_result;
+			} else if (scp->cmnd[1] >> 5 != 3) { /* WRPROTECT != 3 */
+				write_unlock(macc_lckp);
+				mk_sense_buffer(scp, ABORTED_COMMAND, 0x10, 3);
+				return check_condition_result;
+			}
+			break;
 		}
 	}
 
@@ -4702,7 +4722,7 @@ fini:
 static struct sdebug_queue *get_queue(struct scsi_cmnd *cmnd)
 {
 	u16 hwq;
-	u32 tag = blk_mq_unique_tag(cmnd->request);
+	u32 tag = blk_mq_unique_tag(scsi_cmd_to_rq(cmnd));
 
 	hwq = blk_mq_unique_tag_to_hwq(tag);
 
@@ -4715,7 +4735,7 @@ static struct sdebug_queue *get_queue(struct scsi_cmnd *cmnd)
 
 static u32 get_tag(struct scsi_cmnd *cmnd)
 {
-	return blk_mq_unique_tag(cmnd->request);
+	return blk_mq_unique_tag(scsi_cmd_to_rq(cmnd));
 }
 
 /* Queued (deferred) command completions converge here. */
@@ -5364,7 +5384,7 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
 {
 	bool new_sd_dp;
 	bool inject = false;
-	bool hipri = (cmnd->request->cmd_flags & REQ_HIPRI);
+	bool hipri = scsi_cmd_to_rq(cmnd)->cmd_flags & REQ_HIPRI;
 	int k, num_in_q, qdepth;
 	unsigned long iflags;
 	u64 ns_from_boot = 0;
@@ -5567,8 +5587,9 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
 		if (sdebug_statistics)
 			sd_dp->issuing_cpu = raw_smp_processor_id();
 		if (unlikely(sd_dp->aborted)) {
-			sdev_printk(KERN_INFO, sdp, "abort request tag %d\n", cmnd->request->tag);
-			blk_abort_request(cmnd->request);
+			sdev_printk(KERN_INFO, sdp, "abort request tag %d\n",
+				    scsi_cmd_to_rq(cmnd)->tag);
+			blk_abort_request(scsi_cmd_to_rq(cmnd));
 			atomic_set(&sdeb_inject_pending, 0);
 			sd_dp->aborted = false;
 		}
@@ -7394,7 +7415,7 @@ static int scsi_debug_queuecommand(struct Scsi_Host *shost,
 					       (u32)cmd[k]);
 		}
 		sdev_printk(KERN_INFO, sdp, "%s: tag=%#x, cmd %s\n", my_name,
-			    blk_mq_unique_tag(scp->request), b);
+			    blk_mq_unique_tag(scsi_cmd_to_rq(scp)), b);
 	}
 	if (unlikely(inject_now && (sdebug_opts & SDEBUG_OPT_HOST_BUSY)))
 		return SCSI_MLQUEUE_HOST_BUSY;
diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index d33355ab6e14..c7080454aea9 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c
@@ -171,6 +171,7 @@ static struct {
 	{"FUJITSU", "ETERNUS_DXM", "*", BLIST_RETRY_ASC_C1},
 	{"Generic", "USB SD Reader", "1.00", BLIST_FORCELUN | BLIST_INQUIRY_36},
 	{"Generic", "USB Storage-SMC", NULL, BLIST_FORCELUN | BLIST_INQUIRY_36}, /* FW: 0180 and 0207 */
+	{"Generic", "Ultra HS-SD/MMC", "2.09", BLIST_IGN_MEDIA_CHANGE | BLIST_INQUIRY_36},
 	{"HITACHI", "DF400", "*", BLIST_REPORTLUN2},
 	{"HITACHI", "DF500", "*", BLIST_REPORTLUN2},
 	{"HITACHI", "DISK-SUBSYSTEM", "*", BLIST_REPORTLUN2},
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 58a252c38992..b6c86cce57bf 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -242,7 +242,7 @@ scsi_abort_command(struct scsi_cmnd *scmd)
  */
 static void scsi_eh_reset(struct scsi_cmnd *scmd)
 {
-	if (!blk_rq_is_passthrough(scmd->request)) {
+	if (!blk_rq_is_passthrough(scsi_cmd_to_rq(scmd))) {
 		struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd);
 		if (sdrv->eh_reset)
 			sdrv->eh_reset(scmd);
@@ -1182,7 +1182,7 @@ static enum scsi_disposition scsi_request_sense(struct scsi_cmnd *scmd)
 static enum scsi_disposition
 scsi_eh_action(struct scsi_cmnd *scmd, enum scsi_disposition rtn)
 {
-	if (!blk_rq_is_passthrough(scmd->request)) {
+	if (!blk_rq_is_passthrough(scsi_cmd_to_rq(scmd))) {
 		struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd);
 		if (sdrv->eh_action)
 			rtn = sdrv->eh_action(scmd, rtn);
@@ -1750,21 +1750,23 @@ static void scsi_eh_offline_sdevs(struct list_head *work_q,
  */
 int scsi_noretry_cmd(struct scsi_cmnd *scmd)
 {
+	struct request *req = scsi_cmd_to_rq(scmd);
+
 	switch (host_byte(scmd->result)) {
 	case DID_OK:
 		break;
 	case DID_TIME_OUT:
 		goto check_type;
 	case DID_BUS_BUSY:
-		return (scmd->request->cmd_flags & REQ_FAILFAST_TRANSPORT);
+		return req->cmd_flags & REQ_FAILFAST_TRANSPORT;
 	case DID_PARITY:
-		return (scmd->request->cmd_flags & REQ_FAILFAST_DEV);
+		return req->cmd_flags & REQ_FAILFAST_DEV;
 	case DID_ERROR:
 		if (get_status_byte(scmd) == SAM_STAT_RESERVATION_CONFLICT)
 			return 0;
 		fallthrough;
 	case DID_SOFT_ERROR:
-		return (scmd->request->cmd_flags & REQ_FAILFAST_DRIVER);
+		return req->cmd_flags & REQ_FAILFAST_DRIVER;
 	}
 
 	if (!scsi_status_is_check_condition(scmd->result))
@@ -1775,8 +1777,7 @@ check_type:
 	 * assume caller has checked sense and determined
 	 * the check condition was retryable.
 	 */
-	if (scmd->request->cmd_flags & REQ_FAILFAST_DEV ||
-	    blk_rq_is_passthrough(scmd->request))
+	if (req->cmd_flags & REQ_FAILFAST_DEV || blk_rq_is_passthrough(req))
 		return 1;
 
 	return 0;
@@ -2376,7 +2377,6 @@ scsi_ioctl_reset(struct scsi_device *dev, int __user *arg)
 
 	scmd = (struct scsi_cmnd *)(rq + 1);
 	scsi_init_command(dev, scmd);
-	scmd->request = rq;
 	scmd->cmnd = scsi_req(rq)->cmd;
 
 	scmd->scsi_done		= scsi_reset_provider_done_command;
diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c
index 0d13610cd6bf..6ff2207bd45a 100644
--- a/drivers/scsi/scsi_ioctl.c
+++ b/drivers/scsi/scsi_ioctl.c
@@ -14,6 +14,7 @@
 #include <linux/mm.h>
 #include <linux/string.h>
 #include <linux/uaccess.h>
+#include <linux/cdrom.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
@@ -63,29 +64,6 @@ static int ioctl_probe(struct Scsi_Host *host, void __user *buffer)
 	return 1;
 }
 
-/*
-
- * The SCSI_IOCTL_SEND_COMMAND ioctl sends a command out to the SCSI host.
- * The IOCTL_NORMAL_TIMEOUT and NORMAL_RETRIES  variables are used.  
- * 
- * dev is the SCSI device struct ptr, *(int *) arg is the length of the
- * input data, if any, not including the command string & counts, 
- * *((int *)arg + 1) is the output buffer size in bytes.
- * 
- * *(char *) ((int *) arg)[2] the actual command byte.   
- * 
- * Note that if more than MAX_BUF bytes are requested to be transferred,
- * the ioctl will fail with error EINVAL.
- * 
- * This size *does not* include the initial lengths that were passed.
- * 
- * The SCSI command is read from the memory location immediately after the
- * length words, and the input data is right after the command.  The SCSI
- * routines know the command size based on the opcode decode.  
- * 
- * The output area is then filled in starting from the command byte. 
- */
-
 static int ioctl_internal_command(struct scsi_device *sdev, char *cmd,
 				  int timeout, int retries)
 {
@@ -189,10 +167,732 @@ static int scsi_ioctl_get_pci(struct scsi_device *sdev, void __user *arg)
 		? -EFAULT: 0;
 }
 
+static int sg_get_version(int __user *p)
+{
+	static const int sg_version_num = 30527;
+	return put_user(sg_version_num, p);
+}
 
-static int scsi_ioctl_common(struct scsi_device *sdev, int cmd, void __user *arg)
+static int sg_set_timeout(struct scsi_device *sdev, int __user *p)
 {
-	char scsi_cmd[MAX_COMMAND_SIZE];
+	int timeout, err = get_user(timeout, p);
+
+	if (!err)
+		sdev->sg_timeout = clock_t_to_jiffies(timeout);
+
+	return err;
+}
+
+static int sg_get_reserved_size(struct scsi_device *sdev, int __user *p)
+{
+	int val = min(sdev->sg_reserved_size,
+		      queue_max_bytes(sdev->request_queue));
+
+	return put_user(val, p);
+}
+
+static int sg_set_reserved_size(struct scsi_device *sdev, int __user *p)
+{
+	int size, err = get_user(size, p);
+
+	if (err)
+		return err;
+
+	if (size < 0)
+		return -EINVAL;
+
+	sdev->sg_reserved_size = min_t(unsigned int, size,
+				       queue_max_bytes(sdev->request_queue));
+	return 0;
+}
+
+/*
+ * will always return that we are ATAPI even for a real SCSI drive, I'm not
+ * so sure this is worth doing anything about (why would you care??)
+ */
+static int sg_emulated_host(struct request_queue *q, int __user *p)
+{
+	return put_user(1, p);
+}
+
+static int scsi_get_idlun(struct scsi_device *sdev, void __user *argp)
+{
+	struct scsi_idlun v = {
+		.dev_id = (sdev->id & 0xff) +
+			((sdev->lun & 0xff) << 8) +
+			((sdev->channel & 0xff) << 16) +
+			((sdev->host->host_no & 0xff) << 24),
+		.host_unique_id = sdev->host->unique_id
+	};
+	if (copy_to_user(argp, &v, sizeof(struct scsi_idlun)))
+		return -EFAULT;
+	return 0;
+}
+
+static int scsi_send_start_stop(struct scsi_device *sdev, int data)
+{
+	u8 cdb[MAX_COMMAND_SIZE] = { };
+
+	cdb[0] = START_STOP;
+	cdb[4] = data;
+	return ioctl_internal_command(sdev, cdb, START_STOP_TIMEOUT,
+				      NORMAL_RETRIES);
+}
+
+/*
+ * Check if the given command is allowed.
+ *
+ * Only a subset of commands are allowed for unprivileged users. Commands used
+ * to format the media, update the firmware, etc. are not permitted.
+ */
+bool scsi_cmd_allowed(unsigned char *cmd, fmode_t mode)
+{
+	/* root can do any command. */
+	if (capable(CAP_SYS_RAWIO))
+		return true;
+
+	/* Anybody who can open the device can do a read-safe command */
+	switch (cmd[0]) {
+	/* Basic read-only commands */
+	case TEST_UNIT_READY:
+	case REQUEST_SENSE:
+	case READ_6:
+	case READ_10:
+	case READ_12:
+	case READ_16:
+	case READ_BUFFER:
+	case READ_DEFECT_DATA:
+	case READ_CAPACITY: /* also GPCMD_READ_CDVD_CAPACITY */
+	case READ_LONG:
+	case INQUIRY:
+	case MODE_SENSE:
+	case MODE_SENSE_10:
+	case LOG_SENSE:
+	case START_STOP:
+	case GPCMD_VERIFY_10:
+	case VERIFY_16:
+	case REPORT_LUNS:
+	case SERVICE_ACTION_IN_16:
+	case RECEIVE_DIAGNOSTIC:
+	case MAINTENANCE_IN: /* also GPCMD_SEND_KEY, which is a write command */
+	case GPCMD_READ_BUFFER_CAPACITY:
+	/* Audio CD commands */
+	case GPCMD_PLAY_CD:
+	case GPCMD_PLAY_AUDIO_10:
+	case GPCMD_PLAY_AUDIO_MSF:
+	case GPCMD_PLAY_AUDIO_TI:
+	case GPCMD_PAUSE_RESUME:
+	/* CD/DVD data reading */
+	case GPCMD_READ_CD:
+	case GPCMD_READ_CD_MSF:
+	case GPCMD_READ_DISC_INFO:
+	case GPCMD_READ_DVD_STRUCTURE:
+	case GPCMD_READ_HEADER:
+	case GPCMD_READ_TRACK_RZONE_INFO:
+	case GPCMD_READ_SUBCHANNEL:
+	case GPCMD_READ_TOC_PMA_ATIP:
+	case GPCMD_REPORT_KEY:
+	case GPCMD_SCAN:
+	case GPCMD_GET_CONFIGURATION:
+	case GPCMD_READ_FORMAT_CAPACITIES:
+	case GPCMD_GET_EVENT_STATUS_NOTIFICATION:
+	case GPCMD_GET_PERFORMANCE:
+	case GPCMD_SEEK:
+	case GPCMD_STOP_PLAY_SCAN:
+	/* ZBC */
+	case ZBC_IN:
+		return true;
+	/* Basic writing commands */
+	case WRITE_6:
+	case WRITE_10:
+	case WRITE_VERIFY:
+	case WRITE_12:
+	case WRITE_VERIFY_12:
+	case WRITE_16:
+	case WRITE_LONG:
+	case WRITE_LONG_2:
+	case WRITE_SAME:
+	case WRITE_SAME_16:
+	case WRITE_SAME_32:
+	case ERASE:
+	case GPCMD_MODE_SELECT_10:
+	case MODE_SELECT:
+	case LOG_SELECT:
+	case GPCMD_BLANK:
+	case GPCMD_CLOSE_TRACK:
+	case GPCMD_FLUSH_CACHE:
+	case GPCMD_FORMAT_UNIT:
+	case GPCMD_REPAIR_RZONE_TRACK:
+	case GPCMD_RESERVE_RZONE_TRACK:
+	case GPCMD_SEND_DVD_STRUCTURE:
+	case GPCMD_SEND_EVENT:
+	case GPCMD_SEND_OPC:
+	case GPCMD_SEND_CUE_SHEET:
+	case GPCMD_SET_SPEED:
+	case GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL:
+	case GPCMD_LOAD_UNLOAD:
+	case GPCMD_SET_STREAMING:
+	case GPCMD_SET_READ_AHEAD:
+	/* ZBC */
+	case ZBC_OUT:
+		return (mode & FMODE_WRITE);
+	default:
+		return false;
+	}
+}
+EXPORT_SYMBOL(scsi_cmd_allowed);
+
+static int scsi_fill_sghdr_rq(struct scsi_device *sdev, struct request *rq,
+		struct sg_io_hdr *hdr, fmode_t mode)
+{
+	struct scsi_request *req = scsi_req(rq);
+
+	if (copy_from_user(req->cmd, hdr->cmdp, hdr->cmd_len))
+		return -EFAULT;
+	if (!scsi_cmd_allowed(req->cmd, mode))
+		return -EPERM;
+
+	/*
+	 * fill in request structure
+	 */
+	req->cmd_len = hdr->cmd_len;
+
+	rq->timeout = msecs_to_jiffies(hdr->timeout);
+	if (!rq->timeout)
+		rq->timeout = sdev->sg_timeout;
+	if (!rq->timeout)
+		rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
+	if (rq->timeout < BLK_MIN_SG_TIMEOUT)
+		rq->timeout = BLK_MIN_SG_TIMEOUT;
+
+	return 0;
+}
+
+static int scsi_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
+		struct bio *bio)
+{
+	struct scsi_request *req = scsi_req(rq);
+	int r, ret = 0;
+
+	/*
+	 * fill in all the output members
+	 */
+	hdr->status = req->result & 0xff;
+	hdr->masked_status = status_byte(req->result);
+	hdr->msg_status = COMMAND_COMPLETE;
+	hdr->host_status = host_byte(req->result);
+	hdr->driver_status = 0;
+	if (scsi_status_is_check_condition(hdr->status))
+		hdr->driver_status = DRIVER_SENSE;
+	hdr->info = 0;
+	if (hdr->masked_status || hdr->host_status || hdr->driver_status)
+		hdr->info |= SG_INFO_CHECK;
+	hdr->resid = req->resid_len;
+	hdr->sb_len_wr = 0;
+
+	if (req->sense_len && hdr->sbp) {
+		int len = min((unsigned int) hdr->mx_sb_len, req->sense_len);
+
+		if (!copy_to_user(hdr->sbp, req->sense, len))
+			hdr->sb_len_wr = len;
+		else
+			ret = -EFAULT;
+	}
+
+	r = blk_rq_unmap_user(bio);
+	if (!ret)
+		ret = r;
+
+	return ret;
+}
+
+static int sg_io(struct scsi_device *sdev, struct gendisk *disk,
+		struct sg_io_hdr *hdr, fmode_t mode)
+{
+	unsigned long start_time;
+	ssize_t ret = 0;
+	int writing = 0;
+	int at_head = 0;
+	struct request *rq;
+	struct scsi_request *req;
+	struct bio *bio;
+
+	if (hdr->interface_id != 'S')
+		return -EINVAL;
+
+	if (hdr->dxfer_len > (queue_max_hw_sectors(sdev->request_queue) << 9))
+		return -EIO;
+
+	if (hdr->dxfer_len)
+		switch (hdr->dxfer_direction) {
+		default:
+			return -EINVAL;
+		case SG_DXFER_TO_DEV:
+			writing = 1;
+			break;
+		case SG_DXFER_TO_FROM_DEV:
+		case SG_DXFER_FROM_DEV:
+			break;
+		}
+	if (hdr->flags & SG_FLAG_Q_AT_HEAD)
+		at_head = 1;
+
+	ret = -ENOMEM;
+	rq = blk_get_request(sdev->request_queue, writing ?
+			     REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+	req = scsi_req(rq);
+
+	if (hdr->cmd_len > BLK_MAX_CDB) {
+		req->cmd = kzalloc(hdr->cmd_len, GFP_KERNEL);
+		if (!req->cmd)
+			goto out_put_request;
+	}
+
+	ret = scsi_fill_sghdr_rq(sdev, rq, hdr, mode);
+	if (ret < 0)
+		goto out_free_cdb;
+
+	ret = 0;
+	if (hdr->iovec_count) {
+		struct iov_iter i;
+		struct iovec *iov = NULL;
+
+		ret = import_iovec(rq_data_dir(rq), hdr->dxferp,
+				   hdr->iovec_count, 0, &iov, &i);
+		if (ret < 0)
+			goto out_free_cdb;
+
+		/* SG_IO howto says that the shorter of the two wins */
+		iov_iter_truncate(&i, hdr->dxfer_len);
+
+		ret = blk_rq_map_user_iov(rq->q, rq, NULL, &i, GFP_KERNEL);
+		kfree(iov);
+	} else if (hdr->dxfer_len)
+		ret = blk_rq_map_user(rq->q, rq, NULL, hdr->dxferp,
+				      hdr->dxfer_len, GFP_KERNEL);
+
+	if (ret)
+		goto out_free_cdb;
+
+	bio = rq->bio;
+	req->retries = 0;
+
+	start_time = jiffies;
+
+	blk_execute_rq(disk, rq, at_head);
+
+	hdr->duration = jiffies_to_msecs(jiffies - start_time);
+
+	ret = scsi_complete_sghdr_rq(rq, hdr, bio);
+
+out_free_cdb:
+	scsi_req_free_cmd(req);
+out_put_request:
+	blk_put_request(rq);
+	return ret;
+}
+
+/**
+ * sg_scsi_ioctl  --  handle deprecated SCSI_IOCTL_SEND_COMMAND ioctl
+ * @q:		request queue to send scsi commands down
+ * @disk:	gendisk to operate on (option)
+ * @mode:	mode used to open the file through which the ioctl has been
+ *		submitted
+ * @sic:	userspace structure describing the command to perform
+ *
+ * Send down the scsi command described by @sic to the device below
+ * the request queue @q.  If @file is non-NULL it's used to perform
+ * fine-grained permission checks that allow users to send down
+ * non-destructive SCSI commands.  If the caller has a struct gendisk
+ * available it should be passed in as @disk to allow the low level
+ * driver to use the information contained in it.  A non-NULL @disk
+ * is only allowed if the caller knows that the low level driver doesn't
+ * need it (e.g. in the scsi subsystem).
+ *
+ * Notes:
+ *   -  This interface is deprecated - users should use the SG_IO
+ *      interface instead, as this is a more flexible approach to
+ *      performing SCSI commands on a device.
+ *   -  The SCSI command length is determined by examining the 1st byte
+ *      of the given command. There is no way to override this.
+ *   -  Data transfers are limited to PAGE_SIZE
+ *   -  The length (x + y) must be at least OMAX_SB_LEN bytes long to
+ *      accommodate the sense buffer when an error occurs.
+ *      The sense buffer is truncated to OMAX_SB_LEN (16) bytes so that
+ *      old code will not be surprised.
+ *   -  If a Unix error occurs (e.g. ENOMEM) then the user will receive
+ *      a negative return and the Unix error code in 'errno'.
+ *      If the SCSI command succeeds then 0 is returned.
+ *      Positive numbers returned are the compacted SCSI error codes (4
+ *      bytes in one int) where the lowest byte is the SCSI status.
+ */
+static int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk,
+		fmode_t mode, struct scsi_ioctl_command __user *sic)
+{
+	enum { OMAX_SB_LEN = 16 };	/* For backward compatibility */
+	struct request *rq;
+	struct scsi_request *req;
+	int err;
+	unsigned int in_len, out_len, bytes, opcode, cmdlen;
+	char *buffer = NULL;
+
+	if (!sic)
+		return -EINVAL;
+
+	/*
+	 * get in an out lengths, verify they don't exceed a page worth of data
+	 */
+	if (get_user(in_len, &sic->inlen))
+		return -EFAULT;
+	if (get_user(out_len, &sic->outlen))
+		return -EFAULT;
+	if (in_len > PAGE_SIZE || out_len > PAGE_SIZE)
+		return -EINVAL;
+	if (get_user(opcode, sic->data))
+		return -EFAULT;
+
+	bytes = max(in_len, out_len);
+	if (bytes) {
+		buffer = kzalloc(bytes, GFP_NOIO | GFP_USER | __GFP_NOWARN);
+		if (!buffer)
+			return -ENOMEM;
+
+	}
+
+	rq = blk_get_request(q, in_len ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto error_free_buffer;
+	}
+	req = scsi_req(rq);
+
+	cmdlen = COMMAND_SIZE(opcode);
+
+	/*
+	 * get command and data to send to device, if any
+	 */
+	err = -EFAULT;
+	req->cmd_len = cmdlen;
+	if (copy_from_user(req->cmd, sic->data, cmdlen))
+		goto error;
+
+	if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len))
+		goto error;
+
+	err = -EPERM;
+	if (!scsi_cmd_allowed(req->cmd, mode))
+		goto error;
+
+	/* default.  possible overridden later */
+	req->retries = 5;
+
+	switch (opcode) {
+	case SEND_DIAGNOSTIC:
+	case FORMAT_UNIT:
+		rq->timeout = FORMAT_UNIT_TIMEOUT;
+		req->retries = 1;
+		break;
+	case START_STOP:
+		rq->timeout = START_STOP_TIMEOUT;
+		break;
+	case MOVE_MEDIUM:
+		rq->timeout = MOVE_MEDIUM_TIMEOUT;
+		break;
+	case READ_ELEMENT_STATUS:
+		rq->timeout = READ_ELEMENT_STATUS_TIMEOUT;
+		break;
+	case READ_DEFECT_DATA:
+		rq->timeout = READ_DEFECT_DATA_TIMEOUT;
+		req->retries = 1;
+		break;
+	default:
+		rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
+		break;
+	}
+
+	if (bytes) {
+		err = blk_rq_map_kern(q, rq, buffer, bytes, GFP_NOIO);
+		if (err)
+			goto error;
+	}
+
+	blk_execute_rq(disk, rq, 0);
+
+	err = req->result & 0xff;	/* only 8 bit SCSI status */
+	if (err) {
+		if (req->sense_len && req->sense) {
+			bytes = (OMAX_SB_LEN > req->sense_len) ?
+				req->sense_len : OMAX_SB_LEN;
+			if (copy_to_user(sic->data, req->sense, bytes))
+				err = -EFAULT;
+		}
+	} else {
+		if (copy_to_user(sic->data, buffer, out_len))
+			err = -EFAULT;
+	}
+
+error:
+	blk_put_request(rq);
+
+error_free_buffer:
+	kfree(buffer);
+
+	return err;
+}
+
+int put_sg_io_hdr(const struct sg_io_hdr *hdr, void __user *argp)
+{
+#ifdef CONFIG_COMPAT
+	if (in_compat_syscall()) {
+		struct compat_sg_io_hdr hdr32 =  {
+			.interface_id	 = hdr->interface_id,
+			.dxfer_direction = hdr->dxfer_direction,
+			.cmd_len	 = hdr->cmd_len,
+			.mx_sb_len	 = hdr->mx_sb_len,
+			.iovec_count	 = hdr->iovec_count,
+			.dxfer_len	 = hdr->dxfer_len,
+			.dxferp		 = (uintptr_t)hdr->dxferp,
+			.cmdp		 = (uintptr_t)hdr->cmdp,
+			.sbp		 = (uintptr_t)hdr->sbp,
+			.timeout	 = hdr->timeout,
+			.flags		 = hdr->flags,
+			.pack_id	 = hdr->pack_id,
+			.usr_ptr	 = (uintptr_t)hdr->usr_ptr,
+			.status		 = hdr->status,
+			.masked_status	 = hdr->masked_status,
+			.msg_status	 = hdr->msg_status,
+			.sb_len_wr	 = hdr->sb_len_wr,
+			.host_status	 = hdr->host_status,
+			.driver_status	 = hdr->driver_status,
+			.resid		 = hdr->resid,
+			.duration	 = hdr->duration,
+			.info		 = hdr->info,
+		};
+
+		if (copy_to_user(argp, &hdr32, sizeof(hdr32)))
+			return -EFAULT;
+
+		return 0;
+	}
+#endif
+
+	if (copy_to_user(argp, hdr, sizeof(*hdr)))
+		return -EFAULT;
+
+	return 0;
+}
+EXPORT_SYMBOL(put_sg_io_hdr);
+
+int get_sg_io_hdr(struct sg_io_hdr *hdr, const void __user *argp)
+{
+#ifdef CONFIG_COMPAT
+	struct compat_sg_io_hdr hdr32;
+
+	if (in_compat_syscall()) {
+		if (copy_from_user(&hdr32, argp, sizeof(hdr32)))
+			return -EFAULT;
+
+		*hdr = (struct sg_io_hdr) {
+			.interface_id	 = hdr32.interface_id,
+			.dxfer_direction = hdr32.dxfer_direction,
+			.cmd_len	 = hdr32.cmd_len,
+			.mx_sb_len	 = hdr32.mx_sb_len,
+			.iovec_count	 = hdr32.iovec_count,
+			.dxfer_len	 = hdr32.dxfer_len,
+			.dxferp		 = compat_ptr(hdr32.dxferp),
+			.cmdp		 = compat_ptr(hdr32.cmdp),
+			.sbp		 = compat_ptr(hdr32.sbp),
+			.timeout	 = hdr32.timeout,
+			.flags		 = hdr32.flags,
+			.pack_id	 = hdr32.pack_id,
+			.usr_ptr	 = compat_ptr(hdr32.usr_ptr),
+			.status		 = hdr32.status,
+			.masked_status	 = hdr32.masked_status,
+			.msg_status	 = hdr32.msg_status,
+			.sb_len_wr	 = hdr32.sb_len_wr,
+			.host_status	 = hdr32.host_status,
+			.driver_status	 = hdr32.driver_status,
+			.resid		 = hdr32.resid,
+			.duration	 = hdr32.duration,
+			.info		 = hdr32.info,
+		};
+
+		return 0;
+	}
+#endif
+
+	if (copy_from_user(hdr, argp, sizeof(*hdr)))
+		return -EFAULT;
+
+	return 0;
+}
+EXPORT_SYMBOL(get_sg_io_hdr);
+
+#ifdef CONFIG_COMPAT
+struct compat_cdrom_generic_command {
+	unsigned char	cmd[CDROM_PACKET_SIZE];
+	compat_caddr_t	buffer;
+	compat_uint_t	buflen;
+	compat_int_t	stat;
+	compat_caddr_t	sense;
+	unsigned char	data_direction;
+	unsigned char	pad[3];
+	compat_int_t	quiet;
+	compat_int_t	timeout;
+	compat_caddr_t	unused;
+};
+#endif
+
+static int scsi_get_cdrom_generic_arg(struct cdrom_generic_command *cgc,
+				      const void __user *arg)
+{
+#ifdef CONFIG_COMPAT
+	if (in_compat_syscall()) {
+		struct compat_cdrom_generic_command cgc32;
+
+		if (copy_from_user(&cgc32, arg, sizeof(cgc32)))
+			return -EFAULT;
+
+		*cgc = (struct cdrom_generic_command) {
+			.buffer		= compat_ptr(cgc32.buffer),
+			.buflen		= cgc32.buflen,
+			.stat		= cgc32.stat,
+			.sense		= compat_ptr(cgc32.sense),
+			.data_direction	= cgc32.data_direction,
+			.quiet		= cgc32.quiet,
+			.timeout	= cgc32.timeout,
+			.unused		= compat_ptr(cgc32.unused),
+		};
+		memcpy(&cgc->cmd, &cgc32.cmd, CDROM_PACKET_SIZE);
+		return 0;
+	}
+#endif
+	if (copy_from_user(cgc, arg, sizeof(*cgc)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int scsi_put_cdrom_generic_arg(const struct cdrom_generic_command *cgc,
+				      void __user *arg)
+{
+#ifdef CONFIG_COMPAT
+	if (in_compat_syscall()) {
+		struct compat_cdrom_generic_command cgc32 = {
+			.buffer		= (uintptr_t)(cgc->buffer),
+			.buflen		= cgc->buflen,
+			.stat		= cgc->stat,
+			.sense		= (uintptr_t)(cgc->sense),
+			.data_direction	= cgc->data_direction,
+			.quiet		= cgc->quiet,
+			.timeout	= cgc->timeout,
+			.unused		= (uintptr_t)(cgc->unused),
+		};
+		memcpy(&cgc32.cmd, &cgc->cmd, CDROM_PACKET_SIZE);
+
+		if (copy_to_user(arg, &cgc32, sizeof(cgc32)))
+			return -EFAULT;
+
+		return 0;
+	}
+#endif
+	if (copy_to_user(arg, cgc, sizeof(*cgc)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int scsi_cdrom_send_packet(struct scsi_device *sdev, struct gendisk *disk,
+		fmode_t mode, void __user *arg)
+{
+	struct cdrom_generic_command cgc;
+	struct sg_io_hdr hdr;
+	int err;
+
+	err = scsi_get_cdrom_generic_arg(&cgc, arg);
+	if (err)
+		return err;
+
+	cgc.timeout = clock_t_to_jiffies(cgc.timeout);
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.interface_id = 'S';
+	hdr.cmd_len = sizeof(cgc.cmd);
+	hdr.dxfer_len = cgc.buflen;
+	switch (cgc.data_direction) {
+	case CGC_DATA_UNKNOWN:
+		hdr.dxfer_direction = SG_DXFER_UNKNOWN;
+		break;
+	case CGC_DATA_WRITE:
+		hdr.dxfer_direction = SG_DXFER_TO_DEV;
+		break;
+	case CGC_DATA_READ:
+		hdr.dxfer_direction = SG_DXFER_FROM_DEV;
+		break;
+	case CGC_DATA_NONE:
+		hdr.dxfer_direction = SG_DXFER_NONE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	hdr.dxferp = cgc.buffer;
+	hdr.sbp = cgc.sense;
+	if (hdr.sbp)
+		hdr.mx_sb_len = sizeof(struct request_sense);
+	hdr.timeout = jiffies_to_msecs(cgc.timeout);
+	hdr.cmdp = ((struct cdrom_generic_command __user *) arg)->cmd;
+	hdr.cmd_len = sizeof(cgc.cmd);
+
+	err = sg_io(sdev, disk, &hdr, mode);
+	if (err == -EFAULT)
+		return -EFAULT;
+
+	if (hdr.status)
+		return -EIO;
+
+	cgc.stat = err;
+	cgc.buflen = hdr.resid;
+	if (scsi_put_cdrom_generic_arg(&cgc, arg))
+		return -EFAULT;
+
+	return err;
+}
+
+static int scsi_ioctl_sg_io(struct scsi_device *sdev, struct gendisk *disk,
+		fmode_t mode, void __user *argp)
+{
+	struct sg_io_hdr hdr;
+	int error;
+
+	error = get_sg_io_hdr(&hdr, argp);
+	if (error)
+		return error;
+	error = sg_io(sdev, disk, &hdr, mode);
+	if (error == -EFAULT)
+		return error;
+	if (put_sg_io_hdr(&hdr, argp))
+		return -EFAULT;
+	return error;
+}
+
+/**
+ * scsi_ioctl - Dispatch ioctl to scsi device
+ * @sdev: scsi device receiving ioctl
+ * @disk: disk receiving the ioctl
+ * @mode: mode the block/char device is opened with
+ * @cmd: which ioctl is it
+ * @arg: data associated with ioctl
+ *
+ * Description: The scsi_ioctl() function differs from most ioctls in that it
+ * does not take a major/minor number as the dev field.  Rather, it takes
+ * a pointer to a &struct scsi_device.
+ */
+int scsi_ioctl(struct scsi_device *sdev, struct gendisk *disk, fmode_t mode,
+		int cmd, void __user *arg)
+{
+	struct request_queue *q = sdev->request_queue;
 	struct scsi_sense_hdr sense_hdr;
 
 	/* Check for deprecated ioctls ... all the ioctls which don't
@@ -212,26 +912,34 @@ static int scsi_ioctl_common(struct scsi_device *sdev, int cmd, void __user *arg
 	}
 
 	switch (cmd) {
-	case SCSI_IOCTL_GET_IDLUN: {
-		struct scsi_idlun v = {
-			.dev_id = (sdev->id & 0xff)
-				 + ((sdev->lun & 0xff) << 8)
-				 + ((sdev->channel & 0xff) << 16)
-				 + ((sdev->host->host_no & 0xff) << 24),
-			.host_unique_id = sdev->host->unique_id
-		};
-		if (copy_to_user(arg, &v, sizeof(struct scsi_idlun)))
-			return -EFAULT;
-		return 0;
-	}
+	case SG_GET_VERSION_NUM:
+		return sg_get_version(arg);
+	case SG_SET_TIMEOUT:
+		return sg_set_timeout(sdev, arg);
+	case SG_GET_TIMEOUT:
+		return jiffies_to_clock_t(sdev->sg_timeout);
+	case SG_GET_RESERVED_SIZE:
+		return sg_get_reserved_size(sdev, arg);
+	case SG_SET_RESERVED_SIZE:
+		return sg_set_reserved_size(sdev, arg);
+	case SG_EMULATED_HOST:
+		return sg_emulated_host(q, arg);
+	case SG_IO:
+		return scsi_ioctl_sg_io(sdev, disk, mode, arg);
+	case SCSI_IOCTL_SEND_COMMAND:
+		return sg_scsi_ioctl(q, disk, mode, arg);
+	case CDROM_SEND_PACKET:
+		return scsi_cdrom_send_packet(sdev, disk, mode, arg);
+	case CDROMCLOSETRAY:
+		return scsi_send_start_stop(sdev, 3);
+	case CDROMEJECT:
+		return scsi_send_start_stop(sdev, 2);
+	case SCSI_IOCTL_GET_IDLUN:
+		return scsi_get_idlun(sdev, arg);
 	case SCSI_IOCTL_GET_BUS_NUMBER:
 		return put_user(sdev->host->host_no, (int __user *)arg);
 	case SCSI_IOCTL_PROBE_HOST:
 		return ioctl_probe(sdev->host, arg);
-	case SCSI_IOCTL_SEND_COMMAND:
-		if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
-			return -EACCES;
-		return sg_scsi_ioctl(sdev->request_queue, NULL, 0, arg);
 	case SCSI_IOCTL_DOORLOCK:
 		return scsi_set_medium_removal(sdev, SCSI_REMOVAL_PREVENT);
 	case SCSI_IOCTL_DOORUNLOCK:
@@ -240,66 +948,27 @@ static int scsi_ioctl_common(struct scsi_device *sdev, int cmd, void __user *arg
 		return scsi_test_unit_ready(sdev, IOCTL_NORMAL_TIMEOUT,
 					    NORMAL_RETRIES, &sense_hdr);
 	case SCSI_IOCTL_START_UNIT:
-		scsi_cmd[0] = START_STOP;
-		scsi_cmd[1] = 0;
-		scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
-		scsi_cmd[4] = 1;
-		return ioctl_internal_command(sdev, scsi_cmd,
-				     START_STOP_TIMEOUT, NORMAL_RETRIES);
+		return scsi_send_start_stop(sdev, 1);
 	case SCSI_IOCTL_STOP_UNIT:
-		scsi_cmd[0] = START_STOP;
-		scsi_cmd[1] = 0;
-		scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
-		scsi_cmd[4] = 0;
-		return ioctl_internal_command(sdev, scsi_cmd,
-				     START_STOP_TIMEOUT, NORMAL_RETRIES);
+		return scsi_send_start_stop(sdev, 0);
         case SCSI_IOCTL_GET_PCI:
                 return scsi_ioctl_get_pci(sdev, arg);
 	case SG_SCSI_RESET:
 		return scsi_ioctl_reset(sdev, arg);
 	}
-	return -ENOIOCTLCMD;
-}
-
-/**
- * scsi_ioctl - Dispatch ioctl to scsi device
- * @sdev: scsi device receiving ioctl
- * @cmd: which ioctl is it
- * @arg: data associated with ioctl
- *
- * Description: The scsi_ioctl() function differs from most ioctls in that it
- * does not take a major/minor number as the dev field.  Rather, it takes
- * a pointer to a &struct scsi_device.
- */
-int scsi_ioctl(struct scsi_device *sdev, int cmd, void __user *arg)
-{
-	int ret = scsi_ioctl_common(sdev, cmd, arg);
-
-	if (ret != -ENOIOCTLCMD)
-		return ret;
-
-	if (sdev->host->hostt->ioctl)
-		return sdev->host->hostt->ioctl(sdev, cmd, arg);
-
-	return -EINVAL;
-}
-EXPORT_SYMBOL(scsi_ioctl);
 
 #ifdef CONFIG_COMPAT
-int scsi_compat_ioctl(struct scsi_device *sdev, int cmd, void __user *arg)
-{
-	int ret = scsi_ioctl_common(sdev, cmd, arg);
-
-	if (ret != -ENOIOCTLCMD)
-		return ret;
-
-	if (sdev->host->hostt->compat_ioctl)
+	if (in_compat_syscall()) {
+		if (!sdev->host->hostt->compat_ioctl)
+			return -EINVAL;
 		return sdev->host->hostt->compat_ioctl(sdev, cmd, arg);
-
-	return ret;
-}
-EXPORT_SYMBOL(scsi_compat_ioctl);
+	}
 #endif
+	if (!sdev->host->hostt->ioctl)
+		return -EINVAL;
+	return sdev->host->hostt->ioctl(sdev, cmd, arg);
+}
+EXPORT_SYMBOL(scsi_ioctl);
 
 /*
  * We can process a reset even when a device isn't fully operable.
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 7456a26aef51..572673873ddf 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -119,13 +119,15 @@ scsi_set_blocked(struct scsi_cmnd *cmd, int reason)
 
 static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd)
 {
-	if (cmd->request->rq_flags & RQF_DONTPREP) {
-		cmd->request->rq_flags &= ~RQF_DONTPREP;
+	struct request *rq = scsi_cmd_to_rq(cmd);
+
+	if (rq->rq_flags & RQF_DONTPREP) {
+		rq->rq_flags &= ~RQF_DONTPREP;
 		scsi_mq_uninit_cmd(cmd);
 	} else {
 		WARN_ON_ONCE(true);
 	}
-	blk_mq_requeue_request(cmd->request, true);
+	blk_mq_requeue_request(rq, true);
 }
 
 /**
@@ -164,7 +166,7 @@ static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, bool unbusy)
 	 */
 	cmd->result = 0;
 
-	blk_mq_requeue_request(cmd->request, true);
+	blk_mq_requeue_request(scsi_cmd_to_rq(cmd), true);
 }
 
 /**
@@ -478,7 +480,7 @@ void scsi_run_host_queues(struct Scsi_Host *shost)
 
 static void scsi_uninit_cmd(struct scsi_cmnd *cmd)
 {
-	if (!blk_rq_is_passthrough(cmd->request)) {
+	if (!blk_rq_is_passthrough(scsi_cmd_to_rq(cmd))) {
 		struct scsi_driver *drv = scsi_cmd_to_driver(cmd);
 
 		if (drv->uninit_command)
@@ -624,7 +626,7 @@ static void scsi_io_completion_reprep(struct scsi_cmnd *cmd,
 
 static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd)
 {
-	struct request *req = cmd->request;
+	struct request *req = scsi_cmd_to_rq(cmd);
 	unsigned long wait_for;
 
 	if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
@@ -643,7 +645,7 @@ static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd)
 static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
 {
 	struct request_queue *q = cmd->device->request_queue;
-	struct request *req = cmd->request;
+	struct request *req = scsi_cmd_to_rq(cmd);
 	int level = 0;
 	enum {ACTION_FAIL, ACTION_REPREP, ACTION_RETRY,
 	      ACTION_DELAYED_RETRY} action;
@@ -818,7 +820,7 @@ static int scsi_io_completion_nz_result(struct scsi_cmnd *cmd, int result,
 {
 	bool sense_valid;
 	bool sense_current = true;	/* false implies "deferred sense" */
-	struct request *req = cmd->request;
+	struct request *req = scsi_cmd_to_rq(cmd);
 	struct scsi_sense_hdr sshdr;
 
 	sense_valid = scsi_command_normalize_sense(cmd, &sshdr);
@@ -907,7 +909,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 {
 	int result = cmd->result;
 	struct request_queue *q = cmd->device->request_queue;
-	struct request *req = cmd->request;
+	struct request *req = scsi_cmd_to_rq(cmd);
 	blk_status_t blk_stat = BLK_STS_OK;
 
 	if (unlikely(result))	/* a nz result may or may not be an error */
@@ -978,7 +980,7 @@ static inline bool scsi_cmd_needs_dma_drain(struct scsi_device *sdev,
 blk_status_t scsi_alloc_sgtables(struct scsi_cmnd *cmd)
 {
 	struct scsi_device *sdev = cmd->device;
-	struct request *rq = cmd->request;
+	struct request *rq = scsi_cmd_to_rq(cmd);
 	unsigned short nr_segs = blk_rq_nr_phys_segments(rq);
 	struct scatterlist *last_sg = NULL;
 	blk_status_t ret;
@@ -1083,8 +1085,13 @@ EXPORT_SYMBOL(scsi_alloc_sgtables);
 static void scsi_initialize_rq(struct request *rq)
 {
 	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
+	struct scsi_request *req = &cmd->req;
+
+	memset(req->__cmd, 0, sizeof(req->__cmd));
+	req->cmd = req->__cmd;
+	req->cmd_len = BLK_MAX_CDB;
+	req->sense_len = 0;
 
-	scsi_req_init(&cmd->req);
 	init_rcu_head(&cmd->rcu);
 	cmd->jiffies_at_alloc = jiffies;
 	cmd->retries = 0;
@@ -1107,7 +1114,7 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
 {
 	void *buf = cmd->sense_buffer;
 	void *prot = cmd->prot_sdb;
-	struct request *rq = blk_mq_rq_from_pdu(cmd);
+	struct request *rq = scsi_cmd_to_rq(cmd);
 	unsigned int flags = cmd->flags & SCMD_PRESERVED_FLAGS;
 	unsigned long jiffies_at_alloc;
 	int retries, to_clear;
@@ -1533,8 +1540,6 @@ static blk_status_t scsi_prepare_cmd(struct request *req)
 
 	scsi_init_command(sdev, cmd);
 
-	cmd->request = req;
-	cmd->tag = req->tag;
 	cmd->prot_op = SCSI_PROT_NORMAL;
 	if (blk_rq_bytes(req))
 		cmd->sc_data_direction = rq_dma_dir(req);
@@ -1572,12 +1577,12 @@ static blk_status_t scsi_prepare_cmd(struct request *req)
 
 static void scsi_mq_done(struct scsi_cmnd *cmd)
 {
-	if (unlikely(blk_should_fake_timeout(cmd->request->q)))
+	if (unlikely(blk_should_fake_timeout(scsi_cmd_to_rq(cmd)->q)))
 		return;
 	if (unlikely(test_and_set_bit(SCMD_STATE_COMPLETE, &cmd->state)))
 		return;
 	trace_scsi_dispatch_cmd_done(cmd);
-	blk_mq_complete_request(cmd->request);
+	blk_mq_complete_request(scsi_cmd_to_rq(cmd));
 }
 
 static void scsi_mq_put_budget(struct request_queue *q, int budget_token)
diff --git a/drivers/scsi/scsi_logging.c b/drivers/scsi/scsi_logging.c
index 2317717935e9..ed9572252a42 100644
--- a/drivers/scsi/scsi_logging.c
+++ b/drivers/scsi/scsi_logging.c
@@ -28,8 +28,9 @@ static void scsi_log_release_buffer(char *bufptr)
 
 static inline const char *scmd_name(const struct scsi_cmnd *scmd)
 {
-	return scmd->request->rq_disk ?
-		scmd->request->rq_disk->disk_name : NULL;
+	struct request *rq = scsi_cmd_to_rq((struct scsi_cmnd *)scmd);
+
+	return rq->rq_disk ? rq->rq_disk->disk_name : NULL;
 }
 
 static size_t sdev_format_header(char *logbuf, size_t logbuf_len,
@@ -91,7 +92,7 @@ void scmd_printk(const char *level, const struct scsi_cmnd *scmd,
 	if (!logbuf)
 		return;
 	off = sdev_format_header(logbuf, logbuf_len, scmd_name(scmd),
-				 scmd->request->tag);
+				 scsi_cmd_to_rq((struct scsi_cmnd *)scmd)->tag);
 	if (off < logbuf_len) {
 		va_start(args, fmt);
 		off += vscnprintf(logbuf + off, logbuf_len - off, fmt, args);
@@ -188,7 +189,7 @@ void scsi_print_command(struct scsi_cmnd *cmd)
 		return;
 
 	off = sdev_format_header(logbuf, logbuf_len,
-				 scmd_name(cmd), cmd->request->tag);
+				 scmd_name(cmd), scsi_cmd_to_rq(cmd)->tag);
 	if (off >= logbuf_len)
 		goto out_printk;
 	off += scnprintf(logbuf + off, logbuf_len - off, "CDB: ");
@@ -210,7 +211,7 @@ void scsi_print_command(struct scsi_cmnd *cmd)
 
 			off = sdev_format_header(logbuf, logbuf_len,
 						 scmd_name(cmd),
-						 cmd->request->tag);
+						 scsi_cmd_to_rq(cmd)->tag);
 			if (!WARN_ON(off > logbuf_len - 58)) {
 				off += scnprintf(logbuf + off, logbuf_len - off,
 						 "CDB[%02x]: ", k);
@@ -373,7 +374,8 @@ EXPORT_SYMBOL(__scsi_print_sense);
 /* Normalize and print sense buffer in SCSI command */
 void scsi_print_sense(const struct scsi_cmnd *cmd)
 {
-	scsi_log_print_sense(cmd->device, scmd_name(cmd), cmd->request->tag,
+	scsi_log_print_sense(cmd->device, scmd_name(cmd),
+			     scsi_cmd_to_rq((struct scsi_cmnd *)cmd)->tag,
 			     cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE);
 }
 EXPORT_SYMBOL(scsi_print_sense);
@@ -391,8 +393,8 @@ void scsi_print_result(const struct scsi_cmnd *cmd, const char *msg,
 	if (!logbuf)
 		return;
 
-	off = sdev_format_header(logbuf, logbuf_len,
-				 scmd_name(cmd), cmd->request->tag);
+	off = sdev_format_header(logbuf, logbuf_len, scmd_name(cmd),
+				 scsi_cmd_to_rq((struct scsi_cmnd *)cmd)->tag);
 
 	if (off >= logbuf_len)
 		goto out_printk;
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index eae2235f79b5..6d9152031a40 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -7,6 +7,7 @@
 #include <scsi/scsi_device.h>
 #include <linux/sbitmap.h>
 
+struct bsg_device;
 struct request_queue;
 struct request;
 struct scsi_cmnd;
@@ -180,6 +181,8 @@ static inline void scsi_dh_add_device(struct scsi_device *sdev) { }
 static inline void scsi_dh_release_device(struct scsi_device *sdev) { }
 #endif
 
+struct bsg_device *scsi_bsg_register_queue(struct scsi_device *sdev);
+
 extern int scsi_device_max_queue_depth(struct scsi_device *sdev);
 
 /* 
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 5b6996a2401b..fe22191522a3 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -267,6 +267,8 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
 	 */
 	sdev->borken = 1;
 
+	sdev->sg_reserved_size = INT_MAX;
+
 	q = blk_mq_init_queue(&sdev->host->tag_set);
 	if (IS_ERR(q)) {
 		/* release fn is set up in scsi_sysfs_device_initialise, so
@@ -974,6 +976,9 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
 	if (*bflags & BLIST_UNMAP_LIMIT_WS)
 		sdev->unmap_limit_for_ws = 1;
 
+	if (*bflags & BLIST_IGN_MEDIA_CHANGE)
+		sdev->ignore_media_change = 1;
+
 	sdev->eh_timeout = SCSI_DEFAULT_EH_TIMEOUT;
 
 	if (*bflags & BLIST_TRY_VPD_PAGES)
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index c0d31119d6d7..86793259e541 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -13,6 +13,7 @@
 #include <linux/blkdev.h>
 #include <linux/device.h>
 #include <linux/pm_runtime.h>
+#include <linux/bsg.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_device.h>
@@ -1333,7 +1334,6 @@ static int scsi_target_add(struct scsi_target *starget)
 int scsi_sysfs_add_sdev(struct scsi_device *sdev)
 {
 	int error, i;
-	struct request_queue *rq = sdev->request_queue;
 	struct scsi_target *starget = sdev->sdev_target;
 
 	error = scsi_target_add(starget);
@@ -1372,12 +1372,19 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev)
 	transport_add_device(&sdev->sdev_gendev);
 	sdev->is_visible = 1;
 
-	error = bsg_scsi_register_queue(rq, &sdev->sdev_gendev);
-	if (error)
-		/* we're treating error on bsg register as non-fatal,
-		 * so pretend nothing went wrong */
-		sdev_printk(KERN_INFO, sdev,
-			    "Failed to register bsg queue, errno=%d\n", error);
+	if (IS_ENABLED(CONFIG_BLK_DEV_BSG)) {
+		sdev->bsg_dev = scsi_bsg_register_queue(sdev);
+		if (IS_ERR(sdev->bsg_dev)) {
+			/*
+			 * We're treating error on bsg register as non-fatal, so
+			 * pretend nothing went wrong.
+			 */
+			sdev_printk(KERN_INFO, sdev,
+				    "Failed to register bsg queue, errno=%d\n",
+				    error);
+			sdev->bsg_dev = NULL;
+		}
+	}
 
 	/* add additional host specific attributes */
 	if (sdev->host->hostt->sdev_attrs) {
@@ -1439,7 +1446,8 @@ void __scsi_remove_device(struct scsi_device *sdev)
 			sysfs_remove_groups(&sdev->sdev_gendev.kobj,
 					sdev->host->hostt->sdev_groups);
 
-		bsg_unregister_queue(sdev->request_queue);
+		if (IS_ENABLED(CONFIG_BLK_DEV_BSG) && sdev->bsg_dev)
+			bsg_unregister_queue(sdev->bsg_dev);
 		device_unregister(&sdev->sdev_dev);
 		transport_remove_device(dev);
 		device_del(dev);
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 49748cd817a5..60e406bcf42a 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -3804,7 +3804,7 @@ bool fc_eh_should_retry_cmd(struct scsi_cmnd *scmd)
 	struct fc_rport *rport = starget_to_rport(scsi_target(scmd->device));
 
 	if ((rport->port_state != FC_PORTSTATE_ONLINE) &&
-		(scmd->request->cmd_flags & REQ_FAILFAST_TRANSPORT)) {
+		(scsi_cmd_to_rq(scmd)->cmd_flags & REQ_FAILFAST_TRANSPORT)) {
 		set_host_byte(scmd, DID_TRANSPORT_MARGINAL);
 		return false;
 	}
diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c
index 5af7a10e9514..bd72c38d7bfc 100644
--- a/drivers/scsi/scsi_transport_spi.c
+++ b/drivers/scsi/scsi_transport_spi.c
@@ -1230,7 +1230,7 @@ int spi_populate_tag_msg(unsigned char *msg, struct scsi_cmnd *cmd)
 {
         if (cmd->flags & SCMD_TAGGED) {
 		*msg++ = SIMPLE_QUEUE_TAG;
-        	*msg++ = cmd->request->tag;
+		*msg++ = scsi_cmd_to_rq(cmd)->tag;
         	return 2;
 	}
 
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 610ebba0d66e..cbd9999f93a6 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -110,6 +110,7 @@ static void sd_shutdown(struct device *);
 static int sd_suspend_system(struct device *);
 static int sd_suspend_runtime(struct device *);
 static int sd_resume(struct device *);
+static int sd_resume_runtime(struct device *);
 static void sd_rescan(struct device *);
 static blk_status_t sd_init_command(struct scsi_cmnd *SCpnt);
 static void sd_uninit_command(struct scsi_cmnd *SCpnt);
@@ -605,7 +606,7 @@ static const struct dev_pm_ops sd_pm_ops = {
 	.poweroff		= sd_suspend_system,
 	.restore		= sd_resume,
 	.runtime_suspend	= sd_suspend_runtime,
-	.runtime_resume		= sd_resume,
+	.runtime_resume		= sd_resume_runtime,
 };
 
 static struct scsi_driver sd_template = {
@@ -776,8 +777,9 @@ static unsigned int sd_prot_flag_mask(unsigned int prot_op)
 static unsigned char sd_setup_protect_cmnd(struct scsi_cmnd *scmd,
 					   unsigned int dix, unsigned int dif)
 {
-	struct bio *bio = scmd->request->bio;
-	unsigned int prot_op = sd_prot_op(rq_data_dir(scmd->request), dix, dif);
+	struct request *rq = scsi_cmd_to_rq(scmd);
+	struct bio *bio = rq->bio;
+	unsigned int prot_op = sd_prot_op(rq_data_dir(rq), dix, dif);
 	unsigned int protect = 0;
 
 	if (dix) {				/* DIX Type 0, 1, 2, 3 */
@@ -868,7 +870,7 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
 static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
 {
 	struct scsi_device *sdp = cmd->device;
-	struct request *rq = cmd->request;
+	struct request *rq = scsi_cmd_to_rq(cmd);
 	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
 	u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq));
 	u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));
@@ -904,7 +906,7 @@ static blk_status_t sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd,
 		bool unmap)
 {
 	struct scsi_device *sdp = cmd->device;
-	struct request *rq = cmd->request;
+	struct request *rq = scsi_cmd_to_rq(cmd);
 	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
 	u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq));
 	u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));
@@ -936,7 +938,7 @@ static blk_status_t sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd,
 		bool unmap)
 {
 	struct scsi_device *sdp = cmd->device;
-	struct request *rq = cmd->request;
+	struct request *rq = scsi_cmd_to_rq(cmd);
 	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
 	u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq));
 	u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));
@@ -966,7 +968,7 @@ static blk_status_t sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd,
 
 static blk_status_t sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd)
 {
-	struct request *rq = cmd->request;
+	struct request *rq = scsi_cmd_to_rq(cmd);
 	struct scsi_device *sdp = cmd->device;
 	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
 	u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq));
@@ -1063,7 +1065,7 @@ out:
  **/
 static blk_status_t sd_setup_write_same_cmnd(struct scsi_cmnd *cmd)
 {
-	struct request *rq = cmd->request;
+	struct request *rq = scsi_cmd_to_rq(cmd);
 	struct scsi_device *sdp = cmd->device;
 	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
 	struct bio *bio = rq->bio;
@@ -1112,7 +1114,7 @@ static blk_status_t sd_setup_write_same_cmnd(struct scsi_cmnd *cmd)
 
 static blk_status_t sd_setup_flush_cmnd(struct scsi_cmnd *cmd)
 {
-	struct request *rq = cmd->request;
+	struct request *rq = scsi_cmd_to_rq(cmd);
 	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
 
 	/* flush requests don't perform I/O, zero the S/G table */
@@ -1210,7 +1212,7 @@ static blk_status_t sd_setup_rw6_cmnd(struct scsi_cmnd *cmd, bool write,
 
 static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
 {
-	struct request *rq = cmd->request;
+	struct request *rq = scsi_cmd_to_rq(cmd);
 	struct scsi_device *sdp = cmd->device;
 	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
 	sector_t lba = sectors_to_logical(sdp, blk_rq_pos(rq));
@@ -1324,7 +1326,7 @@ fail:
 
 static blk_status_t sd_init_command(struct scsi_cmnd *cmd)
 {
-	struct request *rq = cmd->request;
+	struct request *rq = scsi_cmd_to_rq(cmd);
 
 	switch (req_op(rq)) {
 	case REQ_OP_DISCARD:
@@ -1370,7 +1372,7 @@ static blk_status_t sd_init_command(struct scsi_cmnd *cmd)
 
 static void sd_uninit_command(struct scsi_cmnd *SCpnt)
 {
-	struct request *rq = SCpnt->request;
+	struct request *rq = scsi_cmd_to_rq(SCpnt);
 	u8 *cmnd;
 
 	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
@@ -1530,11 +1532,11 @@ static int sd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 }
 
 /**
- *	sd_ioctl_common - process an ioctl
+ *	sd_ioctl - process an ioctl
  *	@bdev: target block device
  *	@mode: FMODE_* mask
  *	@cmd: ioctl command number
- *	@p: this is third argument given to ioctl(2) system call.
+ *	@arg: this is third argument given to ioctl(2) system call.
  *	Often contains a pointer.
  *
  *	Returns 0 if successful (some ioctls return positive numbers on
@@ -1543,20 +1545,20 @@ static int sd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
  *	Note: most ioctls are forward onto the block subsystem or further
  *	down in the scsi subsystem.
  **/
-static int sd_ioctl_common(struct block_device *bdev, fmode_t mode,
-			   unsigned int cmd, void __user *p)
+static int sd_ioctl(struct block_device *bdev, fmode_t mode,
+		    unsigned int cmd, unsigned long arg)
 {
 	struct gendisk *disk = bdev->bd_disk;
 	struct scsi_disk *sdkp = scsi_disk(disk);
 	struct scsi_device *sdp = sdkp->device;
+	void __user *p = (void __user *)arg;
 	int error;
     
 	SCSI_LOG_IOCTL(1, sd_printk(KERN_INFO, sdkp, "sd_ioctl: disk=%s, "
 				    "cmd=0x%x\n", disk->disk_name, cmd));
 
-	error = scsi_verify_blk_ioctl(bdev, cmd);
-	if (error < 0)
-		return error;
+	if (bdev_is_partition(bdev) && !capable(CAP_SYS_RAWIO))
+		return -ENOIOCTLCMD;
 
 	/*
 	 * If we are in the middle of error recovery, don't let anyone
@@ -1567,27 +1569,11 @@ static int sd_ioctl_common(struct block_device *bdev, fmode_t mode,
 	error = scsi_ioctl_block_when_processing_errors(sdp, cmd,
 			(mode & FMODE_NDELAY) != 0);
 	if (error)
-		goto out;
+		return error;
 
 	if (is_sed_ioctl(cmd))
 		return sed_ioctl(sdkp->opal_dev, cmd, p);
-
-	/*
-	 * Send SCSI addressing ioctls directly to mid level, send other
-	 * ioctls to block level and then onto mid level if they can't be
-	 * resolved.
-	 */
-	switch (cmd) {
-		case SCSI_IOCTL_GET_IDLUN:
-		case SCSI_IOCTL_GET_BUS_NUMBER:
-			error = scsi_ioctl(sdp, cmd, p);
-			break;
-		default:
-			error = scsi_cmd_blk_ioctl(bdev, mode, cmd, p);
-			break;
-	}
-out:
-	return error;
+	return scsi_ioctl(sdp, disk, mode, cmd, p);
 }
 
 static void set_media_not_present(struct scsi_disk *sdkp)
@@ -1770,34 +1756,6 @@ static void sd_rescan(struct device *dev)
 	sd_revalidate_disk(sdkp->disk);
 }
 
-static int sd_ioctl(struct block_device *bdev, fmode_t mode,
-		    unsigned int cmd, unsigned long arg)
-{
-	void __user *p = (void __user *)arg;
-	int ret;
-
-	ret = sd_ioctl_common(bdev, mode, cmd, p);
-	if (ret != -ENOTTY)
-		return ret;
-
-	return scsi_ioctl(scsi_disk(bdev->bd_disk)->device, cmd, p);
-}
-
-#ifdef CONFIG_COMPAT
-static int sd_compat_ioctl(struct block_device *bdev, fmode_t mode,
-			   unsigned int cmd, unsigned long arg)
-{
-	void __user *p = compat_ptr(arg);
-	int ret;
-
-	ret = sd_ioctl_common(bdev, mode, cmd, p);
-	if (ret != -ENOTTY)
-		return ret;
-
-	return scsi_compat_ioctl(scsi_disk(bdev->bd_disk)->device, cmd, p);
-}
-#endif
-
 static char sd_pr_type(enum pr_type type)
 {
 	switch (type) {
@@ -1898,9 +1856,7 @@ static const struct block_device_operations sd_fops = {
 	.release		= sd_release,
 	.ioctl			= sd_ioctl,
 	.getgeo			= sd_getgeo,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl		= sd_compat_ioctl,
-#endif
+	.compat_ioctl		= blkdev_compat_ptr_ioctl,
 	.check_events		= sd_check_events,
 	.unlock_native_capacity	= sd_unlock_native_capacity,
 	.report_zones		= sd_zbc_report_zones,
@@ -1921,7 +1877,7 @@ static const struct block_device_operations sd_fops = {
  **/
 static void sd_eh_reset(struct scsi_cmnd *scmd)
 {
-	struct scsi_disk *sdkp = scsi_disk(scmd->request->rq_disk);
+	struct scsi_disk *sdkp = scsi_disk(scsi_cmd_to_rq(scmd)->rq_disk);
 
 	/* New SCSI EH run, reset gate variable */
 	sdkp->ignore_medium_access_errors = false;
@@ -1941,7 +1897,7 @@ static void sd_eh_reset(struct scsi_cmnd *scmd)
  **/
 static int sd_eh_action(struct scsi_cmnd *scmd, int eh_disp)
 {
-	struct scsi_disk *sdkp = scsi_disk(scmd->request->rq_disk);
+	struct scsi_disk *sdkp = scsi_disk(scsi_cmd_to_rq(scmd)->rq_disk);
 	struct scsi_device *sdev = scmd->device;
 
 	if (!scsi_device_online(sdev) ||
@@ -1982,7 +1938,7 @@ static int sd_eh_action(struct scsi_cmnd *scmd, int eh_disp)
 
 static unsigned int sd_completed_bytes(struct scsi_cmnd *scmd)
 {
-	struct request *req = scmd->request;
+	struct request *req = scsi_cmd_to_rq(scmd);
 	struct scsi_device *sdev = scmd->device;
 	unsigned int transferred, good_bytes;
 	u64 start_lba, end_lba, bad_lba;
@@ -2037,8 +1993,8 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 	unsigned int sector_size = SCpnt->device->sector_size;
 	unsigned int resid;
 	struct scsi_sense_hdr sshdr;
-	struct scsi_disk *sdkp = scsi_disk(SCpnt->request->rq_disk);
-	struct request *req = SCpnt->request;
+	struct request *req = scsi_cmd_to_rq(SCpnt);
+	struct scsi_disk *sdkp = scsi_disk(req->rq_disk);
 	int sense_valid = 0;
 	int sense_deferred = 0;
 
@@ -2181,8 +2137,10 @@ sd_spinup_disk(struct scsi_disk *sdkp)
 			 * doesn't have any media in it, don't bother
 			 * with any more polling.
 			 */
-			if (media_not_present(sdkp, &sshdr))
+			if (media_not_present(sdkp, &sshdr)) {
+				sd_printk(KERN_NOTICE, sdkp, "Media removed, stopped polling\n");
 				return;
+			}
 
 			if (the_result)
 				sense_valid = scsi_sense_valid(&sshdr);
@@ -3718,6 +3676,25 @@ static int sd_resume(struct device *dev)
 	return ret;
 }
 
+static int sd_resume_runtime(struct device *dev)
+{
+	struct scsi_disk *sdkp = dev_get_drvdata(dev);
+	struct scsi_device *sdp = sdkp->device;
+
+	if (sdp->ignore_media_change) {
+		/* clear the device's sense data */
+		static const u8 cmd[10] = { REQUEST_SENSE };
+
+		if (scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL,
+				 NULL, sdp->request_queue->rq_timeout, 1, 0,
+				 RQF_PM, NULL))
+			sd_printk(KERN_NOTICE, sdkp,
+				  "Failed to clear sense data\n");
+	}
+
+	return sd_resume(dev);
+}
+
 /**
  *	init_sd - entry point for this driver (both when built in or when
  *	a module).
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 186b5ff52c3a..b9757f24b0d6 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -243,7 +243,7 @@ out:
 
 static blk_status_t sd_zbc_cmnd_checks(struct scsi_cmnd *cmd)
 {
-	struct request *rq = cmd->request;
+	struct request *rq = scsi_cmd_to_rq(cmd);
 	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
 	sector_t sector = blk_rq_pos(rq);
 
@@ -321,7 +321,7 @@ static void sd_zbc_update_wp_offset_workfn(struct work_struct *work)
 blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
 					unsigned int nr_blocks)
 {
-	struct request *rq = cmd->request;
+	struct request *rq = scsi_cmd_to_rq(cmd);
 	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
 	unsigned int wp_offset, zno = blk_rq_zone_no(rq);
 	unsigned long flags;
@@ -386,7 +386,7 @@ blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
 blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
 					 unsigned char op, bool all)
 {
-	struct request *rq = cmd->request;
+	struct request *rq = scsi_cmd_to_rq(cmd);
 	sector_t sector = blk_rq_pos(rq);
 	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
 	sector_t block = sectors_to_logical(sdkp->device, sector);
@@ -442,7 +442,7 @@ static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd,
 					  unsigned int good_bytes)
 {
 	int result = cmd->result;
-	struct request *rq = cmd->request;
+	struct request *rq = scsi_cmd_to_rq(cmd);
 	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
 	unsigned int zno = blk_rq_zone_no(rq);
 	enum req_opf op = req_op(rq);
@@ -516,7 +516,7 @@ unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
 		     struct scsi_sense_hdr *sshdr)
 {
 	int result = cmd->result;
-	struct request *rq = cmd->request;
+	struct request *rq = scsi_cmd_to_rq(cmd);
 
 	if (op_is_zone_mgmt(req_op(rq)) &&
 	    result &&
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index d5889b4f0fd4..8f05248920e8 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -237,8 +237,9 @@ static int sg_allow_access(struct file *filp, unsigned char *cmd)
 
 	if (sfp->parentdp->device->type == TYPE_SCANNER)
 		return 0;
-
-	return blk_verify_command(cmd, filp->f_mode);
+	if (!scsi_cmd_allowed(cmd, filp->f_mode))
+		return -EPERM;
+	return 0;
 }
 
 static int
@@ -1107,7 +1108,7 @@ sg_ioctl_common(struct file *filp, Sg_device *sdp, Sg_fd *sfp,
 	case SCSI_IOCTL_SEND_COMMAND:
 		if (atomic_read(&sdp->detaching))
 			return -ENODEV;
-		return sg_scsi_ioctl(sdp->device->request_queue, NULL, filp->f_mode, p);
+		return scsi_ioctl(sdp->device, NULL, filp->f_mode, cmd_in, p);
 	case SG_SET_DEBUG:
 		result = get_user(val, ip);
 		if (result)
@@ -1163,28 +1164,8 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg)
 	ret = sg_ioctl_common(filp, sdp, sfp, cmd_in, p);
 	if (ret != -ENOIOCTLCMD)
 		return ret;
-
-	return scsi_ioctl(sdp->device, cmd_in, p);
-}
-
-#ifdef CONFIG_COMPAT
-static long sg_compat_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg)
-{
-	void __user *p = compat_ptr(arg);
-	Sg_device *sdp;
-	Sg_fd *sfp;
-	int ret;
-
-	if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp)))
-		return -ENXIO;
-
-	ret = sg_ioctl_common(filp, sdp, sfp, cmd_in, p);
-	if (ret != -ENOIOCTLCMD)
-		return ret;
-
-	return scsi_compat_ioctl(sdp->device, cmd_in, p);
+	return scsi_ioctl(sdp->device, NULL, filp->f_mode, cmd_in, p);
 }
-#endif
 
 static __poll_t
 sg_poll(struct file *filp, poll_table * wait)
@@ -1439,9 +1420,7 @@ static const struct file_operations sg_fops = {
 	.write = sg_write,
 	.poll = sg_poll,
 	.unlocked_ioctl = sg_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl = sg_compat_ioctl,
-#endif
+	.compat_ioctl = compat_ptr_ioctl,
 	.open = sg_open,
 	.mmap = sg_mmap,
 	.release = sg_release,
diff --git a/drivers/scsi/smartpqi/Kconfig b/drivers/scsi/smartpqi/Kconfig
index cb9e4e968b60..6f83e2df4d64 100644
--- a/drivers/scsi/smartpqi/Kconfig
+++ b/drivers/scsi/smartpqi/Kconfig
@@ -1,7 +1,7 @@
 #
 # Kernel configuration file for the SMARTPQI
 #
-# Copyright (c) 2019-2020 Microchip Technology Inc. and its subsidiaries
+# Copyright (c) 2019-2021 Microchip Technology Inc. and its subsidiaries
 # Copyright (c) 2017-2018 Microsemi Corporation
 # Copyright (c) 2016 Microsemi Corporation
 # Copyright (c) 2016 PMC-Sierra, Inc.
@@ -38,14 +38,14 @@
 # HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES
 
 config SCSI_SMARTPQI
-	tristate "Microsemi PQI Driver"
+	tristate "Microchip PQI Driver"
 	depends on PCI && SCSI && !S390
 	select SCSI_SAS_ATTRS
 	select RAID_ATTRS
 	help
-	This driver supports Microsemi PQI controllers.
+	This driver supports Microchip PQI controllers.
 
-	<http://www.microsemi.com>
+	<http://www.microchip.com>
 
 	To compile this driver as a module, choose M here: the
 	module will be called smartpqi.
diff --git a/drivers/scsi/smartpqi/smartpqi.h b/drivers/scsi/smartpqi/smartpqi.h
index d7dac5572274..70eca203d72f 100644
--- a/drivers/scsi/smartpqi/smartpqi.h
+++ b/drivers/scsi/smartpqi/smartpqi.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- *    driver for Microsemi PQI-based storage controllers
- *    Copyright (c) 2019-2020 Microchip Technology Inc. and its subsidiaries
+ *    driver for Microchip PQI-based storage controllers
+ *    Copyright (c) 2019-2021 Microchip Technology Inc. and its subsidiaries
  *    Copyright (c) 2016-2018 Microsemi Corporation
  *    Copyright (c) 2016 PMC-Sierra, Inc.
  *
@@ -59,7 +59,7 @@ struct pqi_device_registers {
 /*
  * controller registers
  *
- * These are defined by the Microsemi implementation.
+ * These are defined by the Microchip implementation.
  *
  * Some registers (those named sis_*) are only used when in
  * legacy SIS mode before we transition the controller into
@@ -415,7 +415,7 @@ struct pqi_event_config {
 	u8	reserved[2];
 	u8	num_event_descriptors;
 	u8	reserved1;
-	struct pqi_event_descriptor descriptors[1];
+	struct pqi_event_descriptor descriptors[];
 };
 
 #define PQI_MAX_EVENT_DESCRIPTORS	255
diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index dcc0b9618a64..ecb2af3f43ca 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- *    driver for Microsemi PQI-based storage controllers
- *    Copyright (c) 2019-2020 Microchip Technology Inc. and its subsidiaries
+ *    driver for Microchip PQI-based storage controllers
+ *    Copyright (c) 2019-2021 Microchip Technology Inc. and its subsidiaries
  *    Copyright (c) 2016-2018 Microsemi Corporation
  *    Copyright (c) 2016 PMC-Sierra, Inc.
  *
@@ -33,13 +33,13 @@
 #define BUILD_TIMESTAMP
 #endif
 
-#define DRIVER_VERSION		"2.1.8-045"
+#define DRIVER_VERSION		"2.1.10-020"
 #define DRIVER_MAJOR		2
 #define DRIVER_MINOR		1
-#define DRIVER_RELEASE		8
-#define DRIVER_REVISION		45
+#define DRIVER_RELEASE		10
+#define DRIVER_REVISION		20
 
-#define DRIVER_NAME		"Microsemi PQI Driver (v" \
+#define DRIVER_NAME		"Microchip SmartPQI Driver (v" \
 				DRIVER_VERSION BUILD_TIMESTAMP ")"
 #define DRIVER_NAME_SHORT	"smartpqi"
 
@@ -48,8 +48,8 @@
 #define PQI_POST_RESET_DELAY_SECS			5
 #define PQI_POST_OFA_RESET_DELAY_UPON_TIMEOUT_SECS	10
 
-MODULE_AUTHOR("Microsemi");
-MODULE_DESCRIPTION("Driver for Microsemi Smart Family Controller version "
+MODULE_AUTHOR("Microchip");
+MODULE_DESCRIPTION("Driver for Microchip Smart Family Controller version "
 	DRIVER_VERSION);
 MODULE_VERSION(DRIVER_VERSION);
 MODULE_LICENSE("GPL");
@@ -1322,6 +1322,7 @@ static int pqi_get_raid_map(struct pqi_ctrl_info *ctrl_info,
 				"requested %u bytes, received %u bytes\n",
 				raid_map_size,
 				get_unaligned_le32(&raid_map->structure_size));
+			rc = -EINVAL;
 			goto error;
 		}
 	}
@@ -4740,8 +4741,7 @@ static int pqi_create_queues(struct pqi_ctrl_info *ctrl_info)
 }
 
 #define PQI_REPORT_EVENT_CONFIG_BUFFER_LENGTH	\
-	(offsetof(struct pqi_event_config, descriptors) + \
-	(PQI_MAX_EVENT_DESCRIPTORS * sizeof(struct pqi_event_descriptor)))
+	struct_size((struct pqi_event_config *)0, descriptors, PQI_MAX_EVENT_DESCRIPTORS)
 
 static int pqi_configure_events(struct pqi_ctrl_info *ctrl_info,
 	bool enable_events)
@@ -5568,7 +5568,7 @@ static inline u16 pqi_get_hw_queue(struct pqi_ctrl_info *ctrl_info,
 {
 	u16 hw_queue;
 
-	hw_queue = blk_mq_unique_tag_to_hwq(blk_mq_unique_tag(scmd->request));
+	hw_queue = blk_mq_unique_tag_to_hwq(blk_mq_unique_tag(scsi_cmd_to_rq(scmd)));
 	if (hw_queue > ctrl_info->max_hw_queue_index)
 		hw_queue = 0;
 
@@ -5577,7 +5577,7 @@ static inline u16 pqi_get_hw_queue(struct pqi_ctrl_info *ctrl_info,
 
 static inline bool pqi_is_bypass_eligible_request(struct scsi_cmnd *scmd)
 {
-	if (blk_rq_is_passthrough(scmd->request))
+	if (blk_rq_is_passthrough(scsi_cmd_to_rq(scmd)))
 		return false;
 
 	return scmd->SCp.this_residual == 0;
@@ -6033,8 +6033,10 @@ static int pqi_eh_device_reset_handler(struct scsi_cmnd *scmd)
 	mutex_lock(&ctrl_info->lun_reset_mutex);
 
 	dev_err(&ctrl_info->pci_dev->dev,
-		"resetting scsi %d:%d:%d:%d\n",
-		shost->host_no, device->bus, device->target, device->lun);
+		"resetting scsi %d:%d:%d:%d due to cmd 0x%02x\n",
+		shost->host_no,
+		device->bus, device->target, device->lun,
+		scmd->cmd_len > 0 ? scmd->cmnd[0] : 0xff);
 
 	pqi_check_ctrl_health(ctrl_info);
 	if (pqi_ctrl_offline(ctrl_info))
@@ -7758,11 +7760,11 @@ static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info)
 
 	pqi_init_operational_queues(ctrl_info);
 
-	rc = pqi_request_irqs(ctrl_info);
+	rc = pqi_create_queues(ctrl_info);
 	if (rc)
 		return rc;
 
-	rc = pqi_create_queues(ctrl_info);
+	rc = pqi_request_irqs(ctrl_info);
 	if (rc)
 		return rc;
 
@@ -8451,7 +8453,7 @@ static void pqi_print_ctrl_info(struct pci_dev *pci_dev,
 	if (id->driver_data)
 		ctrl_description = (char *)id->driver_data;
 	else
-		ctrl_description = "Microsemi Smart Family Controller";
+		ctrl_description = "Microchip Smart Family Controller";
 
 	dev_info(&pci_dev->dev, "%s found\n", ctrl_description);
 }
@@ -8713,6 +8715,14 @@ static const struct pci_device_id pqi_pci_id_table[] = {
 	},
 	{
 		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       0x193d, 0x1108)
+	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       0x193d, 0x1109)
+	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
 			       0x193d, 0x8460)
 	},
 	{
@@ -9173,6 +9183,34 @@ static const struct pci_device_id pqi_pci_id_table[] = {
 	},
 	{
 		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       0x1dfc, 0x3161)
+	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       0x1cf2, 0x5445)
+	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       0x1cf2, 0x5446)
+	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       0x1cf2, 0x5447)
+	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       0x1cf2, 0x0b27)
+	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       0x1cf2, 0x0b29)
+	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       0x1cf2, 0x0b45)
+	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
 			       PCI_ANY_ID, PCI_ANY_ID)
 	},
 	{ 0 }
diff --git a/drivers/scsi/smartpqi/smartpqi_sas_transport.c b/drivers/scsi/smartpqi/smartpqi_sas_transport.c
index dd628cc87f78..afd9bafebd1d 100644
--- a/drivers/scsi/smartpqi/smartpqi_sas_transport.c
+++ b/drivers/scsi/smartpqi/smartpqi_sas_transport.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- *    driver for Microsemi PQI-based storage controllers
- *    Copyright (c) 2019-2020 Microchip Technology Inc. and its subsidiaries
+ *    driver for Microchip PQI-based storage controllers
+ *    Copyright (c) 2019-2021 Microchip Technology Inc. and its subsidiaries
  *    Copyright (c) 2016-2018 Microsemi Corporation
  *    Copyright (c) 2016 PMC-Sierra, Inc.
  *
diff --git a/drivers/scsi/smartpqi/smartpqi_sis.c b/drivers/scsi/smartpqi/smartpqi_sis.c
index c954620628e0..d63c46a8e38b 100644
--- a/drivers/scsi/smartpqi/smartpqi_sis.c
+++ b/drivers/scsi/smartpqi/smartpqi_sis.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- *    driver for Microsemi PQI-based storage controllers
- *    Copyright (c) 2019-2020 Microchip Technology Inc. and its subsidiaries
+ *    driver for Microchip PQI-based storage controllers
+ *    Copyright (c) 2019-2021 Microchip Technology Inc. and its subsidiaries
  *    Copyright (c) 2016-2018 Microsemi Corporation
  *    Copyright (c) 2016 PMC-Sierra, Inc.
  *
diff --git a/drivers/scsi/smartpqi/smartpqi_sis.h b/drivers/scsi/smartpqi/smartpqi_sis.h
index 12cd2ab1aead..d29c1352a826 100644
--- a/drivers/scsi/smartpqi/smartpqi_sis.h
+++ b/drivers/scsi/smartpqi/smartpqi_sis.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- *    driver for Microsemi PQI-based storage controllers
- *    Copyright (c) 2019-2020 Microchip Technology Inc. and its subsidiaries
+ *    driver for Microchip PQI-based storage controllers
+ *    Copyright (c) 2019-2021 Microchip Technology Inc. and its subsidiaries
  *    Copyright (c) 2016-2018 Microsemi Corporation
  *    Copyright (c) 2016 PMC-Sierra, Inc.
  *
diff --git a/drivers/scsi/snic/snic_scsi.c b/drivers/scsi/snic/snic_scsi.c
index 6dd0ff188bb4..43a950185e24 100644
--- a/drivers/scsi/snic/snic_scsi.c
+++ b/drivers/scsi/snic/snic_scsi.c
@@ -33,7 +33,7 @@
 #include "snic_io.h"
 #include "snic.h"
 
-#define snic_cmd_tag(sc)	(((struct scsi_cmnd *) sc)->request->tag)
+#define snic_cmd_tag(sc)	(scsi_cmd_to_rq(sc)->tag)
 
 const char *snic_state_str[] = {
 	[SNIC_INIT]	= "SNIC_INIT",
@@ -1636,7 +1636,7 @@ snic_abort_cmd(struct scsi_cmnd *sc)
 	u32 start_time = jiffies;
 
 	SNIC_SCSI_DBG(snic->shost, "abt_cmd:sc %p :0x%x :req = %p :tag = %d\n",
-		       sc, sc->cmnd[0], sc->request, tag);
+		       sc, sc->cmnd[0], scsi_cmd_to_rq(sc), tag);
 
 	if (unlikely(snic_get_state(snic) != SNIC_ONLINE)) {
 		SNIC_HOST_ERR(snic->shost,
@@ -2152,7 +2152,7 @@ snic_device_reset(struct scsi_cmnd *sc)
 	int dr_supp = 0;
 
 	SNIC_SCSI_DBG(shost, "dev_reset:sc %p :0x%x :req = %p :tag = %d\n",
-		      sc, sc->cmnd[0], sc->request,
+		      sc, sc->cmnd[0], scsi_cmd_to_rq(sc),
 		      snic_cmd_tag(sc));
 	dr_supp = snic_dev_reset_supported(sc->device);
 	if (!dr_supp) {
@@ -2335,7 +2335,7 @@ snic_reset(struct Scsi_Host *shost, struct scsi_cmnd *sc)
 	spin_lock_irqsave(&snic->snic_lock, flags);
 	if (snic_get_state(snic) == SNIC_FWRESET) {
 		spin_unlock_irqrestore(&snic->snic_lock, flags);
-		SNIC_HOST_INFO(shost, "reset:prev reset is in progres\n");
+		SNIC_HOST_INFO(shost, "reset:prev reset is in progress\n");
 
 		msleep(SNIC_HOST_RESET_TIMEOUT);
 		ret = SUCCESS;
@@ -2383,11 +2383,11 @@ snic_host_reset(struct scsi_cmnd *sc)
 {
 	struct Scsi_Host *shost = sc->device->host;
 	u32 start_time  = jiffies;
-	int ret = FAILED;
+	int ret;
 
 	SNIC_SCSI_DBG(shost,
 		      "host reset:sc %p sc_cmd 0x%x req %p tag %d flags 0x%llx\n",
-		      sc, sc->cmnd[0], sc->request,
+		      sc, sc->cmnd[0], scsi_cmd_to_rq(sc),
 		      snic_cmd_tag(sc), CMD_FLAGS(sc));
 
 	ret = snic_reset(shost, sc);
@@ -2494,7 +2494,7 @@ cleanup:
 		sc->result = DID_TRANSPORT_DISRUPTED << 16;
 		SNIC_HOST_INFO(snic->shost,
 			       "sc_clean: DID_TRANSPORT_DISRUPTED for sc %p, Tag %d flags 0x%llx rqi %p duration %u msecs\n",
-			       sc, sc->request->tag, CMD_FLAGS(sc), rqi,
+			       sc, scsi_cmd_to_rq(sc)->tag, CMD_FLAGS(sc), rqi,
 			       jiffies_to_msecs(jiffies - st_time));
 
 		/* Update IO stats */
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 2942a4ec9bdd..8b17b35283aa 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -122,6 +122,8 @@ static void get_capabilities(struct scsi_cd *);
 static unsigned int sr_check_events(struct cdrom_device_info *cdi,
 				    unsigned int clearing, int slot);
 static int sr_packet(struct cdrom_device_info *, struct packet_command *);
+static int sr_read_cdda_bpc(struct cdrom_device_info *cdi, void __user *ubuf,
+		u32 lba, u32 nr, u8 *last_sense);
 
 static const struct cdrom_device_ops sr_dops = {
 	.open			= sr_open,
@@ -135,8 +137,9 @@ static const struct cdrom_device_ops sr_dops = {
 	.get_mcn		= sr_get_mcn,
 	.reset			= sr_reset,
 	.audio_ioctl		= sr_audio_ioctl,
-	.capability		= SR_CAPABILITIES,
 	.generic_packet		= sr_packet,
+	.read_cdda_bpc		= sr_read_cdda_bpc,
+	.capability		= SR_CAPABILITIES,
 };
 
 static void sr_kref_release(struct kref *kref);
@@ -330,7 +333,8 @@ static int sr_done(struct scsi_cmnd *SCpnt)
 	int good_bytes = (result == 0 ? this_count : 0);
 	int block_sectors = 0;
 	long error_sector;
-	struct scsi_cd *cd = scsi_cd(SCpnt->request->rq_disk);
+	struct request *rq = scsi_cmd_to_rq(SCpnt);
+	struct scsi_cd *cd = scsi_cd(rq->rq_disk);
 
 #ifdef DEBUG
 	scmd_printk(KERN_INFO, SCpnt, "done: %x\n", result);
@@ -352,16 +356,14 @@ static int sr_done(struct scsi_cmnd *SCpnt)
 				break;
 			error_sector =
 				get_unaligned_be32(&SCpnt->sense_buffer[3]);
-			if (SCpnt->request->bio != NULL)
-				block_sectors =
-					bio_sectors(SCpnt->request->bio);
+			if (rq->bio != NULL)
+				block_sectors = bio_sectors(rq->bio);
 			if (block_sectors < 4)
 				block_sectors = 4;
 			if (cd->device->sector_size == 2048)
 				error_sector <<= 2;
 			error_sector &= ~(block_sectors - 1);
-			good_bytes = (error_sector -
-				      blk_rq_pos(SCpnt->request)) << 9;
+			good_bytes = (error_sector - blk_rq_pos(rq)) << 9;
 			if (good_bytes < 0 || good_bytes >= this_count)
 				good_bytes = 0;
 			/*
@@ -393,7 +395,7 @@ static blk_status_t sr_init_command(struct scsi_cmnd *SCpnt)
 {
 	int block = 0, this_count, s_size;
 	struct scsi_cd *cd;
-	struct request *rq = SCpnt->request;
+	struct request *rq = scsi_cmd_to_rq(SCpnt);
 	blk_status_t ret;
 
 	ret = scsi_alloc_sgtables(SCpnt);
@@ -558,53 +560,14 @@ static void sr_block_release(struct gendisk *disk, fmode_t mode)
 static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 			  unsigned long arg)
 {
-	struct scsi_cd *cd = scsi_cd(bdev->bd_disk);
+	struct gendisk *disk = bdev->bd_disk;
+	struct scsi_cd *cd = scsi_cd(disk);
 	struct scsi_device *sdev = cd->device;
 	void __user *argp = (void __user *)arg;
 	int ret;
 
-	mutex_lock(&cd->lock);
-
-	ret = scsi_ioctl_block_when_processing_errors(sdev, cmd,
-			(mode & FMODE_NDELAY) != 0);
-	if (ret)
-		goto out;
-
-	scsi_autopm_get_device(sdev);
-
-	/*
-	 * Send SCSI addressing ioctls directly to mid level, send other
-	 * ioctls to cdrom/block level.
-	 */
-	switch (cmd) {
-	case SCSI_IOCTL_GET_IDLUN:
-	case SCSI_IOCTL_GET_BUS_NUMBER:
-		ret = scsi_ioctl(sdev, cmd, argp);
-		goto put;
-	}
-
-	ret = cdrom_ioctl(&cd->cdi, bdev, mode, cmd, arg);
-	if (ret != -ENOSYS)
-		goto put;
-
-	ret = scsi_ioctl(sdev, cmd, argp);
-
-put:
-	scsi_autopm_put_device(sdev);
-
-out:
-	mutex_unlock(&cd->lock);
-	return ret;
-}
-
-#ifdef CONFIG_COMPAT
-static int sr_block_compat_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
-			  unsigned long arg)
-{
-	struct scsi_cd *cd = scsi_cd(bdev->bd_disk);
-	struct scsi_device *sdev = cd->device;
-	void __user *argp = compat_ptr(arg);
-	int ret;
+	if (bdev_is_partition(bdev) && !capable(CAP_SYS_RAWIO))
+		return -ENOIOCTLCMD;
 
 	mutex_lock(&cd->lock);
 
@@ -615,32 +578,19 @@ static int sr_block_compat_ioctl(struct block_device *bdev, fmode_t mode, unsign
 
 	scsi_autopm_get_device(sdev);
 
-	/*
-	 * Send SCSI addressing ioctls directly to mid level, send other
-	 * ioctls to cdrom/block level.
-	 */
-	switch (cmd) {
-	case SCSI_IOCTL_GET_IDLUN:
-	case SCSI_IOCTL_GET_BUS_NUMBER:
-		ret = scsi_compat_ioctl(sdev, cmd, argp);
-		goto put;
+	if (ret != CDROMCLOSETRAY && ret != CDROMEJECT) {
+		ret = cdrom_ioctl(&cd->cdi, bdev, mode, cmd, arg);
+		if (ret != -ENOSYS)
+			goto put;
 	}
-
-	ret = cdrom_ioctl(&cd->cdi, bdev, mode, cmd, (unsigned long)argp);
-	if (ret != -ENOSYS)
-		goto put;
-
-	ret = scsi_compat_ioctl(sdev, cmd, argp);
+	ret = scsi_ioctl(sdev, disk, mode, cmd, argp);
 
 put:
 	scsi_autopm_put_device(sdev);
-
 out:
 	mutex_unlock(&cd->lock);
 	return ret;
-
 }
-#endif
 
 static unsigned int sr_block_check_events(struct gendisk *disk,
 					  unsigned int clearing)
@@ -665,9 +615,7 @@ static const struct block_device_operations sr_bdops =
 	.open		= sr_block_open,
 	.release	= sr_block_release,
 	.ioctl		= sr_block_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= sr_block_compat_ioctl,
-#endif
+	.compat_ioctl	= blkdev_compat_ptr_ioctl,
 	.check_events	= sr_block_check_events,
 };
 
@@ -1008,6 +956,57 @@ static int sr_packet(struct cdrom_device_info *cdi,
 	return cgc->stat;
 }
 
+static int sr_read_cdda_bpc(struct cdrom_device_info *cdi, void __user *ubuf,
+		u32 lba, u32 nr, u8 *last_sense)
+{
+	struct gendisk *disk = cdi->disk;
+	u32 len = nr * CD_FRAMESIZE_RAW;
+	struct scsi_request *req;
+	struct request *rq;
+	struct bio *bio;
+	int ret;
+
+	rq = blk_get_request(disk->queue, REQ_OP_DRV_IN, 0);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+	req = scsi_req(rq);
+
+	ret = blk_rq_map_user(disk->queue, rq, NULL, ubuf, len, GFP_KERNEL);
+	if (ret)
+		goto out_put_request;
+
+	req->cmd[0] = GPCMD_READ_CD;
+	req->cmd[1] = 1 << 2;
+	req->cmd[2] = (lba >> 24) & 0xff;
+	req->cmd[3] = (lba >> 16) & 0xff;
+	req->cmd[4] = (lba >>  8) & 0xff;
+	req->cmd[5] = lba & 0xff;
+	req->cmd[6] = (nr >> 16) & 0xff;
+	req->cmd[7] = (nr >>  8) & 0xff;
+	req->cmd[8] = nr & 0xff;
+	req->cmd[9] = 0xf8;
+	req->cmd_len = 12;
+	rq->timeout = 60 * HZ;
+	bio = rq->bio;
+
+	blk_execute_rq(disk, rq, 0);
+	if (scsi_req(rq)->result) {
+		struct scsi_sense_hdr sshdr;
+
+		scsi_normalize_sense(req->sense, req->sense_len,
+				     &sshdr);
+		*last_sense = sshdr.sense_key;
+		ret = -EIO;
+	}
+
+	if (blk_rq_unmap_user(bio))
+		ret = -EFAULT;
+out_put_request:
+	blk_put_request(rq);
+	return ret;
+}
+
+
 /**
  *	sr_kref_release - Called to free the scsi_cd structure
  *	@kref: pointer to embedded kref
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index d1abc020f3c0..9d04929f03a1 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -3494,8 +3494,9 @@ out:
 
 
 /* The ioctl command */
-static long st_ioctl_common(struct file *file, unsigned int cmd_in, void __user *p)
+static long st_ioctl(struct file *file, unsigned int cmd_in, unsigned long arg)
 {
+	void __user *p = (void __user *)arg;
 	int i, cmd_nr, cmd_type, bt;
 	int retval = 0;
 	unsigned int blk;
@@ -3815,74 +3816,44 @@ static long st_ioctl_common(struct file *file, unsigned int cmd_in, void __user
 		goto out;
 	}
 	mutex_unlock(&STp->lock);
-	switch (cmd_in) {
-		case SCSI_IOCTL_STOP_UNIT:
-			/* unload */
-			retval = scsi_ioctl(STp->device, cmd_in, p);
-			if (!retval) {
-				STp->rew_at_close = 0;
-				STp->ready = ST_NO_TAPE;
-			}
-			return retval;
 
-		case SCSI_IOCTL_GET_IDLUN:
-		case SCSI_IOCTL_GET_BUS_NUMBER:
-			break;
+	switch (cmd_in) {
+	case SG_IO:
+	case SCSI_IOCTL_SEND_COMMAND:
+	case CDROM_SEND_PACKET:
+		if (!capable(CAP_SYS_RAWIO))
+			return -EPERM;
+	default:
+		break;
+	}
 
-		default:
-			if ((cmd_in == SG_IO ||
-			     cmd_in == SCSI_IOCTL_SEND_COMMAND ||
-			     cmd_in == CDROM_SEND_PACKET) &&
-			    !capable(CAP_SYS_RAWIO))
-				i = -EPERM;
-			else
-				i = scsi_cmd_ioctl(STp->device->request_queue,
-						   NULL, file->f_mode, cmd_in,
-						   p);
-			if (i != -ENOTTY)
-				return i;
-			break;
+	retval = scsi_ioctl(STp->device, NULL, file->f_mode, cmd_in, p);
+	if (!retval && cmd_in == SCSI_IOCTL_STOP_UNIT) {
+		/* unload */
+		STp->rew_at_close = 0;
+		STp->ready = ST_NO_TAPE;
 	}
-	return -ENOTTY;
+	return retval;
 
  out:
 	mutex_unlock(&STp->lock);
 	return retval;
 }
 
-static long st_ioctl(struct file *file, unsigned int cmd_in, unsigned long arg)
-{
-	void __user *p = (void __user *)arg;
-	struct scsi_tape *STp = file->private_data;
-	int ret;
-
-	ret = st_ioctl_common(file, cmd_in, p);
-	if (ret != -ENOTTY)
-		return ret;
-
-	return scsi_ioctl(STp->device, cmd_in, p);
-}
-
 #ifdef CONFIG_COMPAT
 static long st_compat_ioctl(struct file *file, unsigned int cmd_in, unsigned long arg)
 {
-	void __user *p = compat_ptr(arg);
-	struct scsi_tape *STp = file->private_data;
-	int ret;
-
 	/* argument conversion is handled using put_user_mtpos/put_user_mtget */
 	switch (cmd_in) {
 	case MTIOCPOS32:
-		return st_ioctl_common(file, MTIOCPOS, p);
+		cmd_in = MTIOCPOS;
+		break;
 	case MTIOCGET32:
-		return st_ioctl_common(file, MTIOCGET, p);
+		cmd_in = MTIOCGET;
+		break;
 	}
 
-	ret = st_ioctl_common(file, cmd_in, p);
-	if (ret != -ENOTTY)
-		return ret;
-
-	return scsi_compat_ioctl(STp->device, cmd_in, p);
+	return st_ioctl(file, cmd_in, arg);
 }
 #endif
 
diff --git a/drivers/scsi/stex.c b/drivers/scsi/stex.c
index 491b435273a6..f1ba7f5b52a8 100644
--- a/drivers/scsi/stex.c
+++ b/drivers/scsi/stex.c
@@ -540,7 +540,7 @@ stex_ss_send_cmd(struct st_hba *hba, struct req_msg *req, u16 tag)
 	msg_h = (struct st_msg_header *)req - 1;
 	if (likely(cmd)) {
 		msg_h->channel = (u8)cmd->device->channel;
-		msg_h->timeout = cpu_to_le16(cmd->request->timeout/HZ);
+		msg_h->timeout = cpu_to_le16(scsi_cmd_to_rq(cmd)->timeout / HZ);
 	}
 	addr = hba->dma_handle + hba->req_head * hba->rq_size;
 	addr += (hba->ccb[tag].sg_count+4)/11;
@@ -690,7 +690,7 @@ stex_queuecommand_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
 
 	cmd->scsi_done = done;
 
-	tag = cmd->request->tag;
+	tag = scsi_cmd_to_rq(cmd)->tag;
 
 	if (unlikely(tag >= host->can_queue))
 		return SCSI_MLQUEUE_HOST_BUSY;
@@ -1246,7 +1246,7 @@ static int stex_abort(struct scsi_cmnd *cmd)
 {
 	struct Scsi_Host *host = cmd->device->host;
 	struct st_hba *hba = (struct st_hba *)host->hostdata;
-	u16 tag = cmd->request->tag;
+	u16 tag = scsi_cmd_to_rq(cmd)->tag;
 	void __iomem *base;
 	u32 data;
 	int result = SUCCESS;
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 37506b3fe5a9..ebbbc1299c62 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -710,7 +710,7 @@ static u64 storvsc_next_request_id(struct vmbus_channel *channel, u64 rqst_addr)
 	 * Cannot return an ID of 0, which is reserved for an unsolicited
 	 * message from Hyper-V.
 	 */
-	return (u64)blk_mq_unique_tag(request->cmd->request) + 1;
+	return (u64)blk_mq_unique_tag(scsi_cmd_to_rq(request->cmd)) + 1;
 }
 
 static void handle_sc_creation(struct vmbus_channel *new_sc)
@@ -1211,7 +1211,7 @@ static void storvsc_on_io_completion(struct storvsc_device *stor_device,
 
 		storvsc_log(device, loglevel,
 			"tag#%d cmd 0x%x status: scsi 0x%x srb 0x%x hv 0x%x\n",
-			request->cmd->request->tag,
+			scsi_cmd_to_rq(request->cmd)->tag,
 			stor_pkt->vm_srb.cdb[0],
 			vstor_packet->vm_srb.scsi_status,
 			vstor_packet->vm_srb.srb_status,
diff --git a/drivers/scsi/sun3_scsi.c b/drivers/scsi/sun3_scsi.c
index 2e3fbc2fae97..f7f724a3ff1d 100644
--- a/drivers/scsi/sun3_scsi.c
+++ b/drivers/scsi/sun3_scsi.c
@@ -336,7 +336,7 @@ static int sun3scsi_dma_xfer_len(struct NCR5380_hostdata *hostdata,
 {
 	int wanted_len = cmd->SCp.this_residual;
 
-	if (wanted_len < DMA_MIN_SIZE || blk_rq_is_passthrough(cmd->request))
+	if (wanted_len < DMA_MIN_SIZE || blk_rq_is_passthrough(scsi_cmd_to_rq(cmd)))
 		return 0;
 
 	return wanted_len;
@@ -366,8 +366,9 @@ static inline int sun3scsi_dma_start(unsigned long count, unsigned char *data)
 }
 
 /* clean up after our dma is done */
-static int sun3scsi_dma_finish(int write_flag)
+static int sun3scsi_dma_finish(enum dma_data_direction data_dir)
 {
+	const bool write_flag = data_dir == DMA_TO_DEVICE;
 	unsigned short __maybe_unused count;
 	unsigned short fifo;
 	int ret = 0;
diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c
index 16b65fc4405c..6d0b07b9cb31 100644
--- a/drivers/scsi/sym53c8xx_2/sym_glue.c
+++ b/drivers/scsi/sym53c8xx_2/sym_glue.c
@@ -500,8 +500,8 @@ static int sym53c8xx_queue_command_lck(struct scsi_cmnd *cmd,
 	 *  Shorten our settle_time if needed for 
 	 *  this command not to time out.
 	 */
-	if (np->s.settle_time_valid && cmd->request->timeout) {
-		unsigned long tlimit = jiffies + cmd->request->timeout;
+	if (np->s.settle_time_valid && scsi_cmd_to_rq(cmd)->timeout) {
+		unsigned long tlimit = jiffies + scsi_cmd_to_rq(cmd)->timeout;
 		tlimit -= SYM_CONF_TIMER_INTERVAL*2;
 		if (time_after(np->s.settle_time, tlimit)) {
 			np->s.settle_time = tlimit;
diff --git a/drivers/scsi/ufs/Kconfig b/drivers/scsi/ufs/Kconfig
index 2d137953e7b4..432df76e6318 100644
--- a/drivers/scsi/ufs/Kconfig
+++ b/drivers/scsi/ufs/Kconfig
@@ -183,3 +183,19 @@ config SCSI_UFS_CRYPTO
 	  Enabling this makes it possible for the kernel to use the crypto
 	  capabilities of the UFS device (if present) to perform crypto
 	  operations on data being transferred to/from the device.
+
+config SCSI_UFS_HPB
+	bool "Support UFS Host Performance Booster"
+	depends on SCSI_UFSHCD
+	help
+	  The UFS HPB feature improves random read performance. It caches
+	  L2P (logical to physical) map of UFS to host DRAM. The driver uses HPB
+	  read command by piggybacking physical page number for bypassing FTL (flash
+	  translation layer)'s L2P address translation.
+
+config SCSI_UFS_FAULT_INJECTION
+	bool "UFS Fault Injection Support"
+	depends on SCSI_UFSHCD && FAULT_INJECTION
+	help
+	  Enable fault injection support in the UFS driver. This makes it easier
+	  to test the UFS error handler and abort handler.
diff --git a/drivers/scsi/ufs/Makefile b/drivers/scsi/ufs/Makefile
index 06f3a3fe4a44..c407da9b5171 100644
--- a/drivers/scsi/ufs/Makefile
+++ b/drivers/scsi/ufs/Makefile
@@ -8,6 +8,8 @@ ufshcd-core-y				+= ufshcd.o ufs-sysfs.o
 ufshcd-core-$(CONFIG_DEBUG_FS)		+= ufs-debugfs.o
 ufshcd-core-$(CONFIG_SCSI_UFS_BSG)	+= ufs_bsg.o
 ufshcd-core-$(CONFIG_SCSI_UFS_CRYPTO)	+= ufshcd-crypto.o
+ufshcd-core-$(CONFIG_SCSI_UFS_HPB)	+= ufshpb.o
+ufshcd-core-$(CONFIG_SCSI_UFS_FAULT_INJECTION) += ufs-fault-injection.o
 
 obj-$(CONFIG_SCSI_UFS_DWC_TC_PCI) += tc-dwc-g210-pci.o ufshcd-dwc.o tc-dwc-g210.o
 obj-$(CONFIG_SCSI_UFS_DWC_TC_PLATFORM) += tc-dwc-g210-pltfrm.o ufshcd-dwc.o tc-dwc-g210.o
diff --git a/drivers/scsi/ufs/cdns-pltfrm.c b/drivers/scsi/ufs/cdns-pltfrm.c
index 908ff39c4856..7da8be2f35c4 100644
--- a/drivers/scsi/ufs/cdns-pltfrm.c
+++ b/drivers/scsi/ufs/cdns-pltfrm.c
@@ -318,11 +318,8 @@ static int cdns_ufs_pltfrm_remove(struct platform_device *pdev)
 }
 
 static const struct dev_pm_ops cdns_ufs_dev_pm_ops = {
-	.suspend         = ufshcd_pltfrm_suspend,
-	.resume          = ufshcd_pltfrm_resume,
-	.runtime_suspend = ufshcd_pltfrm_runtime_suspend,
-	.runtime_resume  = ufshcd_pltfrm_runtime_resume,
-	.runtime_idle    = ufshcd_pltfrm_runtime_idle,
+	SET_SYSTEM_SLEEP_PM_OPS(ufshcd_system_suspend, ufshcd_system_resume)
+	SET_RUNTIME_PM_OPS(ufshcd_runtime_suspend, ufshcd_runtime_resume, NULL)
 	.prepare	 = ufshcd_suspend_prepare,
 	.complete	 = ufshcd_resume_complete,
 };
diff --git a/drivers/scsi/ufs/tc-dwc-g210-pci.c b/drivers/scsi/ufs/tc-dwc-g210-pci.c
index ec4589afbc13..679289e1a78e 100644
--- a/drivers/scsi/ufs/tc-dwc-g210-pci.c
+++ b/drivers/scsi/ufs/tc-dwc-g210-pci.c
@@ -23,31 +23,6 @@ static int tc_type = TC_G210_INV;
 module_param(tc_type, int, 0);
 MODULE_PARM_DESC(tc_type, "Test Chip Type (20 = 20-bit, 40 = 40-bit)");
 
-static int tc_dwc_g210_pci_suspend(struct device *dev)
-{
-	return ufshcd_system_suspend(dev_get_drvdata(dev));
-}
-
-static int tc_dwc_g210_pci_resume(struct device *dev)
-{
-	return ufshcd_system_resume(dev_get_drvdata(dev));
-}
-
-static int tc_dwc_g210_pci_runtime_suspend(struct device *dev)
-{
-	return ufshcd_runtime_suspend(dev_get_drvdata(dev));
-}
-
-static int tc_dwc_g210_pci_runtime_resume(struct device *dev)
-{
-	return ufshcd_runtime_resume(dev_get_drvdata(dev));
-}
-
-static int tc_dwc_g210_pci_runtime_idle(struct device *dev)
-{
-	return ufshcd_runtime_idle(dev_get_drvdata(dev));
-}
-
 /*
  * struct ufs_hba_dwc_vops - UFS DWC specific variant operations
  */
@@ -143,11 +118,8 @@ tc_dwc_g210_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 }
 
 static const struct dev_pm_ops tc_dwc_g210_pci_pm_ops = {
-	.suspend	= tc_dwc_g210_pci_suspend,
-	.resume		= tc_dwc_g210_pci_resume,
-	.runtime_suspend = tc_dwc_g210_pci_runtime_suspend,
-	.runtime_resume  = tc_dwc_g210_pci_runtime_resume,
-	.runtime_idle    = tc_dwc_g210_pci_runtime_idle,
+	SET_SYSTEM_SLEEP_PM_OPS(ufshcd_system_suspend, ufshcd_system_resume)
+	SET_RUNTIME_PM_OPS(ufshcd_runtime_suspend, ufshcd_runtime_resume, NULL)
 	.prepare	 = ufshcd_suspend_prepare,
 	.complete	 = ufshcd_resume_complete,
 };
diff --git a/drivers/scsi/ufs/tc-dwc-g210-pltfrm.c b/drivers/scsi/ufs/tc-dwc-g210-pltfrm.c
index a1268e4f44d6..783ec43efa78 100644
--- a/drivers/scsi/ufs/tc-dwc-g210-pltfrm.c
+++ b/drivers/scsi/ufs/tc-dwc-g210-pltfrm.c
@@ -84,11 +84,8 @@ static int tc_dwc_g210_pltfm_remove(struct platform_device *pdev)
 }
 
 static const struct dev_pm_ops tc_dwc_g210_pltfm_pm_ops = {
-	.suspend	= ufshcd_pltfrm_suspend,
-	.resume		= ufshcd_pltfrm_resume,
-	.runtime_suspend = ufshcd_pltfrm_runtime_suspend,
-	.runtime_resume  = ufshcd_pltfrm_runtime_resume,
-	.runtime_idle    = ufshcd_pltfrm_runtime_idle,
+	SET_SYSTEM_SLEEP_PM_OPS(ufshcd_system_suspend, ufshcd_system_resume)
+	SET_RUNTIME_PM_OPS(ufshcd_runtime_suspend, ufshcd_runtime_resume, NULL)
 };
 
 static struct platform_driver tc_dwc_g210_pltfm_driver = {
diff --git a/drivers/scsi/ufs/ufs-exynos.c b/drivers/scsi/ufs/ufs-exynos.c
index cf46d6f86e0e..a14dd8ce56d4 100644
--- a/drivers/scsi/ufs/ufs-exynos.c
+++ b/drivers/scsi/ufs/ufs-exynos.c
@@ -260,7 +260,7 @@ static int exynos_ufs_get_clk_info(struct exynos_ufs *ufs)
 	struct ufs_hba *hba = ufs->hba;
 	struct list_head *head = &hba->clk_list_head;
 	struct ufs_clk_info *clki;
-	u32 pclk_rate;
+	unsigned long pclk_rate;
 	u32 f_min, f_max;
 	u8 div = 0;
 	int ret = 0;
@@ -299,7 +299,7 @@ static int exynos_ufs_get_clk_info(struct exynos_ufs *ufs)
 	}
 
 	if (unlikely(pclk_rate < f_min || pclk_rate > f_max)) {
-		dev_err(hba->dev, "not available pclk range %d\n", pclk_rate);
+		dev_err(hba->dev, "not available pclk range %lu\n", pclk_rate);
 		ret = -EINVAL;
 		goto out;
 	}
@@ -1287,11 +1287,8 @@ static const struct of_device_id exynos_ufs_of_match[] = {
 };
 
 static const struct dev_pm_ops exynos_ufs_pm_ops = {
-	.suspend	= ufshcd_pltfrm_suspend,
-	.resume		= ufshcd_pltfrm_resume,
-	.runtime_suspend = ufshcd_pltfrm_runtime_suspend,
-	.runtime_resume  = ufshcd_pltfrm_runtime_resume,
-	.runtime_idle    = ufshcd_pltfrm_runtime_idle,
+	SET_SYSTEM_SLEEP_PM_OPS(ufshcd_system_suspend, ufshcd_system_resume)
+	SET_RUNTIME_PM_OPS(ufshcd_runtime_suspend, ufshcd_runtime_resume, NULL)
 	.prepare	 = ufshcd_suspend_prepare,
 	.complete	 = ufshcd_resume_complete,
 };
diff --git a/drivers/scsi/ufs/ufs-exynos.h b/drivers/scsi/ufs/ufs-exynos.h
index 67505fe32ebf..dadf4fd10dd8 100644
--- a/drivers/scsi/ufs/ufs-exynos.h
+++ b/drivers/scsi/ufs/ufs-exynos.h
@@ -184,7 +184,7 @@ struct exynos_ufs {
 	u32 pclk_div;
 	u32 pclk_avail_min;
 	u32 pclk_avail_max;
-	u32 mclk_rate;
+	unsigned long mclk_rate;
 	int avail_ln_rx;
 	int avail_ln_tx;
 	int rx_sel_idx;
diff --git a/drivers/scsi/ufs/ufs-fault-injection.c b/drivers/scsi/ufs/ufs-fault-injection.c
new file mode 100644
index 000000000000..7ac7c4e7ff83
--- /dev/null
+++ b/drivers/scsi/ufs/ufs-fault-injection.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/kconfig.h>
+#include <linux/types.h>
+#include <linux/fault-inject.h>
+#include <linux/module.h>
+#include "ufs-fault-injection.h"
+
+static int ufs_fault_get(char *buffer, const struct kernel_param *kp);
+static int ufs_fault_set(const char *val, const struct kernel_param *kp);
+
+static const struct kernel_param_ops ufs_fault_ops = {
+	.get = ufs_fault_get,
+	.set = ufs_fault_set,
+};
+
+enum { FAULT_INJ_STR_SIZE = 80 };
+
+/*
+ * For more details about fault injection, please refer to
+ * Documentation/fault-injection/fault-injection.rst.
+ */
+static char g_trigger_eh_str[FAULT_INJ_STR_SIZE];
+module_param_cb(trigger_eh, &ufs_fault_ops, g_trigger_eh_str, 0644);
+MODULE_PARM_DESC(trigger_eh,
+	"Fault injection. trigger_eh=<interval>,<probability>,<space>,<times>");
+static DECLARE_FAULT_ATTR(ufs_trigger_eh_attr);
+
+static char g_timeout_str[FAULT_INJ_STR_SIZE];
+module_param_cb(timeout, &ufs_fault_ops, g_timeout_str, 0644);
+MODULE_PARM_DESC(timeout,
+	"Fault injection. timeout=<interval>,<probability>,<space>,<times>");
+static DECLARE_FAULT_ATTR(ufs_timeout_attr);
+
+static int ufs_fault_get(char *buffer, const struct kernel_param *kp)
+{
+	const char *fault_str = kp->arg;
+
+	return sysfs_emit(buffer, "%s\n", fault_str);
+}
+
+static int ufs_fault_set(const char *val, const struct kernel_param *kp)
+{
+	struct fault_attr *attr = NULL;
+
+	if (kp->arg == g_trigger_eh_str)
+		attr = &ufs_trigger_eh_attr;
+	else if (kp->arg == g_timeout_str)
+		attr = &ufs_timeout_attr;
+
+	if (WARN_ON_ONCE(!attr))
+		return -EINVAL;
+
+	if (!setup_fault_attr(attr, (char *)val))
+		return -EINVAL;
+
+	strlcpy(kp->arg, val, FAULT_INJ_STR_SIZE);
+
+	return 0;
+}
+
+bool ufs_trigger_eh(void)
+{
+	return should_fail(&ufs_trigger_eh_attr, 1);
+}
+
+bool ufs_fail_completion(void)
+{
+	return should_fail(&ufs_timeout_attr, 1);
+}
diff --git a/drivers/scsi/ufs/ufs-fault-injection.h b/drivers/scsi/ufs/ufs-fault-injection.h
new file mode 100644
index 000000000000..6d0cd8e10c87
--- /dev/null
+++ b/drivers/scsi/ufs/ufs-fault-injection.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _UFS_FAULT_INJECTION_H
+#define _UFS_FAULT_INJECTION_H
+
+#include <linux/kconfig.h>
+#include <linux/types.h>
+
+#ifdef CONFIG_SCSI_UFS_FAULT_INJECTION
+bool ufs_trigger_eh(void);
+bool ufs_fail_completion(void);
+#else
+static inline bool ufs_trigger_eh(void)
+{
+	return false;
+}
+
+static inline bool ufs_fail_completion(void)
+{
+	return false;
+}
+#endif
+
+#endif /* _UFS_FAULT_INJECTION_H */
diff --git a/drivers/scsi/ufs/ufs-hisi.c b/drivers/scsi/ufs/ufs-hisi.c
index 5b147a48161b..6b706de8354b 100644
--- a/drivers/scsi/ufs/ufs-hisi.c
+++ b/drivers/scsi/ufs/ufs-hisi.c
@@ -572,11 +572,8 @@ static int ufs_hisi_remove(struct platform_device *pdev)
 }
 
 static const struct dev_pm_ops ufs_hisi_pm_ops = {
-	.suspend	= ufshcd_pltfrm_suspend,
-	.resume		= ufshcd_pltfrm_resume,
-	.runtime_suspend = ufshcd_pltfrm_runtime_suspend,
-	.runtime_resume  = ufshcd_pltfrm_runtime_resume,
-	.runtime_idle    = ufshcd_pltfrm_runtime_idle,
+	SET_SYSTEM_SLEEP_PM_OPS(ufshcd_system_suspend, ufshcd_system_resume)
+	SET_RUNTIME_PM_OPS(ufshcd_runtime_suspend, ufshcd_runtime_resume, NULL)
 	.prepare	 = ufshcd_suspend_prepare,
 	.complete	 = ufshcd_resume_complete,
 };
diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c
index d2c251628a05..80b3545dd17d 100644
--- a/drivers/scsi/ufs/ufs-mediatek.c
+++ b/drivers/scsi/ufs/ufs-mediatek.c
@@ -1140,11 +1140,8 @@ static int ufs_mtk_remove(struct platform_device *pdev)
 }
 
 static const struct dev_pm_ops ufs_mtk_pm_ops = {
-	.suspend         = ufshcd_pltfrm_suspend,
-	.resume          = ufshcd_pltfrm_resume,
-	.runtime_suspend = ufshcd_pltfrm_runtime_suspend,
-	.runtime_resume  = ufshcd_pltfrm_runtime_resume,
-	.runtime_idle    = ufshcd_pltfrm_runtime_idle,
+	SET_SYSTEM_SLEEP_PM_OPS(ufshcd_system_suspend, ufshcd_system_resume)
+	SET_RUNTIME_PM_OPS(ufshcd_runtime_suspend, ufshcd_runtime_resume, NULL)
 	.prepare	 = ufshcd_suspend_prepare,
 	.complete	 = ufshcd_resume_complete,
 };
diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c
index 9b1d18d7c9bb..9d9770f1db4f 100644
--- a/drivers/scsi/ufs/ufs-qcom.c
+++ b/drivers/scsi/ufs/ufs-qcom.c
@@ -1546,11 +1546,8 @@ MODULE_DEVICE_TABLE(acpi, ufs_qcom_acpi_match);
 #endif
 
 static const struct dev_pm_ops ufs_qcom_pm_ops = {
-	.suspend	= ufshcd_pltfrm_suspend,
-	.resume		= ufshcd_pltfrm_resume,
-	.runtime_suspend = ufshcd_pltfrm_runtime_suspend,
-	.runtime_resume  = ufshcd_pltfrm_runtime_resume,
-	.runtime_idle    = ufshcd_pltfrm_runtime_idle,
+	SET_SYSTEM_SLEEP_PM_OPS(ufshcd_system_suspend, ufshcd_system_resume)
+	SET_RUNTIME_PM_OPS(ufshcd_runtime_suspend, ufshcd_runtime_resume, NULL)
 	.prepare	 = ufshcd_suspend_prepare,
 	.complete	 = ufshcd_resume_complete,
 };
diff --git a/drivers/scsi/ufs/ufs-sysfs.c b/drivers/scsi/ufs/ufs-sysfs.c
index 52bd807f7940..5c405ff7b6ea 100644
--- a/drivers/scsi/ufs/ufs-sysfs.c
+++ b/drivers/scsi/ufs/ufs-sysfs.c
@@ -604,6 +604,8 @@ UFS_DEVICE_DESC_PARAM(device_version, _DEV_VER, 2);
 UFS_DEVICE_DESC_PARAM(number_of_secure_wpa, _NUM_SEC_WPA, 1);
 UFS_DEVICE_DESC_PARAM(psa_max_data_size, _PSA_MAX_DATA, 4);
 UFS_DEVICE_DESC_PARAM(psa_state_timeout, _PSA_TMT, 1);
+UFS_DEVICE_DESC_PARAM(hpb_version, _HPB_VER, 2);
+UFS_DEVICE_DESC_PARAM(hpb_control, _HPB_CONTROL, 1);
 UFS_DEVICE_DESC_PARAM(ext_feature_sup, _EXT_UFS_FEATURE_SUP, 4);
 UFS_DEVICE_DESC_PARAM(wb_presv_us_en, _WB_PRESRV_USRSPC_EN, 1);
 UFS_DEVICE_DESC_PARAM(wb_type, _WB_TYPE, 1);
@@ -636,6 +638,8 @@ static struct attribute *ufs_sysfs_device_descriptor[] = {
 	&dev_attr_number_of_secure_wpa.attr,
 	&dev_attr_psa_max_data_size.attr,
 	&dev_attr_psa_state_timeout.attr,
+	&dev_attr_hpb_version.attr,
+	&dev_attr_hpb_control.attr,
 	&dev_attr_ext_feature_sup.attr,
 	&dev_attr_wb_presv_us_en.attr,
 	&dev_attr_wb_type.attr,
@@ -709,6 +713,10 @@ UFS_GEOMETRY_DESC_PARAM(enh4_memory_max_alloc_units,
 	_ENM4_MAX_NUM_UNITS, 4);
 UFS_GEOMETRY_DESC_PARAM(enh4_memory_capacity_adjustment_factor,
 	_ENM4_CAP_ADJ_FCTR, 2);
+UFS_GEOMETRY_DESC_PARAM(hpb_region_size, _HPB_REGION_SIZE, 1);
+UFS_GEOMETRY_DESC_PARAM(hpb_number_lu, _HPB_NUMBER_LU, 1);
+UFS_GEOMETRY_DESC_PARAM(hpb_subregion_size, _HPB_SUBREGION_SIZE, 1);
+UFS_GEOMETRY_DESC_PARAM(hpb_max_active_regions, _HPB_MAX_ACTIVE_REGS, 2);
 UFS_GEOMETRY_DESC_PARAM(wb_max_alloc_units, _WB_MAX_ALLOC_UNITS, 4);
 UFS_GEOMETRY_DESC_PARAM(wb_max_wb_luns, _WB_MAX_WB_LUNS, 1);
 UFS_GEOMETRY_DESC_PARAM(wb_buff_cap_adj, _WB_BUFF_CAP_ADJ, 1);
@@ -746,6 +754,10 @@ static struct attribute *ufs_sysfs_geometry_descriptor[] = {
 	&dev_attr_enh3_memory_capacity_adjustment_factor.attr,
 	&dev_attr_enh4_memory_max_alloc_units.attr,
 	&dev_attr_enh4_memory_capacity_adjustment_factor.attr,
+	&dev_attr_hpb_region_size.attr,
+	&dev_attr_hpb_number_lu.attr,
+	&dev_attr_hpb_subregion_size.attr,
+	&dev_attr_hpb_max_active_regions.attr,
 	&dev_attr_wb_max_alloc_units.attr,
 	&dev_attr_wb_max_wb_luns.attr,
 	&dev_attr_wb_buff_cap_adj.attr,
@@ -1006,6 +1018,7 @@ UFS_FLAG(disable_fw_update, _PERMANENTLY_DISABLE_FW_UPDATE);
 UFS_FLAG(wb_enable, _WB_EN);
 UFS_FLAG(wb_flush_en, _WB_BUFF_FLUSH_EN);
 UFS_FLAG(wb_flush_during_h8, _WB_BUFF_FLUSH_DURING_HIBERN8);
+UFS_FLAG(hpb_enable, _HPB_EN);
 
 static struct attribute *ufs_sysfs_device_flags[] = {
 	&dev_attr_device_init.attr,
@@ -1019,6 +1032,7 @@ static struct attribute *ufs_sysfs_device_flags[] = {
 	&dev_attr_wb_enable.attr,
 	&dev_attr_wb_flush_en.attr,
 	&dev_attr_wb_flush_during_h8.attr,
+	&dev_attr_hpb_enable.attr,
 	NULL,
 };
 
@@ -1065,6 +1079,7 @@ out:									\
 static DEVICE_ATTR_RO(_name)
 
 UFS_ATTRIBUTE(boot_lun_enabled, _BOOT_LU_EN);
+UFS_ATTRIBUTE(max_data_size_hpb_single_cmd, _MAX_HPB_SINGLE_CMD);
 UFS_ATTRIBUTE(current_power_mode, _POWER_MODE);
 UFS_ATTRIBUTE(active_icc_level, _ACTIVE_ICC_LVL);
 UFS_ATTRIBUTE(ooo_data_enabled, _OOO_DATA_EN);
@@ -1088,6 +1103,7 @@ UFS_ATTRIBUTE(wb_cur_buf, _CURR_WB_BUFF_SIZE);
 
 static struct attribute *ufs_sysfs_attributes[] = {
 	&dev_attr_boot_lun_enabled.attr,
+	&dev_attr_max_data_size_hpb_single_cmd.attr,
 	&dev_attr_current_power_mode.attr,
 	&dev_attr_active_icc_level.attr,
 	&dev_attr_ooo_data_enabled.attr,
@@ -1147,6 +1163,7 @@ static DEVICE_ATTR_RO(_pname)
 #define UFS_UNIT_DESC_PARAM(_name, _uname, _size)			\
 	UFS_LUN_DESC_PARAM(_name, _uname, UNIT, _size)
 
+UFS_UNIT_DESC_PARAM(lu_enable, _LU_ENABLE, 1);
 UFS_UNIT_DESC_PARAM(boot_lun_id, _BOOT_LUN_ID, 1);
 UFS_UNIT_DESC_PARAM(lun_write_protect, _LU_WR_PROTECT, 1);
 UFS_UNIT_DESC_PARAM(lun_queue_depth, _LU_Q_DEPTH, 1);
@@ -1160,10 +1177,13 @@ UFS_UNIT_DESC_PARAM(provisioning_type, _PROVISIONING_TYPE, 1);
 UFS_UNIT_DESC_PARAM(physical_memory_resourse_count, _PHY_MEM_RSRC_CNT, 8);
 UFS_UNIT_DESC_PARAM(context_capabilities, _CTX_CAPABILITIES, 2);
 UFS_UNIT_DESC_PARAM(large_unit_granularity, _LARGE_UNIT_SIZE_M1, 1);
+UFS_UNIT_DESC_PARAM(hpb_lu_max_active_regions, _HPB_LU_MAX_ACTIVE_RGNS, 2);
+UFS_UNIT_DESC_PARAM(hpb_pinned_region_start_offset, _HPB_PIN_RGN_START_OFF, 2);
+UFS_UNIT_DESC_PARAM(hpb_number_pinned_regions, _HPB_NUM_PIN_RGNS, 2);
 UFS_UNIT_DESC_PARAM(wb_buf_alloc_units, _WB_BUF_ALLOC_UNITS, 4);
 
-
 static struct attribute *ufs_sysfs_unit_descriptor[] = {
+	&dev_attr_lu_enable.attr,
 	&dev_attr_boot_lun_id.attr,
 	&dev_attr_lun_write_protect.attr,
 	&dev_attr_lun_queue_depth.attr,
@@ -1177,6 +1197,9 @@ static struct attribute *ufs_sysfs_unit_descriptor[] = {
 	&dev_attr_physical_memory_resourse_count.attr,
 	&dev_attr_context_capabilities.attr,
 	&dev_attr_large_unit_granularity.attr,
+	&dev_attr_hpb_lu_max_active_regions.attr,
+	&dev_attr_hpb_pinned_region_start_offset.attr,
+	&dev_attr_hpb_number_pinned_regions.attr,
 	&dev_attr_wb_buf_alloc_units.attr,
 	NULL,
 };
diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h
index cb80b9670bfe..8c6b38b1b142 100644
--- a/drivers/scsi/ufs/ufs.h
+++ b/drivers/scsi/ufs/ufs.h
@@ -122,12 +122,14 @@ enum flag_idn {
 	QUERY_FLAG_IDN_WB_EN                            = 0x0E,
 	QUERY_FLAG_IDN_WB_BUFF_FLUSH_EN                 = 0x0F,
 	QUERY_FLAG_IDN_WB_BUFF_FLUSH_DURING_HIBERN8     = 0x10,
+	QUERY_FLAG_IDN_HPB_RESET                        = 0x11,
+	QUERY_FLAG_IDN_HPB_EN				= 0x12,
 };
 
 /* Attribute idn for Query requests */
 enum attr_idn {
 	QUERY_ATTR_IDN_BOOT_LU_EN		= 0x00,
-	QUERY_ATTR_IDN_RESERVED			= 0x01,
+	QUERY_ATTR_IDN_MAX_HPB_SINGLE_CMD	= 0x01,
 	QUERY_ATTR_IDN_POWER_MODE		= 0x02,
 	QUERY_ATTR_IDN_ACTIVE_ICC_LVL		= 0x03,
 	QUERY_ATTR_IDN_OOO_DATA_EN		= 0x04,
@@ -195,6 +197,9 @@ enum unit_desc_param {
 	UNIT_DESC_PARAM_PHY_MEM_RSRC_CNT	= 0x18,
 	UNIT_DESC_PARAM_CTX_CAPABILITIES	= 0x20,
 	UNIT_DESC_PARAM_LARGE_UNIT_SIZE_M1	= 0x22,
+	UNIT_DESC_PARAM_HPB_LU_MAX_ACTIVE_RGNS	= 0x23,
+	UNIT_DESC_PARAM_HPB_PIN_RGN_START_OFF	= 0x25,
+	UNIT_DESC_PARAM_HPB_NUM_PIN_RGNS	= 0x27,
 	UNIT_DESC_PARAM_WB_BUF_ALLOC_UNITS	= 0x29,
 };
 
@@ -235,6 +240,8 @@ enum device_desc_param {
 	DEVICE_DESC_PARAM_PSA_MAX_DATA		= 0x25,
 	DEVICE_DESC_PARAM_PSA_TMT		= 0x29,
 	DEVICE_DESC_PARAM_PRDCT_REV		= 0x2A,
+	DEVICE_DESC_PARAM_HPB_VER		= 0x40,
+	DEVICE_DESC_PARAM_HPB_CONTROL		= 0x42,
 	DEVICE_DESC_PARAM_EXT_UFS_FEATURE_SUP	= 0x4F,
 	DEVICE_DESC_PARAM_WB_PRESRV_USRSPC_EN	= 0x53,
 	DEVICE_DESC_PARAM_WB_TYPE		= 0x54,
@@ -283,6 +290,10 @@ enum geometry_desc_param {
 	GEOMETRY_DESC_PARAM_ENM4_MAX_NUM_UNITS	= 0x3E,
 	GEOMETRY_DESC_PARAM_ENM4_CAP_ADJ_FCTR	= 0x42,
 	GEOMETRY_DESC_PARAM_OPT_LOG_BLK_SIZE	= 0x44,
+	GEOMETRY_DESC_PARAM_HPB_REGION_SIZE	= 0x48,
+	GEOMETRY_DESC_PARAM_HPB_NUMBER_LU	= 0x49,
+	GEOMETRY_DESC_PARAM_HPB_SUBREGION_SIZE	= 0x4A,
+	GEOMETRY_DESC_PARAM_HPB_MAX_ACTIVE_REGS	= 0x4B,
 	GEOMETRY_DESC_PARAM_WB_MAX_ALLOC_UNITS	= 0x4F,
 	GEOMETRY_DESC_PARAM_WB_MAX_WB_LUNS	= 0x53,
 	GEOMETRY_DESC_PARAM_WB_BUFF_CAP_ADJ	= 0x54,
@@ -327,8 +338,10 @@ enum {
 
 /* Possible values for dExtendedUFSFeaturesSupport */
 enum {
+	UFS_DEV_HPB_SUPPORT		= BIT(7),
 	UFS_DEV_WRITE_BOOSTER_SUP	= BIT(8),
 };
+#define UFS_DEV_HPB_SUPPORT_VERSION		0x310
 
 #define POWER_DESC_MAX_ACTV_ICC_LVLS		16
 
@@ -466,6 +479,41 @@ struct utp_cmd_rsp {
 	u8 sense_data[UFS_SENSE_SIZE];
 };
 
+struct ufshpb_active_field {
+	__be16 active_rgn;
+	__be16 active_srgn;
+};
+#define HPB_ACT_FIELD_SIZE 4
+
+/**
+ * struct utp_hpb_rsp - Response UPIU structure
+ * @residual_transfer_count: Residual transfer count DW-3
+ * @reserved1: Reserved double words DW-4 to DW-7
+ * @sense_data_len: Sense data length DW-8 U16
+ * @desc_type: Descriptor type of sense data
+ * @additional_len: Additional length of sense data
+ * @hpb_op: HPB operation type
+ * @lun: LUN of response UPIU
+ * @active_rgn_cnt: Active region count
+ * @inactive_rgn_cnt: Inactive region count
+ * @hpb_active_field: Recommended to read HPB region and subregion
+ * @hpb_inactive_field: To be inactivated HPB region and subregion
+ */
+struct utp_hpb_rsp {
+	__be32 residual_transfer_count;
+	__be32 reserved1[4];
+	__be16 sense_data_len;
+	u8 desc_type;
+	u8 additional_len;
+	u8 hpb_op;
+	u8 lun;
+	u8 active_rgn_cnt;
+	u8 inactive_rgn_cnt;
+	struct ufshpb_active_field hpb_active_field[2];
+	__be16 hpb_inactive_field[2];
+};
+#define UTP_HPB_RSP_SIZE 40
+
 /**
  * struct utp_upiu_rsp - general upiu response structure
  * @header: UPIU header structure DW-0 to DW-2
@@ -476,6 +524,7 @@ struct utp_upiu_rsp {
 	struct utp_upiu_header header;
 	union {
 		struct utp_cmd_rsp sr;
+		struct utp_hpb_rsp hr;
 		struct utp_upiu_query qr;
 	};
 };
@@ -544,6 +593,9 @@ struct ufs_dev_info {
 	u16	wspecversion;
 	u32	clk_gating_wait_us;
 
+	/* UFS HPB related flag */
+	bool	hpb_enabled;
+
 	/* UFS WB related flags */
 	bool    wb_enabled;
 	bool    wb_buf_flush_enabled;
diff --git a/drivers/scsi/ufs/ufs_quirks.h b/drivers/scsi/ufs/ufs_quirks.h
index 07f559ac5883..35ec9ea79869 100644
--- a/drivers/scsi/ufs/ufs_quirks.h
+++ b/drivers/scsi/ufs/ufs_quirks.h
@@ -116,4 +116,10 @@ struct ufs_dev_fix {
  */
 #define UFS_DEVICE_QUIRK_DELAY_AFTER_LPM        (1 << 11)
 
+/*
+ * Some UFS devices require L2P entry should be swapped before being sent to the
+ * UFS device for HPB READ command.
+ */
+#define UFS_DEVICE_QUIRK_SWAP_L2P_ENTRY_FOR_HPB_READ (1 << 12)
+
 #endif /* UFS_QUIRKS_H_ */
diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c
index e6c334bfb4c2..b3bcc5c882da 100644
--- a/drivers/scsi/ufs/ufshcd-pci.c
+++ b/drivers/scsi/ufs/ufshcd-pci.c
@@ -385,48 +385,6 @@ static struct ufs_hba_variant_ops ufs_intel_lkf_hba_vops = {
 	.device_reset		= ufs_intel_device_reset,
 };
 
-#ifdef CONFIG_PM_SLEEP
-/**
- * ufshcd_pci_suspend - suspend power management function
- * @dev: pointer to PCI device handle
- *
- * Returns 0 if successful
- * Returns non-zero otherwise
- */
-static int ufshcd_pci_suspend(struct device *dev)
-{
-	return ufshcd_system_suspend(dev_get_drvdata(dev));
-}
-
-/**
- * ufshcd_pci_resume - resume power management function
- * @dev: pointer to PCI device handle
- *
- * Returns 0 if successful
- * Returns non-zero otherwise
- */
-static int ufshcd_pci_resume(struct device *dev)
-{
-	return ufshcd_system_resume(dev_get_drvdata(dev));
-}
-
-#endif /* !CONFIG_PM_SLEEP */
-
-#ifdef CONFIG_PM
-static int ufshcd_pci_runtime_suspend(struct device *dev)
-{
-	return ufshcd_runtime_suspend(dev_get_drvdata(dev));
-}
-static int ufshcd_pci_runtime_resume(struct device *dev)
-{
-	return ufshcd_runtime_resume(dev_get_drvdata(dev));
-}
-static int ufshcd_pci_runtime_idle(struct device *dev)
-{
-	return ufshcd_runtime_idle(dev_get_drvdata(dev));
-}
-#endif /* !CONFIG_PM */
-
 /**
  * ufshcd_pci_shutdown - main function to put the controller in reset state
  * @pdev: pointer to PCI device handle
@@ -510,10 +468,8 @@ ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 }
 
 static const struct dev_pm_ops ufshcd_pci_pm_ops = {
-	SET_RUNTIME_PM_OPS(ufshcd_pci_runtime_suspend,
-			   ufshcd_pci_runtime_resume,
-			   ufshcd_pci_runtime_idle)
-	SET_SYSTEM_SLEEP_PM_OPS(ufshcd_pci_suspend, ufshcd_pci_resume)
+	SET_SYSTEM_SLEEP_PM_OPS(ufshcd_system_suspend, ufshcd_system_resume)
+	SET_RUNTIME_PM_OPS(ufshcd_runtime_suspend, ufshcd_runtime_resume, NULL)
 #ifdef CONFIG_PM_SLEEP
 	.prepare	= ufshcd_suspend_prepare,
 	.complete	= ufshcd_resume_complete,
diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c
index 298e22ef907e..8859c13f4e09 100644
--- a/drivers/scsi/ufs/ufshcd-pltfrm.c
+++ b/drivers/scsi/ufs/ufshcd-pltfrm.c
@@ -170,53 +170,6 @@ out:
 	return err;
 }
 
-#ifdef CONFIG_PM
-/**
- * ufshcd_pltfrm_suspend - suspend power management function
- * @dev: pointer to device handle
- *
- * Returns 0 if successful
- * Returns non-zero otherwise
- */
-int ufshcd_pltfrm_suspend(struct device *dev)
-{
-	return ufshcd_system_suspend(dev_get_drvdata(dev));
-}
-EXPORT_SYMBOL_GPL(ufshcd_pltfrm_suspend);
-
-/**
- * ufshcd_pltfrm_resume - resume power management function
- * @dev: pointer to device handle
- *
- * Returns 0 if successful
- * Returns non-zero otherwise
- */
-int ufshcd_pltfrm_resume(struct device *dev)
-{
-	return ufshcd_system_resume(dev_get_drvdata(dev));
-}
-EXPORT_SYMBOL_GPL(ufshcd_pltfrm_resume);
-
-int ufshcd_pltfrm_runtime_suspend(struct device *dev)
-{
-	return ufshcd_runtime_suspend(dev_get_drvdata(dev));
-}
-EXPORT_SYMBOL_GPL(ufshcd_pltfrm_runtime_suspend);
-
-int ufshcd_pltfrm_runtime_resume(struct device *dev)
-{
-	return ufshcd_runtime_resume(dev_get_drvdata(dev));
-}
-EXPORT_SYMBOL_GPL(ufshcd_pltfrm_runtime_resume);
-
-int ufshcd_pltfrm_runtime_idle(struct device *dev)
-{
-	return ufshcd_runtime_idle(dev_get_drvdata(dev));
-}
-EXPORT_SYMBOL_GPL(ufshcd_pltfrm_runtime_idle);
-
-#endif /* CONFIG_PM */
-
 void ufshcd_pltfrm_shutdown(struct platform_device *pdev)
 {
 	ufshcd_shutdown((struct ufs_hba *)platform_get_drvdata(pdev));
diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.h b/drivers/scsi/ufs/ufshcd-pltfrm.h
index 772a8e848098..c33e28ac6ef6 100644
--- a/drivers/scsi/ufs/ufshcd-pltfrm.h
+++ b/drivers/scsi/ufs/ufshcd-pltfrm.h
@@ -33,22 +33,4 @@ int ufshcd_pltfrm_init(struct platform_device *pdev,
 		       const struct ufs_hba_variant_ops *vops);
 void ufshcd_pltfrm_shutdown(struct platform_device *pdev);
 
-#ifdef CONFIG_PM
-
-int ufshcd_pltfrm_suspend(struct device *dev);
-int ufshcd_pltfrm_resume(struct device *dev);
-int ufshcd_pltfrm_runtime_suspend(struct device *dev);
-int ufshcd_pltfrm_runtime_resume(struct device *dev);
-int ufshcd_pltfrm_runtime_idle(struct device *dev);
-
-#else /* !CONFIG_PM */
-
-#define ufshcd_pltfrm_suspend	NULL
-#define ufshcd_pltfrm_resume	NULL
-#define ufshcd_pltfrm_runtime_suspend	NULL
-#define ufshcd_pltfrm_runtime_resume	NULL
-#define ufshcd_pltfrm_runtime_idle	NULL
-
-#endif /* CONFIG_PM */
-
 #endif /* UFSHCD_PLTFRM_H_ */
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 708b3b62fc4d..3841ab49f556 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -17,15 +17,18 @@
 #include <linux/blk-pm.h>
 #include <linux/blkdev.h>
 #include <scsi/scsi_driver.h>
+#include <scsi/scsi_transport.h>
+#include "../scsi_transport_api.h"
 #include "ufshcd.h"
 #include "ufs_quirks.h"
 #include "unipro.h"
 #include "ufs-sysfs.h"
 #include "ufs-debugfs.h"
+#include "ufs-fault-injection.h"
 #include "ufs_bsg.h"
 #include "ufshcd-crypto.h"
+#include "ufshpb.h"
 #include <asm/unaligned.h>
-#include "../sd.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/ufs.h>
@@ -128,15 +131,6 @@ enum {
 	UFSHCD_CAN_QUEUE	= 32,
 };
 
-/* UFSHCD states */
-enum {
-	UFSHCD_STATE_RESET,
-	UFSHCD_STATE_ERROR,
-	UFSHCD_STATE_OPERATIONAL,
-	UFSHCD_STATE_EH_SCHEDULED_FATAL,
-	UFSHCD_STATE_EH_SCHEDULED_NON_FATAL,
-};
-
 /* UFSHCD error handling flags */
 enum {
 	UFSHCD_EH_IN_PROGRESS = (1 << 0),
@@ -205,7 +199,8 @@ ufs_get_desired_pm_lvl_for_dev_link_state(enum ufs_dev_pwr_mode dev_state,
 static struct ufs_dev_fix ufs_fixups[] = {
 	/* UFS cards deviations table */
 	UFS_FIX(UFS_VENDOR_MICRON, UFS_ANY_MODEL,
-		UFS_DEVICE_QUIRK_DELAY_BEFORE_LPM),
+		UFS_DEVICE_QUIRK_DELAY_BEFORE_LPM |
+		UFS_DEVICE_QUIRK_SWAP_L2P_ENTRY_FOR_HPB_READ),
 	UFS_FIX(UFS_VENDOR_SAMSUNG, UFS_ANY_MODEL,
 		UFS_DEVICE_QUIRK_DELAY_BEFORE_LPM |
 		UFS_DEVICE_QUIRK_HOST_PA_TACTIVATE |
@@ -242,7 +237,6 @@ static int ufshcd_scale_clks(struct ufs_hba *hba, bool scale_up);
 static irqreturn_t ufshcd_intr(int irq, void *__hba);
 static int ufshcd_change_power_mode(struct ufs_hba *hba,
 			     struct ufs_pa_layer_attr *pwr_mode);
-static void ufshcd_schedule_eh_work(struct ufs_hba *hba);
 static int ufshcd_setup_hba_vreg(struct ufs_hba *hba, bool on);
 static int ufshcd_setup_vreg(struct ufs_hba *hba, bool on);
 static inline int ufshcd_config_vreg_hpm(struct ufs_hba *hba,
@@ -253,11 +247,6 @@ static inline void ufshcd_wb_toggle_flush(struct ufs_hba *hba, bool enable);
 static void ufshcd_hba_vreg_set_lpm(struct ufs_hba *hba);
 static void ufshcd_hba_vreg_set_hpm(struct ufs_hba *hba);
 
-static inline bool ufshcd_valid_tag(struct ufs_hba *hba, int tag)
-{
-	return tag >= 0 && tag < hba->nutrs;
-}
-
 static inline void ufshcd_enable_irq(struct ufs_hba *hba)
 {
 	if (!hba->is_irq_enabled) {
@@ -371,26 +360,24 @@ static void ufshcd_add_uic_command_trace(struct ufs_hba *hba,
 static void ufshcd_add_command_trace(struct ufs_hba *hba, unsigned int tag,
 				     enum ufs_trace_str_t str_t)
 {
-	u64 lba = -1;
+	u64 lba;
 	u8 opcode = 0, group_id = 0;
 	u32 intr, doorbell;
 	struct ufshcd_lrb *lrbp = &hba->lrb[tag];
 	struct scsi_cmnd *cmd = lrbp->cmd;
+	struct request *rq = scsi_cmd_to_rq(cmd);
 	int transfer_len = -1;
 
 	if (!cmd)
 		return;
 
-	if (!trace_ufshcd_command_enabled()) {
-		/* trace UPIU W/O tracing command */
-		ufshcd_add_cmd_upiu_trace(hba, tag, str_t);
-		return;
-	}
-
 	/* trace UPIU also */
 	ufshcd_add_cmd_upiu_trace(hba, tag, str_t);
+	if (!trace_ufshcd_command_enabled())
+		return;
+
 	opcode = cmd->cmnd[0];
-	lba = sectors_to_logical(cmd->device, blk_rq_pos(cmd->request));
+	lba = scsi_get_lba(cmd);
 
 	if (opcode == READ_10 || opcode == WRITE_10) {
 		/*
@@ -404,7 +391,7 @@ static void ufshcd_add_command_trace(struct ufs_hba *hba, unsigned int tag,
 		/*
 		 * The number of Bytes to be unmapped beginning with the lba.
 		 */
-		transfer_len = blk_rq_bytes(cmd->request);
+		transfer_len = blk_rq_bytes(rq);
 	}
 
 	intr = ufshcd_readl(hba, REG_INTERRUPT_STATUS);
@@ -758,16 +745,6 @@ static inline void ufshcd_utmrl_clear(struct ufs_hba *hba, u32 pos)
 }
 
 /**
- * ufshcd_outstanding_req_clear - Clear a bit in outstanding request field
- * @hba: per adapter instance
- * @tag: position of the bit to be cleared
- */
-static inline void ufshcd_outstanding_req_clear(struct ufs_hba *hba, int tag)
-{
-	clear_bit(tag, &hba->outstanding_reqs);
-}
-
-/**
  * ufshcd_get_lists_status - Check UCRDY, UTRLRDY and UTMRLRDY
  * @reg: Register value of host controller status
  *
@@ -2078,7 +2055,7 @@ static void ufshcd_update_monitor(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
 
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	if (dir >= 0 && hba->monitor.nr_queued[dir] > 0) {
-		struct request *req = lrbp->cmd->request;
+		struct request *req = scsi_cmd_to_rq(lrbp->cmd);
 		struct ufs_hba_monitor *m = &hba->monitor;
 		ktime_t now, inc, lat;
 
@@ -2112,27 +2089,22 @@ static inline
 void ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag)
 {
 	struct ufshcd_lrb *lrbp = &hba->lrb[task_tag];
+	unsigned long flags;
 
 	lrbp->issue_time_stamp = ktime_get();
 	lrbp->compl_time_stamp = ktime_set(0, 0);
-	ufshcd_vops_setup_xfer_req(hba, task_tag, (lrbp->cmd ? true : false));
 	ufshcd_add_command_trace(hba, task_tag, UFS_CMD_SEND);
 	ufshcd_clk_scaling_start_busy(hba);
 	if (unlikely(ufshcd_should_inform_monitor(hba, lrbp)))
 		ufshcd_start_monitor(hba, lrbp);
-	if (ufshcd_has_utrlcnr(hba)) {
-		set_bit(task_tag, &hba->outstanding_reqs);
-		ufshcd_writel(hba, 1 << task_tag,
-			      REG_UTP_TRANSFER_REQ_DOOR_BELL);
-	} else {
-		unsigned long flags;
 
-		spin_lock_irqsave(hba->host->host_lock, flags);
-		set_bit(task_tag, &hba->outstanding_reqs);
-		ufshcd_writel(hba, 1 << task_tag,
-			      REG_UTP_TRANSFER_REQ_DOOR_BELL);
-		spin_unlock_irqrestore(hba->host->host_lock, flags);
-	}
+	spin_lock_irqsave(&hba->outstanding_lock, flags);
+	if (hba->vops && hba->vops->setup_xfer_req)
+		hba->vops->setup_xfer_req(hba, task_tag, !!lrbp->cmd);
+	__set_bit(task_tag, &hba->outstanding_reqs);
+	ufshcd_writel(hba, 1 << task_tag, REG_UTP_TRANSFER_REQ_DOOR_BELL);
+	spin_unlock_irqrestore(&hba->outstanding_lock, flags);
+
 	/* Make sure that doorbell is committed immediately */
 	wmb();
 }
@@ -2247,15 +2219,15 @@ static inline u8 ufshcd_get_upmcrs(struct ufs_hba *hba)
 }
 
 /**
- * ufshcd_dispatch_uic_cmd - Dispatch UIC commands to unipro layers
+ * ufshcd_dispatch_uic_cmd - Dispatch an UIC command to the Unipro layer
  * @hba: per adapter instance
  * @uic_cmd: UIC command
- *
- * Mutex must be held.
  */
 static inline void
 ufshcd_dispatch_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd)
 {
+	lockdep_assert_held(&hba->uic_cmd_mutex);
+
 	WARN_ON(hba->active_uic_cmd);
 
 	hba->active_uic_cmd = uic_cmd;
@@ -2273,11 +2245,10 @@ ufshcd_dispatch_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd)
 }
 
 /**
- * ufshcd_wait_for_uic_cmd - Wait complectioin of UIC command
+ * ufshcd_wait_for_uic_cmd - Wait for completion of an UIC command
  * @hba: per adapter instance
  * @uic_cmd: UIC command
  *
- * Must be called with mutex held.
  * Returns 0 only if success.
  */
 static int
@@ -2286,6 +2257,8 @@ ufshcd_wait_for_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd)
 	int ret;
 	unsigned long flags;
 
+	lockdep_assert_held(&hba->uic_cmd_mutex);
+
 	if (wait_for_completion_timeout(&uic_cmd->done,
 					msecs_to_jiffies(UIC_CMD_TIMEOUT))) {
 		ret = uic_cmd->argument2 & MASK_UIC_COMMAND_RESULT;
@@ -2315,14 +2288,15 @@ ufshcd_wait_for_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd)
  * @uic_cmd: UIC command
  * @completion: initialize the completion only if this is set to true
  *
- * Identical to ufshcd_send_uic_cmd() expect mutex. Must be called
- * with mutex held and host_lock locked.
  * Returns 0 only if success.
  */
 static int
 __ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd,
 		      bool completion)
 {
+	lockdep_assert_held(&hba->uic_cmd_mutex);
+	lockdep_assert_held(hba->host->host_lock);
+
 	if (!ufshcd_ready_for_uic_cmd(hba)) {
 		dev_err(hba->dev,
 			"Controller not ready to accept UIC commands\n");
@@ -2701,20 +2675,12 @@ static void ufshcd_init_lrb(struct ufs_hba *hba, struct ufshcd_lrb *lrb, int i)
  */
 static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 {
+	struct ufs_hba *hba = shost_priv(host);
+	int tag = scsi_cmd_to_rq(cmd)->tag;
 	struct ufshcd_lrb *lrbp;
-	struct ufs_hba *hba;
-	int tag;
 	int err = 0;
 
-	hba = shost_priv(host);
-
-	tag = cmd->request->tag;
-	if (!ufshcd_valid_tag(hba, tag)) {
-		dev_err(hba->dev,
-			"%s: invalid command tag %d: cmd=0x%p, cmd->request=0x%p",
-			__func__, tag, cmd, cmd->request);
-		BUG();
-	}
+	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
 
 	if (!down_read_trylock(&hba->clk_scaling_lock))
 		return SCSI_MLQUEUE_HOST_BUSY;
@@ -2748,12 +2714,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 		set_host_byte(cmd, DID_ERROR);
 		cmd->scsi_done(cmd);
 		goto out;
-	default:
-		dev_WARN_ONCE(hba->dev, 1, "%s: invalid state %d\n",
-				__func__, hba->ufshcd_state);
-		set_host_byte(cmd, DID_BAD_TARGET);
-		cmd->scsi_done(cmd);
-		goto out;
 	}
 
 	hba->req_abort_count = 0;
@@ -2766,15 +2726,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 	WARN_ON(ufshcd_is_clkgating_allowed(hba) &&
 		(hba->clk_gating.state != CLKS_ON));
 
-	if (unlikely(test_bit(tag, &hba->outstanding_reqs))) {
-		if (hba->pm_op_in_progress)
-			set_host_byte(cmd, DID_BAD_TARGET);
-		else
-			err = SCSI_MLQUEUE_HOST_BUSY;
-		ufshcd_release(hba);
-		goto out;
-	}
-
 	lrbp = &hba->lrb[tag];
 	WARN_ON(lrbp->cmd);
 	lrbp->cmd = cmd;
@@ -2784,10 +2735,17 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 	lrbp->lun = ufshcd_scsi_to_upiu_lun(cmd->device->lun);
 	lrbp->intr_cmd = !ufshcd_is_intr_aggr_allowed(hba) ? true : false;
 
-	ufshcd_prepare_lrbp_crypto(cmd->request, lrbp);
+	ufshcd_prepare_lrbp_crypto(scsi_cmd_to_rq(cmd), lrbp);
 
 	lrbp->req_abort_skip = false;
 
+	err = ufshpb_prep(hba, lrbp);
+	if (err == -EAGAIN) {
+		lrbp->cmd = NULL;
+		ufshcd_release(hba);
+		goto out;
+	}
+
 	ufshcd_comp_scsi_upiu(hba, lrbp);
 
 	err = ufshcd_map_sg(hba, lrbp);
@@ -2796,12 +2754,14 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 		ufshcd_release(hba);
 		goto out;
 	}
-	/* Make sure descriptors are ready before ringing the doorbell */
-	wmb();
 
 	ufshcd_send_command(hba, tag);
 out:
 	up_read(&hba->clk_scaling_lock);
+
+	if (ufs_trigger_eh())
+		scsi_schedule_eh(hba->host);
+
 	return err;
 }
 
@@ -2907,8 +2867,6 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
 	time_left = wait_for_completion_timeout(hba->dev_cmd.complete,
 			msecs_to_jiffies(max_timeout));
 
-	/* Make sure descriptors are ready before ringing the doorbell */
-	wmb();
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	hba->dev_cmd.complete = NULL;
 	if (likely(time_left)) {
@@ -2930,7 +2888,9 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
 		 * we also need to clear the outstanding_request
 		 * field in hba
 		 */
-		ufshcd_outstanding_req_clear(hba, lrbp->task_tag);
+		spin_lock_irqsave(&hba->outstanding_lock, flags);
+		__clear_bit(lrbp->task_tag, &hba->outstanding_reqs);
+		spin_unlock_irqrestore(&hba->outstanding_lock, flags);
 	}
 
 	return err;
@@ -2949,11 +2909,11 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 		enum dev_cmd_type cmd_type, int timeout)
 {
 	struct request_queue *q = hba->cmd_queue;
+	DECLARE_COMPLETION_ONSTACK(wait);
 	struct request *req;
 	struct ufshcd_lrb *lrbp;
 	int err;
 	int tag;
-	struct completion wait;
 
 	down_read(&hba->clk_scaling_lock);
 
@@ -2968,17 +2928,11 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 		goto out_unlock;
 	}
 	tag = req->tag;
-	WARN_ON_ONCE(!ufshcd_valid_tag(hba, tag));
+	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
 	/* Set the timeout such that the SCSI error handler is not activated. */
 	req->timeout = msecs_to_jiffies(2 * timeout);
 	blk_mq_start_request(req);
 
-	if (unlikely(test_bit(tag, &hba->outstanding_reqs))) {
-		err = -EBUSY;
-		goto out;
-	}
-
-	init_completion(&wait);
 	lrbp = &hba->lrb[tag];
 	WARN_ON(lrbp->cmd);
 	err = ufshcd_compose_dev_cmd(hba, lrbp, cmd_type, tag);
@@ -2988,8 +2942,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 	hba->dev_cmd.complete = &wait;
 
 	ufshcd_add_query_upiu_trace(hba, UFS_QUERY_SEND, lrbp->ucd_req_ptr);
-	/* Make sure descriptors are ready before ringing the doorbell */
-	wmb();
 
 	ufshcd_send_command(hba, tag);
 	err = ufshcd_wait_for_dev_cmd(hba, lrbp, timeout);
@@ -3194,7 +3146,7 @@ out_unlock:
  *
  * Returns 0 for success, non-zero in case of failure
 */
-static int ufshcd_query_attr_retry(struct ufs_hba *hba,
+int ufshcd_query_attr_retry(struct ufs_hba *hba,
 	enum query_opcode opcode, enum attr_idn idn, u8 index, u8 selector,
 	u32 *attr_val)
 {
@@ -3419,9 +3371,11 @@ int ufshcd_read_desc_param(struct ufs_hba *hba,
 
 	if (is_kmalloc) {
 		/* Make sure we don't copy more data than available */
-		if (param_offset + param_size > buff_len)
-			param_size = buff_len - param_offset;
-		memcpy(param_read_buf, &desc_buf[param_offset], param_size);
+		if (param_offset >= buff_len)
+			ret = -EINVAL;
+		else
+			memcpy(param_read_buf, &desc_buf[param_offset],
+			       min_t(u32, param_size, buff_len - param_offset));
 	}
 out:
 	if (is_kmalloc)
@@ -3965,6 +3919,35 @@ out:
 }
 EXPORT_SYMBOL_GPL(ufshcd_dme_get_attr);
 
+static inline bool ufshcd_is_saved_err_fatal(struct ufs_hba *hba)
+{
+	lockdep_assert_held(hba->host->host_lock);
+
+	return (hba->saved_uic_err & UFSHCD_UIC_DL_PA_INIT_ERROR) ||
+	       (hba->saved_err & (INT_FATAL_ERRORS | UFSHCD_UIC_HIBERN8_MASK));
+}
+
+static void ufshcd_schedule_eh(struct ufs_hba *hba)
+{
+	bool schedule_eh = false;
+	unsigned long flags;
+
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	/* handle fatal errors only when link is not in error state */
+	if (hba->ufshcd_state != UFSHCD_STATE_ERROR) {
+		if (hba->force_reset || ufshcd_is_link_broken(hba) ||
+		    ufshcd_is_saved_err_fatal(hba))
+			hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED_FATAL;
+		else
+			hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED_NON_FATAL;
+		schedule_eh = true;
+	}
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	if (schedule_eh)
+		scsi_schedule_eh(hba->host);
+}
+
 /**
  * ufshcd_uic_pwr_ctrl - executes UIC commands (which affects the link power
  * state) and waits for it to take effect.
@@ -3983,14 +3966,14 @@ EXPORT_SYMBOL_GPL(ufshcd_dme_get_attr);
  */
 static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd)
 {
-	struct completion uic_async_done;
+	DECLARE_COMPLETION_ONSTACK(uic_async_done);
 	unsigned long flags;
+	bool schedule_eh = false;
 	u8 status;
 	int ret;
 	bool reenable_intr = false;
 
 	mutex_lock(&hba->uic_cmd_mutex);
-	init_completion(&uic_async_done);
 	ufshcd_add_delay_before_dme_cmd(hba);
 
 	spin_lock_irqsave(hba->host->host_lock, flags);
@@ -4055,10 +4038,14 @@ out:
 		ufshcd_enable_intr(hba, UIC_COMMAND_COMPL);
 	if (ret) {
 		ufshcd_set_link_broken(hba);
-		ufshcd_schedule_eh_work(hba);
+		schedule_eh = true;
 	}
+
 out_unlock:
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	if (schedule_eh)
+		ufshcd_schedule_eh(hba);
 	mutex_unlock(&hba->uic_cmd_mutex);
 
 	return ret;
@@ -4987,6 +4974,26 @@ static int ufshcd_change_queue_depth(struct scsi_device *sdev, int depth)
 	return scsi_change_queue_depth(sdev, depth);
 }
 
+static void ufshcd_hpb_destroy(struct ufs_hba *hba, struct scsi_device *sdev)
+{
+	/* skip well-known LU */
+	if ((sdev->lun >= UFS_UPIU_MAX_UNIT_NUM_ID) ||
+	    !(hba->dev_info.hpb_enabled) || !ufshpb_is_allowed(hba))
+		return;
+
+	ufshpb_destroy_lu(hba, sdev);
+}
+
+static void ufshcd_hpb_configure(struct ufs_hba *hba, struct scsi_device *sdev)
+{
+	/* skip well-known LU */
+	if ((sdev->lun >= UFS_UPIU_MAX_UNIT_NUM_ID) ||
+	    !(hba->dev_info.hpb_enabled) || !ufshpb_is_allowed(hba))
+		return;
+
+	ufshpb_init_hpb_lu(hba, sdev);
+}
+
 /**
  * ufshcd_slave_configure - adjust SCSI device configurations
  * @sdev: pointer to SCSI device
@@ -4996,6 +5003,8 @@ static int ufshcd_slave_configure(struct scsi_device *sdev)
 	struct ufs_hba *hba = shost_priv(sdev->host);
 	struct request_queue *q = sdev->request_queue;
 
+	ufshcd_hpb_configure(hba, sdev);
+
 	blk_queue_update_dma_pad(q, PRDT_DATA_BYTE_COUNT_PAD - 1);
 	if (hba->quirks & UFSHCD_QUIRK_ALIGN_SG_WITH_PAGE_SIZE)
 		blk_queue_update_dma_alignment(q, PAGE_SIZE - 1);
@@ -5020,15 +5029,37 @@ static int ufshcd_slave_configure(struct scsi_device *sdev)
 static void ufshcd_slave_destroy(struct scsi_device *sdev)
 {
 	struct ufs_hba *hba;
+	unsigned long flags;
 
 	hba = shost_priv(sdev->host);
+
+	ufshcd_hpb_destroy(hba, sdev);
+
 	/* Drop the reference as it won't be needed anymore */
 	if (ufshcd_scsi_to_upiu_lun(sdev->lun) == UFS_UPIU_UFS_DEVICE_WLUN) {
-		unsigned long flags;
-
 		spin_lock_irqsave(hba->host->host_lock, flags);
 		hba->sdev_ufs_device = NULL;
 		spin_unlock_irqrestore(hba->host->host_lock, flags);
+	} else if (hba->sdev_ufs_device) {
+		struct device *supplier = NULL;
+
+		/* Ensure UFS Device WLUN exists and does not disappear */
+		spin_lock_irqsave(hba->host->host_lock, flags);
+		if (hba->sdev_ufs_device) {
+			supplier = &hba->sdev_ufs_device->sdev_gendev;
+			get_device(supplier);
+		}
+		spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+		if (supplier) {
+			/*
+			 * If a LUN fails to probe (e.g. absent BOOT WLUN), the
+			 * device will not have been registered but can still
+			 * have a device link holding a reference to the device.
+			 */
+			device_link_remove(&sdev->sdev_gendev, supplier);
+			put_device(supplier);
+		}
 	}
 }
 
@@ -5124,6 +5155,9 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
 			    ufshcd_is_exception_event(lrbp->ucd_rsp_ptr))
 				/* Flushed in suspend */
 				schedule_work(&hba->eeh_work);
+
+			if (scsi_status == SAM_STAT_GOOD)
+				ufshpb_rsp_upiu(hba, lrbp);
 			break;
 		case UPIU_TRANSACTION_REJECT_UPIU:
 			/* TODO: handle Reject UPIU Response */
@@ -5232,10 +5266,12 @@ static irqreturn_t ufshcd_uic_cmd_compl(struct ufs_hba *hba, u32 intr_status)
 /**
  * __ufshcd_transfer_req_compl - handle SCSI and query command completion
  * @hba: per adapter instance
- * @completed_reqs: requests to complete
+ * @completed_reqs: bitmask that indicates which requests to complete
+ * @retry_requests: whether to ask the SCSI core to retry completed requests
  */
 static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
-					unsigned long completed_reqs)
+					unsigned long completed_reqs,
+					bool retry_requests)
 {
 	struct ufshcd_lrb *lrbp;
 	struct scsi_cmnd *cmd;
@@ -5244,8 +5280,6 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
 	bool update_scaling = false;
 
 	for_each_set_bit(index, &completed_reqs, hba->nutrs) {
-		if (!test_and_clear_bit(index, &hba->outstanding_reqs))
-			continue;
 		lrbp = &hba->lrb[index];
 		lrbp->compl_time_stamp = ktime_get();
 		cmd = lrbp->cmd;
@@ -5253,7 +5287,8 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
 			if (unlikely(ufshcd_should_inform_monitor(hba, lrbp)))
 				ufshcd_update_monitor(hba, lrbp);
 			ufshcd_add_command_trace(hba, index, UFS_CMD_COMP);
-			result = ufshcd_transfer_rsp_status(hba, lrbp);
+			result = retry_requests ? DID_BUS_BUSY << 16 :
+				ufshcd_transfer_rsp_status(hba, lrbp);
 			scsi_dma_unmap(cmd);
 			cmd->result = result;
 			/* Mark completed command as NULL in LRB */
@@ -5277,17 +5312,19 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
 }
 
 /**
- * ufshcd_trc_handler - handle transfer requests completion
+ * ufshcd_transfer_req_compl - handle SCSI and query command completion
  * @hba: per adapter instance
- * @use_utrlcnr: get completed requests from UTRLCNR
+ * @retry_requests: whether or not to ask to retry requests
  *
  * Returns
  *  IRQ_HANDLED - If interrupt is valid
  *  IRQ_NONE    - If invalid interrupt
  */
-static irqreturn_t ufshcd_trc_handler(struct ufs_hba *hba, bool use_utrlcnr)
+static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba,
+					     bool retry_requests)
 {
-	unsigned long completed_reqs = 0;
+	unsigned long completed_reqs, flags;
+	u32 tr_doorbell;
 
 	/* Resetting interrupt aggregation counters first and reading the
 	 * DOOR_BELL afterward allows us to handle all the completed requests.
@@ -5300,27 +5337,21 @@ static irqreturn_t ufshcd_trc_handler(struct ufs_hba *hba, bool use_utrlcnr)
 	    !(hba->quirks & UFSHCI_QUIRK_SKIP_RESET_INTR_AGGR))
 		ufshcd_reset_intr_aggr(hba);
 
-	if (use_utrlcnr) {
-		u32 utrlcnr;
-
-		utrlcnr = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_LIST_COMPL);
-		if (utrlcnr) {
-			ufshcd_writel(hba, utrlcnr,
-				      REG_UTP_TRANSFER_REQ_LIST_COMPL);
-			completed_reqs = utrlcnr;
-		}
-	} else {
-		unsigned long flags;
-		u32 tr_doorbell;
+	if (ufs_fail_completion())
+		return IRQ_HANDLED;
 
-		spin_lock_irqsave(hba->host->host_lock, flags);
-		tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
-		completed_reqs = tr_doorbell ^ hba->outstanding_reqs;
-		spin_unlock_irqrestore(hba->host->host_lock, flags);
-	}
+	spin_lock_irqsave(&hba->outstanding_lock, flags);
+	tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
+	completed_reqs = ~tr_doorbell & hba->outstanding_reqs;
+	WARN_ONCE(completed_reqs & ~hba->outstanding_reqs,
+		  "completed: %#lx; outstanding: %#lx\n", completed_reqs,
+		  hba->outstanding_reqs);
+	hba->outstanding_reqs &= ~completed_reqs;
+	spin_unlock_irqrestore(&hba->outstanding_lock, flags);
 
 	if (completed_reqs) {
-		__ufshcd_transfer_req_compl(hba, completed_reqs);
+		__ufshcd_transfer_req_compl(hba, completed_reqs,
+					    retry_requests);
 		return IRQ_HANDLED;
 	} else {
 		return IRQ_NONE;
@@ -5799,7 +5830,13 @@ out:
 /* Complete requests that have door-bell cleared */
 static void ufshcd_complete_requests(struct ufs_hba *hba)
 {
-	ufshcd_trc_handler(hba, false);
+	ufshcd_transfer_req_compl(hba, /*retry_requests=*/false);
+	ufshcd_tmc_handler(hba);
+}
+
+static void ufshcd_retry_aborted_requests(struct ufs_hba *hba)
+{
+	ufshcd_transfer_req_compl(hba, /*retry_requests=*/true);
 	ufshcd_tmc_handler(hba);
 }
 
@@ -5874,27 +5911,6 @@ out:
 	return err_handling;
 }
 
-/* host lock must be held before calling this func */
-static inline bool ufshcd_is_saved_err_fatal(struct ufs_hba *hba)
-{
-	return (hba->saved_uic_err & UFSHCD_UIC_DL_PA_INIT_ERROR) ||
-	       (hba->saved_err & (INT_FATAL_ERRORS | UFSHCD_UIC_HIBERN8_MASK));
-}
-
-/* host lock must be held before calling this func */
-static inline void ufshcd_schedule_eh_work(struct ufs_hba *hba)
-{
-	/* handle fatal errors only when link is not in error state */
-	if (hba->ufshcd_state != UFSHCD_STATE_ERROR) {
-		if (hba->force_reset || ufshcd_is_link_broken(hba) ||
-		    ufshcd_is_saved_err_fatal(hba))
-			hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED_FATAL;
-		else
-			hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED_NON_FATAL;
-		queue_work(hba->eh_wq, &hba->eh_work);
-	}
-}
-
 static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow)
 {
 	down_write(&hba->clk_scaling_lock);
@@ -6028,11 +6044,11 @@ static bool ufshcd_is_pwr_mode_restore_needed(struct ufs_hba *hba)
 
 /**
  * ufshcd_err_handler - handle UFS errors that require s/w attention
- * @work: pointer to work structure
+ * @host: SCSI host pointer
  */
-static void ufshcd_err_handler(struct work_struct *work)
+static void ufshcd_err_handler(struct Scsi_Host *host)
 {
-	struct ufs_hba *hba;
+	struct ufs_hba *hba = shost_priv(host);
 	unsigned long flags;
 	bool err_xfer = false;
 	bool err_tm = false;
@@ -6040,10 +6056,9 @@ static void ufshcd_err_handler(struct work_struct *work)
 	int tag;
 	bool needs_reset = false, needs_restore = false;
 
-	hba = container_of(work, struct ufs_hba, eh_work);
-
 	down(&hba->host_sem);
 	spin_lock_irqsave(hba->host->host_lock, flags);
+	hba->host->host_eh_scheduled = 0;
 	if (ufshcd_err_handling_should_stop(hba)) {
 		if (hba->ufshcd_state != UFSHCD_STATE_ERROR)
 			hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
@@ -6141,8 +6156,7 @@ static void ufshcd_err_handler(struct work_struct *work)
 	}
 
 lock_skip_pending_xfer_clear:
-	/* Complete the requests that are cleared by s/w */
-	ufshcd_complete_requests(hba);
+	ufshcd_retry_aborted_requests(hba);
 
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	hba->silence_err_logs = false;
@@ -6357,7 +6371,6 @@ static irqreturn_t ufshcd_check_errors(struct ufs_hba *hba, u32 intr_status)
 					 "host_regs: ");
 			ufshcd_print_pwr_info(hba);
 		}
-		ufshcd_schedule_eh_work(hba);
 		retval |= IRQ_HANDLED;
 	}
 	/*
@@ -6369,6 +6382,10 @@ static irqreturn_t ufshcd_check_errors(struct ufs_hba *hba, u32 intr_status)
 	hba->errors = 0;
 	hba->uic_error = 0;
 	spin_unlock(hba->host->host_lock);
+
+	if (queue_eh_work)
+		ufshcd_schedule_eh(hba);
+
 	return retval;
 }
 
@@ -6440,7 +6457,7 @@ static irqreturn_t ufshcd_sl_intr(struct ufs_hba *hba, u32 intr_status)
 		retval |= ufshcd_tmc_handler(hba);
 
 	if (intr_status & UTP_TRANSFER_REQ_COMPL)
-		retval |= ufshcd_trc_handler(hba, ufshcd_has_utrlcnr(hba));
+		retval |= ufshcd_transfer_req_compl(hba, /*retry_requests=*/false);
 
 	return retval;
 }
@@ -6548,9 +6565,6 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba,
 	/* send command to the controller */
 	__set_bit(task_tag, &hba->outstanding_tasks);
 
-	/* Make sure descriptors are ready before ringing the task doorbell */
-	wmb();
-
 	ufshcd_writel(hba, 1 << task_tag, REG_UTP_TASK_REQ_DOOR_BELL);
 	/* Make sure that doorbell is committed immediately */
 	wmb();
@@ -6663,11 +6677,11 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
 					enum query_opcode desc_op)
 {
 	struct request_queue *q = hba->cmd_queue;
+	DECLARE_COMPLETION_ONSTACK(wait);
 	struct request *req;
 	struct ufshcd_lrb *lrbp;
 	int err = 0;
 	int tag;
-	struct completion wait;
 	u8 upiu_flags;
 
 	down_read(&hba->clk_scaling_lock);
@@ -6678,14 +6692,13 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
 		goto out_unlock;
 	}
 	tag = req->tag;
-	WARN_ON_ONCE(!ufshcd_valid_tag(hba, tag));
+	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
 
 	if (unlikely(test_bit(tag, &hba->outstanding_reqs))) {
 		err = -EBUSY;
 		goto out;
 	}
 
-	init_completion(&wait);
 	lrbp = &hba->lrb[tag];
 	WARN_ON(lrbp->cmd);
 	lrbp->cmd = NULL;
@@ -6723,8 +6736,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
 	hba->dev_cmd.complete = &wait;
 
 	ufshcd_add_query_upiu_trace(hba, UFS_QUERY_SEND, lrbp->ucd_req_ptr);
-	/* Make sure descriptors are ready before ringing the doorbell */
-	wmb();
 
 	ufshcd_send_command(hba, tag);
 	/*
@@ -6865,7 +6876,7 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
 			err = ufshcd_clear_cmd(hba, pos);
 			if (err)
 				break;
-			__ufshcd_transfer_req_compl(hba, pos);
+			__ufshcd_transfer_req_compl(hba, pos, /*retry_requests=*/true);
 		}
 	}
 
@@ -6981,33 +6992,24 @@ out:
  */
 static int ufshcd_abort(struct scsi_cmnd *cmd)
 {
-	struct Scsi_Host *host;
-	struct ufs_hba *hba;
+	struct Scsi_Host *host = cmd->device->host;
+	struct ufs_hba *hba = shost_priv(host);
+	int tag = scsi_cmd_to_rq(cmd)->tag;
+	struct ufshcd_lrb *lrbp = &hba->lrb[tag];
 	unsigned long flags;
-	unsigned int tag;
-	int err = 0;
-	struct ufshcd_lrb *lrbp;
+	int err = FAILED;
 	u32 reg;
 
-	host = cmd->device->host;
-	hba = shost_priv(host);
-	tag = cmd->request->tag;
-	lrbp = &hba->lrb[tag];
-	if (!ufshcd_valid_tag(hba, tag)) {
-		dev_err(hba->dev,
-			"%s: invalid command tag %d: cmd=0x%p, cmd->request=0x%p",
-			__func__, tag, cmd, cmd->request);
-		BUG();
-	}
+	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
 
 	ufshcd_hold(hba, false);
 	reg = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
-	/* If command is already aborted/completed, return SUCCESS */
+	/* If command is already aborted/completed, return FAILED. */
 	if (!(test_bit(tag, &hba->outstanding_reqs))) {
 		dev_err(hba->dev,
 			"%s: cmd at tag %d already completed, outstanding=0x%lx, doorbell=0x%x\n",
 			__func__, tag, hba->outstanding_reqs, reg);
-		goto out;
+		goto release;
 	}
 
 	/* Print Transfer Request of aborted task */
@@ -7036,7 +7038,8 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
 		dev_err(hba->dev,
 		"%s: cmd was completed, but without a notifying intr, tag = %d",
 		__func__, tag);
-		goto cleanup;
+		__ufshcd_transfer_req_compl(hba, 1UL << tag, /*retry_requests=*/false);
+		goto release;
 	}
 
 	/*
@@ -7045,40 +7048,39 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
 	 * will be to send LU reset which, again, is a spec violation.
 	 * To avoid these unnecessary/illegal steps, first we clean up
 	 * the lrb taken by this cmd and re-set it in outstanding_reqs,
-	 * then queue the eh_work and bail.
+	 * then queue the error handler and bail.
 	 */
 	if (lrbp->lun == UFS_UPIU_UFS_DEVICE_WLUN) {
 		ufshcd_update_evt_hist(hba, UFS_EVT_ABORT, lrbp->lun);
-		__ufshcd_transfer_req_compl(hba, (1UL << tag));
-		set_bit(tag, &hba->outstanding_reqs);
+
 		spin_lock_irqsave(host->host_lock, flags);
 		hba->force_reset = true;
-		ufshcd_schedule_eh_work(hba);
 		spin_unlock_irqrestore(host->host_lock, flags);
-		goto out;
+
+		ufshcd_schedule_eh(hba);
+
+		goto release;
 	}
 
 	/* Skip task abort in case previous aborts failed and report failure */
-	if (lrbp->req_abort_skip)
-		err = -EIO;
-	else
-		err = ufshcd_try_to_abort_task(hba, tag);
+	if (lrbp->req_abort_skip) {
+		dev_err(hba->dev, "%s: skipping abort\n", __func__);
+		ufshcd_set_req_abort_skip(hba, hba->outstanding_reqs);
+		goto release;
+	}
 
-	if (!err) {
-cleanup:
-		__ufshcd_transfer_req_compl(hba, (1UL << tag));
-out:
-		err = SUCCESS;
-	} else {
+	err = ufshcd_try_to_abort_task(hba, tag);
+	if (err) {
 		dev_err(hba->dev, "%s: failed with err %d\n", __func__, err);
 		ufshcd_set_req_abort_skip(hba, hba->outstanding_reqs);
 		err = FAILED;
+		goto release;
 	}
 
-	/*
-	 * This ufshcd_release() corresponds to the original scsi cmd that got
-	 * aborted here (as we won't get any IRQ for it).
-	 */
+	err = SUCCESS;
+
+release:
+	/* Matches the ufshcd_hold() call at the start of this function. */
 	ufshcd_release(hba);
 	return err;
 }
@@ -7101,9 +7103,10 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba)
 	 * Stop the host controller and complete the requests
 	 * cleared by h/w
 	 */
+	ufshpb_reset_host(hba);
 	ufshcd_hba_stop(hba);
 	hba->silence_err_logs = true;
-	ufshcd_complete_requests(hba);
+	ufshcd_retry_aborted_requests(hba);
 	hba->silence_err_logs = false;
 
 	/* scale up clocks to max frequency before full reinitialization */
@@ -7188,11 +7191,10 @@ static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd)
 
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	hba->force_reset = true;
-	ufshcd_schedule_eh_work(hba);
 	dev_err(hba->dev, "%s: reset in progress - 1\n", __func__);
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 
-	flush_work(&hba->eh_work);
+	ufshcd_err_handler(hba->host);
 
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	if (hba->ufshcd_state == UFSHCD_STATE_ERROR)
@@ -7499,6 +7501,7 @@ static int ufs_get_device_desc(struct ufs_hba *hba)
 {
 	int err;
 	u8 model_index;
+	u8 b_ufs_feature_sup;
 	u8 *desc_buf;
 	struct ufs_dev_info *dev_info = &hba->dev_info;
 
@@ -7526,9 +7529,26 @@ static int ufs_get_device_desc(struct ufs_hba *hba)
 	/* getting Specification Version in big endian format */
 	dev_info->wspecversion = desc_buf[DEVICE_DESC_PARAM_SPEC_VER] << 8 |
 				      desc_buf[DEVICE_DESC_PARAM_SPEC_VER + 1];
+	b_ufs_feature_sup = desc_buf[DEVICE_DESC_PARAM_UFS_FEAT];
 
 	model_index = desc_buf[DEVICE_DESC_PARAM_PRDCT_NAME];
 
+	if (dev_info->wspecversion >= UFS_DEV_HPB_SUPPORT_VERSION &&
+	    (b_ufs_feature_sup & UFS_DEV_HPB_SUPPORT)) {
+		bool hpb_en = false;
+
+		ufshpb_get_dev_info(hba, desc_buf);
+
+		if (!ufshpb_is_legacy(hba))
+			err = ufshcd_query_flag_retry(hba,
+						      UPIU_QUERY_OPCODE_READ_FLAG,
+						      QUERY_FLAG_IDN_HPB_EN, 0,
+						      &hpb_en);
+
+		if (ufshpb_is_legacy(hba) || (!err && hpb_en))
+			dev_info->hpb_enabled = true;
+	}
+
 	err = ufshcd_read_string_desc(hba, model_index,
 				      &dev_info->model, SD_ASCII_STD);
 	if (err < 0) {
@@ -7760,6 +7780,10 @@ static int ufshcd_device_geo_params_init(struct ufs_hba *hba)
 	else if (desc_buf[GEOMETRY_DESC_PARAM_MAX_NUM_LUN] == 0)
 		hba->dev_info.max_lu_supported = 8;
 
+	if (hba->desc_size[QUERY_DESC_IDN_GEOMETRY] >=
+		GEOMETRY_DESC_PARAM_HPB_MAX_ACTIVE_REGS)
+		ufshpb_get_geo_info(hba, desc_buf);
+
 out:
 	kfree(desc_buf);
 	return err;
@@ -7902,6 +7926,7 @@ static int ufshcd_add_lus(struct ufs_hba *hba)
 	}
 
 	ufs_bsg_probe(hba);
+	ufshpb_init(hba);
 	scsi_scan_host(hba->host);
 	pm_runtime_put_sync(hba->dev);
 
@@ -7909,8 +7934,61 @@ out:
 	return ret;
 }
 
+static void ufshcd_request_sense_done(struct request *rq, blk_status_t error)
+{
+	if (error != BLK_STS_OK)
+		pr_err("%s: REQUEST SENSE failed (%d)\n", __func__, error);
+	kfree(rq->end_io_data);
+	blk_put_request(rq);
+}
+
 static int
-ufshcd_send_request_sense(struct ufs_hba *hba, struct scsi_device *sdp);
+ufshcd_request_sense_async(struct ufs_hba *hba, struct scsi_device *sdev)
+{
+	/*
+	 * Some UFS devices clear unit attention condition only if the sense
+	 * size used (UFS_SENSE_SIZE in this case) is non-zero.
+	 */
+	static const u8 cmd[6] = {REQUEST_SENSE, 0, 0, 0, UFS_SENSE_SIZE, 0};
+	struct scsi_request *rq;
+	struct request *req;
+	char *buffer;
+	int ret;
+
+	buffer = kzalloc(UFS_SENSE_SIZE, GFP_KERNEL);
+	if (!buffer)
+		return -ENOMEM;
+
+	req = blk_get_request(sdev->request_queue, REQ_OP_DRV_IN,
+			      /*flags=*/BLK_MQ_REQ_PM);
+	if (IS_ERR(req)) {
+		ret = PTR_ERR(req);
+		goto out_free;
+	}
+
+	ret = blk_rq_map_kern(sdev->request_queue, req,
+			      buffer, UFS_SENSE_SIZE, GFP_NOIO);
+	if (ret)
+		goto out_put;
+
+	rq = scsi_req(req);
+	rq->cmd_len = ARRAY_SIZE(cmd);
+	memcpy(rq->cmd, cmd, rq->cmd_len);
+	rq->retries = 3;
+	req->timeout = 1 * HZ;
+	req->rq_flags |= RQF_PM | RQF_QUIET;
+	req->end_io_data = buffer;
+
+	blk_execute_rq_nowait(/*bd_disk=*/NULL, req, /*at_head=*/true,
+			      ufshcd_request_sense_done);
+	return 0;
+
+out_put:
+	blk_put_request(req);
+out_free:
+	kfree(buffer);
+	return ret;
+}
 
 static int ufshcd_clear_ua_wlun(struct ufs_hba *hba, u8 wlun)
 {
@@ -7938,7 +8016,7 @@ static int ufshcd_clear_ua_wlun(struct ufs_hba *hba, u8 wlun)
 	if (ret)
 		goto out_err;
 
-	ret = ufshcd_send_request_sense(hba, sdp);
+	ret = ufshcd_request_sense_async(hba, sdp);
 	scsi_device_put(sdp);
 out_err:
 	if (ret)
@@ -7967,13 +8045,13 @@ out:
 }
 
 /**
- * ufshcd_probe_hba - probe hba to detect device and initialize
+ * ufshcd_probe_hba - probe hba to detect device and initialize it
  * @hba: per-adapter instance
- * @async: asynchronous execution or not
+ * @init_dev_params: whether or not to call ufshcd_device_params_init().
  *
  * Execute link-startup and verify device initialization
  */
-static int ufshcd_probe_hba(struct ufs_hba *hba, bool async)
+static int ufshcd_probe_hba(struct ufs_hba *hba, bool init_dev_params)
 {
 	int ret;
 	unsigned long flags;
@@ -8005,7 +8083,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba, bool async)
 	 * Initialize UFS device parameters used by driver, these
 	 * parameters are associated with UFS descriptors.
 	 */
-	if (async) {
+	if (init_dev_params) {
 		ret = ufshcd_device_params_init(hba);
 		if (ret)
 			goto out;
@@ -8050,6 +8128,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba, bool async)
 	/* Enable Auto-Hibernate if configured */
 	ufshcd_auto_hibern8_enable(hba);
 
+	ufshpb_reset(hba);
 out:
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	if (ret)
@@ -8097,6 +8176,10 @@ out:
 static const struct attribute_group *ufshcd_driver_groups[] = {
 	&ufs_sysfs_unit_descriptor_group,
 	&ufs_sysfs_lun_attributes_group,
+#ifdef CONFIG_SCSI_UFS_HPB
+	&ufs_sysfs_hpb_stat_group,
+	&ufs_sysfs_hpb_param_group,
+#endif
 	NULL,
 };
 
@@ -8521,8 +8604,6 @@ static void ufshcd_hba_exit(struct ufs_hba *hba)
 	if (hba->is_powered) {
 		ufshcd_exit_clk_scaling(hba);
 		ufshcd_exit_clk_gating(hba);
-		if (hba->eh_wq)
-			destroy_workqueue(hba->eh_wq);
 		ufs_debugfs_hba_exit(hba);
 		ufshcd_variant_hba_exit(hba);
 		ufshcd_setup_vreg(hba, false);
@@ -8533,35 +8614,6 @@ static void ufshcd_hba_exit(struct ufs_hba *hba)
 	}
 }
 
-static int
-ufshcd_send_request_sense(struct ufs_hba *hba, struct scsi_device *sdp)
-{
-	unsigned char cmd[6] = {REQUEST_SENSE,
-				0,
-				0,
-				0,
-				UFS_SENSE_SIZE,
-				0};
-	char *buffer;
-	int ret;
-
-	buffer = kzalloc(UFS_SENSE_SIZE, GFP_KERNEL);
-	if (!buffer) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	ret = scsi_execute(sdp, cmd, DMA_FROM_DEVICE, buffer,
-			UFS_SENSE_SIZE, NULL, NULL,
-			msecs_to_jiffies(1000), 3, 0, RQF_PM, NULL);
-	if (ret)
-		pr_err("%s: failed with err %d\n", __func__, ret);
-
-	kfree(buffer);
-out:
-	return ret;
-}
-
 /**
  * ufshcd_set_dev_pwr_mode - sends START STOP UNIT command to set device
  *			     power mode
@@ -8739,6 +8791,7 @@ static void ufshcd_vreg_set_lpm(struct ufs_hba *hba)
 		usleep_range(5000, 5100);
 }
 
+#ifdef CONFIG_PM
 static int ufshcd_vreg_set_hpm(struct ufs_hba *hba)
 {
 	int ret = 0;
@@ -8766,6 +8819,7 @@ vcc_disable:
 out:
 	return ret;
 }
+#endif /* CONFIG_PM */
 
 static void ufshcd_hba_vreg_set_lpm(struct ufs_hba *hba)
 {
@@ -8798,6 +8852,8 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
 		req_link_state = UIC_LINK_OFF_STATE;
 	}
 
+	ufshpb_suspend(hba);
+
 	/*
 	 * If we can't transition into any of the low power modes
 	 * just gate the clocks.
@@ -8921,6 +8977,7 @@ out:
 		ufshcd_update_evt_hist(hba, UFS_EVT_WL_SUSP_ERR, (u32)ret);
 		hba->clk_gating.is_suspended = false;
 		ufshcd_release(hba);
+		ufshpb_resume(hba);
 	}
 	hba->pm_op_in_progress = false;
 	return ret;
@@ -8999,6 +9056,8 @@ static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
 
 	/* Enable Auto-Hibernate if configured */
 	ufshcd_auto_hibern8_enable(hba);
+
+	ufshpb_resume(hba);
 	goto out;
 
 set_old_link_state:
@@ -9168,6 +9227,7 @@ static int ufshcd_suspend(struct ufs_hba *hba)
 	return ret;
 }
 
+#ifdef CONFIG_PM
 /**
  * ufshcd_resume - helper function for resume operations
  * @hba: per adapter instance
@@ -9205,17 +9265,21 @@ out:
 		ufshcd_update_evt_hist(hba, UFS_EVT_RESUME_ERR, (u32)ret);
 	return ret;
 }
+#endif /* CONFIG_PM */
 
+#ifdef CONFIG_PM_SLEEP
 /**
- * ufshcd_system_suspend - system suspend routine
- * @hba: per adapter instance
+ * ufshcd_system_suspend - system suspend callback
+ * @dev: Device associated with the UFS controller.
  *
- * Check the description of ufshcd_suspend() function for more details.
+ * Executed before putting the system into a sleep state in which the contents
+ * of main memory are preserved.
  *
  * Returns 0 for success and non-zero for failure
  */
-int ufshcd_system_suspend(struct ufs_hba *hba)
+int ufshcd_system_suspend(struct device *dev)
 {
+	struct ufs_hba *hba = dev_get_drvdata(dev);
 	int ret = 0;
 	ktime_t start = ktime_get();
 
@@ -9232,16 +9296,19 @@ out:
 EXPORT_SYMBOL(ufshcd_system_suspend);
 
 /**
- * ufshcd_system_resume - system resume routine
- * @hba: per adapter instance
+ * ufshcd_system_resume - system resume callback
+ * @dev: Device associated with the UFS controller.
+ *
+ * Executed after waking the system up from a sleep state in which the contents
+ * of main memory were preserved.
  *
  * Returns 0 for success and non-zero for failure
  */
-
-int ufshcd_system_resume(struct ufs_hba *hba)
+int ufshcd_system_resume(struct device *dev)
 {
-	int ret = 0;
+	struct ufs_hba *hba = dev_get_drvdata(dev);
 	ktime_t start = ktime_get();
+	int ret = 0;
 
 	if (pm_runtime_suspended(hba->dev))
 		goto out;
@@ -9256,17 +9323,20 @@ out:
 	return ret;
 }
 EXPORT_SYMBOL(ufshcd_system_resume);
+#endif /* CONFIG_PM_SLEEP */
 
+#ifdef CONFIG_PM
 /**
- * ufshcd_runtime_suspend - runtime suspend routine
- * @hba: per adapter instance
+ * ufshcd_runtime_suspend - runtime suspend callback
+ * @dev: Device associated with the UFS controller.
  *
  * Check the description of ufshcd_suspend() function for more details.
  *
  * Returns 0 for success and non-zero for failure
  */
-int ufshcd_runtime_suspend(struct ufs_hba *hba)
+int ufshcd_runtime_suspend(struct device *dev)
 {
+	struct ufs_hba *hba = dev_get_drvdata(dev);
 	int ret;
 	ktime_t start = ktime_get();
 
@@ -9281,7 +9351,7 @@ EXPORT_SYMBOL(ufshcd_runtime_suspend);
 
 /**
  * ufshcd_runtime_resume - runtime resume routine
- * @hba: per adapter instance
+ * @dev: Device associated with the UFS controller.
  *
  * This function basically brings controller
  * to active state. Following operations are done in this function:
@@ -9289,8 +9359,9 @@ EXPORT_SYMBOL(ufshcd_runtime_suspend);
  * 1. Turn on all the controller related clocks
  * 2. Turn ON VCC rail
  */
-int ufshcd_runtime_resume(struct ufs_hba *hba)
+int ufshcd_runtime_resume(struct device *dev)
 {
+	struct ufs_hba *hba = dev_get_drvdata(dev);
 	int ret;
 	ktime_t start = ktime_get();
 
@@ -9302,12 +9373,7 @@ int ufshcd_runtime_resume(struct ufs_hba *hba)
 	return ret;
 }
 EXPORT_SYMBOL(ufshcd_runtime_resume);
-
-int ufshcd_runtime_idle(struct ufs_hba *hba)
-{
-	return 0;
-}
-EXPORT_SYMBOL(ufshcd_runtime_idle);
+#endif /* CONFIG_PM */
 
 /**
  * ufshcd_shutdown - shutdown routine
@@ -9343,6 +9409,7 @@ void ufshcd_remove(struct ufs_hba *hba)
 	if (hba->sdev_ufs_device)
 		ufshcd_rpm_get_sync(hba);
 	ufs_bsg_remove(hba);
+	ufshpb_remove(hba);
 	ufs_sysfs_remove_nodes(hba->dev);
 	blk_cleanup_queue(hba->tmf_queue);
 	blk_mq_free_tag_set(&hba->tmf_tag_set);
@@ -9381,6 +9448,10 @@ static int ufshcd_set_dma_mask(struct ufs_hba *hba)
 	return dma_set_mask_and_coherent(hba->dev, DMA_BIT_MASK(32));
 }
 
+static struct scsi_transport_template ufshcd_transport_template = {
+	.eh_strategy_handler = ufshcd_err_handler,
+};
+
 /**
  * ufshcd_alloc_host - allocate Host Bus Adapter (HBA)
  * @dev: pointer to device handle
@@ -9407,13 +9478,15 @@ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle)
 		err = -ENOMEM;
 		goto out_error;
 	}
+	host->transportt = &ufshcd_transport_template;
 	hba = shost_priv(host);
 	hba->host = host;
 	hba->dev = dev;
-	*hba_handle = hba;
 	hba->dev_ref_clk_freq = REF_CLK_FREQ_INVAL;
-
 	INIT_LIST_HEAD(&hba->clk_list_head);
+	spin_lock_init(&hba->outstanding_lock);
+
+	*hba_handle = hba;
 
 out_error:
 	return err;
@@ -9444,7 +9517,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 	int err;
 	struct Scsi_Host *host = hba->host;
 	struct device *dev = hba->dev;
-	char eh_wq_name[sizeof("ufs_eh_wq_00")];
 
 	if (!mmio_base) {
 		dev_err(hba->dev,
@@ -9498,17 +9570,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 
 	hba->max_pwr_info.is_valid = false;
 
-	/* Initialize work queues */
-	snprintf(eh_wq_name, sizeof(eh_wq_name), "ufs_eh_wq_%d",
-		 hba->host->host_no);
-	hba->eh_wq = create_singlethread_workqueue(eh_wq_name);
-	if (!hba->eh_wq) {
-		dev_err(hba->dev, "%s: failed to create eh workqueue\n",
-				__func__);
-		err = -ENOMEM;
-		goto out_disable;
-	}
-	INIT_WORK(&hba->eh_work, ufshcd_err_handler);
 	INIT_WORK(&hba->eeh_work, ufshcd_exception_event_handler);
 
 	sema_init(&hba->host_sem, 1);
@@ -9627,6 +9688,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 	async_schedule(ufshcd_async_scan, hba);
 	ufs_sysfs_add_nodes(hba->dev);
 
+	device_enable_async_suspend(dev);
 	return 0;
 
 free_tmf_queue:
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 194755c9ddfe..52ea6f350b18 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -476,6 +476,27 @@ struct ufs_stats {
 	struct ufs_event_hist event[UFS_EVT_CNT];
 };
 
+/**
+ * enum ufshcd_state - UFS host controller state
+ * @UFSHCD_STATE_RESET: Link is not operational. Postpone SCSI command
+ *	processing.
+ * @UFSHCD_STATE_OPERATIONAL: The host controller is operational and can process
+ *	SCSI commands.
+ * @UFSHCD_STATE_EH_SCHEDULED_NON_FATAL: The error handler has been scheduled.
+ *	SCSI commands may be submitted to the controller.
+ * @UFSHCD_STATE_EH_SCHEDULED_FATAL: The error handler has been scheduled. Fail
+ *	newly submitted SCSI commands with error code DID_BAD_TARGET.
+ * @UFSHCD_STATE_ERROR: An unrecoverable error occurred, e.g. link recovery
+ *	failed. Fail all SCSI commands with error code DID_ERROR.
+ */
+enum ufshcd_state {
+	UFSHCD_STATE_RESET,
+	UFSHCD_STATE_OPERATIONAL,
+	UFSHCD_STATE_EH_SCHEDULED_NON_FATAL,
+	UFSHCD_STATE_EH_SCHEDULED_FATAL,
+	UFSHCD_STATE_ERROR,
+};
+
 enum ufshcd_quirks {
 	/* Interrupt aggregation support is broken */
 	UFSHCD_QUIRK_BROKEN_INTR_AGGR			= 1 << 0,
@@ -641,6 +662,31 @@ struct ufs_hba_variant_params {
 	u32 wb_flush_threshold;
 };
 
+#ifdef CONFIG_SCSI_UFS_HPB
+/**
+ * struct ufshpb_dev_info - UFSHPB device related info
+ * @num_lu: the number of user logical unit to check whether all lu finished
+ *          initialization
+ * @rgn_size: device reported HPB region size
+ * @srgn_size: device reported HPB sub-region size
+ * @slave_conf_cnt: counter to check all lu finished initialization
+ * @hpb_disabled: flag to check if HPB is disabled
+ * @max_hpb_single_cmd: device reported bMAX_DATA_SIZE_FOR_SINGLE_CMD value
+ * @is_legacy: flag to check HPB 1.0
+ * @control_mode: either host or device
+ */
+struct ufshpb_dev_info {
+	int num_lu;
+	int rgn_size;
+	int srgn_size;
+	atomic_t slave_conf_cnt;
+	bool hpb_disabled;
+	u8 max_hpb_single_cmd;
+	bool is_legacy;
+	u8 control_mode;
+};
+#endif
+
 struct ufs_hba_monitor {
 	unsigned long chunk_size;
 
@@ -674,6 +720,7 @@ struct ufs_hba_monitor {
  * @lrb: local reference block
  * @cmd_queue: Used to allocate command tags from hba->host->tag_set.
  * @outstanding_tasks: Bits representing outstanding task requests
+ * @outstanding_lock: Protects @outstanding_reqs.
  * @outstanding_reqs: Bits representing outstanding transfer requests
  * @capabilities: UFS Controller Capabilities
  * @nutrs: Transfer Request Queue depth supported by controller
@@ -683,19 +730,17 @@ struct ufs_hba_monitor {
  * @priv: pointer to variant specific private data
  * @irq: Irq number of the controller
  * @active_uic_cmd: handle of active UIC command
- * @uic_cmd_mutex: mutex for uic command
+ * @uic_cmd_mutex: mutex for UIC command
  * @tmf_tag_set: TMF tag set.
  * @tmf_queue: Used to allocate TMF tags.
  * @pwr_done: completion for power mode change
- * @ufshcd_state: UFSHCD states
+ * @ufshcd_state: UFSHCD state
  * @eh_flags: Error handling flags
  * @intr_mask: Interrupt Mask Bits
  * @ee_ctrl_mask: Exception event control mask
  * @is_powered: flag to check if HBA is powered
  * @shutting_down: flag to check if shutdown has been invoked
  * @host_sem: semaphore used to serialize concurrent contexts
- * @eh_wq: Workqueue that eh_work works on
- * @eh_work: Worker to handle UFS errors that require s/w attention
  * @eeh_work: Worker to handle exception events
  * @errors: HBA errors
  * @uic_error: UFS interconnect layer error status
@@ -760,6 +805,7 @@ struct ufs_hba {
 	struct ufshcd_lrb *lrb;
 
 	unsigned long outstanding_tasks;
+	spinlock_t outstanding_lock;
 	unsigned long outstanding_reqs;
 
 	u32 capabilities;
@@ -785,7 +831,7 @@ struct ufs_hba {
 	struct mutex uic_cmd_mutex;
 	struct completion *uic_async_done;
 
-	u32 ufshcd_state;
+	enum ufshcd_state ufshcd_state;
 	u32 eh_flags;
 	u32 intr_mask;
 	u16 ee_ctrl_mask; /* Exception event mask */
@@ -797,8 +843,6 @@ struct ufs_hba {
 	struct semaphore host_sem;
 
 	/* Work Queues */
-	struct workqueue_struct *eh_wq;
-	struct work_struct eh_work;
 	struct work_struct eeh_work;
 
 	/* HBA Errors */
@@ -851,6 +895,10 @@ struct ufs_hba {
 	struct request_queue	*bsg_queue;
 	struct delayed_work rpm_dev_flush_recheck_work;
 
+#ifdef CONFIG_SCSI_UFS_HPB
+	struct ufshpb_dev_info ufshpb_dev;
+#endif
+
 	struct ufs_hba_monitor	monitor;
 
 #ifdef CONFIG_SCSI_UFS_CRYPTO
@@ -893,16 +941,8 @@ static inline bool ufshcd_is_rpm_autosuspend_allowed(struct ufs_hba *hba)
 
 static inline bool ufshcd_is_intr_aggr_allowed(struct ufs_hba *hba)
 {
-/* DWC UFS Core has the Interrupt aggregation feature but is not detectable*/
-#ifndef CONFIG_SCSI_UFS_DWC
-	if ((hba->caps & UFSHCD_CAP_INTR_AGGR) &&
-	    !(hba->quirks & UFSHCD_QUIRK_BROKEN_INTR_AGGR))
-		return true;
-	else
-		return false;
-#else
-return true;
-#endif
+	return (hba->caps & UFSHCD_CAP_INTR_AGGR) &&
+		!(hba->quirks & UFSHCD_QUIRK_BROKEN_INTR_AGGR);
 }
 
 static inline bool ufshcd_can_aggressive_pc(struct ufs_hba *hba)
@@ -1009,11 +1049,14 @@ static inline u8 ufshcd_wb_get_query_index(struct ufs_hba *hba)
 	return 0;
 }
 
-extern int ufshcd_runtime_suspend(struct ufs_hba *hba);
-extern int ufshcd_runtime_resume(struct ufs_hba *hba);
-extern int ufshcd_runtime_idle(struct ufs_hba *hba);
-extern int ufshcd_system_suspend(struct ufs_hba *hba);
-extern int ufshcd_system_resume(struct ufs_hba *hba);
+#ifdef CONFIG_PM
+extern int ufshcd_runtime_suspend(struct device *dev);
+extern int ufshcd_runtime_resume(struct device *dev);
+#endif
+#ifdef CONFIG_PM_SLEEP
+extern int ufshcd_system_suspend(struct device *dev);
+extern int ufshcd_system_resume(struct device *dev);
+#endif
 extern int ufshcd_shutdown(struct ufs_hba *hba);
 extern int ufshcd_dme_configure_adapt(struct ufs_hba *hba,
 				      int agreed_gear,
@@ -1096,6 +1139,9 @@ int ufshcd_read_desc_param(struct ufs_hba *hba,
 			   u8 param_offset,
 			   u8 *param_read_buf,
 			   u8 param_size);
+int ufshcd_query_attr_retry(struct ufs_hba *hba, enum query_opcode opcode,
+			    enum attr_idn idn, u8 index, u8 selector,
+			    u32 *attr_val);
 int ufshcd_query_attr(struct ufs_hba *hba, enum query_opcode opcode,
 		      enum attr_idn idn, u8 index, u8 selector, u32 *attr_val);
 int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode,
@@ -1160,11 +1206,6 @@ static inline u32 ufshcd_vops_get_ufs_hci_version(struct ufs_hba *hba)
 	return ufshcd_readl(hba, REG_UFS_VERSION);
 }
 
-static inline bool ufshcd_has_utrlcnr(struct ufs_hba *hba)
-{
-	return (hba->ufs_version >= ufshci_version(3, 0));
-}
-
 static inline int ufshcd_vops_clk_scale_notify(struct ufs_hba *hba,
 			bool up, enum ufs_notify_change_status status)
 {
@@ -1226,18 +1267,6 @@ static inline int ufshcd_vops_pwr_change_notify(struct ufs_hba *hba,
 	return -ENOTSUPP;
 }
 
-static inline void ufshcd_vops_setup_xfer_req(struct ufs_hba *hba, int tag,
-					bool is_scsi_cmd)
-{
-	if (hba->vops && hba->vops->setup_xfer_req) {
-		unsigned long flags;
-
-		spin_lock_irqsave(hba->host->host_lock, flags);
-		hba->vops->setup_xfer_req(hba, tag, is_scsi_cmd);
-		spin_unlock_irqrestore(hba->host->host_lock, flags);
-	}
-}
-
 static inline void ufshcd_vops_setup_task_mgmt(struct ufs_hba *hba,
 					int tag, u8 tm_function)
 {
diff --git a/drivers/scsi/ufs/ufshci.h b/drivers/scsi/ufs/ufshci.h
index 5affb1fce5ad..de95be5d11d4 100644
--- a/drivers/scsi/ufs/ufshci.h
+++ b/drivers/scsi/ufs/ufshci.h
@@ -39,7 +39,6 @@ enum {
 	REG_UTP_TRANSFER_REQ_DOOR_BELL		= 0x58,
 	REG_UTP_TRANSFER_REQ_LIST_CLEAR		= 0x5C,
 	REG_UTP_TRANSFER_REQ_LIST_RUN_STOP	= 0x60,
-	REG_UTP_TRANSFER_REQ_LIST_COMPL		= 0x64,
 	REG_UTP_TASK_REQ_LIST_BASE_L		= 0x70,
 	REG_UTP_TASK_REQ_LIST_BASE_H		= 0x74,
 	REG_UTP_TASK_REQ_DOOR_BELL		= 0x78,
diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c
new file mode 100644
index 000000000000..02fb51ae8b25
--- /dev/null
+++ b/drivers/scsi/ufs/ufshpb.c
@@ -0,0 +1,2933 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Universal Flash Storage Host Performance Booster
+ *
+ * Copyright (C) 2017-2021 Samsung Electronics Co., Ltd.
+ *
+ * Authors:
+ *	Yongmyung Lee <ymhungry.lee@samsung.com>
+ *	Jinyoung Choi <j-young.choi@samsung.com>
+ */
+
+#include <asm/unaligned.h>
+#include <linux/async.h>
+
+#include "ufshcd.h"
+#include "ufshpb.h"
+#include "../sd.h"
+
+#define ACTIVATION_THRESHOLD 8 /* 8 IOs */
+#define READ_TO_MS 1000
+#define READ_TO_EXPIRIES 100
+#define POLLING_INTERVAL_MS 200
+#define THROTTLE_MAP_REQ_DEFAULT 1
+
+/* memory management */
+static struct kmem_cache *ufshpb_mctx_cache;
+static mempool_t *ufshpb_mctx_pool;
+static mempool_t *ufshpb_page_pool;
+/* A cache size of 2MB can cache ppn in the 1GB range. */
+static unsigned int ufshpb_host_map_kbytes = 2048;
+static int tot_active_srgn_pages;
+
+static struct workqueue_struct *ufshpb_wq;
+
+static void ufshpb_update_active_info(struct ufshpb_lu *hpb, int rgn_idx,
+				      int srgn_idx);
+
+bool ufshpb_is_allowed(struct ufs_hba *hba)
+{
+	return !(hba->ufshpb_dev.hpb_disabled);
+}
+
+/* HPB version 1.0 is called as legacy version. */
+bool ufshpb_is_legacy(struct ufs_hba *hba)
+{
+	return hba->ufshpb_dev.is_legacy;
+}
+
+static struct ufshpb_lu *ufshpb_get_hpb_data(struct scsi_device *sdev)
+{
+	return sdev->hostdata;
+}
+
+static int ufshpb_get_state(struct ufshpb_lu *hpb)
+{
+	return atomic_read(&hpb->hpb_state);
+}
+
+static void ufshpb_set_state(struct ufshpb_lu *hpb, int state)
+{
+	atomic_set(&hpb->hpb_state, state);
+}
+
+static int ufshpb_is_valid_srgn(struct ufshpb_region *rgn,
+				struct ufshpb_subregion *srgn)
+{
+	return rgn->rgn_state != HPB_RGN_INACTIVE &&
+		srgn->srgn_state == HPB_SRGN_VALID;
+}
+
+static bool ufshpb_is_read_cmd(struct scsi_cmnd *cmd)
+{
+	return req_op(scsi_cmd_to_rq(cmd)) == REQ_OP_READ;
+}
+
+static bool ufshpb_is_write_or_discard(struct scsi_cmnd *cmd)
+{
+	return op_is_write(req_op(scsi_cmd_to_rq(cmd))) ||
+	       op_is_discard(req_op(scsi_cmd_to_rq(cmd)));
+}
+
+static bool ufshpb_is_supported_chunk(struct ufshpb_lu *hpb, int transfer_len)
+{
+	return transfer_len <= hpb->pre_req_max_tr_len;
+}
+
+/*
+ * In this driver, WRITE_BUFFER CMD support 36KB (len=9) ~ 1MB (len=256) as
+ * default. It is possible to change range of transfer_len through sysfs.
+ */
+static inline bool ufshpb_is_required_wb(struct ufshpb_lu *hpb, int len)
+{
+	return len > hpb->pre_req_min_tr_len &&
+	       len <= hpb->pre_req_max_tr_len;
+}
+
+static bool ufshpb_is_general_lun(int lun)
+{
+	return lun < UFS_UPIU_MAX_UNIT_NUM_ID;
+}
+
+static bool ufshpb_is_pinned_region(struct ufshpb_lu *hpb, int rgn_idx)
+{
+	if (hpb->lu_pinned_end != PINNED_NOT_SET &&
+	    rgn_idx >= hpb->lu_pinned_start &&
+	    rgn_idx <= hpb->lu_pinned_end)
+		return true;
+
+	return false;
+}
+
+static void ufshpb_kick_map_work(struct ufshpb_lu *hpb)
+{
+	bool ret = false;
+	unsigned long flags;
+
+	if (ufshpb_get_state(hpb) != HPB_PRESENT)
+		return;
+
+	spin_lock_irqsave(&hpb->rsp_list_lock, flags);
+	if (!list_empty(&hpb->lh_inact_rgn) || !list_empty(&hpb->lh_act_srgn))
+		ret = true;
+	spin_unlock_irqrestore(&hpb->rsp_list_lock, flags);
+
+	if (ret)
+		queue_work(ufshpb_wq, &hpb->map_work);
+}
+
+static bool ufshpb_is_hpb_rsp_valid(struct ufs_hba *hba,
+				    struct ufshcd_lrb *lrbp,
+				    struct utp_hpb_rsp *rsp_field)
+{
+	/* Check HPB_UPDATE_ALERT */
+	if (!(lrbp->ucd_rsp_ptr->header.dword_2 &
+	      UPIU_HEADER_DWORD(0, 2, 0, 0)))
+		return false;
+
+	if (be16_to_cpu(rsp_field->sense_data_len) != DEV_SENSE_SEG_LEN ||
+	    rsp_field->desc_type != DEV_DES_TYPE ||
+	    rsp_field->additional_len != DEV_ADDITIONAL_LEN ||
+	    rsp_field->active_rgn_cnt > MAX_ACTIVE_NUM ||
+	    rsp_field->inactive_rgn_cnt > MAX_INACTIVE_NUM ||
+	    rsp_field->hpb_op == HPB_RSP_NONE ||
+	    (rsp_field->hpb_op == HPB_RSP_REQ_REGION_UPDATE &&
+	     !rsp_field->active_rgn_cnt && !rsp_field->inactive_rgn_cnt))
+		return false;
+
+	if (!ufshpb_is_general_lun(rsp_field->lun)) {
+		dev_warn(hba->dev, "ufshpb: lun(%d) not supported\n",
+			 lrbp->lun);
+		return false;
+	}
+
+	return true;
+}
+
+static void ufshpb_iterate_rgn(struct ufshpb_lu *hpb, int rgn_idx, int srgn_idx,
+			       int srgn_offset, int cnt, bool set_dirty)
+{
+	struct ufshpb_region *rgn;
+	struct ufshpb_subregion *srgn, *prev_srgn = NULL;
+	int set_bit_len;
+	int bitmap_len;
+	unsigned long flags;
+
+next_srgn:
+	rgn = hpb->rgn_tbl + rgn_idx;
+	srgn = rgn->srgn_tbl + srgn_idx;
+
+	if (likely(!srgn->is_last))
+		bitmap_len = hpb->entries_per_srgn;
+	else
+		bitmap_len = hpb->last_srgn_entries;
+
+	if ((srgn_offset + cnt) > bitmap_len)
+		set_bit_len = bitmap_len - srgn_offset;
+	else
+		set_bit_len = cnt;
+
+	spin_lock_irqsave(&hpb->rgn_state_lock, flags);
+	if (rgn->rgn_state != HPB_RGN_INACTIVE) {
+		if (set_dirty) {
+			if (srgn->srgn_state == HPB_SRGN_VALID)
+				bitmap_set(srgn->mctx->ppn_dirty, srgn_offset,
+					   set_bit_len);
+		} else if (hpb->is_hcm) {
+			 /* rewind the read timer for lru regions */
+			rgn->read_timeout = ktime_add_ms(ktime_get(),
+					rgn->hpb->params.read_timeout_ms);
+			rgn->read_timeout_expiries =
+				rgn->hpb->params.read_timeout_expiries;
+		}
+	}
+	spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);
+
+	if (hpb->is_hcm && prev_srgn != srgn) {
+		bool activate = false;
+
+		spin_lock(&rgn->rgn_lock);
+		if (set_dirty) {
+			rgn->reads -= srgn->reads;
+			srgn->reads = 0;
+			set_bit(RGN_FLAG_DIRTY, &rgn->rgn_flags);
+		} else {
+			srgn->reads++;
+			rgn->reads++;
+			if (srgn->reads == hpb->params.activation_thld)
+				activate = true;
+		}
+		spin_unlock(&rgn->rgn_lock);
+
+		if (activate ||
+		    test_and_clear_bit(RGN_FLAG_UPDATE, &rgn->rgn_flags)) {
+			spin_lock_irqsave(&hpb->rsp_list_lock, flags);
+			ufshpb_update_active_info(hpb, rgn_idx, srgn_idx);
+			spin_unlock_irqrestore(&hpb->rsp_list_lock, flags);
+			dev_dbg(&hpb->sdev_ufs_lu->sdev_dev,
+				"activate region %d-%d\n", rgn_idx, srgn_idx);
+		}
+
+		prev_srgn = srgn;
+	}
+
+	srgn_offset = 0;
+	if (++srgn_idx == hpb->srgns_per_rgn) {
+		srgn_idx = 0;
+		rgn_idx++;
+	}
+
+	cnt -= set_bit_len;
+	if (cnt > 0)
+		goto next_srgn;
+}
+
+static bool ufshpb_test_ppn_dirty(struct ufshpb_lu *hpb, int rgn_idx,
+				  int srgn_idx, int srgn_offset, int cnt)
+{
+	struct ufshpb_region *rgn;
+	struct ufshpb_subregion *srgn;
+	int bitmap_len;
+	int bit_len;
+
+next_srgn:
+	rgn = hpb->rgn_tbl + rgn_idx;
+	srgn = rgn->srgn_tbl + srgn_idx;
+
+	if (likely(!srgn->is_last))
+		bitmap_len = hpb->entries_per_srgn;
+	else
+		bitmap_len = hpb->last_srgn_entries;
+
+	if (!ufshpb_is_valid_srgn(rgn, srgn))
+		return true;
+
+	/*
+	 * If the region state is active, mctx must be allocated.
+	 * In this case, check whether the region is evicted or
+	 * mctx allocation fail.
+	 */
+	if (unlikely(!srgn->mctx)) {
+		dev_err(&hpb->sdev_ufs_lu->sdev_dev,
+			"no mctx in region %d subregion %d.\n",
+			srgn->rgn_idx, srgn->srgn_idx);
+		return true;
+	}
+
+	if ((srgn_offset + cnt) > bitmap_len)
+		bit_len = bitmap_len - srgn_offset;
+	else
+		bit_len = cnt;
+
+	if (find_next_bit(srgn->mctx->ppn_dirty, bit_len + srgn_offset,
+			  srgn_offset) < bit_len + srgn_offset)
+		return true;
+
+	srgn_offset = 0;
+	if (++srgn_idx == hpb->srgns_per_rgn) {
+		srgn_idx = 0;
+		rgn_idx++;
+	}
+
+	cnt -= bit_len;
+	if (cnt > 0)
+		goto next_srgn;
+
+	return false;
+}
+
+static inline bool is_rgn_dirty(struct ufshpb_region *rgn)
+{
+	return test_bit(RGN_FLAG_DIRTY, &rgn->rgn_flags);
+}
+
+static int ufshpb_fill_ppn_from_page(struct ufshpb_lu *hpb,
+				     struct ufshpb_map_ctx *mctx, int pos,
+				     int len, __be64 *ppn_buf)
+{
+	struct page *page;
+	int index, offset;
+	int copied;
+
+	index = pos / (PAGE_SIZE / HPB_ENTRY_SIZE);
+	offset = pos % (PAGE_SIZE / HPB_ENTRY_SIZE);
+
+	if ((offset + len) <= (PAGE_SIZE / HPB_ENTRY_SIZE))
+		copied = len;
+	else
+		copied = (PAGE_SIZE / HPB_ENTRY_SIZE) - offset;
+
+	page = mctx->m_page[index];
+	if (unlikely(!page)) {
+		dev_err(&hpb->sdev_ufs_lu->sdev_dev,
+			"error. cannot find page in mctx\n");
+		return -ENOMEM;
+	}
+
+	memcpy(ppn_buf, page_address(page) + (offset * HPB_ENTRY_SIZE),
+	       copied * HPB_ENTRY_SIZE);
+
+	return copied;
+}
+
+static void
+ufshpb_get_pos_from_lpn(struct ufshpb_lu *hpb, unsigned long lpn, int *rgn_idx,
+			int *srgn_idx, int *offset)
+{
+	int rgn_offset;
+
+	*rgn_idx = lpn >> hpb->entries_per_rgn_shift;
+	rgn_offset = lpn & hpb->entries_per_rgn_mask;
+	*srgn_idx = rgn_offset >> hpb->entries_per_srgn_shift;
+	*offset = rgn_offset & hpb->entries_per_srgn_mask;
+}
+
+static void
+ufshpb_set_hpb_read_to_upiu(struct ufs_hba *hba, struct ufshpb_lu *hpb,
+			    struct ufshcd_lrb *lrbp, u32 lpn, __be64 ppn,
+			    u8 transfer_len, int read_id)
+{
+	unsigned char *cdb = lrbp->cmd->cmnd;
+	__be64 ppn_tmp = ppn;
+	cdb[0] = UFSHPB_READ;
+
+	if (hba->dev_quirks & UFS_DEVICE_QUIRK_SWAP_L2P_ENTRY_FOR_HPB_READ)
+		ppn_tmp = swab64(ppn);
+
+	/* ppn value is stored as big-endian in the host memory */
+	memcpy(&cdb[6], &ppn_tmp, sizeof(__be64));
+	cdb[14] = transfer_len;
+	cdb[15] = read_id;
+
+	lrbp->cmd->cmd_len = UFS_CDB_SIZE;
+}
+
+static inline void ufshpb_set_write_buf_cmd(unsigned char *cdb,
+					    unsigned long lpn, unsigned int len,
+					    int read_id)
+{
+	cdb[0] = UFSHPB_WRITE_BUFFER;
+	cdb[1] = UFSHPB_WRITE_BUFFER_PREFETCH_ID;
+
+	put_unaligned_be32(lpn, &cdb[2]);
+	cdb[6] = read_id;
+	put_unaligned_be16(len * HPB_ENTRY_SIZE, &cdb[7]);
+
+	cdb[9] = 0x00;	/* Control = 0x00 */
+}
+
+static struct ufshpb_req *ufshpb_get_pre_req(struct ufshpb_lu *hpb)
+{
+	struct ufshpb_req *pre_req;
+
+	if (hpb->num_inflight_pre_req >= hpb->throttle_pre_req) {
+		dev_info(&hpb->sdev_ufs_lu->sdev_dev,
+			 "pre_req throttle. inflight %d throttle %d",
+			 hpb->num_inflight_pre_req, hpb->throttle_pre_req);
+		return NULL;
+	}
+
+	pre_req = list_first_entry_or_null(&hpb->lh_pre_req_free,
+					   struct ufshpb_req, list_req);
+	if (!pre_req) {
+		dev_info(&hpb->sdev_ufs_lu->sdev_dev, "There is no pre_req");
+		return NULL;
+	}
+
+	list_del_init(&pre_req->list_req);
+	hpb->num_inflight_pre_req++;
+
+	return pre_req;
+}
+
+static inline void ufshpb_put_pre_req(struct ufshpb_lu *hpb,
+				      struct ufshpb_req *pre_req)
+{
+	pre_req->req = NULL;
+	bio_reset(pre_req->bio);
+	list_add_tail(&pre_req->list_req, &hpb->lh_pre_req_free);
+	hpb->num_inflight_pre_req--;
+}
+
+static void ufshpb_pre_req_compl_fn(struct request *req, blk_status_t error)
+{
+	struct ufshpb_req *pre_req = (struct ufshpb_req *)req->end_io_data;
+	struct ufshpb_lu *hpb = pre_req->hpb;
+	unsigned long flags;
+
+	if (error) {
+		struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
+		struct scsi_sense_hdr sshdr;
+
+		dev_err(&hpb->sdev_ufs_lu->sdev_dev, "block status %d", error);
+		scsi_command_normalize_sense(cmd, &sshdr);
+		dev_err(&hpb->sdev_ufs_lu->sdev_dev,
+			"code %x sense_key %x asc %x ascq %x",
+			sshdr.response_code,
+			sshdr.sense_key, sshdr.asc, sshdr.ascq);
+		dev_err(&hpb->sdev_ufs_lu->sdev_dev,
+			"byte4 %x byte5 %x byte6 %x additional_len %x",
+			sshdr.byte4, sshdr.byte5,
+			sshdr.byte6, sshdr.additional_length);
+	}
+
+	blk_mq_free_request(req);
+	spin_lock_irqsave(&hpb->rgn_state_lock, flags);
+	ufshpb_put_pre_req(pre_req->hpb, pre_req);
+	spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);
+}
+
+static int ufshpb_prep_entry(struct ufshpb_req *pre_req, struct page *page)
+{
+	struct ufshpb_lu *hpb = pre_req->hpb;
+	struct ufshpb_region *rgn;
+	struct ufshpb_subregion *srgn;
+	__be64 *addr;
+	int offset = 0;
+	int copied;
+	unsigned long lpn = pre_req->wb.lpn;
+	int rgn_idx, srgn_idx, srgn_offset;
+	unsigned long flags;
+
+	addr = page_address(page);
+	ufshpb_get_pos_from_lpn(hpb, lpn, &rgn_idx, &srgn_idx, &srgn_offset);
+
+	spin_lock_irqsave(&hpb->rgn_state_lock, flags);
+
+next_offset:
+	rgn = hpb->rgn_tbl + rgn_idx;
+	srgn = rgn->srgn_tbl + srgn_idx;
+
+	if (!ufshpb_is_valid_srgn(rgn, srgn))
+		goto mctx_error;
+
+	if (!srgn->mctx)
+		goto mctx_error;
+
+	copied = ufshpb_fill_ppn_from_page(hpb, srgn->mctx, srgn_offset,
+					   pre_req->wb.len - offset,
+					   &addr[offset]);
+
+	if (copied < 0)
+		goto mctx_error;
+
+	offset += copied;
+	srgn_offset += copied;
+
+	if (srgn_offset == hpb->entries_per_srgn) {
+		srgn_offset = 0;
+
+		if (++srgn_idx == hpb->srgns_per_rgn) {
+			srgn_idx = 0;
+			rgn_idx++;
+		}
+	}
+
+	if (offset < pre_req->wb.len)
+		goto next_offset;
+
+	spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);
+	return 0;
+mctx_error:
+	spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);
+	return -ENOMEM;
+}
+
+static int ufshpb_pre_req_add_bio_page(struct ufshpb_lu *hpb,
+				       struct request_queue *q,
+				       struct ufshpb_req *pre_req)
+{
+	struct page *page = pre_req->wb.m_page;
+	struct bio *bio = pre_req->bio;
+	int entries_bytes, ret;
+
+	if (!page)
+		return -ENOMEM;
+
+	if (ufshpb_prep_entry(pre_req, page))
+		return -ENOMEM;
+
+	entries_bytes = pre_req->wb.len * sizeof(__be64);
+
+	ret = bio_add_pc_page(q, bio, page, entries_bytes, 0);
+	if (ret != entries_bytes) {
+		dev_err(&hpb->sdev_ufs_lu->sdev_dev,
+			"bio_add_pc_page fail: %d", ret);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static inline int ufshpb_get_read_id(struct ufshpb_lu *hpb)
+{
+	if (++hpb->cur_read_id >= MAX_HPB_READ_ID)
+		hpb->cur_read_id = 1;
+	return hpb->cur_read_id;
+}
+
+static int ufshpb_execute_pre_req(struct ufshpb_lu *hpb, struct scsi_cmnd *cmd,
+				  struct ufshpb_req *pre_req, int read_id)
+{
+	struct scsi_device *sdev = cmd->device;
+	struct request_queue *q = sdev->request_queue;
+	struct request *req;
+	struct scsi_request *rq;
+	struct bio *bio = pre_req->bio;
+
+	pre_req->hpb = hpb;
+	pre_req->wb.lpn = sectors_to_logical(cmd->device,
+					     blk_rq_pos(scsi_cmd_to_rq(cmd)));
+	pre_req->wb.len = sectors_to_logical(cmd->device,
+					     blk_rq_sectors(scsi_cmd_to_rq(cmd)));
+	if (ufshpb_pre_req_add_bio_page(hpb, q, pre_req))
+		return -ENOMEM;
+
+	req = pre_req->req;
+
+	/* 1. request setup */
+	blk_rq_append_bio(req, bio);
+	req->rq_disk = NULL;
+	req->end_io_data = (void *)pre_req;
+	req->end_io = ufshpb_pre_req_compl_fn;
+
+	/* 2. scsi_request setup */
+	rq = scsi_req(req);
+	rq->retries = 1;
+
+	ufshpb_set_write_buf_cmd(rq->cmd, pre_req->wb.lpn, pre_req->wb.len,
+				 read_id);
+	rq->cmd_len = scsi_command_size(rq->cmd);
+
+	if (blk_insert_cloned_request(q, req) != BLK_STS_OK)
+		return -EAGAIN;
+
+	hpb->stats.pre_req_cnt++;
+
+	return 0;
+}
+
+static int ufshpb_issue_pre_req(struct ufshpb_lu *hpb, struct scsi_cmnd *cmd,
+				int *read_id)
+{
+	struct ufshpb_req *pre_req;
+	struct request *req = NULL;
+	unsigned long flags;
+	int _read_id;
+	int ret = 0;
+
+	req = blk_get_request(cmd->device->request_queue,
+			      REQ_OP_DRV_OUT | REQ_SYNC, BLK_MQ_REQ_NOWAIT);
+	if (IS_ERR(req))
+		return -EAGAIN;
+
+	spin_lock_irqsave(&hpb->rgn_state_lock, flags);
+	pre_req = ufshpb_get_pre_req(hpb);
+	if (!pre_req) {
+		ret = -EAGAIN;
+		goto unlock_out;
+	}
+	_read_id = ufshpb_get_read_id(hpb);
+	spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);
+
+	pre_req->req = req;
+
+	ret = ufshpb_execute_pre_req(hpb, cmd, pre_req, _read_id);
+	if (ret)
+		goto free_pre_req;
+
+	*read_id = _read_id;
+
+	return ret;
+free_pre_req:
+	spin_lock_irqsave(&hpb->rgn_state_lock, flags);
+	ufshpb_put_pre_req(hpb, pre_req);
+unlock_out:
+	spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);
+	blk_put_request(req);
+	return ret;
+}
+
+/*
+ * This function will set up HPB read command using host-side L2P map data.
+ */
+int ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
+{
+	struct ufshpb_lu *hpb;
+	struct ufshpb_region *rgn;
+	struct ufshpb_subregion *srgn;
+	struct scsi_cmnd *cmd = lrbp->cmd;
+	u32 lpn;
+	__be64 ppn;
+	unsigned long flags;
+	int transfer_len, rgn_idx, srgn_idx, srgn_offset;
+	int read_id = 0;
+	int err = 0;
+
+	hpb = ufshpb_get_hpb_data(cmd->device);
+	if (!hpb)
+		return -ENODEV;
+
+	if (ufshpb_get_state(hpb) == HPB_INIT)
+		return -ENODEV;
+
+	if (ufshpb_get_state(hpb) != HPB_PRESENT) {
+		dev_notice(&hpb->sdev_ufs_lu->sdev_dev,
+			   "%s: ufshpb state is not PRESENT", __func__);
+		return -ENODEV;
+	}
+
+	if (blk_rq_is_passthrough(scsi_cmd_to_rq(cmd)) ||
+	    (!ufshpb_is_write_or_discard(cmd) &&
+	     !ufshpb_is_read_cmd(cmd)))
+		return 0;
+
+	transfer_len = sectors_to_logical(cmd->device,
+					  blk_rq_sectors(scsi_cmd_to_rq(cmd)));
+	if (unlikely(!transfer_len))
+		return 0;
+
+	lpn = sectors_to_logical(cmd->device, blk_rq_pos(scsi_cmd_to_rq(cmd)));
+	ufshpb_get_pos_from_lpn(hpb, lpn, &rgn_idx, &srgn_idx, &srgn_offset);
+	rgn = hpb->rgn_tbl + rgn_idx;
+	srgn = rgn->srgn_tbl + srgn_idx;
+
+	/* If command type is WRITE or DISCARD, set bitmap as drity */
+	if (ufshpb_is_write_or_discard(cmd)) {
+		ufshpb_iterate_rgn(hpb, rgn_idx, srgn_idx, srgn_offset,
+				   transfer_len, true);
+		return 0;
+	}
+
+	if (!ufshpb_is_supported_chunk(hpb, transfer_len))
+		return 0;
+
+	WARN_ON_ONCE(transfer_len > HPB_MULTI_CHUNK_HIGH);
+
+	if (hpb->is_hcm) {
+		/*
+		 * in host control mode, reads are the main source for
+		 * activation trials.
+		 */
+		ufshpb_iterate_rgn(hpb, rgn_idx, srgn_idx, srgn_offset,
+				   transfer_len, false);
+
+		/* keep those counters normalized */
+		if (rgn->reads > hpb->entries_per_srgn)
+			schedule_work(&hpb->ufshpb_normalization_work);
+	}
+
+	spin_lock_irqsave(&hpb->rgn_state_lock, flags);
+	if (ufshpb_test_ppn_dirty(hpb, rgn_idx, srgn_idx, srgn_offset,
+				   transfer_len)) {
+		hpb->stats.miss_cnt++;
+		spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);
+		return 0;
+	}
+
+	err = ufshpb_fill_ppn_from_page(hpb, srgn->mctx, srgn_offset, 1, &ppn);
+	spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);
+	if (unlikely(err < 0)) {
+		/*
+		 * In this case, the region state is active,
+		 * but the ppn table is not allocated.
+		 * Make sure that ppn table must be allocated on
+		 * active state.
+		 */
+		dev_err(hba->dev, "get ppn failed. err %d\n", err);
+		return err;
+	}
+	if (!ufshpb_is_legacy(hba) &&
+	    ufshpb_is_required_wb(hpb, transfer_len)) {
+		err = ufshpb_issue_pre_req(hpb, cmd, &read_id);
+		if (err) {
+			unsigned long timeout;
+
+			timeout = cmd->jiffies_at_alloc + msecs_to_jiffies(
+				  hpb->params.requeue_timeout_ms);
+
+			if (time_before(jiffies, timeout))
+				return -EAGAIN;
+
+			hpb->stats.miss_cnt++;
+			return 0;
+		}
+	}
+
+	ufshpb_set_hpb_read_to_upiu(hba, hpb, lrbp, lpn, ppn, transfer_len,
+				    read_id);
+
+	hpb->stats.hit_cnt++;
+	return 0;
+}
+
+static struct ufshpb_req *ufshpb_get_req(struct ufshpb_lu *hpb,
+					 int rgn_idx, enum req_opf dir,
+					 bool atomic)
+{
+	struct ufshpb_req *rq;
+	struct request *req;
+	int retries = HPB_MAP_REQ_RETRIES;
+
+	rq = kmem_cache_alloc(hpb->map_req_cache, GFP_KERNEL);
+	if (!rq)
+		return NULL;
+
+retry:
+	req = blk_get_request(hpb->sdev_ufs_lu->request_queue, dir,
+			      BLK_MQ_REQ_NOWAIT);
+
+	if (!atomic && (PTR_ERR(req) == -EWOULDBLOCK) && (--retries > 0)) {
+		usleep_range(3000, 3100);
+		goto retry;
+	}
+
+	if (IS_ERR(req))
+		goto free_rq;
+
+	rq->hpb = hpb;
+	rq->req = req;
+	rq->rb.rgn_idx = rgn_idx;
+
+	return rq;
+
+free_rq:
+	kmem_cache_free(hpb->map_req_cache, rq);
+	return NULL;
+}
+
+static void ufshpb_put_req(struct ufshpb_lu *hpb, struct ufshpb_req *rq)
+{
+	blk_put_request(rq->req);
+	kmem_cache_free(hpb->map_req_cache, rq);
+}
+
+static struct ufshpb_req *ufshpb_get_map_req(struct ufshpb_lu *hpb,
+					     struct ufshpb_subregion *srgn)
+{
+	struct ufshpb_req *map_req;
+	struct bio *bio;
+	unsigned long flags;
+
+	if (hpb->is_hcm &&
+	    hpb->num_inflight_map_req >= hpb->params.inflight_map_req) {
+		dev_info(&hpb->sdev_ufs_lu->sdev_dev,
+			 "map_req throttle. inflight %d throttle %d",
+			 hpb->num_inflight_map_req,
+			 hpb->params.inflight_map_req);
+		return NULL;
+	}
+
+	map_req = ufshpb_get_req(hpb, srgn->rgn_idx, REQ_OP_DRV_IN, false);
+	if (!map_req)
+		return NULL;
+
+	bio = bio_alloc(GFP_KERNEL, hpb->pages_per_srgn);
+	if (!bio) {
+		ufshpb_put_req(hpb, map_req);
+		return NULL;
+	}
+
+	map_req->bio = bio;
+
+	map_req->rb.srgn_idx = srgn->srgn_idx;
+	map_req->rb.mctx = srgn->mctx;
+
+	spin_lock_irqsave(&hpb->param_lock, flags);
+	hpb->num_inflight_map_req++;
+	spin_unlock_irqrestore(&hpb->param_lock, flags);
+
+	return map_req;
+}
+
+static void ufshpb_put_map_req(struct ufshpb_lu *hpb,
+			       struct ufshpb_req *map_req)
+{
+	unsigned long flags;
+
+	bio_put(map_req->bio);
+	ufshpb_put_req(hpb, map_req);
+
+	spin_lock_irqsave(&hpb->param_lock, flags);
+	hpb->num_inflight_map_req--;
+	spin_unlock_irqrestore(&hpb->param_lock, flags);
+}
+
+static int ufshpb_clear_dirty_bitmap(struct ufshpb_lu *hpb,
+				     struct ufshpb_subregion *srgn)
+{
+	struct ufshpb_region *rgn;
+	u32 num_entries = hpb->entries_per_srgn;
+
+	if (!srgn->mctx) {
+		dev_err(&hpb->sdev_ufs_lu->sdev_dev,
+			"no mctx in region %d subregion %d.\n",
+			srgn->rgn_idx, srgn->srgn_idx);
+		return -1;
+	}
+
+	if (unlikely(srgn->is_last))
+		num_entries = hpb->last_srgn_entries;
+
+	bitmap_zero(srgn->mctx->ppn_dirty, num_entries);
+
+	rgn = hpb->rgn_tbl + srgn->rgn_idx;
+	clear_bit(RGN_FLAG_DIRTY, &rgn->rgn_flags);
+
+	return 0;
+}
+
+static void ufshpb_update_active_info(struct ufshpb_lu *hpb, int rgn_idx,
+				      int srgn_idx)
+{
+	struct ufshpb_region *rgn;
+	struct ufshpb_subregion *srgn;
+
+	rgn = hpb->rgn_tbl + rgn_idx;
+	srgn = rgn->srgn_tbl + srgn_idx;
+
+	list_del_init(&rgn->list_inact_rgn);
+
+	if (list_empty(&srgn->list_act_srgn))
+		list_add_tail(&srgn->list_act_srgn, &hpb->lh_act_srgn);
+
+	hpb->stats.rb_active_cnt++;
+}
+
+static void ufshpb_update_inactive_info(struct ufshpb_lu *hpb, int rgn_idx)
+{
+	struct ufshpb_region *rgn;
+	struct ufshpb_subregion *srgn;
+	int srgn_idx;
+
+	rgn = hpb->rgn_tbl + rgn_idx;
+
+	for_each_sub_region(rgn, srgn_idx, srgn)
+		list_del_init(&srgn->list_act_srgn);
+
+	if (list_empty(&rgn->list_inact_rgn))
+		list_add_tail(&rgn->list_inact_rgn, &hpb->lh_inact_rgn);
+
+	hpb->stats.rb_inactive_cnt++;
+}
+
+static void ufshpb_activate_subregion(struct ufshpb_lu *hpb,
+				      struct ufshpb_subregion *srgn)
+{
+	struct ufshpb_region *rgn;
+
+	/*
+	 * If there is no mctx in subregion
+	 * after I/O progress for HPB_READ_BUFFER, the region to which the
+	 * subregion belongs was evicted.
+	 * Make sure the region must not evict in I/O progress
+	 */
+	if (!srgn->mctx) {
+		dev_err(&hpb->sdev_ufs_lu->sdev_dev,
+			"no mctx in region %d subregion %d.\n",
+			srgn->rgn_idx, srgn->srgn_idx);
+		srgn->srgn_state = HPB_SRGN_INVALID;
+		return;
+	}
+
+	rgn = hpb->rgn_tbl + srgn->rgn_idx;
+
+	if (unlikely(rgn->rgn_state == HPB_RGN_INACTIVE)) {
+		dev_err(&hpb->sdev_ufs_lu->sdev_dev,
+			"region %d subregion %d evicted\n",
+			srgn->rgn_idx, srgn->srgn_idx);
+		srgn->srgn_state = HPB_SRGN_INVALID;
+		return;
+	}
+	srgn->srgn_state = HPB_SRGN_VALID;
+}
+
+static void ufshpb_umap_req_compl_fn(struct request *req, blk_status_t error)
+{
+	struct ufshpb_req *umap_req = (struct ufshpb_req *)req->end_io_data;
+
+	ufshpb_put_req(umap_req->hpb, umap_req);
+}
+
+static void ufshpb_map_req_compl_fn(struct request *req, blk_status_t error)
+{
+	struct ufshpb_req *map_req = (struct ufshpb_req *) req->end_io_data;
+	struct ufshpb_lu *hpb = map_req->hpb;
+	struct ufshpb_subregion *srgn;
+	unsigned long flags;
+
+	srgn = hpb->rgn_tbl[map_req->rb.rgn_idx].srgn_tbl +
+		map_req->rb.srgn_idx;
+
+	ufshpb_clear_dirty_bitmap(hpb, srgn);
+	spin_lock_irqsave(&hpb->rgn_state_lock, flags);
+	ufshpb_activate_subregion(hpb, srgn);
+	spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);
+
+	ufshpb_put_map_req(map_req->hpb, map_req);
+}
+
+static void ufshpb_set_unmap_cmd(unsigned char *cdb, struct ufshpb_region *rgn)
+{
+	cdb[0] = UFSHPB_WRITE_BUFFER;
+	cdb[1] = rgn ? UFSHPB_WRITE_BUFFER_INACT_SINGLE_ID :
+			  UFSHPB_WRITE_BUFFER_INACT_ALL_ID;
+	if (rgn)
+		put_unaligned_be16(rgn->rgn_idx, &cdb[2]);
+	cdb[9] = 0x00;
+}
+
+static void ufshpb_set_read_buf_cmd(unsigned char *cdb, int rgn_idx,
+				    int srgn_idx, int srgn_mem_size)
+{
+	cdb[0] = UFSHPB_READ_BUFFER;
+	cdb[1] = UFSHPB_READ_BUFFER_ID;
+
+	put_unaligned_be16(rgn_idx, &cdb[2]);
+	put_unaligned_be16(srgn_idx, &cdb[4]);
+	put_unaligned_be24(srgn_mem_size, &cdb[6]);
+
+	cdb[9] = 0x00;
+}
+
+static void ufshpb_execute_umap_req(struct ufshpb_lu *hpb,
+				   struct ufshpb_req *umap_req,
+				   struct ufshpb_region *rgn)
+{
+	struct request *req;
+	struct scsi_request *rq;
+
+	req = umap_req->req;
+	req->timeout = 0;
+	req->end_io_data = (void *)umap_req;
+	rq = scsi_req(req);
+	ufshpb_set_unmap_cmd(rq->cmd, rgn);
+	rq->cmd_len = HPB_WRITE_BUFFER_CMD_LENGTH;
+
+	blk_execute_rq_nowait(NULL, req, 1, ufshpb_umap_req_compl_fn);
+
+	hpb->stats.umap_req_cnt++;
+}
+
+static int ufshpb_execute_map_req(struct ufshpb_lu *hpb,
+				  struct ufshpb_req *map_req, bool last)
+{
+	struct request_queue *q;
+	struct request *req;
+	struct scsi_request *rq;
+	int mem_size = hpb->srgn_mem_size;
+	int ret = 0;
+	int i;
+
+	q = hpb->sdev_ufs_lu->request_queue;
+	for (i = 0; i < hpb->pages_per_srgn; i++) {
+		ret = bio_add_pc_page(q, map_req->bio, map_req->rb.mctx->m_page[i],
+				      PAGE_SIZE, 0);
+		if (ret != PAGE_SIZE) {
+			dev_err(&hpb->sdev_ufs_lu->sdev_dev,
+				   "bio_add_pc_page fail %d - %d\n",
+				   map_req->rb.rgn_idx, map_req->rb.srgn_idx);
+			return ret;
+		}
+	}
+
+	req = map_req->req;
+
+	blk_rq_append_bio(req, map_req->bio);
+
+	req->end_io_data = map_req;
+
+	rq = scsi_req(req);
+
+	if (unlikely(last))
+		mem_size = hpb->last_srgn_entries * HPB_ENTRY_SIZE;
+
+	ufshpb_set_read_buf_cmd(rq->cmd, map_req->rb.rgn_idx,
+				map_req->rb.srgn_idx, mem_size);
+	rq->cmd_len = HPB_READ_BUFFER_CMD_LENGTH;
+
+	blk_execute_rq_nowait(NULL, req, 1, ufshpb_map_req_compl_fn);
+
+	hpb->stats.map_req_cnt++;
+	return 0;
+}
+
+static struct ufshpb_map_ctx *ufshpb_get_map_ctx(struct ufshpb_lu *hpb,
+						 bool last)
+{
+	struct ufshpb_map_ctx *mctx;
+	u32 num_entries = hpb->entries_per_srgn;
+	int i, j;
+
+	mctx = mempool_alloc(ufshpb_mctx_pool, GFP_KERNEL);
+	if (!mctx)
+		return NULL;
+
+	mctx->m_page = kmem_cache_alloc(hpb->m_page_cache, GFP_KERNEL);
+	if (!mctx->m_page)
+		goto release_mctx;
+
+	if (unlikely(last))
+		num_entries = hpb->last_srgn_entries;
+
+	mctx->ppn_dirty = bitmap_zalloc(num_entries, GFP_KERNEL);
+	if (!mctx->ppn_dirty)
+		goto release_m_page;
+
+	for (i = 0; i < hpb->pages_per_srgn; i++) {
+		mctx->m_page[i] = mempool_alloc(ufshpb_page_pool, GFP_KERNEL);
+		if (!mctx->m_page[i]) {
+			for (j = 0; j < i; j++)
+				mempool_free(mctx->m_page[j], ufshpb_page_pool);
+			goto release_ppn_dirty;
+		}
+		clear_page(page_address(mctx->m_page[i]));
+	}
+
+	return mctx;
+
+release_ppn_dirty:
+	bitmap_free(mctx->ppn_dirty);
+release_m_page:
+	kmem_cache_free(hpb->m_page_cache, mctx->m_page);
+release_mctx:
+	mempool_free(mctx, ufshpb_mctx_pool);
+	return NULL;
+}
+
+static void ufshpb_put_map_ctx(struct ufshpb_lu *hpb,
+			       struct ufshpb_map_ctx *mctx)
+{
+	int i;
+
+	for (i = 0; i < hpb->pages_per_srgn; i++)
+		mempool_free(mctx->m_page[i], ufshpb_page_pool);
+
+	bitmap_free(mctx->ppn_dirty);
+	kmem_cache_free(hpb->m_page_cache, mctx->m_page);
+	mempool_free(mctx, ufshpb_mctx_pool);
+}
+
+static int ufshpb_check_srgns_issue_state(struct ufshpb_lu *hpb,
+					  struct ufshpb_region *rgn)
+{
+	struct ufshpb_subregion *srgn;
+	int srgn_idx;
+
+	for_each_sub_region(rgn, srgn_idx, srgn)
+		if (srgn->srgn_state == HPB_SRGN_ISSUED)
+			return -EPERM;
+
+	return 0;
+}
+
+static void ufshpb_read_to_handler(struct work_struct *work)
+{
+	struct ufshpb_lu *hpb = container_of(work, struct ufshpb_lu,
+					     ufshpb_read_to_work.work);
+	struct victim_select_info *lru_info = &hpb->lru_info;
+	struct ufshpb_region *rgn, *next_rgn;
+	unsigned long flags;
+	unsigned int poll;
+	LIST_HEAD(expired_list);
+
+	if (test_and_set_bit(TIMEOUT_WORK_RUNNING, &hpb->work_data_bits))
+		return;
+
+	spin_lock_irqsave(&hpb->rgn_state_lock, flags);
+
+	list_for_each_entry_safe(rgn, next_rgn, &lru_info->lh_lru_rgn,
+				 list_lru_rgn) {
+		bool timedout = ktime_after(ktime_get(), rgn->read_timeout);
+
+		if (timedout) {
+			rgn->read_timeout_expiries--;
+			if (is_rgn_dirty(rgn) ||
+			    rgn->read_timeout_expiries == 0)
+				list_add(&rgn->list_expired_rgn, &expired_list);
+			else
+				rgn->read_timeout = ktime_add_ms(ktime_get(),
+						hpb->params.read_timeout_ms);
+		}
+	}
+
+	spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);
+
+	list_for_each_entry_safe(rgn, next_rgn, &expired_list,
+				 list_expired_rgn) {
+		list_del_init(&rgn->list_expired_rgn);
+		spin_lock_irqsave(&hpb->rsp_list_lock, flags);
+		ufshpb_update_inactive_info(hpb, rgn->rgn_idx);
+		spin_unlock_irqrestore(&hpb->rsp_list_lock, flags);
+	}
+
+	ufshpb_kick_map_work(hpb);
+
+	clear_bit(TIMEOUT_WORK_RUNNING, &hpb->work_data_bits);
+
+	poll = hpb->params.timeout_polling_interval_ms;
+	schedule_delayed_work(&hpb->ufshpb_read_to_work,
+			      msecs_to_jiffies(poll));
+}
+
+static void ufshpb_add_lru_info(struct victim_select_info *lru_info,
+				struct ufshpb_region *rgn)
+{
+	rgn->rgn_state = HPB_RGN_ACTIVE;
+	list_add_tail(&rgn->list_lru_rgn, &lru_info->lh_lru_rgn);
+	atomic_inc(&lru_info->active_cnt);
+	if (rgn->hpb->is_hcm) {
+		rgn->read_timeout =
+			ktime_add_ms(ktime_get(),
+				     rgn->hpb->params.read_timeout_ms);
+		rgn->read_timeout_expiries =
+			rgn->hpb->params.read_timeout_expiries;
+	}
+}
+
+static void ufshpb_hit_lru_info(struct victim_select_info *lru_info,
+				struct ufshpb_region *rgn)
+{
+	list_move_tail(&rgn->list_lru_rgn, &lru_info->lh_lru_rgn);
+}
+
+static struct ufshpb_region *ufshpb_victim_lru_info(struct ufshpb_lu *hpb)
+{
+	struct victim_select_info *lru_info = &hpb->lru_info;
+	struct ufshpb_region *rgn, *victim_rgn = NULL;
+
+	list_for_each_entry(rgn, &lru_info->lh_lru_rgn, list_lru_rgn) {
+		if (!rgn) {
+			dev_err(&hpb->sdev_ufs_lu->sdev_dev,
+				"%s: no region allocated\n",
+				__func__);
+			return NULL;
+		}
+		if (ufshpb_check_srgns_issue_state(hpb, rgn))
+			continue;
+
+		/*
+		 * in host control mode, verify that the exiting region
+		 * has fewer reads
+		 */
+		if (hpb->is_hcm &&
+		    rgn->reads > hpb->params.eviction_thld_exit)
+			continue;
+
+		victim_rgn = rgn;
+		break;
+	}
+
+	return victim_rgn;
+}
+
+static void ufshpb_cleanup_lru_info(struct victim_select_info *lru_info,
+				    struct ufshpb_region *rgn)
+{
+	list_del_init(&rgn->list_lru_rgn);
+	rgn->rgn_state = HPB_RGN_INACTIVE;
+	atomic_dec(&lru_info->active_cnt);
+}
+
+static void ufshpb_purge_active_subregion(struct ufshpb_lu *hpb,
+					  struct ufshpb_subregion *srgn)
+{
+	if (srgn->srgn_state != HPB_SRGN_UNUSED) {
+		ufshpb_put_map_ctx(hpb, srgn->mctx);
+		srgn->srgn_state = HPB_SRGN_UNUSED;
+		srgn->mctx = NULL;
+	}
+}
+
+static int ufshpb_issue_umap_req(struct ufshpb_lu *hpb,
+				 struct ufshpb_region *rgn,
+				 bool atomic)
+{
+	struct ufshpb_req *umap_req;
+	int rgn_idx = rgn ? rgn->rgn_idx : 0;
+
+	umap_req = ufshpb_get_req(hpb, rgn_idx, REQ_OP_DRV_OUT, atomic);
+	if (!umap_req)
+		return -ENOMEM;
+
+	ufshpb_execute_umap_req(hpb, umap_req, rgn);
+
+	return 0;
+}
+
+static int ufshpb_issue_umap_single_req(struct ufshpb_lu *hpb,
+					struct ufshpb_region *rgn)
+{
+	return ufshpb_issue_umap_req(hpb, rgn, true);
+}
+
+static int ufshpb_issue_umap_all_req(struct ufshpb_lu *hpb)
+{
+	return ufshpb_issue_umap_req(hpb, NULL, false);
+}
+
+static void __ufshpb_evict_region(struct ufshpb_lu *hpb,
+				 struct ufshpb_region *rgn)
+{
+	struct victim_select_info *lru_info;
+	struct ufshpb_subregion *srgn;
+	int srgn_idx;
+
+	lru_info = &hpb->lru_info;
+
+	dev_dbg(&hpb->sdev_ufs_lu->sdev_dev, "evict region %d\n", rgn->rgn_idx);
+
+	ufshpb_cleanup_lru_info(lru_info, rgn);
+
+	for_each_sub_region(rgn, srgn_idx, srgn)
+		ufshpb_purge_active_subregion(hpb, srgn);
+}
+
+static int ufshpb_evict_region(struct ufshpb_lu *hpb, struct ufshpb_region *rgn)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&hpb->rgn_state_lock, flags);
+	if (rgn->rgn_state == HPB_RGN_PINNED) {
+		dev_warn(&hpb->sdev_ufs_lu->sdev_dev,
+			 "pinned region cannot drop-out. region %d\n",
+			 rgn->rgn_idx);
+		goto out;
+	}
+
+	if (!list_empty(&rgn->list_lru_rgn)) {
+		if (ufshpb_check_srgns_issue_state(hpb, rgn)) {
+			ret = -EBUSY;
+			goto out;
+		}
+
+		if (hpb->is_hcm) {
+			spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);
+			ret = ufshpb_issue_umap_single_req(hpb, rgn);
+			spin_lock_irqsave(&hpb->rgn_state_lock, flags);
+			if (ret)
+				goto out;
+		}
+
+		__ufshpb_evict_region(hpb, rgn);
+	}
+out:
+	spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);
+	return ret;
+}
+
+static int ufshpb_issue_map_req(struct ufshpb_lu *hpb,
+				struct ufshpb_region *rgn,
+				struct ufshpb_subregion *srgn)
+{
+	struct ufshpb_req *map_req;
+	unsigned long flags;
+	int ret;
+	int err = -EAGAIN;
+	bool alloc_required = false;
+	enum HPB_SRGN_STATE state = HPB_SRGN_INVALID;
+
+	spin_lock_irqsave(&hpb->rgn_state_lock, flags);
+
+	if (ufshpb_get_state(hpb) != HPB_PRESENT) {
+		dev_notice(&hpb->sdev_ufs_lu->sdev_dev,
+			   "%s: ufshpb state is not PRESENT\n", __func__);
+		goto unlock_out;
+	}
+
+	if ((rgn->rgn_state == HPB_RGN_INACTIVE) &&
+	    (srgn->srgn_state == HPB_SRGN_INVALID)) {
+		err = 0;
+		goto unlock_out;
+	}
+
+	if (srgn->srgn_state == HPB_SRGN_UNUSED)
+		alloc_required = true;
+
+	/*
+	 * If the subregion is already ISSUED state,
+	 * a specific event (e.g., GC or wear-leveling, etc.) occurs in
+	 * the device and HPB response for map loading is received.
+	 * In this case, after finishing the HPB_READ_BUFFER,
+	 * the next HPB_READ_BUFFER is performed again to obtain the latest
+	 * map data.
+	 */
+	if (srgn->srgn_state == HPB_SRGN_ISSUED)
+		goto unlock_out;
+
+	srgn->srgn_state = HPB_SRGN_ISSUED;
+	spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);
+
+	if (alloc_required) {
+		srgn->mctx = ufshpb_get_map_ctx(hpb, srgn->is_last);
+		if (!srgn->mctx) {
+			dev_err(&hpb->sdev_ufs_lu->sdev_dev,
+			    "get map_ctx failed. region %d - %d\n",
+			    rgn->rgn_idx, srgn->srgn_idx);
+			state = HPB_SRGN_UNUSED;
+			goto change_srgn_state;
+		}
+	}
+
+	map_req = ufshpb_get_map_req(hpb, srgn);
+	if (!map_req)
+		goto change_srgn_state;
+
+
+	ret = ufshpb_execute_map_req(hpb, map_req, srgn->is_last);
+	if (ret) {
+		dev_err(&hpb->sdev_ufs_lu->sdev_dev,
+			   "%s: issue map_req failed: %d, region %d - %d\n",
+			   __func__, ret, srgn->rgn_idx, srgn->srgn_idx);
+		goto free_map_req;
+	}
+	return 0;
+
+free_map_req:
+	ufshpb_put_map_req(hpb, map_req);
+change_srgn_state:
+	spin_lock_irqsave(&hpb->rgn_state_lock, flags);
+	srgn->srgn_state = state;
+unlock_out:
+	spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);
+	return err;
+}
+
+static int ufshpb_add_region(struct ufshpb_lu *hpb, struct ufshpb_region *rgn)
+{
+	struct ufshpb_region *victim_rgn = NULL;
+	struct victim_select_info *lru_info = &hpb->lru_info;
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&hpb->rgn_state_lock, flags);
+	/*
+	 * If region belongs to lru_list, just move the region
+	 * to the front of lru list because the state of the region
+	 * is already active-state.
+	 */
+	if (!list_empty(&rgn->list_lru_rgn)) {
+		ufshpb_hit_lru_info(lru_info, rgn);
+		goto out;
+	}
+
+	if (rgn->rgn_state == HPB_RGN_INACTIVE) {
+		if (atomic_read(&lru_info->active_cnt) ==
+		    lru_info->max_lru_active_cnt) {
+			/*
+			 * If the maximum number of active regions
+			 * is exceeded, evict the least recently used region.
+			 * This case may occur when the device responds
+			 * to the eviction information late.
+			 * It is okay to evict the least recently used region,
+			 * because the device could detect this region
+			 * by not issuing HPB_READ
+			 *
+			 * in host control mode, verify that the entering
+			 * region has enough reads
+			 */
+			if (hpb->is_hcm &&
+			    rgn->reads < hpb->params.eviction_thld_enter) {
+				ret = -EACCES;
+				goto out;
+			}
+
+			victim_rgn = ufshpb_victim_lru_info(hpb);
+			if (!victim_rgn) {
+				dev_warn(&hpb->sdev_ufs_lu->sdev_dev,
+				    "cannot get victim region %s\n",
+				    hpb->is_hcm ? "" : "error");
+				ret = -ENOMEM;
+				goto out;
+			}
+
+			dev_dbg(&hpb->sdev_ufs_lu->sdev_dev,
+				"LRU full (%d), choose victim %d\n",
+				atomic_read(&lru_info->active_cnt),
+				victim_rgn->rgn_idx);
+
+			if (hpb->is_hcm) {
+				spin_unlock_irqrestore(&hpb->rgn_state_lock,
+						       flags);
+				ret = ufshpb_issue_umap_single_req(hpb,
+								victim_rgn);
+				spin_lock_irqsave(&hpb->rgn_state_lock,
+						  flags);
+				if (ret)
+					goto out;
+			}
+
+			__ufshpb_evict_region(hpb, victim_rgn);
+		}
+
+		/*
+		 * When a region is added to lru_info list_head,
+		 * it is guaranteed that the subregion has been
+		 * assigned all mctx. If failed, try to receive mctx again
+		 * without being added to lru_info list_head
+		 */
+		ufshpb_add_lru_info(lru_info, rgn);
+	}
+out:
+	spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);
+	return ret;
+}
+
+static void ufshpb_rsp_req_region_update(struct ufshpb_lu *hpb,
+					 struct utp_hpb_rsp *rsp_field)
+{
+	struct ufshpb_region *rgn;
+	struct ufshpb_subregion *srgn;
+	int i, rgn_i, srgn_i;
+
+	BUILD_BUG_ON(sizeof(struct ufshpb_active_field) != HPB_ACT_FIELD_SIZE);
+	/*
+	 * If the active region and the inactive region are the same,
+	 * we will inactivate this region.
+	 * The device could check this (region inactivated) and
+	 * will response the proper active region information
+	 */
+	for (i = 0; i < rsp_field->active_rgn_cnt; i++) {
+		rgn_i =
+			be16_to_cpu(rsp_field->hpb_active_field[i].active_rgn);
+		srgn_i =
+			be16_to_cpu(rsp_field->hpb_active_field[i].active_srgn);
+
+		rgn = hpb->rgn_tbl + rgn_i;
+		if (hpb->is_hcm &&
+		    (rgn->rgn_state != HPB_RGN_ACTIVE || is_rgn_dirty(rgn))) {
+			/*
+			 * in host control mode, subregion activation
+			 * recommendations are only allowed to active regions.
+			 * Also, ignore recommendations for dirty regions - the
+			 * host will make decisions concerning those by himself
+			 */
+			continue;
+		}
+
+		dev_dbg(&hpb->sdev_ufs_lu->sdev_dev,
+			"activate(%d) region %d - %d\n", i, rgn_i, srgn_i);
+
+		spin_lock(&hpb->rsp_list_lock);
+		ufshpb_update_active_info(hpb, rgn_i, srgn_i);
+		spin_unlock(&hpb->rsp_list_lock);
+
+		srgn = rgn->srgn_tbl + srgn_i;
+
+		/* blocking HPB_READ */
+		spin_lock(&hpb->rgn_state_lock);
+		if (srgn->srgn_state == HPB_SRGN_VALID)
+			srgn->srgn_state = HPB_SRGN_INVALID;
+		spin_unlock(&hpb->rgn_state_lock);
+	}
+
+	if (hpb->is_hcm) {
+		/*
+		 * in host control mode the device is not allowed to inactivate
+		 * regions
+		 */
+		goto out;
+	}
+
+	for (i = 0; i < rsp_field->inactive_rgn_cnt; i++) {
+		rgn_i = be16_to_cpu(rsp_field->hpb_inactive_field[i]);
+		dev_dbg(&hpb->sdev_ufs_lu->sdev_dev,
+			"inactivate(%d) region %d\n", i, rgn_i);
+
+		spin_lock(&hpb->rsp_list_lock);
+		ufshpb_update_inactive_info(hpb, rgn_i);
+		spin_unlock(&hpb->rsp_list_lock);
+
+		rgn = hpb->rgn_tbl + rgn_i;
+
+		spin_lock(&hpb->rgn_state_lock);
+		if (rgn->rgn_state != HPB_RGN_INACTIVE) {
+			for (srgn_i = 0; srgn_i < rgn->srgn_cnt; srgn_i++) {
+				srgn = rgn->srgn_tbl + srgn_i;
+				if (srgn->srgn_state == HPB_SRGN_VALID)
+					srgn->srgn_state = HPB_SRGN_INVALID;
+			}
+		}
+		spin_unlock(&hpb->rgn_state_lock);
+
+	}
+
+out:
+	dev_dbg(&hpb->sdev_ufs_lu->sdev_dev, "Noti: #ACT %u #INACT %u\n",
+		rsp_field->active_rgn_cnt, rsp_field->inactive_rgn_cnt);
+
+	if (ufshpb_get_state(hpb) == HPB_PRESENT)
+		queue_work(ufshpb_wq, &hpb->map_work);
+}
+
+static void ufshpb_dev_reset_handler(struct ufshpb_lu *hpb)
+{
+	struct victim_select_info *lru_info = &hpb->lru_info;
+	struct ufshpb_region *rgn;
+	unsigned long flags;
+
+	spin_lock_irqsave(&hpb->rgn_state_lock, flags);
+
+	list_for_each_entry(rgn, &lru_info->lh_lru_rgn, list_lru_rgn)
+		set_bit(RGN_FLAG_UPDATE, &rgn->rgn_flags);
+
+	spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);
+}
+
+/*
+ * This function will parse recommended active subregion information in sense
+ * data field of response UPIU with SAM_STAT_GOOD state.
+ */
+void ufshpb_rsp_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
+{
+	struct ufshpb_lu *hpb = ufshpb_get_hpb_data(lrbp->cmd->device);
+	struct utp_hpb_rsp *rsp_field = &lrbp->ucd_rsp_ptr->hr;
+	int data_seg_len;
+
+	if (unlikely(lrbp->lun != rsp_field->lun)) {
+		struct scsi_device *sdev;
+		bool found = false;
+
+		__shost_for_each_device(sdev, hba->host) {
+			hpb = ufshpb_get_hpb_data(sdev);
+
+			if (!hpb)
+				continue;
+
+			if (rsp_field->lun == hpb->lun) {
+				found = true;
+				break;
+			}
+		}
+
+		if (!found)
+			return;
+	}
+
+	if (!hpb)
+		return;
+
+	if (ufshpb_get_state(hpb) == HPB_INIT)
+		return;
+
+	if ((ufshpb_get_state(hpb) != HPB_PRESENT) &&
+	    (ufshpb_get_state(hpb) != HPB_SUSPEND)) {
+		dev_notice(&hpb->sdev_ufs_lu->sdev_dev,
+			   "%s: ufshpb state is not PRESENT/SUSPEND\n",
+			   __func__);
+		return;
+	}
+
+	data_seg_len = be32_to_cpu(lrbp->ucd_rsp_ptr->header.dword_2)
+		& MASK_RSP_UPIU_DATA_SEG_LEN;
+
+	/* To flush remained rsp_list, we queue the map_work task */
+	if (!data_seg_len) {
+		if (!ufshpb_is_general_lun(hpb->lun))
+			return;
+
+		ufshpb_kick_map_work(hpb);
+		return;
+	}
+
+	BUILD_BUG_ON(sizeof(struct utp_hpb_rsp) != UTP_HPB_RSP_SIZE);
+
+	if (!ufshpb_is_hpb_rsp_valid(hba, lrbp, rsp_field))
+		return;
+
+	hpb->stats.rb_noti_cnt++;
+
+	switch (rsp_field->hpb_op) {
+	case HPB_RSP_REQ_REGION_UPDATE:
+		if (data_seg_len != DEV_DATA_SEG_LEN)
+			dev_warn(&hpb->sdev_ufs_lu->sdev_dev,
+				 "%s: data seg length is not same.\n",
+				 __func__);
+		ufshpb_rsp_req_region_update(hpb, rsp_field);
+		break;
+	case HPB_RSP_DEV_RESET:
+		dev_warn(&hpb->sdev_ufs_lu->sdev_dev,
+			 "UFS device lost HPB information during PM.\n");
+
+		if (hpb->is_hcm) {
+			struct scsi_device *sdev;
+
+			__shost_for_each_device(sdev, hba->host) {
+				struct ufshpb_lu *h = sdev->hostdata;
+
+				if (h)
+					ufshpb_dev_reset_handler(h);
+			}
+		}
+
+		break;
+	default:
+		dev_notice(&hpb->sdev_ufs_lu->sdev_dev,
+			   "hpb_op is not available: %d\n",
+			   rsp_field->hpb_op);
+		break;
+	}
+}
+
+static void ufshpb_add_active_list(struct ufshpb_lu *hpb,
+				   struct ufshpb_region *rgn,
+				   struct ufshpb_subregion *srgn)
+{
+	if (!list_empty(&rgn->list_inact_rgn))
+		return;
+
+	if (!list_empty(&srgn->list_act_srgn)) {
+		list_move(&srgn->list_act_srgn, &hpb->lh_act_srgn);
+		return;
+	}
+
+	list_add(&srgn->list_act_srgn, &hpb->lh_act_srgn);
+}
+
+static void ufshpb_add_pending_evict_list(struct ufshpb_lu *hpb,
+					  struct ufshpb_region *rgn,
+					  struct list_head *pending_list)
+{
+	struct ufshpb_subregion *srgn;
+	int srgn_idx;
+
+	if (!list_empty(&rgn->list_inact_rgn))
+		return;
+
+	for_each_sub_region(rgn, srgn_idx, srgn)
+		if (!list_empty(&srgn->list_act_srgn))
+			return;
+
+	list_add_tail(&rgn->list_inact_rgn, pending_list);
+}
+
+static void ufshpb_run_active_subregion_list(struct ufshpb_lu *hpb)
+{
+	struct ufshpb_region *rgn;
+	struct ufshpb_subregion *srgn;
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&hpb->rsp_list_lock, flags);
+	while ((srgn = list_first_entry_or_null(&hpb->lh_act_srgn,
+						struct ufshpb_subregion,
+						list_act_srgn))) {
+		if (ufshpb_get_state(hpb) == HPB_SUSPEND)
+			break;
+
+		list_del_init(&srgn->list_act_srgn);
+		spin_unlock_irqrestore(&hpb->rsp_list_lock, flags);
+
+		rgn = hpb->rgn_tbl + srgn->rgn_idx;
+		ret = ufshpb_add_region(hpb, rgn);
+		if (ret)
+			goto active_failed;
+
+		ret = ufshpb_issue_map_req(hpb, rgn, srgn);
+		if (ret) {
+			dev_err(&hpb->sdev_ufs_lu->sdev_dev,
+			    "issue map_req failed. ret %d, region %d - %d\n",
+			    ret, rgn->rgn_idx, srgn->srgn_idx);
+			goto active_failed;
+		}
+		spin_lock_irqsave(&hpb->rsp_list_lock, flags);
+	}
+	spin_unlock_irqrestore(&hpb->rsp_list_lock, flags);
+	return;
+
+active_failed:
+	dev_err(&hpb->sdev_ufs_lu->sdev_dev, "failed to activate region %d - %d, will retry\n",
+		   rgn->rgn_idx, srgn->srgn_idx);
+	spin_lock_irqsave(&hpb->rsp_list_lock, flags);
+	ufshpb_add_active_list(hpb, rgn, srgn);
+	spin_unlock_irqrestore(&hpb->rsp_list_lock, flags);
+}
+
+static void ufshpb_run_inactive_region_list(struct ufshpb_lu *hpb)
+{
+	struct ufshpb_region *rgn;
+	unsigned long flags;
+	int ret;
+	LIST_HEAD(pending_list);
+
+	spin_lock_irqsave(&hpb->rsp_list_lock, flags);
+	while ((rgn = list_first_entry_or_null(&hpb->lh_inact_rgn,
+					       struct ufshpb_region,
+					       list_inact_rgn))) {
+		if (ufshpb_get_state(hpb) == HPB_SUSPEND)
+			break;
+
+		list_del_init(&rgn->list_inact_rgn);
+		spin_unlock_irqrestore(&hpb->rsp_list_lock, flags);
+
+		ret = ufshpb_evict_region(hpb, rgn);
+		if (ret) {
+			spin_lock_irqsave(&hpb->rsp_list_lock, flags);
+			ufshpb_add_pending_evict_list(hpb, rgn, &pending_list);
+			spin_unlock_irqrestore(&hpb->rsp_list_lock, flags);
+		}
+
+		spin_lock_irqsave(&hpb->rsp_list_lock, flags);
+	}
+
+	list_splice(&pending_list, &hpb->lh_inact_rgn);
+	spin_unlock_irqrestore(&hpb->rsp_list_lock, flags);
+}
+
+static void ufshpb_normalization_work_handler(struct work_struct *work)
+{
+	struct ufshpb_lu *hpb = container_of(work, struct ufshpb_lu,
+					     ufshpb_normalization_work);
+	int rgn_idx;
+	u8 factor = hpb->params.normalization_factor;
+
+	for (rgn_idx = 0; rgn_idx < hpb->rgns_per_lu; rgn_idx++) {
+		struct ufshpb_region *rgn = hpb->rgn_tbl + rgn_idx;
+		int srgn_idx;
+
+		spin_lock(&rgn->rgn_lock);
+		rgn->reads = 0;
+		for (srgn_idx = 0; srgn_idx < hpb->srgns_per_rgn; srgn_idx++) {
+			struct ufshpb_subregion *srgn = rgn->srgn_tbl + srgn_idx;
+
+			srgn->reads >>= factor;
+			rgn->reads += srgn->reads;
+		}
+		spin_unlock(&rgn->rgn_lock);
+
+		if (rgn->rgn_state != HPB_RGN_ACTIVE || rgn->reads)
+			continue;
+
+		/* if region is active but has no reads - inactivate it */
+		spin_lock(&hpb->rsp_list_lock);
+		ufshpb_update_inactive_info(hpb, rgn->rgn_idx);
+		spin_unlock(&hpb->rsp_list_lock);
+	}
+}
+
+static void ufshpb_map_work_handler(struct work_struct *work)
+{
+	struct ufshpb_lu *hpb = container_of(work, struct ufshpb_lu, map_work);
+
+	if (ufshpb_get_state(hpb) != HPB_PRESENT) {
+		dev_notice(&hpb->sdev_ufs_lu->sdev_dev,
+			   "%s: ufshpb state is not PRESENT\n", __func__);
+		return;
+	}
+
+	ufshpb_run_inactive_region_list(hpb);
+	ufshpb_run_active_subregion_list(hpb);
+}
+
+/*
+ * this function doesn't need to hold lock due to be called in init.
+ * (rgn_state_lock, rsp_list_lock, etc..)
+ */
+static int ufshpb_init_pinned_active_region(struct ufs_hba *hba,
+					    struct ufshpb_lu *hpb,
+					    struct ufshpb_region *rgn)
+{
+	struct ufshpb_subregion *srgn;
+	int srgn_idx, i;
+	int err = 0;
+
+	for_each_sub_region(rgn, srgn_idx, srgn) {
+		srgn->mctx = ufshpb_get_map_ctx(hpb, srgn->is_last);
+		srgn->srgn_state = HPB_SRGN_INVALID;
+		if (!srgn->mctx) {
+			err = -ENOMEM;
+			dev_err(hba->dev,
+				"alloc mctx for pinned region failed\n");
+			goto release;
+		}
+
+		list_add_tail(&srgn->list_act_srgn, &hpb->lh_act_srgn);
+	}
+
+	rgn->rgn_state = HPB_RGN_PINNED;
+	return 0;
+
+release:
+	for (i = 0; i < srgn_idx; i++) {
+		srgn = rgn->srgn_tbl + i;
+		ufshpb_put_map_ctx(hpb, srgn->mctx);
+	}
+	return err;
+}
+
+static void ufshpb_init_subregion_tbl(struct ufshpb_lu *hpb,
+				      struct ufshpb_region *rgn, bool last)
+{
+	int srgn_idx;
+	struct ufshpb_subregion *srgn;
+
+	for_each_sub_region(rgn, srgn_idx, srgn) {
+		INIT_LIST_HEAD(&srgn->list_act_srgn);
+
+		srgn->rgn_idx = rgn->rgn_idx;
+		srgn->srgn_idx = srgn_idx;
+		srgn->srgn_state = HPB_SRGN_UNUSED;
+	}
+
+	if (unlikely(last && hpb->last_srgn_entries))
+		srgn->is_last = true;
+}
+
+static int ufshpb_alloc_subregion_tbl(struct ufshpb_lu *hpb,
+				      struct ufshpb_region *rgn, int srgn_cnt)
+{
+	rgn->srgn_tbl = kvcalloc(srgn_cnt, sizeof(struct ufshpb_subregion),
+				 GFP_KERNEL);
+	if (!rgn->srgn_tbl)
+		return -ENOMEM;
+
+	rgn->srgn_cnt = srgn_cnt;
+	return 0;
+}
+
+static void ufshpb_lu_parameter_init(struct ufs_hba *hba,
+				     struct ufshpb_lu *hpb,
+				     struct ufshpb_dev_info *hpb_dev_info,
+				     struct ufshpb_lu_info *hpb_lu_info)
+{
+	u32 entries_per_rgn;
+	u64 rgn_mem_size, tmp;
+
+	/* for pre_req */
+	hpb->pre_req_min_tr_len = hpb_dev_info->max_hpb_single_cmd + 1;
+
+	if (ufshpb_is_legacy(hba))
+		hpb->pre_req_max_tr_len = HPB_LEGACY_CHUNK_HIGH;
+	else
+		hpb->pre_req_max_tr_len = HPB_MULTI_CHUNK_HIGH;
+
+	hpb->cur_read_id = 0;
+
+	hpb->lu_pinned_start = hpb_lu_info->pinned_start;
+	hpb->lu_pinned_end = hpb_lu_info->num_pinned ?
+		(hpb_lu_info->pinned_start + hpb_lu_info->num_pinned - 1)
+		: PINNED_NOT_SET;
+	hpb->lru_info.max_lru_active_cnt =
+		hpb_lu_info->max_active_rgns - hpb_lu_info->num_pinned;
+
+	rgn_mem_size = (1ULL << hpb_dev_info->rgn_size) * HPB_RGN_SIZE_UNIT
+			* HPB_ENTRY_SIZE;
+	do_div(rgn_mem_size, HPB_ENTRY_BLOCK_SIZE);
+	hpb->srgn_mem_size = (1ULL << hpb_dev_info->srgn_size)
+		* HPB_RGN_SIZE_UNIT / HPB_ENTRY_BLOCK_SIZE * HPB_ENTRY_SIZE;
+
+	tmp = rgn_mem_size;
+	do_div(tmp, HPB_ENTRY_SIZE);
+	entries_per_rgn = (u32)tmp;
+	hpb->entries_per_rgn_shift = ilog2(entries_per_rgn);
+	hpb->entries_per_rgn_mask = entries_per_rgn - 1;
+
+	hpb->entries_per_srgn = hpb->srgn_mem_size / HPB_ENTRY_SIZE;
+	hpb->entries_per_srgn_shift = ilog2(hpb->entries_per_srgn);
+	hpb->entries_per_srgn_mask = hpb->entries_per_srgn - 1;
+
+	tmp = rgn_mem_size;
+	do_div(tmp, hpb->srgn_mem_size);
+	hpb->srgns_per_rgn = (int)tmp;
+
+	hpb->rgns_per_lu = DIV_ROUND_UP(hpb_lu_info->num_blocks,
+				entries_per_rgn);
+	hpb->srgns_per_lu = DIV_ROUND_UP(hpb_lu_info->num_blocks,
+				(hpb->srgn_mem_size / HPB_ENTRY_SIZE));
+	hpb->last_srgn_entries = hpb_lu_info->num_blocks
+				 % (hpb->srgn_mem_size / HPB_ENTRY_SIZE);
+
+	hpb->pages_per_srgn = DIV_ROUND_UP(hpb->srgn_mem_size, PAGE_SIZE);
+
+	if (hpb_dev_info->control_mode == HPB_HOST_CONTROL)
+		hpb->is_hcm = true;
+}
+
+static int ufshpb_alloc_region_tbl(struct ufs_hba *hba, struct ufshpb_lu *hpb)
+{
+	struct ufshpb_region *rgn_table, *rgn;
+	int rgn_idx, i;
+	int ret = 0;
+
+	rgn_table = kvcalloc(hpb->rgns_per_lu, sizeof(struct ufshpb_region),
+			    GFP_KERNEL);
+	if (!rgn_table)
+		return -ENOMEM;
+
+	for (rgn_idx = 0; rgn_idx < hpb->rgns_per_lu; rgn_idx++) {
+		int srgn_cnt = hpb->srgns_per_rgn;
+		bool last_srgn = false;
+
+		rgn = rgn_table + rgn_idx;
+		rgn->rgn_idx = rgn_idx;
+
+		spin_lock_init(&rgn->rgn_lock);
+
+		INIT_LIST_HEAD(&rgn->list_inact_rgn);
+		INIT_LIST_HEAD(&rgn->list_lru_rgn);
+		INIT_LIST_HEAD(&rgn->list_expired_rgn);
+
+		if (rgn_idx == hpb->rgns_per_lu - 1) {
+			srgn_cnt = ((hpb->srgns_per_lu - 1) %
+				    hpb->srgns_per_rgn) + 1;
+			last_srgn = true;
+		}
+
+		ret = ufshpb_alloc_subregion_tbl(hpb, rgn, srgn_cnt);
+		if (ret)
+			goto release_srgn_table;
+		ufshpb_init_subregion_tbl(hpb, rgn, last_srgn);
+
+		if (ufshpb_is_pinned_region(hpb, rgn_idx)) {
+			ret = ufshpb_init_pinned_active_region(hba, hpb, rgn);
+			if (ret)
+				goto release_srgn_table;
+		} else {
+			rgn->rgn_state = HPB_RGN_INACTIVE;
+		}
+
+		rgn->rgn_flags = 0;
+		rgn->hpb = hpb;
+	}
+
+	hpb->rgn_tbl = rgn_table;
+
+	return 0;
+
+release_srgn_table:
+	for (i = 0; i <= rgn_idx; i++)
+		kvfree(rgn_table[i].srgn_tbl);
+
+	kvfree(rgn_table);
+	return ret;
+}
+
+static void ufshpb_destroy_subregion_tbl(struct ufshpb_lu *hpb,
+					 struct ufshpb_region *rgn)
+{
+	int srgn_idx;
+	struct ufshpb_subregion *srgn;
+
+	for_each_sub_region(rgn, srgn_idx, srgn)
+		if (srgn->srgn_state != HPB_SRGN_UNUSED) {
+			srgn->srgn_state = HPB_SRGN_UNUSED;
+			ufshpb_put_map_ctx(hpb, srgn->mctx);
+		}
+}
+
+static void ufshpb_destroy_region_tbl(struct ufshpb_lu *hpb)
+{
+	int rgn_idx;
+
+	for (rgn_idx = 0; rgn_idx < hpb->rgns_per_lu; rgn_idx++) {
+		struct ufshpb_region *rgn;
+
+		rgn = hpb->rgn_tbl + rgn_idx;
+		if (rgn->rgn_state != HPB_RGN_INACTIVE) {
+			rgn->rgn_state = HPB_RGN_INACTIVE;
+
+			ufshpb_destroy_subregion_tbl(hpb, rgn);
+		}
+
+		kvfree(rgn->srgn_tbl);
+	}
+
+	kvfree(hpb->rgn_tbl);
+}
+
+/* SYSFS functions */
+#define ufshpb_sysfs_attr_show_func(__name)				\
+static ssize_t __name##_show(struct device *dev,			\
+	struct device_attribute *attr, char *buf)			\
+{									\
+	struct scsi_device *sdev = to_scsi_device(dev);			\
+	struct ufshpb_lu *hpb = ufshpb_get_hpb_data(sdev);		\
+									\
+	if (!hpb)							\
+		return -ENODEV;						\
+									\
+	return sysfs_emit(buf, "%llu\n", hpb->stats.__name);		\
+}									\
+\
+static DEVICE_ATTR_RO(__name)
+
+ufshpb_sysfs_attr_show_func(hit_cnt);
+ufshpb_sysfs_attr_show_func(miss_cnt);
+ufshpb_sysfs_attr_show_func(rb_noti_cnt);
+ufshpb_sysfs_attr_show_func(rb_active_cnt);
+ufshpb_sysfs_attr_show_func(rb_inactive_cnt);
+ufshpb_sysfs_attr_show_func(map_req_cnt);
+ufshpb_sysfs_attr_show_func(umap_req_cnt);
+
+static struct attribute *hpb_dev_stat_attrs[] = {
+	&dev_attr_hit_cnt.attr,
+	&dev_attr_miss_cnt.attr,
+	&dev_attr_rb_noti_cnt.attr,
+	&dev_attr_rb_active_cnt.attr,
+	&dev_attr_rb_inactive_cnt.attr,
+	&dev_attr_map_req_cnt.attr,
+	&dev_attr_umap_req_cnt.attr,
+	NULL,
+};
+
+struct attribute_group ufs_sysfs_hpb_stat_group = {
+	.name = "hpb_stats",
+	.attrs = hpb_dev_stat_attrs,
+};
+
+/* SYSFS functions */
+#define ufshpb_sysfs_param_show_func(__name)				\
+static ssize_t __name##_show(struct device *dev,			\
+	struct device_attribute *attr, char *buf)			\
+{									\
+	struct scsi_device *sdev = to_scsi_device(dev);			\
+	struct ufshpb_lu *hpb = ufshpb_get_hpb_data(sdev);		\
+									\
+	if (!hpb)							\
+		return -ENODEV;						\
+									\
+	return sysfs_emit(buf, "%d\n", hpb->params.__name);		\
+}
+
+ufshpb_sysfs_param_show_func(requeue_timeout_ms);
+static ssize_t
+requeue_timeout_ms_store(struct device *dev, struct device_attribute *attr,
+			 const char *buf, size_t count)
+{
+	struct scsi_device *sdev = to_scsi_device(dev);
+	struct ufshpb_lu *hpb = ufshpb_get_hpb_data(sdev);
+	int val;
+
+	if (!hpb)
+		return -ENODEV;
+
+	if (kstrtouint(buf, 0, &val))
+		return -EINVAL;
+
+	if (val < 0)
+		return -EINVAL;
+
+	hpb->params.requeue_timeout_ms = val;
+
+	return count;
+}
+static DEVICE_ATTR_RW(requeue_timeout_ms);
+
+ufshpb_sysfs_param_show_func(activation_thld);
+static ssize_t
+activation_thld_store(struct device *dev, struct device_attribute *attr,
+		      const char *buf, size_t count)
+{
+	struct scsi_device *sdev = to_scsi_device(dev);
+	struct ufshpb_lu *hpb = ufshpb_get_hpb_data(sdev);
+	int val;
+
+	if (!hpb)
+		return -ENODEV;
+
+	if (!hpb->is_hcm)
+		return -EOPNOTSUPP;
+
+	if (kstrtouint(buf, 0, &val))
+		return -EINVAL;
+
+	if (val <= 0)
+		return -EINVAL;
+
+	hpb->params.activation_thld = val;
+
+	return count;
+}
+static DEVICE_ATTR_RW(activation_thld);
+
+ufshpb_sysfs_param_show_func(normalization_factor);
+static ssize_t
+normalization_factor_store(struct device *dev, struct device_attribute *attr,
+			   const char *buf, size_t count)
+{
+	struct scsi_device *sdev = to_scsi_device(dev);
+	struct ufshpb_lu *hpb = ufshpb_get_hpb_data(sdev);
+	int val;
+
+	if (!hpb)
+		return -ENODEV;
+
+	if (!hpb->is_hcm)
+		return -EOPNOTSUPP;
+
+	if (kstrtouint(buf, 0, &val))
+		return -EINVAL;
+
+	if (val <= 0 || val > ilog2(hpb->entries_per_srgn))
+		return -EINVAL;
+
+	hpb->params.normalization_factor = val;
+
+	return count;
+}
+static DEVICE_ATTR_RW(normalization_factor);
+
+ufshpb_sysfs_param_show_func(eviction_thld_enter);
+static ssize_t
+eviction_thld_enter_store(struct device *dev, struct device_attribute *attr,
+			  const char *buf, size_t count)
+{
+	struct scsi_device *sdev = to_scsi_device(dev);
+	struct ufshpb_lu *hpb = ufshpb_get_hpb_data(sdev);
+	int val;
+
+	if (!hpb)
+		return -ENODEV;
+
+	if (!hpb->is_hcm)
+		return -EOPNOTSUPP;
+
+	if (kstrtouint(buf, 0, &val))
+		return -EINVAL;
+
+	if (val <= hpb->params.eviction_thld_exit)
+		return -EINVAL;
+
+	hpb->params.eviction_thld_enter = val;
+
+	return count;
+}
+static DEVICE_ATTR_RW(eviction_thld_enter);
+
+ufshpb_sysfs_param_show_func(eviction_thld_exit);
+static ssize_t
+eviction_thld_exit_store(struct device *dev, struct device_attribute *attr,
+			 const char *buf, size_t count)
+{
+	struct scsi_device *sdev = to_scsi_device(dev);
+	struct ufshpb_lu *hpb = ufshpb_get_hpb_data(sdev);
+	int val;
+
+	if (!hpb)
+		return -ENODEV;
+
+	if (!hpb->is_hcm)
+		return -EOPNOTSUPP;
+
+	if (kstrtouint(buf, 0, &val))
+		return -EINVAL;
+
+	if (val <= hpb->params.activation_thld)
+		return -EINVAL;
+
+	hpb->params.eviction_thld_exit = val;
+
+	return count;
+}
+static DEVICE_ATTR_RW(eviction_thld_exit);
+
+ufshpb_sysfs_param_show_func(read_timeout_ms);
+static ssize_t
+read_timeout_ms_store(struct device *dev, struct device_attribute *attr,
+		      const char *buf, size_t count)
+{
+	struct scsi_device *sdev = to_scsi_device(dev);
+	struct ufshpb_lu *hpb = ufshpb_get_hpb_data(sdev);
+	int val;
+
+	if (!hpb)
+		return -ENODEV;
+
+	if (!hpb->is_hcm)
+		return -EOPNOTSUPP;
+
+	if (kstrtouint(buf, 0, &val))
+		return -EINVAL;
+
+	/* read_timeout >> timeout_polling_interval */
+	if (val < hpb->params.timeout_polling_interval_ms * 2)
+		return -EINVAL;
+
+	hpb->params.read_timeout_ms = val;
+
+	return count;
+}
+static DEVICE_ATTR_RW(read_timeout_ms);
+
+ufshpb_sysfs_param_show_func(read_timeout_expiries);
+static ssize_t
+read_timeout_expiries_store(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	struct scsi_device *sdev = to_scsi_device(dev);
+	struct ufshpb_lu *hpb = ufshpb_get_hpb_data(sdev);
+	int val;
+
+	if (!hpb)
+		return -ENODEV;
+
+	if (!hpb->is_hcm)
+		return -EOPNOTSUPP;
+
+	if (kstrtouint(buf, 0, &val))
+		return -EINVAL;
+
+	if (val <= 0)
+		return -EINVAL;
+
+	hpb->params.read_timeout_expiries = val;
+
+	return count;
+}
+static DEVICE_ATTR_RW(read_timeout_expiries);
+
+ufshpb_sysfs_param_show_func(timeout_polling_interval_ms);
+static ssize_t
+timeout_polling_interval_ms_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct scsi_device *sdev = to_scsi_device(dev);
+	struct ufshpb_lu *hpb = ufshpb_get_hpb_data(sdev);
+	int val;
+
+	if (!hpb)
+		return -ENODEV;
+
+	if (!hpb->is_hcm)
+		return -EOPNOTSUPP;
+
+	if (kstrtouint(buf, 0, &val))
+		return -EINVAL;
+
+	/* timeout_polling_interval << read_timeout */
+	if (val <= 0 || val > hpb->params.read_timeout_ms / 2)
+		return -EINVAL;
+
+	hpb->params.timeout_polling_interval_ms = val;
+
+	return count;
+}
+static DEVICE_ATTR_RW(timeout_polling_interval_ms);
+
+ufshpb_sysfs_param_show_func(inflight_map_req);
+static ssize_t inflight_map_req_store(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct scsi_device *sdev = to_scsi_device(dev);
+	struct ufshpb_lu *hpb = ufshpb_get_hpb_data(sdev);
+	int val;
+
+	if (!hpb)
+		return -ENODEV;
+
+	if (!hpb->is_hcm)
+		return -EOPNOTSUPP;
+
+	if (kstrtouint(buf, 0, &val))
+		return -EINVAL;
+
+	if (val <= 0 || val > hpb->sdev_ufs_lu->queue_depth - 1)
+		return -EINVAL;
+
+	hpb->params.inflight_map_req = val;
+
+	return count;
+}
+static DEVICE_ATTR_RW(inflight_map_req);
+
+static void ufshpb_hcm_param_init(struct ufshpb_lu *hpb)
+{
+	hpb->params.activation_thld = ACTIVATION_THRESHOLD;
+	hpb->params.normalization_factor = 1;
+	hpb->params.eviction_thld_enter = (ACTIVATION_THRESHOLD << 5);
+	hpb->params.eviction_thld_exit = (ACTIVATION_THRESHOLD << 4);
+	hpb->params.read_timeout_ms = READ_TO_MS;
+	hpb->params.read_timeout_expiries = READ_TO_EXPIRIES;
+	hpb->params.timeout_polling_interval_ms = POLLING_INTERVAL_MS;
+	hpb->params.inflight_map_req = THROTTLE_MAP_REQ_DEFAULT;
+}
+
+static struct attribute *hpb_dev_param_attrs[] = {
+	&dev_attr_requeue_timeout_ms.attr,
+	&dev_attr_activation_thld.attr,
+	&dev_attr_normalization_factor.attr,
+	&dev_attr_eviction_thld_enter.attr,
+	&dev_attr_eviction_thld_exit.attr,
+	&dev_attr_read_timeout_ms.attr,
+	&dev_attr_read_timeout_expiries.attr,
+	&dev_attr_timeout_polling_interval_ms.attr,
+	&dev_attr_inflight_map_req.attr,
+	NULL,
+};
+
+struct attribute_group ufs_sysfs_hpb_param_group = {
+	.name = "hpb_params",
+	.attrs = hpb_dev_param_attrs,
+};
+
+static int ufshpb_pre_req_mempool_init(struct ufshpb_lu *hpb)
+{
+	struct ufshpb_req *pre_req = NULL, *t;
+	int qd = hpb->sdev_ufs_lu->queue_depth / 2;
+	int i;
+
+	INIT_LIST_HEAD(&hpb->lh_pre_req_free);
+
+	hpb->pre_req = kcalloc(qd, sizeof(struct ufshpb_req), GFP_KERNEL);
+	hpb->throttle_pre_req = qd;
+	hpb->num_inflight_pre_req = 0;
+
+	if (!hpb->pre_req)
+		goto release_mem;
+
+	for (i = 0; i < qd; i++) {
+		pre_req = hpb->pre_req + i;
+		INIT_LIST_HEAD(&pre_req->list_req);
+		pre_req->req = NULL;
+
+		pre_req->bio = bio_alloc(GFP_KERNEL, 1);
+		if (!pre_req->bio)
+			goto release_mem;
+
+		pre_req->wb.m_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		if (!pre_req->wb.m_page) {
+			bio_put(pre_req->bio);
+			goto release_mem;
+		}
+
+		list_add_tail(&pre_req->list_req, &hpb->lh_pre_req_free);
+	}
+
+	return 0;
+release_mem:
+	list_for_each_entry_safe(pre_req, t, &hpb->lh_pre_req_free, list_req) {
+		list_del_init(&pre_req->list_req);
+		bio_put(pre_req->bio);
+		__free_page(pre_req->wb.m_page);
+	}
+
+	kfree(hpb->pre_req);
+	return -ENOMEM;
+}
+
+static void ufshpb_pre_req_mempool_destroy(struct ufshpb_lu *hpb)
+{
+	struct ufshpb_req *pre_req = NULL;
+	int i;
+
+	for (i = 0; i < hpb->throttle_pre_req; i++) {
+		pre_req = hpb->pre_req + i;
+		bio_put(hpb->pre_req[i].bio);
+		if (!pre_req->wb.m_page)
+			__free_page(hpb->pre_req[i].wb.m_page);
+		list_del_init(&pre_req->list_req);
+	}
+
+	kfree(hpb->pre_req);
+}
+
+static void ufshpb_stat_init(struct ufshpb_lu *hpb)
+{
+	hpb->stats.hit_cnt = 0;
+	hpb->stats.miss_cnt = 0;
+	hpb->stats.rb_noti_cnt = 0;
+	hpb->stats.rb_active_cnt = 0;
+	hpb->stats.rb_inactive_cnt = 0;
+	hpb->stats.map_req_cnt = 0;
+	hpb->stats.umap_req_cnt = 0;
+}
+
+static void ufshpb_param_init(struct ufshpb_lu *hpb)
+{
+	hpb->params.requeue_timeout_ms = HPB_REQUEUE_TIME_MS;
+	if (hpb->is_hcm)
+		ufshpb_hcm_param_init(hpb);
+}
+
+static int ufshpb_lu_hpb_init(struct ufs_hba *hba, struct ufshpb_lu *hpb)
+{
+	int ret;
+
+	spin_lock_init(&hpb->rgn_state_lock);
+	spin_lock_init(&hpb->rsp_list_lock);
+	spin_lock_init(&hpb->param_lock);
+
+	INIT_LIST_HEAD(&hpb->lru_info.lh_lru_rgn);
+	INIT_LIST_HEAD(&hpb->lh_act_srgn);
+	INIT_LIST_HEAD(&hpb->lh_inact_rgn);
+	INIT_LIST_HEAD(&hpb->list_hpb_lu);
+
+	INIT_WORK(&hpb->map_work, ufshpb_map_work_handler);
+	if (hpb->is_hcm) {
+		INIT_WORK(&hpb->ufshpb_normalization_work,
+			  ufshpb_normalization_work_handler);
+		INIT_DELAYED_WORK(&hpb->ufshpb_read_to_work,
+				  ufshpb_read_to_handler);
+	}
+
+	hpb->map_req_cache = kmem_cache_create("ufshpb_req_cache",
+			  sizeof(struct ufshpb_req), 0, 0, NULL);
+	if (!hpb->map_req_cache) {
+		dev_err(hba->dev, "ufshpb(%d) ufshpb_req_cache create fail",
+			hpb->lun);
+		return -ENOMEM;
+	}
+
+	hpb->m_page_cache = kmem_cache_create("ufshpb_m_page_cache",
+			  sizeof(struct page *) * hpb->pages_per_srgn,
+			  0, 0, NULL);
+	if (!hpb->m_page_cache) {
+		dev_err(hba->dev, "ufshpb(%d) ufshpb_m_page_cache create fail",
+			hpb->lun);
+		ret = -ENOMEM;
+		goto release_req_cache;
+	}
+
+	ret = ufshpb_pre_req_mempool_init(hpb);
+	if (ret) {
+		dev_err(hba->dev, "ufshpb(%d) pre_req_mempool init fail",
+			hpb->lun);
+		goto release_m_page_cache;
+	}
+
+	ret = ufshpb_alloc_region_tbl(hba, hpb);
+	if (ret)
+		goto release_pre_req_mempool;
+
+	ufshpb_stat_init(hpb);
+	ufshpb_param_init(hpb);
+
+	if (hpb->is_hcm) {
+		unsigned int poll;
+
+		poll = hpb->params.timeout_polling_interval_ms;
+		schedule_delayed_work(&hpb->ufshpb_read_to_work,
+				      msecs_to_jiffies(poll));
+	}
+
+	return 0;
+
+release_pre_req_mempool:
+	ufshpb_pre_req_mempool_destroy(hpb);
+release_m_page_cache:
+	kmem_cache_destroy(hpb->m_page_cache);
+release_req_cache:
+	kmem_cache_destroy(hpb->map_req_cache);
+	return ret;
+}
+
+static struct ufshpb_lu *
+ufshpb_alloc_hpb_lu(struct ufs_hba *hba, struct scsi_device *sdev,
+		    struct ufshpb_dev_info *hpb_dev_info,
+		    struct ufshpb_lu_info *hpb_lu_info)
+{
+	struct ufshpb_lu *hpb;
+	int ret;
+
+	hpb = kzalloc(sizeof(struct ufshpb_lu), GFP_KERNEL);
+	if (!hpb)
+		return NULL;
+
+	hpb->lun = sdev->lun;
+	hpb->sdev_ufs_lu = sdev;
+
+	ufshpb_lu_parameter_init(hba, hpb, hpb_dev_info, hpb_lu_info);
+
+	ret = ufshpb_lu_hpb_init(hba, hpb);
+	if (ret) {
+		dev_err(hba->dev, "hpb lu init failed. ret %d", ret);
+		goto release_hpb;
+	}
+
+	sdev->hostdata = hpb;
+	return hpb;
+
+release_hpb:
+	kfree(hpb);
+	return NULL;
+}
+
+static void ufshpb_discard_rsp_lists(struct ufshpb_lu *hpb)
+{
+	struct ufshpb_region *rgn, *next_rgn;
+	struct ufshpb_subregion *srgn, *next_srgn;
+	unsigned long flags;
+
+	/*
+	 * If the device reset occurred, the remaining HPB region information
+	 * may be stale. Therefore, by discarding the lists of HPB response
+	 * that remained after reset, we prevent unnecessary work.
+	 */
+	spin_lock_irqsave(&hpb->rsp_list_lock, flags);
+	list_for_each_entry_safe(rgn, next_rgn, &hpb->lh_inact_rgn,
+				 list_inact_rgn)
+		list_del_init(&rgn->list_inact_rgn);
+
+	list_for_each_entry_safe(srgn, next_srgn, &hpb->lh_act_srgn,
+				 list_act_srgn)
+		list_del_init(&srgn->list_act_srgn);
+	spin_unlock_irqrestore(&hpb->rsp_list_lock, flags);
+}
+
+static void ufshpb_cancel_jobs(struct ufshpb_lu *hpb)
+{
+	if (hpb->is_hcm) {
+		cancel_delayed_work_sync(&hpb->ufshpb_read_to_work);
+		cancel_work_sync(&hpb->ufshpb_normalization_work);
+	}
+	cancel_work_sync(&hpb->map_work);
+}
+
+static bool ufshpb_check_hpb_reset_query(struct ufs_hba *hba)
+{
+	int err = 0;
+	bool flag_res = true;
+	int try;
+
+	/* wait for the device to complete HPB reset query */
+	for (try = 0; try < HPB_RESET_REQ_RETRIES; try++) {
+		dev_dbg(hba->dev,
+			"%s start flag reset polling %d times\n",
+			__func__, try);
+
+		/* Poll fHpbReset flag to be cleared */
+		err = ufshcd_query_flag(hba, UPIU_QUERY_OPCODE_READ_FLAG,
+				QUERY_FLAG_IDN_HPB_RESET, 0, &flag_res);
+
+		if (err) {
+			dev_err(hba->dev,
+				"%s reading fHpbReset flag failed with error %d\n",
+				__func__, err);
+			return flag_res;
+		}
+
+		if (!flag_res)
+			goto out;
+
+		usleep_range(1000, 1100);
+	}
+	if (flag_res) {
+		dev_err(hba->dev,
+			"%s fHpbReset was not cleared by the device\n",
+			__func__);
+	}
+out:
+	return flag_res;
+}
+
+void ufshpb_reset(struct ufs_hba *hba)
+{
+	struct ufshpb_lu *hpb;
+	struct scsi_device *sdev;
+
+	shost_for_each_device(sdev, hba->host) {
+		hpb = ufshpb_get_hpb_data(sdev);
+		if (!hpb)
+			continue;
+
+		if (ufshpb_get_state(hpb) != HPB_RESET)
+			continue;
+
+		ufshpb_set_state(hpb, HPB_PRESENT);
+	}
+}
+
+void ufshpb_reset_host(struct ufs_hba *hba)
+{
+	struct ufshpb_lu *hpb;
+	struct scsi_device *sdev;
+
+	shost_for_each_device(sdev, hba->host) {
+		hpb = ufshpb_get_hpb_data(sdev);
+		if (!hpb)
+			continue;
+
+		if (ufshpb_get_state(hpb) != HPB_PRESENT)
+			continue;
+		ufshpb_set_state(hpb, HPB_RESET);
+		ufshpb_cancel_jobs(hpb);
+		ufshpb_discard_rsp_lists(hpb);
+	}
+}
+
+void ufshpb_suspend(struct ufs_hba *hba)
+{
+	struct ufshpb_lu *hpb;
+	struct scsi_device *sdev;
+
+	shost_for_each_device(sdev, hba->host) {
+		hpb = ufshpb_get_hpb_data(sdev);
+		if (!hpb)
+			continue;
+
+		if (ufshpb_get_state(hpb) != HPB_PRESENT)
+			continue;
+		ufshpb_set_state(hpb, HPB_SUSPEND);
+		ufshpb_cancel_jobs(hpb);
+	}
+}
+
+void ufshpb_resume(struct ufs_hba *hba)
+{
+	struct ufshpb_lu *hpb;
+	struct scsi_device *sdev;
+
+	shost_for_each_device(sdev, hba->host) {
+		hpb = ufshpb_get_hpb_data(sdev);
+		if (!hpb)
+			continue;
+
+		if ((ufshpb_get_state(hpb) != HPB_PRESENT) &&
+		    (ufshpb_get_state(hpb) != HPB_SUSPEND))
+			continue;
+		ufshpb_set_state(hpb, HPB_PRESENT);
+		ufshpb_kick_map_work(hpb);
+		if (hpb->is_hcm) {
+			unsigned int poll =
+				hpb->params.timeout_polling_interval_ms;
+
+			schedule_delayed_work(&hpb->ufshpb_read_to_work,
+				msecs_to_jiffies(poll));
+		}
+	}
+}
+
+static int ufshpb_get_lu_info(struct ufs_hba *hba, int lun,
+			      struct ufshpb_lu_info *hpb_lu_info)
+{
+	u16 max_active_rgns;
+	u8 lu_enable;
+	int size;
+	int ret;
+	char desc_buf[QUERY_DESC_MAX_SIZE];
+
+	ufshcd_map_desc_id_to_length(hba, QUERY_DESC_IDN_UNIT, &size);
+
+	pm_runtime_get_sync(hba->dev);
+	ret = ufshcd_query_descriptor_retry(hba, UPIU_QUERY_OPCODE_READ_DESC,
+					    QUERY_DESC_IDN_UNIT, lun, 0,
+					    desc_buf, &size);
+	pm_runtime_put_sync(hba->dev);
+
+	if (ret) {
+		dev_err(hba->dev,
+			"%s: idn: %d lun: %d  query request failed",
+			__func__, QUERY_DESC_IDN_UNIT, lun);
+		return ret;
+	}
+
+	lu_enable = desc_buf[UNIT_DESC_PARAM_LU_ENABLE];
+	if (lu_enable != LU_ENABLED_HPB_FUNC)
+		return -ENODEV;
+
+	max_active_rgns = get_unaligned_be16(
+			desc_buf + UNIT_DESC_PARAM_HPB_LU_MAX_ACTIVE_RGNS);
+	if (!max_active_rgns) {
+		dev_err(hba->dev,
+			"lun %d wrong number of max active regions\n", lun);
+		return -ENODEV;
+	}
+
+	hpb_lu_info->num_blocks = get_unaligned_be64(
+			desc_buf + UNIT_DESC_PARAM_LOGICAL_BLK_COUNT);
+	hpb_lu_info->pinned_start = get_unaligned_be16(
+			desc_buf + UNIT_DESC_PARAM_HPB_PIN_RGN_START_OFF);
+	hpb_lu_info->num_pinned = get_unaligned_be16(
+			desc_buf + UNIT_DESC_PARAM_HPB_NUM_PIN_RGNS);
+	hpb_lu_info->max_active_rgns = max_active_rgns;
+
+	return 0;
+}
+
+void ufshpb_destroy_lu(struct ufs_hba *hba, struct scsi_device *sdev)
+{
+	struct ufshpb_lu *hpb = ufshpb_get_hpb_data(sdev);
+
+	if (!hpb)
+		return;
+
+	ufshpb_set_state(hpb, HPB_FAILED);
+
+	sdev = hpb->sdev_ufs_lu;
+	sdev->hostdata = NULL;
+
+	ufshpb_cancel_jobs(hpb);
+
+	ufshpb_pre_req_mempool_destroy(hpb);
+	ufshpb_destroy_region_tbl(hpb);
+
+	kmem_cache_destroy(hpb->map_req_cache);
+	kmem_cache_destroy(hpb->m_page_cache);
+
+	list_del_init(&hpb->list_hpb_lu);
+
+	kfree(hpb);
+}
+
+static void ufshpb_hpb_lu_prepared(struct ufs_hba *hba)
+{
+	int pool_size;
+	struct ufshpb_lu *hpb;
+	struct scsi_device *sdev;
+	bool init_success;
+
+	if (tot_active_srgn_pages == 0) {
+		ufshpb_remove(hba);
+		return;
+	}
+
+	init_success = !ufshpb_check_hpb_reset_query(hba);
+
+	pool_size = PAGE_ALIGN(ufshpb_host_map_kbytes * 1024) / PAGE_SIZE;
+	if (pool_size > tot_active_srgn_pages) {
+		mempool_resize(ufshpb_mctx_pool, tot_active_srgn_pages);
+		mempool_resize(ufshpb_page_pool, tot_active_srgn_pages);
+	}
+
+	shost_for_each_device(sdev, hba->host) {
+		hpb = ufshpb_get_hpb_data(sdev);
+		if (!hpb)
+			continue;
+
+		if (init_success) {
+			ufshpb_set_state(hpb, HPB_PRESENT);
+			if ((hpb->lu_pinned_end - hpb->lu_pinned_start) > 0)
+				queue_work(ufshpb_wq, &hpb->map_work);
+			if (!hpb->is_hcm)
+				ufshpb_issue_umap_all_req(hpb);
+		} else {
+			dev_err(hba->dev, "destroy HPB lu %d\n", hpb->lun);
+			ufshpb_destroy_lu(hba, sdev);
+		}
+	}
+
+	if (!init_success)
+		ufshpb_remove(hba);
+}
+
+void ufshpb_init_hpb_lu(struct ufs_hba *hba, struct scsi_device *sdev)
+{
+	struct ufshpb_lu *hpb;
+	int ret;
+	struct ufshpb_lu_info hpb_lu_info = { 0 };
+	int lun = sdev->lun;
+
+	if (lun >= hba->dev_info.max_lu_supported)
+		goto out;
+
+	ret = ufshpb_get_lu_info(hba, lun, &hpb_lu_info);
+	if (ret)
+		goto out;
+
+	hpb = ufshpb_alloc_hpb_lu(hba, sdev, &hba->ufshpb_dev,
+				  &hpb_lu_info);
+	if (!hpb)
+		goto out;
+
+	tot_active_srgn_pages += hpb_lu_info.max_active_rgns *
+			hpb->srgns_per_rgn * hpb->pages_per_srgn;
+
+out:
+	/* All LUs are initialized */
+	if (atomic_dec_and_test(&hba->ufshpb_dev.slave_conf_cnt))
+		ufshpb_hpb_lu_prepared(hba);
+}
+
+static int ufshpb_init_mem_wq(struct ufs_hba *hba)
+{
+	int ret;
+	unsigned int pool_size;
+
+	ufshpb_mctx_cache = kmem_cache_create("ufshpb_mctx_cache",
+					sizeof(struct ufshpb_map_ctx),
+					0, 0, NULL);
+	if (!ufshpb_mctx_cache) {
+		dev_err(hba->dev, "ufshpb: cannot init mctx cache\n");
+		return -ENOMEM;
+	}
+
+	pool_size = PAGE_ALIGN(ufshpb_host_map_kbytes * 1024) / PAGE_SIZE;
+	dev_info(hba->dev, "%s:%d ufshpb_host_map_kbytes %u pool_size %u\n",
+	       __func__, __LINE__, ufshpb_host_map_kbytes, pool_size);
+
+	ufshpb_mctx_pool = mempool_create_slab_pool(pool_size,
+						    ufshpb_mctx_cache);
+	if (!ufshpb_mctx_pool) {
+		dev_err(hba->dev, "ufshpb: cannot init mctx pool\n");
+		ret = -ENOMEM;
+		goto release_mctx_cache;
+	}
+
+	ufshpb_page_pool = mempool_create_page_pool(pool_size, 0);
+	if (!ufshpb_page_pool) {
+		dev_err(hba->dev, "ufshpb: cannot init page pool\n");
+		ret = -ENOMEM;
+		goto release_mctx_pool;
+	}
+
+	ufshpb_wq = alloc_workqueue("ufshpb-wq",
+					WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
+	if (!ufshpb_wq) {
+		dev_err(hba->dev, "ufshpb: alloc workqueue failed\n");
+		ret = -ENOMEM;
+		goto release_page_pool;
+	}
+
+	return 0;
+
+release_page_pool:
+	mempool_destroy(ufshpb_page_pool);
+release_mctx_pool:
+	mempool_destroy(ufshpb_mctx_pool);
+release_mctx_cache:
+	kmem_cache_destroy(ufshpb_mctx_cache);
+	return ret;
+}
+
+void ufshpb_get_geo_info(struct ufs_hba *hba, u8 *geo_buf)
+{
+	struct ufshpb_dev_info *hpb_info = &hba->ufshpb_dev;
+	int max_active_rgns = 0;
+	int hpb_num_lu;
+
+	hpb_num_lu = geo_buf[GEOMETRY_DESC_PARAM_HPB_NUMBER_LU];
+	if (hpb_num_lu == 0) {
+		dev_err(hba->dev, "No HPB LU supported\n");
+		hpb_info->hpb_disabled = true;
+		return;
+	}
+
+	hpb_info->rgn_size = geo_buf[GEOMETRY_DESC_PARAM_HPB_REGION_SIZE];
+	hpb_info->srgn_size = geo_buf[GEOMETRY_DESC_PARAM_HPB_SUBREGION_SIZE];
+	max_active_rgns = get_unaligned_be16(geo_buf +
+			  GEOMETRY_DESC_PARAM_HPB_MAX_ACTIVE_REGS);
+
+	if (hpb_info->rgn_size == 0 || hpb_info->srgn_size == 0 ||
+	    max_active_rgns == 0) {
+		dev_err(hba->dev, "No HPB supported device\n");
+		hpb_info->hpb_disabled = true;
+		return;
+	}
+}
+
+void ufshpb_get_dev_info(struct ufs_hba *hba, u8 *desc_buf)
+{
+	struct ufshpb_dev_info *hpb_dev_info = &hba->ufshpb_dev;
+	int version, ret;
+	u32 max_hpb_single_cmd = HPB_MULTI_CHUNK_LOW;
+
+	hpb_dev_info->control_mode = desc_buf[DEVICE_DESC_PARAM_HPB_CONTROL];
+
+	version = get_unaligned_be16(desc_buf + DEVICE_DESC_PARAM_HPB_VER);
+	if ((version != HPB_SUPPORT_VERSION) &&
+	    (version != HPB_SUPPORT_LEGACY_VERSION)) {
+		dev_err(hba->dev, "%s: HPB %x version is not supported.\n",
+			__func__, version);
+		hpb_dev_info->hpb_disabled = true;
+		return;
+	}
+
+	if (version == HPB_SUPPORT_LEGACY_VERSION)
+		hpb_dev_info->is_legacy = true;
+
+	pm_runtime_get_sync(hba->dev);
+	ret = ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_READ_ATTR,
+		QUERY_ATTR_IDN_MAX_HPB_SINGLE_CMD, 0, 0, &max_hpb_single_cmd);
+	pm_runtime_put_sync(hba->dev);
+
+	if (ret)
+		dev_err(hba->dev, "%s: idn: read max size of single hpb cmd query request failed",
+			__func__);
+	hpb_dev_info->max_hpb_single_cmd = max_hpb_single_cmd;
+
+	/*
+	 * Get the number of user logical unit to check whether all
+	 * scsi_device finish initialization
+	 */
+	hpb_dev_info->num_lu = desc_buf[DEVICE_DESC_PARAM_NUM_LU];
+}
+
+void ufshpb_init(struct ufs_hba *hba)
+{
+	struct ufshpb_dev_info *hpb_dev_info = &hba->ufshpb_dev;
+	int try;
+	int ret;
+
+	if (!ufshpb_is_allowed(hba) || !hba->dev_info.hpb_enabled)
+		return;
+
+	if (ufshpb_init_mem_wq(hba)) {
+		hpb_dev_info->hpb_disabled = true;
+		return;
+	}
+
+	atomic_set(&hpb_dev_info->slave_conf_cnt, hpb_dev_info->num_lu);
+	tot_active_srgn_pages = 0;
+	/* issue HPB reset query */
+	for (try = 0; try < HPB_RESET_REQ_RETRIES; try++) {
+		ret = ufshcd_query_flag(hba, UPIU_QUERY_OPCODE_SET_FLAG,
+					QUERY_FLAG_IDN_HPB_RESET, 0, NULL);
+		if (!ret)
+			break;
+	}
+}
+
+void ufshpb_remove(struct ufs_hba *hba)
+{
+	mempool_destroy(ufshpb_page_pool);
+	mempool_destroy(ufshpb_mctx_pool);
+	kmem_cache_destroy(ufshpb_mctx_cache);
+
+	destroy_workqueue(ufshpb_wq);
+}
+
+module_param(ufshpb_host_map_kbytes, uint, 0644);
+MODULE_PARM_DESC(ufshpb_host_map_kbytes,
+	"ufshpb host mapping memory kilo-bytes for ufshpb memory-pool");
diff --git a/drivers/scsi/ufs/ufshpb.h b/drivers/scsi/ufs/ufshpb.h
new file mode 100644
index 000000000000..a79e07398970
--- /dev/null
+++ b/drivers/scsi/ufs/ufshpb.h
@@ -0,0 +1,323 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Universal Flash Storage Host Performance Booster
+ *
+ * Copyright (C) 2017-2021 Samsung Electronics Co., Ltd.
+ *
+ * Authors:
+ *	Yongmyung Lee <ymhungry.lee@samsung.com>
+ *	Jinyoung Choi <j-young.choi@samsung.com>
+ */
+
+#ifndef _UFSHPB_H_
+#define _UFSHPB_H_
+
+/* hpb response UPIU macro */
+#define HPB_RSP_NONE				0x0
+#define HPB_RSP_REQ_REGION_UPDATE		0x1
+#define HPB_RSP_DEV_RESET			0x2
+#define MAX_ACTIVE_NUM				2
+#define MAX_INACTIVE_NUM			2
+#define DEV_DATA_SEG_LEN			0x14
+#define DEV_SENSE_SEG_LEN			0x12
+#define DEV_DES_TYPE				0x80
+#define DEV_ADDITIONAL_LEN			0x10
+
+/* hpb map & entries macro */
+#define HPB_RGN_SIZE_UNIT			512
+#define HPB_ENTRY_BLOCK_SIZE			4096
+#define HPB_ENTRY_SIZE				0x8
+#define PINNED_NOT_SET				U32_MAX
+
+/* hpb support chunk size */
+#define HPB_LEGACY_CHUNK_HIGH			1
+#define HPB_MULTI_CHUNK_LOW			7
+#define HPB_MULTI_CHUNK_HIGH			255
+
+/* hpb vender defined opcode */
+#define UFSHPB_READ				0xF8
+#define UFSHPB_READ_BUFFER			0xF9
+#define UFSHPB_READ_BUFFER_ID			0x01
+#define UFSHPB_WRITE_BUFFER			0xFA
+#define UFSHPB_WRITE_BUFFER_INACT_SINGLE_ID	0x01
+#define UFSHPB_WRITE_BUFFER_PREFETCH_ID		0x02
+#define UFSHPB_WRITE_BUFFER_INACT_ALL_ID	0x03
+#define HPB_WRITE_BUFFER_CMD_LENGTH		10
+#define MAX_HPB_READ_ID				0x7F
+#define HPB_READ_BUFFER_CMD_LENGTH		10
+#define LU_ENABLED_HPB_FUNC			0x02
+
+#define HPB_RESET_REQ_RETRIES			10
+#define HPB_MAP_REQ_RETRIES			5
+#define HPB_REQUEUE_TIME_MS			0
+
+#define HPB_SUPPORT_VERSION			0x200
+#define HPB_SUPPORT_LEGACY_VERSION		0x100
+
+enum UFSHPB_MODE {
+	HPB_HOST_CONTROL,
+	HPB_DEVICE_CONTROL,
+};
+
+enum UFSHPB_STATE {
+	HPB_INIT = 0,
+	HPB_PRESENT = 1,
+	HPB_SUSPEND,
+	HPB_FAILED,
+	HPB_RESET,
+};
+
+enum HPB_RGN_STATE {
+	HPB_RGN_INACTIVE,
+	HPB_RGN_ACTIVE,
+	/* pinned regions are always active */
+	HPB_RGN_PINNED,
+};
+
+enum HPB_SRGN_STATE {
+	HPB_SRGN_UNUSED,
+	HPB_SRGN_INVALID,
+	HPB_SRGN_VALID,
+	HPB_SRGN_ISSUED,
+};
+
+/**
+ * struct ufshpb_lu_info - UFSHPB logical unit related info
+ * @num_blocks: the number of logical block
+ * @pinned_start: the start region number of pinned region
+ * @num_pinned: the number of pinned regions
+ * @max_active_rgns: maximum number of active regions
+ */
+struct ufshpb_lu_info {
+	int num_blocks;
+	int pinned_start;
+	int num_pinned;
+	int max_active_rgns;
+};
+
+struct ufshpb_map_ctx {
+	struct page **m_page;
+	unsigned long *ppn_dirty;
+};
+
+struct ufshpb_subregion {
+	struct ufshpb_map_ctx *mctx;
+	enum HPB_SRGN_STATE srgn_state;
+	int rgn_idx;
+	int srgn_idx;
+	bool is_last;
+
+	/* subregion reads - for host mode */
+	unsigned int reads;
+
+	/* below information is used by rsp_list */
+	struct list_head list_act_srgn;
+};
+
+struct ufshpb_region {
+	struct ufshpb_lu *hpb;
+	struct ufshpb_subregion *srgn_tbl;
+	enum HPB_RGN_STATE rgn_state;
+	int rgn_idx;
+	int srgn_cnt;
+
+	/* below information is used by rsp_list */
+	struct list_head list_inact_rgn;
+
+	/* below information is used by lru */
+	struct list_head list_lru_rgn;
+	unsigned long rgn_flags;
+#define RGN_FLAG_DIRTY 0
+#define RGN_FLAG_UPDATE 1
+
+	/* region reads - for host mode */
+	spinlock_t rgn_lock;
+	unsigned int reads;
+	/* region "cold" timer - for host mode */
+	ktime_t read_timeout;
+	unsigned int read_timeout_expiries;
+	struct list_head list_expired_rgn;
+};
+
+#define for_each_sub_region(rgn, i, srgn)				\
+	for ((i) = 0;							\
+	     ((i) < (rgn)->srgn_cnt) && ((srgn) = &(rgn)->srgn_tbl[i]); \
+	     (i)++)
+
+/**
+ * struct ufshpb_req - HPB related request structure (write/read buffer)
+ * @req: block layer request structure
+ * @bio: bio for this request
+ * @hpb: ufshpb_lu structure that related to
+ * @list_req: ufshpb_req mempool list
+ * @sense: store its sense data
+ * @mctx: L2P map information
+ * @rgn_idx: target region index
+ * @srgn_idx: target sub-region index
+ * @lun: target logical unit number
+ * @m_page: L2P map information data for pre-request
+ * @len: length of host-side cached L2P map in m_page
+ * @lpn: start LPN of L2P map in m_page
+ */
+struct ufshpb_req {
+	struct request *req;
+	struct bio *bio;
+	struct ufshpb_lu *hpb;
+	struct list_head list_req;
+	union {
+		struct {
+			struct ufshpb_map_ctx *mctx;
+			unsigned int rgn_idx;
+			unsigned int srgn_idx;
+			unsigned int lun;
+		} rb;
+		struct {
+			struct page *m_page;
+			unsigned int len;
+			unsigned long lpn;
+		} wb;
+	};
+};
+
+struct victim_select_info {
+	struct list_head lh_lru_rgn; /* LRU list of regions */
+	int max_lru_active_cnt; /* supported hpb #region - pinned #region */
+	atomic_t active_cnt;
+};
+
+/**
+ * ufshpb_params - ufs hpb parameters
+ * @requeue_timeout_ms - requeue threshold of wb command (0x2)
+ * @activation_thld - min reads [IOs] to activate/update a region
+ * @normalization_factor - shift right the region's reads
+ * @eviction_thld_enter - min reads [IOs] for the entering region in eviction
+ * @eviction_thld_exit - max reads [IOs] for the exiting region in eviction
+ * @read_timeout_ms - timeout [ms] from the last read IO to the region
+ * @read_timeout_expiries - amount of allowable timeout expireis
+ * @timeout_polling_interval_ms - frequency in which timeouts are checked
+ * @inflight_map_req - number of inflight map requests
+ */
+struct ufshpb_params {
+	unsigned int requeue_timeout_ms;
+	unsigned int activation_thld;
+	unsigned int normalization_factor;
+	unsigned int eviction_thld_enter;
+	unsigned int eviction_thld_exit;
+	unsigned int read_timeout_ms;
+	unsigned int read_timeout_expiries;
+	unsigned int timeout_polling_interval_ms;
+	unsigned int inflight_map_req;
+};
+
+struct ufshpb_stats {
+	u64 hit_cnt;
+	u64 miss_cnt;
+	u64 rb_noti_cnt;
+	u64 rb_active_cnt;
+	u64 rb_inactive_cnt;
+	u64 map_req_cnt;
+	u64 pre_req_cnt;
+	u64 umap_req_cnt;
+};
+
+struct ufshpb_lu {
+	int lun;
+	struct scsi_device *sdev_ufs_lu;
+
+	spinlock_t rgn_state_lock; /* for protect rgn/srgn state */
+	struct ufshpb_region *rgn_tbl;
+
+	atomic_t hpb_state;
+
+	spinlock_t rsp_list_lock;
+	struct list_head lh_act_srgn; /* hold rsp_list_lock */
+	struct list_head lh_inact_rgn; /* hold rsp_list_lock */
+
+	/* pre request information */
+	struct ufshpb_req *pre_req;
+	int num_inflight_pre_req;
+	int throttle_pre_req;
+	int num_inflight_map_req; /* hold param_lock */
+	spinlock_t param_lock;
+
+	struct list_head lh_pre_req_free;
+	int cur_read_id;
+	int pre_req_min_tr_len;
+	int pre_req_max_tr_len;
+
+	/* cached L2P map management worker */
+	struct work_struct map_work;
+
+	/* for selecting victim */
+	struct victim_select_info lru_info;
+	struct work_struct ufshpb_normalization_work;
+	struct delayed_work ufshpb_read_to_work;
+	unsigned long work_data_bits;
+#define TIMEOUT_WORK_RUNNING 0
+
+	/* pinned region information */
+	u32 lu_pinned_start;
+	u32 lu_pinned_end;
+
+	/* HPB related configuration */
+	u32 rgns_per_lu;
+	u32 srgns_per_lu;
+	u32 last_srgn_entries;
+	int srgns_per_rgn;
+	u32 srgn_mem_size;
+	u32 entries_per_rgn_mask;
+	u32 entries_per_rgn_shift;
+	u32 entries_per_srgn;
+	u32 entries_per_srgn_mask;
+	u32 entries_per_srgn_shift;
+	u32 pages_per_srgn;
+
+	bool is_hcm;
+
+	struct ufshpb_stats stats;
+	struct ufshpb_params params;
+
+	struct kmem_cache *map_req_cache;
+	struct kmem_cache *m_page_cache;
+
+	struct list_head list_hpb_lu;
+};
+
+struct ufs_hba;
+struct ufshcd_lrb;
+
+#ifndef CONFIG_SCSI_UFS_HPB
+static int ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) { return 0; }
+static void ufshpb_rsp_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) {}
+static void ufshpb_resume(struct ufs_hba *hba) {}
+static void ufshpb_suspend(struct ufs_hba *hba) {}
+static void ufshpb_reset(struct ufs_hba *hba) {}
+static void ufshpb_reset_host(struct ufs_hba *hba) {}
+static void ufshpb_init(struct ufs_hba *hba) {}
+static void ufshpb_init_hpb_lu(struct ufs_hba *hba, struct scsi_device *sdev) {}
+static void ufshpb_destroy_lu(struct ufs_hba *hba, struct scsi_device *sdev) {}
+static void ufshpb_remove(struct ufs_hba *hba) {}
+static bool ufshpb_is_allowed(struct ufs_hba *hba) { return false; }
+static void ufshpb_get_geo_info(struct ufs_hba *hba, u8 *geo_buf) {}
+static void ufshpb_get_dev_info(struct ufs_hba *hba, u8 *desc_buf) {}
+static bool ufshpb_is_legacy(struct ufs_hba *hba) { return false; }
+#else
+int ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp);
+void ufshpb_rsp_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp);
+void ufshpb_resume(struct ufs_hba *hba);
+void ufshpb_suspend(struct ufs_hba *hba);
+void ufshpb_reset(struct ufs_hba *hba);
+void ufshpb_reset_host(struct ufs_hba *hba);
+void ufshpb_init(struct ufs_hba *hba);
+void ufshpb_init_hpb_lu(struct ufs_hba *hba, struct scsi_device *sdev);
+void ufshpb_destroy_lu(struct ufs_hba *hba, struct scsi_device *sdev);
+void ufshpb_remove(struct ufs_hba *hba);
+bool ufshpb_is_allowed(struct ufs_hba *hba);
+void ufshpb_get_geo_info(struct ufs_hba *hba, u8 *geo_buf);
+void ufshpb_get_dev_info(struct ufs_hba *hba, u8 *desc_buf);
+bool ufshpb_is_legacy(struct ufs_hba *hba);
+extern struct attribute_group ufs_sysfs_hpb_stat_group;
+extern struct attribute_group ufs_sysfs_hpb_param_group;
+#endif
+
+#endif /* End of Header */
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index b0deaf4af5a3..c25ce8f0e0af 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -519,7 +519,7 @@ static void virtio_scsi_init_hdr_pi(struct virtio_device *vdev,
 				    struct virtio_scsi_cmd_req_pi *cmd_pi,
 				    struct scsi_cmnd *sc)
 {
-	struct request *rq = sc->request;
+	struct request *rq = scsi_cmd_to_rq(sc);
 	struct blk_integrity *bi;
 
 	virtio_scsi_init_hdr(vdev, (struct virtio_scsi_cmd_req *)cmd_pi, sc);
@@ -543,7 +543,7 @@ static void virtio_scsi_init_hdr_pi(struct virtio_device *vdev,
 static struct virtio_scsi_vq *virtscsi_pick_vq_mq(struct virtio_scsi *vscsi,
 						  struct scsi_cmnd *sc)
 {
-	u32 tag = blk_mq_unique_tag(sc->request);
+	u32 tag = blk_mq_unique_tag(scsi_cmd_to_rq(sc));
 	u16 hwq = blk_mq_unique_tag_to_hwq(tag);
 
 	return &vscsi->req_vqs[hwq];
diff --git a/drivers/scsi/wd719x.c b/drivers/scsi/wd719x.c
index edc8a139a60d..6f10a43510fb 100644
--- a/drivers/scsi/wd719x.c
+++ b/drivers/scsi/wd719x.c
@@ -466,14 +466,16 @@ static int wd719x_abort(struct scsi_cmnd *cmd)
 	unsigned long flags;
 	struct wd719x_scb *scb = scsi_cmd_priv(cmd);
 	struct wd719x *wd = shost_priv(cmd->device->host);
+	struct device *dev = &wd->pdev->dev;
 
-	dev_info(&wd->pdev->dev, "abort command, tag: %x\n", cmd->tag);
+	dev_info(dev, "abort command, tag: %x\n", scsi_cmd_to_rq(cmd)->tag);
 
-	action = /*cmd->tag ? WD719X_CMD_ABORT_TAG : */WD719X_CMD_ABORT;
+	action = WD719X_CMD_ABORT;
 
 	spin_lock_irqsave(wd->sh->host_lock, flags);
 	result = wd719x_direct_cmd(wd, action, cmd->device->id,
-				   cmd->device->lun, cmd->tag, scb->phys, 0);
+				   cmd->device->lun, scsi_cmd_to_rq(cmd)->tag,
+				   scb->phys, 0);
 	wd719x_finish_cmd(scb, DID_ABORT);
 	spin_unlock_irqrestore(wd->sh->host_lock, flags);
 	if (result)
diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c
index ec9d399fbbd8..0204e314b482 100644
--- a/drivers/scsi/xen-scsifront.c
+++ b/drivers/scsi/xen-scsifront.c
@@ -212,7 +212,7 @@ static int scsifront_do_request(struct vscsifrnt_info *info,
 	memcpy(ring_req->cmnd, sc->cmnd, sc->cmd_len);
 
 	ring_req->sc_data_direction   = (uint8_t)sc->sc_data_direction;
-	ring_req->timeout_per_command = sc->request->timeout / HZ;
+	ring_req->timeout_per_command = scsi_cmd_to_rq(sc)->timeout / HZ;
 
 	for (i = 0; i < (shadow->nr_segments & ~VSCSIIF_SG_GRANT); i++)
 		ring_req->seg[i] = shadow->seg[i];
diff --git a/drivers/scsi/zalon.c b/drivers/scsi/zalon.c
index 7eac76cccc4c..f1e5cf8a17d9 100644
--- a/drivers/scsi/zalon.c
+++ b/drivers/scsi/zalon.c
@@ -168,15 +168,13 @@ static const struct parisc_device_id zalon_tbl[] __initconst = {
 
 MODULE_DEVICE_TABLE(parisc, zalon_tbl);
 
-static int __exit zalon_remove(struct parisc_device *dev)
+static void __exit zalon_remove(struct parisc_device *dev)
 {
 	struct Scsi_Host *host = dev_get_drvdata(&dev->dev);
 
 	scsi_remove_host(host);
 	ncr53c8xx_release(host);
 	free_irq(dev->irq, host);
-
-	return 0;
 }
 
 static struct parisc_driver zalon_driver __refdata = {
diff --git a/drivers/staging/media/atomisp/pci/hive_isp_css_common/host/isp_local.h b/drivers/staging/media/atomisp/pci/hive_isp_css_common/host/isp_local.h
index eceeb5d160ad..4dbec4063b3d 100644
--- a/drivers/staging/media/atomisp/pci/hive_isp_css_common/host/isp_local.h
+++ b/drivers/staging/media/atomisp/pci/hive_isp_css_common/host/isp_local.h
@@ -16,8 +16,6 @@
 #ifndef __ISP_LOCAL_H_INCLUDED__
 #define __ISP_LOCAL_H_INCLUDED__
 
-#include <stdbool.h>
-
 #include "isp_global.h"
 
 #include <isp2400_support.h>
diff --git a/drivers/staging/media/atomisp/pci/hive_isp_css_include/print_support.h b/drivers/staging/media/atomisp/pci/hive_isp_css_include/print_support.h
index 540b405cc0f7..a3c7f3de6d17 100644
--- a/drivers/staging/media/atomisp/pci/hive_isp_css_include/print_support.h
+++ b/drivers/staging/media/atomisp/pci/hive_isp_css_include/print_support.h
@@ -16,7 +16,7 @@
 #ifndef __PRINT_SUPPORT_H_INCLUDED__
 #define __PRINT_SUPPORT_H_INCLUDED__
 
-#include <stdarg.h>
+#include <linux/stdarg.h>
 
 extern int (*sh_css_printf)(const char *fmt, va_list args);
 /* depends on host supplied print function in ia_css_init() */
diff --git a/drivers/staging/media/atomisp/pci/ia_css_env.h b/drivers/staging/media/atomisp/pci/ia_css_env.h
index 6b38723b27cd..3b89bbd837a0 100644
--- a/drivers/staging/media/atomisp/pci/ia_css_env.h
+++ b/drivers/staging/media/atomisp/pci/ia_css_env.h
@@ -17,7 +17,7 @@
 #define __IA_CSS_ENV_H
 
 #include <type_support.h>
-#include <stdarg.h> /* va_list */
+#include <linux/stdarg.h> /* va_list */
 #include "ia_css_types.h"
 #include "ia_css_acc_types.h"
 
diff --git a/drivers/staging/media/atomisp/pci/runtime/debug/interface/ia_css_debug.h b/drivers/staging/media/atomisp/pci/runtime/debug/interface/ia_css_debug.h
index 5e6e7447ae00..e37ef4232c55 100644
--- a/drivers/staging/media/atomisp/pci/runtime/debug/interface/ia_css_debug.h
+++ b/drivers/staging/media/atomisp/pci/runtime/debug/interface/ia_css_debug.h
@@ -19,7 +19,7 @@
 /*! \file */
 
 #include <type_support.h>
-#include <stdarg.h>
+#include <linux/stdarg.h>
 #include "ia_css_types.h"
 #include "ia_css_binary.h"
 #include "ia_css_frame_public.h"
diff --git a/drivers/staging/media/atomisp/pci/sh_css_internal.h b/drivers/staging/media/atomisp/pci/sh_css_internal.h
index 3c669ec79b68..496faa7297a5 100644
--- a/drivers/staging/media/atomisp/pci/sh_css_internal.h
+++ b/drivers/staging/media/atomisp/pci/sh_css_internal.h
@@ -20,7 +20,7 @@
 #include <math_support.h>
 #include <type_support.h>
 #include <platform_support.h>
-#include <stdarg.h>
+#include <linux/stdarg.h>
 
 #if !defined(ISP2401)
 #include "input_formatter.h"
diff --git a/drivers/target/Kconfig b/drivers/target/Kconfig
index c163b14774d7..72171ea3dd53 100644
--- a/drivers/target/Kconfig
+++ b/drivers/target/Kconfig
@@ -5,7 +5,7 @@ menuconfig TARGET_CORE
 	depends on BLOCK
 	select CONFIGFS_FS
 	select CRC_T10DIF
-	select BLK_SCSI_REQUEST
+	select SCSI_COMMON
 	select SGL_ALLOC
 	default n
 	help
diff --git a/drivers/target/iscsi/cxgbit/cxgbit_ddp.c b/drivers/target/iscsi/cxgbit/cxgbit_ddp.c
index b044999ad002..072afd070f3e 100644
--- a/drivers/target/iscsi/cxgbit/cxgbit_ddp.c
+++ b/drivers/target/iscsi/cxgbit/cxgbit_ddp.c
@@ -234,7 +234,7 @@ cxgbit_get_r2t_ttt(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 	struct cxgbit_device *cdev = csk->com.cdev;
 	struct cxgbit_cmd *ccmd = iscsit_priv_cmd(cmd);
 	struct cxgbi_task_tag_info *ttinfo = &ccmd->ttinfo;
-	int ret = -EINVAL;
+	int ret;
 
 	if ((!ccmd->setup_ddp) ||
 	    (!test_bit(CSK_DDP_ENABLE, &csk->com.flags)))
diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index cbb2118fb35e..52db28d868d5 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -183,7 +183,7 @@ static int tcm_loop_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc)
 
 	memset(tl_cmd, 0, sizeof(*tl_cmd));
 	tl_cmd->sc = sc;
-	tl_cmd->sc_cmd_tag = sc->request->tag;
+	tl_cmd->sc_cmd_tag = scsi_cmd_to_rq(sc)->tag;
 
 	tcm_loop_target_queue_cmd(tl_cmd);
 	return 0;
@@ -241,7 +241,7 @@ static int tcm_loop_abort_task(struct scsi_cmnd *sc)
 {
 	struct tcm_loop_hba *tl_hba;
 	struct tcm_loop_tpg *tl_tpg;
-	int ret = FAILED;
+	int ret;
 
 	/*
 	 * Locate the tcm_loop_hba_t pointer
@@ -249,7 +249,7 @@ static int tcm_loop_abort_task(struct scsi_cmnd *sc)
 	tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host);
 	tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id];
 	ret = tcm_loop_issue_tmr(tl_tpg, sc->device->lun,
-				 sc->request->tag, TMR_ABORT_TASK);
+				 scsi_cmd_to_rq(sc)->tag, TMR_ABORT_TASK);
 	return (ret == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED;
 }
 
@@ -261,7 +261,7 @@ static int tcm_loop_device_reset(struct scsi_cmnd *sc)
 {
 	struct tcm_loop_hba *tl_hba;
 	struct tcm_loop_tpg *tl_tpg;
-	int ret = FAILED;
+	int ret;
 
 	/*
 	 * Locate the tcm_loop_hba_t pointer
diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c
index 4d3ceee23622..b9f9fb5d7e63 100644
--- a/drivers/target/sbp/sbp_target.c
+++ b/drivers/target/sbp/sbp_target.c
@@ -1389,8 +1389,8 @@ static void sbp_sense_mangle(struct sbp_target_request *req)
 		(sense[0] & 0x80) |		/* valid */
 		((sense[2] & 0xe0) >> 1) |	/* mark, eom, ili */
 		(sense[2] & 0x0f);		/* sense_key */
-	status[2] = se_cmd->scsi_asc;		/* sense_code */
-	status[3] = se_cmd->scsi_ascq;		/* sense_qualifier */
+	status[2] = 0;				/* XXX sense_code */
+	status[3] = 0;				/* XXX sense_qualifier */
 
 	/* information */
 	status[4] = sense[3];
diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c
index 3bb921345bce..cb1de1ecaaa6 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -428,22 +428,6 @@ out:
 	return rc;
 }
 
-static inline void set_ascq(struct se_cmd *cmd, u8 alua_ascq)
-{
-	/*
-	 * Set SCSI additional sense code (ASC) to 'LUN Not Accessible';
-	 * The ALUA additional sense code qualifier (ASCQ) is determined
-	 * by the ALUA primary or secondary access state..
-	 */
-	pr_debug("[%s]: ALUA TG Port not available, "
-		"SenseKey: NOT_READY, ASC/ASCQ: "
-		"0x04/0x%02x\n",
-		cmd->se_tfo->fabric_name, alua_ascq);
-
-	cmd->scsi_asc = 0x04;
-	cmd->scsi_ascq = alua_ascq;
-}
-
 static inline void core_alua_state_nonoptimized(
 	struct se_cmd *cmd,
 	unsigned char *cdb,
@@ -458,9 +442,9 @@ static inline void core_alua_state_nonoptimized(
 	cmd->alua_nonop_delay = nonop_delay_msecs;
 }
 
-static inline int core_alua_state_lba_dependent(
+static inline sense_reason_t core_alua_state_lba_dependent(
 	struct se_cmd *cmd,
-	struct t10_alua_tg_pt_gp *tg_pt_gp)
+	u16 tg_pt_gp_id)
 {
 	struct se_device *dev = cmd->se_dev;
 	u64 segment_size, segment_mult, sectors, lba;
@@ -506,23 +490,19 @@ static inline int core_alua_state_lba_dependent(
 		}
 		if (!cur_map) {
 			spin_unlock(&dev->t10_alua.lba_map_lock);
-			set_ascq(cmd, ASCQ_04H_ALUA_TG_PT_UNAVAILABLE);
-			return 1;
+			return TCM_ALUA_TG_PT_UNAVAILABLE;
 		}
 		list_for_each_entry(map_mem, &cur_map->lba_map_mem_list,
 				    lba_map_mem_list) {
-			if (map_mem->lba_map_mem_alua_pg_id !=
-			    tg_pt_gp->tg_pt_gp_id)
+			if (map_mem->lba_map_mem_alua_pg_id != tg_pt_gp_id)
 				continue;
 			switch(map_mem->lba_map_mem_alua_state) {
 			case ALUA_ACCESS_STATE_STANDBY:
 				spin_unlock(&dev->t10_alua.lba_map_lock);
-				set_ascq(cmd, ASCQ_04H_ALUA_TG_PT_STANDBY);
-				return 1;
+				return TCM_ALUA_TG_PT_STANDBY;
 			case ALUA_ACCESS_STATE_UNAVAILABLE:
 				spin_unlock(&dev->t10_alua.lba_map_lock);
-				set_ascq(cmd, ASCQ_04H_ALUA_TG_PT_UNAVAILABLE);
-				return 1;
+				return TCM_ALUA_TG_PT_UNAVAILABLE;
 			default:
 				break;
 			}
@@ -532,7 +512,7 @@ static inline int core_alua_state_lba_dependent(
 	return 0;
 }
 
-static inline int core_alua_state_standby(
+static inline sense_reason_t core_alua_state_standby(
 	struct se_cmd *cmd,
 	unsigned char *cdb)
 {
@@ -556,24 +536,21 @@ static inline int core_alua_state_standby(
 		case SAI_READ_CAPACITY_16:
 			return 0;
 		default:
-			set_ascq(cmd, ASCQ_04H_ALUA_TG_PT_STANDBY);
-			return 1;
+			return TCM_ALUA_TG_PT_STANDBY;
 		}
 	case MAINTENANCE_IN:
 		switch (cdb[1] & 0x1f) {
 		case MI_REPORT_TARGET_PGS:
 			return 0;
 		default:
-			set_ascq(cmd, ASCQ_04H_ALUA_TG_PT_STANDBY);
-			return 1;
+			return TCM_ALUA_TG_PT_STANDBY;
 		}
 	case MAINTENANCE_OUT:
 		switch (cdb[1]) {
 		case MO_SET_TARGET_PGS:
 			return 0;
 		default:
-			set_ascq(cmd, ASCQ_04H_ALUA_TG_PT_STANDBY);
-			return 1;
+			return TCM_ALUA_TG_PT_STANDBY;
 		}
 	case REQUEST_SENSE:
 	case PERSISTENT_RESERVE_IN:
@@ -582,14 +559,13 @@ static inline int core_alua_state_standby(
 	case WRITE_BUFFER:
 		return 0;
 	default:
-		set_ascq(cmd, ASCQ_04H_ALUA_TG_PT_STANDBY);
-		return 1;
+		return TCM_ALUA_TG_PT_STANDBY;
 	}
 
 	return 0;
 }
 
-static inline int core_alua_state_unavailable(
+static inline sense_reason_t core_alua_state_unavailable(
 	struct se_cmd *cmd,
 	unsigned char *cdb)
 {
@@ -606,30 +582,27 @@ static inline int core_alua_state_unavailable(
 		case MI_REPORT_TARGET_PGS:
 			return 0;
 		default:
-			set_ascq(cmd, ASCQ_04H_ALUA_TG_PT_UNAVAILABLE);
-			return 1;
+			return TCM_ALUA_TG_PT_UNAVAILABLE;
 		}
 	case MAINTENANCE_OUT:
 		switch (cdb[1]) {
 		case MO_SET_TARGET_PGS:
 			return 0;
 		default:
-			set_ascq(cmd, ASCQ_04H_ALUA_TG_PT_UNAVAILABLE);
-			return 1;
+			return TCM_ALUA_TG_PT_UNAVAILABLE;
 		}
 	case REQUEST_SENSE:
 	case READ_BUFFER:
 	case WRITE_BUFFER:
 		return 0;
 	default:
-		set_ascq(cmd, ASCQ_04H_ALUA_TG_PT_UNAVAILABLE);
-		return 1;
+		return TCM_ALUA_TG_PT_UNAVAILABLE;
 	}
 
 	return 0;
 }
 
-static inline int core_alua_state_transition(
+static inline sense_reason_t core_alua_state_transition(
 	struct se_cmd *cmd,
 	unsigned char *cdb)
 {
@@ -646,16 +619,14 @@ static inline int core_alua_state_transition(
 		case MI_REPORT_TARGET_PGS:
 			return 0;
 		default:
-			set_ascq(cmd, ASCQ_04H_ALUA_STATE_TRANSITION);
-			return 1;
+			return TCM_ALUA_STATE_TRANSITION;
 		}
 	case REQUEST_SENSE:
 	case READ_BUFFER:
 	case WRITE_BUFFER:
 		return 0;
 	default:
-		set_ascq(cmd, ASCQ_04H_ALUA_STATE_TRANSITION);
-		return 1;
+		return TCM_ALUA_STATE_TRANSITION;
 	}
 
 	return 0;
@@ -674,6 +645,8 @@ target_alua_state_check(struct se_cmd *cmd)
 	struct se_lun *lun = cmd->se_lun;
 	struct t10_alua_tg_pt_gp *tg_pt_gp;
 	int out_alua_state, nonop_delay_msecs;
+	u16 tg_pt_gp_id;
+	sense_reason_t rc = TCM_NO_SENSE;
 
 	if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)
 		return 0;
@@ -687,8 +660,7 @@ target_alua_state_check(struct se_cmd *cmd)
 	if (atomic_read(&lun->lun_tg_pt_secondary_offline)) {
 		pr_debug("ALUA: Got secondary offline status for local"
 				" target port\n");
-		set_ascq(cmd, ASCQ_04H_ALUA_OFFLINE);
-		return TCM_CHECK_CONDITION_NOT_READY;
+		return TCM_ALUA_OFFLINE;
 	}
 
 	if (!lun->lun_tg_pt_gp)
@@ -698,8 +670,8 @@ target_alua_state_check(struct se_cmd *cmd)
 	tg_pt_gp = lun->lun_tg_pt_gp;
 	out_alua_state = tg_pt_gp->tg_pt_gp_alua_access_state;
 	nonop_delay_msecs = tg_pt_gp->tg_pt_gp_nonop_delay_msecs;
+	tg_pt_gp_id = tg_pt_gp->tg_pt_gp_id;
 
-	// XXX: keeps using tg_pt_gp witout reference after unlock
 	spin_unlock(&lun->lun_tg_pt_gp_lock);
 	/*
 	 * Process ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED in a separate conditional
@@ -715,20 +687,16 @@ target_alua_state_check(struct se_cmd *cmd)
 		core_alua_state_nonoptimized(cmd, cdb, nonop_delay_msecs);
 		break;
 	case ALUA_ACCESS_STATE_STANDBY:
-		if (core_alua_state_standby(cmd, cdb))
-			return TCM_CHECK_CONDITION_NOT_READY;
+		rc = core_alua_state_standby(cmd, cdb);
 		break;
 	case ALUA_ACCESS_STATE_UNAVAILABLE:
-		if (core_alua_state_unavailable(cmd, cdb))
-			return TCM_CHECK_CONDITION_NOT_READY;
+		rc = core_alua_state_unavailable(cmd, cdb);
 		break;
 	case ALUA_ACCESS_STATE_TRANSITION:
-		if (core_alua_state_transition(cmd, cdb))
-			return TCM_CHECK_CONDITION_NOT_READY;
+		rc = core_alua_state_transition(cmd, cdb);
 		break;
 	case ALUA_ACCESS_STATE_LBA_DEPENDENT:
-		if (core_alua_state_lba_dependent(cmd, tg_pt_gp))
-			return TCM_CHECK_CONDITION_NOT_READY;
+		rc = core_alua_state_lba_dependent(cmd, tg_pt_gp_id);
 		break;
 	/*
 	 * OFFLINE is a secondary ALUA target port group access state, that is
@@ -738,10 +706,16 @@ target_alua_state_check(struct se_cmd *cmd)
 	default:
 		pr_err("Unknown ALUA access state: 0x%02x\n",
 				out_alua_state);
-		return TCM_INVALID_CDB_FIELD;
+		rc = TCM_INVALID_CDB_FIELD;
 	}
 
-	return 0;
+	if (rc && rc != TCM_INVALID_CDB_FIELD) {
+		pr_debug("[%s]: ALUA TG Port not available, "
+			"SenseKey: NOT_READY, ASC/rc: 0x04/%d\n",
+			cmd->se_tfo->fabric_name, rc);
+	}
+
+	return rc;
 }
 
 /*
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 44d9d028f716..4069a1edcfa3 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -83,7 +83,7 @@ static int iblock_configure_device(struct se_device *dev)
 	struct blk_integrity *bi;
 	fmode_t mode;
 	unsigned int max_write_zeroes_sectors;
-	int ret = -ENOMEM;
+	int ret;
 
 	if (!(ib_dev->ibd_flags & IBDF_HAS_UDEV_PATH)) {
 		pr_err("Missing udev_path= parameters for IBLOCK\n");
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 2629d2ef3970..75ef52f008ff 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -620,17 +620,17 @@ static void pscsi_complete_cmd(struct se_cmd *cmd, u8 scsi_status,
 			buf = transport_kmap_data_sg(cmd);
 			if (!buf) {
 				; /* XXX: TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE */
-			}
-
-			if (cdb[0] == MODE_SENSE_10) {
-				if (!(buf[3] & 0x80))
-					buf[3] |= 0x80;
 			} else {
-				if (!(buf[2] & 0x80))
-					buf[2] |= 0x80;
-			}
+				if (cdb[0] == MODE_SENSE_10) {
+					if (!(buf[3] & 0x80))
+						buf[3] |= 0x80;
+				} else {
+					if (!(buf[2] & 0x80))
+						buf[2] |= 0x80;
+				}
 
-			transport_kunmap_data_sg(cmd);
+				transport_kunmap_data_sg(cmd);
+			}
 		}
 	}
 after_mode_sense:
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 26ceabe34de5..14c6f2bb1b01 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -736,8 +736,7 @@ static void target_complete_failure_work(struct work_struct *work)
 {
 	struct se_cmd *cmd = container_of(work, struct se_cmd, work);
 
-	transport_generic_request_failure(cmd,
-			TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE);
+	transport_generic_request_failure(cmd, cmd->sense_reason);
 }
 
 /*
@@ -855,7 +854,8 @@ static bool target_cmd_interrupted(struct se_cmd *cmd)
 }
 
 /* May be called from interrupt context so must not sleep. */
-void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
+void target_complete_cmd_with_sense(struct se_cmd *cmd, u8 scsi_status,
+				    sense_reason_t sense_reason)
 {
 	struct se_wwn *wwn = cmd->se_sess->se_tpg->se_tpg_wwn;
 	int success, cpu;
@@ -865,6 +865,7 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
 		return;
 
 	cmd->scsi_status = scsi_status;
+	cmd->sense_reason = sense_reason;
 
 	spin_lock_irqsave(&cmd->t_state_lock, flags);
 	switch (cmd->scsi_status) {
@@ -893,6 +894,14 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
 
 	queue_work_on(cpu, target_completion_wq, &cmd->work);
 }
+EXPORT_SYMBOL(target_complete_cmd_with_sense);
+
+void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
+{
+	target_complete_cmd_with_sense(cmd, scsi_status, scsi_status ?
+			      TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE :
+			      TCM_NO_SENSE);
+}
 EXPORT_SYMBOL(target_complete_cmd);
 
 void target_set_cmd_data_length(struct se_cmd *cmd, int length)
@@ -2003,7 +2012,6 @@ void transport_generic_request_failure(struct se_cmd *cmd,
 	case TCM_ADDRESS_OUT_OF_RANGE:
 	case TCM_CHECK_CONDITION_ABORT_CMD:
 	case TCM_CHECK_CONDITION_UNIT_ATTENTION:
-	case TCM_CHECK_CONDITION_NOT_READY:
 	case TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED:
 	case TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED:
 	case TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED:
@@ -2013,6 +2021,10 @@ void transport_generic_request_failure(struct se_cmd *cmd,
 	case TCM_TOO_MANY_SEGMENT_DESCS:
 	case TCM_UNSUPPORTED_SEGMENT_DESC_TYPE_CODE:
 	case TCM_INVALID_FIELD_IN_COMMAND_IU:
+	case TCM_ALUA_TG_PT_STANDBY:
+	case TCM_ALUA_TG_PT_UNAVAILABLE:
+	case TCM_ALUA_STATE_TRANSITION:
+	case TCM_ALUA_OFFLINE:
 		break;
 	case TCM_OUT_OF_RESOURCES:
 		cmd->scsi_status = SAM_STAT_TASK_SET_FULL;
@@ -3277,9 +3289,6 @@ static const struct sense_detail sense_detail_table[] = {
 	[TCM_CHECK_CONDITION_UNIT_ATTENTION] = {
 		.key = UNIT_ATTENTION,
 	},
-	[TCM_CHECK_CONDITION_NOT_READY] = {
-		.key = NOT_READY,
-	},
 	[TCM_MISCOMPARE_VERIFY] = {
 		.key = MISCOMPARE,
 		.asc = 0x1d, /* MISCOMPARE DURING VERIFY OPERATION */
@@ -3340,6 +3349,26 @@ static const struct sense_detail sense_detail_table[] = {
 		.asc = 0x0e,
 		.ascq = 0x03, /* INVALID FIELD IN COMMAND INFORMATION UNIT */
 	},
+	[TCM_ALUA_TG_PT_STANDBY] = {
+		.key = NOT_READY,
+		.asc = 0x04,
+		.ascq = ASCQ_04H_ALUA_TG_PT_STANDBY,
+	},
+	[TCM_ALUA_TG_PT_UNAVAILABLE] = {
+		.key = NOT_READY,
+		.asc = 0x04,
+		.ascq = ASCQ_04H_ALUA_TG_PT_UNAVAILABLE,
+	},
+	[TCM_ALUA_STATE_TRANSITION] = {
+		.key = NOT_READY,
+		.asc = 0x04,
+		.ascq = ASCQ_04H_ALUA_STATE_TRANSITION,
+	},
+	[TCM_ALUA_OFFLINE] = {
+		.key = NOT_READY,
+		.asc = 0x04,
+		.ascq = ASCQ_04H_ALUA_OFFLINE,
+	},
 };
 
 /**
@@ -3374,11 +3403,8 @@ static void translate_sense_reason(struct se_cmd *cmd, sense_reason_t reason)
 			cmd->scsi_status = SAM_STAT_BUSY;
 			return;
 		}
-	} else if (sd->asc == 0) {
-		WARN_ON_ONCE(cmd->scsi_asc == 0);
-		asc = cmd->scsi_asc;
-		ascq = cmd->scsi_ascq;
 	} else {
+		WARN_ON_ONCE(sd->asc == 0);
 		asc = sd->asc;
 		ascq = sd->ascq;
 	}
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index fbb6ffaddfbe..9f552f48084c 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -191,6 +191,7 @@ struct tcmu_cmd {
 	unsigned long deadline;
 
 #define TCMU_CMD_BIT_EXPIRED 0
+#define TCMU_CMD_BIT_KEEP_BUF 1
 	unsigned long flags;
 };
 
@@ -1315,11 +1316,13 @@ unlock:
 	mutex_unlock(&udev->cmdr_lock);
 }
 
-static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry *entry)
+static bool tcmu_handle_completion(struct tcmu_cmd *cmd,
+				   struct tcmu_cmd_entry *entry, bool keep_buf)
 {
 	struct se_cmd *se_cmd = cmd->se_cmd;
 	struct tcmu_dev *udev = cmd->tcmu_dev;
 	bool read_len_valid = false;
+	bool ret = true;
 	uint32_t read_len;
 
 	/*
@@ -1330,6 +1333,13 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry *
 		WARN_ON_ONCE(se_cmd);
 		goto out;
 	}
+	if (test_bit(TCMU_CMD_BIT_KEEP_BUF, &cmd->flags)) {
+		pr_err("cmd_id %u already completed with KEEP_BUF, ring is broken\n",
+		       entry->hdr.cmd_id);
+		set_bit(TCMU_DEV_BIT_BROKEN, &udev->flags);
+		ret = false;
+		goto out;
+	}
 
 	list_del_init(&cmd->queue_entry);
 
@@ -1379,8 +1389,22 @@ done:
 		target_complete_cmd(cmd->se_cmd, entry->rsp.scsi_status);
 
 out:
-	tcmu_cmd_free_data(cmd, cmd->dbi_cnt);
-	tcmu_free_cmd(cmd);
+	if (!keep_buf) {
+		tcmu_cmd_free_data(cmd, cmd->dbi_cnt);
+		tcmu_free_cmd(cmd);
+	} else {
+		/*
+		 * Keep this command after completion, since userspace still
+		 * needs the data buffer. Mark it with TCMU_CMD_BIT_KEEP_BUF
+		 * and reset potential TCMU_CMD_BIT_EXPIRED, so we don't accept
+		 * a second completion later.
+		 * Userspace can free the buffer later by writing the cmd_id
+		 * to new action attribute free_kept_buf.
+		 */
+		clear_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags);
+		set_bit(TCMU_CMD_BIT_KEEP_BUF, &cmd->flags);
+	}
+	return ret;
 }
 
 static int tcmu_run_tmr_queue(struct tcmu_dev *udev)
@@ -1432,6 +1456,7 @@ static bool tcmu_handle_completions(struct tcmu_dev *udev)
 	while (udev->cmdr_last_cleaned != READ_ONCE(mb->cmd_tail)) {
 
 		struct tcmu_cmd_entry *entry = udev->cmdr + udev->cmdr_last_cleaned;
+		bool keep_buf;
 
 		/*
 		 * Flush max. up to end of cmd ring since current entry might
@@ -1453,7 +1478,11 @@ static bool tcmu_handle_completions(struct tcmu_dev *udev)
 		}
 		WARN_ON(tcmu_hdr_get_op(entry->hdr.len_op) != TCMU_OP_CMD);
 
-		cmd = xa_erase(&udev->commands, entry->hdr.cmd_id);
+		keep_buf = !!(entry->hdr.uflags & TCMU_UFLAG_KEEP_BUF);
+		if (keep_buf)
+			cmd = xa_load(&udev->commands, entry->hdr.cmd_id);
+		else
+			cmd = xa_erase(&udev->commands, entry->hdr.cmd_id);
 		if (!cmd) {
 			pr_err("cmd_id %u not found, ring is broken\n",
 			       entry->hdr.cmd_id);
@@ -1461,7 +1490,8 @@ static bool tcmu_handle_completions(struct tcmu_dev *udev)
 			return false;
 		}
 
-		tcmu_handle_completion(cmd, entry);
+		if (!tcmu_handle_completion(cmd, entry, keep_buf))
+			break;
 
 		UPDATE_HEAD(udev->cmdr_last_cleaned,
 			    tcmu_hdr_get_len(entry->hdr.len_op),
@@ -1619,7 +1649,8 @@ static void tcmu_dev_call_rcu(struct rcu_head *p)
 
 static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd)
 {
-	if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) {
+	if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags) ||
+	    test_bit(TCMU_CMD_BIT_KEEP_BUF, &cmd->flags)) {
 		kmem_cache_free(tcmu_cmd_cache, cmd);
 		return 0;
 	}
@@ -1903,6 +1934,38 @@ static int tcmu_open(struct uio_info *info, struct inode *inode)
 static int tcmu_release(struct uio_info *info, struct inode *inode)
 {
 	struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
+	struct tcmu_cmd *cmd;
+	unsigned long i;
+	bool freed = false;
+
+	mutex_lock(&udev->cmdr_lock);
+
+	xa_for_each(&udev->commands, i, cmd) {
+		/* Cmds with KEEP_BUF set are no longer on the ring, but
+		 * userspace still holds the data buffer. If userspace closes
+		 * we implicitly free these cmds and buffers, since after new
+		 * open the (new ?) userspace cannot find the cmd in the ring
+		 * and thus never will release the buffer by writing cmd_id to
+		 * free_kept_buf action attribute.
+		 */
+		if (!test_bit(TCMU_CMD_BIT_KEEP_BUF, &cmd->flags))
+			continue;
+		pr_debug("removing KEEP_BUF cmd %u on dev %s from ring\n",
+			 cmd->cmd_id, udev->name);
+		freed = true;
+
+		xa_erase(&udev->commands, i);
+		tcmu_cmd_free_data(cmd, cmd->dbi_cnt);
+		tcmu_free_cmd(cmd);
+	}
+	/*
+	 * We only freed data space, not ring space. Therefore we dont call
+	 * run_tmr_queue, but call run_qfull_queue if tmr_list is empty.
+	 */
+	if (freed && list_empty(&udev->tmr_queue))
+		run_qfull_queue(udev, false);
+
+	mutex_unlock(&udev->cmdr_lock);
 
 	clear_bit(TCMU_DEV_BIT_OPEN, &udev->flags);
 
@@ -2147,7 +2210,8 @@ static int tcmu_configure_device(struct se_device *dev)
 	mb->version = TCMU_MAILBOX_VERSION;
 	mb->flags = TCMU_MAILBOX_FLAG_CAP_OOOC |
 		    TCMU_MAILBOX_FLAG_CAP_READ_LEN |
-		    TCMU_MAILBOX_FLAG_CAP_TMR;
+		    TCMU_MAILBOX_FLAG_CAP_TMR |
+		    TCMU_MAILBOX_FLAG_CAP_KEEP_BUF;
 	mb->cmdr_off = CMDR_OFF;
 	mb->cmdr_size = udev->cmdr_size;
 
@@ -2279,12 +2343,16 @@ static void tcmu_reset_ring(struct tcmu_dev *udev, u8 err_level)
 	mutex_lock(&udev->cmdr_lock);
 
 	xa_for_each(&udev->commands, i, cmd) {
-		pr_debug("removing cmd %u on dev %s from ring (is expired %d)\n",
-			  cmd->cmd_id, udev->name,
-			  test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags));
+		pr_debug("removing cmd %u on dev %s from ring %s\n",
+			 cmd->cmd_id, udev->name,
+			 test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags) ?
+			 "(is expired)" :
+			 (test_bit(TCMU_CMD_BIT_KEEP_BUF, &cmd->flags) ?
+			 "(is keep buffer)" : ""));
 
 		xa_erase(&udev->commands, i);
-		if (!test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) {
+		if (!test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags) &&
+		    !test_bit(TCMU_CMD_BIT_KEEP_BUF, &cmd->flags)) {
 			WARN_ON(!cmd->se_cmd);
 			list_del_init(&cmd->queue_entry);
 			cmd->se_cmd->priv = NULL;
@@ -2933,6 +3001,65 @@ static ssize_t tcmu_reset_ring_store(struct config_item *item, const char *page,
 }
 CONFIGFS_ATTR_WO(tcmu_, reset_ring);
 
+static ssize_t tcmu_free_kept_buf_store(struct config_item *item, const char *page,
+					size_t count)
+{
+	struct se_device *se_dev = container_of(to_config_group(item),
+						struct se_device,
+						dev_action_group);
+	struct tcmu_dev *udev = TCMU_DEV(se_dev);
+	struct tcmu_cmd *cmd;
+	u16 cmd_id;
+	int ret;
+
+	if (!target_dev_configured(&udev->se_dev)) {
+		pr_err("Device is not configured.\n");
+		return -EINVAL;
+	}
+
+	ret = kstrtou16(page, 0, &cmd_id);
+	if (ret < 0)
+		return ret;
+
+	mutex_lock(&udev->cmdr_lock);
+
+	{
+		XA_STATE(xas, &udev->commands, cmd_id);
+
+		xas_lock(&xas);
+		cmd = xas_load(&xas);
+		if (!cmd) {
+			pr_err("free_kept_buf: cmd_id %d not found\n", cmd_id);
+			count = -EINVAL;
+			xas_unlock(&xas);
+			goto out_unlock;
+		}
+		if (!test_bit(TCMU_CMD_BIT_KEEP_BUF, &cmd->flags)) {
+			pr_err("free_kept_buf: cmd_id %d was not completed with KEEP_BUF\n",
+			       cmd_id);
+			count = -EINVAL;
+			xas_unlock(&xas);
+			goto out_unlock;
+		}
+		xas_store(&xas, NULL);
+		xas_unlock(&xas);
+	}
+
+	tcmu_cmd_free_data(cmd, cmd->dbi_cnt);
+	tcmu_free_cmd(cmd);
+	/*
+	 * We only freed data space, not ring space. Therefore we dont call
+	 * run_tmr_queue, but call run_qfull_queue if tmr_list is empty.
+	 */
+	if (list_empty(&udev->tmr_queue))
+		run_qfull_queue(udev, false);
+
+out_unlock:
+	mutex_unlock(&udev->cmdr_lock);
+	return count;
+}
+CONFIGFS_ATTR_WO(tcmu_, free_kept_buf);
+
 static struct configfs_attribute *tcmu_attrib_attrs[] = {
 	&tcmu_attr_cmd_time_out,
 	&tcmu_attr_qfull_time_out,
@@ -2951,6 +3078,7 @@ static struct configfs_attribute **tcmu_attrs;
 static struct configfs_attribute *tcmu_action_attrs[] = {
 	&tcmu_attr_block_dev,
 	&tcmu_attr_reset_ring,
+	&tcmu_attr_free_kept_buf,
 	NULL,
 };
 
diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index 0f1319336f3e..d4fe7cb2bd00 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -674,12 +674,16 @@ static void target_xcopy_do_work(struct work_struct *work)
 	unsigned int max_sectors;
 	int rc = 0;
 	unsigned short nolb, max_nolb, copied_nolb = 0;
+	sense_reason_t sense_rc;
 
-	if (target_parse_xcopy_cmd(xop) != TCM_NO_SENSE)
+	sense_rc = target_parse_xcopy_cmd(xop);
+	if (sense_rc != TCM_NO_SENSE)
 		goto err_free;
 
-	if (WARN_ON_ONCE(!xop->src_dev) || WARN_ON_ONCE(!xop->dst_dev))
+	if (WARN_ON_ONCE(!xop->src_dev) || WARN_ON_ONCE(!xop->dst_dev)) {
+		sense_rc = TCM_INVALID_PARAMETER_LIST;
 		goto err_free;
+	}
 
 	src_dev = xop->src_dev;
 	dst_dev = xop->dst_dev;
@@ -762,20 +766,20 @@ static void target_xcopy_do_work(struct work_struct *work)
 	return;
 
 out:
+	/*
+	 * The XCOPY command was aborted after some data was transferred.
+	 * Terminate command with CHECK CONDITION status, with the sense key
+	 * set to COPY ABORTED.
+	 */
+	sense_rc = TCM_COPY_TARGET_DEVICE_NOT_REACHABLE;
 	xcopy_pt_undepend_remotedev(xop);
 	target_free_sgl(xop->xop_data_sg, xop->xop_data_nents);
 
 err_free:
 	kfree(xop);
-	/*
-	 * Don't override an error scsi status if it has already been set
-	 */
-	if (ec_cmd->scsi_status == SAM_STAT_GOOD) {
-		pr_warn_ratelimited("target_xcopy_do_work: rc: %d, Setting X-COPY"
-			" CHECK_CONDITION -> sending response\n", rc);
-		ec_cmd->scsi_status = SAM_STAT_CHECK_CONDITION;
-	}
-	target_complete_cmd(ec_cmd, ec_cmd->scsi_status);
+	pr_warn_ratelimited("target_xcopy_do_work: rc: %d, sense: %u, XCOPY operation failed\n",
+			   rc, sense_rc);
+	target_complete_cmd_with_sense(ec_cmd, SAM_STAT_CHECK_CONDITION, sense_rc);
 }
 
 /*
diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
index 5a86cffd78f6..4310cb342a9f 100644
--- a/drivers/thermal/devfreq_cooling.c
+++ b/drivers/thermal/devfreq_cooling.c
@@ -18,10 +18,10 @@
 #include <linux/pm_opp.h>
 #include <linux/pm_qos.h>
 #include <linux/thermal.h>
+#include <linux/units.h>
 
 #include <trace/events/thermal.h>
 
-#define HZ_PER_KHZ		1000
 #define SCALE_ERROR_MITIGATION	100
 
 /**
diff --git a/drivers/thermal/intel/Kconfig b/drivers/thermal/intel/Kconfig
index e4299ca3423c..c83ea5d04a1d 100644
--- a/drivers/thermal/intel/Kconfig
+++ b/drivers/thermal/intel/Kconfig
@@ -90,3 +90,12 @@ config INTEL_TCC_COOLING
 	  Note that, on different platforms, the behavior might be different
 	  on how fast the setting takes effect, and how much the CPU frequency
 	  is reduced.
+
+config INTEL_MENLOW
+	tristate "Thermal Management driver for Intel menlow platform"
+	depends on ACPI_THERMAL
+	help
+	  ACPI thermal management enhancement driver on
+	  Intel Menlow platform.
+
+	  If unsure, say N.
diff --git a/drivers/thermal/intel/Makefile b/drivers/thermal/intel/Makefile
index 5ff2afa388f7..960b56268b4a 100644
--- a/drivers/thermal/intel/Makefile
+++ b/drivers/thermal/intel/Makefile
@@ -12,3 +12,4 @@ obj-$(CONFIG_INTEL_BXT_PMIC_THERMAL) += intel_bxt_pmic_thermal.o
 obj-$(CONFIG_INTEL_PCH_THERMAL)	+= intel_pch_thermal.o
 obj-$(CONFIG_INTEL_TCC_COOLING)	+= intel_tcc_cooling.o
 obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
+obj-$(CONFIG_INTEL_MENLOW)	+= intel_menlow.o
diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
index 823354a1a91a..19926beeb3b7 100644
--- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
+++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
@@ -108,9 +108,12 @@ static struct attribute *imok_attr[] = {
 	NULL
 };
 
+static const struct attribute_group imok_attribute_group = {
+	.attrs = imok_attr,
+};
+
 static const struct attribute_group data_attribute_group = {
 	.bin_attrs = data_attributes,
-	.attrs = imok_attr,
 };
 
 static ssize_t available_uuids_show(struct device *dev,
@@ -522,6 +525,12 @@ static int int3400_thermal_probe(struct platform_device *pdev)
 	if (result)
 		goto free_rel_misc;
 
+	if (acpi_has_method(priv->adev->handle, "IMOK")) {
+		result = sysfs_create_group(&pdev->dev.kobj, &imok_attribute_group);
+		if (result)
+			goto free_imok;
+	}
+
 	if (priv->data_vault) {
 		result = sysfs_create_group(&pdev->dev.kobj,
 					    &data_attribute_group);
@@ -545,6 +554,8 @@ free_sysfs:
 	}
 free_uuid:
 	sysfs_remove_group(&pdev->dev.kobj, &uuid_attribute_group);
+free_imok:
+	sysfs_remove_group(&pdev->dev.kobj, &imok_attribute_group);
 free_rel_misc:
 	if (!priv->rel_misc_dev_res)
 		acpi_thermal_rel_misc_device_remove(priv->adev->handle);
@@ -573,6 +584,7 @@ static int int3400_thermal_remove(struct platform_device *pdev)
 	if (priv->data_vault)
 		sysfs_remove_group(&pdev->dev.kobj, &data_attribute_group);
 	sysfs_remove_group(&pdev->dev.kobj, &uuid_attribute_group);
+	sysfs_remove_group(&pdev->dev.kobj, &imok_attribute_group);
 	thermal_zone_device_unregister(priv->thermal);
 	kfree(priv->data_vault);
 	kfree(priv->trts);
diff --git a/drivers/platform/x86/intel_menlow.c b/drivers/thermal/intel/intel_menlow.c
index 101d7e791a13..101d7e791a13 100644
--- a/drivers/platform/x86/intel_menlow.c
+++ b/drivers/thermal/intel/intel_menlow.c
diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
index b0eb5ece9243..a5b58ea89cc6 100644
--- a/drivers/thermal/intel/intel_powerclamp.c
+++ b/drivers/thermal/intel/intel_powerclamp.c
@@ -528,7 +528,7 @@ static int start_power_clamp(void)
 
 	set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1);
 	/* prevent cpu hotplug */
-	get_online_cpus();
+	cpus_read_lock();
 
 	/* prefer BSP */
 	control_cpu = 0;
@@ -542,7 +542,7 @@ static int start_power_clamp(void)
 	for_each_online_cpu(cpu) {
 		start_power_clamp_worker(cpu);
 	}
-	put_online_cpus();
+	cpus_read_unlock();
 
 	return 0;
 }
diff --git a/drivers/thermal/intel/intel_tcc_cooling.c b/drivers/thermal/intel/intel_tcc_cooling.c
index 8ec10d55d421..cd80c7db4073 100644
--- a/drivers/thermal/intel/intel_tcc_cooling.c
+++ b/drivers/thermal/intel/intel_tcc_cooling.c
@@ -79,6 +79,8 @@ static const struct x86_cpu_id tcc_ids[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, NULL),
 	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, NULL),
 	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, NULL),
+	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, NULL),
+	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, NULL),
 	{}
 };
 
diff --git a/drivers/thermal/qcom/Kconfig b/drivers/thermal/qcom/Kconfig
index 8d5ac2df26dc..7d942f71e532 100644
--- a/drivers/thermal/qcom/Kconfig
+++ b/drivers/thermal/qcom/Kconfig
@@ -31,3 +31,13 @@ config QCOM_SPMI_TEMP_ALARM
 	  trip points. The temperature reported by the thermal sensor reflects the
 	  real time die temperature if an ADC is present or an estimate of the
 	  temperature based upon the over temperature stage value.
+
+config QCOM_LMH
+	tristate "Qualcomm Limits Management Hardware"
+	depends on ARCH_QCOM
+	help
+	  This enables initialization of Qualcomm limits management
+	  hardware(LMh). LMh allows for hardware-enforced mitigation for cpus based on
+	  input from temperature and current sensors.  On many newer Qualcomm SoCs
+	  LMh is configured in the firmware and this feature need not be enabled.
+	  However, on certain SoCs like sdm845 LMh has to be configured from kernel.
diff --git a/drivers/thermal/qcom/Makefile b/drivers/thermal/qcom/Makefile
index 252ea7d9da0b..0fa2512042e7 100644
--- a/drivers/thermal/qcom/Makefile
+++ b/drivers/thermal/qcom/Makefile
@@ -5,3 +5,4 @@ qcom_tsens-y			+= tsens.o tsens-v2.o tsens-v1.o tsens-v0_1.o \
 				   tsens-8960.o
 obj-$(CONFIG_QCOM_SPMI_ADC_TM5)	+= qcom-spmi-adc-tm5.o
 obj-$(CONFIG_QCOM_SPMI_TEMP_ALARM)	+= qcom-spmi-temp-alarm.o
+obj-$(CONFIG_QCOM_LMH)		+= lmh.o
diff --git a/drivers/thermal/qcom/lmh.c b/drivers/thermal/qcom/lmh.c
new file mode 100644
index 000000000000..eafa7526eb8b
--- /dev/null
+++ b/drivers/thermal/qcom/lmh.c
@@ -0,0 +1,232 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (C) 2021, Linaro Limited. All rights reserved.
+ */
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include <linux/qcom_scm.h>
+
+#define LMH_NODE_DCVS			0x44435653
+#define LMH_CLUSTER0_NODE_ID		0x6370302D
+#define LMH_CLUSTER1_NODE_ID		0x6370312D
+
+#define LMH_SUB_FN_THERMAL		0x54484D4C
+#define LMH_SUB_FN_CRNT			0x43524E54
+#define LMH_SUB_FN_REL			0x52454C00
+#define LMH_SUB_FN_BCL			0x42434C00
+
+#define LMH_ALGO_MODE_ENABLE		0x454E424C
+#define LMH_TH_HI_THRESHOLD		0x48494748
+#define LMH_TH_LOW_THRESHOLD		0x4C4F5700
+#define LMH_TH_ARM_THRESHOLD		0x41524D00
+
+#define LMH_REG_DCVS_INTR_CLR		0x8
+
+struct lmh_hw_data {
+	void __iomem *base;
+	struct irq_domain *domain;
+	int irq;
+};
+
+static irqreturn_t lmh_handle_irq(int hw_irq, void *data)
+{
+	struct lmh_hw_data *lmh_data = data;
+	int irq = irq_find_mapping(lmh_data->domain, 0);
+
+	/* Call the cpufreq driver to handle the interrupt */
+	if (irq)
+		generic_handle_irq(irq);
+
+	return 0;
+}
+
+static void lmh_enable_interrupt(struct irq_data *d)
+{
+	struct lmh_hw_data *lmh_data = irq_data_get_irq_chip_data(d);
+
+	/* Clear the existing interrupt */
+	writel(0xff, lmh_data->base + LMH_REG_DCVS_INTR_CLR);
+	enable_irq(lmh_data->irq);
+}
+
+static void lmh_disable_interrupt(struct irq_data *d)
+{
+	struct lmh_hw_data *lmh_data = irq_data_get_irq_chip_data(d);
+
+	disable_irq_nosync(lmh_data->irq);
+}
+
+static struct irq_chip lmh_irq_chip = {
+	.name           = "lmh",
+	.irq_enable	= lmh_enable_interrupt,
+	.irq_disable	= lmh_disable_interrupt
+};
+
+static int lmh_irq_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
+{
+	struct lmh_hw_data *lmh_data = d->host_data;
+
+	irq_set_chip_and_handler(irq, &lmh_irq_chip, handle_simple_irq);
+	irq_set_chip_data(irq, lmh_data);
+
+	return 0;
+}
+
+static const struct irq_domain_ops lmh_irq_ops = {
+	.map = lmh_irq_map,
+	.xlate = irq_domain_xlate_onecell,
+};
+
+static int lmh_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct device_node *cpu_node;
+	struct lmh_hw_data *lmh_data;
+	int temp_low, temp_high, temp_arm, cpu_id, ret;
+	u32 node_id;
+
+	lmh_data = devm_kzalloc(dev, sizeof(*lmh_data), GFP_KERNEL);
+	if (!lmh_data)
+		return -ENOMEM;
+
+	lmh_data->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(lmh_data->base))
+		return PTR_ERR(lmh_data->base);
+
+	cpu_node = of_parse_phandle(np, "cpus", 0);
+	if (!cpu_node)
+		return -EINVAL;
+	cpu_id = of_cpu_node_to_id(cpu_node);
+	of_node_put(cpu_node);
+
+	ret = of_property_read_u32(np, "qcom,lmh-temp-high-millicelsius", &temp_high);
+	if (ret) {
+		dev_err(dev, "missing qcom,lmh-temp-high-millicelsius property\n");
+		return ret;
+	}
+
+	ret = of_property_read_u32(np, "qcom,lmh-temp-low-millicelsius", &temp_low);
+	if (ret) {
+		dev_err(dev, "missing qcom,lmh-temp-low-millicelsius property\n");
+		return ret;
+	}
+
+	ret = of_property_read_u32(np, "qcom,lmh-temp-arm-millicelsius", &temp_arm);
+	if (ret) {
+		dev_err(dev, "missing qcom,lmh-temp-arm-millicelsius property\n");
+		return ret;
+	}
+
+	/*
+	 * Only sdm845 has lmh hardware currently enabled from hlos. If this is needed
+	 * for other platforms, revisit this to check if the <cpu-id, node-id> should be part
+	 * of a dt match table.
+	 */
+	if (cpu_id == 0) {
+		node_id = LMH_CLUSTER0_NODE_ID;
+	} else if (cpu_id == 4) {
+		node_id = LMH_CLUSTER1_NODE_ID;
+	} else {
+		dev_err(dev, "Wrong CPU id associated with LMh node\n");
+		return -EINVAL;
+	}
+
+	if (!qcom_scm_lmh_dcvsh_available())
+		return -EINVAL;
+
+	ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_CRNT, LMH_ALGO_MODE_ENABLE, 1,
+				 LMH_NODE_DCVS, node_id, 0);
+	if (ret)
+		dev_err(dev, "Error %d enabling current subfunction\n", ret);
+
+	ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_REL, LMH_ALGO_MODE_ENABLE, 1,
+				 LMH_NODE_DCVS, node_id, 0);
+	if (ret)
+		dev_err(dev, "Error %d enabling reliability subfunction\n", ret);
+
+	ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_BCL, LMH_ALGO_MODE_ENABLE, 1,
+				 LMH_NODE_DCVS, node_id, 0);
+	if (ret)
+		dev_err(dev, "Error %d enabling BCL subfunction\n", ret);
+
+	ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_THERMAL, LMH_ALGO_MODE_ENABLE, 1,
+				 LMH_NODE_DCVS, node_id, 0);
+	if (ret) {
+		dev_err(dev, "Error %d enabling thermal subfunction\n", ret);
+		return ret;
+	}
+
+	ret = qcom_scm_lmh_profile_change(0x1);
+	if (ret) {
+		dev_err(dev, "Error %d changing profile\n", ret);
+		return ret;
+	}
+
+	/* Set default thermal trips */
+	ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_THERMAL, LMH_TH_ARM_THRESHOLD, temp_arm,
+				 LMH_NODE_DCVS, node_id, 0);
+	if (ret) {
+		dev_err(dev, "Error setting thermal ARM threshold%d\n", ret);
+		return ret;
+	}
+
+	ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_THERMAL, LMH_TH_HI_THRESHOLD, temp_high,
+				 LMH_NODE_DCVS, node_id, 0);
+	if (ret) {
+		dev_err(dev, "Error setting thermal HI threshold%d\n", ret);
+		return ret;
+	}
+
+	ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_THERMAL, LMH_TH_LOW_THRESHOLD, temp_low,
+				 LMH_NODE_DCVS, node_id, 0);
+	if (ret) {
+		dev_err(dev, "Error setting thermal ARM threshold%d\n", ret);
+		return ret;
+	}
+
+	lmh_data->irq = platform_get_irq(pdev, 0);
+	lmh_data->domain = irq_domain_add_linear(np, 1, &lmh_irq_ops, lmh_data);
+	if (!lmh_data->domain) {
+		dev_err(dev, "Error adding irq_domain\n");
+		return -EINVAL;
+	}
+
+	/* Disable the irq and let cpufreq enable it when ready to handle the interrupt */
+	irq_set_status_flags(lmh_data->irq, IRQ_NOAUTOEN);
+	ret = devm_request_irq(dev, lmh_data->irq, lmh_handle_irq,
+			       IRQF_ONESHOT | IRQF_NO_SUSPEND,
+			       "lmh-irq", lmh_data);
+	if (ret) {
+		dev_err(dev, "Error %d registering irq %x\n", ret, lmh_data->irq);
+		irq_domain_remove(lmh_data->domain);
+		return ret;
+	}
+
+	return 0;
+}
+
+static const struct of_device_id lmh_table[] = {
+	{ .compatible = "qcom,sdm845-lmh", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, lmh_table);
+
+static struct platform_driver lmh_driver = {
+	.probe = lmh_probe,
+	.driver = {
+		.name = "qcom-lmh",
+		.of_match_table = lmh_table,
+		.suppress_bind_attrs = true,
+	},
+};
+module_platform_driver(lmh_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("QCOM LMh driver");
diff --git a/drivers/thermal/qcom/qcom-spmi-adc-tm5.c b/drivers/thermal/qcom/qcom-spmi-adc-tm5.c
index 232fd0b33325..8494cc04aa21 100644
--- a/drivers/thermal/qcom/qcom-spmi-adc-tm5.c
+++ b/drivers/thermal/qcom/qcom-spmi-adc-tm5.c
@@ -359,6 +359,12 @@ static int adc_tm5_register_tzd(struct adc_tm5_chip *adc_tm)
 							   &adc_tm->channels[i],
 							   &adc_tm5_ops);
 		if (IS_ERR(tzd)) {
+			if (PTR_ERR(tzd) == -ENODEV) {
+				dev_warn(adc_tm->dev, "thermal sensor on channel %d is not used\n",
+					 adc_tm->channels[i].channel);
+				continue;
+			}
+
 			dev_err(adc_tm->dev, "Error registering TZ zone for channel %d: %ld\n",
 				adc_tm->channels[i].channel, PTR_ERR(tzd));
 			return PTR_ERR(tzd);
diff --git a/drivers/thermal/rcar_gen3_thermal.c b/drivers/thermal/rcar_gen3_thermal.c
index fdf16aa34eb4..85228d308dd3 100644
--- a/drivers/thermal/rcar_gen3_thermal.c
+++ b/drivers/thermal/rcar_gen3_thermal.c
@@ -84,7 +84,7 @@ struct rcar_gen3_thermal_tsc {
 	struct thermal_zone_device *zone;
 	struct equation_coefs coef;
 	int tj_t;
-	int id; /* thermal channel id */
+	unsigned int id; /* thermal channel id */
 };
 
 struct rcar_gen3_thermal_priv {
@@ -190,10 +190,64 @@ static int rcar_gen3_thermal_get_temp(void *devdata, int *temp)
 	return 0;
 }
 
-static const struct thermal_zone_of_device_ops rcar_gen3_tz_of_ops = {
+static int rcar_gen3_thermal_mcelsius_to_temp(struct rcar_gen3_thermal_tsc *tsc,
+					      int mcelsius)
+{
+	int celsius, val;
+
+	celsius = DIV_ROUND_CLOSEST(mcelsius, 1000);
+	if (celsius <= INT_FIXPT(tsc->tj_t))
+		val = celsius * tsc->coef.a1 + tsc->coef.b1;
+	else
+		val = celsius * tsc->coef.a2 + tsc->coef.b2;
+
+	return INT_FIXPT(val);
+}
+
+static int rcar_gen3_thermal_set_trips(void *devdata, int low, int high)
+{
+	struct rcar_gen3_thermal_tsc *tsc = devdata;
+	u32 irqmsk = 0;
+
+	if (low != -INT_MAX) {
+		irqmsk |= IRQ_TEMPD1;
+		rcar_gen3_thermal_write(tsc, REG_GEN3_IRQTEMP1,
+					rcar_gen3_thermal_mcelsius_to_temp(tsc, low));
+	}
+
+	if (high != INT_MAX) {
+		irqmsk |= IRQ_TEMP2;
+		rcar_gen3_thermal_write(tsc, REG_GEN3_IRQTEMP2,
+					rcar_gen3_thermal_mcelsius_to_temp(tsc, high));
+	}
+
+	rcar_gen3_thermal_write(tsc, REG_GEN3_IRQMSK, irqmsk);
+
+	return 0;
+}
+
+static struct thermal_zone_of_device_ops rcar_gen3_tz_of_ops = {
 	.get_temp	= rcar_gen3_thermal_get_temp,
+	.set_trips	= rcar_gen3_thermal_set_trips,
 };
 
+static irqreturn_t rcar_gen3_thermal_irq(int irq, void *data)
+{
+	struct rcar_gen3_thermal_priv *priv = data;
+	unsigned int i;
+	u32 status;
+
+	for (i = 0; i < priv->num_tscs; i++) {
+		status = rcar_gen3_thermal_read(priv->tscs[i], REG_GEN3_IRQSTR);
+		rcar_gen3_thermal_write(priv->tscs[i], REG_GEN3_IRQSTR, 0);
+		if (status)
+			thermal_zone_device_update(priv->tscs[i]->zone,
+						   THERMAL_EVENT_UNSPECIFIED);
+	}
+
+	return IRQ_HANDLED;
+}
+
 static const struct soc_device_attribute r8a7795es1[] = {
 	{ .soc_id = "r8a7795", .revision = "ES1.*" },
 	{ /* sentinel */ }
@@ -210,6 +264,9 @@ static void rcar_gen3_thermal_init_r8a7795es1(struct rcar_gen3_thermal_tsc *tsc)
 
 	rcar_gen3_thermal_write(tsc, REG_GEN3_IRQCTL, 0x3F);
 	rcar_gen3_thermal_write(tsc, REG_GEN3_IRQMSK, 0);
+	if (tsc->zone->ops->set_trips)
+		rcar_gen3_thermal_write(tsc, REG_GEN3_IRQEN,
+					IRQ_TEMPD1 | IRQ_TEMP2);
 
 	rcar_gen3_thermal_write(tsc, REG_GEN3_CTSR,
 				CTSR_PONM | CTSR_AOUT | CTSR_THBGR | CTSR_VMEN);
@@ -235,6 +292,9 @@ static void rcar_gen3_thermal_init(struct rcar_gen3_thermal_tsc *tsc)
 
 	rcar_gen3_thermal_write(tsc, REG_GEN3_IRQCTL, 0);
 	rcar_gen3_thermal_write(tsc, REG_GEN3_IRQMSK, 0);
+	if (tsc->zone->ops->set_trips)
+		rcar_gen3_thermal_write(tsc, REG_GEN3_IRQEN,
+					IRQ_TEMPD1 | IRQ_TEMP2);
 
 	reg_val = rcar_gen3_thermal_read(tsc, REG_GEN3_THCTR);
 	reg_val |= THCTR_THSST;
@@ -303,6 +363,34 @@ static void rcar_gen3_hwmon_action(void *data)
 	thermal_remove_hwmon_sysfs(zone);
 }
 
+static int rcar_gen3_thermal_request_irqs(struct rcar_gen3_thermal_priv *priv,
+					  struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	unsigned int i;
+	char *irqname;
+	int ret, irq;
+
+	for (i = 0; i < 2; i++) {
+		irq = platform_get_irq_optional(pdev, i);
+		if (irq < 0)
+			return irq;
+
+		irqname = devm_kasprintf(dev, GFP_KERNEL, "%s:ch%d",
+					 dev_name(dev), i);
+		if (!irqname)
+			return -ENOMEM;
+
+		ret = devm_request_threaded_irq(dev, irq, NULL,
+						rcar_gen3_thermal_irq,
+						IRQF_ONESHOT, irqname, priv);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static int rcar_gen3_thermal_probe(struct platform_device *pdev)
 {
 	struct rcar_gen3_thermal_priv *priv;
@@ -310,7 +398,8 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev)
 	const int *ths_tj_1 = of_device_get_match_data(dev);
 	struct resource *res;
 	struct thermal_zone_device *zone;
-	int ret, i;
+	unsigned int i;
+	int ret;
 
 	/* default values if FUSEs are missing */
 	/* TODO: Read values from hardware on supported platforms */
@@ -326,6 +415,9 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, priv);
 
+	if (rcar_gen3_thermal_request_irqs(priv, pdev))
+		rcar_gen3_tz_of_ops.set_trips = NULL;
+
 	pm_runtime_enable(dev);
 	pm_runtime_get_sync(dev);
 
@@ -351,9 +443,6 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev)
 
 		priv->tscs[i] = tsc;
 
-		priv->thermal_init(tsc);
-		rcar_gen3_thermal_calc_coefs(tsc, ptat, thcodes[i], *ths_tj_1);
-
 		zone = devm_thermal_zone_of_sensor_register(dev, i, tsc,
 							    &rcar_gen3_tz_of_ops);
 		if (IS_ERR(zone)) {
@@ -363,6 +452,9 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev)
 		}
 		tsc->zone = zone;
 
+		priv->thermal_init(tsc);
+		rcar_gen3_thermal_calc_coefs(tsc, ptat, thcodes[i], *ths_tj_1);
+
 		tsc->zone->tzp->no_hwmon = false;
 		ret = thermal_add_hwmon_sysfs(tsc->zone);
 		if (ret)
@@ -376,7 +468,7 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev)
 		if (ret < 0)
 			goto error_unregister;
 
-		dev_info(dev, "TSC%d: Loaded %d trip points\n", i, ret);
+		dev_info(dev, "TSC%u: Loaded %d trip points\n", i, ret);
 	}
 
 	priv->num_tscs = i;
@@ -401,8 +493,12 @@ static int __maybe_unused rcar_gen3_thermal_resume(struct device *dev)
 
 	for (i = 0; i < priv->num_tscs; i++) {
 		struct rcar_gen3_thermal_tsc *tsc = priv->tscs[i];
+		struct thermal_zone_device *zone = tsc->zone;
 
 		priv->thermal_init(tsc);
+		if (zone->ops->set_trips)
+			rcar_gen3_thermal_set_trips(tsc, zone->prev_low_trip,
+						    zone->prev_high_trip);
 	}
 
 	return 0;
diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c
index e9a90bc23b11..f4ab4c5b4b62 100644
--- a/drivers/thermal/samsung/exynos_tmu.c
+++ b/drivers/thermal/samsung/exynos_tmu.c
@@ -1073,6 +1073,7 @@ static int exynos_tmu_probe(struct platform_device *pdev)
 		data->sclk = devm_clk_get(&pdev->dev, "tmu_sclk");
 		if (IS_ERR(data->sclk)) {
 			dev_err(&pdev->dev, "Failed to get sclk\n");
+			ret = PTR_ERR(data->sclk);
 			goto err_clk;
 		} else {
 			ret = clk_prepare_enable(data->sclk);
diff --git a/drivers/thermal/tegra/Kconfig b/drivers/thermal/tegra/Kconfig
index 46c2215867cd..cfa41d87a794 100644
--- a/drivers/thermal/tegra/Kconfig
+++ b/drivers/thermal/tegra/Kconfig
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 menu "NVIDIA Tegra thermal drivers"
-depends on ARCH_TEGRA
+depends on ARCH_TEGRA || COMPILE_TEST
 
 config TEGRA_SOCTHERM
 	tristate "Tegra SOCTHERM thermal management"
@@ -18,4 +18,11 @@ config TEGRA_BPMP_THERMAL
 	  Enable this option for support for sensing system temperature of NVIDIA
 	  Tegra systems-on-chip with the BPMP coprocessor (Tegra186).
 
+config TEGRA30_TSENSOR
+	tristate "Tegra30 Thermal Sensor"
+	depends on ARCH_TEGRA_3x_SOC || COMPILE_TEST
+	help
+	  Enable this option to support thermal management of NVIDIA Tegra30
+	  system-on-chip.
+
 endmenu
diff --git a/drivers/thermal/tegra/Makefile b/drivers/thermal/tegra/Makefile
index 0f2b66edf0d2..eb27d194c583 100644
--- a/drivers/thermal/tegra/Makefile
+++ b/drivers/thermal/tegra/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_TEGRA_SOCTHERM)		+= tegra-soctherm.o
 obj-$(CONFIG_TEGRA_BPMP_THERMAL)	+= tegra-bpmp-thermal.o
+obj-$(CONFIG_TEGRA30_TSENSOR)		+= tegra30-tsensor.o
 
 tegra-soctherm-y				:= soctherm.o soctherm-fuse.o
 tegra-soctherm-$(CONFIG_ARCH_TEGRA_124_SOC)	+= tegra124-soctherm.o
diff --git a/drivers/thermal/tegra/soctherm.c b/drivers/thermal/tegra/soctherm.c
index 8e303e9d1dc0..210325f92559 100644
--- a/drivers/thermal/tegra/soctherm.c
+++ b/drivers/thermal/tegra/soctherm.c
@@ -450,8 +450,8 @@ static int enforce_temp_range(struct device *dev, int trip_temp)
 
 	temp = clamp_val(trip_temp, min_low_temp, max_high_temp);
 	if (temp != trip_temp)
-		dev_info(dev, "soctherm: trip temperature %d forced to %d\n",
-			 trip_temp, temp);
+		dev_dbg(dev, "soctherm: trip temperature %d forced to %d\n",
+			trip_temp, temp);
 	return temp;
 }
 
diff --git a/drivers/thermal/tegra/tegra30-tsensor.c b/drivers/thermal/tegra/tegra30-tsensor.c
new file mode 100644
index 000000000000..9b6b693cbcf8
--- /dev/null
+++ b/drivers/thermal/tegra/tegra30-tsensor.c
@@ -0,0 +1,673 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tegra30 SoC Thermal Sensor driver
+ *
+ * Based on downstream HWMON driver from NVIDIA.
+ * Copyright (C) 2011 NVIDIA Corporation
+ *
+ * Author: Dmitry Osipenko <digetx@gmail.com>
+ * Copyright (C) 2021 GRATE-DRIVER project
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/math.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/reset.h>
+#include <linux/slab.h>
+#include <linux/thermal.h>
+#include <linux/types.h>
+
+#include <soc/tegra/fuse.h>
+
+#include "../thermal_core.h"
+#include "../thermal_hwmon.h"
+
+#define TSENSOR_SENSOR0_CONFIG0				0x0
+#define TSENSOR_SENSOR0_CONFIG0_SENSOR_STOP		BIT(0)
+#define TSENSOR_SENSOR0_CONFIG0_HW_FREQ_DIV_EN		BIT(1)
+#define TSENSOR_SENSOR0_CONFIG0_THERMAL_RST_EN		BIT(2)
+#define TSENSOR_SENSOR0_CONFIG0_DVFS_EN			BIT(3)
+#define TSENSOR_SENSOR0_CONFIG0_INTR_OVERFLOW_EN	BIT(4)
+#define TSENSOR_SENSOR0_CONFIG0_INTR_HW_FREQ_DIV_EN	BIT(5)
+#define TSENSOR_SENSOR0_CONFIG0_INTR_THERMAL_RST_EN	BIT(6)
+#define TSENSOR_SENSOR0_CONFIG0_M			GENMASK(23,  8)
+#define TSENSOR_SENSOR0_CONFIG0_N			GENMASK(31, 24)
+
+#define TSENSOR_SENSOR0_CONFIG1				0x8
+#define TSENSOR_SENSOR0_CONFIG1_TH1			GENMASK(15,  0)
+#define TSENSOR_SENSOR0_CONFIG1_TH2			GENMASK(31, 16)
+
+#define TSENSOR_SENSOR0_CONFIG2				0xc
+#define TSENSOR_SENSOR0_CONFIG2_TH3			GENMASK(15,  0)
+
+#define TSENSOR_SENSOR0_STATUS0				0x18
+#define TSENSOR_SENSOR0_STATUS0_STATE			GENMASK(2, 0)
+#define TSENSOR_SENSOR0_STATUS0_INTR			BIT(8)
+#define TSENSOR_SENSOR0_STATUS0_CURRENT_VALID		BIT(9)
+
+#define TSENSOR_SENSOR0_TS_STATUS1			0x1c
+#define TSENSOR_SENSOR0_TS_STATUS1_CURRENT_COUNT	GENMASK(31, 16)
+
+#define TEGRA30_FUSE_TEST_PROG_VER			0x28
+
+#define TEGRA30_FUSE_TSENSOR_CALIB			0x98
+#define TEGRA30_FUSE_TSENSOR_CALIB_LOW			GENMASK(15,  0)
+#define TEGRA30_FUSE_TSENSOR_CALIB_HIGH			GENMASK(31, 16)
+
+#define TEGRA30_FUSE_SPARE_BIT				0x144
+
+struct tegra_tsensor;
+
+struct tegra_tsensor_calibration_data {
+	int a, b, m, n, p, r;
+};
+
+struct tegra_tsensor_channel {
+	void __iomem *regs;
+	unsigned int id;
+	struct tegra_tsensor *ts;
+	struct thermal_zone_device *tzd;
+};
+
+struct tegra_tsensor {
+	void __iomem *regs;
+	bool swap_channels;
+	struct clk *clk;
+	struct device *dev;
+	struct reset_control *rst;
+	struct tegra_tsensor_channel ch[2];
+	struct tegra_tsensor_calibration_data calib;
+};
+
+static int tegra_tsensor_hw_enable(const struct tegra_tsensor *ts)
+{
+	u32 val;
+	int err;
+
+	err = reset_control_assert(ts->rst);
+	if (err) {
+		dev_err(ts->dev, "failed to assert hardware reset: %d\n", err);
+		return err;
+	}
+
+	err = clk_prepare_enable(ts->clk);
+	if (err) {
+		dev_err(ts->dev, "failed to enable clock: %d\n", err);
+		return err;
+	}
+
+	fsleep(1000);
+
+	err = reset_control_deassert(ts->rst);
+	if (err) {
+		dev_err(ts->dev, "failed to deassert hardware reset: %d\n", err);
+		goto disable_clk;
+	}
+
+	/*
+	 * Sensors are enabled after reset by default, but not gauging
+	 * until clock counter is programmed.
+	 *
+	 * M: number of reference clock pulses after which every
+	 *    temperature / voltage measurement is made
+	 *
+	 * N: number of reference clock counts for which the counter runs
+	 */
+	val  = FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_M, 12500);
+	val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_N, 255);
+
+	/* apply the same configuration to both channels */
+	writel_relaxed(val, ts->regs + 0x40 + TSENSOR_SENSOR0_CONFIG0);
+	writel_relaxed(val, ts->regs + 0x80 + TSENSOR_SENSOR0_CONFIG0);
+
+	return 0;
+
+disable_clk:
+	clk_disable_unprepare(ts->clk);
+
+	return err;
+}
+
+static int tegra_tsensor_hw_disable(const struct tegra_tsensor *ts)
+{
+	int err;
+
+	err = reset_control_assert(ts->rst);
+	if (err) {
+		dev_err(ts->dev, "failed to assert hardware reset: %d\n", err);
+		return err;
+	}
+
+	clk_disable_unprepare(ts->clk);
+
+	return 0;
+}
+
+static void devm_tegra_tsensor_hw_disable(void *data)
+{
+	const struct tegra_tsensor *ts = data;
+
+	tegra_tsensor_hw_disable(ts);
+}
+
+static int tegra_tsensor_get_temp(void *data, int *temp)
+{
+	const struct tegra_tsensor_channel *tsc = data;
+	const struct tegra_tsensor *ts = tsc->ts;
+	int err, c1, c2, c3, c4, counter;
+	u32 val;
+
+	/*
+	 * Counter will be invalid if hardware is misprogrammed or not enough
+	 * time passed since the time when sensor was enabled.
+	 */
+	err = readl_relaxed_poll_timeout(tsc->regs + TSENSOR_SENSOR0_STATUS0, val,
+					 val & TSENSOR_SENSOR0_STATUS0_CURRENT_VALID,
+					 21 * USEC_PER_MSEC,
+					 21 * USEC_PER_MSEC * 50);
+	if (err) {
+		dev_err_once(ts->dev, "ch%u: counter invalid\n", tsc->id);
+		return err;
+	}
+
+	val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_TS_STATUS1);
+	counter = FIELD_GET(TSENSOR_SENSOR0_TS_STATUS1_CURRENT_COUNT, val);
+
+	/*
+	 * This shouldn't happen with a valid counter status, nevertheless
+	 * lets verify the value since it's in a separate (from status)
+	 * register.
+	 */
+	if (counter == 0xffff) {
+		dev_err_once(ts->dev, "ch%u: counter overflow\n", tsc->id);
+		return -EINVAL;
+	}
+
+	/*
+	 * temperature = a * counter + b
+	 * temperature = m * (temperature ^ 2) + n * temperature + p
+	 */
+	c1 = DIV_ROUND_CLOSEST(ts->calib.a * counter + ts->calib.b, 1000000);
+	c1 = c1 ?: 1;
+	c2 = DIV_ROUND_CLOSEST(ts->calib.p, c1);
+	c3 = c1 * ts->calib.m;
+	c4 = ts->calib.n;
+
+	*temp = DIV_ROUND_CLOSEST(c1 * (c2 + c3 + c4), 1000);
+
+	return 0;
+}
+
+static int tegra_tsensor_temp_to_counter(const struct tegra_tsensor *ts, int temp)
+{
+	int c1, c2;
+
+	c1 = DIV_ROUND_CLOSEST(ts->calib.p - temp * 1000, ts->calib.m);
+	c2 = -ts->calib.r - int_sqrt(ts->calib.r * ts->calib.r - c1);
+
+	return DIV_ROUND_CLOSEST(c2 * 1000000 - ts->calib.b, ts->calib.a);
+}
+
+static int tegra_tsensor_set_trips(void *data, int low, int high)
+{
+	const struct tegra_tsensor_channel *tsc = data;
+	const struct tegra_tsensor *ts = tsc->ts;
+	u32 val;
+
+	/*
+	 * TSENSOR doesn't trigger interrupt on the "low" temperature breach,
+	 * hence bail out if high temperature is unspecified.
+	 */
+	if (high == INT_MAX)
+		return 0;
+
+	val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_CONFIG1);
+	val &= ~TSENSOR_SENSOR0_CONFIG1_TH1;
+
+	high = tegra_tsensor_temp_to_counter(ts, high);
+	val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG1_TH1, high);
+	writel_relaxed(val, tsc->regs + TSENSOR_SENSOR0_CONFIG1);
+
+	return 0;
+}
+
+static const struct thermal_zone_of_device_ops ops = {
+	.get_temp = tegra_tsensor_get_temp,
+	.set_trips = tegra_tsensor_set_trips,
+};
+
+static bool
+tegra_tsensor_handle_channel_interrupt(const struct tegra_tsensor *ts,
+				       unsigned int id)
+{
+	const struct tegra_tsensor_channel *tsc = &ts->ch[id];
+	u32 val;
+
+	val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_STATUS0);
+	writel_relaxed(val, tsc->regs + TSENSOR_SENSOR0_STATUS0);
+
+	if (FIELD_GET(TSENSOR_SENSOR0_STATUS0_STATE, val) == 5)
+		dev_err_ratelimited(ts->dev, "ch%u: counter overflowed\n", id);
+
+	if (!FIELD_GET(TSENSOR_SENSOR0_STATUS0_INTR, val))
+		return false;
+
+	thermal_zone_device_update(tsc->tzd, THERMAL_EVENT_UNSPECIFIED);
+
+	return true;
+}
+
+static irqreturn_t tegra_tsensor_isr(int irq, void *data)
+{
+	const struct tegra_tsensor *ts = data;
+	bool handled = false;
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(ts->ch); i++)
+		handled |= tegra_tsensor_handle_channel_interrupt(ts, i);
+
+	return handled ? IRQ_HANDLED : IRQ_NONE;
+}
+
+static int tegra_tsensor_disable_hw_channel(const struct tegra_tsensor *ts,
+					    unsigned int id)
+{
+	const struct tegra_tsensor_channel *tsc = &ts->ch[id];
+	struct thermal_zone_device *tzd = tsc->tzd;
+	u32 val;
+	int err;
+
+	if (!tzd)
+		goto stop_channel;
+
+	err = thermal_zone_device_disable(tzd);
+	if (err) {
+		dev_err(ts->dev, "ch%u: failed to disable zone: %d\n", id, err);
+		return err;
+	}
+
+stop_channel:
+	/* stop channel gracefully */
+	val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_CONFIG0);
+	val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_SENSOR_STOP, 1);
+	writel_relaxed(val, tsc->regs + TSENSOR_SENSOR0_CONFIG0);
+
+	return 0;
+}
+
+static void tegra_tsensor_get_hw_channel_trips(struct thermal_zone_device *tzd,
+					       int *hot_trip, int *crit_trip)
+{
+	unsigned int i;
+
+	/*
+	 * 90C is the maximal critical temperature of all Tegra30 SoC variants,
+	 * use it for the default trip if unspecified in a device-tree.
+	 */
+	*hot_trip  = 85000;
+	*crit_trip = 90000;
+
+	for (i = 0; i < tzd->trips; i++) {
+		enum thermal_trip_type type;
+		int trip_temp;
+
+		tzd->ops->get_trip_temp(tzd, i, &trip_temp);
+		tzd->ops->get_trip_type(tzd, i, &type);
+
+		if (type == THERMAL_TRIP_HOT)
+			*hot_trip = trip_temp;
+
+		if (type == THERMAL_TRIP_CRITICAL)
+			*crit_trip = trip_temp;
+	}
+
+	/* clamp hardware trips to the calibration limits */
+	*hot_trip = clamp(*hot_trip, 25000, 90000);
+
+	/*
+	 * Kernel will perform a normal system shut down if it will
+	 * see that critical temperature is breached, hence set the
+	 * hardware limit by 5C higher in order to allow system to
+	 * shut down gracefully before sending signal to the Power
+	 * Management controller.
+	 */
+	*crit_trip = clamp(*crit_trip + 5000, 25000, 90000);
+}
+
+static int tegra_tsensor_enable_hw_channel(const struct tegra_tsensor *ts,
+					   unsigned int id)
+{
+	const struct tegra_tsensor_channel *tsc = &ts->ch[id];
+	struct thermal_zone_device *tzd = tsc->tzd;
+	int err, hot_trip = 0, crit_trip = 0;
+	u32 val;
+
+	if (!tzd) {
+		val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_CONFIG0);
+		val &= ~TSENSOR_SENSOR0_CONFIG0_SENSOR_STOP;
+		writel_relaxed(val, tsc->regs + TSENSOR_SENSOR0_CONFIG0);
+
+		return 0;
+	}
+
+	tegra_tsensor_get_hw_channel_trips(tzd, &hot_trip, &crit_trip);
+
+	/* prevent potential racing with tegra_tsensor_set_trips() */
+	mutex_lock(&tzd->lock);
+
+	dev_info_once(ts->dev, "ch%u: PMC emergency shutdown trip set to %dC\n",
+		      id, DIV_ROUND_CLOSEST(crit_trip, 1000));
+
+	hot_trip  = tegra_tsensor_temp_to_counter(ts, hot_trip);
+	crit_trip = tegra_tsensor_temp_to_counter(ts, crit_trip);
+
+	/* program LEVEL2 counter threshold */
+	val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_CONFIG1);
+	val &= ~TSENSOR_SENSOR0_CONFIG1_TH2;
+	val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG1_TH2, hot_trip);
+	writel_relaxed(val, tsc->regs + TSENSOR_SENSOR0_CONFIG1);
+
+	/* program LEVEL3 counter threshold */
+	val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_CONFIG2);
+	val &= ~TSENSOR_SENSOR0_CONFIG2_TH3;
+	val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG2_TH3, crit_trip);
+	writel_relaxed(val, tsc->regs + TSENSOR_SENSOR0_CONFIG2);
+
+	/*
+	 * Enable sensor, emergency shutdown, interrupts for level 1/2/3
+	 * breaches and counter overflow condition.
+	 *
+	 * Disable DIV2 throttle for now since we need to figure out how
+	 * to integrate it properly with the thermal framework.
+	 *
+	 * Thermal levels supported by hardware:
+	 *
+	 *     Level 0 = cold
+	 *     Level 1 = passive cooling (cpufreq DVFS)
+	 *     Level 2 = passive cooling assisted by hardware (DIV2)
+	 *     Level 3 = emergency shutdown assisted by hardware (PMC)
+	 */
+	val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_CONFIG0);
+	val &= ~TSENSOR_SENSOR0_CONFIG0_SENSOR_STOP;
+	val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_DVFS_EN, 1);
+	val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_HW_FREQ_DIV_EN, 0);
+	val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_THERMAL_RST_EN, 1);
+	val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_INTR_OVERFLOW_EN, 1);
+	val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_INTR_HW_FREQ_DIV_EN, 1);
+	val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_INTR_THERMAL_RST_EN, 1);
+	writel_relaxed(val, tsc->regs + TSENSOR_SENSOR0_CONFIG0);
+
+	mutex_unlock(&tzd->lock);
+
+	err = thermal_zone_device_enable(tzd);
+	if (err) {
+		dev_err(ts->dev, "ch%u: failed to enable zone: %d\n", id, err);
+		return err;
+	}
+
+	return 0;
+}
+
+static bool tegra_tsensor_fuse_read_spare(unsigned int spare)
+{
+	u32 val = 0;
+
+	tegra_fuse_readl(TEGRA30_FUSE_SPARE_BIT + spare * 4, &val);
+
+	return !!val;
+}
+
+static int tegra_tsensor_nvmem_setup(struct tegra_tsensor *ts)
+{
+	u32 i, ate_ver = 0, cal = 0, t1_25C = 0, t2_90C = 0;
+	int err, c1_25C, c2_90C;
+
+	err = tegra_fuse_readl(TEGRA30_FUSE_TEST_PROG_VER, &ate_ver);
+	if (err) {
+		dev_err_probe(ts->dev, err, "failed to get ATE version\n");
+		return err;
+	}
+
+	if (ate_ver < 8) {
+		dev_info(ts->dev, "unsupported ATE version: %u\n", ate_ver);
+		return -ENODEV;
+	}
+
+	/*
+	 * We have two TSENSOR channels in a two different spots on SoC.
+	 * Second channel provides more accurate data on older SoC versions,
+	 * use it as a primary channel.
+	 */
+	if (ate_ver <= 21) {
+		dev_info_once(ts->dev,
+			      "older ATE version detected, channels remapped\n");
+		ts->swap_channels = true;
+	}
+
+	err = tegra_fuse_readl(TEGRA30_FUSE_TSENSOR_CALIB, &cal);
+	if (err) {
+		dev_err(ts->dev, "failed to get calibration data: %d\n", err);
+		return err;
+	}
+
+	/* get calibrated counter values for 25C/90C thresholds */
+	c1_25C = FIELD_GET(TEGRA30_FUSE_TSENSOR_CALIB_LOW, cal);
+	c2_90C = FIELD_GET(TEGRA30_FUSE_TSENSOR_CALIB_HIGH, cal);
+
+	/* and calibrated temperatures corresponding to the counter values */
+	for (i = 0; i < 7; i++) {
+		t1_25C |= tegra_tsensor_fuse_read_spare(14 + i) << i;
+		t1_25C |= tegra_tsensor_fuse_read_spare(21 + i) << i;
+
+		t2_90C |= tegra_tsensor_fuse_read_spare(0 + i) << i;
+		t2_90C |= tegra_tsensor_fuse_read_spare(7 + i) << i;
+	}
+
+	if (c2_90C - c1_25C <= t2_90C - t1_25C) {
+		dev_err(ts->dev, "invalid calibration data: %d %d %u %u\n",
+			c2_90C, c1_25C, t2_90C, t1_25C);
+		return -EINVAL;
+	}
+
+	/* all calibration coefficients are premultiplied by 1000000 */
+
+	ts->calib.a = DIV_ROUND_CLOSEST((t2_90C - t1_25C) * 1000000,
+					(c2_90C - c1_25C));
+
+	ts->calib.b = t1_25C * 1000000 - ts->calib.a * c1_25C;
+
+	if (tegra_sku_info.revision == TEGRA_REVISION_A01) {
+		ts->calib.m =     -2775;
+		ts->calib.n =   1338811;
+		ts->calib.p =  -7300000;
+	} else {
+		ts->calib.m =     -3512;
+		ts->calib.n =   1528943;
+		ts->calib.p = -11100000;
+	}
+
+	/* except the coefficient of a reduced quadratic equation */
+	ts->calib.r = DIV_ROUND_CLOSEST(ts->calib.n, ts->calib.m * 2);
+
+	dev_info_once(ts->dev,
+		      "calibration: %d %d %u %u ATE ver: %u SoC rev: %u\n",
+		      c2_90C, c1_25C, t2_90C, t1_25C, ate_ver,
+		      tegra_sku_info.revision);
+
+	return 0;
+}
+
+static int tegra_tsensor_register_channel(struct tegra_tsensor *ts,
+					  unsigned int id)
+{
+	struct tegra_tsensor_channel *tsc = &ts->ch[id];
+	unsigned int hw_id = ts->swap_channels ? !id : id;
+
+	tsc->ts = ts;
+	tsc->id = id;
+	tsc->regs = ts->regs + 0x40 * (hw_id + 1);
+
+	tsc->tzd = devm_thermal_zone_of_sensor_register(ts->dev, id, tsc, &ops);
+	if (IS_ERR(tsc->tzd)) {
+		if (PTR_ERR(tsc->tzd) != -ENODEV)
+			return dev_err_probe(ts->dev, PTR_ERR(tsc->tzd),
+					     "failed to register thermal zone\n");
+
+		/*
+		 * It's okay if sensor isn't assigned to any thermal zone
+		 * in a device-tree.
+		 */
+		tsc->tzd = NULL;
+		return 0;
+	}
+
+	if (devm_thermal_add_hwmon_sysfs(tsc->tzd))
+		dev_warn(ts->dev, "failed to add hwmon sysfs attributes\n");
+
+	return 0;
+}
+
+static int tegra_tsensor_probe(struct platform_device *pdev)
+{
+	struct tegra_tsensor *ts;
+	unsigned int i;
+	int err, irq;
+
+	ts = devm_kzalloc(&pdev->dev, sizeof(*ts), GFP_KERNEL);
+	if (!ts)
+		return -ENOMEM;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	ts->dev = &pdev->dev;
+	platform_set_drvdata(pdev, ts);
+
+	ts->regs = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(ts->regs))
+		return PTR_ERR(ts->regs);
+
+	ts->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(ts->clk))
+		return dev_err_probe(&pdev->dev, PTR_ERR(ts->clk),
+				     "failed to get clock\n");
+
+	ts->rst = devm_reset_control_get_exclusive(&pdev->dev, NULL);
+	if (IS_ERR(ts->rst))
+		return dev_err_probe(&pdev->dev, PTR_ERR(ts->rst),
+				     "failed to get reset control\n");
+
+	err = tegra_tsensor_nvmem_setup(ts);
+	if (err)
+		return err;
+
+	err = tegra_tsensor_hw_enable(ts);
+	if (err)
+		return err;
+
+	err = devm_add_action_or_reset(&pdev->dev,
+				       devm_tegra_tsensor_hw_disable,
+				       ts);
+	if (err)
+		return err;
+
+	for (i = 0; i < ARRAY_SIZE(ts->ch); i++) {
+		err = tegra_tsensor_register_channel(ts, i);
+		if (err)
+			return err;
+	}
+
+	err = devm_request_threaded_irq(&pdev->dev, irq, NULL,
+					tegra_tsensor_isr, IRQF_ONESHOT,
+					"tegra_tsensor", ts);
+	if (err)
+		return dev_err_probe(&pdev->dev, err,
+				     "failed to request interrupt\n");
+
+	for (i = 0; i < ARRAY_SIZE(ts->ch); i++) {
+		err = tegra_tsensor_enable_hw_channel(ts, i);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int __maybe_unused tegra_tsensor_suspend(struct device *dev)
+{
+	struct tegra_tsensor *ts = dev_get_drvdata(dev);
+	unsigned int i;
+	int err;
+
+	for (i = 0; i < ARRAY_SIZE(ts->ch); i++) {
+		err = tegra_tsensor_disable_hw_channel(ts, i);
+		if (err)
+			goto enable_channel;
+	}
+
+	err = tegra_tsensor_hw_disable(ts);
+	if (err)
+		goto enable_channel;
+
+	return 0;
+
+enable_channel:
+	while (i--)
+		tegra_tsensor_enable_hw_channel(ts, i);
+
+	return err;
+}
+
+static int __maybe_unused tegra_tsensor_resume(struct device *dev)
+{
+	struct tegra_tsensor *ts = dev_get_drvdata(dev);
+	unsigned int i;
+	int err;
+
+	err = tegra_tsensor_hw_enable(ts);
+	if (err)
+		return err;
+
+	for (i = 0; i < ARRAY_SIZE(ts->ch); i++) {
+		err = tegra_tsensor_enable_hw_channel(ts, i);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static const struct dev_pm_ops tegra_tsensor_pm_ops = {
+	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(tegra_tsensor_suspend,
+				      tegra_tsensor_resume)
+};
+
+static const struct of_device_id tegra_tsensor_of_match[] = {
+	{ .compatible = "nvidia,tegra30-tsensor", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, tegra_tsensor_of_match);
+
+static struct platform_driver tegra_tsensor_driver = {
+	.probe = tegra_tsensor_probe,
+	.driver = {
+		.name = "tegra30-tsensor",
+		.of_match_table = tegra_tsensor_of_match,
+		.pm = &tegra_tsensor_pm_ops,
+	},
+};
+module_platform_driver(tegra_tsensor_driver);
+
+MODULE_DESCRIPTION("NVIDIA Tegra30 Thermal Sensor driver");
+MODULE_AUTHOR("Dmitry Osipenko <digetx@gmail.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/thunderbolt/test.c b/drivers/thunderbolt/test.c
index eca0ef311bde..1f69bab236ee 100644
--- a/drivers/thunderbolt/test.c
+++ b/drivers/thunderbolt/test.c
@@ -2206,23 +2206,13 @@ static void tb_test_credit_alloc_dma_multiple(struct kunit *test)
 	tb_tunnel_free(tunnel2);
 }
 
-static void tb_test_credit_alloc_all(struct kunit *test)
+static struct tb_tunnel *TB_TEST_PCIE_TUNNEL(struct kunit *test,
+			struct tb_switch *host, struct tb_switch *dev)
 {
-	struct tb_port *up, *down, *in, *out, *nhi, *port;
-	struct tb_tunnel *pcie_tunnel, *dp_tunnel1, *dp_tunnel2, *usb3_tunnel;
-	struct tb_tunnel *dma_tunnel1, *dma_tunnel2;
-	struct tb_switch *host, *dev;
+	struct tb_port *up, *down;
+	struct tb_tunnel *pcie_tunnel;
 	struct tb_path *path;
 
-	/*
-	 * Create PCIe, 2 x DP, USB 3.x and two DMA tunnels from host to
-	 * device. Expectation is that all these can be established with
-	 * the default credit allocation found in Intel hardware.
-	 */
-
-	host = alloc_host_usb4(test);
-	dev = alloc_dev_usb4(test, host, 0x1, true);
-
 	down = &host->ports[8];
 	up = &dev->ports[9];
 	pcie_tunnel = tb_tunnel_alloc_pci(NULL, up, down);
@@ -2243,9 +2233,18 @@ static void tb_test_credit_alloc_all(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, path->hops[1].nfc_credits, 0U);
 	KUNIT_EXPECT_EQ(test, path->hops[1].initial_credits, 64U);
 
+	return pcie_tunnel;
+}
+
+static struct tb_tunnel *TB_TEST_DP_TUNNEL1(struct kunit *test,
+			struct tb_switch *host, struct tb_switch *dev)
+{
+	struct tb_port *in, *out;
+	struct tb_tunnel *dp_tunnel1;
+	struct tb_path *path;
+
 	in = &host->ports[5];
 	out = &dev->ports[13];
-
 	dp_tunnel1 = tb_tunnel_alloc_dp(NULL, in, out, 0, 0);
 	KUNIT_ASSERT_TRUE(test, dp_tunnel1 != NULL);
 	KUNIT_ASSERT_EQ(test, dp_tunnel1->npaths, (size_t)3);
@@ -2271,9 +2270,18 @@ static void tb_test_credit_alloc_all(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, path->hops[1].nfc_credits, 0U);
 	KUNIT_EXPECT_EQ(test, path->hops[1].initial_credits, 1U);
 
+	return dp_tunnel1;
+}
+
+static struct tb_tunnel *TB_TEST_DP_TUNNEL2(struct kunit *test,
+			struct tb_switch *host, struct tb_switch *dev)
+{
+	struct tb_port *in, *out;
+	struct tb_tunnel *dp_tunnel2;
+	struct tb_path *path;
+
 	in = &host->ports[6];
 	out = &dev->ports[14];
-
 	dp_tunnel2 = tb_tunnel_alloc_dp(NULL, in, out, 0, 0);
 	KUNIT_ASSERT_TRUE(test, dp_tunnel2 != NULL);
 	KUNIT_ASSERT_EQ(test, dp_tunnel2->npaths, (size_t)3);
@@ -2299,6 +2307,16 @@ static void tb_test_credit_alloc_all(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, path->hops[1].nfc_credits, 0U);
 	KUNIT_EXPECT_EQ(test, path->hops[1].initial_credits, 1U);
 
+	return dp_tunnel2;
+}
+
+static struct tb_tunnel *TB_TEST_USB3_TUNNEL(struct kunit *test,
+			struct tb_switch *host, struct tb_switch *dev)
+{
+	struct tb_port *up, *down;
+	struct tb_tunnel *usb3_tunnel;
+	struct tb_path *path;
+
 	down = &host->ports[12];
 	up = &dev->ports[16];
 	usb3_tunnel = tb_tunnel_alloc_usb3(NULL, up, down, 0, 0);
@@ -2319,9 +2337,18 @@ static void tb_test_credit_alloc_all(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, path->hops[1].nfc_credits, 0U);
 	KUNIT_EXPECT_EQ(test, path->hops[1].initial_credits, 32U);
 
+	return usb3_tunnel;
+}
+
+static struct tb_tunnel *TB_TEST_DMA_TUNNEL1(struct kunit *test,
+			struct tb_switch *host, struct tb_switch *dev)
+{
+	struct tb_port *nhi, *port;
+	struct tb_tunnel *dma_tunnel1;
+	struct tb_path *path;
+
 	nhi = &host->ports[7];
 	port = &dev->ports[3];
-
 	dma_tunnel1 = tb_tunnel_alloc_dma(NULL, nhi, port, 8, 1, 8, 1);
 	KUNIT_ASSERT_TRUE(test, dma_tunnel1 != NULL);
 	KUNIT_ASSERT_EQ(test, dma_tunnel1->npaths, (size_t)2);
@@ -2340,6 +2367,18 @@ static void tb_test_credit_alloc_all(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, path->hops[1].nfc_credits, 0U);
 	KUNIT_EXPECT_EQ(test, path->hops[1].initial_credits, 14U);
 
+	return dma_tunnel1;
+}
+
+static struct tb_tunnel *TB_TEST_DMA_TUNNEL2(struct kunit *test,
+			struct tb_switch *host, struct tb_switch *dev)
+{
+	struct tb_port *nhi, *port;
+	struct tb_tunnel *dma_tunnel2;
+	struct tb_path *path;
+
+	nhi = &host->ports[7];
+	port = &dev->ports[3];
 	dma_tunnel2 = tb_tunnel_alloc_dma(NULL, nhi, port, 9, 2, 9, 2);
 	KUNIT_ASSERT_TRUE(test, dma_tunnel2 != NULL);
 	KUNIT_ASSERT_EQ(test, dma_tunnel2->npaths, (size_t)2);
@@ -2358,6 +2397,31 @@ static void tb_test_credit_alloc_all(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, path->hops[1].nfc_credits, 0U);
 	KUNIT_EXPECT_EQ(test, path->hops[1].initial_credits, 1U);
 
+	return dma_tunnel2;
+}
+
+static void tb_test_credit_alloc_all(struct kunit *test)
+{
+	struct tb_tunnel *pcie_tunnel, *dp_tunnel1, *dp_tunnel2, *usb3_tunnel;
+	struct tb_tunnel *dma_tunnel1, *dma_tunnel2;
+	struct tb_switch *host, *dev;
+
+	/*
+	 * Create PCIe, 2 x DP, USB 3.x and two DMA tunnels from host to
+	 * device. Expectation is that all these can be established with
+	 * the default credit allocation found in Intel hardware.
+	 */
+
+	host = alloc_host_usb4(test);
+	dev = alloc_dev_usb4(test, host, 0x1, true);
+
+	pcie_tunnel = TB_TEST_PCIE_TUNNEL(test, host, dev);
+	dp_tunnel1 = TB_TEST_DP_TUNNEL1(test, host, dev);
+	dp_tunnel2 = TB_TEST_DP_TUNNEL2(test, host, dev);
+	usb3_tunnel = TB_TEST_USB3_TUNNEL(test, host, dev);
+	dma_tunnel1 = TB_TEST_DMA_TUNNEL1(test, host, dev);
+	dma_tunnel2 = TB_TEST_DMA_TUNNEL2(test, host, dev);
+
 	tb_tunnel_free(dma_tunnel2);
 	tb_tunnel_free(dma_tunnel1);
 	tb_tunnel_free(usb3_tunnel);
diff --git a/drivers/tty/serial/mux.c b/drivers/tty/serial/mux.c
index be640d9863cd..643dfbcc43f9 100644
--- a/drivers/tty/serial/mux.c
+++ b/drivers/tty/serial/mux.c
@@ -496,7 +496,7 @@ static int __init mux_probe(struct parisc_device *dev)
 	return 0;
 }
 
-static int __exit mux_remove(struct parisc_device *dev)
+static void __exit mux_remove(struct parisc_device *dev)
 {
 	int i, j;
 	int port_count = (long)dev_get_drvdata(&dev->dev);
@@ -518,7 +518,6 @@ static int __exit mux_remove(struct parisc_device *dev)
 	}
 
 	release_mem_region(dev->hpa.start + MUX_OFFSET, port_count * MUX_LINE_OFFSET);
-	return 0;
 }
 
 /* Hack.  This idea was taken from the 8250_gsc.c on how to properly order
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 3c80bfbf3bec..66a6ac50a4cd 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -400,6 +400,7 @@ struct cp210x_special_chars {
 };
 
 /* CP210X_VENDOR_SPECIFIC values */
+#define CP210X_GET_FW_VER	0x000E
 #define CP210X_READ_2NCONFIG	0x000E
 #define CP210X_GET_FW_VER_2N	0x0010
 #define CP210X_READ_LATCH	0x00C2
@@ -638,7 +639,7 @@ static int cp210x_read_reg_block(struct usb_serial_port *port, u8 req,
 	result = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0),
 			req, REQTYPE_INTERFACE_TO_HOST, 0,
 			port_priv->bInterfaceNumber, dmabuf, bufsize,
-			USB_CTRL_SET_TIMEOUT);
+			USB_CTRL_GET_TIMEOUT);
 	if (result == bufsize) {
 		memcpy(buf, dmabuf, bufsize);
 		result = 0;
@@ -1145,33 +1146,6 @@ static void cp210x_disable_event_mode(struct usb_serial_port *port)
 	port_priv->event_mode = false;
 }
 
-static int cp210x_set_chars(struct usb_serial_port *port,
-		struct cp210x_special_chars *chars)
-{
-	struct cp210x_port_private *port_priv = usb_get_serial_port_data(port);
-	struct usb_serial *serial = port->serial;
-	void *dmabuf;
-	int result;
-
-	dmabuf = kmemdup(chars, sizeof(*chars), GFP_KERNEL);
-	if (!dmabuf)
-		return -ENOMEM;
-
-	result = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0),
-				CP210X_SET_CHARS, REQTYPE_HOST_TO_INTERFACE, 0,
-				port_priv->bInterfaceNumber,
-				dmabuf, sizeof(*chars), USB_CTRL_SET_TIMEOUT);
-
-	kfree(dmabuf);
-
-	if (result < 0) {
-		dev_err(&port->dev, "failed to set special chars: %d\n", result);
-		return result;
-	}
-
-	return 0;
-}
-
 static bool cp210x_termios_change(const struct ktermios *a, const struct ktermios *b)
 {
 	bool iflag_change, cc_change;
@@ -1192,6 +1166,7 @@ static void cp210x_set_flow_control(struct tty_struct *tty,
 	struct cp210x_flow_ctl flow_ctl;
 	u32 flow_repl;
 	u32 ctl_hs;
+	bool crtscts;
 	int ret;
 
 	/*
@@ -1218,9 +1193,12 @@ static void cp210x_set_flow_control(struct tty_struct *tty,
 		chars.bXonChar = START_CHAR(tty);
 		chars.bXoffChar = STOP_CHAR(tty);
 
-		ret = cp210x_set_chars(port, &chars);
-		if (ret)
-			return;
+		ret = cp210x_write_reg_block(port, CP210X_SET_CHARS, &chars,
+				sizeof(chars));
+		if (ret) {
+			dev_err(&port->dev, "failed to set special chars: %d\n",
+					ret);
+		}
 	}
 
 	mutex_lock(&port_priv->mutex);
@@ -1249,14 +1227,14 @@ static void cp210x_set_flow_control(struct tty_struct *tty,
 			flow_repl |= CP210X_SERIAL_RTS_FLOW_CTL;
 		else
 			flow_repl |= CP210X_SERIAL_RTS_INACTIVE;
-		port_priv->crtscts = true;
+		crtscts = true;
 	} else {
 		ctl_hs &= ~CP210X_SERIAL_CTS_HANDSHAKE;
 		if (port_priv->rts)
 			flow_repl |= CP210X_SERIAL_RTS_ACTIVE;
 		else
 			flow_repl |= CP210X_SERIAL_RTS_INACTIVE;
-		port_priv->crtscts = false;
+		crtscts = false;
 	}
 
 	if (I_IXOFF(tty)) {
@@ -1279,8 +1257,12 @@ static void cp210x_set_flow_control(struct tty_struct *tty,
 	flow_ctl.ulControlHandshake = cpu_to_le32(ctl_hs);
 	flow_ctl.ulFlowReplace = cpu_to_le32(flow_repl);
 
-	cp210x_write_reg_block(port, CP210X_SET_FLOW, &flow_ctl,
+	ret = cp210x_write_reg_block(port, CP210X_SET_FLOW, &flow_ctl,
 			sizeof(flow_ctl));
+	if (ret)
+		goto out_unlock;
+
+	port_priv->crtscts = crtscts;
 out_unlock:
 	mutex_unlock(&port_priv->mutex);
 }
@@ -2111,12 +2093,26 @@ static int cp210x_get_fw_version(struct usb_serial *serial, u16 value)
 	return 0;
 }
 
-static void cp210x_determine_quirks(struct usb_serial *serial)
+static void cp210x_determine_type(struct usb_serial *serial)
 {
 	struct cp210x_serial_private *priv = usb_get_serial_data(serial);
 	int ret;
 
+	ret = cp210x_read_vendor_block(serial, REQTYPE_DEVICE_TO_HOST,
+			CP210X_GET_PARTNUM, &priv->partnum,
+			sizeof(priv->partnum));
+	if (ret < 0) {
+		dev_warn(&serial->interface->dev,
+				"querying part number failed\n");
+		priv->partnum = CP210X_PARTNUM_UNKNOWN;
+		return;
+	}
+
 	switch (priv->partnum) {
+	case CP210X_PARTNUM_CP2105:
+	case CP210X_PARTNUM_CP2108:
+		cp210x_get_fw_version(serial, CP210X_GET_FW_VER);
+		break;
 	case CP210X_PARTNUM_CP2102N_QFN28:
 	case CP210X_PARTNUM_CP2102N_QFN24:
 	case CP210X_PARTNUM_CP2102N_QFN20:
@@ -2140,18 +2136,9 @@ static int cp210x_attach(struct usb_serial *serial)
 	if (!priv)
 		return -ENOMEM;
 
-	result = cp210x_read_vendor_block(serial, REQTYPE_DEVICE_TO_HOST,
-					  CP210X_GET_PARTNUM, &priv->partnum,
-					  sizeof(priv->partnum));
-	if (result < 0) {
-		dev_warn(&serial->interface->dev,
-			 "querying part number failed\n");
-		priv->partnum = CP210X_PARTNUM_UNKNOWN;
-	}
-
 	usb_set_serial_data(serial, priv);
 
-	cp210x_determine_quirks(serial);
+	cp210x_determine_type(serial);
 	cp210x_init_max_speed(serial);
 
 	result = cp210x_gpio_init(serial);
diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c
index 6b18990258c3..6924fa95f6bd 100644
--- a/drivers/usb/serial/cypress_m8.c
+++ b/drivers/usb/serial/cypress_m8.c
@@ -1199,9 +1199,9 @@ MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
 
-module_param(stats, bool, S_IRUGO | S_IWUSR);
+module_param(stats, bool, 0644);
 MODULE_PARM_DESC(stats, "Enable statistics or not");
-module_param(interval, int, S_IRUGO | S_IWUSR);
+module_param(interval, int, 0644);
 MODULE_PARM_DESC(interval, "Overrides interrupt interval");
-module_param(unstable_bauds, bool, S_IRUGO | S_IWUSR);
+module_param(unstable_bauds, bool, 0644);
 MODULE_PARM_DESC(unstable_bauds, "Allow unstable baud rates");
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 33bbb3470ca3..99d19828dae6 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -2938,5 +2938,5 @@ MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
 
-module_param(ndi_latency_timer, int, S_IRUGO | S_IWUSR);
+module_param(ndi_latency_timer, int, 0644);
 MODULE_PARM_DESC(ndi_latency_timer, "NDI device latency timer override");
diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c
index 756d1ac7e96f..e5c75944ebb7 100644
--- a/drivers/usb/serial/garmin_gps.c
+++ b/drivers/usb/serial/garmin_gps.c
@@ -1444,5 +1444,5 @@ MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
 
-module_param(initial_mode, int, S_IRUGO);
+module_param(initial_mode, int, 0444);
 MODULE_PARM_DESC(initial_mode, "Initial mode");
diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c
index ea4edf5eed27..bdee78cc4a07 100644
--- a/drivers/usb/serial/io_edgeport.c
+++ b/drivers/usb/serial/io_edgeport.c
@@ -389,39 +389,6 @@ static void update_edgeport_E2PROM(struct edgeport_serial *edge_serial)
 	release_firmware(fw);
 }
 
-#if 0
-/************************************************************************
- *
- *  Get string descriptor from device
- *
- ************************************************************************/
-static int get_string_desc(struct usb_device *dev, int Id,
-				struct usb_string_descriptor **pRetDesc)
-{
-	struct usb_string_descriptor StringDesc;
-	struct usb_string_descriptor *pStringDesc;
-
-	dev_dbg(&dev->dev, "%s - USB String ID = %d\n", __func__, Id);
-
-	if (!usb_get_descriptor(dev, USB_DT_STRING, Id, &StringDesc,
-						sizeof(StringDesc)))
-		return 0;
-
-	pStringDesc = kmalloc(StringDesc.bLength, GFP_KERNEL);
-	if (!pStringDesc)
-		return -1;
-
-	if (!usb_get_descriptor(dev, USB_DT_STRING, Id, pStringDesc,
-							StringDesc.bLength)) {
-		kfree(pStringDesc);
-		return -1;
-	}
-
-	*pRetDesc = pStringDesc;
-	return 0;
-}
-#endif
-
 static void dump_product_info(struct edgeport_serial *edge_serial,
 			      struct edgeport_product_info *product_info)
 {
diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c
index 84b848d2794e..a7b3c15957ba 100644
--- a/drivers/usb/serial/io_ti.c
+++ b/drivers/usb/serial/io_ti.c
@@ -2746,9 +2746,9 @@ MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
 MODULE_FIRMWARE("edgeport/down3.bin");
 
-module_param(ignore_cpu_rev, bool, S_IRUGO | S_IWUSR);
+module_param(ignore_cpu_rev, bool, 0644);
 MODULE_PARM_DESC(ignore_cpu_rev,
 			"Ignore the cpu revision when connecting to a device");
 
-module_param(default_uart_mode, int, S_IRUGO | S_IWUSR);
+module_param(default_uart_mode, int, 0644);
 MODULE_PARM_DESC(default_uart_mode, "Default uart_mode, 0=RS232, ...");
diff --git a/drivers/usb/serial/ipaq.c b/drivers/usb/serial/ipaq.c
index f81746c3c26c..e11441bac44f 100644
--- a/drivers/usb/serial/ipaq.c
+++ b/drivers/usb/serial/ipaq.c
@@ -599,10 +599,10 @@ MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
 
-module_param(connect_retries, int, S_IRUGO|S_IWUSR);
+module_param(connect_retries, int, 0644);
 MODULE_PARM_DESC(connect_retries,
 		"Maximum number of connect retries (one second each)");
 
-module_param(initial_wait, int, S_IRUGO|S_IWUSR);
+module_param(initial_wait, int, 0644);
 MODULE_PARM_DESC(initial_wait,
 		"Time to wait before attempting a connection (in seconds)");
diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c
index 19753611e7b0..0be3b5e1eaf3 100644
--- a/drivers/usb/serial/iuu_phoenix.c
+++ b/drivers/usb/serial/iuu_phoenix.c
@@ -1188,20 +1188,20 @@ MODULE_AUTHOR("Alain Degreffe eczema@ecze.com");
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
 
-module_param(xmas, bool, S_IRUGO | S_IWUSR);
+module_param(xmas, bool, 0644);
 MODULE_PARM_DESC(xmas, "Xmas colors enabled or not");
 
-module_param(boost, int, S_IRUGO | S_IWUSR);
+module_param(boost, int, 0644);
 MODULE_PARM_DESC(boost, "Card overclock boost (in percent 100-500)");
 
-module_param(clockmode, int, S_IRUGO | S_IWUSR);
+module_param(clockmode, int, 0644);
 MODULE_PARM_DESC(clockmode, "Card clock mode (1=3.579 MHz, 2=3.680 MHz, "
 		"3=6 Mhz)");
 
-module_param(cdmode, int, S_IRUGO | S_IWUSR);
+module_param(cdmode, int, 0644);
 MODULE_PARM_DESC(cdmode, "Card detect mode (0=none, 1=CD, 2=!CD, 3=DSR, "
 		 "4=!DSR, 5=CTS, 6=!CTS, 7=RING, 8=!RING)");
 
-module_param(vcc_default, int, S_IRUGO | S_IWUSR);
+module_param(vcc_default, int, 0644);
 MODULE_PARM_DESC(vcc_default, "Set default VCC (either 3 for 3.3V or 5 "
 		"for 5V). Default to 5.");
diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index 930b3d50a330..f45ca7ddf78e 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -433,6 +433,7 @@ static int pl2303_detect_type(struct usb_serial *serial)
 		switch (bcdDevice) {
 		case 0x100:
 		case 0x305:
+		case 0x405:
 			/*
 			 * Assume it's an HXN-type if the device doesn't
 			 * support the old read request value.
diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index 4b49b7c33364..9d56138133a9 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -1056,5 +1056,5 @@ MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL v2");
 
-module_param(nmea, bool, S_IRUGO | S_IWUSR);
+module_param(nmea, bool, 0644);
 MODULE_PARM_DESC(nmea, "NMEA streaming");
diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c
index f4304ce69350..4c5a0a49035f 100644
--- a/drivers/usb/storage/transport.c
+++ b/drivers/usb/storage/transport.c
@@ -551,7 +551,7 @@ static void last_sector_hacks(struct us_data *us, struct scsi_cmnd *srb)
 	/* Did this command access the last sector? */
 	sector = (srb->cmnd[2] << 24) | (srb->cmnd[3] << 16) |
 			(srb->cmnd[4] << 8) | (srb->cmnd[5]);
-	disk = srb->request->rq_disk;
+	disk = scsi_cmd_to_rq(srb)->rq_disk;
 	if (!disk)
 		goto done;
 	sdkp = scsi_disk(disk);
diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig
index a503c1b2bfd9..3d91982d8371 100644
--- a/drivers/vdpa/Kconfig
+++ b/drivers/vdpa/Kconfig
@@ -33,6 +33,16 @@ config VDPA_SIM_BLOCK
 	  vDPA block device simulator which terminates IO request in a
 	  memory buffer.
 
+config VDPA_USER
+	tristate "VDUSE (vDPA Device in Userspace) support"
+	depends on EVENTFD && MMU && HAS_DMA
+	select DMA_OPS
+	select VHOST_IOTLB
+	select IOMMU_IOVA
+	help
+	  With VDUSE it is possible to emulate a vDPA Device
+	  in a userspace program.
+
 config IFCVF
 	tristate "Intel IFC VF vDPA driver"
 	depends on PCI_MSI
@@ -53,6 +63,7 @@ config MLX5_VDPA
 config MLX5_VDPA_NET
 	tristate "vDPA driver for ConnectX devices"
 	select MLX5_VDPA
+	select VHOST_RING
 	depends on MLX5_CORE
 	help
 	  VDPA network driver for ConnectX6 and newer. Provides offloading
diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile
index 67fe7f3d6943..f02ebed33f19 100644
--- a/drivers/vdpa/Makefile
+++ b/drivers/vdpa/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_VDPA) += vdpa.o
 obj-$(CONFIG_VDPA_SIM) += vdpa_sim/
+obj-$(CONFIG_VDPA_USER) += vdpa_user/
 obj-$(CONFIG_IFCVF)    += ifcvf/
 obj-$(CONFIG_MLX5_VDPA) += mlx5/
 obj-$(CONFIG_VP_VDPA)    += virtio_pci/
diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c
index 6e197fe0fcf9..2808f1ba9f7b 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.c
+++ b/drivers/vdpa/ifcvf/ifcvf_base.c
@@ -158,7 +158,9 @@ next:
 		return -EIO;
 	}
 
-	for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
+	hw->nr_vring = ifc_ioread16(&hw->common_cfg->num_queues);
+
+	for (i = 0; i < hw->nr_vring; i++) {
 		ifc_iowrite16(i, &hw->common_cfg->queue_select);
 		notify_off = ifc_ioread16(&hw->common_cfg->queue_notify_off);
 		hw->vring[i].notify_addr = hw->notify_base +
@@ -304,7 +306,7 @@ u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid)
 	u32 q_pair_id;
 
 	ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg;
-	q_pair_id = qid / (IFCVF_MAX_QUEUE_PAIRS * 2);
+	q_pair_id = qid / hw->nr_vring;
 	avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2];
 	last_avail_idx = ifc_ioread16(avail_idx_addr);
 
@@ -318,7 +320,7 @@ int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num)
 	u32 q_pair_id;
 
 	ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg;
-	q_pair_id = qid / (IFCVF_MAX_QUEUE_PAIRS * 2);
+	q_pair_id = qid / hw->nr_vring;
 	avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2];
 	hw->vring[qid].last_avail_idx = num;
 	ifc_iowrite16(num, avail_idx_addr);
diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h
index 2996db0da490..09918af3ecf8 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.h
+++ b/drivers/vdpa/ifcvf/ifcvf_base.h
@@ -22,17 +22,8 @@
 #define N3000_DEVICE_ID		0x1041
 #define N3000_SUBSYS_DEVICE_ID	0x001A
 
-#define IFCVF_NET_SUPPORTED_FEATURES \
-		((1ULL << VIRTIO_NET_F_MAC)			| \
-		 (1ULL << VIRTIO_F_ANY_LAYOUT)			| \
-		 (1ULL << VIRTIO_F_VERSION_1)			| \
-		 (1ULL << VIRTIO_NET_F_STATUS)			| \
-		 (1ULL << VIRTIO_F_ORDER_PLATFORM)		| \
-		 (1ULL << VIRTIO_F_ACCESS_PLATFORM)		| \
-		 (1ULL << VIRTIO_NET_F_MRG_RXBUF))
-
-/* Only one queue pair for now. */
-#define IFCVF_MAX_QUEUE_PAIRS	1
+/* Max 8 data queue pairs(16 queues) and one control vq for now. */
+#define IFCVF_MAX_QUEUES	17
 
 #define IFCVF_QUEUE_ALIGNMENT	PAGE_SIZE
 #define IFCVF_QUEUE_MAX		32768
@@ -51,8 +42,6 @@
 #define ifcvf_private_to_vf(adapter) \
 	(&((struct ifcvf_adapter *)adapter)->vf)
 
-#define IFCVF_MAX_INTR (IFCVF_MAX_QUEUE_PAIRS * 2 + 1)
-
 struct vring_info {
 	u64 desc;
 	u64 avail;
@@ -83,7 +72,7 @@ struct ifcvf_hw {
 	u32 dev_type;
 	struct virtio_pci_common_cfg __iomem *common_cfg;
 	void __iomem *net_cfg;
-	struct vring_info vring[IFCVF_MAX_QUEUE_PAIRS * 2];
+	struct vring_info vring[IFCVF_MAX_QUEUES];
 	void __iomem * const *base;
 	char config_msix_name[256];
 	struct vdpa_callback config_cb;
@@ -103,7 +92,13 @@ struct ifcvf_vring_lm_cfg {
 
 struct ifcvf_lm_cfg {
 	u8 reserved[IFCVF_LM_RING_STATE_OFFSET];
-	struct ifcvf_vring_lm_cfg vring_lm_cfg[IFCVF_MAX_QUEUE_PAIRS];
+	struct ifcvf_vring_lm_cfg vring_lm_cfg[IFCVF_MAX_QUEUES];
+};
+
+struct ifcvf_vdpa_mgmt_dev {
+	struct vdpa_mgmt_dev mdev;
+	struct ifcvf_adapter *adapter;
+	struct pci_dev *pdev;
 };
 
 int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *dev);
diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index 351c6cfb24c3..dcd648e1f7e7 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -63,9 +63,13 @@ static int ifcvf_request_irq(struct ifcvf_adapter *adapter)
 	struct pci_dev *pdev = adapter->pdev;
 	struct ifcvf_hw *vf = &adapter->vf;
 	int vector, i, ret, irq;
+	u16 max_intr;
 
-	ret = pci_alloc_irq_vectors(pdev, IFCVF_MAX_INTR,
-				    IFCVF_MAX_INTR, PCI_IRQ_MSIX);
+	/* all queues and config interrupt  */
+	max_intr = vf->nr_vring + 1;
+
+	ret = pci_alloc_irq_vectors(pdev, max_intr,
+				    max_intr, PCI_IRQ_MSIX);
 	if (ret < 0) {
 		IFCVF_ERR(pdev, "Failed to alloc IRQ vectors\n");
 		return ret;
@@ -83,7 +87,7 @@ static int ifcvf_request_irq(struct ifcvf_adapter *adapter)
 		return ret;
 	}
 
-	for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
+	for (i = 0; i < vf->nr_vring; i++) {
 		snprintf(vf->vring[i].msix_name, 256, "ifcvf[%s]-%d\n",
 			 pci_name(pdev), i);
 		vector = i + IFCVF_MSI_QUEUE_OFF;
@@ -112,7 +116,6 @@ static int ifcvf_start_datapath(void *private)
 	u8 status;
 	int ret;
 
-	vf->nr_vring = IFCVF_MAX_QUEUE_PAIRS * 2;
 	ret = ifcvf_start_hw(vf);
 	if (ret < 0) {
 		status = ifcvf_get_status(vf);
@@ -128,7 +131,7 @@ static int ifcvf_stop_datapath(void *private)
 	struct ifcvf_hw *vf = ifcvf_private_to_vf(private);
 	int i;
 
-	for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++)
+	for (i = 0; i < vf->nr_vring; i++)
 		vf->vring[i].cb.callback = NULL;
 
 	ifcvf_stop_hw(vf);
@@ -141,7 +144,7 @@ static void ifcvf_reset_vring(struct ifcvf_adapter *adapter)
 	struct ifcvf_hw *vf = ifcvf_private_to_vf(adapter);
 	int i;
 
-	for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
+	for (i = 0; i < vf->nr_vring; i++) {
 		vf->vring[i].last_avail_idx = 0;
 		vf->vring[i].desc = 0;
 		vf->vring[i].avail = 0;
@@ -171,17 +174,12 @@ static u64 ifcvf_vdpa_get_features(struct vdpa_device *vdpa_dev)
 	struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev);
 	struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
 	struct pci_dev *pdev = adapter->pdev;
-
+	u32 type = vf->dev_type;
 	u64 features;
 
-	switch (vf->dev_type) {
-	case VIRTIO_ID_NET:
-		features = ifcvf_get_features(vf) & IFCVF_NET_SUPPORTED_FEATURES;
-		break;
-	case VIRTIO_ID_BLOCK:
+	if (type == VIRTIO_ID_NET || type == VIRTIO_ID_BLOCK)
 		features = ifcvf_get_features(vf);
-		break;
-	default:
+	else {
 		features = 0;
 		IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", vf->dev_type);
 	}
@@ -218,23 +216,12 @@ static void ifcvf_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
 	int ret;
 
 	vf  = vdpa_to_vf(vdpa_dev);
-	adapter = dev_get_drvdata(vdpa_dev->dev.parent);
+	adapter = vdpa_to_adapter(vdpa_dev);
 	status_old = ifcvf_get_status(vf);
 
 	if (status_old == status)
 		return;
 
-	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) &&
-	    !(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
-		ifcvf_stop_datapath(adapter);
-		ifcvf_free_irq(adapter, IFCVF_MAX_QUEUE_PAIRS * 2);
-	}
-
-	if (status == 0) {
-		ifcvf_reset_vring(adapter);
-		return;
-	}
-
 	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) &&
 	    !(status_old & VIRTIO_CONFIG_S_DRIVER_OK)) {
 		ret = ifcvf_request_irq(adapter);
@@ -254,6 +241,29 @@ static void ifcvf_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
 	ifcvf_set_status(vf, status);
 }
 
+static int ifcvf_vdpa_reset(struct vdpa_device *vdpa_dev)
+{
+	struct ifcvf_adapter *adapter;
+	struct ifcvf_hw *vf;
+	u8 status_old;
+
+	vf  = vdpa_to_vf(vdpa_dev);
+	adapter = vdpa_to_adapter(vdpa_dev);
+	status_old = ifcvf_get_status(vf);
+
+	if (status_old == 0)
+		return 0;
+
+	if (status_old & VIRTIO_CONFIG_S_DRIVER_OK) {
+		ifcvf_stop_datapath(adapter);
+		ifcvf_free_irq(adapter, vf->nr_vring);
+	}
+
+	ifcvf_reset_vring(adapter);
+
+	return 0;
+}
+
 static u16 ifcvf_vdpa_get_vq_num_max(struct vdpa_device *vdpa_dev)
 {
 	return IFCVF_QUEUE_MAX;
@@ -437,6 +447,7 @@ static const struct vdpa_config_ops ifc_vdpa_ops = {
 	.set_features	= ifcvf_vdpa_set_features,
 	.get_status	= ifcvf_vdpa_get_status,
 	.set_status	= ifcvf_vdpa_set_status,
+	.reset		= ifcvf_vdpa_reset,
 	.get_vq_num_max	= ifcvf_vdpa_get_vq_num_max,
 	.get_vq_state	= ifcvf_vdpa_get_vq_state,
 	.set_vq_state	= ifcvf_vdpa_set_vq_state,
@@ -458,63 +469,63 @@ static const struct vdpa_config_ops ifc_vdpa_ops = {
 	.get_vq_notification = ifcvf_get_vq_notification,
 };
 
-static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+static struct virtio_device_id id_table_net[] = {
+	{VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID},
+	{0},
+};
+
+static struct virtio_device_id id_table_blk[] = {
+	{VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID},
+	{0},
+};
+
+static u32 get_dev_type(struct pci_dev *pdev)
 {
-	struct device *dev = &pdev->dev;
-	struct ifcvf_adapter *adapter;
-	struct ifcvf_hw *vf;
-	int ret, i;
+	u32 dev_type;
 
-	ret = pcim_enable_device(pdev);
-	if (ret) {
-		IFCVF_ERR(pdev, "Failed to enable device\n");
-		return ret;
-	}
+	/* This drirver drives both modern virtio devices and transitional
+	 * devices in modern mode.
+	 * vDPA requires feature bit VIRTIO_F_ACCESS_PLATFORM,
+	 * so legacy devices and transitional devices in legacy
+	 * mode will not work for vDPA, this driver will not
+	 * drive devices with legacy interface.
+	 */
 
-	ret = pcim_iomap_regions(pdev, BIT(0) | BIT(2) | BIT(4),
-				 IFCVF_DRIVER_NAME);
-	if (ret) {
-		IFCVF_ERR(pdev, "Failed to request MMIO region\n");
-		return ret;
-	}
+	if (pdev->device < 0x1040)
+		dev_type =  pdev->subsystem_device;
+	else
+		dev_type =  pdev->device - 0x1040;
 
-	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
-	if (ret) {
-		IFCVF_ERR(pdev, "No usable DMA configuration\n");
-		return ret;
-	}
+	return dev_type;
+}
 
-	ret = devm_add_action_or_reset(dev, ifcvf_free_irq_vectors, pdev);
-	if (ret) {
-		IFCVF_ERR(pdev,
-			  "Failed for adding devres for freeing irq vectors\n");
-		return ret;
-	}
+static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
+{
+	struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
+	struct ifcvf_adapter *adapter;
+	struct pci_dev *pdev;
+	struct ifcvf_hw *vf;
+	struct device *dev;
+	int ret, i;
 
+	ifcvf_mgmt_dev = container_of(mdev, struct ifcvf_vdpa_mgmt_dev, mdev);
+	if (ifcvf_mgmt_dev->adapter)
+		return -EOPNOTSUPP;
+
+	pdev = ifcvf_mgmt_dev->pdev;
+	dev = &pdev->dev;
 	adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
-				    dev, &ifc_vdpa_ops, NULL);
+				    dev, &ifc_vdpa_ops, name, false);
 	if (IS_ERR(adapter)) {
 		IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
 		return PTR_ERR(adapter);
 	}
 
-	pci_set_master(pdev);
-	pci_set_drvdata(pdev, adapter);
+	ifcvf_mgmt_dev->adapter = adapter;
+	pci_set_drvdata(pdev, ifcvf_mgmt_dev);
 
 	vf = &adapter->vf;
-
-	/* This drirver drives both modern virtio devices and transitional
-	 * devices in modern mode.
-	 * vDPA requires feature bit VIRTIO_F_ACCESS_PLATFORM,
-	 * so legacy devices and transitional devices in legacy
-	 * mode will not work for vDPA, this driver will not
-	 * drive devices with legacy interface.
-	 */
-	if (pdev->device < 0x1040)
-		vf->dev_type =  pdev->subsystem_device;
-	else
-		vf->dev_type =  pdev->device - 0x1040;
-
+	vf->dev_type = get_dev_type(pdev);
 	vf->base = pcim_iomap_table(pdev);
 
 	adapter->pdev = pdev;
@@ -526,14 +537,15 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto err;
 	}
 
-	for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++)
+	for (i = 0; i < vf->nr_vring; i++)
 		vf->vring[i].irq = -EINVAL;
 
 	vf->hw_features = ifcvf_get_hw_features(vf);
 
-	ret = vdpa_register_device(&adapter->vdpa, IFCVF_MAX_QUEUE_PAIRS * 2);
+	adapter->vdpa.mdev = &ifcvf_mgmt_dev->mdev;
+	ret = _vdpa_register_device(&adapter->vdpa, vf->nr_vring);
 	if (ret) {
-		IFCVF_ERR(pdev, "Failed to register ifcvf to vdpa bus");
+		IFCVF_ERR(pdev, "Failed to register to vDPA bus");
 		goto err;
 	}
 
@@ -544,11 +556,100 @@ err:
 	return ret;
 }
 
+static void ifcvf_vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
+{
+	struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
+
+	ifcvf_mgmt_dev = container_of(mdev, struct ifcvf_vdpa_mgmt_dev, mdev);
+	_vdpa_unregister_device(dev);
+	ifcvf_mgmt_dev->adapter = NULL;
+}
+
+static const struct vdpa_mgmtdev_ops ifcvf_vdpa_mgmt_dev_ops = {
+	.dev_add = ifcvf_vdpa_dev_add,
+	.dev_del = ifcvf_vdpa_dev_del
+};
+
+static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
+	struct device *dev = &pdev->dev;
+	u32 dev_type;
+	int ret;
+
+	ifcvf_mgmt_dev = kzalloc(sizeof(struct ifcvf_vdpa_mgmt_dev), GFP_KERNEL);
+	if (!ifcvf_mgmt_dev) {
+		IFCVF_ERR(pdev, "Failed to alloc memory for the vDPA management device\n");
+		return -ENOMEM;
+	}
+
+	dev_type = get_dev_type(pdev);
+	switch (dev_type) {
+	case VIRTIO_ID_NET:
+		ifcvf_mgmt_dev->mdev.id_table = id_table_net;
+		break;
+	case VIRTIO_ID_BLOCK:
+		ifcvf_mgmt_dev->mdev.id_table = id_table_blk;
+		break;
+	default:
+		IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", dev_type);
+		ret = -EOPNOTSUPP;
+		goto err;
+	}
+
+	ifcvf_mgmt_dev->mdev.ops = &ifcvf_vdpa_mgmt_dev_ops;
+	ifcvf_mgmt_dev->mdev.device = dev;
+	ifcvf_mgmt_dev->pdev = pdev;
+
+	ret = pcim_enable_device(pdev);
+	if (ret) {
+		IFCVF_ERR(pdev, "Failed to enable device\n");
+		goto err;
+	}
+
+	ret = pcim_iomap_regions(pdev, BIT(0) | BIT(2) | BIT(4),
+				 IFCVF_DRIVER_NAME);
+	if (ret) {
+		IFCVF_ERR(pdev, "Failed to request MMIO region\n");
+		goto err;
+	}
+
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+	if (ret) {
+		IFCVF_ERR(pdev, "No usable DMA configuration\n");
+		goto err;
+	}
+
+	ret = devm_add_action_or_reset(dev, ifcvf_free_irq_vectors, pdev);
+	if (ret) {
+		IFCVF_ERR(pdev,
+			  "Failed for adding devres for freeing irq vectors\n");
+		goto err;
+	}
+
+	pci_set_master(pdev);
+
+	ret = vdpa_mgmtdev_register(&ifcvf_mgmt_dev->mdev);
+	if (ret) {
+		IFCVF_ERR(pdev,
+			  "Failed to initialize the management interfaces\n");
+		goto err;
+	}
+
+	return 0;
+
+err:
+	kfree(ifcvf_mgmt_dev);
+	return ret;
+}
+
 static void ifcvf_remove(struct pci_dev *pdev)
 {
-	struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
+	struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
 
-	vdpa_unregister_device(&adapter->vdpa);
+	ifcvf_mgmt_dev = pci_get_drvdata(pdev);
+	vdpa_mgmtdev_unregister(&ifcvf_mgmt_dev->mdev);
+	kfree(ifcvf_mgmt_dev);
 }
 
 static struct pci_device_id ifcvf_pci_ids[] = {
diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
index 0002b2136b48..01a848adf590 100644
--- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
@@ -5,7 +5,7 @@
 #define __MLX5_VDPA_H__
 
 #include <linux/etherdevice.h>
-#include <linux/if_vlan.h>
+#include <linux/vringh.h>
 #include <linux/vdpa.h>
 #include <linux/mlx5/driver.h>
 
@@ -48,6 +48,26 @@ struct mlx5_vdpa_resources {
 	bool valid;
 };
 
+struct mlx5_control_vq {
+	struct vhost_iotlb *iotlb;
+	/* spinlock to synchronize iommu table */
+	spinlock_t iommu_lock;
+	struct vringh vring;
+	bool ready;
+	u64 desc_addr;
+	u64 device_addr;
+	u64 driver_addr;
+	struct vdpa_callback event_cb;
+	struct vringh_kiov riov;
+	struct vringh_kiov wiov;
+	unsigned short head;
+};
+
+struct mlx5_ctrl_wq_ent {
+	struct work_struct work;
+	struct mlx5_vdpa_dev *mvdev;
+};
+
 struct mlx5_vdpa_dev {
 	struct vdpa_device vdev;
 	struct mlx5_core_dev *mdev;
@@ -57,9 +77,12 @@ struct mlx5_vdpa_dev {
 	u64 actual_features;
 	u8 status;
 	u32 max_vqs;
+	u16 max_idx;
 	u32 generation;
 
 	struct mlx5_vdpa_mr mr;
+	struct mlx5_control_vq cvq;
+	struct workqueue_struct *wq;
 };
 
 int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid);
@@ -68,6 +91,7 @@ int mlx5_vdpa_get_null_mkey(struct mlx5_vdpa_dev *dev, u32 *null_mkey);
 int mlx5_vdpa_create_tis(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tisn);
 void mlx5_vdpa_destroy_tis(struct mlx5_vdpa_dev *mvdev, u32 tisn);
 int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 *rqtn);
+int mlx5_vdpa_modify_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 rqtn);
 void mlx5_vdpa_destroy_rqt(struct mlx5_vdpa_dev *mvdev, u32 rqtn);
 int mlx5_vdpa_create_tir(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tirn);
 void mlx5_vdpa_destroy_tir(struct mlx5_vdpa_dev *mvdev, u32 tirn);
diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
index e59135fa867e..ff010c6d0cd3 100644
--- a/drivers/vdpa/mlx5/core/mr.c
+++ b/drivers/vdpa/mlx5/core/mr.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
 
+#include <linux/vhost_types.h>
 #include <linux/vdpa.h>
 #include <linux/gcd.h>
 #include <linux/string.h>
@@ -451,33 +452,30 @@ static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
 	mlx5_vdpa_destroy_mkey(mvdev, &mr->mkey);
 }
 
-static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
+static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src)
 {
-	struct mlx5_vdpa_mr *mr = &mvdev->mr;
+	struct vhost_iotlb_map *map;
+	u64 start = 0, last = ULLONG_MAX;
 	int err;
 
-	if (mr->initialized)
-		return 0;
-
-	if (iotlb)
-		err = create_user_mr(mvdev, iotlb);
-	else
-		err = create_dma_mr(mvdev, mr);
-
-	if (!err)
-		mr->initialized = true;
+	if (!src) {
+		err = vhost_iotlb_add_range(mvdev->cvq.iotlb, start, last, start, VHOST_ACCESS_RW);
+		return err;
+	}
 
-	return err;
+	for (map = vhost_iotlb_itree_first(src, start, last); map;
+		map = vhost_iotlb_itree_next(map, start, last)) {
+		err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start, map->last,
+					    map->addr, map->perm);
+		if (err)
+			return err;
+	}
+	return 0;
 }
 
-int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
+static void prune_iotlb(struct mlx5_vdpa_dev *mvdev)
 {
-	int err;
-
-	mutex_lock(&mvdev->mr.mkey_mtx);
-	err = _mlx5_vdpa_create_mr(mvdev, iotlb);
-	mutex_unlock(&mvdev->mr.mkey_mtx);
-	return err;
+	vhost_iotlb_del_range(mvdev->cvq.iotlb, 0, ULLONG_MAX);
 }
 
 static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
@@ -501,6 +499,7 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
 	if (!mr->initialized)
 		goto out;
 
+	prune_iotlb(mvdev);
 	if (mr->user_mr)
 		destroy_user_mr(mvdev, mr);
 	else
@@ -512,6 +511,48 @@ out:
 	mutex_unlock(&mr->mkey_mtx);
 }
 
+static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
+{
+	struct mlx5_vdpa_mr *mr = &mvdev->mr;
+	int err;
+
+	if (mr->initialized)
+		return 0;
+
+	if (iotlb)
+		err = create_user_mr(mvdev, iotlb);
+	else
+		err = create_dma_mr(mvdev, mr);
+
+	if (err)
+		return err;
+
+	err = dup_iotlb(mvdev, iotlb);
+	if (err)
+		goto out_err;
+
+	mr->initialized = true;
+	return 0;
+
+out_err:
+	if (iotlb)
+		destroy_user_mr(mvdev, mr);
+	else
+		destroy_dma_mr(mvdev, mr);
+
+	return err;
+}
+
+int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
+{
+	int err;
+
+	mutex_lock(&mvdev->mr.mkey_mtx);
+	err = _mlx5_vdpa_create_mr(mvdev, iotlb);
+	mutex_unlock(&mvdev->mr.mkey_mtx);
+	return err;
+}
+
 int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
 			     bool *change_map)
 {
diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c
index d4606213f88a..15e266d0e27a 100644
--- a/drivers/vdpa/mlx5/core/resources.c
+++ b/drivers/vdpa/mlx5/core/resources.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
 
+#include <linux/iova.h>
 #include <linux/mlx5/driver.h>
 #include "mlx5_vdpa.h"
 
@@ -128,6 +129,16 @@ int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 *
 	return err;
 }
 
+int mlx5_vdpa_modify_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 rqtn)
+{
+	u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {};
+
+	MLX5_SET(modify_rqt_in, in, uid, mvdev->res.uid);
+	MLX5_SET(modify_rqt_in, in, rqtn, rqtn);
+	MLX5_SET(modify_rqt_in, in, opcode, MLX5_CMD_OP_MODIFY_RQT);
+	return mlx5_cmd_exec(mvdev->mdev, in, inlen, out, sizeof(out));
+}
+
 void mlx5_vdpa_destroy_rqt(struct mlx5_vdpa_dev *mvdev, u32 rqtn)
 {
 	u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {};
@@ -221,6 +232,22 @@ int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *m
 	return mlx5_cmd_exec_in(mvdev->mdev, destroy_mkey, in);
 }
 
+static int init_ctrl_vq(struct mlx5_vdpa_dev *mvdev)
+{
+	mvdev->cvq.iotlb = vhost_iotlb_alloc(0, 0);
+	if (!mvdev->cvq.iotlb)
+		return -ENOMEM;
+
+	vringh_set_iotlb(&mvdev->cvq.vring, mvdev->cvq.iotlb, &mvdev->cvq.iommu_lock);
+
+	return 0;
+}
+
+static void cleanup_ctrl_vq(struct mlx5_vdpa_dev *mvdev)
+{
+	vhost_iotlb_free(mvdev->cvq.iotlb);
+}
+
 int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
 {
 	u64 offset = MLX5_CAP64_DEV_VDPA_EMULATION(mvdev->mdev, doorbell_bar_offset);
@@ -260,10 +287,17 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
 		err = -ENOMEM;
 		goto err_key;
 	}
+
+	err = init_ctrl_vq(mvdev);
+	if (err)
+		goto err_ctrl;
+
 	res->valid = true;
 
 	return 0;
 
+err_ctrl:
+	iounmap(res->kick_addr);
 err_key:
 	dealloc_pd(mvdev, res->pdn, res->uid);
 err_pd:
@@ -282,6 +316,7 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev)
 	if (!res->valid)
 		return;
 
+	cleanup_ctrl_vq(mvdev);
 	iounmap(res->kick_addr);
 	res->kick_addr = NULL;
 	dealloc_pd(mvdev, res->pdn, res->uid);
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 5906cada2293..294ba05e6fc9 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -45,6 +45,8 @@ MODULE_LICENSE("Dual BSD/GPL");
 	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
 	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
 
+#define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
+
 struct mlx5_vdpa_net_resources {
 	u32 tisn;
 	u32 tdn;
@@ -90,7 +92,6 @@ struct mlx5_vq_restore_info {
 	u16 avail_index;
 	u16 used_index;
 	bool ready;
-	struct vdpa_callback cb;
 	bool restore;
 };
 
@@ -100,7 +101,6 @@ struct mlx5_vdpa_virtqueue {
 	u64 device_addr;
 	u64 driver_addr;
 	u32 num_ent;
-	struct vdpa_callback event_cb;
 
 	/* Resources for implementing the notification channel from the device
 	 * to the driver. fwqp is the firmware end of an RC connection; the
@@ -135,11 +135,20 @@ struct mlx5_vdpa_virtqueue {
  */
 #define MLX5_MAX_SUPPORTED_VQS 16
 
+static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
+{
+	if (unlikely(idx > mvdev->max_idx))
+		return false;
+
+	return true;
+}
+
 struct mlx5_vdpa_net {
 	struct mlx5_vdpa_dev mvdev;
 	struct mlx5_vdpa_net_resources res;
 	struct virtio_net_config config;
 	struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
+	struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1];
 
 	/* Serialize vq resources creation and destruction. This is required
 	 * since memory map might change and we need to destroy and create
@@ -151,15 +160,18 @@ struct mlx5_vdpa_net {
 	struct mlx5_flow_handle *rx_rule;
 	bool setup;
 	u16 mtu;
+	u32 cur_num_vqs;
 };
 
 static void free_resources(struct mlx5_vdpa_net *ndev);
 static void init_mvqs(struct mlx5_vdpa_net *ndev);
-static int setup_driver(struct mlx5_vdpa_net *ndev);
+static int setup_driver(struct mlx5_vdpa_dev *mvdev);
 static void teardown_driver(struct mlx5_vdpa_net *ndev);
 
 static bool mlx5_vdpa_debug;
 
+#define MLX5_CVQ_MAX_ENT 16
+
 #define MLX5_LOG_VIO_FLAG(_feature)                                                                \
 	do {                                                                                       \
 		if (features & BIT_ULL(_feature))                                                  \
@@ -172,11 +184,41 @@ static bool mlx5_vdpa_debug;
 			mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
 	} while (0)
 
+/* TODO: cross-endian support */
+static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
+{
+	return virtio_legacy_is_little_endian() ||
+		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
+}
+
+static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
+{
+	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
+}
+
+static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
+{
+	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
+}
+
 static inline u32 mlx5_vdpa_max_qps(int max_vqs)
 {
 	return max_vqs / 2;
 }
 
+static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
+{
+	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
+		return 2;
+
+	return 2 * mlx5_vdpa_max_qps(mvdev->max_vqs);
+}
+
+static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
+{
+	return idx == ctrl_vq_idx(mvdev);
+}
+
 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
 {
 	if (status & ~VALID_STATUS_MASK)
@@ -481,6 +523,10 @@ static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
 
 static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
 {
+	struct mlx5_vdpa_net *ndev = mvq->ndev;
+	struct vdpa_callback *event_cb;
+
+	event_cb = &ndev->event_cbs[mvq->index];
 	mlx5_cq_set_ci(&mvq->cq.mcq);
 
 	/* make sure CQ cosumer update is visible to the hardware before updating
@@ -488,8 +534,8 @@ static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int nu
 	 */
 	dma_wmb();
 	rx_post(&mvq->vqqp, num);
-	if (mvq->event_cb.callback)
-		mvq->event_cb.callback(mvq->event_cb.private);
+	if (event_cb->callback)
+		event_cb->callback(event_cb->private);
 }
 
 static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
@@ -1100,10 +1146,8 @@ static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
 	if (!mvq->num_ent)
 		return 0;
 
-	if (mvq->initialized) {
-		mlx5_vdpa_warn(&ndev->mvdev, "attempt re init\n");
-		return -EINVAL;
-	}
+	if (mvq->initialized)
+		return 0;
 
 	err = cq_create(ndev, idx, mvq->num_ent);
 	if (err)
@@ -1190,19 +1234,20 @@ static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *
 
 static int create_rqt(struct mlx5_vdpa_net *ndev)
 {
-	int log_max_rqt;
 	__be32 *list;
+	int max_rqt;
 	void *rqtc;
 	int inlen;
 	void *in;
 	int i, j;
 	int err;
 
-	log_max_rqt = min_t(int, 1, MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
-	if (log_max_rqt < 1)
+	max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2,
+			1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
+	if (max_rqt < 1)
 		return -EOPNOTSUPP;
 
-	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + (1 << log_max_rqt) * MLX5_ST_SZ_BYTES(rq_num);
+	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
 	in = kzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
@@ -1211,10 +1256,9 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
 
 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
-	MLX5_SET(rqtc, rqtc, rqt_max_size, 1 << log_max_rqt);
-	MLX5_SET(rqtc, rqtc, rqt_actual_size, 1);
+	MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
-	for (i = 0, j = 0; j < ndev->mvdev.max_vqs; j++) {
+	for (i = 0, j = 0; j < max_rqt; j++) {
 		if (!ndev->vqs[j].initialized)
 			continue;
 
@@ -1223,6 +1267,7 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
 			i++;
 		}
 	}
+	MLX5_SET(rqtc, rqtc, rqt_actual_size, i);
 
 	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
 	kfree(in);
@@ -1232,6 +1277,52 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
 	return 0;
 }
 
+#define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
+
+static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
+{
+	__be32 *list;
+	int max_rqt;
+	void *rqtc;
+	int inlen;
+	void *in;
+	int i, j;
+	int err;
+
+	max_rqt = min_t(int, ndev->cur_num_vqs / 2,
+			1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
+	if (max_rqt < 1)
+		return -EOPNOTSUPP;
+
+	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
+	in = kzalloc(inlen, GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
+	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
+	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
+	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
+
+	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
+	for (i = 0, j = 0; j < num; j++) {
+		if (!ndev->vqs[j].initialized)
+			continue;
+
+		if (!vq_is_tx(ndev->vqs[j].index)) {
+			list[i] = cpu_to_be32(ndev->vqs[j].virtq_id);
+			i++;
+		}
+	}
+	MLX5_SET(rqtc, rqtc, rqt_actual_size, i);
+	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
+	kfree(in);
+	if (err)
+		return err;
+
+	return 0;
+}
+
 static void destroy_rqt(struct mlx5_vdpa_net *ndev)
 {
 	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
@@ -1345,12 +1436,206 @@ static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev)
 	ndev->rx_rule = NULL;
 }
 
+static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
+{
+	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+	struct mlx5_control_vq *cvq = &mvdev->cvq;
+	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
+	struct mlx5_core_dev *pfmdev;
+	size_t read;
+	u8 mac[ETH_ALEN];
+
+	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
+	switch (cmd) {
+	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
+		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
+		if (read != ETH_ALEN)
+			break;
+
+		if (!memcmp(ndev->config.mac, mac, 6)) {
+			status = VIRTIO_NET_OK;
+			break;
+		}
+
+		if (!is_zero_ether_addr(ndev->config.mac)) {
+			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
+				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
+					       ndev->config.mac);
+				break;
+			}
+		}
+
+		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
+			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
+				       mac);
+			break;
+		}
+
+		memcpy(ndev->config.mac, mac, ETH_ALEN);
+		status = VIRTIO_NET_OK;
+		break;
+
+	default:
+		break;
+	}
+
+	return status;
+}
+
+static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
+{
+	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+	int cur_qps = ndev->cur_num_vqs / 2;
+	int err;
+	int i;
+
+	if (cur_qps > newqps) {
+		err = modify_rqt(ndev, 2 * newqps);
+		if (err)
+			return err;
+
+		for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
+			teardown_vq(ndev, &ndev->vqs[i]);
+
+		ndev->cur_num_vqs = 2 * newqps;
+	} else {
+		ndev->cur_num_vqs = 2 * newqps;
+		for (i = cur_qps * 2; i < 2 * newqps; i++) {
+			err = setup_vq(ndev, &ndev->vqs[i]);
+			if (err)
+				goto clean_added;
+		}
+		err = modify_rqt(ndev, 2 * newqps);
+		if (err)
+			goto clean_added;
+	}
+	return 0;
+
+clean_added:
+	for (--i; i >= cur_qps; --i)
+		teardown_vq(ndev, &ndev->vqs[i]);
+
+	return err;
+}
+
+static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
+{
+	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
+	struct mlx5_control_vq *cvq = &mvdev->cvq;
+	struct virtio_net_ctrl_mq mq;
+	size_t read;
+	u16 newqps;
+
+	switch (cmd) {
+	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
+		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
+		if (read != sizeof(mq))
+			break;
+
+		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
+		if (ndev->cur_num_vqs == 2 * newqps) {
+			status = VIRTIO_NET_OK;
+			break;
+		}
+
+		if (newqps & (newqps - 1))
+			break;
+
+		if (!change_num_qps(mvdev, newqps))
+			status = VIRTIO_NET_OK;
+
+		break;
+	default:
+		break;
+	}
+
+	return status;
+}
+
+static void mlx5_cvq_kick_handler(struct work_struct *work)
+{
+	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
+	struct virtio_net_ctrl_hdr ctrl;
+	struct mlx5_ctrl_wq_ent *wqent;
+	struct mlx5_vdpa_dev *mvdev;
+	struct mlx5_control_vq *cvq;
+	struct mlx5_vdpa_net *ndev;
+	size_t read, write;
+	int err;
+
+	wqent = container_of(work, struct mlx5_ctrl_wq_ent, work);
+	mvdev = wqent->mvdev;
+	ndev = to_mlx5_vdpa_ndev(mvdev);
+	cvq = &mvdev->cvq;
+	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
+		goto out;
+
+	if (!cvq->ready)
+		goto out;
+
+	while (true) {
+		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
+					   GFP_ATOMIC);
+		if (err <= 0)
+			break;
+
+		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
+		if (read != sizeof(ctrl))
+			break;
+
+		switch (ctrl.class) {
+		case VIRTIO_NET_CTRL_MAC:
+			status = handle_ctrl_mac(mvdev, ctrl.cmd);
+			break;
+		case VIRTIO_NET_CTRL_MQ:
+			status = handle_ctrl_mq(mvdev, ctrl.cmd);
+			break;
+
+		default:
+			break;
+		}
+
+		/* Make sure data is written before advancing index */
+		smp_wmb();
+
+		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
+		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
+		vringh_kiov_cleanup(&cvq->riov);
+		vringh_kiov_cleanup(&cvq->wiov);
+
+		if (vringh_need_notify_iotlb(&cvq->vring))
+			vringh_notify(&cvq->vring);
+	}
+out:
+	kfree(wqent);
+}
+
 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
 {
 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
-	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
+	struct mlx5_vdpa_virtqueue *mvq;
+	struct mlx5_ctrl_wq_ent *wqent;
+
+	if (!is_index_valid(mvdev, idx))
+		return;
+
+	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
+		if (!mvdev->cvq.ready)
+			return;
+
+		wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
+		if (!wqent)
+			return;
 
+		wqent->mvdev = mvdev;
+		INIT_WORK(&wqent->work, mlx5_cvq_kick_handler);
+		queue_work(mvdev->wq, &wqent->work);
+		return;
+	}
+
+	mvq = &ndev->vqs[idx];
 	if (unlikely(!mvq->ready))
 		return;
 
@@ -1362,8 +1647,19 @@ static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_
 {
 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
-	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
+	struct mlx5_vdpa_virtqueue *mvq;
+
+	if (!is_index_valid(mvdev, idx))
+		return -EINVAL;
 
+	if (is_ctrl_vq_idx(mvdev, idx)) {
+		mvdev->cvq.desc_addr = desc_area;
+		mvdev->cvq.device_addr = device_area;
+		mvdev->cvq.driver_addr = driver_area;
+		return 0;
+	}
+
+	mvq = &ndev->vqs[idx];
 	mvq->desc_addr = desc_area;
 	mvq->device_addr = device_area;
 	mvq->driver_addr = driver_area;
@@ -1376,6 +1672,9 @@ static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
 	struct mlx5_vdpa_virtqueue *mvq;
 
+	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
+		return;
+
 	mvq = &ndev->vqs[idx];
 	mvq->num_ent = num;
 }
@@ -1384,17 +1683,46 @@ static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_c
 {
 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
-	struct mlx5_vdpa_virtqueue *vq = &ndev->vqs[idx];
 
-	vq->event_cb = *cb;
+	ndev->event_cbs[idx] = *cb;
+}
+
+static void mlx5_cvq_notify(struct vringh *vring)
+{
+	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
+
+	if (!cvq->event_cb.callback)
+		return;
+
+	cvq->event_cb.callback(cvq->event_cb.private);
+}
+
+static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
+{
+	struct mlx5_control_vq *cvq = &mvdev->cvq;
+
+	cvq->ready = ready;
+	if (!ready)
+		return;
+
+	cvq->vring.notify = mlx5_cvq_notify;
 }
 
 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
 {
 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
-	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
+	struct mlx5_vdpa_virtqueue *mvq;
+
+	if (!is_index_valid(mvdev, idx))
+		return;
+
+	if (is_ctrl_vq_idx(mvdev, idx)) {
+		set_cvq_ready(mvdev, ready);
+		return;
+	}
 
+	mvq = &ndev->vqs[idx];
 	if (!ready)
 		suspend_vq(ndev, mvq);
 
@@ -1405,9 +1733,14 @@ static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
 {
 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
-	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
 
-	return mvq->ready;
+	if (!is_index_valid(mvdev, idx))
+		return false;
+
+	if (is_ctrl_vq_idx(mvdev, idx))
+		return mvdev->cvq.ready;
+
+	return ndev->vqs[idx].ready;
 }
 
 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
@@ -1415,8 +1748,17 @@ static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
 {
 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
-	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
+	struct mlx5_vdpa_virtqueue *mvq;
 
+	if (!is_index_valid(mvdev, idx))
+		return -EINVAL;
+
+	if (is_ctrl_vq_idx(mvdev, idx)) {
+		mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
+		return 0;
+	}
+
+	mvq = &ndev->vqs[idx];
 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
 		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
 		return -EINVAL;
@@ -1431,10 +1773,19 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa
 {
 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
-	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
+	struct mlx5_vdpa_virtqueue *mvq;
 	struct mlx5_virtq_attr attr;
 	int err;
 
+	if (!is_index_valid(mvdev, idx))
+		return -EINVAL;
+
+	if (is_ctrl_vq_idx(mvdev, idx)) {
+		state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
+		return 0;
+	}
+
+	mvq = &ndev->vqs[idx];
 	/* If the virtq object was destroyed, use the value saved at
 	 * the last minute of suspend_vq. This caters for userspace
 	 * that cares about emulating the index after vq is stopped.
@@ -1491,10 +1842,14 @@ static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
 	u16 dev_features;
 
 	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask);
-	ndev->mvdev.mlx_features = mlx_to_vritio_features(dev_features);
+	ndev->mvdev.mlx_features |= mlx_to_vritio_features(dev_features);
 	if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
 		ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
 	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
+	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
+	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
+	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MQ);
+
 	print_features(mvdev, ndev->mvdev.mlx_features, false);
 	return ndev->mvdev.mlx_features;
 }
@@ -1507,17 +1862,29 @@ static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
 	return 0;
 }
 
-static int setup_virtqueues(struct mlx5_vdpa_net *ndev)
+static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
 {
+	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+	struct mlx5_control_vq *cvq = &mvdev->cvq;
 	int err;
 	int i;
 
-	for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); i++) {
+	for (i = 0; i < 2 * mlx5_vdpa_max_qps(mvdev->max_vqs); i++) {
 		err = setup_vq(ndev, &ndev->vqs[i]);
 		if (err)
 			goto err_vq;
 	}
 
+	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
+		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
+					MLX5_CVQ_MAX_ENT, false,
+					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
+					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
+					(struct vring_used *)(uintptr_t)cvq->device_addr);
+		if (err)
+			goto err_vq;
+	}
+
 	return 0;
 
 err_vq:
@@ -1541,16 +1908,22 @@ static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
 	}
 }
 
-/* TODO: cross-endian support */
-static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
-{
-	return virtio_legacy_is_little_endian() ||
-		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
-}
-
-static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
+static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
 {
-	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
+	if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
+		if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
+			/* MQ supported. CVQ index is right above the last data virtqueue's */
+			mvdev->max_idx = mvdev->max_vqs;
+		} else {
+			/* Only CVQ supportted. data virtqueues occupy indices 0 and 1.
+			 * CVQ gets index 2
+			 */
+			mvdev->max_idx = 2;
+		}
+	} else {
+		/* Two data virtqueues only: one for rx and one for tx */
+		mvdev->max_idx = 1;
+	}
 }
 
 static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
@@ -1568,6 +1941,7 @@ static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
 	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
 	ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, ndev->mtu);
 	ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
+	update_cvq_info(mvdev);
 	return err;
 }
 
@@ -1605,15 +1979,14 @@ static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
 {
 	struct mlx5_vq_restore_info *ri = &mvq->ri;
-	struct mlx5_virtq_attr attr;
+	struct mlx5_virtq_attr attr = {};
 	int err;
 
-	if (!mvq->initialized)
-		return 0;
-
-	err = query_virtqueue(ndev, mvq, &attr);
-	if (err)
-		return err;
+	if (mvq->initialized) {
+		err = query_virtqueue(ndev, mvq, &attr);
+		if (err)
+			return err;
+	}
 
 	ri->avail_index = attr.available_index;
 	ri->used_index = attr.used_index;
@@ -1622,7 +1995,6 @@ static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqu
 	ri->desc_addr = mvq->desc_addr;
 	ri->device_addr = mvq->device_addr;
 	ri->driver_addr = mvq->driver_addr;
-	ri->cb = mvq->event_cb;
 	ri->restore = true;
 	return 0;
 }
@@ -1667,12 +2039,12 @@ static void restore_channels_info(struct mlx5_vdpa_net *ndev)
 		mvq->desc_addr = ri->desc_addr;
 		mvq->device_addr = ri->device_addr;
 		mvq->driver_addr = ri->driver_addr;
-		mvq->event_cb = ri->cb;
 	}
 }
 
-static int mlx5_vdpa_change_map(struct mlx5_vdpa_net *ndev, struct vhost_iotlb *iotlb)
+static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
 {
+	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
 	int err;
 
 	suspend_vqs(ndev);
@@ -1681,58 +2053,59 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_net *ndev, struct vhost_iotlb *
 		goto err_mr;
 
 	teardown_driver(ndev);
-	mlx5_vdpa_destroy_mr(&ndev->mvdev);
-	err = mlx5_vdpa_create_mr(&ndev->mvdev, iotlb);
+	mlx5_vdpa_destroy_mr(mvdev);
+	err = mlx5_vdpa_create_mr(mvdev, iotlb);
 	if (err)
 		goto err_mr;
 
-	if (!(ndev->mvdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
+	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
 		return 0;
 
 	restore_channels_info(ndev);
-	err = setup_driver(ndev);
+	err = setup_driver(mvdev);
 	if (err)
 		goto err_setup;
 
 	return 0;
 
 err_setup:
-	mlx5_vdpa_destroy_mr(&ndev->mvdev);
+	mlx5_vdpa_destroy_mr(mvdev);
 err_mr:
 	return err;
 }
 
-static int setup_driver(struct mlx5_vdpa_net *ndev)
+static int setup_driver(struct mlx5_vdpa_dev *mvdev)
 {
+	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
 	int err;
 
 	mutex_lock(&ndev->reslock);
 	if (ndev->setup) {
-		mlx5_vdpa_warn(&ndev->mvdev, "setup driver called for already setup driver\n");
+		mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
 		err = 0;
 		goto out;
 	}
-	err = setup_virtqueues(ndev);
+	err = setup_virtqueues(mvdev);
 	if (err) {
-		mlx5_vdpa_warn(&ndev->mvdev, "setup_virtqueues\n");
+		mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
 		goto out;
 	}
 
 	err = create_rqt(ndev);
 	if (err) {
-		mlx5_vdpa_warn(&ndev->mvdev, "create_rqt\n");
+		mlx5_vdpa_warn(mvdev, "create_rqt\n");
 		goto err_rqt;
 	}
 
 	err = create_tir(ndev);
 	if (err) {
-		mlx5_vdpa_warn(&ndev->mvdev, "create_tir\n");
+		mlx5_vdpa_warn(mvdev, "create_tir\n");
 		goto err_tir;
 	}
 
 	err = add_fwd_to_tir(ndev);
 	if (err) {
-		mlx5_vdpa_warn(&ndev->mvdev, "add_fwd_to_tir\n");
+		mlx5_vdpa_warn(mvdev, "add_fwd_to_tir\n");
 		goto err_fwd;
 	}
 	ndev->setup = true;
@@ -1781,24 +2154,10 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
 	int err;
 
 	print_status(mvdev, status, true);
-	if (!status) {
-		mlx5_vdpa_info(mvdev, "performing device reset\n");
-		teardown_driver(ndev);
-		clear_vqs_ready(ndev);
-		mlx5_vdpa_destroy_mr(&ndev->mvdev);
-		ndev->mvdev.status = 0;
-		ndev->mvdev.mlx_features = 0;
-		++mvdev->generation;
-		if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
-			if (mlx5_vdpa_create_mr(mvdev, NULL))
-				mlx5_vdpa_warn(mvdev, "create MR failed\n");
-		}
-		return;
-	}
 
 	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
 		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
-			err = setup_driver(ndev);
+			err = setup_driver(mvdev);
 			if (err) {
 				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
 				goto err_setup;
@@ -1817,6 +2176,29 @@ err_setup:
 	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
 }
 
+static int mlx5_vdpa_reset(struct vdpa_device *vdev)
+{
+	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+
+	print_status(mvdev, 0, true);
+	mlx5_vdpa_info(mvdev, "performing device reset\n");
+	teardown_driver(ndev);
+	clear_vqs_ready(ndev);
+	mlx5_vdpa_destroy_mr(&ndev->mvdev);
+	ndev->mvdev.status = 0;
+	ndev->mvdev.mlx_features = 0;
+	memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
+	ndev->mvdev.actual_features = 0;
+	++mvdev->generation;
+	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
+		if (mlx5_vdpa_create_mr(mvdev, NULL))
+			mlx5_vdpa_warn(mvdev, "create MR failed\n");
+	}
+
+	return 0;
+}
+
 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
 {
 	return sizeof(struct virtio_net_config);
@@ -1848,7 +2230,6 @@ static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb)
 {
 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
-	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
 	bool change_map;
 	int err;
 
@@ -1859,7 +2240,7 @@ static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb
 	}
 
 	if (change_map)
-		return mlx5_vdpa_change_map(ndev, iotlb);
+		return mlx5_vdpa_change_map(mvdev, iotlb);
 
 	return 0;
 }
@@ -1889,6 +2270,9 @@ static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device
 	struct mlx5_vdpa_net *ndev;
 	phys_addr_t addr;
 
+	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
+		return ret;
+
 	/* If SF BAR size is smaller than PAGE_SIZE, do not use direct
 	 * notification to avoid the risk of mapping pages that contain BAR of more
 	 * than one SF
@@ -1928,6 +2312,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
 	.get_vendor_id = mlx5_vdpa_get_vendor_id,
 	.get_status = mlx5_vdpa_get_status,
 	.set_status = mlx5_vdpa_set_status,
+	.reset = mlx5_vdpa_reset,
 	.get_config_size = mlx5_vdpa_get_config_size,
 	.get_config = mlx5_vdpa_get_config,
 	.set_config = mlx5_vdpa_set_config,
@@ -2040,7 +2425,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
 	max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
 
 	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
-				 name);
+				 name, false);
 	if (IS_ERR(ndev))
 		return PTR_ERR(ndev);
 
@@ -2063,8 +2448,11 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
 		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
 		if (err)
 			goto err_mtu;
+
+		ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
 	}
 
+	config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, mlx5_vdpa_max_qps(max_vqs));
 	mvdev->vdev.dma_dev = &mdev->pdev->dev;
 	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
 	if (err)
@@ -2080,8 +2468,15 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
 	if (err)
 		goto err_mr;
 
+	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_ctrl_wq");
+	if (!mvdev->wq) {
+		err = -ENOMEM;
+		goto err_res2;
+	}
+
+	ndev->cur_num_vqs = 2 * mlx5_vdpa_max_qps(max_vqs);
 	mvdev->vdev.mdev = &mgtdev->mgtdev;
-	err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs));
+	err = _vdpa_register_device(&mvdev->vdev, ndev->cur_num_vqs + 1);
 	if (err)
 		goto err_reg;
 
@@ -2089,6 +2484,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
 	return 0;
 
 err_reg:
+	destroy_workqueue(mvdev->wq);
+err_res2:
 	free_resources(ndev);
 err_mr:
 	mlx5_vdpa_destroy_mr(mvdev);
@@ -2106,7 +2503,9 @@ err_mtu:
 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
 {
 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
+	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
 
+	destroy_workqueue(mvdev->wq);
 	_vdpa_unregister_device(dev);
 	mgtdev->ndev = NULL;
 }
diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
index 3fc4525fc05c..1dc121a07a93 100644
--- a/drivers/vdpa/vdpa.c
+++ b/drivers/vdpa/vdpa.c
@@ -69,6 +69,7 @@ static void vdpa_release_dev(struct device *d)
  * @config: the bus operations that is supported by this device
  * @size: size of the parent structure that contains private data
  * @name: name of the vdpa device; optional.
+ * @use_va: indicate whether virtual address must be used by this device
  *
  * Driver should use vdpa_alloc_device() wrapper macro instead of
  * using this directly.
@@ -78,7 +79,8 @@ static void vdpa_release_dev(struct device *d)
  */
 struct vdpa_device *__vdpa_alloc_device(struct device *parent,
 					const struct vdpa_config_ops *config,
-					size_t size, const char *name)
+					size_t size, const char *name,
+					bool use_va)
 {
 	struct vdpa_device *vdev;
 	int err = -EINVAL;
@@ -89,6 +91,10 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
 	if (!!config->dma_map != !!config->dma_unmap)
 		goto err;
 
+	/* It should only work for the device that use on-chip IOMMU */
+	if (use_va && !(config->dma_map || config->set_map))
+		goto err;
+
 	err = -ENOMEM;
 	vdev = kzalloc(size, GFP_KERNEL);
 	if (!vdev)
@@ -104,6 +110,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
 	vdev->index = err;
 	vdev->config = config;
 	vdev->features_valid = false;
+	vdev->use_va = use_va;
 
 	if (name)
 		err = dev_set_name(&vdev->dev, "%s", name);
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index c621cf7feec0..5f484fff8dbe 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -92,7 +92,7 @@ static void vdpasim_vq_reset(struct vdpasim *vdpasim,
 	vq->vring.notify = NULL;
 }
 
-static void vdpasim_reset(struct vdpasim *vdpasim)
+static void vdpasim_do_reset(struct vdpasim *vdpasim)
 {
 	int i;
 
@@ -137,7 +137,8 @@ static dma_addr_t vdpasim_map_range(struct vdpasim *vdpasim, phys_addr_t paddr,
 	int ret;
 
 	/* We set the limit_pfn to the maximum (ULONG_MAX - 1) */
-	iova = alloc_iova(&vdpasim->iova, size, ULONG_MAX - 1, true);
+	iova = alloc_iova(&vdpasim->iova, size >> iova_shift(&vdpasim->iova),
+			  ULONG_MAX - 1, true);
 	if (!iova)
 		return DMA_MAPPING_ERROR;
 
@@ -250,7 +251,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr)
 		ops = &vdpasim_config_ops;
 
 	vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops,
-				    dev_attr->name);
+				    dev_attr->name, false);
 	if (IS_ERR(vdpasim)) {
 		ret = PTR_ERR(vdpasim);
 		goto err_alloc;
@@ -459,11 +460,21 @@ static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
 
 	spin_lock(&vdpasim->lock);
 	vdpasim->status = status;
-	if (status == 0)
-		vdpasim_reset(vdpasim);
 	spin_unlock(&vdpasim->lock);
 }
 
+static int vdpasim_reset(struct vdpa_device *vdpa)
+{
+	struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+	spin_lock(&vdpasim->lock);
+	vdpasim->status = 0;
+	vdpasim_do_reset(vdpasim);
+	spin_unlock(&vdpasim->lock);
+
+	return 0;
+}
+
 static size_t vdpasim_get_config_size(struct vdpa_device *vdpa)
 {
 	struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
@@ -544,14 +555,14 @@ err:
 }
 
 static int vdpasim_dma_map(struct vdpa_device *vdpa, u64 iova, u64 size,
-			   u64 pa, u32 perm)
+			   u64 pa, u32 perm, void *opaque)
 {
 	struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
 	int ret;
 
 	spin_lock(&vdpasim->iommu_lock);
-	ret = vhost_iotlb_add_range(vdpasim->iommu, iova, iova + size - 1, pa,
-				    perm);
+	ret = vhost_iotlb_add_range_ctx(vdpasim->iommu, iova, iova + size - 1,
+					pa, perm, opaque);
 	spin_unlock(&vdpasim->iommu_lock);
 
 	return ret;
@@ -607,6 +618,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
 	.get_vendor_id          = vdpasim_get_vendor_id,
 	.get_status             = vdpasim_get_status,
 	.set_status             = vdpasim_set_status,
+	.reset			= vdpasim_reset,
 	.get_config_size        = vdpasim_get_config_size,
 	.get_config             = vdpasim_get_config,
 	.set_config             = vdpasim_set_config,
@@ -635,6 +647,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
 	.get_vendor_id          = vdpasim_get_vendor_id,
 	.get_status             = vdpasim_get_status,
 	.set_status             = vdpasim_set_status,
+	.reset			= vdpasim_reset,
 	.get_config_size        = vdpasim_get_config_size,
 	.get_config             = vdpasim_get_config,
 	.set_config             = vdpasim_set_config,
diff --git a/drivers/vdpa/vdpa_user/Makefile b/drivers/vdpa/vdpa_user/Makefile
new file mode 100644
index 000000000000..260e0b26af99
--- /dev/null
+++ b/drivers/vdpa/vdpa_user/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+vduse-y := vduse_dev.o iova_domain.o
+
+obj-$(CONFIG_VDPA_USER) += vduse.o
diff --git a/drivers/vdpa/vdpa_user/iova_domain.c b/drivers/vdpa/vdpa_user/iova_domain.c
new file mode 100644
index 000000000000..1daae2608860
--- /dev/null
+++ b/drivers/vdpa/vdpa_user/iova_domain.c
@@ -0,0 +1,545 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * MMU-based software IOTLB.
+ *
+ * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
+ *
+ * Author: Xie Yongji <xieyongji@bytedance.com>
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/anon_inodes.h>
+#include <linux/highmem.h>
+#include <linux/vmalloc.h>
+#include <linux/vdpa.h>
+
+#include "iova_domain.h"
+
+static int vduse_iotlb_add_range(struct vduse_iova_domain *domain,
+				 u64 start, u64 last,
+				 u64 addr, unsigned int perm,
+				 struct file *file, u64 offset)
+{
+	struct vdpa_map_file *map_file;
+	int ret;
+
+	map_file = kmalloc(sizeof(*map_file), GFP_ATOMIC);
+	if (!map_file)
+		return -ENOMEM;
+
+	map_file->file = get_file(file);
+	map_file->offset = offset;
+
+	ret = vhost_iotlb_add_range_ctx(domain->iotlb, start, last,
+					addr, perm, map_file);
+	if (ret) {
+		fput(map_file->file);
+		kfree(map_file);
+		return ret;
+	}
+	return 0;
+}
+
+static void vduse_iotlb_del_range(struct vduse_iova_domain *domain,
+				  u64 start, u64 last)
+{
+	struct vdpa_map_file *map_file;
+	struct vhost_iotlb_map *map;
+
+	while ((map = vhost_iotlb_itree_first(domain->iotlb, start, last))) {
+		map_file = (struct vdpa_map_file *)map->opaque;
+		fput(map_file->file);
+		kfree(map_file);
+		vhost_iotlb_map_free(domain->iotlb, map);
+	}
+}
+
+int vduse_domain_set_map(struct vduse_iova_domain *domain,
+			 struct vhost_iotlb *iotlb)
+{
+	struct vdpa_map_file *map_file;
+	struct vhost_iotlb_map *map;
+	u64 start = 0ULL, last = ULLONG_MAX;
+	int ret;
+
+	spin_lock(&domain->iotlb_lock);
+	vduse_iotlb_del_range(domain, start, last);
+
+	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
+	     map = vhost_iotlb_itree_next(map, start, last)) {
+		map_file = (struct vdpa_map_file *)map->opaque;
+		ret = vduse_iotlb_add_range(domain, map->start, map->last,
+					    map->addr, map->perm,
+					    map_file->file,
+					    map_file->offset);
+		if (ret)
+			goto err;
+	}
+	spin_unlock(&domain->iotlb_lock);
+
+	return 0;
+err:
+	vduse_iotlb_del_range(domain, start, last);
+	spin_unlock(&domain->iotlb_lock);
+	return ret;
+}
+
+void vduse_domain_clear_map(struct vduse_iova_domain *domain,
+			    struct vhost_iotlb *iotlb)
+{
+	struct vhost_iotlb_map *map;
+	u64 start = 0ULL, last = ULLONG_MAX;
+
+	spin_lock(&domain->iotlb_lock);
+	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
+	     map = vhost_iotlb_itree_next(map, start, last)) {
+		vduse_iotlb_del_range(domain, map->start, map->last);
+	}
+	spin_unlock(&domain->iotlb_lock);
+}
+
+static int vduse_domain_map_bounce_page(struct vduse_iova_domain *domain,
+					 u64 iova, u64 size, u64 paddr)
+{
+	struct vduse_bounce_map *map;
+	u64 last = iova + size - 1;
+
+	while (iova <= last) {
+		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
+		if (!map->bounce_page) {
+			map->bounce_page = alloc_page(GFP_ATOMIC);
+			if (!map->bounce_page)
+				return -ENOMEM;
+		}
+		map->orig_phys = paddr;
+		paddr += PAGE_SIZE;
+		iova += PAGE_SIZE;
+	}
+	return 0;
+}
+
+static void vduse_domain_unmap_bounce_page(struct vduse_iova_domain *domain,
+					   u64 iova, u64 size)
+{
+	struct vduse_bounce_map *map;
+	u64 last = iova + size - 1;
+
+	while (iova <= last) {
+		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
+		map->orig_phys = INVALID_PHYS_ADDR;
+		iova += PAGE_SIZE;
+	}
+}
+
+static void do_bounce(phys_addr_t orig, void *addr, size_t size,
+		      enum dma_data_direction dir)
+{
+	unsigned long pfn = PFN_DOWN(orig);
+	unsigned int offset = offset_in_page(orig);
+	char *buffer;
+	unsigned int sz = 0;
+
+	while (size) {
+		sz = min_t(size_t, PAGE_SIZE - offset, size);
+
+		buffer = kmap_atomic(pfn_to_page(pfn));
+		if (dir == DMA_TO_DEVICE)
+			memcpy(addr, buffer + offset, sz);
+		else
+			memcpy(buffer + offset, addr, sz);
+		kunmap_atomic(buffer);
+
+		size -= sz;
+		pfn++;
+		addr += sz;
+		offset = 0;
+	}
+}
+
+static void vduse_domain_bounce(struct vduse_iova_domain *domain,
+				dma_addr_t iova, size_t size,
+				enum dma_data_direction dir)
+{
+	struct vduse_bounce_map *map;
+	unsigned int offset;
+	void *addr;
+	size_t sz;
+
+	if (iova >= domain->bounce_size)
+		return;
+
+	while (size) {
+		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
+		offset = offset_in_page(iova);
+		sz = min_t(size_t, PAGE_SIZE - offset, size);
+
+		if (WARN_ON(!map->bounce_page ||
+			    map->orig_phys == INVALID_PHYS_ADDR))
+			return;
+
+		addr = page_address(map->bounce_page) + offset;
+		do_bounce(map->orig_phys + offset, addr, sz, dir);
+		size -= sz;
+		iova += sz;
+	}
+}
+
+static struct page *
+vduse_domain_get_coherent_page(struct vduse_iova_domain *domain, u64 iova)
+{
+	u64 start = iova & PAGE_MASK;
+	u64 last = start + PAGE_SIZE - 1;
+	struct vhost_iotlb_map *map;
+	struct page *page = NULL;
+
+	spin_lock(&domain->iotlb_lock);
+	map = vhost_iotlb_itree_first(domain->iotlb, start, last);
+	if (!map)
+		goto out;
+
+	page = pfn_to_page((map->addr + iova - map->start) >> PAGE_SHIFT);
+	get_page(page);
+out:
+	spin_unlock(&domain->iotlb_lock);
+
+	return page;
+}
+
+static struct page *
+vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova)
+{
+	struct vduse_bounce_map *map;
+	struct page *page = NULL;
+
+	spin_lock(&domain->iotlb_lock);
+	map = &domain->bounce_maps[iova >> PAGE_SHIFT];
+	if (!map->bounce_page)
+		goto out;
+
+	page = map->bounce_page;
+	get_page(page);
+out:
+	spin_unlock(&domain->iotlb_lock);
+
+	return page;
+}
+
+static void
+vduse_domain_free_bounce_pages(struct vduse_iova_domain *domain)
+{
+	struct vduse_bounce_map *map;
+	unsigned long pfn, bounce_pfns;
+
+	bounce_pfns = domain->bounce_size >> PAGE_SHIFT;
+
+	for (pfn = 0; pfn < bounce_pfns; pfn++) {
+		map = &domain->bounce_maps[pfn];
+		if (WARN_ON(map->orig_phys != INVALID_PHYS_ADDR))
+			continue;
+
+		if (!map->bounce_page)
+			continue;
+
+		__free_page(map->bounce_page);
+		map->bounce_page = NULL;
+	}
+}
+
+void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain)
+{
+	if (!domain->bounce_map)
+		return;
+
+	spin_lock(&domain->iotlb_lock);
+	if (!domain->bounce_map)
+		goto unlock;
+
+	vduse_iotlb_del_range(domain, 0, domain->bounce_size - 1);
+	domain->bounce_map = 0;
+unlock:
+	spin_unlock(&domain->iotlb_lock);
+}
+
+static int vduse_domain_init_bounce_map(struct vduse_iova_domain *domain)
+{
+	int ret = 0;
+
+	if (domain->bounce_map)
+		return 0;
+
+	spin_lock(&domain->iotlb_lock);
+	if (domain->bounce_map)
+		goto unlock;
+
+	ret = vduse_iotlb_add_range(domain, 0, domain->bounce_size - 1,
+				    0, VHOST_MAP_RW, domain->file, 0);
+	if (ret)
+		goto unlock;
+
+	domain->bounce_map = 1;
+unlock:
+	spin_unlock(&domain->iotlb_lock);
+	return ret;
+}
+
+static dma_addr_t
+vduse_domain_alloc_iova(struct iova_domain *iovad,
+			unsigned long size, unsigned long limit)
+{
+	unsigned long shift = iova_shift(iovad);
+	unsigned long iova_len = iova_align(iovad, size) >> shift;
+	unsigned long iova_pfn;
+
+	/*
+	 * Freeing non-power-of-two-sized allocations back into the IOVA caches
+	 * will come back to bite us badly, so we have to waste a bit of space
+	 * rounding up anything cacheable to make sure that can't happen. The
+	 * order of the unadjusted size will still match upon freeing.
+	 */
+	if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
+		iova_len = roundup_pow_of_two(iova_len);
+	iova_pfn = alloc_iova_fast(iovad, iova_len, limit >> shift, true);
+
+	return iova_pfn << shift;
+}
+
+static void vduse_domain_free_iova(struct iova_domain *iovad,
+				   dma_addr_t iova, size_t size)
+{
+	unsigned long shift = iova_shift(iovad);
+	unsigned long iova_len = iova_align(iovad, size) >> shift;
+
+	free_iova_fast(iovad, iova >> shift, iova_len);
+}
+
+dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain,
+				 struct page *page, unsigned long offset,
+				 size_t size, enum dma_data_direction dir,
+				 unsigned long attrs)
+{
+	struct iova_domain *iovad = &domain->stream_iovad;
+	unsigned long limit = domain->bounce_size - 1;
+	phys_addr_t pa = page_to_phys(page) + offset;
+	dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);
+
+	if (!iova)
+		return DMA_MAPPING_ERROR;
+
+	if (vduse_domain_init_bounce_map(domain))
+		goto err;
+
+	if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa))
+		goto err;
+
+	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
+		vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE);
+
+	return iova;
+err:
+	vduse_domain_free_iova(iovad, iova, size);
+	return DMA_MAPPING_ERROR;
+}
+
+void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
+			     dma_addr_t dma_addr, size_t size,
+			     enum dma_data_direction dir, unsigned long attrs)
+{
+	struct iova_domain *iovad = &domain->stream_iovad;
+
+	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
+		vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);
+
+	vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size);
+	vduse_domain_free_iova(iovad, dma_addr, size);
+}
+
+void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain,
+				  size_t size, dma_addr_t *dma_addr,
+				  gfp_t flag, unsigned long attrs)
+{
+	struct iova_domain *iovad = &domain->consistent_iovad;
+	unsigned long limit = domain->iova_limit;
+	dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);
+	void *orig = alloc_pages_exact(size, flag);
+
+	if (!iova || !orig)
+		goto err;
+
+	spin_lock(&domain->iotlb_lock);
+	if (vduse_iotlb_add_range(domain, (u64)iova, (u64)iova + size - 1,
+				  virt_to_phys(orig), VHOST_MAP_RW,
+				  domain->file, (u64)iova)) {
+		spin_unlock(&domain->iotlb_lock);
+		goto err;
+	}
+	spin_unlock(&domain->iotlb_lock);
+
+	*dma_addr = iova;
+
+	return orig;
+err:
+	*dma_addr = DMA_MAPPING_ERROR;
+	if (orig)
+		free_pages_exact(orig, size);
+	if (iova)
+		vduse_domain_free_iova(iovad, iova, size);
+
+	return NULL;
+}
+
+void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
+				void *vaddr, dma_addr_t dma_addr,
+				unsigned long attrs)
+{
+	struct iova_domain *iovad = &domain->consistent_iovad;
+	struct vhost_iotlb_map *map;
+	struct vdpa_map_file *map_file;
+	phys_addr_t pa;
+
+	spin_lock(&domain->iotlb_lock);
+	map = vhost_iotlb_itree_first(domain->iotlb, (u64)dma_addr,
+				      (u64)dma_addr + size - 1);
+	if (WARN_ON(!map)) {
+		spin_unlock(&domain->iotlb_lock);
+		return;
+	}
+	map_file = (struct vdpa_map_file *)map->opaque;
+	fput(map_file->file);
+	kfree(map_file);
+	pa = map->addr;
+	vhost_iotlb_map_free(domain->iotlb, map);
+	spin_unlock(&domain->iotlb_lock);
+
+	vduse_domain_free_iova(iovad, dma_addr, size);
+	free_pages_exact(phys_to_virt(pa), size);
+}
+
+static vm_fault_t vduse_domain_mmap_fault(struct vm_fault *vmf)
+{
+	struct vduse_iova_domain *domain = vmf->vma->vm_private_data;
+	unsigned long iova = vmf->pgoff << PAGE_SHIFT;
+	struct page *page;
+
+	if (!domain)
+		return VM_FAULT_SIGBUS;
+
+	if (iova < domain->bounce_size)
+		page = vduse_domain_get_bounce_page(domain, iova);
+	else
+		page = vduse_domain_get_coherent_page(domain, iova);
+
+	if (!page)
+		return VM_FAULT_SIGBUS;
+
+	vmf->page = page;
+
+	return 0;
+}
+
+static const struct vm_operations_struct vduse_domain_mmap_ops = {
+	.fault = vduse_domain_mmap_fault,
+};
+
+static int vduse_domain_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct vduse_iova_domain *domain = file->private_data;
+
+	vma->vm_flags |= VM_DONTDUMP | VM_DONTEXPAND;
+	vma->vm_private_data = domain;
+	vma->vm_ops = &vduse_domain_mmap_ops;
+
+	return 0;
+}
+
+static int vduse_domain_release(struct inode *inode, struct file *file)
+{
+	struct vduse_iova_domain *domain = file->private_data;
+
+	spin_lock(&domain->iotlb_lock);
+	vduse_iotlb_del_range(domain, 0, ULLONG_MAX);
+	vduse_domain_free_bounce_pages(domain);
+	spin_unlock(&domain->iotlb_lock);
+	put_iova_domain(&domain->stream_iovad);
+	put_iova_domain(&domain->consistent_iovad);
+	vhost_iotlb_free(domain->iotlb);
+	vfree(domain->bounce_maps);
+	kfree(domain);
+
+	return 0;
+}
+
+static const struct file_operations vduse_domain_fops = {
+	.owner = THIS_MODULE,
+	.mmap = vduse_domain_mmap,
+	.release = vduse_domain_release,
+};
+
+void vduse_domain_destroy(struct vduse_iova_domain *domain)
+{
+	fput(domain->file);
+}
+
+struct vduse_iova_domain *
+vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
+{
+	struct vduse_iova_domain *domain;
+	struct file *file;
+	struct vduse_bounce_map *map;
+	unsigned long pfn, bounce_pfns;
+
+	bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT;
+	if (iova_limit <= bounce_size)
+		return NULL;
+
+	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+	if (!domain)
+		return NULL;
+
+	domain->iotlb = vhost_iotlb_alloc(0, 0);
+	if (!domain->iotlb)
+		goto err_iotlb;
+
+	domain->iova_limit = iova_limit;
+	domain->bounce_size = PAGE_ALIGN(bounce_size);
+	domain->bounce_maps = vzalloc(bounce_pfns *
+				sizeof(struct vduse_bounce_map));
+	if (!domain->bounce_maps)
+		goto err_map;
+
+	for (pfn = 0; pfn < bounce_pfns; pfn++) {
+		map = &domain->bounce_maps[pfn];
+		map->orig_phys = INVALID_PHYS_ADDR;
+	}
+	file = anon_inode_getfile("[vduse-domain]", &vduse_domain_fops,
+				domain, O_RDWR);
+	if (IS_ERR(file))
+		goto err_file;
+
+	domain->file = file;
+	spin_lock_init(&domain->iotlb_lock);
+	init_iova_domain(&domain->stream_iovad,
+			PAGE_SIZE, IOVA_START_PFN);
+	init_iova_domain(&domain->consistent_iovad,
+			PAGE_SIZE, bounce_pfns);
+
+	return domain;
+err_file:
+	vfree(domain->bounce_maps);
+err_map:
+	vhost_iotlb_free(domain->iotlb);
+err_iotlb:
+	kfree(domain);
+	return NULL;
+}
+
+int vduse_domain_init(void)
+{
+	return iova_cache_get();
+}
+
+void vduse_domain_exit(void)
+{
+	iova_cache_put();
+}
diff --git a/drivers/vdpa/vdpa_user/iova_domain.h b/drivers/vdpa/vdpa_user/iova_domain.h
new file mode 100644
index 000000000000..2722d9b8e21a
--- /dev/null
+++ b/drivers/vdpa/vdpa_user/iova_domain.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * MMU-based software IOTLB.
+ *
+ * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
+ *
+ * Author: Xie Yongji <xieyongji@bytedance.com>
+ *
+ */
+
+#ifndef _VDUSE_IOVA_DOMAIN_H
+#define _VDUSE_IOVA_DOMAIN_H
+
+#include <linux/iova.h>
+#include <linux/dma-mapping.h>
+#include <linux/vhost_iotlb.h>
+
+#define IOVA_START_PFN 1
+
+#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
+
+struct vduse_bounce_map {
+	struct page *bounce_page;
+	u64 orig_phys;
+};
+
+struct vduse_iova_domain {
+	struct iova_domain stream_iovad;
+	struct iova_domain consistent_iovad;
+	struct vduse_bounce_map *bounce_maps;
+	size_t bounce_size;
+	unsigned long iova_limit;
+	int bounce_map;
+	struct vhost_iotlb *iotlb;
+	spinlock_t iotlb_lock;
+	struct file *file;
+};
+
+int vduse_domain_set_map(struct vduse_iova_domain *domain,
+			 struct vhost_iotlb *iotlb);
+
+void vduse_domain_clear_map(struct vduse_iova_domain *domain,
+			    struct vhost_iotlb *iotlb);
+
+dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain,
+				 struct page *page, unsigned long offset,
+				 size_t size, enum dma_data_direction dir,
+				 unsigned long attrs);
+
+void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
+			     dma_addr_t dma_addr, size_t size,
+			     enum dma_data_direction dir, unsigned long attrs);
+
+void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain,
+				  size_t size, dma_addr_t *dma_addr,
+				  gfp_t flag, unsigned long attrs);
+
+void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
+				void *vaddr, dma_addr_t dma_addr,
+				unsigned long attrs);
+
+void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain);
+
+void vduse_domain_destroy(struct vduse_iova_domain *domain);
+
+struct vduse_iova_domain *vduse_domain_create(unsigned long iova_limit,
+					      size_t bounce_size);
+
+int vduse_domain_init(void);
+
+void vduse_domain_exit(void);
+
+#endif /* _VDUSE_IOVA_DOMAIN_H */
diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
new file mode 100644
index 000000000000..29a38ecba19e
--- /dev/null
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -0,0 +1,1641 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VDUSE: vDPA Device in Userspace
+ *
+ * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
+ *
+ * Author: Xie Yongji <xieyongji@bytedance.com>
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/eventfd.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/dma-map-ops.h>
+#include <linux/poll.h>
+#include <linux/file.h>
+#include <linux/uio.h>
+#include <linux/vdpa.h>
+#include <linux/nospec.h>
+#include <uapi/linux/vduse.h>
+#include <uapi/linux/vdpa.h>
+#include <uapi/linux/virtio_config.h>
+#include <uapi/linux/virtio_ids.h>
+#include <uapi/linux/virtio_blk.h>
+#include <linux/mod_devicetable.h>
+
+#include "iova_domain.h"
+
+#define DRV_AUTHOR   "Yongji Xie <xieyongji@bytedance.com>"
+#define DRV_DESC     "vDPA Device in Userspace"
+#define DRV_LICENSE  "GPL v2"
+
+#define VDUSE_DEV_MAX (1U << MINORBITS)
+#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
+#define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
+#define VDUSE_MSG_DEFAULT_TIMEOUT 30
+
+struct vduse_virtqueue {
+	u16 index;
+	u16 num_max;
+	u32 num;
+	u64 desc_addr;
+	u64 driver_addr;
+	u64 device_addr;
+	struct vdpa_vq_state state;
+	bool ready;
+	bool kicked;
+	spinlock_t kick_lock;
+	spinlock_t irq_lock;
+	struct eventfd_ctx *kickfd;
+	struct vdpa_callback cb;
+	struct work_struct inject;
+	struct work_struct kick;
+};
+
+struct vduse_dev;
+
+struct vduse_vdpa {
+	struct vdpa_device vdpa;
+	struct vduse_dev *dev;
+};
+
+struct vduse_dev {
+	struct vduse_vdpa *vdev;
+	struct device *dev;
+	struct vduse_virtqueue *vqs;
+	struct vduse_iova_domain *domain;
+	char *name;
+	struct mutex lock;
+	spinlock_t msg_lock;
+	u64 msg_unique;
+	u32 msg_timeout;
+	wait_queue_head_t waitq;
+	struct list_head send_list;
+	struct list_head recv_list;
+	struct vdpa_callback config_cb;
+	struct work_struct inject;
+	spinlock_t irq_lock;
+	int minor;
+	bool broken;
+	bool connected;
+	u64 api_version;
+	u64 device_features;
+	u64 driver_features;
+	u32 device_id;
+	u32 vendor_id;
+	u32 generation;
+	u32 config_size;
+	void *config;
+	u8 status;
+	u32 vq_num;
+	u32 vq_align;
+};
+
+struct vduse_dev_msg {
+	struct vduse_dev_request req;
+	struct vduse_dev_response resp;
+	struct list_head list;
+	wait_queue_head_t waitq;
+	bool completed;
+};
+
+struct vduse_control {
+	u64 api_version;
+};
+
+static DEFINE_MUTEX(vduse_lock);
+static DEFINE_IDR(vduse_idr);
+
+static dev_t vduse_major;
+static struct class *vduse_class;
+static struct cdev vduse_ctrl_cdev;
+static struct cdev vduse_cdev;
+static struct workqueue_struct *vduse_irq_wq;
+
+static u32 allowed_device_id[] = {
+	VIRTIO_ID_BLOCK,
+};
+
+static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
+{
+	struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);
+
+	return vdev->dev;
+}
+
+static inline struct vduse_dev *dev_to_vduse(struct device *dev)
+{
+	struct vdpa_device *vdpa = dev_to_vdpa(dev);
+
+	return vdpa_to_vduse(vdpa);
+}
+
+static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
+					    uint32_t request_id)
+{
+	struct vduse_dev_msg *msg;
+
+	list_for_each_entry(msg, head, list) {
+		if (msg->req.request_id == request_id) {
+			list_del(&msg->list);
+			return msg;
+		}
+	}
+
+	return NULL;
+}
+
+static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
+{
+	struct vduse_dev_msg *msg = NULL;
+
+	if (!list_empty(head)) {
+		msg = list_first_entry(head, struct vduse_dev_msg, list);
+		list_del(&msg->list);
+	}
+
+	return msg;
+}
+
+static void vduse_enqueue_msg(struct list_head *head,
+			      struct vduse_dev_msg *msg)
+{
+	list_add_tail(&msg->list, head);
+}
+
+static void vduse_dev_broken(struct vduse_dev *dev)
+{
+	struct vduse_dev_msg *msg, *tmp;
+
+	if (unlikely(dev->broken))
+		return;
+
+	list_splice_init(&dev->recv_list, &dev->send_list);
+	list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
+		list_del(&msg->list);
+		msg->completed = 1;
+		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
+		wake_up(&msg->waitq);
+	}
+	dev->broken = true;
+	wake_up(&dev->waitq);
+}
+
+static int vduse_dev_msg_sync(struct vduse_dev *dev,
+			      struct vduse_dev_msg *msg)
+{
+	int ret;
+
+	if (unlikely(dev->broken))
+		return -EIO;
+
+	init_waitqueue_head(&msg->waitq);
+	spin_lock(&dev->msg_lock);
+	if (unlikely(dev->broken)) {
+		spin_unlock(&dev->msg_lock);
+		return -EIO;
+	}
+	msg->req.request_id = dev->msg_unique++;
+	vduse_enqueue_msg(&dev->send_list, msg);
+	wake_up(&dev->waitq);
+	spin_unlock(&dev->msg_lock);
+	if (dev->msg_timeout)
+		ret = wait_event_killable_timeout(msg->waitq, msg->completed,
+						  (long)dev->msg_timeout * HZ);
+	else
+		ret = wait_event_killable(msg->waitq, msg->completed);
+
+	spin_lock(&dev->msg_lock);
+	if (!msg->completed) {
+		list_del(&msg->list);
+		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
+		/* Mark the device as malfunction when there is a timeout */
+		if (!ret)
+			vduse_dev_broken(dev);
+	}
+	ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
+	spin_unlock(&dev->msg_lock);
+
+	return ret;
+}
+
+static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
+					 struct vduse_virtqueue *vq,
+					 struct vdpa_vq_state_packed *packed)
+{
+	struct vduse_dev_msg msg = { 0 };
+	int ret;
+
+	msg.req.type = VDUSE_GET_VQ_STATE;
+	msg.req.vq_state.index = vq->index;
+
+	ret = vduse_dev_msg_sync(dev, &msg);
+	if (ret)
+		return ret;
+
+	packed->last_avail_counter =
+			msg.resp.vq_state.packed.last_avail_counter & 0x0001;
+	packed->last_avail_idx =
+			msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
+	packed->last_used_counter =
+			msg.resp.vq_state.packed.last_used_counter & 0x0001;
+	packed->last_used_idx =
+			msg.resp.vq_state.packed.last_used_idx & 0x7FFF;
+
+	return 0;
+}
+
+static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
+					struct vduse_virtqueue *vq,
+					struct vdpa_vq_state_split *split)
+{
+	struct vduse_dev_msg msg = { 0 };
+	int ret;
+
+	msg.req.type = VDUSE_GET_VQ_STATE;
+	msg.req.vq_state.index = vq->index;
+
+	ret = vduse_dev_msg_sync(dev, &msg);
+	if (ret)
+		return ret;
+
+	split->avail_index = msg.resp.vq_state.split.avail_index;
+
+	return 0;
+}
+
+static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
+{
+	struct vduse_dev_msg msg = { 0 };
+
+	msg.req.type = VDUSE_SET_STATUS;
+	msg.req.s.status = status;
+
+	return vduse_dev_msg_sync(dev, &msg);
+}
+
+static int vduse_dev_update_iotlb(struct vduse_dev *dev,
+				  u64 start, u64 last)
+{
+	struct vduse_dev_msg msg = { 0 };
+
+	if (last < start)
+		return -EINVAL;
+
+	msg.req.type = VDUSE_UPDATE_IOTLB;
+	msg.req.iova.start = start;
+	msg.req.iova.last = last;
+
+	return vduse_dev_msg_sync(dev, &msg);
+}
+
+static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	struct file *file = iocb->ki_filp;
+	struct vduse_dev *dev = file->private_data;
+	struct vduse_dev_msg *msg;
+	int size = sizeof(struct vduse_dev_request);
+	ssize_t ret;
+
+	if (iov_iter_count(to) < size)
+		return -EINVAL;
+
+	spin_lock(&dev->msg_lock);
+	while (1) {
+		msg = vduse_dequeue_msg(&dev->send_list);
+		if (msg)
+			break;
+
+		ret = -EAGAIN;
+		if (file->f_flags & O_NONBLOCK)
+			goto unlock;
+
+		spin_unlock(&dev->msg_lock);
+		ret = wait_event_interruptible_exclusive(dev->waitq,
+					!list_empty(&dev->send_list));
+		if (ret)
+			return ret;
+
+		spin_lock(&dev->msg_lock);
+	}
+	spin_unlock(&dev->msg_lock);
+	ret = copy_to_iter(&msg->req, size, to);
+	spin_lock(&dev->msg_lock);
+	if (ret != size) {
+		ret = -EFAULT;
+		vduse_enqueue_msg(&dev->send_list, msg);
+		goto unlock;
+	}
+	vduse_enqueue_msg(&dev->recv_list, msg);
+unlock:
+	spin_unlock(&dev->msg_lock);
+
+	return ret;
+}
+
+static bool is_mem_zero(const char *ptr, int size)
+{
+	int i;
+
+	for (i = 0; i < size; i++) {
+		if (ptr[i])
+			return false;
+	}
+	return true;
+}
+
+static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct file *file = iocb->ki_filp;
+	struct vduse_dev *dev = file->private_data;
+	struct vduse_dev_response resp;
+	struct vduse_dev_msg *msg;
+	size_t ret;
+
+	ret = copy_from_iter(&resp, sizeof(resp), from);
+	if (ret != sizeof(resp))
+		return -EINVAL;
+
+	if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
+		return -EINVAL;
+
+	spin_lock(&dev->msg_lock);
+	msg = vduse_find_msg(&dev->recv_list, resp.request_id);
+	if (!msg) {
+		ret = -ENOENT;
+		goto unlock;
+	}
+
+	memcpy(&msg->resp, &resp, sizeof(resp));
+	msg->completed = 1;
+	wake_up(&msg->waitq);
+unlock:
+	spin_unlock(&dev->msg_lock);
+
+	return ret;
+}
+
+static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
+{
+	struct vduse_dev *dev = file->private_data;
+	__poll_t mask = 0;
+
+	poll_wait(file, &dev->waitq, wait);
+
+	spin_lock(&dev->msg_lock);
+
+	if (unlikely(dev->broken))
+		mask |= EPOLLERR;
+	if (!list_empty(&dev->send_list))
+		mask |= EPOLLIN | EPOLLRDNORM;
+	if (!list_empty(&dev->recv_list))
+		mask |= EPOLLOUT | EPOLLWRNORM;
+
+	spin_unlock(&dev->msg_lock);
+
+	return mask;
+}
+
+static void vduse_dev_reset(struct vduse_dev *dev)
+{
+	int i;
+	struct vduse_iova_domain *domain = dev->domain;
+
+	/* The coherent mappings are handled in vduse_dev_free_coherent() */
+	if (domain->bounce_map)
+		vduse_domain_reset_bounce_map(domain);
+
+	dev->status = 0;
+	dev->driver_features = 0;
+	dev->generation++;
+	spin_lock(&dev->irq_lock);
+	dev->config_cb.callback = NULL;
+	dev->config_cb.private = NULL;
+	spin_unlock(&dev->irq_lock);
+	flush_work(&dev->inject);
+
+	for (i = 0; i < dev->vq_num; i++) {
+		struct vduse_virtqueue *vq = &dev->vqs[i];
+
+		vq->ready = false;
+		vq->desc_addr = 0;
+		vq->driver_addr = 0;
+		vq->device_addr = 0;
+		vq->num = 0;
+		memset(&vq->state, 0, sizeof(vq->state));
+
+		spin_lock(&vq->kick_lock);
+		vq->kicked = false;
+		if (vq->kickfd)
+			eventfd_ctx_put(vq->kickfd);
+		vq->kickfd = NULL;
+		spin_unlock(&vq->kick_lock);
+
+		spin_lock(&vq->irq_lock);
+		vq->cb.callback = NULL;
+		vq->cb.private = NULL;
+		spin_unlock(&vq->irq_lock);
+		flush_work(&vq->inject);
+		flush_work(&vq->kick);
+	}
+}
+
+static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
+				u64 desc_area, u64 driver_area,
+				u64 device_area)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+	struct vduse_virtqueue *vq = &dev->vqs[idx];
+
+	vq->desc_addr = desc_area;
+	vq->driver_addr = driver_area;
+	vq->device_addr = device_area;
+
+	return 0;
+}
+
+static void vduse_vq_kick(struct vduse_virtqueue *vq)
+{
+	spin_lock(&vq->kick_lock);
+	if (!vq->ready)
+		goto unlock;
+
+	if (vq->kickfd)
+		eventfd_signal(vq->kickfd, 1);
+	else
+		vq->kicked = true;
+unlock:
+	spin_unlock(&vq->kick_lock);
+}
+
+static void vduse_vq_kick_work(struct work_struct *work)
+{
+	struct vduse_virtqueue *vq = container_of(work,
+					struct vduse_virtqueue, kick);
+
+	vduse_vq_kick(vq);
+}
+
+static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+	struct vduse_virtqueue *vq = &dev->vqs[idx];
+
+	if (!eventfd_signal_allowed()) {
+		schedule_work(&vq->kick);
+		return;
+	}
+	vduse_vq_kick(vq);
+}
+
+static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
+			      struct vdpa_callback *cb)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+	struct vduse_virtqueue *vq = &dev->vqs[idx];
+
+	spin_lock(&vq->irq_lock);
+	vq->cb.callback = cb->callback;
+	vq->cb.private = cb->private;
+	spin_unlock(&vq->irq_lock);
+}
+
+static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+	struct vduse_virtqueue *vq = &dev->vqs[idx];
+
+	vq->num = num;
+}
+
+static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
+					u16 idx, bool ready)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+	struct vduse_virtqueue *vq = &dev->vqs[idx];
+
+	vq->ready = ready;
+}
+
+static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+	struct vduse_virtqueue *vq = &dev->vqs[idx];
+
+	return vq->ready;
+}
+
+static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
+				const struct vdpa_vq_state *state)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+	struct vduse_virtqueue *vq = &dev->vqs[idx];
+
+	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
+		vq->state.packed.last_avail_counter =
+				state->packed.last_avail_counter;
+		vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
+		vq->state.packed.last_used_counter =
+				state->packed.last_used_counter;
+		vq->state.packed.last_used_idx = state->packed.last_used_idx;
+	} else
+		vq->state.split.avail_index = state->split.avail_index;
+
+	return 0;
+}
+
+static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
+				struct vdpa_vq_state *state)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+	struct vduse_virtqueue *vq = &dev->vqs[idx];
+
+	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
+		return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);
+
+	return vduse_dev_get_vq_state_split(dev, vq, &state->split);
+}
+
+static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+	return dev->vq_align;
+}
+
+static u64 vduse_vdpa_get_features(struct vdpa_device *vdpa)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+	return dev->device_features;
+}
+
+static int vduse_vdpa_set_features(struct vdpa_device *vdpa, u64 features)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+	dev->driver_features = features;
+	return 0;
+}
+
+static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
+				  struct vdpa_callback *cb)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+	spin_lock(&dev->irq_lock);
+	dev->config_cb.callback = cb->callback;
+	dev->config_cb.private = cb->private;
+	spin_unlock(&dev->irq_lock);
+}
+
+static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+	u16 num_max = 0;
+	int i;
+
+	for (i = 0; i < dev->vq_num; i++)
+		if (num_max < dev->vqs[i].num_max)
+			num_max = dev->vqs[i].num_max;
+
+	return num_max;
+}
+
+static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+	return dev->device_id;
+}
+
+static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+	return dev->vendor_id;
+}
+
+static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+	return dev->status;
+}
+
+static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+	if (vduse_dev_set_status(dev, status))
+		return;
+
+	dev->status = status;
+}
+
+static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+	return dev->config_size;
+}
+
+static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
+				  void *buf, unsigned int len)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+	if (len > dev->config_size - offset)
+		return;
+
+	memcpy(buf, dev->config + offset, len);
+}
+
+static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
+			const void *buf, unsigned int len)
+{
+	/* Now we only support read-only configuration space */
+}
+
+static int vduse_vdpa_reset(struct vdpa_device *vdpa)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+	if (vduse_dev_set_status(dev, 0))
+		return -EIO;
+
+	vduse_dev_reset(dev);
+
+	return 0;
+}
+
+static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+	return dev->generation;
+}
+
+static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
+				struct vhost_iotlb *iotlb)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+	int ret;
+
+	ret = vduse_domain_set_map(dev->domain, iotlb);
+	if (ret)
+		return ret;
+
+	ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
+	if (ret) {
+		vduse_domain_clear_map(dev->domain, iotlb);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void vduse_vdpa_free(struct vdpa_device *vdpa)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+	dev->vdev = NULL;
+}
+
+static const struct vdpa_config_ops vduse_vdpa_config_ops = {
+	.set_vq_address		= vduse_vdpa_set_vq_address,
+	.kick_vq		= vduse_vdpa_kick_vq,
+	.set_vq_cb		= vduse_vdpa_set_vq_cb,
+	.set_vq_num             = vduse_vdpa_set_vq_num,
+	.set_vq_ready		= vduse_vdpa_set_vq_ready,
+	.get_vq_ready		= vduse_vdpa_get_vq_ready,
+	.set_vq_state		= vduse_vdpa_set_vq_state,
+	.get_vq_state		= vduse_vdpa_get_vq_state,
+	.get_vq_align		= vduse_vdpa_get_vq_align,
+	.get_features		= vduse_vdpa_get_features,
+	.set_features		= vduse_vdpa_set_features,
+	.set_config_cb		= vduse_vdpa_set_config_cb,
+	.get_vq_num_max		= vduse_vdpa_get_vq_num_max,
+	.get_device_id		= vduse_vdpa_get_device_id,
+	.get_vendor_id		= vduse_vdpa_get_vendor_id,
+	.get_status		= vduse_vdpa_get_status,
+	.set_status		= vduse_vdpa_set_status,
+	.get_config_size	= vduse_vdpa_get_config_size,
+	.get_config		= vduse_vdpa_get_config,
+	.set_config		= vduse_vdpa_set_config,
+	.get_generation		= vduse_vdpa_get_generation,
+	.reset			= vduse_vdpa_reset,
+	.set_map		= vduse_vdpa_set_map,
+	.free			= vduse_vdpa_free,
+};
+
+static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
+				     unsigned long offset, size_t size,
+				     enum dma_data_direction dir,
+				     unsigned long attrs)
+{
+	struct vduse_dev *vdev = dev_to_vduse(dev);
+	struct vduse_iova_domain *domain = vdev->domain;
+
+	return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
+}
+
+static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
+				size_t size, enum dma_data_direction dir,
+				unsigned long attrs)
+{
+	struct vduse_dev *vdev = dev_to_vduse(dev);
+	struct vduse_iova_domain *domain = vdev->domain;
+
+	return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
+}
+
+static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
+					dma_addr_t *dma_addr, gfp_t flag,
+					unsigned long attrs)
+{
+	struct vduse_dev *vdev = dev_to_vduse(dev);
+	struct vduse_iova_domain *domain = vdev->domain;
+	unsigned long iova;
+	void *addr;
+
+	*dma_addr = DMA_MAPPING_ERROR;
+	addr = vduse_domain_alloc_coherent(domain, size,
+				(dma_addr_t *)&iova, flag, attrs);
+	if (!addr)
+		return NULL;
+
+	*dma_addr = (dma_addr_t)iova;
+
+	return addr;
+}
+
+static void vduse_dev_free_coherent(struct device *dev, size_t size,
+					void *vaddr, dma_addr_t dma_addr,
+					unsigned long attrs)
+{
+	struct vduse_dev *vdev = dev_to_vduse(dev);
+	struct vduse_iova_domain *domain = vdev->domain;
+
+	vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
+}
+
+static size_t vduse_dev_max_mapping_size(struct device *dev)
+{
+	struct vduse_dev *vdev = dev_to_vduse(dev);
+	struct vduse_iova_domain *domain = vdev->domain;
+
+	return domain->bounce_size;
+}
+
+static const struct dma_map_ops vduse_dev_dma_ops = {
+	.map_page = vduse_dev_map_page,
+	.unmap_page = vduse_dev_unmap_page,
+	.alloc = vduse_dev_alloc_coherent,
+	.free = vduse_dev_free_coherent,
+	.max_mapping_size = vduse_dev_max_mapping_size,
+};
+
+static unsigned int perm_to_file_flags(u8 perm)
+{
+	unsigned int flags = 0;
+
+	switch (perm) {
+	case VDUSE_ACCESS_WO:
+		flags |= O_WRONLY;
+		break;
+	case VDUSE_ACCESS_RO:
+		flags |= O_RDONLY;
+		break;
+	case VDUSE_ACCESS_RW:
+		flags |= O_RDWR;
+		break;
+	default:
+		WARN(1, "invalidate vhost IOTLB permission\n");
+		break;
+	}
+
+	return flags;
+}
+
+static int vduse_kickfd_setup(struct vduse_dev *dev,
+			struct vduse_vq_eventfd *eventfd)
+{
+	struct eventfd_ctx *ctx = NULL;
+	struct vduse_virtqueue *vq;
+	u32 index;
+
+	if (eventfd->index >= dev->vq_num)
+		return -EINVAL;
+
+	index = array_index_nospec(eventfd->index, dev->vq_num);
+	vq = &dev->vqs[index];
+	if (eventfd->fd >= 0) {
+		ctx = eventfd_ctx_fdget(eventfd->fd);
+		if (IS_ERR(ctx))
+			return PTR_ERR(ctx);
+	} else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
+		return 0;
+
+	spin_lock(&vq->kick_lock);
+	if (vq->kickfd)
+		eventfd_ctx_put(vq->kickfd);
+	vq->kickfd = ctx;
+	if (vq->ready && vq->kicked && vq->kickfd) {
+		eventfd_signal(vq->kickfd, 1);
+		vq->kicked = false;
+	}
+	spin_unlock(&vq->kick_lock);
+
+	return 0;
+}
+
+static bool vduse_dev_is_ready(struct vduse_dev *dev)
+{
+	int i;
+
+	for (i = 0; i < dev->vq_num; i++)
+		if (!dev->vqs[i].num_max)
+			return false;
+
+	return true;
+}
+
+static void vduse_dev_irq_inject(struct work_struct *work)
+{
+	struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
+
+	spin_lock_irq(&dev->irq_lock);
+	if (dev->config_cb.callback)
+		dev->config_cb.callback(dev->config_cb.private);
+	spin_unlock_irq(&dev->irq_lock);
+}
+
+static void vduse_vq_irq_inject(struct work_struct *work)
+{
+	struct vduse_virtqueue *vq = container_of(work,
+					struct vduse_virtqueue, inject);
+
+	spin_lock_irq(&vq->irq_lock);
+	if (vq->ready && vq->cb.callback)
+		vq->cb.callback(vq->cb.private);
+	spin_unlock_irq(&vq->irq_lock);
+}
+
+static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
+			    unsigned long arg)
+{
+	struct vduse_dev *dev = file->private_data;
+	void __user *argp = (void __user *)arg;
+	int ret;
+
+	if (unlikely(dev->broken))
+		return -EPERM;
+
+	switch (cmd) {
+	case VDUSE_IOTLB_GET_FD: {
+		struct vduse_iotlb_entry entry;
+		struct vhost_iotlb_map *map;
+		struct vdpa_map_file *map_file;
+		struct vduse_iova_domain *domain = dev->domain;
+		struct file *f = NULL;
+
+		ret = -EFAULT;
+		if (copy_from_user(&entry, argp, sizeof(entry)))
+			break;
+
+		ret = -EINVAL;
+		if (entry.start > entry.last)
+			break;
+
+		spin_lock(&domain->iotlb_lock);
+		map = vhost_iotlb_itree_first(domain->iotlb,
+					      entry.start, entry.last);
+		if (map) {
+			map_file = (struct vdpa_map_file *)map->opaque;
+			f = get_file(map_file->file);
+			entry.offset = map_file->offset;
+			entry.start = map->start;
+			entry.last = map->last;
+			entry.perm = map->perm;
+		}
+		spin_unlock(&domain->iotlb_lock);
+		ret = -EINVAL;
+		if (!f)
+			break;
+
+		ret = -EFAULT;
+		if (copy_to_user(argp, &entry, sizeof(entry))) {
+			fput(f);
+			break;
+		}
+		ret = receive_fd(f, perm_to_file_flags(entry.perm));
+		fput(f);
+		break;
+	}
+	case VDUSE_DEV_GET_FEATURES:
+		/*
+		 * Just mirror what driver wrote here.
+		 * The driver is expected to check FEATURE_OK later.
+		 */
+		ret = put_user(dev->driver_features, (u64 __user *)argp);
+		break;
+	case VDUSE_DEV_SET_CONFIG: {
+		struct vduse_config_data config;
+		unsigned long size = offsetof(struct vduse_config_data,
+					      buffer);
+
+		ret = -EFAULT;
+		if (copy_from_user(&config, argp, size))
+			break;
+
+		ret = -EINVAL;
+		if (config.length == 0 ||
+		    config.length > dev->config_size - config.offset)
+			break;
+
+		ret = -EFAULT;
+		if (copy_from_user(dev->config + config.offset, argp + size,
+				   config.length))
+			break;
+
+		ret = 0;
+		break;
+	}
+	case VDUSE_DEV_INJECT_CONFIG_IRQ:
+		ret = 0;
+		queue_work(vduse_irq_wq, &dev->inject);
+		break;
+	case VDUSE_VQ_SETUP: {
+		struct vduse_vq_config config;
+		u32 index;
+
+		ret = -EFAULT;
+		if (copy_from_user(&config, argp, sizeof(config)))
+			break;
+
+		ret = -EINVAL;
+		if (config.index >= dev->vq_num)
+			break;
+
+		if (!is_mem_zero((const char *)config.reserved,
+				 sizeof(config.reserved)))
+			break;
+
+		index = array_index_nospec(config.index, dev->vq_num);
+		dev->vqs[index].num_max = config.max_size;
+		ret = 0;
+		break;
+	}
+	case VDUSE_VQ_GET_INFO: {
+		struct vduse_vq_info vq_info;
+		struct vduse_virtqueue *vq;
+		u32 index;
+
+		ret = -EFAULT;
+		if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
+			break;
+
+		ret = -EINVAL;
+		if (vq_info.index >= dev->vq_num)
+			break;
+
+		index = array_index_nospec(vq_info.index, dev->vq_num);
+		vq = &dev->vqs[index];
+		vq_info.desc_addr = vq->desc_addr;
+		vq_info.driver_addr = vq->driver_addr;
+		vq_info.device_addr = vq->device_addr;
+		vq_info.num = vq->num;
+
+		if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
+			vq_info.packed.last_avail_counter =
+				vq->state.packed.last_avail_counter;
+			vq_info.packed.last_avail_idx =
+				vq->state.packed.last_avail_idx;
+			vq_info.packed.last_used_counter =
+				vq->state.packed.last_used_counter;
+			vq_info.packed.last_used_idx =
+				vq->state.packed.last_used_idx;
+		} else
+			vq_info.split.avail_index =
+				vq->state.split.avail_index;
+
+		vq_info.ready = vq->ready;
+
+		ret = -EFAULT;
+		if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
+			break;
+
+		ret = 0;
+		break;
+	}
+	case VDUSE_VQ_SETUP_KICKFD: {
+		struct vduse_vq_eventfd eventfd;
+
+		ret = -EFAULT;
+		if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
+			break;
+
+		ret = vduse_kickfd_setup(dev, &eventfd);
+		break;
+	}
+	case VDUSE_VQ_INJECT_IRQ: {
+		u32 index;
+
+		ret = -EFAULT;
+		if (get_user(index, (u32 __user *)argp))
+			break;
+
+		ret = -EINVAL;
+		if (index >= dev->vq_num)
+			break;
+
+		ret = 0;
+		index = array_index_nospec(index, dev->vq_num);
+		queue_work(vduse_irq_wq, &dev->vqs[index].inject);
+		break;
+	}
+	default:
+		ret = -ENOIOCTLCMD;
+		break;
+	}
+
+	return ret;
+}
+
+static int vduse_dev_release(struct inode *inode, struct file *file)
+{
+	struct vduse_dev *dev = file->private_data;
+
+	spin_lock(&dev->msg_lock);
+	/* Make sure the inflight messages can processed after reconncection */
+	list_splice_init(&dev->recv_list, &dev->send_list);
+	spin_unlock(&dev->msg_lock);
+	dev->connected = false;
+
+	return 0;
+}
+
+static struct vduse_dev *vduse_dev_get_from_minor(int minor)
+{
+	struct vduse_dev *dev;
+
+	mutex_lock(&vduse_lock);
+	dev = idr_find(&vduse_idr, minor);
+	mutex_unlock(&vduse_lock);
+
+	return dev;
+}
+
+static int vduse_dev_open(struct inode *inode, struct file *file)
+{
+	int ret;
+	struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));
+
+	if (!dev)
+		return -ENODEV;
+
+	ret = -EBUSY;
+	mutex_lock(&dev->lock);
+	if (dev->connected)
+		goto unlock;
+
+	ret = 0;
+	dev->connected = true;
+	file->private_data = dev;
+unlock:
+	mutex_unlock(&dev->lock);
+
+	return ret;
+}
+
+static const struct file_operations vduse_dev_fops = {
+	.owner		= THIS_MODULE,
+	.open		= vduse_dev_open,
+	.release	= vduse_dev_release,
+	.read_iter	= vduse_dev_read_iter,
+	.write_iter	= vduse_dev_write_iter,
+	.poll		= vduse_dev_poll,
+	.unlocked_ioctl	= vduse_dev_ioctl,
+	.compat_ioctl	= compat_ptr_ioctl,
+	.llseek		= noop_llseek,
+};
+
+static struct vduse_dev *vduse_dev_create(void)
+{
+	struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+
+	if (!dev)
+		return NULL;
+
+	mutex_init(&dev->lock);
+	spin_lock_init(&dev->msg_lock);
+	INIT_LIST_HEAD(&dev->send_list);
+	INIT_LIST_HEAD(&dev->recv_list);
+	spin_lock_init(&dev->irq_lock);
+
+	INIT_WORK(&dev->inject, vduse_dev_irq_inject);
+	init_waitqueue_head(&dev->waitq);
+
+	return dev;
+}
+
+static void vduse_dev_destroy(struct vduse_dev *dev)
+{
+	kfree(dev);
+}
+
+static struct vduse_dev *vduse_find_dev(const char *name)
+{
+	struct vduse_dev *dev;
+	int id;
+
+	idr_for_each_entry(&vduse_idr, dev, id)
+		if (!strcmp(dev->name, name))
+			return dev;
+
+	return NULL;
+}
+
+static int vduse_destroy_dev(char *name)
+{
+	struct vduse_dev *dev = vduse_find_dev(name);
+
+	if (!dev)
+		return -EINVAL;
+
+	mutex_lock(&dev->lock);
+	if (dev->vdev || dev->connected) {
+		mutex_unlock(&dev->lock);
+		return -EBUSY;
+	}
+	dev->connected = true;
+	mutex_unlock(&dev->lock);
+
+	vduse_dev_reset(dev);
+	device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
+	idr_remove(&vduse_idr, dev->minor);
+	kvfree(dev->config);
+	kfree(dev->vqs);
+	vduse_domain_destroy(dev->domain);
+	kfree(dev->name);
+	vduse_dev_destroy(dev);
+	module_put(THIS_MODULE);
+
+	return 0;
+}
+
+static bool device_is_allowed(u32 device_id)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
+		if (allowed_device_id[i] == device_id)
+			return true;
+
+	return false;
+}
+
+static bool features_is_valid(u64 features)
+{
+	if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM)))
+		return false;
+
+	/* Now we only support read-only configuration space */
+	if (features & (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
+		return false;
+
+	return true;
+}
+
+static bool vduse_validate_config(struct vduse_dev_config *config)
+{
+	if (!is_mem_zero((const char *)config->reserved,
+			 sizeof(config->reserved)))
+		return false;
+
+	if (config->vq_align > PAGE_SIZE)
+		return false;
+
+	if (config->config_size > PAGE_SIZE)
+		return false;
+
+	if (!device_is_allowed(config->device_id))
+		return false;
+
+	if (!features_is_valid(config->features))
+		return false;
+
+	return true;
+}
+
+static ssize_t msg_timeout_show(struct device *device,
+				struct device_attribute *attr, char *buf)
+{
+	struct vduse_dev *dev = dev_get_drvdata(device);
+
+	return sysfs_emit(buf, "%u\n", dev->msg_timeout);
+}
+
+static ssize_t msg_timeout_store(struct device *device,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct vduse_dev *dev = dev_get_drvdata(device);
+	int ret;
+
+	ret = kstrtouint(buf, 10, &dev->msg_timeout);
+	if (ret < 0)
+		return ret;
+
+	return count;
+}
+
+static DEVICE_ATTR_RW(msg_timeout);
+
+static struct attribute *vduse_dev_attrs[] = {
+	&dev_attr_msg_timeout.attr,
+	NULL
+};
+
+ATTRIBUTE_GROUPS(vduse_dev);
+
+static int vduse_create_dev(struct vduse_dev_config *config,
+			    void *config_buf, u64 api_version)
+{
+	int i, ret;
+	struct vduse_dev *dev;
+
+	ret = -EEXIST;
+	if (vduse_find_dev(config->name))
+		goto err;
+
+	ret = -ENOMEM;
+	dev = vduse_dev_create();
+	if (!dev)
+		goto err;
+
+	dev->api_version = api_version;
+	dev->device_features = config->features;
+	dev->device_id = config->device_id;
+	dev->vendor_id = config->vendor_id;
+	dev->name = kstrdup(config->name, GFP_KERNEL);
+	if (!dev->name)
+		goto err_str;
+
+	dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
+					  VDUSE_BOUNCE_SIZE);
+	if (!dev->domain)
+		goto err_domain;
+
+	dev->config = config_buf;
+	dev->config_size = config->config_size;
+	dev->vq_align = config->vq_align;
+	dev->vq_num = config->vq_num;
+	dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
+	if (!dev->vqs)
+		goto err_vqs;
+
+	for (i = 0; i < dev->vq_num; i++) {
+		dev->vqs[i].index = i;
+		INIT_WORK(&dev->vqs[i].inject, vduse_vq_irq_inject);
+		INIT_WORK(&dev->vqs[i].kick, vduse_vq_kick_work);
+		spin_lock_init(&dev->vqs[i].kick_lock);
+		spin_lock_init(&dev->vqs[i].irq_lock);
+	}
+
+	ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
+	if (ret < 0)
+		goto err_idr;
+
+	dev->minor = ret;
+	dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
+	dev->dev = device_create(vduse_class, NULL,
+				 MKDEV(MAJOR(vduse_major), dev->minor),
+				 dev, "%s", config->name);
+	if (IS_ERR(dev->dev)) {
+		ret = PTR_ERR(dev->dev);
+		goto err_dev;
+	}
+	__module_get(THIS_MODULE);
+
+	return 0;
+err_dev:
+	idr_remove(&vduse_idr, dev->minor);
+err_idr:
+	kfree(dev->vqs);
+err_vqs:
+	vduse_domain_destroy(dev->domain);
+err_domain:
+	kfree(dev->name);
+err_str:
+	vduse_dev_destroy(dev);
+err:
+	kvfree(config_buf);
+	return ret;
+}
+
+static long vduse_ioctl(struct file *file, unsigned int cmd,
+			unsigned long arg)
+{
+	int ret;
+	void __user *argp = (void __user *)arg;
+	struct vduse_control *control = file->private_data;
+
+	mutex_lock(&vduse_lock);
+	switch (cmd) {
+	case VDUSE_GET_API_VERSION:
+		ret = put_user(control->api_version, (u64 __user *)argp);
+		break;
+	case VDUSE_SET_API_VERSION: {
+		u64 api_version;
+
+		ret = -EFAULT;
+		if (get_user(api_version, (u64 __user *)argp))
+			break;
+
+		ret = -EINVAL;
+		if (api_version > VDUSE_API_VERSION)
+			break;
+
+		ret = 0;
+		control->api_version = api_version;
+		break;
+	}
+	case VDUSE_CREATE_DEV: {
+		struct vduse_dev_config config;
+		unsigned long size = offsetof(struct vduse_dev_config, config);
+		void *buf;
+
+		ret = -EFAULT;
+		if (copy_from_user(&config, argp, size))
+			break;
+
+		ret = -EINVAL;
+		if (vduse_validate_config(&config) == false)
+			break;
+
+		buf = vmemdup_user(argp + size, config.config_size);
+		if (IS_ERR(buf)) {
+			ret = PTR_ERR(buf);
+			break;
+		}
+		config.name[VDUSE_NAME_MAX - 1] = '\0';
+		ret = vduse_create_dev(&config, buf, control->api_version);
+		break;
+	}
+	case VDUSE_DESTROY_DEV: {
+		char name[VDUSE_NAME_MAX];
+
+		ret = -EFAULT;
+		if (copy_from_user(name, argp, VDUSE_NAME_MAX))
+			break;
+
+		name[VDUSE_NAME_MAX - 1] = '\0';
+		ret = vduse_destroy_dev(name);
+		break;
+	}
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	mutex_unlock(&vduse_lock);
+
+	return ret;
+}
+
+static int vduse_release(struct inode *inode, struct file *file)
+{
+	struct vduse_control *control = file->private_data;
+
+	kfree(control);
+	return 0;
+}
+
+static int vduse_open(struct inode *inode, struct file *file)
+{
+	struct vduse_control *control;
+
+	control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
+	if (!control)
+		return -ENOMEM;
+
+	control->api_version = VDUSE_API_VERSION;
+	file->private_data = control;
+
+	return 0;
+}
+
+static const struct file_operations vduse_ctrl_fops = {
+	.owner		= THIS_MODULE,
+	.open		= vduse_open,
+	.release	= vduse_release,
+	.unlocked_ioctl	= vduse_ioctl,
+	.compat_ioctl	= compat_ptr_ioctl,
+	.llseek		= noop_llseek,
+};
+
+static char *vduse_devnode(struct device *dev, umode_t *mode)
+{
+	return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
+}
+
+static void vduse_mgmtdev_release(struct device *dev)
+{
+}
+
+static struct device vduse_mgmtdev = {
+	.init_name = "vduse",
+	.release = vduse_mgmtdev_release,
+};
+
+static struct vdpa_mgmt_dev mgmt_dev;
+
+static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
+{
+	struct vduse_vdpa *vdev;
+	int ret;
+
+	if (dev->vdev)
+		return -EEXIST;
+
+	vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
+				 &vduse_vdpa_config_ops, name, true);
+	if (IS_ERR(vdev))
+		return PTR_ERR(vdev);
+
+	dev->vdev = vdev;
+	vdev->dev = dev;
+	vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
+	ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
+	if (ret) {
+		put_device(&vdev->vdpa.dev);
+		return ret;
+	}
+	set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
+	vdev->vdpa.dma_dev = &vdev->vdpa.dev;
+	vdev->vdpa.mdev = &mgmt_dev;
+
+	return 0;
+}
+
+static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
+{
+	struct vduse_dev *dev;
+	int ret;
+
+	mutex_lock(&vduse_lock);
+	dev = vduse_find_dev(name);
+	if (!dev || !vduse_dev_is_ready(dev)) {
+		mutex_unlock(&vduse_lock);
+		return -EINVAL;
+	}
+	ret = vduse_dev_init_vdpa(dev, name);
+	mutex_unlock(&vduse_lock);
+	if (ret)
+		return ret;
+
+	ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
+	if (ret) {
+		put_device(&dev->vdev->vdpa.dev);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
+{
+	_vdpa_unregister_device(dev);
+}
+
+static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
+	.dev_add = vdpa_dev_add,
+	.dev_del = vdpa_dev_del,
+};
+
+static struct virtio_device_id id_table[] = {
+	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
+	{ 0 },
+};
+
+static struct vdpa_mgmt_dev mgmt_dev = {
+	.device = &vduse_mgmtdev,
+	.id_table = id_table,
+	.ops = &vdpa_dev_mgmtdev_ops,
+};
+
+static int vduse_mgmtdev_init(void)
+{
+	int ret;
+
+	ret = device_register(&vduse_mgmtdev);
+	if (ret)
+		return ret;
+
+	ret = vdpa_mgmtdev_register(&mgmt_dev);
+	if (ret)
+		goto err;
+
+	return 0;
+err:
+	device_unregister(&vduse_mgmtdev);
+	return ret;
+}
+
+static void vduse_mgmtdev_exit(void)
+{
+	vdpa_mgmtdev_unregister(&mgmt_dev);
+	device_unregister(&vduse_mgmtdev);
+}
+
+static int vduse_init(void)
+{
+	int ret;
+	struct device *dev;
+
+	vduse_class = class_create(THIS_MODULE, "vduse");
+	if (IS_ERR(vduse_class))
+		return PTR_ERR(vduse_class);
+
+	vduse_class->devnode = vduse_devnode;
+	vduse_class->dev_groups = vduse_dev_groups;
+
+	ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
+	if (ret)
+		goto err_chardev_region;
+
+	/* /dev/vduse/control */
+	cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
+	vduse_ctrl_cdev.owner = THIS_MODULE;
+	ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
+	if (ret)
+		goto err_ctrl_cdev;
+
+	dev = device_create(vduse_class, NULL, vduse_major, NULL, "control");
+	if (IS_ERR(dev)) {
+		ret = PTR_ERR(dev);
+		goto err_device;
+	}
+
+	/* /dev/vduse/$DEVICE */
+	cdev_init(&vduse_cdev, &vduse_dev_fops);
+	vduse_cdev.owner = THIS_MODULE;
+	ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
+		       VDUSE_DEV_MAX - 1);
+	if (ret)
+		goto err_cdev;
+
+	vduse_irq_wq = alloc_workqueue("vduse-irq",
+				WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
+	if (!vduse_irq_wq)
+		goto err_wq;
+
+	ret = vduse_domain_init();
+	if (ret)
+		goto err_domain;
+
+	ret = vduse_mgmtdev_init();
+	if (ret)
+		goto err_mgmtdev;
+
+	return 0;
+err_mgmtdev:
+	vduse_domain_exit();
+err_domain:
+	destroy_workqueue(vduse_irq_wq);
+err_wq:
+	cdev_del(&vduse_cdev);
+err_cdev:
+	device_destroy(vduse_class, vduse_major);
+err_device:
+	cdev_del(&vduse_ctrl_cdev);
+err_ctrl_cdev:
+	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
+err_chardev_region:
+	class_destroy(vduse_class);
+	return ret;
+}
+module_init(vduse_init);
+
+static void vduse_exit(void)
+{
+	vduse_mgmtdev_exit();
+	vduse_domain_exit();
+	destroy_workqueue(vduse_irq_wq);
+	cdev_del(&vduse_cdev);
+	device_destroy(vduse_class, vduse_major);
+	cdev_del(&vduse_ctrl_cdev);
+	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
+	class_destroy(vduse_class);
+}
+module_exit(vduse_exit);
+
+MODULE_LICENSE(DRV_LICENSE);
+MODULE_AUTHOR(DRV_AUTHOR);
+MODULE_DESCRIPTION(DRV_DESC);
diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c
index fe0527329857..5bcd00246d2e 100644
--- a/drivers/vdpa/virtio_pci/vp_vdpa.c
+++ b/drivers/vdpa/virtio_pci/vp_vdpa.c
@@ -189,10 +189,20 @@ static void vp_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
 	}
 
 	vp_modern_set_status(mdev, status);
+}
 
-	if (!(status & VIRTIO_CONFIG_S_DRIVER_OK) &&
-	    (s & VIRTIO_CONFIG_S_DRIVER_OK))
+static int vp_vdpa_reset(struct vdpa_device *vdpa)
+{
+	struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
+	struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev;
+	u8 s = vp_vdpa_get_status(vdpa);
+
+	vp_modern_set_status(mdev, 0);
+
+	if (s & VIRTIO_CONFIG_S_DRIVER_OK)
 		vp_vdpa_free_irq(vp_vdpa);
+
+	return 0;
 }
 
 static u16 vp_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
@@ -398,6 +408,7 @@ static const struct vdpa_config_ops vp_vdpa_ops = {
 	.set_features	= vp_vdpa_set_features,
 	.get_status	= vp_vdpa_get_status,
 	.set_status	= vp_vdpa_set_status,
+	.reset		= vp_vdpa_reset,
 	.get_vq_num_max	= vp_vdpa_get_vq_num_max,
 	.get_vq_state	= vp_vdpa_get_vq_state,
 	.get_vq_notification = vp_vdpa_get_vq_notification,
@@ -435,7 +446,7 @@ static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		return ret;
 
 	vp_vdpa = vdpa_alloc_device(struct vp_vdpa, vdpa,
-				    dev, &vp_vdpa_ops, NULL);
+				    dev, &vp_vdpa_ops, NULL, false);
 	if (IS_ERR(vp_vdpa)) {
 		dev_err(dev, "vp_vdpa: Failed to allocate vDPA structure\n");
 		return PTR_ERR(vp_vdpa);
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index 67d0bf4efa16..6130d00252ed 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -1,12 +1,22 @@
 # SPDX-License-Identifier: GPL-2.0-only
+menuconfig VFIO
+	tristate "VFIO Non-Privileged userspace driver framework"
+	select IOMMU_API
+	select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
+	help
+	  VFIO provides a framework for secure userspace device drivers.
+	  See Documentation/driver-api/vfio.rst for more details.
+
+	  If you don't know what to do here, say N.
+
+if VFIO
 config VFIO_IOMMU_TYPE1
 	tristate
-	depends on VFIO
 	default n
 
 config VFIO_IOMMU_SPAPR_TCE
 	tristate
-	depends on VFIO && SPAPR_TCE_IOMMU
+	depends on SPAPR_TCE_IOMMU
 	default VFIO
 
 config VFIO_SPAPR_EEH
@@ -16,22 +26,11 @@ config VFIO_SPAPR_EEH
 
 config VFIO_VIRQFD
 	tristate
-	depends on VFIO && EVENTFD
+	select EVENTFD
 	default n
 
-menuconfig VFIO
-	tristate "VFIO Non-Privileged userspace driver framework"
-	select IOMMU_API
-	select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
-	help
-	  VFIO provides a framework for secure userspace device drivers.
-	  See Documentation/driver-api/vfio.rst for more details.
-
-	  If you don't know what to do here, say N.
-
-menuconfig VFIO_NOIOMMU
+config VFIO_NOIOMMU
 	bool "VFIO No-IOMMU support"
-	depends on VFIO
 	help
 	  VFIO is built on the ability to isolate devices using the IOMMU.
 	  Only with an IOMMU can userspace access to DMA capable devices be
@@ -48,4 +47,6 @@ source "drivers/vfio/pci/Kconfig"
 source "drivers/vfio/platform/Kconfig"
 source "drivers/vfio/mdev/Kconfig"
 source "drivers/vfio/fsl-mc/Kconfig"
+endif
+
 source "virt/lib/Kconfig"
diff --git a/drivers/vfio/fsl-mc/Kconfig b/drivers/vfio/fsl-mc/Kconfig
index b1a527d6b6f2..597d338c5c8a 100644
--- a/drivers/vfio/fsl-mc/Kconfig
+++ b/drivers/vfio/fsl-mc/Kconfig
@@ -1,6 +1,7 @@
 config VFIO_FSL_MC
 	tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices"
-	depends on VFIO && FSL_MC_BUS && EVENTFD
+	depends on FSL_MC_BUS
+	select EVENTFD
 	help
 	  Driver to enable support for the VFIO QorIQ DPAA2 fsl-mc
 	  (Management Complex) devices. This is required to passthrough
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
index 90cad109583b..0ead91bfa838 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
@@ -19,81 +19,10 @@
 
 static struct fsl_mc_driver vfio_fsl_mc_driver;
 
-static DEFINE_MUTEX(reflck_lock);
-
-static void vfio_fsl_mc_reflck_get(struct vfio_fsl_mc_reflck *reflck)
-{
-	kref_get(&reflck->kref);
-}
-
-static void vfio_fsl_mc_reflck_release(struct kref *kref)
-{
-	struct vfio_fsl_mc_reflck *reflck = container_of(kref,
-						      struct vfio_fsl_mc_reflck,
-						      kref);
-
-	mutex_destroy(&reflck->lock);
-	kfree(reflck);
-	mutex_unlock(&reflck_lock);
-}
-
-static void vfio_fsl_mc_reflck_put(struct vfio_fsl_mc_reflck *reflck)
-{
-	kref_put_mutex(&reflck->kref, vfio_fsl_mc_reflck_release, &reflck_lock);
-}
-
-static struct vfio_fsl_mc_reflck *vfio_fsl_mc_reflck_alloc(void)
-{
-	struct vfio_fsl_mc_reflck *reflck;
-
-	reflck = kzalloc(sizeof(*reflck), GFP_KERNEL);
-	if (!reflck)
-		return ERR_PTR(-ENOMEM);
-
-	kref_init(&reflck->kref);
-	mutex_init(&reflck->lock);
-
-	return reflck;
-}
-
-static int vfio_fsl_mc_reflck_attach(struct vfio_fsl_mc_device *vdev)
-{
-	int ret = 0;
-
-	mutex_lock(&reflck_lock);
-	if (is_fsl_mc_bus_dprc(vdev->mc_dev)) {
-		vdev->reflck = vfio_fsl_mc_reflck_alloc();
-		ret = PTR_ERR_OR_ZERO(vdev->reflck);
-	} else {
-		struct device *mc_cont_dev = vdev->mc_dev->dev.parent;
-		struct vfio_device *device;
-		struct vfio_fsl_mc_device *cont_vdev;
-
-		device = vfio_device_get_from_dev(mc_cont_dev);
-		if (!device) {
-			ret = -ENODEV;
-			goto unlock;
-		}
-
-		cont_vdev =
-			container_of(device, struct vfio_fsl_mc_device, vdev);
-		if (!cont_vdev || !cont_vdev->reflck) {
-			vfio_device_put(device);
-			ret = -ENODEV;
-			goto unlock;
-		}
-		vfio_fsl_mc_reflck_get(cont_vdev->reflck);
-		vdev->reflck = cont_vdev->reflck;
-		vfio_device_put(device);
-	}
-
-unlock:
-	mutex_unlock(&reflck_lock);
-	return ret;
-}
-
-static int vfio_fsl_mc_regions_init(struct vfio_fsl_mc_device *vdev)
+static int vfio_fsl_mc_open_device(struct vfio_device *core_vdev)
 {
+	struct vfio_fsl_mc_device *vdev =
+		container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
 	struct fsl_mc_device *mc_dev = vdev->mc_dev;
 	int count = mc_dev->obj_desc.region_count;
 	int i;
@@ -136,58 +65,30 @@ static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device *vdev)
 	kfree(vdev->regions);
 }
 
-static int vfio_fsl_mc_open(struct vfio_device *core_vdev)
-{
-	struct vfio_fsl_mc_device *vdev =
-		container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
-	int ret = 0;
-
-	mutex_lock(&vdev->reflck->lock);
-	if (!vdev->refcnt) {
-		ret = vfio_fsl_mc_regions_init(vdev);
-		if (ret)
-			goto out;
-	}
-	vdev->refcnt++;
-out:
-	mutex_unlock(&vdev->reflck->lock);
-
-	return ret;
-}
 
-static void vfio_fsl_mc_release(struct vfio_device *core_vdev)
+static void vfio_fsl_mc_close_device(struct vfio_device *core_vdev)
 {
 	struct vfio_fsl_mc_device *vdev =
 		container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
+	struct fsl_mc_device *mc_dev = vdev->mc_dev;
+	struct device *cont_dev = fsl_mc_cont_dev(&mc_dev->dev);
+	struct fsl_mc_device *mc_cont = to_fsl_mc_device(cont_dev);
 	int ret;
 
-	mutex_lock(&vdev->reflck->lock);
+	vfio_fsl_mc_regions_cleanup(vdev);
 
-	if (!(--vdev->refcnt)) {
-		struct fsl_mc_device *mc_dev = vdev->mc_dev;
-		struct device *cont_dev = fsl_mc_cont_dev(&mc_dev->dev);
-		struct fsl_mc_device *mc_cont = to_fsl_mc_device(cont_dev);
+	/* reset the device before cleaning up the interrupts */
+	ret = dprc_reset_container(mc_cont->mc_io, 0, mc_cont->mc_handle,
+				   mc_cont->obj_desc.id,
+				   DPRC_RESET_OPTION_NON_RECURSIVE);
 
-		vfio_fsl_mc_regions_cleanup(vdev);
+	if (WARN_ON(ret))
+		dev_warn(&mc_cont->dev,
+			 "VFIO_FLS_MC: reset device has failed (%d)\n", ret);
 
-		/* reset the device before cleaning up the interrupts */
-		ret = dprc_reset_container(mc_cont->mc_io, 0,
-		      mc_cont->mc_handle,
-			  mc_cont->obj_desc.id,
-			  DPRC_RESET_OPTION_NON_RECURSIVE);
+	vfio_fsl_mc_irqs_cleanup(vdev);
 
-		if (ret) {
-			dev_warn(&mc_cont->dev, "VFIO_FLS_MC: reset device has failed (%d)\n",
-				 ret);
-			WARN_ON(1);
-		}
-
-		vfio_fsl_mc_irqs_cleanup(vdev);
-
-		fsl_mc_cleanup_irq_pool(mc_cont);
-	}
-
-	mutex_unlock(&vdev->reflck->lock);
+	fsl_mc_cleanup_irq_pool(mc_cont);
 }
 
 static long vfio_fsl_mc_ioctl(struct vfio_device *core_vdev,
@@ -504,8 +405,8 @@ static int vfio_fsl_mc_mmap(struct vfio_device *core_vdev,
 
 static const struct vfio_device_ops vfio_fsl_mc_ops = {
 	.name		= "vfio-fsl-mc",
-	.open		= vfio_fsl_mc_open,
-	.release	= vfio_fsl_mc_release,
+	.open_device	= vfio_fsl_mc_open_device,
+	.close_device	= vfio_fsl_mc_close_device,
 	.ioctl		= vfio_fsl_mc_ioctl,
 	.read		= vfio_fsl_mc_read,
 	.write		= vfio_fsl_mc_write,
@@ -625,13 +526,16 @@ static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
 	vdev->mc_dev = mc_dev;
 	mutex_init(&vdev->igate);
 
-	ret = vfio_fsl_mc_reflck_attach(vdev);
+	if (is_fsl_mc_bus_dprc(mc_dev))
+		ret = vfio_assign_device_set(&vdev->vdev, &mc_dev->dev);
+	else
+		ret = vfio_assign_device_set(&vdev->vdev, mc_dev->dev.parent);
 	if (ret)
-		goto out_kfree;
+		goto out_uninit;
 
 	ret = vfio_fsl_mc_init_device(vdev);
 	if (ret)
-		goto out_reflck;
+		goto out_uninit;
 
 	ret = vfio_register_group_dev(&vdev->vdev);
 	if (ret) {
@@ -639,12 +543,6 @@ static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
 		goto out_device;
 	}
 
-	/*
-	 * This triggers recursion into vfio_fsl_mc_probe() on another device
-	 * and the vfio_fsl_mc_reflck_attach() must succeed, which relies on the
-	 * vfio_add_group_dev() above. It has no impact on this vdev, so it is
-	 * safe to be after the vfio device is made live.
-	 */
 	ret = vfio_fsl_mc_scan_container(mc_dev);
 	if (ret)
 		goto out_group_dev;
@@ -655,9 +553,8 @@ out_group_dev:
 	vfio_unregister_group_dev(&vdev->vdev);
 out_device:
 	vfio_fsl_uninit_device(vdev);
-out_reflck:
-	vfio_fsl_mc_reflck_put(vdev->reflck);
-out_kfree:
+out_uninit:
+	vfio_uninit_group_dev(&vdev->vdev);
 	kfree(vdev);
 out_group_put:
 	vfio_iommu_group_put(group, dev);
@@ -674,8 +571,8 @@ static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev)
 
 	dprc_remove_devices(mc_dev, NULL, 0);
 	vfio_fsl_uninit_device(vdev);
-	vfio_fsl_mc_reflck_put(vdev->reflck);
 
+	vfio_uninit_group_dev(&vdev->vdev);
 	kfree(vdev);
 	vfio_iommu_group_put(mc_dev->dev.iommu_group, dev);
 
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c
index 0d9f3002df7f..77e584093a23 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c
@@ -120,7 +120,7 @@ static int vfio_fsl_mc_set_irq_trigger(struct vfio_fsl_mc_device *vdev,
 	if (start != 0 || count != 1)
 		return -EINVAL;
 
-	mutex_lock(&vdev->reflck->lock);
+	mutex_lock(&vdev->vdev.dev_set->lock);
 	ret = fsl_mc_populate_irq_pool(mc_cont,
 			FSL_MC_IRQ_POOL_MAX_TOTAL_IRQS);
 	if (ret)
@@ -129,7 +129,7 @@ static int vfio_fsl_mc_set_irq_trigger(struct vfio_fsl_mc_device *vdev,
 	ret = vfio_fsl_mc_irqs_allocate(vdev);
 	if (ret)
 		goto unlock;
-	mutex_unlock(&vdev->reflck->lock);
+	mutex_unlock(&vdev->vdev.dev_set->lock);
 
 	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
 		s32 fd = *(s32 *)data;
@@ -154,7 +154,7 @@ static int vfio_fsl_mc_set_irq_trigger(struct vfio_fsl_mc_device *vdev,
 	return 0;
 
 unlock:
-	mutex_unlock(&vdev->reflck->lock);
+	mutex_unlock(&vdev->vdev.dev_set->lock);
 	return ret;
 
 }
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
index 89700e00e77d..4ad63ececb91 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
@@ -22,11 +22,6 @@ struct vfio_fsl_mc_irq {
 	char            *name;
 };
 
-struct vfio_fsl_mc_reflck {
-	struct kref		kref;
-	struct mutex		lock;
-};
-
 struct vfio_fsl_mc_region {
 	u32			flags;
 	u32			type;
@@ -39,9 +34,7 @@ struct vfio_fsl_mc_device {
 	struct vfio_device		vdev;
 	struct fsl_mc_device		*mc_dev;
 	struct notifier_block        nb;
-	int				refcnt;
 	struct vfio_fsl_mc_region	*regions;
-	struct vfio_fsl_mc_reflck   *reflck;
 	struct mutex         igate;
 	struct vfio_fsl_mc_irq      *mc_irqs;
 };
diff --git a/drivers/vfio/mdev/Kconfig b/drivers/vfio/mdev/Kconfig
index 763c877a1318..646dbed44eb2 100644
--- a/drivers/vfio/mdev/Kconfig
+++ b/drivers/vfio/mdev/Kconfig
@@ -2,7 +2,6 @@
 
 config VFIO_MDEV
 	tristate "Mediated device driver framework"
-	depends on VFIO
 	default n
 	help
 	  Provides a framework to virtualize devices.
diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
index e4581ec093a6..b314101237fe 100644
--- a/drivers/vfio/mdev/mdev_core.c
+++ b/drivers/vfio/mdev/mdev_core.c
@@ -138,10 +138,6 @@ int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops)
 	if (!dev)
 		return -EINVAL;
 
-	/* Not mandatory, but its absence could be a problem */
-	if (!ops->request)
-		dev_info(dev, "Driver cannot be asked to release device\n");
-
 	mutex_lock(&parent_list_lock);
 
 	/* Check for duplicate */
@@ -398,7 +394,7 @@ static void __exit mdev_exit(void)
 	mdev_bus_unregister();
 }
 
-module_init(mdev_init)
+subsys_initcall(mdev_init)
 module_exit(mdev_exit)
 
 MODULE_VERSION(DRIVER_VERSION);
diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c
index 39ef7489fe47..7a9883048216 100644
--- a/drivers/vfio/mdev/vfio_mdev.c
+++ b/drivers/vfio/mdev/vfio_mdev.c
@@ -17,24 +17,24 @@
 
 #include "mdev_private.h"
 
-static int vfio_mdev_open(struct vfio_device *core_vdev)
+static int vfio_mdev_open_device(struct vfio_device *core_vdev)
 {
 	struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
 	struct mdev_parent *parent = mdev->type->parent;
 
-	if (unlikely(!parent->ops->open))
-		return -EINVAL;
+	if (unlikely(!parent->ops->open_device))
+		return 0;
 
-	return parent->ops->open(mdev);
+	return parent->ops->open_device(mdev);
 }
 
-static void vfio_mdev_release(struct vfio_device *core_vdev)
+static void vfio_mdev_close_device(struct vfio_device *core_vdev)
 {
 	struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
 	struct mdev_parent *parent = mdev->type->parent;
 
-	if (likely(parent->ops->release))
-		parent->ops->release(mdev);
+	if (likely(parent->ops->close_device))
+		parent->ops->close_device(mdev);
 }
 
 static long vfio_mdev_unlocked_ioctl(struct vfio_device *core_vdev,
@@ -44,7 +44,7 @@ static long vfio_mdev_unlocked_ioctl(struct vfio_device *core_vdev,
 	struct mdev_parent *parent = mdev->type->parent;
 
 	if (unlikely(!parent->ops->ioctl))
-		return -EINVAL;
+		return 0;
 
 	return parent->ops->ioctl(mdev, cmd, arg);
 }
@@ -100,8 +100,8 @@ static void vfio_mdev_request(struct vfio_device *core_vdev, unsigned int count)
 
 static const struct vfio_device_ops vfio_mdev_dev_ops = {
 	.name		= "vfio-mdev",
-	.open		= vfio_mdev_open,
-	.release	= vfio_mdev_release,
+	.open_device	= vfio_mdev_open_device,
+	.close_device	= vfio_mdev_close_device,
 	.ioctl		= vfio_mdev_unlocked_ioctl,
 	.read		= vfio_mdev_read,
 	.write		= vfio_mdev_write,
@@ -120,12 +120,16 @@ static int vfio_mdev_probe(struct mdev_device *mdev)
 
 	vfio_init_group_dev(vdev, &mdev->dev, &vfio_mdev_dev_ops);
 	ret = vfio_register_group_dev(vdev);
-	if (ret) {
-		kfree(vdev);
-		return ret;
-	}
+	if (ret)
+		goto out_uninit;
+
 	dev_set_drvdata(&mdev->dev, vdev);
 	return 0;
+
+out_uninit:
+	vfio_uninit_group_dev(vdev);
+	kfree(vdev);
+	return ret;
 }
 
 static void vfio_mdev_remove(struct mdev_device *mdev)
@@ -133,6 +137,7 @@ static void vfio_mdev_remove(struct mdev_device *mdev)
 	struct vfio_device *vdev = dev_get_drvdata(&mdev->dev);
 
 	vfio_unregister_group_dev(vdev);
+	vfio_uninit_group_dev(vdev);
 	kfree(vdev);
 }
 
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index 5e2e1b9a9fd3..860424ccda1b 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -1,19 +1,29 @@
 # SPDX-License-Identifier: GPL-2.0-only
-config VFIO_PCI
-	tristate "VFIO support for PCI devices"
-	depends on VFIO && PCI && EVENTFD
-	depends on MMU
+if PCI && MMU
+config VFIO_PCI_CORE
+	tristate
 	select VFIO_VIRQFD
 	select IRQ_BYPASS_MANAGER
+
+config VFIO_PCI_MMAP
+	def_bool y if !S390
+
+config VFIO_PCI_INTX
+	def_bool y if !S390
+
+config VFIO_PCI
+	tristate "Generic VFIO support for any PCI device"
+	select VFIO_PCI_CORE
 	help
-	  Support for the PCI VFIO bus driver.  This is required to make
-	  use of PCI drivers using the VFIO framework.
+	  Support for the generic PCI VFIO bus driver which can connect any
+	  PCI device to the VFIO framework.
 
 	  If you don't know what to do here, say N.
 
+if VFIO_PCI
 config VFIO_PCI_VGA
-	bool "VFIO PCI support for VGA devices"
-	depends on VFIO_PCI && X86 && VGA_ARB
+	bool "Generic VFIO PCI support for VGA devices"
+	depends on X86 && VGA_ARB
 	help
 	  Support for VGA extension to VFIO PCI.  This exposes an additional
 	  region on VGA devices for accessing legacy VGA addresses used by
@@ -21,17 +31,9 @@ config VFIO_PCI_VGA
 
 	  If you don't know what to do here, say N.
 
-config VFIO_PCI_MMAP
-	depends on VFIO_PCI
-	def_bool y if !S390
-
-config VFIO_PCI_INTX
-	depends on VFIO_PCI
-	def_bool y if !S390
-
 config VFIO_PCI_IGD
-	bool "VFIO PCI extensions for Intel graphics (GVT-d)"
-	depends on VFIO_PCI && X86
+	bool "Generic VFIO PCI extensions for Intel graphics (GVT-d)"
+	depends on X86
 	default y
 	help
 	  Support for Intel IGD specific extensions to enable direct
@@ -40,3 +42,5 @@ config VFIO_PCI_IGD
 	  and LPC bridge config space.
 
 	  To enable Intel IGD assignment through vfio-pci, say Y.
+endif
+endif
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index 3ff42093962f..349d68d242b4 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -1,7 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
-vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
-vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
-vfio-pci-$(CONFIG_S390) += vfio_pci_zdev.o
+vfio-pci-core-y := vfio_pci_core.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
+vfio-pci-core-$(CONFIG_S390) += vfio_pci_zdev.o
+obj-$(CONFIG_VFIO_PCI_CORE) += vfio-pci-core.o
 
+vfio-pci-y := vfio_pci.o
+vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
 obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index cf27df8048db..a5ce92beb655 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -1,5 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
+ * Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ *
  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
  *     Author: Alex Williamson <alex.williamson@redhat.com>
  *
@@ -18,19 +20,13 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/notifier.h>
-#include <linux/pci.h>
 #include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/uaccess.h>
-#include <linux/vfio.h>
-#include <linux/vgaarb.h>
-#include <linux/nospec.h>
-#include <linux/sched/mm.h>
 
-#include "vfio_pci_private.h"
+#include <linux/vfio_pci_core.h>
 
-#define DRIVER_VERSION  "0.2"
 #define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
 #define DRIVER_DESC     "VFIO PCI - User Level meta-driver"
 
@@ -64,15 +60,6 @@ static bool disable_denylist;
 module_param(disable_denylist, bool, 0444);
 MODULE_PARM_DESC(disable_denylist, "Disable use of device denylist. Disabling the denylist allows binding to devices with known errata that may lead to exploitable stability or security issues when accessed by untrusted users.");
 
-static inline bool vfio_vga_disabled(void)
-{
-#ifdef CONFIG_VFIO_PCI_VGA
-	return disable_vga;
-#else
-	return true;
-#endif
-}
-
 static bool vfio_pci_dev_in_denylist(struct pci_dev *pdev)
 {
 	switch (pdev->vendor) {
@@ -111,2269 +98,103 @@ static bool vfio_pci_is_denylisted(struct pci_dev *pdev)
 	return true;
 }
 
-/*
- * Our VGA arbiter participation is limited since we don't know anything
- * about the device itself.  However, if the device is the only VGA device
- * downstream of a bridge and VFIO VGA support is disabled, then we can
- * safely return legacy VGA IO and memory as not decoded since the user
- * has no way to get to it and routing can be disabled externally at the
- * bridge.
- */
-static unsigned int vfio_pci_set_decode(struct pci_dev *pdev, bool single_vga)
-{
-	struct pci_dev *tmp = NULL;
-	unsigned char max_busnr;
-	unsigned int decodes;
-
-	if (single_vga || !vfio_vga_disabled() || pci_is_root_bus(pdev->bus))
-		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
-		       VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;
-
-	max_busnr = pci_bus_max_busnr(pdev->bus);
-	decodes = VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
-
-	while ((tmp = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, tmp)) != NULL) {
-		if (tmp == pdev ||
-		    pci_domain_nr(tmp->bus) != pci_domain_nr(pdev->bus) ||
-		    pci_is_root_bus(tmp->bus))
-			continue;
-
-		if (tmp->bus->number >= pdev->bus->number &&
-		    tmp->bus->number <= max_busnr) {
-			pci_dev_put(tmp);
-			decodes |= VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;
-			break;
-		}
-	}
-
-	return decodes;
-}
-
-static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
-{
-	return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA;
-}
-
-static void vfio_pci_probe_mmaps(struct vfio_pci_device *vdev)
-{
-	struct resource *res;
-	int i;
-	struct vfio_pci_dummy_resource *dummy_res;
-
-	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
-		int bar = i + PCI_STD_RESOURCES;
-
-		res = &vdev->pdev->resource[bar];
-
-		if (!IS_ENABLED(CONFIG_VFIO_PCI_MMAP))
-			goto no_mmap;
-
-		if (!(res->flags & IORESOURCE_MEM))
-			goto no_mmap;
-
-		/*
-		 * The PCI core shouldn't set up a resource with a
-		 * type but zero size. But there may be bugs that
-		 * cause us to do that.
-		 */
-		if (!resource_size(res))
-			goto no_mmap;
-
-		if (resource_size(res) >= PAGE_SIZE) {
-			vdev->bar_mmap_supported[bar] = true;
-			continue;
-		}
-
-		if (!(res->start & ~PAGE_MASK)) {
-			/*
-			 * Add a dummy resource to reserve the remainder
-			 * of the exclusive page in case that hot-add
-			 * device's bar is assigned into it.
-			 */
-			dummy_res = kzalloc(sizeof(*dummy_res), GFP_KERNEL);
-			if (dummy_res == NULL)
-				goto no_mmap;
-
-			dummy_res->resource.name = "vfio sub-page reserved";
-			dummy_res->resource.start = res->end + 1;
-			dummy_res->resource.end = res->start + PAGE_SIZE - 1;
-			dummy_res->resource.flags = res->flags;
-			if (request_resource(res->parent,
-						&dummy_res->resource)) {
-				kfree(dummy_res);
-				goto no_mmap;
-			}
-			dummy_res->index = bar;
-			list_add(&dummy_res->res_next,
-					&vdev->dummy_resources_list);
-			vdev->bar_mmap_supported[bar] = true;
-			continue;
-		}
-		/*
-		 * Here we don't handle the case when the BAR is not page
-		 * aligned because we can't expect the BAR will be
-		 * assigned into the same location in a page in guest
-		 * when we passthrough the BAR. And it's hard to access
-		 * this BAR in userspace because we have no way to get
-		 * the BAR's location in a page.
-		 */
-no_mmap:
-		vdev->bar_mmap_supported[bar] = false;
-	}
-}
-
-static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
-static void vfio_pci_disable(struct vfio_pci_device *vdev);
-static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data);
-
-/*
- * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND
- * _and_ the ability detect when the device is asserting INTx via PCI_STATUS.
- * If a device implements the former but not the latter we would typically
- * expect broken_intx_masking be set and require an exclusive interrupt.
- * However since we do have control of the device's ability to assert INTx,
- * we can instead pretend that the device does not implement INTx, virtualizing
- * the pin register to report zero and maintaining DisINTx set on the host.
- */
-static bool vfio_pci_nointx(struct pci_dev *pdev)
-{
-	switch (pdev->vendor) {
-	case PCI_VENDOR_ID_INTEL:
-		switch (pdev->device) {
-		/* All i40e (XL710/X710/XXV710) 10/20/25/40GbE NICs */
-		case 0x1572:
-		case 0x1574:
-		case 0x1580 ... 0x1581:
-		case 0x1583 ... 0x158b:
-		case 0x37d0 ... 0x37d2:
-		/* X550 */
-		case 0x1563:
-			return true;
-		default:
-			return false;
-		}
-	}
-
-	return false;
-}
-
-static void vfio_pci_probe_power_state(struct vfio_pci_device *vdev)
+static int vfio_pci_open_device(struct vfio_device *core_vdev)
 {
+	struct vfio_pci_core_device *vdev =
+		container_of(core_vdev, struct vfio_pci_core_device, vdev);
 	struct pci_dev *pdev = vdev->pdev;
-	u16 pmcsr;
-
-	if (!pdev->pm_cap)
-		return;
-
-	pci_read_config_word(pdev, pdev->pm_cap + PCI_PM_CTRL, &pmcsr);
-
-	vdev->needs_pm_restore = !(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET);
-}
-
-/*
- * pci_set_power_state() wrapper handling devices which perform a soft reset on
- * D3->D0 transition.  Save state prior to D0/1/2->D3, stash it on the vdev,
- * restore when returned to D0.  Saved separately from pci_saved_state for use
- * by PM capability emulation and separately from pci_dev internal saved state
- * to avoid it being overwritten and consumed around other resets.
- */
-int vfio_pci_set_power_state(struct vfio_pci_device *vdev, pci_power_t state)
-{
-	struct pci_dev *pdev = vdev->pdev;
-	bool needs_restore = false, needs_save = false;
 	int ret;
 
-	if (vdev->needs_pm_restore) {
-		if (pdev->current_state < PCI_D3hot && state >= PCI_D3hot) {
-			pci_save_state(pdev);
-			needs_save = true;
-		}
-
-		if (pdev->current_state >= PCI_D3hot && state <= PCI_D0)
-			needs_restore = true;
-	}
-
-	ret = pci_set_power_state(pdev, state);
-
-	if (!ret) {
-		/* D3 might be unsupported via quirk, skip unless in D3 */
-		if (needs_save && pdev->current_state >= PCI_D3hot) {
-			vdev->pm_save = pci_store_saved_state(pdev);
-		} else if (needs_restore) {
-			pci_load_and_free_saved_state(pdev, &vdev->pm_save);
-			pci_restore_state(pdev);
-		}
-	}
-
-	return ret;
-}
-
-static int vfio_pci_enable(struct vfio_pci_device *vdev)
-{
-	struct pci_dev *pdev = vdev->pdev;
-	int ret;
-	u16 cmd;
-	u8 msix_pos;
-
-	vfio_pci_set_power_state(vdev, PCI_D0);
-
-	/* Don't allow our initial saved state to include busmaster */
-	pci_clear_master(pdev);
-
-	ret = pci_enable_device(pdev);
+	ret = vfio_pci_core_enable(vdev);
 	if (ret)
 		return ret;
 
-	/* If reset fails because of the device lock, fail this path entirely */
-	ret = pci_try_reset_function(pdev);
-	if (ret == -EAGAIN) {
-		pci_disable_device(pdev);
-		return ret;
-	}
-
-	vdev->reset_works = !ret;
-	pci_save_state(pdev);
-	vdev->pci_saved_state = pci_store_saved_state(pdev);
-	if (!vdev->pci_saved_state)
-		pci_dbg(pdev, "%s: Couldn't store saved state\n", __func__);
-
-	if (likely(!nointxmask)) {
-		if (vfio_pci_nointx(pdev)) {
-			pci_info(pdev, "Masking broken INTx support\n");
-			vdev->nointx = true;
-			pci_intx(pdev, 0);
-		} else
-			vdev->pci_2_3 = pci_intx_mask_supported(pdev);
-	}
-
-	pci_read_config_word(pdev, PCI_COMMAND, &cmd);
-	if (vdev->pci_2_3 && (cmd & PCI_COMMAND_INTX_DISABLE)) {
-		cmd &= ~PCI_COMMAND_INTX_DISABLE;
-		pci_write_config_word(pdev, PCI_COMMAND, cmd);
-	}
-
-	ret = vfio_config_init(vdev);
-	if (ret) {
-		kfree(vdev->pci_saved_state);
-		vdev->pci_saved_state = NULL;
-		pci_disable_device(pdev);
-		return ret;
-	}
-
-	msix_pos = pdev->msix_cap;
-	if (msix_pos) {
-		u16 flags;
-		u32 table;
-
-		pci_read_config_word(pdev, msix_pos + PCI_MSIX_FLAGS, &flags);
-		pci_read_config_dword(pdev, msix_pos + PCI_MSIX_TABLE, &table);
-
-		vdev->msix_bar = table & PCI_MSIX_TABLE_BIR;
-		vdev->msix_offset = table & PCI_MSIX_TABLE_OFFSET;
-		vdev->msix_size = ((flags & PCI_MSIX_FLAGS_QSIZE) + 1) * 16;
-	} else
-		vdev->msix_bar = 0xFF;
-
-	if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
-		vdev->has_vga = true;
-
 	if (vfio_pci_is_vga(pdev) &&
 	    pdev->vendor == PCI_VENDOR_ID_INTEL &&
 	    IS_ENABLED(CONFIG_VFIO_PCI_IGD)) {
 		ret = vfio_pci_igd_init(vdev);
 		if (ret && ret != -ENODEV) {
 			pci_warn(pdev, "Failed to setup Intel IGD regions\n");
-			goto disable_exit;
-		}
-	}
-
-	vfio_pci_probe_mmaps(vdev);
-
-	return 0;
-
-disable_exit:
-	vfio_pci_disable(vdev);
-	return ret;
-}
-
-static void vfio_pci_disable(struct vfio_pci_device *vdev)
-{
-	struct pci_dev *pdev = vdev->pdev;
-	struct vfio_pci_dummy_resource *dummy_res, *tmp;
-	struct vfio_pci_ioeventfd *ioeventfd, *ioeventfd_tmp;
-	int i, bar;
-
-	/* Stop the device from further DMA */
-	pci_clear_master(pdev);
-
-	vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE |
-				VFIO_IRQ_SET_ACTION_TRIGGER,
-				vdev->irq_type, 0, 0, NULL);
-
-	/* Device closed, don't need mutex here */
-	list_for_each_entry_safe(ioeventfd, ioeventfd_tmp,
-				 &vdev->ioeventfds_list, next) {
-		vfio_virqfd_disable(&ioeventfd->virqfd);
-		list_del(&ioeventfd->next);
-		kfree(ioeventfd);
-	}
-	vdev->ioeventfds_nr = 0;
-
-	vdev->virq_disabled = false;
-
-	for (i = 0; i < vdev->num_regions; i++)
-		vdev->region[i].ops->release(vdev, &vdev->region[i]);
-
-	vdev->num_regions = 0;
-	kfree(vdev->region);
-	vdev->region = NULL; /* don't krealloc a freed pointer */
-
-	vfio_config_free(vdev);
-
-	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
-		bar = i + PCI_STD_RESOURCES;
-		if (!vdev->barmap[bar])
-			continue;
-		pci_iounmap(pdev, vdev->barmap[bar]);
-		pci_release_selected_regions(pdev, 1 << bar);
-		vdev->barmap[bar] = NULL;
-	}
-
-	list_for_each_entry_safe(dummy_res, tmp,
-				 &vdev->dummy_resources_list, res_next) {
-		list_del(&dummy_res->res_next);
-		release_resource(&dummy_res->resource);
-		kfree(dummy_res);
-	}
-
-	vdev->needs_reset = true;
-
-	/*
-	 * If we have saved state, restore it.  If we can reset the device,
-	 * even better.  Resetting with current state seems better than
-	 * nothing, but saving and restoring current state without reset
-	 * is just busy work.
-	 */
-	if (pci_load_and_free_saved_state(pdev, &vdev->pci_saved_state)) {
-		pci_info(pdev, "%s: Couldn't reload saved state\n", __func__);
-
-		if (!vdev->reset_works)
-			goto out;
-
-		pci_save_state(pdev);
-	}
-
-	/*
-	 * Disable INTx and MSI, presumably to avoid spurious interrupts
-	 * during reset.  Stolen from pci_reset_function()
-	 */
-	pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
-
-	/*
-	 * Try to get the locks ourselves to prevent a deadlock. The
-	 * success of this is dependent on being able to lock the device,
-	 * which is not always possible.
-	 * We can not use the "try" reset interface here, which will
-	 * overwrite the previously restored configuration information.
-	 */
-	if (vdev->reset_works && pci_dev_trylock(pdev)) {
-		if (!__pci_reset_function_locked(pdev))
-			vdev->needs_reset = false;
-		pci_dev_unlock(pdev);
-	}
-
-	pci_restore_state(pdev);
-out:
-	pci_disable_device(pdev);
-
-	vfio_pci_try_bus_reset(vdev);
-
-	if (!disable_idle_d3)
-		vfio_pci_set_power_state(vdev, PCI_D3hot);
-}
-
-static struct pci_driver vfio_pci_driver;
-
-static struct vfio_pci_device *get_pf_vdev(struct vfio_pci_device *vdev)
-{
-	struct pci_dev *physfn = pci_physfn(vdev->pdev);
-	struct vfio_device *pf_dev;
-
-	if (!vdev->pdev->is_virtfn)
-		return NULL;
-
-	pf_dev = vfio_device_get_from_dev(&physfn->dev);
-	if (!pf_dev)
-		return NULL;
-
-	if (pci_dev_driver(physfn) != &vfio_pci_driver) {
-		vfio_device_put(pf_dev);
-		return NULL;
-	}
-
-	return container_of(pf_dev, struct vfio_pci_device, vdev);
-}
-
-static void vfio_pci_vf_token_user_add(struct vfio_pci_device *vdev, int val)
-{
-	struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev);
-
-	if (!pf_vdev)
-		return;
-
-	mutex_lock(&pf_vdev->vf_token->lock);
-	pf_vdev->vf_token->users += val;
-	WARN_ON(pf_vdev->vf_token->users < 0);
-	mutex_unlock(&pf_vdev->vf_token->lock);
-
-	vfio_device_put(&pf_vdev->vdev);
-}
-
-static void vfio_pci_release(struct vfio_device *core_vdev)
-{
-	struct vfio_pci_device *vdev =
-		container_of(core_vdev, struct vfio_pci_device, vdev);
-
-	mutex_lock(&vdev->reflck->lock);
-
-	if (!(--vdev->refcnt)) {
-		vfio_pci_vf_token_user_add(vdev, -1);
-		vfio_spapr_pci_eeh_release(vdev->pdev);
-		vfio_pci_disable(vdev);
-
-		mutex_lock(&vdev->igate);
-		if (vdev->err_trigger) {
-			eventfd_ctx_put(vdev->err_trigger);
-			vdev->err_trigger = NULL;
-		}
-		if (vdev->req_trigger) {
-			eventfd_ctx_put(vdev->req_trigger);
-			vdev->req_trigger = NULL;
-		}
-		mutex_unlock(&vdev->igate);
-	}
-
-	mutex_unlock(&vdev->reflck->lock);
-}
-
-static int vfio_pci_open(struct vfio_device *core_vdev)
-{
-	struct vfio_pci_device *vdev =
-		container_of(core_vdev, struct vfio_pci_device, vdev);
-	int ret = 0;
-
-	mutex_lock(&vdev->reflck->lock);
-
-	if (!vdev->refcnt) {
-		ret = vfio_pci_enable(vdev);
-		if (ret)
-			goto error;
-
-		vfio_spapr_pci_eeh_open(vdev->pdev);
-		vfio_pci_vf_token_user_add(vdev, 1);
-	}
-	vdev->refcnt++;
-error:
-	mutex_unlock(&vdev->reflck->lock);
-	return ret;
-}
-
-static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
-{
-	if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
-		u8 pin;
-
-		if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) ||
-		    vdev->nointx || vdev->pdev->is_virtfn)
-			return 0;
-
-		pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin);
-
-		return pin ? 1 : 0;
-	} else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) {
-		u8 pos;
-		u16 flags;
-
-		pos = vdev->pdev->msi_cap;
-		if (pos) {
-			pci_read_config_word(vdev->pdev,
-					     pos + PCI_MSI_FLAGS, &flags);
-			return 1 << ((flags & PCI_MSI_FLAGS_QMASK) >> 1);
-		}
-	} else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX) {
-		u8 pos;
-		u16 flags;
-
-		pos = vdev->pdev->msix_cap;
-		if (pos) {
-			pci_read_config_word(vdev->pdev,
-					     pos + PCI_MSIX_FLAGS, &flags);
-
-			return (flags & PCI_MSIX_FLAGS_QSIZE) + 1;
-		}
-	} else if (irq_type == VFIO_PCI_ERR_IRQ_INDEX) {
-		if (pci_is_pcie(vdev->pdev))
-			return 1;
-	} else if (irq_type == VFIO_PCI_REQ_IRQ_INDEX) {
-		return 1;
-	}
-
-	return 0;
-}
-
-static int vfio_pci_count_devs(struct pci_dev *pdev, void *data)
-{
-	(*(int *)data)++;
-	return 0;
-}
-
-struct vfio_pci_fill_info {
-	int max;
-	int cur;
-	struct vfio_pci_dependent_device *devices;
-};
-
-static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)
-{
-	struct vfio_pci_fill_info *fill = data;
-	struct iommu_group *iommu_group;
-
-	if (fill->cur == fill->max)
-		return -EAGAIN; /* Something changed, try again */
-
-	iommu_group = iommu_group_get(&pdev->dev);
-	if (!iommu_group)
-		return -EPERM; /* Cannot reset non-isolated devices */
-
-	fill->devices[fill->cur].group_id = iommu_group_id(iommu_group);
-	fill->devices[fill->cur].segment = pci_domain_nr(pdev->bus);
-	fill->devices[fill->cur].bus = pdev->bus->number;
-	fill->devices[fill->cur].devfn = pdev->devfn;
-	fill->cur++;
-	iommu_group_put(iommu_group);
-	return 0;
-}
-
-struct vfio_pci_group_entry {
-	struct vfio_group *group;
-	int id;
-};
-
-struct vfio_pci_group_info {
-	int count;
-	struct vfio_pci_group_entry *groups;
-};
-
-static int vfio_pci_validate_devs(struct pci_dev *pdev, void *data)
-{
-	struct vfio_pci_group_info *info = data;
-	struct iommu_group *group;
-	int id, i;
-
-	group = iommu_group_get(&pdev->dev);
-	if (!group)
-		return -EPERM;
-
-	id = iommu_group_id(group);
-
-	for (i = 0; i < info->count; i++)
-		if (info->groups[i].id == id)
-			break;
-
-	iommu_group_put(group);
-
-	return (i == info->count) ? -EINVAL : 0;
-}
-
-static bool vfio_pci_dev_below_slot(struct pci_dev *pdev, struct pci_slot *slot)
-{
-	for (; pdev; pdev = pdev->bus->self)
-		if (pdev->bus == slot->bus)
-			return (pdev->slot == slot);
-	return false;
-}
-
-struct vfio_pci_walk_info {
-	int (*fn)(struct pci_dev *, void *data);
-	void *data;
-	struct pci_dev *pdev;
-	bool slot;
-	int ret;
-};
-
-static int vfio_pci_walk_wrapper(struct pci_dev *pdev, void *data)
-{
-	struct vfio_pci_walk_info *walk = data;
-
-	if (!walk->slot || vfio_pci_dev_below_slot(pdev, walk->pdev->slot))
-		walk->ret = walk->fn(pdev, walk->data);
-
-	return walk->ret;
-}
-
-static int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev,
-					 int (*fn)(struct pci_dev *,
-						   void *data), void *data,
-					 bool slot)
-{
-	struct vfio_pci_walk_info walk = {
-		.fn = fn, .data = data, .pdev = pdev, .slot = slot, .ret = 0,
-	};
-
-	pci_walk_bus(pdev->bus, vfio_pci_walk_wrapper, &walk);
-
-	return walk.ret;
-}
-
-static int msix_mmappable_cap(struct vfio_pci_device *vdev,
-			      struct vfio_info_cap *caps)
-{
-	struct vfio_info_cap_header header = {
-		.id = VFIO_REGION_INFO_CAP_MSIX_MAPPABLE,
-		.version = 1
-	};
-
-	return vfio_info_add_capability(caps, &header, sizeof(header));
-}
-
-int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
-				 unsigned int type, unsigned int subtype,
-				 const struct vfio_pci_regops *ops,
-				 size_t size, u32 flags, void *data)
-{
-	struct vfio_pci_region *region;
-
-	region = krealloc(vdev->region,
-			  (vdev->num_regions + 1) * sizeof(*region),
-			  GFP_KERNEL);
-	if (!region)
-		return -ENOMEM;
-
-	vdev->region = region;
-	vdev->region[vdev->num_regions].type = type;
-	vdev->region[vdev->num_regions].subtype = subtype;
-	vdev->region[vdev->num_regions].ops = ops;
-	vdev->region[vdev->num_regions].size = size;
-	vdev->region[vdev->num_regions].flags = flags;
-	vdev->region[vdev->num_regions].data = data;
-
-	vdev->num_regions++;
-
-	return 0;
-}
-
-struct vfio_devices {
-	struct vfio_pci_device **devices;
-	int cur_index;
-	int max_index;
-};
-
-static long vfio_pci_ioctl(struct vfio_device *core_vdev,
-			   unsigned int cmd, unsigned long arg)
-{
-	struct vfio_pci_device *vdev =
-		container_of(core_vdev, struct vfio_pci_device, vdev);
-	unsigned long minsz;
-
-	if (cmd == VFIO_DEVICE_GET_INFO) {
-		struct vfio_device_info info;
-		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
-		unsigned long capsz;
-		int ret;
-
-		minsz = offsetofend(struct vfio_device_info, num_irqs);
-
-		/* For backward compatibility, cannot require this */
-		capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset);
-
-		if (copy_from_user(&info, (void __user *)arg, minsz))
-			return -EFAULT;
-
-		if (info.argsz < minsz)
-			return -EINVAL;
-
-		if (info.argsz >= capsz) {
-			minsz = capsz;
-			info.cap_offset = 0;
-		}
-
-		info.flags = VFIO_DEVICE_FLAGS_PCI;
-
-		if (vdev->reset_works)
-			info.flags |= VFIO_DEVICE_FLAGS_RESET;
-
-		info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
-		info.num_irqs = VFIO_PCI_NUM_IRQS;
-
-		ret = vfio_pci_info_zdev_add_caps(vdev, &caps);
-		if (ret && ret != -ENODEV) {
-			pci_warn(vdev->pdev, "Failed to setup zPCI info capabilities\n");
-			return ret;
-		}
-
-		if (caps.size) {
-			info.flags |= VFIO_DEVICE_FLAGS_CAPS;
-			if (info.argsz < sizeof(info) + caps.size) {
-				info.argsz = sizeof(info) + caps.size;
-			} else {
-				vfio_info_cap_shift(&caps, sizeof(info));
-				if (copy_to_user((void __user *)arg +
-						  sizeof(info), caps.buf,
-						  caps.size)) {
-					kfree(caps.buf);
-					return -EFAULT;
-				}
-				info.cap_offset = sizeof(info);
-			}
-
-			kfree(caps.buf);
-		}
-
-		return copy_to_user((void __user *)arg, &info, minsz) ?
-			-EFAULT : 0;
-
-	} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
-		struct pci_dev *pdev = vdev->pdev;
-		struct vfio_region_info info;
-		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
-		int i, ret;
-
-		minsz = offsetofend(struct vfio_region_info, offset);
-
-		if (copy_from_user(&info, (void __user *)arg, minsz))
-			return -EFAULT;
-
-		if (info.argsz < minsz)
-			return -EINVAL;
-
-		switch (info.index) {
-		case VFIO_PCI_CONFIG_REGION_INDEX:
-			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
-			info.size = pdev->cfg_size;
-			info.flags = VFIO_REGION_INFO_FLAG_READ |
-				     VFIO_REGION_INFO_FLAG_WRITE;
-			break;
-		case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
-			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
-			info.size = pci_resource_len(pdev, info.index);
-			if (!info.size) {
-				info.flags = 0;
-				break;
-			}
-
-			info.flags = VFIO_REGION_INFO_FLAG_READ |
-				     VFIO_REGION_INFO_FLAG_WRITE;
-			if (vdev->bar_mmap_supported[info.index]) {
-				info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
-				if (info.index == vdev->msix_bar) {
-					ret = msix_mmappable_cap(vdev, &caps);
-					if (ret)
-						return ret;
-				}
-			}
-
-			break;
-		case VFIO_PCI_ROM_REGION_INDEX:
-		{
-			void __iomem *io;
-			size_t size;
-			u16 cmd;
-
-			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
-			info.flags = 0;
-
-			/* Report the BAR size, not the ROM size */
-			info.size = pci_resource_len(pdev, info.index);
-			if (!info.size) {
-				/* Shadow ROMs appear as PCI option ROMs */
-				if (pdev->resource[PCI_ROM_RESOURCE].flags &
-							IORESOURCE_ROM_SHADOW)
-					info.size = 0x20000;
-				else
-					break;
-			}
-
-			/*
-			 * Is it really there?  Enable memory decode for
-			 * implicit access in pci_map_rom().
-			 */
-			cmd = vfio_pci_memory_lock_and_enable(vdev);
-			io = pci_map_rom(pdev, &size);
-			if (io) {
-				info.flags = VFIO_REGION_INFO_FLAG_READ;
-				pci_unmap_rom(pdev, io);
-			} else {
-				info.size = 0;
-			}
-			vfio_pci_memory_unlock_and_restore(vdev, cmd);
-
-			break;
-		}
-		case VFIO_PCI_VGA_REGION_INDEX:
-			if (!vdev->has_vga)
-				return -EINVAL;
-
-			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
-			info.size = 0xc0000;
-			info.flags = VFIO_REGION_INFO_FLAG_READ |
-				     VFIO_REGION_INFO_FLAG_WRITE;
-
-			break;
-		default:
-		{
-			struct vfio_region_info_cap_type cap_type = {
-					.header.id = VFIO_REGION_INFO_CAP_TYPE,
-					.header.version = 1 };
-
-			if (info.index >=
-			    VFIO_PCI_NUM_REGIONS + vdev->num_regions)
-				return -EINVAL;
-			info.index = array_index_nospec(info.index,
-							VFIO_PCI_NUM_REGIONS +
-							vdev->num_regions);
-
-			i = info.index - VFIO_PCI_NUM_REGIONS;
-
-			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
-			info.size = vdev->region[i].size;
-			info.flags = vdev->region[i].flags;
-
-			cap_type.type = vdev->region[i].type;
-			cap_type.subtype = vdev->region[i].subtype;
-
-			ret = vfio_info_add_capability(&caps, &cap_type.header,
-						       sizeof(cap_type));
-			if (ret)
-				return ret;
-
-			if (vdev->region[i].ops->add_capability) {
-				ret = vdev->region[i].ops->add_capability(vdev,
-						&vdev->region[i], &caps);
-				if (ret)
-					return ret;
-			}
-		}
-		}
-
-		if (caps.size) {
-			info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
-			if (info.argsz < sizeof(info) + caps.size) {
-				info.argsz = sizeof(info) + caps.size;
-				info.cap_offset = 0;
-			} else {
-				vfio_info_cap_shift(&caps, sizeof(info));
-				if (copy_to_user((void __user *)arg +
-						  sizeof(info), caps.buf,
-						  caps.size)) {
-					kfree(caps.buf);
-					return -EFAULT;
-				}
-				info.cap_offset = sizeof(info);
-			}
-
-			kfree(caps.buf);
-		}
-
-		return copy_to_user((void __user *)arg, &info, minsz) ?
-			-EFAULT : 0;
-
-	} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
-		struct vfio_irq_info info;
-
-		minsz = offsetofend(struct vfio_irq_info, count);
-
-		if (copy_from_user(&info, (void __user *)arg, minsz))
-			return -EFAULT;
-
-		if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
-			return -EINVAL;
-
-		switch (info.index) {
-		case VFIO_PCI_INTX_IRQ_INDEX ... VFIO_PCI_MSIX_IRQ_INDEX:
-		case VFIO_PCI_REQ_IRQ_INDEX:
-			break;
-		case VFIO_PCI_ERR_IRQ_INDEX:
-			if (pci_is_pcie(vdev->pdev))
-				break;
-			fallthrough;
-		default:
-			return -EINVAL;
-		}
-
-		info.flags = VFIO_IRQ_INFO_EVENTFD;
-
-		info.count = vfio_pci_get_irq_count(vdev, info.index);
-
-		if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
-			info.flags |= (VFIO_IRQ_INFO_MASKABLE |
-				       VFIO_IRQ_INFO_AUTOMASKED);
-		else
-			info.flags |= VFIO_IRQ_INFO_NORESIZE;
-
-		return copy_to_user((void __user *)arg, &info, minsz) ?
-			-EFAULT : 0;
-
-	} else if (cmd == VFIO_DEVICE_SET_IRQS) {
-		struct vfio_irq_set hdr;
-		u8 *data = NULL;
-		int max, ret = 0;
-		size_t data_size = 0;
-
-		minsz = offsetofend(struct vfio_irq_set, count);
-
-		if (copy_from_user(&hdr, (void __user *)arg, minsz))
-			return -EFAULT;
-
-		max = vfio_pci_get_irq_count(vdev, hdr.index);
-
-		ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
-						 VFIO_PCI_NUM_IRQS, &data_size);
-		if (ret)
+			vfio_pci_core_disable(vdev);
 			return ret;
-
-		if (data_size) {
-			data = memdup_user((void __user *)(arg + minsz),
-					    data_size);
-			if (IS_ERR(data))
-				return PTR_ERR(data);
 		}
-
-		mutex_lock(&vdev->igate);
-
-		ret = vfio_pci_set_irqs_ioctl(vdev, hdr.flags, hdr.index,
-					      hdr.start, hdr.count, data);
-
-		mutex_unlock(&vdev->igate);
-		kfree(data);
-
-		return ret;
-
-	} else if (cmd == VFIO_DEVICE_RESET) {
-		int ret;
-
-		if (!vdev->reset_works)
-			return -EINVAL;
-
-		vfio_pci_zap_and_down_write_memory_lock(vdev);
-		ret = pci_try_reset_function(vdev->pdev);
-		up_write(&vdev->memory_lock);
-
-		return ret;
-
-	} else if (cmd == VFIO_DEVICE_GET_PCI_HOT_RESET_INFO) {
-		struct vfio_pci_hot_reset_info hdr;
-		struct vfio_pci_fill_info fill = { 0 };
-		struct vfio_pci_dependent_device *devices = NULL;
-		bool slot = false;
-		int ret = 0;
-
-		minsz = offsetofend(struct vfio_pci_hot_reset_info, count);
-
-		if (copy_from_user(&hdr, (void __user *)arg, minsz))
-			return -EFAULT;
-
-		if (hdr.argsz < minsz)
-			return -EINVAL;
-
-		hdr.flags = 0;
-
-		/* Can we do a slot or bus reset or neither? */
-		if (!pci_probe_reset_slot(vdev->pdev->slot))
-			slot = true;
-		else if (pci_probe_reset_bus(vdev->pdev->bus))
-			return -ENODEV;
-
-		/* How many devices are affected? */
-		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
-						    vfio_pci_count_devs,
-						    &fill.max, slot);
-		if (ret)
-			return ret;
-
-		WARN_ON(!fill.max); /* Should always be at least one */
-
-		/*
-		 * If there's enough space, fill it now, otherwise return
-		 * -ENOSPC and the number of devices affected.
-		 */
-		if (hdr.argsz < sizeof(hdr) + (fill.max * sizeof(*devices))) {
-			ret = -ENOSPC;
-			hdr.count = fill.max;
-			goto reset_info_exit;
-		}
-
-		devices = kcalloc(fill.max, sizeof(*devices), GFP_KERNEL);
-		if (!devices)
-			return -ENOMEM;
-
-		fill.devices = devices;
-
-		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
-						    vfio_pci_fill_devs,
-						    &fill, slot);
-
-		/*
-		 * If a device was removed between counting and filling,
-		 * we may come up short of fill.max.  If a device was
-		 * added, we'll have a return of -EAGAIN above.
-		 */
-		if (!ret)
-			hdr.count = fill.cur;
-
-reset_info_exit:
-		if (copy_to_user((void __user *)arg, &hdr, minsz))
-			ret = -EFAULT;
-
-		if (!ret) {
-			if (copy_to_user((void __user *)(arg + minsz), devices,
-					 hdr.count * sizeof(*devices)))
-				ret = -EFAULT;
-		}
-
-		kfree(devices);
-		return ret;
-
-	} else if (cmd == VFIO_DEVICE_PCI_HOT_RESET) {
-		struct vfio_pci_hot_reset hdr;
-		int32_t *group_fds;
-		struct vfio_pci_group_entry *groups;
-		struct vfio_pci_group_info info;
-		struct vfio_devices devs = { .cur_index = 0 };
-		bool slot = false;
-		int i, group_idx, mem_idx = 0, count = 0, ret = 0;
-
-		minsz = offsetofend(struct vfio_pci_hot_reset, count);
-
-		if (copy_from_user(&hdr, (void __user *)arg, minsz))
-			return -EFAULT;
-
-		if (hdr.argsz < minsz || hdr.flags)
-			return -EINVAL;
-
-		/* Can we do a slot or bus reset or neither? */
-		if (!pci_probe_reset_slot(vdev->pdev->slot))
-			slot = true;
-		else if (pci_probe_reset_bus(vdev->pdev->bus))
-			return -ENODEV;
-
-		/*
-		 * We can't let userspace give us an arbitrarily large
-		 * buffer to copy, so verify how many we think there
-		 * could be.  Note groups can have multiple devices so
-		 * one group per device is the max.
-		 */
-		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
-						    vfio_pci_count_devs,
-						    &count, slot);
-		if (ret)
-			return ret;
-
-		/* Somewhere between 1 and count is OK */
-		if (!hdr.count || hdr.count > count)
-			return -EINVAL;
-
-		group_fds = kcalloc(hdr.count, sizeof(*group_fds), GFP_KERNEL);
-		groups = kcalloc(hdr.count, sizeof(*groups), GFP_KERNEL);
-		if (!group_fds || !groups) {
-			kfree(group_fds);
-			kfree(groups);
-			return -ENOMEM;
-		}
-
-		if (copy_from_user(group_fds, (void __user *)(arg + minsz),
-				   hdr.count * sizeof(*group_fds))) {
-			kfree(group_fds);
-			kfree(groups);
-			return -EFAULT;
-		}
-
-		/*
-		 * For each group_fd, get the group through the vfio external
-		 * user interface and store the group and iommu ID.  This
-		 * ensures the group is held across the reset.
-		 */
-		for (group_idx = 0; group_idx < hdr.count; group_idx++) {
-			struct vfio_group *group;
-			struct fd f = fdget(group_fds[group_idx]);
-			if (!f.file) {
-				ret = -EBADF;
-				break;
-			}
-
-			group = vfio_group_get_external_user(f.file);
-			fdput(f);
-			if (IS_ERR(group)) {
-				ret = PTR_ERR(group);
-				break;
-			}
-
-			groups[group_idx].group = group;
-			groups[group_idx].id =
-					vfio_external_user_iommu_id(group);
-		}
-
-		kfree(group_fds);
-
-		/* release reference to groups on error */
-		if (ret)
-			goto hot_reset_release;
-
-		info.count = hdr.count;
-		info.groups = groups;
-
-		/*
-		 * Test whether all the affected devices are contained
-		 * by the set of groups provided by the user.
-		 */
-		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
-						    vfio_pci_validate_devs,
-						    &info, slot);
-		if (ret)
-			goto hot_reset_release;
-
-		devs.max_index = count;
-		devs.devices = kcalloc(count, sizeof(struct vfio_device *),
-				       GFP_KERNEL);
-		if (!devs.devices) {
-			ret = -ENOMEM;
-			goto hot_reset_release;
-		}
-
-		/*
-		 * We need to get memory_lock for each device, but devices
-		 * can share mmap_lock, therefore we need to zap and hold
-		 * the vma_lock for each device, and only then get each
-		 * memory_lock.
-		 */
-		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
-					    vfio_pci_try_zap_and_vma_lock_cb,
-					    &devs, slot);
-		if (ret)
-			goto hot_reset_release;
-
-		for (; mem_idx < devs.cur_index; mem_idx++) {
-			struct vfio_pci_device *tmp = devs.devices[mem_idx];
-
-			ret = down_write_trylock(&tmp->memory_lock);
-			if (!ret) {
-				ret = -EBUSY;
-				goto hot_reset_release;
-			}
-			mutex_unlock(&tmp->vma_lock);
-		}
-
-		/* User has access, do the reset */
-		ret = pci_reset_bus(vdev->pdev);
-
-hot_reset_release:
-		for (i = 0; i < devs.cur_index; i++) {
-			struct vfio_pci_device *tmp = devs.devices[i];
-
-			if (i < mem_idx)
-				up_write(&tmp->memory_lock);
-			else
-				mutex_unlock(&tmp->vma_lock);
-			vfio_device_put(&tmp->vdev);
-		}
-		kfree(devs.devices);
-
-		for (group_idx--; group_idx >= 0; group_idx--)
-			vfio_group_put_external_user(groups[group_idx].group);
-
-		kfree(groups);
-		return ret;
-	} else if (cmd == VFIO_DEVICE_IOEVENTFD) {
-		struct vfio_device_ioeventfd ioeventfd;
-		int count;
-
-		minsz = offsetofend(struct vfio_device_ioeventfd, fd);
-
-		if (copy_from_user(&ioeventfd, (void __user *)arg, minsz))
-			return -EFAULT;
-
-		if (ioeventfd.argsz < minsz)
-			return -EINVAL;
-
-		if (ioeventfd.flags & ~VFIO_DEVICE_IOEVENTFD_SIZE_MASK)
-			return -EINVAL;
-
-		count = ioeventfd.flags & VFIO_DEVICE_IOEVENTFD_SIZE_MASK;
-
-		if (hweight8(count) != 1 || ioeventfd.fd < -1)
-			return -EINVAL;
-
-		return vfio_pci_ioeventfd(vdev, ioeventfd.offset,
-					  ioeventfd.data, count, ioeventfd.fd);
-	} else if (cmd == VFIO_DEVICE_FEATURE) {
-		struct vfio_device_feature feature;
-		uuid_t uuid;
-
-		minsz = offsetofend(struct vfio_device_feature, flags);
-
-		if (copy_from_user(&feature, (void __user *)arg, minsz))
-			return -EFAULT;
-
-		if (feature.argsz < minsz)
-			return -EINVAL;
-
-		/* Check unknown flags */
-		if (feature.flags & ~(VFIO_DEVICE_FEATURE_MASK |
-				      VFIO_DEVICE_FEATURE_SET |
-				      VFIO_DEVICE_FEATURE_GET |
-				      VFIO_DEVICE_FEATURE_PROBE))
-			return -EINVAL;
-
-		/* GET & SET are mutually exclusive except with PROBE */
-		if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
-		    (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
-		    (feature.flags & VFIO_DEVICE_FEATURE_GET))
-			return -EINVAL;
-
-		switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
-		case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN:
-			if (!vdev->vf_token)
-				return -ENOTTY;
-
-			/*
-			 * We do not support GET of the VF Token UUID as this
-			 * could expose the token of the previous device user.
-			 */
-			if (feature.flags & VFIO_DEVICE_FEATURE_GET)
-				return -EINVAL;
-
-			if (feature.flags & VFIO_DEVICE_FEATURE_PROBE)
-				return 0;
-
-			/* Don't SET unless told to do so */
-			if (!(feature.flags & VFIO_DEVICE_FEATURE_SET))
-				return -EINVAL;
-
-			if (feature.argsz < minsz + sizeof(uuid))
-				return -EINVAL;
-
-			if (copy_from_user(&uuid, (void __user *)(arg + minsz),
-					   sizeof(uuid)))
-				return -EFAULT;
-
-			mutex_lock(&vdev->vf_token->lock);
-			uuid_copy(&vdev->vf_token->uuid, &uuid);
-			mutex_unlock(&vdev->vf_token->lock);
-
-			return 0;
-		default:
-			return -ENOTTY;
-		}
-	}
-
-	return -ENOTTY;
-}
-
-static ssize_t vfio_pci_rw(struct vfio_pci_device *vdev, char __user *buf,
-			   size_t count, loff_t *ppos, bool iswrite)
-{
-	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
-
-	if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
-		return -EINVAL;
-
-	switch (index) {
-	case VFIO_PCI_CONFIG_REGION_INDEX:
-		return vfio_pci_config_rw(vdev, buf, count, ppos, iswrite);
-
-	case VFIO_PCI_ROM_REGION_INDEX:
-		if (iswrite)
-			return -EINVAL;
-		return vfio_pci_bar_rw(vdev, buf, count, ppos, false);
-
-	case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
-		return vfio_pci_bar_rw(vdev, buf, count, ppos, iswrite);
-
-	case VFIO_PCI_VGA_REGION_INDEX:
-		return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite);
-	default:
-		index -= VFIO_PCI_NUM_REGIONS;
-		return vdev->region[index].ops->rw(vdev, buf,
-						   count, ppos, iswrite);
-	}
-
-	return -EINVAL;
-}
-
-static ssize_t vfio_pci_read(struct vfio_device *core_vdev, char __user *buf,
-			     size_t count, loff_t *ppos)
-{
-	struct vfio_pci_device *vdev =
-		container_of(core_vdev, struct vfio_pci_device, vdev);
-
-	if (!count)
-		return 0;
-
-	return vfio_pci_rw(vdev, buf, count, ppos, false);
-}
-
-static ssize_t vfio_pci_write(struct vfio_device *core_vdev, const char __user *buf,
-			      size_t count, loff_t *ppos)
-{
-	struct vfio_pci_device *vdev =
-		container_of(core_vdev, struct vfio_pci_device, vdev);
-
-	if (!count)
-		return 0;
-
-	return vfio_pci_rw(vdev, (char __user *)buf, count, ppos, true);
-}
-
-/* Return 1 on zap and vma_lock acquired, 0 on contention (only with @try) */
-static int vfio_pci_zap_and_vma_lock(struct vfio_pci_device *vdev, bool try)
-{
-	struct vfio_pci_mmap_vma *mmap_vma, *tmp;
-
-	/*
-	 * Lock ordering:
-	 * vma_lock is nested under mmap_lock for vm_ops callback paths.
-	 * The memory_lock semaphore is used by both code paths calling
-	 * into this function to zap vmas and the vm_ops.fault callback
-	 * to protect the memory enable state of the device.
-	 *
-	 * When zapping vmas we need to maintain the mmap_lock => vma_lock
-	 * ordering, which requires using vma_lock to walk vma_list to
-	 * acquire an mm, then dropping vma_lock to get the mmap_lock and
-	 * reacquiring vma_lock.  This logic is derived from similar
-	 * requirements in uverbs_user_mmap_disassociate().
-	 *
-	 * mmap_lock must always be the top-level lock when it is taken.
-	 * Therefore we can only hold the memory_lock write lock when
-	 * vma_list is empty, as we'd need to take mmap_lock to clear
-	 * entries.  vma_list can only be guaranteed empty when holding
-	 * vma_lock, thus memory_lock is nested under vma_lock.
-	 *
-	 * This enables the vm_ops.fault callback to acquire vma_lock,
-	 * followed by memory_lock read lock, while already holding
-	 * mmap_lock without risk of deadlock.
-	 */
-	while (1) {
-		struct mm_struct *mm = NULL;
-
-		if (try) {
-			if (!mutex_trylock(&vdev->vma_lock))
-				return 0;
-		} else {
-			mutex_lock(&vdev->vma_lock);
-		}
-		while (!list_empty(&vdev->vma_list)) {
-			mmap_vma = list_first_entry(&vdev->vma_list,
-						    struct vfio_pci_mmap_vma,
-						    vma_next);
-			mm = mmap_vma->vma->vm_mm;
-			if (mmget_not_zero(mm))
-				break;
-
-			list_del(&mmap_vma->vma_next);
-			kfree(mmap_vma);
-			mm = NULL;
-		}
-		if (!mm)
-			return 1;
-		mutex_unlock(&vdev->vma_lock);
-
-		if (try) {
-			if (!mmap_read_trylock(mm)) {
-				mmput(mm);
-				return 0;
-			}
-		} else {
-			mmap_read_lock(mm);
-		}
-		if (try) {
-			if (!mutex_trylock(&vdev->vma_lock)) {
-				mmap_read_unlock(mm);
-				mmput(mm);
-				return 0;
-			}
-		} else {
-			mutex_lock(&vdev->vma_lock);
-		}
-		list_for_each_entry_safe(mmap_vma, tmp,
-					 &vdev->vma_list, vma_next) {
-			struct vm_area_struct *vma = mmap_vma->vma;
-
-			if (vma->vm_mm != mm)
-				continue;
-
-			list_del(&mmap_vma->vma_next);
-			kfree(mmap_vma);
-
-			zap_vma_ptes(vma, vma->vm_start,
-				     vma->vm_end - vma->vm_start);
-		}
-		mutex_unlock(&vdev->vma_lock);
-		mmap_read_unlock(mm);
-		mmput(mm);
-	}
-}
-
-void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_device *vdev)
-{
-	vfio_pci_zap_and_vma_lock(vdev, false);
-	down_write(&vdev->memory_lock);
-	mutex_unlock(&vdev->vma_lock);
-}
-
-u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_device *vdev)
-{
-	u16 cmd;
-
-	down_write(&vdev->memory_lock);
-	pci_read_config_word(vdev->pdev, PCI_COMMAND, &cmd);
-	if (!(cmd & PCI_COMMAND_MEMORY))
-		pci_write_config_word(vdev->pdev, PCI_COMMAND,
-				      cmd | PCI_COMMAND_MEMORY);
-
-	return cmd;
-}
-
-void vfio_pci_memory_unlock_and_restore(struct vfio_pci_device *vdev, u16 cmd)
-{
-	pci_write_config_word(vdev->pdev, PCI_COMMAND, cmd);
-	up_write(&vdev->memory_lock);
-}
-
-/* Caller holds vma_lock */
-static int __vfio_pci_add_vma(struct vfio_pci_device *vdev,
-			      struct vm_area_struct *vma)
-{
-	struct vfio_pci_mmap_vma *mmap_vma;
-
-	mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL);
-	if (!mmap_vma)
-		return -ENOMEM;
-
-	mmap_vma->vma = vma;
-	list_add(&mmap_vma->vma_next, &vdev->vma_list);
-
-	return 0;
-}
-
-/*
- * Zap mmaps on open so that we can fault them in on access and therefore
- * our vma_list only tracks mappings accessed since last zap.
- */
-static void vfio_pci_mmap_open(struct vm_area_struct *vma)
-{
-	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
-}
-
-static void vfio_pci_mmap_close(struct vm_area_struct *vma)
-{
-	struct vfio_pci_device *vdev = vma->vm_private_data;
-	struct vfio_pci_mmap_vma *mmap_vma;
-
-	mutex_lock(&vdev->vma_lock);
-	list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) {
-		if (mmap_vma->vma == vma) {
-			list_del(&mmap_vma->vma_next);
-			kfree(mmap_vma);
-			break;
-		}
-	}
-	mutex_unlock(&vdev->vma_lock);
-}
-
-static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
-{
-	struct vm_area_struct *vma = vmf->vma;
-	struct vfio_pci_device *vdev = vma->vm_private_data;
-	struct vfio_pci_mmap_vma *mmap_vma;
-	vm_fault_t ret = VM_FAULT_NOPAGE;
-
-	mutex_lock(&vdev->vma_lock);
-	down_read(&vdev->memory_lock);
-
-	if (!__vfio_pci_memory_enabled(vdev)) {
-		ret = VM_FAULT_SIGBUS;
-		goto up_out;
-	}
-
-	/*
-	 * We populate the whole vma on fault, so we need to test whether
-	 * the vma has already been mapped, such as for concurrent faults
-	 * to the same vma.  io_remap_pfn_range() will trigger a BUG_ON if
-	 * we ask it to fill the same range again.
-	 */
-	list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) {
-		if (mmap_vma->vma == vma)
-			goto up_out;
-	}
-
-	if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
-			       vma->vm_end - vma->vm_start,
-			       vma->vm_page_prot)) {
-		ret = VM_FAULT_SIGBUS;
-		zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
-		goto up_out;
-	}
-
-	if (__vfio_pci_add_vma(vdev, vma)) {
-		ret = VM_FAULT_OOM;
-		zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
-	}
-
-up_out:
-	up_read(&vdev->memory_lock);
-	mutex_unlock(&vdev->vma_lock);
-	return ret;
-}
-
-static const struct vm_operations_struct vfio_pci_mmap_ops = {
-	.open = vfio_pci_mmap_open,
-	.close = vfio_pci_mmap_close,
-	.fault = vfio_pci_mmap_fault,
-};
-
-static int vfio_pci_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma)
-{
-	struct vfio_pci_device *vdev =
-		container_of(core_vdev, struct vfio_pci_device, vdev);
-	struct pci_dev *pdev = vdev->pdev;
-	unsigned int index;
-	u64 phys_len, req_len, pgoff, req_start;
-	int ret;
-
-	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
-
-	if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
-		return -EINVAL;
-	if (vma->vm_end < vma->vm_start)
-		return -EINVAL;
-	if ((vma->vm_flags & VM_SHARED) == 0)
-		return -EINVAL;
-	if (index >= VFIO_PCI_NUM_REGIONS) {
-		int regnum = index - VFIO_PCI_NUM_REGIONS;
-		struct vfio_pci_region *region = vdev->region + regnum;
-
-		if (region->ops && region->ops->mmap &&
-		    (region->flags & VFIO_REGION_INFO_FLAG_MMAP))
-			return region->ops->mmap(vdev, region, vma);
-		return -EINVAL;
 	}
-	if (index >= VFIO_PCI_ROM_REGION_INDEX)
-		return -EINVAL;
-	if (!vdev->bar_mmap_supported[index])
-		return -EINVAL;
 
-	phys_len = PAGE_ALIGN(pci_resource_len(pdev, index));
-	req_len = vma->vm_end - vma->vm_start;
-	pgoff = vma->vm_pgoff &
-		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
-	req_start = pgoff << PAGE_SHIFT;
-
-	if (req_start + req_len > phys_len)
-		return -EINVAL;
-
-	/*
-	 * Even though we don't make use of the barmap for the mmap,
-	 * we need to request the region and the barmap tracks that.
-	 */
-	if (!vdev->barmap[index]) {
-		ret = pci_request_selected_regions(pdev,
-						   1 << index, "vfio-pci");
-		if (ret)
-			return ret;
-
-		vdev->barmap[index] = pci_iomap(pdev, index, 0);
-		if (!vdev->barmap[index]) {
-			pci_release_selected_regions(pdev, 1 << index);
-			return -ENOMEM;
-		}
-	}
-
-	vma->vm_private_data = vdev;
-	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-	vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff;
-
-	/*
-	 * See remap_pfn_range(), called from vfio_pci_fault() but we can't
-	 * change vm_flags within the fault handler.  Set them now.
-	 */
-	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
-	vma->vm_ops = &vfio_pci_mmap_ops;
+	vfio_pci_core_finish_enable(vdev);
 
 	return 0;
 }
 
-static void vfio_pci_request(struct vfio_device *core_vdev, unsigned int count)
-{
-	struct vfio_pci_device *vdev =
-		container_of(core_vdev, struct vfio_pci_device, vdev);
-	struct pci_dev *pdev = vdev->pdev;
-
-	mutex_lock(&vdev->igate);
-
-	if (vdev->req_trigger) {
-		if (!(count % 10))
-			pci_notice_ratelimited(pdev,
-				"Relaying device request to user (#%u)\n",
-				count);
-		eventfd_signal(vdev->req_trigger, 1);
-	} else if (count == 0) {
-		pci_warn(pdev,
-			"No device request channel registered, blocked until released by user\n");
-	}
-
-	mutex_unlock(&vdev->igate);
-}
-
-static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev,
-				      bool vf_token, uuid_t *uuid)
-{
-	/*
-	 * There's always some degree of trust or collaboration between SR-IOV
-	 * PF and VFs, even if just that the PF hosts the SR-IOV capability and
-	 * can disrupt VFs with a reset, but often the PF has more explicit
-	 * access to deny service to the VF or access data passed through the
-	 * VF.  We therefore require an opt-in via a shared VF token (UUID) to
-	 * represent this trust.  This both prevents that a VF driver might
-	 * assume the PF driver is a trusted, in-kernel driver, and also that
-	 * a PF driver might be replaced with a rogue driver, unknown to in-use
-	 * VF drivers.
-	 *
-	 * Therefore when presented with a VF, if the PF is a vfio device and
-	 * it is bound to the vfio-pci driver, the user needs to provide a VF
-	 * token to access the device, in the form of appending a vf_token to
-	 * the device name, for example:
-	 *
-	 * "0000:04:10.0 vf_token=bd8d9d2b-5a5f-4f5a-a211-f591514ba1f3"
-	 *
-	 * When presented with a PF which has VFs in use, the user must also
-	 * provide the current VF token to prove collaboration with existing
-	 * VF users.  If VFs are not in use, the VF token provided for the PF
-	 * device will act to set the VF token.
-	 *
-	 * If the VF token is provided but unused, an error is generated.
-	 */
-	if (!vdev->pdev->is_virtfn && !vdev->vf_token && !vf_token)
-		return 0; /* No VF token provided or required */
-
-	if (vdev->pdev->is_virtfn) {
-		struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev);
-		bool match;
-
-		if (!pf_vdev) {
-			if (!vf_token)
-				return 0; /* PF is not vfio-pci, no VF token */
-
-			pci_info_ratelimited(vdev->pdev,
-				"VF token incorrectly provided, PF not bound to vfio-pci\n");
-			return -EINVAL;
-		}
-
-		if (!vf_token) {
-			vfio_device_put(&pf_vdev->vdev);
-			pci_info_ratelimited(vdev->pdev,
-				"VF token required to access device\n");
-			return -EACCES;
-		}
-
-		mutex_lock(&pf_vdev->vf_token->lock);
-		match = uuid_equal(uuid, &pf_vdev->vf_token->uuid);
-		mutex_unlock(&pf_vdev->vf_token->lock);
-
-		vfio_device_put(&pf_vdev->vdev);
-
-		if (!match) {
-			pci_info_ratelimited(vdev->pdev,
-				"Incorrect VF token provided for device\n");
-			return -EACCES;
-		}
-	} else if (vdev->vf_token) {
-		mutex_lock(&vdev->vf_token->lock);
-		if (vdev->vf_token->users) {
-			if (!vf_token) {
-				mutex_unlock(&vdev->vf_token->lock);
-				pci_info_ratelimited(vdev->pdev,
-					"VF token required to access device\n");
-				return -EACCES;
-			}
-
-			if (!uuid_equal(uuid, &vdev->vf_token->uuid)) {
-				mutex_unlock(&vdev->vf_token->lock);
-				pci_info_ratelimited(vdev->pdev,
-					"Incorrect VF token provided for device\n");
-				return -EACCES;
-			}
-		} else if (vf_token) {
-			uuid_copy(&vdev->vf_token->uuid, uuid);
-		}
-
-		mutex_unlock(&vdev->vf_token->lock);
-	} else if (vf_token) {
-		pci_info_ratelimited(vdev->pdev,
-			"VF token incorrectly provided, not a PF or VF\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-#define VF_TOKEN_ARG "vf_token="
-
-static int vfio_pci_match(struct vfio_device *core_vdev, char *buf)
-{
-	struct vfio_pci_device *vdev =
-		container_of(core_vdev, struct vfio_pci_device, vdev);
-	bool vf_token = false;
-	uuid_t uuid;
-	int ret;
-
-	if (strncmp(pci_name(vdev->pdev), buf, strlen(pci_name(vdev->pdev))))
-		return 0; /* No match */
-
-	if (strlen(buf) > strlen(pci_name(vdev->pdev))) {
-		buf += strlen(pci_name(vdev->pdev));
-
-		if (*buf != ' ')
-			return 0; /* No match: non-whitespace after name */
-
-		while (*buf) {
-			if (*buf == ' ') {
-				buf++;
-				continue;
-			}
-
-			if (!vf_token && !strncmp(buf, VF_TOKEN_ARG,
-						  strlen(VF_TOKEN_ARG))) {
-				buf += strlen(VF_TOKEN_ARG);
-
-				if (strlen(buf) < UUID_STRING_LEN)
-					return -EINVAL;
-
-				ret = uuid_parse(buf, &uuid);
-				if (ret)
-					return ret;
-
-				vf_token = true;
-				buf += UUID_STRING_LEN;
-			} else {
-				/* Unknown/duplicate option */
-				return -EINVAL;
-			}
-		}
-	}
-
-	ret = vfio_pci_validate_vf_token(vdev, vf_token, &uuid);
-	if (ret)
-		return ret;
-
-	return 1; /* Match */
-}
-
 static const struct vfio_device_ops vfio_pci_ops = {
 	.name		= "vfio-pci",
-	.open		= vfio_pci_open,
-	.release	= vfio_pci_release,
-	.ioctl		= vfio_pci_ioctl,
-	.read		= vfio_pci_read,
-	.write		= vfio_pci_write,
-	.mmap		= vfio_pci_mmap,
-	.request	= vfio_pci_request,
-	.match		= vfio_pci_match,
+	.open_device	= vfio_pci_open_device,
+	.close_device	= vfio_pci_core_close_device,
+	.ioctl		= vfio_pci_core_ioctl,
+	.read		= vfio_pci_core_read,
+	.write		= vfio_pci_core_write,
+	.mmap		= vfio_pci_core_mmap,
+	.request	= vfio_pci_core_request,
+	.match		= vfio_pci_core_match,
 };
 
-static int vfio_pci_reflck_attach(struct vfio_pci_device *vdev);
-static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck);
-
-static int vfio_pci_bus_notifier(struct notifier_block *nb,
-				 unsigned long action, void *data)
-{
-	struct vfio_pci_device *vdev = container_of(nb,
-						    struct vfio_pci_device, nb);
-	struct device *dev = data;
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct pci_dev *physfn = pci_physfn(pdev);
-
-	if (action == BUS_NOTIFY_ADD_DEVICE &&
-	    pdev->is_virtfn && physfn == vdev->pdev) {
-		pci_info(vdev->pdev, "Captured SR-IOV VF %s driver_override\n",
-			 pci_name(pdev));
-		pdev->driver_override = kasprintf(GFP_KERNEL, "%s",
-						  vfio_pci_ops.name);
-	} else if (action == BUS_NOTIFY_BOUND_DRIVER &&
-		   pdev->is_virtfn && physfn == vdev->pdev) {
-		struct pci_driver *drv = pci_dev_driver(pdev);
-
-		if (drv && drv != &vfio_pci_driver)
-			pci_warn(vdev->pdev,
-				 "VF %s bound to driver %s while PF bound to vfio-pci\n",
-				 pci_name(pdev), drv->name);
-	}
-
-	return 0;
-}
-
-static int vfio_pci_vf_init(struct vfio_pci_device *vdev)
-{
-	struct pci_dev *pdev = vdev->pdev;
-	int ret;
-
-	if (!pdev->is_physfn)
-		return 0;
-
-	vdev->vf_token = kzalloc(sizeof(*vdev->vf_token), GFP_KERNEL);
-	if (!vdev->vf_token)
-		return -ENOMEM;
-
-	mutex_init(&vdev->vf_token->lock);
-	uuid_gen(&vdev->vf_token->uuid);
-
-	vdev->nb.notifier_call = vfio_pci_bus_notifier;
-	ret = bus_register_notifier(&pci_bus_type, &vdev->nb);
-	if (ret) {
-		kfree(vdev->vf_token);
-		return ret;
-	}
-	return 0;
-}
-
-static void vfio_pci_vf_uninit(struct vfio_pci_device *vdev)
-{
-	if (!vdev->vf_token)
-		return;
-
-	bus_unregister_notifier(&pci_bus_type, &vdev->nb);
-	WARN_ON(vdev->vf_token->users);
-	mutex_destroy(&vdev->vf_token->lock);
-	kfree(vdev->vf_token);
-}
-
-static int vfio_pci_vga_init(struct vfio_pci_device *vdev)
-{
-	struct pci_dev *pdev = vdev->pdev;
-	int ret;
-
-	if (!vfio_pci_is_vga(pdev))
-		return 0;
-
-	ret = vga_client_register(pdev, vfio_pci_set_decode);
-	if (ret)
-		return ret;
-	vga_set_legacy_decoding(pdev, vfio_pci_set_decode(pdev, false));
-	return 0;
-}
-
-static void vfio_pci_vga_uninit(struct vfio_pci_device *vdev)
-{
-	struct pci_dev *pdev = vdev->pdev;
-
-	if (!vfio_pci_is_vga(pdev))
-		return;
-	vga_client_unregister(pdev);
-	vga_set_legacy_decoding(pdev, VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
-					      VGA_RSRC_LEGACY_IO |
-					      VGA_RSRC_LEGACY_MEM);
-}
-
 static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
-	struct vfio_pci_device *vdev;
-	struct iommu_group *group;
+	struct vfio_pci_core_device *vdev;
 	int ret;
 
 	if (vfio_pci_is_denylisted(pdev))
 		return -EINVAL;
 
-	if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
-		return -EINVAL;
-
-	/*
-	 * Prevent binding to PFs with VFs enabled, the VFs might be in use
-	 * by the host or other users.  We cannot capture the VFs if they
-	 * already exist, nor can we track VF users.  Disabling SR-IOV here
-	 * would initiate removing the VFs, which would unbind the driver,
-	 * which is prone to blocking if that VF is also in use by vfio-pci.
-	 * Just reject these PFs and let the user sort it out.
-	 */
-	if (pci_num_vf(pdev)) {
-		pci_warn(pdev, "Cannot bind to PF with SR-IOV enabled\n");
-		return -EBUSY;
-	}
-
-	group = vfio_iommu_group_get(&pdev->dev);
-	if (!group)
-		return -EINVAL;
-
 	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
-	if (!vdev) {
-		ret = -ENOMEM;
-		goto out_group_put;
-	}
-
-	vfio_init_group_dev(&vdev->vdev, &pdev->dev, &vfio_pci_ops);
-	vdev->pdev = pdev;
-	vdev->irq_type = VFIO_PCI_NUM_IRQS;
-	mutex_init(&vdev->igate);
-	spin_lock_init(&vdev->irqlock);
-	mutex_init(&vdev->ioeventfds_lock);
-	INIT_LIST_HEAD(&vdev->dummy_resources_list);
-	INIT_LIST_HEAD(&vdev->ioeventfds_list);
-	mutex_init(&vdev->vma_lock);
-	INIT_LIST_HEAD(&vdev->vma_list);
-	init_rwsem(&vdev->memory_lock);
+	if (!vdev)
+		return -ENOMEM;
+	vfio_pci_core_init_device(vdev, pdev, &vfio_pci_ops);
 
-	ret = vfio_pci_reflck_attach(vdev);
+	ret = vfio_pci_core_register_device(vdev);
 	if (ret)
 		goto out_free;
-	ret = vfio_pci_vf_init(vdev);
-	if (ret)
-		goto out_reflck;
-	ret = vfio_pci_vga_init(vdev);
-	if (ret)
-		goto out_vf;
-
-	vfio_pci_probe_power_state(vdev);
-
-	if (!disable_idle_d3) {
-		/*
-		 * pci-core sets the device power state to an unknown value at
-		 * bootup and after being removed from a driver.  The only
-		 * transition it allows from this unknown state is to D0, which
-		 * typically happens when a driver calls pci_enable_device().
-		 * We're not ready to enable the device yet, but we do want to
-		 * be able to get to D3.  Therefore first do a D0 transition
-		 * before going to D3.
-		 */
-		vfio_pci_set_power_state(vdev, PCI_D0);
-		vfio_pci_set_power_state(vdev, PCI_D3hot);
-	}
-
-	ret = vfio_register_group_dev(&vdev->vdev);
-	if (ret)
-		goto out_power;
 	dev_set_drvdata(&pdev->dev, vdev);
 	return 0;
 
-out_power:
-	if (!disable_idle_d3)
-		vfio_pci_set_power_state(vdev, PCI_D0);
-out_vf:
-	vfio_pci_vf_uninit(vdev);
-out_reflck:
-	vfio_pci_reflck_put(vdev->reflck);
 out_free:
-	kfree(vdev->pm_save);
+	vfio_pci_core_uninit_device(vdev);
 	kfree(vdev);
-out_group_put:
-	vfio_iommu_group_put(group, &pdev->dev);
 	return ret;
 }
 
 static void vfio_pci_remove(struct pci_dev *pdev)
 {
-	struct vfio_pci_device *vdev = dev_get_drvdata(&pdev->dev);
-
-	pci_disable_sriov(pdev);
+	struct vfio_pci_core_device *vdev = dev_get_drvdata(&pdev->dev);
 
-	vfio_unregister_group_dev(&vdev->vdev);
-
-	vfio_pci_vf_uninit(vdev);
-	vfio_pci_reflck_put(vdev->reflck);
-	vfio_pci_vga_uninit(vdev);
-
-	vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
-
-	if (!disable_idle_d3)
-		vfio_pci_set_power_state(vdev, PCI_D0);
-
-	mutex_destroy(&vdev->ioeventfds_lock);
-	kfree(vdev->region);
-	kfree(vdev->pm_save);
+	vfio_pci_core_unregister_device(vdev);
+	vfio_pci_core_uninit_device(vdev);
 	kfree(vdev);
 }
 
-static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
-						  pci_channel_state_t state)
-{
-	struct vfio_pci_device *vdev;
-	struct vfio_device *device;
-
-	device = vfio_device_get_from_dev(&pdev->dev);
-	if (device == NULL)
-		return PCI_ERS_RESULT_DISCONNECT;
-
-	vdev = container_of(device, struct vfio_pci_device, vdev);
-
-	mutex_lock(&vdev->igate);
-
-	if (vdev->err_trigger)
-		eventfd_signal(vdev->err_trigger, 1);
-
-	mutex_unlock(&vdev->igate);
-
-	vfio_device_put(device);
-
-	return PCI_ERS_RESULT_CAN_RECOVER;
-}
-
 static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
 {
-	struct vfio_device *device;
-	int ret = 0;
-
-	might_sleep();
-
 	if (!enable_sriov)
 		return -ENOENT;
 
-	device = vfio_device_get_from_dev(&pdev->dev);
-	if (!device)
-		return -ENODEV;
-
-	if (nr_virtfn == 0)
-		pci_disable_sriov(pdev);
-	else
-		ret = pci_enable_sriov(pdev, nr_virtfn);
-
-	vfio_device_put(device);
-
-	return ret < 0 ? ret : nr_virtfn;
+	return vfio_pci_core_sriov_configure(pdev, nr_virtfn);
 }
 
-static const struct pci_error_handlers vfio_err_handlers = {
-	.error_detected = vfio_pci_aer_err_detected,
+static const struct pci_device_id vfio_pci_table[] = {
+	{ PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_ANY_ID, PCI_ANY_ID) }, /* match all by default */
+	{}
 };
 
+MODULE_DEVICE_TABLE(pci, vfio_pci_table);
+
 static struct pci_driver vfio_pci_driver = {
 	.name			= "vfio-pci",
-	.id_table		= NULL, /* only dynamic ids */
+	.id_table		= vfio_pci_table,
 	.probe			= vfio_pci_probe,
 	.remove			= vfio_pci_remove,
 	.sriov_configure	= vfio_pci_sriov_configure,
-	.err_handler		= &vfio_err_handlers,
+	.err_handler		= &vfio_pci_core_err_handlers,
 };
 
-static DEFINE_MUTEX(reflck_lock);
-
-static struct vfio_pci_reflck *vfio_pci_reflck_alloc(void)
-{
-	struct vfio_pci_reflck *reflck;
-
-	reflck = kzalloc(sizeof(*reflck), GFP_KERNEL);
-	if (!reflck)
-		return ERR_PTR(-ENOMEM);
-
-	kref_init(&reflck->kref);
-	mutex_init(&reflck->lock);
-
-	return reflck;
-}
-
-static void vfio_pci_reflck_get(struct vfio_pci_reflck *reflck)
-{
-	kref_get(&reflck->kref);
-}
-
-static int vfio_pci_reflck_find(struct pci_dev *pdev, void *data)
-{
-	struct vfio_pci_reflck **preflck = data;
-	struct vfio_device *device;
-	struct vfio_pci_device *vdev;
-
-	device = vfio_device_get_from_dev(&pdev->dev);
-	if (!device)
-		return 0;
-
-	if (pci_dev_driver(pdev) != &vfio_pci_driver) {
-		vfio_device_put(device);
-		return 0;
-	}
-
-	vdev = container_of(device, struct vfio_pci_device, vdev);
-
-	if (vdev->reflck) {
-		vfio_pci_reflck_get(vdev->reflck);
-		*preflck = vdev->reflck;
-		vfio_device_put(device);
-		return 1;
-	}
-
-	vfio_device_put(device);
-	return 0;
-}
-
-static int vfio_pci_reflck_attach(struct vfio_pci_device *vdev)
-{
-	bool slot = !pci_probe_reset_slot(vdev->pdev->slot);
-
-	mutex_lock(&reflck_lock);
-
-	if (pci_is_root_bus(vdev->pdev->bus) ||
-	    vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_reflck_find,
-					  &vdev->reflck, slot) <= 0)
-		vdev->reflck = vfio_pci_reflck_alloc();
-
-	mutex_unlock(&reflck_lock);
-
-	return PTR_ERR_OR_ZERO(vdev->reflck);
-}
-
-static void vfio_pci_reflck_release(struct kref *kref)
-{
-	struct vfio_pci_reflck *reflck = container_of(kref,
-						      struct vfio_pci_reflck,
-						      kref);
-
-	kfree(reflck);
-	mutex_unlock(&reflck_lock);
-}
-
-static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck)
-{
-	kref_put_mutex(&reflck->kref, vfio_pci_reflck_release, &reflck_lock);
-}
-
-static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
-{
-	struct vfio_devices *devs = data;
-	struct vfio_device *device;
-	struct vfio_pci_device *vdev;
-
-	if (devs->cur_index == devs->max_index)
-		return -ENOSPC;
-
-	device = vfio_device_get_from_dev(&pdev->dev);
-	if (!device)
-		return -EINVAL;
-
-	if (pci_dev_driver(pdev) != &vfio_pci_driver) {
-		vfio_device_put(device);
-		return -EBUSY;
-	}
-
-	vdev = container_of(device, struct vfio_pci_device, vdev);
-
-	/* Fault if the device is not unused */
-	if (vdev->refcnt) {
-		vfio_device_put(device);
-		return -EBUSY;
-	}
-
-	devs->devices[devs->cur_index++] = vdev;
-	return 0;
-}
-
-static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data)
-{
-	struct vfio_devices *devs = data;
-	struct vfio_device *device;
-	struct vfio_pci_device *vdev;
-
-	if (devs->cur_index == devs->max_index)
-		return -ENOSPC;
-
-	device = vfio_device_get_from_dev(&pdev->dev);
-	if (!device)
-		return -EINVAL;
-
-	if (pci_dev_driver(pdev) != &vfio_pci_driver) {
-		vfio_device_put(device);
-		return -EBUSY;
-	}
-
-	vdev = container_of(device, struct vfio_pci_device, vdev);
-
-	/*
-	 * Locking multiple devices is prone to deadlock, runaway and
-	 * unwind if we hit contention.
-	 */
-	if (!vfio_pci_zap_and_vma_lock(vdev, true)) {
-		vfio_device_put(device);
-		return -EBUSY;
-	}
-
-	devs->devices[devs->cur_index++] = vdev;
-	return 0;
-}
-
-/*
- * If a bus or slot reset is available for the provided device and:
- *  - All of the devices affected by that bus or slot reset are unused
- *    (!refcnt)
- *  - At least one of the affected devices is marked dirty via
- *    needs_reset (such as by lack of FLR support)
- * Then attempt to perform that bus or slot reset.  Callers are required
- * to hold vdev->reflck->lock, protecting the bus/slot reset group from
- * concurrent opens.  A vfio_device reference is acquired for each device
- * to prevent unbinds during the reset operation.
- *
- * NB: vfio-core considers a group to be viable even if some devices are
- * bound to drivers like pci-stub or pcieport.  Here we require all devices
- * to be bound to vfio_pci since that's the only way we can be sure they
- * stay put.
- */
-static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
-{
-	struct vfio_devices devs = { .cur_index = 0 };
-	int i = 0, ret = -EINVAL;
-	bool slot = false;
-	struct vfio_pci_device *tmp;
-
-	if (!pci_probe_reset_slot(vdev->pdev->slot))
-		slot = true;
-	else if (pci_probe_reset_bus(vdev->pdev->bus))
-		return;
-
-	if (vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_count_devs,
-					  &i, slot) || !i)
-		return;
-
-	devs.max_index = i;
-	devs.devices = kcalloc(i, sizeof(struct vfio_device *), GFP_KERNEL);
-	if (!devs.devices)
-		return;
-
-	if (vfio_pci_for_each_slot_or_bus(vdev->pdev,
-					  vfio_pci_get_unused_devs,
-					  &devs, slot))
-		goto put_devs;
-
-	/* Does at least one need a reset? */
-	for (i = 0; i < devs.cur_index; i++) {
-		tmp = devs.devices[i];
-		if (tmp->needs_reset) {
-			ret = pci_reset_bus(vdev->pdev);
-			break;
-		}
-	}
-
-put_devs:
-	for (i = 0; i < devs.cur_index; i++) {
-		tmp = devs.devices[i];
-
-		/*
-		 * If reset was successful, affected devices no longer need
-		 * a reset and we should return all the collateral devices
-		 * to low power.  If not successful, we either didn't reset
-		 * the bus or timed out waiting for it, so let's not touch
-		 * the power state.
-		 */
-		if (!ret) {
-			tmp->needs_reset = false;
-
-			if (tmp != vdev && !disable_idle_d3)
-				vfio_pci_set_power_state(tmp, PCI_D3hot);
-		}
-
-		vfio_device_put(&tmp->vdev);
-	}
-
-	kfree(devs.devices);
-}
-
-static void __exit vfio_pci_cleanup(void)
-{
-	pci_unregister_driver(&vfio_pci_driver);
-	vfio_pci_uninit_perm_bits();
-}
-
 static void __init vfio_pci_fill_ids(void)
 {
 	char *p, *id;
@@ -2418,16 +239,18 @@ static void __init vfio_pci_fill_ids(void)
 static int __init vfio_pci_init(void)
 {
 	int ret;
+	bool is_disable_vga = true;
 
-	/* Allocate shared config space permission data used by all devices */
-	ret = vfio_pci_init_perm_bits();
-	if (ret)
-		return ret;
+#ifdef CONFIG_VFIO_PCI_VGA
+	is_disable_vga = disable_vga;
+#endif
+
+	vfio_pci_core_set_params(nointxmask, is_disable_vga, disable_idle_d3);
 
 	/* Register and scan for devices */
 	ret = pci_register_driver(&vfio_pci_driver);
 	if (ret)
-		goto out_driver;
+		return ret;
 
 	vfio_pci_fill_ids();
 
@@ -2435,16 +258,15 @@ static int __init vfio_pci_init(void)
 		pr_warn("device denylist disabled.\n");
 
 	return 0;
-
-out_driver:
-	vfio_pci_uninit_perm_bits();
-	return ret;
 }
-
 module_init(vfio_pci_init);
+
+static void __exit vfio_pci_cleanup(void)
+{
+	pci_unregister_driver(&vfio_pci_driver);
+}
 module_exit(vfio_pci_cleanup);
 
-MODULE_VERSION(DRIVER_VERSION);
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index 70e28efbc51f..6e58b4bf7a60 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -26,7 +26,7 @@
 #include <linux/vfio.h>
 #include <linux/slab.h>
 
-#include "vfio_pci_private.h"
+#include <linux/vfio_pci_core.h>
 
 /* Fake capability ID for standard config space */
 #define PCI_CAP_ID_BASIC	0
@@ -108,9 +108,9 @@ static const u16 pci_ext_cap_length[PCI_EXT_CAP_ID_MAX + 1] = {
 struct perm_bits {
 	u8	*virt;		/* read/write virtual data, not hw */
 	u8	*write;		/* writeable bits */
-	int	(*readfn)(struct vfio_pci_device *vdev, int pos, int count,
+	int	(*readfn)(struct vfio_pci_core_device *vdev, int pos, int count,
 			  struct perm_bits *perm, int offset, __le32 *val);
-	int	(*writefn)(struct vfio_pci_device *vdev, int pos, int count,
+	int	(*writefn)(struct vfio_pci_core_device *vdev, int pos, int count,
 			   struct perm_bits *perm, int offset, __le32 val);
 };
 
@@ -171,7 +171,7 @@ static int vfio_user_config_write(struct pci_dev *pdev, int offset,
 	return ret;
 }
 
-static int vfio_default_config_read(struct vfio_pci_device *vdev, int pos,
+static int vfio_default_config_read(struct vfio_pci_core_device *vdev, int pos,
 				    int count, struct perm_bits *perm,
 				    int offset, __le32 *val)
 {
@@ -197,7 +197,7 @@ static int vfio_default_config_read(struct vfio_pci_device *vdev, int pos,
 	return count;
 }
 
-static int vfio_default_config_write(struct vfio_pci_device *vdev, int pos,
+static int vfio_default_config_write(struct vfio_pci_core_device *vdev, int pos,
 				     int count, struct perm_bits *perm,
 				     int offset, __le32 val)
 {
@@ -244,7 +244,7 @@ static int vfio_default_config_write(struct vfio_pci_device *vdev, int pos,
 }
 
 /* Allow direct read from hardware, except for capability next pointer */
-static int vfio_direct_config_read(struct vfio_pci_device *vdev, int pos,
+static int vfio_direct_config_read(struct vfio_pci_core_device *vdev, int pos,
 				   int count, struct perm_bits *perm,
 				   int offset, __le32 *val)
 {
@@ -269,7 +269,7 @@ static int vfio_direct_config_read(struct vfio_pci_device *vdev, int pos,
 }
 
 /* Raw access skips any kind of virtualization */
-static int vfio_raw_config_write(struct vfio_pci_device *vdev, int pos,
+static int vfio_raw_config_write(struct vfio_pci_core_device *vdev, int pos,
 				 int count, struct perm_bits *perm,
 				 int offset, __le32 val)
 {
@@ -282,7 +282,7 @@ static int vfio_raw_config_write(struct vfio_pci_device *vdev, int pos,
 	return count;
 }
 
-static int vfio_raw_config_read(struct vfio_pci_device *vdev, int pos,
+static int vfio_raw_config_read(struct vfio_pci_core_device *vdev, int pos,
 				int count, struct perm_bits *perm,
 				int offset, __le32 *val)
 {
@@ -296,7 +296,7 @@ static int vfio_raw_config_read(struct vfio_pci_device *vdev, int pos,
 }
 
 /* Virt access uses only virtualization */
-static int vfio_virt_config_write(struct vfio_pci_device *vdev, int pos,
+static int vfio_virt_config_write(struct vfio_pci_core_device *vdev, int pos,
 				  int count, struct perm_bits *perm,
 				  int offset, __le32 val)
 {
@@ -304,7 +304,7 @@ static int vfio_virt_config_write(struct vfio_pci_device *vdev, int pos,
 	return count;
 }
 
-static int vfio_virt_config_read(struct vfio_pci_device *vdev, int pos,
+static int vfio_virt_config_read(struct vfio_pci_core_device *vdev, int pos,
 				 int count, struct perm_bits *perm,
 				 int offset, __le32 *val)
 {
@@ -396,7 +396,7 @@ static inline void p_setd(struct perm_bits *p, int off, u32 virt, u32 write)
 }
 
 /* Caller should hold memory_lock semaphore */
-bool __vfio_pci_memory_enabled(struct vfio_pci_device *vdev)
+bool __vfio_pci_memory_enabled(struct vfio_pci_core_device *vdev)
 {
 	struct pci_dev *pdev = vdev->pdev;
 	u16 cmd = le16_to_cpu(*(__le16 *)&vdev->vconfig[PCI_COMMAND]);
@@ -413,7 +413,7 @@ bool __vfio_pci_memory_enabled(struct vfio_pci_device *vdev)
  * Restore the *real* BARs after we detect a FLR or backdoor reset.
  * (backdoor = some device specific technique that we didn't catch)
  */
-static void vfio_bar_restore(struct vfio_pci_device *vdev)
+static void vfio_bar_restore(struct vfio_pci_core_device *vdev)
 {
 	struct pci_dev *pdev = vdev->pdev;
 	u32 *rbar = vdev->rbar;
@@ -460,7 +460,7 @@ static __le32 vfio_generate_bar_flags(struct pci_dev *pdev, int bar)
  * Pretend we're hardware and tweak the values of the *virtual* PCI BARs
  * to reflect the hardware capabilities.  This implements BAR sizing.
  */
-static void vfio_bar_fixup(struct vfio_pci_device *vdev)
+static void vfio_bar_fixup(struct vfio_pci_core_device *vdev)
 {
 	struct pci_dev *pdev = vdev->pdev;
 	int i;
@@ -514,7 +514,7 @@ static void vfio_bar_fixup(struct vfio_pci_device *vdev)
 	vdev->bardirty = false;
 }
 
-static int vfio_basic_config_read(struct vfio_pci_device *vdev, int pos,
+static int vfio_basic_config_read(struct vfio_pci_core_device *vdev, int pos,
 				  int count, struct perm_bits *perm,
 				  int offset, __le32 *val)
 {
@@ -536,7 +536,7 @@ static int vfio_basic_config_read(struct vfio_pci_device *vdev, int pos,
 }
 
 /* Test whether BARs match the value we think they should contain */
-static bool vfio_need_bar_restore(struct vfio_pci_device *vdev)
+static bool vfio_need_bar_restore(struct vfio_pci_core_device *vdev)
 {
 	int i = 0, pos = PCI_BASE_ADDRESS_0, ret;
 	u32 bar;
@@ -552,7 +552,7 @@ static bool vfio_need_bar_restore(struct vfio_pci_device *vdev)
 	return false;
 }
 
-static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos,
+static int vfio_basic_config_write(struct vfio_pci_core_device *vdev, int pos,
 				   int count, struct perm_bits *perm,
 				   int offset, __le32 val)
 {
@@ -692,7 +692,7 @@ static int __init init_pci_cap_basic_perm(struct perm_bits *perm)
 	return 0;
 }
 
-static int vfio_pm_config_write(struct vfio_pci_device *vdev, int pos,
+static int vfio_pm_config_write(struct vfio_pci_core_device *vdev, int pos,
 				int count, struct perm_bits *perm,
 				int offset, __le32 val)
 {
@@ -747,7 +747,7 @@ static int __init init_pci_cap_pm_perm(struct perm_bits *perm)
 	return 0;
 }
 
-static int vfio_vpd_config_write(struct vfio_pci_device *vdev, int pos,
+static int vfio_vpd_config_write(struct vfio_pci_core_device *vdev, int pos,
 				 int count, struct perm_bits *perm,
 				 int offset, __le32 val)
 {
@@ -829,7 +829,7 @@ static int __init init_pci_cap_pcix_perm(struct perm_bits *perm)
 	return 0;
 }
 
-static int vfio_exp_config_write(struct vfio_pci_device *vdev, int pos,
+static int vfio_exp_config_write(struct vfio_pci_core_device *vdev, int pos,
 				 int count, struct perm_bits *perm,
 				 int offset, __le32 val)
 {
@@ -913,7 +913,7 @@ static int __init init_pci_cap_exp_perm(struct perm_bits *perm)
 	return 0;
 }
 
-static int vfio_af_config_write(struct vfio_pci_device *vdev, int pos,
+static int vfio_af_config_write(struct vfio_pci_core_device *vdev, int pos,
 				int count, struct perm_bits *perm,
 				int offset, __le32 val)
 {
@@ -1072,7 +1072,7 @@ int __init vfio_pci_init_perm_bits(void)
 	return ret;
 }
 
-static int vfio_find_cap_start(struct vfio_pci_device *vdev, int pos)
+static int vfio_find_cap_start(struct vfio_pci_core_device *vdev, int pos)
 {
 	u8 cap;
 	int base = (pos >= PCI_CFG_SPACE_SIZE) ? PCI_CFG_SPACE_SIZE :
@@ -1089,7 +1089,7 @@ static int vfio_find_cap_start(struct vfio_pci_device *vdev, int pos)
 	return pos;
 }
 
-static int vfio_msi_config_read(struct vfio_pci_device *vdev, int pos,
+static int vfio_msi_config_read(struct vfio_pci_core_device *vdev, int pos,
 				int count, struct perm_bits *perm,
 				int offset, __le32 *val)
 {
@@ -1109,7 +1109,7 @@ static int vfio_msi_config_read(struct vfio_pci_device *vdev, int pos,
 	return vfio_default_config_read(vdev, pos, count, perm, offset, val);
 }
 
-static int vfio_msi_config_write(struct vfio_pci_device *vdev, int pos,
+static int vfio_msi_config_write(struct vfio_pci_core_device *vdev, int pos,
 				 int count, struct perm_bits *perm,
 				 int offset, __le32 val)
 {
@@ -1189,7 +1189,7 @@ static int init_pci_cap_msi_perm(struct perm_bits *perm, int len, u16 flags)
 }
 
 /* Determine MSI CAP field length; initialize msi_perms on 1st call per vdev */
-static int vfio_msi_cap_len(struct vfio_pci_device *vdev, u8 pos)
+static int vfio_msi_cap_len(struct vfio_pci_core_device *vdev, u8 pos)
 {
 	struct pci_dev *pdev = vdev->pdev;
 	int len, ret;
@@ -1222,7 +1222,7 @@ static int vfio_msi_cap_len(struct vfio_pci_device *vdev, u8 pos)
 }
 
 /* Determine extended capability length for VC (2 & 9) and MFVC */
-static int vfio_vc_cap_len(struct vfio_pci_device *vdev, u16 pos)
+static int vfio_vc_cap_len(struct vfio_pci_core_device *vdev, u16 pos)
 {
 	struct pci_dev *pdev = vdev->pdev;
 	u32 tmp;
@@ -1263,7 +1263,7 @@ static int vfio_vc_cap_len(struct vfio_pci_device *vdev, u16 pos)
 	return len;
 }
 
-static int vfio_cap_len(struct vfio_pci_device *vdev, u8 cap, u8 pos)
+static int vfio_cap_len(struct vfio_pci_core_device *vdev, u8 cap, u8 pos)
 {
 	struct pci_dev *pdev = vdev->pdev;
 	u32 dword;
@@ -1338,7 +1338,7 @@ static int vfio_cap_len(struct vfio_pci_device *vdev, u8 cap, u8 pos)
 	return 0;
 }
 
-static int vfio_ext_cap_len(struct vfio_pci_device *vdev, u16 ecap, u16 epos)
+static int vfio_ext_cap_len(struct vfio_pci_core_device *vdev, u16 ecap, u16 epos)
 {
 	struct pci_dev *pdev = vdev->pdev;
 	u8 byte;
@@ -1412,7 +1412,7 @@ static int vfio_ext_cap_len(struct vfio_pci_device *vdev, u16 ecap, u16 epos)
 	return 0;
 }
 
-static int vfio_fill_vconfig_bytes(struct vfio_pci_device *vdev,
+static int vfio_fill_vconfig_bytes(struct vfio_pci_core_device *vdev,
 				   int offset, int size)
 {
 	struct pci_dev *pdev = vdev->pdev;
@@ -1459,7 +1459,7 @@ static int vfio_fill_vconfig_bytes(struct vfio_pci_device *vdev,
 	return ret;
 }
 
-static int vfio_cap_init(struct vfio_pci_device *vdev)
+static int vfio_cap_init(struct vfio_pci_core_device *vdev)
 {
 	struct pci_dev *pdev = vdev->pdev;
 	u8 *map = vdev->pci_config_map;
@@ -1549,7 +1549,7 @@ static int vfio_cap_init(struct vfio_pci_device *vdev)
 	return 0;
 }
 
-static int vfio_ecap_init(struct vfio_pci_device *vdev)
+static int vfio_ecap_init(struct vfio_pci_core_device *vdev)
 {
 	struct pci_dev *pdev = vdev->pdev;
 	u8 *map = vdev->pci_config_map;
@@ -1669,7 +1669,7 @@ static const struct pci_device_id known_bogus_vf_intx_pin[] = {
  * for each area requiring emulated bits, but the array of pointers
  * would be comparable in size (at least for standard config space).
  */
-int vfio_config_init(struct vfio_pci_device *vdev)
+int vfio_config_init(struct vfio_pci_core_device *vdev)
 {
 	struct pci_dev *pdev = vdev->pdev;
 	u8 *map, *vconfig;
@@ -1773,7 +1773,7 @@ out:
 	return pcibios_err_to_errno(ret);
 }
 
-void vfio_config_free(struct vfio_pci_device *vdev)
+void vfio_config_free(struct vfio_pci_core_device *vdev)
 {
 	kfree(vdev->vconfig);
 	vdev->vconfig = NULL;
@@ -1790,7 +1790,7 @@ void vfio_config_free(struct vfio_pci_device *vdev)
  * Find the remaining number of bytes in a dword that match the given
  * position.  Stop at either the end of the capability or the dword boundary.
  */
-static size_t vfio_pci_cap_remaining_dword(struct vfio_pci_device *vdev,
+static size_t vfio_pci_cap_remaining_dword(struct vfio_pci_core_device *vdev,
 					   loff_t pos)
 {
 	u8 cap = vdev->pci_config_map[pos];
@@ -1802,7 +1802,7 @@ static size_t vfio_pci_cap_remaining_dword(struct vfio_pci_device *vdev,
 	return i;
 }
 
-static ssize_t vfio_config_do_rw(struct vfio_pci_device *vdev, char __user *buf,
+static ssize_t vfio_config_do_rw(struct vfio_pci_core_device *vdev, char __user *buf,
 				 size_t count, loff_t *ppos, bool iswrite)
 {
 	struct pci_dev *pdev = vdev->pdev;
@@ -1885,7 +1885,7 @@ static ssize_t vfio_config_do_rw(struct vfio_pci_device *vdev, char __user *buf,
 	return ret;
 }
 
-ssize_t vfio_pci_config_rw(struct vfio_pci_device *vdev, char __user *buf,
+ssize_t vfio_pci_config_rw(struct vfio_pci_core_device *vdev, char __user *buf,
 			   size_t count, loff_t *ppos, bool iswrite)
 {
 	size_t done = 0;
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
new file mode 100644
index 000000000000..68198e0f2a63
--- /dev/null
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -0,0 +1,2157 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
+ *     Author: Alex Williamson <alex.williamson@redhat.com>
+ *
+ * Derived from original vfio:
+ * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
+ * Author: Tom Lyon, pugs@cisco.com
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/device.h>
+#include <linux/eventfd.h>
+#include <linux/file.h>
+#include <linux/interrupt.h>
+#include <linux/iommu.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/vgaarb.h>
+#include <linux/nospec.h>
+#include <linux/sched/mm.h>
+
+#include <linux/vfio_pci_core.h>
+
+#define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
+#define DRIVER_DESC "core driver for VFIO based PCI devices"
+
+static bool nointxmask;
+static bool disable_vga;
+static bool disable_idle_d3;
+
+static inline bool vfio_vga_disabled(void)
+{
+#ifdef CONFIG_VFIO_PCI_VGA
+	return disable_vga;
+#else
+	return true;
+#endif
+}
+
+/*
+ * Our VGA arbiter participation is limited since we don't know anything
+ * about the device itself.  However, if the device is the only VGA device
+ * downstream of a bridge and VFIO VGA support is disabled, then we can
+ * safely return legacy VGA IO and memory as not decoded since the user
+ * has no way to get to it and routing can be disabled externally at the
+ * bridge.
+ */
+static unsigned int vfio_pci_set_decode(struct pci_dev *pdev, bool single_vga)
+{
+	struct pci_dev *tmp = NULL;
+	unsigned char max_busnr;
+	unsigned int decodes;
+
+	if (single_vga || !vfio_vga_disabled() || pci_is_root_bus(pdev->bus))
+		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
+		       VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;
+
+	max_busnr = pci_bus_max_busnr(pdev->bus);
+	decodes = VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
+
+	while ((tmp = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, tmp)) != NULL) {
+		if (tmp == pdev ||
+		    pci_domain_nr(tmp->bus) != pci_domain_nr(pdev->bus) ||
+		    pci_is_root_bus(tmp->bus))
+			continue;
+
+		if (tmp->bus->number >= pdev->bus->number &&
+		    tmp->bus->number <= max_busnr) {
+			pci_dev_put(tmp);
+			decodes |= VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;
+			break;
+		}
+	}
+
+	return decodes;
+}
+
+static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev)
+{
+	struct resource *res;
+	int i;
+	struct vfio_pci_dummy_resource *dummy_res;
+
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+		int bar = i + PCI_STD_RESOURCES;
+
+		res = &vdev->pdev->resource[bar];
+
+		if (!IS_ENABLED(CONFIG_VFIO_PCI_MMAP))
+			goto no_mmap;
+
+		if (!(res->flags & IORESOURCE_MEM))
+			goto no_mmap;
+
+		/*
+		 * The PCI core shouldn't set up a resource with a
+		 * type but zero size. But there may be bugs that
+		 * cause us to do that.
+		 */
+		if (!resource_size(res))
+			goto no_mmap;
+
+		if (resource_size(res) >= PAGE_SIZE) {
+			vdev->bar_mmap_supported[bar] = true;
+			continue;
+		}
+
+		if (!(res->start & ~PAGE_MASK)) {
+			/*
+			 * Add a dummy resource to reserve the remainder
+			 * of the exclusive page in case that hot-add
+			 * device's bar is assigned into it.
+			 */
+			dummy_res = kzalloc(sizeof(*dummy_res), GFP_KERNEL);
+			if (dummy_res == NULL)
+				goto no_mmap;
+
+			dummy_res->resource.name = "vfio sub-page reserved";
+			dummy_res->resource.start = res->end + 1;
+			dummy_res->resource.end = res->start + PAGE_SIZE - 1;
+			dummy_res->resource.flags = res->flags;
+			if (request_resource(res->parent,
+						&dummy_res->resource)) {
+				kfree(dummy_res);
+				goto no_mmap;
+			}
+			dummy_res->index = bar;
+			list_add(&dummy_res->res_next,
+					&vdev->dummy_resources_list);
+			vdev->bar_mmap_supported[bar] = true;
+			continue;
+		}
+		/*
+		 * Here we don't handle the case when the BAR is not page
+		 * aligned because we can't expect the BAR will be
+		 * assigned into the same location in a page in guest
+		 * when we passthrough the BAR. And it's hard to access
+		 * this BAR in userspace because we have no way to get
+		 * the BAR's location in a page.
+		 */
+no_mmap:
+		vdev->bar_mmap_supported[bar] = false;
+	}
+}
+
+struct vfio_pci_group_info;
+static bool vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set);
+static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
+				      struct vfio_pci_group_info *groups);
+
+/*
+ * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND
+ * _and_ the ability detect when the device is asserting INTx via PCI_STATUS.
+ * If a device implements the former but not the latter we would typically
+ * expect broken_intx_masking be set and require an exclusive interrupt.
+ * However since we do have control of the device's ability to assert INTx,
+ * we can instead pretend that the device does not implement INTx, virtualizing
+ * the pin register to report zero and maintaining DisINTx set on the host.
+ */
+static bool vfio_pci_nointx(struct pci_dev *pdev)
+{
+	switch (pdev->vendor) {
+	case PCI_VENDOR_ID_INTEL:
+		switch (pdev->device) {
+		/* All i40e (XL710/X710/XXV710) 10/20/25/40GbE NICs */
+		case 0x1572:
+		case 0x1574:
+		case 0x1580 ... 0x1581:
+		case 0x1583 ... 0x158b:
+		case 0x37d0 ... 0x37d2:
+		/* X550 */
+		case 0x1563:
+			return true;
+		default:
+			return false;
+		}
+	}
+
+	return false;
+}
+
+static void vfio_pci_probe_power_state(struct vfio_pci_core_device *vdev)
+{
+	struct pci_dev *pdev = vdev->pdev;
+	u16 pmcsr;
+
+	if (!pdev->pm_cap)
+		return;
+
+	pci_read_config_word(pdev, pdev->pm_cap + PCI_PM_CTRL, &pmcsr);
+
+	vdev->needs_pm_restore = !(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET);
+}
+
+/*
+ * pci_set_power_state() wrapper handling devices which perform a soft reset on
+ * D3->D0 transition.  Save state prior to D0/1/2->D3, stash it on the vdev,
+ * restore when returned to D0.  Saved separately from pci_saved_state for use
+ * by PM capability emulation and separately from pci_dev internal saved state
+ * to avoid it being overwritten and consumed around other resets.
+ */
+int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev, pci_power_t state)
+{
+	struct pci_dev *pdev = vdev->pdev;
+	bool needs_restore = false, needs_save = false;
+	int ret;
+
+	if (vdev->needs_pm_restore) {
+		if (pdev->current_state < PCI_D3hot && state >= PCI_D3hot) {
+			pci_save_state(pdev);
+			needs_save = true;
+		}
+
+		if (pdev->current_state >= PCI_D3hot && state <= PCI_D0)
+			needs_restore = true;
+	}
+
+	ret = pci_set_power_state(pdev, state);
+
+	if (!ret) {
+		/* D3 might be unsupported via quirk, skip unless in D3 */
+		if (needs_save && pdev->current_state >= PCI_D3hot) {
+			vdev->pm_save = pci_store_saved_state(pdev);
+		} else if (needs_restore) {
+			pci_load_and_free_saved_state(pdev, &vdev->pm_save);
+			pci_restore_state(pdev);
+		}
+	}
+
+	return ret;
+}
+
+int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
+{
+	struct pci_dev *pdev = vdev->pdev;
+	int ret;
+	u16 cmd;
+	u8 msix_pos;
+
+	vfio_pci_set_power_state(vdev, PCI_D0);
+
+	/* Don't allow our initial saved state to include busmaster */
+	pci_clear_master(pdev);
+
+	ret = pci_enable_device(pdev);
+	if (ret)
+		return ret;
+
+	/* If reset fails because of the device lock, fail this path entirely */
+	ret = pci_try_reset_function(pdev);
+	if (ret == -EAGAIN) {
+		pci_disable_device(pdev);
+		return ret;
+	}
+
+	vdev->reset_works = !ret;
+	pci_save_state(pdev);
+	vdev->pci_saved_state = pci_store_saved_state(pdev);
+	if (!vdev->pci_saved_state)
+		pci_dbg(pdev, "%s: Couldn't store saved state\n", __func__);
+
+	if (likely(!nointxmask)) {
+		if (vfio_pci_nointx(pdev)) {
+			pci_info(pdev, "Masking broken INTx support\n");
+			vdev->nointx = true;
+			pci_intx(pdev, 0);
+		} else
+			vdev->pci_2_3 = pci_intx_mask_supported(pdev);
+	}
+
+	pci_read_config_word(pdev, PCI_COMMAND, &cmd);
+	if (vdev->pci_2_3 && (cmd & PCI_COMMAND_INTX_DISABLE)) {
+		cmd &= ~PCI_COMMAND_INTX_DISABLE;
+		pci_write_config_word(pdev, PCI_COMMAND, cmd);
+	}
+
+	ret = vfio_config_init(vdev);
+	if (ret) {
+		kfree(vdev->pci_saved_state);
+		vdev->pci_saved_state = NULL;
+		pci_disable_device(pdev);
+		return ret;
+	}
+
+	msix_pos = pdev->msix_cap;
+	if (msix_pos) {
+		u16 flags;
+		u32 table;
+
+		pci_read_config_word(pdev, msix_pos + PCI_MSIX_FLAGS, &flags);
+		pci_read_config_dword(pdev, msix_pos + PCI_MSIX_TABLE, &table);
+
+		vdev->msix_bar = table & PCI_MSIX_TABLE_BIR;
+		vdev->msix_offset = table & PCI_MSIX_TABLE_OFFSET;
+		vdev->msix_size = ((flags & PCI_MSIX_FLAGS_QSIZE) + 1) * 16;
+	} else
+		vdev->msix_bar = 0xFF;
+
+	if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
+		vdev->has_vga = true;
+
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_pci_core_enable);
+
+void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
+{
+	struct pci_dev *pdev = vdev->pdev;
+	struct vfio_pci_dummy_resource *dummy_res, *tmp;
+	struct vfio_pci_ioeventfd *ioeventfd, *ioeventfd_tmp;
+	int i, bar;
+
+	/* For needs_reset */
+	lockdep_assert_held(&vdev->vdev.dev_set->lock);
+
+	/* Stop the device from further DMA */
+	pci_clear_master(pdev);
+
+	vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE |
+				VFIO_IRQ_SET_ACTION_TRIGGER,
+				vdev->irq_type, 0, 0, NULL);
+
+	/* Device closed, don't need mutex here */
+	list_for_each_entry_safe(ioeventfd, ioeventfd_tmp,
+				 &vdev->ioeventfds_list, next) {
+		vfio_virqfd_disable(&ioeventfd->virqfd);
+		list_del(&ioeventfd->next);
+		kfree(ioeventfd);
+	}
+	vdev->ioeventfds_nr = 0;
+
+	vdev->virq_disabled = false;
+
+	for (i = 0; i < vdev->num_regions; i++)
+		vdev->region[i].ops->release(vdev, &vdev->region[i]);
+
+	vdev->num_regions = 0;
+	kfree(vdev->region);
+	vdev->region = NULL; /* don't krealloc a freed pointer */
+
+	vfio_config_free(vdev);
+
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+		bar = i + PCI_STD_RESOURCES;
+		if (!vdev->barmap[bar])
+			continue;
+		pci_iounmap(pdev, vdev->barmap[bar]);
+		pci_release_selected_regions(pdev, 1 << bar);
+		vdev->barmap[bar] = NULL;
+	}
+
+	list_for_each_entry_safe(dummy_res, tmp,
+				 &vdev->dummy_resources_list, res_next) {
+		list_del(&dummy_res->res_next);
+		release_resource(&dummy_res->resource);
+		kfree(dummy_res);
+	}
+
+	vdev->needs_reset = true;
+
+	/*
+	 * If we have saved state, restore it.  If we can reset the device,
+	 * even better.  Resetting with current state seems better than
+	 * nothing, but saving and restoring current state without reset
+	 * is just busy work.
+	 */
+	if (pci_load_and_free_saved_state(pdev, &vdev->pci_saved_state)) {
+		pci_info(pdev, "%s: Couldn't reload saved state\n", __func__);
+
+		if (!vdev->reset_works)
+			goto out;
+
+		pci_save_state(pdev);
+	}
+
+	/*
+	 * Disable INTx and MSI, presumably to avoid spurious interrupts
+	 * during reset.  Stolen from pci_reset_function()
+	 */
+	pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
+
+	/*
+	 * Try to get the locks ourselves to prevent a deadlock. The
+	 * success of this is dependent on being able to lock the device,
+	 * which is not always possible.
+	 * We can not use the "try" reset interface here, which will
+	 * overwrite the previously restored configuration information.
+	 */
+	if (vdev->reset_works && pci_dev_trylock(pdev)) {
+		if (!__pci_reset_function_locked(pdev))
+			vdev->needs_reset = false;
+		pci_dev_unlock(pdev);
+	}
+
+	pci_restore_state(pdev);
+out:
+	pci_disable_device(pdev);
+
+	if (!vfio_pci_dev_set_try_reset(vdev->vdev.dev_set) && !disable_idle_d3)
+		vfio_pci_set_power_state(vdev, PCI_D3hot);
+}
+EXPORT_SYMBOL_GPL(vfio_pci_core_disable);
+
+static struct vfio_pci_core_device *get_pf_vdev(struct vfio_pci_core_device *vdev)
+{
+	struct pci_dev *physfn = pci_physfn(vdev->pdev);
+	struct vfio_device *pf_dev;
+
+	if (!vdev->pdev->is_virtfn)
+		return NULL;
+
+	pf_dev = vfio_device_get_from_dev(&physfn->dev);
+	if (!pf_dev)
+		return NULL;
+
+	if (pci_dev_driver(physfn) != pci_dev_driver(vdev->pdev)) {
+		vfio_device_put(pf_dev);
+		return NULL;
+	}
+
+	return container_of(pf_dev, struct vfio_pci_core_device, vdev);
+}
+
+static void vfio_pci_vf_token_user_add(struct vfio_pci_core_device *vdev, int val)
+{
+	struct vfio_pci_core_device *pf_vdev = get_pf_vdev(vdev);
+
+	if (!pf_vdev)
+		return;
+
+	mutex_lock(&pf_vdev->vf_token->lock);
+	pf_vdev->vf_token->users += val;
+	WARN_ON(pf_vdev->vf_token->users < 0);
+	mutex_unlock(&pf_vdev->vf_token->lock);
+
+	vfio_device_put(&pf_vdev->vdev);
+}
+
+void vfio_pci_core_close_device(struct vfio_device *core_vdev)
+{
+	struct vfio_pci_core_device *vdev =
+		container_of(core_vdev, struct vfio_pci_core_device, vdev);
+
+	vfio_pci_vf_token_user_add(vdev, -1);
+	vfio_spapr_pci_eeh_release(vdev->pdev);
+	vfio_pci_core_disable(vdev);
+
+	mutex_lock(&vdev->igate);
+	if (vdev->err_trigger) {
+		eventfd_ctx_put(vdev->err_trigger);
+		vdev->err_trigger = NULL;
+	}
+	if (vdev->req_trigger) {
+		eventfd_ctx_put(vdev->req_trigger);
+		vdev->req_trigger = NULL;
+	}
+	mutex_unlock(&vdev->igate);
+}
+EXPORT_SYMBOL_GPL(vfio_pci_core_close_device);
+
+void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev)
+{
+	vfio_pci_probe_mmaps(vdev);
+	vfio_spapr_pci_eeh_open(vdev->pdev);
+	vfio_pci_vf_token_user_add(vdev, 1);
+}
+EXPORT_SYMBOL_GPL(vfio_pci_core_finish_enable);
+
+static int vfio_pci_get_irq_count(struct vfio_pci_core_device *vdev, int irq_type)
+{
+	if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
+		u8 pin;
+
+		if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) ||
+		    vdev->nointx || vdev->pdev->is_virtfn)
+			return 0;
+
+		pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin);
+
+		return pin ? 1 : 0;
+	} else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) {
+		u8 pos;
+		u16 flags;
+
+		pos = vdev->pdev->msi_cap;
+		if (pos) {
+			pci_read_config_word(vdev->pdev,
+					     pos + PCI_MSI_FLAGS, &flags);
+			return 1 << ((flags & PCI_MSI_FLAGS_QMASK) >> 1);
+		}
+	} else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX) {
+		u8 pos;
+		u16 flags;
+
+		pos = vdev->pdev->msix_cap;
+		if (pos) {
+			pci_read_config_word(vdev->pdev,
+					     pos + PCI_MSIX_FLAGS, &flags);
+
+			return (flags & PCI_MSIX_FLAGS_QSIZE) + 1;
+		}
+	} else if (irq_type == VFIO_PCI_ERR_IRQ_INDEX) {
+		if (pci_is_pcie(vdev->pdev))
+			return 1;
+	} else if (irq_type == VFIO_PCI_REQ_IRQ_INDEX) {
+		return 1;
+	}
+
+	return 0;
+}
+
+static int vfio_pci_count_devs(struct pci_dev *pdev, void *data)
+{
+	(*(int *)data)++;
+	return 0;
+}
+
+struct vfio_pci_fill_info {
+	int max;
+	int cur;
+	struct vfio_pci_dependent_device *devices;
+};
+
+static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)
+{
+	struct vfio_pci_fill_info *fill = data;
+	struct iommu_group *iommu_group;
+
+	if (fill->cur == fill->max)
+		return -EAGAIN; /* Something changed, try again */
+
+	iommu_group = iommu_group_get(&pdev->dev);
+	if (!iommu_group)
+		return -EPERM; /* Cannot reset non-isolated devices */
+
+	fill->devices[fill->cur].group_id = iommu_group_id(iommu_group);
+	fill->devices[fill->cur].segment = pci_domain_nr(pdev->bus);
+	fill->devices[fill->cur].bus = pdev->bus->number;
+	fill->devices[fill->cur].devfn = pdev->devfn;
+	fill->cur++;
+	iommu_group_put(iommu_group);
+	return 0;
+}
+
+struct vfio_pci_group_info {
+	int count;
+	struct vfio_group **groups;
+};
+
+static bool vfio_pci_dev_below_slot(struct pci_dev *pdev, struct pci_slot *slot)
+{
+	for (; pdev; pdev = pdev->bus->self)
+		if (pdev->bus == slot->bus)
+			return (pdev->slot == slot);
+	return false;
+}
+
+struct vfio_pci_walk_info {
+	int (*fn)(struct pci_dev *, void *data);
+	void *data;
+	struct pci_dev *pdev;
+	bool slot;
+	int ret;
+};
+
+static int vfio_pci_walk_wrapper(struct pci_dev *pdev, void *data)
+{
+	struct vfio_pci_walk_info *walk = data;
+
+	if (!walk->slot || vfio_pci_dev_below_slot(pdev, walk->pdev->slot))
+		walk->ret = walk->fn(pdev, walk->data);
+
+	return walk->ret;
+}
+
+static int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev,
+					 int (*fn)(struct pci_dev *,
+						   void *data), void *data,
+					 bool slot)
+{
+	struct vfio_pci_walk_info walk = {
+		.fn = fn, .data = data, .pdev = pdev, .slot = slot, .ret = 0,
+	};
+
+	pci_walk_bus(pdev->bus, vfio_pci_walk_wrapper, &walk);
+
+	return walk.ret;
+}
+
+static int msix_mmappable_cap(struct vfio_pci_core_device *vdev,
+			      struct vfio_info_cap *caps)
+{
+	struct vfio_info_cap_header header = {
+		.id = VFIO_REGION_INFO_CAP_MSIX_MAPPABLE,
+		.version = 1
+	};
+
+	return vfio_info_add_capability(caps, &header, sizeof(header));
+}
+
+int vfio_pci_register_dev_region(struct vfio_pci_core_device *vdev,
+				 unsigned int type, unsigned int subtype,
+				 const struct vfio_pci_regops *ops,
+				 size_t size, u32 flags, void *data)
+{
+	struct vfio_pci_region *region;
+
+	region = krealloc(vdev->region,
+			  (vdev->num_regions + 1) * sizeof(*region),
+			  GFP_KERNEL);
+	if (!region)
+		return -ENOMEM;
+
+	vdev->region = region;
+	vdev->region[vdev->num_regions].type = type;
+	vdev->region[vdev->num_regions].subtype = subtype;
+	vdev->region[vdev->num_regions].ops = ops;
+	vdev->region[vdev->num_regions].size = size;
+	vdev->region[vdev->num_regions].flags = flags;
+	vdev->region[vdev->num_regions].data = data;
+
+	vdev->num_regions++;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_pci_register_dev_region);
+
+long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
+		unsigned long arg)
+{
+	struct vfio_pci_core_device *vdev =
+		container_of(core_vdev, struct vfio_pci_core_device, vdev);
+	unsigned long minsz;
+
+	if (cmd == VFIO_DEVICE_GET_INFO) {
+		struct vfio_device_info info;
+		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
+		unsigned long capsz;
+		int ret;
+
+		minsz = offsetofend(struct vfio_device_info, num_irqs);
+
+		/* For backward compatibility, cannot require this */
+		capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		if (info.argsz >= capsz) {
+			minsz = capsz;
+			info.cap_offset = 0;
+		}
+
+		info.flags = VFIO_DEVICE_FLAGS_PCI;
+
+		if (vdev->reset_works)
+			info.flags |= VFIO_DEVICE_FLAGS_RESET;
+
+		info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
+		info.num_irqs = VFIO_PCI_NUM_IRQS;
+
+		ret = vfio_pci_info_zdev_add_caps(vdev, &caps);
+		if (ret && ret != -ENODEV) {
+			pci_warn(vdev->pdev, "Failed to setup zPCI info capabilities\n");
+			return ret;
+		}
+
+		if (caps.size) {
+			info.flags |= VFIO_DEVICE_FLAGS_CAPS;
+			if (info.argsz < sizeof(info) + caps.size) {
+				info.argsz = sizeof(info) + caps.size;
+			} else {
+				vfio_info_cap_shift(&caps, sizeof(info));
+				if (copy_to_user((void __user *)arg +
+						  sizeof(info), caps.buf,
+						  caps.size)) {
+					kfree(caps.buf);
+					return -EFAULT;
+				}
+				info.cap_offset = sizeof(info);
+			}
+
+			kfree(caps.buf);
+		}
+
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
+
+	} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
+		struct pci_dev *pdev = vdev->pdev;
+		struct vfio_region_info info;
+		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
+		int i, ret;
+
+		minsz = offsetofend(struct vfio_region_info, offset);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		switch (info.index) {
+		case VFIO_PCI_CONFIG_REGION_INDEX:
+			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+			info.size = pdev->cfg_size;
+			info.flags = VFIO_REGION_INFO_FLAG_READ |
+				     VFIO_REGION_INFO_FLAG_WRITE;
+			break;
+		case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
+			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+			info.size = pci_resource_len(pdev, info.index);
+			if (!info.size) {
+				info.flags = 0;
+				break;
+			}
+
+			info.flags = VFIO_REGION_INFO_FLAG_READ |
+				     VFIO_REGION_INFO_FLAG_WRITE;
+			if (vdev->bar_mmap_supported[info.index]) {
+				info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
+				if (info.index == vdev->msix_bar) {
+					ret = msix_mmappable_cap(vdev, &caps);
+					if (ret)
+						return ret;
+				}
+			}
+
+			break;
+		case VFIO_PCI_ROM_REGION_INDEX:
+		{
+			void __iomem *io;
+			size_t size;
+			u16 cmd;
+
+			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+			info.flags = 0;
+
+			/* Report the BAR size, not the ROM size */
+			info.size = pci_resource_len(pdev, info.index);
+			if (!info.size) {
+				/* Shadow ROMs appear as PCI option ROMs */
+				if (pdev->resource[PCI_ROM_RESOURCE].flags &
+							IORESOURCE_ROM_SHADOW)
+					info.size = 0x20000;
+				else
+					break;
+			}
+
+			/*
+			 * Is it really there?  Enable memory decode for
+			 * implicit access in pci_map_rom().
+			 */
+			cmd = vfio_pci_memory_lock_and_enable(vdev);
+			io = pci_map_rom(pdev, &size);
+			if (io) {
+				info.flags = VFIO_REGION_INFO_FLAG_READ;
+				pci_unmap_rom(pdev, io);
+			} else {
+				info.size = 0;
+			}
+			vfio_pci_memory_unlock_and_restore(vdev, cmd);
+
+			break;
+		}
+		case VFIO_PCI_VGA_REGION_INDEX:
+			if (!vdev->has_vga)
+				return -EINVAL;
+
+			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+			info.size = 0xc0000;
+			info.flags = VFIO_REGION_INFO_FLAG_READ |
+				     VFIO_REGION_INFO_FLAG_WRITE;
+
+			break;
+		default:
+		{
+			struct vfio_region_info_cap_type cap_type = {
+					.header.id = VFIO_REGION_INFO_CAP_TYPE,
+					.header.version = 1 };
+
+			if (info.index >=
+			    VFIO_PCI_NUM_REGIONS + vdev->num_regions)
+				return -EINVAL;
+			info.index = array_index_nospec(info.index,
+							VFIO_PCI_NUM_REGIONS +
+							vdev->num_regions);
+
+			i = info.index - VFIO_PCI_NUM_REGIONS;
+
+			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+			info.size = vdev->region[i].size;
+			info.flags = vdev->region[i].flags;
+
+			cap_type.type = vdev->region[i].type;
+			cap_type.subtype = vdev->region[i].subtype;
+
+			ret = vfio_info_add_capability(&caps, &cap_type.header,
+						       sizeof(cap_type));
+			if (ret)
+				return ret;
+
+			if (vdev->region[i].ops->add_capability) {
+				ret = vdev->region[i].ops->add_capability(vdev,
+						&vdev->region[i], &caps);
+				if (ret)
+					return ret;
+			}
+		}
+		}
+
+		if (caps.size) {
+			info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
+			if (info.argsz < sizeof(info) + caps.size) {
+				info.argsz = sizeof(info) + caps.size;
+				info.cap_offset = 0;
+			} else {
+				vfio_info_cap_shift(&caps, sizeof(info));
+				if (copy_to_user((void __user *)arg +
+						  sizeof(info), caps.buf,
+						  caps.size)) {
+					kfree(caps.buf);
+					return -EFAULT;
+				}
+				info.cap_offset = sizeof(info);
+			}
+
+			kfree(caps.buf);
+		}
+
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
+
+	} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
+		struct vfio_irq_info info;
+
+		minsz = offsetofend(struct vfio_irq_info, count);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
+			return -EINVAL;
+
+		switch (info.index) {
+		case VFIO_PCI_INTX_IRQ_INDEX ... VFIO_PCI_MSIX_IRQ_INDEX:
+		case VFIO_PCI_REQ_IRQ_INDEX:
+			break;
+		case VFIO_PCI_ERR_IRQ_INDEX:
+			if (pci_is_pcie(vdev->pdev))
+				break;
+			fallthrough;
+		default:
+			return -EINVAL;
+		}
+
+		info.flags = VFIO_IRQ_INFO_EVENTFD;
+
+		info.count = vfio_pci_get_irq_count(vdev, info.index);
+
+		if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
+			info.flags |= (VFIO_IRQ_INFO_MASKABLE |
+				       VFIO_IRQ_INFO_AUTOMASKED);
+		else
+			info.flags |= VFIO_IRQ_INFO_NORESIZE;
+
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
+
+	} else if (cmd == VFIO_DEVICE_SET_IRQS) {
+		struct vfio_irq_set hdr;
+		u8 *data = NULL;
+		int max, ret = 0;
+		size_t data_size = 0;
+
+		minsz = offsetofend(struct vfio_irq_set, count);
+
+		if (copy_from_user(&hdr, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		max = vfio_pci_get_irq_count(vdev, hdr.index);
+
+		ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
+						 VFIO_PCI_NUM_IRQS, &data_size);
+		if (ret)
+			return ret;
+
+		if (data_size) {
+			data = memdup_user((void __user *)(arg + minsz),
+					    data_size);
+			if (IS_ERR(data))
+				return PTR_ERR(data);
+		}
+
+		mutex_lock(&vdev->igate);
+
+		ret = vfio_pci_set_irqs_ioctl(vdev, hdr.flags, hdr.index,
+					      hdr.start, hdr.count, data);
+
+		mutex_unlock(&vdev->igate);
+		kfree(data);
+
+		return ret;
+
+	} else if (cmd == VFIO_DEVICE_RESET) {
+		int ret;
+
+		if (!vdev->reset_works)
+			return -EINVAL;
+
+		vfio_pci_zap_and_down_write_memory_lock(vdev);
+		ret = pci_try_reset_function(vdev->pdev);
+		up_write(&vdev->memory_lock);
+
+		return ret;
+
+	} else if (cmd == VFIO_DEVICE_GET_PCI_HOT_RESET_INFO) {
+		struct vfio_pci_hot_reset_info hdr;
+		struct vfio_pci_fill_info fill = { 0 };
+		struct vfio_pci_dependent_device *devices = NULL;
+		bool slot = false;
+		int ret = 0;
+
+		minsz = offsetofend(struct vfio_pci_hot_reset_info, count);
+
+		if (copy_from_user(&hdr, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (hdr.argsz < minsz)
+			return -EINVAL;
+
+		hdr.flags = 0;
+
+		/* Can we do a slot or bus reset or neither? */
+		if (!pci_probe_reset_slot(vdev->pdev->slot))
+			slot = true;
+		else if (pci_probe_reset_bus(vdev->pdev->bus))
+			return -ENODEV;
+
+		/* How many devices are affected? */
+		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
+						    vfio_pci_count_devs,
+						    &fill.max, slot);
+		if (ret)
+			return ret;
+
+		WARN_ON(!fill.max); /* Should always be at least one */
+
+		/*
+		 * If there's enough space, fill it now, otherwise return
+		 * -ENOSPC and the number of devices affected.
+		 */
+		if (hdr.argsz < sizeof(hdr) + (fill.max * sizeof(*devices))) {
+			ret = -ENOSPC;
+			hdr.count = fill.max;
+			goto reset_info_exit;
+		}
+
+		devices = kcalloc(fill.max, sizeof(*devices), GFP_KERNEL);
+		if (!devices)
+			return -ENOMEM;
+
+		fill.devices = devices;
+
+		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
+						    vfio_pci_fill_devs,
+						    &fill, slot);
+
+		/*
+		 * If a device was removed between counting and filling,
+		 * we may come up short of fill.max.  If a device was
+		 * added, we'll have a return of -EAGAIN above.
+		 */
+		if (!ret)
+			hdr.count = fill.cur;
+
+reset_info_exit:
+		if (copy_to_user((void __user *)arg, &hdr, minsz))
+			ret = -EFAULT;
+
+		if (!ret) {
+			if (copy_to_user((void __user *)(arg + minsz), devices,
+					 hdr.count * sizeof(*devices)))
+				ret = -EFAULT;
+		}
+
+		kfree(devices);
+		return ret;
+
+	} else if (cmd == VFIO_DEVICE_PCI_HOT_RESET) {
+		struct vfio_pci_hot_reset hdr;
+		int32_t *group_fds;
+		struct vfio_group **groups;
+		struct vfio_pci_group_info info;
+		bool slot = false;
+		int group_idx, count = 0, ret = 0;
+
+		minsz = offsetofend(struct vfio_pci_hot_reset, count);
+
+		if (copy_from_user(&hdr, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (hdr.argsz < minsz || hdr.flags)
+			return -EINVAL;
+
+		/* Can we do a slot or bus reset or neither? */
+		if (!pci_probe_reset_slot(vdev->pdev->slot))
+			slot = true;
+		else if (pci_probe_reset_bus(vdev->pdev->bus))
+			return -ENODEV;
+
+		/*
+		 * We can't let userspace give us an arbitrarily large
+		 * buffer to copy, so verify how many we think there
+		 * could be.  Note groups can have multiple devices so
+		 * one group per device is the max.
+		 */
+		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
+						    vfio_pci_count_devs,
+						    &count, slot);
+		if (ret)
+			return ret;
+
+		/* Somewhere between 1 and count is OK */
+		if (!hdr.count || hdr.count > count)
+			return -EINVAL;
+
+		group_fds = kcalloc(hdr.count, sizeof(*group_fds), GFP_KERNEL);
+		groups = kcalloc(hdr.count, sizeof(*groups), GFP_KERNEL);
+		if (!group_fds || !groups) {
+			kfree(group_fds);
+			kfree(groups);
+			return -ENOMEM;
+		}
+
+		if (copy_from_user(group_fds, (void __user *)(arg + minsz),
+				   hdr.count * sizeof(*group_fds))) {
+			kfree(group_fds);
+			kfree(groups);
+			return -EFAULT;
+		}
+
+		/*
+		 * For each group_fd, get the group through the vfio external
+		 * user interface and store the group and iommu ID.  This
+		 * ensures the group is held across the reset.
+		 */
+		for (group_idx = 0; group_idx < hdr.count; group_idx++) {
+			struct vfio_group *group;
+			struct fd f = fdget(group_fds[group_idx]);
+			if (!f.file) {
+				ret = -EBADF;
+				break;
+			}
+
+			group = vfio_group_get_external_user(f.file);
+			fdput(f);
+			if (IS_ERR(group)) {
+				ret = PTR_ERR(group);
+				break;
+			}
+
+			groups[group_idx] = group;
+		}
+
+		kfree(group_fds);
+
+		/* release reference to groups on error */
+		if (ret)
+			goto hot_reset_release;
+
+		info.count = hdr.count;
+		info.groups = groups;
+
+		ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info);
+
+hot_reset_release:
+		for (group_idx--; group_idx >= 0; group_idx--)
+			vfio_group_put_external_user(groups[group_idx]);
+
+		kfree(groups);
+		return ret;
+	} else if (cmd == VFIO_DEVICE_IOEVENTFD) {
+		struct vfio_device_ioeventfd ioeventfd;
+		int count;
+
+		minsz = offsetofend(struct vfio_device_ioeventfd, fd);
+
+		if (copy_from_user(&ioeventfd, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (ioeventfd.argsz < minsz)
+			return -EINVAL;
+
+		if (ioeventfd.flags & ~VFIO_DEVICE_IOEVENTFD_SIZE_MASK)
+			return -EINVAL;
+
+		count = ioeventfd.flags & VFIO_DEVICE_IOEVENTFD_SIZE_MASK;
+
+		if (hweight8(count) != 1 || ioeventfd.fd < -1)
+			return -EINVAL;
+
+		return vfio_pci_ioeventfd(vdev, ioeventfd.offset,
+					  ioeventfd.data, count, ioeventfd.fd);
+	} else if (cmd == VFIO_DEVICE_FEATURE) {
+		struct vfio_device_feature feature;
+		uuid_t uuid;
+
+		minsz = offsetofend(struct vfio_device_feature, flags);
+
+		if (copy_from_user(&feature, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (feature.argsz < minsz)
+			return -EINVAL;
+
+		/* Check unknown flags */
+		if (feature.flags & ~(VFIO_DEVICE_FEATURE_MASK |
+				      VFIO_DEVICE_FEATURE_SET |
+				      VFIO_DEVICE_FEATURE_GET |
+				      VFIO_DEVICE_FEATURE_PROBE))
+			return -EINVAL;
+
+		/* GET & SET are mutually exclusive except with PROBE */
+		if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
+		    (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
+		    (feature.flags & VFIO_DEVICE_FEATURE_GET))
+			return -EINVAL;
+
+		switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
+		case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN:
+			if (!vdev->vf_token)
+				return -ENOTTY;
+
+			/*
+			 * We do not support GET of the VF Token UUID as this
+			 * could expose the token of the previous device user.
+			 */
+			if (feature.flags & VFIO_DEVICE_FEATURE_GET)
+				return -EINVAL;
+
+			if (feature.flags & VFIO_DEVICE_FEATURE_PROBE)
+				return 0;
+
+			/* Don't SET unless told to do so */
+			if (!(feature.flags & VFIO_DEVICE_FEATURE_SET))
+				return -EINVAL;
+
+			if (feature.argsz < minsz + sizeof(uuid))
+				return -EINVAL;
+
+			if (copy_from_user(&uuid, (void __user *)(arg + minsz),
+					   sizeof(uuid)))
+				return -EFAULT;
+
+			mutex_lock(&vdev->vf_token->lock);
+			uuid_copy(&vdev->vf_token->uuid, &uuid);
+			mutex_unlock(&vdev->vf_token->lock);
+
+			return 0;
+		default:
+			return -ENOTTY;
+		}
+	}
+
+	return -ENOTTY;
+}
+EXPORT_SYMBOL_GPL(vfio_pci_core_ioctl);
+
+static ssize_t vfio_pci_rw(struct vfio_pci_core_device *vdev, char __user *buf,
+			   size_t count, loff_t *ppos, bool iswrite)
+{
+	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+
+	if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
+		return -EINVAL;
+
+	switch (index) {
+	case VFIO_PCI_CONFIG_REGION_INDEX:
+		return vfio_pci_config_rw(vdev, buf, count, ppos, iswrite);
+
+	case VFIO_PCI_ROM_REGION_INDEX:
+		if (iswrite)
+			return -EINVAL;
+		return vfio_pci_bar_rw(vdev, buf, count, ppos, false);
+
+	case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
+		return vfio_pci_bar_rw(vdev, buf, count, ppos, iswrite);
+
+	case VFIO_PCI_VGA_REGION_INDEX:
+		return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite);
+	default:
+		index -= VFIO_PCI_NUM_REGIONS;
+		return vdev->region[index].ops->rw(vdev, buf,
+						   count, ppos, iswrite);
+	}
+
+	return -EINVAL;
+}
+
+ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf,
+		size_t count, loff_t *ppos)
+{
+	struct vfio_pci_core_device *vdev =
+		container_of(core_vdev, struct vfio_pci_core_device, vdev);
+
+	if (!count)
+		return 0;
+
+	return vfio_pci_rw(vdev, buf, count, ppos, false);
+}
+EXPORT_SYMBOL_GPL(vfio_pci_core_read);
+
+ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf,
+		size_t count, loff_t *ppos)
+{
+	struct vfio_pci_core_device *vdev =
+		container_of(core_vdev, struct vfio_pci_core_device, vdev);
+
+	if (!count)
+		return 0;
+
+	return vfio_pci_rw(vdev, (char __user *)buf, count, ppos, true);
+}
+EXPORT_SYMBOL_GPL(vfio_pci_core_write);
+
+/* Return 1 on zap and vma_lock acquired, 0 on contention (only with @try) */
+static int vfio_pci_zap_and_vma_lock(struct vfio_pci_core_device *vdev, bool try)
+{
+	struct vfio_pci_mmap_vma *mmap_vma, *tmp;
+
+	/*
+	 * Lock ordering:
+	 * vma_lock is nested under mmap_lock for vm_ops callback paths.
+	 * The memory_lock semaphore is used by both code paths calling
+	 * into this function to zap vmas and the vm_ops.fault callback
+	 * to protect the memory enable state of the device.
+	 *
+	 * When zapping vmas we need to maintain the mmap_lock => vma_lock
+	 * ordering, which requires using vma_lock to walk vma_list to
+	 * acquire an mm, then dropping vma_lock to get the mmap_lock and
+	 * reacquiring vma_lock.  This logic is derived from similar
+	 * requirements in uverbs_user_mmap_disassociate().
+	 *
+	 * mmap_lock must always be the top-level lock when it is taken.
+	 * Therefore we can only hold the memory_lock write lock when
+	 * vma_list is empty, as we'd need to take mmap_lock to clear
+	 * entries.  vma_list can only be guaranteed empty when holding
+	 * vma_lock, thus memory_lock is nested under vma_lock.
+	 *
+	 * This enables the vm_ops.fault callback to acquire vma_lock,
+	 * followed by memory_lock read lock, while already holding
+	 * mmap_lock without risk of deadlock.
+	 */
+	while (1) {
+		struct mm_struct *mm = NULL;
+
+		if (try) {
+			if (!mutex_trylock(&vdev->vma_lock))
+				return 0;
+		} else {
+			mutex_lock(&vdev->vma_lock);
+		}
+		while (!list_empty(&vdev->vma_list)) {
+			mmap_vma = list_first_entry(&vdev->vma_list,
+						    struct vfio_pci_mmap_vma,
+						    vma_next);
+			mm = mmap_vma->vma->vm_mm;
+			if (mmget_not_zero(mm))
+				break;
+
+			list_del(&mmap_vma->vma_next);
+			kfree(mmap_vma);
+			mm = NULL;
+		}
+		if (!mm)
+			return 1;
+		mutex_unlock(&vdev->vma_lock);
+
+		if (try) {
+			if (!mmap_read_trylock(mm)) {
+				mmput(mm);
+				return 0;
+			}
+		} else {
+			mmap_read_lock(mm);
+		}
+		if (try) {
+			if (!mutex_trylock(&vdev->vma_lock)) {
+				mmap_read_unlock(mm);
+				mmput(mm);
+				return 0;
+			}
+		} else {
+			mutex_lock(&vdev->vma_lock);
+		}
+		list_for_each_entry_safe(mmap_vma, tmp,
+					 &vdev->vma_list, vma_next) {
+			struct vm_area_struct *vma = mmap_vma->vma;
+
+			if (vma->vm_mm != mm)
+				continue;
+
+			list_del(&mmap_vma->vma_next);
+			kfree(mmap_vma);
+
+			zap_vma_ptes(vma, vma->vm_start,
+				     vma->vm_end - vma->vm_start);
+		}
+		mutex_unlock(&vdev->vma_lock);
+		mmap_read_unlock(mm);
+		mmput(mm);
+	}
+}
+
+void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev)
+{
+	vfio_pci_zap_and_vma_lock(vdev, false);
+	down_write(&vdev->memory_lock);
+	mutex_unlock(&vdev->vma_lock);
+}
+
+u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev)
+{
+	u16 cmd;
+
+	down_write(&vdev->memory_lock);
+	pci_read_config_word(vdev->pdev, PCI_COMMAND, &cmd);
+	if (!(cmd & PCI_COMMAND_MEMORY))
+		pci_write_config_word(vdev->pdev, PCI_COMMAND,
+				      cmd | PCI_COMMAND_MEMORY);
+
+	return cmd;
+}
+
+void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev, u16 cmd)
+{
+	pci_write_config_word(vdev->pdev, PCI_COMMAND, cmd);
+	up_write(&vdev->memory_lock);
+}
+
+/* Caller holds vma_lock */
+static int __vfio_pci_add_vma(struct vfio_pci_core_device *vdev,
+			      struct vm_area_struct *vma)
+{
+	struct vfio_pci_mmap_vma *mmap_vma;
+
+	mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL);
+	if (!mmap_vma)
+		return -ENOMEM;
+
+	mmap_vma->vma = vma;
+	list_add(&mmap_vma->vma_next, &vdev->vma_list);
+
+	return 0;
+}
+
+/*
+ * Zap mmaps on open so that we can fault them in on access and therefore
+ * our vma_list only tracks mappings accessed since last zap.
+ */
+static void vfio_pci_mmap_open(struct vm_area_struct *vma)
+{
+	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+}
+
+static void vfio_pci_mmap_close(struct vm_area_struct *vma)
+{
+	struct vfio_pci_core_device *vdev = vma->vm_private_data;
+	struct vfio_pci_mmap_vma *mmap_vma;
+
+	mutex_lock(&vdev->vma_lock);
+	list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) {
+		if (mmap_vma->vma == vma) {
+			list_del(&mmap_vma->vma_next);
+			kfree(mmap_vma);
+			break;
+		}
+	}
+	mutex_unlock(&vdev->vma_lock);
+}
+
+static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct vfio_pci_core_device *vdev = vma->vm_private_data;
+	struct vfio_pci_mmap_vma *mmap_vma;
+	vm_fault_t ret = VM_FAULT_NOPAGE;
+
+	mutex_lock(&vdev->vma_lock);
+	down_read(&vdev->memory_lock);
+
+	if (!__vfio_pci_memory_enabled(vdev)) {
+		ret = VM_FAULT_SIGBUS;
+		goto up_out;
+	}
+
+	/*
+	 * We populate the whole vma on fault, so we need to test whether
+	 * the vma has already been mapped, such as for concurrent faults
+	 * to the same vma.  io_remap_pfn_range() will trigger a BUG_ON if
+	 * we ask it to fill the same range again.
+	 */
+	list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) {
+		if (mmap_vma->vma == vma)
+			goto up_out;
+	}
+
+	if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+			       vma->vm_end - vma->vm_start,
+			       vma->vm_page_prot)) {
+		ret = VM_FAULT_SIGBUS;
+		zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+		goto up_out;
+	}
+
+	if (__vfio_pci_add_vma(vdev, vma)) {
+		ret = VM_FAULT_OOM;
+		zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+	}
+
+up_out:
+	up_read(&vdev->memory_lock);
+	mutex_unlock(&vdev->vma_lock);
+	return ret;
+}
+
+static const struct vm_operations_struct vfio_pci_mmap_ops = {
+	.open = vfio_pci_mmap_open,
+	.close = vfio_pci_mmap_close,
+	.fault = vfio_pci_mmap_fault,
+};
+
+int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma)
+{
+	struct vfio_pci_core_device *vdev =
+		container_of(core_vdev, struct vfio_pci_core_device, vdev);
+	struct pci_dev *pdev = vdev->pdev;
+	unsigned int index;
+	u64 phys_len, req_len, pgoff, req_start;
+	int ret;
+
+	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
+
+	if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
+		return -EINVAL;
+	if (vma->vm_end < vma->vm_start)
+		return -EINVAL;
+	if ((vma->vm_flags & VM_SHARED) == 0)
+		return -EINVAL;
+	if (index >= VFIO_PCI_NUM_REGIONS) {
+		int regnum = index - VFIO_PCI_NUM_REGIONS;
+		struct vfio_pci_region *region = vdev->region + regnum;
+
+		if (region->ops && region->ops->mmap &&
+		    (region->flags & VFIO_REGION_INFO_FLAG_MMAP))
+			return region->ops->mmap(vdev, region, vma);
+		return -EINVAL;
+	}
+	if (index >= VFIO_PCI_ROM_REGION_INDEX)
+		return -EINVAL;
+	if (!vdev->bar_mmap_supported[index])
+		return -EINVAL;
+
+	phys_len = PAGE_ALIGN(pci_resource_len(pdev, index));
+	req_len = vma->vm_end - vma->vm_start;
+	pgoff = vma->vm_pgoff &
+		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+	req_start = pgoff << PAGE_SHIFT;
+
+	if (req_start + req_len > phys_len)
+		return -EINVAL;
+
+	/*
+	 * Even though we don't make use of the barmap for the mmap,
+	 * we need to request the region and the barmap tracks that.
+	 */
+	if (!vdev->barmap[index]) {
+		ret = pci_request_selected_regions(pdev,
+						   1 << index, "vfio-pci");
+		if (ret)
+			return ret;
+
+		vdev->barmap[index] = pci_iomap(pdev, index, 0);
+		if (!vdev->barmap[index]) {
+			pci_release_selected_regions(pdev, 1 << index);
+			return -ENOMEM;
+		}
+	}
+
+	vma->vm_private_data = vdev;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff;
+
+	/*
+	 * See remap_pfn_range(), called from vfio_pci_fault() but we can't
+	 * change vm_flags within the fault handler.  Set them now.
+	 */
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
+	vma->vm_ops = &vfio_pci_mmap_ops;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_pci_core_mmap);
+
+void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count)
+{
+	struct vfio_pci_core_device *vdev =
+		container_of(core_vdev, struct vfio_pci_core_device, vdev);
+	struct pci_dev *pdev = vdev->pdev;
+
+	mutex_lock(&vdev->igate);
+
+	if (vdev->req_trigger) {
+		if (!(count % 10))
+			pci_notice_ratelimited(pdev,
+				"Relaying device request to user (#%u)\n",
+				count);
+		eventfd_signal(vdev->req_trigger, 1);
+	} else if (count == 0) {
+		pci_warn(pdev,
+			"No device request channel registered, blocked until released by user\n");
+	}
+
+	mutex_unlock(&vdev->igate);
+}
+EXPORT_SYMBOL_GPL(vfio_pci_core_request);
+
+static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev,
+				      bool vf_token, uuid_t *uuid)
+{
+	/*
+	 * There's always some degree of trust or collaboration between SR-IOV
+	 * PF and VFs, even if just that the PF hosts the SR-IOV capability and
+	 * can disrupt VFs with a reset, but often the PF has more explicit
+	 * access to deny service to the VF or access data passed through the
+	 * VF.  We therefore require an opt-in via a shared VF token (UUID) to
+	 * represent this trust.  This both prevents that a VF driver might
+	 * assume the PF driver is a trusted, in-kernel driver, and also that
+	 * a PF driver might be replaced with a rogue driver, unknown to in-use
+	 * VF drivers.
+	 *
+	 * Therefore when presented with a VF, if the PF is a vfio device and
+	 * it is bound to the vfio-pci driver, the user needs to provide a VF
+	 * token to access the device, in the form of appending a vf_token to
+	 * the device name, for example:
+	 *
+	 * "0000:04:10.0 vf_token=bd8d9d2b-5a5f-4f5a-a211-f591514ba1f3"
+	 *
+	 * When presented with a PF which has VFs in use, the user must also
+	 * provide the current VF token to prove collaboration with existing
+	 * VF users.  If VFs are not in use, the VF token provided for the PF
+	 * device will act to set the VF token.
+	 *
+	 * If the VF token is provided but unused, an error is generated.
+	 */
+	if (!vdev->pdev->is_virtfn && !vdev->vf_token && !vf_token)
+		return 0; /* No VF token provided or required */
+
+	if (vdev->pdev->is_virtfn) {
+		struct vfio_pci_core_device *pf_vdev = get_pf_vdev(vdev);
+		bool match;
+
+		if (!pf_vdev) {
+			if (!vf_token)
+				return 0; /* PF is not vfio-pci, no VF token */
+
+			pci_info_ratelimited(vdev->pdev,
+				"VF token incorrectly provided, PF not bound to vfio-pci\n");
+			return -EINVAL;
+		}
+
+		if (!vf_token) {
+			vfio_device_put(&pf_vdev->vdev);
+			pci_info_ratelimited(vdev->pdev,
+				"VF token required to access device\n");
+			return -EACCES;
+		}
+
+		mutex_lock(&pf_vdev->vf_token->lock);
+		match = uuid_equal(uuid, &pf_vdev->vf_token->uuid);
+		mutex_unlock(&pf_vdev->vf_token->lock);
+
+		vfio_device_put(&pf_vdev->vdev);
+
+		if (!match) {
+			pci_info_ratelimited(vdev->pdev,
+				"Incorrect VF token provided for device\n");
+			return -EACCES;
+		}
+	} else if (vdev->vf_token) {
+		mutex_lock(&vdev->vf_token->lock);
+		if (vdev->vf_token->users) {
+			if (!vf_token) {
+				mutex_unlock(&vdev->vf_token->lock);
+				pci_info_ratelimited(vdev->pdev,
+					"VF token required to access device\n");
+				return -EACCES;
+			}
+
+			if (!uuid_equal(uuid, &vdev->vf_token->uuid)) {
+				mutex_unlock(&vdev->vf_token->lock);
+				pci_info_ratelimited(vdev->pdev,
+					"Incorrect VF token provided for device\n");
+				return -EACCES;
+			}
+		} else if (vf_token) {
+			uuid_copy(&vdev->vf_token->uuid, uuid);
+		}
+
+		mutex_unlock(&vdev->vf_token->lock);
+	} else if (vf_token) {
+		pci_info_ratelimited(vdev->pdev,
+			"VF token incorrectly provided, not a PF or VF\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+#define VF_TOKEN_ARG "vf_token="
+
+int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf)
+{
+	struct vfio_pci_core_device *vdev =
+		container_of(core_vdev, struct vfio_pci_core_device, vdev);
+	bool vf_token = false;
+	uuid_t uuid;
+	int ret;
+
+	if (strncmp(pci_name(vdev->pdev), buf, strlen(pci_name(vdev->pdev))))
+		return 0; /* No match */
+
+	if (strlen(buf) > strlen(pci_name(vdev->pdev))) {
+		buf += strlen(pci_name(vdev->pdev));
+
+		if (*buf != ' ')
+			return 0; /* No match: non-whitespace after name */
+
+		while (*buf) {
+			if (*buf == ' ') {
+				buf++;
+				continue;
+			}
+
+			if (!vf_token && !strncmp(buf, VF_TOKEN_ARG,
+						  strlen(VF_TOKEN_ARG))) {
+				buf += strlen(VF_TOKEN_ARG);
+
+				if (strlen(buf) < UUID_STRING_LEN)
+					return -EINVAL;
+
+				ret = uuid_parse(buf, &uuid);
+				if (ret)
+					return ret;
+
+				vf_token = true;
+				buf += UUID_STRING_LEN;
+			} else {
+				/* Unknown/duplicate option */
+				return -EINVAL;
+			}
+		}
+	}
+
+	ret = vfio_pci_validate_vf_token(vdev, vf_token, &uuid);
+	if (ret)
+		return ret;
+
+	return 1; /* Match */
+}
+EXPORT_SYMBOL_GPL(vfio_pci_core_match);
+
+static int vfio_pci_bus_notifier(struct notifier_block *nb,
+				 unsigned long action, void *data)
+{
+	struct vfio_pci_core_device *vdev = container_of(nb,
+						    struct vfio_pci_core_device, nb);
+	struct device *dev = data;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pci_dev *physfn = pci_physfn(pdev);
+
+	if (action == BUS_NOTIFY_ADD_DEVICE &&
+	    pdev->is_virtfn && physfn == vdev->pdev) {
+		pci_info(vdev->pdev, "Captured SR-IOV VF %s driver_override\n",
+			 pci_name(pdev));
+		pdev->driver_override = kasprintf(GFP_KERNEL, "%s",
+						  vdev->vdev.ops->name);
+	} else if (action == BUS_NOTIFY_BOUND_DRIVER &&
+		   pdev->is_virtfn && physfn == vdev->pdev) {
+		struct pci_driver *drv = pci_dev_driver(pdev);
+
+		if (drv && drv != pci_dev_driver(vdev->pdev))
+			pci_warn(vdev->pdev,
+				 "VF %s bound to driver %s while PF bound to driver %s\n",
+				 pci_name(pdev), drv->name,
+				 pci_dev_driver(vdev->pdev)->name);
+	}
+
+	return 0;
+}
+
+static int vfio_pci_vf_init(struct vfio_pci_core_device *vdev)
+{
+	struct pci_dev *pdev = vdev->pdev;
+	int ret;
+
+	if (!pdev->is_physfn)
+		return 0;
+
+	vdev->vf_token = kzalloc(sizeof(*vdev->vf_token), GFP_KERNEL);
+	if (!vdev->vf_token)
+		return -ENOMEM;
+
+	mutex_init(&vdev->vf_token->lock);
+	uuid_gen(&vdev->vf_token->uuid);
+
+	vdev->nb.notifier_call = vfio_pci_bus_notifier;
+	ret = bus_register_notifier(&pci_bus_type, &vdev->nb);
+	if (ret) {
+		kfree(vdev->vf_token);
+		return ret;
+	}
+	return 0;
+}
+
+static void vfio_pci_vf_uninit(struct vfio_pci_core_device *vdev)
+{
+	if (!vdev->vf_token)
+		return;
+
+	bus_unregister_notifier(&pci_bus_type, &vdev->nb);
+	WARN_ON(vdev->vf_token->users);
+	mutex_destroy(&vdev->vf_token->lock);
+	kfree(vdev->vf_token);
+}
+
+static int vfio_pci_vga_init(struct vfio_pci_core_device *vdev)
+{
+	struct pci_dev *pdev = vdev->pdev;
+	int ret;
+
+	if (!vfio_pci_is_vga(pdev))
+		return 0;
+
+	ret = vga_client_register(pdev, vfio_pci_set_decode);
+	if (ret)
+		return ret;
+	vga_set_legacy_decoding(pdev, vfio_pci_set_decode(pdev, false));
+	return 0;
+}
+
+static void vfio_pci_vga_uninit(struct vfio_pci_core_device *vdev)
+{
+	struct pci_dev *pdev = vdev->pdev;
+
+	if (!vfio_pci_is_vga(pdev))
+		return;
+	vga_client_unregister(pdev);
+	vga_set_legacy_decoding(pdev, VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
+					      VGA_RSRC_LEGACY_IO |
+					      VGA_RSRC_LEGACY_MEM);
+}
+
+void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev,
+			       struct pci_dev *pdev,
+			       const struct vfio_device_ops *vfio_pci_ops)
+{
+	vfio_init_group_dev(&vdev->vdev, &pdev->dev, vfio_pci_ops);
+	vdev->pdev = pdev;
+	vdev->irq_type = VFIO_PCI_NUM_IRQS;
+	mutex_init(&vdev->igate);
+	spin_lock_init(&vdev->irqlock);
+	mutex_init(&vdev->ioeventfds_lock);
+	INIT_LIST_HEAD(&vdev->dummy_resources_list);
+	INIT_LIST_HEAD(&vdev->ioeventfds_list);
+	mutex_init(&vdev->vma_lock);
+	INIT_LIST_HEAD(&vdev->vma_list);
+	init_rwsem(&vdev->memory_lock);
+}
+EXPORT_SYMBOL_GPL(vfio_pci_core_init_device);
+
+void vfio_pci_core_uninit_device(struct vfio_pci_core_device *vdev)
+{
+	mutex_destroy(&vdev->igate);
+	mutex_destroy(&vdev->ioeventfds_lock);
+	mutex_destroy(&vdev->vma_lock);
+	vfio_uninit_group_dev(&vdev->vdev);
+	kfree(vdev->region);
+	kfree(vdev->pm_save);
+}
+EXPORT_SYMBOL_GPL(vfio_pci_core_uninit_device);
+
+int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev)
+{
+	struct pci_dev *pdev = vdev->pdev;
+	struct iommu_group *group;
+	int ret;
+
+	if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
+		return -EINVAL;
+
+	/*
+	 * Prevent binding to PFs with VFs enabled, the VFs might be in use
+	 * by the host or other users.  We cannot capture the VFs if they
+	 * already exist, nor can we track VF users.  Disabling SR-IOV here
+	 * would initiate removing the VFs, which would unbind the driver,
+	 * which is prone to blocking if that VF is also in use by vfio-pci.
+	 * Just reject these PFs and let the user sort it out.
+	 */
+	if (pci_num_vf(pdev)) {
+		pci_warn(pdev, "Cannot bind to PF with SR-IOV enabled\n");
+		return -EBUSY;
+	}
+
+	group = vfio_iommu_group_get(&pdev->dev);
+	if (!group)
+		return -EINVAL;
+
+	if (pci_is_root_bus(pdev->bus)) {
+		ret = vfio_assign_device_set(&vdev->vdev, vdev);
+	} else if (!pci_probe_reset_slot(pdev->slot)) {
+		ret = vfio_assign_device_set(&vdev->vdev, pdev->slot);
+	} else {
+		/*
+		 * If there is no slot reset support for this device, the whole
+		 * bus needs to be grouped together to support bus-wide resets.
+		 */
+		ret = vfio_assign_device_set(&vdev->vdev, pdev->bus);
+	}
+
+	if (ret)
+		goto out_group_put;
+	ret = vfio_pci_vf_init(vdev);
+	if (ret)
+		goto out_group_put;
+	ret = vfio_pci_vga_init(vdev);
+	if (ret)
+		goto out_vf;
+
+	vfio_pci_probe_power_state(vdev);
+
+	if (!disable_idle_d3) {
+		/*
+		 * pci-core sets the device power state to an unknown value at
+		 * bootup and after being removed from a driver.  The only
+		 * transition it allows from this unknown state is to D0, which
+		 * typically happens when a driver calls pci_enable_device().
+		 * We're not ready to enable the device yet, but we do want to
+		 * be able to get to D3.  Therefore first do a D0 transition
+		 * before going to D3.
+		 */
+		vfio_pci_set_power_state(vdev, PCI_D0);
+		vfio_pci_set_power_state(vdev, PCI_D3hot);
+	}
+
+	ret = vfio_register_group_dev(&vdev->vdev);
+	if (ret)
+		goto out_power;
+	return 0;
+
+out_power:
+	if (!disable_idle_d3)
+		vfio_pci_set_power_state(vdev, PCI_D0);
+out_vf:
+	vfio_pci_vf_uninit(vdev);
+out_group_put:
+	vfio_iommu_group_put(group, &pdev->dev);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(vfio_pci_core_register_device);
+
+void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev)
+{
+	struct pci_dev *pdev = vdev->pdev;
+
+	pci_disable_sriov(pdev);
+
+	vfio_unregister_group_dev(&vdev->vdev);
+
+	vfio_pci_vf_uninit(vdev);
+	vfio_pci_vga_uninit(vdev);
+
+	vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
+
+	if (!disable_idle_d3)
+		vfio_pci_set_power_state(vdev, PCI_D0);
+}
+EXPORT_SYMBOL_GPL(vfio_pci_core_unregister_device);
+
+static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
+						  pci_channel_state_t state)
+{
+	struct vfio_pci_core_device *vdev;
+	struct vfio_device *device;
+
+	device = vfio_device_get_from_dev(&pdev->dev);
+	if (device == NULL)
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	vdev = container_of(device, struct vfio_pci_core_device, vdev);
+
+	mutex_lock(&vdev->igate);
+
+	if (vdev->err_trigger)
+		eventfd_signal(vdev->err_trigger, 1);
+
+	mutex_unlock(&vdev->igate);
+
+	vfio_device_put(device);
+
+	return PCI_ERS_RESULT_CAN_RECOVER;
+}
+
+int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
+{
+	struct vfio_device *device;
+	int ret = 0;
+
+	device = vfio_device_get_from_dev(&pdev->dev);
+	if (!device)
+		return -ENODEV;
+
+	if (nr_virtfn == 0)
+		pci_disable_sriov(pdev);
+	else
+		ret = pci_enable_sriov(pdev, nr_virtfn);
+
+	vfio_device_put(device);
+
+	return ret < 0 ? ret : nr_virtfn;
+}
+EXPORT_SYMBOL_GPL(vfio_pci_core_sriov_configure);
+
+const struct pci_error_handlers vfio_pci_core_err_handlers = {
+	.error_detected = vfio_pci_aer_err_detected,
+};
+EXPORT_SYMBOL_GPL(vfio_pci_core_err_handlers);
+
+static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev,
+			       struct vfio_pci_group_info *groups)
+{
+	unsigned int i;
+
+	for (i = 0; i < groups->count; i++)
+		if (groups->groups[i] == vdev->vdev.group)
+			return true;
+	return false;
+}
+
+static int vfio_pci_is_device_in_set(struct pci_dev *pdev, void *data)
+{
+	struct vfio_device_set *dev_set = data;
+	struct vfio_device *cur;
+
+	list_for_each_entry(cur, &dev_set->device_list, dev_set_list)
+		if (cur->dev == &pdev->dev)
+			return 0;
+	return -EBUSY;
+}
+
+/*
+ * vfio-core considers a group to be viable and will create a vfio_device even
+ * if some devices are bound to drivers like pci-stub or pcieport. Here we
+ * require all PCI devices to be inside our dev_set since that ensures they stay
+ * put and that every driver controlling the device can co-ordinate with the
+ * device reset.
+ *
+ * Returns the pci_dev to pass to pci_reset_bus() if every PCI device to be
+ * reset is inside the dev_set, and pci_reset_bus() can succeed. NULL otherwise.
+ */
+static struct pci_dev *
+vfio_pci_dev_set_resettable(struct vfio_device_set *dev_set)
+{
+	struct pci_dev *pdev;
+
+	lockdep_assert_held(&dev_set->lock);
+
+	/*
+	 * By definition all PCI devices in the dev_set share the same PCI
+	 * reset, so any pci_dev will have the same outcomes for
+	 * pci_probe_reset_*() and pci_reset_bus().
+	 */
+	pdev = list_first_entry(&dev_set->device_list,
+				struct vfio_pci_core_device,
+				vdev.dev_set_list)->pdev;
+
+	/* pci_reset_bus() is supported */
+	if (pci_probe_reset_slot(pdev->slot) && pci_probe_reset_bus(pdev->bus))
+		return NULL;
+
+	if (vfio_pci_for_each_slot_or_bus(pdev, vfio_pci_is_device_in_set,
+					  dev_set,
+					  !pci_probe_reset_slot(pdev->slot)))
+		return NULL;
+	return pdev;
+}
+
+/*
+ * We need to get memory_lock for each device, but devices can share mmap_lock,
+ * therefore we need to zap and hold the vma_lock for each device, and only then
+ * get each memory_lock.
+ */
+static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
+				      struct vfio_pci_group_info *groups)
+{
+	struct vfio_pci_core_device *cur_mem;
+	struct vfio_pci_core_device *cur_vma;
+	struct vfio_pci_core_device *cur;
+	struct pci_dev *pdev;
+	bool is_mem = true;
+	int ret;
+
+	mutex_lock(&dev_set->lock);
+	cur_mem = list_first_entry(&dev_set->device_list,
+				   struct vfio_pci_core_device,
+				   vdev.dev_set_list);
+
+	pdev = vfio_pci_dev_set_resettable(dev_set);
+	if (!pdev) {
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	list_for_each_entry(cur_vma, &dev_set->device_list, vdev.dev_set_list) {
+		/*
+		 * Test whether all the affected devices are contained by the
+		 * set of groups provided by the user.
+		 */
+		if (!vfio_dev_in_groups(cur_vma, groups)) {
+			ret = -EINVAL;
+			goto err_undo;
+		}
+
+		/*
+		 * Locking multiple devices is prone to deadlock, runaway and
+		 * unwind if we hit contention.
+		 */
+		if (!vfio_pci_zap_and_vma_lock(cur_vma, true)) {
+			ret = -EBUSY;
+			goto err_undo;
+		}
+	}
+	cur_vma = NULL;
+
+	list_for_each_entry(cur_mem, &dev_set->device_list, vdev.dev_set_list) {
+		if (!down_write_trylock(&cur_mem->memory_lock)) {
+			ret = -EBUSY;
+			goto err_undo;
+		}
+		mutex_unlock(&cur_mem->vma_lock);
+	}
+	cur_mem = NULL;
+
+	ret = pci_reset_bus(pdev);
+
+err_undo:
+	list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
+		if (cur == cur_mem)
+			is_mem = false;
+		if (cur == cur_vma)
+			break;
+		if (is_mem)
+			up_write(&cur->memory_lock);
+		else
+			mutex_unlock(&cur->vma_lock);
+	}
+err_unlock:
+	mutex_unlock(&dev_set->lock);
+	return ret;
+}
+
+static bool vfio_pci_dev_set_needs_reset(struct vfio_device_set *dev_set)
+{
+	struct vfio_pci_core_device *cur;
+	bool needs_reset = false;
+
+	list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
+		/* No VFIO device in the set can have an open device FD */
+		if (cur->vdev.open_count)
+			return false;
+		needs_reset |= cur->needs_reset;
+	}
+	return needs_reset;
+}
+
+/*
+ * If a bus or slot reset is available for the provided dev_set and:
+ *  - All of the devices affected by that bus or slot reset are unused
+ *  - At least one of the affected devices is marked dirty via
+ *    needs_reset (such as by lack of FLR support)
+ * Then attempt to perform that bus or slot reset.
+ * Returns true if the dev_set was reset.
+ */
+static bool vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set)
+{
+	struct vfio_pci_core_device *cur;
+	struct pci_dev *pdev;
+	int ret;
+
+	if (!vfio_pci_dev_set_needs_reset(dev_set))
+		return false;
+
+	pdev = vfio_pci_dev_set_resettable(dev_set);
+	if (!pdev)
+		return false;
+
+	ret = pci_reset_bus(pdev);
+	if (ret)
+		return false;
+
+	list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
+		cur->needs_reset = false;
+		if (!disable_idle_d3)
+			vfio_pci_set_power_state(cur, PCI_D3hot);
+	}
+	return true;
+}
+
+void vfio_pci_core_set_params(bool is_nointxmask, bool is_disable_vga,
+			      bool is_disable_idle_d3)
+{
+	nointxmask = is_nointxmask;
+	disable_vga = is_disable_vga;
+	disable_idle_d3 = is_disable_idle_d3;
+}
+EXPORT_SYMBOL_GPL(vfio_pci_core_set_params);
+
+static void vfio_pci_core_cleanup(void)
+{
+	vfio_pci_uninit_perm_bits();
+}
+
+static int __init vfio_pci_core_init(void)
+{
+	/* Allocate shared config space permission data used by all devices */
+	return vfio_pci_init_perm_bits();
+}
+
+module_init(vfio_pci_core_init);
+module_exit(vfio_pci_core_cleanup);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c
index 228df565e9bc..7ca4109bba48 100644
--- a/drivers/vfio/pci/vfio_pci_igd.c
+++ b/drivers/vfio/pci/vfio_pci_igd.c
@@ -15,7 +15,7 @@
 #include <linux/uaccess.h>
 #include <linux/vfio.h>
 
-#include "vfio_pci_private.h"
+#include <linux/vfio_pci_core.h>
 
 #define OPREGION_SIGNATURE	"IntelGraphicsMem"
 #define OPREGION_SIZE		(8 * 1024)
@@ -25,8 +25,9 @@
 #define OPREGION_RVDS		0x3c2
 #define OPREGION_VERSION	0x16
 
-static size_t vfio_pci_igd_rw(struct vfio_pci_device *vdev, char __user *buf,
-			      size_t count, loff_t *ppos, bool iswrite)
+static ssize_t vfio_pci_igd_rw(struct vfio_pci_core_device *vdev,
+			       char __user *buf, size_t count, loff_t *ppos,
+			       bool iswrite)
 {
 	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
 	void *base = vdev->region[i].data;
@@ -45,7 +46,7 @@ static size_t vfio_pci_igd_rw(struct vfio_pci_device *vdev, char __user *buf,
 	return count;
 }
 
-static void vfio_pci_igd_release(struct vfio_pci_device *vdev,
+static void vfio_pci_igd_release(struct vfio_pci_core_device *vdev,
 				 struct vfio_pci_region *region)
 {
 	memunmap(region->data);
@@ -56,7 +57,7 @@ static const struct vfio_pci_regops vfio_pci_igd_regops = {
 	.release	= vfio_pci_igd_release,
 };
 
-static int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev)
+static int vfio_pci_igd_opregion_init(struct vfio_pci_core_device *vdev)
 {
 	__le32 *dwordp = (__le32 *)(vdev->vconfig + OPREGION_PCI_ADDR);
 	u32 addr, size;
@@ -160,9 +161,9 @@ static int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev)
 	return ret;
 }
 
-static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev,
-				  char __user *buf, size_t count, loff_t *ppos,
-				  bool iswrite)
+static ssize_t vfio_pci_igd_cfg_rw(struct vfio_pci_core_device *vdev,
+				   char __user *buf, size_t count, loff_t *ppos,
+				   bool iswrite)
 {
 	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
 	struct pci_dev *pdev = vdev->region[i].data;
@@ -253,7 +254,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev,
 	return count;
 }
 
-static void vfio_pci_igd_cfg_release(struct vfio_pci_device *vdev,
+static void vfio_pci_igd_cfg_release(struct vfio_pci_core_device *vdev,
 				     struct vfio_pci_region *region)
 {
 	struct pci_dev *pdev = region->data;
@@ -266,7 +267,7 @@ static const struct vfio_pci_regops vfio_pci_igd_cfg_regops = {
 	.release	= vfio_pci_igd_cfg_release,
 };
 
-static int vfio_pci_igd_cfg_init(struct vfio_pci_device *vdev)
+static int vfio_pci_igd_cfg_init(struct vfio_pci_core_device *vdev)
 {
 	struct pci_dev *host_bridge, *lpc_bridge;
 	int ret;
@@ -314,7 +315,7 @@ static int vfio_pci_igd_cfg_init(struct vfio_pci_device *vdev)
 	return 0;
 }
 
-int vfio_pci_igd_init(struct vfio_pci_device *vdev)
+int vfio_pci_igd_init(struct vfio_pci_core_device *vdev)
 {
 	int ret;
 
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 869dce5f134d..6069a11fb51a 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -20,20 +20,20 @@
 #include <linux/wait.h>
 #include <linux/slab.h>
 
-#include "vfio_pci_private.h"
+#include <linux/vfio_pci_core.h>
 
 /*
  * INTx
  */
 static void vfio_send_intx_eventfd(void *opaque, void *unused)
 {
-	struct vfio_pci_device *vdev = opaque;
+	struct vfio_pci_core_device *vdev = opaque;
 
 	if (likely(is_intx(vdev) && !vdev->virq_disabled))
 		eventfd_signal(vdev->ctx[0].trigger, 1);
 }
 
-void vfio_pci_intx_mask(struct vfio_pci_device *vdev)
+void vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
 {
 	struct pci_dev *pdev = vdev->pdev;
 	unsigned long flags;
@@ -73,7 +73,7 @@ void vfio_pci_intx_mask(struct vfio_pci_device *vdev)
  */
 static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
 {
-	struct vfio_pci_device *vdev = opaque;
+	struct vfio_pci_core_device *vdev = opaque;
 	struct pci_dev *pdev = vdev->pdev;
 	unsigned long flags;
 	int ret = 0;
@@ -107,7 +107,7 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
 	return ret;
 }
 
-void vfio_pci_intx_unmask(struct vfio_pci_device *vdev)
+void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev)
 {
 	if (vfio_pci_intx_unmask_handler(vdev, NULL) > 0)
 		vfio_send_intx_eventfd(vdev, NULL);
@@ -115,7 +115,7 @@ void vfio_pci_intx_unmask(struct vfio_pci_device *vdev)
 
 static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
 {
-	struct vfio_pci_device *vdev = dev_id;
+	struct vfio_pci_core_device *vdev = dev_id;
 	unsigned long flags;
 	int ret = IRQ_NONE;
 
@@ -139,7 +139,7 @@ static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
 	return ret;
 }
 
-static int vfio_intx_enable(struct vfio_pci_device *vdev)
+static int vfio_intx_enable(struct vfio_pci_core_device *vdev)
 {
 	if (!is_irq_none(vdev))
 		return -EINVAL;
@@ -168,7 +168,7 @@ static int vfio_intx_enable(struct vfio_pci_device *vdev)
 	return 0;
 }
 
-static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd)
+static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
 {
 	struct pci_dev *pdev = vdev->pdev;
 	unsigned long irqflags = IRQF_SHARED;
@@ -223,7 +223,7 @@ static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd)
 	return 0;
 }
 
-static void vfio_intx_disable(struct vfio_pci_device *vdev)
+static void vfio_intx_disable(struct vfio_pci_core_device *vdev)
 {
 	vfio_virqfd_disable(&vdev->ctx[0].unmask);
 	vfio_virqfd_disable(&vdev->ctx[0].mask);
@@ -244,7 +244,7 @@ static irqreturn_t vfio_msihandler(int irq, void *arg)
 	return IRQ_HANDLED;
 }
 
-static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix)
+static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msix)
 {
 	struct pci_dev *pdev = vdev->pdev;
 	unsigned int flag = msix ? PCI_IRQ_MSIX : PCI_IRQ_MSI;
@@ -285,7 +285,7 @@ static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix)
 	return 0;
 }
 
-static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
+static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
 				      int vector, int fd, bool msix)
 {
 	struct pci_dev *pdev = vdev->pdev;
@@ -364,7 +364,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
 	return 0;
 }
 
-static int vfio_msi_set_block(struct vfio_pci_device *vdev, unsigned start,
+static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start,
 			      unsigned count, int32_t *fds, bool msix)
 {
 	int i, j, ret = 0;
@@ -385,7 +385,7 @@ static int vfio_msi_set_block(struct vfio_pci_device *vdev, unsigned start,
 	return ret;
 }
 
-static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix)
+static void vfio_msi_disable(struct vfio_pci_core_device *vdev, bool msix)
 {
 	struct pci_dev *pdev = vdev->pdev;
 	int i;
@@ -417,7 +417,7 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix)
 /*
  * IOCTL support
  */
-static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev,
+static int vfio_pci_set_intx_unmask(struct vfio_pci_core_device *vdev,
 				    unsigned index, unsigned start,
 				    unsigned count, uint32_t flags, void *data)
 {
@@ -444,7 +444,7 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev,
 	return 0;
 }
 
-static int vfio_pci_set_intx_mask(struct vfio_pci_device *vdev,
+static int vfio_pci_set_intx_mask(struct vfio_pci_core_device *vdev,
 				  unsigned index, unsigned start,
 				  unsigned count, uint32_t flags, void *data)
 {
@@ -464,7 +464,7 @@ static int vfio_pci_set_intx_mask(struct vfio_pci_device *vdev,
 	return 0;
 }
 
-static int vfio_pci_set_intx_trigger(struct vfio_pci_device *vdev,
+static int vfio_pci_set_intx_trigger(struct vfio_pci_core_device *vdev,
 				     unsigned index, unsigned start,
 				     unsigned count, uint32_t flags, void *data)
 {
@@ -507,7 +507,7 @@ static int vfio_pci_set_intx_trigger(struct vfio_pci_device *vdev,
 	return 0;
 }
 
-static int vfio_pci_set_msi_trigger(struct vfio_pci_device *vdev,
+static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev,
 				    unsigned index, unsigned start,
 				    unsigned count, uint32_t flags, void *data)
 {
@@ -613,7 +613,7 @@ static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
 	return -EINVAL;
 }
 
-static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev,
+static int vfio_pci_set_err_trigger(struct vfio_pci_core_device *vdev,
 				    unsigned index, unsigned start,
 				    unsigned count, uint32_t flags, void *data)
 {
@@ -624,7 +624,7 @@ static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev,
 					       count, flags, data);
 }
 
-static int vfio_pci_set_req_trigger(struct vfio_pci_device *vdev,
+static int vfio_pci_set_req_trigger(struct vfio_pci_core_device *vdev,
 				    unsigned index, unsigned start,
 				    unsigned count, uint32_t flags, void *data)
 {
@@ -635,11 +635,11 @@ static int vfio_pci_set_req_trigger(struct vfio_pci_device *vdev,
 					       count, flags, data);
 }
 
-int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
+int vfio_pci_set_irqs_ioctl(struct vfio_pci_core_device *vdev, uint32_t flags,
 			    unsigned index, unsigned start, unsigned count,
 			    void *data)
 {
-	int (*func)(struct vfio_pci_device *vdev, unsigned index,
+	int (*func)(struct vfio_pci_core_device *vdev, unsigned index,
 		    unsigned start, unsigned count, uint32_t flags,
 		    void *data) = NULL;
 
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index a0b5fc8e46f4..57d3b2cbbd8e 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -17,7 +17,7 @@
 #include <linux/vfio.h>
 #include <linux/vgaarb.h>
 
-#include "vfio_pci_private.h"
+#include <linux/vfio_pci_core.h>
 
 #ifdef __LITTLE_ENDIAN
 #define vfio_ioread64	ioread64
@@ -38,7 +38,7 @@
 #define vfio_iowrite8	iowrite8
 
 #define VFIO_IOWRITE(size) \
-static int vfio_pci_iowrite##size(struct vfio_pci_device *vdev,		\
+static int vfio_pci_iowrite##size(struct vfio_pci_core_device *vdev,		\
 			bool test_mem, u##size val, void __iomem *io)	\
 {									\
 	if (test_mem) {							\
@@ -65,7 +65,7 @@ VFIO_IOWRITE(64)
 #endif
 
 #define VFIO_IOREAD(size) \
-static int vfio_pci_ioread##size(struct vfio_pci_device *vdev,		\
+static int vfio_pci_ioread##size(struct vfio_pci_core_device *vdev,		\
 			bool test_mem, u##size *val, void __iomem *io)	\
 {									\
 	if (test_mem) {							\
@@ -94,7 +94,7 @@ VFIO_IOREAD(32)
  * reads with -1.  This is intended for handling MSI-X vector tables and
  * leftover space for ROM BARs.
  */
-static ssize_t do_io_rw(struct vfio_pci_device *vdev, bool test_mem,
+static ssize_t do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
 			void __iomem *io, char __user *buf,
 			loff_t off, size_t count, size_t x_start,
 			size_t x_end, bool iswrite)
@@ -200,7 +200,7 @@ static ssize_t do_io_rw(struct vfio_pci_device *vdev, bool test_mem,
 	return done;
 }
 
-static int vfio_pci_setup_barmap(struct vfio_pci_device *vdev, int bar)
+static int vfio_pci_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
 {
 	struct pci_dev *pdev = vdev->pdev;
 	int ret;
@@ -224,7 +224,7 @@ static int vfio_pci_setup_barmap(struct vfio_pci_device *vdev, int bar)
 	return 0;
 }
 
-ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
+ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
 			size_t count, loff_t *ppos, bool iswrite)
 {
 	struct pci_dev *pdev = vdev->pdev;
@@ -288,7 +288,7 @@ out:
 	return done;
 }
 
-ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
+ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
 			       size_t count, loff_t *ppos, bool iswrite)
 {
 	int ret;
@@ -384,7 +384,7 @@ static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
 static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
 {
 	struct vfio_pci_ioeventfd *ioeventfd = opaque;
-	struct vfio_pci_device *vdev = ioeventfd->vdev;
+	struct vfio_pci_core_device *vdev = ioeventfd->vdev;
 
 	if (ioeventfd->test_mem) {
 		if (!down_read_trylock(&vdev->memory_lock))
@@ -410,7 +410,7 @@ static void vfio_pci_ioeventfd_thread(void *opaque, void *unused)
 	vfio_pci_ioeventfd_do_write(ioeventfd, ioeventfd->test_mem);
 }
 
-long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset,
+long vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
 			uint64_t data, int count, int fd)
 {
 	struct pci_dev *pdev = vdev->pdev;
diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c
index 7b011b62c766..ea4c0d2b0663 100644
--- a/drivers/vfio/pci/vfio_pci_zdev.c
+++ b/drivers/vfio/pci/vfio_pci_zdev.c
@@ -1,15 +1,10 @@
-// SPDX-License-Identifier: GPL-2.0+
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * VFIO ZPCI devices support
  *
  * Copyright (C) IBM Corp. 2020.  All rights reserved.
  *	Author(s): Pierre Morel <pmorel@linux.ibm.com>
  *                 Matthew Rosato <mjrosato@linux.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 #include <linux/io.h>
 #include <linux/pci.h>
@@ -19,7 +14,7 @@
 #include <asm/pci_clp.h>
 #include <asm/pci_io.h>
 
-#include "vfio_pci_private.h"
+#include <linux/vfio_pci_core.h>
 
 /*
  * Add the Base PCI Function information to the device info region.
@@ -114,7 +109,7 @@ static int zpci_pfip_cap(struct zpci_dev *zdev, struct vfio_info_cap *caps)
 /*
  * Add all supported capabilities to the VFIO_DEVICE_GET_INFO capability chain.
  */
-int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,
+int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
 				struct vfio_info_cap *caps)
 {
 	struct zpci_dev *zdev = to_zpci(vdev->pdev);
diff --git a/drivers/vfio/platform/Kconfig b/drivers/vfio/platform/Kconfig
index ab341108a0be..331a5920f5ab 100644
--- a/drivers/vfio/platform/Kconfig
+++ b/drivers/vfio/platform/Kconfig
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config VFIO_PLATFORM
 	tristate "VFIO support for platform devices"
-	depends on VFIO && EVENTFD && (ARM || ARM64 || COMPILE_TEST)
+	depends on ARM || ARM64 || COMPILE_TEST
 	select VFIO_VIRQFD
 	help
 	  Support for platform devices with VFIO. This is required to make
@@ -10,9 +10,10 @@ config VFIO_PLATFORM
 
 	  If you don't know what to do here, say N.
 
+if VFIO_PLATFORM
 config VFIO_AMBA
 	tristate "VFIO support for AMBA devices"
-	depends on VFIO_PLATFORM && (ARM_AMBA || COMPILE_TEST)
+	depends on ARM_AMBA || COMPILE_TEST
 	help
 	  Support for ARM AMBA devices with VFIO. This is required to make
 	  use of ARM AMBA devices present on the system using the VFIO
@@ -21,3 +22,4 @@ config VFIO_AMBA
 	  If you don't know what to do here, say N.
 
 source "drivers/vfio/platform/reset/Kconfig"
+endif
diff --git a/drivers/vfio/platform/reset/Kconfig b/drivers/vfio/platform/reset/Kconfig
index 1edbe9ee7356..12f5f3d80387 100644
--- a/drivers/vfio/platform/reset/Kconfig
+++ b/drivers/vfio/platform/reset/Kconfig
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config VFIO_PLATFORM_CALXEDAXGMAC_RESET
 	tristate "VFIO support for calxeda xgmac reset"
-	depends on VFIO_PLATFORM
 	help
 	  Enables the VFIO platform driver to handle reset for Calxeda xgmac
 
@@ -9,7 +8,6 @@ config VFIO_PLATFORM_CALXEDAXGMAC_RESET
 
 config VFIO_PLATFORM_AMDXGBE_RESET
 	tristate "VFIO support for AMD XGBE reset"
-	depends on VFIO_PLATFORM
 	help
 	  Enables the VFIO platform driver to handle reset for AMD XGBE
 
@@ -17,7 +15,7 @@ config VFIO_PLATFORM_AMDXGBE_RESET
 
 config VFIO_PLATFORM_BCMFLEXRM_RESET
 	tristate "VFIO support for Broadcom FlexRM reset"
-	depends on VFIO_PLATFORM && (ARCH_BCM_IPROC || COMPILE_TEST)
+	depends on ARCH_BCM_IPROC || COMPILE_TEST
 	default ARCH_BCM_IPROC
 	help
 	  Enables the VFIO platform driver to handle reset for Broadcom FlexRM
diff --git a/drivers/vfio/platform/reset/vfio_platform_bcmflexrm.c b/drivers/vfio/platform/reset/vfio_platform_bcmflexrm.c
index 96064ef8f629..1131ebe4837d 100644
--- a/drivers/vfio/platform/reset/vfio_platform_bcmflexrm.c
+++ b/drivers/vfio/platform/reset/vfio_platform_bcmflexrm.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2017 Broadcom
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 /*
diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c
index 703164df7637..6af7ce7d619c 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -218,65 +218,52 @@ static int vfio_platform_call_reset(struct vfio_platform_device *vdev,
 	return -EINVAL;
 }
 
-static void vfio_platform_release(struct vfio_device *core_vdev)
+static void vfio_platform_close_device(struct vfio_device *core_vdev)
 {
 	struct vfio_platform_device *vdev =
 		container_of(core_vdev, struct vfio_platform_device, vdev);
+	const char *extra_dbg = NULL;
+	int ret;
 
-	mutex_lock(&driver_lock);
-
-	if (!(--vdev->refcnt)) {
-		const char *extra_dbg = NULL;
-		int ret;
-
-		ret = vfio_platform_call_reset(vdev, &extra_dbg);
-		if (ret && vdev->reset_required) {
-			dev_warn(vdev->device, "reset driver is required and reset call failed in release (%d) %s\n",
-				 ret, extra_dbg ? extra_dbg : "");
-			WARN_ON(1);
-		}
-		pm_runtime_put(vdev->device);
-		vfio_platform_regions_cleanup(vdev);
-		vfio_platform_irq_cleanup(vdev);
+	ret = vfio_platform_call_reset(vdev, &extra_dbg);
+	if (WARN_ON(ret && vdev->reset_required)) {
+		dev_warn(
+			vdev->device,
+			"reset driver is required and reset call failed in release (%d) %s\n",
+			ret, extra_dbg ? extra_dbg : "");
 	}
-
-	mutex_unlock(&driver_lock);
+	pm_runtime_put(vdev->device);
+	vfio_platform_regions_cleanup(vdev);
+	vfio_platform_irq_cleanup(vdev);
 }
 
-static int vfio_platform_open(struct vfio_device *core_vdev)
+static int vfio_platform_open_device(struct vfio_device *core_vdev)
 {
 	struct vfio_platform_device *vdev =
 		container_of(core_vdev, struct vfio_platform_device, vdev);
+	const char *extra_dbg = NULL;
 	int ret;
 
-	mutex_lock(&driver_lock);
-
-	if (!vdev->refcnt) {
-		const char *extra_dbg = NULL;
-
-		ret = vfio_platform_regions_init(vdev);
-		if (ret)
-			goto err_reg;
+	ret = vfio_platform_regions_init(vdev);
+	if (ret)
+		return ret;
 
-		ret = vfio_platform_irq_init(vdev);
-		if (ret)
-			goto err_irq;
+	ret = vfio_platform_irq_init(vdev);
+	if (ret)
+		goto err_irq;
 
-		ret = pm_runtime_get_sync(vdev->device);
-		if (ret < 0)
-			goto err_rst;
+	ret = pm_runtime_get_sync(vdev->device);
+	if (ret < 0)
+		goto err_rst;
 
-		ret = vfio_platform_call_reset(vdev, &extra_dbg);
-		if (ret && vdev->reset_required) {
-			dev_warn(vdev->device, "reset driver is required and reset call failed in open (%d) %s\n",
-				 ret, extra_dbg ? extra_dbg : "");
-			goto err_rst;
-		}
+	ret = vfio_platform_call_reset(vdev, &extra_dbg);
+	if (ret && vdev->reset_required) {
+		dev_warn(
+			vdev->device,
+			"reset driver is required and reset call failed in open (%d) %s\n",
+			ret, extra_dbg ? extra_dbg : "");
+		goto err_rst;
 	}
-
-	vdev->refcnt++;
-
-	mutex_unlock(&driver_lock);
 	return 0;
 
 err_rst:
@@ -284,8 +271,6 @@ err_rst:
 	vfio_platform_irq_cleanup(vdev);
 err_irq:
 	vfio_platform_regions_cleanup(vdev);
-err_reg:
-	mutex_unlock(&driver_lock);
 	return ret;
 }
 
@@ -616,8 +601,8 @@ static int vfio_platform_mmap(struct vfio_device *core_vdev, struct vm_area_stru
 
 static const struct vfio_device_ops vfio_platform_ops = {
 	.name		= "vfio-platform",
-	.open		= vfio_platform_open,
-	.release	= vfio_platform_release,
+	.open_device	= vfio_platform_open_device,
+	.close_device	= vfio_platform_close_device,
 	.ioctl		= vfio_platform_ioctl,
 	.read		= vfio_platform_read,
 	.write		= vfio_platform_write,
@@ -667,7 +652,7 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev,
 		ret = vfio_platform_of_probe(vdev, dev);
 
 	if (ret)
-		return ret;
+		goto out_uninit;
 
 	vdev->device = dev;
 
@@ -675,7 +660,7 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev,
 	if (ret && vdev->reset_required) {
 		dev_err(dev, "No reset function found for device %s\n",
 			vdev->name);
-		return ret;
+		goto out_uninit;
 	}
 
 	group = vfio_iommu_group_get(dev);
@@ -698,6 +683,8 @@ put_iommu:
 	vfio_iommu_group_put(group, dev);
 put_reset:
 	vfio_platform_put_reset(vdev);
+out_uninit:
+	vfio_uninit_group_dev(&vdev->vdev);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(vfio_platform_probe_common);
@@ -708,6 +695,7 @@ void vfio_platform_remove_common(struct vfio_platform_device *vdev)
 
 	pm_runtime_disable(vdev->device);
 	vfio_platform_put_reset(vdev);
+	vfio_uninit_group_dev(&vdev->vdev);
 	vfio_iommu_group_put(vdev->vdev.dev->iommu_group, vdev->vdev.dev);
 }
 EXPORT_SYMBOL_GPL(vfio_platform_remove_common);
diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h
index dfb834c13659..520d2a8e8375 100644
--- a/drivers/vfio/platform/vfio_platform_private.h
+++ b/drivers/vfio/platform/vfio_platform_private.h
@@ -48,7 +48,6 @@ struct vfio_platform_device {
 	u32				num_regions;
 	struct vfio_platform_irq	*irqs;
 	u32				num_irqs;
-	int				refcnt;
 	struct mutex			igate;
 	const char			*compat;
 	const char			*acpihid;
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 02cc51ce6891..3c034fe14ccb 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -96,6 +96,79 @@ module_param_named(enable_unsafe_noiommu_mode,
 MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
 #endif
 
+static DEFINE_XARRAY(vfio_device_set_xa);
+
+int vfio_assign_device_set(struct vfio_device *device, void *set_id)
+{
+	unsigned long idx = (unsigned long)set_id;
+	struct vfio_device_set *new_dev_set;
+	struct vfio_device_set *dev_set;
+
+	if (WARN_ON(!set_id))
+		return -EINVAL;
+
+	/*
+	 * Atomically acquire a singleton object in the xarray for this set_id
+	 */
+	xa_lock(&vfio_device_set_xa);
+	dev_set = xa_load(&vfio_device_set_xa, idx);
+	if (dev_set)
+		goto found_get_ref;
+	xa_unlock(&vfio_device_set_xa);
+
+	new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL);
+	if (!new_dev_set)
+		return -ENOMEM;
+	mutex_init(&new_dev_set->lock);
+	INIT_LIST_HEAD(&new_dev_set->device_list);
+	new_dev_set->set_id = set_id;
+
+	xa_lock(&vfio_device_set_xa);
+	dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set,
+			       GFP_KERNEL);
+	if (!dev_set) {
+		dev_set = new_dev_set;
+		goto found_get_ref;
+	}
+
+	kfree(new_dev_set);
+	if (xa_is_err(dev_set)) {
+		xa_unlock(&vfio_device_set_xa);
+		return xa_err(dev_set);
+	}
+
+found_get_ref:
+	dev_set->device_count++;
+	xa_unlock(&vfio_device_set_xa);
+	mutex_lock(&dev_set->lock);
+	device->dev_set = dev_set;
+	list_add_tail(&device->dev_set_list, &dev_set->device_list);
+	mutex_unlock(&dev_set->lock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_assign_device_set);
+
+static void vfio_release_device_set(struct vfio_device *device)
+{
+	struct vfio_device_set *dev_set = device->dev_set;
+
+	if (!dev_set)
+		return;
+
+	mutex_lock(&dev_set->lock);
+	list_del(&device->dev_set_list);
+	mutex_unlock(&dev_set->lock);
+
+	xa_lock(&vfio_device_set_xa);
+	if (!--dev_set->device_count) {
+		__xa_erase(&vfio_device_set_xa,
+			   (unsigned long)dev_set->set_id);
+		mutex_destroy(&dev_set->lock);
+		kfree(dev_set);
+	}
+	xa_unlock(&vfio_device_set_xa);
+}
+
 /*
  * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe
  * and remove functions, any use cases other than acquiring the first
@@ -749,12 +822,25 @@ void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
 }
 EXPORT_SYMBOL_GPL(vfio_init_group_dev);
 
+void vfio_uninit_group_dev(struct vfio_device *device)
+{
+	vfio_release_device_set(device);
+}
+EXPORT_SYMBOL_GPL(vfio_uninit_group_dev);
+
 int vfio_register_group_dev(struct vfio_device *device)
 {
 	struct vfio_device *existing_device;
 	struct iommu_group *iommu_group;
 	struct vfio_group *group;
 
+	/*
+	 * If the driver doesn't specify a set then the device is added to a
+	 * singleton set just for itself.
+	 */
+	if (!device->dev_set)
+		vfio_assign_device_set(device, device);
+
 	iommu_group = iommu_group_get(device->dev);
 	if (!iommu_group)
 		return -EINVAL;
@@ -1356,7 +1442,8 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
 {
 	struct vfio_device *device;
 	struct file *filep;
-	int ret;
+	int fdno;
+	int ret = 0;
 
 	if (0 == atomic_read(&group->container_users) ||
 	    !group->container->iommu_driver || !vfio_group_viable(group))
@@ -1370,38 +1457,32 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
 		return PTR_ERR(device);
 
 	if (!try_module_get(device->dev->driver->owner)) {
-		vfio_device_put(device);
-		return -ENODEV;
+		ret = -ENODEV;
+		goto err_device_put;
 	}
 
-	ret = device->ops->open(device);
-	if (ret) {
-		module_put(device->dev->driver->owner);
-		vfio_device_put(device);
-		return ret;
+	mutex_lock(&device->dev_set->lock);
+	device->open_count++;
+	if (device->open_count == 1 && device->ops->open_device) {
+		ret = device->ops->open_device(device);
+		if (ret)
+			goto err_undo_count;
 	}
+	mutex_unlock(&device->dev_set->lock);
 
 	/*
 	 * We can't use anon_inode_getfd() because we need to modify
 	 * the f_mode flags directly to allow more than just ioctls
 	 */
-	ret = get_unused_fd_flags(O_CLOEXEC);
-	if (ret < 0) {
-		device->ops->release(device);
-		module_put(device->dev->driver->owner);
-		vfio_device_put(device);
-		return ret;
-	}
+	fdno = ret = get_unused_fd_flags(O_CLOEXEC);
+	if (ret < 0)
+		goto err_close_device;
 
 	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
 				   device, O_RDWR);
 	if (IS_ERR(filep)) {
-		put_unused_fd(ret);
 		ret = PTR_ERR(filep);
-		device->ops->release(device);
-		module_put(device->dev->driver->owner);
-		vfio_device_put(device);
-		return ret;
+		goto err_fd;
 	}
 
 	/*
@@ -1413,12 +1494,25 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
 
 	atomic_inc(&group->container_users);
 
-	fd_install(ret, filep);
+	fd_install(fdno, filep);
 
 	if (group->noiommu)
 		dev_warn(device->dev, "vfio-noiommu device opened by user "
 			 "(%s:%d)\n", current->comm, task_pid_nr(current));
-
+	return fdno;
+
+err_fd:
+	put_unused_fd(fdno);
+err_close_device:
+	mutex_lock(&device->dev_set->lock);
+	if (device->open_count == 1 && device->ops->close_device)
+		device->ops->close_device(device);
+err_undo_count:
+	device->open_count--;
+	mutex_unlock(&device->dev_set->lock);
+	module_put(device->dev->driver->owner);
+err_device_put:
+	vfio_device_put(device);
 	return ret;
 }
 
@@ -1556,7 +1650,10 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
 {
 	struct vfio_device *device = filep->private_data;
 
-	device->ops->release(device);
+	mutex_lock(&device->dev_set->lock);
+	if (!--device->open_count && device->ops->close_device)
+		device->ops->close_device(device);
+	mutex_unlock(&device->dev_set->lock);
 
 	module_put(device->dev->driver->owner);
 
@@ -2359,6 +2456,7 @@ static void __exit vfio_cleanup(void)
 	class_destroy(vfio.class);
 	vfio.class = NULL;
 	misc_deregister(&vfio_dev);
+	xa_destroy(&vfio_device_set_xa);
 }
 
 module_init(vfio_init);
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 0b4f7c174c7a..0e9217687f5c 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -612,17 +612,17 @@ static int vfio_wait(struct vfio_iommu *iommu)
 static int vfio_find_dma_valid(struct vfio_iommu *iommu, dma_addr_t start,
 			       size_t size, struct vfio_dma **dma_p)
 {
-	int ret;
+	int ret = 0;
 
 	do {
 		*dma_p = vfio_find_dma(iommu, start, size);
 		if (!*dma_p)
-			ret = -EINVAL;
+			return -EINVAL;
 		else if (!(*dma_p)->vaddr_invalid)
-			ret = 0;
+			return ret;
 		else
 			ret = vfio_wait(iommu);
-	} while (ret > 0);
+	} while (ret == WAITED);
 
 	return ret;
 }
diff --git a/drivers/vhost/iotlb.c b/drivers/vhost/iotlb.c
index 0582079e4bcc..670d56c879e5 100644
--- a/drivers/vhost/iotlb.c
+++ b/drivers/vhost/iotlb.c
@@ -36,19 +36,21 @@ void vhost_iotlb_map_free(struct vhost_iotlb *iotlb,
 EXPORT_SYMBOL_GPL(vhost_iotlb_map_free);
 
 /**
- * vhost_iotlb_add_range - add a new range to vhost IOTLB
+ * vhost_iotlb_add_range_ctx - add a new range to vhost IOTLB
  * @iotlb: the IOTLB
  * @start: start of the IOVA range
  * @last: last of IOVA range
  * @addr: the address that is mapped to @start
  * @perm: access permission of this range
+ * @opaque: the opaque pointer for the new mapping
  *
  * Returns an error last is smaller than start or memory allocation
  * fails
  */
-int vhost_iotlb_add_range(struct vhost_iotlb *iotlb,
-			  u64 start, u64 last,
-			  u64 addr, unsigned int perm)
+int vhost_iotlb_add_range_ctx(struct vhost_iotlb *iotlb,
+			      u64 start, u64 last,
+			      u64 addr, unsigned int perm,
+			      void *opaque)
 {
 	struct vhost_iotlb_map *map;
 
@@ -71,6 +73,7 @@ int vhost_iotlb_add_range(struct vhost_iotlb *iotlb,
 	map->last = last;
 	map->addr = addr;
 	map->perm = perm;
+	map->opaque = opaque;
 
 	iotlb->nmaps++;
 	vhost_iotlb_itree_insert(map, &iotlb->root);
@@ -80,6 +83,15 @@ int vhost_iotlb_add_range(struct vhost_iotlb *iotlb,
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(vhost_iotlb_add_range_ctx);
+
+int vhost_iotlb_add_range(struct vhost_iotlb *iotlb,
+			  u64 start, u64 last,
+			  u64 addr, unsigned int perm)
+{
+	return vhost_iotlb_add_range_ctx(iotlb, start, last,
+					 addr, perm, NULL);
+}
 EXPORT_SYMBOL_GPL(vhost_iotlb_add_range);
 
 /**
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 46f897e41217..532e204f2b1b 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -1,24 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*******************************************************************************
  * Vhost kernel TCM fabric driver for virtio SCSI initiators
  *
  * (C) Copyright 2010-2013 Datera, Inc.
  * (C) Copyright 2010-2012 IBM Corp.
  *
- * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
- *
  * Authors: Nicholas A. Bellinger <nab@daterainc.com>
  *          Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  ****************************************************************************/
 
 #include <linux/module.h>
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 9479f7f79217..f41d081777f5 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -116,12 +116,13 @@ static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
 	irq_bypass_unregister_producer(&vq->call_ctx.producer);
 }
 
-static void vhost_vdpa_reset(struct vhost_vdpa *v)
+static int vhost_vdpa_reset(struct vhost_vdpa *v)
 {
 	struct vdpa_device *vdpa = v->vdpa;
 
-	vdpa_reset(vdpa);
 	v->in_batch = 0;
+
+	return vdpa_reset(vdpa);
 }
 
 static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
@@ -157,7 +158,7 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
 	struct vdpa_device *vdpa = v->vdpa;
 	const struct vdpa_config_ops *ops = vdpa->config;
 	u8 status, status_old;
-	int nvqs = v->nvqs;
+	int ret, nvqs = v->nvqs;
 	u16 i;
 
 	if (copy_from_user(&status, statusp, sizeof(status)))
@@ -172,7 +173,12 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
 	if (status != 0 && (ops->get_status(vdpa) & ~status) != 0)
 		return -EINVAL;
 
-	ops->set_status(vdpa, status);
+	if (status == 0) {
+		ret = ops->reset(vdpa);
+		if (ret)
+			return ret;
+	} else
+		ops->set_status(vdpa, status);
 
 	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
 		for (i = 0; i < nvqs; i++)
@@ -498,7 +504,7 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
 	return r;
 }
 
-static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
+static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last)
 {
 	struct vhost_dev *dev = &v->vdev;
 	struct vhost_iotlb *iotlb = dev->iotlb;
@@ -507,19 +513,44 @@ static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
 	unsigned long pfn, pinned;
 
 	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
-		pinned = map->size >> PAGE_SHIFT;
-		for (pfn = map->addr >> PAGE_SHIFT;
+		pinned = PFN_DOWN(map->size);
+		for (pfn = PFN_DOWN(map->addr);
 		     pinned > 0; pfn++, pinned--) {
 			page = pfn_to_page(pfn);
 			if (map->perm & VHOST_ACCESS_WO)
 				set_page_dirty_lock(page);
 			unpin_user_page(page);
 		}
-		atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm);
+		atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
 		vhost_iotlb_map_free(iotlb, map);
 	}
 }
 
+static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, u64 start, u64 last)
+{
+	struct vhost_dev *dev = &v->vdev;
+	struct vhost_iotlb *iotlb = dev->iotlb;
+	struct vhost_iotlb_map *map;
+	struct vdpa_map_file *map_file;
+
+	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
+		map_file = (struct vdpa_map_file *)map->opaque;
+		fput(map_file->file);
+		kfree(map_file);
+		vhost_iotlb_map_free(iotlb, map);
+	}
+}
+
+static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
+{
+	struct vdpa_device *vdpa = v->vdpa;
+
+	if (vdpa->use_va)
+		return vhost_vdpa_va_unmap(v, start, last);
+
+	return vhost_vdpa_pa_unmap(v, start, last);
+}
+
 static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v)
 {
 	struct vhost_dev *dev = &v->vdev;
@@ -551,21 +582,21 @@ static int perm_to_iommu_flags(u32 perm)
 	return flags | IOMMU_CACHE;
 }
 
-static int vhost_vdpa_map(struct vhost_vdpa *v,
-			  u64 iova, u64 size, u64 pa, u32 perm)
+static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova,
+			  u64 size, u64 pa, u32 perm, void *opaque)
 {
 	struct vhost_dev *dev = &v->vdev;
 	struct vdpa_device *vdpa = v->vdpa;
 	const struct vdpa_config_ops *ops = vdpa->config;
 	int r = 0;
 
-	r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1,
-				  pa, perm);
+	r = vhost_iotlb_add_range_ctx(dev->iotlb, iova, iova + size - 1,
+				      pa, perm, opaque);
 	if (r)
 		return r;
 
 	if (ops->dma_map) {
-		r = ops->dma_map(vdpa, iova, size, pa, perm);
+		r = ops->dma_map(vdpa, iova, size, pa, perm, opaque);
 	} else if (ops->set_map) {
 		if (!v->in_batch)
 			r = ops->set_map(vdpa, dev->iotlb);
@@ -573,13 +604,15 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
 		r = iommu_map(v->domain, iova, pa, size,
 			      perm_to_iommu_flags(perm));
 	}
-
-	if (r)
+	if (r) {
 		vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
-	else
-		atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
+		return r;
+	}
 
-	return r;
+	if (!vdpa->use_va)
+		atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
+
+	return 0;
 }
 
 static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
@@ -600,38 +633,78 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
 	}
 }
 
-static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
-					   struct vhost_iotlb_msg *msg)
+static int vhost_vdpa_va_map(struct vhost_vdpa *v,
+			     u64 iova, u64 size, u64 uaddr, u32 perm)
+{
+	struct vhost_dev *dev = &v->vdev;
+	u64 offset, map_size, map_iova = iova;
+	struct vdpa_map_file *map_file;
+	struct vm_area_struct *vma;
+	int ret;
+
+	mmap_read_lock(dev->mm);
+
+	while (size) {
+		vma = find_vma(dev->mm, uaddr);
+		if (!vma) {
+			ret = -EINVAL;
+			break;
+		}
+		map_size = min(size, vma->vm_end - uaddr);
+		if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) &&
+			!(vma->vm_flags & (VM_IO | VM_PFNMAP))))
+			goto next;
+
+		map_file = kzalloc(sizeof(*map_file), GFP_KERNEL);
+		if (!map_file) {
+			ret = -ENOMEM;
+			break;
+		}
+		offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
+		map_file->offset = offset;
+		map_file->file = get_file(vma->vm_file);
+		ret = vhost_vdpa_map(v, map_iova, map_size, uaddr,
+				     perm, map_file);
+		if (ret) {
+			fput(map_file->file);
+			kfree(map_file);
+			break;
+		}
+next:
+		size -= map_size;
+		uaddr += map_size;
+		map_iova += map_size;
+	}
+	if (ret)
+		vhost_vdpa_unmap(v, iova, map_iova - iova);
+
+	mmap_read_unlock(dev->mm);
+
+	return ret;
+}
+
+static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
+			     u64 iova, u64 size, u64 uaddr, u32 perm)
 {
 	struct vhost_dev *dev = &v->vdev;
-	struct vhost_iotlb *iotlb = dev->iotlb;
 	struct page **page_list;
 	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
 	unsigned int gup_flags = FOLL_LONGTERM;
 	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
 	unsigned long lock_limit, sz2pin, nchunks, i;
-	u64 iova = msg->iova;
+	u64 start = iova;
 	long pinned;
 	int ret = 0;
 
-	if (msg->iova < v->range.first || !msg->size ||
-	    msg->iova > U64_MAX - msg->size + 1 ||
-	    msg->iova + msg->size - 1 > v->range.last)
-		return -EINVAL;
-
-	if (vhost_iotlb_itree_first(iotlb, msg->iova,
-				    msg->iova + msg->size - 1))
-		return -EEXIST;
-
 	/* Limit the use of memory for bookkeeping */
 	page_list = (struct page **) __get_free_page(GFP_KERNEL);
 	if (!page_list)
 		return -ENOMEM;
 
-	if (msg->perm & VHOST_ACCESS_WO)
+	if (perm & VHOST_ACCESS_WO)
 		gup_flags |= FOLL_WRITE;
 
-	npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT;
+	npages = PFN_UP(size + (iova & ~PAGE_MASK));
 	if (!npages) {
 		ret = -EINVAL;
 		goto free;
@@ -639,13 +712,13 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
 
 	mmap_read_lock(dev->mm);
 
-	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
 	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
 		ret = -ENOMEM;
 		goto unlock;
 	}
 
-	cur_base = msg->uaddr & PAGE_MASK;
+	cur_base = uaddr & PAGE_MASK;
 	iova &= PAGE_MASK;
 	nchunks = 0;
 
@@ -673,10 +746,10 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
 
 			if (last_pfn && (this_pfn != last_pfn + 1)) {
 				/* Pin a contiguous chunk of memory */
-				csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
+				csize = PFN_PHYS(last_pfn - map_pfn + 1);
 				ret = vhost_vdpa_map(v, iova, csize,
-						     map_pfn << PAGE_SHIFT,
-						     msg->perm);
+						     PFN_PHYS(map_pfn),
+						     perm, NULL);
 				if (ret) {
 					/*
 					 * Unpin the pages that are left unmapped
@@ -699,13 +772,13 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
 			last_pfn = this_pfn;
 		}
 
-		cur_base += pinned << PAGE_SHIFT;
+		cur_base += PFN_PHYS(pinned);
 		npages -= pinned;
 	}
 
 	/* Pin the rest chunk */
-	ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
-			     map_pfn << PAGE_SHIFT, msg->perm);
+	ret = vhost_vdpa_map(v, iova, PFN_PHYS(last_pfn - map_pfn + 1),
+			     PFN_PHYS(map_pfn), perm, NULL);
 out:
 	if (ret) {
 		if (nchunks) {
@@ -724,13 +797,38 @@ out:
 			for (pfn = map_pfn; pfn <= last_pfn; pfn++)
 				unpin_user_page(pfn_to_page(pfn));
 		}
-		vhost_vdpa_unmap(v, msg->iova, msg->size);
+		vhost_vdpa_unmap(v, start, size);
 	}
 unlock:
 	mmap_read_unlock(dev->mm);
 free:
 	free_page((unsigned long)page_list);
 	return ret;
+
+}
+
+static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
+					   struct vhost_iotlb_msg *msg)
+{
+	struct vhost_dev *dev = &v->vdev;
+	struct vdpa_device *vdpa = v->vdpa;
+	struct vhost_iotlb *iotlb = dev->iotlb;
+
+	if (msg->iova < v->range.first || !msg->size ||
+	    msg->iova > U64_MAX - msg->size + 1 ||
+	    msg->iova + msg->size - 1 > v->range.last)
+		return -EINVAL;
+
+	if (vhost_iotlb_itree_first(iotlb, msg->iova,
+				    msg->iova + msg->size - 1))
+		return -EEXIST;
+
+	if (vdpa->use_va)
+		return vhost_vdpa_va_map(v, msg->iova, msg->size,
+					 msg->uaddr, msg->perm);
+
+	return vhost_vdpa_pa_map(v, msg->iova, msg->size, msg->uaddr,
+				 msg->perm);
 }
 
 static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev,
@@ -860,7 +958,9 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
 		return -EBUSY;
 
 	nvqs = v->nvqs;
-	vhost_vdpa_reset(v);
+	r = vhost_vdpa_reset(v);
+	if (r)
+		goto err;
 
 	vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
 	if (!vqs) {
@@ -945,7 +1045,7 @@ static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
 
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 	if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
-			    notify.addr >> PAGE_SHIFT, PAGE_SIZE,
+			    PFN_DOWN(notify.addr), PAGE_SIZE,
 			    vma->vm_page_prot))
 		return VM_FAULT_SIGBUS;
 
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index f249622ef11b..938aefbc75ec 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -114,7 +114,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
 		size_t nbytes;
 		size_t iov_len, payload_len;
 		int head;
-		bool restore_flag = false;
+		u32 flags_to_restore = 0;
 
 		spin_lock_bh(&vsock->send_pkt_list_lock);
 		if (list_empty(&vsock->send_pkt_list)) {
@@ -178,16 +178,21 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
 			 * small rx buffers, headers of packets in rx queue are
 			 * created dynamically and are initialized with header
 			 * of current packet(except length). But in case of
-			 * SOCK_SEQPACKET, we also must clear record delimeter
-			 * bit(VIRTIO_VSOCK_SEQ_EOR). Otherwise, instead of one
-			 * packet with delimeter(which marks end of record),
-			 * there will be sequence of packets with delimeter
-			 * bit set. After initialized header will be copied to
-			 * rx buffer, this bit will be restored.
+			 * SOCK_SEQPACKET, we also must clear message delimeter
+			 * bit (VIRTIO_VSOCK_SEQ_EOM) and MSG_EOR bit
+			 * (VIRTIO_VSOCK_SEQ_EOR) if set. Otherwise,
+			 * there will be sequence of packets with these
+			 * bits set. After initialized header will be copied to
+			 * rx buffer, these required bits will be restored.
 			 */
-			if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) {
-				pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
-				restore_flag = true;
+			if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM) {
+				pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
+				flags_to_restore |= VIRTIO_VSOCK_SEQ_EOM;
+
+				if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) {
+					pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+					flags_to_restore |= VIRTIO_VSOCK_SEQ_EOR;
+				}
 			}
 		}
 
@@ -224,8 +229,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
 		 * to send it with the next available buffer.
 		 */
 		if (pkt->off < pkt->len) {
-			if (restore_flag)
-				pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+			pkt->hdr.flags |= cpu_to_le32(flags_to_restore);
 
 			/* We are queueing the same virtio_vsock_pkt to handle
 			 * the remaining bytes, and we want to deliver it
diff --git a/drivers/video/backlight/ktd253-backlight.c b/drivers/video/backlight/ktd253-backlight.c
index a7df5bcca9da..37aa5a669530 100644
--- a/drivers/video/backlight/ktd253-backlight.c
+++ b/drivers/video/backlight/ktd253-backlight.c
@@ -25,6 +25,7 @@
 
 #define KTD253_T_LOW_NS (200 + 10) /* Additional 10ns as safety factor */
 #define KTD253_T_HIGH_NS (200 + 10) /* Additional 10ns as safety factor */
+#define KTD253_T_OFF_CRIT_NS 100000 /* 100 us, now it doesn't look good */
 #define KTD253_T_OFF_MS 3
 
 struct ktd253_backlight {
@@ -34,13 +35,50 @@ struct ktd253_backlight {
 	u16 ratio;
 };
 
+static void ktd253_backlight_set_max_ratio(struct ktd253_backlight *ktd253)
+{
+	gpiod_set_value_cansleep(ktd253->gpiod, 1);
+	ndelay(KTD253_T_HIGH_NS);
+	/* We always fall back to this when we power on */
+}
+
+static int ktd253_backlight_stepdown(struct ktd253_backlight *ktd253)
+{
+	/*
+	 * These GPIO operations absolutely can NOT sleep so no _cansleep
+	 * suffixes, and no using GPIO expanders on slow buses for this!
+	 *
+	 * The maximum number of cycles of the loop is 32  so the time taken
+	 * should nominally be:
+	 * (T_LOW_NS + T_HIGH_NS + loop_time) * 32
+	 *
+	 * Architectures do not always support ndelay() and we will get a few us
+	 * instead. If we get to a critical time limit an interrupt has likely
+	 * occured in the low part of the loop and we need to restart from the
+	 * top so we have the backlight in a known state.
+	 */
+	u64 ns;
+
+	ns = ktime_get_ns();
+	gpiod_set_value(ktd253->gpiod, 0);
+	ndelay(KTD253_T_LOW_NS);
+	gpiod_set_value(ktd253->gpiod, 1);
+	ns = ktime_get_ns() - ns;
+	if (ns >= KTD253_T_OFF_CRIT_NS) {
+		dev_err(ktd253->dev, "PCM on backlight took too long (%llu ns)\n", ns);
+		return -EAGAIN;
+	}
+	ndelay(KTD253_T_HIGH_NS);
+	return 0;
+}
+
 static int ktd253_backlight_update_status(struct backlight_device *bl)
 {
 	struct ktd253_backlight *ktd253 = bl_get_data(bl);
 	int brightness = backlight_get_brightness(bl);
 	u16 target_ratio;
 	u16 current_ratio = ktd253->ratio;
-	unsigned long flags;
+	int ret;
 
 	dev_dbg(ktd253->dev, "new brightness/ratio: %d/32\n", brightness);
 
@@ -62,37 +100,34 @@ static int ktd253_backlight_update_status(struct backlight_device *bl)
 	}
 
 	if (current_ratio == 0) {
-		gpiod_set_value_cansleep(ktd253->gpiod, 1);
-		ndelay(KTD253_T_HIGH_NS);
-		/* We always fall back to this when we power on */
+		ktd253_backlight_set_max_ratio(ktd253);
 		current_ratio = KTD253_MAX_RATIO;
 	}
 
-	/*
-	 * WARNING:
-	 * The loop to set the correct current level is performed
-	 * with interrupts disabled as it is timing critical.
-	 * The maximum number of cycles of the loop is 32
-	 * so the time taken will be (T_LOW_NS + T_HIGH_NS + loop_time) * 32,
-	 */
-	local_irq_save(flags);
 	while (current_ratio != target_ratio) {
 		/*
 		 * These GPIO operations absolutely can NOT sleep so no
 		 * _cansleep suffixes, and no using GPIO expanders on
 		 * slow buses for this!
 		 */
-		gpiod_set_value(ktd253->gpiod, 0);
-		ndelay(KTD253_T_LOW_NS);
-		gpiod_set_value(ktd253->gpiod, 1);
-		ndelay(KTD253_T_HIGH_NS);
-		/* After 1/32 we loop back to 32/32 */
-		if (current_ratio == KTD253_MIN_RATIO)
+		ret = ktd253_backlight_stepdown(ktd253);
+		if (ret == -EAGAIN) {
+			/*
+			 * Something disturbed the backlight setting code when
+			 * running so we need to bring the PWM back to a known
+			 * state. This shouldn't happen too much.
+			 */
+			gpiod_set_value_cansleep(ktd253->gpiod, 0);
+			msleep(KTD253_T_OFF_MS);
+			ktd253_backlight_set_max_ratio(ktd253);
+			current_ratio = KTD253_MAX_RATIO;
+		} else if (current_ratio == KTD253_MIN_RATIO) {
+			/* After 1/32 we loop back to 32/32 */
 			current_ratio = KTD253_MAX_RATIO;
-		else
+		} else {
 			current_ratio--;
+		}
 	}
-	local_irq_restore(flags);
 	ktd253->ratio = current_ratio;
 
 	dev_dbg(ktd253->dev, "new ratio set to %d/32\n", target_ratio);
diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c
index e48fded3e414..8d8959a70e44 100644
--- a/drivers/video/backlight/pwm_bl.c
+++ b/drivers/video/backlight/pwm_bl.c
@@ -409,6 +409,33 @@ static bool pwm_backlight_is_linear(struct platform_pwm_backlight_data *data)
 static int pwm_backlight_initial_power_state(const struct pwm_bl_data *pb)
 {
 	struct device_node *node = pb->dev->of_node;
+	bool active = true;
+
+	/*
+	 * If the enable GPIO is present, observable (either as input
+	 * or output) and off then the backlight is not currently active.
+	 * */
+	if (pb->enable_gpio && gpiod_get_value_cansleep(pb->enable_gpio) == 0)
+		active = false;
+
+	if (!regulator_is_enabled(pb->power_supply))
+		active = false;
+
+	if (!pwm_is_enabled(pb->pwm))
+		active = false;
+
+	/*
+	 * Synchronize the enable_gpio with the observed state of the
+	 * hardware.
+	 */
+	if (pb->enable_gpio)
+		gpiod_direction_output(pb->enable_gpio, active);
+
+	/*
+	 * Do not change pb->enabled here! pb->enabled essentially
+	 * tells us if we own one of the regulator's use counts and
+	 * right now we do not.
+	 */
 
 	/* Not booted with device tree or no phandle link to the node */
 	if (!node || !node->phandle)
@@ -420,20 +447,7 @@ static int pwm_backlight_initial_power_state(const struct pwm_bl_data *pb)
 	 * assume that another driver will enable the backlight at the
 	 * appropriate time. Therefore, if it is disabled, keep it so.
 	 */
-
-	/* if the enable GPIO is disabled, do not enable the backlight */
-	if (pb->enable_gpio && gpiod_get_value_cansleep(pb->enable_gpio) == 0)
-		return FB_BLANK_POWERDOWN;
-
-	/* The regulator is disabled, do not enable the backlight */
-	if (!regulator_is_enabled(pb->power_supply))
-		return FB_BLANK_POWERDOWN;
-
-	/* The PWM is disabled, keep it like this */
-	if (!pwm_is_enabled(pb->pwm))
-		return FB_BLANK_POWERDOWN;
-
-	return FB_BLANK_UNBLANK;
+	return active ? FB_BLANK_UNBLANK: FB_BLANK_POWERDOWN;
 }
 
 static int pwm_backlight_probe(struct platform_device *pdev)
@@ -486,18 +500,6 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 		goto err_alloc;
 	}
 
-	/*
-	 * If the GPIO is not known to be already configured as output, that
-	 * is, if gpiod_get_direction returns either 1 or -EINVAL, change the
-	 * direction to output and set the GPIO as active.
-	 * Do not force the GPIO to active when it was already output as it
-	 * could cause backlight flickering or we would enable the backlight too
-	 * early. Leave the decision of the initial backlight state for later.
-	 */
-	if (pb->enable_gpio &&
-	    gpiod_get_direction(pb->enable_gpio) != 0)
-		gpiod_direction_output(pb->enable_gpio, 1);
-
 	pb->power_supply = devm_regulator_get(&pdev->dev, "power");
 	if (IS_ERR(pb->power_supply)) {
 		ret = PTR_ERR(pb->power_supply);
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 71fb710f1ce3..7420d2c16e47 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -962,6 +962,7 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var)
 	struct fb_var_screeninfo old_var;
 	struct fb_videomode mode;
 	struct fb_event event;
+	u32 unused;
 
 	if (var->activate & FB_ACTIVATE_INV_MODE) {
 		struct fb_videomode mode1, mode2;
@@ -1008,6 +1009,11 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var)
 	if (var->xres < 8 || var->yres < 8)
 		return -EINVAL;
 
+	/* Too huge resolution causes multiplication overflow. */
+	if (check_mul_overflow(var->xres, var->yres, &unused) ||
+	    check_mul_overflow(var->xres_virtual, var->yres_virtual, &unused))
+		return -EINVAL;
+
 	ret = info->fbops->fb_check_var(var, info);
 
 	if (ret)
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 1ea0c1f6a1fd..588e02fb91d3 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -4,6 +4,7 @@
 #include <linux/virtio_config.h>
 #include <linux/module.h>
 #include <linux/idr.h>
+#include <linux/of.h>
 #include <uapi/linux/virtio_ids.h>
 
 /* Unique numbering for virtio devices. */
@@ -292,6 +293,8 @@ static void virtio_dev_remove(struct device *_d)
 
 	/* Acknowledge the device's existence again. */
 	virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
+
+	of_node_put(dev->dev.of_node);
 }
 
 static struct bus_type virtio_bus = {
@@ -318,6 +321,43 @@ void unregister_virtio_driver(struct virtio_driver *driver)
 }
 EXPORT_SYMBOL_GPL(unregister_virtio_driver);
 
+static int virtio_device_of_init(struct virtio_device *dev)
+{
+	struct device_node *np, *pnode = dev_of_node(dev->dev.parent);
+	char compat[] = "virtio,deviceXXXXXXXX";
+	int ret, count;
+
+	if (!pnode)
+		return 0;
+
+	count = of_get_available_child_count(pnode);
+	if (!count)
+		return 0;
+
+	/* There can be only 1 child node */
+	if (WARN_ON(count > 1))
+		return -EINVAL;
+
+	np = of_get_next_available_child(pnode, NULL);
+	if (WARN_ON(!np))
+		return -ENODEV;
+
+	ret = snprintf(compat, sizeof(compat), "virtio,device%x", dev->id.device);
+	BUG_ON(ret >= sizeof(compat));
+
+	if (!of_device_is_compatible(np, compat)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	dev->dev.of_node = np;
+	return 0;
+
+out:
+	of_node_put(np);
+	return ret;
+}
+
 /**
  * register_virtio_device - register virtio device
  * @dev        : virtio device to be registered
@@ -342,6 +382,10 @@ int register_virtio_device(struct virtio_device *dev)
 	dev->index = err;
 	dev_set_name(&dev->dev, "virtio%u", dev->index);
 
+	err = virtio_device_of_init(dev);
+	if (err)
+		goto out_ida_remove;
+
 	spin_lock_init(&dev->config_lock);
 	dev->config_enabled = false;
 	dev->config_change_pending = false;
@@ -362,10 +406,16 @@ int register_virtio_device(struct virtio_device *dev)
 	 */
 	err = device_add(&dev->dev);
 	if (err)
-		ida_simple_remove(&virtio_index_ida, dev->index);
+		goto out_of_node_put;
+
+	return 0;
+
+out_of_node_put:
+	of_node_put(dev->dev.of_node);
+out_ida_remove:
+	ida_simple_remove(&virtio_index_ida, dev->index);
 out:
-	if (err)
-		virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
+	virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
 	return err;
 }
 EXPORT_SYMBOL_GPL(register_virtio_device);
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 47dce91f788c..c22ff0117b46 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -531,8 +531,8 @@ static int init_vqs(struct virtio_balloon *vb)
 		callbacks[VIRTIO_BALLOON_VQ_REPORTING] = balloon_ack;
 	}
 
-	err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
-					 vqs, callbacks, names, NULL, NULL);
+	err = virtio_find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX, vqs,
+			      callbacks, names, NULL);
 	if (err)
 		return err;
 
diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index b91bc810a87e..bef8ad6bf466 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -143,6 +143,8 @@ struct virtio_mem {
 	 * add_memory_driver_managed().
 	 */
 	const char *resource_name;
+	/* Memory group identification. */
+	int mgid;
 
 	/*
 	 * We don't want to add too much memory if it's not getting onlined,
@@ -626,8 +628,8 @@ static int virtio_mem_add_memory(struct virtio_mem *vm, uint64_t addr,
 		addr + size - 1);
 	/* Memory might get onlined immediately. */
 	atomic64_add(size, &vm->offline_size);
-	rc = add_memory_driver_managed(vm->nid, addr, size, vm->resource_name,
-				       MHP_MERGE_RESOURCE);
+	rc = add_memory_driver_managed(vm->mgid, addr, size, vm->resource_name,
+				       MHP_MERGE_RESOURCE | MHP_NID_IS_MGID);
 	if (rc) {
 		atomic64_sub(size, &vm->offline_size);
 		dev_warn(&vm->vdev->dev, "adding memory failed: %d\n", rc);
@@ -677,7 +679,7 @@ static int virtio_mem_remove_memory(struct virtio_mem *vm, uint64_t addr,
 
 	dev_dbg(&vm->vdev->dev, "removing memory: 0x%llx - 0x%llx\n", addr,
 		addr + size - 1);
-	rc = remove_memory(vm->nid, addr, size);
+	rc = remove_memory(addr, size);
 	if (!rc) {
 		atomic64_sub(size, &vm->offline_size);
 		/*
@@ -720,7 +722,7 @@ static int virtio_mem_offline_and_remove_memory(struct virtio_mem *vm,
 		"offlining and removing memory: 0x%llx - 0x%llx\n", addr,
 		addr + size - 1);
 
-	rc = offline_and_remove_memory(vm->nid, addr, size);
+	rc = offline_and_remove_memory(addr, size);
 	if (!rc) {
 		atomic64_sub(size, &vm->offline_size);
 		/*
@@ -2569,6 +2571,7 @@ static bool virtio_mem_has_memory_added(struct virtio_mem *vm)
 static int virtio_mem_probe(struct virtio_device *vdev)
 {
 	struct virtio_mem *vm;
+	uint64_t unit_pages;
 	int rc;
 
 	BUILD_BUG_ON(sizeof(struct virtio_mem_req) != 24);
@@ -2603,6 +2606,16 @@ static int virtio_mem_probe(struct virtio_device *vdev)
 	if (rc)
 		goto out_del_vq;
 
+	/* use a single dynamic memory group to cover the whole memory device */
+	if (vm->in_sbm)
+		unit_pages = PHYS_PFN(memory_block_size_bytes());
+	else
+		unit_pages = PHYS_PFN(vm->bbm.bb_size);
+	rc = memory_group_register_dynamic(vm->nid, unit_pages);
+	if (rc < 0)
+		goto out_del_resource;
+	vm->mgid = rc;
+
 	/*
 	 * If we still have memory plugged, we have to unplug all memory first.
 	 * Registering our parent resource makes sure that this memory isn't
@@ -2617,7 +2630,7 @@ static int virtio_mem_probe(struct virtio_device *vdev)
 	vm->memory_notifier.notifier_call = virtio_mem_memory_notifier_cb;
 	rc = register_memory_notifier(&vm->memory_notifier);
 	if (rc)
-		goto out_del_resource;
+		goto out_unreg_group;
 	rc = register_virtio_mem_device(vm);
 	if (rc)
 		goto out_unreg_mem;
@@ -2631,6 +2644,8 @@ static int virtio_mem_probe(struct virtio_device *vdev)
 	return 0;
 out_unreg_mem:
 	unregister_memory_notifier(&vm->memory_notifier);
+out_unreg_group:
+	memory_group_unregister(vm->mgid);
 out_del_resource:
 	virtio_mem_delete_resource(vm);
 out_del_vq:
@@ -2695,6 +2710,7 @@ static void virtio_mem_remove(struct virtio_device *vdev)
 	} else {
 		virtio_mem_delete_resource(vm);
 		kfree_const(vm->resource_name);
+		memory_group_unregister(vm->mgid);
 	}
 
 	/* remove all tracking data - no locking needed */
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 0bc7046ab942..b81fe4f7d434 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -168,18 +168,6 @@ config SOFT_WATCHDOG_PRETIMEOUT
 	  watchdog. Be aware that governors might affect the watchdog because it
 	  is purely software, e.g. the panic governor will stall it!
 
-config BD70528_WATCHDOG
-	tristate "ROHM BD70528 PMIC Watchdog"
-	depends on MFD_ROHM_BD70528
-	select WATCHDOG_CORE
-	help
-	  Support for the watchdog in the ROHM BD70528 PMIC. Watchdog trigger
-	  cause system reset.
-
-	  Say Y here to include support for the ROHM BD70528 watchdog.
-	  Alternatively say M to compile the driver as a module,
-	  which will be called bd70528_wdt.
-
 config BD957XMUF_WATCHDOG
 	tristate "ROHM BD9576MUF and BD9573MUF PMIC Watchdog"
 	depends on MFD_ROHM_BD957XMUF
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index abaf2ebd814e..1bd2d6f37c53 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -204,7 +204,6 @@ obj-$(CONFIG_WATCHDOG_SUN4V)		+= sun4v_wdt.o
 obj-$(CONFIG_XEN_WDT) += xen_wdt.o
 
 # Architecture Independent
-obj-$(CONFIG_BD70528_WATCHDOG) += bd70528_wdt.o
 obj-$(CONFIG_BD957XMUF_WATCHDOG) += bd9576_wdt.o
 obj-$(CONFIG_DA9052_WATCHDOG) += da9052_wdt.o
 obj-$(CONFIG_DA9055_WATCHDOG) += da9055_wdt.o
diff --git a/drivers/watchdog/bcm2835_wdt.c b/drivers/watchdog/bcm2835_wdt.c
index dec6ca019bea..94907176a0e4 100644
--- a/drivers/watchdog/bcm2835_wdt.c
+++ b/drivers/watchdog/bcm2835_wdt.c
@@ -205,9 +205,13 @@ static int bcm2835_wdt_probe(struct platform_device *pdev)
 	if (err)
 		return err;
 
-	if (pm_power_off == NULL) {
-		pm_power_off = bcm2835_power_off;
-		bcm2835_power_off_wdt = wdt;
+	if (of_device_is_system_power_controller(pdev->dev.parent->of_node)) {
+		if (!pm_power_off) {
+			pm_power_off = bcm2835_power_off;
+			bcm2835_power_off_wdt = wdt;
+		} else {
+			dev_info(dev, "Poweroff handler already present!\n");
+		}
 	}
 
 	dev_info(dev, "Broadcom BCM2835 watchdog timer");
diff --git a/drivers/watchdog/bd70528_wdt.c b/drivers/watchdog/bd70528_wdt.c
deleted file mode 100644
index 0170b37e6674..000000000000
--- a/drivers/watchdog/bd70528_wdt.c
+++ /dev/null
@@ -1,291 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2018 ROHM Semiconductors
-// ROHM BD70528MWV watchdog driver
-
-#include <linux/bcd.h>
-#include <linux/kernel.h>
-#include <linux/mfd/rohm-bd70528.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/platform_device.h>
-#include <linux/regmap.h>
-#include <linux/watchdog.h>
-
-/*
- * Max time we can set is 1 hour, 59 minutes and 59 seconds
- * and Minimum time is 1 second
- */
-#define WDT_MAX_MS	((2 * 60 * 60 - 1) * 1000)
-#define WDT_MIN_MS	1000
-#define DEFAULT_TIMEOUT	60
-
-#define WD_CTRL_MAGIC1 0x55
-#define WD_CTRL_MAGIC2 0xAA
-
-struct wdtbd70528 {
-	struct device *dev;
-	struct regmap *regmap;
-	struct rohm_regmap_dev *mfd;
-	struct watchdog_device wdt;
-};
-
-/**
- * bd70528_wdt_set - arm or disarm watchdog timer
- *
- * @data:	device data for the PMIC instance we want to operate on
- * @enable:	new state of WDT. zero to disable, non zero to enable
- * @old_state:	previous state of WDT will be filled here
- *
- * Arm or disarm WDT on BD70528 PMIC. Expected to be called only by
- * BD70528 RTC and BD70528 WDT drivers. The rtc_timer_lock must be taken
- * by calling bd70528_wdt_lock before calling bd70528_wdt_set.
- */
-int bd70528_wdt_set(struct rohm_regmap_dev *data, int enable, int *old_state)
-{
-	int ret, i;
-	unsigned int tmp;
-	struct bd70528_data *bd70528 = container_of(data, struct bd70528_data,
-						 chip);
-	u8 wd_ctrl_arr[3] = { WD_CTRL_MAGIC1, WD_CTRL_MAGIC2, 0 };
-	u8 *wd_ctrl = &wd_ctrl_arr[2];
-
-	ret = regmap_read(bd70528->chip.regmap, BD70528_REG_WDT_CTRL, &tmp);
-	if (ret)
-		return ret;
-
-	*wd_ctrl = (u8)tmp;
-
-	if (old_state) {
-		if (*wd_ctrl & BD70528_MASK_WDT_EN)
-			*old_state |= BD70528_WDT_STATE_BIT;
-		else
-			*old_state &= ~BD70528_WDT_STATE_BIT;
-		if ((!enable) == (!(*old_state & BD70528_WDT_STATE_BIT)))
-			return 0;
-	}
-
-	if (enable) {
-		if (*wd_ctrl & BD70528_MASK_WDT_EN)
-			return 0;
-		*wd_ctrl |= BD70528_MASK_WDT_EN;
-	} else {
-		if (*wd_ctrl & BD70528_MASK_WDT_EN)
-			*wd_ctrl &= ~BD70528_MASK_WDT_EN;
-		else
-			return 0;
-	}
-
-	for (i = 0; i < 3; i++) {
-		ret = regmap_write(bd70528->chip.regmap, BD70528_REG_WDT_CTRL,
-				   wd_ctrl_arr[i]);
-		if (ret)
-			return ret;
-	}
-
-	ret = regmap_read(bd70528->chip.regmap, BD70528_REG_WDT_CTRL, &tmp);
-	if ((tmp & BD70528_MASK_WDT_EN) != (*wd_ctrl & BD70528_MASK_WDT_EN)) {
-		dev_err(bd70528->chip.dev,
-			"Watchdog ctrl mismatch (hw) 0x%x (set) 0x%x\n",
-			tmp, *wd_ctrl);
-		ret = -EIO;
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL(bd70528_wdt_set);
-
-/**
- * bd70528_wdt_lock - take WDT lock
- *
- * @data:	device data for the PMIC instance we want to operate on
- *
- * Lock WDT for arming/disarming in order to avoid race condition caused
- * by WDT state changes initiated by WDT and RTC drivers.
- */
-void bd70528_wdt_lock(struct rohm_regmap_dev *data)
-{
-	struct bd70528_data *bd70528 = container_of(data, struct bd70528_data,
-						 chip);
-
-	mutex_lock(&bd70528->rtc_timer_lock);
-}
-EXPORT_SYMBOL(bd70528_wdt_lock);
-
-/**
- * bd70528_wdt_unlock - unlock WDT lock
- *
- * @data:	device data for the PMIC instance we want to operate on
- *
- * Unlock WDT lock which has previously been taken by call to
- * bd70528_wdt_lock.
- */
-void bd70528_wdt_unlock(struct rohm_regmap_dev *data)
-{
-	struct bd70528_data *bd70528 = container_of(data, struct bd70528_data,
-						 chip);
-
-	mutex_unlock(&bd70528->rtc_timer_lock);
-}
-EXPORT_SYMBOL(bd70528_wdt_unlock);
-
-static int bd70528_wdt_set_locked(struct wdtbd70528 *w, int enable)
-{
-	return bd70528_wdt_set(w->mfd, enable, NULL);
-}
-
-static int bd70528_wdt_change(struct wdtbd70528 *w, int enable)
-{
-	int ret;
-
-	bd70528_wdt_lock(w->mfd);
-	ret = bd70528_wdt_set_locked(w, enable);
-	bd70528_wdt_unlock(w->mfd);
-
-	return ret;
-}
-
-static int bd70528_wdt_start(struct watchdog_device *wdt)
-{
-	struct wdtbd70528 *w = watchdog_get_drvdata(wdt);
-
-	dev_dbg(w->dev, "WDT ping...\n");
-	return bd70528_wdt_change(w, 1);
-}
-
-static int bd70528_wdt_stop(struct watchdog_device *wdt)
-{
-	struct wdtbd70528 *w = watchdog_get_drvdata(wdt);
-
-	dev_dbg(w->dev, "WDT stopping...\n");
-	return bd70528_wdt_change(w, 0);
-}
-
-static int bd70528_wdt_set_timeout(struct watchdog_device *wdt,
-				   unsigned int timeout)
-{
-	unsigned int hours;
-	unsigned int minutes;
-	unsigned int seconds;
-	int ret;
-	struct wdtbd70528 *w = watchdog_get_drvdata(wdt);
-
-	seconds = timeout;
-	hours = timeout / (60 * 60);
-	/* Maximum timeout is 1h 59m 59s => hours is 1 or 0 */
-	if (hours)
-		seconds -= (60 * 60);
-	minutes = seconds / 60;
-	seconds = seconds % 60;
-
-	bd70528_wdt_lock(w->mfd);
-
-	ret = bd70528_wdt_set_locked(w, 0);
-	if (ret)
-		goto out_unlock;
-
-	ret = regmap_update_bits(w->regmap, BD70528_REG_WDT_HOUR,
-				 BD70528_MASK_WDT_HOUR, hours);
-	if (ret) {
-		dev_err(w->dev, "Failed to set WDT hours\n");
-		goto out_en_unlock;
-	}
-	ret = regmap_update_bits(w->regmap, BD70528_REG_WDT_MINUTE,
-				 BD70528_MASK_WDT_MINUTE, bin2bcd(minutes));
-	if (ret) {
-		dev_err(w->dev, "Failed to set WDT minutes\n");
-		goto out_en_unlock;
-	}
-	ret = regmap_update_bits(w->regmap, BD70528_REG_WDT_SEC,
-				 BD70528_MASK_WDT_SEC, bin2bcd(seconds));
-	if (ret)
-		dev_err(w->dev, "Failed to set WDT seconds\n");
-	else
-		dev_dbg(w->dev, "WDT tmo set to %u\n", timeout);
-
-out_en_unlock:
-	ret = bd70528_wdt_set_locked(w, 1);
-out_unlock:
-	bd70528_wdt_unlock(w->mfd);
-
-	return ret;
-}
-
-static const struct watchdog_info bd70528_wdt_info = {
-	.identity = "bd70528-wdt",
-	.options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE,
-};
-
-static const struct watchdog_ops bd70528_wdt_ops = {
-	.start		= bd70528_wdt_start,
-	.stop		= bd70528_wdt_stop,
-	.set_timeout	= bd70528_wdt_set_timeout,
-};
-
-static int bd70528_wdt_probe(struct platform_device *pdev)
-{
-	struct rohm_regmap_dev *bd70528;
-	struct wdtbd70528 *w;
-	int ret;
-	unsigned int reg;
-
-	bd70528 = dev_get_drvdata(pdev->dev.parent);
-	if (!bd70528) {
-		dev_err(&pdev->dev, "No MFD driver data\n");
-		return -EINVAL;
-	}
-	w = devm_kzalloc(&pdev->dev, sizeof(*w), GFP_KERNEL);
-	if (!w)
-		return -ENOMEM;
-
-	w->regmap = bd70528->regmap;
-	w->mfd = bd70528;
-	w->dev = &pdev->dev;
-
-	w->wdt.info = &bd70528_wdt_info;
-	w->wdt.ops =  &bd70528_wdt_ops;
-	w->wdt.min_hw_heartbeat_ms = WDT_MIN_MS;
-	w->wdt.max_hw_heartbeat_ms = WDT_MAX_MS;
-	w->wdt.parent = pdev->dev.parent;
-	w->wdt.timeout = DEFAULT_TIMEOUT;
-	watchdog_set_drvdata(&w->wdt, w);
-	watchdog_init_timeout(&w->wdt, 0, pdev->dev.parent);
-
-	ret = bd70528_wdt_set_timeout(&w->wdt, w->wdt.timeout);
-	if (ret) {
-		dev_err(&pdev->dev, "Failed to set the watchdog timeout\n");
-		return ret;
-	}
-
-	bd70528_wdt_lock(w->mfd);
-	ret = regmap_read(w->regmap, BD70528_REG_WDT_CTRL, &reg);
-	bd70528_wdt_unlock(w->mfd);
-
-	if (ret) {
-		dev_err(&pdev->dev, "Failed to get the watchdog state\n");
-		return ret;
-	}
-	if (reg & BD70528_MASK_WDT_EN) {
-		dev_dbg(&pdev->dev, "watchdog was running during probe\n");
-		set_bit(WDOG_HW_RUNNING, &w->wdt.status);
-	}
-
-	ret = devm_watchdog_register_device(&pdev->dev, &w->wdt);
-	if (ret < 0)
-		dev_err(&pdev->dev, "watchdog registration failed: %d\n", ret);
-
-	return ret;
-}
-
-static struct platform_driver bd70528_wdt = {
-	.driver = {
-		.name = "bd70528-wdt"
-	},
-	.probe = bd70528_wdt_probe,
-};
-
-module_platform_driver(bd70528_wdt);
-
-MODULE_AUTHOR("Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>");
-MODULE_DESCRIPTION("BD70528 watchdog driver");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:bd70528-wdt");
diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c
index b3f604669e2c..643c6c2d0b72 100644
--- a/drivers/watchdog/iTCO_wdt.c
+++ b/drivers/watchdog/iTCO_wdt.c
@@ -362,7 +362,7 @@ static int iTCO_wdt_set_timeout(struct watchdog_device *wd_dev, unsigned int t)
 	 * Otherwise, the BIOS generally reboots when the SMI triggers.
 	 */
 	if (p->smi_res &&
-	    (SMI_EN(p) & (TCO_EN | GBL_SMI_EN)) != (TCO_EN | GBL_SMI_EN))
+	    (inl(SMI_EN(p)) & (TCO_EN | GBL_SMI_EN)) != (TCO_EN | GBL_SMI_EN))
 		tmrval /= 2;
 
 	/* from the specs: */
diff --git a/drivers/watchdog/imx2_wdt.c b/drivers/watchdog/imx2_wdt.c
index cc86018c5eb5..51bfb796898b 100644
--- a/drivers/watchdog/imx2_wdt.c
+++ b/drivers/watchdog/imx2_wdt.c
@@ -317,6 +317,7 @@ static int __init imx2_wdt_probe(struct platform_device *pdev)
 	watchdog_set_nowayout(wdog, nowayout);
 	watchdog_set_restart_priority(wdog, 128);
 	watchdog_init_timeout(wdog, timeout, dev);
+	watchdog_stop_ping_on_suspend(wdog);
 
 	if (imx2_wdt_is_running(wdev)) {
 		imx2_wdt_set_timeout(wdog, wdog->timeout);
diff --git a/drivers/watchdog/max63xx_wdt.c b/drivers/watchdog/max63xx_wdt.c
index 3a899628a834..9e1541cfae0d 100644
--- a/drivers/watchdog/max63xx_wdt.c
+++ b/drivers/watchdog/max63xx_wdt.c
@@ -26,6 +26,7 @@
 #include <linux/spinlock.h>
 #include <linux/io.h>
 #include <linux/slab.h>
+#include <linux/property.h>
 
 #define DEFAULT_HEARTBEAT 60
 #define MAX_HEARTBEAT     60
@@ -99,8 +100,8 @@ static const struct max63xx_timeout max6373_table[] = {
 	{ },
 };
 
-static struct max63xx_timeout *
-max63xx_select_timeout(struct max63xx_timeout *table, int value)
+static const struct max63xx_timeout *
+max63xx_select_timeout(const struct max63xx_timeout *table, int value)
 {
 	while (table->twd) {
 		if (value <= table->twd) {
@@ -202,14 +203,17 @@ static int max63xx_wdt_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct max63xx_wdt *wdt;
-	struct max63xx_timeout *table;
+	const struct max63xx_timeout *table;
 	int err;
 
 	wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL);
 	if (!wdt)
 		return -ENOMEM;
 
-	table = (struct max63xx_timeout *)pdev->id_entry->driver_data;
+	/* Attempt to use fwnode first */
+	table = device_get_match_data(dev);
+	if (!table)
+		table = (struct max63xx_timeout *)pdev->id_entry->driver_data;
 
 	if (heartbeat < 1 || heartbeat > MAX_HEARTBEAT)
 		heartbeat = DEFAULT_HEARTBEAT;
@@ -255,11 +259,23 @@ static const struct platform_device_id max63xx_id_table[] = {
 };
 MODULE_DEVICE_TABLE(platform, max63xx_id_table);
 
+static const struct of_device_id max63xx_dt_id_table[] = {
+	{ .compatible = "maxim,max6369", .data = max6369_table, },
+	{ .compatible = "maxim,max6370", .data = max6369_table, },
+	{ .compatible = "maxim,max6371", .data = max6371_table, },
+	{ .compatible = "maxim,max6372", .data = max6371_table, },
+	{ .compatible = "maxim,max6373", .data = max6373_table, },
+	{ .compatible = "maxim,max6374", .data = max6373_table, },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, max63xx_dt_id_table);
+
 static struct platform_driver max63xx_wdt_driver = {
 	.probe		= max63xx_wdt_probe,
 	.id_table	= max63xx_id_table,
 	.driver		= {
 		.name	= "max63xx_wdt",
+		.of_match_table = max63xx_dt_id_table,
 	},
 };
 
diff --git a/drivers/watchdog/mpc8xxx_wdt.c b/drivers/watchdog/mpc8xxx_wdt.c
index 2f7ded32e878..1c569be72ea2 100644
--- a/drivers/watchdog/mpc8xxx_wdt.c
+++ b/drivers/watchdog/mpc8xxx_wdt.c
@@ -118,7 +118,7 @@ static struct watchdog_info mpc8xxx_wdt_info = {
 	.identity = "MPC8xxx",
 };
 
-static struct watchdog_ops mpc8xxx_wdt_ops = {
+static const struct watchdog_ops mpc8xxx_wdt_ops = {
 	.owner = THIS_MODULE,
 	.start = mpc8xxx_wdt_start,
 	.ping = mpc8xxx_wdt_ping,
diff --git a/drivers/watchdog/mtk_wdt.c b/drivers/watchdog/mtk_wdt.c
index 16b6aff324a7..796fbb048cbe 100644
--- a/drivers/watchdog/mtk_wdt.c
+++ b/drivers/watchdog/mtk_wdt.c
@@ -12,6 +12,7 @@
 #include <dt-bindings/reset-controller/mt2712-resets.h>
 #include <dt-bindings/reset-controller/mt8183-resets.h>
 #include <dt-bindings/reset-controller/mt8192-resets.h>
+#include <dt-bindings/reset/mt8195-resets.h>
 #include <linux/delay.h>
 #include <linux/err.h>
 #include <linux/init.h>
@@ -82,6 +83,10 @@ static const struct mtk_wdt_data mt8192_data = {
 	.toprgu_sw_rst_num = MT8192_TOPRGU_SW_RST_NUM,
 };
 
+static const struct mtk_wdt_data mt8195_data = {
+	.toprgu_sw_rst_num = MT8195_TOPRGU_SW_RST_NUM,
+};
+
 static int toprgu_reset_update(struct reset_controller_dev *rcdev,
 			       unsigned long id, bool assert)
 {
@@ -408,6 +413,7 @@ static const struct of_device_id mtk_wdt_dt_ids[] = {
 	{ .compatible = "mediatek,mt6589-wdt" },
 	{ .compatible = "mediatek,mt8183-wdt", .data = &mt8183_data },
 	{ .compatible = "mediatek,mt8192-wdt", .data = &mt8192_data },
+	{ .compatible = "mediatek,mt8195-wdt", .data = &mt8195_data },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, mtk_wdt_dt_ids);
diff --git a/drivers/watchdog/sl28cpld_wdt.c b/drivers/watchdog/sl28cpld_wdt.c
index 2de93298475f..9ce456f09f73 100644
--- a/drivers/watchdog/sl28cpld_wdt.c
+++ b/drivers/watchdog/sl28cpld_wdt.c
@@ -108,7 +108,7 @@ static const struct watchdog_info sl28cpld_wdt_info = {
 	.identity = "sl28cpld watchdog",
 };
 
-static struct watchdog_ops sl28cpld_wdt_ops = {
+static const struct watchdog_ops sl28cpld_wdt_ops = {
 	.owner = THIS_MODULE,
 	.start = sl28cpld_wdt_start,
 	.stop = sl28cpld_wdt_stop,
diff --git a/drivers/watchdog/tqmx86_wdt.c b/drivers/watchdog/tqmx86_wdt.c
index 72d0b0adde38..83860e94ce9d 100644
--- a/drivers/watchdog/tqmx86_wdt.c
+++ b/drivers/watchdog/tqmx86_wdt.c
@@ -62,7 +62,7 @@ static const struct watchdog_info tqmx86_wdt_info = {
 	.identity	= "TQMx86 Watchdog",
 };
 
-static struct watchdog_ops tqmx86_wdt_ops = {
+static const struct watchdog_ops tqmx86_wdt_ops = {
 	.owner		= THIS_MODULE,
 	.start		= tqmx86_wdt_start,
 	.set_timeout	= tqmx86_wdt_set_timeout,
diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c
index 5df0a22e2cb4..3fe8a7edc252 100644
--- a/drivers/watchdog/watchdog_core.c
+++ b/drivers/watchdog/watchdog_core.c
@@ -34,6 +34,7 @@
 #include <linux/idr.h>		/* For ida_* macros */
 #include <linux/err.h>		/* For IS_ERR macros */
 #include <linux/of.h>		/* For of_get_timeout_sec */
+#include <linux/suspend.h>
 
 #include "watchdog_core.h"	/* For watchdog_dev_register/... */
 
@@ -185,6 +186,33 @@ static int watchdog_restart_notifier(struct notifier_block *nb,
 	return NOTIFY_DONE;
 }
 
+static int watchdog_pm_notifier(struct notifier_block *nb, unsigned long mode,
+				void *data)
+{
+	struct watchdog_device *wdd;
+	int ret = 0;
+
+	wdd = container_of(nb, struct watchdog_device, pm_nb);
+
+	switch (mode) {
+	case PM_HIBERNATION_PREPARE:
+	case PM_RESTORE_PREPARE:
+	case PM_SUSPEND_PREPARE:
+		ret = watchdog_dev_suspend(wdd);
+		break;
+	case PM_POST_HIBERNATION:
+	case PM_POST_RESTORE:
+	case PM_POST_SUSPEND:
+		ret = watchdog_dev_resume(wdd);
+		break;
+	}
+
+	if (ret)
+		return NOTIFY_BAD;
+
+	return NOTIFY_DONE;
+}
+
 /**
  * watchdog_set_restart_priority - Change priority of restart handler
  * @wdd: watchdog device
@@ -292,6 +320,15 @@ static int __watchdog_register_device(struct watchdog_device *wdd)
 				wdd->id, ret);
 	}
 
+	if (test_bit(WDOG_NO_PING_ON_SUSPEND, &wdd->status)) {
+		wdd->pm_nb.notifier_call = watchdog_pm_notifier;
+
+		ret = register_pm_notifier(&wdd->pm_nb);
+		if (ret)
+			pr_warn("watchdog%d: Cannot register pm handler (%d)\n",
+				wdd->id, ret);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
index 3bab32485273..3a3d8b5c7ad5 100644
--- a/drivers/watchdog/watchdog_dev.c
+++ b/drivers/watchdog/watchdog_dev.c
@@ -401,7 +401,7 @@ static int watchdog_set_pretimeout(struct watchdog_device *wdd,
 	if (watchdog_pretimeout_invalid(wdd, timeout))
 		return -EINVAL;
 
-	if (wdd->ops->set_pretimeout)
+	if (wdd->ops->set_pretimeout && (wdd->info->options & WDIOF_PRETIMEOUT))
 		err = wdd->ops->set_pretimeout(wdd, timeout);
 	else
 		wdd->pretimeout = timeout;
@@ -1096,6 +1096,8 @@ static void watchdog_cdev_unregister(struct watchdog_device *wdd)
 		watchdog_stop(wdd);
 	}
 
+	watchdog_hrtimer_pretimeout_stop(wdd);
+
 	mutex_lock(&wd_data->lock);
 	wd_data->wdd = NULL;
 	wdd->wd_data = NULL;
@@ -1103,7 +1105,6 @@ static void watchdog_cdev_unregister(struct watchdog_device *wdd)
 
 	hrtimer_cancel(&wd_data->timer);
 	kthread_cancel_work_sync(&wd_data->work);
-	watchdog_hrtimer_pretimeout_stop(wdd);
 
 	put_device(&wd_data->dev);
 }
@@ -1172,7 +1173,10 @@ int watchdog_set_last_hw_keepalive(struct watchdog_device *wdd,
 
 	wd_data->last_hw_keepalive = ktime_sub(now, ms_to_ktime(last_ping_ms));
 
-	return __watchdog_ping(wdd);
+	if (watchdog_hw_running(wdd) && handle_boot_enabled)
+		return __watchdog_ping(wdd);
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(watchdog_set_last_hw_keepalive);
 
@@ -1227,6 +1231,53 @@ void __exit watchdog_dev_exit(void)
 	kthread_destroy_worker(watchdog_kworker);
 }
 
+int watchdog_dev_suspend(struct watchdog_device *wdd)
+{
+	struct watchdog_core_data *wd_data = wdd->wd_data;
+	int ret = 0;
+
+	if (!wdd->wd_data)
+		return -ENODEV;
+
+	/* ping for the last time before suspend */
+	mutex_lock(&wd_data->lock);
+	if (watchdog_worker_should_ping(wd_data))
+		ret = __watchdog_ping(wd_data->wdd);
+	mutex_unlock(&wd_data->lock);
+
+	if (ret)
+		return ret;
+
+	/*
+	 * make sure that watchdog worker will not kick in when the wdog is
+	 * suspended
+	 */
+	hrtimer_cancel(&wd_data->timer);
+	kthread_cancel_work_sync(&wd_data->work);
+
+	return 0;
+}
+
+int watchdog_dev_resume(struct watchdog_device *wdd)
+{
+	struct watchdog_core_data *wd_data = wdd->wd_data;
+	int ret = 0;
+
+	if (!wdd->wd_data)
+		return -ENODEV;
+
+	/*
+	 * __watchdog_ping will also retrigger hrtimer and therefore restore the
+	 * ping worker if needed.
+	 */
+	mutex_lock(&wd_data->lock);
+	if (watchdog_worker_should_ping(wd_data))
+		ret = __watchdog_ping(wd_data->wdd);
+	mutex_unlock(&wd_data->lock);
+
+	return ret;
+}
+
 module_param(handle_boot_enabled, bool, 0444);
 MODULE_PARM_DESC(handle_boot_enabled,
 	"Watchdog core auto-updates boot enabled watchdogs before userspace takes over (default="
diff --git a/drivers/xen/features.c b/drivers/xen/features.c
index 25c053b09605..7b591443833c 100644
--- a/drivers/xen/features.c
+++ b/drivers/xen/features.c
@@ -9,13 +9,26 @@
 #include <linux/types.h>
 #include <linux/cache.h>
 #include <linux/export.h>
+#include <linux/printk.h>
 
 #include <asm/xen/hypercall.h>
 
+#include <xen/xen.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/version.h>
 #include <xen/features.h>
 
+/*
+ * Linux kernel expects at least Xen 4.0.
+ *
+ * Assume some features to be available for that reason (depending on guest
+ * mode, of course).
+ */
+#define chk_required_feature(f) {					\
+		if (!xen_feature(f))					\
+			panic("Xen: feature %s not available!\n", #f);	\
+	}
+
 u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
 EXPORT_SYMBOL_GPL(xen_features);
 
@@ -31,4 +44,9 @@ void xen_setup_features(void)
 		for (j = 0; j < 32; j++)
 			xen_features[i * 32 + j] = !!(fi.submap & 1<<j);
 	}
+
+	if (xen_pv_domain()) {
+		chk_required_feature(XENFEAT_mmu_pt_update_preserve_ad);
+		chk_required_feature(XENFEAT_gnttab_map_avail_bits);
+	}
 }
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index a3e7be96527d..1e7f6b1c0c97 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -266,20 +266,13 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
 {
 	struct gntdev_grant_map *map = data;
 	unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
-	int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte;
+	int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte |
+		    (1 << _GNTMAP_guest_avail0);
 	u64 pte_maddr;
 
 	BUG_ON(pgnr >= map->count);
 	pte_maddr = arbitrary_virt_to_machine(pte).maddr;
 
-	/*
-	 * Set the PTE as special to force get_user_pages_fast() fall
-	 * back to the slow path.  If this is not supported as part of
-	 * the grant map, it will be done afterwards.
-	 */
-	if (xen_feature(XENFEAT_gnttab_map_avail_bits))
-		flags |= (1 << _GNTMAP_guest_avail0);
-
 	gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags,
 			  map->grants[pgnr].ref,
 			  map->grants[pgnr].domid);
@@ -288,14 +281,6 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
 	return 0;
 }
 
-#ifdef CONFIG_X86
-static int set_grant_ptes_as_special(pte_t *pte, unsigned long addr, void *data)
-{
-	set_pte_at(current->mm, addr, pte, pte_mkspecial(*pte));
-	return 0;
-}
-#endif
-
 int gntdev_map_grant_pages(struct gntdev_grant_map *map)
 {
 	int i, err = 0;
@@ -1055,23 +1040,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 		err = vm_map_pages_zero(vma, map->pages, map->count);
 		if (err)
 			goto out_put_map;
-	} else {
-#ifdef CONFIG_X86
-		/*
-		 * If the PTEs were not made special by the grant map
-		 * hypercall, do so here.
-		 *
-		 * This is racy since the mapping is already visible
-		 * to userspace but userspace should be well-behaved
-		 * enough to not touch it until the mmap() call
-		 * returns.
-		 */
-		if (!xen_feature(XENFEAT_gnttab_map_avail_bits)) {
-			apply_to_page_range(vma->vm_mm, vma->vm_start,
-					    vma->vm_end - vma->vm_start,
-					    set_grant_ptes_as_special, NULL);
-		}
-#endif
 	}
 
 	return 0;
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 24d11861ac7d..643fe440c46e 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -100,7 +100,7 @@ static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr)
 	 * in our domain. Therefore _only_ check address within our domain.
 	 */
 	if (pfn_valid(PFN_DOWN(paddr)))
-		return is_swiotlb_buffer(paddr);
+		return is_swiotlb_buffer(dev, paddr);
 	return 0;
 }
 
@@ -164,7 +164,7 @@ int __ref xen_swiotlb_init(void)
 	int rc = -ENOMEM;
 	char *start;
 
-	if (io_tlb_default_mem != NULL) {
+	if (io_tlb_default_mem.nslabs) {
 		pr_warn("swiotlb buffer already initialized\n");
 		return -EEXIST;
 	}
@@ -374,7 +374,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 	if (dma_capable(dev, dev_addr, size, true) &&
 	    !range_straddles_page_boundary(phys, size) &&
 		!xen_arch_need_swiotlb(dev, phys, dev_addr) &&
-		swiotlb_force != SWIOTLB_FORCE)
+		!is_swiotlb_force_bounce(dev))
 		goto done;
 
 	/*
@@ -509,7 +509,7 @@ xen_swiotlb_map_sg(struct device *dev, struct scatterlist *sgl, int nelems,
 out_unmap:
 	xen_swiotlb_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
 	sg_dma_len(sgl) = 0;
-	return 0;
+	return -EIO;
 }
 
 static void
@@ -547,7 +547,7 @@ xen_swiotlb_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
 static int
 xen_swiotlb_dma_supported(struct device *hwdev, u64 mask)
 {
-	return xen_phys_to_dma(hwdev, io_tlb_default_mem->end - 1) <= mask;
+	return xen_phys_to_dma(hwdev, io_tlb_default_mem.end - 1) <= mask;
 }
 
 const struct dma_map_ops xen_swiotlb_dma_ops = {
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index 61ce0d142eea..0c5e565aa8cf 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -33,8 +33,6 @@
 
 #define pr_fmt(fmt) "xen-pvscsi: " fmt
 
-#include <stdarg.h>
-
 #include <linux/module.h>
 #include <linux/utsname.h>
 #include <linux/interrupt.h>
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 0cd728961fce..e8bed1cb76ba 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -542,8 +542,7 @@ static int __xenbus_map_ring(struct xenbus_device *dev,
 		}
 	}
 
-	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, info->unmap, j))
-		BUG();
+	BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, info->unmap, j));
 
 	*leaked = false;
 	for (i = 0; i < j; i++) {
@@ -581,8 +580,7 @@ static int xenbus_unmap_ring(struct xenbus_device *dev, grant_handle_t *handles,
 		gnttab_set_unmap_op(&unmap[i], vaddrs[i],
 				    GNTMAP_host_map, handles[i]);
 
-	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i))
-		BUG();
+	BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i));
 
 	err = GNTST_okay;
 	for (i = 0; i < nr_handles; i++) {
@@ -778,8 +776,7 @@ static int xenbus_unmap_ring_pv(struct xenbus_device *dev, void *vaddr)
 		unmap[i].handle = node->handles[i];
 	}
 
-	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i))
-		BUG();
+	BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i));
 
 	err = GNTST_okay;
 	leaked = false;
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index bb1b286c49ae..c381499f5416 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -97,10 +97,13 @@ static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type)
 	return acl;
 }
 
-struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type)
+struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type, bool rcu)
 {
 	struct v9fs_session_info *v9ses;
 
+	if (rcu)
+		return ERR_PTR(-ECHILD);
+
 	v9ses = v9fs_inode2v9ses(inode);
 	if (((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) ||
 			((v9ses->flags & V9FS_ACL_MASK) != V9FS_POSIX_ACL)) {
diff --git a/fs/9p/acl.h b/fs/9p/acl.h
index e4f7e882272b..d43c8949e807 100644
--- a/fs/9p/acl.h
+++ b/fs/9p/acl.h
@@ -16,7 +16,7 @@
 
 #ifdef CONFIG_9P_FS_POSIX_ACL
 extern int v9fs_get_acl(struct inode *, struct p9_fid *);
-extern struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type);
+extern struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type, bool rcu);
 extern int v9fs_acl_chmod(struct inode *, struct p9_fid *);
 extern int v9fs_set_create_acl(struct inode *, struct p9_fid *,
 			       struct posix_acl *, struct posix_acl *);
diff --git a/fs/Kconfig b/fs/Kconfig
index b11bd4b387e1..a6313a969bc5 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -43,7 +43,7 @@ source "fs/f2fs/Kconfig"
 source "fs/zonefs/Kconfig"
 
 config FS_DAX
-	bool "Direct Access (DAX) support"
+	bool "File system based Direct Access (DAX) support"
 	depends on MMU
 	depends on !(ARM || MIPS || SPARC)
 	select DEV_PAGEMAP_OPS if (ZONE_DEVICE && !FS_DAX_LIMITED)
@@ -53,8 +53,23 @@ config FS_DAX
 	  Direct Access (DAX) can be used on memory-backed block devices.
 	  If the block device supports DAX and the filesystem supports DAX,
 	  then you can avoid using the pagecache to buffer I/Os.  Turning
-	  on this option will compile in support for DAX; you will need to
-	  mount the filesystem using the -o dax option.
+	  on this option will compile in support for DAX.
+
+	  For a DAX device to support file system access it needs to have
+	  struct pages.  For the nfit based NVDIMMs this can be enabled
+	  using the ndctl utility:
+
+		# ndctl create-namespace --force --reconfig=namespace0.0 \
+			--mode=fsdax --map=mem
+
+	  See the 'create-namespace' man page for details on the overhead of
+	  --map=mem:
+	  https://docs.pmem.io/ndctl-user-guide/ndctl-man-pages/ndctl-create-namespace
+
+          For ndctl to work CONFIG_DEV_DAX needs to be enabled as well. For most
+	  file systems DAX support needs to be manually enabled globally or
+	  per-inode using a mount option as well.  See the file documentation in
+	  Documentation/filesystems/dax.rst for details.
 
 	  If you do not have a block device that is capable of using this,
 	  or if unsure, say N.  Saying Y will increase the size of the kernel
@@ -136,6 +151,7 @@ menu "DOS/FAT/EXFAT/NT Filesystems"
 source "fs/fat/Kconfig"
 source "fs/exfat/Kconfig"
 source "fs/ntfs/Kconfig"
+source "fs/ntfs3/Kconfig"
 
 endmenu
 endif # BLOCK
@@ -218,8 +234,7 @@ config ARCH_SUPPORTS_HUGETLBFS
 
 config HUGETLBFS
 	bool "HugeTLB file system support"
-	depends on X86 || IA64 || SPARC64 || (S390 && 64BIT) || \
-		   ARCH_SUPPORTS_HUGETLBFS || BROKEN
+	depends on X86 || IA64 || SPARC64 || ARCH_SUPPORTS_HUGETLBFS || BROKEN
 	help
 	  hugetlbfs is a filesystem backing for HugeTLB pages, based on
 	  ramfs. For architectures that support it, say Y here and read
@@ -352,7 +367,7 @@ source "fs/ceph/Kconfig"
 source "fs/cifs/Kconfig"
 source "fs/ksmbd/Kconfig"
 
-config CIFS_COMMON
+config SMBFS_COMMON
 	tristate
 	default y if CIFS=y
 	default m if CIFS=m
diff --git a/fs/Makefile b/fs/Makefile
index 354e2ba3ee67..84c5e4cdfee5 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -17,7 +17,7 @@ obj-y :=	open.o read_write.o file_table.o super.o \
 		kernel_read_file.o remap_range.o
 
 ifeq ($(CONFIG_BLOCK),y)
-obj-y +=	buffer.o block_dev.o direct-io.o mpage.o
+obj-y +=	buffer.o direct-io.o mpage.o
 else
 obj-y +=	no-block.o
 endif
@@ -96,11 +96,12 @@ obj-$(CONFIG_LOCKD)		+= lockd/
 obj-$(CONFIG_NLS)		+= nls/
 obj-$(CONFIG_UNICODE)		+= unicode/
 obj-$(CONFIG_SYSV_FS)		+= sysv/
-obj-$(CONFIG_CIFS_COMMON)	+= cifs_common/
+obj-$(CONFIG_SMBFS_COMMON)	+= smbfs_common/
 obj-$(CONFIG_CIFS)		+= cifs/
 obj-$(CONFIG_SMB_SERVER)	+= ksmbd/
 obj-$(CONFIG_HPFS_FS)		+= hpfs/
 obj-$(CONFIG_NTFS_FS)		+= ntfs/
+obj-$(CONFIG_NTFS3_FS)		+= ntfs3/
 obj-$(CONFIG_UFS_FS)		+= ufs/
 obj-$(CONFIG_EFS_FS)		+= efs/
 obj-$(CONFIG_JFFS2_FS)		+= jffs2/
diff --git a/fs/attr.c b/fs/attr.c
index 87ef39db1c34..473d21b3a86d 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -249,6 +249,34 @@ void setattr_copy(struct user_namespace *mnt_userns, struct inode *inode,
 }
 EXPORT_SYMBOL(setattr_copy);
 
+int may_setattr(struct user_namespace *mnt_userns, struct inode *inode,
+		unsigned int ia_valid)
+{
+	int error;
+
+	if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
+		if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+			return -EPERM;
+	}
+
+	/*
+	 * If utimes(2) and friends are called with times == NULL (or both
+	 * times are UTIME_NOW), then we need to check for write permission
+	 */
+	if (ia_valid & ATTR_TOUCH) {
+		if (IS_IMMUTABLE(inode))
+			return -EPERM;
+
+		if (!inode_owner_or_capable(mnt_userns, inode)) {
+			error = inode_permission(mnt_userns, inode, MAY_WRITE);
+			if (error)
+				return error;
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL(may_setattr);
+
 /**
  * notify_change - modify attributes of a filesytem object
  * @mnt_userns:	user namespace of the mount the inode was found from
@@ -290,25 +318,9 @@ int notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
 
 	WARN_ON_ONCE(!inode_is_locked(inode));
 
-	if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
-		if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-			return -EPERM;
-	}
-
-	/*
-	 * If utimes(2) and friends are called with times == NULL (or both
-	 * times are UTIME_NOW), then we need to check for write permission
-	 */
-	if (ia_valid & ATTR_TOUCH) {
-		if (IS_IMMUTABLE(inode))
-			return -EPERM;
-
-		if (!inode_owner_or_capable(mnt_userns, inode)) {
-			error = inode_permission(mnt_userns, inode, MAY_WRITE);
-			if (error)
-				return error;
-		}
-	}
+	error = may_setattr(mnt_userns, inode, ia_valid);
+	if (error)
+		return error;
 
 	if ((ia_valid & ATTR_MODE)) {
 		umode_t amode = attr->ia_mode;
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 48e16144c1f7..12b8fdcc445b 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -121,7 +121,7 @@ static const char *bad_inode_get_link(struct dentry *dentry,
 	return ERR_PTR(-EIO);
 }
 
-static struct posix_acl *bad_inode_get_acl(struct inode *inode, int type)
+static struct posix_acl *bad_inode_get_acl(struct inode *inode, int type, bool rcu)
 {
 	return ERR_PTR(-EIO);
 }
diff --git a/fs/befs/debug.c b/fs/befs/debug.c
index eb7bd6c692c7..02fa66fb82c2 100644
--- a/fs/befs/debug.c
+++ b/fs/befs/debug.c
@@ -14,7 +14,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #ifdef __KERNEL__
 
-#include <stdarg.h>
+#include <linux/stdarg.h>
 #include <linux/string.h>
 #include <linux/spinlock.h>
 #include <linux/kernel.h>
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 145917f734fe..0dcfc691e7e2 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -221,8 +221,7 @@ static int load_aout_binary(struct linux_binprm * bprm)
 		}
 
 		error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
-			PROT_READ | PROT_EXEC,
-			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
+			PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE,
 			fd_offset);
 
 		if (error != N_TXTADDR(ex))
@@ -230,7 +229,7 @@ static int load_aout_binary(struct linux_binprm * bprm)
 
 		error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
 				PROT_READ | PROT_WRITE | PROT_EXEC,
-				MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
+				MAP_FIXED | MAP_PRIVATE,
 				fd_offset + ex.a_text);
 		if (error != N_DATADDR(ex))
 			return error;
@@ -309,7 +308,7 @@ static int load_aout_library(struct file *file)
 	/* Now use mmap to map the library into memory. */
 	error = vm_mmap(file, start_addr, ex.a_text + ex.a_data,
 			PROT_READ | PROT_WRITE | PROT_EXEC,
-			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
+			MAP_FIXED | MAP_PRIVATE,
 			N_TXTOFF(ex));
 	retval = error;
 	if (error != start_addr)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 439ed81e755a..69d900a8473d 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -622,7 +622,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 	eppnt = interp_elf_phdata;
 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
 		if (eppnt->p_type == PT_LOAD) {
-			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
+			int elf_type = MAP_PRIVATE;
 			int elf_prot = make_prot(eppnt->p_flags, arch_state,
 						 true, true);
 			unsigned long vaddr = 0;
@@ -1070,7 +1070,7 @@ out_free_interp:
 		elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
 				     !!interpreter, false);
 
-		elf_flags = MAP_PRIVATE | MAP_DENYWRITE;
+		elf_flags = MAP_PRIVATE;
 
 		vaddr = elf_ppnt->p_vaddr;
 		/*
@@ -1384,7 +1384,7 @@ static int load_elf_library(struct file *file)
 			(eppnt->p_filesz +
 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
 			PROT_READ | PROT_WRITE | PROT_EXEC,
-			MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_DENYWRITE,
+			MAP_FIXED_NOREPLACE | MAP_PRIVATE,
 			(eppnt->p_offset -
 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index cf4028487dcc..6d8fd6030cbb 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1041,7 +1041,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 		if (phdr->p_flags & PF_W) prot |= PROT_WRITE;
 		if (phdr->p_flags & PF_X) prot |= PROT_EXEC;
 
-		flags = MAP_PRIVATE | MAP_DENYWRITE;
+		flags = MAP_PRIVATE;
 		maddr = 0;
 
 		switch (params->flags & ELF_FDPIC_FLAG_ARRANGEMENT) {
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index c9f9789e828f..0a0d0eccee4e 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -16,13 +16,16 @@
 #include "btrfs_inode.h"
 #include "xattr.h"
 
-struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
+struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu)
 {
 	int size;
 	const char *name;
 	char *value = NULL;
 	struct posix_acl *acl;
 
+	if (rcu)
+		return ERR_PTR(-ECHILD);
+
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name = XATTR_NAME_POSIX_ACL_ACCESS;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f07c82fafa04..dff2c8a3e059 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3706,7 +3706,7 @@ static inline int __btrfs_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag)
 
 /* acl.c */
 #ifdef CONFIG_BTRFS_FS_POSIX_ACL
-struct posix_acl *btrfs_get_acl(struct inode *inode, int type);
+struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu);
 int btrfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
 		  struct posix_acl *acl, int type);
 int btrfs_init_acl(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 2f9515dccce0..355ea88d5c5f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3314,6 +3314,30 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
 	 */
 	fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
 
+	/*
+	 * Flag our filesystem as having big metadata blocks if they are bigger
+	 * than the page size.
+	 */
+	if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) {
+		if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
+			btrfs_info(fs_info,
+				"flagging fs with big metadata feature");
+		features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
+	}
+
+	/* Set up fs_info before parsing mount options */
+	nodesize = btrfs_super_nodesize(disk_super);
+	sectorsize = btrfs_super_sectorsize(disk_super);
+	stripesize = sectorsize;
+	fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids));
+	fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids));
+
+	fs_info->nodesize = nodesize;
+	fs_info->sectorsize = sectorsize;
+	fs_info->sectorsize_bits = ilog2(sectorsize);
+	fs_info->csums_per_leaf = BTRFS_MAX_ITEM_SIZE(fs_info) / fs_info->csum_size;
+	fs_info->stripesize = stripesize;
+
 	ret = btrfs_parse_options(fs_info, options, sb->s_flags);
 	if (ret) {
 		err = ret;
@@ -3341,30 +3365,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
 		btrfs_info(fs_info, "has skinny extents");
 
 	/*
-	 * flag our filesystem as having big metadata blocks if
-	 * they are bigger than the page size
-	 */
-	if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) {
-		if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
-			btrfs_info(fs_info,
-				"flagging fs with big metadata feature");
-		features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
-	}
-
-	nodesize = btrfs_super_nodesize(disk_super);
-	sectorsize = btrfs_super_sectorsize(disk_super);
-	stripesize = sectorsize;
-	fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids));
-	fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids));
-
-	/* Cache block sizes */
-	fs_info->nodesize = nodesize;
-	fs_info->sectorsize = sectorsize;
-	fs_info->sectorsize_bits = ilog2(sectorsize);
-	fs_info->csums_per_leaf = BTRFS_MAX_ITEM_SIZE(fs_info) / fs_info->csum_size;
-	fs_info->stripesize = stripesize;
-
-	/*
 	 * mixed block groups end up with duplicate but slightly offset
 	 * extent buffers for the same range.  It leads to corruptions
 	 */
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 41524f9aeac3..cc61813213d8 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3223,6 +3223,8 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
 	struct inode *inode = file_inode(file);
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_ioctl_vol_args_v2 *vol_args;
+	struct block_device *bdev = NULL;
+	fmode_t mode;
 	int ret;
 	bool cancel = false;
 
@@ -3255,9 +3257,9 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
 	/* Exclusive operation is now claimed */
 
 	if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID)
-		ret = btrfs_rm_device(fs_info, NULL, vol_args->devid);
+		ret = btrfs_rm_device(fs_info, NULL, vol_args->devid, &bdev, &mode);
 	else
-		ret = btrfs_rm_device(fs_info, vol_args->name, 0);
+		ret = btrfs_rm_device(fs_info, vol_args->name, 0, &bdev, &mode);
 
 	btrfs_exclop_finish(fs_info);
 
@@ -3273,6 +3275,8 @@ out:
 	kfree(vol_args);
 err_drop:
 	mnt_drop_write_file(file);
+	if (bdev)
+		blkdev_put(bdev, mode);
 	return ret;
 }
 
@@ -3281,6 +3285,8 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
 	struct inode *inode = file_inode(file);
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_ioctl_vol_args *vol_args;
+	struct block_device *bdev = NULL;
+	fmode_t mode;
 	int ret;
 	bool cancel;
 
@@ -3302,7 +3308,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
 	ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_DEV_REMOVE,
 					   cancel);
 	if (ret == 0) {
-		ret = btrfs_rm_device(fs_info, vol_args->name, 0);
+		ret = btrfs_rm_device(fs_info, vol_args->name, 0, &bdev, &mode);
 		if (!ret)
 			btrfs_info(fs_info, "disk deleted %s", vol_args->name);
 		btrfs_exclop_finish(fs_info);
@@ -3311,7 +3317,8 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
 	kfree(vol_args);
 out_drop_write:
 	mnt_drop_write_file(file);
-
+	if (bdev)
+		blkdev_put(bdev, mode);
 	return ret;
 }
 
diff --git a/fs/btrfs/misc.h b/fs/btrfs/misc.h
index 6461ebc3a1c1..340f995652f2 100644
--- a/fs/btrfs/misc.h
+++ b/fs/btrfs/misc.h
@@ -5,7 +5,7 @@
 
 #include <linux/sched.h>
 #include <linux/wait.h>
-#include <asm/div64.h>
+#include <linux/math64.h>
 #include <linux/rbtree.h>
 
 #define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index edb65abf0393..6b51fd2ec5ac 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -1049,6 +1049,7 @@ static int clone_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pos,
 				u64 len)
 {
 	struct inode *inode = ordered->inode;
+	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
 	u64 file_offset = ordered->file_offset + pos;
 	u64 disk_bytenr = ordered->disk_bytenr + pos;
 	u64 num_bytes = len;
@@ -1066,6 +1067,13 @@ static int clone_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pos,
 	else
 		type = __ffs(flags_masked);
 
+	/*
+	 * The splitting extent is already counted and will be added again
+	 * in btrfs_add_ordered_extent_*(). Subtract num_bytes to avoid
+	 * double counting.
+	 */
+	percpu_counter_add_batch(&fs_info->ordered_bytes, -num_bytes,
+				 fs_info->delalloc_batch);
 	if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered->flags)) {
 		WARN_ON_ONCE(1);
 		ret = btrfs_add_ordered_extent_compress(BTRFS_I(inode),
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index ec3a874165de..464485aa7318 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -558,6 +558,8 @@ static int btrfs_free_stale_devices(const char *path,
 	struct btrfs_device *device, *tmp_device;
 	int ret = 0;
 
+	lockdep_assert_held(&uuid_mutex);
+
 	if (path)
 		ret = -ENOENT;
 
@@ -988,11 +990,12 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
 	struct btrfs_device *orig_dev;
 	int ret = 0;
 
+	lockdep_assert_held(&uuid_mutex);
+
 	fs_devices = alloc_fs_devices(orig->fsid, NULL);
 	if (IS_ERR(fs_devices))
 		return fs_devices;
 
-	mutex_lock(&orig->device_list_mutex);
 	fs_devices->total_devices = orig->total_devices;
 
 	list_for_each_entry(orig_dev, &orig->devices, dev_list) {
@@ -1024,10 +1027,8 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
 		device->fs_devices = fs_devices;
 		fs_devices->num_devices++;
 	}
-	mutex_unlock(&orig->device_list_mutex);
 	return fs_devices;
 error:
-	mutex_unlock(&orig->device_list_mutex);
 	free_fs_devices(fs_devices);
 	return ERR_PTR(ret);
 }
@@ -1869,15 +1870,17 @@ out:
  * Function to update ctime/mtime for a given device path.
  * Mainly used for ctime/mtime based probe like libblkid.
  */
-static void update_dev_time(const char *path_name)
+static void update_dev_time(struct block_device *bdev)
 {
-	struct file *filp;
+	struct inode *inode = bdev->bd_inode;
+	struct timespec64 now;
 
-	filp = filp_open(path_name, O_RDWR, 0);
-	if (IS_ERR(filp))
+	/* Shouldn't happen but just in case. */
+	if (!inode)
 		return;
-	file_update_time(filp);
-	filp_close(filp, NULL);
+
+	now = current_time(inode);
+	generic_update_time(inode, &now, S_MTIME | S_CTIME);
 }
 
 static int btrfs_rm_dev_item(struct btrfs_device *device)
@@ -2053,11 +2056,11 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
 	btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
 
 	/* Update ctime/mtime for device path for libblkid */
-	update_dev_time(device_path);
+	update_dev_time(bdev);
 }
 
 int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
-		    u64 devid)
+		    u64 devid, struct block_device **bdev, fmode_t *mode)
 {
 	struct btrfs_device *device;
 	struct btrfs_fs_devices *cur_devices;
@@ -2171,15 +2174,26 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
 	mutex_unlock(&fs_devices->device_list_mutex);
 
 	/*
-	 * at this point, the device is zero sized and detached from
-	 * the devices list.  All that's left is to zero out the old
-	 * supers and free the device.
+	 * At this point, the device is zero sized and detached from the
+	 * devices list.  All that's left is to zero out the old supers and
+	 * free the device.
+	 *
+	 * We cannot call btrfs_close_bdev() here because we're holding the sb
+	 * write lock, and blkdev_put() will pull in the ->open_mutex on the
+	 * block device and it's dependencies.  Instead just flush the device
+	 * and let the caller do the final blkdev_put.
 	 */
-	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
+	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
 		btrfs_scratch_superblocks(fs_info, device->bdev,
 					  device->name->str);
+		if (device->bdev) {
+			sync_blockdev(device->bdev);
+			invalidate_bdev(device->bdev);
+		}
+	}
 
-	btrfs_close_bdev(device);
+	*bdev = device->bdev;
+	*mode = device->mode;
 	synchronize_rcu();
 	btrfs_free_device(device);
 
@@ -2706,7 +2720,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	btrfs_forget_devices(device_path);
 
 	/* Update ctime/mtime for blkid or udev */
-	update_dev_time(device_path);
+	update_dev_time(bdev);
 
 	return ret;
 
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index b082250b42e0..2183361db614 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -472,7 +472,8 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
 					const u8 *uuid);
 void btrfs_free_device(struct btrfs_device *device);
 int btrfs_rm_device(struct btrfs_fs_info *fs_info,
-		    const char *device_path, u64 devid);
+		    const char *device_path, u64 devid,
+		    struct block_device **bdev, fmode_t *mode);
 void __exit btrfs_cleanup_fs_uuids(void);
 int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
 int btrfs_grow_device(struct btrfs_trans_handle *trans,
diff --git a/fs/cachefiles/Kconfig b/fs/cachefiles/Kconfig
index ff9ca55a9ae9..6827b40f7ddc 100644
--- a/fs/cachefiles/Kconfig
+++ b/fs/cachefiles/Kconfig
@@ -19,22 +19,3 @@ config CACHEFILES_DEBUG
 	  caching on files module.  If this is set, the debugging output may be
 	  enabled by setting bits in /sys/modules/cachefiles/parameter/debug or
 	  by including a debugging specifier in /etc/cachefilesd.conf.
-
-config CACHEFILES_HISTOGRAM
-	bool "Gather latency information on CacheFiles"
-	depends on CACHEFILES && PROC_FS
-	help
-
-	  This option causes latency information to be gathered on CacheFiles
-	  operation and exported through file:
-
-		/proc/fs/cachefiles/histogram
-
-	  The generation of this histogram adds a certain amount of overhead to
-	  execution as there are a number of points at which data is gathered,
-	  and on a multi-CPU system these may be on cachelines that keep
-	  bouncing between CPUs.  On the other hand, the histogram may be
-	  useful for debugging purposes.  Saying 'N' here is recommended.
-
-	  See Documentation/filesystems/caching/cachefiles.rst for more
-	  information.
diff --git a/fs/cachefiles/Makefile b/fs/cachefiles/Makefile
index 2227dc2d5498..02fd17731769 100644
--- a/fs/cachefiles/Makefile
+++ b/fs/cachefiles/Makefile
@@ -15,6 +15,4 @@ cachefiles-y := \
 	security.o \
 	xattr.o
 
-cachefiles-$(CONFIG_CACHEFILES_HISTOGRAM) += proc.o
-
 obj-$(CONFIG_CACHEFILES) := cachefiles.o
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index 38bb7764b454..d463d89f5db8 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -108,8 +108,6 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
 	atomic_set(&fsdef->usage, 1);
 	fsdef->type = FSCACHE_COOKIE_TYPE_INDEX;
 
-	_debug("- fsdef %p", fsdef);
-
 	/* look up the directory at the root of the cache */
 	ret = kern_path(cache->rootdirname, LOOKUP_DIRECTORY, &path);
 	if (ret < 0)
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index da3948fdb615..da28ac1fa225 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -33,7 +33,7 @@ static struct fscache_object *cachefiles_alloc_object(
 
 	cache = container_of(_cache, struct cachefiles_cache, cache);
 
-	_enter("{%s},%p,", cache->cache.identifier, cookie);
+	_enter("{%s},%x,", cache->cache.identifier, cookie->debug_id);
 
 	lookup_data = kmalloc(sizeof(*lookup_data), cachefiles_gfp);
 	if (!lookup_data)
@@ -96,7 +96,7 @@ static struct fscache_object *cachefiles_alloc_object(
 	lookup_data->key = key;
 	object->lookup_data = lookup_data;
 
-	_leave(" = %p [%p]", &object->fscache, lookup_data);
+	_leave(" = %x [%p]", object->fscache.debug_id, lookup_data);
 	return &object->fscache;
 
 nomem_key:
@@ -379,7 +379,7 @@ static void cachefiles_sync_cache(struct fscache_cache *_cache)
 	const struct cred *saved_cred;
 	int ret;
 
-	_enter("%p", _cache);
+	_enter("%s", _cache->tag->name);
 
 	cache = container_of(_cache, struct cachefiles_cache, cache);
 
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 4ed83aa5253b..0a511c36dab8 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -181,31 +181,6 @@ extern int cachefiles_check_in_use(struct cachefiles_cache *cache,
 				   struct dentry *dir, char *filename);
 
 /*
- * proc.c
- */
-#ifdef CONFIG_CACHEFILES_HISTOGRAM
-extern atomic_t cachefiles_lookup_histogram[HZ];
-extern atomic_t cachefiles_mkdir_histogram[HZ];
-extern atomic_t cachefiles_create_histogram[HZ];
-
-extern int __init cachefiles_proc_init(void);
-extern void cachefiles_proc_cleanup(void);
-static inline
-void cachefiles_hist(atomic_t histogram[], unsigned long start_jif)
-{
-	unsigned long jif = jiffies - start_jif;
-	if (jif >= HZ)
-		jif = HZ - 1;
-	atomic_inc(&histogram[jif]);
-}
-
-#else
-#define cachefiles_proc_init()		(0)
-#define cachefiles_proc_cleanup()	do {} while (0)
-#define cachefiles_hist(hist, start_jif) do {} while (0)
-#endif
-
-/*
  * rdwr.c
  */
 extern int cachefiles_read_or_alloc_page(struct fscache_retrieval *,
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index b13fb45fc3f3..fac2e8e7b533 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -70,7 +70,7 @@ static int cachefiles_read(struct netfs_cache_resources *cres,
 
 	_enter("%pD,%li,%llx,%zx/%llx",
 	       file, file_inode(file)->i_ino, start_pos, len,
-	       i_size_read(file->f_inode));
+	       i_size_read(file_inode(file)));
 
 	/* If the caller asked us to seek for data before doing the read, then
 	 * we should do that now.  If we find a gap, we fill it with zeros.
@@ -194,7 +194,7 @@ static int cachefiles_write(struct netfs_cache_resources *cres,
 
 	_enter("%pD,%li,%llx,%zx/%llx",
 	       file, file_inode(file)->i_ino, start_pos, len,
-	       i_size_read(file->f_inode));
+	       i_size_read(file_inode(file)));
 
 	ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
 	if (!ki)
@@ -410,7 +410,7 @@ int cachefiles_begin_read_operation(struct netfs_read_request *rreq,
 	rreq->cache_resources.cache_priv = op;
 	rreq->cache_resources.cache_priv2 = file;
 	rreq->cache_resources.ops = &cachefiles_netfs_cache_ops;
-	rreq->cookie_debug_id = object->fscache.debug_id;
+	rreq->cache_resources.debug_id = object->fscache.debug_id;
 	_leave("");
 	return 0;
 
diff --git a/fs/cachefiles/key.c b/fs/cachefiles/key.c
index be96f5fc5cac..7f94efc97e23 100644
--- a/fs/cachefiles/key.c
+++ b/fs/cachefiles/key.c
@@ -150,6 +150,6 @@ char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type)
 	key[len++] = 0;
 	key[len] = 0;
 
-	_leave(" = %p %d", key, len);
+	_leave(" = %s %d", key, len);
 	return key;
 }
diff --git a/fs/cachefiles/main.c b/fs/cachefiles/main.c
index ddf0cd58d60c..9c8d34c49b12 100644
--- a/fs/cachefiles/main.c
+++ b/fs/cachefiles/main.c
@@ -69,15 +69,9 @@ static int __init cachefiles_init(void)
 		goto error_object_jar;
 	}
 
-	ret = cachefiles_proc_init();
-	if (ret < 0)
-		goto error_proc;
-
 	pr_info("Loaded\n");
 	return 0;
 
-error_proc:
-	kmem_cache_destroy(cachefiles_object_jar);
 error_object_jar:
 	misc_deregister(&cachefiles_dev);
 error_dev:
@@ -94,7 +88,6 @@ static void __exit cachefiles_exit(void)
 {
 	pr_info("Unloading\n");
 
-	cachefiles_proc_cleanup();
 	kmem_cache_destroy(cachefiles_object_jar);
 	misc_deregister(&cachefiles_dev);
 }
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 7bf0732ae25c..a9aca5ab5970 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -39,18 +39,18 @@ void __cachefiles_printk_object(struct cachefiles_object *object,
 	pr_err("%sops=%u inp=%u exc=%u\n",
 	       prefix, object->fscache.n_ops, object->fscache.n_in_progress,
 	       object->fscache.n_exclusive);
-	pr_err("%sparent=%p\n",
-	       prefix, object->fscache.parent);
+	pr_err("%sparent=%x\n",
+	       prefix, object->fscache.parent ? object->fscache.parent->debug_id : 0);
 
 	spin_lock(&object->fscache.lock);
 	cookie = object->fscache.cookie;
 	if (cookie) {
-		pr_err("%scookie=%p [pr=%p nd=%p fl=%lx]\n",
+		pr_err("%scookie=%x [pr=%x nd=%p fl=%lx]\n",
 		       prefix,
-		       object->fscache.cookie,
-		       object->fscache.cookie->parent,
-		       object->fscache.cookie->netfs_data,
-		       object->fscache.cookie->flags);
+		       cookie->debug_id,
+		       cookie->parent ? cookie->parent->debug_id : 0,
+		       cookie->netfs_data,
+		       cookie->flags);
 		pr_err("%skey=[%u] '", prefix, cookie->key_len);
 		k = (cookie->key_len <= sizeof(cookie->inline_key)) ?
 			cookie->inline_key : cookie->key;
@@ -110,7 +110,7 @@ static void cachefiles_mark_object_buried(struct cachefiles_cache *cache,
 
 	/* found the dentry for  */
 found_dentry:
-	kdebug("preemptive burial: OBJ%x [%s] %p",
+	kdebug("preemptive burial: OBJ%x [%s] %pd",
 	       object->fscache.debug_id,
 	       object->fscache.state->name,
 	       dentry);
@@ -140,7 +140,7 @@ static int cachefiles_mark_object_active(struct cachefiles_cache *cache,
 	struct rb_node **_p, *_parent = NULL;
 	struct dentry *dentry;
 
-	_enter(",%p", object);
+	_enter(",%x", object->fscache.debug_id);
 
 try_again:
 	write_lock(&cache->active_lock);
@@ -298,8 +298,6 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
 
 	_enter(",'%pd','%pd'", dir, rep);
 
-	_debug("remove %p from %p", rep, dir);
-
 	/* non-directories can just be unlinked */
 	if (!d_is_dir(rep)) {
 		_debug("unlink stale object");
@@ -446,7 +444,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
 	struct dentry *dir;
 	int ret;
 
-	_enter(",OBJ%x{%p}", object->fscache.debug_id, object->dentry);
+	_enter(",OBJ%x{%pd}", object->fscache.debug_id, object->dentry);
 
 	ASSERT(object->dentry);
 	ASSERT(d_backing_inode(object->dentry));
@@ -496,11 +494,10 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent,
 	struct dentry *dir, *next = NULL;
 	struct inode *inode;
 	struct path path;
-	unsigned long start;
 	const char *name;
 	int ret, nlen;
 
-	_enter("OBJ%x{%p},OBJ%x,%s,",
+	_enter("OBJ%x{%pd},OBJ%x,%s,",
 	       parent->fscache.debug_id, parent->dentry,
 	       object->fscache.debug_id, key);
 
@@ -535,9 +532,7 @@ lookup_again:
 
 	inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
 
-	start = jiffies;
 	next = lookup_one_len(name, dir, nlen);
-	cachefiles_hist(cachefiles_lookup_histogram, start);
 	if (IS_ERR(next)) {
 		trace_cachefiles_lookup(object, next, NULL);
 		goto lookup_error;
@@ -545,7 +540,7 @@ lookup_again:
 
 	inode = d_backing_inode(next);
 	trace_cachefiles_lookup(object, next, inode);
-	_debug("next -> %p %s", next, inode ? "positive" : "negative");
+	_debug("next -> %pd %s", next, inode ? "positive" : "negative");
 
 	if (!key)
 		object->new = !inode;
@@ -568,9 +563,7 @@ lookup_again:
 			ret = security_path_mkdir(&path, next, 0);
 			if (ret < 0)
 				goto create_error;
-			start = jiffies;
 			ret = vfs_mkdir(&init_user_ns, d_inode(dir), next, 0);
-			cachefiles_hist(cachefiles_mkdir_histogram, start);
 			if (!key)
 				trace_cachefiles_mkdir(object, next, ret);
 			if (ret < 0)
@@ -583,8 +576,8 @@ lookup_again:
 			}
 			ASSERT(d_backing_inode(next));
 
-			_debug("mkdir -> %p{%p{ino=%lu}}",
-			       next, d_backing_inode(next), d_backing_inode(next)->i_ino);
+			_debug("mkdir -> %pd{ino=%lu}",
+			       next, d_backing_inode(next)->i_ino);
 
 		} else if (!d_can_lookup(next)) {
 			pr_err("inode %lu is not a directory\n",
@@ -604,18 +597,16 @@ lookup_again:
 			ret = security_path_mknod(&path, next, S_IFREG, 0);
 			if (ret < 0)
 				goto create_error;
-			start = jiffies;
 			ret = vfs_create(&init_user_ns, d_inode(dir), next,
 					 S_IFREG, true);
-			cachefiles_hist(cachefiles_create_histogram, start);
 			trace_cachefiles_create(object, next, ret);
 			if (ret < 0)
 				goto create_error;
 
 			ASSERT(d_backing_inode(next));
 
-			_debug("create -> %p{%p{ino=%lu}}",
-			       next, d_backing_inode(next), d_backing_inode(next)->i_ino);
+			_debug("create -> %pd{ino=%lu}",
+			       next, d_backing_inode(next)->i_ino);
 
 		} else if (!d_can_lookup(next) &&
 			   !d_is_reg(next)
@@ -765,7 +756,6 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
 					const char *dirname)
 {
 	struct dentry *subdir;
-	unsigned long start;
 	struct path path;
 	int ret;
 
@@ -775,16 +765,14 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
 	inode_lock(d_inode(dir));
 
 retry:
-	start = jiffies;
 	subdir = lookup_one_len(dirname, dir, strlen(dirname));
-	cachefiles_hist(cachefiles_lookup_histogram, start);
 	if (IS_ERR(subdir)) {
 		if (PTR_ERR(subdir) == -ENOMEM)
 			goto nomem_d_alloc;
 		goto lookup_error;
 	}
 
-	_debug("subdir -> %p %s",
+	_debug("subdir -> %pd %s",
 	       subdir, d_backing_inode(subdir) ? "positive" : "negative");
 
 	/* we need to create the subdir if it doesn't exist yet */
@@ -810,10 +798,8 @@ retry:
 		}
 		ASSERT(d_backing_inode(subdir));
 
-		_debug("mkdir -> %p{%p{ino=%lu}}",
-		       subdir,
-		       d_backing_inode(subdir),
-		       d_backing_inode(subdir)->i_ino);
+		_debug("mkdir -> %pd{ino=%lu}",
+		       subdir, d_backing_inode(subdir)->i_ino);
 	}
 
 	inode_unlock(d_inode(dir));
@@ -876,7 +862,6 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
 	struct cachefiles_object *object;
 	struct rb_node *_n;
 	struct dentry *victim;
-	unsigned long start;
 	int ret;
 
 	//_enter(",%pd/,%s",
@@ -885,13 +870,11 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
 	/* look up the victim */
 	inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
 
-	start = jiffies;
 	victim = lookup_one_len(filename, dir, strlen(filename));
-	cachefiles_hist(cachefiles_lookup_histogram, start);
 	if (IS_ERR(victim))
 		goto lookup_error;
 
-	//_debug("victim -> %p %s",
+	//_debug("victim -> %pd %s",
 	//       victim, d_backing_inode(victim) ? "positive" : "negative");
 
 	/* if the object is no longer there then we probably retired the object
@@ -922,7 +905,7 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
 
 	read_unlock(&cache->active_lock);
 
-	//_leave(" = %p", victim);
+	//_leave(" = %pd", victim);
 	return victim;
 
 object_in_use:
@@ -968,7 +951,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
 	if (IS_ERR(victim))
 		return PTR_ERR(victim);
 
-	_debug("victim -> %p %s",
+	_debug("victim -> %pd %s",
 	       victim, d_backing_inode(victim) ? "positive" : "negative");
 
 	/* okay... the victim is not being used so we can cull it
diff --git a/fs/cachefiles/proc.c b/fs/cachefiles/proc.c
deleted file mode 100644
index 6e67aea0f24e..000000000000
--- a/fs/cachefiles/proc.c
+++ /dev/null
@@ -1,114 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/* CacheFiles statistics
- *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- */
-
-#include <linux/module.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include "internal.h"
-
-atomic_t cachefiles_lookup_histogram[HZ];
-atomic_t cachefiles_mkdir_histogram[HZ];
-atomic_t cachefiles_create_histogram[HZ];
-
-/*
- * display the latency histogram
- */
-static int cachefiles_histogram_show(struct seq_file *m, void *v)
-{
-	unsigned long index;
-	unsigned x, y, z, t;
-
-	switch ((unsigned long) v) {
-	case 1:
-		seq_puts(m, "JIFS  SECS  LOOKUPS   MKDIRS    CREATES\n");
-		return 0;
-	case 2:
-		seq_puts(m, "===== ===== ========= ========= =========\n");
-		return 0;
-	default:
-		index = (unsigned long) v - 3;
-		x = atomic_read(&cachefiles_lookup_histogram[index]);
-		y = atomic_read(&cachefiles_mkdir_histogram[index]);
-		z = atomic_read(&cachefiles_create_histogram[index]);
-		if (x == 0 && y == 0 && z == 0)
-			return 0;
-
-		t = (index * 1000) / HZ;
-
-		seq_printf(m, "%4lu  0.%03u %9u %9u %9u\n", index, t, x, y, z);
-		return 0;
-	}
-}
-
-/*
- * set up the iterator to start reading from the first line
- */
-static void *cachefiles_histogram_start(struct seq_file *m, loff_t *_pos)
-{
-	if ((unsigned long long)*_pos >= HZ + 2)
-		return NULL;
-	if (*_pos == 0)
-		*_pos = 1;
-	return (void *)(unsigned long) *_pos;
-}
-
-/*
- * move to the next line
- */
-static void *cachefiles_histogram_next(struct seq_file *m, void *v, loff_t *pos)
-{
-	(*pos)++;
-	return (unsigned long long)*pos > HZ + 2 ?
-		NULL : (void *)(unsigned long) *pos;
-}
-
-/*
- * clean up after reading
- */
-static void cachefiles_histogram_stop(struct seq_file *m, void *v)
-{
-}
-
-static const struct seq_operations cachefiles_histogram_ops = {
-	.start		= cachefiles_histogram_start,
-	.stop		= cachefiles_histogram_stop,
-	.next		= cachefiles_histogram_next,
-	.show		= cachefiles_histogram_show,
-};
-
-/*
- * initialise the /proc/fs/cachefiles/ directory
- */
-int __init cachefiles_proc_init(void)
-{
-	_enter("");
-
-	if (!proc_mkdir("fs/cachefiles", NULL))
-		goto error_dir;
-
-	if (!proc_create_seq("fs/cachefiles/histogram", S_IFREG | 0444, NULL,
-			 &cachefiles_histogram_ops))
-		goto error_histogram;
-
-	_leave(" = 0");
-	return 0;
-
-error_histogram:
-	remove_proc_entry("fs/cachefiles", NULL);
-error_dir:
-	_leave(" = -ENOMEM");
-	return -ENOMEM;
-}
-
-/*
- * clean up the /proc/fs/cachefiles/ directory
- */
-void cachefiles_proc_cleanup(void)
-{
-	remove_proc_entry("fs/cachefiles/histogram", NULL);
-	remove_proc_entry("fs/cachefiles", NULL);
-}
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index a591b5e09637..9e82de668595 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -36,7 +36,7 @@ int cachefiles_check_object_type(struct cachefiles_object *object)
 	else
 		snprintf(type, 3, "%02x", object->fscache.cookie->def->type);
 
-	_enter("%p{%s}", object, type);
+	_enter("%x{%s}", object->fscache.debug_id, type);
 
 	/* attempt to install a type label directly */
 	ret = vfs_setxattr(&init_user_ns, dentry, cachefiles_xattr_cache, type,
@@ -134,7 +134,7 @@ int cachefiles_update_object_xattr(struct cachefiles_object *object,
 	if (!dentry)
 		return -ESTALE;
 
-	_enter("%p,#%d", object, auxdata->len);
+	_enter("%x,#%d", object->fscache.debug_id, auxdata->len);
 
 	/* attempt to install the cache metadata directly */
 	_debug("SET #%u", auxdata->len);
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 529af59d9fd3..f4fc8e0b847c 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -29,7 +29,7 @@ static inline void ceph_set_cached_acl(struct inode *inode,
 	spin_unlock(&ci->i_ceph_lock);
 }
 
-struct posix_acl *ceph_get_acl(struct inode *inode, int type)
+struct posix_acl *ceph_get_acl(struct inode *inode, int type, bool rcu)
 {
 	int size;
 	unsigned int retry_cnt = 0;
@@ -37,6 +37,9 @@ struct posix_acl *ceph_get_acl(struct inode *inode, int type)
 	char *value = NULL;
 	struct posix_acl *acl;
 
+	if (rcu)
+		return ERR_PTR(-ECHILD);
+
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name = XATTR_NAME_POSIX_ACL_ACCESS;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 7e7a897ae0d3..99b80b5c7a93 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1281,8 +1281,8 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
 	dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
 	     inode, page, (int)pos, (int)copied, (int)len);
 
-	/* zero the stale part of the page if we did a short copy */
 	if (!PageUptodate(page)) {
+		/* just return that nothing was copied on a short copy */
 		if (copied < len) {
 			copied = 0;
 			goto out;
diff --git a/fs/ceph/cache.h b/fs/ceph/cache.h
index 1409d6149281..058ea2a04376 100644
--- a/fs/ceph/cache.h
+++ b/fs/ceph/cache.h
@@ -26,12 +26,6 @@ void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci);
 void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp);
 void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci);
 
-int ceph_readpage_from_fscache(struct inode *inode, struct page *page);
-int ceph_readpages_from_fscache(struct inode *inode,
-				struct address_space *mapping,
-				struct list_head *pages,
-				unsigned *nr_pages);
-
 static inline void ceph_fscache_inode_init(struct ceph_inode_info *ci)
 {
 	ci->fscache = NULL;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 39db97f149b9..6c0e52fd0743 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -703,29 +703,12 @@ void ceph_add_cap(struct inode *inode,
 		 */
 		struct ceph_snap_realm *realm = ceph_lookup_snap_realm(mdsc,
 							       realmino);
-		if (realm) {
-			struct ceph_snap_realm *oldrealm = ci->i_snap_realm;
-			if (oldrealm) {
-				spin_lock(&oldrealm->inodes_with_caps_lock);
-				list_del_init(&ci->i_snap_realm_item);
-				spin_unlock(&oldrealm->inodes_with_caps_lock);
-			}
-
-			spin_lock(&realm->inodes_with_caps_lock);
-			list_add(&ci->i_snap_realm_item,
-				 &realm->inodes_with_caps);
-			ci->i_snap_realm = realm;
-			if (realm->ino == ci->i_vino.ino)
-				realm->inode = inode;
-			spin_unlock(&realm->inodes_with_caps_lock);
-
-			if (oldrealm)
-				ceph_put_snap_realm(mdsc, oldrealm);
-		} else {
-			pr_err("ceph_add_cap: couldn't find snap realm %llx\n",
-			       realmino);
-			WARN_ON(!realm);
-		}
+		if (realm)
+			ceph_change_snap_realm(inode, realm);
+		else
+			WARN(1, "%s: couldn't find snap realm 0x%llx (ino 0x%llx oldrealm 0x%llx)\n",
+			     __func__, realmino, ci->i_vino.ino,
+			     ci->i_snap_realm ? ci->i_snap_realm->ino : 0);
 	}
 
 	__check_cap_issue(ci, cap, issued);
@@ -1112,20 +1095,6 @@ int ceph_is_any_caps(struct inode *inode)
 	return ret;
 }
 
-static void drop_inode_snap_realm(struct ceph_inode_info *ci)
-{
-	struct ceph_snap_realm *realm = ci->i_snap_realm;
-	spin_lock(&realm->inodes_with_caps_lock);
-	list_del_init(&ci->i_snap_realm_item);
-	ci->i_snap_realm_counter++;
-	ci->i_snap_realm = NULL;
-	if (realm->ino == ci->i_vino.ino)
-		realm->inode = NULL;
-	spin_unlock(&realm->inodes_with_caps_lock);
-	ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc,
-			    realm);
-}
-
 /*
  * Remove a cap.  Take steps to deal with a racing iterate_session_caps.
  *
@@ -1145,17 +1114,16 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 		return;
 	}
 
+	lockdep_assert_held(&ci->i_ceph_lock);
+
 	dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
 
 	mdsc = ceph_inode_to_client(&ci->vfs_inode)->mdsc;
 
 	/* remove from inode's cap rbtree, and clear auth cap */
 	rb_erase(&cap->ci_node, &ci->i_caps);
-	if (ci->i_auth_cap == cap) {
-		WARN_ON_ONCE(!list_empty(&ci->i_dirty_item) &&
-			     !mdsc->fsc->blocklisted);
+	if (ci->i_auth_cap == cap)
 		ci->i_auth_cap = NULL;
-	}
 
 	/* remove from session list */
 	spin_lock(&session->s_cap_lock);
@@ -1201,12 +1169,34 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 		 * keep i_snap_realm.
 		 */
 		if (ci->i_wr_ref == 0 && ci->i_snap_realm)
-			drop_inode_snap_realm(ci);
+			ceph_change_snap_realm(&ci->vfs_inode, NULL);
 
 		__cap_delay_cancel(mdsc, ci);
 	}
 }
 
+void ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
+{
+	struct ceph_inode_info *ci = cap->ci;
+	struct ceph_fs_client *fsc;
+
+	/* 'ci' being NULL means the remove have already occurred */
+	if (!ci) {
+		dout("%s: cap inode is NULL\n", __func__);
+		return;
+	}
+
+	lockdep_assert_held(&ci->i_ceph_lock);
+
+	fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
+	WARN_ON_ONCE(ci->i_auth_cap == cap &&
+		     !list_empty(&ci->i_dirty_item) &&
+		     !fsc->blocklisted &&
+		     READ_ONCE(fsc->mount_state) != CEPH_MOUNT_SHUTDOWN);
+
+	__ceph_remove_cap(cap, queue_release);
+}
+
 struct cap_msg_args {
 	struct ceph_mds_session	*session;
 	u64			ino, cid, follows;
@@ -1335,7 +1325,7 @@ void __ceph_remove_caps(struct ceph_inode_info *ci)
 	while (p) {
 		struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
 		p = rb_next(p);
-		__ceph_remove_cap(cap, true);
+		ceph_remove_cap(cap, true);
 	}
 	spin_unlock(&ci->i_ceph_lock);
 }
@@ -1746,6 +1736,9 @@ struct ceph_cap_flush *ceph_alloc_cap_flush(void)
 	struct ceph_cap_flush *cf;
 
 	cf = kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
+	if (!cf)
+		return NULL;
+
 	cf->is_capsnap = false;
 	return cf;
 }
@@ -1856,6 +1849,8 @@ static u64 __mark_caps_flushing(struct inode *inode,
  * try to invalidate mapping pages without blocking.
  */
 static int try_nonblocking_invalidate(struct inode *inode)
+	__releases(ci->i_ceph_lock)
+	__acquires(ci->i_ceph_lock)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	u32 invalidating_gen = ci->i_rdcache_gen;
@@ -2219,6 +2214,7 @@ static int caps_are_flushed(struct inode *inode, u64 flush_tid)
  */
 static int unsafe_request_wait(struct inode *inode)
 {
+	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_mds_request *req1 = NULL, *req2 = NULL;
 	int ret, err = 0;
@@ -2238,6 +2234,81 @@ static int unsafe_request_wait(struct inode *inode)
 	}
 	spin_unlock(&ci->i_unsafe_lock);
 
+	/*
+	 * Trigger to flush the journal logs in all the relevant MDSes
+	 * manually, or in the worst case we must wait at most 5 seconds
+	 * to wait the journal logs to be flushed by the MDSes periodically.
+	 */
+	if (req1 || req2) {
+		struct ceph_mds_session **sessions = NULL;
+		struct ceph_mds_session *s;
+		struct ceph_mds_request *req;
+		unsigned int max;
+		int i;
+
+		/*
+		 * The mdsc->max_sessions is unlikely to be changed
+		 * mostly, here we will retry it by reallocating the
+		 * sessions arrary memory to get rid of the mdsc->mutex
+		 * lock.
+		 */
+retry:
+		max = mdsc->max_sessions;
+		sessions = krealloc(sessions, max * sizeof(s), __GFP_ZERO);
+		if (!sessions)
+			return -ENOMEM;
+
+		spin_lock(&ci->i_unsafe_lock);
+		if (req1) {
+			list_for_each_entry(req, &ci->i_unsafe_dirops,
+					    r_unsafe_dir_item) {
+				s = req->r_session;
+				if (unlikely(s->s_mds > max)) {
+					spin_unlock(&ci->i_unsafe_lock);
+					goto retry;
+				}
+				if (!sessions[s->s_mds]) {
+					s = ceph_get_mds_session(s);
+					sessions[s->s_mds] = s;
+				}
+			}
+		}
+		if (req2) {
+			list_for_each_entry(req, &ci->i_unsafe_iops,
+					    r_unsafe_target_item) {
+				s = req->r_session;
+				if (unlikely(s->s_mds > max)) {
+					spin_unlock(&ci->i_unsafe_lock);
+					goto retry;
+				}
+				if (!sessions[s->s_mds]) {
+					s = ceph_get_mds_session(s);
+					sessions[s->s_mds] = s;
+				}
+			}
+		}
+		spin_unlock(&ci->i_unsafe_lock);
+
+		/* the auth MDS */
+		spin_lock(&ci->i_ceph_lock);
+		if (ci->i_auth_cap) {
+		      s = ci->i_auth_cap->session;
+		      if (!sessions[s->s_mds])
+			      sessions[s->s_mds] = ceph_get_mds_session(s);
+		}
+		spin_unlock(&ci->i_ceph_lock);
+
+		/* send flush mdlog request to MDSes */
+		for (i = 0; i < max; i++) {
+			s = sessions[i];
+			if (s) {
+				send_flush_mdlog(s);
+				ceph_put_mds_session(s);
+			}
+		}
+		kfree(sessions);
+	}
+
 	dout("unsafe_request_wait %p wait on tid %llu %llu\n",
 	     inode, req1 ? req1->r_tid : 0ULL, req2 ? req2->r_tid : 0ULL);
 	if (req1) {
@@ -3008,7 +3079,7 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
 			}
 			/* see comment in __ceph_remove_cap() */
 			if (!__ceph_is_any_real_caps(ci) && ci->i_snap_realm)
-				drop_inode_snap_realm(ci);
+				ceph_change_snap_realm(inode, NULL);
 		}
 	}
 	if (check_flushsnaps && __ceph_have_pending_cap_snap(ci)) {
@@ -3114,7 +3185,16 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 				break;
 			}
 		}
-		BUG_ON(!found);
+
+		if (!found) {
+			/*
+			 * The capsnap should already be removed when removing
+			 * auth cap in the case of a forced unmount.
+			 */
+			WARN_ON_ONCE(ci->i_auth_cap);
+			goto unlock;
+		}
+
 		capsnap->dirty_pages -= nr;
 		if (capsnap->dirty_pages == 0) {
 			complete_capsnap = true;
@@ -3136,6 +3216,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 		     complete_capsnap ? " (complete capsnap)" : "");
 	}
 
+unlock:
 	spin_unlock(&ci->i_ceph_lock);
 
 	if (last) {
@@ -3606,6 +3687,43 @@ out:
 		iput(inode);
 }
 
+void __ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
+			   bool *wake_ci, bool *wake_mdsc)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+	bool ret;
+
+	lockdep_assert_held(&ci->i_ceph_lock);
+
+	dout("removing capsnap %p, inode %p ci %p\n", capsnap, inode, ci);
+
+	list_del_init(&capsnap->ci_item);
+	ret = __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
+	if (wake_ci)
+		*wake_ci = ret;
+
+	spin_lock(&mdsc->cap_dirty_lock);
+	if (list_empty(&ci->i_cap_flush_list))
+		list_del_init(&ci->i_flushing_item);
+
+	ret = __detach_cap_flush_from_mdsc(mdsc, &capsnap->cap_flush);
+	if (wake_mdsc)
+		*wake_mdsc = ret;
+	spin_unlock(&mdsc->cap_dirty_lock);
+}
+
+void ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
+			 bool *wake_ci, bool *wake_mdsc)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+
+	lockdep_assert_held(&ci->i_ceph_lock);
+
+	WARN_ON_ONCE(capsnap->dirty_pages || capsnap->writing);
+	__ceph_remove_capsnap(inode, capsnap, wake_ci, wake_mdsc);
+}
+
 /*
  * Handle FLUSHSNAP_ACK.  MDS has flushed snap data to disk and we can
  * throw away our cap_snap.
@@ -3643,23 +3761,10 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
 			     capsnap, capsnap->follows);
 		}
 	}
-	if (flushed) {
-		WARN_ON(capsnap->dirty_pages || capsnap->writing);
-		dout(" removing %p cap_snap %p follows %lld\n",
-		     inode, capsnap, follows);
-		list_del(&capsnap->ci_item);
-		wake_ci |= __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
-
-		spin_lock(&mdsc->cap_dirty_lock);
-
-		if (list_empty(&ci->i_cap_flush_list))
-			list_del_init(&ci->i_flushing_item);
-
-		wake_mdsc |= __detach_cap_flush_from_mdsc(mdsc,
-							  &capsnap->cap_flush);
-		spin_unlock(&mdsc->cap_dirty_lock);
-	}
+	if (flushed)
+		ceph_remove_capsnap(inode, capsnap, &wake_ci, &wake_mdsc);
 	spin_unlock(&ci->i_ceph_lock);
+
 	if (flushed) {
 		ceph_put_snap_context(capsnap->context);
 		ceph_put_cap_snap(capsnap);
@@ -3743,7 +3848,7 @@ retry:
 		goto out_unlock;
 
 	if (target < 0) {
-		__ceph_remove_cap(cap, false);
+		ceph_remove_cap(cap, false);
 		goto out_unlock;
 	}
 
@@ -3778,7 +3883,7 @@ retry:
 				change_auth_cap_ses(ci, tcap->session);
 			}
 		}
-		__ceph_remove_cap(cap, false);
+		ceph_remove_cap(cap, false);
 		goto out_unlock;
 	} else if (tsession) {
 		/* add placeholder for the export tagert */
@@ -3795,7 +3900,7 @@ retry:
 			spin_unlock(&mdsc->cap_dirty_lock);
 		}
 
-		__ceph_remove_cap(cap, false);
+		ceph_remove_cap(cap, false);
 		goto out_unlock;
 	}
 
@@ -3906,7 +4011,7 @@ retry:
 					ocap->mseq, mds, le32_to_cpu(ph->seq),
 					le32_to_cpu(ph->mseq));
 		}
-		__ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
+		ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
 	}
 
 	*old_issued = issued;
@@ -4134,8 +4239,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 done:
 	mutex_unlock(&session->s_mutex);
 done_unlocked:
-	ceph_put_string(extra_info.pool_ns);
 	iput(inode);
+out:
+	ceph_put_string(extra_info.pool_ns);
 	return;
 
 flush_cap_releases:
@@ -4150,7 +4256,7 @@ flush_cap_releases:
 bad:
 	pr_err("ceph_handle_caps: corrupt message\n");
 	ceph_msg_dump(msg);
-	return;
+	goto out;
 }
 
 /*
@@ -4225,33 +4331,9 @@ static void flush_dirty_session_caps(struct ceph_mds_session *s)
 	dout("flush_dirty_caps done\n");
 }
 
-static void iterate_sessions(struct ceph_mds_client *mdsc,
-			     void (*cb)(struct ceph_mds_session *))
-{
-	int mds;
-
-	mutex_lock(&mdsc->mutex);
-	for (mds = 0; mds < mdsc->max_sessions; ++mds) {
-		struct ceph_mds_session *s;
-
-		if (!mdsc->sessions[mds])
-			continue;
-
-		s = ceph_get_mds_session(mdsc->sessions[mds]);
-		if (!s)
-			continue;
-
-		mutex_unlock(&mdsc->mutex);
-		cb(s);
-		ceph_put_mds_session(s);
-		mutex_lock(&mdsc->mutex);
-	}
-	mutex_unlock(&mdsc->mutex);
-}
-
 void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
 {
-	iterate_sessions(mdsc, flush_dirty_session_caps);
+	ceph_mdsc_iterate_sessions(mdsc, flush_dirty_session_caps, true);
 }
 
 void __ceph_touch_fmode(struct ceph_inode_info *ci,
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index e1d605a02d4a..d16fd2d5fd42 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1722,32 +1722,26 @@ retry_snap:
 		goto out;
 	}
 
-	err = file_remove_privs(file);
-	if (err)
+	down_read(&osdc->lock);
+	map_flags = osdc->osdmap->flags;
+	pool_flags = ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id);
+	up_read(&osdc->lock);
+	if ((map_flags & CEPH_OSDMAP_FULL) ||
+	    (pool_flags & CEPH_POOL_FLAG_FULL)) {
+		err = -ENOSPC;
 		goto out;
+	}
 
-	err = file_update_time(file);
+	err = file_remove_privs(file);
 	if (err)
 		goto out;
 
-	inode_inc_iversion_raw(inode);
-
 	if (ci->i_inline_version != CEPH_INLINE_NONE) {
 		err = ceph_uninline_data(file, NULL);
 		if (err < 0)
 			goto out;
 	}
 
-	down_read(&osdc->lock);
-	map_flags = osdc->osdmap->flags;
-	pool_flags = ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id);
-	up_read(&osdc->lock);
-	if ((map_flags & CEPH_OSDMAP_FULL) ||
-	    (pool_flags & CEPH_POOL_FLAG_FULL)) {
-		err = -ENOSPC;
-		goto out;
-	}
-
 	dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
 	     inode, ceph_vinop(inode), pos, count, i_size_read(inode));
 	if (fi->fmode & CEPH_FILE_MODE_LAZY)
@@ -1759,6 +1753,12 @@ retry_snap:
 	if (err < 0)
 		goto out;
 
+	err = file_update_time(file);
+	if (err)
+		goto out_caps;
+
+	inode_inc_iversion_raw(inode);
+
 	dout("aio_write %p %llx.%llx %llu~%zd got cap refs on %s\n",
 	     inode, ceph_vinop(inode), pos, count, ceph_cap_string(got));
 
@@ -1842,6 +1842,8 @@ retry_snap:
 	}
 
 	goto out_unlocked;
+out_caps:
+	ceph_put_cap_refs(ci, got);
 out:
 	if (direct_lock)
 		ceph_end_io_direct(inode);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 1bd2cc015913..2df1e1284451 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -581,16 +581,9 @@ void ceph_evict_inode(struct inode *inode)
 	 */
 	if (ci->i_snap_realm) {
 		if (ceph_snap(inode) == CEPH_NOSNAP) {
-			struct ceph_snap_realm *realm = ci->i_snap_realm;
 			dout(" dropping residual ref to snap realm %p\n",
-			     realm);
-			spin_lock(&realm->inodes_with_caps_lock);
-			list_del_init(&ci->i_snap_realm_item);
-			ci->i_snap_realm = NULL;
-			if (realm->ino == ci->i_vino.ino)
-				realm->inode = NULL;
-			spin_unlock(&realm->inodes_with_caps_lock);
-			ceph_put_snap_realm(mdsc, realm);
+			     ci->i_snap_realm);
+			ceph_change_snap_realm(inode, NULL);
 		} else {
 			ceph_put_snapid_map(mdsc, ci->i_snapid_map);
 			ci->i_snap_realm = NULL;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 0b69aec23e5c..7cad180d6deb 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -11,6 +11,7 @@
 #include <linux/ratelimit.h>
 #include <linux/bits.h>
 #include <linux/ktime.h>
+#include <linux/bitmap.h>
 
 #include "super.h"
 #include "mds_client.h"
@@ -652,14 +653,9 @@ const char *ceph_session_state_name(int s)
 
 struct ceph_mds_session *ceph_get_mds_session(struct ceph_mds_session *s)
 {
-	if (refcount_inc_not_zero(&s->s_ref)) {
-		dout("mdsc get_session %p %d -> %d\n", s,
-		     refcount_read(&s->s_ref)-1, refcount_read(&s->s_ref));
+	if (refcount_inc_not_zero(&s->s_ref))
 		return s;
-	} else {
-		dout("mdsc get_session %p 0 -- FAIL\n", s);
-		return NULL;
-	}
+	return NULL;
 }
 
 void ceph_put_mds_session(struct ceph_mds_session *s)
@@ -667,8 +663,6 @@ void ceph_put_mds_session(struct ceph_mds_session *s)
 	if (IS_ERR_OR_NULL(s))
 		return;
 
-	dout("mdsc put_session %p %d -> %d\n", s,
-	     refcount_read(&s->s_ref), refcount_read(&s->s_ref)-1);
 	if (refcount_dec_and_test(&s->s_ref)) {
 		if (s->s_auth.authorizer)
 			ceph_auth_destroy_authorizer(s->s_auth.authorizer);
@@ -743,8 +737,6 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 	s->s_mdsc = mdsc;
 	s->s_mds = mds;
 	s->s_state = CEPH_MDS_SESSION_NEW;
-	s->s_ttl = 0;
-	s->s_seq = 0;
 	mutex_init(&s->s_mutex);
 
 	ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr);
@@ -753,17 +745,11 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 	s->s_cap_ttl = jiffies - 1;
 
 	spin_lock_init(&s->s_cap_lock);
-	s->s_renew_requested = 0;
-	s->s_renew_seq = 0;
 	INIT_LIST_HEAD(&s->s_caps);
-	s->s_nr_caps = 0;
 	refcount_set(&s->s_ref, 1);
 	INIT_LIST_HEAD(&s->s_waiting);
 	INIT_LIST_HEAD(&s->s_unsafe);
 	xa_init(&s->s_delegated_inos);
-	s->s_num_cap_releases = 0;
-	s->s_cap_reconnect = 0;
-	s->s_cap_iterator = NULL;
 	INIT_LIST_HEAD(&s->s_cap_releases);
 	INIT_WORK(&s->s_cap_release_work, ceph_cap_release_work);
 
@@ -811,6 +797,33 @@ static void put_request_session(struct ceph_mds_request *req)
 	}
 }
 
+void ceph_mdsc_iterate_sessions(struct ceph_mds_client *mdsc,
+				void (*cb)(struct ceph_mds_session *),
+				bool check_state)
+{
+	int mds;
+
+	mutex_lock(&mdsc->mutex);
+	for (mds = 0; mds < mdsc->max_sessions; ++mds) {
+		struct ceph_mds_session *s;
+
+		s = __ceph_lookup_mds_session(mdsc, mds);
+		if (!s)
+			continue;
+
+		if (check_state && !check_session_state(s)) {
+			ceph_put_mds_session(s);
+			continue;
+		}
+
+		mutex_unlock(&mdsc->mutex);
+		cb(s);
+		ceph_put_mds_session(s);
+		mutex_lock(&mdsc->mutex);
+	}
+	mutex_unlock(&mdsc->mutex);
+}
+
 void ceph_mdsc_release_request(struct kref *kref)
 {
 	struct ceph_mds_request *req = container_of(kref,
@@ -1155,7 +1168,7 @@ random:
 /*
  * session messages
  */
-static struct ceph_msg *create_session_msg(u32 op, u64 seq)
+struct ceph_msg *ceph_create_session_msg(u32 op, u64 seq)
 {
 	struct ceph_msg *msg;
 	struct ceph_mds_session_head *h;
@@ -1163,7 +1176,8 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq)
 	msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), GFP_NOFS,
 			   false);
 	if (!msg) {
-		pr_err("create_session_msg ENOMEM creating msg\n");
+		pr_err("ENOMEM creating session %s msg\n",
+		       ceph_session_op_name(op));
 		return NULL;
 	}
 	h = msg->front.iov_base;
@@ -1294,7 +1308,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
 	msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes,
 			   GFP_NOFS, false);
 	if (!msg) {
-		pr_err("create_session_msg ENOMEM creating msg\n");
+		pr_err("ENOMEM creating session open msg\n");
 		return ERR_PTR(-ENOMEM);
 	}
 	p = msg->front.iov_base;
@@ -1583,14 +1597,39 @@ out:
 	return ret;
 }
 
+static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_cap_snap *capsnap;
+	int capsnap_release = 0;
+
+	lockdep_assert_held(&ci->i_ceph_lock);
+
+	dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode);
+
+	while (!list_empty(&ci->i_cap_snaps)) {
+		capsnap = list_first_entry(&ci->i_cap_snaps,
+					   struct ceph_cap_snap, ci_item);
+		__ceph_remove_capsnap(inode, capsnap, NULL, NULL);
+		ceph_put_snap_context(capsnap->context);
+		ceph_put_cap_snap(capsnap);
+		capsnap_release++;
+	}
+	wake_up_all(&ci->i_cap_wq);
+	wake_up_all(&mdsc->cap_flushing_wq);
+	return capsnap_release;
+}
+
 static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 				  void *arg)
 {
 	struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
+	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	LIST_HEAD(to_remove);
 	bool dirty_dropped = false;
 	bool invalidate = false;
+	int capsnap_release = 0;
 
 	dout("removing cap %p, ci is %p, inode is %p\n",
 	     cap, ci, &ci->vfs_inode);
@@ -1598,7 +1637,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 	__ceph_remove_cap(cap, false);
 	if (!ci->i_auth_cap) {
 		struct ceph_cap_flush *cf;
-		struct ceph_mds_client *mdsc = fsc->mdsc;
 
 		if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
 			if (inode->i_data.nrpages > 0)
@@ -1662,6 +1700,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 			list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
 			ci->i_prealloc_cap_flush = NULL;
 		}
+
+		if (!list_empty(&ci->i_cap_snaps))
+			capsnap_release = remove_capsnaps(mdsc, inode);
 	}
 	spin_unlock(&ci->i_ceph_lock);
 	while (!list_empty(&to_remove)) {
@@ -1678,6 +1719,8 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 		ceph_queue_invalidate(inode);
 	if (dirty_dropped)
 		iput(inode);
+	while (capsnap_release--)
+		iput(inode);
 	return 0;
 }
 
@@ -1803,8 +1846,8 @@ static int send_renew_caps(struct ceph_mds_client *mdsc,
 
 	dout("send_renew_caps to mds%d (%s)\n", session->s_mds,
 		ceph_mds_state_name(state));
-	msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS,
-				 ++session->s_renew_seq);
+	msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS,
+				      ++session->s_renew_seq);
 	if (!msg)
 		return -ENOMEM;
 	ceph_con_send(&session->s_con, msg);
@@ -1818,7 +1861,7 @@ static int send_flushmsg_ack(struct ceph_mds_client *mdsc,
 
 	dout("send_flushmsg_ack to mds%d (%s)s seq %lld\n",
 	     session->s_mds, ceph_session_state_name(session->s_state), seq);
-	msg = create_session_msg(CEPH_SESSION_FLUSHMSG_ACK, seq);
+	msg = ceph_create_session_msg(CEPH_SESSION_FLUSHMSG_ACK, seq);
 	if (!msg)
 		return -ENOMEM;
 	ceph_con_send(&session->s_con, msg);
@@ -1870,7 +1913,8 @@ static int request_close_session(struct ceph_mds_session *session)
 	dout("request_close_session mds%d state %s seq %lld\n",
 	     session->s_mds, ceph_session_state_name(session->s_state),
 	     session->s_seq);
-	msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE, session->s_seq);
+	msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_CLOSE,
+				      session->s_seq);
 	if (!msg)
 		return -ENOMEM;
 	ceph_con_send(&session->s_con, msg);
@@ -1965,7 +2009,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
 
 	if (oissued) {
 		/* we aren't the only cap.. just remove us */
-		__ceph_remove_cap(cap, true);
+		ceph_remove_cap(cap, true);
 		(*remaining)--;
 	} else {
 		struct dentry *dentry;
@@ -4150,13 +4194,21 @@ static void check_new_map(struct ceph_mds_client *mdsc,
 			  struct ceph_mdsmap *newmap,
 			  struct ceph_mdsmap *oldmap)
 {
-	int i;
+	int i, j, err;
 	int oldstate, newstate;
 	struct ceph_mds_session *s;
+	unsigned long targets[DIV_ROUND_UP(CEPH_MAX_MDS, sizeof(unsigned long))] = {0};
 
 	dout("check_new_map new %u old %u\n",
 	     newmap->m_epoch, oldmap->m_epoch);
 
+	if (newmap->m_info) {
+		for (i = 0; i < newmap->possible_max_rank; i++) {
+			for (j = 0; j < newmap->m_info[i].num_export_targets; j++)
+				set_bit(newmap->m_info[i].export_targets[j], targets);
+		}
+	}
+
 	for (i = 0; i < oldmap->possible_max_rank && i < mdsc->max_sessions; i++) {
 		if (!mdsc->sessions[i])
 			continue;
@@ -4210,6 +4262,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
 		if (s->s_state == CEPH_MDS_SESSION_RESTARTING &&
 		    newstate >= CEPH_MDS_STATE_RECONNECT) {
 			mutex_unlock(&mdsc->mutex);
+			clear_bit(i, targets);
 			send_mds_reconnect(mdsc, s);
 			mutex_lock(&mdsc->mutex);
 		}
@@ -4232,6 +4285,51 @@ static void check_new_map(struct ceph_mds_client *mdsc,
 		}
 	}
 
+	/*
+	 * Only open and reconnect sessions that don't exist yet.
+	 */
+	for (i = 0; i < newmap->possible_max_rank; i++) {
+		/*
+		 * In case the import MDS is crashed just after
+		 * the EImportStart journal is flushed, so when
+		 * a standby MDS takes over it and is replaying
+		 * the EImportStart journal the new MDS daemon
+		 * will wait the client to reconnect it, but the
+		 * client may never register/open the session yet.
+		 *
+		 * Will try to reconnect that MDS daemon if the
+		 * rank number is in the export targets array and
+		 * is the up:reconnect state.
+		 */
+		newstate = ceph_mdsmap_get_state(newmap, i);
+		if (!test_bit(i, targets) || newstate != CEPH_MDS_STATE_RECONNECT)
+			continue;
+
+		/*
+		 * The session maybe registered and opened by some
+		 * requests which were choosing random MDSes during
+		 * the mdsc->mutex's unlock/lock gap below in rare
+		 * case. But the related MDS daemon will just queue
+		 * that requests and be still waiting for the client's
+		 * reconnection request in up:reconnect state.
+		 */
+		s = __ceph_lookup_mds_session(mdsc, i);
+		if (likely(!s)) {
+			s = __open_export_target_session(mdsc, i);
+			if (IS_ERR(s)) {
+				err = PTR_ERR(s);
+				pr_err("failed to open export target session, err %d\n",
+				       err);
+				continue;
+			}
+		}
+		dout("send reconnect to export target mds.%d\n", i);
+		mutex_unlock(&mdsc->mutex);
+		send_mds_reconnect(mdsc, s);
+		ceph_put_mds_session(s);
+		mutex_lock(&mdsc->mutex);
+	}
+
 	for (i = 0; i < newmap->possible_max_rank && i < mdsc->max_sessions; i++) {
 		s = mdsc->sessions[i];
 		if (!s)
@@ -4409,24 +4507,12 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
 }
 
 /*
- * lock unlock sessions, to wait ongoing session activities
+ * lock unlock the session, to wait ongoing session activities
  */
-static void lock_unlock_sessions(struct ceph_mds_client *mdsc)
+static void lock_unlock_session(struct ceph_mds_session *s)
 {
-	int i;
-
-	mutex_lock(&mdsc->mutex);
-	for (i = 0; i < mdsc->max_sessions; i++) {
-		struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i);
-		if (!s)
-			continue;
-		mutex_unlock(&mdsc->mutex);
-		mutex_lock(&s->s_mutex);
-		mutex_unlock(&s->s_mutex);
-		ceph_put_mds_session(s);
-		mutex_lock(&mdsc->mutex);
-	}
-	mutex_unlock(&mdsc->mutex);
+	mutex_lock(&s->s_mutex);
+	mutex_unlock(&s->s_mutex);
 }
 
 static void maybe_recover_session(struct ceph_mds_client *mdsc)
@@ -4448,6 +4534,8 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc)
 
 bool check_session_state(struct ceph_mds_session *s)
 {
+	struct ceph_fs_client *fsc = s->s_mdsc->fsc;
+
 	switch (s->s_state) {
 	case CEPH_MDS_SESSION_OPEN:
 		if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
@@ -4456,8 +4544,9 @@ bool check_session_state(struct ceph_mds_session *s)
 		}
 		break;
 	case CEPH_MDS_SESSION_CLOSING:
-		/* Should never reach this when we're unmounting */
-		WARN_ON_ONCE(s->s_ttl);
+		/* Should never reach this when not force unmounting */
+		WARN_ON_ONCE(s->s_ttl &&
+			     READ_ONCE(fsc->mount_state) != CEPH_MOUNT_SHUTDOWN);
 		fallthrough;
 	case CEPH_MDS_SESSION_NEW:
 	case CEPH_MDS_SESSION_RESTARTING:
@@ -4584,21 +4673,12 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
 	init_completion(&mdsc->safe_umount_waiters);
 	init_waitqueue_head(&mdsc->session_close_wq);
 	INIT_LIST_HEAD(&mdsc->waiting_for_map);
-	mdsc->sessions = NULL;
-	atomic_set(&mdsc->num_sessions, 0);
-	mdsc->max_sessions = 0;
-	mdsc->stopping = 0;
-	atomic64_set(&mdsc->quotarealms_count, 0);
 	mdsc->quotarealms_inodes = RB_ROOT;
 	mutex_init(&mdsc->quotarealms_inodes_mutex);
-	mdsc->last_snap_seq = 0;
 	init_rwsem(&mdsc->snap_rwsem);
 	mdsc->snap_realms = RB_ROOT;
 	INIT_LIST_HEAD(&mdsc->snap_empty);
-	mdsc->num_snap_realms = 0;
 	spin_lock_init(&mdsc->snap_empty_lock);
-	mdsc->last_tid = 0;
-	mdsc->oldest_tid = 0;
 	mdsc->request_tree = RB_ROOT;
 	INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work);
 	mdsc->last_renew_caps = jiffies;
@@ -4610,11 +4690,9 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
 	mdsc->last_cap_flush_tid = 1;
 	INIT_LIST_HEAD(&mdsc->cap_flush_list);
 	INIT_LIST_HEAD(&mdsc->cap_dirty_migrating);
-	mdsc->num_cap_flushing = 0;
 	spin_lock_init(&mdsc->cap_dirty_lock);
 	init_waitqueue_head(&mdsc->cap_flushing_wq);
 	INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work);
-	atomic_set(&mdsc->cap_reclaim_pending, 0);
 	err = ceph_metric_init(&mdsc->metric);
 	if (err)
 		goto err_mdsmap;
@@ -4676,6 +4754,30 @@ static void wait_requests(struct ceph_mds_client *mdsc)
 	dout("wait_requests done\n");
 }
 
+void send_flush_mdlog(struct ceph_mds_session *s)
+{
+	struct ceph_msg *msg;
+
+	/*
+	 * Pre-luminous MDS crashes when it sees an unknown session request
+	 */
+	if (!CEPH_HAVE_FEATURE(s->s_con.peer_features, SERVER_LUMINOUS))
+		return;
+
+	mutex_lock(&s->s_mutex);
+	dout("request mdlog flush to mds%d (%s)s seq %lld\n", s->s_mds,
+	     ceph_session_state_name(s->s_state), s->s_seq);
+	msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_FLUSH_MDLOG,
+				      s->s_seq);
+	if (!msg) {
+		pr_err("failed to request mdlog flush to mds%d (%s) seq %lld\n",
+		       s->s_mds, ceph_session_state_name(s->s_state), s->s_seq);
+	} else {
+		ceph_con_send(&s->s_con, msg);
+	}
+	mutex_unlock(&s->s_mutex);
+}
+
 /*
  * called before mount is ro, and before dentries are torn down.
  * (hmm, does this still race with new lookups?)
@@ -4685,7 +4787,8 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
 	dout("pre_umount\n");
 	mdsc->stopping = 1;
 
-	lock_unlock_sessions(mdsc);
+	ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true);
+	ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false);
 	ceph_flush_dirty_caps(mdsc);
 	wait_requests(mdsc);
 
@@ -4912,7 +5015,6 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
 
 	ceph_metric_destroy(&mdsc->metric);
 
-	flush_delayed_work(&mdsc->metric.delayed_work);
 	fsc->mdsc = NULL;
 	kfree(mdsc);
 	dout("mdsc_destroy %p done\n", mdsc);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 20e42d8b66c6..97c7f7bfa55f 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -522,6 +522,11 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req)
 	kref_put(&req->r_kref, ceph_mdsc_release_request);
 }
 
+extern void send_flush_mdlog(struct ceph_mds_session *s);
+extern void ceph_mdsc_iterate_sessions(struct ceph_mds_client *mdsc,
+				       void (*cb)(struct ceph_mds_session *),
+				       bool check_state);
+extern struct ceph_msg *ceph_create_session_msg(u32 op, u64 seq);
 extern void __ceph_queue_cap_release(struct ceph_mds_session *session,
 				    struct ceph_cap *cap);
 extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc,
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 3c444b9cb17b..61d67cbcb367 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -122,6 +122,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
 	int err;
 	u8 mdsmap_v;
 	u16 mdsmap_ev;
+	u32 target;
 
 	m = kzalloc(sizeof(*m), GFP_NOFS);
 	if (!m)
@@ -260,9 +261,14 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
 						       sizeof(u32), GFP_NOFS);
 			if (!info->export_targets)
 				goto nomem;
-			for (j = 0; j < num_export_targets; j++)
-				info->export_targets[j] =
-				       ceph_decode_32(&pexport_targets);
+			for (j = 0; j < num_export_targets; j++) {
+				target = ceph_decode_32(&pexport_targets);
+				if (target >= m->possible_max_rank) {
+					err = -EIO;
+					goto corrupt;
+				}
+				info->export_targets[j] = target;
+			}
 		} else {
 			info->export_targets = NULL;
 		}
diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c
index 5ac151eb0d49..04d5df29bbbf 100644
--- a/fs/ceph/metric.c
+++ b/fs/ceph/metric.c
@@ -302,6 +302,8 @@ void ceph_metric_destroy(struct ceph_client_metric *m)
 	if (!m)
 		return;
 
+	cancel_delayed_work_sync(&m->delayed_work);
+
 	percpu_counter_destroy(&m->total_inodes);
 	percpu_counter_destroy(&m->opened_inodes);
 	percpu_counter_destroy(&m->i_caps_mis);
@@ -309,8 +311,6 @@ void ceph_metric_destroy(struct ceph_client_metric *m)
 	percpu_counter_destroy(&m->d_lease_mis);
 	percpu_counter_destroy(&m->d_lease_hit);
 
-	cancel_delayed_work_sync(&m->delayed_work);
-
 	ceph_put_mds_session(m->session);
 }
 
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 15105f9da3fd..b41e6724c591 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -849,6 +849,43 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
 	dout("flush_snaps done\n");
 }
 
+/**
+ * ceph_change_snap_realm - change the snap_realm for an inode
+ * @inode: inode to move to new snap realm
+ * @realm: new realm to move inode into (may be NULL)
+ *
+ * Detach an inode from its old snaprealm (if any) and attach it to
+ * the new snaprealm (if any). The old snap realm reference held by
+ * the inode is put. If realm is non-NULL, then the caller's reference
+ * to it is taken over by the inode.
+ */
+void ceph_change_snap_realm(struct inode *inode, struct ceph_snap_realm *realm)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+	struct ceph_snap_realm *oldrealm = ci->i_snap_realm;
+
+	lockdep_assert_held(&ci->i_ceph_lock);
+
+	if (oldrealm) {
+		spin_lock(&oldrealm->inodes_with_caps_lock);
+		list_del_init(&ci->i_snap_realm_item);
+		if (oldrealm->ino == ci->i_vino.ino)
+			oldrealm->inode = NULL;
+		spin_unlock(&oldrealm->inodes_with_caps_lock);
+		ceph_put_snap_realm(mdsc, oldrealm);
+	}
+
+	ci->i_snap_realm = realm;
+
+	if (realm) {
+		spin_lock(&realm->inodes_with_caps_lock);
+		list_add(&ci->i_snap_realm_item, &realm->inodes_with_caps);
+		if (realm->ino == ci->i_vino.ino)
+			realm->inode = inode;
+		spin_unlock(&realm->inodes_with_caps_lock);
+	}
+}
 
 /*
  * Handle a snap notification from the MDS.
@@ -935,7 +972,6 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
 			};
 			struct inode *inode = ceph_find_inode(sb, vino);
 			struct ceph_inode_info *ci;
-			struct ceph_snap_realm *oldrealm;
 
 			if (!inode)
 				continue;
@@ -960,27 +996,10 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
 			}
 			dout(" will move %p to split realm %llx %p\n",
 			     inode, realm->ino, realm);
-			/*
-			 * Move the inode to the new realm
-			 */
-			oldrealm = ci->i_snap_realm;
-			spin_lock(&oldrealm->inodes_with_caps_lock);
-			list_del_init(&ci->i_snap_realm_item);
-			spin_unlock(&oldrealm->inodes_with_caps_lock);
-
-			spin_lock(&realm->inodes_with_caps_lock);
-			list_add(&ci->i_snap_realm_item,
-				 &realm->inodes_with_caps);
-			ci->i_snap_realm = realm;
-			if (realm->ino == ci->i_vino.ino)
-                                realm->inode = inode;
-			spin_unlock(&realm->inodes_with_caps_lock);
-
-			spin_unlock(&ci->i_ceph_lock);
 
 			ceph_get_snap_realm(mdsc, realm);
-			ceph_put_snap_realm(mdsc, oldrealm);
-
+			ceph_change_snap_realm(inode, realm);
+			spin_unlock(&ci->i_ceph_lock);
 			iput(inode);
 			continue;
 
diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c
index 4a79f3632260..573bb9556fb5 100644
--- a/fs/ceph/strings.c
+++ b/fs/ceph/strings.c
@@ -46,6 +46,7 @@ const char *ceph_session_op_name(int op)
 	case CEPH_SESSION_FLUSHMSG_ACK: return "flushmsg_ack";
 	case CEPH_SESSION_FORCE_RO: return "force_ro";
 	case CEPH_SESSION_REJECT: return "reject";
+	case CEPH_SESSION_REQUEST_FLUSH_MDLOG: return "flush_mdlog";
 	}
 	return "???";
 }
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index b1a363641beb..a40eb14c282a 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -418,7 +418,6 @@ struct ceph_inode_info {
 		struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */
 		struct ceph_snapid_map *i_snapid_map; /* snapid -> dev_t */
 	};
-	int i_snap_realm_counter; /* snap realm (if caps) */
 	struct list_head i_snap_realm_item;
 	struct list_head i_snap_flush_item;
 	struct timespec64 i_btime;
@@ -929,6 +928,7 @@ extern void ceph_put_snap_realm(struct ceph_mds_client *mdsc,
 extern int ceph_update_snap_trace(struct ceph_mds_client *m,
 				  void *p, void *e, bool deletion,
 				  struct ceph_snap_realm **realm_ret);
+void ceph_change_snap_realm(struct inode *inode, struct ceph_snap_realm *realm);
 extern void ceph_handle_snap(struct ceph_mds_client *mdsc,
 			     struct ceph_mds_session *session,
 			     struct ceph_msg *msg);
@@ -1088,7 +1088,7 @@ void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx);
 /* acl.c */
 #ifdef CONFIG_CEPH_FS_POSIX_ACL
 
-struct posix_acl *ceph_get_acl(struct inode *, int);
+struct posix_acl *ceph_get_acl(struct inode *, int, bool);
 int ceph_set_acl(struct user_namespace *mnt_userns,
 		 struct inode *inode, struct posix_acl *acl, int type);
 int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
@@ -1138,6 +1138,7 @@ extern void ceph_add_cap(struct inode *inode,
 			 unsigned cap, unsigned seq, u64 realmino, int flags,
 			 struct ceph_cap **new_cap);
 extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
+extern void ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
 extern void __ceph_remove_caps(struct ceph_inode_info *ci);
 extern void ceph_put_cap(struct ceph_mds_client *mdsc,
 			 struct ceph_cap *cap);
@@ -1163,6 +1164,12 @@ extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci,
 					    int had);
 extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 				       struct ceph_snap_context *snapc);
+extern void __ceph_remove_capsnap(struct inode *inode,
+				  struct ceph_cap_snap *capsnap,
+				  bool *wake_ci, bool *wake_mdsc);
+extern void ceph_remove_capsnap(struct inode *inode,
+				struct ceph_cap_snap *capsnap,
+				bool *wake_ci, bool *wake_mdsc);
 extern void ceph_flush_snaps(struct ceph_inode_info *ci,
 			     struct ceph_mds_session **psession);
 extern bool __ceph_should_report_size(struct ceph_inode_info *ci);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 1242db8d3444..159a1ffa4f4b 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -340,6 +340,18 @@ static ssize_t ceph_vxattrcb_caps(struct ceph_inode_info *ci, char *val,
 			      ceph_cap_string(issued), issued);
 }
 
+static ssize_t ceph_vxattrcb_auth_mds(struct ceph_inode_info *ci,
+				       char *val, size_t size)
+{
+	int ret;
+
+	spin_lock(&ci->i_ceph_lock);
+	ret = ceph_fmt_xattr(val, size, "%d",
+			     ci->i_auth_cap ? ci->i_auth_cap->session->s_mds : -1);
+	spin_unlock(&ci->i_ceph_lock);
+	return ret;
+}
+
 #define CEPH_XATTR_NAME(_type, _name)	XATTR_CEPH_PREFIX #_type "." #_name
 #define CEPH_XATTR_NAME2(_type, _name, _name2)	\
 	XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
@@ -473,6 +485,13 @@ static struct ceph_vxattr ceph_common_vxattrs[] = {
 		.exists_cb = NULL,
 		.flags = VXATTR_FLAG_READONLY,
 	},
+	{
+		.name = "ceph.auth_mds",
+		.name_size = sizeof("ceph.auth_mds"),
+		.getxattr_cb = ceph_vxattrcb_auth_mds,
+		.exists_cb = NULL,
+		.flags = VXATTR_FLAG_READONLY,
+	},
 	{ .name = NULL, 0 }	/* Required table terminator */
 };
 
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 6679e07e533e..2e6f40344037 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -22,7 +22,7 @@
 #include <linux/random.h>
 #include <linux/highmem.h>
 #include <linux/fips.h>
-#include "../cifs_common/arc4.h"
+#include "../smbfs_common/arc4.h"
 #include <crypto/aead.h>
 
 int __cifs_calc_signature(struct smb_rqst *rqst,
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index dc920e206336..98e8e5aa0613 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -12,7 +12,7 @@
 
 #include <net/sock.h>
 #include <asm/unaligned.h>
-#include "smbfsctl.h"
+#include "../smbfs_common/smbfsctl.h"
 
 #define CIFS_PROT   0
 #define POSIX_PROT  (CIFS_PROT+1)
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index ddc0e8f97872..bda606dc72b1 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -689,13 +689,19 @@ smb2_close_cached_fid(struct kref *ref)
 		cifs_dbg(FYI, "clear cached root file handle\n");
 		SMB2_close(0, cfid->tcon, cfid->fid->persistent_fid,
 			   cfid->fid->volatile_fid);
-		cfid->is_valid = false;
-		cfid->file_all_info_is_valid = false;
-		cfid->has_lease = false;
-		if (cfid->dentry) {
-			dput(cfid->dentry);
-			cfid->dentry = NULL;
-		}
+	}
+
+	/*
+	 * We only check validity above to send SMB2_close,
+	 * but we still need to invalidate these entries
+	 * when this function is called
+	 */
+	cfid->is_valid = false;
+	cfid->file_all_info_is_valid = false;
+	cfid->has_lease = false;
+	if (cfid->dentry) {
+		dput(cfid->dentry);
+		cfid->dentry = NULL;
 	}
 }
 
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index 10047cc55286..4a0487753869 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -24,7 +24,7 @@
 #include "cifsglob.h"
 #include "cifs_debug.h"
 #include "cifsproto.h"
-#include "../cifs_common/md4.h"
+#include "../smbfs_common/md4.h"
 
 #ifndef false
 #define false 0
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index ac5e0c0e9181..1466b5d01cbb 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -45,7 +45,7 @@ static void configfs_d_iput(struct dentry * dentry,
 		/*
 		 * Set sd->s_dentry to null only when this dentry is the one
 		 * that is going to be killed.  Otherwise configfs_d_iput may
-		 * run just after configfs_attach_attr and set sd->s_dentry to
+		 * run just after configfs_lookup and set sd->s_dentry to
 		 * NULL even it's still in use.
 		 */
 		if (sd->s_dentry == dentry)
@@ -417,44 +417,16 @@ static void configfs_remove_dir(struct config_item * item)
 	dput(dentry);
 }
 
-
-/* attaches attribute's configfs_dirent to the dentry corresponding to the
- * attribute file
- */
-static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * dentry)
-{
-	struct configfs_attribute * attr = sd->s_element;
-	struct inode *inode;
-
-	spin_lock(&configfs_dirent_lock);
-	dentry->d_fsdata = configfs_get(sd);
-	sd->s_dentry = dentry;
-	spin_unlock(&configfs_dirent_lock);
-
-	inode = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG);
-	if (IS_ERR(inode)) {
-		configfs_put(sd);
-		return PTR_ERR(inode);
-	}
-	if (sd->s_type & CONFIGFS_ITEM_BIN_ATTR) {
-		inode->i_size = 0;
-		inode->i_fop = &configfs_bin_file_operations;
-	} else {
-		inode->i_size = PAGE_SIZE;
-		inode->i_fop = &configfs_file_operations;
-	}
-	d_add(dentry, inode);
-	return 0;
-}
-
 static struct dentry * configfs_lookup(struct inode *dir,
 				       struct dentry *dentry,
 				       unsigned int flags)
 {
 	struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
 	struct configfs_dirent * sd;
-	int found = 0;
-	int err;
+	struct inode *inode = NULL;
+
+	if (dentry->d_name.len > NAME_MAX)
+		return ERR_PTR(-ENAMETOOLONG);
 
 	/*
 	 * Fake invisibility if dir belongs to a group/default groups hierarchy
@@ -464,36 +436,39 @@ static struct dentry * configfs_lookup(struct inode *dir,
 	 * not complete their initialization, since the dentries of the
 	 * attributes won't be instantiated.
 	 */
-	err = -ENOENT;
 	if (!configfs_dirent_is_ready(parent_sd))
-		goto out;
+		return ERR_PTR(-ENOENT);
 
+	spin_lock(&configfs_dirent_lock);
 	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
-		if (sd->s_type & CONFIGFS_NOT_PINNED) {
-			const unsigned char * name = configfs_get_name(sd);
+		if ((sd->s_type & CONFIGFS_NOT_PINNED) &&
+		    !strcmp(configfs_get_name(sd), dentry->d_name.name)) {
+			struct configfs_attribute *attr = sd->s_element;
+			umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG;
 
-			if (strcmp(name, dentry->d_name.name))
-				continue;
+			dentry->d_fsdata = configfs_get(sd);
+			sd->s_dentry = dentry;
+			spin_unlock(&configfs_dirent_lock);
 
-			found = 1;
-			err = configfs_attach_attr(sd, dentry);
-			break;
+			inode = configfs_create(dentry, mode);
+			if (IS_ERR(inode)) {
+				configfs_put(sd);
+				return ERR_CAST(inode);
+			}
+			if (sd->s_type & CONFIGFS_ITEM_BIN_ATTR) {
+				inode->i_size = 0;
+				inode->i_fop = &configfs_bin_file_operations;
+			} else {
+				inode->i_size = PAGE_SIZE;
+				inode->i_fop = &configfs_file_operations;
+			}
+			goto done;
 		}
 	}
-
-	if (!found) {
-		/*
-		 * If it doesn't exist and it isn't a NOT_PINNED item,
-		 * it must be negative.
-		 */
-		if (dentry->d_name.len > NAME_MAX)
-			return ERR_PTR(-ENAMETOOLONG);
-		d_add(dentry, NULL);
-		return NULL;
-	}
-
-out:
-	return ERR_PTR(err);
+	spin_unlock(&configfs_dirent_lock);
+done:
+	d_add(dentry, inode);
+	return NULL;
 }
 
 /*
diff --git a/fs/coredump.c b/fs/coredump.c
index 07afb5ddb1c4..3224dee44d30 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -782,10 +782,17 @@ void do_coredump(const kernel_siginfo_t *siginfo)
 		 * filesystem.
 		 */
 		mnt_userns = file_mnt_user_ns(cprm.file);
-		if (!uid_eq(i_uid_into_mnt(mnt_userns, inode), current_fsuid()))
+		if (!uid_eq(i_uid_into_mnt(mnt_userns, inode),
+			    current_fsuid())) {
+			pr_info_ratelimited("Core dump to %s aborted: cannot preserve file owner\n",
+					    cn.corename);
 			goto close_fail;
-		if ((inode->i_mode & 0677) != 0600)
+		}
+		if ((inode->i_mode & 0677) != 0600) {
+			pr_info_ratelimited("Core dump to %s aborted: cannot preserve file permissions\n",
+					    cn.corename);
 			goto close_fail;
+		}
 		if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
 			goto close_fail;
 		if (do_truncate(mnt_userns, cprm.file->f_path.dentry,
@@ -1127,8 +1134,10 @@ int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
 
 	mmap_write_unlock(mm);
 
-	if (WARN_ON(i != *vma_count))
+	if (WARN_ON(i != *vma_count)) {
+		kvfree(*vma_meta);
 		return -EFAULT;
+	}
 
 	*vma_data_size_ptr = vma_data_size;
 	return 0;
diff --git a/fs/d_path.c b/fs/d_path.c
index 23a53f7b5c71..cd60c7535181 100644
--- a/fs/d_path.c
+++ b/fs/d_path.c
@@ -22,13 +22,57 @@ static char *extract_string(struct prepend_buffer *p)
 	return ERR_PTR(-ENAMETOOLONG);
 }
 
-static void prepend(struct prepend_buffer *p, const char *str, int namelen)
+static bool prepend_char(struct prepend_buffer *p, unsigned char c)
 {
-	p->len -= namelen;
-	if (likely(p->len >= 0)) {
-		p->buf -= namelen;
-		memcpy(p->buf, str, namelen);
+	if (likely(p->len > 0)) {
+		p->len--;
+		*--p->buf = c;
+		return true;
+	}
+	p->len = -1;
+	return false;
+}
+
+/*
+ * The source of the prepend data can be an optimistoc load
+ * of a dentry name and length. And because we don't hold any
+ * locks, the length and the pointer to the name may not be
+ * in sync if a concurrent rename happens, and the kernel
+ * copy might fault as a result.
+ *
+ * The end result will correct itself when we check the
+ * rename sequence count, but we need to be able to handle
+ * the fault gracefully.
+ */
+static bool prepend_copy(void *dst, const void *src, int len)
+{
+	if (unlikely(copy_from_kernel_nofault(dst, src, len))) {
+		memset(dst, 'x', len);
+		return false;
 	}
+	return true;
+}
+
+static bool prepend(struct prepend_buffer *p, const char *str, int namelen)
+{
+	// Already overflowed?
+	if (p->len < 0)
+		return false;
+
+	// Will overflow?
+	if (p->len < namelen) {
+		// Fill as much as possible from the end of the name
+		str += namelen - p->len;
+		p->buf -= p->len;
+		prepend_copy(p->buf, str, p->len);
+		p->len = -1;
+		return false;
+	}
+
+	// Fits fully
+	p->len -= namelen;
+	p->buf -= namelen;
+	return prepend_copy(p->buf, str, namelen);
 }
 
 /**
@@ -40,32 +84,21 @@ static void prepend(struct prepend_buffer *p, const char *str, int namelen)
  * With RCU path tracing, it may race with d_move(). Use READ_ONCE() to
  * make sure that either the old or the new name pointer and length are
  * fetched. However, there may be mismatch between length and pointer.
- * The length cannot be trusted, we need to copy it byte-by-byte until
- * the length is reached or a null byte is found. It also prepends "/" at
+ * But since the length cannot be trusted, we need to copy the name very
+ * carefully when doing the prepend_copy(). It also prepends "/" at
  * the beginning of the name. The sequence number check at the caller will
  * retry it again when a d_move() does happen. So any garbage in the buffer
  * due to mismatched pointer and length will be discarded.
  *
- * Load acquire is needed to make sure that we see that terminating NUL.
+ * Load acquire is needed to make sure that we see the new name data even
+ * if we might get the length wrong.
  */
 static bool prepend_name(struct prepend_buffer *p, const struct qstr *name)
 {
 	const char *dname = smp_load_acquire(&name->name); /* ^^^ */
 	u32 dlen = READ_ONCE(name->len);
-	char *s;
 
-	p->len -= dlen + 1;
-	if (unlikely(p->len < 0))
-		return false;
-	s = p->buf -= dlen + 1;
-	*s++ = '/';
-	while (dlen--) {
-		char c = *dname++;
-		if (!c)
-			break;
-		*s++ = c;
-	}
-	return true;
+	return prepend(p, dname, dlen) && prepend_char(p, '/');
 }
 
 static int __prepend_path(const struct dentry *dentry, const struct mount *mnt,
@@ -158,7 +191,7 @@ restart:
 		b = *p;
 
 	if (b.len == p->len)
-		prepend(&b, "/", 1);
+		prepend_char(&b, '/');
 
 	*p = b;
 	return error;
@@ -186,7 +219,7 @@ char *__d_path(const struct path *path,
 {
 	DECLARE_BUFFER(b, buf, buflen);
 
-	prepend(&b, "", 1);
+	prepend_char(&b, 0);
 	if (unlikely(prepend_path(path, root, &b) > 0))
 		return NULL;
 	return extract_string(&b);
@@ -198,7 +231,7 @@ char *d_absolute_path(const struct path *path,
 	struct path root = {};
 	DECLARE_BUFFER(b, buf, buflen);
 
-	prepend(&b, "", 1);
+	prepend_char(&b, 0);
 	if (unlikely(prepend_path(path, &root, &b) > 1))
 		return ERR_PTR(-EINVAL);
 	return extract_string(&b);
@@ -255,7 +288,7 @@ char *d_path(const struct path *path, char *buf, int buflen)
 	if (unlikely(d_unlinked(path->dentry)))
 		prepend(&b, " (deleted)", 11);
 	else
-		prepend(&b, "", 1);
+		prepend_char(&b, 0);
 	prepend_path(path, &root, &b);
 	rcu_read_unlock();
 
@@ -290,7 +323,7 @@ char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
 	/* these dentries are never renamed, so d_lock is not needed */
 	prepend(&b, " (deleted)", 11);
 	prepend(&b, dentry->d_name.name, dentry->d_name.len);
-	prepend(&b, "/", 1);
+	prepend_char(&b, '/');
 	return extract_string(&b);
 }
 
@@ -324,7 +357,7 @@ restart:
 	}
 	done_seqretry(&rename_lock, seq);
 	if (b.len == p->len)
-		prepend(&b, "/", 1);
+		prepend_char(&b, '/');
 	return extract_string(&b);
 }
 
@@ -332,7 +365,7 @@ char *dentry_path_raw(const struct dentry *dentry, char *buf, int buflen)
 {
 	DECLARE_BUFFER(b, buf, buflen);
 
-	prepend(&b, "", 1);
+	prepend_char(&b, 0);
 	return __dentry_path(dentry, &b);
 }
 EXPORT_SYMBOL(dentry_path_raw);
@@ -344,7 +377,7 @@ char *dentry_path(const struct dentry *dentry, char *buf, int buflen)
 	if (unlikely(d_unlinked(dentry)))
 		prepend(&b, "//deleted", 10);
 	else
-		prepend(&b, "", 1);
+		prepend_char(&b, 0);
 	return __dentry_path(dentry, &b);
 }
 
@@ -397,7 +430,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
 		unsigned len;
 		DECLARE_BUFFER(b, page, PATH_MAX);
 
-		prepend(&b, "", 1);
+		prepend_char(&b, 0);
 		if (unlikely(prepend_path(&pwd, &root, &b) > 0))
 			prepend(&b, "(unreachable)", 13);
 		rcu_read_unlock();
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c
index 10c36ae1a8f9..45ebbe602bbf 100644
--- a/fs/dlm/dir.c
+++ b/fs/dlm/dir.c
@@ -85,8 +85,10 @@ int dlm_recover_directory(struct dlm_ls *ls)
 		for (;;) {
 			int left;
 			error = dlm_recovery_stopped(ls);
-			if (error)
+			if (error) {
+				error = -EINTR;
 				goto out_free;
+			}
 
 			error = dlm_rcom_names(ls, memb->nodeid,
 					       last_name, last_len);
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 91d1ca3a121a..5f57538b5d45 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -468,7 +468,7 @@ struct dlm_rcom {
 struct dlm_opt_header {
 	uint16_t	t_type;
 	uint16_t	t_length;
-	uint32_t	o_pad;
+	uint32_t	t_pad;
 	/* need to be 8 byte aligned */
 	char		t_value[];
 };
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index d71aba8c3e64..10eddfa6c3d7 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -498,7 +498,7 @@ static int new_lockspace(const char *name, const char *cluster,
 	ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS |
 				    DLM_LSFL_NEWEXCL));
 
-	size = dlm_config.ci_rsbtbl_size;
+	size = READ_ONCE(dlm_config.ci_rsbtbl_size);
 	ls->ls_rsbtbl_size = size;
 
 	ls->ls_rsbtbl = vmalloc(array_size(size, sizeof(struct dlm_rsbtable)));
@@ -793,6 +793,7 @@ static int release_lockspace(struct dlm_ls *ls, int force)
 
 	if (ls_count == 1) {
 		dlm_scand_stop();
+		dlm_clear_members(ls);
 		dlm_midcomms_shutdown();
 	}
 
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 0ea9ae35da0b..8f715c620e1f 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -84,9 +84,7 @@ struct connection {
 	struct list_head writequeue;  /* List of outgoing writequeue_entries */
 	spinlock_t writequeue_lock;
 	atomic_t writequeue_cnt;
-	void (*connect_action) (struct connection *);	/* What to do to connect */
-	void (*shutdown_action)(struct connection *con); /* What to do to shutdown */
-	bool (*eof_condition)(struct connection *con); /* What to do to eof check */
+	struct mutex wq_alloc;
 	int retries;
 #define MAX_CONNECT_RETRIES 3
 	struct hlist_node list;
@@ -145,6 +143,24 @@ struct dlm_node_addr {
 	struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT];
 };
 
+struct dlm_proto_ops {
+	bool try_new_addr;
+	const char *name;
+	int proto;
+
+	int (*connect)(struct connection *con, struct socket *sock,
+		       struct sockaddr *addr, int addr_len);
+	void (*sockopts)(struct socket *sock);
+	int (*bind)(struct socket *sock);
+	int (*listen_validate)(void);
+	void (*listen_sockopts)(struct socket *sock);
+	int (*listen_bind)(struct socket *sock);
+	/* What to do to shutdown */
+	void (*shutdown_action)(struct connection *con);
+	/* What to do to eof check */
+	bool (*eof_condition)(struct connection *con);
+};
+
 static struct listen_sock_callbacks {
 	void (*sk_error_report)(struct sock *);
 	void (*sk_data_ready)(struct sock *);
@@ -168,12 +184,26 @@ static struct hlist_head connection_hash[CONN_HASH_SIZE];
 static DEFINE_SPINLOCK(connections_lock);
 DEFINE_STATIC_SRCU(connections_srcu);
 
+static const struct dlm_proto_ops *dlm_proto_ops;
+
 static void process_recv_sockets(struct work_struct *work);
 static void process_send_sockets(struct work_struct *work);
 
-static void sctp_connect_to_sock(struct connection *con);
-static void tcp_connect_to_sock(struct connection *con);
-static void dlm_tcp_shutdown(struct connection *con);
+/* need to held writequeue_lock */
+static struct writequeue_entry *con_next_wq(struct connection *con)
+{
+	struct writequeue_entry *e;
+
+	if (list_empty(&con->writequeue))
+		return NULL;
+
+	e = list_first_entry(&con->writequeue, struct writequeue_entry,
+			     list);
+	if (e->len == 0)
+		return NULL;
+
+	return e;
+}
 
 static struct connection *__find_con(int nodeid, int r)
 {
@@ -208,20 +238,6 @@ static int dlm_con_init(struct connection *con, int nodeid)
 	INIT_WORK(&con->rwork, process_recv_sockets);
 	init_waitqueue_head(&con->shutdown_wait);
 
-	switch (dlm_config.ci_protocol) {
-	case DLM_PROTO_TCP:
-		con->connect_action = tcp_connect_to_sock;
-		con->shutdown_action = dlm_tcp_shutdown;
-		con->eof_condition = tcp_eof_condition;
-		break;
-	case DLM_PROTO_SCTP:
-		con->connect_action = sctp_connect_to_sock;
-		break;
-	default:
-		kfree(con->rx_buf);
-		return -EINVAL;
-	}
-
 	return 0;
 }
 
@@ -249,6 +265,8 @@ static struct connection *nodeid2con(int nodeid, gfp_t alloc)
 		return NULL;
 	}
 
+	mutex_init(&con->wq_alloc);
+
 	spin_lock(&connections_lock);
 	/* Because multiple workqueues/threads calls this function it can
 	 * race on multiple cpu's. Instead of locking hot path __find_con()
@@ -583,8 +601,7 @@ static void lowcomms_error_report(struct sock *sk)
 		goto out;
 
 	orig_report = listen_sock.sk_error_report;
-	if (con->sock == NULL ||
-	    kernel_getpeername(con->sock, (struct sockaddr *)&saddr) < 0) {
+	if (kernel_getpeername(sk->sk_socket, (struct sockaddr *)&saddr) < 0) {
 		printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
 				   "sending to node %d, port %d, "
 				   "sk_err=%d/%d\n", dlm_our_nodeid(),
@@ -801,6 +818,7 @@ static void close_connection(struct connection *con, bool and_other,
 
 	con->rx_leftover = 0;
 	con->retries = 0;
+	clear_bit(CF_APP_LIMITED, &con->flags);
 	clear_bit(CF_CONNECTED, &con->flags);
 	clear_bit(CF_DELAY_CONNECT, &con->flags);
 	clear_bit(CF_RECONNECT, &con->flags);
@@ -877,7 +895,6 @@ static int con_realloc_receive_buf(struct connection *con, int newlen)
 /* Data received from remote end */
 static int receive_from_sock(struct connection *con)
 {
-	int call_again_soon = 0;
 	struct msghdr msg;
 	struct kvec iov;
 	int ret, buflen;
@@ -897,41 +914,40 @@ static int receive_from_sock(struct connection *con)
 			goto out_resched;
 	}
 
-	/* calculate new buffer parameter regarding last receive and
-	 * possible leftover bytes
-	 */
-	iov.iov_base = con->rx_buf + con->rx_leftover;
-	iov.iov_len = con->rx_buflen - con->rx_leftover;
-
-	memset(&msg, 0, sizeof(msg));
-	msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
-	ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len,
-			     msg.msg_flags);
-	if (ret <= 0)
-		goto out_close;
-	else if (ret == iov.iov_len)
-		call_again_soon = 1;
-
-	/* new buflen according readed bytes and leftover from last receive */
-	buflen = ret + con->rx_leftover;
-	ret = dlm_process_incoming_buffer(con->nodeid, con->rx_buf, buflen);
-	if (ret < 0)
-		goto out_close;
+	for (;;) {
+		/* calculate new buffer parameter regarding last receive and
+		 * possible leftover bytes
+		 */
+		iov.iov_base = con->rx_buf + con->rx_leftover;
+		iov.iov_len = con->rx_buflen - con->rx_leftover;
+
+		memset(&msg, 0, sizeof(msg));
+		msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
+		ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len,
+				     msg.msg_flags);
+		if (ret == -EAGAIN)
+			break;
+		else if (ret <= 0)
+			goto out_close;
 
-	/* calculate leftover bytes from process and put it into begin of
-	 * the receive buffer, so next receive we have the full message
-	 * at the start address of the receive buffer.
-	 */
-	con->rx_leftover = buflen - ret;
-	if (con->rx_leftover) {
-		memmove(con->rx_buf, con->rx_buf + ret,
-			con->rx_leftover);
-		call_again_soon = true;
+		/* new buflen according readed bytes and leftover from last receive */
+		buflen = ret + con->rx_leftover;
+		ret = dlm_process_incoming_buffer(con->nodeid, con->rx_buf, buflen);
+		if (ret < 0)
+			goto out_close;
+
+		/* calculate leftover bytes from process and put it into begin of
+		 * the receive buffer, so next receive we have the full message
+		 * at the start address of the receive buffer.
+		 */
+		con->rx_leftover = buflen - ret;
+		if (con->rx_leftover) {
+			memmove(con->rx_buf, con->rx_buf + ret,
+				con->rx_leftover);
+		}
 	}
 
-	if (call_again_soon)
-		goto out_resched;
-
+	dlm_midcomms_receive_done(con->nodeid);
 	mutex_unlock(&con->sock_mutex);
 	return 0;
 
@@ -946,7 +962,8 @@ out_close:
 		log_print("connection %p got EOF from %d",
 			  con, con->nodeid);
 
-		if (con->eof_condition && con->eof_condition(con)) {
+		if (dlm_proto_ops->eof_condition &&
+		    dlm_proto_ops->eof_condition(con)) {
 			set_bit(CF_EOF, &con->flags);
 			mutex_unlock(&con->sock_mutex);
 		} else {
@@ -1134,242 +1151,6 @@ static int sctp_bind_addrs(struct socket *sock, uint16_t port)
 	return result;
 }
 
-/* Initiate an SCTP association.
-   This is a special case of send_to_sock() in that we don't yet have a
-   peeled-off socket for this association, so we use the listening socket
-   and add the primary IP address of the remote node.
- */
-static void sctp_connect_to_sock(struct connection *con)
-{
-	struct sockaddr_storage daddr;
-	int result;
-	int addr_len;
-	struct socket *sock;
-	unsigned int mark;
-
-	mutex_lock(&con->sock_mutex);
-
-	/* Some odd races can cause double-connects, ignore them */
-	if (con->retries++ > MAX_CONNECT_RETRIES)
-		goto out;
-
-	if (con->sock) {
-		log_print("node %d already connected.", con->nodeid);
-		goto out;
-	}
-
-	memset(&daddr, 0, sizeof(daddr));
-	result = nodeid_to_addr(con->nodeid, &daddr, NULL, true, &mark);
-	if (result < 0) {
-		log_print("no address for nodeid %d", con->nodeid);
-		goto out;
-	}
-
-	/* Create a socket to communicate with */
-	result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
-				  SOCK_STREAM, IPPROTO_SCTP, &sock);
-	if (result < 0)
-		goto socket_err;
-
-	sock_set_mark(sock->sk, mark);
-
-	add_sock(sock, con);
-
-	/* Bind to all addresses. */
-	if (sctp_bind_addrs(con->sock, 0))
-		goto bind_err;
-
-	make_sockaddr(&daddr, dlm_config.ci_tcp_port, &addr_len);
-
-	log_print_ratelimited("connecting to %d", con->nodeid);
-
-	/* Turn off Nagle's algorithm */
-	sctp_sock_set_nodelay(sock->sk);
-
-	/*
-	 * Make sock->ops->connect() function return in specified time,
-	 * since O_NONBLOCK argument in connect() function does not work here,
-	 * then, we should restore the default value of this attribute.
-	 */
-	sock_set_sndtimeo(sock->sk, 5);
-	result = sock->ops->connect(sock, (struct sockaddr *)&daddr, addr_len,
-				   0);
-	sock_set_sndtimeo(sock->sk, 0);
-
-	if (result == -EINPROGRESS)
-		result = 0;
-	if (result == 0) {
-		if (!test_and_set_bit(CF_CONNECTED, &con->flags))
-			log_print("successful connected to node %d", con->nodeid);
-		goto out;
-	}
-
-bind_err:
-	con->sock = NULL;
-	sock_release(sock);
-
-socket_err:
-	/*
-	 * Some errors are fatal and this list might need adjusting. For other
-	 * errors we try again until the max number of retries is reached.
-	 */
-	if (result != -EHOSTUNREACH &&
-	    result != -ENETUNREACH &&
-	    result != -ENETDOWN &&
-	    result != -EINVAL &&
-	    result != -EPROTONOSUPPORT) {
-		log_print("connect %d try %d error %d", con->nodeid,
-			  con->retries, result);
-		mutex_unlock(&con->sock_mutex);
-		msleep(1000);
-		lowcomms_connect_sock(con);
-		return;
-	}
-
-out:
-	mutex_unlock(&con->sock_mutex);
-}
-
-/* Connect a new socket to its peer */
-static void tcp_connect_to_sock(struct connection *con)
-{
-	struct sockaddr_storage saddr, src_addr;
-	unsigned int mark;
-	int addr_len;
-	struct socket *sock = NULL;
-	int result;
-
-	mutex_lock(&con->sock_mutex);
-	if (con->retries++ > MAX_CONNECT_RETRIES)
-		goto out;
-
-	/* Some odd races can cause double-connects, ignore them */
-	if (con->sock)
-		goto out;
-
-	/* Create a socket to communicate with */
-	result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
-				  SOCK_STREAM, IPPROTO_TCP, &sock);
-	if (result < 0)
-		goto out_err;
-
-	memset(&saddr, 0, sizeof(saddr));
-	result = nodeid_to_addr(con->nodeid, &saddr, NULL, false, &mark);
-	if (result < 0) {
-		log_print("no address for nodeid %d", con->nodeid);
-		goto out_err;
-	}
-
-	sock_set_mark(sock->sk, mark);
-
-	add_sock(sock, con);
-
-	/* Bind to our cluster-known address connecting to avoid
-	   routing problems */
-	memcpy(&src_addr, dlm_local_addr[0], sizeof(src_addr));
-	make_sockaddr(&src_addr, 0, &addr_len);
-	result = sock->ops->bind(sock, (struct sockaddr *) &src_addr,
-				 addr_len);
-	if (result < 0) {
-		log_print("could not bind for connect: %d", result);
-		/* This *may* not indicate a critical error */
-	}
-
-	make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len);
-
-	log_print_ratelimited("connecting to %d", con->nodeid);
-
-	/* Turn off Nagle's algorithm */
-	tcp_sock_set_nodelay(sock->sk);
-
-	result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
-				   O_NONBLOCK);
-	if (result == -EINPROGRESS)
-		result = 0;
-	if (result == 0)
-		goto out;
-
-out_err:
-	if (con->sock) {
-		sock_release(con->sock);
-		con->sock = NULL;
-	} else if (sock) {
-		sock_release(sock);
-	}
-	/*
-	 * Some errors are fatal and this list might need adjusting. For other
-	 * errors we try again until the max number of retries is reached.
-	 */
-	if (result != -EHOSTUNREACH &&
-	    result != -ENETUNREACH &&
-	    result != -ENETDOWN && 
-	    result != -EINVAL &&
-	    result != -EPROTONOSUPPORT) {
-		log_print("connect %d try %d error %d", con->nodeid,
-			  con->retries, result);
-		mutex_unlock(&con->sock_mutex);
-		msleep(1000);
-		lowcomms_connect_sock(con);
-		return;
-	}
-out:
-	mutex_unlock(&con->sock_mutex);
-	return;
-}
-
-/* On error caller must run dlm_close_sock() for the
- * listen connection socket.
- */
-static int tcp_create_listen_sock(struct listen_connection *con,
-				  struct sockaddr_storage *saddr)
-{
-	struct socket *sock = NULL;
-	int result = 0;
-	int addr_len;
-
-	if (dlm_local_addr[0]->ss_family == AF_INET)
-		addr_len = sizeof(struct sockaddr_in);
-	else
-		addr_len = sizeof(struct sockaddr_in6);
-
-	/* Create a socket to communicate with */
-	result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
-				  SOCK_STREAM, IPPROTO_TCP, &sock);
-	if (result < 0) {
-		log_print("Can't create listening comms socket");
-		goto create_out;
-	}
-
-	sock_set_mark(sock->sk, dlm_config.ci_mark);
-
-	/* Turn off Nagle's algorithm */
-	tcp_sock_set_nodelay(sock->sk);
-
-	sock_set_reuseaddr(sock->sk);
-
-	add_listen_sock(sock, con);
-
-	/* Bind to our port */
-	make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len);
-	result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len);
-	if (result < 0) {
-		log_print("Can't bind to port %d", dlm_config.ci_tcp_port);
-		goto create_out;
-	}
-	sock_set_keepalive(sock->sk);
-
-	result = sock->ops->listen(sock, 5);
-	if (result < 0) {
-		log_print("Can't listen on port %d", dlm_config.ci_tcp_port);
-		goto create_out;
-	}
-
-	return 0;
-
-create_out:
-	return result;
-}
-
 /* Get local addresses */
 static void init_local(void)
 {
@@ -1396,63 +1177,6 @@ static void deinit_local(void)
 		kfree(dlm_local_addr[i]);
 }
 
-/* Initialise SCTP socket and bind to all interfaces
- * On error caller must run dlm_close_sock() for the
- * listen connection socket.
- */
-static int sctp_listen_for_all(struct listen_connection *con)
-{
-	struct socket *sock = NULL;
-	int result = -EINVAL;
-
-	log_print("Using SCTP for communications");
-
-	result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
-				  SOCK_STREAM, IPPROTO_SCTP, &sock);
-	if (result < 0) {
-		log_print("Can't create comms socket, check SCTP is loaded");
-		goto out;
-	}
-
-	sock_set_rcvbuf(sock->sk, NEEDED_RMEM);
-	sock_set_mark(sock->sk, dlm_config.ci_mark);
-	sctp_sock_set_nodelay(sock->sk);
-
-	add_listen_sock(sock, con);
-
-	/* Bind to all addresses. */
-	result = sctp_bind_addrs(con->sock, dlm_config.ci_tcp_port);
-	if (result < 0)
-		goto out;
-
-	result = sock->ops->listen(sock, 5);
-	if (result < 0) {
-		log_print("Can't set socket listening");
-		goto out;
-	}
-
-	return 0;
-
-out:
-	return result;
-}
-
-static int tcp_listen_for_all(void)
-{
-	/* We don't support multi-homed hosts */
-	if (dlm_local_count > 1) {
-		log_print("TCP protocol can't handle multi-homed hosts, "
-			  "try SCTP");
-		return -EINVAL;
-	}
-
-	log_print("Using TCP for communications");
-
-	return tcp_create_listen_sock(&listen_con, dlm_local_addr[0]);
-}
-
-
-
 static struct writequeue_entry *new_writequeue_entry(struct connection *con,
 						     gfp_t allocation)
 {
@@ -1528,19 +1252,37 @@ static struct dlm_msg *dlm_lowcomms_new_msg_con(struct connection *con, int len,
 {
 	struct writequeue_entry *e;
 	struct dlm_msg *msg;
+	bool sleepable;
 
 	msg = kzalloc(sizeof(*msg), allocation);
 	if (!msg)
 		return NULL;
 
+	/* this mutex is being used as a wait to avoid multiple "fast"
+	 * new writequeue page list entry allocs in new_wq_entry in
+	 * normal operation which is sleepable context. Without it
+	 * we could end in multiple writequeue entries with one
+	 * dlm message because multiple callers were waiting at
+	 * the writequeue_lock in new_wq_entry().
+	 */
+	sleepable = gfpflags_normal_context(allocation);
+	if (sleepable)
+		mutex_lock(&con->wq_alloc);
+
 	kref_init(&msg->ref);
 
 	e = new_wq_entry(con, len, allocation, ppc, cb, mh);
 	if (!e) {
+		if (sleepable)
+			mutex_unlock(&con->wq_alloc);
+
 		kfree(msg);
 		return NULL;
 	}
 
+	if (sleepable)
+		mutex_unlock(&con->wq_alloc);
+
 	msg->ppc = *ppc;
 	msg->len = len;
 	msg->entry = e;
@@ -1646,10 +1388,9 @@ int dlm_lowcomms_resend_msg(struct dlm_msg *msg)
 /* Send a message */
 static void send_to_sock(struct connection *con)
 {
-	int ret = 0;
 	const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
 	struct writequeue_entry *e;
-	int len, offset;
+	int len, offset, ret;
 	int count = 0;
 
 	mutex_lock(&con->sock_mutex);
@@ -1658,7 +1399,8 @@ static void send_to_sock(struct connection *con)
 
 	spin_lock(&con->writequeue_lock);
 	for (;;) {
-		if (list_empty(&con->writequeue))
+		e = con_next_wq(con);
+		if (!e)
 			break;
 
 		e = list_first_entry(&con->writequeue, struct writequeue_entry, list);
@@ -1667,25 +1409,22 @@ static void send_to_sock(struct connection *con)
 		BUG_ON(len == 0 && e->users == 0);
 		spin_unlock(&con->writequeue_lock);
 
-		ret = 0;
-		if (len) {
-			ret = kernel_sendpage(con->sock, e->page, offset, len,
-					      msg_flags);
-			if (ret == -EAGAIN || ret == 0) {
-				if (ret == -EAGAIN &&
-				    test_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags) &&
-				    !test_and_set_bit(CF_APP_LIMITED, &con->flags)) {
-					/* Notify TCP that we're limited by the
-					 * application window size.
-					 */
-					set_bit(SOCK_NOSPACE, &con->sock->flags);
-					con->sock->sk->sk_write_pending++;
-				}
-				cond_resched();
-				goto out;
-			} else if (ret < 0)
-				goto out;
-		}
+		ret = kernel_sendpage(con->sock, e->page, offset, len,
+				      msg_flags);
+		if (ret == -EAGAIN || ret == 0) {
+			if (ret == -EAGAIN &&
+			    test_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags) &&
+			    !test_and_set_bit(CF_APP_LIMITED, &con->flags)) {
+				/* Notify TCP that we're limited by the
+				 * application window size.
+				 */
+				set_bit(SOCK_NOSPACE, &con->sock->flags);
+				con->sock->sk->sk_write_pending++;
+			}
+			cond_resched();
+			goto out;
+		} else if (ret < 0)
+			goto out;
 
 		/* Don't starve people filling buffers */
 		if (++count >= MAX_SEND_MSG_COUNT) {
@@ -1770,12 +1509,9 @@ int dlm_lowcomms_close(int nodeid)
 static void process_recv_sockets(struct work_struct *work)
 {
 	struct connection *con = container_of(work, struct connection, rwork);
-	int err;
 
 	clear_bit(CF_READ_PENDING, &con->flags);
-	do {
-		err = receive_from_sock(con);
-	} while (!err);
+	receive_from_sock(con);
 }
 
 static void process_listen_recv_socket(struct work_struct *work)
@@ -1783,6 +1519,74 @@ static void process_listen_recv_socket(struct work_struct *work)
 	accept_from_sock(&listen_con);
 }
 
+static void dlm_connect(struct connection *con)
+{
+	struct sockaddr_storage addr;
+	int result, addr_len;
+	struct socket *sock;
+	unsigned int mark;
+
+	/* Some odd races can cause double-connects, ignore them */
+	if (con->retries++ > MAX_CONNECT_RETRIES)
+		return;
+
+	if (con->sock) {
+		log_print("node %d already connected.", con->nodeid);
+		return;
+	}
+
+	memset(&addr, 0, sizeof(addr));
+	result = nodeid_to_addr(con->nodeid, &addr, NULL,
+				dlm_proto_ops->try_new_addr, &mark);
+	if (result < 0) {
+		log_print("no address for nodeid %d", con->nodeid);
+		return;
+	}
+
+	/* Create a socket to communicate with */
+	result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
+				  SOCK_STREAM, dlm_proto_ops->proto, &sock);
+	if (result < 0)
+		goto socket_err;
+
+	sock_set_mark(sock->sk, mark);
+	dlm_proto_ops->sockopts(sock);
+
+	add_sock(sock, con);
+
+	result = dlm_proto_ops->bind(sock);
+	if (result < 0)
+		goto add_sock_err;
+
+	log_print_ratelimited("connecting to %d", con->nodeid);
+	make_sockaddr(&addr, dlm_config.ci_tcp_port, &addr_len);
+	result = dlm_proto_ops->connect(con, sock, (struct sockaddr *)&addr,
+					addr_len);
+	if (result < 0)
+		goto add_sock_err;
+
+	return;
+
+add_sock_err:
+	dlm_close_sock(&con->sock);
+
+socket_err:
+	/*
+	 * Some errors are fatal and this list might need adjusting. For other
+	 * errors we try again until the max number of retries is reached.
+	 */
+	if (result != -EHOSTUNREACH &&
+	    result != -ENETUNREACH &&
+	    result != -ENETDOWN &&
+	    result != -EINVAL &&
+	    result != -EPROTONOSUPPORT) {
+		log_print("connect %d try %d error %d", con->nodeid,
+			  con->retries, result);
+		msleep(1000);
+		lowcomms_connect_sock(con);
+	}
+}
+
 /* Send workqueue function */
 static void process_send_sockets(struct work_struct *work)
 {
@@ -1797,11 +1601,15 @@ static void process_send_sockets(struct work_struct *work)
 		dlm_midcomms_unack_msg_resend(con->nodeid);
 	}
 
-	if (con->sock == NULL) { /* not mutex protected so check it inside too */
+	if (con->sock == NULL) {
 		if (test_and_clear_bit(CF_DELAY_CONNECT, &con->flags))
 			msleep(1000);
-		con->connect_action(con);
+
+		mutex_lock(&con->sock_mutex);
+		dlm_connect(con);
+		mutex_unlock(&con->sock_mutex);
 	}
+
 	if (!list_empty(&con->writequeue))
 		send_to_sock(con);
 }
@@ -1840,8 +1648,8 @@ static int work_start(void)
 
 static void shutdown_conn(struct connection *con)
 {
-	if (con->shutdown_action)
-		con->shutdown_action(con);
+	if (dlm_proto_ops->shutdown_action)
+		dlm_proto_ops->shutdown_action(con);
 }
 
 void dlm_lowcomms_shutdown(void)
@@ -1948,8 +1756,198 @@ void dlm_lowcomms_stop(void)
 	srcu_read_unlock(&connections_srcu, idx);
 	work_stop();
 	deinit_local();
+
+	dlm_proto_ops = NULL;
 }
 
+static int dlm_listen_for_all(void)
+{
+	struct socket *sock;
+	int result;
+
+	log_print("Using %s for communications",
+		  dlm_proto_ops->name);
+
+	result = dlm_proto_ops->listen_validate();
+	if (result < 0)
+		return result;
+
+	result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
+				  SOCK_STREAM, dlm_proto_ops->proto, &sock);
+	if (result < 0) {
+		log_print("Can't create comms socket, check SCTP is loaded");
+		goto out;
+	}
+
+	sock_set_mark(sock->sk, dlm_config.ci_mark);
+	dlm_proto_ops->listen_sockopts(sock);
+
+	result = dlm_proto_ops->listen_bind(sock);
+	if (result < 0)
+		goto out;
+
+	save_listen_callbacks(sock);
+	add_listen_sock(sock, &listen_con);
+
+	INIT_WORK(&listen_con.rwork, process_listen_recv_socket);
+	result = sock->ops->listen(sock, 5);
+	if (result < 0) {
+		dlm_close_sock(&listen_con.sock);
+		goto out;
+	}
+
+	return 0;
+
+out:
+	sock_release(sock);
+	return result;
+}
+
+static int dlm_tcp_bind(struct socket *sock)
+{
+	struct sockaddr_storage src_addr;
+	int result, addr_len;
+
+	/* Bind to our cluster-known address connecting to avoid
+	 * routing problems.
+	 */
+	memcpy(&src_addr, dlm_local_addr[0], sizeof(src_addr));
+	make_sockaddr(&src_addr, 0, &addr_len);
+
+	result = sock->ops->bind(sock, (struct sockaddr *)&src_addr,
+				 addr_len);
+	if (result < 0) {
+		/* This *may* not indicate a critical error */
+		log_print("could not bind for connect: %d", result);
+	}
+
+	return 0;
+}
+
+static int dlm_tcp_connect(struct connection *con, struct socket *sock,
+			   struct sockaddr *addr, int addr_len)
+{
+	int ret;
+
+	ret = sock->ops->connect(sock, addr, addr_len, O_NONBLOCK);
+	switch (ret) {
+	case -EINPROGRESS:
+		fallthrough;
+	case 0:
+		return 0;
+	}
+
+	return ret;
+}
+
+static int dlm_tcp_listen_validate(void)
+{
+	/* We don't support multi-homed hosts */
+	if (dlm_local_count > 1) {
+		log_print("TCP protocol can't handle multi-homed hosts, try SCTP");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void dlm_tcp_sockopts(struct socket *sock)
+{
+	/* Turn off Nagle's algorithm */
+	tcp_sock_set_nodelay(sock->sk);
+}
+
+static void dlm_tcp_listen_sockopts(struct socket *sock)
+{
+	dlm_tcp_sockopts(sock);
+	sock_set_reuseaddr(sock->sk);
+}
+
+static int dlm_tcp_listen_bind(struct socket *sock)
+{
+	int addr_len;
+
+	/* Bind to our port */
+	make_sockaddr(dlm_local_addr[0], dlm_config.ci_tcp_port, &addr_len);
+	return sock->ops->bind(sock, (struct sockaddr *)dlm_local_addr[0],
+			       addr_len);
+}
+
+static const struct dlm_proto_ops dlm_tcp_ops = {
+	.name = "TCP",
+	.proto = IPPROTO_TCP,
+	.connect = dlm_tcp_connect,
+	.sockopts = dlm_tcp_sockopts,
+	.bind = dlm_tcp_bind,
+	.listen_validate = dlm_tcp_listen_validate,
+	.listen_sockopts = dlm_tcp_listen_sockopts,
+	.listen_bind = dlm_tcp_listen_bind,
+	.shutdown_action = dlm_tcp_shutdown,
+	.eof_condition = tcp_eof_condition,
+};
+
+static int dlm_sctp_bind(struct socket *sock)
+{
+	return sctp_bind_addrs(sock, 0);
+}
+
+static int dlm_sctp_connect(struct connection *con, struct socket *sock,
+			    struct sockaddr *addr, int addr_len)
+{
+	int ret;
+
+	/*
+	 * Make sock->ops->connect() function return in specified time,
+	 * since O_NONBLOCK argument in connect() function does not work here,
+	 * then, we should restore the default value of this attribute.
+	 */
+	sock_set_sndtimeo(sock->sk, 5);
+	ret = sock->ops->connect(sock, addr, addr_len, 0);
+	sock_set_sndtimeo(sock->sk, 0);
+	if (ret < 0)
+		return ret;
+
+	if (!test_and_set_bit(CF_CONNECTED, &con->flags))
+		log_print("successful connected to node %d", con->nodeid);
+
+	return 0;
+}
+
+static int dlm_sctp_listen_validate(void)
+{
+	if (!IS_ENABLED(CONFIG_IP_SCTP)) {
+		log_print("SCTP is not enabled by this kernel");
+		return -EOPNOTSUPP;
+	}
+
+	request_module("sctp");
+	return 0;
+}
+
+static int dlm_sctp_bind_listen(struct socket *sock)
+{
+	return sctp_bind_addrs(sock, dlm_config.ci_tcp_port);
+}
+
+static void dlm_sctp_sockopts(struct socket *sock)
+{
+	/* Turn off Nagle's algorithm */
+	sctp_sock_set_nodelay(sock->sk);
+	sock_set_rcvbuf(sock->sk, NEEDED_RMEM);
+}
+
+static const struct dlm_proto_ops dlm_sctp_ops = {
+	.name = "SCTP",
+	.proto = IPPROTO_SCTP,
+	.try_new_addr = true,
+	.connect = dlm_sctp_connect,
+	.sockopts = dlm_sctp_sockopts,
+	.bind = dlm_sctp_bind,
+	.listen_validate = dlm_sctp_listen_validate,
+	.listen_sockopts = dlm_sctp_sockopts,
+	.listen_bind = dlm_sctp_bind_listen,
+};
+
 int dlm_lowcomms_start(void)
 {
 	int error = -EINVAL;
@@ -1976,23 +1974,27 @@ int dlm_lowcomms_start(void)
 	/* Start listening */
 	switch (dlm_config.ci_protocol) {
 	case DLM_PROTO_TCP:
-		error = tcp_listen_for_all();
+		dlm_proto_ops = &dlm_tcp_ops;
 		break;
 	case DLM_PROTO_SCTP:
-		error = sctp_listen_for_all(&listen_con);
+		dlm_proto_ops = &dlm_sctp_ops;
 		break;
 	default:
 		log_print("Invalid protocol identifier %d set",
 			  dlm_config.ci_protocol);
 		error = -EINVAL;
-		break;
+		goto fail_proto_ops;
 	}
+
+	error = dlm_listen_for_all();
 	if (error)
-		goto fail_unlisten;
+		goto fail_listen;
 
 	return 0;
 
-fail_unlisten:
+fail_listen:
+	dlm_proto_ops = NULL;
+fail_proto_ops:
 	dlm_allow_conn = 0;
 	dlm_close_sock(&listen_con.sock);
 	work_stop();
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h
index aaae7115c00d..4ccae07cf005 100644
--- a/fs/dlm/lowcomms.h
+++ b/fs/dlm/lowcomms.h
@@ -46,6 +46,7 @@ int dlm_lowcomms_resend_msg(struct dlm_msg *msg);
 int dlm_lowcomms_connect_node(int nodeid);
 int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark);
 int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len);
+void dlm_midcomms_receive_done(int nodeid);
 
 #endif				/* __LOWCOMMS_DOT_H__ */
 
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index d9e1e4170eb1..731d489aa323 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -443,8 +443,10 @@ static int ping_members(struct dlm_ls *ls)
 
 	list_for_each_entry(memb, &ls->ls_nodes, list) {
 		error = dlm_recovery_stopped(ls);
-		if (error)
+		if (error) {
+			error = -EINTR;
 			break;
+		}
 		error = dlm_rcom_status(ls, memb->nodeid, 0);
 		if (error)
 			break;
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index e3de268898ed..7ae39ec8d9b0 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -109,12 +109,6 @@
  * compatibility. There exists better ways to make a better handling.
  * However this should be changed in the next major version bump of dlm.
  *
- * Ack handling:
- *
- * Currently we send an ack message for every dlm message. However we
- * can ack multiple dlm messages with one ack by just delaying the ack
- * message. Will reduce some traffic but makes the drop detection slower.
- *
  * Tail Size checking:
  *
  * There exists a message tail payload in e.g. DLM_MSG however we don't
@@ -169,6 +163,7 @@ struct midcomms_node {
 #define DLM_NODE_FLAG_CLOSE	1
 #define DLM_NODE_FLAG_STOP_TX	2
 #define DLM_NODE_FLAG_STOP_RX	3
+#define DLM_NODE_ULP_DELIVERED	4
 	unsigned long flags;
 	wait_queue_head_t shutdown_wait;
 
@@ -480,11 +475,12 @@ static void dlm_midcomms_receive_buffer(union dlm_packet *p,
 {
 	if (seq == node->seq_next) {
 		node->seq_next++;
-		/* send ack before fin */
-		dlm_send_ack(node->nodeid, node->seq_next);
 
 		switch (p->header.h_cmd) {
 		case DLM_FIN:
+			/* send ack before fin */
+			dlm_send_ack(node->nodeid, node->seq_next);
+
 			spin_lock(&node->state_lock);
 			pr_debug("receive fin msg from node %d with state %s\n",
 				 node->nodeid, dlm_state_str(node->state));
@@ -534,6 +530,7 @@ static void dlm_midcomms_receive_buffer(union dlm_packet *p,
 		default:
 			WARN_ON(test_bit(DLM_NODE_FLAG_STOP_RX, &node->flags));
 			dlm_receive_buffer(p, node->nodeid);
+			set_bit(DLM_NODE_ULP_DELIVERED, &node->flags);
 			break;
 		}
 	} else {
@@ -933,6 +930,49 @@ int dlm_process_incoming_buffer(int nodeid, unsigned char *buf, int len)
 	return ret;
 }
 
+void dlm_midcomms_receive_done(int nodeid)
+{
+	struct midcomms_node *node;
+	int idx;
+
+	idx = srcu_read_lock(&nodes_srcu);
+	node = nodeid2node(nodeid, 0);
+	if (!node) {
+		srcu_read_unlock(&nodes_srcu, idx);
+		return;
+	}
+
+	/* old protocol, we do nothing */
+	switch (node->version) {
+	case DLM_VERSION_3_2:
+		break;
+	default:
+		srcu_read_unlock(&nodes_srcu, idx);
+		return;
+	}
+
+	/* do nothing if we didn't delivered stateful to ulp */
+	if (!test_and_clear_bit(DLM_NODE_ULP_DELIVERED,
+				&node->flags)) {
+		srcu_read_unlock(&nodes_srcu, idx);
+		return;
+	}
+
+	spin_lock(&node->state_lock);
+	/* we only ack if state is ESTABLISHED */
+	switch (node->state) {
+	case DLM_ESTABLISHED:
+		spin_unlock(&node->state_lock);
+		dlm_send_ack(node->nodeid, node->seq_next);
+		break;
+	default:
+		spin_unlock(&node->state_lock);
+		/* do nothing FIN has it's own ack send */
+		break;
+	};
+	srcu_read_unlock(&nodes_srcu, idx);
+}
+
 void dlm_midcomms_unack_msg_resend(int nodeid)
 {
 	struct midcomms_node *node;
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 5651933f54a4..6cba86470278 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -89,22 +89,15 @@ static int create_rcom_stateless(struct dlm_ls *ls, int to_nodeid, int type,
 	return 0;
 }
 
-static void _send_rcom(struct dlm_ls *ls, struct dlm_rcom *rc)
+static void send_rcom(struct dlm_mhandle *mh, struct dlm_rcom *rc)
 {
 	dlm_rcom_out(rc);
-}
-
-static void send_rcom(struct dlm_ls *ls, struct dlm_mhandle *mh,
-		      struct dlm_rcom *rc)
-{
-	_send_rcom(ls, rc);
 	dlm_midcomms_commit_mhandle(mh);
 }
 
-static void send_rcom_stateless(struct dlm_ls *ls, struct dlm_msg *msg,
-				struct dlm_rcom *rc)
+static void send_rcom_stateless(struct dlm_msg *msg, struct dlm_rcom *rc)
 {
-	_send_rcom(ls, rc);
+	dlm_rcom_out(rc);
 	dlm_lowcomms_commit_msg(msg);
 	dlm_lowcomms_put_msg(msg);
 }
@@ -204,7 +197,7 @@ retry:
 	allow_sync_reply(ls, &rc->rc_id);
 	memset(ls->ls_recover_buf, 0, DLM_MAX_SOCKET_BUFSIZE);
 
-	send_rcom_stateless(ls, msg, rc);
+	send_rcom_stateless(msg, rc);
 
 	error = dlm_wait_function(ls, &rcom_response);
 	disallow_sync_reply(ls);
@@ -287,7 +280,7 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 	spin_unlock(&ls->ls_recover_lock);
 
  do_send:
-	send_rcom_stateless(ls, msg, rc);
+	send_rcom_stateless(msg, rc);
 }
 
 static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
@@ -327,7 +320,7 @@ retry:
 	allow_sync_reply(ls, &rc->rc_id);
 	memset(ls->ls_recover_buf, 0, DLM_MAX_SOCKET_BUFSIZE);
 
-	send_rcom_stateless(ls, msg, rc);
+	send_rcom_stateless(msg, rc);
 
 	error = dlm_wait_function(ls, &rcom_response);
 	disallow_sync_reply(ls);
@@ -356,7 +349,7 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 
 	dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen,
 			      nodeid);
-	send_rcom_stateless(ls, msg, rc);
+	send_rcom_stateless(msg, rc);
 }
 
 int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid)
@@ -373,7 +366,7 @@ int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid)
 	memcpy(rc->rc_buf, r->res_name, r->res_length);
 	rc->rc_id = (unsigned long) r->res_id;
 
-	send_rcom(ls, mh, rc);
+	send_rcom(mh, rc);
  out:
 	return error;
 }
@@ -404,7 +397,7 @@ static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 	rc->rc_id = rc_in->rc_id;
 	rc->rc_seq_reply = rc_in->rc_seq;
 
-	send_rcom(ls, mh, rc);
+	send_rcom(mh, rc);
 }
 
 static void receive_rcom_lookup_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
@@ -461,7 +454,7 @@ int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
 	pack_rcom_lock(r, lkb, rl);
 	rc->rc_id = (unsigned long) r;
 
-	send_rcom(ls, mh, rc);
+	send_rcom(mh, rc);
  out:
 	return error;
 }
@@ -487,7 +480,7 @@ static void receive_rcom_lock(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 	rc->rc_id = rc_in->rc_id;
 	rc->rc_seq_reply = rc_in->rc_seq;
 
-	send_rcom(ls, mh, rc);
+	send_rcom(mh, rc);
 }
 
 /* If the lockspace doesn't exist then still send a status message
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 85e245392715..97d052cea5a9 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -125,8 +125,10 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 	dlm_recover_waiters_pre(ls);
 
 	error = dlm_recovery_stopped(ls);
-	if (error)
+	if (error) {
+		error = -EINTR;
 		goto fail;
+	}
 
 	if (neg || dlm_no_directory(ls)) {
 		/*
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index f00fcc4a4f72..e619c31b6bd9 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -3,6 +3,7 @@
  * Implement the manual drop-all-pagecache function
  */
 
+#include <linux/pagemap.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/fs.h>
@@ -27,7 +28,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
 		 * we need to reschedule to avoid softlockups.
 		 */
 		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
-		    (inode->i_mapping->nrpages == 0 && !need_resched())) {
+		    (mapping_empty(inode->i_mapping) && !need_resched())) {
 			spin_unlock(&inode->i_lock);
 			continue;
 		}
diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig
index 906af0c1998c..14b747026742 100644
--- a/fs/erofs/Kconfig
+++ b/fs/erofs/Kconfig
@@ -3,6 +3,7 @@
 config EROFS_FS
 	tristate "EROFS filesystem support"
 	depends on BLOCK
+	select FS_IOMAP
 	select LIBCRC32C
 	help
 	  EROFS (Enhanced Read-Only File System) is a lightweight
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 3787a5fb0a42..9db829715652 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -2,35 +2,13 @@
 /*
  * Copyright (C) 2017-2018 HUAWEI, Inc.
  *             https://www.huawei.com/
+ * Copyright (C) 2021, Alibaba Cloud
  */
 #include "internal.h"
 #include <linux/prefetch.h>
-
+#include <linux/dax.h>
 #include <trace/events/erofs.h>
 
-static void erofs_readendio(struct bio *bio)
-{
-	struct bio_vec *bvec;
-	blk_status_t err = bio->bi_status;
-	struct bvec_iter_all iter_all;
-
-	bio_for_each_segment_all(bvec, bio, iter_all) {
-		struct page *page = bvec->bv_page;
-
-		/* page is already locked */
-		DBG_BUGON(PageUptodate(page));
-
-		if (err)
-			SetPageError(page);
-		else
-			SetPageUptodate(page);
-
-		unlock_page(page);
-		/* page could be reclaimed now */
-	}
-	bio_put(bio);
-}
-
 struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr)
 {
 	struct address_space *const mapping = sb->s_bdev->bd_inode->i_mapping;
@@ -59,13 +37,6 @@ static int erofs_map_blocks_flatmode(struct inode *inode,
 	nblocks = DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
 	lastblk = nblocks - tailendpacking;
 
-	if (offset >= inode->i_size) {
-		/* leave out-of-bound access unmapped */
-		map->m_flags = 0;
-		map->m_plen = 0;
-		goto out;
-	}
-
 	/* there is no hole in flatmode */
 	map->m_flags = EROFS_MAP_MAPPED;
 
@@ -100,217 +71,273 @@ static int erofs_map_blocks_flatmode(struct inode *inode,
 		goto err_out;
 	}
 
-out:
 	map->m_llen = map->m_plen;
-
 err_out:
 	trace_erofs_map_blocks_flatmode_exit(inode, map, flags, 0);
 	return err;
 }
 
-static inline struct bio *erofs_read_raw_page(struct bio *bio,
-					      struct address_space *mapping,
-					      struct page *page,
-					      erofs_off_t *last_block,
-					      unsigned int nblocks,
-					      unsigned int *eblks,
-					      bool ra)
+static int erofs_map_blocks(struct inode *inode,
+			    struct erofs_map_blocks *map, int flags)
 {
-	struct inode *const inode = mapping->host;
-	struct super_block *const sb = inode->i_sb;
-	erofs_off_t current_block = (erofs_off_t)page->index;
-	int err;
-
-	DBG_BUGON(!nblocks);
-
-	if (PageUptodate(page)) {
-		err = 0;
-		goto has_updated;
-	}
+	struct super_block *sb = inode->i_sb;
+	struct erofs_inode *vi = EROFS_I(inode);
+	struct erofs_inode_chunk_index *idx;
+	struct page *page;
+	u64 chunknr;
+	unsigned int unit;
+	erofs_off_t pos;
+	int err = 0;
 
-	/* note that for readpage case, bio also equals to NULL */
-	if (bio &&
-	    (*last_block + 1 != current_block || !*eblks)) {
-submit_bio_retry:
-		submit_bio(bio);
-		bio = NULL;
+	if (map->m_la >= inode->i_size) {
+		/* leave out-of-bound access unmapped */
+		map->m_flags = 0;
+		map->m_plen = 0;
+		goto out;
 	}
 
-	if (!bio) {
-		struct erofs_map_blocks map = {
-			.m_la = blknr_to_addr(current_block),
-		};
-		erofs_blk_t blknr;
-		unsigned int blkoff;
-
-		err = erofs_map_blocks_flatmode(inode, &map, EROFS_GET_BLOCKS_RAW);
-		if (err)
-			goto err_out;
-
-		/* zero out the holed page */
-		if (!(map.m_flags & EROFS_MAP_MAPPED)) {
-			zero_user_segment(page, 0, PAGE_SIZE);
-			SetPageUptodate(page);
-
-			/* imply err = 0, see erofs_map_blocks */
-			goto has_updated;
-		}
-
-		/* for RAW access mode, m_plen must be equal to m_llen */
-		DBG_BUGON(map.m_plen != map.m_llen);
-
-		blknr = erofs_blknr(map.m_pa);
-		blkoff = erofs_blkoff(map.m_pa);
-
-		/* deal with inline page */
-		if (map.m_flags & EROFS_MAP_META) {
-			void *vsrc, *vto;
-			struct page *ipage;
+	if (vi->datalayout != EROFS_INODE_CHUNK_BASED)
+		return erofs_map_blocks_flatmode(inode, map, flags);
 
-			DBG_BUGON(map.m_plen > PAGE_SIZE);
+	if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
+		unit = sizeof(*idx);			/* chunk index */
+	else
+		unit = EROFS_BLOCK_MAP_ENTRY_SIZE;	/* block map */
 
-			ipage = erofs_get_meta_page(inode->i_sb, blknr);
+	chunknr = map->m_la >> vi->chunkbits;
+	pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
+		    vi->xattr_isize, unit) + unit * chunknr;
 
-			if (IS_ERR(ipage)) {
-				err = PTR_ERR(ipage);
-				goto err_out;
-			}
+	page = erofs_get_meta_page(inode->i_sb, erofs_blknr(pos));
+	if (IS_ERR(page))
+		return PTR_ERR(page);
 
-			vsrc = kmap_atomic(ipage);
-			vto = kmap_atomic(page);
-			memcpy(vto, vsrc + blkoff, map.m_plen);
-			memset(vto + map.m_plen, 0, PAGE_SIZE - map.m_plen);
-			kunmap_atomic(vto);
-			kunmap_atomic(vsrc);
-			flush_dcache_page(page);
+	map->m_la = chunknr << vi->chunkbits;
+	map->m_plen = min_t(erofs_off_t, 1UL << vi->chunkbits,
+			    roundup(inode->i_size - map->m_la, EROFS_BLKSIZ));
 
-			SetPageUptodate(page);
-			/* TODO: could we unlock the page earlier? */
-			unlock_page(ipage);
-			put_page(ipage);
+	/* handle block map */
+	if (!(vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) {
+		__le32 *blkaddr = page_address(page) + erofs_blkoff(pos);
 
-			/* imply err = 0, see erofs_map_blocks */
-			goto has_updated;
+		if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) {
+			map->m_flags = 0;
+		} else {
+			map->m_pa = blknr_to_addr(le32_to_cpu(*blkaddr));
+			map->m_flags = EROFS_MAP_MAPPED;
 		}
+		goto out_unlock;
+	}
+	/* parse chunk indexes */
+	idx = page_address(page) + erofs_blkoff(pos);
+	switch (le32_to_cpu(idx->blkaddr)) {
+	case EROFS_NULL_ADDR:
+		map->m_flags = 0;
+		break;
+	default:
+		/* only one device is supported for now */
+		if (idx->device_id) {
+			erofs_err(sb, "invalid device id %u @ %llu for nid %llu",
+				  le16_to_cpu(idx->device_id),
+				  chunknr, vi->nid);
+			err = -EFSCORRUPTED;
+			goto out_unlock;
+		}
+		map->m_pa = blknr_to_addr(le32_to_cpu(idx->blkaddr));
+		map->m_flags = EROFS_MAP_MAPPED;
+		break;
+	}
+out_unlock:
+	unlock_page(page);
+	put_page(page);
+out:
+	map->m_llen = map->m_plen;
+	return err;
+}
 
-		/* pa must be block-aligned for raw reading */
-		DBG_BUGON(erofs_blkoff(map.m_pa));
+static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
+{
+	int ret;
+	struct erofs_map_blocks map;
+
+	map.m_la = offset;
+	map.m_llen = length;
+
+	ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
+	if (ret < 0)
+		return ret;
+
+	iomap->bdev = inode->i_sb->s_bdev;
+	iomap->dax_dev = EROFS_I_SB(inode)->dax_dev;
+	iomap->offset = map.m_la;
+	iomap->length = map.m_llen;
+	iomap->flags = 0;
+	iomap->private = NULL;
+
+	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
+		iomap->type = IOMAP_HOLE;
+		iomap->addr = IOMAP_NULL_ADDR;
+		if (!iomap->length)
+			iomap->length = length;
+		return 0;
+	}
 
-		/* max # of continuous pages */
-		if (nblocks > DIV_ROUND_UP(map.m_plen, PAGE_SIZE))
-			nblocks = DIV_ROUND_UP(map.m_plen, PAGE_SIZE);
+	if (map.m_flags & EROFS_MAP_META) {
+		struct page *ipage;
+
+		iomap->type = IOMAP_INLINE;
+		ipage = erofs_get_meta_page(inode->i_sb,
+					    erofs_blknr(map.m_pa));
+		if (IS_ERR(ipage))
+			return PTR_ERR(ipage);
+		iomap->inline_data = page_address(ipage) +
+					erofs_blkoff(map.m_pa);
+		iomap->private = ipage;
+	} else {
+		iomap->type = IOMAP_MAPPED;
+		iomap->addr = map.m_pa;
+	}
+	return 0;
+}
 
-		*eblks = bio_max_segs(nblocks);
-		bio = bio_alloc(GFP_NOIO, *eblks);
+static int erofs_iomap_end(struct inode *inode, loff_t pos, loff_t length,
+		ssize_t written, unsigned int flags, struct iomap *iomap)
+{
+	struct page *ipage = iomap->private;
 
-		bio->bi_end_io = erofs_readendio;
-		bio_set_dev(bio, sb->s_bdev);
-		bio->bi_iter.bi_sector = (sector_t)blknr <<
-			LOG_SECTORS_PER_BLOCK;
-		bio->bi_opf = REQ_OP_READ | (ra ? REQ_RAHEAD : 0);
+	if (ipage) {
+		DBG_BUGON(iomap->type != IOMAP_INLINE);
+		unlock_page(ipage);
+		put_page(ipage);
+	} else {
+		DBG_BUGON(iomap->type == IOMAP_INLINE);
 	}
+	return written;
+}
 
-	err = bio_add_page(bio, page, PAGE_SIZE, 0);
-	/* out of the extent or bio is full */
-	if (err < PAGE_SIZE)
-		goto submit_bio_retry;
-	--*eblks;
-	*last_block = current_block;
-	return bio;
+static const struct iomap_ops erofs_iomap_ops = {
+	.iomap_begin = erofs_iomap_begin,
+	.iomap_end = erofs_iomap_end,
+};
 
-err_out:
-	/* for sync reading, set page error immediately */
-	if (!ra) {
-		SetPageError(page);
-		ClearPageUptodate(page);
+int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+		 u64 start, u64 len)
+{
+	if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) {
+#ifdef CONFIG_EROFS_FS_ZIP
+		return iomap_fiemap(inode, fieinfo, start, len,
+				    &z_erofs_iomap_report_ops);
+#else
+		return -EOPNOTSUPP;
+#endif
 	}
-has_updated:
-	unlock_page(page);
-
-	/* if updated manually, continuous pages has a gap */
-	if (bio)
-		submit_bio(bio);
-	return err ? ERR_PTR(err) : NULL;
+	return iomap_fiemap(inode, fieinfo, start, len, &erofs_iomap_ops);
 }
 
 /*
  * since we dont have write or truncate flows, so no inode
  * locking needs to be held at the moment.
  */
-static int erofs_raw_access_readpage(struct file *file, struct page *page)
+static int erofs_readpage(struct file *file, struct page *page)
 {
-	erofs_off_t last_block;
-	unsigned int eblks;
-	struct bio *bio;
-
-	trace_erofs_readpage(page, true);
+	return iomap_readpage(page, &erofs_iomap_ops);
+}
 
-	bio = erofs_read_raw_page(NULL, page->mapping,
-				  page, &last_block, 1, &eblks, false);
+static void erofs_readahead(struct readahead_control *rac)
+{
+	return iomap_readahead(rac, &erofs_iomap_ops);
+}
 
-	if (IS_ERR(bio))
-		return PTR_ERR(bio);
+static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
+{
+	return iomap_bmap(mapping, block, &erofs_iomap_ops);
+}
 
-	if (bio)
-		submit_bio(bio);
+static int erofs_prepare_dio(struct kiocb *iocb, struct iov_iter *to)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+	loff_t align = iocb->ki_pos | iov_iter_count(to) |
+		iov_iter_alignment(to);
+	struct block_device *bdev = inode->i_sb->s_bdev;
+	unsigned int blksize_mask;
+
+	if (bdev)
+		blksize_mask = (1 << ilog2(bdev_logical_block_size(bdev))) - 1;
+	else
+		blksize_mask = (1 << inode->i_blkbits) - 1;
+
+	if (align & blksize_mask)
+		return -EINVAL;
 	return 0;
 }
 
-static void erofs_raw_access_readahead(struct readahead_control *rac)
+static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
-	erofs_off_t last_block;
-	unsigned int eblks;
-	struct bio *bio = NULL;
-	struct page *page;
-
-	trace_erofs_readpages(rac->mapping->host, readahead_index(rac),
-			readahead_count(rac), true);
-
-	while ((page = readahead_page(rac))) {
-		prefetchw(&page->flags);
-
-		bio = erofs_read_raw_page(bio, rac->mapping, page, &last_block,
-				readahead_count(rac), &eblks, true);
-
-		/* all the page errors are ignored when readahead */
-		if (IS_ERR(bio)) {
-			pr_err("%s, readahead error at page %lu of nid %llu\n",
-			       __func__, page->index,
-			       EROFS_I(rac->mapping->host)->nid);
-
-			bio = NULL;
-		}
-
-		put_page(page);
+	/* no need taking (shared) inode lock since it's a ro filesystem */
+	if (!iov_iter_count(to))
+		return 0;
+
+#ifdef CONFIG_FS_DAX
+	if (IS_DAX(iocb->ki_filp->f_mapping->host))
+		return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
+#endif
+	if (iocb->ki_flags & IOCB_DIRECT) {
+		int err = erofs_prepare_dio(iocb, to);
+
+		if (!err)
+			return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
+					    NULL, 0);
+		if (err < 0)
+			return err;
 	}
+	return filemap_read(iocb, to, 0);
+}
+
+/* for uncompressed (aligned) files and raw access for other files */
+const struct address_space_operations erofs_raw_access_aops = {
+	.readpage = erofs_readpage,
+	.readahead = erofs_readahead,
+	.bmap = erofs_bmap,
+	.direct_IO = noop_direct_IO,
+};
 
-	if (bio)
-		submit_bio(bio);
+#ifdef CONFIG_FS_DAX
+static vm_fault_t erofs_dax_huge_fault(struct vm_fault *vmf,
+		enum page_entry_size pe_size)
+{
+	return dax_iomap_fault(vmf, pe_size, NULL, NULL, &erofs_iomap_ops);
 }
 
-static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
+static vm_fault_t erofs_dax_fault(struct vm_fault *vmf)
 {
-	struct inode *inode = mapping->host;
-	struct erofs_map_blocks map = {
-		.m_la = blknr_to_addr(block),
-	};
+	return erofs_dax_huge_fault(vmf, PE_SIZE_PTE);
+}
 
-	if (EROFS_I(inode)->datalayout == EROFS_INODE_FLAT_INLINE) {
-		erofs_blk_t blks = i_size_read(inode) >> LOG_BLOCK_SIZE;
+static const struct vm_operations_struct erofs_dax_vm_ops = {
+	.fault		= erofs_dax_fault,
+	.huge_fault	= erofs_dax_huge_fault,
+};
 
-		if (block >> LOG_SECTORS_PER_BLOCK >= blks)
-			return 0;
-	}
+static int erofs_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!IS_DAX(file_inode(file)))
+		return generic_file_readonly_mmap(file, vma);
 
-	if (!erofs_map_blocks_flatmode(inode, &map, EROFS_GET_BLOCKS_RAW))
-		return erofs_blknr(map.m_pa);
+	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
+		return -EINVAL;
 
+	vma->vm_ops = &erofs_dax_vm_ops;
+	vma->vm_flags |= VM_HUGEPAGE;
 	return 0;
 }
-
-/* for uncompressed (aligned) files and raw access for other files */
-const struct address_space_operations erofs_raw_access_aops = {
-	.readpage = erofs_raw_access_readpage,
-	.readahead = erofs_raw_access_readahead,
-	.bmap = erofs_bmap,
+#else
+#define erofs_file_mmap	generic_file_readonly_mmap
+#endif
+
+const struct file_operations erofs_file_fops = {
+	.llseek		= generic_file_llseek,
+	.read_iter	= erofs_file_read_iter,
+	.mmap		= erofs_file_mmap,
+	.splice_read	= generic_file_splice_read,
 };
diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
index 0f8da74570b4..b0b23f41abc3 100644
--- a/fs/erofs/erofs_fs.h
+++ b/fs/erofs/erofs_fs.h
@@ -4,6 +4,7 @@
  *
  * Copyright (C) 2017-2018 HUAWEI, Inc.
  *             https://www.huawei.com/
+ * Copyright (C) 2021, Alibaba Cloud
  */
 #ifndef __EROFS_FS_H
 #define __EROFS_FS_H
@@ -19,10 +20,12 @@
 #define EROFS_FEATURE_INCOMPAT_LZ4_0PADDING	0x00000001
 #define EROFS_FEATURE_INCOMPAT_COMPR_CFGS	0x00000002
 #define EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER	0x00000002
+#define EROFS_FEATURE_INCOMPAT_CHUNKED_FILE	0x00000004
 #define EROFS_ALL_FEATURE_INCOMPAT		\
 	(EROFS_FEATURE_INCOMPAT_LZ4_0PADDING | \
 	 EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
-	 EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER)
+	 EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \
+	 EROFS_FEATURE_INCOMPAT_CHUNKED_FILE)
 
 #define EROFS_SB_EXTSLOT_SIZE	16
 
@@ -64,13 +67,16 @@ struct erofs_super_block {
  * inode, [xattrs], last_inline_data, ... | ... | no-holed data
  * 3 - inode compression D:
  * inode, [xattrs], map_header, extents ... | ...
- * 4~7 - reserved
+ * 4 - inode chunk-based E:
+ * inode, [xattrs], chunk indexes ... | ...
+ * 5~7 - reserved
  */
 enum {
 	EROFS_INODE_FLAT_PLAIN			= 0,
 	EROFS_INODE_FLAT_COMPRESSION_LEGACY	= 1,
 	EROFS_INODE_FLAT_INLINE			= 2,
 	EROFS_INODE_FLAT_COMPRESSION		= 3,
+	EROFS_INODE_CHUNK_BASED			= 4,
 	EROFS_INODE_DATALAYOUT_MAX
 };
 
@@ -90,6 +96,19 @@ static inline bool erofs_inode_is_data_compressed(unsigned int datamode)
 #define EROFS_I_ALL	\
 	((1 << (EROFS_I_DATALAYOUT_BIT + EROFS_I_DATALAYOUT_BITS)) - 1)
 
+/* indicate chunk blkbits, thus 'chunksize = blocksize << chunk blkbits' */
+#define EROFS_CHUNK_FORMAT_BLKBITS_MASK		0x001F
+/* with chunk indexes or just a 4-byte blkaddr array */
+#define EROFS_CHUNK_FORMAT_INDEXES		0x0020
+
+#define EROFS_CHUNK_FORMAT_ALL	\
+	(EROFS_CHUNK_FORMAT_BLKBITS_MASK | EROFS_CHUNK_FORMAT_INDEXES)
+
+struct erofs_inode_chunk_info {
+	__le16 format;		/* chunk blkbits, etc. */
+	__le16 reserved;
+};
+
 /* 32-byte reduced form of an ondisk inode */
 struct erofs_inode_compact {
 	__le16 i_format;	/* inode format hints */
@@ -107,6 +126,9 @@ struct erofs_inode_compact {
 
 		/* for device files, used to indicate old/new device # */
 		__le32 rdev;
+
+		/* for chunk-based files, it contains the summary info */
+		struct erofs_inode_chunk_info c;
 	} i_u;
 	__le32 i_ino;           /* only used for 32-bit stat compatibility */
 	__le16 i_uid;
@@ -135,6 +157,9 @@ struct erofs_inode_extended {
 
 		/* for device files, used to indicate old/new device # */
 		__le32 rdev;
+
+		/* for chunk-based files, it contains the summary info */
+		struct erofs_inode_chunk_info c;
 	} i_u;
 
 	/* only used for 32-bit stat compatibility */
@@ -204,6 +229,19 @@ static inline unsigned int erofs_xattr_entry_size(struct erofs_xattr_entry *e)
 				 e->e_name_len + le16_to_cpu(e->e_value_size));
 }
 
+/* represent a zeroed chunk (hole) */
+#define EROFS_NULL_ADDR			-1
+
+/* 4-byte block address array */
+#define EROFS_BLOCK_MAP_ENTRY_SIZE	sizeof(__le32)
+
+/* 8-byte inode chunk indexes */
+struct erofs_inode_chunk_index {
+	__le16 advise;		/* always 0, don't care for now */
+	__le16 device_id;	/* back-end storage id, always 0 for now */
+	__le32 blkaddr;		/* start block address of this inode chunk */
+};
+
 /* maximum supported size of a physical compression cluster */
 #define Z_EROFS_PCLUSTER_MAX_SIZE	(1024 * 1024)
 
@@ -338,9 +376,14 @@ static inline void erofs_check_ondisk_layout_definitions(void)
 	BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64);
 	BUILD_BUG_ON(sizeof(struct erofs_xattr_ibody_header) != 12);
 	BUILD_BUG_ON(sizeof(struct erofs_xattr_entry) != 4);
+	BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_info) != 4);
+	BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) != 8);
 	BUILD_BUG_ON(sizeof(struct z_erofs_map_header) != 8);
 	BUILD_BUG_ON(sizeof(struct z_erofs_vle_decompressed_index) != 8);
 	BUILD_BUG_ON(sizeof(struct erofs_dirent) != 12);
+	/* keep in sync between 2 index structures for better extendibility */
+	BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) !=
+		     sizeof(struct z_erofs_vle_decompressed_index));
 
 	BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) <
 		     Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1);
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
index aa8a0d770ba3..31ac3a73b390 100644
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -2,6 +2,7 @@
 /*
  * Copyright (C) 2017-2018 HUAWEI, Inc.
  *             https://www.huawei.com/
+ * Copyright (C) 2021, Alibaba Cloud
  */
 #include "xattr.h"
 
@@ -122,8 +123,11 @@ static struct page *erofs_read_inode(struct inode *inode,
 		/* total blocks for compressed files */
 		if (erofs_inode_is_data_compressed(vi->datalayout))
 			nblks = le32_to_cpu(die->i_u.compressed_blocks);
-
+		else if (vi->datalayout == EROFS_INODE_CHUNK_BASED)
+			/* fill chunked inode summary info */
+			vi->chunkformat = le16_to_cpu(die->i_u.c.format);
 		kfree(copied);
+		copied = NULL;
 		break;
 	case EROFS_INODE_LAYOUT_COMPACT:
 		vi->inode_isize = sizeof(struct erofs_inode_compact);
@@ -160,6 +164,8 @@ static struct page *erofs_read_inode(struct inode *inode,
 		inode->i_size = le32_to_cpu(dic->i_size);
 		if (erofs_inode_is_data_compressed(vi->datalayout))
 			nblks = le32_to_cpu(dic->i_u.compressed_blocks);
+		else if (vi->datalayout == EROFS_INODE_CHUNK_BASED)
+			vi->chunkformat = le16_to_cpu(dic->i_u.c.format);
 		break;
 	default:
 		erofs_err(inode->i_sb,
@@ -169,11 +175,26 @@ static struct page *erofs_read_inode(struct inode *inode,
 		goto err_out;
 	}
 
+	if (vi->datalayout == EROFS_INODE_CHUNK_BASED) {
+		if (!(vi->chunkformat & EROFS_CHUNK_FORMAT_ALL)) {
+			erofs_err(inode->i_sb,
+				  "unsupported chunk format %x of nid %llu",
+				  vi->chunkformat, vi->nid);
+			err = -EOPNOTSUPP;
+			goto err_out;
+		}
+		vi->chunkbits = LOG_BLOCK_SIZE +
+			(vi->chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK);
+	}
 	inode->i_mtime.tv_sec = inode->i_ctime.tv_sec;
 	inode->i_atime.tv_sec = inode->i_ctime.tv_sec;
 	inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec;
 	inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec;
 
+	inode->i_flags &= ~S_DAX;
+	if (test_opt(&sbi->ctx, DAX_ALWAYS) && S_ISREG(inode->i_mode) &&
+	    vi->datalayout == EROFS_INODE_FLAT_PLAIN)
+		inode->i_flags |= S_DAX;
 	if (!nblks)
 		/* measure inode.i_blocks as generic filesystems */
 		inode->i_blocks = roundup(inode->i_size, EROFS_BLKSIZ) >> 9;
@@ -247,7 +268,10 @@ static int erofs_fill_inode(struct inode *inode, int isdir)
 	switch (inode->i_mode & S_IFMT) {
 	case S_IFREG:
 		inode->i_op = &erofs_generic_iops;
-		inode->i_fop = &generic_ro_fops;
+		if (erofs_inode_is_data_compressed(vi->datalayout))
+			inode->i_fop = &generic_ro_fops;
+		else
+			inode->i_fop = &erofs_file_fops;
 		break;
 	case S_IFDIR:
 		inode->i_op = &erofs_dir_iops;
@@ -358,6 +382,7 @@ const struct inode_operations erofs_generic_iops = {
 	.getattr = erofs_getattr,
 	.listxattr = erofs_listxattr,
 	.get_acl = erofs_get_acl,
+	.fiemap = erofs_fiemap,
 };
 
 const struct inode_operations erofs_symlink_iops = {
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 543c2ff97d30..9524e155b38f 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -2,6 +2,7 @@
 /*
  * Copyright (C) 2017-2018 HUAWEI, Inc.
  *             https://www.huawei.com/
+ * Copyright (C) 2021, Alibaba Cloud
  */
 #ifndef __EROFS_INTERNAL_H
 #define __EROFS_INTERNAL_H
@@ -15,6 +16,7 @@
 #include <linux/magic.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <linux/iomap.h>
 #include "erofs_fs.h"
 
 /* redefine pr_fmt "erofs: " */
@@ -83,6 +85,7 @@ struct erofs_sb_info {
 
 	struct erofs_sb_lz4_info lz4;
 #endif	/* CONFIG_EROFS_FS_ZIP */
+	struct dax_device *dax_dev;
 	u32 blocks;
 	u32 meta_blkaddr;
 #ifdef CONFIG_EROFS_FS_XATTR
@@ -115,6 +118,8 @@ struct erofs_sb_info {
 /* Mount flags set via mount options or defaults */
 #define EROFS_MOUNT_XATTR_USER		0x00000010
 #define EROFS_MOUNT_POSIX_ACL		0x00000020
+#define EROFS_MOUNT_DAX_ALWAYS		0x00000040
+#define EROFS_MOUNT_DAX_NEVER		0x00000080
 
 #define clear_opt(ctx, option)	((ctx)->mount_opt &= ~EROFS_MOUNT_##option)
 #define set_opt(ctx, option)	((ctx)->mount_opt |= EROFS_MOUNT_##option)
@@ -257,6 +262,10 @@ struct erofs_inode {
 
 	union {
 		erofs_blk_t raw_blkaddr;
+		struct {
+			unsigned short	chunkformat;
+			unsigned char	chunkbits;
+		};
 #ifdef CONFIG_EROFS_FS_ZIP
 		struct {
 			unsigned short z_advise;
@@ -353,8 +362,15 @@ struct erofs_map_blocks {
 
 /* Flags used by erofs_map_blocks_flatmode() */
 #define EROFS_GET_BLOCKS_RAW    0x0001
+/*
+ * Used to get the exact decompressed length, e.g. fiemap (consider lookback
+ * approach instead if possible since it's more metadata lightweight.)
+ */
+#define EROFS_GET_BLOCKS_FIEMAP	0x0002
 
 /* zmap.c */
+extern const struct iomap_ops z_erofs_iomap_report_ops;
+
 #ifdef CONFIG_EROFS_FS_ZIP
 int z_erofs_fill_inode(struct inode *inode);
 int z_erofs_map_blocks_iter(struct inode *inode,
@@ -371,7 +387,10 @@ static inline int z_erofs_map_blocks_iter(struct inode *inode,
 #endif	/* !CONFIG_EROFS_FS_ZIP */
 
 /* data.c */
+extern const struct file_operations erofs_file_fops;
 struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr);
+int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+		 u64 start, u64 len);
 
 /* inode.c */
 static inline unsigned long erofs_inode_hash(erofs_nid_t nid)
@@ -441,8 +460,7 @@ int __init z_erofs_init_zip_subsystem(void);
 void z_erofs_exit_zip_subsystem(void);
 int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
 				       struct erofs_workgroup *egrp);
-int erofs_try_to_free_cached_page(struct address_space *mapping,
-				  struct page *page);
+int erofs_try_to_free_cached_page(struct page *page);
 int z_erofs_load_lz4_config(struct super_block *sb,
 			    struct erofs_super_block *dsb,
 			    struct z_erofs_lz4_cfgs *lz4, int len);
diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c
index a8271ce5e13f..8629e616028c 100644
--- a/fs/erofs/namei.c
+++ b/fs/erofs/namei.c
@@ -245,4 +245,5 @@ const struct inode_operations erofs_dir_iops = {
 	.getattr = erofs_getattr,
 	.listxattr = erofs_listxattr,
 	.get_acl = erofs_get_acl,
+	.fiemap = erofs_fiemap,
 };
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 8fc6c04b54f4..11b88559f8bf 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -11,6 +11,7 @@
 #include <linux/crc32c.h>
 #include <linux/fs_context.h>
 #include <linux/fs_parser.h>
+#include <linux/dax.h>
 #include "xattr.h"
 
 #define CREATE_TRACE_POINTS
@@ -355,6 +356,8 @@ enum {
 	Opt_user_xattr,
 	Opt_acl,
 	Opt_cache_strategy,
+	Opt_dax,
+	Opt_dax_enum,
 	Opt_err
 };
 
@@ -365,14 +368,47 @@ static const struct constant_table erofs_param_cache_strategy[] = {
 	{}
 };
 
+static const struct constant_table erofs_dax_param_enums[] = {
+	{"always",	EROFS_MOUNT_DAX_ALWAYS},
+	{"never",	EROFS_MOUNT_DAX_NEVER},
+	{}
+};
+
 static const struct fs_parameter_spec erofs_fs_parameters[] = {
 	fsparam_flag_no("user_xattr",	Opt_user_xattr),
 	fsparam_flag_no("acl",		Opt_acl),
 	fsparam_enum("cache_strategy",	Opt_cache_strategy,
 		     erofs_param_cache_strategy),
+	fsparam_flag("dax",             Opt_dax),
+	fsparam_enum("dax",		Opt_dax_enum, erofs_dax_param_enums),
 	{}
 };
 
+static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode)
+{
+#ifdef CONFIG_FS_DAX
+	struct erofs_fs_context *ctx = fc->fs_private;
+
+	switch (mode) {
+	case EROFS_MOUNT_DAX_ALWAYS:
+		warnfc(fc, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
+		set_opt(ctx, DAX_ALWAYS);
+		clear_opt(ctx, DAX_NEVER);
+		return true;
+	case EROFS_MOUNT_DAX_NEVER:
+		set_opt(ctx, DAX_NEVER);
+		clear_opt(ctx, DAX_ALWAYS);
+		return true;
+	default:
+		DBG_BUGON(1);
+		return false;
+	}
+#else
+	errorfc(fc, "dax options not supported");
+	return false;
+#endif
+}
+
 static int erofs_fc_parse_param(struct fs_context *fc,
 				struct fs_parameter *param)
 {
@@ -412,6 +448,14 @@ static int erofs_fc_parse_param(struct fs_context *fc,
 		errorfc(fc, "compression not supported, cache_strategy ignored");
 #endif
 		break;
+	case Opt_dax:
+		if (!erofs_fc_set_dax_mode(fc, EROFS_MOUNT_DAX_ALWAYS))
+			return -EINVAL;
+		break;
+	case Opt_dax_enum:
+		if (!erofs_fc_set_dax_mode(fc, result.uint_32))
+			return -EINVAL;
+		break;
 	default:
 		return -ENOPARAM;
 	}
@@ -430,7 +474,7 @@ static int erofs_managed_cache_releasepage(struct page *page, gfp_t gfp_mask)
 	DBG_BUGON(mapping->a_ops != &managed_cache_aops);
 
 	if (PagePrivate(page))
-		ret = erofs_try_to_free_cached_page(mapping, page);
+		ret = erofs_try_to_free_cached_page(page);
 
 	return ret;
 }
@@ -496,10 +540,16 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
 		return -ENOMEM;
 
 	sb->s_fs_info = sbi;
+	sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
 	err = erofs_read_superblock(sb);
 	if (err)
 		return err;
 
+	if (test_opt(ctx, DAX_ALWAYS) &&
+	    !dax_supported(sbi->dax_dev, sb->s_bdev, EROFS_BLKSIZ, 0, bdev_nr_sectors(sb->s_bdev))) {
+		errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
+		clear_opt(ctx, DAX_ALWAYS);
+	}
 	sb->s_flags |= SB_RDONLY | SB_NOATIME;
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
 	sb->s_time_gran = 1;
@@ -609,6 +659,7 @@ static void erofs_kill_sb(struct super_block *sb)
 	sbi = EROFS_SB(sb);
 	if (!sbi)
 		return;
+	fs_put_dax(sbi->dax_dev);
 	kfree(sbi);
 	sb->s_fs_info = NULL;
 }
@@ -711,8 +762,8 @@ static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
 static int erofs_show_options(struct seq_file *seq, struct dentry *root)
 {
-	struct erofs_sb_info *sbi __maybe_unused = EROFS_SB(root->d_sb);
-	struct erofs_fs_context *ctx __maybe_unused = &sbi->ctx;
+	struct erofs_sb_info *sbi = EROFS_SB(root->d_sb);
+	struct erofs_fs_context *ctx = &sbi->ctx;
 
 #ifdef CONFIG_EROFS_FS_XATTR
 	if (test_opt(ctx, XATTR_USER))
@@ -734,6 +785,10 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root)
 	else if (ctx->cache_strategy == EROFS_ZIP_CACHE_READAROUND)
 		seq_puts(seq, ",cache_strategy=readaround");
 #endif
+	if (test_opt(ctx, DAX_ALWAYS))
+		seq_puts(seq, ",dax=always");
+	if (test_opt(ctx, DAX_NEVER))
+		seq_puts(seq, ",dax=never");
 	return 0;
 }
 
diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
index 8dd54b420a1d..778f2c52295d 100644
--- a/fs/erofs/xattr.c
+++ b/fs/erofs/xattr.c
@@ -673,12 +673,15 @@ ssize_t erofs_listxattr(struct dentry *dentry,
 }
 
 #ifdef CONFIG_EROFS_FS_POSIX_ACL
-struct posix_acl *erofs_get_acl(struct inode *inode, int type)
+struct posix_acl *erofs_get_acl(struct inode *inode, int type, bool rcu)
 {
 	struct posix_acl *acl;
 	int prefix, rc;
 	char *value = NULL;
 
+	if (rcu)
+		return ERR_PTR(-ECHILD);
+
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		prefix = EROFS_XATTR_INDEX_POSIX_ACL_ACCESS;
diff --git a/fs/erofs/xattr.h b/fs/erofs/xattr.h
index 366dcb400525..94090c74b3f7 100644
--- a/fs/erofs/xattr.h
+++ b/fs/erofs/xattr.h
@@ -80,7 +80,7 @@ static inline int erofs_getxattr(struct inode *inode, int index,
 #endif	/* !CONFIG_EROFS_FS_XATTR */
 
 #ifdef CONFIG_EROFS_FS_POSIX_ACL
-struct posix_acl *erofs_get_acl(struct inode *inode, int type);
+struct posix_acl *erofs_get_acl(struct inode *inode, int type, bool rcu);
 #else
 #define erofs_get_acl	(NULL)
 #endif
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index cb4d0889eca9..11c7a1aaebad 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -309,7 +309,6 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
 {
 	struct z_erofs_pcluster *const pcl =
 		container_of(grp, struct z_erofs_pcluster, obj);
-	struct address_space *const mapping = MNGD_MAPPING(sbi);
 	int i;
 
 	/*
@@ -326,7 +325,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
 		if (!trylock_page(page))
 			return -EBUSY;
 
-		if (page->mapping != mapping)
+		if (!erofs_page_is_managed(sbi, page))
 			continue;
 
 		/* barrier is implied in the following 'unlock_page' */
@@ -337,8 +336,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
 	return 0;
 }
 
-int erofs_try_to_free_cached_page(struct address_space *mapping,
-				  struct page *page)
+int erofs_try_to_free_cached_page(struct page *page)
 {
 	struct z_erofs_pcluster *const pcl = (void *)page_private(page);
 	int ret = 0;	/* 0 - busy */
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index f68aea4baed7..9fb98d85a3ce 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -212,9 +212,34 @@ static unsigned int decode_compactedbits(unsigned int lobits,
 	return lo;
 }
 
+static int get_compacted_la_distance(unsigned int lclusterbits,
+				     unsigned int encodebits,
+				     unsigned int vcnt, u8 *in, int i)
+{
+	const unsigned int lomask = (1 << lclusterbits) - 1;
+	unsigned int lo, d1 = 0;
+	u8 type;
+
+	DBG_BUGON(i >= vcnt);
+
+	do {
+		lo = decode_compactedbits(lclusterbits, lomask,
+					  in, encodebits * i, &type);
+
+		if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
+			return d1;
+		++d1;
+	} while (++i < vcnt);
+
+	/* vcnt - 1 (Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) item */
+	if (!(lo & Z_EROFS_VLE_DI_D0_CBLKCNT))
+		d1 += lo - 1;
+	return d1;
+}
+
 static int unpack_compacted_index(struct z_erofs_maprecorder *m,
 				  unsigned int amortizedshift,
-				  unsigned int eofs)
+				  unsigned int eofs, bool lookahead)
 {
 	struct erofs_inode *const vi = EROFS_I(m->inode);
 	const unsigned int lclusterbits = vi->z_logical_clusterbits;
@@ -243,6 +268,11 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
 	m->type = type;
 	if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
 		m->clusterofs = 1 << lclusterbits;
+
+		/* figure out lookahead_distance: delta[1] if needed */
+		if (lookahead)
+			m->delta[1] = get_compacted_la_distance(lclusterbits,
+						encodebits, vcnt, in, i);
 		if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) {
 			if (!big_pcluster) {
 				DBG_BUGON(1);
@@ -313,7 +343,7 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
 }
 
 static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
-					    unsigned long lcn)
+					    unsigned long lcn, bool lookahead)
 {
 	struct inode *const inode = m->inode;
 	struct erofs_inode *const vi = EROFS_I(inode);
@@ -364,11 +394,12 @@ out:
 	err = z_erofs_reload_indexes(m, erofs_blknr(pos));
 	if (err)
 		return err;
-	return unpack_compacted_index(m, amortizedshift, erofs_blkoff(pos));
+	return unpack_compacted_index(m, amortizedshift, erofs_blkoff(pos),
+				      lookahead);
 }
 
 static int z_erofs_load_cluster_from_disk(struct z_erofs_maprecorder *m,
-					  unsigned int lcn)
+					  unsigned int lcn, bool lookahead)
 {
 	const unsigned int datamode = EROFS_I(m->inode)->datalayout;
 
@@ -376,7 +407,7 @@ static int z_erofs_load_cluster_from_disk(struct z_erofs_maprecorder *m,
 		return legacy_load_cluster_from_disk(m, lcn);
 
 	if (datamode == EROFS_INODE_FLAT_COMPRESSION)
-		return compacted_load_cluster_from_disk(m, lcn);
+		return compacted_load_cluster_from_disk(m, lcn, lookahead);
 
 	return -EINVAL;
 }
@@ -399,7 +430,7 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
 
 	/* load extent head logical cluster if needed */
 	lcn -= lookback_distance;
-	err = z_erofs_load_cluster_from_disk(m, lcn);
+	err = z_erofs_load_cluster_from_disk(m, lcn, false);
 	if (err)
 		return err;
 
@@ -450,7 +481,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
 	if (m->compressedlcs)
 		goto out;
 
-	err = z_erofs_load_cluster_from_disk(m, lcn);
+	err = z_erofs_load_cluster_from_disk(m, lcn, false);
 	if (err)
 		return err;
 
@@ -498,6 +529,48 @@ err_bonus_cblkcnt:
 	return -EFSCORRUPTED;
 }
 
+static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m)
+{
+	struct inode *inode = m->inode;
+	struct erofs_inode *vi = EROFS_I(inode);
+	struct erofs_map_blocks *map = m->map;
+	unsigned int lclusterbits = vi->z_logical_clusterbits;
+	u64 lcn = m->lcn, headlcn = map->m_la >> lclusterbits;
+	int err;
+
+	do {
+		/* handle the last EOF pcluster (no next HEAD lcluster) */
+		if ((lcn << lclusterbits) >= inode->i_size) {
+			map->m_llen = inode->i_size - map->m_la;
+			return 0;
+		}
+
+		err = z_erofs_load_cluster_from_disk(m, lcn, true);
+		if (err)
+			return err;
+
+		if (m->type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
+			DBG_BUGON(!m->delta[1] &&
+				  m->clusterofs != 1 << lclusterbits);
+		} else if (m->type == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN ||
+			   m->type == Z_EROFS_VLE_CLUSTER_TYPE_HEAD) {
+			/* go on until the next HEAD lcluster */
+			if (lcn != headlcn)
+				break;
+			m->delta[1] = 1;
+		} else {
+			erofs_err(inode->i_sb, "unknown type %u @ lcn %llu of nid %llu",
+				  m->type, lcn, vi->nid);
+			DBG_BUGON(1);
+			return -EOPNOTSUPP;
+		}
+		lcn += m->delta[1];
+	} while (m->delta[1]);
+
+	map->m_llen = (lcn << lclusterbits) + m->clusterofs - map->m_la;
+	return 0;
+}
+
 int z_erofs_map_blocks_iter(struct inode *inode,
 			    struct erofs_map_blocks *map,
 			    int flags)
@@ -531,7 +604,7 @@ int z_erofs_map_blocks_iter(struct inode *inode,
 	initial_lcn = ofs >> lclusterbits;
 	endoff = ofs & ((1 << lclusterbits) - 1);
 
-	err = z_erofs_load_cluster_from_disk(&m, initial_lcn);
+	err = z_erofs_load_cluster_from_disk(&m, initial_lcn, false);
 	if (err)
 		goto unmap_out;
 
@@ -581,6 +654,12 @@ int z_erofs_map_blocks_iter(struct inode *inode,
 	err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
 	if (err)
 		goto out;
+
+	if (flags & EROFS_GET_BLOCKS_FIEMAP) {
+		err = z_erofs_get_extent_decompressedlen(&m);
+		if (!err)
+			map->m_flags |= EROFS_MAP_FULL_MAPPED;
+	}
 unmap_out:
 	if (m.kaddr)
 		kunmap_atomic(m.kaddr);
@@ -596,3 +675,41 @@ out:
 	DBG_BUGON(err < 0 && err != -ENOMEM);
 	return err;
 }
+
+static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset,
+				loff_t length, unsigned int flags,
+				struct iomap *iomap, struct iomap *srcmap)
+{
+	int ret;
+	struct erofs_map_blocks map = { .m_la = offset };
+
+	ret = z_erofs_map_blocks_iter(inode, &map, EROFS_GET_BLOCKS_FIEMAP);
+	if (map.mpage)
+		put_page(map.mpage);
+	if (ret < 0)
+		return ret;
+
+	iomap->bdev = inode->i_sb->s_bdev;
+	iomap->offset = map.m_la;
+	iomap->length = map.m_llen;
+	if (map.m_flags & EROFS_MAP_MAPPED) {
+		iomap->type = IOMAP_MAPPED;
+		iomap->addr = map.m_pa;
+	} else {
+		iomap->type = IOMAP_HOLE;
+		iomap->addr = IOMAP_NULL_ADDR;
+		/*
+		 * No strict rule how to describe extents for post EOF, yet
+		 * we need do like below. Otherwise, iomap itself will get
+		 * into an endless loop on post EOF.
+		 */
+		if (iomap->offset >= inode->i_size)
+			iomap->length = length + map.m_la - offset;
+	}
+	iomap->flags = 0;
+	return 0;
+}
+
+const struct iomap_ops z_erofs_iomap_report_ops = {
+	.iomap_begin = z_erofs_iomap_begin_report,
+};
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 1e596e1d0bba..06f4c5ae1451 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -723,7 +723,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
 	 */
 	call_rcu(&epi->rcu, epi_rcu_free);
 
-	atomic_long_dec(&ep->user->epoll_watches);
+	percpu_counter_dec(&ep->user->epoll_watches);
 
 	return 0;
 }
@@ -1439,7 +1439,6 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
 {
 	int error, pwake = 0;
 	__poll_t revents;
-	long user_watches;
 	struct epitem *epi;
 	struct ep_pqueue epq;
 	struct eventpoll *tep = NULL;
@@ -1449,11 +1448,15 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
 
 	lockdep_assert_irqs_enabled();
 
-	user_watches = atomic_long_read(&ep->user->epoll_watches);
-	if (unlikely(user_watches >= max_user_watches))
+	if (unlikely(percpu_counter_compare(&ep->user->epoll_watches,
+					    max_user_watches) >= 0))
 		return -ENOSPC;
-	if (!(epi = kmem_cache_zalloc(epi_cache, GFP_KERNEL)))
+	percpu_counter_inc(&ep->user->epoll_watches);
+
+	if (!(epi = kmem_cache_zalloc(epi_cache, GFP_KERNEL))) {
+		percpu_counter_dec(&ep->user->epoll_watches);
 		return -ENOMEM;
+	}
 
 	/* Item initialization follow here ... */
 	INIT_LIST_HEAD(&epi->rdllink);
@@ -1466,17 +1469,16 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
 		mutex_lock_nested(&tep->mtx, 1);
 	/* Add the current item to the list of active epoll hook for this file */
 	if (unlikely(attach_epitem(tfile, epi) < 0)) {
-		kmem_cache_free(epi_cache, epi);
 		if (tep)
 			mutex_unlock(&tep->mtx);
+		kmem_cache_free(epi_cache, epi);
+		percpu_counter_dec(&ep->user->epoll_watches);
 		return -ENOMEM;
 	}
 
 	if (full_check && !tep)
 		list_file(tfile);
 
-	atomic_long_inc(&ep->user->epoll_watches);
-
 	/*
 	 * Add the current item to the RB tree. All RB tree operations are
 	 * protected by "mtx", and ep_insert() is called with "mtx" held.
@@ -1684,8 +1686,8 @@ static int ep_send_events(struct eventpoll *ep,
 		if (!revents)
 			continue;
 
-		if (__put_user(revents, &events->events) ||
-		    __put_user(epi->event.data, &events->data)) {
+		events = epoll_put_uevent(revents, epi->event.data, events);
+		if (!events) {
 			list_add(&epi->rdllink, &txlist);
 			ep_pm_stay_awake(epi);
 			if (!res)
@@ -1693,7 +1695,6 @@ static int ep_send_events(struct eventpoll *ep,
 			break;
 		}
 		res++;
-		events++;
 		if (epi->event.events & EPOLLONESHOT)
 			epi->event.events &= EP_PRIVATE_BITS;
 		else if (!(epi->event.events & EPOLLET)) {
diff --git a/fs/exec.c b/fs/exec.c
index 3b78b22addfb..a098c133d8d7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -217,8 +217,10 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 	 * We are doing an exec().  'current' is the process
 	 * doing the exec and bprm->mm is the new process's mm.
 	 */
+	mmap_read_lock(bprm->mm);
 	ret = get_user_pages_remote(bprm->mm, pos, 1, gup_flags,
 			&page, NULL, NULL);
+	mmap_read_unlock(bprm->mm);
 	if (ret <= 0)
 		return NULL;
 
@@ -574,7 +576,7 @@ static int copy_strings(int argc, struct user_arg_ptr argv,
 				}
 
 				if (kmapped_page) {
-					flush_kernel_dcache_page(kmapped_page);
+					flush_dcache_page(kmapped_page);
 					kunmap(kmapped_page);
 					put_arg_page(kmapped_page);
 				}
@@ -592,7 +594,7 @@ static int copy_strings(int argc, struct user_arg_ptr argv,
 	ret = 0;
 out:
 	if (kmapped_page) {
-		flush_kernel_dcache_page(kmapped_page);
+		flush_dcache_page(kmapped_page);
 		kunmap(kmapped_page);
 		put_arg_page(kmapped_page);
 	}
@@ -634,7 +636,7 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm)
 		kaddr = kmap_atomic(page);
 		flush_arg_page(bprm, pos & PAGE_MASK, page);
 		memcpy(kaddr + offset_in_page(pos), arg, bytes_to_copy);
-		flush_kernel_dcache_page(page);
+		flush_dcache_page(page);
 		kunmap_atomic(kaddr);
 		put_arg_page(page);
 	}
@@ -1270,7 +1272,9 @@ int begin_new_exec(struct linux_binprm * bprm)
 	 * not visibile until then. This also enables the update
 	 * to be lockless.
 	 */
-	set_mm_exe_file(bprm->mm, bprm->file);
+	retval = set_mm_exe_file(bprm->mm, bprm->file);
+	if (retval)
+		goto out;
 
 	/* If the binary is not readable then enforce mm->dumpable=0 */
 	would_dump(bprm, bprm->file);
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index b9a9db98e94b..bf298967c5b8 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -141,13 +141,16 @@ fail:
  * inode->i_mutex: don't care
  */
 struct posix_acl *
-ext2_get_acl(struct inode *inode, int type)
+ext2_get_acl(struct inode *inode, int type, bool rcu)
 {
 	int name_index;
 	char *value = NULL;
 	struct posix_acl *acl;
 	int retval;
 
+	if (rcu)
+		return ERR_PTR(-ECHILD);
+
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS;
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index 917db5f6630a..925ab6287d35 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -55,7 +55,7 @@ static inline int ext2_acl_count(size_t size)
 #ifdef CONFIG_EXT2_FS_POSIX_ACL
 
 /* acl.c */
-extern struct posix_acl *ext2_get_acl(struct inode *inode, int type);
+extern struct posix_acl *ext2_get_acl(struct inode *inode, int type, bool rcu);
 extern int ext2_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
 			struct posix_acl *acl, int type);
 extern int ext2_init_acl (struct inode *, struct inode *);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 987bcf32ed46..d8d580b609ba 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -946,7 +946,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 	blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
 
 	if (test_opt(sb, DAX)) {
-		if (!bdev_dax_supported(sb->s_bdev, blocksize)) {
+		if (!dax_supported(dax_dev, sb->s_bdev, blocksize, 0,
+				bdev_nr_sectors(sb->s_bdev))) {
 			ext2_msg(sb, KERN_ERR,
 				"DAX unsupported by block device. Turning off DAX.");
 			clear_opt(sbi->s_mount_opt, DAX);
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 49e7af6cc93f..7d89142e1421 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -10,7 +10,7 @@ ext4-y	:= balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \
 		indirect.o inline.o inode.o ioctl.o mballoc.o migrate.o \
 		mmp.o move_extent.o namei.o page-io.o readpage.o resize.o \
 		super.o symlink.o sysfs.o xattr.o xattr_hurd.o xattr_trusted.o \
-		xattr_user.o fast_commit.o
+		xattr_user.o fast_commit.o orphan.o
 
 ext4-$(CONFIG_EXT4_FS_POSIX_ACL)	+= acl.o
 ext4-$(CONFIG_EXT4_FS_SECURITY)		+= xattr_security.o
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index c5eaffccecc3..0613dfcbfd4a 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -142,13 +142,16 @@ fail:
  * inode->i_mutex: don't care
  */
 struct posix_acl *
-ext4_get_acl(struct inode *inode, int type)
+ext4_get_acl(struct inode *inode, int type, bool rcu)
 {
 	int name_index;
 	char *value = NULL;
 	struct posix_acl *acl;
 	int retval;
 
+	if (rcu)
+		return ERR_PTR(-ECHILD);
+
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index 84b8942a57f2..3219669732bf 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -55,7 +55,7 @@ static inline int ext4_acl_count(size_t size)
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 
 /* acl.c */
-struct posix_acl *ext4_get_acl(struct inode *inode, int type);
+struct posix_acl *ext4_get_acl(struct inode *inode, int type, bool rcu);
 int ext4_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
 		 struct posix_acl *acl, int type);
 extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 9dc6e74b265c..a0fb0c4bdc7c 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -652,8 +652,14 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 	 * possible we just missed a transaction commit that did so
 	 */
 	smp_mb();
-	if (sbi->s_mb_free_pending == 0)
+	if (sbi->s_mb_free_pending == 0) {
+		if (test_opt(sb, DISCARD)) {
+			atomic_inc(&sbi->s_retry_alloc_pending);
+			flush_work(&sbi->s_discard_work);
+			atomic_dec(&sbi->s_retry_alloc_pending);
+		}
 		return ext4_has_free_clusters(sbi, 1, 0);
+	}
 
 	/*
 	 * it's possible we've just missed a transaction commit here,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 7ebaf66b6e31..90ff5acaf11f 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1034,7 +1034,14 @@ struct ext4_inode_info {
 	 */
 	struct rw_semaphore xattr_sem;
 
-	struct list_head i_orphan;	/* unlinked but open inodes */
+	/*
+	 * Inodes with EXT4_STATE_ORPHAN_FILE use i_orphan_idx. Otherwise
+	 * i_orphan is used.
+	 */
+	union {
+		struct list_head i_orphan;	/* unlinked but open inodes */
+		unsigned int i_orphan_idx;	/* Index in orphan file */
+	};
 
 	/* Fast commit related info */
 
@@ -1419,7 +1426,8 @@ struct ext4_super_block {
 	__u8    s_last_error_errcode;
 	__le16  s_encoding;		/* Filename charset encoding */
 	__le16  s_encoding_flags;	/* Filename charset encoding flags */
-	__le32	s_reserved[95];		/* Padding to the end of the block */
+	__le32  s_orphan_file_inum;	/* Inode for tracking orphan inodes */
+	__le32	s_reserved[94];		/* Padding to the end of the block */
 	__le32	s_checksum;		/* crc32c(superblock) */
 };
 
@@ -1438,6 +1446,54 @@ struct ext4_super_block {
 
 #define EXT4_ENC_UTF8_12_1	1
 
+/* Types of ext4 journal triggers */
+enum ext4_journal_trigger_type {
+	EXT4_JTR_ORPHAN_FILE,
+	EXT4_JTR_NONE	/* This must be the last entry for indexing to work! */
+};
+
+#define EXT4_JOURNAL_TRIGGER_COUNT EXT4_JTR_NONE
+
+struct ext4_journal_trigger {
+	struct jbd2_buffer_trigger_type tr_triggers;
+	struct super_block *sb;
+};
+
+static inline struct ext4_journal_trigger *EXT4_TRIGGER(
+				struct jbd2_buffer_trigger_type *trigger)
+{
+	return container_of(trigger, struct ext4_journal_trigger, tr_triggers);
+}
+
+#define EXT4_ORPHAN_BLOCK_MAGIC 0x0b10ca04
+
+/* Structure at the tail of orphan block */
+struct ext4_orphan_block_tail {
+	__le32 ob_magic;
+	__le32 ob_checksum;
+};
+
+static inline int ext4_inodes_per_orphan_block(struct super_block *sb)
+{
+	return (sb->s_blocksize - sizeof(struct ext4_orphan_block_tail)) /
+			sizeof(u32);
+}
+
+struct ext4_orphan_block {
+	atomic_t ob_free_entries;	/* Number of free orphan entries in block */
+	struct buffer_head *ob_bh;	/* Buffer for orphan block */
+};
+
+/*
+ * Info about orphan file.
+ */
+struct ext4_orphan_info {
+	int of_blocks;			/* Number of orphan blocks in a file */
+	__u32 of_csum_seed;		/* Checksum seed for orphan file */
+	struct ext4_orphan_block *of_binfo;	/* Array with info about orphan
+						 * file blocks */
+};
+
 /*
  * fourth extended-fs super-block data in memory
  */
@@ -1492,9 +1548,11 @@ struct ext4_sb_info {
 
 	/* Journaling */
 	struct journal_s *s_journal;
-	struct list_head s_orphan;
-	struct mutex s_orphan_lock;
 	unsigned long s_ext4_flags;		/* Ext4 superblock flags */
+	struct mutex s_orphan_lock;	/* Protects on disk list changes */
+	struct list_head s_orphan;	/* List of orphaned inodes in on disk
+					   list */
+	struct ext4_orphan_info s_orphan_info;
 	unsigned long s_commit_interval;
 	u32 s_max_batch_time;
 	u32 s_min_batch_time;
@@ -1527,6 +1585,9 @@ struct ext4_sb_info {
 	unsigned int s_mb_free_pending;
 	struct list_head s_freed_data_list;	/* List of blocks to be freed
 						   after commit completed */
+	struct list_head s_discard_list;
+	struct work_struct s_discard_work;
+	atomic_t s_retry_alloc_pending;
 	struct rb_root s_mb_avg_fragment_size_root;
 	rwlock_t s_mb_rb_lock;
 	struct list_head *s_mb_largest_free_orders;
@@ -1616,6 +1677,9 @@ struct ext4_sb_info {
 	struct mb_cache *s_ea_inode_cache;
 	spinlock_t s_es_lock ____cacheline_aligned_in_smp;
 
+	/* Journal triggers for checksum computation */
+	struct ext4_journal_trigger s_journal_triggers[EXT4_JOURNAL_TRIGGER_COUNT];
+
 	/* Ratelimit ext4 messages. */
 	struct ratelimit_state s_err_ratelimit_state;
 	struct ratelimit_state s_warning_ratelimit_state;
@@ -1826,6 +1890,7 @@ enum {
 	EXT4_STATE_LUSTRE_EA_INODE,	/* Lustre-style ea_inode */
 	EXT4_STATE_VERITY_IN_PROGRESS,	/* building fs-verity Merkle tree */
 	EXT4_STATE_FC_COMMITTING,	/* Fast commit ongoing */
+	EXT4_STATE_ORPHAN_FILE,		/* Inode orphaned in orphan file */
 };
 
 #define EXT4_INODE_BIT_FNS(name, field, offset)				\
@@ -1927,6 +1992,7 @@ static inline bool ext4_verity_in_progress(struct inode *inode)
  */
 #define EXT4_FEATURE_COMPAT_FAST_COMMIT		0x0400
 #define EXT4_FEATURE_COMPAT_STABLE_INODES	0x0800
+#define EXT4_FEATURE_COMPAT_ORPHAN_FILE		0x1000	/* Orphan file exists */
 
 #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER	0x0001
 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE	0x0002
@@ -1947,6 +2013,8 @@ static inline bool ext4_verity_in_progress(struct inode *inode)
 #define EXT4_FEATURE_RO_COMPAT_READONLY		0x1000
 #define EXT4_FEATURE_RO_COMPAT_PROJECT		0x2000
 #define EXT4_FEATURE_RO_COMPAT_VERITY		0x8000
+#define EXT4_FEATURE_RO_COMPAT_ORPHAN_PRESENT	0x10000 /* Orphan file may be
+							   non-empty */
 
 #define EXT4_FEATURE_INCOMPAT_COMPRESSION	0x0001
 #define EXT4_FEATURE_INCOMPAT_FILETYPE		0x0002
@@ -2030,6 +2098,7 @@ EXT4_FEATURE_COMPAT_FUNCS(dir_index,		DIR_INDEX)
 EXT4_FEATURE_COMPAT_FUNCS(sparse_super2,	SPARSE_SUPER2)
 EXT4_FEATURE_COMPAT_FUNCS(fast_commit,		FAST_COMMIT)
 EXT4_FEATURE_COMPAT_FUNCS(stable_inodes,	STABLE_INODES)
+EXT4_FEATURE_COMPAT_FUNCS(orphan_file,		ORPHAN_FILE)
 
 EXT4_FEATURE_RO_COMPAT_FUNCS(sparse_super,	SPARSE_SUPER)
 EXT4_FEATURE_RO_COMPAT_FUNCS(large_file,	LARGE_FILE)
@@ -2044,6 +2113,7 @@ EXT4_FEATURE_RO_COMPAT_FUNCS(metadata_csum,	METADATA_CSUM)
 EXT4_FEATURE_RO_COMPAT_FUNCS(readonly,		READONLY)
 EXT4_FEATURE_RO_COMPAT_FUNCS(project,		PROJECT)
 EXT4_FEATURE_RO_COMPAT_FUNCS(verity,		VERITY)
+EXT4_FEATURE_RO_COMPAT_FUNCS(orphan_present,	ORPHAN_PRESENT)
 
 EXT4_FEATURE_INCOMPAT_FUNCS(compression,	COMPRESSION)
 EXT4_FEATURE_INCOMPAT_FUNCS(filetype,		FILETYPE)
@@ -2077,7 +2147,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(casefold,		CASEFOLD)
 					 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
 					 EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
 
-#define EXT4_FEATURE_COMPAT_SUPP	EXT4_FEATURE_COMPAT_EXT_ATTR
+#define EXT4_FEATURE_COMPAT_SUPP	(EXT4_FEATURE_COMPAT_EXT_ATTR| \
+					 EXT4_FEATURE_COMPAT_ORPHAN_FILE)
 #define EXT4_FEATURE_INCOMPAT_SUPP	(EXT4_FEATURE_INCOMPAT_FILETYPE| \
 					 EXT4_FEATURE_INCOMPAT_RECOVER| \
 					 EXT4_FEATURE_INCOMPAT_META_BG| \
@@ -2102,7 +2173,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(casefold,		CASEFOLD)
 					 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\
 					 EXT4_FEATURE_RO_COMPAT_QUOTA |\
 					 EXT4_FEATURE_RO_COMPAT_PROJECT |\
-					 EXT4_FEATURE_RO_COMPAT_VERITY)
+					 EXT4_FEATURE_RO_COMPAT_VERITY |\
+					 EXT4_FEATURE_RO_COMPAT_ORPHAN_PRESENT)
 
 #define EXTN_FEATURE_FUNCS(ver) \
 static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \
@@ -2138,6 +2210,8 @@ static inline bool ext4_has_incompat_features(struct super_block *sb)
 	return (EXT4_SB(sb)->s_es->s_feature_incompat != 0);
 }
 
+extern int ext4_feature_set_ok(struct super_block *sb, int readonly);
+
 /*
  * Superblock flags
  */
@@ -2150,7 +2224,6 @@ static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi)
 	return test_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
 }
 
-
 /*
  * Default values for user and/or group using reserved blocks
  */
@@ -2911,13 +2984,14 @@ int ext4_get_block(struct inode *inode, sector_t iblock,
 int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 			   struct buffer_head *bh, int create);
 int ext4_walk_page_buffers(handle_t *handle,
+			   struct inode *inode,
 			   struct buffer_head *head,
 			   unsigned from,
 			   unsigned to,
 			   int *partial,
-			   int (*fn)(handle_t *handle,
+			   int (*fn)(handle_t *handle, struct inode *inode,
 				     struct buffer_head *bh));
-int do_journal_get_write_access(handle_t *handle,
+int do_journal_get_write_access(handle_t *handle, struct inode *inode,
 				struct buffer_head *bh);
 #define FALL_BACK_TO_NONDELALLOC 1
 #define CONVERT_INLINE_DATA	 2
@@ -2996,8 +3070,6 @@ extern int ext4_init_new_dir(handle_t *handle, struct inode *dir,
 			     struct inode *inode);
 extern int ext4_dirblock_csum_verify(struct inode *inode,
 				     struct buffer_head *bh);
-extern int ext4_orphan_add(handle_t *, struct inode *);
-extern int ext4_orphan_del(handle_t *, struct inode *);
 extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 				__u32 start_minor_hash, __u32 *next_hash);
 extern int ext4_search_dir(struct buffer_head *bh,
@@ -3466,6 +3538,7 @@ static inline bool ext4_is_quota_journalled(struct super_block *sb)
 	return (ext4_has_feature_quota(sb) ||
 		sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]);
 }
+int ext4_enable_quotas(struct super_block *sb);
 #endif
 
 /*
@@ -3727,6 +3800,19 @@ extern void ext4_stop_mmpd(struct ext4_sb_info *sbi);
 /* verity.c */
 extern const struct fsverity_operations ext4_verityops;
 
+/* orphan.c */
+extern int ext4_orphan_add(handle_t *, struct inode *);
+extern int ext4_orphan_del(handle_t *, struct inode *);
+extern void ext4_orphan_cleanup(struct super_block *sb,
+				struct ext4_super_block *es);
+extern void ext4_release_orphan_info(struct super_block *sb);
+extern int ext4_init_orphan_info(struct super_block *sb);
+extern int ext4_orphan_file_empty(struct super_block *sb);
+extern void ext4_orphan_file_block_trigger(
+				struct jbd2_buffer_trigger_type *triggers,
+				struct buffer_head *bh,
+				void *data, size_t size);
+
 /*
  * Add new method to test whether block and inode bitmaps are properly
  * initialized. With uninit_bg reading the block from disk is not enough
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 44e59881a1f0..26435f3a3094 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -173,10 +173,11 @@ struct partial_cluster {
 #define EXT_MAX_EXTENT(__hdr__)	\
 	((le16_to_cpu((__hdr__)->eh_max)) ? \
 	((EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)) \
-					: 0)
+					: NULL)
 #define EXT_MAX_INDEX(__hdr__) \
 	((le16_to_cpu((__hdr__)->eh_max)) ? \
-	((EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)) : 0)
+	((EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)) \
+					: NULL)
 
 static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode)
 {
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index b60f0152ea57..6def7339056d 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -218,9 +218,11 @@ static void ext4_check_bdev_write_error(struct super_block *sb)
 }
 
 int __ext4_journal_get_write_access(const char *where, unsigned int line,
-				    handle_t *handle, struct buffer_head *bh)
+				    handle_t *handle, struct super_block *sb,
+				    struct buffer_head *bh,
+				    enum ext4_journal_trigger_type trigger_type)
 {
-	int err = 0;
+	int err;
 
 	might_sleep();
 
@@ -229,11 +231,18 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line,
 
 	if (ext4_handle_valid(handle)) {
 		err = jbd2_journal_get_write_access(handle, bh);
-		if (err)
+		if (err) {
 			ext4_journal_abort_handle(where, line, __func__, bh,
 						  handle, err);
+			return err;
+		}
 	}
-	return err;
+	if (trigger_type == EXT4_JTR_NONE || !ext4_has_metadata_csum(sb))
+		return 0;
+	BUG_ON(trigger_type >= EXT4_JOURNAL_TRIGGER_COUNT);
+	jbd2_journal_set_triggers(bh,
+		&EXT4_SB(sb)->s_journal_triggers[trigger_type].tr_triggers);
+	return 0;
 }
 
 /*
@@ -301,17 +310,27 @@ int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
 }
 
 int __ext4_journal_get_create_access(const char *where, unsigned int line,
-				handle_t *handle, struct buffer_head *bh)
+				handle_t *handle, struct super_block *sb,
+				struct buffer_head *bh,
+				enum ext4_journal_trigger_type trigger_type)
 {
-	int err = 0;
+	int err;
 
-	if (ext4_handle_valid(handle)) {
-		err = jbd2_journal_get_create_access(handle, bh);
-		if (err)
-			ext4_journal_abort_handle(where, line, __func__,
-						  bh, handle, err);
+	if (!ext4_handle_valid(handle))
+		return 0;
+
+	err = jbd2_journal_get_create_access(handle, bh);
+	if (err) {
+		ext4_journal_abort_handle(where, line, __func__, bh, handle,
+					  err);
+		return err;
 	}
-	return err;
+	if (trigger_type == EXT4_JTR_NONE || !ext4_has_metadata_csum(sb))
+		return 0;
+	BUG_ON(trigger_type >= EXT4_JOURNAL_TRIGGER_COUNT);
+	jbd2_journal_set_triggers(bh,
+		&EXT4_SB(sb)->s_journal_triggers[trigger_type].tr_triggers);
+	return 0;
 }
 
 int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 0d2fa423b7ad..0e4fa644df01 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -231,26 +231,32 @@ int ext4_expand_extra_isize(struct inode *inode,
  * Wrapper functions with which ext4 calls into JBD.
  */
 int __ext4_journal_get_write_access(const char *where, unsigned int line,
-				    handle_t *handle, struct buffer_head *bh);
+				    handle_t *handle, struct super_block *sb,
+				    struct buffer_head *bh,
+				    enum ext4_journal_trigger_type trigger_type);
 
 int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
 		  int is_metadata, struct inode *inode,
 		  struct buffer_head *bh, ext4_fsblk_t blocknr);
 
 int __ext4_journal_get_create_access(const char *where, unsigned int line,
-				handle_t *handle, struct buffer_head *bh);
+				handle_t *handle, struct super_block *sb,
+				struct buffer_head *bh,
+				enum ext4_journal_trigger_type trigger_type);
 
 int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
 				 handle_t *handle, struct inode *inode,
 				 struct buffer_head *bh);
 
-#define ext4_journal_get_write_access(handle, bh) \
-	__ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh))
+#define ext4_journal_get_write_access(handle, sb, bh, trigger_type) \
+	__ext4_journal_get_write_access(__func__, __LINE__, (handle), (sb), \
+					(bh), (trigger_type))
 #define ext4_forget(handle, is_metadata, inode, bh, block_nr) \
 	__ext4_forget(__func__, __LINE__, (handle), (is_metadata), (inode), \
 		      (bh), (block_nr))
-#define ext4_journal_get_create_access(handle, bh) \
-	__ext4_journal_get_create_access(__func__, __LINE__, (handle), (bh))
+#define ext4_journal_get_create_access(handle, sb, bh, trigger_type) \
+	__ext4_journal_get_create_access(__func__, __LINE__, (handle), (sb), \
+					 (bh), (trigger_type))
 #define ext4_handle_dirty_metadata(handle, inode, bh) \
 	__ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \
 				     (bh))
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index c33e0a2cb6c3..c0de30f25185 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -139,7 +139,8 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
 	if (path->p_bh) {
 		/* path points to block */
 		BUFFER_TRACE(path->p_bh, "get_write_access");
-		return ext4_journal_get_write_access(handle, path->p_bh);
+		return ext4_journal_get_write_access(handle, inode->i_sb,
+						     path->p_bh, EXT4_JTR_NONE);
 	}
 	/* path points to leaf/index in inode body */
 	/* we use in-core data, no need to protect them */
@@ -1082,7 +1083,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 	}
 	lock_buffer(bh);
 
-	err = ext4_journal_get_create_access(handle, bh);
+	err = ext4_journal_get_create_access(handle, inode->i_sb, bh,
+					     EXT4_JTR_NONE);
 	if (err)
 		goto cleanup;
 
@@ -1160,7 +1162,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 		}
 		lock_buffer(bh);
 
-		err = ext4_journal_get_create_access(handle, bh);
+		err = ext4_journal_get_create_access(handle, inode->i_sb, bh,
+						     EXT4_JTR_NONE);
 		if (err)
 			goto cleanup;
 
@@ -1286,7 +1289,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
 		return -ENOMEM;
 	lock_buffer(bh);
 
-	err = ext4_journal_get_create_access(handle, bh);
+	err = ext4_journal_get_create_access(handle, inode->i_sb, bh,
+					     EXT4_JTR_NONE);
 	if (err) {
 		unlock_buffer(bh);
 		goto out;
@@ -3569,7 +3573,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 				split_map.m_len - ee_block);
 			err = ext4_ext_zeroout(inode, &zero_ex1);
 			if (err)
-				goto out;
+				goto fallback;
 			split_map.m_len = allocated;
 		}
 		if (split_map.m_lblk - ee_block + split_map.m_len <
@@ -3583,7 +3587,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 						      ext4_ext_pblock(ex));
 				err = ext4_ext_zeroout(inode, &zero_ex2);
 				if (err)
-					goto out;
+					goto fallback;
 			}
 
 			split_map.m_len += split_map.m_lblk - ee_block;
@@ -3592,6 +3596,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		}
 	}
 
+fallback:
 	err = ext4_split_extent(handle, inode, ppath, &split_map, split_flag,
 				flags);
 	if (err > 0)
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index e8195229c252..8e610a381862 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -775,28 +775,27 @@ static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val,
 }
 
 /* Same as above, but adds dentry tlv. */
-static  bool ext4_fc_add_dentry_tlv(struct super_block *sb, u16 tag,
-					int parent_ino, int ino, int dlen,
-					const unsigned char *dname,
-					u32 *crc)
+static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc,
+				   struct ext4_fc_dentry_update *fc_dentry)
 {
 	struct ext4_fc_dentry_info fcd;
 	struct ext4_fc_tl tl;
+	int dlen = fc_dentry->fcd_name.len;
 	u8 *dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(fcd) + dlen,
 					crc);
 
 	if (!dst)
 		return false;
 
-	fcd.fc_parent_ino = cpu_to_le32(parent_ino);
-	fcd.fc_ino = cpu_to_le32(ino);
-	tl.fc_tag = cpu_to_le16(tag);
+	fcd.fc_parent_ino = cpu_to_le32(fc_dentry->fcd_parent);
+	fcd.fc_ino = cpu_to_le32(fc_dentry->fcd_ino);
+	tl.fc_tag = cpu_to_le16(fc_dentry->fcd_op);
 	tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen);
 	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
 	dst += sizeof(tl);
 	ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc);
 	dst += sizeof(fcd);
-	ext4_fc_memcpy(sb, dst, dname, dlen, crc);
+	ext4_fc_memcpy(sb, dst, fc_dentry->fcd_name.name, dlen, crc);
 	dst += dlen;
 
 	return true;
@@ -992,11 +991,7 @@ __releases(&sbi->s_fc_lock)
 				 &sbi->s_fc_dentry_q[FC_Q_MAIN], fcd_list) {
 		if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) {
 			spin_unlock(&sbi->s_fc_lock);
-			if (!ext4_fc_add_dentry_tlv(
-				sb, fc_dentry->fcd_op,
-				fc_dentry->fcd_parent, fc_dentry->fcd_ino,
-				fc_dentry->fcd_name.len,
-				fc_dentry->fcd_name.name, crc)) {
+			if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) {
 				ret = -ENOSPC;
 				goto lock_and_exit;
 			}
@@ -1035,11 +1030,7 @@ __releases(&sbi->s_fc_lock)
 		if (ret)
 			goto lock_and_exit;
 
-		if (!ext4_fc_add_dentry_tlv(
-			sb, fc_dentry->fcd_op,
-			fc_dentry->fcd_parent, fc_dentry->fcd_ino,
-			fc_dentry->fcd_name.len,
-			fc_dentry->fcd_name.name, crc)) {
+		if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) {
 			ret = -ENOSPC;
 			goto lock_and_exit;
 		}
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index d3b4ed91aa68..ac0e11bbb445 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -823,7 +823,8 @@ static int ext4_sample_last_mounted(struct super_block *sb,
 	if (IS_ERR(handle))
 		goto out;
 	BUFFER_TRACE(sbi->s_sbh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, sbi->s_sbh);
+	err = ext4_journal_get_write_access(handle, sb, sbi->s_sbh,
+					    EXT4_JTR_NONE);
 	if (err)
 		goto out_journal;
 	lock_buffer(sbi->s_sbh);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index e89fc0f770b0..f73e5eb43eae 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -300,7 +300,8 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 	}
 
 	BUFFER_TRACE(bitmap_bh, "get_write_access");
-	fatal = ext4_journal_get_write_access(handle, bitmap_bh);
+	fatal = ext4_journal_get_write_access(handle, sb, bitmap_bh,
+					      EXT4_JTR_NONE);
 	if (fatal)
 		goto error_return;
 
@@ -308,7 +309,8 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 	gdp = ext4_get_group_desc(sb, block_group, &bh2);
 	if (gdp) {
 		BUFFER_TRACE(bh2, "get_write_access");
-		fatal = ext4_journal_get_write_access(handle, bh2);
+		fatal = ext4_journal_get_write_access(handle, sb, bh2,
+						      EXT4_JTR_NONE);
 	}
 	ext4_lock_group(sb, block_group);
 	cleared = ext4_test_and_clear_bit(bit, bitmap_bh->b_data);
@@ -1085,7 +1087,8 @@ repeat_in_this_group:
 			}
 		}
 		BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
-		err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
+		err = ext4_journal_get_write_access(handle, sb, inode_bitmap_bh,
+						    EXT4_JTR_NONE);
 		if (err) {
 			ext4_std_error(sb, err);
 			goto out;
@@ -1127,7 +1130,8 @@ got:
 	}
 
 	BUFFER_TRACE(group_desc_bh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, group_desc_bh);
+	err = ext4_journal_get_write_access(handle, sb, group_desc_bh,
+					    EXT4_JTR_NONE);
 	if (err) {
 		ext4_std_error(sb, err);
 		goto out;
@@ -1144,7 +1148,8 @@ got:
 			goto out;
 		}
 		BUFFER_TRACE(block_bitmap_bh, "get block bitmap access");
-		err = ext4_journal_get_write_access(handle, block_bitmap_bh);
+		err = ext4_journal_get_write_access(handle, sb, block_bitmap_bh,
+						    EXT4_JTR_NONE);
 		if (err) {
 			brelse(block_bitmap_bh);
 			ext4_std_error(sb, err);
@@ -1583,8 +1588,8 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
 	num = sbi->s_itb_per_group - used_blks;
 
 	BUFFER_TRACE(group_desc_bh, "get_write_access");
-	ret = ext4_journal_get_write_access(handle,
-					    group_desc_bh);
+	ret = ext4_journal_get_write_access(handle, sb, group_desc_bh,
+					    EXT4_JTR_NONE);
 	if (ret)
 		goto err_out;
 
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index a7bc6ad656a9..89efa78ed4b2 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -354,7 +354,8 @@ static int ext4_alloc_branch(handle_t *handle,
 		}
 		lock_buffer(bh);
 		BUFFER_TRACE(bh, "call get_create_access");
-		err = ext4_journal_get_create_access(handle, bh);
+		err = ext4_journal_get_create_access(handle, ar->inode->i_sb,
+						     bh, EXT4_JTR_NONE);
 		if (err) {
 			unlock_buffer(bh);
 			goto failed;
@@ -429,7 +430,8 @@ static int ext4_splice_branch(handle_t *handle,
 	 */
 	if (where->bh) {
 		BUFFER_TRACE(where->bh, "get_write_access");
-		err = ext4_journal_get_write_access(handle, where->bh);
+		err = ext4_journal_get_write_access(handle, ar->inode->i_sb,
+						    where->bh, EXT4_JTR_NONE);
 		if (err)
 			goto err_out;
 	}
@@ -728,7 +730,8 @@ static int ext4_ind_truncate_ensure_credits(handle_t *handle,
 		return ret;
 	if (bh) {
 		BUFFER_TRACE(bh, "retaking write access");
-		ret = ext4_journal_get_write_access(handle, bh);
+		ret = ext4_journal_get_write_access(handle, inode->i_sb, bh,
+						    EXT4_JTR_NONE);
 		if (unlikely(ret))
 			return ret;
 	}
@@ -916,7 +919,8 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
 
 	if (this_bh) {				/* For indirect block */
 		BUFFER_TRACE(this_bh, "get_write_access");
-		err = ext4_journal_get_write_access(handle, this_bh);
+		err = ext4_journal_get_write_access(handle, inode->i_sb,
+						    this_bh, EXT4_JTR_NONE);
 		/* Important: if we can't update the indirect pointers
 		 * to the blocks, we can't free them. */
 		if (err)
@@ -1079,7 +1083,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
 				 */
 				BUFFER_TRACE(parent_bh, "get_write_access");
 				if (!ext4_journal_get_write_access(handle,
-								   parent_bh)){
+						inode->i_sb, parent_bh,
+						EXT4_JTR_NONE)) {
 					*p = 0;
 					BUFFER_TRACE(parent_bh,
 					"call ext4_handle_dirty_metadata");
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 70cb64db33f7..82bf4ff6be28 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -264,7 +264,8 @@ static int ext4_create_inline_data(handle_t *handle,
 		return error;
 
 	BUFFER_TRACE(is.iloc.bh, "get_write_access");
-	error = ext4_journal_get_write_access(handle, is.iloc.bh);
+	error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh,
+					      EXT4_JTR_NONE);
 	if (error)
 		goto out;
 
@@ -350,7 +351,8 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
 		goto out;
 
 	BUFFER_TRACE(is.iloc.bh, "get_write_access");
-	error = ext4_journal_get_write_access(handle, is.iloc.bh);
+	error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh,
+					      EXT4_JTR_NONE);
 	if (error)
 		goto out;
 
@@ -427,7 +429,8 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle,
 		goto out;
 
 	BUFFER_TRACE(is.iloc.bh, "get_write_access");
-	error = ext4_journal_get_write_access(handle, is.iloc.bh);
+	error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh,
+					      EXT4_JTR_NONE);
 	if (error)
 		goto out;
 
@@ -593,7 +596,7 @@ retry:
 		ret = __block_write_begin(page, from, to, ext4_get_block);
 
 	if (!ret && ext4_should_journal_data(inode)) {
-		ret = ext4_walk_page_buffers(handle, page_buffers(page),
+		ret = ext4_walk_page_buffers(handle, inode, page_buffers(page),
 					     from, to, NULL,
 					     do_journal_get_write_access);
 	}
@@ -682,7 +685,8 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
 		goto convert;
 	}
 
-	ret = ext4_journal_get_write_access(handle, iloc.bh);
+	ret = ext4_journal_get_write_access(handle, inode->i_sb, iloc.bh,
+					    EXT4_JTR_NONE);
 	if (ret)
 		goto out;
 
@@ -750,6 +754,12 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
 	ext4_write_lock_xattr(inode, &no_expand);
 	BUG_ON(!ext4_has_inline_data(inode));
 
+	/*
+	 * ei->i_inline_off may have changed since ext4_write_begin()
+	 * called ext4_try_to_write_inline_data()
+	 */
+	(void) ext4_find_inline_data_nolock(inode);
+
 	kaddr = kmap_atomic(page);
 	ext4_write_inline_data(inode, &iloc, kaddr, pos, len);
 	kunmap_atomic(kaddr);
@@ -923,7 +933,8 @@ retry_journal:
 		if (ret < 0)
 			goto out_release_page;
 	}
-	ret = ext4_journal_get_write_access(handle, iloc.bh);
+	ret = ext4_journal_get_write_access(handle, inode->i_sb, iloc.bh,
+					    EXT4_JTR_NONE);
 	if (ret)
 		goto out_release_page;
 
@@ -1028,7 +1039,8 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
 		return err;
 
 	BUFFER_TRACE(iloc->bh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, iloc->bh);
+	err = ext4_journal_get_write_access(handle, dir->i_sb, iloc->bh,
+					    EXT4_JTR_NONE);
 	if (err)
 		return err;
 	ext4_insert_dentry(dir, inode, de, inline_size, fname);
@@ -1223,7 +1235,8 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
 	}
 
 	lock_buffer(data_bh);
-	error = ext4_journal_get_create_access(handle, data_bh);
+	error = ext4_journal_get_create_access(handle, inode->i_sb, data_bh,
+					       EXT4_JTR_NONE);
 	if (error) {
 		unlock_buffer(data_bh);
 		error = -EIO;
@@ -1707,7 +1720,8 @@ int ext4_delete_inline_entry(handle_t *handle,
 	}
 
 	BUFFER_TRACE(bh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, bh);
+	err = ext4_journal_get_write_access(handle, dir->i_sb, bh,
+					    EXT4_JTR_NONE);
 	if (err)
 		goto out;
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 325c038e7b23..d18852d6029c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -139,7 +139,6 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
 static void ext4_invalidatepage(struct page *page, unsigned int offset,
 				unsigned int length);
 static int __ext4_journalled_writepage(struct page *page, unsigned int len);
-static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
 static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
 				  int pextents);
 
@@ -869,7 +868,8 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
 		 */
 		lock_buffer(bh);
 		BUFFER_TRACE(bh, "call get_create_access");
-		err = ext4_journal_get_create_access(handle, bh);
+		err = ext4_journal_get_create_access(handle, inode->i_sb, bh,
+						     EXT4_JTR_NONE);
 		if (unlikely(err)) {
 			unlock_buffer(bh);
 			goto errout;
@@ -954,12 +954,12 @@ out_brelse:
 	return err;
 }
 
-int ext4_walk_page_buffers(handle_t *handle,
+int ext4_walk_page_buffers(handle_t *handle, struct inode *inode,
 			   struct buffer_head *head,
 			   unsigned from,
 			   unsigned to,
 			   int *partial,
-			   int (*fn)(handle_t *handle,
+			   int (*fn)(handle_t *handle, struct inode *inode,
 				     struct buffer_head *bh))
 {
 	struct buffer_head *bh;
@@ -978,7 +978,7 @@ int ext4_walk_page_buffers(handle_t *handle,
 				*partial = 1;
 			continue;
 		}
-		err = (*fn)(handle, bh);
+		err = (*fn)(handle, inode, bh);
 		if (!ret)
 			ret = err;
 	}
@@ -1009,7 +1009,7 @@ int ext4_walk_page_buffers(handle_t *handle,
  * is elevated.  We'll still have enough credits for the tiny quotafile
  * write.
  */
-int do_journal_get_write_access(handle_t *handle,
+int do_journal_get_write_access(handle_t *handle, struct inode *inode,
 				struct buffer_head *bh)
 {
 	int dirty = buffer_dirty(bh);
@@ -1028,7 +1028,8 @@ int do_journal_get_write_access(handle_t *handle,
 	if (dirty)
 		clear_buffer_dirty(bh);
 	BUFFER_TRACE(bh, "get write access");
-	ret = ext4_journal_get_write_access(handle, bh);
+	ret = ext4_journal_get_write_access(handle, inode->i_sb, bh,
+					    EXT4_JTR_NONE);
 	if (!ret && dirty)
 		ret = ext4_handle_dirty_metadata(handle, NULL, bh);
 	return ret;
@@ -1208,8 +1209,8 @@ retry_journal:
 		ret = __block_write_begin(page, pos, len, ext4_get_block);
 #endif
 	if (!ret && ext4_should_journal_data(inode)) {
-		ret = ext4_walk_page_buffers(handle, page_buffers(page),
-					     from, to, NULL,
+		ret = ext4_walk_page_buffers(handle, inode,
+					     page_buffers(page), from, to, NULL,
 					     do_journal_get_write_access);
 	}
 
@@ -1253,7 +1254,8 @@ retry_journal:
 }
 
 /* For write_end() in data=journal mode */
-static int write_end_fn(handle_t *handle, struct buffer_head *bh)
+static int write_end_fn(handle_t *handle, struct inode *inode,
+			struct buffer_head *bh)
 {
 	int ret;
 	if (!buffer_mapped(bh) || buffer_freed(bh))
@@ -1352,6 +1354,7 @@ errout:
  * to call ext4_handle_dirty_metadata() instead.
  */
 static void ext4_journalled_zero_new_buffers(handle_t *handle,
+					    struct inode *inode,
 					    struct page *page,
 					    unsigned from, unsigned to)
 {
@@ -1370,7 +1373,7 @@ static void ext4_journalled_zero_new_buffers(handle_t *handle,
 					size = min(to, block_end) - start;
 
 					zero_user(page, start, size);
-					write_end_fn(handle, bh);
+					write_end_fn(handle, inode, bh);
 				}
 				clear_buffer_new(bh);
 			}
@@ -1412,13 +1415,13 @@ static int ext4_journalled_write_end(struct file *file,
 		copied = ret;
 	} else if (unlikely(copied < len) && !PageUptodate(page)) {
 		copied = 0;
-		ext4_journalled_zero_new_buffers(handle, page, from, to);
+		ext4_journalled_zero_new_buffers(handle, inode, page, from, to);
 	} else {
 		if (unlikely(copied < len))
-			ext4_journalled_zero_new_buffers(handle, page,
+			ext4_journalled_zero_new_buffers(handle, inode, page,
 							 from + copied, to);
-		ret = ext4_walk_page_buffers(handle, page_buffers(page), from,
-					     from + copied, &partial,
+		ret = ext4_walk_page_buffers(handle, inode, page_buffers(page),
+					     from, from + copied, &partial,
 					     write_end_fn);
 		if (!partial)
 			SetPageUptodate(page);
@@ -1619,7 +1622,8 @@ static void ext4_print_free_blocks(struct inode *inode)
 	return;
 }
 
-static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
+static int ext4_bh_delay_or_unwritten(handle_t *handle, struct inode *inode,
+				      struct buffer_head *bh)
 {
 	return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
 }
@@ -1851,13 +1855,15 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 	return 0;
 }
 
-static int bget_one(handle_t *handle, struct buffer_head *bh)
+static int bget_one(handle_t *handle, struct inode *inode,
+		    struct buffer_head *bh)
 {
 	get_bh(bh);
 	return 0;
 }
 
-static int bput_one(handle_t *handle, struct buffer_head *bh)
+static int bput_one(handle_t *handle, struct inode *inode,
+		    struct buffer_head *bh)
 {
 	put_bh(bh);
 	return 0;
@@ -1888,7 +1894,7 @@ static int __ext4_journalled_writepage(struct page *page,
 			BUG();
 			goto out;
 		}
-		ext4_walk_page_buffers(handle, page_bufs, 0, len,
+		ext4_walk_page_buffers(handle, inode, page_bufs, 0, len,
 				       NULL, bget_one);
 	}
 	/*
@@ -1920,11 +1926,11 @@ static int __ext4_journalled_writepage(struct page *page,
 	if (inline_data) {
 		ret = ext4_mark_inode_dirty(handle, inode);
 	} else {
-		ret = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL,
-					     do_journal_get_write_access);
+		ret = ext4_walk_page_buffers(handle, inode, page_bufs, 0, len,
+					     NULL, do_journal_get_write_access);
 
-		err = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL,
-					     write_end_fn);
+		err = ext4_walk_page_buffers(handle, inode, page_bufs, 0, len,
+					     NULL, write_end_fn);
 	}
 	if (ret == 0)
 		ret = err;
@@ -1941,7 +1947,7 @@ out:
 	unlock_page(page);
 out_no_pagelock:
 	if (!inline_data && page_bufs)
-		ext4_walk_page_buffers(NULL, page_bufs, 0, len,
+		ext4_walk_page_buffers(NULL, inode, page_bufs, 0, len,
 				       NULL, bput_one);
 	brelse(inode_bh);
 	return ret;
@@ -2031,7 +2037,7 @@ static int ext4_writepage(struct page *page,
 	 * for the extremely common case, this is an optimization that
 	 * skips a useless round trip through ext4_bio_write_page().
 	 */
-	if (ext4_walk_page_buffers(NULL, page_bufs, 0, len, NULL,
+	if (ext4_walk_page_buffers(NULL, inode, page_bufs, 0, len, NULL,
 				   ext4_bh_delay_or_unwritten)) {
 		redirty_page_for_writepage(wbc, page);
 		if ((current->flags & PF_MEMALLOC) ||
@@ -3794,7 +3800,8 @@ static int __ext4_block_zero_page_range(handle_t *handle,
 	}
 	if (ext4_should_journal_data(inode)) {
 		BUFFER_TRACE(bh, "get write access");
-		err = ext4_journal_get_write_access(handle, bh);
+		err = ext4_journal_get_write_access(handle, inode->i_sb, bh,
+						    EXT4_JTR_NONE);
 		if (err)
 			goto unlock;
 	}
@@ -4329,101 +4336,93 @@ static int __ext4_get_inode_loc(struct super_block *sb, unsigned long ino,
 	bh = sb_getblk(sb, block);
 	if (unlikely(!bh))
 		return -ENOMEM;
-	if (ext4_simulate_fail(sb, EXT4_SIM_INODE_EIO))
-		goto simulate_eio;
-	if (!buffer_uptodate(bh)) {
-		lock_buffer(bh);
+	if (ext4_buffer_uptodate(bh))
+		goto has_buffer;
 
-		if (ext4_buffer_uptodate(bh)) {
-			/* someone brought it uptodate while we waited */
-			unlock_buffer(bh);
-			goto has_buffer;
-		}
-
-		/*
-		 * If we have all information of the inode in memory and this
-		 * is the only valid inode in the block, we need not read the
-		 * block.
-		 */
-		if (in_mem) {
-			struct buffer_head *bitmap_bh;
-			int i, start;
+	lock_buffer(bh);
+	/*
+	 * If we have all information of the inode in memory and this
+	 * is the only valid inode in the block, we need not read the
+	 * block.
+	 */
+	if (in_mem) {
+		struct buffer_head *bitmap_bh;
+		int i, start;
 
-			start = inode_offset & ~(inodes_per_block - 1);
+		start = inode_offset & ~(inodes_per_block - 1);
 
-			/* Is the inode bitmap in cache? */
-			bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp));
-			if (unlikely(!bitmap_bh))
-				goto make_io;
+		/* Is the inode bitmap in cache? */
+		bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp));
+		if (unlikely(!bitmap_bh))
+			goto make_io;
 
-			/*
-			 * If the inode bitmap isn't in cache then the
-			 * optimisation may end up performing two reads instead
-			 * of one, so skip it.
-			 */
-			if (!buffer_uptodate(bitmap_bh)) {
-				brelse(bitmap_bh);
-				goto make_io;
-			}
-			for (i = start; i < start + inodes_per_block; i++) {
-				if (i == inode_offset)
-					continue;
-				if (ext4_test_bit(i, bitmap_bh->b_data))
-					break;
-			}
+		/*
+		 * If the inode bitmap isn't in cache then the
+		 * optimisation may end up performing two reads instead
+		 * of one, so skip it.
+		 */
+		if (!buffer_uptodate(bitmap_bh)) {
 			brelse(bitmap_bh);
-			if (i == start + inodes_per_block) {
-				/* all other inodes are free, so skip I/O */
-				memset(bh->b_data, 0, bh->b_size);
-				set_buffer_uptodate(bh);
-				unlock_buffer(bh);
-				goto has_buffer;
-			}
+			goto make_io;
 		}
+		for (i = start; i < start + inodes_per_block; i++) {
+			if (i == inode_offset)
+				continue;
+			if (ext4_test_bit(i, bitmap_bh->b_data))
+				break;
+		}
+		brelse(bitmap_bh);
+		if (i == start + inodes_per_block) {
+			/* all other inodes are free, so skip I/O */
+			memset(bh->b_data, 0, bh->b_size);
+			set_buffer_uptodate(bh);
+			unlock_buffer(bh);
+			goto has_buffer;
+		}
+	}
 
 make_io:
-		/*
-		 * If we need to do any I/O, try to pre-readahead extra
-		 * blocks from the inode table.
-		 */
-		blk_start_plug(&plug);
-		if (EXT4_SB(sb)->s_inode_readahead_blks) {
-			ext4_fsblk_t b, end, table;
-			unsigned num;
-			__u32 ra_blks = EXT4_SB(sb)->s_inode_readahead_blks;
-
-			table = ext4_inode_table(sb, gdp);
-			/* s_inode_readahead_blks is always a power of 2 */
-			b = block & ~((ext4_fsblk_t) ra_blks - 1);
-			if (table > b)
-				b = table;
-			end = b + ra_blks;
-			num = EXT4_INODES_PER_GROUP(sb);
-			if (ext4_has_group_desc_csum(sb))
-				num -= ext4_itable_unused_count(sb, gdp);
-			table += num / inodes_per_block;
-			if (end > table)
-				end = table;
-			while (b <= end)
-				ext4_sb_breadahead_unmovable(sb, b++);
-		}
+	/*
+	 * If we need to do any I/O, try to pre-readahead extra
+	 * blocks from the inode table.
+	 */
+	blk_start_plug(&plug);
+	if (EXT4_SB(sb)->s_inode_readahead_blks) {
+		ext4_fsblk_t b, end, table;
+		unsigned num;
+		__u32 ra_blks = EXT4_SB(sb)->s_inode_readahead_blks;
+
+		table = ext4_inode_table(sb, gdp);
+		/* s_inode_readahead_blks is always a power of 2 */
+		b = block & ~((ext4_fsblk_t) ra_blks - 1);
+		if (table > b)
+			b = table;
+		end = b + ra_blks;
+		num = EXT4_INODES_PER_GROUP(sb);
+		if (ext4_has_group_desc_csum(sb))
+			num -= ext4_itable_unused_count(sb, gdp);
+		table += num / inodes_per_block;
+		if (end > table)
+			end = table;
+		while (b <= end)
+			ext4_sb_breadahead_unmovable(sb, b++);
+	}
 
-		/*
-		 * There are other valid inodes in the buffer, this inode
-		 * has in-inode xattrs, or we don't have this inode in memory.
-		 * Read the block from disk.
-		 */
-		trace_ext4_load_inode(sb, ino);
-		ext4_read_bh_nowait(bh, REQ_META | REQ_PRIO, NULL);
-		blk_finish_plug(&plug);
-		wait_on_buffer(bh);
-		if (!buffer_uptodate(bh)) {
-		simulate_eio:
-			if (ret_block)
-				*ret_block = block;
-			brelse(bh);
-			return -EIO;
-		}
+	/*
+	 * There are other valid inodes in the buffer, this inode
+	 * has in-inode xattrs, or we don't have this inode in memory.
+	 * Read the block from disk.
+	 */
+	trace_ext4_load_inode(sb, ino);
+	ext4_read_bh_nowait(bh, REQ_META | REQ_PRIO, NULL);
+	blk_finish_plug(&plug);
+	wait_on_buffer(bh);
+	ext4_simulate_fail_bh(sb, bh, EXT4_SIM_INODE_EIO);
+	if (!buffer_uptodate(bh)) {
+		if (ret_block)
+			*ret_block = block;
+		brelse(bh);
+		return -EIO;
 	}
 has_buffer:
 	iloc->bh = bh;
@@ -4602,6 +4601,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
 	struct ext4_iloc iloc;
 	struct ext4_inode *raw_inode;
 	struct ext4_inode_info *ei;
+	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 	struct inode *inode;
 	journal_t *journal = EXT4_SB(sb)->s_journal;
 	long ret;
@@ -4612,9 +4612,13 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
 	projid_t i_projid;
 
 	if ((!(flags & EXT4_IGET_SPECIAL) &&
-	     (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)) ||
+	     ((ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) ||
+	      ino == le32_to_cpu(es->s_usr_quota_inum) ||
+	      ino == le32_to_cpu(es->s_grp_quota_inum) ||
+	      ino == le32_to_cpu(es->s_prj_quota_inum) ||
+	      ino == le32_to_cpu(es->s_orphan_file_inum))) ||
 	    (ino < EXT4_ROOT_INO) ||
-	    (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) {
+	    (ino > le32_to_cpu(es->s_inodes_count))) {
 		if (flags & EXT4_IGET_HANDLE)
 			return ERR_PTR(-ESTALE);
 		__ext4_error(sb, function, line, false, EFSCORRUPTED, 0,
@@ -4927,8 +4931,14 @@ static int ext4_inode_blocks_set(handle_t *handle,
 		ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
 		return 0;
 	}
+
+	/*
+	 * This should never happen since sb->s_maxbytes should not have
+	 * allowed this, sb->s_maxbytes was set according to the huge_file
+	 * feature in ext4_fill_super().
+	 */
 	if (!ext4_has_feature_huge_file(sb))
-		return -EFBIG;
+		return -EFSCORRUPTED;
 
 	if (i_blocks <= 0xffffffffffffULL) {
 		/*
@@ -5031,16 +5041,14 @@ static int ext4_do_update_inode(handle_t *handle,
 
 	spin_lock(&ei->i_raw_lock);
 
-	/* For fields not tracked in the in-memory inode,
-	 * initialise them to zero for new inodes. */
+	/*
+	 * For fields not tracked in the in-memory inode, initialise them
+	 * to zero for new inodes.
+	 */
 	if (ext4_test_inode_state(inode, EXT4_STATE_NEW))
 		memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
 
 	err = ext4_inode_blocks_set(handle, raw_inode, ei);
-	if (err) {
-		spin_unlock(&ei->i_raw_lock);
-		goto out_brelse;
-	}
 
 	raw_inode->i_mode = cpu_to_le16(inode->i_mode);
 	i_uid = i_uid_read(inode);
@@ -5049,10 +5057,11 @@ static int ext4_do_update_inode(handle_t *handle,
 	if (!(test_opt(inode->i_sb, NO_UID32))) {
 		raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
 		raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid));
-/*
- * Fix up interoperability with old kernels. Otherwise, old inodes get
- * re-used with the upper 16 bits of the uid/gid intact
- */
+		/*
+		 * Fix up interoperability with old kernels. Otherwise,
+		 * old inodes get re-used with the upper 16 bits of the
+		 * uid/gid intact.
+		 */
 		if (ei->i_dtime && list_empty(&ei->i_orphan)) {
 			raw_inode->i_uid_high = 0;
 			raw_inode->i_gid_high = 0;
@@ -5121,8 +5130,9 @@ static int ext4_do_update_inode(handle_t *handle,
 		}
 	}
 
-	BUG_ON(!ext4_has_feature_project(inode->i_sb) &&
-	       i_projid != EXT4_DEF_PROJID);
+	if (i_projid != EXT4_DEF_PROJID &&
+	    !ext4_has_feature_project(inode->i_sb))
+		err = err ?: -EFSCORRUPTED;
 
 	if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
 	    EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
@@ -5130,6 +5140,11 @@ static int ext4_do_update_inode(handle_t *handle,
 
 	ext4_inode_csum_set(inode, raw_inode, ei);
 	spin_unlock(&ei->i_raw_lock);
+	if (err) {
+		EXT4_ERROR_INODE(inode, "corrupted inode contents");
+		goto out_brelse;
+	}
+
 	if (inode->i_sb->s_flags & SB_LAZYTIME)
 		ext4_update_other_inodes_time(inode->i_sb, inode->i_ino,
 					      bh->b_data);
@@ -5137,13 +5152,15 @@ static int ext4_do_update_inode(handle_t *handle,
 	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
 	err = ext4_handle_dirty_metadata(handle, NULL, bh);
 	if (err)
-		goto out_brelse;
+		goto out_error;
 	ext4_clear_inode_state(inode, EXT4_STATE_NEW);
 	if (set_large_file) {
 		BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get write access");
-		err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
+		err = ext4_journal_get_write_access(handle, sb,
+						    EXT4_SB(sb)->s_sbh,
+						    EXT4_JTR_NONE);
 		if (err)
-			goto out_brelse;
+			goto out_error;
 		lock_buffer(EXT4_SB(sb)->s_sbh);
 		ext4_set_feature_large_file(sb);
 		ext4_superblock_csum_set(sb);
@@ -5153,9 +5170,10 @@ static int ext4_do_update_inode(handle_t *handle,
 						 EXT4_SB(sb)->s_sbh);
 	}
 	ext4_update_inode_fsync_trans(handle, inode, need_datasync);
+out_error:
+	ext4_std_error(inode->i_sb, err);
 out_brelse:
 	brelse(bh);
-	ext4_std_error(inode->i_sb, err);
 	return err;
 }
 
@@ -5742,7 +5760,8 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
 	err = ext4_get_inode_loc(inode, iloc);
 	if (!err) {
 		BUFFER_TRACE(iloc->bh, "get_write_access");
-		err = ext4_journal_get_write_access(handle, iloc->bh);
+		err = ext4_journal_get_write_access(handle, inode->i_sb,
+						    iloc->bh, EXT4_JTR_NONE);
 		if (err) {
 			brelse(iloc->bh);
 			iloc->bh = NULL;
@@ -5865,7 +5884,8 @@ int ext4_expand_extra_isize(struct inode *inode,
 	ext4_write_lock_xattr(inode, &no_expand);
 
 	BUFFER_TRACE(iloc->bh, "get_write_access");
-	error = ext4_journal_get_write_access(handle, iloc->bh);
+	error = ext4_journal_get_write_access(handle, inode->i_sb, iloc->bh,
+					      EXT4_JTR_NONE);
 	if (error) {
 		brelse(iloc->bh);
 		goto out_unlock;
@@ -6036,7 +6056,8 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
 	return err;
 }
 
-static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
+static int ext4_bh_unmapped(handle_t *handle, struct inode *inode,
+			    struct buffer_head *bh)
 {
 	return !buffer_mapped(bh);
 }
@@ -6109,7 +6130,7 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
 	 * inode to the transaction's list to writeprotect pages on commit.
 	 */
 	if (page_has_buffers(page)) {
-		if (!ext4_walk_page_buffers(NULL, page_buffers(page),
+		if (!ext4_walk_page_buffers(NULL, inode, page_buffers(page),
 					    0, len, NULL,
 					    ext4_bh_unmapped)) {
 			/* Wait so that we don't change page under IO */
@@ -6155,11 +6176,13 @@ retry_alloc:
 		err = __block_write_begin(page, 0, len, ext4_get_block);
 		if (!err) {
 			ret = VM_FAULT_SIGBUS;
-			if (ext4_walk_page_buffers(handle, page_buffers(page),
-					0, len, NULL, do_journal_get_write_access))
+			if (ext4_walk_page_buffers(handle, inode,
+					page_buffers(page), 0, len, NULL,
+					do_journal_get_write_access))
 				goto out_error;
-			if (ext4_walk_page_buffers(handle, page_buffers(page),
-					0, len, NULL, write_end_fn))
+			if (ext4_walk_page_buffers(handle, inode,
+					page_buffers(page), 0, len, NULL,
+					write_end_fn))
 				goto out_error;
 			if (ext4_jbd2_inode_add_write(handle, inode,
 						      page_offset(page), len))
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 4fb5fe083c2b..606dee9e08a3 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -1154,7 +1154,9 @@ resizefs_out:
 				err = PTR_ERR(handle);
 				goto pwsalt_err_exit;
 			}
-			err = ext4_journal_get_write_access(handle, sbi->s_sbh);
+			err = ext4_journal_get_write_access(handle, sb,
+							    sbi->s_sbh,
+							    EXT4_JTR_NONE);
 			if (err)
 				goto pwsalt_err_journal;
 			lock_buffer(sbi->s_sbh);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 089c958aa2c3..72bfac2d6dce 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -408,6 +408,10 @@ static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac);
 static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
 			       ext4_group_t group, int cr);
 
+static int ext4_try_to_trim_range(struct super_block *sb,
+		struct ext4_buddy *e4b, ext4_grpblk_t start,
+		ext4_grpblk_t max, ext4_grpblk_t minblocks);
+
 /*
  * The algorithm using this percpu seq counter goes below:
  * 1. We sample the percpu discard_pa_seq counter before trying for block
@@ -2474,6 +2478,12 @@ static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
  * This could return negative error code if something goes wrong
  * during ext4_mb_init_group(). This should not be called with
  * ext4_lock_group() held.
+ *
+ * Note: because we are conditionally operating with the group lock in
+ * the EXT4_MB_STRICT_CHECK case, we need to fake out sparse in this
+ * function using __acquire and __release.  This means we need to be
+ * super careful before messing with the error path handling via "goto
+ * out"!
  */
 static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
 				     ext4_group_t group, int cr)
@@ -2487,8 +2497,10 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
 
 	if (sbi->s_mb_stats)
 		atomic64_inc(&sbi->s_bal_cX_groups_considered[ac->ac_criteria]);
-	if (should_lock)
+	if (should_lock) {
 		ext4_lock_group(sb, group);
+		__release(ext4_group_lock_ptr(sb, group));
+	}
 	free = grp->bb_free;
 	if (free == 0)
 		goto out;
@@ -2496,8 +2508,10 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
 		goto out;
 	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
 		goto out;
-	if (should_lock)
+	if (should_lock) {
+		__acquire(ext4_group_lock_ptr(sb, group));
 		ext4_unlock_group(sb, group);
+	}
 
 	/* We only do this if the grp has never been initialized */
 	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
@@ -2524,12 +2538,16 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
 			return ret;
 	}
 
-	if (should_lock)
+	if (should_lock) {
 		ext4_lock_group(sb, group);
+		__release(ext4_group_lock_ptr(sb, group));
+	}
 	ret = ext4_mb_good_group(ac, group, cr);
 out:
-	if (should_lock)
+	if (should_lock) {
+		__acquire(ext4_group_lock_ptr(sb, group));
 		ext4_unlock_group(sb, group);
+	}
 	return ret;
 }
 
@@ -2965,6 +2983,7 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
 }
 
 static void *ext4_mb_seq_structs_summary_start(struct seq_file *seq, loff_t *pos)
+__acquires(&EXT4_SB(sb)->s_mb_rb_lock)
 {
 	struct super_block *sb = PDE_DATA(file_inode(seq->file));
 	unsigned long position;
@@ -3037,6 +3056,7 @@ static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v)
 }
 
 static void ext4_mb_seq_structs_summary_stop(struct seq_file *seq, void *v)
+__releases(&EXT4_SB(sb)->s_mb_rb_lock)
 {
 	struct super_block *sb = PDE_DATA(file_inode(seq->file));
 
@@ -3308,6 +3328,57 @@ static int ext4_groupinfo_create_slab(size_t size)
 	return 0;
 }
 
+static void ext4_discard_work(struct work_struct *work)
+{
+	struct ext4_sb_info *sbi = container_of(work,
+			struct ext4_sb_info, s_discard_work);
+	struct super_block *sb = sbi->s_sb;
+	struct ext4_free_data *fd, *nfd;
+	struct ext4_buddy e4b;
+	struct list_head discard_list;
+	ext4_group_t grp, load_grp;
+	int err = 0;
+
+	INIT_LIST_HEAD(&discard_list);
+	spin_lock(&sbi->s_md_lock);
+	list_splice_init(&sbi->s_discard_list, &discard_list);
+	spin_unlock(&sbi->s_md_lock);
+
+	load_grp = UINT_MAX;
+	list_for_each_entry_safe(fd, nfd, &discard_list, efd_list) {
+		/*
+		 * If filesystem is umounting or no memory or suffering
+		 * from no space, give up the discard
+		 */
+		if ((sb->s_flags & SB_ACTIVE) && !err &&
+		    !atomic_read(&sbi->s_retry_alloc_pending)) {
+			grp = fd->efd_group;
+			if (grp != load_grp) {
+				if (load_grp != UINT_MAX)
+					ext4_mb_unload_buddy(&e4b);
+
+				err = ext4_mb_load_buddy(sb, grp, &e4b);
+				if (err) {
+					kmem_cache_free(ext4_free_data_cachep, fd);
+					load_grp = UINT_MAX;
+					continue;
+				} else {
+					load_grp = grp;
+				}
+			}
+
+			ext4_lock_group(sb, grp);
+			ext4_try_to_trim_range(sb, &e4b, fd->efd_start_cluster,
+						fd->efd_start_cluster + fd->efd_count - 1, 1);
+			ext4_unlock_group(sb, grp);
+		}
+		kmem_cache_free(ext4_free_data_cachep, fd);
+	}
+
+	if (load_grp != UINT_MAX)
+		ext4_mb_unload_buddy(&e4b);
+}
+
 int ext4_mb_init(struct super_block *sb)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -3376,6 +3447,9 @@ int ext4_mb_init(struct super_block *sb)
 	spin_lock_init(&sbi->s_md_lock);
 	sbi->s_mb_free_pending = 0;
 	INIT_LIST_HEAD(&sbi->s_freed_data_list);
+	INIT_LIST_HEAD(&sbi->s_discard_list);
+	INIT_WORK(&sbi->s_discard_work, ext4_discard_work);
+	atomic_set(&sbi->s_retry_alloc_pending, 0);
 
 	sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
 	sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
@@ -3474,6 +3548,14 @@ int ext4_mb_release(struct super_block *sb)
 	struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
 	int count;
 
+	if (test_opt(sb, DISCARD)) {
+		/*
+		 * wait the discard work to drain all of ext4_free_data
+		 */
+		flush_work(&sbi->s_discard_work);
+		WARN_ON_ONCE(!list_empty(&sbi->s_discard_list));
+	}
+
 	if (sbi->s_group_info) {
 		for (i = 0; i < ngroups; i++) {
 			cond_resched();
@@ -3596,7 +3678,6 @@ static void ext4_free_data_in_buddy(struct super_block *sb,
 		put_page(e4b.bd_bitmap_page);
 	}
 	ext4_unlock_group(sb, entry->efd_group);
-	kmem_cache_free(ext4_free_data_cachep, entry);
 	ext4_mb_unload_buddy(&e4b);
 
 	mb_debug(sb, "freed %d blocks in %d structures\n", count,
@@ -3611,10 +3692,9 @@ void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_free_data *entry, *tmp;
-	struct bio *discard_bio = NULL;
 	struct list_head freed_data_list;
 	struct list_head *cut_pos = NULL;
-	int err;
+	bool wake;
 
 	INIT_LIST_HEAD(&freed_data_list);
 
@@ -3629,30 +3709,20 @@ void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
 				  cut_pos);
 	spin_unlock(&sbi->s_md_lock);
 
-	if (test_opt(sb, DISCARD)) {
-		list_for_each_entry(entry, &freed_data_list, efd_list) {
-			err = ext4_issue_discard(sb, entry->efd_group,
-						 entry->efd_start_cluster,
-						 entry->efd_count,
-						 &discard_bio);
-			if (err && err != -EOPNOTSUPP) {
-				ext4_msg(sb, KERN_WARNING, "discard request in"
-					 " group:%d block:%d count:%d failed"
-					 " with %d", entry->efd_group,
-					 entry->efd_start_cluster,
-					 entry->efd_count, err);
-			} else if (err == -EOPNOTSUPP)
-				break;
-		}
+	list_for_each_entry(entry, &freed_data_list, efd_list)
+		ext4_free_data_in_buddy(sb, entry);
 
-		if (discard_bio) {
-			submit_bio_wait(discard_bio);
-			bio_put(discard_bio);
-		}
+	if (test_opt(sb, DISCARD)) {
+		spin_lock(&sbi->s_md_lock);
+		wake = list_empty(&sbi->s_discard_list);
+		list_splice_tail(&freed_data_list, &sbi->s_discard_list);
+		spin_unlock(&sbi->s_md_lock);
+		if (wake)
+			queue_work(system_unbound_wq, &sbi->s_discard_work);
+	} else {
+		list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
+			kmem_cache_free(ext4_free_data_cachep, entry);
 	}
-
-	list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
-		ext4_free_data_in_buddy(sb, entry);
 }
 
 int __init ext4_init_mballoc(void)
@@ -3726,7 +3796,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 	}
 
 	BUFFER_TRACE(bitmap_bh, "getting write access");
-	err = ext4_journal_get_write_access(handle, bitmap_bh);
+	err = ext4_journal_get_write_access(handle, sb, bitmap_bh,
+					    EXT4_JTR_NONE);
 	if (err)
 		goto out_err;
 
@@ -3739,7 +3810,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 			ext4_free_group_clusters(sb, gdp));
 
 	BUFFER_TRACE(gdp_bh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, gdp_bh);
+	err = ext4_journal_get_write_access(handle, sb, gdp_bh, EXT4_JTR_NONE);
 	if (err)
 		goto out_err;
 
@@ -5916,7 +5987,8 @@ do_more:
 	}
 
 	BUFFER_TRACE(bitmap_bh, "getting write access");
-	err = ext4_journal_get_write_access(handle, bitmap_bh);
+	err = ext4_journal_get_write_access(handle, sb, bitmap_bh,
+					    EXT4_JTR_NONE);
 	if (err)
 		goto error_return;
 
@@ -5926,7 +5998,7 @@ do_more:
 	 * using it
 	 */
 	BUFFER_TRACE(gd_bh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, gd_bh);
+	err = ext4_journal_get_write_access(handle, sb, gd_bh, EXT4_JTR_NONE);
 	if (err)
 		goto error_return;
 #ifdef AGGRESSIVE_CHECK
@@ -6107,7 +6179,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
 	}
 
 	BUFFER_TRACE(bitmap_bh, "getting write access");
-	err = ext4_journal_get_write_access(handle, bitmap_bh);
+	err = ext4_journal_get_write_access(handle, sb, bitmap_bh,
+					    EXT4_JTR_NONE);
 	if (err)
 		goto error_return;
 
@@ -6117,7 +6190,7 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
 	 * using it
 	 */
 	BUFFER_TRACE(gd_bh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, gd_bh);
+	err = ext4_journal_get_write_access(handle, sb, gd_bh, EXT4_JTR_NONE);
 	if (err)
 		goto error_return;
 
@@ -6183,19 +6256,19 @@ error_return:
  * @sb:		super block for the file system
  * @start:	starting block of the free extent in the alloc. group
  * @count:	number of blocks to TRIM
- * @group:	alloc. group we are working with
  * @e4b:	ext4 buddy for the group
  *
  * Trim "count" blocks starting at "start" in the "group". To assure that no
  * one will allocate those blocks, mark it as used in buddy bitmap. This must
  * be called with under the group lock.
  */
-static int ext4_trim_extent(struct super_block *sb, int start, int count,
-			     ext4_group_t group, struct ext4_buddy *e4b)
+static int ext4_trim_extent(struct super_block *sb,
+		int start, int count, struct ext4_buddy *e4b)
 __releases(bitlock)
 __acquires(bitlock)
 {
 	struct ext4_free_extent ex;
+	ext4_group_t group = e4b->bd_group;
 	int ret = 0;
 
 	trace_ext4_trim_extent(sb, group, start, count);
@@ -6218,51 +6291,21 @@ __acquires(bitlock)
 	return ret;
 }
 
-/**
- * ext4_trim_all_free -- function to trim all free space in alloc. group
- * @sb:			super block for file system
- * @group:		group to be trimmed
- * @start:		first group block to examine
- * @max:		last group block to examine
- * @minblocks:		minimum extent block count
- *
- * ext4_trim_all_free walks through group's buddy bitmap searching for free
- * extents. When the free block is found, ext4_trim_extent is called to TRIM
- * the extent.
- *
- *
- * ext4_trim_all_free walks through group's block bitmap searching for free
- * extents. When the free extent is found, mark it as used in group buddy
- * bitmap. Then issue a TRIM command on this extent and free the extent in
- * the group buddy bitmap. This is done until whole group is scanned.
- */
-static ext4_grpblk_t
-ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
-		   ext4_grpblk_t start, ext4_grpblk_t max,
-		   ext4_grpblk_t minblocks)
+static int ext4_try_to_trim_range(struct super_block *sb,
+		struct ext4_buddy *e4b, ext4_grpblk_t start,
+		ext4_grpblk_t max, ext4_grpblk_t minblocks)
+__acquires(ext4_group_lock_ptr(sb, e4b->bd_group))
+__releases(ext4_group_lock_ptr(sb, e4b->bd_group))
 {
+	ext4_grpblk_t next, count, free_count;
 	void *bitmap;
-	ext4_grpblk_t next, count = 0, free_count = 0;
-	struct ext4_buddy e4b;
 	int ret = 0;
 
-	trace_ext4_trim_all_free(sb, group, start, max);
-
-	ret = ext4_mb_load_buddy(sb, group, &e4b);
-	if (ret) {
-		ext4_warning(sb, "Error %d loading buddy information for %u",
-			     ret, group);
-		return ret;
-	}
-	bitmap = e4b.bd_bitmap;
-
-	ext4_lock_group(sb, group);
-	if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
-	    minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
-		goto out;
-
-	start = (e4b.bd_info->bb_first_free > start) ?
-		e4b.bd_info->bb_first_free : start;
+	bitmap = e4b->bd_bitmap;
+	start = (e4b->bd_info->bb_first_free > start) ?
+		e4b->bd_info->bb_first_free : start;
+	count = 0;
+	free_count = 0;
 
 	while (start <= max) {
 		start = mb_find_next_zero_bit(bitmap, max + 1, start);
@@ -6271,8 +6314,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
 		next = mb_find_next_bit(bitmap, max + 1, start);
 
 		if ((next - start) >= minblocks) {
-			ret = ext4_trim_extent(sb, start,
-					       next - start, group, &e4b);
+			ret = ext4_trim_extent(sb, start, next - start, e4b);
 			if (ret && ret != -EOPNOTSUPP)
 				break;
 			ret = 0;
@@ -6287,25 +6329,64 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
 		}
 
 		if (need_resched()) {
-			ext4_unlock_group(sb, group);
+			ext4_unlock_group(sb, e4b->bd_group);
 			cond_resched();
-			ext4_lock_group(sb, group);
+			ext4_lock_group(sb, e4b->bd_group);
 		}
 
-		if ((e4b.bd_info->bb_free - free_count) < minblocks)
+		if ((e4b->bd_info->bb_free - free_count) < minblocks)
 			break;
 	}
 
-	if (!ret) {
-		ret = count;
-		EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
+	return count;
+}
+
+/**
+ * ext4_trim_all_free -- function to trim all free space in alloc. group
+ * @sb:			super block for file system
+ * @group:		group to be trimmed
+ * @start:		first group block to examine
+ * @max:		last group block to examine
+ * @minblocks:		minimum extent block count
+ *
+ * ext4_trim_all_free walks through group's block bitmap searching for free
+ * extents. When the free extent is found, mark it as used in group buddy
+ * bitmap. Then issue a TRIM command on this extent and free the extent in
+ * the group buddy bitmap.
+ */
+static ext4_grpblk_t
+ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
+		   ext4_grpblk_t start, ext4_grpblk_t max,
+		   ext4_grpblk_t minblocks)
+{
+	struct ext4_buddy e4b;
+	int ret;
+
+	trace_ext4_trim_all_free(sb, group, start, max);
+
+	ret = ext4_mb_load_buddy(sb, group, &e4b);
+	if (ret) {
+		ext4_warning(sb, "Error %d loading buddy information for %u",
+			     ret, group);
+		return ret;
+	}
+
+	ext4_lock_group(sb, group);
+
+	if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) ||
+	    minblocks < atomic_read(&EXT4_SB(sb)->s_last_trim_minblks)) {
+		ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks);
+		if (ret >= 0)
+			EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
+	} else {
+		ret = 0;
 	}
-out:
+
 	ext4_unlock_group(sb, group);
 	ext4_mb_unload_buddy(&e4b);
 
 	ext4_debug("trimmed %d blocks in the group %d\n",
-		count, group);
+		ret, group);
 
 	return ret;
 }
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index f3bbcd4efb56..da7698341d7d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -70,7 +70,8 @@ static struct buffer_head *ext4_append(handle_t *handle,
 	inode->i_size += inode->i_sb->s_blocksize;
 	EXT4_I(inode)->i_disksize = inode->i_size;
 	BUFFER_TRACE(bh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, bh);
+	err = ext4_journal_get_write_access(handle, inode->i_sb, bh,
+					    EXT4_JTR_NONE);
 	if (err) {
 		brelse(bh);
 		ext4_std_error(inode->i_sb, err);
@@ -1927,12 +1928,14 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 	}
 
 	BUFFER_TRACE(*bh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, *bh);
+	err = ext4_journal_get_write_access(handle, dir->i_sb, *bh,
+					    EXT4_JTR_NONE);
 	if (err)
 		goto journal_error;
 
 	BUFFER_TRACE(frame->bh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, frame->bh);
+	err = ext4_journal_get_write_access(handle, dir->i_sb, frame->bh,
+					    EXT4_JTR_NONE);
 	if (err)
 		goto journal_error;
 
@@ -2109,7 +2112,8 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
 			return err;
 	}
 	BUFFER_TRACE(bh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, bh);
+	err = ext4_journal_get_write_access(handle, dir->i_sb, bh,
+					    EXT4_JTR_NONE);
 	if (err) {
 		ext4_std_error(dir->i_sb, err);
 		return err;
@@ -2167,7 +2171,8 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
 	blocksize =  dir->i_sb->s_blocksize;
 	dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino));
 	BUFFER_TRACE(bh, "get_write_access");
-	retval = ext4_journal_get_write_access(handle, bh);
+	retval = ext4_journal_get_write_access(handle, dir->i_sb, bh,
+					       EXT4_JTR_NONE);
 	if (retval) {
 		ext4_std_error(dir->i_sb, retval);
 		brelse(bh);
@@ -2419,7 +2424,7 @@ again:
 	}
 
 	BUFFER_TRACE(bh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, bh);
+	err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE);
 	if (err)
 		goto journal_error;
 
@@ -2476,7 +2481,8 @@ again:
 		node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize,
 							   sb->s_blocksize);
 		BUFFER_TRACE(frame->bh, "get_write_access");
-		err = ext4_journal_get_write_access(handle, frame->bh);
+		err = ext4_journal_get_write_access(handle, sb, frame->bh,
+						    EXT4_JTR_NONE);
 		if (err)
 			goto journal_error;
 		if (!add_level) {
@@ -2486,8 +2492,9 @@ again:
 				       icount1, icount2));
 
 			BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
-			err = ext4_journal_get_write_access(handle,
-							     (frame - 1)->bh);
+			err = ext4_journal_get_write_access(handle, sb,
+							    (frame - 1)->bh,
+							    EXT4_JTR_NONE);
 			if (err)
 				goto journal_error;
 
@@ -2636,7 +2643,8 @@ static int ext4_delete_entry(handle_t *handle,
 		csum_size = sizeof(struct ext4_dir_entry_tail);
 
 	BUFFER_TRACE(bh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, bh);
+	err = ext4_journal_get_write_access(handle, dir->i_sb, bh,
+					    EXT4_JTR_NONE);
 	if (unlikely(err))
 		goto out;
 
@@ -3046,186 +3054,6 @@ bool ext4_empty_dir(struct inode *inode)
 	return true;
 }
 
-/*
- * ext4_orphan_add() links an unlinked or truncated inode into a list of
- * such inodes, starting at the superblock, in case we crash before the
- * file is closed/deleted, or in case the inode truncate spans multiple
- * transactions and the last transaction is not recovered after a crash.
- *
- * At filesystem recovery time, we walk this list deleting unlinked
- * inodes and truncating linked inodes in ext4_orphan_cleanup().
- *
- * Orphan list manipulation functions must be called under i_mutex unless
- * we are just creating the inode or deleting it.
- */
-int ext4_orphan_add(handle_t *handle, struct inode *inode)
-{
-	struct super_block *sb = inode->i_sb;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	struct ext4_iloc iloc;
-	int err = 0, rc;
-	bool dirty = false;
-
-	if (!sbi->s_journal || is_bad_inode(inode))
-		return 0;
-
-	WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
-		     !inode_is_locked(inode));
-	/*
-	 * Exit early if inode already is on orphan list. This is a big speedup
-	 * since we don't have to contend on the global s_orphan_lock.
-	 */
-	if (!list_empty(&EXT4_I(inode)->i_orphan))
-		return 0;
-
-	/*
-	 * Orphan handling is only valid for files with data blocks
-	 * being truncated, or files being unlinked. Note that we either
-	 * hold i_mutex, or the inode can not be referenced from outside,
-	 * so i_nlink should not be bumped due to race
-	 */
-	ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-		  S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
-
-	BUFFER_TRACE(sbi->s_sbh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, sbi->s_sbh);
-	if (err)
-		goto out;
-
-	err = ext4_reserve_inode_write(handle, inode, &iloc);
-	if (err)
-		goto out;
-
-	mutex_lock(&sbi->s_orphan_lock);
-	/*
-	 * Due to previous errors inode may be already a part of on-disk
-	 * orphan list. If so skip on-disk list modification.
-	 */
-	if (!NEXT_ORPHAN(inode) || NEXT_ORPHAN(inode) >
-	    (le32_to_cpu(sbi->s_es->s_inodes_count))) {
-		/* Insert this inode at the head of the on-disk orphan list */
-		NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan);
-		lock_buffer(sbi->s_sbh);
-		sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
-		ext4_superblock_csum_set(sb);
-		unlock_buffer(sbi->s_sbh);
-		dirty = true;
-	}
-	list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan);
-	mutex_unlock(&sbi->s_orphan_lock);
-
-	if (dirty) {
-		err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
-		rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
-		if (!err)
-			err = rc;
-		if (err) {
-			/*
-			 * We have to remove inode from in-memory list if
-			 * addition to on disk orphan list failed. Stray orphan
-			 * list entries can cause panics at unmount time.
-			 */
-			mutex_lock(&sbi->s_orphan_lock);
-			list_del_init(&EXT4_I(inode)->i_orphan);
-			mutex_unlock(&sbi->s_orphan_lock);
-		}
-	} else
-		brelse(iloc.bh);
-
-	jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
-	jbd_debug(4, "orphan inode %lu will point to %d\n",
-			inode->i_ino, NEXT_ORPHAN(inode));
-out:
-	ext4_std_error(sb, err);
-	return err;
-}
-
-/*
- * ext4_orphan_del() removes an unlinked or truncated inode from the list
- * of such inodes stored on disk, because it is finally being cleaned up.
- */
-int ext4_orphan_del(handle_t *handle, struct inode *inode)
-{
-	struct list_head *prev;
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-	__u32 ino_next;
-	struct ext4_iloc iloc;
-	int err = 0;
-
-	if (!sbi->s_journal && !(sbi->s_mount_state & EXT4_ORPHAN_FS))
-		return 0;
-
-	WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
-		     !inode_is_locked(inode));
-	/* Do this quick check before taking global s_orphan_lock. */
-	if (list_empty(&ei->i_orphan))
-		return 0;
-
-	if (handle) {
-		/* Grab inode buffer early before taking global s_orphan_lock */
-		err = ext4_reserve_inode_write(handle, inode, &iloc);
-	}
-
-	mutex_lock(&sbi->s_orphan_lock);
-	jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
-
-	prev = ei->i_orphan.prev;
-	list_del_init(&ei->i_orphan);
-
-	/* If we're on an error path, we may not have a valid
-	 * transaction handle with which to update the orphan list on
-	 * disk, but we still need to remove the inode from the linked
-	 * list in memory. */
-	if (!handle || err) {
-		mutex_unlock(&sbi->s_orphan_lock);
-		goto out_err;
-	}
-
-	ino_next = NEXT_ORPHAN(inode);
-	if (prev == &sbi->s_orphan) {
-		jbd_debug(4, "superblock will point to %u\n", ino_next);
-		BUFFER_TRACE(sbi->s_sbh, "get_write_access");
-		err = ext4_journal_get_write_access(handle, sbi->s_sbh);
-		if (err) {
-			mutex_unlock(&sbi->s_orphan_lock);
-			goto out_brelse;
-		}
-		lock_buffer(sbi->s_sbh);
-		sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
-		ext4_superblock_csum_set(inode->i_sb);
-		unlock_buffer(sbi->s_sbh);
-		mutex_unlock(&sbi->s_orphan_lock);
-		err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
-	} else {
-		struct ext4_iloc iloc2;
-		struct inode *i_prev =
-			&list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode;
-
-		jbd_debug(4, "orphan inode %lu will point to %u\n",
-			  i_prev->i_ino, ino_next);
-		err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
-		if (err) {
-			mutex_unlock(&sbi->s_orphan_lock);
-			goto out_brelse;
-		}
-		NEXT_ORPHAN(i_prev) = ino_next;
-		err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2);
-		mutex_unlock(&sbi->s_orphan_lock);
-	}
-	if (err)
-		goto out_brelse;
-	NEXT_ORPHAN(inode) = 0;
-	err = ext4_mark_iloc_dirty(handle, inode, &iloc);
-out_err:
-	ext4_std_error(inode->i_sb, err);
-	return err;
-
-out_brelse:
-	brelse(iloc.bh);
-	goto out_err;
-}
-
 static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
 {
 	int retval;
@@ -3675,7 +3503,8 @@ static int ext4_rename_dir_prepare(handle_t *handle, struct ext4_renament *ent)
 	if (le32_to_cpu(ent->parent_de->inode) != ent->dir->i_ino)
 		return -EFSCORRUPTED;
 	BUFFER_TRACE(ent->dir_bh, "get_write_access");
-	return ext4_journal_get_write_access(handle, ent->dir_bh);
+	return ext4_journal_get_write_access(handle, ent->dir->i_sb,
+					     ent->dir_bh, EXT4_JTR_NONE);
 }
 
 static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent,
@@ -3710,7 +3539,8 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent,
 	int retval, retval2;
 
 	BUFFER_TRACE(ent->bh, "get write access");
-	retval = ext4_journal_get_write_access(handle, ent->bh);
+	retval = ext4_journal_get_write_access(handle, ent->dir->i_sb, ent->bh,
+					       EXT4_JTR_NONE);
 	if (retval)
 		return retval;
 	ent->de->inode = cpu_to_le32(ino);
diff --git a/fs/ext4/orphan.c b/fs/ext4/orphan.c
new file mode 100644
index 000000000000..53adc8f570a3
--- /dev/null
+++ b/fs/ext4/orphan.c
@@ -0,0 +1,652 @@
+/*
+ * Ext4 orphan inode handling
+ */
+#include <linux/fs.h>
+#include <linux/quotaops.h>
+#include <linux/buffer_head.h>
+
+#include "ext4.h"
+#include "ext4_jbd2.h"
+
+static int ext4_orphan_file_add(handle_t *handle, struct inode *inode)
+{
+	int i, j, start;
+	struct ext4_orphan_info *oi = &EXT4_SB(inode->i_sb)->s_orphan_info;
+	int ret = 0;
+	bool found = false;
+	__le32 *bdata;
+	int inodes_per_ob = ext4_inodes_per_orphan_block(inode->i_sb);
+	int looped = 0;
+
+	/*
+	 * Find block with free orphan entry. Use CPU number for a naive hash
+	 * for a search start in the orphan file
+	 */
+	start = raw_smp_processor_id()*13 % oi->of_blocks;
+	i = start;
+	do {
+		if (atomic_dec_if_positive(&oi->of_binfo[i].ob_free_entries)
+		    >= 0) {
+			found = true;
+			break;
+		}
+		if (++i >= oi->of_blocks)
+			i = 0;
+	} while (i != start);
+
+	if (!found) {
+		/*
+		 * For now we don't grow or shrink orphan file. We just use
+		 * whatever was allocated at mke2fs time. The additional
+		 * credits we would have to reserve for each orphan inode
+		 * operation just don't seem worth it.
+		 */
+		return -ENOSPC;
+	}
+
+	ret = ext4_journal_get_write_access(handle, inode->i_sb,
+				oi->of_binfo[i].ob_bh, EXT4_JTR_ORPHAN_FILE);
+	if (ret) {
+		atomic_inc(&oi->of_binfo[i].ob_free_entries);
+		return ret;
+	}
+
+	bdata = (__le32 *)(oi->of_binfo[i].ob_bh->b_data);
+	/* Find empty slot in a block */
+	j = 0;
+	do {
+		if (looped) {
+			/*
+			 * Did we walk through the block several times without
+			 * finding free entry? It is theoretically possible
+			 * if entries get constantly allocated and freed or
+			 * if the block is corrupted. Avoid indefinite looping
+			 * and bail. We'll use orphan list instead.
+			 */
+			if (looped > 3) {
+				atomic_inc(&oi->of_binfo[i].ob_free_entries);
+				return -ENOSPC;
+			}
+			cond_resched();
+		}
+		while (bdata[j]) {
+			if (++j >= inodes_per_ob) {
+				j = 0;
+				looped++;
+			}
+		}
+	} while (cmpxchg(&bdata[j], (__le32)0, cpu_to_le32(inode->i_ino)) !=
+		 (__le32)0);
+
+	EXT4_I(inode)->i_orphan_idx = i * inodes_per_ob + j;
+	ext4_set_inode_state(inode, EXT4_STATE_ORPHAN_FILE);
+
+	return ext4_handle_dirty_metadata(handle, NULL, oi->of_binfo[i].ob_bh);
+}
+
+/*
+ * ext4_orphan_add() links an unlinked or truncated inode into a list of
+ * such inodes, starting at the superblock, in case we crash before the
+ * file is closed/deleted, or in case the inode truncate spans multiple
+ * transactions and the last transaction is not recovered after a crash.
+ *
+ * At filesystem recovery time, we walk this list deleting unlinked
+ * inodes and truncating linked inodes in ext4_orphan_cleanup().
+ *
+ * Orphan list manipulation functions must be called under i_mutex unless
+ * we are just creating the inode or deleting it.
+ */
+int ext4_orphan_add(handle_t *handle, struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_iloc iloc;
+	int err = 0, rc;
+	bool dirty = false;
+
+	if (!sbi->s_journal || is_bad_inode(inode))
+		return 0;
+
+	WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
+		     !inode_is_locked(inode));
+	/*
+	 * Inode orphaned in orphan file or in orphan list?
+	 */
+	if (ext4_test_inode_state(inode, EXT4_STATE_ORPHAN_FILE) ||
+	    !list_empty(&EXT4_I(inode)->i_orphan))
+		return 0;
+
+	/*
+	 * Orphan handling is only valid for files with data blocks
+	 * being truncated, or files being unlinked. Note that we either
+	 * hold i_mutex, or the inode can not be referenced from outside,
+	 * so i_nlink should not be bumped due to race
+	 */
+	ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+		  S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
+
+	if (sbi->s_orphan_info.of_blocks) {
+		err = ext4_orphan_file_add(handle, inode);
+		/*
+		 * Fallback to normal orphan list of orphan file is
+		 * out of space
+		 */
+		if (err != -ENOSPC)
+			return err;
+	}
+
+	BUFFER_TRACE(sbi->s_sbh, "get_write_access");
+	err = ext4_journal_get_write_access(handle, sb, sbi->s_sbh,
+					    EXT4_JTR_NONE);
+	if (err)
+		goto out;
+
+	err = ext4_reserve_inode_write(handle, inode, &iloc);
+	if (err)
+		goto out;
+
+	mutex_lock(&sbi->s_orphan_lock);
+	/*
+	 * Due to previous errors inode may be already a part of on-disk
+	 * orphan list. If so skip on-disk list modification.
+	 */
+	if (!NEXT_ORPHAN(inode) || NEXT_ORPHAN(inode) >
+	    (le32_to_cpu(sbi->s_es->s_inodes_count))) {
+		/* Insert this inode at the head of the on-disk orphan list */
+		NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan);
+		lock_buffer(sbi->s_sbh);
+		sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
+		ext4_superblock_csum_set(sb);
+		unlock_buffer(sbi->s_sbh);
+		dirty = true;
+	}
+	list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan);
+	mutex_unlock(&sbi->s_orphan_lock);
+
+	if (dirty) {
+		err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
+		rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
+		if (!err)
+			err = rc;
+		if (err) {
+			/*
+			 * We have to remove inode from in-memory list if
+			 * addition to on disk orphan list failed. Stray orphan
+			 * list entries can cause panics at unmount time.
+			 */
+			mutex_lock(&sbi->s_orphan_lock);
+			list_del_init(&EXT4_I(inode)->i_orphan);
+			mutex_unlock(&sbi->s_orphan_lock);
+		}
+	} else
+		brelse(iloc.bh);
+
+	jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
+	jbd_debug(4, "orphan inode %lu will point to %d\n",
+			inode->i_ino, NEXT_ORPHAN(inode));
+out:
+	ext4_std_error(sb, err);
+	return err;
+}
+
+static int ext4_orphan_file_del(handle_t *handle, struct inode *inode)
+{
+	struct ext4_orphan_info *oi = &EXT4_SB(inode->i_sb)->s_orphan_info;
+	__le32 *bdata;
+	int blk, off;
+	int inodes_per_ob = ext4_inodes_per_orphan_block(inode->i_sb);
+	int ret = 0;
+
+	if (!handle)
+		goto out;
+	blk = EXT4_I(inode)->i_orphan_idx / inodes_per_ob;
+	off = EXT4_I(inode)->i_orphan_idx % inodes_per_ob;
+	if (WARN_ON_ONCE(blk >= oi->of_blocks))
+		goto out;
+
+	ret = ext4_journal_get_write_access(handle, inode->i_sb,
+				oi->of_binfo[blk].ob_bh, EXT4_JTR_ORPHAN_FILE);
+	if (ret)
+		goto out;
+
+	bdata = (__le32 *)(oi->of_binfo[blk].ob_bh->b_data);
+	bdata[off] = 0;
+	atomic_inc(&oi->of_binfo[blk].ob_free_entries);
+	ret = ext4_handle_dirty_metadata(handle, NULL, oi->of_binfo[blk].ob_bh);
+out:
+	ext4_clear_inode_state(inode, EXT4_STATE_ORPHAN_FILE);
+	INIT_LIST_HEAD(&EXT4_I(inode)->i_orphan);
+
+	return ret;
+}
+
+/*
+ * ext4_orphan_del() removes an unlinked or truncated inode from the list
+ * of such inodes stored on disk, because it is finally being cleaned up.
+ */
+int ext4_orphan_del(handle_t *handle, struct inode *inode)
+{
+	struct list_head *prev;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	__u32 ino_next;
+	struct ext4_iloc iloc;
+	int err = 0;
+
+	if (!sbi->s_journal && !(sbi->s_mount_state & EXT4_ORPHAN_FS))
+		return 0;
+
+	WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
+		     !inode_is_locked(inode));
+	if (ext4_test_inode_state(inode, EXT4_STATE_ORPHAN_FILE))
+		return ext4_orphan_file_del(handle, inode);
+
+	/* Do this quick check before taking global s_orphan_lock. */
+	if (list_empty(&ei->i_orphan))
+		return 0;
+
+	if (handle) {
+		/* Grab inode buffer early before taking global s_orphan_lock */
+		err = ext4_reserve_inode_write(handle, inode, &iloc);
+	}
+
+	mutex_lock(&sbi->s_orphan_lock);
+	jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
+
+	prev = ei->i_orphan.prev;
+	list_del_init(&ei->i_orphan);
+
+	/* If we're on an error path, we may not have a valid
+	 * transaction handle with which to update the orphan list on
+	 * disk, but we still need to remove the inode from the linked
+	 * list in memory. */
+	if (!handle || err) {
+		mutex_unlock(&sbi->s_orphan_lock);
+		goto out_err;
+	}
+
+	ino_next = NEXT_ORPHAN(inode);
+	if (prev == &sbi->s_orphan) {
+		jbd_debug(4, "superblock will point to %u\n", ino_next);
+		BUFFER_TRACE(sbi->s_sbh, "get_write_access");
+		err = ext4_journal_get_write_access(handle, inode->i_sb,
+						    sbi->s_sbh, EXT4_JTR_NONE);
+		if (err) {
+			mutex_unlock(&sbi->s_orphan_lock);
+			goto out_brelse;
+		}
+		lock_buffer(sbi->s_sbh);
+		sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
+		ext4_superblock_csum_set(inode->i_sb);
+		unlock_buffer(sbi->s_sbh);
+		mutex_unlock(&sbi->s_orphan_lock);
+		err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
+	} else {
+		struct ext4_iloc iloc2;
+		struct inode *i_prev =
+			&list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode;
+
+		jbd_debug(4, "orphan inode %lu will point to %u\n",
+			  i_prev->i_ino, ino_next);
+		err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
+		if (err) {
+			mutex_unlock(&sbi->s_orphan_lock);
+			goto out_brelse;
+		}
+		NEXT_ORPHAN(i_prev) = ino_next;
+		err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2);
+		mutex_unlock(&sbi->s_orphan_lock);
+	}
+	if (err)
+		goto out_brelse;
+	NEXT_ORPHAN(inode) = 0;
+	err = ext4_mark_iloc_dirty(handle, inode, &iloc);
+out_err:
+	ext4_std_error(inode->i_sb, err);
+	return err;
+
+out_brelse:
+	brelse(iloc.bh);
+	goto out_err;
+}
+
+#ifdef CONFIG_QUOTA
+static int ext4_quota_on_mount(struct super_block *sb, int type)
+{
+	return dquot_quota_on_mount(sb,
+		rcu_dereference_protected(EXT4_SB(sb)->s_qf_names[type],
+					  lockdep_is_held(&sb->s_umount)),
+		EXT4_SB(sb)->s_jquota_fmt, type);
+}
+#endif
+
+static void ext4_process_orphan(struct inode *inode,
+				int *nr_truncates, int *nr_orphans)
+{
+	struct super_block *sb = inode->i_sb;
+	int ret;
+
+	dquot_initialize(inode);
+	if (inode->i_nlink) {
+		if (test_opt(sb, DEBUG))
+			ext4_msg(sb, KERN_DEBUG,
+				"%s: truncating inode %lu to %lld bytes",
+				__func__, inode->i_ino, inode->i_size);
+		jbd_debug(2, "truncating inode %lu to %lld bytes\n",
+			  inode->i_ino, inode->i_size);
+		inode_lock(inode);
+		truncate_inode_pages(inode->i_mapping, inode->i_size);
+		ret = ext4_truncate(inode);
+		if (ret) {
+			/*
+			 * We need to clean up the in-core orphan list
+			 * manually if ext4_truncate() failed to get a
+			 * transaction handle.
+			 */
+			ext4_orphan_del(NULL, inode);
+			ext4_std_error(inode->i_sb, ret);
+		}
+		inode_unlock(inode);
+		(*nr_truncates)++;
+	} else {
+		if (test_opt(sb, DEBUG))
+			ext4_msg(sb, KERN_DEBUG,
+				"%s: deleting unreferenced inode %lu",
+				__func__, inode->i_ino);
+		jbd_debug(2, "deleting unreferenced inode %lu\n",
+			  inode->i_ino);
+		(*nr_orphans)++;
+	}
+	iput(inode);  /* The delete magic happens here! */
+}
+
+/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
+ * the superblock) which were deleted from all directories, but held open by
+ * a process at the time of a crash.  We walk the list and try to delete these
+ * inodes at recovery time (only with a read-write filesystem).
+ *
+ * In order to keep the orphan inode chain consistent during traversal (in
+ * case of crash during recovery), we link each inode into the superblock
+ * orphan list_head and handle it the same way as an inode deletion during
+ * normal operation (which journals the operations for us).
+ *
+ * We only do an iget() and an iput() on each inode, which is very safe if we
+ * accidentally point at an in-use or already deleted inode.  The worst that
+ * can happen in this case is that we get a "bit already cleared" message from
+ * ext4_free_inode().  The only reason we would point at a wrong inode is if
+ * e2fsck was run on this filesystem, and it must have already done the orphan
+ * inode cleanup for us, so we can safely abort without any further action.
+ */
+void ext4_orphan_cleanup(struct super_block *sb, struct ext4_super_block *es)
+{
+	unsigned int s_flags = sb->s_flags;
+	int nr_orphans = 0, nr_truncates = 0;
+	struct inode *inode;
+	int i, j;
+#ifdef CONFIG_QUOTA
+	int quota_update = 0;
+#endif
+	__le32 *bdata;
+	struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
+	int inodes_per_ob = ext4_inodes_per_orphan_block(sb);
+
+	if (!es->s_last_orphan && !oi->of_blocks) {
+		jbd_debug(4, "no orphan inodes to clean up\n");
+		return;
+	}
+
+	if (bdev_read_only(sb->s_bdev)) {
+		ext4_msg(sb, KERN_ERR, "write access "
+			"unavailable, skipping orphan cleanup");
+		return;
+	}
+
+	/* Check if feature set would not allow a r/w mount */
+	if (!ext4_feature_set_ok(sb, 0)) {
+		ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
+			 "unknown ROCOMPAT features");
+		return;
+	}
+
+	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
+		/* don't clear list on RO mount w/ errors */
+		if (es->s_last_orphan && !(s_flags & SB_RDONLY)) {
+			ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
+				  "clearing orphan list.\n");
+			es->s_last_orphan = 0;
+		}
+		jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
+		return;
+	}
+
+	if (s_flags & SB_RDONLY) {
+		ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
+		sb->s_flags &= ~SB_RDONLY;
+	}
+#ifdef CONFIG_QUOTA
+	/*
+	 * Turn on quotas which were not enabled for read-only mounts if
+	 * filesystem has quota feature, so that they are updated correctly.
+	 */
+	if (ext4_has_feature_quota(sb) && (s_flags & SB_RDONLY)) {
+		int ret = ext4_enable_quotas(sb);
+
+		if (!ret)
+			quota_update = 1;
+		else
+			ext4_msg(sb, KERN_ERR,
+				"Cannot turn on quotas: error %d", ret);
+	}
+
+	/* Turn on journaled quotas used for old sytle */
+	for (i = 0; i < EXT4_MAXQUOTAS; i++) {
+		if (EXT4_SB(sb)->s_qf_names[i]) {
+			int ret = ext4_quota_on_mount(sb, i);
+
+			if (!ret)
+				quota_update = 1;
+			else
+				ext4_msg(sb, KERN_ERR,
+					"Cannot turn on journaled "
+					"quota: type %d: error %d", i, ret);
+		}
+	}
+#endif
+
+	while (es->s_last_orphan) {
+		/*
+		 * We may have encountered an error during cleanup; if
+		 * so, skip the rest.
+		 */
+		if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
+			jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
+			es->s_last_orphan = 0;
+			break;
+		}
+
+		inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
+		if (IS_ERR(inode)) {
+			es->s_last_orphan = 0;
+			break;
+		}
+
+		list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
+		ext4_process_orphan(inode, &nr_truncates, &nr_orphans);
+	}
+
+	for (i = 0; i < oi->of_blocks; i++) {
+		bdata = (__le32 *)(oi->of_binfo[i].ob_bh->b_data);
+		for (j = 0; j < inodes_per_ob; j++) {
+			if (!bdata[j])
+				continue;
+			inode = ext4_orphan_get(sb, le32_to_cpu(bdata[j]));
+			if (IS_ERR(inode))
+				continue;
+			ext4_set_inode_state(inode, EXT4_STATE_ORPHAN_FILE);
+			EXT4_I(inode)->i_orphan_idx = i * inodes_per_ob + j;
+			ext4_process_orphan(inode, &nr_truncates, &nr_orphans);
+		}
+	}
+
+#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
+
+	if (nr_orphans)
+		ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
+		       PLURAL(nr_orphans));
+	if (nr_truncates)
+		ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
+		       PLURAL(nr_truncates));
+#ifdef CONFIG_QUOTA
+	/* Turn off quotas if they were enabled for orphan cleanup */
+	if (quota_update) {
+		for (i = 0; i < EXT4_MAXQUOTAS; i++) {
+			if (sb_dqopt(sb)->files[i])
+				dquot_quota_off(sb, i);
+		}
+	}
+#endif
+	sb->s_flags = s_flags; /* Restore SB_RDONLY status */
+}
+
+void ext4_release_orphan_info(struct super_block *sb)
+{
+	int i;
+	struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
+
+	if (!oi->of_blocks)
+		return;
+	for (i = 0; i < oi->of_blocks; i++)
+		brelse(oi->of_binfo[i].ob_bh);
+	kfree(oi->of_binfo);
+}
+
+static struct ext4_orphan_block_tail *ext4_orphan_block_tail(
+						struct super_block *sb,
+						struct buffer_head *bh)
+{
+	return (struct ext4_orphan_block_tail *)(bh->b_data + sb->s_blocksize -
+				sizeof(struct ext4_orphan_block_tail));
+}
+
+static int ext4_orphan_file_block_csum_verify(struct super_block *sb,
+					      struct buffer_head *bh)
+{
+	__u32 calculated;
+	int inodes_per_ob = ext4_inodes_per_orphan_block(sb);
+	struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
+	struct ext4_orphan_block_tail *ot;
+	__le64 dsk_block_nr = cpu_to_le64(bh->b_blocknr);
+
+	if (!ext4_has_metadata_csum(sb))
+		return 1;
+
+	ot = ext4_orphan_block_tail(sb, bh);
+	calculated = ext4_chksum(EXT4_SB(sb), oi->of_csum_seed,
+				 (__u8 *)&dsk_block_nr, sizeof(dsk_block_nr));
+	calculated = ext4_chksum(EXT4_SB(sb), calculated, (__u8 *)bh->b_data,
+				 inodes_per_ob * sizeof(__u32));
+	return le32_to_cpu(ot->ob_checksum) == calculated;
+}
+
+/* This gets called only when checksumming is enabled */
+void ext4_orphan_file_block_trigger(struct jbd2_buffer_trigger_type *triggers,
+				    struct buffer_head *bh,
+				    void *data, size_t size)
+{
+	struct super_block *sb = EXT4_TRIGGER(triggers)->sb;
+	__u32 csum;
+	int inodes_per_ob = ext4_inodes_per_orphan_block(sb);
+	struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
+	struct ext4_orphan_block_tail *ot;
+	__le64 dsk_block_nr = cpu_to_le64(bh->b_blocknr);
+
+	csum = ext4_chksum(EXT4_SB(sb), oi->of_csum_seed,
+			   (__u8 *)&dsk_block_nr, sizeof(dsk_block_nr));
+	csum = ext4_chksum(EXT4_SB(sb), csum, (__u8 *)data,
+			   inodes_per_ob * sizeof(__u32));
+	ot = ext4_orphan_block_tail(sb, bh);
+	ot->ob_checksum = cpu_to_le32(csum);
+}
+
+int ext4_init_orphan_info(struct super_block *sb)
+{
+	struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
+	struct inode *inode;
+	int i, j;
+	int ret;
+	int free;
+	__le32 *bdata;
+	int inodes_per_ob = ext4_inodes_per_orphan_block(sb);
+	struct ext4_orphan_block_tail *ot;
+	ino_t orphan_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_orphan_file_inum);
+
+	if (!ext4_has_feature_orphan_file(sb))
+		return 0;
+
+	inode = ext4_iget(sb, orphan_ino, EXT4_IGET_SPECIAL);
+	if (IS_ERR(inode)) {
+		ext4_msg(sb, KERN_ERR, "get orphan inode failed");
+		return PTR_ERR(inode);
+	}
+	oi->of_blocks = inode->i_size >> sb->s_blocksize_bits;
+	oi->of_csum_seed = EXT4_I(inode)->i_csum_seed;
+	oi->of_binfo = kmalloc(oi->of_blocks*sizeof(struct ext4_orphan_block),
+			       GFP_KERNEL);
+	if (!oi->of_binfo) {
+		ret = -ENOMEM;
+		goto out_put;
+	}
+	for (i = 0; i < oi->of_blocks; i++) {
+		oi->of_binfo[i].ob_bh = ext4_bread(NULL, inode, i, 0);
+		if (IS_ERR(oi->of_binfo[i].ob_bh)) {
+			ret = PTR_ERR(oi->of_binfo[i].ob_bh);
+			goto out_free;
+		}
+		if (!oi->of_binfo[i].ob_bh) {
+			ret = -EIO;
+			goto out_free;
+		}
+		ot = ext4_orphan_block_tail(sb, oi->of_binfo[i].ob_bh);
+		if (le32_to_cpu(ot->ob_magic) != EXT4_ORPHAN_BLOCK_MAGIC) {
+			ext4_error(sb, "orphan file block %d: bad magic", i);
+			ret = -EIO;
+			goto out_free;
+		}
+		if (!ext4_orphan_file_block_csum_verify(sb,
+						oi->of_binfo[i].ob_bh)) {
+			ext4_error(sb, "orphan file block %d: bad checksum", i);
+			ret = -EIO;
+			goto out_free;
+		}
+		bdata = (__le32 *)(oi->of_binfo[i].ob_bh->b_data);
+		free = 0;
+		for (j = 0; j < inodes_per_ob; j++)
+			if (bdata[j] == 0)
+				free++;
+		atomic_set(&oi->of_binfo[i].ob_free_entries, free);
+	}
+	iput(inode);
+	return 0;
+out_free:
+	for (i--; i >= 0; i--)
+		brelse(oi->of_binfo[i].ob_bh);
+	kfree(oi->of_binfo);
+out_put:
+	iput(inode);
+	return ret;
+}
+
+int ext4_orphan_file_empty(struct super_block *sb)
+{
+	struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
+	int i;
+	int inodes_per_ob = ext4_inodes_per_orphan_block(sb);
+
+	if (!ext4_has_feature_orphan_file(sb))
+		return 1;
+	for (i = 0; i < oi->of_blocks; i++)
+		if (atomic_read(&oi->of_binfo[i].ob_free_entries) !=
+		    inodes_per_ob)
+			return 0;
+	return 1;
+}
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 7a9f1adef679..b63cb88ccdae 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -409,7 +409,8 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
 	if (unlikely(!bh))
 		return ERR_PTR(-ENOMEM);
 	BUFFER_TRACE(bh, "get_write_access");
-	if ((err = ext4_journal_get_write_access(handle, bh))) {
+	err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE);
+	if (err) {
 		brelse(bh);
 		bh = ERR_PTR(err);
 	} else {
@@ -474,7 +475,8 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
 			return -ENOMEM;
 
 		BUFFER_TRACE(bh, "get_write_access");
-		err = ext4_journal_get_write_access(handle, bh);
+		err = ext4_journal_get_write_access(handle, sb, bh,
+						    EXT4_JTR_NONE);
 		if (err) {
 			brelse(bh);
 			return err;
@@ -569,7 +571,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
 			}
 
 			BUFFER_TRACE(gdb, "get_write_access");
-			err = ext4_journal_get_write_access(handle, gdb);
+			err = ext4_journal_get_write_access(handle, sb, gdb,
+							    EXT4_JTR_NONE);
 			if (err) {
 				brelse(gdb);
 				goto out;
@@ -837,17 +840,18 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	}
 
 	BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
+	err = ext4_journal_get_write_access(handle, sb, EXT4_SB(sb)->s_sbh,
+					    EXT4_JTR_NONE);
 	if (unlikely(err))
 		goto errout;
 
 	BUFFER_TRACE(gdb_bh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, gdb_bh);
+	err = ext4_journal_get_write_access(handle, sb, gdb_bh, EXT4_JTR_NONE);
 	if (unlikely(err))
 		goto errout;
 
 	BUFFER_TRACE(dind, "get_write_access");
-	err = ext4_journal_get_write_access(handle, dind);
+	err = ext4_journal_get_write_access(handle, sb, dind, EXT4_JTR_NONE);
 	if (unlikely(err)) {
 		ext4_std_error(sb, err);
 		goto errout;
@@ -956,7 +960,7 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
 	n_group_desc[gdb_num] = gdb_bh;
 
 	BUFFER_TRACE(gdb_bh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, gdb_bh);
+	err = ext4_journal_get_write_access(handle, sb, gdb_bh, EXT4_JTR_NONE);
 	if (err) {
 		kvfree(n_group_desc);
 		brelse(gdb_bh);
@@ -1042,7 +1046,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
 
 	for (i = 0; i < reserved_gdb; i++) {
 		BUFFER_TRACE(primary[i], "get_write_access");
-		if ((err = ext4_journal_get_write_access(handle, primary[i])))
+		if ((err = ext4_journal_get_write_access(handle, sb, primary[i],
+							 EXT4_JTR_NONE)))
 			goto exit_bh;
 	}
 
@@ -1149,10 +1154,9 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data,
 			   backup_block, backup_block -
 			   ext4_group_first_block_no(sb, group));
 		BUFFER_TRACE(bh, "get_write_access");
-		if ((err = ext4_journal_get_write_access(handle, bh))) {
-			brelse(bh);
+		if ((err = ext4_journal_get_write_access(handle, sb, bh,
+							 EXT4_JTR_NONE)))
 			break;
-		}
 		lock_buffer(bh);
 		memcpy(bh->b_data, data, size);
 		if (rest)
@@ -1232,7 +1236,8 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
 			gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc,
 						     gdb_num);
 			BUFFER_TRACE(gdb_bh, "get_write_access");
-			err = ext4_journal_get_write_access(handle, gdb_bh);
+			err = ext4_journal_get_write_access(handle, sb, gdb_bh,
+							    EXT4_JTR_NONE);
 
 			if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group))
 				err = reserve_backup_gdb(handle, resize_inode, group);
@@ -1509,7 +1514,8 @@ static int ext4_flex_group_add(struct super_block *sb,
 	}
 
 	BUFFER_TRACE(sbi->s_sbh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, sbi->s_sbh);
+	err = ext4_journal_get_write_access(handle, sb, sbi->s_sbh,
+					    EXT4_JTR_NONE);
 	if (err)
 		goto exit_journal;
 
@@ -1722,7 +1728,8 @@ static int ext4_group_extend_no_check(struct super_block *sb,
 	}
 
 	BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
+	err = ext4_journal_get_write_access(handle, sb, EXT4_SB(sb)->s_sbh,
+					    EXT4_JTR_NONE);
 	if (err) {
 		ext4_warning(sb, "error %d on journal write access", err);
 		goto errout;
@@ -1884,7 +1891,8 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode)
 		return PTR_ERR(handle);
 
 	BUFFER_TRACE(sbi->s_sbh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, sbi->s_sbh);
+	err = ext4_journal_get_write_access(handle, sb, sbi->s_sbh,
+					    EXT4_JTR_NONE);
 	if (err)
 		goto errout;
 
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d6df62fc810c..0775950ee84e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -80,7 +80,6 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
 		       const char *dev_name, void *data);
 static inline int ext2_feature_set_ok(struct super_block *sb);
 static inline int ext3_feature_set_ok(struct super_block *sb);
-static int ext4_feature_set_ok(struct super_block *sb, int readonly);
 static void ext4_destroy_lazyinit_thread(void);
 static void ext4_unregister_li_request(struct super_block *sb);
 static void ext4_clear_request_list(void);
@@ -1173,6 +1172,7 @@ static void ext4_put_super(struct super_block *sb)
 
 	flush_work(&sbi->s_error_work);
 	destroy_workqueue(sbi->rsv_conversion_wq);
+	ext4_release_orphan_info(sb);
 
 	/*
 	 * Unregister sysfs before destroying jbd2 journal.
@@ -1198,6 +1198,7 @@ static void ext4_put_super(struct super_block *sb)
 
 	if (!sb_rdonly(sb) && !aborted) {
 		ext4_clear_feature_journal_needs_recovery(sb);
+		ext4_clear_feature_orphan_present(sb);
 		es->s_state = cpu_to_le16(sbi->s_mount_state);
 	}
 	if (!sb_rdonly(sb))
@@ -1582,14 +1583,12 @@ static int ext4_mark_dquot_dirty(struct dquot *dquot);
 static int ext4_write_info(struct super_block *sb, int type);
 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 			 const struct path *path);
-static int ext4_quota_on_mount(struct super_block *sb, int type);
 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
 			       size_t len, loff_t off);
 static ssize_t ext4_quota_write(struct super_block *sb, int type,
 				const char *data, size_t len, loff_t off);
 static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
 			     unsigned int flags);
-static int ext4_enable_quotas(struct super_block *sb);
 
 static struct dquot **ext4_get_dquots(struct inode *inode)
 {
@@ -2684,8 +2683,11 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
 		es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
 	le16_add_cpu(&es->s_mnt_count, 1);
 	ext4_update_tstamp(es, s_mtime);
-	if (sbi->s_journal)
+	if (sbi->s_journal) {
 		ext4_set_feature_journal_needs_recovery(sb);
+		if (ext4_has_feature_orphan_file(sb))
+			ext4_set_feature_orphan_present(sb);
+	}
 
 	err = ext4_commit_super(sb);
 done:
@@ -2967,169 +2969,6 @@ static int ext4_check_descriptors(struct super_block *sb,
 	return 1;
 }
 
-/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
- * the superblock) which were deleted from all directories, but held open by
- * a process at the time of a crash.  We walk the list and try to delete these
- * inodes at recovery time (only with a read-write filesystem).
- *
- * In order to keep the orphan inode chain consistent during traversal (in
- * case of crash during recovery), we link each inode into the superblock
- * orphan list_head and handle it the same way as an inode deletion during
- * normal operation (which journals the operations for us).
- *
- * We only do an iget() and an iput() on each inode, which is very safe if we
- * accidentally point at an in-use or already deleted inode.  The worst that
- * can happen in this case is that we get a "bit already cleared" message from
- * ext4_free_inode().  The only reason we would point at a wrong inode is if
- * e2fsck was run on this filesystem, and it must have already done the orphan
- * inode cleanup for us, so we can safely abort without any further action.
- */
-static void ext4_orphan_cleanup(struct super_block *sb,
-				struct ext4_super_block *es)
-{
-	unsigned int s_flags = sb->s_flags;
-	int ret, nr_orphans = 0, nr_truncates = 0;
-#ifdef CONFIG_QUOTA
-	int quota_update = 0;
-	int i;
-#endif
-	if (!es->s_last_orphan) {
-		jbd_debug(4, "no orphan inodes to clean up\n");
-		return;
-	}
-
-	if (bdev_read_only(sb->s_bdev)) {
-		ext4_msg(sb, KERN_ERR, "write access "
-			"unavailable, skipping orphan cleanup");
-		return;
-	}
-
-	/* Check if feature set would not allow a r/w mount */
-	if (!ext4_feature_set_ok(sb, 0)) {
-		ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
-			 "unknown ROCOMPAT features");
-		return;
-	}
-
-	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
-		/* don't clear list on RO mount w/ errors */
-		if (es->s_last_orphan && !(s_flags & SB_RDONLY)) {
-			ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
-				  "clearing orphan list.\n");
-			es->s_last_orphan = 0;
-		}
-		jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
-		return;
-	}
-
-	if (s_flags & SB_RDONLY) {
-		ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
-		sb->s_flags &= ~SB_RDONLY;
-	}
-#ifdef CONFIG_QUOTA
-	/*
-	 * Turn on quotas which were not enabled for read-only mounts if
-	 * filesystem has quota feature, so that they are updated correctly.
-	 */
-	if (ext4_has_feature_quota(sb) && (s_flags & SB_RDONLY)) {
-		int ret = ext4_enable_quotas(sb);
-
-		if (!ret)
-			quota_update = 1;
-		else
-			ext4_msg(sb, KERN_ERR,
-				"Cannot turn on quotas: error %d", ret);
-	}
-
-	/* Turn on journaled quotas used for old sytle */
-	for (i = 0; i < EXT4_MAXQUOTAS; i++) {
-		if (EXT4_SB(sb)->s_qf_names[i]) {
-			int ret = ext4_quota_on_mount(sb, i);
-
-			if (!ret)
-				quota_update = 1;
-			else
-				ext4_msg(sb, KERN_ERR,
-					"Cannot turn on journaled "
-					"quota: type %d: error %d", i, ret);
-		}
-	}
-#endif
-
-	while (es->s_last_orphan) {
-		struct inode *inode;
-
-		/*
-		 * We may have encountered an error during cleanup; if
-		 * so, skip the rest.
-		 */
-		if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
-			jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
-			es->s_last_orphan = 0;
-			break;
-		}
-
-		inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
-		if (IS_ERR(inode)) {
-			es->s_last_orphan = 0;
-			break;
-		}
-
-		list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
-		dquot_initialize(inode);
-		if (inode->i_nlink) {
-			if (test_opt(sb, DEBUG))
-				ext4_msg(sb, KERN_DEBUG,
-					"%s: truncating inode %lu to %lld bytes",
-					__func__, inode->i_ino, inode->i_size);
-			jbd_debug(2, "truncating inode %lu to %lld bytes\n",
-				  inode->i_ino, inode->i_size);
-			inode_lock(inode);
-			truncate_inode_pages(inode->i_mapping, inode->i_size);
-			ret = ext4_truncate(inode);
-			if (ret) {
-				/*
-				 * We need to clean up the in-core orphan list
-				 * manually if ext4_truncate() failed to get a
-				 * transaction handle.
-				 */
-				ext4_orphan_del(NULL, inode);
-				ext4_std_error(inode->i_sb, ret);
-			}
-			inode_unlock(inode);
-			nr_truncates++;
-		} else {
-			if (test_opt(sb, DEBUG))
-				ext4_msg(sb, KERN_DEBUG,
-					"%s: deleting unreferenced inode %lu",
-					__func__, inode->i_ino);
-			jbd_debug(2, "deleting unreferenced inode %lu\n",
-				  inode->i_ino);
-			nr_orphans++;
-		}
-		iput(inode);  /* The delete magic happens here! */
-	}
-
-#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
-
-	if (nr_orphans)
-		ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
-		       PLURAL(nr_orphans));
-	if (nr_truncates)
-		ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
-		       PLURAL(nr_truncates));
-#ifdef CONFIG_QUOTA
-	/* Turn off quotas if they were enabled for orphan cleanup */
-	if (quota_update) {
-		for (i = 0; i < EXT4_MAXQUOTAS; i++) {
-			if (sb_dqopt(sb)->files[i])
-				dquot_quota_off(sb, i);
-		}
-	}
-#endif
-	sb->s_flags = s_flags; /* Restore SB_RDONLY status */
-}
-
 /*
  * Maximal extent format file size.
  * Resulting logical blkno at s_maxbytes must fit in our on-disk
@@ -3309,7 +3148,7 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
  * Returns 1 if this filesystem can be mounted as requested,
  * 0 if it cannot be.
  */
-static int ext4_feature_set_ok(struct super_block *sb, int readonly)
+int ext4_feature_set_ok(struct super_block *sb, int readonly)
 {
 	if (ext4_has_unknown_ext4_incompat_features(sb)) {
 		ext4_msg(sb, KERN_ERR,
@@ -4011,6 +3850,20 @@ static const char *ext4_quota_mode(struct super_block *sb)
 #endif
 }
 
+static void ext4_setup_csum_trigger(struct super_block *sb,
+				    enum ext4_journal_trigger_type type,
+				    void (*trigger)(
+					struct jbd2_buffer_trigger_type *type,
+					struct buffer_head *bh,
+					void *mapped_data,
+					size_t size))
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+	sbi->s_journal_triggers[type].sb = sb;
+	sbi->s_journal_triggers[type].tr_triggers.t_frozen = trigger;
+}
+
 static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
@@ -4109,6 +3962,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		silent = 1;
 		goto cantfind_ext4;
 	}
+	ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE,
+				ext4_orphan_file_block_trigger);
 
 	/* Load the checksum driver */
 	sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
@@ -4432,7 +4287,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		goto failed_mount;
 	}
 
-	if (bdev_dax_supported(sb->s_bdev, blocksize))
+	if (dax_supported(dax_dev, sb->s_bdev, blocksize, 0,
+			bdev_nr_sectors(sb->s_bdev)))
 		set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
 
 	if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
@@ -4773,6 +4629,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_root = NULL;
 
 	needs_recovery = (es->s_last_orphan != 0 ||
+			  ext4_has_feature_orphan_present(sb) ||
 			  ext4_has_feature_journal_needs_recovery(sb));
 
 	if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb))
@@ -5029,6 +4886,14 @@ no_journal:
 		err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
 					  GFP_KERNEL);
 	}
+	/*
+	 * Update the checksum after updating free space/inode
+	 * counters.  Otherwise the superblock can have an incorrect
+	 * checksum in the buffer cache until it is written out and
+	 * e2fsprogs programs trying to open a file system immediately
+	 * after it is mounted can fail.
+	 */
+	ext4_superblock_csum_set(sb);
 	if (!err)
 		err = percpu_counter_init(&sbi->s_dirs_counter,
 					  ext4_count_dirs(sb), GFP_KERNEL);
@@ -5063,12 +4928,15 @@ no_journal:
 	if (err)
 		goto failed_mount7;
 
+	err = ext4_init_orphan_info(sb);
+	if (err)
+		goto failed_mount8;
 #ifdef CONFIG_QUOTA
 	/* Enable quota usage during mount. */
 	if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
 		err = ext4_enable_quotas(sb);
 		if (err)
-			goto failed_mount8;
+			goto failed_mount9;
 	}
 #endif  /* CONFIG_QUOTA */
 
@@ -5087,7 +4955,7 @@ no_journal:
 		ext4_msg(sb, KERN_INFO, "recovery complete");
 		err = ext4_mark_recovery_complete(sb, es);
 		if (err)
-			goto failed_mount8;
+			goto failed_mount9;
 	}
 	if (EXT4_SB(sb)->s_journal) {
 		if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
@@ -5133,6 +5001,8 @@ cantfind_ext4:
 		ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
 	goto failed_mount;
 
+failed_mount9:
+	ext4_release_orphan_info(sb);
 failed_mount8:
 	ext4_unregister_sysfs(sb);
 	kobject_put(&sbi->s_kobj);
@@ -5643,8 +5513,15 @@ static int ext4_mark_recovery_complete(struct super_block *sb,
 	if (err < 0)
 		goto out;
 
-	if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) {
+	if (sb_rdonly(sb) && (ext4_has_feature_journal_needs_recovery(sb) ||
+	    ext4_has_feature_orphan_present(sb))) {
+		if (!ext4_orphan_file_empty(sb)) {
+			ext4_error(sb, "Orphan file not empty on read-only fs.");
+			err = -EFSCORRUPTED;
+			goto out;
+		}
 		ext4_clear_feature_journal_needs_recovery(sb);
+		ext4_clear_feature_orphan_present(sb);
 		ext4_commit_super(sb);
 	}
 out:
@@ -5787,6 +5664,8 @@ static int ext4_freeze(struct super_block *sb)
 
 		/* Journal blocked and flushed, clear needs_recovery flag. */
 		ext4_clear_feature_journal_needs_recovery(sb);
+		if (ext4_orphan_file_empty(sb))
+			ext4_clear_feature_orphan_present(sb);
 	}
 
 	error = ext4_commit_super(sb);
@@ -5809,6 +5688,8 @@ static int ext4_unfreeze(struct super_block *sb)
 	if (EXT4_SB(sb)->s_journal) {
 		/* Reset the needs_recovery flag before the fs is unlocked. */
 		ext4_set_feature_journal_needs_recovery(sb);
+		if (ext4_has_feature_orphan_file(sb))
+			ext4_set_feature_orphan_present(sb);
 	}
 
 	ext4_commit_super(sb);
@@ -6012,7 +5893,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 			 * around from a previously readonly bdev mount,
 			 * require a full umount/remount for now.
 			 */
-			if (es->s_last_orphan) {
+			if (es->s_last_orphan || !ext4_orphan_file_empty(sb)) {
 				ext4_msg(sb, KERN_WARNING, "Couldn't "
 				       "remount RDWR because of unprocessed "
 				       "orphan inode list.  Please "
@@ -6309,16 +6190,6 @@ static int ext4_write_info(struct super_block *sb, int type)
 	return ret;
 }
 
-/*
- * Turn on quotas during mount time - we need to find
- * the quota file and such...
- */
-static int ext4_quota_on_mount(struct super_block *sb, int type)
-{
-	return dquot_quota_on_mount(sb, get_qf_name(sb, EXT4_SB(sb), type),
-					EXT4_SB(sb)->s_jquota_fmt, type);
-}
-
 static void lockdep_set_quota_inode(struct inode *inode, int subclass)
 {
 	struct ext4_inode_info *ei = EXT4_I(inode);
@@ -6448,7 +6319,7 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
 }
 
 /* Enable usage tracking for all quota types. */
-static int ext4_enable_quotas(struct super_block *sb)
+int ext4_enable_quotas(struct super_block *sb)
 {
 	int type, err = 0;
 	unsigned long qf_inums[EXT4_MAXQUOTAS] = {
@@ -6606,7 +6477,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
 	if (!bh)
 		goto out;
 	BUFFER_TRACE(bh, "get write access");
-	err = ext4_journal_get_write_access(handle, bh);
+	err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE);
 	if (err) {
 		brelse(bh);
 		return err;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 6dd5c05c444a..1e0fc1ed845b 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -791,7 +791,8 @@ static void ext4_xattr_update_super_block(handle_t *handle,
 		return;
 
 	BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
-	if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {
+	if (ext4_journal_get_write_access(handle, sb, EXT4_SB(sb)->s_sbh,
+					  EXT4_JTR_NONE) == 0) {
 		lock_buffer(EXT4_SB(sb)->s_sbh);
 		ext4_set_feature_xattr(sb);
 		ext4_superblock_csum_set(sb);
@@ -1169,7 +1170,8 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
 			continue;
 		}
 		if (err > 0) {
-			err = ext4_journal_get_write_access(handle, bh);
+			err = ext4_journal_get_write_access(handle,
+					parent->i_sb, bh, EXT4_JTR_NONE);
 			if (err) {
 				ext4_warning_inode(ea_inode,
 						"Re-get write access err=%d",
@@ -1230,7 +1232,8 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
 	int error = 0;
 
 	BUFFER_TRACE(bh, "get_write_access");
-	error = ext4_journal_get_write_access(handle, bh);
+	error = ext4_journal_get_write_access(handle, inode->i_sb, bh,
+					      EXT4_JTR_NONE);
 	if (error)
 		goto out;
 
@@ -1371,7 +1374,8 @@ retry:
 					 "ext4_getblk() return bh = NULL");
 			return -EFSCORRUPTED;
 		}
-		ret = ext4_journal_get_write_access(handle, bh);
+		ret = ext4_journal_get_write_access(handle, ea_inode->i_sb, bh,
+						   EXT4_JTR_NONE);
 		if (ret)
 			goto out;
 
@@ -1855,7 +1859,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 
 	if (s->base) {
 		BUFFER_TRACE(bs->bh, "get_write_access");
-		error = ext4_journal_get_write_access(handle, bs->bh);
+		error = ext4_journal_get_write_access(handle, sb, bs->bh,
+						      EXT4_JTR_NONE);
 		if (error)
 			goto cleanup;
 		lock_buffer(bs->bh);
@@ -1987,8 +1992,9 @@ inserted:
 				if (error)
 					goto cleanup;
 				BUFFER_TRACE(new_bh, "get_write_access");
-				error = ext4_journal_get_write_access(handle,
-								      new_bh);
+				error = ext4_journal_get_write_access(
+						handle, sb, new_bh,
+						EXT4_JTR_NONE);
 				if (error)
 					goto cleanup_dquot;
 				lock_buffer(new_bh);
@@ -2092,7 +2098,8 @@ getblk_failed:
 			}
 
 			lock_buffer(new_bh);
-			error = ext4_journal_get_create_access(handle, new_bh);
+			error = ext4_journal_get_create_access(handle, sb,
+							new_bh, EXT4_JTR_NONE);
 			if (error) {
 				unlock_buffer(new_bh);
 				error = -EIO;
@@ -2848,7 +2855,8 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
 			goto cleanup;
 		}
 
-		error = ext4_journal_get_write_access(handle, iloc.bh);
+		error = ext4_journal_get_write_access(handle, inode->i_sb,
+						iloc.bh, EXT4_JTR_NONE);
 		if (error) {
 			EXT4_ERROR_INODE(inode, "write access (error %d)",
 					 error);
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index 7669de7b49ce..7eea3cfd894d 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -105,6 +105,13 @@ config F2FS_FS_LZO
 	help
 	  Support LZO compress algorithm, if unsure, say Y.
 
+config F2FS_FS_LZORLE
+	bool "LZO-RLE compression support"
+	depends on F2FS_FS_LZO
+	default y
+	help
+	  Support LZO-RLE compress algorithm, if unsure, say Y.
+
 config F2FS_FS_LZ4
 	bool "LZ4 compression support"
 	depends on F2FS_FS_COMPRESSION
@@ -114,7 +121,6 @@ config F2FS_FS_LZ4
 
 config F2FS_FS_LZ4HC
 	bool "LZ4HC compression support"
-	depends on F2FS_FS_COMPRESSION
 	depends on F2FS_FS_LZ4
 	default y
 	help
@@ -128,10 +134,11 @@ config F2FS_FS_ZSTD
 	help
 	  Support ZSTD compress algorithm, if unsure, say Y.
 
-config F2FS_FS_LZORLE
-	bool "LZO-RLE compression support"
-	depends on F2FS_FS_COMPRESSION
-	depends on F2FS_FS_LZO
+config F2FS_IOSTAT
+	bool "F2FS IO statistics information"
+	depends on F2FS_FS
 	default y
 	help
-	  Support LZO-RLE compress algorithm, if unsure, say Y.
+	  Support getting IO statistics through sysfs and printing out periodic
+	  IO statistics tracepoint events. You have to turn on "iostat_enable"
+	  sysfs node to enable this feature.
diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile
index e5295746208b..8a7322d229e4 100644
--- a/fs/f2fs/Makefile
+++ b/fs/f2fs/Makefile
@@ -9,3 +9,4 @@ f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
 f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o
 f2fs-$(CONFIG_FS_VERITY) += verity.o
 f2fs-$(CONFIG_F2FS_FS_COMPRESSION) += compress.o
+f2fs-$(CONFIG_F2FS_IOSTAT) += iostat.o
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 239ad9453b99..16e826e01f09 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -196,8 +196,11 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type,
 	return acl;
 }
 
-struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
+struct posix_acl *f2fs_get_acl(struct inode *inode, int type, bool rcu)
 {
+	if (rcu)
+		return ERR_PTR(-ECHILD);
+
 	return __f2fs_get_acl(inode, type, NULL);
 }
 
diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h
index 986fd1bc780b..a26e33cab4ff 100644
--- a/fs/f2fs/acl.h
+++ b/fs/f2fs/acl.h
@@ -33,7 +33,7 @@ struct f2fs_acl_header {
 
 #ifdef CONFIG_F2FS_FS_POSIX_ACL
 
-extern struct posix_acl *f2fs_get_acl(struct inode *, int);
+extern struct posix_acl *f2fs_get_acl(struct inode *, int, bool);
 extern int f2fs_set_acl(struct user_namespace *, struct inode *,
 			struct posix_acl *, int);
 extern int f2fs_init_acl(struct inode *, struct inode *, struct page *,
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 6c208108d69c..83e9bc0f91ff 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -18,6 +18,7 @@
 #include "f2fs.h"
 #include "node.h"
 #include "segment.h"
+#include "iostat.h"
 #include <trace/events/f2fs.h>
 
 #define DEFAULT_CHECKPOINT_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
@@ -465,16 +466,29 @@ static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino,
 						unsigned int devidx, int type)
 {
 	struct inode_management *im = &sbi->im[type];
-	struct ino_entry *e, *tmp;
+	struct ino_entry *e = NULL, *new = NULL;
 
-	tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS);
+	if (type == FLUSH_INO) {
+		rcu_read_lock();
+		e = radix_tree_lookup(&im->ino_root, ino);
+		rcu_read_unlock();
+	}
+
+retry:
+	if (!e)
+		new = f2fs_kmem_cache_alloc(ino_entry_slab,
+						GFP_NOFS, true, NULL);
 
 	radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
 
 	spin_lock(&im->ino_lock);
 	e = radix_tree_lookup(&im->ino_root, ino);
 	if (!e) {
-		e = tmp;
+		if (!new) {
+			spin_unlock(&im->ino_lock);
+			goto retry;
+		}
+		e = new;
 		if (unlikely(radix_tree_insert(&im->ino_root, ino, e)))
 			f2fs_bug_on(sbi, 1);
 
@@ -492,8 +506,8 @@ static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino,
 	spin_unlock(&im->ino_lock);
 	radix_tree_preload_end();
 
-	if (e != tmp)
-		kmem_cache_free(ino_entry_slab, tmp);
+	if (new && e != new)
+		kmem_cache_free(ino_entry_slab, new);
 }
 
 static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
@@ -1289,12 +1303,20 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
 	unsigned long flags;
 
-	spin_lock_irqsave(&sbi->cp_lock, flags);
+	if (cpc->reason & CP_UMOUNT) {
+		if (le32_to_cpu(ckpt->cp_pack_total_block_count) >
+			sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks) {
+			clear_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
+			f2fs_notice(sbi, "Disable nat_bits due to no space");
+		} else if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG) &&
+						f2fs_nat_bitmap_enabled(sbi)) {
+			f2fs_enable_nat_bits(sbi);
+			set_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
+			f2fs_notice(sbi, "Rebuild and enable nat_bits");
+		}
+	}
 
-	if ((cpc->reason & CP_UMOUNT) &&
-			le32_to_cpu(ckpt->cp_pack_total_block_count) >
-			sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
-		disable_nat_bits(sbi, false);
+	spin_lock_irqsave(&sbi->cp_lock, flags);
 
 	if (cpc->reason & CP_TRIMMED)
 		__set_ckpt_flags(ckpt, CP_TRIMMED_FLAG);
@@ -1480,7 +1502,8 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	start_blk = __start_cp_next_addr(sbi);
 
 	/* write nat bits */
-	if (enabled_nat_bits(sbi, cpc)) {
+	if ((cpc->reason & CP_UMOUNT) &&
+			is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
 		__u64 cp_ver = cur_cp_version(ckpt);
 		block_t blk;
 
@@ -1639,8 +1662,11 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
 	/* write cached NAT/SIT entries to NAT/SIT area */
 	err = f2fs_flush_nat_entries(sbi, cpc);
-	if (err)
+	if (err) {
+		f2fs_err(sbi, "f2fs_flush_nat_entries failed err:%d, stop checkpoint", err);
+		f2fs_bug_on(sbi, !f2fs_cp_error(sbi));
 		goto stop;
+	}
 
 	f2fs_flush_sit_entries(sbi, cpc);
 
@@ -1648,10 +1674,13 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	f2fs_save_inmem_curseg(sbi);
 
 	err = do_checkpoint(sbi, cpc);
-	if (err)
+	if (err) {
+		f2fs_err(sbi, "do_checkpoint failed err:%d, stop checkpoint", err);
+		f2fs_bug_on(sbi, !f2fs_cp_error(sbi));
 		f2fs_release_discard_addrs(sbi);
-	else
+	} else {
 		f2fs_clear_prefree_segments(sbi, cpc);
+	}
 
 	f2fs_restore_inmem_curseg(sbi);
 stop:
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 455561826c7d..c1bf9ad4c220 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -28,7 +28,8 @@ static void *page_array_alloc(struct inode *inode, int nr)
 	unsigned int size = sizeof(struct page *) * nr;
 
 	if (likely(size <= sbi->page_array_slab_size))
-		return kmem_cache_zalloc(sbi->page_array_slab, GFP_NOFS);
+		return f2fs_kmem_cache_alloc(sbi->page_array_slab,
+					GFP_F2FS_ZERO, false, F2FS_I_SB(inode));
 	return f2fs_kzalloc(sbi, size, GFP_NOFS);
 }
 
@@ -898,6 +899,54 @@ static bool cluster_has_invalid_data(struct compress_ctx *cc)
 	return false;
 }
 
+bool f2fs_sanity_check_cluster(struct dnode_of_data *dn)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
+	unsigned int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
+	bool compressed = dn->data_blkaddr == COMPRESS_ADDR;
+	int cluster_end = 0;
+	int i;
+	char *reason = "";
+
+	if (!compressed)
+		return false;
+
+	/* [..., COMPR_ADDR, ...] */
+	if (dn->ofs_in_node % cluster_size) {
+		reason = "[*|C|*|*]";
+		goto out;
+	}
+
+	for (i = 1; i < cluster_size; i++) {
+		block_t blkaddr = data_blkaddr(dn->inode, dn->node_page,
+							dn->ofs_in_node + i);
+
+		/* [COMPR_ADDR, ..., COMPR_ADDR] */
+		if (blkaddr == COMPRESS_ADDR) {
+			reason = "[C|*|C|*]";
+			goto out;
+		}
+		if (compressed) {
+			if (!__is_valid_data_blkaddr(blkaddr)) {
+				if (!cluster_end)
+					cluster_end = i;
+				continue;
+			}
+			/* [COMPR_ADDR, NULL_ADDR or NEW_ADDR, valid_blkaddr] */
+			if (cluster_end) {
+				reason = "[C|N|N|V]";
+				goto out;
+			}
+		}
+	}
+	return false;
+out:
+	f2fs_warn(sbi, "access invalid cluster, ino:%lu, nid:%u, ofs_in_node:%u, reason:%s",
+			dn->inode->i_ino, dn->nid, dn->ofs_in_node, reason);
+	set_sbi_flag(sbi, SBI_NEED_FSCK);
+	return true;
+}
+
 static int __f2fs_cluster_blocks(struct inode *inode,
 				unsigned int cluster_idx, bool compr)
 {
@@ -915,6 +964,11 @@ static int __f2fs_cluster_blocks(struct inode *inode,
 		goto fail;
 	}
 
+	if (f2fs_sanity_check_cluster(&dn)) {
+		ret = -EFSCORRUPTED;
+		goto fail;
+	}
+
 	if (dn.data_blkaddr == COMPRESS_ADDR) {
 		int i;
 
@@ -1228,7 +1282,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
 
 	fio.version = ni.version;
 
-	cic = kmem_cache_zalloc(cic_entry_slab, GFP_NOFS);
+	cic = f2fs_kmem_cache_alloc(cic_entry_slab, GFP_F2FS_ZERO, false, sbi);
 	if (!cic)
 		goto out_put_dnode;
 
@@ -1340,12 +1394,6 @@ out_destroy_crypt:
 
 	for (--i; i >= 0; i--)
 		fscrypt_finalize_bounce_page(&cc->cpages[i]);
-	for (i = 0; i < cc->nr_cpages; i++) {
-		if (!cc->cpages[i])
-			continue;
-		f2fs_compress_free_page(cc->cpages[i]);
-		cc->cpages[i] = NULL;
-	}
 out_put_cic:
 	kmem_cache_free(cic_entry_slab, cic);
 out_put_dnode:
@@ -1356,6 +1404,12 @@ out_unlock_op:
 	else
 		f2fs_unlock_op(sbi);
 out_free:
+	for (i = 0; i < cc->nr_cpages; i++) {
+		if (!cc->cpages[i])
+			continue;
+		f2fs_compress_free_page(cc->cpages[i]);
+		cc->cpages[i] = NULL;
+	}
 	page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
 	cc->cpages = NULL;
 	return -EAGAIN;
@@ -1506,7 +1560,8 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
 	pgoff_t start_idx = start_idx_of_cluster(cc);
 	int i;
 
-	dic = kmem_cache_zalloc(dic_entry_slab, GFP_NOFS);
+	dic = f2fs_kmem_cache_alloc(dic_entry_slab, GFP_F2FS_ZERO,
+					false, F2FS_I_SB(cc->inode));
 	if (!dic)
 		return ERR_PTR(-ENOMEM);
 
@@ -1666,6 +1721,30 @@ void f2fs_put_page_dic(struct page *page)
 	f2fs_put_dic(dic);
 }
 
+/*
+ * check whether cluster blocks are contiguous, and add extent cache entry
+ * only if cluster blocks are logically and physically contiguous.
+ */
+unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn)
+{
+	bool compressed = f2fs_data_blkaddr(dn) == COMPRESS_ADDR;
+	int i = compressed ? 1 : 0;
+	block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_page,
+						dn->ofs_in_node + i);
+
+	for (i += 1; i < F2FS_I(dn->inode)->i_cluster_size; i++) {
+		block_t blkaddr = data_blkaddr(dn->inode, dn->node_page,
+						dn->ofs_in_node + i);
+
+		if (!__is_valid_data_blkaddr(blkaddr))
+			break;
+		if (first_blkaddr + i - (compressed ? 1 : 0) != blkaddr)
+			return 0;
+	}
+
+	return compressed ? i - 1 : i;
+}
+
 const struct address_space_operations f2fs_compress_aops = {
 	.releasepage = f2fs_release_page,
 	.invalidatepage = f2fs_invalidate_page,
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index eb222b35edef..f4fd6c246c9a 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -25,6 +25,7 @@
 #include "f2fs.h"
 #include "node.h"
 #include "segment.h"
+#include "iostat.h"
 #include <trace/events/f2fs.h>
 
 #define NUM_PREALLOC_POST_READ_CTXS	128
@@ -116,6 +117,7 @@ struct bio_post_read_ctx {
 	struct f2fs_sb_info *sbi;
 	struct work_struct work;
 	unsigned int enabled_steps;
+	block_t fs_blkaddr;
 };
 
 static void f2fs_finish_read_bio(struct bio *bio)
@@ -228,7 +230,7 @@ static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx)
 	struct bio_vec *bv;
 	struct bvec_iter_all iter_all;
 	bool all_compressed = true;
-	block_t blkaddr = SECTOR_TO_BLOCK(ctx->bio->bi_iter.bi_sector);
+	block_t blkaddr = ctx->fs_blkaddr;
 
 	bio_for_each_segment_all(bv, ctx->bio, iter_all) {
 		struct page *page = bv->bv_page;
@@ -269,7 +271,10 @@ static void f2fs_post_read_work(struct work_struct *work)
 static void f2fs_read_end_io(struct bio *bio)
 {
 	struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
-	struct bio_post_read_ctx *ctx = bio->bi_private;
+	struct bio_post_read_ctx *ctx;
+
+	iostat_update_and_unbind_ctx(bio, 0);
+	ctx = bio->bi_private;
 
 	if (time_to_inject(sbi, FAULT_READ_IO)) {
 		f2fs_show_injection_info(sbi, FAULT_READ_IO);
@@ -291,10 +296,13 @@ static void f2fs_read_end_io(struct bio *bio)
 
 static void f2fs_write_end_io(struct bio *bio)
 {
-	struct f2fs_sb_info *sbi = bio->bi_private;
+	struct f2fs_sb_info *sbi;
 	struct bio_vec *bvec;
 	struct bvec_iter_all iter_all;
 
+	iostat_update_and_unbind_ctx(bio, 1);
+	sbi = bio->bi_private;
+
 	if (time_to_inject(sbi, FAULT_WRITE_IO)) {
 		f2fs_show_injection_info(sbi, FAULT_WRITE_IO);
 		bio->bi_status = BLK_STS_IOERR;
@@ -398,6 +406,8 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
 		bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi,
 						fio->type, fio->temp);
 	}
+	iostat_alloc_and_bind_ctx(sbi, bio, NULL);
+
 	if (fio->io_wbc)
 		wbc_init_bio(fio->io_wbc, bio);
 
@@ -479,6 +489,8 @@ submit_io:
 		trace_f2fs_submit_read_bio(sbi->sb, type, bio);
 	else
 		trace_f2fs_submit_write_bio(sbi->sb, type, bio);
+
+	iostat_update_submit_ctx(bio, type);
 	submit_bio(bio);
 }
 
@@ -723,7 +735,7 @@ static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio,
 	struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
 	struct bio_entry *be;
 
-	be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS);
+	be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS, true, NULL);
 	be->bio = bio;
 	bio_get(bio);
 
@@ -970,7 +982,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct bio *bio;
-	struct bio_post_read_ctx *ctx;
+	struct bio_post_read_ctx *ctx = NULL;
 	unsigned int post_read_steps = 0;
 
 	bio = bio_alloc_bioset(for_write ? GFP_NOIO : GFP_KERNEL,
@@ -1003,8 +1015,10 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
 		ctx->bio = bio;
 		ctx->sbi = sbi;
 		ctx->enabled_steps = post_read_steps;
+		ctx->fs_blkaddr = blkaddr;
 		bio->bi_private = ctx;
 	}
+	iostat_alloc_and_bind_ctx(sbi, bio, ctx);
 
 	return bio;
 }
@@ -1133,7 +1147,7 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
 
 int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
 {
-	struct extent_info ei = {0, 0, 0};
+	struct extent_info ei = {0, };
 	struct inode *inode = dn->inode;
 
 	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
@@ -1150,7 +1164,7 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
 	struct address_space *mapping = inode->i_mapping;
 	struct dnode_of_data dn;
 	struct page *page;
-	struct extent_info ei = {0,0,0};
+	struct extent_info ei = {0, };
 	int err;
 
 	page = f2fs_grab_cache_page(mapping, index, for_write);
@@ -1448,7 +1462,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
 	int err = 0, ofs = 1;
 	unsigned int ofs_in_node, last_ofs_in_node;
 	blkcnt_t prealloc;
-	struct extent_info ei = {0,0,0};
+	struct extent_info ei = {0, };
 	block_t blkaddr;
 	unsigned int start_pgofs;
 
@@ -1490,7 +1504,21 @@ next_dnode:
 	if (err) {
 		if (flag == F2FS_GET_BLOCK_BMAP)
 			map->m_pblk = 0;
+
 		if (err == -ENOENT) {
+			/*
+			 * There is one exceptional case that read_node_page()
+			 * may return -ENOENT due to filesystem has been
+			 * shutdown or cp_error, so force to convert error
+			 * number to EIO for such case.
+			 */
+			if (map->m_may_create &&
+				(is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
+				f2fs_cp_error(sbi))) {
+				err = -EIO;
+				goto unlock_out;
+			}
+
 			err = 0;
 			if (map->m_next_pgofs)
 				*map->m_next_pgofs =
@@ -1550,6 +1578,13 @@ next_block:
 			map->m_flags |= F2FS_MAP_NEW;
 			blkaddr = dn.data_blkaddr;
 		} else {
+			if (f2fs_compressed_file(inode) &&
+					f2fs_sanity_check_cluster(&dn) &&
+					(flag != F2FS_GET_BLOCK_FIEMAP ||
+					IS_ENABLED(CONFIG_F2FS_CHECK_FS))) {
+				err = -EFSCORRUPTED;
+				goto sync_out;
+			}
 			if (flag == F2FS_GET_BLOCK_BMAP) {
 				map->m_pblk = 0;
 				goto sync_out;
@@ -1843,8 +1878,9 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 	u64 logical = 0, phys = 0, size = 0;
 	u32 flags = 0;
 	int ret = 0;
-	bool compr_cluster = false;
+	bool compr_cluster = false, compr_appended;
 	unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;
+	unsigned int count_in_cluster = 0;
 	loff_t maxbytes;
 
 	if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
@@ -1892,15 +1928,17 @@ next:
 	map.m_next_pgofs = &next_pgofs;
 	map.m_seg_type = NO_CHECK_TYPE;
 
-	if (compr_cluster)
-		map.m_len = cluster_size - 1;
+	if (compr_cluster) {
+		map.m_lblk += 1;
+		map.m_len = cluster_size - count_in_cluster;
+	}
 
 	ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
 	if (ret)
 		goto out;
 
 	/* HOLE */
-	if (!(map.m_flags & F2FS_MAP_FLAGS)) {
+	if (!compr_cluster && !(map.m_flags & F2FS_MAP_FLAGS)) {
 		start_blk = next_pgofs;
 
 		if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode,
@@ -1910,6 +1948,14 @@ next:
 		flags |= FIEMAP_EXTENT_LAST;
 	}
 
+	compr_appended = false;
+	/* In a case of compressed cluster, append this to the last extent */
+	if (compr_cluster && ((map.m_flags & F2FS_MAP_UNWRITTEN) ||
+			!(map.m_flags & F2FS_MAP_FLAGS))) {
+		compr_appended = true;
+		goto skip_fill;
+	}
+
 	if (size) {
 		flags |= FIEMAP_EXTENT_MERGED;
 		if (IS_ENCRYPTED(inode))
@@ -1926,38 +1972,36 @@ next:
 	if (start_blk > last_blk)
 		goto out;
 
-	if (compr_cluster) {
-		compr_cluster = false;
-
-
-		logical = blks_to_bytes(inode, start_blk - 1);
-		phys = blks_to_bytes(inode, map.m_pblk);
-		size = blks_to_bytes(inode, cluster_size);
-
-		flags |= FIEMAP_EXTENT_ENCODED;
-
-		start_blk += cluster_size - 1;
-
-		if (start_blk > last_blk)
-			goto out;
-
-		goto prep_next;
-	}
-
+skip_fill:
 	if (map.m_pblk == COMPRESS_ADDR) {
 		compr_cluster = true;
-		start_blk++;
-		goto prep_next;
-	}
-
-	logical = blks_to_bytes(inode, start_blk);
-	phys = blks_to_bytes(inode, map.m_pblk);
-	size = blks_to_bytes(inode, map.m_len);
-	flags = 0;
-	if (map.m_flags & F2FS_MAP_UNWRITTEN)
-		flags = FIEMAP_EXTENT_UNWRITTEN;
+		count_in_cluster = 1;
+	} else if (compr_appended) {
+		unsigned int appended_blks = cluster_size -
+						count_in_cluster + 1;
+		size += blks_to_bytes(inode, appended_blks);
+		start_blk += appended_blks;
+		compr_cluster = false;
+	} else {
+		logical = blks_to_bytes(inode, start_blk);
+		phys = __is_valid_data_blkaddr(map.m_pblk) ?
+			blks_to_bytes(inode, map.m_pblk) : 0;
+		size = blks_to_bytes(inode, map.m_len);
+		flags = 0;
+
+		if (compr_cluster) {
+			flags = FIEMAP_EXTENT_ENCODED;
+			count_in_cluster += map.m_len;
+			if (count_in_cluster == cluster_size) {
+				compr_cluster = false;
+				size += blks_to_bytes(inode, 1);
+			}
+		} else if (map.m_flags & F2FS_MAP_UNWRITTEN) {
+			flags = FIEMAP_EXTENT_UNWRITTEN;
+		}
 
-	start_blk += bytes_to_blks(inode, size);
+		start_blk += bytes_to_blks(inode, size);
+	}
 
 prep_next:
 	cond_resched();
@@ -2115,6 +2159,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
 	sector_t last_block_in_file;
 	const unsigned blocksize = blks_to_bytes(inode, 1);
 	struct decompress_io_ctx *dic = NULL;
+	struct extent_info ei = {0, };
+	bool from_dnode = true;
 	int i;
 	int ret = 0;
 
@@ -2137,6 +2183,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
 			continue;
 		}
 		unlock_page(page);
+		if (for_write)
+			put_page(page);
 		cc->rpages[i] = NULL;
 		cc->nr_rpages--;
 	}
@@ -2145,6 +2193,12 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
 	if (f2fs_cluster_is_empty(cc))
 		goto out;
 
+	if (f2fs_lookup_extent_cache(inode, start_idx, &ei))
+		from_dnode = false;
+
+	if (!from_dnode)
+		goto skip_reading_dnode;
+
 	set_new_dnode(&dn, inode, NULL, NULL, 0);
 	ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
 	if (ret)
@@ -2152,11 +2206,13 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
 
 	f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR);
 
+skip_reading_dnode:
 	for (i = 1; i < cc->cluster_size; i++) {
 		block_t blkaddr;
 
-		blkaddr = data_blkaddr(dn.inode, dn.node_page,
-						dn.ofs_in_node + i);
+		blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
+					dn.ofs_in_node + i) :
+					ei.blk + i - 1;
 
 		if (!__is_valid_data_blkaddr(blkaddr))
 			break;
@@ -2166,6 +2222,9 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
 			goto out_put_dnode;
 		}
 		cc->nr_cpages++;
+
+		if (!from_dnode && i >= ei.c_len)
+			break;
 	}
 
 	/* nothing to decompress */
@@ -2185,8 +2244,9 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
 		block_t blkaddr;
 		struct bio_post_read_ctx *ctx;
 
-		blkaddr = data_blkaddr(dn.inode, dn.node_page,
-						dn.ofs_in_node + i + 1);
+		blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
+					dn.ofs_in_node + i + 1) :
+					ei.blk + i;
 
 		f2fs_wait_on_block_writeback(inode, blkaddr);
 
@@ -2220,7 +2280,7 @@ submit_and_realloc:
 		if (bio_add_page(bio, page, blocksize, 0) < blocksize)
 			goto submit_and_realloc;
 
-		ctx = bio->bi_private;
+		ctx = get_post_read_ctx(bio);
 		ctx->enabled_steps |= STEP_DECOMPRESS;
 		refcount_inc(&dic->refcnt);
 
@@ -2231,13 +2291,15 @@ submit_and_realloc:
 		*last_block_in_bio = blkaddr;
 	}
 
-	f2fs_put_dnode(&dn);
+	if (from_dnode)
+		f2fs_put_dnode(&dn);
 
 	*bio_ret = bio;
 	return 0;
 
 out_put_dnode:
-	f2fs_put_dnode(&dn);
+	if (from_dnode)
+		f2fs_put_dnode(&dn);
 out:
 	for (i = 0; i < cc->cluster_size; i++) {
 		if (cc->rpages[i]) {
@@ -2272,6 +2334,7 @@ static int f2fs_mpage_readpages(struct inode *inode,
 		.nr_rpages = 0,
 		.nr_cpages = 0,
 	};
+	pgoff_t nc_cluster_idx = NULL_CLUSTER;
 #endif
 	unsigned nr_pages = rac ? readahead_count(rac) : 1;
 	unsigned max_nr_pages = nr_pages;
@@ -2304,12 +2367,23 @@ static int f2fs_mpage_readpages(struct inode *inode,
 				if (ret)
 					goto set_error_page;
 			}
-			ret = f2fs_is_compressed_cluster(inode, page->index);
-			if (ret < 0)
-				goto set_error_page;
-			else if (!ret)
-				goto read_single_page;
+			if (cc.cluster_idx == NULL_CLUSTER) {
+				if (nc_cluster_idx ==
+					page->index >> cc.log_cluster_size) {
+					goto read_single_page;
+				}
 
+				ret = f2fs_is_compressed_cluster(inode, page->index);
+				if (ret < 0)
+					goto set_error_page;
+				else if (!ret) {
+					nc_cluster_idx =
+						page->index >> cc.log_cluster_size;
+					goto read_single_page;
+				}
+
+				nc_cluster_idx = NULL_CLUSTER;
+			}
 			ret = f2fs_init_compress_ctx(&cc);
 			if (ret)
 				goto set_error_page;
@@ -2498,6 +2572,8 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
 		return true;
 	if (f2fs_is_atomic_file(inode))
 		return true;
+	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
+		return true;
 
 	/* swap file is migrating in aligned write mode */
 	if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
@@ -2530,7 +2606,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
 	struct page *page = fio->page;
 	struct inode *inode = page->mapping->host;
 	struct dnode_of_data dn;
-	struct extent_info ei = {0,0,0};
+	struct extent_info ei = {0, };
 	struct node_info ni;
 	bool ipu_force = false;
 	int err = 0;
@@ -3176,9 +3252,8 @@ static int f2fs_write_data_pages(struct address_space *mapping,
 			FS_CP_DATA_IO : FS_DATA_IO);
 }
 
-static void f2fs_write_failed(struct address_space *mapping, loff_t to)
+static void f2fs_write_failed(struct inode *inode, loff_t to)
 {
-	struct inode *inode = mapping->host;
 	loff_t i_size = i_size_read(inode);
 
 	if (IS_NOQUOTA(inode))
@@ -3187,12 +3262,12 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
 	/* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
 	if (to > i_size && !f2fs_verity_in_progress(inode)) {
 		down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
-		filemap_invalidate_lock(mapping);
+		filemap_invalidate_lock(inode->i_mapping);
 
 		truncate_pagecache(inode, i_size);
 		f2fs_truncate_blocks(inode, i_size, true);
 
-		filemap_invalidate_unlock(mapping);
+		filemap_invalidate_unlock(inode->i_mapping);
 		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 	}
 }
@@ -3206,7 +3281,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
 	struct dnode_of_data dn;
 	struct page *ipage;
 	bool locked = false;
-	struct extent_info ei = {0,0,0};
+	struct extent_info ei = {0, };
 	int err = 0;
 	int flag;
 
@@ -3328,6 +3403,9 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
 
 		*fsdata = NULL;
 
+		if (len == PAGE_SIZE)
+			goto repeat;
+
 		ret = f2fs_prepare_compress_overwrite(inode, pagep,
 							index, fsdata);
 		if (ret < 0) {
@@ -3410,7 +3488,7 @@ repeat:
 
 fail:
 	f2fs_put_page(page, 1);
-	f2fs_write_failed(mapping, pos + len);
+	f2fs_write_failed(inode, pos + len);
 	if (drop_atomic)
 		f2fs_drop_inmem_pages_all(sbi, false);
 	return err;
@@ -3552,7 +3630,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	if (f2fs_force_buffered_io(inode, iocb, iter))
 		return 0;
 
-	do_opu = allow_outplace_dio(inode, iocb, iter);
+	do_opu = rw == WRITE && f2fs_lfs_mode(sbi);
 
 	trace_f2fs_direct_IO_enter(inode, offset, count, rw);
 
@@ -3600,7 +3678,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 			f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
 						count - iov_iter_count(iter));
 		} else if (err < 0) {
-			f2fs_write_failed(mapping, offset + count);
+			f2fs_write_failed(inode, offset + count);
 		}
 	} else {
 		if (err > 0)
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 833325038ef3..8c50518475a9 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -323,11 +323,27 @@ get_cache:
 #endif
 }
 
+static char *s_flag[] = {
+	[SBI_IS_DIRTY]		= " fs_dirty",
+	[SBI_IS_CLOSE]		= " closing",
+	[SBI_NEED_FSCK]		= " need_fsck",
+	[SBI_POR_DOING]		= " recovering",
+	[SBI_NEED_SB_WRITE]	= " sb_dirty",
+	[SBI_NEED_CP]		= " need_cp",
+	[SBI_IS_SHUTDOWN]	= " shutdown",
+	[SBI_IS_RECOVERED]	= " recovered",
+	[SBI_CP_DISABLED]	= " cp_disabled",
+	[SBI_CP_DISABLED_QUICK]	= " cp_disabled_quick",
+	[SBI_QUOTA_NEED_FLUSH]	= " quota_need_flush",
+	[SBI_QUOTA_SKIP_FLUSH]	= " quota_skip_flush",
+	[SBI_QUOTA_NEED_REPAIR]	= " quota_need_repair",
+	[SBI_IS_RESIZEFS]	= " resizefs",
+};
+
 static int stat_show(struct seq_file *s, void *v)
 {
 	struct f2fs_stat_info *si;
-	int i = 0;
-	int j;
+	int i = 0, j = 0;
 
 	mutex_lock(&f2fs_stat_mutex);
 	list_for_each_entry(si, &f2fs_stat_list, stat_list) {
@@ -337,7 +353,13 @@ static int stat_show(struct seq_file *s, void *v)
 			si->sbi->sb->s_bdev, i++,
 			f2fs_readonly(si->sbi->sb) ? "RO": "RW",
 			is_set_ckpt_flags(si->sbi, CP_DISABLED_FLAG) ?
-			"Disabled": (f2fs_cp_error(si->sbi) ? "Error": "Good"));
+			"Disabled" : (f2fs_cp_error(si->sbi) ? "Error" : "Good"));
+		if (si->sbi->s_flag) {
+			seq_puts(s, "[SBI:");
+			for_each_set_bit(j, &si->sbi->s_flag, 32)
+				seq_puts(s, s_flag[j]);
+			seq_puts(s, "]\n");
+		}
 		seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ",
 			   si->sit_area_segs, si->nat_area_segs);
 		seq_printf(s, "[SSA: %d] [MAIN: %d",
@@ -450,6 +472,15 @@ static int stat_show(struct seq_file *s, void *v)
 				si->data_segs, si->bg_data_segs);
 		seq_printf(s, "  - node segments : %d (%d)\n",
 				si->node_segs, si->bg_node_segs);
+		seq_printf(s, "  - Reclaimed segs : Normal (%d), Idle CB (%d), "
+				"Idle Greedy (%d), Idle AT (%d), "
+				"Urgent High (%d), Urgent Low (%d)\n",
+				si->sbi->gc_reclaimed_segs[GC_NORMAL],
+				si->sbi->gc_reclaimed_segs[GC_IDLE_CB],
+				si->sbi->gc_reclaimed_segs[GC_IDLE_GREEDY],
+				si->sbi->gc_reclaimed_segs[GC_IDLE_AT],
+				si->sbi->gc_reclaimed_segs[GC_URGENT_HIGH],
+				si->sbi->gc_reclaimed_segs[GC_URGENT_LOW]);
 		seq_printf(s, "Try to move %d blocks (BG: %d)\n", si->tot_blks,
 				si->bg_data_blks + si->bg_node_blks);
 		seq_printf(s, "  - data blocks : %d (%d)\n", si->data_blks,
@@ -611,7 +642,7 @@ void __init f2fs_create_root_stats(void)
 #ifdef CONFIG_DEBUG_FS
 	f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
 
-	debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, NULL,
+	debugfs_create_file("status", 0444, f2fs_debugfs_root, NULL,
 			    &stat_fops);
 #endif
 }
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 456651682daf..1820e9c106f7 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -83,8 +83,8 @@ int f2fs_init_casefolded_name(const struct inode *dir,
 	struct super_block *sb = dir->i_sb;
 
 	if (IS_CASEFOLDED(dir)) {
-		fname->cf_name.name = kmem_cache_alloc(f2fs_cf_name_slab,
-								GFP_NOFS);
+		fname->cf_name.name = f2fs_kmem_cache_alloc(f2fs_cf_name_slab,
+					GFP_NOFS, false, F2FS_SB(sb));
 		if (!fname->cf_name.name)
 			return -ENOMEM;
 		fname->cf_name.len = utf8_casefold(sb->s_encoding,
@@ -1000,6 +1000,7 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
 	struct f2fs_sb_info *sbi = F2FS_I_SB(d->inode);
 	struct blk_plug plug;
 	bool readdir_ra = sbi->readdir_ra == 1;
+	bool found_valid_dirent = false;
 	int err = 0;
 
 	bit_pos = ((unsigned long)ctx->pos % d->max);
@@ -1014,13 +1015,15 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
 
 		de = &d->dentry[bit_pos];
 		if (de->name_len == 0) {
+			if (found_valid_dirent || !bit_pos) {
+				printk_ratelimited(
+					"%sF2FS-fs (%s): invalid namelen(0), ino:%u, run fsck to fix.",
+					KERN_WARNING, sbi->sb->s_id,
+					le32_to_cpu(de->ino));
+				set_sbi_flag(sbi, SBI_NEED_FSCK);
+			}
 			bit_pos++;
 			ctx->pos = start_pos + bit_pos;
-			printk_ratelimited(
-				"%sF2FS-fs (%s): invalid namelen(0), ino:%u, run fsck to fix.",
-				KERN_WARNING, sbi->sb->s_id,
-				le32_to_cpu(de->ino));
-			set_sbi_flag(sbi, SBI_NEED_FSCK);
 			continue;
 		}
 
@@ -1063,6 +1066,7 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
 			f2fs_ra_node_page(sbi, le32_to_cpu(de->ino));
 
 		ctx->pos = start_pos + bit_pos;
+		found_valid_dirent = true;
 	}
 out:
 	if (readdir_ra)
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 3ebf976a682d..866e72b29bd5 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -239,7 +239,7 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
 {
 	struct extent_node *en;
 
-	en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
+	en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi);
 	if (!en)
 		return NULL;
 
@@ -292,7 +292,8 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
 	mutex_lock(&sbi->extent_tree_lock);
 	et = radix_tree_lookup(&sbi->extent_tree_root, ino);
 	if (!et) {
-		et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
+		et = f2fs_kmem_cache_alloc(extent_tree_slab,
+					GFP_NOFS, true, NULL);
 		f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
 		memset(et, 0, sizeof(struct extent_tree));
 		et->ino = ino;
@@ -661,6 +662,47 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
 		f2fs_mark_inode_dirty_sync(inode, true);
 }
 
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+void f2fs_update_extent_tree_range_compressed(struct inode *inode,
+				pgoff_t fofs, block_t blkaddr, unsigned int llen,
+				unsigned int c_len)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct extent_tree *et = F2FS_I(inode)->extent_tree;
+	struct extent_node *en = NULL;
+	struct extent_node *prev_en = NULL, *next_en = NULL;
+	struct extent_info ei;
+	struct rb_node **insert_p = NULL, *insert_parent = NULL;
+	bool leftmost = false;
+
+	trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, llen);
+
+	/* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */
+	if (is_inode_flag_set(inode, FI_NO_EXTENT))
+		return;
+
+	write_lock(&et->lock);
+
+	en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root,
+				(struct rb_entry *)et->cached_en, fofs,
+				(struct rb_entry **)&prev_en,
+				(struct rb_entry **)&next_en,
+				&insert_p, &insert_parent, false,
+				&leftmost);
+	if (en)
+		goto unlock_out;
+
+	set_extent_info(&ei, fofs, blkaddr, llen);
+	ei.c_len = c_len;
+
+	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
+		__insert_extent_tree(sbi, et, &ei,
+				insert_p, insert_parent, leftmost);
+unlock_out:
+	write_unlock(&et->lock);
+}
+#endif
+
 unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
 {
 	struct extent_tree *et, *next;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 906b2c4b50e7..b339ae89c1ad 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -43,6 +43,7 @@ enum {
 	FAULT_KVMALLOC,
 	FAULT_PAGE_ALLOC,
 	FAULT_PAGE_GET,
+	FAULT_ALLOC_BIO,	/* it's obsolete due to bio_alloc() will never fail */
 	FAULT_ALLOC_NID,
 	FAULT_ORPHAN,
 	FAULT_BLOCK,
@@ -53,6 +54,7 @@ enum {
 	FAULT_CHECKPOINT,
 	FAULT_DISCARD,
 	FAULT_WRITE_IO,
+	FAULT_SLAB_ALLOC,
 	FAULT_MAX,
 };
 
@@ -139,6 +141,11 @@ struct f2fs_mount_info {
 	int fsync_mode;			/* fsync policy */
 	int fs_mode;			/* fs mode: LFS or ADAPTIVE */
 	int bggc_mode;			/* bggc mode: off, on or sync */
+	int discard_unit;		/*
+					 * discard command's offset/size should
+					 * be aligned to this unit: block,
+					 * segment or section
+					 */
 	struct fscrypt_dummy_policy dummy_enc_policy; /* test dummy encryption */
 	block_t unusable_cap_perc;	/* percentage for cap */
 	block_t unusable_cap;		/* Amount of space allowed to be
@@ -542,7 +549,7 @@ enum {
 					 */
 };
 
-#define DEFAULT_RETRY_IO_COUNT	8	/* maximum retry read IO count */
+#define DEFAULT_RETRY_IO_COUNT	8	/* maximum retry read IO or flush count */
 
 /* congestion wait timeout value, default: 20ms */
 #define	DEFAULT_IO_TIMEOUT	(msecs_to_jiffies(20))
@@ -575,6 +582,9 @@ struct extent_info {
 	unsigned int fofs;		/* start offset in a file */
 	unsigned int len;		/* length of the extent */
 	u32 blk;			/* start block address of the extent */
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+	unsigned int c_len;		/* physical extent length of compressed blocks */
+#endif
 };
 
 struct extent_node {
@@ -793,6 +803,9 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
 	ei->fofs = fofs;
 	ei->blk = blk;
 	ei->len = len;
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+	ei->c_len = 0;
+#endif
 }
 
 static inline bool __is_discard_mergeable(struct discard_info *back,
@@ -817,6 +830,12 @@ static inline bool __is_discard_front_mergeable(struct discard_info *cur,
 static inline bool __is_extent_mergeable(struct extent_info *back,
 						struct extent_info *front)
 {
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+	if (back->c_len && back->len != back->c_len)
+		return false;
+	if (front->c_len && front->len != front->c_len)
+		return false;
+#endif
 	return (back->fofs + back->len == front->fofs &&
 			back->blk + back->len == front->blk);
 }
@@ -1252,6 +1271,7 @@ enum {
 	GC_IDLE_AT,
 	GC_URGENT_HIGH,
 	GC_URGENT_LOW,
+	MAX_GC_MODE,
 };
 
 enum {
@@ -1297,6 +1317,12 @@ enum {
 				 */
 };
 
+enum {
+	DISCARD_UNIT_BLOCK,	/* basic discard unit is block */
+	DISCARD_UNIT_SEGMENT,	/* basic discard unit is segment */
+	DISCARD_UNIT_SECTION,	/* basic discard unit is section */
+};
+
 static inline int f2fs_test_bit(unsigned int nr, char *addr);
 static inline void f2fs_set_bit(unsigned int nr, char *addr);
 static inline void f2fs_clear_bit(unsigned int nr, char *addr);
@@ -1686,14 +1712,6 @@ struct f2fs_sb_info {
 #endif
 	spinlock_t stat_lock;			/* lock for stat operations */
 
-	/* For app/fs IO statistics */
-	spinlock_t iostat_lock;
-	unsigned long long rw_iostat[NR_IO_TYPE];
-	unsigned long long prev_rw_iostat[NR_IO_TYPE];
-	bool iostat_enable;
-	unsigned long iostat_next_period;
-	unsigned int iostat_period_ms;
-
 	/* to attach REQ_META|REQ_FUA flags */
 	unsigned int data_io_flag;
 	unsigned int node_io_flag;
@@ -1732,6 +1750,12 @@ struct f2fs_sb_info {
 	struct kmem_cache *inline_xattr_slab;	/* inline xattr entry */
 	unsigned int inline_xattr_slab_size;	/* default inline xattr slab size */
 
+	/* For reclaimed segs statistics per each GC mode */
+	unsigned int gc_segment_mode;		/* GC state for reclaimed segments */
+	unsigned int gc_reclaimed_segs[MAX_GC_MODE];	/* Reclaimed segs for each mode */
+
+	unsigned long seq_file_ra_mul;		/* multiplier for ra_pages of seq. files in fadvise */
+
 #ifdef CONFIG_F2FS_FS_COMPRESSION
 	struct kmem_cache *page_array_slab;	/* page array entry */
 	unsigned int page_array_slab_size;	/* default page array slab size */
@@ -1747,6 +1771,20 @@ struct f2fs_sb_info {
 	unsigned int compress_watermark;	/* cache page watermark */
 	atomic_t compress_page_hit;		/* cache hit count */
 #endif
+
+#ifdef CONFIG_F2FS_IOSTAT
+	/* For app/fs IO statistics */
+	spinlock_t iostat_lock;
+	unsigned long long rw_iostat[NR_IO_TYPE];
+	unsigned long long prev_rw_iostat[NR_IO_TYPE];
+	bool iostat_enable;
+	unsigned long iostat_next_period;
+	unsigned int iostat_period_ms;
+
+	/* For io latency related statistics info in one iostat period */
+	spinlock_t iostat_lat_lock;
+	struct iostat_lat_info *iostat_io_lat;
+#endif
 };
 
 struct f2fs_private_dio {
@@ -2034,36 +2072,6 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
 	spin_unlock_irqrestore(&sbi->cp_lock, flags);
 }
 
-static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock)
-{
-	unsigned long flags;
-	unsigned char *nat_bits;
-
-	/*
-	 * In order to re-enable nat_bits we need to call fsck.f2fs by
-	 * set_sbi_flag(sbi, SBI_NEED_FSCK). But it may give huge cost,
-	 * so let's rely on regular fsck or unclean shutdown.
-	 */
-
-	if (lock)
-		spin_lock_irqsave(&sbi->cp_lock, flags);
-	__clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG);
-	nat_bits = NM_I(sbi)->nat_bits;
-	NM_I(sbi)->nat_bits = NULL;
-	if (lock)
-		spin_unlock_irqrestore(&sbi->cp_lock, flags);
-
-	kvfree(nat_bits);
-}
-
-static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi,
-					struct cp_control *cpc)
-{
-	bool set = is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
-
-	return (cpc) ? (cpc->reason & CP_UMOUNT) && set : set;
-}
-
 static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
 {
 	down_read(&sbi->cp_rwsem);
@@ -2587,7 +2595,7 @@ static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name,
 	return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, NULL);
 }
 
-static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
+static inline void *f2fs_kmem_cache_alloc_nofail(struct kmem_cache *cachep,
 						gfp_t flags)
 {
 	void *entry;
@@ -2598,6 +2606,20 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
 	return entry;
 }
 
+static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
+			gfp_t flags, bool nofail, struct f2fs_sb_info *sbi)
+{
+	if (nofail)
+		return f2fs_kmem_cache_alloc_nofail(cachep, flags);
+
+	if (time_to_inject(sbi, FAULT_SLAB_ALLOC)) {
+		f2fs_show_injection_info(sbi, FAULT_SLAB_ALLOC);
+		return NULL;
+	}
+
+	return kmem_cache_alloc(cachep, flags);
+}
+
 static inline bool is_inflight_io(struct f2fs_sb_info *sbi, int type)
 {
 	if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) ||
@@ -3210,47 +3232,6 @@ static inline int get_inline_xattr_addrs(struct inode *inode)
 		sizeof((f2fs_inode)->field))			\
 		<= (F2FS_OLD_ATTRIBUTE_SIZE + (extra_isize)))	\
 
-#define DEFAULT_IOSTAT_PERIOD_MS	3000
-#define MIN_IOSTAT_PERIOD_MS		100
-/* maximum period of iostat tracing is 1 day */
-#define MAX_IOSTAT_PERIOD_MS		8640000
-
-static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
-{
-	int i;
-
-	spin_lock(&sbi->iostat_lock);
-	for (i = 0; i < NR_IO_TYPE; i++) {
-		sbi->rw_iostat[i] = 0;
-		sbi->prev_rw_iostat[i] = 0;
-	}
-	spin_unlock(&sbi->iostat_lock);
-}
-
-extern void f2fs_record_iostat(struct f2fs_sb_info *sbi);
-
-static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
-			enum iostat_type type, unsigned long long io_bytes)
-{
-	if (!sbi->iostat_enable)
-		return;
-	spin_lock(&sbi->iostat_lock);
-	sbi->rw_iostat[type] += io_bytes;
-
-	if (type == APP_WRITE_IO || type == APP_DIRECT_IO)
-		sbi->rw_iostat[APP_BUFFERED_IO] =
-			sbi->rw_iostat[APP_WRITE_IO] -
-			sbi->rw_iostat[APP_DIRECT_IO];
-
-	if (type == APP_READ_IO || type == APP_DIRECT_READ_IO)
-		sbi->rw_iostat[APP_BUFFERED_READ_IO] =
-			sbi->rw_iostat[APP_READ_IO] -
-			sbi->rw_iostat[APP_DIRECT_READ_IO];
-	spin_unlock(&sbi->iostat_lock);
-
-	f2fs_record_iostat(sbi);
-}
-
 #define __is_large_section(sbi)		((sbi)->segs_per_sec > 1)
 
 #define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META)
@@ -3417,6 +3398,7 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from);
 int f2fs_truncate_xattr_node(struct inode *inode);
 int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
 					unsigned int seq_id);
+bool f2fs_nat_bitmap_enabled(struct f2fs_sb_info *sbi);
 int f2fs_remove_inode_page(struct inode *inode);
 struct page *f2fs_new_inode_page(struct inode *inode);
 struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs);
@@ -3441,6 +3423,7 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page);
 int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
 int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
 			unsigned int segno, struct f2fs_summary_block *sum);
+void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi);
 int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
 int f2fs_build_node_manager(struct f2fs_sb_info *sbi);
 void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi);
@@ -3464,6 +3447,7 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi);
 void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free);
 void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
 bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
+int f2fs_start_discard_thread(struct f2fs_sb_info *sbi);
 void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi);
 void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi);
 bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi);
@@ -3986,6 +3970,9 @@ void f2fs_destroy_extent_cache(void);
 /*
  * sysfs.c
  */
+#define MIN_RA_MUL	2
+#define MAX_RA_MUL	256
+
 int __init f2fs_init_sysfs(void);
 void f2fs_exit_sysfs(void);
 int f2fs_register_sysfs(struct f2fs_sb_info *sbi);
@@ -4040,18 +4027,23 @@ void f2fs_end_read_compressed_page(struct page *page, bool failed,
 							block_t blkaddr);
 bool f2fs_cluster_is_empty(struct compress_ctx *cc);
 bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index);
+bool f2fs_sanity_check_cluster(struct dnode_of_data *dn);
 void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page);
 int f2fs_write_multi_pages(struct compress_ctx *cc,
 						int *submitted,
 						struct writeback_control *wbc,
 						enum iostat_type io_type);
 int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index);
+void f2fs_update_extent_tree_range_compressed(struct inode *inode,
+				pgoff_t fofs, block_t blkaddr, unsigned int llen,
+				unsigned int c_len);
 int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
 				unsigned nr_pages, sector_t *last_block_in_bio,
 				bool is_readahead, bool for_write);
 struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc);
 void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed);
 void f2fs_put_page_dic(struct page *page);
+unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn);
 int f2fs_init_compress_ctx(struct compress_ctx *cc);
 void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse);
 void f2fs_init_compress_info(struct f2fs_sb_info *sbi);
@@ -4106,6 +4098,8 @@ static inline void f2fs_put_page_dic(struct page *page)
 {
 	WARN_ON_ONCE(1);
 }
+static inline unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn) { return 0; }
+static inline bool f2fs_sanity_check_cluster(struct dnode_of_data *dn) { return false; }
 static inline int f2fs_init_compress_inode(struct f2fs_sb_info *sbi) { return 0; }
 static inline void f2fs_destroy_compress_inode(struct f2fs_sb_info *sbi) { }
 static inline int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) { return 0; }
@@ -4121,6 +4115,9 @@ static inline bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi,
 static inline void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi,
 							nid_t ino) { }
 #define inc_compr_inode_stat(inode)		do { } while (0)
+static inline void f2fs_update_extent_tree_range_compressed(struct inode *inode,
+				pgoff_t fofs, block_t blkaddr, unsigned int llen,
+				unsigned int c_len) { }
 #endif
 
 static inline void set_compress_context(struct inode *inode)
@@ -4136,7 +4133,8 @@ static inline void set_compress_context(struct inode *inode)
 				1 << COMPRESS_CHKSUM : 0;
 	F2FS_I(inode)->i_cluster_size =
 			1 << F2FS_I(inode)->i_log_cluster_size;
-	if (F2FS_I(inode)->i_compress_algorithm == COMPRESS_LZ4 &&
+	if ((F2FS_I(inode)->i_compress_algorithm == COMPRESS_LZ4 ||
+		F2FS_I(inode)->i_compress_algorithm == COMPRESS_ZSTD) &&
 			F2FS_OPTION(sbi).compress_level)
 		F2FS_I(inode)->i_compress_flag |=
 				F2FS_OPTION(sbi).compress_level <<
@@ -4304,16 +4302,6 @@ static inline int block_unaligned_IO(struct inode *inode,
 	return align & blocksize_mask;
 }
 
-static inline int allow_outplace_dio(struct inode *inode,
-				struct kiocb *iocb, struct iov_iter *iter)
-{
-	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	int rw = iov_iter_rw(iter);
-
-	return (f2fs_lfs_mode(sbi) && (rw == WRITE) &&
-				!block_unaligned_IO(inode, iocb, iter));
-}
-
 static inline bool f2fs_force_buffered_io(struct inode *inode,
 				struct kiocb *iocb, struct iov_iter *iter)
 {
@@ -4368,6 +4356,11 @@ static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
 	return false;
 }
 
+static inline bool f2fs_block_unit_discard(struct f2fs_sb_info *sbi)
+{
+	return F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_BLOCK;
+}
+
 #define EFSBADCRC	EBADMSG		/* Bad CRC detected */
 #define EFSCORRUPTED	EUCLEAN		/* Filesystem is corrupted */
 
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 1ff333755721..9c8ef33bd8d3 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -23,6 +23,7 @@
 #include <linux/nls.h>
 #include <linux/sched/signal.h>
 #include <linux/fileattr.h>
+#include <linux/fadvise.h>
 
 #include "f2fs.h"
 #include "node.h"
@@ -30,6 +31,7 @@
 #include "xattr.h"
 #include "acl.h"
 #include "gc.h"
+#include "iostat.h"
 #include <trace/events/f2fs.h>
 #include <uapi/linux/f2fs.h>
 
@@ -258,8 +260,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
 	};
 	unsigned int seq_id = 0;
 
-	if (unlikely(f2fs_readonly(inode->i_sb) ||
-				is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+	if (unlikely(f2fs_readonly(inode->i_sb)))
 		return 0;
 
 	trace_f2fs_sync_file_enter(inode);
@@ -273,7 +274,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
 	ret = file_write_and_wait_range(file, start, end);
 	clear_inode_flag(inode, FI_NEED_IPU);
 
-	if (ret) {
+	if (ret || is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
 		trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
 		return ret;
 	}
@@ -298,6 +299,18 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
 				f2fs_exist_written_data(sbi, ino, UPDATE_INO))
 			goto flush_out;
 		goto out;
+	} else {
+		/*
+		 * for OPU case, during fsync(), node can be persisted before
+		 * data when lower device doesn't support write barrier, result
+		 * in data corruption after SPO.
+		 * So for strict fsync mode, force to use atomic write sematics
+		 * to keep write order in between data/node and last node to
+		 * avoid potential data corruption.
+		 */
+		if (F2FS_OPTION(sbi).fsync_mode ==
+				FSYNC_MODE_STRICT && !atomic)
+			atomic = true;
 	}
 go_write:
 	/*
@@ -737,6 +750,14 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
 		return err;
 
 #ifdef CONFIG_F2FS_FS_COMPRESSION
+	/*
+	 * For compressed file, after release compress blocks, don't allow write
+	 * direct, but we should allow write direct after truncate to zero.
+	 */
+	if (f2fs_compressed_file(inode) && !free_from
+			&& is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
+		clear_inode_flag(inode, FI_COMPRESS_RELEASED);
+
 	if (from != free_from) {
 		err = f2fs_truncate_partial_cluster(inode, from, lock);
 		if (err)
@@ -1082,7 +1103,6 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
 		}
 
 		if (pg_start < pg_end) {
-			struct address_space *mapping = inode->i_mapping;
 			loff_t blk_start, blk_end;
 			struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 
@@ -1092,16 +1112,15 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
 			blk_end = (loff_t)pg_end << PAGE_SHIFT;
 
 			down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
-			filemap_invalidate_lock(mapping);
+			filemap_invalidate_lock(inode->i_mapping);
 
-			truncate_inode_pages_range(mapping, blk_start,
-					blk_end - 1);
+			truncate_pagecache_range(inode, blk_start, blk_end - 1);
 
 			f2fs_lock_op(sbi);
 			ret = f2fs_truncate_hole(inode, pg_start, pg_end);
 			f2fs_unlock_op(sbi);
 
-			filemap_invalidate_unlock(mapping);
+			filemap_invalidate_unlock(inode->i_mapping);
 			up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 		}
 	}
@@ -3473,8 +3492,8 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
 		released_blocks += ret;
 	}
 
-	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 	filemap_invalidate_unlock(inode->i_mapping);
+	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 out:
 	inode_unlock(inode);
 
@@ -3626,8 +3645,8 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
 		reserved_blocks += ret;
 	}
 
-	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 	filemap_invalidate_unlock(inode->i_mapping);
+	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 
 	if (ret >= 0) {
 		clear_inode_flag(inode, FI_COMPRESS_RELEASED);
@@ -4290,7 +4309,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 			 * back to buffered IO.
 			 */
 			if (!f2fs_force_buffered_io(inode, iocb, from) &&
-					allow_outplace_dio(inode, iocb, from))
+					f2fs_lfs_mode(F2FS_I_SB(inode)))
 				goto write;
 		}
 		preallocated = true;
@@ -4330,6 +4349,34 @@ out:
 	return ret;
 }
 
+static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len,
+		int advice)
+{
+	struct inode *inode;
+	struct address_space *mapping;
+	struct backing_dev_info *bdi;
+
+	if (advice == POSIX_FADV_SEQUENTIAL) {
+		inode = file_inode(filp);
+		if (S_ISFIFO(inode->i_mode))
+			return -ESPIPE;
+
+		mapping = filp->f_mapping;
+		if (!mapping || len < 0)
+			return -EINVAL;
+
+		bdi = inode_to_bdi(mapping->host);
+		filp->f_ra.ra_pages = bdi->ra_pages *
+			F2FS_I_SB(inode)->seq_file_ra_mul;
+		spin_lock(&filp->f_lock);
+		filp->f_mode &= ~FMODE_RANDOM;
+		spin_unlock(&filp->f_lock);
+		return 0;
+	}
+
+	return generic_fadvise(filp, offset, len, advice);
+}
+
 #ifdef CONFIG_COMPAT
 struct compat_f2fs_gc_range {
 	u32 sync;
@@ -4458,4 +4505,5 @@ const struct file_operations f2fs_file_operations = {
 #endif
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= iter_file_splice_write,
+	.fadvise	= f2fs_file_fadvise,
 };
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 0e42ee5f7770..77391e3b7d68 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -19,6 +19,7 @@
 #include "node.h"
 #include "segment.h"
 #include "gc.h"
+#include "iostat.h"
 #include <trace/events/f2fs.h>
 
 static struct kmem_cache *victim_entry_slab;
@@ -371,7 +372,8 @@ static struct victim_entry *attach_victim_entry(struct f2fs_sb_info *sbi,
 	struct atgc_management *am = &sbi->am;
 	struct victim_entry *ve;
 
-	ve =  f2fs_kmem_cache_alloc(victim_entry_slab, GFP_NOFS);
+	ve =  f2fs_kmem_cache_alloc(victim_entry_slab,
+				GFP_NOFS, true, NULL);
 
 	ve->mtime = mtime;
 	ve->segno = segno;
@@ -849,7 +851,8 @@ static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode)
 		iput(inode);
 		return;
 	}
-	new_ie = f2fs_kmem_cache_alloc(f2fs_inode_entry_slab, GFP_NOFS);
+	new_ie = f2fs_kmem_cache_alloc(f2fs_inode_entry_slab,
+					GFP_NOFS, true, NULL);
 	new_ie->inode = inode;
 
 	f2fs_radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie);
@@ -1497,8 +1500,10 @@ next_step:
 			int err;
 
 			if (S_ISREG(inode->i_mode)) {
-				if (!down_write_trylock(&fi->i_gc_rwsem[READ]))
+				if (!down_write_trylock(&fi->i_gc_rwsem[READ])) {
+					sbi->skipped_gc_rwsem++;
 					continue;
+				}
 				if (!down_write_trylock(
 						&fi->i_gc_rwsem[WRITE])) {
 					sbi->skipped_gc_rwsem++;
@@ -1646,6 +1651,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
 							force_migrate);
 
 		stat_inc_seg_count(sbi, type, gc_type);
+		sbi->gc_reclaimed_segs[sbi->gc_mode]++;
 		migrated++;
 
 freed:
@@ -1747,7 +1753,7 @@ gc_more:
 		round++;
 	}
 
-	if (gc_type == FG_GC && seg_freed)
+	if (gc_type == FG_GC)
 		sbi->cur_victim_sec = NULL_SEGNO;
 
 	if (sync)
diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
new file mode 100644
index 000000000000..cdcf54ae0db8
--- /dev/null
+++ b/fs/f2fs/iostat.c
@@ -0,0 +1,287 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * f2fs iostat support
+ *
+ * Copyright 2021 Google LLC
+ * Author: Daeho Jeong <daehojeong@google.com>
+ */
+
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+#include <linux/seq_file.h>
+
+#include "f2fs.h"
+#include "iostat.h"
+#include <trace/events/f2fs.h>
+
+#define NUM_PREALLOC_IOSTAT_CTXS	128
+static struct kmem_cache *bio_iostat_ctx_cache;
+static mempool_t *bio_iostat_ctx_pool;
+
+int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset)
+{
+	struct super_block *sb = seq->private;
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	time64_t now = ktime_get_real_seconds();
+
+	if (!sbi->iostat_enable)
+		return 0;
+
+	seq_printf(seq, "time:		%-16llu\n", now);
+
+	/* print app write IOs */
+	seq_puts(seq, "[WRITE]\n");
+	seq_printf(seq, "app buffered:	%-16llu\n",
+				sbi->rw_iostat[APP_BUFFERED_IO]);
+	seq_printf(seq, "app direct:	%-16llu\n",
+				sbi->rw_iostat[APP_DIRECT_IO]);
+	seq_printf(seq, "app mapped:	%-16llu\n",
+				sbi->rw_iostat[APP_MAPPED_IO]);
+
+	/* print fs write IOs */
+	seq_printf(seq, "fs data:	%-16llu\n",
+				sbi->rw_iostat[FS_DATA_IO]);
+	seq_printf(seq, "fs node:	%-16llu\n",
+				sbi->rw_iostat[FS_NODE_IO]);
+	seq_printf(seq, "fs meta:	%-16llu\n",
+				sbi->rw_iostat[FS_META_IO]);
+	seq_printf(seq, "fs gc data:	%-16llu\n",
+				sbi->rw_iostat[FS_GC_DATA_IO]);
+	seq_printf(seq, "fs gc node:	%-16llu\n",
+				sbi->rw_iostat[FS_GC_NODE_IO]);
+	seq_printf(seq, "fs cp data:	%-16llu\n",
+				sbi->rw_iostat[FS_CP_DATA_IO]);
+	seq_printf(seq, "fs cp node:	%-16llu\n",
+				sbi->rw_iostat[FS_CP_NODE_IO]);
+	seq_printf(seq, "fs cp meta:	%-16llu\n",
+				sbi->rw_iostat[FS_CP_META_IO]);
+
+	/* print app read IOs */
+	seq_puts(seq, "[READ]\n");
+	seq_printf(seq, "app buffered:	%-16llu\n",
+				sbi->rw_iostat[APP_BUFFERED_READ_IO]);
+	seq_printf(seq, "app direct:	%-16llu\n",
+				sbi->rw_iostat[APP_DIRECT_READ_IO]);
+	seq_printf(seq, "app mapped:	%-16llu\n",
+				sbi->rw_iostat[APP_MAPPED_READ_IO]);
+
+	/* print fs read IOs */
+	seq_printf(seq, "fs data:	%-16llu\n",
+				sbi->rw_iostat[FS_DATA_READ_IO]);
+	seq_printf(seq, "fs gc data:	%-16llu\n",
+				sbi->rw_iostat[FS_GDATA_READ_IO]);
+	seq_printf(seq, "fs compr_data:	%-16llu\n",
+				sbi->rw_iostat[FS_CDATA_READ_IO]);
+	seq_printf(seq, "fs node:	%-16llu\n",
+				sbi->rw_iostat[FS_NODE_READ_IO]);
+	seq_printf(seq, "fs meta:	%-16llu\n",
+				sbi->rw_iostat[FS_META_READ_IO]);
+
+	/* print other IOs */
+	seq_puts(seq, "[OTHER]\n");
+	seq_printf(seq, "fs discard:	%-16llu\n",
+				sbi->rw_iostat[FS_DISCARD]);
+
+	return 0;
+}
+
+static inline void __record_iostat_latency(struct f2fs_sb_info *sbi)
+{
+	int io, idx = 0;
+	unsigned int cnt;
+	struct f2fs_iostat_latency iostat_lat[MAX_IO_TYPE][NR_PAGE_TYPE];
+	struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
+
+	spin_lock_irq(&sbi->iostat_lat_lock);
+	for (idx = 0; idx < MAX_IO_TYPE; idx++) {
+		for (io = 0; io < NR_PAGE_TYPE; io++) {
+			cnt = io_lat->bio_cnt[idx][io];
+			iostat_lat[idx][io].peak_lat =
+			   jiffies_to_msecs(io_lat->peak_lat[idx][io]);
+			iostat_lat[idx][io].cnt = cnt;
+			iostat_lat[idx][io].avg_lat = cnt ?
+			   jiffies_to_msecs(io_lat->sum_lat[idx][io]) / cnt : 0;
+			io_lat->sum_lat[idx][io] = 0;
+			io_lat->peak_lat[idx][io] = 0;
+			io_lat->bio_cnt[idx][io] = 0;
+		}
+	}
+	spin_unlock_irq(&sbi->iostat_lat_lock);
+
+	trace_f2fs_iostat_latency(sbi, iostat_lat);
+}
+
+static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi)
+{
+	unsigned long long iostat_diff[NR_IO_TYPE];
+	int i;
+
+	if (time_is_after_jiffies(sbi->iostat_next_period))
+		return;
+
+	/* Need double check under the lock */
+	spin_lock(&sbi->iostat_lock);
+	if (time_is_after_jiffies(sbi->iostat_next_period)) {
+		spin_unlock(&sbi->iostat_lock);
+		return;
+	}
+	sbi->iostat_next_period = jiffies +
+				msecs_to_jiffies(sbi->iostat_period_ms);
+
+	for (i = 0; i < NR_IO_TYPE; i++) {
+		iostat_diff[i] = sbi->rw_iostat[i] -
+				sbi->prev_rw_iostat[i];
+		sbi->prev_rw_iostat[i] = sbi->rw_iostat[i];
+	}
+	spin_unlock(&sbi->iostat_lock);
+
+	trace_f2fs_iostat(sbi, iostat_diff);
+
+	__record_iostat_latency(sbi);
+}
+
+void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
+{
+	struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
+	int i;
+
+	spin_lock(&sbi->iostat_lock);
+	for (i = 0; i < NR_IO_TYPE; i++) {
+		sbi->rw_iostat[i] = 0;
+		sbi->prev_rw_iostat[i] = 0;
+	}
+	spin_unlock(&sbi->iostat_lock);
+
+	spin_lock_irq(&sbi->iostat_lat_lock);
+	memset(io_lat, 0, sizeof(struct iostat_lat_info));
+	spin_unlock_irq(&sbi->iostat_lat_lock);
+}
+
+void f2fs_update_iostat(struct f2fs_sb_info *sbi,
+			enum iostat_type type, unsigned long long io_bytes)
+{
+	if (!sbi->iostat_enable)
+		return;
+
+	spin_lock(&sbi->iostat_lock);
+	sbi->rw_iostat[type] += io_bytes;
+
+	if (type == APP_WRITE_IO || type == APP_DIRECT_IO)
+		sbi->rw_iostat[APP_BUFFERED_IO] =
+			sbi->rw_iostat[APP_WRITE_IO] -
+			sbi->rw_iostat[APP_DIRECT_IO];
+
+	if (type == APP_READ_IO || type == APP_DIRECT_READ_IO)
+		sbi->rw_iostat[APP_BUFFERED_READ_IO] =
+			sbi->rw_iostat[APP_READ_IO] -
+			sbi->rw_iostat[APP_DIRECT_READ_IO];
+	spin_unlock(&sbi->iostat_lock);
+
+	f2fs_record_iostat(sbi);
+}
+
+static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
+				int rw, bool is_sync)
+{
+	unsigned long ts_diff;
+	unsigned int iotype = iostat_ctx->type;
+	unsigned long flags;
+	struct f2fs_sb_info *sbi = iostat_ctx->sbi;
+	struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
+	int idx;
+
+	if (!sbi->iostat_enable)
+		return;
+
+	ts_diff = jiffies - iostat_ctx->submit_ts;
+	if (iotype >= META_FLUSH)
+		iotype = META;
+
+	if (rw == 0) {
+		idx = READ_IO;
+	} else {
+		if (is_sync)
+			idx = WRITE_SYNC_IO;
+		else
+			idx = WRITE_ASYNC_IO;
+	}
+
+	spin_lock_irqsave(&sbi->iostat_lat_lock, flags);
+	io_lat->sum_lat[idx][iotype] += ts_diff;
+	io_lat->bio_cnt[idx][iotype]++;
+	if (ts_diff > io_lat->peak_lat[idx][iotype])
+		io_lat->peak_lat[idx][iotype] = ts_diff;
+	spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
+}
+
+void iostat_update_and_unbind_ctx(struct bio *bio, int rw)
+{
+	struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
+	bool is_sync = bio->bi_opf & REQ_SYNC;
+
+	if (rw == 0)
+		bio->bi_private = iostat_ctx->post_read_ctx;
+	else
+		bio->bi_private = iostat_ctx->sbi;
+	__update_iostat_latency(iostat_ctx, rw, is_sync);
+	mempool_free(iostat_ctx, bio_iostat_ctx_pool);
+}
+
+void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
+		struct bio *bio, struct bio_post_read_ctx *ctx)
+{
+	struct bio_iostat_ctx *iostat_ctx;
+	/* Due to the mempool, this never fails. */
+	iostat_ctx = mempool_alloc(bio_iostat_ctx_pool, GFP_NOFS);
+	iostat_ctx->sbi = sbi;
+	iostat_ctx->submit_ts = 0;
+	iostat_ctx->type = 0;
+	iostat_ctx->post_read_ctx = ctx;
+	bio->bi_private = iostat_ctx;
+}
+
+int __init f2fs_init_iostat_processing(void)
+{
+	bio_iostat_ctx_cache =
+		kmem_cache_create("f2fs_bio_iostat_ctx",
+				  sizeof(struct bio_iostat_ctx), 0, 0, NULL);
+	if (!bio_iostat_ctx_cache)
+		goto fail;
+	bio_iostat_ctx_pool =
+		mempool_create_slab_pool(NUM_PREALLOC_IOSTAT_CTXS,
+					 bio_iostat_ctx_cache);
+	if (!bio_iostat_ctx_pool)
+		goto fail_free_cache;
+	return 0;
+
+fail_free_cache:
+	kmem_cache_destroy(bio_iostat_ctx_cache);
+fail:
+	return -ENOMEM;
+}
+
+void f2fs_destroy_iostat_processing(void)
+{
+	mempool_destroy(bio_iostat_ctx_pool);
+	kmem_cache_destroy(bio_iostat_ctx_cache);
+}
+
+int f2fs_init_iostat(struct f2fs_sb_info *sbi)
+{
+	/* init iostat info */
+	spin_lock_init(&sbi->iostat_lock);
+	spin_lock_init(&sbi->iostat_lat_lock);
+	sbi->iostat_enable = false;
+	sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS;
+	sbi->iostat_io_lat = f2fs_kzalloc(sbi, sizeof(struct iostat_lat_info),
+					GFP_KERNEL);
+	if (!sbi->iostat_io_lat)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void f2fs_destroy_iostat(struct f2fs_sb_info *sbi)
+{
+	kfree(sbi->iostat_io_lat);
+}
diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
new file mode 100644
index 000000000000..22a2d01f57ef
--- /dev/null
+++ b/fs/f2fs/iostat.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2021 Google LLC
+ * Author: Daeho Jeong <daehojeong@google.com>
+ */
+#ifndef __F2FS_IOSTAT_H__
+#define __F2FS_IOSTAT_H__
+
+struct bio_post_read_ctx;
+
+#ifdef CONFIG_F2FS_IOSTAT
+
+#define DEFAULT_IOSTAT_PERIOD_MS	3000
+#define MIN_IOSTAT_PERIOD_MS		100
+/* maximum period of iostat tracing is 1 day */
+#define MAX_IOSTAT_PERIOD_MS		8640000
+
+enum {
+	READ_IO,
+	WRITE_SYNC_IO,
+	WRITE_ASYNC_IO,
+	MAX_IO_TYPE,
+};
+
+struct iostat_lat_info {
+	unsigned long sum_lat[MAX_IO_TYPE][NR_PAGE_TYPE];	/* sum of io latencies */
+	unsigned long peak_lat[MAX_IO_TYPE][NR_PAGE_TYPE];	/* peak io latency */
+	unsigned int bio_cnt[MAX_IO_TYPE][NR_PAGE_TYPE];	/* bio count */
+};
+
+extern int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
+			void *offset);
+extern void f2fs_reset_iostat(struct f2fs_sb_info *sbi);
+extern void f2fs_update_iostat(struct f2fs_sb_info *sbi,
+			enum iostat_type type, unsigned long long io_bytes);
+
+struct bio_iostat_ctx {
+	struct f2fs_sb_info *sbi;
+	unsigned long submit_ts;
+	enum page_type type;
+	struct bio_post_read_ctx *post_read_ctx;
+};
+
+static inline void iostat_update_submit_ctx(struct bio *bio,
+			enum page_type type)
+{
+	struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
+
+	iostat_ctx->submit_ts = jiffies;
+	iostat_ctx->type = type;
+}
+
+static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
+{
+	struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
+
+	return iostat_ctx->post_read_ctx;
+}
+
+extern void iostat_update_and_unbind_ctx(struct bio *bio, int rw);
+extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
+		struct bio *bio, struct bio_post_read_ctx *ctx);
+extern int f2fs_init_iostat_processing(void);
+extern void f2fs_destroy_iostat_processing(void);
+extern int f2fs_init_iostat(struct f2fs_sb_info *sbi);
+extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
+#else
+static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
+		enum iostat_type type, unsigned long long io_bytes) {}
+static inline void iostat_update_and_unbind_ctx(struct bio *bio, int rw) {}
+static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
+		struct bio *bio, struct bio_post_read_ctx *ctx) {}
+static inline void iostat_update_submit_ctx(struct bio *bio,
+		enum page_type type) {}
+static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
+{
+	return bio->bi_private;
+}
+static inline int f2fs_init_iostat_processing(void) { return 0; }
+static inline void f2fs_destroy_iostat_processing(void) {}
+static inline int f2fs_init_iostat(struct f2fs_sb_info *sbi) { return 0; }
+static inline void f2fs_destroy_iostat(struct f2fs_sb_info *sbi) {}
+#endif
+#endif /* __F2FS_IOSTAT_H__ */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 0be9e2d7120e..e863136081b4 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -17,6 +17,7 @@
 #include "node.h"
 #include "segment.h"
 #include "xattr.h"
+#include "iostat.h"
 #include <trace/events/f2fs.h>
 
 #define on_f2fs_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock)
@@ -162,14 +163,13 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
 	return dst_page;
 }
 
-static struct nat_entry *__alloc_nat_entry(nid_t nid, bool no_fail)
+static struct nat_entry *__alloc_nat_entry(struct f2fs_sb_info *sbi,
+						nid_t nid, bool no_fail)
 {
 	struct nat_entry *new;
 
-	if (no_fail)
-		new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_F2FS_ZERO);
-	else
-		new = kmem_cache_alloc(nat_entry_slab, GFP_F2FS_ZERO);
+	new = f2fs_kmem_cache_alloc(nat_entry_slab,
+					GFP_F2FS_ZERO, no_fail, sbi);
 	if (new) {
 		nat_set_nid(new, nid);
 		nat_reset_flag(new);
@@ -242,7 +242,8 @@ static struct nat_entry_set *__grab_nat_entry_set(struct f2fs_nm_info *nm_i,
 
 	head = radix_tree_lookup(&nm_i->nat_set_root, set);
 	if (!head) {
-		head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_NOFS);
+		head = f2fs_kmem_cache_alloc(nat_entry_set_slab,
+						GFP_NOFS, true, NULL);
 
 		INIT_LIST_HEAD(&head->entry_list);
 		INIT_LIST_HEAD(&head->set_list);
@@ -329,7 +330,8 @@ static unsigned int f2fs_add_fsync_node_entry(struct f2fs_sb_info *sbi,
 	unsigned long flags;
 	unsigned int seq_id;
 
-	fn = f2fs_kmem_cache_alloc(fsync_node_entry_slab, GFP_NOFS);
+	fn = f2fs_kmem_cache_alloc(fsync_node_entry_slab,
+					GFP_NOFS, true, NULL);
 
 	get_page(page);
 	fn->page = page;
@@ -428,7 +430,7 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	struct nat_entry *new, *e;
 
-	new = __alloc_nat_entry(nid, false);
+	new = __alloc_nat_entry(sbi, nid, false);
 	if (!new)
 		return;
 
@@ -451,7 +453,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	struct nat_entry *e;
-	struct nat_entry *new = __alloc_nat_entry(ni->nid, true);
+	struct nat_entry *new = __alloc_nat_entry(sbi, ni->nid, true);
 
 	down_write(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, ni->nid);
@@ -552,7 +554,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
 	int i;
 
 	ni->nid = nid;
-
+retry:
 	/* Check nat cache */
 	down_read(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, nid);
@@ -564,10 +566,19 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
 		return 0;
 	}
 
-	memset(&ne, 0, sizeof(struct f2fs_nat_entry));
+	/*
+	 * Check current segment summary by trying to grab journal_rwsem first.
+	 * This sem is on the critical path on the checkpoint requiring the above
+	 * nat_tree_lock. Therefore, we should retry, if we failed to grab here
+	 * while not bothering checkpoint.
+	 */
+	if (!rwsem_is_locked(&sbi->cp_global_sem)) {
+		down_read(&curseg->journal_rwsem);
+	} else if (!down_read_trylock(&curseg->journal_rwsem)) {
+		up_read(&nm_i->nat_tree_lock);
+		goto retry;
+	}
 
-	/* Check current segment summary */
-	down_read(&curseg->journal_rwsem);
 	i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
 	if (i >= 0) {
 		ne = nat_in_journal(journal, i);
@@ -832,6 +843,26 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
 	dn->ofs_in_node = offset[level];
 	dn->node_page = npage[level];
 	dn->data_blkaddr = f2fs_data_blkaddr(dn);
+
+	if (is_inode_flag_set(dn->inode, FI_COMPRESSED_FILE) &&
+					f2fs_sb_has_readonly(sbi)) {
+		unsigned int c_len = f2fs_cluster_blocks_are_contiguous(dn);
+		block_t blkaddr;
+
+		if (!c_len)
+			goto out;
+
+		blkaddr = f2fs_data_blkaddr(dn);
+		if (blkaddr == COMPRESS_ADDR)
+			blkaddr = data_blkaddr(dn->inode, dn->node_page,
+						dn->ofs_in_node + 1);
+
+		f2fs_update_extent_tree_range_compressed(dn->inode,
+					index, blkaddr,
+					F2FS_I(dn->inode)->i_cluster_size,
+					c_len);
+	}
+out:
 	return 0;
 
 release_pages:
@@ -1321,7 +1352,8 @@ static int read_node_page(struct page *page, int op_flags)
 	if (err)
 		return err;
 
-	if (unlikely(ni.blk_addr == NULL_ADDR) ||
+	/* NEW_ADDR can be seen, after cp_error drops some dirty node pages */
+	if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR) ||
 			is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) {
 		ClearPageUptodate(page);
 		return -ENOENT;
@@ -2181,6 +2213,24 @@ static void __move_free_nid(struct f2fs_sb_info *sbi, struct free_nid *i,
 	}
 }
 
+bool f2fs_nat_bitmap_enabled(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	unsigned int i;
+	bool ret = true;
+
+	down_read(&nm_i->nat_tree_lock);
+	for (i = 0; i < nm_i->nat_blocks; i++) {
+		if (!test_bit_le(i, nm_i->nat_block_bitmap)) {
+			ret = false;
+			break;
+		}
+	}
+	up_read(&nm_i->nat_tree_lock);
+
+	return ret;
+}
+
 static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
 							bool set, bool build)
 {
@@ -2222,7 +2272,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi,
 	if (unlikely(f2fs_check_nid_range(sbi, nid)))
 		return false;
 
-	i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
+	i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS, true, NULL);
 	i->nid = nid;
 	i->state = FREE_NID;
 
@@ -2812,7 +2862,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
 
 		ne = __lookup_nat_cache(nm_i, nid);
 		if (!ne) {
-			ne = __alloc_nat_entry(nid, true);
+			ne = __alloc_nat_entry(sbi, nid, true);
 			__init_nat_entry(nm_i, ne, &raw_ne, true);
 		}
 
@@ -2852,7 +2902,23 @@ add_out:
 	list_add_tail(&nes->set_list, head);
 }
 
-static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
+static void __update_nat_bits(struct f2fs_nm_info *nm_i, unsigned int nat_ofs,
+							unsigned int valid)
+{
+	if (valid == 0) {
+		__set_bit_le(nat_ofs, nm_i->empty_nat_bits);
+		__clear_bit_le(nat_ofs, nm_i->full_nat_bits);
+		return;
+	}
+
+	__clear_bit_le(nat_ofs, nm_i->empty_nat_bits);
+	if (valid == NAT_ENTRY_PER_BLOCK)
+		__set_bit_le(nat_ofs, nm_i->full_nat_bits);
+	else
+		__clear_bit_le(nat_ofs, nm_i->full_nat_bits);
+}
+
+static void update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
 						struct page *page)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -2861,7 +2927,7 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
 	int valid = 0;
 	int i = 0;
 
-	if (!enabled_nat_bits(sbi, NULL))
+	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
 		return;
 
 	if (nat_index == 0) {
@@ -2872,17 +2938,36 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
 		if (le32_to_cpu(nat_blk->entries[i].block_addr) != NULL_ADDR)
 			valid++;
 	}
-	if (valid == 0) {
-		__set_bit_le(nat_index, nm_i->empty_nat_bits);
-		__clear_bit_le(nat_index, nm_i->full_nat_bits);
-		return;
+
+	__update_nat_bits(nm_i, nat_index, valid);
+}
+
+void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	unsigned int nat_ofs;
+
+	down_read(&nm_i->nat_tree_lock);
+
+	for (nat_ofs = 0; nat_ofs < nm_i->nat_blocks; nat_ofs++) {
+		unsigned int valid = 0, nid_ofs = 0;
+
+		/* handle nid zero due to it should never be used */
+		if (unlikely(nat_ofs == 0)) {
+			valid = 1;
+			nid_ofs = 1;
+		}
+
+		for (; nid_ofs < NAT_ENTRY_PER_BLOCK; nid_ofs++) {
+			if (!test_bit_le(nid_ofs,
+					nm_i->free_nid_bitmap[nat_ofs]))
+				valid++;
+		}
+
+		__update_nat_bits(nm_i, nat_ofs, valid);
 	}
 
-	__clear_bit_le(nat_index, nm_i->empty_nat_bits);
-	if (valid == NAT_ENTRY_PER_BLOCK)
-		__set_bit_le(nat_index, nm_i->full_nat_bits);
-	else
-		__clear_bit_le(nat_index, nm_i->full_nat_bits);
+	up_read(&nm_i->nat_tree_lock);
 }
 
 static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
@@ -2901,7 +2986,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 	 * #1, flush nat entries to journal in current hot data summary block.
 	 * #2, flush nat entries to nat page.
 	 */
-	if (enabled_nat_bits(sbi, cpc) ||
+	if ((cpc->reason & CP_UMOUNT) ||
 		!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
 		to_journal = false;
 
@@ -2948,7 +3033,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 	if (to_journal) {
 		up_write(&curseg->journal_rwsem);
 	} else {
-		__update_nat_bits(sbi, start_nid, page);
+		update_nat_bits(sbi, start_nid, page);
 		f2fs_put_page(page, 1);
 	}
 
@@ -2979,7 +3064,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	 * during unmount, let's flush nat_bits before checking
 	 * nat_cnt[DIRTY_NAT].
 	 */
-	if (enabled_nat_bits(sbi, cpc)) {
+	if (cpc->reason & CP_UMOUNT) {
 		down_write(&nm_i->nat_tree_lock);
 		remove_nats_in_journal(sbi);
 		up_write(&nm_i->nat_tree_lock);
@@ -2995,7 +3080,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	 * entries, remove all entries from journal and merge them
 	 * into nat entry set.
 	 */
-	if (enabled_nat_bits(sbi, cpc) ||
+	if (cpc->reason & CP_UMOUNT ||
 		!__has_cursum_space(journal,
 			nm_i->nat_cnt[DIRTY_NAT], NAT_JOURNAL))
 		remove_nats_in_journal(sbi);
@@ -3032,15 +3117,18 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
 	__u64 cp_ver = cur_cp_version(ckpt);
 	block_t nat_bits_addr;
 
-	if (!enabled_nat_bits(sbi, NULL))
-		return 0;
-
 	nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8);
 	nm_i->nat_bits = f2fs_kvzalloc(sbi,
 			nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL);
 	if (!nm_i->nat_bits)
 		return -ENOMEM;
 
+	nm_i->full_nat_bits = nm_i->nat_bits + 8;
+	nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
+
+	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
+		return 0;
+
 	nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
 						nm_i->nat_bits_blocks;
 	for (i = 0; i < nm_i->nat_bits_blocks; i++) {
@@ -3057,13 +3145,12 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
 
 	cp_ver |= (cur_cp_crc(ckpt) << 32);
 	if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
-		disable_nat_bits(sbi, true);
+		clear_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
+		f2fs_notice(sbi, "Disable nat_bits due to incorrect cp_ver (%llu, %llu)",
+			cp_ver, le64_to_cpu(*(__le64 *)nm_i->nat_bits));
 		return 0;
 	}
 
-	nm_i->full_nat_bits = nm_i->nat_bits + 8;
-	nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
-
 	f2fs_notice(sbi, "Found nat_bits in checkpoint");
 	return 0;
 }
@@ -3074,7 +3161,7 @@ static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
 	unsigned int i = 0;
 	nid_t nid, last_nid;
 
-	if (!enabled_nat_bits(sbi, NULL))
+	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
 		return;
 
 	for (i = 0; i < nm_i->nat_blocks; i++) {
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 695eacfe776c..04655511d7f5 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -91,7 +91,8 @@ static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
 			goto err_out;
 	}
 
-	entry = f2fs_kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
+	entry = f2fs_kmem_cache_alloc(fsync_entry_slab,
+					GFP_F2FS_ZERO, true, NULL);
 	entry->inode = inode;
 	list_add_tail(&entry->list, head);
 
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 15cc89eef28d..a135d2247415 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -20,6 +20,7 @@
 #include "segment.h"
 #include "node.h"
 #include "gc.h"
+#include "iostat.h"
 #include <trace/events/f2fs.h>
 
 #define __reverse_ffz(x) __reverse_ffs(~(x))
@@ -188,7 +189,8 @@ void f2fs_register_inmem_page(struct inode *inode, struct page *page)
 
 	set_page_private_atomic(page);
 
-	new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
+	new = f2fs_kmem_cache_alloc(inmem_entry_slab,
+					GFP_NOFS, true, NULL);
 
 	/* add atomic page indices to the list */
 	new->page = page;
@@ -776,11 +778,22 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
 		return 0;
 
 	for (i = 1; i < sbi->s_ndevs; i++) {
+		int count = DEFAULT_RETRY_IO_COUNT;
+
 		if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
 			continue;
-		ret = __submit_flush_wait(sbi, FDEV(i).bdev);
-		if (ret)
+
+		do {
+			ret = __submit_flush_wait(sbi, FDEV(i).bdev);
+			if (ret)
+				congestion_wait(BLK_RW_ASYNC,
+						DEFAULT_IO_TIMEOUT);
+		} while (ret && --count);
+
+		if (ret) {
+			f2fs_stop_checkpoint(sbi, false);
 			break;
+		}
 
 		spin_lock(&sbi->dev_lock);
 		f2fs_clear_bit(i, (char *)&sbi->dirty_device);
@@ -990,7 +1003,7 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
 
 	pend_list = &dcc->pend_list[plist_idx(len)];
 
-	dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
+	dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS, true, NULL);
 	INIT_LIST_HEAD(&dc->list);
 	dc->bdev = bdev;
 	dc->lstart = lstart;
@@ -1893,7 +1906,8 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
 		se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
 		offset = GET_BLKOFF_FROM_SEG0(sbi, i);
 
-		if (!f2fs_test_and_set_bit(offset, se->discard_map))
+		if (f2fs_block_unit_discard(sbi) &&
+				!f2fs_test_and_set_bit(offset, se->discard_map))
 			sbi->discard_blks--;
 	}
 
@@ -1918,7 +1932,8 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
 	struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
 	int i;
 
-	if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi))
+	if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi) ||
+			!f2fs_block_unit_discard(sbi))
 		return false;
 
 	if (!force) {
@@ -1949,7 +1964,7 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
 
 		if (!de) {
 			de = f2fs_kmem_cache_alloc(discard_entry_slab,
-								GFP_F2FS_ZERO);
+						GFP_F2FS_ZERO, true, NULL);
 			de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
 			list_add_tail(&de->list, head);
 		}
@@ -2003,14 +2018,18 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
 	unsigned int start = 0, end = -1;
 	unsigned int secno, start_segno;
 	bool force = (cpc->reason & CP_DISCARD);
-	bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi);
+	bool section_alignment = F2FS_OPTION(sbi).discard_unit ==
+						DISCARD_UNIT_SECTION;
+
+	if (f2fs_lfs_mode(sbi) && __is_large_section(sbi))
+		section_alignment = true;
 
 	mutex_lock(&dirty_i->seglist_lock);
 
 	while (1) {
 		int i;
 
-		if (need_align && end != -1)
+		if (section_alignment && end != -1)
 			end--;
 		start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
 		if (start >= MAIN_SEGS(sbi))
@@ -2018,7 +2037,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
 		end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
 								start + 1);
 
-		if (need_align) {
+		if (section_alignment) {
 			start = rounddown(start, sbi->segs_per_sec);
 			end = roundup(end, sbi->segs_per_sec);
 		}
@@ -2056,6 +2075,9 @@ next:
 	}
 	mutex_unlock(&dirty_i->seglist_lock);
 
+	if (!f2fs_block_unit_discard(sbi))
+		goto wakeup;
+
 	/* send small discards */
 	list_for_each_entry_safe(entry, this, head, list) {
 		unsigned int cur_pos = 0, next_pos, len, total_len = 0;
@@ -2089,12 +2111,29 @@ skip:
 		dcc->nr_discards -= total_len;
 	}
 
+wakeup:
 	wake_up_discard_thread(sbi, false);
 }
 
-static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
+int f2fs_start_discard_thread(struct f2fs_sb_info *sbi)
 {
 	dev_t dev = sbi->sb->s_bdev->bd_dev;
+	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+	int err = 0;
+
+	if (!f2fs_realtime_discard_enable(sbi))
+		return 0;
+
+	dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
+				"f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
+	if (IS_ERR(dcc->f2fs_issue_discard))
+		err = PTR_ERR(dcc->f2fs_issue_discard);
+
+	return err;
+}
+
+static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
+{
 	struct discard_cmd_control *dcc;
 	int err = 0, i;
 
@@ -2108,6 +2147,11 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
 		return -ENOMEM;
 
 	dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
+	if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
+		dcc->discard_granularity = sbi->blocks_per_seg;
+	else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
+		dcc->discard_granularity = BLKS_PER_SEC(sbi);
+
 	INIT_LIST_HEAD(&dcc->entry_list);
 	for (i = 0; i < MAX_PLIST_NUM; i++)
 		INIT_LIST_HEAD(&dcc->pend_list[i]);
@@ -2127,13 +2171,10 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
 	init_waitqueue_head(&dcc->discard_wait_queue);
 	SM_I(sbi)->dcc_info = dcc;
 init_thread:
-	dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
-				"f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
-	if (IS_ERR(dcc->f2fs_issue_discard)) {
-		err = PTR_ERR(dcc->f2fs_issue_discard);
+	err = f2fs_start_discard_thread(sbi);
+	if (err) {
 		kfree(dcc);
 		SM_I(sbi)->dcc_info = NULL;
-		return err;
 	}
 
 	return err;
@@ -2255,7 +2296,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
 			del = 0;
 		}
 
-		if (!f2fs_test_and_set_bit(offset, se->discard_map))
+		if (f2fs_block_unit_discard(sbi) &&
+				!f2fs_test_and_set_bit(offset, se->discard_map))
 			sbi->discard_blks--;
 
 		/*
@@ -2297,7 +2339,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
 			}
 		}
 
-		if (f2fs_test_and_clear_bit(offset, se->discard_map))
+		if (f2fs_block_unit_discard(sbi) &&
+			f2fs_test_and_clear_bit(offset, se->discard_map))
 			sbi->discard_blks++;
 	}
 	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
@@ -3563,7 +3606,7 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
 		goto drop_bio;
 	}
 
-	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) || f2fs_cp_error(sbi)) {
+	if (f2fs_cp_error(sbi)) {
 		err = -EIO;
 		goto drop_bio;
 	}
@@ -4071,7 +4114,8 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
 static struct sit_entry_set *grab_sit_entry_set(void)
 {
 	struct sit_entry_set *ses =
-			f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS);
+			f2fs_kmem_cache_alloc(sit_entry_set_slab,
+						GFP_NOFS, true, NULL);
 
 	ses->entry_cnt = 0;
 	INIT_LIST_HEAD(&ses->set_list);
@@ -4282,6 +4326,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
 	unsigned int sit_segs, start;
 	char *src_bitmap, *bitmap;
 	unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size;
+	unsigned int discard_map = f2fs_block_unit_discard(sbi) ? 1 : 0;
 
 	/* allocate memory for SIT information */
 	sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
@@ -4304,9 +4349,9 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
 		return -ENOMEM;
 
 #ifdef CONFIG_F2FS_CHECK_FS
-	bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 4;
+	bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (3 + discard_map);
 #else
-	bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 3;
+	bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (2 + discard_map);
 #endif
 	sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
 	if (!sit_i->bitmap)
@@ -4326,8 +4371,10 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
 		bitmap += SIT_VBLOCK_MAP_SIZE;
 #endif
 
-		sit_i->sentries[start].discard_map = bitmap;
-		bitmap += SIT_VBLOCK_MAP_SIZE;
+		if (discard_map) {
+			sit_i->sentries[start].discard_map = bitmap;
+			bitmap += SIT_VBLOCK_MAP_SIZE;
+		}
 	}
 
 	sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
@@ -4489,17 +4536,19 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
 			if (IS_NODESEG(se->type))
 				total_node_blocks += se->valid_blocks;
 
-			/* build discard map only one time */
-			if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
-				memset(se->discard_map, 0xff,
-					SIT_VBLOCK_MAP_SIZE);
-			} else {
-				memcpy(se->discard_map,
-					se->cur_valid_map,
-					SIT_VBLOCK_MAP_SIZE);
-				sbi->discard_blks +=
-					sbi->blocks_per_seg -
-					se->valid_blocks;
+			if (f2fs_block_unit_discard(sbi)) {
+				/* build discard map only one time */
+				if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
+					memset(se->discard_map, 0xff,
+						SIT_VBLOCK_MAP_SIZE);
+				} else {
+					memcpy(se->discard_map,
+						se->cur_valid_map,
+						SIT_VBLOCK_MAP_SIZE);
+					sbi->discard_blks +=
+						sbi->blocks_per_seg -
+						se->valid_blocks;
+				}
 			}
 
 			if (__is_large_section(sbi))
@@ -4535,13 +4584,15 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
 		if (IS_NODESEG(se->type))
 			total_node_blocks += se->valid_blocks;
 
-		if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
-			memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
-		} else {
-			memcpy(se->discard_map, se->cur_valid_map,
-						SIT_VBLOCK_MAP_SIZE);
-			sbi->discard_blks += old_valid_blocks;
-			sbi->discard_blks -= se->valid_blocks;
+		if (f2fs_block_unit_discard(sbi)) {
+			if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
+				memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
+			} else {
+				memcpy(se->discard_map, se->cur_valid_map,
+							SIT_VBLOCK_MAP_SIZE);
+				sbi->discard_blks += old_valid_blocks;
+				sbi->discard_blks -= se->valid_blocks;
+			}
 		}
 
 		if (__is_large_section(sbi)) {
@@ -5159,7 +5210,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
 		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
 	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
 	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
-	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
+	sm_info->min_seq_blocks = sbi->blocks_per_seg;
 	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
 	sm_info->min_ssr_sections = reserved_sections(sbi);
 
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 050230c70a53..89fff258727d 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -142,7 +142,7 @@ enum {
 };
 
 /*
- * In the victim_sel_policy->alloc_mode, there are two block allocation modes.
+ * In the victim_sel_policy->alloc_mode, there are three block allocation modes.
  * LFS writes data sequentially with cleaning operations.
  * SSR (Slack Space Recycle) reuses obsolete space without cleaning operations.
  * AT_SSR (Age Threshold based Slack Space Recycle) merges fragments into
@@ -155,7 +155,7 @@ enum {
 };
 
 /*
- * In the victim_sel_policy->gc_mode, there are two gc, aka cleaning, modes.
+ * In the victim_sel_policy->gc_mode, there are three gc, aka cleaning, modes.
  * GC_CB is based on cost-benefit algorithm.
  * GC_GREEDY is based on greedy algorithm.
  * GC_AT is based on age-threshold algorithm.
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index ce2ab1b85c11..78ebc306ee2b 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -33,6 +33,7 @@
 #include "segment.h"
 #include "xattr.h"
 #include "gc.h"
+#include "iostat.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/f2fs.h>
@@ -56,6 +57,7 @@ const char *f2fs_fault_name[FAULT_MAX] = {
 	[FAULT_CHECKPOINT]	= "checkpoint error",
 	[FAULT_DISCARD]		= "discard error",
 	[FAULT_WRITE_IO]	= "write IO error",
+	[FAULT_SLAB_ALLOC]	= "slab alloc",
 };
 
 void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
@@ -155,6 +157,7 @@ enum {
 	Opt_atgc,
 	Opt_gc_merge,
 	Opt_nogc_merge,
+	Opt_discard_unit,
 	Opt_err,
 };
 
@@ -231,6 +234,7 @@ static match_table_t f2fs_tokens = {
 	{Opt_atgc, "atgc"},
 	{Opt_gc_merge, "gc_merge"},
 	{Opt_nogc_merge, "nogc_merge"},
+	{Opt_discard_unit, "discard_unit=%s"},
 	{Opt_err, NULL},
 };
 
@@ -657,10 +661,14 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
 				return -EINVAL;
 			break;
 		case Opt_discard:
+			if (!f2fs_hw_support_discard(sbi)) {
+				f2fs_warn(sbi, "device does not support discard");
+				break;
+			}
 			set_opt(sbi, DISCARD);
 			break;
 		case Opt_nodiscard:
-			if (f2fs_sb_has_blkzoned(sbi)) {
+			if (f2fs_hw_should_discard(sbi)) {
 				f2fs_warn(sbi, "discard is required for zoned block devices");
 				return -EINVAL;
 			}
@@ -1173,6 +1181,25 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
 		case Opt_nogc_merge:
 			clear_opt(sbi, GC_MERGE);
 			break;
+		case Opt_discard_unit:
+			name = match_strdup(&args[0]);
+			if (!name)
+				return -ENOMEM;
+			if (!strcmp(name, "block")) {
+				F2FS_OPTION(sbi).discard_unit =
+						DISCARD_UNIT_BLOCK;
+			} else if (!strcmp(name, "segment")) {
+				F2FS_OPTION(sbi).discard_unit =
+						DISCARD_UNIT_SEGMENT;
+			} else if (!strcmp(name, "section")) {
+				F2FS_OPTION(sbi).discard_unit =
+						DISCARD_UNIT_SECTION;
+			} else {
+				kfree(name);
+				return -EINVAL;
+			}
+			kfree(name);
+			break;
 		default:
 			f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
 				 p);
@@ -1211,6 +1238,14 @@ default_check:
 		return -EINVAL;
 	}
 #endif
+	if (f2fs_sb_has_blkzoned(sbi)) {
+		if (F2FS_OPTION(sbi).discard_unit !=
+						DISCARD_UNIT_SECTION) {
+			f2fs_info(sbi, "Zoned block device doesn't need small discard, set discard_unit=section by default");
+			F2FS_OPTION(sbi).discard_unit =
+					DISCARD_UNIT_SECTION;
+		}
+	}
 
 #ifdef CONFIG_F2FS_FS_COMPRESSION
 	if (f2fs_test_compress_extension(sbi)) {
@@ -1271,7 +1306,8 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
 {
 	struct f2fs_inode_info *fi;
 
-	fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_F2FS_ZERO);
+	fi = f2fs_kmem_cache_alloc(f2fs_inode_cachep,
+				GFP_F2FS_ZERO, false, F2FS_SB(sb));
 	if (!fi)
 		return NULL;
 
@@ -1541,6 +1577,7 @@ static void f2fs_put_super(struct super_block *sb)
 #endif
 	fscrypt_free_dummy_policy(&F2FS_OPTION(sbi).dummy_enc_policy);
 	destroy_percpu_info(sbi);
+	f2fs_destroy_iostat(sbi);
 	for (i = 0; i < NR_PAGE_TYPE; i++)
 		kvfree(sbi->write_io[i]);
 #ifdef CONFIG_UNICODE
@@ -1924,6 +1961,14 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 
 	if (test_opt(sbi, ATGC))
 		seq_puts(seq, ",atgc");
+
+	if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_BLOCK)
+		seq_printf(seq, ",discard_unit=%s", "block");
+	else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
+		seq_printf(seq, ",discard_unit=%s", "segment");
+	else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
+		seq_printf(seq, ",discard_unit=%s", "section");
+
 	return 0;
 }
 
@@ -1959,11 +2004,15 @@ static void default_options(struct f2fs_sb_info *sbi)
 	F2FS_OPTION(sbi).unusable_cap = 0;
 	sbi->sb->s_flags |= SB_LAZYTIME;
 	set_opt(sbi, FLUSH_MERGE);
-	set_opt(sbi, DISCARD);
-	if (f2fs_sb_has_blkzoned(sbi))
+	if (f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi))
+		set_opt(sbi, DISCARD);
+	if (f2fs_sb_has_blkzoned(sbi)) {
 		F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS;
-	else
+		F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_SECTION;
+	} else {
 		F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE;
+		F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_BLOCK;
+	}
 
 #ifdef CONFIG_F2FS_FS_XATTR
 	set_opt(sbi, XATTR_USER);
@@ -2038,8 +2087,17 @@ restore_flag:
 
 static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
 {
+	int retry = DEFAULT_RETRY_IO_COUNT;
+
 	/* we should flush all the data to keep data consistency */
-	sync_inodes_sb(sbi->sb);
+	do {
+		sync_inodes_sb(sbi->sb);
+		cond_resched();
+		congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+	} while (get_pages(sbi, F2FS_DIRTY_DATA) && retry--);
+
+	if (unlikely(retry < 0))
+		f2fs_warn(sbi, "checkpoint=enable has some unwritten data.");
 
 	down_write(&sbi->gc_lock);
 	f2fs_dirty_to_prefree(sbi);
@@ -2060,12 +2118,15 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	bool need_restart_gc = false, need_stop_gc = false;
 	bool need_restart_ckpt = false, need_stop_ckpt = false;
 	bool need_restart_flush = false, need_stop_flush = false;
+	bool need_restart_discard = false, need_stop_discard = false;
 	bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
-	bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
+	bool enable_checkpoint = !test_opt(sbi, DISABLE_CHECKPOINT);
 	bool no_io_align = !F2FS_IO_ALIGNED(sbi);
 	bool no_atgc = !test_opt(sbi, ATGC);
+	bool no_discard = !test_opt(sbi, DISCARD);
 	bool no_compress_cache = !test_opt(sbi, COMPRESS_CACHE);
-	bool checkpoint_changed;
+	bool block_unit_discard = f2fs_block_unit_discard(sbi);
+	struct discard_cmd_control *dcc;
 #ifdef CONFIG_QUOTA
 	int i, j;
 #endif
@@ -2110,8 +2171,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	err = parse_options(sb, data, true);
 	if (err)
 		goto restore_opts;
-	checkpoint_changed =
-			disable_checkpoint != test_opt(sbi, DISABLE_CHECKPOINT);
 
 	/*
 	 * Previous and new state of filesystem is RO,
@@ -2168,6 +2227,12 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 		goto restore_opts;
 	}
 
+	if (block_unit_discard != f2fs_block_unit_discard(sbi)) {
+		err = -EINVAL;
+		f2fs_warn(sbi, "switch discard_unit option is not allowed");
+		goto restore_opts;
+	}
+
 	if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) {
 		err = -EINVAL;
 		f2fs_warn(sbi, "disabling checkpoint not compatible with read-only");
@@ -2233,11 +2298,26 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 		need_stop_flush = true;
 	}
 
-	if (checkpoint_changed) {
+	if (no_discard == !!test_opt(sbi, DISCARD)) {
+		if (test_opt(sbi, DISCARD)) {
+			err = f2fs_start_discard_thread(sbi);
+			if (err)
+				goto restore_flush;
+			need_stop_discard = true;
+		} else {
+			dcc = SM_I(sbi)->dcc_info;
+			f2fs_stop_discard_thread(sbi);
+			if (atomic_read(&dcc->discard_cmd_cnt))
+				f2fs_issue_discard_timeout(sbi);
+			need_restart_discard = true;
+		}
+	}
+
+	if (enable_checkpoint == !!test_opt(sbi, DISABLE_CHECKPOINT)) {
 		if (test_opt(sbi, DISABLE_CHECKPOINT)) {
 			err = f2fs_disable_checkpoint(sbi);
 			if (err)
-				goto restore_flush;
+				goto restore_discard;
 		} else {
 			f2fs_enable_checkpoint(sbi);
 		}
@@ -2257,6 +2337,13 @@ skip:
 	adjust_unusable_cap_perc(sbi);
 	*flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
 	return 0;
+restore_discard:
+	if (need_restart_discard) {
+		if (f2fs_start_discard_thread(sbi))
+			f2fs_warn(sbi, "discard has been stopped");
+	} else if (need_stop_discard) {
+		f2fs_stop_discard_thread(sbi);
+	}
 restore_flush:
 	if (need_restart_flush) {
 		if (f2fs_create_flush_cmd_control(sbi))
@@ -2517,6 +2604,33 @@ static int f2fs_enable_quotas(struct super_block *sb)
 	return 0;
 }
 
+static int f2fs_quota_sync_file(struct f2fs_sb_info *sbi, int type)
+{
+	struct quota_info *dqopt = sb_dqopt(sbi->sb);
+	struct address_space *mapping = dqopt->files[type]->i_mapping;
+	int ret = 0;
+
+	ret = dquot_writeback_dquots(sbi->sb, type);
+	if (ret)
+		goto out;
+
+	ret = filemap_fdatawrite(mapping);
+	if (ret)
+		goto out;
+
+	/* if we are using journalled quota */
+	if (is_journalled_quota(sbi))
+		goto out;
+
+	ret = filemap_fdatawait(mapping);
+
+	truncate_inode_pages(&dqopt->files[type]->i_data, 0);
+out:
+	if (ret)
+		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+	return ret;
+}
+
 int f2fs_quota_sync(struct super_block *sb, int type)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -2525,56 +2639,41 @@ int f2fs_quota_sync(struct super_block *sb, int type)
 	int ret;
 
 	/*
-	 * do_quotactl
-	 *  f2fs_quota_sync
-	 *  down_read(quota_sem)
-	 *  dquot_writeback_dquots()
-	 *  f2fs_dquot_commit
-	 *                            block_operation
-	 *                            down_read(quota_sem)
-	 */
-	f2fs_lock_op(sbi);
-
-	down_read(&sbi->quota_sem);
-	ret = dquot_writeback_dquots(sb, type);
-	if (ret)
-		goto out;
-
-	/*
 	 * Now when everything is written we can discard the pagecache so
 	 * that userspace sees the changes.
 	 */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		struct address_space *mapping;
 
 		if (type != -1 && cnt != type)
 			continue;
-		if (!sb_has_quota_active(sb, cnt))
-			continue;
 
-		mapping = dqopt->files[cnt]->i_mapping;
+		if (!sb_has_quota_active(sb, type))
+			return 0;
 
-		ret = filemap_fdatawrite(mapping);
-		if (ret)
-			goto out;
+		inode_lock(dqopt->files[cnt]);
 
-		/* if we are using journalled quota */
-		if (is_journalled_quota(sbi))
-			continue;
+		/*
+		 * do_quotactl
+		 *  f2fs_quota_sync
+		 *  down_read(quota_sem)
+		 *  dquot_writeback_dquots()
+		 *  f2fs_dquot_commit
+		 *			      block_operation
+		 *			      down_read(quota_sem)
+		 */
+		f2fs_lock_op(sbi);
+		down_read(&sbi->quota_sem);
 
-		ret = filemap_fdatawait(mapping);
-		if (ret)
-			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
+		ret = f2fs_quota_sync_file(sbi, cnt);
+
+		up_read(&sbi->quota_sem);
+		f2fs_unlock_op(sbi);
 
-		inode_lock(dqopt->files[cnt]);
-		truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
 		inode_unlock(dqopt->files[cnt]);
+
+		if (ret)
+			break;
 	}
-out:
-	if (ret)
-		set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
-	up_read(&sbi->quota_sem);
-	f2fs_unlock_op(sbi);
 	return ret;
 }
 
@@ -3207,11 +3306,13 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
 		return -EFSCORRUPTED;
 	}
 
-	if (le32_to_cpu(raw_super->cp_payload) >
-				(blocks_per_seg - F2FS_CP_PACKS)) {
-		f2fs_info(sbi, "Insane cp_payload (%u > %u)",
+	if (le32_to_cpu(raw_super->cp_payload) >=
+				(blocks_per_seg - F2FS_CP_PACKS -
+				NR_CURSEG_PERSIST_TYPE)) {
+		f2fs_info(sbi, "Insane cp_payload (%u >= %u)",
 			  le32_to_cpu(raw_super->cp_payload),
-			  blocks_per_seg - F2FS_CP_PACKS);
+			  blocks_per_seg - F2FS_CP_PACKS -
+			  NR_CURSEG_PERSIST_TYPE);
 		return -EFSCORRUPTED;
 	}
 
@@ -3247,6 +3348,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
 	unsigned int cp_pack_start_sum, cp_payload;
 	block_t user_block_count, valid_user_blocks;
 	block_t avail_node_count, valid_node_count;
+	unsigned int nat_blocks, nat_bits_bytes, nat_bits_blocks;
 	int i, j;
 
 	total = le32_to_cpu(raw_super->segment_count);
@@ -3377,6 +3479,17 @@ skip_cross:
 		return 1;
 	}
 
+	nat_blocks = nat_segs << log_blocks_per_seg;
+	nat_bits_bytes = nat_blocks / BITS_PER_BYTE;
+	nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8);
+	if (__is_set_ckpt_flags(ckpt, CP_NAT_BITS_FLAG) &&
+		(cp_payload + F2FS_CP_PACKS +
+		NR_CURSEG_PERSIST_TYPE + nat_bits_blocks >= blocks_per_seg)) {
+		f2fs_warn(sbi, "Insane cp_payload: %u, nat_bits_blocks: %u)",
+			  cp_payload, nat_bits_blocks);
+		return -EFSCORRUPTED;
+	}
+
 	if (unlikely(f2fs_cp_error(sbi))) {
 		f2fs_err(sbi, "A bug case: need to run fsck");
 		return 1;
@@ -3409,6 +3522,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
 	sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
 	sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
 	sbi->migration_granularity = sbi->segs_per_sec;
+	sbi->seq_file_ra_mul = MIN_RA_MUL;
 
 	sbi->dir_level = DEF_DIR_LEVEL;
 	sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
@@ -3768,7 +3882,8 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
 	/* adjust parameters according to the volume size */
 	if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) {
 		F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
-		sm_i->dcc_info->discard_granularity = 1;
+		if (f2fs_block_unit_discard(sbi))
+			sm_i->dcc_info->discard_granularity = 1;
 		sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
 	}
 
@@ -3889,11 +4004,6 @@ try_onemore:
 	set_sbi_flag(sbi, SBI_POR_DOING);
 	spin_lock_init(&sbi->stat_lock);
 
-	/* init iostat info */
-	spin_lock_init(&sbi->iostat_lock);
-	sbi->iostat_enable = false;
-	sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS;
-
 	for (i = 0; i < NR_PAGE_TYPE; i++) {
 		int n = (i == META) ? 1 : NR_TEMP_TYPE;
 		int j;
@@ -3924,10 +4034,14 @@ try_onemore:
 	init_waitqueue_head(&sbi->cp_wait);
 	init_sb_info(sbi);
 
-	err = init_percpu_info(sbi);
+	err = f2fs_init_iostat(sbi);
 	if (err)
 		goto free_bio_info;
 
+	err = init_percpu_info(sbi);
+	if (err)
+		goto free_iostat;
+
 	if (F2FS_IO_ALIGNED(sbi)) {
 		sbi->write_io_dummy =
 			mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
@@ -4259,6 +4373,8 @@ free_io_dummy:
 	mempool_destroy(sbi->write_io_dummy);
 free_percpu:
 	destroy_percpu_info(sbi);
+free_iostat:
+	f2fs_destroy_iostat(sbi);
 free_bio_info:
 	for (i = 0; i < NR_PAGE_TYPE; i++)
 		kvfree(sbi->write_io[i]);
@@ -4401,9 +4517,12 @@ static int __init init_f2fs_fs(void)
 	err = f2fs_init_post_read_processing();
 	if (err)
 		goto free_root_stats;
-	err = f2fs_init_bio_entry_cache();
+	err = f2fs_init_iostat_processing();
 	if (err)
 		goto free_post_read;
+	err = f2fs_init_bio_entry_cache();
+	if (err)
+		goto free_iostat;
 	err = f2fs_init_bioset();
 	if (err)
 		goto free_bio_enrty_cache;
@@ -4425,6 +4544,8 @@ free_bioset:
 	f2fs_destroy_bioset();
 free_bio_enrty_cache:
 	f2fs_destroy_bio_entry_cache();
+free_iostat:
+	f2fs_destroy_iostat_processing();
 free_post_read:
 	f2fs_destroy_post_read_processing();
 free_root_stats:
@@ -4459,6 +4580,7 @@ static void __exit exit_f2fs_fs(void)
 	f2fs_destroy_compress_mempool();
 	f2fs_destroy_bioset();
 	f2fs_destroy_bio_entry_cache();
+	f2fs_destroy_iostat_processing();
 	f2fs_destroy_post_read_processing();
 	f2fs_destroy_root_stats();
 	unregister_filesystem(&f2fs_fs_type);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index daad532a4e2b..a32fe31c33b8 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -17,6 +17,7 @@
 #include "f2fs.h"
 #include "segment.h"
 #include "gc.h"
+#include "iostat.h"
 #include <trace/events/f2fs.h>
 
 static struct proc_dir_entry *f2fs_proc_root;
@@ -307,6 +308,14 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
 		return sysfs_emit(buf, "%u\n", sbi->compr_new_inode);
 #endif
 
+	if (!strcmp(a->attr.name, "gc_segment_mode"))
+		return sysfs_emit(buf, "%u\n", sbi->gc_segment_mode);
+
+	if (!strcmp(a->attr.name, "gc_reclaimed_segments")) {
+		return sysfs_emit(buf, "%u\n",
+			sbi->gc_reclaimed_segs[sbi->gc_segment_mode]);
+	}
+
 	ui = (unsigned int *)(ptr + a->offset);
 
 	return sprintf(buf, "%u\n", *ui);
@@ -343,7 +352,7 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
 			set = false;
 		}
 
-		if (strlen(name) >= F2FS_EXTENSION_LEN)
+		if (!strlen(name) || strlen(name) >= F2FS_EXTENSION_LEN)
 			return -EINVAL;
 
 		down_write(&sbi->sb_lock);
@@ -420,6 +429,8 @@ out:
 	if (!strcmp(a->attr.name, "discard_granularity")) {
 		if (t == 0 || t > MAX_PLIST_NUM)
 			return -EINVAL;
+		if (!f2fs_block_unit_discard(sbi))
+			return -EINVAL;
 		if (t == *ui)
 			return count;
 		*ui = t;
@@ -467,6 +478,7 @@ out:
 		return count;
 	}
 
+#ifdef CONFIG_F2FS_IOSTAT
 	if (!strcmp(a->attr.name, "iostat_enable")) {
 		sbi->iostat_enable = !!t;
 		if (!sbi->iostat_enable)
@@ -482,6 +494,7 @@ out:
 		spin_unlock(&sbi->iostat_lock);
 		return count;
 	}
+#endif
 
 #ifdef CONFIG_F2FS_FS_COMPRESSION
 	if (!strcmp(a->attr.name, "compr_written_block") ||
@@ -515,6 +528,29 @@ out:
 		return count;
 	}
 
+	if (!strcmp(a->attr.name, "gc_segment_mode")) {
+		if (t < MAX_GC_MODE)
+			sbi->gc_segment_mode = t;
+		else
+			return -EINVAL;
+		return count;
+	}
+
+	if (!strcmp(a->attr.name, "gc_reclaimed_segments")) {
+		if (t != 0)
+			return -EINVAL;
+		sbi->gc_reclaimed_segs[sbi->gc_segment_mode] = 0;
+		return count;
+	}
+
+	if (!strcmp(a->attr.name, "seq_file_ra_mul")) {
+		if (t >= MIN_RA_MUL && t <= MAX_RA_MUL)
+			sbi->seq_file_ra_mul = t;
+		else
+			return -EINVAL;
+		return count;
+	}
+
 	*ui = (unsigned int)t;
 
 	return count;
@@ -667,8 +703,10 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, discard_idle_interval,
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info,
 		umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]);
+#ifdef CONFIG_F2FS_IOSTAT
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_period_ms, iostat_period_ms);
+#endif
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_io_bytes, max_io_bytes);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold);
@@ -740,6 +778,10 @@ F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_candidate_count, max_candidate_cou
 F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_age_weight, age_weight);
 F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_age_threshold, age_threshold);
 
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, seq_file_ra_mul, seq_file_ra_mul);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_segment_mode, gc_segment_mode);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_reclaimed_segments, gc_reclaimed_segs);
+
 #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
 static struct attribute *f2fs_attrs[] = {
 	ATTR_LIST(gc_urgent_sleep_time),
@@ -770,8 +812,10 @@ static struct attribute *f2fs_attrs[] = {
 	ATTR_LIST(discard_idle_interval),
 	ATTR_LIST(gc_idle_interval),
 	ATTR_LIST(umount_discard_timeout),
+#ifdef CONFIG_F2FS_IOSTAT
 	ATTR_LIST(iostat_enable),
 	ATTR_LIST(iostat_period_ms),
+#endif
 	ATTR_LIST(readdir_ra),
 	ATTR_LIST(max_io_bytes),
 	ATTR_LIST(gc_pin_file_thresh),
@@ -812,6 +856,9 @@ static struct attribute *f2fs_attrs[] = {
 	ATTR_LIST(atgc_candidate_count),
 	ATTR_LIST(atgc_age_weight),
 	ATTR_LIST(atgc_age_threshold),
+	ATTR_LIST(seq_file_ra_mul),
+	ATTR_LIST(gc_segment_mode),
+	ATTR_LIST(gc_reclaimed_segments),
 	NULL,
 };
 ATTRIBUTE_GROUPS(f2fs);
@@ -1036,101 +1083,6 @@ static int __maybe_unused segment_bits_seq_show(struct seq_file *seq,
 	return 0;
 }
 
-void f2fs_record_iostat(struct f2fs_sb_info *sbi)
-{
-	unsigned long long iostat_diff[NR_IO_TYPE];
-	int i;
-
-	if (time_is_after_jiffies(sbi->iostat_next_period))
-		return;
-
-	/* Need double check under the lock */
-	spin_lock(&sbi->iostat_lock);
-	if (time_is_after_jiffies(sbi->iostat_next_period)) {
-		spin_unlock(&sbi->iostat_lock);
-		return;
-	}
-	sbi->iostat_next_period = jiffies +
-				msecs_to_jiffies(sbi->iostat_period_ms);
-
-	for (i = 0; i < NR_IO_TYPE; i++) {
-		iostat_diff[i] = sbi->rw_iostat[i] -
-				sbi->prev_rw_iostat[i];
-		sbi->prev_rw_iostat[i] = sbi->rw_iostat[i];
-	}
-	spin_unlock(&sbi->iostat_lock);
-
-	trace_f2fs_iostat(sbi, iostat_diff);
-}
-
-static int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
-					       void *offset)
-{
-	struct super_block *sb = seq->private;
-	struct f2fs_sb_info *sbi = F2FS_SB(sb);
-	time64_t now = ktime_get_real_seconds();
-
-	if (!sbi->iostat_enable)
-		return 0;
-
-	seq_printf(seq, "time:		%-16llu\n", now);
-
-	/* print app write IOs */
-	seq_puts(seq, "[WRITE]\n");
-	seq_printf(seq, "app buffered:	%-16llu\n",
-				sbi->rw_iostat[APP_BUFFERED_IO]);
-	seq_printf(seq, "app direct:	%-16llu\n",
-				sbi->rw_iostat[APP_DIRECT_IO]);
-	seq_printf(seq, "app mapped:	%-16llu\n",
-				sbi->rw_iostat[APP_MAPPED_IO]);
-
-	/* print fs write IOs */
-	seq_printf(seq, "fs data:	%-16llu\n",
-				sbi->rw_iostat[FS_DATA_IO]);
-	seq_printf(seq, "fs node:	%-16llu\n",
-				sbi->rw_iostat[FS_NODE_IO]);
-	seq_printf(seq, "fs meta:	%-16llu\n",
-				sbi->rw_iostat[FS_META_IO]);
-	seq_printf(seq, "fs gc data:	%-16llu\n",
-				sbi->rw_iostat[FS_GC_DATA_IO]);
-	seq_printf(seq, "fs gc node:	%-16llu\n",
-				sbi->rw_iostat[FS_GC_NODE_IO]);
-	seq_printf(seq, "fs cp data:	%-16llu\n",
-				sbi->rw_iostat[FS_CP_DATA_IO]);
-	seq_printf(seq, "fs cp node:	%-16llu\n",
-				sbi->rw_iostat[FS_CP_NODE_IO]);
-	seq_printf(seq, "fs cp meta:	%-16llu\n",
-				sbi->rw_iostat[FS_CP_META_IO]);
-
-	/* print app read IOs */
-	seq_puts(seq, "[READ]\n");
-	seq_printf(seq, "app buffered:	%-16llu\n",
-				sbi->rw_iostat[APP_BUFFERED_READ_IO]);
-	seq_printf(seq, "app direct:	%-16llu\n",
-				sbi->rw_iostat[APP_DIRECT_READ_IO]);
-	seq_printf(seq, "app mapped:	%-16llu\n",
-				sbi->rw_iostat[APP_MAPPED_READ_IO]);
-
-	/* print fs read IOs */
-	seq_printf(seq, "fs data:	%-16llu\n",
-				sbi->rw_iostat[FS_DATA_READ_IO]);
-	seq_printf(seq, "fs gc data:	%-16llu\n",
-				sbi->rw_iostat[FS_GDATA_READ_IO]);
-	seq_printf(seq, "fs compr_data:	%-16llu\n",
-				sbi->rw_iostat[FS_CDATA_READ_IO]);
-	seq_printf(seq, "fs node:	%-16llu\n",
-				sbi->rw_iostat[FS_NODE_READ_IO]);
-	seq_printf(seq, "fs meta:	%-16llu\n",
-				sbi->rw_iostat[FS_META_READ_IO]);
-
-	/* print other IOs */
-	seq_puts(seq, "[OTHER]\n");
-	seq_printf(seq, "fs discard:	%-16llu\n",
-				sbi->rw_iostat[FS_DISCARD]);
-
-	return 0;
-}
-
 static int __maybe_unused victim_bits_seq_show(struct seq_file *seq,
 						void *offset)
 {
@@ -1213,13 +1165,15 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi)
 		sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
 
 	if (sbi->s_proc) {
-		proc_create_single_data("segment_info", S_IRUGO, sbi->s_proc,
+		proc_create_single_data("segment_info", 0444, sbi->s_proc,
 				segment_info_seq_show, sb);
-		proc_create_single_data("segment_bits", S_IRUGO, sbi->s_proc,
+		proc_create_single_data("segment_bits", 0444, sbi->s_proc,
 				segment_bits_seq_show, sb);
-		proc_create_single_data("iostat_info", S_IRUGO, sbi->s_proc,
+#ifdef CONFIG_F2FS_IOSTAT
+		proc_create_single_data("iostat_info", 0444, sbi->s_proc,
 				iostat_info_seq_show, sb);
-		proc_create_single_data("victim_bits", S_IRUGO, sbi->s_proc,
+#endif
+		proc_create_single_data("victim_bits", 0444, sbi->s_proc,
 				victim_bits_seq_show, sb);
 	}
 	return 0;
@@ -1238,7 +1192,9 @@ put_sb_kobj:
 void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi)
 {
 	if (sbi->s_proc) {
+#ifdef CONFIG_F2FS_IOSTAT
 		remove_proc_entry("iostat_info", sbi->s_proc);
+#endif
 		remove_proc_entry("segment_info", sbi->s_proc);
 		remove_proc_entry("segment_bits", sbi->s_proc);
 		remove_proc_entry("victim_bits", sbi->s_proc);
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index c8f34decbf8e..1d2d29dcd41c 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -27,7 +27,8 @@ static void *xattr_alloc(struct f2fs_sb_info *sbi, int size, bool *is_inline)
 {
 	if (likely(size == sbi->inline_xattr_slab_size)) {
 		*is_inline = true;
-		return kmem_cache_zalloc(sbi->inline_xattr_slab, GFP_NOFS);
+		return f2fs_kmem_cache_alloc(sbi->inline_xattr_slab,
+					GFP_F2FS_ZERO, false, sbi);
 	}
 	*is_inline = false;
 	return f2fs_kzalloc(sbi, size, GFP_NOFS);
diff --git a/fs/fat/.kunitconfig b/fs/fat/.kunitconfig
new file mode 100644
index 000000000000..0a6971dbeccb
--- /dev/null
+++ b/fs/fat/.kunitconfig
@@ -0,0 +1,5 @@
+CONFIG_KUNIT=y
+CONFIG_FAT_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_FAT_KUNIT_TEST=y
diff --git a/fs/fat/Kconfig b/fs/fat/Kconfig
index 66532a71e8fd..238cc55f84c4 100644
--- a/fs/fat/Kconfig
+++ b/fs/fat/Kconfig
@@ -77,7 +77,7 @@ config VFAT_FS
 
 config FAT_DEFAULT_CODEPAGE
 	int "Default codepage for FAT"
-	depends on MSDOS_FS || VFAT_FS
+	depends on FAT_FS
 	default 437
 	help
 	  This option should be set to the codepage of your FAT filesystems.
@@ -115,3 +115,15 @@ config FAT_DEFAULT_UTF8
 	  Say Y if you use UTF-8 encoding for file names, N otherwise.
 
 	  See <file:Documentation/filesystems/vfat.rst> for more information.
+
+config FAT_KUNIT_TEST
+	tristate "Unit Tests for FAT filesystems" if !KUNIT_ALL_TESTS
+	depends on KUNIT && FAT_FS
+	default KUNIT_ALL_TESTS
+	help
+	  This builds the FAT KUnit tests
+
+	  For more information on KUnit and unit tests in general, please refer
+	  to the KUnit documentation in Documentation/dev-tools/kunit
+
+	  If unsure, say N
diff --git a/fs/fat/Makefile b/fs/fat/Makefile
index 70645ce2f7fc..2b034112690d 100644
--- a/fs/fat/Makefile
+++ b/fs/fat/Makefile
@@ -10,3 +10,5 @@ obj-$(CONFIG_MSDOS_FS) += msdos.o
 fat-y := cache.o dir.o fatent.o file.o inode.o misc.o nfs.o
 vfat-y := namei_vfat.o
 msdos-y := namei_msdos.o
+
+obj-$(CONFIG_FAT_KUNIT_TEST) += fat_test.o
diff --git a/fs/fat/fat_test.c b/fs/fat/fat_test.c
new file mode 100644
index 000000000000..2dab4ca1d0d8
--- /dev/null
+++ b/fs/fat/fat_test.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KUnit tests for FAT filesystems.
+ *
+ * Copyright (C) 2020 Google LLC.
+ * Author: David Gow <davidgow@google.com>
+ */
+
+#include <kunit/test.h>
+
+#include "fat.h"
+
+static void fat_checksum_test(struct kunit *test)
+{
+	/* With no extension. */
+	KUNIT_EXPECT_EQ(test, fat_checksum("VMLINUX    "), (u8)44);
+	/* With 3-letter extension. */
+	KUNIT_EXPECT_EQ(test, fat_checksum("README  TXT"), (u8)115);
+	/* With short (1-letter) extension. */
+	KUNIT_EXPECT_EQ(test, fat_checksum("ABCDEFGHA  "), (u8)98);
+}
+
+struct fat_timestamp_testcase {
+	const char *name;
+	struct timespec64 ts;
+	__le16 time;
+	__le16 date;
+	u8 cs;
+	int time_offset;
+};
+
+static struct fat_timestamp_testcase time_test_cases[] = {
+	{
+		.name = "Earliest possible UTC (1980-01-01 00:00:00)",
+		.ts = {.tv_sec = 315532800LL, .tv_nsec = 0L},
+		.time = cpu_to_le16(0),
+		.date = cpu_to_le16(33),
+		.cs = 0,
+		.time_offset = 0,
+	},
+	{
+		.name = "Latest possible UTC (2107-12-31 23:59:58)",
+		.ts = {.tv_sec = 4354819198LL, .tv_nsec = 0L},
+		.time = cpu_to_le16(49021),
+		.date = cpu_to_le16(65439),
+		.cs = 0,
+		.time_offset = 0,
+	},
+	{
+		.name = "Earliest possible (UTC-11) (== 1979-12-31 13:00:00 UTC)",
+		.ts = {.tv_sec = 315493200LL, .tv_nsec = 0L},
+		.time = cpu_to_le16(0),
+		.date = cpu_to_le16(33),
+		.cs = 0,
+		.time_offset = 11 * 60,
+	},
+	{
+		.name = "Latest possible (UTC+11) (== 2108-01-01 10:59:58 UTC)",
+		.ts = {.tv_sec = 4354858798LL, .tv_nsec = 0L},
+		.time = cpu_to_le16(49021),
+		.date = cpu_to_le16(65439),
+		.cs = 0,
+		.time_offset = -11 * 60,
+	},
+	{
+		.name = "Leap Day / Year (1996-02-29 00:00:00)",
+		.ts = {.tv_sec = 825552000LL, .tv_nsec = 0L},
+		.time = cpu_to_le16(0),
+		.date = cpu_to_le16(8285),
+		.cs = 0,
+		.time_offset = 0,
+	},
+	{
+		.name = "Year 2000 is leap year (2000-02-29 00:00:00)",
+		.ts = {.tv_sec = 951782400LL, .tv_nsec = 0L},
+		.time = cpu_to_le16(0),
+		.date = cpu_to_le16(10333),
+		.cs = 0,
+		.time_offset = 0,
+	},
+	{
+		.name = "Year 2100 not leap year (2100-03-01 00:00:00)",
+		.ts = {.tv_sec = 4107542400LL, .tv_nsec = 0L},
+		.time = cpu_to_le16(0),
+		.date = cpu_to_le16(61537),
+		.cs = 0,
+		.time_offset = 0,
+	},
+	{
+		.name = "Leap year + timezone UTC+1 (== 2004-02-29 00:30:00 UTC)",
+		.ts = {.tv_sec = 1078014600LL, .tv_nsec = 0L},
+		.time = cpu_to_le16(48064),
+		.date = cpu_to_le16(12380),
+		.cs = 0,
+		.time_offset = -60,
+	},
+	{
+		.name = "Leap year + timezone UTC-1 (== 2004-02-29 23:30:00 UTC)",
+		.ts = {.tv_sec = 1078097400LL, .tv_nsec = 0L},
+		.time = cpu_to_le16(960),
+		.date = cpu_to_le16(12385),
+		.cs = 0,
+		.time_offset = 60,
+	},
+	{
+		.name = "VFAT odd-second resolution (1999-12-31 23:59:59)",
+		.ts = {.tv_sec = 946684799LL, .tv_nsec = 0L},
+		.time = cpu_to_le16(49021),
+		.date = cpu_to_le16(10143),
+		.cs = 100,
+		.time_offset = 0,
+	},
+	{
+		.name = "VFAT 10ms resolution (1980-01-01 00:00:00:0010)",
+		.ts = {.tv_sec = 315532800LL, .tv_nsec = 10000000L},
+		.time = cpu_to_le16(0),
+		.date = cpu_to_le16(33),
+		.cs = 1,
+		.time_offset = 0,
+	},
+};
+
+static void time_testcase_desc(struct fat_timestamp_testcase *t,
+			       char *desc)
+{
+	strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE);
+}
+
+KUNIT_ARRAY_PARAM(fat_time, time_test_cases, time_testcase_desc);
+
+static void fat_time_fat2unix_test(struct kunit *test)
+{
+	static struct msdos_sb_info fake_sb;
+	struct timespec64 ts;
+	struct fat_timestamp_testcase *testcase =
+		(struct fat_timestamp_testcase *)test->param_value;
+
+	fake_sb.options.tz_set = 1;
+	fake_sb.options.time_offset = testcase->time_offset;
+
+	fat_time_fat2unix(&fake_sb, &ts,
+			  testcase->time,
+			  testcase->date,
+			  testcase->cs);
+	KUNIT_EXPECT_EQ_MSG(test,
+			    testcase->ts.tv_sec,
+			    ts.tv_sec,
+			    "Timestamp mismatch (seconds)\n");
+	KUNIT_EXPECT_EQ_MSG(test,
+			    testcase->ts.tv_nsec,
+			    ts.tv_nsec,
+			    "Timestamp mismatch (nanoseconds)\n");
+}
+
+static void fat_time_unix2fat_test(struct kunit *test)
+{
+	static struct msdos_sb_info fake_sb;
+	__le16 date, time;
+	u8 cs;
+	struct fat_timestamp_testcase *testcase =
+		(struct fat_timestamp_testcase *)test->param_value;
+
+	fake_sb.options.tz_set = 1;
+	fake_sb.options.time_offset = testcase->time_offset;
+
+	fat_time_unix2fat(&fake_sb, &testcase->ts,
+			  &time, &date, &cs);
+	KUNIT_EXPECT_EQ_MSG(test,
+			    le16_to_cpu(testcase->time),
+			    le16_to_cpu(time),
+			    "Time mismatch\n");
+	KUNIT_EXPECT_EQ_MSG(test,
+			    le16_to_cpu(testcase->date),
+			    le16_to_cpu(date),
+			    "Date mismatch\n");
+	KUNIT_EXPECT_EQ_MSG(test,
+			    testcase->cs,
+			    cs,
+			    "Centisecond mismatch\n");
+}
+
+static struct kunit_case fat_test_cases[] = {
+	KUNIT_CASE(fat_checksum_test),
+	KUNIT_CASE_PARAM(fat_time_fat2unix_test, fat_time_gen_params),
+	KUNIT_CASE_PARAM(fat_time_unix2fat_test, fat_time_gen_params),
+	{},
+};
+
+static struct kunit_suite fat_test_suite = {
+	.name = "fat_test",
+	.test_cases = fat_test_cases,
+};
+
+kunit_test_suites(&fat_test_suite);
+
+MODULE_LICENSE("GPL v2");
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 18a50a46b57f..91ca3c304211 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -230,6 +230,9 @@ void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec64 *ts,
 	}
 }
 
+/* Export fat_time_fat2unix() for the fat_test KUnit tests. */
+EXPORT_SYMBOL_GPL(fat_time_fat2unix);
+
 /* Convert linear UNIX date to a FAT time/date pair. */
 void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec64 *ts,
 		       __le16 *time, __le16 *date, u8 *time_cs)
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 68added37c15..9c6c6a3e2de5 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -1051,7 +1051,8 @@ static int __init fcntl_init(void)
 			__FMODE_EXEC | __FMODE_NONOTIFY));
 
 	fasync_cache = kmem_cache_create("fasync_cache",
-		sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL);
+					 sizeof(struct fasync_struct), 0,
+					 SLAB_PANIC | SLAB_ACCOUNT, NULL);
 	return 0;
 }
 
diff --git a/fs/file.c b/fs/file.c
index d8afa8266859..8627dacfc424 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -1150,6 +1150,12 @@ int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags)
 	return new_fd;
 }
 
+int receive_fd(struct file *file, unsigned int o_flags)
+{
+	return __receive_fd(file, NULL, o_flags);
+}
+EXPORT_SYMBOL_GPL(receive_fd);
+
 static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
 {
 	int err = -EBADF;
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 90b8d879fbaf..58b9067b2391 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -209,21 +209,28 @@ SYSCALL_DEFINE3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2)
 }
 #endif
 
-int __init get_filesystem_list(char *buf)
+int __init list_bdev_fs_names(char *buf, size_t size)
 {
-	int len = 0;
-	struct file_system_type * tmp;
+	struct file_system_type *p;
+	size_t len;
+	int count = 0;
 
 	read_lock(&file_systems_lock);
-	tmp = file_systems;
-	while (tmp && len < PAGE_SIZE - 80) {
-		len += sprintf(buf+len, "%s\t%s\n",
-			(tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
-			tmp->name);
-		tmp = tmp->next;
+	for (p = file_systems; p; p = p->next) {
+		if (!(p->fs_flags & FS_REQUIRES_DEV))
+			continue;
+		len = strlen(p->name) + 1;
+		if (len > size) {
+			pr_warn("%s: truncating file system list\n", __func__);
+			break;
+		}
+		memcpy(buf, p->name, len);
+		buf += len;
+		size -= len;
+		count++;
 	}
 	read_unlock(&file_systems_lock);
-	return len;
+	return count;
 }
 
 #ifdef CONFIG_PROC_FS
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index eb57dade6076..81ec192ce067 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -406,6 +406,11 @@ static bool inode_do_switch_wbs(struct inode *inode,
 		inc_wb_stat(new_wb, WB_WRITEBACK);
 	}
 
+	if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
+		atomic_dec(&old_wb->writeback_inodes);
+		atomic_inc(&new_wb->writeback_inodes);
+	}
+
 	wb_get(new_wb);
 
 	/*
@@ -1034,20 +1039,20 @@ restart:
  * cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
  * @bdi_id: target bdi id
  * @memcg_id: target memcg css id
- * @nr: number of pages to write, 0 for best-effort dirty flushing
  * @reason: reason why some writeback work initiated
  * @done: target wb_completion
  *
  * Initiate flush of the bdi_writeback identified by @bdi_id and @memcg_id
  * with the specified parameters.
  */
-int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr,
+int cgroup_writeback_by_id(u64 bdi_id, int memcg_id,
 			   enum wb_reason reason, struct wb_completion *done)
 {
 	struct backing_dev_info *bdi;
 	struct cgroup_subsys_state *memcg_css;
 	struct bdi_writeback *wb;
 	struct wb_writeback_work *work;
+	unsigned long dirty;
 	int ret;
 
 	/* lookup bdi and memcg */
@@ -1076,24 +1081,22 @@ int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr,
 	}
 
 	/*
-	 * If @nr is zero, the caller is attempting to write out most of
+	 * The caller is attempting to write out most of
 	 * the currently dirty pages.  Let's take the current dirty page
 	 * count and inflate it by 25% which should be large enough to
 	 * flush out most dirty pages while avoiding getting livelocked by
 	 * concurrent dirtiers.
+	 *
+	 * BTW the memcg stats are flushed periodically and this is best-effort
+	 * estimation, so some potential error is ok.
 	 */
-	if (!nr) {
-		unsigned long filepages, headroom, dirty, writeback;
-
-		mem_cgroup_wb_stats(wb, &filepages, &headroom, &dirty,
-				      &writeback);
-		nr = dirty * 10 / 8;
-	}
+	dirty = memcg_page_state(mem_cgroup_from_css(memcg_css), NR_FILE_DIRTY);
+	dirty = dirty * 10 / 8;
 
 	/* issue the writeback work */
 	work = kzalloc(sizeof(*work), GFP_NOWAIT | __GFP_NOWARN);
 	if (work) {
-		work->nr_pages = nr;
+		work->nr_pages = dirty;
 		work->sync_mode = WB_SYNC_NONE;
 		work->range_cyclic = 1;
 		work->reason = reason;
@@ -1999,7 +2002,6 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
 static long wb_writeback(struct bdi_writeback *wb,
 			 struct wb_writeback_work *work)
 {
-	unsigned long wb_start = jiffies;
 	long nr_pages = work->nr_pages;
 	unsigned long dirtied_before = jiffies;
 	struct inode *inode;
@@ -2053,8 +2055,6 @@ static long wb_writeback(struct bdi_writeback *wb,
 			progress = __writeback_inodes_wb(wb, work);
 		trace_writeback_written(wb, work);
 
-		wb_update_bandwidth(wb, wb_start);
-
 		/*
 		 * Did we write something? Try for more
 		 *
diff --git a/fs/fs_context.c b/fs/fs_context.c
index de1985eae535..b7e43a780a62 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -254,7 +254,7 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type,
 	struct fs_context *fc;
 	int ret = -ENOMEM;
 
-	fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
+	fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL_ACCOUNT);
 	if (!fc)
 		return ERR_PTR(-ENOMEM);
 
@@ -649,7 +649,7 @@ const struct fs_context_operations legacy_fs_context_ops = {
  */
 static int legacy_init_fs_context(struct fs_context *fc)
 {
-	fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL);
+	fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL_ACCOUNT);
 	if (!fc->fs_private)
 		return -ENOMEM;
 	fc->ops = &legacy_fs_context_ops;
diff --git a/fs/fs_parser.c b/fs/fs_parser.c
index 980d44fd3a36..3df07c0e32b3 100644
--- a/fs/fs_parser.c
+++ b/fs/fs_parser.c
@@ -165,7 +165,6 @@ int fs_lookup_param(struct fs_context *fc,
 		return invalf(fc, "%s: not usable as path", param->key);
 	}
 
-	f->refcnt++; /* filename_lookup() drops our ref. */
 	ret = filename_lookup(param->dirfd, f, flags, _path, NULL);
 	if (ret < 0) {
 		errorf(fc, "%s: Lookup failure for '%s'", param->key, f->name);
diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig
index 427efa73b9bd..b313a978ae0a 100644
--- a/fs/fscache/Kconfig
+++ b/fs/fscache/Kconfig
@@ -14,6 +14,7 @@ config FSCACHE
 config FSCACHE_STATS
 	bool "Gather statistical information on local caching"
 	depends on FSCACHE && PROC_FS
+	select NETFS_STATS
 	help
 	  This option causes statistical information to be gathered on local
 	  caching and exported through file:
@@ -28,23 +29,6 @@ config FSCACHE_STATS
 
 	  See Documentation/filesystems/caching/fscache.rst for more information.
 
-config FSCACHE_HISTOGRAM
-	bool "Gather latency information on local caching"
-	depends on FSCACHE && PROC_FS
-	help
-	  This option causes latency information to be gathered on local
-	  caching and exported through file:
-
-		/proc/fs/fscache/histogram
-
-	  The generation of this histogram adds a certain amount of overhead to
-	  execution as there are a number of points at which data is gathered,
-	  and on a multi-CPU system these may be on cachelines that keep
-	  bouncing between CPUs.  On the other hand, the histogram may be
-	  useful for debugging purposes.  Saying 'N' here is recommended.
-
-	  See Documentation/filesystems/caching/fscache.rst for more information.
-
 config FSCACHE_DEBUG
 	bool "Debug FS-Cache"
 	depends on FSCACHE
@@ -54,10 +38,3 @@ config FSCACHE_DEBUG
 	  enabled by setting bits in /sys/modules/fscache/parameter/debug.
 
 	  See Documentation/filesystems/caching/fscache.rst for more information.
-
-config FSCACHE_OBJECT_LIST
-	bool "Maintain global object list for debugging purposes"
-	depends on FSCACHE && PROC_FS
-	help
-	  Maintain a global list of active fscache objects that can be
-	  retrieved through /proc/fs/fscache/objects for debugging purposes
diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile
index 3b2ffa93ac18..03a871d689bb 100644
--- a/fs/fscache/Makefile
+++ b/fs/fscache/Makefile
@@ -16,7 +16,5 @@ fscache-y := \
 
 fscache-$(CONFIG_PROC_FS) += proc.o
 fscache-$(CONFIG_FSCACHE_STATS) += stats.o
-fscache-$(CONFIG_FSCACHE_HISTOGRAM) += histogram.o
-fscache-$(CONFIG_FSCACHE_OBJECT_LIST) += object-list.o
 
 obj-$(CONFIG_FSCACHE) := fscache.o
diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c
index fcc136361415..bd4f44c1cce0 100644
--- a/fs/fscache/cache.c
+++ b/fs/fscache/cache.c
@@ -116,7 +116,7 @@ struct fscache_cache *fscache_select_cache_for_object(
 			cache = NULL;
 
 		spin_unlock(&cookie->lock);
-		_leave(" = %p [parent]", cache);
+		_leave(" = %s [parent]", cache ? cache->tag->name : "NULL");
 		return cache;
 	}
 
@@ -152,14 +152,14 @@ struct fscache_cache *fscache_select_cache_for_object(
 	if (test_bit(FSCACHE_IOERROR, &tag->cache->flags))
 		return NULL;
 
-	_leave(" = %p [specific]", tag->cache);
+	_leave(" = %s [specific]", tag->name);
 	return tag->cache;
 
 no_preference:
 	/* netfs has no preference - just select first cache */
 	cache = list_entry(fscache_cache_list.next,
 			   struct fscache_cache, link);
-	_leave(" = %p [first]", cache);
+	_leave(" = %s [first]", cache->tag->name);
 	return cache;
 }
 
@@ -261,7 +261,6 @@ int fscache_add_cache(struct fscache_cache *cache,
 	spin_lock(&cache->object_list_lock);
 	list_add_tail(&ifsdef->cache_link, &cache->object_list);
 	spin_unlock(&cache->object_list_lock);
-	fscache_objlist_add(ifsdef);
 
 	/* add the cache's netfs definition index object to the top level index
 	 * cookie as a known backing object */
@@ -270,7 +269,7 @@ int fscache_add_cache(struct fscache_cache *cache,
 	hlist_add_head(&ifsdef->cookie_link,
 		       &fscache_fsdef_index.backing_objects);
 
-	atomic_inc(&fscache_fsdef_index.usage);
+	refcount_inc(&fscache_fsdef_index.ref);
 
 	/* done */
 	spin_unlock(&fscache_fsdef_index.lock);
@@ -335,7 +334,7 @@ static void fscache_withdraw_all_objects(struct fscache_cache *cache,
 					    struct fscache_object, cache_link);
 			list_move_tail(&object->cache_link, dying_objects);
 
-			_debug("withdraw %p", object->cookie);
+			_debug("withdraw %x", object->cookie->debug_id);
 
 			/* This must be done under object_list_lock to prevent
 			 * a race with fscache_drop_object().
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 751bc5b1cddf..cd42be646ed3 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -19,6 +19,8 @@ static atomic_t fscache_object_debug_id = ATOMIC_INIT(0);
 
 #define fscache_cookie_hash_shift 15
 static struct hlist_bl_head fscache_cookie_hash[1 << fscache_cookie_hash_shift];
+static LIST_HEAD(fscache_cookies);
+static DEFINE_RWLOCK(fscache_cookies_lock);
 
 static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie,
 					    loff_t object_size);
@@ -29,21 +31,29 @@ static int fscache_attach_object(struct fscache_cookie *cookie,
 
 static void fscache_print_cookie(struct fscache_cookie *cookie, char prefix)
 {
-	struct hlist_node *object;
+	struct fscache_object *object;
+	struct hlist_node *o;
 	const u8 *k;
 	unsigned loop;
 
-	pr_err("%c-cookie c=%p [p=%p fl=%lx nc=%u na=%u]\n",
-	       prefix, cookie, cookie->parent, cookie->flags,
+	pr_err("%c-cookie c=%08x [p=%08x fl=%lx nc=%u na=%u]\n",
+	       prefix,
+	       cookie->debug_id,
+	       cookie->parent ? cookie->parent->debug_id : 0,
+	       cookie->flags,
 	       atomic_read(&cookie->n_children),
 	       atomic_read(&cookie->n_active));
-	pr_err("%c-cookie d=%p n=%p\n",
-	       prefix, cookie->def, cookie->netfs_data);
-
-	object = READ_ONCE(cookie->backing_objects.first);
-	if (object)
-		pr_err("%c-cookie o=%p\n",
-		       prefix, hlist_entry(object, struct fscache_object, cookie_link));
+	pr_err("%c-cookie d=%p{%s} n=%p\n",
+	       prefix,
+	       cookie->def,
+	       cookie->def ? cookie->def->name : "?",
+	       cookie->netfs_data);
+
+	o = READ_ONCE(cookie->backing_objects.first);
+	if (o) {
+		object = hlist_entry(o, struct fscache_object, cookie_link);
+		pr_err("%c-cookie o=%u\n", prefix, object->debug_id);
+	}
 
 	pr_err("%c-key=[%u] '", prefix, cookie->key_len);
 	k = (cookie->key_len <= sizeof(cookie->inline_key)) ?
@@ -57,6 +67,9 @@ void fscache_free_cookie(struct fscache_cookie *cookie)
 {
 	if (cookie) {
 		BUG_ON(!hlist_empty(&cookie->backing_objects));
+		write_lock(&fscache_cookies_lock);
+		list_del(&cookie->proc_link);
+		write_unlock(&fscache_cookies_lock);
 		if (cookie->aux_len > sizeof(cookie->inline_aux))
 			kfree(cookie->aux);
 		if (cookie->key_len > sizeof(cookie->inline_key))
@@ -74,10 +87,8 @@ void fscache_free_cookie(struct fscache_cookie *cookie)
 static int fscache_set_key(struct fscache_cookie *cookie,
 			   const void *index_key, size_t index_key_len)
 {
-	unsigned long long h;
 	u32 *buf;
 	int bufs;
-	int i;
 
 	bufs = DIV_ROUND_UP(index_key_len, sizeof(*buf));
 
@@ -91,17 +102,7 @@ static int fscache_set_key(struct fscache_cookie *cookie,
 	}
 
 	memcpy(buf, index_key, index_key_len);
-
-	/* Calculate a hash and combine this with the length in the first word
-	 * or first half word
-	 */
-	h = (unsigned long)cookie->parent;
-	h += index_key_len + cookie->type;
-
-	for (i = 0; i < bufs; i++)
-		h += buf[i];
-
-	cookie->key_hash = h ^ (h >> 32);
+	cookie->key_hash = fscache_hash(0, buf, bufs);
 	return 0;
 }
 
@@ -129,6 +130,8 @@ static long fscache_compare_cookie(const struct fscache_cookie *a,
 	return memcmp(ka, kb, a->key_len);
 }
 
+static atomic_t fscache_cookie_debug_id = ATOMIC_INIT(1);
+
 /*
  * Allocate a cookie.
  */
@@ -161,8 +164,9 @@ struct fscache_cookie *fscache_alloc_cookie(
 			goto nomem;
 	}
 
-	atomic_set(&cookie->usage, 1);
+	refcount_set(&cookie->ref, 1);
 	atomic_set(&cookie->n_children, 0);
+	cookie->debug_id = atomic_inc_return(&fscache_cookie_debug_id);
 
 	/* We keep the active count elevated until relinquishment to prevent an
 	 * attempt to wake up every time the object operations queue quiesces.
@@ -181,6 +185,10 @@ struct fscache_cookie *fscache_alloc_cookie(
 	/* radix tree insertion won't use the preallocation pool unless it's
 	 * told it may not wait */
 	INIT_RADIX_TREE(&cookie->stores, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
+
+	write_lock(&fscache_cookies_lock);
+	list_add_tail(&cookie->proc_link, &fscache_cookies);
+	write_unlock(&fscache_cookies_lock);
 	return cookie;
 
 nomem:
@@ -217,8 +225,8 @@ struct fscache_cookie *fscache_hash_cookie(struct fscache_cookie *candidate)
 
 collision:
 	if (test_and_set_bit(FSCACHE_COOKIE_ACQUIRED, &cursor->flags)) {
-		trace_fscache_cookie(cursor, fscache_cookie_collision,
-				     atomic_read(&cursor->usage));
+		trace_fscache_cookie(cursor->debug_id, refcount_read(&cursor->ref),
+				     fscache_cookie_collision);
 		pr_err("Duplicate cookie detected\n");
 		fscache_print_cookie(cursor, 'O');
 		fscache_print_cookie(candidate, 'N');
@@ -297,7 +305,8 @@ struct fscache_cookie *__fscache_acquire_cookie(
 
 	cookie = fscache_hash_cookie(candidate);
 	if (!cookie) {
-		trace_fscache_cookie(candidate, fscache_cookie_discard, 1);
+		trace_fscache_cookie(candidate->debug_id, 1,
+				     fscache_cookie_discard);
 		goto out;
 	}
 
@@ -355,7 +364,7 @@ void __fscache_enable_cookie(struct fscache_cookie *cookie,
 			     bool (*can_enable)(void *data),
 			     void *data)
 {
-	_enter("%p", cookie);
+	_enter("%x", cookie->debug_id);
 
 	trace_fscache_enable(cookie);
 
@@ -452,10 +461,8 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie,
 
 	/* we may be required to wait for lookup to complete at this point */
 	if (!fscache_defer_lookup) {
-		_debug("non-deferred lookup %p", &cookie->flags);
 		wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP,
 			    TASK_UNINTERRUPTIBLE);
-		_debug("complete");
 		if (test_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags))
 			goto unavailable;
 	}
@@ -480,7 +487,7 @@ static int fscache_alloc_object(struct fscache_cache *cache,
 	struct fscache_object *object;
 	int ret;
 
-	_enter("%p,%p{%s}", cache, cookie, cookie->def->name);
+	_enter("%s,%x{%s}", cache->tag->name, cookie->debug_id, cookie->def->name);
 
 	spin_lock(&cookie->lock);
 	hlist_for_each_entry(object, &cookie->backing_objects,
@@ -600,8 +607,6 @@ static int fscache_attach_object(struct fscache_cookie *cookie,
 
 	/* Attach to the cookie.  The object already has a ref on it. */
 	hlist_add_head(&object->cookie_link, &cookie->backing_objects);
-
-	fscache_objlist_add(object);
 	ret = 0;
 
 cant_attach_object:
@@ -658,7 +663,7 @@ EXPORT_SYMBOL(__fscache_invalidate);
  */
 void __fscache_wait_on_invalidate(struct fscache_cookie *cookie)
 {
-	_enter("%p", cookie);
+	_enter("%x", cookie->debug_id);
 
 	wait_on_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING,
 		    TASK_UNINTERRUPTIBLE);
@@ -713,7 +718,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie,
 	struct fscache_object *object;
 	bool awaken = false;
 
-	_enter("%p,%u", cookie, invalidate);
+	_enter("%x,%u", cookie->debug_id, invalidate);
 
 	trace_fscache_disable(cookie);
 
@@ -803,8 +808,8 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie,
 		return;
 	}
 
-	_enter("%p{%s,%p,%d},%d",
-	       cookie, cookie->def->name, cookie->netfs_data,
+	_enter("%x{%s,%d},%d",
+	       cookie->debug_id, cookie->def->name,
 	       atomic_read(&cookie->n_active), retire);
 
 	trace_fscache_relinquish(cookie, retire);
@@ -821,13 +826,12 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie,
 	BUG_ON(!radix_tree_empty(&cookie->stores));
 
 	if (cookie->parent) {
-		ASSERTCMP(atomic_read(&cookie->parent->usage), >, 0);
+		ASSERTCMP(refcount_read(&cookie->parent->ref), >, 0);
 		ASSERTCMP(atomic_read(&cookie->parent->n_children), >, 0);
 		atomic_dec(&cookie->parent->n_children);
 	}
 
 	/* Dispose of the netfs's link to the cookie */
-	ASSERTCMP(atomic_read(&cookie->usage), >, 0);
 	fscache_cookie_put(cookie, fscache_cookie_put_relinquish);
 
 	_leave("");
@@ -857,17 +861,17 @@ void fscache_cookie_put(struct fscache_cookie *cookie,
 			enum fscache_cookie_trace where)
 {
 	struct fscache_cookie *parent;
-	int usage;
+	int ref;
 
-	_enter("%p", cookie);
+	_enter("%x", cookie->debug_id);
 
 	do {
-		usage = atomic_dec_return(&cookie->usage);
-		trace_fscache_cookie(cookie, where, usage);
+		unsigned int cookie_debug_id = cookie->debug_id;
+		bool zero = __refcount_dec_and_test(&cookie->ref, &ref);
 
-		if (usage > 0)
+		trace_fscache_cookie(cookie_debug_id, ref - 1, where);
+		if (!zero)
 			return;
-		BUG_ON(usage < 0);
 
 		parent = cookie->parent;
 		fscache_unhash_cookie(cookie);
@@ -881,6 +885,19 @@ void fscache_cookie_put(struct fscache_cookie *cookie,
 }
 
 /*
+ * Get a reference to a cookie.
+ */
+struct fscache_cookie *fscache_cookie_get(struct fscache_cookie *cookie,
+					  enum fscache_cookie_trace where)
+{
+	int ref;
+
+	__refcount_inc(&cookie->ref, &ref);
+	trace_fscache_cookie(cookie->debug_id, ref + 1, where);
+	return cookie;
+}
+
+/*
  * check the consistency between the netfs inode and the backing cache
  *
  * NOTE: it only serves no-index type
@@ -958,3 +975,97 @@ inconsistent:
 	return -ESTALE;
 }
 EXPORT_SYMBOL(__fscache_check_consistency);
+
+/*
+ * Generate a list of extant cookies in /proc/fs/fscache/cookies
+ */
+static int fscache_cookies_seq_show(struct seq_file *m, void *v)
+{
+	struct fscache_cookie *cookie;
+	unsigned int keylen = 0, auxlen = 0;
+	char _type[3], *type;
+	u8 *p;
+
+	if (v == &fscache_cookies) {
+		seq_puts(m,
+			 "COOKIE   PARENT   USAGE CHILD ACT TY FL  DEF              NETFS_DATA\n"
+			 "======== ======== ===== ===== === == === ================ ==========\n"
+			 );
+		return 0;
+	}
+
+	cookie = list_entry(v, struct fscache_cookie, proc_link);
+
+	switch (cookie->type) {
+	case 0:
+		type = "IX";
+		break;
+	case 1:
+		type = "DT";
+		break;
+	default:
+		snprintf(_type, sizeof(_type), "%02u",
+			 cookie->type);
+		type = _type;
+		break;
+	}
+
+	seq_printf(m,
+		   "%08x %08x %5u %5u %3u %s %03lx %-16s %px",
+		   cookie->debug_id,
+		   cookie->parent ? cookie->parent->debug_id : 0,
+		   refcount_read(&cookie->ref),
+		   atomic_read(&cookie->n_children),
+		   atomic_read(&cookie->n_active),
+		   type,
+		   cookie->flags,
+		   cookie->def->name,
+		   cookie->netfs_data);
+
+	keylen = cookie->key_len;
+	auxlen = cookie->aux_len;
+
+	if (keylen > 0 || auxlen > 0) {
+		seq_puts(m, " ");
+		p = keylen <= sizeof(cookie->inline_key) ?
+			cookie->inline_key : cookie->key;
+		for (; keylen > 0; keylen--)
+			seq_printf(m, "%02x", *p++);
+		if (auxlen > 0) {
+			seq_puts(m, ", ");
+			p = auxlen <= sizeof(cookie->inline_aux) ?
+				cookie->inline_aux : cookie->aux;
+			for (; auxlen > 0; auxlen--)
+				seq_printf(m, "%02x", *p++);
+		}
+	}
+
+	seq_puts(m, "\n");
+	return 0;
+}
+
+static void *fscache_cookies_seq_start(struct seq_file *m, loff_t *_pos)
+	__acquires(fscache_cookies_lock)
+{
+	read_lock(&fscache_cookies_lock);
+	return seq_list_start_head(&fscache_cookies, *_pos);
+}
+
+static void *fscache_cookies_seq_next(struct seq_file *m, void *v, loff_t *_pos)
+{
+	return seq_list_next(v, &fscache_cookies, _pos);
+}
+
+static void fscache_cookies_seq_stop(struct seq_file *m, void *v)
+	__releases(rcu)
+{
+	read_unlock(&fscache_cookies_lock);
+}
+
+
+const struct seq_operations fscache_cookies_seq_ops = {
+	.start  = fscache_cookies_seq_start,
+	.next   = fscache_cookies_seq_next,
+	.stop   = fscache_cookies_seq_stop,
+	.show   = fscache_cookies_seq_show,
+};
diff --git a/fs/fscache/fsdef.c b/fs/fscache/fsdef.c
index 09ed8795ad86..0402673c680e 100644
--- a/fs/fscache/fsdef.c
+++ b/fs/fscache/fsdef.c
@@ -45,7 +45,8 @@ static struct fscache_cookie_def fscache_fsdef_index_def = {
 };
 
 struct fscache_cookie fscache_fsdef_index = {
-	.usage		= ATOMIC_INIT(1),
+	.debug_id	= 1,
+	.ref		= REFCOUNT_INIT(1),
 	.n_active	= ATOMIC_INIT(1),
 	.lock		= __SPIN_LOCK_UNLOCKED(fscache_fsdef_index.lock),
 	.backing_objects = HLIST_HEAD_INIT,
diff --git a/fs/fscache/histogram.c b/fs/fscache/histogram.c
deleted file mode 100644
index 4e5beeaaf454..000000000000
--- a/fs/fscache/histogram.c
+++ /dev/null
@@ -1,87 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/* FS-Cache latency histogram
- *
- * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- */
-
-#define FSCACHE_DEBUG_LEVEL THREAD
-#include <linux/module.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include "internal.h"
-
-atomic_t fscache_obj_instantiate_histogram[HZ];
-atomic_t fscache_objs_histogram[HZ];
-atomic_t fscache_ops_histogram[HZ];
-atomic_t fscache_retrieval_delay_histogram[HZ];
-atomic_t fscache_retrieval_histogram[HZ];
-
-/*
- * display the time-taken histogram
- */
-static int fscache_histogram_show(struct seq_file *m, void *v)
-{
-	unsigned long index;
-	unsigned n[5], t;
-
-	switch ((unsigned long) v) {
-	case 1:
-		seq_puts(m, "JIFS  SECS  OBJ INST  OP RUNS   OBJ RUNS  RETRV DLY RETRIEVLS\n");
-		return 0;
-	case 2:
-		seq_puts(m, "===== ===== ========= ========= ========= ========= =========\n");
-		return 0;
-	default:
-		index = (unsigned long) v - 3;
-		n[0] = atomic_read(&fscache_obj_instantiate_histogram[index]);
-		n[1] = atomic_read(&fscache_ops_histogram[index]);
-		n[2] = atomic_read(&fscache_objs_histogram[index]);
-		n[3] = atomic_read(&fscache_retrieval_delay_histogram[index]);
-		n[4] = atomic_read(&fscache_retrieval_histogram[index]);
-		if (!(n[0] | n[1] | n[2] | n[3] | n[4]))
-			return 0;
-
-		t = (index * 1000) / HZ;
-
-		seq_printf(m, "%4lu  0.%03u %9u %9u %9u %9u %9u\n",
-			   index, t, n[0], n[1], n[2], n[3], n[4]);
-		return 0;
-	}
-}
-
-/*
- * set up the iterator to start reading from the first line
- */
-static void *fscache_histogram_start(struct seq_file *m, loff_t *_pos)
-{
-	if ((unsigned long long)*_pos >= HZ + 2)
-		return NULL;
-	if (*_pos == 0)
-		*_pos = 1;
-	return (void *)(unsigned long) *_pos;
-}
-
-/*
- * move to the next line
- */
-static void *fscache_histogram_next(struct seq_file *m, void *v, loff_t *pos)
-{
-	(*pos)++;
-	return (unsigned long long)*pos > HZ + 2 ?
-		NULL : (void *)(unsigned long) *pos;
-}
-
-/*
- * clean up after reading
- */
-static void fscache_histogram_stop(struct seq_file *m, void *v)
-{
-}
-
-const struct seq_operations fscache_histogram_ops = {
-	.start		= fscache_histogram_start,
-	.stop		= fscache_histogram_stop,
-	.next		= fscache_histogram_next,
-	.show		= fscache_histogram_show,
-};
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index c483863b740a..c3e4804b8fcb 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -45,6 +45,7 @@ extern struct fscache_cache *fscache_select_cache_for_object(
  * cookie.c
  */
 extern struct kmem_cache *fscache_cookie_jar;
+extern const struct seq_operations fscache_cookies_seq_ops;
 
 extern void fscache_free_cookie(struct fscache_cookie *);
 extern struct fscache_cookie *fscache_alloc_cookie(struct fscache_cookie *,
@@ -53,9 +54,18 @@ extern struct fscache_cookie *fscache_alloc_cookie(struct fscache_cookie *,
 						   const void *, size_t,
 						   void *, loff_t);
 extern struct fscache_cookie *fscache_hash_cookie(struct fscache_cookie *);
+extern struct fscache_cookie *fscache_cookie_get(struct fscache_cookie *,
+						 enum fscache_cookie_trace);
 extern void fscache_cookie_put(struct fscache_cookie *,
 			       enum fscache_cookie_trace);
 
+static inline void fscache_cookie_see(struct fscache_cookie *cookie,
+				      enum fscache_cookie_trace where)
+{
+	trace_fscache_cookie(cookie->debug_id, refcount_read(&cookie->ref),
+			     where);
+}
+
 /*
  * fsdef.c
  */
@@ -63,30 +73,6 @@ extern struct fscache_cookie fscache_fsdef_index;
 extern struct fscache_cookie_def fscache_fsdef_netfs_def;
 
 /*
- * histogram.c
- */
-#ifdef CONFIG_FSCACHE_HISTOGRAM
-extern atomic_t fscache_obj_instantiate_histogram[HZ];
-extern atomic_t fscache_objs_histogram[HZ];
-extern atomic_t fscache_ops_histogram[HZ];
-extern atomic_t fscache_retrieval_delay_histogram[HZ];
-extern atomic_t fscache_retrieval_histogram[HZ];
-
-static inline void fscache_hist(atomic_t histogram[], unsigned long start_jif)
-{
-	unsigned long jif = jiffies - start_jif;
-	if (jif >= HZ)
-		jif = HZ - 1;
-	atomic_inc(&histogram[jif]);
-}
-
-extern const struct seq_operations fscache_histogram_ops;
-
-#else
-#define fscache_hist(hist, start_jif) do {} while (0)
-#endif
-
-/*
  * main.c
  */
 extern unsigned fscache_defer_lookup;
@@ -97,6 +83,8 @@ extern struct workqueue_struct *fscache_object_wq;
 extern struct workqueue_struct *fscache_op_wq;
 DECLARE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);
 
+extern unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n);
+
 static inline bool fscache_object_congested(void)
 {
 	return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq);
@@ -108,19 +96,6 @@ static inline bool fscache_object_congested(void)
 extern void fscache_enqueue_object(struct fscache_object *);
 
 /*
- * object-list.c
- */
-#ifdef CONFIG_FSCACHE_OBJECT_LIST
-extern const struct proc_ops fscache_objlist_proc_ops;
-
-extern void fscache_objlist_add(struct fscache_object *);
-extern void fscache_objlist_remove(struct fscache_object *);
-#else
-#define fscache_objlist_add(object) do {} while(0)
-#define fscache_objlist_remove(object) do {} while(0)
-#endif
-
-/*
  * operation.c
  */
 extern int fscache_submit_exclusive_op(struct fscache_object *,
@@ -320,14 +295,6 @@ static inline void fscache_raise_event(struct fscache_object *object,
 		fscache_enqueue_object(object);
 }
 
-static inline void fscache_cookie_get(struct fscache_cookie *cookie,
-				      enum fscache_cookie_trace where)
-{
-	int usage = atomic_inc_return(&cookie->usage);
-
-	trace_fscache_cookie(cookie, where, usage);
-}
-
 /*
  * get an extra reference to a netfs retrieval context
  */
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index c1e6cc9091aa..4207f98e405f 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -94,6 +94,45 @@ static struct ctl_table fscache_sysctls_root[] = {
 #endif
 
 /*
+ * Mixing scores (in bits) for (7,20):
+ * Input delta: 1-bit      2-bit
+ * 1 round:     330.3     9201.6
+ * 2 rounds:   1246.4    25475.4
+ * 3 rounds:   1907.1    31295.1
+ * 4 rounds:   2042.3    31718.6
+ * Perfect:    2048      31744
+ *            (32*64)   (32*31/2 * 64)
+ */
+#define HASH_MIX(x, y, a)	\
+	(	x ^= (a),	\
+	y ^= x,	x = rol32(x, 7),\
+	x += y,	y = rol32(y,20),\
+	y *= 9			)
+
+static inline unsigned int fold_hash(unsigned long x, unsigned long y)
+{
+	/* Use arch-optimized multiply if one exists */
+	return __hash_32(y ^ __hash_32(x));
+}
+
+/*
+ * Generate a hash.  This is derived from full_name_hash(), but we want to be
+ * sure it is arch independent and that it doesn't change as bits of the
+ * computed hash value might appear on disk.  The caller also guarantees that
+ * the hashed data will be a series of aligned 32-bit words.
+ */
+unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n)
+{
+	unsigned int a, x = 0, y = salt;
+
+	for (; n; n--) {
+		a = *data++;
+		HASH_MIX(x, y, a);
+	}
+	return fold_hash(x, y);
+}
+
+/*
  * initialise the fs caching module
  */
 static int __init fscache_init(void)
diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c
index cce92216fa28..d6bdb7b5e723 100644
--- a/fs/fscache/netfs.c
+++ b/fs/fscache/netfs.c
@@ -37,7 +37,7 @@ int __fscache_register_netfs(struct fscache_netfs *netfs)
 	if (!cookie)
 		goto already_registered;
 	if (cookie != candidate) {
-		trace_fscache_cookie(candidate, fscache_cookie_discard, 1);
+		trace_fscache_cookie(candidate->debug_id, 1, fscache_cookie_discard);
 		fscache_free_cookie(candidate);
 	}
 
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c
deleted file mode 100644
index e106a1a1600d..000000000000
--- a/fs/fscache/object-list.c
+++ /dev/null
@@ -1,414 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/* Global fscache object list maintainer and viewer
- *
- * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- */
-
-#define FSCACHE_DEBUG_LEVEL COOKIE
-#include <linux/module.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <linux/key.h>
-#include <keys/user-type.h>
-#include "internal.h"
-
-static struct rb_root fscache_object_list;
-static DEFINE_RWLOCK(fscache_object_list_lock);
-
-struct fscache_objlist_data {
-	unsigned long	config;		/* display configuration */
-#define FSCACHE_OBJLIST_CONFIG_KEY	0x00000001	/* show object keys */
-#define FSCACHE_OBJLIST_CONFIG_AUX	0x00000002	/* show object auxdata */
-#define FSCACHE_OBJLIST_CONFIG_COOKIE	0x00000004	/* show objects with cookies */
-#define FSCACHE_OBJLIST_CONFIG_NOCOOKIE	0x00000008	/* show objects without cookies */
-#define FSCACHE_OBJLIST_CONFIG_BUSY	0x00000010	/* show busy objects */
-#define FSCACHE_OBJLIST_CONFIG_IDLE	0x00000020	/* show idle objects */
-#define FSCACHE_OBJLIST_CONFIG_PENDWR	0x00000040	/* show objects with pending writes */
-#define FSCACHE_OBJLIST_CONFIG_NOPENDWR	0x00000080	/* show objects without pending writes */
-#define FSCACHE_OBJLIST_CONFIG_READS	0x00000100	/* show objects with active reads */
-#define FSCACHE_OBJLIST_CONFIG_NOREADS	0x00000200	/* show objects without active reads */
-#define FSCACHE_OBJLIST_CONFIG_EVENTS	0x00000400	/* show objects with events */
-#define FSCACHE_OBJLIST_CONFIG_NOEVENTS	0x00000800	/* show objects without no events */
-#define FSCACHE_OBJLIST_CONFIG_WORK	0x00001000	/* show objects with work */
-#define FSCACHE_OBJLIST_CONFIG_NOWORK	0x00002000	/* show objects without work */
-};
-
-/*
- * Add an object to the object list
- * - we use the address of the fscache_object structure as the key into the
- *   tree
- */
-void fscache_objlist_add(struct fscache_object *obj)
-{
-	struct fscache_object *xobj;
-	struct rb_node **p = &fscache_object_list.rb_node, *parent = NULL;
-
-	ASSERT(RB_EMPTY_NODE(&obj->objlist_link));
-
-	write_lock(&fscache_object_list_lock);
-
-	while (*p) {
-		parent = *p;
-		xobj = rb_entry(parent, struct fscache_object, objlist_link);
-
-		if (obj < xobj)
-			p = &(*p)->rb_left;
-		else if (obj > xobj)
-			p = &(*p)->rb_right;
-		else
-			BUG();
-	}
-
-	rb_link_node(&obj->objlist_link, parent, p);
-	rb_insert_color(&obj->objlist_link, &fscache_object_list);
-
-	write_unlock(&fscache_object_list_lock);
-}
-
-/*
- * Remove an object from the object list.
- */
-void fscache_objlist_remove(struct fscache_object *obj)
-{
-	if (RB_EMPTY_NODE(&obj->objlist_link))
-		return;
-
-	write_lock(&fscache_object_list_lock);
-
-	BUG_ON(RB_EMPTY_ROOT(&fscache_object_list));
-	rb_erase(&obj->objlist_link, &fscache_object_list);
-
-	write_unlock(&fscache_object_list_lock);
-}
-
-/*
- * find the object in the tree on or after the specified index
- */
-static struct fscache_object *fscache_objlist_lookup(loff_t *_pos)
-{
-	struct fscache_object *pobj, *obj = NULL, *minobj = NULL;
-	struct rb_node *p;
-	unsigned long pos;
-
-	if (*_pos >= (unsigned long) ERR_PTR(-ENOENT))
-		return NULL;
-	pos = *_pos;
-
-	/* banners (can't represent line 0 by pos 0 as that would involve
-	 * returning a NULL pointer) */
-	if (pos == 0)
-		return (struct fscache_object *)(long)++(*_pos);
-	if (pos < 3)
-		return (struct fscache_object *)pos;
-
-	pobj = (struct fscache_object *)pos;
-	p = fscache_object_list.rb_node;
-	while (p) {
-		obj = rb_entry(p, struct fscache_object, objlist_link);
-		if (pobj < obj) {
-			if (!minobj || minobj > obj)
-				minobj = obj;
-			p = p->rb_left;
-		} else if (pobj > obj) {
-			p = p->rb_right;
-		} else {
-			minobj = obj;
-			break;
-		}
-		obj = NULL;
-	}
-
-	if (!minobj)
-		*_pos = (unsigned long) ERR_PTR(-ENOENT);
-	else if (minobj != obj)
-		*_pos = (unsigned long) minobj;
-	return minobj;
-}
-
-/*
- * set up the iterator to start reading from the first line
- */
-static void *fscache_objlist_start(struct seq_file *m, loff_t *_pos)
-	__acquires(&fscache_object_list_lock)
-{
-	read_lock(&fscache_object_list_lock);
-	return fscache_objlist_lookup(_pos);
-}
-
-/*
- * move to the next line
- */
-static void *fscache_objlist_next(struct seq_file *m, void *v, loff_t *_pos)
-{
-	(*_pos)++;
-	return fscache_objlist_lookup(_pos);
-}
-
-/*
- * clean up after reading
- */
-static void fscache_objlist_stop(struct seq_file *m, void *v)
-	__releases(&fscache_object_list_lock)
-{
-	read_unlock(&fscache_object_list_lock);
-}
-
-/*
- * display an object
- */
-static int fscache_objlist_show(struct seq_file *m, void *v)
-{
-	struct fscache_objlist_data *data = m->private;
-	struct fscache_object *obj = v;
-	struct fscache_cookie *cookie;
-	unsigned long config = data->config;
-	char _type[3], *type;
-	u8 *p;
-
-	if ((unsigned long) v == 1) {
-		seq_puts(m, "OBJECT   PARENT   STAT CHLDN OPS OOP IPR EX READS"
-			 " EM EV FL S"
-			 " | NETFS_COOKIE_DEF TY FL NETFS_DATA");
-		if (config & (FSCACHE_OBJLIST_CONFIG_KEY |
-			      FSCACHE_OBJLIST_CONFIG_AUX))
-			seq_puts(m, "       ");
-		if (config & FSCACHE_OBJLIST_CONFIG_KEY)
-			seq_puts(m, "OBJECT_KEY");
-		if ((config & (FSCACHE_OBJLIST_CONFIG_KEY |
-			       FSCACHE_OBJLIST_CONFIG_AUX)) ==
-		    (FSCACHE_OBJLIST_CONFIG_KEY | FSCACHE_OBJLIST_CONFIG_AUX))
-			seq_puts(m, ", ");
-		if (config & FSCACHE_OBJLIST_CONFIG_AUX)
-			seq_puts(m, "AUX_DATA");
-		seq_puts(m, "\n");
-		return 0;
-	}
-
-	if ((unsigned long) v == 2) {
-		seq_puts(m, "======== ======== ==== ===== === === === == ====="
-			 " == == == ="
-			 " | ================ == == ================");
-		if (config & (FSCACHE_OBJLIST_CONFIG_KEY |
-			      FSCACHE_OBJLIST_CONFIG_AUX))
-			seq_puts(m, " ================");
-		seq_puts(m, "\n");
-		return 0;
-	}
-
-	/* filter out any unwanted objects */
-#define FILTER(criterion, _yes, _no)					\
-	do {								\
-		unsigned long yes = FSCACHE_OBJLIST_CONFIG_##_yes;	\
-		unsigned long no = FSCACHE_OBJLIST_CONFIG_##_no;	\
-		if (criterion) {					\
-			if (!(config & yes))				\
-				return 0;				\
-		} else {						\
-			if (!(config & no))				\
-				return 0;				\
-		}							\
-	} while(0)
-
-	cookie = obj->cookie;
-	if (~config) {
-		FILTER(cookie->def,
-		       COOKIE, NOCOOKIE);
-		FILTER(fscache_object_is_active(obj) ||
-		       obj->n_ops != 0 ||
-		       obj->n_obj_ops != 0 ||
-		       obj->flags ||
-		       !list_empty(&obj->dependents),
-		       BUSY, IDLE);
-		FILTER(test_bit(FSCACHE_OBJECT_PENDING_WRITE, &obj->flags),
-		       PENDWR, NOPENDWR);
-		FILTER(atomic_read(&obj->n_reads),
-		       READS, NOREADS);
-		FILTER(obj->events & obj->event_mask,
-		       EVENTS, NOEVENTS);
-		FILTER(work_busy(&obj->work), WORK, NOWORK);
-	}
-
-	seq_printf(m,
-		   "%8x %8x %s %5u %3u %3u %3u %2u %5u %2lx %2lx %2lx %1x | ",
-		   obj->debug_id,
-		   obj->parent ? obj->parent->debug_id : -1,
-		   obj->state->short_name,
-		   obj->n_children,
-		   obj->n_ops,
-		   obj->n_obj_ops,
-		   obj->n_in_progress,
-		   obj->n_exclusive,
-		   atomic_read(&obj->n_reads),
-		   obj->event_mask,
-		   obj->events,
-		   obj->flags,
-		   work_busy(&obj->work));
-
-	if (fscache_use_cookie(obj)) {
-		uint16_t keylen = 0, auxlen = 0;
-
-		switch (cookie->type) {
-		case 0:
-			type = "IX";
-			break;
-		case 1:
-			type = "DT";
-			break;
-		default:
-			snprintf(_type, sizeof(_type), "%02u",
-				 cookie->type);
-			type = _type;
-			break;
-		}
-
-		seq_printf(m, "%-16s %s %2lx %16p",
-			   cookie->def->name,
-			   type,
-			   cookie->flags,
-			   cookie->netfs_data);
-
-		if (config & FSCACHE_OBJLIST_CONFIG_KEY)
-			keylen = cookie->key_len;
-
-		if (config & FSCACHE_OBJLIST_CONFIG_AUX)
-			auxlen = cookie->aux_len;
-
-		if (keylen > 0 || auxlen > 0) {
-			seq_puts(m, " ");
-			p = keylen <= sizeof(cookie->inline_key) ?
-				cookie->inline_key : cookie->key;
-			for (; keylen > 0; keylen--)
-				seq_printf(m, "%02x", *p++);
-			if (auxlen > 0) {
-				if (config & FSCACHE_OBJLIST_CONFIG_KEY)
-					seq_puts(m, ", ");
-				p = auxlen <= sizeof(cookie->inline_aux) ?
-					cookie->inline_aux : cookie->aux;
-				for (; auxlen > 0; auxlen--)
-					seq_printf(m, "%02x", *p++);
-			}
-		}
-
-		seq_puts(m, "\n");
-		fscache_unuse_cookie(obj);
-	} else {
-		seq_puts(m, "<no_netfs>\n");
-	}
-	return 0;
-}
-
-static const struct seq_operations fscache_objlist_ops = {
-	.start		= fscache_objlist_start,
-	.stop		= fscache_objlist_stop,
-	.next		= fscache_objlist_next,
-	.show		= fscache_objlist_show,
-};
-
-/*
- * get the configuration for filtering the list
- */
-static void fscache_objlist_config(struct fscache_objlist_data *data)
-{
-#ifdef CONFIG_KEYS
-	const struct user_key_payload *confkey;
-	unsigned long config;
-	struct key *key;
-	const char *buf;
-	int len;
-
-	key = request_key(&key_type_user, "fscache:objlist", NULL);
-	if (IS_ERR(key))
-		goto no_config;
-
-	config = 0;
-	rcu_read_lock();
-
-	confkey = user_key_payload_rcu(key);
-	if (!confkey) {
-		/* key was revoked */
-		rcu_read_unlock();
-		key_put(key);
-		goto no_config;
-	}
-
-	buf = confkey->data;
-
-	for (len = confkey->datalen - 1; len >= 0; len--) {
-		switch (buf[len]) {
-		case 'K': config |= FSCACHE_OBJLIST_CONFIG_KEY;		break;
-		case 'A': config |= FSCACHE_OBJLIST_CONFIG_AUX;		break;
-		case 'C': config |= FSCACHE_OBJLIST_CONFIG_COOKIE;	break;
-		case 'c': config |= FSCACHE_OBJLIST_CONFIG_NOCOOKIE;	break;
-		case 'B': config |= FSCACHE_OBJLIST_CONFIG_BUSY;	break;
-		case 'b': config |= FSCACHE_OBJLIST_CONFIG_IDLE;	break;
-		case 'W': config |= FSCACHE_OBJLIST_CONFIG_PENDWR;	break;
-		case 'w': config |= FSCACHE_OBJLIST_CONFIG_NOPENDWR;	break;
-		case 'R': config |= FSCACHE_OBJLIST_CONFIG_READS;	break;
-		case 'r': config |= FSCACHE_OBJLIST_CONFIG_NOREADS;	break;
-		case 'S': config |= FSCACHE_OBJLIST_CONFIG_WORK;	break;
-		case 's': config |= FSCACHE_OBJLIST_CONFIG_NOWORK;	break;
-		}
-	}
-
-	rcu_read_unlock();
-	key_put(key);
-
-	if (!(config & (FSCACHE_OBJLIST_CONFIG_COOKIE | FSCACHE_OBJLIST_CONFIG_NOCOOKIE)))
-	    config   |= FSCACHE_OBJLIST_CONFIG_COOKIE | FSCACHE_OBJLIST_CONFIG_NOCOOKIE;
-	if (!(config & (FSCACHE_OBJLIST_CONFIG_BUSY | FSCACHE_OBJLIST_CONFIG_IDLE)))
-	    config   |= FSCACHE_OBJLIST_CONFIG_BUSY | FSCACHE_OBJLIST_CONFIG_IDLE;
-	if (!(config & (FSCACHE_OBJLIST_CONFIG_PENDWR | FSCACHE_OBJLIST_CONFIG_NOPENDWR)))
-	    config   |= FSCACHE_OBJLIST_CONFIG_PENDWR | FSCACHE_OBJLIST_CONFIG_NOPENDWR;
-	if (!(config & (FSCACHE_OBJLIST_CONFIG_READS | FSCACHE_OBJLIST_CONFIG_NOREADS)))
-	    config   |= FSCACHE_OBJLIST_CONFIG_READS | FSCACHE_OBJLIST_CONFIG_NOREADS;
-	if (!(config & (FSCACHE_OBJLIST_CONFIG_EVENTS | FSCACHE_OBJLIST_CONFIG_NOEVENTS)))
-	    config   |= FSCACHE_OBJLIST_CONFIG_EVENTS | FSCACHE_OBJLIST_CONFIG_NOEVENTS;
-	if (!(config & (FSCACHE_OBJLIST_CONFIG_WORK | FSCACHE_OBJLIST_CONFIG_NOWORK)))
-	    config   |= FSCACHE_OBJLIST_CONFIG_WORK | FSCACHE_OBJLIST_CONFIG_NOWORK;
-
-	data->config = config;
-	return;
-
-no_config:
-#endif
-	data->config = ULONG_MAX;
-}
-
-/*
- * open "/proc/fs/fscache/objects" to provide a list of active objects
- * - can be configured by a user-defined key added to the caller's keyrings
- */
-static int fscache_objlist_open(struct inode *inode, struct file *file)
-{
-	struct fscache_objlist_data *data;
-
-	data = __seq_open_private(file, &fscache_objlist_ops, sizeof(*data));
-	if (!data)
-		return -ENOMEM;
-
-	/* get the configuration key */
-	fscache_objlist_config(data);
-
-	return 0;
-}
-
-/*
- * clean up on close
- */
-static int fscache_objlist_release(struct inode *inode, struct file *file)
-{
-	struct seq_file *m = file->private_data;
-
-	kfree(m->private);
-	m->private = NULL;
-	return seq_release(inode, file);
-}
-
-const struct proc_ops fscache_objlist_proc_ops = {
-	.proc_open	= fscache_objlist_open,
-	.proc_read	= seq_read,
-	.proc_lseek	= seq_lseek,
-	.proc_release	= fscache_objlist_release,
-};
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index cb2146e02cd5..f346a78f4bd6 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -277,13 +277,10 @@ static void fscache_object_work_func(struct work_struct *work)
 {
 	struct fscache_object *object =
 		container_of(work, struct fscache_object, work);
-	unsigned long start;
 
 	_enter("{OBJ%x}", object->debug_id);
 
-	start = jiffies;
 	fscache_object_sm_dispatcher(object);
-	fscache_hist(fscache_objs_histogram, start);
 	fscache_put_object(object, fscache_obj_put_work);
 }
 
@@ -436,7 +433,6 @@ static const struct fscache_state *fscache_parent_ready(struct fscache_object *o
 	spin_lock(&parent->lock);
 	parent->n_ops++;
 	parent->n_obj_ops++;
-	object->lookup_jif = jiffies;
 	spin_unlock(&parent->lock);
 
 	_leave("");
@@ -522,7 +518,6 @@ void fscache_object_lookup_negative(struct fscache_object *object)
 		set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags);
 		clear_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags);
 
-		_debug("wake up lookup %p", &cookie->flags);
 		clear_bit_unlock(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags);
 		wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP);
 	}
@@ -596,7 +591,6 @@ static const struct fscache_state *fscache_object_available(struct fscache_objec
 	object->cache->ops->lookup_complete(object);
 	fscache_stat_d(&fscache_n_cop_lookup_complete);
 
-	fscache_hist(fscache_obj_instantiate_histogram, object->lookup_jif);
 	fscache_stat(&fscache_n_object_avail);
 
 	_leave("");
@@ -799,8 +793,6 @@ static void fscache_put_object(struct fscache_object *object,
  */
 void fscache_object_destroy(struct fscache_object *object)
 {
-	fscache_objlist_remove(object);
-
 	/* We can get rid of the cookie now */
 	fscache_cookie_put(object->cookie, fscache_cookie_put_object);
 	object->cookie = NULL;
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index 4a5651d4904e..433877107700 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -616,7 +616,6 @@ void fscache_op_work_func(struct work_struct *work)
 {
 	struct fscache_operation *op =
 		container_of(work, struct fscache_operation, work);
-	unsigned long start;
 
 	_enter("{OBJ%x OP%x,%d}",
 	       op->object->debug_id, op->debug_id, atomic_read(&op->usage));
@@ -624,9 +623,7 @@ void fscache_op_work_func(struct work_struct *work)
 	trace_fscache_op(op->object->cookie, op, fscache_op_work);
 
 	ASSERT(op->processor != NULL);
-	start = jiffies;
 	op->processor(op);
-	fscache_hist(fscache_ops_histogram, start);
 	fscache_put_operation(op);
 
 	_leave("");
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 991b0a871744..27df94ef0e0b 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -289,7 +289,6 @@ static void fscache_release_retrieval_op(struct fscache_operation *_op)
 	ASSERTIFCMP(op->op.state != FSCACHE_OP_ST_INITIALISED,
 		    atomic_read(&op->n_pages), ==, 0);
 
-	fscache_hist(fscache_retrieval_histogram, op->start_time);
 	if (op->context)
 		fscache_put_context(op->cookie, op->context);
 
@@ -324,7 +323,6 @@ struct fscache_retrieval *fscache_alloc_retrieval(
 	op->mapping	= mapping;
 	op->end_io_func	= end_io_func;
 	op->context	= context;
-	op->start_time	= jiffies;
 	INIT_LIST_HEAD(&op->to_do);
 
 	/* Pin the netfs read context in case we need to do the actual netfs
@@ -340,8 +338,6 @@ struct fscache_retrieval *fscache_alloc_retrieval(
  */
 int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie)
 {
-	unsigned long jif;
-
 	_enter("");
 
 	if (!test_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags)) {
@@ -351,7 +347,6 @@ int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie)
 
 	fscache_stat(&fscache_n_retrievals_wait);
 
-	jif = jiffies;
 	if (wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP,
 			TASK_INTERRUPTIBLE) != 0) {
 		fscache_stat(&fscache_n_retrievals_intr);
@@ -362,7 +357,6 @@ int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie)
 	ASSERT(!test_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags));
 
 	smp_rmb();
-	fscache_hist(fscache_retrieval_delay_histogram, jif);
 	_leave(" = 0 [dly]");
 	return 0;
 }
diff --git a/fs/fscache/proc.c b/fs/fscache/proc.c
index 90a7bc22f7e1..061df8f61ffc 100644
--- a/fs/fscache/proc.c
+++ b/fs/fscache/proc.c
@@ -21,18 +21,16 @@ int __init fscache_proc_init(void)
 	if (!proc_mkdir("fs/fscache", NULL))
 		goto error_dir;
 
+	if (!proc_create_seq("fs/fscache/cookies", S_IFREG | 0444, NULL,
+			     &fscache_cookies_seq_ops))
+		goto error_cookies;
+
 #ifdef CONFIG_FSCACHE_STATS
 	if (!proc_create_single("fs/fscache/stats", S_IFREG | 0444, NULL,
 			fscache_stats_show))
 		goto error_stats;
 #endif
 
-#ifdef CONFIG_FSCACHE_HISTOGRAM
-	if (!proc_create_seq("fs/fscache/histogram", S_IFREG | 0444, NULL,
-			 &fscache_histogram_ops))
-		goto error_histogram;
-#endif
-
 #ifdef CONFIG_FSCACHE_OBJECT_LIST
 	if (!proc_create("fs/fscache/objects", S_IFREG | 0444, NULL,
 			 &fscache_objlist_proc_ops))
@@ -45,14 +43,12 @@ int __init fscache_proc_init(void)
 #ifdef CONFIG_FSCACHE_OBJECT_LIST
 error_objects:
 #endif
-#ifdef CONFIG_FSCACHE_HISTOGRAM
-	remove_proc_entry("fs/fscache/histogram", NULL);
-error_histogram:
-#endif
 #ifdef CONFIG_FSCACHE_STATS
 	remove_proc_entry("fs/fscache/stats", NULL);
 error_stats:
 #endif
+	remove_proc_entry("fs/fscache/cookies", NULL);
+error_cookies:
 	remove_proc_entry("fs/fscache", NULL);
 error_dir:
 	_leave(" = -ENOMEM");
@@ -67,11 +63,9 @@ void fscache_proc_cleanup(void)
 #ifdef CONFIG_FSCACHE_OBJECT_LIST
 	remove_proc_entry("fs/fscache/objects", NULL);
 #endif
-#ifdef CONFIG_FSCACHE_HISTOGRAM
-	remove_proc_entry("fs/fscache/histogram", NULL);
-#endif
 #ifdef CONFIG_FSCACHE_STATS
 	remove_proc_entry("fs/fscache/stats", NULL);
 #endif
+	remove_proc_entry("fs/fscache/cookies", NULL);
 	remove_proc_entry("fs/fscache", NULL);
 }
diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c
index 52b165319be1..337cb29a8dd5 100644
--- a/fs/fuse/acl.c
+++ b/fs/fuse/acl.c
@@ -11,7 +11,7 @@
 #include <linux/posix_acl.h>
 #include <linux/posix_acl_xattr.h>
 
-struct posix_acl *fuse_get_acl(struct inode *inode, int type)
+struct posix_acl *fuse_get_acl(struct inode *inode, int type, bool rcu)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	int size;
@@ -19,6 +19,9 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type)
 	void *value = NULL;
 	struct posix_acl *acl;
 
+	if (rcu)
+		return ERR_PTR(-ECHILD);
+
 	if (fuse_is_bad(inode))
 		return ERR_PTR(-EIO);
 
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index cc7e94d73c6c..000d2e5627e9 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -328,7 +328,7 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc)
 	drop_nlink(d_inode(fuse_control_sb->s_root));
 }
 
-static int fuse_ctl_fill_super(struct super_block *sb, struct fs_context *fctx)
+static int fuse_ctl_fill_super(struct super_block *sb, struct fs_context *fsc)
 {
 	static const struct tree_descr empty_descr = {""};
 	struct fuse_conn *fc;
@@ -354,18 +354,18 @@ static int fuse_ctl_fill_super(struct super_block *sb, struct fs_context *fctx)
 	return 0;
 }
 
-static int fuse_ctl_get_tree(struct fs_context *fc)
+static int fuse_ctl_get_tree(struct fs_context *fsc)
 {
-	return get_tree_single(fc, fuse_ctl_fill_super);
+	return get_tree_single(fsc, fuse_ctl_fill_super);
 }
 
 static const struct fs_context_operations fuse_ctl_context_ops = {
 	.get_tree	= fuse_ctl_get_tree,
 };
 
-static int fuse_ctl_init_fs_context(struct fs_context *fc)
+static int fuse_ctl_init_fs_context(struct fs_context *fsc)
 {
-	fc->ops = &fuse_ctl_context_ops;
+	fsc->ops = &fuse_ctl_context_ops;
 	return 0;
 }
 
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 1c8f79b3dd06..dde341a6388a 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -288,10 +288,10 @@ void fuse_request_end(struct fuse_req *req)
 
 	/*
 	 * test_and_set_bit() implies smp_mb() between bit
-	 * changing and below intr_entry check. Pairs with
+	 * changing and below FR_INTERRUPTED check. Pairs with
 	 * smp_mb() from queue_interrupt().
 	 */
-	if (!list_empty(&req->intr_entry)) {
+	if (test_bit(FR_INTERRUPTED, &req->flags)) {
 		spin_lock(&fiq->lock);
 		list_del_init(&req->intr_entry);
 		spin_unlock(&fiq->lock);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 621a662c19fb..11404f8c21c7 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -198,12 +198,11 @@ void fuse_finish_open(struct inode *inode, struct file *file)
 	struct fuse_file *ff = file->private_data;
 	struct fuse_conn *fc = get_fuse_conn(inode);
 
-	if (!(ff->open_flags & FOPEN_KEEP_CACHE))
-		invalidate_inode_pages2(inode->i_mapping);
 	if (ff->open_flags & FOPEN_STREAM)
 		stream_open(inode, file);
 	else if (ff->open_flags & FOPEN_NONSEEKABLE)
 		nonseekable_open(inode, file);
+
 	if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
 		struct fuse_inode *fi = get_fuse_inode(inode);
 
@@ -211,10 +210,14 @@ void fuse_finish_open(struct inode *inode, struct file *file)
 		fi->attr_version = atomic64_inc_return(&fc->attr_version);
 		i_size_write(inode, 0);
 		spin_unlock(&fi->lock);
+		truncate_pagecache(inode, 0);
 		fuse_invalidate_attr(inode);
 		if (fc->writeback_cache)
 			file_update_time(file);
+	} else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) {
+		invalidate_inode_pages2(inode->i_mapping);
 	}
+
 	if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
 		fuse_link_write_file(file);
 }
@@ -389,6 +392,7 @@ struct fuse_writepage_args {
 	struct list_head queue_entry;
 	struct fuse_writepage_args *next;
 	struct inode *inode;
+	struct fuse_sync_bucket *bucket;
 };
 
 static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
@@ -1608,6 +1612,9 @@ static void fuse_writepage_free(struct fuse_writepage_args *wpa)
 	struct fuse_args_pages *ap = &wpa->ia.ap;
 	int i;
 
+	if (wpa->bucket)
+		fuse_sync_bucket_dec(wpa->bucket);
+
 	for (i = 0; i < ap->num_pages; i++)
 		__free_page(ap->pages[i]);
 
@@ -1813,8 +1820,7 @@ static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args,
 	fuse_writepage_free(wpa);
 }
 
-static struct fuse_file *__fuse_write_file_get(struct fuse_conn *fc,
-					       struct fuse_inode *fi)
+static struct fuse_file *__fuse_write_file_get(struct fuse_inode *fi)
 {
 	struct fuse_file *ff = NULL;
 
@@ -1829,22 +1835,20 @@ static struct fuse_file *__fuse_write_file_get(struct fuse_conn *fc,
 	return ff;
 }
 
-static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc,
-					     struct fuse_inode *fi)
+static struct fuse_file *fuse_write_file_get(struct fuse_inode *fi)
 {
-	struct fuse_file *ff = __fuse_write_file_get(fc, fi);
+	struct fuse_file *ff = __fuse_write_file_get(fi);
 	WARN_ON(!ff);
 	return ff;
 }
 
 int fuse_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
-	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_inode *fi = get_fuse_inode(inode);
 	struct fuse_file *ff;
 	int err;
 
-	ff = __fuse_write_file_get(fc, fi);
+	ff = __fuse_write_file_get(fi);
 	err = fuse_flush_times(inode, ff);
 	if (ff)
 		fuse_file_put(ff, false, false);
@@ -1871,6 +1875,20 @@ static struct fuse_writepage_args *fuse_writepage_args_alloc(void)
 
 }
 
+static void fuse_writepage_add_to_bucket(struct fuse_conn *fc,
+					 struct fuse_writepage_args *wpa)
+{
+	if (!fc->sync_fs)
+		return;
+
+	rcu_read_lock();
+	/* Prevent resurrection of dead bucket in unlikely race with syncfs */
+	do {
+		wpa->bucket = rcu_dereference(fc->curr_bucket);
+	} while (unlikely(!atomic_inc_not_zero(&wpa->bucket->count)));
+	rcu_read_unlock();
+}
+
 static int fuse_writepage_locked(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
@@ -1894,10 +1912,11 @@ static int fuse_writepage_locked(struct page *page)
 		goto err_free;
 
 	error = -EIO;
-	wpa->ia.ff = fuse_write_file_get(fc, fi);
+	wpa->ia.ff = fuse_write_file_get(fi);
 	if (!wpa->ia.ff)
 		goto err_nofile;
 
+	fuse_writepage_add_to_bucket(fc, wpa);
 	fuse_write_args_fill(&wpa->ia, wpa->ia.ff, page_offset(page), 0);
 
 	copy_highpage(tmp_page, page);
@@ -2113,7 +2132,7 @@ static int fuse_writepages_fill(struct page *page,
 
 	if (!data->ff) {
 		err = -EIO;
-		data->ff = fuse_write_file_get(fc, fi);
+		data->ff = fuse_write_file_get(fi);
 		if (!data->ff)
 			goto out_unlock;
 	}
@@ -2148,6 +2167,8 @@ static int fuse_writepages_fill(struct page *page,
 			__free_page(tmp_page);
 			goto out_unlock;
 		}
+		fuse_writepage_add_to_bucket(fc, wpa);
+
 		data->max_pages = 1;
 
 		ap = &wpa->ia.ap;
@@ -2881,7 +2902,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 
 static int fuse_writeback_range(struct inode *inode, loff_t start, loff_t end)
 {
-	int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	int err = filemap_write_and_wait_range(inode->i_mapping, start, -1);
 
 	if (!err)
 		fuse_sync_writes(inode);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 6fb639b97ea8..319596df5dc6 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -482,6 +482,7 @@ struct fuse_dev {
 
 struct fuse_fs_context {
 	int fd;
+	struct file *file;
 	unsigned int rootmode;
 	kuid_t user_id;
 	kgid_t group_id;
@@ -508,6 +509,13 @@ struct fuse_fs_context {
 	void **fudptr;
 };
 
+struct fuse_sync_bucket {
+	/* count is a possible scalability bottleneck */
+	atomic_t count;
+	wait_queue_head_t waitq;
+	struct rcu_head rcu;
+};
+
 /**
  * A Fuse connection.
  *
@@ -800,6 +808,9 @@ struct fuse_conn {
 
 	/** List of filesystems using this connection */
 	struct list_head mounts;
+
+	/* New writepages go into this bucket */
+	struct fuse_sync_bucket __rcu *curr_bucket;
 };
 
 /*
@@ -903,6 +914,15 @@ static inline void fuse_page_descs_length_init(struct fuse_page_desc *descs,
 		descs[i].length = PAGE_SIZE - descs[i].offset;
 }
 
+static inline void fuse_sync_bucket_dec(struct fuse_sync_bucket *bucket)
+{
+	/* Need RCU protection to prevent use after free after the decrement */
+	rcu_read_lock();
+	if (atomic_dec_and_test(&bucket->count))
+		wake_up(&bucket->waitq);
+	rcu_read_unlock();
+}
+
 /** Device operations */
 extern const struct file_operations fuse_dev_operations;
 
@@ -1209,7 +1229,7 @@ extern const struct xattr_handler *fuse_acl_xattr_handlers[];
 extern const struct xattr_handler *fuse_no_acl_xattr_handlers[];
 
 struct posix_acl;
-struct posix_acl *fuse_get_acl(struct inode *inode, int type);
+struct posix_acl *fuse_get_acl(struct inode *inode, int type, bool rcu);
 int fuse_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
 		 struct posix_acl *acl, int type);
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index e07e429f32e1..36cd03114b6d 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -137,12 +137,12 @@ static void fuse_evict_inode(struct inode *inode)
 	}
 }
 
-static int fuse_reconfigure(struct fs_context *fc)
+static int fuse_reconfigure(struct fs_context *fsc)
 {
-	struct super_block *sb = fc->root->d_sb;
+	struct super_block *sb = fsc->root->d_sb;
 
 	sync_filesystem(sb);
-	if (fc->sb_flags & SB_MANDLOCK)
+	if (fsc->sb_flags & SB_MANDLOCK)
 		return -EINVAL;
 
 	return 0;
@@ -505,6 +505,57 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return err;
 }
 
+static struct fuse_sync_bucket *fuse_sync_bucket_alloc(void)
+{
+	struct fuse_sync_bucket *bucket;
+
+	bucket = kzalloc(sizeof(*bucket), GFP_KERNEL | __GFP_NOFAIL);
+	if (bucket) {
+		init_waitqueue_head(&bucket->waitq);
+		/* Initial active count */
+		atomic_set(&bucket->count, 1);
+	}
+	return bucket;
+}
+
+static void fuse_sync_fs_writes(struct fuse_conn *fc)
+{
+	struct fuse_sync_bucket *bucket, *new_bucket;
+	int count;
+
+	new_bucket = fuse_sync_bucket_alloc();
+	spin_lock(&fc->lock);
+	bucket = rcu_dereference_protected(fc->curr_bucket, 1);
+	count = atomic_read(&bucket->count);
+	WARN_ON(count < 1);
+	/* No outstanding writes? */
+	if (count == 1) {
+		spin_unlock(&fc->lock);
+		kfree(new_bucket);
+		return;
+	}
+
+	/*
+	 * Completion of new bucket depends on completion of this bucket, so add
+	 * one more count.
+	 */
+	atomic_inc(&new_bucket->count);
+	rcu_assign_pointer(fc->curr_bucket, new_bucket);
+	spin_unlock(&fc->lock);
+	/*
+	 * Drop initial active count.  At this point if all writes in this and
+	 * ancestor buckets complete, the count will go to zero and this task
+	 * will be woken up.
+	 */
+	atomic_dec(&bucket->count);
+
+	wait_event(bucket->waitq, atomic_read(&bucket->count) == 0);
+
+	/* Drop temp count on descendant bucket */
+	fuse_sync_bucket_dec(new_bucket);
+	kfree_rcu(bucket, rcu);
+}
+
 static int fuse_sync_fs(struct super_block *sb, int wait)
 {
 	struct fuse_mount *fm = get_fuse_mount_super(sb);
@@ -527,6 +578,8 @@ static int fuse_sync_fs(struct super_block *sb, int wait)
 	if (!fc->sync_fs)
 		return 0;
 
+	fuse_sync_fs_writes(fc);
+
 	memset(&inarg, 0, sizeof(inarg));
 	args.in_numargs = 1;
 	args.in_args[0].size = sizeof(inarg);
@@ -572,38 +625,38 @@ static const struct fs_parameter_spec fuse_fs_parameters[] = {
 	{}
 };
 
-static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
+static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param)
 {
 	struct fs_parse_result result;
-	struct fuse_fs_context *ctx = fc->fs_private;
+	struct fuse_fs_context *ctx = fsc->fs_private;
 	int opt;
 
-	if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+	if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
 		/*
 		 * Ignore options coming from mount(MS_REMOUNT) for backward
 		 * compatibility.
 		 */
-		if (fc->oldapi)
+		if (fsc->oldapi)
 			return 0;
 
-		return invalfc(fc, "No changes allowed in reconfigure");
+		return invalfc(fsc, "No changes allowed in reconfigure");
 	}
 
-	opt = fs_parse(fc, fuse_fs_parameters, param, &result);
+	opt = fs_parse(fsc, fuse_fs_parameters, param, &result);
 	if (opt < 0)
 		return opt;
 
 	switch (opt) {
 	case OPT_SOURCE:
-		if (fc->source)
-			return invalfc(fc, "Multiple sources specified");
-		fc->source = param->string;
+		if (fsc->source)
+			return invalfc(fsc, "Multiple sources specified");
+		fsc->source = param->string;
 		param->string = NULL;
 		break;
 
 	case OPT_SUBTYPE:
 		if (ctx->subtype)
-			return invalfc(fc, "Multiple subtypes specified");
+			return invalfc(fsc, "Multiple subtypes specified");
 		ctx->subtype = param->string;
 		param->string = NULL;
 		return 0;
@@ -615,22 +668,22 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
 
 	case OPT_ROOTMODE:
 		if (!fuse_valid_type(result.uint_32))
-			return invalfc(fc, "Invalid rootmode");
+			return invalfc(fsc, "Invalid rootmode");
 		ctx->rootmode = result.uint_32;
 		ctx->rootmode_present = true;
 		break;
 
 	case OPT_USER_ID:
-		ctx->user_id = make_kuid(fc->user_ns, result.uint_32);
+		ctx->user_id = make_kuid(fsc->user_ns, result.uint_32);
 		if (!uid_valid(ctx->user_id))
-			return invalfc(fc, "Invalid user_id");
+			return invalfc(fsc, "Invalid user_id");
 		ctx->user_id_present = true;
 		break;
 
 	case OPT_GROUP_ID:
-		ctx->group_id = make_kgid(fc->user_ns, result.uint_32);
+		ctx->group_id = make_kgid(fsc->user_ns, result.uint_32);
 		if (!gid_valid(ctx->group_id))
-			return invalfc(fc, "Invalid group_id");
+			return invalfc(fsc, "Invalid group_id");
 		ctx->group_id_present = true;
 		break;
 
@@ -648,7 +701,7 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
 
 	case OPT_BLKSIZE:
 		if (!ctx->is_bdev)
-			return invalfc(fc, "blksize only supported for fuseblk");
+			return invalfc(fsc, "blksize only supported for fuseblk");
 		ctx->blksize = result.uint_32;
 		break;
 
@@ -659,9 +712,9 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
 	return 0;
 }
 
-static void fuse_free_fc(struct fs_context *fc)
+static void fuse_free_fsc(struct fs_context *fsc)
 {
-	struct fuse_fs_context *ctx = fc->fs_private;
+	struct fuse_fs_context *ctx = fsc->fs_private;
 
 	if (ctx) {
 		kfree(ctx->subtype);
@@ -762,6 +815,7 @@ void fuse_conn_put(struct fuse_conn *fc)
 {
 	if (refcount_dec_and_test(&fc->count)) {
 		struct fuse_iqueue *fiq = &fc->iq;
+		struct fuse_sync_bucket *bucket;
 
 		if (IS_ENABLED(CONFIG_FUSE_DAX))
 			fuse_dax_conn_free(fc);
@@ -769,6 +823,11 @@ void fuse_conn_put(struct fuse_conn *fc)
 			fiq->ops->release(fiq);
 		put_pid_ns(fc->pid_ns);
 		put_user_ns(fc->user_ns);
+		bucket = rcu_dereference_protected(fc->curr_bucket, 1);
+		if (bucket) {
+			WARN_ON(atomic_read(&bucket->count) != 1);
+			kfree(bucket);
+		}
 		fc->release(fc);
 	}
 }
@@ -1417,6 +1476,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
 	if (sb->s_flags & SB_MANDLOCK)
 		goto err;
 
+	rcu_assign_pointer(fc->curr_bucket, fuse_sync_bucket_alloc());
 	fuse_sb_defaults(sb);
 
 	if (ctx->is_bdev) {
@@ -1508,34 +1568,33 @@ EXPORT_SYMBOL_GPL(fuse_fill_super_common);
 static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
 {
 	struct fuse_fs_context *ctx = fsc->fs_private;
-	struct file *file;
 	int err;
 	struct fuse_conn *fc;
 	struct fuse_mount *fm;
 
-	err = -EINVAL;
-	file = fget(ctx->fd);
-	if (!file)
-		goto err;
+	if (!ctx->file || !ctx->rootmode_present ||
+	    !ctx->user_id_present || !ctx->group_id_present)
+		return -EINVAL;
 
 	/*
 	 * Require mount to happen from the same user namespace which
 	 * opened /dev/fuse to prevent potential attacks.
 	 */
-	if ((file->f_op != &fuse_dev_operations) ||
-	    (file->f_cred->user_ns != sb->s_user_ns))
-		goto err_fput;
-	ctx->fudptr = &file->private_data;
+	err = -EINVAL;
+	if ((ctx->file->f_op != &fuse_dev_operations) ||
+	    (ctx->file->f_cred->user_ns != sb->s_user_ns))
+		goto err;
+	ctx->fudptr = &ctx->file->private_data;
 
 	fc = kmalloc(sizeof(*fc), GFP_KERNEL);
 	err = -ENOMEM;
 	if (!fc)
-		goto err_fput;
+		goto err;
 
 	fm = kzalloc(sizeof(*fm), GFP_KERNEL);
 	if (!fm) {
 		kfree(fc);
-		goto err_fput;
+		goto err;
 	}
 
 	fuse_conn_init(fc, fm, sb->s_user_ns, &fuse_dev_fiq_ops, NULL);
@@ -1546,12 +1605,8 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
 	err = fuse_fill_super_common(sb, ctx);
 	if (err)
 		goto err_put_conn;
-	/*
-	 * atomic_dec_and_test() in fput() provides the necessary
-	 * memory barrier for file->private_data to be visible on all
-	 * CPUs after this
-	 */
-	fput(file);
+	/* file->private_data shall be visible on all CPUs after this */
+	smp_mb();
 	fuse_send_init(get_fuse_mount_super(sb));
 	return 0;
 
@@ -1559,30 +1614,68 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
 	fuse_conn_put(fc);
 	kfree(fm);
 	sb->s_fs_info = NULL;
- err_fput:
-	fput(file);
  err:
 	return err;
 }
 
-static int fuse_get_tree(struct fs_context *fc)
+/*
+ * This is the path where user supplied an already initialized fuse dev.  In
+ * this case never create a new super if the old one is gone.
+ */
+static int fuse_set_no_super(struct super_block *sb, struct fs_context *fsc)
 {
-	struct fuse_fs_context *ctx = fc->fs_private;
+	return -ENOTCONN;
+}
 
-	if (!ctx->fd_present || !ctx->rootmode_present ||
-	    !ctx->user_id_present || !ctx->group_id_present)
-		return -EINVAL;
+static int fuse_test_super(struct super_block *sb, struct fs_context *fsc)
+{
 
-#ifdef CONFIG_BLOCK
-	if (ctx->is_bdev)
-		return get_tree_bdev(fc, fuse_fill_super);
-#endif
+	return fsc->sget_key == get_fuse_conn_super(sb);
+}
+
+static int fuse_get_tree(struct fs_context *fsc)
+{
+	struct fuse_fs_context *ctx = fsc->fs_private;
+	struct fuse_dev *fud;
+	struct super_block *sb;
+	int err;
 
-	return get_tree_nodev(fc, fuse_fill_super);
+	if (ctx->fd_present)
+		ctx->file = fget(ctx->fd);
+
+	if (IS_ENABLED(CONFIG_BLOCK) && ctx->is_bdev) {
+		err = get_tree_bdev(fsc, fuse_fill_super);
+		goto out_fput;
+	}
+	/*
+	 * While block dev mount can be initialized with a dummy device fd
+	 * (found by device name), normal fuse mounts can't
+	 */
+	if (!ctx->file)
+		return -EINVAL;
+
+	/*
+	 * Allow creating a fuse mount with an already initialized fuse
+	 * connection
+	 */
+	fud = READ_ONCE(ctx->file->private_data);
+	if (ctx->file->f_op == &fuse_dev_operations && fud) {
+		fsc->sget_key = fud->fc;
+		sb = sget_fc(fsc, fuse_test_super, fuse_set_no_super);
+		err = PTR_ERR_OR_ZERO(sb);
+		if (!IS_ERR(sb))
+			fsc->root = dget(sb->s_root);
+	} else {
+		err = get_tree_nodev(fsc, fuse_fill_super);
+	}
+out_fput:
+	if (ctx->file)
+		fput(ctx->file);
+	return err;
 }
 
 static const struct fs_context_operations fuse_context_ops = {
-	.free		= fuse_free_fc,
+	.free		= fuse_free_fsc,
 	.parse_param	= fuse_parse_param,
 	.reconfigure	= fuse_reconfigure,
 	.get_tree	= fuse_get_tree,
@@ -1591,7 +1684,7 @@ static const struct fs_context_operations fuse_context_ops = {
 /*
  * Set up the filesystem mount context.
  */
-static int fuse_init_fs_context(struct fs_context *fc)
+static int fuse_init_fs_context(struct fs_context *fsc)
 {
 	struct fuse_fs_context *ctx;
 
@@ -1604,14 +1697,14 @@ static int fuse_init_fs_context(struct fs_context *fc)
 	ctx->legacy_opts_show = true;
 
 #ifdef CONFIG_BLOCK
-	if (fc->fs_type == &fuseblk_fs_type) {
+	if (fsc->fs_type == &fuseblk_fs_type) {
 		ctx->is_bdev = true;
 		ctx->destroy = true;
 	}
 #endif
 
-	fc->fs_private = ctx;
-	fc->ops = &fuse_context_ops;
+	fsc->fs_private = ctx;
+	fsc->ops = &fuse_context_ops;
 	return 0;
 }
 
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 8f52cdaa8445..0ad89c6629d7 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -97,14 +97,14 @@ static const struct fs_parameter_spec virtio_fs_parameters[] = {
 	{}
 };
 
-static int virtio_fs_parse_param(struct fs_context *fc,
+static int virtio_fs_parse_param(struct fs_context *fsc,
 				 struct fs_parameter *param)
 {
 	struct fs_parse_result result;
-	struct fuse_fs_context *ctx = fc->fs_private;
+	struct fuse_fs_context *ctx = fsc->fs_private;
 	int opt;
 
-	opt = fs_parse(fc, virtio_fs_parameters, param, &result);
+	opt = fs_parse(fsc, virtio_fs_parameters, param, &result);
 	if (opt < 0)
 		return opt;
 
@@ -119,9 +119,9 @@ static int virtio_fs_parse_param(struct fs_context *fc,
 	return 0;
 }
 
-static void virtio_fs_free_fc(struct fs_context *fc)
+static void virtio_fs_free_fsc(struct fs_context *fsc)
 {
-	struct fuse_fs_context *ctx = fc->fs_private;
+	struct fuse_fs_context *ctx = fsc->fs_private;
 
 	kfree(ctx);
 }
@@ -1488,7 +1488,7 @@ out_err:
 }
 
 static const struct fs_context_operations virtio_fs_context_ops = {
-	.free		= virtio_fs_free_fc,
+	.free		= virtio_fs_free_fsc,
 	.parse_param	= virtio_fs_parse_param,
 	.get_tree	= virtio_fs_get_tree,
 };
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 9165d70ead07..734d1f05d823 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -57,13 +57,16 @@ static struct posix_acl *__gfs2_get_acl(struct inode *inode, int type)
 	return acl;
 }
 
-struct posix_acl *gfs2_get_acl(struct inode *inode, int type)
+struct posix_acl *gfs2_get_acl(struct inode *inode, int type, bool rcu)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_holder gh;
 	bool need_unlock = false;
 	struct posix_acl *acl;
 
+	if (rcu)
+		return ERR_PTR(-ECHILD);
+
 	if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
 		int ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
 					     LM_FLAG_ANY, &gh);
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
index eccc6a43326c..cd180ca7c959 100644
--- a/fs/gfs2/acl.h
+++ b/fs/gfs2/acl.h
@@ -11,7 +11,7 @@
 
 #define GFS2_ACL_MAX_ENTRIES(sdp) ((300 << (sdp)->sd_sb.sb_bsize_shift) >> 12)
 
-extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type);
+extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type, bool rcu);
 extern int __gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type);
 extern int gfs2_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
 			struct posix_acl *acl, int type);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 6e15434b23ac..3130f85d2b3f 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1985,8 +1985,8 @@ static int gfs2_setattr(struct user_namespace *mnt_userns,
 	if (error)
 		goto out;
 
-	error = -EPERM;
-	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+	error = may_setattr(&init_user_ns, inode, attr->ia_valid);
+	if (error)
 		goto error;
 
 	error = setattr_prepare(&init_user_ns, dentry, attr);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 7d0c3dbb2898..d5c9d886cd9f 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -381,6 +381,7 @@ static int hostfs_fsync(struct file *file, loff_t start, loff_t end,
 static const struct file_operations hostfs_file_fops = {
 	.llseek		= generic_file_llseek,
 	.splice_read	= generic_file_splice_read,
+	.splice_write	= iter_file_splice_write,
 	.read_iter	= generic_file_read_iter,
 	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
diff --git a/fs/inode.c b/fs/inode.c
index 84c528cd1955..37710ca863b5 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -770,7 +770,7 @@ static enum lru_status inode_lru_isolate(struct list_head *item,
 		return LRU_ROTATE;
 	}
 
-	if (inode_has_buffers(inode) || inode->i_data.nrpages) {
+	if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) {
 		__iget(inode);
 		spin_unlock(&inode->i_lock);
 		spin_unlock(lru_lock);
diff --git a/fs/internal.h b/fs/internal.h
index 68a2ae029a27..3cd065c8a66b 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -18,7 +18,7 @@ struct user_namespace;
 struct pipe_inode_info;
 
 /*
- * block_dev.c
+ * block/bdev.c
  */
 #ifdef CONFIG_BLOCK
 extern void __init bdev_cache_init(void);
diff --git a/fs/io-wq.c b/fs/io-wq.c
index cd9bd095fb1b..6c55362c1f99 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -23,8 +23,7 @@ enum {
 	IO_WORKER_F_UP		= 1,	/* up and active */
 	IO_WORKER_F_RUNNING	= 2,	/* account as running */
 	IO_WORKER_F_FREE	= 4,	/* worker on free list */
-	IO_WORKER_F_FIXED	= 8,	/* static idle worker */
-	IO_WORKER_F_BOUND	= 16,	/* is doing bounded work */
+	IO_WORKER_F_BOUND	= 8,	/* is doing bounded work */
 };
 
 enum {
@@ -32,7 +31,7 @@ enum {
 };
 
 enum {
-	IO_WQE_FLAG_STALLED	= 1,	/* stalled on hash */
+	IO_ACCT_STALLED_BIT	= 0,	/* stalled on hash */
 };
 
 /*
@@ -55,7 +54,10 @@ struct io_worker {
 	struct callback_head create_work;
 	int create_index;
 
-	struct rcu_head rcu;
+	union {
+		struct rcu_head rcu;
+		struct work_struct work;
+	};
 };
 
 #if BITS_PER_LONG == 64
@@ -71,25 +73,24 @@ struct io_wqe_acct {
 	unsigned max_workers;
 	int index;
 	atomic_t nr_running;
+	struct io_wq_work_list work_list;
+	unsigned long flags;
 };
 
 enum {
 	IO_WQ_ACCT_BOUND,
 	IO_WQ_ACCT_UNBOUND,
+	IO_WQ_ACCT_NR,
 };
 
 /*
  * Per-node worker thread pool
  */
 struct io_wqe {
-	struct {
-		raw_spinlock_t lock;
-		struct io_wq_work_list work_list;
-		unsigned flags;
-	} ____cacheline_aligned_in_smp;
+	raw_spinlock_t lock;
+	struct io_wqe_acct acct[2];
 
 	int node;
-	struct io_wqe_acct acct[2];
 
 	struct hlist_nulls_head free_list;
 	struct list_head all_list;
@@ -133,8 +134,11 @@ struct io_cb_cancel_data {
 	bool cancel_all;
 };
 
-static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index, bool first);
+static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index);
 static void io_wqe_dec_running(struct io_worker *worker);
+static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
+					struct io_wqe_acct *acct,
+					struct io_cb_cancel_data *match);
 
 static bool io_worker_get(struct io_worker *worker)
 {
@@ -195,11 +199,10 @@ static void io_worker_exit(struct io_worker *worker)
 	do_exit(0);
 }
 
-static inline bool io_wqe_run_queue(struct io_wqe *wqe)
-	__must_hold(wqe->lock)
+static inline bool io_acct_run_queue(struct io_wqe_acct *acct)
 {
-	if (!wq_list_empty(&wqe->work_list) &&
-	    !(wqe->flags & IO_WQE_FLAG_STALLED))
+	if (!wq_list_empty(&acct->work_list) &&
+	    !test_bit(IO_ACCT_STALLED_BIT, &acct->flags))
 		return true;
 	return false;
 }
@@ -208,7 +211,8 @@ static inline bool io_wqe_run_queue(struct io_wqe *wqe)
  * Check head of free list for an available worker. If one isn't available,
  * caller must create one.
  */
-static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
+static bool io_wqe_activate_free_worker(struct io_wqe *wqe,
+					struct io_wqe_acct *acct)
 	__must_hold(RCU)
 {
 	struct hlist_nulls_node *n;
@@ -222,6 +226,10 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
 	hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) {
 		if (!io_worker_get(worker))
 			continue;
+		if (io_wqe_get_acct(worker) != acct) {
+			io_worker_release(worker);
+			continue;
+		}
 		if (wake_up_process(worker->task)) {
 			io_worker_release(worker);
 			return true;
@@ -236,9 +244,9 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
  * We need a worker. If we find a free one, we're good. If not, and we're
  * below the max number of workers, create one.
  */
-static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
+static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
 {
-	bool ret;
+	bool do_create = false;
 
 	/*
 	 * Most likely an attempt to queue unbounded work on an io_wq that
@@ -247,27 +255,19 @@ static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
 	if (unlikely(!acct->max_workers))
 		pr_warn_once("io-wq is not configured for unbound workers");
 
-	rcu_read_lock();
-	ret = io_wqe_activate_free_worker(wqe);
-	rcu_read_unlock();
-
-	if (!ret) {
-		bool do_create = false, first = false;
-
-		raw_spin_lock(&wqe->lock);
-		if (acct->nr_workers < acct->max_workers) {
-			if (!acct->nr_workers)
-				first = true;
-			acct->nr_workers++;
-			do_create = true;
-		}
-		raw_spin_unlock(&wqe->lock);
-		if (do_create) {
-			atomic_inc(&acct->nr_running);
-			atomic_inc(&wqe->wq->worker_refs);
-			create_io_worker(wqe->wq, wqe, acct->index, first);
-		}
+	raw_spin_lock(&wqe->lock);
+	if (acct->nr_workers < acct->max_workers) {
+		acct->nr_workers++;
+		do_create = true;
 	}
+	raw_spin_unlock(&wqe->lock);
+	if (do_create) {
+		atomic_inc(&acct->nr_running);
+		atomic_inc(&wqe->wq->worker_refs);
+		return create_io_worker(wqe->wq, wqe, acct->index);
+	}
+
+	return true;
 }
 
 static void io_wqe_inc_running(struct io_worker *worker)
@@ -283,7 +283,7 @@ static void create_worker_cb(struct callback_head *cb)
 	struct io_wq *wq;
 	struct io_wqe *wqe;
 	struct io_wqe_acct *acct;
-	bool do_create = false, first = false;
+	bool do_create = false;
 
 	worker = container_of(cb, struct io_worker, create_work);
 	wqe = worker->wqe;
@@ -291,14 +291,12 @@ static void create_worker_cb(struct callback_head *cb)
 	acct = &wqe->acct[worker->create_index];
 	raw_spin_lock(&wqe->lock);
 	if (acct->nr_workers < acct->max_workers) {
-		if (!acct->nr_workers)
-			first = true;
 		acct->nr_workers++;
 		do_create = true;
 	}
 	raw_spin_unlock(&wqe->lock);
 	if (do_create) {
-		create_io_worker(wq, wqe, worker->create_index, first);
+		create_io_worker(wq, wqe, worker->create_index);
 	} else {
 		atomic_dec(&acct->nr_running);
 		io_worker_ref_put(wq);
@@ -307,9 +305,11 @@ static void create_worker_cb(struct callback_head *cb)
 	io_worker_release(worker);
 }
 
-static void io_queue_worker_create(struct io_wqe *wqe, struct io_worker *worker,
-				   struct io_wqe_acct *acct)
+static bool io_queue_worker_create(struct io_worker *worker,
+				   struct io_wqe_acct *acct,
+				   task_work_func_t func)
 {
+	struct io_wqe *wqe = worker->wqe;
 	struct io_wq *wq = wqe->wq;
 
 	/* raced with exit, just ignore create call */
@@ -327,16 +327,17 @@ static void io_queue_worker_create(struct io_wqe *wqe, struct io_worker *worker,
 	    test_and_set_bit_lock(0, &worker->create_state))
 		goto fail_release;
 
-	init_task_work(&worker->create_work, create_worker_cb);
+	init_task_work(&worker->create_work, func);
 	worker->create_index = acct->index;
 	if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL))
-		return;
+		return true;
 	clear_bit_unlock(0, &worker->create_state);
 fail_release:
 	io_worker_release(worker);
 fail:
 	atomic_dec(&acct->nr_running);
 	io_worker_ref_put(wq);
+	return false;
 }
 
 static void io_wqe_dec_running(struct io_worker *worker)
@@ -348,10 +349,10 @@ static void io_wqe_dec_running(struct io_worker *worker)
 	if (!(worker->flags & IO_WORKER_F_UP))
 		return;
 
-	if (atomic_dec_and_test(&acct->nr_running) && io_wqe_run_queue(wqe)) {
+	if (atomic_dec_and_test(&acct->nr_running) && io_acct_run_queue(acct)) {
 		atomic_inc(&acct->nr_running);
 		atomic_inc(&wqe->wq->worker_refs);
-		io_queue_worker_create(wqe, worker, acct);
+		io_queue_worker_create(worker, acct, create_worker_cb);
 	}
 }
 
@@ -363,29 +364,10 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
 			     struct io_wq_work *work)
 	__must_hold(wqe->lock)
 {
-	bool worker_bound, work_bound;
-
-	BUILD_BUG_ON((IO_WQ_ACCT_UNBOUND ^ IO_WQ_ACCT_BOUND) != 1);
-
 	if (worker->flags & IO_WORKER_F_FREE) {
 		worker->flags &= ~IO_WORKER_F_FREE;
 		hlist_nulls_del_init_rcu(&worker->nulls_node);
 	}
-
-	/*
-	 * If worker is moving from bound to unbound (or vice versa), then
-	 * ensure we update the running accounting.
-	 */
-	worker_bound = (worker->flags & IO_WORKER_F_BOUND) != 0;
-	work_bound = (work->flags & IO_WQ_WORK_UNBOUND) == 0;
-	if (worker_bound != work_bound) {
-		int index = work_bound ? IO_WQ_ACCT_UNBOUND : IO_WQ_ACCT_BOUND;
-		io_wqe_dec_running(worker);
-		worker->flags ^= IO_WORKER_F_BOUND;
-		wqe->acct[index].nr_workers--;
-		wqe->acct[index ^ 1].nr_workers++;
-		io_wqe_inc_running(worker);
-	 }
 }
 
 /*
@@ -413,7 +395,7 @@ static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
 {
 	struct io_wq *wq = wqe->wq;
 
-	spin_lock(&wq->hash->wait.lock);
+	spin_lock_irq(&wq->hash->wait.lock);
 	if (list_empty(&wqe->wait.entry)) {
 		__add_wait_queue(&wq->hash->wait, &wqe->wait);
 		if (!test_bit(hash, &wq->hash->map)) {
@@ -421,48 +403,26 @@ static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
 			list_del_init(&wqe->wait.entry);
 		}
 	}
-	spin_unlock(&wq->hash->wait.lock);
-}
-
-/*
- * We can always run the work if the worker is currently the same type as
- * the work (eg both are bound, or both are unbound). If they are not the
- * same, only allow it if incrementing the worker count would be allowed.
- */
-static bool io_worker_can_run_work(struct io_worker *worker,
-				   struct io_wq_work *work)
-{
-	struct io_wqe_acct *acct;
-
-	if (!(worker->flags & IO_WORKER_F_BOUND) !=
-	    !(work->flags & IO_WQ_WORK_UNBOUND))
-		return true;
-
-	/* not the same type, check if we'd go over the limit */
-	acct = io_work_get_acct(worker->wqe, work);
-	return acct->nr_workers < acct->max_workers;
+	spin_unlock_irq(&wq->hash->wait.lock);
 }
 
-static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
-					   struct io_worker *worker,
-					   bool *stalled)
+static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
+					   struct io_worker *worker)
 	__must_hold(wqe->lock)
 {
 	struct io_wq_work_node *node, *prev;
 	struct io_wq_work *work, *tail;
 	unsigned int stall_hash = -1U;
+	struct io_wqe *wqe = worker->wqe;
 
-	wq_list_for_each(node, prev, &wqe->work_list) {
+	wq_list_for_each(node, prev, &acct->work_list) {
 		unsigned int hash;
 
 		work = container_of(node, struct io_wq_work, list);
 
-		if (!io_worker_can_run_work(worker, work))
-			break;
-
 		/* not hashed, can run anytime */
 		if (!io_wq_is_hashed(work)) {
-			wq_list_del(&wqe->work_list, node, prev);
+			wq_list_del(&acct->work_list, node, prev);
 			return work;
 		}
 
@@ -473,7 +433,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
 		/* hashed, can run if not already running */
 		if (!test_and_set_bit(hash, &wqe->wq->hash->map)) {
 			wqe->hash_tail[hash] = NULL;
-			wq_list_cut(&wqe->work_list, &tail->list, prev);
+			wq_list_cut(&acct->work_list, &tail->list, prev);
 			return work;
 		}
 		if (stall_hash == -1U)
@@ -483,10 +443,14 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
 	}
 
 	if (stall_hash != -1U) {
+		/*
+		 * Set this before dropping the lock to avoid racing with new
+		 * work being added and clearing the stalled bit.
+		 */
+		set_bit(IO_ACCT_STALLED_BIT, &acct->flags);
 		raw_spin_unlock(&wqe->lock);
 		io_wait_on_hash(wqe, stall_hash);
 		raw_spin_lock(&wqe->lock);
-		*stalled = true;
 	}
 
 	return NULL;
@@ -520,13 +484,13 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
 static void io_worker_handle_work(struct io_worker *worker)
 	__releases(wqe->lock)
 {
+	struct io_wqe_acct *acct = io_wqe_get_acct(worker);
 	struct io_wqe *wqe = worker->wqe;
 	struct io_wq *wq = wqe->wq;
 	bool do_kill = test_bit(IO_WQ_BIT_EXIT, &wq->state);
 
 	do {
 		struct io_wq_work *work;
-		bool stalled;
 get_next:
 		/*
 		 * If we got some work, mark us as busy. If we didn't, but
@@ -535,12 +499,9 @@ get_next:
 		 * can't make progress, any work completion or insertion will
 		 * clear the stalled flag.
 		 */
-		stalled = false;
-		work = io_get_next_work(wqe, worker, &stalled);
+		work = io_get_next_work(acct, worker);
 		if (work)
 			__io_worker_busy(wqe, worker, work);
-		else if (stalled)
-			wqe->flags |= IO_WQE_FLAG_STALLED;
 
 		raw_spin_unlock(&wqe->lock);
 		if (!work)
@@ -572,10 +533,10 @@ get_next:
 
 			if (hash != -1U && !next_hashed) {
 				clear_bit(hash, &wq->hash->map);
+				clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
 				if (wq_has_sleeper(&wq->hash->wait))
 					wake_up(&wq->hash->wait);
 				raw_spin_lock(&wqe->lock);
-				wqe->flags &= ~IO_WQE_FLAG_STALLED;
 				/* skip unnecessary unlock-lock wqe->lock */
 				if (!work)
 					goto get_next;
@@ -590,8 +551,10 @@ get_next:
 static int io_wqe_worker(void *data)
 {
 	struct io_worker *worker = data;
+	struct io_wqe_acct *acct = io_wqe_get_acct(worker);
 	struct io_wqe *wqe = worker->wqe;
 	struct io_wq *wq = wqe->wq;
+	bool last_timeout = false;
 	char buf[TASK_COMM_LEN];
 
 	worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
@@ -605,10 +568,17 @@ static int io_wqe_worker(void *data)
 		set_current_state(TASK_INTERRUPTIBLE);
 loop:
 		raw_spin_lock(&wqe->lock);
-		if (io_wqe_run_queue(wqe)) {
+		if (io_acct_run_queue(acct)) {
 			io_worker_handle_work(worker);
 			goto loop;
 		}
+		/* timed out, exit unless we're the last worker */
+		if (last_timeout && acct->nr_workers > 1) {
+			raw_spin_unlock(&wqe->lock);
+			__set_current_state(TASK_RUNNING);
+			break;
+		}
+		last_timeout = false;
 		__io_worker_idle(wqe, worker);
 		raw_spin_unlock(&wqe->lock);
 		if (io_flush_signals())
@@ -619,13 +589,11 @@ loop:
 
 			if (!get_signal(&ksig))
 				continue;
-			break;
-		}
-		if (ret)
+			if (fatal_signal_pending(current))
+				break;
 			continue;
-		/* timed out, exit unless we're the fixed worker */
-		if (!(worker->flags & IO_WORKER_F_FIXED))
-			break;
+		}
+		last_timeout = !ret;
 	}
 
 	if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
@@ -676,51 +644,131 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
 	raw_spin_unlock(&worker->wqe->lock);
 }
 
-static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index, bool first)
+static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker,
+			       struct task_struct *tsk)
+{
+	tsk->pf_io_worker = worker;
+	worker->task = tsk;
+	set_cpus_allowed_ptr(tsk, wqe->cpu_mask);
+	tsk->flags |= PF_NO_SETAFFINITY;
+
+	raw_spin_lock(&wqe->lock);
+	hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
+	list_add_tail_rcu(&worker->all_list, &wqe->all_list);
+	worker->flags |= IO_WORKER_F_FREE;
+	raw_spin_unlock(&wqe->lock);
+	wake_up_new_task(tsk);
+}
+
+static bool io_wq_work_match_all(struct io_wq_work *work, void *data)
+{
+	return true;
+}
+
+static inline bool io_should_retry_thread(long err)
+{
+	switch (err) {
+	case -EAGAIN:
+	case -ERESTARTSYS:
+	case -ERESTARTNOINTR:
+	case -ERESTARTNOHAND:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static void create_worker_cont(struct callback_head *cb)
 {
-	struct io_wqe_acct *acct = &wqe->acct[index];
 	struct io_worker *worker;
 	struct task_struct *tsk;
+	struct io_wqe *wqe;
 
-	__set_current_state(TASK_RUNNING);
+	worker = container_of(cb, struct io_worker, create_work);
+	clear_bit_unlock(0, &worker->create_state);
+	wqe = worker->wqe;
+	tsk = create_io_thread(io_wqe_worker, worker, wqe->node);
+	if (!IS_ERR(tsk)) {
+		io_init_new_worker(wqe, worker, tsk);
+		io_worker_release(worker);
+		return;
+	} else if (!io_should_retry_thread(PTR_ERR(tsk))) {
+		struct io_wqe_acct *acct = io_wqe_get_acct(worker);
 
-	worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node);
-	if (!worker)
-		goto fail;
+		atomic_dec(&acct->nr_running);
+		raw_spin_lock(&wqe->lock);
+		acct->nr_workers--;
+		if (!acct->nr_workers) {
+			struct io_cb_cancel_data match = {
+				.fn		= io_wq_work_match_all,
+				.cancel_all	= true,
+			};
 
-	refcount_set(&worker->ref, 1);
-	worker->nulls_node.pprev = NULL;
-	worker->wqe = wqe;
-	spin_lock_init(&worker->lock);
-	init_completion(&worker->ref_done);
+			while (io_acct_cancel_pending_work(wqe, acct, &match))
+				raw_spin_lock(&wqe->lock);
+		}
+		raw_spin_unlock(&wqe->lock);
+		io_worker_ref_put(wqe->wq);
+		kfree(worker);
+		return;
+	}
 
-	tsk = create_io_thread(io_wqe_worker, worker, wqe->node);
-	if (IS_ERR(tsk)) {
+	/* re-create attempts grab a new worker ref, drop the existing one */
+	io_worker_release(worker);
+	schedule_work(&worker->work);
+}
+
+static void io_workqueue_create(struct work_struct *work)
+{
+	struct io_worker *worker = container_of(work, struct io_worker, work);
+	struct io_wqe_acct *acct = io_wqe_get_acct(worker);
+
+	if (!io_queue_worker_create(worker, acct, create_worker_cont)) {
+		clear_bit_unlock(0, &worker->create_state);
+		io_worker_release(worker);
 		kfree(worker);
+	}
+}
+
+static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
+{
+	struct io_wqe_acct *acct = &wqe->acct[index];
+	struct io_worker *worker;
+	struct task_struct *tsk;
+
+	__set_current_state(TASK_RUNNING);
+
+	worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node);
+	if (!worker) {
 fail:
 		atomic_dec(&acct->nr_running);
 		raw_spin_lock(&wqe->lock);
 		acct->nr_workers--;
 		raw_spin_unlock(&wqe->lock);
 		io_worker_ref_put(wq);
-		return;
+		return false;
 	}
 
-	tsk->pf_io_worker = worker;
-	worker->task = tsk;
-	set_cpus_allowed_ptr(tsk, wqe->cpu_mask);
-	tsk->flags |= PF_NO_SETAFFINITY;
+	refcount_set(&worker->ref, 1);
+	worker->wqe = wqe;
+	spin_lock_init(&worker->lock);
+	init_completion(&worker->ref_done);
 
-	raw_spin_lock(&wqe->lock);
-	hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
-	list_add_tail_rcu(&worker->all_list, &wqe->all_list);
-	worker->flags |= IO_WORKER_F_FREE;
 	if (index == IO_WQ_ACCT_BOUND)
 		worker->flags |= IO_WORKER_F_BOUND;
-	if (first && (worker->flags & IO_WORKER_F_BOUND))
-		worker->flags |= IO_WORKER_F_FIXED;
-	raw_spin_unlock(&wqe->lock);
-	wake_up_new_task(tsk);
+
+	tsk = create_io_thread(io_wqe_worker, worker, wqe->node);
+	if (!IS_ERR(tsk)) {
+		io_init_new_worker(wqe, worker, tsk);
+	} else if (!io_should_retry_thread(PTR_ERR(tsk))) {
+		kfree(worker);
+		goto fail;
+	} else {
+		INIT_WORK(&worker->work, io_workqueue_create);
+		schedule_work(&worker->work);
+	}
+
+	return true;
 }
 
 /*
@@ -755,11 +803,6 @@ static bool io_wq_worker_wake(struct io_worker *worker, void *data)
 	return false;
 }
 
-static bool io_wq_work_match_all(struct io_wq_work *work, void *data)
-{
-	return true;
-}
-
 static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
 {
 	struct io_wq *wq = wqe->wq;
@@ -773,12 +816,13 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
 
 static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
 {
+	struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
 	unsigned int hash;
 	struct io_wq_work *tail;
 
 	if (!io_wq_is_hashed(work)) {
 append:
-		wq_list_add_tail(&work->list, &wqe->work_list);
+		wq_list_add_tail(&work->list, &acct->work_list);
 		return;
 	}
 
@@ -788,13 +832,19 @@ append:
 	if (!tail)
 		goto append;
 
-	wq_list_add_after(&work->list, &tail->list, &wqe->work_list);
+	wq_list_add_after(&work->list, &tail->list, &acct->work_list);
+}
+
+static bool io_wq_work_match_item(struct io_wq_work *work, void *data)
+{
+	return work == data;
 }
 
 static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
 {
 	struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
-	bool do_wake;
+	unsigned work_flags = work->flags;
+	bool do_create;
 
 	/*
 	 * If io-wq is exiting for this task, or if the request has explicitly
@@ -808,13 +858,36 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
 
 	raw_spin_lock(&wqe->lock);
 	io_wqe_insert_work(wqe, work);
-	wqe->flags &= ~IO_WQE_FLAG_STALLED;
-	do_wake = (work->flags & IO_WQ_WORK_CONCURRENT) ||
-			!atomic_read(&acct->nr_running);
+	clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
+
+	rcu_read_lock();
+	do_create = !io_wqe_activate_free_worker(wqe, acct);
+	rcu_read_unlock();
+
 	raw_spin_unlock(&wqe->lock);
 
-	if (do_wake)
-		io_wqe_wake_worker(wqe, acct);
+	if (do_create && ((work_flags & IO_WQ_WORK_CONCURRENT) ||
+	    !atomic_read(&acct->nr_running))) {
+		bool did_create;
+
+		did_create = io_wqe_create_worker(wqe, acct);
+		if (likely(did_create))
+			return;
+
+		raw_spin_lock(&wqe->lock);
+		/* fatal condition, failed to create the first worker */
+		if (!acct->nr_workers) {
+			struct io_cb_cancel_data match = {
+				.fn		= io_wq_work_match_item,
+				.data		= work,
+				.cancel_all	= false,
+			};
+
+			if (io_acct_cancel_pending_work(wqe, acct, &match))
+				raw_spin_lock(&wqe->lock);
+		}
+		raw_spin_unlock(&wqe->lock);
+	}
 }
 
 void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
@@ -859,6 +932,7 @@ static inline void io_wqe_remove_pending(struct io_wqe *wqe,
 					 struct io_wq_work *work,
 					 struct io_wq_work_node *prev)
 {
+	struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
 	unsigned int hash = io_get_work_hash(work);
 	struct io_wq_work *prev_work = NULL;
 
@@ -870,18 +944,18 @@ static inline void io_wqe_remove_pending(struct io_wqe *wqe,
 		else
 			wqe->hash_tail[hash] = NULL;
 	}
-	wq_list_del(&wqe->work_list, &work->list, prev);
+	wq_list_del(&acct->work_list, &work->list, prev);
 }
 
-static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
-				       struct io_cb_cancel_data *match)
+static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
+					struct io_wqe_acct *acct,
+					struct io_cb_cancel_data *match)
+	__releases(wqe->lock)
 {
 	struct io_wq_work_node *node, *prev;
 	struct io_wq_work *work;
 
-retry:
-	raw_spin_lock(&wqe->lock);
-	wq_list_for_each(node, prev, &wqe->work_list) {
+	wq_list_for_each(node, prev, &acct->work_list) {
 		work = container_of(node, struct io_wq_work, list);
 		if (!match->fn(work, match->data))
 			continue;
@@ -889,11 +963,27 @@ retry:
 		raw_spin_unlock(&wqe->lock);
 		io_run_cancel(work, wqe);
 		match->nr_pending++;
-		if (!match->cancel_all)
-			return;
-
 		/* not safe to continue after unlock */
-		goto retry;
+		return true;
+	}
+
+	return false;
+}
+
+static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
+				       struct io_cb_cancel_data *match)
+{
+	int i;
+retry:
+	raw_spin_lock(&wqe->lock);
+	for (i = 0; i < IO_WQ_ACCT_NR; i++) {
+		struct io_wqe_acct *acct = io_get_acct(wqe, i == 0);
+
+		if (io_acct_cancel_pending_work(wqe, acct, match)) {
+			if (match->cancel_all)
+				goto retry;
+			return;
+		}
 	}
 	raw_spin_unlock(&wqe->lock);
 }
@@ -954,18 +1044,24 @@ static int io_wqe_hash_wake(struct wait_queue_entry *wait, unsigned mode,
 			    int sync, void *key)
 {
 	struct io_wqe *wqe = container_of(wait, struct io_wqe, wait);
+	int i;
 
 	list_del_init(&wait->entry);
 
 	rcu_read_lock();
-	io_wqe_activate_free_worker(wqe);
+	for (i = 0; i < IO_WQ_ACCT_NR; i++) {
+		struct io_wqe_acct *acct = &wqe->acct[i];
+
+		if (test_and_clear_bit(IO_ACCT_STALLED_BIT, &acct->flags))
+			io_wqe_activate_free_worker(wqe, acct);
+	}
 	rcu_read_unlock();
 	return 1;
 }
 
 struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 {
-	int ret, node;
+	int ret, node, i;
 	struct io_wq *wq;
 
 	if (WARN_ON_ONCE(!data->free_work || !data->do_work))
@@ -1000,18 +1096,20 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 		cpumask_copy(wqe->cpu_mask, cpumask_of_node(node));
 		wq->wqes[node] = wqe;
 		wqe->node = alloc_node;
-		wqe->acct[IO_WQ_ACCT_BOUND].index = IO_WQ_ACCT_BOUND;
-		wqe->acct[IO_WQ_ACCT_UNBOUND].index = IO_WQ_ACCT_UNBOUND;
 		wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded;
-		atomic_set(&wqe->acct[IO_WQ_ACCT_BOUND].nr_running, 0);
 		wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers =
 					task_rlimit(current, RLIMIT_NPROC);
-		atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0);
-		wqe->wait.func = io_wqe_hash_wake;
 		INIT_LIST_HEAD(&wqe->wait.entry);
+		wqe->wait.func = io_wqe_hash_wake;
+		for (i = 0; i < IO_WQ_ACCT_NR; i++) {
+			struct io_wqe_acct *acct = &wqe->acct[i];
+
+			acct->index = i;
+			atomic_set(&acct->nr_running, 0);
+			INIT_WQ_LIST(&acct->work_list);
+		}
 		wqe->wq = wq;
 		raw_spin_lock_init(&wqe->lock);
-		INIT_WQ_LIST(&wqe->work_list);
 		INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0);
 		INIT_LIST_HEAD(&wqe->all_list);
 	}
@@ -1038,7 +1136,7 @@ static bool io_task_work_match(struct callback_head *cb, void *data)
 {
 	struct io_worker *worker;
 
-	if (cb->func != create_worker_cb)
+	if (cb->func != create_worker_cb && cb->func != create_worker_cont)
 		return false;
 	worker = container_of(cb, struct io_worker, create_work);
 	return worker->wqe->wq == data;
@@ -1059,9 +1157,14 @@ static void io_wq_exit_workers(struct io_wq *wq)
 
 	while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) {
 		struct io_worker *worker;
+		struct io_wqe_acct *acct;
 
 		worker = container_of(cb, struct io_worker, create_work);
-		atomic_dec(&worker->wqe->acct[worker->create_index].nr_running);
+		acct = io_wqe_get_acct(worker);
+		atomic_dec(&acct->nr_running);
+		raw_spin_lock(&worker->wqe->lock);
+		acct->nr_workers--;
+		raw_spin_unlock(&worker->wqe->lock);
 		io_worker_ref_put(wq);
 		clear_bit_unlock(0, &worker->create_state);
 		io_worker_release(worker);
@@ -1193,7 +1296,7 @@ int io_wq_max_workers(struct io_wq *wq, int *new_count)
 	for_each_node(node) {
 		struct io_wqe_acct *acct;
 
-		for (i = 0; i < 2; i++) {
+		for (i = 0; i < IO_WQ_ACCT_NR; i++) {
 			acct = &wq->wqes[node]->acct[i];
 			prev = max_t(int, acct->max_workers, prev);
 			if (new_count[i])
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6f35b1285865..16fb7436043c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1021,6 +1021,7 @@ static const struct io_op_def io_op_defs[] = {
 	},
 	[IORING_OP_WRITE] = {
 		.needs_file		= 1,
+		.hash_reg_file		= 1,
 		.unbound_nonreg_file	= 1,
 		.pollout		= 1,
 		.plug			= 1,
@@ -1481,6 +1482,8 @@ static void io_kill_timeout(struct io_kiocb *req, int status)
 	struct io_timeout_data *io = req->async_data;
 
 	if (hrtimer_try_to_cancel(&io->timer) != -1) {
+		if (status)
+			req_set_fail(req);
 		atomic_set(&req->ctx->cq_timeouts,
 			atomic_read(&req->ctx->cq_timeouts) + 1);
 		list_del_init(&req->timeout.list);
@@ -1618,8 +1621,11 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 
 static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
 {
+	/* see waitqueue_active() comment */
+	smp_mb();
+
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
-		if (wq_has_sleeper(&ctx->cq_wait))
+		if (waitqueue_active(&ctx->cq_wait))
 			wake_up_all(&ctx->cq_wait);
 	}
 	if (io_should_trigger_evfd(ctx))
@@ -1851,6 +1857,17 @@ static void io_req_complete_failed(struct io_kiocb *req, long res)
 	io_req_complete_post(req, res, 0);
 }
 
+static void io_req_complete_fail_submit(struct io_kiocb *req)
+{
+	/*
+	 * We don't submit, fail them all, for that replace hardlinks with
+	 * normal links. Extra REQ_F_LINK is tolerated.
+	 */
+	req->flags &= ~REQ_F_HARDLINK;
+	req->flags |= REQ_F_LINK;
+	io_req_complete_failed(req, req->result);
+}
+
 /*
  * Don't initialise the fields below on every allocation, but do that in
  * advance and keep them valid across allocations.
@@ -2119,6 +2136,9 @@ static void tctx_task_work(struct callback_head *cb)
 	while (1) {
 		struct io_wq_work_node *node;
 
+		if (!tctx->task_list.first && locked && ctx->submit_state.compl_nr)
+			io_submit_flush_completions(ctx);
+
 		spin_lock_irq(&tctx->task_lock);
 		node = tctx->task_list.first;
 		INIT_WQ_LIST(&tctx->task_list);
@@ -2673,7 +2693,7 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
 {
 	if (__io_complete_rw_common(req, res))
 		return;
-	__io_req_complete(req, 0, req->result, io_put_rw_kbuf(req));
+	__io_req_complete(req, issue_flags, req->result, io_put_rw_kbuf(req));
 }
 
 static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
@@ -3410,6 +3430,12 @@ static inline int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter)
 		return -EINVAL;
 }
 
+static bool need_read_all(struct io_kiocb *req)
+{
+	return req->flags & REQ_F_ISREG ||
+		S_ISBLK(file_inode(req->file)->i_mode);
+}
+
 static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
@@ -3459,12 +3485,13 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 		if (req->flags & REQ_F_NOWAIT)
 			goto done;
 		/* some cases will consume bytes even on error returns */
+		iov_iter_reexpand(iter, iter->count + iter->truncated);
 		iov_iter_revert(iter, io_size - iov_iter_count(iter));
 		ret = 0;
 	} else if (ret == -EIOCBQUEUED) {
 		goto out_free;
 	} else if (ret <= 0 || ret == io_size || !force_nonblock ||
-		   (req->flags & REQ_F_NOWAIT) || !(req->flags & REQ_F_ISREG)) {
+		   (req->flags & REQ_F_NOWAIT) || !need_read_all(req)) {
 		/* read all, failed, already did sync or don't want to retry */
 		goto done;
 	}
@@ -3598,6 +3625,7 @@ done:
 	} else {
 copy_iov:
 		/* some cases will consume bytes even on error returns */
+		iov_iter_reexpand(iter, iter->count + iter->truncated);
 		iov_iter_revert(iter, io_size - iov_iter_count(iter));
 		ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
 		return ret ?: -EAGAIN;
@@ -5249,7 +5277,7 @@ static void io_poll_remove_double(struct io_kiocb *req)
 	}
 }
 
-static bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
+static bool __io_poll_complete(struct io_kiocb *req, __poll_t mask)
 	__must_hold(&req->ctx->completion_lock)
 {
 	struct io_ring_ctx *ctx = req->ctx;
@@ -5271,10 +5299,19 @@ static bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
 	if (flags & IORING_CQE_F_MORE)
 		ctx->cq_extra++;
 
-	io_commit_cqring(ctx);
 	return !(flags & IORING_CQE_F_MORE);
 }
 
+static inline bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
+	__must_hold(&req->ctx->completion_lock)
+{
+	bool done;
+
+	done = __io_poll_complete(req, mask);
+	io_commit_cqring(req->ctx);
+	return done;
+}
+
 static void io_poll_task_func(struct io_kiocb *req, bool *locked)
 {
 	struct io_ring_ctx *ctx = req->ctx;
@@ -5285,7 +5322,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
 	} else {
 		bool done;
 
-		done = io_poll_complete(req, req->result);
+		done = __io_poll_complete(req, req->result);
 		if (done) {
 			io_poll_remove_double(req);
 			hash_del(&req->hash_node);
@@ -5293,6 +5330,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
 			req->result = 0;
 			add_wait_queue(req->poll.head, &req->poll.wait);
 		}
+		io_commit_cqring(ctx);
 		spin_unlock(&ctx->completion_lock);
 		io_cqring_ev_posted(ctx);
 
@@ -6398,6 +6436,11 @@ static bool io_drain_req(struct io_kiocb *req)
 	int ret;
 	u32 seq;
 
+	if (req->flags & REQ_F_FAIL) {
+		io_req_complete_fail_submit(req);
+		return true;
+	}
+
 	/*
 	 * If we need to drain a request in the middle of a link, drain the
 	 * head request and the next request/link after the current link.
@@ -6914,7 +6957,7 @@ static inline void io_queue_sqe(struct io_kiocb *req)
 	if (likely(!(req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)))) {
 		__io_queue_sqe(req);
 	} else if (req->flags & REQ_F_FAIL) {
-		io_req_complete_failed(req, req->result);
+		io_req_complete_fail_submit(req);
 	} else {
 		int ret = io_req_prep_async(req);
 
@@ -10498,26 +10541,53 @@ static int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
 static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
 					void __user *arg)
 {
-	struct io_uring_task *tctx = current->io_uring;
+	struct io_uring_task *tctx = NULL;
+	struct io_sq_data *sqd = NULL;
 	__u32 new_count[2];
 	int i, ret;
 
-	if (!tctx || !tctx->io_wq)
-		return -EINVAL;
 	if (copy_from_user(new_count, arg, sizeof(new_count)))
 		return -EFAULT;
 	for (i = 0; i < ARRAY_SIZE(new_count); i++)
 		if (new_count[i] > INT_MAX)
 			return -EINVAL;
 
+	if (ctx->flags & IORING_SETUP_SQPOLL) {
+		sqd = ctx->sq_data;
+		if (sqd) {
+			/*
+			 * Observe the correct sqd->lock -> ctx->uring_lock
+			 * ordering. Fine to drop uring_lock here, we hold
+			 * a ref to the ctx.
+			 */
+			mutex_unlock(&ctx->uring_lock);
+			mutex_lock(&sqd->lock);
+			mutex_lock(&ctx->uring_lock);
+			tctx = sqd->thread->io_uring;
+		}
+	} else {
+		tctx = current->io_uring;
+	}
+
+	ret = -EINVAL;
+	if (!tctx || !tctx->io_wq)
+		goto err;
+
 	ret = io_wq_max_workers(tctx->io_wq, new_count);
 	if (ret)
-		return ret;
+		goto err;
+
+	if (sqd)
+		mutex_unlock(&sqd->lock);
 
 	if (copy_to_user(arg, new_count, sizeof(new_count)))
 		return -EFAULT;
 
 	return 0;
+err:
+	if (sqd)
+		mutex_unlock(&sqd->lock);
+	return ret;
 }
 
 static bool io_register_op_must_quiesce(int op)
@@ -10795,7 +10865,7 @@ static int __init io_uring_init(void)
 	BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8));
 
 	BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
-	BUILD_BUG_ON(__REQ_F_LAST_BIT >= 8 * sizeof(int));
+	BUILD_BUG_ON(__REQ_F_LAST_BIT > 8 * sizeof(int));
 
 	req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC |
 				SLAB_ACCOUNT);
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index d47a0d96bf30..8ca3527189f8 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -179,8 +179,8 @@ static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf)
 	if (!jbd2_journal_has_csum_v2or3(j))
 		return 1;
 
-	tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize -
-			sizeof(struct jbd2_journal_block_tail));
+	tail = (struct jbd2_journal_block_tail *)((char *)buf +
+		j->j_blocksize - sizeof(struct jbd2_journal_block_tail));
 	provided = tail->t_checksum;
 	tail->t_checksum = 0;
 	calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
@@ -196,7 +196,7 @@ static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf)
 static int count_tags(journal_t *journal, struct buffer_head *bh)
 {
 	char *			tagp;
-	journal_block_tag_t *	tag;
+	journal_block_tag_t	tag;
 	int			nr = 0, size = journal->j_blocksize;
 	int			tag_bytes = journal_tag_bytes(journal);
 
@@ -206,14 +206,14 @@ static int count_tags(journal_t *journal, struct buffer_head *bh)
 	tagp = &bh->b_data[sizeof(journal_header_t)];
 
 	while ((tagp - bh->b_data + tag_bytes) <= size) {
-		tag = (journal_block_tag_t *) tagp;
+		memcpy(&tag, tagp, sizeof(tag));
 
 		nr++;
 		tagp += tag_bytes;
-		if (!(tag->t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID)))
+		if (!(tag.t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID)))
 			tagp += 16;
 
-		if (tag->t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG))
+		if (tag.t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG))
 			break;
 	}
 
@@ -433,9 +433,9 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
 }
 
 static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
+				      journal_block_tag3_t *tag3,
 				      void *buf, __u32 sequence)
 {
-	journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag;
 	__u32 csum32;
 	__be32 seq;
 
@@ -496,7 +496,7 @@ static int do_one_pass(journal_t *journal,
 	while (1) {
 		int			flags;
 		char *			tagp;
-		journal_block_tag_t *	tag;
+		journal_block_tag_t	tag;
 		struct buffer_head *	obh;
 		struct buffer_head *	nbh;
 
@@ -613,8 +613,8 @@ static int do_one_pass(journal_t *journal,
 			       <= journal->j_blocksize - descr_csum_size) {
 				unsigned long io_block;
 
-				tag = (journal_block_tag_t *) tagp;
-				flags = be16_to_cpu(tag->t_flags);
+				memcpy(&tag, tagp, sizeof(tag));
+				flags = be16_to_cpu(tag.t_flags);
 
 				io_block = next_log_block++;
 				wrap(journal, next_log_block);
@@ -632,7 +632,7 @@ static int do_one_pass(journal_t *journal,
 
 					J_ASSERT(obh != NULL);
 					blocknr = read_tag_block(journal,
-								 tag);
+								 &tag);
 
 					/* If the block has been
 					 * revoked, then we're all done
@@ -647,8 +647,8 @@ static int do_one_pass(journal_t *journal,
 
 					/* Look for block corruption */
 					if (!jbd2_block_tag_csum_verify(
-						journal, tag, obh->b_data,
-						be32_to_cpu(tmp->h_sequence))) {
+			journal, &tag, (journal_block_tag3_t *)tagp,
+			obh->b_data, be32_to_cpu(tmp->h_sequence))) {
 						brelse(obh);
 						success = -EFSBADCRC;
 						printk(KERN_ERR "JBD2: Invalid "
@@ -760,7 +760,6 @@ static int do_one_pass(journal_t *journal,
 				 */
 				jbd_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n",
 					  next_commit_ID);
-				err = 0;
 				brelse(bh);
 				goto done;
 			}
@@ -897,7 +896,7 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
 {
 	jbd2_journal_revoke_header_t *header;
 	int offset, max;
-	int csum_size = 0;
+	unsigned csum_size = 0;
 	__u32 rcount;
 	int record_len = 4;
 
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 8804e126805f..6a3caedd2285 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -223,9 +223,15 @@ static void sub_reserved_credits(journal_t *journal, int blocks)
  * with j_state_lock held for reading. Returns 0 if handle joined the running
  * transaction. Returns 1 if we had to wait, j_state_lock is dropped, and
  * caller must retry.
+ *
+ * Note: because j_state_lock may be dropped depending on the return
+ * value, we need to fake out sparse so ti doesn't complain about a
+ * locking imbalance.  Callers of add_transaction_credits will need to
+ * make a similar accomodation.
  */
 static int add_transaction_credits(journal_t *journal, int blocks,
 				   int rsv_blocks)
+__must_hold(&journal->j_state_lock)
 {
 	transaction_t *t = journal->j_running_transaction;
 	int needed;
@@ -238,6 +244,7 @@ static int add_transaction_credits(journal_t *journal, int blocks,
 	if (t->t_state != T_RUNNING) {
 		WARN_ON_ONCE(t->t_state >= T_FLUSH);
 		wait_transaction_locked(journal);
+		__acquire(&journal->j_state_lock); /* fake out sparse */
 		return 1;
 	}
 
@@ -266,10 +273,12 @@ static int add_transaction_credits(journal_t *journal, int blocks,
 			wait_event(journal->j_wait_reserved,
 				   atomic_read(&journal->j_reserved_credits) + total <=
 				   journal->j_max_transaction_buffers);
+			__acquire(&journal->j_state_lock); /* fake out sparse */
 			return 1;
 		}
 
 		wait_transaction_locked(journal);
+		__acquire(&journal->j_state_lock); /* fake out sparse */
 		return 1;
 	}
 
@@ -293,6 +302,7 @@ static int add_transaction_credits(journal_t *journal, int blocks,
 					journal->j_max_transaction_buffers)
 			__jbd2_log_wait_for_space(journal);
 		write_unlock(&journal->j_state_lock);
+		__acquire(&journal->j_state_lock); /* fake out sparse */
 		return 1;
 	}
 
@@ -310,6 +320,7 @@ static int add_transaction_credits(journal_t *journal, int blocks,
 		wait_event(journal->j_wait_reserved,
 			 atomic_read(&journal->j_reserved_credits) + rsv_blocks
 			 <= journal->j_max_transaction_buffers / 2);
+		__acquire(&journal->j_state_lock); /* fake out sparse */
 		return 1;
 	}
 	return 0;
@@ -413,8 +424,14 @@ repeat:
 
 	if (!handle->h_reserved) {
 		/* We may have dropped j_state_lock - restart in that case */
-		if (add_transaction_credits(journal, blocks, rsv_blocks))
+		if (add_transaction_credits(journal, blocks, rsv_blocks)) {
+			/*
+			 * add_transaction_credits releases
+			 * j_state_lock on a non-zero return
+			 */
+			__release(&journal->j_state_lock);
 			goto repeat;
+		}
 	} else {
 		/*
 		 * We have handle reserved so we are allowed to join T_LOCKED
@@ -1404,7 +1421,7 @@ void jbd2_journal_set_triggers(struct buffer_head *bh,
 {
 	struct journal_head *jh = jbd2_journal_grab_journal_head(bh);
 
-	if (WARN_ON(!jh))
+	if (WARN_ON_ONCE(!jh))
 		return;
 	jh->b_triggers = type;
 	jbd2_journal_put_journal_head(jh);
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 55a79df70d24..e945e3484788 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -173,12 +173,15 @@ static void *jffs2_acl_to_medium(const struct posix_acl *acl, size_t *size)
 	return ERR_PTR(-EINVAL);
 }
 
-struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
+struct posix_acl *jffs2_get_acl(struct inode *inode, int type, bool rcu)
 {
 	struct posix_acl *acl;
 	char *value = NULL;
 	int rc, xprefix;
 
+	if (rcu)
+		return ERR_PTR(-ECHILD);
+
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		xprefix = JFFS2_XPREFIX_ACL_ACCESS;
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index 62c50da9d493..9d9fb7cf093e 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -27,7 +27,7 @@ struct jffs2_acl_header {
 
 #ifdef CONFIG_JFFS2_FS_POSIX_ACL
 
-struct posix_acl *jffs2_get_acl(struct inode *inode, int type);
+struct posix_acl *jffs2_get_acl(struct inode *inode, int type, bool rcu);
 int jffs2_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
 		  struct posix_acl *acl, int type);
 extern int jffs2_init_acl_pre(struct inode *, struct inode *, umode_t *);
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 43c285c3d2a7..a653f34c6e26 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -14,13 +14,16 @@
 #include "jfs_xattr.h"
 #include "jfs_acl.h"
 
-struct posix_acl *jfs_get_acl(struct inode *inode, int type)
+struct posix_acl *jfs_get_acl(struct inode *inode, int type, bool rcu)
 {
 	struct posix_acl *acl;
 	char *ea_name;
 	int size;
 	char *value = NULL;
 
+	if (rcu)
+		return ERR_PTR(-ECHILD);
+
 	switch(type) {
 		case ACL_TYPE_ACCESS:
 			ea_name = XATTR_NAME_POSIX_ACL_ACCESS;
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index 7ae389a7a366..3de40286d31f 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -7,7 +7,7 @@
 
 #ifdef CONFIG_JFS_POSIX_ACL
 
-struct posix_acl *jfs_get_acl(struct inode *inode, int type);
+struct posix_acl *jfs_get_acl(struct inode *inode, int type, bool rcu);
 int jfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
 		struct posix_acl *acl, int type);
 int jfs_init_acl(tid_t, struct inode *, struct inode *);
diff --git a/fs/ksmbd/ndr.c b/fs/ksmbd/ndr.c
index 2243a2c64b37..8317f7ca402b 100644
--- a/fs/ksmbd/ndr.c
+++ b/fs/ksmbd/ndr.c
@@ -28,37 +28,60 @@ static int try_to_realloc_ndr_blob(struct ndr *n, size_t sz)
 	return 0;
 }
 
-static void ndr_write_int16(struct ndr *n, __u16 value)
+static int ndr_write_int16(struct ndr *n, __u16 value)
 {
-	if (n->length <= n->offset + sizeof(value))
-		try_to_realloc_ndr_blob(n, sizeof(value));
+	if (n->length <= n->offset + sizeof(value)) {
+		int ret;
+
+		ret = try_to_realloc_ndr_blob(n, sizeof(value));
+		if (ret)
+			return ret;
+	}
 
 	*(__le16 *)ndr_get_field(n) = cpu_to_le16(value);
 	n->offset += sizeof(value);
+	return 0;
 }
 
-static void ndr_write_int32(struct ndr *n, __u32 value)
+static int ndr_write_int32(struct ndr *n, __u32 value)
 {
-	if (n->length <= n->offset + sizeof(value))
-		try_to_realloc_ndr_blob(n, sizeof(value));
+	if (n->length <= n->offset + sizeof(value)) {
+		int ret;
+
+		ret = try_to_realloc_ndr_blob(n, sizeof(value));
+		if (ret)
+			return ret;
+	}
 
 	*(__le32 *)ndr_get_field(n) = cpu_to_le32(value);
 	n->offset += sizeof(value);
+	return 0;
 }
 
-static void ndr_write_int64(struct ndr *n, __u64 value)
+static int ndr_write_int64(struct ndr *n, __u64 value)
 {
-	if (n->length <= n->offset + sizeof(value))
-		try_to_realloc_ndr_blob(n, sizeof(value));
+	if (n->length <= n->offset + sizeof(value)) {
+		int ret;
+
+		ret = try_to_realloc_ndr_blob(n, sizeof(value));
+		if (ret)
+			return ret;
+	}
 
 	*(__le64 *)ndr_get_field(n) = cpu_to_le64(value);
 	n->offset += sizeof(value);
+	return 0;
 }
 
 static int ndr_write_bytes(struct ndr *n, void *value, size_t sz)
 {
-	if (n->length <= n->offset + sz)
-		try_to_realloc_ndr_blob(n, sz);
+	if (n->length <= n->offset + sz) {
+		int ret;
+
+		ret = try_to_realloc_ndr_blob(n, sz);
+		if (ret)
+			return ret;
+	}
 
 	memcpy(ndr_get_field(n), value, sz);
 	n->offset += sz;
@@ -70,8 +93,13 @@ static int ndr_write_string(struct ndr *n, char *value)
 	size_t sz;
 
 	sz = strlen(value) + 1;
-	if (n->length <= n->offset + sz)
-		try_to_realloc_ndr_blob(n, sz);
+	if (n->length <= n->offset + sz) {
+		int ret;
+
+		ret = try_to_realloc_ndr_blob(n, sz);
+		if (ret)
+			return ret;
+	}
 
 	memcpy(ndr_get_field(n), value, sz);
 	n->offset += sz;
@@ -81,9 +109,14 @@ static int ndr_write_string(struct ndr *n, char *value)
 
 static int ndr_read_string(struct ndr *n, void *value, size_t sz)
 {
-	int len = strnlen(ndr_get_field(n), sz);
+	int len;
 
-	memcpy(value, ndr_get_field(n), len);
+	if (n->offset + sz > n->length)
+		return -EINVAL;
+
+	len = strnlen(ndr_get_field(n), sz);
+	if (value)
+		memcpy(value, ndr_get_field(n), len);
 	len++;
 	n->offset += len;
 	n->offset = ALIGN(n->offset, 2);
@@ -92,41 +125,52 @@ static int ndr_read_string(struct ndr *n, void *value, size_t sz)
 
 static int ndr_read_bytes(struct ndr *n, void *value, size_t sz)
 {
-	memcpy(value, ndr_get_field(n), sz);
+	if (n->offset + sz > n->length)
+		return -EINVAL;
+
+	if (value)
+		memcpy(value, ndr_get_field(n), sz);
 	n->offset += sz;
 	return 0;
 }
 
-static __u16 ndr_read_int16(struct ndr *n)
+static int ndr_read_int16(struct ndr *n, __u16 *value)
 {
-	__u16 ret;
+	if (n->offset + sizeof(__u16) > n->length)
+		return -EINVAL;
 
-	ret = le16_to_cpu(*(__le16 *)ndr_get_field(n));
+	if (value)
+		*value = le16_to_cpu(*(__le16 *)ndr_get_field(n));
 	n->offset += sizeof(__u16);
-	return ret;
+	return 0;
 }
 
-static __u32 ndr_read_int32(struct ndr *n)
+static int ndr_read_int32(struct ndr *n, __u32 *value)
 {
-	__u32 ret;
+	if (n->offset + sizeof(__u32) > n->length)
+		return 0;
 
-	ret = le32_to_cpu(*(__le32 *)ndr_get_field(n));
+	if (value)
+		*value = le32_to_cpu(*(__le32 *)ndr_get_field(n));
 	n->offset += sizeof(__u32);
-	return ret;
+	return 0;
 }
 
-static __u64 ndr_read_int64(struct ndr *n)
+static int ndr_read_int64(struct ndr *n, __u64 *value)
 {
-	__u64 ret;
+	if (n->offset + sizeof(__u64) > n->length)
+		return -EINVAL;
 
-	ret = le64_to_cpu(*(__le64 *)ndr_get_field(n));
+	if (value)
+		*value = le64_to_cpu(*(__le64 *)ndr_get_field(n));
 	n->offset += sizeof(__u64);
-	return ret;
+	return 0;
 }
 
 int ndr_encode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da)
 {
 	char hex_attr[12] = {0};
+	int ret;
 
 	n->offset = 0;
 	n->length = 1024;
@@ -136,97 +180,161 @@ int ndr_encode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da)
 
 	if (da->version == 3) {
 		snprintf(hex_attr, 10, "0x%x", da->attr);
-		ndr_write_string(n, hex_attr);
+		ret = ndr_write_string(n, hex_attr);
 	} else {
-		ndr_write_string(n, "");
+		ret = ndr_write_string(n, "");
 	}
-	ndr_write_int16(n, da->version);
-	ndr_write_int32(n, da->version);
+	if (ret)
+		return ret;
+
+	ret = ndr_write_int16(n, da->version);
+	if (ret)
+		return ret;
+
+	ret = ndr_write_int32(n, da->version);
+	if (ret)
+		return ret;
+
+	ret = ndr_write_int32(n, da->flags);
+	if (ret)
+		return ret;
+
+	ret = ndr_write_int32(n, da->attr);
+	if (ret)
+		return ret;
 
-	ndr_write_int32(n, da->flags);
-	ndr_write_int32(n, da->attr);
 	if (da->version == 3) {
-		ndr_write_int32(n, da->ea_size);
-		ndr_write_int64(n, da->size);
-		ndr_write_int64(n, da->alloc_size);
+		ret = ndr_write_int32(n, da->ea_size);
+		if (ret)
+			return ret;
+		ret = ndr_write_int64(n, da->size);
+		if (ret)
+			return ret;
+		ret = ndr_write_int64(n, da->alloc_size);
 	} else {
-		ndr_write_int64(n, da->itime);
+		ret = ndr_write_int64(n, da->itime);
 	}
-	ndr_write_int64(n, da->create_time);
+	if (ret)
+		return ret;
+
+	ret = ndr_write_int64(n, da->create_time);
+	if (ret)
+		return ret;
+
 	if (da->version == 3)
-		ndr_write_int64(n, da->change_time);
-	return 0;
+		ret = ndr_write_int64(n, da->change_time);
+	return ret;
 }
 
 int ndr_decode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da)
 {
-	char *hex_attr;
-	int version2;
-
-	hex_attr = kzalloc(n->length, GFP_KERNEL);
-	if (!hex_attr)
-		return -ENOMEM;
+	char hex_attr[12];
+	unsigned int version2;
+	int ret;
 
 	n->offset = 0;
-	ndr_read_string(n, hex_attr, n->length);
-	kfree(hex_attr);
-	da->version = ndr_read_int16(n);
+	ret = ndr_read_string(n, hex_attr, sizeof(hex_attr));
+	if (ret)
+		return ret;
+
+	ret = ndr_read_int16(n, &da->version);
+	if (ret)
+		return ret;
 
 	if (da->version != 3 && da->version != 4) {
 		pr_err("v%d version is not supported\n", da->version);
 		return -EINVAL;
 	}
 
-	version2 = ndr_read_int32(n);
+	ret = ndr_read_int32(n, &version2);
+	if (ret)
+		return ret;
+
 	if (da->version != version2) {
 		pr_err("ndr version mismatched(version: %d, version2: %d)\n",
 		       da->version, version2);
 		return -EINVAL;
 	}
 
-	ndr_read_int32(n);
-	da->attr = ndr_read_int32(n);
+	ret = ndr_read_int32(n, NULL);
+	if (ret)
+		return ret;
+
+	ret = ndr_read_int32(n, &da->attr);
+	if (ret)
+		return ret;
+
 	if (da->version == 4) {
-		da->itime = ndr_read_int64(n);
-		da->create_time = ndr_read_int64(n);
+		ret = ndr_read_int64(n, &da->itime);
+		if (ret)
+			return ret;
+
+		ret = ndr_read_int64(n, &da->create_time);
 	} else {
-		ndr_read_int32(n);
-		ndr_read_int64(n);
-		ndr_read_int64(n);
-		da->create_time = ndr_read_int64(n);
-		ndr_read_int64(n);
+		ret = ndr_read_int32(n, NULL);
+		if (ret)
+			return ret;
+
+		ret = ndr_read_int64(n, NULL);
+		if (ret)
+			return ret;
+
+		ret = ndr_read_int64(n, NULL);
+		if (ret)
+			return ret;
+
+		ret = ndr_read_int64(n, &da->create_time);
+		if (ret)
+			return ret;
+
+		ret = ndr_read_int64(n, NULL);
 	}
 
-	return 0;
+	return ret;
 }
 
 static int ndr_encode_posix_acl_entry(struct ndr *n, struct xattr_smb_acl *acl)
 {
-	int i;
+	int i, ret;
+
+	ret = ndr_write_int32(n, acl->count);
+	if (ret)
+		return ret;
 
-	ndr_write_int32(n, acl->count);
 	n->offset = ALIGN(n->offset, 8);
-	ndr_write_int32(n, acl->count);
-	ndr_write_int32(n, 0);
+	ret = ndr_write_int32(n, acl->count);
+	if (ret)
+		return ret;
+
+	ret = ndr_write_int32(n, 0);
+	if (ret)
+		return ret;
 
 	for (i = 0; i < acl->count; i++) {
 		n->offset = ALIGN(n->offset, 8);
-		ndr_write_int16(n, acl->entries[i].type);
-		ndr_write_int16(n, acl->entries[i].type);
+		ret = ndr_write_int16(n, acl->entries[i].type);
+		if (ret)
+			return ret;
+
+		ret = ndr_write_int16(n, acl->entries[i].type);
+		if (ret)
+			return ret;
 
 		if (acl->entries[i].type == SMB_ACL_USER) {
 			n->offset = ALIGN(n->offset, 8);
-			ndr_write_int64(n, acl->entries[i].uid);
+			ret = ndr_write_int64(n, acl->entries[i].uid);
 		} else if (acl->entries[i].type == SMB_ACL_GROUP) {
 			n->offset = ALIGN(n->offset, 8);
-			ndr_write_int64(n, acl->entries[i].gid);
+			ret = ndr_write_int64(n, acl->entries[i].gid);
 		}
+		if (ret)
+			return ret;
 
 		/* push permission */
-		ndr_write_int32(n, acl->entries[i].perm);
+		ret = ndr_write_int32(n, acl->entries[i].perm);
 	}
 
-	return 0;
+	return ret;
 }
 
 int ndr_encode_posix_acl(struct ndr *n,
@@ -235,7 +343,8 @@ int ndr_encode_posix_acl(struct ndr *n,
 			 struct xattr_smb_acl *acl,
 			 struct xattr_smb_acl *def_acl)
 {
-	int ref_id = 0x00020000;
+	unsigned int ref_id = 0x00020000;
+	int ret;
 
 	n->offset = 0;
 	n->length = 1024;
@@ -245,35 +354,46 @@ int ndr_encode_posix_acl(struct ndr *n,
 
 	if (acl) {
 		/* ACL ACCESS */
-		ndr_write_int32(n, ref_id);
+		ret = ndr_write_int32(n, ref_id);
 		ref_id += 4;
 	} else {
-		ndr_write_int32(n, 0);
+		ret = ndr_write_int32(n, 0);
 	}
+	if (ret)
+		return ret;
 
 	if (def_acl) {
 		/* DEFAULT ACL ACCESS */
-		ndr_write_int32(n, ref_id);
+		ret = ndr_write_int32(n, ref_id);
 		ref_id += 4;
 	} else {
-		ndr_write_int32(n, 0);
+		ret = ndr_write_int32(n, 0);
 	}
-
-	ndr_write_int64(n, from_kuid(user_ns, inode->i_uid));
-	ndr_write_int64(n, from_kgid(user_ns, inode->i_gid));
-	ndr_write_int32(n, inode->i_mode);
+	if (ret)
+		return ret;
+
+	ret = ndr_write_int64(n, from_kuid(&init_user_ns, i_uid_into_mnt(user_ns, inode)));
+	if (ret)
+		return ret;
+	ret = ndr_write_int64(n, from_kgid(&init_user_ns, i_gid_into_mnt(user_ns, inode)));
+	if (ret)
+		return ret;
+	ret = ndr_write_int32(n, inode->i_mode);
+	if (ret)
+		return ret;
 
 	if (acl) {
-		ndr_encode_posix_acl_entry(n, acl);
-		if (def_acl)
-			ndr_encode_posix_acl_entry(n, def_acl);
+		ret = ndr_encode_posix_acl_entry(n, acl);
+		if (def_acl && !ret)
+			ret = ndr_encode_posix_acl_entry(n, def_acl);
 	}
-	return 0;
+	return ret;
 }
 
 int ndr_encode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl)
 {
-	int ref_id = 0x00020004;
+	unsigned int ref_id = 0x00020004;
+	int ret;
 
 	n->offset = 0;
 	n->length = 2048;
@@ -281,36 +401,65 @@ int ndr_encode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl)
 	if (!n->data)
 		return -ENOMEM;
 
-	ndr_write_int16(n, acl->version);
-	ndr_write_int32(n, acl->version);
-	ndr_write_int16(n, 2);
-	ndr_write_int32(n, ref_id);
+	ret = ndr_write_int16(n, acl->version);
+	if (ret)
+		return ret;
+
+	ret = ndr_write_int32(n, acl->version);
+	if (ret)
+		return ret;
+
+	ret = ndr_write_int16(n, 2);
+	if (ret)
+		return ret;
+
+	ret = ndr_write_int32(n, ref_id);
+	if (ret)
+		return ret;
 
 	/* push hash type and hash 64bytes */
-	ndr_write_int16(n, acl->hash_type);
-	ndr_write_bytes(n, acl->hash, XATTR_SD_HASH_SIZE);
-	ndr_write_bytes(n, acl->desc, acl->desc_len);
-	ndr_write_int64(n, acl->current_time);
-	ndr_write_bytes(n, acl->posix_acl_hash, XATTR_SD_HASH_SIZE);
+	ret = ndr_write_int16(n, acl->hash_type);
+	if (ret)
+		return ret;
 
-	/* push ndr for security descriptor */
-	ndr_write_bytes(n, acl->sd_buf, acl->sd_size);
+	ret = ndr_write_bytes(n, acl->hash, XATTR_SD_HASH_SIZE);
+	if (ret)
+		return ret;
 
-	return 0;
+	ret = ndr_write_bytes(n, acl->desc, acl->desc_len);
+	if (ret)
+		return ret;
+
+	ret = ndr_write_int64(n, acl->current_time);
+	if (ret)
+		return ret;
+
+	ret = ndr_write_bytes(n, acl->posix_acl_hash, XATTR_SD_HASH_SIZE);
+	if (ret)
+		return ret;
+
+	/* push ndr for security descriptor */
+	ret = ndr_write_bytes(n, acl->sd_buf, acl->sd_size);
+	return ret;
 }
 
 int ndr_decode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl)
 {
-	int version2;
+	unsigned int version2;
+	int ret;
 
 	n->offset = 0;
-	acl->version = ndr_read_int16(n);
+	ret = ndr_read_int16(n, &acl->version);
+	if (ret)
+		return ret;
 	if (acl->version != 4) {
 		pr_err("v%d version is not supported\n", acl->version);
 		return -EINVAL;
 	}
 
-	version2 = ndr_read_int32(n);
+	ret = ndr_read_int32(n, &version2);
+	if (ret)
+		return ret;
 	if (acl->version != version2) {
 		pr_err("ndr version mismatched(version: %d, version2: %d)\n",
 		       acl->version, version2);
@@ -318,11 +467,22 @@ int ndr_decode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl)
 	}
 
 	/* Read Level */
-	ndr_read_int16(n);
+	ret = ndr_read_int16(n, NULL);
+	if (ret)
+		return ret;
+
 	/* Read Ref Id */
-	ndr_read_int32(n);
-	acl->hash_type = ndr_read_int16(n);
-	ndr_read_bytes(n, acl->hash, XATTR_SD_HASH_SIZE);
+	ret = ndr_read_int32(n, NULL);
+	if (ret)
+		return ret;
+
+	ret = ndr_read_int16(n, &acl->hash_type);
+	if (ret)
+		return ret;
+
+	ret = ndr_read_bytes(n, acl->hash, XATTR_SD_HASH_SIZE);
+	if (ret)
+		return ret;
 
 	ndr_read_bytes(n, acl->desc, 10);
 	if (strncmp(acl->desc, "posix_acl", 9)) {
@@ -331,15 +491,20 @@ int ndr_decode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl)
 	}
 
 	/* Read Time */
-	ndr_read_int64(n);
+	ret = ndr_read_int64(n, NULL);
+	if (ret)
+		return ret;
+
 	/* Read Posix ACL hash */
-	ndr_read_bytes(n, acl->posix_acl_hash, XATTR_SD_HASH_SIZE);
+	ret = ndr_read_bytes(n, acl->posix_acl_hash, XATTR_SD_HASH_SIZE);
+	if (ret)
+		return ret;
+
 	acl->sd_size = n->length - n->offset;
 	acl->sd_buf = kzalloc(acl->sd_size, GFP_KERNEL);
 	if (!acl->sd_buf)
 		return -ENOMEM;
 
-	ndr_read_bytes(n, acl->sd_buf, acl->sd_size);
-
-	return 0;
+	ret = ndr_read_bytes(n, acl->sd_buf, acl->sd_size);
+	return ret;
 }
diff --git a/fs/ksmbd/oplock.c b/fs/ksmbd/oplock.c
index 6ace6c2f22dc..16b6236d1bd2 100644
--- a/fs/ksmbd/oplock.c
+++ b/fs/ksmbd/oplock.c
@@ -1614,9 +1614,11 @@ void create_posix_rsp_buf(char *cc, struct ksmbd_file *fp)
 	buf->nlink = cpu_to_le32(inode->i_nlink);
 	buf->reparse_tag = cpu_to_le32(fp->volatile_id);
 	buf->mode = cpu_to_le32(inode->i_mode);
-	id_to_sid(from_kuid(user_ns, inode->i_uid),
+	id_to_sid(from_kuid_munged(&init_user_ns,
+				   i_uid_into_mnt(user_ns, inode)),
 		  SIDNFS_USER, (struct smb_sid *)&buf->SidBuffer[0]);
-	id_to_sid(from_kgid(user_ns, inode->i_gid),
+	id_to_sid(from_kgid_munged(&init_user_ns,
+				   i_gid_into_mnt(user_ns, inode)),
 		  SIDNFS_GROUP, (struct smb_sid *)&buf->SidBuffer[20]);
 }
 
diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
index d329ea49fa14..c86164dc70bb 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/ksmbd/smb2pdu.c
@@ -2381,10 +2381,12 @@ static int smb2_create_sd_buffer(struct ksmbd_work *work,
 			    le32_to_cpu(sd_buf->ccontext.DataLength), true);
 }
 
-static void ksmbd_acls_fattr(struct smb_fattr *fattr, struct inode *inode)
+static void ksmbd_acls_fattr(struct smb_fattr *fattr,
+			     struct user_namespace *mnt_userns,
+			     struct inode *inode)
 {
-	fattr->cf_uid = inode->i_uid;
-	fattr->cf_gid = inode->i_gid;
+	fattr->cf_uid = i_uid_into_mnt(mnt_userns, inode);
+	fattr->cf_gid = i_gid_into_mnt(mnt_userns, inode);
 	fattr->cf_mode = inode->i_mode;
 	fattr->cf_acls = NULL;
 	fattr->cf_dacls = NULL;
@@ -2893,7 +2895,7 @@ int smb2_open(struct ksmbd_work *work)
 					struct smb_ntsd *pntsd;
 					int pntsd_size, ace_num = 0;
 
-					ksmbd_acls_fattr(&fattr, inode);
+					ksmbd_acls_fattr(&fattr, user_ns, inode);
 					if (fattr.cf_acls)
 						ace_num = fattr.cf_acls->a_count;
 					if (fattr.cf_dacls)
@@ -3324,7 +3326,6 @@ static int dentry_name(struct ksmbd_dir_info *d_info, int info_level)
  */
 static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level,
 				       struct ksmbd_dir_info *d_info,
-				       struct user_namespace *user_ns,
 				       struct ksmbd_kstat *ksmbd_kstat)
 {
 	int next_entry_offset = 0;
@@ -3478,9 +3479,9 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level,
 			S_ISDIR(ksmbd_kstat->kstat->mode) ? ATTR_DIRECTORY_LE : ATTR_ARCHIVE_LE;
 		if (d_info->hide_dot_file && d_info->name[0] == '.')
 			posix_info->DosAttributes |= ATTR_HIDDEN_LE;
-		id_to_sid(from_kuid(user_ns, ksmbd_kstat->kstat->uid),
+		id_to_sid(from_kuid_munged(&init_user_ns, ksmbd_kstat->kstat->uid),
 			  SIDNFS_USER, (struct smb_sid *)&posix_info->SidBuffer[0]);
-		id_to_sid(from_kgid(user_ns, ksmbd_kstat->kstat->gid),
+		id_to_sid(from_kgid_munged(&init_user_ns, ksmbd_kstat->kstat->gid),
 			  SIDNFS_GROUP, (struct smb_sid *)&posix_info->SidBuffer[20]);
 		memcpy(posix_info->name, conv_name, conv_len);
 		posix_info->name_len = cpu_to_le32(conv_len);
@@ -3543,9 +3544,9 @@ static int process_query_dir_entries(struct smb2_query_dir_private *priv)
 			return -EINVAL;
 
 		lock_dir(priv->dir_fp);
-		dent = lookup_one_len(priv->d_info->name,
-				      priv->dir_fp->filp->f_path.dentry,
-				      priv->d_info->name_len);
+		dent = lookup_one(user_ns, priv->d_info->name,
+				  priv->dir_fp->filp->f_path.dentry,
+				  priv->d_info->name_len);
 		unlock_dir(priv->dir_fp);
 
 		if (IS_ERR(dent)) {
@@ -3571,7 +3572,6 @@ static int process_query_dir_entries(struct smb2_query_dir_private *priv)
 		rc = smb2_populate_readdir_entry(priv->work->conn,
 						 priv->info_level,
 						 priv->d_info,
-						 user_ns,
 						 &ksmbd_kstat);
 		dput(dent);
 		if (rc)
@@ -5008,7 +5008,7 @@ static int smb2_get_info_sec(struct ksmbd_work *work,
 
 	user_ns = file_mnt_user_ns(fp->filp);
 	inode = file_inode(fp->filp);
-	ksmbd_acls_fattr(&fattr, inode);
+	ksmbd_acls_fattr(&fattr, user_ns, inode);
 
 	if (test_share_config_flag(work->tcon->share_conf,
 				   KSMBD_SHARE_FLAG_ACL_XATTR))
@@ -5246,7 +5246,9 @@ int smb2_echo(struct ksmbd_work *work)
 	return 0;
 }
 
-static int smb2_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
+static int smb2_rename(struct ksmbd_work *work,
+		       struct ksmbd_file *fp,
+		       struct user_namespace *user_ns,
 		       struct smb2_file_rename_info *file_info,
 		       struct nls_table *local_nls)
 {
@@ -5310,7 +5312,7 @@ static int smb2_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
 		if (rc)
 			goto out;
 
-		rc = ksmbd_vfs_setxattr(file_mnt_user_ns(fp->filp),
+		rc = ksmbd_vfs_setxattr(user_ns,
 					fp->filp->f_path.dentry,
 					xattr_stream_name,
 					NULL, 0, 0);
@@ -5438,11 +5440,11 @@ static int set_file_basic_info(struct ksmbd_file *fp, char *buf,
 {
 	struct smb2_file_all_info *file_info;
 	struct iattr attrs;
-	struct iattr temp_attrs;
+	struct timespec64 ctime;
 	struct file *filp;
 	struct inode *inode;
 	struct user_namespace *user_ns;
-	int rc;
+	int rc = 0;
 
 	if (!(fp->daccess & FILE_WRITE_ATTRIBUTES_LE))
 		return -EACCES;
@@ -5462,11 +5464,11 @@ static int set_file_basic_info(struct ksmbd_file *fp, char *buf,
 	}
 
 	if (file_info->ChangeTime) {
-		temp_attrs.ia_ctime = ksmbd_NTtimeToUnix(file_info->ChangeTime);
-		attrs.ia_ctime = temp_attrs.ia_ctime;
+		attrs.ia_ctime = ksmbd_NTtimeToUnix(file_info->ChangeTime);
+		ctime = attrs.ia_ctime;
 		attrs.ia_valid |= ATTR_CTIME;
 	} else {
-		temp_attrs.ia_ctime = inode->i_ctime;
+		ctime = inode->i_ctime;
 	}
 
 	if (file_info->LastWriteTime) {
@@ -5505,13 +5507,6 @@ static int set_file_basic_info(struct ksmbd_file *fp, char *buf,
 		rc = 0;
 	}
 
-	/*
-	 * HACK : set ctime here to avoid ctime changed
-	 * when file_info->ChangeTime is zero.
-	 */
-	attrs.ia_ctime = temp_attrs.ia_ctime;
-	attrs.ia_valid |= ATTR_CTIME;
-
 	if (attrs.ia_valid) {
 		struct dentry *dentry = filp->f_path.dentry;
 		struct inode *inode = d_inode(dentry);
@@ -5519,17 +5514,15 @@ static int set_file_basic_info(struct ksmbd_file *fp, char *buf,
 		if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
 			return -EACCES;
 
-		rc = setattr_prepare(user_ns, dentry, &attrs);
-		if (rc)
-			return -EINVAL;
-
 		inode_lock(inode);
-		setattr_copy(user_ns, inode, &attrs);
-		attrs.ia_valid &= ~ATTR_CTIME;
 		rc = notify_change(user_ns, dentry, &attrs, NULL);
+		if (!rc) {
+			inode->i_ctime = ctime;
+			mark_inode_dirty(inode);
+		}
 		inode_unlock(inode);
 	}
-	return 0;
+	return rc;
 }
 
 static int set_file_allocation_info(struct ksmbd_work *work,
@@ -5624,6 +5617,7 @@ static int set_end_of_file_info(struct ksmbd_work *work, struct ksmbd_file *fp,
 static int set_rename_info(struct ksmbd_work *work, struct ksmbd_file *fp,
 			   char *buf)
 {
+	struct user_namespace *user_ns;
 	struct ksmbd_file *parent_fp;
 	struct dentry *parent;
 	struct dentry *dentry = fp->filp->f_path.dentry;
@@ -5634,11 +5628,12 @@ static int set_rename_info(struct ksmbd_work *work, struct ksmbd_file *fp,
 		return -EACCES;
 	}
 
+	user_ns = file_mnt_user_ns(fp->filp);
 	if (ksmbd_stream_fd(fp))
 		goto next;
 
 	parent = dget_parent(dentry);
-	ret = ksmbd_vfs_lock_parent(parent, dentry);
+	ret = ksmbd_vfs_lock_parent(user_ns, parent, dentry);
 	if (ret) {
 		dput(parent);
 		return ret;
@@ -5655,7 +5650,7 @@ static int set_rename_info(struct ksmbd_work *work, struct ksmbd_file *fp,
 		}
 	}
 next:
-	return smb2_rename(work, fp,
+	return smb2_rename(work, fp, user_ns,
 			   (struct smb2_file_rename_info *)buf,
 			   work->sess->conn->local_nls);
 }
@@ -7116,8 +7111,8 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
 			netdev->ethtool_ops->get_link_ksettings(netdev, &cmd);
 			speed = cmd.base.speed;
 		} else {
-			pr_err("%s %s\n", netdev->name,
-			       "speed is unknown, defaulting to 1Gb/sec");
+			ksmbd_debug(SMB, "%s %s\n", netdev->name,
+				    "speed is unknown, defaulting to 1Gb/sec");
 			speed = SPEED_1000;
 		}
 
diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c
index b108b918ec84..43d3123d8b62 100644
--- a/fs/ksmbd/smb_common.c
+++ b/fs/ksmbd/smb_common.c
@@ -291,7 +291,6 @@ int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level,
 				      char *search_pattern,
 				      int (*fn)(struct ksmbd_conn *, int,
 						struct ksmbd_dir_info *,
-						struct user_namespace *,
 						struct ksmbd_kstat *))
 {
 	int i, rc = 0;
@@ -322,8 +321,7 @@ int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level,
 						    user_ns,
 						    dir->filp->f_path.dentry->d_parent,
 						    &ksmbd_kstat);
-			rc = fn(conn, info_level, d_info,
-				user_ns, &ksmbd_kstat);
+			rc = fn(conn, info_level, d_info, &ksmbd_kstat);
 			if (rc)
 				break;
 			if (d_info->out_buf_len <= 0)
diff --git a/fs/ksmbd/smb_common.h b/fs/ksmbd/smb_common.h
index eb667d85558e..57c667c1be06 100644
--- a/fs/ksmbd/smb_common.h
+++ b/fs/ksmbd/smb_common.h
@@ -511,7 +511,6 @@ int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work,
 				      int (*fn)(struct ksmbd_conn *,
 						int,
 						struct ksmbd_dir_info *,
-						struct user_namespace *,
 						struct ksmbd_kstat *));
 
 int ksmbd_extract_shortname(struct ksmbd_conn *conn,
diff --git a/fs/ksmbd/smbacl.c b/fs/ksmbd/smbacl.c
index 5456e3ad943e..0a95cdec8c80 100644
--- a/fs/ksmbd/smbacl.c
+++ b/fs/ksmbd/smbacl.c
@@ -274,24 +274,34 @@ static int sid_to_id(struct user_namespace *user_ns,
 		uid_t id;
 
 		id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]);
-		if (id > 0) {
-			uid = make_kuid(user_ns, id);
-			if (uid_valid(uid) && kuid_has_mapping(user_ns, uid)) {
-				fattr->cf_uid = uid;
-				rc = 0;
-			}
+		/*
+		 * Translate raw sid into kuid in the server's user
+		 * namespace.
+		 */
+		uid = make_kuid(&init_user_ns, id);
+
+		/* If this is an idmapped mount, apply the idmapping. */
+		uid = kuid_from_mnt(user_ns, uid);
+		if (uid_valid(uid)) {
+			fattr->cf_uid = uid;
+			rc = 0;
 		}
 	} else {
 		kgid_t gid;
 		gid_t id;
 
 		id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]);
-		if (id > 0) {
-			gid = make_kgid(user_ns, id);
-			if (gid_valid(gid) && kgid_has_mapping(user_ns, gid)) {
-				fattr->cf_gid = gid;
-				rc = 0;
-			}
+		/*
+		 * Translate raw sid into kgid in the server's user
+		 * namespace.
+		 */
+		gid = make_kgid(&init_user_ns, id);
+
+		/* If this is an idmapped mount, apply the idmapping. */
+		gid = kgid_from_mnt(user_ns, gid);
+		if (gid_valid(gid)) {
+			fattr->cf_gid = gid;
+			rc = 0;
 		}
 	}
 
@@ -587,14 +597,14 @@ static void set_posix_acl_entries_dacl(struct user_namespace *user_ns,
 			uid_t uid;
 			unsigned int sid_type = SIDOWNER;
 
-			uid = from_kuid(user_ns, pace->e_uid);
+			uid = posix_acl_uid_translate(user_ns, pace);
 			if (!uid)
 				sid_type = SIDUNIX_USER;
 			id_to_sid(uid, sid_type, sid);
 		} else if (pace->e_tag == ACL_GROUP) {
 			gid_t gid;
 
-			gid = from_kgid(user_ns, pace->e_gid);
+			gid = posix_acl_gid_translate(user_ns, pace);
 			id_to_sid(gid, SIDUNIX_GROUP, sid);
 		} else if (pace->e_tag == ACL_OTHER && !nt_aces_num) {
 			smb_copy_sid(sid, &sid_everyone);
@@ -653,12 +663,12 @@ posix_default_acl:
 		if (pace->e_tag == ACL_USER) {
 			uid_t uid;
 
-			uid = from_kuid(user_ns, pace->e_uid);
+			uid = posix_acl_uid_translate(user_ns, pace);
 			id_to_sid(uid, SIDCREATOR_OWNER, sid);
 		} else if (pace->e_tag == ACL_GROUP) {
 			gid_t gid;
 
-			gid = from_kgid(user_ns, pace->e_gid);
+			gid = posix_acl_gid_translate(user_ns, pace);
 			id_to_sid(gid, SIDCREATOR_GROUP, sid);
 		} else {
 			kfree(sid);
@@ -723,7 +733,7 @@ static void set_mode_dacl(struct user_namespace *user_ns,
 	}
 
 	/* owner RID */
-	uid = from_kuid(user_ns, fattr->cf_uid);
+	uid = from_kuid(&init_user_ns, fattr->cf_uid);
 	if (uid)
 		sid = &server_conf.domain_sid;
 	else
@@ -739,7 +749,7 @@ static void set_mode_dacl(struct user_namespace *user_ns,
 	ace_size = fill_ace_for_sid(pace, &sid_unix_groups,
 				    ACCESS_ALLOWED, 0, fattr->cf_mode, 0070);
 	pace->sid.sub_auth[pace->sid.num_subauth++] =
-		cpu_to_le32(from_kgid(user_ns, fattr->cf_gid));
+		cpu_to_le32(from_kgid(&init_user_ns, fattr->cf_gid));
 	pace->size = cpu_to_le16(ace_size + 4);
 	size += le16_to_cpu(pace->size);
 	pace = (struct smb_ace *)((char *)pndace + size);
@@ -880,7 +890,7 @@ int build_sec_desc(struct user_namespace *user_ns,
 	if (!nowner_sid_ptr)
 		return -ENOMEM;
 
-	uid = from_kuid(user_ns, fattr->cf_uid);
+	uid = from_kuid(&init_user_ns, fattr->cf_uid);
 	if (!uid)
 		sid_type = SIDUNIX_USER;
 	id_to_sid(uid, sid_type, nowner_sid_ptr);
@@ -891,7 +901,7 @@ int build_sec_desc(struct user_namespace *user_ns,
 		return -ENOMEM;
 	}
 
-	gid = from_kgid(user_ns, fattr->cf_gid);
+	gid = from_kgid(&init_user_ns, fattr->cf_gid);
 	id_to_sid(gid, SIDUNIX_GROUP, ngroup_sid_ptr);
 
 	offset = sizeof(struct smb_ntsd);
@@ -1234,11 +1244,9 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path,
 			pa_entry = posix_acls->a_entries;
 			for (i = 0; i < posix_acls->a_count; i++, pa_entry++) {
 				if (pa_entry->e_tag == ACL_USER)
-					id = from_kuid(user_ns,
-						       pa_entry->e_uid);
+					id = posix_acl_uid_translate(user_ns, pa_entry);
 				else if (pa_entry->e_tag == ACL_GROUP)
-					id = from_kgid(user_ns,
-						       pa_entry->e_gid);
+					id = posix_acl_gid_translate(user_ns, pa_entry);
 				else
 					continue;
 
@@ -1322,22 +1330,31 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
 	newattrs.ia_valid |= ATTR_MODE;
 	newattrs.ia_mode = (inode->i_mode & ~0777) | (fattr.cf_mode & 0777);
 
-	inode_lock(inode);
-	rc = notify_change(user_ns, path->dentry, &newattrs, NULL);
-	inode_unlock(inode);
-	if (rc)
-		goto out;
-
 	ksmbd_vfs_remove_acl_xattrs(user_ns, path->dentry);
 	/* Update posix acls */
 	if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && fattr.cf_dacls) {
 		rc = set_posix_acl(user_ns, inode,
 				   ACL_TYPE_ACCESS, fattr.cf_acls);
-		if (S_ISDIR(inode->i_mode) && fattr.cf_dacls)
+		if (rc < 0)
+			ksmbd_debug(SMB,
+				    "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n",
+				    rc);
+		if (S_ISDIR(inode->i_mode) && fattr.cf_dacls) {
 			rc = set_posix_acl(user_ns, inode,
 					   ACL_TYPE_DEFAULT, fattr.cf_dacls);
+			if (rc)
+				ksmbd_debug(SMB,
+					    "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n",
+					    rc);
+		}
 	}
 
+	inode_lock(inode);
+	rc = notify_change(user_ns, path->dentry, &newattrs, NULL);
+	inode_unlock(inode);
+	if (rc)
+		goto out;
+
 	/* Check it only calling from SD BUFFER context */
 	if (type_check && !(le16_to_cpu(pntsd->type) & DACL_PRESENT))
 		goto out;
diff --git a/fs/ksmbd/smbacl.h b/fs/ksmbd/smbacl.h
index 940f686a1d95..73e08cad412b 100644
--- a/fs/ksmbd/smbacl.h
+++ b/fs/ksmbd/smbacl.h
@@ -209,4 +209,29 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
 		 bool type_check);
 void id_to_sid(unsigned int cid, uint sidtype, struct smb_sid *ssid);
 void ksmbd_init_domain(u32 *sub_auth);
+
+static inline uid_t posix_acl_uid_translate(struct user_namespace *mnt_userns,
+					    struct posix_acl_entry *pace)
+{
+	kuid_t kuid;
+
+	/* If this is an idmapped mount, apply the idmapping. */
+	kuid = kuid_into_mnt(mnt_userns, pace->e_uid);
+
+	/* Translate the kuid into a userspace id ksmbd would see. */
+	return from_kuid(&init_user_ns, kuid);
+}
+
+static inline gid_t posix_acl_gid_translate(struct user_namespace *mnt_userns,
+					    struct posix_acl_entry *pace)
+{
+	kgid_t kgid;
+
+	/* If this is an idmapped mount, apply the idmapping. */
+	kgid = kgid_into_mnt(mnt_userns, pace->e_gid);
+
+	/* Translate the kgid into a userspace id ksmbd would see. */
+	return from_kgid(&init_user_ns, kgid);
+}
+
 #endif /* _SMBACL_H */
diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c
index 58f530056ac0..52b2556e76b1 100644
--- a/fs/ksmbd/transport_rdma.c
+++ b/fs/ksmbd/transport_rdma.c
@@ -1168,7 +1168,7 @@ static int smb_direct_post_send_data(struct smb_direct_transport *t,
 			pr_err("failed to map buffer\n");
 			ret = -ENOMEM;
 			goto err;
-		} else if (sg_cnt + msg->num_sge > SMB_DIRECT_MAX_SEND_SGES - 1) {
+		} else if (sg_cnt + msg->num_sge > SMB_DIRECT_MAX_SEND_SGES) {
 			pr_err("buffer not fitted into sges\n");
 			ret = -E2BIG;
 			ib_dma_unmap_sg(t->cm_id->device, sg, sg_cnt,
diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
index aee28ee6b19c..b047f2980d96 100644
--- a/fs/ksmbd/vfs.c
+++ b/fs/ksmbd/vfs.c
@@ -69,14 +69,15 @@ static void ksmbd_vfs_inherit_owner(struct ksmbd_work *work,
  *
  * the reference count of @parent isn't incremented.
  */
-int ksmbd_vfs_lock_parent(struct dentry *parent, struct dentry *child)
+int ksmbd_vfs_lock_parent(struct user_namespace *user_ns, struct dentry *parent,
+			  struct dentry *child)
 {
 	struct dentry *dentry;
 	int ret = 0;
 
 	inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
-	dentry = lookup_one_len(child->d_name.name, parent,
-				child->d_name.len);
+	dentry = lookup_one(user_ns, child->d_name.name, parent,
+			    child->d_name.len);
 	if (IS_ERR(dentry)) {
 		ret = PTR_ERR(dentry);
 		goto out_err;
@@ -102,7 +103,7 @@ int ksmbd_vfs_may_delete(struct user_namespace *user_ns,
 	int ret;
 
 	parent = dget_parent(dentry);
-	ret = ksmbd_vfs_lock_parent(parent, dentry);
+	ret = ksmbd_vfs_lock_parent(user_ns, parent, dentry);
 	if (ret) {
 		dput(parent);
 		return ret;
@@ -137,7 +138,7 @@ int ksmbd_vfs_query_maximal_access(struct user_namespace *user_ns,
 		*daccess |= FILE_EXECUTE_LE;
 
 	parent = dget_parent(dentry);
-	ret = ksmbd_vfs_lock_parent(parent, dentry);
+	ret = ksmbd_vfs_lock_parent(user_ns, parent, dentry);
 	if (ret) {
 		dput(parent);
 		return ret;
@@ -197,6 +198,7 @@ int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode)
  */
 int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode)
 {
+	struct user_namespace *user_ns;
 	struct path path;
 	struct dentry *dentry;
 	int err;
@@ -210,16 +212,16 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode)
 		return err;
 	}
 
+	user_ns = mnt_user_ns(path.mnt);
 	mode |= S_IFDIR;
-	err = vfs_mkdir(mnt_user_ns(path.mnt), d_inode(path.dentry),
-			dentry, mode);
+	err = vfs_mkdir(user_ns, d_inode(path.dentry), dentry, mode);
 	if (err) {
 		goto out;
 	} else if (d_unhashed(dentry)) {
 		struct dentry *d;
 
-		d = lookup_one_len(dentry->d_name.name, dentry->d_parent,
-				   dentry->d_name.len);
+		d = lookup_one(user_ns, dentry->d_name.name, dentry->d_parent,
+			       dentry->d_name.len);
 		if (IS_ERR(d)) {
 			err = PTR_ERR(d);
 			goto out;
@@ -582,6 +584,7 @@ int ksmbd_vfs_fsync(struct ksmbd_work *work, u64 fid, u64 p_id)
  */
 int ksmbd_vfs_remove_file(struct ksmbd_work *work, char *name)
 {
+	struct user_namespace *user_ns;
 	struct path path;
 	struct dentry *parent;
 	int err;
@@ -601,8 +604,9 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, char *name)
 		return err;
 	}
 
+	user_ns = mnt_user_ns(path.mnt);
 	parent = dget_parent(path.dentry);
-	err = ksmbd_vfs_lock_parent(parent, path.dentry);
+	err = ksmbd_vfs_lock_parent(user_ns, parent, path.dentry);
 	if (err) {
 		dput(parent);
 		path_put(&path);
@@ -616,14 +620,12 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, char *name)
 	}
 
 	if (S_ISDIR(d_inode(path.dentry)->i_mode)) {
-		err = vfs_rmdir(mnt_user_ns(path.mnt), d_inode(parent),
-				path.dentry);
+		err = vfs_rmdir(user_ns, d_inode(parent), path.dentry);
 		if (err && err != -ENOTEMPTY)
 			ksmbd_debug(VFS, "%s: rmdir failed, err %d\n", name,
 				    err);
 	} else {
-		err = vfs_unlink(mnt_user_ns(path.mnt), d_inode(parent),
-				 path.dentry, NULL);
+		err = vfs_unlink(user_ns, d_inode(parent), path.dentry, NULL);
 		if (err)
 			ksmbd_debug(VFS, "%s: unlink failed, err %d\n", name,
 				    err);
@@ -748,7 +750,8 @@ static int __ksmbd_vfs_rename(struct ksmbd_work *work,
 	if (ksmbd_override_fsids(work))
 		return -ENOMEM;
 
-	dst_dent = lookup_one_len(dst_name, dst_dent_parent, strlen(dst_name));
+	dst_dent = lookup_one(dst_user_ns, dst_name, dst_dent_parent,
+			      strlen(dst_name));
 	err = PTR_ERR(dst_dent);
 	if (IS_ERR(dst_dent)) {
 		pr_err("lookup failed %s [%d]\n", dst_name, err);
@@ -779,6 +782,7 @@ out:
 int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
 			char *newname)
 {
+	struct user_namespace *user_ns;
 	struct path dst_path;
 	struct dentry *src_dent_parent, *dst_dent_parent;
 	struct dentry *src_dent, *trap_dent, *src_child;
@@ -808,8 +812,9 @@ int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
 	trap_dent = lock_rename(src_dent_parent, dst_dent_parent);
 	dget(src_dent);
 	dget(dst_dent_parent);
-	src_child = lookup_one_len(src_dent->d_name.name, src_dent_parent,
-				   src_dent->d_name.len);
+	user_ns = file_mnt_user_ns(fp->filp);
+	src_child = lookup_one(user_ns, src_dent->d_name.name, src_dent_parent,
+			       src_dent->d_name.len);
 	if (IS_ERR(src_child)) {
 		err = PTR_ERR(src_child);
 		goto out_lock;
@@ -823,7 +828,7 @@ int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
 	dput(src_child);
 
 	err = __ksmbd_vfs_rename(work,
-				 file_mnt_user_ns(fp->filp),
+				 user_ns,
 				 src_dent_parent,
 				 src_dent,
 				 mnt_user_ns(dst_path.mnt),
@@ -1109,7 +1114,7 @@ int ksmbd_vfs_unlink(struct user_namespace *user_ns,
 {
 	int err = 0;
 
-	err = ksmbd_vfs_lock_parent(dir, dentry);
+	err = ksmbd_vfs_lock_parent(user_ns, dir, dentry);
 	if (err)
 		return err;
 	dget(dentry);
@@ -1385,14 +1390,14 @@ static struct xattr_smb_acl *ksmbd_vfs_make_xattr_posix_acl(struct user_namespac
 		switch (pa_entry->e_tag) {
 		case ACL_USER:
 			xa_entry->type = SMB_ACL_USER;
-			xa_entry->uid = from_kuid(user_ns, pa_entry->e_uid);
+			xa_entry->uid = posix_acl_uid_translate(user_ns, pa_entry);
 			break;
 		case ACL_USER_OBJ:
 			xa_entry->type = SMB_ACL_USER_OBJ;
 			break;
 		case ACL_GROUP:
 			xa_entry->type = SMB_ACL_GROUP;
-			xa_entry->gid = from_kgid(user_ns, pa_entry->e_gid);
+			xa_entry->gid = posix_acl_gid_translate(user_ns, pa_entry);
 			break;
 		case ACL_GROUP_OBJ:
 			xa_entry->type = SMB_ACL_GROUP_OBJ;
diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h
index cb0cba0d5d07..85db50abdb24 100644
--- a/fs/ksmbd/vfs.h
+++ b/fs/ksmbd/vfs.h
@@ -107,7 +107,8 @@ struct ksmbd_kstat {
 	__le32			file_attributes;
 };
 
-int ksmbd_vfs_lock_parent(struct dentry *parent, struct dentry *child);
+int ksmbd_vfs_lock_parent(struct user_namespace *user_ns, struct dentry *parent,
+			  struct dentry *child);
 int ksmbd_vfs_may_delete(struct user_namespace *user_ns, struct dentry *dentry);
 int ksmbd_vfs_query_maximal_access(struct user_namespace *user_ns,
 				   struct dentry *dentry, __le32 *daccess);
diff --git a/fs/ksmbd/vfs_cache.c b/fs/ksmbd/vfs_cache.c
index 92d8c61ffd2a..29c1db66bd0f 100644
--- a/fs/ksmbd/vfs_cache.c
+++ b/fs/ksmbd/vfs_cache.c
@@ -666,22 +666,6 @@ void ksmbd_free_global_file_table(void)
 	ksmbd_destroy_file_table(&global_ft);
 }
 
-int ksmbd_file_table_flush(struct ksmbd_work *work)
-{
-	struct ksmbd_file	*fp = NULL;
-	unsigned int		id;
-	int			ret;
-
-	read_lock(&work->sess->file_table.lock);
-	idr_for_each_entry(work->sess->file_table.idr, fp, id) {
-		ret = ksmbd_vfs_fsync(work, fp->volatile_id, KSMBD_NO_FID);
-		if (ret)
-			break;
-	}
-	read_unlock(&work->sess->file_table.lock);
-	return ret;
-}
-
 int ksmbd_init_file_table(struct ksmbd_file_table *ft)
 {
 	ft->idr = kzalloc(sizeof(struct idr), GFP_KERNEL);
diff --git a/fs/ksmbd/vfs_cache.h b/fs/ksmbd/vfs_cache.h
index 70dfe6a99f13..448576fbe4b7 100644
--- a/fs/ksmbd/vfs_cache.h
+++ b/fs/ksmbd/vfs_cache.h
@@ -152,7 +152,6 @@ void ksmbd_close_session_fds(struct ksmbd_work *work);
 int ksmbd_close_inode_fds(struct ksmbd_work *work, struct inode *inode);
 int ksmbd_init_global_file_table(void);
 void ksmbd_free_global_file_table(void);
-int ksmbd_file_table_flush(struct ksmbd_work *work);
 void ksmbd_set_fd_limit(unsigned long limit);
 
 /*
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 0ab9756ed235..b632be3ad57b 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -649,6 +649,7 @@ static int lockd_authenticate(struct svc_rqst *rqstp)
 	switch (rqstp->rq_authop->flavour) {
 		case RPC_AUTH_NULL:
 		case RPC_AUTH_UNIX:
+			rqstp->rq_auth_stat = rpc_auth_ok;
 			if (rqstp->rq_proc == 0)
 				return SVC_OK;
 			if (is_callback(rqstp->rq_proc)) {
@@ -659,6 +660,7 @@ static int lockd_authenticate(struct svc_rqst *rqstp)
 			}
 			return svc_set_client(rqstp);
 	}
+	rqstp->rq_auth_stat = rpc_autherr_badcred;
 	return SVC_DENIED;
 }
 
diff --git a/fs/namei.c b/fs/namei.c
index d049d3972695..1946d9667790 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -255,7 +255,7 @@ getname_kernel(const char * filename)
 
 void putname(struct filename *name)
 {
-	if (IS_ERR_OR_NULL(name))
+	if (IS_ERR(name))
 		return;
 
 	BUG_ON(name->refcnt <= 0);
@@ -2467,7 +2467,7 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path
 	return err;
 }
 
-static int __filename_lookup(int dfd, struct filename *name, unsigned flags,
+int filename_lookup(int dfd, struct filename *name, unsigned flags,
 		    struct path *path, struct path *root)
 {
 	int retval;
@@ -2488,15 +2488,6 @@ static int __filename_lookup(int dfd, struct filename *name, unsigned flags,
 	return retval;
 }
 
-int filename_lookup(int dfd, struct filename *name, unsigned flags,
-		    struct path *path, struct path *root)
-{
-	int retval = __filename_lookup(dfd, name, flags, path, root);
-
-	putname(name);
-	return retval;
-}
-
 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
 static int path_parentat(struct nameidata *nd, unsigned flags,
 				struct path *parent)
@@ -2514,9 +2505,10 @@ static int path_parentat(struct nameidata *nd, unsigned flags,
 	return err;
 }
 
-static int __filename_parentat(int dfd, struct filename *name,
-				unsigned int flags, struct path *parent,
-				struct qstr *last, int *type)
+/* Note: this does not consume "name" */
+static int filename_parentat(int dfd, struct filename *name,
+			     unsigned int flags, struct path *parent,
+			     struct qstr *last, int *type)
 {
 	int retval;
 	struct nameidata nd;
@@ -2538,25 +2530,14 @@ static int __filename_parentat(int dfd, struct filename *name,
 	return retval;
 }
 
-static int filename_parentat(int dfd, struct filename *name,
-				unsigned int flags, struct path *parent,
-				struct qstr *last, int *type)
-{
-	int retval = __filename_parentat(dfd, name, flags, parent, last, type);
-
-	putname(name);
-	return retval;
-}
-
 /* does lookup, returns the object with parent locked */
-struct dentry *kern_path_locked(const char *name, struct path *path)
+static struct dentry *__kern_path_locked(struct filename *name, struct path *path)
 {
 	struct dentry *d;
 	struct qstr last;
 	int type, error;
 
-	error = filename_parentat(AT_FDCWD, getname_kernel(name), 0, path,
-				    &last, &type);
+	error = filename_parentat(AT_FDCWD, name, 0, path, &last, &type);
 	if (error)
 		return ERR_PTR(error);
 	if (unlikely(type != LAST_NORM)) {
@@ -2572,10 +2553,23 @@ struct dentry *kern_path_locked(const char *name, struct path *path)
 	return d;
 }
 
+struct dentry *kern_path_locked(const char *name, struct path *path)
+{
+	struct filename *filename = getname_kernel(name);
+	struct dentry *res = __kern_path_locked(filename, path);
+
+	putname(filename);
+	return res;
+}
+
 int kern_path(const char *name, unsigned int flags, struct path *path)
 {
-	return filename_lookup(AT_FDCWD, getname_kernel(name),
-			       flags, path, NULL);
+	struct filename *filename = getname_kernel(name);
+	int ret = filename_lookup(AT_FDCWD, filename, flags, path, NULL);
+
+	putname(filename);
+	return ret;
+
 }
 EXPORT_SYMBOL(kern_path);
 
@@ -2591,10 +2585,15 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
 		    const char *name, unsigned int flags,
 		    struct path *path)
 {
+	struct filename *filename;
 	struct path root = {.mnt = mnt, .dentry = dentry};
+	int ret;
+
+	filename = getname_kernel(name);
 	/* the first argument of filename_lookup() is ignored with root */
-	return filename_lookup(AT_FDCWD, getname_kernel(name),
-			       flags , path, &root);
+	ret = filename_lookup(AT_FDCWD, filename, flags, path, &root);
+	putname(filename);
+	return ret;
 }
 EXPORT_SYMBOL(vfs_path_lookup);
 
@@ -2798,8 +2797,11 @@ int path_pts(struct path *path)
 int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
 		 struct path *path, int *empty)
 {
-	return filename_lookup(dfd, getname_flags(name, flags, empty),
-			       flags, path, NULL);
+	struct filename *filename = getname_flags(name, flags, empty);
+	int ret = filename_lookup(dfd, filename, flags, path, NULL);
+
+	putname(filename);
+	return ret;
 }
 EXPORT_SYMBOL(user_path_at_empty);
 
@@ -3618,8 +3620,8 @@ struct file *do_file_open_root(const struct path *root,
 	return file;
 }
 
-static struct dentry *__filename_create(int dfd, struct filename *name,
-				struct path *path, unsigned int lookup_flags)
+static struct dentry *filename_create(int dfd, struct filename *name,
+				      struct path *path, unsigned int lookup_flags)
 {
 	struct dentry *dentry = ERR_PTR(-EEXIST);
 	struct qstr last;
@@ -3634,7 +3636,7 @@ static struct dentry *__filename_create(int dfd, struct filename *name,
 	 */
 	lookup_flags &= LOOKUP_REVAL;
 
-	error = __filename_parentat(dfd, name, lookup_flags, path, &last, &type);
+	error = filename_parentat(dfd, name, lookup_flags, path, &last, &type);
 	if (error)
 		return ERR_PTR(error);
 
@@ -3687,21 +3689,15 @@ out:
 	return dentry;
 }
 
-static inline struct dentry *filename_create(int dfd, struct filename *name,
+struct dentry *kern_path_create(int dfd, const char *pathname,
 				struct path *path, unsigned int lookup_flags)
 {
-	struct dentry *res = __filename_create(dfd, name, path, lookup_flags);
+	struct filename *filename = getname_kernel(pathname);
+	struct dentry *res = filename_create(dfd, filename, path, lookup_flags);
 
-	putname(name);
+	putname(filename);
 	return res;
 }
-
-struct dentry *kern_path_create(int dfd, const char *pathname,
-				struct path *path, unsigned int lookup_flags)
-{
-	return filename_create(dfd, getname_kernel(pathname),
-				path, lookup_flags);
-}
 EXPORT_SYMBOL(kern_path_create);
 
 void done_path_create(struct path *path, struct dentry *dentry)
@@ -3716,7 +3712,11 @@ EXPORT_SYMBOL(done_path_create);
 inline struct dentry *user_path_create(int dfd, const char __user *pathname,
 				struct path *path, unsigned int lookup_flags)
 {
-	return filename_create(dfd, getname(pathname), path, lookup_flags);
+	struct filename *filename = getname(pathname);
+	struct dentry *res = filename_create(dfd, filename, path, lookup_flags);
+
+	putname(filename);
+	return res;
 }
 EXPORT_SYMBOL(user_path_create);
 
@@ -3797,7 +3797,7 @@ static int do_mknodat(int dfd, struct filename *name, umode_t mode,
 	if (error)
 		goto out1;
 retry:
-	dentry = __filename_create(dfd, name, &path, lookup_flags);
+	dentry = filename_create(dfd, name, &path, lookup_flags);
 	error = PTR_ERR(dentry);
 	if (IS_ERR(dentry))
 		goto out1;
@@ -3897,7 +3897,7 @@ int do_mkdirat(int dfd, struct filename *name, umode_t mode)
 	unsigned int lookup_flags = LOOKUP_DIRECTORY;
 
 retry:
-	dentry = __filename_create(dfd, name, &path, lookup_flags);
+	dentry = filename_create(dfd, name, &path, lookup_flags);
 	error = PTR_ERR(dentry);
 	if (IS_ERR(dentry))
 		goto out_putname;
@@ -3996,7 +3996,7 @@ int do_rmdir(int dfd, struct filename *name)
 	int type;
 	unsigned int lookup_flags = 0;
 retry:
-	error = __filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
+	error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
 	if (error)
 		goto exit1;
 
@@ -4089,7 +4089,9 @@ int vfs_unlink(struct user_namespace *mnt_userns, struct inode *dir,
 		return -EPERM;
 
 	inode_lock(target);
-	if (is_local_mountpoint(dentry))
+	if (IS_SWAPFILE(target))
+		error = -EPERM;
+	else if (is_local_mountpoint(dentry))
 		error = -EBUSY;
 	else {
 		error = security_inode_unlink(dir, dentry);
@@ -4135,7 +4137,7 @@ int do_unlinkat(int dfd, struct filename *name)
 	struct inode *delegated_inode = NULL;
 	unsigned int lookup_flags = 0;
 retry:
-	error = __filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
+	error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
 	if (error)
 		goto exit1;
 
@@ -4264,7 +4266,7 @@ int do_symlinkat(struct filename *from, int newdfd, struct filename *to)
 		goto out_putnames;
 	}
 retry:
-	dentry = __filename_create(newdfd, to, &path, lookup_flags);
+	dentry = filename_create(newdfd, to, &path, lookup_flags);
 	error = PTR_ERR(dentry);
 	if (IS_ERR(dentry))
 		goto out_putnames;
@@ -4424,11 +4426,11 @@ int do_linkat(int olddfd, struct filename *old, int newdfd,
 	if (flags & AT_SYMLINK_FOLLOW)
 		how |= LOOKUP_FOLLOW;
 retry:
-	error = __filename_lookup(olddfd, old, how, &old_path, NULL);
+	error = filename_lookup(olddfd, old, how, &old_path, NULL);
 	if (error)
 		goto out_putnames;
 
-	new_dentry = __filename_create(newdfd, new, &new_path,
+	new_dentry = filename_create(newdfd, new, &new_path,
 					(how & LOOKUP_REVAL));
 	error = PTR_ERR(new_dentry);
 	if (IS_ERR(new_dentry))
@@ -4597,6 +4599,10 @@ int vfs_rename(struct renamedata *rd)
 	else if (target)
 		inode_lock(target);
 
+	error = -EPERM;
+	if (IS_SWAPFILE(source) || (target && IS_SWAPFILE(target)))
+		goto out;
+
 	error = -EBUSY;
 	if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry))
 		goto out;
@@ -4683,13 +4689,13 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
 		target_flags = 0;
 
 retry:
-	error = __filename_parentat(olddfd, from, lookup_flags, &old_path,
-					&old_last, &old_type);
+	error = filename_parentat(olddfd, from, lookup_flags, &old_path,
+				  &old_last, &old_type);
 	if (error)
 		goto put_names;
 
-	error = __filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last,
-				&new_type);
+	error = filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last,
+				  &new_type);
 	if (error)
 		goto exit1;
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 12852363d90e..659a8f39c61a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -203,7 +203,8 @@ static struct mount *alloc_vfsmnt(const char *name)
 			goto out_free_cache;
 
 		if (name) {
-			mnt->mnt_devname = kstrdup_const(name, GFP_KERNEL);
+			mnt->mnt_devname = kstrdup_const(name,
+							 GFP_KERNEL_ACCOUNT);
 			if (!mnt->mnt_devname)
 				goto out_free_id;
 		}
@@ -3370,7 +3371,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
 	if (!ucounts)
 		return ERR_PTR(-ENOSPC);
 
-	new_ns = kzalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
+	new_ns = kzalloc(sizeof(struct mnt_namespace), GFP_KERNEL_ACCOUNT);
 	if (!new_ns) {
 		dec_mnt_namespaces(ucounts);
 		return ERR_PTR(-ENOMEM);
@@ -4306,7 +4307,7 @@ void __init mnt_init(void)
 	int err;
 
 	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
-			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
+			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL);
 
 	mount_hashtable = alloc_large_system_hash("Mount-cache",
 				sizeof(struct hlist_head),
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 7817ad94a6ba..86d856de1389 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -429,6 +429,8 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
  */
 static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 {
+	rqstp->rq_auth_stat = rpc_autherr_badcred;
+
 	switch (rqstp->rq_authop->flavour) {
 	case RPC_AUTH_NULL:
 		if (rqstp->rq_proc != CB_NULL)
@@ -439,6 +441,8 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 		 if (svc_is_backchannel(rqstp))
 			return SVC_DENIED;
 	}
+
+	rqstp->rq_auth_stat = rpc_auth_ok;
 	return SVC_OK;
 }
 
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index c5348ba81129..4c48d85f6517 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -63,11 +63,10 @@ static __be32 nfs4_callback_null(struct svc_rqst *rqstp)
 	return htonl(NFS4_OK);
 }
 
-static int nfs4_decode_void(struct svc_rqst *rqstp, __be32 *p)
-{
-	return xdr_argsize_check(rqstp, p);
-}
-
+/*
+ * svc_process_common() looks for an XDR encoder to know when
+ * not to drop a Reply.
+ */
 static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
@@ -864,17 +863,16 @@ preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op)
 }
 
 static __be32 process_op(int nop, struct svc_rqst *rqstp,
-		struct xdr_stream *xdr_in, void *argp,
-		struct xdr_stream *xdr_out, void *resp,
-		struct cb_process_state *cps)
+			 struct cb_process_state *cps)
 {
+	struct xdr_stream *xdr_out = &rqstp->rq_res_stream;
 	struct callback_op *op = &callback_ops[0];
 	unsigned int op_nr;
 	__be32 status;
 	long maxlen;
 	__be32 res;
 
-	status = decode_op_hdr(xdr_in, &op_nr);
+	status = decode_op_hdr(&rqstp->rq_arg_stream, &op_nr);
 	if (unlikely(status))
 		return status;
 
@@ -904,9 +902,11 @@ static __be32 process_op(int nop, struct svc_rqst *rqstp,
 
 	maxlen = xdr_out->end - xdr_out->p;
 	if (maxlen > 0 && maxlen < PAGE_SIZE) {
-		status = op->decode_args(rqstp, xdr_in, argp);
+		status = op->decode_args(rqstp, &rqstp->rq_arg_stream,
+					 rqstp->rq_argp);
 		if (likely(status == 0))
-			status = op->process_op(argp, resp, cps);
+			status = op->process_op(rqstp->rq_argp, rqstp->rq_resp,
+						cps);
 	} else
 		status = htonl(NFS4ERR_RESOURCE);
 
@@ -915,7 +915,7 @@ encode_hdr:
 	if (unlikely(res))
 		return res;
 	if (op->encode_res != NULL && status == 0)
-		status = op->encode_res(rqstp, xdr_out, resp);
+		status = op->encode_res(rqstp, xdr_out, rqstp->rq_resp);
 	return status;
 }
 
@@ -926,22 +926,15 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
 {
 	struct cb_compound_hdr_arg hdr_arg = { 0 };
 	struct cb_compound_hdr_res hdr_res = { NULL };
-	struct xdr_stream xdr_in, xdr_out;
-	__be32 *p, status;
 	struct cb_process_state cps = {
 		.drc_status = 0,
 		.clp = NULL,
 		.net = SVC_NET(rqstp),
 	};
 	unsigned int nops = 0;
+	__be32 status;
 
-	xdr_init_decode(&xdr_in, &rqstp->rq_arg,
-			rqstp->rq_arg.head[0].iov_base, NULL);
-
-	p = (__be32*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
-	xdr_init_encode(&xdr_out, &rqstp->rq_res, p, NULL);
-
-	status = decode_compound_hdr_arg(&xdr_in, &hdr_arg);
+	status = decode_compound_hdr_arg(&rqstp->rq_arg_stream, &hdr_arg);
 	if (status == htonl(NFS4ERR_RESOURCE))
 		return rpc_garbage_args;
 
@@ -961,15 +954,13 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
 	cps.minorversion = hdr_arg.minorversion;
 	hdr_res.taglen = hdr_arg.taglen;
 	hdr_res.tag = hdr_arg.tag;
-	if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0) {
+	if (encode_compound_hdr_res(&rqstp->rq_res_stream, &hdr_res) != 0) {
 		if (cps.clp)
 			nfs_put_client(cps.clp);
 		return rpc_system_err;
 	}
 	while (status == 0 && nops != hdr_arg.nops) {
-		status = process_op(nops, rqstp, &xdr_in,
-				    rqstp->rq_argp, &xdr_out, rqstp->rq_resp,
-				    &cps);
+		status = process_op(nops, rqstp, &cps);
 		nops++;
 	}
 
@@ -988,7 +979,20 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
 
 out_invalidcred:
 	pr_warn_ratelimited("NFS: NFSv4 callback contains invalid cred\n");
-	return svc_return_autherr(rqstp, rpc_autherr_badcred);
+	rqstp->rq_auth_stat = rpc_autherr_badcred;
+	return rpc_success;
+}
+
+static int
+nfs_callback_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+{
+	const struct svc_procedure *procp = rqstp->rq_procinfo;
+
+	svcxdr_init_decode(rqstp);
+	svcxdr_init_encode(rqstp);
+
+	*statp = procp->pc_func(rqstp);
+	return 1;
 }
 
 /*
@@ -1057,7 +1061,6 @@ static struct callback_op callback_ops[] = {
 static const struct svc_procedure nfs4_callback_procedures1[] = {
 	[CB_NULL] = {
 		.pc_func = nfs4_callback_null,
-		.pc_decode = nfs4_decode_void,
 		.pc_encode = nfs4_encode_void,
 		.pc_xdrressize = 1,
 		.pc_name = "NULL",
@@ -1079,7 +1082,7 @@ const struct svc_version nfs4_callback_version1 = {
 	.vs_proc = nfs4_callback_procedures1,
 	.vs_count = nfs4_callback_count1,
 	.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
-	.vs_dispatch = NULL,
+	.vs_dispatch = nfs_callback_dispatch,
 	.vs_hidden = true,
 	.vs_need_cong_ctrl = true,
 };
@@ -1091,7 +1094,7 @@ const struct svc_version nfs4_callback_version4 = {
 	.vs_proc = nfs4_callback_procedures1,
 	.vs_count = nfs4_callback_count4,
 	.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
-	.vs_dispatch = NULL,
+	.vs_dispatch = nfs_callback_dispatch,
 	.vs_hidden = true,
 	.vs_need_cong_ctrl = true,
 };
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 330f65727c45..23e165d5ec9c 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -179,6 +179,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
 
 	clp->cl_proto = cl_init->proto;
 	clp->cl_nconnect = cl_init->nconnect;
+	clp->cl_max_connect = cl_init->max_connect ? cl_init->max_connect : 1;
 	clp->cl_net = get_net(cl_init->net);
 
 	clp->cl_principal = "*";
@@ -540,6 +541,7 @@ int nfs_create_rpc_client(struct nfs_client *clp,
 
 	clnt->cl_principal = clp->cl_principal;
 	clp->cl_rpcclient = clnt;
+	clnt->cl_max_connect = clp->cl_max_connect;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(nfs_create_rpc_client);
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index d95c9a39bc70..0d444a90f513 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -60,6 +60,7 @@ enum nfs_param {
 	Opt_mountvers,
 	Opt_namelen,
 	Opt_nconnect,
+	Opt_max_connect,
 	Opt_port,
 	Opt_posix,
 	Opt_proto,
@@ -158,6 +159,7 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
 	fsparam_u32   ("mountvers",	Opt_mountvers),
 	fsparam_u32   ("namlen",	Opt_namelen),
 	fsparam_u32   ("nconnect",	Opt_nconnect),
+	fsparam_u32   ("max_connect",	Opt_max_connect),
 	fsparam_string("nfsvers",	Opt_vers),
 	fsparam_u32   ("port",		Opt_port),
 	fsparam_flag_no("posix",	Opt_posix),
@@ -770,6 +772,11 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
 			goto out_of_bounds;
 		ctx->nfs_server.nconnect = result.uint_32;
 		break;
+	case Opt_max_connect:
+		if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_TRANSPORTS)
+			goto out_of_bounds;
+		ctx->nfs_server.max_connect = result.uint_32;
+		break;
 	case Opt_lookupcache:
 		switch (result.uint_32) {
 		case Opt_lookupcache_all:
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index a36af04188c2..66fc936834f2 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -67,6 +67,7 @@ struct nfs_client_initdata {
 	int proto;
 	u32 minorversion;
 	unsigned int nconnect;
+	unsigned int max_connect;
 	struct net *net;
 	const struct rpc_timeout *timeparms;
 	const struct cred *cred;
@@ -121,6 +122,7 @@ struct nfs_fs_context {
 		int			port;
 		unsigned short		protocol;
 		unsigned short		nconnect;
+		unsigned short		max_connect;
 		unsigned short		export_path_len;
 	} nfs_server;
 
diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h
index c8a192802dda..03a4e679fd99 100644
--- a/fs/nfs/nfs3_fs.h
+++ b/fs/nfs/nfs3_fs.h
@@ -11,7 +11,7 @@
  * nfs3acl.c
  */
 #ifdef CONFIG_NFS_V3_ACL
-extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type);
+extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type, bool rcu);
 extern int nfs3_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
 			struct posix_acl *acl, int type);
 extern int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 9ec560aa4a50..93de0b58647a 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -44,7 +44,7 @@ static void nfs3_abort_get_acl(struct posix_acl **p)
 	cmpxchg(p, sentinel, ACL_NOT_CACHED);
 }
 
-struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
+struct posix_acl *nfs3_get_acl(struct inode *inode, int type, bool rcu)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
 	struct page *pages[NFSACL_MAXPAGES] = { };
@@ -62,6 +62,9 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
 	};
 	int status, count;
 
+	if (rcu)
+		return ERR_PTR(-ECHILD);
+
 	if (!nfs_server_capable(inode, NFS_CAP_ACLS))
 		return ERR_PTR(-EOPNOTSUPP);
 
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 2299446b3b89..f7524310ddf4 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -49,8 +49,7 @@ nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode)
 {
 	if (task->tk_status != -EJUKEBOX)
 		return 0;
-	if (task->tk_status == -EJUKEBOX)
-		nfs_inc_stats(inode, NFSIOS_DELAY);
+	nfs_inc_stats(inode, NFSIOS_DELAY);
 	task->tk_status = 0;
 	rpc_restart_call(task);
 	rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 28431acd1230..af57332503be 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -402,6 +402,33 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)
 	return nfs4_init_callback(clp);
 }
 
+static void nfs4_add_trunk(struct nfs_client *clp, struct nfs_client *old)
+{
+	struct sockaddr_storage clp_addr, old_addr;
+	struct sockaddr *clp_sap = (struct sockaddr *)&clp_addr;
+	struct sockaddr *old_sap = (struct sockaddr *)&old_addr;
+	size_t clp_salen;
+	struct xprt_create xprt_args = {
+		.ident = old->cl_proto,
+		.net = old->cl_net,
+		.servername = old->cl_hostname,
+	};
+
+	if (clp->cl_proto != old->cl_proto)
+		return;
+	clp_salen = rpc_peeraddr(clp->cl_rpcclient, clp_sap, sizeof(clp_addr));
+	rpc_peeraddr(old->cl_rpcclient, old_sap, sizeof(old_addr));
+
+	if (clp_addr.ss_family != old_addr.ss_family)
+		return;
+
+	xprt_args.dstaddr = clp_sap;
+	xprt_args.addrlen = clp_salen;
+
+	rpc_clnt_add_xprt(old->cl_rpcclient, &xprt_args,
+			  rpc_clnt_test_and_add_xprt, NULL);
+}
+
 /**
  * nfs4_init_client - Initialise an NFS4 client record
  *
@@ -436,6 +463,8 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 		 * won't try to use it.
 		 */
 		nfs_mark_client_ready(clp, -EPERM);
+		if (old->cl_mvops->session_trunk)
+			nfs4_add_trunk(clp, old);
 	}
 	clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags);
 	nfs_put_client(clp);
@@ -865,6 +894,7 @@ static int nfs4_set_client(struct nfs_server *server,
 		const char *ip_addr,
 		int proto, const struct rpc_timeout *timeparms,
 		u32 minorversion, unsigned int nconnect,
+		unsigned int max_connect,
 		struct net *net)
 {
 	struct nfs_client_initdata cl_init = {
@@ -883,6 +913,8 @@ static int nfs4_set_client(struct nfs_server *server,
 
 	if (minorversion == 0)
 		__set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags);
+	else
+		cl_init.max_connect = max_connect;
 	if (proto == XPRT_TRANSPORT_TCP)
 		cl_init.nconnect = nconnect;
 
@@ -952,8 +984,10 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
 		return ERR_PTR(-EINVAL);
 	cl_init.hostname = buf;
 
-	if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP)
+	if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) {
 		cl_init.nconnect = mds_clp->cl_nconnect;
+		cl_init.max_connect = NFS_MAX_TRANSPORTS;
+	}
 
 	if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
 		__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
@@ -1122,6 +1156,7 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
 				&timeparms,
 				ctx->minorversion,
 				ctx->nfs_server.nconnect,
+				ctx->nfs_server.max_connect,
 				fc->net_ns);
 	if (error < 0)
 		return error;
@@ -1211,6 +1246,7 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
 				parent_server->client->cl_timeout,
 				parent_client->cl_mvops->minor_version,
 				parent_client->cl_nconnect,
+				parent_client->cl_max_connect,
 				parent_client->cl_net);
 	if (!error)
 		goto init_server;
@@ -1226,6 +1262,7 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
 				parent_server->client->cl_timeout,
 				parent_client->cl_mvops->minor_version,
 				parent_client->cl_nconnect,
+				parent_client->cl_max_connect,
 				parent_client->cl_net);
 	if (error < 0)
 		goto error;
@@ -1323,7 +1360,7 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
 	error = nfs4_set_client(server, hostname, sap, salen, buf,
 				clp->cl_proto, clnt->cl_timeout,
 				clp->cl_minorversion,
-				clp->cl_nconnect, net);
+				clp->cl_nconnect, clp->cl_max_connect, net);
 	clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
 	if (error != 0) {
 		nfs_server_insert_lists(server);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index c820de58a661..c91565227ea2 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -158,13 +158,11 @@ static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
 		sync = true;
 retry:
 	if (!nfs42_files_from_same_server(file_in, file_out)) {
-		/* for inter copy, if copy size if smaller than 12 RPC
-		 * payloads, fallback to traditional copy. There are
-		 * 14 RPCs during an NFSv4.x mount between source/dest
-		 * servers.
+		/*
+		 * for inter copy, if copy size is too small
+		 * then fallback to generic copy.
 		 */
-		if (sync ||
-			count <= 14 * NFS_SERVER(file_inode(file_in))->rsize)
+		if (sync)
 			return -EOPNOTSUPP;
 		cn_resp = kzalloc(sizeof(struct nfs42_copy_notify_res),
 				GFP_NOFS);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index ef14ea0b6ab8..7c9090a28e5c 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -335,7 +335,7 @@ static bool pnfs_seqid_is_newer(u32 s1, u32 s2)
 
 static void pnfs_barrier_update(struct pnfs_layout_hdr *lo, u32 newseq)
 {
-	if (pnfs_seqid_is_newer(newseq, lo->plh_barrier))
+	if (pnfs_seqid_is_newer(newseq, lo->plh_barrier) || !lo->plh_barrier)
 		lo->plh_barrier = newseq;
 }
 
@@ -347,11 +347,15 @@ pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode,
 		iomode = IOMODE_ANY;
 	lo->plh_return_iomode = iomode;
 	set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
-	if (seq != 0) {
-		WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq);
+	/*
+	 * We must set lo->plh_return_seq to avoid livelocks with
+	 * pnfs_layout_need_return()
+	 */
+	if (seq == 0)
+		seq = be32_to_cpu(lo->plh_stateid.seqid);
+	if (!lo->plh_return_seq || pnfs_seqid_is_newer(seq, lo->plh_return_seq))
 		lo->plh_return_seq = seq;
-		pnfs_barrier_update(lo, seq);
-	}
+	pnfs_barrier_update(lo, seq);
 }
 
 static void
@@ -592,10 +596,6 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg)
 	inode = lo->plh_inode;
 
 	if (refcount_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
-		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
-			spin_unlock(&inode->i_lock);
-			return;
-		}
 		pnfs_get_layout_hdr(lo);
 		pnfs_layout_remove_lseg(lo, lseg);
 		if (pnfs_cache_lseg_for_layoutreturn(lo, lseg))
@@ -1000,7 +1000,7 @@ pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo,
 {
 	u32 seqid = be32_to_cpu(stateid->seqid);
 
-	return !pnfs_seqid_is_newer(seqid, lo->plh_barrier) && lo->plh_barrier;
+	return lo->plh_barrier && pnfs_seqid_is_newer(lo->plh_barrier, seqid);
 }
 
 /* lget is set to 1 if called from inside send_layoutget call chain */
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 9f39e0a1a38b..08d6cc57cbc3 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -293,15 +293,19 @@ static int
 readpage_async_filler(void *data, struct page *page)
 {
 	struct nfs_readdesc *desc = data;
+	struct inode *inode = page_file_mapping(page)->host;
+	unsigned int rsize = NFS_SERVER(inode)->rsize;
 	struct nfs_page *new;
-	unsigned int len;
+	unsigned int len, aligned_len;
 	int error;
 
 	len = nfs_page_length(page);
 	if (len == 0)
 		return nfs_return_empty_page(page);
 
-	new = nfs_create_request(desc->ctx, page, 0, len);
+	aligned_len = min_t(unsigned int, ALIGN(len, rsize), PAGE_SIZE);
+
+	new = nfs_create_request(desc->ctx, page, 0, aligned_len);
 	if (IS_ERR(new))
 		goto out_error;
 
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index fe58525cfed4..e65c83494c05 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -480,6 +480,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 	if (clp->cl_nconnect > 0)
 		seq_printf(m, ",nconnect=%u", clp->cl_nconnect);
 	if (version == 4) {
+		if (clp->cl_max_connect > 1)
+			seq_printf(m, ",max_connect=%u", clp->cl_max_connect);
 		if (nfss->port != NFS_PORT)
 			seq_printf(m, ",port=%u", nfss->port);
 	} else
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index f229172652be..6e9ea4ee0f73 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -109,7 +109,7 @@ config NFSD_SCSILAYOUT
 	depends on NFSD_V4 && BLOCK
 	select NFSD_PNFS
 	select EXPORTFS_BLOCK_OPS
-	select BLK_SCSI_REQUEST
+	select SCSI_COMMON
 	help
 	  This option enables support for the exporting pNFS SCSI layouts
 	  in the kernel's NFS server. The pNFS SCSI layout enables NFS
diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
index 68e8d61e28dd..62f8a7ac19c8 100644
--- a/fs/nilfs2/sysfs.c
+++ b/fs/nilfs2/sysfs.c
@@ -51,11 +51,9 @@ static const struct sysfs_ops nilfs_##name##_attr_ops = { \
 #define NILFS_DEV_INT_GROUP_TYPE(name, parent_name) \
 static void nilfs_##name##_attr_release(struct kobject *kobj) \
 { \
-	struct nilfs_sysfs_##parent_name##_subgroups *subgroups; \
-	struct the_nilfs *nilfs = container_of(kobj->parent, \
-						struct the_nilfs, \
-						ns_##parent_name##_kobj); \
-	subgroups = nilfs->ns_##parent_name##_subgroups; \
+	struct nilfs_sysfs_##parent_name##_subgroups *subgroups = container_of(kobj, \
+						struct nilfs_sysfs_##parent_name##_subgroups, \
+						sg_##name##_kobj); \
 	complete(&subgroups->sg_##name##_kobj_unregister); \
 } \
 static struct kobj_type nilfs_##name##_ktype = { \
@@ -81,12 +79,12 @@ static int nilfs_sysfs_create_##name##_group(struct the_nilfs *nilfs) \
 	err = kobject_init_and_add(kobj, &nilfs_##name##_ktype, parent, \
 				    #name); \
 	if (err) \
-		return err; \
-	return 0; \
+		kobject_put(kobj); \
+	return err; \
 } \
 static void nilfs_sysfs_delete_##name##_group(struct the_nilfs *nilfs) \
 { \
-	kobject_del(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \
+	kobject_put(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \
 }
 
 /************************************************************************
@@ -197,14 +195,14 @@ int nilfs_sysfs_create_snapshot_group(struct nilfs_root *root)
 	}
 
 	if (err)
-		return err;
+		kobject_put(&root->snapshot_kobj);
 
-	return 0;
+	return err;
 }
 
 void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *root)
 {
-	kobject_del(&root->snapshot_kobj);
+	kobject_put(&root->snapshot_kobj);
 }
 
 /************************************************************************
@@ -986,7 +984,7 @@ int nilfs_sysfs_create_device_group(struct super_block *sb)
 	err = kobject_init_and_add(&nilfs->ns_dev_kobj, &nilfs_dev_ktype, NULL,
 				    "%s", sb->s_id);
 	if (err)
-		goto free_dev_subgroups;
+		goto cleanup_dev_kobject;
 
 	err = nilfs_sysfs_create_mounted_snapshots_group(nilfs);
 	if (err)
@@ -1023,9 +1021,7 @@ delete_mounted_snapshots_group:
 	nilfs_sysfs_delete_mounted_snapshots_group(nilfs);
 
 cleanup_dev_kobject:
-	kobject_del(&nilfs->ns_dev_kobj);
-
-free_dev_subgroups:
+	kobject_put(&nilfs->ns_dev_kobj);
 	kfree(nilfs->ns_dev_subgroups);
 
 failed_create_device_group:
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 8b7b01a380ce..c8bfc01da5d7 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -792,14 +792,13 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno)
 
 void nilfs_put_root(struct nilfs_root *root)
 {
-	if (refcount_dec_and_test(&root->count)) {
-		struct the_nilfs *nilfs = root->nilfs;
+	struct the_nilfs *nilfs = root->nilfs;
 
-		nilfs_sysfs_delete_snapshot_group(root);
-
-		spin_lock(&nilfs->ns_cptree_lock);
+	if (refcount_dec_and_lock(&root->count, &nilfs->ns_cptree_lock)) {
 		rb_erase(&root->rb_node, &nilfs->ns_cptree);
 		spin_unlock(&nilfs->ns_cptree_lock);
+
+		nilfs_sysfs_delete_snapshot_group(root);
 		iput(root->ifile);
 
 		kfree(root);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 95006d1d29ab..fa1d99101f89 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -531,6 +531,7 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
 		/* Someone else created list structure for us */
 		if (inode)
 			fsnotify_put_inode_ref(inode);
+		fsnotify_put_sb_connectors(conn);
 		kmem_cache_free(fsnotify_mark_connector_cachep, conn);
 	}
 
diff --git a/fs/ntfs3/Kconfig b/fs/ntfs3/Kconfig
new file mode 100644
index 000000000000..6e4cbc48ab8e
--- /dev/null
+++ b/fs/ntfs3/Kconfig
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config NTFS3_FS
+	tristate "NTFS Read-Write file system support"
+	select NLS
+	help
+	  Windows OS native file system (NTFS) support up to NTFS version 3.1.
+
+	  Y or M enables the NTFS3 driver with full features enabled (read,
+	  write, journal replaying, sparse/compressed files support).
+	  File system type to use on mount is "ntfs3". Module name (M option)
+	  is also "ntfs3".
+
+	  Documentation: <file:Documentation/filesystems/ntfs3.rst>
+
+config NTFS3_64BIT_CLUSTER
+	bool "64 bits per NTFS clusters"
+	depends on NTFS3_FS && 64BIT
+	help
+	  Windows implementation of ntfs.sys uses 32 bits per clusters.
+	  If activated 64 bits per clusters you will be able to use 4k cluster
+	  for 16T+ volumes. Windows will not be able to mount such volumes.
+
+	  It is recommended to say N here.
+
+config NTFS3_LZX_XPRESS
+	bool "activate support of external compressions lzx/xpress"
+	depends on NTFS3_FS
+	help
+	  In Windows 10 one can use command "compact" to compress any files.
+	  4 possible variants of compression are: xpress4k, xpress8k, xpress16k and lzx.
+	  If activated you will be able to read such files correctly.
+
+	  It is recommended to say Y here.
+
+config NTFS3_FS_POSIX_ACL
+	bool "NTFS POSIX Access Control Lists"
+	depends on NTFS3_FS
+	select FS_POSIX_ACL
+	help
+	  POSIX Access Control Lists (ACLs) support additional access rights
+	  for users and groups beyond the standard owner/group/world scheme,
+	  and this option selects support for ACLs specifically for ntfs
+	  filesystems.
+	  NOTE: this is linux only feature. Windows will ignore these ACLs.
+
+	  If you don't know what Access Control Lists are, say N.
diff --git a/fs/ntfs3/Makefile b/fs/ntfs3/Makefile
new file mode 100644
index 000000000000..279701b62bbe
--- /dev/null
+++ b/fs/ntfs3/Makefile
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the ntfs3 filesystem support.
+#
+
+# to check robot warnings
+ccflags-y += -Wint-to-pointer-cast \
+	$(call cc-option,-Wunused-but-set-variable,-Wunused-const-variable) \
+	$(call cc-option,-Wold-style-declaration,-Wout-of-line-declaration)
+
+obj-$(CONFIG_NTFS3_FS) += ntfs3.o
+
+ntfs3-y :=	attrib.o \
+		attrlist.o \
+		bitfunc.o \
+		bitmap.o \
+		dir.o \
+		fsntfs.o \
+		frecord.o \
+		file.o \
+		fslog.o \
+		inode.o \
+		index.o \
+		lznt.o \
+		namei.o \
+		record.o \
+		run.o \
+		super.o \
+		upcase.o \
+		xattr.o
+
+ntfs3-$(CONFIG_NTFS3_LZX_XPRESS) += $(addprefix lib/,\
+		decompress_common.o \
+		lzx_decompress.o \
+		xpress_decompress.o \
+		)
+\ No newline at end of file
diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c
new file mode 100644
index 000000000000..34c4cbf7e29b
--- /dev/null
+++ b/fs/ntfs3/attrib.c
@@ -0,0 +1,2093 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ * TODO: Merge attr_set_size/attr_data_get_block/attr_allocate_frame?
+ */
+
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/fs.h>
+#include <linux/hash.h>
+#include <linux/nls.h>
+#include <linux/ratelimit.h>
+#include <linux/slab.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+/*
+ * You can set external NTFS_MIN_LOG2_OF_CLUMP/NTFS_MAX_LOG2_OF_CLUMP to manage
+ * preallocate algorithm.
+ */
+#ifndef NTFS_MIN_LOG2_OF_CLUMP
+#define NTFS_MIN_LOG2_OF_CLUMP 16
+#endif
+
+#ifndef NTFS_MAX_LOG2_OF_CLUMP
+#define NTFS_MAX_LOG2_OF_CLUMP 26
+#endif
+
+// 16M
+#define NTFS_CLUMP_MIN (1 << (NTFS_MIN_LOG2_OF_CLUMP + 8))
+// 16G
+#define NTFS_CLUMP_MAX (1ull << (NTFS_MAX_LOG2_OF_CLUMP + 8))
+
+static inline u64 get_pre_allocated(u64 size)
+{
+	u32 clump;
+	u8 align_shift;
+	u64 ret;
+
+	if (size <= NTFS_CLUMP_MIN) {
+		clump = 1 << NTFS_MIN_LOG2_OF_CLUMP;
+		align_shift = NTFS_MIN_LOG2_OF_CLUMP;
+	} else if (size >= NTFS_CLUMP_MAX) {
+		clump = 1 << NTFS_MAX_LOG2_OF_CLUMP;
+		align_shift = NTFS_MAX_LOG2_OF_CLUMP;
+	} else {
+		align_shift = NTFS_MIN_LOG2_OF_CLUMP - 1 +
+			      __ffs(size >> (8 + NTFS_MIN_LOG2_OF_CLUMP));
+		clump = 1u << align_shift;
+	}
+
+	ret = (((size + clump - 1) >> align_shift)) << align_shift;
+
+	return ret;
+}
+
+/*
+ * attr_must_be_resident
+ *
+ * Return: True if attribute must be resident.
+ */
+static inline bool attr_must_be_resident(struct ntfs_sb_info *sbi,
+					 enum ATTR_TYPE type)
+{
+	const struct ATTR_DEF_ENTRY *de;
+
+	switch (type) {
+	case ATTR_STD:
+	case ATTR_NAME:
+	case ATTR_ID:
+	case ATTR_LABEL:
+	case ATTR_VOL_INFO:
+	case ATTR_ROOT:
+	case ATTR_EA_INFO:
+		return true;
+	default:
+		de = ntfs_query_def(sbi, type);
+		if (de && (de->flags & NTFS_ATTR_MUST_BE_RESIDENT))
+			return true;
+		return false;
+	}
+}
+
+/*
+ * attr_load_runs - Load all runs stored in @attr.
+ */
+int attr_load_runs(struct ATTRIB *attr, struct ntfs_inode *ni,
+		   struct runs_tree *run, const CLST *vcn)
+{
+	int err;
+	CLST svcn = le64_to_cpu(attr->nres.svcn);
+	CLST evcn = le64_to_cpu(attr->nres.evcn);
+	u32 asize;
+	u16 run_off;
+
+	if (svcn >= evcn + 1 || run_is_mapped_full(run, svcn, evcn))
+		return 0;
+
+	if (vcn && (evcn < *vcn || *vcn < svcn))
+		return -EINVAL;
+
+	asize = le32_to_cpu(attr->size);
+	run_off = le16_to_cpu(attr->nres.run_off);
+	err = run_unpack_ex(run, ni->mi.sbi, ni->mi.rno, svcn, evcn,
+			    vcn ? *vcn : svcn, Add2Ptr(attr, run_off),
+			    asize - run_off);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+/*
+ * run_deallocate_ex - Deallocate clusters.
+ */
+static int run_deallocate_ex(struct ntfs_sb_info *sbi, struct runs_tree *run,
+			     CLST vcn, CLST len, CLST *done, bool trim)
+{
+	int err = 0;
+	CLST vcn_next, vcn0 = vcn, lcn, clen, dn = 0;
+	size_t idx;
+
+	if (!len)
+		goto out;
+
+	if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) {
+failed:
+		run_truncate(run, vcn0);
+		err = -EINVAL;
+		goto out;
+	}
+
+	for (;;) {
+		if (clen > len)
+			clen = len;
+
+		if (!clen) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (lcn != SPARSE_LCN) {
+			mark_as_free_ex(sbi, lcn, clen, trim);
+			dn += clen;
+		}
+
+		len -= clen;
+		if (!len)
+			break;
+
+		vcn_next = vcn + clen;
+		if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) ||
+		    vcn != vcn_next) {
+			/* Save memory - don't load entire run. */
+			goto failed;
+		}
+	}
+
+out:
+	if (done)
+		*done += dn;
+
+	return err;
+}
+
+/*
+ * attr_allocate_clusters - Find free space, mark it as used and store in @run.
+ */
+int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run,
+			   CLST vcn, CLST lcn, CLST len, CLST *pre_alloc,
+			   enum ALLOCATE_OPT opt, CLST *alen, const size_t fr,
+			   CLST *new_lcn)
+{
+	int err;
+	CLST flen, vcn0 = vcn, pre = pre_alloc ? *pre_alloc : 0;
+	struct wnd_bitmap *wnd = &sbi->used.bitmap;
+	size_t cnt = run->count;
+
+	for (;;) {
+		err = ntfs_look_for_free_space(sbi, lcn, len + pre, &lcn, &flen,
+					       opt);
+
+		if (err == -ENOSPC && pre) {
+			pre = 0;
+			if (*pre_alloc)
+				*pre_alloc = 0;
+			continue;
+		}
+
+		if (err)
+			goto out;
+
+		if (new_lcn && vcn == vcn0)
+			*new_lcn = lcn;
+
+		/* Add new fragment into run storage. */
+		if (!run_add_entry(run, vcn, lcn, flen, opt == ALLOCATE_MFT)) {
+			/* Undo last 'ntfs_look_for_free_space' */
+			down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
+			wnd_set_free(wnd, lcn, flen);
+			up_write(&wnd->rw_lock);
+			err = -ENOMEM;
+			goto out;
+		}
+
+		vcn += flen;
+
+		if (flen >= len || opt == ALLOCATE_MFT ||
+		    (fr && run->count - cnt >= fr)) {
+			*alen = vcn - vcn0;
+			return 0;
+		}
+
+		len -= flen;
+	}
+
+out:
+	/* Undo 'ntfs_look_for_free_space' */
+	if (vcn - vcn0) {
+		run_deallocate_ex(sbi, run, vcn0, vcn - vcn0, NULL, false);
+		run_truncate(run, vcn0);
+	}
+
+	return err;
+}
+
+/*
+ * attr_make_nonresident
+ *
+ * If page is not NULL - it is already contains resident data
+ * and locked (called from ni_write_frame()).
+ */
+int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr,
+			  struct ATTR_LIST_ENTRY *le, struct mft_inode *mi,
+			  u64 new_size, struct runs_tree *run,
+			  struct ATTRIB **ins_attr, struct page *page)
+{
+	struct ntfs_sb_info *sbi;
+	struct ATTRIB *attr_s;
+	struct MFT_REC *rec;
+	u32 used, asize, rsize, aoff, align;
+	bool is_data;
+	CLST len, alen;
+	char *next;
+	int err;
+
+	if (attr->non_res) {
+		*ins_attr = attr;
+		return 0;
+	}
+
+	sbi = mi->sbi;
+	rec = mi->mrec;
+	attr_s = NULL;
+	used = le32_to_cpu(rec->used);
+	asize = le32_to_cpu(attr->size);
+	next = Add2Ptr(attr, asize);
+	aoff = PtrOffset(rec, attr);
+	rsize = le32_to_cpu(attr->res.data_size);
+	is_data = attr->type == ATTR_DATA && !attr->name_len;
+
+	align = sbi->cluster_size;
+	if (is_attr_compressed(attr))
+		align <<= COMPRESSION_UNIT;
+	len = (rsize + align - 1) >> sbi->cluster_bits;
+
+	run_init(run);
+
+	/* Make a copy of original attribute. */
+	attr_s = kmemdup(attr, asize, GFP_NOFS);
+	if (!attr_s) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if (!len) {
+		/* Empty resident -> Empty nonresident. */
+		alen = 0;
+	} else {
+		const char *data = resident_data(attr);
+
+		err = attr_allocate_clusters(sbi, run, 0, 0, len, NULL,
+					     ALLOCATE_DEF, &alen, 0, NULL);
+		if (err)
+			goto out1;
+
+		if (!rsize) {
+			/* Empty resident -> Non empty nonresident. */
+		} else if (!is_data) {
+			err = ntfs_sb_write_run(sbi, run, 0, data, rsize);
+			if (err)
+				goto out2;
+		} else if (!page) {
+			char *kaddr;
+
+			page = grab_cache_page(ni->vfs_inode.i_mapping, 0);
+			if (!page) {
+				err = -ENOMEM;
+				goto out2;
+			}
+			kaddr = kmap_atomic(page);
+			memcpy(kaddr, data, rsize);
+			memset(kaddr + rsize, 0, PAGE_SIZE - rsize);
+			kunmap_atomic(kaddr);
+			flush_dcache_page(page);
+			SetPageUptodate(page);
+			set_page_dirty(page);
+			unlock_page(page);
+			put_page(page);
+		}
+	}
+
+	/* Remove original attribute. */
+	used -= asize;
+	memmove(attr, Add2Ptr(attr, asize), used - aoff);
+	rec->used = cpu_to_le32(used);
+	mi->dirty = true;
+	if (le)
+		al_remove_le(ni, le);
+
+	err = ni_insert_nonresident(ni, attr_s->type, attr_name(attr_s),
+				    attr_s->name_len, run, 0, alen,
+				    attr_s->flags, &attr, NULL);
+	if (err)
+		goto out3;
+
+	kfree(attr_s);
+	attr->nres.data_size = cpu_to_le64(rsize);
+	attr->nres.valid_size = attr->nres.data_size;
+
+	*ins_attr = attr;
+
+	if (is_data)
+		ni->ni_flags &= ~NI_FLAG_RESIDENT;
+
+	/* Resident attribute becomes non resident. */
+	return 0;
+
+out3:
+	attr = Add2Ptr(rec, aoff);
+	memmove(next, attr, used - aoff);
+	memcpy(attr, attr_s, asize);
+	rec->used = cpu_to_le32(used + asize);
+	mi->dirty = true;
+out2:
+	/* Undo: do not trim new allocated clusters. */
+	run_deallocate(sbi, run, false);
+	run_close(run);
+out1:
+	kfree(attr_s);
+out:
+	return err;
+}
+
+/*
+ * attr_set_size_res - Helper for attr_set_size().
+ */
+static int attr_set_size_res(struct ntfs_inode *ni, struct ATTRIB *attr,
+			     struct ATTR_LIST_ENTRY *le, struct mft_inode *mi,
+			     u64 new_size, struct runs_tree *run,
+			     struct ATTRIB **ins_attr)
+{
+	struct ntfs_sb_info *sbi = mi->sbi;
+	struct MFT_REC *rec = mi->mrec;
+	u32 used = le32_to_cpu(rec->used);
+	u32 asize = le32_to_cpu(attr->size);
+	u32 aoff = PtrOffset(rec, attr);
+	u32 rsize = le32_to_cpu(attr->res.data_size);
+	u32 tail = used - aoff - asize;
+	char *next = Add2Ptr(attr, asize);
+	s64 dsize = ALIGN(new_size, 8) - ALIGN(rsize, 8);
+
+	if (dsize < 0) {
+		memmove(next + dsize, next, tail);
+	} else if (dsize > 0) {
+		if (used + dsize > sbi->max_bytes_per_attr)
+			return attr_make_nonresident(ni, attr, le, mi, new_size,
+						     run, ins_attr, NULL);
+
+		memmove(next + dsize, next, tail);
+		memset(next, 0, dsize);
+	}
+
+	if (new_size > rsize)
+		memset(Add2Ptr(resident_data(attr), rsize), 0,
+		       new_size - rsize);
+
+	rec->used = cpu_to_le32(used + dsize);
+	attr->size = cpu_to_le32(asize + dsize);
+	attr->res.data_size = cpu_to_le32(new_size);
+	mi->dirty = true;
+	*ins_attr = attr;
+
+	return 0;
+}
+
+/*
+ * attr_set_size - Change the size of attribute.
+ *
+ * Extend:
+ *   - Sparse/compressed: No allocated clusters.
+ *   - Normal: Append allocated and preallocated new clusters.
+ * Shrink:
+ *   - No deallocate if @keep_prealloc is set.
+ */
+int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type,
+		  const __le16 *name, u8 name_len, struct runs_tree *run,
+		  u64 new_size, const u64 *new_valid, bool keep_prealloc,
+		  struct ATTRIB **ret)
+{
+	int err = 0;
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	u8 cluster_bits = sbi->cluster_bits;
+	bool is_mft =
+		ni->mi.rno == MFT_REC_MFT && type == ATTR_DATA && !name_len;
+	u64 old_valid, old_size, old_alloc, new_alloc, new_alloc_tmp;
+	struct ATTRIB *attr = NULL, *attr_b;
+	struct ATTR_LIST_ENTRY *le, *le_b;
+	struct mft_inode *mi, *mi_b;
+	CLST alen, vcn, lcn, new_alen, old_alen, svcn, evcn;
+	CLST next_svcn, pre_alloc = -1, done = 0;
+	bool is_ext;
+	u32 align;
+	struct MFT_REC *rec;
+
+again:
+	le_b = NULL;
+	attr_b = ni_find_attr(ni, NULL, &le_b, type, name, name_len, NULL,
+			      &mi_b);
+	if (!attr_b) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	if (!attr_b->non_res) {
+		err = attr_set_size_res(ni, attr_b, le_b, mi_b, new_size, run,
+					&attr_b);
+		if (err || !attr_b->non_res)
+			goto out;
+
+		/* Layout of records may be changed, so do a full search. */
+		goto again;
+	}
+
+	is_ext = is_attr_ext(attr_b);
+
+again_1:
+	align = sbi->cluster_size;
+
+	if (is_ext) {
+		align <<= attr_b->nres.c_unit;
+		if (is_attr_sparsed(attr_b))
+			keep_prealloc = false;
+	}
+
+	old_valid = le64_to_cpu(attr_b->nres.valid_size);
+	old_size = le64_to_cpu(attr_b->nres.data_size);
+	old_alloc = le64_to_cpu(attr_b->nres.alloc_size);
+	old_alen = old_alloc >> cluster_bits;
+
+	new_alloc = (new_size + align - 1) & ~(u64)(align - 1);
+	new_alen = new_alloc >> cluster_bits;
+
+	if (keep_prealloc && is_ext)
+		keep_prealloc = false;
+
+	if (keep_prealloc && new_size < old_size) {
+		attr_b->nres.data_size = cpu_to_le64(new_size);
+		mi_b->dirty = true;
+		goto ok;
+	}
+
+	vcn = old_alen - 1;
+
+	svcn = le64_to_cpu(attr_b->nres.svcn);
+	evcn = le64_to_cpu(attr_b->nres.evcn);
+
+	if (svcn <= vcn && vcn <= evcn) {
+		attr = attr_b;
+		le = le_b;
+		mi = mi_b;
+	} else if (!le_b) {
+		err = -EINVAL;
+		goto out;
+	} else {
+		le = le_b;
+		attr = ni_find_attr(ni, attr_b, &le, type, name, name_len, &vcn,
+				    &mi);
+		if (!attr) {
+			err = -EINVAL;
+			goto out;
+		}
+
+next_le_1:
+		svcn = le64_to_cpu(attr->nres.svcn);
+		evcn = le64_to_cpu(attr->nres.evcn);
+	}
+
+next_le:
+	rec = mi->mrec;
+
+	err = attr_load_runs(attr, ni, run, NULL);
+	if (err)
+		goto out;
+
+	if (new_size > old_size) {
+		CLST to_allocate;
+		size_t free;
+
+		if (new_alloc <= old_alloc) {
+			attr_b->nres.data_size = cpu_to_le64(new_size);
+			mi_b->dirty = true;
+			goto ok;
+		}
+
+		to_allocate = new_alen - old_alen;
+add_alloc_in_same_attr_seg:
+		lcn = 0;
+		if (is_mft) {
+			/* MFT allocates clusters from MFT zone. */
+			pre_alloc = 0;
+		} else if (is_ext) {
+			/* No preallocate for sparse/compress. */
+			pre_alloc = 0;
+		} else if (pre_alloc == -1) {
+			pre_alloc = 0;
+			if (type == ATTR_DATA && !name_len &&
+			    sbi->options.prealloc) {
+				CLST new_alen2 = bytes_to_cluster(
+					sbi, get_pre_allocated(new_size));
+				pre_alloc = new_alen2 - new_alen;
+			}
+
+			/* Get the last LCN to allocate from. */
+			if (old_alen &&
+			    !run_lookup_entry(run, vcn, &lcn, NULL, NULL)) {
+				lcn = SPARSE_LCN;
+			}
+
+			if (lcn == SPARSE_LCN)
+				lcn = 0;
+			else if (lcn)
+				lcn += 1;
+
+			free = wnd_zeroes(&sbi->used.bitmap);
+			if (to_allocate > free) {
+				err = -ENOSPC;
+				goto out;
+			}
+
+			if (pre_alloc && to_allocate + pre_alloc > free)
+				pre_alloc = 0;
+		}
+
+		vcn = old_alen;
+
+		if (is_ext) {
+			if (!run_add_entry(run, vcn, SPARSE_LCN, to_allocate,
+					   false)) {
+				err = -ENOMEM;
+				goto out;
+			}
+			alen = to_allocate;
+		} else {
+			/* ~3 bytes per fragment. */
+			err = attr_allocate_clusters(
+				sbi, run, vcn, lcn, to_allocate, &pre_alloc,
+				is_mft ? ALLOCATE_MFT : 0, &alen,
+				is_mft ? 0
+				       : (sbi->record_size -
+					  le32_to_cpu(rec->used) + 8) /
+							 3 +
+						 1,
+				NULL);
+			if (err)
+				goto out;
+		}
+
+		done += alen;
+		vcn += alen;
+		if (to_allocate > alen)
+			to_allocate -= alen;
+		else
+			to_allocate = 0;
+
+pack_runs:
+		err = mi_pack_runs(mi, attr, run, vcn - svcn);
+		if (err)
+			goto out;
+
+		next_svcn = le64_to_cpu(attr->nres.evcn) + 1;
+		new_alloc_tmp = (u64)next_svcn << cluster_bits;
+		attr_b->nres.alloc_size = cpu_to_le64(new_alloc_tmp);
+		mi_b->dirty = true;
+
+		if (next_svcn >= vcn && !to_allocate) {
+			/* Normal way. Update attribute and exit. */
+			attr_b->nres.data_size = cpu_to_le64(new_size);
+			goto ok;
+		}
+
+		/* At least two MFT to avoid recursive loop. */
+		if (is_mft && next_svcn == vcn &&
+		    ((u64)done << sbi->cluster_bits) >= 2 * sbi->record_size) {
+			new_size = new_alloc_tmp;
+			attr_b->nres.data_size = attr_b->nres.alloc_size;
+			goto ok;
+		}
+
+		if (le32_to_cpu(rec->used) < sbi->record_size) {
+			old_alen = next_svcn;
+			evcn = old_alen - 1;
+			goto add_alloc_in_same_attr_seg;
+		}
+
+		attr_b->nres.data_size = attr_b->nres.alloc_size;
+		if (new_alloc_tmp < old_valid)
+			attr_b->nres.valid_size = attr_b->nres.data_size;
+
+		if (type == ATTR_LIST) {
+			err = ni_expand_list(ni);
+			if (err)
+				goto out;
+			if (next_svcn < vcn)
+				goto pack_runs;
+
+			/* Layout of records is changed. */
+			goto again;
+		}
+
+		if (!ni->attr_list.size) {
+			err = ni_create_attr_list(ni);
+			if (err)
+				goto out;
+			/* Layout of records is changed. */
+		}
+
+		if (next_svcn >= vcn) {
+			/* This is MFT data, repeat. */
+			goto again;
+		}
+
+		/* Insert new attribute segment. */
+		err = ni_insert_nonresident(ni, type, name, name_len, run,
+					    next_svcn, vcn - next_svcn,
+					    attr_b->flags, &attr, &mi);
+		if (err)
+			goto out;
+
+		if (!is_mft)
+			run_truncate_head(run, evcn + 1);
+
+		svcn = le64_to_cpu(attr->nres.svcn);
+		evcn = le64_to_cpu(attr->nres.evcn);
+
+		le_b = NULL;
+		/*
+		 * Layout of records maybe changed.
+		 * Find base attribute to update.
+		 */
+		attr_b = ni_find_attr(ni, NULL, &le_b, type, name, name_len,
+				      NULL, &mi_b);
+		if (!attr_b) {
+			err = -ENOENT;
+			goto out;
+		}
+
+		attr_b->nres.alloc_size = cpu_to_le64((u64)vcn << cluster_bits);
+		attr_b->nres.data_size = attr_b->nres.alloc_size;
+		attr_b->nres.valid_size = attr_b->nres.alloc_size;
+		mi_b->dirty = true;
+		goto again_1;
+	}
+
+	if (new_size != old_size ||
+	    (new_alloc != old_alloc && !keep_prealloc)) {
+		vcn = max(svcn, new_alen);
+		new_alloc_tmp = (u64)vcn << cluster_bits;
+
+		alen = 0;
+		err = run_deallocate_ex(sbi, run, vcn, evcn - vcn + 1, &alen,
+					true);
+		if (err)
+			goto out;
+
+		run_truncate(run, vcn);
+
+		if (vcn > svcn) {
+			err = mi_pack_runs(mi, attr, run, vcn - svcn);
+			if (err)
+				goto out;
+		} else if (le && le->vcn) {
+			u16 le_sz = le16_to_cpu(le->size);
+
+			/*
+			 * NOTE: List entries for one attribute are always
+			 * the same size. We deal with last entry (vcn==0)
+			 * and it is not first in entries array
+			 * (list entry for std attribute always first).
+			 * So it is safe to step back.
+			 */
+			mi_remove_attr(NULL, mi, attr);
+
+			if (!al_remove_le(ni, le)) {
+				err = -EINVAL;
+				goto out;
+			}
+
+			le = (struct ATTR_LIST_ENTRY *)((u8 *)le - le_sz);
+		} else {
+			attr->nres.evcn = cpu_to_le64((u64)vcn - 1);
+			mi->dirty = true;
+		}
+
+		attr_b->nres.alloc_size = cpu_to_le64(new_alloc_tmp);
+
+		if (vcn == new_alen) {
+			attr_b->nres.data_size = cpu_to_le64(new_size);
+			if (new_size < old_valid)
+				attr_b->nres.valid_size =
+					attr_b->nres.data_size;
+		} else {
+			if (new_alloc_tmp <=
+			    le64_to_cpu(attr_b->nres.data_size))
+				attr_b->nres.data_size =
+					attr_b->nres.alloc_size;
+			if (new_alloc_tmp <
+			    le64_to_cpu(attr_b->nres.valid_size))
+				attr_b->nres.valid_size =
+					attr_b->nres.alloc_size;
+		}
+
+		if (is_ext)
+			le64_sub_cpu(&attr_b->nres.total_size,
+				     ((u64)alen << cluster_bits));
+
+		mi_b->dirty = true;
+
+		if (new_alloc_tmp <= new_alloc)
+			goto ok;
+
+		old_size = new_alloc_tmp;
+		vcn = svcn - 1;
+
+		if (le == le_b) {
+			attr = attr_b;
+			mi = mi_b;
+			evcn = svcn - 1;
+			svcn = 0;
+			goto next_le;
+		}
+
+		if (le->type != type || le->name_len != name_len ||
+		    memcmp(le_name(le), name, name_len * sizeof(short))) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		err = ni_load_mi(ni, le, &mi);
+		if (err)
+			goto out;
+
+		attr = mi_find_attr(mi, NULL, type, name, name_len, &le->id);
+		if (!attr) {
+			err = -EINVAL;
+			goto out;
+		}
+		goto next_le_1;
+	}
+
+ok:
+	if (new_valid) {
+		__le64 valid = cpu_to_le64(min(*new_valid, new_size));
+
+		if (attr_b->nres.valid_size != valid) {
+			attr_b->nres.valid_size = valid;
+			mi_b->dirty = true;
+		}
+	}
+
+out:
+	if (!err && attr_b && ret)
+		*ret = attr_b;
+
+	/* Update inode_set_bytes. */
+	if (!err && ((type == ATTR_DATA && !name_len) ||
+		     (type == ATTR_ALLOC && name == I30_NAME))) {
+		bool dirty = false;
+
+		if (ni->vfs_inode.i_size != new_size) {
+			ni->vfs_inode.i_size = new_size;
+			dirty = true;
+		}
+
+		if (attr_b && attr_b->non_res) {
+			new_alloc = le64_to_cpu(attr_b->nres.alloc_size);
+			if (inode_get_bytes(&ni->vfs_inode) != new_alloc) {
+				inode_set_bytes(&ni->vfs_inode, new_alloc);
+				dirty = true;
+			}
+		}
+
+		if (dirty) {
+			ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
+			mark_inode_dirty(&ni->vfs_inode);
+		}
+	}
+
+	return err;
+}
+
+int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn,
+			CLST *len, bool *new)
+{
+	int err = 0;
+	struct runs_tree *run = &ni->file.run;
+	struct ntfs_sb_info *sbi;
+	u8 cluster_bits;
+	struct ATTRIB *attr = NULL, *attr_b;
+	struct ATTR_LIST_ENTRY *le, *le_b;
+	struct mft_inode *mi, *mi_b;
+	CLST hint, svcn, to_alloc, evcn1, next_svcn, asize, end;
+	u64 total_size;
+	u32 clst_per_frame;
+	bool ok;
+
+	if (new)
+		*new = false;
+
+	down_read(&ni->file.run_lock);
+	ok = run_lookup_entry(run, vcn, lcn, len, NULL);
+	up_read(&ni->file.run_lock);
+
+	if (ok && (*lcn != SPARSE_LCN || !new)) {
+		/* Normal way. */
+		return 0;
+	}
+
+	if (!clen)
+		clen = 1;
+
+	if (ok && clen > *len)
+		clen = *len;
+
+	sbi = ni->mi.sbi;
+	cluster_bits = sbi->cluster_bits;
+
+	ni_lock(ni);
+	down_write(&ni->file.run_lock);
+
+	le_b = NULL;
+	attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b);
+	if (!attr_b) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	if (!attr_b->non_res) {
+		*lcn = RESIDENT_LCN;
+		*len = 1;
+		goto out;
+	}
+
+	asize = le64_to_cpu(attr_b->nres.alloc_size) >> sbi->cluster_bits;
+	if (vcn >= asize) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	clst_per_frame = 1u << attr_b->nres.c_unit;
+	to_alloc = (clen + clst_per_frame - 1) & ~(clst_per_frame - 1);
+
+	if (vcn + to_alloc > asize)
+		to_alloc = asize - vcn;
+
+	svcn = le64_to_cpu(attr_b->nres.svcn);
+	evcn1 = le64_to_cpu(attr_b->nres.evcn) + 1;
+
+	attr = attr_b;
+	le = le_b;
+	mi = mi_b;
+
+	if (le_b && (vcn < svcn || evcn1 <= vcn)) {
+		attr = ni_find_attr(ni, attr_b, &le, ATTR_DATA, NULL, 0, &vcn,
+				    &mi);
+		if (!attr) {
+			err = -EINVAL;
+			goto out;
+		}
+		svcn = le64_to_cpu(attr->nres.svcn);
+		evcn1 = le64_to_cpu(attr->nres.evcn) + 1;
+	}
+
+	err = attr_load_runs(attr, ni, run, NULL);
+	if (err)
+		goto out;
+
+	if (!ok) {
+		ok = run_lookup_entry(run, vcn, lcn, len, NULL);
+		if (ok && (*lcn != SPARSE_LCN || !new)) {
+			/* Normal way. */
+			err = 0;
+			goto ok;
+		}
+
+		if (!ok && !new) {
+			*len = 0;
+			err = 0;
+			goto ok;
+		}
+
+		if (ok && clen > *len) {
+			clen = *len;
+			to_alloc = (clen + clst_per_frame - 1) &
+				   ~(clst_per_frame - 1);
+		}
+	}
+
+	if (!is_attr_ext(attr_b)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Get the last LCN to allocate from. */
+	hint = 0;
+
+	if (vcn > evcn1) {
+		if (!run_add_entry(run, evcn1, SPARSE_LCN, vcn - evcn1,
+				   false)) {
+			err = -ENOMEM;
+			goto out;
+		}
+	} else if (vcn && !run_lookup_entry(run, vcn - 1, &hint, NULL, NULL)) {
+		hint = -1;
+	}
+
+	err = attr_allocate_clusters(
+		sbi, run, vcn, hint + 1, to_alloc, NULL, 0, len,
+		(sbi->record_size - le32_to_cpu(mi->mrec->used) + 8) / 3 + 1,
+		lcn);
+	if (err)
+		goto out;
+	*new = true;
+
+	end = vcn + *len;
+
+	total_size = le64_to_cpu(attr_b->nres.total_size) +
+		     ((u64)*len << cluster_bits);
+
+repack:
+	err = mi_pack_runs(mi, attr, run, max(end, evcn1) - svcn);
+	if (err)
+		goto out;
+
+	attr_b->nres.total_size = cpu_to_le64(total_size);
+	inode_set_bytes(&ni->vfs_inode, total_size);
+	ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
+
+	mi_b->dirty = true;
+	mark_inode_dirty(&ni->vfs_inode);
+
+	/* Stored [vcn : next_svcn) from [vcn : end). */
+	next_svcn = le64_to_cpu(attr->nres.evcn) + 1;
+
+	if (end <= evcn1) {
+		if (next_svcn == evcn1) {
+			/* Normal way. Update attribute and exit. */
+			goto ok;
+		}
+		/* Add new segment [next_svcn : evcn1 - next_svcn). */
+		if (!ni->attr_list.size) {
+			err = ni_create_attr_list(ni);
+			if (err)
+				goto out;
+			/* Layout of records is changed. */
+			le_b = NULL;
+			attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL,
+					      0, NULL, &mi_b);
+			if (!attr_b) {
+				err = -ENOENT;
+				goto out;
+			}
+
+			attr = attr_b;
+			le = le_b;
+			mi = mi_b;
+			goto repack;
+		}
+	}
+
+	svcn = evcn1;
+
+	/* Estimate next attribute. */
+	attr = ni_find_attr(ni, attr, &le, ATTR_DATA, NULL, 0, &svcn, &mi);
+
+	if (attr) {
+		CLST alloc = bytes_to_cluster(
+			sbi, le64_to_cpu(attr_b->nres.alloc_size));
+		CLST evcn = le64_to_cpu(attr->nres.evcn);
+
+		if (end < next_svcn)
+			end = next_svcn;
+		while (end > evcn) {
+			/* Remove segment [svcn : evcn). */
+			mi_remove_attr(NULL, mi, attr);
+
+			if (!al_remove_le(ni, le)) {
+				err = -EINVAL;
+				goto out;
+			}
+
+			if (evcn + 1 >= alloc) {
+				/* Last attribute segment. */
+				evcn1 = evcn + 1;
+				goto ins_ext;
+			}
+
+			if (ni_load_mi(ni, le, &mi)) {
+				attr = NULL;
+				goto out;
+			}
+
+			attr = mi_find_attr(mi, NULL, ATTR_DATA, NULL, 0,
+					    &le->id);
+			if (!attr) {
+				err = -EINVAL;
+				goto out;
+			}
+			svcn = le64_to_cpu(attr->nres.svcn);
+			evcn = le64_to_cpu(attr->nres.evcn);
+		}
+
+		if (end < svcn)
+			end = svcn;
+
+		err = attr_load_runs(attr, ni, run, &end);
+		if (err)
+			goto out;
+
+		evcn1 = evcn + 1;
+		attr->nres.svcn = cpu_to_le64(next_svcn);
+		err = mi_pack_runs(mi, attr, run, evcn1 - next_svcn);
+		if (err)
+			goto out;
+
+		le->vcn = cpu_to_le64(next_svcn);
+		ni->attr_list.dirty = true;
+		mi->dirty = true;
+
+		next_svcn = le64_to_cpu(attr->nres.evcn) + 1;
+	}
+ins_ext:
+	if (evcn1 > next_svcn) {
+		err = ni_insert_nonresident(ni, ATTR_DATA, NULL, 0, run,
+					    next_svcn, evcn1 - next_svcn,
+					    attr_b->flags, &attr, &mi);
+		if (err)
+			goto out;
+	}
+ok:
+	run_truncate_around(run, vcn);
+out:
+	up_write(&ni->file.run_lock);
+	ni_unlock(ni);
+
+	return err;
+}
+
+int attr_data_read_resident(struct ntfs_inode *ni, struct page *page)
+{
+	u64 vbo;
+	struct ATTRIB *attr;
+	u32 data_size;
+
+	attr = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, NULL, NULL);
+	if (!attr)
+		return -EINVAL;
+
+	if (attr->non_res)
+		return E_NTFS_NONRESIDENT;
+
+	vbo = page->index << PAGE_SHIFT;
+	data_size = le32_to_cpu(attr->res.data_size);
+	if (vbo < data_size) {
+		const char *data = resident_data(attr);
+		char *kaddr = kmap_atomic(page);
+		u32 use = data_size - vbo;
+
+		if (use > PAGE_SIZE)
+			use = PAGE_SIZE;
+
+		memcpy(kaddr, data + vbo, use);
+		memset(kaddr + use, 0, PAGE_SIZE - use);
+		kunmap_atomic(kaddr);
+		flush_dcache_page(page);
+		SetPageUptodate(page);
+	} else if (!PageUptodate(page)) {
+		zero_user_segment(page, 0, PAGE_SIZE);
+		SetPageUptodate(page);
+	}
+
+	return 0;
+}
+
+int attr_data_write_resident(struct ntfs_inode *ni, struct page *page)
+{
+	u64 vbo;
+	struct mft_inode *mi;
+	struct ATTRIB *attr;
+	u32 data_size;
+
+	attr = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, NULL, &mi);
+	if (!attr)
+		return -EINVAL;
+
+	if (attr->non_res) {
+		/* Return special error code to check this case. */
+		return E_NTFS_NONRESIDENT;
+	}
+
+	vbo = page->index << PAGE_SHIFT;
+	data_size = le32_to_cpu(attr->res.data_size);
+	if (vbo < data_size) {
+		char *data = resident_data(attr);
+		char *kaddr = kmap_atomic(page);
+		u32 use = data_size - vbo;
+
+		if (use > PAGE_SIZE)
+			use = PAGE_SIZE;
+		memcpy(data + vbo, kaddr, use);
+		kunmap_atomic(kaddr);
+		mi->dirty = true;
+	}
+	ni->i_valid = data_size;
+
+	return 0;
+}
+
+/*
+ * attr_load_runs_vcn - Load runs with VCN.
+ */
+int attr_load_runs_vcn(struct ntfs_inode *ni, enum ATTR_TYPE type,
+		       const __le16 *name, u8 name_len, struct runs_tree *run,
+		       CLST vcn)
+{
+	struct ATTRIB *attr;
+	int err;
+	CLST svcn, evcn;
+	u16 ro;
+
+	attr = ni_find_attr(ni, NULL, NULL, type, name, name_len, &vcn, NULL);
+	if (!attr) {
+		/* Is record corrupted? */
+		return -ENOENT;
+	}
+
+	svcn = le64_to_cpu(attr->nres.svcn);
+	evcn = le64_to_cpu(attr->nres.evcn);
+
+	if (evcn < vcn || vcn < svcn) {
+		/* Is record corrupted? */
+		return -EINVAL;
+	}
+
+	ro = le16_to_cpu(attr->nres.run_off);
+	err = run_unpack_ex(run, ni->mi.sbi, ni->mi.rno, svcn, evcn, svcn,
+			    Add2Ptr(attr, ro), le32_to_cpu(attr->size) - ro);
+	if (err < 0)
+		return err;
+	return 0;
+}
+
+/*
+ * attr_load_runs_range - Load runs for given range [from to).
+ */
+int attr_load_runs_range(struct ntfs_inode *ni, enum ATTR_TYPE type,
+			 const __le16 *name, u8 name_len, struct runs_tree *run,
+			 u64 from, u64 to)
+{
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	u8 cluster_bits = sbi->cluster_bits;
+	CLST vcn = from >> cluster_bits;
+	CLST vcn_last = (to - 1) >> cluster_bits;
+	CLST lcn, clen;
+	int err;
+
+	for (vcn = from >> cluster_bits; vcn <= vcn_last; vcn += clen) {
+		if (!run_lookup_entry(run, vcn, &lcn, &clen, NULL)) {
+			err = attr_load_runs_vcn(ni, type, name, name_len, run,
+						 vcn);
+			if (err)
+				return err;
+			clen = 0; /* Next run_lookup_entry(vcn) must be success. */
+		}
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_NTFS3_LZX_XPRESS
+/*
+ * attr_wof_frame_info
+ *
+ * Read header of Xpress/LZX file to get info about frame.
+ */
+int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr,
+			struct runs_tree *run, u64 frame, u64 frames,
+			u8 frame_bits, u32 *ondisk_size, u64 *vbo_data)
+{
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	u64 vbo[2], off[2], wof_size;
+	u32 voff;
+	u8 bytes_per_off;
+	char *addr;
+	struct page *page;
+	int i, err;
+	__le32 *off32;
+	__le64 *off64;
+
+	if (ni->vfs_inode.i_size < 0x100000000ull) {
+		/* File starts with array of 32 bit offsets. */
+		bytes_per_off = sizeof(__le32);
+		vbo[1] = frame << 2;
+		*vbo_data = frames << 2;
+	} else {
+		/* File starts with array of 64 bit offsets. */
+		bytes_per_off = sizeof(__le64);
+		vbo[1] = frame << 3;
+		*vbo_data = frames << 3;
+	}
+
+	/*
+	 * Read 4/8 bytes at [vbo - 4(8)] == offset where compressed frame starts.
+	 * Read 4/8 bytes at [vbo] == offset where compressed frame ends.
+	 */
+	if (!attr->non_res) {
+		if (vbo[1] + bytes_per_off > le32_to_cpu(attr->res.data_size)) {
+			ntfs_inode_err(&ni->vfs_inode, "is corrupted");
+			return -EINVAL;
+		}
+		addr = resident_data(attr);
+
+		if (bytes_per_off == sizeof(__le32)) {
+			off32 = Add2Ptr(addr, vbo[1]);
+			off[0] = vbo[1] ? le32_to_cpu(off32[-1]) : 0;
+			off[1] = le32_to_cpu(off32[0]);
+		} else {
+			off64 = Add2Ptr(addr, vbo[1]);
+			off[0] = vbo[1] ? le64_to_cpu(off64[-1]) : 0;
+			off[1] = le64_to_cpu(off64[0]);
+		}
+
+		*vbo_data += off[0];
+		*ondisk_size = off[1] - off[0];
+		return 0;
+	}
+
+	wof_size = le64_to_cpu(attr->nres.data_size);
+	down_write(&ni->file.run_lock);
+	page = ni->file.offs_page;
+	if (!page) {
+		page = alloc_page(GFP_KERNEL);
+		if (!page) {
+			err = -ENOMEM;
+			goto out;
+		}
+		page->index = -1;
+		ni->file.offs_page = page;
+	}
+	lock_page(page);
+	addr = page_address(page);
+
+	if (vbo[1]) {
+		voff = vbo[1] & (PAGE_SIZE - 1);
+		vbo[0] = vbo[1] - bytes_per_off;
+		i = 0;
+	} else {
+		voff = 0;
+		vbo[0] = 0;
+		off[0] = 0;
+		i = 1;
+	}
+
+	do {
+		pgoff_t index = vbo[i] >> PAGE_SHIFT;
+
+		if (index != page->index) {
+			u64 from = vbo[i] & ~(u64)(PAGE_SIZE - 1);
+			u64 to = min(from + PAGE_SIZE, wof_size);
+
+			err = attr_load_runs_range(ni, ATTR_DATA, WOF_NAME,
+						   ARRAY_SIZE(WOF_NAME), run,
+						   from, to);
+			if (err)
+				goto out1;
+
+			err = ntfs_bio_pages(sbi, run, &page, 1, from,
+					     to - from, REQ_OP_READ);
+			if (err) {
+				page->index = -1;
+				goto out1;
+			}
+			page->index = index;
+		}
+
+		if (i) {
+			if (bytes_per_off == sizeof(__le32)) {
+				off32 = Add2Ptr(addr, voff);
+				off[1] = le32_to_cpu(*off32);
+			} else {
+				off64 = Add2Ptr(addr, voff);
+				off[1] = le64_to_cpu(*off64);
+			}
+		} else if (!voff) {
+			if (bytes_per_off == sizeof(__le32)) {
+				off32 = Add2Ptr(addr, PAGE_SIZE - sizeof(u32));
+				off[0] = le32_to_cpu(*off32);
+			} else {
+				off64 = Add2Ptr(addr, PAGE_SIZE - sizeof(u64));
+				off[0] = le64_to_cpu(*off64);
+			}
+		} else {
+			/* Two values in one page. */
+			if (bytes_per_off == sizeof(__le32)) {
+				off32 = Add2Ptr(addr, voff);
+				off[0] = le32_to_cpu(off32[-1]);
+				off[1] = le32_to_cpu(off32[0]);
+			} else {
+				off64 = Add2Ptr(addr, voff);
+				off[0] = le64_to_cpu(off64[-1]);
+				off[1] = le64_to_cpu(off64[0]);
+			}
+			break;
+		}
+	} while (++i < 2);
+
+	*vbo_data += off[0];
+	*ondisk_size = off[1] - off[0];
+
+out1:
+	unlock_page(page);
+out:
+	up_write(&ni->file.run_lock);
+	return err;
+}
+#endif
+
+/*
+ * attr_is_frame_compressed - Used to detect compressed frame.
+ */
+int attr_is_frame_compressed(struct ntfs_inode *ni, struct ATTRIB *attr,
+			     CLST frame, CLST *clst_data)
+{
+	int err;
+	u32 clst_frame;
+	CLST clen, lcn, vcn, alen, slen, vcn_next;
+	size_t idx;
+	struct runs_tree *run;
+
+	*clst_data = 0;
+
+	if (!is_attr_compressed(attr))
+		return 0;
+
+	if (!attr->non_res)
+		return 0;
+
+	clst_frame = 1u << attr->nres.c_unit;
+	vcn = frame * clst_frame;
+	run = &ni->file.run;
+
+	if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) {
+		err = attr_load_runs_vcn(ni, attr->type, attr_name(attr),
+					 attr->name_len, run, vcn);
+		if (err)
+			return err;
+
+		if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx))
+			return -EINVAL;
+	}
+
+	if (lcn == SPARSE_LCN) {
+		/* Sparsed frame. */
+		return 0;
+	}
+
+	if (clen >= clst_frame) {
+		/*
+		 * The frame is not compressed 'cause
+		 * it does not contain any sparse clusters.
+		 */
+		*clst_data = clst_frame;
+		return 0;
+	}
+
+	alen = bytes_to_cluster(ni->mi.sbi, le64_to_cpu(attr->nres.alloc_size));
+	slen = 0;
+	*clst_data = clen;
+
+	/*
+	 * The frame is compressed if *clst_data + slen >= clst_frame.
+	 * Check next fragments.
+	 */
+	while ((vcn += clen) < alen) {
+		vcn_next = vcn;
+
+		if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) ||
+		    vcn_next != vcn) {
+			err = attr_load_runs_vcn(ni, attr->type,
+						 attr_name(attr),
+						 attr->name_len, run, vcn_next);
+			if (err)
+				return err;
+			vcn = vcn_next;
+
+			if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx))
+				return -EINVAL;
+		}
+
+		if (lcn == SPARSE_LCN) {
+			slen += clen;
+		} else {
+			if (slen) {
+				/*
+				 * Data_clusters + sparse_clusters =
+				 * not enough for frame.
+				 */
+				return -EINVAL;
+			}
+			*clst_data += clen;
+		}
+
+		if (*clst_data + slen >= clst_frame) {
+			if (!slen) {
+				/*
+				 * There is no sparsed clusters in this frame
+				 * so it is not compressed.
+				 */
+				*clst_data = clst_frame;
+			} else {
+				/* Frame is compressed. */
+			}
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * attr_allocate_frame - Allocate/free clusters for @frame.
+ *
+ * Assumed: down_write(&ni->file.run_lock);
+ */
+int attr_allocate_frame(struct ntfs_inode *ni, CLST frame, size_t compr_size,
+			u64 new_valid)
+{
+	int err = 0;
+	struct runs_tree *run = &ni->file.run;
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	struct ATTRIB *attr = NULL, *attr_b;
+	struct ATTR_LIST_ENTRY *le, *le_b;
+	struct mft_inode *mi, *mi_b;
+	CLST svcn, evcn1, next_svcn, lcn, len;
+	CLST vcn, end, clst_data;
+	u64 total_size, valid_size, data_size;
+
+	le_b = NULL;
+	attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b);
+	if (!attr_b)
+		return -ENOENT;
+
+	if (!is_attr_ext(attr_b))
+		return -EINVAL;
+
+	vcn = frame << NTFS_LZNT_CUNIT;
+	total_size = le64_to_cpu(attr_b->nres.total_size);
+
+	svcn = le64_to_cpu(attr_b->nres.svcn);
+	evcn1 = le64_to_cpu(attr_b->nres.evcn) + 1;
+	data_size = le64_to_cpu(attr_b->nres.data_size);
+
+	if (svcn <= vcn && vcn < evcn1) {
+		attr = attr_b;
+		le = le_b;
+		mi = mi_b;
+	} else if (!le_b) {
+		err = -EINVAL;
+		goto out;
+	} else {
+		le = le_b;
+		attr = ni_find_attr(ni, attr_b, &le, ATTR_DATA, NULL, 0, &vcn,
+				    &mi);
+		if (!attr) {
+			err = -EINVAL;
+			goto out;
+		}
+		svcn = le64_to_cpu(attr->nres.svcn);
+		evcn1 = le64_to_cpu(attr->nres.evcn) + 1;
+	}
+
+	err = attr_load_runs(attr, ni, run, NULL);
+	if (err)
+		goto out;
+
+	err = attr_is_frame_compressed(ni, attr_b, frame, &clst_data);
+	if (err)
+		goto out;
+
+	total_size -= (u64)clst_data << sbi->cluster_bits;
+
+	len = bytes_to_cluster(sbi, compr_size);
+
+	if (len == clst_data)
+		goto out;
+
+	if (len < clst_data) {
+		err = run_deallocate_ex(sbi, run, vcn + len, clst_data - len,
+					NULL, true);
+		if (err)
+			goto out;
+
+		if (!run_add_entry(run, vcn + len, SPARSE_LCN, clst_data - len,
+				   false)) {
+			err = -ENOMEM;
+			goto out;
+		}
+		end = vcn + clst_data;
+		/* Run contains updated range [vcn + len : end). */
+	} else {
+		CLST alen, hint = 0;
+		/* Get the last LCN to allocate from. */
+		if (vcn + clst_data &&
+		    !run_lookup_entry(run, vcn + clst_data - 1, &hint, NULL,
+				      NULL)) {
+			hint = -1;
+		}
+
+		err = attr_allocate_clusters(sbi, run, vcn + clst_data,
+					     hint + 1, len - clst_data, NULL, 0,
+					     &alen, 0, &lcn);
+		if (err)
+			goto out;
+
+		end = vcn + len;
+		/* Run contains updated range [vcn + clst_data : end). */
+	}
+
+	total_size += (u64)len << sbi->cluster_bits;
+
+repack:
+	err = mi_pack_runs(mi, attr, run, max(end, evcn1) - svcn);
+	if (err)
+		goto out;
+
+	attr_b->nres.total_size = cpu_to_le64(total_size);
+	inode_set_bytes(&ni->vfs_inode, total_size);
+
+	mi_b->dirty = true;
+	mark_inode_dirty(&ni->vfs_inode);
+
+	/* Stored [vcn : next_svcn) from [vcn : end). */
+	next_svcn = le64_to_cpu(attr->nres.evcn) + 1;
+
+	if (end <= evcn1) {
+		if (next_svcn == evcn1) {
+			/* Normal way. Update attribute and exit. */
+			goto ok;
+		}
+		/* Add new segment [next_svcn : evcn1 - next_svcn). */
+		if (!ni->attr_list.size) {
+			err = ni_create_attr_list(ni);
+			if (err)
+				goto out;
+			/* Layout of records is changed. */
+			le_b = NULL;
+			attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL,
+					      0, NULL, &mi_b);
+			if (!attr_b) {
+				err = -ENOENT;
+				goto out;
+			}
+
+			attr = attr_b;
+			le = le_b;
+			mi = mi_b;
+			goto repack;
+		}
+	}
+
+	svcn = evcn1;
+
+	/* Estimate next attribute. */
+	attr = ni_find_attr(ni, attr, &le, ATTR_DATA, NULL, 0, &svcn, &mi);
+
+	if (attr) {
+		CLST alloc = bytes_to_cluster(
+			sbi, le64_to_cpu(attr_b->nres.alloc_size));
+		CLST evcn = le64_to_cpu(attr->nres.evcn);
+
+		if (end < next_svcn)
+			end = next_svcn;
+		while (end > evcn) {
+			/* Remove segment [svcn : evcn). */
+			mi_remove_attr(NULL, mi, attr);
+
+			if (!al_remove_le(ni, le)) {
+				err = -EINVAL;
+				goto out;
+			}
+
+			if (evcn + 1 >= alloc) {
+				/* Last attribute segment. */
+				evcn1 = evcn + 1;
+				goto ins_ext;
+			}
+
+			if (ni_load_mi(ni, le, &mi)) {
+				attr = NULL;
+				goto out;
+			}
+
+			attr = mi_find_attr(mi, NULL, ATTR_DATA, NULL, 0,
+					    &le->id);
+			if (!attr) {
+				err = -EINVAL;
+				goto out;
+			}
+			svcn = le64_to_cpu(attr->nres.svcn);
+			evcn = le64_to_cpu(attr->nres.evcn);
+		}
+
+		if (end < svcn)
+			end = svcn;
+
+		err = attr_load_runs(attr, ni, run, &end);
+		if (err)
+			goto out;
+
+		evcn1 = evcn + 1;
+		attr->nres.svcn = cpu_to_le64(next_svcn);
+		err = mi_pack_runs(mi, attr, run, evcn1 - next_svcn);
+		if (err)
+			goto out;
+
+		le->vcn = cpu_to_le64(next_svcn);
+		ni->attr_list.dirty = true;
+		mi->dirty = true;
+
+		next_svcn = le64_to_cpu(attr->nres.evcn) + 1;
+	}
+ins_ext:
+	if (evcn1 > next_svcn) {
+		err = ni_insert_nonresident(ni, ATTR_DATA, NULL, 0, run,
+					    next_svcn, evcn1 - next_svcn,
+					    attr_b->flags, &attr, &mi);
+		if (err)
+			goto out;
+	}
+ok:
+	run_truncate_around(run, vcn);
+out:
+	if (new_valid > data_size)
+		new_valid = data_size;
+
+	valid_size = le64_to_cpu(attr_b->nres.valid_size);
+	if (new_valid != valid_size) {
+		attr_b->nres.valid_size = cpu_to_le64(valid_size);
+		mi_b->dirty = true;
+	}
+
+	return err;
+}
+
+/*
+ * attr_collapse_range - Collapse range in file.
+ */
+int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes)
+{
+	int err = 0;
+	struct runs_tree *run = &ni->file.run;
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	struct ATTRIB *attr = NULL, *attr_b;
+	struct ATTR_LIST_ENTRY *le, *le_b;
+	struct mft_inode *mi, *mi_b;
+	CLST svcn, evcn1, len, dealloc, alen;
+	CLST vcn, end;
+	u64 valid_size, data_size, alloc_size, total_size;
+	u32 mask;
+	__le16 a_flags;
+
+	if (!bytes)
+		return 0;
+
+	le_b = NULL;
+	attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b);
+	if (!attr_b)
+		return -ENOENT;
+
+	if (!attr_b->non_res) {
+		/* Attribute is resident. Nothing to do? */
+		return 0;
+	}
+
+	data_size = le64_to_cpu(attr_b->nres.data_size);
+	alloc_size = le64_to_cpu(attr_b->nres.alloc_size);
+	a_flags = attr_b->flags;
+
+	if (is_attr_ext(attr_b)) {
+		total_size = le64_to_cpu(attr_b->nres.total_size);
+		mask = (sbi->cluster_size << attr_b->nres.c_unit) - 1;
+	} else {
+		total_size = alloc_size;
+		mask = sbi->cluster_mask;
+	}
+
+	if ((vbo & mask) || (bytes & mask)) {
+		/* Allow to collapse only cluster aligned ranges. */
+		return -EINVAL;
+	}
+
+	if (vbo > data_size)
+		return -EINVAL;
+
+	down_write(&ni->file.run_lock);
+
+	if (vbo + bytes >= data_size) {
+		u64 new_valid = min(ni->i_valid, vbo);
+
+		/* Simple truncate file at 'vbo'. */
+		truncate_setsize(&ni->vfs_inode, vbo);
+		err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, vbo,
+				    &new_valid, true, NULL);
+
+		if (!err && new_valid < ni->i_valid)
+			ni->i_valid = new_valid;
+
+		goto out;
+	}
+
+	/*
+	 * Enumerate all attribute segments and collapse.
+	 */
+	alen = alloc_size >> sbi->cluster_bits;
+	vcn = vbo >> sbi->cluster_bits;
+	len = bytes >> sbi->cluster_bits;
+	end = vcn + len;
+	dealloc = 0;
+
+	svcn = le64_to_cpu(attr_b->nres.svcn);
+	evcn1 = le64_to_cpu(attr_b->nres.evcn) + 1;
+
+	if (svcn <= vcn && vcn < evcn1) {
+		attr = attr_b;
+		le = le_b;
+		mi = mi_b;
+	} else if (!le_b) {
+		err = -EINVAL;
+		goto out;
+	} else {
+		le = le_b;
+		attr = ni_find_attr(ni, attr_b, &le, ATTR_DATA, NULL, 0, &vcn,
+				    &mi);
+		if (!attr) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		svcn = le64_to_cpu(attr->nres.svcn);
+		evcn1 = le64_to_cpu(attr->nres.evcn) + 1;
+	}
+
+	for (;;) {
+		if (svcn >= end) {
+			/* Shift VCN- */
+			attr->nres.svcn = cpu_to_le64(svcn - len);
+			attr->nres.evcn = cpu_to_le64(evcn1 - 1 - len);
+			if (le) {
+				le->vcn = attr->nres.svcn;
+				ni->attr_list.dirty = true;
+			}
+			mi->dirty = true;
+		} else if (svcn < vcn || end < evcn1) {
+			CLST vcn1, eat, next_svcn;
+
+			/* Collapse a part of this attribute segment. */
+			err = attr_load_runs(attr, ni, run, &svcn);
+			if (err)
+				goto out;
+			vcn1 = max(vcn, svcn);
+			eat = min(end, evcn1) - vcn1;
+
+			err = run_deallocate_ex(sbi, run, vcn1, eat, &dealloc,
+						true);
+			if (err)
+				goto out;
+
+			if (!run_collapse_range(run, vcn1, eat)) {
+				err = -ENOMEM;
+				goto out;
+			}
+
+			if (svcn >= vcn) {
+				/* Shift VCN */
+				attr->nres.svcn = cpu_to_le64(vcn);
+				if (le) {
+					le->vcn = attr->nres.svcn;
+					ni->attr_list.dirty = true;
+				}
+			}
+
+			err = mi_pack_runs(mi, attr, run, evcn1 - svcn - eat);
+			if (err)
+				goto out;
+
+			next_svcn = le64_to_cpu(attr->nres.evcn) + 1;
+			if (next_svcn + eat < evcn1) {
+				err = ni_insert_nonresident(
+					ni, ATTR_DATA, NULL, 0, run, next_svcn,
+					evcn1 - eat - next_svcn, a_flags, &attr,
+					&mi);
+				if (err)
+					goto out;
+
+				/* Layout of records maybe changed. */
+				attr_b = NULL;
+				le = al_find_ex(ni, NULL, ATTR_DATA, NULL, 0,
+						&next_svcn);
+				if (!le) {
+					err = -EINVAL;
+					goto out;
+				}
+			}
+
+			/* Free all allocated memory. */
+			run_truncate(run, 0);
+		} else {
+			u16 le_sz;
+			u16 roff = le16_to_cpu(attr->nres.run_off);
+
+			run_unpack_ex(RUN_DEALLOCATE, sbi, ni->mi.rno, svcn,
+				      evcn1 - 1, svcn, Add2Ptr(attr, roff),
+				      le32_to_cpu(attr->size) - roff);
+
+			/* Delete this attribute segment. */
+			mi_remove_attr(NULL, mi, attr);
+			if (!le)
+				break;
+
+			le_sz = le16_to_cpu(le->size);
+			if (!al_remove_le(ni, le)) {
+				err = -EINVAL;
+				goto out;
+			}
+
+			if (evcn1 >= alen)
+				break;
+
+			if (!svcn) {
+				/* Load next record that contains this attribute. */
+				if (ni_load_mi(ni, le, &mi)) {
+					err = -EINVAL;
+					goto out;
+				}
+
+				/* Look for required attribute. */
+				attr = mi_find_attr(mi, NULL, ATTR_DATA, NULL,
+						    0, &le->id);
+				if (!attr) {
+					err = -EINVAL;
+					goto out;
+				}
+				goto next_attr;
+			}
+			le = (struct ATTR_LIST_ENTRY *)((u8 *)le - le_sz);
+		}
+
+		if (evcn1 >= alen)
+			break;
+
+		attr = ni_enum_attr_ex(ni, attr, &le, &mi);
+		if (!attr) {
+			err = -EINVAL;
+			goto out;
+		}
+
+next_attr:
+		svcn = le64_to_cpu(attr->nres.svcn);
+		evcn1 = le64_to_cpu(attr->nres.evcn) + 1;
+	}
+
+	if (!attr_b) {
+		le_b = NULL;
+		attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL,
+				      &mi_b);
+		if (!attr_b) {
+			err = -ENOENT;
+			goto out;
+		}
+	}
+
+	data_size -= bytes;
+	valid_size = ni->i_valid;
+	if (vbo + bytes <= valid_size)
+		valid_size -= bytes;
+	else if (vbo < valid_size)
+		valid_size = vbo;
+
+	attr_b->nres.alloc_size = cpu_to_le64(alloc_size - bytes);
+	attr_b->nres.data_size = cpu_to_le64(data_size);
+	attr_b->nres.valid_size = cpu_to_le64(min(valid_size, data_size));
+	total_size -= (u64)dealloc << sbi->cluster_bits;
+	if (is_attr_ext(attr_b))
+		attr_b->nres.total_size = cpu_to_le64(total_size);
+	mi_b->dirty = true;
+
+	/* Update inode size. */
+	ni->i_valid = valid_size;
+	ni->vfs_inode.i_size = data_size;
+	inode_set_bytes(&ni->vfs_inode, total_size);
+	ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
+	mark_inode_dirty(&ni->vfs_inode);
+
+out:
+	up_write(&ni->file.run_lock);
+	if (err)
+		make_bad_inode(&ni->vfs_inode);
+
+	return err;
+}
+
+/*
+ * attr_punch_hole
+ *
+ * Not for normal files.
+ */
+int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size)
+{
+	int err = 0;
+	struct runs_tree *run = &ni->file.run;
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	struct ATTRIB *attr = NULL, *attr_b;
+	struct ATTR_LIST_ENTRY *le, *le_b;
+	struct mft_inode *mi, *mi_b;
+	CLST svcn, evcn1, vcn, len, end, alen, dealloc;
+	u64 total_size, alloc_size;
+	u32 mask;
+
+	if (!bytes)
+		return 0;
+
+	le_b = NULL;
+	attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b);
+	if (!attr_b)
+		return -ENOENT;
+
+	if (!attr_b->non_res) {
+		u32 data_size = le32_to_cpu(attr->res.data_size);
+		u32 from, to;
+
+		if (vbo > data_size)
+			return 0;
+
+		from = vbo;
+		to = (vbo + bytes) < data_size ? (vbo + bytes) : data_size;
+		memset(Add2Ptr(resident_data(attr_b), from), 0, to - from);
+		return 0;
+	}
+
+	if (!is_attr_ext(attr_b))
+		return -EOPNOTSUPP;
+
+	alloc_size = le64_to_cpu(attr_b->nres.alloc_size);
+	total_size = le64_to_cpu(attr_b->nres.total_size);
+
+	if (vbo >= alloc_size) {
+		/* NOTE: It is allowed. */
+		return 0;
+	}
+
+	mask = (sbi->cluster_size << attr_b->nres.c_unit) - 1;
+
+	bytes += vbo;
+	if (bytes > alloc_size)
+		bytes = alloc_size;
+	bytes -= vbo;
+
+	if ((vbo & mask) || (bytes & mask)) {
+		/* We have to zero a range(s). */
+		if (frame_size == NULL) {
+			/* Caller insists range is aligned. */
+			return -EINVAL;
+		}
+		*frame_size = mask + 1;
+		return E_NTFS_NOTALIGNED;
+	}
+
+	down_write(&ni->file.run_lock);
+	/*
+	 * Enumerate all attribute segments and punch hole where necessary.
+	 */
+	alen = alloc_size >> sbi->cluster_bits;
+	vcn = vbo >> sbi->cluster_bits;
+	len = bytes >> sbi->cluster_bits;
+	end = vcn + len;
+	dealloc = 0;
+
+	svcn = le64_to_cpu(attr_b->nres.svcn);
+	evcn1 = le64_to_cpu(attr_b->nres.evcn) + 1;
+
+	if (svcn <= vcn && vcn < evcn1) {
+		attr = attr_b;
+		le = le_b;
+		mi = mi_b;
+	} else if (!le_b) {
+		err = -EINVAL;
+		goto out;
+	} else {
+		le = le_b;
+		attr = ni_find_attr(ni, attr_b, &le, ATTR_DATA, NULL, 0, &vcn,
+				    &mi);
+		if (!attr) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		svcn = le64_to_cpu(attr->nres.svcn);
+		evcn1 = le64_to_cpu(attr->nres.evcn) + 1;
+	}
+
+	while (svcn < end) {
+		CLST vcn1, zero, dealloc2;
+
+		err = attr_load_runs(attr, ni, run, &svcn);
+		if (err)
+			goto out;
+		vcn1 = max(vcn, svcn);
+		zero = min(end, evcn1) - vcn1;
+
+		dealloc2 = dealloc;
+		err = run_deallocate_ex(sbi, run, vcn1, zero, &dealloc, true);
+		if (err)
+			goto out;
+
+		if (dealloc2 == dealloc) {
+			/* Looks like the required range is already sparsed. */
+		} else {
+			if (!run_add_entry(run, vcn1, SPARSE_LCN, zero,
+					   false)) {
+				err = -ENOMEM;
+				goto out;
+			}
+
+			err = mi_pack_runs(mi, attr, run, evcn1 - svcn);
+			if (err)
+				goto out;
+		}
+		/* Free all allocated memory. */
+		run_truncate(run, 0);
+
+		if (evcn1 >= alen)
+			break;
+
+		attr = ni_enum_attr_ex(ni, attr, &le, &mi);
+		if (!attr) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		svcn = le64_to_cpu(attr->nres.svcn);
+		evcn1 = le64_to_cpu(attr->nres.evcn) + 1;
+	}
+
+	total_size -= (u64)dealloc << sbi->cluster_bits;
+	attr_b->nres.total_size = cpu_to_le64(total_size);
+	mi_b->dirty = true;
+
+	/* Update inode size. */
+	inode_set_bytes(&ni->vfs_inode, total_size);
+	ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
+	mark_inode_dirty(&ni->vfs_inode);
+
+out:
+	up_write(&ni->file.run_lock);
+	if (err)
+		make_bad_inode(&ni->vfs_inode);
+
+	return err;
+}
diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c
new file mode 100644
index 000000000000..fa32399eb517
--- /dev/null
+++ b/fs/ntfs3/attrlist.c
@@ -0,0 +1,460 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/fs.h>
+#include <linux/nls.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+/*
+ * al_is_valid_le
+ *
+ * Return: True if @le is valid.
+ */
+static inline bool al_is_valid_le(const struct ntfs_inode *ni,
+				  struct ATTR_LIST_ENTRY *le)
+{
+	if (!le || !ni->attr_list.le || !ni->attr_list.size)
+		return false;
+
+	return PtrOffset(ni->attr_list.le, le) + le16_to_cpu(le->size) <=
+	       ni->attr_list.size;
+}
+
+void al_destroy(struct ntfs_inode *ni)
+{
+	run_close(&ni->attr_list.run);
+	kfree(ni->attr_list.le);
+	ni->attr_list.le = NULL;
+	ni->attr_list.size = 0;
+	ni->attr_list.dirty = false;
+}
+
+/*
+ * ntfs_load_attr_list
+ *
+ * This method makes sure that the ATTRIB list, if present,
+ * has been properly set up.
+ */
+int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr)
+{
+	int err;
+	size_t lsize;
+	void *le = NULL;
+
+	if (ni->attr_list.size)
+		return 0;
+
+	if (!attr->non_res) {
+		lsize = le32_to_cpu(attr->res.data_size);
+		le = kmalloc(al_aligned(lsize), GFP_NOFS);
+		if (!le) {
+			err = -ENOMEM;
+			goto out;
+		}
+		memcpy(le, resident_data(attr), lsize);
+	} else if (attr->nres.svcn) {
+		err = -EINVAL;
+		goto out;
+	} else {
+		u16 run_off = le16_to_cpu(attr->nres.run_off);
+
+		lsize = le64_to_cpu(attr->nres.data_size);
+
+		run_init(&ni->attr_list.run);
+
+		err = run_unpack_ex(&ni->attr_list.run, ni->mi.sbi, ni->mi.rno,
+				    0, le64_to_cpu(attr->nres.evcn), 0,
+				    Add2Ptr(attr, run_off),
+				    le32_to_cpu(attr->size) - run_off);
+		if (err < 0)
+			goto out;
+
+		le = kmalloc(al_aligned(lsize), GFP_NOFS);
+		if (!le) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		err = ntfs_read_run_nb(ni->mi.sbi, &ni->attr_list.run, 0, le,
+				       lsize, NULL);
+		if (err)
+			goto out;
+	}
+
+	ni->attr_list.size = lsize;
+	ni->attr_list.le = le;
+
+	return 0;
+
+out:
+	ni->attr_list.le = le;
+	al_destroy(ni);
+
+	return err;
+}
+
+/*
+ * al_enumerate
+ *
+ * Return:
+ * * The next list le.
+ * * If @le is NULL then return the first le.
+ */
+struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni,
+				     struct ATTR_LIST_ENTRY *le)
+{
+	size_t off;
+	u16 sz;
+
+	if (!le) {
+		le = ni->attr_list.le;
+	} else {
+		sz = le16_to_cpu(le->size);
+		if (sz < sizeof(struct ATTR_LIST_ENTRY)) {
+			/* Impossible 'cause we should not return such le. */
+			return NULL;
+		}
+		le = Add2Ptr(le, sz);
+	}
+
+	/* Check boundary. */
+	off = PtrOffset(ni->attr_list.le, le);
+	if (off + sizeof(struct ATTR_LIST_ENTRY) > ni->attr_list.size) {
+		/* The regular end of list. */
+		return NULL;
+	}
+
+	sz = le16_to_cpu(le->size);
+
+	/* Check le for errors. */
+	if (sz < sizeof(struct ATTR_LIST_ENTRY) ||
+	    off + sz > ni->attr_list.size ||
+	    sz < le->name_off + le->name_len * sizeof(short)) {
+		return NULL;
+	}
+
+	return le;
+}
+
+/*
+ * al_find_le
+ *
+ * Find the first le in the list which matches type, name and VCN.
+ *
+ * Return: NULL if not found.
+ */
+struct ATTR_LIST_ENTRY *al_find_le(struct ntfs_inode *ni,
+				   struct ATTR_LIST_ENTRY *le,
+				   const struct ATTRIB *attr)
+{
+	CLST svcn = attr_svcn(attr);
+
+	return al_find_ex(ni, le, attr->type, attr_name(attr), attr->name_len,
+			  &svcn);
+}
+
+/*
+ * al_find_ex
+ *
+ * Find the first le in the list which matches type, name and VCN.
+ *
+ * Return: NULL if not found.
+ */
+struct ATTR_LIST_ENTRY *al_find_ex(struct ntfs_inode *ni,
+				   struct ATTR_LIST_ENTRY *le,
+				   enum ATTR_TYPE type, const __le16 *name,
+				   u8 name_len, const CLST *vcn)
+{
+	struct ATTR_LIST_ENTRY *ret = NULL;
+	u32 type_in = le32_to_cpu(type);
+
+	while ((le = al_enumerate(ni, le))) {
+		u64 le_vcn;
+		int diff = le32_to_cpu(le->type) - type_in;
+
+		/* List entries are sorted by type, name and VCN. */
+		if (diff < 0)
+			continue;
+
+		if (diff > 0)
+			return ret;
+
+		if (le->name_len != name_len)
+			continue;
+
+		le_vcn = le64_to_cpu(le->vcn);
+		if (!le_vcn) {
+			/*
+			 * Compare entry names only for entry with vcn == 0.
+			 */
+			diff = ntfs_cmp_names(le_name(le), name_len, name,
+					      name_len, ni->mi.sbi->upcase,
+					      true);
+			if (diff < 0)
+				continue;
+
+			if (diff > 0)
+				return ret;
+		}
+
+		if (!vcn)
+			return le;
+
+		if (*vcn == le_vcn)
+			return le;
+
+		if (*vcn < le_vcn)
+			return ret;
+
+		ret = le;
+	}
+
+	return ret;
+}
+
+/*
+ * al_find_le_to_insert
+ *
+ * Find the first list entry which matches type, name and VCN.
+ */
+static struct ATTR_LIST_ENTRY *al_find_le_to_insert(struct ntfs_inode *ni,
+						    enum ATTR_TYPE type,
+						    const __le16 *name,
+						    u8 name_len, CLST vcn)
+{
+	struct ATTR_LIST_ENTRY *le = NULL, *prev;
+	u32 type_in = le32_to_cpu(type);
+
+	/* List entries are sorted by type, name and VCN. */
+	while ((le = al_enumerate(ni, prev = le))) {
+		int diff = le32_to_cpu(le->type) - type_in;
+
+		if (diff < 0)
+			continue;
+
+		if (diff > 0)
+			return le;
+
+		if (!le->vcn) {
+			/*
+			 * Compare entry names only for entry with vcn == 0.
+			 */
+			diff = ntfs_cmp_names(le_name(le), le->name_len, name,
+					      name_len, ni->mi.sbi->upcase,
+					      true);
+			if (diff < 0)
+				continue;
+
+			if (diff > 0)
+				return le;
+		}
+
+		if (le64_to_cpu(le->vcn) >= vcn)
+			return le;
+	}
+
+	return prev ? Add2Ptr(prev, le16_to_cpu(prev->size)) : ni->attr_list.le;
+}
+
+/*
+ * al_add_le
+ *
+ * Add an "attribute list entry" to the list.
+ */
+int al_add_le(struct ntfs_inode *ni, enum ATTR_TYPE type, const __le16 *name,
+	      u8 name_len, CLST svcn, __le16 id, const struct MFT_REF *ref,
+	      struct ATTR_LIST_ENTRY **new_le)
+{
+	int err;
+	struct ATTRIB *attr;
+	struct ATTR_LIST_ENTRY *le;
+	size_t off;
+	u16 sz;
+	size_t asize, new_asize, old_size;
+	u64 new_size;
+	typeof(ni->attr_list) *al = &ni->attr_list;
+
+	/*
+	 * Compute the size of the new 'le'
+	 */
+	sz = le_size(name_len);
+	old_size = al->size;
+	new_size = old_size + sz;
+	asize = al_aligned(old_size);
+	new_asize = al_aligned(new_size);
+
+	/* Scan forward to the point at which the new 'le' should be inserted. */
+	le = al_find_le_to_insert(ni, type, name, name_len, svcn);
+	off = PtrOffset(al->le, le);
+
+	if (new_size > asize) {
+		void *ptr = kmalloc(new_asize, GFP_NOFS);
+
+		if (!ptr)
+			return -ENOMEM;
+
+		memcpy(ptr, al->le, off);
+		memcpy(Add2Ptr(ptr, off + sz), le, old_size - off);
+		le = Add2Ptr(ptr, off);
+		kfree(al->le);
+		al->le = ptr;
+	} else {
+		memmove(Add2Ptr(le, sz), le, old_size - off);
+	}
+	*new_le = le;
+
+	al->size = new_size;
+
+	le->type = type;
+	le->size = cpu_to_le16(sz);
+	le->name_len = name_len;
+	le->name_off = offsetof(struct ATTR_LIST_ENTRY, name);
+	le->vcn = cpu_to_le64(svcn);
+	le->ref = *ref;
+	le->id = id;
+	memcpy(le->name, name, sizeof(short) * name_len);
+
+	err = attr_set_size(ni, ATTR_LIST, NULL, 0, &al->run, new_size,
+			    &new_size, true, &attr);
+	if (err) {
+		/* Undo memmove above. */
+		memmove(le, Add2Ptr(le, sz), old_size - off);
+		al->size = old_size;
+		return err;
+	}
+
+	al->dirty = true;
+
+	if (attr && attr->non_res) {
+		err = ntfs_sb_write_run(ni->mi.sbi, &al->run, 0, al->le,
+					al->size);
+		if (err)
+			return err;
+		al->dirty = false;
+	}
+
+	return 0;
+}
+
+/*
+ * al_remove_le - Remove @le from attribute list.
+ */
+bool al_remove_le(struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le)
+{
+	u16 size;
+	size_t off;
+	typeof(ni->attr_list) *al = &ni->attr_list;
+
+	if (!al_is_valid_le(ni, le))
+		return false;
+
+	/* Save on stack the size of 'le' */
+	size = le16_to_cpu(le->size);
+	off = PtrOffset(al->le, le);
+
+	memmove(le, Add2Ptr(le, size), al->size - (off + size));
+
+	al->size -= size;
+	al->dirty = true;
+
+	return true;
+}
+
+/*
+ * al_delete_le - Delete first le from the list which matches its parameters.
+ */
+bool al_delete_le(struct ntfs_inode *ni, enum ATTR_TYPE type, CLST vcn,
+		  const __le16 *name, size_t name_len,
+		  const struct MFT_REF *ref)
+{
+	u16 size;
+	struct ATTR_LIST_ENTRY *le;
+	size_t off;
+	typeof(ni->attr_list) *al = &ni->attr_list;
+
+	/* Scan forward to the first le that matches the input. */
+	le = al_find_ex(ni, NULL, type, name, name_len, &vcn);
+	if (!le)
+		return false;
+
+	off = PtrOffset(al->le, le);
+
+next:
+	if (off >= al->size)
+		return false;
+	if (le->type != type)
+		return false;
+	if (le->name_len != name_len)
+		return false;
+	if (name_len && ntfs_cmp_names(le_name(le), name_len, name, name_len,
+				       ni->mi.sbi->upcase, true))
+		return false;
+	if (le64_to_cpu(le->vcn) != vcn)
+		return false;
+
+	/*
+	 * The caller specified a segment reference, so we have to
+	 * scan through the matching entries until we find that segment
+	 * reference or we run of matching entries.
+	 */
+	if (ref && memcmp(ref, &le->ref, sizeof(*ref))) {
+		off += le16_to_cpu(le->size);
+		le = Add2Ptr(al->le, off);
+		goto next;
+	}
+
+	/* Save on stack the size of 'le'. */
+	size = le16_to_cpu(le->size);
+	/* Delete the le. */
+	memmove(le, Add2Ptr(le, size), al->size - (off + size));
+
+	al->size -= size;
+	al->dirty = true;
+
+	return true;
+}
+
+int al_update(struct ntfs_inode *ni)
+{
+	int err;
+	struct ATTRIB *attr;
+	typeof(ni->attr_list) *al = &ni->attr_list;
+
+	if (!al->dirty || !al->size)
+		return 0;
+
+	/*
+	 * Attribute list increased on demand in al_add_le.
+	 * Attribute list decreased here.
+	 */
+	err = attr_set_size(ni, ATTR_LIST, NULL, 0, &al->run, al->size, NULL,
+			    false, &attr);
+	if (err)
+		goto out;
+
+	if (!attr->non_res) {
+		memcpy(resident_data(attr), al->le, al->size);
+	} else {
+		err = ntfs_sb_write_run(ni->mi.sbi, &al->run, 0, al->le,
+					al->size);
+		if (err)
+			goto out;
+
+		attr->nres.valid_size = attr->nres.data_size;
+	}
+
+	ni->mi.dirty = true;
+	al->dirty = false;
+
+out:
+	return err;
+}
diff --git a/fs/ntfs3/bitfunc.c b/fs/ntfs3/bitfunc.c
new file mode 100644
index 000000000000..ce304d40b5e1
--- /dev/null
+++ b/fs/ntfs3/bitfunc.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/fs.h>
+#include <linux/nls.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+#define BITS_IN_SIZE_T (sizeof(size_t) * 8)
+
+/*
+ * fill_mask[i] - first i bits are '1' , i = 0,1,2,3,4,5,6,7,8
+ * fill_mask[i] = 0xFF >> (8-i)
+ */
+static const u8 fill_mask[] = { 0x00, 0x01, 0x03, 0x07, 0x0F,
+				0x1F, 0x3F, 0x7F, 0xFF };
+
+/*
+ * zero_mask[i] - first i bits are '0' , i = 0,1,2,3,4,5,6,7,8
+ * zero_mask[i] = 0xFF << i
+ */
+static const u8 zero_mask[] = { 0xFF, 0xFE, 0xFC, 0xF8, 0xF0,
+				0xE0, 0xC0, 0x80, 0x00 };
+
+/*
+ * are_bits_clear
+ *
+ * Return: True if all bits [bit, bit+nbits) are zeros "0".
+ */
+bool are_bits_clear(const ulong *lmap, size_t bit, size_t nbits)
+{
+	size_t pos = bit & 7;
+	const u8 *map = (u8 *)lmap + (bit >> 3);
+
+	if (pos) {
+		if (8 - pos >= nbits)
+			return !nbits || !(*map & fill_mask[pos + nbits] &
+					   zero_mask[pos]);
+
+		if (*map++ & zero_mask[pos])
+			return false;
+		nbits -= 8 - pos;
+	}
+
+	pos = ((size_t)map) & (sizeof(size_t) - 1);
+	if (pos) {
+		pos = sizeof(size_t) - pos;
+		if (nbits >= pos * 8) {
+			for (nbits -= pos * 8; pos; pos--, map++) {
+				if (*map)
+					return false;
+			}
+		}
+	}
+
+	for (pos = nbits / BITS_IN_SIZE_T; pos; pos--, map += sizeof(size_t)) {
+		if (*((size_t *)map))
+			return false;
+	}
+
+	for (pos = (nbits % BITS_IN_SIZE_T) >> 3; pos; pos--, map++) {
+		if (*map)
+			return false;
+	}
+
+	pos = nbits & 7;
+	if (pos && (*map & fill_mask[pos]))
+		return false;
+
+	return true;
+}
+
+/*
+ * are_bits_set
+ *
+ * Return: True if all bits [bit, bit+nbits) are ones "1".
+ */
+bool are_bits_set(const ulong *lmap, size_t bit, size_t nbits)
+{
+	u8 mask;
+	size_t pos = bit & 7;
+	const u8 *map = (u8 *)lmap + (bit >> 3);
+
+	if (pos) {
+		if (8 - pos >= nbits) {
+			mask = fill_mask[pos + nbits] & zero_mask[pos];
+			return !nbits || (*map & mask) == mask;
+		}
+
+		mask = zero_mask[pos];
+		if ((*map++ & mask) != mask)
+			return false;
+		nbits -= 8 - pos;
+	}
+
+	pos = ((size_t)map) & (sizeof(size_t) - 1);
+	if (pos) {
+		pos = sizeof(size_t) - pos;
+		if (nbits >= pos * 8) {
+			for (nbits -= pos * 8; pos; pos--, map++) {
+				if (*map != 0xFF)
+					return false;
+			}
+		}
+	}
+
+	for (pos = nbits / BITS_IN_SIZE_T; pos; pos--, map += sizeof(size_t)) {
+		if (*((size_t *)map) != MINUS_ONE_T)
+			return false;
+	}
+
+	for (pos = (nbits % BITS_IN_SIZE_T) >> 3; pos; pos--, map++) {
+		if (*map != 0xFF)
+			return false;
+	}
+
+	pos = nbits & 7;
+	if (pos) {
+		u8 mask = fill_mask[pos];
+
+		if ((*map & mask) != mask)
+			return false;
+	}
+
+	return true;
+}
diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c
new file mode 100644
index 000000000000..831501555009
--- /dev/null
+++ b/fs/ntfs3/bitmap.c
@@ -0,0 +1,1493 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ * This code builds two trees of free clusters extents.
+ * Trees are sorted by start of extent and by length of extent.
+ * NTFS_MAX_WND_EXTENTS defines the maximum number of elements in trees.
+ * In extreme case code reads on-disk bitmap to find free clusters.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/fs.h>
+#include <linux/nls.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+/*
+ * Maximum number of extents in tree.
+ */
+#define NTFS_MAX_WND_EXTENTS (32u * 1024u)
+
+struct rb_node_key {
+	struct rb_node node;
+	size_t key;
+};
+
+struct e_node {
+	struct rb_node_key start; /* Tree sorted by start. */
+	struct rb_node_key count; /* Tree sorted by len. */
+};
+
+static int wnd_rescan(struct wnd_bitmap *wnd);
+static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw);
+static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits);
+
+static struct kmem_cache *ntfs_enode_cachep;
+
+int __init ntfs3_init_bitmap(void)
+{
+	ntfs_enode_cachep =
+		kmem_cache_create("ntfs3_enode_cache", sizeof(struct e_node), 0,
+				  SLAB_RECLAIM_ACCOUNT, NULL);
+	return ntfs_enode_cachep ? 0 : -ENOMEM;
+}
+
+void ntfs3_exit_bitmap(void)
+{
+	kmem_cache_destroy(ntfs_enode_cachep);
+}
+
+static inline u32 wnd_bits(const struct wnd_bitmap *wnd, size_t i)
+{
+	return i + 1 == wnd->nwnd ? wnd->bits_last : wnd->sb->s_blocksize * 8;
+}
+
+/*
+ * wnd_scan
+ *
+ * b_pos + b_len - biggest fragment.
+ * Scan range [wpos wbits) window @buf.
+ *
+ * Return: -1 if not found.
+ */
+static size_t wnd_scan(const ulong *buf, size_t wbit, u32 wpos, u32 wend,
+		       size_t to_alloc, size_t *prev_tail, size_t *b_pos,
+		       size_t *b_len)
+{
+	while (wpos < wend) {
+		size_t free_len;
+		u32 free_bits, end;
+		u32 used = find_next_zero_bit(buf, wend, wpos);
+
+		if (used >= wend) {
+			if (*b_len < *prev_tail) {
+				*b_pos = wbit - *prev_tail;
+				*b_len = *prev_tail;
+			}
+
+			*prev_tail = 0;
+			return -1;
+		}
+
+		if (used > wpos) {
+			wpos = used;
+			if (*b_len < *prev_tail) {
+				*b_pos = wbit - *prev_tail;
+				*b_len = *prev_tail;
+			}
+
+			*prev_tail = 0;
+		}
+
+		/*
+		 * Now we have a fragment [wpos, wend) staring with 0.
+		 */
+		end = wpos + to_alloc - *prev_tail;
+		free_bits = find_next_bit(buf, min(end, wend), wpos);
+
+		free_len = *prev_tail + free_bits - wpos;
+
+		if (*b_len < free_len) {
+			*b_pos = wbit + wpos - *prev_tail;
+			*b_len = free_len;
+		}
+
+		if (free_len >= to_alloc)
+			return wbit + wpos - *prev_tail;
+
+		if (free_bits >= wend) {
+			*prev_tail += free_bits - wpos;
+			return -1;
+		}
+
+		wpos = free_bits + 1;
+
+		*prev_tail = 0;
+	}
+
+	return -1;
+}
+
+/*
+ * wnd_close - Frees all resources.
+ */
+void wnd_close(struct wnd_bitmap *wnd)
+{
+	struct rb_node *node, *next;
+
+	kfree(wnd->free_bits);
+	run_close(&wnd->run);
+
+	node = rb_first(&wnd->start_tree);
+
+	while (node) {
+		next = rb_next(node);
+		rb_erase(node, &wnd->start_tree);
+		kmem_cache_free(ntfs_enode_cachep,
+				rb_entry(node, struct e_node, start.node));
+		node = next;
+	}
+}
+
+static struct rb_node *rb_lookup(struct rb_root *root, size_t v)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *r = NULL;
+
+	while (*p) {
+		struct rb_node_key *k;
+
+		k = rb_entry(*p, struct rb_node_key, node);
+		if (v < k->key) {
+			p = &(*p)->rb_left;
+		} else if (v > k->key) {
+			r = &k->node;
+			p = &(*p)->rb_right;
+		} else {
+			return &k->node;
+		}
+	}
+
+	return r;
+}
+
+/*
+ * rb_insert_count - Helper function to insert special kind of 'count' tree.
+ */
+static inline bool rb_insert_count(struct rb_root *root, struct e_node *e)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	size_t e_ckey = e->count.key;
+	size_t e_skey = e->start.key;
+
+	while (*p) {
+		struct e_node *k =
+			rb_entry(parent = *p, struct e_node, count.node);
+
+		if (e_ckey > k->count.key) {
+			p = &(*p)->rb_left;
+		} else if (e_ckey < k->count.key) {
+			p = &(*p)->rb_right;
+		} else if (e_skey < k->start.key) {
+			p = &(*p)->rb_left;
+		} else if (e_skey > k->start.key) {
+			p = &(*p)->rb_right;
+		} else {
+			WARN_ON(1);
+			return false;
+		}
+	}
+
+	rb_link_node(&e->count.node, parent, p);
+	rb_insert_color(&e->count.node, root);
+	return true;
+}
+
+/*
+ * rb_insert_start - Helper function to insert special kind of 'count' tree.
+ */
+static inline bool rb_insert_start(struct rb_root *root, struct e_node *e)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	size_t e_skey = e->start.key;
+
+	while (*p) {
+		struct e_node *k;
+
+		parent = *p;
+
+		k = rb_entry(parent, struct e_node, start.node);
+		if (e_skey < k->start.key) {
+			p = &(*p)->rb_left;
+		} else if (e_skey > k->start.key) {
+			p = &(*p)->rb_right;
+		} else {
+			WARN_ON(1);
+			return false;
+		}
+	}
+
+	rb_link_node(&e->start.node, parent, p);
+	rb_insert_color(&e->start.node, root);
+	return true;
+}
+
+/*
+ * wnd_add_free_ext - Adds a new extent of free space.
+ * @build:	1 when building tree.
+ */
+static void wnd_add_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len,
+			     bool build)
+{
+	struct e_node *e, *e0 = NULL;
+	size_t ib, end_in = bit + len;
+	struct rb_node *n;
+
+	if (build) {
+		/* Use extent_min to filter too short extents. */
+		if (wnd->count >= NTFS_MAX_WND_EXTENTS &&
+		    len <= wnd->extent_min) {
+			wnd->uptodated = -1;
+			return;
+		}
+	} else {
+		/* Try to find extent before 'bit'. */
+		n = rb_lookup(&wnd->start_tree, bit);
+
+		if (!n) {
+			n = rb_first(&wnd->start_tree);
+		} else {
+			e = rb_entry(n, struct e_node, start.node);
+			n = rb_next(n);
+			if (e->start.key + e->count.key == bit) {
+				/* Remove left. */
+				bit = e->start.key;
+				len += e->count.key;
+				rb_erase(&e->start.node, &wnd->start_tree);
+				rb_erase(&e->count.node, &wnd->count_tree);
+				wnd->count -= 1;
+				e0 = e;
+			}
+		}
+
+		while (n) {
+			size_t next_end;
+
+			e = rb_entry(n, struct e_node, start.node);
+			next_end = e->start.key + e->count.key;
+			if (e->start.key > end_in)
+				break;
+
+			/* Remove right. */
+			n = rb_next(n);
+			len += next_end - end_in;
+			end_in = next_end;
+			rb_erase(&e->start.node, &wnd->start_tree);
+			rb_erase(&e->count.node, &wnd->count_tree);
+			wnd->count -= 1;
+
+			if (!e0)
+				e0 = e;
+			else
+				kmem_cache_free(ntfs_enode_cachep, e);
+		}
+
+		if (wnd->uptodated != 1) {
+			/* Check bits before 'bit'. */
+			ib = wnd->zone_bit == wnd->zone_end ||
+					     bit < wnd->zone_end
+				     ? 0
+				     : wnd->zone_end;
+
+			while (bit > ib && wnd_is_free_hlp(wnd, bit - 1, 1)) {
+				bit -= 1;
+				len += 1;
+			}
+
+			/* Check bits after 'end_in'. */
+			ib = wnd->zone_bit == wnd->zone_end ||
+					     end_in > wnd->zone_bit
+				     ? wnd->nbits
+				     : wnd->zone_bit;
+
+			while (end_in < ib && wnd_is_free_hlp(wnd, end_in, 1)) {
+				end_in += 1;
+				len += 1;
+			}
+		}
+	}
+	/* Insert new fragment. */
+	if (wnd->count >= NTFS_MAX_WND_EXTENTS) {
+		if (e0)
+			kmem_cache_free(ntfs_enode_cachep, e0);
+
+		wnd->uptodated = -1;
+
+		/* Compare with smallest fragment. */
+		n = rb_last(&wnd->count_tree);
+		e = rb_entry(n, struct e_node, count.node);
+		if (len <= e->count.key)
+			goto out; /* Do not insert small fragments. */
+
+		if (build) {
+			struct e_node *e2;
+
+			n = rb_prev(n);
+			e2 = rb_entry(n, struct e_node, count.node);
+			/* Smallest fragment will be 'e2->count.key'. */
+			wnd->extent_min = e2->count.key;
+		}
+
+		/* Replace smallest fragment by new one. */
+		rb_erase(&e->start.node, &wnd->start_tree);
+		rb_erase(&e->count.node, &wnd->count_tree);
+		wnd->count -= 1;
+	} else {
+		e = e0 ? e0 : kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC);
+		if (!e) {
+			wnd->uptodated = -1;
+			goto out;
+		}
+
+		if (build && len <= wnd->extent_min)
+			wnd->extent_min = len;
+	}
+	e->start.key = bit;
+	e->count.key = len;
+	if (len > wnd->extent_max)
+		wnd->extent_max = len;
+
+	rb_insert_start(&wnd->start_tree, e);
+	rb_insert_count(&wnd->count_tree, e);
+	wnd->count += 1;
+
+out:;
+}
+
+/*
+ * wnd_remove_free_ext - Remove a run from the cached free space.
+ */
+static void wnd_remove_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len)
+{
+	struct rb_node *n, *n3;
+	struct e_node *e, *e3;
+	size_t end_in = bit + len;
+	size_t end3, end, new_key, new_len, max_new_len;
+
+	/* Try to find extent before 'bit'. */
+	n = rb_lookup(&wnd->start_tree, bit);
+
+	if (!n)
+		return;
+
+	e = rb_entry(n, struct e_node, start.node);
+	end = e->start.key + e->count.key;
+
+	new_key = new_len = 0;
+	len = e->count.key;
+
+	/* Range [bit,end_in) must be inside 'e' or outside 'e' and 'n'. */
+	if (e->start.key > bit)
+		;
+	else if (end_in <= end) {
+		/* Range [bit,end_in) inside 'e'. */
+		new_key = end_in;
+		new_len = end - end_in;
+		len = bit - e->start.key;
+	} else if (bit > end) {
+		bool bmax = false;
+
+		n3 = rb_next(n);
+
+		while (n3) {
+			e3 = rb_entry(n3, struct e_node, start.node);
+			if (e3->start.key >= end_in)
+				break;
+
+			if (e3->count.key == wnd->extent_max)
+				bmax = true;
+
+			end3 = e3->start.key + e3->count.key;
+			if (end3 > end_in) {
+				e3->start.key = end_in;
+				rb_erase(&e3->count.node, &wnd->count_tree);
+				e3->count.key = end3 - end_in;
+				rb_insert_count(&wnd->count_tree, e3);
+				break;
+			}
+
+			n3 = rb_next(n3);
+			rb_erase(&e3->start.node, &wnd->start_tree);
+			rb_erase(&e3->count.node, &wnd->count_tree);
+			wnd->count -= 1;
+			kmem_cache_free(ntfs_enode_cachep, e3);
+		}
+		if (!bmax)
+			return;
+		n3 = rb_first(&wnd->count_tree);
+		wnd->extent_max =
+			n3 ? rb_entry(n3, struct e_node, count.node)->count.key
+			   : 0;
+		return;
+	}
+
+	if (e->count.key != wnd->extent_max) {
+		;
+	} else if (rb_prev(&e->count.node)) {
+		;
+	} else {
+		n3 = rb_next(&e->count.node);
+		max_new_len = len > new_len ? len : new_len;
+		if (!n3) {
+			wnd->extent_max = max_new_len;
+		} else {
+			e3 = rb_entry(n3, struct e_node, count.node);
+			wnd->extent_max = max(e3->count.key, max_new_len);
+		}
+	}
+
+	if (!len) {
+		if (new_len) {
+			e->start.key = new_key;
+			rb_erase(&e->count.node, &wnd->count_tree);
+			e->count.key = new_len;
+			rb_insert_count(&wnd->count_tree, e);
+		} else {
+			rb_erase(&e->start.node, &wnd->start_tree);
+			rb_erase(&e->count.node, &wnd->count_tree);
+			wnd->count -= 1;
+			kmem_cache_free(ntfs_enode_cachep, e);
+		}
+		goto out;
+	}
+	rb_erase(&e->count.node, &wnd->count_tree);
+	e->count.key = len;
+	rb_insert_count(&wnd->count_tree, e);
+
+	if (!new_len)
+		goto out;
+
+	if (wnd->count >= NTFS_MAX_WND_EXTENTS) {
+		wnd->uptodated = -1;
+
+		/* Get minimal extent. */
+		e = rb_entry(rb_last(&wnd->count_tree), struct e_node,
+			     count.node);
+		if (e->count.key > new_len)
+			goto out;
+
+		/* Replace minimum. */
+		rb_erase(&e->start.node, &wnd->start_tree);
+		rb_erase(&e->count.node, &wnd->count_tree);
+		wnd->count -= 1;
+	} else {
+		e = kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC);
+		if (!e)
+			wnd->uptodated = -1;
+	}
+
+	if (e) {
+		e->start.key = new_key;
+		e->count.key = new_len;
+		rb_insert_start(&wnd->start_tree, e);
+		rb_insert_count(&wnd->count_tree, e);
+		wnd->count += 1;
+	}
+
+out:
+	if (!wnd->count && 1 != wnd->uptodated)
+		wnd_rescan(wnd);
+}
+
+/*
+ * wnd_rescan - Scan all bitmap. Used while initialization.
+ */
+static int wnd_rescan(struct wnd_bitmap *wnd)
+{
+	int err = 0;
+	size_t prev_tail = 0;
+	struct super_block *sb = wnd->sb;
+	struct ntfs_sb_info *sbi = sb->s_fs_info;
+	u64 lbo, len = 0;
+	u32 blocksize = sb->s_blocksize;
+	u8 cluster_bits = sbi->cluster_bits;
+	u32 wbits = 8 * sb->s_blocksize;
+	u32 used, frb;
+	const ulong *buf;
+	size_t wpos, wbit, iw, vbo;
+	struct buffer_head *bh = NULL;
+	CLST lcn, clen;
+
+	wnd->uptodated = 0;
+	wnd->extent_max = 0;
+	wnd->extent_min = MINUS_ONE_T;
+	wnd->total_zeroes = 0;
+
+	vbo = 0;
+
+	for (iw = 0; iw < wnd->nwnd; iw++) {
+		if (iw + 1 == wnd->nwnd)
+			wbits = wnd->bits_last;
+
+		if (wnd->inited) {
+			if (!wnd->free_bits[iw]) {
+				/* All ones. */
+				if (prev_tail) {
+					wnd_add_free_ext(wnd,
+							 vbo * 8 - prev_tail,
+							 prev_tail, true);
+					prev_tail = 0;
+				}
+				goto next_wnd;
+			}
+			if (wbits == wnd->free_bits[iw]) {
+				/* All zeroes. */
+				prev_tail += wbits;
+				wnd->total_zeroes += wbits;
+				goto next_wnd;
+			}
+		}
+
+		if (!len) {
+			u32 off = vbo & sbi->cluster_mask;
+
+			if (!run_lookup_entry(&wnd->run, vbo >> cluster_bits,
+					      &lcn, &clen, NULL)) {
+				err = -ENOENT;
+				goto out;
+			}
+
+			lbo = ((u64)lcn << cluster_bits) + off;
+			len = ((u64)clen << cluster_bits) - off;
+		}
+
+		bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
+		if (!bh) {
+			err = -EIO;
+			goto out;
+		}
+
+		buf = (ulong *)bh->b_data;
+
+		used = __bitmap_weight(buf, wbits);
+		if (used < wbits) {
+			frb = wbits - used;
+			wnd->free_bits[iw] = frb;
+			wnd->total_zeroes += frb;
+		}
+
+		wpos = 0;
+		wbit = vbo * 8;
+
+		if (wbit + wbits > wnd->nbits)
+			wbits = wnd->nbits - wbit;
+
+		do {
+			used = find_next_zero_bit(buf, wbits, wpos);
+
+			if (used > wpos && prev_tail) {
+				wnd_add_free_ext(wnd, wbit + wpos - prev_tail,
+						 prev_tail, true);
+				prev_tail = 0;
+			}
+
+			wpos = used;
+
+			if (wpos >= wbits) {
+				/* No free blocks. */
+				prev_tail = 0;
+				break;
+			}
+
+			frb = find_next_bit(buf, wbits, wpos);
+			if (frb >= wbits) {
+				/* Keep last free block. */
+				prev_tail += frb - wpos;
+				break;
+			}
+
+			wnd_add_free_ext(wnd, wbit + wpos - prev_tail,
+					 frb + prev_tail - wpos, true);
+
+			/* Skip free block and first '1'. */
+			wpos = frb + 1;
+			/* Reset previous tail. */
+			prev_tail = 0;
+		} while (wpos < wbits);
+
+next_wnd:
+
+		if (bh)
+			put_bh(bh);
+		bh = NULL;
+
+		vbo += blocksize;
+		if (len) {
+			len -= blocksize;
+			lbo += blocksize;
+		}
+	}
+
+	/* Add last block. */
+	if (prev_tail)
+		wnd_add_free_ext(wnd, wnd->nbits - prev_tail, prev_tail, true);
+
+	/*
+	 * Before init cycle wnd->uptodated was 0.
+	 * If any errors or limits occurs while initialization then
+	 * wnd->uptodated will be -1.
+	 * If 'uptodated' is still 0 then Tree is really updated.
+	 */
+	if (!wnd->uptodated)
+		wnd->uptodated = 1;
+
+	if (wnd->zone_bit != wnd->zone_end) {
+		size_t zlen = wnd->zone_end - wnd->zone_bit;
+
+		wnd->zone_end = wnd->zone_bit;
+		wnd_zone_set(wnd, wnd->zone_bit, zlen);
+	}
+
+out:
+	return err;
+}
+
+int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits)
+{
+	int err;
+	u32 blocksize = sb->s_blocksize;
+	u32 wbits = blocksize * 8;
+
+	init_rwsem(&wnd->rw_lock);
+
+	wnd->sb = sb;
+	wnd->nbits = nbits;
+	wnd->total_zeroes = nbits;
+	wnd->extent_max = MINUS_ONE_T;
+	wnd->zone_bit = wnd->zone_end = 0;
+	wnd->nwnd = bytes_to_block(sb, bitmap_size(nbits));
+	wnd->bits_last = nbits & (wbits - 1);
+	if (!wnd->bits_last)
+		wnd->bits_last = wbits;
+
+	wnd->free_bits = kcalloc(wnd->nwnd, sizeof(u16), GFP_NOFS);
+	if (!wnd->free_bits)
+		return -ENOMEM;
+
+	err = wnd_rescan(wnd);
+	if (err)
+		return err;
+
+	wnd->inited = true;
+
+	return 0;
+}
+
+/*
+ * wnd_map - Call sb_bread for requested window.
+ */
+static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw)
+{
+	size_t vbo;
+	CLST lcn, clen;
+	struct super_block *sb = wnd->sb;
+	struct ntfs_sb_info *sbi;
+	struct buffer_head *bh;
+	u64 lbo;
+
+	sbi = sb->s_fs_info;
+	vbo = (u64)iw << sb->s_blocksize_bits;
+
+	if (!run_lookup_entry(&wnd->run, vbo >> sbi->cluster_bits, &lcn, &clen,
+			      NULL)) {
+		return ERR_PTR(-ENOENT);
+	}
+
+	lbo = ((u64)lcn << sbi->cluster_bits) + (vbo & sbi->cluster_mask);
+
+	bh = ntfs_bread(wnd->sb, lbo >> sb->s_blocksize_bits);
+	if (!bh)
+		return ERR_PTR(-EIO);
+
+	return bh;
+}
+
+/*
+ * wnd_set_free - Mark the bits range from bit to bit + bits as free.
+ */
+int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits)
+{
+	int err = 0;
+	struct super_block *sb = wnd->sb;
+	size_t bits0 = bits;
+	u32 wbits = 8 * sb->s_blocksize;
+	size_t iw = bit >> (sb->s_blocksize_bits + 3);
+	u32 wbit = bit & (wbits - 1);
+	struct buffer_head *bh;
+
+	while (iw < wnd->nwnd && bits) {
+		u32 tail, op;
+		ulong *buf;
+
+		if (iw + 1 == wnd->nwnd)
+			wbits = wnd->bits_last;
+
+		tail = wbits - wbit;
+		op = tail < bits ? tail : bits;
+
+		bh = wnd_map(wnd, iw);
+		if (IS_ERR(bh)) {
+			err = PTR_ERR(bh);
+			break;
+		}
+
+		buf = (ulong *)bh->b_data;
+
+		lock_buffer(bh);
+
+		__bitmap_clear(buf, wbit, op);
+
+		wnd->free_bits[iw] += op;
+
+		set_buffer_uptodate(bh);
+		mark_buffer_dirty(bh);
+		unlock_buffer(bh);
+		put_bh(bh);
+
+		wnd->total_zeroes += op;
+		bits -= op;
+		wbit = 0;
+		iw += 1;
+	}
+
+	wnd_add_free_ext(wnd, bit, bits0, false);
+
+	return err;
+}
+
+/*
+ * wnd_set_used - Mark the bits range from bit to bit + bits as used.
+ */
+int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits)
+{
+	int err = 0;
+	struct super_block *sb = wnd->sb;
+	size_t bits0 = bits;
+	size_t iw = bit >> (sb->s_blocksize_bits + 3);
+	u32 wbits = 8 * sb->s_blocksize;
+	u32 wbit = bit & (wbits - 1);
+	struct buffer_head *bh;
+
+	while (iw < wnd->nwnd && bits) {
+		u32 tail, op;
+		ulong *buf;
+
+		if (unlikely(iw + 1 == wnd->nwnd))
+			wbits = wnd->bits_last;
+
+		tail = wbits - wbit;
+		op = tail < bits ? tail : bits;
+
+		bh = wnd_map(wnd, iw);
+		if (IS_ERR(bh)) {
+			err = PTR_ERR(bh);
+			break;
+		}
+		buf = (ulong *)bh->b_data;
+
+		lock_buffer(bh);
+
+		__bitmap_set(buf, wbit, op);
+		wnd->free_bits[iw] -= op;
+
+		set_buffer_uptodate(bh);
+		mark_buffer_dirty(bh);
+		unlock_buffer(bh);
+		put_bh(bh);
+
+		wnd->total_zeroes -= op;
+		bits -= op;
+		wbit = 0;
+		iw += 1;
+	}
+
+	if (!RB_EMPTY_ROOT(&wnd->start_tree))
+		wnd_remove_free_ext(wnd, bit, bits0);
+
+	return err;
+}
+
+/*
+ * wnd_is_free_hlp
+ *
+ * Return: True if all clusters [bit, bit+bits) are free (bitmap only).
+ */
+static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits)
+{
+	struct super_block *sb = wnd->sb;
+	size_t iw = bit >> (sb->s_blocksize_bits + 3);
+	u32 wbits = 8 * sb->s_blocksize;
+	u32 wbit = bit & (wbits - 1);
+
+	while (iw < wnd->nwnd && bits) {
+		u32 tail, op;
+
+		if (unlikely(iw + 1 == wnd->nwnd))
+			wbits = wnd->bits_last;
+
+		tail = wbits - wbit;
+		op = tail < bits ? tail : bits;
+
+		if (wbits != wnd->free_bits[iw]) {
+			bool ret;
+			struct buffer_head *bh = wnd_map(wnd, iw);
+
+			if (IS_ERR(bh))
+				return false;
+
+			ret = are_bits_clear((ulong *)bh->b_data, wbit, op);
+
+			put_bh(bh);
+			if (!ret)
+				return false;
+		}
+
+		bits -= op;
+		wbit = 0;
+		iw += 1;
+	}
+
+	return true;
+}
+
+/*
+ * wnd_is_free
+ *
+ * Return: True if all clusters [bit, bit+bits) are free.
+ */
+bool wnd_is_free(struct wnd_bitmap *wnd, size_t bit, size_t bits)
+{
+	bool ret;
+	struct rb_node *n;
+	size_t end;
+	struct e_node *e;
+
+	if (RB_EMPTY_ROOT(&wnd->start_tree))
+		goto use_wnd;
+
+	n = rb_lookup(&wnd->start_tree, bit);
+	if (!n)
+		goto use_wnd;
+
+	e = rb_entry(n, struct e_node, start.node);
+
+	end = e->start.key + e->count.key;
+
+	if (bit < end && bit + bits <= end)
+		return true;
+
+use_wnd:
+	ret = wnd_is_free_hlp(wnd, bit, bits);
+
+	return ret;
+}
+
+/*
+ * wnd_is_used
+ *
+ * Return: True if all clusters [bit, bit+bits) are used.
+ */
+bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits)
+{
+	bool ret = false;
+	struct super_block *sb = wnd->sb;
+	size_t iw = bit >> (sb->s_blocksize_bits + 3);
+	u32 wbits = 8 * sb->s_blocksize;
+	u32 wbit = bit & (wbits - 1);
+	size_t end;
+	struct rb_node *n;
+	struct e_node *e;
+
+	if (RB_EMPTY_ROOT(&wnd->start_tree))
+		goto use_wnd;
+
+	end = bit + bits;
+	n = rb_lookup(&wnd->start_tree, end - 1);
+	if (!n)
+		goto use_wnd;
+
+	e = rb_entry(n, struct e_node, start.node);
+	if (e->start.key + e->count.key > bit)
+		return false;
+
+use_wnd:
+	while (iw < wnd->nwnd && bits) {
+		u32 tail, op;
+
+		if (unlikely(iw + 1 == wnd->nwnd))
+			wbits = wnd->bits_last;
+
+		tail = wbits - wbit;
+		op = tail < bits ? tail : bits;
+
+		if (wnd->free_bits[iw]) {
+			bool ret;
+			struct buffer_head *bh = wnd_map(wnd, iw);
+
+			if (IS_ERR(bh))
+				goto out;
+
+			ret = are_bits_set((ulong *)bh->b_data, wbit, op);
+			put_bh(bh);
+			if (!ret)
+				goto out;
+		}
+
+		bits -= op;
+		wbit = 0;
+		iw += 1;
+	}
+	ret = true;
+
+out:
+	return ret;
+}
+
+/*
+ * wnd_find - Look for free space.
+ *
+ * - flags - BITMAP_FIND_XXX flags
+ *
+ * Return: 0 if not found.
+ */
+size_t wnd_find(struct wnd_bitmap *wnd, size_t to_alloc, size_t hint,
+		size_t flags, size_t *allocated)
+{
+	struct super_block *sb;
+	u32 wbits, wpos, wzbit, wzend;
+	size_t fnd, max_alloc, b_len, b_pos;
+	size_t iw, prev_tail, nwnd, wbit, ebit, zbit, zend;
+	size_t to_alloc0 = to_alloc;
+	const ulong *buf;
+	const struct e_node *e;
+	const struct rb_node *pr, *cr;
+	u8 log2_bits;
+	bool fbits_valid;
+	struct buffer_head *bh;
+
+	/* Fast checking for available free space. */
+	if (flags & BITMAP_FIND_FULL) {
+		size_t zeroes = wnd_zeroes(wnd);
+
+		zeroes -= wnd->zone_end - wnd->zone_bit;
+		if (zeroes < to_alloc0)
+			goto no_space;
+
+		if (to_alloc0 > wnd->extent_max)
+			goto no_space;
+	} else {
+		if (to_alloc > wnd->extent_max)
+			to_alloc = wnd->extent_max;
+	}
+
+	if (wnd->zone_bit <= hint && hint < wnd->zone_end)
+		hint = wnd->zone_end;
+
+	max_alloc = wnd->nbits;
+	b_len = b_pos = 0;
+
+	if (hint >= max_alloc)
+		hint = 0;
+
+	if (RB_EMPTY_ROOT(&wnd->start_tree)) {
+		if (wnd->uptodated == 1) {
+			/* Extents tree is updated -> No free space. */
+			goto no_space;
+		}
+		goto scan_bitmap;
+	}
+
+	e = NULL;
+	if (!hint)
+		goto allocate_biggest;
+
+	/* Use hint: Enumerate extents by start >= hint. */
+	pr = NULL;
+	cr = wnd->start_tree.rb_node;
+
+	for (;;) {
+		e = rb_entry(cr, struct e_node, start.node);
+
+		if (e->start.key == hint)
+			break;
+
+		if (e->start.key < hint) {
+			pr = cr;
+			cr = cr->rb_right;
+			if (!cr)
+				break;
+			continue;
+		}
+
+		cr = cr->rb_left;
+		if (!cr) {
+			e = pr ? rb_entry(pr, struct e_node, start.node) : NULL;
+			break;
+		}
+	}
+
+	if (!e)
+		goto allocate_biggest;
+
+	if (e->start.key + e->count.key > hint) {
+		/* We have found extension with 'hint' inside. */
+		size_t len = e->start.key + e->count.key - hint;
+
+		if (len >= to_alloc && hint + to_alloc <= max_alloc) {
+			fnd = hint;
+			goto found;
+		}
+
+		if (!(flags & BITMAP_FIND_FULL)) {
+			if (len > to_alloc)
+				len = to_alloc;
+
+			if (hint + len <= max_alloc) {
+				fnd = hint;
+				to_alloc = len;
+				goto found;
+			}
+		}
+	}
+
+allocate_biggest:
+	/* Allocate from biggest free extent. */
+	e = rb_entry(rb_first(&wnd->count_tree), struct e_node, count.node);
+	if (e->count.key != wnd->extent_max)
+		wnd->extent_max = e->count.key;
+
+	if (e->count.key < max_alloc) {
+		if (e->count.key >= to_alloc) {
+			;
+		} else if (flags & BITMAP_FIND_FULL) {
+			if (e->count.key < to_alloc0) {
+				/* Biggest free block is less then requested. */
+				goto no_space;
+			}
+			to_alloc = e->count.key;
+		} else if (-1 != wnd->uptodated) {
+			to_alloc = e->count.key;
+		} else {
+			/* Check if we can use more bits. */
+			size_t op, max_check;
+			struct rb_root start_tree;
+
+			memcpy(&start_tree, &wnd->start_tree,
+			       sizeof(struct rb_root));
+			memset(&wnd->start_tree, 0, sizeof(struct rb_root));
+
+			max_check = e->start.key + to_alloc;
+			if (max_check > max_alloc)
+				max_check = max_alloc;
+			for (op = e->start.key + e->count.key; op < max_check;
+			     op++) {
+				if (!wnd_is_free(wnd, op, 1))
+					break;
+			}
+			memcpy(&wnd->start_tree, &start_tree,
+			       sizeof(struct rb_root));
+			to_alloc = op - e->start.key;
+		}
+
+		/* Prepare to return. */
+		fnd = e->start.key;
+		if (e->start.key + to_alloc > max_alloc)
+			to_alloc = max_alloc - e->start.key;
+		goto found;
+	}
+
+	if (wnd->uptodated == 1) {
+		/* Extents tree is updated -> no free space. */
+		goto no_space;
+	}
+
+	b_len = e->count.key;
+	b_pos = e->start.key;
+
+scan_bitmap:
+	sb = wnd->sb;
+	log2_bits = sb->s_blocksize_bits + 3;
+
+	/* At most two ranges [hint, max_alloc) + [0, hint). */
+Again:
+
+	/* TODO: Optimize request for case nbits > wbits. */
+	iw = hint >> log2_bits;
+	wbits = sb->s_blocksize * 8;
+	wpos = hint & (wbits - 1);
+	prev_tail = 0;
+	fbits_valid = true;
+
+	if (max_alloc == wnd->nbits) {
+		nwnd = wnd->nwnd;
+	} else {
+		size_t t = max_alloc + wbits - 1;
+
+		nwnd = likely(t > max_alloc) ? (t >> log2_bits) : wnd->nwnd;
+	}
+
+	/* Enumerate all windows. */
+	for (; iw < nwnd; iw++) {
+		wbit = iw << log2_bits;
+
+		if (!wnd->free_bits[iw]) {
+			if (prev_tail > b_len) {
+				b_pos = wbit - prev_tail;
+				b_len = prev_tail;
+			}
+
+			/* Skip full used window. */
+			prev_tail = 0;
+			wpos = 0;
+			continue;
+		}
+
+		if (unlikely(iw + 1 == nwnd)) {
+			if (max_alloc == wnd->nbits) {
+				wbits = wnd->bits_last;
+			} else {
+				size_t t = max_alloc & (wbits - 1);
+
+				if (t) {
+					wbits = t;
+					fbits_valid = false;
+				}
+			}
+		}
+
+		if (wnd->zone_end > wnd->zone_bit) {
+			ebit = wbit + wbits;
+			zbit = max(wnd->zone_bit, wbit);
+			zend = min(wnd->zone_end, ebit);
+
+			/* Here we have a window [wbit, ebit) and zone [zbit, zend). */
+			if (zend <= zbit) {
+				/* Zone does not overlap window. */
+			} else {
+				wzbit = zbit - wbit;
+				wzend = zend - wbit;
+
+				/* Zone overlaps window. */
+				if (wnd->free_bits[iw] == wzend - wzbit) {
+					prev_tail = 0;
+					wpos = 0;
+					continue;
+				}
+
+				/* Scan two ranges window: [wbit, zbit) and [zend, ebit). */
+				bh = wnd_map(wnd, iw);
+
+				if (IS_ERR(bh)) {
+					/* TODO: Error */
+					prev_tail = 0;
+					wpos = 0;
+					continue;
+				}
+
+				buf = (ulong *)bh->b_data;
+
+				/* Scan range [wbit, zbit). */
+				if (wpos < wzbit) {
+					/* Scan range [wpos, zbit). */
+					fnd = wnd_scan(buf, wbit, wpos, wzbit,
+						       to_alloc, &prev_tail,
+						       &b_pos, &b_len);
+					if (fnd != MINUS_ONE_T) {
+						put_bh(bh);
+						goto found;
+					}
+				}
+
+				prev_tail = 0;
+
+				/* Scan range [zend, ebit). */
+				if (wzend < wbits) {
+					fnd = wnd_scan(buf, wbit,
+						       max(wzend, wpos), wbits,
+						       to_alloc, &prev_tail,
+						       &b_pos, &b_len);
+					if (fnd != MINUS_ONE_T) {
+						put_bh(bh);
+						goto found;
+					}
+				}
+
+				wpos = 0;
+				put_bh(bh);
+				continue;
+			}
+		}
+
+		/* Current window does not overlap zone. */
+		if (!wpos && fbits_valid && wnd->free_bits[iw] == wbits) {
+			/* Window is empty. */
+			if (prev_tail + wbits >= to_alloc) {
+				fnd = wbit + wpos - prev_tail;
+				goto found;
+			}
+
+			/* Increase 'prev_tail' and process next window. */
+			prev_tail += wbits;
+			wpos = 0;
+			continue;
+		}
+
+		/* Read window. */
+		bh = wnd_map(wnd, iw);
+		if (IS_ERR(bh)) {
+			// TODO: Error.
+			prev_tail = 0;
+			wpos = 0;
+			continue;
+		}
+
+		buf = (ulong *)bh->b_data;
+
+		/* Scan range [wpos, eBits). */
+		fnd = wnd_scan(buf, wbit, wpos, wbits, to_alloc, &prev_tail,
+			       &b_pos, &b_len);
+		put_bh(bh);
+		if (fnd != MINUS_ONE_T)
+			goto found;
+	}
+
+	if (b_len < prev_tail) {
+		/* The last fragment. */
+		b_len = prev_tail;
+		b_pos = max_alloc - prev_tail;
+	}
+
+	if (hint) {
+		/*
+		 * We have scanned range [hint max_alloc).
+		 * Prepare to scan range [0 hint + to_alloc).
+		 */
+		size_t nextmax = hint + to_alloc;
+
+		if (likely(nextmax >= hint) && nextmax < max_alloc)
+			max_alloc = nextmax;
+		hint = 0;
+		goto Again;
+	}
+
+	if (!b_len)
+		goto no_space;
+
+	wnd->extent_max = b_len;
+
+	if (flags & BITMAP_FIND_FULL)
+		goto no_space;
+
+	fnd = b_pos;
+	to_alloc = b_len;
+
+found:
+	if (flags & BITMAP_FIND_MARK_AS_USED) {
+		/* TODO: Optimize remove extent (pass 'e'?). */
+		if (wnd_set_used(wnd, fnd, to_alloc))
+			goto no_space;
+	} else if (wnd->extent_max != MINUS_ONE_T &&
+		   to_alloc > wnd->extent_max) {
+		wnd->extent_max = to_alloc;
+	}
+
+	*allocated = fnd;
+	return to_alloc;
+
+no_space:
+	return 0;
+}
+
+/*
+ * wnd_extend - Extend bitmap ($MFT bitmap).
+ */
+int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits)
+{
+	int err;
+	struct super_block *sb = wnd->sb;
+	struct ntfs_sb_info *sbi = sb->s_fs_info;
+	u32 blocksize = sb->s_blocksize;
+	u32 wbits = blocksize * 8;
+	u32 b0, new_last;
+	size_t bits, iw, new_wnd;
+	size_t old_bits = wnd->nbits;
+	u16 *new_free;
+
+	if (new_bits <= old_bits)
+		return -EINVAL;
+
+	/* Align to 8 byte boundary. */
+	new_wnd = bytes_to_block(sb, bitmap_size(new_bits));
+	new_last = new_bits & (wbits - 1);
+	if (!new_last)
+		new_last = wbits;
+
+	if (new_wnd != wnd->nwnd) {
+		new_free = kmalloc(new_wnd * sizeof(u16), GFP_NOFS);
+		if (!new_free)
+			return -ENOMEM;
+
+		if (new_free != wnd->free_bits)
+			memcpy(new_free, wnd->free_bits,
+			       wnd->nwnd * sizeof(short));
+		memset(new_free + wnd->nwnd, 0,
+		       (new_wnd - wnd->nwnd) * sizeof(short));
+		kfree(wnd->free_bits);
+		wnd->free_bits = new_free;
+	}
+
+	/* Zero bits [old_bits,new_bits). */
+	bits = new_bits - old_bits;
+	b0 = old_bits & (wbits - 1);
+
+	for (iw = old_bits >> (sb->s_blocksize_bits + 3); bits; iw += 1) {
+		u32 op;
+		size_t frb;
+		u64 vbo, lbo, bytes;
+		struct buffer_head *bh;
+		ulong *buf;
+
+		if (iw + 1 == new_wnd)
+			wbits = new_last;
+
+		op = b0 + bits > wbits ? wbits - b0 : bits;
+		vbo = (u64)iw * blocksize;
+
+		err = ntfs_vbo_to_lbo(sbi, &wnd->run, vbo, &lbo, &bytes);
+		if (err)
+			break;
+
+		bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
+		if (!bh)
+			return -EIO;
+
+		lock_buffer(bh);
+		buf = (ulong *)bh->b_data;
+
+		__bitmap_clear(buf, b0, blocksize * 8 - b0);
+		frb = wbits - __bitmap_weight(buf, wbits);
+		wnd->total_zeroes += frb - wnd->free_bits[iw];
+		wnd->free_bits[iw] = frb;
+
+		set_buffer_uptodate(bh);
+		mark_buffer_dirty(bh);
+		unlock_buffer(bh);
+		/* err = sync_dirty_buffer(bh); */
+
+		b0 = 0;
+		bits -= op;
+	}
+
+	wnd->nbits = new_bits;
+	wnd->nwnd = new_wnd;
+	wnd->bits_last = new_last;
+
+	wnd_add_free_ext(wnd, old_bits, new_bits - old_bits, false);
+
+	return 0;
+}
+
+void wnd_zone_set(struct wnd_bitmap *wnd, size_t lcn, size_t len)
+{
+	size_t zlen;
+
+	zlen = wnd->zone_end - wnd->zone_bit;
+	if (zlen)
+		wnd_add_free_ext(wnd, wnd->zone_bit, zlen, false);
+
+	if (!RB_EMPTY_ROOT(&wnd->start_tree) && len)
+		wnd_remove_free_ext(wnd, lcn, len);
+
+	wnd->zone_bit = lcn;
+	wnd->zone_end = lcn + len;
+}
+
+int ntfs_trim_fs(struct ntfs_sb_info *sbi, struct fstrim_range *range)
+{
+	int err = 0;
+	struct super_block *sb = sbi->sb;
+	struct wnd_bitmap *wnd = &sbi->used.bitmap;
+	u32 wbits = 8 * sb->s_blocksize;
+	CLST len = 0, lcn = 0, done = 0;
+	CLST minlen = bytes_to_cluster(sbi, range->minlen);
+	CLST lcn_from = bytes_to_cluster(sbi, range->start);
+	size_t iw = lcn_from >> (sb->s_blocksize_bits + 3);
+	u32 wbit = lcn_from & (wbits - 1);
+	const ulong *buf;
+	CLST lcn_to;
+
+	if (!minlen)
+		minlen = 1;
+
+	if (range->len == (u64)-1)
+		lcn_to = wnd->nbits;
+	else
+		lcn_to = bytes_to_cluster(sbi, range->start + range->len);
+
+	down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
+
+	for (; iw < wnd->nbits; iw++, wbit = 0) {
+		CLST lcn_wnd = iw * wbits;
+		struct buffer_head *bh;
+
+		if (lcn_wnd > lcn_to)
+			break;
+
+		if (!wnd->free_bits[iw])
+			continue;
+
+		if (iw + 1 == wnd->nwnd)
+			wbits = wnd->bits_last;
+
+		if (lcn_wnd + wbits > lcn_to)
+			wbits = lcn_to - lcn_wnd;
+
+		bh = wnd_map(wnd, iw);
+		if (IS_ERR(bh)) {
+			err = PTR_ERR(bh);
+			break;
+		}
+
+		buf = (ulong *)bh->b_data;
+
+		for (; wbit < wbits; wbit++) {
+			if (!test_bit(wbit, buf)) {
+				if (!len)
+					lcn = lcn_wnd + wbit;
+				len += 1;
+				continue;
+			}
+			if (len >= minlen) {
+				err = ntfs_discard(sbi, lcn, len);
+				if (err)
+					goto out;
+				done += len;
+			}
+			len = 0;
+		}
+		put_bh(bh);
+	}
+
+	/* Process the last fragment. */
+	if (len >= minlen) {
+		err = ntfs_discard(sbi, lcn, len);
+		if (err)
+			goto out;
+		done += len;
+	}
+
+out:
+	range->len = (u64)done << sbi->cluster_bits;
+
+	up_read(&wnd->rw_lock);
+
+	return err;
+}
diff --git a/fs/ntfs3/debug.h b/fs/ntfs3/debug.h
new file mode 100644
index 000000000000..31120569a87b
--- /dev/null
+++ b/fs/ntfs3/debug.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ * Useful functions for debugging.
+ *
+ */
+
+// clang-format off
+#ifndef _LINUX_NTFS3_DEBUG_H
+#define _LINUX_NTFS3_DEBUG_H
+
+#ifndef Add2Ptr
+#define Add2Ptr(P, I)		((void *)((u8 *)(P) + (I)))
+#define PtrOffset(B, O)		((size_t)((size_t)(O) - (size_t)(B)))
+#endif
+
+#ifdef CONFIG_PRINTK
+__printf(2, 3)
+void ntfs_printk(const struct super_block *sb, const char *fmt, ...);
+__printf(2, 3)
+void ntfs_inode_printk(struct inode *inode, const char *fmt, ...);
+#else
+static inline __printf(2, 3)
+void ntfs_printk(const struct super_block *sb, const char *fmt, ...)
+{
+}
+
+static inline __printf(2, 3)
+void ntfs_inode_printk(struct inode *inode, const char *fmt, ...)
+{
+}
+#endif
+
+/*
+ * Logging macros. Thanks Joe Perches <joe@perches.com> for implementation.
+ */
+
+#define ntfs_err(sb, fmt, ...)  ntfs_printk(sb, KERN_ERR fmt, ##__VA_ARGS__)
+#define ntfs_warn(sb, fmt, ...) ntfs_printk(sb, KERN_WARNING fmt, ##__VA_ARGS__)
+#define ntfs_info(sb, fmt, ...) ntfs_printk(sb, KERN_INFO fmt, ##__VA_ARGS__)
+#define ntfs_notice(sb, fmt, ...)                                              \
+	ntfs_printk(sb, KERN_NOTICE fmt, ##__VA_ARGS__)
+
+#define ntfs_inode_err(inode, fmt, ...)                                        \
+	ntfs_inode_printk(inode, KERN_ERR fmt, ##__VA_ARGS__)
+#define ntfs_inode_warn(inode, fmt, ...)                                       \
+	ntfs_inode_printk(inode, KERN_WARNING fmt, ##__VA_ARGS__)
+
+#endif /* _LINUX_NTFS3_DEBUG_H */
+// clang-format on
diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c
new file mode 100644
index 000000000000..93f6d485564e
--- /dev/null
+++ b/fs/ntfs3/dir.c
@@ -0,0 +1,599 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ *  Directory handling functions for NTFS-based filesystems.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/fs.h>
+#include <linux/iversion.h>
+#include <linux/nls.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+/* Convert little endian UTF-16 to NLS string. */
+int ntfs_utf16_to_nls(struct ntfs_sb_info *sbi, const struct le_str *uni,
+		      u8 *buf, int buf_len)
+{
+	int ret, uni_len, warn;
+	const __le16 *ip;
+	u8 *op;
+	struct nls_table *nls = sbi->options.nls;
+
+	static_assert(sizeof(wchar_t) == sizeof(__le16));
+
+	if (!nls) {
+		/* UTF-16 -> UTF-8 */
+		ret = utf16s_to_utf8s((wchar_t *)uni->name, uni->len,
+				      UTF16_LITTLE_ENDIAN, buf, buf_len);
+		buf[ret] = '\0';
+		return ret;
+	}
+
+	ip = uni->name;
+	op = buf;
+	uni_len = uni->len;
+	warn = 0;
+
+	while (uni_len--) {
+		u16 ec;
+		int charlen;
+		char dump[5];
+
+		if (buf_len < NLS_MAX_CHARSET_SIZE) {
+			ntfs_warn(sbi->sb,
+				  "filename was truncated while converting.");
+			break;
+		}
+
+		ec = le16_to_cpu(*ip++);
+		charlen = nls->uni2char(ec, op, buf_len);
+
+		if (charlen > 0) {
+			op += charlen;
+			buf_len -= charlen;
+			continue;
+		}
+
+		*op++ = '_';
+		buf_len -= 1;
+		if (warn)
+			continue;
+
+		warn = 1;
+		hex_byte_pack(&dump[0], ec >> 8);
+		hex_byte_pack(&dump[2], ec);
+		dump[4] = 0;
+
+		ntfs_err(sbi->sb, "failed to convert \"%s\" to %s", dump,
+			 nls->charset);
+	}
+
+	*op = '\0';
+	return op - buf;
+}
+
+// clang-format off
+#define PLANE_SIZE	0x00010000
+
+#define SURROGATE_PAIR	0x0000d800
+#define SURROGATE_LOW	0x00000400
+#define SURROGATE_BITS	0x000003ff
+// clang-format on
+
+/*
+ * put_utf16 - Modified version of put_utf16 from fs/nls/nls_base.c
+ *
+ * Function is sparse warnings free.
+ */
+static inline void put_utf16(wchar_t *s, unsigned int c,
+			     enum utf16_endian endian)
+{
+	static_assert(sizeof(wchar_t) == sizeof(__le16));
+	static_assert(sizeof(wchar_t) == sizeof(__be16));
+
+	switch (endian) {
+	default:
+		*s = (wchar_t)c;
+		break;
+	case UTF16_LITTLE_ENDIAN:
+		*(__le16 *)s = __cpu_to_le16(c);
+		break;
+	case UTF16_BIG_ENDIAN:
+		*(__be16 *)s = __cpu_to_be16(c);
+		break;
+	}
+}
+
+/*
+ * _utf8s_to_utf16s
+ *
+ * Modified version of 'utf8s_to_utf16s' allows to
+ * detect -ENAMETOOLONG without writing out of expected maximum.
+ */
+static int _utf8s_to_utf16s(const u8 *s, int inlen, enum utf16_endian endian,
+			    wchar_t *pwcs, int maxout)
+{
+	u16 *op;
+	int size;
+	unicode_t u;
+
+	op = pwcs;
+	while (inlen > 0 && *s) {
+		if (*s & 0x80) {
+			size = utf8_to_utf32(s, inlen, &u);
+			if (size < 0)
+				return -EINVAL;
+			s += size;
+			inlen -= size;
+
+			if (u >= PLANE_SIZE) {
+				if (maxout < 2)
+					return -ENAMETOOLONG;
+
+				u -= PLANE_SIZE;
+				put_utf16(op++,
+					  SURROGATE_PAIR |
+						  ((u >> 10) & SURROGATE_BITS),
+					  endian);
+				put_utf16(op++,
+					  SURROGATE_PAIR | SURROGATE_LOW |
+						  (u & SURROGATE_BITS),
+					  endian);
+				maxout -= 2;
+			} else {
+				if (maxout < 1)
+					return -ENAMETOOLONG;
+
+				put_utf16(op++, u, endian);
+				maxout--;
+			}
+		} else {
+			if (maxout < 1)
+				return -ENAMETOOLONG;
+
+			put_utf16(op++, *s++, endian);
+			inlen--;
+			maxout--;
+		}
+	}
+	return op - pwcs;
+}
+
+/*
+ * ntfs_nls_to_utf16 - Convert input string to UTF-16.
+ * @name:	Input name.
+ * @name_len:	Input name length.
+ * @uni:	Destination memory.
+ * @max_ulen:	Destination memory.
+ * @endian:	Endian of target UTF-16 string.
+ *
+ * This function is called:
+ * - to create NTFS name
+ * - to create symlink
+ *
+ * Return: UTF-16 string length or error (if negative).
+ */
+int ntfs_nls_to_utf16(struct ntfs_sb_info *sbi, const u8 *name, u32 name_len,
+		      struct cpu_str *uni, u32 max_ulen,
+		      enum utf16_endian endian)
+{
+	int ret, slen;
+	const u8 *end;
+	struct nls_table *nls = sbi->options.nls;
+	u16 *uname = uni->name;
+
+	static_assert(sizeof(wchar_t) == sizeof(u16));
+
+	if (!nls) {
+		/* utf8 -> utf16 */
+		ret = _utf8s_to_utf16s(name, name_len, endian, uname, max_ulen);
+		uni->len = ret;
+		return ret;
+	}
+
+	for (ret = 0, end = name + name_len; name < end; ret++, name += slen) {
+		if (ret >= max_ulen)
+			return -ENAMETOOLONG;
+
+		slen = nls->char2uni(name, end - name, uname + ret);
+		if (!slen)
+			return -EINVAL;
+		if (slen < 0)
+			return slen;
+	}
+
+#ifdef __BIG_ENDIAN
+	if (endian == UTF16_LITTLE_ENDIAN) {
+		int i = ret;
+
+		while (i--) {
+			__cpu_to_le16s(uname);
+			uname++;
+		}
+	}
+#else
+	if (endian == UTF16_BIG_ENDIAN) {
+		int i = ret;
+
+		while (i--) {
+			__cpu_to_be16s(uname);
+			uname++;
+		}
+	}
+#endif
+
+	uni->len = ret;
+	return ret;
+}
+
+/*
+ * dir_search_u - Helper function.
+ */
+struct inode *dir_search_u(struct inode *dir, const struct cpu_str *uni,
+			   struct ntfs_fnd *fnd)
+{
+	int err = 0;
+	struct super_block *sb = dir->i_sb;
+	struct ntfs_sb_info *sbi = sb->s_fs_info;
+	struct ntfs_inode *ni = ntfs_i(dir);
+	struct NTFS_DE *e;
+	int diff;
+	struct inode *inode = NULL;
+	struct ntfs_fnd *fnd_a = NULL;
+
+	if (!fnd) {
+		fnd_a = fnd_get();
+		if (!fnd_a) {
+			err = -ENOMEM;
+			goto out;
+		}
+		fnd = fnd_a;
+	}
+
+	err = indx_find(&ni->dir, ni, NULL, uni, 0, sbi, &diff, &e, fnd);
+
+	if (err)
+		goto out;
+
+	if (diff) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	inode = ntfs_iget5(sb, &e->ref, uni);
+	if (!IS_ERR(inode) && is_bad_inode(inode)) {
+		iput(inode);
+		err = -EINVAL;
+	}
+out:
+	fnd_put(fnd_a);
+
+	return err == -ENOENT ? NULL : err ? ERR_PTR(err) : inode;
+}
+
+static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni,
+			       const struct NTFS_DE *e, u8 *name,
+			       struct dir_context *ctx)
+{
+	const struct ATTR_FILE_NAME *fname;
+	unsigned long ino;
+	int name_len;
+	u32 dt_type;
+
+	fname = Add2Ptr(e, sizeof(struct NTFS_DE));
+
+	if (fname->type == FILE_NAME_DOS)
+		return 0;
+
+	if (!mi_is_ref(&ni->mi, &fname->home))
+		return 0;
+
+	ino = ino_get(&e->ref);
+
+	if (ino == MFT_REC_ROOT)
+		return 0;
+
+	/* Skip meta files. Unless option to show metafiles is set. */
+	if (!sbi->options.showmeta && ntfs_is_meta_file(sbi, ino))
+		return 0;
+
+	if (sbi->options.nohidden && (fname->dup.fa & FILE_ATTRIBUTE_HIDDEN))
+		return 0;
+
+	name_len = ntfs_utf16_to_nls(sbi, (struct le_str *)&fname->name_len,
+				     name, PATH_MAX);
+	if (name_len <= 0) {
+		ntfs_warn(sbi->sb, "failed to convert name for inode %lx.",
+			  ino);
+		return 0;
+	}
+
+	dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG;
+
+	return !dir_emit(ctx, (s8 *)name, name_len, ino, dt_type);
+}
+
+/*
+ * ntfs_read_hdr - Helper function for ntfs_readdir().
+ */
+static int ntfs_read_hdr(struct ntfs_sb_info *sbi, struct ntfs_inode *ni,
+			 const struct INDEX_HDR *hdr, u64 vbo, u64 pos,
+			 u8 *name, struct dir_context *ctx)
+{
+	int err;
+	const struct NTFS_DE *e;
+	u32 e_size;
+	u32 end = le32_to_cpu(hdr->used);
+	u32 off = le32_to_cpu(hdr->de_off);
+
+	for (;; off += e_size) {
+		if (off + sizeof(struct NTFS_DE) > end)
+			return -1;
+
+		e = Add2Ptr(hdr, off);
+		e_size = le16_to_cpu(e->size);
+		if (e_size < sizeof(struct NTFS_DE) || off + e_size > end)
+			return -1;
+
+		if (de_is_last(e))
+			return 0;
+
+		/* Skip already enumerated. */
+		if (vbo + off < pos)
+			continue;
+
+		if (le16_to_cpu(e->key_size) < SIZEOF_ATTRIBUTE_FILENAME)
+			return -1;
+
+		ctx->pos = vbo + off;
+
+		/* Submit the name to the filldir callback. */
+		err = ntfs_filldir(sbi, ni, e, name, ctx);
+		if (err)
+			return err;
+	}
+}
+
+/*
+ * ntfs_readdir - file_operations::iterate_shared
+ *
+ * Use non sorted enumeration.
+ * We have an example of broken volume where sorted enumeration
+ * counts each name twice.
+ */
+static int ntfs_readdir(struct file *file, struct dir_context *ctx)
+{
+	const struct INDEX_ROOT *root;
+	u64 vbo;
+	size_t bit;
+	loff_t eod;
+	int err = 0;
+	struct inode *dir = file_inode(file);
+	struct ntfs_inode *ni = ntfs_i(dir);
+	struct super_block *sb = dir->i_sb;
+	struct ntfs_sb_info *sbi = sb->s_fs_info;
+	loff_t i_size = i_size_read(dir);
+	u32 pos = ctx->pos;
+	u8 *name = NULL;
+	struct indx_node *node = NULL;
+	u8 index_bits = ni->dir.index_bits;
+
+	/* Name is a buffer of PATH_MAX length. */
+	static_assert(NTFS_NAME_LEN * 4 < PATH_MAX);
+
+	eod = i_size + sbi->record_size;
+
+	if (pos >= eod)
+		return 0;
+
+	if (!dir_emit_dots(file, ctx))
+		return 0;
+
+	/* Allocate PATH_MAX bytes. */
+	name = __getname();
+	if (!name)
+		return -ENOMEM;
+
+	if (!ni->mi_loaded && ni->attr_list.size) {
+		/*
+		 * Directory inode is locked for read.
+		 * Load all subrecords to avoid 'write' access to 'ni' during
+		 * directory reading.
+		 */
+		ni_lock(ni);
+		if (!ni->mi_loaded && ni->attr_list.size) {
+			err = ni_load_all_mi(ni);
+			if (!err)
+				ni->mi_loaded = true;
+		}
+		ni_unlock(ni);
+		if (err)
+			goto out;
+	}
+
+	root = indx_get_root(&ni->dir, ni, NULL, NULL);
+	if (!root) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (pos >= sbi->record_size) {
+		bit = (pos - sbi->record_size) >> index_bits;
+	} else {
+		err = ntfs_read_hdr(sbi, ni, &root->ihdr, 0, pos, name, ctx);
+		if (err)
+			goto out;
+		bit = 0;
+	}
+
+	if (!i_size) {
+		ctx->pos = eod;
+		goto out;
+	}
+
+	for (;;) {
+		vbo = (u64)bit << index_bits;
+		if (vbo >= i_size) {
+			ctx->pos = eod;
+			goto out;
+		}
+
+		err = indx_used_bit(&ni->dir, ni, &bit);
+		if (err)
+			goto out;
+
+		if (bit == MINUS_ONE_T) {
+			ctx->pos = eod;
+			goto out;
+		}
+
+		vbo = (u64)bit << index_bits;
+		if (vbo >= i_size) {
+			ntfs_inode_err(dir, "Looks like your dir is corrupt");
+			err = -EINVAL;
+			goto out;
+		}
+
+		err = indx_read(&ni->dir, ni, bit << ni->dir.idx2vbn_bits,
+				&node);
+		if (err)
+			goto out;
+
+		err = ntfs_read_hdr(sbi, ni, &node->index->ihdr,
+				    vbo + sbi->record_size, pos, name, ctx);
+		if (err)
+			goto out;
+
+		bit += 1;
+	}
+
+out:
+
+	__putname(name);
+	put_indx_node(node);
+
+	if (err == -ENOENT) {
+		err = 0;
+		ctx->pos = pos;
+	}
+
+	return err;
+}
+
+static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs,
+			  size_t *files)
+{
+	int err = 0;
+	struct ntfs_inode *ni = ntfs_i(dir);
+	struct NTFS_DE *e = NULL;
+	struct INDEX_ROOT *root;
+	struct INDEX_HDR *hdr;
+	const struct ATTR_FILE_NAME *fname;
+	u32 e_size, off, end;
+	u64 vbo = 0;
+	size_t drs = 0, fles = 0, bit = 0;
+	loff_t i_size = ni->vfs_inode.i_size;
+	struct indx_node *node = NULL;
+	u8 index_bits = ni->dir.index_bits;
+
+	if (is_empty)
+		*is_empty = true;
+
+	root = indx_get_root(&ni->dir, ni, NULL, NULL);
+	if (!root)
+		return -EINVAL;
+
+	hdr = &root->ihdr;
+
+	for (;;) {
+		end = le32_to_cpu(hdr->used);
+		off = le32_to_cpu(hdr->de_off);
+
+		for (; off + sizeof(struct NTFS_DE) <= end; off += e_size) {
+			e = Add2Ptr(hdr, off);
+			e_size = le16_to_cpu(e->size);
+			if (e_size < sizeof(struct NTFS_DE) ||
+			    off + e_size > end)
+				break;
+
+			if (de_is_last(e))
+				break;
+
+			fname = de_get_fname(e);
+			if (!fname)
+				continue;
+
+			if (fname->type == FILE_NAME_DOS)
+				continue;
+
+			if (is_empty) {
+				*is_empty = false;
+				if (!dirs && !files)
+					goto out;
+			}
+
+			if (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY)
+				drs += 1;
+			else
+				fles += 1;
+		}
+
+		if (vbo >= i_size)
+			goto out;
+
+		err = indx_used_bit(&ni->dir, ni, &bit);
+		if (err)
+			goto out;
+
+		if (bit == MINUS_ONE_T)
+			goto out;
+
+		vbo = (u64)bit << index_bits;
+		if (vbo >= i_size)
+			goto out;
+
+		err = indx_read(&ni->dir, ni, bit << ni->dir.idx2vbn_bits,
+				&node);
+		if (err)
+			goto out;
+
+		hdr = &node->index->ihdr;
+		bit += 1;
+		vbo = (u64)bit << ni->dir.idx2vbn_bits;
+	}
+
+out:
+	put_indx_node(node);
+	if (dirs)
+		*dirs = drs;
+	if (files)
+		*files = fles;
+
+	return err;
+}
+
+bool dir_is_empty(struct inode *dir)
+{
+	bool is_empty = false;
+
+	ntfs_dir_count(dir, &is_empty, NULL, NULL);
+
+	return is_empty;
+}
+
+// clang-format off
+const struct file_operations ntfs_dir_operations = {
+	.llseek		= generic_file_llseek,
+	.read		= generic_read_dir,
+	.iterate_shared	= ntfs_readdir,
+	.fsync		= generic_file_fsync,
+	.open		= ntfs_file_open,
+};
+// clang-format on
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
new file mode 100644
index 000000000000..424450e77ad5
--- /dev/null
+++ b/fs/ntfs3/file.c
@@ -0,0 +1,1251 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ *  Regular file handling primitives for NTFS-based filesystems.
+ *
+ */
+
+#include <linux/backing-dev.h>
+#include <linux/buffer_head.h>
+#include <linux/compat.h>
+#include <linux/falloc.h>
+#include <linux/fiemap.h>
+#include <linux/nls.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+static int ntfs_ioctl_fitrim(struct ntfs_sb_info *sbi, unsigned long arg)
+{
+	struct fstrim_range __user *user_range;
+	struct fstrim_range range;
+	struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev);
+	int err;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (!blk_queue_discard(q))
+		return -EOPNOTSUPP;
+
+	user_range = (struct fstrim_range __user *)arg;
+	if (copy_from_user(&range, user_range, sizeof(range)))
+		return -EFAULT;
+
+	range.minlen = max_t(u32, range.minlen, q->limits.discard_granularity);
+
+	err = ntfs_trim_fs(sbi, &range);
+	if (err < 0)
+		return err;
+
+	if (copy_to_user(user_range, &range, sizeof(range)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg)
+{
+	struct inode *inode = file_inode(filp);
+	struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info;
+
+	switch (cmd) {
+	case FITRIM:
+		return ntfs_ioctl_fitrim(sbi, arg);
+	}
+	return -ENOTTY; /* Inappropriate ioctl for device. */
+}
+
+#ifdef CONFIG_COMPAT
+static long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg)
+
+{
+	return ntfs_ioctl(filp, cmd, (unsigned long)compat_ptr(arg));
+}
+#endif
+
+/*
+ * ntfs_getattr - inode_operations::getattr
+ */
+int ntfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+		 struct kstat *stat, u32 request_mask, u32 flags)
+{
+	struct inode *inode = d_inode(path->dentry);
+	struct ntfs_inode *ni = ntfs_i(inode);
+
+	if (is_compressed(ni))
+		stat->attributes |= STATX_ATTR_COMPRESSED;
+
+	if (is_encrypted(ni))
+		stat->attributes |= STATX_ATTR_ENCRYPTED;
+
+	stat->attributes_mask |= STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED;
+
+	generic_fillattr(mnt_userns, inode, stat);
+
+	stat->result_mask |= STATX_BTIME;
+	stat->btime = ni->i_crtime;
+	stat->blksize = ni->mi.sbi->cluster_size; /* 512, 1K, ..., 2M */
+
+	return 0;
+}
+
+static int ntfs_extend_initialized_size(struct file *file,
+					struct ntfs_inode *ni,
+					const loff_t valid,
+					const loff_t new_valid)
+{
+	struct inode *inode = &ni->vfs_inode;
+	struct address_space *mapping = inode->i_mapping;
+	struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info;
+	loff_t pos = valid;
+	int err;
+
+	if (is_resident(ni)) {
+		ni->i_valid = new_valid;
+		return 0;
+	}
+
+	WARN_ON(is_compressed(ni));
+	WARN_ON(valid >= new_valid);
+
+	for (;;) {
+		u32 zerofrom, len;
+		struct page *page;
+		void *fsdata;
+		u8 bits;
+		CLST vcn, lcn, clen;
+
+		if (is_sparsed(ni)) {
+			bits = sbi->cluster_bits;
+			vcn = pos >> bits;
+
+			err = attr_data_get_block(ni, vcn, 0, &lcn, &clen,
+						  NULL);
+			if (err)
+				goto out;
+
+			if (lcn == SPARSE_LCN) {
+				loff_t vbo = (loff_t)vcn << bits;
+				loff_t to = vbo + ((loff_t)clen << bits);
+
+				if (to <= new_valid) {
+					ni->i_valid = to;
+					pos = to;
+					goto next;
+				}
+
+				if (vbo < pos) {
+					pos = vbo;
+				} else {
+					to = (new_valid >> bits) << bits;
+					if (pos < to) {
+						ni->i_valid = to;
+						pos = to;
+						goto next;
+					}
+				}
+			}
+		}
+
+		zerofrom = pos & (PAGE_SIZE - 1);
+		len = PAGE_SIZE - zerofrom;
+
+		if (pos + len > new_valid)
+			len = new_valid - pos;
+
+		err = pagecache_write_begin(file, mapping, pos, len, 0, &page,
+					    &fsdata);
+		if (err)
+			goto out;
+
+		zero_user_segment(page, zerofrom, PAGE_SIZE);
+
+		/* This function in any case puts page. */
+		err = pagecache_write_end(file, mapping, pos, len, len, page,
+					  fsdata);
+		if (err < 0)
+			goto out;
+		pos += len;
+
+next:
+		if (pos >= new_valid)
+			break;
+
+		balance_dirty_pages_ratelimited(mapping);
+		cond_resched();
+	}
+
+	return 0;
+
+out:
+	ni->i_valid = valid;
+	ntfs_inode_warn(inode, "failed to extend initialized size to %llx.",
+			new_valid);
+	return err;
+}
+
+/*
+ * ntfs_zero_range - Helper function for punch_hole.
+ *
+ * It zeroes a range [vbo, vbo_to).
+ */
+static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to)
+{
+	int err = 0;
+	struct address_space *mapping = inode->i_mapping;
+	u32 blocksize = 1 << inode->i_blkbits;
+	pgoff_t idx = vbo >> PAGE_SHIFT;
+	u32 z_start = vbo & (PAGE_SIZE - 1);
+	pgoff_t idx_end = (vbo_to + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	loff_t page_off;
+	struct buffer_head *head, *bh;
+	u32 bh_next, bh_off, z_end;
+	sector_t iblock;
+	struct page *page;
+
+	for (; idx < idx_end; idx += 1, z_start = 0) {
+		page_off = (loff_t)idx << PAGE_SHIFT;
+		z_end = (page_off + PAGE_SIZE) > vbo_to ? (vbo_to - page_off)
+							: PAGE_SIZE;
+		iblock = page_off >> inode->i_blkbits;
+
+		page = find_or_create_page(mapping, idx,
+					   mapping_gfp_constraint(mapping,
+								  ~__GFP_FS));
+		if (!page)
+			return -ENOMEM;
+
+		if (!page_has_buffers(page))
+			create_empty_buffers(page, blocksize, 0);
+
+		bh = head = page_buffers(page);
+		bh_off = 0;
+		do {
+			bh_next = bh_off + blocksize;
+
+			if (bh_next <= z_start || bh_off >= z_end)
+				continue;
+
+			if (!buffer_mapped(bh)) {
+				ntfs_get_block(inode, iblock, bh, 0);
+				/* Unmapped? It's a hole - nothing to do. */
+				if (!buffer_mapped(bh))
+					continue;
+			}
+
+			/* Ok, it's mapped. Make sure it's up-to-date. */
+			if (PageUptodate(page))
+				set_buffer_uptodate(bh);
+
+			if (!buffer_uptodate(bh)) {
+				lock_buffer(bh);
+				bh->b_end_io = end_buffer_read_sync;
+				get_bh(bh);
+				submit_bh(REQ_OP_READ, 0, bh);
+
+				wait_on_buffer(bh);
+				if (!buffer_uptodate(bh)) {
+					unlock_page(page);
+					put_page(page);
+					err = -EIO;
+					goto out;
+				}
+			}
+
+			mark_buffer_dirty(bh);
+
+		} while (bh_off = bh_next, iblock += 1,
+			 head != (bh = bh->b_this_page));
+
+		zero_user_segment(page, z_start, z_end);
+
+		unlock_page(page);
+		put_page(page);
+		cond_resched();
+	}
+out:
+	mark_inode_dirty(inode);
+	return err;
+}
+
+/*
+ * ntfs_sparse_cluster - Helper function to zero a new allocated clusters.
+ *
+ * NOTE: 512 <= cluster size <= 2M
+ */
+void ntfs_sparse_cluster(struct inode *inode, struct page *page0, CLST vcn,
+			 CLST len)
+{
+	struct address_space *mapping = inode->i_mapping;
+	struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info;
+	u64 vbo = (u64)vcn << sbi->cluster_bits;
+	u64 bytes = (u64)len << sbi->cluster_bits;
+	u32 blocksize = 1 << inode->i_blkbits;
+	pgoff_t idx0 = page0 ? page0->index : -1;
+	loff_t vbo_clst = vbo & sbi->cluster_mask_inv;
+	loff_t end = ntfs_up_cluster(sbi, vbo + bytes);
+	pgoff_t idx = vbo_clst >> PAGE_SHIFT;
+	u32 from = vbo_clst & (PAGE_SIZE - 1);
+	pgoff_t idx_end = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	loff_t page_off;
+	u32 to;
+	bool partial;
+	struct page *page;
+
+	for (; idx < idx_end; idx += 1, from = 0) {
+		page = idx == idx0 ? page0 : grab_cache_page(mapping, idx);
+
+		if (!page)
+			continue;
+
+		page_off = (loff_t)idx << PAGE_SHIFT;
+		to = (page_off + PAGE_SIZE) > end ? (end - page_off)
+						  : PAGE_SIZE;
+		partial = false;
+
+		if ((from || PAGE_SIZE != to) &&
+		    likely(!page_has_buffers(page))) {
+			create_empty_buffers(page, blocksize, 0);
+		}
+
+		if (page_has_buffers(page)) {
+			struct buffer_head *head, *bh;
+			u32 bh_off = 0;
+
+			bh = head = page_buffers(page);
+			do {
+				u32 bh_next = bh_off + blocksize;
+
+				if (from <= bh_off && bh_next <= to) {
+					set_buffer_uptodate(bh);
+					mark_buffer_dirty(bh);
+				} else if (!buffer_uptodate(bh)) {
+					partial = true;
+				}
+				bh_off = bh_next;
+			} while (head != (bh = bh->b_this_page));
+		}
+
+		zero_user_segment(page, from, to);
+
+		if (!partial) {
+			if (!PageUptodate(page))
+				SetPageUptodate(page);
+			set_page_dirty(page);
+		}
+
+		if (idx != idx0) {
+			unlock_page(page);
+			put_page(page);
+		}
+		cond_resched();
+	}
+	mark_inode_dirty(inode);
+}
+
+/*
+ * ntfs_file_mmap - file_operations::mmap
+ */
+static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	struct ntfs_inode *ni = ntfs_i(inode);
+	u64 from = ((u64)vma->vm_pgoff << PAGE_SHIFT);
+	bool rw = vma->vm_flags & VM_WRITE;
+	int err;
+
+	if (is_encrypted(ni)) {
+		ntfs_inode_warn(inode, "mmap encrypted not supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (is_dedup(ni)) {
+		ntfs_inode_warn(inode, "mmap deduplicated not supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (is_compressed(ni) && rw) {
+		ntfs_inode_warn(inode, "mmap(write) compressed not supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (rw) {
+		u64 to = min_t(loff_t, i_size_read(inode),
+			       from + vma->vm_end - vma->vm_start);
+
+		if (is_sparsed(ni)) {
+			/* Allocate clusters for rw map. */
+			struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info;
+			CLST lcn, len;
+			CLST vcn = from >> sbi->cluster_bits;
+			CLST end = bytes_to_cluster(sbi, to);
+			bool new;
+
+			for (; vcn < end; vcn += len) {
+				err = attr_data_get_block(ni, vcn, 1, &lcn,
+							  &len, &new);
+				if (err)
+					goto out;
+
+				if (!new)
+					continue;
+				ntfs_sparse_cluster(inode, NULL, vcn, 1);
+			}
+		}
+
+		if (ni->i_valid < to) {
+			if (!inode_trylock(inode)) {
+				err = -EAGAIN;
+				goto out;
+			}
+			err = ntfs_extend_initialized_size(file, ni,
+							   ni->i_valid, to);
+			inode_unlock(inode);
+			if (err)
+				goto out;
+		}
+	}
+
+	err = generic_file_mmap(file, vma);
+out:
+	return err;
+}
+
+static int ntfs_extend(struct inode *inode, loff_t pos, size_t count,
+		       struct file *file)
+{
+	struct ntfs_inode *ni = ntfs_i(inode);
+	struct address_space *mapping = inode->i_mapping;
+	loff_t end = pos + count;
+	bool extend_init = file && pos > ni->i_valid;
+	int err;
+
+	if (end <= inode->i_size && !extend_init)
+		return 0;
+
+	/* Mark rw ntfs as dirty. It will be cleared at umount. */
+	ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_DIRTY);
+
+	if (end > inode->i_size) {
+		err = ntfs_set_size(inode, end);
+		if (err)
+			goto out;
+		inode->i_size = end;
+	}
+
+	if (extend_init && !is_compressed(ni)) {
+		err = ntfs_extend_initialized_size(file, ni, ni->i_valid, pos);
+		if (err)
+			goto out;
+	} else {
+		err = 0;
+	}
+
+	inode->i_ctime = inode->i_mtime = current_time(inode);
+	mark_inode_dirty(inode);
+
+	if (IS_SYNC(inode)) {
+		int err2;
+
+		err = filemap_fdatawrite_range(mapping, pos, end - 1);
+		err2 = sync_mapping_buffers(mapping);
+		if (!err)
+			err = err2;
+		err2 = write_inode_now(inode, 1);
+		if (!err)
+			err = err2;
+		if (!err)
+			err = filemap_fdatawait_range(mapping, pos, end - 1);
+	}
+
+out:
+	return err;
+}
+
+static int ntfs_truncate(struct inode *inode, loff_t new_size)
+{
+	struct super_block *sb = inode->i_sb;
+	struct ntfs_inode *ni = ntfs_i(inode);
+	int err, dirty = 0;
+	u64 new_valid;
+
+	if (!S_ISREG(inode->i_mode))
+		return 0;
+
+	if (is_compressed(ni)) {
+		if (ni->i_valid > new_size)
+			ni->i_valid = new_size;
+	} else {
+		err = block_truncate_page(inode->i_mapping, new_size,
+					  ntfs_get_block);
+		if (err)
+			return err;
+	}
+
+	new_valid = ntfs_up_block(sb, min_t(u64, ni->i_valid, new_size));
+
+	ni_lock(ni);
+
+	truncate_setsize(inode, new_size);
+
+	down_write(&ni->file.run_lock);
+	err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size,
+			    &new_valid, true, NULL);
+	up_write(&ni->file.run_lock);
+
+	if (new_valid < ni->i_valid)
+		ni->i_valid = new_valid;
+
+	ni_unlock(ni);
+
+	ni->std_fa |= FILE_ATTRIBUTE_ARCHIVE;
+	inode->i_ctime = inode->i_mtime = current_time(inode);
+	if (!IS_DIRSYNC(inode)) {
+		dirty = 1;
+	} else {
+		err = ntfs_sync_inode(inode);
+		if (err)
+			return err;
+	}
+
+	if (dirty)
+		mark_inode_dirty(inode);
+
+	/*ntfs_flush_inodes(inode->i_sb, inode, NULL);*/
+
+	return 0;
+}
+
+/*
+ * ntfs_fallocate
+ *
+ * Preallocate space for a file. This implements ntfs's fallocate file
+ * operation, which gets called from sys_fallocate system call. User
+ * space requests 'len' bytes at 'vbo'. If FALLOC_FL_KEEP_SIZE is set
+ * we just allocate clusters without zeroing them out. Otherwise we
+ * allocate and zero out clusters via an expanding truncate.
+ */
+static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
+{
+	struct inode *inode = file->f_mapping->host;
+	struct super_block *sb = inode->i_sb;
+	struct ntfs_sb_info *sbi = sb->s_fs_info;
+	struct ntfs_inode *ni = ntfs_i(inode);
+	loff_t end = vbo + len;
+	loff_t vbo_down = round_down(vbo, PAGE_SIZE);
+	loff_t i_size;
+	int err;
+
+	/* No support for dir. */
+	if (!S_ISREG(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	/* Return error if mode is not supported. */
+	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+		     FALLOC_FL_COLLAPSE_RANGE)) {
+		ntfs_inode_warn(inode, "fallocate(0x%x) is not supported",
+				mode);
+		return -EOPNOTSUPP;
+	}
+
+	ntfs_set_state(sbi, NTFS_DIRTY_DIRTY);
+
+	inode_lock(inode);
+	i_size = inode->i_size;
+
+	if (WARN_ON(ni->ni_flags & NI_FLAG_COMPRESSED_MASK)) {
+		/* Should never be here, see ntfs_file_open. */
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (mode & FALLOC_FL_PUNCH_HOLE) {
+		u32 frame_size;
+		loff_t mask, vbo_a, end_a, tmp;
+
+		if (!(mode & FALLOC_FL_KEEP_SIZE)) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		err = filemap_write_and_wait_range(inode->i_mapping, vbo,
+						   end - 1);
+		if (err)
+			goto out;
+
+		err = filemap_write_and_wait_range(inode->i_mapping, end,
+						   LLONG_MAX);
+		if (err)
+			goto out;
+
+		inode_dio_wait(inode);
+
+		truncate_pagecache(inode, vbo_down);
+
+		if (!is_sparsed(ni) && !is_compressed(ni)) {
+			/* Normal file. */
+			err = ntfs_zero_range(inode, vbo, end);
+			goto out;
+		}
+
+		ni_lock(ni);
+		err = attr_punch_hole(ni, vbo, len, &frame_size);
+		ni_unlock(ni);
+		if (err != E_NTFS_NOTALIGNED)
+			goto out;
+
+		/* Process not aligned punch. */
+		mask = frame_size - 1;
+		vbo_a = (vbo + mask) & ~mask;
+		end_a = end & ~mask;
+
+		tmp = min(vbo_a, end);
+		if (tmp > vbo) {
+			err = ntfs_zero_range(inode, vbo, tmp);
+			if (err)
+				goto out;
+		}
+
+		if (vbo < end_a && end_a < end) {
+			err = ntfs_zero_range(inode, end_a, end);
+			if (err)
+				goto out;
+		}
+
+		/* Aligned punch_hole */
+		if (end_a > vbo_a) {
+			ni_lock(ni);
+			err = attr_punch_hole(ni, vbo_a, end_a - vbo_a, NULL);
+			ni_unlock(ni);
+		}
+	} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
+		if (mode & ~FALLOC_FL_COLLAPSE_RANGE) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		/*
+		 * Write tail of the last page before removed range since
+		 * it will get removed from the page cache below.
+		 */
+		err = filemap_write_and_wait_range(inode->i_mapping, vbo_down,
+						   vbo);
+		if (err)
+			goto out;
+
+		/*
+		 * Write data that will be shifted to preserve them
+		 * when discarding page cache below.
+		 */
+		err = filemap_write_and_wait_range(inode->i_mapping, end,
+						   LLONG_MAX);
+		if (err)
+			goto out;
+
+		/* Wait for existing dio to complete. */
+		inode_dio_wait(inode);
+
+		truncate_pagecache(inode, vbo_down);
+
+		ni_lock(ni);
+		err = attr_collapse_range(ni, vbo, len);
+		ni_unlock(ni);
+	} else {
+		/*
+		 * Normal file: Allocate clusters, do not change 'valid' size.
+		 */
+		err = ntfs_set_size(inode, max(end, i_size));
+		if (err)
+			goto out;
+
+		if (is_sparsed(ni) || is_compressed(ni)) {
+			CLST vcn_v = ni->i_valid >> sbi->cluster_bits;
+			CLST vcn = vbo >> sbi->cluster_bits;
+			CLST cend = bytes_to_cluster(sbi, end);
+			CLST lcn, clen;
+			bool new;
+
+			/*
+			 * Allocate but do not zero new clusters. (see below comments)
+			 * This breaks security: One can read unused on-disk areas.
+			 * Zeroing these clusters may be too long.
+			 * Maybe we should check here for root rights?
+			 */
+			for (; vcn < cend; vcn += clen) {
+				err = attr_data_get_block(ni, vcn, cend - vcn,
+							  &lcn, &clen, &new);
+				if (err)
+					goto out;
+				if (!new || vcn >= vcn_v)
+					continue;
+
+				/*
+				 * Unwritten area.
+				 * NTFS is not able to store several unwritten areas.
+				 * Activate 'ntfs_sparse_cluster' to zero new allocated clusters.
+				 *
+				 * Dangerous in case:
+				 * 1G of sparsed clusters + 1 cluster of data =>
+				 * valid_size == 1G + 1 cluster
+				 * fallocate(1G) will zero 1G and this can be very long
+				 * xfstest 016/086 will fail without 'ntfs_sparse_cluster'.
+				 */
+				ntfs_sparse_cluster(inode, NULL, vcn,
+						    min(vcn_v - vcn, clen));
+			}
+		}
+
+		if (mode & FALLOC_FL_KEEP_SIZE) {
+			ni_lock(ni);
+			/* True - Keep preallocated. */
+			err = attr_set_size(ni, ATTR_DATA, NULL, 0,
+					    &ni->file.run, i_size, &ni->i_valid,
+					    true, NULL);
+			ni_unlock(ni);
+		}
+	}
+
+out:
+	if (err == -EFBIG)
+		err = -ENOSPC;
+
+	if (!err) {
+		inode->i_ctime = inode->i_mtime = current_time(inode);
+		mark_inode_dirty(inode);
+	}
+
+	inode_unlock(inode);
+	return err;
+}
+
+/*
+ * ntfs3_setattr - inode_operations::setattr
+ */
+int ntfs3_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+		  struct iattr *attr)
+{
+	struct super_block *sb = dentry->d_sb;
+	struct ntfs_sb_info *sbi = sb->s_fs_info;
+	struct inode *inode = d_inode(dentry);
+	struct ntfs_inode *ni = ntfs_i(inode);
+	u32 ia_valid = attr->ia_valid;
+	umode_t mode = inode->i_mode;
+	int err;
+
+	if (sbi->options.no_acs_rules) {
+		/* "No access rules" - Force any changes of time etc. */
+		attr->ia_valid |= ATTR_FORCE;
+		/* and disable for editing some attributes. */
+		attr->ia_valid &= ~(ATTR_UID | ATTR_GID | ATTR_MODE);
+		ia_valid = attr->ia_valid;
+	}
+
+	err = setattr_prepare(mnt_userns, dentry, attr);
+	if (err)
+		goto out;
+
+	if (ia_valid & ATTR_SIZE) {
+		loff_t oldsize = inode->i_size;
+
+		if (WARN_ON(ni->ni_flags & NI_FLAG_COMPRESSED_MASK)) {
+			/* Should never be here, see ntfs_file_open(). */
+			err = -EOPNOTSUPP;
+			goto out;
+		}
+		inode_dio_wait(inode);
+
+		if (attr->ia_size < oldsize)
+			err = ntfs_truncate(inode, attr->ia_size);
+		else if (attr->ia_size > oldsize)
+			err = ntfs_extend(inode, attr->ia_size, 0, NULL);
+
+		if (err)
+			goto out;
+
+		ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
+	}
+
+	setattr_copy(mnt_userns, inode, attr);
+
+	if (mode != inode->i_mode) {
+		err = ntfs_acl_chmod(mnt_userns, inode);
+		if (err)
+			goto out;
+
+		/* Linux 'w' -> Windows 'ro'. */
+		if (0222 & inode->i_mode)
+			ni->std_fa &= ~FILE_ATTRIBUTE_READONLY;
+		else
+			ni->std_fa |= FILE_ATTRIBUTE_READONLY;
+	}
+
+	if (ia_valid & (ATTR_UID | ATTR_GID | ATTR_MODE))
+		ntfs_save_wsl_perm(inode);
+	mark_inode_dirty(inode);
+out:
+	return err;
+}
+
+static ssize_t ntfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+	struct ntfs_inode *ni = ntfs_i(inode);
+
+	if (is_encrypted(ni)) {
+		ntfs_inode_warn(inode, "encrypted i/o not supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (is_compressed(ni) && (iocb->ki_flags & IOCB_DIRECT)) {
+		ntfs_inode_warn(inode, "direct i/o + compressed not supported");
+		return -EOPNOTSUPP;
+	}
+
+#ifndef CONFIG_NTFS3_LZX_XPRESS
+	if (ni->ni_flags & NI_FLAG_COMPRESSED_MASK) {
+		ntfs_inode_warn(
+			inode,
+			"activate CONFIG_NTFS3_LZX_XPRESS to read external compressed files");
+		return -EOPNOTSUPP;
+	}
+#endif
+
+	if (is_dedup(ni)) {
+		ntfs_inode_warn(inode, "read deduplicated not supported");
+		return -EOPNOTSUPP;
+	}
+
+	return generic_file_read_iter(iocb, iter);
+}
+
+/*
+ * ntfs_get_frame_pages
+ *
+ * Return: Array of locked pages.
+ */
+static int ntfs_get_frame_pages(struct address_space *mapping, pgoff_t index,
+				struct page **pages, u32 pages_per_frame,
+				bool *frame_uptodate)
+{
+	gfp_t gfp_mask = mapping_gfp_mask(mapping);
+	u32 npages;
+
+	*frame_uptodate = true;
+
+	for (npages = 0; npages < pages_per_frame; npages++, index++) {
+		struct page *page;
+
+		page = find_or_create_page(mapping, index, gfp_mask);
+		if (!page) {
+			while (npages--) {
+				page = pages[npages];
+				unlock_page(page);
+				put_page(page);
+			}
+
+			return -ENOMEM;
+		}
+
+		if (!PageUptodate(page))
+			*frame_uptodate = false;
+
+		pages[npages] = page;
+	}
+
+	return 0;
+}
+
+/*
+ * ntfs_compress_write - Helper for ntfs_file_write_iter() (compressed files).
+ */
+static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
+{
+	int err;
+	struct file *file = iocb->ki_filp;
+	size_t count = iov_iter_count(from);
+	loff_t pos = iocb->ki_pos;
+	struct inode *inode = file_inode(file);
+	loff_t i_size = inode->i_size;
+	struct address_space *mapping = inode->i_mapping;
+	struct ntfs_inode *ni = ntfs_i(inode);
+	u64 valid = ni->i_valid;
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	struct page *page, **pages = NULL;
+	size_t written = 0;
+	u8 frame_bits = NTFS_LZNT_CUNIT + sbi->cluster_bits;
+	u32 frame_size = 1u << frame_bits;
+	u32 pages_per_frame = frame_size >> PAGE_SHIFT;
+	u32 ip, off;
+	CLST frame;
+	u64 frame_vbo;
+	pgoff_t index;
+	bool frame_uptodate;
+
+	if (frame_size < PAGE_SIZE) {
+		/*
+		 * frame_size == 8K if cluster 512
+		 * frame_size == 64K if cluster 4096
+		 */
+		ntfs_inode_warn(inode, "page size is bigger than frame size");
+		return -EOPNOTSUPP;
+	}
+
+	pages = kmalloc_array(pages_per_frame, sizeof(struct page *), GFP_NOFS);
+	if (!pages)
+		return -ENOMEM;
+
+	current->backing_dev_info = inode_to_bdi(inode);
+	err = file_remove_privs(file);
+	if (err)
+		goto out;
+
+	err = file_update_time(file);
+	if (err)
+		goto out;
+
+	/* Zero range [valid : pos). */
+	while (valid < pos) {
+		CLST lcn, clen;
+
+		frame = valid >> frame_bits;
+		frame_vbo = valid & ~(frame_size - 1);
+		off = valid & (frame_size - 1);
+
+		err = attr_data_get_block(ni, frame << NTFS_LZNT_CUNIT, 0, &lcn,
+					  &clen, NULL);
+		if (err)
+			goto out;
+
+		if (lcn == SPARSE_LCN) {
+			ni->i_valid = valid =
+				frame_vbo + ((u64)clen << sbi->cluster_bits);
+			continue;
+		}
+
+		/* Load full frame. */
+		err = ntfs_get_frame_pages(mapping, frame_vbo >> PAGE_SHIFT,
+					   pages, pages_per_frame,
+					   &frame_uptodate);
+		if (err)
+			goto out;
+
+		if (!frame_uptodate && off) {
+			err = ni_read_frame(ni, frame_vbo, pages,
+					    pages_per_frame);
+			if (err) {
+				for (ip = 0; ip < pages_per_frame; ip++) {
+					page = pages[ip];
+					unlock_page(page);
+					put_page(page);
+				}
+				goto out;
+			}
+		}
+
+		ip = off >> PAGE_SHIFT;
+		off = offset_in_page(valid);
+		for (; ip < pages_per_frame; ip++, off = 0) {
+			page = pages[ip];
+			zero_user_segment(page, off, PAGE_SIZE);
+			flush_dcache_page(page);
+			SetPageUptodate(page);
+		}
+
+		ni_lock(ni);
+		err = ni_write_frame(ni, pages, pages_per_frame);
+		ni_unlock(ni);
+
+		for (ip = 0; ip < pages_per_frame; ip++) {
+			page = pages[ip];
+			SetPageUptodate(page);
+			unlock_page(page);
+			put_page(page);
+		}
+
+		if (err)
+			goto out;
+
+		ni->i_valid = valid = frame_vbo + frame_size;
+	}
+
+	/* Copy user data [pos : pos + count). */
+	while (count) {
+		size_t copied, bytes;
+
+		off = pos & (frame_size - 1);
+		bytes = frame_size - off;
+		if (bytes > count)
+			bytes = count;
+
+		frame = pos >> frame_bits;
+		frame_vbo = pos & ~(frame_size - 1);
+		index = frame_vbo >> PAGE_SHIFT;
+
+		if (unlikely(iov_iter_fault_in_readable(from, bytes))) {
+			err = -EFAULT;
+			goto out;
+		}
+
+		/* Load full frame. */
+		err = ntfs_get_frame_pages(mapping, index, pages,
+					   pages_per_frame, &frame_uptodate);
+		if (err)
+			goto out;
+
+		if (!frame_uptodate) {
+			loff_t to = pos + bytes;
+
+			if (off || (to < i_size && (to & (frame_size - 1)))) {
+				err = ni_read_frame(ni, frame_vbo, pages,
+						    pages_per_frame);
+				if (err) {
+					for (ip = 0; ip < pages_per_frame;
+					     ip++) {
+						page = pages[ip];
+						unlock_page(page);
+						put_page(page);
+					}
+					goto out;
+				}
+			}
+		}
+
+		WARN_ON(!bytes);
+		copied = 0;
+		ip = off >> PAGE_SHIFT;
+		off = offset_in_page(pos);
+
+		/* Copy user data to pages. */
+		for (;;) {
+			size_t cp, tail = PAGE_SIZE - off;
+
+			page = pages[ip];
+			cp = copy_page_from_iter_atomic(page, off,
+							min(tail, bytes), from);
+			flush_dcache_page(page);
+
+			copied += cp;
+			bytes -= cp;
+			if (!bytes || !cp)
+				break;
+
+			if (cp < tail) {
+				off += cp;
+			} else {
+				ip++;
+				off = 0;
+			}
+		}
+
+		ni_lock(ni);
+		err = ni_write_frame(ni, pages, pages_per_frame);
+		ni_unlock(ni);
+
+		for (ip = 0; ip < pages_per_frame; ip++) {
+			page = pages[ip];
+			ClearPageDirty(page);
+			SetPageUptodate(page);
+			unlock_page(page);
+			put_page(page);
+		}
+
+		if (err)
+			goto out;
+
+		/*
+		 * We can loop for a long time in here. Be nice and allow
+		 * us to schedule out to avoid softlocking if preempt
+		 * is disabled.
+		 */
+		cond_resched();
+
+		pos += copied;
+		written += copied;
+
+		count = iov_iter_count(from);
+	}
+
+out:
+	kfree(pages);
+
+	current->backing_dev_info = NULL;
+
+	if (err < 0)
+		return err;
+
+	iocb->ki_pos += written;
+	if (iocb->ki_pos > ni->i_valid)
+		ni->i_valid = iocb->ki_pos;
+
+	return written;
+}
+
+/*
+ * ntfs_file_write_iter - file_operations::write_iter
+ */
+static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct file *file = iocb->ki_filp;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	ssize_t ret;
+	struct ntfs_inode *ni = ntfs_i(inode);
+
+	if (is_encrypted(ni)) {
+		ntfs_inode_warn(inode, "encrypted i/o not supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (is_compressed(ni) && (iocb->ki_flags & IOCB_DIRECT)) {
+		ntfs_inode_warn(inode, "direct i/o + compressed not supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (is_dedup(ni)) {
+		ntfs_inode_warn(inode, "write into deduplicated not supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (!inode_trylock(inode)) {
+		if (iocb->ki_flags & IOCB_NOWAIT)
+			return -EAGAIN;
+		inode_lock(inode);
+	}
+
+	ret = generic_write_checks(iocb, from);
+	if (ret <= 0)
+		goto out;
+
+	if (WARN_ON(ni->ni_flags & NI_FLAG_COMPRESSED_MASK)) {
+		/* Should never be here, see ntfs_file_open(). */
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	ret = ntfs_extend(inode, iocb->ki_pos, ret, file);
+	if (ret)
+		goto out;
+
+	ret = is_compressed(ni) ? ntfs_compress_write(iocb, from)
+				: __generic_file_write_iter(iocb, from);
+
+out:
+	inode_unlock(inode);
+
+	if (ret > 0)
+		ret = generic_write_sync(iocb, ret);
+
+	return ret;
+}
+
+/*
+ * ntfs_file_open - file_operations::open
+ */
+int ntfs_file_open(struct inode *inode, struct file *file)
+{
+	struct ntfs_inode *ni = ntfs_i(inode);
+
+	if (unlikely((is_compressed(ni) || is_encrypted(ni)) &&
+		     (file->f_flags & O_DIRECT))) {
+		return -EOPNOTSUPP;
+	}
+
+	/* Decompress "external compressed" file if opened for rw. */
+	if ((ni->ni_flags & NI_FLAG_COMPRESSED_MASK) &&
+	    (file->f_flags & (O_WRONLY | O_RDWR | O_TRUNC))) {
+#ifdef CONFIG_NTFS3_LZX_XPRESS
+		int err = ni_decompress_file(ni);
+
+		if (err)
+			return err;
+#else
+		ntfs_inode_warn(
+			inode,
+			"activate CONFIG_NTFS3_LZX_XPRESS to write external compressed files");
+		return -EOPNOTSUPP;
+#endif
+	}
+
+	return generic_file_open(inode, file);
+}
+
+/*
+ * ntfs_file_release - file_operations::release
+ */
+static int ntfs_file_release(struct inode *inode, struct file *file)
+{
+	struct ntfs_inode *ni = ntfs_i(inode);
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	int err = 0;
+
+	/* If we are last writer on the inode, drop the block reservation. */
+	if (sbi->options.prealloc && ((file->f_mode & FMODE_WRITE) &&
+				      atomic_read(&inode->i_writecount) == 1)) {
+		ni_lock(ni);
+		down_write(&ni->file.run_lock);
+
+		err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run,
+				    inode->i_size, &ni->i_valid, false, NULL);
+
+		up_write(&ni->file.run_lock);
+		ni_unlock(ni);
+	}
+	return err;
+}
+
+/*
+ * ntfs_fiemap - file_operations::fiemap
+ */
+int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+		__u64 start, __u64 len)
+{
+	int err;
+	struct ntfs_inode *ni = ntfs_i(inode);
+
+	err = fiemap_prep(inode, fieinfo, start, &len, ~FIEMAP_FLAG_XATTR);
+	if (err)
+		return err;
+
+	ni_lock(ni);
+
+	err = ni_fiemap(ni, fieinfo, start, len);
+
+	ni_unlock(ni);
+
+	return err;
+}
+
+// clang-format off
+const struct inode_operations ntfs_file_inode_operations = {
+	.getattr	= ntfs_getattr,
+	.setattr	= ntfs3_setattr,
+	.listxattr	= ntfs_listxattr,
+	.permission	= ntfs_permission,
+	.get_acl	= ntfs_get_acl,
+	.set_acl	= ntfs_set_acl,
+	.fiemap		= ntfs_fiemap,
+};
+
+const struct file_operations ntfs_file_operations = {
+	.llseek		= generic_file_llseek,
+	.read_iter	= ntfs_file_read_iter,
+	.write_iter	= ntfs_file_write_iter,
+	.unlocked_ioctl = ntfs_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= ntfs_compat_ioctl,
+#endif
+	.splice_read	= generic_file_splice_read,
+	.mmap		= ntfs_file_mmap,
+	.open		= ntfs_file_open,
+	.fsync		= generic_file_fsync,
+	.splice_write	= iter_file_splice_write,
+	.fallocate	= ntfs_fallocate,
+	.release	= ntfs_file_release,
+};
+// clang-format on
diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c
new file mode 100644
index 000000000000..938b12d56ca6
--- /dev/null
+++ b/fs/ntfs3/frecord.c
@@ -0,0 +1,3257 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/fiemap.h>
+#include <linux/fs.h>
+#include <linux/nls.h>
+#include <linux/vmalloc.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+#ifdef CONFIG_NTFS3_LZX_XPRESS
+#include "lib/lib.h"
+#endif
+
+static struct mft_inode *ni_ins_mi(struct ntfs_inode *ni, struct rb_root *tree,
+				   CLST ino, struct rb_node *ins)
+{
+	struct rb_node **p = &tree->rb_node;
+	struct rb_node *pr = NULL;
+
+	while (*p) {
+		struct mft_inode *mi;
+
+		pr = *p;
+		mi = rb_entry(pr, struct mft_inode, node);
+		if (mi->rno > ino)
+			p = &pr->rb_left;
+		else if (mi->rno < ino)
+			p = &pr->rb_right;
+		else
+			return mi;
+	}
+
+	if (!ins)
+		return NULL;
+
+	rb_link_node(ins, pr, p);
+	rb_insert_color(ins, tree);
+	return rb_entry(ins, struct mft_inode, node);
+}
+
+/*
+ * ni_find_mi - Find mft_inode by record number.
+ */
+static struct mft_inode *ni_find_mi(struct ntfs_inode *ni, CLST rno)
+{
+	return ni_ins_mi(ni, &ni->mi_tree, rno, NULL);
+}
+
+/*
+ * ni_add_mi - Add new mft_inode into ntfs_inode.
+ */
+static void ni_add_mi(struct ntfs_inode *ni, struct mft_inode *mi)
+{
+	ni_ins_mi(ni, &ni->mi_tree, mi->rno, &mi->node);
+}
+
+/*
+ * ni_remove_mi - Remove mft_inode from ntfs_inode.
+ */
+void ni_remove_mi(struct ntfs_inode *ni, struct mft_inode *mi)
+{
+	rb_erase(&mi->node, &ni->mi_tree);
+}
+
+/*
+ * ni_std - Return: Pointer into std_info from primary record.
+ */
+struct ATTR_STD_INFO *ni_std(struct ntfs_inode *ni)
+{
+	const struct ATTRIB *attr;
+
+	attr = mi_find_attr(&ni->mi, NULL, ATTR_STD, NULL, 0, NULL);
+	return attr ? resident_data_ex(attr, sizeof(struct ATTR_STD_INFO))
+		    : NULL;
+}
+
+/*
+ * ni_std5
+ *
+ * Return: Pointer into std_info from primary record.
+ */
+struct ATTR_STD_INFO5 *ni_std5(struct ntfs_inode *ni)
+{
+	const struct ATTRIB *attr;
+
+	attr = mi_find_attr(&ni->mi, NULL, ATTR_STD, NULL, 0, NULL);
+
+	return attr ? resident_data_ex(attr, sizeof(struct ATTR_STD_INFO5))
+		    : NULL;
+}
+
+/*
+ * ni_clear - Clear resources allocated by ntfs_inode.
+ */
+void ni_clear(struct ntfs_inode *ni)
+{
+	struct rb_node *node;
+
+	if (!ni->vfs_inode.i_nlink && is_rec_inuse(ni->mi.mrec))
+		ni_delete_all(ni);
+
+	al_destroy(ni);
+
+	for (node = rb_first(&ni->mi_tree); node;) {
+		struct rb_node *next = rb_next(node);
+		struct mft_inode *mi = rb_entry(node, struct mft_inode, node);
+
+		rb_erase(node, &ni->mi_tree);
+		mi_put(mi);
+		node = next;
+	}
+
+	/* Bad inode always has mode == S_IFREG. */
+	if (ni->ni_flags & NI_FLAG_DIR)
+		indx_clear(&ni->dir);
+	else {
+		run_close(&ni->file.run);
+#ifdef CONFIG_NTFS3_LZX_XPRESS
+		if (ni->file.offs_page) {
+			/* On-demand allocated page for offsets. */
+			put_page(ni->file.offs_page);
+			ni->file.offs_page = NULL;
+		}
+#endif
+	}
+
+	mi_clear(&ni->mi);
+}
+
+/*
+ * ni_load_mi_ex - Find mft_inode by record number.
+ */
+int ni_load_mi_ex(struct ntfs_inode *ni, CLST rno, struct mft_inode **mi)
+{
+	int err;
+	struct mft_inode *r;
+
+	r = ni_find_mi(ni, rno);
+	if (r)
+		goto out;
+
+	err = mi_get(ni->mi.sbi, rno, &r);
+	if (err)
+		return err;
+
+	ni_add_mi(ni, r);
+
+out:
+	if (mi)
+		*mi = r;
+	return 0;
+}
+
+/*
+ * ni_load_mi - Load mft_inode corresponded list_entry.
+ */
+int ni_load_mi(struct ntfs_inode *ni, const struct ATTR_LIST_ENTRY *le,
+	       struct mft_inode **mi)
+{
+	CLST rno;
+
+	if (!le) {
+		*mi = &ni->mi;
+		return 0;
+	}
+
+	rno = ino_get(&le->ref);
+	if (rno == ni->mi.rno) {
+		*mi = &ni->mi;
+		return 0;
+	}
+	return ni_load_mi_ex(ni, rno, mi);
+}
+
+/*
+ * ni_find_attr
+ *
+ * Return: Attribute and record this attribute belongs to.
+ */
+struct ATTRIB *ni_find_attr(struct ntfs_inode *ni, struct ATTRIB *attr,
+			    struct ATTR_LIST_ENTRY **le_o, enum ATTR_TYPE type,
+			    const __le16 *name, u8 name_len, const CLST *vcn,
+			    struct mft_inode **mi)
+{
+	struct ATTR_LIST_ENTRY *le;
+	struct mft_inode *m;
+
+	if (!ni->attr_list.size ||
+	    (!name_len && (type == ATTR_LIST || type == ATTR_STD))) {
+		if (le_o)
+			*le_o = NULL;
+		if (mi)
+			*mi = &ni->mi;
+
+		/* Look for required attribute in primary record. */
+		return mi_find_attr(&ni->mi, attr, type, name, name_len, NULL);
+	}
+
+	/* First look for list entry of required type. */
+	le = al_find_ex(ni, le_o ? *le_o : NULL, type, name, name_len, vcn);
+	if (!le)
+		return NULL;
+
+	if (le_o)
+		*le_o = le;
+
+	/* Load record that contains this attribute. */
+	if (ni_load_mi(ni, le, &m))
+		return NULL;
+
+	/* Look for required attribute. */
+	attr = mi_find_attr(m, NULL, type, name, name_len, &le->id);
+
+	if (!attr)
+		goto out;
+
+	if (!attr->non_res) {
+		if (vcn && *vcn)
+			goto out;
+	} else if (!vcn) {
+		if (attr->nres.svcn)
+			goto out;
+	} else if (le64_to_cpu(attr->nres.svcn) > *vcn ||
+		   *vcn > le64_to_cpu(attr->nres.evcn)) {
+		goto out;
+	}
+
+	if (mi)
+		*mi = m;
+	return attr;
+
+out:
+	ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_ERROR);
+	return NULL;
+}
+
+/*
+ * ni_enum_attr_ex - Enumerates attributes in ntfs_inode.
+ */
+struct ATTRIB *ni_enum_attr_ex(struct ntfs_inode *ni, struct ATTRIB *attr,
+			       struct ATTR_LIST_ENTRY **le,
+			       struct mft_inode **mi)
+{
+	struct mft_inode *mi2;
+	struct ATTR_LIST_ENTRY *le2;
+
+	/* Do we have an attribute list? */
+	if (!ni->attr_list.size) {
+		*le = NULL;
+		if (mi)
+			*mi = &ni->mi;
+		/* Enum attributes in primary record. */
+		return mi_enum_attr(&ni->mi, attr);
+	}
+
+	/* Get next list entry. */
+	le2 = *le = al_enumerate(ni, attr ? *le : NULL);
+	if (!le2)
+		return NULL;
+
+	/* Load record that contains the required attribute. */
+	if (ni_load_mi(ni, le2, &mi2))
+		return NULL;
+
+	if (mi)
+		*mi = mi2;
+
+	/* Find attribute in loaded record. */
+	return rec_find_attr_le(mi2, le2);
+}
+
+/*
+ * ni_load_attr - Load attribute that contains given VCN.
+ */
+struct ATTRIB *ni_load_attr(struct ntfs_inode *ni, enum ATTR_TYPE type,
+			    const __le16 *name, u8 name_len, CLST vcn,
+			    struct mft_inode **pmi)
+{
+	struct ATTR_LIST_ENTRY *le;
+	struct ATTRIB *attr;
+	struct mft_inode *mi;
+	struct ATTR_LIST_ENTRY *next;
+
+	if (!ni->attr_list.size) {
+		if (pmi)
+			*pmi = &ni->mi;
+		return mi_find_attr(&ni->mi, NULL, type, name, name_len, NULL);
+	}
+
+	le = al_find_ex(ni, NULL, type, name, name_len, NULL);
+	if (!le)
+		return NULL;
+
+	/*
+	 * Unfortunately ATTR_LIST_ENTRY contains only start VCN.
+	 * So to find the ATTRIB segment that contains 'vcn' we should
+	 * enumerate some entries.
+	 */
+	if (vcn) {
+		for (;; le = next) {
+			next = al_find_ex(ni, le, type, name, name_len, NULL);
+			if (!next || le64_to_cpu(next->vcn) > vcn)
+				break;
+		}
+	}
+
+	if (ni_load_mi(ni, le, &mi))
+		return NULL;
+
+	if (pmi)
+		*pmi = mi;
+
+	attr = mi_find_attr(mi, NULL, type, name, name_len, &le->id);
+	if (!attr)
+		return NULL;
+
+	if (!attr->non_res)
+		return attr;
+
+	if (le64_to_cpu(attr->nres.svcn) <= vcn &&
+	    vcn <= le64_to_cpu(attr->nres.evcn))
+		return attr;
+
+	return NULL;
+}
+
+/*
+ * ni_load_all_mi - Load all subrecords.
+ */
+int ni_load_all_mi(struct ntfs_inode *ni)
+{
+	int err;
+	struct ATTR_LIST_ENTRY *le;
+
+	if (!ni->attr_list.size)
+		return 0;
+
+	le = NULL;
+
+	while ((le = al_enumerate(ni, le))) {
+		CLST rno = ino_get(&le->ref);
+
+		if (rno == ni->mi.rno)
+			continue;
+
+		err = ni_load_mi_ex(ni, rno, NULL);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * ni_add_subrecord - Allocate + format + attach a new subrecord.
+ */
+bool ni_add_subrecord(struct ntfs_inode *ni, CLST rno, struct mft_inode **mi)
+{
+	struct mft_inode *m;
+
+	m = kzalloc(sizeof(struct mft_inode), GFP_NOFS);
+	if (!m)
+		return false;
+
+	if (mi_format_new(m, ni->mi.sbi, rno, 0, ni->mi.rno == MFT_REC_MFT)) {
+		mi_put(m);
+		return false;
+	}
+
+	mi_get_ref(&ni->mi, &m->mrec->parent_ref);
+
+	ni_add_mi(ni, m);
+	*mi = m;
+	return true;
+}
+
+/*
+ * ni_remove_attr - Remove all attributes for the given type/name/id.
+ */
+int ni_remove_attr(struct ntfs_inode *ni, enum ATTR_TYPE type,
+		   const __le16 *name, size_t name_len, bool base_only,
+		   const __le16 *id)
+{
+	int err;
+	struct ATTRIB *attr;
+	struct ATTR_LIST_ENTRY *le;
+	struct mft_inode *mi;
+	u32 type_in;
+	int diff;
+
+	if (base_only || type == ATTR_LIST || !ni->attr_list.size) {
+		attr = mi_find_attr(&ni->mi, NULL, type, name, name_len, id);
+		if (!attr)
+			return -ENOENT;
+
+		mi_remove_attr(ni, &ni->mi, attr);
+		return 0;
+	}
+
+	type_in = le32_to_cpu(type);
+	le = NULL;
+
+	for (;;) {
+		le = al_enumerate(ni, le);
+		if (!le)
+			return 0;
+
+next_le2:
+		diff = le32_to_cpu(le->type) - type_in;
+		if (diff < 0)
+			continue;
+
+		if (diff > 0)
+			return 0;
+
+		if (le->name_len != name_len)
+			continue;
+
+		if (name_len &&
+		    memcmp(le_name(le), name, name_len * sizeof(short)))
+			continue;
+
+		if (id && le->id != *id)
+			continue;
+		err = ni_load_mi(ni, le, &mi);
+		if (err)
+			return err;
+
+		al_remove_le(ni, le);
+
+		attr = mi_find_attr(mi, NULL, type, name, name_len, id);
+		if (!attr)
+			return -ENOENT;
+
+		mi_remove_attr(ni, mi, attr);
+
+		if (PtrOffset(ni->attr_list.le, le) >= ni->attr_list.size)
+			return 0;
+		goto next_le2;
+	}
+}
+
+/*
+ * ni_ins_new_attr - Insert the attribute into record.
+ *
+ * Return: Not full constructed attribute or NULL if not possible to create.
+ */
+static struct ATTRIB *
+ni_ins_new_attr(struct ntfs_inode *ni, struct mft_inode *mi,
+		struct ATTR_LIST_ENTRY *le, enum ATTR_TYPE type,
+		const __le16 *name, u8 name_len, u32 asize, u16 name_off,
+		CLST svcn, struct ATTR_LIST_ENTRY **ins_le)
+{
+	int err;
+	struct ATTRIB *attr;
+	bool le_added = false;
+	struct MFT_REF ref;
+
+	mi_get_ref(mi, &ref);
+
+	if (type != ATTR_LIST && !le && ni->attr_list.size) {
+		err = al_add_le(ni, type, name, name_len, svcn, cpu_to_le16(-1),
+				&ref, &le);
+		if (err) {
+			/* No memory or no space. */
+			return NULL;
+		}
+		le_added = true;
+
+		/*
+		 * al_add_le -> attr_set_size (list) -> ni_expand_list
+		 * which moves some attributes out of primary record
+		 * this means that name may point into moved memory
+		 * reinit 'name' from le.
+		 */
+		name = le->name;
+	}
+
+	attr = mi_insert_attr(mi, type, name, name_len, asize, name_off);
+	if (!attr) {
+		if (le_added)
+			al_remove_le(ni, le);
+		return NULL;
+	}
+
+	if (type == ATTR_LIST) {
+		/* Attr list is not in list entry array. */
+		goto out;
+	}
+
+	if (!le)
+		goto out;
+
+	/* Update ATTRIB Id and record reference. */
+	le->id = attr->id;
+	ni->attr_list.dirty = true;
+	le->ref = ref;
+
+out:
+	if (ins_le)
+		*ins_le = le;
+	return attr;
+}
+
+/*
+ * ni_repack
+ *
+ * Random write access to sparsed or compressed file may result to
+ * not optimized packed runs.
+ * Here is the place to optimize it.
+ */
+static int ni_repack(struct ntfs_inode *ni)
+{
+	int err = 0;
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	struct mft_inode *mi, *mi_p = NULL;
+	struct ATTRIB *attr = NULL, *attr_p;
+	struct ATTR_LIST_ENTRY *le = NULL, *le_p;
+	CLST alloc = 0;
+	u8 cluster_bits = sbi->cluster_bits;
+	CLST svcn, evcn = 0, svcn_p, evcn_p, next_svcn;
+	u32 roff, rs = sbi->record_size;
+	struct runs_tree run;
+
+	run_init(&run);
+
+	while ((attr = ni_enum_attr_ex(ni, attr, &le, &mi))) {
+		if (!attr->non_res)
+			continue;
+
+		svcn = le64_to_cpu(attr->nres.svcn);
+		if (svcn != le64_to_cpu(le->vcn)) {
+			err = -EINVAL;
+			break;
+		}
+
+		if (!svcn) {
+			alloc = le64_to_cpu(attr->nres.alloc_size) >>
+				cluster_bits;
+			mi_p = NULL;
+		} else if (svcn != evcn + 1) {
+			err = -EINVAL;
+			break;
+		}
+
+		evcn = le64_to_cpu(attr->nres.evcn);
+
+		if (svcn > evcn + 1) {
+			err = -EINVAL;
+			break;
+		}
+
+		if (!mi_p) {
+			/* Do not try if not enogh free space. */
+			if (le32_to_cpu(mi->mrec->used) + 8 >= rs)
+				continue;
+
+			/* Do not try if last attribute segment. */
+			if (evcn + 1 == alloc)
+				continue;
+			run_close(&run);
+		}
+
+		roff = le16_to_cpu(attr->nres.run_off);
+		err = run_unpack(&run, sbi, ni->mi.rno, svcn, evcn, svcn,
+				 Add2Ptr(attr, roff),
+				 le32_to_cpu(attr->size) - roff);
+		if (err < 0)
+			break;
+
+		if (!mi_p) {
+			mi_p = mi;
+			attr_p = attr;
+			svcn_p = svcn;
+			evcn_p = evcn;
+			le_p = le;
+			err = 0;
+			continue;
+		}
+
+		/*
+		 * Run contains data from two records: mi_p and mi
+		 * Try to pack in one.
+		 */
+		err = mi_pack_runs(mi_p, attr_p, &run, evcn + 1 - svcn_p);
+		if (err)
+			break;
+
+		next_svcn = le64_to_cpu(attr_p->nres.evcn) + 1;
+
+		if (next_svcn >= evcn + 1) {
+			/* We can remove this attribute segment. */
+			al_remove_le(ni, le);
+			mi_remove_attr(NULL, mi, attr);
+			le = le_p;
+			continue;
+		}
+
+		attr->nres.svcn = le->vcn = cpu_to_le64(next_svcn);
+		mi->dirty = true;
+		ni->attr_list.dirty = true;
+
+		if (evcn + 1 == alloc) {
+			err = mi_pack_runs(mi, attr, &run,
+					   evcn + 1 - next_svcn);
+			if (err)
+				break;
+			mi_p = NULL;
+		} else {
+			mi_p = mi;
+			attr_p = attr;
+			svcn_p = next_svcn;
+			evcn_p = evcn;
+			le_p = le;
+			run_truncate_head(&run, next_svcn);
+		}
+	}
+
+	if (err) {
+		ntfs_inode_warn(&ni->vfs_inode, "repack problem");
+		ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
+
+		/* Pack loaded but not packed runs. */
+		if (mi_p)
+			mi_pack_runs(mi_p, attr_p, &run, evcn_p + 1 - svcn_p);
+	}
+
+	run_close(&run);
+	return err;
+}
+
+/*
+ * ni_try_remove_attr_list
+ *
+ * Can we remove attribute list?
+ * Check the case when primary record contains enough space for all attributes.
+ */
+static int ni_try_remove_attr_list(struct ntfs_inode *ni)
+{
+	int err = 0;
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	struct ATTRIB *attr, *attr_list, *attr_ins;
+	struct ATTR_LIST_ENTRY *le;
+	struct mft_inode *mi;
+	u32 asize, free;
+	struct MFT_REF ref;
+	__le16 id;
+
+	if (!ni->attr_list.dirty)
+		return 0;
+
+	err = ni_repack(ni);
+	if (err)
+		return err;
+
+	attr_list = mi_find_attr(&ni->mi, NULL, ATTR_LIST, NULL, 0, NULL);
+	if (!attr_list)
+		return 0;
+
+	asize = le32_to_cpu(attr_list->size);
+
+	/* Free space in primary record without attribute list. */
+	free = sbi->record_size - le32_to_cpu(ni->mi.mrec->used) + asize;
+	mi_get_ref(&ni->mi, &ref);
+
+	le = NULL;
+	while ((le = al_enumerate(ni, le))) {
+		if (!memcmp(&le->ref, &ref, sizeof(ref)))
+			continue;
+
+		if (le->vcn)
+			return 0;
+
+		mi = ni_find_mi(ni, ino_get(&le->ref));
+		if (!mi)
+			return 0;
+
+		attr = mi_find_attr(mi, NULL, le->type, le_name(le),
+				    le->name_len, &le->id);
+		if (!attr)
+			return 0;
+
+		asize = le32_to_cpu(attr->size);
+		if (asize > free)
+			return 0;
+
+		free -= asize;
+	}
+
+	/* It seems that attribute list can be removed from primary record. */
+	mi_remove_attr(NULL, &ni->mi, attr_list);
+
+	/*
+	 * Repeat the cycle above and move all attributes to primary record.
+	 * It should be success!
+	 */
+	le = NULL;
+	while ((le = al_enumerate(ni, le))) {
+		if (!memcmp(&le->ref, &ref, sizeof(ref)))
+			continue;
+
+		mi = ni_find_mi(ni, ino_get(&le->ref));
+
+		attr = mi_find_attr(mi, NULL, le->type, le_name(le),
+				    le->name_len, &le->id);
+		asize = le32_to_cpu(attr->size);
+
+		/* Insert into primary record. */
+		attr_ins = mi_insert_attr(&ni->mi, le->type, le_name(le),
+					  le->name_len, asize,
+					  le16_to_cpu(attr->name_off));
+		id = attr_ins->id;
+
+		/* Copy all except id. */
+		memcpy(attr_ins, attr, asize);
+		attr_ins->id = id;
+
+		/* Remove from original record. */
+		mi_remove_attr(NULL, mi, attr);
+	}
+
+	run_deallocate(sbi, &ni->attr_list.run, true);
+	run_close(&ni->attr_list.run);
+	ni->attr_list.size = 0;
+	kfree(ni->attr_list.le);
+	ni->attr_list.le = NULL;
+	ni->attr_list.dirty = false;
+
+	return 0;
+}
+
+/*
+ * ni_create_attr_list - Generates an attribute list for this primary record.
+ */
+int ni_create_attr_list(struct ntfs_inode *ni)
+{
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	int err;
+	u32 lsize;
+	struct ATTRIB *attr;
+	struct ATTRIB *arr_move[7];
+	struct ATTR_LIST_ENTRY *le, *le_b[7];
+	struct MFT_REC *rec;
+	bool is_mft;
+	CLST rno = 0;
+	struct mft_inode *mi;
+	u32 free_b, nb, to_free, rs;
+	u16 sz;
+
+	is_mft = ni->mi.rno == MFT_REC_MFT;
+	rec = ni->mi.mrec;
+	rs = sbi->record_size;
+
+	/*
+	 * Skip estimating exact memory requirement.
+	 * Looks like one record_size is always enough.
+	 */
+	le = kmalloc(al_aligned(rs), GFP_NOFS);
+	if (!le) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	mi_get_ref(&ni->mi, &le->ref);
+	ni->attr_list.le = le;
+
+	attr = NULL;
+	nb = 0;
+	free_b = 0;
+	attr = NULL;
+
+	for (; (attr = mi_enum_attr(&ni->mi, attr)); le = Add2Ptr(le, sz)) {
+		sz = le_size(attr->name_len);
+		le->type = attr->type;
+		le->size = cpu_to_le16(sz);
+		le->name_len = attr->name_len;
+		le->name_off = offsetof(struct ATTR_LIST_ENTRY, name);
+		le->vcn = 0;
+		if (le != ni->attr_list.le)
+			le->ref = ni->attr_list.le->ref;
+		le->id = attr->id;
+
+		if (attr->name_len)
+			memcpy(le->name, attr_name(attr),
+			       sizeof(short) * attr->name_len);
+		else if (attr->type == ATTR_STD)
+			continue;
+		else if (attr->type == ATTR_LIST)
+			continue;
+		else if (is_mft && attr->type == ATTR_DATA)
+			continue;
+
+		if (!nb || nb < ARRAY_SIZE(arr_move)) {
+			le_b[nb] = le;
+			arr_move[nb++] = attr;
+			free_b += le32_to_cpu(attr->size);
+		}
+	}
+
+	lsize = PtrOffset(ni->attr_list.le, le);
+	ni->attr_list.size = lsize;
+
+	to_free = le32_to_cpu(rec->used) + lsize + SIZEOF_RESIDENT;
+	if (to_free <= rs) {
+		to_free = 0;
+	} else {
+		to_free -= rs;
+
+		if (to_free > free_b) {
+			err = -EINVAL;
+			goto out1;
+		}
+	}
+
+	/* Allocate child MFT. */
+	err = ntfs_look_free_mft(sbi, &rno, is_mft, ni, &mi);
+	if (err)
+		goto out1;
+
+	/* Call mi_remove_attr() in reverse order to keep pointers 'arr_move' valid. */
+	while (to_free > 0) {
+		struct ATTRIB *b = arr_move[--nb];
+		u32 asize = le32_to_cpu(b->size);
+		u16 name_off = le16_to_cpu(b->name_off);
+
+		attr = mi_insert_attr(mi, b->type, Add2Ptr(b, name_off),
+				      b->name_len, asize, name_off);
+		WARN_ON(!attr);
+
+		mi_get_ref(mi, &le_b[nb]->ref);
+		le_b[nb]->id = attr->id;
+
+		/* Copy all except id. */
+		memcpy(attr, b, asize);
+		attr->id = le_b[nb]->id;
+
+		/* Remove from primary record. */
+		WARN_ON(!mi_remove_attr(NULL, &ni->mi, b));
+
+		if (to_free <= asize)
+			break;
+		to_free -= asize;
+		WARN_ON(!nb);
+	}
+
+	attr = mi_insert_attr(&ni->mi, ATTR_LIST, NULL, 0,
+			      lsize + SIZEOF_RESIDENT, SIZEOF_RESIDENT);
+	WARN_ON(!attr);
+
+	attr->non_res = 0;
+	attr->flags = 0;
+	attr->res.data_size = cpu_to_le32(lsize);
+	attr->res.data_off = SIZEOF_RESIDENT_LE;
+	attr->res.flags = 0;
+	attr->res.res = 0;
+
+	memcpy(resident_data_ex(attr, lsize), ni->attr_list.le, lsize);
+
+	ni->attr_list.dirty = false;
+
+	mark_inode_dirty(&ni->vfs_inode);
+	goto out;
+
+out1:
+	kfree(ni->attr_list.le);
+	ni->attr_list.le = NULL;
+	ni->attr_list.size = 0;
+
+out:
+	return err;
+}
+
+/*
+ * ni_ins_attr_ext - Add an external attribute to the ntfs_inode.
+ */
+static int ni_ins_attr_ext(struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le,
+			   enum ATTR_TYPE type, const __le16 *name, u8 name_len,
+			   u32 asize, CLST svcn, u16 name_off, bool force_ext,
+			   struct ATTRIB **ins_attr, struct mft_inode **ins_mi,
+			   struct ATTR_LIST_ENTRY **ins_le)
+{
+	struct ATTRIB *attr;
+	struct mft_inode *mi;
+	CLST rno;
+	u64 vbo;
+	struct rb_node *node;
+	int err;
+	bool is_mft, is_mft_data;
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+
+	is_mft = ni->mi.rno == MFT_REC_MFT;
+	is_mft_data = is_mft && type == ATTR_DATA && !name_len;
+
+	if (asize > sbi->max_bytes_per_attr) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/*
+	 * Standard information and attr_list cannot be made external.
+	 * The Log File cannot have any external attributes.
+	 */
+	if (type == ATTR_STD || type == ATTR_LIST ||
+	    ni->mi.rno == MFT_REC_LOG) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Create attribute list if it is not already existed. */
+	if (!ni->attr_list.size) {
+		err = ni_create_attr_list(ni);
+		if (err)
+			goto out;
+	}
+
+	vbo = is_mft_data ? ((u64)svcn << sbi->cluster_bits) : 0;
+
+	if (force_ext)
+		goto insert_ext;
+
+	/* Load all subrecords into memory. */
+	err = ni_load_all_mi(ni);
+	if (err)
+		goto out;
+
+	/* Check each of loaded subrecord. */
+	for (node = rb_first(&ni->mi_tree); node; node = rb_next(node)) {
+		mi = rb_entry(node, struct mft_inode, node);
+
+		if (is_mft_data &&
+		    (mi_enum_attr(mi, NULL) ||
+		     vbo <= ((u64)mi->rno << sbi->record_bits))) {
+			/* We can't accept this record 'cause MFT's bootstrapping. */
+			continue;
+		}
+		if (is_mft &&
+		    mi_find_attr(mi, NULL, ATTR_DATA, NULL, 0, NULL)) {
+			/*
+			 * This child record already has a ATTR_DATA.
+			 * So it can't accept any other records.
+			 */
+			continue;
+		}
+
+		if ((type != ATTR_NAME || name_len) &&
+		    mi_find_attr(mi, NULL, type, name, name_len, NULL)) {
+			/* Only indexed attributes can share same record. */
+			continue;
+		}
+
+		/* Try to insert attribute into this subrecord. */
+		attr = ni_ins_new_attr(ni, mi, le, type, name, name_len, asize,
+				       name_off, svcn, ins_le);
+		if (!attr)
+			continue;
+
+		if (ins_attr)
+			*ins_attr = attr;
+		if (ins_mi)
+			*ins_mi = mi;
+		return 0;
+	}
+
+insert_ext:
+	/* We have to allocate a new child subrecord. */
+	err = ntfs_look_free_mft(sbi, &rno, is_mft_data, ni, &mi);
+	if (err)
+		goto out;
+
+	if (is_mft_data && vbo <= ((u64)rno << sbi->record_bits)) {
+		err = -EINVAL;
+		goto out1;
+	}
+
+	attr = ni_ins_new_attr(ni, mi, le, type, name, name_len, asize,
+			       name_off, svcn, ins_le);
+	if (!attr)
+		goto out2;
+
+	if (ins_attr)
+		*ins_attr = attr;
+	if (ins_mi)
+		*ins_mi = mi;
+
+	return 0;
+
+out2:
+	ni_remove_mi(ni, mi);
+	mi_put(mi);
+	err = -EINVAL;
+
+out1:
+	ntfs_mark_rec_free(sbi, rno);
+
+out:
+	return err;
+}
+
+/*
+ * ni_insert_attr - Insert an attribute into the file.
+ *
+ * If the primary record has room, it will just insert the attribute.
+ * If not, it may make the attribute external.
+ * For $MFT::Data it may make room for the attribute by
+ * making other attributes external.
+ *
+ * NOTE:
+ * The ATTR_LIST and ATTR_STD cannot be made external.
+ * This function does not fill new attribute full.
+ * It only fills 'size'/'type'/'id'/'name_len' fields.
+ */
+static int ni_insert_attr(struct ntfs_inode *ni, enum ATTR_TYPE type,
+			  const __le16 *name, u8 name_len, u32 asize,
+			  u16 name_off, CLST svcn, struct ATTRIB **ins_attr,
+			  struct mft_inode **ins_mi,
+			  struct ATTR_LIST_ENTRY **ins_le)
+{
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	int err;
+	struct ATTRIB *attr, *eattr;
+	struct MFT_REC *rec;
+	bool is_mft;
+	struct ATTR_LIST_ENTRY *le;
+	u32 list_reserve, max_free, free, used, t32;
+	__le16 id;
+	u16 t16;
+
+	is_mft = ni->mi.rno == MFT_REC_MFT;
+	rec = ni->mi.mrec;
+
+	list_reserve = SIZEOF_NONRESIDENT + 3 * (1 + 2 * sizeof(u32));
+	used = le32_to_cpu(rec->used);
+	free = sbi->record_size - used;
+
+	if (is_mft && type != ATTR_LIST) {
+		/* Reserve space for the ATTRIB list. */
+		if (free < list_reserve)
+			free = 0;
+		else
+			free -= list_reserve;
+	}
+
+	if (asize <= free) {
+		attr = ni_ins_new_attr(ni, &ni->mi, NULL, type, name, name_len,
+				       asize, name_off, svcn, ins_le);
+		if (attr) {
+			if (ins_attr)
+				*ins_attr = attr;
+			if (ins_mi)
+				*ins_mi = &ni->mi;
+			err = 0;
+			goto out;
+		}
+	}
+
+	if (!is_mft || type != ATTR_DATA || svcn) {
+		/* This ATTRIB will be external. */
+		err = ni_ins_attr_ext(ni, NULL, type, name, name_len, asize,
+				      svcn, name_off, false, ins_attr, ins_mi,
+				      ins_le);
+		goto out;
+	}
+
+	/*
+	 * Here we have: "is_mft && type == ATTR_DATA && !svcn"
+	 *
+	 * The first chunk of the $MFT::Data ATTRIB must be the base record.
+	 * Evict as many other attributes as possible.
+	 */
+	max_free = free;
+
+	/* Estimate the result of moving all possible attributes away. */
+	attr = NULL;
+
+	while ((attr = mi_enum_attr(&ni->mi, attr))) {
+		if (attr->type == ATTR_STD)
+			continue;
+		if (attr->type == ATTR_LIST)
+			continue;
+		max_free += le32_to_cpu(attr->size);
+	}
+
+	if (max_free < asize + list_reserve) {
+		/* Impossible to insert this attribute into primary record. */
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Start real attribute moving. */
+	attr = NULL;
+
+	for (;;) {
+		attr = mi_enum_attr(&ni->mi, attr);
+		if (!attr) {
+			/* We should never be here 'cause we have already check this case. */
+			err = -EINVAL;
+			goto out;
+		}
+
+		/* Skip attributes that MUST be primary record. */
+		if (attr->type == ATTR_STD || attr->type == ATTR_LIST)
+			continue;
+
+		le = NULL;
+		if (ni->attr_list.size) {
+			le = al_find_le(ni, NULL, attr);
+			if (!le) {
+				/* Really this is a serious bug. */
+				err = -EINVAL;
+				goto out;
+			}
+		}
+
+		t32 = le32_to_cpu(attr->size);
+		t16 = le16_to_cpu(attr->name_off);
+		err = ni_ins_attr_ext(ni, le, attr->type, Add2Ptr(attr, t16),
+				      attr->name_len, t32, attr_svcn(attr), t16,
+				      false, &eattr, NULL, NULL);
+		if (err)
+			return err;
+
+		id = eattr->id;
+		memcpy(eattr, attr, t32);
+		eattr->id = id;
+
+		/* Remove from primary record. */
+		mi_remove_attr(NULL, &ni->mi, attr);
+
+		/* attr now points to next attribute. */
+		if (attr->type == ATTR_END)
+			goto out;
+	}
+	while (asize + list_reserve > sbi->record_size - le32_to_cpu(rec->used))
+		;
+
+	attr = ni_ins_new_attr(ni, &ni->mi, NULL, type, name, name_len, asize,
+			       name_off, svcn, ins_le);
+	if (!attr) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (ins_attr)
+		*ins_attr = attr;
+	if (ins_mi)
+		*ins_mi = &ni->mi;
+
+out:
+	return err;
+}
+
+/* ni_expand_mft_list - Split ATTR_DATA of $MFT. */
+static int ni_expand_mft_list(struct ntfs_inode *ni)
+{
+	int err = 0;
+	struct runs_tree *run = &ni->file.run;
+	u32 asize, run_size, done = 0;
+	struct ATTRIB *attr;
+	struct rb_node *node;
+	CLST mft_min, mft_new, svcn, evcn, plen;
+	struct mft_inode *mi, *mi_min, *mi_new;
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+
+	/* Find the nearest MFT. */
+	mft_min = 0;
+	mft_new = 0;
+	mi_min = NULL;
+
+	for (node = rb_first(&ni->mi_tree); node; node = rb_next(node)) {
+		mi = rb_entry(node, struct mft_inode, node);
+
+		attr = mi_enum_attr(mi, NULL);
+
+		if (!attr) {
+			mft_min = mi->rno;
+			mi_min = mi;
+			break;
+		}
+	}
+
+	if (ntfs_look_free_mft(sbi, &mft_new, true, ni, &mi_new)) {
+		mft_new = 0;
+		/* Really this is not critical. */
+	} else if (mft_min > mft_new) {
+		mft_min = mft_new;
+		mi_min = mi_new;
+	} else {
+		ntfs_mark_rec_free(sbi, mft_new);
+		mft_new = 0;
+		ni_remove_mi(ni, mi_new);
+	}
+
+	attr = mi_find_attr(&ni->mi, NULL, ATTR_DATA, NULL, 0, NULL);
+	if (!attr) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	asize = le32_to_cpu(attr->size);
+
+	evcn = le64_to_cpu(attr->nres.evcn);
+	svcn = bytes_to_cluster(sbi, (u64)(mft_min + 1) << sbi->record_bits);
+	if (evcn + 1 >= svcn) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/*
+	 * Split primary attribute [0 evcn] in two parts [0 svcn) + [svcn evcn].
+	 *
+	 * Update first part of ATTR_DATA in 'primary MFT.
+	 */
+	err = run_pack(run, 0, svcn, Add2Ptr(attr, SIZEOF_NONRESIDENT),
+		       asize - SIZEOF_NONRESIDENT, &plen);
+	if (err < 0)
+		goto out;
+
+	run_size = ALIGN(err, 8);
+	err = 0;
+
+	if (plen < svcn) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	attr->nres.evcn = cpu_to_le64(svcn - 1);
+	attr->size = cpu_to_le32(run_size + SIZEOF_NONRESIDENT);
+	/* 'done' - How many bytes of primary MFT becomes free. */
+	done = asize - run_size - SIZEOF_NONRESIDENT;
+	le32_sub_cpu(&ni->mi.mrec->used, done);
+
+	/* Estimate the size of second part: run_buf=NULL. */
+	err = run_pack(run, svcn, evcn + 1 - svcn, NULL, sbi->record_size,
+		       &plen);
+	if (err < 0)
+		goto out;
+
+	run_size = ALIGN(err, 8);
+	err = 0;
+
+	if (plen < evcn + 1 - svcn) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/*
+	 * This function may implicitly call expand attr_list.
+	 * Insert second part of ATTR_DATA in 'mi_min'.
+	 */
+	attr = ni_ins_new_attr(ni, mi_min, NULL, ATTR_DATA, NULL, 0,
+			       SIZEOF_NONRESIDENT + run_size,
+			       SIZEOF_NONRESIDENT, svcn, NULL);
+	if (!attr) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	attr->non_res = 1;
+	attr->name_off = SIZEOF_NONRESIDENT_LE;
+	attr->flags = 0;
+
+	run_pack(run, svcn, evcn + 1 - svcn, Add2Ptr(attr, SIZEOF_NONRESIDENT),
+		 run_size, &plen);
+
+	attr->nres.svcn = cpu_to_le64(svcn);
+	attr->nres.evcn = cpu_to_le64(evcn);
+	attr->nres.run_off = cpu_to_le16(SIZEOF_NONRESIDENT);
+
+out:
+	if (mft_new) {
+		ntfs_mark_rec_free(sbi, mft_new);
+		ni_remove_mi(ni, mi_new);
+	}
+
+	return !err && !done ? -EOPNOTSUPP : err;
+}
+
+/*
+ * ni_expand_list - Move all possible attributes out of primary record.
+ */
+int ni_expand_list(struct ntfs_inode *ni)
+{
+	int err = 0;
+	u32 asize, done = 0;
+	struct ATTRIB *attr, *ins_attr;
+	struct ATTR_LIST_ENTRY *le;
+	bool is_mft = ni->mi.rno == MFT_REC_MFT;
+	struct MFT_REF ref;
+
+	mi_get_ref(&ni->mi, &ref);
+	le = NULL;
+
+	while ((le = al_enumerate(ni, le))) {
+		if (le->type == ATTR_STD)
+			continue;
+
+		if (memcmp(&ref, &le->ref, sizeof(struct MFT_REF)))
+			continue;
+
+		if (is_mft && le->type == ATTR_DATA)
+			continue;
+
+		/* Find attribute in primary record. */
+		attr = rec_find_attr_le(&ni->mi, le);
+		if (!attr) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		asize = le32_to_cpu(attr->size);
+
+		/* Always insert into new record to avoid collisions (deep recursive). */
+		err = ni_ins_attr_ext(ni, le, attr->type, attr_name(attr),
+				      attr->name_len, asize, attr_svcn(attr),
+				      le16_to_cpu(attr->name_off), true,
+				      &ins_attr, NULL, NULL);
+
+		if (err)
+			goto out;
+
+		memcpy(ins_attr, attr, asize);
+		ins_attr->id = le->id;
+		/* Remove from primary record. */
+		mi_remove_attr(NULL, &ni->mi, attr);
+
+		done += asize;
+		goto out;
+	}
+
+	if (!is_mft) {
+		err = -EFBIG; /* Attr list is too big(?) */
+		goto out;
+	}
+
+	/* Split MFT data as much as possible. */
+	err = ni_expand_mft_list(ni);
+	if (err)
+		goto out;
+
+out:
+	return !err && !done ? -EOPNOTSUPP : err;
+}
+
+/*
+ * ni_insert_nonresident - Insert new nonresident attribute.
+ */
+int ni_insert_nonresident(struct ntfs_inode *ni, enum ATTR_TYPE type,
+			  const __le16 *name, u8 name_len,
+			  const struct runs_tree *run, CLST svcn, CLST len,
+			  __le16 flags, struct ATTRIB **new_attr,
+			  struct mft_inode **mi)
+{
+	int err;
+	CLST plen;
+	struct ATTRIB *attr;
+	bool is_ext =
+		(flags & (ATTR_FLAG_SPARSED | ATTR_FLAG_COMPRESSED)) && !svcn;
+	u32 name_size = ALIGN(name_len * sizeof(short), 8);
+	u32 name_off = is_ext ? SIZEOF_NONRESIDENT_EX : SIZEOF_NONRESIDENT;
+	u32 run_off = name_off + name_size;
+	u32 run_size, asize;
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+
+	err = run_pack(run, svcn, len, NULL, sbi->max_bytes_per_attr - run_off,
+		       &plen);
+	if (err < 0)
+		goto out;
+
+	run_size = ALIGN(err, 8);
+
+	if (plen < len) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	asize = run_off + run_size;
+
+	if (asize > sbi->max_bytes_per_attr) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = ni_insert_attr(ni, type, name, name_len, asize, name_off, svcn,
+			     &attr, mi, NULL);
+
+	if (err)
+		goto out;
+
+	attr->non_res = 1;
+	attr->name_off = cpu_to_le16(name_off);
+	attr->flags = flags;
+
+	run_pack(run, svcn, len, Add2Ptr(attr, run_off), run_size, &plen);
+
+	attr->nres.svcn = cpu_to_le64(svcn);
+	attr->nres.evcn = cpu_to_le64((u64)svcn + len - 1);
+
+	err = 0;
+	if (new_attr)
+		*new_attr = attr;
+
+	*(__le64 *)&attr->nres.run_off = cpu_to_le64(run_off);
+
+	attr->nres.alloc_size =
+		svcn ? 0 : cpu_to_le64((u64)len << ni->mi.sbi->cluster_bits);
+	attr->nres.data_size = attr->nres.alloc_size;
+	attr->nres.valid_size = attr->nres.alloc_size;
+
+	if (is_ext) {
+		if (flags & ATTR_FLAG_COMPRESSED)
+			attr->nres.c_unit = COMPRESSION_UNIT;
+		attr->nres.total_size = attr->nres.alloc_size;
+	}
+
+out:
+	return err;
+}
+
+/*
+ * ni_insert_resident - Inserts new resident attribute.
+ */
+int ni_insert_resident(struct ntfs_inode *ni, u32 data_size,
+		       enum ATTR_TYPE type, const __le16 *name, u8 name_len,
+		       struct ATTRIB **new_attr, struct mft_inode **mi,
+		       struct ATTR_LIST_ENTRY **le)
+{
+	int err;
+	u32 name_size = ALIGN(name_len * sizeof(short), 8);
+	u32 asize = SIZEOF_RESIDENT + name_size + ALIGN(data_size, 8);
+	struct ATTRIB *attr;
+
+	err = ni_insert_attr(ni, type, name, name_len, asize, SIZEOF_RESIDENT,
+			     0, &attr, mi, le);
+	if (err)
+		return err;
+
+	attr->non_res = 0;
+	attr->flags = 0;
+
+	attr->res.data_size = cpu_to_le32(data_size);
+	attr->res.data_off = cpu_to_le16(SIZEOF_RESIDENT + name_size);
+	if (type == ATTR_NAME) {
+		attr->res.flags = RESIDENT_FLAG_INDEXED;
+
+		/* is_attr_indexed(attr)) == true */
+		le16_add_cpu(&ni->mi.mrec->hard_links, +1);
+		ni->mi.dirty = true;
+	}
+	attr->res.res = 0;
+
+	if (new_attr)
+		*new_attr = attr;
+
+	return 0;
+}
+
+/*
+ * ni_remove_attr_le - Remove attribute from record.
+ */
+void ni_remove_attr_le(struct ntfs_inode *ni, struct ATTRIB *attr,
+		       struct mft_inode *mi, struct ATTR_LIST_ENTRY *le)
+{
+	mi_remove_attr(ni, mi, attr);
+
+	if (le)
+		al_remove_le(ni, le);
+}
+
+/*
+ * ni_delete_all - Remove all attributes and frees allocates space.
+ *
+ * ntfs_evict_inode->ntfs_clear_inode->ni_delete_all (if no links).
+ */
+int ni_delete_all(struct ntfs_inode *ni)
+{
+	int err;
+	struct ATTR_LIST_ENTRY *le = NULL;
+	struct ATTRIB *attr = NULL;
+	struct rb_node *node;
+	u16 roff;
+	u32 asize;
+	CLST svcn, evcn;
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	bool nt3 = is_ntfs3(sbi);
+	struct MFT_REF ref;
+
+	while ((attr = ni_enum_attr_ex(ni, attr, &le, NULL))) {
+		if (!nt3 || attr->name_len) {
+			;
+		} else if (attr->type == ATTR_REPARSE) {
+			mi_get_ref(&ni->mi, &ref);
+			ntfs_remove_reparse(sbi, 0, &ref);
+		} else if (attr->type == ATTR_ID && !attr->non_res &&
+			   le32_to_cpu(attr->res.data_size) >=
+				   sizeof(struct GUID)) {
+			ntfs_objid_remove(sbi, resident_data(attr));
+		}
+
+		if (!attr->non_res)
+			continue;
+
+		svcn = le64_to_cpu(attr->nres.svcn);
+		evcn = le64_to_cpu(attr->nres.evcn);
+
+		if (evcn + 1 <= svcn)
+			continue;
+
+		asize = le32_to_cpu(attr->size);
+		roff = le16_to_cpu(attr->nres.run_off);
+
+		/* run==1 means unpack and deallocate. */
+		run_unpack_ex(RUN_DEALLOCATE, sbi, ni->mi.rno, svcn, evcn, svcn,
+			      Add2Ptr(attr, roff), asize - roff);
+	}
+
+	if (ni->attr_list.size) {
+		run_deallocate(ni->mi.sbi, &ni->attr_list.run, true);
+		al_destroy(ni);
+	}
+
+	/* Free all subrecords. */
+	for (node = rb_first(&ni->mi_tree); node;) {
+		struct rb_node *next = rb_next(node);
+		struct mft_inode *mi = rb_entry(node, struct mft_inode, node);
+
+		clear_rec_inuse(mi->mrec);
+		mi->dirty = true;
+		mi_write(mi, 0);
+
+		ntfs_mark_rec_free(sbi, mi->rno);
+		ni_remove_mi(ni, mi);
+		mi_put(mi);
+		node = next;
+	}
+
+	/* Free base record. */
+	clear_rec_inuse(ni->mi.mrec);
+	ni->mi.dirty = true;
+	err = mi_write(&ni->mi, 0);
+
+	ntfs_mark_rec_free(sbi, ni->mi.rno);
+
+	return err;
+}
+
+/* ni_fname_name
+ *
+ * Return: File name attribute by its value.
+ */
+struct ATTR_FILE_NAME *ni_fname_name(struct ntfs_inode *ni,
+				     const struct cpu_str *uni,
+				     const struct MFT_REF *home_dir,
+				     struct mft_inode **mi,
+				     struct ATTR_LIST_ENTRY **le)
+{
+	struct ATTRIB *attr = NULL;
+	struct ATTR_FILE_NAME *fname;
+
+	*le = NULL;
+
+	/* Enumerate all names. */
+next:
+	attr = ni_find_attr(ni, attr, le, ATTR_NAME, NULL, 0, NULL, mi);
+	if (!attr)
+		return NULL;
+
+	fname = resident_data_ex(attr, SIZEOF_ATTRIBUTE_FILENAME);
+	if (!fname)
+		goto next;
+
+	if (home_dir && memcmp(home_dir, &fname->home, sizeof(*home_dir)))
+		goto next;
+
+	if (!uni)
+		goto next;
+
+	if (uni->len != fname->name_len)
+		goto next;
+
+	if (ntfs_cmp_names_cpu(uni, (struct le_str *)&fname->name_len, NULL,
+			       false))
+		goto next;
+
+	return fname;
+}
+
+/*
+ * ni_fname_type
+ *
+ * Return: File name attribute with given type.
+ */
+struct ATTR_FILE_NAME *ni_fname_type(struct ntfs_inode *ni, u8 name_type,
+				     struct mft_inode **mi,
+				     struct ATTR_LIST_ENTRY **le)
+{
+	struct ATTRIB *attr = NULL;
+	struct ATTR_FILE_NAME *fname;
+
+	*le = NULL;
+
+	if (FILE_NAME_POSIX == name_type)
+		return NULL;
+
+	/* Enumerate all names. */
+	for (;;) {
+		attr = ni_find_attr(ni, attr, le, ATTR_NAME, NULL, 0, NULL, mi);
+		if (!attr)
+			return NULL;
+
+		fname = resident_data_ex(attr, SIZEOF_ATTRIBUTE_FILENAME);
+		if (fname && name_type == fname->type)
+			return fname;
+	}
+}
+
+/*
+ * ni_new_attr_flags
+ *
+ * Process compressed/sparsed in special way.
+ * NOTE: You need to set ni->std_fa = new_fa
+ * after this function to keep internal structures in consistency.
+ */
+int ni_new_attr_flags(struct ntfs_inode *ni, enum FILE_ATTRIBUTE new_fa)
+{
+	struct ATTRIB *attr;
+	struct mft_inode *mi;
+	__le16 new_aflags;
+	u32 new_asize;
+
+	attr = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, NULL, &mi);
+	if (!attr)
+		return -EINVAL;
+
+	new_aflags = attr->flags;
+
+	if (new_fa & FILE_ATTRIBUTE_SPARSE_FILE)
+		new_aflags |= ATTR_FLAG_SPARSED;
+	else
+		new_aflags &= ~ATTR_FLAG_SPARSED;
+
+	if (new_fa & FILE_ATTRIBUTE_COMPRESSED)
+		new_aflags |= ATTR_FLAG_COMPRESSED;
+	else
+		new_aflags &= ~ATTR_FLAG_COMPRESSED;
+
+	if (new_aflags == attr->flags)
+		return 0;
+
+	if ((new_aflags & (ATTR_FLAG_COMPRESSED | ATTR_FLAG_SPARSED)) ==
+	    (ATTR_FLAG_COMPRESSED | ATTR_FLAG_SPARSED)) {
+		ntfs_inode_warn(&ni->vfs_inode,
+				"file can't be sparsed and compressed");
+		return -EOPNOTSUPP;
+	}
+
+	if (!attr->non_res)
+		goto out;
+
+	if (attr->nres.data_size) {
+		ntfs_inode_warn(
+			&ni->vfs_inode,
+			"one can change sparsed/compressed only for empty files");
+		return -EOPNOTSUPP;
+	}
+
+	/* Resize nonresident empty attribute in-place only. */
+	new_asize = (new_aflags & (ATTR_FLAG_COMPRESSED | ATTR_FLAG_SPARSED))
+			    ? (SIZEOF_NONRESIDENT_EX + 8)
+			    : (SIZEOF_NONRESIDENT + 8);
+
+	if (!mi_resize_attr(mi, attr, new_asize - le32_to_cpu(attr->size)))
+		return -EOPNOTSUPP;
+
+	if (new_aflags & ATTR_FLAG_SPARSED) {
+		attr->name_off = SIZEOF_NONRESIDENT_EX_LE;
+		/* Windows uses 16 clusters per frame but supports one cluster per frame too. */
+		attr->nres.c_unit = 0;
+		ni->vfs_inode.i_mapping->a_ops = &ntfs_aops;
+	} else if (new_aflags & ATTR_FLAG_COMPRESSED) {
+		attr->name_off = SIZEOF_NONRESIDENT_EX_LE;
+		/* The only allowed: 16 clusters per frame. */
+		attr->nres.c_unit = NTFS_LZNT_CUNIT;
+		ni->vfs_inode.i_mapping->a_ops = &ntfs_aops_cmpr;
+	} else {
+		attr->name_off = SIZEOF_NONRESIDENT_LE;
+		/* Normal files. */
+		attr->nres.c_unit = 0;
+		ni->vfs_inode.i_mapping->a_ops = &ntfs_aops;
+	}
+	attr->nres.run_off = attr->name_off;
+out:
+	attr->flags = new_aflags;
+	mi->dirty = true;
+
+	return 0;
+}
+
+/*
+ * ni_parse_reparse
+ *
+ * Buffer is at least 24 bytes.
+ */
+enum REPARSE_SIGN ni_parse_reparse(struct ntfs_inode *ni, struct ATTRIB *attr,
+				   void *buffer)
+{
+	const struct REPARSE_DATA_BUFFER *rp = NULL;
+	u8 bits;
+	u16 len;
+	typeof(rp->CompressReparseBuffer) *cmpr;
+
+	static_assert(sizeof(struct REPARSE_DATA_BUFFER) <= 24);
+
+	/* Try to estimate reparse point. */
+	if (!attr->non_res) {
+		rp = resident_data_ex(attr, sizeof(struct REPARSE_DATA_BUFFER));
+	} else if (le64_to_cpu(attr->nres.data_size) >=
+		   sizeof(struct REPARSE_DATA_BUFFER)) {
+		struct runs_tree run;
+
+		run_init(&run);
+
+		if (!attr_load_runs_vcn(ni, ATTR_REPARSE, NULL, 0, &run, 0) &&
+		    !ntfs_read_run_nb(ni->mi.sbi, &run, 0, buffer,
+				      sizeof(struct REPARSE_DATA_BUFFER),
+				      NULL)) {
+			rp = buffer;
+		}
+
+		run_close(&run);
+	}
+
+	if (!rp)
+		return REPARSE_NONE;
+
+	len = le16_to_cpu(rp->ReparseDataLength);
+	switch (rp->ReparseTag) {
+	case (IO_REPARSE_TAG_MICROSOFT | IO_REPARSE_TAG_SYMBOLIC_LINK):
+		break; /* Symbolic link. */
+	case IO_REPARSE_TAG_MOUNT_POINT:
+		break; /* Mount points and junctions. */
+	case IO_REPARSE_TAG_SYMLINK:
+		break;
+	case IO_REPARSE_TAG_COMPRESS:
+		/*
+		 * WOF - Windows Overlay Filter - Used to compress files with
+		 * LZX/Xpress.
+		 *
+		 * Unlike native NTFS file compression, the Windows
+		 * Overlay Filter supports only read operations. This means
+		 * that it doesn't need to sector-align each compressed chunk,
+		 * so the compressed data can be packed more tightly together.
+		 * If you open the file for writing, the WOF just decompresses
+		 * the entire file, turning it back into a plain file.
+		 *
+		 * Ntfs3 driver decompresses the entire file only on write or
+		 * change size requests.
+		 */
+
+		cmpr = &rp->CompressReparseBuffer;
+		if (len < sizeof(*cmpr) ||
+		    cmpr->WofVersion != WOF_CURRENT_VERSION ||
+		    cmpr->WofProvider != WOF_PROVIDER_SYSTEM ||
+		    cmpr->ProviderVer != WOF_PROVIDER_CURRENT_VERSION) {
+			return REPARSE_NONE;
+		}
+
+		switch (cmpr->CompressionFormat) {
+		case WOF_COMPRESSION_XPRESS4K:
+			bits = 0xc; // 4k
+			break;
+		case WOF_COMPRESSION_XPRESS8K:
+			bits = 0xd; // 8k
+			break;
+		case WOF_COMPRESSION_XPRESS16K:
+			bits = 0xe; // 16k
+			break;
+		case WOF_COMPRESSION_LZX32K:
+			bits = 0xf; // 32k
+			break;
+		default:
+			bits = 0x10; // 64k
+			break;
+		}
+		ni_set_ext_compress_bits(ni, bits);
+		return REPARSE_COMPRESSED;
+
+	case IO_REPARSE_TAG_DEDUP:
+		ni->ni_flags |= NI_FLAG_DEDUPLICATED;
+		return REPARSE_DEDUPLICATED;
+
+	default:
+		if (rp->ReparseTag & IO_REPARSE_TAG_NAME_SURROGATE)
+			break;
+
+		return REPARSE_NONE;
+	}
+
+	/* Looks like normal symlink. */
+	return REPARSE_LINK;
+}
+
+/*
+ * ni_fiemap - Helper for file_fiemap().
+ *
+ * Assumed ni_lock.
+ * TODO: Less aggressive locks.
+ */
+int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
+	      __u64 vbo, __u64 len)
+{
+	int err = 0;
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	u8 cluster_bits = sbi->cluster_bits;
+	struct runs_tree *run;
+	struct rw_semaphore *run_lock;
+	struct ATTRIB *attr;
+	CLST vcn = vbo >> cluster_bits;
+	CLST lcn, clen;
+	u64 valid = ni->i_valid;
+	u64 lbo, bytes;
+	u64 end, alloc_size;
+	size_t idx = -1;
+	u32 flags;
+	bool ok;
+
+	if (S_ISDIR(ni->vfs_inode.i_mode)) {
+		run = &ni->dir.alloc_run;
+		attr = ni_find_attr(ni, NULL, NULL, ATTR_ALLOC, I30_NAME,
+				    ARRAY_SIZE(I30_NAME), NULL, NULL);
+		run_lock = &ni->dir.run_lock;
+	} else {
+		run = &ni->file.run;
+		attr = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, NULL,
+				    NULL);
+		if (!attr) {
+			err = -EINVAL;
+			goto out;
+		}
+		if (is_attr_compressed(attr)) {
+			/* Unfortunately cp -r incorrectly treats compressed clusters. */
+			err = -EOPNOTSUPP;
+			ntfs_inode_warn(
+				&ni->vfs_inode,
+				"fiemap is not supported for compressed file (cp -r)");
+			goto out;
+		}
+		run_lock = &ni->file.run_lock;
+	}
+
+	if (!attr || !attr->non_res) {
+		err = fiemap_fill_next_extent(
+			fieinfo, 0, 0,
+			attr ? le32_to_cpu(attr->res.data_size) : 0,
+			FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_LAST |
+				FIEMAP_EXTENT_MERGED);
+		goto out;
+	}
+
+	end = vbo + len;
+	alloc_size = le64_to_cpu(attr->nres.alloc_size);
+	if (end > alloc_size)
+		end = alloc_size;
+
+	down_read(run_lock);
+
+	while (vbo < end) {
+		if (idx == -1) {
+			ok = run_lookup_entry(run, vcn, &lcn, &clen, &idx);
+		} else {
+			CLST vcn_next = vcn;
+
+			ok = run_get_entry(run, ++idx, &vcn, &lcn, &clen) &&
+			     vcn == vcn_next;
+			if (!ok)
+				vcn = vcn_next;
+		}
+
+		if (!ok) {
+			up_read(run_lock);
+			down_write(run_lock);
+
+			err = attr_load_runs_vcn(ni, attr->type,
+						 attr_name(attr),
+						 attr->name_len, run, vcn);
+
+			up_write(run_lock);
+			down_read(run_lock);
+
+			if (err)
+				break;
+
+			ok = run_lookup_entry(run, vcn, &lcn, &clen, &idx);
+
+			if (!ok) {
+				err = -EINVAL;
+				break;
+			}
+		}
+
+		if (!clen) {
+			err = -EINVAL; // ?
+			break;
+		}
+
+		if (lcn == SPARSE_LCN) {
+			vcn += clen;
+			vbo = (u64)vcn << cluster_bits;
+			continue;
+		}
+
+		flags = FIEMAP_EXTENT_MERGED;
+		if (S_ISDIR(ni->vfs_inode.i_mode)) {
+			;
+		} else if (is_attr_compressed(attr)) {
+			CLST clst_data;
+
+			err = attr_is_frame_compressed(
+				ni, attr, vcn >> attr->nres.c_unit, &clst_data);
+			if (err)
+				break;
+			if (clst_data < NTFS_LZNT_CLUSTERS)
+				flags |= FIEMAP_EXTENT_ENCODED;
+		} else if (is_attr_encrypted(attr)) {
+			flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;
+		}
+
+		vbo = (u64)vcn << cluster_bits;
+		bytes = (u64)clen << cluster_bits;
+		lbo = (u64)lcn << cluster_bits;
+
+		vcn += clen;
+
+		if (vbo + bytes >= end) {
+			bytes = end - vbo;
+			flags |= FIEMAP_EXTENT_LAST;
+		}
+
+		if (vbo + bytes <= valid) {
+			;
+		} else if (vbo >= valid) {
+			flags |= FIEMAP_EXTENT_UNWRITTEN;
+		} else {
+			/* vbo < valid && valid < vbo + bytes */
+			u64 dlen = valid - vbo;
+
+			err = fiemap_fill_next_extent(fieinfo, vbo, lbo, dlen,
+						      flags);
+			if (err < 0)
+				break;
+			if (err == 1) {
+				err = 0;
+				break;
+			}
+
+			vbo = valid;
+			bytes -= dlen;
+			if (!bytes)
+				continue;
+
+			lbo += dlen;
+			flags |= FIEMAP_EXTENT_UNWRITTEN;
+		}
+
+		err = fiemap_fill_next_extent(fieinfo, vbo, lbo, bytes, flags);
+		if (err < 0)
+			break;
+		if (err == 1) {
+			err = 0;
+			break;
+		}
+
+		vbo += bytes;
+	}
+
+	up_read(run_lock);
+
+out:
+	return err;
+}
+
+/*
+ * ni_readpage_cmpr
+ *
+ * When decompressing, we typically obtain more than one page per reference.
+ * We inject the additional pages into the page cache.
+ */
+int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page)
+{
+	int err;
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	struct address_space *mapping = page->mapping;
+	pgoff_t index = page->index;
+	u64 frame_vbo, vbo = (u64)index << PAGE_SHIFT;
+	struct page **pages = NULL; /* Array of at most 16 pages. stack? */
+	u8 frame_bits;
+	CLST frame;
+	u32 i, idx, frame_size, pages_per_frame;
+	gfp_t gfp_mask;
+	struct page *pg;
+
+	if (vbo >= ni->vfs_inode.i_size) {
+		SetPageUptodate(page);
+		err = 0;
+		goto out;
+	}
+
+	if (ni->ni_flags & NI_FLAG_COMPRESSED_MASK) {
+		/* Xpress or LZX. */
+		frame_bits = ni_ext_compress_bits(ni);
+	} else {
+		/* LZNT compression. */
+		frame_bits = NTFS_LZNT_CUNIT + sbi->cluster_bits;
+	}
+	frame_size = 1u << frame_bits;
+	frame = vbo >> frame_bits;
+	frame_vbo = (u64)frame << frame_bits;
+	idx = (vbo - frame_vbo) >> PAGE_SHIFT;
+
+	pages_per_frame = frame_size >> PAGE_SHIFT;
+	pages = kcalloc(pages_per_frame, sizeof(struct page *), GFP_NOFS);
+	if (!pages) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	pages[idx] = page;
+	index = frame_vbo >> PAGE_SHIFT;
+	gfp_mask = mapping_gfp_mask(mapping);
+
+	for (i = 0; i < pages_per_frame; i++, index++) {
+		if (i == idx)
+			continue;
+
+		pg = find_or_create_page(mapping, index, gfp_mask);
+		if (!pg) {
+			err = -ENOMEM;
+			goto out1;
+		}
+		pages[i] = pg;
+	}
+
+	err = ni_read_frame(ni, frame_vbo, pages, pages_per_frame);
+
+out1:
+	if (err)
+		SetPageError(page);
+
+	for (i = 0; i < pages_per_frame; i++) {
+		pg = pages[i];
+		if (i == idx)
+			continue;
+		unlock_page(pg);
+		put_page(pg);
+	}
+
+out:
+	/* At this point, err contains 0 or -EIO depending on the "critical" page. */
+	kfree(pages);
+	unlock_page(page);
+
+	return err;
+}
+
+#ifdef CONFIG_NTFS3_LZX_XPRESS
+/*
+ * ni_decompress_file - Decompress LZX/Xpress compressed file.
+ *
+ * Remove ATTR_DATA::WofCompressedData.
+ * Remove ATTR_REPARSE.
+ */
+int ni_decompress_file(struct ntfs_inode *ni)
+{
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	struct inode *inode = &ni->vfs_inode;
+	loff_t i_size = inode->i_size;
+	struct address_space *mapping = inode->i_mapping;
+	gfp_t gfp_mask = mapping_gfp_mask(mapping);
+	struct page **pages = NULL;
+	struct ATTR_LIST_ENTRY *le;
+	struct ATTRIB *attr;
+	CLST vcn, cend, lcn, clen, end;
+	pgoff_t index;
+	u64 vbo;
+	u8 frame_bits;
+	u32 i, frame_size, pages_per_frame, bytes;
+	struct mft_inode *mi;
+	int err;
+
+	/* Clusters for decompressed data. */
+	cend = bytes_to_cluster(sbi, i_size);
+
+	if (!i_size)
+		goto remove_wof;
+
+	/* Check in advance. */
+	if (cend > wnd_zeroes(&sbi->used.bitmap)) {
+		err = -ENOSPC;
+		goto out;
+	}
+
+	frame_bits = ni_ext_compress_bits(ni);
+	frame_size = 1u << frame_bits;
+	pages_per_frame = frame_size >> PAGE_SHIFT;
+	pages = kcalloc(pages_per_frame, sizeof(struct page *), GFP_NOFS);
+	if (!pages) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * Step 1: Decompress data and copy to new allocated clusters.
+	 */
+	index = 0;
+	for (vbo = 0; vbo < i_size; vbo += bytes) {
+		u32 nr_pages;
+		bool new;
+
+		if (vbo + frame_size > i_size) {
+			bytes = i_size - vbo;
+			nr_pages = (bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
+		} else {
+			nr_pages = pages_per_frame;
+			bytes = frame_size;
+		}
+
+		end = bytes_to_cluster(sbi, vbo + bytes);
+
+		for (vcn = vbo >> sbi->cluster_bits; vcn < end; vcn += clen) {
+			err = attr_data_get_block(ni, vcn, cend - vcn, &lcn,
+						  &clen, &new);
+			if (err)
+				goto out;
+		}
+
+		for (i = 0; i < pages_per_frame; i++, index++) {
+			struct page *pg;
+
+			pg = find_or_create_page(mapping, index, gfp_mask);
+			if (!pg) {
+				while (i--) {
+					unlock_page(pages[i]);
+					put_page(pages[i]);
+				}
+				err = -ENOMEM;
+				goto out;
+			}
+			pages[i] = pg;
+		}
+
+		err = ni_read_frame(ni, vbo, pages, pages_per_frame);
+
+		if (!err) {
+			down_read(&ni->file.run_lock);
+			err = ntfs_bio_pages(sbi, &ni->file.run, pages,
+					     nr_pages, vbo, bytes,
+					     REQ_OP_WRITE);
+			up_read(&ni->file.run_lock);
+		}
+
+		for (i = 0; i < pages_per_frame; i++) {
+			unlock_page(pages[i]);
+			put_page(pages[i]);
+		}
+
+		if (err)
+			goto out;
+
+		cond_resched();
+	}
+
+remove_wof:
+	/*
+	 * Step 2: Deallocate attributes ATTR_DATA::WofCompressedData
+	 * and ATTR_REPARSE.
+	 */
+	attr = NULL;
+	le = NULL;
+	while ((attr = ni_enum_attr_ex(ni, attr, &le, NULL))) {
+		CLST svcn, evcn;
+		u32 asize, roff;
+
+		if (attr->type == ATTR_REPARSE) {
+			struct MFT_REF ref;
+
+			mi_get_ref(&ni->mi, &ref);
+			ntfs_remove_reparse(sbi, 0, &ref);
+		}
+
+		if (!attr->non_res)
+			continue;
+
+		if (attr->type != ATTR_REPARSE &&
+		    (attr->type != ATTR_DATA ||
+		     attr->name_len != ARRAY_SIZE(WOF_NAME) ||
+		     memcmp(attr_name(attr), WOF_NAME, sizeof(WOF_NAME))))
+			continue;
+
+		svcn = le64_to_cpu(attr->nres.svcn);
+		evcn = le64_to_cpu(attr->nres.evcn);
+
+		if (evcn + 1 <= svcn)
+			continue;
+
+		asize = le32_to_cpu(attr->size);
+		roff = le16_to_cpu(attr->nres.run_off);
+
+		/*run==1  Means unpack and deallocate. */
+		run_unpack_ex(RUN_DEALLOCATE, sbi, ni->mi.rno, svcn, evcn, svcn,
+			      Add2Ptr(attr, roff), asize - roff);
+	}
+
+	/*
+	 * Step 3: Remove attribute ATTR_DATA::WofCompressedData.
+	 */
+	err = ni_remove_attr(ni, ATTR_DATA, WOF_NAME, ARRAY_SIZE(WOF_NAME),
+			     false, NULL);
+	if (err)
+		goto out;
+
+	/*
+	 * Step 4: Remove ATTR_REPARSE.
+	 */
+	err = ni_remove_attr(ni, ATTR_REPARSE, NULL, 0, false, NULL);
+	if (err)
+		goto out;
+
+	/*
+	 * Step 5: Remove sparse flag from data attribute.
+	 */
+	attr = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, NULL, &mi);
+	if (!attr) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (attr->non_res && is_attr_sparsed(attr)) {
+		/* Sparsed attribute header is 8 bytes bigger than normal. */
+		struct MFT_REC *rec = mi->mrec;
+		u32 used = le32_to_cpu(rec->used);
+		u32 asize = le32_to_cpu(attr->size);
+		u16 roff = le16_to_cpu(attr->nres.run_off);
+		char *rbuf = Add2Ptr(attr, roff);
+
+		memmove(rbuf - 8, rbuf, used - PtrOffset(rec, rbuf));
+		attr->size = cpu_to_le32(asize - 8);
+		attr->flags &= ~ATTR_FLAG_SPARSED;
+		attr->nres.run_off = cpu_to_le16(roff - 8);
+		attr->nres.c_unit = 0;
+		rec->used = cpu_to_le32(used - 8);
+		mi->dirty = true;
+		ni->std_fa &= ~(FILE_ATTRIBUTE_SPARSE_FILE |
+				FILE_ATTRIBUTE_REPARSE_POINT);
+
+		mark_inode_dirty(inode);
+	}
+
+	/* Clear cached flag. */
+	ni->ni_flags &= ~NI_FLAG_COMPRESSED_MASK;
+	if (ni->file.offs_page) {
+		put_page(ni->file.offs_page);
+		ni->file.offs_page = NULL;
+	}
+	mapping->a_ops = &ntfs_aops;
+
+out:
+	kfree(pages);
+	if (err) {
+		make_bad_inode(inode);
+		ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
+	}
+
+	return err;
+}
+
+/*
+ * decompress_lzx_xpress - External compression LZX/Xpress.
+ */
+static int decompress_lzx_xpress(struct ntfs_sb_info *sbi, const char *cmpr,
+				 size_t cmpr_size, void *unc, size_t unc_size,
+				 u32 frame_size)
+{
+	int err;
+	void *ctx;
+
+	if (cmpr_size == unc_size) {
+		/* Frame not compressed. */
+		memcpy(unc, cmpr, unc_size);
+		return 0;
+	}
+
+	err = 0;
+	if (frame_size == 0x8000) {
+		mutex_lock(&sbi->compress.mtx_lzx);
+		/* LZX: Frame compressed. */
+		ctx = sbi->compress.lzx;
+		if (!ctx) {
+			/* Lazy initialize LZX decompress context. */
+			ctx = lzx_allocate_decompressor();
+			if (!ctx) {
+				err = -ENOMEM;
+				goto out1;
+			}
+
+			sbi->compress.lzx = ctx;
+		}
+
+		if (lzx_decompress(ctx, cmpr, cmpr_size, unc, unc_size)) {
+			/* Treat all errors as "invalid argument". */
+			err = -EINVAL;
+		}
+out1:
+		mutex_unlock(&sbi->compress.mtx_lzx);
+	} else {
+		/* XPRESS: Frame compressed. */
+		mutex_lock(&sbi->compress.mtx_xpress);
+		ctx = sbi->compress.xpress;
+		if (!ctx) {
+			/* Lazy initialize Xpress decompress context. */
+			ctx = xpress_allocate_decompressor();
+			if (!ctx) {
+				err = -ENOMEM;
+				goto out2;
+			}
+
+			sbi->compress.xpress = ctx;
+		}
+
+		if (xpress_decompress(ctx, cmpr, cmpr_size, unc, unc_size)) {
+			/* Treat all errors as "invalid argument". */
+			err = -EINVAL;
+		}
+out2:
+		mutex_unlock(&sbi->compress.mtx_xpress);
+	}
+	return err;
+}
+#endif
+
+/*
+ * ni_read_frame
+ *
+ * Pages - Array of locked pages.
+ */
+int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages,
+		  u32 pages_per_frame)
+{
+	int err;
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	u8 cluster_bits = sbi->cluster_bits;
+	char *frame_ondisk = NULL;
+	char *frame_mem = NULL;
+	struct page **pages_disk = NULL;
+	struct ATTR_LIST_ENTRY *le = NULL;
+	struct runs_tree *run = &ni->file.run;
+	u64 valid_size = ni->i_valid;
+	u64 vbo_disk;
+	size_t unc_size;
+	u32 frame_size, i, npages_disk, ondisk_size;
+	struct page *pg;
+	struct ATTRIB *attr;
+	CLST frame, clst_data;
+
+	/*
+	 * To simplify decompress algorithm do vmap for source
+	 * and target pages.
+	 */
+	for (i = 0; i < pages_per_frame; i++)
+		kmap(pages[i]);
+
+	frame_size = pages_per_frame << PAGE_SHIFT;
+	frame_mem = vmap(pages, pages_per_frame, VM_MAP, PAGE_KERNEL);
+	if (!frame_mem) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	attr = ni_find_attr(ni, NULL, &le, ATTR_DATA, NULL, 0, NULL, NULL);
+	if (!attr) {
+		err = -ENOENT;
+		goto out1;
+	}
+
+	if (!attr->non_res) {
+		u32 data_size = le32_to_cpu(attr->res.data_size);
+
+		memset(frame_mem, 0, frame_size);
+		if (frame_vbo < data_size) {
+			ondisk_size = data_size - frame_vbo;
+			memcpy(frame_mem, resident_data(attr) + frame_vbo,
+			       min(ondisk_size, frame_size));
+		}
+		err = 0;
+		goto out1;
+	}
+
+	if (frame_vbo >= valid_size) {
+		memset(frame_mem, 0, frame_size);
+		err = 0;
+		goto out1;
+	}
+
+	if (ni->ni_flags & NI_FLAG_COMPRESSED_MASK) {
+#ifndef CONFIG_NTFS3_LZX_XPRESS
+		err = -EOPNOTSUPP;
+		goto out1;
+#else
+		u32 frame_bits = ni_ext_compress_bits(ni);
+		u64 frame64 = frame_vbo >> frame_bits;
+		u64 frames, vbo_data;
+
+		if (frame_size != (1u << frame_bits)) {
+			err = -EINVAL;
+			goto out1;
+		}
+		switch (frame_size) {
+		case 0x1000:
+		case 0x2000:
+		case 0x4000:
+		case 0x8000:
+			break;
+		default:
+			/* Unknown compression. */
+			err = -EOPNOTSUPP;
+			goto out1;
+		}
+
+		attr = ni_find_attr(ni, attr, &le, ATTR_DATA, WOF_NAME,
+				    ARRAY_SIZE(WOF_NAME), NULL, NULL);
+		if (!attr) {
+			ntfs_inode_err(
+				&ni->vfs_inode,
+				"external compressed file should contains data attribute \"WofCompressedData\"");
+			err = -EINVAL;
+			goto out1;
+		}
+
+		if (!attr->non_res) {
+			run = NULL;
+		} else {
+			run = run_alloc();
+			if (!run) {
+				err = -ENOMEM;
+				goto out1;
+			}
+		}
+
+		frames = (ni->vfs_inode.i_size - 1) >> frame_bits;
+
+		err = attr_wof_frame_info(ni, attr, run, frame64, frames,
+					  frame_bits, &ondisk_size, &vbo_data);
+		if (err)
+			goto out2;
+
+		if (frame64 == frames) {
+			unc_size = 1 + ((ni->vfs_inode.i_size - 1) &
+					(frame_size - 1));
+			ondisk_size = attr_size(attr) - vbo_data;
+		} else {
+			unc_size = frame_size;
+		}
+
+		if (ondisk_size > frame_size) {
+			err = -EINVAL;
+			goto out2;
+		}
+
+		if (!attr->non_res) {
+			if (vbo_data + ondisk_size >
+			    le32_to_cpu(attr->res.data_size)) {
+				err = -EINVAL;
+				goto out1;
+			}
+
+			err = decompress_lzx_xpress(
+				sbi, Add2Ptr(resident_data(attr), vbo_data),
+				ondisk_size, frame_mem, unc_size, frame_size);
+			goto out1;
+		}
+		vbo_disk = vbo_data;
+		/* Load all runs to read [vbo_disk-vbo_to). */
+		err = attr_load_runs_range(ni, ATTR_DATA, WOF_NAME,
+					   ARRAY_SIZE(WOF_NAME), run, vbo_disk,
+					   vbo_data + ondisk_size);
+		if (err)
+			goto out2;
+		npages_disk = (ondisk_size + (vbo_disk & (PAGE_SIZE - 1)) +
+			       PAGE_SIZE - 1) >>
+			      PAGE_SHIFT;
+#endif
+	} else if (is_attr_compressed(attr)) {
+		/* LZNT compression. */
+		if (sbi->cluster_size > NTFS_LZNT_MAX_CLUSTER) {
+			err = -EOPNOTSUPP;
+			goto out1;
+		}
+
+		if (attr->nres.c_unit != NTFS_LZNT_CUNIT) {
+			err = -EOPNOTSUPP;
+			goto out1;
+		}
+
+		down_write(&ni->file.run_lock);
+		run_truncate_around(run, le64_to_cpu(attr->nres.svcn));
+		frame = frame_vbo >> (cluster_bits + NTFS_LZNT_CUNIT);
+		err = attr_is_frame_compressed(ni, attr, frame, &clst_data);
+		up_write(&ni->file.run_lock);
+		if (err)
+			goto out1;
+
+		if (!clst_data) {
+			memset(frame_mem, 0, frame_size);
+			goto out1;
+		}
+
+		frame_size = sbi->cluster_size << NTFS_LZNT_CUNIT;
+		ondisk_size = clst_data << cluster_bits;
+
+		if (clst_data >= NTFS_LZNT_CLUSTERS) {
+			/* Frame is not compressed. */
+			down_read(&ni->file.run_lock);
+			err = ntfs_bio_pages(sbi, run, pages, pages_per_frame,
+					     frame_vbo, ondisk_size,
+					     REQ_OP_READ);
+			up_read(&ni->file.run_lock);
+			goto out1;
+		}
+		vbo_disk = frame_vbo;
+		npages_disk = (ondisk_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	} else {
+		__builtin_unreachable();
+		err = -EINVAL;
+		goto out1;
+	}
+
+	pages_disk = kzalloc(npages_disk * sizeof(struct page *), GFP_NOFS);
+	if (!pages_disk) {
+		err = -ENOMEM;
+		goto out2;
+	}
+
+	for (i = 0; i < npages_disk; i++) {
+		pg = alloc_page(GFP_KERNEL);
+		if (!pg) {
+			err = -ENOMEM;
+			goto out3;
+		}
+		pages_disk[i] = pg;
+		lock_page(pg);
+		kmap(pg);
+	}
+
+	/* Read 'ondisk_size' bytes from disk. */
+	down_read(&ni->file.run_lock);
+	err = ntfs_bio_pages(sbi, run, pages_disk, npages_disk, vbo_disk,
+			     ondisk_size, REQ_OP_READ);
+	up_read(&ni->file.run_lock);
+	if (err)
+		goto out3;
+
+	/*
+	 * To simplify decompress algorithm do vmap for source and target pages.
+	 */
+	frame_ondisk = vmap(pages_disk, npages_disk, VM_MAP, PAGE_KERNEL_RO);
+	if (!frame_ondisk) {
+		err = -ENOMEM;
+		goto out3;
+	}
+
+	/* Decompress: Frame_ondisk -> frame_mem. */
+#ifdef CONFIG_NTFS3_LZX_XPRESS
+	if (run != &ni->file.run) {
+		/* LZX or XPRESS */
+		err = decompress_lzx_xpress(
+			sbi, frame_ondisk + (vbo_disk & (PAGE_SIZE - 1)),
+			ondisk_size, frame_mem, unc_size, frame_size);
+	} else
+#endif
+	{
+		/* LZNT - Native NTFS compression. */
+		unc_size = decompress_lznt(frame_ondisk, ondisk_size, frame_mem,
+					   frame_size);
+		if ((ssize_t)unc_size < 0)
+			err = unc_size;
+		else if (!unc_size || unc_size > frame_size)
+			err = -EINVAL;
+	}
+	if (!err && valid_size < frame_vbo + frame_size) {
+		size_t ok = valid_size - frame_vbo;
+
+		memset(frame_mem + ok, 0, frame_size - ok);
+	}
+
+	vunmap(frame_ondisk);
+
+out3:
+	for (i = 0; i < npages_disk; i++) {
+		pg = pages_disk[i];
+		if (pg) {
+			kunmap(pg);
+			unlock_page(pg);
+			put_page(pg);
+		}
+	}
+	kfree(pages_disk);
+
+out2:
+#ifdef CONFIG_NTFS3_LZX_XPRESS
+	if (run != &ni->file.run)
+		run_free(run);
+#endif
+out1:
+	vunmap(frame_mem);
+out:
+	for (i = 0; i < pages_per_frame; i++) {
+		pg = pages[i];
+		kunmap(pg);
+		ClearPageError(pg);
+		SetPageUptodate(pg);
+	}
+
+	return err;
+}
+
+/*
+ * ni_write_frame
+ *
+ * Pages - Array of locked pages.
+ */
+int ni_write_frame(struct ntfs_inode *ni, struct page **pages,
+		   u32 pages_per_frame)
+{
+	int err;
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	u8 frame_bits = NTFS_LZNT_CUNIT + sbi->cluster_bits;
+	u32 frame_size = sbi->cluster_size << NTFS_LZNT_CUNIT;
+	u64 frame_vbo = (u64)pages[0]->index << PAGE_SHIFT;
+	CLST frame = frame_vbo >> frame_bits;
+	char *frame_ondisk = NULL;
+	struct page **pages_disk = NULL;
+	struct ATTR_LIST_ENTRY *le = NULL;
+	char *frame_mem;
+	struct ATTRIB *attr;
+	struct mft_inode *mi;
+	u32 i;
+	struct page *pg;
+	size_t compr_size, ondisk_size;
+	struct lznt *lznt;
+
+	attr = ni_find_attr(ni, NULL, &le, ATTR_DATA, NULL, 0, NULL, &mi);
+	if (!attr) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	if (WARN_ON(!is_attr_compressed(attr))) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (sbi->cluster_size > NTFS_LZNT_MAX_CLUSTER) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (!attr->non_res) {
+		down_write(&ni->file.run_lock);
+		err = attr_make_nonresident(ni, attr, le, mi,
+					    le32_to_cpu(attr->res.data_size),
+					    &ni->file.run, &attr, pages[0]);
+		up_write(&ni->file.run_lock);
+		if (err)
+			goto out;
+	}
+
+	if (attr->nres.c_unit != NTFS_LZNT_CUNIT) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	pages_disk = kcalloc(pages_per_frame, sizeof(struct page *), GFP_NOFS);
+	if (!pages_disk) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	for (i = 0; i < pages_per_frame; i++) {
+		pg = alloc_page(GFP_KERNEL);
+		if (!pg) {
+			err = -ENOMEM;
+			goto out1;
+		}
+		pages_disk[i] = pg;
+		lock_page(pg);
+		kmap(pg);
+	}
+
+	/* To simplify compress algorithm do vmap for source and target pages. */
+	frame_ondisk = vmap(pages_disk, pages_per_frame, VM_MAP, PAGE_KERNEL);
+	if (!frame_ondisk) {
+		err = -ENOMEM;
+		goto out1;
+	}
+
+	for (i = 0; i < pages_per_frame; i++)
+		kmap(pages[i]);
+
+	/* Map in-memory frame for read-only. */
+	frame_mem = vmap(pages, pages_per_frame, VM_MAP, PAGE_KERNEL_RO);
+	if (!frame_mem) {
+		err = -ENOMEM;
+		goto out2;
+	}
+
+	mutex_lock(&sbi->compress.mtx_lznt);
+	lznt = NULL;
+	if (!sbi->compress.lznt) {
+		/*
+		 * LZNT implements two levels of compression:
+		 * 0 - Standard compression
+		 * 1 - Best compression, requires a lot of cpu
+		 * use mount option?
+		 */
+		lznt = get_lznt_ctx(0);
+		if (!lznt) {
+			mutex_unlock(&sbi->compress.mtx_lznt);
+			err = -ENOMEM;
+			goto out3;
+		}
+
+		sbi->compress.lznt = lznt;
+		lznt = NULL;
+	}
+
+	/* Compress: frame_mem -> frame_ondisk */
+	compr_size = compress_lznt(frame_mem, frame_size, frame_ondisk,
+				   frame_size, sbi->compress.lznt);
+	mutex_unlock(&sbi->compress.mtx_lznt);
+	kfree(lznt);
+
+	if (compr_size + sbi->cluster_size > frame_size) {
+		/* Frame is not compressed. */
+		compr_size = frame_size;
+		ondisk_size = frame_size;
+	} else if (compr_size) {
+		/* Frame is compressed. */
+		ondisk_size = ntfs_up_cluster(sbi, compr_size);
+		memset(frame_ondisk + compr_size, 0, ondisk_size - compr_size);
+	} else {
+		/* Frame is sparsed. */
+		ondisk_size = 0;
+	}
+
+	down_write(&ni->file.run_lock);
+	run_truncate_around(&ni->file.run, le64_to_cpu(attr->nres.svcn));
+	err = attr_allocate_frame(ni, frame, compr_size, ni->i_valid);
+	up_write(&ni->file.run_lock);
+	if (err)
+		goto out2;
+
+	if (!ondisk_size)
+		goto out2;
+
+	down_read(&ni->file.run_lock);
+	err = ntfs_bio_pages(sbi, &ni->file.run,
+			     ondisk_size < frame_size ? pages_disk : pages,
+			     pages_per_frame, frame_vbo, ondisk_size,
+			     REQ_OP_WRITE);
+	up_read(&ni->file.run_lock);
+
+out3:
+	vunmap(frame_mem);
+
+out2:
+	for (i = 0; i < pages_per_frame; i++)
+		kunmap(pages[i]);
+
+	vunmap(frame_ondisk);
+out1:
+	for (i = 0; i < pages_per_frame; i++) {
+		pg = pages_disk[i];
+		if (pg) {
+			kunmap(pg);
+			unlock_page(pg);
+			put_page(pg);
+		}
+	}
+	kfree(pages_disk);
+out:
+	return err;
+}
+
+/*
+ * ni_remove_name - Removes name 'de' from MFT and from directory.
+ * 'de2' and 'undo_step' are used to restore MFT/dir, if error occurs.
+ */
+int ni_remove_name(struct ntfs_inode *dir_ni, struct ntfs_inode *ni,
+		   struct NTFS_DE *de, struct NTFS_DE **de2, int *undo_step)
+{
+	int err;
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	struct ATTR_FILE_NAME *de_name = (struct ATTR_FILE_NAME *)(de + 1);
+	struct ATTR_FILE_NAME *fname;
+	struct ATTR_LIST_ENTRY *le;
+	struct mft_inode *mi;
+	u16 de_key_size = le16_to_cpu(de->key_size);
+	u8 name_type;
+
+	*undo_step = 0;
+
+	/* Find name in record. */
+	mi_get_ref(&dir_ni->mi, &de_name->home);
+
+	fname = ni_fname_name(ni, (struct cpu_str *)&de_name->name_len,
+			      &de_name->home, &mi, &le);
+	if (!fname)
+		return -ENOENT;
+
+	memcpy(&de_name->dup, &fname->dup, sizeof(struct NTFS_DUP_INFO));
+	name_type = paired_name(fname->type);
+
+	/* Mark ntfs as dirty. It will be cleared at umount. */
+	ntfs_set_state(sbi, NTFS_DIRTY_DIRTY);
+
+	/* Step 1: Remove name from directory. */
+	err = indx_delete_entry(&dir_ni->dir, dir_ni, fname, de_key_size, sbi);
+	if (err)
+		return err;
+
+	/* Step 2: Remove name from MFT. */
+	ni_remove_attr_le(ni, attr_from_name(fname), mi, le);
+
+	*undo_step = 2;
+
+	/* Get paired name. */
+	fname = ni_fname_type(ni, name_type, &mi, &le);
+	if (fname) {
+		u16 de2_key_size = fname_full_size(fname);
+
+		*de2 = Add2Ptr(de, 1024);
+		(*de2)->key_size = cpu_to_le16(de2_key_size);
+
+		memcpy(*de2 + 1, fname, de2_key_size);
+
+		/* Step 3: Remove paired name from directory. */
+		err = indx_delete_entry(&dir_ni->dir, dir_ni, fname,
+					de2_key_size, sbi);
+		if (err)
+			return err;
+
+		/* Step 4: Remove paired name from MFT. */
+		ni_remove_attr_le(ni, attr_from_name(fname), mi, le);
+
+		*undo_step = 4;
+	}
+	return 0;
+}
+
+/*
+ * ni_remove_name_undo - Paired function for ni_remove_name.
+ *
+ * Return: True if ok
+ */
+bool ni_remove_name_undo(struct ntfs_inode *dir_ni, struct ntfs_inode *ni,
+			 struct NTFS_DE *de, struct NTFS_DE *de2, int undo_step)
+{
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	struct ATTRIB *attr;
+	u16 de_key_size = de2 ? le16_to_cpu(de2->key_size) : 0;
+
+	switch (undo_step) {
+	case 4:
+		if (ni_insert_resident(ni, de_key_size, ATTR_NAME, NULL, 0,
+				       &attr, NULL, NULL)) {
+			return false;
+		}
+		memcpy(Add2Ptr(attr, SIZEOF_RESIDENT), de2 + 1, de_key_size);
+
+		mi_get_ref(&ni->mi, &de2->ref);
+		de2->size = cpu_to_le16(ALIGN(de_key_size, 8) +
+					sizeof(struct NTFS_DE));
+		de2->flags = 0;
+		de2->res = 0;
+
+		if (indx_insert_entry(&dir_ni->dir, dir_ni, de2, sbi, NULL,
+				      1)) {
+			return false;
+		}
+		fallthrough;
+
+	case 2:
+		de_key_size = le16_to_cpu(de->key_size);
+
+		if (ni_insert_resident(ni, de_key_size, ATTR_NAME, NULL, 0,
+				       &attr, NULL, NULL)) {
+			return false;
+		}
+
+		memcpy(Add2Ptr(attr, SIZEOF_RESIDENT), de + 1, de_key_size);
+		mi_get_ref(&ni->mi, &de->ref);
+
+		if (indx_insert_entry(&dir_ni->dir, dir_ni, de, sbi, NULL, 1)) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/*
+ * ni_add_name - Add new name in MFT and in directory.
+ */
+int ni_add_name(struct ntfs_inode *dir_ni, struct ntfs_inode *ni,
+		struct NTFS_DE *de)
+{
+	int err;
+	struct ATTRIB *attr;
+	struct ATTR_LIST_ENTRY *le;
+	struct mft_inode *mi;
+	struct ATTR_FILE_NAME *de_name = (struct ATTR_FILE_NAME *)(de + 1);
+	u16 de_key_size = le16_to_cpu(de->key_size);
+
+	mi_get_ref(&ni->mi, &de->ref);
+	mi_get_ref(&dir_ni->mi, &de_name->home);
+
+	/* Insert new name in MFT. */
+	err = ni_insert_resident(ni, de_key_size, ATTR_NAME, NULL, 0, &attr,
+				 &mi, &le);
+	if (err)
+		return err;
+
+	memcpy(Add2Ptr(attr, SIZEOF_RESIDENT), de_name, de_key_size);
+
+	/* Insert new name in directory. */
+	err = indx_insert_entry(&dir_ni->dir, dir_ni, de, ni->mi.sbi, NULL, 0);
+	if (err)
+		ni_remove_attr_le(ni, attr, mi, le);
+
+	return err;
+}
+
+/*
+ * ni_rename - Remove one name and insert new name.
+ */
+int ni_rename(struct ntfs_inode *dir_ni, struct ntfs_inode *new_dir_ni,
+	      struct ntfs_inode *ni, struct NTFS_DE *de, struct NTFS_DE *new_de,
+	      bool *is_bad)
+{
+	int err;
+	struct NTFS_DE *de2 = NULL;
+	int undo = 0;
+
+	/*
+	 * There are two possible ways to rename:
+	 * 1) Add new name and remove old name.
+	 * 2) Remove old name and add new name.
+	 *
+	 * In most cases (not all!) adding new name in MFT and in directory can
+	 * allocate additional cluster(s).
+	 * Second way may result to bad inode if we can't add new name
+	 * and then can't restore (add) old name.
+	 */
+
+	/*
+	 * Way 1 - Add new + remove old.
+	 */
+	err = ni_add_name(new_dir_ni, ni, new_de);
+	if (!err) {
+		err = ni_remove_name(dir_ni, ni, de, &de2, &undo);
+		if (err && ni_remove_name(new_dir_ni, ni, new_de, &de2, &undo))
+			*is_bad = true;
+	}
+
+	/*
+	 * Way 2 - Remove old + add new.
+	 */
+	/*
+	 *	err = ni_remove_name(dir_ni, ni, de, &de2, &undo);
+	 *	if (!err) {
+	 *		err = ni_add_name(new_dir_ni, ni, new_de);
+	 *		if (err && !ni_remove_name_undo(dir_ni, ni, de, de2, undo))
+	 *			*is_bad = true;
+	 *	}
+	 */
+
+	return err;
+}
+
+/*
+ * ni_is_dirty - Return: True if 'ni' requires ni_write_inode.
+ */
+bool ni_is_dirty(struct inode *inode)
+{
+	struct ntfs_inode *ni = ntfs_i(inode);
+	struct rb_node *node;
+
+	if (ni->mi.dirty || ni->attr_list.dirty ||
+	    (ni->ni_flags & NI_FLAG_UPDATE_PARENT))
+		return true;
+
+	for (node = rb_first(&ni->mi_tree); node; node = rb_next(node)) {
+		if (rb_entry(node, struct mft_inode, node)->dirty)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * ni_update_parent
+ *
+ * Update duplicate info of ATTR_FILE_NAME in MFT and in parent directories.
+ */
+static bool ni_update_parent(struct ntfs_inode *ni, struct NTFS_DUP_INFO *dup,
+			     int sync)
+{
+	struct ATTRIB *attr;
+	struct mft_inode *mi;
+	struct ATTR_LIST_ENTRY *le = NULL;
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	struct super_block *sb = sbi->sb;
+	bool re_dirty = false;
+
+	if (ni->mi.mrec->flags & RECORD_FLAG_DIR) {
+		dup->fa |= FILE_ATTRIBUTE_DIRECTORY;
+		attr = NULL;
+		dup->alloc_size = 0;
+		dup->data_size = 0;
+	} else {
+		dup->fa &= ~FILE_ATTRIBUTE_DIRECTORY;
+
+		attr = ni_find_attr(ni, NULL, &le, ATTR_DATA, NULL, 0, NULL,
+				    &mi);
+		if (!attr) {
+			dup->alloc_size = dup->data_size = 0;
+		} else if (!attr->non_res) {
+			u32 data_size = le32_to_cpu(attr->res.data_size);
+
+			dup->alloc_size = cpu_to_le64(ALIGN(data_size, 8));
+			dup->data_size = cpu_to_le64(data_size);
+		} else {
+			u64 new_valid = ni->i_valid;
+			u64 data_size = le64_to_cpu(attr->nres.data_size);
+			__le64 valid_le;
+
+			dup->alloc_size = is_attr_ext(attr)
+						  ? attr->nres.total_size
+						  : attr->nres.alloc_size;
+			dup->data_size = attr->nres.data_size;
+
+			if (new_valid > data_size)
+				new_valid = data_size;
+
+			valid_le = cpu_to_le64(new_valid);
+			if (valid_le != attr->nres.valid_size) {
+				attr->nres.valid_size = valid_le;
+				mi->dirty = true;
+			}
+		}
+	}
+
+	/* TODO: Fill reparse info. */
+	dup->reparse = 0;
+	dup->ea_size = 0;
+
+	if (ni->ni_flags & NI_FLAG_EA) {
+		attr = ni_find_attr(ni, attr, &le, ATTR_EA_INFO, NULL, 0, NULL,
+				    NULL);
+		if (attr) {
+			const struct EA_INFO *info;
+
+			info = resident_data_ex(attr, sizeof(struct EA_INFO));
+			dup->ea_size = info->size_pack;
+		}
+	}
+
+	attr = NULL;
+	le = NULL;
+
+	while ((attr = ni_find_attr(ni, attr, &le, ATTR_NAME, NULL, 0, NULL,
+				    &mi))) {
+		struct inode *dir;
+		struct ATTR_FILE_NAME *fname;
+
+		fname = resident_data_ex(attr, SIZEOF_ATTRIBUTE_FILENAME);
+		if (!fname || !memcmp(&fname->dup, dup, sizeof(fname->dup)))
+			continue;
+
+		/* ntfs_iget5 may sleep. */
+		dir = ntfs_iget5(sb, &fname->home, NULL);
+		if (IS_ERR(dir)) {
+			ntfs_inode_warn(
+				&ni->vfs_inode,
+				"failed to open parent directory r=%lx to update",
+				(long)ino_get(&fname->home));
+			continue;
+		}
+
+		if (!is_bad_inode(dir)) {
+			struct ntfs_inode *dir_ni = ntfs_i(dir);
+
+			if (!ni_trylock(dir_ni)) {
+				re_dirty = true;
+			} else {
+				indx_update_dup(dir_ni, sbi, fname, dup, sync);
+				ni_unlock(dir_ni);
+				memcpy(&fname->dup, dup, sizeof(fname->dup));
+				mi->dirty = true;
+			}
+		}
+		iput(dir);
+	}
+
+	return re_dirty;
+}
+
+/*
+ * ni_write_inode - Write MFT base record and all subrecords to disk.
+ */
+int ni_write_inode(struct inode *inode, int sync, const char *hint)
+{
+	int err = 0, err2;
+	struct ntfs_inode *ni = ntfs_i(inode);
+	struct super_block *sb = inode->i_sb;
+	struct ntfs_sb_info *sbi = sb->s_fs_info;
+	bool re_dirty = false;
+	struct ATTR_STD_INFO *std;
+	struct rb_node *node, *next;
+	struct NTFS_DUP_INFO dup;
+
+	if (is_bad_inode(inode) || sb_rdonly(sb))
+		return 0;
+
+	if (!ni_trylock(ni)) {
+		/* 'ni' is under modification, skip for now. */
+		mark_inode_dirty_sync(inode);
+		return 0;
+	}
+
+	if (is_rec_inuse(ni->mi.mrec) &&
+	    !(sbi->flags & NTFS_FLAGS_LOG_REPLAYING) && inode->i_nlink) {
+		bool modified = false;
+
+		/* Update times in standard attribute. */
+		std = ni_std(ni);
+		if (!std) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		/* Update the access times if they have changed. */
+		dup.m_time = kernel2nt(&inode->i_mtime);
+		if (std->m_time != dup.m_time) {
+			std->m_time = dup.m_time;
+			modified = true;
+		}
+
+		dup.c_time = kernel2nt(&inode->i_ctime);
+		if (std->c_time != dup.c_time) {
+			std->c_time = dup.c_time;
+			modified = true;
+		}
+
+		dup.a_time = kernel2nt(&inode->i_atime);
+		if (std->a_time != dup.a_time) {
+			std->a_time = dup.a_time;
+			modified = true;
+		}
+
+		dup.fa = ni->std_fa;
+		if (std->fa != dup.fa) {
+			std->fa = dup.fa;
+			modified = true;
+		}
+
+		if (modified)
+			ni->mi.dirty = true;
+
+		if (!ntfs_is_meta_file(sbi, inode->i_ino) &&
+		    (modified || (ni->ni_flags & NI_FLAG_UPDATE_PARENT))
+		    /* Avoid __wait_on_freeing_inode(inode). */
+		    && (sb->s_flags & SB_ACTIVE)) {
+			dup.cr_time = std->cr_time;
+			/* Not critical if this function fail. */
+			re_dirty = ni_update_parent(ni, &dup, sync);
+
+			if (re_dirty)
+				ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
+			else
+				ni->ni_flags &= ~NI_FLAG_UPDATE_PARENT;
+		}
+
+		/* Update attribute list. */
+		if (ni->attr_list.size && ni->attr_list.dirty) {
+			if (inode->i_ino != MFT_REC_MFT || sync) {
+				err = ni_try_remove_attr_list(ni);
+				if (err)
+					goto out;
+			}
+
+			err = al_update(ni);
+			if (err)
+				goto out;
+		}
+	}
+
+	for (node = rb_first(&ni->mi_tree); node; node = next) {
+		struct mft_inode *mi = rb_entry(node, struct mft_inode, node);
+		bool is_empty;
+
+		next = rb_next(node);
+
+		if (!mi->dirty)
+			continue;
+
+		is_empty = !mi_enum_attr(mi, NULL);
+
+		if (is_empty)
+			clear_rec_inuse(mi->mrec);
+
+		err2 = mi_write(mi, sync);
+		if (!err && err2)
+			err = err2;
+
+		if (is_empty) {
+			ntfs_mark_rec_free(sbi, mi->rno);
+			rb_erase(node, &ni->mi_tree);
+			mi_put(mi);
+		}
+	}
+
+	if (ni->mi.dirty) {
+		err2 = mi_write(&ni->mi, sync);
+		if (!err && err2)
+			err = err2;
+	}
+out:
+	ni_unlock(ni);
+
+	if (err) {
+		ntfs_err(sb, "%s r=%lx failed, %d.", hint, inode->i_ino, err);
+		ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
+		return err;
+	}
+
+	if (re_dirty)
+		mark_inode_dirty_sync(inode);
+
+	return 0;
+}
diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c
new file mode 100644
index 000000000000..b5853aed0e25
--- /dev/null
+++ b/fs/ntfs3/fslog.c
@@ -0,0 +1,5217 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/fs.h>
+#include <linux/hash.h>
+#include <linux/nls.h>
+#include <linux/random.h>
+#include <linux/ratelimit.h>
+#include <linux/slab.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+/*
+ * LOG FILE structs
+ */
+
+// clang-format off
+
+#define MaxLogFileSize     0x100000000ull
+#define DefaultLogPageSize 4096
+#define MinLogRecordPages  0x30
+
+struct RESTART_HDR {
+	struct NTFS_RECORD_HEADER rhdr; // 'RSTR'
+	__le32 sys_page_size; // 0x10: Page size of the system which initialized the log.
+	__le32 page_size;     // 0x14: Log page size used for this log file.
+	__le16 ra_off;        // 0x18:
+	__le16 minor_ver;     // 0x1A:
+	__le16 major_ver;     // 0x1C:
+	__le16 fixups[];
+};
+
+#define LFS_NO_CLIENT 0xffff
+#define LFS_NO_CLIENT_LE cpu_to_le16(0xffff)
+
+struct CLIENT_REC {
+	__le64 oldest_lsn;
+	__le64 restart_lsn; // 0x08:
+	__le16 prev_client; // 0x10:
+	__le16 next_client; // 0x12:
+	__le16 seq_num;     // 0x14:
+	u8 align[6];        // 0x16:
+	__le32 name_bytes;  // 0x1C: In bytes.
+	__le16 name[32];    // 0x20: Name of client.
+};
+
+static_assert(sizeof(struct CLIENT_REC) == 0x60);
+
+/* Two copies of these will exist at the beginning of the log file */
+struct RESTART_AREA {
+	__le64 current_lsn;    // 0x00: Current logical end of log file.
+	__le16 log_clients;    // 0x08: Maximum number of clients.
+	__le16 client_idx[2];  // 0x0A: Free/use index into the client record arrays.
+	__le16 flags;          // 0x0E: See RESTART_SINGLE_PAGE_IO.
+	__le32 seq_num_bits;   // 0x10: The number of bits in sequence number.
+	__le16 ra_len;         // 0x14:
+	__le16 client_off;     // 0x16:
+	__le64 l_size;         // 0x18: Usable log file size.
+	__le32 last_lsn_data_len; // 0x20:
+	__le16 rec_hdr_len;    // 0x24: Log page data offset.
+	__le16 data_off;       // 0x26: Log page data length.
+	__le32 open_log_count; // 0x28:
+	__le32 align[5];       // 0x2C:
+	struct CLIENT_REC clients[]; // 0x40:
+};
+
+struct LOG_REC_HDR {
+	__le16 redo_op;      // 0x00:  NTFS_LOG_OPERATION
+	__le16 undo_op;      // 0x02:  NTFS_LOG_OPERATION
+	__le16 redo_off;     // 0x04:  Offset to Redo record.
+	__le16 redo_len;     // 0x06:  Redo length.
+	__le16 undo_off;     // 0x08:  Offset to Undo record.
+	__le16 undo_len;     // 0x0A:  Undo length.
+	__le16 target_attr;  // 0x0C:
+	__le16 lcns_follow;  // 0x0E:
+	__le16 record_off;   // 0x10:
+	__le16 attr_off;     // 0x12:
+	__le16 cluster_off;  // 0x14:
+	__le16 reserved;     // 0x16:
+	__le64 target_vcn;   // 0x18:
+	__le64 page_lcns[];  // 0x20:
+};
+
+static_assert(sizeof(struct LOG_REC_HDR) == 0x20);
+
+#define RESTART_ENTRY_ALLOCATED    0xFFFFFFFF
+#define RESTART_ENTRY_ALLOCATED_LE cpu_to_le32(0xFFFFFFFF)
+
+struct RESTART_TABLE {
+	__le16 size;       // 0x00: In bytes
+	__le16 used;       // 0x02: Entries
+	__le16 total;      // 0x04: Entries
+	__le16 res[3];     // 0x06:
+	__le32 free_goal;  // 0x0C:
+	__le32 first_free; // 0x10:
+	__le32 last_free;  // 0x14:
+
+};
+
+static_assert(sizeof(struct RESTART_TABLE) == 0x18);
+
+struct ATTR_NAME_ENTRY {
+	__le16 off; // Offset in the Open attribute Table.
+	__le16 name_bytes;
+	__le16 name[];
+};
+
+struct OPEN_ATTR_ENRTY {
+	__le32 next;            // 0x00: RESTART_ENTRY_ALLOCATED if allocated
+	__le32 bytes_per_index; // 0x04:
+	enum ATTR_TYPE type;    // 0x08:
+	u8 is_dirty_pages;      // 0x0C:
+	u8 is_attr_name;        // 0x0B: Faked field to manage 'ptr'
+	u8 name_len;            // 0x0C: Faked field to manage 'ptr'
+	u8 res;
+	struct MFT_REF ref;     // 0x10: File Reference of file containing attribute
+	__le64 open_record_lsn; // 0x18:
+	void *ptr;              // 0x20:
+};
+
+/* 32 bit version of 'struct OPEN_ATTR_ENRTY' */
+struct OPEN_ATTR_ENRTY_32 {
+	__le32 next;            // 0x00: RESTART_ENTRY_ALLOCATED if allocated
+	__le32 ptr;             // 0x04:
+	struct MFT_REF ref;     // 0x08:
+	__le64 open_record_lsn; // 0x10:
+	u8 is_dirty_pages;      // 0x18:
+	u8 is_attr_name;        // 0x19:
+	u8 res1[2];
+	enum ATTR_TYPE type;    // 0x1C:
+	u8 name_len;            // 0x20: In wchar
+	u8 res2[3];
+	__le32 AttributeName;   // 0x24:
+	__le32 bytes_per_index; // 0x28:
+};
+
+#define SIZEOF_OPENATTRIBUTEENTRY0 0x2c
+// static_assert( 0x2C == sizeof(struct OPEN_ATTR_ENRTY_32) );
+static_assert(sizeof(struct OPEN_ATTR_ENRTY) < SIZEOF_OPENATTRIBUTEENTRY0);
+
+/*
+ * One entry exists in the Dirty Pages Table for each page which is dirty at
+ * the time the Restart Area is written.
+ */
+struct DIR_PAGE_ENTRY {
+	__le32 next;         // 0x00: RESTART_ENTRY_ALLOCATED if allocated
+	__le32 target_attr;  // 0x04: Index into the Open attribute Table
+	__le32 transfer_len; // 0x08:
+	__le32 lcns_follow;  // 0x0C:
+	__le64 vcn;          // 0x10: Vcn of dirty page
+	__le64 oldest_lsn;   // 0x18:
+	__le64 page_lcns[];  // 0x20:
+};
+
+static_assert(sizeof(struct DIR_PAGE_ENTRY) == 0x20);
+
+/* 32 bit version of 'struct DIR_PAGE_ENTRY' */
+struct DIR_PAGE_ENTRY_32 {
+	__le32 next;		// 0x00: RESTART_ENTRY_ALLOCATED if allocated
+	__le32 target_attr;	// 0x04: Index into the Open attribute Table
+	__le32 transfer_len;	// 0x08:
+	__le32 lcns_follow;	// 0x0C:
+	__le32 reserved;	// 0x10:
+	__le32 vcn_low;		// 0x14: Vcn of dirty page
+	__le32 vcn_hi;		// 0x18: Vcn of dirty page
+	__le32 oldest_lsn_low;	// 0x1C:
+	__le32 oldest_lsn_hi;	// 0x1C:
+	__le32 page_lcns_low;	// 0x24:
+	__le32 page_lcns_hi;	// 0x24:
+};
+
+static_assert(offsetof(struct DIR_PAGE_ENTRY_32, vcn_low) == 0x14);
+static_assert(sizeof(struct DIR_PAGE_ENTRY_32) == 0x2c);
+
+enum transact_state {
+	TransactionUninitialized = 0,
+	TransactionActive,
+	TransactionPrepared,
+	TransactionCommitted
+};
+
+struct TRANSACTION_ENTRY {
+	__le32 next;          // 0x00: RESTART_ENTRY_ALLOCATED if allocated
+	u8 transact_state;    // 0x04:
+	u8 reserved[3];       // 0x05:
+	__le64 first_lsn;     // 0x08:
+	__le64 prev_lsn;      // 0x10:
+	__le64 undo_next_lsn; // 0x18:
+	__le32 undo_records;  // 0x20: Number of undo log records pending abort
+	__le32 undo_len;      // 0x24: Total undo size
+};
+
+static_assert(sizeof(struct TRANSACTION_ENTRY) == 0x28);
+
+struct NTFS_RESTART {
+	__le32 major_ver;             // 0x00:
+	__le32 minor_ver;             // 0x04:
+	__le64 check_point_start;     // 0x08:
+	__le64 open_attr_table_lsn;   // 0x10:
+	__le64 attr_names_lsn;        // 0x18:
+	__le64 dirty_pages_table_lsn; // 0x20:
+	__le64 transact_table_lsn;    // 0x28:
+	__le32 open_attr_len;         // 0x30: In bytes
+	__le32 attr_names_len;        // 0x34: In bytes
+	__le32 dirty_pages_len;       // 0x38: In bytes
+	__le32 transact_table_len;    // 0x3C: In bytes
+};
+
+static_assert(sizeof(struct NTFS_RESTART) == 0x40);
+
+struct NEW_ATTRIBUTE_SIZES {
+	__le64 alloc_size;
+	__le64 valid_size;
+	__le64 data_size;
+	__le64 total_size;
+};
+
+struct BITMAP_RANGE {
+	__le32 bitmap_off;
+	__le32 bits;
+};
+
+struct LCN_RANGE {
+	__le64 lcn;
+	__le64 len;
+};
+
+/* The following type defines the different log record types. */
+#define LfsClientRecord  cpu_to_le32(1)
+#define LfsClientRestart cpu_to_le32(2)
+
+/* This is used to uniquely identify a client for a particular log file. */
+struct CLIENT_ID {
+	__le16 seq_num;
+	__le16 client_idx;
+};
+
+/* This is the header that begins every Log Record in the log file. */
+struct LFS_RECORD_HDR {
+	__le64 this_lsn;		// 0x00:
+	__le64 client_prev_lsn;		// 0x08:
+	__le64 client_undo_next_lsn;	// 0x10:
+	__le32 client_data_len;		// 0x18:
+	struct CLIENT_ID client;	// 0x1C: Owner of this log record.
+	__le32 record_type;		// 0x20: LfsClientRecord or LfsClientRestart.
+	__le32 transact_id;		// 0x24:
+	__le16 flags;			// 0x28: LOG_RECORD_MULTI_PAGE
+	u8 align[6];			// 0x2A:
+};
+
+#define LOG_RECORD_MULTI_PAGE cpu_to_le16(1)
+
+static_assert(sizeof(struct LFS_RECORD_HDR) == 0x30);
+
+struct LFS_RECORD {
+	__le16 next_record_off;	// 0x00: Offset of the free space in the page,
+	u8 align[6];		// 0x02:
+	__le64 last_end_lsn;	// 0x08: lsn for the last log record which ends on the page,
+};
+
+static_assert(sizeof(struct LFS_RECORD) == 0x10);
+
+struct RECORD_PAGE_HDR {
+	struct NTFS_RECORD_HEADER rhdr;	// 'RCRD'
+	__le32 rflags;			// 0x10: See LOG_PAGE_LOG_RECORD_END
+	__le16 page_count;		// 0x14:
+	__le16 page_pos;		// 0x16:
+	struct LFS_RECORD record_hdr;	// 0x18:
+	__le16 fixups[10];		// 0x28:
+	__le32 file_off;		// 0x3c: Used when major version >= 2
+};
+
+// clang-format on
+
+// Page contains the end of a log record.
+#define LOG_PAGE_LOG_RECORD_END cpu_to_le32(0x00000001)
+
+static inline bool is_log_record_end(const struct RECORD_PAGE_HDR *hdr)
+{
+	return hdr->rflags & LOG_PAGE_LOG_RECORD_END;
+}
+
+static_assert(offsetof(struct RECORD_PAGE_HDR, file_off) == 0x3c);
+
+/*
+ * END of NTFS LOG structures
+ */
+
+/* Define some tuning parameters to keep the restart tables a reasonable size. */
+#define INITIAL_NUMBER_TRANSACTIONS 5
+
+enum NTFS_LOG_OPERATION {
+
+	Noop = 0x00,
+	CompensationLogRecord = 0x01,
+	InitializeFileRecordSegment = 0x02,
+	DeallocateFileRecordSegment = 0x03,
+	WriteEndOfFileRecordSegment = 0x04,
+	CreateAttribute = 0x05,
+	DeleteAttribute = 0x06,
+	UpdateResidentValue = 0x07,
+	UpdateNonresidentValue = 0x08,
+	UpdateMappingPairs = 0x09,
+	DeleteDirtyClusters = 0x0A,
+	SetNewAttributeSizes = 0x0B,
+	AddIndexEntryRoot = 0x0C,
+	DeleteIndexEntryRoot = 0x0D,
+	AddIndexEntryAllocation = 0x0E,
+	DeleteIndexEntryAllocation = 0x0F,
+	WriteEndOfIndexBuffer = 0x10,
+	SetIndexEntryVcnRoot = 0x11,
+	SetIndexEntryVcnAllocation = 0x12,
+	UpdateFileNameRoot = 0x13,
+	UpdateFileNameAllocation = 0x14,
+	SetBitsInNonresidentBitMap = 0x15,
+	ClearBitsInNonresidentBitMap = 0x16,
+	HotFix = 0x17,
+	EndTopLevelAction = 0x18,
+	PrepareTransaction = 0x19,
+	CommitTransaction = 0x1A,
+	ForgetTransaction = 0x1B,
+	OpenNonresidentAttribute = 0x1C,
+	OpenAttributeTableDump = 0x1D,
+	AttributeNamesDump = 0x1E,
+	DirtyPageTableDump = 0x1F,
+	TransactionTableDump = 0x20,
+	UpdateRecordDataRoot = 0x21,
+	UpdateRecordDataAllocation = 0x22,
+
+	UpdateRelativeDataInIndex =
+		0x23, // NtOfsRestartUpdateRelativeDataInIndex
+	UpdateRelativeDataInIndex2 = 0x24,
+	ZeroEndOfFileRecord = 0x25,
+};
+
+/*
+ * Array for log records which require a target attribute.
+ * A true indicates that the corresponding restart operation
+ * requires a target attribute.
+ */
+static const u8 AttributeRequired[] = {
+	0xFC, 0xFB, 0xFF, 0x10, 0x06,
+};
+
+static inline bool is_target_required(u16 op)
+{
+	bool ret = op <= UpdateRecordDataAllocation &&
+		   (AttributeRequired[op >> 3] >> (op & 7) & 1);
+	return ret;
+}
+
+static inline bool can_skip_action(enum NTFS_LOG_OPERATION op)
+{
+	switch (op) {
+	case Noop:
+	case DeleteDirtyClusters:
+	case HotFix:
+	case EndTopLevelAction:
+	case PrepareTransaction:
+	case CommitTransaction:
+	case ForgetTransaction:
+	case CompensationLogRecord:
+	case OpenNonresidentAttribute:
+	case OpenAttributeTableDump:
+	case AttributeNamesDump:
+	case DirtyPageTableDump:
+	case TransactionTableDump:
+		return true;
+	default:
+		return false;
+	}
+}
+
+enum { lcb_ctx_undo_next, lcb_ctx_prev, lcb_ctx_next };
+
+/* Bytes per restart table. */
+static inline u32 bytes_per_rt(const struct RESTART_TABLE *rt)
+{
+	return le16_to_cpu(rt->used) * le16_to_cpu(rt->size) +
+	       sizeof(struct RESTART_TABLE);
+}
+
+/* Log record length. */
+static inline u32 lrh_length(const struct LOG_REC_HDR *lr)
+{
+	u16 t16 = le16_to_cpu(lr->lcns_follow);
+
+	return struct_size(lr, page_lcns, max_t(u16, 1, t16));
+}
+
+struct lcb {
+	struct LFS_RECORD_HDR *lrh; // Log record header of the current lsn.
+	struct LOG_REC_HDR *log_rec;
+	u32 ctx_mode; // lcb_ctx_undo_next/lcb_ctx_prev/lcb_ctx_next
+	struct CLIENT_ID client;
+	bool alloc; // If true the we should deallocate 'log_rec'.
+};
+
+static void lcb_put(struct lcb *lcb)
+{
+	if (lcb->alloc)
+		kfree(lcb->log_rec);
+	kfree(lcb->lrh);
+	kfree(lcb);
+}
+
+/* Find the oldest lsn from active clients. */
+static inline void oldest_client_lsn(const struct CLIENT_REC *ca,
+				     __le16 next_client, u64 *oldest_lsn)
+{
+	while (next_client != LFS_NO_CLIENT_LE) {
+		const struct CLIENT_REC *cr = ca + le16_to_cpu(next_client);
+		u64 lsn = le64_to_cpu(cr->oldest_lsn);
+
+		/* Ignore this block if it's oldest lsn is 0. */
+		if (lsn && lsn < *oldest_lsn)
+			*oldest_lsn = lsn;
+
+		next_client = cr->next_client;
+	}
+}
+
+static inline bool is_rst_page_hdr_valid(u32 file_off,
+					 const struct RESTART_HDR *rhdr)
+{
+	u32 sys_page = le32_to_cpu(rhdr->sys_page_size);
+	u32 page_size = le32_to_cpu(rhdr->page_size);
+	u32 end_usa;
+	u16 ro;
+
+	if (sys_page < SECTOR_SIZE || page_size < SECTOR_SIZE ||
+	    sys_page & (sys_page - 1) || page_size & (page_size - 1)) {
+		return false;
+	}
+
+	/* Check that if the file offset isn't 0, it is the system page size. */
+	if (file_off && file_off != sys_page)
+		return false;
+
+	/* Check support version 1.1+. */
+	if (le16_to_cpu(rhdr->major_ver) <= 1 && !rhdr->minor_ver)
+		return false;
+
+	if (le16_to_cpu(rhdr->major_ver) > 2)
+		return false;
+
+	ro = le16_to_cpu(rhdr->ra_off);
+	if (!IS_ALIGNED(ro, 8) || ro > sys_page)
+		return false;
+
+	end_usa = ((sys_page >> SECTOR_SHIFT) + 1) * sizeof(short);
+	end_usa += le16_to_cpu(rhdr->rhdr.fix_off);
+
+	if (ro < end_usa)
+		return false;
+
+	return true;
+}
+
+static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr)
+{
+	const struct RESTART_AREA *ra;
+	u16 cl, fl, ul;
+	u32 off, l_size, file_dat_bits, file_size_round;
+	u16 ro = le16_to_cpu(rhdr->ra_off);
+	u32 sys_page = le32_to_cpu(rhdr->sys_page_size);
+
+	if (ro + offsetof(struct RESTART_AREA, l_size) >
+	    SECTOR_SIZE - sizeof(short))
+		return false;
+
+	ra = Add2Ptr(rhdr, ro);
+	cl = le16_to_cpu(ra->log_clients);
+
+	if (cl > 1)
+		return false;
+
+	off = le16_to_cpu(ra->client_off);
+
+	if (!IS_ALIGNED(off, 8) || ro + off > SECTOR_SIZE - sizeof(short))
+		return false;
+
+	off += cl * sizeof(struct CLIENT_REC);
+
+	if (off > sys_page)
+		return false;
+
+	/*
+	 * Check the restart length field and whether the entire
+	 * restart area is contained that length.
+	 */
+	if (le16_to_cpu(rhdr->ra_off) + le16_to_cpu(ra->ra_len) > sys_page ||
+	    off > le16_to_cpu(ra->ra_len)) {
+		return false;
+	}
+
+	/*
+	 * As a final check make sure that the use list and the free list
+	 * are either empty or point to a valid client.
+	 */
+	fl = le16_to_cpu(ra->client_idx[0]);
+	ul = le16_to_cpu(ra->client_idx[1]);
+	if ((fl != LFS_NO_CLIENT && fl >= cl) ||
+	    (ul != LFS_NO_CLIENT && ul >= cl))
+		return false;
+
+	/* Make sure the sequence number bits match the log file size. */
+	l_size = le64_to_cpu(ra->l_size);
+
+	file_dat_bits = sizeof(u64) * 8 - le32_to_cpu(ra->seq_num_bits);
+	file_size_round = 1u << (file_dat_bits + 3);
+	if (file_size_round != l_size &&
+	    (file_size_round < l_size || (file_size_round / 2) > l_size)) {
+		return false;
+	}
+
+	/* The log page data offset and record header length must be quad-aligned. */
+	if (!IS_ALIGNED(le16_to_cpu(ra->data_off), 8) ||
+	    !IS_ALIGNED(le16_to_cpu(ra->rec_hdr_len), 8))
+		return false;
+
+	return true;
+}
+
+static inline bool is_client_area_valid(const struct RESTART_HDR *rhdr,
+					bool usa_error)
+{
+	u16 ro = le16_to_cpu(rhdr->ra_off);
+	const struct RESTART_AREA *ra = Add2Ptr(rhdr, ro);
+	u16 ra_len = le16_to_cpu(ra->ra_len);
+	const struct CLIENT_REC *ca;
+	u32 i;
+
+	if (usa_error && ra_len + ro > SECTOR_SIZE - sizeof(short))
+		return false;
+
+	/* Find the start of the client array. */
+	ca = Add2Ptr(ra, le16_to_cpu(ra->client_off));
+
+	/*
+	 * Start with the free list.
+	 * Check that all the clients are valid and that there isn't a cycle.
+	 * Do the in-use list on the second pass.
+	 */
+	for (i = 0; i < 2; i++) {
+		u16 client_idx = le16_to_cpu(ra->client_idx[i]);
+		bool first_client = true;
+		u16 clients = le16_to_cpu(ra->log_clients);
+
+		while (client_idx != LFS_NO_CLIENT) {
+			const struct CLIENT_REC *cr;
+
+			if (!clients ||
+			    client_idx >= le16_to_cpu(ra->log_clients))
+				return false;
+
+			clients -= 1;
+			cr = ca + client_idx;
+
+			client_idx = le16_to_cpu(cr->next_client);
+
+			if (first_client) {
+				first_client = false;
+				if (cr->prev_client != LFS_NO_CLIENT_LE)
+					return false;
+			}
+		}
+	}
+
+	return true;
+}
+
+/*
+ * remove_client
+ *
+ * Remove a client record from a client record list an restart area.
+ */
+static inline void remove_client(struct CLIENT_REC *ca,
+				 const struct CLIENT_REC *cr, __le16 *head)
+{
+	if (cr->prev_client == LFS_NO_CLIENT_LE)
+		*head = cr->next_client;
+	else
+		ca[le16_to_cpu(cr->prev_client)].next_client = cr->next_client;
+
+	if (cr->next_client != LFS_NO_CLIENT_LE)
+		ca[le16_to_cpu(cr->next_client)].prev_client = cr->prev_client;
+}
+
+/*
+ * add_client - Add a client record to the start of a list.
+ */
+static inline void add_client(struct CLIENT_REC *ca, u16 index, __le16 *head)
+{
+	struct CLIENT_REC *cr = ca + index;
+
+	cr->prev_client = LFS_NO_CLIENT_LE;
+	cr->next_client = *head;
+
+	if (*head != LFS_NO_CLIENT_LE)
+		ca[le16_to_cpu(*head)].prev_client = cpu_to_le16(index);
+
+	*head = cpu_to_le16(index);
+}
+
+static inline void *enum_rstbl(struct RESTART_TABLE *t, void *c)
+{
+	__le32 *e;
+	u32 bprt;
+	u16 rsize = t ? le16_to_cpu(t->size) : 0;
+
+	if (!c) {
+		if (!t || !t->total)
+			return NULL;
+		e = Add2Ptr(t, sizeof(struct RESTART_TABLE));
+	} else {
+		e = Add2Ptr(c, rsize);
+	}
+
+	/* Loop until we hit the first one allocated, or the end of the list. */
+	for (bprt = bytes_per_rt(t); PtrOffset(t, e) < bprt;
+	     e = Add2Ptr(e, rsize)) {
+		if (*e == RESTART_ENTRY_ALLOCATED_LE)
+			return e;
+	}
+	return NULL;
+}
+
+/*
+ * find_dp - Search for a @vcn in Dirty Page Table.
+ */
+static inline struct DIR_PAGE_ENTRY *find_dp(struct RESTART_TABLE *dptbl,
+					     u32 target_attr, u64 vcn)
+{
+	__le32 ta = cpu_to_le32(target_attr);
+	struct DIR_PAGE_ENTRY *dp = NULL;
+
+	while ((dp = enum_rstbl(dptbl, dp))) {
+		u64 dp_vcn = le64_to_cpu(dp->vcn);
+
+		if (dp->target_attr == ta && vcn >= dp_vcn &&
+		    vcn < dp_vcn + le32_to_cpu(dp->lcns_follow)) {
+			return dp;
+		}
+	}
+	return NULL;
+}
+
+static inline u32 norm_file_page(u32 page_size, u32 *l_size, bool use_default)
+{
+	if (use_default)
+		page_size = DefaultLogPageSize;
+
+	/* Round the file size down to a system page boundary. */
+	*l_size &= ~(page_size - 1);
+
+	/* File should contain at least 2 restart pages and MinLogRecordPages pages. */
+	if (*l_size < (MinLogRecordPages + 2) * page_size)
+		return 0;
+
+	return page_size;
+}
+
+static bool check_log_rec(const struct LOG_REC_HDR *lr, u32 bytes, u32 tr,
+			  u32 bytes_per_attr_entry)
+{
+	u16 t16;
+
+	if (bytes < sizeof(struct LOG_REC_HDR))
+		return false;
+	if (!tr)
+		return false;
+
+	if ((tr - sizeof(struct RESTART_TABLE)) %
+	    sizeof(struct TRANSACTION_ENTRY))
+		return false;
+
+	if (le16_to_cpu(lr->redo_off) & 7)
+		return false;
+
+	if (le16_to_cpu(lr->undo_off) & 7)
+		return false;
+
+	if (lr->target_attr)
+		goto check_lcns;
+
+	if (is_target_required(le16_to_cpu(lr->redo_op)))
+		return false;
+
+	if (is_target_required(le16_to_cpu(lr->undo_op)))
+		return false;
+
+check_lcns:
+	if (!lr->lcns_follow)
+		goto check_length;
+
+	t16 = le16_to_cpu(lr->target_attr);
+	if ((t16 - sizeof(struct RESTART_TABLE)) % bytes_per_attr_entry)
+		return false;
+
+check_length:
+	if (bytes < lrh_length(lr))
+		return false;
+
+	return true;
+}
+
+static bool check_rstbl(const struct RESTART_TABLE *rt, size_t bytes)
+{
+	u32 ts;
+	u32 i, off;
+	u16 rsize = le16_to_cpu(rt->size);
+	u16 ne = le16_to_cpu(rt->used);
+	u32 ff = le32_to_cpu(rt->first_free);
+	u32 lf = le32_to_cpu(rt->last_free);
+
+	ts = rsize * ne + sizeof(struct RESTART_TABLE);
+
+	if (!rsize || rsize > bytes ||
+	    rsize + sizeof(struct RESTART_TABLE) > bytes || bytes < ts ||
+	    le16_to_cpu(rt->total) > ne || ff > ts || lf > ts ||
+	    (ff && ff < sizeof(struct RESTART_TABLE)) ||
+	    (lf && lf < sizeof(struct RESTART_TABLE))) {
+		return false;
+	}
+
+	/*
+	 * Verify each entry is either allocated or points
+	 * to a valid offset the table.
+	 */
+	for (i = 0; i < ne; i++) {
+		off = le32_to_cpu(*(__le32 *)Add2Ptr(
+			rt, i * rsize + sizeof(struct RESTART_TABLE)));
+
+		if (off != RESTART_ENTRY_ALLOCATED && off &&
+		    (off < sizeof(struct RESTART_TABLE) ||
+		     ((off - sizeof(struct RESTART_TABLE)) % rsize))) {
+			return false;
+		}
+	}
+
+	/*
+	 * Walk through the list headed by the first entry to make
+	 * sure none of the entries are currently being used.
+	 */
+	for (off = ff; off;) {
+		if (off == RESTART_ENTRY_ALLOCATED)
+			return false;
+
+		off = le32_to_cpu(*(__le32 *)Add2Ptr(rt, off));
+	}
+
+	return true;
+}
+
+/*
+ * free_rsttbl_idx - Free a previously allocated index a Restart Table.
+ */
+static inline void free_rsttbl_idx(struct RESTART_TABLE *rt, u32 off)
+{
+	__le32 *e;
+	u32 lf = le32_to_cpu(rt->last_free);
+	__le32 off_le = cpu_to_le32(off);
+
+	e = Add2Ptr(rt, off);
+
+	if (off < le32_to_cpu(rt->free_goal)) {
+		*e = rt->first_free;
+		rt->first_free = off_le;
+		if (!lf)
+			rt->last_free = off_le;
+	} else {
+		if (lf)
+			*(__le32 *)Add2Ptr(rt, lf) = off_le;
+		else
+			rt->first_free = off_le;
+
+		rt->last_free = off_le;
+		*e = 0;
+	}
+
+	le16_sub_cpu(&rt->total, 1);
+}
+
+static inline struct RESTART_TABLE *init_rsttbl(u16 esize, u16 used)
+{
+	__le32 *e, *last_free;
+	u32 off;
+	u32 bytes = esize * used + sizeof(struct RESTART_TABLE);
+	u32 lf = sizeof(struct RESTART_TABLE) + (used - 1) * esize;
+	struct RESTART_TABLE *t = kzalloc(bytes, GFP_NOFS);
+
+	if (!t)
+		return NULL;
+
+	t->size = cpu_to_le16(esize);
+	t->used = cpu_to_le16(used);
+	t->free_goal = cpu_to_le32(~0u);
+	t->first_free = cpu_to_le32(sizeof(struct RESTART_TABLE));
+	t->last_free = cpu_to_le32(lf);
+
+	e = (__le32 *)(t + 1);
+	last_free = Add2Ptr(t, lf);
+
+	for (off = sizeof(struct RESTART_TABLE) + esize; e < last_free;
+	     e = Add2Ptr(e, esize), off += esize) {
+		*e = cpu_to_le32(off);
+	}
+	return t;
+}
+
+static inline struct RESTART_TABLE *extend_rsttbl(struct RESTART_TABLE *tbl,
+						  u32 add, u32 free_goal)
+{
+	u16 esize = le16_to_cpu(tbl->size);
+	__le32 osize = cpu_to_le32(bytes_per_rt(tbl));
+	u32 used = le16_to_cpu(tbl->used);
+	struct RESTART_TABLE *rt;
+
+	rt = init_rsttbl(esize, used + add);
+	if (!rt)
+		return NULL;
+
+	memcpy(rt + 1, tbl + 1, esize * used);
+
+	rt->free_goal = free_goal == ~0u
+				? cpu_to_le32(~0u)
+				: cpu_to_le32(sizeof(struct RESTART_TABLE) +
+					      free_goal * esize);
+
+	if (tbl->first_free) {
+		rt->first_free = tbl->first_free;
+		*(__le32 *)Add2Ptr(rt, le32_to_cpu(tbl->last_free)) = osize;
+	} else {
+		rt->first_free = osize;
+	}
+
+	rt->total = tbl->total;
+
+	kfree(tbl);
+	return rt;
+}
+
+/*
+ * alloc_rsttbl_idx
+ *
+ * Allocate an index from within a previously initialized Restart Table.
+ */
+static inline void *alloc_rsttbl_idx(struct RESTART_TABLE **tbl)
+{
+	u32 off;
+	__le32 *e;
+	struct RESTART_TABLE *t = *tbl;
+
+	if (!t->first_free) {
+		*tbl = t = extend_rsttbl(t, 16, ~0u);
+		if (!t)
+			return NULL;
+	}
+
+	off = le32_to_cpu(t->first_free);
+
+	/* Dequeue this entry and zero it. */
+	e = Add2Ptr(t, off);
+
+	t->first_free = *e;
+
+	memset(e, 0, le16_to_cpu(t->size));
+
+	*e = RESTART_ENTRY_ALLOCATED_LE;
+
+	/* If list is going empty, then we fix the last_free as well. */
+	if (!t->first_free)
+		t->last_free = 0;
+
+	le16_add_cpu(&t->total, 1);
+
+	return Add2Ptr(t, off);
+}
+
+/*
+ * alloc_rsttbl_from_idx
+ *
+ * Allocate a specific index from within a previously initialized Restart Table.
+ */
+static inline void *alloc_rsttbl_from_idx(struct RESTART_TABLE **tbl, u32 vbo)
+{
+	u32 off;
+	__le32 *e;
+	struct RESTART_TABLE *rt = *tbl;
+	u32 bytes = bytes_per_rt(rt);
+	u16 esize = le16_to_cpu(rt->size);
+
+	/* If the entry is not the table, we will have to extend the table. */
+	if (vbo >= bytes) {
+		/*
+		 * Extend the size by computing the number of entries between
+		 * the existing size and the desired index and adding 1 to that.
+		 */
+		u32 bytes2idx = vbo - bytes;
+
+		/*
+		 * There should always be an integral number of entries
+		 * being added. Now extend the table.
+		 */
+		*tbl = rt = extend_rsttbl(rt, bytes2idx / esize + 1, bytes);
+		if (!rt)
+			return NULL;
+	}
+
+	/* See if the entry is already allocated, and just return if it is. */
+	e = Add2Ptr(rt, vbo);
+
+	if (*e == RESTART_ENTRY_ALLOCATED_LE)
+		return e;
+
+	/*
+	 * Walk through the table, looking for the entry we're
+	 * interested and the previous entry.
+	 */
+	off = le32_to_cpu(rt->first_free);
+	e = Add2Ptr(rt, off);
+
+	if (off == vbo) {
+		/* this is a match */
+		rt->first_free = *e;
+		goto skip_looking;
+	}
+
+	/*
+	 * Need to walk through the list looking for the predecessor
+	 * of our entry.
+	 */
+	for (;;) {
+		/* Remember the entry just found */
+		u32 last_off = off;
+		__le32 *last_e = e;
+
+		/* Should never run of entries. */
+
+		/* Lookup up the next entry the list. */
+		off = le32_to_cpu(*last_e);
+		e = Add2Ptr(rt, off);
+
+		/* If this is our match we are done. */
+		if (off == vbo) {
+			*last_e = *e;
+
+			/*
+			 * If this was the last entry, we update that
+			 * table as well.
+			 */
+			if (le32_to_cpu(rt->last_free) == off)
+				rt->last_free = cpu_to_le32(last_off);
+			break;
+		}
+	}
+
+skip_looking:
+	/* If the list is now empty, we fix the last_free as well. */
+	if (!rt->first_free)
+		rt->last_free = 0;
+
+	/* Zero this entry. */
+	memset(e, 0, esize);
+	*e = RESTART_ENTRY_ALLOCATED_LE;
+
+	le16_add_cpu(&rt->total, 1);
+
+	return e;
+}
+
+#define RESTART_SINGLE_PAGE_IO cpu_to_le16(0x0001)
+
+#define NTFSLOG_WRAPPED 0x00000001
+#define NTFSLOG_MULTIPLE_PAGE_IO 0x00000002
+#define NTFSLOG_NO_LAST_LSN 0x00000004
+#define NTFSLOG_REUSE_TAIL 0x00000010
+#define NTFSLOG_NO_OLDEST_LSN 0x00000020
+
+/* Helper struct to work with NTFS $LogFile. */
+struct ntfs_log {
+	struct ntfs_inode *ni;
+
+	u32 l_size;
+	u32 sys_page_size;
+	u32 sys_page_mask;
+	u32 page_size;
+	u32 page_mask; // page_size - 1
+	u8 page_bits;
+	struct RECORD_PAGE_HDR *one_page_buf;
+
+	struct RESTART_TABLE *open_attr_tbl;
+	u32 transaction_id;
+	u32 clst_per_page;
+
+	u32 first_page;
+	u32 next_page;
+	u32 ra_off;
+	u32 data_off;
+	u32 restart_size;
+	u32 data_size;
+	u16 record_header_len;
+	u64 seq_num;
+	u32 seq_num_bits;
+	u32 file_data_bits;
+	u32 seq_num_mask; /* (1 << file_data_bits) - 1 */
+
+	struct RESTART_AREA *ra; /* In-memory image of the next restart area. */
+	u32 ra_size; /* The usable size of the restart area. */
+
+	/*
+	 * If true, then the in-memory restart area is to be written
+	 * to the first position on the disk.
+	 */
+	bool init_ra;
+	bool set_dirty; /* True if we need to set dirty flag. */
+
+	u64 oldest_lsn;
+
+	u32 oldest_lsn_off;
+	u64 last_lsn;
+
+	u32 total_avail;
+	u32 total_avail_pages;
+	u32 total_undo_commit;
+	u32 max_current_avail;
+	u32 current_avail;
+	u32 reserved;
+
+	short major_ver;
+	short minor_ver;
+
+	u32 l_flags; /* See NTFSLOG_XXX */
+	u32 current_openlog_count; /* On-disk value for open_log_count. */
+
+	struct CLIENT_ID client_id;
+	u32 client_undo_commit;
+};
+
+static inline u32 lsn_to_vbo(struct ntfs_log *log, const u64 lsn)
+{
+	u32 vbo = (lsn << log->seq_num_bits) >> (log->seq_num_bits - 3);
+
+	return vbo;
+}
+
+/* Compute the offset in the log file of the next log page. */
+static inline u32 next_page_off(struct ntfs_log *log, u32 off)
+{
+	off = (off & ~log->sys_page_mask) + log->page_size;
+	return off >= log->l_size ? log->first_page : off;
+}
+
+static inline u32 lsn_to_page_off(struct ntfs_log *log, u64 lsn)
+{
+	return (((u32)lsn) << 3) & log->page_mask;
+}
+
+static inline u64 vbo_to_lsn(struct ntfs_log *log, u32 off, u64 Seq)
+{
+	return (off >> 3) + (Seq << log->file_data_bits);
+}
+
+static inline bool is_lsn_in_file(struct ntfs_log *log, u64 lsn)
+{
+	return lsn >= log->oldest_lsn &&
+	       lsn <= le64_to_cpu(log->ra->current_lsn);
+}
+
+static inline u32 hdr_file_off(struct ntfs_log *log,
+			       struct RECORD_PAGE_HDR *hdr)
+{
+	if (log->major_ver < 2)
+		return le64_to_cpu(hdr->rhdr.lsn);
+
+	return le32_to_cpu(hdr->file_off);
+}
+
+static inline u64 base_lsn(struct ntfs_log *log,
+			   const struct RECORD_PAGE_HDR *hdr, u64 lsn)
+{
+	u64 h_lsn = le64_to_cpu(hdr->rhdr.lsn);
+	u64 ret = (((h_lsn >> log->file_data_bits) +
+		    (lsn < (lsn_to_vbo(log, h_lsn) & ~log->page_mask) ? 1 : 0))
+		   << log->file_data_bits) +
+		  ((((is_log_record_end(hdr) &&
+		      h_lsn <= le64_to_cpu(hdr->record_hdr.last_end_lsn))
+			     ? le16_to_cpu(hdr->record_hdr.next_record_off)
+			     : log->page_size) +
+		    lsn) >>
+		   3);
+
+	return ret;
+}
+
+static inline bool verify_client_lsn(struct ntfs_log *log,
+				     const struct CLIENT_REC *client, u64 lsn)
+{
+	return lsn >= le64_to_cpu(client->oldest_lsn) &&
+	       lsn <= le64_to_cpu(log->ra->current_lsn) && lsn;
+}
+
+struct restart_info {
+	u64 last_lsn;
+	struct RESTART_HDR *r_page;
+	u32 vbo;
+	bool chkdsk_was_run;
+	bool valid_page;
+	bool initialized;
+	bool restart;
+};
+
+static int read_log_page(struct ntfs_log *log, u32 vbo,
+			 struct RECORD_PAGE_HDR **buffer, bool *usa_error)
+{
+	int err = 0;
+	u32 page_idx = vbo >> log->page_bits;
+	u32 page_off = vbo & log->page_mask;
+	u32 bytes = log->page_size - page_off;
+	void *to_free = NULL;
+	u32 page_vbo = page_idx << log->page_bits;
+	struct RECORD_PAGE_HDR *page_buf;
+	struct ntfs_inode *ni = log->ni;
+	bool bBAAD;
+
+	if (vbo >= log->l_size)
+		return -EINVAL;
+
+	if (!*buffer) {
+		to_free = kmalloc(bytes, GFP_NOFS);
+		if (!to_free)
+			return -ENOMEM;
+		*buffer = to_free;
+	}
+
+	page_buf = page_off ? log->one_page_buf : *buffer;
+
+	err = ntfs_read_run_nb(ni->mi.sbi, &ni->file.run, page_vbo, page_buf,
+			       log->page_size, NULL);
+	if (err)
+		goto out;
+
+	if (page_buf->rhdr.sign != NTFS_FFFF_SIGNATURE)
+		ntfs_fix_post_read(&page_buf->rhdr, PAGE_SIZE, false);
+
+	if (page_buf != *buffer)
+		memcpy(*buffer, Add2Ptr(page_buf, page_off), bytes);
+
+	bBAAD = page_buf->rhdr.sign == NTFS_BAAD_SIGNATURE;
+
+	if (usa_error)
+		*usa_error = bBAAD;
+	/* Check that the update sequence array for this page is valid */
+	/* If we don't allow errors, raise an error status */
+	else if (bBAAD)
+		err = -EINVAL;
+
+out:
+	if (err && to_free) {
+		kfree(to_free);
+		*buffer = NULL;
+	}
+
+	return err;
+}
+
+/*
+ * log_read_rst
+ *
+ * It walks through 512 blocks of the file looking for a valid
+ * restart page header. It will stop the first time we find a
+ * valid page header.
+ */
+static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first,
+			struct restart_info *info)
+{
+	u32 skip, vbo;
+	struct RESTART_HDR *r_page = kmalloc(DefaultLogPageSize, GFP_NOFS);
+
+	if (!r_page)
+		return -ENOMEM;
+
+	memset(info, 0, sizeof(struct restart_info));
+
+	/* Determine which restart area we are looking for. */
+	if (first) {
+		vbo = 0;
+		skip = 512;
+	} else {
+		vbo = 512;
+		skip = 0;
+	}
+
+	/* Loop continuously until we succeed. */
+	for (; vbo < l_size; vbo = 2 * vbo + skip, skip = 0) {
+		bool usa_error;
+		u32 sys_page_size;
+		bool brst, bchk;
+		struct RESTART_AREA *ra;
+
+		/* Read a page header at the current offset. */
+		if (read_log_page(log, vbo, (struct RECORD_PAGE_HDR **)&r_page,
+				  &usa_error)) {
+			/* Ignore any errors. */
+			continue;
+		}
+
+		/* Exit if the signature is a log record page. */
+		if (r_page->rhdr.sign == NTFS_RCRD_SIGNATURE) {
+			info->initialized = true;
+			break;
+		}
+
+		brst = r_page->rhdr.sign == NTFS_RSTR_SIGNATURE;
+		bchk = r_page->rhdr.sign == NTFS_CHKD_SIGNATURE;
+
+		if (!bchk && !brst) {
+			if (r_page->rhdr.sign != NTFS_FFFF_SIGNATURE) {
+				/*
+				 * Remember if the signature does not
+				 * indicate uninitialized file.
+				 */
+				info->initialized = true;
+			}
+			continue;
+		}
+
+		ra = NULL;
+		info->valid_page = false;
+		info->initialized = true;
+		info->vbo = vbo;
+
+		/* Let's check the restart area if this is a valid page. */
+		if (!is_rst_page_hdr_valid(vbo, r_page))
+			goto check_result;
+		ra = Add2Ptr(r_page, le16_to_cpu(r_page->ra_off));
+
+		if (!is_rst_area_valid(r_page))
+			goto check_result;
+
+		/*
+		 * We have a valid restart page header and restart area.
+		 * If chkdsk was run or we have no clients then we have
+		 * no more checking to do.
+		 */
+		if (bchk || ra->client_idx[1] == LFS_NO_CLIENT_LE) {
+			info->valid_page = true;
+			goto check_result;
+		}
+
+		/* Read the entire restart area. */
+		sys_page_size = le32_to_cpu(r_page->sys_page_size);
+		if (DefaultLogPageSize != sys_page_size) {
+			kfree(r_page);
+			r_page = kzalloc(sys_page_size, GFP_NOFS);
+			if (!r_page)
+				return -ENOMEM;
+
+			if (read_log_page(log, vbo,
+					  (struct RECORD_PAGE_HDR **)&r_page,
+					  &usa_error)) {
+				/* Ignore any errors. */
+				kfree(r_page);
+				r_page = NULL;
+				continue;
+			}
+		}
+
+		if (is_client_area_valid(r_page, usa_error)) {
+			info->valid_page = true;
+			ra = Add2Ptr(r_page, le16_to_cpu(r_page->ra_off));
+		}
+
+check_result:
+		/*
+		 * If chkdsk was run then update the caller's
+		 * values and return.
+		 */
+		if (r_page->rhdr.sign == NTFS_CHKD_SIGNATURE) {
+			info->chkdsk_was_run = true;
+			info->last_lsn = le64_to_cpu(r_page->rhdr.lsn);
+			info->restart = true;
+			info->r_page = r_page;
+			return 0;
+		}
+
+		/*
+		 * If we have a valid page then copy the values
+		 * we need from it.
+		 */
+		if (info->valid_page) {
+			info->last_lsn = le64_to_cpu(ra->current_lsn);
+			info->restart = true;
+			info->r_page = r_page;
+			return 0;
+		}
+	}
+
+	kfree(r_page);
+
+	return 0;
+}
+
+/*
+ * Ilog_init_pg_hdr - Init @log from restart page header.
+ */
+static void log_init_pg_hdr(struct ntfs_log *log, u32 sys_page_size,
+			    u32 page_size, u16 major_ver, u16 minor_ver)
+{
+	log->sys_page_size = sys_page_size;
+	log->sys_page_mask = sys_page_size - 1;
+	log->page_size = page_size;
+	log->page_mask = page_size - 1;
+	log->page_bits = blksize_bits(page_size);
+
+	log->clst_per_page = log->page_size >> log->ni->mi.sbi->cluster_bits;
+	if (!log->clst_per_page)
+		log->clst_per_page = 1;
+
+	log->first_page = major_ver >= 2
+				  ? 0x22 * page_size
+				  : ((sys_page_size << 1) + (page_size << 1));
+	log->major_ver = major_ver;
+	log->minor_ver = minor_ver;
+}
+
+/*
+ * log_create - Init @log in cases when we don't have a restart area to use.
+ */
+static void log_create(struct ntfs_log *log, u32 l_size, const u64 last_lsn,
+		       u32 open_log_count, bool wrapped, bool use_multi_page)
+{
+	log->l_size = l_size;
+	/* All file offsets must be quadword aligned. */
+	log->file_data_bits = blksize_bits(l_size) - 3;
+	log->seq_num_mask = (8 << log->file_data_bits) - 1;
+	log->seq_num_bits = sizeof(u64) * 8 - log->file_data_bits;
+	log->seq_num = (last_lsn >> log->file_data_bits) + 2;
+	log->next_page = log->first_page;
+	log->oldest_lsn = log->seq_num << log->file_data_bits;
+	log->oldest_lsn_off = 0;
+	log->last_lsn = log->oldest_lsn;
+
+	log->l_flags |= NTFSLOG_NO_LAST_LSN | NTFSLOG_NO_OLDEST_LSN;
+
+	/* Set the correct flags for the I/O and indicate if we have wrapped. */
+	if (wrapped)
+		log->l_flags |= NTFSLOG_WRAPPED;
+
+	if (use_multi_page)
+		log->l_flags |= NTFSLOG_MULTIPLE_PAGE_IO;
+
+	/* Compute the log page values. */
+	log->data_off = ALIGN(
+		offsetof(struct RECORD_PAGE_HDR, fixups) +
+			sizeof(short) * ((log->page_size >> SECTOR_SHIFT) + 1),
+		8);
+	log->data_size = log->page_size - log->data_off;
+	log->record_header_len = sizeof(struct LFS_RECORD_HDR);
+
+	/* Remember the different page sizes for reservation. */
+	log->reserved = log->data_size - log->record_header_len;
+
+	/* Compute the restart page values. */
+	log->ra_off = ALIGN(
+		offsetof(struct RESTART_HDR, fixups) +
+			sizeof(short) *
+				((log->sys_page_size >> SECTOR_SHIFT) + 1),
+		8);
+	log->restart_size = log->sys_page_size - log->ra_off;
+	log->ra_size = struct_size(log->ra, clients, 1);
+	log->current_openlog_count = open_log_count;
+
+	/*
+	 * The total available log file space is the number of
+	 * log file pages times the space available on each page.
+	 */
+	log->total_avail_pages = log->l_size - log->first_page;
+	log->total_avail = log->total_avail_pages >> log->page_bits;
+
+	/*
+	 * We assume that we can't use the end of the page less than
+	 * the file record size.
+	 * Then we won't need to reserve more than the caller asks for.
+	 */
+	log->max_current_avail = log->total_avail * log->reserved;
+	log->total_avail = log->total_avail * log->data_size;
+	log->current_avail = log->max_current_avail;
+}
+
+/*
+ * log_create_ra - Fill a restart area from the values stored in @log.
+ */
+static struct RESTART_AREA *log_create_ra(struct ntfs_log *log)
+{
+	struct CLIENT_REC *cr;
+	struct RESTART_AREA *ra = kzalloc(log->restart_size, GFP_NOFS);
+
+	if (!ra)
+		return NULL;
+
+	ra->current_lsn = cpu_to_le64(log->last_lsn);
+	ra->log_clients = cpu_to_le16(1);
+	ra->client_idx[1] = LFS_NO_CLIENT_LE;
+	if (log->l_flags & NTFSLOG_MULTIPLE_PAGE_IO)
+		ra->flags = RESTART_SINGLE_PAGE_IO;
+	ra->seq_num_bits = cpu_to_le32(log->seq_num_bits);
+	ra->ra_len = cpu_to_le16(log->ra_size);
+	ra->client_off = cpu_to_le16(offsetof(struct RESTART_AREA, clients));
+	ra->l_size = cpu_to_le64(log->l_size);
+	ra->rec_hdr_len = cpu_to_le16(log->record_header_len);
+	ra->data_off = cpu_to_le16(log->data_off);
+	ra->open_log_count = cpu_to_le32(log->current_openlog_count + 1);
+
+	cr = ra->clients;
+
+	cr->prev_client = LFS_NO_CLIENT_LE;
+	cr->next_client = LFS_NO_CLIENT_LE;
+
+	return ra;
+}
+
+static u32 final_log_off(struct ntfs_log *log, u64 lsn, u32 data_len)
+{
+	u32 base_vbo = lsn << 3;
+	u32 final_log_off = (base_vbo & log->seq_num_mask) & ~log->page_mask;
+	u32 page_off = base_vbo & log->page_mask;
+	u32 tail = log->page_size - page_off;
+
+	page_off -= 1;
+
+	/* Add the length of the header. */
+	data_len += log->record_header_len;
+
+	/*
+	 * If this lsn is contained this log page we are done.
+	 * Otherwise we need to walk through several log pages.
+	 */
+	if (data_len > tail) {
+		data_len -= tail;
+		tail = log->data_size;
+		page_off = log->data_off - 1;
+
+		for (;;) {
+			final_log_off = next_page_off(log, final_log_off);
+
+			/*
+			 * We are done if the remaining bytes
+			 * fit on this page.
+			 */
+			if (data_len <= tail)
+				break;
+			data_len -= tail;
+		}
+	}
+
+	/*
+	 * We add the remaining bytes to our starting position on this page
+	 * and then add that value to the file offset of this log page.
+	 */
+	return final_log_off + data_len + page_off;
+}
+
+static int next_log_lsn(struct ntfs_log *log, const struct LFS_RECORD_HDR *rh,
+			u64 *lsn)
+{
+	int err;
+	u64 this_lsn = le64_to_cpu(rh->this_lsn);
+	u32 vbo = lsn_to_vbo(log, this_lsn);
+	u32 end =
+		final_log_off(log, this_lsn, le32_to_cpu(rh->client_data_len));
+	u32 hdr_off = end & ~log->sys_page_mask;
+	u64 seq = this_lsn >> log->file_data_bits;
+	struct RECORD_PAGE_HDR *page = NULL;
+
+	/* Remember if we wrapped. */
+	if (end <= vbo)
+		seq += 1;
+
+	/* Log page header for this page. */
+	err = read_log_page(log, hdr_off, &page, NULL);
+	if (err)
+		return err;
+
+	/*
+	 * If the lsn we were given was not the last lsn on this page,
+	 * then the starting offset for the next lsn is on a quad word
+	 * boundary following the last file offset for the current lsn.
+	 * Otherwise the file offset is the start of the data on the next page.
+	 */
+	if (this_lsn == le64_to_cpu(page->rhdr.lsn)) {
+		/* If we wrapped, we need to increment the sequence number. */
+		hdr_off = next_page_off(log, hdr_off);
+		if (hdr_off == log->first_page)
+			seq += 1;
+
+		vbo = hdr_off + log->data_off;
+	} else {
+		vbo = ALIGN(end, 8);
+	}
+
+	/* Compute the lsn based on the file offset and the sequence count. */
+	*lsn = vbo_to_lsn(log, vbo, seq);
+
+	/*
+	 * If this lsn is within the legal range for the file, we return true.
+	 * Otherwise false indicates that there are no more lsn's.
+	 */
+	if (!is_lsn_in_file(log, *lsn))
+		*lsn = 0;
+
+	kfree(page);
+
+	return 0;
+}
+
+/*
+ * current_log_avail - Calculate the number of bytes available for log records.
+ */
+static u32 current_log_avail(struct ntfs_log *log)
+{
+	u32 oldest_off, next_free_off, free_bytes;
+
+	if (log->l_flags & NTFSLOG_NO_LAST_LSN) {
+		/* The entire file is available. */
+		return log->max_current_avail;
+	}
+
+	/*
+	 * If there is a last lsn the restart area then we know that we will
+	 * have to compute the free range.
+	 * If there is no oldest lsn then start at the first page of the file.
+	 */
+	oldest_off = (log->l_flags & NTFSLOG_NO_OLDEST_LSN)
+			     ? log->first_page
+			     : (log->oldest_lsn_off & ~log->sys_page_mask);
+
+	/*
+	 * We will use the next log page offset to compute the next free page.
+	 * If we are going to reuse this page go to the next page.
+	 * If we are at the first page then use the end of the file.
+	 */
+	next_free_off = (log->l_flags & NTFSLOG_REUSE_TAIL)
+				? log->next_page + log->page_size
+				: log->next_page == log->first_page
+					  ? log->l_size
+					  : log->next_page;
+
+	/* If the two offsets are the same then there is no available space. */
+	if (oldest_off == next_free_off)
+		return 0;
+	/*
+	 * If the free offset follows the oldest offset then subtract
+	 * this range from the total available pages.
+	 */
+	free_bytes =
+		oldest_off < next_free_off
+			? log->total_avail_pages - (next_free_off - oldest_off)
+			: oldest_off - next_free_off;
+
+	free_bytes >>= log->page_bits;
+	return free_bytes * log->reserved;
+}
+
+static bool check_subseq_log_page(struct ntfs_log *log,
+				  const struct RECORD_PAGE_HDR *rp, u32 vbo,
+				  u64 seq)
+{
+	u64 lsn_seq;
+	const struct NTFS_RECORD_HEADER *rhdr = &rp->rhdr;
+	u64 lsn = le64_to_cpu(rhdr->lsn);
+
+	if (rhdr->sign == NTFS_FFFF_SIGNATURE || !rhdr->sign)
+		return false;
+
+	/*
+	 * If the last lsn on the page occurs was written after the page
+	 * that caused the original error then we have a fatal error.
+	 */
+	lsn_seq = lsn >> log->file_data_bits;
+
+	/*
+	 * If the sequence number for the lsn the page is equal or greater
+	 * than lsn we expect, then this is a subsequent write.
+	 */
+	return lsn_seq >= seq ||
+	       (lsn_seq == seq - 1 && log->first_page == vbo &&
+		vbo != (lsn_to_vbo(log, lsn) & ~log->page_mask));
+}
+
+/*
+ * last_log_lsn
+ *
+ * Walks through the log pages for a file, searching for the
+ * last log page written to the file.
+ */
+static int last_log_lsn(struct ntfs_log *log)
+{
+	int err;
+	bool usa_error = false;
+	bool replace_page = false;
+	bool reuse_page = log->l_flags & NTFSLOG_REUSE_TAIL;
+	bool wrapped_file, wrapped;
+
+	u32 page_cnt = 1, page_pos = 1;
+	u32 page_off = 0, page_off1 = 0, saved_off = 0;
+	u32 final_off, second_off, final_off_prev = 0, second_off_prev = 0;
+	u32 first_file_off = 0, second_file_off = 0;
+	u32 part_io_count = 0;
+	u32 tails = 0;
+	u32 this_off, curpage_off, nextpage_off, remain_pages;
+
+	u64 expected_seq, seq_base = 0, lsn_base = 0;
+	u64 best_lsn, best_lsn1, best_lsn2;
+	u64 lsn_cur, lsn1, lsn2;
+	u64 last_ok_lsn = reuse_page ? log->last_lsn : 0;
+
+	u16 cur_pos, best_page_pos;
+
+	struct RECORD_PAGE_HDR *page = NULL;
+	struct RECORD_PAGE_HDR *tst_page = NULL;
+	struct RECORD_PAGE_HDR *first_tail = NULL;
+	struct RECORD_PAGE_HDR *second_tail = NULL;
+	struct RECORD_PAGE_HDR *tail_page = NULL;
+	struct RECORD_PAGE_HDR *second_tail_prev = NULL;
+	struct RECORD_PAGE_HDR *first_tail_prev = NULL;
+	struct RECORD_PAGE_HDR *page_bufs = NULL;
+	struct RECORD_PAGE_HDR *best_page;
+
+	if (log->major_ver >= 2) {
+		final_off = 0x02 * log->page_size;
+		second_off = 0x12 * log->page_size;
+
+		// 0x10 == 0x12 - 0x2
+		page_bufs = kmalloc(log->page_size * 0x10, GFP_NOFS);
+		if (!page_bufs)
+			return -ENOMEM;
+	} else {
+		second_off = log->first_page - log->page_size;
+		final_off = second_off - log->page_size;
+	}
+
+next_tail:
+	/* Read second tail page (at pos 3/0x12000). */
+	if (read_log_page(log, second_off, &second_tail, &usa_error) ||
+	    usa_error || second_tail->rhdr.sign != NTFS_RCRD_SIGNATURE) {
+		kfree(second_tail);
+		second_tail = NULL;
+		second_file_off = 0;
+		lsn2 = 0;
+	} else {
+		second_file_off = hdr_file_off(log, second_tail);
+		lsn2 = le64_to_cpu(second_tail->record_hdr.last_end_lsn);
+	}
+
+	/* Read first tail page (at pos 2/0x2000). */
+	if (read_log_page(log, final_off, &first_tail, &usa_error) ||
+	    usa_error || first_tail->rhdr.sign != NTFS_RCRD_SIGNATURE) {
+		kfree(first_tail);
+		first_tail = NULL;
+		first_file_off = 0;
+		lsn1 = 0;
+	} else {
+		first_file_off = hdr_file_off(log, first_tail);
+		lsn1 = le64_to_cpu(first_tail->record_hdr.last_end_lsn);
+	}
+
+	if (log->major_ver < 2) {
+		int best_page;
+
+		first_tail_prev = first_tail;
+		final_off_prev = first_file_off;
+		second_tail_prev = second_tail;
+		second_off_prev = second_file_off;
+		tails = 1;
+
+		if (!first_tail && !second_tail)
+			goto tail_read;
+
+		if (first_tail && second_tail)
+			best_page = lsn1 < lsn2 ? 1 : 0;
+		else if (first_tail)
+			best_page = 0;
+		else
+			best_page = 1;
+
+		page_off = best_page ? second_file_off : first_file_off;
+		seq_base = (best_page ? lsn2 : lsn1) >> log->file_data_bits;
+		goto tail_read;
+	}
+
+	best_lsn1 = first_tail ? base_lsn(log, first_tail, first_file_off) : 0;
+	best_lsn2 =
+		second_tail ? base_lsn(log, second_tail, second_file_off) : 0;
+
+	if (first_tail && second_tail) {
+		if (best_lsn1 > best_lsn2) {
+			best_lsn = best_lsn1;
+			best_page = first_tail;
+			this_off = first_file_off;
+		} else {
+			best_lsn = best_lsn2;
+			best_page = second_tail;
+			this_off = second_file_off;
+		}
+	} else if (first_tail) {
+		best_lsn = best_lsn1;
+		best_page = first_tail;
+		this_off = first_file_off;
+	} else if (second_tail) {
+		best_lsn = best_lsn2;
+		best_page = second_tail;
+		this_off = second_file_off;
+	} else {
+		goto tail_read;
+	}
+
+	best_page_pos = le16_to_cpu(best_page->page_pos);
+
+	if (!tails) {
+		if (best_page_pos == page_pos) {
+			seq_base = best_lsn >> log->file_data_bits;
+			saved_off = page_off = le32_to_cpu(best_page->file_off);
+			lsn_base = best_lsn;
+
+			memmove(page_bufs, best_page, log->page_size);
+
+			page_cnt = le16_to_cpu(best_page->page_count);
+			if (page_cnt > 1)
+				page_pos += 1;
+
+			tails = 1;
+		}
+	} else if (seq_base == (best_lsn >> log->file_data_bits) &&
+		   saved_off + log->page_size == this_off &&
+		   lsn_base < best_lsn &&
+		   (page_pos != page_cnt || best_page_pos == page_pos ||
+		    best_page_pos == 1) &&
+		   (page_pos >= page_cnt || best_page_pos == page_pos)) {
+		u16 bppc = le16_to_cpu(best_page->page_count);
+
+		saved_off += log->page_size;
+		lsn_base = best_lsn;
+
+		memmove(Add2Ptr(page_bufs, tails * log->page_size), best_page,
+			log->page_size);
+
+		tails += 1;
+
+		if (best_page_pos != bppc) {
+			page_cnt = bppc;
+			page_pos = best_page_pos;
+
+			if (page_cnt > 1)
+				page_pos += 1;
+		} else {
+			page_pos = page_cnt = 1;
+		}
+	} else {
+		kfree(first_tail);
+		kfree(second_tail);
+		goto tail_read;
+	}
+
+	kfree(first_tail_prev);
+	first_tail_prev = first_tail;
+	final_off_prev = first_file_off;
+	first_tail = NULL;
+
+	kfree(second_tail_prev);
+	second_tail_prev = second_tail;
+	second_off_prev = second_file_off;
+	second_tail = NULL;
+
+	final_off += log->page_size;
+	second_off += log->page_size;
+
+	if (tails < 0x10)
+		goto next_tail;
+tail_read:
+	first_tail = first_tail_prev;
+	final_off = final_off_prev;
+
+	second_tail = second_tail_prev;
+	second_off = second_off_prev;
+
+	page_cnt = page_pos = 1;
+
+	curpage_off = seq_base == log->seq_num ? min(log->next_page, page_off)
+					       : log->next_page;
+
+	wrapped_file =
+		curpage_off == log->first_page &&
+		!(log->l_flags & (NTFSLOG_NO_LAST_LSN | NTFSLOG_REUSE_TAIL));
+
+	expected_seq = wrapped_file ? (log->seq_num + 1) : log->seq_num;
+
+	nextpage_off = curpage_off;
+
+next_page:
+	tail_page = NULL;
+	/* Read the next log page. */
+	err = read_log_page(log, curpage_off, &page, &usa_error);
+
+	/* Compute the next log page offset the file. */
+	nextpage_off = next_page_off(log, curpage_off);
+	wrapped = nextpage_off == log->first_page;
+
+	if (tails > 1) {
+		struct RECORD_PAGE_HDR *cur_page =
+			Add2Ptr(page_bufs, curpage_off - page_off);
+
+		if (curpage_off == saved_off) {
+			tail_page = cur_page;
+			goto use_tail_page;
+		}
+
+		if (page_off > curpage_off || curpage_off >= saved_off)
+			goto use_tail_page;
+
+		if (page_off1)
+			goto use_cur_page;
+
+		if (!err && !usa_error &&
+		    page->rhdr.sign == NTFS_RCRD_SIGNATURE &&
+		    cur_page->rhdr.lsn == page->rhdr.lsn &&
+		    cur_page->record_hdr.next_record_off ==
+			    page->record_hdr.next_record_off &&
+		    ((page_pos == page_cnt &&
+		      le16_to_cpu(page->page_pos) == 1) ||
+		     (page_pos != page_cnt &&
+		      le16_to_cpu(page->page_pos) == page_pos + 1 &&
+		      le16_to_cpu(page->page_count) == page_cnt))) {
+			cur_page = NULL;
+			goto use_tail_page;
+		}
+
+		page_off1 = page_off;
+
+use_cur_page:
+
+		lsn_cur = le64_to_cpu(cur_page->rhdr.lsn);
+
+		if (last_ok_lsn !=
+			    le64_to_cpu(cur_page->record_hdr.last_end_lsn) &&
+		    ((lsn_cur >> log->file_data_bits) +
+		     ((curpage_off <
+		       (lsn_to_vbo(log, lsn_cur) & ~log->page_mask))
+			      ? 1
+			      : 0)) != expected_seq) {
+			goto check_tail;
+		}
+
+		if (!is_log_record_end(cur_page)) {
+			tail_page = NULL;
+			last_ok_lsn = lsn_cur;
+			goto next_page_1;
+		}
+
+		log->seq_num = expected_seq;
+		log->l_flags &= ~NTFSLOG_NO_LAST_LSN;
+		log->last_lsn = le64_to_cpu(cur_page->record_hdr.last_end_lsn);
+		log->ra->current_lsn = cur_page->record_hdr.last_end_lsn;
+
+		if (log->record_header_len <=
+		    log->page_size -
+			    le16_to_cpu(cur_page->record_hdr.next_record_off)) {
+			log->l_flags |= NTFSLOG_REUSE_TAIL;
+			log->next_page = curpage_off;
+		} else {
+			log->l_flags &= ~NTFSLOG_REUSE_TAIL;
+			log->next_page = nextpage_off;
+		}
+
+		if (wrapped_file)
+			log->l_flags |= NTFSLOG_WRAPPED;
+
+		last_ok_lsn = le64_to_cpu(cur_page->record_hdr.last_end_lsn);
+		goto next_page_1;
+	}
+
+	/*
+	 * If we are at the expected first page of a transfer check to see
+	 * if either tail copy is at this offset.
+	 * If this page is the last page of a transfer, check if we wrote
+	 * a subsequent tail copy.
+	 */
+	if (page_cnt == page_pos || page_cnt == page_pos + 1) {
+		/*
+		 * Check if the offset matches either the first or second
+		 * tail copy. It is possible it will match both.
+		 */
+		if (curpage_off == final_off)
+			tail_page = first_tail;
+
+		/*
+		 * If we already matched on the first page then
+		 * check the ending lsn's.
+		 */
+		if (curpage_off == second_off) {
+			if (!tail_page ||
+			    (second_tail &&
+			     le64_to_cpu(second_tail->record_hdr.last_end_lsn) >
+				     le64_to_cpu(first_tail->record_hdr
+							 .last_end_lsn))) {
+				tail_page = second_tail;
+			}
+		}
+	}
+
+use_tail_page:
+	if (tail_page) {
+		/* We have a candidate for a tail copy. */
+		lsn_cur = le64_to_cpu(tail_page->record_hdr.last_end_lsn);
+
+		if (last_ok_lsn < lsn_cur) {
+			/*
+			 * If the sequence number is not expected,
+			 * then don't use the tail copy.
+			 */
+			if (expected_seq != (lsn_cur >> log->file_data_bits))
+				tail_page = NULL;
+		} else if (last_ok_lsn > lsn_cur) {
+			/*
+			 * If the last lsn is greater than the one on
+			 * this page then forget this tail.
+			 */
+			tail_page = NULL;
+		}
+	}
+
+	/*
+	 *If we have an error on the current page,
+	 * we will break of this loop.
+	 */
+	if (err || usa_error)
+		goto check_tail;
+
+	/*
+	 * Done if the last lsn on this page doesn't match the previous known
+	 * last lsn or the sequence number is not expected.
+	 */
+	lsn_cur = le64_to_cpu(page->rhdr.lsn);
+	if (last_ok_lsn != lsn_cur &&
+	    expected_seq != (lsn_cur >> log->file_data_bits)) {
+		goto check_tail;
+	}
+
+	/*
+	 * Check that the page position and page count values are correct.
+	 * If this is the first page of a transfer the position must be 1
+	 * and the count will be unknown.
+	 */
+	if (page_cnt == page_pos) {
+		if (page->page_pos != cpu_to_le16(1) &&
+		    (!reuse_page || page->page_pos != page->page_count)) {
+			/*
+			 * If the current page is the first page we are
+			 * looking at and we are reusing this page then
+			 * it can be either the first or last page of a
+			 * transfer. Otherwise it can only be the first.
+			 */
+			goto check_tail;
+		}
+	} else if (le16_to_cpu(page->page_count) != page_cnt ||
+		   le16_to_cpu(page->page_pos) != page_pos + 1) {
+		/*
+		 * The page position better be 1 more than the last page
+		 * position and the page count better match.
+		 */
+		goto check_tail;
+	}
+
+	/*
+	 * We have a valid page the file and may have a valid page
+	 * the tail copy area.
+	 * If the tail page was written after the page the file then
+	 * break of the loop.
+	 */
+	if (tail_page &&
+	    le64_to_cpu(tail_page->record_hdr.last_end_lsn) > lsn_cur) {
+		/* Remember if we will replace the page. */
+		replace_page = true;
+		goto check_tail;
+	}
+
+	tail_page = NULL;
+
+	if (is_log_record_end(page)) {
+		/*
+		 * Since we have read this page we know the sequence number
+		 * is the same as our expected value.
+		 */
+		log->seq_num = expected_seq;
+		log->last_lsn = le64_to_cpu(page->record_hdr.last_end_lsn);
+		log->ra->current_lsn = page->record_hdr.last_end_lsn;
+		log->l_flags &= ~NTFSLOG_NO_LAST_LSN;
+
+		/*
+		 * If there is room on this page for another header then
+		 * remember we want to reuse the page.
+		 */
+		if (log->record_header_len <=
+		    log->page_size -
+			    le16_to_cpu(page->record_hdr.next_record_off)) {
+			log->l_flags |= NTFSLOG_REUSE_TAIL;
+			log->next_page = curpage_off;
+		} else {
+			log->l_flags &= ~NTFSLOG_REUSE_TAIL;
+			log->next_page = nextpage_off;
+		}
+
+		/* Remember if we wrapped the log file. */
+		if (wrapped_file)
+			log->l_flags |= NTFSLOG_WRAPPED;
+	}
+
+	/*
+	 * Remember the last page count and position.
+	 * Also remember the last known lsn.
+	 */
+	page_cnt = le16_to_cpu(page->page_count);
+	page_pos = le16_to_cpu(page->page_pos);
+	last_ok_lsn = le64_to_cpu(page->rhdr.lsn);
+
+next_page_1:
+
+	if (wrapped) {
+		expected_seq += 1;
+		wrapped_file = 1;
+	}
+
+	curpage_off = nextpage_off;
+	kfree(page);
+	page = NULL;
+	reuse_page = 0;
+	goto next_page;
+
+check_tail:
+	if (tail_page) {
+		log->seq_num = expected_seq;
+		log->last_lsn = le64_to_cpu(tail_page->record_hdr.last_end_lsn);
+		log->ra->current_lsn = tail_page->record_hdr.last_end_lsn;
+		log->l_flags &= ~NTFSLOG_NO_LAST_LSN;
+
+		if (log->page_size -
+			    le16_to_cpu(
+				    tail_page->record_hdr.next_record_off) >=
+		    log->record_header_len) {
+			log->l_flags |= NTFSLOG_REUSE_TAIL;
+			log->next_page = curpage_off;
+		} else {
+			log->l_flags &= ~NTFSLOG_REUSE_TAIL;
+			log->next_page = nextpage_off;
+		}
+
+		if (wrapped)
+			log->l_flags |= NTFSLOG_WRAPPED;
+	}
+
+	/* Remember that the partial IO will start at the next page. */
+	second_off = nextpage_off;
+
+	/*
+	 * If the next page is the first page of the file then update
+	 * the sequence number for log records which begon the next page.
+	 */
+	if (wrapped)
+		expected_seq += 1;
+
+	/*
+	 * If we have a tail copy or are performing single page I/O we can
+	 * immediately look at the next page.
+	 */
+	if (replace_page || (log->ra->flags & RESTART_SINGLE_PAGE_IO)) {
+		page_cnt = 2;
+		page_pos = 1;
+		goto check_valid;
+	}
+
+	if (page_pos != page_cnt)
+		goto check_valid;
+	/*
+	 * If the next page causes us to wrap to the beginning of the log
+	 * file then we know which page to check next.
+	 */
+	if (wrapped) {
+		page_cnt = 2;
+		page_pos = 1;
+		goto check_valid;
+	}
+
+	cur_pos = 2;
+
+next_test_page:
+	kfree(tst_page);
+	tst_page = NULL;
+
+	/* Walk through the file, reading log pages. */
+	err = read_log_page(log, nextpage_off, &tst_page, &usa_error);
+
+	/*
+	 * If we get a USA error then assume that we correctly found
+	 * the end of the original transfer.
+	 */
+	if (usa_error)
+		goto file_is_valid;
+
+	/*
+	 * If we were able to read the page, we examine it to see if it
+	 * is the same or different Io block.
+	 */
+	if (err)
+		goto next_test_page_1;
+
+	if (le16_to_cpu(tst_page->page_pos) == cur_pos &&
+	    check_subseq_log_page(log, tst_page, nextpage_off, expected_seq)) {
+		page_cnt = le16_to_cpu(tst_page->page_count) + 1;
+		page_pos = le16_to_cpu(tst_page->page_pos);
+		goto check_valid;
+	} else {
+		goto file_is_valid;
+	}
+
+next_test_page_1:
+
+	nextpage_off = next_page_off(log, curpage_off);
+	wrapped = nextpage_off == log->first_page;
+
+	if (wrapped) {
+		expected_seq += 1;
+		page_cnt = 2;
+		page_pos = 1;
+	}
+
+	cur_pos += 1;
+	part_io_count += 1;
+	if (!wrapped)
+		goto next_test_page;
+
+check_valid:
+	/* Skip over the remaining pages this transfer. */
+	remain_pages = page_cnt - page_pos - 1;
+	part_io_count += remain_pages;
+
+	while (remain_pages--) {
+		nextpage_off = next_page_off(log, curpage_off);
+		wrapped = nextpage_off == log->first_page;
+
+		if (wrapped)
+			expected_seq += 1;
+	}
+
+	/* Call our routine to check this log page. */
+	kfree(tst_page);
+	tst_page = NULL;
+
+	err = read_log_page(log, nextpage_off, &tst_page, &usa_error);
+	if (!err && !usa_error &&
+	    check_subseq_log_page(log, tst_page, nextpage_off, expected_seq)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+file_is_valid:
+
+	/* We have a valid file. */
+	if (page_off1 || tail_page) {
+		struct RECORD_PAGE_HDR *tmp_page;
+
+		if (sb_rdonly(log->ni->mi.sbi->sb)) {
+			err = -EROFS;
+			goto out;
+		}
+
+		if (page_off1) {
+			tmp_page = Add2Ptr(page_bufs, page_off1 - page_off);
+			tails -= (page_off1 - page_off) / log->page_size;
+			if (!tail_page)
+				tails -= 1;
+		} else {
+			tmp_page = tail_page;
+			tails = 1;
+		}
+
+		while (tails--) {
+			u64 off = hdr_file_off(log, tmp_page);
+
+			if (!page) {
+				page = kmalloc(log->page_size, GFP_NOFS);
+				if (!page)
+					return -ENOMEM;
+			}
+
+			/*
+			 * Correct page and copy the data from this page
+			 * into it and flush it to disk.
+			 */
+			memcpy(page, tmp_page, log->page_size);
+
+			/* Fill last flushed lsn value flush the page. */
+			if (log->major_ver < 2)
+				page->rhdr.lsn = page->record_hdr.last_end_lsn;
+			else
+				page->file_off = 0;
+
+			page->page_pos = page->page_count = cpu_to_le16(1);
+
+			ntfs_fix_pre_write(&page->rhdr, log->page_size);
+
+			err = ntfs_sb_write_run(log->ni->mi.sbi,
+						&log->ni->file.run, off, page,
+						log->page_size);
+
+			if (err)
+				goto out;
+
+			if (part_io_count && second_off == off) {
+				second_off += log->page_size;
+				part_io_count -= 1;
+			}
+
+			tmp_page = Add2Ptr(tmp_page, log->page_size);
+		}
+	}
+
+	if (part_io_count) {
+		if (sb_rdonly(log->ni->mi.sbi->sb)) {
+			err = -EROFS;
+			goto out;
+		}
+	}
+
+out:
+	kfree(second_tail);
+	kfree(first_tail);
+	kfree(page);
+	kfree(tst_page);
+	kfree(page_bufs);
+
+	return err;
+}
+
+/*
+ * read_log_rec_buf - Copy a log record from the file to a buffer.
+ *
+ * The log record may span several log pages and may even wrap the file.
+ */
+static int read_log_rec_buf(struct ntfs_log *log,
+			    const struct LFS_RECORD_HDR *rh, void *buffer)
+{
+	int err;
+	struct RECORD_PAGE_HDR *ph = NULL;
+	u64 lsn = le64_to_cpu(rh->this_lsn);
+	u32 vbo = lsn_to_vbo(log, lsn) & ~log->page_mask;
+	u32 off = lsn_to_page_off(log, lsn) + log->record_header_len;
+	u32 data_len = le32_to_cpu(rh->client_data_len);
+
+	/*
+	 * While there are more bytes to transfer,
+	 * we continue to attempt to perform the read.
+	 */
+	for (;;) {
+		bool usa_error;
+		u32 tail = log->page_size - off;
+
+		if (tail >= data_len)
+			tail = data_len;
+
+		data_len -= tail;
+
+		err = read_log_page(log, vbo, &ph, &usa_error);
+		if (err)
+			goto out;
+
+		/*
+		 * The last lsn on this page better be greater or equal
+		 * to the lsn we are copying.
+		 */
+		if (lsn > le64_to_cpu(ph->rhdr.lsn)) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		memcpy(buffer, Add2Ptr(ph, off), tail);
+
+		/* If there are no more bytes to transfer, we exit the loop. */
+		if (!data_len) {
+			if (!is_log_record_end(ph) ||
+			    lsn > le64_to_cpu(ph->record_hdr.last_end_lsn)) {
+				err = -EINVAL;
+				goto out;
+			}
+			break;
+		}
+
+		if (ph->rhdr.lsn == ph->record_hdr.last_end_lsn ||
+		    lsn > le64_to_cpu(ph->rhdr.lsn)) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		vbo = next_page_off(log, vbo);
+		off = log->data_off;
+
+		/*
+		 * Adjust our pointer the user's buffer to transfer
+		 * the next block to.
+		 */
+		buffer = Add2Ptr(buffer, tail);
+	}
+
+out:
+	kfree(ph);
+	return err;
+}
+
+static int read_rst_area(struct ntfs_log *log, struct NTFS_RESTART **rst_,
+			 u64 *lsn)
+{
+	int err;
+	struct LFS_RECORD_HDR *rh = NULL;
+	const struct CLIENT_REC *cr =
+		Add2Ptr(log->ra, le16_to_cpu(log->ra->client_off));
+	u64 lsnr, lsnc = le64_to_cpu(cr->restart_lsn);
+	u32 len;
+	struct NTFS_RESTART *rst;
+
+	*lsn = 0;
+	*rst_ = NULL;
+
+	/* If the client doesn't have a restart area, go ahead and exit now. */
+	if (!lsnc)
+		return 0;
+
+	err = read_log_page(log, lsn_to_vbo(log, lsnc),
+			    (struct RECORD_PAGE_HDR **)&rh, NULL);
+	if (err)
+		return err;
+
+	rst = NULL;
+	lsnr = le64_to_cpu(rh->this_lsn);
+
+	if (lsnc != lsnr) {
+		/* If the lsn values don't match, then the disk is corrupt. */
+		err = -EINVAL;
+		goto out;
+	}
+
+	*lsn = lsnr;
+	len = le32_to_cpu(rh->client_data_len);
+
+	if (!len) {
+		err = 0;
+		goto out;
+	}
+
+	if (len < sizeof(struct NTFS_RESTART)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	rst = kmalloc(len, GFP_NOFS);
+	if (!rst) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* Copy the data into the 'rst' buffer. */
+	err = read_log_rec_buf(log, rh, rst);
+	if (err)
+		goto out;
+
+	*rst_ = rst;
+	rst = NULL;
+
+out:
+	kfree(rh);
+	kfree(rst);
+
+	return err;
+}
+
+static int find_log_rec(struct ntfs_log *log, u64 lsn, struct lcb *lcb)
+{
+	int err;
+	struct LFS_RECORD_HDR *rh = lcb->lrh;
+	u32 rec_len, len;
+
+	/* Read the record header for this lsn. */
+	if (!rh) {
+		err = read_log_page(log, lsn_to_vbo(log, lsn),
+				    (struct RECORD_PAGE_HDR **)&rh, NULL);
+
+		lcb->lrh = rh;
+		if (err)
+			return err;
+	}
+
+	/*
+	 * If the lsn the log record doesn't match the desired
+	 * lsn then the disk is corrupt.
+	 */
+	if (lsn != le64_to_cpu(rh->this_lsn))
+		return -EINVAL;
+
+	len = le32_to_cpu(rh->client_data_len);
+
+	/*
+	 * Check that the length field isn't greater than the total
+	 * available space the log file.
+	 */
+	rec_len = len + log->record_header_len;
+	if (rec_len >= log->total_avail)
+		return -EINVAL;
+
+	/*
+	 * If the entire log record is on this log page,
+	 * put a pointer to the log record the context block.
+	 */
+	if (rh->flags & LOG_RECORD_MULTI_PAGE) {
+		void *lr = kmalloc(len, GFP_NOFS);
+
+		if (!lr)
+			return -ENOMEM;
+
+		lcb->log_rec = lr;
+		lcb->alloc = true;
+
+		/* Copy the data into the buffer returned. */
+		err = read_log_rec_buf(log, rh, lr);
+		if (err)
+			return err;
+	} else {
+		/* If beyond the end of the current page -> an error. */
+		u32 page_off = lsn_to_page_off(log, lsn);
+
+		if (page_off + len + log->record_header_len > log->page_size)
+			return -EINVAL;
+
+		lcb->log_rec = Add2Ptr(rh, sizeof(struct LFS_RECORD_HDR));
+		lcb->alloc = false;
+	}
+
+	return 0;
+}
+
+/*
+ * read_log_rec_lcb - Init the query operation.
+ */
+static int read_log_rec_lcb(struct ntfs_log *log, u64 lsn, u32 ctx_mode,
+			    struct lcb **lcb_)
+{
+	int err;
+	const struct CLIENT_REC *cr;
+	struct lcb *lcb;
+
+	switch (ctx_mode) {
+	case lcb_ctx_undo_next:
+	case lcb_ctx_prev:
+	case lcb_ctx_next:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Check that the given lsn is the legal range for this client. */
+	cr = Add2Ptr(log->ra, le16_to_cpu(log->ra->client_off));
+
+	if (!verify_client_lsn(log, cr, lsn))
+		return -EINVAL;
+
+	lcb = kzalloc(sizeof(struct lcb), GFP_NOFS);
+	if (!lcb)
+		return -ENOMEM;
+	lcb->client = log->client_id;
+	lcb->ctx_mode = ctx_mode;
+
+	/* Find the log record indicated by the given lsn. */
+	err = find_log_rec(log, lsn, lcb);
+	if (err)
+		goto out;
+
+	*lcb_ = lcb;
+	return 0;
+
+out:
+	lcb_put(lcb);
+	*lcb_ = NULL;
+	return err;
+}
+
+/*
+ * find_client_next_lsn
+ *
+ * Attempt to find the next lsn to return to a client based on the context mode.
+ */
+static int find_client_next_lsn(struct ntfs_log *log, struct lcb *lcb, u64 *lsn)
+{
+	int err;
+	u64 next_lsn;
+	struct LFS_RECORD_HDR *hdr;
+
+	hdr = lcb->lrh;
+	*lsn = 0;
+
+	if (lcb_ctx_next != lcb->ctx_mode)
+		goto check_undo_next;
+
+	/* Loop as long as another lsn can be found. */
+	for (;;) {
+		u64 current_lsn;
+
+		err = next_log_lsn(log, hdr, &current_lsn);
+		if (err)
+			goto out;
+
+		if (!current_lsn)
+			break;
+
+		if (hdr != lcb->lrh)
+			kfree(hdr);
+
+		hdr = NULL;
+		err = read_log_page(log, lsn_to_vbo(log, current_lsn),
+				    (struct RECORD_PAGE_HDR **)&hdr, NULL);
+		if (err)
+			goto out;
+
+		if (memcmp(&hdr->client, &lcb->client,
+			   sizeof(struct CLIENT_ID))) {
+			/*err = -EINVAL; */
+		} else if (LfsClientRecord == hdr->record_type) {
+			kfree(lcb->lrh);
+			lcb->lrh = hdr;
+			*lsn = current_lsn;
+			return 0;
+		}
+	}
+
+out:
+	if (hdr != lcb->lrh)
+		kfree(hdr);
+	return err;
+
+check_undo_next:
+	if (lcb_ctx_undo_next == lcb->ctx_mode)
+		next_lsn = le64_to_cpu(hdr->client_undo_next_lsn);
+	else if (lcb_ctx_prev == lcb->ctx_mode)
+		next_lsn = le64_to_cpu(hdr->client_prev_lsn);
+	else
+		return 0;
+
+	if (!next_lsn)
+		return 0;
+
+	if (!verify_client_lsn(
+		    log, Add2Ptr(log->ra, le16_to_cpu(log->ra->client_off)),
+		    next_lsn))
+		return 0;
+
+	hdr = NULL;
+	err = read_log_page(log, lsn_to_vbo(log, next_lsn),
+			    (struct RECORD_PAGE_HDR **)&hdr, NULL);
+	if (err)
+		return err;
+	kfree(lcb->lrh);
+	lcb->lrh = hdr;
+
+	*lsn = next_lsn;
+
+	return 0;
+}
+
+static int read_next_log_rec(struct ntfs_log *log, struct lcb *lcb, u64 *lsn)
+{
+	int err;
+
+	err = find_client_next_lsn(log, lcb, lsn);
+	if (err)
+		return err;
+
+	if (!*lsn)
+		return 0;
+
+	if (lcb->alloc)
+		kfree(lcb->log_rec);
+
+	lcb->log_rec = NULL;
+	lcb->alloc = false;
+	kfree(lcb->lrh);
+	lcb->lrh = NULL;
+
+	return find_log_rec(log, *lsn, lcb);
+}
+
+static inline bool check_index_header(const struct INDEX_HDR *hdr, size_t bytes)
+{
+	__le16 mask;
+	u32 min_de, de_off, used, total;
+	const struct NTFS_DE *e;
+
+	if (hdr_has_subnode(hdr)) {
+		min_de = sizeof(struct NTFS_DE) + sizeof(u64);
+		mask = NTFS_IE_HAS_SUBNODES;
+	} else {
+		min_de = sizeof(struct NTFS_DE);
+		mask = 0;
+	}
+
+	de_off = le32_to_cpu(hdr->de_off);
+	used = le32_to_cpu(hdr->used);
+	total = le32_to_cpu(hdr->total);
+
+	if (de_off > bytes - min_de || used > bytes || total > bytes ||
+	    de_off + min_de > used || used > total) {
+		return false;
+	}
+
+	e = Add2Ptr(hdr, de_off);
+	for (;;) {
+		u16 esize = le16_to_cpu(e->size);
+		struct NTFS_DE *next = Add2Ptr(e, esize);
+
+		if (esize < min_de || PtrOffset(hdr, next) > used ||
+		    (e->flags & NTFS_IE_HAS_SUBNODES) != mask) {
+			return false;
+		}
+
+		if (de_is_last(e))
+			break;
+
+		e = next;
+	}
+
+	return true;
+}
+
+static inline bool check_index_buffer(const struct INDEX_BUFFER *ib, u32 bytes)
+{
+	u16 fo;
+	const struct NTFS_RECORD_HEADER *r = &ib->rhdr;
+
+	if (r->sign != NTFS_INDX_SIGNATURE)
+		return false;
+
+	fo = (SECTOR_SIZE - ((bytes >> SECTOR_SHIFT) + 1) * sizeof(short));
+
+	if (le16_to_cpu(r->fix_off) > fo)
+		return false;
+
+	if ((le16_to_cpu(r->fix_num) - 1) * SECTOR_SIZE != bytes)
+		return false;
+
+	return check_index_header(&ib->ihdr,
+				  bytes - offsetof(struct INDEX_BUFFER, ihdr));
+}
+
+static inline bool check_index_root(const struct ATTRIB *attr,
+				    struct ntfs_sb_info *sbi)
+{
+	bool ret;
+	const struct INDEX_ROOT *root = resident_data(attr);
+	u8 index_bits = le32_to_cpu(root->index_block_size) >= sbi->cluster_size
+				? sbi->cluster_bits
+				: SECTOR_SHIFT;
+	u8 block_clst = root->index_block_clst;
+
+	if (le32_to_cpu(attr->res.data_size) < sizeof(struct INDEX_ROOT) ||
+	    (root->type != ATTR_NAME && root->type != ATTR_ZERO) ||
+	    (root->type == ATTR_NAME &&
+	     root->rule != NTFS_COLLATION_TYPE_FILENAME) ||
+	    (le32_to_cpu(root->index_block_size) !=
+	     (block_clst << index_bits)) ||
+	    (block_clst != 1 && block_clst != 2 && block_clst != 4 &&
+	     block_clst != 8 && block_clst != 0x10 && block_clst != 0x20 &&
+	     block_clst != 0x40 && block_clst != 0x80)) {
+		return false;
+	}
+
+	ret = check_index_header(&root->ihdr,
+				 le32_to_cpu(attr->res.data_size) -
+					 offsetof(struct INDEX_ROOT, ihdr));
+	return ret;
+}
+
+static inline bool check_attr(const struct MFT_REC *rec,
+			      const struct ATTRIB *attr,
+			      struct ntfs_sb_info *sbi)
+{
+	u32 asize = le32_to_cpu(attr->size);
+	u32 rsize = 0;
+	u64 dsize, svcn, evcn;
+	u16 run_off;
+
+	/* Check the fixed part of the attribute record header. */
+	if (asize >= sbi->record_size ||
+	    asize + PtrOffset(rec, attr) >= sbi->record_size ||
+	    (attr->name_len &&
+	     le16_to_cpu(attr->name_off) + attr->name_len * sizeof(short) >
+		     asize)) {
+		return false;
+	}
+
+	/* Check the attribute fields. */
+	switch (attr->non_res) {
+	case 0:
+		rsize = le32_to_cpu(attr->res.data_size);
+		if (rsize >= asize ||
+		    le16_to_cpu(attr->res.data_off) + rsize > asize) {
+			return false;
+		}
+		break;
+
+	case 1:
+		dsize = le64_to_cpu(attr->nres.data_size);
+		svcn = le64_to_cpu(attr->nres.svcn);
+		evcn = le64_to_cpu(attr->nres.evcn);
+		run_off = le16_to_cpu(attr->nres.run_off);
+
+		if (svcn > evcn + 1 || run_off >= asize ||
+		    le64_to_cpu(attr->nres.valid_size) > dsize ||
+		    dsize > le64_to_cpu(attr->nres.alloc_size)) {
+			return false;
+		}
+
+		if (run_unpack(NULL, sbi, 0, svcn, evcn, svcn,
+			       Add2Ptr(attr, run_off), asize - run_off) < 0) {
+			return false;
+		}
+
+		return true;
+
+	default:
+		return false;
+	}
+
+	switch (attr->type) {
+	case ATTR_NAME:
+		if (fname_full_size(Add2Ptr(
+			    attr, le16_to_cpu(attr->res.data_off))) > asize) {
+			return false;
+		}
+		break;
+
+	case ATTR_ROOT:
+		return check_index_root(attr, sbi);
+
+	case ATTR_STD:
+		if (rsize < sizeof(struct ATTR_STD_INFO5) &&
+		    rsize != sizeof(struct ATTR_STD_INFO)) {
+			return false;
+		}
+		break;
+
+	case ATTR_LIST:
+	case ATTR_ID:
+	case ATTR_SECURE:
+	case ATTR_LABEL:
+	case ATTR_VOL_INFO:
+	case ATTR_DATA:
+	case ATTR_ALLOC:
+	case ATTR_BITMAP:
+	case ATTR_REPARSE:
+	case ATTR_EA_INFO:
+	case ATTR_EA:
+	case ATTR_PROPERTYSET:
+	case ATTR_LOGGED_UTILITY_STREAM:
+		break;
+
+	default:
+		return false;
+	}
+
+	return true;
+}
+
+static inline bool check_file_record(const struct MFT_REC *rec,
+				     const struct MFT_REC *rec2,
+				     struct ntfs_sb_info *sbi)
+{
+	const struct ATTRIB *attr;
+	u16 fo = le16_to_cpu(rec->rhdr.fix_off);
+	u16 fn = le16_to_cpu(rec->rhdr.fix_num);
+	u16 ao = le16_to_cpu(rec->attr_off);
+	u32 rs = sbi->record_size;
+
+	/* Check the file record header for consistency. */
+	if (rec->rhdr.sign != NTFS_FILE_SIGNATURE ||
+	    fo > (SECTOR_SIZE - ((rs >> SECTOR_SHIFT) + 1) * sizeof(short)) ||
+	    (fn - 1) * SECTOR_SIZE != rs || ao < MFTRECORD_FIXUP_OFFSET_1 ||
+	    ao > sbi->record_size - SIZEOF_RESIDENT || !is_rec_inuse(rec) ||
+	    le32_to_cpu(rec->total) != rs) {
+		return false;
+	}
+
+	/* Loop to check all of the attributes. */
+	for (attr = Add2Ptr(rec, ao); attr->type != ATTR_END;
+	     attr = Add2Ptr(attr, le32_to_cpu(attr->size))) {
+		if (check_attr(rec, attr, sbi))
+			continue;
+		return false;
+	}
+
+	return true;
+}
+
+static inline int check_lsn(const struct NTFS_RECORD_HEADER *hdr,
+			    const u64 *rlsn)
+{
+	u64 lsn;
+
+	if (!rlsn)
+		return true;
+
+	lsn = le64_to_cpu(hdr->lsn);
+
+	if (hdr->sign == NTFS_HOLE_SIGNATURE)
+		return false;
+
+	if (*rlsn > lsn)
+		return true;
+
+	return false;
+}
+
+static inline bool check_if_attr(const struct MFT_REC *rec,
+				 const struct LOG_REC_HDR *lrh)
+{
+	u16 ro = le16_to_cpu(lrh->record_off);
+	u16 o = le16_to_cpu(rec->attr_off);
+	const struct ATTRIB *attr = Add2Ptr(rec, o);
+
+	while (o < ro) {
+		u32 asize;
+
+		if (attr->type == ATTR_END)
+			break;
+
+		asize = le32_to_cpu(attr->size);
+		if (!asize)
+			break;
+
+		o += asize;
+		attr = Add2Ptr(attr, asize);
+	}
+
+	return o == ro;
+}
+
+static inline bool check_if_index_root(const struct MFT_REC *rec,
+				       const struct LOG_REC_HDR *lrh)
+{
+	u16 ro = le16_to_cpu(lrh->record_off);
+	u16 o = le16_to_cpu(rec->attr_off);
+	const struct ATTRIB *attr = Add2Ptr(rec, o);
+
+	while (o < ro) {
+		u32 asize;
+
+		if (attr->type == ATTR_END)
+			break;
+
+		asize = le32_to_cpu(attr->size);
+		if (!asize)
+			break;
+
+		o += asize;
+		attr = Add2Ptr(attr, asize);
+	}
+
+	return o == ro && attr->type == ATTR_ROOT;
+}
+
+static inline bool check_if_root_index(const struct ATTRIB *attr,
+				       const struct INDEX_HDR *hdr,
+				       const struct LOG_REC_HDR *lrh)
+{
+	u16 ao = le16_to_cpu(lrh->attr_off);
+	u32 de_off = le32_to_cpu(hdr->de_off);
+	u32 o = PtrOffset(attr, hdr) + de_off;
+	const struct NTFS_DE *e = Add2Ptr(hdr, de_off);
+	u32 asize = le32_to_cpu(attr->size);
+
+	while (o < ao) {
+		u16 esize;
+
+		if (o >= asize)
+			break;
+
+		esize = le16_to_cpu(e->size);
+		if (!esize)
+			break;
+
+		o += esize;
+		e = Add2Ptr(e, esize);
+	}
+
+	return o == ao;
+}
+
+static inline bool check_if_alloc_index(const struct INDEX_HDR *hdr,
+					u32 attr_off)
+{
+	u32 de_off = le32_to_cpu(hdr->de_off);
+	u32 o = offsetof(struct INDEX_BUFFER, ihdr) + de_off;
+	const struct NTFS_DE *e = Add2Ptr(hdr, de_off);
+	u32 used = le32_to_cpu(hdr->used);
+
+	while (o < attr_off) {
+		u16 esize;
+
+		if (de_off >= used)
+			break;
+
+		esize = le16_to_cpu(e->size);
+		if (!esize)
+			break;
+
+		o += esize;
+		de_off += esize;
+		e = Add2Ptr(e, esize);
+	}
+
+	return o == attr_off;
+}
+
+static inline void change_attr_size(struct MFT_REC *rec, struct ATTRIB *attr,
+				    u32 nsize)
+{
+	u32 asize = le32_to_cpu(attr->size);
+	int dsize = nsize - asize;
+	u8 *next = Add2Ptr(attr, asize);
+	u32 used = le32_to_cpu(rec->used);
+
+	memmove(Add2Ptr(attr, nsize), next, used - PtrOffset(rec, next));
+
+	rec->used = cpu_to_le32(used + dsize);
+	attr->size = cpu_to_le32(nsize);
+}
+
+struct OpenAttr {
+	struct ATTRIB *attr;
+	struct runs_tree *run1;
+	struct runs_tree run0;
+	struct ntfs_inode *ni;
+	// CLST rno;
+};
+
+/*
+ * cmp_type_and_name
+ *
+ * Return: 0 if 'attr' has the same type and name.
+ */
+static inline int cmp_type_and_name(const struct ATTRIB *a1,
+				    const struct ATTRIB *a2)
+{
+	return a1->type != a2->type || a1->name_len != a2->name_len ||
+	       (a1->name_len && memcmp(attr_name(a1), attr_name(a2),
+				       a1->name_len * sizeof(short)));
+}
+
+static struct OpenAttr *find_loaded_attr(struct ntfs_log *log,
+					 const struct ATTRIB *attr, CLST rno)
+{
+	struct OPEN_ATTR_ENRTY *oe = NULL;
+
+	while ((oe = enum_rstbl(log->open_attr_tbl, oe))) {
+		struct OpenAttr *op_attr;
+
+		if (ino_get(&oe->ref) != rno)
+			continue;
+
+		op_attr = (struct OpenAttr *)oe->ptr;
+		if (!cmp_type_and_name(op_attr->attr, attr))
+			return op_attr;
+	}
+	return NULL;
+}
+
+static struct ATTRIB *attr_create_nonres_log(struct ntfs_sb_info *sbi,
+					     enum ATTR_TYPE type, u64 size,
+					     const u16 *name, size_t name_len,
+					     __le16 flags)
+{
+	struct ATTRIB *attr;
+	u32 name_size = ALIGN(name_len * sizeof(short), 8);
+	bool is_ext = flags & (ATTR_FLAG_COMPRESSED | ATTR_FLAG_SPARSED);
+	u32 asize = name_size +
+		    (is_ext ? SIZEOF_NONRESIDENT_EX : SIZEOF_NONRESIDENT);
+
+	attr = kzalloc(asize, GFP_NOFS);
+	if (!attr)
+		return NULL;
+
+	attr->type = type;
+	attr->size = cpu_to_le32(asize);
+	attr->flags = flags;
+	attr->non_res = 1;
+	attr->name_len = name_len;
+
+	attr->nres.evcn = cpu_to_le64((u64)bytes_to_cluster(sbi, size) - 1);
+	attr->nres.alloc_size = cpu_to_le64(ntfs_up_cluster(sbi, size));
+	attr->nres.data_size = cpu_to_le64(size);
+	attr->nres.valid_size = attr->nres.data_size;
+	if (is_ext) {
+		attr->name_off = SIZEOF_NONRESIDENT_EX_LE;
+		if (is_attr_compressed(attr))
+			attr->nres.c_unit = COMPRESSION_UNIT;
+
+		attr->nres.run_off =
+			cpu_to_le16(SIZEOF_NONRESIDENT_EX + name_size);
+		memcpy(Add2Ptr(attr, SIZEOF_NONRESIDENT_EX), name,
+		       name_len * sizeof(short));
+	} else {
+		attr->name_off = SIZEOF_NONRESIDENT_LE;
+		attr->nres.run_off =
+			cpu_to_le16(SIZEOF_NONRESIDENT + name_size);
+		memcpy(Add2Ptr(attr, SIZEOF_NONRESIDENT), name,
+		       name_len * sizeof(short));
+	}
+
+	return attr;
+}
+
+/*
+ * do_action - Common routine for the Redo and Undo Passes.
+ * @rlsn: If it is NULL then undo.
+ */
+static int do_action(struct ntfs_log *log, struct OPEN_ATTR_ENRTY *oe,
+		     const struct LOG_REC_HDR *lrh, u32 op, void *data,
+		     u32 dlen, u32 rec_len, const u64 *rlsn)
+{
+	int err = 0;
+	struct ntfs_sb_info *sbi = log->ni->mi.sbi;
+	struct inode *inode = NULL, *inode_parent;
+	struct mft_inode *mi = NULL, *mi2_child = NULL;
+	CLST rno = 0, rno_base = 0;
+	struct INDEX_BUFFER *ib = NULL;
+	struct MFT_REC *rec = NULL;
+	struct ATTRIB *attr = NULL, *attr2;
+	struct INDEX_HDR *hdr;
+	struct INDEX_ROOT *root;
+	struct NTFS_DE *e, *e1, *e2;
+	struct NEW_ATTRIBUTE_SIZES *new_sz;
+	struct ATTR_FILE_NAME *fname;
+	struct OpenAttr *oa, *oa2;
+	u32 nsize, t32, asize, used, esize, bmp_off, bmp_bits;
+	u16 id, id2;
+	u32 record_size = sbi->record_size;
+	u64 t64;
+	u16 roff = le16_to_cpu(lrh->record_off);
+	u16 aoff = le16_to_cpu(lrh->attr_off);
+	u64 lco = 0;
+	u64 cbo = (u64)le16_to_cpu(lrh->cluster_off) << SECTOR_SHIFT;
+	u64 tvo = le64_to_cpu(lrh->target_vcn) << sbi->cluster_bits;
+	u64 vbo = cbo + tvo;
+	void *buffer_le = NULL;
+	u32 bytes = 0;
+	bool a_dirty = false;
+	u16 data_off;
+
+	oa = oe->ptr;
+
+	/* Big switch to prepare. */
+	switch (op) {
+	/* ============================================================
+	 * Process MFT records, as described by the current log record.
+	 * ============================================================
+	 */
+	case InitializeFileRecordSegment:
+	case DeallocateFileRecordSegment:
+	case WriteEndOfFileRecordSegment:
+	case CreateAttribute:
+	case DeleteAttribute:
+	case UpdateResidentValue:
+	case UpdateMappingPairs:
+	case SetNewAttributeSizes:
+	case AddIndexEntryRoot:
+	case DeleteIndexEntryRoot:
+	case SetIndexEntryVcnRoot:
+	case UpdateFileNameRoot:
+	case UpdateRecordDataRoot:
+	case ZeroEndOfFileRecord:
+		rno = vbo >> sbi->record_bits;
+		inode = ilookup(sbi->sb, rno);
+		if (inode) {
+			mi = &ntfs_i(inode)->mi;
+		} else if (op == InitializeFileRecordSegment) {
+			mi = kzalloc(sizeof(struct mft_inode), GFP_NOFS);
+			if (!mi)
+				return -ENOMEM;
+			err = mi_format_new(mi, sbi, rno, 0, false);
+			if (err)
+				goto out;
+		} else {
+			/* Read from disk. */
+			err = mi_get(sbi, rno, &mi);
+			if (err)
+				return err;
+		}
+		rec = mi->mrec;
+
+		if (op == DeallocateFileRecordSegment)
+			goto skip_load_parent;
+
+		if (InitializeFileRecordSegment != op) {
+			if (rec->rhdr.sign == NTFS_BAAD_SIGNATURE)
+				goto dirty_vol;
+			if (!check_lsn(&rec->rhdr, rlsn))
+				goto out;
+			if (!check_file_record(rec, NULL, sbi))
+				goto dirty_vol;
+			attr = Add2Ptr(rec, roff);
+		}
+
+		if (is_rec_base(rec) || InitializeFileRecordSegment == op) {
+			rno_base = rno;
+			goto skip_load_parent;
+		}
+
+		rno_base = ino_get(&rec->parent_ref);
+		inode_parent = ntfs_iget5(sbi->sb, &rec->parent_ref, NULL);
+		if (IS_ERR(inode_parent))
+			goto skip_load_parent;
+
+		if (is_bad_inode(inode_parent)) {
+			iput(inode_parent);
+			goto skip_load_parent;
+		}
+
+		if (ni_load_mi_ex(ntfs_i(inode_parent), rno, &mi2_child)) {
+			iput(inode_parent);
+		} else {
+			if (mi2_child->mrec != mi->mrec)
+				memcpy(mi2_child->mrec, mi->mrec,
+				       sbi->record_size);
+
+			if (inode)
+				iput(inode);
+			else if (mi)
+				mi_put(mi);
+
+			inode = inode_parent;
+			mi = mi2_child;
+			rec = mi2_child->mrec;
+			attr = Add2Ptr(rec, roff);
+		}
+
+skip_load_parent:
+		inode_parent = NULL;
+		break;
+
+	/*
+	 * Process attributes, as described by the current log record.
+	 */
+	case UpdateNonresidentValue:
+	case AddIndexEntryAllocation:
+	case DeleteIndexEntryAllocation:
+	case WriteEndOfIndexBuffer:
+	case SetIndexEntryVcnAllocation:
+	case UpdateFileNameAllocation:
+	case SetBitsInNonresidentBitMap:
+	case ClearBitsInNonresidentBitMap:
+	case UpdateRecordDataAllocation:
+		attr = oa->attr;
+		bytes = UpdateNonresidentValue == op ? dlen : 0;
+		lco = (u64)le16_to_cpu(lrh->lcns_follow) << sbi->cluster_bits;
+
+		if (attr->type == ATTR_ALLOC) {
+			t32 = le32_to_cpu(oe->bytes_per_index);
+			if (bytes < t32)
+				bytes = t32;
+		}
+
+		if (!bytes)
+			bytes = lco - cbo;
+
+		bytes += roff;
+		if (attr->type == ATTR_ALLOC)
+			bytes = (bytes + 511) & ~511; // align
+
+		buffer_le = kmalloc(bytes, GFP_NOFS);
+		if (!buffer_le)
+			return -ENOMEM;
+
+		err = ntfs_read_run_nb(sbi, oa->run1, vbo, buffer_le, bytes,
+				       NULL);
+		if (err)
+			goto out;
+
+		if (attr->type == ATTR_ALLOC && *(int *)buffer_le)
+			ntfs_fix_post_read(buffer_le, bytes, false);
+		break;
+
+	default:
+		WARN_ON(1);
+	}
+
+	/* Big switch to do operation. */
+	switch (op) {
+	case InitializeFileRecordSegment:
+		if (roff + dlen > record_size)
+			goto dirty_vol;
+
+		memcpy(Add2Ptr(rec, roff), data, dlen);
+		mi->dirty = true;
+		break;
+
+	case DeallocateFileRecordSegment:
+		clear_rec_inuse(rec);
+		le16_add_cpu(&rec->seq, 1);
+		mi->dirty = true;
+		break;
+
+	case WriteEndOfFileRecordSegment:
+		attr2 = (struct ATTRIB *)data;
+		if (!check_if_attr(rec, lrh) || roff + dlen > record_size)
+			goto dirty_vol;
+
+		memmove(attr, attr2, dlen);
+		rec->used = cpu_to_le32(ALIGN(roff + dlen, 8));
+
+		mi->dirty = true;
+		break;
+
+	case CreateAttribute:
+		attr2 = (struct ATTRIB *)data;
+		asize = le32_to_cpu(attr2->size);
+		used = le32_to_cpu(rec->used);
+
+		if (!check_if_attr(rec, lrh) || dlen < SIZEOF_RESIDENT ||
+		    !IS_ALIGNED(asize, 8) ||
+		    Add2Ptr(attr2, asize) > Add2Ptr(lrh, rec_len) ||
+		    dlen > record_size - used) {
+			goto dirty_vol;
+		}
+
+		memmove(Add2Ptr(attr, asize), attr, used - roff);
+		memcpy(attr, attr2, asize);
+
+		rec->used = cpu_to_le32(used + asize);
+		id = le16_to_cpu(rec->next_attr_id);
+		id2 = le16_to_cpu(attr2->id);
+		if (id <= id2)
+			rec->next_attr_id = cpu_to_le16(id2 + 1);
+		if (is_attr_indexed(attr))
+			le16_add_cpu(&rec->hard_links, 1);
+
+		oa2 = find_loaded_attr(log, attr, rno_base);
+		if (oa2) {
+			void *p2 = kmemdup(attr, le32_to_cpu(attr->size),
+					   GFP_NOFS);
+			if (p2) {
+				// run_close(oa2->run1);
+				kfree(oa2->attr);
+				oa2->attr = p2;
+			}
+		}
+
+		mi->dirty = true;
+		break;
+
+	case DeleteAttribute:
+		asize = le32_to_cpu(attr->size);
+		used = le32_to_cpu(rec->used);
+
+		if (!check_if_attr(rec, lrh))
+			goto dirty_vol;
+
+		rec->used = cpu_to_le32(used - asize);
+		if (is_attr_indexed(attr))
+			le16_add_cpu(&rec->hard_links, -1);
+
+		memmove(attr, Add2Ptr(attr, asize), used - asize - roff);
+
+		mi->dirty = true;
+		break;
+
+	case UpdateResidentValue:
+		nsize = aoff + dlen;
+
+		if (!check_if_attr(rec, lrh))
+			goto dirty_vol;
+
+		asize = le32_to_cpu(attr->size);
+		used = le32_to_cpu(rec->used);
+
+		if (lrh->redo_len == lrh->undo_len) {
+			if (nsize > asize)
+				goto dirty_vol;
+			goto move_data;
+		}
+
+		if (nsize > asize && nsize - asize > record_size - used)
+			goto dirty_vol;
+
+		nsize = ALIGN(nsize, 8);
+		data_off = le16_to_cpu(attr->res.data_off);
+
+		if (nsize < asize) {
+			memmove(Add2Ptr(attr, aoff), data, dlen);
+			data = NULL; // To skip below memmove().
+		}
+
+		memmove(Add2Ptr(attr, nsize), Add2Ptr(attr, asize),
+			used - le16_to_cpu(lrh->record_off) - asize);
+
+		rec->used = cpu_to_le32(used + nsize - asize);
+		attr->size = cpu_to_le32(nsize);
+		attr->res.data_size = cpu_to_le32(aoff + dlen - data_off);
+
+move_data:
+		if (data)
+			memmove(Add2Ptr(attr, aoff), data, dlen);
+
+		oa2 = find_loaded_attr(log, attr, rno_base);
+		if (oa2) {
+			void *p2 = kmemdup(attr, le32_to_cpu(attr->size),
+					   GFP_NOFS);
+			if (p2) {
+				// run_close(&oa2->run0);
+				oa2->run1 = &oa2->run0;
+				kfree(oa2->attr);
+				oa2->attr = p2;
+			}
+		}
+
+		mi->dirty = true;
+		break;
+
+	case UpdateMappingPairs:
+		nsize = aoff + dlen;
+		asize = le32_to_cpu(attr->size);
+		used = le32_to_cpu(rec->used);
+
+		if (!check_if_attr(rec, lrh) || !attr->non_res ||
+		    aoff < le16_to_cpu(attr->nres.run_off) || aoff > asize ||
+		    (nsize > asize && nsize - asize > record_size - used)) {
+			goto dirty_vol;
+		}
+
+		nsize = ALIGN(nsize, 8);
+
+		memmove(Add2Ptr(attr, nsize), Add2Ptr(attr, asize),
+			used - le16_to_cpu(lrh->record_off) - asize);
+		rec->used = cpu_to_le32(used + nsize - asize);
+		attr->size = cpu_to_le32(nsize);
+		memmove(Add2Ptr(attr, aoff), data, dlen);
+
+		if (run_get_highest_vcn(le64_to_cpu(attr->nres.svcn),
+					attr_run(attr), &t64)) {
+			goto dirty_vol;
+		}
+
+		attr->nres.evcn = cpu_to_le64(t64);
+		oa2 = find_loaded_attr(log, attr, rno_base);
+		if (oa2 && oa2->attr->non_res)
+			oa2->attr->nres.evcn = attr->nres.evcn;
+
+		mi->dirty = true;
+		break;
+
+	case SetNewAttributeSizes:
+		new_sz = data;
+		if (!check_if_attr(rec, lrh) || !attr->non_res)
+			goto dirty_vol;
+
+		attr->nres.alloc_size = new_sz->alloc_size;
+		attr->nres.data_size = new_sz->data_size;
+		attr->nres.valid_size = new_sz->valid_size;
+
+		if (dlen >= sizeof(struct NEW_ATTRIBUTE_SIZES))
+			attr->nres.total_size = new_sz->total_size;
+
+		oa2 = find_loaded_attr(log, attr, rno_base);
+		if (oa2) {
+			void *p2 = kmemdup(attr, le32_to_cpu(attr->size),
+					   GFP_NOFS);
+			if (p2) {
+				kfree(oa2->attr);
+				oa2->attr = p2;
+			}
+		}
+		mi->dirty = true;
+		break;
+
+	case AddIndexEntryRoot:
+		e = (struct NTFS_DE *)data;
+		esize = le16_to_cpu(e->size);
+		root = resident_data(attr);
+		hdr = &root->ihdr;
+		used = le32_to_cpu(hdr->used);
+
+		if (!check_if_index_root(rec, lrh) ||
+		    !check_if_root_index(attr, hdr, lrh) ||
+		    Add2Ptr(data, esize) > Add2Ptr(lrh, rec_len) ||
+		    esize > le32_to_cpu(rec->total) - le32_to_cpu(rec->used)) {
+			goto dirty_vol;
+		}
+
+		e1 = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
+
+		change_attr_size(rec, attr, le32_to_cpu(attr->size) + esize);
+
+		memmove(Add2Ptr(e1, esize), e1,
+			PtrOffset(e1, Add2Ptr(hdr, used)));
+		memmove(e1, e, esize);
+
+		le32_add_cpu(&attr->res.data_size, esize);
+		hdr->used = cpu_to_le32(used + esize);
+		le32_add_cpu(&hdr->total, esize);
+
+		mi->dirty = true;
+		break;
+
+	case DeleteIndexEntryRoot:
+		root = resident_data(attr);
+		hdr = &root->ihdr;
+		used = le32_to_cpu(hdr->used);
+
+		if (!check_if_index_root(rec, lrh) ||
+		    !check_if_root_index(attr, hdr, lrh)) {
+			goto dirty_vol;
+		}
+
+		e1 = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
+		esize = le16_to_cpu(e1->size);
+		e2 = Add2Ptr(e1, esize);
+
+		memmove(e1, e2, PtrOffset(e2, Add2Ptr(hdr, used)));
+
+		le32_sub_cpu(&attr->res.data_size, esize);
+		hdr->used = cpu_to_le32(used - esize);
+		le32_sub_cpu(&hdr->total, esize);
+
+		change_attr_size(rec, attr, le32_to_cpu(attr->size) - esize);
+
+		mi->dirty = true;
+		break;
+
+	case SetIndexEntryVcnRoot:
+		root = resident_data(attr);
+		hdr = &root->ihdr;
+
+		if (!check_if_index_root(rec, lrh) ||
+		    !check_if_root_index(attr, hdr, lrh)) {
+			goto dirty_vol;
+		}
+
+		e = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
+
+		de_set_vbn_le(e, *(__le64 *)data);
+		mi->dirty = true;
+		break;
+
+	case UpdateFileNameRoot:
+		root = resident_data(attr);
+		hdr = &root->ihdr;
+
+		if (!check_if_index_root(rec, lrh) ||
+		    !check_if_root_index(attr, hdr, lrh)) {
+			goto dirty_vol;
+		}
+
+		e = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
+		fname = (struct ATTR_FILE_NAME *)(e + 1);
+		memmove(&fname->dup, data, sizeof(fname->dup)); //
+		mi->dirty = true;
+		break;
+
+	case UpdateRecordDataRoot:
+		root = resident_data(attr);
+		hdr = &root->ihdr;
+
+		if (!check_if_index_root(rec, lrh) ||
+		    !check_if_root_index(attr, hdr, lrh)) {
+			goto dirty_vol;
+		}
+
+		e = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
+
+		memmove(Add2Ptr(e, le16_to_cpu(e->view.data_off)), data, dlen);
+
+		mi->dirty = true;
+		break;
+
+	case ZeroEndOfFileRecord:
+		if (roff + dlen > record_size)
+			goto dirty_vol;
+
+		memset(attr, 0, dlen);
+		mi->dirty = true;
+		break;
+
+	case UpdateNonresidentValue:
+		if (lco < cbo + roff + dlen)
+			goto dirty_vol;
+
+		memcpy(Add2Ptr(buffer_le, roff), data, dlen);
+
+		a_dirty = true;
+		if (attr->type == ATTR_ALLOC)
+			ntfs_fix_pre_write(buffer_le, bytes);
+		break;
+
+	case AddIndexEntryAllocation:
+		ib = Add2Ptr(buffer_le, roff);
+		hdr = &ib->ihdr;
+		e = data;
+		esize = le16_to_cpu(e->size);
+		e1 = Add2Ptr(ib, aoff);
+
+		if (is_baad(&ib->rhdr))
+			goto dirty_vol;
+		if (!check_lsn(&ib->rhdr, rlsn))
+			goto out;
+
+		used = le32_to_cpu(hdr->used);
+
+		if (!check_index_buffer(ib, bytes) ||
+		    !check_if_alloc_index(hdr, aoff) ||
+		    Add2Ptr(e, esize) > Add2Ptr(lrh, rec_len) ||
+		    used + esize > le32_to_cpu(hdr->total)) {
+			goto dirty_vol;
+		}
+
+		memmove(Add2Ptr(e1, esize), e1,
+			PtrOffset(e1, Add2Ptr(hdr, used)));
+		memcpy(e1, e, esize);
+
+		hdr->used = cpu_to_le32(used + esize);
+
+		a_dirty = true;
+
+		ntfs_fix_pre_write(&ib->rhdr, bytes);
+		break;
+
+	case DeleteIndexEntryAllocation:
+		ib = Add2Ptr(buffer_le, roff);
+		hdr = &ib->ihdr;
+		e = Add2Ptr(ib, aoff);
+		esize = le16_to_cpu(e->size);
+
+		if (is_baad(&ib->rhdr))
+			goto dirty_vol;
+		if (!check_lsn(&ib->rhdr, rlsn))
+			goto out;
+
+		if (!check_index_buffer(ib, bytes) ||
+		    !check_if_alloc_index(hdr, aoff)) {
+			goto dirty_vol;
+		}
+
+		e1 = Add2Ptr(e, esize);
+		nsize = esize;
+		used = le32_to_cpu(hdr->used);
+
+		memmove(e, e1, PtrOffset(e1, Add2Ptr(hdr, used)));
+
+		hdr->used = cpu_to_le32(used - nsize);
+
+		a_dirty = true;
+
+		ntfs_fix_pre_write(&ib->rhdr, bytes);
+		break;
+
+	case WriteEndOfIndexBuffer:
+		ib = Add2Ptr(buffer_le, roff);
+		hdr = &ib->ihdr;
+		e = Add2Ptr(ib, aoff);
+
+		if (is_baad(&ib->rhdr))
+			goto dirty_vol;
+		if (!check_lsn(&ib->rhdr, rlsn))
+			goto out;
+		if (!check_index_buffer(ib, bytes) ||
+		    !check_if_alloc_index(hdr, aoff) ||
+		    aoff + dlen > offsetof(struct INDEX_BUFFER, ihdr) +
+					  le32_to_cpu(hdr->total)) {
+			goto dirty_vol;
+		}
+
+		hdr->used = cpu_to_le32(dlen + PtrOffset(hdr, e));
+		memmove(e, data, dlen);
+
+		a_dirty = true;
+		ntfs_fix_pre_write(&ib->rhdr, bytes);
+		break;
+
+	case SetIndexEntryVcnAllocation:
+		ib = Add2Ptr(buffer_le, roff);
+		hdr = &ib->ihdr;
+		e = Add2Ptr(ib, aoff);
+
+		if (is_baad(&ib->rhdr))
+			goto dirty_vol;
+
+		if (!check_lsn(&ib->rhdr, rlsn))
+			goto out;
+		if (!check_index_buffer(ib, bytes) ||
+		    !check_if_alloc_index(hdr, aoff)) {
+			goto dirty_vol;
+		}
+
+		de_set_vbn_le(e, *(__le64 *)data);
+
+		a_dirty = true;
+		ntfs_fix_pre_write(&ib->rhdr, bytes);
+		break;
+
+	case UpdateFileNameAllocation:
+		ib = Add2Ptr(buffer_le, roff);
+		hdr = &ib->ihdr;
+		e = Add2Ptr(ib, aoff);
+
+		if (is_baad(&ib->rhdr))
+			goto dirty_vol;
+
+		if (!check_lsn(&ib->rhdr, rlsn))
+			goto out;
+		if (!check_index_buffer(ib, bytes) ||
+		    !check_if_alloc_index(hdr, aoff)) {
+			goto dirty_vol;
+		}
+
+		fname = (struct ATTR_FILE_NAME *)(e + 1);
+		memmove(&fname->dup, data, sizeof(fname->dup));
+
+		a_dirty = true;
+		ntfs_fix_pre_write(&ib->rhdr, bytes);
+		break;
+
+	case SetBitsInNonresidentBitMap:
+		bmp_off =
+			le32_to_cpu(((struct BITMAP_RANGE *)data)->bitmap_off);
+		bmp_bits = le32_to_cpu(((struct BITMAP_RANGE *)data)->bits);
+
+		if (cbo + (bmp_off + 7) / 8 > lco ||
+		    cbo + ((bmp_off + bmp_bits + 7) / 8) > lco) {
+			goto dirty_vol;
+		}
+
+		__bitmap_set(Add2Ptr(buffer_le, roff), bmp_off, bmp_bits);
+		a_dirty = true;
+		break;
+
+	case ClearBitsInNonresidentBitMap:
+		bmp_off =
+			le32_to_cpu(((struct BITMAP_RANGE *)data)->bitmap_off);
+		bmp_bits = le32_to_cpu(((struct BITMAP_RANGE *)data)->bits);
+
+		if (cbo + (bmp_off + 7) / 8 > lco ||
+		    cbo + ((bmp_off + bmp_bits + 7) / 8) > lco) {
+			goto dirty_vol;
+		}
+
+		__bitmap_clear(Add2Ptr(buffer_le, roff), bmp_off, bmp_bits);
+		a_dirty = true;
+		break;
+
+	case UpdateRecordDataAllocation:
+		ib = Add2Ptr(buffer_le, roff);
+		hdr = &ib->ihdr;
+		e = Add2Ptr(ib, aoff);
+
+		if (is_baad(&ib->rhdr))
+			goto dirty_vol;
+
+		if (!check_lsn(&ib->rhdr, rlsn))
+			goto out;
+		if (!check_index_buffer(ib, bytes) ||
+		    !check_if_alloc_index(hdr, aoff)) {
+			goto dirty_vol;
+		}
+
+		memmove(Add2Ptr(e, le16_to_cpu(e->view.data_off)), data, dlen);
+
+		a_dirty = true;
+		ntfs_fix_pre_write(&ib->rhdr, bytes);
+		break;
+
+	default:
+		WARN_ON(1);
+	}
+
+	if (rlsn) {
+		__le64 t64 = cpu_to_le64(*rlsn);
+
+		if (rec)
+			rec->rhdr.lsn = t64;
+		if (ib)
+			ib->rhdr.lsn = t64;
+	}
+
+	if (mi && mi->dirty) {
+		err = mi_write(mi, 0);
+		if (err)
+			goto out;
+	}
+
+	if (a_dirty) {
+		attr = oa->attr;
+		err = ntfs_sb_write_run(sbi, oa->run1, vbo, buffer_le, bytes);
+		if (err)
+			goto out;
+	}
+
+out:
+
+	if (inode)
+		iput(inode);
+	else if (mi != mi2_child)
+		mi_put(mi);
+
+	kfree(buffer_le);
+
+	return err;
+
+dirty_vol:
+	log->set_dirty = true;
+	goto out;
+}
+
+/*
+ * log_replay - Replays log and empties it.
+ *
+ * This function is called during mount operation.
+ * It replays log and empties it.
+ * Initialized is set false if logfile contains '-1'.
+ */
+int log_replay(struct ntfs_inode *ni, bool *initialized)
+{
+	int err;
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	struct ntfs_log *log;
+
+	struct restart_info rst_info, rst_info2;
+	u64 rec_lsn, ra_lsn, checkpt_lsn = 0, rlsn = 0;
+	struct ATTR_NAME_ENTRY *attr_names = NULL;
+	struct ATTR_NAME_ENTRY *ane;
+	struct RESTART_TABLE *dptbl = NULL;
+	struct RESTART_TABLE *trtbl = NULL;
+	const struct RESTART_TABLE *rt;
+	struct RESTART_TABLE *oatbl = NULL;
+	struct inode *inode;
+	struct OpenAttr *oa;
+	struct ntfs_inode *ni_oe;
+	struct ATTRIB *attr = NULL;
+	u64 size, vcn, undo_next_lsn;
+	CLST rno, lcn, lcn0, len0, clen;
+	void *data;
+	struct NTFS_RESTART *rst = NULL;
+	struct lcb *lcb = NULL;
+	struct OPEN_ATTR_ENRTY *oe;
+	struct TRANSACTION_ENTRY *tr;
+	struct DIR_PAGE_ENTRY *dp;
+	u32 i, bytes_per_attr_entry;
+	u32 l_size = ni->vfs_inode.i_size;
+	u32 orig_file_size = l_size;
+	u32 page_size, vbo, tail, off, dlen;
+	u32 saved_len, rec_len, transact_id;
+	bool use_second_page;
+	struct RESTART_AREA *ra2, *ra = NULL;
+	struct CLIENT_REC *ca, *cr;
+	__le16 client;
+	struct RESTART_HDR *rh;
+	const struct LFS_RECORD_HDR *frh;
+	const struct LOG_REC_HDR *lrh;
+	bool is_mapped;
+	bool is_ro = sb_rdonly(sbi->sb);
+	u64 t64;
+	u16 t16;
+	u32 t32;
+
+	/* Get the size of page. NOTE: To replay we can use default page. */
+#if PAGE_SIZE >= DefaultLogPageSize && PAGE_SIZE <= DefaultLogPageSize * 2
+	page_size = norm_file_page(PAGE_SIZE, &l_size, true);
+#else
+	page_size = norm_file_page(PAGE_SIZE, &l_size, false);
+#endif
+	if (!page_size)
+		return -EINVAL;
+
+	log = kzalloc(sizeof(struct ntfs_log), GFP_NOFS);
+	if (!log)
+		return -ENOMEM;
+
+	log->ni = ni;
+	log->l_size = l_size;
+	log->one_page_buf = kmalloc(page_size, GFP_NOFS);
+
+	if (!log->one_page_buf) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	log->page_size = page_size;
+	log->page_mask = page_size - 1;
+	log->page_bits = blksize_bits(page_size);
+
+	/* Look for a restart area on the disk. */
+	err = log_read_rst(log, l_size, true, &rst_info);
+	if (err)
+		goto out;
+
+	/* remember 'initialized' */
+	*initialized = rst_info.initialized;
+
+	if (!rst_info.restart) {
+		if (rst_info.initialized) {
+			/* No restart area but the file is not initialized. */
+			err = -EINVAL;
+			goto out;
+		}
+
+		log_init_pg_hdr(log, page_size, page_size, 1, 1);
+		log_create(log, l_size, 0, get_random_int(), false, false);
+
+		log->ra = ra;
+
+		ra = log_create_ra(log);
+		if (!ra) {
+			err = -ENOMEM;
+			goto out;
+		}
+		log->ra = ra;
+		log->init_ra = true;
+
+		goto process_log;
+	}
+
+	/*
+	 * If the restart offset above wasn't zero then we won't
+	 * look for a second restart.
+	 */
+	if (rst_info.vbo)
+		goto check_restart_area;
+
+	err = log_read_rst(log, l_size, false, &rst_info2);
+
+	/* Determine which restart area to use. */
+	if (!rst_info2.restart || rst_info2.last_lsn <= rst_info.last_lsn)
+		goto use_first_page;
+
+	use_second_page = true;
+
+	if (rst_info.chkdsk_was_run && page_size != rst_info.vbo) {
+		struct RECORD_PAGE_HDR *sp = NULL;
+		bool usa_error;
+
+		if (!read_log_page(log, page_size, &sp, &usa_error) &&
+		    sp->rhdr.sign == NTFS_CHKD_SIGNATURE) {
+			use_second_page = false;
+		}
+		kfree(sp);
+	}
+
+	if (use_second_page) {
+		kfree(rst_info.r_page);
+		memcpy(&rst_info, &rst_info2, sizeof(struct restart_info));
+		rst_info2.r_page = NULL;
+	}
+
+use_first_page:
+	kfree(rst_info2.r_page);
+
+check_restart_area:
+	/*
+	 * If the restart area is at offset 0, we want
+	 * to write the second restart area first.
+	 */
+	log->init_ra = !!rst_info.vbo;
+
+	/* If we have a valid page then grab a pointer to the restart area. */
+	ra2 = rst_info.valid_page
+		      ? Add2Ptr(rst_info.r_page,
+				le16_to_cpu(rst_info.r_page->ra_off))
+		      : NULL;
+
+	if (rst_info.chkdsk_was_run ||
+	    (ra2 && ra2->client_idx[1] == LFS_NO_CLIENT_LE)) {
+		bool wrapped = false;
+		bool use_multi_page = false;
+		u32 open_log_count;
+
+		/* Do some checks based on whether we have a valid log page. */
+		if (!rst_info.valid_page) {
+			open_log_count = get_random_int();
+			goto init_log_instance;
+		}
+		open_log_count = le32_to_cpu(ra2->open_log_count);
+
+		/*
+		 * If the restart page size isn't changing then we want to
+		 * check how much work we need to do.
+		 */
+		if (page_size != le32_to_cpu(rst_info.r_page->sys_page_size))
+			goto init_log_instance;
+
+init_log_instance:
+		log_init_pg_hdr(log, page_size, page_size, 1, 1);
+
+		log_create(log, l_size, rst_info.last_lsn, open_log_count,
+			   wrapped, use_multi_page);
+
+		ra = log_create_ra(log);
+		if (!ra) {
+			err = -ENOMEM;
+			goto out;
+		}
+		log->ra = ra;
+
+		/* Put the restart areas and initialize
+		 * the log file as required.
+		 */
+		goto process_log;
+	}
+
+	if (!ra2) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/*
+	 * If the log page or the system page sizes have changed, we can't
+	 * use the log file. We must use the system page size instead of the
+	 * default size if there is not a clean shutdown.
+	 */
+	t32 = le32_to_cpu(rst_info.r_page->sys_page_size);
+	if (page_size != t32) {
+		l_size = orig_file_size;
+		page_size =
+			norm_file_page(t32, &l_size, t32 == DefaultLogPageSize);
+	}
+
+	if (page_size != t32 ||
+	    page_size != le32_to_cpu(rst_info.r_page->page_size)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* If the file size has shrunk then we won't mount it. */
+	if (l_size < le64_to_cpu(ra2->l_size)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	log_init_pg_hdr(log, page_size, page_size,
+			le16_to_cpu(rst_info.r_page->major_ver),
+			le16_to_cpu(rst_info.r_page->minor_ver));
+
+	log->l_size = le64_to_cpu(ra2->l_size);
+	log->seq_num_bits = le32_to_cpu(ra2->seq_num_bits);
+	log->file_data_bits = sizeof(u64) * 8 - log->seq_num_bits;
+	log->seq_num_mask = (8 << log->file_data_bits) - 1;
+	log->last_lsn = le64_to_cpu(ra2->current_lsn);
+	log->seq_num = log->last_lsn >> log->file_data_bits;
+	log->ra_off = le16_to_cpu(rst_info.r_page->ra_off);
+	log->restart_size = log->sys_page_size - log->ra_off;
+	log->record_header_len = le16_to_cpu(ra2->rec_hdr_len);
+	log->ra_size = le16_to_cpu(ra2->ra_len);
+	log->data_off = le16_to_cpu(ra2->data_off);
+	log->data_size = log->page_size - log->data_off;
+	log->reserved = log->data_size - log->record_header_len;
+
+	vbo = lsn_to_vbo(log, log->last_lsn);
+
+	if (vbo < log->first_page) {
+		/* This is a pseudo lsn. */
+		log->l_flags |= NTFSLOG_NO_LAST_LSN;
+		log->next_page = log->first_page;
+		goto find_oldest;
+	}
+
+	/* Find the end of this log record. */
+	off = final_log_off(log, log->last_lsn,
+			    le32_to_cpu(ra2->last_lsn_data_len));
+
+	/* If we wrapped the file then increment the sequence number. */
+	if (off <= vbo) {
+		log->seq_num += 1;
+		log->l_flags |= NTFSLOG_WRAPPED;
+	}
+
+	/* Now compute the next log page to use. */
+	vbo &= ~log->sys_page_mask;
+	tail = log->page_size - (off & log->page_mask) - 1;
+
+	/*
+	 *If we can fit another log record on the page,
+	 * move back a page the log file.
+	 */
+	if (tail >= log->record_header_len) {
+		log->l_flags |= NTFSLOG_REUSE_TAIL;
+		log->next_page = vbo;
+	} else {
+		log->next_page = next_page_off(log, vbo);
+	}
+
+find_oldest:
+	/*
+	 * Find the oldest client lsn. Use the last
+	 * flushed lsn as a starting point.
+	 */
+	log->oldest_lsn = log->last_lsn;
+	oldest_client_lsn(Add2Ptr(ra2, le16_to_cpu(ra2->client_off)),
+			  ra2->client_idx[1], &log->oldest_lsn);
+	log->oldest_lsn_off = lsn_to_vbo(log, log->oldest_lsn);
+
+	if (log->oldest_lsn_off < log->first_page)
+		log->l_flags |= NTFSLOG_NO_OLDEST_LSN;
+
+	if (!(ra2->flags & RESTART_SINGLE_PAGE_IO))
+		log->l_flags |= NTFSLOG_WRAPPED | NTFSLOG_MULTIPLE_PAGE_IO;
+
+	log->current_openlog_count = le32_to_cpu(ra2->open_log_count);
+	log->total_avail_pages = log->l_size - log->first_page;
+	log->total_avail = log->total_avail_pages >> log->page_bits;
+	log->max_current_avail = log->total_avail * log->reserved;
+	log->total_avail = log->total_avail * log->data_size;
+
+	log->current_avail = current_log_avail(log);
+
+	ra = kzalloc(log->restart_size, GFP_NOFS);
+	if (!ra) {
+		err = -ENOMEM;
+		goto out;
+	}
+	log->ra = ra;
+
+	t16 = le16_to_cpu(ra2->client_off);
+	if (t16 == offsetof(struct RESTART_AREA, clients)) {
+		memcpy(ra, ra2, log->ra_size);
+	} else {
+		memcpy(ra, ra2, offsetof(struct RESTART_AREA, clients));
+		memcpy(ra->clients, Add2Ptr(ra2, t16),
+		       le16_to_cpu(ra2->ra_len) - t16);
+
+		log->current_openlog_count = get_random_int();
+		ra->open_log_count = cpu_to_le32(log->current_openlog_count);
+		log->ra_size = offsetof(struct RESTART_AREA, clients) +
+			       sizeof(struct CLIENT_REC);
+		ra->client_off =
+			cpu_to_le16(offsetof(struct RESTART_AREA, clients));
+		ra->ra_len = cpu_to_le16(log->ra_size);
+	}
+
+	le32_add_cpu(&ra->open_log_count, 1);
+
+	/* Now we need to walk through looking for the last lsn. */
+	err = last_log_lsn(log);
+	if (err)
+		goto out;
+
+	log->current_avail = current_log_avail(log);
+
+	/* Remember which restart area to write first. */
+	log->init_ra = rst_info.vbo;
+
+process_log:
+	/* 1.0, 1.1, 2.0 log->major_ver/minor_ver - short values. */
+	switch ((log->major_ver << 16) + log->minor_ver) {
+	case 0x10000:
+	case 0x10001:
+	case 0x20000:
+		break;
+	default:
+		ntfs_warn(sbi->sb, "\x24LogFile version %d.%d is not supported",
+			  log->major_ver, log->minor_ver);
+		err = -EOPNOTSUPP;
+		log->set_dirty = true;
+		goto out;
+	}
+
+	/* One client "NTFS" per logfile. */
+	ca = Add2Ptr(ra, le16_to_cpu(ra->client_off));
+
+	for (client = ra->client_idx[1];; client = cr->next_client) {
+		if (client == LFS_NO_CLIENT_LE) {
+			/* Insert "NTFS" client LogFile. */
+			client = ra->client_idx[0];
+			if (client == LFS_NO_CLIENT_LE)
+				return -EINVAL;
+
+			t16 = le16_to_cpu(client);
+			cr = ca + t16;
+
+			remove_client(ca, cr, &ra->client_idx[0]);
+
+			cr->restart_lsn = 0;
+			cr->oldest_lsn = cpu_to_le64(log->oldest_lsn);
+			cr->name_bytes = cpu_to_le32(8);
+			cr->name[0] = cpu_to_le16('N');
+			cr->name[1] = cpu_to_le16('T');
+			cr->name[2] = cpu_to_le16('F');
+			cr->name[3] = cpu_to_le16('S');
+
+			add_client(ca, t16, &ra->client_idx[1]);
+			break;
+		}
+
+		cr = ca + le16_to_cpu(client);
+
+		if (cpu_to_le32(8) == cr->name_bytes &&
+		    cpu_to_le16('N') == cr->name[0] &&
+		    cpu_to_le16('T') == cr->name[1] &&
+		    cpu_to_le16('F') == cr->name[2] &&
+		    cpu_to_le16('S') == cr->name[3])
+			break;
+	}
+
+	/* Update the client handle with the client block information. */
+	log->client_id.seq_num = cr->seq_num;
+	log->client_id.client_idx = client;
+
+	err = read_rst_area(log, &rst, &ra_lsn);
+	if (err)
+		goto out;
+
+	if (!rst)
+		goto out;
+
+	bytes_per_attr_entry = !rst->major_ver ? 0x2C : 0x28;
+
+	checkpt_lsn = le64_to_cpu(rst->check_point_start);
+	if (!checkpt_lsn)
+		checkpt_lsn = ra_lsn;
+
+	/* Allocate and Read the Transaction Table. */
+	if (!rst->transact_table_len)
+		goto check_dirty_page_table;
+
+	t64 = le64_to_cpu(rst->transact_table_lsn);
+	err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
+	if (err)
+		goto out;
+
+	lrh = lcb->log_rec;
+	frh = lcb->lrh;
+	rec_len = le32_to_cpu(frh->client_data_len);
+
+	if (!check_log_rec(lrh, rec_len, le32_to_cpu(frh->transact_id),
+			   bytes_per_attr_entry)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	t16 = le16_to_cpu(lrh->redo_off);
+
+	rt = Add2Ptr(lrh, t16);
+	t32 = rec_len - t16;
+
+	/* Now check that this is a valid restart table. */
+	if (!check_rstbl(rt, t32)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	trtbl = kmemdup(rt, t32, GFP_NOFS);
+	if (!trtbl) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	lcb_put(lcb);
+	lcb = NULL;
+
+check_dirty_page_table:
+	/* The next record back should be the Dirty Pages Table. */
+	if (!rst->dirty_pages_len)
+		goto check_attribute_names;
+
+	t64 = le64_to_cpu(rst->dirty_pages_table_lsn);
+	err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
+	if (err)
+		goto out;
+
+	lrh = lcb->log_rec;
+	frh = lcb->lrh;
+	rec_len = le32_to_cpu(frh->client_data_len);
+
+	if (!check_log_rec(lrh, rec_len, le32_to_cpu(frh->transact_id),
+			   bytes_per_attr_entry)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	t16 = le16_to_cpu(lrh->redo_off);
+
+	rt = Add2Ptr(lrh, t16);
+	t32 = rec_len - t16;
+
+	/* Now check that this is a valid restart table. */
+	if (!check_rstbl(rt, t32)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	dptbl = kmemdup(rt, t32, GFP_NOFS);
+	if (!dptbl) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* Convert Ra version '0' into version '1'. */
+	if (rst->major_ver)
+		goto end_conv_1;
+
+	dp = NULL;
+	while ((dp = enum_rstbl(dptbl, dp))) {
+		struct DIR_PAGE_ENTRY_32 *dp0 = (struct DIR_PAGE_ENTRY_32 *)dp;
+		// NOTE: Danger. Check for of boundary.
+		memmove(&dp->vcn, &dp0->vcn_low,
+			2 * sizeof(u64) +
+				le32_to_cpu(dp->lcns_follow) * sizeof(u64));
+	}
+
+end_conv_1:
+	lcb_put(lcb);
+	lcb = NULL;
+
+	/*
+	 * Go through the table and remove the duplicates,
+	 * remembering the oldest lsn values.
+	 */
+	if (sbi->cluster_size <= log->page_size)
+		goto trace_dp_table;
+
+	dp = NULL;
+	while ((dp = enum_rstbl(dptbl, dp))) {
+		struct DIR_PAGE_ENTRY *next = dp;
+
+		while ((next = enum_rstbl(dptbl, next))) {
+			if (next->target_attr == dp->target_attr &&
+			    next->vcn == dp->vcn) {
+				if (le64_to_cpu(next->oldest_lsn) <
+				    le64_to_cpu(dp->oldest_lsn)) {
+					dp->oldest_lsn = next->oldest_lsn;
+				}
+
+				free_rsttbl_idx(dptbl, PtrOffset(dptbl, next));
+			}
+		}
+	}
+trace_dp_table:
+check_attribute_names:
+	/* The next record should be the Attribute Names. */
+	if (!rst->attr_names_len)
+		goto check_attr_table;
+
+	t64 = le64_to_cpu(rst->attr_names_lsn);
+	err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
+	if (err)
+		goto out;
+
+	lrh = lcb->log_rec;
+	frh = lcb->lrh;
+	rec_len = le32_to_cpu(frh->client_data_len);
+
+	if (!check_log_rec(lrh, rec_len, le32_to_cpu(frh->transact_id),
+			   bytes_per_attr_entry)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	t32 = lrh_length(lrh);
+	rec_len -= t32;
+
+	attr_names = kmemdup(Add2Ptr(lrh, t32), rec_len, GFP_NOFS);
+
+	lcb_put(lcb);
+	lcb = NULL;
+
+check_attr_table:
+	/* The next record should be the attribute Table. */
+	if (!rst->open_attr_len)
+		goto check_attribute_names2;
+
+	t64 = le64_to_cpu(rst->open_attr_table_lsn);
+	err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
+	if (err)
+		goto out;
+
+	lrh = lcb->log_rec;
+	frh = lcb->lrh;
+	rec_len = le32_to_cpu(frh->client_data_len);
+
+	if (!check_log_rec(lrh, rec_len, le32_to_cpu(frh->transact_id),
+			   bytes_per_attr_entry)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	t16 = le16_to_cpu(lrh->redo_off);
+
+	rt = Add2Ptr(lrh, t16);
+	t32 = rec_len - t16;
+
+	if (!check_rstbl(rt, t32)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	oatbl = kmemdup(rt, t32, GFP_NOFS);
+	if (!oatbl) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	log->open_attr_tbl = oatbl;
+
+	/* Clear all of the Attr pointers. */
+	oe = NULL;
+	while ((oe = enum_rstbl(oatbl, oe))) {
+		if (!rst->major_ver) {
+			struct OPEN_ATTR_ENRTY_32 oe0;
+
+			/* Really 'oe' points to OPEN_ATTR_ENRTY_32. */
+			memcpy(&oe0, oe, SIZEOF_OPENATTRIBUTEENTRY0);
+
+			oe->bytes_per_index = oe0.bytes_per_index;
+			oe->type = oe0.type;
+			oe->is_dirty_pages = oe0.is_dirty_pages;
+			oe->name_len = 0;
+			oe->ref = oe0.ref;
+			oe->open_record_lsn = oe0.open_record_lsn;
+		}
+
+		oe->is_attr_name = 0;
+		oe->ptr = NULL;
+	}
+
+	lcb_put(lcb);
+	lcb = NULL;
+
+check_attribute_names2:
+	if (!rst->attr_names_len)
+		goto trace_attribute_table;
+
+	ane = attr_names;
+	if (!oatbl)
+		goto trace_attribute_table;
+	while (ane->off) {
+		/* TODO: Clear table on exit! */
+		oe = Add2Ptr(oatbl, le16_to_cpu(ane->off));
+		t16 = le16_to_cpu(ane->name_bytes);
+		oe->name_len = t16 / sizeof(short);
+		oe->ptr = ane->name;
+		oe->is_attr_name = 2;
+		ane = Add2Ptr(ane, sizeof(struct ATTR_NAME_ENTRY) + t16);
+	}
+
+trace_attribute_table:
+	/*
+	 * If the checkpt_lsn is zero, then this is a freshly
+	 * formatted disk and we have no work to do.
+	 */
+	if (!checkpt_lsn) {
+		err = 0;
+		goto out;
+	}
+
+	if (!oatbl) {
+		oatbl = init_rsttbl(bytes_per_attr_entry, 8);
+		if (!oatbl) {
+			err = -ENOMEM;
+			goto out;
+		}
+	}
+
+	log->open_attr_tbl = oatbl;
+
+	/* Start the analysis pass from the Checkpoint lsn. */
+	rec_lsn = checkpt_lsn;
+
+	/* Read the first lsn. */
+	err = read_log_rec_lcb(log, checkpt_lsn, lcb_ctx_next, &lcb);
+	if (err)
+		goto out;
+
+	/* Loop to read all subsequent records to the end of the log file. */
+next_log_record_analyze:
+	err = read_next_log_rec(log, lcb, &rec_lsn);
+	if (err)
+		goto out;
+
+	if (!rec_lsn)
+		goto end_log_records_enumerate;
+
+	frh = lcb->lrh;
+	transact_id = le32_to_cpu(frh->transact_id);
+	rec_len = le32_to_cpu(frh->client_data_len);
+	lrh = lcb->log_rec;
+
+	if (!check_log_rec(lrh, rec_len, transact_id, bytes_per_attr_entry)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/*
+	 * The first lsn after the previous lsn remembered
+	 * the checkpoint is the first candidate for the rlsn.
+	 */
+	if (!rlsn)
+		rlsn = rec_lsn;
+
+	if (LfsClientRecord != frh->record_type)
+		goto next_log_record_analyze;
+
+	/*
+	 * Now update the Transaction Table for this transaction. If there
+	 * is no entry present or it is unallocated we allocate the entry.
+	 */
+	if (!trtbl) {
+		trtbl = init_rsttbl(sizeof(struct TRANSACTION_ENTRY),
+				    INITIAL_NUMBER_TRANSACTIONS);
+		if (!trtbl) {
+			err = -ENOMEM;
+			goto out;
+		}
+	}
+
+	tr = Add2Ptr(trtbl, transact_id);
+
+	if (transact_id >= bytes_per_rt(trtbl) ||
+	    tr->next != RESTART_ENTRY_ALLOCATED_LE) {
+		tr = alloc_rsttbl_from_idx(&trtbl, transact_id);
+		if (!tr) {
+			err = -ENOMEM;
+			goto out;
+		}
+		tr->transact_state = TransactionActive;
+		tr->first_lsn = cpu_to_le64(rec_lsn);
+	}
+
+	tr->prev_lsn = tr->undo_next_lsn = cpu_to_le64(rec_lsn);
+
+	/*
+	 * If this is a compensation log record, then change
+	 * the undo_next_lsn to be the undo_next_lsn of this record.
+	 */
+	if (lrh->undo_op == cpu_to_le16(CompensationLogRecord))
+		tr->undo_next_lsn = frh->client_undo_next_lsn;
+
+	/* Dispatch to handle log record depending on type. */
+	switch (le16_to_cpu(lrh->redo_op)) {
+	case InitializeFileRecordSegment:
+	case DeallocateFileRecordSegment:
+	case WriteEndOfFileRecordSegment:
+	case CreateAttribute:
+	case DeleteAttribute:
+	case UpdateResidentValue:
+	case UpdateNonresidentValue:
+	case UpdateMappingPairs:
+	case SetNewAttributeSizes:
+	case AddIndexEntryRoot:
+	case DeleteIndexEntryRoot:
+	case AddIndexEntryAllocation:
+	case DeleteIndexEntryAllocation:
+	case WriteEndOfIndexBuffer:
+	case SetIndexEntryVcnRoot:
+	case SetIndexEntryVcnAllocation:
+	case UpdateFileNameRoot:
+	case UpdateFileNameAllocation:
+	case SetBitsInNonresidentBitMap:
+	case ClearBitsInNonresidentBitMap:
+	case UpdateRecordDataRoot:
+	case UpdateRecordDataAllocation:
+	case ZeroEndOfFileRecord:
+		t16 = le16_to_cpu(lrh->target_attr);
+		t64 = le64_to_cpu(lrh->target_vcn);
+		dp = find_dp(dptbl, t16, t64);
+
+		if (dp)
+			goto copy_lcns;
+
+		/*
+		 * Calculate the number of clusters per page the system
+		 * which wrote the checkpoint, possibly creating the table.
+		 */
+		if (dptbl) {
+			t32 = (le16_to_cpu(dptbl->size) -
+			       sizeof(struct DIR_PAGE_ENTRY)) /
+			      sizeof(u64);
+		} else {
+			t32 = log->clst_per_page;
+			kfree(dptbl);
+			dptbl = init_rsttbl(struct_size(dp, page_lcns, t32),
+					    32);
+			if (!dptbl) {
+				err = -ENOMEM;
+				goto out;
+			}
+		}
+
+		dp = alloc_rsttbl_idx(&dptbl);
+		if (!dp) {
+			err = -ENOMEM;
+			goto out;
+		}
+		dp->target_attr = cpu_to_le32(t16);
+		dp->transfer_len = cpu_to_le32(t32 << sbi->cluster_bits);
+		dp->lcns_follow = cpu_to_le32(t32);
+		dp->vcn = cpu_to_le64(t64 & ~((u64)t32 - 1));
+		dp->oldest_lsn = cpu_to_le64(rec_lsn);
+
+copy_lcns:
+		/*
+		 * Copy the Lcns from the log record into the Dirty Page Entry.
+		 * TODO: For different page size support, must somehow make
+		 * whole routine a loop, case Lcns do not fit below.
+		 */
+		t16 = le16_to_cpu(lrh->lcns_follow);
+		for (i = 0; i < t16; i++) {
+			size_t j = (size_t)(le64_to_cpu(lrh->target_vcn) -
+					    le64_to_cpu(dp->vcn));
+			dp->page_lcns[j + i] = lrh->page_lcns[i];
+		}
+
+		goto next_log_record_analyze;
+
+	case DeleteDirtyClusters: {
+		u32 range_count =
+			le16_to_cpu(lrh->redo_len) / sizeof(struct LCN_RANGE);
+		const struct LCN_RANGE *r =
+			Add2Ptr(lrh, le16_to_cpu(lrh->redo_off));
+
+		/* Loop through all of the Lcn ranges this log record. */
+		for (i = 0; i < range_count; i++, r++) {
+			u64 lcn0 = le64_to_cpu(r->lcn);
+			u64 lcn_e = lcn0 + le64_to_cpu(r->len) - 1;
+
+			dp = NULL;
+			while ((dp = enum_rstbl(dptbl, dp))) {
+				u32 j;
+
+				t32 = le32_to_cpu(dp->lcns_follow);
+				for (j = 0; j < t32; j++) {
+					t64 = le64_to_cpu(dp->page_lcns[j]);
+					if (t64 >= lcn0 && t64 <= lcn_e)
+						dp->page_lcns[j] = 0;
+				}
+			}
+		}
+		goto next_log_record_analyze;
+		;
+	}
+
+	case OpenNonresidentAttribute:
+		t16 = le16_to_cpu(lrh->target_attr);
+		if (t16 >= bytes_per_rt(oatbl)) {
+			/*
+			 * Compute how big the table needs to be.
+			 * Add 10 extra entries for some cushion.
+			 */
+			u32 new_e = t16 / le16_to_cpu(oatbl->size);
+
+			new_e += 10 - le16_to_cpu(oatbl->used);
+
+			oatbl = extend_rsttbl(oatbl, new_e, ~0u);
+			log->open_attr_tbl = oatbl;
+			if (!oatbl) {
+				err = -ENOMEM;
+				goto out;
+			}
+		}
+
+		/* Point to the entry being opened. */
+		oe = alloc_rsttbl_from_idx(&oatbl, t16);
+		log->open_attr_tbl = oatbl;
+		if (!oe) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		/* Initialize this entry from the log record. */
+		t16 = le16_to_cpu(lrh->redo_off);
+		if (!rst->major_ver) {
+			/* Convert version '0' into version '1'. */
+			struct OPEN_ATTR_ENRTY_32 *oe0 = Add2Ptr(lrh, t16);
+
+			oe->bytes_per_index = oe0->bytes_per_index;
+			oe->type = oe0->type;
+			oe->is_dirty_pages = oe0->is_dirty_pages;
+			oe->name_len = 0; //oe0.name_len;
+			oe->ref = oe0->ref;
+			oe->open_record_lsn = oe0->open_record_lsn;
+		} else {
+			memcpy(oe, Add2Ptr(lrh, t16), bytes_per_attr_entry);
+		}
+
+		t16 = le16_to_cpu(lrh->undo_len);
+		if (t16) {
+			oe->ptr = kmalloc(t16, GFP_NOFS);
+			if (!oe->ptr) {
+				err = -ENOMEM;
+				goto out;
+			}
+			oe->name_len = t16 / sizeof(short);
+			memcpy(oe->ptr,
+			       Add2Ptr(lrh, le16_to_cpu(lrh->undo_off)), t16);
+			oe->is_attr_name = 1;
+		} else {
+			oe->ptr = NULL;
+			oe->is_attr_name = 0;
+		}
+
+		goto next_log_record_analyze;
+
+	case HotFix:
+		t16 = le16_to_cpu(lrh->target_attr);
+		t64 = le64_to_cpu(lrh->target_vcn);
+		dp = find_dp(dptbl, t16, t64);
+		if (dp) {
+			size_t j = le64_to_cpu(lrh->target_vcn) -
+				   le64_to_cpu(dp->vcn);
+			if (dp->page_lcns[j])
+				dp->page_lcns[j] = lrh->page_lcns[0];
+		}
+		goto next_log_record_analyze;
+
+	case EndTopLevelAction:
+		tr = Add2Ptr(trtbl, transact_id);
+		tr->prev_lsn = cpu_to_le64(rec_lsn);
+		tr->undo_next_lsn = frh->client_undo_next_lsn;
+		goto next_log_record_analyze;
+
+	case PrepareTransaction:
+		tr = Add2Ptr(trtbl, transact_id);
+		tr->transact_state = TransactionPrepared;
+		goto next_log_record_analyze;
+
+	case CommitTransaction:
+		tr = Add2Ptr(trtbl, transact_id);
+		tr->transact_state = TransactionCommitted;
+		goto next_log_record_analyze;
+
+	case ForgetTransaction:
+		free_rsttbl_idx(trtbl, transact_id);
+		goto next_log_record_analyze;
+
+	case Noop:
+	case OpenAttributeTableDump:
+	case AttributeNamesDump:
+	case DirtyPageTableDump:
+	case TransactionTableDump:
+		/* The following cases require no action the Analysis Pass. */
+		goto next_log_record_analyze;
+
+	default:
+		/*
+		 * All codes will be explicitly handled.
+		 * If we see a code we do not expect, then we are trouble.
+		 */
+		goto next_log_record_analyze;
+	}
+
+end_log_records_enumerate:
+	lcb_put(lcb);
+	lcb = NULL;
+
+	/*
+	 * Scan the Dirty Page Table and Transaction Table for
+	 * the lowest lsn, and return it as the Redo lsn.
+	 */
+	dp = NULL;
+	while ((dp = enum_rstbl(dptbl, dp))) {
+		t64 = le64_to_cpu(dp->oldest_lsn);
+		if (t64 && t64 < rlsn)
+			rlsn = t64;
+	}
+
+	tr = NULL;
+	while ((tr = enum_rstbl(trtbl, tr))) {
+		t64 = le64_to_cpu(tr->first_lsn);
+		if (t64 && t64 < rlsn)
+			rlsn = t64;
+	}
+
+	/*
+	 * Only proceed if the Dirty Page Table or Transaction
+	 * table are not empty.
+	 */
+	if ((!dptbl || !dptbl->total) && (!trtbl || !trtbl->total))
+		goto end_reply;
+
+	sbi->flags |= NTFS_FLAGS_NEED_REPLAY;
+	if (is_ro)
+		goto out;
+
+	/* Reopen all of the attributes with dirty pages. */
+	oe = NULL;
+next_open_attribute:
+
+	oe = enum_rstbl(oatbl, oe);
+	if (!oe) {
+		err = 0;
+		dp = NULL;
+		goto next_dirty_page;
+	}
+
+	oa = kzalloc(sizeof(struct OpenAttr), GFP_NOFS);
+	if (!oa) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	inode = ntfs_iget5(sbi->sb, &oe->ref, NULL);
+	if (IS_ERR(inode))
+		goto fake_attr;
+
+	if (is_bad_inode(inode)) {
+		iput(inode);
+fake_attr:
+		if (oa->ni) {
+			iput(&oa->ni->vfs_inode);
+			oa->ni = NULL;
+		}
+
+		attr = attr_create_nonres_log(sbi, oe->type, 0, oe->ptr,
+					      oe->name_len, 0);
+		if (!attr) {
+			kfree(oa);
+			err = -ENOMEM;
+			goto out;
+		}
+		oa->attr = attr;
+		oa->run1 = &oa->run0;
+		goto final_oe;
+	}
+
+	ni_oe = ntfs_i(inode);
+	oa->ni = ni_oe;
+
+	attr = ni_find_attr(ni_oe, NULL, NULL, oe->type, oe->ptr, oe->name_len,
+			    NULL, NULL);
+
+	if (!attr)
+		goto fake_attr;
+
+	t32 = le32_to_cpu(attr->size);
+	oa->attr = kmemdup(attr, t32, GFP_NOFS);
+	if (!oa->attr)
+		goto fake_attr;
+
+	if (!S_ISDIR(inode->i_mode)) {
+		if (attr->type == ATTR_DATA && !attr->name_len) {
+			oa->run1 = &ni_oe->file.run;
+			goto final_oe;
+		}
+	} else {
+		if (attr->type == ATTR_ALLOC &&
+		    attr->name_len == ARRAY_SIZE(I30_NAME) &&
+		    !memcmp(attr_name(attr), I30_NAME, sizeof(I30_NAME))) {
+			oa->run1 = &ni_oe->dir.alloc_run;
+			goto final_oe;
+		}
+	}
+
+	if (attr->non_res) {
+		u16 roff = le16_to_cpu(attr->nres.run_off);
+		CLST svcn = le64_to_cpu(attr->nres.svcn);
+
+		err = run_unpack(&oa->run0, sbi, inode->i_ino, svcn,
+				 le64_to_cpu(attr->nres.evcn), svcn,
+				 Add2Ptr(attr, roff), t32 - roff);
+		if (err < 0) {
+			kfree(oa->attr);
+			oa->attr = NULL;
+			goto fake_attr;
+		}
+		err = 0;
+	}
+	oa->run1 = &oa->run0;
+	attr = oa->attr;
+
+final_oe:
+	if (oe->is_attr_name == 1)
+		kfree(oe->ptr);
+	oe->is_attr_name = 0;
+	oe->ptr = oa;
+	oe->name_len = attr->name_len;
+
+	goto next_open_attribute;
+
+	/*
+	 * Now loop through the dirty page table to extract all of the Vcn/Lcn.
+	 * Mapping that we have, and insert it into the appropriate run.
+	 */
+next_dirty_page:
+	dp = enum_rstbl(dptbl, dp);
+	if (!dp)
+		goto do_redo_1;
+
+	oe = Add2Ptr(oatbl, le32_to_cpu(dp->target_attr));
+
+	if (oe->next != RESTART_ENTRY_ALLOCATED_LE)
+		goto next_dirty_page;
+
+	oa = oe->ptr;
+	if (!oa)
+		goto next_dirty_page;
+
+	i = -1;
+next_dirty_page_vcn:
+	i += 1;
+	if (i >= le32_to_cpu(dp->lcns_follow))
+		goto next_dirty_page;
+
+	vcn = le64_to_cpu(dp->vcn) + i;
+	size = (vcn + 1) << sbi->cluster_bits;
+
+	if (!dp->page_lcns[i])
+		goto next_dirty_page_vcn;
+
+	rno = ino_get(&oe->ref);
+	if (rno <= MFT_REC_MIRR &&
+	    size < (MFT_REC_VOL + 1) * sbi->record_size &&
+	    oe->type == ATTR_DATA) {
+		goto next_dirty_page_vcn;
+	}
+
+	lcn = le64_to_cpu(dp->page_lcns[i]);
+
+	if ((!run_lookup_entry(oa->run1, vcn, &lcn0, &len0, NULL) ||
+	     lcn0 != lcn) &&
+	    !run_add_entry(oa->run1, vcn, lcn, 1, false)) {
+		err = -ENOMEM;
+		goto out;
+	}
+	attr = oa->attr;
+	t64 = le64_to_cpu(attr->nres.alloc_size);
+	if (size > t64) {
+		attr->nres.valid_size = attr->nres.data_size =
+			attr->nres.alloc_size = cpu_to_le64(size);
+	}
+	goto next_dirty_page_vcn;
+
+do_redo_1:
+	/*
+	 * Perform the Redo Pass, to restore all of the dirty pages to the same
+	 * contents that they had immediately before the crash. If the dirty
+	 * page table is empty, then we can skip the entire Redo Pass.
+	 */
+	if (!dptbl || !dptbl->total)
+		goto do_undo_action;
+
+	rec_lsn = rlsn;
+
+	/*
+	 * Read the record at the Redo lsn, before falling
+	 * into common code to handle each record.
+	 */
+	err = read_log_rec_lcb(log, rlsn, lcb_ctx_next, &lcb);
+	if (err)
+		goto out;
+
+	/*
+	 * Now loop to read all of our log records forwards, until
+	 * we hit the end of the file, cleaning up at the end.
+	 */
+do_action_next:
+	frh = lcb->lrh;
+
+	if (LfsClientRecord != frh->record_type)
+		goto read_next_log_do_action;
+
+	transact_id = le32_to_cpu(frh->transact_id);
+	rec_len = le32_to_cpu(frh->client_data_len);
+	lrh = lcb->log_rec;
+
+	if (!check_log_rec(lrh, rec_len, transact_id, bytes_per_attr_entry)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Ignore log records that do not update pages. */
+	if (lrh->lcns_follow)
+		goto find_dirty_page;
+
+	goto read_next_log_do_action;
+
+find_dirty_page:
+	t16 = le16_to_cpu(lrh->target_attr);
+	t64 = le64_to_cpu(lrh->target_vcn);
+	dp = find_dp(dptbl, t16, t64);
+
+	if (!dp)
+		goto read_next_log_do_action;
+
+	if (rec_lsn < le64_to_cpu(dp->oldest_lsn))
+		goto read_next_log_do_action;
+
+	t16 = le16_to_cpu(lrh->target_attr);
+	if (t16 >= bytes_per_rt(oatbl)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	oe = Add2Ptr(oatbl, t16);
+
+	if (oe->next != RESTART_ENTRY_ALLOCATED_LE) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	oa = oe->ptr;
+
+	if (!oa) {
+		err = -EINVAL;
+		goto out;
+	}
+	attr = oa->attr;
+
+	vcn = le64_to_cpu(lrh->target_vcn);
+
+	if (!run_lookup_entry(oa->run1, vcn, &lcn, NULL, NULL) ||
+	    lcn == SPARSE_LCN) {
+		goto read_next_log_do_action;
+	}
+
+	/* Point to the Redo data and get its length. */
+	data = Add2Ptr(lrh, le16_to_cpu(lrh->redo_off));
+	dlen = le16_to_cpu(lrh->redo_len);
+
+	/* Shorten length by any Lcns which were deleted. */
+	saved_len = dlen;
+
+	for (i = le16_to_cpu(lrh->lcns_follow); i; i--) {
+		size_t j;
+		u32 alen, voff;
+
+		voff = le16_to_cpu(lrh->record_off) +
+		       le16_to_cpu(lrh->attr_off);
+		voff += le16_to_cpu(lrh->cluster_off) << SECTOR_SHIFT;
+
+		/* If the Vcn question is allocated, we can just get out. */
+		j = le64_to_cpu(lrh->target_vcn) - le64_to_cpu(dp->vcn);
+		if (dp->page_lcns[j + i - 1])
+			break;
+
+		if (!saved_len)
+			saved_len = 1;
+
+		/*
+		 * Calculate the allocated space left relative to the
+		 * log record Vcn, after removing this unallocated Vcn.
+		 */
+		alen = (i - 1) << sbi->cluster_bits;
+
+		/*
+		 * If the update described this log record goes beyond
+		 * the allocated space, then we will have to reduce the length.
+		 */
+		if (voff >= alen)
+			dlen = 0;
+		else if (voff + dlen > alen)
+			dlen = alen - voff;
+	}
+
+	/*
+	 * If the resulting dlen from above is now zero,
+	 * we can skip this log record.
+	 */
+	if (!dlen && saved_len)
+		goto read_next_log_do_action;
+
+	t16 = le16_to_cpu(lrh->redo_op);
+	if (can_skip_action(t16))
+		goto read_next_log_do_action;
+
+	/* Apply the Redo operation a common routine. */
+	err = do_action(log, oe, lrh, t16, data, dlen, rec_len, &rec_lsn);
+	if (err)
+		goto out;
+
+	/* Keep reading and looping back until end of file. */
+read_next_log_do_action:
+	err = read_next_log_rec(log, lcb, &rec_lsn);
+	if (!err && rec_lsn)
+		goto do_action_next;
+
+	lcb_put(lcb);
+	lcb = NULL;
+
+do_undo_action:
+	/* Scan Transaction Table. */
+	tr = NULL;
+transaction_table_next:
+	tr = enum_rstbl(trtbl, tr);
+	if (!tr)
+		goto undo_action_done;
+
+	if (TransactionActive != tr->transact_state || !tr->undo_next_lsn) {
+		free_rsttbl_idx(trtbl, PtrOffset(trtbl, tr));
+		goto transaction_table_next;
+	}
+
+	log->transaction_id = PtrOffset(trtbl, tr);
+	undo_next_lsn = le64_to_cpu(tr->undo_next_lsn);
+
+	/*
+	 * We only have to do anything if the transaction has
+	 * something its undo_next_lsn field.
+	 */
+	if (!undo_next_lsn)
+		goto commit_undo;
+
+	/* Read the first record to be undone by this transaction. */
+	err = read_log_rec_lcb(log, undo_next_lsn, lcb_ctx_undo_next, &lcb);
+	if (err)
+		goto out;
+
+	/*
+	 * Now loop to read all of our log records forwards,
+	 * until we hit the end of the file, cleaning up at the end.
+	 */
+undo_action_next:
+
+	lrh = lcb->log_rec;
+	frh = lcb->lrh;
+	transact_id = le32_to_cpu(frh->transact_id);
+	rec_len = le32_to_cpu(frh->client_data_len);
+
+	if (!check_log_rec(lrh, rec_len, transact_id, bytes_per_attr_entry)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (lrh->undo_op == cpu_to_le16(Noop))
+		goto read_next_log_undo_action;
+
+	oe = Add2Ptr(oatbl, le16_to_cpu(lrh->target_attr));
+	oa = oe->ptr;
+
+	t16 = le16_to_cpu(lrh->lcns_follow);
+	if (!t16)
+		goto add_allocated_vcns;
+
+	is_mapped = run_lookup_entry(oa->run1, le64_to_cpu(lrh->target_vcn),
+				     &lcn, &clen, NULL);
+
+	/*
+	 * If the mapping isn't already the table or the  mapping
+	 * corresponds to a hole the mapping, we need to make sure
+	 * there is no partial page already memory.
+	 */
+	if (is_mapped && lcn != SPARSE_LCN && clen >= t16)
+		goto add_allocated_vcns;
+
+	vcn = le64_to_cpu(lrh->target_vcn);
+	vcn &= ~(log->clst_per_page - 1);
+
+add_allocated_vcns:
+	for (i = 0, vcn = le64_to_cpu(lrh->target_vcn),
+	    size = (vcn + 1) << sbi->cluster_bits;
+	     i < t16; i++, vcn += 1, size += sbi->cluster_size) {
+		attr = oa->attr;
+		if (!attr->non_res) {
+			if (size > le32_to_cpu(attr->res.data_size))
+				attr->res.data_size = cpu_to_le32(size);
+		} else {
+			if (size > le64_to_cpu(attr->nres.data_size))
+				attr->nres.valid_size = attr->nres.data_size =
+					attr->nres.alloc_size =
+						cpu_to_le64(size);
+		}
+	}
+
+	t16 = le16_to_cpu(lrh->undo_op);
+	if (can_skip_action(t16))
+		goto read_next_log_undo_action;
+
+	/* Point to the Redo data and get its length. */
+	data = Add2Ptr(lrh, le16_to_cpu(lrh->undo_off));
+	dlen = le16_to_cpu(lrh->undo_len);
+
+	/* It is time to apply the undo action. */
+	err = do_action(log, oe, lrh, t16, data, dlen, rec_len, NULL);
+
+read_next_log_undo_action:
+	/*
+	 * Keep reading and looping back until we have read the
+	 * last record for this transaction.
+	 */
+	err = read_next_log_rec(log, lcb, &rec_lsn);
+	if (err)
+		goto out;
+
+	if (rec_lsn)
+		goto undo_action_next;
+
+	lcb_put(lcb);
+	lcb = NULL;
+
+commit_undo:
+	free_rsttbl_idx(trtbl, log->transaction_id);
+
+	log->transaction_id = 0;
+
+	goto transaction_table_next;
+
+undo_action_done:
+
+	ntfs_update_mftmirr(sbi, 0);
+
+	sbi->flags &= ~NTFS_FLAGS_NEED_REPLAY;
+
+end_reply:
+
+	err = 0;
+	if (is_ro)
+		goto out;
+
+	rh = kzalloc(log->page_size, GFP_NOFS);
+	if (!rh) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	rh->rhdr.sign = NTFS_RSTR_SIGNATURE;
+	rh->rhdr.fix_off = cpu_to_le16(offsetof(struct RESTART_HDR, fixups));
+	t16 = (log->page_size >> SECTOR_SHIFT) + 1;
+	rh->rhdr.fix_num = cpu_to_le16(t16);
+	rh->sys_page_size = cpu_to_le32(log->page_size);
+	rh->page_size = cpu_to_le32(log->page_size);
+
+	t16 = ALIGN(offsetof(struct RESTART_HDR, fixups) + sizeof(short) * t16,
+		    8);
+	rh->ra_off = cpu_to_le16(t16);
+	rh->minor_ver = cpu_to_le16(1); // 0x1A:
+	rh->major_ver = cpu_to_le16(1); // 0x1C:
+
+	ra2 = Add2Ptr(rh, t16);
+	memcpy(ra2, ra, sizeof(struct RESTART_AREA));
+
+	ra2->client_idx[0] = 0;
+	ra2->client_idx[1] = LFS_NO_CLIENT_LE;
+	ra2->flags = cpu_to_le16(2);
+
+	le32_add_cpu(&ra2->open_log_count, 1);
+
+	ntfs_fix_pre_write(&rh->rhdr, log->page_size);
+
+	err = ntfs_sb_write_run(sbi, &ni->file.run, 0, rh, log->page_size);
+	if (!err)
+		err = ntfs_sb_write_run(sbi, &log->ni->file.run, log->page_size,
+					rh, log->page_size);
+
+	kfree(rh);
+	if (err)
+		goto out;
+
+out:
+	kfree(rst);
+	if (lcb)
+		lcb_put(lcb);
+
+	/*
+	 * Scan the Open Attribute Table to close all of
+	 * the open attributes.
+	 */
+	oe = NULL;
+	while ((oe = enum_rstbl(oatbl, oe))) {
+		rno = ino_get(&oe->ref);
+
+		if (oe->is_attr_name == 1) {
+			kfree(oe->ptr);
+			oe->ptr = NULL;
+			continue;
+		}
+
+		if (oe->is_attr_name)
+			continue;
+
+		oa = oe->ptr;
+		if (!oa)
+			continue;
+
+		run_close(&oa->run0);
+		kfree(oa->attr);
+		if (oa->ni)
+			iput(&oa->ni->vfs_inode);
+		kfree(oa);
+	}
+
+	kfree(trtbl);
+	kfree(oatbl);
+	kfree(dptbl);
+	kfree(attr_names);
+	kfree(rst_info.r_page);
+
+	kfree(ra);
+	kfree(log->one_page_buf);
+
+	if (err)
+		sbi->flags |= NTFS_FLAGS_NEED_REPLAY;
+
+	if (err == -EROFS)
+		err = 0;
+	else if (log->set_dirty)
+		ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
+
+	kfree(log);
+
+	return err;
+}
diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c
new file mode 100644
index 000000000000..91e3743e1442
--- /dev/null
+++ b/fs/ntfs3/fsntfs.c
@@ -0,0 +1,2509 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/fs.h>
+#include <linux/nls.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+// clang-format off
+const struct cpu_str NAME_MFT = {
+	4, 0, { '$', 'M', 'F', 'T' },
+};
+const struct cpu_str NAME_MIRROR = {
+	8, 0, { '$', 'M', 'F', 'T', 'M', 'i', 'r', 'r' },
+};
+const struct cpu_str NAME_LOGFILE = {
+	8, 0, { '$', 'L', 'o', 'g', 'F', 'i', 'l', 'e' },
+};
+const struct cpu_str NAME_VOLUME = {
+	7, 0, { '$', 'V', 'o', 'l', 'u', 'm', 'e' },
+};
+const struct cpu_str NAME_ATTRDEF = {
+	8, 0, { '$', 'A', 't', 't', 'r', 'D', 'e', 'f' },
+};
+const struct cpu_str NAME_ROOT = {
+	1, 0, { '.' },
+};
+const struct cpu_str NAME_BITMAP = {
+	7, 0, { '$', 'B', 'i', 't', 'm', 'a', 'p' },
+};
+const struct cpu_str NAME_BOOT = {
+	5, 0, { '$', 'B', 'o', 'o', 't' },
+};
+const struct cpu_str NAME_BADCLUS = {
+	8, 0, { '$', 'B', 'a', 'd', 'C', 'l', 'u', 's' },
+};
+const struct cpu_str NAME_QUOTA = {
+	6, 0, { '$', 'Q', 'u', 'o', 't', 'a' },
+};
+const struct cpu_str NAME_SECURE = {
+	7, 0, { '$', 'S', 'e', 'c', 'u', 'r', 'e' },
+};
+const struct cpu_str NAME_UPCASE = {
+	7, 0, { '$', 'U', 'p', 'C', 'a', 's', 'e' },
+};
+const struct cpu_str NAME_EXTEND = {
+	7, 0, { '$', 'E', 'x', 't', 'e', 'n', 'd' },
+};
+const struct cpu_str NAME_OBJID = {
+	6, 0, { '$', 'O', 'b', 'j', 'I', 'd' },
+};
+const struct cpu_str NAME_REPARSE = {
+	8, 0, { '$', 'R', 'e', 'p', 'a', 'r', 's', 'e' },
+};
+const struct cpu_str NAME_USNJRNL = {
+	8, 0, { '$', 'U', 's', 'n', 'J', 'r', 'n', 'l' },
+};
+const __le16 BAD_NAME[4] = {
+	cpu_to_le16('$'), cpu_to_le16('B'), cpu_to_le16('a'), cpu_to_le16('d'),
+};
+const __le16 I30_NAME[4] = {
+	cpu_to_le16('$'), cpu_to_le16('I'), cpu_to_le16('3'), cpu_to_le16('0'),
+};
+const __le16 SII_NAME[4] = {
+	cpu_to_le16('$'), cpu_to_le16('S'), cpu_to_le16('I'), cpu_to_le16('I'),
+};
+const __le16 SDH_NAME[4] = {
+	cpu_to_le16('$'), cpu_to_le16('S'), cpu_to_le16('D'), cpu_to_le16('H'),
+};
+const __le16 SDS_NAME[4] = {
+	cpu_to_le16('$'), cpu_to_le16('S'), cpu_to_le16('D'), cpu_to_le16('S'),
+};
+const __le16 SO_NAME[2] = {
+	cpu_to_le16('$'), cpu_to_le16('O'),
+};
+const __le16 SQ_NAME[2] = {
+	cpu_to_le16('$'), cpu_to_le16('Q'),
+};
+const __le16 SR_NAME[2] = {
+	cpu_to_le16('$'), cpu_to_le16('R'),
+};
+
+#ifdef CONFIG_NTFS3_LZX_XPRESS
+const __le16 WOF_NAME[17] = {
+	cpu_to_le16('W'), cpu_to_le16('o'), cpu_to_le16('f'), cpu_to_le16('C'),
+	cpu_to_le16('o'), cpu_to_le16('m'), cpu_to_le16('p'), cpu_to_le16('r'),
+	cpu_to_le16('e'), cpu_to_le16('s'), cpu_to_le16('s'), cpu_to_le16('e'),
+	cpu_to_le16('d'), cpu_to_le16('D'), cpu_to_le16('a'), cpu_to_le16('t'),
+	cpu_to_le16('a'),
+};
+#endif
+
+// clang-format on
+
+/*
+ * ntfs_fix_pre_write - Insert fixups into @rhdr before writing to disk.
+ */
+bool ntfs_fix_pre_write(struct NTFS_RECORD_HEADER *rhdr, size_t bytes)
+{
+	u16 *fixup, *ptr;
+	u16 sample;
+	u16 fo = le16_to_cpu(rhdr->fix_off);
+	u16 fn = le16_to_cpu(rhdr->fix_num);
+
+	if ((fo & 1) || fo + fn * sizeof(short) > SECTOR_SIZE || !fn-- ||
+	    fn * SECTOR_SIZE > bytes) {
+		return false;
+	}
+
+	/* Get fixup pointer. */
+	fixup = Add2Ptr(rhdr, fo);
+
+	if (*fixup >= 0x7FFF)
+		*fixup = 1;
+	else
+		*fixup += 1;
+
+	sample = *fixup;
+
+	ptr = Add2Ptr(rhdr, SECTOR_SIZE - sizeof(short));
+
+	while (fn--) {
+		*++fixup = *ptr;
+		*ptr = sample;
+		ptr += SECTOR_SIZE / sizeof(short);
+	}
+	return true;
+}
+
+/*
+ * ntfs_fix_post_read - Remove fixups after reading from disk.
+ *
+ * Return: < 0 if error, 0 if ok, 1 if need to update fixups.
+ */
+int ntfs_fix_post_read(struct NTFS_RECORD_HEADER *rhdr, size_t bytes,
+		       bool simple)
+{
+	int ret;
+	u16 *fixup, *ptr;
+	u16 sample, fo, fn;
+
+	fo = le16_to_cpu(rhdr->fix_off);
+	fn = simple ? ((bytes >> SECTOR_SHIFT) + 1)
+		    : le16_to_cpu(rhdr->fix_num);
+
+	/* Check errors. */
+	if ((fo & 1) || fo + fn * sizeof(short) > SECTOR_SIZE || !fn-- ||
+	    fn * SECTOR_SIZE > bytes) {
+		return -EINVAL; /* Native chkntfs returns ok! */
+	}
+
+	/* Get fixup pointer. */
+	fixup = Add2Ptr(rhdr, fo);
+	sample = *fixup;
+	ptr = Add2Ptr(rhdr, SECTOR_SIZE - sizeof(short));
+	ret = 0;
+
+	while (fn--) {
+		/* Test current word. */
+		if (*ptr != sample) {
+			/* Fixup does not match! Is it serious error? */
+			ret = -E_NTFS_FIXUP;
+		}
+
+		/* Replace fixup. */
+		*ptr = *++fixup;
+		ptr += SECTOR_SIZE / sizeof(short);
+	}
+
+	return ret;
+}
+
+/*
+ * ntfs_extend_init - Load $Extend file.
+ */
+int ntfs_extend_init(struct ntfs_sb_info *sbi)
+{
+	int err;
+	struct super_block *sb = sbi->sb;
+	struct inode *inode, *inode2;
+	struct MFT_REF ref;
+
+	if (sbi->volume.major_ver < 3) {
+		ntfs_notice(sb, "Skip $Extend 'cause NTFS version");
+		return 0;
+	}
+
+	ref.low = cpu_to_le32(MFT_REC_EXTEND);
+	ref.high = 0;
+	ref.seq = cpu_to_le16(MFT_REC_EXTEND);
+	inode = ntfs_iget5(sb, &ref, &NAME_EXTEND);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		ntfs_err(sb, "Failed to load $Extend.");
+		inode = NULL;
+		goto out;
+	}
+
+	/* If ntfs_iget5() reads from disk it never returns bad inode. */
+	if (!S_ISDIR(inode->i_mode)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Try to find $ObjId */
+	inode2 = dir_search_u(inode, &NAME_OBJID, NULL);
+	if (inode2 && !IS_ERR(inode2)) {
+		if (is_bad_inode(inode2)) {
+			iput(inode2);
+		} else {
+			sbi->objid.ni = ntfs_i(inode2);
+			sbi->objid_no = inode2->i_ino;
+		}
+	}
+
+	/* Try to find $Quota */
+	inode2 = dir_search_u(inode, &NAME_QUOTA, NULL);
+	if (inode2 && !IS_ERR(inode2)) {
+		sbi->quota_no = inode2->i_ino;
+		iput(inode2);
+	}
+
+	/* Try to find $Reparse */
+	inode2 = dir_search_u(inode, &NAME_REPARSE, NULL);
+	if (inode2 && !IS_ERR(inode2)) {
+		sbi->reparse.ni = ntfs_i(inode2);
+		sbi->reparse_no = inode2->i_ino;
+	}
+
+	/* Try to find $UsnJrnl */
+	inode2 = dir_search_u(inode, &NAME_USNJRNL, NULL);
+	if (inode2 && !IS_ERR(inode2)) {
+		sbi->usn_jrnl_no = inode2->i_ino;
+		iput(inode2);
+	}
+
+	err = 0;
+out:
+	iput(inode);
+	return err;
+}
+
+int ntfs_loadlog_and_replay(struct ntfs_inode *ni, struct ntfs_sb_info *sbi)
+{
+	int err = 0;
+	struct super_block *sb = sbi->sb;
+	bool initialized = false;
+	struct MFT_REF ref;
+	struct inode *inode;
+
+	/* Check for 4GB. */
+	if (ni->vfs_inode.i_size >= 0x100000000ull) {
+		ntfs_err(sb, "\x24LogFile is too big");
+		err = -EINVAL;
+		goto out;
+	}
+
+	sbi->flags |= NTFS_FLAGS_LOG_REPLAYING;
+
+	ref.low = cpu_to_le32(MFT_REC_MFT);
+	ref.high = 0;
+	ref.seq = cpu_to_le16(1);
+
+	inode = ntfs_iget5(sb, &ref, NULL);
+
+	if (IS_ERR(inode))
+		inode = NULL;
+
+	if (!inode) {
+		/* Try to use MFT copy. */
+		u64 t64 = sbi->mft.lbo;
+
+		sbi->mft.lbo = sbi->mft.lbo2;
+		inode = ntfs_iget5(sb, &ref, NULL);
+		sbi->mft.lbo = t64;
+		if (IS_ERR(inode))
+			inode = NULL;
+	}
+
+	if (!inode) {
+		err = -EINVAL;
+		ntfs_err(sb, "Failed to load $MFT.");
+		goto out;
+	}
+
+	sbi->mft.ni = ntfs_i(inode);
+
+	/* LogFile should not contains attribute list. */
+	err = ni_load_all_mi(sbi->mft.ni);
+	if (!err)
+		err = log_replay(ni, &initialized);
+
+	iput(inode);
+	sbi->mft.ni = NULL;
+
+	sync_blockdev(sb->s_bdev);
+	invalidate_bdev(sb->s_bdev);
+
+	if (sbi->flags & NTFS_FLAGS_NEED_REPLAY) {
+		err = 0;
+		goto out;
+	}
+
+	if (sb_rdonly(sb) || !initialized)
+		goto out;
+
+	/* Fill LogFile by '-1' if it is initialized. */
+	err = ntfs_bio_fill_1(sbi, &ni->file.run);
+
+out:
+	sbi->flags &= ~NTFS_FLAGS_LOG_REPLAYING;
+
+	return err;
+}
+
+/*
+ * ntfs_query_def
+ *
+ * Return: Current ATTR_DEF_ENTRY for given attribute type.
+ */
+const struct ATTR_DEF_ENTRY *ntfs_query_def(struct ntfs_sb_info *sbi,
+					    enum ATTR_TYPE type)
+{
+	int type_in = le32_to_cpu(type);
+	size_t min_idx = 0;
+	size_t max_idx = sbi->def_entries - 1;
+
+	while (min_idx <= max_idx) {
+		size_t i = min_idx + ((max_idx - min_idx) >> 1);
+		const struct ATTR_DEF_ENTRY *entry = sbi->def_table + i;
+		int diff = le32_to_cpu(entry->type) - type_in;
+
+		if (!diff)
+			return entry;
+		if (diff < 0)
+			min_idx = i + 1;
+		else if (i)
+			max_idx = i - 1;
+		else
+			return NULL;
+	}
+	return NULL;
+}
+
+/*
+ * ntfs_look_for_free_space - Look for a free space in bitmap.
+ */
+int ntfs_look_for_free_space(struct ntfs_sb_info *sbi, CLST lcn, CLST len,
+			     CLST *new_lcn, CLST *new_len,
+			     enum ALLOCATE_OPT opt)
+{
+	int err;
+	CLST alen = 0;
+	struct super_block *sb = sbi->sb;
+	size_t alcn, zlen, zeroes, zlcn, zlen2, ztrim, new_zlen;
+	struct wnd_bitmap *wnd = &sbi->used.bitmap;
+
+	down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
+	if (opt & ALLOCATE_MFT) {
+		zlen = wnd_zone_len(wnd);
+
+		if (!zlen) {
+			err = ntfs_refresh_zone(sbi);
+			if (err)
+				goto out;
+			zlen = wnd_zone_len(wnd);
+		}
+
+		if (!zlen) {
+			ntfs_err(sbi->sb, "no free space to extend mft");
+			goto out;
+		}
+
+		lcn = wnd_zone_bit(wnd);
+		alen = zlen > len ? len : zlen;
+
+		wnd_zone_set(wnd, lcn + alen, zlen - alen);
+
+		err = wnd_set_used(wnd, lcn, alen);
+		if (err) {
+			up_write(&wnd->rw_lock);
+			return err;
+		}
+		alcn = lcn;
+		goto out;
+	}
+	/*
+	 * 'Cause cluster 0 is always used this value means that we should use
+	 * cached value of 'next_free_lcn' to improve performance.
+	 */
+	if (!lcn)
+		lcn = sbi->used.next_free_lcn;
+
+	if (lcn >= wnd->nbits)
+		lcn = 0;
+
+	alen = wnd_find(wnd, len, lcn, BITMAP_FIND_MARK_AS_USED, &alcn);
+	if (alen)
+		goto out;
+
+	/* Try to use clusters from MftZone. */
+	zlen = wnd_zone_len(wnd);
+	zeroes = wnd_zeroes(wnd);
+
+	/* Check too big request */
+	if (len > zeroes + zlen || zlen <= NTFS_MIN_MFT_ZONE)
+		goto out;
+
+	/* How many clusters to cat from zone. */
+	zlcn = wnd_zone_bit(wnd);
+	zlen2 = zlen >> 1;
+	ztrim = len > zlen ? zlen : (len > zlen2 ? len : zlen2);
+	new_zlen = zlen - ztrim;
+
+	if (new_zlen < NTFS_MIN_MFT_ZONE) {
+		new_zlen = NTFS_MIN_MFT_ZONE;
+		if (new_zlen > zlen)
+			new_zlen = zlen;
+	}
+
+	wnd_zone_set(wnd, zlcn, new_zlen);
+
+	/* Allocate continues clusters. */
+	alen = wnd_find(wnd, len, 0,
+			BITMAP_FIND_MARK_AS_USED | BITMAP_FIND_FULL, &alcn);
+
+out:
+	if (alen) {
+		err = 0;
+		*new_len = alen;
+		*new_lcn = alcn;
+
+		ntfs_unmap_meta(sb, alcn, alen);
+
+		/* Set hint for next requests. */
+		if (!(opt & ALLOCATE_MFT))
+			sbi->used.next_free_lcn = alcn + alen;
+	} else {
+		err = -ENOSPC;
+	}
+
+	up_write(&wnd->rw_lock);
+	return err;
+}
+
+/*
+ * ntfs_extend_mft - Allocate additional MFT records.
+ *
+ * sbi->mft.bitmap is locked for write.
+ *
+ * NOTE: recursive:
+ *	ntfs_look_free_mft ->
+ *	ntfs_extend_mft ->
+ *	attr_set_size ->
+ *	ni_insert_nonresident ->
+ *	ni_insert_attr ->
+ *	ni_ins_attr_ext ->
+ *	ntfs_look_free_mft ->
+ *	ntfs_extend_mft
+ *
+ * To avoid recursive always allocate space for two new MFT records
+ * see attrib.c: "at least two MFT to avoid recursive loop".
+ */
+static int ntfs_extend_mft(struct ntfs_sb_info *sbi)
+{
+	int err;
+	struct ntfs_inode *ni = sbi->mft.ni;
+	size_t new_mft_total;
+	u64 new_mft_bytes, new_bitmap_bytes;
+	struct ATTRIB *attr;
+	struct wnd_bitmap *wnd = &sbi->mft.bitmap;
+
+	new_mft_total = (wnd->nbits + MFT_INCREASE_CHUNK + 127) & (CLST)~127;
+	new_mft_bytes = (u64)new_mft_total << sbi->record_bits;
+
+	/* Step 1: Resize $MFT::DATA. */
+	down_write(&ni->file.run_lock);
+	err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run,
+			    new_mft_bytes, NULL, false, &attr);
+
+	if (err) {
+		up_write(&ni->file.run_lock);
+		goto out;
+	}
+
+	attr->nres.valid_size = attr->nres.data_size;
+	new_mft_total = le64_to_cpu(attr->nres.alloc_size) >> sbi->record_bits;
+	ni->mi.dirty = true;
+
+	/* Step 2: Resize $MFT::BITMAP. */
+	new_bitmap_bytes = bitmap_size(new_mft_total);
+
+	err = attr_set_size(ni, ATTR_BITMAP, NULL, 0, &sbi->mft.bitmap.run,
+			    new_bitmap_bytes, &new_bitmap_bytes, true, NULL);
+
+	/* Refresh MFT Zone if necessary. */
+	down_write_nested(&sbi->used.bitmap.rw_lock, BITMAP_MUTEX_CLUSTERS);
+
+	ntfs_refresh_zone(sbi);
+
+	up_write(&sbi->used.bitmap.rw_lock);
+	up_write(&ni->file.run_lock);
+
+	if (err)
+		goto out;
+
+	err = wnd_extend(wnd, new_mft_total);
+
+	if (err)
+		goto out;
+
+	ntfs_clear_mft_tail(sbi, sbi->mft.used, new_mft_total);
+
+	err = _ni_write_inode(&ni->vfs_inode, 0);
+out:
+	return err;
+}
+
+/*
+ * ntfs_look_free_mft - Look for a free MFT record.
+ */
+int ntfs_look_free_mft(struct ntfs_sb_info *sbi, CLST *rno, bool mft,
+		       struct ntfs_inode *ni, struct mft_inode **mi)
+{
+	int err = 0;
+	size_t zbit, zlen, from, to, fr;
+	size_t mft_total;
+	struct MFT_REF ref;
+	struct super_block *sb = sbi->sb;
+	struct wnd_bitmap *wnd = &sbi->mft.bitmap;
+	u32 ir;
+
+	static_assert(sizeof(sbi->mft.reserved_bitmap) * 8 >=
+		      MFT_REC_FREE - MFT_REC_RESERVED);
+
+	if (!mft)
+		down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT);
+
+	zlen = wnd_zone_len(wnd);
+
+	/* Always reserve space for MFT. */
+	if (zlen) {
+		if (mft) {
+			zbit = wnd_zone_bit(wnd);
+			*rno = zbit;
+			wnd_zone_set(wnd, zbit + 1, zlen - 1);
+		}
+		goto found;
+	}
+
+	/* No MFT zone. Find the nearest to '0' free MFT. */
+	if (!wnd_find(wnd, 1, MFT_REC_FREE, 0, &zbit)) {
+		/* Resize MFT */
+		mft_total = wnd->nbits;
+
+		err = ntfs_extend_mft(sbi);
+		if (!err) {
+			zbit = mft_total;
+			goto reserve_mft;
+		}
+
+		if (!mft || MFT_REC_FREE == sbi->mft.next_reserved)
+			goto out;
+
+		err = 0;
+
+		/*
+		 * Look for free record reserved area [11-16) ==
+		 * [MFT_REC_RESERVED, MFT_REC_FREE ) MFT bitmap always
+		 * marks it as used.
+		 */
+		if (!sbi->mft.reserved_bitmap) {
+			/* Once per session create internal bitmap for 5 bits. */
+			sbi->mft.reserved_bitmap = 0xFF;
+
+			ref.high = 0;
+			for (ir = MFT_REC_RESERVED; ir < MFT_REC_FREE; ir++) {
+				struct inode *i;
+				struct ntfs_inode *ni;
+				struct MFT_REC *mrec;
+
+				ref.low = cpu_to_le32(ir);
+				ref.seq = cpu_to_le16(ir);
+
+				i = ntfs_iget5(sb, &ref, NULL);
+				if (IS_ERR(i)) {
+next:
+					ntfs_notice(
+						sb,
+						"Invalid reserved record %x",
+						ref.low);
+					continue;
+				}
+				if (is_bad_inode(i)) {
+					iput(i);
+					goto next;
+				}
+
+				ni = ntfs_i(i);
+
+				mrec = ni->mi.mrec;
+
+				if (!is_rec_base(mrec))
+					goto next;
+
+				if (mrec->hard_links)
+					goto next;
+
+				if (!ni_std(ni))
+					goto next;
+
+				if (ni_find_attr(ni, NULL, NULL, ATTR_NAME,
+						 NULL, 0, NULL, NULL))
+					goto next;
+
+				__clear_bit(ir - MFT_REC_RESERVED,
+					    &sbi->mft.reserved_bitmap);
+			}
+		}
+
+		/* Scan 5 bits for zero. Bit 0 == MFT_REC_RESERVED */
+		zbit = find_next_zero_bit(&sbi->mft.reserved_bitmap,
+					  MFT_REC_FREE, MFT_REC_RESERVED);
+		if (zbit >= MFT_REC_FREE) {
+			sbi->mft.next_reserved = MFT_REC_FREE;
+			goto out;
+		}
+
+		zlen = 1;
+		sbi->mft.next_reserved = zbit;
+	} else {
+reserve_mft:
+		zlen = zbit == MFT_REC_FREE ? (MFT_REC_USER - MFT_REC_FREE) : 4;
+		if (zbit + zlen > wnd->nbits)
+			zlen = wnd->nbits - zbit;
+
+		while (zlen > 1 && !wnd_is_free(wnd, zbit, zlen))
+			zlen -= 1;
+
+		/* [zbit, zbit + zlen) will be used for MFT itself. */
+		from = sbi->mft.used;
+		if (from < zbit)
+			from = zbit;
+		to = zbit + zlen;
+		if (from < to) {
+			ntfs_clear_mft_tail(sbi, from, to);
+			sbi->mft.used = to;
+		}
+	}
+
+	if (mft) {
+		*rno = zbit;
+		zbit += 1;
+		zlen -= 1;
+	}
+
+	wnd_zone_set(wnd, zbit, zlen);
+
+found:
+	if (!mft) {
+		/* The request to get record for general purpose. */
+		if (sbi->mft.next_free < MFT_REC_USER)
+			sbi->mft.next_free = MFT_REC_USER;
+
+		for (;;) {
+			if (sbi->mft.next_free >= sbi->mft.bitmap.nbits) {
+			} else if (!wnd_find(wnd, 1, MFT_REC_USER, 0, &fr)) {
+				sbi->mft.next_free = sbi->mft.bitmap.nbits;
+			} else {
+				*rno = fr;
+				sbi->mft.next_free = *rno + 1;
+				break;
+			}
+
+			err = ntfs_extend_mft(sbi);
+			if (err)
+				goto out;
+		}
+	}
+
+	if (ni && !ni_add_subrecord(ni, *rno, mi)) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* We have found a record that are not reserved for next MFT. */
+	if (*rno >= MFT_REC_FREE)
+		wnd_set_used(wnd, *rno, 1);
+	else if (*rno >= MFT_REC_RESERVED && sbi->mft.reserved_bitmap_inited)
+		__set_bit(*rno - MFT_REC_RESERVED, &sbi->mft.reserved_bitmap);
+
+out:
+	if (!mft)
+		up_write(&wnd->rw_lock);
+
+	return err;
+}
+
+/*
+ * ntfs_mark_rec_free - Mark record as free.
+ */
+void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST rno)
+{
+	struct wnd_bitmap *wnd = &sbi->mft.bitmap;
+
+	down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT);
+	if (rno >= wnd->nbits)
+		goto out;
+
+	if (rno >= MFT_REC_FREE) {
+		if (!wnd_is_used(wnd, rno, 1))
+			ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
+		else
+			wnd_set_free(wnd, rno, 1);
+	} else if (rno >= MFT_REC_RESERVED && sbi->mft.reserved_bitmap_inited) {
+		__clear_bit(rno - MFT_REC_RESERVED, &sbi->mft.reserved_bitmap);
+	}
+
+	if (rno < wnd_zone_bit(wnd))
+		wnd_zone_set(wnd, rno, 1);
+	else if (rno < sbi->mft.next_free && rno >= MFT_REC_USER)
+		sbi->mft.next_free = rno;
+
+out:
+	up_write(&wnd->rw_lock);
+}
+
+/*
+ * ntfs_clear_mft_tail - Format empty records [from, to).
+ *
+ * sbi->mft.bitmap is locked for write.
+ */
+int ntfs_clear_mft_tail(struct ntfs_sb_info *sbi, size_t from, size_t to)
+{
+	int err;
+	u32 rs;
+	u64 vbo;
+	struct runs_tree *run;
+	struct ntfs_inode *ni;
+
+	if (from >= to)
+		return 0;
+
+	rs = sbi->record_size;
+	ni = sbi->mft.ni;
+	run = &ni->file.run;
+
+	down_read(&ni->file.run_lock);
+	vbo = (u64)from * rs;
+	for (; from < to; from++, vbo += rs) {
+		struct ntfs_buffers nb;
+
+		err = ntfs_get_bh(sbi, run, vbo, rs, &nb);
+		if (err)
+			goto out;
+
+		err = ntfs_write_bh(sbi, &sbi->new_rec->rhdr, &nb, 0);
+		nb_put(&nb);
+		if (err)
+			goto out;
+	}
+
+out:
+	sbi->mft.used = from;
+	up_read(&ni->file.run_lock);
+	return err;
+}
+
+/*
+ * ntfs_refresh_zone - Refresh MFT zone.
+ *
+ * sbi->used.bitmap is locked for rw.
+ * sbi->mft.bitmap is locked for write.
+ * sbi->mft.ni->file.run_lock for write.
+ */
+int ntfs_refresh_zone(struct ntfs_sb_info *sbi)
+{
+	CLST zone_limit, zone_max, lcn, vcn, len;
+	size_t lcn_s, zlen;
+	struct wnd_bitmap *wnd = &sbi->used.bitmap;
+	struct ntfs_inode *ni = sbi->mft.ni;
+
+	/* Do not change anything unless we have non empty MFT zone. */
+	if (wnd_zone_len(wnd))
+		return 0;
+
+	/*
+	 * Compute the MFT zone at two steps.
+	 * It would be nice if we are able to allocate 1/8 of
+	 * total clusters for MFT but not more then 512 MB.
+	 */
+	zone_limit = (512 * 1024 * 1024) >> sbi->cluster_bits;
+	zone_max = wnd->nbits >> 3;
+	if (zone_max > zone_limit)
+		zone_max = zone_limit;
+
+	vcn = bytes_to_cluster(sbi,
+			       (u64)sbi->mft.bitmap.nbits << sbi->record_bits);
+
+	if (!run_lookup_entry(&ni->file.run, vcn - 1, &lcn, &len, NULL))
+		lcn = SPARSE_LCN;
+
+	/* We should always find Last Lcn for MFT. */
+	if (lcn == SPARSE_LCN)
+		return -EINVAL;
+
+	lcn_s = lcn + 1;
+
+	/* Try to allocate clusters after last MFT run. */
+	zlen = wnd_find(wnd, zone_max, lcn_s, 0, &lcn_s);
+	if (!zlen) {
+		ntfs_notice(sbi->sb, "MftZone: unavailable");
+		return 0;
+	}
+
+	/* Truncate too large zone. */
+	wnd_zone_set(wnd, lcn_s, zlen);
+
+	return 0;
+}
+
+/*
+ * ntfs_update_mftmirr - Update $MFTMirr data.
+ */
+int ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait)
+{
+	int err;
+	struct super_block *sb = sbi->sb;
+	u32 blocksize = sb->s_blocksize;
+	sector_t block1, block2;
+	u32 bytes;
+
+	if (!(sbi->flags & NTFS_FLAGS_MFTMIRR))
+		return 0;
+
+	err = 0;
+	bytes = sbi->mft.recs_mirr << sbi->record_bits;
+	block1 = sbi->mft.lbo >> sb->s_blocksize_bits;
+	block2 = sbi->mft.lbo2 >> sb->s_blocksize_bits;
+
+	for (; bytes >= blocksize; bytes -= blocksize) {
+		struct buffer_head *bh1, *bh2;
+
+		bh1 = sb_bread(sb, block1++);
+		if (!bh1) {
+			err = -EIO;
+			goto out;
+		}
+
+		bh2 = sb_getblk(sb, block2++);
+		if (!bh2) {
+			put_bh(bh1);
+			err = -EIO;
+			goto out;
+		}
+
+		if (buffer_locked(bh2))
+			__wait_on_buffer(bh2);
+
+		lock_buffer(bh2);
+		memcpy(bh2->b_data, bh1->b_data, blocksize);
+		set_buffer_uptodate(bh2);
+		mark_buffer_dirty(bh2);
+		unlock_buffer(bh2);
+
+		put_bh(bh1);
+		bh1 = NULL;
+
+		if (wait)
+			err = sync_dirty_buffer(bh2);
+
+		put_bh(bh2);
+		if (err)
+			goto out;
+	}
+
+	sbi->flags &= ~NTFS_FLAGS_MFTMIRR;
+
+out:
+	return err;
+}
+
+/*
+ * ntfs_set_state
+ *
+ * Mount: ntfs_set_state(NTFS_DIRTY_DIRTY)
+ * Umount: ntfs_set_state(NTFS_DIRTY_CLEAR)
+ * NTFS error: ntfs_set_state(NTFS_DIRTY_ERROR)
+ */
+int ntfs_set_state(struct ntfs_sb_info *sbi, enum NTFS_DIRTY_FLAGS dirty)
+{
+	int err;
+	struct ATTRIB *attr;
+	struct VOLUME_INFO *info;
+	struct mft_inode *mi;
+	struct ntfs_inode *ni;
+
+	/*
+	 * Do not change state if fs was real_dirty.
+	 * Do not change state if fs already dirty(clear).
+	 * Do not change any thing if mounted read only.
+	 */
+	if (sbi->volume.real_dirty || sb_rdonly(sbi->sb))
+		return 0;
+
+	/* Check cached value. */
+	if ((dirty == NTFS_DIRTY_CLEAR ? 0 : VOLUME_FLAG_DIRTY) ==
+	    (sbi->volume.flags & VOLUME_FLAG_DIRTY))
+		return 0;
+
+	ni = sbi->volume.ni;
+	if (!ni)
+		return -EINVAL;
+
+	mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_DIRTY);
+
+	attr = ni_find_attr(ni, NULL, NULL, ATTR_VOL_INFO, NULL, 0, NULL, &mi);
+	if (!attr) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	info = resident_data_ex(attr, SIZEOF_ATTRIBUTE_VOLUME_INFO);
+	if (!info) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	switch (dirty) {
+	case NTFS_DIRTY_ERROR:
+		ntfs_notice(sbi->sb, "Mark volume as dirty due to NTFS errors");
+		sbi->volume.real_dirty = true;
+		fallthrough;
+	case NTFS_DIRTY_DIRTY:
+		info->flags |= VOLUME_FLAG_DIRTY;
+		break;
+	case NTFS_DIRTY_CLEAR:
+		info->flags &= ~VOLUME_FLAG_DIRTY;
+		break;
+	}
+	/* Cache current volume flags. */
+	sbi->volume.flags = info->flags;
+	mi->dirty = true;
+	err = 0;
+
+out:
+	ni_unlock(ni);
+	if (err)
+		return err;
+
+	mark_inode_dirty(&ni->vfs_inode);
+	/* verify(!ntfs_update_mftmirr()); */
+
+	/*
+	 * If we used wait=1, sync_inode_metadata waits for the io for the
+	 * inode to finish. It hangs when media is removed.
+	 * So wait=0 is sent down to sync_inode_metadata
+	 * and filemap_fdatawrite is used for the data blocks.
+	 */
+	err = sync_inode_metadata(&ni->vfs_inode, 0);
+	if (!err)
+		err = filemap_fdatawrite(ni->vfs_inode.i_mapping);
+
+	return err;
+}
+
+/*
+ * security_hash - Calculates a hash of security descriptor.
+ */
+static inline __le32 security_hash(const void *sd, size_t bytes)
+{
+	u32 hash = 0;
+	const __le32 *ptr = sd;
+
+	bytes >>= 2;
+	while (bytes--)
+		hash = ((hash >> 0x1D) | (hash << 3)) + le32_to_cpu(*ptr++);
+	return cpu_to_le32(hash);
+}
+
+int ntfs_sb_read(struct super_block *sb, u64 lbo, size_t bytes, void *buffer)
+{
+	struct block_device *bdev = sb->s_bdev;
+	u32 blocksize = sb->s_blocksize;
+	u64 block = lbo >> sb->s_blocksize_bits;
+	u32 off = lbo & (blocksize - 1);
+	u32 op = blocksize - off;
+
+	for (; bytes; block += 1, off = 0, op = blocksize) {
+		struct buffer_head *bh = __bread(bdev, block, blocksize);
+
+		if (!bh)
+			return -EIO;
+
+		if (op > bytes)
+			op = bytes;
+
+		memcpy(buffer, bh->b_data + off, op);
+
+		put_bh(bh);
+
+		bytes -= op;
+		buffer = Add2Ptr(buffer, op);
+	}
+
+	return 0;
+}
+
+int ntfs_sb_write(struct super_block *sb, u64 lbo, size_t bytes,
+		  const void *buf, int wait)
+{
+	u32 blocksize = sb->s_blocksize;
+	struct block_device *bdev = sb->s_bdev;
+	sector_t block = lbo >> sb->s_blocksize_bits;
+	u32 off = lbo & (blocksize - 1);
+	u32 op = blocksize - off;
+	struct buffer_head *bh;
+
+	if (!wait && (sb->s_flags & SB_SYNCHRONOUS))
+		wait = 1;
+
+	for (; bytes; block += 1, off = 0, op = blocksize) {
+		if (op > bytes)
+			op = bytes;
+
+		if (op < blocksize) {
+			bh = __bread(bdev, block, blocksize);
+			if (!bh) {
+				ntfs_err(sb, "failed to read block %llx",
+					 (u64)block);
+				return -EIO;
+			}
+		} else {
+			bh = __getblk(bdev, block, blocksize);
+			if (!bh)
+				return -ENOMEM;
+		}
+
+		if (buffer_locked(bh))
+			__wait_on_buffer(bh);
+
+		lock_buffer(bh);
+		if (buf) {
+			memcpy(bh->b_data + off, buf, op);
+			buf = Add2Ptr(buf, op);
+		} else {
+			memset(bh->b_data + off, -1, op);
+		}
+
+		set_buffer_uptodate(bh);
+		mark_buffer_dirty(bh);
+		unlock_buffer(bh);
+
+		if (wait) {
+			int err = sync_dirty_buffer(bh);
+
+			if (err) {
+				ntfs_err(
+					sb,
+					"failed to sync buffer at block %llx, error %d",
+					(u64)block, err);
+				put_bh(bh);
+				return err;
+			}
+		}
+
+		put_bh(bh);
+
+		bytes -= op;
+	}
+	return 0;
+}
+
+int ntfs_sb_write_run(struct ntfs_sb_info *sbi, const struct runs_tree *run,
+		      u64 vbo, const void *buf, size_t bytes)
+{
+	struct super_block *sb = sbi->sb;
+	u8 cluster_bits = sbi->cluster_bits;
+	u32 off = vbo & sbi->cluster_mask;
+	CLST lcn, clen, vcn = vbo >> cluster_bits, vcn_next;
+	u64 lbo, len;
+	size_t idx;
+
+	if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx))
+		return -ENOENT;
+
+	if (lcn == SPARSE_LCN)
+		return -EINVAL;
+
+	lbo = ((u64)lcn << cluster_bits) + off;
+	len = ((u64)clen << cluster_bits) - off;
+
+	for (;;) {
+		u32 op = len < bytes ? len : bytes;
+		int err = ntfs_sb_write(sb, lbo, op, buf, 0);
+
+		if (err)
+			return err;
+
+		bytes -= op;
+		if (!bytes)
+			break;
+
+		vcn_next = vcn + clen;
+		if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) ||
+		    vcn != vcn_next)
+			return -ENOENT;
+
+		if (lcn == SPARSE_LCN)
+			return -EINVAL;
+
+		if (buf)
+			buf = Add2Ptr(buf, op);
+
+		lbo = ((u64)lcn << cluster_bits);
+		len = ((u64)clen << cluster_bits);
+	}
+
+	return 0;
+}
+
+struct buffer_head *ntfs_bread_run(struct ntfs_sb_info *sbi,
+				   const struct runs_tree *run, u64 vbo)
+{
+	struct super_block *sb = sbi->sb;
+	u8 cluster_bits = sbi->cluster_bits;
+	CLST lcn;
+	u64 lbo;
+
+	if (!run_lookup_entry(run, vbo >> cluster_bits, &lcn, NULL, NULL))
+		return ERR_PTR(-ENOENT);
+
+	lbo = ((u64)lcn << cluster_bits) + (vbo & sbi->cluster_mask);
+
+	return ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
+}
+
+int ntfs_read_run_nb(struct ntfs_sb_info *sbi, const struct runs_tree *run,
+		     u64 vbo, void *buf, u32 bytes, struct ntfs_buffers *nb)
+{
+	int err;
+	struct super_block *sb = sbi->sb;
+	u32 blocksize = sb->s_blocksize;
+	u8 cluster_bits = sbi->cluster_bits;
+	u32 off = vbo & sbi->cluster_mask;
+	u32 nbh = 0;
+	CLST vcn_next, vcn = vbo >> cluster_bits;
+	CLST lcn, clen;
+	u64 lbo, len;
+	size_t idx;
+	struct buffer_head *bh;
+
+	if (!run) {
+		/* First reading of $Volume + $MFTMirr + $LogFile goes here. */
+		if (vbo > MFT_REC_VOL * sbi->record_size) {
+			err = -ENOENT;
+			goto out;
+		}
+
+		/* Use absolute boot's 'MFTCluster' to read record. */
+		lbo = vbo + sbi->mft.lbo;
+		len = sbi->record_size;
+	} else if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) {
+		err = -ENOENT;
+		goto out;
+	} else {
+		if (lcn == SPARSE_LCN) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		lbo = ((u64)lcn << cluster_bits) + off;
+		len = ((u64)clen << cluster_bits) - off;
+	}
+
+	off = lbo & (blocksize - 1);
+	if (nb) {
+		nb->off = off;
+		nb->bytes = bytes;
+	}
+
+	for (;;) {
+		u32 len32 = len >= bytes ? bytes : len;
+		sector_t block = lbo >> sb->s_blocksize_bits;
+
+		do {
+			u32 op = blocksize - off;
+
+			if (op > len32)
+				op = len32;
+
+			bh = ntfs_bread(sb, block);
+			if (!bh) {
+				err = -EIO;
+				goto out;
+			}
+
+			if (buf) {
+				memcpy(buf, bh->b_data + off, op);
+				buf = Add2Ptr(buf, op);
+			}
+
+			if (!nb) {
+				put_bh(bh);
+			} else if (nbh >= ARRAY_SIZE(nb->bh)) {
+				err = -EINVAL;
+				goto out;
+			} else {
+				nb->bh[nbh++] = bh;
+				nb->nbufs = nbh;
+			}
+
+			bytes -= op;
+			if (!bytes)
+				return 0;
+			len32 -= op;
+			block += 1;
+			off = 0;
+
+		} while (len32);
+
+		vcn_next = vcn + clen;
+		if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) ||
+		    vcn != vcn_next) {
+			err = -ENOENT;
+			goto out;
+		}
+
+		if (lcn == SPARSE_LCN) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		lbo = ((u64)lcn << cluster_bits);
+		len = ((u64)clen << cluster_bits);
+	}
+
+out:
+	if (!nbh)
+		return err;
+
+	while (nbh) {
+		put_bh(nb->bh[--nbh]);
+		nb->bh[nbh] = NULL;
+	}
+
+	nb->nbufs = 0;
+	return err;
+}
+
+/*
+ * ntfs_read_bh
+ *
+ * Return: < 0 if error, 0 if ok, -E_NTFS_FIXUP if need to update fixups.
+ */
+int ntfs_read_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo,
+		 struct NTFS_RECORD_HEADER *rhdr, u32 bytes,
+		 struct ntfs_buffers *nb)
+{
+	int err = ntfs_read_run_nb(sbi, run, vbo, rhdr, bytes, nb);
+
+	if (err)
+		return err;
+	return ntfs_fix_post_read(rhdr, nb->bytes, true);
+}
+
+int ntfs_get_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo,
+		u32 bytes, struct ntfs_buffers *nb)
+{
+	int err = 0;
+	struct super_block *sb = sbi->sb;
+	u32 blocksize = sb->s_blocksize;
+	u8 cluster_bits = sbi->cluster_bits;
+	CLST vcn_next, vcn = vbo >> cluster_bits;
+	u32 off;
+	u32 nbh = 0;
+	CLST lcn, clen;
+	u64 lbo, len;
+	size_t idx;
+
+	nb->bytes = bytes;
+
+	if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	off = vbo & sbi->cluster_mask;
+	lbo = ((u64)lcn << cluster_bits) + off;
+	len = ((u64)clen << cluster_bits) - off;
+
+	nb->off = off = lbo & (blocksize - 1);
+
+	for (;;) {
+		u32 len32 = len < bytes ? len : bytes;
+		sector_t block = lbo >> sb->s_blocksize_bits;
+
+		do {
+			u32 op;
+			struct buffer_head *bh;
+
+			if (nbh >= ARRAY_SIZE(nb->bh)) {
+				err = -EINVAL;
+				goto out;
+			}
+
+			op = blocksize - off;
+			if (op > len32)
+				op = len32;
+
+			if (op == blocksize) {
+				bh = sb_getblk(sb, block);
+				if (!bh) {
+					err = -ENOMEM;
+					goto out;
+				}
+				if (buffer_locked(bh))
+					__wait_on_buffer(bh);
+				set_buffer_uptodate(bh);
+			} else {
+				bh = ntfs_bread(sb, block);
+				if (!bh) {
+					err = -EIO;
+					goto out;
+				}
+			}
+
+			nb->bh[nbh++] = bh;
+			bytes -= op;
+			if (!bytes) {
+				nb->nbufs = nbh;
+				return 0;
+			}
+
+			block += 1;
+			len32 -= op;
+			off = 0;
+		} while (len32);
+
+		vcn_next = vcn + clen;
+		if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) ||
+		    vcn != vcn_next) {
+			err = -ENOENT;
+			goto out;
+		}
+
+		lbo = ((u64)lcn << cluster_bits);
+		len = ((u64)clen << cluster_bits);
+	}
+
+out:
+	while (nbh) {
+		put_bh(nb->bh[--nbh]);
+		nb->bh[nbh] = NULL;
+	}
+
+	nb->nbufs = 0;
+
+	return err;
+}
+
+int ntfs_write_bh(struct ntfs_sb_info *sbi, struct NTFS_RECORD_HEADER *rhdr,
+		  struct ntfs_buffers *nb, int sync)
+{
+	int err = 0;
+	struct super_block *sb = sbi->sb;
+	u32 block_size = sb->s_blocksize;
+	u32 bytes = nb->bytes;
+	u32 off = nb->off;
+	u16 fo = le16_to_cpu(rhdr->fix_off);
+	u16 fn = le16_to_cpu(rhdr->fix_num);
+	u32 idx;
+	__le16 *fixup;
+	__le16 sample;
+
+	if ((fo & 1) || fo + fn * sizeof(short) > SECTOR_SIZE || !fn-- ||
+	    fn * SECTOR_SIZE > bytes) {
+		return -EINVAL;
+	}
+
+	for (idx = 0; bytes && idx < nb->nbufs; idx += 1, off = 0) {
+		u32 op = block_size - off;
+		char *bh_data;
+		struct buffer_head *bh = nb->bh[idx];
+		__le16 *ptr, *end_data;
+
+		if (op > bytes)
+			op = bytes;
+
+		if (buffer_locked(bh))
+			__wait_on_buffer(bh);
+
+		lock_buffer(nb->bh[idx]);
+
+		bh_data = bh->b_data + off;
+		end_data = Add2Ptr(bh_data, op);
+		memcpy(bh_data, rhdr, op);
+
+		if (!idx) {
+			u16 t16;
+
+			fixup = Add2Ptr(bh_data, fo);
+			sample = *fixup;
+			t16 = le16_to_cpu(sample);
+			if (t16 >= 0x7FFF) {
+				sample = *fixup = cpu_to_le16(1);
+			} else {
+				sample = cpu_to_le16(t16 + 1);
+				*fixup = sample;
+			}
+
+			*(__le16 *)Add2Ptr(rhdr, fo) = sample;
+		}
+
+		ptr = Add2Ptr(bh_data, SECTOR_SIZE - sizeof(short));
+
+		do {
+			*++fixup = *ptr;
+			*ptr = sample;
+			ptr += SECTOR_SIZE / sizeof(short);
+		} while (ptr < end_data);
+
+		set_buffer_uptodate(bh);
+		mark_buffer_dirty(bh);
+		unlock_buffer(bh);
+
+		if (sync) {
+			int err2 = sync_dirty_buffer(bh);
+
+			if (!err && err2)
+				err = err2;
+		}
+
+		bytes -= op;
+		rhdr = Add2Ptr(rhdr, op);
+	}
+
+	return err;
+}
+
+static inline struct bio *ntfs_alloc_bio(u32 nr_vecs)
+{
+	struct bio *bio = bio_alloc(GFP_NOFS | __GFP_HIGH, nr_vecs);
+
+	if (!bio && (current->flags & PF_MEMALLOC)) {
+		while (!bio && (nr_vecs /= 2))
+			bio = bio_alloc(GFP_NOFS | __GFP_HIGH, nr_vecs);
+	}
+	return bio;
+}
+
+/*
+ * ntfs_bio_pages - Read/write pages from/to disk.
+ */
+int ntfs_bio_pages(struct ntfs_sb_info *sbi, const struct runs_tree *run,
+		   struct page **pages, u32 nr_pages, u64 vbo, u32 bytes,
+		   u32 op)
+{
+	int err = 0;
+	struct bio *new, *bio = NULL;
+	struct super_block *sb = sbi->sb;
+	struct block_device *bdev = sb->s_bdev;
+	struct page *page;
+	u8 cluster_bits = sbi->cluster_bits;
+	CLST lcn, clen, vcn, vcn_next;
+	u32 add, off, page_idx;
+	u64 lbo, len;
+	size_t run_idx;
+	struct blk_plug plug;
+
+	if (!bytes)
+		return 0;
+
+	blk_start_plug(&plug);
+
+	/* Align vbo and bytes to be 512 bytes aligned. */
+	lbo = (vbo + bytes + 511) & ~511ull;
+	vbo = vbo & ~511ull;
+	bytes = lbo - vbo;
+
+	vcn = vbo >> cluster_bits;
+	if (!run_lookup_entry(run, vcn, &lcn, &clen, &run_idx)) {
+		err = -ENOENT;
+		goto out;
+	}
+	off = vbo & sbi->cluster_mask;
+	page_idx = 0;
+	page = pages[0];
+
+	for (;;) {
+		lbo = ((u64)lcn << cluster_bits) + off;
+		len = ((u64)clen << cluster_bits) - off;
+new_bio:
+		new = ntfs_alloc_bio(nr_pages - page_idx);
+		if (!new) {
+			err = -ENOMEM;
+			goto out;
+		}
+		if (bio) {
+			bio_chain(bio, new);
+			submit_bio(bio);
+		}
+		bio = new;
+		bio_set_dev(bio, bdev);
+		bio->bi_iter.bi_sector = lbo >> 9;
+		bio->bi_opf = op;
+
+		while (len) {
+			off = vbo & (PAGE_SIZE - 1);
+			add = off + len > PAGE_SIZE ? (PAGE_SIZE - off) : len;
+
+			if (bio_add_page(bio, page, add, off) < add)
+				goto new_bio;
+
+			if (bytes <= add)
+				goto out;
+			bytes -= add;
+			vbo += add;
+
+			if (add + off == PAGE_SIZE) {
+				page_idx += 1;
+				if (WARN_ON(page_idx >= nr_pages)) {
+					err = -EINVAL;
+					goto out;
+				}
+				page = pages[page_idx];
+			}
+
+			if (len <= add)
+				break;
+			len -= add;
+			lbo += add;
+		}
+
+		vcn_next = vcn + clen;
+		if (!run_get_entry(run, ++run_idx, &vcn, &lcn, &clen) ||
+		    vcn != vcn_next) {
+			err = -ENOENT;
+			goto out;
+		}
+		off = 0;
+	}
+out:
+	if (bio) {
+		if (!err)
+			err = submit_bio_wait(bio);
+		bio_put(bio);
+	}
+	blk_finish_plug(&plug);
+
+	return err;
+}
+
+/*
+ * ntfs_bio_fill_1 - Helper for ntfs_loadlog_and_replay().
+ *
+ * Fill on-disk logfile range by (-1)
+ * this means empty logfile.
+ */
+int ntfs_bio_fill_1(struct ntfs_sb_info *sbi, const struct runs_tree *run)
+{
+	int err = 0;
+	struct super_block *sb = sbi->sb;
+	struct block_device *bdev = sb->s_bdev;
+	u8 cluster_bits = sbi->cluster_bits;
+	struct bio *new, *bio = NULL;
+	CLST lcn, clen;
+	u64 lbo, len;
+	size_t run_idx;
+	struct page *fill;
+	void *kaddr;
+	struct blk_plug plug;
+
+	fill = alloc_page(GFP_KERNEL);
+	if (!fill)
+		return -ENOMEM;
+
+	kaddr = kmap_atomic(fill);
+	memset(kaddr, -1, PAGE_SIZE);
+	kunmap_atomic(kaddr);
+	flush_dcache_page(fill);
+	lock_page(fill);
+
+	if (!run_lookup_entry(run, 0, &lcn, &clen, &run_idx)) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	/*
+	 * TODO: Try blkdev_issue_write_same.
+	 */
+	blk_start_plug(&plug);
+	do {
+		lbo = (u64)lcn << cluster_bits;
+		len = (u64)clen << cluster_bits;
+new_bio:
+		new = ntfs_alloc_bio(BIO_MAX_VECS);
+		if (!new) {
+			err = -ENOMEM;
+			break;
+		}
+		if (bio) {
+			bio_chain(bio, new);
+			submit_bio(bio);
+		}
+		bio = new;
+		bio_set_dev(bio, bdev);
+		bio->bi_opf = REQ_OP_WRITE;
+		bio->bi_iter.bi_sector = lbo >> 9;
+
+		for (;;) {
+			u32 add = len > PAGE_SIZE ? PAGE_SIZE : len;
+
+			if (bio_add_page(bio, fill, add, 0) < add)
+				goto new_bio;
+
+			lbo += add;
+			if (len <= add)
+				break;
+			len -= add;
+		}
+	} while (run_get_entry(run, ++run_idx, NULL, &lcn, &clen));
+
+	if (bio) {
+		if (!err)
+			err = submit_bio_wait(bio);
+		bio_put(bio);
+	}
+	blk_finish_plug(&plug);
+out:
+	unlock_page(fill);
+	put_page(fill);
+
+	return err;
+}
+
+int ntfs_vbo_to_lbo(struct ntfs_sb_info *sbi, const struct runs_tree *run,
+		    u64 vbo, u64 *lbo, u64 *bytes)
+{
+	u32 off;
+	CLST lcn, len;
+	u8 cluster_bits = sbi->cluster_bits;
+
+	if (!run_lookup_entry(run, vbo >> cluster_bits, &lcn, &len, NULL))
+		return -ENOENT;
+
+	off = vbo & sbi->cluster_mask;
+	*lbo = lcn == SPARSE_LCN ? -1 : (((u64)lcn << cluster_bits) + off);
+	*bytes = ((u64)len << cluster_bits) - off;
+
+	return 0;
+}
+
+struct ntfs_inode *ntfs_new_inode(struct ntfs_sb_info *sbi, CLST rno, bool dir)
+{
+	int err = 0;
+	struct super_block *sb = sbi->sb;
+	struct inode *inode = new_inode(sb);
+	struct ntfs_inode *ni;
+
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	ni = ntfs_i(inode);
+
+	err = mi_format_new(&ni->mi, sbi, rno, dir ? RECORD_FLAG_DIR : 0,
+			    false);
+	if (err)
+		goto out;
+
+	inode->i_ino = rno;
+	if (insert_inode_locked(inode) < 0) {
+		err = -EIO;
+		goto out;
+	}
+
+out:
+	if (err) {
+		iput(inode);
+		ni = ERR_PTR(err);
+	}
+	return ni;
+}
+
+/*
+ * O:BAG:BAD:(A;OICI;FA;;;WD)
+ * Owner S-1-5-32-544 (Administrators)
+ * Group S-1-5-32-544 (Administrators)
+ * ACE: allow S-1-1-0 (Everyone) with FILE_ALL_ACCESS
+ */
+const u8 s_default_security[] __aligned(8) = {
+	0x01, 0x00, 0x04, 0x80, 0x30, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x1C, 0x00,
+	0x01, 0x00, 0x00, 0x00, 0x00, 0x03, 0x14, 0x00, 0xFF, 0x01, 0x1F, 0x00,
+	0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
+	0x01, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x20, 0x00, 0x00, 0x00,
+	0x20, 0x02, 0x00, 0x00, 0x01, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05,
+	0x20, 0x00, 0x00, 0x00, 0x20, 0x02, 0x00, 0x00,
+};
+
+static_assert(sizeof(s_default_security) == 0x50);
+
+static inline u32 sid_length(const struct SID *sid)
+{
+	return struct_size(sid, SubAuthority, sid->SubAuthorityCount);
+}
+
+/*
+ * is_acl_valid
+ *
+ * Thanks Mark Harmstone for idea.
+ */
+static bool is_acl_valid(const struct ACL *acl, u32 len)
+{
+	const struct ACE_HEADER *ace;
+	u32 i;
+	u16 ace_count, ace_size;
+
+	if (acl->AclRevision != ACL_REVISION &&
+	    acl->AclRevision != ACL_REVISION_DS) {
+		/*
+		 * This value should be ACL_REVISION, unless the ACL contains an
+		 * object-specific ACE, in which case this value must be ACL_REVISION_DS.
+		 * All ACEs in an ACL must be at the same revision level.
+		 */
+		return false;
+	}
+
+	if (acl->Sbz1)
+		return false;
+
+	if (le16_to_cpu(acl->AclSize) > len)
+		return false;
+
+	if (acl->Sbz2)
+		return false;
+
+	len -= sizeof(struct ACL);
+	ace = (struct ACE_HEADER *)&acl[1];
+	ace_count = le16_to_cpu(acl->AceCount);
+
+	for (i = 0; i < ace_count; i++) {
+		if (len < sizeof(struct ACE_HEADER))
+			return false;
+
+		ace_size = le16_to_cpu(ace->AceSize);
+		if (len < ace_size)
+			return false;
+
+		len -= ace_size;
+		ace = Add2Ptr(ace, ace_size);
+	}
+
+	return true;
+}
+
+bool is_sd_valid(const struct SECURITY_DESCRIPTOR_RELATIVE *sd, u32 len)
+{
+	u32 sd_owner, sd_group, sd_sacl, sd_dacl;
+
+	if (len < sizeof(struct SECURITY_DESCRIPTOR_RELATIVE))
+		return false;
+
+	if (sd->Revision != 1)
+		return false;
+
+	if (sd->Sbz1)
+		return false;
+
+	if (!(sd->Control & SE_SELF_RELATIVE))
+		return false;
+
+	sd_owner = le32_to_cpu(sd->Owner);
+	if (sd_owner) {
+		const struct SID *owner = Add2Ptr(sd, sd_owner);
+
+		if (sd_owner + offsetof(struct SID, SubAuthority) > len)
+			return false;
+
+		if (owner->Revision != 1)
+			return false;
+
+		if (sd_owner + sid_length(owner) > len)
+			return false;
+	}
+
+	sd_group = le32_to_cpu(sd->Group);
+	if (sd_group) {
+		const struct SID *group = Add2Ptr(sd, sd_group);
+
+		if (sd_group + offsetof(struct SID, SubAuthority) > len)
+			return false;
+
+		if (group->Revision != 1)
+			return false;
+
+		if (sd_group + sid_length(group) > len)
+			return false;
+	}
+
+	sd_sacl = le32_to_cpu(sd->Sacl);
+	if (sd_sacl) {
+		const struct ACL *sacl = Add2Ptr(sd, sd_sacl);
+
+		if (sd_sacl + sizeof(struct ACL) > len)
+			return false;
+
+		if (!is_acl_valid(sacl, len - sd_sacl))
+			return false;
+	}
+
+	sd_dacl = le32_to_cpu(sd->Dacl);
+	if (sd_dacl) {
+		const struct ACL *dacl = Add2Ptr(sd, sd_dacl);
+
+		if (sd_dacl + sizeof(struct ACL) > len)
+			return false;
+
+		if (!is_acl_valid(dacl, len - sd_dacl))
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * ntfs_security_init - Load and parse $Secure.
+ */
+int ntfs_security_init(struct ntfs_sb_info *sbi)
+{
+	int err;
+	struct super_block *sb = sbi->sb;
+	struct inode *inode;
+	struct ntfs_inode *ni;
+	struct MFT_REF ref;
+	struct ATTRIB *attr;
+	struct ATTR_LIST_ENTRY *le;
+	u64 sds_size;
+	size_t off;
+	struct NTFS_DE *ne;
+	struct NTFS_DE_SII *sii_e;
+	struct ntfs_fnd *fnd_sii = NULL;
+	const struct INDEX_ROOT *root_sii;
+	const struct INDEX_ROOT *root_sdh;
+	struct ntfs_index *indx_sdh = &sbi->security.index_sdh;
+	struct ntfs_index *indx_sii = &sbi->security.index_sii;
+
+	ref.low = cpu_to_le32(MFT_REC_SECURE);
+	ref.high = 0;
+	ref.seq = cpu_to_le16(MFT_REC_SECURE);
+
+	inode = ntfs_iget5(sb, &ref, &NAME_SECURE);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		ntfs_err(sb, "Failed to load $Secure.");
+		inode = NULL;
+		goto out;
+	}
+
+	ni = ntfs_i(inode);
+
+	le = NULL;
+
+	attr = ni_find_attr(ni, NULL, &le, ATTR_ROOT, SDH_NAME,
+			    ARRAY_SIZE(SDH_NAME), NULL, NULL);
+	if (!attr) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	root_sdh = resident_data(attr);
+	if (root_sdh->type != ATTR_ZERO ||
+	    root_sdh->rule != NTFS_COLLATION_TYPE_SECURITY_HASH) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = indx_init(indx_sdh, sbi, attr, INDEX_MUTEX_SDH);
+	if (err)
+		goto out;
+
+	attr = ni_find_attr(ni, attr, &le, ATTR_ROOT, SII_NAME,
+			    ARRAY_SIZE(SII_NAME), NULL, NULL);
+	if (!attr) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	root_sii = resident_data(attr);
+	if (root_sii->type != ATTR_ZERO ||
+	    root_sii->rule != NTFS_COLLATION_TYPE_UINT) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = indx_init(indx_sii, sbi, attr, INDEX_MUTEX_SII);
+	if (err)
+		goto out;
+
+	fnd_sii = fnd_get();
+	if (!fnd_sii) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	sds_size = inode->i_size;
+
+	/* Find the last valid Id. */
+	sbi->security.next_id = SECURITY_ID_FIRST;
+	/* Always write new security at the end of bucket. */
+	sbi->security.next_off =
+		ALIGN(sds_size - SecurityDescriptorsBlockSize, 16);
+
+	off = 0;
+	ne = NULL;
+
+	for (;;) {
+		u32 next_id;
+
+		err = indx_find_raw(indx_sii, ni, root_sii, &ne, &off, fnd_sii);
+		if (err || !ne)
+			break;
+
+		sii_e = (struct NTFS_DE_SII *)ne;
+		if (le16_to_cpu(ne->view.data_size) < SIZEOF_SECURITY_HDR)
+			continue;
+
+		next_id = le32_to_cpu(sii_e->sec_id) + 1;
+		if (next_id >= sbi->security.next_id)
+			sbi->security.next_id = next_id;
+	}
+
+	sbi->security.ni = ni;
+	inode = NULL;
+out:
+	iput(inode);
+	fnd_put(fnd_sii);
+
+	return err;
+}
+
+/*
+ * ntfs_get_security_by_id - Read security descriptor by id.
+ */
+int ntfs_get_security_by_id(struct ntfs_sb_info *sbi, __le32 security_id,
+			    struct SECURITY_DESCRIPTOR_RELATIVE **sd,
+			    size_t *size)
+{
+	int err;
+	int diff;
+	struct ntfs_inode *ni = sbi->security.ni;
+	struct ntfs_index *indx = &sbi->security.index_sii;
+	void *p = NULL;
+	struct NTFS_DE_SII *sii_e;
+	struct ntfs_fnd *fnd_sii;
+	struct SECURITY_HDR d_security;
+	const struct INDEX_ROOT *root_sii;
+	u32 t32;
+
+	*sd = NULL;
+
+	mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_SECURITY);
+
+	fnd_sii = fnd_get();
+	if (!fnd_sii) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	root_sii = indx_get_root(indx, ni, NULL, NULL);
+	if (!root_sii) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Try to find this SECURITY descriptor in SII indexes. */
+	err = indx_find(indx, ni, root_sii, &security_id, sizeof(security_id),
+			NULL, &diff, (struct NTFS_DE **)&sii_e, fnd_sii);
+	if (err)
+		goto out;
+
+	if (diff)
+		goto out;
+
+	t32 = le32_to_cpu(sii_e->sec_hdr.size);
+	if (t32 < SIZEOF_SECURITY_HDR) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (t32 > SIZEOF_SECURITY_HDR + 0x10000) {
+		/* Looks like too big security. 0x10000 - is arbitrary big number. */
+		err = -EFBIG;
+		goto out;
+	}
+
+	*size = t32 - SIZEOF_SECURITY_HDR;
+
+	p = kmalloc(*size, GFP_NOFS);
+	if (!p) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	err = ntfs_read_run_nb(sbi, &ni->file.run,
+			       le64_to_cpu(sii_e->sec_hdr.off), &d_security,
+			       sizeof(d_security), NULL);
+	if (err)
+		goto out;
+
+	if (memcmp(&d_security, &sii_e->sec_hdr, SIZEOF_SECURITY_HDR)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = ntfs_read_run_nb(sbi, &ni->file.run,
+			       le64_to_cpu(sii_e->sec_hdr.off) +
+				       SIZEOF_SECURITY_HDR,
+			       p, *size, NULL);
+	if (err)
+		goto out;
+
+	*sd = p;
+	p = NULL;
+
+out:
+	kfree(p);
+	fnd_put(fnd_sii);
+	ni_unlock(ni);
+
+	return err;
+}
+
+/*
+ * ntfs_insert_security - Insert security descriptor into $Secure::SDS.
+ *
+ * SECURITY Descriptor Stream data is organized into chunks of 256K bytes
+ * and it contains a mirror copy of each security descriptor.  When writing
+ * to a security descriptor at location X, another copy will be written at
+ * location (X+256K).
+ * When writing a security descriptor that will cross the 256K boundary,
+ * the pointer will be advanced by 256K to skip
+ * over the mirror portion.
+ */
+int ntfs_insert_security(struct ntfs_sb_info *sbi,
+			 const struct SECURITY_DESCRIPTOR_RELATIVE *sd,
+			 u32 size_sd, __le32 *security_id, bool *inserted)
+{
+	int err, diff;
+	struct ntfs_inode *ni = sbi->security.ni;
+	struct ntfs_index *indx_sdh = &sbi->security.index_sdh;
+	struct ntfs_index *indx_sii = &sbi->security.index_sii;
+	struct NTFS_DE_SDH *e;
+	struct NTFS_DE_SDH sdh_e;
+	struct NTFS_DE_SII sii_e;
+	struct SECURITY_HDR *d_security;
+	u32 new_sec_size = size_sd + SIZEOF_SECURITY_HDR;
+	u32 aligned_sec_size = ALIGN(new_sec_size, 16);
+	struct SECURITY_KEY hash_key;
+	struct ntfs_fnd *fnd_sdh = NULL;
+	const struct INDEX_ROOT *root_sdh;
+	const struct INDEX_ROOT *root_sii;
+	u64 mirr_off, new_sds_size;
+	u32 next, left;
+
+	static_assert((1 << Log2OfSecurityDescriptorsBlockSize) ==
+		      SecurityDescriptorsBlockSize);
+
+	hash_key.hash = security_hash(sd, size_sd);
+	hash_key.sec_id = SECURITY_ID_INVALID;
+
+	if (inserted)
+		*inserted = false;
+	*security_id = SECURITY_ID_INVALID;
+
+	/* Allocate a temporal buffer. */
+	d_security = kzalloc(aligned_sec_size, GFP_NOFS);
+	if (!d_security)
+		return -ENOMEM;
+
+	mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_SECURITY);
+
+	fnd_sdh = fnd_get();
+	if (!fnd_sdh) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	root_sdh = indx_get_root(indx_sdh, ni, NULL, NULL);
+	if (!root_sdh) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	root_sii = indx_get_root(indx_sii, ni, NULL, NULL);
+	if (!root_sii) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/*
+	 * Check if such security already exists.
+	 * Use "SDH" and hash -> to get the offset in "SDS".
+	 */
+	err = indx_find(indx_sdh, ni, root_sdh, &hash_key, sizeof(hash_key),
+			&d_security->key.sec_id, &diff, (struct NTFS_DE **)&e,
+			fnd_sdh);
+	if (err)
+		goto out;
+
+	while (e) {
+		if (le32_to_cpu(e->sec_hdr.size) == new_sec_size) {
+			err = ntfs_read_run_nb(sbi, &ni->file.run,
+					       le64_to_cpu(e->sec_hdr.off),
+					       d_security, new_sec_size, NULL);
+			if (err)
+				goto out;
+
+			if (le32_to_cpu(d_security->size) == new_sec_size &&
+			    d_security->key.hash == hash_key.hash &&
+			    !memcmp(d_security + 1, sd, size_sd)) {
+				*security_id = d_security->key.sec_id;
+				/* Such security already exists. */
+				err = 0;
+				goto out;
+			}
+		}
+
+		err = indx_find_sort(indx_sdh, ni, root_sdh,
+				     (struct NTFS_DE **)&e, fnd_sdh);
+		if (err)
+			goto out;
+
+		if (!e || e->key.hash != hash_key.hash)
+			break;
+	}
+
+	/* Zero unused space. */
+	next = sbi->security.next_off & (SecurityDescriptorsBlockSize - 1);
+	left = SecurityDescriptorsBlockSize - next;
+
+	/* Zero gap until SecurityDescriptorsBlockSize. */
+	if (left < new_sec_size) {
+		/* Zero "left" bytes from sbi->security.next_off. */
+		sbi->security.next_off += SecurityDescriptorsBlockSize + left;
+	}
+
+	/* Zero tail of previous security. */
+	//used = ni->vfs_inode.i_size & (SecurityDescriptorsBlockSize - 1);
+
+	/*
+	 * Example:
+	 * 0x40438 == ni->vfs_inode.i_size
+	 * 0x00440 == sbi->security.next_off
+	 * need to zero [0x438-0x440)
+	 * if (next > used) {
+	 *  u32 tozero = next - used;
+	 *  zero "tozero" bytes from sbi->security.next_off - tozero
+	 */
+
+	/* Format new security descriptor. */
+	d_security->key.hash = hash_key.hash;
+	d_security->key.sec_id = cpu_to_le32(sbi->security.next_id);
+	d_security->off = cpu_to_le64(sbi->security.next_off);
+	d_security->size = cpu_to_le32(new_sec_size);
+	memcpy(d_security + 1, sd, size_sd);
+
+	/* Write main SDS bucket. */
+	err = ntfs_sb_write_run(sbi, &ni->file.run, sbi->security.next_off,
+				d_security, aligned_sec_size);
+
+	if (err)
+		goto out;
+
+	mirr_off = sbi->security.next_off + SecurityDescriptorsBlockSize;
+	new_sds_size = mirr_off + aligned_sec_size;
+
+	if (new_sds_size > ni->vfs_inode.i_size) {
+		err = attr_set_size(ni, ATTR_DATA, SDS_NAME,
+				    ARRAY_SIZE(SDS_NAME), &ni->file.run,
+				    new_sds_size, &new_sds_size, false, NULL);
+		if (err)
+			goto out;
+	}
+
+	/* Write copy SDS bucket. */
+	err = ntfs_sb_write_run(sbi, &ni->file.run, mirr_off, d_security,
+				aligned_sec_size);
+	if (err)
+		goto out;
+
+	/* Fill SII entry. */
+	sii_e.de.view.data_off =
+		cpu_to_le16(offsetof(struct NTFS_DE_SII, sec_hdr));
+	sii_e.de.view.data_size = cpu_to_le16(SIZEOF_SECURITY_HDR);
+	sii_e.de.view.res = 0;
+	sii_e.de.size = cpu_to_le16(SIZEOF_SII_DIRENTRY);
+	sii_e.de.key_size = cpu_to_le16(sizeof(d_security->key.sec_id));
+	sii_e.de.flags = 0;
+	sii_e.de.res = 0;
+	sii_e.sec_id = d_security->key.sec_id;
+	memcpy(&sii_e.sec_hdr, d_security, SIZEOF_SECURITY_HDR);
+
+	err = indx_insert_entry(indx_sii, ni, &sii_e.de, NULL, NULL, 0);
+	if (err)
+		goto out;
+
+	/* Fill SDH entry. */
+	sdh_e.de.view.data_off =
+		cpu_to_le16(offsetof(struct NTFS_DE_SDH, sec_hdr));
+	sdh_e.de.view.data_size = cpu_to_le16(SIZEOF_SECURITY_HDR);
+	sdh_e.de.view.res = 0;
+	sdh_e.de.size = cpu_to_le16(SIZEOF_SDH_DIRENTRY);
+	sdh_e.de.key_size = cpu_to_le16(sizeof(sdh_e.key));
+	sdh_e.de.flags = 0;
+	sdh_e.de.res = 0;
+	sdh_e.key.hash = d_security->key.hash;
+	sdh_e.key.sec_id = d_security->key.sec_id;
+	memcpy(&sdh_e.sec_hdr, d_security, SIZEOF_SECURITY_HDR);
+	sdh_e.magic[0] = cpu_to_le16('I');
+	sdh_e.magic[1] = cpu_to_le16('I');
+
+	fnd_clear(fnd_sdh);
+	err = indx_insert_entry(indx_sdh, ni, &sdh_e.de, (void *)(size_t)1,
+				fnd_sdh, 0);
+	if (err)
+		goto out;
+
+	*security_id = d_security->key.sec_id;
+	if (inserted)
+		*inserted = true;
+
+	/* Update Id and offset for next descriptor. */
+	sbi->security.next_id += 1;
+	sbi->security.next_off += aligned_sec_size;
+
+out:
+	fnd_put(fnd_sdh);
+	mark_inode_dirty(&ni->vfs_inode);
+	ni_unlock(ni);
+	kfree(d_security);
+
+	return err;
+}
+
+/*
+ * ntfs_reparse_init - Load and parse $Extend/$Reparse.
+ */
+int ntfs_reparse_init(struct ntfs_sb_info *sbi)
+{
+	int err;
+	struct ntfs_inode *ni = sbi->reparse.ni;
+	struct ntfs_index *indx = &sbi->reparse.index_r;
+	struct ATTRIB *attr;
+	struct ATTR_LIST_ENTRY *le;
+	const struct INDEX_ROOT *root_r;
+
+	if (!ni)
+		return 0;
+
+	le = NULL;
+	attr = ni_find_attr(ni, NULL, &le, ATTR_ROOT, SR_NAME,
+			    ARRAY_SIZE(SR_NAME), NULL, NULL);
+	if (!attr) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	root_r = resident_data(attr);
+	if (root_r->type != ATTR_ZERO ||
+	    root_r->rule != NTFS_COLLATION_TYPE_UINTS) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = indx_init(indx, sbi, attr, INDEX_MUTEX_SR);
+	if (err)
+		goto out;
+
+out:
+	return err;
+}
+
+/*
+ * ntfs_objid_init - Load and parse $Extend/$ObjId.
+ */
+int ntfs_objid_init(struct ntfs_sb_info *sbi)
+{
+	int err;
+	struct ntfs_inode *ni = sbi->objid.ni;
+	struct ntfs_index *indx = &sbi->objid.index_o;
+	struct ATTRIB *attr;
+	struct ATTR_LIST_ENTRY *le;
+	const struct INDEX_ROOT *root;
+
+	if (!ni)
+		return 0;
+
+	le = NULL;
+	attr = ni_find_attr(ni, NULL, &le, ATTR_ROOT, SO_NAME,
+			    ARRAY_SIZE(SO_NAME), NULL, NULL);
+	if (!attr) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	root = resident_data(attr);
+	if (root->type != ATTR_ZERO ||
+	    root->rule != NTFS_COLLATION_TYPE_UINTS) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = indx_init(indx, sbi, attr, INDEX_MUTEX_SO);
+	if (err)
+		goto out;
+
+out:
+	return err;
+}
+
+int ntfs_objid_remove(struct ntfs_sb_info *sbi, struct GUID *guid)
+{
+	int err;
+	struct ntfs_inode *ni = sbi->objid.ni;
+	struct ntfs_index *indx = &sbi->objid.index_o;
+
+	if (!ni)
+		return -EINVAL;
+
+	mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_OBJID);
+
+	err = indx_delete_entry(indx, ni, guid, sizeof(*guid), NULL);
+
+	mark_inode_dirty(&ni->vfs_inode);
+	ni_unlock(ni);
+
+	return err;
+}
+
+int ntfs_insert_reparse(struct ntfs_sb_info *sbi, __le32 rtag,
+			const struct MFT_REF *ref)
+{
+	int err;
+	struct ntfs_inode *ni = sbi->reparse.ni;
+	struct ntfs_index *indx = &sbi->reparse.index_r;
+	struct NTFS_DE_R re;
+
+	if (!ni)
+		return -EINVAL;
+
+	memset(&re, 0, sizeof(re));
+
+	re.de.view.data_off = cpu_to_le16(offsetof(struct NTFS_DE_R, zero));
+	re.de.size = cpu_to_le16(sizeof(struct NTFS_DE_R));
+	re.de.key_size = cpu_to_le16(sizeof(re.key));
+
+	re.key.ReparseTag = rtag;
+	memcpy(&re.key.ref, ref, sizeof(*ref));
+
+	mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_REPARSE);
+
+	err = indx_insert_entry(indx, ni, &re.de, NULL, NULL, 0);
+
+	mark_inode_dirty(&ni->vfs_inode);
+	ni_unlock(ni);
+
+	return err;
+}
+
+int ntfs_remove_reparse(struct ntfs_sb_info *sbi, __le32 rtag,
+			const struct MFT_REF *ref)
+{
+	int err, diff;
+	struct ntfs_inode *ni = sbi->reparse.ni;
+	struct ntfs_index *indx = &sbi->reparse.index_r;
+	struct ntfs_fnd *fnd = NULL;
+	struct REPARSE_KEY rkey;
+	struct NTFS_DE_R *re;
+	struct INDEX_ROOT *root_r;
+
+	if (!ni)
+		return -EINVAL;
+
+	rkey.ReparseTag = rtag;
+	rkey.ref = *ref;
+
+	mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_REPARSE);
+
+	if (rtag) {
+		err = indx_delete_entry(indx, ni, &rkey, sizeof(rkey), NULL);
+		goto out1;
+	}
+
+	fnd = fnd_get();
+	if (!fnd) {
+		err = -ENOMEM;
+		goto out1;
+	}
+
+	root_r = indx_get_root(indx, ni, NULL, NULL);
+	if (!root_r) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* 1 - forces to ignore rkey.ReparseTag when comparing keys. */
+	err = indx_find(indx, ni, root_r, &rkey, sizeof(rkey), (void *)1, &diff,
+			(struct NTFS_DE **)&re, fnd);
+	if (err)
+		goto out;
+
+	if (memcmp(&re->key.ref, ref, sizeof(*ref))) {
+		/* Impossible. Looks like volume corrupt? */
+		goto out;
+	}
+
+	memcpy(&rkey, &re->key, sizeof(rkey));
+
+	fnd_put(fnd);
+	fnd = NULL;
+
+	err = indx_delete_entry(indx, ni, &rkey, sizeof(rkey), NULL);
+	if (err)
+		goto out;
+
+out:
+	fnd_put(fnd);
+
+out1:
+	mark_inode_dirty(&ni->vfs_inode);
+	ni_unlock(ni);
+
+	return err;
+}
+
+static inline void ntfs_unmap_and_discard(struct ntfs_sb_info *sbi, CLST lcn,
+					  CLST len)
+{
+	ntfs_unmap_meta(sbi->sb, lcn, len);
+	ntfs_discard(sbi, lcn, len);
+}
+
+void mark_as_free_ex(struct ntfs_sb_info *sbi, CLST lcn, CLST len, bool trim)
+{
+	CLST end, i;
+	struct wnd_bitmap *wnd = &sbi->used.bitmap;
+
+	down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
+	if (!wnd_is_used(wnd, lcn, len)) {
+		ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
+
+		end = lcn + len;
+		len = 0;
+		for (i = lcn; i < end; i++) {
+			if (wnd_is_used(wnd, i, 1)) {
+				if (!len)
+					lcn = i;
+				len += 1;
+				continue;
+			}
+
+			if (!len)
+				continue;
+
+			if (trim)
+				ntfs_unmap_and_discard(sbi, lcn, len);
+
+			wnd_set_free(wnd, lcn, len);
+			len = 0;
+		}
+
+		if (!len)
+			goto out;
+	}
+
+	if (trim)
+		ntfs_unmap_and_discard(sbi, lcn, len);
+	wnd_set_free(wnd, lcn, len);
+
+out:
+	up_write(&wnd->rw_lock);
+}
+
+/*
+ * run_deallocate - Deallocate clusters.
+ */
+int run_deallocate(struct ntfs_sb_info *sbi, struct runs_tree *run, bool trim)
+{
+	CLST lcn, len;
+	size_t idx = 0;
+
+	while (run_get_entry(run, idx++, NULL, &lcn, &len)) {
+		if (lcn == SPARSE_LCN)
+			continue;
+
+		mark_as_free_ex(sbi, lcn, len, trim);
+	}
+
+	return 0;
+}
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
new file mode 100644
index 000000000000..0daca9adc54c
--- /dev/null
+++ b/fs/ntfs3/index.c
@@ -0,0 +1,2650 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/fs.h>
+#include <linux/nls.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+static const struct INDEX_NAMES {
+	const __le16 *name;
+	u8 name_len;
+} s_index_names[INDEX_MUTEX_TOTAL] = {
+	{ I30_NAME, ARRAY_SIZE(I30_NAME) }, { SII_NAME, ARRAY_SIZE(SII_NAME) },
+	{ SDH_NAME, ARRAY_SIZE(SDH_NAME) }, { SO_NAME, ARRAY_SIZE(SO_NAME) },
+	{ SQ_NAME, ARRAY_SIZE(SQ_NAME) },   { SR_NAME, ARRAY_SIZE(SR_NAME) },
+};
+
+/*
+ * cmp_fnames - Compare two names in index.
+ *
+ * if l1 != 0
+ *   Both names are little endian on-disk ATTR_FILE_NAME structs.
+ * else
+ *   key1 - cpu_str, key2 - ATTR_FILE_NAME
+ */
+static int cmp_fnames(const void *key1, size_t l1, const void *key2, size_t l2,
+		      const void *data)
+{
+	const struct ATTR_FILE_NAME *f2 = key2;
+	const struct ntfs_sb_info *sbi = data;
+	const struct ATTR_FILE_NAME *f1;
+	u16 fsize2;
+	bool both_case;
+
+	if (l2 <= offsetof(struct ATTR_FILE_NAME, name))
+		return -1;
+
+	fsize2 = fname_full_size(f2);
+	if (l2 < fsize2)
+		return -1;
+
+	both_case = f2->type != FILE_NAME_DOS /*&& !sbi->options.nocase*/;
+	if (!l1) {
+		const struct le_str *s2 = (struct le_str *)&f2->name_len;
+
+		/*
+		 * If names are equal (case insensitive)
+		 * try to compare it case sensitive.
+		 */
+		return ntfs_cmp_names_cpu(key1, s2, sbi->upcase, both_case);
+	}
+
+	f1 = key1;
+	return ntfs_cmp_names(f1->name, f1->name_len, f2->name, f2->name_len,
+			      sbi->upcase, both_case);
+}
+
+/*
+ * cmp_uint - $SII of $Secure and $Q of Quota
+ */
+static int cmp_uint(const void *key1, size_t l1, const void *key2, size_t l2,
+		    const void *data)
+{
+	const u32 *k1 = key1;
+	const u32 *k2 = key2;
+
+	if (l2 < sizeof(u32))
+		return -1;
+
+	if (*k1 < *k2)
+		return -1;
+	if (*k1 > *k2)
+		return 1;
+	return 0;
+}
+
+/*
+ * cmp_sdh - $SDH of $Secure
+ */
+static int cmp_sdh(const void *key1, size_t l1, const void *key2, size_t l2,
+		   const void *data)
+{
+	const struct SECURITY_KEY *k1 = key1;
+	const struct SECURITY_KEY *k2 = key2;
+	u32 t1, t2;
+
+	if (l2 < sizeof(struct SECURITY_KEY))
+		return -1;
+
+	t1 = le32_to_cpu(k1->hash);
+	t2 = le32_to_cpu(k2->hash);
+
+	/* First value is a hash value itself. */
+	if (t1 < t2)
+		return -1;
+	if (t1 > t2)
+		return 1;
+
+	/* Second value is security Id. */
+	if (data) {
+		t1 = le32_to_cpu(k1->sec_id);
+		t2 = le32_to_cpu(k2->sec_id);
+		if (t1 < t2)
+			return -1;
+		if (t1 > t2)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * cmp_uints - $O of ObjId and "$R" for Reparse.
+ */
+static int cmp_uints(const void *key1, size_t l1, const void *key2, size_t l2,
+		     const void *data)
+{
+	const __le32 *k1 = key1;
+	const __le32 *k2 = key2;
+	size_t count;
+
+	if ((size_t)data == 1) {
+		/*
+		 * ni_delete_all -> ntfs_remove_reparse ->
+		 * delete all with this reference.
+		 * k1, k2 - pointers to REPARSE_KEY
+		 */
+
+		k1 += 1; // Skip REPARSE_KEY.ReparseTag
+		k2 += 1; // Skip REPARSE_KEY.ReparseTag
+		if (l2 <= sizeof(int))
+			return -1;
+		l2 -= sizeof(int);
+		if (l1 <= sizeof(int))
+			return 1;
+		l1 -= sizeof(int);
+	}
+
+	if (l2 < sizeof(int))
+		return -1;
+
+	for (count = min(l1, l2) >> 2; count > 0; --count, ++k1, ++k2) {
+		u32 t1 = le32_to_cpu(*k1);
+		u32 t2 = le32_to_cpu(*k2);
+
+		if (t1 > t2)
+			return 1;
+		if (t1 < t2)
+			return -1;
+	}
+
+	if (l1 > l2)
+		return 1;
+	if (l1 < l2)
+		return -1;
+
+	return 0;
+}
+
+static inline NTFS_CMP_FUNC get_cmp_func(const struct INDEX_ROOT *root)
+{
+	switch (root->type) {
+	case ATTR_NAME:
+		if (root->rule == NTFS_COLLATION_TYPE_FILENAME)
+			return &cmp_fnames;
+		break;
+	case ATTR_ZERO:
+		switch (root->rule) {
+		case NTFS_COLLATION_TYPE_UINT:
+			return &cmp_uint;
+		case NTFS_COLLATION_TYPE_SECURITY_HASH:
+			return &cmp_sdh;
+		case NTFS_COLLATION_TYPE_UINTS:
+			return &cmp_uints;
+		default:
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+
+	return NULL;
+}
+
+struct bmp_buf {
+	struct ATTRIB *b;
+	struct mft_inode *mi;
+	struct buffer_head *bh;
+	ulong *buf;
+	size_t bit;
+	u32 nbits;
+	u64 new_valid;
+};
+
+static int bmp_buf_get(struct ntfs_index *indx, struct ntfs_inode *ni,
+		       size_t bit, struct bmp_buf *bbuf)
+{
+	struct ATTRIB *b;
+	size_t data_size, valid_size, vbo, off = bit >> 3;
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	CLST vcn = off >> sbi->cluster_bits;
+	struct ATTR_LIST_ENTRY *le = NULL;
+	struct buffer_head *bh;
+	struct super_block *sb;
+	u32 blocksize;
+	const struct INDEX_NAMES *in = &s_index_names[indx->type];
+
+	bbuf->bh = NULL;
+
+	b = ni_find_attr(ni, NULL, &le, ATTR_BITMAP, in->name, in->name_len,
+			 &vcn, &bbuf->mi);
+	bbuf->b = b;
+	if (!b)
+		return -EINVAL;
+
+	if (!b->non_res) {
+		data_size = le32_to_cpu(b->res.data_size);
+
+		if (off >= data_size)
+			return -EINVAL;
+
+		bbuf->buf = (ulong *)resident_data(b);
+		bbuf->bit = 0;
+		bbuf->nbits = data_size * 8;
+
+		return 0;
+	}
+
+	data_size = le64_to_cpu(b->nres.data_size);
+	if (WARN_ON(off >= data_size)) {
+		/* Looks like filesystem error. */
+		return -EINVAL;
+	}
+
+	valid_size = le64_to_cpu(b->nres.valid_size);
+
+	bh = ntfs_bread_run(sbi, &indx->bitmap_run, off);
+	if (!bh)
+		return -EIO;
+
+	if (IS_ERR(bh))
+		return PTR_ERR(bh);
+
+	bbuf->bh = bh;
+
+	if (buffer_locked(bh))
+		__wait_on_buffer(bh);
+
+	lock_buffer(bh);
+
+	sb = sbi->sb;
+	blocksize = sb->s_blocksize;
+
+	vbo = off & ~(size_t)sbi->block_mask;
+
+	bbuf->new_valid = vbo + blocksize;
+	if (bbuf->new_valid <= valid_size)
+		bbuf->new_valid = 0;
+	else if (bbuf->new_valid > data_size)
+		bbuf->new_valid = data_size;
+
+	if (vbo >= valid_size) {
+		memset(bh->b_data, 0, blocksize);
+	} else if (vbo + blocksize > valid_size) {
+		u32 voff = valid_size & sbi->block_mask;
+
+		memset(bh->b_data + voff, 0, blocksize - voff);
+	}
+
+	bbuf->buf = (ulong *)bh->b_data;
+	bbuf->bit = 8 * (off & ~(size_t)sbi->block_mask);
+	bbuf->nbits = 8 * blocksize;
+
+	return 0;
+}
+
+static void bmp_buf_put(struct bmp_buf *bbuf, bool dirty)
+{
+	struct buffer_head *bh = bbuf->bh;
+	struct ATTRIB *b = bbuf->b;
+
+	if (!bh) {
+		if (b && !b->non_res && dirty)
+			bbuf->mi->dirty = true;
+		return;
+	}
+
+	if (!dirty)
+		goto out;
+
+	if (bbuf->new_valid) {
+		b->nres.valid_size = cpu_to_le64(bbuf->new_valid);
+		bbuf->mi->dirty = true;
+	}
+
+	set_buffer_uptodate(bh);
+	mark_buffer_dirty(bh);
+
+out:
+	unlock_buffer(bh);
+	put_bh(bh);
+}
+
+/*
+ * indx_mark_used - Mark the bit @bit as used.
+ */
+static int indx_mark_used(struct ntfs_index *indx, struct ntfs_inode *ni,
+			  size_t bit)
+{
+	int err;
+	struct bmp_buf bbuf;
+
+	err = bmp_buf_get(indx, ni, bit, &bbuf);
+	if (err)
+		return err;
+
+	__set_bit(bit - bbuf.bit, bbuf.buf);
+
+	bmp_buf_put(&bbuf, true);
+
+	return 0;
+}
+
+/*
+ * indx_mark_free - Mark the bit @bit as free.
+ */
+static int indx_mark_free(struct ntfs_index *indx, struct ntfs_inode *ni,
+			  size_t bit)
+{
+	int err;
+	struct bmp_buf bbuf;
+
+	err = bmp_buf_get(indx, ni, bit, &bbuf);
+	if (err)
+		return err;
+
+	__clear_bit(bit - bbuf.bit, bbuf.buf);
+
+	bmp_buf_put(&bbuf, true);
+
+	return 0;
+}
+
+/*
+ * scan_nres_bitmap
+ *
+ * If ntfs_readdir calls this function (indx_used_bit -> scan_nres_bitmap),
+ * inode is shared locked and no ni_lock.
+ * Use rw_semaphore for read/write access to bitmap_run.
+ */
+static int scan_nres_bitmap(struct ntfs_inode *ni, struct ATTRIB *bitmap,
+			    struct ntfs_index *indx, size_t from,
+			    bool (*fn)(const ulong *buf, u32 bit, u32 bits,
+				       size_t *ret),
+			    size_t *ret)
+{
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	struct super_block *sb = sbi->sb;
+	struct runs_tree *run = &indx->bitmap_run;
+	struct rw_semaphore *lock = &indx->run_lock;
+	u32 nbits = sb->s_blocksize * 8;
+	u32 blocksize = sb->s_blocksize;
+	u64 valid_size = le64_to_cpu(bitmap->nres.valid_size);
+	u64 data_size = le64_to_cpu(bitmap->nres.data_size);
+	sector_t eblock = bytes_to_block(sb, data_size);
+	size_t vbo = from >> 3;
+	sector_t blk = (vbo & sbi->cluster_mask) >> sb->s_blocksize_bits;
+	sector_t vblock = vbo >> sb->s_blocksize_bits;
+	sector_t blen, block;
+	CLST lcn, clen, vcn, vcn_next;
+	size_t idx;
+	struct buffer_head *bh;
+	bool ok;
+
+	*ret = MINUS_ONE_T;
+
+	if (vblock >= eblock)
+		return 0;
+
+	from &= nbits - 1;
+	vcn = vbo >> sbi->cluster_bits;
+
+	down_read(lock);
+	ok = run_lookup_entry(run, vcn, &lcn, &clen, &idx);
+	up_read(lock);
+
+next_run:
+	if (!ok) {
+		int err;
+		const struct INDEX_NAMES *name = &s_index_names[indx->type];
+
+		down_write(lock);
+		err = attr_load_runs_vcn(ni, ATTR_BITMAP, name->name,
+					 name->name_len, run, vcn);
+		up_write(lock);
+		if (err)
+			return err;
+		down_read(lock);
+		ok = run_lookup_entry(run, vcn, &lcn, &clen, &idx);
+		up_read(lock);
+		if (!ok)
+			return -EINVAL;
+	}
+
+	blen = (sector_t)clen * sbi->blocks_per_cluster;
+	block = (sector_t)lcn * sbi->blocks_per_cluster;
+
+	for (; blk < blen; blk++, from = 0) {
+		bh = ntfs_bread(sb, block + blk);
+		if (!bh)
+			return -EIO;
+
+		vbo = (u64)vblock << sb->s_blocksize_bits;
+		if (vbo >= valid_size) {
+			memset(bh->b_data, 0, blocksize);
+		} else if (vbo + blocksize > valid_size) {
+			u32 voff = valid_size & sbi->block_mask;
+
+			memset(bh->b_data + voff, 0, blocksize - voff);
+		}
+
+		if (vbo + blocksize > data_size)
+			nbits = 8 * (data_size - vbo);
+
+		ok = nbits > from ? (*fn)((ulong *)bh->b_data, from, nbits, ret)
+				  : false;
+		put_bh(bh);
+
+		if (ok) {
+			*ret += 8 * vbo;
+			return 0;
+		}
+
+		if (++vblock >= eblock) {
+			*ret = MINUS_ONE_T;
+			return 0;
+		}
+	}
+	blk = 0;
+	vcn_next = vcn + clen;
+	down_read(lock);
+	ok = run_get_entry(run, ++idx, &vcn, &lcn, &clen) && vcn == vcn_next;
+	if (!ok)
+		vcn = vcn_next;
+	up_read(lock);
+	goto next_run;
+}
+
+static bool scan_for_free(const ulong *buf, u32 bit, u32 bits, size_t *ret)
+{
+	size_t pos = find_next_zero_bit(buf, bits, bit);
+
+	if (pos >= bits)
+		return false;
+	*ret = pos;
+	return true;
+}
+
+/*
+ * indx_find_free - Look for free bit.
+ *
+ * Return: -1 if no free bits.
+ */
+static int indx_find_free(struct ntfs_index *indx, struct ntfs_inode *ni,
+			  size_t *bit, struct ATTRIB **bitmap)
+{
+	struct ATTRIB *b;
+	struct ATTR_LIST_ENTRY *le = NULL;
+	const struct INDEX_NAMES *in = &s_index_names[indx->type];
+	int err;
+
+	b = ni_find_attr(ni, NULL, &le, ATTR_BITMAP, in->name, in->name_len,
+			 NULL, NULL);
+
+	if (!b)
+		return -ENOENT;
+
+	*bitmap = b;
+	*bit = MINUS_ONE_T;
+
+	if (!b->non_res) {
+		u32 nbits = 8 * le32_to_cpu(b->res.data_size);
+		size_t pos = find_next_zero_bit(resident_data(b), nbits, 0);
+
+		if (pos < nbits)
+			*bit = pos;
+	} else {
+		err = scan_nres_bitmap(ni, b, indx, 0, &scan_for_free, bit);
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static bool scan_for_used(const ulong *buf, u32 bit, u32 bits, size_t *ret)
+{
+	size_t pos = find_next_bit(buf, bits, bit);
+
+	if (pos >= bits)
+		return false;
+	*ret = pos;
+	return true;
+}
+
+/*
+ * indx_used_bit - Look for used bit.
+ *
+ * Return: MINUS_ONE_T if no used bits.
+ */
+int indx_used_bit(struct ntfs_index *indx, struct ntfs_inode *ni, size_t *bit)
+{
+	struct ATTRIB *b;
+	struct ATTR_LIST_ENTRY *le = NULL;
+	size_t from = *bit;
+	const struct INDEX_NAMES *in = &s_index_names[indx->type];
+	int err;
+
+	b = ni_find_attr(ni, NULL, &le, ATTR_BITMAP, in->name, in->name_len,
+			 NULL, NULL);
+
+	if (!b)
+		return -ENOENT;
+
+	*bit = MINUS_ONE_T;
+
+	if (!b->non_res) {
+		u32 nbits = le32_to_cpu(b->res.data_size) * 8;
+		size_t pos = find_next_bit(resident_data(b), nbits, from);
+
+		if (pos < nbits)
+			*bit = pos;
+	} else {
+		err = scan_nres_bitmap(ni, b, indx, from, &scan_for_used, bit);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * hdr_find_split
+ *
+ * Find a point at which the index allocation buffer would like to be split.
+ * NOTE: This function should never return 'END' entry NULL returns on error.
+ */
+static const struct NTFS_DE *hdr_find_split(const struct INDEX_HDR *hdr)
+{
+	size_t o;
+	const struct NTFS_DE *e = hdr_first_de(hdr);
+	u32 used_2 = le32_to_cpu(hdr->used) >> 1;
+	u16 esize;
+
+	if (!e || de_is_last(e))
+		return NULL;
+
+	esize = le16_to_cpu(e->size);
+	for (o = le32_to_cpu(hdr->de_off) + esize; o < used_2; o += esize) {
+		const struct NTFS_DE *p = e;
+
+		e = Add2Ptr(hdr, o);
+
+		/* We must not return END entry. */
+		if (de_is_last(e))
+			return p;
+
+		esize = le16_to_cpu(e->size);
+	}
+
+	return e;
+}
+
+/*
+ * hdr_insert_head - Insert some entries at the beginning of the buffer.
+ *
+ * It is used to insert entries into a newly-created buffer.
+ */
+static const struct NTFS_DE *hdr_insert_head(struct INDEX_HDR *hdr,
+					     const void *ins, u32 ins_bytes)
+{
+	u32 to_move;
+	struct NTFS_DE *e = hdr_first_de(hdr);
+	u32 used = le32_to_cpu(hdr->used);
+
+	if (!e)
+		return NULL;
+
+	/* Now we just make room for the inserted entries and jam it in. */
+	to_move = used - le32_to_cpu(hdr->de_off);
+	memmove(Add2Ptr(e, ins_bytes), e, to_move);
+	memcpy(e, ins, ins_bytes);
+	hdr->used = cpu_to_le32(used + ins_bytes);
+
+	return e;
+}
+
+void fnd_clear(struct ntfs_fnd *fnd)
+{
+	int i;
+
+	for (i = 0; i < fnd->level; i++) {
+		struct indx_node *n = fnd->nodes[i];
+
+		if (!n)
+			continue;
+
+		put_indx_node(n);
+		fnd->nodes[i] = NULL;
+	}
+	fnd->level = 0;
+	fnd->root_de = NULL;
+}
+
+static int fnd_push(struct ntfs_fnd *fnd, struct indx_node *n,
+		    struct NTFS_DE *e)
+{
+	int i;
+
+	i = fnd->level;
+	if (i < 0 || i >= ARRAY_SIZE(fnd->nodes))
+		return -EINVAL;
+	fnd->nodes[i] = n;
+	fnd->de[i] = e;
+	fnd->level += 1;
+	return 0;
+}
+
+static struct indx_node *fnd_pop(struct ntfs_fnd *fnd)
+{
+	struct indx_node *n;
+	int i = fnd->level;
+
+	i -= 1;
+	n = fnd->nodes[i];
+	fnd->nodes[i] = NULL;
+	fnd->level = i;
+
+	return n;
+}
+
+static bool fnd_is_empty(struct ntfs_fnd *fnd)
+{
+	if (!fnd->level)
+		return !fnd->root_de;
+
+	return !fnd->de[fnd->level - 1];
+}
+
+/*
+ * hdr_find_e - Locate an entry the index buffer.
+ *
+ * If no matching entry is found, it returns the first entry which is greater
+ * than the desired entry If the search key is greater than all the entries the
+ * buffer, it returns the 'end' entry. This function does a binary search of the
+ * current index buffer, for the first entry that is <= to the search value.
+ *
+ * Return: NULL if error.
+ */
+static struct NTFS_DE *hdr_find_e(const struct ntfs_index *indx,
+				  const struct INDEX_HDR *hdr, const void *key,
+				  size_t key_len, const void *ctx, int *diff)
+{
+	struct NTFS_DE *e;
+	NTFS_CMP_FUNC cmp = indx->cmp;
+	u32 e_size, e_key_len;
+	u32 end = le32_to_cpu(hdr->used);
+	u32 off = le32_to_cpu(hdr->de_off);
+
+#ifdef NTFS3_INDEX_BINARY_SEARCH
+	int max_idx = 0, fnd, min_idx;
+	int nslots = 64;
+	u16 *offs;
+
+	if (end > 0x10000)
+		goto next;
+
+	offs = kmalloc(sizeof(u16) * nslots, GFP_NOFS);
+	if (!offs)
+		goto next;
+
+	/* Use binary search algorithm. */
+next1:
+	if (off + sizeof(struct NTFS_DE) > end) {
+		e = NULL;
+		goto out1;
+	}
+	e = Add2Ptr(hdr, off);
+	e_size = le16_to_cpu(e->size);
+
+	if (e_size < sizeof(struct NTFS_DE) || off + e_size > end) {
+		e = NULL;
+		goto out1;
+	}
+
+	if (max_idx >= nslots) {
+		u16 *ptr;
+		int new_slots = ALIGN(2 * nslots, 8);
+
+		ptr = kmalloc(sizeof(u16) * new_slots, GFP_NOFS);
+		if (ptr)
+			memcpy(ptr, offs, sizeof(u16) * max_idx);
+		kfree(offs);
+		offs = ptr;
+		nslots = new_slots;
+		if (!ptr)
+			goto next;
+	}
+
+	/* Store entry table. */
+	offs[max_idx] = off;
+
+	if (!de_is_last(e)) {
+		off += e_size;
+		max_idx += 1;
+		goto next1;
+	}
+
+	/*
+	 * Table of pointers is created.
+	 * Use binary search to find entry that is <= to the search value.
+	 */
+	fnd = -1;
+	min_idx = 0;
+
+	while (min_idx <= max_idx) {
+		int mid_idx = min_idx + ((max_idx - min_idx) >> 1);
+		int diff2;
+
+		e = Add2Ptr(hdr, offs[mid_idx]);
+
+		e_key_len = le16_to_cpu(e->key_size);
+
+		diff2 = (*cmp)(key, key_len, e + 1, e_key_len, ctx);
+
+		if (!diff2) {
+			*diff = 0;
+			goto out1;
+		}
+
+		if (diff2 < 0) {
+			max_idx = mid_idx - 1;
+			fnd = mid_idx;
+			if (!fnd)
+				break;
+		} else {
+			min_idx = mid_idx + 1;
+		}
+	}
+
+	if (fnd == -1) {
+		e = NULL;
+		goto out1;
+	}
+
+	*diff = -1;
+	e = Add2Ptr(hdr, offs[fnd]);
+
+out1:
+	kfree(offs);
+
+	return e;
+#endif
+
+next:
+	/*
+	 * Entries index are sorted.
+	 * Enumerate all entries until we find entry
+	 * that is <= to the search value.
+	 */
+	if (off + sizeof(struct NTFS_DE) > end)
+		return NULL;
+
+	e = Add2Ptr(hdr, off);
+	e_size = le16_to_cpu(e->size);
+
+	if (e_size < sizeof(struct NTFS_DE) || off + e_size > end)
+		return NULL;
+
+	off += e_size;
+
+	e_key_len = le16_to_cpu(e->key_size);
+
+	*diff = (*cmp)(key, key_len, e + 1, e_key_len, ctx);
+	if (!*diff)
+		return e;
+
+	if (*diff <= 0)
+		return e;
+
+	if (de_is_last(e)) {
+		*diff = 1;
+		return e;
+	}
+	goto next;
+}
+
+/*
+ * hdr_insert_de - Insert an index entry into the buffer.
+ *
+ * 'before' should be a pointer previously returned from hdr_find_e.
+ */
+static struct NTFS_DE *hdr_insert_de(const struct ntfs_index *indx,
+				     struct INDEX_HDR *hdr,
+				     const struct NTFS_DE *de,
+				     struct NTFS_DE *before, const void *ctx)
+{
+	int diff;
+	size_t off = PtrOffset(hdr, before);
+	u32 used = le32_to_cpu(hdr->used);
+	u32 total = le32_to_cpu(hdr->total);
+	u16 de_size = le16_to_cpu(de->size);
+
+	/* First, check to see if there's enough room. */
+	if (used + de_size > total)
+		return NULL;
+
+	/* We know there's enough space, so we know we'll succeed. */
+	if (before) {
+		/* Check that before is inside Index. */
+		if (off >= used || off < le32_to_cpu(hdr->de_off) ||
+		    off + le16_to_cpu(before->size) > total) {
+			return NULL;
+		}
+		goto ok;
+	}
+	/* No insert point is applied. Get it manually. */
+	before = hdr_find_e(indx, hdr, de + 1, le16_to_cpu(de->key_size), ctx,
+			    &diff);
+	if (!before)
+		return NULL;
+	off = PtrOffset(hdr, before);
+
+ok:
+	/* Now we just make room for the entry and jam it in. */
+	memmove(Add2Ptr(before, de_size), before, used - off);
+
+	hdr->used = cpu_to_le32(used + de_size);
+	memcpy(before, de, de_size);
+
+	return before;
+}
+
+/*
+ * hdr_delete_de - Remove an entry from the index buffer.
+ */
+static inline struct NTFS_DE *hdr_delete_de(struct INDEX_HDR *hdr,
+					    struct NTFS_DE *re)
+{
+	u32 used = le32_to_cpu(hdr->used);
+	u16 esize = le16_to_cpu(re->size);
+	u32 off = PtrOffset(hdr, re);
+	int bytes = used - (off + esize);
+
+	if (off >= used || esize < sizeof(struct NTFS_DE) ||
+	    bytes < sizeof(struct NTFS_DE))
+		return NULL;
+
+	hdr->used = cpu_to_le32(used - esize);
+	memmove(re, Add2Ptr(re, esize), bytes);
+
+	return re;
+}
+
+void indx_clear(struct ntfs_index *indx)
+{
+	run_close(&indx->alloc_run);
+	run_close(&indx->bitmap_run);
+}
+
+int indx_init(struct ntfs_index *indx, struct ntfs_sb_info *sbi,
+	      const struct ATTRIB *attr, enum index_mutex_classed type)
+{
+	u32 t32;
+	const struct INDEX_ROOT *root = resident_data(attr);
+
+	/* Check root fields. */
+	if (!root->index_block_clst)
+		return -EINVAL;
+
+	indx->type = type;
+	indx->idx2vbn_bits = __ffs(root->index_block_clst);
+
+	t32 = le32_to_cpu(root->index_block_size);
+	indx->index_bits = blksize_bits(t32);
+
+	/* Check index record size. */
+	if (t32 < sbi->cluster_size) {
+		/* Index record is smaller than a cluster, use 512 blocks. */
+		if (t32 != root->index_block_clst * SECTOR_SIZE)
+			return -EINVAL;
+
+		/* Check alignment to a cluster. */
+		if ((sbi->cluster_size >> SECTOR_SHIFT) &
+		    (root->index_block_clst - 1)) {
+			return -EINVAL;
+		}
+
+		indx->vbn2vbo_bits = SECTOR_SHIFT;
+	} else {
+		/* Index record must be a multiple of cluster size. */
+		if (t32 != root->index_block_clst << sbi->cluster_bits)
+			return -EINVAL;
+
+		indx->vbn2vbo_bits = sbi->cluster_bits;
+	}
+
+	init_rwsem(&indx->run_lock);
+
+	indx->cmp = get_cmp_func(root);
+	return indx->cmp ? 0 : -EINVAL;
+}
+
+static struct indx_node *indx_new(struct ntfs_index *indx,
+				  struct ntfs_inode *ni, CLST vbn,
+				  const __le64 *sub_vbn)
+{
+	int err;
+	struct NTFS_DE *e;
+	struct indx_node *r;
+	struct INDEX_HDR *hdr;
+	struct INDEX_BUFFER *index;
+	u64 vbo = (u64)vbn << indx->vbn2vbo_bits;
+	u32 bytes = 1u << indx->index_bits;
+	u16 fn;
+	u32 eo;
+
+	r = kzalloc(sizeof(struct indx_node), GFP_NOFS);
+	if (!r)
+		return ERR_PTR(-ENOMEM);
+
+	index = kzalloc(bytes, GFP_NOFS);
+	if (!index) {
+		kfree(r);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	err = ntfs_get_bh(ni->mi.sbi, &indx->alloc_run, vbo, bytes, &r->nb);
+
+	if (err) {
+		kfree(index);
+		kfree(r);
+		return ERR_PTR(err);
+	}
+
+	/* Create header. */
+	index->rhdr.sign = NTFS_INDX_SIGNATURE;
+	index->rhdr.fix_off = cpu_to_le16(sizeof(struct INDEX_BUFFER)); // 0x28
+	fn = (bytes >> SECTOR_SHIFT) + 1; // 9
+	index->rhdr.fix_num = cpu_to_le16(fn);
+	index->vbn = cpu_to_le64(vbn);
+	hdr = &index->ihdr;
+	eo = ALIGN(sizeof(struct INDEX_BUFFER) + fn * sizeof(short), 8);
+	hdr->de_off = cpu_to_le32(eo);
+
+	e = Add2Ptr(hdr, eo);
+
+	if (sub_vbn) {
+		e->flags = NTFS_IE_LAST | NTFS_IE_HAS_SUBNODES;
+		e->size = cpu_to_le16(sizeof(struct NTFS_DE) + sizeof(u64));
+		hdr->used =
+			cpu_to_le32(eo + sizeof(struct NTFS_DE) + sizeof(u64));
+		de_set_vbn_le(e, *sub_vbn);
+		hdr->flags = 1;
+	} else {
+		e->size = cpu_to_le16(sizeof(struct NTFS_DE));
+		hdr->used = cpu_to_le32(eo + sizeof(struct NTFS_DE));
+		e->flags = NTFS_IE_LAST;
+	}
+
+	hdr->total = cpu_to_le32(bytes - offsetof(struct INDEX_BUFFER, ihdr));
+
+	r->index = index;
+	return r;
+}
+
+struct INDEX_ROOT *indx_get_root(struct ntfs_index *indx, struct ntfs_inode *ni,
+				 struct ATTRIB **attr, struct mft_inode **mi)
+{
+	struct ATTR_LIST_ENTRY *le = NULL;
+	struct ATTRIB *a;
+	const struct INDEX_NAMES *in = &s_index_names[indx->type];
+
+	a = ni_find_attr(ni, NULL, &le, ATTR_ROOT, in->name, in->name_len, NULL,
+			 mi);
+	if (!a)
+		return NULL;
+
+	if (attr)
+		*attr = a;
+
+	return resident_data_ex(a, sizeof(struct INDEX_ROOT));
+}
+
+static int indx_write(struct ntfs_index *indx, struct ntfs_inode *ni,
+		      struct indx_node *node, int sync)
+{
+	struct INDEX_BUFFER *ib = node->index;
+
+	return ntfs_write_bh(ni->mi.sbi, &ib->rhdr, &node->nb, sync);
+}
+
+/*
+ * indx_read
+ *
+ * If ntfs_readdir calls this function
+ * inode is shared locked and no ni_lock.
+ * Use rw_semaphore for read/write access to alloc_run.
+ */
+int indx_read(struct ntfs_index *indx, struct ntfs_inode *ni, CLST vbn,
+	      struct indx_node **node)
+{
+	int err;
+	struct INDEX_BUFFER *ib;
+	struct runs_tree *run = &indx->alloc_run;
+	struct rw_semaphore *lock = &indx->run_lock;
+	u64 vbo = (u64)vbn << indx->vbn2vbo_bits;
+	u32 bytes = 1u << indx->index_bits;
+	struct indx_node *in = *node;
+	const struct INDEX_NAMES *name;
+
+	if (!in) {
+		in = kzalloc(sizeof(struct indx_node), GFP_NOFS);
+		if (!in)
+			return -ENOMEM;
+	} else {
+		nb_put(&in->nb);
+	}
+
+	ib = in->index;
+	if (!ib) {
+		ib = kmalloc(bytes, GFP_NOFS);
+		if (!ib) {
+			err = -ENOMEM;
+			goto out;
+		}
+	}
+
+	down_read(lock);
+	err = ntfs_read_bh(ni->mi.sbi, run, vbo, &ib->rhdr, bytes, &in->nb);
+	up_read(lock);
+	if (!err)
+		goto ok;
+
+	if (err == -E_NTFS_FIXUP)
+		goto ok;
+
+	if (err != -ENOENT)
+		goto out;
+
+	name = &s_index_names[indx->type];
+	down_write(lock);
+	err = attr_load_runs_range(ni, ATTR_ALLOC, name->name, name->name_len,
+				   run, vbo, vbo + bytes);
+	up_write(lock);
+	if (err)
+		goto out;
+
+	down_read(lock);
+	err = ntfs_read_bh(ni->mi.sbi, run, vbo, &ib->rhdr, bytes, &in->nb);
+	up_read(lock);
+	if (err == -E_NTFS_FIXUP)
+		goto ok;
+
+	if (err)
+		goto out;
+
+ok:
+	if (err == -E_NTFS_FIXUP) {
+		ntfs_write_bh(ni->mi.sbi, &ib->rhdr, &in->nb, 0);
+		err = 0;
+	}
+
+	in->index = ib;
+	*node = in;
+
+out:
+	if (ib != in->index)
+		kfree(ib);
+
+	if (*node != in) {
+		nb_put(&in->nb);
+		kfree(in);
+	}
+
+	return err;
+}
+
+/*
+ * indx_find - Scan NTFS directory for given entry.
+ */
+int indx_find(struct ntfs_index *indx, struct ntfs_inode *ni,
+	      const struct INDEX_ROOT *root, const void *key, size_t key_len,
+	      const void *ctx, int *diff, struct NTFS_DE **entry,
+	      struct ntfs_fnd *fnd)
+{
+	int err;
+	struct NTFS_DE *e;
+	const struct INDEX_HDR *hdr;
+	struct indx_node *node;
+
+	if (!root)
+		root = indx_get_root(&ni->dir, ni, NULL, NULL);
+
+	if (!root) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	hdr = &root->ihdr;
+
+	/* Check cache. */
+	e = fnd->level ? fnd->de[fnd->level - 1] : fnd->root_de;
+	if (e && !de_is_last(e) &&
+	    !(*indx->cmp)(key, key_len, e + 1, le16_to_cpu(e->key_size), ctx)) {
+		*entry = e;
+		*diff = 0;
+		return 0;
+	}
+
+	/* Soft finder reset. */
+	fnd_clear(fnd);
+
+	/* Lookup entry that is <= to the search value. */
+	e = hdr_find_e(indx, hdr, key, key_len, ctx, diff);
+	if (!e)
+		return -EINVAL;
+
+	if (fnd)
+		fnd->root_de = e;
+
+	err = 0;
+
+	for (;;) {
+		node = NULL;
+		if (*diff >= 0 || !de_has_vcn_ex(e)) {
+			*entry = e;
+			goto out;
+		}
+
+		/* Read next level. */
+		err = indx_read(indx, ni, de_get_vbn(e), &node);
+		if (err)
+			goto out;
+
+		/* Lookup entry that is <= to the search value. */
+		e = hdr_find_e(indx, &node->index->ihdr, key, key_len, ctx,
+			       diff);
+		if (!e) {
+			err = -EINVAL;
+			put_indx_node(node);
+			goto out;
+		}
+
+		fnd_push(fnd, node, e);
+	}
+
+out:
+	return err;
+}
+
+int indx_find_sort(struct ntfs_index *indx, struct ntfs_inode *ni,
+		   const struct INDEX_ROOT *root, struct NTFS_DE **entry,
+		   struct ntfs_fnd *fnd)
+{
+	int err;
+	struct indx_node *n = NULL;
+	struct NTFS_DE *e;
+	size_t iter = 0;
+	int level = fnd->level;
+
+	if (!*entry) {
+		/* Start find. */
+		e = hdr_first_de(&root->ihdr);
+		if (!e)
+			return 0;
+		fnd_clear(fnd);
+		fnd->root_de = e;
+	} else if (!level) {
+		if (de_is_last(fnd->root_de)) {
+			*entry = NULL;
+			return 0;
+		}
+
+		e = hdr_next_de(&root->ihdr, fnd->root_de);
+		if (!e)
+			return -EINVAL;
+		fnd->root_de = e;
+	} else {
+		n = fnd->nodes[level - 1];
+		e = fnd->de[level - 1];
+
+		if (de_is_last(e))
+			goto pop_level;
+
+		e = hdr_next_de(&n->index->ihdr, e);
+		if (!e)
+			return -EINVAL;
+
+		fnd->de[level - 1] = e;
+	}
+
+	/* Just to avoid tree cycle. */
+next_iter:
+	if (iter++ >= 1000)
+		return -EINVAL;
+
+	while (de_has_vcn_ex(e)) {
+		if (le16_to_cpu(e->size) <
+		    sizeof(struct NTFS_DE) + sizeof(u64)) {
+			if (n) {
+				fnd_pop(fnd);
+				kfree(n);
+			}
+			return -EINVAL;
+		}
+
+		/* Read next level. */
+		err = indx_read(indx, ni, de_get_vbn(e), &n);
+		if (err)
+			return err;
+
+		/* Try next level. */
+		e = hdr_first_de(&n->index->ihdr);
+		if (!e) {
+			kfree(n);
+			return -EINVAL;
+		}
+
+		fnd_push(fnd, n, e);
+	}
+
+	if (le16_to_cpu(e->size) > sizeof(struct NTFS_DE)) {
+		*entry = e;
+		return 0;
+	}
+
+pop_level:
+	for (;;) {
+		if (!de_is_last(e))
+			goto next_iter;
+
+		/* Pop one level. */
+		if (n) {
+			fnd_pop(fnd);
+			kfree(n);
+		}
+
+		level = fnd->level;
+
+		if (level) {
+			n = fnd->nodes[level - 1];
+			e = fnd->de[level - 1];
+		} else if (fnd->root_de) {
+			n = NULL;
+			e = fnd->root_de;
+			fnd->root_de = NULL;
+		} else {
+			*entry = NULL;
+			return 0;
+		}
+
+		if (le16_to_cpu(e->size) > sizeof(struct NTFS_DE)) {
+			*entry = e;
+			if (!fnd->root_de)
+				fnd->root_de = e;
+			return 0;
+		}
+	}
+}
+
+int indx_find_raw(struct ntfs_index *indx, struct ntfs_inode *ni,
+		  const struct INDEX_ROOT *root, struct NTFS_DE **entry,
+		  size_t *off, struct ntfs_fnd *fnd)
+{
+	int err;
+	struct indx_node *n = NULL;
+	struct NTFS_DE *e = NULL;
+	struct NTFS_DE *e2;
+	size_t bit;
+	CLST next_used_vbn;
+	CLST next_vbn;
+	u32 record_size = ni->mi.sbi->record_size;
+
+	/* Use non sorted algorithm. */
+	if (!*entry) {
+		/* This is the first call. */
+		e = hdr_first_de(&root->ihdr);
+		if (!e)
+			return 0;
+		fnd_clear(fnd);
+		fnd->root_de = e;
+
+		/* The first call with setup of initial element. */
+		if (*off >= record_size) {
+			next_vbn = (((*off - record_size) >> indx->index_bits))
+				   << indx->idx2vbn_bits;
+			/* Jump inside cycle 'for'. */
+			goto next;
+		}
+
+		/* Start enumeration from root. */
+		*off = 0;
+	} else if (!fnd->root_de)
+		return -EINVAL;
+
+	for (;;) {
+		/* Check if current entry can be used. */
+		if (e && le16_to_cpu(e->size) > sizeof(struct NTFS_DE))
+			goto ok;
+
+		if (!fnd->level) {
+			/* Continue to enumerate root. */
+			if (!de_is_last(fnd->root_de)) {
+				e = hdr_next_de(&root->ihdr, fnd->root_de);
+				if (!e)
+					return -EINVAL;
+				fnd->root_de = e;
+				continue;
+			}
+
+			/* Start to enumerate indexes from 0. */
+			next_vbn = 0;
+		} else {
+			/* Continue to enumerate indexes. */
+			e2 = fnd->de[fnd->level - 1];
+
+			n = fnd->nodes[fnd->level - 1];
+
+			if (!de_is_last(e2)) {
+				e = hdr_next_de(&n->index->ihdr, e2);
+				if (!e)
+					return -EINVAL;
+				fnd->de[fnd->level - 1] = e;
+				continue;
+			}
+
+			/* Continue with next index. */
+			next_vbn = le64_to_cpu(n->index->vbn) +
+				   root->index_block_clst;
+		}
+
+next:
+		/* Release current index. */
+		if (n) {
+			fnd_pop(fnd);
+			put_indx_node(n);
+			n = NULL;
+		}
+
+		/* Skip all free indexes. */
+		bit = next_vbn >> indx->idx2vbn_bits;
+		err = indx_used_bit(indx, ni, &bit);
+		if (err == -ENOENT || bit == MINUS_ONE_T) {
+			/* No used indexes. */
+			*entry = NULL;
+			return 0;
+		}
+
+		next_used_vbn = bit << indx->idx2vbn_bits;
+
+		/* Read buffer into memory. */
+		err = indx_read(indx, ni, next_used_vbn, &n);
+		if (err)
+			return err;
+
+		e = hdr_first_de(&n->index->ihdr);
+		fnd_push(fnd, n, e);
+		if (!e)
+			return -EINVAL;
+	}
+
+ok:
+	/* Return offset to restore enumerator if necessary. */
+	if (!n) {
+		/* 'e' points in root, */
+		*off = PtrOffset(&root->ihdr, e);
+	} else {
+		/* 'e' points in index, */
+		*off = (le64_to_cpu(n->index->vbn) << indx->vbn2vbo_bits) +
+		       record_size + PtrOffset(&n->index->ihdr, e);
+	}
+
+	*entry = e;
+	return 0;
+}
+
+/*
+ * indx_create_allocate - Create "Allocation + Bitmap" attributes.
+ */
+static int indx_create_allocate(struct ntfs_index *indx, struct ntfs_inode *ni,
+				CLST *vbn)
+{
+	int err = -ENOMEM;
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	struct ATTRIB *bitmap;
+	struct ATTRIB *alloc;
+	u32 data_size = 1u << indx->index_bits;
+	u32 alloc_size = ntfs_up_cluster(sbi, data_size);
+	CLST len = alloc_size >> sbi->cluster_bits;
+	const struct INDEX_NAMES *in = &s_index_names[indx->type];
+	CLST alen;
+	struct runs_tree run;
+
+	run_init(&run);
+
+	err = attr_allocate_clusters(sbi, &run, 0, 0, len, NULL, 0, &alen, 0,
+				     NULL);
+	if (err)
+		goto out;
+
+	err = ni_insert_nonresident(ni, ATTR_ALLOC, in->name, in->name_len,
+				    &run, 0, len, 0, &alloc, NULL);
+	if (err)
+		goto out1;
+
+	alloc->nres.valid_size = alloc->nres.data_size = cpu_to_le64(data_size);
+
+	err = ni_insert_resident(ni, bitmap_size(1), ATTR_BITMAP, in->name,
+				 in->name_len, &bitmap, NULL, NULL);
+	if (err)
+		goto out2;
+
+	if (in->name == I30_NAME) {
+		ni->vfs_inode.i_size = data_size;
+		inode_set_bytes(&ni->vfs_inode, alloc_size);
+	}
+
+	memcpy(&indx->alloc_run, &run, sizeof(run));
+
+	*vbn = 0;
+
+	return 0;
+
+out2:
+	mi_remove_attr(NULL, &ni->mi, alloc);
+
+out1:
+	run_deallocate(sbi, &run, false);
+
+out:
+	return err;
+}
+
+/*
+ * indx_add_allocate - Add clusters to index.
+ */
+static int indx_add_allocate(struct ntfs_index *indx, struct ntfs_inode *ni,
+			     CLST *vbn)
+{
+	int err;
+	size_t bit;
+	u64 data_size;
+	u64 bmp_size, bmp_size_v;
+	struct ATTRIB *bmp, *alloc;
+	struct mft_inode *mi;
+	const struct INDEX_NAMES *in = &s_index_names[indx->type];
+
+	err = indx_find_free(indx, ni, &bit, &bmp);
+	if (err)
+		goto out1;
+
+	if (bit != MINUS_ONE_T) {
+		bmp = NULL;
+	} else {
+		if (bmp->non_res) {
+			bmp_size = le64_to_cpu(bmp->nres.data_size);
+			bmp_size_v = le64_to_cpu(bmp->nres.valid_size);
+		} else {
+			bmp_size = bmp_size_v = le32_to_cpu(bmp->res.data_size);
+		}
+
+		bit = bmp_size << 3;
+	}
+
+	data_size = (u64)(bit + 1) << indx->index_bits;
+
+	if (bmp) {
+		/* Increase bitmap. */
+		err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len,
+				    &indx->bitmap_run, bitmap_size(bit + 1),
+				    NULL, true, NULL);
+		if (err)
+			goto out1;
+	}
+
+	alloc = ni_find_attr(ni, NULL, NULL, ATTR_ALLOC, in->name, in->name_len,
+			     NULL, &mi);
+	if (!alloc) {
+		err = -EINVAL;
+		if (bmp)
+			goto out2;
+		goto out1;
+	}
+
+	/* Increase allocation. */
+	err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len,
+			    &indx->alloc_run, data_size, &data_size, true,
+			    NULL);
+	if (err) {
+		if (bmp)
+			goto out2;
+		goto out1;
+	}
+
+	*vbn = bit << indx->idx2vbn_bits;
+
+	return 0;
+
+out2:
+	/* Ops. No space? */
+	attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len,
+		      &indx->bitmap_run, bmp_size, &bmp_size_v, false, NULL);
+
+out1:
+	return err;
+}
+
+/*
+ * indx_insert_into_root - Attempt to insert an entry into the index root.
+ *
+ * @undo - True if we undoing previous remove.
+ * If necessary, it will twiddle the index b-tree.
+ */
+static int indx_insert_into_root(struct ntfs_index *indx, struct ntfs_inode *ni,
+				 const struct NTFS_DE *new_de,
+				 struct NTFS_DE *root_de, const void *ctx,
+				 struct ntfs_fnd *fnd, bool undo)
+{
+	int err = 0;
+	struct NTFS_DE *e, *e0, *re;
+	struct mft_inode *mi;
+	struct ATTRIB *attr;
+	struct INDEX_HDR *hdr;
+	struct indx_node *n;
+	CLST new_vbn;
+	__le64 *sub_vbn, t_vbn;
+	u16 new_de_size;
+	u32 hdr_used, hdr_total, asize, to_move;
+	u32 root_size, new_root_size;
+	struct ntfs_sb_info *sbi;
+	int ds_root;
+	struct INDEX_ROOT *root, *a_root;
+
+	/* Get the record this root placed in. */
+	root = indx_get_root(indx, ni, &attr, &mi);
+	if (!root)
+		return -EINVAL;
+
+	/*
+	 * Try easy case:
+	 * hdr_insert_de will succeed if there's
+	 * room the root for the new entry.
+	 */
+	hdr = &root->ihdr;
+	sbi = ni->mi.sbi;
+	new_de_size = le16_to_cpu(new_de->size);
+	hdr_used = le32_to_cpu(hdr->used);
+	hdr_total = le32_to_cpu(hdr->total);
+	asize = le32_to_cpu(attr->size);
+	root_size = le32_to_cpu(attr->res.data_size);
+
+	ds_root = new_de_size + hdr_used - hdr_total;
+
+	/* If 'undo' is set then reduce requirements. */
+	if ((undo || asize + ds_root < sbi->max_bytes_per_attr) &&
+	    mi_resize_attr(mi, attr, ds_root)) {
+		hdr->total = cpu_to_le32(hdr_total + ds_root);
+		e = hdr_insert_de(indx, hdr, new_de, root_de, ctx);
+		WARN_ON(!e);
+		fnd_clear(fnd);
+		fnd->root_de = e;
+
+		return 0;
+	}
+
+	/* Make a copy of root attribute to restore if error. */
+	a_root = kmemdup(attr, asize, GFP_NOFS);
+	if (!a_root)
+		return -ENOMEM;
+
+	/*
+	 * Copy all the non-end entries from
+	 * the index root to the new buffer.
+	 */
+	to_move = 0;
+	e0 = hdr_first_de(hdr);
+
+	/* Calculate the size to copy. */
+	for (e = e0;; e = hdr_next_de(hdr, e)) {
+		if (!e) {
+			err = -EINVAL;
+			goto out_free_root;
+		}
+
+		if (de_is_last(e))
+			break;
+		to_move += le16_to_cpu(e->size);
+	}
+
+	if (!to_move) {
+		re = NULL;
+	} else {
+		re = kmemdup(e0, to_move, GFP_NOFS);
+		if (!re) {
+			err = -ENOMEM;
+			goto out_free_root;
+		}
+	}
+
+	sub_vbn = NULL;
+	if (de_has_vcn(e)) {
+		t_vbn = de_get_vbn_le(e);
+		sub_vbn = &t_vbn;
+	}
+
+	new_root_size = sizeof(struct INDEX_ROOT) + sizeof(struct NTFS_DE) +
+			sizeof(u64);
+	ds_root = new_root_size - root_size;
+
+	if (ds_root > 0 && asize + ds_root > sbi->max_bytes_per_attr) {
+		/* Make root external. */
+		err = -EOPNOTSUPP;
+		goto out_free_re;
+	}
+
+	if (ds_root)
+		mi_resize_attr(mi, attr, ds_root);
+
+	/* Fill first entry (vcn will be set later). */
+	e = (struct NTFS_DE *)(root + 1);
+	memset(e, 0, sizeof(struct NTFS_DE));
+	e->size = cpu_to_le16(sizeof(struct NTFS_DE) + sizeof(u64));
+	e->flags = NTFS_IE_HAS_SUBNODES | NTFS_IE_LAST;
+
+	hdr->flags = 1;
+	hdr->used = hdr->total =
+		cpu_to_le32(new_root_size - offsetof(struct INDEX_ROOT, ihdr));
+
+	fnd->root_de = hdr_first_de(hdr);
+	mi->dirty = true;
+
+	/* Create alloc and bitmap attributes (if not). */
+	err = run_is_empty(&indx->alloc_run)
+		      ? indx_create_allocate(indx, ni, &new_vbn)
+		      : indx_add_allocate(indx, ni, &new_vbn);
+
+	/* Layout of record may be changed, so rescan root. */
+	root = indx_get_root(indx, ni, &attr, &mi);
+	if (!root) {
+		/* Bug? */
+		ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
+		err = -EINVAL;
+		goto out_free_re;
+	}
+
+	if (err) {
+		/* Restore root. */
+		if (mi_resize_attr(mi, attr, -ds_root))
+			memcpy(attr, a_root, asize);
+		else {
+			/* Bug? */
+			ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
+		}
+		goto out_free_re;
+	}
+
+	e = (struct NTFS_DE *)(root + 1);
+	*(__le64 *)(e + 1) = cpu_to_le64(new_vbn);
+	mi->dirty = true;
+
+	/* Now we can create/format the new buffer and copy the entries into. */
+	n = indx_new(indx, ni, new_vbn, sub_vbn);
+	if (IS_ERR(n)) {
+		err = PTR_ERR(n);
+		goto out_free_re;
+	}
+
+	hdr = &n->index->ihdr;
+	hdr_used = le32_to_cpu(hdr->used);
+	hdr_total = le32_to_cpu(hdr->total);
+
+	/* Copy root entries into new buffer. */
+	hdr_insert_head(hdr, re, to_move);
+
+	/* Update bitmap attribute. */
+	indx_mark_used(indx, ni, new_vbn >> indx->idx2vbn_bits);
+
+	/* Check if we can insert new entry new index buffer. */
+	if (hdr_used + new_de_size > hdr_total) {
+		/*
+		 * This occurs if MFT record is the same or bigger than index
+		 * buffer. Move all root new index and have no space to add
+		 * new entry classic case when MFT record is 1K and index
+		 * buffer 4K the problem should not occurs.
+		 */
+		kfree(re);
+		indx_write(indx, ni, n, 0);
+
+		put_indx_node(n);
+		fnd_clear(fnd);
+		err = indx_insert_entry(indx, ni, new_de, ctx, fnd, undo);
+		goto out_free_root;
+	}
+
+	/*
+	 * Now root is a parent for new index buffer.
+	 * Insert NewEntry a new buffer.
+	 */
+	e = hdr_insert_de(indx, hdr, new_de, NULL, ctx);
+	if (!e) {
+		err = -EINVAL;
+		goto out_put_n;
+	}
+	fnd_push(fnd, n, e);
+
+	/* Just write updates index into disk. */
+	indx_write(indx, ni, n, 0);
+
+	n = NULL;
+
+out_put_n:
+	put_indx_node(n);
+out_free_re:
+	kfree(re);
+out_free_root:
+	kfree(a_root);
+	return err;
+}
+
+/*
+ * indx_insert_into_buffer
+ *
+ * Attempt to insert an entry into an Index Allocation Buffer.
+ * If necessary, it will split the buffer.
+ */
+static int
+indx_insert_into_buffer(struct ntfs_index *indx, struct ntfs_inode *ni,
+			struct INDEX_ROOT *root, const struct NTFS_DE *new_de,
+			const void *ctx, int level, struct ntfs_fnd *fnd)
+{
+	int err;
+	const struct NTFS_DE *sp;
+	struct NTFS_DE *e, *de_t, *up_e = NULL;
+	struct indx_node *n2 = NULL;
+	struct indx_node *n1 = fnd->nodes[level];
+	struct INDEX_HDR *hdr1 = &n1->index->ihdr;
+	struct INDEX_HDR *hdr2;
+	u32 to_copy, used;
+	CLST new_vbn;
+	__le64 t_vbn, *sub_vbn;
+	u16 sp_size;
+
+	/* Try the most easy case. */
+	e = fnd->level - 1 == level ? fnd->de[level] : NULL;
+	e = hdr_insert_de(indx, hdr1, new_de, e, ctx);
+	fnd->de[level] = e;
+	if (e) {
+		/* Just write updated index into disk. */
+		indx_write(indx, ni, n1, 0);
+		return 0;
+	}
+
+	/*
+	 * No space to insert into buffer. Split it.
+	 * To split we:
+	 *  - Save split point ('cause index buffers will be changed)
+	 * - Allocate NewBuffer and copy all entries <= sp into new buffer
+	 * - Remove all entries (sp including) from TargetBuffer
+	 * - Insert NewEntry into left or right buffer (depending on sp <=>
+	 *     NewEntry)
+	 * - Insert sp into parent buffer (or root)
+	 * - Make sp a parent for new buffer
+	 */
+	sp = hdr_find_split(hdr1);
+	if (!sp)
+		return -EINVAL;
+
+	sp_size = le16_to_cpu(sp->size);
+	up_e = kmalloc(sp_size + sizeof(u64), GFP_NOFS);
+	if (!up_e)
+		return -ENOMEM;
+	memcpy(up_e, sp, sp_size);
+
+	if (!hdr1->flags) {
+		up_e->flags |= NTFS_IE_HAS_SUBNODES;
+		up_e->size = cpu_to_le16(sp_size + sizeof(u64));
+		sub_vbn = NULL;
+	} else {
+		t_vbn = de_get_vbn_le(up_e);
+		sub_vbn = &t_vbn;
+	}
+
+	/* Allocate on disk a new index allocation buffer. */
+	err = indx_add_allocate(indx, ni, &new_vbn);
+	if (err)
+		goto out;
+
+	/* Allocate and format memory a new index buffer. */
+	n2 = indx_new(indx, ni, new_vbn, sub_vbn);
+	if (IS_ERR(n2)) {
+		err = PTR_ERR(n2);
+		goto out;
+	}
+
+	hdr2 = &n2->index->ihdr;
+
+	/* Make sp a parent for new buffer. */
+	de_set_vbn(up_e, new_vbn);
+
+	/* Copy all the entries <= sp into the new buffer. */
+	de_t = hdr_first_de(hdr1);
+	to_copy = PtrOffset(de_t, sp);
+	hdr_insert_head(hdr2, de_t, to_copy);
+
+	/* Remove all entries (sp including) from hdr1. */
+	used = le32_to_cpu(hdr1->used) - to_copy - sp_size;
+	memmove(de_t, Add2Ptr(sp, sp_size), used - le32_to_cpu(hdr1->de_off));
+	hdr1->used = cpu_to_le32(used);
+
+	/*
+	 * Insert new entry into left or right buffer
+	 * (depending on sp <=> new_de).
+	 */
+	hdr_insert_de(indx,
+		      (*indx->cmp)(new_de + 1, le16_to_cpu(new_de->key_size),
+				   up_e + 1, le16_to_cpu(up_e->key_size),
+				   ctx) < 0
+			      ? hdr2
+			      : hdr1,
+		      new_de, NULL, ctx);
+
+	indx_mark_used(indx, ni, new_vbn >> indx->idx2vbn_bits);
+
+	indx_write(indx, ni, n1, 0);
+	indx_write(indx, ni, n2, 0);
+
+	put_indx_node(n2);
+
+	/*
+	 * We've finished splitting everybody, so we are ready to
+	 * insert the promoted entry into the parent.
+	 */
+	if (!level) {
+		/* Insert in root. */
+		err = indx_insert_into_root(indx, ni, up_e, NULL, ctx, fnd, 0);
+		if (err)
+			goto out;
+	} else {
+		/*
+		 * The target buffer's parent is another index buffer.
+		 * TODO: Remove recursion.
+		 */
+		err = indx_insert_into_buffer(indx, ni, root, up_e, ctx,
+					      level - 1, fnd);
+		if (err)
+			goto out;
+	}
+
+out:
+	kfree(up_e);
+
+	return err;
+}
+
+/*
+ * indx_insert_entry - Insert new entry into index.
+ *
+ * @undo - True if we undoing previous remove.
+ */
+int indx_insert_entry(struct ntfs_index *indx, struct ntfs_inode *ni,
+		      const struct NTFS_DE *new_de, const void *ctx,
+		      struct ntfs_fnd *fnd, bool undo)
+{
+	int err;
+	int diff;
+	struct NTFS_DE *e;
+	struct ntfs_fnd *fnd_a = NULL;
+	struct INDEX_ROOT *root;
+
+	if (!fnd) {
+		fnd_a = fnd_get();
+		if (!fnd_a) {
+			err = -ENOMEM;
+			goto out1;
+		}
+		fnd = fnd_a;
+	}
+
+	root = indx_get_root(indx, ni, NULL, NULL);
+	if (!root) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (fnd_is_empty(fnd)) {
+		/*
+		 * Find the spot the tree where we want to
+		 * insert the new entry.
+		 */
+		err = indx_find(indx, ni, root, new_de + 1,
+				le16_to_cpu(new_de->key_size), ctx, &diff, &e,
+				fnd);
+		if (err)
+			goto out;
+
+		if (!diff) {
+			err = -EEXIST;
+			goto out;
+		}
+	}
+
+	if (!fnd->level) {
+		/*
+		 * The root is also a leaf, so we'll insert the
+		 * new entry into it.
+		 */
+		err = indx_insert_into_root(indx, ni, new_de, fnd->root_de, ctx,
+					    fnd, undo);
+		if (err)
+			goto out;
+	} else {
+		/*
+		 * Found a leaf buffer, so we'll insert the new entry into it.
+		 */
+		err = indx_insert_into_buffer(indx, ni, root, new_de, ctx,
+					      fnd->level - 1, fnd);
+		if (err)
+			goto out;
+	}
+
+out:
+	fnd_put(fnd_a);
+out1:
+	return err;
+}
+
+/*
+ * indx_find_buffer - Locate a buffer from the tree.
+ */
+static struct indx_node *indx_find_buffer(struct ntfs_index *indx,
+					  struct ntfs_inode *ni,
+					  const struct INDEX_ROOT *root,
+					  __le64 vbn, struct indx_node *n)
+{
+	int err;
+	const struct NTFS_DE *e;
+	struct indx_node *r;
+	const struct INDEX_HDR *hdr = n ? &n->index->ihdr : &root->ihdr;
+
+	/* Step 1: Scan one level. */
+	for (e = hdr_first_de(hdr);; e = hdr_next_de(hdr, e)) {
+		if (!e)
+			return ERR_PTR(-EINVAL);
+
+		if (de_has_vcn(e) && vbn == de_get_vbn_le(e))
+			return n;
+
+		if (de_is_last(e))
+			break;
+	}
+
+	/* Step2: Do recursion. */
+	e = Add2Ptr(hdr, le32_to_cpu(hdr->de_off));
+	for (;;) {
+		if (de_has_vcn_ex(e)) {
+			err = indx_read(indx, ni, de_get_vbn(e), &n);
+			if (err)
+				return ERR_PTR(err);
+
+			r = indx_find_buffer(indx, ni, root, vbn, n);
+			if (r)
+				return r;
+		}
+
+		if (de_is_last(e))
+			break;
+
+		e = Add2Ptr(e, le16_to_cpu(e->size));
+	}
+
+	return NULL;
+}
+
+/*
+ * indx_shrink - Deallocate unused tail indexes.
+ */
+static int indx_shrink(struct ntfs_index *indx, struct ntfs_inode *ni,
+		       size_t bit)
+{
+	int err = 0;
+	u64 bpb, new_data;
+	size_t nbits;
+	struct ATTRIB *b;
+	struct ATTR_LIST_ENTRY *le = NULL;
+	const struct INDEX_NAMES *in = &s_index_names[indx->type];
+
+	b = ni_find_attr(ni, NULL, &le, ATTR_BITMAP, in->name, in->name_len,
+			 NULL, NULL);
+
+	if (!b)
+		return -ENOENT;
+
+	if (!b->non_res) {
+		unsigned long pos;
+		const unsigned long *bm = resident_data(b);
+
+		nbits = (size_t)le32_to_cpu(b->res.data_size) * 8;
+
+		if (bit >= nbits)
+			return 0;
+
+		pos = find_next_bit(bm, nbits, bit);
+		if (pos < nbits)
+			return 0;
+	} else {
+		size_t used = MINUS_ONE_T;
+
+		nbits = le64_to_cpu(b->nres.data_size) * 8;
+
+		if (bit >= nbits)
+			return 0;
+
+		err = scan_nres_bitmap(ni, b, indx, bit, &scan_for_used, &used);
+		if (err)
+			return err;
+
+		if (used != MINUS_ONE_T)
+			return 0;
+	}
+
+	new_data = (u64)bit << indx->index_bits;
+
+	err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len,
+			    &indx->alloc_run, new_data, &new_data, false, NULL);
+	if (err)
+		return err;
+
+	bpb = bitmap_size(bit);
+	if (bpb * 8 == nbits)
+		return 0;
+
+	err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len,
+			    &indx->bitmap_run, bpb, &bpb, false, NULL);
+
+	return err;
+}
+
+static int indx_free_children(struct ntfs_index *indx, struct ntfs_inode *ni,
+			      const struct NTFS_DE *e, bool trim)
+{
+	int err;
+	struct indx_node *n;
+	struct INDEX_HDR *hdr;
+	CLST vbn = de_get_vbn(e);
+	size_t i;
+
+	err = indx_read(indx, ni, vbn, &n);
+	if (err)
+		return err;
+
+	hdr = &n->index->ihdr;
+	/* First, recurse into the children, if any. */
+	if (hdr_has_subnode(hdr)) {
+		for (e = hdr_first_de(hdr); e; e = hdr_next_de(hdr, e)) {
+			indx_free_children(indx, ni, e, false);
+			if (de_is_last(e))
+				break;
+		}
+	}
+
+	put_indx_node(n);
+
+	i = vbn >> indx->idx2vbn_bits;
+	/*
+	 * We've gotten rid of the children; add this buffer to the free list.
+	 */
+	indx_mark_free(indx, ni, i);
+
+	if (!trim)
+		return 0;
+
+	/*
+	 * If there are no used indexes after current free index
+	 * then we can truncate allocation and bitmap.
+	 * Use bitmap to estimate the case.
+	 */
+	indx_shrink(indx, ni, i + 1);
+	return 0;
+}
+
+/*
+ * indx_get_entry_to_replace
+ *
+ * Find a replacement entry for a deleted entry.
+ * Always returns a node entry:
+ * NTFS_IE_HAS_SUBNODES is set the flags and the size includes the sub_vcn.
+ */
+static int indx_get_entry_to_replace(struct ntfs_index *indx,
+				     struct ntfs_inode *ni,
+				     const struct NTFS_DE *de_next,
+				     struct NTFS_DE **de_to_replace,
+				     struct ntfs_fnd *fnd)
+{
+	int err;
+	int level = -1;
+	CLST vbn;
+	struct NTFS_DE *e, *te, *re;
+	struct indx_node *n;
+	struct INDEX_BUFFER *ib;
+
+	*de_to_replace = NULL;
+
+	/* Find first leaf entry down from de_next. */
+	vbn = de_get_vbn(de_next);
+	for (;;) {
+		n = NULL;
+		err = indx_read(indx, ni, vbn, &n);
+		if (err)
+			goto out;
+
+		e = hdr_first_de(&n->index->ihdr);
+		fnd_push(fnd, n, e);
+
+		if (!de_is_last(e)) {
+			/*
+			 * This buffer is non-empty, so its first entry
+			 * could be used as the replacement entry.
+			 */
+			level = fnd->level - 1;
+		}
+
+		if (!de_has_vcn(e))
+			break;
+
+		/* This buffer is a node. Continue to go down. */
+		vbn = de_get_vbn(e);
+	}
+
+	if (level == -1)
+		goto out;
+
+	n = fnd->nodes[level];
+	te = hdr_first_de(&n->index->ihdr);
+	/* Copy the candidate entry into the replacement entry buffer. */
+	re = kmalloc(le16_to_cpu(te->size) + sizeof(u64), GFP_NOFS);
+	if (!re) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	*de_to_replace = re;
+	memcpy(re, te, le16_to_cpu(te->size));
+
+	if (!de_has_vcn(re)) {
+		/*
+		 * The replacement entry we found doesn't have a sub_vcn.
+		 * increase its size to hold one.
+		 */
+		le16_add_cpu(&re->size, sizeof(u64));
+		re->flags |= NTFS_IE_HAS_SUBNODES;
+	} else {
+		/*
+		 * The replacement entry we found was a node entry, which
+		 * means that all its child buffers are empty. Return them
+		 * to the free pool.
+		 */
+		indx_free_children(indx, ni, te, true);
+	}
+
+	/*
+	 * Expunge the replacement entry from its former location,
+	 * and then write that buffer.
+	 */
+	ib = n->index;
+	e = hdr_delete_de(&ib->ihdr, te);
+
+	fnd->de[level] = e;
+	indx_write(indx, ni, n, 0);
+
+	/* Check to see if this action created an empty leaf. */
+	if (ib_is_leaf(ib) && ib_is_empty(ib))
+		return 0;
+
+out:
+	fnd_clear(fnd);
+	return err;
+}
+
+/*
+ * indx_delete_entry - Delete an entry from the index.
+ */
+int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni,
+		      const void *key, u32 key_len, const void *ctx)
+{
+	int err, diff;
+	struct INDEX_ROOT *root;
+	struct INDEX_HDR *hdr;
+	struct ntfs_fnd *fnd, *fnd2;
+	struct INDEX_BUFFER *ib;
+	struct NTFS_DE *e, *re, *next, *prev, *me;
+	struct indx_node *n, *n2d = NULL;
+	__le64 sub_vbn;
+	int level, level2;
+	struct ATTRIB *attr;
+	struct mft_inode *mi;
+	u32 e_size, root_size, new_root_size;
+	size_t trim_bit;
+	const struct INDEX_NAMES *in;
+
+	fnd = fnd_get();
+	if (!fnd) {
+		err = -ENOMEM;
+		goto out2;
+	}
+
+	fnd2 = fnd_get();
+	if (!fnd2) {
+		err = -ENOMEM;
+		goto out1;
+	}
+
+	root = indx_get_root(indx, ni, &attr, &mi);
+	if (!root) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Locate the entry to remove. */
+	err = indx_find(indx, ni, root, key, key_len, ctx, &diff, &e, fnd);
+	if (err)
+		goto out;
+
+	if (!e || diff) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	level = fnd->level;
+
+	if (level) {
+		n = fnd->nodes[level - 1];
+		e = fnd->de[level - 1];
+		ib = n->index;
+		hdr = &ib->ihdr;
+	} else {
+		hdr = &root->ihdr;
+		e = fnd->root_de;
+		n = NULL;
+	}
+
+	e_size = le16_to_cpu(e->size);
+
+	if (!de_has_vcn_ex(e)) {
+		/* The entry to delete is a leaf, so we can just rip it out. */
+		hdr_delete_de(hdr, e);
+
+		if (!level) {
+			hdr->total = hdr->used;
+
+			/* Shrink resident root attribute. */
+			mi_resize_attr(mi, attr, 0 - e_size);
+			goto out;
+		}
+
+		indx_write(indx, ni, n, 0);
+
+		/*
+		 * Check to see if removing that entry made
+		 * the leaf empty.
+		 */
+		if (ib_is_leaf(ib) && ib_is_empty(ib)) {
+			fnd_pop(fnd);
+			fnd_push(fnd2, n, e);
+		}
+	} else {
+		/*
+		 * The entry we wish to delete is a node buffer, so we
+		 * have to find a replacement for it.
+		 */
+		next = de_get_next(e);
+
+		err = indx_get_entry_to_replace(indx, ni, next, &re, fnd2);
+		if (err)
+			goto out;
+
+		if (re) {
+			de_set_vbn_le(re, de_get_vbn_le(e));
+			hdr_delete_de(hdr, e);
+
+			err = level ? indx_insert_into_buffer(indx, ni, root,
+							      re, ctx,
+							      fnd->level - 1,
+							      fnd)
+				    : indx_insert_into_root(indx, ni, re, e,
+							    ctx, fnd, 0);
+			kfree(re);
+
+			if (err)
+				goto out;
+		} else {
+			/*
+			 * There is no replacement for the current entry.
+			 * This means that the subtree rooted at its node
+			 * is empty, and can be deleted, which turn means
+			 * that the node can just inherit the deleted
+			 * entry sub_vcn.
+			 */
+			indx_free_children(indx, ni, next, true);
+
+			de_set_vbn_le(next, de_get_vbn_le(e));
+			hdr_delete_de(hdr, e);
+			if (level) {
+				indx_write(indx, ni, n, 0);
+			} else {
+				hdr->total = hdr->used;
+
+				/* Shrink resident root attribute. */
+				mi_resize_attr(mi, attr, 0 - e_size);
+			}
+		}
+	}
+
+	/* Delete a branch of tree. */
+	if (!fnd2 || !fnd2->level)
+		goto out;
+
+	/* Reinit root 'cause it can be changed. */
+	root = indx_get_root(indx, ni, &attr, &mi);
+	if (!root) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	n2d = NULL;
+	sub_vbn = fnd2->nodes[0]->index->vbn;
+	level2 = 0;
+	level = fnd->level;
+
+	hdr = level ? &fnd->nodes[level - 1]->index->ihdr : &root->ihdr;
+
+	/* Scan current level. */
+	for (e = hdr_first_de(hdr);; e = hdr_next_de(hdr, e)) {
+		if (!e) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (de_has_vcn(e) && sub_vbn == de_get_vbn_le(e))
+			break;
+
+		if (de_is_last(e)) {
+			e = NULL;
+			break;
+		}
+	}
+
+	if (!e) {
+		/* Do slow search from root. */
+		struct indx_node *in;
+
+		fnd_clear(fnd);
+
+		in = indx_find_buffer(indx, ni, root, sub_vbn, NULL);
+		if (IS_ERR(in)) {
+			err = PTR_ERR(in);
+			goto out;
+		}
+
+		if (in)
+			fnd_push(fnd, in, NULL);
+	}
+
+	/* Merge fnd2 -> fnd. */
+	for (level = 0; level < fnd2->level; level++) {
+		fnd_push(fnd, fnd2->nodes[level], fnd2->de[level]);
+		fnd2->nodes[level] = NULL;
+	}
+	fnd2->level = 0;
+
+	hdr = NULL;
+	for (level = fnd->level; level; level--) {
+		struct indx_node *in = fnd->nodes[level - 1];
+
+		ib = in->index;
+		if (ib_is_empty(ib)) {
+			sub_vbn = ib->vbn;
+		} else {
+			hdr = &ib->ihdr;
+			n2d = in;
+			level2 = level;
+			break;
+		}
+	}
+
+	if (!hdr)
+		hdr = &root->ihdr;
+
+	e = hdr_first_de(hdr);
+	if (!e) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (hdr != &root->ihdr || !de_is_last(e)) {
+		prev = NULL;
+		while (!de_is_last(e)) {
+			if (de_has_vcn(e) && sub_vbn == de_get_vbn_le(e))
+				break;
+			prev = e;
+			e = hdr_next_de(hdr, e);
+			if (!e) {
+				err = -EINVAL;
+				goto out;
+			}
+		}
+
+		if (sub_vbn != de_get_vbn_le(e)) {
+			/*
+			 * Didn't find the parent entry, although this buffer
+			 * is the parent trail. Something is corrupt.
+			 */
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (de_is_last(e)) {
+			/*
+			 * Since we can't remove the end entry, we'll remove
+			 * its predecessor instead. This means we have to
+			 * transfer the predecessor's sub_vcn to the end entry.
+			 * Note: This index block is not empty, so the
+			 * predecessor must exist.
+			 */
+			if (!prev) {
+				err = -EINVAL;
+				goto out;
+			}
+
+			if (de_has_vcn(prev)) {
+				de_set_vbn_le(e, de_get_vbn_le(prev));
+			} else if (de_has_vcn(e)) {
+				le16_sub_cpu(&e->size, sizeof(u64));
+				e->flags &= ~NTFS_IE_HAS_SUBNODES;
+				le32_sub_cpu(&hdr->used, sizeof(u64));
+			}
+			e = prev;
+		}
+
+		/*
+		 * Copy the current entry into a temporary buffer (stripping
+		 * off its down-pointer, if any) and delete it from the current
+		 * buffer or root, as appropriate.
+		 */
+		e_size = le16_to_cpu(e->size);
+		me = kmemdup(e, e_size, GFP_NOFS);
+		if (!me) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		if (de_has_vcn(me)) {
+			me->flags &= ~NTFS_IE_HAS_SUBNODES;
+			le16_sub_cpu(&me->size, sizeof(u64));
+		}
+
+		hdr_delete_de(hdr, e);
+
+		if (hdr == &root->ihdr) {
+			level = 0;
+			hdr->total = hdr->used;
+
+			/* Shrink resident root attribute. */
+			mi_resize_attr(mi, attr, 0 - e_size);
+		} else {
+			indx_write(indx, ni, n2d, 0);
+			level = level2;
+		}
+
+		/* Mark unused buffers as free. */
+		trim_bit = -1;
+		for (; level < fnd->level; level++) {
+			ib = fnd->nodes[level]->index;
+			if (ib_is_empty(ib)) {
+				size_t k = le64_to_cpu(ib->vbn) >>
+					   indx->idx2vbn_bits;
+
+				indx_mark_free(indx, ni, k);
+				if (k < trim_bit)
+					trim_bit = k;
+			}
+		}
+
+		fnd_clear(fnd);
+		/*fnd->root_de = NULL;*/
+
+		/*
+		 * Re-insert the entry into the tree.
+		 * Find the spot the tree where we want to insert the new entry.
+		 */
+		err = indx_insert_entry(indx, ni, me, ctx, fnd, 0);
+		kfree(me);
+		if (err)
+			goto out;
+
+		if (trim_bit != -1)
+			indx_shrink(indx, ni, trim_bit);
+	} else {
+		/*
+		 * This tree needs to be collapsed down to an empty root.
+		 * Recreate the index root as an empty leaf and free all
+		 * the bits the index allocation bitmap.
+		 */
+		fnd_clear(fnd);
+		fnd_clear(fnd2);
+
+		in = &s_index_names[indx->type];
+
+		err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len,
+				    &indx->alloc_run, 0, NULL, false, NULL);
+		err = ni_remove_attr(ni, ATTR_ALLOC, in->name, in->name_len,
+				     false, NULL);
+		run_close(&indx->alloc_run);
+
+		err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len,
+				    &indx->bitmap_run, 0, NULL, false, NULL);
+		err = ni_remove_attr(ni, ATTR_BITMAP, in->name, in->name_len,
+				     false, NULL);
+		run_close(&indx->bitmap_run);
+
+		root = indx_get_root(indx, ni, &attr, &mi);
+		if (!root) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		root_size = le32_to_cpu(attr->res.data_size);
+		new_root_size =
+			sizeof(struct INDEX_ROOT) + sizeof(struct NTFS_DE);
+
+		if (new_root_size != root_size &&
+		    !mi_resize_attr(mi, attr, new_root_size - root_size)) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		/* Fill first entry. */
+		e = (struct NTFS_DE *)(root + 1);
+		e->ref.low = 0;
+		e->ref.high = 0;
+		e->ref.seq = 0;
+		e->size = cpu_to_le16(sizeof(struct NTFS_DE));
+		e->flags = NTFS_IE_LAST; // 0x02
+		e->key_size = 0;
+		e->res = 0;
+
+		hdr = &root->ihdr;
+		hdr->flags = 0;
+		hdr->used = hdr->total = cpu_to_le32(
+			new_root_size - offsetof(struct INDEX_ROOT, ihdr));
+		mi->dirty = true;
+	}
+
+out:
+	fnd_put(fnd2);
+out1:
+	fnd_put(fnd);
+out2:
+	return err;
+}
+
+/*
+ * Update duplicated information in directory entry
+ * 'dup' - info from MFT record
+ */
+int indx_update_dup(struct ntfs_inode *ni, struct ntfs_sb_info *sbi,
+		    const struct ATTR_FILE_NAME *fname,
+		    const struct NTFS_DUP_INFO *dup, int sync)
+{
+	int err, diff;
+	struct NTFS_DE *e = NULL;
+	struct ATTR_FILE_NAME *e_fname;
+	struct ntfs_fnd *fnd;
+	struct INDEX_ROOT *root;
+	struct mft_inode *mi;
+	struct ntfs_index *indx = &ni->dir;
+
+	fnd = fnd_get();
+	if (!fnd)
+		return -ENOMEM;
+
+	root = indx_get_root(indx, ni, NULL, &mi);
+	if (!root) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Find entry in directory. */
+	err = indx_find(indx, ni, root, fname, fname_full_size(fname), sbi,
+			&diff, &e, fnd);
+	if (err)
+		goto out;
+
+	if (!e) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (diff) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	e_fname = (struct ATTR_FILE_NAME *)(e + 1);
+
+	if (!memcmp(&e_fname->dup, dup, sizeof(*dup))) {
+		/*
+		 * Nothing to update in index! Try to avoid this call.
+		 */
+		goto out;
+	}
+
+	memcpy(&e_fname->dup, dup, sizeof(*dup));
+
+	if (fnd->level) {
+		/* Directory entry in index. */
+		err = indx_write(indx, ni, fnd->nodes[fnd->level - 1], sync);
+	} else {
+		/* Directory entry in directory MFT record. */
+		mi->dirty = true;
+		if (sync)
+			err = mi_write(mi, 1);
+		else
+			mark_inode_dirty(&ni->vfs_inode);
+	}
+
+out:
+	fnd_put(fnd);
+	return err;
+}
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
new file mode 100644
index 000000000000..db2a5a4c38e4
--- /dev/null
+++ b/fs/ntfs3/inode.c
@@ -0,0 +1,1957 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/fs.h>
+#include <linux/iversion.h>
+#include <linux/mpage.h>
+#include <linux/namei.h>
+#include <linux/nls.h>
+#include <linux/uio.h>
+#include <linux/writeback.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+/*
+ * ntfs_read_mft - Read record and parses MFT.
+ */
+static struct inode *ntfs_read_mft(struct inode *inode,
+				   const struct cpu_str *name,
+				   const struct MFT_REF *ref)
+{
+	int err = 0;
+	struct ntfs_inode *ni = ntfs_i(inode);
+	struct super_block *sb = inode->i_sb;
+	struct ntfs_sb_info *sbi = sb->s_fs_info;
+	mode_t mode = 0;
+	struct ATTR_STD_INFO5 *std5 = NULL;
+	struct ATTR_LIST_ENTRY *le;
+	struct ATTRIB *attr;
+	bool is_match = false;
+	bool is_root = false;
+	bool is_dir;
+	unsigned long ino = inode->i_ino;
+	u32 rp_fa = 0, asize, t32;
+	u16 roff, rsize, names = 0;
+	const struct ATTR_FILE_NAME *fname = NULL;
+	const struct INDEX_ROOT *root;
+	struct REPARSE_DATA_BUFFER rp; // 0x18 bytes
+	u64 t64;
+	struct MFT_REC *rec;
+	struct runs_tree *run;
+
+	inode->i_op = NULL;
+	/* Setup 'uid' and 'gid' */
+	inode->i_uid = sbi->options.fs_uid;
+	inode->i_gid = sbi->options.fs_gid;
+
+	err = mi_init(&ni->mi, sbi, ino);
+	if (err)
+		goto out;
+
+	if (!sbi->mft.ni && ino == MFT_REC_MFT && !sb->s_root) {
+		t64 = sbi->mft.lbo >> sbi->cluster_bits;
+		t32 = bytes_to_cluster(sbi, MFT_REC_VOL * sbi->record_size);
+		sbi->mft.ni = ni;
+		init_rwsem(&ni->file.run_lock);
+
+		if (!run_add_entry(&ni->file.run, 0, t64, t32, true)) {
+			err = -ENOMEM;
+			goto out;
+		}
+	}
+
+	err = mi_read(&ni->mi, ino == MFT_REC_MFT);
+
+	if (err)
+		goto out;
+
+	rec = ni->mi.mrec;
+
+	if (sbi->flags & NTFS_FLAGS_LOG_REPLAYING) {
+		;
+	} else if (ref->seq != rec->seq) {
+		err = -EINVAL;
+		ntfs_err(sb, "MFT: r=%lx, expect seq=%x instead of %x!", ino,
+			 le16_to_cpu(ref->seq), le16_to_cpu(rec->seq));
+		goto out;
+	} else if (!is_rec_inuse(rec)) {
+		err = -EINVAL;
+		ntfs_err(sb, "Inode r=%x is not in use!", (u32)ino);
+		goto out;
+	}
+
+	if (le32_to_cpu(rec->total) != sbi->record_size) {
+		/* Bad inode? */
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (!is_rec_base(rec))
+		goto Ok;
+
+	/* Record should contain $I30 root. */
+	is_dir = rec->flags & RECORD_FLAG_DIR;
+
+	inode->i_generation = le16_to_cpu(rec->seq);
+
+	/* Enumerate all struct Attributes MFT. */
+	le = NULL;
+	attr = NULL;
+
+	/*
+	 * To reduce tab pressure use goto instead of
+	 * while( (attr = ni_enum_attr_ex(ni, attr, &le, NULL) ))
+	 */
+next_attr:
+	run = NULL;
+	err = -EINVAL;
+	attr = ni_enum_attr_ex(ni, attr, &le, NULL);
+	if (!attr)
+		goto end_enum;
+
+	if (le && le->vcn) {
+		/* This is non primary attribute segment. Ignore if not MFT. */
+		if (ino != MFT_REC_MFT || attr->type != ATTR_DATA)
+			goto next_attr;
+
+		run = &ni->file.run;
+		asize = le32_to_cpu(attr->size);
+		goto attr_unpack_run;
+	}
+
+	roff = attr->non_res ? 0 : le16_to_cpu(attr->res.data_off);
+	rsize = attr->non_res ? 0 : le32_to_cpu(attr->res.data_size);
+	asize = le32_to_cpu(attr->size);
+
+	switch (attr->type) {
+	case ATTR_STD:
+		if (attr->non_res ||
+		    asize < sizeof(struct ATTR_STD_INFO) + roff ||
+		    rsize < sizeof(struct ATTR_STD_INFO))
+			goto out;
+
+		if (std5)
+			goto next_attr;
+
+		std5 = Add2Ptr(attr, roff);
+
+#ifdef STATX_BTIME
+		nt2kernel(std5->cr_time, &ni->i_crtime);
+#endif
+		nt2kernel(std5->a_time, &inode->i_atime);
+		nt2kernel(std5->c_time, &inode->i_ctime);
+		nt2kernel(std5->m_time, &inode->i_mtime);
+
+		ni->std_fa = std5->fa;
+
+		if (asize >= sizeof(struct ATTR_STD_INFO5) + roff &&
+		    rsize >= sizeof(struct ATTR_STD_INFO5))
+			ni->std_security_id = std5->security_id;
+		goto next_attr;
+
+	case ATTR_LIST:
+		if (attr->name_len || le || ino == MFT_REC_LOG)
+			goto out;
+
+		err = ntfs_load_attr_list(ni, attr);
+		if (err)
+			goto out;
+
+		le = NULL;
+		attr = NULL;
+		goto next_attr;
+
+	case ATTR_NAME:
+		if (attr->non_res || asize < SIZEOF_ATTRIBUTE_FILENAME + roff ||
+		    rsize < SIZEOF_ATTRIBUTE_FILENAME)
+			goto out;
+
+		fname = Add2Ptr(attr, roff);
+		if (fname->type == FILE_NAME_DOS)
+			goto next_attr;
+
+		names += 1;
+		if (name && name->len == fname->name_len &&
+		    !ntfs_cmp_names_cpu(name, (struct le_str *)&fname->name_len,
+					NULL, false))
+			is_match = true;
+
+		goto next_attr;
+
+	case ATTR_DATA:
+		if (is_dir) {
+			/* Ignore data attribute in dir record. */
+			goto next_attr;
+		}
+
+		if (ino == MFT_REC_BADCLUST && !attr->non_res)
+			goto next_attr;
+
+		if (attr->name_len &&
+		    ((ino != MFT_REC_BADCLUST || !attr->non_res ||
+		      attr->name_len != ARRAY_SIZE(BAD_NAME) ||
+		      memcmp(attr_name(attr), BAD_NAME, sizeof(BAD_NAME))) &&
+		     (ino != MFT_REC_SECURE || !attr->non_res ||
+		      attr->name_len != ARRAY_SIZE(SDS_NAME) ||
+		      memcmp(attr_name(attr), SDS_NAME, sizeof(SDS_NAME))))) {
+			/* File contains stream attribute. Ignore it. */
+			goto next_attr;
+		}
+
+		if (is_attr_sparsed(attr))
+			ni->std_fa |= FILE_ATTRIBUTE_SPARSE_FILE;
+		else
+			ni->std_fa &= ~FILE_ATTRIBUTE_SPARSE_FILE;
+
+		if (is_attr_compressed(attr))
+			ni->std_fa |= FILE_ATTRIBUTE_COMPRESSED;
+		else
+			ni->std_fa &= ~FILE_ATTRIBUTE_COMPRESSED;
+
+		if (is_attr_encrypted(attr))
+			ni->std_fa |= FILE_ATTRIBUTE_ENCRYPTED;
+		else
+			ni->std_fa &= ~FILE_ATTRIBUTE_ENCRYPTED;
+
+		if (!attr->non_res) {
+			ni->i_valid = inode->i_size = rsize;
+			inode_set_bytes(inode, rsize);
+			t32 = asize;
+		} else {
+			t32 = le16_to_cpu(attr->nres.run_off);
+		}
+
+		mode = S_IFREG | (0777 & sbi->options.fs_fmask_inv);
+
+		if (!attr->non_res) {
+			ni->ni_flags |= NI_FLAG_RESIDENT;
+			goto next_attr;
+		}
+
+		inode_set_bytes(inode, attr_ondisk_size(attr));
+
+		ni->i_valid = le64_to_cpu(attr->nres.valid_size);
+		inode->i_size = le64_to_cpu(attr->nres.data_size);
+		if (!attr->nres.alloc_size)
+			goto next_attr;
+
+		run = ino == MFT_REC_BITMAP ? &sbi->used.bitmap.run
+					    : &ni->file.run;
+		break;
+
+	case ATTR_ROOT:
+		if (attr->non_res)
+			goto out;
+
+		root = Add2Ptr(attr, roff);
+		is_root = true;
+
+		if (attr->name_len != ARRAY_SIZE(I30_NAME) ||
+		    memcmp(attr_name(attr), I30_NAME, sizeof(I30_NAME)))
+			goto next_attr;
+
+		if (root->type != ATTR_NAME ||
+		    root->rule != NTFS_COLLATION_TYPE_FILENAME)
+			goto out;
+
+		if (!is_dir)
+			goto next_attr;
+
+		ni->ni_flags |= NI_FLAG_DIR;
+
+		err = indx_init(&ni->dir, sbi, attr, INDEX_MUTEX_I30);
+		if (err)
+			goto out;
+
+		mode = sb->s_root
+			       ? (S_IFDIR | (0777 & sbi->options.fs_dmask_inv))
+			       : (S_IFDIR | 0777);
+		goto next_attr;
+
+	case ATTR_ALLOC:
+		if (!is_root || attr->name_len != ARRAY_SIZE(I30_NAME) ||
+		    memcmp(attr_name(attr), I30_NAME, sizeof(I30_NAME)))
+			goto next_attr;
+
+		inode->i_size = le64_to_cpu(attr->nres.data_size);
+		ni->i_valid = le64_to_cpu(attr->nres.valid_size);
+		inode_set_bytes(inode, le64_to_cpu(attr->nres.alloc_size));
+
+		run = &ni->dir.alloc_run;
+		break;
+
+	case ATTR_BITMAP:
+		if (ino == MFT_REC_MFT) {
+			if (!attr->non_res)
+				goto out;
+#ifndef CONFIG_NTFS3_64BIT_CLUSTER
+			/* 0x20000000 = 2^32 / 8 */
+			if (le64_to_cpu(attr->nres.alloc_size) >= 0x20000000)
+				goto out;
+#endif
+			run = &sbi->mft.bitmap.run;
+			break;
+		} else if (is_dir && attr->name_len == ARRAY_SIZE(I30_NAME) &&
+			   !memcmp(attr_name(attr), I30_NAME,
+				   sizeof(I30_NAME)) &&
+			   attr->non_res) {
+			run = &ni->dir.bitmap_run;
+			break;
+		}
+		goto next_attr;
+
+	case ATTR_REPARSE:
+		if (attr->name_len)
+			goto next_attr;
+
+		rp_fa = ni_parse_reparse(ni, attr, &rp);
+		switch (rp_fa) {
+		case REPARSE_LINK:
+			if (!attr->non_res) {
+				inode->i_size = rsize;
+				inode_set_bytes(inode, rsize);
+				t32 = asize;
+			} else {
+				inode->i_size =
+					le64_to_cpu(attr->nres.data_size);
+				t32 = le16_to_cpu(attr->nres.run_off);
+			}
+
+			/* Looks like normal symlink. */
+			ni->i_valid = inode->i_size;
+
+			/* Clear directory bit. */
+			if (ni->ni_flags & NI_FLAG_DIR) {
+				indx_clear(&ni->dir);
+				memset(&ni->dir, 0, sizeof(ni->dir));
+				ni->ni_flags &= ~NI_FLAG_DIR;
+			} else {
+				run_close(&ni->file.run);
+			}
+			mode = S_IFLNK | 0777;
+			is_dir = false;
+			if (attr->non_res) {
+				run = &ni->file.run;
+				goto attr_unpack_run; // Double break.
+			}
+			break;
+
+		case REPARSE_COMPRESSED:
+			break;
+
+		case REPARSE_DEDUPLICATED:
+			break;
+		}
+		goto next_attr;
+
+	case ATTR_EA_INFO:
+		if (!attr->name_len &&
+		    resident_data_ex(attr, sizeof(struct EA_INFO))) {
+			ni->ni_flags |= NI_FLAG_EA;
+			/*
+			 * ntfs_get_wsl_perm updates inode->i_uid, inode->i_gid, inode->i_mode
+			 */
+			inode->i_mode = mode;
+			ntfs_get_wsl_perm(inode);
+			mode = inode->i_mode;
+		}
+		goto next_attr;
+
+	default:
+		goto next_attr;
+	}
+
+attr_unpack_run:
+	roff = le16_to_cpu(attr->nres.run_off);
+
+	t64 = le64_to_cpu(attr->nres.svcn);
+	err = run_unpack_ex(run, sbi, ino, t64, le64_to_cpu(attr->nres.evcn),
+			    t64, Add2Ptr(attr, roff), asize - roff);
+	if (err < 0)
+		goto out;
+	err = 0;
+	goto next_attr;
+
+end_enum:
+
+	if (!std5)
+		goto out;
+
+	if (!is_match && name) {
+		/* Reuse rec as buffer for ascii name. */
+		err = -ENOENT;
+		goto out;
+	}
+
+	if (std5->fa & FILE_ATTRIBUTE_READONLY)
+		mode &= ~0222;
+
+	if (!names) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (names != le16_to_cpu(rec->hard_links)) {
+		/* Correct minor error on the fly. Do not mark inode as dirty. */
+		rec->hard_links = cpu_to_le16(names);
+		ni->mi.dirty = true;
+	}
+
+	set_nlink(inode, names);
+
+	if (S_ISDIR(mode)) {
+		ni->std_fa |= FILE_ATTRIBUTE_DIRECTORY;
+
+		/*
+		 * Dot and dot-dot should be included in count but was not
+		 * included in enumeration.
+		 * Usually a hard links to directories are disabled.
+		 */
+		inode->i_op = &ntfs_dir_inode_operations;
+		inode->i_fop = &ntfs_dir_operations;
+		ni->i_valid = 0;
+	} else if (S_ISLNK(mode)) {
+		ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY;
+		inode->i_op = &ntfs_link_inode_operations;
+		inode->i_fop = NULL;
+		inode_nohighmem(inode); // ??
+	} else if (S_ISREG(mode)) {
+		ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY;
+		inode->i_op = &ntfs_file_inode_operations;
+		inode->i_fop = &ntfs_file_operations;
+		inode->i_mapping->a_ops =
+			is_compressed(ni) ? &ntfs_aops_cmpr : &ntfs_aops;
+		if (ino != MFT_REC_MFT)
+			init_rwsem(&ni->file.run_lock);
+	} else if (S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
+		   S_ISSOCK(mode)) {
+		inode->i_op = &ntfs_special_inode_operations;
+		init_special_inode(inode, mode, inode->i_rdev);
+	} else if (fname && fname->home.low == cpu_to_le32(MFT_REC_EXTEND) &&
+		   fname->home.seq == cpu_to_le16(MFT_REC_EXTEND)) {
+		/* Records in $Extend are not a files or general directories. */
+	} else {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if ((sbi->options.sys_immutable &&
+	     (std5->fa & FILE_ATTRIBUTE_SYSTEM)) &&
+	    !S_ISFIFO(mode) && !S_ISSOCK(mode) && !S_ISLNK(mode)) {
+		inode->i_flags |= S_IMMUTABLE;
+	} else {
+		inode->i_flags &= ~S_IMMUTABLE;
+	}
+
+	inode->i_mode = mode;
+	if (!(ni->ni_flags & NI_FLAG_EA)) {
+		/* If no xattr then no security (stored in xattr). */
+		inode->i_flags |= S_NOSEC;
+	}
+
+Ok:
+	if (ino == MFT_REC_MFT && !sb->s_root)
+		sbi->mft.ni = NULL;
+
+	unlock_new_inode(inode);
+
+	return inode;
+
+out:
+	if (ino == MFT_REC_MFT && !sb->s_root)
+		sbi->mft.ni = NULL;
+
+	iget_failed(inode);
+	return ERR_PTR(err);
+}
+
+/*
+ * ntfs_test_inode
+ *
+ * Return: 1 if match.
+ */
+static int ntfs_test_inode(struct inode *inode, void *data)
+{
+	struct MFT_REF *ref = data;
+
+	return ino_get(ref) == inode->i_ino;
+}
+
+static int ntfs_set_inode(struct inode *inode, void *data)
+{
+	const struct MFT_REF *ref = data;
+
+	inode->i_ino = ino_get(ref);
+	return 0;
+}
+
+struct inode *ntfs_iget5(struct super_block *sb, const struct MFT_REF *ref,
+			 const struct cpu_str *name)
+{
+	struct inode *inode;
+
+	inode = iget5_locked(sb, ino_get(ref), ntfs_test_inode, ntfs_set_inode,
+			     (void *)ref);
+	if (unlikely(!inode))
+		return ERR_PTR(-ENOMEM);
+
+	/* If this is a freshly allocated inode, need to read it now. */
+	if (inode->i_state & I_NEW)
+		inode = ntfs_read_mft(inode, name, ref);
+	else if (ref->seq != ntfs_i(inode)->mi.mrec->seq) {
+		/* Inode overlaps? */
+		make_bad_inode(inode);
+	}
+
+	return inode;
+}
+
+enum get_block_ctx {
+	GET_BLOCK_GENERAL = 0,
+	GET_BLOCK_WRITE_BEGIN = 1,
+	GET_BLOCK_DIRECT_IO_R = 2,
+	GET_BLOCK_DIRECT_IO_W = 3,
+	GET_BLOCK_BMAP = 4,
+};
+
+static noinline int ntfs_get_block_vbo(struct inode *inode, u64 vbo,
+				       struct buffer_head *bh, int create,
+				       enum get_block_ctx ctx)
+{
+	struct super_block *sb = inode->i_sb;
+	struct ntfs_sb_info *sbi = sb->s_fs_info;
+	struct ntfs_inode *ni = ntfs_i(inode);
+	struct page *page = bh->b_page;
+	u8 cluster_bits = sbi->cluster_bits;
+	u32 block_size = sb->s_blocksize;
+	u64 bytes, lbo, valid;
+	u32 off;
+	int err;
+	CLST vcn, lcn, len;
+	bool new;
+
+	/* Clear previous state. */
+	clear_buffer_new(bh);
+	clear_buffer_uptodate(bh);
+
+	/* Direct write uses 'create=0'. */
+	if (!create && vbo >= ni->i_valid) {
+		/* Out of valid. */
+		return 0;
+	}
+
+	if (vbo >= inode->i_size) {
+		/* Out of size. */
+		return 0;
+	}
+
+	if (is_resident(ni)) {
+		ni_lock(ni);
+		err = attr_data_read_resident(ni, page);
+		ni_unlock(ni);
+
+		if (!err)
+			set_buffer_uptodate(bh);
+		bh->b_size = block_size;
+		return err;
+	}
+
+	vcn = vbo >> cluster_bits;
+	off = vbo & sbi->cluster_mask;
+	new = false;
+
+	err = attr_data_get_block(ni, vcn, 1, &lcn, &len, create ? &new : NULL);
+	if (err)
+		goto out;
+
+	if (!len)
+		return 0;
+
+	bytes = ((u64)len << cluster_bits) - off;
+
+	if (lcn == SPARSE_LCN) {
+		if (!create) {
+			if (bh->b_size > bytes)
+				bh->b_size = bytes;
+			return 0;
+		}
+		WARN_ON(1);
+	}
+
+	if (new) {
+		set_buffer_new(bh);
+		if ((len << cluster_bits) > block_size)
+			ntfs_sparse_cluster(inode, page, vcn, len);
+	}
+
+	lbo = ((u64)lcn << cluster_bits) + off;
+
+	set_buffer_mapped(bh);
+	bh->b_bdev = sb->s_bdev;
+	bh->b_blocknr = lbo >> sb->s_blocksize_bits;
+
+	valid = ni->i_valid;
+
+	if (ctx == GET_BLOCK_DIRECT_IO_W) {
+		/* ntfs_direct_IO will update ni->i_valid. */
+		if (vbo >= valid)
+			set_buffer_new(bh);
+	} else if (create) {
+		/* Normal write. */
+		if (bytes > bh->b_size)
+			bytes = bh->b_size;
+
+		if (vbo >= valid)
+			set_buffer_new(bh);
+
+		if (vbo + bytes > valid) {
+			ni->i_valid = vbo + bytes;
+			mark_inode_dirty(inode);
+		}
+	} else if (vbo >= valid) {
+		/* Read out of valid data. */
+		/* Should never be here 'cause already checked. */
+		clear_buffer_mapped(bh);
+	} else if (vbo + bytes <= valid) {
+		/* Normal read. */
+	} else if (vbo + block_size <= valid) {
+		/* Normal short read. */
+		bytes = block_size;
+	} else {
+		/*
+		 * Read across valid size: vbo < valid && valid < vbo + block_size
+		 */
+		bytes = block_size;
+
+		if (page) {
+			u32 voff = valid - vbo;
+
+			bh->b_size = block_size;
+			off = vbo & (PAGE_SIZE - 1);
+			set_bh_page(bh, page, off);
+			ll_rw_block(REQ_OP_READ, 0, 1, &bh);
+			wait_on_buffer(bh);
+			if (!buffer_uptodate(bh)) {
+				err = -EIO;
+				goto out;
+			}
+			zero_user_segment(page, off + voff, off + block_size);
+		}
+	}
+
+	if (bh->b_size > bytes)
+		bh->b_size = bytes;
+
+#ifndef __LP64__
+	if (ctx == GET_BLOCK_DIRECT_IO_W || ctx == GET_BLOCK_DIRECT_IO_R) {
+		static_assert(sizeof(size_t) < sizeof(loff_t));
+		if (bytes > 0x40000000u)
+			bh->b_size = 0x40000000u;
+	}
+#endif
+
+	return 0;
+
+out:
+	return err;
+}
+
+int ntfs_get_block(struct inode *inode, sector_t vbn,
+		   struct buffer_head *bh_result, int create)
+{
+	return ntfs_get_block_vbo(inode, (u64)vbn << inode->i_blkbits,
+				  bh_result, create, GET_BLOCK_GENERAL);
+}
+
+static int ntfs_get_block_bmap(struct inode *inode, sector_t vsn,
+			       struct buffer_head *bh_result, int create)
+{
+	return ntfs_get_block_vbo(inode,
+				  (u64)vsn << inode->i_sb->s_blocksize_bits,
+				  bh_result, create, GET_BLOCK_BMAP);
+}
+
+static sector_t ntfs_bmap(struct address_space *mapping, sector_t block)
+{
+	return generic_block_bmap(mapping, block, ntfs_get_block_bmap);
+}
+
+static int ntfs_readpage(struct file *file, struct page *page)
+{
+	int err;
+	struct address_space *mapping = page->mapping;
+	struct inode *inode = mapping->host;
+	struct ntfs_inode *ni = ntfs_i(inode);
+
+	if (is_resident(ni)) {
+		ni_lock(ni);
+		err = attr_data_read_resident(ni, page);
+		ni_unlock(ni);
+		if (err != E_NTFS_NONRESIDENT) {
+			unlock_page(page);
+			return err;
+		}
+	}
+
+	if (is_compressed(ni)) {
+		ni_lock(ni);
+		err = ni_readpage_cmpr(ni, page);
+		ni_unlock(ni);
+		return err;
+	}
+
+	/* Normal + sparse files. */
+	return mpage_readpage(page, ntfs_get_block);
+}
+
+static void ntfs_readahead(struct readahead_control *rac)
+{
+	struct address_space *mapping = rac->mapping;
+	struct inode *inode = mapping->host;
+	struct ntfs_inode *ni = ntfs_i(inode);
+	u64 valid;
+	loff_t pos;
+
+	if (is_resident(ni)) {
+		/* No readahead for resident. */
+		return;
+	}
+
+	if (is_compressed(ni)) {
+		/* No readahead for compressed. */
+		return;
+	}
+
+	valid = ni->i_valid;
+	pos = readahead_pos(rac);
+
+	if (valid < i_size_read(inode) && pos <= valid &&
+	    valid < pos + readahead_length(rac)) {
+		/* Range cross 'valid'. Read it page by page. */
+		return;
+	}
+
+	mpage_readahead(rac, ntfs_get_block);
+}
+
+static int ntfs_get_block_direct_IO_R(struct inode *inode, sector_t iblock,
+				      struct buffer_head *bh_result, int create)
+{
+	return ntfs_get_block_vbo(inode, (u64)iblock << inode->i_blkbits,
+				  bh_result, create, GET_BLOCK_DIRECT_IO_R);
+}
+
+static int ntfs_get_block_direct_IO_W(struct inode *inode, sector_t iblock,
+				      struct buffer_head *bh_result, int create)
+{
+	return ntfs_get_block_vbo(inode, (u64)iblock << inode->i_blkbits,
+				  bh_result, create, GET_BLOCK_DIRECT_IO_W);
+}
+
+static ssize_t ntfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct file *file = iocb->ki_filp;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	struct ntfs_inode *ni = ntfs_i(inode);
+	loff_t vbo = iocb->ki_pos;
+	loff_t end;
+	int wr = iov_iter_rw(iter) & WRITE;
+	loff_t valid;
+	ssize_t ret;
+
+	if (is_resident(ni)) {
+		/* Switch to buffered write. */
+		ret = 0;
+		goto out;
+	}
+
+	ret = blockdev_direct_IO(iocb, inode, iter,
+				 wr ? ntfs_get_block_direct_IO_W
+				    : ntfs_get_block_direct_IO_R);
+
+	if (ret <= 0)
+		goto out;
+
+	end = vbo + ret;
+	valid = ni->i_valid;
+	if (wr) {
+		if (end > valid && !S_ISBLK(inode->i_mode)) {
+			ni->i_valid = end;
+			mark_inode_dirty(inode);
+		}
+	} else if (vbo < valid && valid < end) {
+		/* Fix page. */
+		iov_iter_revert(iter, end - valid);
+		iov_iter_zero(end - valid, iter);
+	}
+
+out:
+	return ret;
+}
+
+int ntfs_set_size(struct inode *inode, u64 new_size)
+{
+	struct super_block *sb = inode->i_sb;
+	struct ntfs_sb_info *sbi = sb->s_fs_info;
+	struct ntfs_inode *ni = ntfs_i(inode);
+	int err;
+
+	/* Check for maximum file size. */
+	if (is_sparsed(ni) || is_compressed(ni)) {
+		if (new_size > sbi->maxbytes_sparse) {
+			err = -EFBIG;
+			goto out;
+		}
+	} else if (new_size > sbi->maxbytes) {
+		err = -EFBIG;
+		goto out;
+	}
+
+	ni_lock(ni);
+	down_write(&ni->file.run_lock);
+
+	err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size,
+			    &ni->i_valid, true, NULL);
+
+	up_write(&ni->file.run_lock);
+	ni_unlock(ni);
+
+	mark_inode_dirty(inode);
+
+out:
+	return err;
+}
+
+static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
+{
+	struct address_space *mapping = page->mapping;
+	struct inode *inode = mapping->host;
+	struct ntfs_inode *ni = ntfs_i(inode);
+	int err;
+
+	if (is_resident(ni)) {
+		ni_lock(ni);
+		err = attr_data_write_resident(ni, page);
+		ni_unlock(ni);
+		if (err != E_NTFS_NONRESIDENT) {
+			unlock_page(page);
+			return err;
+		}
+	}
+
+	return block_write_full_page(page, ntfs_get_block, wbc);
+}
+
+static int ntfs_writepages(struct address_space *mapping,
+			   struct writeback_control *wbc)
+{
+	struct inode *inode = mapping->host;
+	struct ntfs_inode *ni = ntfs_i(inode);
+	/* Redirect call to 'ntfs_writepage' for resident files. */
+	get_block_t *get_block = is_resident(ni) ? NULL : &ntfs_get_block;
+
+	return mpage_writepages(mapping, wbc, get_block);
+}
+
+static int ntfs_get_block_write_begin(struct inode *inode, sector_t vbn,
+				      struct buffer_head *bh_result, int create)
+{
+	return ntfs_get_block_vbo(inode, (u64)vbn << inode->i_blkbits,
+				  bh_result, create, GET_BLOCK_WRITE_BEGIN);
+}
+
+static int ntfs_write_begin(struct file *file, struct address_space *mapping,
+			    loff_t pos, u32 len, u32 flags, struct page **pagep,
+			    void **fsdata)
+{
+	int err;
+	struct inode *inode = mapping->host;
+	struct ntfs_inode *ni = ntfs_i(inode);
+
+	*pagep = NULL;
+	if (is_resident(ni)) {
+		struct page *page = grab_cache_page_write_begin(
+			mapping, pos >> PAGE_SHIFT, flags);
+
+		if (!page) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		ni_lock(ni);
+		err = attr_data_read_resident(ni, page);
+		ni_unlock(ni);
+
+		if (!err) {
+			*pagep = page;
+			goto out;
+		}
+		unlock_page(page);
+		put_page(page);
+
+		if (err != E_NTFS_NONRESIDENT)
+			goto out;
+	}
+
+	err = block_write_begin(mapping, pos, len, flags, pagep,
+				ntfs_get_block_write_begin);
+
+out:
+	return err;
+}
+
+/*
+ * ntfs_write_end - Address_space_operations::write_end.
+ */
+static int ntfs_write_end(struct file *file, struct address_space *mapping,
+			  loff_t pos, u32 len, u32 copied, struct page *page,
+			  void *fsdata)
+
+{
+	struct inode *inode = mapping->host;
+	struct ntfs_inode *ni = ntfs_i(inode);
+	u64 valid = ni->i_valid;
+	bool dirty = false;
+	int err;
+
+	if (is_resident(ni)) {
+		ni_lock(ni);
+		err = attr_data_write_resident(ni, page);
+		ni_unlock(ni);
+		if (!err) {
+			dirty = true;
+			/* Clear any buffers in page. */
+			if (page_has_buffers(page)) {
+				struct buffer_head *head, *bh;
+
+				bh = head = page_buffers(page);
+				do {
+					clear_buffer_dirty(bh);
+					clear_buffer_mapped(bh);
+					set_buffer_uptodate(bh);
+				} while (head != (bh = bh->b_this_page));
+			}
+			SetPageUptodate(page);
+			err = copied;
+		}
+		unlock_page(page);
+		put_page(page);
+	} else {
+		err = generic_write_end(file, mapping, pos, len, copied, page,
+					fsdata);
+	}
+
+	if (err >= 0) {
+		if (!(ni->std_fa & FILE_ATTRIBUTE_ARCHIVE)) {
+			inode->i_ctime = inode->i_mtime = current_time(inode);
+			ni->std_fa |= FILE_ATTRIBUTE_ARCHIVE;
+			dirty = true;
+		}
+
+		if (valid != ni->i_valid) {
+			/* ni->i_valid is changed in ntfs_get_block_vbo. */
+			dirty = true;
+		}
+
+		if (dirty)
+			mark_inode_dirty(inode);
+	}
+
+	return err;
+}
+
+int reset_log_file(struct inode *inode)
+{
+	int err;
+	loff_t pos = 0;
+	u32 log_size = inode->i_size;
+	struct address_space *mapping = inode->i_mapping;
+
+	for (;;) {
+		u32 len;
+		void *kaddr;
+		struct page *page;
+
+		len = pos + PAGE_SIZE > log_size ? (log_size - pos) : PAGE_SIZE;
+
+		err = block_write_begin(mapping, pos, len, 0, &page,
+					ntfs_get_block_write_begin);
+		if (err)
+			goto out;
+
+		kaddr = kmap_atomic(page);
+		memset(kaddr, -1, len);
+		kunmap_atomic(kaddr);
+		flush_dcache_page(page);
+
+		err = block_write_end(NULL, mapping, pos, len, len, page, NULL);
+		if (err < 0)
+			goto out;
+		pos += len;
+
+		if (pos >= log_size)
+			break;
+		balance_dirty_pages_ratelimited(mapping);
+	}
+out:
+	mark_inode_dirty_sync(inode);
+
+	return err;
+}
+
+int ntfs3_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+	return _ni_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
+}
+
+int ntfs_sync_inode(struct inode *inode)
+{
+	return _ni_write_inode(inode, 1);
+}
+
+/*
+ * writeback_inode - Helper function for ntfs_flush_inodes().
+ *
+ * This writes both the inode and the file data blocks, waiting
+ * for in flight data blocks before the start of the call.  It
+ * does not wait for any io started during the call.
+ */
+static int writeback_inode(struct inode *inode)
+{
+	int ret = sync_inode_metadata(inode, 0);
+
+	if (!ret)
+		ret = filemap_fdatawrite(inode->i_mapping);
+	return ret;
+}
+
+/*
+ * ntfs_flush_inodes
+ *
+ * Write data and metadata corresponding to i1 and i2.  The io is
+ * started but we do not wait for any of it to finish.
+ *
+ * filemap_flush() is used for the block device, so if there is a dirty
+ * page for a block already in flight, we will not wait and start the
+ * io over again.
+ */
+int ntfs_flush_inodes(struct super_block *sb, struct inode *i1,
+		      struct inode *i2)
+{
+	int ret = 0;
+
+	if (i1)
+		ret = writeback_inode(i1);
+	if (!ret && i2)
+		ret = writeback_inode(i2);
+	if (!ret)
+		ret = filemap_flush(sb->s_bdev->bd_inode->i_mapping);
+	return ret;
+}
+
+int inode_write_data(struct inode *inode, const void *data, size_t bytes)
+{
+	pgoff_t idx;
+
+	/* Write non resident data. */
+	for (idx = 0; bytes; idx++) {
+		size_t op = bytes > PAGE_SIZE ? PAGE_SIZE : bytes;
+		struct page *page = ntfs_map_page(inode->i_mapping, idx);
+
+		if (IS_ERR(page))
+			return PTR_ERR(page);
+
+		lock_page(page);
+		WARN_ON(!PageUptodate(page));
+		ClearPageUptodate(page);
+
+		memcpy(page_address(page), data, op);
+
+		flush_dcache_page(page);
+		SetPageUptodate(page);
+		unlock_page(page);
+
+		ntfs_unmap_page(page);
+
+		bytes -= op;
+		data = Add2Ptr(data, PAGE_SIZE);
+	}
+	return 0;
+}
+
+/*
+ * ntfs_reparse_bytes
+ *
+ * Number of bytes for REPARSE_DATA_BUFFER(IO_REPARSE_TAG_SYMLINK)
+ * for unicode string of @uni_len length.
+ */
+static inline u32 ntfs_reparse_bytes(u32 uni_len)
+{
+	/* Header + unicode string + decorated unicode string. */
+	return sizeof(short) * (2 * uni_len + 4) +
+	       offsetof(struct REPARSE_DATA_BUFFER,
+			SymbolicLinkReparseBuffer.PathBuffer);
+}
+
+static struct REPARSE_DATA_BUFFER *
+ntfs_create_reparse_buffer(struct ntfs_sb_info *sbi, const char *symname,
+			   u32 size, u16 *nsize)
+{
+	int i, err;
+	struct REPARSE_DATA_BUFFER *rp;
+	__le16 *rp_name;
+	typeof(rp->SymbolicLinkReparseBuffer) *rs;
+
+	rp = kzalloc(ntfs_reparse_bytes(2 * size + 2), GFP_NOFS);
+	if (!rp)
+		return ERR_PTR(-ENOMEM);
+
+	rs = &rp->SymbolicLinkReparseBuffer;
+	rp_name = rs->PathBuffer;
+
+	/* Convert link name to UTF-16. */
+	err = ntfs_nls_to_utf16(sbi, symname, size,
+				(struct cpu_str *)(rp_name - 1), 2 * size,
+				UTF16_LITTLE_ENDIAN);
+	if (err < 0)
+		goto out;
+
+	/* err = the length of unicode name of symlink. */
+	*nsize = ntfs_reparse_bytes(err);
+
+	if (*nsize > sbi->reparse.max_size) {
+		err = -EFBIG;
+		goto out;
+	}
+
+	/* Translate Linux '/' into Windows '\'. */
+	for (i = 0; i < err; i++) {
+		if (rp_name[i] == cpu_to_le16('/'))
+			rp_name[i] = cpu_to_le16('\\');
+	}
+
+	rp->ReparseTag = IO_REPARSE_TAG_SYMLINK;
+	rp->ReparseDataLength =
+		cpu_to_le16(*nsize - offsetof(struct REPARSE_DATA_BUFFER,
+					      SymbolicLinkReparseBuffer));
+
+	/* PrintName + SubstituteName. */
+	rs->SubstituteNameOffset = cpu_to_le16(sizeof(short) * err);
+	rs->SubstituteNameLength = cpu_to_le16(sizeof(short) * err + 8);
+	rs->PrintNameLength = rs->SubstituteNameOffset;
+
+	/*
+	 * TODO: Use relative path if possible to allow Windows to
+	 * parse this path.
+	 * 0-absolute path 1- relative path (SYMLINK_FLAG_RELATIVE).
+	 */
+	rs->Flags = 0;
+
+	memmove(rp_name + err + 4, rp_name, sizeof(short) * err);
+
+	/* Decorate SubstituteName. */
+	rp_name += err;
+	rp_name[0] = cpu_to_le16('\\');
+	rp_name[1] = cpu_to_le16('?');
+	rp_name[2] = cpu_to_le16('?');
+	rp_name[3] = cpu_to_le16('\\');
+
+	return rp;
+out:
+	kfree(rp);
+	return ERR_PTR(err);
+}
+
+struct inode *ntfs_create_inode(struct user_namespace *mnt_userns,
+				struct inode *dir, struct dentry *dentry,
+				const struct cpu_str *uni, umode_t mode,
+				dev_t dev, const char *symname, u32 size,
+				struct ntfs_fnd *fnd)
+{
+	int err;
+	struct super_block *sb = dir->i_sb;
+	struct ntfs_sb_info *sbi = sb->s_fs_info;
+	const struct qstr *name = &dentry->d_name;
+	CLST ino = 0;
+	struct ntfs_inode *dir_ni = ntfs_i(dir);
+	struct ntfs_inode *ni = NULL;
+	struct inode *inode = NULL;
+	struct ATTRIB *attr;
+	struct ATTR_STD_INFO5 *std5;
+	struct ATTR_FILE_NAME *fname;
+	struct MFT_REC *rec;
+	u32 asize, dsize, sd_size;
+	enum FILE_ATTRIBUTE fa;
+	__le32 security_id = SECURITY_ID_INVALID;
+	CLST vcn;
+	const void *sd;
+	u16 t16, nsize = 0, aid = 0;
+	struct INDEX_ROOT *root, *dir_root;
+	struct NTFS_DE *e, *new_de = NULL;
+	struct REPARSE_DATA_BUFFER *rp = NULL;
+	bool rp_inserted = false;
+
+	dir_root = indx_get_root(&dir_ni->dir, dir_ni, NULL, NULL);
+	if (!dir_root)
+		return ERR_PTR(-EINVAL);
+
+	if (S_ISDIR(mode)) {
+		/* Use parent's directory attributes. */
+		fa = dir_ni->std_fa | FILE_ATTRIBUTE_DIRECTORY |
+		     FILE_ATTRIBUTE_ARCHIVE;
+		/*
+		 * By default child directory inherits parent attributes.
+		 * Root directory is hidden + system.
+		 * Make an exception for children in root.
+		 */
+		if (dir->i_ino == MFT_REC_ROOT)
+			fa &= ~(FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_SYSTEM);
+	} else if (S_ISLNK(mode)) {
+		/* It is good idea that link should be the same type (file/dir) as target */
+		fa = FILE_ATTRIBUTE_REPARSE_POINT;
+
+		/*
+		 * Linux: there are dir/file/symlink and so on.
+		 * NTFS: symlinks are "dir + reparse" or "file + reparse"
+		 * It is good idea to create:
+		 * dir + reparse if 'symname' points to directory
+		 * or
+		 * file + reparse if 'symname' points to file
+		 * Unfortunately kern_path hangs if symname contains 'dir'.
+		 */
+
+		/*
+		 *	struct path path;
+		 *
+		 *	if (!kern_path(symname, LOOKUP_FOLLOW, &path)){
+		 *		struct inode *target = d_inode(path.dentry);
+		 *
+		 *		if (S_ISDIR(target->i_mode))
+		 *			fa |= FILE_ATTRIBUTE_DIRECTORY;
+		 *		// if ( target->i_sb == sb ){
+		 *		//	use relative path?
+		 *		// }
+		 *		path_put(&path);
+		 *	}
+		 */
+	} else if (S_ISREG(mode)) {
+		if (sbi->options.sparse) {
+			/* Sparsed regular file, cause option 'sparse'. */
+			fa = FILE_ATTRIBUTE_SPARSE_FILE |
+			     FILE_ATTRIBUTE_ARCHIVE;
+		} else if (dir_ni->std_fa & FILE_ATTRIBUTE_COMPRESSED) {
+			/* Compressed regular file, if parent is compressed. */
+			fa = FILE_ATTRIBUTE_COMPRESSED | FILE_ATTRIBUTE_ARCHIVE;
+		} else {
+			/* Regular file, default attributes. */
+			fa = FILE_ATTRIBUTE_ARCHIVE;
+		}
+	} else {
+		fa = FILE_ATTRIBUTE_ARCHIVE;
+	}
+
+	if (!(mode & 0222))
+		fa |= FILE_ATTRIBUTE_READONLY;
+
+	/* Allocate PATH_MAX bytes. */
+	new_de = __getname();
+	if (!new_de) {
+		err = -ENOMEM;
+		goto out1;
+	}
+
+	/* Mark rw ntfs as dirty. it will be cleared at umount. */
+	ntfs_set_state(sbi, NTFS_DIRTY_DIRTY);
+
+	/* Step 1: allocate and fill new mft record. */
+	err = ntfs_look_free_mft(sbi, &ino, false, NULL, NULL);
+	if (err)
+		goto out2;
+
+	ni = ntfs_new_inode(sbi, ino, fa & FILE_ATTRIBUTE_DIRECTORY);
+	if (IS_ERR(ni)) {
+		err = PTR_ERR(ni);
+		ni = NULL;
+		goto out3;
+	}
+	inode = &ni->vfs_inode;
+	inode_init_owner(mnt_userns, inode, dir, mode);
+	mode = inode->i_mode;
+
+	inode->i_atime = inode->i_mtime = inode->i_ctime = ni->i_crtime =
+		current_time(inode);
+
+	rec = ni->mi.mrec;
+	rec->hard_links = cpu_to_le16(1);
+	attr = Add2Ptr(rec, le16_to_cpu(rec->attr_off));
+
+	/* Get default security id. */
+	sd = s_default_security;
+	sd_size = sizeof(s_default_security);
+
+	if (is_ntfs3(sbi)) {
+		security_id = dir_ni->std_security_id;
+		if (le32_to_cpu(security_id) < SECURITY_ID_FIRST) {
+			security_id = sbi->security.def_security_id;
+
+			if (security_id == SECURITY_ID_INVALID &&
+			    !ntfs_insert_security(sbi, sd, sd_size,
+						  &security_id, NULL))
+				sbi->security.def_security_id = security_id;
+		}
+	}
+
+	/* Insert standard info. */
+	std5 = Add2Ptr(attr, SIZEOF_RESIDENT);
+
+	if (security_id == SECURITY_ID_INVALID) {
+		dsize = sizeof(struct ATTR_STD_INFO);
+	} else {
+		dsize = sizeof(struct ATTR_STD_INFO5);
+		std5->security_id = security_id;
+		ni->std_security_id = security_id;
+	}
+	asize = SIZEOF_RESIDENT + dsize;
+
+	attr->type = ATTR_STD;
+	attr->size = cpu_to_le32(asize);
+	attr->id = cpu_to_le16(aid++);
+	attr->res.data_off = SIZEOF_RESIDENT_LE;
+	attr->res.data_size = cpu_to_le32(dsize);
+
+	std5->cr_time = std5->m_time = std5->c_time = std5->a_time =
+		kernel2nt(&inode->i_atime);
+
+	ni->std_fa = fa;
+	std5->fa = fa;
+
+	attr = Add2Ptr(attr, asize);
+
+	/* Insert file name. */
+	err = fill_name_de(sbi, new_de, name, uni);
+	if (err)
+		goto out4;
+
+	mi_get_ref(&ni->mi, &new_de->ref);
+
+	fname = (struct ATTR_FILE_NAME *)(new_de + 1);
+	mi_get_ref(&dir_ni->mi, &fname->home);
+	fname->dup.cr_time = fname->dup.m_time = fname->dup.c_time =
+		fname->dup.a_time = std5->cr_time;
+	fname->dup.alloc_size = fname->dup.data_size = 0;
+	fname->dup.fa = std5->fa;
+	fname->dup.ea_size = fname->dup.reparse = 0;
+
+	dsize = le16_to_cpu(new_de->key_size);
+	asize = ALIGN(SIZEOF_RESIDENT + dsize, 8);
+
+	attr->type = ATTR_NAME;
+	attr->size = cpu_to_le32(asize);
+	attr->res.data_off = SIZEOF_RESIDENT_LE;
+	attr->res.flags = RESIDENT_FLAG_INDEXED;
+	attr->id = cpu_to_le16(aid++);
+	attr->res.data_size = cpu_to_le32(dsize);
+	memcpy(Add2Ptr(attr, SIZEOF_RESIDENT), fname, dsize);
+
+	attr = Add2Ptr(attr, asize);
+
+	if (security_id == SECURITY_ID_INVALID) {
+		/* Insert security attribute. */
+		asize = SIZEOF_RESIDENT + ALIGN(sd_size, 8);
+
+		attr->type = ATTR_SECURE;
+		attr->size = cpu_to_le32(asize);
+		attr->id = cpu_to_le16(aid++);
+		attr->res.data_off = SIZEOF_RESIDENT_LE;
+		attr->res.data_size = cpu_to_le32(sd_size);
+		memcpy(Add2Ptr(attr, SIZEOF_RESIDENT), sd, sd_size);
+
+		attr = Add2Ptr(attr, asize);
+	}
+
+	attr->id = cpu_to_le16(aid++);
+	if (fa & FILE_ATTRIBUTE_DIRECTORY) {
+		/*
+		 * Regular directory or symlink to directory.
+		 * Create root attribute.
+		 */
+		dsize = sizeof(struct INDEX_ROOT) + sizeof(struct NTFS_DE);
+		asize = sizeof(I30_NAME) + SIZEOF_RESIDENT + dsize;
+
+		attr->type = ATTR_ROOT;
+		attr->size = cpu_to_le32(asize);
+
+		attr->name_len = ARRAY_SIZE(I30_NAME);
+		attr->name_off = SIZEOF_RESIDENT_LE;
+		attr->res.data_off =
+			cpu_to_le16(sizeof(I30_NAME) + SIZEOF_RESIDENT);
+		attr->res.data_size = cpu_to_le32(dsize);
+		memcpy(Add2Ptr(attr, SIZEOF_RESIDENT), I30_NAME,
+		       sizeof(I30_NAME));
+
+		root = Add2Ptr(attr, sizeof(I30_NAME) + SIZEOF_RESIDENT);
+		memcpy(root, dir_root, offsetof(struct INDEX_ROOT, ihdr));
+		root->ihdr.de_off =
+			cpu_to_le32(sizeof(struct INDEX_HDR)); // 0x10
+		root->ihdr.used = cpu_to_le32(sizeof(struct INDEX_HDR) +
+					      sizeof(struct NTFS_DE));
+		root->ihdr.total = root->ihdr.used;
+
+		e = Add2Ptr(root, sizeof(struct INDEX_ROOT));
+		e->size = cpu_to_le16(sizeof(struct NTFS_DE));
+		e->flags = NTFS_IE_LAST;
+	} else if (S_ISLNK(mode)) {
+		/*
+		 * Symlink to file.
+		 * Create empty resident data attribute.
+		 */
+		asize = SIZEOF_RESIDENT;
+
+		/* Insert empty ATTR_DATA */
+		attr->type = ATTR_DATA;
+		attr->size = cpu_to_le32(SIZEOF_RESIDENT);
+		attr->name_off = SIZEOF_RESIDENT_LE;
+		attr->res.data_off = SIZEOF_RESIDENT_LE;
+	} else if (S_ISREG(mode)) {
+		/*
+		 * Regular file. Create empty non resident data attribute.
+		 */
+		attr->type = ATTR_DATA;
+		attr->non_res = 1;
+		attr->nres.evcn = cpu_to_le64(-1ll);
+		if (fa & FILE_ATTRIBUTE_SPARSE_FILE) {
+			attr->size = cpu_to_le32(SIZEOF_NONRESIDENT_EX + 8);
+			attr->name_off = SIZEOF_NONRESIDENT_EX_LE;
+			attr->flags = ATTR_FLAG_SPARSED;
+			asize = SIZEOF_NONRESIDENT_EX + 8;
+		} else if (fa & FILE_ATTRIBUTE_COMPRESSED) {
+			attr->size = cpu_to_le32(SIZEOF_NONRESIDENT_EX + 8);
+			attr->name_off = SIZEOF_NONRESIDENT_EX_LE;
+			attr->flags = ATTR_FLAG_COMPRESSED;
+			attr->nres.c_unit = COMPRESSION_UNIT;
+			asize = SIZEOF_NONRESIDENT_EX + 8;
+		} else {
+			attr->size = cpu_to_le32(SIZEOF_NONRESIDENT + 8);
+			attr->name_off = SIZEOF_NONRESIDENT_LE;
+			asize = SIZEOF_NONRESIDENT + 8;
+		}
+		attr->nres.run_off = attr->name_off;
+	} else {
+		/*
+		 * Node. Create empty resident data attribute.
+		 */
+		attr->type = ATTR_DATA;
+		attr->size = cpu_to_le32(SIZEOF_RESIDENT);
+		attr->name_off = SIZEOF_RESIDENT_LE;
+		if (fa & FILE_ATTRIBUTE_SPARSE_FILE)
+			attr->flags = ATTR_FLAG_SPARSED;
+		else if (fa & FILE_ATTRIBUTE_COMPRESSED)
+			attr->flags = ATTR_FLAG_COMPRESSED;
+		attr->res.data_off = SIZEOF_RESIDENT_LE;
+		asize = SIZEOF_RESIDENT;
+		ni->ni_flags |= NI_FLAG_RESIDENT;
+	}
+
+	if (S_ISDIR(mode)) {
+		ni->ni_flags |= NI_FLAG_DIR;
+		err = indx_init(&ni->dir, sbi, attr, INDEX_MUTEX_I30);
+		if (err)
+			goto out4;
+	} else if (S_ISLNK(mode)) {
+		rp = ntfs_create_reparse_buffer(sbi, symname, size, &nsize);
+
+		if (IS_ERR(rp)) {
+			err = PTR_ERR(rp);
+			rp = NULL;
+			goto out4;
+		}
+
+		/*
+		 * Insert ATTR_REPARSE.
+		 */
+		attr = Add2Ptr(attr, asize);
+		attr->type = ATTR_REPARSE;
+		attr->id = cpu_to_le16(aid++);
+
+		/* Resident or non resident? */
+		asize = ALIGN(SIZEOF_RESIDENT + nsize, 8);
+		t16 = PtrOffset(rec, attr);
+
+		/* 0x78 - the size of EA + EAINFO to store WSL */
+		if (asize + t16 + 0x78 + 8 > sbi->record_size) {
+			CLST alen;
+			CLST clst = bytes_to_cluster(sbi, nsize);
+
+			/* Bytes per runs. */
+			t16 = sbi->record_size - t16 - SIZEOF_NONRESIDENT;
+
+			attr->non_res = 1;
+			attr->nres.evcn = cpu_to_le64(clst - 1);
+			attr->name_off = SIZEOF_NONRESIDENT_LE;
+			attr->nres.run_off = attr->name_off;
+			attr->nres.data_size = cpu_to_le64(nsize);
+			attr->nres.valid_size = attr->nres.data_size;
+			attr->nres.alloc_size =
+				cpu_to_le64(ntfs_up_cluster(sbi, nsize));
+
+			err = attr_allocate_clusters(sbi, &ni->file.run, 0, 0,
+						     clst, NULL, 0, &alen, 0,
+						     NULL);
+			if (err)
+				goto out5;
+
+			err = run_pack(&ni->file.run, 0, clst,
+				       Add2Ptr(attr, SIZEOF_NONRESIDENT), t16,
+				       &vcn);
+			if (err < 0)
+				goto out5;
+
+			if (vcn != clst) {
+				err = -EINVAL;
+				goto out5;
+			}
+
+			asize = SIZEOF_NONRESIDENT + ALIGN(err, 8);
+			inode->i_size = nsize;
+		} else {
+			attr->res.data_off = SIZEOF_RESIDENT_LE;
+			attr->res.data_size = cpu_to_le32(nsize);
+			memcpy(Add2Ptr(attr, SIZEOF_RESIDENT), rp, nsize);
+			inode->i_size = nsize;
+			nsize = 0;
+		}
+
+		attr->size = cpu_to_le32(asize);
+
+		err = ntfs_insert_reparse(sbi, IO_REPARSE_TAG_SYMLINK,
+					  &new_de->ref);
+		if (err)
+			goto out5;
+
+		rp_inserted = true;
+	}
+
+	attr = Add2Ptr(attr, asize);
+	attr->type = ATTR_END;
+
+	rec->used = cpu_to_le32(PtrOffset(rec, attr) + 8);
+	rec->next_attr_id = cpu_to_le16(aid);
+
+	/* Step 2: Add new name in index. */
+	err = indx_insert_entry(&dir_ni->dir, dir_ni, new_de, sbi, fnd, 0);
+	if (err)
+		goto out6;
+
+	inode->i_generation = le16_to_cpu(rec->seq);
+
+	dir->i_mtime = dir->i_ctime = inode->i_atime;
+
+	if (S_ISDIR(mode)) {
+		inode->i_op = &ntfs_dir_inode_operations;
+		inode->i_fop = &ntfs_dir_operations;
+	} else if (S_ISLNK(mode)) {
+		inode->i_op = &ntfs_link_inode_operations;
+		inode->i_fop = NULL;
+		inode->i_mapping->a_ops = &ntfs_aops;
+	} else if (S_ISREG(mode)) {
+		inode->i_op = &ntfs_file_inode_operations;
+		inode->i_fop = &ntfs_file_operations;
+		inode->i_mapping->a_ops =
+			is_compressed(ni) ? &ntfs_aops_cmpr : &ntfs_aops;
+		init_rwsem(&ni->file.run_lock);
+	} else {
+		inode->i_op = &ntfs_special_inode_operations;
+		init_special_inode(inode, mode, dev);
+	}
+
+#ifdef CONFIG_NTFS3_FS_POSIX_ACL
+	if (!S_ISLNK(mode) && (sb->s_flags & SB_POSIXACL)) {
+		err = ntfs_init_acl(mnt_userns, inode, dir);
+		if (err)
+			goto out6;
+	} else
+#endif
+	{
+		inode->i_flags |= S_NOSEC;
+	}
+
+	/* Write non resident data. */
+	if (nsize) {
+		err = ntfs_sb_write_run(sbi, &ni->file.run, 0, rp, nsize);
+		if (err)
+			goto out7;
+	}
+
+	/*
+	 * Call 'd_instantiate' after inode->i_op is set
+	 * but before finish_open.
+	 */
+	d_instantiate(dentry, inode);
+
+	ntfs_save_wsl_perm(inode);
+	mark_inode_dirty(dir);
+	mark_inode_dirty(inode);
+
+	/* Normal exit. */
+	goto out2;
+
+out7:
+
+	/* Undo 'indx_insert_entry'. */
+	indx_delete_entry(&dir_ni->dir, dir_ni, new_de + 1,
+			  le16_to_cpu(new_de->key_size), sbi);
+out6:
+	if (rp_inserted)
+		ntfs_remove_reparse(sbi, IO_REPARSE_TAG_SYMLINK, &new_de->ref);
+
+out5:
+	if (S_ISDIR(mode) || run_is_empty(&ni->file.run))
+		goto out4;
+
+	run_deallocate(sbi, &ni->file.run, false);
+
+out4:
+	clear_rec_inuse(rec);
+	clear_nlink(inode);
+	ni->mi.dirty = false;
+	discard_new_inode(inode);
+out3:
+	ntfs_mark_rec_free(sbi, ino);
+
+out2:
+	__putname(new_de);
+	kfree(rp);
+
+out1:
+	if (err)
+		return ERR_PTR(err);
+
+	unlock_new_inode(inode);
+
+	return inode;
+}
+
+int ntfs_link_inode(struct inode *inode, struct dentry *dentry)
+{
+	int err;
+	struct ntfs_inode *ni = ntfs_i(inode);
+	struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info;
+	struct NTFS_DE *de;
+	struct ATTR_FILE_NAME *de_name;
+
+	/* Allocate PATH_MAX bytes. */
+	de = __getname();
+	if (!de)
+		return -ENOMEM;
+
+	/* Mark rw ntfs as dirty. It will be cleared at umount. */
+	ntfs_set_state(sbi, NTFS_DIRTY_DIRTY);
+
+	/* Construct 'de'. */
+	err = fill_name_de(sbi, de, &dentry->d_name, NULL);
+	if (err)
+		goto out;
+
+	de_name = (struct ATTR_FILE_NAME *)(de + 1);
+	/* Fill duplicate info. */
+	de_name->dup.cr_time = de_name->dup.m_time = de_name->dup.c_time =
+		de_name->dup.a_time = kernel2nt(&inode->i_ctime);
+	de_name->dup.alloc_size = de_name->dup.data_size =
+		cpu_to_le64(inode->i_size);
+	de_name->dup.fa = ni->std_fa;
+	de_name->dup.ea_size = de_name->dup.reparse = 0;
+
+	err = ni_add_name(ntfs_i(d_inode(dentry->d_parent)), ni, de);
+out:
+	__putname(de);
+	return err;
+}
+
+/*
+ * ntfs_unlink_inode
+ *
+ * inode_operations::unlink
+ * inode_operations::rmdir
+ */
+int ntfs_unlink_inode(struct inode *dir, const struct dentry *dentry)
+{
+	int err;
+	struct ntfs_sb_info *sbi = dir->i_sb->s_fs_info;
+	struct inode *inode = d_inode(dentry);
+	struct ntfs_inode *ni = ntfs_i(inode);
+	struct ntfs_inode *dir_ni = ntfs_i(dir);
+	struct NTFS_DE *de, *de2 = NULL;
+	int undo_remove;
+
+	if (ntfs_is_meta_file(sbi, ni->mi.rno))
+		return -EINVAL;
+
+	/* Allocate PATH_MAX bytes. */
+	de = __getname();
+	if (!de)
+		return -ENOMEM;
+
+	ni_lock(ni);
+
+	if (S_ISDIR(inode->i_mode) && !dir_is_empty(inode)) {
+		err = -ENOTEMPTY;
+		goto out;
+	}
+
+	err = fill_name_de(sbi, de, &dentry->d_name, NULL);
+	if (err < 0)
+		goto out;
+
+	undo_remove = 0;
+	err = ni_remove_name(dir_ni, ni, de, &de2, &undo_remove);
+
+	if (!err) {
+		drop_nlink(inode);
+		dir->i_mtime = dir->i_ctime = current_time(dir);
+		mark_inode_dirty(dir);
+		inode->i_ctime = dir->i_ctime;
+		if (inode->i_nlink)
+			mark_inode_dirty(inode);
+	} else if (!ni_remove_name_undo(dir_ni, ni, de, de2, undo_remove)) {
+		make_bad_inode(inode);
+		ntfs_inode_err(inode, "failed to undo unlink");
+		ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
+	} else {
+		if (ni_is_dirty(dir))
+			mark_inode_dirty(dir);
+		if (ni_is_dirty(inode))
+			mark_inode_dirty(inode);
+	}
+
+out:
+	ni_unlock(ni);
+	__putname(de);
+	return err;
+}
+
+void ntfs_evict_inode(struct inode *inode)
+{
+	truncate_inode_pages_final(&inode->i_data);
+
+	if (inode->i_nlink)
+		_ni_write_inode(inode, inode_needs_sync(inode));
+
+	invalidate_inode_buffers(inode);
+	clear_inode(inode);
+
+	ni_clear(ntfs_i(inode));
+}
+
+static noinline int ntfs_readlink_hlp(struct inode *inode, char *buffer,
+				      int buflen)
+{
+	int i, err = 0;
+	struct ntfs_inode *ni = ntfs_i(inode);
+	struct super_block *sb = inode->i_sb;
+	struct ntfs_sb_info *sbi = sb->s_fs_info;
+	u64 i_size = inode->i_size;
+	u16 nlen = 0;
+	void *to_free = NULL;
+	struct REPARSE_DATA_BUFFER *rp;
+	struct le_str *uni;
+	struct ATTRIB *attr;
+
+	/* Reparse data present. Try to parse it. */
+	static_assert(!offsetof(struct REPARSE_DATA_BUFFER, ReparseTag));
+	static_assert(sizeof(u32) == sizeof(rp->ReparseTag));
+
+	*buffer = 0;
+
+	/* Read into temporal buffer. */
+	if (i_size > sbi->reparse.max_size || i_size <= sizeof(u32)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	attr = ni_find_attr(ni, NULL, NULL, ATTR_REPARSE, NULL, 0, NULL, NULL);
+	if (!attr) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (!attr->non_res) {
+		rp = resident_data_ex(attr, i_size);
+		if (!rp) {
+			err = -EINVAL;
+			goto out;
+		}
+	} else {
+		rp = kmalloc(i_size, GFP_NOFS);
+		if (!rp) {
+			err = -ENOMEM;
+			goto out;
+		}
+		to_free = rp;
+		err = ntfs_read_run_nb(sbi, &ni->file.run, 0, rp, i_size, NULL);
+		if (err)
+			goto out;
+	}
+
+	err = -EINVAL;
+
+	/* Microsoft Tag. */
+	switch (rp->ReparseTag) {
+	case IO_REPARSE_TAG_MOUNT_POINT:
+		/* Mount points and junctions. */
+		/* Can we use 'Rp->MountPointReparseBuffer.PrintNameLength'? */
+		if (i_size <= offsetof(struct REPARSE_DATA_BUFFER,
+				       MountPointReparseBuffer.PathBuffer))
+			goto out;
+		uni = Add2Ptr(rp,
+			      offsetof(struct REPARSE_DATA_BUFFER,
+				       MountPointReparseBuffer.PathBuffer) +
+				      le16_to_cpu(rp->MountPointReparseBuffer
+							  .PrintNameOffset) -
+				      2);
+		nlen = le16_to_cpu(rp->MountPointReparseBuffer.PrintNameLength);
+		break;
+
+	case IO_REPARSE_TAG_SYMLINK:
+		/* FolderSymbolicLink */
+		/* Can we use 'Rp->SymbolicLinkReparseBuffer.PrintNameLength'? */
+		if (i_size <= offsetof(struct REPARSE_DATA_BUFFER,
+				       SymbolicLinkReparseBuffer.PathBuffer))
+			goto out;
+		uni = Add2Ptr(rp,
+			      offsetof(struct REPARSE_DATA_BUFFER,
+				       SymbolicLinkReparseBuffer.PathBuffer) +
+				      le16_to_cpu(rp->SymbolicLinkReparseBuffer
+							  .PrintNameOffset) -
+				      2);
+		nlen = le16_to_cpu(
+			rp->SymbolicLinkReparseBuffer.PrintNameLength);
+		break;
+
+	case IO_REPARSE_TAG_CLOUD:
+	case IO_REPARSE_TAG_CLOUD_1:
+	case IO_REPARSE_TAG_CLOUD_2:
+	case IO_REPARSE_TAG_CLOUD_3:
+	case IO_REPARSE_TAG_CLOUD_4:
+	case IO_REPARSE_TAG_CLOUD_5:
+	case IO_REPARSE_TAG_CLOUD_6:
+	case IO_REPARSE_TAG_CLOUD_7:
+	case IO_REPARSE_TAG_CLOUD_8:
+	case IO_REPARSE_TAG_CLOUD_9:
+	case IO_REPARSE_TAG_CLOUD_A:
+	case IO_REPARSE_TAG_CLOUD_B:
+	case IO_REPARSE_TAG_CLOUD_C:
+	case IO_REPARSE_TAG_CLOUD_D:
+	case IO_REPARSE_TAG_CLOUD_E:
+	case IO_REPARSE_TAG_CLOUD_F:
+		err = sizeof("OneDrive") - 1;
+		if (err > buflen)
+			err = buflen;
+		memcpy(buffer, "OneDrive", err);
+		goto out;
+
+	default:
+		if (IsReparseTagMicrosoft(rp->ReparseTag)) {
+			/* Unknown Microsoft Tag. */
+			goto out;
+		}
+		if (!IsReparseTagNameSurrogate(rp->ReparseTag) ||
+		    i_size <= sizeof(struct REPARSE_POINT)) {
+			goto out;
+		}
+
+		/* Users tag. */
+		uni = Add2Ptr(rp, sizeof(struct REPARSE_POINT) - 2);
+		nlen = le16_to_cpu(rp->ReparseDataLength) -
+		       sizeof(struct REPARSE_POINT);
+	}
+
+	/* Convert nlen from bytes to UNICODE chars. */
+	nlen >>= 1;
+
+	/* Check that name is available. */
+	if (!nlen || &uni->name[nlen] > (__le16 *)Add2Ptr(rp, i_size))
+		goto out;
+
+	/* If name is already zero terminated then truncate it now. */
+	if (!uni->name[nlen - 1])
+		nlen -= 1;
+	uni->len = nlen;
+
+	err = ntfs_utf16_to_nls(sbi, uni, buffer, buflen);
+
+	if (err < 0)
+		goto out;
+
+	/* Translate Windows '\' into Linux '/'. */
+	for (i = 0; i < err; i++) {
+		if (buffer[i] == '\\')
+			buffer[i] = '/';
+	}
+
+	/* Always set last zero. */
+	buffer[err] = 0;
+out:
+	kfree(to_free);
+	return err;
+}
+
+static const char *ntfs_get_link(struct dentry *de, struct inode *inode,
+				 struct delayed_call *done)
+{
+	int err;
+	char *ret;
+
+	if (!de)
+		return ERR_PTR(-ECHILD);
+
+	ret = kmalloc(PAGE_SIZE, GFP_NOFS);
+	if (!ret)
+		return ERR_PTR(-ENOMEM);
+
+	err = ntfs_readlink_hlp(inode, ret, PAGE_SIZE);
+	if (err < 0) {
+		kfree(ret);
+		return ERR_PTR(err);
+	}
+
+	set_delayed_call(done, kfree_link, ret);
+
+	return ret;
+}
+
+// clang-format off
+const struct inode_operations ntfs_link_inode_operations = {
+	.get_link	= ntfs_get_link,
+	.setattr	= ntfs3_setattr,
+	.listxattr	= ntfs_listxattr,
+	.permission	= ntfs_permission,
+	.get_acl	= ntfs_get_acl,
+	.set_acl	= ntfs_set_acl,
+};
+
+const struct address_space_operations ntfs_aops = {
+	.readpage	= ntfs_readpage,
+	.readahead	= ntfs_readahead,
+	.writepage	= ntfs_writepage,
+	.writepages	= ntfs_writepages,
+	.write_begin	= ntfs_write_begin,
+	.write_end	= ntfs_write_end,
+	.direct_IO	= ntfs_direct_IO,
+	.bmap		= ntfs_bmap,
+	.set_page_dirty = __set_page_dirty_buffers,
+};
+
+const struct address_space_operations ntfs_aops_cmpr = {
+	.readpage	= ntfs_readpage,
+	.readahead	= ntfs_readahead,
+};
+// clang-format on
diff --git a/fs/ntfs3/lib/decompress_common.c b/fs/ntfs3/lib/decompress_common.c
new file mode 100644
index 000000000000..e96652240859
--- /dev/null
+++ b/fs/ntfs3/lib/decompress_common.c
@@ -0,0 +1,319 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * decompress_common.c - Code shared by the XPRESS and LZX decompressors
+ *
+ * Copyright (C) 2015 Eric Biggers
+ */
+
+#include "decompress_common.h"
+
+/*
+ * make_huffman_decode_table() -
+ *
+ * Build a decoding table for a canonical prefix code, or "Huffman code".
+ *
+ * This is an internal function, not part of the library API!
+ *
+ * This takes as input the length of the codeword for each symbol in the
+ * alphabet and produces as output a table that can be used for fast
+ * decoding of prefix-encoded symbols using read_huffsym().
+ *
+ * Strictly speaking, a canonical prefix code might not be a Huffman
+ * code.  But this algorithm will work either way; and in fact, since
+ * Huffman codes are defined in terms of symbol frequencies, there is no
+ * way for the decompressor to know whether the code is a true Huffman
+ * code or not until all symbols have been decoded.
+ *
+ * Because the prefix code is assumed to be "canonical", it can be
+ * reconstructed directly from the codeword lengths.  A prefix code is
+ * canonical if and only if a longer codeword never lexicographically
+ * precedes a shorter codeword, and the lexicographic ordering of
+ * codewords of the same length is the same as the lexicographic ordering
+ * of the corresponding symbols.  Consequently, we can sort the symbols
+ * primarily by codeword length and secondarily by symbol value, then
+ * reconstruct the prefix code by generating codewords lexicographically
+ * in that order.
+ *
+ * This function does not, however, generate the prefix code explicitly.
+ * Instead, it directly builds a table for decoding symbols using the
+ * code.  The basic idea is this: given the next 'max_codeword_len' bits
+ * in the input, we can look up the decoded symbol by indexing a table
+ * containing 2**max_codeword_len entries.  A codeword with length
+ * 'max_codeword_len' will have exactly one entry in this table, whereas
+ * a codeword shorter than 'max_codeword_len' will have multiple entries
+ * in this table.  Precisely, a codeword of length n will be represented
+ * by 2**(max_codeword_len - n) entries in this table.  The 0-based index
+ * of each such entry will contain the corresponding codeword as a prefix
+ * when zero-padded on the left to 'max_codeword_len' binary digits.
+ *
+ * That's the basic idea, but we implement two optimizations regarding
+ * the format of the decode table itself:
+ *
+ * - For many compression formats, the maximum codeword length is too
+ *   long for it to be efficient to build the full decoding table
+ *   whenever a new prefix code is used.  Instead, we can build the table
+ *   using only 2**table_bits entries, where 'table_bits' is some number
+ *   less than or equal to 'max_codeword_len'.  Then, only codewords of
+ *   length 'table_bits' and shorter can be directly looked up.  For
+ *   longer codewords, the direct lookup instead produces the root of a
+ *   binary tree.  Using this tree, the decoder can do traditional
+ *   bit-by-bit decoding of the remainder of the codeword.  Child nodes
+ *   are allocated in extra entries at the end of the table; leaf nodes
+ *   contain symbols.  Note that the long-codeword case is, in general,
+ *   not performance critical, since in Huffman codes the most frequently
+ *   used symbols are assigned the shortest codeword lengths.
+ *
+ * - When we decode a symbol using a direct lookup of the table, we still
+ *   need to know its length so that the bitstream can be advanced by the
+ *   appropriate number of bits.  The simple solution is to simply retain
+ *   the 'lens' array and use the decoded symbol as an index into it.
+ *   However, this requires two separate array accesses in the fast path.
+ *   The optimization is to store the length directly in the decode
+ *   table.  We use the bottom 11 bits for the symbol and the top 5 bits
+ *   for the length.  In addition, to combine this optimization with the
+ *   previous one, we introduce a special case where the top 2 bits of
+ *   the length are both set if the entry is actually the root of a
+ *   binary tree.
+ *
+ * @decode_table:
+ *	The array in which to create the decoding table.  This must have
+ *	a length of at least ((2**table_bits) + 2 * num_syms) entries.
+ *
+ * @num_syms:
+ *	The number of symbols in the alphabet; also, the length of the
+ *	'lens' array.  Must be less than or equal to 2048.
+ *
+ * @table_bits:
+ *	The order of the decode table size, as explained above.  Must be
+ *	less than or equal to 13.
+ *
+ * @lens:
+ *	An array of length @num_syms, indexable by symbol, that gives the
+ *	length of the codeword, in bits, for that symbol.  The length can
+ *	be 0, which means that the symbol does not have a codeword
+ *	assigned.
+ *
+ * @max_codeword_len:
+ *	The longest codeword length allowed in the compression format.
+ *	All entries in 'lens' must be less than or equal to this value.
+ *	This must be less than or equal to 23.
+ *
+ * @working_space
+ *	A temporary array of length '2 * (max_codeword_len + 1) +
+ *	num_syms'.
+ *
+ * Returns 0 on success, or -1 if the lengths do not form a valid prefix
+ * code.
+ */
+int make_huffman_decode_table(u16 decode_table[], const u32 num_syms,
+			      const u32 table_bits, const u8 lens[],
+			      const u32 max_codeword_len,
+			      u16 working_space[])
+{
+	const u32 table_num_entries = 1 << table_bits;
+	u16 * const len_counts = &working_space[0];
+	u16 * const offsets = &working_space[1 * (max_codeword_len + 1)];
+	u16 * const sorted_syms = &working_space[2 * (max_codeword_len + 1)];
+	int left;
+	void *decode_table_ptr;
+	u32 sym_idx;
+	u32 codeword_len;
+	u32 stores_per_loop;
+	u32 decode_table_pos;
+	u32 len;
+	u32 sym;
+
+	/* Count how many symbols have each possible codeword length.
+	 * Note that a length of 0 indicates the corresponding symbol is not
+	 * used in the code and therefore does not have a codeword.
+	 */
+	for (len = 0; len <= max_codeword_len; len++)
+		len_counts[len] = 0;
+	for (sym = 0; sym < num_syms; sym++)
+		len_counts[lens[sym]]++;
+
+	/* We can assume all lengths are <= max_codeword_len, but we
+	 * cannot assume they form a valid prefix code.  A codeword of
+	 * length n should require a proportion of the codespace equaling
+	 * (1/2)^n.  The code is valid if and only if the codespace is
+	 * exactly filled by the lengths, by this measure.
+	 */
+	left = 1;
+	for (len = 1; len <= max_codeword_len; len++) {
+		left <<= 1;
+		left -= len_counts[len];
+		if (left < 0) {
+			/* The lengths overflow the codespace; that is, the code
+			 * is over-subscribed.
+			 */
+			return -1;
+		}
+	}
+
+	if (left) {
+		/* The lengths do not fill the codespace; that is, they form an
+		 * incomplete set.
+		 */
+		if (left == (1 << max_codeword_len)) {
+			/* The code is completely empty.  This is arguably
+			 * invalid, but in fact it is valid in LZX and XPRESS,
+			 * so we must allow it.  By definition, no symbols can
+			 * be decoded with an empty code.  Consequently, we
+			 * technically don't even need to fill in the decode
+			 * table.  However, to avoid accessing uninitialized
+			 * memory if the algorithm nevertheless attempts to
+			 * decode symbols using such a code, we zero out the
+			 * decode table.
+			 */
+			memset(decode_table, 0,
+			       table_num_entries * sizeof(decode_table[0]));
+			return 0;
+		}
+		return -1;
+	}
+
+	/* Sort the symbols primarily by length and secondarily by symbol order.
+	 */
+
+	/* Initialize 'offsets' so that offsets[len] for 1 <= len <=
+	 * max_codeword_len is the number of codewords shorter than 'len' bits.
+	 */
+	offsets[1] = 0;
+	for (len = 1; len < max_codeword_len; len++)
+		offsets[len + 1] = offsets[len] + len_counts[len];
+
+	/* Use the 'offsets' array to sort the symbols.  Note that we do not
+	 * include symbols that are not used in the code.  Consequently, fewer
+	 * than 'num_syms' entries in 'sorted_syms' may be filled.
+	 */
+	for (sym = 0; sym < num_syms; sym++)
+		if (lens[sym])
+			sorted_syms[offsets[lens[sym]]++] = sym;
+
+	/* Fill entries for codewords with length <= table_bits
+	 * --- that is, those short enough for a direct mapping.
+	 *
+	 * The table will start with entries for the shortest codeword(s), which
+	 * have the most entries.  From there, the number of entries per
+	 * codeword will decrease.
+	 */
+	decode_table_ptr = decode_table;
+	sym_idx = 0;
+	codeword_len = 1;
+	stores_per_loop = (1 << (table_bits - codeword_len));
+	for (; stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) {
+		u32 end_sym_idx = sym_idx + len_counts[codeword_len];
+
+		for (; sym_idx < end_sym_idx; sym_idx++) {
+			u16 entry;
+			u16 *p;
+			u32 n;
+
+			entry = ((u32)codeword_len << 11) | sorted_syms[sym_idx];
+			p = (u16 *)decode_table_ptr;
+			n = stores_per_loop;
+
+			do {
+				*p++ = entry;
+			} while (--n);
+
+			decode_table_ptr = p;
+		}
+	}
+
+	/* If we've filled in the entire table, we are done.  Otherwise,
+	 * there are codewords longer than table_bits for which we must
+	 * generate binary trees.
+	 */
+	decode_table_pos = (u16 *)decode_table_ptr - decode_table;
+	if (decode_table_pos != table_num_entries) {
+		u32 j;
+		u32 next_free_tree_slot;
+		u32 cur_codeword;
+
+		/* First, zero out the remaining entries.  This is
+		 * necessary so that these entries appear as
+		 * "unallocated" in the next part.  Each of these entries
+		 * will eventually be filled with the representation of
+		 * the root node of a binary tree.
+		 */
+		j = decode_table_pos;
+		do {
+			decode_table[j] = 0;
+		} while (++j != table_num_entries);
+
+		/* We allocate child nodes starting at the end of the
+		 * direct lookup table.  Note that there should be
+		 * 2*num_syms extra entries for this purpose, although
+		 * fewer than this may actually be needed.
+		 */
+		next_free_tree_slot = table_num_entries;
+
+		/* Iterate through each codeword with length greater than
+		 * 'table_bits', primarily in order of codeword length
+		 * and secondarily in order of symbol.
+		 */
+		for (cur_codeword = decode_table_pos << 1;
+		     codeword_len <= max_codeword_len;
+		     codeword_len++, cur_codeword <<= 1) {
+			u32 end_sym_idx = sym_idx + len_counts[codeword_len];
+
+			for (; sym_idx < end_sym_idx; sym_idx++, cur_codeword++) {
+				/* 'sorted_sym' is the symbol represented by the
+				 * codeword.
+				 */
+				u32 sorted_sym = sorted_syms[sym_idx];
+				u32 extra_bits = codeword_len - table_bits;
+				u32 node_idx = cur_codeword >> extra_bits;
+
+				/* Go through each bit of the current codeword
+				 * beyond the prefix of length @table_bits and
+				 * walk the appropriate binary tree, allocating
+				 * any slots that have not yet been allocated.
+				 *
+				 * Note that the 'pointer' entry to the binary
+				 * tree, which is stored in the direct lookup
+				 * portion of the table, is represented
+				 * identically to other internal (non-leaf)
+				 * nodes of the binary tree; it can be thought
+				 * of as simply the root of the tree.  The
+				 * representation of these internal nodes is
+				 * simply the index of the left child combined
+				 * with the special bits 0xC000 to distinguish
+				 * the entry from direct mapping and leaf node
+				 * entries.
+				 */
+				do {
+					/* At least one bit remains in the
+					 * codeword, but the current node is an
+					 * unallocated leaf.  Change it to an
+					 * internal node.
+					 */
+					if (decode_table[node_idx] == 0) {
+						decode_table[node_idx] =
+							next_free_tree_slot | 0xC000;
+						decode_table[next_free_tree_slot++] = 0;
+						decode_table[next_free_tree_slot++] = 0;
+					}
+
+					/* Go to the left child if the next bit
+					 * in the codeword is 0; otherwise go to
+					 * the right child.
+					 */
+					node_idx = decode_table[node_idx] & 0x3FFF;
+					--extra_bits;
+					node_idx += (cur_codeword >> extra_bits) & 1;
+				} while (extra_bits != 0);
+
+				/* We've traversed the tree using the entire
+				 * codeword, and we're now at the entry where
+				 * the actual symbol will be stored.  This is
+				 * distinguished from internal nodes by not
+				 * having its high two bits set.
+				 */
+				decode_table[node_idx] = sorted_sym;
+			}
+		}
+	}
+	return 0;
+}
diff --git a/fs/ntfs3/lib/decompress_common.h b/fs/ntfs3/lib/decompress_common.h
new file mode 100644
index 000000000000..2d70ae42f1b5
--- /dev/null
+++ b/fs/ntfs3/lib/decompress_common.h
@@ -0,0 +1,338 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * decompress_common.h - Code shared by the XPRESS and LZX decompressors
+ *
+ * Copyright (C) 2015 Eric Biggers
+ */
+
+#include <linux/string.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <asm/unaligned.h>
+
+
+/* "Force inline" macro (not required, but helpful for performance)  */
+#define forceinline __always_inline
+
+/* Enable whole-word match copying on selected architectures  */
+#if defined(__i386__) || defined(__x86_64__) || defined(__ARM_FEATURE_UNALIGNED)
+#  define FAST_UNALIGNED_ACCESS
+#endif
+
+/* Size of a machine word  */
+#define WORDBYTES (sizeof(size_t))
+
+static forceinline void
+copy_unaligned_word(const void *src, void *dst)
+{
+	put_unaligned(get_unaligned((const size_t *)src), (size_t *)dst);
+}
+
+
+/* Generate a "word" with platform-dependent size whose bytes all contain the
+ * value 'b'.
+ */
+static forceinline size_t repeat_byte(u8 b)
+{
+	size_t v;
+
+	v = b;
+	v |= v << 8;
+	v |= v << 16;
+	v |= v << ((WORDBYTES == 8) ? 32 : 0);
+	return v;
+}
+
+/* Structure that encapsulates a block of in-memory data being interpreted as a
+ * stream of bits, optionally with interwoven literal bytes.  Bits are assumed
+ * to be stored in little endian 16-bit coding units, with the bits ordered high
+ * to low.
+ */
+struct input_bitstream {
+
+	/* Bits that have been read from the input buffer.  The bits are
+	 * left-justified; the next bit is always bit 31.
+	 */
+	u32 bitbuf;
+
+	/* Number of bits currently held in @bitbuf.  */
+	u32 bitsleft;
+
+	/* Pointer to the next byte to be retrieved from the input buffer.  */
+	const u8 *next;
+
+	/* Pointer to just past the end of the input buffer.  */
+	const u8 *end;
+};
+
+/* Initialize a bitstream to read from the specified input buffer.  */
+static forceinline void init_input_bitstream(struct input_bitstream *is,
+					     const void *buffer, u32 size)
+{
+	is->bitbuf = 0;
+	is->bitsleft = 0;
+	is->next = buffer;
+	is->end = is->next + size;
+}
+
+/* Ensure the bit buffer variable for the bitstream contains at least @num_bits
+ * bits.  Following this, bitstream_peek_bits() and/or bitstream_remove_bits()
+ * may be called on the bitstream to peek or remove up to @num_bits bits.  Note
+ * that @num_bits must be <= 16.
+ */
+static forceinline void bitstream_ensure_bits(struct input_bitstream *is,
+					      u32 num_bits)
+{
+	if (is->bitsleft < num_bits) {
+		if (is->end - is->next >= 2) {
+			is->bitbuf |= (u32)get_unaligned_le16(is->next)
+					<< (16 - is->bitsleft);
+			is->next += 2;
+		}
+		is->bitsleft += 16;
+	}
+}
+
+/* Return the next @num_bits bits from the bitstream, without removing them.
+ * There must be at least @num_bits remaining in the buffer variable, from a
+ * previous call to bitstream_ensure_bits().
+ */
+static forceinline u32
+bitstream_peek_bits(const struct input_bitstream *is, const u32 num_bits)
+{
+	return (is->bitbuf >> 1) >> (sizeof(is->bitbuf) * 8 - num_bits - 1);
+}
+
+/* Remove @num_bits from the bitstream.  There must be at least @num_bits
+ * remaining in the buffer variable, from a previous call to
+ * bitstream_ensure_bits().
+ */
+static forceinline void
+bitstream_remove_bits(struct input_bitstream *is, u32 num_bits)
+{
+	is->bitbuf <<= num_bits;
+	is->bitsleft -= num_bits;
+}
+
+/* Remove and return @num_bits bits from the bitstream.  There must be at least
+ * @num_bits remaining in the buffer variable, from a previous call to
+ * bitstream_ensure_bits().
+ */
+static forceinline u32
+bitstream_pop_bits(struct input_bitstream *is, u32 num_bits)
+{
+	u32 bits = bitstream_peek_bits(is, num_bits);
+
+	bitstream_remove_bits(is, num_bits);
+	return bits;
+}
+
+/* Read and return the next @num_bits bits from the bitstream.  */
+static forceinline u32
+bitstream_read_bits(struct input_bitstream *is, u32 num_bits)
+{
+	bitstream_ensure_bits(is, num_bits);
+	return bitstream_pop_bits(is, num_bits);
+}
+
+/* Read and return the next literal byte embedded in the bitstream.  */
+static forceinline u8
+bitstream_read_byte(struct input_bitstream *is)
+{
+	if (unlikely(is->end == is->next))
+		return 0;
+	return *is->next++;
+}
+
+/* Read and return the next 16-bit integer embedded in the bitstream.  */
+static forceinline u16
+bitstream_read_u16(struct input_bitstream *is)
+{
+	u16 v;
+
+	if (unlikely(is->end - is->next < 2))
+		return 0;
+	v = get_unaligned_le16(is->next);
+	is->next += 2;
+	return v;
+}
+
+/* Read and return the next 32-bit integer embedded in the bitstream.  */
+static forceinline u32
+bitstream_read_u32(struct input_bitstream *is)
+{
+	u32 v;
+
+	if (unlikely(is->end - is->next < 4))
+		return 0;
+	v = get_unaligned_le32(is->next);
+	is->next += 4;
+	return v;
+}
+
+/* Read into @dst_buffer an array of literal bytes embedded in the bitstream.
+ * Return either a pointer to the byte past the last written, or NULL if the
+ * read overflows the input buffer.
+ */
+static forceinline void *bitstream_read_bytes(struct input_bitstream *is,
+					      void *dst_buffer, size_t count)
+{
+	if ((size_t)(is->end - is->next) < count)
+		return NULL;
+	memcpy(dst_buffer, is->next, count);
+	is->next += count;
+	return (u8 *)dst_buffer + count;
+}
+
+/* Align the input bitstream on a coding-unit boundary.  */
+static forceinline void bitstream_align(struct input_bitstream *is)
+{
+	is->bitsleft = 0;
+	is->bitbuf = 0;
+}
+
+extern int make_huffman_decode_table(u16 decode_table[], const u32 num_syms,
+				     const u32 num_bits, const u8 lens[],
+				     const u32 max_codeword_len,
+				     u16 working_space[]);
+
+
+/* Reads and returns the next Huffman-encoded symbol from a bitstream.  If the
+ * input data is exhausted, the Huffman symbol is decoded as if the missing bits
+ * are all zeroes.
+ */
+static forceinline u32 read_huffsym(struct input_bitstream *istream,
+					 const u16 decode_table[],
+					 u32 table_bits,
+					 u32 max_codeword_len)
+{
+	u32 entry;
+	u32 key_bits;
+
+	bitstream_ensure_bits(istream, max_codeword_len);
+
+	/* Index the decode table by the next table_bits bits of the input.  */
+	key_bits = bitstream_peek_bits(istream, table_bits);
+	entry = decode_table[key_bits];
+	if (entry < 0xC000) {
+		/* Fast case: The decode table directly provided the
+		 * symbol and codeword length.  The low 11 bits are the
+		 * symbol, and the high 5 bits are the codeword length.
+		 */
+		bitstream_remove_bits(istream, entry >> 11);
+		return entry & 0x7FF;
+	}
+	/* Slow case: The codeword for the symbol is longer than
+	 * table_bits, so the symbol does not have an entry
+	 * directly in the first (1 << table_bits) entries of the
+	 * decode table.  Traverse the appropriate binary tree
+	 * bit-by-bit to decode the symbol.
+	 */
+	bitstream_remove_bits(istream, table_bits);
+	do {
+		key_bits = (entry & 0x3FFF) + bitstream_pop_bits(istream, 1);
+	} while ((entry = decode_table[key_bits]) >= 0xC000);
+	return entry;
+}
+
+/*
+ * Copy an LZ77 match at (dst - offset) to dst.
+ *
+ * The length and offset must be already validated --- that is, (dst - offset)
+ * can't underrun the output buffer, and (dst + length) can't overrun the output
+ * buffer.  Also, the length cannot be 0.
+ *
+ * @bufend points to the byte past the end of the output buffer.  This function
+ * won't write any data beyond this position.
+ *
+ * Returns dst + length.
+ */
+static forceinline u8 *lz_copy(u8 *dst, u32 length, u32 offset, const u8 *bufend,
+			       u32 min_length)
+{
+	const u8 *src = dst - offset;
+
+	/*
+	 * Try to copy one machine word at a time.  On i386 and x86_64 this is
+	 * faster than copying one byte at a time, unless the data is
+	 * near-random and all the matches have very short lengths.  Note that
+	 * since this requires unaligned memory accesses, it won't necessarily
+	 * be faster on every architecture.
+	 *
+	 * Also note that we might copy more than the length of the match.  For
+	 * example, if a word is 8 bytes and the match is of length 5, then
+	 * we'll simply copy 8 bytes.  This is okay as long as we don't write
+	 * beyond the end of the output buffer, hence the check for (bufend -
+	 * end >= WORDBYTES - 1).
+	 */
+#ifdef FAST_UNALIGNED_ACCESS
+	u8 * const end = dst + length;
+
+	if (bufend - end >= (ptrdiff_t)(WORDBYTES - 1)) {
+
+		if (offset >= WORDBYTES) {
+			/* The source and destination words don't overlap.  */
+
+			/* To improve branch prediction, one iteration of this
+			 * loop is unrolled.  Most matches are short and will
+			 * fail the first check.  But if that check passes, then
+			 * it becomes increasing likely that the match is long
+			 * and we'll need to continue copying.
+			 */
+
+			copy_unaligned_word(src, dst);
+			src += WORDBYTES;
+			dst += WORDBYTES;
+
+			if (dst < end) {
+				do {
+					copy_unaligned_word(src, dst);
+					src += WORDBYTES;
+					dst += WORDBYTES;
+				} while (dst < end);
+			}
+			return end;
+		} else if (offset == 1) {
+
+			/* Offset 1 matches are equivalent to run-length
+			 * encoding of the previous byte.  This case is common
+			 * if the data contains many repeated bytes.
+			 */
+			size_t v = repeat_byte(*(dst - 1));
+
+			do {
+				put_unaligned(v, (size_t *)dst);
+				src += WORDBYTES;
+				dst += WORDBYTES;
+			} while (dst < end);
+			return end;
+		}
+		/*
+		 * We don't bother with special cases for other 'offset <
+		 * WORDBYTES', which are usually rarer than 'offset == 1'.  Extra
+		 * checks will just slow things down.  Actually, it's possible
+		 * to handle all the 'offset < WORDBYTES' cases using the same
+		 * code, but it still becomes more complicated doesn't seem any
+		 * faster overall; it definitely slows down the more common
+		 * 'offset == 1' case.
+		 */
+	}
+#endif /* FAST_UNALIGNED_ACCESS */
+
+	/* Fall back to a bytewise copy.  */
+
+	if (min_length >= 2) {
+		*dst++ = *src++;
+		length--;
+	}
+	if (min_length >= 3) {
+		*dst++ = *src++;
+		length--;
+	}
+	do {
+		*dst++ = *src++;
+	} while (--length);
+
+	return dst;
+}
diff --git a/fs/ntfs3/lib/lib.h b/fs/ntfs3/lib/lib.h
new file mode 100644
index 000000000000..f508fbad2e71
--- /dev/null
+++ b/fs/ntfs3/lib/lib.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Adapted for linux kernel by Alexander Mamaev:
+ * - remove implementations of get_unaligned_
+ * - assume GCC is always defined
+ * - ISO C90
+ * - linux kernel code style
+ */
+
+
+/* globals from xpress_decompress.c */
+struct xpress_decompressor *xpress_allocate_decompressor(void);
+void xpress_free_decompressor(struct xpress_decompressor *d);
+int xpress_decompress(struct xpress_decompressor *__restrict d,
+		      const void *__restrict compressed_data,
+		      size_t compressed_size,
+		      void *__restrict uncompressed_data,
+		      size_t uncompressed_size);
+
+/* globals from lzx_decompress.c */
+struct lzx_decompressor *lzx_allocate_decompressor(void);
+void lzx_free_decompressor(struct lzx_decompressor *d);
+int lzx_decompress(struct lzx_decompressor *__restrict d,
+		   const void *__restrict compressed_data,
+		   size_t compressed_size, void *__restrict uncompressed_data,
+		   size_t uncompressed_size);
diff --git a/fs/ntfs3/lib/lzx_decompress.c b/fs/ntfs3/lib/lzx_decompress.c
new file mode 100644
index 000000000000..6b16f07073c1
--- /dev/null
+++ b/fs/ntfs3/lib/lzx_decompress.c
@@ -0,0 +1,670 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * lzx_decompress.c - A decompressor for the LZX compression format, which can
+ * be used in "System Compressed" files.  This is based on the code from wimlib.
+ * This code only supports a window size (dictionary size) of 32768 bytes, since
+ * this is the only size used in System Compression.
+ *
+ * Copyright (C) 2015 Eric Biggers
+ */
+
+#include "decompress_common.h"
+#include "lib.h"
+
+/* Number of literal byte values  */
+#define LZX_NUM_CHARS			256
+
+/* The smallest and largest allowed match lengths  */
+#define LZX_MIN_MATCH_LEN		2
+#define LZX_MAX_MATCH_LEN		257
+
+/* Number of distinct match lengths that can be represented  */
+#define LZX_NUM_LENS			(LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1)
+
+/* Number of match lengths for which no length symbol is required  */
+#define LZX_NUM_PRIMARY_LENS		7
+#define LZX_NUM_LEN_HEADERS		(LZX_NUM_PRIMARY_LENS + 1)
+
+/* Valid values of the 3-bit block type field  */
+#define LZX_BLOCKTYPE_VERBATIM		1
+#define LZX_BLOCKTYPE_ALIGNED		2
+#define LZX_BLOCKTYPE_UNCOMPRESSED	3
+
+/* Number of offset slots for a window size of 32768  */
+#define LZX_NUM_OFFSET_SLOTS		30
+
+/* Number of symbols in the main code for a window size of 32768  */
+#define LZX_MAINCODE_NUM_SYMBOLS	\
+	(LZX_NUM_CHARS + (LZX_NUM_OFFSET_SLOTS * LZX_NUM_LEN_HEADERS))
+
+/* Number of symbols in the length code  */
+#define LZX_LENCODE_NUM_SYMBOLS		(LZX_NUM_LENS - LZX_NUM_PRIMARY_LENS)
+
+/* Number of symbols in the precode  */
+#define LZX_PRECODE_NUM_SYMBOLS		20
+
+/* Number of bits in which each precode codeword length is represented  */
+#define LZX_PRECODE_ELEMENT_SIZE	4
+
+/* Number of low-order bits of each match offset that are entropy-encoded in
+ * aligned offset blocks
+ */
+#define LZX_NUM_ALIGNED_OFFSET_BITS	3
+
+/* Number of symbols in the aligned offset code  */
+#define LZX_ALIGNEDCODE_NUM_SYMBOLS	(1 << LZX_NUM_ALIGNED_OFFSET_BITS)
+
+/* Mask for the match offset bits that are entropy-encoded in aligned offset
+ * blocks
+ */
+#define LZX_ALIGNED_OFFSET_BITMASK	((1 << LZX_NUM_ALIGNED_OFFSET_BITS) - 1)
+
+/* Number of bits in which each aligned offset codeword length is represented  */
+#define LZX_ALIGNEDCODE_ELEMENT_SIZE	3
+
+/* Maximum lengths (in bits) of the codewords in each Huffman code  */
+#define LZX_MAX_MAIN_CODEWORD_LEN	16
+#define LZX_MAX_LEN_CODEWORD_LEN	16
+#define LZX_MAX_PRE_CODEWORD_LEN	((1 << LZX_PRECODE_ELEMENT_SIZE) - 1)
+#define LZX_MAX_ALIGNED_CODEWORD_LEN	((1 << LZX_ALIGNEDCODE_ELEMENT_SIZE) - 1)
+
+/* The default "filesize" value used in pre/post-processing.  In the LZX format
+ * used in cabinet files this value must be given to the decompressor, whereas
+ * in the LZX format used in WIM files and system-compressed files this value is
+ * fixed at 12000000.
+ */
+#define LZX_DEFAULT_FILESIZE		12000000
+
+/* Assumed block size when the encoded block size begins with a 0 bit.  */
+#define LZX_DEFAULT_BLOCK_SIZE		32768
+
+/* Number of offsets in the recent (or "repeat") offsets queue.  */
+#define LZX_NUM_RECENT_OFFSETS		3
+
+/* These values are chosen for fast decompression.  */
+#define LZX_MAINCODE_TABLEBITS		11
+#define LZX_LENCODE_TABLEBITS		10
+#define LZX_PRECODE_TABLEBITS		6
+#define LZX_ALIGNEDCODE_TABLEBITS	7
+
+#define LZX_READ_LENS_MAX_OVERRUN	50
+
+/* Mapping: offset slot => first match offset that uses that offset slot.
+ */
+static const u32 lzx_offset_slot_base[LZX_NUM_OFFSET_SLOTS + 1] = {
+	0,	1,	2,	3,	4,	/* 0  --- 4  */
+	6,	8,	12,	16,	24,	/* 5  --- 9  */
+	32,	48,	64,	96,	128,	/* 10 --- 14 */
+	192,	256,	384,	512,	768,	/* 15 --- 19 */
+	1024,	1536,	2048,	3072,	4096,   /* 20 --- 24 */
+	6144,	8192,	12288,	16384,	24576,	/* 25 --- 29 */
+	32768,					/* extra     */
+};
+
+/* Mapping: offset slot => how many extra bits must be read and added to the
+ * corresponding offset slot base to decode the match offset.
+ */
+static const u8 lzx_extra_offset_bits[LZX_NUM_OFFSET_SLOTS] = {
+	0,	0,	0,	0,	1,
+	1,	2,	2,	3,	3,
+	4,	4,	5,	5,	6,
+	6,	7,	7,	8,	8,
+	9,	9,	10,	10,	11,
+	11,	12,	12,	13,	13,
+};
+
+/* Reusable heap-allocated memory for LZX decompression  */
+struct lzx_decompressor {
+
+	/* Huffman decoding tables, and arrays that map symbols to codeword
+	 * lengths
+	 */
+
+	u16 maincode_decode_table[(1 << LZX_MAINCODE_TABLEBITS) +
+					(LZX_MAINCODE_NUM_SYMBOLS * 2)];
+	u8 maincode_lens[LZX_MAINCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
+
+
+	u16 lencode_decode_table[(1 << LZX_LENCODE_TABLEBITS) +
+					(LZX_LENCODE_NUM_SYMBOLS * 2)];
+	u8 lencode_lens[LZX_LENCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
+
+
+	u16 alignedcode_decode_table[(1 << LZX_ALIGNEDCODE_TABLEBITS) +
+					(LZX_ALIGNEDCODE_NUM_SYMBOLS * 2)];
+	u8 alignedcode_lens[LZX_ALIGNEDCODE_NUM_SYMBOLS];
+
+	u16 precode_decode_table[(1 << LZX_PRECODE_TABLEBITS) +
+				 (LZX_PRECODE_NUM_SYMBOLS * 2)];
+	u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS];
+
+	/* Temporary space for make_huffman_decode_table()  */
+	u16 working_space[2 * (1 + LZX_MAX_MAIN_CODEWORD_LEN) +
+			  LZX_MAINCODE_NUM_SYMBOLS];
+};
+
+static void undo_e8_translation(void *target, s32 input_pos)
+{
+	s32 abs_offset, rel_offset;
+
+	abs_offset = get_unaligned_le32(target);
+	if (abs_offset >= 0) {
+		if (abs_offset < LZX_DEFAULT_FILESIZE) {
+			/* "good translation" */
+			rel_offset = abs_offset - input_pos;
+			put_unaligned_le32(rel_offset, target);
+		}
+	} else {
+		if (abs_offset >= -input_pos) {
+			/* "compensating translation" */
+			rel_offset = abs_offset + LZX_DEFAULT_FILESIZE;
+			put_unaligned_le32(rel_offset, target);
+		}
+	}
+}
+
+/*
+ * Undo the 'E8' preprocessing used in LZX.  Before compression, the
+ * uncompressed data was preprocessed by changing the targets of suspected x86
+ * CALL instructions from relative offsets to absolute offsets.  After
+ * match/literal decoding, the decompressor must undo the translation.
+ */
+static void lzx_postprocess(u8 *data, u32 size)
+{
+	/*
+	 * A worthwhile optimization is to push the end-of-buffer check into the
+	 * relatively rare E8 case.  This is possible if we replace the last six
+	 * bytes of data with E8 bytes; then we are guaranteed to hit an E8 byte
+	 * before reaching end-of-buffer.  In addition, this scheme guarantees
+	 * that no translation can begin following an E8 byte in the last 10
+	 * bytes because a 4-byte offset containing E8 as its high byte is a
+	 * large negative number that is not valid for translation.  That is
+	 * exactly what we need.
+	 */
+	u8 *tail;
+	u8 saved_bytes[6];
+	u8 *p;
+
+	if (size <= 10)
+		return;
+
+	tail = &data[size - 6];
+	memcpy(saved_bytes, tail, 6);
+	memset(tail, 0xE8, 6);
+	p = data;
+	for (;;) {
+		while (*p != 0xE8)
+			p++;
+		if (p >= tail)
+			break;
+		undo_e8_translation(p + 1, p - data);
+		p += 5;
+	}
+	memcpy(tail, saved_bytes, 6);
+}
+
+/* Read a Huffman-encoded symbol using the precode.  */
+static forceinline u32 read_presym(const struct lzx_decompressor *d,
+					struct input_bitstream *is)
+{
+	return read_huffsym(is, d->precode_decode_table,
+			    LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN);
+}
+
+/* Read a Huffman-encoded symbol using the main code.  */
+static forceinline u32 read_mainsym(const struct lzx_decompressor *d,
+					 struct input_bitstream *is)
+{
+	return read_huffsym(is, d->maincode_decode_table,
+			    LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN);
+}
+
+/* Read a Huffman-encoded symbol using the length code.  */
+static forceinline u32 read_lensym(const struct lzx_decompressor *d,
+					struct input_bitstream *is)
+{
+	return read_huffsym(is, d->lencode_decode_table,
+			    LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN);
+}
+
+/* Read a Huffman-encoded symbol using the aligned offset code.  */
+static forceinline u32 read_alignedsym(const struct lzx_decompressor *d,
+					    struct input_bitstream *is)
+{
+	return read_huffsym(is, d->alignedcode_decode_table,
+			    LZX_ALIGNEDCODE_TABLEBITS,
+			    LZX_MAX_ALIGNED_CODEWORD_LEN);
+}
+
+/*
+ * Read the precode from the compressed input bitstream, then use it to decode
+ * @num_lens codeword length values.
+ *
+ * @is:		The input bitstream.
+ *
+ * @lens:	An array that contains the length values from the previous time
+ *		the codeword lengths for this Huffman code were read, or all 0's
+ *		if this is the first time.  This array must have at least
+ *		(@num_lens + LZX_READ_LENS_MAX_OVERRUN) entries.
+ *
+ * @num_lens:	Number of length values to decode.
+ *
+ * Returns 0 on success, or -1 if the data was invalid.
+ */
+static int lzx_read_codeword_lens(struct lzx_decompressor *d,
+				  struct input_bitstream *is,
+				  u8 *lens, u32 num_lens)
+{
+	u8 *len_ptr = lens;
+	u8 *lens_end = lens + num_lens;
+	int i;
+
+	/* Read the lengths of the precode codewords.  These are given
+	 * explicitly.
+	 */
+	for (i = 0; i < LZX_PRECODE_NUM_SYMBOLS; i++) {
+		d->precode_lens[i] =
+			bitstream_read_bits(is, LZX_PRECODE_ELEMENT_SIZE);
+	}
+
+	/* Make the decoding table for the precode.  */
+	if (make_huffman_decode_table(d->precode_decode_table,
+				      LZX_PRECODE_NUM_SYMBOLS,
+				      LZX_PRECODE_TABLEBITS,
+				      d->precode_lens,
+				      LZX_MAX_PRE_CODEWORD_LEN,
+				      d->working_space))
+		return -1;
+
+	/* Decode the codeword lengths.  */
+	do {
+		u32 presym;
+		u8 len;
+
+		/* Read the next precode symbol.  */
+		presym = read_presym(d, is);
+		if (presym < 17) {
+			/* Difference from old length  */
+			len = *len_ptr - presym;
+			if ((s8)len < 0)
+				len += 17;
+			*len_ptr++ = len;
+		} else {
+			/* Special RLE values  */
+
+			u32 run_len;
+
+			if (presym == 17) {
+				/* Run of 0's  */
+				run_len = 4 + bitstream_read_bits(is, 4);
+				len = 0;
+			} else if (presym == 18) {
+				/* Longer run of 0's  */
+				run_len = 20 + bitstream_read_bits(is, 5);
+				len = 0;
+			} else {
+				/* Run of identical lengths  */
+				run_len = 4 + bitstream_read_bits(is, 1);
+				presym = read_presym(d, is);
+				if (presym > 17)
+					return -1;
+				len = *len_ptr - presym;
+				if ((s8)len < 0)
+					len += 17;
+			}
+
+			do {
+				*len_ptr++ = len;
+			} while (--run_len);
+			/* Worst case overrun is when presym == 18,
+			 * run_len == 20 + 31, and only 1 length was remaining.
+			 * So LZX_READ_LENS_MAX_OVERRUN == 50.
+			 *
+			 * Overrun while reading the first half of maincode_lens
+			 * can corrupt the previous values in the second half.
+			 * This doesn't really matter because the resulting
+			 * lengths will still be in range, and data that
+			 * generates overruns is invalid anyway.
+			 */
+		}
+	} while (len_ptr < lens_end);
+
+	return 0;
+}
+
+/*
+ * Read the header of an LZX block and save the block type and (uncompressed)
+ * size in *block_type_ret and *block_size_ret, respectively.
+ *
+ * If the block is compressed, also update the Huffman decode @tables with the
+ * new Huffman codes.  If the block is uncompressed, also update the match
+ * offset @queue with the new match offsets.
+ *
+ * Return 0 on success, or -1 if the data was invalid.
+ */
+static int lzx_read_block_header(struct lzx_decompressor *d,
+				 struct input_bitstream *is,
+				 int *block_type_ret,
+				 u32 *block_size_ret,
+				 u32 recent_offsets[])
+{
+	int block_type;
+	u32 block_size;
+	int i;
+
+	bitstream_ensure_bits(is, 4);
+
+	/* The first three bits tell us what kind of block it is, and should be
+	 * one of the LZX_BLOCKTYPE_* values.
+	 */
+	block_type = bitstream_pop_bits(is, 3);
+
+	/* Read the block size.  */
+	if (bitstream_pop_bits(is, 1)) {
+		block_size = LZX_DEFAULT_BLOCK_SIZE;
+	} else {
+		block_size = 0;
+		block_size |= bitstream_read_bits(is, 8);
+		block_size <<= 8;
+		block_size |= bitstream_read_bits(is, 8);
+	}
+
+	switch (block_type) {
+
+	case LZX_BLOCKTYPE_ALIGNED:
+
+		/* Read the aligned offset code and prepare its decode table.
+		 */
+
+		for (i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++) {
+			d->alignedcode_lens[i] =
+				bitstream_read_bits(is,
+						    LZX_ALIGNEDCODE_ELEMENT_SIZE);
+		}
+
+		if (make_huffman_decode_table(d->alignedcode_decode_table,
+					      LZX_ALIGNEDCODE_NUM_SYMBOLS,
+					      LZX_ALIGNEDCODE_TABLEBITS,
+					      d->alignedcode_lens,
+					      LZX_MAX_ALIGNED_CODEWORD_LEN,
+					      d->working_space))
+			return -1;
+
+		/* Fall though, since the rest of the header for aligned offset
+		 * blocks is the same as that for verbatim blocks.
+		 */
+		fallthrough;
+
+	case LZX_BLOCKTYPE_VERBATIM:
+
+		/* Read the main code and prepare its decode table.
+		 *
+		 * Note that the codeword lengths in the main code are encoded
+		 * in two parts: one part for literal symbols, and one part for
+		 * match symbols.
+		 */
+
+		if (lzx_read_codeword_lens(d, is, d->maincode_lens,
+					   LZX_NUM_CHARS))
+			return -1;
+
+		if (lzx_read_codeword_lens(d, is,
+					   d->maincode_lens + LZX_NUM_CHARS,
+					   LZX_MAINCODE_NUM_SYMBOLS - LZX_NUM_CHARS))
+			return -1;
+
+		if (make_huffman_decode_table(d->maincode_decode_table,
+					      LZX_MAINCODE_NUM_SYMBOLS,
+					      LZX_MAINCODE_TABLEBITS,
+					      d->maincode_lens,
+					      LZX_MAX_MAIN_CODEWORD_LEN,
+					      d->working_space))
+			return -1;
+
+		/* Read the length code and prepare its decode table.  */
+
+		if (lzx_read_codeword_lens(d, is, d->lencode_lens,
+					   LZX_LENCODE_NUM_SYMBOLS))
+			return -1;
+
+		if (make_huffman_decode_table(d->lencode_decode_table,
+					      LZX_LENCODE_NUM_SYMBOLS,
+					      LZX_LENCODE_TABLEBITS,
+					      d->lencode_lens,
+					      LZX_MAX_LEN_CODEWORD_LEN,
+					      d->working_space))
+			return -1;
+
+		break;
+
+	case LZX_BLOCKTYPE_UNCOMPRESSED:
+
+		/* Before reading the three recent offsets from the uncompressed
+		 * block header, the stream must be aligned on a 16-bit
+		 * boundary.  But if the stream is *already* aligned, then the
+		 * next 16 bits must be discarded.
+		 */
+		bitstream_ensure_bits(is, 1);
+		bitstream_align(is);
+
+		recent_offsets[0] = bitstream_read_u32(is);
+		recent_offsets[1] = bitstream_read_u32(is);
+		recent_offsets[2] = bitstream_read_u32(is);
+
+		/* Offsets of 0 are invalid.  */
+		if (recent_offsets[0] == 0 || recent_offsets[1] == 0 ||
+		    recent_offsets[2] == 0)
+			return -1;
+		break;
+
+	default:
+		/* Unrecognized block type.  */
+		return -1;
+	}
+
+	*block_type_ret = block_type;
+	*block_size_ret = block_size;
+	return 0;
+}
+
+/* Decompress a block of LZX-compressed data.  */
+static int lzx_decompress_block(const struct lzx_decompressor *d,
+				struct input_bitstream *is,
+				int block_type, u32 block_size,
+				u8 * const out_begin, u8 *out_next,
+				u32 recent_offsets[])
+{
+	u8 * const block_end = out_next + block_size;
+	u32 ones_if_aligned = 0U - (block_type == LZX_BLOCKTYPE_ALIGNED);
+
+	do {
+		u32 mainsym;
+		u32 match_len;
+		u32 match_offset;
+		u32 offset_slot;
+		u32 num_extra_bits;
+
+		mainsym = read_mainsym(d, is);
+		if (mainsym < LZX_NUM_CHARS) {
+			/* Literal  */
+			*out_next++ = mainsym;
+			continue;
+		}
+
+		/* Match  */
+
+		/* Decode the length header and offset slot.  */
+		mainsym -= LZX_NUM_CHARS;
+		match_len = mainsym % LZX_NUM_LEN_HEADERS;
+		offset_slot = mainsym / LZX_NUM_LEN_HEADERS;
+
+		/* If needed, read a length symbol to decode the full length. */
+		if (match_len == LZX_NUM_PRIMARY_LENS)
+			match_len += read_lensym(d, is);
+		match_len += LZX_MIN_MATCH_LEN;
+
+		if (offset_slot < LZX_NUM_RECENT_OFFSETS) {
+			/* Repeat offset  */
+
+			/* Note: This isn't a real LRU queue, since using the R2
+			 * offset doesn't bump the R1 offset down to R2.  This
+			 * quirk allows all 3 recent offsets to be handled by
+			 * the same code.  (For R0, the swap is a no-op.)
+			 */
+			match_offset = recent_offsets[offset_slot];
+			recent_offsets[offset_slot] = recent_offsets[0];
+			recent_offsets[0] = match_offset;
+		} else {
+			/* Explicit offset  */
+
+			/* Look up the number of extra bits that need to be read
+			 * to decode offsets with this offset slot.
+			 */
+			num_extra_bits = lzx_extra_offset_bits[offset_slot];
+
+			/* Start with the offset slot base value.  */
+			match_offset = lzx_offset_slot_base[offset_slot];
+
+			/* In aligned offset blocks, the low-order 3 bits of
+			 * each offset are encoded using the aligned offset
+			 * code.  Otherwise, all the extra bits are literal.
+			 */
+
+			if ((num_extra_bits & ones_if_aligned) >= LZX_NUM_ALIGNED_OFFSET_BITS) {
+				match_offset +=
+					bitstream_read_bits(is, num_extra_bits -
+								LZX_NUM_ALIGNED_OFFSET_BITS)
+							<< LZX_NUM_ALIGNED_OFFSET_BITS;
+				match_offset += read_alignedsym(d, is);
+			} else {
+				match_offset += bitstream_read_bits(is, num_extra_bits);
+			}
+
+			/* Adjust the offset.  */
+			match_offset -= (LZX_NUM_RECENT_OFFSETS - 1);
+
+			/* Update the recent offsets.  */
+			recent_offsets[2] = recent_offsets[1];
+			recent_offsets[1] = recent_offsets[0];
+			recent_offsets[0] = match_offset;
+		}
+
+		/* Validate the match, then copy it to the current position.  */
+
+		if (match_len > (size_t)(block_end - out_next))
+			return -1;
+
+		if (match_offset > (size_t)(out_next - out_begin))
+			return -1;
+
+		out_next = lz_copy(out_next, match_len, match_offset,
+				   block_end, LZX_MIN_MATCH_LEN);
+
+	} while (out_next != block_end);
+
+	return 0;
+}
+
+/*
+ * lzx_allocate_decompressor - Allocate an LZX decompressor
+ *
+ * Return the pointer to the decompressor on success, or return NULL and set
+ * errno on failure.
+ */
+struct lzx_decompressor *lzx_allocate_decompressor(void)
+{
+	return kmalloc(sizeof(struct lzx_decompressor), GFP_NOFS);
+}
+
+/*
+ * lzx_decompress - Decompress a buffer of LZX-compressed data
+ *
+ * @decompressor:      A decompressor allocated with lzx_allocate_decompressor()
+ * @compressed_data:	The buffer of data to decompress
+ * @compressed_size:	Number of bytes of compressed data
+ * @uncompressed_data:	The buffer in which to store the decompressed data
+ * @uncompressed_size:	The number of bytes the data decompresses into
+ *
+ * Return 0 on success, or return -1 and set errno on failure.
+ */
+int lzx_decompress(struct lzx_decompressor *decompressor,
+		   const void *compressed_data, size_t compressed_size,
+		   void *uncompressed_data, size_t uncompressed_size)
+{
+	struct lzx_decompressor *d = decompressor;
+	u8 * const out_begin = uncompressed_data;
+	u8 *out_next = out_begin;
+	u8 * const out_end = out_begin + uncompressed_size;
+	struct input_bitstream is;
+	u32 recent_offsets[LZX_NUM_RECENT_OFFSETS] = {1, 1, 1};
+	int e8_status = 0;
+
+	init_input_bitstream(&is, compressed_data, compressed_size);
+
+	/* Codeword lengths begin as all 0's for delta encoding purposes.  */
+	memset(d->maincode_lens, 0, LZX_MAINCODE_NUM_SYMBOLS);
+	memset(d->lencode_lens, 0, LZX_LENCODE_NUM_SYMBOLS);
+
+	/* Decompress blocks until we have all the uncompressed data.  */
+
+	while (out_next != out_end) {
+		int block_type;
+		u32 block_size;
+
+		if (lzx_read_block_header(d, &is, &block_type, &block_size,
+					  recent_offsets))
+			goto invalid;
+
+		if (block_size < 1 || block_size > (size_t)(out_end - out_next))
+			goto invalid;
+
+		if (block_type != LZX_BLOCKTYPE_UNCOMPRESSED) {
+
+			/* Compressed block  */
+
+			if (lzx_decompress_block(d,
+						 &is,
+						 block_type,
+						 block_size,
+						 out_begin,
+						 out_next,
+						 recent_offsets))
+				goto invalid;
+
+			e8_status |= d->maincode_lens[0xe8];
+			out_next += block_size;
+		} else {
+			/* Uncompressed block  */
+
+			out_next = bitstream_read_bytes(&is, out_next,
+							block_size);
+			if (!out_next)
+				goto invalid;
+
+			if (block_size & 1)
+				bitstream_read_byte(&is);
+
+			e8_status = 1;
+		}
+	}
+
+	/* Postprocess the data unless it cannot possibly contain 0xe8 bytes. */
+	if (e8_status)
+		lzx_postprocess(uncompressed_data, uncompressed_size);
+
+	return 0;
+
+invalid:
+	return -1;
+}
+
+/*
+ * lzx_free_decompressor - Free an LZX decompressor
+ *
+ * @decompressor:       A decompressor that was allocated with
+ *			lzx_allocate_decompressor(), or NULL.
+ */
+void lzx_free_decompressor(struct lzx_decompressor *decompressor)
+{
+	kfree(decompressor);
+}
diff --git a/fs/ntfs3/lib/xpress_decompress.c b/fs/ntfs3/lib/xpress_decompress.c
new file mode 100644
index 000000000000..769c6d3dde67
--- /dev/null
+++ b/fs/ntfs3/lib/xpress_decompress.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * xpress_decompress.c - A decompressor for the XPRESS compression format
+ * (Huffman variant), which can be used in "System Compressed" files.  This is
+ * based on the code from wimlib.
+ *
+ * Copyright (C) 2015 Eric Biggers
+ */
+
+#include "decompress_common.h"
+#include "lib.h"
+
+#define XPRESS_NUM_SYMBOLS	512
+#define XPRESS_MAX_CODEWORD_LEN	15
+#define XPRESS_MIN_MATCH_LEN	3
+
+/* This value is chosen for fast decompression.  */
+#define XPRESS_TABLEBITS 12
+
+/* Reusable heap-allocated memory for XPRESS decompression  */
+struct xpress_decompressor {
+
+	/* The Huffman decoding table  */
+	u16 decode_table[(1 << XPRESS_TABLEBITS) + 2 * XPRESS_NUM_SYMBOLS];
+
+	/* An array that maps symbols to codeword lengths  */
+	u8 lens[XPRESS_NUM_SYMBOLS];
+
+	/* Temporary space for make_huffman_decode_table()  */
+	u16 working_space[2 * (1 + XPRESS_MAX_CODEWORD_LEN) +
+			  XPRESS_NUM_SYMBOLS];
+};
+
+/*
+ * xpress_allocate_decompressor - Allocate an XPRESS decompressor
+ *
+ * Return the pointer to the decompressor on success, or return NULL and set
+ * errno on failure.
+ */
+struct xpress_decompressor *xpress_allocate_decompressor(void)
+{
+	return kmalloc(sizeof(struct xpress_decompressor), GFP_NOFS);
+}
+
+/*
+ * xpress_decompress - Decompress a buffer of XPRESS-compressed data
+ *
+ * @decompressor:       A decompressor that was allocated with
+ *			xpress_allocate_decompressor()
+ * @compressed_data:	The buffer of data to decompress
+ * @compressed_size:	Number of bytes of compressed data
+ * @uncompressed_data:	The buffer in which to store the decompressed data
+ * @uncompressed_size:	The number of bytes the data decompresses into
+ *
+ * Return 0 on success, or return -1 and set errno on failure.
+ */
+int xpress_decompress(struct xpress_decompressor *decompressor,
+		      const void *compressed_data, size_t compressed_size,
+		      void *uncompressed_data, size_t uncompressed_size)
+{
+	struct xpress_decompressor *d = decompressor;
+	const u8 * const in_begin = compressed_data;
+	u8 * const out_begin = uncompressed_data;
+	u8 *out_next = out_begin;
+	u8 * const out_end = out_begin + uncompressed_size;
+	struct input_bitstream is;
+	u32 i;
+
+	/* Read the Huffman codeword lengths.  */
+	if (compressed_size < XPRESS_NUM_SYMBOLS / 2)
+		goto invalid;
+	for (i = 0; i < XPRESS_NUM_SYMBOLS / 2; i++) {
+		d->lens[i*2 + 0] = in_begin[i] & 0xF;
+		d->lens[i*2 + 1] = in_begin[i] >> 4;
+	}
+
+	/* Build a decoding table for the Huffman code.  */
+	if (make_huffman_decode_table(d->decode_table, XPRESS_NUM_SYMBOLS,
+				      XPRESS_TABLEBITS, d->lens,
+				      XPRESS_MAX_CODEWORD_LEN,
+				      d->working_space))
+		goto invalid;
+
+	/* Decode the matches and literals.  */
+
+	init_input_bitstream(&is, in_begin + XPRESS_NUM_SYMBOLS / 2,
+			     compressed_size - XPRESS_NUM_SYMBOLS / 2);
+
+	while (out_next != out_end) {
+		u32 sym;
+		u32 log2_offset;
+		u32 length;
+		u32 offset;
+
+		sym = read_huffsym(&is, d->decode_table,
+				   XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN);
+		if (sym < 256) {
+			/* Literal  */
+			*out_next++ = sym;
+		} else {
+			/* Match  */
+			length = sym & 0xf;
+			log2_offset = (sym >> 4) & 0xf;
+
+			bitstream_ensure_bits(&is, 16);
+
+			offset = ((u32)1 << log2_offset) |
+				 bitstream_pop_bits(&is, log2_offset);
+
+			if (length == 0xf) {
+				length += bitstream_read_byte(&is);
+				if (length == 0xf + 0xff)
+					length = bitstream_read_u16(&is);
+			}
+			length += XPRESS_MIN_MATCH_LEN;
+
+			if (offset > (size_t)(out_next - out_begin))
+				goto invalid;
+
+			if (length > (size_t)(out_end - out_next))
+				goto invalid;
+
+			out_next = lz_copy(out_next, length, offset, out_end,
+					   XPRESS_MIN_MATCH_LEN);
+		}
+	}
+	return 0;
+
+invalid:
+	return -1;
+}
+
+/*
+ * xpress_free_decompressor - Free an XPRESS decompressor
+ *
+ * @decompressor:       A decompressor that was allocated with
+ *			xpress_allocate_decompressor(), or NULL.
+ */
+void xpress_free_decompressor(struct xpress_decompressor *decompressor)
+{
+	kfree(decompressor);
+}
diff --git a/fs/ntfs3/lznt.c b/fs/ntfs3/lznt.c
new file mode 100644
index 000000000000..f1f691a67cc4
--- /dev/null
+++ b/fs/ntfs3/lznt.c
@@ -0,0 +1,453 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/fs.h>
+#include <linux/nls.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+// clang-format off
+/* Src buffer is zero. */
+#define LZNT_ERROR_ALL_ZEROS	1
+#define LZNT_CHUNK_SIZE		0x1000
+// clang-format on
+
+struct lznt_hash {
+	const u8 *p1;
+	const u8 *p2;
+};
+
+struct lznt {
+	const u8 *unc;
+	const u8 *unc_end;
+	const u8 *best_match;
+	size_t max_len;
+	bool std;
+
+	struct lznt_hash hash[LZNT_CHUNK_SIZE];
+};
+
+static inline size_t get_match_len(const u8 *ptr, const u8 *end, const u8 *prev,
+				   size_t max_len)
+{
+	size_t len = 0;
+
+	while (ptr + len < end && ptr[len] == prev[len] && ++len < max_len)
+		;
+	return len;
+}
+
+static size_t longest_match_std(const u8 *src, struct lznt *ctx)
+{
+	size_t hash_index;
+	size_t len1 = 0, len2 = 0;
+	const u8 **hash;
+
+	hash_index =
+		((40543U * ((((src[0] << 4) ^ src[1]) << 4) ^ src[2])) >> 4) &
+		(LZNT_CHUNK_SIZE - 1);
+
+	hash = &(ctx->hash[hash_index].p1);
+
+	if (hash[0] >= ctx->unc && hash[0] < src && hash[0][0] == src[0] &&
+	    hash[0][1] == src[1] && hash[0][2] == src[2]) {
+		len1 = 3;
+		if (ctx->max_len > 3)
+			len1 += get_match_len(src + 3, ctx->unc_end,
+					      hash[0] + 3, ctx->max_len - 3);
+	}
+
+	if (hash[1] >= ctx->unc && hash[1] < src && hash[1][0] == src[0] &&
+	    hash[1][1] == src[1] && hash[1][2] == src[2]) {
+		len2 = 3;
+		if (ctx->max_len > 3)
+			len2 += get_match_len(src + 3, ctx->unc_end,
+					      hash[1] + 3, ctx->max_len - 3);
+	}
+
+	/* Compare two matches and select the best one. */
+	if (len1 < len2) {
+		ctx->best_match = hash[1];
+		len1 = len2;
+	} else {
+		ctx->best_match = hash[0];
+	}
+
+	hash[1] = hash[0];
+	hash[0] = src;
+	return len1;
+}
+
+static size_t longest_match_best(const u8 *src, struct lznt *ctx)
+{
+	size_t max_len;
+	const u8 *ptr;
+
+	if (ctx->unc >= src || !ctx->max_len)
+		return 0;
+
+	max_len = 0;
+	for (ptr = ctx->unc; ptr < src; ++ptr) {
+		size_t len =
+			get_match_len(src, ctx->unc_end, ptr, ctx->max_len);
+		if (len >= max_len) {
+			max_len = len;
+			ctx->best_match = ptr;
+		}
+	}
+
+	return max_len >= 3 ? max_len : 0;
+}
+
+static const size_t s_max_len[] = {
+	0x1002, 0x802, 0x402, 0x202, 0x102, 0x82, 0x42, 0x22, 0x12,
+};
+
+static const size_t s_max_off[] = {
+	0x10, 0x20, 0x40, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
+};
+
+static inline u16 make_pair(size_t offset, size_t len, size_t index)
+{
+	return ((offset - 1) << (12 - index)) |
+	       ((len - 3) & (((1 << (12 - index)) - 1)));
+}
+
+static inline size_t parse_pair(u16 pair, size_t *offset, size_t index)
+{
+	*offset = 1 + (pair >> (12 - index));
+	return 3 + (pair & ((1 << (12 - index)) - 1));
+}
+
+/*
+ * compress_chunk
+ *
+ * Return:
+ * * 0	- Ok, @cmpr contains @cmpr_chunk_size bytes of compressed data.
+ * * 1	- Input buffer is full zero.
+ * * -2 - The compressed buffer is too small to hold the compressed data.
+ */
+static inline int compress_chunk(size_t (*match)(const u8 *, struct lznt *),
+				 const u8 *unc, const u8 *unc_end, u8 *cmpr,
+				 u8 *cmpr_end, size_t *cmpr_chunk_size,
+				 struct lznt *ctx)
+{
+	size_t cnt = 0;
+	size_t idx = 0;
+	const u8 *up = unc;
+	u8 *cp = cmpr + 3;
+	u8 *cp2 = cmpr + 2;
+	u8 not_zero = 0;
+	/* Control byte of 8-bit values: ( 0 - means byte as is, 1 - short pair ). */
+	u8 ohdr = 0;
+	u8 *last;
+	u16 t16;
+
+	if (unc + LZNT_CHUNK_SIZE < unc_end)
+		unc_end = unc + LZNT_CHUNK_SIZE;
+
+	last = min(cmpr + LZNT_CHUNK_SIZE + sizeof(short), cmpr_end);
+
+	ctx->unc = unc;
+	ctx->unc_end = unc_end;
+	ctx->max_len = s_max_len[0];
+
+	while (up < unc_end) {
+		size_t max_len;
+
+		while (unc + s_max_off[idx] < up)
+			ctx->max_len = s_max_len[++idx];
+
+		/* Find match. */
+		max_len = up + 3 <= unc_end ? (*match)(up, ctx) : 0;
+
+		if (!max_len) {
+			if (cp >= last)
+				goto NotCompressed;
+			not_zero |= *cp++ = *up++;
+		} else if (cp + 1 >= last) {
+			goto NotCompressed;
+		} else {
+			t16 = make_pair(up - ctx->best_match, max_len, idx);
+			*cp++ = t16;
+			*cp++ = t16 >> 8;
+
+			ohdr |= 1 << cnt;
+			up += max_len;
+		}
+
+		cnt = (cnt + 1) & 7;
+		if (!cnt) {
+			*cp2 = ohdr;
+			ohdr = 0;
+			cp2 = cp;
+			cp += 1;
+		}
+	}
+
+	if (cp2 < last)
+		*cp2 = ohdr;
+	else
+		cp -= 1;
+
+	*cmpr_chunk_size = cp - cmpr;
+
+	t16 = (*cmpr_chunk_size - 3) | 0xB000;
+	cmpr[0] = t16;
+	cmpr[1] = t16 >> 8;
+
+	return not_zero ? 0 : LZNT_ERROR_ALL_ZEROS;
+
+NotCompressed:
+
+	if ((cmpr + LZNT_CHUNK_SIZE + sizeof(short)) > last)
+		return -2;
+
+	/*
+	 * Copy non cmpr data.
+	 * 0x3FFF == ((LZNT_CHUNK_SIZE + 2 - 3) | 0x3000)
+	 */
+	cmpr[0] = 0xff;
+	cmpr[1] = 0x3f;
+
+	memcpy(cmpr + sizeof(short), unc, LZNT_CHUNK_SIZE);
+	*cmpr_chunk_size = LZNT_CHUNK_SIZE + sizeof(short);
+
+	return 0;
+}
+
+static inline ssize_t decompress_chunk(u8 *unc, u8 *unc_end, const u8 *cmpr,
+				       const u8 *cmpr_end)
+{
+	u8 *up = unc;
+	u8 ch = *cmpr++;
+	size_t bit = 0;
+	size_t index = 0;
+	u16 pair;
+	size_t offset, length;
+
+	/* Do decompression until pointers are inside range. */
+	while (up < unc_end && cmpr < cmpr_end) {
+		/* Correct index */
+		while (unc + s_max_off[index] < up)
+			index += 1;
+
+		/* Check the current flag for zero. */
+		if (!(ch & (1 << bit))) {
+			/* Just copy byte. */
+			*up++ = *cmpr++;
+			goto next;
+		}
+
+		/* Check for boundary. */
+		if (cmpr + 1 >= cmpr_end)
+			return -EINVAL;
+
+		/* Read a short from little endian stream. */
+		pair = cmpr[1];
+		pair <<= 8;
+		pair |= cmpr[0];
+
+		cmpr += 2;
+
+		/* Translate packed information into offset and length. */
+		length = parse_pair(pair, &offset, index);
+
+		/* Check offset for boundary. */
+		if (unc + offset > up)
+			return -EINVAL;
+
+		/* Truncate the length if necessary. */
+		if (up + length >= unc_end)
+			length = unc_end - up;
+
+		/* Now we copy bytes. This is the heart of LZ algorithm. */
+		for (; length > 0; length--, up++)
+			*up = *(up - offset);
+
+next:
+		/* Advance flag bit value. */
+		bit = (bit + 1) & 7;
+
+		if (!bit) {
+			if (cmpr >= cmpr_end)
+				break;
+
+			ch = *cmpr++;
+		}
+	}
+
+	/* Return the size of uncompressed data. */
+	return up - unc;
+}
+
+/*
+ * get_lznt_ctx
+ * @level: 0 - Standard compression.
+ * 	   !0 - Best compression, requires a lot of cpu.
+ */
+struct lznt *get_lznt_ctx(int level)
+{
+	struct lznt *r = kzalloc(level ? offsetof(struct lznt, hash)
+				       : sizeof(struct lznt),
+				 GFP_NOFS);
+
+	if (r)
+		r->std = !level;
+	return r;
+}
+
+/*
+ * compress_lznt - Compresses @unc into @cmpr
+ *
+ * Return:
+ * * +x - Ok, @cmpr contains 'final_compressed_size' bytes of compressed data.
+ * * 0 - Input buffer is full zero.
+ */
+size_t compress_lznt(const void *unc, size_t unc_size, void *cmpr,
+		     size_t cmpr_size, struct lznt *ctx)
+{
+	int err;
+	size_t (*match)(const u8 *src, struct lznt *ctx);
+	u8 *p = cmpr;
+	u8 *end = p + cmpr_size;
+	const u8 *unc_chunk = unc;
+	const u8 *unc_end = unc_chunk + unc_size;
+	bool is_zero = true;
+
+	if (ctx->std) {
+		match = &longest_match_std;
+		memset(ctx->hash, 0, sizeof(ctx->hash));
+	} else {
+		match = &longest_match_best;
+	}
+
+	/* Compression cycle. */
+	for (; unc_chunk < unc_end; unc_chunk += LZNT_CHUNK_SIZE) {
+		cmpr_size = 0;
+		err = compress_chunk(match, unc_chunk, unc_end, p, end,
+				     &cmpr_size, ctx);
+		if (err < 0)
+			return unc_size;
+
+		if (is_zero && err != LZNT_ERROR_ALL_ZEROS)
+			is_zero = false;
+
+		p += cmpr_size;
+	}
+
+	if (p <= end - 2)
+		p[0] = p[1] = 0;
+
+	return is_zero ? 0 : PtrOffset(cmpr, p);
+}
+
+/*
+ * decompress_lznt - Decompress @cmpr into @unc.
+ */
+ssize_t decompress_lznt(const void *cmpr, size_t cmpr_size, void *unc,
+			size_t unc_size)
+{
+	const u8 *cmpr_chunk = cmpr;
+	const u8 *cmpr_end = cmpr_chunk + cmpr_size;
+	u8 *unc_chunk = unc;
+	u8 *unc_end = unc_chunk + unc_size;
+	u16 chunk_hdr;
+
+	if (cmpr_size < sizeof(short))
+		return -EINVAL;
+
+	/* Read chunk header. */
+	chunk_hdr = cmpr_chunk[1];
+	chunk_hdr <<= 8;
+	chunk_hdr |= cmpr_chunk[0];
+
+	/* Loop through decompressing chunks. */
+	for (;;) {
+		size_t chunk_size_saved;
+		size_t unc_use;
+		size_t cmpr_use = 3 + (chunk_hdr & (LZNT_CHUNK_SIZE - 1));
+
+		/* Check that the chunk actually fits the supplied buffer. */
+		if (cmpr_chunk + cmpr_use > cmpr_end)
+			return -EINVAL;
+
+		/* First make sure the chunk contains compressed data. */
+		if (chunk_hdr & 0x8000) {
+			/* Decompress a chunk and return if we get an error. */
+			ssize_t err =
+				decompress_chunk(unc_chunk, unc_end,
+						 cmpr_chunk + sizeof(chunk_hdr),
+						 cmpr_chunk + cmpr_use);
+			if (err < 0)
+				return err;
+			unc_use = err;
+		} else {
+			/* This chunk does not contain compressed data. */
+			unc_use = unc_chunk + LZNT_CHUNK_SIZE > unc_end
+					  ? unc_end - unc_chunk
+					  : LZNT_CHUNK_SIZE;
+
+			if (cmpr_chunk + sizeof(chunk_hdr) + unc_use >
+			    cmpr_end) {
+				return -EINVAL;
+			}
+
+			memcpy(unc_chunk, cmpr_chunk + sizeof(chunk_hdr),
+			       unc_use);
+		}
+
+		/* Advance pointers. */
+		cmpr_chunk += cmpr_use;
+		unc_chunk += unc_use;
+
+		/* Check for the end of unc buffer. */
+		if (unc_chunk >= unc_end)
+			break;
+
+		/* Proceed the next chunk. */
+		if (cmpr_chunk > cmpr_end - 2)
+			break;
+
+		chunk_size_saved = LZNT_CHUNK_SIZE;
+
+		/* Read chunk header. */
+		chunk_hdr = cmpr_chunk[1];
+		chunk_hdr <<= 8;
+		chunk_hdr |= cmpr_chunk[0];
+
+		if (!chunk_hdr)
+			break;
+
+		/* Check the size of unc buffer. */
+		if (unc_use < chunk_size_saved) {
+			size_t t1 = chunk_size_saved - unc_use;
+			u8 *t2 = unc_chunk + t1;
+
+			/* 'Zero' memory. */
+			if (t2 >= unc_end)
+				break;
+
+			memset(unc_chunk, 0, t1);
+			unc_chunk = t2;
+		}
+	}
+
+	/* Check compression boundary. */
+	if (cmpr_chunk > cmpr_end)
+		return -EINVAL;
+
+	/*
+	 * The unc size is just a difference between current
+	 * pointer and original one.
+	 */
+	return PtrOffset(unc, unc_chunk);
+}
diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c
new file mode 100644
index 000000000000..e58415d07132
--- /dev/null
+++ b/fs/ntfs3/namei.c
@@ -0,0 +1,411 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/fs.h>
+#include <linux/iversion.h>
+#include <linux/namei.h>
+#include <linux/nls.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+/*
+ * fill_name_de - Format NTFS_DE in @buf.
+ */
+int fill_name_de(struct ntfs_sb_info *sbi, void *buf, const struct qstr *name,
+		 const struct cpu_str *uni)
+{
+	int err;
+	struct NTFS_DE *e = buf;
+	u16 data_size;
+	struct ATTR_FILE_NAME *fname = (struct ATTR_FILE_NAME *)(e + 1);
+
+#ifndef CONFIG_NTFS3_64BIT_CLUSTER
+	e->ref.high = fname->home.high = 0;
+#endif
+	if (uni) {
+#ifdef __BIG_ENDIAN
+		int ulen = uni->len;
+		__le16 *uname = fname->name;
+		const u16 *name_cpu = uni->name;
+
+		while (ulen--)
+			*uname++ = cpu_to_le16(*name_cpu++);
+#else
+		memcpy(fname->name, uni->name, uni->len * sizeof(u16));
+#endif
+		fname->name_len = uni->len;
+
+	} else {
+		/* Convert input string to unicode. */
+		err = ntfs_nls_to_utf16(sbi, name->name, name->len,
+					(struct cpu_str *)&fname->name_len,
+					NTFS_NAME_LEN, UTF16_LITTLE_ENDIAN);
+		if (err < 0)
+			return err;
+	}
+
+	fname->type = FILE_NAME_POSIX;
+	data_size = fname_full_size(fname);
+
+	e->size = cpu_to_le16(ALIGN(data_size, 8) + sizeof(struct NTFS_DE));
+	e->key_size = cpu_to_le16(data_size);
+	e->flags = 0;
+	e->res = 0;
+
+	return 0;
+}
+
+/*
+ * ntfs_lookup - inode_operations::lookup
+ */
+static struct dentry *ntfs_lookup(struct inode *dir, struct dentry *dentry,
+				  u32 flags)
+{
+	struct ntfs_inode *ni = ntfs_i(dir);
+	struct cpu_str *uni = __getname();
+	struct inode *inode;
+	int err;
+
+	if (!uni)
+		inode = ERR_PTR(-ENOMEM);
+	else {
+		err = ntfs_nls_to_utf16(ni->mi.sbi, dentry->d_name.name,
+					dentry->d_name.len, uni, NTFS_NAME_LEN,
+					UTF16_HOST_ENDIAN);
+		if (err < 0)
+			inode = ERR_PTR(err);
+		else {
+			ni_lock(ni);
+			inode = dir_search_u(dir, uni, NULL);
+			ni_unlock(ni);
+		}
+		__putname(uni);
+	}
+
+	return d_splice_alias(inode, dentry);
+}
+
+/*
+ * ntfs_create - inode_operations::create
+ */
+static int ntfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+		       struct dentry *dentry, umode_t mode, bool excl)
+{
+	struct ntfs_inode *ni = ntfs_i(dir);
+	struct inode *inode;
+
+	ni_lock_dir(ni);
+
+	inode = ntfs_create_inode(mnt_userns, dir, dentry, NULL, S_IFREG | mode,
+				  0, NULL, 0, NULL);
+
+	ni_unlock(ni);
+
+	return IS_ERR(inode) ? PTR_ERR(inode) : 0;
+}
+
+/*
+ * ntfs_mknod
+ *
+ * inode_operations::mknod
+ */
+static int ntfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+		      struct dentry *dentry, umode_t mode, dev_t rdev)
+{
+	struct ntfs_inode *ni = ntfs_i(dir);
+	struct inode *inode;
+
+	ni_lock_dir(ni);
+
+	inode = ntfs_create_inode(mnt_userns, dir, dentry, NULL, mode, rdev,
+				  NULL, 0, NULL);
+
+	ni_unlock(ni);
+
+	return IS_ERR(inode) ? PTR_ERR(inode) : 0;
+}
+
+/*
+ * ntfs_link - inode_operations::link
+ */
+static int ntfs_link(struct dentry *ode, struct inode *dir, struct dentry *de)
+{
+	int err;
+	struct inode *inode = d_inode(ode);
+	struct ntfs_inode *ni = ntfs_i(inode);
+
+	if (S_ISDIR(inode->i_mode))
+		return -EPERM;
+
+	if (inode->i_nlink >= NTFS_LINK_MAX)
+		return -EMLINK;
+
+	ni_lock_dir(ntfs_i(dir));
+	if (inode != dir)
+		ni_lock(ni);
+
+	inc_nlink(inode);
+	ihold(inode);
+
+	err = ntfs_link_inode(inode, de);
+
+	if (!err) {
+		dir->i_ctime = dir->i_mtime = inode->i_ctime =
+			current_time(dir);
+		mark_inode_dirty(inode);
+		mark_inode_dirty(dir);
+		d_instantiate(de, inode);
+	} else {
+		drop_nlink(inode);
+		iput(inode);
+	}
+
+	if (inode != dir)
+		ni_unlock(ni);
+	ni_unlock(ntfs_i(dir));
+
+	return err;
+}
+
+/*
+ * ntfs_unlink - inode_operations::unlink
+ */
+static int ntfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct ntfs_inode *ni = ntfs_i(dir);
+	int err;
+
+	ni_lock_dir(ni);
+
+	err = ntfs_unlink_inode(dir, dentry);
+
+	ni_unlock(ni);
+
+	return err;
+}
+
+/*
+ * ntfs_symlink - inode_operations::symlink
+ */
+static int ntfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+			struct dentry *dentry, const char *symname)
+{
+	u32 size = strlen(symname);
+	struct inode *inode;
+	struct ntfs_inode *ni = ntfs_i(dir);
+
+	ni_lock_dir(ni);
+
+	inode = ntfs_create_inode(mnt_userns, dir, dentry, NULL, S_IFLNK | 0777,
+				  0, symname, size, NULL);
+
+	ni_unlock(ni);
+
+	return IS_ERR(inode) ? PTR_ERR(inode) : 0;
+}
+
+/*
+ * ntfs_mkdir- inode_operations::mkdir
+ */
+static int ntfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+		      struct dentry *dentry, umode_t mode)
+{
+	struct inode *inode;
+	struct ntfs_inode *ni = ntfs_i(dir);
+
+	ni_lock_dir(ni);
+
+	inode = ntfs_create_inode(mnt_userns, dir, dentry, NULL, S_IFDIR | mode,
+				  0, NULL, 0, NULL);
+
+	ni_unlock(ni);
+
+	return IS_ERR(inode) ? PTR_ERR(inode) : 0;
+}
+
+/*
+ * ntfs_rmdir - inode_operations::rm_dir
+ */
+static int ntfs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	struct ntfs_inode *ni = ntfs_i(dir);
+	int err;
+
+	ni_lock_dir(ni);
+
+	err = ntfs_unlink_inode(dir, dentry);
+
+	ni_unlock(ni);
+
+	return err;
+}
+
+/*
+ * ntfs_rename - inode_operations::rename
+ */
+static int ntfs_rename(struct user_namespace *mnt_userns, struct inode *dir,
+		       struct dentry *dentry, struct inode *new_dir,
+		       struct dentry *new_dentry, u32 flags)
+{
+	int err;
+	struct super_block *sb = dir->i_sb;
+	struct ntfs_sb_info *sbi = sb->s_fs_info;
+	struct ntfs_inode *dir_ni = ntfs_i(dir);
+	struct ntfs_inode *new_dir_ni = ntfs_i(new_dir);
+	struct inode *inode = d_inode(dentry);
+	struct ntfs_inode *ni = ntfs_i(inode);
+	struct inode *new_inode = d_inode(new_dentry);
+	struct NTFS_DE *de, *new_de;
+	bool is_same, is_bad;
+	/*
+	 * de		- memory of PATH_MAX bytes:
+	 * [0-1024)	- original name (dentry->d_name)
+	 * [1024-2048)	- paired to original name, usually DOS variant of dentry->d_name
+	 * [2048-3072)	- new name (new_dentry->d_name)
+	 */
+	static_assert(SIZEOF_ATTRIBUTE_FILENAME_MAX + SIZEOF_RESIDENT < 1024);
+	static_assert(SIZEOF_ATTRIBUTE_FILENAME_MAX + sizeof(struct NTFS_DE) <
+		      1024);
+	static_assert(PATH_MAX >= 4 * 1024);
+
+	if (flags & ~RENAME_NOREPLACE)
+		return -EINVAL;
+
+	is_same = dentry->d_name.len == new_dentry->d_name.len &&
+		  !memcmp(dentry->d_name.name, new_dentry->d_name.name,
+			  dentry->d_name.len);
+
+	if (is_same && dir == new_dir) {
+		/* Nothing to do. */
+		return 0;
+	}
+
+	if (ntfs_is_meta_file(sbi, inode->i_ino)) {
+		/* Should we print an error? */
+		return -EINVAL;
+	}
+
+	if (new_inode) {
+		/* Target name exists. Unlink it. */
+		dget(new_dentry);
+		ni_lock_dir(new_dir_ni);
+		err = ntfs_unlink_inode(new_dir, new_dentry);
+		ni_unlock(new_dir_ni);
+		dput(new_dentry);
+		if (err)
+			return err;
+	}
+
+	/* Allocate PATH_MAX bytes. */
+	de = __getname();
+	if (!de)
+		return -ENOMEM;
+
+	/* Translate dentry->d_name into unicode form. */
+	err = fill_name_de(sbi, de, &dentry->d_name, NULL);
+	if (err < 0)
+		goto out;
+
+	if (is_same) {
+		/* Reuse 'de'. */
+		new_de = de;
+	} else {
+		/* Translate new_dentry->d_name into unicode form. */
+		new_de = Add2Ptr(de, 2048);
+		err = fill_name_de(sbi, new_de, &new_dentry->d_name, NULL);
+		if (err < 0)
+			goto out;
+	}
+
+	ni_lock_dir(dir_ni);
+	ni_lock(ni);
+
+	is_bad = false;
+	err = ni_rename(dir_ni, new_dir_ni, ni, de, new_de, &is_bad);
+	if (is_bad) {
+		/* Restore after failed rename failed too. */
+		make_bad_inode(inode);
+		ntfs_inode_err(inode, "failed to undo rename");
+		ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
+	} else if (!err) {
+		inode->i_ctime = dir->i_ctime = dir->i_mtime =
+			current_time(dir);
+		mark_inode_dirty(inode);
+		mark_inode_dirty(dir);
+		if (dir != new_dir) {
+			new_dir->i_mtime = new_dir->i_ctime = dir->i_ctime;
+			mark_inode_dirty(new_dir);
+		}
+
+		if (IS_DIRSYNC(dir))
+			ntfs_sync_inode(dir);
+
+		if (IS_DIRSYNC(new_dir))
+			ntfs_sync_inode(inode);
+	}
+
+	ni_unlock(ni);
+	ni_unlock(dir_ni);
+out:
+	__putname(de);
+	return err;
+}
+
+struct dentry *ntfs3_get_parent(struct dentry *child)
+{
+	struct inode *inode = d_inode(child);
+	struct ntfs_inode *ni = ntfs_i(inode);
+
+	struct ATTR_LIST_ENTRY *le = NULL;
+	struct ATTRIB *attr = NULL;
+	struct ATTR_FILE_NAME *fname;
+
+	while ((attr = ni_find_attr(ni, attr, &le, ATTR_NAME, NULL, 0, NULL,
+				    NULL))) {
+		fname = resident_data_ex(attr, SIZEOF_ATTRIBUTE_FILENAME);
+		if (!fname)
+			continue;
+
+		return d_obtain_alias(
+			ntfs_iget5(inode->i_sb, &fname->home, NULL));
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+
+// clang-format off
+const struct inode_operations ntfs_dir_inode_operations = {
+	.lookup		= ntfs_lookup,
+	.create		= ntfs_create,
+	.link		= ntfs_link,
+	.unlink		= ntfs_unlink,
+	.symlink	= ntfs_symlink,
+	.mkdir		= ntfs_mkdir,
+	.rmdir		= ntfs_rmdir,
+	.mknod		= ntfs_mknod,
+	.rename		= ntfs_rename,
+	.permission	= ntfs_permission,
+	.get_acl	= ntfs_get_acl,
+	.set_acl	= ntfs_set_acl,
+	.setattr	= ntfs3_setattr,
+	.getattr	= ntfs_getattr,
+	.listxattr	= ntfs_listxattr,
+	.fiemap		= ntfs_fiemap,
+};
+
+const struct inode_operations ntfs_special_inode_operations = {
+	.setattr	= ntfs3_setattr,
+	.getattr	= ntfs_getattr,
+	.listxattr	= ntfs_listxattr,
+	.get_acl	= ntfs_get_acl,
+	.set_acl	= ntfs_set_acl,
+};
+// clang-format on
diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h
new file mode 100644
index 000000000000..6bb3e595263b
--- /dev/null
+++ b/fs/ntfs3/ntfs.h
@@ -0,0 +1,1216 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ * on-disk ntfs structs
+ */
+
+// clang-format off
+#ifndef _LINUX_NTFS3_NTFS_H
+#define _LINUX_NTFS3_NTFS_H
+
+/* TODO: Check 4K MFT record and 512 bytes cluster. */
+
+/* Activate this define to use binary search in indexes. */
+#define NTFS3_INDEX_BINARY_SEARCH
+
+/* Check each run for marked clusters. */
+#define NTFS3_CHECK_FREE_CLST
+
+#define NTFS_NAME_LEN 255
+
+/* ntfs.sys used 500 maximum links on-disk struct allows up to 0xffff. */
+#define NTFS_LINK_MAX 0x400
+//#define NTFS_LINK_MAX 0xffff
+
+/*
+ * Activate to use 64 bit clusters instead of 32 bits in ntfs.sys.
+ * Logical and virtual cluster number if needed, may be
+ * redefined to use 64 bit value.
+ */
+//#define CONFIG_NTFS3_64BIT_CLUSTER
+
+#define NTFS_LZNT_MAX_CLUSTER	4096
+#define NTFS_LZNT_CUNIT		4
+#define NTFS_LZNT_CLUSTERS	(1u<<NTFS_LZNT_CUNIT)
+
+struct GUID {
+	__le32 Data1;
+	__le16 Data2;
+	__le16 Data3;
+	u8 Data4[8];
+};
+
+/*
+ * This struct repeats layout of ATTR_FILE_NAME
+ * at offset 0x40.
+ * It used to store global constants NAME_MFT/NAME_MIRROR...
+ * most constant names are shorter than 10.
+ */
+struct cpu_str {
+	u8 len;
+	u8 unused;
+	u16 name[10];
+};
+
+struct le_str {
+	u8 len;
+	u8 unused;
+	__le16 name[];
+};
+
+static_assert(SECTOR_SHIFT == 9);
+
+#ifdef CONFIG_NTFS3_64BIT_CLUSTER
+typedef u64 CLST;
+static_assert(sizeof(size_t) == 8);
+#else
+typedef u32 CLST;
+#endif
+
+#define SPARSE_LCN64   ((u64)-1)
+#define SPARSE_LCN     ((CLST)-1)
+#define RESIDENT_LCN   ((CLST)-2)
+#define COMPRESSED_LCN ((CLST)-3)
+
+#define COMPRESSION_UNIT     4
+#define COMPRESS_MAX_CLUSTER 0x1000
+#define MFT_INCREASE_CHUNK   1024
+
+enum RECORD_NUM {
+	MFT_REC_MFT		= 0,
+	MFT_REC_MIRR		= 1,
+	MFT_REC_LOG		= 2,
+	MFT_REC_VOL		= 3,
+	MFT_REC_ATTR		= 4,
+	MFT_REC_ROOT		= 5,
+	MFT_REC_BITMAP		= 6,
+	MFT_REC_BOOT		= 7,
+	MFT_REC_BADCLUST	= 8,
+	//MFT_REC_QUOTA		= 9,
+	MFT_REC_SECURE		= 9, // NTFS 3.0
+	MFT_REC_UPCASE		= 10,
+	MFT_REC_EXTEND		= 11, // NTFS 3.0
+	MFT_REC_RESERVED	= 11,
+	MFT_REC_FREE		= 16,
+	MFT_REC_USER		= 24,
+};
+
+enum ATTR_TYPE {
+	ATTR_ZERO		= cpu_to_le32(0x00),
+	ATTR_STD		= cpu_to_le32(0x10),
+	ATTR_LIST		= cpu_to_le32(0x20),
+	ATTR_NAME		= cpu_to_le32(0x30),
+	// ATTR_VOLUME_VERSION on Nt4
+	ATTR_ID			= cpu_to_le32(0x40),
+	ATTR_SECURE		= cpu_to_le32(0x50),
+	ATTR_LABEL		= cpu_to_le32(0x60),
+	ATTR_VOL_INFO		= cpu_to_le32(0x70),
+	ATTR_DATA		= cpu_to_le32(0x80),
+	ATTR_ROOT		= cpu_to_le32(0x90),
+	ATTR_ALLOC		= cpu_to_le32(0xA0),
+	ATTR_BITMAP		= cpu_to_le32(0xB0),
+	// ATTR_SYMLINK on Nt4
+	ATTR_REPARSE		= cpu_to_le32(0xC0),
+	ATTR_EA_INFO		= cpu_to_le32(0xD0),
+	ATTR_EA			= cpu_to_le32(0xE0),
+	ATTR_PROPERTYSET	= cpu_to_le32(0xF0),
+	ATTR_LOGGED_UTILITY_STREAM = cpu_to_le32(0x100),
+	ATTR_END		= cpu_to_le32(0xFFFFFFFF)
+};
+
+static_assert(sizeof(enum ATTR_TYPE) == 4);
+
+enum FILE_ATTRIBUTE {
+	FILE_ATTRIBUTE_READONLY		= cpu_to_le32(0x00000001),
+	FILE_ATTRIBUTE_HIDDEN		= cpu_to_le32(0x00000002),
+	FILE_ATTRIBUTE_SYSTEM		= cpu_to_le32(0x00000004),
+	FILE_ATTRIBUTE_ARCHIVE		= cpu_to_le32(0x00000020),
+	FILE_ATTRIBUTE_DEVICE		= cpu_to_le32(0x00000040),
+	FILE_ATTRIBUTE_TEMPORARY	= cpu_to_le32(0x00000100),
+	FILE_ATTRIBUTE_SPARSE_FILE	= cpu_to_le32(0x00000200),
+	FILE_ATTRIBUTE_REPARSE_POINT	= cpu_to_le32(0x00000400),
+	FILE_ATTRIBUTE_COMPRESSED	= cpu_to_le32(0x00000800),
+	FILE_ATTRIBUTE_OFFLINE		= cpu_to_le32(0x00001000),
+	FILE_ATTRIBUTE_NOT_CONTENT_INDEXED = cpu_to_le32(0x00002000),
+	FILE_ATTRIBUTE_ENCRYPTED	= cpu_to_le32(0x00004000),
+	FILE_ATTRIBUTE_VALID_FLAGS	= cpu_to_le32(0x00007fb7),
+	FILE_ATTRIBUTE_DIRECTORY	= cpu_to_le32(0x10000000),
+};
+
+static_assert(sizeof(enum FILE_ATTRIBUTE) == 4);
+
+extern const struct cpu_str NAME_MFT;
+extern const struct cpu_str NAME_MIRROR;
+extern const struct cpu_str NAME_LOGFILE;
+extern const struct cpu_str NAME_VOLUME;
+extern const struct cpu_str NAME_ATTRDEF;
+extern const struct cpu_str NAME_ROOT;
+extern const struct cpu_str NAME_BITMAP;
+extern const struct cpu_str NAME_BOOT;
+extern const struct cpu_str NAME_BADCLUS;
+extern const struct cpu_str NAME_QUOTA;
+extern const struct cpu_str NAME_SECURE;
+extern const struct cpu_str NAME_UPCASE;
+extern const struct cpu_str NAME_EXTEND;
+extern const struct cpu_str NAME_OBJID;
+extern const struct cpu_str NAME_REPARSE;
+extern const struct cpu_str NAME_USNJRNL;
+
+extern const __le16 I30_NAME[4];
+extern const __le16 SII_NAME[4];
+extern const __le16 SDH_NAME[4];
+extern const __le16 SO_NAME[2];
+extern const __le16 SQ_NAME[2];
+extern const __le16 SR_NAME[2];
+
+extern const __le16 BAD_NAME[4];
+extern const __le16 SDS_NAME[4];
+extern const __le16 WOF_NAME[17];	/* WofCompressedData */
+
+/* MFT record number structure. */
+struct MFT_REF {
+	__le32 low;	// The low part of the number.
+	__le16 high;	// The high part of the number.
+	__le16 seq;	// The sequence number of MFT record.
+};
+
+static_assert(sizeof(__le64) == sizeof(struct MFT_REF));
+
+static inline CLST ino_get(const struct MFT_REF *ref)
+{
+#ifdef CONFIG_NTFS3_64BIT_CLUSTER
+	return le32_to_cpu(ref->low) | ((u64)le16_to_cpu(ref->high) << 32);
+#else
+	return le32_to_cpu(ref->low);
+#endif
+}
+
+struct NTFS_BOOT {
+	u8 jump_code[3];	// 0x00: Jump to boot code.
+	u8 system_id[8];	// 0x03: System ID, equals "NTFS    "
+
+	// NOTE: This member is not aligned(!)
+	// bytes_per_sector[0] must be 0.
+	// bytes_per_sector[1] must be multiplied by 256.
+	u8 bytes_per_sector[2];	// 0x0B: Bytes per sector.
+
+	u8 sectors_per_clusters;// 0x0D: Sectors per cluster.
+	u8 unused1[7];
+	u8 media_type;		// 0x15: Media type (0xF8 - harddisk)
+	u8 unused2[2];
+	__le16 sct_per_track;	// 0x18: number of sectors per track.
+	__le16 heads;		// 0x1A: number of heads per cylinder.
+	__le32 hidden_sectors;	// 0x1C: number of 'hidden' sectors.
+	u8 unused3[4];
+	u8 bios_drive_num;	// 0x24: BIOS drive number =0x80.
+	u8 unused4;
+	u8 signature_ex;	// 0x26: Extended BOOT signature =0x80.
+	u8 unused5;
+	__le64 sectors_per_volume;// 0x28: Size of volume in sectors.
+	__le64 mft_clst;	// 0x30: First cluster of $MFT
+	__le64 mft2_clst;	// 0x38: First cluster of $MFTMirr
+	s8 record_size;		// 0x40: Size of MFT record in clusters(sectors).
+	u8 unused6[3];
+	s8 index_size;		// 0x44: Size of INDX record in clusters(sectors).
+	u8 unused7[3];
+	__le64 serial_num;	// 0x48: Volume serial number
+	__le32 check_sum;	// 0x50: Simple additive checksum of all
+				// of the u32's which precede the 'check_sum'.
+
+	u8 boot_code[0x200 - 0x50 - 2 - 4]; // 0x54:
+	u8 boot_magic[2];	// 0x1FE: Boot signature =0x55 + 0xAA
+};
+
+static_assert(sizeof(struct NTFS_BOOT) == 0x200);
+
+enum NTFS_SIGNATURE {
+	NTFS_FILE_SIGNATURE = cpu_to_le32(0x454C4946), // 'FILE'
+	NTFS_INDX_SIGNATURE = cpu_to_le32(0x58444E49), // 'INDX'
+	NTFS_CHKD_SIGNATURE = cpu_to_le32(0x444B4843), // 'CHKD'
+	NTFS_RSTR_SIGNATURE = cpu_to_le32(0x52545352), // 'RSTR'
+	NTFS_RCRD_SIGNATURE = cpu_to_le32(0x44524352), // 'RCRD'
+	NTFS_BAAD_SIGNATURE = cpu_to_le32(0x44414142), // 'BAAD'
+	NTFS_HOLE_SIGNATURE = cpu_to_le32(0x454C4F48), // 'HOLE'
+	NTFS_FFFF_SIGNATURE = cpu_to_le32(0xffffffff),
+};
+
+static_assert(sizeof(enum NTFS_SIGNATURE) == 4);
+
+/* MFT Record header structure. */
+struct NTFS_RECORD_HEADER {
+	/* Record magic number, equals 'FILE'/'INDX'/'RSTR'/'RCRD'. */
+	enum NTFS_SIGNATURE sign; // 0x00:
+	__le16 fix_off;		// 0x04:
+	__le16 fix_num;		// 0x06:
+	__le64 lsn;		// 0x08: Log file sequence number,
+};
+
+static_assert(sizeof(struct NTFS_RECORD_HEADER) == 0x10);
+
+static inline int is_baad(const struct NTFS_RECORD_HEADER *hdr)
+{
+	return hdr->sign == NTFS_BAAD_SIGNATURE;
+}
+
+/* Possible bits in struct MFT_REC.flags. */
+enum RECORD_FLAG {
+	RECORD_FLAG_IN_USE	= cpu_to_le16(0x0001),
+	RECORD_FLAG_DIR		= cpu_to_le16(0x0002),
+	RECORD_FLAG_SYSTEM	= cpu_to_le16(0x0004),
+	RECORD_FLAG_UNKNOWN	= cpu_to_le16(0x0008),
+};
+
+/* MFT Record structure. */
+struct MFT_REC {
+	struct NTFS_RECORD_HEADER rhdr; // 'FILE'
+
+	__le16 seq;		// 0x10: Sequence number for this record.
+	__le16 hard_links;	// 0x12: The number of hard links to record.
+	__le16 attr_off;	// 0x14: Offset to attributes.
+	__le16 flags;		// 0x16: See RECORD_FLAG.
+	__le32 used;		// 0x18: The size of used part.
+	__le32 total;		// 0x1C: Total record size.
+
+	struct MFT_REF parent_ref; // 0x20: Parent MFT record.
+	__le16 next_attr_id;	// 0x28: The next attribute Id.
+
+	__le16 res;		// 0x2A: High part of MFT record?
+	__le32 mft_record;	// 0x2C: Current MFT record number.
+	__le16 fixups[];	// 0x30:
+};
+
+#define MFTRECORD_FIXUP_OFFSET_1 offsetof(struct MFT_REC, res)
+#define MFTRECORD_FIXUP_OFFSET_3 offsetof(struct MFT_REC, fixups)
+
+static_assert(MFTRECORD_FIXUP_OFFSET_1 == 0x2A);
+static_assert(MFTRECORD_FIXUP_OFFSET_3 == 0x30);
+
+static inline bool is_rec_base(const struct MFT_REC *rec)
+{
+	const struct MFT_REF *r = &rec->parent_ref;
+
+	return !r->low && !r->high && !r->seq;
+}
+
+static inline bool is_mft_rec5(const struct MFT_REC *rec)
+{
+	return le16_to_cpu(rec->rhdr.fix_off) >=
+	       offsetof(struct MFT_REC, fixups);
+}
+
+static inline bool is_rec_inuse(const struct MFT_REC *rec)
+{
+	return rec->flags & RECORD_FLAG_IN_USE;
+}
+
+static inline bool clear_rec_inuse(struct MFT_REC *rec)
+{
+	return rec->flags &= ~RECORD_FLAG_IN_USE;
+}
+
+/* Possible values of ATTR_RESIDENT.flags */
+#define RESIDENT_FLAG_INDEXED 0x01
+
+struct ATTR_RESIDENT {
+	__le32 data_size;	// 0x10: The size of data.
+	__le16 data_off;	// 0x14: Offset to data.
+	u8 flags;		// 0x16: Resident flags ( 1 - indexed ).
+	u8 res;			// 0x17:
+}; // sizeof() = 0x18
+
+struct ATTR_NONRESIDENT {
+	__le64 svcn;		// 0x10: Starting VCN of this segment.
+	__le64 evcn;		// 0x18: End VCN of this segment.
+	__le16 run_off;		// 0x20: Offset to packed runs.
+	//  Unit of Compression size for this stream, expressed
+	//  as a log of the cluster size.
+	//
+	//	0 means file is not compressed
+	//	1, 2, 3, and 4 are potentially legal values if the
+	//	    stream is compressed, however the implementation
+	//	    may only choose to use 4, or possibly 3.  Note
+	//	    that 4 means cluster size time 16.	If convenient
+	//	    the implementation may wish to accept a
+	//	    reasonable range of legal values here (1-5?),
+	//	    even if the implementation only generates
+	//	    a smaller set of values itself.
+	u8 c_unit;		// 0x22:
+	u8 res1[5];		// 0x23:
+	__le64 alloc_size;	// 0x28: The allocated size of attribute in bytes.
+				// (multiple of cluster size)
+	__le64 data_size;	// 0x30: The size of attribute  in bytes <= alloc_size.
+	__le64 valid_size;	// 0x38: The size of valid part in bytes <= data_size.
+	__le64 total_size;	// 0x40: The sum of the allocated clusters for a file.
+				// (present only for the first segment (0 == vcn)
+				// of compressed attribute)
+
+}; // sizeof()=0x40 or 0x48 (if compressed)
+
+/* Possible values of ATTRIB.flags: */
+#define ATTR_FLAG_COMPRESSED	  cpu_to_le16(0x0001)
+#define ATTR_FLAG_COMPRESSED_MASK cpu_to_le16(0x00FF)
+#define ATTR_FLAG_ENCRYPTED	  cpu_to_le16(0x4000)
+#define ATTR_FLAG_SPARSED	  cpu_to_le16(0x8000)
+
+struct ATTRIB {
+	enum ATTR_TYPE type;	// 0x00: The type of this attribute.
+	__le32 size;		// 0x04: The size of this attribute.
+	u8 non_res;		// 0x08: Is this attribute non-resident?
+	u8 name_len;		// 0x09: This attribute name length.
+	__le16 name_off;	// 0x0A: Offset to the attribute name.
+	__le16 flags;		// 0x0C: See ATTR_FLAG_XXX.
+	__le16 id;		// 0x0E: Unique id (per record).
+
+	union {
+		struct ATTR_RESIDENT res;     // 0x10
+		struct ATTR_NONRESIDENT nres; // 0x10
+	};
+};
+
+/* Define attribute sizes. */
+#define SIZEOF_RESIDENT			0x18
+#define SIZEOF_NONRESIDENT_EX		0x48
+#define SIZEOF_NONRESIDENT		0x40
+
+#define SIZEOF_RESIDENT_LE		cpu_to_le16(0x18)
+#define SIZEOF_NONRESIDENT_EX_LE	cpu_to_le16(0x48)
+#define SIZEOF_NONRESIDENT_LE		cpu_to_le16(0x40)
+
+static inline u64 attr_ondisk_size(const struct ATTRIB *attr)
+{
+	return attr->non_res ? ((attr->flags &
+				 (ATTR_FLAG_COMPRESSED | ATTR_FLAG_SPARSED)) ?
+					le64_to_cpu(attr->nres.total_size) :
+					le64_to_cpu(attr->nres.alloc_size))
+			     : ALIGN(le32_to_cpu(attr->res.data_size), 8);
+}
+
+static inline u64 attr_size(const struct ATTRIB *attr)
+{
+	return attr->non_res ? le64_to_cpu(attr->nres.data_size) :
+			       le32_to_cpu(attr->res.data_size);
+}
+
+static inline bool is_attr_encrypted(const struct ATTRIB *attr)
+{
+	return attr->flags & ATTR_FLAG_ENCRYPTED;
+}
+
+static inline bool is_attr_sparsed(const struct ATTRIB *attr)
+{
+	return attr->flags & ATTR_FLAG_SPARSED;
+}
+
+static inline bool is_attr_compressed(const struct ATTRIB *attr)
+{
+	return attr->flags & ATTR_FLAG_COMPRESSED;
+}
+
+static inline bool is_attr_ext(const struct ATTRIB *attr)
+{
+	return attr->flags & (ATTR_FLAG_SPARSED | ATTR_FLAG_COMPRESSED);
+}
+
+static inline bool is_attr_indexed(const struct ATTRIB *attr)
+{
+	return !attr->non_res && (attr->res.flags & RESIDENT_FLAG_INDEXED);
+}
+
+static inline __le16 const *attr_name(const struct ATTRIB *attr)
+{
+	return Add2Ptr(attr, le16_to_cpu(attr->name_off));
+}
+
+static inline u64 attr_svcn(const struct ATTRIB *attr)
+{
+	return attr->non_res ? le64_to_cpu(attr->nres.svcn) : 0;
+}
+
+/* The size of resident attribute by its resident size. */
+#define BYTES_PER_RESIDENT(b) (0x18 + (b))
+
+static_assert(sizeof(struct ATTRIB) == 0x48);
+static_assert(sizeof(((struct ATTRIB *)NULL)->res) == 0x08);
+static_assert(sizeof(((struct ATTRIB *)NULL)->nres) == 0x38);
+
+static inline void *resident_data_ex(const struct ATTRIB *attr, u32 datasize)
+{
+	u32 asize, rsize;
+	u16 off;
+
+	if (attr->non_res)
+		return NULL;
+
+	asize = le32_to_cpu(attr->size);
+	off = le16_to_cpu(attr->res.data_off);
+
+	if (asize < datasize + off)
+		return NULL;
+
+	rsize = le32_to_cpu(attr->res.data_size);
+	if (rsize < datasize)
+		return NULL;
+
+	return Add2Ptr(attr, off);
+}
+
+static inline void *resident_data(const struct ATTRIB *attr)
+{
+	return Add2Ptr(attr, le16_to_cpu(attr->res.data_off));
+}
+
+static inline void *attr_run(const struct ATTRIB *attr)
+{
+	return Add2Ptr(attr, le16_to_cpu(attr->nres.run_off));
+}
+
+/* Standard information attribute (0x10). */
+struct ATTR_STD_INFO {
+	__le64 cr_time;		// 0x00: File creation file.
+	__le64 m_time;		// 0x08: File modification time.
+	__le64 c_time;		// 0x10: Last time any attribute was modified.
+	__le64 a_time;		// 0x18: File last access time.
+	enum FILE_ATTRIBUTE fa;	// 0x20: Standard DOS attributes & more.
+	__le32 max_ver_num;	// 0x24: Maximum Number of Versions.
+	__le32 ver_num;		// 0x28: Version Number.
+	__le32 class_id;	// 0x2C: Class Id from bidirectional Class Id index.
+};
+
+static_assert(sizeof(struct ATTR_STD_INFO) == 0x30);
+
+#define SECURITY_ID_INVALID 0x00000000
+#define SECURITY_ID_FIRST 0x00000100
+
+struct ATTR_STD_INFO5 {
+	__le64 cr_time;		// 0x00: File creation file.
+	__le64 m_time;		// 0x08: File modification time.
+	__le64 c_time;		// 0x10: Last time any attribute was modified.
+	__le64 a_time;		// 0x18: File last access time.
+	enum FILE_ATTRIBUTE fa;	// 0x20: Standard DOS attributes & more.
+	__le32 max_ver_num;	// 0x24: Maximum Number of Versions.
+	__le32 ver_num;		// 0x28: Version Number.
+	__le32 class_id;	// 0x2C: Class Id from bidirectional Class Id index.
+
+	__le32 owner_id;	// 0x30: Owner Id of the user owning the file.
+	__le32 security_id;	// 0x34: The Security Id is a key in the $SII Index and $SDS.
+	__le64 quota_charge;	// 0x38:
+	__le64 usn;		// 0x40: Last Update Sequence Number of the file. This is a direct
+				// index into the file $UsnJrnl. If zero, the USN Journal is
+				// disabled.
+};
+
+static_assert(sizeof(struct ATTR_STD_INFO5) == 0x48);
+
+/* Attribute list entry structure (0x20) */
+struct ATTR_LIST_ENTRY {
+	enum ATTR_TYPE type;	// 0x00: The type of attribute.
+	__le16 size;		// 0x04: The size of this record.
+	u8 name_len;		// 0x06: The length of attribute name.
+	u8 name_off;		// 0x07: The offset to attribute name.
+	__le64 vcn;		// 0x08: Starting VCN of this attribute.
+	struct MFT_REF ref;	// 0x10: MFT record number with attribute.
+	__le16 id;		// 0x18: struct ATTRIB ID.
+	__le16 name[3];		// 0x1A: Just to align. To get real name can use bNameOffset.
+
+}; // sizeof(0x20)
+
+static_assert(sizeof(struct ATTR_LIST_ENTRY) == 0x20);
+
+static inline u32 le_size(u8 name_len)
+{
+	return ALIGN(offsetof(struct ATTR_LIST_ENTRY, name) +
+		     name_len * sizeof(short), 8);
+}
+
+/* Returns 0 if 'attr' has the same type and name. */
+static inline int le_cmp(const struct ATTR_LIST_ENTRY *le,
+			 const struct ATTRIB *attr)
+{
+	return le->type != attr->type || le->name_len != attr->name_len ||
+	       (!le->name_len &&
+		memcmp(Add2Ptr(le, le->name_off),
+		       Add2Ptr(attr, le16_to_cpu(attr->name_off)),
+		       le->name_len * sizeof(short)));
+}
+
+static inline __le16 const *le_name(const struct ATTR_LIST_ENTRY *le)
+{
+	return Add2Ptr(le, le->name_off);
+}
+
+/* File name types (the field type in struct ATTR_FILE_NAME). */
+#define FILE_NAME_POSIX   0
+#define FILE_NAME_UNICODE 1
+#define FILE_NAME_DOS	  2
+#define FILE_NAME_UNICODE_AND_DOS (FILE_NAME_DOS | FILE_NAME_UNICODE)
+
+/* Filename attribute structure (0x30). */
+struct NTFS_DUP_INFO {
+	__le64 cr_time;		// 0x00: File creation file.
+	__le64 m_time;		// 0x08: File modification time.
+	__le64 c_time;		// 0x10: Last time any attribute was modified.
+	__le64 a_time;		// 0x18: File last access time.
+	__le64 alloc_size;	// 0x20: Data attribute allocated size, multiple of cluster size.
+	__le64 data_size;	// 0x28: Data attribute size <= Dataalloc_size.
+	enum FILE_ATTRIBUTE fa;	// 0x30: Standard DOS attributes & more.
+	__le16 ea_size;		// 0x34: Packed EAs.
+	__le16 reparse;		// 0x36: Used by Reparse.
+
+}; // 0x38
+
+struct ATTR_FILE_NAME {
+	struct MFT_REF home;	// 0x00: MFT record for directory.
+	struct NTFS_DUP_INFO dup;// 0x08:
+	u8 name_len;		// 0x40: File name length in words.
+	u8 type;		// 0x41: File name type.
+	__le16 name[];		// 0x42: File name.
+};
+
+static_assert(sizeof(((struct ATTR_FILE_NAME *)NULL)->dup) == 0x38);
+static_assert(offsetof(struct ATTR_FILE_NAME, name) == 0x42);
+#define SIZEOF_ATTRIBUTE_FILENAME     0x44
+#define SIZEOF_ATTRIBUTE_FILENAME_MAX (0x42 + 255 * 2)
+
+static inline struct ATTRIB *attr_from_name(struct ATTR_FILE_NAME *fname)
+{
+	return (struct ATTRIB *)((char *)fname - SIZEOF_RESIDENT);
+}
+
+static inline u16 fname_full_size(const struct ATTR_FILE_NAME *fname)
+{
+	/* Don't return struct_size(fname, name, fname->name_len); */
+	return offsetof(struct ATTR_FILE_NAME, name) +
+	       fname->name_len * sizeof(short);
+}
+
+static inline u8 paired_name(u8 type)
+{
+	if (type == FILE_NAME_UNICODE)
+		return FILE_NAME_DOS;
+	if (type == FILE_NAME_DOS)
+		return FILE_NAME_UNICODE;
+	return FILE_NAME_POSIX;
+}
+
+/* Index entry defines ( the field flags in NtfsDirEntry ). */
+#define NTFS_IE_HAS_SUBNODES	cpu_to_le16(1)
+#define NTFS_IE_LAST		cpu_to_le16(2)
+
+/* Directory entry structure. */
+struct NTFS_DE {
+	union {
+		struct MFT_REF ref; // 0x00: MFT record number with this file.
+		struct {
+			__le16 data_off;  // 0x00:
+			__le16 data_size; // 0x02:
+			__le32 res;	  // 0x04: Must be 0.
+		} view;
+	};
+	__le16 size;		// 0x08: The size of this entry.
+	__le16 key_size;	// 0x0A: The size of File name length in bytes + 0x42.
+	__le16 flags;		// 0x0C: Entry flags: NTFS_IE_XXX.
+	__le16 res;		// 0x0E:
+
+	// Here any indexed attribute can be placed.
+	// One of them is:
+	// struct ATTR_FILE_NAME AttrFileName;
+	//
+
+	// The last 8 bytes of this structure contains
+	// the VBN of subnode.
+	// !!! Note !!!
+	// This field is presented only if (flags & NTFS_IE_HAS_SUBNODES)
+	// __le64 vbn;
+};
+
+static_assert(sizeof(struct NTFS_DE) == 0x10);
+
+static inline void de_set_vbn_le(struct NTFS_DE *e, __le64 vcn)
+{
+	__le64 *v = Add2Ptr(e, le16_to_cpu(e->size) - sizeof(__le64));
+
+	*v = vcn;
+}
+
+static inline void de_set_vbn(struct NTFS_DE *e, CLST vcn)
+{
+	__le64 *v = Add2Ptr(e, le16_to_cpu(e->size) - sizeof(__le64));
+
+	*v = cpu_to_le64(vcn);
+}
+
+static inline __le64 de_get_vbn_le(const struct NTFS_DE *e)
+{
+	return *(__le64 *)Add2Ptr(e, le16_to_cpu(e->size) - sizeof(__le64));
+}
+
+static inline CLST de_get_vbn(const struct NTFS_DE *e)
+{
+	__le64 *v = Add2Ptr(e, le16_to_cpu(e->size) - sizeof(__le64));
+
+	return le64_to_cpu(*v);
+}
+
+static inline struct NTFS_DE *de_get_next(const struct NTFS_DE *e)
+{
+	return Add2Ptr(e, le16_to_cpu(e->size));
+}
+
+static inline struct ATTR_FILE_NAME *de_get_fname(const struct NTFS_DE *e)
+{
+	return le16_to_cpu(e->key_size) >= SIZEOF_ATTRIBUTE_FILENAME ?
+		       Add2Ptr(e, sizeof(struct NTFS_DE)) :
+		       NULL;
+}
+
+static inline bool de_is_last(const struct NTFS_DE *e)
+{
+	return e->flags & NTFS_IE_LAST;
+}
+
+static inline bool de_has_vcn(const struct NTFS_DE *e)
+{
+	return e->flags & NTFS_IE_HAS_SUBNODES;
+}
+
+static inline bool de_has_vcn_ex(const struct NTFS_DE *e)
+{
+	return (e->flags & NTFS_IE_HAS_SUBNODES) &&
+	       (u64)(-1) != *((u64 *)Add2Ptr(e, le16_to_cpu(e->size) -
+							sizeof(__le64)));
+}
+
+#define MAX_BYTES_PER_NAME_ENTRY \
+	ALIGN(sizeof(struct NTFS_DE) + \
+	      offsetof(struct ATTR_FILE_NAME, name) + \
+	      NTFS_NAME_LEN * sizeof(short), 8)
+
+struct INDEX_HDR {
+	__le32 de_off;	// 0x00: The offset from the start of this structure
+			// to the first NTFS_DE.
+	__le32 used;	// 0x04: The size of this structure plus all
+			// entries (quad-word aligned).
+	__le32 total;	// 0x08: The allocated size of for this structure plus all entries.
+	u8 flags;	// 0x0C: 0x00 = Small directory, 0x01 = Large directory.
+	u8 res[3];
+
+	//
+	// de_off + used <= total
+	//
+};
+
+static_assert(sizeof(struct INDEX_HDR) == 0x10);
+
+static inline struct NTFS_DE *hdr_first_de(const struct INDEX_HDR *hdr)
+{
+	u32 de_off = le32_to_cpu(hdr->de_off);
+	u32 used = le32_to_cpu(hdr->used);
+	struct NTFS_DE *e = Add2Ptr(hdr, de_off);
+	u16 esize;
+
+	if (de_off >= used || de_off >= le32_to_cpu(hdr->total))
+		return NULL;
+
+	esize = le16_to_cpu(e->size);
+	if (esize < sizeof(struct NTFS_DE) || de_off + esize > used)
+		return NULL;
+
+	return e;
+}
+
+static inline struct NTFS_DE *hdr_next_de(const struct INDEX_HDR *hdr,
+					  const struct NTFS_DE *e)
+{
+	size_t off = PtrOffset(hdr, e);
+	u32 used = le32_to_cpu(hdr->used);
+	u16 esize;
+
+	if (off >= used)
+		return NULL;
+
+	esize = le16_to_cpu(e->size);
+
+	if (esize < sizeof(struct NTFS_DE) ||
+	    off + esize + sizeof(struct NTFS_DE) > used)
+		return NULL;
+
+	return Add2Ptr(e, esize);
+}
+
+static inline bool hdr_has_subnode(const struct INDEX_HDR *hdr)
+{
+	return hdr->flags & 1;
+}
+
+struct INDEX_BUFFER {
+	struct NTFS_RECORD_HEADER rhdr; // 'INDX'
+	__le64 vbn; // 0x10: vcn if index >= cluster or vsn id index < cluster
+	struct INDEX_HDR ihdr; // 0x18:
+};
+
+static_assert(sizeof(struct INDEX_BUFFER) == 0x28);
+
+static inline bool ib_is_empty(const struct INDEX_BUFFER *ib)
+{
+	const struct NTFS_DE *first = hdr_first_de(&ib->ihdr);
+
+	return !first || de_is_last(first);
+}
+
+static inline bool ib_is_leaf(const struct INDEX_BUFFER *ib)
+{
+	return !(ib->ihdr.flags & 1);
+}
+
+/* Index root structure ( 0x90 ). */
+enum COLLATION_RULE {
+	NTFS_COLLATION_TYPE_BINARY	= cpu_to_le32(0),
+	// $I30
+	NTFS_COLLATION_TYPE_FILENAME	= cpu_to_le32(0x01),
+	// $SII of $Secure and $Q of Quota
+	NTFS_COLLATION_TYPE_UINT	= cpu_to_le32(0x10),
+	// $O of Quota
+	NTFS_COLLATION_TYPE_SID		= cpu_to_le32(0x11),
+	// $SDH of $Secure
+	NTFS_COLLATION_TYPE_SECURITY_HASH = cpu_to_le32(0x12),
+	// $O of ObjId and "$R" for Reparse
+	NTFS_COLLATION_TYPE_UINTS	= cpu_to_le32(0x13)
+};
+
+static_assert(sizeof(enum COLLATION_RULE) == 4);
+
+//
+struct INDEX_ROOT {
+	enum ATTR_TYPE type;	// 0x00: The type of attribute to index on.
+	enum COLLATION_RULE rule; // 0x04: The rule.
+	__le32 index_block_size;// 0x08: The size of index record.
+	u8 index_block_clst;	// 0x0C: The number of clusters or sectors per index.
+	u8 res[3];
+	struct INDEX_HDR ihdr;	// 0x10:
+};
+
+static_assert(sizeof(struct INDEX_ROOT) == 0x20);
+static_assert(offsetof(struct INDEX_ROOT, ihdr) == 0x10);
+
+#define VOLUME_FLAG_DIRTY	    cpu_to_le16(0x0001)
+#define VOLUME_FLAG_RESIZE_LOG_FILE cpu_to_le16(0x0002)
+
+struct VOLUME_INFO {
+	__le64 res1;	// 0x00
+	u8 major_ver;	// 0x08: NTFS major version number (before .)
+	u8 minor_ver;	// 0x09: NTFS minor version number (after .)
+	__le16 flags;	// 0x0A: Volume flags, see VOLUME_FLAG_XXX
+
+}; // sizeof=0xC
+
+#define SIZEOF_ATTRIBUTE_VOLUME_INFO 0xc
+
+#define NTFS_LABEL_MAX_LENGTH		(0x100 / sizeof(short))
+#define NTFS_ATTR_INDEXABLE		cpu_to_le32(0x00000002)
+#define NTFS_ATTR_DUPALLOWED		cpu_to_le32(0x00000004)
+#define NTFS_ATTR_MUST_BE_INDEXED	cpu_to_le32(0x00000010)
+#define NTFS_ATTR_MUST_BE_NAMED		cpu_to_le32(0x00000020)
+#define NTFS_ATTR_MUST_BE_RESIDENT	cpu_to_le32(0x00000040)
+#define NTFS_ATTR_LOG_ALWAYS		cpu_to_le32(0x00000080)
+
+/* $AttrDef file entry. */
+struct ATTR_DEF_ENTRY {
+	__le16 name[0x40];	// 0x00: Attr name.
+	enum ATTR_TYPE type;	// 0x80: struct ATTRIB type.
+	__le32 res;		// 0x84:
+	enum COLLATION_RULE rule; // 0x88:
+	__le32 flags;		// 0x8C: NTFS_ATTR_XXX (see above).
+	__le64 min_sz;		// 0x90: Minimum attribute data size.
+	__le64 max_sz;		// 0x98: Maximum attribute data size.
+};
+
+static_assert(sizeof(struct ATTR_DEF_ENTRY) == 0xa0);
+
+/* Object ID (0x40) */
+struct OBJECT_ID {
+	struct GUID ObjId;	// 0x00: Unique Id assigned to file.
+	struct GUID BirthVolumeId; // 0x10: Birth Volume Id is the Object Id of the Volume on.
+				// which the Object Id was allocated. It never changes.
+	struct GUID BirthObjectId; // 0x20: Birth Object Id is the first Object Id that was
+				// ever assigned to this MFT Record. I.e. If the Object Id
+				// is changed for some reason, this field will reflect the
+				// original value of the Object Id.
+	struct GUID DomainId;	// 0x30: Domain Id is currently unused but it is intended to be
+				// used in a network environment where the local machine is
+				// part of a Windows 2000 Domain. This may be used in a Windows
+				// 2000 Advanced Server managed domain.
+};
+
+static_assert(sizeof(struct OBJECT_ID) == 0x40);
+
+/* O Directory entry structure ( rule = 0x13 ) */
+struct NTFS_DE_O {
+	struct NTFS_DE de;
+	struct GUID ObjId;	// 0x10: Unique Id assigned to file.
+	struct MFT_REF ref;	// 0x20: MFT record number with this file.
+	struct GUID BirthVolumeId; // 0x28: Birth Volume Id is the Object Id of the Volume on
+				// which the Object Id was allocated. It never changes.
+	struct GUID BirthObjectId; // 0x38: Birth Object Id is the first Object Id that was
+				// ever assigned to this MFT Record. I.e. If the Object Id
+				// is changed for some reason, this field will reflect the
+				// original value of the Object Id.
+				// This field is valid if data_size == 0x48.
+	struct GUID BirthDomainId; // 0x48: Domain Id is currently unused but it is intended
+				// to be used in a network environment where the local
+				// machine is part of a Windows 2000 Domain. This may be
+				// used in a Windows 2000 Advanced Server managed domain.
+};
+
+static_assert(sizeof(struct NTFS_DE_O) == 0x58);
+
+#define NTFS_OBJECT_ENTRY_DATA_SIZE1					       \
+	0x38 // struct NTFS_DE_O.BirthDomainId is not used
+#define NTFS_OBJECT_ENTRY_DATA_SIZE2					       \
+	0x48 // struct NTFS_DE_O.BirthDomainId is used
+
+/* Q Directory entry structure ( rule = 0x11 ) */
+struct NTFS_DE_Q {
+	struct NTFS_DE de;
+	__le32 owner_id;	// 0x10: Unique Id assigned to file
+	__le32 Version;		// 0x14: 0x02
+	__le32 flags2;		// 0x18: Quota flags, see above
+	__le64 BytesUsed;	// 0x1C:
+	__le64 ChangeTime;	// 0x24:
+	__le64 WarningLimit;	// 0x28:
+	__le64 HardLimit;	// 0x34:
+	__le64 ExceededTime;	// 0x3C:
+
+	// SID is placed here
+}; // sizeof() = 0x44
+
+#define SIZEOF_NTFS_DE_Q 0x44
+
+#define SecurityDescriptorsBlockSize 0x40000 // 256K
+#define SecurityDescriptorMaxSize    0x20000 // 128K
+#define Log2OfSecurityDescriptorsBlockSize 18
+
+struct SECURITY_KEY {
+	__le32 hash; //  Hash value for descriptor
+	__le32 sec_id; //  Security Id (guaranteed unique)
+};
+
+/* Security descriptors (the content of $Secure::SDS data stream) */
+struct SECURITY_HDR {
+	struct SECURITY_KEY key;	// 0x00: Security Key.
+	__le64 off;			// 0x08: Offset of this entry in the file.
+	__le32 size;			// 0x10: Size of this entry, 8 byte aligned.
+	/*
+	 * Security descriptor itself is placed here.
+	 * Total size is 16 byte aligned.
+	 */
+} __packed;
+
+#define SIZEOF_SECURITY_HDR 0x14
+
+/* SII Directory entry structure */
+struct NTFS_DE_SII {
+	struct NTFS_DE de;
+	__le32 sec_id;			// 0x10: Key: sizeof(security_id) = wKeySize
+	struct SECURITY_HDR sec_hdr;	// 0x14:
+} __packed;
+
+#define SIZEOF_SII_DIRENTRY 0x28
+
+/* SDH Directory entry structure */
+struct NTFS_DE_SDH {
+	struct NTFS_DE de;
+	struct SECURITY_KEY key;	// 0x10: Key
+	struct SECURITY_HDR sec_hdr;	// 0x18: Data
+	__le16 magic[2];		// 0x2C: 0x00490049 "I I"
+};
+
+#define SIZEOF_SDH_DIRENTRY 0x30
+
+struct REPARSE_KEY {
+	__le32 ReparseTag;		// 0x00: Reparse Tag
+	struct MFT_REF ref;		// 0x04: MFT record number with this file
+}; // sizeof() = 0x0C
+
+static_assert(offsetof(struct REPARSE_KEY, ref) == 0x04);
+#define SIZEOF_REPARSE_KEY 0x0C
+
+/* Reparse Directory entry structure */
+struct NTFS_DE_R {
+	struct NTFS_DE de;
+	struct REPARSE_KEY key;		// 0x10: Reparse Key.
+	u32 zero;			// 0x1c:
+}; // sizeof() = 0x20
+
+static_assert(sizeof(struct NTFS_DE_R) == 0x20);
+
+/* CompressReparseBuffer.WofVersion */
+#define WOF_CURRENT_VERSION		cpu_to_le32(1)
+/* CompressReparseBuffer.WofProvider */
+#define WOF_PROVIDER_WIM		cpu_to_le32(1)
+/* CompressReparseBuffer.WofProvider */
+#define WOF_PROVIDER_SYSTEM		cpu_to_le32(2)
+/* CompressReparseBuffer.ProviderVer */
+#define WOF_PROVIDER_CURRENT_VERSION	cpu_to_le32(1)
+
+#define WOF_COMPRESSION_XPRESS4K	cpu_to_le32(0) // 4k
+#define WOF_COMPRESSION_LZX32K		cpu_to_le32(1) // 32k
+#define WOF_COMPRESSION_XPRESS8K	cpu_to_le32(2) // 8k
+#define WOF_COMPRESSION_XPRESS16K	cpu_to_le32(3) // 16k
+
+/*
+ * ATTR_REPARSE (0xC0)
+ *
+ * The reparse struct GUID structure is used by all 3rd party layered drivers to
+ * store data in a reparse point. For non-Microsoft tags, The struct GUID field
+ * cannot be GUID_NULL.
+ * The constraints on reparse tags are defined below.
+ * Microsoft tags can also be used with this format of the reparse point buffer.
+ */
+struct REPARSE_POINT {
+	__le32 ReparseTag;	// 0x00:
+	__le16 ReparseDataLength;// 0x04:
+	__le16 Reserved;
+
+	struct GUID Guid;	// 0x08:
+
+	//
+	// Here GenericReparseBuffer is placed
+	//
+};
+
+static_assert(sizeof(struct REPARSE_POINT) == 0x18);
+
+/* Maximum allowed size of the reparse data. */
+#define MAXIMUM_REPARSE_DATA_BUFFER_SIZE	(16 * 1024)
+
+/*
+ * The value of the following constant needs to satisfy the following
+ * conditions:
+ *  (1) Be at least as large as the largest of the reserved tags.
+ *  (2) Be strictly smaller than all the tags in use.
+ */
+#define IO_REPARSE_TAG_RESERVED_RANGE		1
+
+/*
+ * The reparse tags are a ULONG. The 32 bits are laid out as follows:
+ *
+ *   3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-+-+-+-+-----------------------+-------------------------------+
+ *  |M|R|N|R|	  Reserved bits     |	    Reparse Tag Value	    |
+ *  +-+-+-+-+-----------------------+-------------------------------+
+ *
+ * M is the Microsoft bit. When set to 1, it denotes a tag owned by Microsoft.
+ *   All ISVs must use a tag with a 0 in this position.
+ *   Note: If a Microsoft tag is used by non-Microsoft software, the
+ *   behavior is not defined.
+ *
+ * R is reserved.  Must be zero for non-Microsoft tags.
+ *
+ * N is name surrogate. When set to 1, the file represents another named
+ *   entity in the system.
+ *
+ * The M and N bits are OR-able.
+ * The following macros check for the M and N bit values:
+ */
+
+/*
+ * Macro to determine whether a reparse point tag corresponds to a tag
+ * owned by Microsoft.
+ */
+#define IsReparseTagMicrosoft(_tag)	(((_tag)&IO_REPARSE_TAG_MICROSOFT))
+
+/* Macro to determine whether a reparse point tag is a name surrogate. */
+#define IsReparseTagNameSurrogate(_tag)	(((_tag)&IO_REPARSE_TAG_NAME_SURROGATE))
+
+/*
+ * The following constant represents the bits that are valid to use in
+ * reparse tags.
+ */
+#define IO_REPARSE_TAG_VALID_VALUES	0xF000FFFF
+
+/*
+ * Macro to determine whether a reparse tag is a valid tag.
+ */
+#define IsReparseTagValid(_tag)						       \
+	(!((_tag) & ~IO_REPARSE_TAG_VALID_VALUES) &&			       \
+	 ((_tag) > IO_REPARSE_TAG_RESERVED_RANGE))
+
+/* Microsoft tags for reparse points. */
+
+enum IO_REPARSE_TAG {
+	IO_REPARSE_TAG_SYMBOLIC_LINK	= cpu_to_le32(0),
+	IO_REPARSE_TAG_NAME_SURROGATE	= cpu_to_le32(0x20000000),
+	IO_REPARSE_TAG_MICROSOFT	= cpu_to_le32(0x80000000),
+	IO_REPARSE_TAG_MOUNT_POINT	= cpu_to_le32(0xA0000003),
+	IO_REPARSE_TAG_SYMLINK		= cpu_to_le32(0xA000000C),
+	IO_REPARSE_TAG_HSM		= cpu_to_le32(0xC0000004),
+	IO_REPARSE_TAG_SIS		= cpu_to_le32(0x80000007),
+	IO_REPARSE_TAG_DEDUP		= cpu_to_le32(0x80000013),
+	IO_REPARSE_TAG_COMPRESS		= cpu_to_le32(0x80000017),
+
+	/*
+	 * The reparse tag 0x80000008 is reserved for Microsoft internal use.
+	 * May be published in the future.
+	 */
+
+	/* Microsoft reparse tag reserved for DFS */
+	IO_REPARSE_TAG_DFS	= cpu_to_le32(0x8000000A),
+
+	/* Microsoft reparse tag reserved for the file system filter manager. */
+	IO_REPARSE_TAG_FILTER_MANAGER	= cpu_to_le32(0x8000000B),
+
+	/* Non-Microsoft tags for reparse points */
+
+	/* Tag allocated to CONGRUENT, May 2000. Used by IFSTEST. */
+	IO_REPARSE_TAG_IFSTEST_CONGRUENT = cpu_to_le32(0x00000009),
+
+	/* Tag allocated to ARKIVIO. */
+	IO_REPARSE_TAG_ARKIVIO	= cpu_to_le32(0x0000000C),
+
+	/* Tag allocated to SOLUTIONSOFT. */
+	IO_REPARSE_TAG_SOLUTIONSOFT	= cpu_to_le32(0x2000000D),
+
+	/* Tag allocated to COMMVAULT. */
+	IO_REPARSE_TAG_COMMVAULT	= cpu_to_le32(0x0000000E),
+
+	/* OneDrive?? */
+	IO_REPARSE_TAG_CLOUD	= cpu_to_le32(0x9000001A),
+	IO_REPARSE_TAG_CLOUD_1	= cpu_to_le32(0x9000101A),
+	IO_REPARSE_TAG_CLOUD_2	= cpu_to_le32(0x9000201A),
+	IO_REPARSE_TAG_CLOUD_3	= cpu_to_le32(0x9000301A),
+	IO_REPARSE_TAG_CLOUD_4	= cpu_to_le32(0x9000401A),
+	IO_REPARSE_TAG_CLOUD_5	= cpu_to_le32(0x9000501A),
+	IO_REPARSE_TAG_CLOUD_6	= cpu_to_le32(0x9000601A),
+	IO_REPARSE_TAG_CLOUD_7	= cpu_to_le32(0x9000701A),
+	IO_REPARSE_TAG_CLOUD_8	= cpu_to_le32(0x9000801A),
+	IO_REPARSE_TAG_CLOUD_9	= cpu_to_le32(0x9000901A),
+	IO_REPARSE_TAG_CLOUD_A	= cpu_to_le32(0x9000A01A),
+	IO_REPARSE_TAG_CLOUD_B	= cpu_to_le32(0x9000B01A),
+	IO_REPARSE_TAG_CLOUD_C	= cpu_to_le32(0x9000C01A),
+	IO_REPARSE_TAG_CLOUD_D	= cpu_to_le32(0x9000D01A),
+	IO_REPARSE_TAG_CLOUD_E	= cpu_to_le32(0x9000E01A),
+	IO_REPARSE_TAG_CLOUD_F	= cpu_to_le32(0x9000F01A),
+
+};
+
+#define SYMLINK_FLAG_RELATIVE		1
+
+/* Microsoft reparse buffer. (see DDK for details) */
+struct REPARSE_DATA_BUFFER {
+	__le32 ReparseTag;		// 0x00:
+	__le16 ReparseDataLength;	// 0x04:
+	__le16 Reserved;
+
+	union {
+		/* If ReparseTag == 0xA0000003 (IO_REPARSE_TAG_MOUNT_POINT) */
+		struct {
+			__le16 SubstituteNameOffset; // 0x08
+			__le16 SubstituteNameLength; // 0x0A
+			__le16 PrintNameOffset;      // 0x0C
+			__le16 PrintNameLength;      // 0x0E
+			__le16 PathBuffer[];	     // 0x10
+		} MountPointReparseBuffer;
+
+		/*
+		 * If ReparseTag == 0xA000000C (IO_REPARSE_TAG_SYMLINK)
+		 * https://msdn.microsoft.com/en-us/library/cc232006.aspx
+		 */
+		struct {
+			__le16 SubstituteNameOffset; // 0x08
+			__le16 SubstituteNameLength; // 0x0A
+			__le16 PrintNameOffset;      // 0x0C
+			__le16 PrintNameLength;      // 0x0E
+			// 0-absolute path 1- relative path, SYMLINK_FLAG_RELATIVE
+			__le32 Flags;		     // 0x10
+			__le16 PathBuffer[];	     // 0x14
+		} SymbolicLinkReparseBuffer;
+
+		/* If ReparseTag == 0x80000017U */
+		struct {
+			__le32 WofVersion;  // 0x08 == 1
+			/*
+			 * 1 - WIM backing provider ("WIMBoot"),
+			 * 2 - System compressed file provider
+			 */
+			__le32 WofProvider; // 0x0C:
+			__le32 ProviderVer; // 0x10: == 1 WOF_FILE_PROVIDER_CURRENT_VERSION == 1
+			__le32 CompressionFormat; // 0x14: 0, 1, 2, 3. See WOF_COMPRESSION_XXX
+		} CompressReparseBuffer;
+
+		struct {
+			u8 DataBuffer[1];   // 0x08:
+		} GenericReparseBuffer;
+	};
+};
+
+/* ATTR_EA_INFO (0xD0) */
+
+#define FILE_NEED_EA 0x80 // See ntifs.h
+/*
+ *FILE_NEED_EA, indicates that the file to which the EA belongs cannot be
+ * interpreted without understanding the associated extended attributes.
+ */
+struct EA_INFO {
+	__le16 size_pack;	// 0x00: Size of buffer to hold in packed form.
+	__le16 count;		// 0x02: Count of EA's with FILE_NEED_EA bit set.
+	__le32 size;		// 0x04: Size of buffer to hold in unpacked form.
+};
+
+static_assert(sizeof(struct EA_INFO) == 8);
+
+/* ATTR_EA (0xE0) */
+struct EA_FULL {
+	__le32 size;		// 0x00: (not in packed)
+	u8 flags;		// 0x04:
+	u8 name_len;		// 0x05:
+	__le16 elength;		// 0x06:
+	u8 name[];		// 0x08:
+};
+
+static_assert(offsetof(struct EA_FULL, name) == 8);
+
+#define ACL_REVISION	2
+#define ACL_REVISION_DS 4
+
+#define SE_SELF_RELATIVE cpu_to_le16(0x8000)
+
+struct SECURITY_DESCRIPTOR_RELATIVE {
+	u8 Revision;
+	u8 Sbz1;
+	__le16 Control;
+	__le32 Owner;
+	__le32 Group;
+	__le32 Sacl;
+	__le32 Dacl;
+};
+static_assert(sizeof(struct SECURITY_DESCRIPTOR_RELATIVE) == 0x14);
+
+struct ACE_HEADER {
+	u8 AceType;
+	u8 AceFlags;
+	__le16 AceSize;
+};
+static_assert(sizeof(struct ACE_HEADER) == 4);
+
+struct ACL {
+	u8 AclRevision;
+	u8 Sbz1;
+	__le16 AclSize;
+	__le16 AceCount;
+	__le16 Sbz2;
+};
+static_assert(sizeof(struct ACL) == 8);
+
+struct SID {
+	u8 Revision;
+	u8 SubAuthorityCount;
+	u8 IdentifierAuthority[6];
+	__le32 SubAuthority[];
+};
+static_assert(offsetof(struct SID, SubAuthority) == 8);
+
+#endif /* _LINUX_NTFS3_NTFS_H */
+// clang-format on
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
new file mode 100644
index 000000000000..dc71c59fd445
--- /dev/null
+++ b/fs/ntfs3/ntfs_fs.h
@@ -0,0 +1,1111 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ */
+
+// clang-format off
+#ifndef _LINUX_NTFS3_NTFS_FS_H
+#define _LINUX_NTFS3_NTFS_FS_H
+
+#define MINUS_ONE_T			((size_t)(-1))
+/* Biggest MFT / smallest cluster */
+#define MAXIMUM_BYTES_PER_MFT		4096
+#define NTFS_BLOCKS_PER_MFT_RECORD	(MAXIMUM_BYTES_PER_MFT / 512)
+
+#define MAXIMUM_BYTES_PER_INDEX		4096
+#define NTFS_BLOCKS_PER_INODE		(MAXIMUM_BYTES_PER_INDEX / 512)
+
+/* NTFS specific error code when fixup failed. */
+#define E_NTFS_FIXUP			555
+/* NTFS specific error code about resident->nonresident. */
+#define E_NTFS_NONRESIDENT		556
+/* NTFS specific error code about punch hole. */
+#define E_NTFS_NOTALIGNED		557
+
+
+/* sbi->flags */
+#define NTFS_FLAGS_NODISCARD		0x00000001
+/* Set when LogFile is replaying. */
+#define NTFS_FLAGS_LOG_REPLAYING	0x00000008
+/* Set when we changed first MFT's which copy must be updated in $MftMirr. */
+#define NTFS_FLAGS_MFTMIRR		0x00001000
+#define NTFS_FLAGS_NEED_REPLAY		0x04000000
+
+
+/* ni->ni_flags */
+/*
+ * Data attribute is external compressed (LZX/Xpress)
+ * 1 - WOF_COMPRESSION_XPRESS4K
+ * 2 - WOF_COMPRESSION_XPRESS8K
+ * 3 - WOF_COMPRESSION_XPRESS16K
+ * 4 - WOF_COMPRESSION_LZX32K
+ */
+#define NI_FLAG_COMPRESSED_MASK		0x0000000f
+/* Data attribute is deduplicated. */
+#define NI_FLAG_DEDUPLICATED		0x00000010
+#define NI_FLAG_EA			0x00000020
+#define NI_FLAG_DIR			0x00000040
+#define NI_FLAG_RESIDENT		0x00000080
+#define NI_FLAG_UPDATE_PARENT		0x00000100
+// clang-format on
+
+struct ntfs_mount_options {
+	struct nls_table *nls;
+
+	kuid_t fs_uid;
+	kgid_t fs_gid;
+	u16 fs_fmask_inv;
+	u16 fs_dmask_inv;
+
+	unsigned uid : 1, /* uid was set. */
+		gid : 1, /* gid was set. */
+		fmask : 1, /* fmask was set. */
+		dmask : 1, /* dmask was set. */
+		sys_immutable : 1, /* Immutable system files. */
+		discard : 1, /* Issue discard requests on deletions. */
+		sparse : 1, /* Create sparse files. */
+		showmeta : 1, /* Show meta files. */
+		nohidden : 1, /* Do not show hidden files. */
+		force : 1, /* Rw mount dirty volume. */
+		no_acs_rules : 1, /*Exclude acs rules. */
+		prealloc : 1 /* Preallocate space when file is growing. */
+		;
+};
+
+/* Special value to unpack and deallocate. */
+#define RUN_DEALLOCATE ((struct runs_tree *)(size_t)1)
+
+/* TODO: Use rb tree instead of array. */
+struct runs_tree {
+	struct ntfs_run *runs;
+	size_t count; /* Currently used size a ntfs_run storage. */
+	size_t allocated; /* Currently allocated ntfs_run storage size. */
+};
+
+struct ntfs_buffers {
+	/* Biggest MFT / smallest cluster = 4096 / 512 = 8 */
+	/* Biggest index / smallest cluster = 4096 / 512 = 8 */
+	struct buffer_head *bh[PAGE_SIZE >> SECTOR_SHIFT];
+	u32 bytes;
+	u32 nbufs;
+	u32 off;
+};
+
+enum ALLOCATE_OPT {
+	ALLOCATE_DEF = 0, // Allocate all clusters.
+	ALLOCATE_MFT = 1, // Allocate for MFT.
+};
+
+enum bitmap_mutex_classes {
+	BITMAP_MUTEX_CLUSTERS = 0,
+	BITMAP_MUTEX_MFT = 1,
+};
+
+struct wnd_bitmap {
+	struct super_block *sb;
+	struct rw_semaphore rw_lock;
+
+	struct runs_tree run;
+	size_t nbits;
+
+	size_t total_zeroes; // Total number of free bits.
+	u16 *free_bits; // Free bits in each window.
+	size_t nwnd;
+	u32 bits_last; // Bits in last window.
+
+	struct rb_root start_tree; // Extents, sorted by 'start'.
+	struct rb_root count_tree; // Extents, sorted by 'count + start'.
+	size_t count; // Extents count.
+
+	/*
+	 * -1 Tree is activated but not updated (too many fragments).
+	 * 0 - Tree is not activated.
+	 * 1 - Tree is activated and updated.
+	 */
+	int uptodated;
+	size_t extent_min; // Minimal extent used while building.
+	size_t extent_max; // Upper estimate of biggest free block.
+
+	/* Zone [bit, end) */
+	size_t zone_bit;
+	size_t zone_end;
+
+	bool set_tail; // Not necessary in driver.
+	bool inited;
+};
+
+typedef int (*NTFS_CMP_FUNC)(const void *key1, size_t len1, const void *key2,
+			     size_t len2, const void *param);
+
+enum index_mutex_classed {
+	INDEX_MUTEX_I30 = 0,
+	INDEX_MUTEX_SII = 1,
+	INDEX_MUTEX_SDH = 2,
+	INDEX_MUTEX_SO = 3,
+	INDEX_MUTEX_SQ = 4,
+	INDEX_MUTEX_SR = 5,
+	INDEX_MUTEX_TOTAL
+};
+
+/* ntfs_index - Allocation unit inside directory. */
+struct ntfs_index {
+	struct runs_tree bitmap_run;
+	struct runs_tree alloc_run;
+	/* read/write access to 'bitmap_run'/'alloc_run' while ntfs_readdir */
+	struct rw_semaphore run_lock;
+
+	/*TODO: Remove 'cmp'. */
+	NTFS_CMP_FUNC cmp;
+
+	u8 index_bits; // log2(root->index_block_size)
+	u8 idx2vbn_bits; // log2(root->index_block_clst)
+	u8 vbn2vbo_bits; // index_block_size < cluster? 9 : cluster_bits
+	u8 type; // index_mutex_classed
+};
+
+/* Minimum MFT zone. */
+#define NTFS_MIN_MFT_ZONE 100
+
+/* Ntfs file system in-core superblock data. */
+struct ntfs_sb_info {
+	struct super_block *sb;
+
+	u32 discard_granularity;
+	u64 discard_granularity_mask_inv; // ~(discard_granularity_mask_inv-1)
+
+	u32 cluster_size; // bytes per cluster
+	u32 cluster_mask; // == cluster_size - 1
+	u64 cluster_mask_inv; // ~(cluster_size - 1)
+	u32 block_mask; // sb->s_blocksize - 1
+	u32 blocks_per_cluster; // cluster_size / sb->s_blocksize
+
+	u32 record_size;
+	u32 sector_size;
+	u32 index_size;
+
+	u8 sector_bits;
+	u8 cluster_bits;
+	u8 record_bits;
+
+	u64 maxbytes; // Maximum size for normal files.
+	u64 maxbytes_sparse; // Maximum size for sparse file.
+
+	u32 flags; // See NTFS_FLAGS_XXX.
+
+	CLST bad_clusters; // The count of marked bad clusters.
+
+	u16 max_bytes_per_attr; // Maximum attribute size in record.
+	u16 attr_size_tr; // Attribute size threshold (320 bytes).
+
+	/* Records in $Extend. */
+	CLST objid_no;
+	CLST quota_no;
+	CLST reparse_no;
+	CLST usn_jrnl_no;
+
+	struct ATTR_DEF_ENTRY *def_table; // Attribute definition table.
+	u32 def_entries;
+	u32 ea_max_size;
+
+	struct MFT_REC *new_rec;
+
+	u16 *upcase;
+
+	struct {
+		u64 lbo, lbo2;
+		struct ntfs_inode *ni;
+		struct wnd_bitmap bitmap; // $MFT::Bitmap
+		/*
+		 * MFT records [11-24) used to expand MFT itself.
+		 * They always marked as used in $MFT::Bitmap
+		 * 'reserved_bitmap' contains real bitmap of these records.
+		 */
+		ulong reserved_bitmap; // Bitmap of used records [11 - 24)
+		size_t next_free; // The next record to allocate from
+		size_t used; // MFT valid size in records.
+		u32 recs_mirr; // Number of records in MFTMirr
+		u8 next_reserved;
+		u8 reserved_bitmap_inited;
+	} mft;
+
+	struct {
+		struct wnd_bitmap bitmap; // $Bitmap::Data
+		CLST next_free_lcn;
+	} used;
+
+	struct {
+		u64 size; // In bytes.
+		u64 blocks; // In blocks.
+		u64 ser_num;
+		struct ntfs_inode *ni;
+		__le16 flags; // Cached current VOLUME_INFO::flags, VOLUME_FLAG_DIRTY.
+		u8 major_ver;
+		u8 minor_ver;
+		char label[65];
+		bool real_dirty; // Real fs state.
+	} volume;
+
+	struct {
+		struct ntfs_index index_sii;
+		struct ntfs_index index_sdh;
+		struct ntfs_inode *ni;
+		u32 next_id;
+		u64 next_off;
+
+		__le32 def_security_id;
+	} security;
+
+	struct {
+		struct ntfs_index index_r;
+		struct ntfs_inode *ni;
+		u64 max_size; // 16K
+	} reparse;
+
+	struct {
+		struct ntfs_index index_o;
+		struct ntfs_inode *ni;
+	} objid;
+
+	struct {
+		struct mutex mtx_lznt;
+		struct lznt *lznt;
+#ifdef CONFIG_NTFS3_LZX_XPRESS
+		struct mutex mtx_xpress;
+		struct xpress_decompressor *xpress;
+		struct mutex mtx_lzx;
+		struct lzx_decompressor *lzx;
+#endif
+	} compress;
+
+	struct ntfs_mount_options options;
+	struct ratelimit_state msg_ratelimit;
+};
+
+/* One MFT record(usually 1024 bytes), consists of attributes. */
+struct mft_inode {
+	struct rb_node node;
+	struct ntfs_sb_info *sbi;
+
+	struct MFT_REC *mrec;
+	struct ntfs_buffers nb;
+
+	CLST rno;
+	bool dirty;
+};
+
+/* Nested class for ntfs_inode::ni_lock. */
+enum ntfs_inode_mutex_lock_class {
+	NTFS_INODE_MUTEX_DIRTY,
+	NTFS_INODE_MUTEX_SECURITY,
+	NTFS_INODE_MUTEX_OBJID,
+	NTFS_INODE_MUTEX_REPARSE,
+	NTFS_INODE_MUTEX_NORMAL,
+	NTFS_INODE_MUTEX_PARENT,
+};
+
+/*
+ * sturct ntfs_inode
+ *
+ * Ntfs inode - extends linux inode. consists of one or more MFT inodes.
+ */
+struct ntfs_inode {
+	struct mft_inode mi; // base record
+
+	/*
+	 * Valid size: [0 - i_valid) - these range in file contains valid data.
+	 * Range [i_valid - inode->i_size) - contains 0.
+	 * Usually i_valid <= inode->i_size.
+	 */
+	u64 i_valid;
+	struct timespec64 i_crtime;
+
+	struct mutex ni_lock;
+
+	/* File attributes from std. */
+	enum FILE_ATTRIBUTE std_fa;
+	__le32 std_security_id;
+
+	/*
+	 * Tree of mft_inode.
+	 * Not empty when primary MFT record (usually 1024 bytes) can't save all attributes
+	 * e.g. file becomes too fragmented or contains a lot of names.
+	 */
+	struct rb_root mi_tree;
+
+	/*
+	 * This member is used in ntfs_readdir to ensure that all subrecords are loaded
+	 */
+	u8 mi_loaded;
+
+	union {
+		struct ntfs_index dir;
+		struct {
+			struct rw_semaphore run_lock;
+			struct runs_tree run;
+#ifdef CONFIG_NTFS3_LZX_XPRESS
+			struct page *offs_page;
+#endif
+		} file;
+	};
+
+	struct {
+		struct runs_tree run;
+		struct ATTR_LIST_ENTRY *le; // 1K aligned memory.
+		size_t size;
+		bool dirty;
+	} attr_list;
+
+	size_t ni_flags; // NI_FLAG_XXX
+
+	struct inode vfs_inode;
+};
+
+struct indx_node {
+	struct ntfs_buffers nb;
+	struct INDEX_BUFFER *index;
+};
+
+struct ntfs_fnd {
+	int level;
+	struct indx_node *nodes[20];
+	struct NTFS_DE *de[20];
+	struct NTFS_DE *root_de;
+};
+
+enum REPARSE_SIGN {
+	REPARSE_NONE = 0,
+	REPARSE_COMPRESSED = 1,
+	REPARSE_DEDUPLICATED = 2,
+	REPARSE_LINK = 3
+};
+
+/* Functions from attrib.c */
+int attr_load_runs(struct ATTRIB *attr, struct ntfs_inode *ni,
+		   struct runs_tree *run, const CLST *vcn);
+int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run,
+			   CLST vcn, CLST lcn, CLST len, CLST *pre_alloc,
+			   enum ALLOCATE_OPT opt, CLST *alen, const size_t fr,
+			   CLST *new_lcn);
+int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr,
+			  struct ATTR_LIST_ENTRY *le, struct mft_inode *mi,
+			  u64 new_size, struct runs_tree *run,
+			  struct ATTRIB **ins_attr, struct page *page);
+int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type,
+		  const __le16 *name, u8 name_len, struct runs_tree *run,
+		  u64 new_size, const u64 *new_valid, bool keep_prealloc,
+		  struct ATTRIB **ret);
+int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn,
+			CLST *len, bool *new);
+int attr_data_read_resident(struct ntfs_inode *ni, struct page *page);
+int attr_data_write_resident(struct ntfs_inode *ni, struct page *page);
+int attr_load_runs_vcn(struct ntfs_inode *ni, enum ATTR_TYPE type,
+		       const __le16 *name, u8 name_len, struct runs_tree *run,
+		       CLST vcn);
+int attr_load_runs_range(struct ntfs_inode *ni, enum ATTR_TYPE type,
+			 const __le16 *name, u8 name_len, struct runs_tree *run,
+			 u64 from, u64 to);
+int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr,
+			struct runs_tree *run, u64 frame, u64 frames,
+			u8 frame_bits, u32 *ondisk_size, u64 *vbo_data);
+int attr_is_frame_compressed(struct ntfs_inode *ni, struct ATTRIB *attr,
+			     CLST frame, CLST *clst_data);
+int attr_allocate_frame(struct ntfs_inode *ni, CLST frame, size_t compr_size,
+			u64 new_valid);
+int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes);
+int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size);
+
+/* Functions from attrlist.c */
+void al_destroy(struct ntfs_inode *ni);
+bool al_verify(struct ntfs_inode *ni);
+int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr);
+struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni,
+				     struct ATTR_LIST_ENTRY *le);
+struct ATTR_LIST_ENTRY *al_find_le(struct ntfs_inode *ni,
+				   struct ATTR_LIST_ENTRY *le,
+				   const struct ATTRIB *attr);
+struct ATTR_LIST_ENTRY *al_find_ex(struct ntfs_inode *ni,
+				   struct ATTR_LIST_ENTRY *le,
+				   enum ATTR_TYPE type, const __le16 *name,
+				   u8 name_len, const CLST *vcn);
+int al_add_le(struct ntfs_inode *ni, enum ATTR_TYPE type, const __le16 *name,
+	      u8 name_len, CLST svcn, __le16 id, const struct MFT_REF *ref,
+	      struct ATTR_LIST_ENTRY **new_le);
+bool al_remove_le(struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le);
+bool al_delete_le(struct ntfs_inode *ni, enum ATTR_TYPE type, CLST vcn,
+		  const __le16 *name, size_t name_len,
+		  const struct MFT_REF *ref);
+int al_update(struct ntfs_inode *ni);
+static inline size_t al_aligned(size_t size)
+{
+	return (size + 1023) & ~(size_t)1023;
+}
+
+/* Globals from bitfunc.c */
+bool are_bits_clear(const ulong *map, size_t bit, size_t nbits);
+bool are_bits_set(const ulong *map, size_t bit, size_t nbits);
+size_t get_set_bits_ex(const ulong *map, size_t bit, size_t nbits);
+
+/* Globals from dir.c */
+int ntfs_utf16_to_nls(struct ntfs_sb_info *sbi, const struct le_str *uni,
+		      u8 *buf, int buf_len);
+int ntfs_nls_to_utf16(struct ntfs_sb_info *sbi, const u8 *name, u32 name_len,
+		      struct cpu_str *uni, u32 max_ulen,
+		      enum utf16_endian endian);
+struct inode *dir_search_u(struct inode *dir, const struct cpu_str *uni,
+			   struct ntfs_fnd *fnd);
+bool dir_is_empty(struct inode *dir);
+extern const struct file_operations ntfs_dir_operations;
+
+/* Globals from file.c */
+int ntfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+		 struct kstat *stat, u32 request_mask, u32 flags);
+void ntfs_sparse_cluster(struct inode *inode, struct page *page0, CLST vcn,
+			 CLST len);
+int ntfs3_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+		  struct iattr *attr);
+int ntfs_file_open(struct inode *inode, struct file *file);
+int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+		__u64 start, __u64 len);
+extern const struct inode_operations ntfs_special_inode_operations;
+extern const struct inode_operations ntfs_file_inode_operations;
+extern const struct file_operations ntfs_file_operations;
+
+/* Globals from frecord.c */
+void ni_remove_mi(struct ntfs_inode *ni, struct mft_inode *mi);
+struct ATTR_STD_INFO *ni_std(struct ntfs_inode *ni);
+struct ATTR_STD_INFO5 *ni_std5(struct ntfs_inode *ni);
+void ni_clear(struct ntfs_inode *ni);
+int ni_load_mi_ex(struct ntfs_inode *ni, CLST rno, struct mft_inode **mi);
+int ni_load_mi(struct ntfs_inode *ni, const struct ATTR_LIST_ENTRY *le,
+	       struct mft_inode **mi);
+struct ATTRIB *ni_find_attr(struct ntfs_inode *ni, struct ATTRIB *attr,
+			    struct ATTR_LIST_ENTRY **entry_o,
+			    enum ATTR_TYPE type, const __le16 *name,
+			    u8 name_len, const CLST *vcn,
+			    struct mft_inode **mi);
+struct ATTRIB *ni_enum_attr_ex(struct ntfs_inode *ni, struct ATTRIB *attr,
+			       struct ATTR_LIST_ENTRY **le,
+			       struct mft_inode **mi);
+struct ATTRIB *ni_load_attr(struct ntfs_inode *ni, enum ATTR_TYPE type,
+			    const __le16 *name, u8 name_len, CLST vcn,
+			    struct mft_inode **pmi);
+int ni_load_all_mi(struct ntfs_inode *ni);
+bool ni_add_subrecord(struct ntfs_inode *ni, CLST rno, struct mft_inode **mi);
+int ni_remove_attr(struct ntfs_inode *ni, enum ATTR_TYPE type,
+		   const __le16 *name, size_t name_len, bool base_only,
+		   const __le16 *id);
+int ni_create_attr_list(struct ntfs_inode *ni);
+int ni_expand_list(struct ntfs_inode *ni);
+int ni_insert_nonresident(struct ntfs_inode *ni, enum ATTR_TYPE type,
+			  const __le16 *name, u8 name_len,
+			  const struct runs_tree *run, CLST svcn, CLST len,
+			  __le16 flags, struct ATTRIB **new_attr,
+			  struct mft_inode **mi);
+int ni_insert_resident(struct ntfs_inode *ni, u32 data_size,
+		       enum ATTR_TYPE type, const __le16 *name, u8 name_len,
+		       struct ATTRIB **new_attr, struct mft_inode **mi,
+		       struct ATTR_LIST_ENTRY **le);
+void ni_remove_attr_le(struct ntfs_inode *ni, struct ATTRIB *attr,
+		       struct mft_inode *mi, struct ATTR_LIST_ENTRY *le);
+int ni_delete_all(struct ntfs_inode *ni);
+struct ATTR_FILE_NAME *ni_fname_name(struct ntfs_inode *ni,
+				     const struct cpu_str *uni,
+				     const struct MFT_REF *home,
+				     struct mft_inode **mi,
+				     struct ATTR_LIST_ENTRY **entry);
+struct ATTR_FILE_NAME *ni_fname_type(struct ntfs_inode *ni, u8 name_type,
+				     struct mft_inode **mi,
+				     struct ATTR_LIST_ENTRY **entry);
+int ni_new_attr_flags(struct ntfs_inode *ni, enum FILE_ATTRIBUTE new_fa);
+enum REPARSE_SIGN ni_parse_reparse(struct ntfs_inode *ni, struct ATTRIB *attr,
+				   void *buffer);
+int ni_write_inode(struct inode *inode, int sync, const char *hint);
+#define _ni_write_inode(i, w) ni_write_inode(i, w, __func__)
+int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
+	      __u64 vbo, __u64 len);
+int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page);
+int ni_decompress_file(struct ntfs_inode *ni);
+int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages,
+		  u32 pages_per_frame);
+int ni_write_frame(struct ntfs_inode *ni, struct page **pages,
+		   u32 pages_per_frame);
+int ni_remove_name(struct ntfs_inode *dir_ni, struct ntfs_inode *ni,
+		   struct NTFS_DE *de, struct NTFS_DE **de2, int *undo_step);
+
+bool ni_remove_name_undo(struct ntfs_inode *dir_ni, struct ntfs_inode *ni,
+			 struct NTFS_DE *de, struct NTFS_DE *de2,
+			 int undo_step);
+
+int ni_add_name(struct ntfs_inode *dir_ni, struct ntfs_inode *ni,
+		struct NTFS_DE *de);
+
+int ni_rename(struct ntfs_inode *dir_ni, struct ntfs_inode *new_dir_ni,
+	      struct ntfs_inode *ni, struct NTFS_DE *de, struct NTFS_DE *new_de,
+	      bool *is_bad);
+
+bool ni_is_dirty(struct inode *inode);
+
+/* Globals from fslog.c */
+int log_replay(struct ntfs_inode *ni, bool *initialized);
+
+/* Globals from fsntfs.c */
+bool ntfs_fix_pre_write(struct NTFS_RECORD_HEADER *rhdr, size_t bytes);
+int ntfs_fix_post_read(struct NTFS_RECORD_HEADER *rhdr, size_t bytes,
+		       bool simple);
+int ntfs_extend_init(struct ntfs_sb_info *sbi);
+int ntfs_loadlog_and_replay(struct ntfs_inode *ni, struct ntfs_sb_info *sbi);
+const struct ATTR_DEF_ENTRY *ntfs_query_def(struct ntfs_sb_info *sbi,
+					    enum ATTR_TYPE Type);
+int ntfs_look_for_free_space(struct ntfs_sb_info *sbi, CLST lcn, CLST len,
+			     CLST *new_lcn, CLST *new_len,
+			     enum ALLOCATE_OPT opt);
+int ntfs_look_free_mft(struct ntfs_sb_info *sbi, CLST *rno, bool mft,
+		       struct ntfs_inode *ni, struct mft_inode **mi);
+void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST rno);
+int ntfs_clear_mft_tail(struct ntfs_sb_info *sbi, size_t from, size_t to);
+int ntfs_refresh_zone(struct ntfs_sb_info *sbi);
+int ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait);
+enum NTFS_DIRTY_FLAGS {
+	NTFS_DIRTY_CLEAR = 0,
+	NTFS_DIRTY_DIRTY = 1,
+	NTFS_DIRTY_ERROR = 2,
+};
+int ntfs_set_state(struct ntfs_sb_info *sbi, enum NTFS_DIRTY_FLAGS dirty);
+int ntfs_sb_read(struct super_block *sb, u64 lbo, size_t bytes, void *buffer);
+int ntfs_sb_write(struct super_block *sb, u64 lbo, size_t bytes,
+		  const void *buffer, int wait);
+int ntfs_sb_write_run(struct ntfs_sb_info *sbi, const struct runs_tree *run,
+		      u64 vbo, const void *buf, size_t bytes);
+struct buffer_head *ntfs_bread_run(struct ntfs_sb_info *sbi,
+				   const struct runs_tree *run, u64 vbo);
+int ntfs_read_run_nb(struct ntfs_sb_info *sbi, const struct runs_tree *run,
+		     u64 vbo, void *buf, u32 bytes, struct ntfs_buffers *nb);
+int ntfs_read_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo,
+		 struct NTFS_RECORD_HEADER *rhdr, u32 bytes,
+		 struct ntfs_buffers *nb);
+int ntfs_get_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo,
+		u32 bytes, struct ntfs_buffers *nb);
+int ntfs_write_bh(struct ntfs_sb_info *sbi, struct NTFS_RECORD_HEADER *rhdr,
+		  struct ntfs_buffers *nb, int sync);
+int ntfs_bio_pages(struct ntfs_sb_info *sbi, const struct runs_tree *run,
+		   struct page **pages, u32 nr_pages, u64 vbo, u32 bytes,
+		   u32 op);
+int ntfs_bio_fill_1(struct ntfs_sb_info *sbi, const struct runs_tree *run);
+int ntfs_vbo_to_lbo(struct ntfs_sb_info *sbi, const struct runs_tree *run,
+		    u64 vbo, u64 *lbo, u64 *bytes);
+struct ntfs_inode *ntfs_new_inode(struct ntfs_sb_info *sbi, CLST nRec,
+				  bool dir);
+extern const u8 s_default_security[0x50];
+bool is_sd_valid(const struct SECURITY_DESCRIPTOR_RELATIVE *sd, u32 len);
+int ntfs_security_init(struct ntfs_sb_info *sbi);
+int ntfs_get_security_by_id(struct ntfs_sb_info *sbi, __le32 security_id,
+			    struct SECURITY_DESCRIPTOR_RELATIVE **sd,
+			    size_t *size);
+int ntfs_insert_security(struct ntfs_sb_info *sbi,
+			 const struct SECURITY_DESCRIPTOR_RELATIVE *sd,
+			 u32 size, __le32 *security_id, bool *inserted);
+int ntfs_reparse_init(struct ntfs_sb_info *sbi);
+int ntfs_objid_init(struct ntfs_sb_info *sbi);
+int ntfs_objid_remove(struct ntfs_sb_info *sbi, struct GUID *guid);
+int ntfs_insert_reparse(struct ntfs_sb_info *sbi, __le32 rtag,
+			const struct MFT_REF *ref);
+int ntfs_remove_reparse(struct ntfs_sb_info *sbi, __le32 rtag,
+			const struct MFT_REF *ref);
+void mark_as_free_ex(struct ntfs_sb_info *sbi, CLST lcn, CLST len, bool trim);
+int run_deallocate(struct ntfs_sb_info *sbi, struct runs_tree *run, bool trim);
+
+/* Globals from index.c */
+int indx_used_bit(struct ntfs_index *indx, struct ntfs_inode *ni, size_t *bit);
+void fnd_clear(struct ntfs_fnd *fnd);
+static inline struct ntfs_fnd *fnd_get(void)
+{
+	return kzalloc(sizeof(struct ntfs_fnd), GFP_NOFS);
+}
+static inline void fnd_put(struct ntfs_fnd *fnd)
+{
+	if (fnd) {
+		fnd_clear(fnd);
+		kfree(fnd);
+	}
+}
+void indx_clear(struct ntfs_index *idx);
+int indx_init(struct ntfs_index *indx, struct ntfs_sb_info *sbi,
+	      const struct ATTRIB *attr, enum index_mutex_classed type);
+struct INDEX_ROOT *indx_get_root(struct ntfs_index *indx, struct ntfs_inode *ni,
+				 struct ATTRIB **attr, struct mft_inode **mi);
+int indx_read(struct ntfs_index *idx, struct ntfs_inode *ni, CLST vbn,
+	      struct indx_node **node);
+int indx_find(struct ntfs_index *indx, struct ntfs_inode *dir,
+	      const struct INDEX_ROOT *root, const void *Key, size_t KeyLen,
+	      const void *param, int *diff, struct NTFS_DE **entry,
+	      struct ntfs_fnd *fnd);
+int indx_find_sort(struct ntfs_index *indx, struct ntfs_inode *ni,
+		   const struct INDEX_ROOT *root, struct NTFS_DE **entry,
+		   struct ntfs_fnd *fnd);
+int indx_find_raw(struct ntfs_index *indx, struct ntfs_inode *ni,
+		  const struct INDEX_ROOT *root, struct NTFS_DE **entry,
+		  size_t *off, struct ntfs_fnd *fnd);
+int indx_insert_entry(struct ntfs_index *indx, struct ntfs_inode *ni,
+		      const struct NTFS_DE *new_de, const void *param,
+		      struct ntfs_fnd *fnd, bool undo);
+int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni,
+		      const void *key, u32 key_len, const void *param);
+int indx_update_dup(struct ntfs_inode *ni, struct ntfs_sb_info *sbi,
+		    const struct ATTR_FILE_NAME *fname,
+		    const struct NTFS_DUP_INFO *dup, int sync);
+
+/* Globals from inode.c */
+struct inode *ntfs_iget5(struct super_block *sb, const struct MFT_REF *ref,
+			 const struct cpu_str *name);
+int ntfs_set_size(struct inode *inode, u64 new_size);
+int reset_log_file(struct inode *inode);
+int ntfs_get_block(struct inode *inode, sector_t vbn,
+		   struct buffer_head *bh_result, int create);
+int ntfs3_write_inode(struct inode *inode, struct writeback_control *wbc);
+int ntfs_sync_inode(struct inode *inode);
+int ntfs_flush_inodes(struct super_block *sb, struct inode *i1,
+		      struct inode *i2);
+int inode_write_data(struct inode *inode, const void *data, size_t bytes);
+struct inode *ntfs_create_inode(struct user_namespace *mnt_userns,
+				struct inode *dir, struct dentry *dentry,
+				const struct cpu_str *uni, umode_t mode,
+				dev_t dev, const char *symname, u32 size,
+				struct ntfs_fnd *fnd);
+int ntfs_link_inode(struct inode *inode, struct dentry *dentry);
+int ntfs_unlink_inode(struct inode *dir, const struct dentry *dentry);
+void ntfs_evict_inode(struct inode *inode);
+extern const struct inode_operations ntfs_link_inode_operations;
+extern const struct address_space_operations ntfs_aops;
+extern const struct address_space_operations ntfs_aops_cmpr;
+
+/* Globals from name_i.c */
+int fill_name_de(struct ntfs_sb_info *sbi, void *buf, const struct qstr *name,
+		 const struct cpu_str *uni);
+struct dentry *ntfs3_get_parent(struct dentry *child);
+
+extern const struct inode_operations ntfs_dir_inode_operations;
+extern const struct inode_operations ntfs_special_inode_operations;
+
+/* Globals from record.c */
+int mi_get(struct ntfs_sb_info *sbi, CLST rno, struct mft_inode **mi);
+void mi_put(struct mft_inode *mi);
+int mi_init(struct mft_inode *mi, struct ntfs_sb_info *sbi, CLST rno);
+int mi_read(struct mft_inode *mi, bool is_mft);
+struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr);
+// TODO: id?
+struct ATTRIB *mi_find_attr(struct mft_inode *mi, struct ATTRIB *attr,
+			    enum ATTR_TYPE type, const __le16 *name,
+			    size_t name_len, const __le16 *id);
+static inline struct ATTRIB *rec_find_attr_le(struct mft_inode *rec,
+					      struct ATTR_LIST_ENTRY *le)
+{
+	return mi_find_attr(rec, NULL, le->type, le_name(le), le->name_len,
+			    &le->id);
+}
+int mi_write(struct mft_inode *mi, int wait);
+int mi_format_new(struct mft_inode *mi, struct ntfs_sb_info *sbi, CLST rno,
+		  __le16 flags, bool is_mft);
+void mi_mark_free(struct mft_inode *mi);
+struct ATTRIB *mi_insert_attr(struct mft_inode *mi, enum ATTR_TYPE type,
+			      const __le16 *name, u8 name_len, u32 asize,
+			      u16 name_off);
+
+bool mi_remove_attr(struct ntfs_inode *ni, struct mft_inode *mi,
+		    struct ATTRIB *attr);
+bool mi_resize_attr(struct mft_inode *mi, struct ATTRIB *attr, int bytes);
+int mi_pack_runs(struct mft_inode *mi, struct ATTRIB *attr,
+		 struct runs_tree *run, CLST len);
+static inline bool mi_is_ref(const struct mft_inode *mi,
+			     const struct MFT_REF *ref)
+{
+	if (le32_to_cpu(ref->low) != mi->rno)
+		return false;
+	if (ref->seq != mi->mrec->seq)
+		return false;
+
+#ifdef CONFIG_NTFS3_64BIT_CLUSTER
+	return le16_to_cpu(ref->high) == (mi->rno >> 32);
+#else
+	return !ref->high;
+#endif
+}
+
+static inline void mi_get_ref(const struct mft_inode *mi, struct MFT_REF *ref)
+{
+	ref->low = cpu_to_le32(mi->rno);
+#ifdef CONFIG_NTFS3_64BIT_CLUSTER
+	ref->high = cpu_to_le16(mi->rno >> 32);
+#else
+	ref->high = 0;
+#endif
+	ref->seq = mi->mrec->seq;
+}
+
+/* Globals from run.c */
+bool run_lookup_entry(const struct runs_tree *run, CLST vcn, CLST *lcn,
+		      CLST *len, size_t *index);
+void run_truncate(struct runs_tree *run, CLST vcn);
+void run_truncate_head(struct runs_tree *run, CLST vcn);
+void run_truncate_around(struct runs_tree *run, CLST vcn);
+bool run_lookup(const struct runs_tree *run, CLST vcn, size_t *Index);
+bool run_add_entry(struct runs_tree *run, CLST vcn, CLST lcn, CLST len,
+		   bool is_mft);
+bool run_collapse_range(struct runs_tree *run, CLST vcn, CLST len);
+bool run_get_entry(const struct runs_tree *run, size_t index, CLST *vcn,
+		   CLST *lcn, CLST *len);
+bool run_is_mapped_full(const struct runs_tree *run, CLST svcn, CLST evcn);
+
+int run_pack(const struct runs_tree *run, CLST svcn, CLST len, u8 *run_buf,
+	     u32 run_buf_size, CLST *packed_vcns);
+int run_unpack(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino,
+	       CLST svcn, CLST evcn, CLST vcn, const u8 *run_buf,
+	       u32 run_buf_size);
+
+#ifdef NTFS3_CHECK_FREE_CLST
+int run_unpack_ex(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino,
+		  CLST svcn, CLST evcn, CLST vcn, const u8 *run_buf,
+		  u32 run_buf_size);
+#else
+#define run_unpack_ex run_unpack
+#endif
+int run_get_highest_vcn(CLST vcn, const u8 *run_buf, u64 *highest_vcn);
+
+/* Globals from super.c */
+void *ntfs_set_shared(void *ptr, u32 bytes);
+void *ntfs_put_shared(void *ptr);
+void ntfs_unmap_meta(struct super_block *sb, CLST lcn, CLST len);
+int ntfs_discard(struct ntfs_sb_info *sbi, CLST Lcn, CLST Len);
+
+/* Globals from bitmap.c*/
+int __init ntfs3_init_bitmap(void);
+void ntfs3_exit_bitmap(void);
+void wnd_close(struct wnd_bitmap *wnd);
+static inline size_t wnd_zeroes(const struct wnd_bitmap *wnd)
+{
+	return wnd->total_zeroes;
+}
+int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits);
+int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits);
+int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits);
+bool wnd_is_free(struct wnd_bitmap *wnd, size_t bit, size_t bits);
+bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits);
+
+/* Possible values for 'flags' 'wnd_find'. */
+#define BITMAP_FIND_MARK_AS_USED 0x01
+#define BITMAP_FIND_FULL 0x02
+size_t wnd_find(struct wnd_bitmap *wnd, size_t to_alloc, size_t hint,
+		size_t flags, size_t *allocated);
+int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits);
+void wnd_zone_set(struct wnd_bitmap *wnd, size_t Lcn, size_t Len);
+int ntfs_trim_fs(struct ntfs_sb_info *sbi, struct fstrim_range *range);
+
+/* Globals from upcase.c */
+int ntfs_cmp_names(const __le16 *s1, size_t l1, const __le16 *s2, size_t l2,
+		   const u16 *upcase, bool bothcase);
+int ntfs_cmp_names_cpu(const struct cpu_str *uni1, const struct le_str *uni2,
+		       const u16 *upcase, bool bothcase);
+
+/* globals from xattr.c */
+#ifdef CONFIG_NTFS3_FS_POSIX_ACL
+struct posix_acl *ntfs_get_acl(struct inode *inode, int type, bool rcu);
+int ntfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+		 struct posix_acl *acl, int type);
+int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode,
+		  struct inode *dir);
+#else
+#define ntfs_get_acl NULL
+#define ntfs_set_acl NULL
+#endif
+
+int ntfs_acl_chmod(struct user_namespace *mnt_userns, struct inode *inode);
+int ntfs_permission(struct user_namespace *mnt_userns, struct inode *inode,
+		    int mask);
+ssize_t ntfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
+extern const struct xattr_handler *ntfs_xattr_handlers[];
+
+int ntfs_save_wsl_perm(struct inode *inode);
+void ntfs_get_wsl_perm(struct inode *inode);
+
+/* globals from lznt.c */
+struct lznt *get_lznt_ctx(int level);
+size_t compress_lznt(const void *uncompressed, size_t uncompressed_size,
+		     void *compressed, size_t compressed_size,
+		     struct lznt *ctx);
+ssize_t decompress_lznt(const void *compressed, size_t compressed_size,
+			void *uncompressed, size_t uncompressed_size);
+
+static inline bool is_ntfs3(struct ntfs_sb_info *sbi)
+{
+	return sbi->volume.major_ver >= 3;
+}
+
+/* (sb->s_flags & SB_ACTIVE) */
+static inline bool is_mounted(struct ntfs_sb_info *sbi)
+{
+	return !!sbi->sb->s_root;
+}
+
+static inline bool ntfs_is_meta_file(struct ntfs_sb_info *sbi, CLST rno)
+{
+	return rno < MFT_REC_FREE || rno == sbi->objid_no ||
+	       rno == sbi->quota_no || rno == sbi->reparse_no ||
+	       rno == sbi->usn_jrnl_no;
+}
+
+static inline void ntfs_unmap_page(struct page *page)
+{
+	kunmap(page);
+	put_page(page);
+}
+
+static inline struct page *ntfs_map_page(struct address_space *mapping,
+					 unsigned long index)
+{
+	struct page *page = read_mapping_page(mapping, index, NULL);
+
+	if (!IS_ERR(page)) {
+		kmap(page);
+		if (!PageError(page))
+			return page;
+		ntfs_unmap_page(page);
+		return ERR_PTR(-EIO);
+	}
+	return page;
+}
+
+static inline size_t wnd_zone_bit(const struct wnd_bitmap *wnd)
+{
+	return wnd->zone_bit;
+}
+
+static inline size_t wnd_zone_len(const struct wnd_bitmap *wnd)
+{
+	return wnd->zone_end - wnd->zone_bit;
+}
+
+static inline void run_init(struct runs_tree *run)
+{
+	run->runs = NULL;
+	run->count = 0;
+	run->allocated = 0;
+}
+
+static inline struct runs_tree *run_alloc(void)
+{
+	return kzalloc(sizeof(struct runs_tree), GFP_NOFS);
+}
+
+static inline void run_close(struct runs_tree *run)
+{
+	kvfree(run->runs);
+	memset(run, 0, sizeof(*run));
+}
+
+static inline void run_free(struct runs_tree *run)
+{
+	if (run) {
+		kvfree(run->runs);
+		kfree(run);
+	}
+}
+
+static inline bool run_is_empty(struct runs_tree *run)
+{
+	return !run->count;
+}
+
+/* NTFS uses quad aligned bitmaps. */
+static inline size_t bitmap_size(size_t bits)
+{
+	return ALIGN((bits + 7) >> 3, 8);
+}
+
+#define _100ns2seconds 10000000
+#define SecondsToStartOf1970 0x00000002B6109100
+
+#define NTFS_TIME_GRAN 100
+
+/*
+ * kernel2nt - Converts in-memory kernel timestamp into nt time.
+ */
+static inline __le64 kernel2nt(const struct timespec64 *ts)
+{
+	// 10^7 units of 100 nanoseconds one second
+	return cpu_to_le64(_100ns2seconds *
+				   (ts->tv_sec + SecondsToStartOf1970) +
+			   ts->tv_nsec / NTFS_TIME_GRAN);
+}
+
+/*
+ * nt2kernel - Converts on-disk nt time into kernel timestamp.
+ */
+static inline void nt2kernel(const __le64 tm, struct timespec64 *ts)
+{
+	u64 t = le64_to_cpu(tm) - _100ns2seconds * SecondsToStartOf1970;
+
+	// WARNING: do_div changes its first argument(!)
+	ts->tv_nsec = do_div(t, _100ns2seconds) * 100;
+	ts->tv_sec = t;
+}
+
+static inline struct ntfs_sb_info *ntfs_sb(struct super_block *sb)
+{
+	return sb->s_fs_info;
+}
+
+/*
+ * ntfs_up_cluster - Align up on cluster boundary.
+ */
+static inline u64 ntfs_up_cluster(const struct ntfs_sb_info *sbi, u64 size)
+{
+	return (size + sbi->cluster_mask) & sbi->cluster_mask_inv;
+}
+
+/*
+ * ntfs_up_block - Align up on cluster boundary.
+ */
+static inline u64 ntfs_up_block(const struct super_block *sb, u64 size)
+{
+	return (size + sb->s_blocksize - 1) & ~(u64)(sb->s_blocksize - 1);
+}
+
+static inline CLST bytes_to_cluster(const struct ntfs_sb_info *sbi, u64 size)
+{
+	return (size + sbi->cluster_mask) >> sbi->cluster_bits;
+}
+
+static inline u64 bytes_to_block(const struct super_block *sb, u64 size)
+{
+	return (size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
+}
+
+static inline struct buffer_head *ntfs_bread(struct super_block *sb,
+					     sector_t block)
+{
+	struct buffer_head *bh = sb_bread(sb, block);
+
+	if (bh)
+		return bh;
+
+	ntfs_err(sb, "failed to read volume at offset 0x%llx",
+		 (u64)block << sb->s_blocksize_bits);
+	return NULL;
+}
+
+static inline struct ntfs_inode *ntfs_i(struct inode *inode)
+{
+	return container_of(inode, struct ntfs_inode, vfs_inode);
+}
+
+static inline bool is_compressed(const struct ntfs_inode *ni)
+{
+	return (ni->std_fa & FILE_ATTRIBUTE_COMPRESSED) ||
+	       (ni->ni_flags & NI_FLAG_COMPRESSED_MASK);
+}
+
+static inline int ni_ext_compress_bits(const struct ntfs_inode *ni)
+{
+	return 0xb + (ni->ni_flags & NI_FLAG_COMPRESSED_MASK);
+}
+
+/* Bits - 0xc, 0xd, 0xe, 0xf, 0x10 */
+static inline void ni_set_ext_compress_bits(struct ntfs_inode *ni, u8 bits)
+{
+	ni->ni_flags |= (bits - 0xb) & NI_FLAG_COMPRESSED_MASK;
+}
+
+static inline bool is_dedup(const struct ntfs_inode *ni)
+{
+	return ni->ni_flags & NI_FLAG_DEDUPLICATED;
+}
+
+static inline bool is_encrypted(const struct ntfs_inode *ni)
+{
+	return ni->std_fa & FILE_ATTRIBUTE_ENCRYPTED;
+}
+
+static inline bool is_sparsed(const struct ntfs_inode *ni)
+{
+	return ni->std_fa & FILE_ATTRIBUTE_SPARSE_FILE;
+}
+
+static inline int is_resident(struct ntfs_inode *ni)
+{
+	return ni->ni_flags & NI_FLAG_RESIDENT;
+}
+
+static inline void le16_sub_cpu(__le16 *var, u16 val)
+{
+	*var = cpu_to_le16(le16_to_cpu(*var) - val);
+}
+
+static inline void le32_sub_cpu(__le32 *var, u32 val)
+{
+	*var = cpu_to_le32(le32_to_cpu(*var) - val);
+}
+
+static inline void nb_put(struct ntfs_buffers *nb)
+{
+	u32 i, nbufs = nb->nbufs;
+
+	if (!nbufs)
+		return;
+
+	for (i = 0; i < nbufs; i++)
+		put_bh(nb->bh[i]);
+	nb->nbufs = 0;
+}
+
+static inline void put_indx_node(struct indx_node *in)
+{
+	if (!in)
+		return;
+
+	kfree(in->index);
+	nb_put(&in->nb);
+	kfree(in);
+}
+
+static inline void mi_clear(struct mft_inode *mi)
+{
+	nb_put(&mi->nb);
+	kfree(mi->mrec);
+	mi->mrec = NULL;
+}
+
+static inline void ni_lock(struct ntfs_inode *ni)
+{
+	mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_NORMAL);
+}
+
+static inline void ni_lock_dir(struct ntfs_inode *ni)
+{
+	mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_PARENT);
+}
+
+static inline void ni_unlock(struct ntfs_inode *ni)
+{
+	mutex_unlock(&ni->ni_lock);
+}
+
+static inline int ni_trylock(struct ntfs_inode *ni)
+{
+	return mutex_trylock(&ni->ni_lock);
+}
+
+static inline int attr_load_runs_attr(struct ntfs_inode *ni,
+				      struct ATTRIB *attr,
+				      struct runs_tree *run, CLST vcn)
+{
+	return attr_load_runs_vcn(ni, attr->type, attr_name(attr),
+				  attr->name_len, run, vcn);
+}
+
+static inline void le64_sub_cpu(__le64 *var, u64 val)
+{
+	*var = cpu_to_le64(le64_to_cpu(*var) - val);
+}
+
+#endif /* _LINUX_NTFS3_NTFS_FS_H */
diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c
new file mode 100644
index 000000000000..103705c86772
--- /dev/null
+++ b/fs/ntfs3/record.c
@@ -0,0 +1,605 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/fs.h>
+#include <linux/nls.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+static inline int compare_attr(const struct ATTRIB *left, enum ATTR_TYPE type,
+			       const __le16 *name, u8 name_len,
+			       const u16 *upcase)
+{
+	/* First, compare the type codes. */
+	int diff = le32_to_cpu(left->type) - le32_to_cpu(type);
+
+	if (diff)
+		return diff;
+
+	/* They have the same type code, so we have to compare the names. */
+	return ntfs_cmp_names(attr_name(left), left->name_len, name, name_len,
+			      upcase, true);
+}
+
+/*
+ * mi_new_attt_id
+ *
+ * Return: Unused attribute id that is less than mrec->next_attr_id.
+ */
+static __le16 mi_new_attt_id(struct mft_inode *mi)
+{
+	u16 free_id, max_id, t16;
+	struct MFT_REC *rec = mi->mrec;
+	struct ATTRIB *attr;
+	__le16 id;
+
+	id = rec->next_attr_id;
+	free_id = le16_to_cpu(id);
+	if (free_id < 0x7FFF) {
+		rec->next_attr_id = cpu_to_le16(free_id + 1);
+		return id;
+	}
+
+	/* One record can store up to 1024/24 ~= 42 attributes. */
+	free_id = 0;
+	max_id = 0;
+
+	attr = NULL;
+
+	for (;;) {
+		attr = mi_enum_attr(mi, attr);
+		if (!attr) {
+			rec->next_attr_id = cpu_to_le16(max_id + 1);
+			mi->dirty = true;
+			return cpu_to_le16(free_id);
+		}
+
+		t16 = le16_to_cpu(attr->id);
+		if (t16 == free_id) {
+			free_id += 1;
+			attr = NULL;
+		} else if (max_id < t16)
+			max_id = t16;
+	}
+}
+
+int mi_get(struct ntfs_sb_info *sbi, CLST rno, struct mft_inode **mi)
+{
+	int err;
+	struct mft_inode *m = kzalloc(sizeof(struct mft_inode), GFP_NOFS);
+
+	if (!m)
+		return -ENOMEM;
+
+	err = mi_init(m, sbi, rno);
+	if (err) {
+		kfree(m);
+		return err;
+	}
+
+	err = mi_read(m, false);
+	if (err) {
+		mi_put(m);
+		return err;
+	}
+
+	*mi = m;
+	return 0;
+}
+
+void mi_put(struct mft_inode *mi)
+{
+	mi_clear(mi);
+	kfree(mi);
+}
+
+int mi_init(struct mft_inode *mi, struct ntfs_sb_info *sbi, CLST rno)
+{
+	mi->sbi = sbi;
+	mi->rno = rno;
+	mi->mrec = kmalloc(sbi->record_size, GFP_NOFS);
+	if (!mi->mrec)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ * mi_read - Read MFT data.
+ */
+int mi_read(struct mft_inode *mi, bool is_mft)
+{
+	int err;
+	struct MFT_REC *rec = mi->mrec;
+	struct ntfs_sb_info *sbi = mi->sbi;
+	u32 bpr = sbi->record_size;
+	u64 vbo = (u64)mi->rno << sbi->record_bits;
+	struct ntfs_inode *mft_ni = sbi->mft.ni;
+	struct runs_tree *run = mft_ni ? &mft_ni->file.run : NULL;
+	struct rw_semaphore *rw_lock = NULL;
+
+	if (is_mounted(sbi)) {
+		if (!is_mft) {
+			rw_lock = &mft_ni->file.run_lock;
+			down_read(rw_lock);
+		}
+	}
+
+	err = ntfs_read_bh(sbi, run, vbo, &rec->rhdr, bpr, &mi->nb);
+	if (rw_lock)
+		up_read(rw_lock);
+	if (!err)
+		goto ok;
+
+	if (err == -E_NTFS_FIXUP) {
+		mi->dirty = true;
+		goto ok;
+	}
+
+	if (err != -ENOENT)
+		goto out;
+
+	if (rw_lock) {
+		ni_lock(mft_ni);
+		down_write(rw_lock);
+	}
+	err = attr_load_runs_vcn(mft_ni, ATTR_DATA, NULL, 0, &mft_ni->file.run,
+				 vbo >> sbi->cluster_bits);
+	if (rw_lock) {
+		up_write(rw_lock);
+		ni_unlock(mft_ni);
+	}
+	if (err)
+		goto out;
+
+	if (rw_lock)
+		down_read(rw_lock);
+	err = ntfs_read_bh(sbi, run, vbo, &rec->rhdr, bpr, &mi->nb);
+	if (rw_lock)
+		up_read(rw_lock);
+
+	if (err == -E_NTFS_FIXUP) {
+		mi->dirty = true;
+		goto ok;
+	}
+	if (err)
+		goto out;
+
+ok:
+	/* Check field 'total' only here. */
+	if (le32_to_cpu(rec->total) != bpr) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	return 0;
+
+out:
+	return err;
+}
+
+struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr)
+{
+	const struct MFT_REC *rec = mi->mrec;
+	u32 used = le32_to_cpu(rec->used);
+	u32 t32, off, asize;
+	u16 t16;
+
+	if (!attr) {
+		u32 total = le32_to_cpu(rec->total);
+
+		off = le16_to_cpu(rec->attr_off);
+
+		if (used > total)
+			return NULL;
+
+		if (off >= used || off < MFTRECORD_FIXUP_OFFSET_1 ||
+		    !IS_ALIGNED(off, 4)) {
+			return NULL;
+		}
+
+		/* Skip non-resident records. */
+		if (!is_rec_inuse(rec))
+			return NULL;
+
+		attr = Add2Ptr(rec, off);
+	} else {
+		/* Check if input attr inside record. */
+		off = PtrOffset(rec, attr);
+		if (off >= used)
+			return NULL;
+
+		asize = le32_to_cpu(attr->size);
+		if (asize < SIZEOF_RESIDENT) {
+			/* Impossible 'cause we should not return such attribute. */
+			return NULL;
+		}
+
+		attr = Add2Ptr(attr, asize);
+		off += asize;
+	}
+
+	asize = le32_to_cpu(attr->size);
+
+	/* Can we use the first field (attr->type). */
+	if (off + 8 > used) {
+		static_assert(ALIGN(sizeof(enum ATTR_TYPE), 8) == 8);
+		return NULL;
+	}
+
+	if (attr->type == ATTR_END) {
+		/* End of enumeration. */
+		return NULL;
+	}
+
+	/* 0x100 is last known attribute for now. */
+	t32 = le32_to_cpu(attr->type);
+	if ((t32 & 0xf) || (t32 > 0x100))
+		return NULL;
+
+	/* Check boundary. */
+	if (off + asize > used)
+		return NULL;
+
+	/* Check size of attribute. */
+	if (!attr->non_res) {
+		if (asize < SIZEOF_RESIDENT)
+			return NULL;
+
+		t16 = le16_to_cpu(attr->res.data_off);
+
+		if (t16 > asize)
+			return NULL;
+
+		t32 = le32_to_cpu(attr->res.data_size);
+		if (t16 + t32 > asize)
+			return NULL;
+
+		return attr;
+	}
+
+	/* Check some nonresident fields. */
+	if (attr->name_len &&
+	    le16_to_cpu(attr->name_off) + sizeof(short) * attr->name_len >
+		    le16_to_cpu(attr->nres.run_off)) {
+		return NULL;
+	}
+
+	if (attr->nres.svcn || !is_attr_ext(attr)) {
+		if (asize + 8 < SIZEOF_NONRESIDENT)
+			return NULL;
+
+		if (attr->nres.c_unit)
+			return NULL;
+	} else if (asize + 8 < SIZEOF_NONRESIDENT_EX)
+		return NULL;
+
+	return attr;
+}
+
+/*
+ * mi_find_attr - Find the attribute by type and name and id.
+ */
+struct ATTRIB *mi_find_attr(struct mft_inode *mi, struct ATTRIB *attr,
+			    enum ATTR_TYPE type, const __le16 *name,
+			    size_t name_len, const __le16 *id)
+{
+	u32 type_in = le32_to_cpu(type);
+	u32 atype;
+
+next_attr:
+	attr = mi_enum_attr(mi, attr);
+	if (!attr)
+		return NULL;
+
+	atype = le32_to_cpu(attr->type);
+	if (atype > type_in)
+		return NULL;
+
+	if (atype < type_in)
+		goto next_attr;
+
+	if (attr->name_len != name_len)
+		goto next_attr;
+
+	if (name_len && memcmp(attr_name(attr), name, name_len * sizeof(short)))
+		goto next_attr;
+
+	if (id && *id != attr->id)
+		goto next_attr;
+
+	return attr;
+}
+
+int mi_write(struct mft_inode *mi, int wait)
+{
+	struct MFT_REC *rec;
+	int err;
+	struct ntfs_sb_info *sbi;
+
+	if (!mi->dirty)
+		return 0;
+
+	sbi = mi->sbi;
+	rec = mi->mrec;
+
+	err = ntfs_write_bh(sbi, &rec->rhdr, &mi->nb, wait);
+	if (err)
+		return err;
+
+	if (mi->rno < sbi->mft.recs_mirr)
+		sbi->flags |= NTFS_FLAGS_MFTMIRR;
+
+	mi->dirty = false;
+
+	return 0;
+}
+
+int mi_format_new(struct mft_inode *mi, struct ntfs_sb_info *sbi, CLST rno,
+		  __le16 flags, bool is_mft)
+{
+	int err;
+	u16 seq = 1;
+	struct MFT_REC *rec;
+	u64 vbo = (u64)rno << sbi->record_bits;
+
+	err = mi_init(mi, sbi, rno);
+	if (err)
+		return err;
+
+	rec = mi->mrec;
+
+	if (rno == MFT_REC_MFT) {
+		;
+	} else if (rno < MFT_REC_FREE) {
+		seq = rno;
+	} else if (rno >= sbi->mft.used) {
+		;
+	} else if (mi_read(mi, is_mft)) {
+		;
+	} else if (rec->rhdr.sign == NTFS_FILE_SIGNATURE) {
+		/* Record is reused. Update its sequence number. */
+		seq = le16_to_cpu(rec->seq) + 1;
+		if (!seq)
+			seq = 1;
+	}
+
+	memcpy(rec, sbi->new_rec, sbi->record_size);
+
+	rec->seq = cpu_to_le16(seq);
+	rec->flags = RECORD_FLAG_IN_USE | flags;
+
+	mi->dirty = true;
+
+	if (!mi->nb.nbufs) {
+		struct ntfs_inode *ni = sbi->mft.ni;
+		bool lock = false;
+
+		if (is_mounted(sbi) && !is_mft) {
+			down_read(&ni->file.run_lock);
+			lock = true;
+		}
+
+		err = ntfs_get_bh(sbi, &ni->file.run, vbo, sbi->record_size,
+				  &mi->nb);
+		if (lock)
+			up_read(&ni->file.run_lock);
+	}
+
+	return err;
+}
+
+/*
+ * mi_mark_free - Mark record as unused and marks it as free in bitmap.
+ */
+void mi_mark_free(struct mft_inode *mi)
+{
+	CLST rno = mi->rno;
+	struct ntfs_sb_info *sbi = mi->sbi;
+
+	if (rno >= MFT_REC_RESERVED && rno < MFT_REC_FREE) {
+		ntfs_clear_mft_tail(sbi, rno, rno + 1);
+		mi->dirty = false;
+		return;
+	}
+
+	if (mi->mrec) {
+		clear_rec_inuse(mi->mrec);
+		mi->dirty = true;
+		mi_write(mi, 0);
+	}
+	ntfs_mark_rec_free(sbi, rno);
+}
+
+/*
+ * mi_insert_attr - Reserve space for new attribute.
+ *
+ * Return: Not full constructed attribute or NULL if not possible to create.
+ */
+struct ATTRIB *mi_insert_attr(struct mft_inode *mi, enum ATTR_TYPE type,
+			      const __le16 *name, u8 name_len, u32 asize,
+			      u16 name_off)
+{
+	size_t tail;
+	struct ATTRIB *attr;
+	__le16 id;
+	struct MFT_REC *rec = mi->mrec;
+	struct ntfs_sb_info *sbi = mi->sbi;
+	u32 used = le32_to_cpu(rec->used);
+	const u16 *upcase = sbi->upcase;
+	int diff;
+
+	/* Can we insert mi attribute? */
+	if (used + asize > mi->sbi->record_size)
+		return NULL;
+
+	/*
+	 * Scan through the list of attributes to find the point
+	 * at which we should insert it.
+	 */
+	attr = NULL;
+	while ((attr = mi_enum_attr(mi, attr))) {
+		diff = compare_attr(attr, type, name, name_len, upcase);
+		if (diff > 0)
+			break;
+		if (diff < 0)
+			continue;
+
+		if (!is_attr_indexed(attr))
+			return NULL;
+		break;
+	}
+
+	if (!attr) {
+		tail = 8; /* Not used, just to suppress warning. */
+		attr = Add2Ptr(rec, used - 8);
+	} else {
+		tail = used - PtrOffset(rec, attr);
+	}
+
+	id = mi_new_attt_id(mi);
+
+	memmove(Add2Ptr(attr, asize), attr, tail);
+	memset(attr, 0, asize);
+
+	attr->type = type;
+	attr->size = cpu_to_le32(asize);
+	attr->name_len = name_len;
+	attr->name_off = cpu_to_le16(name_off);
+	attr->id = id;
+
+	memmove(Add2Ptr(attr, name_off), name, name_len * sizeof(short));
+	rec->used = cpu_to_le32(used + asize);
+
+	mi->dirty = true;
+
+	return attr;
+}
+
+/*
+ * mi_remove_attr - Remove the attribute from record.
+ *
+ * NOTE: The source attr will point to next attribute.
+ */
+bool mi_remove_attr(struct ntfs_inode *ni, struct mft_inode *mi,
+		    struct ATTRIB *attr)
+{
+	struct MFT_REC *rec = mi->mrec;
+	u32 aoff = PtrOffset(rec, attr);
+	u32 used = le32_to_cpu(rec->used);
+	u32 asize = le32_to_cpu(attr->size);
+
+	if (aoff + asize > used)
+		return false;
+
+	if (ni && is_attr_indexed(attr)) {
+		le16_add_cpu(&ni->mi.mrec->hard_links, -1);
+		ni->mi.dirty = true;
+	}
+
+	used -= asize;
+	memmove(attr, Add2Ptr(attr, asize), used - aoff);
+	rec->used = cpu_to_le32(used);
+	mi->dirty = true;
+
+	return true;
+}
+
+/* bytes = "new attribute size" - "old attribute size" */
+bool mi_resize_attr(struct mft_inode *mi, struct ATTRIB *attr, int bytes)
+{
+	struct MFT_REC *rec = mi->mrec;
+	u32 aoff = PtrOffset(rec, attr);
+	u32 total, used = le32_to_cpu(rec->used);
+	u32 nsize, asize = le32_to_cpu(attr->size);
+	u32 rsize = le32_to_cpu(attr->res.data_size);
+	int tail = (int)(used - aoff - asize);
+	int dsize;
+	char *next;
+
+	if (tail < 0 || aoff >= used)
+		return false;
+
+	if (!bytes)
+		return true;
+
+	total = le32_to_cpu(rec->total);
+	next = Add2Ptr(attr, asize);
+
+	if (bytes > 0) {
+		dsize = ALIGN(bytes, 8);
+		if (used + dsize > total)
+			return false;
+		nsize = asize + dsize;
+		/* Move tail */
+		memmove(next + dsize, next, tail);
+		memset(next, 0, dsize);
+		used += dsize;
+		rsize += dsize;
+	} else {
+		dsize = ALIGN(-bytes, 8);
+		if (dsize > asize)
+			return false;
+		nsize = asize - dsize;
+		memmove(next - dsize, next, tail);
+		used -= dsize;
+		rsize -= dsize;
+	}
+
+	rec->used = cpu_to_le32(used);
+	attr->size = cpu_to_le32(nsize);
+	if (!attr->non_res)
+		attr->res.data_size = cpu_to_le32(rsize);
+	mi->dirty = true;
+
+	return true;
+}
+
+int mi_pack_runs(struct mft_inode *mi, struct ATTRIB *attr,
+		 struct runs_tree *run, CLST len)
+{
+	int err = 0;
+	struct ntfs_sb_info *sbi = mi->sbi;
+	u32 new_run_size;
+	CLST plen;
+	struct MFT_REC *rec = mi->mrec;
+	CLST svcn = le64_to_cpu(attr->nres.svcn);
+	u32 used = le32_to_cpu(rec->used);
+	u32 aoff = PtrOffset(rec, attr);
+	u32 asize = le32_to_cpu(attr->size);
+	char *next = Add2Ptr(attr, asize);
+	u16 run_off = le16_to_cpu(attr->nres.run_off);
+	u32 run_size = asize - run_off;
+	u32 tail = used - aoff - asize;
+	u32 dsize = sbi->record_size - used;
+
+	/* Make a maximum gap in current record. */
+	memmove(next + dsize, next, tail);
+
+	/* Pack as much as possible. */
+	err = run_pack(run, svcn, len, Add2Ptr(attr, run_off), run_size + dsize,
+		       &plen);
+	if (err < 0) {
+		memmove(next, next + dsize, tail);
+		return err;
+	}
+
+	new_run_size = ALIGN(err, 8);
+
+	memmove(next + new_run_size - run_size, next + dsize, tail);
+
+	attr->size = cpu_to_le32(asize + new_run_size - run_size);
+	attr->nres.evcn = cpu_to_le64(svcn + plen - 1);
+	rec->used = cpu_to_le32(used + new_run_size - run_size);
+	mi->dirty = true;
+
+	return 0;
+}
diff --git a/fs/ntfs3/run.c b/fs/ntfs3/run.c
new file mode 100644
index 000000000000..26ed2b64345e
--- /dev/null
+++ b/fs/ntfs3/run.c
@@ -0,0 +1,1113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ * TODO: try to use extents tree (instead of array)
+ */
+
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/fs.h>
+#include <linux/log2.h>
+#include <linux/nls.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+/* runs_tree is a continues memory. Try to avoid big size. */
+#define NTFS3_RUN_MAX_BYTES 0x10000
+
+struct ntfs_run {
+	CLST vcn; /* Virtual cluster number. */
+	CLST len; /* Length in clusters. */
+	CLST lcn; /* Logical cluster number. */
+};
+
+/*
+ * run_lookup - Lookup the index of a MCB entry that is first <= vcn.
+ *
+ * Case of success it will return non-zero value and set
+ * @index parameter to index of entry been found.
+ * Case of entry missing from list 'index' will be set to
+ * point to insertion position for the entry question.
+ */
+bool run_lookup(const struct runs_tree *run, CLST vcn, size_t *index)
+{
+	size_t min_idx, max_idx, mid_idx;
+	struct ntfs_run *r;
+
+	if (!run->count) {
+		*index = 0;
+		return false;
+	}
+
+	min_idx = 0;
+	max_idx = run->count - 1;
+
+	/* Check boundary cases specially, 'cause they cover the often requests. */
+	r = run->runs;
+	if (vcn < r->vcn) {
+		*index = 0;
+		return false;
+	}
+
+	if (vcn < r->vcn + r->len) {
+		*index = 0;
+		return true;
+	}
+
+	r += max_idx;
+	if (vcn >= r->vcn + r->len) {
+		*index = run->count;
+		return false;
+	}
+
+	if (vcn >= r->vcn) {
+		*index = max_idx;
+		return true;
+	}
+
+	do {
+		mid_idx = min_idx + ((max_idx - min_idx) >> 1);
+		r = run->runs + mid_idx;
+
+		if (vcn < r->vcn) {
+			max_idx = mid_idx - 1;
+			if (!mid_idx)
+				break;
+		} else if (vcn >= r->vcn + r->len) {
+			min_idx = mid_idx + 1;
+		} else {
+			*index = mid_idx;
+			return true;
+		}
+	} while (min_idx <= max_idx);
+
+	*index = max_idx + 1;
+	return false;
+}
+
+/*
+ * run_consolidate - Consolidate runs starting from a given one.
+ */
+static void run_consolidate(struct runs_tree *run, size_t index)
+{
+	size_t i;
+	struct ntfs_run *r = run->runs + index;
+
+	while (index + 1 < run->count) {
+		/*
+		 * I should merge current run with next
+		 * if start of the next run lies inside one being tested.
+		 */
+		struct ntfs_run *n = r + 1;
+		CLST end = r->vcn + r->len;
+		CLST dl;
+
+		/* Stop if runs are not aligned one to another. */
+		if (n->vcn > end)
+			break;
+
+		dl = end - n->vcn;
+
+		/*
+		 * If range at index overlaps with next one
+		 * then I will either adjust it's start position
+		 * or (if completely matches) dust remove one from the list.
+		 */
+		if (dl > 0) {
+			if (n->len <= dl)
+				goto remove_next_range;
+
+			n->len -= dl;
+			n->vcn += dl;
+			if (n->lcn != SPARSE_LCN)
+				n->lcn += dl;
+			dl = 0;
+		}
+
+		/*
+		 * Stop if sparse mode does not match
+		 * both current and next runs.
+		 */
+		if ((n->lcn == SPARSE_LCN) != (r->lcn == SPARSE_LCN)) {
+			index += 1;
+			r = n;
+			continue;
+		}
+
+		/*
+		 * Check if volume block
+		 * of a next run lcn does not match
+		 * last volume block of the current run.
+		 */
+		if (n->lcn != SPARSE_LCN && n->lcn != r->lcn + r->len)
+			break;
+
+		/*
+		 * Next and current are siblings.
+		 * Eat/join.
+		 */
+		r->len += n->len - dl;
+
+remove_next_range:
+		i = run->count - (index + 1);
+		if (i > 1)
+			memmove(n, n + 1, sizeof(*n) * (i - 1));
+
+		run->count -= 1;
+	}
+}
+
+/*
+ * run_is_mapped_full
+ *
+ * Return: True if range [svcn - evcn] is mapped.
+ */
+bool run_is_mapped_full(const struct runs_tree *run, CLST svcn, CLST evcn)
+{
+	size_t i;
+	const struct ntfs_run *r, *end;
+	CLST next_vcn;
+
+	if (!run_lookup(run, svcn, &i))
+		return false;
+
+	end = run->runs + run->count;
+	r = run->runs + i;
+
+	for (;;) {
+		next_vcn = r->vcn + r->len;
+		if (next_vcn > evcn)
+			return true;
+
+		if (++r >= end)
+			return false;
+
+		if (r->vcn != next_vcn)
+			return false;
+	}
+}
+
+bool run_lookup_entry(const struct runs_tree *run, CLST vcn, CLST *lcn,
+		      CLST *len, size_t *index)
+{
+	size_t idx;
+	CLST gap;
+	struct ntfs_run *r;
+
+	/* Fail immediately if nrun was not touched yet. */
+	if (!run->runs)
+		return false;
+
+	if (!run_lookup(run, vcn, &idx))
+		return false;
+
+	r = run->runs + idx;
+
+	if (vcn >= r->vcn + r->len)
+		return false;
+
+	gap = vcn - r->vcn;
+	if (r->len <= gap)
+		return false;
+
+	*lcn = r->lcn == SPARSE_LCN ? SPARSE_LCN : (r->lcn + gap);
+
+	if (len)
+		*len = r->len - gap;
+	if (index)
+		*index = idx;
+
+	return true;
+}
+
+/*
+ * run_truncate_head - Decommit the range before vcn.
+ */
+void run_truncate_head(struct runs_tree *run, CLST vcn)
+{
+	size_t index;
+	struct ntfs_run *r;
+
+	if (run_lookup(run, vcn, &index)) {
+		r = run->runs + index;
+
+		if (vcn > r->vcn) {
+			CLST dlen = vcn - r->vcn;
+
+			r->vcn = vcn;
+			r->len -= dlen;
+			if (r->lcn != SPARSE_LCN)
+				r->lcn += dlen;
+		}
+
+		if (!index)
+			return;
+	}
+	r = run->runs;
+	memmove(r, r + index, sizeof(*r) * (run->count - index));
+
+	run->count -= index;
+
+	if (!run->count) {
+		kvfree(run->runs);
+		run->runs = NULL;
+		run->allocated = 0;
+	}
+}
+
+/*
+ * run_truncate - Decommit the range after vcn.
+ */
+void run_truncate(struct runs_tree *run, CLST vcn)
+{
+	size_t index;
+
+	/*
+	 * If I hit the range then
+	 * I have to truncate one.
+	 * If range to be truncated is becoming empty
+	 * then it will entirely be removed.
+	 */
+	if (run_lookup(run, vcn, &index)) {
+		struct ntfs_run *r = run->runs + index;
+
+		r->len = vcn - r->vcn;
+
+		if (r->len > 0)
+			index += 1;
+	}
+
+	/*
+	 * At this point 'index' is set to position that
+	 * should be thrown away (including index itself)
+	 * Simple one - just set the limit.
+	 */
+	run->count = index;
+
+	/* Do not reallocate array 'runs'. Only free if possible. */
+	if (!index) {
+		kvfree(run->runs);
+		run->runs = NULL;
+		run->allocated = 0;
+	}
+}
+
+/*
+ * run_truncate_around - Trim head and tail if necessary.
+ */
+void run_truncate_around(struct runs_tree *run, CLST vcn)
+{
+	run_truncate_head(run, vcn);
+
+	if (run->count >= NTFS3_RUN_MAX_BYTES / sizeof(struct ntfs_run) / 2)
+		run_truncate(run, (run->runs + (run->count >> 1))->vcn);
+}
+
+/*
+ * run_add_entry
+ *
+ * Sets location to known state.
+ * Run to be added may overlap with existing location.
+ *
+ * Return: false if of memory.
+ */
+bool run_add_entry(struct runs_tree *run, CLST vcn, CLST lcn, CLST len,
+		   bool is_mft)
+{
+	size_t used, index;
+	struct ntfs_run *r;
+	bool inrange;
+	CLST tail_vcn = 0, tail_len = 0, tail_lcn = 0;
+	bool should_add_tail = false;
+
+	/*
+	 * Lookup the insertion point.
+	 *
+	 * Execute bsearch for the entry containing
+	 * start position question.
+	 */
+	inrange = run_lookup(run, vcn, &index);
+
+	/*
+	 * Shortcut here would be case of
+	 * range not been found but one been added
+	 * continues previous run.
+	 * This case I can directly make use of
+	 * existing range as my start point.
+	 */
+	if (!inrange && index > 0) {
+		struct ntfs_run *t = run->runs + index - 1;
+
+		if (t->vcn + t->len == vcn &&
+		    (t->lcn == SPARSE_LCN) == (lcn == SPARSE_LCN) &&
+		    (lcn == SPARSE_LCN || lcn == t->lcn + t->len)) {
+			inrange = true;
+			index -= 1;
+		}
+	}
+
+	/*
+	 * At this point 'index' either points to the range
+	 * containing start position or to the insertion position
+	 * for a new range.
+	 * So first let's check if range I'm probing is here already.
+	 */
+	if (!inrange) {
+requires_new_range:
+		/*
+		 * Range was not found.
+		 * Insert at position 'index'
+		 */
+		used = run->count * sizeof(struct ntfs_run);
+
+		/*
+		 * Check allocated space.
+		 * If one is not enough to get one more entry
+		 * then it will be reallocated.
+		 */
+		if (run->allocated < used + sizeof(struct ntfs_run)) {
+			size_t bytes;
+			struct ntfs_run *new_ptr;
+
+			/* Use power of 2 for 'bytes'. */
+			if (!used) {
+				bytes = 64;
+			} else if (used <= 16 * PAGE_SIZE) {
+				if (is_power_of_2(run->allocated))
+					bytes = run->allocated << 1;
+				else
+					bytes = (size_t)1
+						<< (2 + blksize_bits(used));
+			} else {
+				bytes = run->allocated + (16 * PAGE_SIZE);
+			}
+
+			WARN_ON(!is_mft && bytes > NTFS3_RUN_MAX_BYTES);
+
+			new_ptr = kvmalloc(bytes, GFP_KERNEL);
+
+			if (!new_ptr)
+				return false;
+
+			r = new_ptr + index;
+			memcpy(new_ptr, run->runs,
+			       index * sizeof(struct ntfs_run));
+			memcpy(r + 1, run->runs + index,
+			       sizeof(struct ntfs_run) * (run->count - index));
+
+			kvfree(run->runs);
+			run->runs = new_ptr;
+			run->allocated = bytes;
+
+		} else {
+			size_t i = run->count - index;
+
+			r = run->runs + index;
+
+			/* memmove appears to be a bottle neck here... */
+			if (i > 0)
+				memmove(r + 1, r, sizeof(struct ntfs_run) * i);
+		}
+
+		r->vcn = vcn;
+		r->lcn = lcn;
+		r->len = len;
+		run->count += 1;
+	} else {
+		r = run->runs + index;
+
+		/*
+		 * If one of ranges was not allocated then we
+		 * have to split location we just matched and
+		 * insert current one.
+		 * A common case this requires tail to be reinserted
+		 * a recursive call.
+		 */
+		if (((lcn == SPARSE_LCN) != (r->lcn == SPARSE_LCN)) ||
+		    (lcn != SPARSE_LCN && lcn != r->lcn + (vcn - r->vcn))) {
+			CLST to_eat = vcn - r->vcn;
+			CLST Tovcn = to_eat + len;
+
+			should_add_tail = Tovcn < r->len;
+
+			if (should_add_tail) {
+				tail_lcn = r->lcn == SPARSE_LCN
+						   ? SPARSE_LCN
+						   : (r->lcn + Tovcn);
+				tail_vcn = r->vcn + Tovcn;
+				tail_len = r->len - Tovcn;
+			}
+
+			if (to_eat > 0) {
+				r->len = to_eat;
+				inrange = false;
+				index += 1;
+				goto requires_new_range;
+			}
+
+			/* lcn should match one were going to add. */
+			r->lcn = lcn;
+		}
+
+		/*
+		 * If existing range fits then were done.
+		 * Otherwise extend found one and fall back to range jocode.
+		 */
+		if (r->vcn + r->len < vcn + len)
+			r->len += len - ((r->vcn + r->len) - vcn);
+	}
+
+	/*
+	 * And normalize it starting from insertion point.
+	 * It's possible that no insertion needed case if
+	 * start point lies within the range of an entry
+	 * that 'index' points to.
+	 */
+	if (inrange && index > 0)
+		index -= 1;
+	run_consolidate(run, index);
+	run_consolidate(run, index + 1);
+
+	/*
+	 * A special case.
+	 * We have to add extra range a tail.
+	 */
+	if (should_add_tail &&
+	    !run_add_entry(run, tail_vcn, tail_lcn, tail_len, is_mft))
+		return false;
+
+	return true;
+}
+
+/* run_collapse_range
+ *
+ * Helper for attr_collapse_range(),
+ * which is helper for fallocate(collapse_range).
+ */
+bool run_collapse_range(struct runs_tree *run, CLST vcn, CLST len)
+{
+	size_t index, eat;
+	struct ntfs_run *r, *e, *eat_start, *eat_end;
+	CLST end;
+
+	if (WARN_ON(!run_lookup(run, vcn, &index)))
+		return true; /* Should never be here. */
+
+	e = run->runs + run->count;
+	r = run->runs + index;
+	end = vcn + len;
+
+	if (vcn > r->vcn) {
+		if (r->vcn + r->len <= end) {
+			/* Collapse tail of run .*/
+			r->len = vcn - r->vcn;
+		} else if (r->lcn == SPARSE_LCN) {
+			/* Collapse a middle part of sparsed run. */
+			r->len -= len;
+		} else {
+			/* Collapse a middle part of normal run, split. */
+			if (!run_add_entry(run, vcn, SPARSE_LCN, len, false))
+				return false;
+			return run_collapse_range(run, vcn, len);
+		}
+
+		r += 1;
+	}
+
+	eat_start = r;
+	eat_end = r;
+
+	for (; r < e; r++) {
+		CLST d;
+
+		if (r->vcn >= end) {
+			r->vcn -= len;
+			continue;
+		}
+
+		if (r->vcn + r->len <= end) {
+			/* Eat this run. */
+			eat_end = r + 1;
+			continue;
+		}
+
+		d = end - r->vcn;
+		if (r->lcn != SPARSE_LCN)
+			r->lcn += d;
+		r->len -= d;
+		r->vcn -= len - d;
+	}
+
+	eat = eat_end - eat_start;
+	memmove(eat_start, eat_end, (e - eat_end) * sizeof(*r));
+	run->count -= eat;
+
+	return true;
+}
+
+/*
+ * run_get_entry - Return index-th mapped region.
+ */
+bool run_get_entry(const struct runs_tree *run, size_t index, CLST *vcn,
+		   CLST *lcn, CLST *len)
+{
+	const struct ntfs_run *r;
+
+	if (index >= run->count)
+		return false;
+
+	r = run->runs + index;
+
+	if (!r->len)
+		return false;
+
+	if (vcn)
+		*vcn = r->vcn;
+	if (lcn)
+		*lcn = r->lcn;
+	if (len)
+		*len = r->len;
+	return true;
+}
+
+/*
+ * run_packed_size - Calculate the size of packed int64.
+ */
+#ifdef __BIG_ENDIAN
+static inline int run_packed_size(const s64 n)
+{
+	const u8 *p = (const u8 *)&n + sizeof(n) - 1;
+
+	if (n >= 0) {
+		if (p[-7] || p[-6] || p[-5] || p[-4])
+			p -= 4;
+		if (p[-3] || p[-2])
+			p -= 2;
+		if (p[-1])
+			p -= 1;
+		if (p[0] & 0x80)
+			p -= 1;
+	} else {
+		if (p[-7] != 0xff || p[-6] != 0xff || p[-5] != 0xff ||
+		    p[-4] != 0xff)
+			p -= 4;
+		if (p[-3] != 0xff || p[-2] != 0xff)
+			p -= 2;
+		if (p[-1] != 0xff)
+			p -= 1;
+		if (!(p[0] & 0x80))
+			p -= 1;
+	}
+	return (const u8 *)&n + sizeof(n) - p;
+}
+
+/* Full trusted function. It does not check 'size' for errors. */
+static inline void run_pack_s64(u8 *run_buf, u8 size, s64 v)
+{
+	const u8 *p = (u8 *)&v;
+
+	switch (size) {
+	case 8:
+		run_buf[7] = p[0];
+		fallthrough;
+	case 7:
+		run_buf[6] = p[1];
+		fallthrough;
+	case 6:
+		run_buf[5] = p[2];
+		fallthrough;
+	case 5:
+		run_buf[4] = p[3];
+		fallthrough;
+	case 4:
+		run_buf[3] = p[4];
+		fallthrough;
+	case 3:
+		run_buf[2] = p[5];
+		fallthrough;
+	case 2:
+		run_buf[1] = p[6];
+		fallthrough;
+	case 1:
+		run_buf[0] = p[7];
+	}
+}
+
+/* Full trusted function. It does not check 'size' for errors. */
+static inline s64 run_unpack_s64(const u8 *run_buf, u8 size, s64 v)
+{
+	u8 *p = (u8 *)&v;
+
+	switch (size) {
+	case 8:
+		p[0] = run_buf[7];
+		fallthrough;
+	case 7:
+		p[1] = run_buf[6];
+		fallthrough;
+	case 6:
+		p[2] = run_buf[5];
+		fallthrough;
+	case 5:
+		p[3] = run_buf[4];
+		fallthrough;
+	case 4:
+		p[4] = run_buf[3];
+		fallthrough;
+	case 3:
+		p[5] = run_buf[2];
+		fallthrough;
+	case 2:
+		p[6] = run_buf[1];
+		fallthrough;
+	case 1:
+		p[7] = run_buf[0];
+	}
+	return v;
+}
+
+#else
+
+static inline int run_packed_size(const s64 n)
+{
+	const u8 *p = (const u8 *)&n;
+
+	if (n >= 0) {
+		if (p[7] || p[6] || p[5] || p[4])
+			p += 4;
+		if (p[3] || p[2])
+			p += 2;
+		if (p[1])
+			p += 1;
+		if (p[0] & 0x80)
+			p += 1;
+	} else {
+		if (p[7] != 0xff || p[6] != 0xff || p[5] != 0xff ||
+		    p[4] != 0xff)
+			p += 4;
+		if (p[3] != 0xff || p[2] != 0xff)
+			p += 2;
+		if (p[1] != 0xff)
+			p += 1;
+		if (!(p[0] & 0x80))
+			p += 1;
+	}
+
+	return 1 + p - (const u8 *)&n;
+}
+
+/* Full trusted function. It does not check 'size' for errors. */
+static inline void run_pack_s64(u8 *run_buf, u8 size, s64 v)
+{
+	const u8 *p = (u8 *)&v;
+
+	/* memcpy( run_buf, &v, size); Is it faster? */
+	switch (size) {
+	case 8:
+		run_buf[7] = p[7];
+		fallthrough;
+	case 7:
+		run_buf[6] = p[6];
+		fallthrough;
+	case 6:
+		run_buf[5] = p[5];
+		fallthrough;
+	case 5:
+		run_buf[4] = p[4];
+		fallthrough;
+	case 4:
+		run_buf[3] = p[3];
+		fallthrough;
+	case 3:
+		run_buf[2] = p[2];
+		fallthrough;
+	case 2:
+		run_buf[1] = p[1];
+		fallthrough;
+	case 1:
+		run_buf[0] = p[0];
+	}
+}
+
+/* full trusted function. It does not check 'size' for errors */
+static inline s64 run_unpack_s64(const u8 *run_buf, u8 size, s64 v)
+{
+	u8 *p = (u8 *)&v;
+
+	/* memcpy( &v, run_buf, size); Is it faster? */
+	switch (size) {
+	case 8:
+		p[7] = run_buf[7];
+		fallthrough;
+	case 7:
+		p[6] = run_buf[6];
+		fallthrough;
+	case 6:
+		p[5] = run_buf[5];
+		fallthrough;
+	case 5:
+		p[4] = run_buf[4];
+		fallthrough;
+	case 4:
+		p[3] = run_buf[3];
+		fallthrough;
+	case 3:
+		p[2] = run_buf[2];
+		fallthrough;
+	case 2:
+		p[1] = run_buf[1];
+		fallthrough;
+	case 1:
+		p[0] = run_buf[0];
+	}
+	return v;
+}
+#endif
+
+/*
+ * run_pack - Pack runs into buffer.
+ *
+ * packed_vcns - How much runs we have packed.
+ * packed_size - How much bytes we have used run_buf.
+ */
+int run_pack(const struct runs_tree *run, CLST svcn, CLST len, u8 *run_buf,
+	     u32 run_buf_size, CLST *packed_vcns)
+{
+	CLST next_vcn, vcn, lcn;
+	CLST prev_lcn = 0;
+	CLST evcn1 = svcn + len;
+	int packed_size = 0;
+	size_t i;
+	bool ok;
+	s64 dlcn;
+	int offset_size, size_size, tmp;
+
+	next_vcn = vcn = svcn;
+
+	*packed_vcns = 0;
+
+	if (!len)
+		goto out;
+
+	ok = run_lookup_entry(run, vcn, &lcn, &len, &i);
+
+	if (!ok)
+		goto error;
+
+	if (next_vcn != vcn)
+		goto error;
+
+	for (;;) {
+		next_vcn = vcn + len;
+		if (next_vcn > evcn1)
+			len = evcn1 - vcn;
+
+		/* How much bytes required to pack len. */
+		size_size = run_packed_size(len);
+
+		/* offset_size - How much bytes is packed dlcn. */
+		if (lcn == SPARSE_LCN) {
+			offset_size = 0;
+			dlcn = 0;
+		} else {
+			/* NOTE: lcn can be less than prev_lcn! */
+			dlcn = (s64)lcn - prev_lcn;
+			offset_size = run_packed_size(dlcn);
+			prev_lcn = lcn;
+		}
+
+		tmp = run_buf_size - packed_size - 2 - offset_size;
+		if (tmp <= 0)
+			goto out;
+
+		/* Can we store this entire run. */
+		if (tmp < size_size)
+			goto out;
+
+		if (run_buf) {
+			/* Pack run header. */
+			run_buf[0] = ((u8)(size_size | (offset_size << 4)));
+			run_buf += 1;
+
+			/* Pack the length of run. */
+			run_pack_s64(run_buf, size_size, len);
+
+			run_buf += size_size;
+			/* Pack the offset from previous LCN. */
+			run_pack_s64(run_buf, offset_size, dlcn);
+			run_buf += offset_size;
+		}
+
+		packed_size += 1 + offset_size + size_size;
+		*packed_vcns += len;
+
+		if (packed_size + 1 >= run_buf_size || next_vcn >= evcn1)
+			goto out;
+
+		ok = run_get_entry(run, ++i, &vcn, &lcn, &len);
+		if (!ok)
+			goto error;
+
+		if (next_vcn != vcn)
+			goto error;
+	}
+
+out:
+	/* Store last zero. */
+	if (run_buf)
+		run_buf[0] = 0;
+
+	return packed_size + 1;
+
+error:
+	return -EOPNOTSUPP;
+}
+
+/*
+ * run_unpack - Unpack packed runs from @run_buf.
+ *
+ * Return: Error if negative, or real used bytes.
+ */
+int run_unpack(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino,
+	       CLST svcn, CLST evcn, CLST vcn, const u8 *run_buf,
+	       u32 run_buf_size)
+{
+	u64 prev_lcn, vcn64, lcn, next_vcn;
+	const u8 *run_last, *run_0;
+	bool is_mft = ino == MFT_REC_MFT;
+
+	/* Check for empty. */
+	if (evcn + 1 == svcn)
+		return 0;
+
+	if (evcn < svcn)
+		return -EINVAL;
+
+	run_0 = run_buf;
+	run_last = run_buf + run_buf_size;
+	prev_lcn = 0;
+	vcn64 = svcn;
+
+	/* Read all runs the chain. */
+	/* size_size - How much bytes is packed len. */
+	while (run_buf < run_last) {
+		/* size_size - How much bytes is packed len. */
+		u8 size_size = *run_buf & 0xF;
+		/* offset_size - How much bytes is packed dlcn. */
+		u8 offset_size = *run_buf++ >> 4;
+		u64 len;
+
+		if (!size_size)
+			break;
+
+		/*
+		 * Unpack runs.
+		 * NOTE: Runs are stored little endian order
+		 * "len" is unsigned value, "dlcn" is signed.
+		 * Large positive number requires to store 5 bytes
+		 * e.g.: 05 FF 7E FF FF 00 00 00
+		 */
+		if (size_size > 8)
+			return -EINVAL;
+
+		len = run_unpack_s64(run_buf, size_size, 0);
+		/* Skip size_size. */
+		run_buf += size_size;
+
+		if (!len)
+			return -EINVAL;
+
+		if (!offset_size)
+			lcn = SPARSE_LCN64;
+		else if (offset_size <= 8) {
+			s64 dlcn;
+
+			/* Initial value of dlcn is -1 or 0. */
+			dlcn = (run_buf[offset_size - 1] & 0x80) ? (s64)-1 : 0;
+			dlcn = run_unpack_s64(run_buf, offset_size, dlcn);
+			/* Skip offset_size. */
+			run_buf += offset_size;
+
+			if (!dlcn)
+				return -EINVAL;
+			lcn = prev_lcn + dlcn;
+			prev_lcn = lcn;
+		} else
+			return -EINVAL;
+
+		next_vcn = vcn64 + len;
+		/* Check boundary. */
+		if (next_vcn > evcn + 1)
+			return -EINVAL;
+
+#ifndef CONFIG_NTFS3_64BIT_CLUSTER
+		if (next_vcn > 0x100000000ull || (lcn + len) > 0x100000000ull) {
+			ntfs_err(
+				sbi->sb,
+				"This driver is compiled without CONFIG_NTFS3_64BIT_CLUSTER (like windows driver).\n"
+				"Volume contains 64 bits run: vcn %llx, lcn %llx, len %llx.\n"
+				"Activate CONFIG_NTFS3_64BIT_CLUSTER to process this case",
+				vcn64, lcn, len);
+			return -EOPNOTSUPP;
+		}
+#endif
+		if (lcn != SPARSE_LCN64 && lcn + len > sbi->used.bitmap.nbits) {
+			/* LCN range is out of volume. */
+			return -EINVAL;
+		}
+
+		if (!run)
+			; /* Called from check_attr(fslog.c) to check run. */
+		else if (run == RUN_DEALLOCATE) {
+			/*
+			 * Called from ni_delete_all to free clusters
+			 * without storing in run.
+			 */
+			if (lcn != SPARSE_LCN64)
+				mark_as_free_ex(sbi, lcn, len, true);
+		} else if (vcn64 >= vcn) {
+			if (!run_add_entry(run, vcn64, lcn, len, is_mft))
+				return -ENOMEM;
+		} else if (next_vcn > vcn) {
+			u64 dlen = vcn - vcn64;
+
+			if (!run_add_entry(run, vcn, lcn + dlen, len - dlen,
+					   is_mft))
+				return -ENOMEM;
+		}
+
+		vcn64 = next_vcn;
+	}
+
+	if (vcn64 != evcn + 1) {
+		/* Not expected length of unpacked runs. */
+		return -EINVAL;
+	}
+
+	return run_buf - run_0;
+}
+
+#ifdef NTFS3_CHECK_FREE_CLST
+/*
+ * run_unpack_ex - Unpack packed runs from "run_buf".
+ *
+ * Checks unpacked runs to be used in bitmap.
+ *
+ * Return: Error if negative, or real used bytes.
+ */
+int run_unpack_ex(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino,
+		  CLST svcn, CLST evcn, CLST vcn, const u8 *run_buf,
+		  u32 run_buf_size)
+{
+	int ret, err;
+	CLST next_vcn, lcn, len;
+	size_t index;
+	bool ok;
+	struct wnd_bitmap *wnd;
+
+	ret = run_unpack(run, sbi, ino, svcn, evcn, vcn, run_buf, run_buf_size);
+	if (ret <= 0)
+		return ret;
+
+	if (!sbi->used.bitmap.sb || !run || run == RUN_DEALLOCATE)
+		return ret;
+
+	if (ino == MFT_REC_BADCLUST)
+		return ret;
+
+	next_vcn = vcn = svcn;
+	wnd = &sbi->used.bitmap;
+
+	for (ok = run_lookup_entry(run, vcn, &lcn, &len, &index);
+	     next_vcn <= evcn;
+	     ok = run_get_entry(run, ++index, &vcn, &lcn, &len)) {
+		if (!ok || next_vcn != vcn)
+			return -EINVAL;
+
+		next_vcn = vcn + len;
+
+		if (lcn == SPARSE_LCN)
+			continue;
+
+		if (sbi->flags & NTFS_FLAGS_NEED_REPLAY)
+			continue;
+
+		down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
+		/* Check for free blocks. */
+		ok = wnd_is_used(wnd, lcn, len);
+		up_read(&wnd->rw_lock);
+		if (ok)
+			continue;
+
+		/* Looks like volume is corrupted. */
+		ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
+
+		if (down_write_trylock(&wnd->rw_lock)) {
+			/* Mark all zero bits as used in range [lcn, lcn+len). */
+			CLST i, lcn_f = 0, len_f = 0;
+
+			err = 0;
+			for (i = 0; i < len; i++) {
+				if (wnd_is_free(wnd, lcn + i, 1)) {
+					if (!len_f)
+						lcn_f = lcn + i;
+					len_f += 1;
+				} else if (len_f) {
+					err = wnd_set_used(wnd, lcn_f, len_f);
+					len_f = 0;
+					if (err)
+						break;
+				}
+			}
+
+			if (len_f)
+				err = wnd_set_used(wnd, lcn_f, len_f);
+
+			up_write(&wnd->rw_lock);
+			if (err)
+				return err;
+		}
+	}
+
+	return ret;
+}
+#endif
+
+/*
+ * run_get_highest_vcn
+ *
+ * Return the highest vcn from a mapping pairs array
+ * it used while replaying log file.
+ */
+int run_get_highest_vcn(CLST vcn, const u8 *run_buf, u64 *highest_vcn)
+{
+	u64 vcn64 = vcn;
+	u8 size_size;
+
+	while ((size_size = *run_buf & 0xF)) {
+		u8 offset_size = *run_buf++ >> 4;
+		u64 len;
+
+		if (size_size > 8 || offset_size > 8)
+			return -EINVAL;
+
+		len = run_unpack_s64(run_buf, size_size, 0);
+		if (!len)
+			return -EINVAL;
+
+		run_buf += size_size + offset_size;
+		vcn64 += len;
+
+#ifndef CONFIG_NTFS3_64BIT_CLUSTER
+		if (vcn64 > 0x100000000ull)
+			return -EINVAL;
+#endif
+	}
+
+	*highest_vcn = vcn64 - 1;
+	return 0;
+}
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
new file mode 100644
index 000000000000..55bbc9200a10
--- /dev/null
+++ b/fs/ntfs3/super.c
@@ -0,0 +1,1512 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ *
+ *                 terminology
+ *
+ * cluster - allocation unit     - 512,1K,2K,4K,...,2M
+ * vcn - virtual cluster number  - Offset inside the file in clusters.
+ * vbo - virtual byte offset     - Offset inside the file in bytes.
+ * lcn - logical cluster number  - 0 based cluster in clusters heap.
+ * lbo - logical byte offset     - Absolute position inside volume.
+ * run - maps VCN to LCN         - Stored in attributes in packed form.
+ * attr - attribute segment      - std/name/data etc records inside MFT.
+ * mi  - MFT inode               - One MFT record(usually 1024 bytes or 4K), consists of attributes.
+ * ni  - NTFS inode              - Extends linux inode. consists of one or more mft inodes.
+ * index - unit inside directory - 2K, 4K, <=page size, does not depend on cluster size.
+ *
+ * WSL - Windows Subsystem for Linux
+ * https://docs.microsoft.com/en-us/windows/wsl/file-permissions
+ * It stores uid/gid/mode/dev in xattr
+ *
+ */
+
+#include <linux/backing-dev.h>
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/exportfs.h>
+#include <linux/fs.h>
+#include <linux/iversion.h>
+#include <linux/log2.h>
+#include <linux/module.h>
+#include <linux/nls.h>
+#include <linux/parser.h>
+#include <linux/seq_file.h>
+#include <linux/statfs.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+#ifdef CONFIG_NTFS3_LZX_XPRESS
+#include "lib/lib.h"
+#endif
+
+#ifdef CONFIG_PRINTK
+/*
+ * ntfs_printk - Trace warnings/notices/errors.
+ *
+ * Thanks Joe Perches <joe@perches.com> for implementation
+ */
+void ntfs_printk(const struct super_block *sb, const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+	int level;
+	struct ntfs_sb_info *sbi = sb->s_fs_info;
+
+	/* Should we use different ratelimits for warnings/notices/errors? */
+	if (!___ratelimit(&sbi->msg_ratelimit, "ntfs3"))
+		return;
+
+	va_start(args, fmt);
+
+	level = printk_get_level(fmt);
+	vaf.fmt = printk_skip_level(fmt);
+	vaf.va = &args;
+	printk("%c%cntfs3: %s: %pV\n", KERN_SOH_ASCII, level, sb->s_id, &vaf);
+
+	va_end(args);
+}
+
+static char s_name_buf[512];
+static atomic_t s_name_buf_cnt = ATOMIC_INIT(1); // 1 means 'free s_name_buf'.
+
+/*
+ * ntfs_inode_printk
+ *
+ * Print warnings/notices/errors about inode using name or inode number.
+ */
+void ntfs_inode_printk(struct inode *inode, const char *fmt, ...)
+{
+	struct super_block *sb = inode->i_sb;
+	struct ntfs_sb_info *sbi = sb->s_fs_info;
+	char *name;
+	va_list args;
+	struct va_format vaf;
+	int level;
+
+	if (!___ratelimit(&sbi->msg_ratelimit, "ntfs3"))
+		return;
+
+	/* Use static allocated buffer, if possible. */
+	name = atomic_dec_and_test(&s_name_buf_cnt)
+		       ? s_name_buf
+		       : kmalloc(sizeof(s_name_buf), GFP_NOFS);
+
+	if (name) {
+		struct dentry *de = d_find_alias(inode);
+		const u32 name_len = ARRAY_SIZE(s_name_buf) - 1;
+
+		if (de) {
+			spin_lock(&de->d_lock);
+			snprintf(name, name_len, " \"%s\"", de->d_name.name);
+			spin_unlock(&de->d_lock);
+			name[name_len] = 0; /* To be sure. */
+		} else {
+			name[0] = 0;
+		}
+		dput(de); /* Cocci warns if placed in branch "if (de)" */
+	}
+
+	va_start(args, fmt);
+
+	level = printk_get_level(fmt);
+	vaf.fmt = printk_skip_level(fmt);
+	vaf.va = &args;
+
+	printk("%c%cntfs3: %s: ino=%lx,%s %pV\n", KERN_SOH_ASCII, level,
+	       sb->s_id, inode->i_ino, name ? name : "", &vaf);
+
+	va_end(args);
+
+	atomic_inc(&s_name_buf_cnt);
+	if (name != s_name_buf)
+		kfree(name);
+}
+#endif
+
+/*
+ * Shared memory struct.
+ *
+ * On-disk ntfs's upcase table is created by ntfs formatter.
+ * 'upcase' table is 128K bytes of memory.
+ * We should read it into memory when mounting.
+ * Several ntfs volumes likely use the same 'upcase' table.
+ * It is good idea to share in-memory 'upcase' table between different volumes.
+ * Unfortunately winxp/vista/win7 use different upcase tables.
+ */
+static DEFINE_SPINLOCK(s_shared_lock);
+
+static struct {
+	void *ptr;
+	u32 len;
+	int cnt;
+} s_shared[8];
+
+/*
+ * ntfs_set_shared
+ *
+ * Return:
+ * * @ptr - If pointer was saved in shared memory.
+ * * NULL - If pointer was not shared.
+ */
+void *ntfs_set_shared(void *ptr, u32 bytes)
+{
+	void *ret = NULL;
+	int i, j = -1;
+
+	spin_lock(&s_shared_lock);
+	for (i = 0; i < ARRAY_SIZE(s_shared); i++) {
+		if (!s_shared[i].cnt) {
+			j = i;
+		} else if (bytes == s_shared[i].len &&
+			   !memcmp(s_shared[i].ptr, ptr, bytes)) {
+			s_shared[i].cnt += 1;
+			ret = s_shared[i].ptr;
+			break;
+		}
+	}
+
+	if (!ret && j != -1) {
+		s_shared[j].ptr = ptr;
+		s_shared[j].len = bytes;
+		s_shared[j].cnt = 1;
+		ret = ptr;
+	}
+	spin_unlock(&s_shared_lock);
+
+	return ret;
+}
+
+/*
+ * ntfs_put_shared
+ *
+ * Return:
+ * * @ptr - If pointer is not shared anymore.
+ * * NULL - If pointer is still shared.
+ */
+void *ntfs_put_shared(void *ptr)
+{
+	void *ret = ptr;
+	int i;
+
+	spin_lock(&s_shared_lock);
+	for (i = 0; i < ARRAY_SIZE(s_shared); i++) {
+		if (s_shared[i].cnt && s_shared[i].ptr == ptr) {
+			if (--s_shared[i].cnt)
+				ret = NULL;
+			break;
+		}
+	}
+	spin_unlock(&s_shared_lock);
+
+	return ret;
+}
+
+static inline void clear_mount_options(struct ntfs_mount_options *options)
+{
+	unload_nls(options->nls);
+}
+
+enum Opt {
+	Opt_uid,
+	Opt_gid,
+	Opt_umask,
+	Opt_dmask,
+	Opt_fmask,
+	Opt_immutable,
+	Opt_discard,
+	Opt_force,
+	Opt_sparse,
+	Opt_nohidden,
+	Opt_showmeta,
+	Opt_acl,
+	Opt_noatime,
+	Opt_nls,
+	Opt_prealloc,
+	Opt_no_acs_rules,
+	Opt_err,
+};
+
+static const match_table_t ntfs_tokens = {
+	{ Opt_uid, "uid=%u" },
+	{ Opt_gid, "gid=%u" },
+	{ Opt_umask, "umask=%o" },
+	{ Opt_dmask, "dmask=%o" },
+	{ Opt_fmask, "fmask=%o" },
+	{ Opt_immutable, "sys_immutable" },
+	{ Opt_discard, "discard" },
+	{ Opt_force, "force" },
+	{ Opt_sparse, "sparse" },
+	{ Opt_nohidden, "nohidden" },
+	{ Opt_acl, "acl" },
+	{ Opt_noatime, "noatime" },
+	{ Opt_showmeta, "showmeta" },
+	{ Opt_nls, "nls=%s" },
+	{ Opt_prealloc, "prealloc" },
+	{ Opt_no_acs_rules, "no_acs_rules" },
+	{ Opt_err, NULL },
+};
+
+static noinline int ntfs_parse_options(struct super_block *sb, char *options,
+				       int silent,
+				       struct ntfs_mount_options *opts)
+{
+	char *p;
+	substring_t args[MAX_OPT_ARGS];
+	int option;
+	char nls_name[30];
+	struct nls_table *nls;
+
+	opts->fs_uid = current_uid();
+	opts->fs_gid = current_gid();
+	opts->fs_fmask_inv = opts->fs_dmask_inv = ~current_umask();
+	nls_name[0] = 0;
+
+	if (!options)
+		goto out;
+
+	while ((p = strsep(&options, ","))) {
+		int token;
+
+		if (!*p)
+			continue;
+
+		token = match_token(p, ntfs_tokens, args);
+		switch (token) {
+		case Opt_immutable:
+			opts->sys_immutable = 1;
+			break;
+		case Opt_uid:
+			if (match_int(&args[0], &option))
+				return -EINVAL;
+			opts->fs_uid = make_kuid(current_user_ns(), option);
+			if (!uid_valid(opts->fs_uid))
+				return -EINVAL;
+			opts->uid = 1;
+			break;
+		case Opt_gid:
+			if (match_int(&args[0], &option))
+				return -EINVAL;
+			opts->fs_gid = make_kgid(current_user_ns(), option);
+			if (!gid_valid(opts->fs_gid))
+				return -EINVAL;
+			opts->gid = 1;
+			break;
+		case Opt_umask:
+			if (match_octal(&args[0], &option))
+				return -EINVAL;
+			opts->fs_fmask_inv = opts->fs_dmask_inv = ~option;
+			opts->fmask = opts->dmask = 1;
+			break;
+		case Opt_dmask:
+			if (match_octal(&args[0], &option))
+				return -EINVAL;
+			opts->fs_dmask_inv = ~option;
+			opts->dmask = 1;
+			break;
+		case Opt_fmask:
+			if (match_octal(&args[0], &option))
+				return -EINVAL;
+			opts->fs_fmask_inv = ~option;
+			opts->fmask = 1;
+			break;
+		case Opt_discard:
+			opts->discard = 1;
+			break;
+		case Opt_force:
+			opts->force = 1;
+			break;
+		case Opt_sparse:
+			opts->sparse = 1;
+			break;
+		case Opt_nohidden:
+			opts->nohidden = 1;
+			break;
+		case Opt_acl:
+#ifdef CONFIG_NTFS3_FS_POSIX_ACL
+			sb->s_flags |= SB_POSIXACL;
+			break;
+#else
+			ntfs_err(sb, "support for ACL not compiled in!");
+			return -EINVAL;
+#endif
+		case Opt_noatime:
+			sb->s_flags |= SB_NOATIME;
+			break;
+		case Opt_showmeta:
+			opts->showmeta = 1;
+			break;
+		case Opt_nls:
+			match_strlcpy(nls_name, &args[0], sizeof(nls_name));
+			break;
+		case Opt_prealloc:
+			opts->prealloc = 1;
+			break;
+		case Opt_no_acs_rules:
+			opts->no_acs_rules = 1;
+			break;
+		default:
+			if (!silent)
+				ntfs_err(
+					sb,
+					"Unrecognized mount option \"%s\" or missing value",
+					p);
+			//return -EINVAL;
+		}
+	}
+
+out:
+	if (!strcmp(nls_name[0] ? nls_name : CONFIG_NLS_DEFAULT, "utf8")) {
+		/*
+		 * For UTF-8 use utf16s_to_utf8s()/utf8s_to_utf16s()
+		 * instead of NLS.
+		 */
+		nls = NULL;
+	} else if (nls_name[0]) {
+		nls = load_nls(nls_name);
+		if (!nls) {
+			ntfs_err(sb, "failed to load \"%s\"", nls_name);
+			return -EINVAL;
+		}
+	} else {
+		nls = load_nls_default();
+		if (!nls) {
+			ntfs_err(sb, "failed to load default nls");
+			return -EINVAL;
+		}
+	}
+	opts->nls = nls;
+
+	return 0;
+}
+
+static int ntfs_remount(struct super_block *sb, int *flags, char *data)
+{
+	int err, ro_rw;
+	struct ntfs_sb_info *sbi = sb->s_fs_info;
+	struct ntfs_mount_options old_opts;
+	char *orig_data = kstrdup(data, GFP_KERNEL);
+
+	if (data && !orig_data)
+		return -ENOMEM;
+
+	/* Store  original options. */
+	memcpy(&old_opts, &sbi->options, sizeof(old_opts));
+	clear_mount_options(&sbi->options);
+	memset(&sbi->options, 0, sizeof(sbi->options));
+
+	err = ntfs_parse_options(sb, data, 0, &sbi->options);
+	if (err)
+		goto restore_opts;
+
+	ro_rw = sb_rdonly(sb) && !(*flags & SB_RDONLY);
+	if (ro_rw && (sbi->flags & NTFS_FLAGS_NEED_REPLAY)) {
+		ntfs_warn(
+			sb,
+			"Couldn't remount rw because journal is not replayed. Please umount/remount instead\n");
+		err = -EINVAL;
+		goto restore_opts;
+	}
+
+	sync_filesystem(sb);
+
+	if (ro_rw && (sbi->volume.flags & VOLUME_FLAG_DIRTY) &&
+	    !sbi->options.force) {
+		ntfs_warn(sb, "volume is dirty and \"force\" flag is not set!");
+		err = -EINVAL;
+		goto restore_opts;
+	}
+
+	clear_mount_options(&old_opts);
+
+	*flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME) |
+		 SB_NODIRATIME | SB_NOATIME;
+	ntfs_info(sb, "re-mounted. Opts: %s", orig_data);
+	err = 0;
+	goto out;
+
+restore_opts:
+	clear_mount_options(&sbi->options);
+	memcpy(&sbi->options, &old_opts, sizeof(old_opts));
+
+out:
+	kfree(orig_data);
+	return err;
+}
+
+static struct kmem_cache *ntfs_inode_cachep;
+
+static struct inode *ntfs_alloc_inode(struct super_block *sb)
+{
+	struct ntfs_inode *ni = kmem_cache_alloc(ntfs_inode_cachep, GFP_NOFS);
+
+	if (!ni)
+		return NULL;
+
+	memset(ni, 0, offsetof(struct ntfs_inode, vfs_inode));
+
+	mutex_init(&ni->ni_lock);
+
+	return &ni->vfs_inode;
+}
+
+static void ntfs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	struct ntfs_inode *ni = ntfs_i(inode);
+
+	mutex_destroy(&ni->ni_lock);
+
+	kmem_cache_free(ntfs_inode_cachep, ni);
+}
+
+static void ntfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, ntfs_i_callback);
+}
+
+static void init_once(void *foo)
+{
+	struct ntfs_inode *ni = foo;
+
+	inode_init_once(&ni->vfs_inode);
+}
+
+/*
+ * put_ntfs - Noinline to reduce binary size.
+ */
+static noinline void put_ntfs(struct ntfs_sb_info *sbi)
+{
+	kfree(sbi->new_rec);
+	kvfree(ntfs_put_shared(sbi->upcase));
+	kfree(sbi->def_table);
+
+	wnd_close(&sbi->mft.bitmap);
+	wnd_close(&sbi->used.bitmap);
+
+	if (sbi->mft.ni)
+		iput(&sbi->mft.ni->vfs_inode);
+
+	if (sbi->security.ni)
+		iput(&sbi->security.ni->vfs_inode);
+
+	if (sbi->reparse.ni)
+		iput(&sbi->reparse.ni->vfs_inode);
+
+	if (sbi->objid.ni)
+		iput(&sbi->objid.ni->vfs_inode);
+
+	if (sbi->volume.ni)
+		iput(&sbi->volume.ni->vfs_inode);
+
+	ntfs_update_mftmirr(sbi, 0);
+
+	indx_clear(&sbi->security.index_sii);
+	indx_clear(&sbi->security.index_sdh);
+	indx_clear(&sbi->reparse.index_r);
+	indx_clear(&sbi->objid.index_o);
+	kfree(sbi->compress.lznt);
+#ifdef CONFIG_NTFS3_LZX_XPRESS
+	xpress_free_decompressor(sbi->compress.xpress);
+	lzx_free_decompressor(sbi->compress.lzx);
+#endif
+	clear_mount_options(&sbi->options);
+
+	kfree(sbi);
+}
+
+static void ntfs_put_super(struct super_block *sb)
+{
+	struct ntfs_sb_info *sbi = sb->s_fs_info;
+
+	/* Mark rw ntfs as clear, if possible. */
+	ntfs_set_state(sbi, NTFS_DIRTY_CLEAR);
+
+	put_ntfs(sbi);
+
+	sync_blockdev(sb->s_bdev);
+}
+
+static int ntfs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	struct super_block *sb = dentry->d_sb;
+	struct ntfs_sb_info *sbi = sb->s_fs_info;
+	struct wnd_bitmap *wnd = &sbi->used.bitmap;
+
+	buf->f_type = sb->s_magic;
+	buf->f_bsize = sbi->cluster_size;
+	buf->f_blocks = wnd->nbits;
+
+	buf->f_bfree = buf->f_bavail = wnd_zeroes(wnd);
+	buf->f_fsid.val[0] = sbi->volume.ser_num;
+	buf->f_fsid.val[1] = (sbi->volume.ser_num >> 32);
+	buf->f_namelen = NTFS_NAME_LEN;
+
+	return 0;
+}
+
+static int ntfs_show_options(struct seq_file *m, struct dentry *root)
+{
+	struct super_block *sb = root->d_sb;
+	struct ntfs_sb_info *sbi = sb->s_fs_info;
+	struct ntfs_mount_options *opts = &sbi->options;
+	struct user_namespace *user_ns = seq_user_ns(m);
+
+	if (opts->uid)
+		seq_printf(m, ",uid=%u",
+			   from_kuid_munged(user_ns, opts->fs_uid));
+	if (opts->gid)
+		seq_printf(m, ",gid=%u",
+			   from_kgid_munged(user_ns, opts->fs_gid));
+	if (opts->fmask)
+		seq_printf(m, ",fmask=%04o", ~opts->fs_fmask_inv);
+	if (opts->dmask)
+		seq_printf(m, ",dmask=%04o", ~opts->fs_dmask_inv);
+	if (opts->nls)
+		seq_printf(m, ",nls=%s", opts->nls->charset);
+	else
+		seq_puts(m, ",nls=utf8");
+	if (opts->sys_immutable)
+		seq_puts(m, ",sys_immutable");
+	if (opts->discard)
+		seq_puts(m, ",discard");
+	if (opts->sparse)
+		seq_puts(m, ",sparse");
+	if (opts->showmeta)
+		seq_puts(m, ",showmeta");
+	if (opts->nohidden)
+		seq_puts(m, ",nohidden");
+	if (opts->force)
+		seq_puts(m, ",force");
+	if (opts->no_acs_rules)
+		seq_puts(m, ",no_acs_rules");
+	if (opts->prealloc)
+		seq_puts(m, ",prealloc");
+	if (sb->s_flags & SB_POSIXACL)
+		seq_puts(m, ",acl");
+	if (sb->s_flags & SB_NOATIME)
+		seq_puts(m, ",noatime");
+
+	return 0;
+}
+
+/*
+ * ntfs_sync_fs - super_operations::sync_fs
+ */
+static int ntfs_sync_fs(struct super_block *sb, int wait)
+{
+	int err = 0, err2;
+	struct ntfs_sb_info *sbi = sb->s_fs_info;
+	struct ntfs_inode *ni;
+	struct inode *inode;
+
+	ni = sbi->security.ni;
+	if (ni) {
+		inode = &ni->vfs_inode;
+		err2 = _ni_write_inode(inode, wait);
+		if (err2 && !err)
+			err = err2;
+	}
+
+	ni = sbi->objid.ni;
+	if (ni) {
+		inode = &ni->vfs_inode;
+		err2 = _ni_write_inode(inode, wait);
+		if (err2 && !err)
+			err = err2;
+	}
+
+	ni = sbi->reparse.ni;
+	if (ni) {
+		inode = &ni->vfs_inode;
+		err2 = _ni_write_inode(inode, wait);
+		if (err2 && !err)
+			err = err2;
+	}
+
+	if (!err)
+		ntfs_set_state(sbi, NTFS_DIRTY_CLEAR);
+
+	ntfs_update_mftmirr(sbi, wait);
+
+	return err;
+}
+
+static const struct super_operations ntfs_sops = {
+	.alloc_inode = ntfs_alloc_inode,
+	.destroy_inode = ntfs_destroy_inode,
+	.evict_inode = ntfs_evict_inode,
+	.put_super = ntfs_put_super,
+	.statfs = ntfs_statfs,
+	.show_options = ntfs_show_options,
+	.sync_fs = ntfs_sync_fs,
+	.remount_fs = ntfs_remount,
+	.write_inode = ntfs3_write_inode,
+};
+
+static struct inode *ntfs_export_get_inode(struct super_block *sb, u64 ino,
+					   u32 generation)
+{
+	struct MFT_REF ref;
+	struct inode *inode;
+
+	ref.low = cpu_to_le32(ino);
+#ifdef CONFIG_NTFS3_64BIT_CLUSTER
+	ref.high = cpu_to_le16(ino >> 32);
+#else
+	ref.high = 0;
+#endif
+	ref.seq = cpu_to_le16(generation);
+
+	inode = ntfs_iget5(sb, &ref, NULL);
+	if (!IS_ERR(inode) && is_bad_inode(inode)) {
+		iput(inode);
+		inode = ERR_PTR(-ESTALE);
+	}
+
+	return inode;
+}
+
+static struct dentry *ntfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
+					int fh_len, int fh_type)
+{
+	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
+				    ntfs_export_get_inode);
+}
+
+static struct dentry *ntfs_fh_to_parent(struct super_block *sb, struct fid *fid,
+					int fh_len, int fh_type)
+{
+	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
+				    ntfs_export_get_inode);
+}
+
+/* TODO: == ntfs_sync_inode */
+static int ntfs_nfs_commit_metadata(struct inode *inode)
+{
+	return _ni_write_inode(inode, 1);
+}
+
+static const struct export_operations ntfs_export_ops = {
+	.fh_to_dentry = ntfs_fh_to_dentry,
+	.fh_to_parent = ntfs_fh_to_parent,
+	.get_parent = ntfs3_get_parent,
+	.commit_metadata = ntfs_nfs_commit_metadata,
+};
+
+/*
+ * format_size_gb - Return Gb,Mb to print with "%u.%02u Gb".
+ */
+static u32 format_size_gb(const u64 bytes, u32 *mb)
+{
+	/* Do simple right 30 bit shift of 64 bit value. */
+	u64 kbytes = bytes >> 10;
+	u32 kbytes32 = kbytes;
+
+	*mb = (100 * (kbytes32 & 0xfffff) + 0x7ffff) >> 20;
+	if (*mb >= 100)
+		*mb = 99;
+
+	return (kbytes32 >> 20) | (((u32)(kbytes >> 32)) << 12);
+}
+
+static u32 true_sectors_per_clst(const struct NTFS_BOOT *boot)
+{
+	return boot->sectors_per_clusters <= 0x80
+		       ? boot->sectors_per_clusters
+		       : (1u << (0 - boot->sectors_per_clusters));
+}
+
+/*
+ * ntfs_init_from_boot - Init internal info from on-disk boot sector.
+ */
+static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
+			       u64 dev_size)
+{
+	struct ntfs_sb_info *sbi = sb->s_fs_info;
+	int err;
+	u32 mb, gb, boot_sector_size, sct_per_clst, record_size;
+	u64 sectors, clusters, fs_size, mlcn, mlcn2;
+	struct NTFS_BOOT *boot;
+	struct buffer_head *bh;
+	struct MFT_REC *rec;
+	u16 fn, ao;
+
+	sbi->volume.blocks = dev_size >> PAGE_SHIFT;
+
+	bh = ntfs_bread(sb, 0);
+	if (!bh)
+		return -EIO;
+
+	err = -EINVAL;
+	boot = (struct NTFS_BOOT *)bh->b_data;
+
+	if (memcmp(boot->system_id, "NTFS    ", sizeof("NTFS    ") - 1))
+		goto out;
+
+	/* 0x55AA is not mandaroty. Thanks Maxim Suhanov*/
+	/*if (0x55 != boot->boot_magic[0] || 0xAA != boot->boot_magic[1])
+	 *	goto out;
+	 */
+
+	boot_sector_size = (u32)boot->bytes_per_sector[1] << 8;
+	if (boot->bytes_per_sector[0] || boot_sector_size < SECTOR_SIZE ||
+	    !is_power_of_2(boot_sector_size)) {
+		goto out;
+	}
+
+	/* cluster size: 512, 1K, 2K, 4K, ... 2M */
+	sct_per_clst = true_sectors_per_clst(boot);
+	if (!is_power_of_2(sct_per_clst))
+		goto out;
+
+	mlcn = le64_to_cpu(boot->mft_clst);
+	mlcn2 = le64_to_cpu(boot->mft2_clst);
+	sectors = le64_to_cpu(boot->sectors_per_volume);
+
+	if (mlcn * sct_per_clst >= sectors)
+		goto out;
+
+	if (mlcn2 * sct_per_clst >= sectors)
+		goto out;
+
+	/* Check MFT record size. */
+	if ((boot->record_size < 0 &&
+	     SECTOR_SIZE > (2U << (-boot->record_size))) ||
+	    (boot->record_size >= 0 && !is_power_of_2(boot->record_size))) {
+		goto out;
+	}
+
+	/* Check index record size. */
+	if ((boot->index_size < 0 &&
+	     SECTOR_SIZE > (2U << (-boot->index_size))) ||
+	    (boot->index_size >= 0 && !is_power_of_2(boot->index_size))) {
+		goto out;
+	}
+
+	sbi->sector_size = boot_sector_size;
+	sbi->sector_bits = blksize_bits(boot_sector_size);
+	fs_size = (sectors + 1) << sbi->sector_bits;
+
+	gb = format_size_gb(fs_size, &mb);
+
+	/*
+	 * - Volume formatted and mounted with the same sector size.
+	 * - Volume formatted 4K and mounted as 512.
+	 * - Volume formatted 512 and mounted as 4K.
+	 */
+	if (sbi->sector_size != sector_size) {
+		ntfs_warn(sb,
+			  "Different NTFS' sector size and media sector size");
+		dev_size += sector_size - 1;
+	}
+
+	sbi->cluster_size = boot_sector_size * sct_per_clst;
+	sbi->cluster_bits = blksize_bits(sbi->cluster_size);
+
+	sbi->mft.lbo = mlcn << sbi->cluster_bits;
+	sbi->mft.lbo2 = mlcn2 << sbi->cluster_bits;
+
+	if (sbi->cluster_size < sbi->sector_size)
+		goto out;
+
+	sbi->cluster_mask = sbi->cluster_size - 1;
+	sbi->cluster_mask_inv = ~(u64)sbi->cluster_mask;
+	sbi->record_size = record_size = boot->record_size < 0
+						 ? 1 << (-boot->record_size)
+						 : (u32)boot->record_size
+							   << sbi->cluster_bits;
+
+	if (record_size > MAXIMUM_BYTES_PER_MFT)
+		goto out;
+
+	sbi->record_bits = blksize_bits(record_size);
+	sbi->attr_size_tr = (5 * record_size >> 4); // ~320 bytes
+
+	sbi->max_bytes_per_attr =
+		record_size - ALIGN(MFTRECORD_FIXUP_OFFSET_1, 8) -
+		ALIGN(((record_size >> SECTOR_SHIFT) * sizeof(short)), 8) -
+		ALIGN(sizeof(enum ATTR_TYPE), 8);
+
+	sbi->index_size = boot->index_size < 0
+				  ? 1u << (-boot->index_size)
+				  : (u32)boot->index_size << sbi->cluster_bits;
+
+	sbi->volume.ser_num = le64_to_cpu(boot->serial_num);
+	sbi->volume.size = sectors << sbi->sector_bits;
+
+	/* Warning if RAW volume. */
+	if (dev_size < fs_size) {
+		u32 mb0, gb0;
+
+		gb0 = format_size_gb(dev_size, &mb0);
+		ntfs_warn(
+			sb,
+			"RAW NTFS volume: Filesystem size %u.%02u Gb > volume size %u.%02u Gb. Mount in read-only",
+			gb, mb, gb0, mb0);
+		sb->s_flags |= SB_RDONLY;
+	}
+
+	clusters = sbi->volume.size >> sbi->cluster_bits;
+#ifndef CONFIG_NTFS3_64BIT_CLUSTER
+	/* 32 bits per cluster. */
+	if (clusters >> 32) {
+		ntfs_notice(
+			sb,
+			"NTFS %u.%02u Gb is too big to use 32 bits per cluster",
+			gb, mb);
+		goto out;
+	}
+#elif BITS_PER_LONG < 64
+#error "CONFIG_NTFS3_64BIT_CLUSTER incompatible in 32 bit OS"
+#endif
+
+	sbi->used.bitmap.nbits = clusters;
+
+	rec = kzalloc(record_size, GFP_NOFS);
+	if (!rec) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	sbi->new_rec = rec;
+	rec->rhdr.sign = NTFS_FILE_SIGNATURE;
+	rec->rhdr.fix_off = cpu_to_le16(MFTRECORD_FIXUP_OFFSET_1);
+	fn = (sbi->record_size >> SECTOR_SHIFT) + 1;
+	rec->rhdr.fix_num = cpu_to_le16(fn);
+	ao = ALIGN(MFTRECORD_FIXUP_OFFSET_1 + sizeof(short) * fn, 8);
+	rec->attr_off = cpu_to_le16(ao);
+	rec->used = cpu_to_le32(ao + ALIGN(sizeof(enum ATTR_TYPE), 8));
+	rec->total = cpu_to_le32(sbi->record_size);
+	((struct ATTRIB *)Add2Ptr(rec, ao))->type = ATTR_END;
+
+	if (sbi->cluster_size < PAGE_SIZE)
+		sb_set_blocksize(sb, sbi->cluster_size);
+
+	sbi->block_mask = sb->s_blocksize - 1;
+	sbi->blocks_per_cluster = sbi->cluster_size >> sb->s_blocksize_bits;
+	sbi->volume.blocks = sbi->volume.size >> sb->s_blocksize_bits;
+
+	/* Maximum size for normal files. */
+	sbi->maxbytes = (clusters << sbi->cluster_bits) - 1;
+
+#ifdef CONFIG_NTFS3_64BIT_CLUSTER
+	if (clusters >= (1ull << (64 - sbi->cluster_bits)))
+		sbi->maxbytes = -1;
+	sbi->maxbytes_sparse = -1;
+#else
+	/* Maximum size for sparse file. */
+	sbi->maxbytes_sparse = (1ull << (sbi->cluster_bits + 32)) - 1;
+#endif
+
+	err = 0;
+
+out:
+	brelse(bh);
+
+	return err;
+}
+
+/*
+ * ntfs_fill_super - Try to mount.
+ */
+static int ntfs_fill_super(struct super_block *sb, void *data, int silent)
+{
+	int err;
+	struct ntfs_sb_info *sbi;
+	struct block_device *bdev = sb->s_bdev;
+	struct inode *bd_inode = bdev->bd_inode;
+	struct request_queue *rq = bdev_get_queue(bdev);
+	struct inode *inode = NULL;
+	struct ntfs_inode *ni;
+	size_t i, tt;
+	CLST vcn, lcn, len;
+	struct ATTRIB *attr;
+	const struct VOLUME_INFO *info;
+	u32 idx, done, bytes;
+	struct ATTR_DEF_ENTRY *t;
+	u16 *upcase = NULL;
+	u16 *shared;
+	bool is_ro;
+	struct MFT_REF ref;
+
+	ref.high = 0;
+
+	sbi = kzalloc(sizeof(struct ntfs_sb_info), GFP_NOFS);
+	if (!sbi)
+		return -ENOMEM;
+
+	sb->s_fs_info = sbi;
+	sbi->sb = sb;
+	sb->s_flags |= SB_NODIRATIME;
+	sb->s_magic = 0x7366746e; // "ntfs"
+	sb->s_op = &ntfs_sops;
+	sb->s_export_op = &ntfs_export_ops;
+	sb->s_time_gran = NTFS_TIME_GRAN; // 100 nsec
+	sb->s_xattr = ntfs_xattr_handlers;
+
+	ratelimit_state_init(&sbi->msg_ratelimit, DEFAULT_RATELIMIT_INTERVAL,
+			     DEFAULT_RATELIMIT_BURST);
+
+	err = ntfs_parse_options(sb, data, silent, &sbi->options);
+	if (err)
+		goto out;
+
+	if (!rq || !blk_queue_discard(rq) || !rq->limits.discard_granularity) {
+		;
+	} else {
+		sbi->discard_granularity = rq->limits.discard_granularity;
+		sbi->discard_granularity_mask_inv =
+			~(u64)(sbi->discard_granularity - 1);
+	}
+
+	sb_set_blocksize(sb, PAGE_SIZE);
+
+	/* Parse boot. */
+	err = ntfs_init_from_boot(sb, rq ? queue_logical_block_size(rq) : 512,
+				  bd_inode->i_size);
+	if (err)
+		goto out;
+
+#ifdef CONFIG_NTFS3_64BIT_CLUSTER
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+#else
+	sb->s_maxbytes = 0xFFFFFFFFull << sbi->cluster_bits;
+#endif
+
+	mutex_init(&sbi->compress.mtx_lznt);
+#ifdef CONFIG_NTFS3_LZX_XPRESS
+	mutex_init(&sbi->compress.mtx_xpress);
+	mutex_init(&sbi->compress.mtx_lzx);
+#endif
+
+	/*
+	 * Load $Volume. This should be done before $LogFile
+	 * 'cause 'sbi->volume.ni' is used 'ntfs_set_state'.
+	 */
+	ref.low = cpu_to_le32(MFT_REC_VOL);
+	ref.seq = cpu_to_le16(MFT_REC_VOL);
+	inode = ntfs_iget5(sb, &ref, &NAME_VOLUME);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		ntfs_err(sb, "Failed to load $Volume.");
+		inode = NULL;
+		goto out;
+	}
+
+	ni = ntfs_i(inode);
+
+	/* Load and save label (not necessary). */
+	attr = ni_find_attr(ni, NULL, NULL, ATTR_LABEL, NULL, 0, NULL, NULL);
+
+	if (!attr) {
+		/* It is ok if no ATTR_LABEL */
+	} else if (!attr->non_res && !is_attr_ext(attr)) {
+		/* $AttrDef allows labels to be up to 128 symbols. */
+		err = utf16s_to_utf8s(resident_data(attr),
+				      le32_to_cpu(attr->res.data_size) >> 1,
+				      UTF16_LITTLE_ENDIAN, sbi->volume.label,
+				      sizeof(sbi->volume.label));
+		if (err < 0)
+			sbi->volume.label[0] = 0;
+	} else {
+		/* Should we break mounting here? */
+		//err = -EINVAL;
+		//goto out;
+	}
+
+	attr = ni_find_attr(ni, attr, NULL, ATTR_VOL_INFO, NULL, 0, NULL, NULL);
+	if (!attr || is_attr_ext(attr)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	info = resident_data_ex(attr, SIZEOF_ATTRIBUTE_VOLUME_INFO);
+	if (!info) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	sbi->volume.major_ver = info->major_ver;
+	sbi->volume.minor_ver = info->minor_ver;
+	sbi->volume.flags = info->flags;
+
+	sbi->volume.ni = ni;
+	inode = NULL;
+
+	/* Load $MFTMirr to estimate recs_mirr. */
+	ref.low = cpu_to_le32(MFT_REC_MIRR);
+	ref.seq = cpu_to_le16(MFT_REC_MIRR);
+	inode = ntfs_iget5(sb, &ref, &NAME_MIRROR);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		ntfs_err(sb, "Failed to load $MFTMirr.");
+		inode = NULL;
+		goto out;
+	}
+
+	sbi->mft.recs_mirr =
+		ntfs_up_cluster(sbi, inode->i_size) >> sbi->record_bits;
+
+	iput(inode);
+
+	/* Load LogFile to replay. */
+	ref.low = cpu_to_le32(MFT_REC_LOG);
+	ref.seq = cpu_to_le16(MFT_REC_LOG);
+	inode = ntfs_iget5(sb, &ref, &NAME_LOGFILE);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		ntfs_err(sb, "Failed to load \x24LogFile.");
+		inode = NULL;
+		goto out;
+	}
+
+	ni = ntfs_i(inode);
+
+	err = ntfs_loadlog_and_replay(ni, sbi);
+	if (err)
+		goto out;
+
+	iput(inode);
+	inode = NULL;
+
+	is_ro = sb_rdonly(sbi->sb);
+
+	if (sbi->flags & NTFS_FLAGS_NEED_REPLAY) {
+		if (!is_ro) {
+			ntfs_warn(sb,
+				  "failed to replay log file. Can't mount rw!");
+			err = -EINVAL;
+			goto out;
+		}
+	} else if (sbi->volume.flags & VOLUME_FLAG_DIRTY) {
+		if (!is_ro && !sbi->options.force) {
+			ntfs_warn(
+				sb,
+				"volume is dirty and \"force\" flag is not set!");
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
+	/* Load $MFT. */
+	ref.low = cpu_to_le32(MFT_REC_MFT);
+	ref.seq = cpu_to_le16(1);
+
+	inode = ntfs_iget5(sb, &ref, &NAME_MFT);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		ntfs_err(sb, "Failed to load $MFT.");
+		inode = NULL;
+		goto out;
+	}
+
+	ni = ntfs_i(inode);
+
+	sbi->mft.used = ni->i_valid >> sbi->record_bits;
+	tt = inode->i_size >> sbi->record_bits;
+	sbi->mft.next_free = MFT_REC_USER;
+
+	err = wnd_init(&sbi->mft.bitmap, sb, tt);
+	if (err)
+		goto out;
+
+	err = ni_load_all_mi(ni);
+	if (err)
+		goto out;
+
+	sbi->mft.ni = ni;
+
+	/* Load $BadClus. */
+	ref.low = cpu_to_le32(MFT_REC_BADCLUST);
+	ref.seq = cpu_to_le16(MFT_REC_BADCLUST);
+	inode = ntfs_iget5(sb, &ref, &NAME_BADCLUS);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		ntfs_err(sb, "Failed to load $BadClus.");
+		inode = NULL;
+		goto out;
+	}
+
+	ni = ntfs_i(inode);
+
+	for (i = 0; run_get_entry(&ni->file.run, i, &vcn, &lcn, &len); i++) {
+		if (lcn == SPARSE_LCN)
+			continue;
+
+		if (!sbi->bad_clusters)
+			ntfs_notice(sb, "Volume contains bad blocks");
+
+		sbi->bad_clusters += len;
+	}
+
+	iput(inode);
+
+	/* Load $Bitmap. */
+	ref.low = cpu_to_le32(MFT_REC_BITMAP);
+	ref.seq = cpu_to_le16(MFT_REC_BITMAP);
+	inode = ntfs_iget5(sb, &ref, &NAME_BITMAP);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		ntfs_err(sb, "Failed to load $Bitmap.");
+		inode = NULL;
+		goto out;
+	}
+
+	ni = ntfs_i(inode);
+
+#ifndef CONFIG_NTFS3_64BIT_CLUSTER
+	if (inode->i_size >> 32) {
+		err = -EINVAL;
+		goto out;
+	}
+#endif
+
+	/* Check bitmap boundary. */
+	tt = sbi->used.bitmap.nbits;
+	if (inode->i_size < bitmap_size(tt)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Not necessary. */
+	sbi->used.bitmap.set_tail = true;
+	err = wnd_init(&sbi->used.bitmap, sbi->sb, tt);
+	if (err)
+		goto out;
+
+	iput(inode);
+
+	/* Compute the MFT zone. */
+	err = ntfs_refresh_zone(sbi);
+	if (err)
+		goto out;
+
+	/* Load $AttrDef. */
+	ref.low = cpu_to_le32(MFT_REC_ATTR);
+	ref.seq = cpu_to_le16(MFT_REC_ATTR);
+	inode = ntfs_iget5(sbi->sb, &ref, &NAME_ATTRDEF);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		ntfs_err(sb, "Failed to load $AttrDef -> %d", err);
+		inode = NULL;
+		goto out;
+	}
+
+	if (inode->i_size < sizeof(struct ATTR_DEF_ENTRY)) {
+		err = -EINVAL;
+		goto out;
+	}
+	bytes = inode->i_size;
+	sbi->def_table = t = kmalloc(bytes, GFP_NOFS);
+	if (!t) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	for (done = idx = 0; done < bytes; done += PAGE_SIZE, idx++) {
+		unsigned long tail = bytes - done;
+		struct page *page = ntfs_map_page(inode->i_mapping, idx);
+
+		if (IS_ERR(page)) {
+			err = PTR_ERR(page);
+			goto out;
+		}
+		memcpy(Add2Ptr(t, done), page_address(page),
+		       min(PAGE_SIZE, tail));
+		ntfs_unmap_page(page);
+
+		if (!idx && ATTR_STD != t->type) {
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
+	t += 1;
+	sbi->def_entries = 1;
+	done = sizeof(struct ATTR_DEF_ENTRY);
+	sbi->reparse.max_size = MAXIMUM_REPARSE_DATA_BUFFER_SIZE;
+	sbi->ea_max_size = 0x10000; /* default formatter value */
+
+	while (done + sizeof(struct ATTR_DEF_ENTRY) <= bytes) {
+		u32 t32 = le32_to_cpu(t->type);
+		u64 sz = le64_to_cpu(t->max_sz);
+
+		if ((t32 & 0xF) || le32_to_cpu(t[-1].type) >= t32)
+			break;
+
+		if (t->type == ATTR_REPARSE)
+			sbi->reparse.max_size = sz;
+		else if (t->type == ATTR_EA)
+			sbi->ea_max_size = sz;
+
+		done += sizeof(struct ATTR_DEF_ENTRY);
+		t += 1;
+		sbi->def_entries += 1;
+	}
+	iput(inode);
+
+	/* Load $UpCase. */
+	ref.low = cpu_to_le32(MFT_REC_UPCASE);
+	ref.seq = cpu_to_le16(MFT_REC_UPCASE);
+	inode = ntfs_iget5(sb, &ref, &NAME_UPCASE);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		ntfs_err(sb, "Failed to load \x24LogFile.");
+		inode = NULL;
+		goto out;
+	}
+
+	ni = ntfs_i(inode);
+
+	if (inode->i_size != 0x10000 * sizeof(short)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	sbi->upcase = upcase = kvmalloc(0x10000 * sizeof(short), GFP_KERNEL);
+	if (!upcase) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	for (idx = 0; idx < (0x10000 * sizeof(short) >> PAGE_SHIFT); idx++) {
+		const __le16 *src;
+		u16 *dst = Add2Ptr(upcase, idx << PAGE_SHIFT);
+		struct page *page = ntfs_map_page(inode->i_mapping, idx);
+
+		if (IS_ERR(page)) {
+			err = PTR_ERR(page);
+			goto out;
+		}
+
+		src = page_address(page);
+
+#ifdef __BIG_ENDIAN
+		for (i = 0; i < PAGE_SIZE / sizeof(u16); i++)
+			*dst++ = le16_to_cpu(*src++);
+#else
+		memcpy(dst, src, PAGE_SIZE);
+#endif
+		ntfs_unmap_page(page);
+	}
+
+	shared = ntfs_set_shared(upcase, 0x10000 * sizeof(short));
+	if (shared && upcase != shared) {
+		sbi->upcase = shared;
+		kvfree(upcase);
+	}
+
+	iput(inode);
+	inode = NULL;
+
+	if (is_ntfs3(sbi)) {
+		/* Load $Secure. */
+		err = ntfs_security_init(sbi);
+		if (err)
+			goto out;
+
+		/* Load $Extend. */
+		err = ntfs_extend_init(sbi);
+		if (err)
+			goto load_root;
+
+		/* Load $Extend\$Reparse. */
+		err = ntfs_reparse_init(sbi);
+		if (err)
+			goto load_root;
+
+		/* Load $Extend\$ObjId. */
+		err = ntfs_objid_init(sbi);
+		if (err)
+			goto load_root;
+	}
+
+load_root:
+	/* Load root. */
+	ref.low = cpu_to_le32(MFT_REC_ROOT);
+	ref.seq = cpu_to_le16(MFT_REC_ROOT);
+	inode = ntfs_iget5(sb, &ref, &NAME_ROOT);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		ntfs_err(sb, "Failed to load root.");
+		inode = NULL;
+		goto out;
+	}
+
+	ni = ntfs_i(inode);
+
+	sb->s_root = d_make_root(inode);
+
+	if (!sb->s_root) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	return 0;
+
+out:
+	iput(inode);
+
+	if (sb->s_root) {
+		d_drop(sb->s_root);
+		sb->s_root = NULL;
+	}
+
+	put_ntfs(sbi);
+
+	sb->s_fs_info = NULL;
+	return err;
+}
+
+void ntfs_unmap_meta(struct super_block *sb, CLST lcn, CLST len)
+{
+	struct ntfs_sb_info *sbi = sb->s_fs_info;
+	struct block_device *bdev = sb->s_bdev;
+	sector_t devblock = (u64)lcn * sbi->blocks_per_cluster;
+	unsigned long blocks = (u64)len * sbi->blocks_per_cluster;
+	unsigned long cnt = 0;
+	unsigned long limit = global_zone_page_state(NR_FREE_PAGES)
+			      << (PAGE_SHIFT - sb->s_blocksize_bits);
+
+	if (limit >= 0x2000)
+		limit -= 0x1000;
+	else if (limit < 32)
+		limit = 32;
+	else
+		limit >>= 1;
+
+	while (blocks--) {
+		clean_bdev_aliases(bdev, devblock++, 1);
+		if (cnt++ >= limit) {
+			sync_blockdev(bdev);
+			cnt = 0;
+		}
+	}
+}
+
+/*
+ * ntfs_discard - Issue a discard request (trim for SSD).
+ */
+int ntfs_discard(struct ntfs_sb_info *sbi, CLST lcn, CLST len)
+{
+	int err;
+	u64 lbo, bytes, start, end;
+	struct super_block *sb;
+
+	if (sbi->used.next_free_lcn == lcn + len)
+		sbi->used.next_free_lcn = lcn;
+
+	if (sbi->flags & NTFS_FLAGS_NODISCARD)
+		return -EOPNOTSUPP;
+
+	if (!sbi->options.discard)
+		return -EOPNOTSUPP;
+
+	lbo = (u64)lcn << sbi->cluster_bits;
+	bytes = (u64)len << sbi->cluster_bits;
+
+	/* Align up 'start' on discard_granularity. */
+	start = (lbo + sbi->discard_granularity - 1) &
+		sbi->discard_granularity_mask_inv;
+	/* Align down 'end' on discard_granularity. */
+	end = (lbo + bytes) & sbi->discard_granularity_mask_inv;
+
+	sb = sbi->sb;
+	if (start >= end)
+		return 0;
+
+	err = blkdev_issue_discard(sb->s_bdev, start >> 9, (end - start) >> 9,
+				   GFP_NOFS, 0);
+
+	if (err == -EOPNOTSUPP)
+		sbi->flags |= NTFS_FLAGS_NODISCARD;
+
+	return err;
+}
+
+static struct dentry *ntfs_mount(struct file_system_type *fs_type, int flags,
+				 const char *dev_name, void *data)
+{
+	return mount_bdev(fs_type, flags, dev_name, data, ntfs_fill_super);
+}
+
+// clang-format off
+static struct file_system_type ntfs_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "ntfs3",
+	.mount		= ntfs_mount,
+	.kill_sb	= kill_block_super,
+	.fs_flags	= FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
+};
+// clang-format on
+
+static int __init init_ntfs_fs(void)
+{
+	int err;
+
+	pr_info("ntfs3: Max link count %u\n", NTFS_LINK_MAX);
+
+	if (IS_ENABLED(CONFIG_NTFS3_FS_POSIX_ACL))
+		pr_info("ntfs3: Enabled Linux POSIX ACLs support\n");
+	if (IS_ENABLED(CONFIG_NTFS3_64BIT_CLUSTER))
+		pr_notice("ntfs3: Warning: Activated 64 bits per cluster. Windows does not support this\n");
+	if (IS_ENABLED(CONFIG_NTFS3_LZX_XPRESS))
+		pr_info("ntfs3: Read-only LZX/Xpress compression included\n");
+
+	err = ntfs3_init_bitmap();
+	if (err)
+		return err;
+
+	ntfs_inode_cachep = kmem_cache_create(
+		"ntfs_inode_cache", sizeof(struct ntfs_inode), 0,
+		(SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT),
+		init_once);
+	if (!ntfs_inode_cachep) {
+		err = -ENOMEM;
+		goto out1;
+	}
+
+	err = register_filesystem(&ntfs_fs_type);
+	if (err)
+		goto out;
+
+	return 0;
+out:
+	kmem_cache_destroy(ntfs_inode_cachep);
+out1:
+	ntfs3_exit_bitmap();
+	return err;
+}
+
+static void __exit exit_ntfs_fs(void)
+{
+	if (ntfs_inode_cachep) {
+		rcu_barrier();
+		kmem_cache_destroy(ntfs_inode_cachep);
+	}
+
+	unregister_filesystem(&ntfs_fs_type);
+	ntfs3_exit_bitmap();
+}
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ntfs3 read/write filesystem");
+#ifdef CONFIG_NTFS3_FS_POSIX_ACL
+MODULE_INFO(behaviour, "Enabled Linux POSIX ACLs support");
+#endif
+#ifdef CONFIG_NTFS3_64BIT_CLUSTER
+MODULE_INFO(cluster, "Warning: Activated 64 bits per cluster. Windows does not support this");
+#endif
+#ifdef CONFIG_NTFS3_LZX_XPRESS
+MODULE_INFO(compression, "Read-only lzx/xpress compression included");
+#endif
+
+MODULE_AUTHOR("Konstantin Komarov");
+MODULE_ALIAS_FS("ntfs3");
+
+module_init(init_ntfs_fs);
+module_exit(exit_ntfs_fs);
diff --git a/fs/ntfs3/upcase.c b/fs/ntfs3/upcase.c
new file mode 100644
index 000000000000..bbeba778237e
--- /dev/null
+++ b/fs/ntfs3/upcase.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/module.h>
+#include <linux/nls.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+static inline u16 upcase_unicode_char(const u16 *upcase, u16 chr)
+{
+	if (chr < 'a')
+		return chr;
+
+	if (chr <= 'z')
+		return chr - ('a' - 'A');
+
+	return upcase[chr];
+}
+
+/*
+ * ntfs_cmp_names
+ *
+ * Thanks Kari Argillander <kari.argillander@gmail.com> for idea and implementation 'bothcase'
+ *
+ * Straight way to compare names:
+ * - Case insensitive
+ * - If name equals and 'bothcases' then
+ * - Case sensitive
+ * 'Straight way' code scans input names twice in worst case.
+ * Optimized code scans input names only once.
+ */
+int ntfs_cmp_names(const __le16 *s1, size_t l1, const __le16 *s2, size_t l2,
+		   const u16 *upcase, bool bothcase)
+{
+	int diff1 = 0;
+	int diff2;
+	size_t len = min(l1, l2);
+
+	if (!bothcase && upcase)
+		goto case_insentive;
+
+	for (; len; s1++, s2++, len--) {
+		diff1 = le16_to_cpu(*s1) - le16_to_cpu(*s2);
+		if (diff1) {
+			if (bothcase && upcase)
+				goto case_insentive;
+
+			return diff1;
+		}
+	}
+	return l1 - l2;
+
+case_insentive:
+	for (; len; s1++, s2++, len--) {
+		diff2 = upcase_unicode_char(upcase, le16_to_cpu(*s1)) -
+			upcase_unicode_char(upcase, le16_to_cpu(*s2));
+		if (diff2)
+			return diff2;
+	}
+
+	diff2 = l1 - l2;
+	return diff2 ? diff2 : diff1;
+}
+
+int ntfs_cmp_names_cpu(const struct cpu_str *uni1, const struct le_str *uni2,
+		       const u16 *upcase, bool bothcase)
+{
+	const u16 *s1 = uni1->name;
+	const __le16 *s2 = uni2->name;
+	size_t l1 = uni1->len;
+	size_t l2 = uni2->len;
+	size_t len = min(l1, l2);
+	int diff1 = 0;
+	int diff2;
+
+	if (!bothcase && upcase)
+		goto case_insentive;
+
+	for (; len; s1++, s2++, len--) {
+		diff1 = *s1 - le16_to_cpu(*s2);
+		if (diff1) {
+			if (bothcase && upcase)
+				goto case_insentive;
+
+			return diff1;
+		}
+	}
+	return l1 - l2;
+
+case_insentive:
+	for (; len; s1++, s2++, len--) {
+		diff2 = upcase_unicode_char(upcase, *s1) -
+			upcase_unicode_char(upcase, le16_to_cpu(*s2));
+		if (diff2)
+			return diff2;
+	}
+
+	diff2 = l1 - l2;
+	return diff2 ? diff2 : diff1;
+}
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c
new file mode 100644
index 000000000000..7282d85c4ece
--- /dev/null
+++ b/fs/ntfs3/xattr.c
@@ -0,0 +1,1122 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/fs.h>
+#include <linux/nls.h>
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr.h>
+
+#include "debug.h"
+#include "ntfs.h"
+#include "ntfs_fs.h"
+
+// clang-format off
+#define SYSTEM_DOS_ATTRIB    "system.dos_attrib"
+#define SYSTEM_NTFS_ATTRIB   "system.ntfs_attrib"
+#define SYSTEM_NTFS_SECURITY "system.ntfs_security"
+// clang-format on
+
+static inline size_t unpacked_ea_size(const struct EA_FULL *ea)
+{
+	return ea->size ? le32_to_cpu(ea->size)
+			: ALIGN(struct_size(ea, name,
+					    1 + ea->name_len +
+						    le16_to_cpu(ea->elength)),
+				4);
+}
+
+static inline size_t packed_ea_size(const struct EA_FULL *ea)
+{
+	return struct_size(ea, name,
+			   1 + ea->name_len + le16_to_cpu(ea->elength)) -
+	       offsetof(struct EA_FULL, flags);
+}
+
+/*
+ * find_ea
+ *
+ * Assume there is at least one xattr in the list.
+ */
+static inline bool find_ea(const struct EA_FULL *ea_all, u32 bytes,
+			   const char *name, u8 name_len, u32 *off)
+{
+	*off = 0;
+
+	if (!ea_all || !bytes)
+		return false;
+
+	for (;;) {
+		const struct EA_FULL *ea = Add2Ptr(ea_all, *off);
+		u32 next_off = *off + unpacked_ea_size(ea);
+
+		if (next_off > bytes)
+			return false;
+
+		if (ea->name_len == name_len &&
+		    !memcmp(ea->name, name, name_len))
+			return true;
+
+		*off = next_off;
+		if (next_off >= bytes)
+			return false;
+	}
+}
+
+/*
+ * ntfs_read_ea - Read all extended attributes.
+ * @ea:		New allocated memory.
+ * @info:	Pointer into resident data.
+ */
+static int ntfs_read_ea(struct ntfs_inode *ni, struct EA_FULL **ea,
+			size_t add_bytes, const struct EA_INFO **info)
+{
+	int err;
+	struct ATTR_LIST_ENTRY *le = NULL;
+	struct ATTRIB *attr_info, *attr_ea;
+	void *ea_p;
+	u32 size;
+
+	static_assert(le32_to_cpu(ATTR_EA_INFO) < le32_to_cpu(ATTR_EA));
+
+	*ea = NULL;
+	*info = NULL;
+
+	attr_info =
+		ni_find_attr(ni, NULL, &le, ATTR_EA_INFO, NULL, 0, NULL, NULL);
+	attr_ea =
+		ni_find_attr(ni, attr_info, &le, ATTR_EA, NULL, 0, NULL, NULL);
+
+	if (!attr_ea || !attr_info)
+		return 0;
+
+	*info = resident_data_ex(attr_info, sizeof(struct EA_INFO));
+	if (!*info)
+		return -EINVAL;
+
+	/* Check Ea limit. */
+	size = le32_to_cpu((*info)->size);
+	if (size > ni->mi.sbi->ea_max_size)
+		return -EFBIG;
+
+	if (attr_size(attr_ea) > ni->mi.sbi->ea_max_size)
+		return -EFBIG;
+
+	/* Allocate memory for packed Ea. */
+	ea_p = kmalloc(size + add_bytes, GFP_NOFS);
+	if (!ea_p)
+		return -ENOMEM;
+
+	if (attr_ea->non_res) {
+		struct runs_tree run;
+
+		run_init(&run);
+
+		err = attr_load_runs(attr_ea, ni, &run, NULL);
+		if (!err)
+			err = ntfs_read_run_nb(ni->mi.sbi, &run, 0, ea_p, size,
+					       NULL);
+		run_close(&run);
+
+		if (err)
+			goto out;
+	} else {
+		void *p = resident_data_ex(attr_ea, size);
+
+		if (!p) {
+			err = -EINVAL;
+			goto out;
+		}
+		memcpy(ea_p, p, size);
+	}
+
+	memset(Add2Ptr(ea_p, size), 0, add_bytes);
+	*ea = ea_p;
+	return 0;
+
+out:
+	kfree(ea_p);
+	*ea = NULL;
+	return err;
+}
+
+/*
+ * ntfs_list_ea
+ *
+ * Copy a list of xattrs names into the buffer
+ * provided, or compute the buffer size required.
+ *
+ * Return:
+ * * Number of bytes used / required on
+ * * -ERRNO - on failure
+ */
+static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer,
+			    size_t bytes_per_buffer)
+{
+	const struct EA_INFO *info;
+	struct EA_FULL *ea_all = NULL;
+	const struct EA_FULL *ea;
+	u32 off, size;
+	int err;
+	size_t ret;
+
+	err = ntfs_read_ea(ni, &ea_all, 0, &info);
+	if (err)
+		return err;
+
+	if (!info || !ea_all)
+		return 0;
+
+	size = le32_to_cpu(info->size);
+
+	/* Enumerate all xattrs. */
+	for (ret = 0, off = 0; off < size; off += unpacked_ea_size(ea)) {
+		ea = Add2Ptr(ea_all, off);
+
+		if (buffer) {
+			if (ret + ea->name_len + 1 > bytes_per_buffer) {
+				err = -ERANGE;
+				goto out;
+			}
+
+			memcpy(buffer + ret, ea->name, ea->name_len);
+			buffer[ret + ea->name_len] = 0;
+		}
+
+		ret += ea->name_len + 1;
+	}
+
+out:
+	kfree(ea_all);
+	return err ? err : ret;
+}
+
+static int ntfs_get_ea(struct inode *inode, const char *name, size_t name_len,
+		       void *buffer, size_t size, size_t *required)
+{
+	struct ntfs_inode *ni = ntfs_i(inode);
+	const struct EA_INFO *info;
+	struct EA_FULL *ea_all = NULL;
+	const struct EA_FULL *ea;
+	u32 off, len;
+	int err;
+
+	if (!(ni->ni_flags & NI_FLAG_EA))
+		return -ENODATA;
+
+	if (!required)
+		ni_lock(ni);
+
+	len = 0;
+
+	if (name_len > 255) {
+		err = -ENAMETOOLONG;
+		goto out;
+	}
+
+	err = ntfs_read_ea(ni, &ea_all, 0, &info);
+	if (err)
+		goto out;
+
+	if (!info)
+		goto out;
+
+	/* Enumerate all xattrs. */
+	if (!find_ea(ea_all, le32_to_cpu(info->size), name, name_len, &off)) {
+		err = -ENODATA;
+		goto out;
+	}
+	ea = Add2Ptr(ea_all, off);
+
+	len = le16_to_cpu(ea->elength);
+	if (!buffer) {
+		err = 0;
+		goto out;
+	}
+
+	if (len > size) {
+		err = -ERANGE;
+		if (required)
+			*required = len;
+		goto out;
+	}
+
+	memcpy(buffer, ea->name + ea->name_len + 1, len);
+	err = 0;
+
+out:
+	kfree(ea_all);
+	if (!required)
+		ni_unlock(ni);
+
+	return err ? err : len;
+}
+
+static noinline int ntfs_set_ea(struct inode *inode, const char *name,
+				size_t name_len, const void *value,
+				size_t val_size, int flags, int locked)
+{
+	struct ntfs_inode *ni = ntfs_i(inode);
+	struct ntfs_sb_info *sbi = ni->mi.sbi;
+	int err;
+	struct EA_INFO ea_info;
+	const struct EA_INFO *info;
+	struct EA_FULL *new_ea;
+	struct EA_FULL *ea_all = NULL;
+	size_t add, new_pack;
+	u32 off, size;
+	__le16 size_pack;
+	struct ATTRIB *attr;
+	struct ATTR_LIST_ENTRY *le;
+	struct mft_inode *mi;
+	struct runs_tree ea_run;
+	u64 new_sz;
+	void *p;
+
+	if (!locked)
+		ni_lock(ni);
+
+	run_init(&ea_run);
+
+	if (name_len > 255) {
+		err = -ENAMETOOLONG;
+		goto out;
+	}
+
+	add = ALIGN(struct_size(ea_all, name, 1 + name_len + val_size), 4);
+
+	err = ntfs_read_ea(ni, &ea_all, add, &info);
+	if (err)
+		goto out;
+
+	if (!info) {
+		memset(&ea_info, 0, sizeof(ea_info));
+		size = 0;
+		size_pack = 0;
+	} else {
+		memcpy(&ea_info, info, sizeof(ea_info));
+		size = le32_to_cpu(ea_info.size);
+		size_pack = ea_info.size_pack;
+	}
+
+	if (info && find_ea(ea_all, size, name, name_len, &off)) {
+		struct EA_FULL *ea;
+		size_t ea_sz;
+
+		if (flags & XATTR_CREATE) {
+			err = -EEXIST;
+			goto out;
+		}
+
+		ea = Add2Ptr(ea_all, off);
+
+		/*
+		 * Check simple case when we try to insert xattr with the same value
+		 * e.g. ntfs_save_wsl_perm
+		 */
+		if (val_size && le16_to_cpu(ea->elength) == val_size &&
+		    !memcmp(ea->name + ea->name_len + 1, value, val_size)) {
+			/* xattr already contains the required value. */
+			goto out;
+		}
+
+		/* Remove current xattr. */
+		if (ea->flags & FILE_NEED_EA)
+			le16_add_cpu(&ea_info.count, -1);
+
+		ea_sz = unpacked_ea_size(ea);
+
+		le16_add_cpu(&ea_info.size_pack, 0 - packed_ea_size(ea));
+
+		memmove(ea, Add2Ptr(ea, ea_sz), size - off - ea_sz);
+
+		size -= ea_sz;
+		memset(Add2Ptr(ea_all, size), 0, ea_sz);
+
+		ea_info.size = cpu_to_le32(size);
+
+		if ((flags & XATTR_REPLACE) && !val_size) {
+			/* Remove xattr. */
+			goto update_ea;
+		}
+	} else {
+		if (flags & XATTR_REPLACE) {
+			err = -ENODATA;
+			goto out;
+		}
+
+		if (!ea_all) {
+			ea_all = kzalloc(add, GFP_NOFS);
+			if (!ea_all) {
+				err = -ENOMEM;
+				goto out;
+			}
+		}
+	}
+
+	/* Append new xattr. */
+	new_ea = Add2Ptr(ea_all, size);
+	new_ea->size = cpu_to_le32(add);
+	new_ea->flags = 0;
+	new_ea->name_len = name_len;
+	new_ea->elength = cpu_to_le16(val_size);
+	memcpy(new_ea->name, name, name_len);
+	new_ea->name[name_len] = 0;
+	memcpy(new_ea->name + name_len + 1, value, val_size);
+	new_pack = le16_to_cpu(ea_info.size_pack) + packed_ea_size(new_ea);
+
+	/* Should fit into 16 bits. */
+	if (new_pack > 0xffff) {
+		err = -EFBIG; // -EINVAL?
+		goto out;
+	}
+	ea_info.size_pack = cpu_to_le16(new_pack);
+
+	/* New size of ATTR_EA. */
+	size += add;
+	if (size > sbi->ea_max_size) {
+		err = -EFBIG; // -EINVAL?
+		goto out;
+	}
+	ea_info.size = cpu_to_le32(size);
+
+update_ea:
+
+	if (!info) {
+		/* Create xattr. */
+		if (!size) {
+			err = 0;
+			goto out;
+		}
+
+		err = ni_insert_resident(ni, sizeof(struct EA_INFO),
+					 ATTR_EA_INFO, NULL, 0, NULL, NULL,
+					 NULL);
+		if (err)
+			goto out;
+
+		err = ni_insert_resident(ni, 0, ATTR_EA, NULL, 0, NULL, NULL,
+					 NULL);
+		if (err)
+			goto out;
+	}
+
+	new_sz = size;
+	err = attr_set_size(ni, ATTR_EA, NULL, 0, &ea_run, new_sz, &new_sz,
+			    false, NULL);
+	if (err)
+		goto out;
+
+	le = NULL;
+	attr = ni_find_attr(ni, NULL, &le, ATTR_EA_INFO, NULL, 0, NULL, &mi);
+	if (!attr) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (!size) {
+		/* Delete xattr, ATTR_EA_INFO */
+		ni_remove_attr_le(ni, attr, mi, le);
+	} else {
+		p = resident_data_ex(attr, sizeof(struct EA_INFO));
+		if (!p) {
+			err = -EINVAL;
+			goto out;
+		}
+		memcpy(p, &ea_info, sizeof(struct EA_INFO));
+		mi->dirty = true;
+	}
+
+	le = NULL;
+	attr = ni_find_attr(ni, NULL, &le, ATTR_EA, NULL, 0, NULL, &mi);
+	if (!attr) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (!size) {
+		/* Delete xattr, ATTR_EA */
+		ni_remove_attr_le(ni, attr, mi, le);
+	} else if (attr->non_res) {
+		err = ntfs_sb_write_run(sbi, &ea_run, 0, ea_all, size);
+		if (err)
+			goto out;
+	} else {
+		p = resident_data_ex(attr, size);
+		if (!p) {
+			err = -EINVAL;
+			goto out;
+		}
+		memcpy(p, ea_all, size);
+		mi->dirty = true;
+	}
+
+	/* Check if we delete the last xattr. */
+	if (size)
+		ni->ni_flags |= NI_FLAG_EA;
+	else
+		ni->ni_flags &= ~NI_FLAG_EA;
+
+	if (ea_info.size_pack != size_pack)
+		ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
+	mark_inode_dirty(&ni->vfs_inode);
+
+out:
+	if (!locked)
+		ni_unlock(ni);
+
+	run_close(&ea_run);
+	kfree(ea_all);
+
+	return err;
+}
+
+#ifdef CONFIG_NTFS3_FS_POSIX_ACL
+static inline void ntfs_posix_acl_release(struct posix_acl *acl)
+{
+	if (acl && refcount_dec_and_test(&acl->a_refcount))
+		kfree(acl);
+}
+
+static struct posix_acl *ntfs_get_acl_ex(struct user_namespace *mnt_userns,
+					 struct inode *inode, int type,
+					 int locked)
+{
+	struct ntfs_inode *ni = ntfs_i(inode);
+	const char *name;
+	size_t name_len;
+	struct posix_acl *acl;
+	size_t req;
+	int err;
+	void *buf;
+
+	/* Allocate PATH_MAX bytes. */
+	buf = __getname();
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
+
+	/* Possible values of 'type' was already checked above. */
+	if (type == ACL_TYPE_ACCESS) {
+		name = XATTR_NAME_POSIX_ACL_ACCESS;
+		name_len = sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1;
+	} else {
+		name = XATTR_NAME_POSIX_ACL_DEFAULT;
+		name_len = sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1;
+	}
+
+	if (!locked)
+		ni_lock(ni);
+
+	err = ntfs_get_ea(inode, name, name_len, buf, PATH_MAX, &req);
+
+	if (!locked)
+		ni_unlock(ni);
+
+	/* Translate extended attribute to acl. */
+	if (err >= 0) {
+		acl = posix_acl_from_xattr(mnt_userns, buf, err);
+		if (!IS_ERR(acl))
+			set_cached_acl(inode, type, acl);
+	} else {
+		acl = err == -ENODATA ? NULL : ERR_PTR(err);
+	}
+
+	__putname(buf);
+
+	return acl;
+}
+
+/*
+ * ntfs_get_acl - inode_operations::get_acl
+ */
+struct posix_acl *ntfs_get_acl(struct inode *inode, int type, bool rcu)
+{
+	if (rcu)
+		return ERR_PTR(-ECHILD);
+
+	/* TODO: init_user_ns? */
+	return ntfs_get_acl_ex(&init_user_ns, inode, type, 0);
+}
+
+static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns,
+				    struct inode *inode, struct posix_acl *acl,
+				    int type, int locked)
+{
+	const char *name;
+	size_t size, name_len;
+	void *value = NULL;
+	int err = 0;
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		if (acl) {
+			umode_t mode = inode->i_mode;
+
+			err = posix_acl_equiv_mode(acl, &mode);
+			if (err < 0)
+				return err;
+
+			if (inode->i_mode != mode) {
+				inode->i_mode = mode;
+				mark_inode_dirty(inode);
+			}
+
+			if (!err) {
+				/*
+				 * ACL can be exactly represented in the
+				 * traditional file mode permission bits.
+				 */
+				acl = NULL;
+			}
+		}
+		name = XATTR_NAME_POSIX_ACL_ACCESS;
+		name_len = sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1;
+		break;
+
+	case ACL_TYPE_DEFAULT:
+		if (!S_ISDIR(inode->i_mode))
+			return acl ? -EACCES : 0;
+		name = XATTR_NAME_POSIX_ACL_DEFAULT;
+		name_len = sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	if (!acl) {
+		size = 0;
+		value = NULL;
+	} else {
+		size = posix_acl_xattr_size(acl->a_count);
+		value = kmalloc(size, GFP_NOFS);
+		if (!value)
+			return -ENOMEM;
+
+		err = posix_acl_to_xattr(mnt_userns, acl, value, size);
+		if (err < 0)
+			goto out;
+	}
+
+	err = ntfs_set_ea(inode, name, name_len, value, size, 0, locked);
+	if (!err)
+		set_cached_acl(inode, type, acl);
+
+out:
+	kfree(value);
+
+	return err;
+}
+
+/*
+ * ntfs_set_acl - inode_operations::set_acl
+ */
+int ntfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+		 struct posix_acl *acl, int type)
+{
+	return ntfs_set_acl_ex(mnt_userns, inode, acl, type, 0);
+}
+
+static int ntfs_xattr_get_acl(struct user_namespace *mnt_userns,
+			      struct inode *inode, int type, void *buffer,
+			      size_t size)
+{
+	struct posix_acl *acl;
+	int err;
+
+	if (!(inode->i_sb->s_flags & SB_POSIXACL)) {
+		ntfs_inode_warn(inode, "add mount option \"acl\" to use acl");
+		return -EOPNOTSUPP;
+	}
+
+	acl = ntfs_get_acl(inode, type, false);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+
+	if (!acl)
+		return -ENODATA;
+
+	err = posix_acl_to_xattr(mnt_userns, acl, buffer, size);
+	ntfs_posix_acl_release(acl);
+
+	return err;
+}
+
+static int ntfs_xattr_set_acl(struct user_namespace *mnt_userns,
+			      struct inode *inode, int type, const void *value,
+			      size_t size)
+{
+	struct posix_acl *acl;
+	int err;
+
+	if (!(inode->i_sb->s_flags & SB_POSIXACL)) {
+		ntfs_inode_warn(inode, "add mount option \"acl\" to use acl");
+		return -EOPNOTSUPP;
+	}
+
+	if (!inode_owner_or_capable(mnt_userns, inode))
+		return -EPERM;
+
+	if (!value) {
+		acl = NULL;
+	} else {
+		acl = posix_acl_from_xattr(mnt_userns, value, size);
+		if (IS_ERR(acl))
+			return PTR_ERR(acl);
+
+		if (acl) {
+			err = posix_acl_valid(mnt_userns, acl);
+			if (err)
+				goto release_and_out;
+		}
+	}
+
+	err = ntfs_set_acl(mnt_userns, inode, acl, type);
+
+release_and_out:
+	ntfs_posix_acl_release(acl);
+	return err;
+}
+
+/*
+ * ntfs_init_acl - Initialize the ACLs of a new inode.
+ *
+ * Called from ntfs_create_inode().
+ */
+int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode,
+		  struct inode *dir)
+{
+	struct posix_acl *default_acl, *acl;
+	int err;
+
+	/*
+	 * TODO: Refactoring lock.
+	 * ni_lock(dir) ... -> posix_acl_create(dir,...) -> ntfs_get_acl -> ni_lock(dir)
+	 */
+	inode->i_default_acl = NULL;
+
+	default_acl = ntfs_get_acl_ex(mnt_userns, dir, ACL_TYPE_DEFAULT, 1);
+
+	if (!default_acl || default_acl == ERR_PTR(-EOPNOTSUPP)) {
+		inode->i_mode &= ~current_umask();
+		err = 0;
+		goto out;
+	}
+
+	if (IS_ERR(default_acl)) {
+		err = PTR_ERR(default_acl);
+		goto out;
+	}
+
+	acl = default_acl;
+	err = __posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
+	if (err < 0)
+		goto out1;
+	if (!err) {
+		posix_acl_release(acl);
+		acl = NULL;
+	}
+
+	if (!S_ISDIR(inode->i_mode)) {
+		posix_acl_release(default_acl);
+		default_acl = NULL;
+	}
+
+	if (default_acl)
+		err = ntfs_set_acl_ex(mnt_userns, inode, default_acl,
+				      ACL_TYPE_DEFAULT, 1);
+
+	if (!acl)
+		inode->i_acl = NULL;
+	else if (!err)
+		err = ntfs_set_acl_ex(mnt_userns, inode, acl, ACL_TYPE_ACCESS,
+				      1);
+
+	posix_acl_release(acl);
+out1:
+	posix_acl_release(default_acl);
+
+out:
+	return err;
+}
+#endif
+
+/*
+ * ntfs_acl_chmod - Helper for ntfs3_setattr().
+ */
+int ntfs_acl_chmod(struct user_namespace *mnt_userns, struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+
+	if (!(sb->s_flags & SB_POSIXACL))
+		return 0;
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	return posix_acl_chmod(mnt_userns, inode, inode->i_mode);
+}
+
+/*
+ * ntfs_permission - inode_operations::permission
+ */
+int ntfs_permission(struct user_namespace *mnt_userns, struct inode *inode,
+		    int mask)
+{
+	if (ntfs_sb(inode->i_sb)->options.no_acs_rules) {
+		/* "No access rules" mode - Allow all changes. */
+		return 0;
+	}
+
+	return generic_permission(mnt_userns, inode, mask);
+}
+
+/*
+ * ntfs_listxattr - inode_operations::listxattr
+ */
+ssize_t ntfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
+{
+	struct inode *inode = d_inode(dentry);
+	struct ntfs_inode *ni = ntfs_i(inode);
+	ssize_t ret;
+
+	if (!(ni->ni_flags & NI_FLAG_EA)) {
+		/* no xattr in file */
+		return 0;
+	}
+
+	ni_lock(ni);
+
+	ret = ntfs_list_ea(ni, buffer, size);
+
+	ni_unlock(ni);
+
+	return ret;
+}
+
+static int ntfs_getxattr(const struct xattr_handler *handler, struct dentry *de,
+			 struct inode *inode, const char *name, void *buffer,
+			 size_t size)
+{
+	int err;
+	struct ntfs_inode *ni = ntfs_i(inode);
+	size_t name_len = strlen(name);
+
+	/* Dispatch request. */
+	if (name_len == sizeof(SYSTEM_DOS_ATTRIB) - 1 &&
+	    !memcmp(name, SYSTEM_DOS_ATTRIB, sizeof(SYSTEM_DOS_ATTRIB))) {
+		/* system.dos_attrib */
+		if (!buffer) {
+			err = sizeof(u8);
+		} else if (size < sizeof(u8)) {
+			err = -ENODATA;
+		} else {
+			err = sizeof(u8);
+			*(u8 *)buffer = le32_to_cpu(ni->std_fa);
+		}
+		goto out;
+	}
+
+	if (name_len == sizeof(SYSTEM_NTFS_ATTRIB) - 1 &&
+	    !memcmp(name, SYSTEM_NTFS_ATTRIB, sizeof(SYSTEM_NTFS_ATTRIB))) {
+		/* system.ntfs_attrib */
+		if (!buffer) {
+			err = sizeof(u32);
+		} else if (size < sizeof(u32)) {
+			err = -ENODATA;
+		} else {
+			err = sizeof(u32);
+			*(u32 *)buffer = le32_to_cpu(ni->std_fa);
+		}
+		goto out;
+	}
+
+	if (name_len == sizeof(SYSTEM_NTFS_SECURITY) - 1 &&
+	    !memcmp(name, SYSTEM_NTFS_SECURITY, sizeof(SYSTEM_NTFS_SECURITY))) {
+		/* system.ntfs_security*/
+		struct SECURITY_DESCRIPTOR_RELATIVE *sd = NULL;
+		size_t sd_size = 0;
+
+		if (!is_ntfs3(ni->mi.sbi)) {
+			/* We should get nt4 security. */
+			err = -EINVAL;
+			goto out;
+		} else if (le32_to_cpu(ni->std_security_id) <
+			   SECURITY_ID_FIRST) {
+			err = -ENOENT;
+			goto out;
+		}
+
+		err = ntfs_get_security_by_id(ni->mi.sbi, ni->std_security_id,
+					      &sd, &sd_size);
+		if (err)
+			goto out;
+
+		if (!is_sd_valid(sd, sd_size)) {
+			ntfs_inode_warn(
+				inode,
+				"looks like you get incorrect security descriptor id=%u",
+				ni->std_security_id);
+		}
+
+		if (!buffer) {
+			err = sd_size;
+		} else if (size < sd_size) {
+			err = -ENODATA;
+		} else {
+			err = sd_size;
+			memcpy(buffer, sd, sd_size);
+		}
+		kfree(sd);
+		goto out;
+	}
+
+#ifdef CONFIG_NTFS3_FS_POSIX_ACL
+	if ((name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 &&
+	     !memcmp(name, XATTR_NAME_POSIX_ACL_ACCESS,
+		     sizeof(XATTR_NAME_POSIX_ACL_ACCESS))) ||
+	    (name_len == sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1 &&
+	     !memcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
+		     sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)))) {
+		/* TODO: init_user_ns? */
+		err = ntfs_xattr_get_acl(
+			&init_user_ns, inode,
+			name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1
+				? ACL_TYPE_ACCESS
+				: ACL_TYPE_DEFAULT,
+			buffer, size);
+		goto out;
+	}
+#endif
+	/* Deal with NTFS extended attribute. */
+	err = ntfs_get_ea(inode, name, name_len, buffer, size, NULL);
+
+out:
+	return err;
+}
+
+/*
+ * ntfs_setxattr - inode_operations::setxattr
+ */
+static noinline int ntfs_setxattr(const struct xattr_handler *handler,
+				  struct user_namespace *mnt_userns,
+				  struct dentry *de, struct inode *inode,
+				  const char *name, const void *value,
+				  size_t size, int flags)
+{
+	int err = -EINVAL;
+	struct ntfs_inode *ni = ntfs_i(inode);
+	size_t name_len = strlen(name);
+	enum FILE_ATTRIBUTE new_fa;
+
+	/* Dispatch request. */
+	if (name_len == sizeof(SYSTEM_DOS_ATTRIB) - 1 &&
+	    !memcmp(name, SYSTEM_DOS_ATTRIB, sizeof(SYSTEM_DOS_ATTRIB))) {
+		if (sizeof(u8) != size)
+			goto out;
+		new_fa = cpu_to_le32(*(u8 *)value);
+		goto set_new_fa;
+	}
+
+	if (name_len == sizeof(SYSTEM_NTFS_ATTRIB) - 1 &&
+	    !memcmp(name, SYSTEM_NTFS_ATTRIB, sizeof(SYSTEM_NTFS_ATTRIB))) {
+		if (size != sizeof(u32))
+			goto out;
+		new_fa = cpu_to_le32(*(u32 *)value);
+
+		if (S_ISREG(inode->i_mode)) {
+			/* Process compressed/sparsed in special way. */
+			ni_lock(ni);
+			err = ni_new_attr_flags(ni, new_fa);
+			ni_unlock(ni);
+			if (err)
+				goto out;
+		}
+set_new_fa:
+		/*
+		 * Thanks Mark Harmstone:
+		 * Keep directory bit consistency.
+		 */
+		if (S_ISDIR(inode->i_mode))
+			new_fa |= FILE_ATTRIBUTE_DIRECTORY;
+		else
+			new_fa &= ~FILE_ATTRIBUTE_DIRECTORY;
+
+		if (ni->std_fa != new_fa) {
+			ni->std_fa = new_fa;
+			if (new_fa & FILE_ATTRIBUTE_READONLY)
+				inode->i_mode &= ~0222;
+			else
+				inode->i_mode |= 0222;
+			/* Std attribute always in primary record. */
+			ni->mi.dirty = true;
+			mark_inode_dirty(inode);
+		}
+		err = 0;
+
+		goto out;
+	}
+
+	if (name_len == sizeof(SYSTEM_NTFS_SECURITY) - 1 &&
+	    !memcmp(name, SYSTEM_NTFS_SECURITY, sizeof(SYSTEM_NTFS_SECURITY))) {
+		/* system.ntfs_security*/
+		__le32 security_id;
+		bool inserted;
+		struct ATTR_STD_INFO5 *std;
+
+		if (!is_ntfs3(ni->mi.sbi)) {
+			/*
+			 * We should replace ATTR_SECURE.
+			 * Skip this way cause it is nt4 feature.
+			 */
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (!is_sd_valid(value, size)) {
+			err = -EINVAL;
+			ntfs_inode_warn(
+				inode,
+				"you try to set invalid security descriptor");
+			goto out;
+		}
+
+		err = ntfs_insert_security(ni->mi.sbi, value, size,
+					   &security_id, &inserted);
+		if (err)
+			goto out;
+
+		ni_lock(ni);
+		std = ni_std5(ni);
+		if (!std) {
+			err = -EINVAL;
+		} else if (std->security_id != security_id) {
+			std->security_id = ni->std_security_id = security_id;
+			/* Std attribute always in primary record. */
+			ni->mi.dirty = true;
+			mark_inode_dirty(&ni->vfs_inode);
+		}
+		ni_unlock(ni);
+		goto out;
+	}
+
+#ifdef CONFIG_NTFS3_FS_POSIX_ACL
+	if ((name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 &&
+	     !memcmp(name, XATTR_NAME_POSIX_ACL_ACCESS,
+		     sizeof(XATTR_NAME_POSIX_ACL_ACCESS))) ||
+	    (name_len == sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1 &&
+	     !memcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
+		     sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)))) {
+		err = ntfs_xattr_set_acl(
+			mnt_userns, inode,
+			name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1
+				? ACL_TYPE_ACCESS
+				: ACL_TYPE_DEFAULT,
+			value, size);
+		goto out;
+	}
+#endif
+	/* Deal with NTFS extended attribute. */
+	err = ntfs_set_ea(inode, name, name_len, value, size, flags, 0);
+
+out:
+	return err;
+}
+
+/*
+ * ntfs_save_wsl_perm
+ *
+ * save uid/gid/mode in xattr
+ */
+int ntfs_save_wsl_perm(struct inode *inode)
+{
+	int err;
+	__le32 value;
+
+	value = cpu_to_le32(i_uid_read(inode));
+	err = ntfs_set_ea(inode, "$LXUID", sizeof("$LXUID") - 1, &value,
+			  sizeof(value), 0, 0);
+	if (err)
+		goto out;
+
+	value = cpu_to_le32(i_gid_read(inode));
+	err = ntfs_set_ea(inode, "$LXGID", sizeof("$LXGID") - 1, &value,
+			  sizeof(value), 0, 0);
+	if (err)
+		goto out;
+
+	value = cpu_to_le32(inode->i_mode);
+	err = ntfs_set_ea(inode, "$LXMOD", sizeof("$LXMOD") - 1, &value,
+			  sizeof(value), 0, 0);
+	if (err)
+		goto out;
+
+	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
+		value = cpu_to_le32(inode->i_rdev);
+		err = ntfs_set_ea(inode, "$LXDEV", sizeof("$LXDEV") - 1, &value,
+				  sizeof(value), 0, 0);
+		if (err)
+			goto out;
+	}
+
+out:
+	/* In case of error should we delete all WSL xattr? */
+	return err;
+}
+
+/*
+ * ntfs_get_wsl_perm
+ *
+ * get uid/gid/mode from xattr
+ * it is called from ntfs_iget5->ntfs_read_mft
+ */
+void ntfs_get_wsl_perm(struct inode *inode)
+{
+	size_t sz;
+	__le32 value[3];
+
+	if (ntfs_get_ea(inode, "$LXUID", sizeof("$LXUID") - 1, &value[0],
+			sizeof(value[0]), &sz) == sizeof(value[0]) &&
+	    ntfs_get_ea(inode, "$LXGID", sizeof("$LXGID") - 1, &value[1],
+			sizeof(value[1]), &sz) == sizeof(value[1]) &&
+	    ntfs_get_ea(inode, "$LXMOD", sizeof("$LXMOD") - 1, &value[2],
+			sizeof(value[2]), &sz) == sizeof(value[2])) {
+		i_uid_write(inode, (uid_t)le32_to_cpu(value[0]));
+		i_gid_write(inode, (gid_t)le32_to_cpu(value[1]));
+		inode->i_mode = le32_to_cpu(value[2]);
+
+		if (ntfs_get_ea(inode, "$LXDEV", sizeof("$$LXDEV") - 1,
+				&value[0], sizeof(value),
+				&sz) == sizeof(value[0])) {
+			inode->i_rdev = le32_to_cpu(value[0]);
+		}
+	}
+}
+
+static bool ntfs_xattr_user_list(struct dentry *dentry)
+{
+	return true;
+}
+
+// clang-format off
+static const struct xattr_handler ntfs_xattr_handler = {
+	.prefix	= "",
+	.get	= ntfs_getxattr,
+	.set	= ntfs_setxattr,
+	.list	= ntfs_xattr_user_list,
+};
+
+const struct xattr_handler *ntfs_xattr_handlers[] = {
+	&ntfs_xattr_handler,
+	NULL,
+};
+// clang-format on
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 5c72a7e6d6c5..23a72a423955 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -289,7 +289,7 @@ unlock:
 	return status;
 }
 
-struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type)
+struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type, bool rcu)
 {
 	struct ocfs2_super *osb;
 	struct buffer_head *di_bh = NULL;
@@ -297,6 +297,9 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type)
 	int had_lock;
 	struct ocfs2_lock_holder oh;
 
+	if (rcu)
+		return ERR_PTR(-ECHILD);
+
 	osb = OCFS2_SB(inode->i_sb);
 	if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
 		return NULL;
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index f59d8d0a61fa..95a57c888ab6 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -16,7 +16,7 @@ struct ocfs2_acl_entry {
 	__le32 e_id;
 };
 
-struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type);
+struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type, bool rcu);
 int ocfs2_iop_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
 		      struct posix_acl *acl, int type);
 extern int ocfs2_acl_chmod(struct inode *, struct buffer_head *);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 48fd369c29a4..359524b7341f 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -16,6 +16,7 @@
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/time.h>
+#include <linux/delay.h>
 #include <linux/quotaops.h>
 #include <linux/sched/signal.h>
 
@@ -2721,7 +2722,7 @@ int ocfs2_inode_lock_tracker(struct inode *inode,
 			return status;
 		}
 	}
-	return tmp_oh ? 1 : 0;
+	return 1;
 }
 
 void ocfs2_inode_unlock_tracker(struct inode *inode,
@@ -3912,6 +3913,17 @@ downconvert:
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
 	ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb,
 				     gen);
+	/* The dlm lock convert is being cancelled in background,
+	 * ocfs2_cancel_convert() is asynchronous in fs/dlm,
+	 * requeue it, try again later.
+	 */
+	if (ret == -EBUSY) {
+		ctl->requeue = 1;
+		mlog(ML_BASTS, "lockres %s, ReQ: Downconvert busy\n",
+		     lockres->l_name);
+		ret = 0;
+		msleep(20);
+	}
 
 leave:
 	if (ret)
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index eda83487c9ec..f033de733adb 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -357,7 +357,6 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
 	}
 	oinfo->dqi_gi.dqi_sb = sb;
 	oinfo->dqi_gi.dqi_type = type;
-	ocfs2_qinfo_lock_res_init(&oinfo->dqi_gqlock, oinfo);
 	oinfo->dqi_gi.dqi_entry_size = sizeof(struct ocfs2_global_disk_dqblk);
 	oinfo->dqi_gi.dqi_ops = &ocfs2_global_ops;
 	oinfo->dqi_gqi_bh = NULL;
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index b1a8b046f4c2..0e4b16d4c037 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -702,6 +702,8 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
 	info->dqi_priv = oinfo;
 	oinfo->dqi_type = type;
 	INIT_LIST_HEAD(&oinfo->dqi_chunk);
+	oinfo->dqi_gqinode = NULL;
+	ocfs2_qinfo_lock_res_init(&oinfo->dqi_gqlock, oinfo);
 	oinfo->dqi_rec = NULL;
 	oinfo->dqi_lqi_bh = NULL;
 	oinfo->dqi_libh = NULL;
diff --git a/fs/orangefs/acl.c b/fs/orangefs/acl.c
index 18852b9ed82b..605e5a3506ec 100644
--- a/fs/orangefs/acl.c
+++ b/fs/orangefs/acl.c
@@ -10,12 +10,15 @@
 #include "orangefs-bufmap.h"
 #include <linux/posix_acl_xattr.h>
 
-struct posix_acl *orangefs_get_acl(struct inode *inode, int type)
+struct posix_acl *orangefs_get_acl(struct inode *inode, int type, bool rcu)
 {
 	struct posix_acl *acl;
 	int ret;
 	char *key = NULL, *value = NULL;
 
+	if (rcu)
+		return ERR_PTR(-ECHILD);
+
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		key = XATTR_NAME_POSIX_ACL_ACCESS;
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 16ac617df7d7..c1bb4c4b5d67 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -882,12 +882,7 @@ int orangefs_getattr(struct user_namespace *mnt_userns, const struct path *path,
 		if (!(request_mask & STATX_SIZE))
 			stat->result_mask &= ~STATX_SIZE;
 
-		stat->attributes_mask = STATX_ATTR_IMMUTABLE |
-		    STATX_ATTR_APPEND;
-		if (inode->i_flags & S_IMMUTABLE)
-			stat->attributes |= STATX_ATTR_IMMUTABLE;
-		if (inode->i_flags & S_APPEND)
-			stat->attributes |= STATX_ATTR_APPEND;
+		generic_fill_statx_attr(inode, stat);
 	}
 	return ret;
 }
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index 0e6b97682e41..b5940ec1836a 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -106,7 +106,7 @@ enum orangefs_vfs_op_states {
 extern int orangefs_init_acl(struct inode *inode, struct inode *dir);
 extern const struct xattr_handler *orangefs_xattr_handlers[];
 
-extern struct posix_acl *orangefs_get_acl(struct inode *inode, int type);
+extern struct posix_acl *orangefs_get_acl(struct inode *inode, int type, bool rcu);
 extern int orangefs_set_acl(struct user_namespace *mnt_userns,
 			    struct inode *inode, struct posix_acl *acl,
 			    int type);
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 2846b943e80c..4e7d5bfa2949 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -8,6 +8,7 @@
 #include <linux/fs.h>
 #include <linux/slab.h>
 #include <linux/file.h>
+#include <linux/fileattr.h>
 #include <linux/splice.h>
 #include <linux/xattr.h>
 #include <linux/security.h>
@@ -62,7 +63,7 @@ int ovl_copy_xattr(struct super_block *sb, struct dentry *old,
 		return list_size;
 	}
 
-	buf = kzalloc(list_size, GFP_KERNEL);
+	buf = kvzalloc(list_size, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
 
@@ -105,11 +106,12 @@ retry:
 		if (size > value_size) {
 			void *new;
 
-			new = krealloc(value, size, GFP_KERNEL);
+			new = kvmalloc(size, GFP_KERNEL);
 			if (!new) {
 				error = -ENOMEM;
 				break;
 			}
+			kvfree(value);
 			value = new;
 			value_size = size;
 			goto retry;
@@ -124,12 +126,50 @@ retry:
 			error = 0;
 		}
 	}
-	kfree(value);
+	kvfree(value);
 out:
-	kfree(buf);
+	kvfree(buf);
 	return error;
 }
 
+static int ovl_copy_fileattr(struct inode *inode, struct path *old,
+			     struct path *new)
+{
+	struct fileattr oldfa = { .flags_valid = true };
+	struct fileattr newfa = { .flags_valid = true };
+	int err;
+
+	err = ovl_real_fileattr_get(old, &oldfa);
+	if (err)
+		return err;
+
+	err = ovl_real_fileattr_get(new, &newfa);
+	if (err)
+		return err;
+
+	/*
+	 * We cannot set immutable and append-only flags on upper inode,
+	 * because we would not be able to link upper inode to upper dir
+	 * not set overlay private xattr on upper inode.
+	 * Store these flags in overlay.protattr xattr instead.
+	 */
+	if (oldfa.flags & OVL_PROT_FS_FLAGS_MASK) {
+		err = ovl_set_protattr(inode, new->dentry, &oldfa);
+		if (err)
+			return err;
+	}
+
+	BUILD_BUG_ON(OVL_COPY_FS_FLAGS_MASK & ~FS_COMMON_FL);
+	newfa.flags &= ~OVL_COPY_FS_FLAGS_MASK;
+	newfa.flags |= (oldfa.flags & OVL_COPY_FS_FLAGS_MASK);
+
+	BUILD_BUG_ON(OVL_COPY_FSX_FLAGS_MASK & ~FS_XFLAG_COMMON);
+	newfa.fsx_xflags &= ~OVL_COPY_FSX_FLAGS_MASK;
+	newfa.fsx_xflags |= (oldfa.fsx_xflags & OVL_COPY_FSX_FLAGS_MASK);
+
+	return ovl_real_fileattr_set(new, &newfa);
+}
+
 static int ovl_copy_up_data(struct ovl_fs *ofs, struct path *old,
 			    struct path *new, loff_t len)
 {
@@ -331,8 +371,8 @@ out_err:
 	return ERR_PTR(err);
 }
 
-int ovl_set_origin(struct ovl_fs *ofs, struct dentry *dentry,
-		   struct dentry *lower, struct dentry *upper)
+int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower,
+		   struct dentry *upper)
 {
 	const struct ovl_fh *fh = NULL;
 	int err;
@@ -351,7 +391,7 @@ int ovl_set_origin(struct ovl_fs *ofs, struct dentry *dentry,
 	/*
 	 * Do not fail when upper doesn't support xattrs.
 	 */
-	err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh->buf,
+	err = ovl_check_setxattr(ofs, upper, OVL_XATTR_ORIGIN, fh->buf,
 				 fh ? fh->fb.len : 0, 0);
 	kfree(fh);
 
@@ -493,20 +533,21 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
 static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
 {
 	struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
+	struct inode *inode = d_inode(c->dentry);
+	struct path upperpath, datapath;
 	int err;
 
+	ovl_path_upper(c->dentry, &upperpath);
+	if (WARN_ON(upperpath.dentry != NULL))
+		return -EIO;
+
+	upperpath.dentry = temp;
+
 	/*
 	 * Copy up data first and then xattrs. Writing data after
 	 * xattrs will remove security.capability xattr automatically.
 	 */
 	if (S_ISREG(c->stat.mode) && !c->metacopy) {
-		struct path upperpath, datapath;
-
-		ovl_path_upper(c->dentry, &upperpath);
-		if (WARN_ON(upperpath.dentry != NULL))
-			return -EIO;
-		upperpath.dentry = temp;
-
 		ovl_path_lowerdata(c->dentry, &datapath);
 		err = ovl_copy_up_data(ofs, &datapath, &upperpath,
 				       c->stat.size);
@@ -518,6 +559,16 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
 	if (err)
 		return err;
 
+	if (inode->i_flags & OVL_COPY_I_FLAGS_MASK) {
+		/*
+		 * Copy the fileattr inode flags that are the source of already
+		 * copied i_flags
+		 */
+		err = ovl_copy_fileattr(inode, &c->lowerpath, &upperpath);
+		if (err)
+			return err;
+	}
+
 	/*
 	 * Store identifier of lower inode in upper inode xattr to
 	 * allow lookup of the copy up origin inode.
@@ -526,13 +577,13 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
 	 * hard link.
 	 */
 	if (c->origin) {
-		err = ovl_set_origin(ofs, c->dentry, c->lowerpath.dentry, temp);
+		err = ovl_set_origin(ofs, c->lowerpath.dentry, temp);
 		if (err)
 			return err;
 	}
 
 	if (c->metacopy) {
-		err = ovl_check_setxattr(c->dentry, temp, OVL_XATTR_METACOPY,
+		err = ovl_check_setxattr(ofs, temp, OVL_XATTR_METACOPY,
 					 NULL, 0, -EOPNOTSUPP);
 		if (err)
 			return err;
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 93efe7048a77..1fefb2b8960e 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -233,9 +233,10 @@ struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr)
 static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper,
 			       int xerr)
 {
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
 	int err;
 
-	err = ovl_check_setxattr(dentry, upper, OVL_XATTR_OPAQUE, "y", 1, xerr);
+	err = ovl_check_setxattr(ofs, upper, OVL_XATTR_OPAQUE, "y", 1, xerr);
 	if (!err)
 		ovl_dentry_set_opaque(dentry);
 
@@ -320,6 +321,7 @@ static bool ovl_type_origin(struct dentry *dentry)
 static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
 			    struct ovl_cattr *attr)
 {
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
 	struct inode *udir = upperdir->d_inode;
 	struct dentry *newdentry;
@@ -338,7 +340,8 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
 	if (IS_ERR(newdentry))
 		goto out_unlock;
 
-	if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry)) {
+	if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry) &&
+	    !ovl_allow_offline_changes(ofs)) {
 		/* Setting opaque here is just an optimization, allow to fail */
 		ovl_set_opaque(dentry, newdentry);
 	}
@@ -542,8 +545,10 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
 			goto out_cleanup;
 	}
 	err = ovl_instantiate(dentry, inode, newdentry, hardlink);
-	if (err)
-		goto out_cleanup;
+	if (err) {
+		ovl_cleanup(udir, newdentry);
+		dput(newdentry);
+	}
 out_dput:
 	dput(upper);
 out_unlock:
@@ -1043,6 +1048,7 @@ static bool ovl_need_absolute_redirect(struct dentry *dentry, bool samedir)
 static int ovl_set_redirect(struct dentry *dentry, bool samedir)
 {
 	int err;
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
 	const char *redirect = ovl_dentry_get_redirect(dentry);
 	bool absolute_redirect = ovl_need_absolute_redirect(dentry, samedir);
 
@@ -1053,7 +1059,7 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir)
 	if (IS_ERR(redirect))
 		return PTR_ERR(redirect);
 
-	err = ovl_check_setxattr(dentry, ovl_dentry_upper(dentry),
+	err = ovl_check_setxattr(ofs, ovl_dentry_upper(dentry),
 				 OVL_XATTR_REDIRECT,
 				 redirect, strlen(redirect), -EXDEV);
 	if (!err) {
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 5e828a1c98a8..832b17589733 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -13,6 +13,7 @@
 #include <linux/fiemap.h>
 #include <linux/fileattr.h>
 #include <linux/security.h>
+#include <linux/namei.h>
 #include "overlayfs.h"
 
 
@@ -33,12 +34,6 @@ int ovl_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
 		goto out;
 
 	if (attr->ia_valid & ATTR_SIZE) {
-		struct inode *realinode = d_inode(ovl_dentry_real(dentry));
-
-		err = -ETXTBSY;
-		if (atomic_read(&realinode->i_writecount) < 0)
-			goto out_drop_write;
-
 		/* Truncate should trigger data copy up as well */
 		full_copy_up = true;
 	}
@@ -162,7 +157,8 @@ int ovl_getattr(struct user_namespace *mnt_userns, const struct path *path,
 	enum ovl_path_type type;
 	struct path realpath;
 	const struct cred *old_cred;
-	bool is_dir = S_ISDIR(dentry->d_inode->i_mode);
+	struct inode *inode = d_inode(dentry);
+	bool is_dir = S_ISDIR(inode->i_mode);
 	int fsid = 0;
 	int err;
 	bool metacopy_blocks = false;
@@ -175,6 +171,9 @@ int ovl_getattr(struct user_namespace *mnt_userns, const struct path *path,
 	if (err)
 		goto out;
 
+	/* Report the effective immutable/append-only STATX flags */
+	generic_fill_statx_attr(inode, stat);
+
 	/*
 	 * For non-dir or same fs, we use st_ino of the copy up origin.
 	 * This guaranties constant st_dev/st_ino across copy up.
@@ -448,7 +447,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
 	return res;
 }
 
-struct posix_acl *ovl_get_acl(struct inode *inode, int type)
+struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu)
 {
 	struct inode *realinode = ovl_inode_real(inode);
 	const struct cred *old_cred;
@@ -457,6 +456,9 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type)
 	if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode))
 		return NULL;
 
+	if (rcu)
+		return get_cached_acl_rcu(realinode, type);
+
 	old_cred = ovl_override_creds(inode->i_sb);
 	acl = get_acl(realinode, type);
 	revert_creds(old_cred);
@@ -503,16 +505,14 @@ static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
  * Introducing security_inode_fileattr_get/set() hooks would solve this issue
  * properly.
  */
-static int ovl_security_fileattr(struct dentry *dentry, struct fileattr *fa,
+static int ovl_security_fileattr(struct path *realpath, struct fileattr *fa,
 				 bool set)
 {
-	struct path realpath;
 	struct file *file;
 	unsigned int cmd;
 	int err;
 
-	ovl_path_real(dentry, &realpath);
-	file = dentry_open(&realpath, O_RDONLY, current_cred());
+	file = dentry_open(realpath, O_RDONLY, current_cred());
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
@@ -527,12 +527,24 @@ static int ovl_security_fileattr(struct dentry *dentry, struct fileattr *fa,
 	return err;
 }
 
+int ovl_real_fileattr_set(struct path *realpath, struct fileattr *fa)
+{
+	int err;
+
+	err = ovl_security_fileattr(realpath, fa, true);
+	if (err)
+		return err;
+
+	return vfs_fileattr_set(&init_user_ns, realpath->dentry, fa);
+}
+
 int ovl_fileattr_set(struct user_namespace *mnt_userns,
 		     struct dentry *dentry, struct fileattr *fa)
 {
 	struct inode *inode = d_inode(dentry);
-	struct dentry *upperdentry;
+	struct path upperpath;
 	const struct cred *old_cred;
+	unsigned int flags;
 	int err;
 
 	err = ovl_want_write(dentry);
@@ -541,31 +553,78 @@ int ovl_fileattr_set(struct user_namespace *mnt_userns,
 
 	err = ovl_copy_up(dentry);
 	if (!err) {
-		upperdentry = ovl_dentry_upper(dentry);
+		ovl_path_real(dentry, &upperpath);
 
 		old_cred = ovl_override_creds(inode->i_sb);
-		err = ovl_security_fileattr(dentry, fa, true);
+		/*
+		 * Store immutable/append-only flags in xattr and clear them
+		 * in upper fileattr (in case they were set by older kernel)
+		 * so children of "ovl-immutable" directories lower aliases of
+		 * "ovl-immutable" hardlinks could be copied up.
+		 * Clear xattr when flags are cleared.
+		 */
+		err = ovl_set_protattr(inode, upperpath.dentry, fa);
 		if (!err)
-			err = vfs_fileattr_set(&init_user_ns, upperdentry, fa);
+			err = ovl_real_fileattr_set(&upperpath, fa);
 		revert_creds(old_cred);
-		ovl_copyflags(ovl_inode_real(inode), inode);
+
+		/*
+		 * Merge real inode flags with inode flags read from
+		 * overlay.protattr xattr
+		 */
+		flags = ovl_inode_real(inode)->i_flags & OVL_COPY_I_FLAGS_MASK;
+
+		BUILD_BUG_ON(OVL_PROT_I_FLAGS_MASK & ~OVL_COPY_I_FLAGS_MASK);
+		flags |= inode->i_flags & OVL_PROT_I_FLAGS_MASK;
+		inode_set_flags(inode, flags, OVL_COPY_I_FLAGS_MASK);
+
+		/* Update ctime */
+		ovl_copyattr(ovl_inode_real(inode), inode);
 	}
 	ovl_drop_write(dentry);
 out:
 	return err;
 }
 
+/* Convert inode protection flags to fileattr flags */
+static void ovl_fileattr_prot_flags(struct inode *inode, struct fileattr *fa)
+{
+	BUILD_BUG_ON(OVL_PROT_FS_FLAGS_MASK & ~FS_COMMON_FL);
+	BUILD_BUG_ON(OVL_PROT_FSX_FLAGS_MASK & ~FS_XFLAG_COMMON);
+
+	if (inode->i_flags & S_APPEND) {
+		fa->flags |= FS_APPEND_FL;
+		fa->fsx_xflags |= FS_XFLAG_APPEND;
+	}
+	if (inode->i_flags & S_IMMUTABLE) {
+		fa->flags |= FS_IMMUTABLE_FL;
+		fa->fsx_xflags |= FS_XFLAG_IMMUTABLE;
+	}
+}
+
+int ovl_real_fileattr_get(struct path *realpath, struct fileattr *fa)
+{
+	int err;
+
+	err = ovl_security_fileattr(realpath, fa, false);
+	if (err)
+		return err;
+
+	return vfs_fileattr_get(realpath->dentry, fa);
+}
+
 int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa)
 {
 	struct inode *inode = d_inode(dentry);
-	struct dentry *realdentry = ovl_dentry_real(dentry);
+	struct path realpath;
 	const struct cred *old_cred;
 	int err;
 
+	ovl_path_real(dentry, &realpath);
+
 	old_cred = ovl_override_creds(inode->i_sb);
-	err = ovl_security_fileattr(dentry, fa, false);
-	if (!err)
-		err = vfs_fileattr_get(realdentry, fa);
+	err = ovl_real_fileattr_get(&realpath, fa);
+	ovl_fileattr_prot_flags(inode, fa);
 	revert_creds(old_cred);
 
 	return err;
@@ -1118,6 +1177,10 @@ struct inode *ovl_get_inode(struct super_block *sb,
 		}
 	}
 
+	/* Check for immutable/append-only inode flags in xattr */
+	if (upperdentry)
+		ovl_check_protattr(inode, upperdentry);
+
 	if (inode->i_state & I_NEW)
 		unlock_new_inode(inode);
 out:
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 210cd6f66e28..1a9b515fc45d 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -392,7 +392,7 @@ invalid:
 			    upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
 			    d_inode(origin)->i_mode & S_IFMT);
 	dput(origin);
-	return -EIO;
+	return -ESTALE;
 }
 
 static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
@@ -811,7 +811,7 @@ static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry,
 	if (err)
 		return err;
 
-	err = ovl_set_origin(ofs, dentry, lower, upper);
+	err = ovl_set_origin(ofs, lower, upper);
 	if (!err)
 		err = ovl_set_impure(dentry->d_parent, upper->d_parent);
 
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 6ec73db4bf9e..3894f3347955 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -34,6 +34,7 @@ enum ovl_xattr {
 	OVL_XATTR_NLINK,
 	OVL_XATTR_UPPER,
 	OVL_XATTR_METACOPY,
+	OVL_XATTR_PROTATTR,
 };
 
 enum ovl_inode_flag {
@@ -262,6 +263,18 @@ static inline bool ovl_open_flags_need_copy_up(int flags)
 	return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC));
 }
 
+static inline bool ovl_allow_offline_changes(struct ovl_fs *ofs)
+{
+	/*
+	 * To avoid regressions in existing setups with overlay lower offline
+	 * changes, we allow lower changes only if none of the new features
+	 * are used.
+	 */
+	return (!ofs->config.index && !ofs->config.metacopy &&
+		!ofs->config.redirect_dir && ofs->config.xino != OVL_XINO_ON);
+}
+
+
 /* util.c */
 int ovl_want_write(struct dentry *dentry);
 void ovl_drop_write(struct dentry *dentry);
@@ -320,7 +333,7 @@ bool ovl_already_copied_up(struct dentry *dentry, int flags);
 bool ovl_check_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry);
 bool ovl_check_dir_xattr(struct super_block *sb, struct dentry *dentry,
 			 enum ovl_xattr ox);
-int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
+int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry,
 		       enum ovl_xattr ox, const void *value, size_t size,
 		       int xerr);
 int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
@@ -485,7 +498,7 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
 int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
 		  void *value, size_t size);
 ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
-struct posix_acl *ovl_get_acl(struct inode *inode, int type);
+struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu);
 int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags);
 bool ovl_is_private_xattr(struct super_block *sb, const char *name);
 
@@ -518,9 +531,28 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to)
 	i_size_write(to, i_size_read(from));
 }
 
+/* vfs inode flags copied from real to ovl inode */
+#define OVL_COPY_I_FLAGS_MASK	(S_SYNC | S_NOATIME | S_APPEND | S_IMMUTABLE)
+/* vfs inode flags read from overlay.protattr xattr to ovl inode */
+#define OVL_PROT_I_FLAGS_MASK	(S_APPEND | S_IMMUTABLE)
+
+/*
+ * fileattr flags copied from lower to upper inode on copy up.
+ * We cannot copy up immutable/append-only flags, because that would prevent
+ * linking temp inode to upper dir, so we store them in xattr instead.
+ */
+#define OVL_COPY_FS_FLAGS_MASK	(FS_SYNC_FL | FS_NOATIME_FL)
+#define OVL_COPY_FSX_FLAGS_MASK	(FS_XFLAG_SYNC | FS_XFLAG_NOATIME)
+#define OVL_PROT_FS_FLAGS_MASK  (FS_APPEND_FL | FS_IMMUTABLE_FL)
+#define OVL_PROT_FSX_FLAGS_MASK (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE)
+
+void ovl_check_protattr(struct inode *inode, struct dentry *upper);
+int ovl_set_protattr(struct inode *inode, struct dentry *upper,
+		      struct fileattr *fa);
+
 static inline void ovl_copyflags(struct inode *from, struct inode *to)
 {
-	unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME;
+	unsigned int mask = OVL_COPY_I_FLAGS_MASK;
 
 	inode_set_flags(to, from->i_flags & mask, mask);
 }
@@ -548,6 +580,8 @@ struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr);
 extern const struct file_operations ovl_file_operations;
 int __init ovl_aio_request_cache_init(void);
 void ovl_aio_request_cache_destroy(void);
+int ovl_real_fileattr_get(struct path *realpath, struct fileattr *fa);
+int ovl_real_fileattr_set(struct path *realpath, struct fileattr *fa);
 int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa);
 int ovl_fileattr_set(struct user_namespace *mnt_userns,
 		     struct dentry *dentry, struct fileattr *fa);
@@ -561,8 +595,8 @@ int ovl_copy_xattr(struct super_block *sb, struct dentry *old,
 int ovl_set_attr(struct dentry *upper, struct kstat *stat);
 struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real,
 				  bool is_upper);
-int ovl_set_origin(struct ovl_fs *ofs, struct dentry *dentry,
-		   struct dentry *lower, struct dentry *upper);
+int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower,
+		   struct dentry *upper);
 
 /* export.c */
 extern const struct export_operations ovl_export_operations;
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index b01d4147520d..178daa5e82c9 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1599,9 +1599,7 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
 	 * user opted-in to one of the new features that require following the
 	 * lower inode of non-dir upper.
 	 */
-	if (!ofs->config.index && !ofs->config.metacopy &&
-	    ofs->config.xino != OVL_XINO_ON &&
-	    uuid_is_null(uuid))
+	if (ovl_allow_offline_changes(ofs) && uuid_is_null(uuid))
 		return false;
 
 	for (i = 0; i < ofs->numfs; i++) {
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index b9d03627f364..f48284a2a896 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -10,6 +10,7 @@
 #include <linux/cred.h>
 #include <linux/xattr.h>
 #include <linux/exportfs.h>
+#include <linux/fileattr.h>
 #include <linux/uuid.h>
 #include <linux/namei.h>
 #include <linux/ratelimit.h>
@@ -585,6 +586,7 @@ bool ovl_check_dir_xattr(struct super_block *sb, struct dentry *dentry,
 #define OVL_XATTR_NLINK_POSTFIX		"nlink"
 #define OVL_XATTR_UPPER_POSTFIX		"upper"
 #define OVL_XATTR_METACOPY_POSTFIX	"metacopy"
+#define OVL_XATTR_PROTATTR_POSTFIX	"protattr"
 
 #define OVL_XATTR_TAB_ENTRY(x) \
 	[x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \
@@ -598,14 +600,14 @@ const char *const ovl_xattr_table[][2] = {
 	OVL_XATTR_TAB_ENTRY(OVL_XATTR_NLINK),
 	OVL_XATTR_TAB_ENTRY(OVL_XATTR_UPPER),
 	OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY),
+	OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR),
 };
 
-int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
+int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry,
 		       enum ovl_xattr ox, const void *value, size_t size,
 		       int xerr)
 {
 	int err;
-	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
 
 	if (ofs->noxattr)
 		return xerr;
@@ -623,6 +625,7 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
 
 int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry)
 {
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
 	int err;
 
 	if (ovl_test_flag(OVL_IMPURE, d_inode(dentry)))
@@ -632,14 +635,95 @@ int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry)
 	 * Do not fail when upper doesn't support xattrs.
 	 * Upper inodes won't have origin nor redirect xattr anyway.
 	 */
-	err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE,
-				 "y", 1, 0);
+	err = ovl_check_setxattr(ofs, upperdentry, OVL_XATTR_IMPURE, "y", 1, 0);
 	if (!err)
 		ovl_set_flag(OVL_IMPURE, d_inode(dentry));
 
 	return err;
 }
 
+
+#define OVL_PROTATTR_MAX 32 /* Reserved for future flags */
+
+void ovl_check_protattr(struct inode *inode, struct dentry *upper)
+{
+	struct ovl_fs *ofs = OVL_FS(inode->i_sb);
+	u32 iflags = inode->i_flags & OVL_PROT_I_FLAGS_MASK;
+	char buf[OVL_PROTATTR_MAX+1];
+	int res, n;
+
+	res = ovl_do_getxattr(ofs, upper, OVL_XATTR_PROTATTR, buf,
+			      OVL_PROTATTR_MAX);
+	if (res < 0)
+		return;
+
+	/*
+	 * Initialize inode flags from overlay.protattr xattr and upper inode
+	 * flags.  If upper inode has those fileattr flags set (i.e. from old
+	 * kernel), we do not clear them on ovl_get_inode(), but we will clear
+	 * them on next fileattr_set().
+	 */
+	for (n = 0; n < res; n++) {
+		if (buf[n] == 'a')
+			iflags |= S_APPEND;
+		else if (buf[n] == 'i')
+			iflags |= S_IMMUTABLE;
+		else
+			break;
+	}
+
+	if (!res || n < res) {
+		pr_warn_ratelimited("incompatible overlay.protattr format (%pd2, len=%d)\n",
+				    upper, res);
+	} else {
+		inode_set_flags(inode, iflags, OVL_PROT_I_FLAGS_MASK);
+	}
+}
+
+int ovl_set_protattr(struct inode *inode, struct dentry *upper,
+		      struct fileattr *fa)
+{
+	struct ovl_fs *ofs = OVL_FS(inode->i_sb);
+	char buf[OVL_PROTATTR_MAX];
+	int len = 0, err = 0;
+	u32 iflags = 0;
+
+	BUILD_BUG_ON(HWEIGHT32(OVL_PROT_FS_FLAGS_MASK) > OVL_PROTATTR_MAX);
+
+	if (fa->flags & FS_APPEND_FL) {
+		buf[len++] = 'a';
+		iflags |= S_APPEND;
+	}
+	if (fa->flags & FS_IMMUTABLE_FL) {
+		buf[len++] = 'i';
+		iflags |= S_IMMUTABLE;
+	}
+
+	/*
+	 * Do not allow to set protection flags when upper doesn't support
+	 * xattrs, because we do not set those fileattr flags on upper inode.
+	 * Remove xattr if it exist and all protection flags are cleared.
+	 */
+	if (len) {
+		err = ovl_check_setxattr(ofs, upper, OVL_XATTR_PROTATTR,
+					 buf, len, -EPERM);
+	} else if (inode->i_flags & OVL_PROT_I_FLAGS_MASK) {
+		err = ovl_do_removexattr(ofs, upper, OVL_XATTR_PROTATTR);
+		if (err == -EOPNOTSUPP || err == -ENODATA)
+			err = 0;
+	}
+	if (err)
+		return err;
+
+	inode_set_flags(inode, iflags, OVL_PROT_I_FLAGS_MASK);
+
+	/* Mask out the fileattr flags that should not be set in upper inode */
+	fa->flags &= ~OVL_PROT_FS_FLAGS_MASK;
+	fa->fsx_xflags &= ~OVL_PROT_FSX_FLAGS_MASK;
+
+	return 0;
+}
+
 /**
  * Caller must hold a reference to inode to prevent it from being freed while
  * it is marked inuse.
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index f3309a7edb49..f5c25f580dd9 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -22,6 +22,7 @@
 #include <linux/xattr.h>
 #include <linux/export.h>
 #include <linux/user_namespace.h>
+#include <linux/namei.h>
 
 static struct posix_acl **acl_by_type(struct inode *inode, int type)
 {
@@ -56,7 +57,17 @@ EXPORT_SYMBOL(get_cached_acl);
 
 struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type)
 {
-	return rcu_dereference(*acl_by_type(inode, type));
+	struct posix_acl *acl = rcu_dereference(*acl_by_type(inode, type));
+
+	if (acl == ACL_DONT_CACHE) {
+		struct posix_acl *ret;
+
+		ret = inode->i_op->get_acl(inode, type, LOOKUP_RCU);
+		if (!IS_ERR(ret))
+			acl = ret;
+	}
+
+	return acl;
 }
 EXPORT_SYMBOL(get_cached_acl_rcu);
 
@@ -138,7 +149,7 @@ struct posix_acl *get_acl(struct inode *inode, int type)
 		set_cached_acl(inode, type, NULL);
 		return NULL;
 	}
-	acl = inode->i_op->get_acl(inode, type);
+	acl = inode->i_op->get_acl(inode, type, false);
 
 	if (IS_ERR(acl)) {
 		/*
diff --git a/fs/proc/array.c b/fs/proc/array.c
index ee0ce8cecc4a..49be8c8ef555 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -98,27 +98,17 @@
 
 void proc_task_name(struct seq_file *m, struct task_struct *p, bool escape)
 {
-	char *buf;
-	size_t size;
 	char tcomm[64];
-	int ret;
 
 	if (p->flags & PF_WQ_WORKER)
 		wq_worker_comm(tcomm, sizeof(tcomm), p);
 	else
 		__get_task_comm(tcomm, sizeof(tcomm), p);
 
-	size = seq_get_buf(m, &buf);
-	if (escape) {
-		ret = string_escape_str(tcomm, buf, size,
-					ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\");
-		if (ret >= size)
-			ret = -1;
-	} else {
-		ret = strscpy(buf, tcomm, size);
-	}
-
-	seq_commit(m, ret);
+	if (escape)
+		seq_escape_str(m, tcomm, ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\");
+	else
+		seq_printf(m, "%.64s", tcomm);
 }
 
 /*
diff --git a/fs/proc/base.c b/fs/proc/base.c
index e5b5f7709d48..533d5836eb9a 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -95,6 +95,7 @@
 #include <linux/posix-timers.h>
 #include <linux/time_namespace.h>
 #include <linux/resctrl.h>
+#include <linux/cn_proc.h>
 #include <trace/events/oom.h>
 #include "internal.h"
 #include "fd.h"
@@ -1674,8 +1675,10 @@ static ssize_t comm_write(struct file *file, const char __user *buf,
 	if (!p)
 		return -ESRCH;
 
-	if (same_thread_group(current, p))
+	if (same_thread_group(current, p)) {
 		set_task_comm(p, buffer);
+		proc_comm_connector(p);
+	}
 	else
 		count = -EINVAL;
 
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index eb97468dfe4c..cf25be3e0321 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -619,7 +619,6 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
 		[ilog2(VM_MAYSHARE)]	= "ms",
 		[ilog2(VM_GROWSDOWN)]	= "gd",
 		[ilog2(VM_PFNMAP)]	= "pf",
-		[ilog2(VM_DENYWRITE)]	= "dw",
 		[ilog2(VM_LOCKED)]	= "lo",
 		[ilog2(VM_IO)]		= "io",
 		[ilog2(VM_SEQ_READ)]	= "sr",
diff --git a/fs/reiserfs/acl.h b/fs/reiserfs/acl.h
index fd58618da360..d9052b8ce6dd 100644
--- a/fs/reiserfs/acl.h
+++ b/fs/reiserfs/acl.h
@@ -48,7 +48,7 @@ static inline int reiserfs_acl_count(size_t size)
 }
 
 #ifdef CONFIG_REISERFS_FS_POSIX_ACL
-struct posix_acl *reiserfs_get_acl(struct inode *inode, int type);
+struct posix_acl *reiserfs_get_acl(struct inode *inode, int type, bool rcu);
 int reiserfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
 		     struct posix_acl *acl, int type);
 int reiserfs_acl_chmod(struct inode *inode);
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index 500f2000eb41..30319dc33c18 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -8,7 +8,7 @@
 #include <linux/string.h>
 #include <linux/buffer_head.h>
 
-#include <stdarg.h>
+#include <linux/stdarg.h>
 
 static char error_buf[1024];
 static char fmt_buf[1024];
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index a9547144a099..d6fcddc46f5b 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -190,13 +190,16 @@ fail:
  * inode->i_mutex: down
  * BKL held [before 2.5.x]
  */
-struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
+struct posix_acl *reiserfs_get_acl(struct inode *inode, int type, bool rcu)
 {
 	char *name, *value;
 	struct posix_acl *acl;
 	int size;
 	int retval;
 
+	if (rcu)
+		return ERR_PTR(-ECHILD);
+
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name = XATTR_NAME_POSIX_ACL_ACCESS;
diff --git a/fs/cifs_common/Makefile b/fs/smbfs_common/Makefile
index 6fedd2f88a25..cafc61a3bfc3 100644
--- a/fs/cifs_common/Makefile
+++ b/fs/smbfs_common/Makefile
@@ -3,5 +3,5 @@
 # Makefile for Linux filesystem routines that are shared by client and server.
 #
 
-obj-$(CONFIG_CIFS_COMMON) += cifs_arc4.o
-obj-$(CONFIG_CIFS_COMMON) += cifs_md4.o
+obj-$(CONFIG_SMBFS_COMMON) += cifs_arc4.o
+obj-$(CONFIG_SMBFS_COMMON) += cifs_md4.o
diff --git a/fs/cifs_common/arc4.h b/fs/smbfs_common/arc4.h
index 12e71ec033a1..12e71ec033a1 100644
--- a/fs/cifs_common/arc4.h
+++ b/fs/smbfs_common/arc4.h
diff --git a/fs/cifs_common/cifs_arc4.c b/fs/smbfs_common/cifs_arc4.c
index b964cc682944..85ba15a60b13 100644
--- a/fs/cifs_common/cifs_arc4.c
+++ b/fs/smbfs_common/cifs_arc4.c
@@ -74,14 +74,14 @@ void cifs_arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in, unsigned int l
 EXPORT_SYMBOL_GPL(cifs_arc4_crypt);
 
 static int __init
-init_cifs_common(void)
+init_smbfs_common(void)
 {
 	return 0;
 }
 static void __init
-exit_cifs_common(void)
+exit_smbfs_common(void)
 {
 }
 
-module_init(init_cifs_common)
-module_exit(exit_cifs_common)
+module_init(init_smbfs_common)
+module_exit(exit_smbfs_common)
diff --git a/fs/cifs_common/cifs_md4.c b/fs/smbfs_common/cifs_md4.c
index 50f78cfc6ce9..50f78cfc6ce9 100644
--- a/fs/cifs_common/cifs_md4.c
+++ b/fs/smbfs_common/cifs_md4.c
diff --git a/fs/cifs_common/md4.h b/fs/smbfs_common/md4.h
index 5337becc699a..5337becc699a 100644
--- a/fs/cifs_common/md4.h
+++ b/fs/smbfs_common/md4.h
diff --git a/fs/cifs/smbfsctl.h b/fs/smbfs_common/smbfsctl.h
index d0fc42061f49..d01e8c9d7a31 100644
--- a/fs/cifs/smbfsctl.h
+++ b/fs/smbfs_common/smbfsctl.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: LGPL-2.1 */
+/* SPDX-License-Identifier: LGPL-2.1+ */
 /*
  *   fs/cifs/smbfsctl.h: SMB, CIFS, SMB2 FSCTL definitions
  *
@@ -19,11 +19,14 @@
  * could be invoked from tools via a specialized hook into the VFS rather
  * than via the standard vfs entry points
  *
- * See MS-SMB2 Section 2.2.31 (last checked June 2013, all of that list are
+ * See MS-SMB2 Section 2.2.31 (last checked September 2021, all of that list are
  * below). Additional detail on less common ones can be found in MS-FSCC
  * section 2.3.
  */
 
+#ifndef __SMBFSCTL_H
+#define __SMBFSCTL_H
+
 /*
  * FSCTL values are 32 bits and are constructed as
  * <device 16bits> <access 2bits> <function 12bits> <method 2bits>
@@ -91,6 +94,7 @@
 #define FSCTL_SET_ZERO_ON_DEALLOC    0x00090194 /* BB add struct */
 #define FSCTL_SET_SHORT_NAME_BEHAVIOR 0x000901B4 /* BB add struct */
 #define FSCTL_GET_INTEGRITY_INFORMATION 0x0009027C
+#define FSCTL_GET_REFS_VOLUME_DATA   0x000902D8 /* See MS-FSCC 2.3.24 */
 #define FSCTL_GET_RETRIEVAL_POINTERS_AND_REFCOUNT 0x000903d3
 #define FSCTL_GET_RETRIEVAL_POINTER_COUNT 0x0009042b
 #define FSCTL_QUERY_ALLOCATED_RANGES 0x000940CF
@@ -146,7 +150,13 @@
 #define IO_REPARSE_TAG_LX_CHR	     0x80000025
 #define IO_REPARSE_TAG_LX_BLK	     0x80000026
 
+#define IO_REPARSE_TAG_LX_SYMLINK_LE	cpu_to_le32(0xA000001D)
+#define IO_REPARSE_TAG_AF_UNIX_LE	cpu_to_le32(0x80000023)
+#define IO_REPARSE_TAG_LX_FIFO_LE	cpu_to_le32(0x80000024)
+#define IO_REPARSE_TAG_LX_CHR_LE	cpu_to_le32(0x80000025)
+#define IO_REPARSE_TAG_LX_BLK_LE	cpu_to_le32(0x80000026)
+
 /* fsctl flags */
 /* If Flags is set to this value, the request is an FSCTL not ioctl request */
 #define SMB2_0_IOCTL_IS_FSCTL		0x00000001
-
+#endif /* __SMBFSCTL_H */
diff --git a/fs/stat.c b/fs/stat.c
index 1fa38bdec1a6..28d2020ba1f4 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -60,6 +60,24 @@ void generic_fillattr(struct user_namespace *mnt_userns, struct inode *inode,
 EXPORT_SYMBOL(generic_fillattr);
 
 /**
+ * generic_fill_statx_attr - Fill in the statx attributes from the inode flags
+ * @inode:	Inode to use as the source
+ * @stat:	Where to fill in the attribute flags
+ *
+ * Fill in the STATX_ATTR_* flags in the kstat structure for properties of the
+ * inode that are published on i_flags and enforced by the VFS.
+ */
+void generic_fill_statx_attr(struct inode *inode, struct kstat *stat)
+{
+	if (inode->i_flags & S_IMMUTABLE)
+		stat->attributes |= STATX_ATTR_IMMUTABLE;
+	if (inode->i_flags & S_APPEND)
+		stat->attributes |= STATX_ATTR_APPEND;
+	stat->attributes_mask |= KSTAT_ATTR_VFS_FLAGS;
+}
+EXPORT_SYMBOL(generic_fill_statx_attr);
+
+/**
  * vfs_getattr_nosec - getattr without security checks
  * @path: file to get attributes from
  * @stat: structure to return attributes in
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 74028b5a7b0a..00a01471ea05 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -70,7 +70,7 @@
 #include <linux/module.h>
 #include <linux/bitops.h>
 
-#include <stdarg.h>
+#include <linux/stdarg.h>
 
 #include <linux/uaccess.h>
 
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 5c2d806e6ae5..003f0d31743e 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -33,11 +33,6 @@ int sysctl_unprivileged_userfaultfd __read_mostly;
 
 static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
 
-enum userfaultfd_state {
-	UFFD_STATE_WAIT_API,
-	UFFD_STATE_RUNNING,
-};
-
 /*
  * Start with fault_pending_wqh and fault_wqh so they're more likely
  * to be in the same cacheline.
@@ -69,12 +64,10 @@ struct userfaultfd_ctx {
 	unsigned int flags;
 	/* features requested from the userspace */
 	unsigned int features;
-	/* state machine */
-	enum userfaultfd_state state;
 	/* released */
 	bool released;
 	/* memory mappings are changing because of non-cooperative event */
-	bool mmap_changing;
+	atomic_t mmap_changing;
 	/* mm with one ore more vmas attached to this userfaultfd_ctx */
 	struct mm_struct *mm;
 };
@@ -104,6 +97,14 @@ struct userfaultfd_wake_range {
 	unsigned long len;
 };
 
+/* internal indication that UFFD_API ioctl was successfully executed */
+#define UFFD_FEATURE_INITIALIZED		(1u << 31)
+
+static bool userfaultfd_is_initialized(struct userfaultfd_ctx *ctx)
+{
+	return ctx->features & UFFD_FEATURE_INITIALIZED;
+}
+
 static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
 				     int wake_flags, void *key)
 {
@@ -623,7 +624,8 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 	 * already released.
 	 */
 out:
-	WRITE_ONCE(ctx->mmap_changing, false);
+	atomic_dec(&ctx->mmap_changing);
+	VM_BUG_ON(atomic_read(&ctx->mmap_changing) < 0);
 	userfaultfd_ctx_put(ctx);
 }
 
@@ -666,15 +668,14 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
 
 		refcount_set(&ctx->refcount, 1);
 		ctx->flags = octx->flags;
-		ctx->state = UFFD_STATE_RUNNING;
 		ctx->features = octx->features;
 		ctx->released = false;
-		ctx->mmap_changing = false;
+		atomic_set(&ctx->mmap_changing, 0);
 		ctx->mm = vma->vm_mm;
 		mmgrab(ctx->mm);
 
 		userfaultfd_ctx_get(octx);
-		WRITE_ONCE(octx->mmap_changing, true);
+		atomic_inc(&octx->mmap_changing);
 		fctx->orig = octx;
 		fctx->new = ctx;
 		list_add_tail(&fctx->list, fcs);
@@ -721,7 +722,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma,
 	if (ctx->features & UFFD_FEATURE_EVENT_REMAP) {
 		vm_ctx->ctx = ctx;
 		userfaultfd_ctx_get(ctx);
-		WRITE_ONCE(ctx->mmap_changing, true);
+		atomic_inc(&ctx->mmap_changing);
 	} else {
 		/* Drop uffd context if remap feature not enabled */
 		vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
@@ -766,7 +767,7 @@ bool userfaultfd_remove(struct vm_area_struct *vma,
 		return true;
 
 	userfaultfd_ctx_get(ctx);
-	WRITE_ONCE(ctx->mmap_changing, true);
+	atomic_inc(&ctx->mmap_changing);
 	mmap_read_unlock(mm);
 
 	msg_init(&ewq.msg);
@@ -810,7 +811,7 @@ int userfaultfd_unmap_prep(struct vm_area_struct *vma,
 			return -ENOMEM;
 
 		userfaultfd_ctx_get(ctx);
-		WRITE_ONCE(ctx->mmap_changing, true);
+		atomic_inc(&ctx->mmap_changing);
 		unmap_ctx->ctx = ctx;
 		unmap_ctx->start = start;
 		unmap_ctx->end = end;
@@ -943,38 +944,33 @@ static __poll_t userfaultfd_poll(struct file *file, poll_table *wait)
 
 	poll_wait(file, &ctx->fd_wqh, wait);
 
-	switch (ctx->state) {
-	case UFFD_STATE_WAIT_API:
+	if (!userfaultfd_is_initialized(ctx))
 		return EPOLLERR;
-	case UFFD_STATE_RUNNING:
-		/*
-		 * poll() never guarantees that read won't block.
-		 * userfaults can be waken before they're read().
-		 */
-		if (unlikely(!(file->f_flags & O_NONBLOCK)))
-			return EPOLLERR;
-		/*
-		 * lockless access to see if there are pending faults
-		 * __pollwait last action is the add_wait_queue but
-		 * the spin_unlock would allow the waitqueue_active to
-		 * pass above the actual list_add inside
-		 * add_wait_queue critical section. So use a full
-		 * memory barrier to serialize the list_add write of
-		 * add_wait_queue() with the waitqueue_active read
-		 * below.
-		 */
-		ret = 0;
-		smp_mb();
-		if (waitqueue_active(&ctx->fault_pending_wqh))
-			ret = EPOLLIN;
-		else if (waitqueue_active(&ctx->event_wqh))
-			ret = EPOLLIN;
 
-		return ret;
-	default:
-		WARN_ON_ONCE(1);
+	/*
+	 * poll() never guarantees that read won't block.
+	 * userfaults can be waken before they're read().
+	 */
+	if (unlikely(!(file->f_flags & O_NONBLOCK)))
 		return EPOLLERR;
-	}
+	/*
+	 * lockless access to see if there are pending faults
+	 * __pollwait last action is the add_wait_queue but
+	 * the spin_unlock would allow the waitqueue_active to
+	 * pass above the actual list_add inside
+	 * add_wait_queue critical section. So use a full
+	 * memory barrier to serialize the list_add write of
+	 * add_wait_queue() with the waitqueue_active read
+	 * below.
+	 */
+	ret = 0;
+	smp_mb();
+	if (waitqueue_active(&ctx->fault_pending_wqh))
+		ret = EPOLLIN;
+	else if (waitqueue_active(&ctx->event_wqh))
+		ret = EPOLLIN;
+
+	return ret;
 }
 
 static const struct file_operations userfaultfd_fops;
@@ -1169,7 +1165,7 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf,
 	int no_wait = file->f_flags & O_NONBLOCK;
 	struct inode *inode = file_inode(file);
 
-	if (ctx->state == UFFD_STATE_WAIT_API)
+	if (!userfaultfd_is_initialized(ctx))
 		return -EINVAL;
 
 	for (;;) {
@@ -1700,7 +1696,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
 	user_uffdio_copy = (struct uffdio_copy __user *) arg;
 
 	ret = -EAGAIN;
-	if (READ_ONCE(ctx->mmap_changing))
+	if (atomic_read(&ctx->mmap_changing))
 		goto out;
 
 	ret = -EFAULT;
@@ -1757,7 +1753,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
 	user_uffdio_zeropage = (struct uffdio_zeropage __user *) arg;
 
 	ret = -EAGAIN;
-	if (READ_ONCE(ctx->mmap_changing))
+	if (atomic_read(&ctx->mmap_changing))
 		goto out;
 
 	ret = -EFAULT;
@@ -1807,7 +1803,7 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
 	struct userfaultfd_wake_range range;
 	bool mode_wp, mode_dontwake;
 
-	if (READ_ONCE(ctx->mmap_changing))
+	if (atomic_read(&ctx->mmap_changing))
 		return -EAGAIN;
 
 	user_uffdio_wp = (struct uffdio_writeprotect __user *) arg;
@@ -1855,7 +1851,7 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
 	user_uffdio_continue = (struct uffdio_continue __user *)arg;
 
 	ret = -EAGAIN;
-	if (READ_ONCE(ctx->mmap_changing))
+	if (atomic_read(&ctx->mmap_changing))
 		goto out;
 
 	ret = -EFAULT;
@@ -1908,9 +1904,10 @@ out:
 static inline unsigned int uffd_ctx_features(__u64 user_features)
 {
 	/*
-	 * For the current set of features the bits just coincide
+	 * For the current set of features the bits just coincide. Set
+	 * UFFD_FEATURE_INITIALIZED to mark the features as enabled.
 	 */
-	return (unsigned int)user_features;
+	return (unsigned int)user_features | UFFD_FEATURE_INITIALIZED;
 }
 
 /*
@@ -1923,12 +1920,10 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
 {
 	struct uffdio_api uffdio_api;
 	void __user *buf = (void __user *)arg;
+	unsigned int ctx_features;
 	int ret;
 	__u64 features;
 
-	ret = -EINVAL;
-	if (ctx->state != UFFD_STATE_WAIT_API)
-		goto out;
 	ret = -EFAULT;
 	if (copy_from_user(&uffdio_api, buf, sizeof(uffdio_api)))
 		goto out;
@@ -1952,9 +1947,13 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
 	ret = -EFAULT;
 	if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
 		goto out;
-	ctx->state = UFFD_STATE_RUNNING;
+
 	/* only enable the requested features for this uffd context */
-	ctx->features = uffd_ctx_features(features);
+	ctx_features = uffd_ctx_features(features);
+	ret = -EINVAL;
+	if (cmpxchg(&ctx->features, 0, ctx_features) != 0)
+		goto err_out;
+
 	ret = 0;
 out:
 	return ret;
@@ -1971,7 +1970,7 @@ static long userfaultfd_ioctl(struct file *file, unsigned cmd,
 	int ret = -EINVAL;
 	struct userfaultfd_ctx *ctx = file->private_data;
 
-	if (cmd != UFFDIO_API && ctx->state == UFFD_STATE_WAIT_API)
+	if (cmd != UFFDIO_API && !userfaultfd_is_initialized(ctx))
 		return -EINVAL;
 
 	switch(cmd) {
@@ -2085,9 +2084,8 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
 	refcount_set(&ctx->refcount, 1);
 	ctx->flags = flags;
 	ctx->features = 0;
-	ctx->state = UFFD_STATE_WAIT_API;
 	ctx->released = false;
-	ctx->mmap_changing = false;
+	atomic_set(&ctx->mmap_changing, 0);
 	ctx->mm = current->mm;
 	/* prevent the mm struct to be freed */
 	mmgrab(ctx->mm);
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index e986b95d94c9..6f49bf39183c 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -29,67 +29,3 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
 		congestion_wait(BLK_RW_ASYNC, HZ/50);
 	} while (1);
 }
-
-
-/*
- * __vmalloc() will allocate data pages and auxiliary structures (e.g.
- * pagetables) with GFP_KERNEL, yet we may be under GFP_NOFS context here. Hence
- * we need to tell memory reclaim that we are in such a context via
- * PF_MEMALLOC_NOFS to prevent memory reclaim re-entering the filesystem here
- * and potentially deadlocking.
- */
-static void *
-__kmem_vmalloc(size_t size, xfs_km_flags_t flags)
-{
-	unsigned nofs_flag = 0;
-	void	*ptr;
-	gfp_t	lflags = kmem_flags_convert(flags);
-
-	if (flags & KM_NOFS)
-		nofs_flag = memalloc_nofs_save();
-
-	ptr = __vmalloc(size, lflags);
-
-	if (flags & KM_NOFS)
-		memalloc_nofs_restore(nofs_flag);
-
-	return ptr;
-}
-
-/*
- * Same as kmem_alloc_large, except we guarantee the buffer returned is aligned
- * to the @align_mask. We only guarantee alignment up to page size, we'll clamp
- * alignment at page size if it is larger. vmalloc always returns a PAGE_SIZE
- * aligned region.
- */
-void *
-kmem_alloc_io(size_t size, int align_mask, xfs_km_flags_t flags)
-{
-	void	*ptr;
-
-	trace_kmem_alloc_io(size, flags, _RET_IP_);
-
-	if (WARN_ON_ONCE(align_mask >= PAGE_SIZE))
-		align_mask = PAGE_SIZE - 1;
-
-	ptr = kmem_alloc(size, flags | KM_MAYFAIL);
-	if (ptr) {
-		if (!((uintptr_t)ptr & align_mask))
-			return ptr;
-		kfree(ptr);
-	}
-	return __kmem_vmalloc(size, flags);
-}
-
-void *
-kmem_alloc_large(size_t size, xfs_km_flags_t flags)
-{
-	void	*ptr;
-
-	trace_kmem_alloc_large(size, flags, _RET_IP_);
-
-	ptr = kmem_alloc(size, flags | KM_MAYFAIL);
-	if (ptr)
-		return ptr;
-	return __kmem_vmalloc(size, flags);
-}
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index 38007117697e..54da6d717a06 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -57,8 +57,6 @@ kmem_flags_convert(xfs_km_flags_t flags)
 }
 
 extern void *kmem_alloc(size_t, xfs_km_flags_t);
-extern void *kmem_alloc_io(size_t size, int align_mask, xfs_km_flags_t flags);
-extern void *kmem_alloc_large(size_t size, xfs_km_flags_t);
 static inline void  kmem_free(const void *ptr)
 {
 	kvfree(ptr);
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index ee9ec0c50bec..005abfd9fd34 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -313,7 +313,6 @@ xfs_get_aghdr_buf(
 	if (error)
 		return error;
 
-	bp->b_bn = blkno;
 	bp->b_maps[0].bm_bn = blkno;
 	bp->b_ops = ops;
 
@@ -469,7 +468,7 @@ xfs_rmaproot_init(
 	rrec->rm_offset = 0;
 
 	/* account for refc btree root */
-	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+	if (xfs_has_reflink(mp)) {
 		rrec = XFS_RMAP_REC_ADDR(block, 5);
 		rrec->rm_startblock = cpu_to_be32(xfs_refc_block(mp));
 		rrec->rm_blockcount = cpu_to_be32(1);
@@ -528,7 +527,7 @@ xfs_agfblock_init(
 	agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
 	agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
 	agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+	if (xfs_has_rmapbt(mp)) {
 		agf->agf_roots[XFS_BTNUM_RMAPi] =
 					cpu_to_be32(XFS_RMAP_BLOCK(mp));
 		agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
@@ -541,9 +540,9 @@ xfs_agfblock_init(
 	tmpsize = id->agsize - mp->m_ag_prealloc_blocks;
 	agf->agf_freeblks = cpu_to_be32(tmpsize);
 	agf->agf_longest = cpu_to_be32(tmpsize);
-	if (xfs_sb_version_hascrc(&mp->m_sb))
+	if (xfs_has_crc(mp))
 		uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);
-	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+	if (xfs_has_reflink(mp)) {
 		agf->agf_refcount_root = cpu_to_be32(
 				xfs_refc_block(mp));
 		agf->agf_refcount_level = cpu_to_be32(1);
@@ -569,7 +568,7 @@ xfs_agflblock_init(
 	__be32			*agfl_bno;
 	int			bucket;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
 		agfl->agfl_seqno = cpu_to_be32(id->agno);
 		uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid);
@@ -599,17 +598,17 @@ xfs_agiblock_init(
 	agi->agi_freecount = 0;
 	agi->agi_newino = cpu_to_be32(NULLAGINO);
 	agi->agi_dirino = cpu_to_be32(NULLAGINO);
-	if (xfs_sb_version_hascrc(&mp->m_sb))
+	if (xfs_has_crc(mp))
 		uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
-	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
+	if (xfs_has_finobt(mp)) {
 		agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp));
 		agi->agi_free_level = cpu_to_be32(1);
 	}
 	for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
 		agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
-	if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) {
+	if (xfs_has_inobtcounts(mp)) {
 		agi->agi_iblocks = cpu_to_be32(1);
-		if (xfs_sb_version_hasfinobt(&mp->m_sb))
+		if (xfs_has_finobt(mp))
 			agi->agi_fblocks = cpu_to_be32(1);
 	}
 }
@@ -719,14 +718,14 @@ xfs_ag_init_headers(
 		.ops = &xfs_finobt_buf_ops,
 		.work = &xfs_btroot_init,
 		.type = XFS_BTNUM_FINO,
-		.need_init =  xfs_sb_version_hasfinobt(&mp->m_sb)
+		.need_init =  xfs_has_finobt(mp)
 	},
 	{ /* RMAP root block */
 		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_RMAP_BLOCK(mp)),
 		.numblks = BTOBB(mp->m_sb.sb_blocksize),
 		.ops = &xfs_rmapbt_buf_ops,
 		.work = &xfs_rmaproot_init,
-		.need_init = xfs_sb_version_hasrmapbt(&mp->m_sb)
+		.need_init = xfs_has_rmapbt(mp)
 	},
 	{ /* REFC root block */
 		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, xfs_refc_block(mp)),
@@ -734,7 +733,7 @@ xfs_ag_init_headers(
 		.ops = &xfs_refcountbt_buf_ops,
 		.work = &xfs_btroot_init,
 		.type = XFS_BTNUM_REFC,
-		.need_init = xfs_sb_version_hasreflink(&mp->m_sb)
+		.need_init = xfs_has_reflink(mp)
 	},
 	{ /* NULL terminating block */
 		.daddr = XFS_BUF_DADDR_NULL,
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 6929157d8d6e..95157f5a5a6c 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -51,7 +51,7 @@ xfs_agfl_size(
 {
 	unsigned int		size = mp->m_sb.sb_sectsize;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb))
+	if (xfs_has_crc(mp))
 		size -= sizeof(struct xfs_agfl);
 
 	return size / sizeof(xfs_agblock_t);
@@ -61,9 +61,9 @@ unsigned int
 xfs_refc_block(
 	struct xfs_mount	*mp)
 {
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+	if (xfs_has_rmapbt(mp))
 		return XFS_RMAP_BLOCK(mp) + 1;
-	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+	if (xfs_has_finobt(mp))
 		return XFS_FIBT_BLOCK(mp) + 1;
 	return XFS_IBT_BLOCK(mp) + 1;
 }
@@ -72,11 +72,11 @@ xfs_extlen_t
 xfs_prealloc_blocks(
 	struct xfs_mount	*mp)
 {
-	if (xfs_sb_version_hasreflink(&mp->m_sb))
+	if (xfs_has_reflink(mp))
 		return xfs_refc_block(mp) + 1;
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+	if (xfs_has_rmapbt(mp))
 		return XFS_RMAP_BLOCK(mp) + 1;
-	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+	if (xfs_has_finobt(mp))
 		return XFS_FIBT_BLOCK(mp) + 1;
 	return XFS_IBT_BLOCK(mp) + 1;
 }
@@ -126,11 +126,11 @@ xfs_alloc_ag_max_usable(
 	blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
 	blocks += XFS_ALLOC_AGFL_RESERVE;
 	blocks += 3;			/* AGF, AGI btree root blocks */
-	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+	if (xfs_has_finobt(mp))
 		blocks++;		/* finobt root block */
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+	if (xfs_has_rmapbt(mp))
 		blocks++; 		/* rmap root block */
-	if (xfs_sb_version_hasreflink(&mp->m_sb))
+	if (xfs_has_reflink(mp))
 		blocks++;		/* refcount root block */
 
 	return mp->m_sb.sb_agblocks - blocks;
@@ -598,7 +598,7 @@ xfs_agfl_verify(
 	 * AGFL is what the AGF says is active. We can't get to the AGF, so we
 	 * can't verify just those entries are valid.
 	 */
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return NULL;
 
 	if (!xfs_verify_magic(bp, agfl->agfl_magicnum))
@@ -638,7 +638,7 @@ xfs_agfl_read_verify(
 	 * AGFL is what the AGF says is active. We can't get to the AGF, so we
 	 * can't verify just those entries are valid.
 	 */
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return;
 
 	if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
@@ -659,7 +659,7 @@ xfs_agfl_write_verify(
 	xfs_failaddr_t		fa;
 
 	/* no verification of non-crc AGFLs */
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return;
 
 	fa = xfs_agfl_verify(bp);
@@ -2264,7 +2264,7 @@ xfs_alloc_min_freelist(
 	min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
 				       mp->m_ag_maxlevels);
 	/* space needed reverse mapping used space btree */
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+	if (xfs_has_rmapbt(mp))
 		min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
 						mp->m_rmap_maxlevels);
 
@@ -2373,7 +2373,7 @@ xfs_agfl_needs_reset(
 	int			active;
 
 	/* no agfl header on v4 supers */
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return false;
 
 	/*
@@ -2877,7 +2877,7 @@ xfs_agf_verify(
 	struct xfs_mount	*mp = bp->b_mount;
 	struct xfs_agf		*agf = bp->b_addr;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
 			return __this_address;
 		if (!xfs_log_check_lsn(mp, be64_to_cpu(agf->agf_lsn)))
@@ -2907,12 +2907,12 @@ xfs_agf_verify(
 	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > mp->m_ag_maxlevels)
 		return __this_address;
 
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
+	if (xfs_has_rmapbt(mp) &&
 	    (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
 	     be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > mp->m_rmap_maxlevels))
 		return __this_address;
 
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
+	if (xfs_has_rmapbt(mp) &&
 	    be32_to_cpu(agf->agf_rmap_blocks) > be32_to_cpu(agf->agf_length))
 		return __this_address;
 
@@ -2925,16 +2925,16 @@ xfs_agf_verify(
 	if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
 		return __this_address;
 
-	if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
+	if (xfs_has_lazysbcount(mp) &&
 	    be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
 		return __this_address;
 
-	if (xfs_sb_version_hasreflink(&mp->m_sb) &&
+	if (xfs_has_reflink(mp) &&
 	    be32_to_cpu(agf->agf_refcount_blocks) >
 	    be32_to_cpu(agf->agf_length))
 		return __this_address;
 
-	if (xfs_sb_version_hasreflink(&mp->m_sb) &&
+	if (xfs_has_reflink(mp) &&
 	    (be32_to_cpu(agf->agf_refcount_level) < 1 ||
 	     be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels))
 		return __this_address;
@@ -2950,7 +2950,7 @@ xfs_agf_read_verify(
 	struct xfs_mount *mp = bp->b_mount;
 	xfs_failaddr_t	fa;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb) &&
+	if (xfs_has_crc(mp) &&
 	    !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
 		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
 	else {
@@ -2975,7 +2975,7 @@ xfs_agf_write_verify(
 		return;
 	}
 
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return;
 
 	if (bip)
@@ -3073,13 +3073,13 @@ xfs_alloc_read_agf(
 		 * counter only tracks non-root blocks.
 		 */
 		allocbt_blks = pag->pagf_btreeblks;
-		if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+		if (xfs_has_rmapbt(mp))
 			allocbt_blks -= be32_to_cpu(agf->agf_rmap_blocks) - 1;
 		if (allocbt_blks > 0)
 			atomic64_add(allocbt_blks, &mp->m_allocbt_blks);
 	}
 #ifdef DEBUG
-	else if (!XFS_FORCED_SHUTDOWN(mp)) {
+	else if (!xfs_is_shutdown(mp)) {
 		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
 		ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
 		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
@@ -3166,7 +3166,7 @@ xfs_alloc_vextent(
 		 * the first a.g. fails.
 		 */
 		if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
-		    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
+		    xfs_is_inode32(mp)) {
 			args->fsbno = XFS_AGB_TO_FSB(mp,
 					((mp->m_agfrotor / rotorstep) %
 					mp->m_sb.sb_agcount), 0);
@@ -3392,7 +3392,7 @@ struct xfs_alloc_query_range_info {
 STATIC int
 xfs_alloc_query_range_helper(
 	struct xfs_btree_cur		*cur,
-	union xfs_btree_rec		*rec,
+	const union xfs_btree_rec	*rec,
 	void				*priv)
 {
 	struct xfs_alloc_query_range_info	*query = priv;
@@ -3407,8 +3407,8 @@ xfs_alloc_query_range_helper(
 int
 xfs_alloc_query_range(
 	struct xfs_btree_cur			*cur,
-	struct xfs_alloc_rec_incore		*low_rec,
-	struct xfs_alloc_rec_incore		*high_rec,
+	const struct xfs_alloc_rec_incore	*low_rec,
+	const struct xfs_alloc_rec_incore	*high_rec,
 	xfs_alloc_query_range_fn		fn,
 	void					*priv)
 {
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index e30900b6f8ba..df4aefaf0046 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -220,13 +220,13 @@ int xfs_free_extent_fix_freelist(struct xfs_trans *tp, struct xfs_perag *pag,
 xfs_extlen_t xfs_prealloc_blocks(struct xfs_mount *mp);
 
 typedef int (*xfs_alloc_query_range_fn)(
-	struct xfs_btree_cur		*cur,
-	struct xfs_alloc_rec_incore	*rec,
-	void				*priv);
+	struct xfs_btree_cur			*cur,
+	const struct xfs_alloc_rec_incore	*rec,
+	void					*priv);
 
 int xfs_alloc_query_range(struct xfs_btree_cur *cur,
-		struct xfs_alloc_rec_incore *low_rec,
-		struct xfs_alloc_rec_incore *high_rec,
+		const struct xfs_alloc_rec_incore *low_rec,
+		const struct xfs_alloc_rec_incore *high_rec,
 		xfs_alloc_query_range_fn fn, void *priv);
 int xfs_alloc_query_all(struct xfs_btree_cur *cur, xfs_alloc_query_range_fn fn,
 		void *priv);
@@ -243,7 +243,7 @@ static inline __be32 *
 xfs_buf_to_agfl_bno(
 	struct xfs_buf		*bp)
 {
-	if (xfs_sb_version_hascrc(&bp->b_mount->m_sb))
+	if (xfs_has_crc(bp->b_mount))
 		return bp->b_addr + sizeof(struct xfs_agfl);
 	return bp->b_addr;
 }
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 6b363f78cfa2..6746fd735550 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -31,9 +31,9 @@ xfs_allocbt_dup_cursor(
 
 STATIC void
 xfs_allocbt_set_root(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*ptr,
-	int			inc)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*ptr,
+	int				inc)
 {
 	struct xfs_buf		*agbp = cur->bc_ag.agbp;
 	struct xfs_agf		*agf = agbp->b_addr;
@@ -50,10 +50,10 @@ xfs_allocbt_set_root(
 
 STATIC int
 xfs_allocbt_alloc_block(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*start,
-	union xfs_btree_ptr	*new,
-	int			*stat)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*start,
+	union xfs_btree_ptr		*new,
+	int				*stat)
 {
 	int			error;
 	xfs_agblock_t		bno;
@@ -87,7 +87,7 @@ xfs_allocbt_free_block(
 	xfs_agblock_t		bno;
 	int			error;
 
-	bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
+	bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp));
 	error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
 	if (error)
 		return error;
@@ -103,11 +103,11 @@ xfs_allocbt_free_block(
  */
 STATIC void
 xfs_allocbt_update_lastrec(
-	struct xfs_btree_cur	*cur,
-	struct xfs_btree_block	*block,
-	union xfs_btree_rec	*rec,
-	int			ptr,
-	int			reason)
+	struct xfs_btree_cur		*cur,
+	const struct xfs_btree_block	*block,
+	const union xfs_btree_rec	*rec,
+	int				ptr,
+	int				reason)
 {
 	struct xfs_agf		*agf = cur->bc_ag.agbp->b_addr;
 	struct xfs_perag	*pag;
@@ -177,8 +177,8 @@ xfs_allocbt_get_maxrecs(
 
 STATIC void
 xfs_allocbt_init_key_from_rec(
-	union xfs_btree_key	*key,
-	union xfs_btree_rec	*rec)
+	union xfs_btree_key		*key,
+	const union xfs_btree_rec	*rec)
 {
 	key->alloc.ar_startblock = rec->alloc.ar_startblock;
 	key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
@@ -186,10 +186,10 @@ xfs_allocbt_init_key_from_rec(
 
 STATIC void
 xfs_bnobt_init_high_key_from_rec(
-	union xfs_btree_key	*key,
-	union xfs_btree_rec	*rec)
+	union xfs_btree_key		*key,
+	const union xfs_btree_rec	*rec)
 {
-	__u32			x;
+	__u32				x;
 
 	x = be32_to_cpu(rec->alloc.ar_startblock);
 	x += be32_to_cpu(rec->alloc.ar_blockcount) - 1;
@@ -199,8 +199,8 @@ xfs_bnobt_init_high_key_from_rec(
 
 STATIC void
 xfs_cntbt_init_high_key_from_rec(
-	union xfs_btree_key	*key,
-	union xfs_btree_rec	*rec)
+	union xfs_btree_key		*key,
+	const union xfs_btree_rec	*rec)
 {
 	key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
 	key->alloc.ar_startblock = 0;
@@ -229,23 +229,23 @@ xfs_allocbt_init_ptr_from_cur(
 
 STATIC int64_t
 xfs_bnobt_key_diff(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*key)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*key)
 {
-	xfs_alloc_rec_incore_t	*rec = &cur->bc_rec.a;
-	xfs_alloc_key_t		*kp = &key->alloc;
+	struct xfs_alloc_rec_incore	*rec = &cur->bc_rec.a;
+	const struct xfs_alloc_rec	*kp = &key->alloc;
 
 	return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
 }
 
 STATIC int64_t
 xfs_cntbt_key_diff(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*key)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*key)
 {
-	xfs_alloc_rec_incore_t	*rec = &cur->bc_rec.a;
-	xfs_alloc_key_t		*kp = &key->alloc;
-	int64_t			diff;
+	struct xfs_alloc_rec_incore	*rec = &cur->bc_rec.a;
+	const struct xfs_alloc_rec	*kp = &key->alloc;
+	int64_t				diff;
 
 	diff = (int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
 	if (diff)
@@ -256,9 +256,9 @@ xfs_cntbt_key_diff(
 
 STATIC int64_t
 xfs_bnobt_diff_two_keys(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*k1,
-	union xfs_btree_key	*k2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*k1,
+	const union xfs_btree_key	*k2)
 {
 	return (int64_t)be32_to_cpu(k1->alloc.ar_startblock) -
 			  be32_to_cpu(k2->alloc.ar_startblock);
@@ -266,11 +266,11 @@ xfs_bnobt_diff_two_keys(
 
 STATIC int64_t
 xfs_cntbt_diff_two_keys(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*k1,
-	union xfs_btree_key	*k2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*k1,
+	const union xfs_btree_key	*k2)
 {
-	int64_t			diff;
+	int64_t				diff;
 
 	diff =  be32_to_cpu(k1->alloc.ar_blockcount) -
 		be32_to_cpu(k2->alloc.ar_blockcount);
@@ -295,7 +295,7 @@ xfs_allocbt_verify(
 	if (!xfs_verify_magic(bp, block->bb_magic))
 		return __this_address;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		fa = xfs_btree_sblock_v5hdr_verify(bp);
 		if (fa)
 			return fa;
@@ -376,9 +376,9 @@ const struct xfs_buf_ops xfs_cntbt_buf_ops = {
 
 STATIC int
 xfs_bnobt_keys_inorder(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*k1,
-	union xfs_btree_key	*k2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*k1,
+	const union xfs_btree_key	*k2)
 {
 	return be32_to_cpu(k1->alloc.ar_startblock) <
 	       be32_to_cpu(k2->alloc.ar_startblock);
@@ -386,9 +386,9 @@ xfs_bnobt_keys_inorder(
 
 STATIC int
 xfs_bnobt_recs_inorder(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_rec	*r1,
-	union xfs_btree_rec	*r2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_rec	*r1,
+	const union xfs_btree_rec	*r2)
 {
 	return be32_to_cpu(r1->alloc.ar_startblock) +
 		be32_to_cpu(r1->alloc.ar_blockcount) <=
@@ -397,9 +397,9 @@ xfs_bnobt_recs_inorder(
 
 STATIC int
 xfs_cntbt_keys_inorder(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*k1,
-	union xfs_btree_key	*k2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*k1,
+	const union xfs_btree_key	*k2)
 {
 	return be32_to_cpu(k1->alloc.ar_blockcount) <
 		be32_to_cpu(k2->alloc.ar_blockcount) ||
@@ -410,9 +410,9 @@ xfs_cntbt_keys_inorder(
 
 STATIC int
 xfs_cntbt_recs_inorder(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_rec	*r1,
-	union xfs_btree_rec	*r2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_rec	*r1,
+	const union xfs_btree_rec	*r2)
 {
 	return be32_to_cpu(r1->alloc.ar_blockcount) <
 		be32_to_cpu(r2->alloc.ar_blockcount) ||
@@ -498,7 +498,7 @@ xfs_allocbt_init_common(
 	atomic_inc(&pag->pag_ref);
 	cur->bc_ag.pag = pag;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb))
+	if (xfs_has_crc(mp))
 		cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
 
 	return cur;
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.h b/fs/xfs/libxfs/xfs_alloc_btree.h
index 9eb4c667a6b8..2f6b816aaf9f 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.h
+++ b/fs/xfs/libxfs/xfs_alloc_btree.h
@@ -20,7 +20,7 @@ struct xbtree_afakeroot;
  * Btree block header size depends on a superblock flag.
  */
 #define XFS_ALLOC_BLOCK_LEN(mp) \
-	(xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
+	(xfs_has_crc(((mp))) ? \
 		XFS_BTREE_SBLOCK_CRC_LEN : XFS_BTREE_SBLOCK_LEN)
 
 /*
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 191d51725988..fbc9d816882c 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -146,7 +146,7 @@ xfs_attr_get(
 
 	XFS_STATS_INC(args->dp->i_mount, xs_attr_get);
 
-	if (XFS_FORCED_SHUTDOWN(args->dp->i_mount))
+	if (xfs_is_shutdown(args->dp->i_mount))
 		return -EIO;
 
 	args->geo = args->dp->i_mount->m_attr_geo;
@@ -224,7 +224,7 @@ xfs_attr_try_sf_addname(
 	if (!error && !(args->op_flags & XFS_DA_OP_NOTIME))
 		xfs_trans_ichgtime(args->trans, dp, XFS_ICHGTIME_CHG);
 
-	if (dp->i_mount->m_flags & XFS_MOUNT_WSYNC)
+	if (xfs_has_wsync(dp->i_mount))
 		xfs_trans_set_sync(args->trans);
 
 	return error;
@@ -335,6 +335,7 @@ xfs_attr_sf_addname(
 	 * the attr fork to leaf format and will restart with the leaf
 	 * add.
 	 */
+	trace_xfs_attr_sf_addname_return(XFS_DAS_UNINIT, args->dp);
 	dac->flags |= XFS_DAC_DEFER_FINISH;
 	return -EAGAIN;
 }
@@ -394,6 +395,8 @@ xfs_attr_set_iter(
 				 * handling code below
 				 */
 				dac->flags |= XFS_DAC_DEFER_FINISH;
+				trace_xfs_attr_set_iter_return(
+					dac->dela_state, args->dp);
 				return -EAGAIN;
 			} else if (error) {
 				return error;
@@ -411,6 +414,7 @@ xfs_attr_set_iter(
 
 			dac->dela_state = XFS_DAS_FOUND_NBLK;
 		}
+		trace_xfs_attr_set_iter_return(dac->dela_state,	args->dp);
 		return -EAGAIN;
 	case XFS_DAS_FOUND_LBLK:
 		/*
@@ -438,6 +442,8 @@ xfs_attr_set_iter(
 			error = xfs_attr_rmtval_set_blk(dac);
 			if (error)
 				return error;
+			trace_xfs_attr_set_iter_return(dac->dela_state,
+						       args->dp);
 			return -EAGAIN;
 		}
 
@@ -472,6 +478,7 @@ xfs_attr_set_iter(
 		 * series.
 		 */
 		dac->dela_state = XFS_DAS_FLIP_LFLAG;
+		trace_xfs_attr_set_iter_return(dac->dela_state, args->dp);
 		return -EAGAIN;
 	case XFS_DAS_FLIP_LFLAG:
 		/*
@@ -488,11 +495,15 @@ xfs_attr_set_iter(
 		/* Set state in case xfs_attr_rmtval_remove returns -EAGAIN */
 		dac->dela_state = XFS_DAS_RM_LBLK;
 		if (args->rmtblkno) {
-			error = __xfs_attr_rmtval_remove(dac);
+			error = xfs_attr_rmtval_remove(dac);
+			if (error == -EAGAIN)
+				trace_xfs_attr_set_iter_return(
+					dac->dela_state, args->dp);
 			if (error)
 				return error;
 
 			dac->dela_state = XFS_DAS_RD_LEAF;
+			trace_xfs_attr_set_iter_return(dac->dela_state, args->dp);
 			return -EAGAIN;
 		}
 
@@ -542,6 +553,8 @@ xfs_attr_set_iter(
 				error = xfs_attr_rmtval_set_blk(dac);
 				if (error)
 					return error;
+				trace_xfs_attr_set_iter_return(
+					dac->dela_state, args->dp);
 				return -EAGAIN;
 			}
 
@@ -577,6 +590,7 @@ xfs_attr_set_iter(
 		 * series
 		 */
 		dac->dela_state = XFS_DAS_FLIP_NFLAG;
+		trace_xfs_attr_set_iter_return(dac->dela_state, args->dp);
 		return -EAGAIN;
 
 	case XFS_DAS_FLIP_NFLAG:
@@ -595,11 +609,16 @@ xfs_attr_set_iter(
 		/* Set state in case xfs_attr_rmtval_remove returns -EAGAIN */
 		dac->dela_state = XFS_DAS_RM_NBLK;
 		if (args->rmtblkno) {
-			error = __xfs_attr_rmtval_remove(dac);
+			error = xfs_attr_rmtval_remove(dac);
+			if (error == -EAGAIN)
+				trace_xfs_attr_set_iter_return(
+					dac->dela_state, args->dp);
+
 			if (error)
 				return error;
 
 			dac->dela_state = XFS_DAS_CLR_FLAG;
+			trace_xfs_attr_set_iter_return(dac->dela_state, args->dp);
 			return -EAGAIN;
 		}
 
@@ -623,8 +642,8 @@ out:
 /*
  * Return EEXIST if attr is found, or ENOATTR if not
  */
-int
-xfs_has_attr(
+static int
+xfs_attr_lookup(
 	struct xfs_da_args	*args)
 {
 	struct xfs_inode	*dp = args->dp;
@@ -691,7 +710,7 @@ xfs_attr_set(
 	int			rmt_blks = 0;
 	unsigned int		total;
 
-	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
+	if (xfs_is_shutdown(dp->i_mount))
 		return -EIO;
 
 	error = xfs_qm_dqattach(dp);
@@ -761,8 +780,8 @@ xfs_attr_set(
 			goto out_trans_cancel;
 	}
 
+	error = xfs_attr_lookup(args);
 	if (args->value) {
-		error = xfs_has_attr(args);
 		if (error == -EEXIST && (args->attr_flags & XATTR_CREATE))
 			goto out_trans_cancel;
 		if (error == -ENOATTR && (args->attr_flags & XATTR_REPLACE))
@@ -777,7 +796,6 @@ xfs_attr_set(
 		if (!args->trans)
 			goto out_unlock;
 	} else {
-		error = xfs_has_attr(args);
 		if (error != -EEXIST)
 			goto out_trans_cancel;
 
@@ -790,7 +808,7 @@ xfs_attr_set(
 	 * If this is a synchronous mount, make sure that the
 	 * transaction goes to disk before returning to the user.
 	 */
-	if (mp->m_flags & XFS_MOUNT_WSYNC)
+	if (xfs_has_wsync(mp))
 		xfs_trans_set_sync(args->trans);
 
 	if (!(args->op_flags & XFS_DA_OP_NOTIME))
@@ -1176,6 +1194,8 @@ xfs_attr_node_addname(
 			 * this point.
 			 */
 			dac->flags |= XFS_DAC_DEFER_FINISH;
+			trace_xfs_attr_node_addname_return(
+					dac->dela_state, args->dp);
 			return -EAGAIN;
 		}
 
@@ -1421,11 +1441,14 @@ xfs_attr_remove_iter(
 			 * May return -EAGAIN. Roll and repeat until all remote
 			 * blocks are removed.
 			 */
-			error = __xfs_attr_rmtval_remove(dac);
-			if (error == -EAGAIN)
+			error = xfs_attr_rmtval_remove(dac);
+			if (error == -EAGAIN) {
+				trace_xfs_attr_remove_iter_return(
+						dac->dela_state, args->dp);
 				return error;
-			else if (error)
+			} else if (error) {
 				goto out;
+			}
 
 			/*
 			 * Refill the state structure with buffers (the prior
@@ -1438,6 +1461,7 @@ xfs_attr_remove_iter(
 				goto out;
 			dac->dela_state = XFS_DAS_RM_NAME;
 			dac->flags |= XFS_DAC_DEFER_FINISH;
+			trace_xfs_attr_remove_iter_return(dac->dela_state, args->dp);
 			return -EAGAIN;
 		}
 
@@ -1466,6 +1490,8 @@ xfs_attr_remove_iter(
 
 			dac->flags |= XFS_DAC_DEFER_FINISH;
 			dac->dela_state = XFS_DAS_RM_SHRINK;
+			trace_xfs_attr_remove_iter_return(
+					dac->dela_state, args->dp);
 			return -EAGAIN;
 		}
 
@@ -1514,7 +1540,7 @@ xfs_attr_fillstate(xfs_da_state_t *state)
 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
 		if (blk->bp) {
-			blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
+			blk->disk_blkno = xfs_buf_daddr(blk->bp);
 			blk->bp = NULL;
 		} else {
 			blk->disk_blkno = 0;
@@ -1529,7 +1555,7 @@ xfs_attr_fillstate(xfs_da_state_t *state)
 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
 		if (blk->bp) {
-			blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
+			blk->disk_blkno = xfs_buf_daddr(blk->bp);
 			blk->bp = NULL;
 		} else {
 			blk->disk_blkno = 0;
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
index 8de5d1d2733e..5e71f719bdd5 100644
--- a/fs/xfs/libxfs/xfs_attr.h
+++ b/fs/xfs/libxfs/xfs_attr.h
@@ -490,7 +490,6 @@ int xfs_attr_get_ilocked(struct xfs_da_args *args);
 int xfs_attr_get(struct xfs_da_args *args);
 int xfs_attr_set(struct xfs_da_args *args);
 int xfs_attr_set_args(struct xfs_da_args *args);
-int xfs_has_attr(struct xfs_da_args *args);
 int xfs_attr_remove_args(struct xfs_da_args *args);
 int xfs_attr_remove_iter(struct xfs_delattr_context *dac);
 bool xfs_attr_namecheck(const void *name, size_t length);
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index b910bd209949..e1d11e314228 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -384,7 +384,7 @@ xfs_attr3_leaf_write_verify(
 		return;
 	}
 
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return;
 
 	if (bip)
@@ -406,7 +406,7 @@ xfs_attr3_leaf_read_verify(
 	struct xfs_mount	*mp = bp->b_mount;
 	xfs_failaddr_t		fa;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb) &&
+	if (xfs_has_crc(mp) &&
 	     !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF))
 		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
 	else {
@@ -489,7 +489,7 @@ xfs_attr_copy_value(
 	}
 
 	if (!args->value) {
-		args->value = kmem_alloc_large(valuelen, KM_NOLOCKDEP);
+		args->value = kvmalloc(valuelen, GFP_KERNEL | __GFP_NOLOCKDEP);
 		if (!args->value)
 			return -ENOMEM;
 	}
@@ -568,7 +568,7 @@ xfs_attr_shortform_bytesfit(
 	 * literal area, but for the old format we are done if there is no
 	 * space in the fixed attribute fork.
 	 */
-	if (!(mp->m_flags & XFS_MOUNT_ATTR2))
+	if (!xfs_has_attr2(mp))
 		return 0;
 
 	dsize = dp->i_df.if_bytes;
@@ -576,7 +576,7 @@ xfs_attr_shortform_bytesfit(
 	switch (dp->i_df.if_format) {
 	case XFS_DINODE_FMT_EXTENTS:
 		/*
-		 * If there is no attr fork and the data fork is extents, 
+		 * If there is no attr fork and the data fork is extents,
 		 * determine if creating the default attr fork will result
 		 * in the extents form migrating to btree. If so, the
 		 * minimum offset only needs to be the space required for
@@ -621,21 +621,27 @@ xfs_attr_shortform_bytesfit(
 }
 
 /*
- * Switch on the ATTR2 superblock bit (implies also FEATURES2)
+ * Switch on the ATTR2 superblock bit (implies also FEATURES2) unless:
+ * - noattr2 mount option is set,
+ * - on-disk version bit says it is already set, or
+ * - the attr2 mount option is not set to enable automatic upgrade from attr1.
  */
 STATIC void
-xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp)
+xfs_sbversion_add_attr2(
+	struct xfs_mount	*mp,
+	struct xfs_trans	*tp)
 {
-	if ((mp->m_flags & XFS_MOUNT_ATTR2) &&
-	    !(xfs_sb_version_hasattr2(&mp->m_sb))) {
-		spin_lock(&mp->m_sb_lock);
-		if (!xfs_sb_version_hasattr2(&mp->m_sb)) {
-			xfs_sb_version_addattr2(&mp->m_sb);
-			spin_unlock(&mp->m_sb_lock);
-			xfs_log_sb(tp);
-		} else
-			spin_unlock(&mp->m_sb_lock);
-	}
+	if (xfs_has_noattr2(mp))
+		return;
+	if (mp->m_sb.sb_features2 & XFS_SB_VERSION2_ATTR2BIT)
+		return;
+	if (!xfs_has_attr2(mp))
+		return;
+
+	spin_lock(&mp->m_sb_lock);
+	xfs_add_attr2(mp);
+	spin_unlock(&mp->m_sb_lock);
+	xfs_log_sb(tp);
 }
 
 /*
@@ -810,8 +816,7 @@ xfs_attr_sf_removename(
 	 * Fix up the start offset of the attribute fork
 	 */
 	totsize -= size;
-	if (totsize == sizeof(xfs_attr_sf_hdr_t) &&
-	    (mp->m_flags & XFS_MOUNT_ATTR2) &&
+	if (totsize == sizeof(xfs_attr_sf_hdr_t) && xfs_has_attr2(mp) &&
 	    (dp->i_df.if_format != XFS_DINODE_FMT_BTREE) &&
 	    !(args->op_flags & XFS_DA_OP_ADDNAME)) {
 		xfs_attr_fork_remove(dp, args->trans);
@@ -821,7 +826,7 @@ xfs_attr_sf_removename(
 		ASSERT(dp->i_forkoff);
 		ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) ||
 				(args->op_flags & XFS_DA_OP_ADDNAME) ||
-				!(mp->m_flags & XFS_MOUNT_ATTR2) ||
+				!xfs_has_attr2(mp) ||
 				dp->i_df.if_format == XFS_DINODE_FMT_BTREE);
 		xfs_trans_log_inode(args->trans, dp,
 					XFS_ILOG_CORE | XFS_ILOG_ADATA);
@@ -997,7 +1002,7 @@ xfs_attr_shortform_allfit(
 		bytes += xfs_attr_sf_entsize_byname(name_loc->namelen,
 					be16_to_cpu(name_loc->valuelen));
 	}
-	if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) &&
+	if (xfs_has_attr2(dp->i_mount) &&
 	    (dp->i_df.if_format != XFS_DINODE_FMT_BTREE) &&
 	    (bytes == sizeof(struct xfs_attr_sf_hdr)))
 		return -1;
@@ -1122,7 +1127,7 @@ xfs_attr3_leaf_to_shortform(
 		goto out;
 
 	if (forkoff == -1) {
-		ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2);
+		ASSERT(xfs_has_attr2(dp->i_mount));
 		ASSERT(dp->i_df.if_format != XFS_DINODE_FMT_BTREE);
 		xfs_attr_fork_remove(dp, args->trans);
 		goto out;
@@ -1199,9 +1204,9 @@ xfs_attr3_leaf_to_node(
 	xfs_trans_buf_set_type(args->trans, bp2, XFS_BLFT_ATTR_LEAF_BUF);
 	bp2->b_ops = bp1->b_ops;
 	memcpy(bp2->b_addr, bp1->b_addr, args->geo->blksize);
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		struct xfs_da3_blkinfo *hdr3 = bp2->b_addr;
-		hdr3->blkno = cpu_to_be64(bp2->b_bn);
+		hdr3->blkno = cpu_to_be64(xfs_buf_daddr(bp2));
 	}
 	xfs_trans_log_buf(args->trans, bp2, 0, args->geo->blksize - 1);
 
@@ -1264,12 +1269,12 @@ xfs_attr3_leaf_create(
 	memset(&ichdr, 0, sizeof(ichdr));
 	ichdr.firstused = args->geo->blksize;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		struct xfs_da3_blkinfo *hdr3 = bp->b_addr;
 
 		ichdr.magic = XFS_ATTR3_LEAF_MAGIC;
 
-		hdr3->blkno = cpu_to_be64(bp->b_bn);
+		hdr3->blkno = cpu_to_be64(xfs_buf_daddr(bp));
 		hdr3->owner = cpu_to_be64(dp->i_ino);
 		uuid_copy(&hdr3->uuid, &mp->m_sb.sb_meta_uuid);
 
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 0c8bee3abc3b..83b95be9ded8 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -51,7 +51,7 @@ xfs_attr3_rmt_blocks(
 	struct xfs_mount *mp,
 	int		attrlen)
 {
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
 		return (attrlen + buflen - 1) / buflen;
 	}
@@ -126,11 +126,11 @@ __xfs_attr3_rmt_read_verify(
 	int		blksize = mp->m_attr_geo->blksize;
 
 	/* no verification of non-crc buffers */
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return 0;
 
 	ptr = bp->b_addr;
-	bno = bp->b_bn;
+	bno = xfs_buf_daddr(bp);
 	len = BBTOB(bp->b_length);
 	ASSERT(len >= blksize);
 
@@ -191,11 +191,11 @@ xfs_attr3_rmt_write_verify(
 	xfs_daddr_t	bno;
 
 	/* no verification of non-crc buffers */
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return;
 
 	ptr = bp->b_addr;
-	bno = bp->b_bn;
+	bno = xfs_buf_daddr(bp);
 	len = BBTOB(bp->b_length);
 	ASSERT(len >= blksize);
 
@@ -246,7 +246,7 @@ xfs_attr3_rmt_hdr_set(
 {
 	struct xfs_attr3_rmt_hdr *rmt = ptr;
 
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return 0;
 
 	rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC);
@@ -284,7 +284,7 @@ xfs_attr_rmtval_copyout(
 	uint8_t		**dst)
 {
 	char		*src = bp->b_addr;
-	xfs_daddr_t	bno = bp->b_bn;
+	xfs_daddr_t	bno = xfs_buf_daddr(bp);
 	int		len = BBTOB(bp->b_length);
 	int		blksize = mp->m_attr_geo->blksize;
 
@@ -296,7 +296,7 @@ xfs_attr_rmtval_copyout(
 
 		byte_cnt = min(*valuelen, byte_cnt);
 
-		if (xfs_sb_version_hascrc(&mp->m_sb)) {
+		if (xfs_has_crc(mp)) {
 			if (xfs_attr3_rmt_hdr_ok(src, ino, *offset,
 						  byte_cnt, bno)) {
 				xfs_alert(mp,
@@ -332,7 +332,7 @@ xfs_attr_rmtval_copyin(
 	uint8_t		**src)
 {
 	char		*dst = bp->b_addr;
-	xfs_daddr_t	bno = bp->b_bn;
+	xfs_daddr_t	bno = xfs_buf_daddr(bp);
 	int		len = BBTOB(bp->b_length);
 	int		blksize = mp->m_attr_geo->blksize;
 
@@ -672,7 +672,7 @@ xfs_attr_rmtval_invalidate(
  * routine until it returns something other than -EAGAIN.
  */
 int
-__xfs_attr_rmtval_remove(
+xfs_attr_rmtval_remove(
 	struct xfs_delattr_context	*dac)
 {
 	struct xfs_da_args		*args = dac->da_args;
@@ -696,6 +696,7 @@ __xfs_attr_rmtval_remove(
 	 */
 	if (!done) {
 		dac->flags |= XFS_DAC_DEFER_FINISH;
+		trace_xfs_attr_rmtval_remove_return(dac->dela_state, args->dp);
 		return -EAGAIN;
 	}
 
diff --git a/fs/xfs/libxfs/xfs_attr_remote.h b/fs/xfs/libxfs/xfs_attr_remote.h
index 61b85b918db8..d72eff30ca18 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.h
+++ b/fs/xfs/libxfs/xfs_attr_remote.h
@@ -12,7 +12,7 @@ int xfs_attr_rmtval_get(struct xfs_da_args *args);
 int xfs_attr_rmtval_stale(struct xfs_inode *ip, struct xfs_bmbt_irec *map,
 		xfs_buf_flags_t incore_flags);
 int xfs_attr_rmtval_invalidate(struct xfs_da_args *args);
-int __xfs_attr_rmtval_remove(struct xfs_delattr_context *dac);
+int xfs_attr_rmtval_remove(struct xfs_delattr_context *dac);
 int xfs_attr_rmt_find_hole(struct xfs_da_args *args);
 int xfs_attr_rmtval_set_value(struct xfs_da_args *args);
 int xfs_attr_rmtval_set_blk(struct xfs_delattr_context *dac);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 948092babb6a..b48230f1a361 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -242,7 +242,7 @@ xfs_bmap_get_bp(
 	for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
 		if (!cur->bc_bufs[i])
 			break;
-		if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
+		if (xfs_buf_daddr(cur->bc_bufs[i]) == bno)
 			return cur->bc_bufs[i];
 	}
 
@@ -251,7 +251,7 @@ xfs_bmap_get_bp(
 		struct xfs_buf_log_item	*bip = (struct xfs_buf_log_item *)lip;
 
 		if (bip->bli_item.li_type == XFS_LI_BUF &&
-		    XFS_BUF_ADDR(bip->bli_buf) == bno)
+		    xfs_buf_daddr(bip->bli_buf) == bno)
 			return bip->bli_buf;
 	}
 
@@ -739,7 +739,7 @@ xfs_bmap_extents_to_btree(
 	 */
 	abp->b_ops = &xfs_bmbt_buf_ops;
 	ablock = XFS_BUF_TO_BLOCK(abp);
-	xfs_btree_init_block_int(mp, ablock, abp->b_bn,
+	xfs_btree_init_block_int(mp, ablock, xfs_buf_daddr(abp),
 				XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
 				XFS_BTREE_LONG_PTRS);
 
@@ -1047,7 +1047,7 @@ xfs_bmap_set_attrforkoff(
 		ip->i_forkoff = xfs_attr_shortform_bytesfit(ip, size);
 		if (!ip->i_forkoff)
 			ip->i_forkoff = default_size;
-		else if ((ip->i_mount->m_flags & XFS_MOUNT_ATTR2) && version)
+		else if (xfs_has_attr2(ip->i_mount) && version)
 			*version = 2;
 		break;
 	default:
@@ -1115,17 +1115,17 @@ xfs_bmap_add_attrfork(
 		xfs_trans_log_inode(tp, ip, logflags);
 	if (error)
 		goto trans_cancel;
-	if (!xfs_sb_version_hasattr(&mp->m_sb) ||
-	   (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
+	if (!xfs_has_attr(mp) ||
+	   (!xfs_has_attr2(mp) && version == 2)) {
 		bool log_sb = false;
 
 		spin_lock(&mp->m_sb_lock);
-		if (!xfs_sb_version_hasattr(&mp->m_sb)) {
-			xfs_sb_version_addattr(&mp->m_sb);
+		if (!xfs_has_attr(mp)) {
+			xfs_add_attr(mp);
 			log_sb = true;
 		}
-		if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
-			xfs_sb_version_addattr2(&mp->m_sb);
+		if (!xfs_has_attr2(mp) && version == 2) {
+			xfs_add_attr2(mp);
 			log_sb = true;
 		}
 		spin_unlock(&mp->m_sb_lock);
@@ -3422,7 +3422,7 @@ xfs_bmap_compute_alignments(
 	int			stripe_align = 0;
 
 	/* stripe alignment for allocation is determined by mount parameters */
-	if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
+	if (mp->m_swidth && xfs_has_swalloc(mp))
 		stripe_align = mp->m_swidth;
 	else if (mp->m_dalign)
 		stripe_align = mp->m_dalign;
@@ -3938,7 +3938,7 @@ xfs_bmapi_read(
 	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT))
 		return -EFSCORRUPTED;
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	XFS_STATS_INC(mp, xs_blk_mapr);
@@ -4420,7 +4420,7 @@ xfs_bmapi_write(
 		return -EFSCORRUPTED;
 	}
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	XFS_STATS_INC(mp, xs_blk_mapw);
@@ -4703,7 +4703,7 @@ xfs_bmapi_remap(
 		return -EFSCORRUPTED;
 	}
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	error = xfs_iread_extents(tp, ip, whichfork);
@@ -5361,7 +5361,7 @@ __xfs_bunmapi(
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)))
 		return -EFSCORRUPTED;
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
@@ -5852,7 +5852,7 @@ xfs_bmap_collapse_extents(
 		return -EFSCORRUPTED;
 	}
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
@@ -5930,7 +5930,7 @@ xfs_bmap_can_insert_extents(
 
 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
 
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+	if (xfs_is_shutdown(ip->i_mount))
 		return -EIO;
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -5967,7 +5967,7 @@ xfs_bmap_insert_extents(
 		return -EFSCORRUPTED;
 	}
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
@@ -6070,7 +6070,7 @@ xfs_bmap_split_extent(
 		return -EFSCORRUPTED;
 	}
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	/* Read in all the extents */
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 1ceba020940e..72444b8b38a6 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -58,7 +58,7 @@ xfs_bmdr_to_bmbt(
 
 void
 xfs_bmbt_disk_get_all(
-	struct xfs_bmbt_rec	*rec,
+	const struct xfs_bmbt_rec *rec,
 	struct xfs_bmbt_irec	*irec)
 {
 	uint64_t		l0 = get_unaligned_be64(&rec->l0);
@@ -78,7 +78,7 @@ xfs_bmbt_disk_get_all(
  */
 xfs_filblks_t
 xfs_bmbt_disk_get_blockcount(
-	xfs_bmbt_rec_t	*r)
+	const struct xfs_bmbt_rec	*r)
 {
 	return (xfs_filblks_t)(be64_to_cpu(r->l1) & xfs_mask64lo(21));
 }
@@ -88,7 +88,7 @@ xfs_bmbt_disk_get_blockcount(
  */
 xfs_fileoff_t
 xfs_bmbt_disk_get_startoff(
-	xfs_bmbt_rec_t	*r)
+	const struct xfs_bmbt_rec	*r)
 {
 	return ((xfs_fileoff_t)be64_to_cpu(r->l0) &
 		 xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
@@ -136,7 +136,7 @@ xfs_bmbt_to_bmdr(
 	xfs_bmbt_key_t		*tkp;
 	__be64			*tpp;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_CRC_MAGIC));
 		ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid,
 		       &mp->m_sb.sb_meta_uuid));
@@ -193,10 +193,10 @@ xfs_bmbt_update_cursor(
 
 STATIC int
 xfs_bmbt_alloc_block(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*start,
-	union xfs_btree_ptr	*new,
-	int			*stat)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*start,
+	union xfs_btree_ptr		*new,
+	int				*stat)
 {
 	xfs_alloc_arg_t		args;		/* block allocation args */
 	int			error;		/* error return value */
@@ -282,7 +282,7 @@ xfs_bmbt_free_block(
 	struct xfs_mount	*mp = cur->bc_mp;
 	struct xfs_inode	*ip = cur->bc_ino.ip;
 	struct xfs_trans	*tp = cur->bc_tp;
-	xfs_fsblock_t		fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
+	xfs_fsblock_t		fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
 	struct xfs_owner_info	oinfo;
 
 	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork);
@@ -352,8 +352,8 @@ xfs_bmbt_get_dmaxrecs(
 
 STATIC void
 xfs_bmbt_init_key_from_rec(
-	union xfs_btree_key	*key,
-	union xfs_btree_rec	*rec)
+	union xfs_btree_key		*key,
+	const union xfs_btree_rec	*rec)
 {
 	key->bmbt.br_startoff =
 		cpu_to_be64(xfs_bmbt_disk_get_startoff(&rec->bmbt));
@@ -361,8 +361,8 @@ xfs_bmbt_init_key_from_rec(
 
 STATIC void
 xfs_bmbt_init_high_key_from_rec(
-	union xfs_btree_key	*key,
-	union xfs_btree_rec	*rec)
+	union xfs_btree_key		*key,
+	const union xfs_btree_rec	*rec)
 {
 	key->bmbt.br_startoff = cpu_to_be64(
 			xfs_bmbt_disk_get_startoff(&rec->bmbt) +
@@ -387,8 +387,8 @@ xfs_bmbt_init_ptr_from_cur(
 
 STATIC int64_t
 xfs_bmbt_key_diff(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*key)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*key)
 {
 	return (int64_t)be64_to_cpu(key->bmbt.br_startoff) -
 				      cur->bc_rec.b.br_startoff;
@@ -396,12 +396,12 @@ xfs_bmbt_key_diff(
 
 STATIC int64_t
 xfs_bmbt_diff_two_keys(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*k1,
-	union xfs_btree_key	*k2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*k1,
+	const union xfs_btree_key	*k2)
 {
-	uint64_t		a = be64_to_cpu(k1->bmbt.br_startoff);
-	uint64_t		b = be64_to_cpu(k2->bmbt.br_startoff);
+	uint64_t			a = be64_to_cpu(k1->bmbt.br_startoff);
+	uint64_t			b = be64_to_cpu(k2->bmbt.br_startoff);
 
 	/*
 	 * Note: This routine previously casted a and b to int64 and subtracted
@@ -428,7 +428,7 @@ xfs_bmbt_verify(
 	if (!xfs_verify_magic(bp, block->bb_magic))
 		return __this_address;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		/*
 		 * XXX: need a better way of verifying the owner here. Right now
 		 * just make sure there has been one set.
@@ -497,9 +497,9 @@ const struct xfs_buf_ops xfs_bmbt_buf_ops = {
 
 STATIC int
 xfs_bmbt_keys_inorder(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*k1,
-	union xfs_btree_key	*k2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*k1,
+	const union xfs_btree_key	*k2)
 {
 	return be64_to_cpu(k1->bmbt.br_startoff) <
 		be64_to_cpu(k2->bmbt.br_startoff);
@@ -507,9 +507,9 @@ xfs_bmbt_keys_inorder(
 
 STATIC int
 xfs_bmbt_recs_inorder(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_rec	*r1,
-	union xfs_btree_rec	*r2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_rec	*r1,
+	const union xfs_btree_rec	*r2)
 {
 	return xfs_bmbt_disk_get_startoff(&r1->bmbt) +
 		xfs_bmbt_disk_get_blockcount(&r1->bmbt) <=
@@ -563,7 +563,7 @@ xfs_bmbt_init_cursor(
 
 	cur->bc_ops = &xfs_bmbt_ops;
 	cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE;
-	if (xfs_sb_version_hascrc(&mp->m_sb))
+	if (xfs_has_crc(mp))
 		cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
 
 	cur->bc_ino.forksize = XFS_IFORK_SIZE(ip, whichfork);
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h
index 72bf74c79fb9..729e3bc569be 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.h
+++ b/fs/xfs/libxfs/xfs_bmap_btree.h
@@ -16,7 +16,7 @@ struct xfs_trans;
  * Btree block header size depends on a superblock flag.
  */
 #define XFS_BMBT_BLOCK_LEN(mp) \
-	(xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
+	(xfs_has_crc(((mp))) ? \
 		XFS_BTREE_LBLOCK_CRC_LEN : XFS_BTREE_LBLOCK_LEN)
 
 #define XFS_BMBT_REC_ADDR(mp, block, index) \
@@ -88,9 +88,10 @@ extern void xfs_bmdr_to_bmbt(struct xfs_inode *, xfs_bmdr_block_t *, int,
 			struct xfs_btree_block *, int);
 
 void xfs_bmbt_disk_set_all(struct xfs_bmbt_rec *r, struct xfs_bmbt_irec *s);
-extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r);
-extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r);
-extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
+extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(const struct xfs_bmbt_rec *r);
+extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(const struct xfs_bmbt_rec *r);
+void xfs_bmbt_disk_get_all(const struct xfs_bmbt_rec *r,
+		struct xfs_bmbt_irec *s);
 
 extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int,
 			xfs_bmdr_block_t *, int);
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index be74a6b53689..298395481713 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -64,13 +64,13 @@ __xfs_btree_check_lblock(
 {
 	struct xfs_mount	*mp = cur->bc_mp;
 	xfs_btnum_t		btnum = cur->bc_btnum;
-	int			crc = xfs_sb_version_hascrc(&mp->m_sb);
+	int			crc = xfs_has_crc(mp);
 
 	if (crc) {
 		if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
 			return __this_address;
 		if (block->bb_u.l.bb_blkno !=
-		    cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL))
+		    cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
 			return __this_address;
 		if (block->bb_u.l.bb_pad != cpu_to_be32(0))
 			return __this_address;
@@ -129,13 +129,13 @@ __xfs_btree_check_sblock(
 {
 	struct xfs_mount	*mp = cur->bc_mp;
 	xfs_btnum_t		btnum = cur->bc_btnum;
-	int			crc = xfs_sb_version_hascrc(&mp->m_sb);
+	int			crc = xfs_has_crc(mp);
 
 	if (crc) {
 		if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
 			return __this_address;
 		if (block->bb_u.s.bb_blkno !=
-		    cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL))
+		    cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
 			return __this_address;
 	}
 
@@ -225,10 +225,10 @@ xfs_btree_check_sptr(
  */
 static int
 xfs_btree_check_ptr(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*ptr,
-	int			index,
-	int			level)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*ptr,
+	int				index,
+	int				level)
 {
 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
 		if (xfs_btree_check_lptr(cur, be64_to_cpu((&ptr->l)[index]),
@@ -273,7 +273,7 @@ xfs_btree_lblock_calc_crc(
 	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
 	struct xfs_buf_log_item	*bip = bp->b_log_item;
 
-	if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb))
+	if (!xfs_has_crc(bp->b_mount))
 		return;
 	if (bip)
 		block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
@@ -287,7 +287,7 @@ xfs_btree_lblock_verify_crc(
 	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
 	struct xfs_mount	*mp = bp->b_mount;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
 			return false;
 		return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
@@ -311,7 +311,7 @@ xfs_btree_sblock_calc_crc(
 	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
 	struct xfs_buf_log_item	*bip = bp->b_log_item;
 
-	if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb))
+	if (!xfs_has_crc(bp->b_mount))
 		return;
 	if (bip)
 		block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
@@ -325,7 +325,7 @@ xfs_btree_sblock_verify_crc(
 	struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
 	struct xfs_mount	*mp = bp->b_mount;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
 			return false;
 		return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
@@ -374,7 +374,7 @@ xfs_btree_del_cursor(
 	}
 
 	ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 ||
-	       XFS_FORCED_SHUTDOWN(cur->bc_mp));
+	       xfs_is_shutdown(cur->bc_mp));
 	if (unlikely(cur->bc_flags & XFS_BTREE_STAGING))
 		kmem_free(cur->bc_ops);
 	if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) && cur->bc_ag.pag)
@@ -420,7 +420,7 @@ xfs_btree_dup_cursor(
 		bp = cur->bc_bufs[i];
 		if (bp) {
 			error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
-						   XFS_BUF_ADDR(bp), mp->m_bsize,
+						   xfs_buf_daddr(bp), mp->m_bsize,
 						   0, &bp,
 						   cur->bc_ops->buf_ops);
 			if (error) {
@@ -935,9 +935,9 @@ xfs_btree_readahead(
 
 STATIC int
 xfs_btree_ptr_to_daddr(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*ptr,
-	xfs_daddr_t		*daddr)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*ptr,
+	xfs_daddr_t			*daddr)
 {
 	xfs_fsblock_t		fsbno;
 	xfs_agblock_t		agbno;
@@ -1012,8 +1012,8 @@ xfs_btree_setbuf(
 
 bool
 xfs_btree_ptr_is_null(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*ptr)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*ptr)
 {
 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
 		return ptr->l == cpu_to_be64(NULLFSBLOCK);
@@ -1059,10 +1059,10 @@ xfs_btree_get_sibling(
 
 void
 xfs_btree_set_sibling(
-	struct xfs_btree_cur	*cur,
-	struct xfs_btree_block	*block,
-	union xfs_btree_ptr	*ptr,
-	int			lr)
+	struct xfs_btree_cur		*cur,
+	struct xfs_btree_block		*block,
+	const union xfs_btree_ptr	*ptr,
+	int				lr)
 {
 	ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
 
@@ -1090,7 +1090,7 @@ xfs_btree_init_block_int(
 	__u64			owner,
 	unsigned int		flags)
 {
-	int			crc = xfs_sb_version_hascrc(&mp->m_sb);
+	int			crc = xfs_has_crc(mp);
 	__u32			magic = xfs_btree_magic(crc, btnum);
 
 	buf->bb_magic = cpu_to_be32(magic);
@@ -1131,7 +1131,7 @@ xfs_btree_init_block(
 	__u16		numrecs,
 	__u64		owner)
 {
-	xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
+	xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), xfs_buf_daddr(bp),
 				 btnum, level, numrecs, owner, 0);
 }
 
@@ -1155,9 +1155,9 @@ xfs_btree_init_block_cur(
 	else
 		owner = cur->bc_ag.pag->pag_agno;
 
-	xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
-				 cur->bc_btnum, level, numrecs,
-				 owner, cur->bc_flags);
+	xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp),
+				xfs_buf_daddr(bp), cur->bc_btnum, level,
+				numrecs, owner, cur->bc_flags);
 }
 
 /*
@@ -1192,10 +1192,10 @@ xfs_btree_buf_to_ptr(
 {
 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
 		ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
-					XFS_BUF_ADDR(bp)));
+					xfs_buf_daddr(bp)));
 	else {
 		ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
-					XFS_BUF_ADDR(bp)));
+					xfs_buf_daddr(bp)));
 	}
 }
 
@@ -1229,10 +1229,10 @@ xfs_btree_set_refs(
 
 int
 xfs_btree_get_buf_block(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*ptr,
-	struct xfs_btree_block	**block,
-	struct xfs_buf		**bpp)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*ptr,
+	struct xfs_btree_block		**block,
+	struct xfs_buf			**bpp)
 {
 	struct xfs_mount	*mp = cur->bc_mp;
 	xfs_daddr_t		d;
@@ -1257,11 +1257,11 @@ xfs_btree_get_buf_block(
  */
 STATIC int
 xfs_btree_read_buf_block(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*ptr,
-	int			flags,
-	struct xfs_btree_block	**block,
-	struct xfs_buf		**bpp)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*ptr,
+	int				flags,
+	struct xfs_btree_block		**block,
+	struct xfs_buf			**bpp)
 {
 	struct xfs_mount	*mp = cur->bc_mp;
 	xfs_daddr_t		d;
@@ -1289,10 +1289,10 @@ xfs_btree_read_buf_block(
  */
 void
 xfs_btree_copy_keys(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*dst_key,
-	union xfs_btree_key	*src_key,
-	int			numkeys)
+	struct xfs_btree_cur		*cur,
+	union xfs_btree_key		*dst_key,
+	const union xfs_btree_key	*src_key,
+	int				numkeys)
 {
 	ASSERT(numkeys >= 0);
 	memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
@@ -1713,10 +1713,10 @@ error0:
 
 int
 xfs_btree_lookup_get_block(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	int			level,	/* level in the btree */
-	union xfs_btree_ptr	*pp,	/* ptr to btree block */
-	struct xfs_btree_block	**blkp) /* return btree block */
+	struct xfs_btree_cur		*cur,	/* btree cursor */
+	int				level,	/* level in the btree */
+	const union xfs_btree_ptr	*pp,	/* ptr to btree block */
+	struct xfs_btree_block		**blkp) /* return btree block */
 {
 	struct xfs_buf		*bp;	/* buffer pointer for btree block */
 	xfs_daddr_t		daddr;
@@ -1739,7 +1739,7 @@ xfs_btree_lookup_get_block(
 	error = xfs_btree_ptr_to_daddr(cur, pp, &daddr);
 	if (error)
 		return error;
-	if (bp && XFS_BUF_ADDR(bp) == daddr) {
+	if (bp && xfs_buf_daddr(bp) == daddr) {
 		*blkp = XFS_BUF_TO_BLOCK(bp);
 		return 0;
 	}
@@ -1749,7 +1749,7 @@ xfs_btree_lookup_get_block(
 		return error;
 
 	/* Check the inode owner since the verifiers don't. */
-	if (xfs_sb_version_hascrc(&cur->bc_mp->m_sb) &&
+	if (xfs_has_crc(cur->bc_mp) &&
 	    !(cur->bc_ino.flags & XFS_BTCUR_BMBT_INVALID_OWNER) &&
 	    (cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
 	    be64_to_cpu((*blkp)->bb_u.l.bb_owner) !=
@@ -2923,10 +2923,11 @@ xfs_btree_new_iroot(
 	 */
 	memcpy(cblock, block, xfs_btree_block_len(cur));
 	if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
+		__be64 bno = cpu_to_be64(xfs_buf_daddr(cbp));
 		if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
-			cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn);
+			cblock->bb_u.l.bb_blkno = bno;
 		else
-			cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn);
+			cblock->bb_u.s.bb_blkno = bno;
 	}
 
 	be16_add_cpu(&block->bb_level, 1);
@@ -3225,7 +3226,7 @@ xfs_btree_insrec(
 
 	/* Get pointers to the btree buffer and block. */
 	block = xfs_btree_get_block(cur, level, &bp);
-	old_bn = bp ? bp->b_bn : XFS_BUF_DADDR_NULL;
+	old_bn = bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL;
 	numrecs = xfs_btree_get_numrecs(block);
 
 #ifdef DEBUG
@@ -3341,7 +3342,7 @@ xfs_btree_insrec(
 	 * some records into the new tree block), so use the regular key
 	 * update mechanism.
 	 */
-	if (bp && bp->b_bn != old_bn) {
+	if (bp && xfs_buf_daddr(bp) != old_bn) {
 		xfs_btree_get_keys(cur, block, lkey);
 	} else if (xfs_btree_needs_key_update(cur, optr)) {
 		error = xfs_btree_update_keys(cur, level);
@@ -4418,11 +4419,11 @@ xfs_btree_lblock_v5hdr_verify(
 	struct xfs_mount	*mp = bp->b_mount;
 	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
 
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return __this_address;
 	if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
 		return __this_address;
-	if (block->bb_u.l.bb_blkno != cpu_to_be64(bp->b_bn))
+	if (block->bb_u.l.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
 		return __this_address;
 	if (owner != XFS_RMAP_OWN_UNKNOWN &&
 	    be64_to_cpu(block->bb_u.l.bb_owner) != owner)
@@ -4468,11 +4469,11 @@ xfs_btree_sblock_v5hdr_verify(
 	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
 	struct xfs_perag	*pag = bp->b_pag;
 
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return __this_address;
 	if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
 		return __this_address;
-	if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
+	if (block->bb_u.s.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
 		return __this_address;
 	if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
 		return __this_address;
@@ -4499,7 +4500,7 @@ xfs_btree_sblock_verify(
 		return __this_address;
 
 	/* sibling pointer verification */
-	agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
+	agno = xfs_daddr_to_agno(mp, xfs_buf_daddr(bp));
 	if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) &&
 	    !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_leftsib)))
 		return __this_address;
@@ -4536,8 +4537,8 @@ xfs_btree_compute_maxlevels(
 STATIC int
 xfs_btree_simple_query_range(
 	struct xfs_btree_cur		*cur,
-	union xfs_btree_key		*low_key,
-	union xfs_btree_key		*high_key,
+	const union xfs_btree_key	*low_key,
+	const union xfs_btree_key	*high_key,
 	xfs_btree_query_range_fn	fn,
 	void				*priv)
 {
@@ -4627,8 +4628,8 @@ out:
 STATIC int
 xfs_btree_overlapped_query_range(
 	struct xfs_btree_cur		*cur,
-	union xfs_btree_key		*low_key,
-	union xfs_btree_key		*high_key,
+	const union xfs_btree_key	*low_key,
+	const union xfs_btree_key	*high_key,
 	xfs_btree_query_range_fn	fn,
 	void				*priv)
 {
@@ -4769,8 +4770,8 @@ out:
 int
 xfs_btree_query_range(
 	struct xfs_btree_cur		*cur,
-	union xfs_btree_irec		*low_rec,
-	union xfs_btree_irec		*high_rec,
+	const union xfs_btree_irec	*low_rec,
+	const union xfs_btree_irec	*high_rec,
 	xfs_btree_query_range_fn	fn,
 	void				*priv)
 {
@@ -4877,7 +4878,7 @@ xfs_btree_diff_two_ptrs(
 STATIC int
 xfs_btree_has_record_helper(
 	struct xfs_btree_cur		*cur,
-	union xfs_btree_rec		*rec,
+	const union xfs_btree_rec	*rec,
 	void				*priv)
 {
 	return -ECANCELED;
@@ -4886,12 +4887,12 @@ xfs_btree_has_record_helper(
 /* Is there a record covering a given range of keys? */
 int
 xfs_btree_has_record(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_irec	*low,
-	union xfs_btree_irec	*high,
-	bool			*exists)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_irec	*low,
+	const union xfs_btree_irec	*high,
+	bool				*exists)
 {
-	int			error;
+	int				error;
 
 	error = xfs_btree_query_range(cur, low, high,
 			&xfs_btree_has_record_helper, NULL);
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 4dbdc659c396..4eaf8517f850 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -106,19 +106,19 @@ struct xfs_btree_ops {
 
 	/* update btree root pointer */
 	void	(*set_root)(struct xfs_btree_cur *cur,
-			    union xfs_btree_ptr *nptr, int level_change);
+			    const union xfs_btree_ptr *nptr, int level_change);
 
 	/* block allocation / freeing */
 	int	(*alloc_block)(struct xfs_btree_cur *cur,
-			       union xfs_btree_ptr *start_bno,
+			       const union xfs_btree_ptr *start_bno,
 			       union xfs_btree_ptr *new_bno,
 			       int *stat);
 	int	(*free_block)(struct xfs_btree_cur *cur, struct xfs_buf *bp);
 
 	/* update last record information */
 	void	(*update_lastrec)(struct xfs_btree_cur *cur,
-				  struct xfs_btree_block *block,
-				  union xfs_btree_rec *rec,
+				  const struct xfs_btree_block *block,
+				  const union xfs_btree_rec *rec,
 				  int ptr, int reason);
 
 	/* records in block/level */
@@ -130,37 +130,37 @@ struct xfs_btree_ops {
 
 	/* init values of btree structures */
 	void	(*init_key_from_rec)(union xfs_btree_key *key,
-				     union xfs_btree_rec *rec);
+				     const union xfs_btree_rec *rec);
 	void	(*init_rec_from_cur)(struct xfs_btree_cur *cur,
 				     union xfs_btree_rec *rec);
 	void	(*init_ptr_from_cur)(struct xfs_btree_cur *cur,
 				     union xfs_btree_ptr *ptr);
 	void	(*init_high_key_from_rec)(union xfs_btree_key *key,
-					  union xfs_btree_rec *rec);
+					  const union xfs_btree_rec *rec);
 
 	/* difference between key value and cursor value */
 	int64_t (*key_diff)(struct xfs_btree_cur *cur,
-			      union xfs_btree_key *key);
+			    const union xfs_btree_key *key);
 
 	/*
 	 * Difference between key2 and key1 -- positive if key1 > key2,
 	 * negative if key1 < key2, and zero if equal.
 	 */
 	int64_t (*diff_two_keys)(struct xfs_btree_cur *cur,
-				   union xfs_btree_key *key1,
-				   union xfs_btree_key *key2);
+				 const union xfs_btree_key *key1,
+				 const union xfs_btree_key *key2);
 
 	const struct xfs_buf_ops	*buf_ops;
 
 	/* check that k1 is lower than k2 */
 	int	(*keys_inorder)(struct xfs_btree_cur *cur,
-				union xfs_btree_key *k1,
-				union xfs_btree_key *k2);
+				const union xfs_btree_key *k1,
+				const union xfs_btree_key *k2);
 
 	/* check that r1 is lower than r2 */
 	int	(*recs_inorder)(struct xfs_btree_cur *cur,
-				union xfs_btree_rec *r1,
-				union xfs_btree_rec *r2);
+				const union xfs_btree_rec *r1,
+				const union xfs_btree_rec *r2);
 };
 
 /*
@@ -423,7 +423,7 @@ void xfs_btree_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int, int);
 /*
  * Helpers.
  */
-static inline int xfs_btree_get_numrecs(struct xfs_btree_block *block)
+static inline int xfs_btree_get_numrecs(const struct xfs_btree_block *block)
 {
 	return be16_to_cpu(block->bb_numrecs);
 }
@@ -434,7 +434,7 @@ static inline void xfs_btree_set_numrecs(struct xfs_btree_block *block,
 	block->bb_numrecs = cpu_to_be16(numrecs);
 }
 
-static inline int xfs_btree_get_level(struct xfs_btree_block *block)
+static inline int xfs_btree_get_level(const struct xfs_btree_block *block)
 {
 	return be16_to_cpu(block->bb_level);
 }
@@ -471,10 +471,11 @@ unsigned long long xfs_btree_calc_size(uint *limits, unsigned long long len);
  * code on its own.
  */
 typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur,
-		union xfs_btree_rec *rec, void *priv);
+		const union xfs_btree_rec *rec, void *priv);
 
 int xfs_btree_query_range(struct xfs_btree_cur *cur,
-		union xfs_btree_irec *low_rec, union xfs_btree_irec *high_rec,
+		const union xfs_btree_irec *low_rec,
+		const union xfs_btree_irec *high_rec,
 		xfs_btree_query_range_fn fn, void *priv);
 int xfs_btree_query_all(struct xfs_btree_cur *cur, xfs_btree_query_range_fn fn,
 		void *priv);
@@ -502,10 +503,11 @@ union xfs_btree_key *xfs_btree_high_key_addr(struct xfs_btree_cur *cur, int n,
 union xfs_btree_ptr *xfs_btree_ptr_addr(struct xfs_btree_cur *cur, int n,
 		struct xfs_btree_block *block);
 int xfs_btree_lookup_get_block(struct xfs_btree_cur *cur, int level,
-		union xfs_btree_ptr *pp, struct xfs_btree_block **blkp);
+		const union xfs_btree_ptr *pp, struct xfs_btree_block **blkp);
 struct xfs_btree_block *xfs_btree_get_block(struct xfs_btree_cur *cur,
 		int level, struct xfs_buf **bpp);
-bool xfs_btree_ptr_is_null(struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr);
+bool xfs_btree_ptr_is_null(struct xfs_btree_cur *cur,
+		const union xfs_btree_ptr *ptr);
 int64_t xfs_btree_diff_two_ptrs(struct xfs_btree_cur *cur,
 				const union xfs_btree_ptr *a,
 				const union xfs_btree_ptr *b);
@@ -516,8 +518,9 @@ void xfs_btree_get_keys(struct xfs_btree_cur *cur,
 		struct xfs_btree_block *block, union xfs_btree_key *key);
 union xfs_btree_key *xfs_btree_high_key_from_key(struct xfs_btree_cur *cur,
 		union xfs_btree_key *key);
-int xfs_btree_has_record(struct xfs_btree_cur *cur, union xfs_btree_irec *low,
-		union xfs_btree_irec *high, bool *exists);
+int xfs_btree_has_record(struct xfs_btree_cur *cur,
+		const union xfs_btree_irec *low,
+		const union xfs_btree_irec *high, bool *exists);
 bool xfs_btree_has_more_records(struct xfs_btree_cur *cur);
 struct xfs_ifork *xfs_btree_ifork_ptr(struct xfs_btree_cur *cur);
 
@@ -540,10 +543,11 @@ xfs_btree_islastblock(
 
 void xfs_btree_set_ptr_null(struct xfs_btree_cur *cur,
 		union xfs_btree_ptr *ptr);
-int xfs_btree_get_buf_block(struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr,
-		struct xfs_btree_block **block, struct xfs_buf **bpp);
+int xfs_btree_get_buf_block(struct xfs_btree_cur *cur,
+		const union xfs_btree_ptr *ptr, struct xfs_btree_block **block,
+		struct xfs_buf **bpp);
 void xfs_btree_set_sibling(struct xfs_btree_cur *cur,
-		struct xfs_btree_block *block, union xfs_btree_ptr *ptr,
+		struct xfs_btree_block *block, const union xfs_btree_ptr *ptr,
 		int lr);
 void xfs_btree_init_block_cur(struct xfs_btree_cur *cur,
 		struct xfs_buf *bp, int level, int numrecs);
@@ -551,7 +555,7 @@ void xfs_btree_copy_ptrs(struct xfs_btree_cur *cur,
 		union xfs_btree_ptr *dst_ptr,
 		const union xfs_btree_ptr *src_ptr, int numptrs);
 void xfs_btree_copy_keys(struct xfs_btree_cur *cur,
-		union xfs_btree_key *dst_key, union xfs_btree_key *src_key,
-		int numkeys);
+		union xfs_btree_key *dst_key,
+		const union xfs_btree_key *src_key, int numkeys);
 
 #endif	/* __XFS_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c
index aa8dc9521c39..ac9e80152b5c 100644
--- a/fs/xfs/libxfs/xfs_btree_staging.c
+++ b/fs/xfs/libxfs/xfs_btree_staging.c
@@ -59,10 +59,10 @@ xfs_btree_fakeroot_dup_cursor(
  */
 STATIC int
 xfs_btree_fakeroot_alloc_block(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*start_bno,
-	union xfs_btree_ptr	*new_bno,
-	int			*stat)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*start_bno,
+	union xfs_btree_ptr		*new_bno,
+	int				*stat)
 {
 	ASSERT(0);
 	return -EFSCORRUPTED;
@@ -112,9 +112,9 @@ xfs_btree_fakeroot_init_ptr_from_cur(
 /* Update the btree root information for a per-AG fake root. */
 STATIC void
 xfs_btree_afakeroot_set_root(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*ptr,
-	int			inc)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*ptr,
+	int				inc)
 {
 	struct xbtree_afakeroot	*afake = cur->bc_ag.afake;
 
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 747ec77912c3..c062e2c85178 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -129,7 +129,7 @@ xfs_da3_node_hdr_from_disk(
 	struct xfs_da3_icnode_hdr	*to,
 	struct xfs_da_intnode		*from)
 {
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		struct xfs_da3_intnode	*from3 = (struct xfs_da3_intnode *)from;
 
 		to->forw = be32_to_cpu(from3->hdr.info.hdr.forw);
@@ -156,7 +156,7 @@ xfs_da3_node_hdr_to_disk(
 	struct xfs_da_intnode		*to,
 	struct xfs_da3_icnode_hdr	*from)
 {
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		struct xfs_da3_intnode	*to3 = (struct xfs_da3_intnode *)to;
 
 		ASSERT(from->magic == XFS_DA3_NODE_MAGIC);
@@ -191,10 +191,10 @@ xfs_da3_blkinfo_verify(
 	if (!xfs_verify_magic16(bp, hdr->magic))
 		return __this_address;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
 			return __this_address;
-		if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
+		if (be64_to_cpu(hdr3->blkno) != xfs_buf_daddr(bp))
 			return __this_address;
 		if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
 			return __this_address;
@@ -253,7 +253,7 @@ xfs_da3_node_write_verify(
 		return;
 	}
 
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return;
 
 	if (bip)
@@ -442,12 +442,12 @@ xfs_da3_node_create(
 	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
 	node = bp->b_addr;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
 
 		memset(hdr3, 0, sizeof(struct xfs_da3_node_hdr));
 		ichdr.magic = XFS_DA3_NODE_MAGIC;
-		hdr3->info.blkno = cpu_to_be64(bp->b_bn);
+		hdr3->info.blkno = cpu_to_be64(xfs_buf_daddr(bp));
 		hdr3->info.owner = cpu_to_be64(args->dp->i_ino);
 		uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid);
 	} else {
@@ -711,7 +711,7 @@ xfs_da3_root_split(
 	    oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
 		struct xfs_da3_intnode *node3 = (struct xfs_da3_intnode *)node;
 
-		node3->hdr.info.blkno = cpu_to_be64(bp->b_bn);
+		node3->hdr.info.blkno = cpu_to_be64(xfs_buf_daddr(bp));
 	}
 	xfs_trans_log_buf(tp, bp, 0, size - 1);
 
@@ -1219,7 +1219,7 @@ xfs_da3_root_join(
 	xfs_trans_buf_copy_type(root_blk->bp, bp);
 	if (oldroothdr.magic == XFS_DA3_NODE_MAGIC) {
 		struct xfs_da3_blkinfo *da3 = root_blk->bp->b_addr;
-		da3->blkno = cpu_to_be64(root_blk->bp->b_bn);
+		da3->blkno = cpu_to_be64(xfs_buf_daddr(root_blk->bp));
 	}
 	xfs_trans_log_buf(args->trans, root_blk->bp, 0,
 			  args->geo->blksize - 1);
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index b876b44c0204..5a49caa5c9df 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -789,7 +789,7 @@ struct xfs_attr3_rmt_hdr {
 #define XFS_ATTR3_RMT_CRC_OFF	offsetof(struct xfs_attr3_rmt_hdr, rm_crc)
 
 #define XFS_ATTR3_RMT_BUF_SPACE(mp, bufsize)	\
-	((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \
+	((bufsize) - (xfs_has_crc((mp)) ? \
 			sizeof(struct xfs_attr3_rmt_hdr) : 0))
 
 /* Number of bytes in a directory block. */
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 050bdcc4fe73..50546eadaae2 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -115,7 +115,7 @@ xfs_da_mount(
 	dageo->fsblog = mp->m_sb.sb_blocklog;
 	dageo->blksize = xfs_dir2_dirblock_bytes(&mp->m_sb);
 	dageo->fsbcount = 1 << mp->m_sb.sb_dirblklog;
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		dageo->node_hdr_size = sizeof(struct xfs_da3_node_hdr);
 		dageo->leaf_hdr_size = sizeof(struct xfs_dir3_leaf_hdr);
 		dageo->free_hdr_size = sizeof(struct xfs_dir3_free_hdr);
@@ -730,7 +730,7 @@ xfs_dir2_hashname(
 	struct xfs_mount	*mp,
 	struct xfs_name		*name)
 {
-	if (unlikely(xfs_sb_version_hasasciici(&mp->m_sb)))
+	if (unlikely(xfs_has_asciici(mp)))
 		return xfs_ascii_ci_hashname(name);
 	return xfs_da_hashname(name->name, name->len);
 }
@@ -741,7 +741,7 @@ xfs_dir2_compname(
 	const unsigned char	*name,
 	int			len)
 {
-	if (unlikely(xfs_sb_version_hasasciici(&args->dp->i_mount->m_sb)))
+	if (unlikely(xfs_has_asciici(args->dp->i_mount)))
 		return xfs_ascii_ci_compname(args, name, len);
 	return xfs_da_compname(args, name, len);
 }
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index 75e1421f69c4..df0869bba275 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -53,10 +53,10 @@ xfs_dir3_block_verify(
 	if (!xfs_verify_magic(bp, hdr3->magic))
 		return __this_address;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
 			return __this_address;
-		if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
+		if (be64_to_cpu(hdr3->blkno) != xfs_buf_daddr(bp))
 			return __this_address;
 		if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
 			return __this_address;
@@ -71,7 +71,7 @@ xfs_dir3_block_read_verify(
 	struct xfs_mount	*mp = bp->b_mount;
 	xfs_failaddr_t		fa;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb) &&
+	if (xfs_has_crc(mp) &&
 	     !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
 		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
 	else {
@@ -96,7 +96,7 @@ xfs_dir3_block_write_verify(
 		return;
 	}
 
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return;
 
 	if (bip)
@@ -121,7 +121,7 @@ xfs_dir3_block_header_check(
 {
 	struct xfs_mount	*mp = dp->i_mount;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
 
 		if (be64_to_cpu(hdr3->owner) != dp->i_ino)
@@ -171,10 +171,10 @@ xfs_dir3_block_init(
 	bp->b_ops = &xfs_dir3_block_buf_ops;
 	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_BLOCK_BUF);
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		memset(hdr3, 0, sizeof(*hdr3));
 		hdr3->magic = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
-		hdr3->blkno = cpu_to_be64(bp->b_bn);
+		hdr3->blkno = cpu_to_be64(xfs_buf_daddr(bp));
 		hdr3->owner = cpu_to_be64(dp->i_ino);
 		uuid_copy(&hdr3->uuid, &mp->m_sb.sb_meta_uuid);
 		return;
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index e67fa086f2c1..dbcf58979a59 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -29,7 +29,7 @@ xfs_dir2_data_bestfree_p(
 	struct xfs_mount		*mp,
 	struct xfs_dir2_data_hdr	*hdr)
 {
-	if (xfs_sb_version_hascrc(&mp->m_sb))
+	if (xfs_has_crc(mp))
 		return ((struct xfs_dir3_data_hdr *)hdr)->best_free;
 	return hdr->bestfree;
 }
@@ -51,7 +51,7 @@ xfs_dir2_data_get_ftype(
 	struct xfs_mount		*mp,
 	struct xfs_dir2_data_entry	*dep)
 {
-	if (xfs_sb_version_hasftype(&mp->m_sb)) {
+	if (xfs_has_ftype(mp)) {
 		uint8_t			ftype = dep->name[dep->namelen];
 
 		if (likely(ftype < XFS_DIR3_FT_MAX))
@@ -70,7 +70,7 @@ xfs_dir2_data_put_ftype(
 	ASSERT(ftype < XFS_DIR3_FT_MAX);
 	ASSERT(dep->namelen != 0);
 
-	if (xfs_sb_version_hasftype(&mp->m_sb))
+	if (xfs_has_ftype(mp))
 		dep->name[dep->namelen] = ftype;
 }
 
@@ -297,10 +297,10 @@ xfs_dir3_data_verify(
 	if (!xfs_verify_magic(bp, hdr3->magic))
 		return __this_address;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
 			return __this_address;
-		if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
+		if (be64_to_cpu(hdr3->blkno) != xfs_buf_daddr(bp))
 			return __this_address;
 		if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
 			return __this_address;
@@ -343,7 +343,7 @@ xfs_dir3_data_read_verify(
 	struct xfs_mount	*mp = bp->b_mount;
 	xfs_failaddr_t		fa;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb) &&
+	if (xfs_has_crc(mp) &&
 	    !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
 		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
 	else {
@@ -368,7 +368,7 @@ xfs_dir3_data_write_verify(
 		return;
 	}
 
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return;
 
 	if (bip)
@@ -401,7 +401,7 @@ xfs_dir3_data_header_check(
 {
 	struct xfs_mount	*mp = dp->i_mount;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		struct xfs_dir3_data_hdr *hdr3 = bp->b_addr;
 
 		if (be64_to_cpu(hdr3->hdr.owner) != dp->i_ino)
@@ -717,12 +717,12 @@ xfs_dir3_data_init(
 	 * Initialize the header.
 	 */
 	hdr = bp->b_addr;
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
 
 		memset(hdr3, 0, sizeof(*hdr3));
 		hdr3->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
-		hdr3->blkno = cpu_to_be64(bp->b_bn);
+		hdr3->blkno = cpu_to_be64(xfs_buf_daddr(bp));
 		hdr3->owner = cpu_to_be64(dp->i_ino);
 		uuid_copy(&hdr3->uuid, &mp->m_sb.sb_meta_uuid);
 
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index 5369d8bb2593..d9b66306a9a7 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -37,7 +37,7 @@ xfs_dir2_leaf_hdr_from_disk(
 	struct xfs_dir3_icleaf_hdr	*to,
 	struct xfs_dir2_leaf		*from)
 {
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		struct xfs_dir3_leaf *from3 = (struct xfs_dir3_leaf *)from;
 
 		to->forw = be32_to_cpu(from3->hdr.info.hdr.forw);
@@ -68,7 +68,7 @@ xfs_dir2_leaf_hdr_to_disk(
 	struct xfs_dir2_leaf		*to,
 	struct xfs_dir3_icleaf_hdr	*from)
 {
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		struct xfs_dir3_leaf *to3 = (struct xfs_dir3_leaf *)to;
 
 		ASSERT(from->magic == XFS_DIR3_LEAF1_MAGIC ||
@@ -108,7 +108,7 @@ xfs_dir3_leaf1_check(
 
 	if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) {
 		struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
-		if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
+		if (be64_to_cpu(leaf3->info.blkno) != xfs_buf_daddr(bp))
 			return __this_address;
 	} else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC)
 		return __this_address;
@@ -209,7 +209,7 @@ xfs_dir3_leaf_read_verify(
 	struct xfs_mount	*mp = bp->b_mount;
 	xfs_failaddr_t		fa;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb) &&
+	if (xfs_has_crc(mp) &&
 	     !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF))
 		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
 	else {
@@ -234,7 +234,7 @@ xfs_dir3_leaf_write_verify(
 		return;
 	}
 
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return;
 
 	if (bip)
@@ -308,7 +308,7 @@ xfs_dir3_leaf_init(
 
 	ASSERT(type == XFS_DIR2_LEAF1_MAGIC || type == XFS_DIR2_LEAFN_MAGIC);
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
 
 		memset(leaf3, 0, sizeof(*leaf3));
@@ -316,7 +316,7 @@ xfs_dir3_leaf_init(
 		leaf3->info.hdr.magic = (type == XFS_DIR2_LEAF1_MAGIC)
 					 ? cpu_to_be16(XFS_DIR3_LEAF1_MAGIC)
 					 : cpu_to_be16(XFS_DIR3_LEAFN_MAGIC);
-		leaf3->info.blkno = cpu_to_be64(bp->b_bn);
+		leaf3->info.blkno = cpu_to_be64(xfs_buf_daddr(bp));
 		leaf3->info.owner = cpu_to_be64(owner);
 		uuid_copy(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid);
 	} else {
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index d0520afb913a..7a03aeb9f4c9 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -68,7 +68,7 @@ xfs_dir3_leafn_check(
 
 	if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC) {
 		struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
-		if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
+		if (be64_to_cpu(leaf3->info.blkno) != xfs_buf_daddr(bp))
 			return __this_address;
 	} else if (leafhdr.magic != XFS_DIR2_LEAFN_MAGIC)
 		return __this_address;
@@ -105,12 +105,12 @@ xfs_dir3_free_verify(
 	if (!xfs_verify_magic(bp, hdr->magic))
 		return __this_address;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
 
 		if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
 			return __this_address;
-		if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
+		if (be64_to_cpu(hdr3->blkno) != xfs_buf_daddr(bp))
 			return __this_address;
 		if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
 			return __this_address;
@@ -128,7 +128,7 @@ xfs_dir3_free_read_verify(
 	struct xfs_mount	*mp = bp->b_mount;
 	xfs_failaddr_t		fa;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb) &&
+	if (xfs_has_crc(mp) &&
 	    !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF))
 		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
 	else {
@@ -153,7 +153,7 @@ xfs_dir3_free_write_verify(
 		return;
 	}
 
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return;
 
 	if (bip)
@@ -185,7 +185,7 @@ xfs_dir3_free_header_check(
 	firstdb = (xfs_dir2_da_to_db(mp->m_dir_geo, fbno) -
 		   xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) *
 			maxbests;
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
 
 		if (be32_to_cpu(hdr3->firstdb) != firstdb)
@@ -247,7 +247,7 @@ xfs_dir2_free_hdr_from_disk(
 	struct xfs_dir3_icfree_hdr	*to,
 	struct xfs_dir2_free		*from)
 {
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		struct xfs_dir3_free	*from3 = (struct xfs_dir3_free *)from;
 
 		to->magic = be32_to_cpu(from3->hdr.hdr.magic);
@@ -274,7 +274,7 @@ xfs_dir2_free_hdr_to_disk(
 	struct xfs_dir2_free		*to,
 	struct xfs_dir3_icfree_hdr	*from)
 {
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		struct xfs_dir3_free	*to3 = (struct xfs_dir3_free *)to;
 
 		ASSERT(from->magic == XFS_DIR3_FREE_MAGIC);
@@ -341,12 +341,12 @@ xfs_dir3_free_get_buf(
 	memset(bp->b_addr, 0, sizeof(struct xfs_dir3_free_hdr));
 	memset(&hdr, 0, sizeof(hdr));
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
 
 		hdr.magic = XFS_DIR3_FREE_MAGIC;
 
-		hdr3->hdr.blkno = cpu_to_be64(bp->b_bn);
+		hdr3->hdr.blkno = cpu_to_be64(xfs_buf_daddr(bp));
 		hdr3->hdr.owner = cpu_to_be64(dp->i_ino);
 		uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_meta_uuid);
 	} else
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index 94943ce49cab..711709a2aa53 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -196,7 +196,7 @@ xfs_dir2_data_entsize(
 
 	len = offsetof(struct xfs_dir2_data_entry, name[0]) + namelen +
 			sizeof(xfs_dir2_data_off_t) /* tag */;
-	if (xfs_sb_version_hasftype(&mp->m_sb))
+	if (xfs_has_ftype(mp))
 		len += sizeof(uint8_t);
 	return round_up(len, XFS_DIR2_DATA_ALIGN);
 }
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index 46d18bf9d5e1..5a97a87eaa20 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -48,7 +48,7 @@ xfs_dir2_sf_entsize(
 	count += sizeof(struct xfs_dir2_sf_entry);	/* namelen + offset */
 	count += hdr->i8count ? XFS_INO64_SIZE : XFS_INO32_SIZE; /* ino # */
 
-	if (xfs_sb_version_hasftype(&mp->m_sb))
+	if (xfs_has_ftype(mp))
 		count += sizeof(uint8_t);
 	return count;
 }
@@ -76,7 +76,7 @@ xfs_dir2_sf_get_ino(
 {
 	uint8_t				*from = sfep->name + sfep->namelen;
 
-	if (xfs_sb_version_hasftype(&mp->m_sb))
+	if (xfs_has_ftype(mp))
 		from++;
 
 	if (!hdr->i8count)
@@ -95,7 +95,7 @@ xfs_dir2_sf_put_ino(
 
 	ASSERT(ino <= XFS_MAXINUMBER);
 
-	if (xfs_sb_version_hasftype(&mp->m_sb))
+	if (xfs_has_ftype(mp))
 		to++;
 
 	if (hdr->i8count)
@@ -135,7 +135,7 @@ xfs_dir2_sf_get_ftype(
 	struct xfs_mount		*mp,
 	struct xfs_dir2_sf_entry	*sfep)
 {
-	if (xfs_sb_version_hasftype(&mp->m_sb)) {
+	if (xfs_has_ftype(mp)) {
 		uint8_t			ftype = sfep->name[sfep->namelen];
 
 		if (ftype < XFS_DIR3_FT_MAX)
@@ -153,7 +153,7 @@ xfs_dir2_sf_put_ftype(
 {
 	ASSERT(ftype < XFS_DIR3_FT_MAX);
 
-	if (xfs_sb_version_hasftype(&mp->m_sb))
+	if (xfs_has_ftype(mp))
 		sfep->name[sfep->namelen] = ftype;
 }
 
@@ -192,7 +192,7 @@ xfs_dir2_block_sfsize(
 	 * if there is a filetype field, add the extra byte to the namelen
 	 * for each entry that we see.
 	 */
-	has_ftype = xfs_sb_version_hasftype(&mp->m_sb) ? 1 : 0;
+	has_ftype = xfs_has_ftype(mp) ? 1 : 0;
 
 	count = i8count = namelen = 0;
 	btp = xfs_dir2_block_tail_p(geo, hdr);
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index 6766417d5ba4..deeb74becabc 100644
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -70,7 +70,7 @@ xfs_dquot_verify(
 		return __this_address;
 
 	if ((ddq->d_type & XFS_DQTYPE_BIGTIME) &&
-	    !xfs_sb_version_hasbigtime(&mp->m_sb))
+	    !xfs_has_bigtime(mp))
 		return __this_address;
 
 	if ((ddq->d_type & XFS_DQTYPE_BIGTIME) && !ddq->d_id)
@@ -106,7 +106,7 @@ xfs_dqblk_verify(
 	struct xfs_dqblk	*dqb,
 	xfs_dqid_t		id)	/* used only during quotacheck */
 {
-	if (xfs_sb_version_hascrc(&mp->m_sb) &&
+	if (xfs_has_crc(mp) &&
 	    !uuid_equal(&dqb->dd_uuid, &mp->m_sb.sb_meta_uuid))
 		return __this_address;
 
@@ -134,7 +134,7 @@ xfs_dqblk_repair(
 	dqb->dd_diskdq.d_type = type;
 	dqb->dd_diskdq.d_id = cpu_to_be32(id);
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		uuid_copy(&dqb->dd_uuid, &mp->m_sb.sb_meta_uuid);
 		xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk),
 				 XFS_DQUOT_CRC_OFF);
@@ -151,7 +151,7 @@ xfs_dquot_buf_verify_crc(
 	int			ndquots;
 	int			i;
 
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return true;
 
 	/*
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 76e2461b9e66..2d7057b7984b 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -9,7 +9,7 @@
 /*
  * XFS On Disk Format Definitions
  *
- * This header file defines all the on-disk format definitions for 
+ * This header file defines all the on-disk format definitions for
  * general XFS objects. Directory and attribute related objects are defined in
  * xfs_da_format.h, which log and log item formats are defined in
  * xfs_log_format.h. Everything else goes here.
@@ -265,7 +265,6 @@ typedef struct xfs_dsb {
 	/* must be padded to 64 bit alignment */
 } xfs_dsb_t;
 
-
 /*
  * Misc. Flags - warning - these will be cleared by xfs_repair unless
  * a feature bit is set when the flag is used.
@@ -280,37 +279,9 @@ typedef struct xfs_dsb {
 
 #define	XFS_SB_VERSION_NUM(sbp)	((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS)
 
-/*
- * The first XFS version we support is a v4 superblock with V2 directories.
- */
-static inline bool xfs_sb_good_v4_features(struct xfs_sb *sbp)
-{
-	if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT))
-		return false;
-	if (!(sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT))
-		return false;
-
-	/* check for unknown features in the fs */
-	if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) ||
-	    ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) &&
-	     (sbp->sb_features2 & ~XFS_SB_VERSION2_OKBITS)))
-		return false;
-
-	return true;
-}
-
-static inline bool xfs_sb_good_version(struct xfs_sb *sbp)
+static inline bool xfs_sb_is_v5(struct xfs_sb *sbp)
 {
-	if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5)
-		return true;
-	if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4)
-		return xfs_sb_good_v4_features(sbp);
-	return false;
-}
-
-static inline bool xfs_sb_version_hasrealtime(struct xfs_sb *sbp)
-{
-	return sbp->sb_rblocks > 0;
+	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
 }
 
 /*
@@ -322,9 +293,10 @@ static inline bool xfs_sb_has_mismatched_features2(struct xfs_sb *sbp)
 	return sbp->sb_bad_features2 != sbp->sb_features2;
 }
 
-static inline bool xfs_sb_version_hasattr(struct xfs_sb *sbp)
+static inline bool xfs_sb_version_hasmorebits(struct xfs_sb *sbp)
 {
-	return (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT);
+	return xfs_sb_is_v5(sbp) ||
+	       (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT);
 }
 
 static inline void xfs_sb_version_addattr(struct xfs_sb *sbp)
@@ -332,87 +304,18 @@ static inline void xfs_sb_version_addattr(struct xfs_sb *sbp)
 	sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT;
 }
 
-static inline bool xfs_sb_version_hasquota(struct xfs_sb *sbp)
-{
-	return (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT);
-}
-
 static inline void xfs_sb_version_addquota(struct xfs_sb *sbp)
 {
 	sbp->sb_versionnum |= XFS_SB_VERSION_QUOTABIT;
 }
 
-static inline bool xfs_sb_version_hasalign(struct xfs_sb *sbp)
-{
-	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 ||
-		(sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT));
-}
-
-static inline bool xfs_sb_version_hasdalign(struct xfs_sb *sbp)
-{
-	return (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT);
-}
-
-static inline bool xfs_sb_version_haslogv2(struct xfs_sb *sbp)
-{
-	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 ||
-	       (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT);
-}
-
-static inline bool xfs_sb_version_hassector(struct xfs_sb *sbp)
-{
-	return (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT);
-}
-
-static inline bool xfs_sb_version_hasasciici(struct xfs_sb *sbp)
-{
-	return (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT);
-}
-
-static inline bool xfs_sb_version_hasmorebits(struct xfs_sb *sbp)
-{
-	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 ||
-	       (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT);
-}
-
-/*
- * sb_features2 bit version macros.
- */
-static inline bool xfs_sb_version_haslazysbcount(struct xfs_sb *sbp)
-{
-	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
-	       (xfs_sb_version_hasmorebits(sbp) &&
-		(sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT));
-}
-
-static inline bool xfs_sb_version_hasattr2(struct xfs_sb *sbp)
-{
-	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
-	       (xfs_sb_version_hasmorebits(sbp) &&
-		(sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT));
-}
-
 static inline void xfs_sb_version_addattr2(struct xfs_sb *sbp)
 {
 	sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT;
 	sbp->sb_features2 |= XFS_SB_VERSION2_ATTR2BIT;
 }
 
-static inline void xfs_sb_version_removeattr2(struct xfs_sb *sbp)
-{
-	sbp->sb_features2 &= ~XFS_SB_VERSION2_ATTR2BIT;
-	if (!sbp->sb_features2)
-		sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT;
-}
-
-static inline bool xfs_sb_version_hasprojid32bit(struct xfs_sb *sbp)
-{
-	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
-	       (xfs_sb_version_hasmorebits(sbp) &&
-		(sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT));
-}
-
-static inline void xfs_sb_version_addprojid32bit(struct xfs_sb *sbp)
+static inline void xfs_sb_version_addprojid32(struct xfs_sb *sbp)
 {
 	sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT;
 	sbp->sb_features2 |= XFS_SB_VERSION2_PROJID32BIT;
@@ -495,106 +398,21 @@ xfs_sb_has_incompat_log_feature(
 	return (sbp->sb_features_log_incompat & feature) != 0;
 }
 
-/*
- * V5 superblock specific feature checks
- */
-static inline bool xfs_sb_version_hascrc(struct xfs_sb *sbp)
-{
-	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
-}
-
-/*
- * v5 file systems support V3 inodes only, earlier file systems support
- * v2 and v1 inodes.
- */
-static inline bool xfs_sb_version_has_v3inode(struct xfs_sb *sbp)
-{
-	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
-}
-
-static inline bool xfs_dinode_good_version(struct xfs_sb *sbp,
-		uint8_t version)
-{
-	if (xfs_sb_version_has_v3inode(sbp))
-		return version == 3;
-	return version == 1 || version == 2;
-}
-
-static inline bool xfs_sb_version_has_pquotino(struct xfs_sb *sbp)
-{
-	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
-}
-
-static inline int xfs_sb_version_hasftype(struct xfs_sb *sbp)
-{
-	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
-		xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_FTYPE)) ||
-	       (xfs_sb_version_hasmorebits(sbp) &&
-		 (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE));
-}
-
-static inline bool xfs_sb_version_hasfinobt(xfs_sb_t *sbp)
-{
-	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) &&
-		(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT);
-}
-
-static inline bool xfs_sb_version_hassparseinodes(struct xfs_sb *sbp)
-{
-	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
-		xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_SPINODES);
-}
-
-/*
- * XFS_SB_FEAT_INCOMPAT_META_UUID indicates that the metadata UUID
- * is stored separately from the user-visible UUID; this allows the
- * user-visible UUID to be changed on V5 filesystems which have a
- * filesystem UUID stamped into every piece of metadata.
- */
-static inline bool xfs_sb_version_hasmetauuid(struct xfs_sb *sbp)
-{
-	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) &&
-		(sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID);
-}
-
-static inline bool xfs_sb_version_hasrmapbt(struct xfs_sb *sbp)
-{
-	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) &&
-		(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_RMAPBT);
-}
-
-static inline bool xfs_sb_version_hasreflink(struct xfs_sb *sbp)
-{
-	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
-		(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_REFLINK);
-}
-
-static inline bool xfs_sb_version_hasbigtime(struct xfs_sb *sbp)
-{
-	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
-		(sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_BIGTIME);
-}
-
-/*
- * Inode btree block counter.  We record the number of inobt and finobt blocks
- * in the AGI header so that we can skip the finobt walk at mount time when
- * setting up per-AG reservations.
- */
-static inline bool xfs_sb_version_hasinobtcounts(struct xfs_sb *sbp)
+static inline void
+xfs_sb_remove_incompat_log_features(
+	struct xfs_sb	*sbp)
 {
-	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
-		(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_INOBTCNT);
+	sbp->sb_features_log_incompat &= ~XFS_SB_FEAT_INCOMPAT_LOG_ALL;
 }
 
-static inline bool xfs_sb_version_needsrepair(struct xfs_sb *sbp)
+static inline void
+xfs_sb_add_incompat_log_features(
+	struct xfs_sb	*sbp,
+	unsigned int	features)
 {
-	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
-		(sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR);
+	sbp->sb_features_log_incompat |= features;
 }
 
-/*
- * end of superblock version macros
- */
 
 static inline bool
 xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino)
@@ -1062,12 +880,12 @@ enum xfs_dinode_fmt {
 /*
  * Inode size for given fs.
  */
-#define XFS_DINODE_SIZE(sbp) \
-	(xfs_sb_version_has_v3inode(sbp) ? \
+#define XFS_DINODE_SIZE(mp) \
+	(xfs_has_v3inodes(mp) ? \
 		sizeof(struct xfs_dinode) : \
 		offsetof(struct xfs_dinode, di_crc))
 #define XFS_LITINO(mp) \
-	((mp)->m_sb.sb_inodesize - XFS_DINODE_SIZE(&(mp)->m_sb))
+	((mp)->m_sb.sb_inodesize - XFS_DINODE_SIZE(mp))
 
 /*
  * Inode data & attribute fork sizes, per inode.
@@ -1454,7 +1272,7 @@ struct xfs_dsymlink_hdr {
 #define XFS_SYMLINK_MAPS 3
 
 #define XFS_SYMLINK_BUF_SPACE(mp, bufsize)	\
-	((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \
+	((bufsize) - (xfs_has_crc((mp)) ? \
 			sizeof(struct xfs_dsymlink_hdr) : 0))
 
 
@@ -1686,7 +1504,7 @@ struct xfs_rmap_key {
 typedef __be32 xfs_rmap_ptr_t;
 
 #define	XFS_RMAP_BLOCK(mp) \
-	(xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \
+	(xfs_has_finobt(((mp))) ? \
 	 XFS_FIBT_BLOCK(mp) + 1 : \
 	 XFS_IBT_BLOCK(mp) + 1)
 
@@ -1918,7 +1736,7 @@ struct xfs_acl {
  * limited only by the maximum size of the xattr that stores the information.
  */
 #define XFS_ACL_MAX_ENTRIES(mp)	\
-	(xfs_sb_version_hascrc(&mp->m_sb) \
+	(xfs_has_crc(mp) \
 		?  (XFS_XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \
 						sizeof(struct xfs_acl_entry) \
 		: 25)
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index aaf8805a82df..994ad783d407 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -58,7 +58,7 @@ xfs_inobt_update(
 	union xfs_btree_rec	rec;
 
 	rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino);
-	if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
+	if (xfs_has_sparseinodes(cur->bc_mp)) {
 		rec.inobt.ir_u.sp.ir_holemask = cpu_to_be16(irec->ir_holemask);
 		rec.inobt.ir_u.sp.ir_count = irec->ir_count;
 		rec.inobt.ir_u.sp.ir_freecount = irec->ir_freecount;
@@ -74,11 +74,11 @@ xfs_inobt_update(
 void
 xfs_inobt_btrec_to_irec(
 	struct xfs_mount		*mp,
-	union xfs_btree_rec		*rec,
+	const union xfs_btree_rec	*rec,
 	struct xfs_inobt_rec_incore	*irec)
 {
 	irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
-	if (xfs_sb_version_hassparseinodes(&mp->m_sb)) {
+	if (xfs_has_sparseinodes(mp)) {
 		irec->ir_holemask = be16_to_cpu(rec->inobt.ir_u.sp.ir_holemask);
 		irec->ir_count = rec->inobt.ir_u.sp.ir_count;
 		irec->ir_freecount = rec->inobt.ir_u.sp.ir_freecount;
@@ -241,7 +241,7 @@ xfs_check_agi_freecount(
 			}
 		} while (i == 1);
 
-		if (!XFS_FORCED_SHUTDOWN(cur->bc_mp))
+		if (!xfs_is_shutdown(cur->bc_mp))
 			ASSERT(freecount == cur->bc_ag.pag->pagi_freecount);
 	}
 	return 0;
@@ -302,7 +302,7 @@ xfs_ialloc_inode_init(
 	 * That means for v3 inode we log the entire buffer rather than just the
 	 * inode cores.
 	 */
-	if (xfs_sb_version_has_v3inode(&mp->m_sb)) {
+	if (xfs_has_v3inodes(mp)) {
 		version = 3;
 		ino = XFS_AGINO_TO_INO(mp, agno, XFS_AGB_TO_AGINO(mp, agbno));
 
@@ -337,7 +337,6 @@ xfs_ialloc_inode_init(
 		xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length));
 		for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) {
 			int	ioffset = i << mp->m_sb.sb_inodelog;
-			uint	isize = XFS_DINODE_SIZE(&mp->m_sb);
 
 			free = xfs_make_iptr(mp, fbuf, i);
 			free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
@@ -354,7 +353,7 @@ xfs_ialloc_inode_init(
 			} else if (tp) {
 				/* just log the inode core */
 				xfs_trans_log_buf(tp, fbuf, ioffset,
-						  ioffset + isize - 1);
+					  ioffset + XFS_DINODE_SIZE(mp) - 1);
 			}
 		}
 
@@ -635,7 +634,7 @@ xfs_ialloc_ag_alloc(
 
 #ifdef DEBUG
 	/* randomly do sparse inode allocations */
-	if (xfs_sb_version_hassparseinodes(&tp->t_mountp->m_sb) &&
+	if (xfs_has_sparseinodes(tp->t_mountp) &&
 	    igeo->ialloc_min_blks < igeo->ialloc_blks)
 		do_sparse = prandom_u32() & 1;
 #endif
@@ -712,7 +711,7 @@ xfs_ialloc_ag_alloc(
 		 */
 		isaligned = 0;
 		if (igeo->ialloc_align) {
-			ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
+			ASSERT(!xfs_has_noalign(args.mp));
 			args.alignment = args.mp->m_dalign;
 			isaligned = 1;
 		} else
@@ -754,7 +753,7 @@ xfs_ialloc_ag_alloc(
 	 * Finally, try a sparse allocation if the filesystem supports it and
 	 * the sparse allocation length is smaller than a full chunk.
 	 */
-	if (xfs_sb_version_hassparseinodes(&args.mp->m_sb) &&
+	if (xfs_has_sparseinodes(args.mp) &&
 	    igeo->ialloc_min_blks < igeo->ialloc_blks &&
 	    args.fsbno == NULLFSBLOCK) {
 sparse_alloc:
@@ -856,7 +855,7 @@ sparse_alloc:
 		 * from the previous call. Set merge false to replace any
 		 * existing record with this one.
 		 */
-		if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
+		if (xfs_has_finobt(args.mp)) {
 			error = xfs_inobt_insert_sprec(args.mp, tp, agbp, pag,
 				       XFS_BTNUM_FINO, &rec, false);
 			if (error)
@@ -869,7 +868,7 @@ sparse_alloc:
 		if (error)
 			return error;
 
-		if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
+		if (xfs_has_finobt(args.mp)) {
 			error = xfs_inobt_insert(args.mp, tp, agbp, pag, newino,
 						 newlen, XFS_BTNUM_FINO);
 			if (error)
@@ -1448,7 +1447,7 @@ xfs_dialloc_ag(
 	int				offset;
 	int				i;
 
-	if (!xfs_sb_version_hasfinobt(&mp->m_sb))
+	if (!xfs_has_finobt(mp))
 		return xfs_dialloc_ag_inobt(tp, agbp, pag, parent, inop);
 
 	/*
@@ -1784,7 +1783,7 @@ xfs_dialloc(
 				break;
 		}
 
-		if (XFS_FORCED_SHUTDOWN(mp)) {
+		if (xfs_is_shutdown(mp)) {
 			error = -EFSCORRUPTED;
 			break;
 		}
@@ -1953,8 +1952,7 @@ xfs_difree_inobt(
 	 * remove the chunk if the block size is large enough for multiple inode
 	 * chunks (that might not be free).
 	 */
-	if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
-	    rec.ir_free == XFS_INOBT_ALL_FREE &&
+	if (!xfs_has_ikeep(mp) && rec.ir_free == XFS_INOBT_ALL_FREE &&
 	    mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
 		struct xfs_perag	*pag = agbp->b_pag;
 
@@ -1994,7 +1992,7 @@ xfs_difree_inobt(
 			goto error0;
 		}
 
-		/* 
+		/*
 		 * Change the inode free counts and log the ag/sb changes.
 		 */
 		be32_add_cpu(&agi->agi_freecount, 1);
@@ -2098,9 +2096,8 @@ xfs_difree_finobt(
 	 * enough for multiple chunks. Leave the finobt record to remain in sync
 	 * with the inobt.
 	 */
-	if (rec.ir_free == XFS_INOBT_ALL_FREE &&
-	    mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK &&
-	    !(mp->m_flags & XFS_MOUNT_IKEEP)) {
+	if (!xfs_has_ikeep(mp) && rec.ir_free == XFS_INOBT_ALL_FREE &&
+	    mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
 		error = xfs_btree_delete(cur, &i);
 		if (error)
 			goto error;
@@ -2189,7 +2186,7 @@ xfs_difree(
 	/*
 	 * Fix up the free inode btree.
 	 */
-	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
+	if (xfs_has_finobt(mp)) {
 		error = xfs_difree_finobt(mp, tp, agbp, pag, agino, &rec);
 		if (error)
 			goto error0;
@@ -2478,7 +2475,7 @@ xfs_agi_verify(
 	struct xfs_agi	*agi = bp->b_addr;
 	int		i;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid))
 			return __this_address;
 		if (!xfs_log_check_lsn(mp, be64_to_cpu(agi->agi_lsn)))
@@ -2497,7 +2494,7 @@ xfs_agi_verify(
 	    be32_to_cpu(agi->agi_level) > M_IGEO(mp)->inobt_maxlevels)
 		return __this_address;
 
-	if (xfs_sb_version_hasfinobt(&mp->m_sb) &&
+	if (xfs_has_finobt(mp) &&
 	    (be32_to_cpu(agi->agi_free_level) < 1 ||
 	     be32_to_cpu(agi->agi_free_level) > M_IGEO(mp)->inobt_maxlevels))
 		return __this_address;
@@ -2528,7 +2525,7 @@ xfs_agi_read_verify(
 	struct xfs_mount *mp = bp->b_mount;
 	xfs_failaddr_t	fa;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb) &&
+	if (xfs_has_crc(mp) &&
 	    !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF))
 		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
 	else {
@@ -2553,7 +2550,7 @@ xfs_agi_write_verify(
 		return;
 	}
 
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return;
 
 	if (bip)
@@ -2626,7 +2623,7 @@ xfs_ialloc_read_agi(
 	 * we are in the middle of a forced shutdown.
 	 */
 	ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
-		XFS_FORCED_SHUTDOWN(mp));
+		xfs_is_shutdown(mp));
 	return 0;
 }
 
@@ -2716,7 +2713,7 @@ struct xfs_ialloc_count_inodes {
 STATIC int
 xfs_ialloc_count_inodes_rec(
 	struct xfs_btree_cur		*cur,
-	union xfs_btree_rec		*rec,
+	const union xfs_btree_rec	*rec,
 	void				*priv)
 {
 	struct xfs_inobt_rec_incore	irec;
@@ -2773,7 +2770,7 @@ xfs_ialloc_setup_geometry(
 	uint			inodes;
 
 	igeo->new_diflags2 = 0;
-	if (xfs_sb_version_hasbigtime(&mp->m_sb))
+	if (xfs_has_bigtime(mp))
 		igeo->new_diflags2 |= XFS_DIFLAG2_BIGTIME;
 
 	/* Compute inode btree geometry. */
@@ -2828,7 +2825,7 @@ xfs_ialloc_setup_geometry(
 	 * cannot change the behavior.
 	 */
 	igeo->inode_cluster_size_raw = XFS_INODE_BIG_CLUSTER_SIZE;
-	if (xfs_sb_version_has_v3inode(&mp->m_sb)) {
+	if (xfs_has_v3inodes(mp)) {
 		int	new_size = igeo->inode_cluster_size_raw;
 
 		new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
@@ -2846,7 +2843,7 @@ xfs_ialloc_setup_geometry(
 	igeo->inodes_per_cluster = XFS_FSB_TO_INO(mp, igeo->blocks_per_cluster);
 
 	/* Calculate inode cluster alignment. */
-	if (xfs_sb_version_hasalign(&mp->m_sb) &&
+	if (xfs_has_align(mp) &&
 	    mp->m_sb.sb_inoalignmt >= igeo->blocks_per_cluster)
 		igeo->cluster_align = mp->m_sb.sb_inoalignmt;
 	else
@@ -2894,15 +2891,15 @@ xfs_ialloc_calc_rootino(
 	first_bno += xfs_alloc_min_freelist(mp, NULL);
 
 	/* ...the free inode btree root... */
-	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+	if (xfs_has_finobt(mp))
 		first_bno++;
 
 	/* ...the reverse mapping btree root... */
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+	if (xfs_has_rmapbt(mp))
 		first_bno++;
 
 	/* ...the reference count btree... */
-	if (xfs_sb_version_hasreflink(&mp->m_sb))
+	if (xfs_has_reflink(mp))
 		first_bno++;
 
 	/*
@@ -2920,9 +2917,9 @@ xfs_ialloc_calc_rootino(
 	 * Now round first_bno up to whatever allocation alignment is given
 	 * by the filesystem or was passed in.
 	 */
-	if (xfs_sb_version_hasdalign(&mp->m_sb) && igeo->ialloc_align > 0)
+	if (xfs_has_dalign(mp) && igeo->ialloc_align > 0)
 		first_bno = roundup(first_bno, sunit);
-	else if (xfs_sb_version_hasalign(&mp->m_sb) &&
+	else if (xfs_has_align(mp) &&
 			mp->m_sb.sb_inoalignmt > 1)
 		first_bno = roundup(first_bno, mp->m_sb.sb_inoalignmt);
 
@@ -2953,7 +2950,7 @@ xfs_ialloc_check_shrink(
 	int			has;
 	int			error;
 
-	if (!xfs_sb_version_hassparseinodes(&mp->m_sb))
+	if (!xfs_has_sparseinodes(mp))
 		return 0;
 
 	pag = xfs_perag_get(mp, agno);
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index 9a2112b4ad5e..8b5c2b709022 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -106,7 +106,8 @@ int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
 		xfs_agnumber_t agno, struct xfs_buf **bpp);
 
 union xfs_btree_rec;
-void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, union xfs_btree_rec *rec,
+void xfs_inobt_btrec_to_irec(struct xfs_mount *mp,
+		const union xfs_btree_rec *rec,
 		struct xfs_inobt_rec_incore *irec);
 int xfs_ialloc_has_inodes_at_extent(struct xfs_btree_cur *cur,
 		xfs_agblock_t bno, xfs_extlen_t len, bool *exists);
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 823a038939f8..27190840c5d8 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -40,9 +40,9 @@ xfs_inobt_dup_cursor(
 
 STATIC void
 xfs_inobt_set_root(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*nptr,
-	int			inc)	/* level change */
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*nptr,
+	int				inc)	/* level change */
 {
 	struct xfs_buf		*agbp = cur->bc_ag.agbp;
 	struct xfs_agi		*agi = agbp->b_addr;
@@ -54,9 +54,9 @@ xfs_inobt_set_root(
 
 STATIC void
 xfs_finobt_set_root(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*nptr,
-	int			inc)	/* level change */
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*nptr,
+	int				inc)	/* level change */
 {
 	struct xfs_buf		*agbp = cur->bc_ag.agbp;
 	struct xfs_agi		*agi = agbp->b_addr;
@@ -76,7 +76,7 @@ xfs_inobt_mod_blockcount(
 	struct xfs_buf		*agbp = cur->bc_ag.agbp;
 	struct xfs_agi		*agi = agbp->b_addr;
 
-	if (!xfs_sb_version_hasinobtcounts(&cur->bc_mp->m_sb))
+	if (!xfs_has_inobtcounts(cur->bc_mp))
 		return;
 
 	if (cur->bc_btnum == XFS_BTNUM_FINO)
@@ -88,11 +88,11 @@ xfs_inobt_mod_blockcount(
 
 STATIC int
 __xfs_inobt_alloc_block(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*start,
-	union xfs_btree_ptr	*new,
-	int			*stat,
-	enum xfs_ag_resv_type	resv)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*start,
+	union xfs_btree_ptr		*new,
+	int				*stat,
+	enum xfs_ag_resv_type		resv)
 {
 	xfs_alloc_arg_t		args;		/* block allocation args */
 	int			error;		/* error return value */
@@ -127,20 +127,20 @@ __xfs_inobt_alloc_block(
 
 STATIC int
 xfs_inobt_alloc_block(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*start,
-	union xfs_btree_ptr	*new,
-	int			*stat)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*start,
+	union xfs_btree_ptr		*new,
+	int				*stat)
 {
 	return __xfs_inobt_alloc_block(cur, start, new, stat, XFS_AG_RESV_NONE);
 }
 
 STATIC int
 xfs_finobt_alloc_block(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*start,
-	union xfs_btree_ptr	*new,
-	int			*stat)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*start,
+	union xfs_btree_ptr		*new,
+	int				*stat)
 {
 	if (cur->bc_mp->m_finobt_nores)
 		return xfs_inobt_alloc_block(cur, start, new, stat);
@@ -156,7 +156,7 @@ __xfs_inobt_free_block(
 {
 	xfs_inobt_mod_blockcount(cur, -1);
 	return xfs_free_extent(cur->bc_tp,
-			XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1,
+			XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)), 1,
 			&XFS_RMAP_OINFO_INOBT, resv);
 }
 
@@ -188,18 +188,18 @@ xfs_inobt_get_maxrecs(
 
 STATIC void
 xfs_inobt_init_key_from_rec(
-	union xfs_btree_key	*key,
-	union xfs_btree_rec	*rec)
+	union xfs_btree_key		*key,
+	const union xfs_btree_rec	*rec)
 {
 	key->inobt.ir_startino = rec->inobt.ir_startino;
 }
 
 STATIC void
 xfs_inobt_init_high_key_from_rec(
-	union xfs_btree_key	*key,
-	union xfs_btree_rec	*rec)
+	union xfs_btree_key		*key,
+	const union xfs_btree_rec	*rec)
 {
-	__u32			x;
+	__u32				x;
 
 	x = be32_to_cpu(rec->inobt.ir_startino);
 	x += XFS_INODES_PER_CHUNK - 1;
@@ -212,7 +212,7 @@ xfs_inobt_init_rec_from_cur(
 	union xfs_btree_rec	*rec)
 {
 	rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino);
-	if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
+	if (xfs_has_sparseinodes(cur->bc_mp)) {
 		rec->inobt.ir_u.sp.ir_holemask =
 					cpu_to_be16(cur->bc_rec.i.ir_holemask);
 		rec->inobt.ir_u.sp.ir_count = cur->bc_rec.i.ir_count;
@@ -253,8 +253,8 @@ xfs_finobt_init_ptr_from_cur(
 
 STATIC int64_t
 xfs_inobt_key_diff(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*key)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*key)
 {
 	return (int64_t)be32_to_cpu(key->inobt.ir_startino) -
 			  cur->bc_rec.i.ir_startino;
@@ -262,9 +262,9 @@ xfs_inobt_key_diff(
 
 STATIC int64_t
 xfs_inobt_diff_two_keys(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*k1,
-	union xfs_btree_key	*k2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*k1,
+	const union xfs_btree_key	*k2)
 {
 	return (int64_t)be32_to_cpu(k1->inobt.ir_startino) -
 			  be32_to_cpu(k2->inobt.ir_startino);
@@ -292,7 +292,7 @@ xfs_inobt_verify(
 	 * but beware of the landmine (i.e. need to check pag->pagi_init) if we
 	 * ever do.
 	 */
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		fa = xfs_btree_sblock_v5hdr_verify(bp);
 		if (fa)
 			return fa;
@@ -360,9 +360,9 @@ const struct xfs_buf_ops xfs_finobt_buf_ops = {
 
 STATIC int
 xfs_inobt_keys_inorder(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*k1,
-	union xfs_btree_key	*k2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*k1,
+	const union xfs_btree_key	*k2)
 {
 	return be32_to_cpu(k1->inobt.ir_startino) <
 		be32_to_cpu(k2->inobt.ir_startino);
@@ -370,9 +370,9 @@ xfs_inobt_keys_inorder(
 
 STATIC int
 xfs_inobt_recs_inorder(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_rec	*r1,
-	union xfs_btree_rec	*r2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_rec	*r1,
+	const union xfs_btree_rec	*r2)
 {
 	return be32_to_cpu(r1->inobt.ir_startino) + XFS_INODES_PER_CHUNK <=
 		be32_to_cpu(r2->inobt.ir_startino);
@@ -446,7 +446,7 @@ xfs_inobt_init_common(
 
 	cur->bc_blocklog = mp->m_sb.sb_blocklog;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb))
+	if (xfs_has_crc(mp))
 		cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
 
 	/* take a reference for the cursor */
@@ -511,7 +511,7 @@ xfs_inobt_commit_staged_btree(
 		fields = XFS_AGI_ROOT | XFS_AGI_LEVEL;
 		agi->agi_root = cpu_to_be32(afake->af_root);
 		agi->agi_level = cpu_to_be32(afake->af_levels);
-		if (xfs_sb_version_hasinobtcounts(&cur->bc_mp->m_sb)) {
+		if (xfs_has_inobtcounts(cur->bc_mp)) {
 			agi->agi_iblocks = cpu_to_be32(afake->af_blocks);
 			fields |= XFS_AGI_IBLOCKS;
 		}
@@ -521,7 +521,7 @@ xfs_inobt_commit_staged_btree(
 		fields = XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL;
 		agi->agi_free_root = cpu_to_be32(afake->af_root);
 		agi->agi_free_level = cpu_to_be32(afake->af_levels);
-		if (xfs_sb_version_hasinobtcounts(&cur->bc_mp->m_sb)) {
+		if (xfs_has_inobtcounts(cur->bc_mp)) {
 			agi->agi_fblocks = cpu_to_be32(afake->af_blocks);
 			fields |= XFS_AGI_IBLOCKS;
 		}
@@ -737,10 +737,10 @@ xfs_finobt_calc_reserves(
 	xfs_extlen_t		tree_len = 0;
 	int			error;
 
-	if (!xfs_sb_version_hasfinobt(&mp->m_sb))
+	if (!xfs_has_finobt(mp))
 		return 0;
 
-	if (xfs_sb_version_hasinobtcounts(&mp->m_sb))
+	if (xfs_has_inobtcounts(mp))
 		error = xfs_finobt_read_blocks(mp, tp, pag, &tree_len);
 	else
 		error = xfs_inobt_count_blocks(mp, tp, pag, XFS_BTNUM_FINO,
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h
index e530c82b2217..8a322d402e61 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.h
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.h
@@ -19,7 +19,7 @@ struct xfs_perag;
  * Btree block header size depends on a superblock flag.
  */
 #define XFS_INOBT_BLOCK_LEN(mp) \
-	(xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
+	(xfs_has_crc(((mp))) ? \
 		XFS_BTREE_SBLOCK_CRC_LEN : XFS_BTREE_SBLOCK_LEN)
 
 /*
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 84ea2e0af9f0..3932b4ebf903 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -48,7 +48,7 @@ xfs_inode_buf_verify(
 	/*
 	 * Validate the magic number and version of every inode in the buffer
 	 */
-	agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
+	agno = xfs_daddr_to_agno(mp, xfs_buf_daddr(bp));
 	ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
 	for (i = 0; i < ni; i++) {
 		int		di_ok;
@@ -58,7 +58,7 @@ xfs_inode_buf_verify(
 		dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
 		unlinked_ino = be32_to_cpu(dip->di_next_unlinked);
 		di_ok = xfs_verify_magic16(bp, dip->di_magic) &&
-			xfs_dinode_good_version(&mp->m_sb, dip->di_version) &&
+			xfs_dinode_good_version(mp, dip->di_version) &&
 			xfs_verify_agino_or_null(mp, agno, unlinked_ino);
 		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
 						XFS_ERRTAG_ITOBP_INOTOBP))) {
@@ -71,7 +71,7 @@ xfs_inode_buf_verify(
 #ifdef DEBUG
 			xfs_alert(mp,
 				"bad inode magic/vsn daddr %lld #%d (magic=%x)",
-				(unsigned long long)bp->b_bn, i,
+				(unsigned long long)xfs_buf_daddr(bp), i,
 				be16_to_cpu(dip->di_magic));
 #endif
 			xfs_buf_verifier_error(bp, -EFSCORRUPTED,
@@ -192,7 +192,7 @@ xfs_inode_from_disk(
 	 * inode. If the inode is unused, mode is zero and we shouldn't mess
 	 * with the uninitialized part of it.
 	 */
-	if (!xfs_sb_version_has_v3inode(&ip->i_mount->m_sb))
+	if (!xfs_has_v3inodes(ip->i_mount))
 		ip->i_flushiter = be16_to_cpu(from->di_flushiter);
 	inode->i_generation = be32_to_cpu(from->di_gen);
 	inode->i_mode = be16_to_cpu(from->di_mode);
@@ -235,7 +235,7 @@ xfs_inode_from_disk(
 	if (from->di_dmevmask || from->di_dmstate)
 		xfs_iflags_set(ip, XFS_IPRESERVE_DM_FIELDS);
 
-	if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
+	if (xfs_has_v3inodes(ip->i_mount)) {
 		inode_set_iversion_queried(inode,
 					   be64_to_cpu(from->di_changecount));
 		ip->i_crtime = xfs_inode_from_disk_ts(from, from->di_crtime);
@@ -313,7 +313,7 @@ xfs_inode_to_disk(
 	to->di_aformat = xfs_ifork_format(ip->i_afp);
 	to->di_flags = cpu_to_be16(ip->i_diflags);
 
-	if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
+	if (xfs_has_v3inodes(ip->i_mount)) {
 		to->di_version = 3;
 		to->di_changecount = cpu_to_be64(inode_peek_iversion(inode));
 		to->di_crtime = xfs_inode_to_disk_ts(ip, ip->i_crtime);
@@ -413,7 +413,7 @@ xfs_dinode_verify(
 
 	/* Verify v3 integrity information first */
 	if (dip->di_version >= 3) {
-		if (!xfs_sb_version_has_v3inode(&mp->m_sb))
+		if (!xfs_has_v3inodes(mp))
 			return __this_address;
 		if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
 				      XFS_DINODE_CRC_OFF))
@@ -515,7 +515,7 @@ xfs_dinode_verify(
 
 	/* don't allow reflink/cowextsize if we don't have reflink */
 	if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) &&
-	     !xfs_sb_version_hasreflink(&mp->m_sb))
+	     !xfs_has_reflink(mp))
 		return __this_address;
 
 	/* only regular files get reflink */
@@ -534,7 +534,7 @@ xfs_dinode_verify(
 
 	/* bigtime iflag can only happen on bigtime filesystems */
 	if (xfs_dinode_has_bigtime(dip) &&
-	    !xfs_sb_version_hasbigtime(&mp->m_sb))
+	    !xfs_has_bigtime(mp))
 		return __this_address;
 
 	return NULL;
@@ -550,7 +550,7 @@ xfs_dinode_calc_crc(
 	if (dip->di_version < 3)
 		return;
 
-	ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
+	ASSERT(xfs_has_crc(mp));
 	crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize,
 			      XFS_DINODE_CRC_OFF);
 	dip->di_crc = xfs_end_cksum(crc);
@@ -677,7 +677,7 @@ xfs_inode_validate_cowextsize(
 	hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE);
 	cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize);
 
-	if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb))
+	if (hint_flag && !xfs_has_reflink(mp))
 		return __this_address;
 
 	if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode)))
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 7f865bb4df84..585ed5a110af 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -21,7 +21,7 @@ struct xfs_imap {
 
 int	xfs_imap_to_bp(struct xfs_mount *mp, struct xfs_trans *tp,
 		       struct xfs_imap *imap, struct xfs_buf **bpp);
-void	xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *);
+void	xfs_dinode_calc_crc(struct xfs_mount *mp, struct xfs_dinode *dip);
 void	xfs_inode_to_disk(struct xfs_inode *ip, struct xfs_dinode *to,
 			  xfs_lsn_t lsn);
 int	xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from);
@@ -42,4 +42,13 @@ static inline uint64_t xfs_inode_encode_bigtime(struct timespec64 tv)
 struct timespec64 xfs_inode_from_disk_ts(struct xfs_dinode *dip,
 		const xfs_timestamp_t ts);
 
+static inline bool
+xfs_dinode_good_version(struct xfs_mount *mp, uint8_t version)
+{
+	if (xfs_has_v3inodes(mp))
+		return version == 3;
+	return version == 1 || version == 2;
+}
+
+
 #endif	/* __XFS_INODE_BUF_H__ */
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 2c5bcbc19264..b322db523d65 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -41,10 +41,10 @@ typedef uint32_t xlog_tid_t;
 #define XFS_MIN_LOG_FACTOR	3
 
 #define XLOG_REC_SHIFT(log) \
-	BTOBB(1 << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \
+	BTOBB(1 << (xfs_has_logv2(log->l_mp) ? \
 	 XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
 #define XLOG_TOTAL_REC_SHIFT(log) \
-	BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \
+	BTOBB(XLOG_MAX_ICLOGS << (xfs_has_logv2(log->l_mp) ? \
 	 XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
 
 /* get lsn fields */
@@ -434,7 +434,7 @@ struct xfs_log_dinode {
 };
 
 #define xfs_log_dinode_size(mp)						\
-	(xfs_sb_version_has_v3inode(&(mp)->m_sb) ?			\
+	(xfs_has_v3inodes((mp)) ?					\
 		sizeof(struct xfs_log_dinode) :				\
 		offsetof(struct xfs_log_dinode, di_next_unlinked))
 
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 3cca2bfe714c..ff69a0000817 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -122,6 +122,8 @@ void xlog_buf_readahead(struct xlog *log, xfs_daddr_t blkno, uint len,
 		const struct xfs_buf_ops *ops);
 bool xlog_is_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
 
+int xlog_recover_iget(struct xfs_mount *mp, xfs_ino_t ino,
+		struct xfs_inode **ipp);
 void xlog_recover_release_intent(struct xlog *log, unsigned short intent_type,
 		uint64_t intent_id);
 
diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c
index 7f55eb3f3653..67798ff5e14e 100644
--- a/fs/xfs/libxfs/xfs_log_rlimit.c
+++ b/fs/xfs/libxfs/xfs_log_rlimit.c
@@ -92,7 +92,7 @@ xfs_log_calc_minimum_size(
 	if (tres.tr_logcount > 1)
 		max_logres *= tres.tr_logcount;
 
-	if (xfs_sb_version_haslogv2(&mp->m_sb) && mp->m_sb.sb_logsunit > 1)
+	if (xfs_has_logv2(mp) && mp->m_sb.sb_logsunit > 1)
 		lsunit = BTOBB(mp->m_sb.sb_logsunit);
 
 	/*
diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index 0f0af4e35032..a02c5062f9b2 100644
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -60,37 +60,15 @@ typedef uint8_t		xfs_dqtype_t;
 #define XFS_DQUOT_LOGRES(mp)	\
 	((sizeof(struct xfs_dq_logformat) + sizeof(struct xfs_disk_dquot)) * 6)
 
-#define XFS_IS_QUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
-#define XFS_IS_UQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_UQUOTA_ACCT)
-#define XFS_IS_PQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_PQUOTA_ACCT)
-#define XFS_IS_GQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_GQUOTA_ACCT)
+#define XFS_IS_QUOTA_ON(mp)		((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
+#define XFS_IS_UQUOTA_ON(mp)		((mp)->m_qflags & XFS_UQUOTA_ACCT)
+#define XFS_IS_PQUOTA_ON(mp)		((mp)->m_qflags & XFS_PQUOTA_ACCT)
+#define XFS_IS_GQUOTA_ON(mp)		((mp)->m_qflags & XFS_GQUOTA_ACCT)
 #define XFS_IS_UQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_UQUOTA_ENFD)
 #define XFS_IS_GQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_GQUOTA_ENFD)
 #define XFS_IS_PQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_PQUOTA_ENFD)
 
 /*
- * Incore only flags for quotaoff - these bits get cleared when quota(s)
- * are in the process of getting turned off. These flags are in m_qflags but
- * never in sb_qflags.
- */
-#define XFS_UQUOTA_ACTIVE	0x1000  /* uquotas are being turned off */
-#define XFS_GQUOTA_ACTIVE	0x2000  /* gquotas are being turned off */
-#define XFS_PQUOTA_ACTIVE	0x4000  /* pquotas are being turned off */
-#define XFS_ALL_QUOTA_ACTIVE	\
-	(XFS_UQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE)
-
-/*
- * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees
- * quota will be not be switched off as long as that inode lock is held.
- */
-#define XFS_IS_QUOTA_ON(mp)	((mp)->m_qflags & (XFS_UQUOTA_ACTIVE | \
-						   XFS_GQUOTA_ACTIVE | \
-						   XFS_PQUOTA_ACTIVE))
-#define XFS_IS_UQUOTA_ON(mp)	((mp)->m_qflags & XFS_UQUOTA_ACTIVE)
-#define XFS_IS_GQUOTA_ON(mp)	((mp)->m_qflags & XFS_GQUOTA_ACTIVE)
-#define XFS_IS_PQUOTA_ON(mp)	((mp)->m_qflags & XFS_PQUOTA_ACTIVE)
-
-/*
  * Flags to tell various functions what to do. Not all of these are meaningful
  * to a single function. None of these XFS_QMOPT_* flags are meant to have
  * persistent values (ie. their values can and will change between versions)
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 860a0c9801ba..e5d767a7fc5d 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -91,7 +91,7 @@ xfs_refcount_lookup_eq(
 /* Convert on-disk record to in-core format. */
 void
 xfs_refcount_btrec_to_irec(
-	union xfs_btree_rec		*rec,
+	const union xfs_btree_rec	*rec,
 	struct xfs_refcount_irec	*irec)
 {
 	irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock);
@@ -1253,7 +1253,7 @@ xfs_refcount_increase_extent(
 	struct xfs_trans		*tp,
 	struct xfs_bmbt_irec		*PREV)
 {
-	if (!xfs_sb_version_hasreflink(&tp->t_mountp->m_sb))
+	if (!xfs_has_reflink(tp->t_mountp))
 		return;
 
 	__xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, PREV->br_startblock,
@@ -1268,7 +1268,7 @@ xfs_refcount_decrease_extent(
 	struct xfs_trans		*tp,
 	struct xfs_bmbt_irec		*PREV)
 {
-	if (!xfs_sb_version_hasreflink(&tp->t_mountp->m_sb))
+	if (!xfs_has_reflink(tp->t_mountp))
 		return;
 
 	__xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, PREV->br_startblock,
@@ -1617,7 +1617,7 @@ xfs_refcount_alloc_cow_extent(
 {
 	struct xfs_mount		*mp = tp->t_mountp;
 
-	if (!xfs_sb_version_hasreflink(&mp->m_sb))
+	if (!xfs_has_reflink(mp))
 		return;
 
 	__xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len);
@@ -1636,7 +1636,7 @@ xfs_refcount_free_cow_extent(
 {
 	struct xfs_mount		*mp = tp->t_mountp;
 
-	if (!xfs_sb_version_hasreflink(&mp->m_sb))
+	if (!xfs_has_reflink(mp))
 		return;
 
 	/* Remove rmap entry */
@@ -1654,7 +1654,7 @@ struct xfs_refcount_recovery {
 STATIC int
 xfs_refcount_recover_extent(
 	struct xfs_btree_cur		*cur,
-	union xfs_btree_rec		*rec,
+	const union xfs_btree_rec	*rec,
 	void				*priv)
 {
 	struct list_head		*debris = priv;
diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h
index 9f6e9aae4da0..02cb3aa405be 100644
--- a/fs/xfs/libxfs/xfs_refcount.h
+++ b/fs/xfs/libxfs/xfs_refcount.h
@@ -78,7 +78,7 @@ static inline xfs_fileoff_t xfs_refcount_max_unmap(int log_res)
 extern int xfs_refcount_has_record(struct xfs_btree_cur *cur,
 		xfs_agblock_t bno, xfs_extlen_t len, bool *exists);
 union xfs_btree_rec;
-extern void xfs_refcount_btrec_to_irec(union xfs_btree_rec *rec,
+extern void xfs_refcount_btrec_to_irec(const union xfs_btree_rec *rec,
 		struct xfs_refcount_irec *irec);
 extern int xfs_refcount_insert(struct xfs_btree_cur *cur,
 		struct xfs_refcount_irec *irec, int *stat);
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 92d336c17e83..1ef9b99962ab 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -31,9 +31,9 @@ xfs_refcountbt_dup_cursor(
 
 STATIC void
 xfs_refcountbt_set_root(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*ptr,
-	int			inc)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*ptr,
+	int				inc)
 {
 	struct xfs_buf		*agbp = cur->bc_ag.agbp;
 	struct xfs_agf		*agf = agbp->b_addr;
@@ -51,10 +51,10 @@ xfs_refcountbt_set_root(
 
 STATIC int
 xfs_refcountbt_alloc_block(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*start,
-	union xfs_btree_ptr	*new,
-	int			*stat)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*start,
+	union xfs_btree_ptr		*new,
+	int				*stat)
 {
 	struct xfs_buf		*agbp = cur->bc_ag.agbp;
 	struct xfs_agf		*agf = agbp->b_addr;
@@ -102,7 +102,7 @@ xfs_refcountbt_free_block(
 	struct xfs_mount	*mp = cur->bc_mp;
 	struct xfs_buf		*agbp = cur->bc_ag.agbp;
 	struct xfs_agf		*agf = agbp->b_addr;
-	xfs_fsblock_t		fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
+	xfs_fsblock_t		fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
 	int			error;
 
 	trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.pag->pag_agno,
@@ -135,18 +135,18 @@ xfs_refcountbt_get_maxrecs(
 
 STATIC void
 xfs_refcountbt_init_key_from_rec(
-	union xfs_btree_key	*key,
-	union xfs_btree_rec	*rec)
+	union xfs_btree_key		*key,
+	const union xfs_btree_rec	*rec)
 {
 	key->refc.rc_startblock = rec->refc.rc_startblock;
 }
 
 STATIC void
 xfs_refcountbt_init_high_key_from_rec(
-	union xfs_btree_key	*key,
-	union xfs_btree_rec	*rec)
+	union xfs_btree_key		*key,
+	const union xfs_btree_rec	*rec)
 {
-	__u32			x;
+	__u32				x;
 
 	x = be32_to_cpu(rec->refc.rc_startblock);
 	x += be32_to_cpu(rec->refc.rc_blockcount) - 1;
@@ -177,20 +177,20 @@ xfs_refcountbt_init_ptr_from_cur(
 
 STATIC int64_t
 xfs_refcountbt_key_diff(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*key)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*key)
 {
 	struct xfs_refcount_irec	*rec = &cur->bc_rec.rc;
-	struct xfs_refcount_key		*kp = &key->refc;
+	const struct xfs_refcount_key	*kp = &key->refc;
 
 	return (int64_t)be32_to_cpu(kp->rc_startblock) - rec->rc_startblock;
 }
 
 STATIC int64_t
 xfs_refcountbt_diff_two_keys(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*k1,
-	union xfs_btree_key	*k2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*k1,
+	const union xfs_btree_key	*k2)
 {
 	return (int64_t)be32_to_cpu(k1->refc.rc_startblock) -
 			  be32_to_cpu(k2->refc.rc_startblock);
@@ -209,7 +209,7 @@ xfs_refcountbt_verify(
 	if (!xfs_verify_magic(bp, block->bb_magic))
 		return __this_address;
 
-	if (!xfs_sb_version_hasreflink(&mp->m_sb))
+	if (!xfs_has_reflink(mp))
 		return __this_address;
 	fa = xfs_btree_sblock_v5hdr_verify(bp);
 	if (fa)
@@ -269,9 +269,9 @@ const struct xfs_buf_ops xfs_refcountbt_buf_ops = {
 
 STATIC int
 xfs_refcountbt_keys_inorder(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*k1,
-	union xfs_btree_key	*k2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*k1,
+	const union xfs_btree_key	*k2)
 {
 	return be32_to_cpu(k1->refc.rc_startblock) <
 	       be32_to_cpu(k2->refc.rc_startblock);
@@ -279,9 +279,9 @@ xfs_refcountbt_keys_inorder(
 
 STATIC int
 xfs_refcountbt_recs_inorder(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_rec	*r1,
-	union xfs_btree_rec	*r2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_rec	*r1,
+	const union xfs_btree_rec	*r2)
 {
 	return  be32_to_cpu(r1->refc.rc_startblock) +
 		be32_to_cpu(r1->refc.rc_blockcount) <=
@@ -462,7 +462,7 @@ xfs_refcountbt_calc_reserves(
 	xfs_extlen_t		tree_len;
 	int			error;
 
-	if (!xfs_sb_version_hasreflink(&mp->m_sb))
+	if (!xfs_has_reflink(mp))
 		return 0;
 
 	error = xfs_alloc_read_agf(mp, tp, pag->pag_agno, 0, &agbp);
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index d1dfad0204e3..f45929b1b94a 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -179,8 +179,8 @@ done:
 /* Convert an internal btree record to an rmap record. */
 int
 xfs_rmap_btrec_to_irec(
-	union xfs_btree_rec	*rec,
-	struct xfs_rmap_irec	*irec)
+	const union xfs_btree_rec	*rec,
+	struct xfs_rmap_irec		*irec)
 {
 	irec->rm_startblock = be32_to_cpu(rec->rmap.rm_startblock);
 	irec->rm_blockcount = be32_to_cpu(rec->rmap.rm_blockcount);
@@ -255,9 +255,9 @@ struct xfs_find_left_neighbor_info {
 /* For each rmap given, figure out if it matches the key we want. */
 STATIC int
 xfs_rmap_find_left_neighbor_helper(
-	struct xfs_btree_cur	*cur,
-	struct xfs_rmap_irec	*rec,
-	void			*priv)
+	struct xfs_btree_cur		*cur,
+	const struct xfs_rmap_irec	*rec,
+	void				*priv)
 {
 	struct xfs_find_left_neighbor_info	*info = priv;
 
@@ -331,9 +331,9 @@ xfs_rmap_find_left_neighbor(
 /* For each rmap given, figure out if it matches the key we want. */
 STATIC int
 xfs_rmap_lookup_le_range_helper(
-	struct xfs_btree_cur	*cur,
-	struct xfs_rmap_irec	*rec,
-	void			*priv)
+	struct xfs_btree_cur		*cur,
+	const struct xfs_rmap_irec	*rec,
+	void				*priv)
 {
 	struct xfs_find_left_neighbor_info	*info = priv;
 
@@ -705,7 +705,7 @@ xfs_rmap_free(
 	struct xfs_btree_cur		*cur;
 	int				error;
 
-	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+	if (!xfs_has_rmapbt(mp))
 		return 0;
 
 	cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag);
@@ -959,7 +959,7 @@ xfs_rmap_alloc(
 	struct xfs_btree_cur		*cur;
 	int				error;
 
-	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+	if (!xfs_has_rmapbt(mp))
 		return 0;
 
 	cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag);
@@ -2278,9 +2278,9 @@ struct xfs_rmap_query_range_info {
 /* Format btree record and pass to our callback. */
 STATIC int
 xfs_rmap_query_range_helper(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_rec	*rec,
-	void			*priv)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_rec	*rec,
+	void				*priv)
 {
 	struct xfs_rmap_query_range_info	*query = priv;
 	struct xfs_rmap_irec			irec;
@@ -2296,8 +2296,8 @@ xfs_rmap_query_range_helper(
 int
 xfs_rmap_query_range(
 	struct xfs_btree_cur			*cur,
-	struct xfs_rmap_irec			*low_rec,
-	struct xfs_rmap_irec			*high_rec,
+	const struct xfs_rmap_irec		*low_rec,
+	const struct xfs_rmap_irec		*high_rec,
 	xfs_rmap_query_range_fn			fn,
 	void					*priv)
 {
@@ -2459,7 +2459,7 @@ xfs_rmap_update_is_needed(
 	struct xfs_mount	*mp,
 	int			whichfork)
 {
-	return xfs_sb_version_hasrmapbt(&mp->m_sb) && whichfork != XFS_COW_FORK;
+	return xfs_has_rmapbt(mp) && whichfork != XFS_COW_FORK;
 }
 
 /*
@@ -2707,7 +2707,7 @@ struct xfs_rmap_key_state {
 STATIC int
 xfs_rmap_has_other_keys_helper(
 	struct xfs_btree_cur		*cur,
-	struct xfs_rmap_irec		*rec,
+	const struct xfs_rmap_irec	*rec,
 	void				*priv)
 {
 	struct xfs_rmap_key_state	*rks = priv;
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index f2423cf7f1e2..fd67904ed446 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -134,12 +134,13 @@ int xfs_rmap_get_rec(struct xfs_btree_cur *cur, struct xfs_rmap_irec *irec,
 		int *stat);
 
 typedef int (*xfs_rmap_query_range_fn)(
-	struct xfs_btree_cur	*cur,
-	struct xfs_rmap_irec	*rec,
-	void			*priv);
+	struct xfs_btree_cur		*cur,
+	const struct xfs_rmap_irec	*rec,
+	void				*priv);
 
 int xfs_rmap_query_range(struct xfs_btree_cur *cur,
-		struct xfs_rmap_irec *low_rec, struct xfs_rmap_irec *high_rec,
+		const struct xfs_rmap_irec *low_rec,
+		const struct xfs_rmap_irec *high_rec,
 		xfs_rmap_query_range_fn fn, void *priv);
 int xfs_rmap_query_all(struct xfs_btree_cur *cur, xfs_rmap_query_range_fn fn,
 		void *priv);
@@ -192,7 +193,7 @@ int xfs_rmap_lookup_le_range(struct xfs_btree_cur *cur, xfs_agblock_t bno,
 int xfs_rmap_compare(const struct xfs_rmap_irec *a,
 		const struct xfs_rmap_irec *b);
 union xfs_btree_rec;
-int xfs_rmap_btrec_to_irec(union xfs_btree_rec *rec,
+int xfs_rmap_btrec_to_irec(const union xfs_btree_rec *rec,
 		struct xfs_rmap_irec *irec);
 int xfs_rmap_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno,
 		xfs_extlen_t len, bool *exists);
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index f29bc71b9950..b7dbbfb3aeed 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -57,9 +57,9 @@ xfs_rmapbt_dup_cursor(
 
 STATIC void
 xfs_rmapbt_set_root(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*ptr,
-	int			inc)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*ptr,
+	int				inc)
 {
 	struct xfs_buf		*agbp = cur->bc_ag.agbp;
 	struct xfs_agf		*agf = agbp->b_addr;
@@ -76,10 +76,10 @@ xfs_rmapbt_set_root(
 
 STATIC int
 xfs_rmapbt_alloc_block(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*start,
-	union xfs_btree_ptr	*new,
-	int			*stat)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*start,
+	union xfs_btree_ptr		*new,
+	int				*stat)
 {
 	struct xfs_buf		*agbp = cur->bc_ag.agbp;
 	struct xfs_agf		*agf = agbp->b_addr;
@@ -122,7 +122,7 @@ xfs_rmapbt_free_block(
 	xfs_agblock_t		bno;
 	int			error;
 
-	bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
+	bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp));
 	trace_xfs_rmapbt_free_block(cur->bc_mp, pag->pag_agno,
 			bno, 1);
 	be32_add_cpu(&agf->agf_rmap_blocks, -1);
@@ -156,8 +156,8 @@ xfs_rmapbt_get_maxrecs(
 
 STATIC void
 xfs_rmapbt_init_key_from_rec(
-	union xfs_btree_key	*key,
-	union xfs_btree_rec	*rec)
+	union xfs_btree_key		*key,
+	const union xfs_btree_rec	*rec)
 {
 	key->rmap.rm_startblock = rec->rmap.rm_startblock;
 	key->rmap.rm_owner = rec->rmap.rm_owner;
@@ -173,11 +173,11 @@ xfs_rmapbt_init_key_from_rec(
  */
 STATIC void
 xfs_rmapbt_init_high_key_from_rec(
-	union xfs_btree_key	*key,
-	union xfs_btree_rec	*rec)
+	union xfs_btree_key		*key,
+	const union xfs_btree_rec	*rec)
 {
-	uint64_t		off;
-	int			adj;
+	uint64_t			off;
+	int				adj;
 
 	adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1;
 
@@ -219,13 +219,13 @@ xfs_rmapbt_init_ptr_from_cur(
 
 STATIC int64_t
 xfs_rmapbt_key_diff(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*key)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*key)
 {
-	struct xfs_rmap_irec	*rec = &cur->bc_rec.r;
-	struct xfs_rmap_key	*kp = &key->rmap;
-	__u64			x, y;
-	int64_t			d;
+	struct xfs_rmap_irec		*rec = &cur->bc_rec.r;
+	const struct xfs_rmap_key	*kp = &key->rmap;
+	__u64				x, y;
+	int64_t				d;
 
 	d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
 	if (d)
@@ -249,14 +249,14 @@ xfs_rmapbt_key_diff(
 
 STATIC int64_t
 xfs_rmapbt_diff_two_keys(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*k1,
-	union xfs_btree_key	*k2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*k1,
+	const union xfs_btree_key	*k2)
 {
-	struct xfs_rmap_key	*kp1 = &k1->rmap;
-	struct xfs_rmap_key	*kp2 = &k2->rmap;
-	int64_t			d;
-	__u64			x, y;
+	const struct xfs_rmap_key	*kp1 = &k1->rmap;
+	const struct xfs_rmap_key	*kp2 = &k2->rmap;
+	int64_t				d;
+	__u64				x, y;
 
 	d = (int64_t)be32_to_cpu(kp1->rm_startblock) -
 		       be32_to_cpu(kp2->rm_startblock);
@@ -304,7 +304,7 @@ xfs_rmapbt_verify(
 	if (!xfs_verify_magic(bp, block->bb_magic))
 		return __this_address;
 
-	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+	if (!xfs_has_rmapbt(mp))
 		return __this_address;
 	fa = xfs_btree_sblock_v5hdr_verify(bp);
 	if (fa)
@@ -364,9 +364,9 @@ const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
 
 STATIC int
 xfs_rmapbt_keys_inorder(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*k1,
-	union xfs_btree_key	*k2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*k1,
+	const union xfs_btree_key	*k2)
 {
 	uint32_t		x;
 	uint32_t		y;
@@ -394,9 +394,9 @@ xfs_rmapbt_keys_inorder(
 
 STATIC int
 xfs_rmapbt_recs_inorder(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_rec	*r1,
-	union xfs_btree_rec	*r2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_rec	*r1,
+	const union xfs_btree_rec	*r2)
 {
 	uint32_t		x;
 	uint32_t		y;
@@ -558,7 +558,7 @@ xfs_rmapbt_compute_maxlevels(
 	 * disallow reflinking when less than 10% of the per-AG metadata
 	 * block reservation since the fallback is a regular file copy.
 	 */
-	if (xfs_sb_version_hasreflink(&mp->m_sb))
+	if (xfs_has_reflink(mp))
 		mp->m_rmap_maxlevels = XFS_BTREE_MAXLEVELS;
 	else
 		mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(
@@ -606,7 +606,7 @@ xfs_rmapbt_calc_reserves(
 	xfs_extlen_t		tree_len;
 	int			error;
 
-	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+	if (!xfs_has_rmapbt(mp))
 		return 0;
 
 	error = xfs_alloc_read_agf(mp, tp, pag->pag_agno, 0, &agbp);
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h
index 88d8d18788a2..f2eee6572af4 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.h
+++ b/fs/xfs/libxfs/xfs_rmap_btree.h
@@ -59,4 +59,4 @@ extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp,
 extern int xfs_rmapbt_calc_reserves(struct xfs_mount *mp, struct xfs_trans *tp,
 		struct xfs_perag *pag, xfs_extlen_t *ask, xfs_extlen_t *used);
 
-#endif	/* __XFS_RMAP_BTREE_H__ */
+#endif /* __XFS_RMAP_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index 483375c6a735..5740ba664867 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -1009,8 +1009,8 @@ xfs_rtfree_extent(
 int
 xfs_rtalloc_query_range(
 	struct xfs_trans		*tp,
-	struct xfs_rtalloc_rec		*low_rec,
-	struct xfs_rtalloc_rec		*high_rec,
+	const struct xfs_rtalloc_rec	*low_rec,
+	const struct xfs_rtalloc_rec	*high_rec,
 	xfs_rtalloc_query_range_fn	fn,
 	void				*priv)
 {
@@ -1018,6 +1018,7 @@ xfs_rtalloc_query_range(
 	struct xfs_mount		*mp = tp->t_mountp;
 	xfs_rtblock_t			rtstart;
 	xfs_rtblock_t			rtend;
+	xfs_rtblock_t			high_key;
 	int				is_free;
 	int				error = 0;
 
@@ -1026,12 +1027,12 @@ xfs_rtalloc_query_range(
 	if (low_rec->ar_startext >= mp->m_sb.sb_rextents ||
 	    low_rec->ar_startext == high_rec->ar_startext)
 		return 0;
-	high_rec->ar_startext = min(high_rec->ar_startext,
-			mp->m_sb.sb_rextents - 1);
+
+	high_key = min(high_rec->ar_startext, mp->m_sb.sb_rextents - 1);
 
 	/* Iterate the bitmap, looking for discrepancies. */
 	rtstart = low_rec->ar_startext;
-	while (rtstart <= high_rec->ar_startext) {
+	while (rtstart <= high_key) {
 		/* Is the first block free? */
 		error = xfs_rtcheck_range(mp, tp, rtstart, 1, 1, &rtend,
 				&is_free);
@@ -1039,8 +1040,7 @@ xfs_rtalloc_query_range(
 			break;
 
 		/* How long does the extent go for? */
-		error = xfs_rtfind_forw(mp, tp, rtstart,
-				high_rec->ar_startext, &rtend);
+		error = xfs_rtfind_forw(mp, tp, rtstart, high_key, &rtend);
 		if (error)
 			break;
 
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 04f5386446db..e58349be78bd 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -30,13 +30,110 @@
  * Physical superblock buffer manipulations. Shared with libxfs in userspace.
  */
 
+/*
+ * We support all XFS versions newer than a v4 superblock with V2 directories.
+ */
+bool
+xfs_sb_good_version(
+	struct xfs_sb	*sbp)
+{
+	/* all v5 filesystems are supported */
+	if (xfs_sb_is_v5(sbp))
+		return true;
+
+	/* versions prior to v4 are not supported */
+	if (XFS_SB_VERSION_NUM(sbp) < XFS_SB_VERSION_4)
+		return false;
+
+	/* V4 filesystems need v2 directories and unwritten extents */
+	if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT))
+		return false;
+	if (!(sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT))
+		return false;
+
+	/* And must not have any unknown v4 feature bits set */
+	if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) ||
+	    ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) &&
+	     (sbp->sb_features2 & ~XFS_SB_VERSION2_OKBITS)))
+		return false;
+
+	/* It's a supported v4 filesystem */
+	return true;
+}
+
+uint64_t
+xfs_sb_version_to_features(
+	struct xfs_sb	*sbp)
+{
+	uint64_t	features = 0;
+
+	/* optional V4 features */
+	if (sbp->sb_rblocks > 0)
+		features |= XFS_FEAT_REALTIME;
+	if (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT)
+		features |= XFS_FEAT_ATTR;
+	if (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT)
+		features |= XFS_FEAT_QUOTA;
+	if (sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT)
+		features |= XFS_FEAT_ALIGN;
+	if (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT)
+		features |= XFS_FEAT_LOGV2;
+	if (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT)
+		features |= XFS_FEAT_DALIGN;
+	if (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT)
+		features |= XFS_FEAT_EXTFLG;
+	if (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT)
+		features |= XFS_FEAT_SECTOR;
+	if (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT)
+		features |= XFS_FEAT_ASCIICI;
+	if (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) {
+		if (sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT)
+			features |= XFS_FEAT_LAZYSBCOUNT;
+		if (sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT)
+			features |= XFS_FEAT_ATTR2;
+		if (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT)
+			features |= XFS_FEAT_PROJID32;
+		if (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE)
+			features |= XFS_FEAT_FTYPE;
+	}
+
+	if (!xfs_sb_is_v5(sbp))
+		return features;
+
+	/* Always on V5 features */
+	features |= XFS_FEAT_ALIGN | XFS_FEAT_LOGV2 | XFS_FEAT_EXTFLG |
+		    XFS_FEAT_LAZYSBCOUNT | XFS_FEAT_ATTR2 | XFS_FEAT_PROJID32 |
+		    XFS_FEAT_V3INODES | XFS_FEAT_CRC | XFS_FEAT_PQUOTINO;
+
+	/* Optional V5 features */
+	if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT)
+		features |= XFS_FEAT_FINOBT;
+	if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_RMAPBT)
+		features |= XFS_FEAT_RMAPBT;
+	if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_REFLINK)
+		features |= XFS_FEAT_REFLINK;
+	if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_INOBTCNT)
+		features |= XFS_FEAT_INOBTCNT;
+	if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_FTYPE)
+		features |= XFS_FEAT_FTYPE;
+	if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_SPINODES)
+		features |= XFS_FEAT_SPINODES;
+	if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID)
+		features |= XFS_FEAT_META_UUID;
+	if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_BIGTIME)
+		features |= XFS_FEAT_BIGTIME;
+	if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR)
+		features |= XFS_FEAT_NEEDSREPAIR;
+	return features;
+}
+
 /* Check all the superblock fields we care about when reading one in. */
 STATIC int
 xfs_validate_sb_read(
 	struct xfs_mount	*mp,
 	struct xfs_sb		*sbp)
 {
-	if (XFS_SB_VERSION_NUM(sbp) != XFS_SB_VERSION_5)
+	if (!xfs_sb_is_v5(sbp))
 		return 0;
 
 	/*
@@ -56,7 +153,7 @@ xfs_validate_sb_read(
 "Superblock has unknown read-only compatible features (0x%x) enabled.",
 			(sbp->sb_features_ro_compat &
 					XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
-		if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
+		if (!xfs_is_readonly(mp)) {
 			xfs_warn(mp,
 "Attempted to mount read-only compatible filesystem read-write.");
 			xfs_warn(mp,
@@ -95,7 +192,7 @@ xfs_validate_sb_write(
 	 * secondary superblocks, so allow this usage to continue because
 	 * we never read counters from such superblocks.
 	 */
-	if (XFS_BUF_ADDR(bp) == XFS_SB_DADDR && !sbp->sb_inprogress &&
+	if (xfs_buf_daddr(bp) == XFS_SB_DADDR && !sbp->sb_inprogress &&
 	    (sbp->sb_fdblocks > sbp->sb_dblocks ||
 	     !xfs_verify_icount(mp, sbp->sb_icount) ||
 	     sbp->sb_ifree > sbp->sb_icount)) {
@@ -103,7 +200,7 @@ xfs_validate_sb_write(
 		return -EFSCORRUPTED;
 	}
 
-	if (XFS_SB_VERSION_NUM(sbp) != XFS_SB_VERSION_5)
+	if (!xfs_sb_is_v5(sbp))
 		return 0;
 
 	/*
@@ -162,6 +259,7 @@ xfs_validate_sb_common(
 	struct xfs_dsb		*dsb = bp->b_addr;
 	uint32_t		agcount = 0;
 	uint32_t		rem;
+	bool			has_dalign;
 
 	if (!xfs_verify_magic(bp, dsb->sb_magicnum)) {
 		xfs_warn(mp, "bad magic number");
@@ -173,12 +271,41 @@ xfs_validate_sb_common(
 		return -EWRONGFS;
 	}
 
-	if (xfs_sb_version_has_pquotino(sbp)) {
+	/*
+	 * Validate feature flags and state
+	 */
+	if (xfs_sb_is_v5(sbp)) {
+		if (sbp->sb_blocksize < XFS_MIN_CRC_BLOCKSIZE) {
+			xfs_notice(mp,
+"Block size (%u bytes) too small for Version 5 superblock (minimum %d bytes)",
+				sbp->sb_blocksize, XFS_MIN_CRC_BLOCKSIZE);
+			return -EFSCORRUPTED;
+		}
+
+		/* V5 has a separate project quota inode */
 		if (sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) {
 			xfs_notice(mp,
 			   "Version 5 of Super block has XFS_OQUOTA bits.");
 			return -EFSCORRUPTED;
 		}
+
+		/*
+		 * Full inode chunks must be aligned to inode chunk size when
+		 * sparse inodes are enabled to support the sparse chunk
+		 * allocation algorithm and prevent overlapping inode records.
+		 */
+		if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_SPINODES) {
+			uint32_t	align;
+
+			align = XFS_INODES_PER_CHUNK * sbp->sb_inodesize
+					>> sbp->sb_blocklog;
+			if (sbp->sb_inoalignmt != align) {
+				xfs_warn(mp,
+"Inode block alignment (%u) must match chunk size (%u) for sparse inodes.",
+					 sbp->sb_inoalignmt, align);
+				return -EINVAL;
+			}
+		}
 	} else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD |
 				XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) {
 			xfs_notice(mp,
@@ -186,24 +313,6 @@ xfs_validate_sb_common(
 			return -EFSCORRUPTED;
 	}
 
-	/*
-	 * Full inode chunks must be aligned to inode chunk size when
-	 * sparse inodes are enabled to support the sparse chunk
-	 * allocation algorithm and prevent overlapping inode records.
-	 */
-	if (xfs_sb_version_hassparseinodes(sbp)) {
-		uint32_t	align;
-
-		align = XFS_INODES_PER_CHUNK * sbp->sb_inodesize
-				>> sbp->sb_blocklog;
-		if (sbp->sb_inoalignmt != align) {
-			xfs_warn(mp,
-"Inode block alignment (%u) must match chunk size (%u) for sparse inodes.",
-				 sbp->sb_inoalignmt, align);
-			return -EINVAL;
-		}
-	}
-
 	if (unlikely(
 	    sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
 		xfs_warn(mp,
@@ -303,7 +412,8 @@ xfs_validate_sb_common(
 	 * Either (sb_unit and !hasdalign) or (!sb_unit and hasdalign)
 	 * would imply the image is corrupted.
 	 */
-	if (!!sbp->sb_unit ^ xfs_sb_version_hasdalign(sbp)) {
+	has_dalign = sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT;
+	if (!!sbp->sb_unit ^ has_dalign) {
 		xfs_notice(mp, "SB stripe alignment sanity check failed");
 		return -EFSCORRUPTED;
 	}
@@ -312,12 +422,6 @@ xfs_validate_sb_common(
 			XFS_FSB_TO_B(mp, sbp->sb_width), 0, false))
 		return -EFSCORRUPTED;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb) &&
-	    sbp->sb_blocksize < XFS_MIN_CRC_BLOCKSIZE) {
-		xfs_notice(mp, "v5 SB sanity check failed");
-		return -EFSCORRUPTED;
-	}
-
 	/*
 	 * Currently only very few inode sizes are supported.
 	 */
@@ -361,7 +465,7 @@ xfs_sb_quota_from_disk(struct xfs_sb *sbp)
 	 * We need to do these manipilations only if we are working
 	 * with an older version of on-disk superblock.
 	 */
-	if (xfs_sb_version_has_pquotino(sbp))
+	if (xfs_sb_is_v5(sbp))
 		return;
 
 	if (sbp->sb_qflags & XFS_OQUOTA_ENFD)
@@ -454,7 +558,8 @@ __xfs_sb_from_disk(
 	 * sb_meta_uuid is only on disk if it differs from sb_uuid and the
 	 * feature flag is set; if not set we keep it only in memory.
 	 */
-	if (xfs_sb_version_hasmetauuid(to))
+	if (xfs_sb_is_v5(to) &&
+	    (to->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID))
 		uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid);
 	else
 		uuid_copy(&to->sb_meta_uuid, &from->sb_uuid);
@@ -479,7 +584,12 @@ xfs_sb_quota_to_disk(
 	uint16_t	qflags = from->sb_qflags;
 
 	to->sb_uquotino = cpu_to_be64(from->sb_uquotino);
-	if (xfs_sb_version_has_pquotino(from)) {
+
+	/*
+	 * The in-memory superblock quota state matches the v5 on-disk format so
+	 * just write them out and return
+	 */
+	if (xfs_sb_is_v5(from)) {
 		to->sb_qflags = cpu_to_be16(from->sb_qflags);
 		to->sb_gquotino = cpu_to_be64(from->sb_gquotino);
 		to->sb_pquotino = cpu_to_be64(from->sb_pquotino);
@@ -487,9 +597,9 @@ xfs_sb_quota_to_disk(
 	}
 
 	/*
-	 * The in-core version of sb_qflags do not have XFS_OQUOTA_*
-	 * flags, whereas the on-disk version does.  So, convert incore
-	 * XFS_{PG}QUOTA_* flags to on-disk XFS_OQUOTA_* flags.
+	 * For older superblocks (v4), the in-core version of sb_qflags do not
+	 * have XFS_OQUOTA_* flags, whereas the on-disk version does.  So,
+	 * convert incore XFS_{PG}QUOTA_* flags to on-disk XFS_OQUOTA_* flags.
 	 */
 	qflags &= ~(XFS_PQUOTA_ENFD | XFS_PQUOTA_CHKD |
 			XFS_GQUOTA_ENFD | XFS_GQUOTA_CHKD);
@@ -589,19 +699,20 @@ xfs_sb_to_disk(
 	to->sb_features2 = cpu_to_be32(from->sb_features2);
 	to->sb_bad_features2 = cpu_to_be32(from->sb_bad_features2);
 
-	if (xfs_sb_version_hascrc(from)) {
-		to->sb_features_compat = cpu_to_be32(from->sb_features_compat);
-		to->sb_features_ro_compat =
-				cpu_to_be32(from->sb_features_ro_compat);
-		to->sb_features_incompat =
-				cpu_to_be32(from->sb_features_incompat);
-		to->sb_features_log_incompat =
-				cpu_to_be32(from->sb_features_log_incompat);
-		to->sb_spino_align = cpu_to_be32(from->sb_spino_align);
-		to->sb_lsn = cpu_to_be64(from->sb_lsn);
-		if (xfs_sb_version_hasmetauuid(from))
-			uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid);
-	}
+	if (!xfs_sb_is_v5(from))
+		return;
+
+	to->sb_features_compat = cpu_to_be32(from->sb_features_compat);
+	to->sb_features_ro_compat =
+			cpu_to_be32(from->sb_features_ro_compat);
+	to->sb_features_incompat =
+			cpu_to_be32(from->sb_features_incompat);
+	to->sb_features_log_incompat =
+			cpu_to_be32(from->sb_features_log_incompat);
+	to->sb_spino_align = cpu_to_be32(from->sb_spino_align);
+	to->sb_lsn = cpu_to_be64(from->sb_lsn);
+	if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID)
+		uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid);
 }
 
 /*
@@ -636,8 +747,8 @@ xfs_sb_read_verify(
 
 		if (!xfs_buf_verify_cksum(bp, XFS_SB_CRC_OFF)) {
 			/* Only fail bad secondaries on a known V5 filesystem */
-			if (bp->b_bn == XFS_SB_DADDR ||
-			    xfs_sb_version_hascrc(&mp->m_sb)) {
+			if (xfs_buf_daddr(bp) == XFS_SB_DADDR ||
+			    xfs_has_crc(mp)) {
 				error = -EFSBADCRC;
 				goto out_error;
 			}
@@ -704,7 +815,7 @@ xfs_sb_write_verify(
 	if (error)
 		goto out_error;
 
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_sb_is_v5(&sb))
 		return;
 
 	if (bip)
@@ -801,7 +912,7 @@ xfs_log_sb(
 	 * unclean shutdown, this will be corrected by log recovery rebuilding
 	 * the counters from the AGF block counts.
 	 */
-	if (xfs_sb_version_haslazysbcount(&mp->m_sb)) {
+	if (xfs_has_lazysbcount(mp)) {
 		mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
 		mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree);
 		mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks);
@@ -950,10 +1061,12 @@ out:
 
 void
 xfs_fs_geometry(
-	struct xfs_sb		*sbp,
+	struct xfs_mount	*mp,
 	struct xfs_fsop_geom	*geo,
 	int			struct_version)
 {
+	struct xfs_sb		*sbp = &mp->m_sb;
+
 	memset(geo, 0, sizeof(struct xfs_fsop_geom));
 
 	geo->blocksize = sbp->sb_blocksize;
@@ -984,51 +1097,51 @@ xfs_fs_geometry(
 	geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK |
 		     XFS_FSOP_GEOM_FLAGS_DIRV2 |
 		     XFS_FSOP_GEOM_FLAGS_EXTFLG;
-	if (xfs_sb_version_hasattr(sbp))
+	if (xfs_has_attr(mp))
 		geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR;
-	if (xfs_sb_version_hasquota(sbp))
+	if (xfs_has_quota(mp))
 		geo->flags |= XFS_FSOP_GEOM_FLAGS_QUOTA;
-	if (xfs_sb_version_hasalign(sbp))
+	if (xfs_has_align(mp))
 		geo->flags |= XFS_FSOP_GEOM_FLAGS_IALIGN;
-	if (xfs_sb_version_hasdalign(sbp))
+	if (xfs_has_dalign(mp))
 		geo->flags |= XFS_FSOP_GEOM_FLAGS_DALIGN;
-	if (xfs_sb_version_hassector(sbp))
-		geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR;
-	if (xfs_sb_version_hasasciici(sbp))
+	if (xfs_has_asciici(mp))
 		geo->flags |= XFS_FSOP_GEOM_FLAGS_DIRV2CI;
-	if (xfs_sb_version_haslazysbcount(sbp))
+	if (xfs_has_lazysbcount(mp))
 		geo->flags |= XFS_FSOP_GEOM_FLAGS_LAZYSB;
-	if (xfs_sb_version_hasattr2(sbp))
+	if (xfs_has_attr2(mp))
 		geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR2;
-	if (xfs_sb_version_hasprojid32bit(sbp))
+	if (xfs_has_projid32(mp))
 		geo->flags |= XFS_FSOP_GEOM_FLAGS_PROJID32;
-	if (xfs_sb_version_hascrc(sbp))
+	if (xfs_has_crc(mp))
 		geo->flags |= XFS_FSOP_GEOM_FLAGS_V5SB;
-	if (xfs_sb_version_hasftype(sbp))
+	if (xfs_has_ftype(mp))
 		geo->flags |= XFS_FSOP_GEOM_FLAGS_FTYPE;
-	if (xfs_sb_version_hasfinobt(sbp))
+	if (xfs_has_finobt(mp))
 		geo->flags |= XFS_FSOP_GEOM_FLAGS_FINOBT;
-	if (xfs_sb_version_hassparseinodes(sbp))
+	if (xfs_has_sparseinodes(mp))
 		geo->flags |= XFS_FSOP_GEOM_FLAGS_SPINODES;
-	if (xfs_sb_version_hasrmapbt(sbp))
+	if (xfs_has_rmapbt(mp))
 		geo->flags |= XFS_FSOP_GEOM_FLAGS_RMAPBT;
-	if (xfs_sb_version_hasreflink(sbp))
+	if (xfs_has_reflink(mp))
 		geo->flags |= XFS_FSOP_GEOM_FLAGS_REFLINK;
-	if (xfs_sb_version_hasbigtime(sbp))
+	if (xfs_has_bigtime(mp))
 		geo->flags |= XFS_FSOP_GEOM_FLAGS_BIGTIME;
-	if (xfs_sb_version_hasinobtcounts(sbp))
+	if (xfs_has_inobtcounts(mp))
 		geo->flags |= XFS_FSOP_GEOM_FLAGS_INOBTCNT;
-	if (xfs_sb_version_hassector(sbp))
+	if (xfs_has_sector(mp)) {
+		geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR;
 		geo->logsectsize = sbp->sb_logsectsize;
-	else
+	} else {
 		geo->logsectsize = BBSIZE;
+	}
 	geo->rtsectsize = sbp->sb_blocksize;
 	geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp);
 
 	if (struct_version < 4)
 		return;
 
-	if (xfs_sb_version_haslogv2(sbp))
+	if (xfs_has_logv2(mp))
 		geo->flags |= XFS_FSOP_GEOM_FLAGS_LOGV2;
 
 	geo->logsunit = sbp->sb_logsunit;
diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h
index 0c1602d9b53d..a5e14740ec9a 100644
--- a/fs/xfs/libxfs/xfs_sb.h
+++ b/fs/xfs/libxfs/xfs_sb.h
@@ -20,11 +20,13 @@ extern void	xfs_sb_mount_common(struct xfs_mount *mp, struct xfs_sb *sbp);
 extern void	xfs_sb_from_disk(struct xfs_sb *to, struct xfs_dsb *from);
 extern void	xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from);
 extern void	xfs_sb_quota_from_disk(struct xfs_sb *sbp);
+extern bool	xfs_sb_good_version(struct xfs_sb *sbp);
+extern uint64_t	xfs_sb_version_to_features(struct xfs_sb *sbp);
 
 extern int	xfs_update_secondary_sbs(struct xfs_mount *mp);
 
 #define XFS_FS_GEOM_MAX_STRUCT_VER	(4)
-extern void	xfs_fs_geometry(struct xfs_sb *sbp, struct xfs_fsop_geom *geo,
+extern void	xfs_fs_geometry(struct xfs_mount *mp, struct xfs_fsop_geom *geo,
 				int struct_version);
 extern int	xfs_sb_read_secondary(struct xfs_mount *mp,
 				struct xfs_trans *tp, xfs_agnumber_t agno,
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index 594bc447a7dd..f0b38f4aba80 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -42,7 +42,7 @@ xfs_symlink_hdr_set(
 {
 	struct xfs_dsymlink_hdr	*dsl = bp->b_addr;
 
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return 0;
 
 	memset(dsl, 0, sizeof(struct xfs_dsymlink_hdr));
@@ -51,7 +51,7 @@ xfs_symlink_hdr_set(
 	dsl->sl_bytes = cpu_to_be32(size);
 	uuid_copy(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid);
 	dsl->sl_owner = cpu_to_be64(ino);
-	dsl->sl_blkno = cpu_to_be64(bp->b_bn);
+	dsl->sl_blkno = cpu_to_be64(xfs_buf_daddr(bp));
 	bp->b_ops = &xfs_symlink_buf_ops;
 
 	return sizeof(struct xfs_dsymlink_hdr);
@@ -89,13 +89,13 @@ xfs_symlink_verify(
 	struct xfs_mount	*mp = bp->b_mount;
 	struct xfs_dsymlink_hdr	*dsl = bp->b_addr;
 
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return __this_address;
 	if (!xfs_verify_magic(bp, dsl->sl_magic))
 		return __this_address;
 	if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid))
 		return __this_address;
-	if (bp->b_bn != be64_to_cpu(dsl->sl_blkno))
+	if (xfs_buf_daddr(bp) != be64_to_cpu(dsl->sl_blkno))
 		return __this_address;
 	if (be32_to_cpu(dsl->sl_offset) +
 				be32_to_cpu(dsl->sl_bytes) >= XFS_SYMLINK_MAXLEN)
@@ -116,7 +116,7 @@ xfs_symlink_read_verify(
 	xfs_failaddr_t	fa;
 
 	/* no verification of non-crc buffers */
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return;
 
 	if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF))
@@ -137,7 +137,7 @@ xfs_symlink_write_verify(
 	xfs_failaddr_t		fa;
 
 	/* no verification of non-crc buffers */
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return;
 
 	fa = xfs_symlink_verify(bp);
@@ -173,7 +173,7 @@ xfs_symlink_local_to_remote(
 
 	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SYMLINK_BUF);
 
-	if (!xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (!xfs_has_crc(mp)) {
 		bp->b_ops = NULL;
 		memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
 		xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index 16f723ebe8dd..8b5547073379 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -136,7 +136,7 @@ xfs_trans_log_inode(
 	 * to upgrade this inode to bigtime format, do so now.
 	 */
 	if ((flags & (XFS_ILOG_CORE | XFS_ILOG_TIMESTAMP)) &&
-	    xfs_sb_version_hasbigtime(&ip->i_mount->m_sb) &&
+	    xfs_has_bigtime(ip->i_mount) &&
 	    !xfs_inode_has_bigtime(ip)) {
 		ip->i_diflags2 |= XFS_DIFLAG2_BIGTIME;
 		flags |= XFS_ILOG_CORE;
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index d1a0848cb52e..5e300daa2559 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -71,9 +71,9 @@ xfs_allocfree_log_count(
 	uint		blocks;
 
 	blocks = num_ops * 2 * (2 * mp->m_ag_maxlevels - 1);
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+	if (xfs_has_rmapbt(mp))
 		blocks += num_ops * (2 * mp->m_rmap_maxlevels - 1);
-	if (xfs_sb_version_hasreflink(&mp->m_sb))
+	if (xfs_has_reflink(mp))
 		blocks += num_ops * (2 * mp->m_refc_maxlevels - 1);
 
 	return blocks;
@@ -155,7 +155,7 @@ STATIC uint
 xfs_calc_finobt_res(
 	struct xfs_mount	*mp)
 {
-	if (!xfs_sb_version_hasfinobt(&mp->m_sb))
+	if (!xfs_has_finobt(mp))
 		return 0;
 
 	return xfs_calc_inobt_res(mp);
@@ -187,7 +187,7 @@ xfs_calc_inode_chunk_res(
 			       XFS_FSB_TO_B(mp, 1));
 	if (alloc) {
 		/* icreate tx uses ordered buffers */
-		if (xfs_sb_version_has_v3inode(&mp->m_sb))
+		if (xfs_has_v3inodes(mp))
 			return res;
 		size = XFS_FSB_TO_B(mp, 1);
 	}
@@ -268,7 +268,7 @@ xfs_calc_write_reservation(
 	     xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
 	     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
 
-	if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
+	if (xfs_has_realtime(mp)) {
 		t2 = xfs_calc_inode_res(mp, 1) +
 		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
 				     blksz) +
@@ -317,7 +317,7 @@ xfs_calc_itruncate_reservation(
 	t2 = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
 	     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), blksz);
 
-	if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
+	if (xfs_has_realtime(mp)) {
 		t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
 		     xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 2), blksz) +
 		     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
@@ -799,29 +799,6 @@ xfs_calc_qm_dqalloc_reservation(
 }
 
 /*
- * Turning off quotas.
- *    the quota off logitems: sizeof(struct xfs_qoff_logitem) * 2
- *    the superblock for the quota flags: sector size
- */
-STATIC uint
-xfs_calc_qm_quotaoff_reservation(
-	struct xfs_mount	*mp)
-{
-	return sizeof(struct xfs_qoff_logitem) * 2 +
-		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
-}
-
-/*
- * End of turning off quotas.
- *    the quota off logitems: sizeof(struct xfs_qoff_logitem) * 2
- */
-STATIC uint
-xfs_calc_qm_quotaoff_end_reservation(void)
-{
-	return sizeof(struct xfs_qoff_logitem) * 2;
-}
-
-/*
  * Syncing the incore super block changes to disk.
  *     the super block to reflect the changes: sector size
  */
@@ -842,14 +819,14 @@ xfs_trans_resv_calc(
 	 * require a permanent reservation on space.
 	 */
 	resp->tr_write.tr_logres = xfs_calc_write_reservation(mp);
-	if (xfs_sb_version_hasreflink(&mp->m_sb))
+	if (xfs_has_reflink(mp))
 		resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT_REFLINK;
 	else
 		resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
 	resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 
 	resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp);
-	if (xfs_sb_version_hasreflink(&mp->m_sb))
+	if (xfs_has_reflink(mp))
 		resp->tr_itruncate.tr_logcount =
 				XFS_ITRUNCATE_LOG_COUNT_REFLINK;
 	else
@@ -910,7 +887,7 @@ xfs_trans_resv_calc(
 	resp->tr_growrtalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 
 	resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp);
-	if (xfs_sb_version_hasreflink(&mp->m_sb))
+	if (xfs_has_reflink(mp))
 		resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT_REFLINK;
 	else
 		resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
@@ -923,13 +900,6 @@ xfs_trans_resv_calc(
 	resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation();
 	resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT;
 
-	resp->tr_qm_quotaoff.tr_logres = xfs_calc_qm_quotaoff_reservation(mp);
-	resp->tr_qm_quotaoff.tr_logcount = XFS_DEFAULT_LOG_COUNT;
-
-	resp->tr_qm_equotaoff.tr_logres =
-		xfs_calc_qm_quotaoff_end_reservation();
-	resp->tr_qm_equotaoff.tr_logcount = XFS_DEFAULT_LOG_COUNT;
-
 	resp->tr_sb.tr_logres = xfs_calc_sb_reservation(mp);
 	resp->tr_sb.tr_logcount = XFS_DEFAULT_LOG_COUNT;
 
diff --git a/fs/xfs/libxfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h
index 7241ab28cf84..fc4e9b369a3a 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.h
+++ b/fs/xfs/libxfs/xfs_trans_resv.h
@@ -46,8 +46,6 @@ struct xfs_trans_resv {
 	struct xfs_trans_res	tr_growrtfree;	/* grow realtime freeing */
 	struct xfs_trans_res	tr_qm_setqlim;	/* adjust quota limits */
 	struct xfs_trans_res	tr_qm_dqalloc;	/* allocate quota on disk */
-	struct xfs_trans_res	tr_qm_quotaoff;	/* turn quota off */
-	struct xfs_trans_res	tr_qm_equotaoff;/* end of turn quota off */
 	struct xfs_trans_res	tr_sb;		/* modify superblock */
 	struct xfs_trans_res	tr_fsyncts;	/* update timestamps on fsync */
 };
diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h
index 7ad3659c5d2a..50332be34388 100644
--- a/fs/xfs/libxfs/xfs_trans_space.h
+++ b/fs/xfs/libxfs/xfs_trans_space.h
@@ -57,8 +57,7 @@
 	XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK)
 #define	XFS_IALLOC_SPACE_RES(mp)	\
 	(M_IGEO(mp)->ialloc_blks + \
-	 ((xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1) * \
-	  M_IGEO(mp)->inobt_maxlevels))
+	 ((xfs_has_finobt(mp) ? 2 : 1) * M_IGEO(mp)->inobt_maxlevels))
 
 /*
  * Space reservation values for various transactions.
@@ -94,8 +93,7 @@
 #define	XFS_SYMLINK_SPACE_RES(mp,nl,b)	\
 	(XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b))
 #define XFS_IFREE_SPACE_RES(mp)		\
-	(xfs_sb_version_hasfinobt(&mp->m_sb) ? \
-			M_IGEO(mp)->inobt_maxlevels : 0)
+	(xfs_has_finobt(mp) ? M_IGEO(mp)->inobt_maxlevels : 0)
 
 
 #endif	/* __XFS_TRANS_SPACE_H__ */
diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c
index e8f4abee7892..e810d23f2d97 100644
--- a/fs/xfs/libxfs/xfs_types.c
+++ b/fs/xfs/libxfs/xfs_types.c
@@ -169,7 +169,7 @@ xfs_internal_inum(
 	xfs_ino_t		ino)
 {
 	return ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino ||
-		(xfs_sb_version_hasquota(&mp->m_sb) &&
+		(xfs_has_quota(mp) &&
 		 xfs_is_quota_inode(&mp->m_sb, ino));
 }
 
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index 0870ef6f933d..b6da06b40989 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -87,6 +87,11 @@ typedef void *		xfs_failaddr_t;
 #define	XFS_ATTR_FORK	1
 #define	XFS_COW_FORK	2
 
+#define XFS_WHICHFORK_STRINGS \
+	{ XFS_DATA_FORK, 	"data" }, \
+	{ XFS_ATTR_FORK,	"attr" }, \
+	{ XFS_COW_FORK,		"cow" }
+
 /*
  * Min numbers of data/attr fork btree root pointers.
  */
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index be1a7e1e65f7..ae3c9f6e2c69 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -36,7 +36,7 @@ xchk_superblock_xref(
 
 	agbno = XFS_SB_BLOCK(mp);
 
-	error = xchk_ag_init(sc, agno, &sc->sa);
+	error = xchk_ag_init_existing(sc, agno, &sc->sa);
 	if (!xchk_xref_process_error(sc, agno, agbno, &error))
 		return;
 
@@ -63,6 +63,7 @@ xchk_superblock(
 	struct xfs_mount	*mp = sc->mp;
 	struct xfs_buf		*bp;
 	struct xfs_dsb		*sb;
+	struct xfs_perag	*pag;
 	xfs_agnumber_t		agno;
 	uint32_t		v2_ok;
 	__be32			features_mask;
@@ -73,6 +74,15 @@ xchk_superblock(
 	if (agno == 0)
 		return 0;
 
+	/*
+	 * Grab an active reference to the perag structure.  If we can't get
+	 * it, we're racing with something that's tearing down the AG, so
+	 * signal that the AG no longer exists.
+	 */
+	pag = xfs_perag_get(mp, agno);
+	if (!pag)
+		return -ENOENT;
+
 	error = xfs_sb_read_secondary(mp, sc->tp, agno, &bp);
 	/*
 	 * The superblock verifier can return several different error codes
@@ -92,7 +102,7 @@ xchk_superblock(
 		break;
 	}
 	if (!xchk_process_error(sc, agno, XFS_SB_BLOCK(mp), &error))
-		return error;
+		goto out_pag;
 
 	sb = bp->b_addr;
 
@@ -248,7 +258,7 @@ xchk_superblock(
 			xchk_block_set_corrupt(sc, bp);
 	} else {
 		v2_ok = XFS_SB_VERSION2_OKBITS;
-		if (XFS_SB_VERSION_NUM(&mp->m_sb) >= XFS_SB_VERSION_5)
+		if (xfs_sb_is_v5(&mp->m_sb))
 			v2_ok |= XFS_SB_VERSION2_CRCBIT;
 
 		if (!!(sb->sb_features2 & cpu_to_be32(~v2_ok)))
@@ -273,7 +283,7 @@ xchk_superblock(
 	    (cpu_to_be32(mp->m_sb.sb_features2) & features_mask))
 		xchk_block_set_corrupt(sc, bp);
 
-	if (!xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (!xfs_has_crc(mp)) {
 		/* all v5 fields must be zero */
 		if (memchr_inv(&sb->sb_features_compat, 0,
 				sizeof(struct xfs_dsb) -
@@ -324,7 +334,7 @@ xchk_superblock(
 		/* Don't care about sb_lsn */
 	}
 
-	if (xfs_sb_version_hasmetauuid(&mp->m_sb)) {
+	if (xfs_has_metauuid(mp)) {
 		/* The metadata UUID must be the same for all supers */
 		if (!uuid_equal(&sb->sb_meta_uuid, &mp->m_sb.sb_meta_uuid))
 			xchk_block_set_corrupt(sc, bp);
@@ -336,7 +346,8 @@ xchk_superblock(
 		xchk_block_set_corrupt(sc, bp);
 
 	xchk_superblock_xref(sc, bp);
-
+out_pag:
+	xfs_perag_put(pag);
 	return error;
 }
 
@@ -346,7 +357,7 @@ xchk_superblock(
 STATIC int
 xchk_agf_record_bno_lengths(
 	struct xfs_btree_cur		*cur,
-	struct xfs_alloc_rec_incore	*rec,
+	const struct xfs_alloc_rec_incore *rec,
 	void				*priv)
 {
 	xfs_extlen_t			*blocks = priv;
@@ -419,7 +430,7 @@ xchk_agf_xref_btreeblks(
 	int			error;
 
 	/* agf_btreeblks didn't exist before lazysbcount */
-	if (!xfs_sb_version_haslazysbcount(&sc->mp->m_sb))
+	if (!xfs_has_lazysbcount(sc->mp))
 		return;
 
 	/* Check agf_rmap_blocks; set up for agf_btreeblks check */
@@ -438,7 +449,7 @@ xchk_agf_xref_btreeblks(
 	 * No rmap cursor; we can't xref if we have the rmapbt feature.
 	 * We also can't do it if we're missing the free space btree cursors.
 	 */
-	if ((xfs_sb_version_hasrmapbt(&mp->m_sb) && !sc->sa.rmap_cur) ||
+	if ((xfs_has_rmapbt(mp) && !sc->sa.rmap_cur) ||
 	    !sc->sa.bno_cur || !sc->sa.cnt_cur)
 		return;
 
@@ -527,6 +538,7 @@ xchk_agf(
 	xchk_buffer_recheck(sc, sc->sa.agf_bp);
 
 	agf = sc->sa.agf_bp->b_addr;
+	pag = sc->sa.pag;
 
 	/* Check the AG length */
 	eoag = be32_to_cpu(agf->agf_length);
@@ -550,7 +562,7 @@ xchk_agf(
 	if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
 		xchk_block_set_corrupt(sc, sc->sa.agf_bp);
 
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+	if (xfs_has_rmapbt(mp)) {
 		agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_RMAP]);
 		if (!xfs_verify_agbno(mp, agno, agbno))
 			xchk_block_set_corrupt(sc, sc->sa.agf_bp);
@@ -560,7 +572,7 @@ xchk_agf(
 			xchk_block_set_corrupt(sc, sc->sa.agf_bp);
 	}
 
-	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+	if (xfs_has_reflink(mp)) {
 		agbno = be32_to_cpu(agf->agf_refcount_root);
 		if (!xfs_verify_agbno(mp, agno, agbno))
 			xchk_block_set_corrupt(sc, sc->sa.agf_bp);
@@ -582,15 +594,13 @@ xchk_agf(
 		xchk_block_set_corrupt(sc, sc->sa.agf_bp);
 
 	/* Do the incore counters match? */
-	pag = xfs_perag_get(mp, agno);
 	if (pag->pagf_freeblks != be32_to_cpu(agf->agf_freeblks))
 		xchk_block_set_corrupt(sc, sc->sa.agf_bp);
 	if (pag->pagf_flcount != be32_to_cpu(agf->agf_flcount))
 		xchk_block_set_corrupt(sc, sc->sa.agf_bp);
-	if (xfs_sb_version_haslazysbcount(&sc->mp->m_sb) &&
+	if (xfs_has_lazysbcount(sc->mp) &&
 	    pag->pagf_btreeblks != be32_to_cpu(agf->agf_btreeblks))
 		xchk_block_set_corrupt(sc, sc->sa.agf_bp);
-	xfs_perag_put(pag);
 
 	xchk_agf_xref(sc);
 out:
@@ -630,7 +640,7 @@ xchk_agfl_block(
 {
 	struct xchk_agfl_info	*sai = priv;
 	struct xfs_scrub	*sc = sai->sc;
-	xfs_agnumber_t		agno = sc->sa.agno;
+	xfs_agnumber_t		agno = sc->sa.pag->pag_agno;
 
 	if (xfs_verify_agbno(mp, agno, agbno) &&
 	    sai->nr_entries < sai->sz_entries)
@@ -787,7 +797,7 @@ xchk_agi_xref_fiblocks(
 	xfs_agblock_t		blocks;
 	int			error = 0;
 
-	if (!xfs_sb_version_hasinobtcounts(&sc->mp->m_sb))
+	if (!xfs_has_inobtcounts(sc->mp))
 		return;
 
 	if (sc->sa.ino_cur) {
@@ -857,6 +867,7 @@ xchk_agi(
 	xchk_buffer_recheck(sc, sc->sa.agi_bp);
 
 	agi = sc->sa.agi_bp->b_addr;
+	pag = sc->sa.pag;
 
 	/* Check the AG length */
 	eoag = be32_to_cpu(agi->agi_length);
@@ -872,7 +883,7 @@ xchk_agi(
 	if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
 		xchk_block_set_corrupt(sc, sc->sa.agi_bp);
 
-	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
+	if (xfs_has_finobt(mp)) {
 		agbno = be32_to_cpu(agi->agi_free_root);
 		if (!xfs_verify_agbno(mp, agno, agbno))
 			xchk_block_set_corrupt(sc, sc->sa.agi_bp);
@@ -909,12 +920,10 @@ xchk_agi(
 		xchk_block_set_corrupt(sc, sc->sa.agi_bp);
 
 	/* Do the incore counters match? */
-	pag = xfs_perag_get(mp, agno);
 	if (pag->pagi_count != be32_to_cpu(agi->agi_count))
 		xchk_block_set_corrupt(sc, sc->sa.agi_bp);
 	if (pag->pagi_freecount != be32_to_cpu(agi->agi_freecount))
 		xchk_block_set_corrupt(sc, sc->sa.agi_bp);
-	xfs_perag_put(pag);
 
 	xchk_agi_xref(sc);
 out:
diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index e95f8c98f0f7..0f8deee66f15 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -70,7 +70,7 @@ struct xrep_agf_allocbt {
 STATIC int
 xrep_agf_walk_allocbt(
 	struct xfs_btree_cur		*cur,
-	struct xfs_alloc_rec_incore	*rec,
+	const struct xfs_alloc_rec_incore *rec,
 	void				*priv)
 {
 	struct xrep_agf_allocbt		*raa = priv;
@@ -94,7 +94,7 @@ xrep_agf_check_agfl_block(
 {
 	struct xfs_scrub	*sc = priv;
 
-	if (!xfs_verify_agbno(mp, sc->sa.agno, agbno))
+	if (!xfs_verify_agbno(mp, sc->sa.pag->pag_agno, agbno))
 		return -EFSCORRUPTED;
 	return 0;
 }
@@ -164,7 +164,7 @@ xrep_agf_find_btrees(
 		return -EFSCORRUPTED;
 
 	/* We must find the refcountbt root if that feature is enabled. */
-	if (xfs_sb_version_hasreflink(&sc->mp->m_sb) &&
+	if (xfs_has_reflink(sc->mp) &&
 	    !xrep_check_btree_root(sc, &fab[XREP_AGF_REFCOUNTBT]))
 		return -EFSCORRUPTED;
 
@@ -188,12 +188,13 @@ xrep_agf_init_header(
 	memset(agf, 0, BBTOB(agf_bp->b_length));
 	agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
 	agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION);
-	agf->agf_seqno = cpu_to_be32(sc->sa.agno);
-	agf->agf_length = cpu_to_be32(xfs_ag_block_count(mp, sc->sa.agno));
+	agf->agf_seqno = cpu_to_be32(sc->sa.pag->pag_agno);
+	agf->agf_length = cpu_to_be32(xfs_ag_block_count(mp,
+							sc->sa.pag->pag_agno));
 	agf->agf_flfirst = old_agf->agf_flfirst;
 	agf->agf_fllast = old_agf->agf_fllast;
 	agf->agf_flcount = old_agf->agf_flcount;
-	if (xfs_sb_version_hascrc(&mp->m_sb))
+	if (xfs_has_crc(mp))
 		uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);
 
 	/* Mark the incore AGF data stale until we're done fixing things. */
@@ -223,7 +224,7 @@ xrep_agf_set_roots(
 	agf->agf_levels[XFS_BTNUM_RMAPi] =
 			cpu_to_be32(fab[XREP_AGF_RMAPBT].height);
 
-	if (xfs_sb_version_hasreflink(&sc->mp->m_sb)) {
+	if (xfs_has_reflink(sc->mp)) {
 		agf->agf_refcount_root =
 				cpu_to_be32(fab[XREP_AGF_REFCOUNTBT].root);
 		agf->agf_refcount_level =
@@ -280,7 +281,7 @@ xrep_agf_calc_from_btrees(
 	agf->agf_btreeblks = cpu_to_be32(btreeblks);
 
 	/* Update the AGF counters from the refcountbt. */
-	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+	if (xfs_has_reflink(mp)) {
 		cur = xfs_refcountbt_init_cursor(mp, sc->tp, agf_bp,
 				sc->sa.pag);
 		error = xfs_btree_count_blocks(cur, &blocks);
@@ -363,16 +364,16 @@ xrep_agf(
 	int				error;
 
 	/* We require the rmapbt to rebuild anything. */
-	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+	if (!xfs_has_rmapbt(mp))
 		return -EOPNOTSUPP;
 
-	xchk_perag_get(sc->mp, &sc->sa);
 	/*
 	 * Make sure we have the AGF buffer, as scrub might have decided it
 	 * was corrupt after xfs_alloc_read_agf failed with -EFSCORRUPTED.
 	 */
 	error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
-			XFS_AG_DADDR(mp, sc->sa.agno, XFS_AGF_DADDR(mp)),
+			XFS_AG_DADDR(mp, sc->sa.pag->pag_agno,
+						XFS_AGF_DADDR(mp)),
 			XFS_FSS_TO_BB(mp, 1), 0, &agf_bp, NULL);
 	if (error)
 		return error;
@@ -388,7 +389,7 @@ xrep_agf(
 	 * btrees rooted in the AGF.  If the AGFL contents are obviously bad
 	 * then we'll bail out.
 	 */
-	error = xfs_alloc_read_agfl(mp, sc->tp, sc->sa.agno, &agfl_bp);
+	error = xfs_alloc_read_agfl(mp, sc->tp, sc->sa.pag->pag_agno, &agfl_bp);
 	if (error)
 		return error;
 
@@ -442,7 +443,7 @@ struct xrep_agfl {
 STATIC int
 xrep_agfl_walk_rmap(
 	struct xfs_btree_cur	*cur,
-	struct xfs_rmap_irec	*rec,
+	const struct xfs_rmap_irec *rec,
 	void			*priv)
 {
 	struct xrep_agfl	*ra = priv;
@@ -586,7 +587,7 @@ xrep_agfl_init_header(
 	agfl = XFS_BUF_TO_AGFL(agfl_bp);
 	memset(agfl, 0xFF, BBTOB(agfl_bp->b_length));
 	agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
-	agfl->agfl_seqno = cpu_to_be32(sc->sa.agno);
+	agfl->agfl_seqno = cpu_to_be32(sc->sa.pag->pag_agno);
 	uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid);
 
 	/*
@@ -599,7 +600,8 @@ xrep_agfl_init_header(
 	for_each_xbitmap_extent(br, n, agfl_extents) {
 		agbno = XFS_FSB_TO_AGBNO(mp, br->start);
 
-		trace_xrep_agfl_insert(mp, sc->sa.agno, agbno, br->len);
+		trace_xrep_agfl_insert(mp, sc->sa.pag->pag_agno, agbno,
+				br->len);
 
 		while (br->len > 0 && fl_off < flcount) {
 			agfl_bno[fl_off] = cpu_to_be32(agbno);
@@ -638,10 +640,9 @@ xrep_agfl(
 	int			error;
 
 	/* We require the rmapbt to rebuild anything. */
-	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+	if (!xfs_has_rmapbt(mp))
 		return -EOPNOTSUPP;
 
-	xchk_perag_get(sc->mp, &sc->sa);
 	xbitmap_init(&agfl_extents);
 
 	/*
@@ -649,7 +650,8 @@ xrep_agfl(
 	 * nothing wrong with the AGF, but all the AG header repair functions
 	 * have this chicken-and-egg problem.
 	 */
-	error = xfs_alloc_read_agf(mp, sc->tp, sc->sa.agno, 0, &agf_bp);
+	error = xfs_alloc_read_agf(mp, sc->tp, sc->sa.pag->pag_agno, 0,
+			&agf_bp);
 	if (error)
 		return error;
 
@@ -658,7 +660,8 @@ xrep_agfl(
 	 * was corrupt after xfs_alloc_read_agfl failed with -EFSCORRUPTED.
 	 */
 	error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
-			XFS_AG_DADDR(mp, sc->sa.agno, XFS_AGFL_DADDR(mp)),
+			XFS_AG_DADDR(mp, sc->sa.pag->pag_agno,
+						XFS_AGFL_DADDR(mp)),
 			XFS_FSS_TO_BB(mp, 1), 0, &agfl_bp, NULL);
 	if (error)
 		return error;
@@ -723,7 +726,8 @@ xrep_agi_find_btrees(
 	int				error;
 
 	/* Read the AGF. */
-	error = xfs_alloc_read_agf(mp, sc->tp, sc->sa.agno, 0, &agf_bp);
+	error = xfs_alloc_read_agf(mp, sc->tp, sc->sa.pag->pag_agno, 0,
+			&agf_bp);
 	if (error)
 		return error;
 
@@ -737,7 +741,7 @@ xrep_agi_find_btrees(
 		return -EFSCORRUPTED;
 
 	/* We must find the finobt root if that feature is enabled. */
-	if (xfs_sb_version_hasfinobt(&mp->m_sb) &&
+	if (xfs_has_finobt(mp) &&
 	    !xrep_check_btree_root(sc, &fab[XREP_AGI_FINOBT]))
 		return -EFSCORRUPTED;
 
@@ -761,11 +765,12 @@ xrep_agi_init_header(
 	memset(agi, 0, BBTOB(agi_bp->b_length));
 	agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
 	agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION);
-	agi->agi_seqno = cpu_to_be32(sc->sa.agno);
-	agi->agi_length = cpu_to_be32(xfs_ag_block_count(mp, sc->sa.agno));
+	agi->agi_seqno = cpu_to_be32(sc->sa.pag->pag_agno);
+	agi->agi_length = cpu_to_be32(xfs_ag_block_count(mp,
+							sc->sa.pag->pag_agno));
 	agi->agi_newino = cpu_to_be32(NULLAGINO);
 	agi->agi_dirino = cpu_to_be32(NULLAGINO);
-	if (xfs_sb_version_hascrc(&mp->m_sb))
+	if (xfs_has_crc(mp))
 		uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
 
 	/* We don't know how to fix the unlinked list yet. */
@@ -787,7 +792,7 @@ xrep_agi_set_roots(
 	agi->agi_root = cpu_to_be32(fab[XREP_AGI_INOBT].root);
 	agi->agi_level = cpu_to_be32(fab[XREP_AGI_INOBT].height);
 
-	if (xfs_sb_version_hasfinobt(&sc->mp->m_sb)) {
+	if (xfs_has_finobt(sc->mp)) {
 		agi->agi_free_root = cpu_to_be32(fab[XREP_AGI_FINOBT].root);
 		agi->agi_free_level = cpu_to_be32(fab[XREP_AGI_FINOBT].height);
 	}
@@ -811,7 +816,7 @@ xrep_agi_calc_from_btrees(
 	error = xfs_ialloc_count_inodes(cur, &count, &freecount);
 	if (error)
 		goto err;
-	if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) {
+	if (xfs_has_inobtcounts(mp)) {
 		xfs_agblock_t	blocks;
 
 		error = xfs_btree_count_blocks(cur, &blocks);
@@ -824,8 +829,7 @@ xrep_agi_calc_from_btrees(
 	agi->agi_count = cpu_to_be32(count);
 	agi->agi_freecount = cpu_to_be32(freecount);
 
-	if (xfs_sb_version_hasfinobt(&mp->m_sb) &&
-	    xfs_sb_version_hasinobtcounts(&mp->m_sb)) {
+	if (xfs_has_finobt(mp) && xfs_has_inobtcounts(mp)) {
 		xfs_agblock_t	blocks;
 
 		cur = xfs_inobt_init_cursor(mp, sc->tp, agi_bp,
@@ -893,16 +897,16 @@ xrep_agi(
 	int				error;
 
 	/* We require the rmapbt to rebuild anything. */
-	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+	if (!xfs_has_rmapbt(mp))
 		return -EOPNOTSUPP;
 
-	xchk_perag_get(sc->mp, &sc->sa);
 	/*
 	 * Make sure we have the AGI buffer, as scrub might have decided it
 	 * was corrupt after xfs_ialloc_read_agi failed with -EFSCORRUPTED.
 	 */
 	error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
-			XFS_AG_DADDR(mp, sc->sa.agno, XFS_AGI_DADDR(mp)),
+			XFS_AG_DADDR(mp, sc->sa.pag->pag_agno,
+						XFS_AGI_DADDR(mp)),
 			XFS_FSS_TO_BB(mp, 1), 0, &agi_bp, NULL);
 	if (error)
 		return error;
diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c
index d5741980094a..87518e1292f8 100644
--- a/fs/xfs/scrub/alloc.c
+++ b/fs/xfs/scrub/alloc.c
@@ -91,7 +91,7 @@ xchk_allocbt_xref(
 STATIC int
 xchk_allocbt_rec(
 	struct xchk_btree	*bs,
-	union xfs_btree_rec	*rec)
+	const union xfs_btree_rec *rec)
 {
 	struct xfs_mount	*mp = bs->cur->bc_mp;
 	xfs_agnumber_t		agno = bs->cur->bc_ag.pag->pag_agno;
diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c
index 552af0cf8482..b6f0c9f3f124 100644
--- a/fs/xfs/scrub/attr.c
+++ b/fs/xfs/scrub/attr.c
@@ -25,11 +25,11 @@
  * reallocating the buffer if necessary.  Buffer contents are not preserved
  * across a reallocation.
  */
-int
+static int
 xchk_setup_xattr_buf(
 	struct xfs_scrub	*sc,
 	size_t			value_size,
-	xfs_km_flags_t		flags)
+	gfp_t			flags)
 {
 	size_t			sz;
 	struct xchk_xattr_buf	*ab = sc->buf;
@@ -57,7 +57,7 @@ xchk_setup_xattr_buf(
 	 * Don't zero the buffer upon allocation to avoid runtime overhead.
 	 * All users must be careful never to read uninitialized contents.
 	 */
-	ab = kmem_alloc_large(sizeof(*ab) + sz, flags);
+	ab = kvmalloc(sizeof(*ab) + sz, flags);
 	if (!ab)
 		return -ENOMEM;
 
@@ -79,7 +79,7 @@ xchk_setup_xattr(
 	 * without the inode lock held, which means we can sleep.
 	 */
 	if (sc->flags & XCHK_TRY_HARDER) {
-		error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, 0);
+		error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, GFP_KERNEL);
 		if (error)
 			return error;
 	}
@@ -138,7 +138,8 @@ xchk_xattr_listent(
 	 * doesn't work, we overload the seen_enough variable to convey
 	 * the error message back to the main scrub function.
 	 */
-	error = xchk_setup_xattr_buf(sx->sc, valuelen, KM_MAYFAIL);
+	error = xchk_setup_xattr_buf(sx->sc, valuelen,
+			GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (error == -ENOMEM)
 		error = -EDEADLOCK;
 	if (error) {
@@ -323,7 +324,8 @@ xchk_xattr_block(
 		return 0;
 
 	/* Allocate memory for block usage checking. */
-	error = xchk_setup_xattr_buf(ds->sc, 0, KM_MAYFAIL);
+	error = xchk_setup_xattr_buf(ds->sc, 0,
+			GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (error == -ENOMEM)
 		return -EDEADLOCK;
 	if (error)
@@ -334,7 +336,7 @@ xchk_xattr_block(
 	bitmap_zero(usedmap, mp->m_attr_geo->blksize);
 
 	/* Check all the padding. */
-	if (xfs_sb_version_hascrc(&ds->sc->mp->m_sb)) {
+	if (xfs_has_crc(ds->sc->mp)) {
 		struct xfs_attr3_leafblock	*leaf = bp->b_addr;
 
 		if (leaf->hdr.pad1 != 0 || leaf->hdr.pad2 != 0 ||
diff --git a/fs/xfs/scrub/attr.h b/fs/xfs/scrub/attr.h
index 13a1d2e8424d..1719e1c4da59 100644
--- a/fs/xfs/scrub/attr.h
+++ b/fs/xfs/scrub/attr.h
@@ -65,7 +65,4 @@ xchk_xattr_dstmap(
 			BITS_TO_LONGS(sc->mp->m_attr_geo->blksize);
 }
 
-int xchk_setup_xattr_buf(struct xfs_scrub *sc, size_t value_size,
-		xfs_km_flags_t flags);
-
 #endif	/* __XFS_SCRUB_ATTR_H__ */
diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c
index 813b5f219113..d6d24c866bc4 100644
--- a/fs/xfs/scrub/bitmap.c
+++ b/fs/xfs/scrub/bitmap.c
@@ -260,7 +260,7 @@ xbitmap_set_btcur_path(
 		xfs_btree_get_block(cur, i, &bp);
 		if (!bp)
 			continue;
-		fsb = XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn);
+		fsb = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
 		error = xbitmap_set(bitmap, fsb, 1);
 		if (error)
 			return error;
@@ -284,7 +284,7 @@ xbitmap_collect_btblock(
 	if (!bp)
 		return 0;
 
-	fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn);
+	fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
 	return xbitmap_set(bitmap, fsbno, 1);
 }
 
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index 1d146c9d9de1..017da9ceaee9 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -260,10 +260,10 @@ xchk_bmap_iextent_xref(
 	agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
 	len = irec->br_blockcount;
 
-	error = xchk_ag_init(info->sc, agno, &info->sc->sa);
+	error = xchk_ag_init_existing(info->sc, agno, &info->sc->sa);
 	if (!xchk_fblock_process_error(info->sc, info->whichfork,
 			irec->br_startoff, &error))
-		return;
+		goto out_free;
 
 	xchk_xref_is_used_space(info->sc, agbno, len);
 	xchk_xref_is_not_inode_chunk(info->sc, agbno, len);
@@ -283,6 +283,7 @@ xchk_bmap_iextent_xref(
 		break;
 	}
 
+out_free:
 	xchk_ag_free(info->sc, &info->sc->sa);
 }
 
@@ -383,7 +384,7 @@ xchk_bmap_iextent(
 STATIC int
 xchk_bmapbt_rec(
 	struct xchk_btree	*bs,
-	union xfs_btree_rec	*rec)
+	const union xfs_btree_rec *rec)
 {
 	struct xfs_bmbt_irec	irec;
 	struct xfs_bmbt_irec	iext_irec;
@@ -400,7 +401,7 @@ xchk_bmapbt_rec(
 	 * Check the owners of the btree blocks up to the level below
 	 * the root since the verifiers don't do that.
 	 */
-	if (xfs_sb_version_hascrc(&bs->cur->bc_mp->m_sb) &&
+	if (xfs_has_crc(bs->cur->bc_mp) &&
 	    bs->cur->bc_ptrs[0] == 1) {
 		for (i = 0; i < bs->cur->bc_nlevels - 1; i++) {
 			block = xfs_btree_get_block(bs->cur, i, &bp);
@@ -473,10 +474,11 @@ struct xchk_bmap_check_rmap_info {
 STATIC int
 xchk_bmap_check_rmap(
 	struct xfs_btree_cur		*cur,
-	struct xfs_rmap_irec		*rec,
+	const struct xfs_rmap_irec	*rec,
 	void				*priv)
 {
 	struct xfs_bmbt_irec		irec;
+	struct xfs_rmap_irec		check_rec;
 	struct xchk_bmap_check_rmap_info	*sbcri = priv;
 	struct xfs_ifork		*ifp;
 	struct xfs_scrub		*sc = sbcri->sc;
@@ -510,28 +512,30 @@ xchk_bmap_check_rmap(
 	 * length, so we have to loop through the bmbt to make sure that the
 	 * entire rmap is covered by bmbt records.
 	 */
+	check_rec = *rec;
 	while (have_map) {
-		if (irec.br_startoff != rec->rm_offset)
+		if (irec.br_startoff != check_rec.rm_offset)
 			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
-					rec->rm_offset);
+					check_rec.rm_offset);
 		if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp,
-				cur->bc_ag.pag->pag_agno, rec->rm_startblock))
+				cur->bc_ag.pag->pag_agno,
+				check_rec.rm_startblock))
 			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
-					rec->rm_offset);
-		if (irec.br_blockcount > rec->rm_blockcount)
+					check_rec.rm_offset);
+		if (irec.br_blockcount > check_rec.rm_blockcount)
 			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
-					rec->rm_offset);
+					check_rec.rm_offset);
 		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 			break;
-		rec->rm_startblock += irec.br_blockcount;
-		rec->rm_offset += irec.br_blockcount;
-		rec->rm_blockcount -= irec.br_blockcount;
-		if (rec->rm_blockcount == 0)
+		check_rec.rm_startblock += irec.br_blockcount;
+		check_rec.rm_offset += irec.br_blockcount;
+		check_rec.rm_blockcount -= irec.br_blockcount;
+		if (check_rec.rm_blockcount == 0)
 			break;
 		have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec);
 		if (!have_map)
 			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
-					rec->rm_offset);
+					check_rec.rm_offset);
 	}
 
 out:
@@ -581,7 +585,7 @@ xchk_bmap_check_rmaps(
 	bool			zero_size;
 	int			error;
 
-	if (!xfs_sb_version_hasrmapbt(&sc->mp->m_sb) ||
+	if (!xfs_has_rmapbt(sc->mp) ||
 	    whichfork == XFS_COW_FORK ||
 	    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
 		return 0;
@@ -659,8 +663,7 @@ xchk_bmap(
 		}
 		break;
 	case XFS_ATTR_FORK:
-		if (!xfs_sb_version_hasattr(&mp->m_sb) &&
-		    !xfs_sb_version_hasattr2(&mp->m_sb))
+		if (!xfs_has_attr(mp) && !xfs_has_attr2(mp))
 			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
 		break;
 	default:
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
index bd1172358964..eccb855dc904 100644
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -374,10 +374,10 @@ xchk_btree_check_block_owner(
 
 	init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS;
 	if (init_sa) {
-		error = xchk_ag_init(bs->sc, agno, &bs->sc->sa);
+		error = xchk_ag_init_existing(bs->sc, agno, &bs->sc->sa);
 		if (!xchk_btree_xref_process_error(bs->sc, bs->cur,
 				level, &error))
-			return error;
+			goto out_free;
 	}
 
 	xchk_xref_is_used_space(bs->sc, agbno, 1);
@@ -393,6 +393,7 @@ xchk_btree_check_block_owner(
 	if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP)
 		bs->cur = NULL;
 
+out_free:
 	if (init_sa)
 		xchk_ag_free(bs->sc, &bs->sc->sa);
 
@@ -435,12 +436,12 @@ xchk_btree_check_owner(
 		if (!co)
 			return -ENOMEM;
 		co->level = level;
-		co->daddr = XFS_BUF_ADDR(bp);
+		co->daddr = xfs_buf_daddr(bp);
 		list_add_tail(&co->list, &bs->to_check);
 		return 0;
 	}
 
-	return xchk_btree_check_block_owner(bs, level, XFS_BUF_ADDR(bp));
+	return xchk_btree_check_block_owner(bs, level, xfs_buf_daddr(bp));
 }
 
 /* Decide if we want to check minrecs of a btree block in the inode root. */
diff --git a/fs/xfs/scrub/btree.h b/fs/xfs/scrub/btree.h
index 5572e475f8ed..b7d2fc01fbf9 100644
--- a/fs/xfs/scrub/btree.h
+++ b/fs/xfs/scrub/btree.h
@@ -26,8 +26,8 @@ void xchk_btree_xref_set_corrupt(struct xfs_scrub *sc,
 
 struct xchk_btree;
 typedef int (*xchk_btree_rec_fn)(
-	struct xchk_btree	*bs,
-	union xfs_btree_rec	*rec);
+	struct xchk_btree		*bs,
+	const union xfs_btree_rec	*rec);
 
 struct xchk_btree {
 	/* caller-provided scrub state */
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 8558ca05e11d..bf1f3607d0b6 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -186,7 +186,7 @@ xchk_block_set_preen(
 	struct xfs_buf		*bp)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
-	trace_xchk_block_preen(sc, bp->b_bn, __return_address);
+	trace_xchk_block_preen(sc, xfs_buf_daddr(bp), __return_address);
 }
 
 /*
@@ -219,7 +219,7 @@ xchk_block_set_corrupt(
 	struct xfs_buf		*bp)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
-	trace_xchk_block_error(sc, bp->b_bn, __return_address);
+	trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address);
 }
 
 /* Record a corruption while cross-referencing. */
@@ -229,7 +229,7 @@ xchk_block_xref_set_corrupt(
 	struct xfs_buf		*bp)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
-	trace_xchk_block_error(sc, bp->b_bn, __return_address);
+	trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address);
 }
 
 /*
@@ -324,7 +324,7 @@ struct xchk_rmap_ownedby_info {
 STATIC int
 xchk_count_rmap_ownedby_irec(
 	struct xfs_btree_cur		*cur,
-	struct xfs_rmap_irec		*rec,
+	const struct xfs_rmap_irec	*rec,
 	void				*priv)
 {
 	struct xchk_rmap_ownedby_info	*sroi = priv;
@@ -394,11 +394,11 @@ want_ag_read_header_failure(
 }
 
 /*
- * Grab all the headers for an AG.
+ * Grab the perag structure and all the headers for an AG.
  *
- * The headers should be released by xchk_ag_free, but as a fail
- * safe we attach all the buffers we grab to the scrub transaction so
- * they'll all be freed when we cancel it.
+ * The headers should be released by xchk_ag_free, but as a fail safe we attach
+ * all the buffers we grab to the scrub transaction so they'll all be freed
+ * when we cancel it.  Returns ENOENT if we can't grab the perag structure.
  */
 int
 xchk_ag_read_headers(
@@ -409,22 +409,24 @@ xchk_ag_read_headers(
 	struct xfs_mount	*mp = sc->mp;
 	int			error;
 
-	sa->agno = agno;
+	ASSERT(!sa->pag);
+	sa->pag = xfs_perag_get(mp, agno);
+	if (!sa->pag)
+		return -ENOENT;
 
 	error = xfs_ialloc_read_agi(mp, sc->tp, agno, &sa->agi_bp);
 	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI))
-		goto out;
+		return error;
 
 	error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, &sa->agf_bp);
 	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF))
-		goto out;
+		return error;
 
 	error = xfs_alloc_read_agfl(mp, sc->tp, agno, &sa->agfl_bp);
 	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL))
-		goto out;
-	error = 0;
-out:
-	return error;
+		return error;
+
+	return 0;
 }
 
 /* Release all the AG btree cursors. */
@@ -461,7 +463,6 @@ xchk_ag_btcur_init(
 {
 	struct xfs_mount	*mp = sc->mp;
 
-	xchk_perag_get(sc->mp, sa);
 	if (sa->agf_bp &&
 	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_BNO)) {
 		/* Set up a bnobt cursor for cross-referencing. */
@@ -484,21 +485,21 @@ xchk_ag_btcur_init(
 	}
 
 	/* Set up a finobt cursor for cross-referencing. */
-	if (sa->agi_bp && xfs_sb_version_hasfinobt(&mp->m_sb) &&
+	if (sa->agi_bp && xfs_has_finobt(mp) &&
 	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_FINO)) {
 		sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
 				sa->pag, XFS_BTNUM_FINO);
 	}
 
 	/* Set up a rmapbt cursor for cross-referencing. */
-	if (sa->agf_bp && xfs_sb_version_hasrmapbt(&mp->m_sb) &&
+	if (sa->agf_bp && xfs_has_rmapbt(mp) &&
 	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_RMAP)) {
 		sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
 				sa->pag);
 	}
 
 	/* Set up a refcountbt cursor for cross-referencing. */
-	if (sa->agf_bp && xfs_sb_version_hasreflink(&mp->m_sb) &&
+	if (sa->agf_bp && xfs_has_reflink(mp) &&
 	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_REFC)) {
 		sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
 				sa->agf_bp, sa->pag);
@@ -528,15 +529,14 @@ xchk_ag_free(
 		xfs_perag_put(sa->pag);
 		sa->pag = NULL;
 	}
-	sa->agno = NULLAGNUMBER;
 }
 
 /*
- * For scrub, grab the AGI and the AGF headers, in that order.  Locking
- * order requires us to get the AGI before the AGF.  We use the
- * transaction to avoid deadlocking on crosslinked metadata buffers;
- * either the caller passes one in (bmap scrub) or we have to create a
- * transaction ourselves.
+ * For scrub, grab the perag structure, the AGI, and the AGF headers, in that
+ * order.  Locking order requires us to get the AGI before the AGF.  We use the
+ * transaction to avoid deadlocking on crosslinked metadata buffers; either the
+ * caller passes one in (bmap scrub) or we have to create a transaction
+ * ourselves.  Returns ENOENT if the perag struct cannot be grabbed.
  */
 int
 xchk_ag_init(
@@ -554,19 +554,6 @@ xchk_ag_init(
 	return 0;
 }
 
-/*
- * Grab the per-ag structure if we haven't already gotten it.  Teardown of the
- * xchk_ag will release it for us.
- */
-void
-xchk_perag_get(
-	struct xfs_mount	*mp,
-	struct xchk_ag		*sa)
-{
-	if (!sa->pag)
-		sa->pag = xfs_perag_get(mp, sa->agno);
-}
-
 /* Per-scrubber setup functions */
 
 /*
@@ -797,7 +784,7 @@ xchk_buffer_recheck(
 	if (!fa)
 		return;
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
-	trace_xchk_block_error(sc, bp->b_bn, fa);
+	trace_xchk_block_error(sc, xfs_buf_daddr(bp), fa);
 }
 
 /*
@@ -842,7 +829,7 @@ xchk_metadata_inode_forks(
 		return error;
 
 	/* Look for incorrect shared blocks. */
-	if (xfs_sb_version_hasreflink(&sc->mp->m_sb)) {
+	if (xfs_has_reflink(sc->mp)) {
 		error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
 				&shared);
 		if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0,
@@ -884,6 +871,7 @@ xchk_stop_reaping(
 {
 	sc->flags |= XCHK_REAPING_DISABLED;
 	xfs_blockgc_stop(sc->mp);
+	xfs_inodegc_stop(sc->mp);
 }
 
 /* Restart background reaping of resources. */
@@ -891,6 +879,13 @@ void
 xchk_start_reaping(
 	struct xfs_scrub	*sc)
 {
-	xfs_blockgc_start(sc->mp);
+	/*
+	 * Readonly filesystems do not perform inactivation or speculative
+	 * preallocation, so there's no need to restart the workers.
+	 */
+	if (!xfs_is_readonly(sc->mp)) {
+		xfs_inodegc_start(sc->mp);
+		xfs_blockgc_start(sc->mp);
+	}
 	sc->flags &= ~XCHK_REAPING_DISABLED;
 }
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 0410faf7d735..454145db10e7 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -107,7 +107,23 @@ int xchk_setup_fscounters(struct xfs_scrub *sc);
 void xchk_ag_free(struct xfs_scrub *sc, struct xchk_ag *sa);
 int xchk_ag_init(struct xfs_scrub *sc, xfs_agnumber_t agno,
 		struct xchk_ag *sa);
-void xchk_perag_get(struct xfs_mount *mp, struct xchk_ag *sa);
+
+/*
+ * Grab all AG resources, treating the inability to grab the perag structure as
+ * a fs corruption.  This is intended for callers checking an ondisk reference
+ * to a given AG, which means that the AG must still exist.
+ */
+static inline int
+xchk_ag_init_existing(
+	struct xfs_scrub	*sc,
+	xfs_agnumber_t		agno,
+	struct xchk_ag		*sa)
+{
+	int			error = xchk_ag_init(sc, agno, sa);
+
+	return error == -ENOENT ? -EFSCORRUPTED : error;
+}
+
 int xchk_ag_read_headers(struct xfs_scrub *sc, xfs_agnumber_t agno,
 		struct xchk_ag *sa);
 void xchk_ag_btcur_free(struct xchk_ag *sa);
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
index 9f0dbb47c82c..8a52514bc1ff 100644
--- a/fs/xfs/scrub/dabtree.c
+++ b/fs/xfs/scrub/dabtree.c
@@ -367,11 +367,11 @@ xchk_da_btree_block(
 	pmaxrecs = &ds->maxrecs[level];
 
 	/* We only started zeroing the header on v5 filesystems. */
-	if (xfs_sb_version_hascrc(&ds->sc->mp->m_sb) && hdr3->hdr.pad)
+	if (xfs_has_crc(ds->sc->mp) && hdr3->hdr.pad)
 		xchk_da_set_corrupt(ds, level);
 
 	/* Check the owner. */
-	if (xfs_sb_version_hascrc(&ip->i_mount->m_sb)) {
+	if (xfs_has_crc(ip->i_mount)) {
 		owner = be64_to_cpu(hdr3->owner);
 		if (owner != ip->i_ino)
 			xchk_da_set_corrupt(ds, level);
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index 28dda391d5df..200a63f58fe7 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -51,7 +51,7 @@ xchk_dir_check_ftype(
 	int			ino_dtype;
 	int			error = 0;
 
-	if (!xfs_sb_version_hasftype(&mp->m_sb)) {
+	if (!xfs_has_ftype(mp)) {
 		if (dtype != DT_UNKNOWN && dtype != DT_DIR)
 			xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
 					offset);
@@ -140,7 +140,7 @@ xchk_dir_actor(
 
 	if (!strncmp(".", name, namelen)) {
 		/* If this is "." then check that the inum matches the dir. */
-		if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR)
+		if (xfs_has_ftype(mp) && type != DT_DIR)
 			xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
 					offset);
 		checked_ftype = true;
@@ -152,7 +152,7 @@ xchk_dir_actor(
 		 * If this is ".." in the root inode, check that the inum
 		 * matches this dir.
 		 */
-		if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR)
+		if (xfs_has_ftype(mp) && type != DT_DIR)
 			xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
 					offset);
 		checked_ftype = true;
@@ -526,7 +526,7 @@ xchk_directory_leaf1_bestfree(
 	bestcount = be32_to_cpu(ltp->bestcount);
 	bestp = xfs_dir2_leaf_bests_p(ltp);
 
-	if (xfs_sb_version_hascrc(&sc->mp->m_sb)) {
+	if (xfs_has_crc(sc->mp)) {
 		struct xfs_dir3_leaf_hdr	*hdr3 = bp->b_addr;
 
 		if (hdr3->pad != cpu_to_be32(0))
@@ -623,7 +623,7 @@ xchk_directory_free_bestfree(
 		return error;
 	xchk_buffer_recheck(sc, bp);
 
-	if (xfs_sb_version_hascrc(&sc->mp->m_sb)) {
+	if (xfs_has_crc(sc->mp)) {
 		struct xfs_dir3_free_hdr	*hdr3 = bp->b_addr;
 
 		if (hdr3->pad != cpu_to_be32(0))
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index fd7941e04ae1..48a6cbdf95d0 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -148,9 +148,9 @@ xchk_fscount_btreeblks(
 	xfs_extlen_t		blocks;
 	int			error;
 
-	error = xchk_ag_init(sc, agno, &sc->sa);
+	error = xchk_ag_init_existing(sc, agno, &sc->sa);
 	if (error)
-		return error;
+		goto out_free;
 
 	error = xfs_btree_count_blocks(sc->sa.bno_cur, &blocks);
 	if (error)
@@ -207,7 +207,7 @@ retry:
 		/* Add up the free/freelist/bnobt/cntbt blocks */
 		fsc->fdblocks += pag->pagf_freeblks;
 		fsc->fdblocks += pag->pagf_flcount;
-		if (xfs_sb_version_haslazysbcount(&sc->mp->m_sb)) {
+		if (xfs_has_lazysbcount(sc->mp)) {
 			fsc->fdblocks += pag->pagf_btreeblks;
 		} else {
 			error = xchk_fscount_btreeblks(sc, fsc, agno);
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index 30e568596b79..00848ee542fb 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -418,7 +418,7 @@ xchk_iallocbt_rec_alignment(
 STATIC int
 xchk_iallocbt_rec(
 	struct xchk_btree		*bs,
-	union xfs_btree_rec		*rec)
+	const union xfs_btree_rec	*rec)
 {
 	struct xfs_mount		*mp = bs->cur->bc_mp;
 	struct xchk_iallocbt		*iabt = bs->private;
@@ -517,7 +517,7 @@ xchk_iallocbt_xref_rmap_btreeblks(
 	int			error;
 
 	if (!sc->sa.ino_cur || !sc->sa.rmap_cur ||
-	    (xfs_sb_version_hasfinobt(&sc->mp->m_sb) && !sc->sa.fino_cur) ||
+	    (xfs_has_finobt(sc->mp) && !sc->sa.fino_cur) ||
 	    xchk_skip_xref(sc->sm))
 		return;
 
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index 76fbc7ca4cec..2405b09d03d0 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -181,7 +181,7 @@ xchk_inode_flags2(
 
 	/* reflink flag requires reflink feature */
 	if ((flags2 & XFS_DIFLAG2_REFLINK) &&
-	    !xfs_sb_version_hasreflink(&mp->m_sb))
+	    !xfs_has_reflink(mp))
 		goto bad;
 
 	/* cowextsize flag is checked w.r.t. mode separately */
@@ -199,8 +199,7 @@ xchk_inode_flags2(
 		goto bad;
 
 	/* no bigtime iflag without the bigtime feature */
-	if (xfs_dinode_has_bigtime(dip) &&
-	    !xfs_sb_version_hasbigtime(&mp->m_sb))
+	if (xfs_dinode_has_bigtime(dip) && !xfs_has_bigtime(mp))
 		goto bad;
 
 	return;
@@ -278,7 +277,7 @@ xchk_dinode(
 			xchk_ino_set_corrupt(sc, ino);
 
 		if (dip->di_projid_hi != 0 &&
-		    !xfs_sb_version_hasprojid32bit(&mp->m_sb))
+		    !xfs_has_projid32(mp))
 			xchk_ino_set_corrupt(sc, ino);
 		break;
 	default:
@@ -532,9 +531,9 @@ xchk_inode_xref(
 	agno = XFS_INO_TO_AGNO(sc->mp, ino);
 	agbno = XFS_INO_TO_AGBNO(sc->mp, ino);
 
-	error = xchk_ag_init(sc, agno, &sc->sa);
+	error = xchk_ag_init_existing(sc, agno, &sc->sa);
 	if (!xchk_xref_process_error(sc, agno, agbno, &error))
-		return;
+		goto out_free;
 
 	xchk_xref_is_used_space(sc, agbno, 1);
 	xchk_inode_xref_finobt(sc, ino);
@@ -542,6 +541,7 @@ xchk_inode_xref(
 	xchk_xref_is_not_shared(sc, agbno, 1);
 	xchk_inode_xref_bmap(sc, dip);
 
+out_free:
 	xchk_ag_free(sc, &sc->sa);
 }
 
@@ -560,7 +560,7 @@ xchk_inode_check_reflink_iflag(
 	bool			has_shared;
 	int			error;
 
-	if (!xfs_sb_version_hasreflink(&mp->m_sb))
+	if (!xfs_has_reflink(mp))
 		return;
 
 	error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index acbb9839d42f..d6c1b00a4fc8 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -42,7 +42,7 @@ xchk_setup_quota(
 	xfs_dqtype_t		dqtype;
 	int			error;
 
-	if (!XFS_IS_QUOTA_RUNNING(sc->mp) || !XFS_IS_QUOTA_ON(sc->mp))
+	if (!XFS_IS_QUOTA_ON(sc->mp))
 		return -ENOENT;
 
 	dqtype = xchk_quota_to_dqtype(sc);
@@ -127,7 +127,7 @@ xchk_quota_item(
 	 * a reflink filesystem we're allowed to exceed physical space
 	 * if there are no quota limits.
 	 */
-	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+	if (xfs_has_reflink(mp)) {
 		if (mp->m_sb.sb_dblocks < dq->q_blk.count)
 			xchk_fblock_set_warning(sc, XFS_DATA_FORK,
 					offset);
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index 7014b7408bad..2744eecdbaf0 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -91,7 +91,7 @@ struct xchk_refcnt_check {
 STATIC int
 xchk_refcountbt_rmap_check(
 	struct xfs_btree_cur		*cur,
-	struct xfs_rmap_irec		*rec,
+	const struct xfs_rmap_irec	*rec,
 	void				*priv)
 {
 	struct xchk_refcnt_check	*refchk = priv;
@@ -330,7 +330,7 @@ xchk_refcountbt_xref(
 STATIC int
 xchk_refcountbt_rec(
 	struct xchk_btree	*bs,
-	union xfs_btree_rec	*rec)
+	const union xfs_btree_rec *rec)
 {
 	struct xfs_mount	*mp = bs->cur->bc_mp;
 	xfs_agblock_t		*cow_blocks = bs->private;
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index ebb0e245aa72..8f3cba14ada3 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -248,19 +248,19 @@ xrep_calc_ag_resblks(
 	 * bnobt/cntbt or inobt/finobt as pairs.
 	 */
 	bnobt_sz = 2 * xfs_allocbt_calc_size(mp, freelen);
-	if (xfs_sb_version_hassparseinodes(&mp->m_sb))
+	if (xfs_has_sparseinodes(mp))
 		inobt_sz = xfs_iallocbt_calc_size(mp, icount /
 				XFS_INODES_PER_HOLEMASK_BIT);
 	else
 		inobt_sz = xfs_iallocbt_calc_size(mp, icount /
 				XFS_INODES_PER_CHUNK);
-	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+	if (xfs_has_finobt(mp))
 		inobt_sz *= 2;
-	if (xfs_sb_version_hasreflink(&mp->m_sb))
+	if (xfs_has_reflink(mp))
 		refcbt_sz = xfs_refcountbt_calc_size(mp, usedlen);
 	else
 		refcbt_sz = 0;
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+	if (xfs_has_rmapbt(mp)) {
 		/*
 		 * Guess how many blocks we need to rebuild the rmapbt.
 		 * For non-reflink filesystems we can't have more records than
@@ -269,7 +269,7 @@ xrep_calc_ag_resblks(
 		 * many rmaps there could be in the AG, so we start off with
 		 * what we hope is an generous over-estimation.
 		 */
-		if (xfs_sb_version_hasreflink(&mp->m_sb))
+		if (xfs_has_reflink(mp))
 			rmapbt_sz = xfs_rmapbt_calc_size(mp,
 					(unsigned long long)aglen * 2);
 		else
@@ -306,9 +306,9 @@ xrep_alloc_ag_block(
 			return -ENOSPC;
 		xfs_extent_busy_reuse(sc->mp, sc->sa.pag, bno,
 				1, false);
-		*fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.agno, bno);
+		*fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, bno);
 		if (resv == XFS_AG_RESV_RMAPBT)
-			xfs_ag_resv_rmapbt_alloc(sc->mp, sc->sa.agno);
+			xfs_ag_resv_rmapbt_alloc(sc->mp, sc->sa.pag->pag_agno);
 		return 0;
 	default:
 		break;
@@ -317,7 +317,7 @@ xrep_alloc_ag_block(
 	args.tp = sc->tp;
 	args.mp = sc->mp;
 	args.oinfo = *oinfo;
-	args.fsbno = XFS_AGB_TO_FSB(args.mp, sc->sa.agno, 0);
+	args.fsbno = XFS_AGB_TO_FSB(args.mp, sc->sa.pag->pag_agno, 0);
 	args.minlen = 1;
 	args.maxlen = 1;
 	args.prod = 1;
@@ -352,14 +352,14 @@ xrep_init_btblock(
 	trace_xrep_init_btblock(mp, XFS_FSB_TO_AGNO(mp, fsb),
 			XFS_FSB_TO_AGBNO(mp, fsb), btnum);
 
-	ASSERT(XFS_FSB_TO_AGNO(mp, fsb) == sc->sa.agno);
+	ASSERT(XFS_FSB_TO_AGNO(mp, fsb) == sc->sa.pag->pag_agno);
 	error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
 			XFS_FSB_TO_DADDR(mp, fsb), XFS_FSB_TO_BB(mp, 1), 0,
 			&bp);
 	if (error)
 		return error;
 	xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
-	xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno);
+	xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.pag->pag_agno);
 	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF);
 	xfs_trans_log_buf(tp, bp, 0, BBTOB(bp->b_length) - 1);
 	bp->b_ops = ops;
@@ -481,7 +481,7 @@ xrep_fix_freelist(
 
 	args.mp = sc->mp;
 	args.tp = sc->tp;
-	args.agno = sc->sa.agno;
+	args.agno = sc->sa.pag->pag_agno;
 	args.alignment = 1;
 	args.pag = sc->sa.pag;
 
@@ -611,11 +611,11 @@ xrep_reap_extents(
 	xfs_fsblock_t			fsbno;
 	int				error = 0;
 
-	ASSERT(xfs_sb_version_hasrmapbt(&sc->mp->m_sb));
+	ASSERT(xfs_has_rmapbt(sc->mp));
 
 	for_each_xbitmap_block(fsbno, bmr, n, bitmap) {
 		ASSERT(sc->ip != NULL ||
-		       XFS_FSB_TO_AGNO(sc->mp, fsbno) == sc->sa.agno);
+		       XFS_FSB_TO_AGNO(sc->mp, fsbno) == sc->sa.pag->pag_agno);
 		trace_xrep_dispose_btree_extent(sc->mp,
 				XFS_FSB_TO_AGNO(sc->mp, fsbno),
 				XFS_FSB_TO_AGBNO(sc->mp, fsbno), 1);
@@ -690,7 +690,7 @@ xrep_findroot_block(
 	int				block_level;
 	int				error = 0;
 
-	daddr = XFS_AGB_TO_DADDR(mp, ri->sc->sa.agno, agbno);
+	daddr = XFS_AGB_TO_DADDR(mp, ri->sc->sa.pag->pag_agno, agbno);
 
 	/*
 	 * Blocks in the AGFL have stale contents that might just happen to
@@ -819,7 +819,7 @@ xrep_findroot_block(
 	else
 		fab->root = NULLAGBLOCK;
 
-	trace_xrep_findroot_block(mp, ri->sc->sa.agno, agbno,
+	trace_xrep_findroot_block(mp, ri->sc->sa.pag->pag_agno, agbno,
 			be32_to_cpu(btblock->bb_magic), fab->height - 1);
 out:
 	xfs_trans_brelse(ri->sc->tp, bp);
@@ -833,7 +833,7 @@ out:
 STATIC int
 xrep_findroot_rmap(
 	struct xfs_btree_cur		*cur,
-	struct xfs_rmap_irec		*rec,
+	const struct xfs_rmap_irec	*rec,
 	void				*priv)
 {
 	struct xrep_findroot		*ri = priv;
diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c
index fc306573f0ac..8dae0345c7df 100644
--- a/fs/xfs/scrub/rmap.c
+++ b/fs/xfs/scrub/rmap.c
@@ -88,7 +88,7 @@ xchk_rmapbt_xref(
 STATIC int
 xchk_rmapbt_rec(
 	struct xchk_btree	*bs,
-	union xfs_btree_rec	*rec)
+	const union xfs_btree_rec *rec)
 {
 	struct xfs_mount	*mp = bs->cur->bc_mp;
 	struct xfs_rmap_irec	irec;
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c
index 37c0e2266c85..8fa012057405 100644
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -41,7 +41,7 @@ xchk_setup_rt(
 STATIC int
 xchk_rtbitmap_rec(
 	struct xfs_trans	*tp,
-	struct xfs_rtalloc_rec	*rec,
+	const struct xfs_rtalloc_rec *rec,
 	void			*priv)
 {
 	struct xfs_scrub	*sc = priv;
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 0e542636227c..51e4c61916d2 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -239,21 +239,21 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
 		.type	= ST_PERAG,
 		.setup	= xchk_setup_ag_iallocbt,
 		.scrub	= xchk_finobt,
-		.has	= xfs_sb_version_hasfinobt,
+		.has	= xfs_has_finobt,
 		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_RMAPBT] = {	/* rmapbt */
 		.type	= ST_PERAG,
 		.setup	= xchk_setup_ag_rmapbt,
 		.scrub	= xchk_rmapbt,
-		.has	= xfs_sb_version_hasrmapbt,
+		.has	= xfs_has_rmapbt,
 		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_REFCNTBT] = {	/* refcountbt */
 		.type	= ST_PERAG,
 		.setup	= xchk_setup_ag_refcountbt,
 		.scrub	= xchk_refcountbt,
-		.has	= xfs_sb_version_hasreflink,
+		.has	= xfs_has_reflink,
 		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_INODE] = {	/* inode record */
@@ -308,14 +308,14 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
 		.type	= ST_FS,
 		.setup	= xchk_setup_rt,
 		.scrub	= xchk_rtbitmap,
-		.has	= xfs_sb_version_hasrealtime,
+		.has	= xfs_has_realtime,
 		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_RTSUM] = {	/* realtime summary */
 		.type	= ST_FS,
 		.setup	= xchk_setup_rt,
 		.scrub	= xchk_rtsummary,
-		.has	= xfs_sb_version_hasrealtime,
+		.has	= xfs_has_realtime,
 		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_UQUOTA] = {	/* user quota */
@@ -383,7 +383,7 @@ xchk_validate_inputs(
 	if (ops->setup == NULL || ops->scrub == NULL)
 		goto out;
 	/* Does this fs even support this type of metadata? */
-	if (ops->has && !ops->has(&mp->m_sb))
+	if (ops->has && !ops->has(mp))
 		goto out;
 
 	error = -EINVAL;
@@ -415,11 +415,11 @@ xchk_validate_inputs(
 	 */
 	if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) {
 		error = -EOPNOTSUPP;
-		if (!xfs_sb_version_hascrc(&mp->m_sb))
+		if (!xfs_has_crc(mp))
 			goto out;
 
 		error = -EROFS;
-		if (mp->m_flags & XFS_MOUNT_RDONLY)
+		if (xfs_is_readonly(mp))
 			goto out;
 	}
 
@@ -464,9 +464,6 @@ xfs_scrub_metadata(
 	struct xfs_scrub		sc = {
 		.file			= file,
 		.sm			= sm,
-		.sa			= {
-			.agno		= NULLAGNUMBER,
-		},
 	};
 	struct xfs_mount		*mp = XFS_I(file_inode(file))->i_mount;
 	int				error = 0;
@@ -480,10 +477,10 @@ xfs_scrub_metadata(
 
 	/* Forbidden if we are shut down or mounted norecovery. */
 	error = -ESHUTDOWN;
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		goto out;
 	error = -ENOTRECOVERABLE;
-	if (mp->m_flags & XFS_MOUNT_NORECOVERY)
+	if (xfs_has_norecovery(mp))
 		goto out;
 
 	error = xchk_validate_inputs(mp, sm);
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 08a483cb46e2..80e5026bba44 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -27,7 +27,7 @@ struct xchk_meta_ops {
 	int		(*repair)(struct xfs_scrub *);
 
 	/* Decide if we even have this piece of metadata. */
-	bool		(*has)(struct xfs_sb *);
+	bool		(*has)(struct xfs_mount *);
 
 	/* type describing required/allowed inputs */
 	enum xchk_type	type;
@@ -35,7 +35,6 @@ struct xchk_meta_ops {
 
 /* Buffer pointers and btree cursors for an entire AG. */
 struct xchk_ag {
-	xfs_agnumber_t		agno;
 	struct xfs_perag	*pag;
 
 	/* AG btree roots */
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
index 03882a605a3c..c0ef53fe6611 100644
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -22,11 +22,11 @@ xchk_btree_cur_fsbno(
 	int			level)
 {
 	if (level < cur->bc_nlevels && cur->bc_bufs[level])
-		return XFS_DADDR_TO_FSB(cur->bc_mp, cur->bc_bufs[level]->b_bn);
-	else if (level == cur->bc_nlevels - 1 &&
-		 cur->bc_flags & XFS_BTREE_LONG_PTRS)
+		return XFS_DADDR_TO_FSB(cur->bc_mp,
+				xfs_buf_daddr(cur->bc_bufs[level]));
+	if (level == cur->bc_nlevels - 1 && cur->bc_flags & XFS_BTREE_LONG_PTRS)
 		return XFS_INO_TO_FSB(cur->bc_mp, cur->bc_ino.ip->i_ino);
-	else if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS))
+	if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS))
 		return XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno, 0);
 	return NULLFSBLOCK;
 }
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index e46f5cef90da..a7bbb84f91a7 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -2,6 +2,10 @@
 /*
  * Copyright (C) 2017 Oracle.  All Rights Reserved.
  * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * NOTE: none of these tracepoints shall be considered a stable kernel ABI
+ * as they can change at any time.  See xfs_trace.h for documentation of
+ * specific units found in tracepoint output.
  */
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM xfs_scrub
@@ -79,6 +83,16 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS);
 	{ XFS_SCRUB_TYPE_PQUOTA,	"prjquota" }, \
 	{ XFS_SCRUB_TYPE_FSCOUNTERS,	"fscounters" }
 
+#define XFS_SCRUB_FLAG_STRINGS \
+	{ XFS_SCRUB_IFLAG_REPAIR,		"repair" }, \
+	{ XFS_SCRUB_OFLAG_CORRUPT,		"corrupt" }, \
+	{ XFS_SCRUB_OFLAG_PREEN,		"preen" }, \
+	{ XFS_SCRUB_OFLAG_XFAIL,		"xfail" }, \
+	{ XFS_SCRUB_OFLAG_XCORRUPT,		"xcorrupt" }, \
+	{ XFS_SCRUB_OFLAG_INCOMPLETE,		"incomplete" }, \
+	{ XFS_SCRUB_OFLAG_WARNING,		"warning" }, \
+	{ XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED,	"norepair" }
+
 DECLARE_EVENT_CLASS(xchk_class,
 	TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_metadata *sm,
 		 int error),
@@ -103,14 +117,14 @@ DECLARE_EVENT_CLASS(xchk_class,
 		__entry->flags = sm->sm_flags;
 		__entry->error = error;
 	),
-	TP_printk("dev %d:%d ino 0x%llx type %s agno %u inum %llu gen %u flags 0x%x error %d",
+	TP_printk("dev %d:%d ino 0x%llx type %s agno 0x%x inum 0x%llx gen 0x%x flags (%s) error %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
 		  __entry->agno,
 		  __entry->inum,
 		  __entry->gen,
-		  __entry->flags,
+		  __print_flags(__entry->flags, "|", XFS_SCRUB_FLAG_STRINGS),
 		  __entry->error)
 )
 #define DEFINE_SCRUB_EVENT(name) \
@@ -145,7 +159,7 @@ TRACE_EVENT(xchk_op_error,
 		__entry->error = error;
 		__entry->ret_ip = ret_ip;
 	),
-	TP_printk("dev %d:%d type %s agno %u agbno %u error %d ret_ip %pS",
+	TP_printk("dev %d:%d type %s agno 0x%x agbno 0x%x error %d ret_ip %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
 		  __entry->agno,
@@ -176,10 +190,10 @@ TRACE_EVENT(xchk_file_op_error,
 		__entry->error = error;
 		__entry->ret_ip = ret_ip;
 	),
-	TP_printk("dev %d:%d ino 0x%llx fork %d type %s offset %llu error %d ret_ip %pS",
+	TP_printk("dev %d:%d ino 0x%llx fork %s type %s fileoff 0x%llx error %d ret_ip %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
-		  __entry->whichfork,
+		  __print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS),
 		  __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
 		  __entry->offset,
 		  __entry->error,
@@ -193,29 +207,21 @@ DECLARE_EVENT_CLASS(xchk_block_error_class,
 		__field(dev_t, dev)
 		__field(unsigned int, type)
 		__field(xfs_agnumber_t, agno)
-		__field(xfs_agblock_t, bno)
+		__field(xfs_agblock_t, agbno)
 		__field(void *, ret_ip)
 	),
 	TP_fast_assign(
-		xfs_fsblock_t	fsbno;
-		xfs_agnumber_t	agno;
-		xfs_agblock_t	bno;
-
-		fsbno = XFS_DADDR_TO_FSB(sc->mp, daddr);
-		agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
-		bno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
-
 		__entry->dev = sc->mp->m_super->s_dev;
 		__entry->type = sc->sm->sm_type;
-		__entry->agno = agno;
-		__entry->bno = bno;
+		__entry->agno = xfs_daddr_to_agno(sc->mp, daddr);
+		__entry->agbno = xfs_daddr_to_agbno(sc->mp, daddr);
 		__entry->ret_ip = ret_ip;
 	),
-	TP_printk("dev %d:%d type %s agno %u agbno %u ret_ip %pS",
+	TP_printk("dev %d:%d type %s agno 0x%x agbno 0x%x ret_ip %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
 		  __entry->agno,
-		  __entry->bno,
+		  __entry->agbno,
 		  __entry->ret_ip)
 )
 
@@ -281,10 +287,10 @@ DECLARE_EVENT_CLASS(xchk_fblock_error_class,
 		__entry->offset = offset;
 		__entry->ret_ip = ret_ip;
 	),
-	TP_printk("dev %d:%d ino 0x%llx fork %d type %s offset %llu ret_ip %pS",
+	TP_printk("dev %d:%d ino 0x%llx fork %s type %s fileoff 0x%llx ret_ip %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
-		  __entry->whichfork,
+		  __print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS),
 		  __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
 		  __entry->offset,
 		  __entry->ret_ip)
@@ -346,7 +352,7 @@ TRACE_EVENT(xchk_btree_op_error,
 		__entry->error = error;
 		__entry->ret_ip = ret_ip;
 	),
-	TP_printk("dev %d:%d type %s btree %s level %d ptr %d agno %u agbno %u error %d ret_ip %pS",
+	TP_printk("dev %d:%d type %s btree %s level %d ptr %d agno 0x%x agbno 0x%x error %d ret_ip %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
 		  __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
@@ -389,10 +395,10 @@ TRACE_EVENT(xchk_ifork_btree_op_error,
 		__entry->error = error;
 		__entry->ret_ip = ret_ip;
 	),
-	TP_printk("dev %d:%d ino 0x%llx fork %d type %s btree %s level %d ptr %d agno %u agbno %u error %d ret_ip %pS",
+	TP_printk("dev %d:%d ino 0x%llx fork %s type %s btree %s level %d ptr %d agno 0x%x agbno 0x%x error %d ret_ip %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
-		  __entry->whichfork,
+		  __print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS),
 		  __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
 		  __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
 		  __entry->level,
@@ -428,7 +434,7 @@ TRACE_EVENT(xchk_btree_error,
 		__entry->ptr = cur->bc_ptrs[level];
 		__entry->ret_ip = ret_ip;
 	),
-	TP_printk("dev %d:%d type %s btree %s level %d ptr %d agno %u agbno %u ret_ip %pS",
+	TP_printk("dev %d:%d type %s btree %s level %d ptr %d agno 0x%x agbno 0x%x ret_ip %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
 		  __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
@@ -468,10 +474,10 @@ TRACE_EVENT(xchk_ifork_btree_error,
 		__entry->ptr = cur->bc_ptrs[level];
 		__entry->ret_ip = ret_ip;
 	),
-	TP_printk("dev %d:%d ino 0x%llx fork %d type %s btree %s level %d ptr %d agno %u agbno %u ret_ip %pS",
+	TP_printk("dev %d:%d ino 0x%llx fork %s type %s btree %s level %d ptr %d agno 0x%x agbno 0x%x ret_ip %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
-		  __entry->whichfork,
+		  __print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS),
 		  __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
 		  __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
 		  __entry->level,
@@ -507,7 +513,7 @@ DECLARE_EVENT_CLASS(xchk_sbtree_class,
 		__entry->nlevels = cur->bc_nlevels;
 		__entry->ptr = cur->bc_ptrs[level];
 	),
-	TP_printk("dev %d:%d type %s btree %s agno %u agbno %u level %d nlevels %d ptr %d",
+	TP_printk("dev %d:%d type %s btree %s agno 0x%x agbno 0x%x level %d nlevels %d ptr %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
 		  __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
@@ -580,7 +586,7 @@ TRACE_EVENT(xchk_iallocbt_check_cluster,
 		__entry->holemask = holemask;
 		__entry->cluster_ino = cluster_ino;
 	),
-	TP_printk("dev %d:%d agno %d startino %u daddr 0x%llx len %d chunkino %u nr_inodes %u cluster_mask 0x%x holemask 0x%x cluster_ino %u",
+	TP_printk("dev %d:%d agno 0x%x startino 0x%x daddr 0x%llx bbcount 0x%x chunkino 0x%x nr_inodes %u cluster_mask 0x%x holemask 0x%x cluster_ino 0x%x",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
 		  __entry->startino,
@@ -670,7 +676,7 @@ DECLARE_EVENT_CLASS(xrep_extent_class,
 		__entry->agbno = agbno;
 		__entry->len = len;
 	),
-	TP_printk("dev %d:%d agno %u agbno %u len %u",
+	TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
 		  __entry->agbno,
@@ -707,7 +713,7 @@ DECLARE_EVENT_CLASS(xrep_rmap_class,
 		__entry->offset = offset;
 		__entry->flags = flags;
 	),
-	TP_printk("dev %d:%d agno %u agbno %u len %u owner %lld offset %llu flags 0x%x",
+	TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%x",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
 		  __entry->agbno,
@@ -745,7 +751,7 @@ TRACE_EVENT(xrep_refcount_extent_fn,
 		__entry->blockcount = irec->rc_blockcount;
 		__entry->refcount = irec->rc_refcount;
 	),
-	TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u",
+	TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
 		  __entry->startblock,
@@ -769,7 +775,7 @@ TRACE_EVENT(xrep_init_btblock,
 		__entry->agbno = agbno;
 		__entry->btnum = btnum;
 	),
-	TP_printk("dev %d:%d agno %u agbno %u btree %s",
+	TP_printk("dev %d:%d agno 0x%x agbno 0x%x btree %s",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
 		  __entry->agbno,
@@ -793,7 +799,7 @@ TRACE_EVENT(xrep_findroot_block,
 		__entry->magic = magic;
 		__entry->level = level;
 	),
-	TP_printk("dev %d:%d agno %u agbno %u magic 0x%x level %u",
+	TP_printk("dev %d:%d agno 0x%x agbno 0x%x magic 0x%x level %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
 		  __entry->agbno,
@@ -821,7 +827,7 @@ TRACE_EVENT(xrep_calc_ag_resblks,
 		__entry->freelen = freelen;
 		__entry->usedlen = usedlen;
 	),
-	TP_printk("dev %d:%d agno %d icount %u aglen %u freelen %u usedlen %u",
+	TP_printk("dev %d:%d agno 0x%x icount %u aglen %u freelen %u usedlen %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
 		  __entry->icount,
@@ -850,7 +856,7 @@ TRACE_EVENT(xrep_calc_ag_resblks_btsize,
 		__entry->rmapbt_sz = rmapbt_sz;
 		__entry->refcbt_sz = refcbt_sz;
 	),
-	TP_printk("dev %d:%d agno %d bno %u ino %u rmap %u refcount %u",
+	TP_printk("dev %d:%d agno 0x%x bnobt %u inobt %u rmapbt %u refcountbt %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
 		  __entry->bnobt_sz,
@@ -894,7 +900,7 @@ TRACE_EVENT(xrep_ialloc_insert,
 		__entry->freecount = freecount;
 		__entry->freemask = freemask;
 	),
-	TP_printk("dev %d:%d agno %d startino %u holemask 0x%x count %u freecount %u freemask 0x%llx",
+	TP_printk("dev %d:%d agno 0x%x startino 0x%x holemask 0x%x count %u freecount %u freemask 0x%llx",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
 		  __entry->startino,
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index d02bef24b32b..5c52ee869272 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -125,7 +125,7 @@ xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl)
 }
 
 struct posix_acl *
-xfs_get_acl(struct inode *inode, int type)
+xfs_get_acl(struct inode *inode, int type, bool rcu)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
@@ -137,6 +137,9 @@ xfs_get_acl(struct inode *inode, int type)
 	};
 	int			error;
 
+	if (rcu)
+		return ERR_PTR(-ECHILD);
+
 	trace_xfs_get_acl(ip);
 
 	switch (type) {
@@ -232,7 +235,7 @@ xfs_acl_set_mode(
 	inode->i_ctime = current_time(inode);
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 
-	if (mp->m_flags & XFS_MOUNT_WSYNC)
+	if (xfs_has_wsync(mp))
 		xfs_trans_set_sync(tp);
 	return xfs_trans_commit(tp);
 }
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 7bdb3a4ed798..bb6abdcb265d 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -10,13 +10,13 @@ struct inode;
 struct posix_acl;
 
 #ifdef CONFIG_XFS_POSIX_ACL
-extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
+extern struct posix_acl *xfs_get_acl(struct inode *inode, int type, bool rcu);
 extern int xfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
 		       struct posix_acl *acl, int type);
 extern int __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
 void xfs_forget_acl(struct inode *inode, const char *name);
 #else
-static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type)
+static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type, bool rcu)
 {
 	return NULL;
 }
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index cb4e0fcf4c76..34fc6148032a 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -97,7 +97,7 @@ xfs_end_ioend(
 	/*
 	 * Just clean up the in-memory structures if the fs has been shut down.
 	 */
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+	if (xfs_is_shutdown(ip->i_mount)) {
 		error = -EIO;
 		goto done;
 	}
@@ -260,7 +260,7 @@ xfs_map_blocks(
 	int			retries = 0;
 	int			error = 0;
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	/*
@@ -440,7 +440,7 @@ xfs_discard_page(
 	xfs_fileoff_t		pageoff_fsb = XFS_B_TO_FSBT(mp, pageoff);
 	int			error;
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		goto out_invalidate;
 
 	xfs_alert_ratelimited(mp,
@@ -449,7 +449,7 @@ xfs_discard_page(
 
 	error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
 			i_blocks_per_page(inode, page) - pageoff_fsb);
-	if (error && !XFS_FORCED_SHUTDOWN(mp))
+	if (error && !xfs_is_shutdown(mp))
 		xfs_alert(mp, "page discard unable to remove delalloc mapping.");
 out_invalidate:
 	iomap_invalidatepage(page, pageoff, PAGE_SIZE - pageoff);
@@ -462,22 +462,6 @@ static const struct iomap_writeback_ops xfs_writeback_ops = {
 };
 
 STATIC int
-xfs_vm_writepage(
-	struct page		*page,
-	struct writeback_control *wbc)
-{
-	struct xfs_writepage_ctx wpc = { };
-
-	if (WARN_ON_ONCE(current->journal_info)) {
-		redirty_page_for_writepage(wbc, page);
-		unlock_page(page);
-		return 0;
-	}
-
-	return iomap_writepage(page, wbc, &wpc.ctx, &xfs_writeback_ops);
-}
-
-STATIC int
 xfs_vm_writepages(
 	struct address_space	*mapping,
 	struct writeback_control *wbc)
@@ -559,7 +543,6 @@ xfs_iomap_swapfile_activate(
 const struct address_space_operations xfs_address_space_operations = {
 	.readpage		= xfs_vm_readpage,
 	.readahead		= xfs_vm_readahead,
-	.writepage		= xfs_vm_writepage,
 	.writepages		= xfs_vm_writepages,
 	.set_page_dirty		= __set_page_dirty_nobuffers,
 	.releasepage		= iomap_releasepage,
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index aaa7e66c42d7..2b5da6218977 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -151,7 +151,7 @@ xfs_attr3_node_inactive(
 	}
 
 	xfs_da3_node_hdr_from_disk(dp->i_mount, &ichdr, bp->b_addr);
-	parent_blkno = bp->b_bn;
+	parent_blkno = xfs_buf_daddr(bp);
 	if (!ichdr.count) {
 		xfs_trans_brelse(*trans, bp);
 		return 0;
@@ -177,7 +177,7 @@ xfs_attr3_node_inactive(
 			return error;
 
 		/* save for re-read later */
-		child_blkno = XFS_BUF_ADDR(child_bp);
+		child_blkno = xfs_buf_daddr(child_bp);
 
 		/*
 		 * Invalidate the subtree, however we have to.
@@ -271,7 +271,7 @@ xfs_attr3_root_inactive(
 	error = xfs_da3_node_read(*trans, dp, 0, &bp, XFS_ATTR_FORK);
 	if (error)
 		return error;
-	blkno = bp->b_bn;
+	blkno = xfs_buf_daddr(bp);
 
 	/*
 	 * Invalidate the tree, even if the "tree" is only a single leaf block.
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 25dcc98d50e6..2d1e5134cebe 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -529,7 +529,7 @@ xfs_attr_list(
 
 	XFS_STATS_INC(dp->i_mount, xs_attr_list);
 
-	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
+	if (xfs_is_shutdown(dp->i_mount))
 		return -EIO;
 
 	lock_mode = xfs_ilock_attr_map_shared(dp);
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index e3a691937e92..03159970133f 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -24,7 +24,6 @@
 #include "xfs_error.h"
 #include "xfs_log_priv.h"
 #include "xfs_log_recover.h"
-#include "xfs_quota.h"
 
 kmem_zone_t	*xfs_bui_zone;
 kmem_zone_t	*xfs_bud_zone;
@@ -487,18 +486,10 @@ xfs_bui_item_recover(
 			XFS_ATTR_FORK : XFS_DATA_FORK;
 	bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
 
-	/* Grab the inode. */
-	error = xfs_iget(mp, NULL, bmap->me_owner, 0, 0, &ip);
+	error = xlog_recover_iget(mp, bmap->me_owner, &ip);
 	if (error)
 		return error;
 
-	error = xfs_qm_dqattach(ip);
-	if (error)
-		goto err_rele;
-
-	if (VFS_I(ip)->i_nlink == 0)
-		xfs_iflags_set(ip, XFS_IRECOVERY);
-
 	/* Allocate transaction and do the work. */
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
 			XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
@@ -522,6 +513,9 @@ xfs_bui_item_recover(
 	error = xfs_trans_log_finish_bmap_update(tp, budp, bui_type, ip,
 			whichfork, bmap->me_startoff, bmap->me_startblock,
 			&count, state);
+	if (error == -EFSCORRUPTED)
+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bmap,
+				sizeof(*bmap));
 	if (error)
 		goto err_cancel;
 
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 1cd3f940fa6a..73a36b7be3bd 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -731,7 +731,7 @@ xfs_free_eofblocks(
 
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
 	if (error) {
-		ASSERT(XFS_FORCED_SHUTDOWN(mp));
+		ASSERT(xfs_is_shutdown(mp));
 		return error;
 	}
 
@@ -789,7 +789,7 @@ xfs_alloc_file_space(
 
 	trace_xfs_alloc_file_space(ip);
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	error = xfs_qm_dqattach(ip);
@@ -1282,7 +1282,7 @@ xfs_swap_extents_check_format(
 	 * If we have to use the (expensive) rmap swap method, we can
 	 * handle any number of extents and any format.
 	 */
-	if (xfs_sb_version_hasrmapbt(&ip->i_mount->m_sb))
+	if (xfs_has_rmapbt(ip->i_mount))
 		return 0;
 
 	/*
@@ -1516,7 +1516,7 @@ xfs_swap_extent_forks(
 	 * event of a crash. Set the owner change log flags now and leave the
 	 * bmbt scan as the last step.
 	 */
-	if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
+	if (xfs_has_v3inodes(ip->i_mount)) {
 		if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE)
 			(*target_log_flags) |= XFS_ILOG_DOWNER;
 		if (tip->i_df.if_format == XFS_DINODE_FMT_BTREE)
@@ -1553,7 +1553,7 @@ xfs_swap_extent_forks(
 		(*src_log_flags) |= XFS_ILOG_DEXT;
 		break;
 	case XFS_DINODE_FMT_BTREE:
-		ASSERT(!xfs_sb_version_has_v3inode(&ip->i_mount->m_sb) ||
+		ASSERT(!xfs_has_v3inodes(ip->i_mount) ||
 		       (*src_log_flags & XFS_ILOG_DOWNER));
 		(*src_log_flags) |= XFS_ILOG_DBROOT;
 		break;
@@ -1565,7 +1565,7 @@ xfs_swap_extent_forks(
 		break;
 	case XFS_DINODE_FMT_BTREE:
 		(*target_log_flags) |= XFS_ILOG_DBROOT;
-		ASSERT(!xfs_sb_version_has_v3inode(&ip->i_mount->m_sb) ||
+		ASSERT(!xfs_has_v3inodes(ip->i_mount) ||
 		       (*target_log_flags & XFS_ILOG_DOWNER));
 		break;
 	}
@@ -1678,7 +1678,7 @@ xfs_swap_extents(
 	 * a block reservation because it's really just a remap operation
 	 * performed with log redo items!
 	 */
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+	if (xfs_has_rmapbt(mp)) {
 		int		w = XFS_DATA_FORK;
 		uint32_t	ipnext = ip->i_df.if_nextents;
 		uint32_t	tipnext	= tip->i_df.if_nextents;
@@ -1759,7 +1759,7 @@ xfs_swap_extents(
 	src_log_flags = XFS_ILOG_CORE;
 	target_log_flags = XFS_ILOG_CORE;
 
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+	if (xfs_has_rmapbt(mp))
 		error = xfs_swap_extent_rmap(&tp, ip, tip);
 	else
 		error = xfs_swap_extent_forks(tp, ip, tip, &src_log_flags,
@@ -1778,7 +1778,7 @@ xfs_swap_extents(
 	}
 
 	/* Swap the cow forks. */
-	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+	if (xfs_has_reflink(mp)) {
 		ASSERT(!ip->i_cowfp ||
 		       ip->i_cowfp->if_format == XFS_DINODE_FMT_EXTENTS);
 		ASSERT(!tip->i_cowfp ||
@@ -1820,7 +1820,7 @@ xfs_swap_extents(
 	 * If this is a synchronous mount, make sure that the
 	 * transaction goes to disk before returning to the user.
 	 */
-	if (mp->m_flags & XFS_MOUNT_WSYNC)
+	if (xfs_has_wsync(mp))
 		xfs_trans_set_sync(tp);
 
 	error = xfs_trans_commit(tp);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 3ab73567a0f5..5fa6cd947dd4 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -251,7 +251,7 @@ _xfs_buf_alloc(
 		return error;
 	}
 
-	bp->b_bn = map[0].bm_bn;
+	bp->b_rhash_key = map[0].bm_bn;
 	bp->b_length = 0;
 	for (i = 0; i < nmaps; i++) {
 		bp->b_maps[i].bm_bn = map[i].bm_bn;
@@ -315,7 +315,6 @@ xfs_buf_alloc_kmem(
 	struct xfs_buf	*bp,
 	xfs_buf_flags_t	flags)
 {
-	int		align_mask = xfs_buftarg_dma_alignment(bp->b_target);
 	xfs_km_flags_t	kmflag_mask = KM_NOFS;
 	size_t		size = BBTOB(bp->b_length);
 
@@ -323,7 +322,7 @@ xfs_buf_alloc_kmem(
 	if (!(flags & XBF_READ))
 		kmflag_mask |= KM_ZERO;
 
-	bp->b_addr = kmem_alloc_io(size, align_mask, kmflag_mask);
+	bp->b_addr = kmem_alloc(size, kmflag_mask);
 	if (!bp->b_addr)
 		return -ENOMEM;
 
@@ -460,7 +459,7 @@ _xfs_buf_obj_cmp(
 	 */
 	BUILD_BUG_ON(offsetof(struct xfs_buf_map, bm_bn) != 0);
 
-	if (bp->b_bn != map->bm_bn)
+	if (bp->b_rhash_key != map->bm_bn)
 		return 1;
 
 	if (unlikely(bp->b_length != map->bm_len)) {
@@ -482,7 +481,7 @@ static const struct rhashtable_params xfs_buf_hash_params = {
 	.min_size		= 32,	/* empty AGs have minimal footprint */
 	.nelem_hint		= 16,
 	.key_len		= sizeof(xfs_daddr_t),
-	.key_offset		= offsetof(struct xfs_buf, b_bn),
+	.key_offset		= offsetof(struct xfs_buf, b_rhash_key),
 	.head_offset		= offsetof(struct xfs_buf, b_rhash_head),
 	.automatic_shrinking	= true,
 	.obj_cmpfn		= _xfs_buf_obj_cmp,
@@ -814,7 +813,7 @@ xfs_buf_read_map(
 	 * buffer.
 	 */
 	if (error) {
-		if (!XFS_FORCED_SHUTDOWN(target->bt_mount))
+		if (!xfs_is_shutdown(target->bt_mount))
 			xfs_buf_ioerror_alert(bp, fa);
 
 		bp->b_flags &= ~XBF_DONE;
@@ -854,7 +853,9 @@ xfs_buf_readahead_map(
 
 /*
  * Read an uncached buffer from disk. Allocates and returns a locked
- * buffer containing the disk contents or nothing.
+ * buffer containing the disk contents or nothing. Uncached buffers always have
+ * a cache index of XFS_BUF_DADDR_NULL so we can easily determine if the buffer
+ * is cached or uncached during fault diagnosis.
  */
 int
 xfs_buf_read_uncached(
@@ -876,7 +877,7 @@ xfs_buf_read_uncached(
 
 	/* set up the buffer for a read IO */
 	ASSERT(bp->b_map_count == 1);
-	bp->b_bn = XFS_BUF_DADDR_NULL;  /* always null for uncached buffers */
+	bp->b_rhash_key = XFS_BUF_DADDR_NULL;
 	bp->b_maps[0].bm_bn = daddr;
 	bp->b_flags |= XBF_READ;
 	bp->b_ops = ops;
@@ -1145,7 +1146,7 @@ xfs_buf_ioerror_permanent(
 		return true;
 
 	/* At unmount we may treat errors differently */
-	if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount)
+	if (xfs_is_unmounting(mp) && mp->m_fail_unmount)
 		return true;
 
 	return false;
@@ -1179,7 +1180,7 @@ xfs_buf_ioend_handle_error(
 	 * If we've already decided to shutdown the filesystem because of I/O
 	 * errors, there's no point in giving this a retry.
 	 */
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		goto out_stale;
 
 	xfs_buf_ioerror_alert_ratelimited(bp);
@@ -1336,7 +1337,7 @@ xfs_buf_ioerror_alert(
 {
 	xfs_buf_alert_ratelimited(bp, "XFS: metadata IO error",
 		"metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d",
-				  func, (uint64_t)XFS_BUF_ADDR(bp),
+				  func, (uint64_t)xfs_buf_daddr(bp),
 				  bp->b_length, -bp->b_error);
 }
 
@@ -1514,17 +1515,18 @@ _xfs_buf_ioapply(
 						   SHUTDOWN_CORRUPT_INCORE);
 				return;
 			}
-		} else if (bp->b_bn != XFS_BUF_DADDR_NULL) {
+		} else if (bp->b_rhash_key != XFS_BUF_DADDR_NULL) {
 			struct xfs_mount *mp = bp->b_mount;
 
 			/*
 			 * non-crc filesystems don't attach verifiers during
 			 * log recovery, so don't warn for such filesystems.
 			 */
-			if (xfs_sb_version_hascrc(&mp->m_sb)) {
+			if (xfs_has_crc(mp)) {
 				xfs_warn(mp,
 					"%s: no buf ops on daddr 0x%llx len %d",
-					__func__, bp->b_bn, bp->b_length);
+					__func__, xfs_buf_daddr(bp),
+					bp->b_length);
 				xfs_hex_dump(bp->b_addr,
 						XFS_CORRUPTION_DUMP_LEN);
 				dump_stack();
@@ -1592,7 +1594,7 @@ __xfs_buf_submit(
 	ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
 
 	/* on shutdown we stale and complete the buffer immediately */
-	if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
+	if (xfs_is_shutdown(bp->b_mount)) {
 		xfs_buf_ioend_fail(bp);
 		return -EIO;
 	}
@@ -1794,7 +1796,7 @@ xfs_buftarg_drain(
 				xfs_buf_alert_ratelimited(bp,
 					"XFS: Corruption Alert",
 "Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!",
-					(long long)bp->b_bn);
+					(long long)xfs_buf_daddr(bp));
 			}
 			xfs_buf_rele(bp);
 		}
@@ -1809,7 +1811,7 @@ xfs_buftarg_drain(
 	 * down the fs.
 	 */
 	if (write_fail) {
-		ASSERT(XFS_FORCED_SHUTDOWN(btp->bt_mount));
+		ASSERT(xfs_is_shutdown(btp->bt_mount));
 		xfs_alert(btp->bt_mount,
 	      "Please run xfs_repair to determine the extent of the problem.");
 	}
@@ -2302,7 +2304,7 @@ xfs_verify_magic(
 	struct xfs_mount	*mp = bp->b_mount;
 	int			idx;
 
-	idx = xfs_sb_version_hascrc(&mp->m_sb);
+	idx = xfs_has_crc(mp);
 	if (WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx]))
 		return false;
 	return dmagic == bp->b_ops->magic[idx];
@@ -2320,7 +2322,7 @@ xfs_verify_magic16(
 	struct xfs_mount	*mp = bp->b_mount;
 	int			idx;
 
-	idx = xfs_sb_version_hascrc(&mp->m_sb);
+	idx = xfs_has_crc(mp);
 	if (WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx]))
 		return false;
 	return dmagic == bp->b_ops->magic16[idx];
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 464dc548fa23..6b0200b8007d 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -133,7 +133,8 @@ struct xfs_buf {
 	 * fast-path on locking.
 	 */
 	struct rhash_head	b_rhash_head;	/* pag buffer hash node */
-	xfs_daddr_t		b_bn;		/* block number of buffer */
+
+	xfs_daddr_t		b_rhash_key;	/* buffer cache index */
 	int			b_length;	/* size of buffer in BBs */
 	atomic_t		b_hold;		/* reference count */
 	atomic_t		b_lru_ref;	/* lru reclaim ref count */
@@ -296,18 +297,10 @@ extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *);
 extern int xfs_buf_init(void);
 extern void xfs_buf_terminate(void);
 
-/*
- * These macros use the IO block map rather than b_bn. b_bn is now really
- * just for the buffer cache index for cached buffers. As IO does not use b_bn
- * anymore, uncached buffers do not use b_bn at all and hence must modify the IO
- * map directly. Uncached buffers are not allowed to be discontiguous, so this
- * is safe to do.
- *
- * In future, uncached buffers will pass the block number directly to the io
- * request function and hence these macros will go away at that point.
- */
-#define XFS_BUF_ADDR(bp)		((bp)->b_maps[0].bm_bn)
-#define XFS_BUF_SET_ADDR(bp, bno)	((bp)->b_maps[0].bm_bn = (xfs_daddr_t)(bno))
+static inline xfs_daddr_t xfs_buf_daddr(struct xfs_buf *bp)
+{
+	return bp->b_maps[0].bm_bn;
+}
 
 void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref);
 
@@ -355,12 +348,6 @@ extern int xfs_setsize_buftarg(struct xfs_buftarg *, unsigned int);
 #define xfs_getsize_buftarg(buftarg)	block_size((buftarg)->bt_bdev)
 #define xfs_readonly_buftarg(buftarg)	bdev_read_only((buftarg)->bt_bdev)
 
-static inline int
-xfs_buftarg_dma_alignment(struct xfs_buftarg *bt)
-{
-	return queue_dma_alignment(bt->bt_bdev->bd_disk->queue);
-}
-
 int xfs_buf_reverify(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
 bool xfs_verify_magic(struct xfs_buf *bp, __be32 dmagic);
 bool xfs_verify_magic16(struct xfs_buf *bp, __be16 dmagic);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 2828ce45b701..b1ab100c09e1 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -428,7 +428,7 @@ xfs_buf_item_format(
 	 * occurs during recovery.
 	 */
 	if (bip->bli_flags & XFS_BLI_INODE_BUF) {
-		if (xfs_sb_version_has_v3inode(&lip->li_mountp->m_sb) ||
+		if (xfs_has_v3inodes(lip->li_mountp) ||
 		    !((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
 		      xfs_log_item_in_current_chkpt(lip)))
 			bip->__bli_format.blf_flags |= XFS_BLF_INODE_BUF;
@@ -581,7 +581,7 @@ xfs_buf_item_push(
 	if (bp->b_flags & XBF_WRITE_FAIL) {
 		xfs_buf_alert_ratelimited(bp, "XFS: Failing async write",
 	    "Failing async write on buffer block 0x%llx. Retrying async write.",
-					  (long long)bp->b_bn);
+					  (long long)xfs_buf_daddr(bp));
 	}
 
 	if (!xfs_buf_delwri_queue(bp, buffer_list))
@@ -616,7 +616,7 @@ xfs_buf_item_put(
 	 * that case, the bli is freed on buffer writeback completion.
 	 */
 	aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags) ||
-		  XFS_FORCED_SHUTDOWN(lip->li_mountp);
+		  xfs_is_shutdown(lip->li_mountp);
 	dirty = bip->bli_flags & XFS_BLI_DIRTY;
 	if (dirty && !aborted)
 		return false;
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index 4775485b4062..a476c7ef5d53 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -219,7 +219,7 @@ xlog_recover_validate_buf_type(
 	 * inconsistent state resulting in verification failures. Hence for now
 	 * just avoid the verification stage for non-crc filesystems
 	 */
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return;
 
 	magic32 = be32_to_cpu(*(__be32 *)bp->b_addr);
@@ -497,7 +497,7 @@ xlog_recover_do_reg_buffer(
 			if (fa) {
 				xfs_alert(mp,
 	"dquot corrupt at %pS trying to replay into block 0x%llx",
-					fa, bp->b_bn);
+					fa, xfs_buf_daddr(bp));
 				goto next;
 			}
 		}
@@ -597,7 +597,7 @@ xlog_recover_do_inode_buffer(
 	 * Post recovery validation only works properly on CRC enabled
 	 * filesystems.
 	 */
-	if (xfs_sb_version_hascrc(&mp->m_sb))
+	if (xfs_has_crc(mp))
 		bp->b_ops = &xfs_inode_buf_ops;
 
 	inodes_per_buf = BBTOB(bp->b_length) >> mp->m_sb.sb_inodelog;
@@ -710,7 +710,7 @@ xlog_recover_get_buf_lsn(
 	uint16_t		blft;
 
 	/* v4 filesystems always recover immediately */
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		goto recover_immediately;
 
 	/*
@@ -787,7 +787,7 @@ xlog_recover_get_buf_lsn(
 		 * the relevant UUID in the superblock.
 		 */
 		lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
-		if (xfs_sb_version_hasmetauuid(&mp->m_sb))
+		if (xfs_has_metauuid(mp))
 			uuid = &((struct xfs_dsb *)blk)->sb_meta_uuid;
 		else
 			uuid = &((struct xfs_dsb *)blk)->sb_uuid;
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index da1cc683560c..8310005af00f 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -32,7 +32,7 @@ xfs_dir3_get_dtype(
 	struct xfs_mount	*mp,
 	uint8_t			filetype)
 {
-	if (!xfs_sb_version_hasftype(&mp->m_sb))
+	if (!xfs_has_ftype(mp))
 		return DT_UNKNOWN;
 
 	if (filetype >= XFS_DIR3_FT_MAX)
@@ -512,7 +512,7 @@ xfs_readdir(
 
 	trace_xfs_readdir(dp);
 
-	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
+	if (xfs_is_shutdown(dp->i_mount))
 		return -EIO;
 
 	ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 736df5660f1f..0191de8ce9ce 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -169,7 +169,7 @@ xfs_ioc_trim(
 	 * We haven't recovered the log, so we cannot use our bnobt-guided
 	 * storage zapping commands.
 	 */
-	if (mp->m_flags & XFS_MOUNT_NORECOVERY)
+	if (xfs_has_norecovery(mp))
 		return -EROFS;
 
 	if (copy_from_user(&range, urange, sizeof(range)))
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index ecd5059d6928..c15d61d47a06 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -223,9 +223,9 @@ xfs_qm_init_dquot_blk(
 		d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
 		d->dd_diskdq.d_id = cpu_to_be32(curid);
 		d->dd_diskdq.d_type = type;
-		if (curid > 0 && xfs_sb_version_hasbigtime(&mp->m_sb))
+		if (curid > 0 && xfs_has_bigtime(mp))
 			d->dd_diskdq.d_type |= XFS_DQTYPE_BIGTIME;
-		if (xfs_sb_version_hascrc(&mp->m_sb)) {
+		if (xfs_has_crc(mp)) {
 			uuid_copy(&d->dd_uuid, &mp->m_sb.sb_meta_uuid);
 			xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
 					 XFS_DQUOT_CRC_OFF);
@@ -526,7 +526,7 @@ xfs_dquot_check_type(
 	 * expect an exact match for user dquots and for non-root group and
 	 * project dquots.
 	 */
-	if (xfs_sb_version_hascrc(&dqp->q_mount->m_sb) ||
+	if (xfs_has_crc(dqp->q_mount) ||
 	    dqp_type == XFS_DQTYPE_USER || dqp->q_id != 0)
 		return ddqp_type == dqp_type;
 
@@ -847,9 +847,6 @@ xfs_qm_dqget_checks(
 	struct xfs_mount	*mp,
 	xfs_dqtype_t		type)
 {
-	if (WARN_ON_ONCE(!XFS_IS_QUOTA_RUNNING(mp)))
-		return -ESRCH;
-
 	switch (type) {
 	case XFS_DQTYPE_USER:
 		if (!XFS_IS_UQUOTA_ON(mp))
@@ -1222,7 +1219,7 @@ xfs_qm_dqflush_check(
 
 	/* bigtime flag should never be set on root dquots */
 	if (dqp->q_type & XFS_DQTYPE_BIGTIME) {
-		if (!xfs_sb_version_hasbigtime(&dqp->q_mount->m_sb))
+		if (!xfs_has_bigtime(dqp->q_mount))
 			return __this_address;
 		if (dqp->q_id == 0)
 			return __this_address;
@@ -1301,7 +1298,7 @@ xfs_qm_dqflush(
 	 * buffer always has a valid CRC. This ensures there is no possibility
 	 * of a dquot without an up-to-date CRC getting to disk.
 	 */
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		dqblk->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn);
 		xfs_update_cksum((char *)dqblk, sizeof(struct xfs_dqblk),
 				 XFS_DQUOT_CRC_OFF);
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index f642884a6834..6b5e3cf40c8b 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -54,6 +54,16 @@ struct xfs_dquot_res {
 	xfs_qwarncnt_t		warnings;
 };
 
+static inline bool
+xfs_dquot_res_over_limits(
+	const struct xfs_dquot_res	*qres)
+{
+	if ((qres->softlimit && qres->softlimit < qres->reserved) ||
+	    (qres->hardlimit && qres->hardlimit < qres->reserved))
+		return true;
+	return false;
+}
+
 /*
  * The incore dquot structure
  */
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 8ed47b739b6c..6a1aae799cf1 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -218,137 +218,3 @@ xfs_qm_dquot_logitem_init(
 					&xfs_dquot_item_ops);
 	lp->qli_dquot = dqp;
 }
-
-/*------------------  QUOTAOFF LOG ITEMS  -------------------*/
-
-static inline struct xfs_qoff_logitem *QOFF_ITEM(struct xfs_log_item *lip)
-{
-	return container_of(lip, struct xfs_qoff_logitem, qql_item);
-}
-
-
-/*
- * This returns the number of iovecs needed to log the given quotaoff item.
- * We only need 1 iovec for an quotaoff item.  It just logs the
- * quotaoff_log_format structure.
- */
-STATIC void
-xfs_qm_qoff_logitem_size(
-	struct xfs_log_item	*lip,
-	int			*nvecs,
-	int			*nbytes)
-{
-	*nvecs += 1;
-	*nbytes += sizeof(struct xfs_qoff_logitem);
-}
-
-STATIC void
-xfs_qm_qoff_logitem_format(
-	struct xfs_log_item	*lip,
-	struct xfs_log_vec	*lv)
-{
-	struct xfs_qoff_logitem	*qflip = QOFF_ITEM(lip);
-	struct xfs_log_iovec	*vecp = NULL;
-	struct xfs_qoff_logformat *qlf;
-
-	qlf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_QUOTAOFF);
-	qlf->qf_type = XFS_LI_QUOTAOFF;
-	qlf->qf_size = 1;
-	qlf->qf_flags = qflip->qql_flags;
-	xlog_finish_iovec(lv, vecp, sizeof(struct xfs_qoff_logitem));
-}
-
-/*
- * There isn't much you can do to push a quotaoff item.  It is simply
- * stuck waiting for the log to be flushed to disk.
- */
-STATIC uint
-xfs_qm_qoff_logitem_push(
-	struct xfs_log_item	*lip,
-	struct list_head	*buffer_list)
-{
-	return XFS_ITEM_LOCKED;
-}
-
-STATIC xfs_lsn_t
-xfs_qm_qoffend_logitem_committed(
-	struct xfs_log_item	*lip,
-	xfs_lsn_t		lsn)
-{
-	struct xfs_qoff_logitem	*qfe = QOFF_ITEM(lip);
-	struct xfs_qoff_logitem	*qfs = qfe->qql_start_lip;
-
-	xfs_qm_qoff_logitem_relse(qfs);
-
-	kmem_free(lip->li_lv_shadow);
-	kmem_free(qfe);
-	return (xfs_lsn_t)-1;
-}
-
-STATIC void
-xfs_qm_qoff_logitem_release(
-	struct xfs_log_item	*lip)
-{
-	struct xfs_qoff_logitem	*qoff = QOFF_ITEM(lip);
-
-	if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
-		if (qoff->qql_start_lip)
-			xfs_qm_qoff_logitem_relse(qoff->qql_start_lip);
-		xfs_qm_qoff_logitem_relse(qoff);
-	}
-}
-
-static const struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
-	.iop_size	= xfs_qm_qoff_logitem_size,
-	.iop_format	= xfs_qm_qoff_logitem_format,
-	.iop_committed	= xfs_qm_qoffend_logitem_committed,
-	.iop_push	= xfs_qm_qoff_logitem_push,
-	.iop_release	= xfs_qm_qoff_logitem_release,
-};
-
-static const struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
-	.iop_size	= xfs_qm_qoff_logitem_size,
-	.iop_format	= xfs_qm_qoff_logitem_format,
-	.iop_push	= xfs_qm_qoff_logitem_push,
-	.iop_release	= xfs_qm_qoff_logitem_release,
-};
-
-/*
- * Delete the quotaoff intent from the AIL and free it. On success,
- * this should only be called for the start item. It can be used for
- * either on shutdown or abort.
- */
-void
-xfs_qm_qoff_logitem_relse(
-	struct xfs_qoff_logitem	*qoff)
-{
-	struct xfs_log_item	*lip = &qoff->qql_item;
-
-	ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags) ||
-	       test_bit(XFS_LI_ABORTED, &lip->li_flags) ||
-	       XFS_FORCED_SHUTDOWN(lip->li_mountp));
-	xfs_trans_ail_delete(lip, 0);
-	kmem_free(lip->li_lv_shadow);
-	kmem_free(qoff);
-}
-
-/*
- * Allocate and initialize an quotaoff item of the correct quota type(s).
- */
-struct xfs_qoff_logitem *
-xfs_qm_qoff_logitem_init(
-	struct xfs_mount	*mp,
-	struct xfs_qoff_logitem	*start,
-	uint			flags)
-{
-	struct xfs_qoff_logitem	*qf;
-
-	qf = kmem_zalloc(sizeof(struct xfs_qoff_logitem), 0);
-
-	xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ?
-			&xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops);
-	qf->qql_item.li_mountp = mp;
-	qf->qql_start_lip = start;
-	qf->qql_flags = flags;
-	return qf;
-}
diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h
index 2b86a43d7ce2..794710c24474 100644
--- a/fs/xfs/xfs_dquot_item.h
+++ b/fs/xfs/xfs_dquot_item.h
@@ -9,7 +9,6 @@
 struct xfs_dquot;
 struct xfs_trans;
 struct xfs_mount;
-struct xfs_qoff_logitem;
 
 struct xfs_dq_logitem {
 	struct xfs_log_item	qli_item;	/* common portion */
@@ -17,22 +16,6 @@ struct xfs_dq_logitem {
 	xfs_lsn_t		qli_flush_lsn;	/* lsn at last flush */
 };
 
-struct xfs_qoff_logitem {
-	struct xfs_log_item	qql_item;	/* common portion */
-	struct xfs_qoff_logitem *qql_start_lip;	/* qoff-start logitem, if any */
-	unsigned int		qql_flags;
-};
-
-
 void xfs_qm_dquot_logitem_init(struct xfs_dquot *dqp);
-struct xfs_qoff_logitem	*xfs_qm_qoff_logitem_init(struct xfs_mount *mp,
-		struct xfs_qoff_logitem *start,
-		uint flags);
-void xfs_qm_qoff_logitem_relse(struct xfs_qoff_logitem *);
-struct xfs_qoff_logitem	*xfs_trans_get_qoff_item(struct xfs_trans *tp,
-		struct xfs_qoff_logitem *startqoff,
-		uint flags);
-void xfs_trans_log_quotaoff_item(struct xfs_trans *tp,
-		struct xfs_qoff_logitem *qlp);
 
 #endif	/* __XFS_DQUOT_ITEM_H__ */
diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c
index 5875c7e1bd28..8966ba842395 100644
--- a/fs/xfs/xfs_dquot_item_recover.c
+++ b/fs/xfs/xfs_dquot_item_recover.c
@@ -136,7 +136,7 @@ xlog_recover_dquot_commit_pass2(
 	 * If the dquot has an LSN in it, recover the dquot only if it's less
 	 * than the lsn of the transaction we are replaying.
 	 */
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq;
 		xfs_lsn_t	lsn = be64_to_cpu(dqb->dd_lsn);
 
@@ -146,7 +146,7 @@ xlog_recover_dquot_commit_pass2(
 	}
 
 	memcpy(ddq, recddq, item->ri_buf[1].i_len);
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk),
 				 XFS_DQUOT_CRC_OFF);
 	}
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index ce3bc1b291a1..81c445e9489b 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -371,7 +371,7 @@ xfs_buf_corruption_error(
 
 	xfs_alert_tag(mp, XFS_PTAG_VERIFIER_ERROR,
 		  "Metadata corruption detected at %pS, %s block 0x%llx",
-		  fa, bp->b_ops->name, bp->b_bn);
+		  fa, bp->b_ops->name, xfs_buf_daddr(bp));
 
 	xfs_alert(mp, "Unmount and run xfs_repair");
 
@@ -402,7 +402,7 @@ xfs_buf_verifier_error(
 	xfs_alert_tag(mp, XFS_PTAG_VERIFIER_ERROR,
 		  "Metadata %s detected at %pS, %s block 0x%llx %s",
 		  bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
-		  fa, bp->b_ops->name, bp->b_bn, name);
+		  fa, bp->b_ops->name, xfs_buf_daddr(bp), name);
 
 	xfs_alert(mp, "Unmount and run xfs_repair");
 
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 1717b7508356..5735d5ea87ee 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -75,4 +75,16 @@ extern int xfs_errortag_clearall(struct xfs_mount *mp);
 #define		XFS_PTAG_FSBLOCK_ZERO		0x00000080
 #define		XFS_PTAG_VERIFIER_ERROR		0x00000100
 
+#define XFS_PTAG_STRINGS \
+	{ XFS_NO_PTAG,			"none" }, \
+	{ XFS_PTAG_IFLUSH,		"iflush" }, \
+	{ XFS_PTAG_LOGRES,		"logres" }, \
+	{ XFS_PTAG_AILDELETE,		"aildelete" }, \
+	{ XFS_PTAG_ERROR_REPORT	,	"error_report" }, \
+	{ XFS_PTAG_SHUTDOWN_CORRUPT,	"corrupt" }, \
+	{ XFS_PTAG_SHUTDOWN_IOERROR,	"ioerror" }, \
+	{ XFS_PTAG_SHUTDOWN_LOGERROR,	"logerror" }, \
+	{ XFS_PTAG_FSBLOCK_ZERO,	"fsb_zero" }, \
+	{ XFS_PTAG_VERIFIER_ERROR,	"verifier" }
+
 #endif	/* __XFS_ERROR_H__ */
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index 1da59bdff245..1064c2342876 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -44,6 +44,7 @@ xfs_fs_encode_fh(
 	int		*max_len,
 	struct inode	*parent)
 {
+	struct xfs_mount	*mp = XFS_M(inode->i_sb);
 	struct fid		*fid = (struct fid *)fh;
 	struct xfs_fid64	*fid64 = (struct xfs_fid64 *)fh;
 	int			fileid_type;
@@ -63,8 +64,7 @@ xfs_fs_encode_fh(
 	 * large enough filesystem may contain them, thus the slightly
 	 * confusing looking conditional below.
 	 */
-	if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) ||
-	    (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES))
+	if (!xfs_has_small_inums(mp) || xfs_is_inode32(mp))
 		fileid_type |= XFS_FILEID_TYPE_64FLAG;
 
 	/*
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 2424230ca2c3..3f8a0713573a 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -629,6 +629,9 @@ xfs_efi_item_recover(
 		error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
 					      extp->ext_len,
 					      &XFS_RMAP_OINFO_ANY_OWNER, false);
+		if (error == -EFSCORRUPTED)
+			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+					extp, sizeof(*extp));
 		if (error)
 			goto abort_error;
 
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 3dfbdcdb0d1c..7aa943edfc02 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -185,7 +185,7 @@ xfs_file_fsync(
 	if (error)
 		return error;
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	xfs_iflags_clear(ip, XFS_ITRUNCATED);
@@ -318,7 +318,7 @@ xfs_file_read_iter(
 
 	XFS_STATS_INC(mp, xs_read_calls);
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	if (IS_DAX(inode))
@@ -462,7 +462,7 @@ xfs_dio_write_end_io(
 
 	trace_xfs_end_io_direct_write(ip, offset, size);
 
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+	if (xfs_is_shutdown(ip->i_mount))
 		return -EIO;
 
 	if (error)
@@ -814,7 +814,7 @@ xfs_file_write_iter(
 	if (ocount == 0)
 		return 0;
 
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+	if (xfs_is_shutdown(ip->i_mount))
 		return -EIO;
 
 	if (IS_DAX(inode))
@@ -1122,7 +1122,7 @@ static inline bool xfs_file_sync_writes(struct file *filp)
 {
 	struct xfs_inode	*ip = XFS_I(file_inode(filp));
 
-	if (ip->i_mount->m_flags & XFS_MOUNT_WSYNC)
+	if (xfs_has_wsync(ip->i_mount))
 		return true;
 	if (filp->f_flags & (__O_SYNC | O_DSYNC))
 		return true;
@@ -1153,10 +1153,10 @@ xfs_file_remap_range(
 	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
 		return -EINVAL;
 
-	if (!xfs_sb_version_hasreflink(&mp->m_sb))
+	if (!xfs_has_reflink(mp))
 		return -EOPNOTSUPP;
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	/* Prepare and then clone file data. */
@@ -1205,7 +1205,7 @@ xfs_file_open(
 {
 	if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
 		return -EFBIG;
-	if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
+	if (xfs_is_shutdown(XFS_M(inode->i_sb)))
 		return -EIO;
 	file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
 	return 0;
@@ -1277,7 +1277,7 @@ xfs_file_llseek(
 {
 	struct inode		*inode = file->f_mapping->host;
 
-	if (XFS_FORCED_SHUTDOWN(XFS_I(inode)->i_mount))
+	if (xfs_is_shutdown(XFS_I(inode)->i_mount))
 		return -EIO;
 
 	switch (whence) {
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index eed6ca5f8f91..6a3ce0f6dc9e 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -295,7 +295,7 @@ xfs_filestream_lookup_ag(
 	 * Set the starting AG using the rotor for inode32, otherwise
 	 * use the directory inode's AG.
 	 */
-	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
+	if (xfs_is_inode32(mp)) {
 		xfs_agnumber_t	 rotorstep = xfs_rotorstep;
 		startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount;
 		mp->m_agfrotor = (mp->m_agfrotor + 1) %
diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h
index 3af963743e4d..403226ebb80b 100644
--- a/fs/xfs/xfs_filestream.h
+++ b/fs/xfs/xfs_filestream.h
@@ -21,7 +21,7 @@ static inline int
 xfs_inode_is_filestream(
 	struct xfs_inode	*ip)
 {
-	return (ip->i_mount->m_flags & XFS_MOUNT_FILESTREAMS) ||
+	return xfs_has_filestreams(ip->i_mount) ||
 		(ip->i_diflags & XFS_DIFLAG_FILESTREAM);
 }
 
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 7d0b09c1366e..48287caad28b 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -61,7 +61,7 @@ xfs_fsmap_to_internal(
 static int
 xfs_fsmap_owner_to_rmap(
 	struct xfs_rmap_irec	*dest,
-	struct xfs_fsmap	*src)
+	const struct xfs_fsmap	*src)
 {
 	if (!(src->fmr_flags & FMR_OF_SPECIAL_OWNER)) {
 		dest->rm_owner = src->fmr_owner;
@@ -111,8 +111,8 @@ xfs_fsmap_owner_to_rmap(
 /* Convert an rmapbt owner into an fsmap owner. */
 static int
 xfs_fsmap_owner_from_rmap(
-	struct xfs_fsmap	*dest,
-	struct xfs_rmap_irec	*src)
+	struct xfs_fsmap		*dest,
+	const struct xfs_rmap_irec	*src)
 {
 	dest->fmr_flags = 0;
 	if (!XFS_RMAP_NON_INODE_OWNER(src->rm_owner)) {
@@ -171,7 +171,7 @@ struct xfs_getfsmap_info {
 struct xfs_getfsmap_dev {
 	u32			dev;
 	int			(*fn)(struct xfs_trans *tp,
-				      struct xfs_fsmap *keys,
+				      const struct xfs_fsmap *keys,
 				      struct xfs_getfsmap_info *info);
 };
 
@@ -192,7 +192,7 @@ STATIC int
 xfs_getfsmap_is_shared(
 	struct xfs_trans		*tp,
 	struct xfs_getfsmap_info	*info,
-	struct xfs_rmap_irec		*rec,
+	const struct xfs_rmap_irec	*rec,
 	bool				*stat)
 {
 	struct xfs_mount		*mp = tp->t_mountp;
@@ -202,7 +202,7 @@ xfs_getfsmap_is_shared(
 	int				error;
 
 	*stat = false;
-	if (!xfs_sb_version_hasreflink(&mp->m_sb))
+	if (!xfs_has_reflink(mp))
 		return 0;
 	/* rt files will have no perag structure */
 	if (!info->pag)
@@ -245,7 +245,7 @@ STATIC int
 xfs_getfsmap_helper(
 	struct xfs_trans		*tp,
 	struct xfs_getfsmap_info	*info,
-	struct xfs_rmap_irec		*rec,
+	const struct xfs_rmap_irec	*rec,
 	xfs_daddr_t			rec_daddr)
 {
 	struct xfs_fsmap		fmr;
@@ -347,7 +347,7 @@ out:
 STATIC int
 xfs_getfsmap_datadev_helper(
 	struct xfs_btree_cur		*cur,
-	struct xfs_rmap_irec		*rec,
+	const struct xfs_rmap_irec	*rec,
 	void				*priv)
 {
 	struct xfs_mount		*mp = cur->bc_mp;
@@ -365,7 +365,7 @@ xfs_getfsmap_datadev_helper(
 STATIC int
 xfs_getfsmap_datadev_bnobt_helper(
 	struct xfs_btree_cur		*cur,
-	struct xfs_alloc_rec_incore	*rec,
+	const struct xfs_alloc_rec_incore *rec,
 	void				*priv)
 {
 	struct xfs_mount		*mp = cur->bc_mp;
@@ -389,7 +389,7 @@ xfs_getfsmap_datadev_bnobt_helper(
 static void
 xfs_getfsmap_set_irec_flags(
 	struct xfs_rmap_irec	*irec,
-	struct xfs_fsmap	*fmr)
+	const struct xfs_fsmap	*fmr)
 {
 	irec->rm_flags = 0;
 	if (fmr->fmr_flags & FMR_OF_ATTR_FORK)
@@ -404,7 +404,7 @@ xfs_getfsmap_set_irec_flags(
 STATIC int
 xfs_getfsmap_logdev(
 	struct xfs_trans		*tp,
-	struct xfs_fsmap		*keys,
+	const struct xfs_fsmap		*keys,
 	struct xfs_getfsmap_info	*info)
 {
 	struct xfs_mount		*mp = tp->t_mountp;
@@ -451,7 +451,7 @@ xfs_getfsmap_logdev(
 STATIC int
 xfs_getfsmap_rtdev_rtbitmap_helper(
 	struct xfs_trans		*tp,
-	struct xfs_rtalloc_rec		*rec,
+	const struct xfs_rtalloc_rec	*rec,
 	void				*priv)
 {
 	struct xfs_mount		*mp = tp->t_mountp;
@@ -473,7 +473,7 @@ xfs_getfsmap_rtdev_rtbitmap_helper(
 STATIC int
 __xfs_getfsmap_rtdev(
 	struct xfs_trans		*tp,
-	struct xfs_fsmap		*keys,
+	const struct xfs_fsmap		*keys,
 	int				(*query_fn)(struct xfs_trans *,
 						    struct xfs_getfsmap_info *),
 	struct xfs_getfsmap_info	*info)
@@ -481,16 +481,14 @@ __xfs_getfsmap_rtdev(
 	struct xfs_mount		*mp = tp->t_mountp;
 	xfs_fsblock_t			start_fsb;
 	xfs_fsblock_t			end_fsb;
-	xfs_daddr_t			eofs;
+	uint64_t			eofs;
 	int				error = 0;
 
 	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
 	if (keys[0].fmr_physical >= eofs)
 		return 0;
-	if (keys[1].fmr_physical >= eofs)
-		keys[1].fmr_physical = eofs - 1;
 	start_fsb = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical);
-	end_fsb = XFS_BB_TO_FSB(mp, keys[1].fmr_physical);
+	end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
 
 	/* Set up search keys */
 	info->low.rm_startblock = start_fsb;
@@ -523,27 +521,37 @@ xfs_getfsmap_rtdev_rtbitmap_query(
 {
 	struct xfs_rtalloc_rec		alow = { 0 };
 	struct xfs_rtalloc_rec		ahigh = { 0 };
+	struct xfs_mount		*mp = tp->t_mountp;
 	int				error;
 
-	xfs_ilock(tp->t_mountp->m_rbmip, XFS_ILOCK_SHARED);
+	xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED);
 
+	/*
+	 * Set up query parameters to return free rtextents covering the range
+	 * we want.
+	 */
 	alow.ar_startext = info->low.rm_startblock;
 	ahigh.ar_startext = info->high.rm_startblock;
-	do_div(alow.ar_startext, tp->t_mountp->m_sb.sb_rextsize);
-	if (do_div(ahigh.ar_startext, tp->t_mountp->m_sb.sb_rextsize))
+	do_div(alow.ar_startext, mp->m_sb.sb_rextsize);
+	if (do_div(ahigh.ar_startext, mp->m_sb.sb_rextsize))
 		ahigh.ar_startext++;
 	error = xfs_rtalloc_query_range(tp, &alow, &ahigh,
 			xfs_getfsmap_rtdev_rtbitmap_helper, info);
 	if (error)
 		goto err;
 
-	/* Report any gaps at the end of the rtbitmap */
+	/*
+	 * Report any gaps at the end of the rtbitmap by simulating a null
+	 * rmap starting at the block after the end of the query range.
+	 */
 	info->last = true;
+	ahigh.ar_startext = min(mp->m_sb.sb_rextents, ahigh.ar_startext);
+
 	error = xfs_getfsmap_rtdev_rtbitmap_helper(tp, &ahigh, info);
 	if (error)
 		goto err;
 err:
-	xfs_iunlock(tp->t_mountp->m_rbmip, XFS_ILOCK_SHARED);
+	xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED);
 	return error;
 }
 
@@ -551,7 +559,7 @@ err:
 STATIC int
 xfs_getfsmap_rtdev_rtbitmap(
 	struct xfs_trans		*tp,
-	struct xfs_fsmap		*keys,
+	const struct xfs_fsmap		*keys,
 	struct xfs_getfsmap_info	*info)
 {
 	info->missing_owner = XFS_FMR_OWN_UNKNOWN;
@@ -564,7 +572,7 @@ xfs_getfsmap_rtdev_rtbitmap(
 STATIC int
 __xfs_getfsmap_datadev(
 	struct xfs_trans		*tp,
-	struct xfs_fsmap		*keys,
+	const struct xfs_fsmap		*keys,
 	struct xfs_getfsmap_info	*info,
 	int				(*query_fn)(struct xfs_trans *,
 						    struct xfs_getfsmap_info *,
@@ -579,16 +587,14 @@ __xfs_getfsmap_datadev(
 	xfs_fsblock_t			end_fsb;
 	xfs_agnumber_t			start_ag;
 	xfs_agnumber_t			end_ag;
-	xfs_daddr_t			eofs;
+	uint64_t			eofs;
 	int				error = 0;
 
 	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
 	if (keys[0].fmr_physical >= eofs)
 		return 0;
-	if (keys[1].fmr_physical >= eofs)
-		keys[1].fmr_physical = eofs - 1;
 	start_fsb = XFS_DADDR_TO_FSB(mp, keys[0].fmr_physical);
-	end_fsb = XFS_DADDR_TO_FSB(mp, keys[1].fmr_physical);
+	end_fsb = XFS_DADDR_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
 
 	/*
 	 * Convert the fsmap low/high keys to AG based keys.  Initialize
@@ -716,7 +722,7 @@ xfs_getfsmap_datadev_rmapbt_query(
 STATIC int
 xfs_getfsmap_datadev_rmapbt(
 	struct xfs_trans		*tp,
-	struct xfs_fsmap		*keys,
+	const struct xfs_fsmap		*keys,
 	struct xfs_getfsmap_info	*info)
 {
 	info->missing_owner = XFS_FMR_OWN_FREE;
@@ -751,7 +757,7 @@ xfs_getfsmap_datadev_bnobt_query(
 STATIC int
 xfs_getfsmap_datadev_bnobt(
 	struct xfs_trans		*tp,
-	struct xfs_fsmap		*keys,
+	const struct xfs_fsmap		*keys,
 	struct xfs_getfsmap_info	*info)
 {
 	struct xfs_alloc_rec_incore	akeys[2];
@@ -859,7 +865,7 @@ xfs_getfsmap(
 		return -EINVAL;
 
 	use_rmap = capable(CAP_SYS_ADMIN) &&
-		   xfs_sb_version_hasrmapbt(&mp->m_sb);
+		   xfs_has_rmapbt(mp);
 	head->fmh_entries = 0;
 
 	/* Set up our device handlers. */
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 6ed29b158312..33e26690a8c4 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -19,6 +19,7 @@
 #include "xfs_log.h"
 #include "xfs_ag.h"
 #include "xfs_ag_resv.h"
+#include "xfs_trace.h"
 
 /*
  * Write new AG headers to disk. Non-transactional, but need to be
@@ -177,7 +178,7 @@ xfs_growfs_data_private(
 	 * particularly important for shrink because the write verifier
 	 * will fail if sb_fdblocks is ever larger than sb_dblocks.
 	 */
-	if (xfs_sb_version_haslazysbcount(&mp->m_sb))
+	if (xfs_has_lazysbcount(mp))
 		xfs_log_sb(tp);
 
 	xfs_trans_set_sync(tp);
@@ -511,6 +512,11 @@ xfs_fs_goingdown(
  * consistent. We don't do an unmount here; just shutdown the shop, make sure
  * that absolutely nothing persistent happens to this filesystem after this
  * point.
+ *
+ * The shutdown state change is atomic, resulting in the first and only the
+ * first shutdown call processing the shutdown. This means we only shutdown the
+ * log once as it requires, and we don't spam the logs when multiple concurrent
+ * shutdowns race to set the shutdown flags.
  */
 void
 xfs_do_force_shutdown(
@@ -519,48 +525,37 @@ xfs_do_force_shutdown(
 	char		*fname,
 	int		lnnum)
 {
-	bool		logerror = flags & SHUTDOWN_LOG_IO_ERROR;
-
-	/*
-	 * No need to duplicate efforts.
-	 */
-	if (XFS_FORCED_SHUTDOWN(mp) && !logerror)
-		return;
-
-	/*
-	 * This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't
-	 * queue up anybody new on the log reservations, and wakes up
-	 * everybody who's sleeping on log reservations to tell them
-	 * the bad news.
-	 */
-	if (xfs_log_force_umount(mp, logerror))
-		return;
+	int		tag;
+	const char	*why;
 
-	if (flags & SHUTDOWN_FORCE_UMOUNT) {
-		xfs_alert(mp,
-"User initiated shutdown (0x%x) received. Shutting down filesystem",
-				flags);
+	if (test_and_set_bit(XFS_OPSTATE_SHUTDOWN, &mp->m_opstate))
 		return;
-	}
-
-	if (flags & SHUTDOWN_CORRUPT_INCORE) {
-		xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT,
-"Corruption of in-memory data (0x%x) detected at %pS (%s:%d).  Shutting down filesystem",
-				flags, __return_address, fname, lnnum);
-		if (XFS_ERRLEVEL_HIGH <= xfs_error_level)
-			xfs_stack_trace();
-	} else if (logerror) {
-		xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR,
-"Log I/O error (0x%x) detected at %pS (%s:%d). Shutting down filesystem",
-				flags, __return_address, fname, lnnum);
+	if (mp->m_sb_bp)
+		mp->m_sb_bp->b_flags |= XBF_DONE;
+
+	if (flags & SHUTDOWN_FORCE_UMOUNT)
+		xfs_alert(mp, "User initiated shutdown received.");
+
+	if (xlog_force_shutdown(mp->m_log, flags)) {
+		tag = XFS_PTAG_SHUTDOWN_LOGERROR;
+		why = "Log I/O Error";
+	} else if (flags & SHUTDOWN_CORRUPT_INCORE) {
+		tag = XFS_PTAG_SHUTDOWN_CORRUPT;
+		why = "Corruption of in-memory data";
 	} else {
-		xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
-"I/O error (0x%x) detected at %pS (%s:%d). Shutting down filesystem",
-				flags, __return_address, fname, lnnum);
+		tag = XFS_PTAG_SHUTDOWN_IOERROR;
+		why = "Metadata I/O Error";
 	}
 
+	trace_xfs_force_shutdown(mp, tag, flags, fname, lnnum);
+
+	xfs_alert_tag(mp, tag,
+"%s (0x%x) detected at %pS (%s:%d).  Shutting down filesystem.",
+			why, flags, __return_address, fname, lnnum);
 	xfs_alert(mp,
 		"Please unmount the filesystem and rectify the problem(s)");
+	if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
+		xfs_stack_trace();
 }
 
 /*
diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c
index eb10eacabc8f..72a075bb2c10 100644
--- a/fs/xfs/xfs_health.c
+++ b/fs/xfs/xfs_health.c
@@ -30,7 +30,7 @@ xfs_health_unmount(
 	unsigned int		checked = 0;
 	bool			warn = false;
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return;
 
 	/* Measure AG corruption levels. */
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 6007683482c6..f2210d927481 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -38,23 +38,11 @@
  * radix tree tags when convenient.  Avoid existing XFS_IWALK namespace.
  */
 enum xfs_icwalk_goal {
-	/* Goals that are not related to tags; these must be < 0. */
-	XFS_ICWALK_DQRELE	= -1,
-
 	/* Goals directly associated with tagged inodes. */
 	XFS_ICWALK_BLOCKGC	= XFS_ICI_BLOCKGC_TAG,
 	XFS_ICWALK_RECLAIM	= XFS_ICI_RECLAIM_TAG,
 };
 
-#define XFS_ICWALK_NULL_TAG	(-1U)
-
-/* Compute the inode radix tree tag for this goal. */
-static inline unsigned int
-xfs_icwalk_tag(enum xfs_icwalk_goal goal)
-{
-	return goal < 0 ? XFS_ICWALK_NULL_TAG : goal;
-}
-
 static int xfs_icwalk(struct xfs_mount *mp,
 		enum xfs_icwalk_goal goal, struct xfs_icwalk *icw);
 static int xfs_icwalk_ag(struct xfs_perag *pag,
@@ -64,9 +52,6 @@ static int xfs_icwalk_ag(struct xfs_perag *pag,
  * Private inode cache walk flags for struct xfs_icwalk.  Must not
  * coincide with XFS_ICWALK_FLAGS_VALID.
  */
-#define XFS_ICWALK_FLAG_DROP_UDQUOT	(1U << 31)
-#define XFS_ICWALK_FLAG_DROP_GDQUOT	(1U << 30)
-#define XFS_ICWALK_FLAG_DROP_PDQUOT	(1U << 29)
 
 /* Stop scanning after icw_scan_limit inodes. */
 #define XFS_ICWALK_FLAG_SCAN_LIMIT	(1U << 28)
@@ -74,10 +59,7 @@ static int xfs_icwalk_ag(struct xfs_perag *pag,
 #define XFS_ICWALK_FLAG_RECLAIM_SICK	(1U << 27)
 #define XFS_ICWALK_FLAG_UNION		(1U << 26) /* union filter algorithm */
 
-#define XFS_ICWALK_PRIVATE_FLAGS	(XFS_ICWALK_FLAG_DROP_UDQUOT | \
-					 XFS_ICWALK_FLAG_DROP_GDQUOT | \
-					 XFS_ICWALK_FLAG_DROP_PDQUOT | \
-					 XFS_ICWALK_FLAG_SCAN_LIMIT | \
+#define XFS_ICWALK_PRIVATE_FLAGS	(XFS_ICWALK_FLAG_SCAN_LIMIT | \
 					 XFS_ICWALK_FLAG_RECLAIM_SICK | \
 					 XFS_ICWALK_FLAG_UNION)
 
@@ -102,8 +84,9 @@ xfs_inode_alloc(
 		return NULL;
 	}
 
-	/* VFS doesn't initialise i_mode! */
+	/* VFS doesn't initialise i_mode or i_state! */
 	VFS_I(ip)->i_mode = 0;
+	VFS_I(ip)->i_state = 0;
 
 	XFS_STATS_INC(mp, vn_active);
 	ASSERT(atomic_read(&ip->i_pincount) == 0);
@@ -220,9 +203,14 @@ static inline void
 xfs_blockgc_queue(
 	struct xfs_perag	*pag)
 {
+	struct xfs_mount	*mp = pag->pag_mount;
+
+	if (!xfs_is_blockgc_enabled(mp))
+		return;
+
 	rcu_read_lock();
 	if (radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_BLOCKGC_TAG))
-		queue_delayed_work(pag->pag_mount->m_gc_workqueue,
+		queue_delayed_work(pag->pag_mount->m_blockgc_wq,
 				   &pag->pag_blockgc_work,
 				   msecs_to_jiffies(xfs_blockgc_secs * 1000));
 	rcu_read_unlock();
@@ -301,31 +289,6 @@ xfs_perag_clear_inode_tag(
 	trace_xfs_perag_clear_inode_tag(mp, pag->pag_agno, tag, _RET_IP_);
 }
 
-/*
- * We set the inode flag atomically with the radix tree tag.
- * Once we get tag lookups on the radix tree, this inode flag
- * can go away.
- */
-void
-xfs_inode_mark_reclaimable(
-	struct xfs_inode	*ip)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_perag	*pag;
-
-	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
-	spin_lock(&pag->pag_ici_lock);
-	spin_lock(&ip->i_flags_lock);
-
-	xfs_perag_set_inode_tag(pag, XFS_INO_TO_AGINO(mp, ip->i_ino),
-			XFS_ICI_RECLAIM_TAG);
-	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
-
-	spin_unlock(&ip->i_flags_lock);
-	spin_unlock(&pag->pag_ici_lock);
-	xfs_perag_put(pag);
-}
-
 static inline void
 xfs_inew_wait(
 	struct xfs_inode	*ip)
@@ -484,6 +447,21 @@ xfs_iget_check_free_state(
 	return 0;
 }
 
+/* Make all pending inactivation work start immediately. */
+static void
+xfs_inodegc_queue_all(
+	struct xfs_mount	*mp)
+{
+	struct xfs_inodegc	*gc;
+	int			cpu;
+
+	for_each_online_cpu(cpu) {
+		gc = per_cpu_ptr(mp->m_inodegc, cpu);
+		if (!llist_empty(&gc->list))
+			queue_work_on(cpu, mp->m_inodegc_wq, &gc->work);
+	}
+}
+
 /*
  * Check the validity of the inode we just found it the cache
  */
@@ -516,13 +494,30 @@ xfs_iget_cache_hit(
 	 * reclaimable state, wait for the initialisation to complete
 	 * before continuing.
 	 *
+	 * If we're racing with the inactivation worker we also want to wait.
+	 * If we're creating a new file, it's possible that the worker
+	 * previously marked the inode as free on disk but hasn't finished
+	 * updating the incore state yet.  The AGI buffer will be dirty and
+	 * locked to the icreate transaction, so a synchronous push of the
+	 * inodegc workers would result in deadlock.  For a regular iget, the
+	 * worker is running already, so we might as well wait.
+	 *
 	 * XXX(hch): eventually we should do something equivalent to
 	 *	     wait_on_inode to wait for these flags to be cleared
 	 *	     instead of polling for it.
 	 */
-	if (ip->i_flags & (XFS_INEW | XFS_IRECLAIM))
+	if (ip->i_flags & (XFS_INEW | XFS_IRECLAIM | XFS_INACTIVATING))
 		goto out_skip;
 
+	if (ip->i_flags & XFS_NEED_INACTIVE) {
+		/* Unlinked inodes cannot be re-grabbed. */
+		if (VFS_I(ip)->i_nlink == 0) {
+			error = -ENOENT;
+			goto out_error;
+		}
+		goto out_inodegc_flush;
+	}
+
 	/*
 	 * Check the inode free state is valid. This also detects lookup
 	 * racing with unlinks.
@@ -570,6 +565,17 @@ out_error:
 	spin_unlock(&ip->i_flags_lock);
 	rcu_read_unlock();
 	return error;
+
+out_inodegc_flush:
+	spin_unlock(&ip->i_flags_lock);
+	rcu_read_unlock();
+	/*
+	 * Do not wait for the workers, because the caller could hold an AGI
+	 * buffer lock.  We're just going to sleep in a loop anyway.
+	 */
+	if (xfs_is_inodegc_enabled(mp))
+		xfs_inodegc_queue_all(mp);
+	return -EAGAIN;
 }
 
 static int
@@ -597,7 +603,7 @@ xfs_iget_cache_miss(
 
 	/*
 	 * For version 5 superblocks, if we are initialising a new inode and we
-	 * are not utilising the XFS_MOUNT_IKEEP inode cluster mode, we can
+	 * are not utilising the XFS_FEAT_IKEEP inode cluster mode, we can
 	 * simply build the new inode core with a random generation number.
 	 *
 	 * For version 4 (and older) superblocks, log recovery is dependent on
@@ -605,8 +611,8 @@ xfs_iget_cache_miss(
 	 * value and hence we must also read the inode off disk even when
 	 * initializing new inodes.
 	 */
-	if (xfs_sb_version_has_v3inode(&mp->m_sb) &&
-	    (flags & XFS_IGET_CREATE) && !(mp->m_flags & XFS_MOUNT_IKEEP)) {
+	if (xfs_has_v3inodes(mp) &&
+	    (flags & XFS_IGET_CREATE) && !xfs_has_ikeep(mp)) {
 		VFS_I(ip)->i_generation = prandom_u32();
 	} else {
 		struct xfs_buf		*bp;
@@ -817,97 +823,6 @@ xfs_icache_inode_is_allocated(
 	return 0;
 }
 
-#ifdef CONFIG_XFS_QUOTA
-/* Decide if we want to grab this inode to drop its dquots. */
-static bool
-xfs_dqrele_igrab(
-	struct xfs_inode	*ip)
-{
-	bool			ret = false;
-
-	ASSERT(rcu_read_lock_held());
-
-	/* Check for stale RCU freed inode */
-	spin_lock(&ip->i_flags_lock);
-	if (!ip->i_ino)
-		goto out_unlock;
-
-	/*
-	 * Skip inodes that are anywhere in the reclaim machinery because we
-	 * drop dquots before tagging an inode for reclamation.
-	 */
-	if (ip->i_flags & (XFS_IRECLAIM | XFS_IRECLAIMABLE))
-		goto out_unlock;
-
-	/*
-	 * The inode looks alive; try to grab a VFS reference so that it won't
-	 * get destroyed.  If we got the reference, return true to say that
-	 * we grabbed the inode.
-	 *
-	 * If we can't get the reference, then we know the inode had its VFS
-	 * state torn down and hasn't yet entered the reclaim machinery.  Since
-	 * we also know that dquots are detached from an inode before it enters
-	 * reclaim, we can skip the inode.
-	 */
-	ret = igrab(VFS_I(ip)) != NULL;
-
-out_unlock:
-	spin_unlock(&ip->i_flags_lock);
-	return ret;
-}
-
-/* Drop this inode's dquots. */
-static void
-xfs_dqrele_inode(
-	struct xfs_inode	*ip,
-	struct xfs_icwalk	*icw)
-{
-	if (xfs_iflags_test(ip, XFS_INEW))
-		xfs_inew_wait(ip);
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	if (icw->icw_flags & XFS_ICWALK_FLAG_DROP_UDQUOT) {
-		xfs_qm_dqrele(ip->i_udquot);
-		ip->i_udquot = NULL;
-	}
-	if (icw->icw_flags & XFS_ICWALK_FLAG_DROP_GDQUOT) {
-		xfs_qm_dqrele(ip->i_gdquot);
-		ip->i_gdquot = NULL;
-	}
-	if (icw->icw_flags & XFS_ICWALK_FLAG_DROP_PDQUOT) {
-		xfs_qm_dqrele(ip->i_pdquot);
-		ip->i_pdquot = NULL;
-	}
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	xfs_irele(ip);
-}
-
-/*
- * Detach all dquots from incore inodes if we can.  The caller must already
- * have dropped the relevant XFS_[UGP]QUOTA_ACTIVE flags so that dquots will
- * not get reattached.
- */
-int
-xfs_dqrele_all_inodes(
-	struct xfs_mount	*mp,
-	unsigned int		qflags)
-{
-	struct xfs_icwalk	icw = { .icw_flags = 0 };
-
-	if (qflags & XFS_UQUOTA_ACCT)
-		icw.icw_flags |= XFS_ICWALK_FLAG_DROP_UDQUOT;
-	if (qflags & XFS_GQUOTA_ACCT)
-		icw.icw_flags |= XFS_ICWALK_FLAG_DROP_GDQUOT;
-	if (qflags & XFS_PQUOTA_ACCT)
-		icw.icw_flags |= XFS_ICWALK_FLAG_DROP_PDQUOT;
-
-	return xfs_icwalk(mp, XFS_ICWALK_DQRELE, &icw);
-}
-#else
-# define xfs_dqrele_igrab(ip)		(false)
-# define xfs_dqrele_inode(ip, priv)	((void)0)
-#endif /* CONFIG_XFS_QUOTA */
-
 /*
  * Grab the inode for reclaim exclusively.
  *
@@ -976,7 +891,7 @@ xfs_reclaim_inode(
 	if (xfs_iflags_test_and_set(ip, XFS_IFLUSHING))
 		goto out_iunlock;
 
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+	if (xfs_is_shutdown(ip->i_mount)) {
 		xfs_iunpin_wait(ip);
 		xfs_iflush_abort(ip);
 		goto reclaim;
@@ -988,6 +903,7 @@ xfs_reclaim_inode(
 
 	xfs_iflags_clear(ip, XFS_IFLUSHING);
 reclaim:
+	trace_xfs_inode_reclaiming(ip);
 
 	/*
 	 * Because we use RCU freeing we need to ensure the inode always appears
@@ -1052,9 +968,8 @@ static inline bool
 xfs_want_reclaim_sick(
 	struct xfs_mount	*mp)
 {
-	return (mp->m_flags & XFS_MOUNT_UNMOUNTING) ||
-	       (mp->m_flags & XFS_MOUNT_NORECOVERY) ||
-	       XFS_FORCED_SHUTDOWN(mp);
+	return xfs_is_unmounting(mp) || xfs_has_norecovery(mp) ||
+	       xfs_is_shutdown(mp);
 }
 
 void
@@ -1447,8 +1362,12 @@ xfs_blockgc_stop(
 	struct xfs_perag	*pag;
 	xfs_agnumber_t		agno;
 
-	for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
+	if (!xfs_clear_blockgc_enabled(mp))
+		return;
+
+	for_each_perag(mp, agno, pag)
 		cancel_delayed_work_sync(&pag->pag_blockgc_work);
+	trace_xfs_blockgc_stop(mp, __return_address);
 }
 
 /* Enable post-EOF and CoW block auto-reclamation. */
@@ -1459,12 +1378,18 @@ xfs_blockgc_start(
 	struct xfs_perag	*pag;
 	xfs_agnumber_t		agno;
 
+	if (xfs_set_blockgc_enabled(mp))
+		return;
+
+	trace_xfs_blockgc_start(mp, __return_address);
 	for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
 		xfs_blockgc_queue(pag);
 }
 
 /* Don't try to run block gc on an inode that's in any of these states. */
 #define XFS_BLOCKGC_NOGRAB_IFLAGS	(XFS_INEW | \
+					 XFS_NEED_INACTIVE | \
+					 XFS_INACTIVATING | \
 					 XFS_IRECLAIMABLE | \
 					 XFS_IRECLAIM)
 /*
@@ -1490,7 +1415,7 @@ xfs_blockgc_igrab(
 	spin_unlock(&ip->i_flags_lock);
 
 	/* nothing to sync during shutdown */
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+	if (xfs_is_shutdown(ip->i_mount))
 		return false;
 
 	/* If we can't grab the inode, it must on it's way to reclaim. */
@@ -1536,27 +1461,62 @@ xfs_blockgc_worker(
 	struct xfs_mount	*mp = pag->pag_mount;
 	int			error;
 
-	if (!sb_start_write_trylock(mp->m_super))
-		return;
+	trace_xfs_blockgc_worker(mp, __return_address);
+
 	error = xfs_icwalk_ag(pag, XFS_ICWALK_BLOCKGC, NULL);
 	if (error)
 		xfs_info(mp, "AG %u preallocation gc worker failed, err=%d",
 				pag->pag_agno, error);
-	sb_end_write(mp->m_super);
 	xfs_blockgc_queue(pag);
 }
 
 /*
- * Try to free space in the filesystem by purging eofblocks and cowblocks.
+ * Try to free space in the filesystem by purging inactive inodes, eofblocks
+ * and cowblocks.
  */
 int
 xfs_blockgc_free_space(
 	struct xfs_mount	*mp,
 	struct xfs_icwalk	*icw)
 {
+	int			error;
+
 	trace_xfs_blockgc_free_space(mp, icw, _RET_IP_);
 
-	return xfs_icwalk(mp, XFS_ICWALK_BLOCKGC, icw);
+	error = xfs_icwalk(mp, XFS_ICWALK_BLOCKGC, icw);
+	if (error)
+		return error;
+
+	xfs_inodegc_flush(mp);
+	return 0;
+}
+
+/*
+ * Reclaim all the free space that we can by scheduling the background blockgc
+ * and inodegc workers immediately and waiting for them all to clear.
+ */
+void
+xfs_blockgc_flush_all(
+	struct xfs_mount	*mp)
+{
+	struct xfs_perag	*pag;
+	xfs_agnumber_t		agno;
+
+	trace_xfs_blockgc_flush_all(mp, __return_address);
+
+	/*
+	 * For each blockgc worker, move its queue time up to now.  If it
+	 * wasn't queued, it will not be requeued.  Then flush whatever's
+	 * left.
+	 */
+	for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
+		mod_delayed_work(pag->pag_mount->m_blockgc_wq,
+				&pag->pag_blockgc_work, 0);
+
+	for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
+		flush_delayed_work(&pag->pag_blockgc_work);
+
+	xfs_inodegc_flush(mp);
 }
 
 /*
@@ -1647,8 +1607,6 @@ xfs_icwalk_igrab(
 	struct xfs_icwalk	*icw)
 {
 	switch (goal) {
-	case XFS_ICWALK_DQRELE:
-		return xfs_dqrele_igrab(ip);
 	case XFS_ICWALK_BLOCKGC:
 		return xfs_blockgc_igrab(ip);
 	case XFS_ICWALK_RECLAIM:
@@ -1672,9 +1630,6 @@ xfs_icwalk_process_inode(
 	int			error = 0;
 
 	switch (goal) {
-	case XFS_ICWALK_DQRELE:
-		xfs_dqrele_inode(ip, icw);
-		break;
 	case XFS_ICWALK_BLOCKGC:
 		error = xfs_blockgc_scan_inode(ip, icw);
 		break;
@@ -1712,22 +1667,14 @@ restart:
 	nr_found = 0;
 	do {
 		struct xfs_inode *batch[XFS_LOOKUP_BATCH];
-		unsigned int	tag = xfs_icwalk_tag(goal);
 		int		error = 0;
 		int		i;
 
 		rcu_read_lock();
 
-		if (tag == XFS_ICWALK_NULL_TAG)
-			nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
-					(void **)batch, first_index,
-					XFS_LOOKUP_BATCH);
-		else
-			nr_found = radix_tree_gang_lookup_tag(
-					&pag->pag_ici_root,
-					(void **) batch, first_index,
-					XFS_LOOKUP_BATCH, tag);
-
+		nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
+				(void **) batch, first_index,
+				XFS_LOOKUP_BATCH, goal);
 		if (!nr_found) {
 			done = true;
 			rcu_read_unlock();
@@ -1805,20 +1752,6 @@ restart:
 	return last_error;
 }
 
-/* Fetch the next (possibly tagged) per-AG structure. */
-static inline struct xfs_perag *
-xfs_icwalk_get_perag(
-	struct xfs_mount	*mp,
-	xfs_agnumber_t		agno,
-	enum xfs_icwalk_goal	goal)
-{
-	unsigned int		tag = xfs_icwalk_tag(goal);
-
-	if (tag == XFS_ICWALK_NULL_TAG)
-		return xfs_perag_get(mp, agno);
-	return xfs_perag_get_tag(mp, agno, tag);
-}
-
 /* Walk all incore inodes to achieve a given goal. */
 static int
 xfs_icwalk(
@@ -1829,18 +1762,465 @@ xfs_icwalk(
 	struct xfs_perag	*pag;
 	int			error = 0;
 	int			last_error = 0;
-	xfs_agnumber_t		agno = 0;
+	xfs_agnumber_t		agno;
 
-	while ((pag = xfs_icwalk_get_perag(mp, agno, goal))) {
-		agno = pag->pag_agno + 1;
+	for_each_perag_tag(mp, agno, pag, goal) {
 		error = xfs_icwalk_ag(pag, goal, icw);
-		xfs_perag_put(pag);
 		if (error) {
 			last_error = error;
-			if (error == -EFSCORRUPTED)
+			if (error == -EFSCORRUPTED) {
+				xfs_perag_put(pag);
 				break;
+			}
 		}
 	}
 	return last_error;
 	BUILD_BUG_ON(XFS_ICWALK_PRIVATE_FLAGS & XFS_ICWALK_FLAGS_VALID);
 }
+
+#ifdef DEBUG
+static void
+xfs_check_delalloc(
+	struct xfs_inode	*ip,
+	int			whichfork)
+{
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
+	struct xfs_bmbt_irec	got;
+	struct xfs_iext_cursor	icur;
+
+	if (!ifp || !xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got))
+		return;
+	do {
+		if (isnullstartblock(got.br_startblock)) {
+			xfs_warn(ip->i_mount,
+	"ino %llx %s fork has delalloc extent at [0x%llx:0x%llx]",
+				ip->i_ino,
+				whichfork == XFS_DATA_FORK ? "data" : "cow",
+				got.br_startoff, got.br_blockcount);
+		}
+	} while (xfs_iext_next_extent(ifp, &icur, &got));
+}
+#else
+#define xfs_check_delalloc(ip, whichfork)	do { } while (0)
+#endif
+
+/* Schedule the inode for reclaim. */
+static void
+xfs_inodegc_set_reclaimable(
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_perag	*pag;
+
+	if (!xfs_is_shutdown(mp) && ip->i_delayed_blks) {
+		xfs_check_delalloc(ip, XFS_DATA_FORK);
+		xfs_check_delalloc(ip, XFS_COW_FORK);
+		ASSERT(0);
+	}
+
+	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+	spin_lock(&pag->pag_ici_lock);
+	spin_lock(&ip->i_flags_lock);
+
+	trace_xfs_inode_set_reclaimable(ip);
+	ip->i_flags &= ~(XFS_NEED_INACTIVE | XFS_INACTIVATING);
+	ip->i_flags |= XFS_IRECLAIMABLE;
+	xfs_perag_set_inode_tag(pag, XFS_INO_TO_AGINO(mp, ip->i_ino),
+			XFS_ICI_RECLAIM_TAG);
+
+	spin_unlock(&ip->i_flags_lock);
+	spin_unlock(&pag->pag_ici_lock);
+	xfs_perag_put(pag);
+}
+
+/*
+ * Free all speculative preallocations and possibly even the inode itself.
+ * This is the last chance to make changes to an otherwise unreferenced file
+ * before incore reclamation happens.
+ */
+static void
+xfs_inodegc_inactivate(
+	struct xfs_inode	*ip)
+{
+	trace_xfs_inode_inactivating(ip);
+	xfs_inactive(ip);
+	xfs_inodegc_set_reclaimable(ip);
+}
+
+void
+xfs_inodegc_worker(
+	struct work_struct	*work)
+{
+	struct xfs_inodegc	*gc = container_of(work, struct xfs_inodegc,
+							work);
+	struct llist_node	*node = llist_del_all(&gc->list);
+	struct xfs_inode	*ip, *n;
+
+	WRITE_ONCE(gc->items, 0);
+
+	if (!node)
+		return;
+
+	ip = llist_entry(node, struct xfs_inode, i_gclist);
+	trace_xfs_inodegc_worker(ip->i_mount, READ_ONCE(gc->shrinker_hits));
+
+	WRITE_ONCE(gc->shrinker_hits, 0);
+	llist_for_each_entry_safe(ip, n, node, i_gclist) {
+		xfs_iflags_set(ip, XFS_INACTIVATING);
+		xfs_inodegc_inactivate(ip);
+	}
+}
+
+/*
+ * Force all currently queued inode inactivation work to run immediately, and
+ * wait for the work to finish. Two pass - queue all the work first pass, wait
+ * for it in a second pass.
+ */
+void
+xfs_inodegc_flush(
+	struct xfs_mount	*mp)
+{
+	struct xfs_inodegc	*gc;
+	int			cpu;
+
+	if (!xfs_is_inodegc_enabled(mp))
+		return;
+
+	trace_xfs_inodegc_flush(mp, __return_address);
+
+	xfs_inodegc_queue_all(mp);
+
+	for_each_online_cpu(cpu) {
+		gc = per_cpu_ptr(mp->m_inodegc, cpu);
+		flush_work(&gc->work);
+	}
+}
+
+/*
+ * Flush all the pending work and then disable the inode inactivation background
+ * workers and wait for them to stop.
+ */
+void
+xfs_inodegc_stop(
+	struct xfs_mount	*mp)
+{
+	struct xfs_inodegc	*gc;
+	int			cpu;
+
+	if (!xfs_clear_inodegc_enabled(mp))
+		return;
+
+	xfs_inodegc_queue_all(mp);
+
+	for_each_online_cpu(cpu) {
+		gc = per_cpu_ptr(mp->m_inodegc, cpu);
+		cancel_work_sync(&gc->work);
+	}
+	trace_xfs_inodegc_stop(mp, __return_address);
+}
+
+/*
+ * Enable the inode inactivation background workers and schedule deferred inode
+ * inactivation work if there is any.
+ */
+void
+xfs_inodegc_start(
+	struct xfs_mount	*mp)
+{
+	if (xfs_set_inodegc_enabled(mp))
+		return;
+
+	trace_xfs_inodegc_start(mp, __return_address);
+	xfs_inodegc_queue_all(mp);
+}
+
+#ifdef CONFIG_XFS_RT
+static inline bool
+xfs_inodegc_want_queue_rt_file(
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	uint64_t		freertx;
+
+	if (!XFS_IS_REALTIME_INODE(ip))
+		return false;
+
+	freertx = READ_ONCE(mp->m_sb.sb_frextents);
+	return freertx < mp->m_low_rtexts[XFS_LOWSP_5_PCNT];
+}
+#else
+# define xfs_inodegc_want_queue_rt_file(ip)	(false)
+#endif /* CONFIG_XFS_RT */
+
+/*
+ * Schedule the inactivation worker when:
+ *
+ *  - We've accumulated more than one inode cluster buffer's worth of inodes.
+ *  - There is less than 5% free space left.
+ *  - Any of the quotas for this inode are near an enforcement limit.
+ */
+static inline bool
+xfs_inodegc_want_queue_work(
+	struct xfs_inode	*ip,
+	unsigned int		items)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+
+	if (items > mp->m_ino_geo.inodes_per_cluster)
+		return true;
+
+	if (__percpu_counter_compare(&mp->m_fdblocks,
+				mp->m_low_space[XFS_LOWSP_5_PCNT],
+				XFS_FDBLOCKS_BATCH) < 0)
+		return true;
+
+	if (xfs_inodegc_want_queue_rt_file(ip))
+		return true;
+
+	if (xfs_inode_near_dquot_enforcement(ip, XFS_DQTYPE_USER))
+		return true;
+
+	if (xfs_inode_near_dquot_enforcement(ip, XFS_DQTYPE_GROUP))
+		return true;
+
+	if (xfs_inode_near_dquot_enforcement(ip, XFS_DQTYPE_PROJ))
+		return true;
+
+	return false;
+}
+
+/*
+ * Upper bound on the number of inodes in each AG that can be queued for
+ * inactivation at any given time, to avoid monopolizing the workqueue.
+ */
+#define XFS_INODEGC_MAX_BACKLOG		(4 * XFS_INODES_PER_CHUNK)
+
+/*
+ * Make the frontend wait for inactivations when:
+ *
+ *  - Memory shrinkers queued the inactivation worker and it hasn't finished.
+ *  - The queue depth exceeds the maximum allowable percpu backlog.
+ *
+ * Note: If the current thread is running a transaction, we don't ever want to
+ * wait for other transactions because that could introduce a deadlock.
+ */
+static inline bool
+xfs_inodegc_want_flush_work(
+	struct xfs_inode	*ip,
+	unsigned int		items,
+	unsigned int		shrinker_hits)
+{
+	if (current->journal_info)
+		return false;
+
+	if (shrinker_hits > 0)
+		return true;
+
+	if (items > XFS_INODEGC_MAX_BACKLOG)
+		return true;
+
+	return false;
+}
+
+/*
+ * Queue a background inactivation worker if there are inodes that need to be
+ * inactivated and higher level xfs code hasn't disabled the background
+ * workers.
+ */
+static void
+xfs_inodegc_queue(
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_inodegc	*gc;
+	int			items;
+	unsigned int		shrinker_hits;
+
+	trace_xfs_inode_set_need_inactive(ip);
+	spin_lock(&ip->i_flags_lock);
+	ip->i_flags |= XFS_NEED_INACTIVE;
+	spin_unlock(&ip->i_flags_lock);
+
+	gc = get_cpu_ptr(mp->m_inodegc);
+	llist_add(&ip->i_gclist, &gc->list);
+	items = READ_ONCE(gc->items);
+	WRITE_ONCE(gc->items, items + 1);
+	shrinker_hits = READ_ONCE(gc->shrinker_hits);
+	put_cpu_ptr(gc);
+
+	if (!xfs_is_inodegc_enabled(mp))
+		return;
+
+	if (xfs_inodegc_want_queue_work(ip, items)) {
+		trace_xfs_inodegc_queue(mp, __return_address);
+		queue_work(mp->m_inodegc_wq, &gc->work);
+	}
+
+	if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) {
+		trace_xfs_inodegc_throttle(mp, __return_address);
+		flush_work(&gc->work);
+	}
+}
+
+/*
+ * Fold the dead CPU inodegc queue into the current CPUs queue.
+ */
+void
+xfs_inodegc_cpu_dead(
+	struct xfs_mount	*mp,
+	unsigned int		dead_cpu)
+{
+	struct xfs_inodegc	*dead_gc, *gc;
+	struct llist_node	*first, *last;
+	unsigned int		count = 0;
+
+	dead_gc = per_cpu_ptr(mp->m_inodegc, dead_cpu);
+	cancel_work_sync(&dead_gc->work);
+
+	if (llist_empty(&dead_gc->list))
+		return;
+
+	first = dead_gc->list.first;
+	last = first;
+	while (last->next) {
+		last = last->next;
+		count++;
+	}
+	dead_gc->list.first = NULL;
+	dead_gc->items = 0;
+
+	/* Add pending work to current CPU */
+	gc = get_cpu_ptr(mp->m_inodegc);
+	llist_add_batch(first, last, &gc->list);
+	count += READ_ONCE(gc->items);
+	WRITE_ONCE(gc->items, count);
+	put_cpu_ptr(gc);
+
+	if (xfs_is_inodegc_enabled(mp)) {
+		trace_xfs_inodegc_queue(mp, __return_address);
+		queue_work(mp->m_inodegc_wq, &gc->work);
+	}
+}
+
+/*
+ * We set the inode flag atomically with the radix tree tag.  Once we get tag
+ * lookups on the radix tree, this inode flag can go away.
+ *
+ * We always use background reclaim here because even if the inode is clean, it
+ * still may be under IO and hence we have wait for IO completion to occur
+ * before we can reclaim the inode. The background reclaim path handles this
+ * more efficiently than we can here, so simply let background reclaim tear down
+ * all inodes.
+ */
+void
+xfs_inode_mark_reclaimable(
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	bool			need_inactive;
+
+	XFS_STATS_INC(mp, vn_reclaim);
+
+	/*
+	 * We should never get here with any of the reclaim flags already set.
+	 */
+	ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_ALL_IRECLAIM_FLAGS));
+
+	need_inactive = xfs_inode_needs_inactive(ip);
+	if (need_inactive) {
+		xfs_inodegc_queue(ip);
+		return;
+	}
+
+	/* Going straight to reclaim, so drop the dquots. */
+	xfs_qm_dqdetach(ip);
+	xfs_inodegc_set_reclaimable(ip);
+}
+
+/*
+ * Register a phony shrinker so that we can run background inodegc sooner when
+ * there's memory pressure.  Inactivation does not itself free any memory but
+ * it does make inodes reclaimable, which eventually frees memory.
+ *
+ * The count function, seek value, and batch value are crafted to trigger the
+ * scan function during the second round of scanning.  Hopefully this means
+ * that we reclaimed enough memory that initiating metadata transactions won't
+ * make things worse.
+ */
+#define XFS_INODEGC_SHRINKER_COUNT	(1UL << DEF_PRIORITY)
+#define XFS_INODEGC_SHRINKER_BATCH	((XFS_INODEGC_SHRINKER_COUNT / 2) + 1)
+
+static unsigned long
+xfs_inodegc_shrinker_count(
+	struct shrinker		*shrink,
+	struct shrink_control	*sc)
+{
+	struct xfs_mount	*mp = container_of(shrink, struct xfs_mount,
+						   m_inodegc_shrinker);
+	struct xfs_inodegc	*gc;
+	int			cpu;
+
+	if (!xfs_is_inodegc_enabled(mp))
+		return 0;
+
+	for_each_online_cpu(cpu) {
+		gc = per_cpu_ptr(mp->m_inodegc, cpu);
+		if (!llist_empty(&gc->list))
+			return XFS_INODEGC_SHRINKER_COUNT;
+	}
+
+	return 0;
+}
+
+static unsigned long
+xfs_inodegc_shrinker_scan(
+	struct shrinker		*shrink,
+	struct shrink_control	*sc)
+{
+	struct xfs_mount	*mp = container_of(shrink, struct xfs_mount,
+						   m_inodegc_shrinker);
+	struct xfs_inodegc	*gc;
+	int			cpu;
+	bool			no_items = true;
+
+	if (!xfs_is_inodegc_enabled(mp))
+		return SHRINK_STOP;
+
+	trace_xfs_inodegc_shrinker_scan(mp, sc, __return_address);
+
+	for_each_online_cpu(cpu) {
+		gc = per_cpu_ptr(mp->m_inodegc, cpu);
+		if (!llist_empty(&gc->list)) {
+			unsigned int	h = READ_ONCE(gc->shrinker_hits);
+
+			WRITE_ONCE(gc->shrinker_hits, h + 1);
+			queue_work_on(cpu, mp->m_inodegc_wq, &gc->work);
+			no_items = false;
+		}
+	}
+
+	/*
+	 * If there are no inodes to inactivate, we don't want the shrinker
+	 * to think there's deferred work to call us back about.
+	 */
+	if (no_items)
+		return LONG_MAX;
+
+	return SHRINK_STOP;
+}
+
+/* Register a shrinker so we can accelerate inodegc and throttle queuing. */
+int
+xfs_inodegc_register_shrinker(
+	struct xfs_mount	*mp)
+{
+	struct shrinker		*shrink = &mp->m_inodegc_shrinker;
+
+	shrink->count_objects = xfs_inodegc_shrinker_count;
+	shrink->scan_objects = xfs_inodegc_shrinker_scan;
+	shrink->seeks = 0;
+	shrink->flags = SHRINKER_NONSLAB;
+	shrink->batch = XFS_INODEGC_SHRINKER_BATCH;
+
+	return register_shrinker(shrink);
+}
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index c751cc32dc46..2e4cfddf8b8e 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -59,6 +59,7 @@ int xfs_blockgc_free_dquots(struct xfs_mount *mp, struct xfs_dquot *udqp,
 		unsigned int iwalk_flags);
 int xfs_blockgc_free_quota(struct xfs_inode *ip, unsigned int iwalk_flags);
 int xfs_blockgc_free_space(struct xfs_mount *mp, struct xfs_icwalk *icm);
+void xfs_blockgc_flush_all(struct xfs_mount *mp);
 
 void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
 void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
@@ -68,16 +69,17 @@ void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip);
 
 void xfs_blockgc_worker(struct work_struct *work);
 
-#ifdef CONFIG_XFS_QUOTA
-int xfs_dqrele_all_inodes(struct xfs_mount *mp, unsigned int qflags);
-#else
-# define xfs_dqrele_all_inodes(mp, qflags)	(0)
-#endif
-
 int xfs_icache_inode_is_allocated(struct xfs_mount *mp, struct xfs_trans *tp,
 				  xfs_ino_t ino, bool *inuse);
 
 void xfs_blockgc_stop(struct xfs_mount *mp);
 void xfs_blockgc_start(struct xfs_mount *mp);
 
+void xfs_inodegc_worker(struct work_struct *work);
+void xfs_inodegc_flush(struct xfs_mount *mp);
+void xfs_inodegc_stop(struct xfs_mount *mp);
+void xfs_inodegc_start(struct xfs_mount *mp);
+void xfs_inodegc_cpu_dead(struct xfs_mount *mp, unsigned int cpu);
+int xfs_inodegc_register_shrinker(struct xfs_mount *mp);
+
 #endif
diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c
index 9b3994b9c716..017904a34c02 100644
--- a/fs/xfs/xfs_icreate_item.c
+++ b/fs/xfs/xfs_icreate_item.c
@@ -201,7 +201,7 @@ xlog_recover_icreate_commit_pass2(
 	if (length != igeo->ialloc_blks &&
 	    length != igeo->ialloc_min_blks) {
 		xfs_warn(log->l_mp,
-			 "%s: unsupported chunk length", __FUNCTION__);
+			 "%s: unsupported chunk length", __func__);
 		return -EINVAL;
 	}
 
@@ -209,7 +209,7 @@ xlog_recover_icreate_commit_pass2(
 	if ((count >> mp->m_sb.sb_inopblog) != length) {
 		xfs_warn(log->l_mp,
 			 "%s: inconsistent inode count and chunk length",
-			 __FUNCTION__);
+			 __func__);
 		return -EINVAL;
 	}
 
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index f00145e1a976..a4f6f034fb81 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -674,7 +674,7 @@ xfs_lookup(
 
 	trace_xfs_lookup(dp, name);
 
-	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
+	if (xfs_is_shutdown(dp->i_mount))
 		return -EIO;
 
 	error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
@@ -716,7 +716,7 @@ xfs_inode_inherit_flags(
 			di_flags |= XFS_DIFLAG_PROJINHERIT;
 	} else if (S_ISREG(mode)) {
 		if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
-		    xfs_sb_version_hasrealtime(&ip->i_mount->m_sb))
+		    xfs_has_realtime(ip->i_mount))
 			di_flags |= XFS_DIFLAG_REALTIME;
 		if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
 			di_flags |= XFS_DIFLAG_EXTSIZE;
@@ -837,8 +837,7 @@ xfs_init_new_inode(
 	inode->i_rdev = rdev;
 	ip->i_projid = prid;
 
-	if (dir && !(dir->i_mode & S_ISGID) &&
-	    (mp->m_flags & XFS_MOUNT_GRPID)) {
+	if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) {
 		inode_fsuid_set(inode, mnt_userns);
 		inode->i_gid = dir->i_gid;
 		inode->i_mode = mode;
@@ -868,7 +867,7 @@ xfs_init_new_inode(
 	ip->i_extsize = 0;
 	ip->i_diflags = 0;
 
-	if (xfs_sb_version_has_v3inode(&mp->m_sb)) {
+	if (xfs_has_v3inodes(mp)) {
 		inode_set_iversion(inode, 1);
 		ip->i_cowextsize = 0;
 		ip->i_crtime = tv;
@@ -908,7 +907,7 @@ xfs_init_new_inode(
 	 * this saves us from needing to run a separate transaction to set the
 	 * fork offset in the immediate future.
 	 */
-	if (init_xattrs && xfs_sb_version_hasattr(&mp->m_sb)) {
+	if (init_xattrs && xfs_has_attr(mp)) {
 		ip->i_forkoff = xfs_default_attroffset(ip) >> 3;
 		ip->i_afp = xfs_ifork_alloc(XFS_DINODE_FMT_EXTENTS, 0);
 	}
@@ -987,7 +986,7 @@ xfs_create(
 
 	trace_xfs_create(dp, name);
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	prid = xfs_get_initial_prid(dp);
@@ -1079,7 +1078,7 @@ xfs_create(
 	 * create transaction goes to disk before returning to
 	 * the user.
 	 */
-	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
+	if (xfs_has_wsync(mp) || xfs_has_dirsync(mp))
 		xfs_trans_set_sync(tp);
 
 	/*
@@ -1141,7 +1140,7 @@ xfs_create_tmpfile(
 	uint			resblks;
 	xfs_ino_t		ino;
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	prid = xfs_get_initial_prid(dp);
@@ -1171,7 +1170,7 @@ xfs_create_tmpfile(
 	if (error)
 		goto out_trans_cancel;
 
-	if (mp->m_flags & XFS_MOUNT_WSYNC)
+	if (xfs_has_wsync(mp))
 		xfs_trans_set_sync(tp);
 
 	/*
@@ -1231,7 +1230,7 @@ xfs_link(
 
 	ASSERT(!S_ISDIR(VFS_I(sip)->i_mode));
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	error = xfs_qm_dqattach(sip);
@@ -1305,7 +1304,7 @@ xfs_link(
 	 * link transaction goes to disk before returning to
 	 * the user.
 	 */
-	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
+	if (xfs_has_wsync(mp) || xfs_has_dirsync(mp))
 		xfs_trans_set_sync(tp);
 
 	return xfs_trans_commit(tp);
@@ -1446,10 +1445,10 @@ xfs_release(
 		return 0;
 
 	/* If this is a read-only mount, don't do this (would generate I/O) */
-	if (mp->m_flags & XFS_MOUNT_RDONLY)
+	if (xfs_is_readonly(mp))
 		return 0;
 
-	if (!XFS_FORCED_SHUTDOWN(mp)) {
+	if (!xfs_is_shutdown(mp)) {
 		int truncated;
 
 		/*
@@ -1532,7 +1531,7 @@ xfs_inactive_truncate(
 
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
 	if (error) {
-		ASSERT(XFS_FORCED_SHUTDOWN(mp));
+		ASSERT(xfs_is_shutdown(mp));
 		return error;
 	}
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -1603,7 +1602,7 @@ xfs_inactive_ifree(
 			"Failed to remove inode(s) from unlinked list. "
 			"Please free space, unmount and run xfs_repair.");
 		} else {
-			ASSERT(XFS_FORCED_SHUTDOWN(mp));
+			ASSERT(xfs_is_shutdown(mp));
 		}
 		return error;
 	}
@@ -1639,7 +1638,7 @@ xfs_inactive_ifree(
 		 * might do that, we need to make sure.  Otherwise the
 		 * inode might be lost for a long time or forever.
 		 */
-		if (!XFS_FORCED_SHUTDOWN(mp)) {
+		if (!xfs_is_shutdown(mp)) {
 			xfs_notice(mp, "%s: xfs_ifree returned error %d",
 				__func__, error);
 			xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
@@ -1666,6 +1665,59 @@ xfs_inactive_ifree(
 }
 
 /*
+ * Returns true if we need to update the on-disk metadata before we can free
+ * the memory used by this inode.  Updates include freeing post-eof
+ * preallocations; freeing COW staging extents; and marking the inode free in
+ * the inobt if it is on the unlinked list.
+ */
+bool
+xfs_inode_needs_inactive(
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_ifork	*cow_ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+
+	/*
+	 * If the inode is already free, then there can be nothing
+	 * to clean up here.
+	 */
+	if (VFS_I(ip)->i_mode == 0)
+		return false;
+
+	/* If this is a read-only mount, don't do this (would generate I/O) */
+	if (xfs_is_readonly(mp))
+		return false;
+
+	/* If the log isn't running, push inodes straight to reclaim. */
+	if (xfs_is_shutdown(mp) || xfs_has_norecovery(mp))
+		return false;
+
+	/* Metadata inodes require explicit resource cleanup. */
+	if (xfs_is_metadata_inode(ip))
+		return false;
+
+	/* Want to clean out the cow blocks if there are any. */
+	if (cow_ifp && cow_ifp->if_bytes > 0)
+		return true;
+
+	/* Unlinked files must be freed. */
+	if (VFS_I(ip)->i_nlink == 0)
+		return true;
+
+	/*
+	 * This file isn't being freed, so check if there are post-eof blocks
+	 * to free.  @force is true because we are evicting an inode from the
+	 * cache.  Post-eof blocks must be freed, lest we end up with broken
+	 * free space accounting.
+	 *
+	 * Note: don't bother with iolock here since lockdep complains about
+	 * acquiring it in reclaim context. We have the only reference to the
+	 * inode at this point anyways.
+	 */
+	return xfs_can_free_eofblocks(ip, true);
+}
+
+/*
  * xfs_inactive
  *
  * This is called when the vnode reference count for the vnode
@@ -1694,7 +1746,7 @@ xfs_inactive(
 	ASSERT(!xfs_iflags_test(ip, XFS_IRECOVERY));
 
 	/* If this is a read-only mount, don't do this (would generate I/O) */
-	if (mp->m_flags & XFS_MOUNT_RDONLY)
+	if (xfs_is_readonly(mp))
 		goto out;
 
 	/* Metadata inodes require explicit resource cleanup. */
@@ -1969,7 +2021,7 @@ xfs_iunlink_destroy(
 	rhashtable_free_and_destroy(&pag->pagi_unlinked_hash,
 			xfs_iunlink_free_item, &freed_anything);
 
-	ASSERT(freed_anything == false || XFS_FORCED_SHUTDOWN(pag->pag_mount));
+	ASSERT(freed_anything == false || xfs_is_shutdown(pag->pag_mount));
 }
 
 /*
@@ -2714,7 +2766,7 @@ xfs_remove(
 
 	trace_xfs_remove(dp, name);
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	error = xfs_qm_dqattach(dp);
@@ -2813,7 +2865,7 @@ xfs_remove(
 	 * remove transaction goes to disk before returning to
 	 * the user.
 	 */
-	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
+	if (xfs_has_wsync(mp) || xfs_has_dirsync(mp))
 		xfs_trans_set_sync(tp);
 
 	error = xfs_trans_commit(tp);
@@ -2890,7 +2942,7 @@ xfs_finish_rename(
 	 * If this is a synchronous mount, make sure that the rename transaction
 	 * goes to disk before returning to the user.
 	 */
-	if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
+	if (xfs_has_wsync(tp->t_mountp) || xfs_has_dirsync(tp->t_mountp))
 		xfs_trans_set_sync(tp);
 
 	return xfs_trans_commit(tp);
@@ -3473,7 +3525,7 @@ xfs_iflush(
 	 * happen but we need to still do it to ensure backwards compatibility
 	 * with old kernels that predate logging all inode changes.
 	 */
-	if (!xfs_sb_version_has_v3inode(&mp->m_sb))
+	if (!xfs_has_v3inodes(mp))
 		ip->i_flushiter++;
 
 	/*
@@ -3495,7 +3547,7 @@ xfs_iflush(
 	xfs_inode_to_disk(ip, dip, iip->ili_item.li_lsn);
 
 	/* Wrap, we never let the log put out DI_MAX_FLUSH */
-	if (!xfs_sb_version_has_v3inode(&mp->m_sb)) {
+	if (!xfs_has_v3inodes(mp)) {
 		if (ip->i_flushiter == DI_MAX_FLUSH)
 			ip->i_flushiter = 0;
 	}
@@ -3614,7 +3666,7 @@ xfs_iflush_cluster(
 		 * AIL, leaving a dirty/unpinned inode attached to the buffer
 		 * that otherwise looks like it should be flushed.
 		 */
-		if (XFS_FORCED_SHUTDOWN(mp)) {
+		if (xfs_is_shutdown(mp)) {
 			xfs_iunpin_wait(ip);
 			xfs_iflush_abort(ip);
 			xfs_iunlock(ip, XFS_ILOCK_SHARED);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index e0ae905554e2..b21b177832d1 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -41,6 +41,7 @@ typedef struct xfs_inode {
 	struct xfs_inode_log_item *i_itemp;	/* logging information */
 	mrlock_t		i_lock;		/* inode lock */
 	atomic_t		i_pincount;	/* inode pin count */
+	struct llist_node	i_gclist;	/* deferred inactivation list */
 
 	/*
 	 * Bitsets of inode metadata that have been checked and/or are sick.
@@ -239,6 +240,7 @@ static inline bool xfs_inode_has_bigtime(struct xfs_inode *ip)
 #define __XFS_IPINNED_BIT	8	 /* wakeup key for zero pin count */
 #define XFS_IPINNED		(1 << __XFS_IPINNED_BIT)
 #define XFS_IEOFBLOCKS		(1 << 9) /* has the preallocblocks tag set */
+#define XFS_NEED_INACTIVE	(1 << 10) /* see XFS_INACTIVATING below */
 /*
  * If this unlinked inode is in the middle of recovery, don't let drop_inode
  * truncate and free the inode.  This can happen if we iget the inode during
@@ -248,13 +250,29 @@ static inline bool xfs_inode_has_bigtime(struct xfs_inode *ip)
 #define XFS_ICOWBLOCKS		(1 << 12)/* has the cowblocks tag set */
 
 /*
+ * If we need to update on-disk metadata before this IRECLAIMABLE inode can be
+ * freed, then NEED_INACTIVE will be set.  Once we start the updates, the
+ * INACTIVATING bit will be set to keep iget away from this inode.  After the
+ * inactivation completes, both flags will be cleared and the inode is a
+ * plain old IRECLAIMABLE inode.
+ */
+#define XFS_INACTIVATING	(1 << 13)
+
+/* All inode state flags related to inode reclaim. */
+#define XFS_ALL_IRECLAIM_FLAGS	(XFS_IRECLAIMABLE | \
+				 XFS_IRECLAIM | \
+				 XFS_NEED_INACTIVE | \
+				 XFS_INACTIVATING)
+
+/*
  * Per-lifetime flags need to be reset when re-using a reclaimable inode during
  * inode lookup. This prevents unintended behaviour on the new inode from
  * ocurring.
  */
 #define XFS_IRECLAIM_RESET_FLAGS	\
 	(XFS_IRECLAIMABLE | XFS_IRECLAIM | \
-	 XFS_IDIRTY_RELEASE | XFS_ITRUNCATED)
+	 XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | XFS_NEED_INACTIVE | \
+	 XFS_INACTIVATING)
 
 /*
  * Flags for inode locking.
@@ -381,8 +399,7 @@ enum layout_break_reason {
  * new subdirectory gets S_ISGID bit from parent.
  */
 #define XFS_INHERIT_GID(pip)	\
-	(((pip)->i_mount->m_flags & XFS_MOUNT_GRPID) || \
-	 (VFS_I(pip)->i_mode & S_ISGID))
+	(xfs_has_grpid((pip)->i_mount) || (VFS_I(pip)->i_mode & S_ISGID))
 
 int		xfs_release(struct xfs_inode *ip);
 void		xfs_inactive(struct xfs_inode *ip);
@@ -492,6 +509,8 @@ extern struct kmem_zone	*xfs_inode_zone;
 /* The default CoW extent size hint. */
 #define XFS_DEFAULT_COWEXTSZ_HINT 32
 
+bool xfs_inode_needs_inactive(struct xfs_inode *ip);
+
 int xfs_iunlink_init(struct xfs_perag *pag);
 void xfs_iunlink_destroy(struct xfs_perag *pag);
 
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 35de30849fcc..0659d19c211e 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -396,7 +396,7 @@ xfs_inode_to_log_dinode(
 	/* log a dummy value to ensure log structure is fully initialised */
 	to->di_next_unlinked = NULLAGINO;
 
-	if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
+	if (xfs_has_v3inodes(ip->i_mount)) {
 		to->di_version = 3;
 		to->di_changecount = inode_peek_iversion(inode);
 		to->di_crtime = xfs_inode_to_log_dinode_ts(ip, ip->i_crtime);
diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
index e0072a6cd2d3..239dd2e3384e 100644
--- a/fs/xfs/xfs_inode_item_recover.c
+++ b/fs/xfs/xfs_inode_item_recover.c
@@ -295,7 +295,7 @@ xlog_recover_inode_commit_pass2(
 	 * superblock flag to determine whether we need to look at di_flushiter
 	 * to skip replay when the on disk inode is newer than the log one
 	 */
-	if (!xfs_sb_version_has_v3inode(&mp->m_sb) &&
+	if (!xfs_has_v3inodes(mp) &&
 	    ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
 		/*
 		 * Deal with the wrap case, DI_MAX_FLUSH is less
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 16039ea10ac9..0c795dc093ef 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -756,7 +756,7 @@ xfs_ioc_fsbulkstat(
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	if (copy_from_user(&bulkreq, arg, sizeof(struct xfs_fsop_bulkreq)))
@@ -927,7 +927,7 @@ xfs_ioc_bulkstat(
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr)))
@@ -977,7 +977,7 @@ xfs_ioc_inumbers(
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr)))
@@ -1010,7 +1010,7 @@ xfs_ioc_fsgeometry(
 	struct xfs_fsop_geom	fsgeo;
 	size_t			len;
 
-	xfs_fs_geometry(&mp->m_sb, &fsgeo, struct_version);
+	xfs_fs_geometry(mp, &fsgeo, struct_version);
 
 	if (struct_version <= 3)
 		len = sizeof(struct xfs_fsop_geom_v1);
@@ -1213,7 +1213,7 @@ xfs_ioctl_setattr_xflags(
 
 	/* diflags2 only valid for v3 inodes. */
 	i_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags);
-	if (i_flags2 && !xfs_sb_version_has_v3inode(&mp->m_sb))
+	if (i_flags2 && !xfs_has_v3inodes(mp))
 		return -EINVAL;
 
 	ip->i_diflags = xfs_flags2diflags(ip, fa->fsx_xflags);
@@ -1237,8 +1237,7 @@ xfs_ioctl_setattr_prepare_dax(
 	if (S_ISDIR(inode->i_mode))
 		return;
 
-	if ((mp->m_flags & XFS_MOUNT_DAX_ALWAYS) ||
-	    (mp->m_flags & XFS_MOUNT_DAX_NEVER))
+	if (xfs_has_dax_always(mp) || xfs_has_dax_never(mp))
 		return;
 
 	if (((fa->fsx_xflags & FS_XFLAG_DAX) &&
@@ -1263,10 +1262,10 @@ xfs_ioctl_setattr_get_trans(
 	struct xfs_trans	*tp;
 	int			error = -EROFS;
 
-	if (mp->m_flags & XFS_MOUNT_RDONLY)
+	if (xfs_is_readonly(mp))
 		goto out_error;
 	error = -EIO;
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		goto out_error;
 
 	error = xfs_trans_alloc_ichange(ip, NULL, NULL, pdqp,
@@ -1274,7 +1273,7 @@ xfs_ioctl_setattr_get_trans(
 	if (error)
 		goto out_error;
 
-	if (mp->m_flags & XFS_MOUNT_WSYNC)
+	if (xfs_has_wsync(mp))
 		xfs_trans_set_sync(tp);
 
 	return tp;
@@ -1362,9 +1361,9 @@ xfs_ioctl_setattr_check_projid(
 	if (!fa->fsx_valid)
 		return 0;
 
-	/* Disallow 32bit project ids if projid32bit feature is not enabled. */
+	/* Disallow 32bit project ids if 32bit IDs are not enabled. */
 	if (fa->fsx_projid > (uint16_t)-1 &&
-	    !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
+	    !xfs_has_projid32(ip->i_mount))
 		return -EINVAL;
 	return 0;
 }
@@ -1450,7 +1449,7 @@ xfs_fileattr_set(
 
 	/* Change the ownerships and register project quota modifications */
 	if (ip->i_projid != fa->fsx_projid) {
-		if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
+		if (XFS_IS_PQUOTA_ON(mp)) {
 			olddquot = xfs_qm_vop_chown(tp, ip,
 						&ip->i_pdquot, pdqp);
 		}
@@ -1467,7 +1466,7 @@ xfs_fileattr_set(
 	else
 		ip->i_extsize = 0;
 
-	if (xfs_sb_version_has_v3inode(&mp->m_sb)) {
+	if (xfs_has_v3inodes(mp)) {
 		if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
 			ip->i_cowextsize = XFS_B_TO_FSB(mp, fa->fsx_cowextsize);
 		else
@@ -1792,7 +1791,7 @@ xfs_ioc_swapext(
 		goto out_put_tmp_file;
 	}
 
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+	if (xfs_is_shutdown(ip->i_mount)) {
 		error = -EIO;
 		goto out_put_tmp_file;
 	}
@@ -2081,7 +2080,7 @@ xfs_file_ioctl(
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
 
-		if (mp->m_flags & XFS_MOUNT_RDONLY)
+		if (xfs_is_readonly(mp))
 			return -EROFS;
 
 		if (copy_from_user(&inout, arg, sizeof(inout)))
@@ -2198,7 +2197,7 @@ xfs_file_ioctl(
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
 
-		if (mp->m_flags & XFS_MOUNT_RDONLY)
+		if (xfs_is_readonly(mp))
 			return -EROFS;
 
 		if (copy_from_user(&eofb, arg, sizeof(eofb)))
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index e6506773ba55..8783af203cfc 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -50,7 +50,7 @@ xfs_compat_ioc_fsgeometry_v1(
 {
 	struct xfs_fsop_geom	  fsgeo;
 
-	xfs_fs_geometry(&mp->m_sb, &fsgeo, 3);
+	xfs_fs_geometry(mp, &fsgeo, 3);
 	/* The 32-bit variant simply has some padding at the end */
 	if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1)))
 		return -EFAULT;
@@ -254,7 +254,7 @@ xfs_compat_ioc_fsbulkstat(
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	if (get_user(addr, &p32->lastip))
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index d8cd2583dedb..093758440ad5 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -132,7 +132,7 @@ xfs_eof_alignment(
 		 * If mounted with the "-o swalloc" option the alignment is
 		 * increased from the strip unit size to the stripe width.
 		 */
-		if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
+		if (mp->m_swidth && xfs_has_swalloc(mp))
 			align = mp->m_swidth;
 		else if (mp->m_dalign)
 			align = mp->m_dalign;
@@ -734,7 +734,7 @@ xfs_direct_write_iomap_begin(
 
 	ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO));
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	/*
@@ -874,7 +874,7 @@ xfs_buffered_write_iomap_begin(
 	int			allocfork = XFS_DATA_FORK;
 	int			error = 0;
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	/* we can't use delayed allocations when using extent size hints */
@@ -994,7 +994,7 @@ xfs_buffered_write_iomap_begin(
 		 * Determine the initial size of the preallocation.
 		 * We clean up any extra preallocation when the file is closed.
 		 */
-		if (mp->m_flags & XFS_MOUNT_ALLOCSIZE)
+		if (xfs_has_allocsize(mp))
 			prealloc_blocks = mp->m_allocsize_blocks;
 		else
 			prealloc_blocks = xfs_iomap_prealloc_size(ip, allocfork,
@@ -1064,11 +1064,11 @@ found_cow:
 		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0);
 		if (error)
 			return error;
-	} else {
-		xfs_trim_extent(&cmap, offset_fsb,
-				imap.br_startoff - offset_fsb);
+		return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
 	}
-	return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
+
+	xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb);
+	return xfs_bmbt_to_iomap(ip, iomap, &cmap, 0);
 
 out_unlock:
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -1127,7 +1127,7 @@ xfs_buffered_write_iomap_end(
 
 		error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
 					       end_fsb - start_fsb);
-		if (error && !XFS_FORCED_SHUTDOWN(mp)) {
+		if (error && !xfs_is_shutdown(mp)) {
 			xfs_alert(mp, "%s: unable to clean up ino %lld",
 				__func__, ip->i_ino);
 			return error;
@@ -1162,7 +1162,7 @@ xfs_read_iomap_begin(
 
 	ASSERT(!(flags & (IOMAP_WRITE | IOMAP_ZERO)));
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	error = xfs_ilock_for_iomap(ip, flags, &lockmode);
@@ -1203,7 +1203,7 @@ xfs_seek_iomap_begin(
 	int			error = 0;
 	unsigned		lockmode;
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	lockmode = xfs_ilock_data_map_shared(ip);
@@ -1285,7 +1285,7 @@ xfs_xattr_iomap_begin(
 	int			nimaps = 1, error = 0;
 	unsigned		lockmode;
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	lockmode = xfs_ilock_attr_map_shared(ip);
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 93c082db04b7..a607d6aca5c4 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -393,7 +393,7 @@ xfs_vn_unlink(
 	 * but still hashed. This is incompatible with case-insensitive
 	 * mode, so invalidate (unhash) the dentry in CI-mode.
 	 */
-	if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
+	if (xfs_has_asciici(XFS_M(dir->i_sb)))
 		d_invalidate(dentry);
 	return 0;
 }
@@ -558,10 +558,10 @@ xfs_stat_blksize(
 	 * default buffered I/O size, return that, otherwise return the compat
 	 * default.
 	 */
-	if (mp->m_flags & XFS_MOUNT_LARGEIO) {
+	if (xfs_has_large_iosize(mp)) {
 		if (mp->m_swidth)
 			return XFS_FSB_TO_B(mp, mp->m_swidth);
-		if (mp->m_flags & XFS_MOUNT_ALLOCSIZE)
+		if (xfs_has_allocsize(mp))
 			return 1U << mp->m_allocsize_log;
 	}
 
@@ -582,7 +582,7 @@ xfs_vn_getattr(
 
 	trace_xfs_getattr(ip);
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	stat->size = XFS_ISIZE(ip);
@@ -597,7 +597,7 @@ xfs_vn_getattr(
 	stat->ctime = inode->i_ctime;
 	stat->blocks = XFS_FSB_TO_BB(mp, ip->i_nblocks + ip->i_delayed_blks);
 
-	if (xfs_sb_version_has_v3inode(&mp->m_sb)) {
+	if (xfs_has_v3inodes(mp)) {
 		if (request_mask & STATX_BTIME) {
 			stat->result_mask |= STATX_BTIME;
 			stat->btime = ip->i_crtime;
@@ -673,10 +673,10 @@ xfs_vn_change_ok(
 {
 	struct xfs_mount	*mp = XFS_I(d_inode(dentry))->i_mount;
 
-	if (mp->m_flags & XFS_MOUNT_RDONLY)
+	if (xfs_is_readonly(mp))
 		return -EROFS;
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	return setattr_prepare(mnt_userns, dentry, iattr);
@@ -778,7 +778,7 @@ xfs_setattr_nonsize(
 		 * in the transaction.
 		 */
 		if (!uid_eq(iuid, uid)) {
-			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
+			if (XFS_IS_UQUOTA_ON(mp)) {
 				ASSERT(mask & ATTR_UID);
 				ASSERT(udqp);
 				olddquot1 = xfs_qm_vop_chown(tp, ip,
@@ -787,8 +787,8 @@ xfs_setattr_nonsize(
 			inode->i_uid = uid;
 		}
 		if (!gid_eq(igid, gid)) {
-			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
-				ASSERT(xfs_sb_version_has_pquotino(&mp->m_sb) ||
+			if (XFS_IS_GQUOTA_ON(mp)) {
+				ASSERT(xfs_has_pquotino(mp) ||
 				       !XFS_IS_PQUOTA_ON(mp));
 				ASSERT(mask & ATTR_GID);
 				ASSERT(gdqp);
@@ -808,7 +808,7 @@ xfs_setattr_nonsize(
 
 	XFS_STATS_INC(mp, xs_ig_attrchg);
 
-	if (mp->m_flags & XFS_MOUNT_WSYNC)
+	if (xfs_has_wsync(mp))
 		xfs_trans_set_sync(tp);
 	error = xfs_trans_commit(tp);
 
@@ -1037,7 +1037,7 @@ xfs_setattr_size(
 
 	XFS_STATS_INC(mp, xs_ig_attrchg);
 
-	if (mp->m_flags & XFS_MOUNT_WSYNC)
+	if (xfs_has_wsync(mp))
 		xfs_trans_set_sync(tp);
 
 	error = xfs_trans_commit(tp);
@@ -1287,11 +1287,11 @@ xfs_inode_should_enable_dax(
 {
 	if (!IS_ENABLED(CONFIG_FS_DAX))
 		return false;
-	if (ip->i_mount->m_flags & XFS_MOUNT_DAX_NEVER)
+	if (xfs_has_dax_never(ip->i_mount))
 		return false;
 	if (!xfs_inode_supports_dax(ip))
 		return false;
-	if (ip->i_mount->m_flags & XFS_MOUNT_DAX_ALWAYS)
+	if (xfs_has_dax_always(ip->i_mount))
 		return true;
 	if (ip->i_diflags2 & XFS_DIFLAG2_DAX)
 		return true;
@@ -1344,7 +1344,7 @@ xfs_setup_inode(
 	gfp_t			gfp_mask;
 
 	inode->i_ino = ip->i_ino;
-	inode->i_state = I_NEW;
+	inode->i_state |= I_NEW;
 
 	inode_sb_list_add(inode);
 	/* make the inode look hashed for the writeback code */
@@ -1401,7 +1401,7 @@ xfs_setup_iops(
 			inode->i_mapping->a_ops = &xfs_address_space_operations;
 		break;
 	case S_IFDIR:
-		if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
+		if (xfs_has_asciici(XFS_M(inode->i_sb)))
 			inode->i_op = &xfs_dir_ci_inode_operations;
 		else
 			inode->i_op = &xfs_dir_inode_operations;
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index f331975a16de..c08c79d9e311 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -19,6 +19,7 @@
 #include "xfs_error.h"
 #include "xfs_icache.h"
 #include "xfs_health.h"
+#include "xfs_trans.h"
 
 /*
  * Bulk Stat
@@ -107,7 +108,7 @@ xfs_bulkstat_one_int(
 	buf->bs_forkoff = XFS_IFORK_BOFF(ip);
 	buf->bs_version = XFS_BULKSTAT_VERSION_V5;
 
-	if (xfs_sb_version_has_v3inode(&mp->m_sb)) {
+	if (xfs_has_v3inodes(mp)) {
 		buf->bs_btime = ip->i_crtime.tv_sec;
 		buf->bs_btime_nsec = ip->i_crtime.tv_nsec;
 		if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
@@ -163,6 +164,7 @@ xfs_bulkstat_one(
 		.formatter	= formatter,
 		.breq		= breq,
 	};
+	struct xfs_trans	*tp;
 	int			error;
 
 	if (breq->mnt_userns != &init_user_ns) {
@@ -178,9 +180,18 @@ xfs_bulkstat_one(
 	if (!bc.buf)
 		return -ENOMEM;
 
-	error = xfs_bulkstat_one_int(breq->mp, breq->mnt_userns, NULL,
-				     breq->startino, &bc);
+	/*
+	 * Grab an empty transaction so that we can use its recursive buffer
+	 * locking abilities to detect cycles in the inobt without deadlocking.
+	 */
+	error = xfs_trans_alloc_empty(breq->mp, &tp);
+	if (error)
+		goto out;
 
+	error = xfs_bulkstat_one_int(breq->mp, breq->mnt_userns, tp,
+			breq->startino, &bc);
+	xfs_trans_cancel(tp);
+out:
 	kmem_free(bc.buf);
 
 	/*
@@ -244,6 +255,7 @@ xfs_bulkstat(
 		.formatter	= formatter,
 		.breq		= breq,
 	};
+	struct xfs_trans	*tp;
 	int			error;
 
 	if (breq->mnt_userns != &init_user_ns) {
@@ -259,9 +271,18 @@ xfs_bulkstat(
 	if (!bc.buf)
 		return -ENOMEM;
 
-	error = xfs_iwalk(breq->mp, NULL, breq->startino, breq->flags,
-			xfs_bulkstat_iwalk, breq->icount, &bc);
+	/*
+	 * Grab an empty transaction so that we can use its recursive buffer
+	 * locking abilities to detect cycles in the inobt without deadlocking.
+	 */
+	error = xfs_trans_alloc_empty(breq->mp, &tp);
+	if (error)
+		goto out;
 
+	error = xfs_iwalk(breq->mp, tp, breq->startino, breq->flags,
+			xfs_bulkstat_iwalk, breq->icount, &bc);
+	xfs_trans_cancel(tp);
+out:
 	kmem_free(bc.buf);
 
 	/*
@@ -374,13 +395,24 @@ xfs_inumbers(
 		.formatter	= formatter,
 		.breq		= breq,
 	};
+	struct xfs_trans	*tp;
 	int			error = 0;
 
 	if (xfs_bulkstat_already_done(breq->mp, breq->startino))
 		return 0;
 
-	error = xfs_inobt_walk(breq->mp, NULL, breq->startino, breq->flags,
+	/*
+	 * Grab an empty transaction so that we can use its recursive buffer
+	 * locking abilities to detect cycles in the inobt without deadlocking.
+	 */
+	error = xfs_trans_alloc_empty(breq->mp, &tp);
+	if (error)
+		goto out;
+
+	error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->flags,
 			xfs_inumbers_walk, breq->icount, &ic);
+	xfs_trans_cancel(tp);
+out:
 
 	/*
 	 * We found some inode groups, so clear the error status and return
diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c
index 917d51eefee3..7558486f4937 100644
--- a/fs/xfs/xfs_iwalk.c
+++ b/fs/xfs/xfs_iwalk.c
@@ -83,6 +83,9 @@ struct xfs_iwalk_ag {
 
 	/* Skip empty inobt records? */
 	unsigned int			skip_empty:1;
+
+	/* Drop the (hopefully empty) transaction when calling iwalk_fn. */
+	unsigned int			drop_trans:1;
 };
 
 /*
@@ -352,7 +355,6 @@ xfs_iwalk_run_callbacks(
 	int				*has_more)
 {
 	struct xfs_mount		*mp = iwag->mp;
-	struct xfs_trans		*tp = iwag->tp;
 	struct xfs_inobt_rec_incore	*irec;
 	xfs_agino_t			next_agino;
 	int				error;
@@ -362,10 +364,15 @@ xfs_iwalk_run_callbacks(
 	ASSERT(iwag->nr_recs > 0);
 
 	/* Delete cursor but remember the last record we cached... */
-	xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0);
+	xfs_iwalk_del_inobt(iwag->tp, curpp, agi_bpp, 0);
 	irec = &iwag->recs[iwag->nr_recs - 1];
 	ASSERT(next_agino >= irec->ir_startino + XFS_INODES_PER_CHUNK);
 
+	if (iwag->drop_trans) {
+		xfs_trans_cancel(iwag->tp);
+		iwag->tp = NULL;
+	}
+
 	error = xfs_iwalk_ag_recs(iwag);
 	if (error)
 		return error;
@@ -376,8 +383,15 @@ xfs_iwalk_run_callbacks(
 	if (!has_more)
 		return 0;
 
+	if (iwag->drop_trans) {
+		error = xfs_trans_alloc_empty(mp, &iwag->tp);
+		if (error)
+			return error;
+	}
+
 	/* ...and recreate the cursor just past where we left off. */
-	error = xfs_inobt_cur(mp, tp, iwag->pag, XFS_BTNUM_INO, curpp, agi_bpp);
+	error = xfs_inobt_cur(mp, iwag->tp, iwag->pag, XFS_BTNUM_INO, curpp,
+			agi_bpp);
 	if (error)
 		return error;
 
@@ -390,7 +404,6 @@ xfs_iwalk_ag(
 	struct xfs_iwalk_ag		*iwag)
 {
 	struct xfs_mount		*mp = iwag->mp;
-	struct xfs_trans		*tp = iwag->tp;
 	struct xfs_perag		*pag = iwag->pag;
 	struct xfs_buf			*agi_bp = NULL;
 	struct xfs_btree_cur		*cur = NULL;
@@ -469,7 +482,7 @@ xfs_iwalk_ag(
 	error = xfs_iwalk_run_callbacks(iwag, &cur, &agi_bp, &has_more);
 
 out:
-	xfs_iwalk_del_inobt(tp, &cur, &agi_bp, error);
+	xfs_iwalk_del_inobt(iwag->tp, &cur, &agi_bp, error);
 	return error;
 }
 
@@ -599,8 +612,18 @@ xfs_iwalk_ag_work(
 	error = xfs_iwalk_alloc(iwag);
 	if (error)
 		goto out;
+	/*
+	 * Grab an empty transaction so that we can use its recursive buffer
+	 * locking abilities to detect cycles in the inobt without deadlocking.
+	 */
+	error = xfs_trans_alloc_empty(mp, &iwag->tp);
+	if (error)
+		goto out;
+	iwag->drop_trans = 1;
 
 	error = xfs_iwalk_ag(iwag);
+	if (iwag->tp)
+		xfs_trans_cancel(iwag->tp);
 	xfs_iwalk_free(iwag);
 out:
 	xfs_perag_put(iwag->pag);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 60ac5fd63f1e..f6cd2d4aa770 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -41,6 +41,8 @@ xlog_dealloc_log(
 /* local state machine functions */
 STATIC void xlog_state_done_syncing(
 	struct xlog_in_core	*iclog);
+STATIC void xlog_state_do_callback(
+	struct xlog		*log);
 STATIC int
 xlog_state_get_iclog_space(
 	struct xlog		*log,
@@ -50,11 +52,6 @@ xlog_state_get_iclog_space(
 	int			*continued_write,
 	int			*logoffsetp);
 STATIC void
-xlog_state_switch_iclogs(
-	struct xlog		*log,
-	struct xlog_in_core	*iclog,
-	int			eventual_size);
-STATIC void
 xlog_grant_push_ail(
 	struct xlog		*log,
 	int			need_bytes);
@@ -246,7 +243,7 @@ xlog_grant_head_wait(
 	list_add_tail(&tic->t_queue, &head->waiters);
 
 	do {
-		if (XLOG_FORCED_SHUTDOWN(log))
+		if (xlog_is_shutdown(log))
 			goto shutdown;
 		xlog_grant_push_ail(log, need_bytes);
 
@@ -260,7 +257,7 @@ xlog_grant_head_wait(
 		trace_xfs_log_grant_wake(log, tic);
 
 		spin_lock(&head->lock);
-		if (XLOG_FORCED_SHUTDOWN(log))
+		if (xlog_is_shutdown(log))
 			goto shutdown;
 	} while (xlog_space_left(log, &head->grant) < need_bytes);
 
@@ -298,7 +295,7 @@ xlog_grant_head_check(
 	int			free_bytes;
 	int			error = 0;
 
-	ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
+	ASSERT(!xlog_in_recovery(log));
 
 	/*
 	 * If there are other waiters on the queue then give them a chance at
@@ -359,13 +356,13 @@ xfs_log_writable(
 	 * mounts allow internal writes for log recovery and unmount purposes,
 	 * so don't restrict that case.
 	 */
-	if (mp->m_flags & XFS_MOUNT_NORECOVERY)
+	if (xfs_has_norecovery(mp))
 		return false;
 	if (xfs_readonly_buftarg(mp->m_ddev_targp))
 		return false;
 	if (xfs_readonly_buftarg(mp->m_log->l_targ))
 		return false;
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xlog_is_shutdown(mp->m_log))
 		return false;
 	return true;
 }
@@ -382,7 +379,7 @@ xfs_log_regrant(
 	int			need_bytes;
 	int			error = 0;
 
-	if (XLOG_FORCED_SHUTDOWN(log))
+	if (xlog_is_shutdown(log))
 		return -EIO;
 
 	XFS_STATS_INC(mp, xs_try_logspace);
@@ -450,7 +447,7 @@ xfs_log_reserve(
 
 	ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
 
-	if (XLOG_FORCED_SHUTDOWN(log))
+	if (xlog_is_shutdown(log))
 		return -EIO;
 
 	XFS_STATS_INC(mp, xs_try_logspace);
@@ -487,6 +484,42 @@ out_error:
 }
 
 /*
+ * Run all the pending iclog callbacks and wake log force waiters and iclog
+ * space waiters so they can process the newly set shutdown state. We really
+ * don't care what order we process callbacks here because the log is shut down
+ * and so state cannot change on disk anymore.
+ *
+ * We avoid processing actively referenced iclogs so that we don't run callbacks
+ * while the iclog owner might still be preparing the iclog for IO submssion.
+ * These will be caught by xlog_state_iclog_release() and call this function
+ * again to process any callbacks that may have been added to that iclog.
+ */
+static void
+xlog_state_shutdown_callbacks(
+	struct xlog		*log)
+{
+	struct xlog_in_core	*iclog;
+	LIST_HEAD(cb_list);
+
+	spin_lock(&log->l_icloglock);
+	iclog = log->l_iclog;
+	do {
+		if (atomic_read(&iclog->ic_refcnt)) {
+			/* Reference holder will re-run iclog callbacks. */
+			continue;
+		}
+		list_splice_init(&iclog->ic_callbacks, &cb_list);
+		wake_up_all(&iclog->ic_write_wait);
+		wake_up_all(&iclog->ic_force_wait);
+	} while ((iclog = iclog->ic_next) != log->l_iclog);
+
+	wake_up_all(&log->l_flush_wait);
+	spin_unlock(&log->l_icloglock);
+
+	xlog_cil_process_committed(&cb_list);
+}
+
+/*
  * Flush iclog to disk if this is the last reference to the given iclog and the
  * it is in the WANT_SYNC state.
  *
@@ -520,12 +553,11 @@ xlog_state_release_iclog(
 	xfs_lsn_t		old_tail_lsn)
 {
 	xfs_lsn_t		tail_lsn;
+	bool			last_ref;
+
 	lockdep_assert_held(&log->l_icloglock);
 
 	trace_xlog_iclog_release(iclog, _RET_IP_);
-	if (iclog->ic_state == XLOG_STATE_IOERROR)
-		return -EIO;
-
 	/*
 	 * Grabbing the current log tail needs to be atomic w.r.t. the writing
 	 * of the tail LSN into the iclog so we guarantee that the log tail does
@@ -543,7 +575,23 @@ xlog_state_release_iclog(
 			iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
 	}
 
-	if (!atomic_dec_and_test(&iclog->ic_refcnt))
+	last_ref = atomic_dec_and_test(&iclog->ic_refcnt);
+
+	if (xlog_is_shutdown(log)) {
+		/*
+		 * If there are no more references to this iclog, process the
+		 * pending iclog callbacks that were waiting on the release of
+		 * this iclog.
+		 */
+		if (last_ref) {
+			spin_unlock(&log->l_icloglock);
+			xlog_state_shutdown_callbacks(log);
+			spin_lock(&log->l_icloglock);
+		}
+		return -EIO;
+	}
+
+	if (!last_ref)
 		return 0;
 
 	if (iclog->ic_state != XLOG_STATE_WANT_SYNC) {
@@ -580,25 +628,27 @@ xfs_log_mount(
 	xfs_daddr_t	blk_offset,
 	int		num_bblks)
 {
-	bool		fatal = xfs_sb_version_hascrc(&mp->m_sb);
+	struct xlog	*log;
+	bool		fatal = xfs_has_crc(mp);
 	int		error = 0;
 	int		min_logfsbs;
 
-	if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) {
+	if (!xfs_has_norecovery(mp)) {
 		xfs_notice(mp, "Mounting V%d Filesystem",
 			   XFS_SB_VERSION_NUM(&mp->m_sb));
 	} else {
 		xfs_notice(mp,
 "Mounting V%d filesystem in no-recovery mode. Filesystem will be inconsistent.",
 			   XFS_SB_VERSION_NUM(&mp->m_sb));
-		ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
+		ASSERT(xfs_is_readonly(mp));
 	}
 
-	mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks);
-	if (IS_ERR(mp->m_log)) {
-		error = PTR_ERR(mp->m_log);
+	log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks);
+	if (IS_ERR(log)) {
+		error = PTR_ERR(log);
 		goto out;
 	}
+	mp->m_log = log;
 
 	/*
 	 * Validate the given log space and drop a critical message via syslog
@@ -663,51 +713,51 @@ xfs_log_mount(
 		xfs_warn(mp, "AIL initialisation failed: error %d", error);
 		goto out_free_log;
 	}
-	mp->m_log->l_ailp = mp->m_ail;
+	log->l_ailp = mp->m_ail;
 
 	/*
 	 * skip log recovery on a norecovery mount.  pretend it all
 	 * just worked.
 	 */
-	if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) {
-		int	readonly = (mp->m_flags & XFS_MOUNT_RDONLY);
-
-		if (readonly)
-			mp->m_flags &= ~XFS_MOUNT_RDONLY;
-
-		error = xlog_recover(mp->m_log);
-
+	if (!xfs_has_norecovery(mp)) {
+		/*
+		 * log recovery ignores readonly state and so we need to clear
+		 * mount-based read only state so it can write to disk.
+		 */
+		bool	readonly = test_and_clear_bit(XFS_OPSTATE_READONLY,
+						&mp->m_opstate);
+		error = xlog_recover(log);
 		if (readonly)
-			mp->m_flags |= XFS_MOUNT_RDONLY;
+			set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
 		if (error) {
 			xfs_warn(mp, "log mount/recovery failed: error %d",
 				error);
-			xlog_recover_cancel(mp->m_log);
+			xlog_recover_cancel(log);
 			goto out_destroy_ail;
 		}
 	}
 
-	error = xfs_sysfs_init(&mp->m_log->l_kobj, &xfs_log_ktype, &mp->m_kobj,
+	error = xfs_sysfs_init(&log->l_kobj, &xfs_log_ktype, &mp->m_kobj,
 			       "log");
 	if (error)
 		goto out_destroy_ail;
 
 	/* Normal transactions can now occur */
-	mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY;
+	clear_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate);
 
 	/*
 	 * Now the log has been fully initialised and we know were our
 	 * space grant counters are, we can initialise the permanent ticket
 	 * needed for delayed logging to work.
 	 */
-	xlog_cil_init_post_recovery(mp->m_log);
+	xlog_cil_init_post_recovery(log);
 
 	return 0;
 
 out_destroy_ail:
 	xfs_trans_ail_destroy(mp);
 out_free_log:
-	xlog_dealloc_log(mp->m_log);
+	xlog_dealloc_log(log);
 out:
 	return error;
 }
@@ -726,19 +776,22 @@ int
 xfs_log_mount_finish(
 	struct xfs_mount	*mp)
 {
-	int	error = 0;
-	bool	readonly = (mp->m_flags & XFS_MOUNT_RDONLY);
-	bool	recovered = mp->m_log->l_flags & XLOG_RECOVERY_NEEDED;
+	struct xlog		*log = mp->m_log;
+	bool			readonly;
+	int			error = 0;
 
-	if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
-		ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
+	if (xfs_has_norecovery(mp)) {
+		ASSERT(xfs_is_readonly(mp));
 		return 0;
-	} else if (readonly) {
-		/* Allow unlinked processing to proceed */
-		mp->m_flags &= ~XFS_MOUNT_RDONLY;
 	}
 
 	/*
+	 * log recovery ignores readonly state and so we need to clear
+	 * mount-based read only state so it can write to disk.
+	 */
+	readonly = test_and_clear_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
+
+	/*
 	 * During the second phase of log recovery, we need iget and
 	 * iput to behave like they do for an active filesystem.
 	 * xfs_fs_drop_inode needs to be able to prevent the deletion
@@ -759,7 +812,8 @@ xfs_log_mount_finish(
 	 * mount failure occurs.
 	 */
 	mp->m_super->s_flags |= SB_ACTIVE;
-	error = xlog_recover_finish(mp->m_log);
+	if (xlog_recovery_needed(log))
+		error = xlog_recover_finish(log);
 	if (!error)
 		xfs_log_work_queue(mp);
 	mp->m_super->s_flags &= ~SB_ACTIVE;
@@ -774,17 +828,24 @@ xfs_log_mount_finish(
 	 * Don't push in the error case because the AIL may have pending intents
 	 * that aren't removed until recovery is cancelled.
 	 */
-	if (!error && recovered) {
-		xfs_log_force(mp, XFS_LOG_SYNC);
-		xfs_ail_push_all_sync(mp->m_ail);
+	if (xlog_recovery_needed(log)) {
+		if (!error) {
+			xfs_log_force(mp, XFS_LOG_SYNC);
+			xfs_ail_push_all_sync(mp->m_ail);
+		}
+		xfs_notice(mp, "Ending recovery (logdev: %s)",
+				mp->m_logname ? mp->m_logname : "internal");
+	} else {
+		xfs_info(mp, "Ending clean mount");
 	}
 	xfs_buftarg_drain(mp->m_ddev_targp);
 
+	clear_bit(XLOG_RECOVERY_NEEDED, &log->l_opstate);
 	if (readonly)
-		mp->m_flags |= XFS_MOUNT_RDONLY;
+		set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
 
 	/* Make sure the log is dead if we're returning failure. */
-	ASSERT(!error || (mp->m_log->l_flags & XLOG_IO_ERROR));
+	ASSERT(!error || xlog_is_shutdown(log));
 
 	return error;
 }
@@ -830,7 +891,7 @@ xlog_wait_on_iclog(
 	struct xlog		*log = iclog->ic_log;
 
 	trace_xlog_iclog_wait_on(iclog, _RET_IP_);
-	if (!XLOG_FORCED_SHUTDOWN(log) &&
+	if (!xlog_is_shutdown(log) &&
 	    iclog->ic_state != XLOG_STATE_ACTIVE &&
 	    iclog->ic_state != XLOG_STATE_DIRTY) {
 		XFS_STATS_INC(log->l_mp, xs_log_force_sleep);
@@ -839,7 +900,7 @@ xlog_wait_on_iclog(
 		spin_unlock(&log->l_icloglock);
 	}
 
-	if (XLOG_FORCED_SHUTDOWN(log))
+	if (xlog_is_shutdown(log))
 		return -EIO;
 	return 0;
 }
@@ -870,7 +931,7 @@ xlog_write_unmount_record(
 	/* account for space used by record data */
 	ticket->t_curr_res -= sizeof(ulf);
 
-	return xlog_write(log, &vec, ticket, NULL, NULL, XLOG_UNMOUNT_TRANS);
+	return xlog_write(log, NULL, &vec, ticket, XLOG_UNMOUNT_TRANS);
 }
 
 /*
@@ -893,7 +954,7 @@ xlog_unmount_write(
 	error = xlog_write_unmount_record(log, tic);
 	/*
 	 * At this point, we're umounting anyway, so there's no point in
-	 * transitioning log state to IOERROR. Just continue...
+	 * transitioning log state to shutdown. Just continue...
 	 */
 out_err:
 	if (error)
@@ -940,7 +1001,7 @@ xfs_log_unmount_write(
 
 	xfs_log_force(mp, XFS_LOG_SYNC);
 
-	if (XLOG_FORCED_SHUTDOWN(log))
+	if (xlog_is_shutdown(log))
 		return;
 
 	/*
@@ -972,6 +1033,20 @@ int
 xfs_log_quiesce(
 	struct xfs_mount	*mp)
 {
+	/*
+	 * Clear log incompat features since we're quiescing the log.  Report
+	 * failures, though it's not fatal to have a higher log feature
+	 * protection level than the log contents actually require.
+	 */
+	if (xfs_clear_incompat_log_features(mp)) {
+		int error;
+
+		error = xfs_sync_sb(mp, false);
+		if (error)
+			xfs_warn(mp,
+	"Failed to clear log incompat features on quiesce");
+	}
+
 	cancel_delayed_work_sync(&mp->m_log->l_work);
 	xfs_log_force(mp, XFS_LOG_SYNC);
 
@@ -1049,11 +1124,11 @@ xfs_log_space_wake(
 	struct xlog		*log = mp->m_log;
 	int			free_bytes;
 
-	if (XLOG_FORCED_SHUTDOWN(log))
+	if (xlog_is_shutdown(log))
 		return;
 
 	if (!list_empty_careful(&log->l_write_head.waiters)) {
-		ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
+		ASSERT(!xlog_in_recovery(log));
 
 		spin_lock(&log->l_write_head.lock);
 		free_bytes = xlog_space_left(log, &log->l_write_head.grant);
@@ -1062,7 +1137,7 @@ xfs_log_space_wake(
 	}
 
 	if (!list_empty_careful(&log->l_reserve_head.waiters)) {
-		ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
+		ASSERT(!xlog_in_recovery(log));
 
 		spin_lock(&log->l_reserve_head.lock);
 		free_bytes = xlog_space_left(log, &log->l_reserve_head.grant);
@@ -1140,7 +1215,7 @@ xfs_log_cover(
 
 	ASSERT((xlog_cil_empty(mp->m_log) && xlog_iclogs_empty(mp->m_log) &&
 	        !xfs_ail_min_lsn(mp->m_log->l_ailp)) ||
-	       XFS_FORCED_SHUTDOWN(mp));
+		xlog_is_shutdown(mp->m_log));
 
 	if (!xfs_log_writable(mp))
 		return 0;
@@ -1157,7 +1232,7 @@ xfs_log_cover(
 	 * handles this for us.
 	 */
 	need_covered = xfs_log_need_covered(mp);
-	if (!need_covered && !xfs_sb_version_haslazysbcount(&mp->m_sb))
+	if (!need_covered && !xfs_has_lazysbcount(mp))
 		return 0;
 
 	/*
@@ -1230,16 +1305,18 @@ xlog_assign_tail_lsn(
  * wrap the tail, we should blow up.  Rather than catch this case here,
  * we depend on other ASSERTions in other parts of the code.   XXXmiken
  *
- * This code also handles the case where the reservation head is behind
- * the tail.  The details of this case are described below, but the end
- * result is that we return the size of the log as the amount of space left.
+ * If reservation head is behind the tail, we have a problem. Warn about it,
+ * but then treat it as if the log is empty.
+ *
+ * If the log is shut down, the head and tail may be invalid or out of whack, so
+ * shortcut invalidity asserts in this case so that we don't trigger them
+ * falsely.
  */
 STATIC int
 xlog_space_left(
 	struct xlog	*log,
 	atomic64_t	*head)
 {
-	int		free_bytes;
 	int		tail_bytes;
 	int		tail_cycle;
 	int		head_cycle;
@@ -1249,29 +1326,30 @@ xlog_space_left(
 	xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_bytes);
 	tail_bytes = BBTOB(tail_bytes);
 	if (tail_cycle == head_cycle && head_bytes >= tail_bytes)
-		free_bytes = log->l_logsize - (head_bytes - tail_bytes);
-	else if (tail_cycle + 1 < head_cycle)
+		return log->l_logsize - (head_bytes - tail_bytes);
+	if (tail_cycle + 1 < head_cycle)
 		return 0;
-	else if (tail_cycle < head_cycle) {
+
+	/* Ignore potential inconsistency when shutdown. */
+	if (xlog_is_shutdown(log))
+		return log->l_logsize;
+
+	if (tail_cycle < head_cycle) {
 		ASSERT(tail_cycle == (head_cycle - 1));
-		free_bytes = tail_bytes - head_bytes;
-	} else {
-		/*
-		 * The reservation head is behind the tail.
-		 * In this case we just want to return the size of the
-		 * log as the amount of space left.
-		 */
-		xfs_alert(log->l_mp, "xlog_space_left: head behind tail");
-		xfs_alert(log->l_mp,
-			  "  tail_cycle = %d, tail_bytes = %d",
-			  tail_cycle, tail_bytes);
-		xfs_alert(log->l_mp,
-			  "  GH   cycle = %d, GH   bytes = %d",
-			  head_cycle, head_bytes);
-		ASSERT(0);
-		free_bytes = log->l_logsize;
+		return tail_bytes - head_bytes;
 	}
-	return free_bytes;
+
+	/*
+	 * The reservation head is behind the tail. In this case we just want to
+	 * return the size of the log as the amount of space left.
+	 */
+	xfs_alert(log->l_mp, "xlog_space_left: head behind tail");
+	xfs_alert(log->l_mp, "  tail_cycle = %d, tail_bytes = %d",
+		  tail_cycle, tail_bytes);
+	xfs_alert(log->l_mp, "  GH   cycle = %d, GH   bytes = %d",
+		  head_cycle, head_bytes);
+	ASSERT(0);
+	return log->l_logsize;
 }
 
 
@@ -1349,6 +1427,32 @@ xfs_log_work_queue(
 }
 
 /*
+ * Clear the log incompat flags if we have the opportunity.
+ *
+ * This only happens if we're about to log the second dummy transaction as part
+ * of covering the log and we can get the log incompat feature usage lock.
+ */
+static inline void
+xlog_clear_incompat(
+	struct xlog		*log)
+{
+	struct xfs_mount	*mp = log->l_mp;
+
+	if (!xfs_sb_has_incompat_log_feature(&mp->m_sb,
+				XFS_SB_FEAT_INCOMPAT_LOG_ALL))
+		return;
+
+	if (log->l_covered_state != XLOG_STATE_COVER_DONE2)
+		return;
+
+	if (!down_write_trylock(&log->l_incompat_users))
+		return;
+
+	xfs_clear_incompat_log_features(mp);
+	up_write(&log->l_incompat_users);
+}
+
+/*
  * Every sync period we need to unpin all items in the AIL and push them to
  * disk. If there is nothing dirty, then we might need to cover the log to
  * indicate that the filesystem is idle.
@@ -1374,6 +1478,7 @@ xfs_log_worker(
 		 * synchronously log the superblock instead to ensure the
 		 * superblock is immediately unpinned and can be written back.
 		 */
+		xlog_clear_incompat(log);
 		xfs_sync_sb(mp, true);
 	} else
 		xfs_log_force(mp, 0);
@@ -1417,7 +1522,7 @@ xlog_alloc_log(
 	log->l_logBBstart  = blk_offset;
 	log->l_logBBsize   = num_bblks;
 	log->l_covered_state = XLOG_STATE_COVER_IDLE;
-	log->l_flags	   |= XLOG_ACTIVE_RECOVERY;
+	set_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate);
 	INIT_DELAYED_WORK(&log->l_work, xfs_log_worker);
 
 	log->l_prev_block  = -1;
@@ -1426,7 +1531,7 @@ xlog_alloc_log(
 	xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0);
 	log->l_curr_cycle  = 1;	    /* 0 is bad since this is initial value */
 
-	if (xfs_sb_version_haslogv2(&mp->m_sb) && mp->m_sb.sb_logsunit > 1)
+	if (xfs_has_logv2(mp) && mp->m_sb.sb_logsunit > 1)
 		log->l_iclog_roundoff = mp->m_sb.sb_logsunit;
 	else
 		log->l_iclog_roundoff = BBSIZE;
@@ -1435,7 +1540,7 @@ xlog_alloc_log(
 	xlog_grant_head_init(&log->l_write_head);
 
 	error = -EFSCORRUPTED;
-	if (xfs_sb_version_hassector(&mp->m_sb)) {
+	if (xfs_has_sector(mp)) {
 	        log2_size = mp->m_sb.sb_logsectlog;
 		if (log2_size < BBSHIFT) {
 			xfs_warn(mp, "Log sector size too small (0x%x < 0x%x)",
@@ -1452,7 +1557,7 @@ xlog_alloc_log(
 
 		/* for larger sector sizes, must have v2 or external log */
 		if (log2_size && log->l_logBBstart > 0 &&
-			    !xfs_sb_version_haslogv2(&mp->m_sb)) {
+			    !xfs_has_logv2(mp)) {
 			xfs_warn(mp,
 		"log sector size (0x%x) invalid for configuration.",
 				log2_size);
@@ -1461,6 +1566,8 @@ xlog_alloc_log(
 	}
 	log->l_sectBBsize = 1 << log2_size;
 
+	init_rwsem(&log->l_incompat_users);
+
 	xlog_get_iclog_buffer_size(mp, log);
 
 	spin_lock_init(&log->l_icloglock);
@@ -1476,7 +1583,6 @@ xlog_alloc_log(
 	 */
 	ASSERT(log->l_iclog_size >= 4096);
 	for (i = 0; i < log->l_iclog_bufs; i++) {
-		int align_mask = xfs_buftarg_dma_alignment(mp->m_logdev_targp);
 		size_t bvec_size = howmany(log->l_iclog_size, PAGE_SIZE) *
 				sizeof(struct bio_vec);
 
@@ -1488,8 +1594,8 @@ xlog_alloc_log(
 		iclog->ic_prev = prev_iclog;
 		prev_iclog = iclog;
 
-		iclog->ic_data = kmem_alloc_io(log->l_iclog_size, align_mask,
-						KM_MAYFAIL | KM_ZERO);
+		iclog->ic_data = kvzalloc(log->l_iclog_size,
+				GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 		if (!iclog->ic_data)
 			goto out_free_iclog;
 #ifdef DEBUG
@@ -1499,7 +1605,7 @@ xlog_alloc_log(
 		memset(head, 0, sizeof(xlog_rec_header_t));
 		head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
 		head->h_version = cpu_to_be32(
-			xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1);
+			xfs_has_logv2(log->l_mp) ? 2 : 1);
 		head->h_size = cpu_to_be32(log->l_iclog_size);
 		/* new fields */
 		head->h_fmt = cpu_to_be32(XLOG_FMT);
@@ -1551,37 +1657,6 @@ out:
 }	/* xlog_alloc_log */
 
 /*
- * Write out the commit record of a transaction associated with the given
- * ticket to close off a running log write. Return the lsn of the commit record.
- */
-int
-xlog_commit_record(
-	struct xlog		*log,
-	struct xlog_ticket	*ticket,
-	struct xlog_in_core	**iclog,
-	xfs_lsn_t		*lsn)
-{
-	struct xfs_log_iovec reg = {
-		.i_addr = NULL,
-		.i_len = 0,
-		.i_type = XLOG_REG_TYPE_COMMIT,
-	};
-	struct xfs_log_vec vec = {
-		.lv_niovecs = 1,
-		.lv_iovecp = &reg,
-	};
-	int	error;
-
-	if (XLOG_FORCED_SHUTDOWN(log))
-		return -EIO;
-
-	error = xlog_write(log, &vec, ticket, lsn, iclog, XLOG_COMMIT_TRANS);
-	if (error)
-		xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
-	return error;
-}
-
-/*
  * Compute the LSN that we'd need to push the log tail towards in order to have
  * (a) enough on-disk log space to log the number of bytes specified, (b) at
  * least 25% of the log space free, and (c) at least 256 blocks free.  If the
@@ -1653,7 +1728,7 @@ xlog_grant_push_ail(
 	xfs_lsn_t	threshold_lsn;
 
 	threshold_lsn = xlog_grant_push_threshold(log, need_bytes);
-	if (threshold_lsn == NULLCOMMITLSN || XLOG_FORCED_SHUTDOWN(log))
+	if (threshold_lsn == NULLCOMMITLSN || xlog_is_shutdown(log))
 		return;
 
 	/*
@@ -1689,7 +1764,7 @@ xlog_pack_data(
 		dp += BBSIZE;
 	}
 
-	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+	if (xfs_has_logv2(log->l_mp)) {
 		xlog_in_core_2_t *xhdr = iclog->ic_data;
 
 		for ( ; i < BTOBB(size); i++) {
@@ -1726,7 +1801,7 @@ xlog_cksum(
 			      offsetof(struct xlog_rec_header, h_crc));
 
 	/* ... then for additional cycle data for v2 logs ... */
-	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+	if (xfs_has_logv2(log->l_mp)) {
 		union xlog_in_core2 *xhdr = (union xlog_in_core2 *)rhead;
 		int		i;
 		int		xheads;
@@ -1795,7 +1870,7 @@ xlog_write_iclog(
 	 * across the log IO to archieve that.
 	 */
 	down(&iclog->ic_sema);
-	if (unlikely(iclog->ic_state == XLOG_STATE_IOERROR)) {
+	if (xlog_is_shutdown(log)) {
 		/*
 		 * It would seem logical to return EIO here, but we rely on
 		 * the log state machine to propagate I/O errors instead of
@@ -1953,7 +2028,7 @@ xlog_sync(
 
 	/* real byte length */
 	size = iclog->ic_offset;
-	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb))
+	if (xfs_has_logv2(log->l_mp))
 		size += roundoff;
 	iclog->ic_header.h_len = cpu_to_be32(size);
 
@@ -2303,8 +2378,7 @@ xlog_write_copy_finish(
 	int			*data_cnt,
 	int			*partial_copy,
 	int			*partial_copy_len,
-	int			log_offset,
-	struct xlog_in_core	**commit_iclog)
+	int			log_offset)
 {
 	int			error;
 
@@ -2323,27 +2397,20 @@ xlog_write_copy_finish(
 	*partial_copy = 0;
 	*partial_copy_len = 0;
 
-	if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) {
-		/* no more space in this iclog - push it. */
-		spin_lock(&log->l_icloglock);
-		xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
-		*record_cnt = 0;
-		*data_cnt = 0;
-
-		if (iclog->ic_state == XLOG_STATE_ACTIVE)
-			xlog_state_switch_iclogs(log, iclog, 0);
-		else
-			ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC ||
-			       iclog->ic_state == XLOG_STATE_IOERROR);
-		if (!commit_iclog)
-			goto release_iclog;
-		spin_unlock(&log->l_icloglock);
-		ASSERT(flags & XLOG_COMMIT_TRANS);
-		*commit_iclog = iclog;
-	}
+	if (iclog->ic_size - log_offset > sizeof(xlog_op_header_t))
+		return 0;
 
-	return 0;
+	/* no more space in this iclog - push it. */
+	spin_lock(&log->l_icloglock);
+	xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
+	*record_cnt = 0;
+	*data_cnt = 0;
 
+	if (iclog->ic_state == XLOG_STATE_ACTIVE)
+		xlog_state_switch_iclogs(log, iclog, 0);
+	else
+		ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC ||
+			xlog_is_shutdown(log));
 release_iclog:
 	error = xlog_state_release_iclog(log, iclog, 0);
 	spin_unlock(&log->l_icloglock);
@@ -2393,10 +2460,9 @@ release_iclog:
 int
 xlog_write(
 	struct xlog		*log,
+	struct xfs_cil_ctx	*ctx,
 	struct xfs_log_vec	*log_vector,
 	struct xlog_ticket	*ticket,
-	xfs_lsn_t		*start_lsn,
-	struct xlog_in_core	**commit_iclog,
 	uint			optype)
 {
 	struct xlog_in_core	*iclog = NULL;
@@ -2426,8 +2492,6 @@ xlog_write(
 	}
 
 	len = xlog_write_calc_vec_length(ticket, log_vector, optype);
-	if (start_lsn)
-		*start_lsn = 0;
 	while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) {
 		void		*ptr;
 		int		log_offset;
@@ -2440,9 +2504,15 @@ xlog_write(
 		ASSERT(log_offset <= iclog->ic_size - 1);
 		ptr = iclog->ic_datap + log_offset;
 
-		/* Start_lsn is the first lsn written to. */
-		if (start_lsn && !*start_lsn)
-			*start_lsn = be64_to_cpu(iclog->ic_header.h_lsn);
+		/*
+		 * If we have a context pointer, pass it the first iclog we are
+		 * writing to so it can record state needed for iclog write
+		 * ordering.
+		 */
+		if (ctx) {
+			xlog_cil_set_ctx_write_state(ctx, iclog);
+			ctx = NULL;
+		}
 
 		/*
 		 * This loop writes out as many regions as can fit in the amount
@@ -2521,8 +2591,7 @@ xlog_write(
 						       &record_cnt, &data_cnt,
 						       &partial_copy,
 						       &partial_copy_len,
-						       log_offset,
-						       commit_iclog);
+						       log_offset);
 			if (error)
 				return error;
 
@@ -2560,12 +2629,7 @@ next_lv:
 
 	spin_lock(&log->l_icloglock);
 	xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
-	if (commit_iclog) {
-		ASSERT(optype & XLOG_COMMIT_TRANS);
-		*commit_iclog = iclog;
-	} else {
-		error = xlog_state_release_iclog(log, iclog, 0);
-	}
+	error = xlog_state_release_iclog(log, iclog, 0);
 	spin_unlock(&log->l_icloglock);
 
 	return error;
@@ -2751,8 +2815,7 @@ xlog_state_set_callback(
 static bool
 xlog_state_iodone_process_iclog(
 	struct xlog		*log,
-	struct xlog_in_core	*iclog,
-	bool			*ioerror)
+	struct xlog_in_core	*iclog)
 {
 	xfs_lsn_t		lowest_lsn;
 	xfs_lsn_t		header_lsn;
@@ -2764,15 +2827,6 @@ xlog_state_iodone_process_iclog(
 		 * Skip all iclogs in the ACTIVE & DIRTY states:
 		 */
 		return false;
-	case XLOG_STATE_IOERROR:
-		/*
-		 * Between marking a filesystem SHUTDOWN and stopping the log,
-		 * we do flush all iclogs to disk (if there wasn't a log I/O
-		 * error). So, we do want things to go smoothly in case of just
-		 * a SHUTDOWN w/o a LOG_IO_ERROR.
-		 */
-		*ioerror = true;
-		return false;
 	case XLOG_STATE_DONE_SYNC:
 		/*
 		 * Now that we have an iclog that is in the DONE_SYNC state, do
@@ -2796,72 +2850,75 @@ xlog_state_iodone_process_iclog(
 	}
 }
 
-STATIC void
-xlog_state_do_callback(
+/*
+ * Loop over all the iclogs, running attached callbacks on them. Return true if
+ * we ran any callbacks, indicating that we dropped the icloglock. We don't need
+ * to handle transient shutdown state here at all because
+ * xlog_state_shutdown_callbacks() will be run to do the necessary shutdown
+ * cleanup of the callbacks.
+ */
+static bool
+xlog_state_do_iclog_callbacks(
 	struct xlog		*log)
+		__releases(&log->l_icloglock)
+		__acquires(&log->l_icloglock)
 {
-	struct xlog_in_core	*iclog;
-	struct xlog_in_core	*first_iclog;
-	bool			cycled_icloglock;
-	bool			ioerror;
-	int			flushcnt = 0;
-	int			repeats = 0;
+	struct xlog_in_core	*first_iclog = log->l_iclog;
+	struct xlog_in_core	*iclog = first_iclog;
+	bool			ran_callback = false;
 
-	spin_lock(&log->l_icloglock);
 	do {
-		/*
-		 * Scan all iclogs starting with the one pointed to by the
-		 * log.  Reset this starting point each time the log is
-		 * unlocked (during callbacks).
-		 *
-		 * Keep looping through iclogs until one full pass is made
-		 * without running any callbacks.
-		 */
-		first_iclog = log->l_iclog;
-		iclog = log->l_iclog;
-		cycled_icloglock = false;
-		ioerror = false;
-		repeats++;
+		LIST_HEAD(cb_list);
 
-		do {
-			LIST_HEAD(cb_list);
+		if (xlog_state_iodone_process_iclog(log, iclog))
+			break;
+		if (iclog->ic_state != XLOG_STATE_CALLBACK) {
+			iclog = iclog->ic_next;
+			continue;
+		}
+		list_splice_init(&iclog->ic_callbacks, &cb_list);
+		spin_unlock(&log->l_icloglock);
 
-			if (xlog_state_iodone_process_iclog(log, iclog,
-							&ioerror))
-				break;
+		trace_xlog_iclog_callbacks_start(iclog, _RET_IP_);
+		xlog_cil_process_committed(&cb_list);
+		trace_xlog_iclog_callbacks_done(iclog, _RET_IP_);
+		ran_callback = true;
 
-			if (iclog->ic_state != XLOG_STATE_CALLBACK &&
-			    iclog->ic_state != XLOG_STATE_IOERROR) {
-				iclog = iclog->ic_next;
-				continue;
-			}
-			list_splice_init(&iclog->ic_callbacks, &cb_list);
-			spin_unlock(&log->l_icloglock);
+		spin_lock(&log->l_icloglock);
+		xlog_state_clean_iclog(log, iclog);
+		iclog = iclog->ic_next;
+	} while (iclog != first_iclog);
+
+	return ran_callback;
+}
 
-			trace_xlog_iclog_callbacks_start(iclog, _RET_IP_);
-			xlog_cil_process_committed(&cb_list);
-			trace_xlog_iclog_callbacks_done(iclog, _RET_IP_);
-			cycled_icloglock = true;
 
-			spin_lock(&log->l_icloglock);
-			if (XLOG_FORCED_SHUTDOWN(log))
-				wake_up_all(&iclog->ic_force_wait);
-			else
-				xlog_state_clean_iclog(log, iclog);
-			iclog = iclog->ic_next;
-		} while (first_iclog != iclog);
+/*
+ * Loop running iclog completion callbacks until there are no more iclogs in a
+ * state that can run callbacks.
+ */
+STATIC void
+xlog_state_do_callback(
+	struct xlog		*log)
+{
+	int			flushcnt = 0;
+	int			repeats = 0;
+
+	spin_lock(&log->l_icloglock);
+	while (xlog_state_do_iclog_callbacks(log)) {
+		if (xlog_is_shutdown(log))
+			break;
 
-		if (repeats > 5000) {
+		if (++repeats > 5000) {
 			flushcnt += repeats;
 			repeats = 0;
 			xfs_warn(log->l_mp,
 				"%s: possible infinite loop (%d iterations)",
 				__func__, flushcnt);
 		}
-	} while (!ioerror && cycled_icloglock);
+	}
 
-	if (log->l_iclog->ic_state == XLOG_STATE_ACTIVE ||
-	    log->l_iclog->ic_state == XLOG_STATE_IOERROR)
+	if (log->l_iclog->ic_state == XLOG_STATE_ACTIVE)
 		wake_up_all(&log->l_flush_wait);
 
 	spin_unlock(&log->l_icloglock);
@@ -2871,13 +2928,6 @@ xlog_state_do_callback(
 /*
  * Finish transitioning this iclog to the dirty state.
  *
- * Make sure that we completely execute this routine only when this is
- * the last call to the iclog.  There is a good chance that iclog flushes,
- * when we reach the end of the physical log, get turned into 2 separate
- * calls to bwrite.  Hence, one iclog flush could generate two calls to this
- * routine.  By using the reference count bwritecnt, we guarantee that only
- * the second completion goes through.
- *
  * Callbacks could take time, so they are done outside the scope of the
  * global state machine log lock.
  */
@@ -2896,7 +2946,7 @@ xlog_state_done_syncing(
 	 * split log writes, on the second, we shut down the file system and
 	 * no iclogs should ever be attempted to be written to disk again.
 	 */
-	if (!XLOG_FORCED_SHUTDOWN(log)) {
+	if (!xlog_is_shutdown(log)) {
 		ASSERT(iclog->ic_state == XLOG_STATE_SYNCING);
 		iclog->ic_state = XLOG_STATE_DONE_SYNC;
 	}
@@ -2944,7 +2994,7 @@ xlog_state_get_iclog_space(
 
 restart:
 	spin_lock(&log->l_icloglock);
-	if (XLOG_FORCED_SHUTDOWN(log)) {
+	if (xlog_is_shutdown(log)) {
 		spin_unlock(&log->l_icloglock);
 		return -EIO;
 	}
@@ -3122,7 +3172,7 @@ xfs_log_ticket_ungrant(
  * This routine will mark the current iclog in the ring as WANT_SYNC and move
  * the current iclog pointer to the next iclog in the ring.
  */
-STATIC void
+void
 xlog_state_switch_iclogs(
 	struct xlog		*log,
 	struct xlog_in_core	*iclog,
@@ -3237,10 +3287,10 @@ xfs_log_force(
 	xlog_cil_force(log);
 
 	spin_lock(&log->l_icloglock);
-	iclog = log->l_iclog;
-	if (iclog->ic_state == XLOG_STATE_IOERROR)
+	if (xlog_is_shutdown(log))
 		goto out_error;
 
+	iclog = log->l_iclog;
 	trace_xlog_iclog_force(iclog, _RET_IP_);
 
 	if (iclog->ic_state == XLOG_STATE_DIRTY ||
@@ -3294,6 +3344,20 @@ out_error:
 	return -EIO;
 }
 
+/*
+ * Force the log to a specific LSN.
+ *
+ * If an iclog with that lsn can be found:
+ *	If it is in the DIRTY state, just return.
+ *	If it is in the ACTIVE state, move the in-core log into the WANT_SYNC
+ *		state and go to sleep or return.
+ *	If it is in any other state, go to sleep or return.
+ *
+ * Synchronous forces are implemented with a wait queue.  All callers trying
+ * to force a given lsn to disk must wait on the queue attached to the
+ * specific in-core log.  When given in-core log finally completes its write
+ * to disk, that thread will wake up all threads waiting on the queue.
+ */
 static int
 xlog_force_lsn(
 	struct xlog		*log,
@@ -3306,10 +3370,10 @@ xlog_force_lsn(
 	bool			completed;
 
 	spin_lock(&log->l_icloglock);
-	iclog = log->l_iclog;
-	if (iclog->ic_state == XLOG_STATE_IOERROR)
+	if (xlog_is_shutdown(log))
 		goto out_error;
 
+	iclog = log->l_iclog;
 	while (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
 		trace_xlog_iclog_force_lsn(iclog, _RET_IP_);
 		iclog = iclog->ic_next;
@@ -3379,18 +3443,13 @@ out_error:
 }
 
 /*
- * Force the in-core log to disk for a specific LSN.
- *
- * Find in-core log with lsn.
- *	If it is in the DIRTY state, just return.
- *	If it is in the ACTIVE state, move the in-core log into the WANT_SYNC
- *		state and go to sleep or return.
- *	If it is in any other state, go to sleep or return.
+ * Force the log to a specific checkpoint sequence.
  *
- * Synchronous forces are implemented with a wait queue.  All callers trying
- * to force a given lsn to disk must wait on the queue attached to the
- * specific in-core log.  When given in-core log finally completes its write
- * to disk, that thread will wake up all threads waiting on the queue.
+ * First force the CIL so that all the required changes have been flushed to the
+ * iclogs. If the CIL force completed it will return a commit LSN that indicates
+ * the iclog that needs to be flushed to stable storage. If the caller needs
+ * a synchronous log force, we will wait on the iclog with the LSN returned by
+ * xlog_cil_force_seq() to be completed.
  */
 int
 xfs_log_force_seq(
@@ -3619,17 +3678,15 @@ xlog_verify_grant_tail(
 	xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks);
 	if (tail_cycle != cycle) {
 		if (cycle - 1 != tail_cycle &&
-		    !(log->l_flags & XLOG_TAIL_WARN)) {
+		    !test_and_set_bit(XLOG_TAIL_WARN, &log->l_opstate)) {
 			xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
 				"%s: cycle - 1 != tail_cycle", __func__);
-			log->l_flags |= XLOG_TAIL_WARN;
 		}
 
 		if (space > BBTOB(tail_blocks) &&
-		    !(log->l_flags & XLOG_TAIL_WARN)) {
+		    !test_and_set_bit(XLOG_TAIL_WARN, &log->l_opstate)) {
 			xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
 				"%s: space > BBTOB(tail_blocks)", __func__);
-			log->l_flags |= XLOG_TAIL_WARN;
 		}
 	}
 }
@@ -3765,105 +3822,66 @@ xlog_verify_iclog(
 #endif
 
 /*
- * Mark all iclogs IOERROR. l_icloglock is held by the caller.
- */
-STATIC int
-xlog_state_ioerror(
-	struct xlog	*log)
-{
-	xlog_in_core_t	*iclog, *ic;
-
-	iclog = log->l_iclog;
-	if (iclog->ic_state != XLOG_STATE_IOERROR) {
-		/*
-		 * Mark all the incore logs IOERROR.
-		 * From now on, no log flushes will result.
-		 */
-		ic = iclog;
-		do {
-			ic->ic_state = XLOG_STATE_IOERROR;
-			ic = ic->ic_next;
-		} while (ic != iclog);
-		return 0;
-	}
-	/*
-	 * Return non-zero, if state transition has already happened.
-	 */
-	return 1;
-}
-
-/*
- * This is called from xfs_force_shutdown, when we're forcibly
- * shutting down the filesystem, typically because of an IO error.
+ * Perform a forced shutdown on the log. This should be called once and once
+ * only by the high level filesystem shutdown code to shut the log subsystem
+ * down cleanly.
+ *
  * Our main objectives here are to make sure that:
- *	a. if !logerror, flush the logs to disk. Anything modified
- *	   after this is ignored.
- *	b. the filesystem gets marked 'SHUTDOWN' for all interested
- *	   parties to find out, 'atomically'.
- *	c. those who're sleeping on log reservations, pinned objects and
- *	    other resources get woken up, and be told the bad news.
- *	d. nothing new gets queued up after (b) and (c) are done.
+ *	a. if the shutdown was not due to a log IO error, flush the logs to
+ *	   disk. Anything modified after this is ignored.
+ *	b. the log gets atomically marked 'XLOG_IO_ERROR' for all interested
+ *	   parties to find out. Nothing new gets queued after this is done.
+ *	c. Tasks sleeping on log reservations, pinned objects and
+ *	   other resources get woken up.
  *
- * Note: for the !logerror case we need to flush the regions held in memory out
- * to disk first. This needs to be done before the log is marked as shutdown,
- * otherwise the iclog writes will fail.
+ * Return true if the shutdown cause was a log IO error and we actually shut the
+ * log down.
  */
-int
-xfs_log_force_umount(
-	struct xfs_mount	*mp,
-	int			logerror)
+bool
+xlog_force_shutdown(
+	struct xlog	*log,
+	int		shutdown_flags)
 {
-	struct xlog	*log;
-	int		retval;
-
-	log = mp->m_log;
+	bool		log_error = (shutdown_flags & SHUTDOWN_LOG_IO_ERROR);
 
 	/*
-	 * If this happens during log recovery, don't worry about
-	 * locking; the log isn't open for business yet.
+	 * If this happens during log recovery then we aren't using the runtime
+	 * log mechanisms yet so there's nothing to shut down.
 	 */
-	if (!log ||
-	    log->l_flags & XLOG_ACTIVE_RECOVERY) {
-		mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
-		if (mp->m_sb_bp)
-			mp->m_sb_bp->b_flags |= XBF_DONE;
-		return 0;
-	}
+	if (!log || xlog_in_recovery(log))
+		return false;
 
-	/*
-	 * Somebody could've already done the hard work for us.
-	 * No need to get locks for this.
-	 */
-	if (logerror && log->l_iclog->ic_state == XLOG_STATE_IOERROR) {
-		ASSERT(XLOG_FORCED_SHUTDOWN(log));
-		return 1;
-	}
+	ASSERT(!xlog_is_shutdown(log));
 
 	/*
 	 * Flush all the completed transactions to disk before marking the log
-	 * being shut down. We need to do it in this order to ensure that
-	 * completed operations are safely on disk before we shut down, and that
-	 * we don't have to issue any buffer IO after the shutdown flags are set
-	 * to guarantee this.
+	 * being shut down. We need to do this first as shutting down the log
+	 * before the force will prevent the log force from flushing the iclogs
+	 * to disk.
+	 *
+	 * Re-entry due to a log IO error shutdown during the log force is
+	 * prevented by the atomicity of higher level shutdown code.
 	 */
-	if (!logerror)
-		xfs_log_force(mp, XFS_LOG_SYNC);
+	if (!log_error)
+		xfs_log_force(log->l_mp, XFS_LOG_SYNC);
 
 	/*
-	 * mark the filesystem and the as in a shutdown state and wake
-	 * everybody up to tell them the bad news.
+	 * Atomically set the shutdown state. If the shutdown state is already
+	 * set, there someone else is performing the shutdown and so we are done
+	 * here. This should never happen because we should only ever get called
+	 * once by the first shutdown caller.
+	 *
+	 * Much of the log state machine transitions assume that shutdown state
+	 * cannot change once they hold the log->l_icloglock. Hence we need to
+	 * hold that lock here, even though we use the atomic test_and_set_bit()
+	 * operation to set the shutdown state.
 	 */
 	spin_lock(&log->l_icloglock);
-	mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
-	if (mp->m_sb_bp)
-		mp->m_sb_bp->b_flags |= XBF_DONE;
-
-	/*
-	 * Mark the log and the iclogs with IO error flags to prevent any
-	 * further log IO from being issued or completed.
-	 */
-	log->l_flags |= XLOG_IO_ERROR;
-	retval = xlog_state_ioerror(log);
+	if (test_and_set_bit(XLOG_IO_ERROR, &log->l_opstate)) {
+		spin_unlock(&log->l_icloglock);
+		ASSERT(0);
+		return false;
+	}
 	spin_unlock(&log->l_icloglock);
 
 	/*
@@ -3883,12 +3901,12 @@ xfs_log_force_umount(
 	 * avoid races.
 	 */
 	spin_lock(&log->l_cilp->xc_push_lock);
+	wake_up_all(&log->l_cilp->xc_start_wait);
 	wake_up_all(&log->l_cilp->xc_commit_wait);
 	spin_unlock(&log->l_cilp->xc_push_lock);
-	xlog_state_do_callback(log);
+	xlog_state_shutdown_callbacks(log);
 
-	/* return non-zero if log IOERROR transition had already happened */
-	return retval;
+	return log_error;
 }
 
 STATIC int
@@ -3926,7 +3944,7 @@ xfs_log_check_lsn(
 	 * resets the in-core LSN. We can't validate in this mode, but
 	 * modifications are not allowed anyways so just return true.
 	 */
-	if (mp->m_flags & XFS_MOUNT_NORECOVERY)
+	if (xfs_has_norecovery(mp))
 		return true;
 
 	/*
@@ -3952,11 +3970,22 @@ xfs_log_check_lsn(
 	return valid;
 }
 
-bool
-xfs_log_in_recovery(
-	struct xfs_mount	*mp)
+/*
+ * Notify the log that we're about to start using a feature that is protected
+ * by a log incompat feature flag.  This will prevent log covering from
+ * clearing those flags.
+ */
+void
+xlog_use_incompat_feat(
+	struct xlog		*log)
 {
-	struct xlog		*log = mp->m_log;
+	down_read(&log->l_incompat_users);
+}
 
-	return log->l_flags & XLOG_ACTIVE_RECOVERY;
+/* Notify the log that we've finished using log incompat features. */
+void
+xlog_drop_incompat_feat(
+	struct xlog		*log)
+{
+	up_read(&log->l_incompat_users);
 }
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 813b972e9788..dc1b77b92fc1 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -104,6 +104,7 @@ struct xlog_ticket;
 struct xfs_log_item;
 struct xfs_item_ops;
 struct xfs_trans;
+struct xlog;
 
 int	  xfs_log_force(struct xfs_mount *mp, uint flags);
 int	  xfs_log_force_seq(struct xfs_mount *mp, xfs_csn_t seq, uint flags,
@@ -125,7 +126,6 @@ int	  xfs_log_reserve(struct xfs_mount *mp,
 			  bool		   permanent);
 int	  xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
 void      xfs_log_unmount(struct xfs_mount *mp);
-int	  xfs_log_force_umount(struct xfs_mount *mp, int logerror);
 bool	xfs_log_writable(struct xfs_mount *mp);
 
 struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
@@ -138,8 +138,11 @@ void	xfs_log_work_queue(struct xfs_mount *mp);
 int	xfs_log_quiesce(struct xfs_mount *mp);
 void	xfs_log_clean(struct xfs_mount *mp);
 bool	xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t);
-bool	xfs_log_in_recovery(struct xfs_mount *);
 
 xfs_lsn_t xlog_grant_push_threshold(struct xlog *log, int need_bytes);
+bool	  xlog_force_shutdown(struct xlog *log, int shutdown_flags);
+
+void xlog_use_incompat_feat(struct xlog *log);
+void xlog_drop_incompat_feat(struct xlog *log);
 
 #endif	/* __XFS_LOG_H__ */
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 4c44bc3786c0..6c93c8ada6f3 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -48,6 +48,34 @@ xlog_cil_ticket_alloc(
 }
 
 /*
+ * Unavoidable forward declaration - xlog_cil_push_work() calls
+ * xlog_cil_ctx_alloc() itself.
+ */
+static void xlog_cil_push_work(struct work_struct *work);
+
+static struct xfs_cil_ctx *
+xlog_cil_ctx_alloc(void)
+{
+	struct xfs_cil_ctx	*ctx;
+
+	ctx = kmem_zalloc(sizeof(*ctx), KM_NOFS);
+	INIT_LIST_HEAD(&ctx->committing);
+	INIT_LIST_HEAD(&ctx->busy_extents);
+	INIT_WORK(&ctx->push_work, xlog_cil_push_work);
+	return ctx;
+}
+
+static void
+xlog_cil_ctx_switch(
+	struct xfs_cil		*cil,
+	struct xfs_cil_ctx	*ctx)
+{
+	ctx->sequence = ++cil->xc_current_sequence;
+	ctx->cil = cil;
+	cil->xc_ctx = ctx;
+}
+
+/*
  * After the first stage of log recovery is done, we know where the head and
  * tail of the log are. We need this log initialisation done before we can
  * initialise the first CIL checkpoint context.
@@ -185,7 +213,15 @@ xlog_cil_alloc_shadow_bufs(
 			 */
 			kmem_free(lip->li_lv_shadow);
 
-			lv = kmem_alloc_large(buf_size, KM_NOFS);
+			/*
+			 * We are in transaction context, which means this
+			 * allocation will pick up GFP_NOFS from the
+			 * memalloc_nofs_save/restore context the transaction
+			 * holds. This means we can use GFP_KERNEL here so the
+			 * generic kvmalloc() code will run vmalloc on
+			 * contiguous page allocation failure as we require.
+			 */
+			lv = kvmalloc(buf_size, GFP_KERNEL);
 			memset(lv, 0, xlog_cil_iovec_space(niovecs));
 
 			lv->lv_item = lip;
@@ -535,7 +571,7 @@ xlog_discard_busy_extents(
 	struct blk_plug		plug;
 	int			error = 0;
 
-	ASSERT(mp->m_flags & XFS_MOUNT_DISCARD);
+	ASSERT(xfs_has_discard(mp));
 
 	blk_start_plug(&plug);
 	list_for_each_entry(busyp, list, list) {
@@ -576,7 +612,7 @@ xlog_cil_committed(
 	struct xfs_cil_ctx	*ctx)
 {
 	struct xfs_mount	*mp = ctx->cil->xc_log->l_mp;
-	bool			abort = XLOG_FORCED_SHUTDOWN(ctx->cil->xc_log);
+	bool			abort = xlog_is_shutdown(ctx->cil->xc_log);
 
 	/*
 	 * If the I/O failed, we're aborting the commit and already shutdown.
@@ -587,6 +623,7 @@ xlog_cil_committed(
 	 */
 	if (abort) {
 		spin_lock(&ctx->cil->xc_push_lock);
+		wake_up_all(&ctx->cil->xc_start_wait);
 		wake_up_all(&ctx->cil->xc_commit_wait);
 		spin_unlock(&ctx->cil->xc_push_lock);
 	}
@@ -596,7 +633,7 @@ xlog_cil_committed(
 
 	xfs_extent_busy_sort(&ctx->busy_extents);
 	xfs_extent_busy_clear(mp, &ctx->busy_extents,
-			     (mp->m_flags & XFS_MOUNT_DISCARD) && !abort);
+			      xfs_has_discard(mp) && !abort);
 
 	spin_lock(&ctx->cil->xc_push_lock);
 	list_del(&ctx->committing);
@@ -624,6 +661,180 @@ xlog_cil_process_committed(
 }
 
 /*
+* Record the LSN of the iclog we were just granted space to start writing into.
+* If the context doesn't have a start_lsn recorded, then this iclog will
+* contain the start record for the checkpoint. Otherwise this write contains
+* the commit record for the checkpoint.
+*/
+void
+xlog_cil_set_ctx_write_state(
+	struct xfs_cil_ctx	*ctx,
+	struct xlog_in_core	*iclog)
+{
+	struct xfs_cil		*cil = ctx->cil;
+	xfs_lsn_t		lsn = be64_to_cpu(iclog->ic_header.h_lsn);
+
+	ASSERT(!ctx->commit_lsn);
+	if (!ctx->start_lsn) {
+		spin_lock(&cil->xc_push_lock);
+		/*
+		 * The LSN we need to pass to the log items on transaction
+		 * commit is the LSN reported by the first log vector write, not
+		 * the commit lsn. If we use the commit record lsn then we can
+		 * move the tail beyond the grant write head.
+		 */
+		ctx->start_lsn = lsn;
+		wake_up_all(&cil->xc_start_wait);
+		spin_unlock(&cil->xc_push_lock);
+		return;
+	}
+
+	/*
+	 * Take a reference to the iclog for the context so that we still hold
+	 * it when xlog_write is done and has released it. This means the
+	 * context controls when the iclog is released for IO.
+	 */
+	atomic_inc(&iclog->ic_refcnt);
+
+	/*
+	 * xlog_state_get_iclog_space() guarantees there is enough space in the
+	 * iclog for an entire commit record, so we can attach the context
+	 * callbacks now.  This needs to be done before we make the commit_lsn
+	 * visible to waiters so that checkpoints with commit records in the
+	 * same iclog order their IO completion callbacks in the same order that
+	 * the commit records appear in the iclog.
+	 */
+	spin_lock(&cil->xc_log->l_icloglock);
+	list_add_tail(&ctx->iclog_entry, &iclog->ic_callbacks);
+	spin_unlock(&cil->xc_log->l_icloglock);
+
+	/*
+	 * Now we can record the commit LSN and wake anyone waiting for this
+	 * sequence to have the ordered commit record assigned to a physical
+	 * location in the log.
+	 */
+	spin_lock(&cil->xc_push_lock);
+	ctx->commit_iclog = iclog;
+	ctx->commit_lsn = lsn;
+	wake_up_all(&cil->xc_commit_wait);
+	spin_unlock(&cil->xc_push_lock);
+}
+
+
+/*
+ * Ensure that the order of log writes follows checkpoint sequence order. This
+ * relies on the context LSN being zero until the log write has guaranteed the
+ * LSN that the log write will start at via xlog_state_get_iclog_space().
+ */
+enum _record_type {
+	_START_RECORD,
+	_COMMIT_RECORD,
+};
+
+static int
+xlog_cil_order_write(
+	struct xfs_cil		*cil,
+	xfs_csn_t		sequence,
+	enum _record_type	record)
+{
+	struct xfs_cil_ctx	*ctx;
+
+restart:
+	spin_lock(&cil->xc_push_lock);
+	list_for_each_entry(ctx, &cil->xc_committing, committing) {
+		/*
+		 * Avoid getting stuck in this loop because we were woken by the
+		 * shutdown, but then went back to sleep once already in the
+		 * shutdown state.
+		 */
+		if (xlog_is_shutdown(cil->xc_log)) {
+			spin_unlock(&cil->xc_push_lock);
+			return -EIO;
+		}
+
+		/*
+		 * Higher sequences will wait for this one so skip them.
+		 * Don't wait for our own sequence, either.
+		 */
+		if (ctx->sequence >= sequence)
+			continue;
+
+		/* Wait until the LSN for the record has been recorded. */
+		switch (record) {
+		case _START_RECORD:
+			if (!ctx->start_lsn) {
+				xlog_wait(&cil->xc_start_wait, &cil->xc_push_lock);
+				goto restart;
+			}
+			break;
+		case _COMMIT_RECORD:
+			if (!ctx->commit_lsn) {
+				xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock);
+				goto restart;
+			}
+			break;
+		}
+	}
+	spin_unlock(&cil->xc_push_lock);
+	return 0;
+}
+
+/*
+ * Write out the log vector change now attached to the CIL context. This will
+ * write a start record that needs to be strictly ordered in ascending CIL
+ * sequence order so that log recovery will always use in-order start LSNs when
+ * replaying checkpoints.
+ */
+static int
+xlog_cil_write_chain(
+	struct xfs_cil_ctx	*ctx,
+	struct xfs_log_vec	*chain)
+{
+	struct xlog		*log = ctx->cil->xc_log;
+	int			error;
+
+	error = xlog_cil_order_write(ctx->cil, ctx->sequence, _START_RECORD);
+	if (error)
+		return error;
+	return xlog_write(log, ctx, chain, ctx->ticket, XLOG_START_TRANS);
+}
+
+/*
+ * Write out the commit record of a checkpoint transaction to close off a
+ * running log write. These commit records are strictly ordered in ascending CIL
+ * sequence order so that log recovery will always replay the checkpoints in the
+ * correct order.
+ */
+static int
+xlog_cil_write_commit_record(
+	struct xfs_cil_ctx	*ctx)
+{
+	struct xlog		*log = ctx->cil->xc_log;
+	struct xfs_log_iovec	reg = {
+		.i_addr = NULL,
+		.i_len = 0,
+		.i_type = XLOG_REG_TYPE_COMMIT,
+	};
+	struct xfs_log_vec	vec = {
+		.lv_niovecs = 1,
+		.lv_iovecp = &reg,
+	};
+	int			error;
+
+	if (xlog_is_shutdown(log))
+		return -EIO;
+
+	error = xlog_cil_order_write(ctx->cil, ctx->sequence, _COMMIT_RECORD);
+	if (error)
+		return error;
+
+	error = xlog_write(log, ctx, &vec, ctx->ticket, XLOG_COMMIT_TRANS);
+	if (error)
+		xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
+	return error;
+}
+
+/*
  * Push the Committed Item List to the log.
  *
  * If the current sequence is the same as xc_push_seq we need to do a flush. If
@@ -641,13 +852,12 @@ static void
 xlog_cil_push_work(
 	struct work_struct	*work)
 {
-	struct xfs_cil		*cil =
-		container_of(work, struct xfs_cil, xc_push_work);
+	struct xfs_cil_ctx	*ctx =
+		container_of(work, struct xfs_cil_ctx, push_work);
+	struct xfs_cil		*cil = ctx->cil;
 	struct xlog		*log = cil->xc_log;
 	struct xfs_log_vec	*lv;
-	struct xfs_cil_ctx	*ctx;
 	struct xfs_cil_ctx	*new_ctx;
-	struct xlog_in_core	*commit_iclog;
 	struct xlog_ticket	*tic;
 	int			num_iovecs;
 	int			error = 0;
@@ -655,20 +865,21 @@ xlog_cil_push_work(
 	struct xfs_log_iovec	lhdr;
 	struct xfs_log_vec	lvhdr = { NULL };
 	xfs_lsn_t		preflush_tail_lsn;
-	xfs_lsn_t		commit_lsn;
 	xfs_csn_t		push_seq;
 	struct bio		bio;
 	DECLARE_COMPLETION_ONSTACK(bdev_flush);
+	bool			push_commit_stable;
 
-	new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_NOFS);
+	new_ctx = xlog_cil_ctx_alloc();
 	new_ctx->ticket = xlog_cil_ticket_alloc(log);
 
 	down_write(&cil->xc_ctx_lock);
-	ctx = cil->xc_ctx;
 
 	spin_lock(&cil->xc_push_lock);
 	push_seq = cil->xc_push_seq;
 	ASSERT(push_seq <= ctx->sequence);
+	push_commit_stable = cil->xc_push_commit_stable;
+	cil->xc_push_commit_stable = false;
 
 	/*
 	 * As we are about to switch to a new, empty CIL context, we no longer
@@ -694,7 +905,7 @@ xlog_cil_push_work(
 
 
 	/* check for a previously pushed sequence */
-	if (push_seq < cil->xc_ctx->sequence) {
+	if (push_seq < ctx->sequence) {
 		spin_unlock(&cil->xc_push_lock);
 		goto out_skip;
 	}
@@ -767,19 +978,7 @@ xlog_cil_push_work(
 	}
 
 	/*
-	 * initialise the new context and attach it to the CIL. Then attach
-	 * the current context to the CIL committing list so it can be found
-	 * during log forces to extract the commit lsn of the sequence that
-	 * needs to be forced.
-	 */
-	INIT_LIST_HEAD(&new_ctx->committing);
-	INIT_LIST_HEAD(&new_ctx->busy_extents);
-	new_ctx->sequence = ctx->sequence + 1;
-	new_ctx->cil = cil;
-	cil->xc_ctx = new_ctx;
-
-	/*
-	 * The switch is now done, so we can drop the context lock and move out
+	 * Switch the contexts so we can drop the context lock and move out
 	 * of a shared context. We can't just go straight to the commit record,
 	 * though - we need to synchronise with previous and future commits so
 	 * that the commit records are correctly ordered in the log to ensure
@@ -804,7 +1003,7 @@ xlog_cil_push_work(
 	 * deferencing a freed context pointer.
 	 */
 	spin_lock(&cil->xc_push_lock);
-	cil->xc_current_sequence = new_ctx->sequence;
+	xlog_cil_ctx_switch(cil, new_ctx);
 	spin_unlock(&cil->xc_push_lock);
 	up_write(&cil->xc_ctx_lock);
 
@@ -837,78 +1036,17 @@ xlog_cil_push_work(
 	 */
 	wait_for_completion(&bdev_flush);
 
-	error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL,
-				XLOG_START_TRANS);
+	error = xlog_cil_write_chain(ctx, &lvhdr);
 	if (error)
 		goto out_abort_free_ticket;
 
-	/*
-	 * now that we've written the checkpoint into the log, strictly
-	 * order the commit records so replay will get them in the right order.
-	 */
-restart:
-	spin_lock(&cil->xc_push_lock);
-	list_for_each_entry(new_ctx, &cil->xc_committing, committing) {
-		/*
-		 * Avoid getting stuck in this loop because we were woken by the
-		 * shutdown, but then went back to sleep once already in the
-		 * shutdown state.
-		 */
-		if (XLOG_FORCED_SHUTDOWN(log)) {
-			spin_unlock(&cil->xc_push_lock);
-			goto out_abort_free_ticket;
-		}
-
-		/*
-		 * Higher sequences will wait for this one so skip them.
-		 * Don't wait for our own sequence, either.
-		 */
-		if (new_ctx->sequence >= ctx->sequence)
-			continue;
-		if (!new_ctx->commit_lsn) {
-			/*
-			 * It is still being pushed! Wait for the push to
-			 * complete, then start again from the beginning.
-			 */
-			xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock);
-			goto restart;
-		}
-	}
-	spin_unlock(&cil->xc_push_lock);
-
-	error = xlog_commit_record(log, tic, &commit_iclog, &commit_lsn);
+	error = xlog_cil_write_commit_record(ctx);
 	if (error)
 		goto out_abort_free_ticket;
 
 	xfs_log_ticket_ungrant(log, tic);
 
 	/*
-	 * Once we attach the ctx to the iclog, a shutdown can process the
-	 * iclog, run the callbacks and free the ctx. The only thing preventing
-	 * this potential UAF situation here is that we are holding the
-	 * icloglock. Hence we cannot access the ctx once we have attached the
-	 * callbacks and dropped the icloglock.
-	 */
-	spin_lock(&log->l_icloglock);
-	if (commit_iclog->ic_state == XLOG_STATE_IOERROR) {
-		spin_unlock(&log->l_icloglock);
-		goto out_abort;
-	}
-	ASSERT_ALWAYS(commit_iclog->ic_state == XLOG_STATE_ACTIVE ||
-		      commit_iclog->ic_state == XLOG_STATE_WANT_SYNC);
-	list_add_tail(&ctx->iclog_entry, &commit_iclog->ic_callbacks);
-
-	/*
-	 * now the checkpoint commit is complete and we've attached the
-	 * callbacks to the iclog we can assign the commit LSN to the context
-	 * and wake up anyone who is waiting for the commit to complete.
-	 */
-	spin_lock(&cil->xc_push_lock);
-	ctx->commit_lsn = commit_lsn;
-	wake_up_all(&cil->xc_commit_wait);
-	spin_unlock(&cil->xc_push_lock);
-
-	/*
 	 * If the checkpoint spans multiple iclogs, wait for all previous iclogs
 	 * to complete before we submit the commit_iclog. We can't use state
 	 * checks for this - ACTIVE can be either a past completed iclog or a
@@ -919,21 +1057,19 @@ restart:
 	 * wakeup until this commit_iclog is written to disk.  Hence we use the
 	 * iclog header lsn and compare it to the commit lsn to determine if we
 	 * need to wait on iclogs or not.
-	 *
-	 * NOTE: It is not safe to reference the ctx after this check as we drop
-	 * the icloglock if we have to wait for completion of other iclogs.
 	 */
-	if (ctx->start_lsn != commit_lsn) {
+	spin_lock(&log->l_icloglock);
+	if (ctx->start_lsn != ctx->commit_lsn) {
 		xfs_lsn_t	plsn;
 
-		plsn = be64_to_cpu(commit_iclog->ic_prev->ic_header.h_lsn);
-		if (plsn && XFS_LSN_CMP(plsn, commit_lsn) < 0) {
+		plsn = be64_to_cpu(ctx->commit_iclog->ic_prev->ic_header.h_lsn);
+		if (plsn && XFS_LSN_CMP(plsn, ctx->commit_lsn) < 0) {
 			/*
 			 * Waiting on ic_force_wait orders the completion of
 			 * iclogs older than ic_prev. Hence we only need to wait
 			 * on the most recent older iclog here.
 			 */
-			xlog_wait_on_iclog(commit_iclog->ic_prev);
+			xlog_wait_on_iclog(ctx->commit_iclog->ic_prev);
 			spin_lock(&log->l_icloglock);
 		}
 
@@ -941,16 +1077,27 @@ restart:
 		 * We need to issue a pre-flush so that the ordering for this
 		 * checkpoint is correctly preserved down to stable storage.
 		 */
-		commit_iclog->ic_flags |= XLOG_ICL_NEED_FLUSH;
+		ctx->commit_iclog->ic_flags |= XLOG_ICL_NEED_FLUSH;
 	}
 
 	/*
 	 * The commit iclog must be written to stable storage to guarantee
 	 * journal IO vs metadata writeback IO is correctly ordered on stable
 	 * storage.
+	 *
+	 * If the push caller needs the commit to be immediately stable and the
+	 * commit_iclog is not yet marked as XLOG_STATE_WANT_SYNC to indicate it
+	 * will be written when released, switch it's state to WANT_SYNC right
+	 * now.
 	 */
-	commit_iclog->ic_flags |= XLOG_ICL_NEED_FUA;
-	xlog_state_release_iclog(log, commit_iclog, preflush_tail_lsn);
+	ctx->commit_iclog->ic_flags |= XLOG_ICL_NEED_FUA;
+	if (push_commit_stable &&
+	    ctx->commit_iclog->ic_state == XLOG_STATE_ACTIVE)
+		xlog_state_switch_iclogs(log, ctx->commit_iclog, 0);
+	xlog_state_release_iclog(log, ctx->commit_iclog, preflush_tail_lsn);
+
+	/* Not safe to reference ctx now! */
+
 	spin_unlock(&log->l_icloglock);
 	return;
 
@@ -962,9 +1109,15 @@ out_skip:
 
 out_abort_free_ticket:
 	xfs_log_ticket_ungrant(log, tic);
-out_abort:
-	ASSERT(XLOG_FORCED_SHUTDOWN(log));
-	xlog_cil_committed(ctx);
+	ASSERT(xlog_is_shutdown(log));
+	if (!ctx->commit_iclog) {
+		xlog_cil_committed(ctx);
+		return;
+	}
+	spin_lock(&log->l_icloglock);
+	xlog_state_release_iclog(log, ctx->commit_iclog, 0);
+	/* Not safe to reference ctx now! */
+	spin_unlock(&log->l_icloglock);
 }
 
 /*
@@ -998,7 +1151,7 @@ xlog_cil_push_background(
 	spin_lock(&cil->xc_push_lock);
 	if (cil->xc_push_seq < cil->xc_current_sequence) {
 		cil->xc_push_seq = cil->xc_current_sequence;
-		queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
+		queue_work(cil->xc_push_wq, &cil->xc_ctx->push_work);
 	}
 
 	/*
@@ -1034,13 +1187,26 @@ xlog_cil_push_background(
 /*
  * xlog_cil_push_now() is used to trigger an immediate CIL push to the sequence
  * number that is passed. When it returns, the work will be queued for
- * @push_seq, but it won't be completed. The caller is expected to do any
- * waiting for push_seq to complete if it is required.
+ * @push_seq, but it won't be completed.
+ *
+ * If the caller is performing a synchronous force, we will flush the workqueue
+ * to get previously queued work moving to minimise the wait time they will
+ * undergo waiting for all outstanding pushes to complete. The caller is
+ * expected to do the required waiting for push_seq to complete.
+ *
+ * If the caller is performing an async push, we need to ensure that the
+ * checkpoint is fully flushed out of the iclogs when we finish the push. If we
+ * don't do this, then the commit record may remain sitting in memory in an
+ * ACTIVE iclog. This then requires another full log force to push to disk,
+ * which defeats the purpose of having an async, non-blocking CIL force
+ * mechanism. Hence in this case we need to pass a flag to the push work to
+ * indicate it needs to flush the commit record itself.
  */
 static void
 xlog_cil_push_now(
 	struct xlog	*log,
-	xfs_lsn_t	push_seq)
+	xfs_lsn_t	push_seq,
+	bool		async)
 {
 	struct xfs_cil	*cil = log->l_cilp;
 
@@ -1050,7 +1216,8 @@ xlog_cil_push_now(
 	ASSERT(push_seq && push_seq <= cil->xc_current_sequence);
 
 	/* start on any pending background push to minimise wait time on it */
-	flush_work(&cil->xc_push_work);
+	if (!async)
+		flush_workqueue(cil->xc_push_wq);
 
 	/*
 	 * If the CIL is empty or we've already pushed the sequence then
@@ -1063,7 +1230,8 @@ xlog_cil_push_now(
 	}
 
 	cil->xc_push_seq = push_seq;
-	queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
+	cil->xc_push_commit_stable = async;
+	queue_work(cil->xc_push_wq, &cil->xc_ctx->push_work);
 	spin_unlock(&cil->xc_push_lock);
 }
 
@@ -1116,7 +1284,7 @@ xlog_cil_commit(
 
 	xlog_cil_insert_items(log, tp);
 
-	if (regrant && !XLOG_FORCED_SHUTDOWN(log))
+	if (regrant && !xlog_is_shutdown(log))
 		xfs_log_ticket_regrant(log, tp->t_ticket);
 	else
 		xfs_log_ticket_ungrant(log, tp->t_ticket);
@@ -1148,11 +1316,26 @@ xlog_cil_commit(
 }
 
 /*
+ * Flush the CIL to stable storage but don't wait for it to complete. This
+ * requires the CIL push to ensure the commit record for the push hits the disk,
+ * but otherwise is no different to a push done from a log force.
+ */
+void
+xlog_cil_flush(
+	struct xlog	*log)
+{
+	xfs_csn_t	seq = log->l_cilp->xc_current_sequence;
+
+	trace_xfs_log_force(log->l_mp, seq, _RET_IP_);
+	xlog_cil_push_now(log, seq, true);
+}
+
+/*
  * Conditionally push the CIL based on the sequence passed in.
  *
- * We only need to push if we haven't already pushed the sequence
- * number given. Hence the only time we will trigger a push here is
- * if the push sequence is the same as the current context.
+ * We only need to push if we haven't already pushed the sequence number given.
+ * Hence the only time we will trigger a push here is if the push sequence is
+ * the same as the current context.
  *
  * We return the current commit lsn to allow the callers to determine if a
  * iclog flush is necessary following this call.
@@ -1168,13 +1351,17 @@ xlog_cil_force_seq(
 
 	ASSERT(sequence <= cil->xc_current_sequence);
 
+	if (!sequence)
+		sequence = cil->xc_current_sequence;
+	trace_xfs_log_force(log->l_mp, sequence, _RET_IP_);
+
 	/*
 	 * check to see if we need to force out the current context.
 	 * xlog_cil_push() handles racing pushes for the same sequence,
 	 * so no need to deal with it here.
 	 */
 restart:
-	xlog_cil_push_now(log, sequence);
+	xlog_cil_push_now(log, sequence, false);
 
 	/*
 	 * See if we can find a previous sequence still committing.
@@ -1189,7 +1376,7 @@ restart:
 		 * shutdown, but then went back to sleep once already in the
 		 * shutdown state.
 		 */
-		if (XLOG_FORCED_SHUTDOWN(log))
+		if (xlog_is_shutdown(log))
 			goto out_shutdown;
 		if (ctx->sequence > sequence)
 			continue;
@@ -1198,6 +1385,7 @@ restart:
 			 * It is still being pushed! Wait for the push to
 			 * complete, then start again from the beginning.
 			 */
+			XFS_STATS_INC(log->l_mp, xs_log_force_sleep);
 			xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock);
 			goto restart;
 		}
@@ -1282,32 +1470,35 @@ xlog_cil_init(
 	cil = kmem_zalloc(sizeof(*cil), KM_MAYFAIL);
 	if (!cil)
 		return -ENOMEM;
+	/*
+	 * Limit the CIL pipeline depth to 4 concurrent works to bound the
+	 * concurrency the log spinlocks will be exposed to.
+	 */
+	cil->xc_push_wq = alloc_workqueue("xfs-cil/%s",
+			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_UNBOUND),
+			4, log->l_mp->m_super->s_id);
+	if (!cil->xc_push_wq)
+		goto out_destroy_cil;
 
-	ctx = kmem_zalloc(sizeof(*ctx), KM_MAYFAIL);
-	if (!ctx) {
-		kmem_free(cil);
-		return -ENOMEM;
-	}
-
-	INIT_WORK(&cil->xc_push_work, xlog_cil_push_work);
 	INIT_LIST_HEAD(&cil->xc_cil);
 	INIT_LIST_HEAD(&cil->xc_committing);
 	spin_lock_init(&cil->xc_cil_lock);
 	spin_lock_init(&cil->xc_push_lock);
 	init_waitqueue_head(&cil->xc_push_wait);
 	init_rwsem(&cil->xc_ctx_lock);
+	init_waitqueue_head(&cil->xc_start_wait);
 	init_waitqueue_head(&cil->xc_commit_wait);
-
-	INIT_LIST_HEAD(&ctx->committing);
-	INIT_LIST_HEAD(&ctx->busy_extents);
-	ctx->sequence = 1;
-	ctx->cil = cil;
-	cil->xc_ctx = ctx;
-	cil->xc_current_sequence = ctx->sequence;
-
 	cil->xc_log = log;
 	log->l_cilp = cil;
+
+	ctx = xlog_cil_ctx_alloc();
+	xlog_cil_ctx_switch(cil, ctx);
+
 	return 0;
+
+out_destroy_cil:
+	kmem_free(cil);
+	return -ENOMEM;
 }
 
 void
@@ -1321,6 +1512,7 @@ xlog_cil_destroy(
 	}
 
 	ASSERT(list_empty(&log->l_cilp->xc_cil));
+	destroy_workqueue(log->l_cilp->xc_push_wq);
 	kmem_free(log->l_cilp);
 }
 
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index f3e79a45d60a..844fbeec3545 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -12,15 +12,6 @@ struct xlog_ticket;
 struct xfs_mount;
 
 /*
- * Flags for log structure
- */
-#define XLOG_ACTIVE_RECOVERY	0x2	/* in the middle of recovery */
-#define	XLOG_RECOVERY_NEEDED	0x4	/* log was recovered */
-#define XLOG_IO_ERROR		0x8	/* log hit an I/O error, and being
-					   shutdown */
-#define XLOG_TAIL_WARN		0x10	/* log tail verify warning issued */
-
-/*
  * get client id from packed copy.
  *
  * this hack is here because the xlog_pack code copies four bytes
@@ -47,7 +38,6 @@ enum xlog_iclog_state {
 	XLOG_STATE_DONE_SYNC,	/* Done syncing to disk */
 	XLOG_STATE_CALLBACK,	/* Callback functions now */
 	XLOG_STATE_DIRTY,	/* Dirty IC log, not ready for ACTIVE status */
-	XLOG_STATE_IOERROR,	/* IO error happened in sync'ing log */
 };
 
 #define XLOG_STATE_STRINGS \
@@ -56,8 +46,7 @@ enum xlog_iclog_state {
 	{ XLOG_STATE_SYNCING,	"XLOG_STATE_SYNCING" }, \
 	{ XLOG_STATE_DONE_SYNC,	"XLOG_STATE_DONE_SYNC" }, \
 	{ XLOG_STATE_CALLBACK,	"XLOG_STATE_CALLBACK" }, \
-	{ XLOG_STATE_DIRTY,	"XLOG_STATE_DIRTY" }, \
-	{ XLOG_STATE_IOERROR,	"XLOG_STATE_IOERROR" }
+	{ XLOG_STATE_DIRTY,	"XLOG_STATE_DIRTY" }
 
 /*
  * In core log flags
@@ -251,6 +240,7 @@ struct xfs_cil_ctx {
 	xfs_csn_t		sequence;	/* chkpt sequence # */
 	xfs_lsn_t		start_lsn;	/* first LSN of chkpt commit */
 	xfs_lsn_t		commit_lsn;	/* chkpt commit record lsn */
+	struct xlog_in_core	*commit_iclog;
 	struct xlog_ticket	*ticket;	/* chkpt ticket */
 	int			nvecs;		/* number of regions */
 	int			space_used;	/* aggregate size of regions */
@@ -259,6 +249,7 @@ struct xfs_cil_ctx {
 	struct list_head	iclog_entry;
 	struct list_head	committing;	/* ctx committing list */
 	struct work_struct	discard_endio_work;
+	struct work_struct	push_work;
 };
 
 /*
@@ -281,16 +272,18 @@ struct xfs_cil {
 	struct xlog		*xc_log;
 	struct list_head	xc_cil;
 	spinlock_t		xc_cil_lock;
+	struct workqueue_struct	*xc_push_wq;
 
 	struct rw_semaphore	xc_ctx_lock ____cacheline_aligned_in_smp;
 	struct xfs_cil_ctx	*xc_ctx;
 
 	spinlock_t		xc_push_lock ____cacheline_aligned_in_smp;
 	xfs_csn_t		xc_push_seq;
+	bool			xc_push_commit_stable;
 	struct list_head	xc_committing;
 	wait_queue_head_t	xc_commit_wait;
+	wait_queue_head_t	xc_start_wait;
 	xfs_csn_t		xc_current_sequence;
-	struct work_struct	xc_push_work;
 	wait_queue_head_t	xc_push_wait;	/* background push throttle */
 } ____cacheline_aligned_in_smp;
 
@@ -407,7 +400,7 @@ struct xlog {
 	struct xfs_buftarg	*l_targ;        /* buftarg of log */
 	struct workqueue_struct	*l_ioend_workqueue; /* for I/O completions */
 	struct delayed_work	l_work;		/* background flush work */
-	uint			l_flags;
+	long			l_opstate;	/* operational state */
 	uint			l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */
 	struct list_head	*l_buf_cancel_table;
 	int			l_iclog_hsize;  /* size of iclog header */
@@ -456,13 +449,40 @@ struct xlog {
 	xfs_lsn_t		l_recovery_lsn;
 
 	uint32_t		l_iclog_roundoff;/* padding roundoff */
+
+	/* Users of log incompat features should take a read lock. */
+	struct rw_semaphore	l_incompat_users;
 };
 
 #define XLOG_BUF_CANCEL_BUCKET(log, blkno) \
 	((log)->l_buf_cancel_table + ((uint64_t)blkno % XLOG_BC_TABLE_SIZE))
 
-#define XLOG_FORCED_SHUTDOWN(log) \
-	(unlikely((log)->l_flags & XLOG_IO_ERROR))
+/*
+ * Bits for operational state
+ */
+#define XLOG_ACTIVE_RECOVERY	0	/* in the middle of recovery */
+#define XLOG_RECOVERY_NEEDED	1	/* log was recovered */
+#define XLOG_IO_ERROR		2	/* log hit an I/O error, and being
+				   shutdown */
+#define XLOG_TAIL_WARN		3	/* log tail verify warning issued */
+
+static inline bool
+xlog_recovery_needed(struct xlog *log)
+{
+	return test_bit(XLOG_RECOVERY_NEEDED, &log->l_opstate);
+}
+
+static inline bool
+xlog_in_recovery(struct xlog *log)
+{
+	return test_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate);
+}
+
+static inline bool
+xlog_is_shutdown(struct xlog *log)
+{
+	return test_bit(XLOG_IO_ERROR, &log->l_opstate);
+}
 
 /* common routines */
 extern int
@@ -496,14 +516,14 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
 
 void	xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
 void	xlog_print_trans(struct xfs_trans *);
-int	xlog_write(struct xlog *log, struct xfs_log_vec *log_vector,
-		struct xlog_ticket *tic, xfs_lsn_t *start_lsn,
-		struct xlog_in_core **commit_iclog, uint optype);
-int	xlog_commit_record(struct xlog *log, struct xlog_ticket *ticket,
-		struct xlog_in_core **iclog, xfs_lsn_t *lsn);
+int	xlog_write(struct xlog *log, struct xfs_cil_ctx *ctx,
+		struct xfs_log_vec *log_vector, struct xlog_ticket *tic,
+		uint optype);
 void	xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket);
 void	xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket);
 
+void xlog_state_switch_iclogs(struct xlog *log, struct xlog_in_core *iclog,
+		int eventual_size);
 int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog,
 		xfs_lsn_t log_tail_lsn);
 
@@ -571,10 +591,14 @@ void	xlog_cil_destroy(struct xlog *log);
 bool	xlog_cil_empty(struct xlog *log);
 void	xlog_cil_commit(struct xlog *log, struct xfs_trans *tp,
 			xfs_csn_t *commit_seq, bool regrant);
+void	xlog_cil_set_ctx_write_state(struct xfs_cil_ctx *ctx,
+			struct xlog_in_core *iclog);
+
 
 /*
  * CIL force routines
  */
+void xlog_cil_flush(struct xlog *log);
 xfs_lsn_t xlog_cil_force_seq(struct xlog *log, xfs_csn_t sequence);
 
 static inline void
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 1721fce2ec94..10562ecbd9ea 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -26,6 +26,8 @@
 #include "xfs_error.h"
 #include "xfs_buf_item.h"
 #include "xfs_ag.h"
+#include "xfs_quota.h"
+
 
 #define BLK_AVG(blk1, blk2)	((blk1+blk2) >> 1)
 
@@ -79,8 +81,6 @@ xlog_alloc_buffer(
 	struct xlog	*log,
 	int		nbblks)
 {
-	int align_mask = xfs_buftarg_dma_alignment(log->l_targ);
-
 	/*
 	 * Pass log block 0 since we don't have an addr yet, buffer will be
 	 * verified on read.
@@ -108,7 +108,7 @@ xlog_alloc_buffer(
 	if (nbblks > 1 && log->l_sectBBsize > 1)
 		nbblks += log->l_sectBBsize;
 	nbblks = round_up(nbblks, log->l_sectBBsize);
-	return kmem_alloc_io(BBTOB(nbblks), align_mask, KM_MAYFAIL | KM_ZERO);
+	return kvzalloc(BBTOB(nbblks), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 }
 
 /*
@@ -146,7 +146,7 @@ xlog_do_io(
 
 	error = xfs_rw_bdev(log->l_targ->bt_bdev, log->l_logBBstart + blk_no,
 			BBTOB(nbblks), data, op);
-	if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) {
+	if (error && !xlog_is_shutdown(log)) {
 		xfs_alert(log->l_mp,
 			  "log recovery %s I/O error at daddr 0x%llx len %d error %d",
 			  op == REQ_OP_WRITE ? "write" : "read",
@@ -375,7 +375,7 @@ out:
 static inline int
 xlog_logrec_hblks(struct xlog *log, struct xlog_rec_header *rh)
 {
-	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+	if (xfs_has_logv2(log->l_mp)) {
 		int	h_size = be32_to_cpu(rh->h_size);
 
 		if ((be32_to_cpu(rh->h_version) & XLOG_VERSION_2) &&
@@ -1347,7 +1347,7 @@ xlog_find_tail(
 	 * headers if we have a filesystem using non-persistent counters.
 	 */
 	if (clean)
-		log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
+		set_bit(XFS_OPSTATE_CLEAN, &log->l_mp->m_opstate);
 
 	/*
 	 * Make sure that there are no blocks in front of the head
@@ -1504,7 +1504,7 @@ xlog_add_record(
 	recp->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
 	recp->h_cycle = cpu_to_be32(cycle);
 	recp->h_version = cpu_to_be32(
-			xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1);
+			xfs_has_logv2(log->l_mp) ? 2 : 1);
 	recp->h_lsn = cpu_to_be64(xlog_assign_lsn(cycle, block));
 	recp->h_tail_lsn = cpu_to_be64(xlog_assign_lsn(tail_cycle, tail_block));
 	recp->h_fmt = cpu_to_be32(XLOG_FMT);
@@ -1756,6 +1756,30 @@ xlog_recover_release_intent(
 	spin_unlock(&ailp->ail_lock);
 }
 
+int
+xlog_recover_iget(
+	struct xfs_mount	*mp,
+	xfs_ino_t		ino,
+	struct xfs_inode	**ipp)
+{
+	int			error;
+
+	error = xfs_iget(mp, NULL, ino, 0, 0, ipp);
+	if (error)
+		return error;
+
+	error = xfs_qm_dqattach(*ipp);
+	if (error) {
+		xfs_irele(*ipp);
+		return error;
+	}
+
+	if (VFS_I(*ipp)->i_nlink == 0)
+		xfs_iflags_set(*ipp, XFS_IRECOVERY);
+
+	return 0;
+}
+
 /******************************************************************************
  *
  *		Log recover routines
@@ -2062,7 +2086,9 @@ xlog_recover_add_to_cont_trans(
 	old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
 	old_len = item->ri_buf[item->ri_cnt-1].i_len;
 
-	ptr = krealloc(old_ptr, len + old_len, GFP_KERNEL | __GFP_NOFAIL);
+	ptr = kvrealloc(old_ptr, old_len, len + old_len, GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
 	memcpy(&ptr[old_len], dp, len);
 	item->ri_buf[item->ri_cnt-1].i_len += len;
 	item->ri_buf[item->ri_cnt-1].i_addr = ptr;
@@ -2786,6 +2812,13 @@ xlog_recover_process_iunlinks(
 		}
 		xfs_buf_rele(agibp);
 	}
+
+	/*
+	 * Flush the pending unlinked inodes to ensure that the inactivations
+	 * are fully completed on disk and the incore inodes can be reclaimed
+	 * before we signal that recovery is complete.
+	 */
+	xfs_inodegc_flush(mp);
 }
 
 STATIC void
@@ -2802,7 +2835,7 @@ xlog_unpack_data(
 		dp += BBSIZE;
 	}
 
-	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+	if (xfs_has_logv2(log->l_mp)) {
 		xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead;
 		for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
 			j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
@@ -2850,7 +2883,7 @@ xlog_recover_process(
 	 * the kernel from one that does not add CRCs by default.
 	 */
 	if (crc != old_crc) {
-		if (old_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
+		if (old_crc || xfs_has_crc(log->l_mp)) {
 			xfs_alert(log->l_mp,
 		"log record CRC mismatch: found 0x%x, expected 0x%x.",
 					le32_to_cpu(old_crc),
@@ -2862,7 +2895,7 @@ xlog_recover_process(
 		 * If the filesystem is CRC enabled, this mismatch becomes a
 		 * fatal log corruption failure.
 		 */
-		if (xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
+		if (xfs_has_crc(log->l_mp)) {
 			XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
 			return -EFSCORRUPTED;
 		}
@@ -2948,7 +2981,7 @@ xlog_do_recovery_pass(
 	 * Read the header of the tail block and get the iclog buffer size from
 	 * h_size.  Use this to tell how many sectors make up the log header.
 	 */
-	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+	if (xfs_has_logv2(log->l_mp)) {
 		/*
 		 * When using variable length iclogs, read first sector of
 		 * iclog header and extract the header size from it.  Get a
@@ -3280,10 +3313,7 @@ xlog_do_recover(
 	if (error)
 		return error;
 
-	/*
-	 * If IO errors happened during recovery, bail out.
-	 */
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xlog_is_shutdown(log))
 		return -EIO;
 
 	/*
@@ -3305,7 +3335,7 @@ xlog_do_recover(
 	xfs_buf_hold(bp);
 	error = _xfs_buf_read(bp, XBF_READ);
 	if (error) {
-		if (!XFS_FORCED_SHUTDOWN(mp)) {
+		if (!xlog_is_shutdown(log)) {
 			xfs_buf_ioerror_alert(bp, __this_address);
 			ASSERT(0);
 		}
@@ -3318,6 +3348,7 @@ xlog_do_recover(
 	xfs_buf_relse(bp);
 
 	/* re-initialise in-core superblock and geometry structures */
+	mp->m_features |= xfs_sb_version_to_features(sbp);
 	xfs_reinit_percpu_counters(mp);
 	error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
 	if (error) {
@@ -3329,7 +3360,7 @@ xlog_do_recover(
 	xlog_recover_check_summary(log);
 
 	/* Normal transactions can now occur */
-	log->l_flags &= ~XLOG_ACTIVE_RECOVERY;
+	clear_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate);
 	return 0;
 }
 
@@ -3355,7 +3386,7 @@ xlog_recover(
 	 * could not be verified. Check the superblock LSN against the current
 	 * LSN now that it's known.
 	 */
-	if (xfs_sb_version_hascrc(&log->l_mp->m_sb) &&
+	if (xfs_has_crc(log->l_mp) &&
 	    !xfs_log_check_lsn(log->l_mp, log->l_mp->m_sb.sb_lsn))
 		return -EINVAL;
 
@@ -3382,7 +3413,7 @@ xlog_recover(
 		 * (e.g. unsupported transactions, then simply reject the
 		 * attempt at recovery before touching anything.
 		 */
-		if (XFS_SB_VERSION_NUM(&log->l_mp->m_sb) == XFS_SB_VERSION_5 &&
+		if (xfs_sb_is_v5(&log->l_mp->m_sb) &&
 		    xfs_sb_has_incompat_log_feature(&log->l_mp->m_sb,
 					XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)) {
 			xfs_warn(log->l_mp,
@@ -3413,68 +3444,64 @@ xlog_recover(
 						     : "internal");
 
 		error = xlog_do_recover(log, head_blk, tail_blk);
-		log->l_flags |= XLOG_RECOVERY_NEEDED;
+		set_bit(XLOG_RECOVERY_NEEDED, &log->l_opstate);
 	}
 	return error;
 }
 
 /*
- * In the first part of recovery we replay inodes and buffers and build
- * up the list of extent free items which need to be processed.  Here
- * we process the extent free items and clean up the on disk unlinked
- * inode lists.  This is separated from the first part of recovery so
- * that the root and real-time bitmap inodes can be read in from disk in
- * between the two stages.  This is necessary so that we can free space
- * in the real-time portion of the file system.
+ * In the first part of recovery we replay inodes and buffers and build up the
+ * list of intents which need to be processed. Here we process the intents and
+ * clean up the on disk unlinked inode lists. This is separated from the first
+ * part of recovery so that the root and real-time bitmap inodes can be read in
+ * from disk in between the two stages.  This is necessary so that we can free
+ * space in the real-time portion of the file system.
  */
 int
 xlog_recover_finish(
 	struct xlog	*log)
 {
-	/*
-	 * Now we're ready to do the transactions needed for the
-	 * rest of recovery.  Start with completing all the extent
-	 * free intent records and then process the unlinked inode
-	 * lists.  At this point, we essentially run in normal mode
-	 * except that we're still performing recovery actions
-	 * rather than accepting new requests.
-	 */
-	if (log->l_flags & XLOG_RECOVERY_NEEDED) {
-		int	error;
-		error = xlog_recover_process_intents(log);
-		if (error) {
-			/*
-			 * Cancel all the unprocessed intent items now so that
-			 * we don't leave them pinned in the AIL.  This can
-			 * cause the AIL to livelock on the pinned item if
-			 * anyone tries to push the AIL (inode reclaim does
-			 * this) before we get around to xfs_log_mount_cancel.
-			 */
-			xlog_recover_cancel_intents(log);
-			xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
-			xfs_alert(log->l_mp, "Failed to recover intents");
-			return error;
-		}
+	int	error;
 
+	error = xlog_recover_process_intents(log);
+	if (error) {
 		/*
-		 * Sync the log to get all the intents out of the AIL.
-		 * This isn't absolutely necessary, but it helps in
-		 * case the unlink transactions would have problems
-		 * pushing the intents out of the way.
+		 * Cancel all the unprocessed intent items now so that we don't
+		 * leave them pinned in the AIL.  This can cause the AIL to
+		 * livelock on the pinned item if anyone tries to push the AIL
+		 * (inode reclaim does this) before we get around to
+		 * xfs_log_mount_cancel.
 		 */
-		xfs_log_force(log->l_mp, XFS_LOG_SYNC);
-
-		xlog_recover_process_iunlinks(log);
+		xlog_recover_cancel_intents(log);
+		xfs_alert(log->l_mp, "Failed to recover intents");
+		xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
+		return error;
+	}
 
-		xlog_recover_check_summary(log);
+	/*
+	 * Sync the log to get all the intents out of the AIL.  This isn't
+	 * absolutely necessary, but it helps in case the unlink transactions
+	 * would have problems pushing the intents out of the way.
+	 */
+	xfs_log_force(log->l_mp, XFS_LOG_SYNC);
 
-		xfs_notice(log->l_mp, "Ending recovery (logdev: %s)",
-				log->l_mp->m_logname ? log->l_mp->m_logname
-						     : "internal");
-		log->l_flags &= ~XLOG_RECOVERY_NEEDED;
-	} else {
-		xfs_info(log->l_mp, "Ending clean mount");
+	/*
+	 * Now that we've recovered the log and all the intents, we can clear
+	 * the log incompat feature bits in the superblock because there's no
+	 * longer anything to protect.  We rely on the AIL push to write out the
+	 * updated superblock after everything else.
+	 */
+	if (xfs_clear_incompat_log_features(log->l_mp)) {
+		error = xfs_sync_sb(log->l_mp, false);
+		if (error < 0) {
+			xfs_alert(log->l_mp,
+	"Failed to clear log incompat features on recovery");
+			return error;
+		}
 	}
+
+	xlog_recover_process_iunlinks(log);
+	xlog_recover_check_summary(log);
 	return 0;
 }
 
@@ -3482,7 +3509,7 @@ void
 xlog_recover_cancel(
 	struct xlog	*log)
 {
-	if (log->l_flags & XLOG_RECOVERY_NEEDED)
+	if (xlog_recovery_needed(log))
 		xlog_recover_cancel_intents(log);
 }
 
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d0755494597f..06dac09eddbd 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -62,7 +62,7 @@ xfs_uuid_mount(
 	/* Publish UUID in struct super_block */
 	uuid_copy(&mp->m_super->s_uuid, uuid);
 
-	if (mp->m_flags & XFS_MOUNT_NOUUID)
+	if (xfs_has_nouuid(mp))
 		return 0;
 
 	if (uuid_is_null(uuid)) {
@@ -104,7 +104,7 @@ xfs_uuid_unmount(
 	uuid_t			*uuid = &mp->m_sb.sb_uuid;
 	int			i;
 
-	if (mp->m_flags & XFS_MOUNT_NOUUID)
+	if (xfs_has_nouuid(mp))
 		return;
 
 	mutex_lock(&xfs_uuid_table_mutex);
@@ -225,6 +225,7 @@ reread:
 		goto reread;
 	}
 
+	mp->m_features |= xfs_sb_version_to_features(sbp);
 	xfs_reinit_percpu_counters(mp);
 
 	/* no need to be quiet anymore, so reset the buf ops */
@@ -318,7 +319,7 @@ xfs_validate_new_dalign(
 		}
 	}
 
-	if (!xfs_sb_version_hasdalign(&mp->m_sb)) {
+	if (!xfs_has_dalign(mp)) {
 		xfs_warn(mp,
 "cannot change alignment: superblock does not support data alignment");
 		return -EINVAL;
@@ -349,8 +350,7 @@ xfs_update_alignment(
 		sbp->sb_unit = mp->m_dalign;
 		sbp->sb_width = mp->m_swidth;
 		mp->m_update_sb = true;
-	} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
-		    xfs_sb_version_hasdalign(&mp->m_sb)) {
+	} else if (!xfs_has_noalign(mp) && xfs_has_dalign(mp)) {
 		mp->m_dalign = sbp->sb_unit;
 		mp->m_swidth = sbp->sb_width;
 	}
@@ -365,13 +365,16 @@ void
 xfs_set_low_space_thresholds(
 	struct xfs_mount	*mp)
 {
-	int i;
+	uint64_t		dblocks = mp->m_sb.sb_dblocks;
+	uint64_t		rtexts = mp->m_sb.sb_rextents;
+	int			i;
 
-	for (i = 0; i < XFS_LOWSP_MAX; i++) {
-		uint64_t space = mp->m_sb.sb_dblocks;
+	do_div(dblocks, 100);
+	do_div(rtexts, 100);
 
-		do_div(space, 100);
-		mp->m_low_space[i] = space * (i + 1);
+	for (i = 0; i < XFS_LOWSP_MAX; i++) {
+		mp->m_low_space[i] = dblocks * (i + 1);
+		mp->m_low_rtexts[i] = rtexts * (i + 1);
 	}
 }
 
@@ -485,7 +488,7 @@ xfs_check_summary_counts(
 	 * counters.  If any of them are obviously incorrect, we can recompute
 	 * them from the AGF headers in the next step.
 	 */
-	if (XFS_LAST_UNMOUNT_WAS_CLEAN(mp) &&
+	if (xfs_is_clean(mp) &&
 	    (mp->m_sb.sb_fdblocks > mp->m_sb.sb_dblocks ||
 	     !xfs_verify_icount(mp, mp->m_sb.sb_icount) ||
 	     mp->m_sb.sb_ifree > mp->m_sb.sb_icount))
@@ -502,8 +505,7 @@ xfs_check_summary_counts(
 	 * superblock to be correct and we don't need to do anything here.
 	 * Otherwise, recalculate the summary counters.
 	 */
-	if ((!xfs_sb_version_haslazysbcount(&mp->m_sb) ||
-	     XFS_LAST_UNMOUNT_WAS_CLEAN(mp)) &&
+	if ((!xfs_has_lazysbcount(mp) || xfs_is_clean(mp)) &&
 	    !xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS))
 		return 0;
 
@@ -514,7 +516,8 @@ xfs_check_summary_counts(
  * Flush and reclaim dirty inodes in preparation for unmount. Inodes and
  * internal inode structures can be sitting in the CIL and AIL at this point,
  * so we need to unpin them, write them back and/or reclaim them before unmount
- * can proceed.
+ * can proceed.  In other words, callers are required to have inactivated all
+ * inodes.
  *
  * An inode cluster that has been freed can have its buffer still pinned in
  * memory because the transaction is still sitting in a iclog. The stale inodes
@@ -543,9 +546,10 @@ xfs_unmount_flush_inodes(
 	xfs_extent_busy_wait_all(mp);
 	flush_workqueue(xfs_discard_wq);
 
-	mp->m_flags |= XFS_MOUNT_UNMOUNTING;
+	set_bit(XFS_OPSTATE_UNMOUNTING, &mp->m_opstate);
 
 	xfs_ail_push_all_sync(mp->m_ail);
+	xfs_inodegc_stop(mp);
 	cancel_delayed_work_sync(&mp->m_reclaim_work);
 	xfs_reclaim_inodes(mp);
 	xfs_health_unmount(mp);
@@ -607,29 +611,13 @@ xfs_mountfs(
 		xfs_warn(mp, "correcting sb_features alignment problem");
 		sbp->sb_features2 |= sbp->sb_bad_features2;
 		mp->m_update_sb = true;
-
-		/*
-		 * Re-check for ATTR2 in case it was found in bad_features2
-		 * slot.
-		 */
-		if (xfs_sb_version_hasattr2(&mp->m_sb) &&
-		   !(mp->m_flags & XFS_MOUNT_NOATTR2))
-			mp->m_flags |= XFS_MOUNT_ATTR2;
 	}
 
-	if (xfs_sb_version_hasattr2(&mp->m_sb) &&
-	   (mp->m_flags & XFS_MOUNT_NOATTR2)) {
-		xfs_sb_version_removeattr2(&mp->m_sb);
-		mp->m_update_sb = true;
-
-		/* update sb_versionnum for the clearing of the morebits */
-		if (!sbp->sb_features2)
-			mp->m_update_sb = true;
-	}
 
 	/* always use v2 inodes by default now */
 	if (!(mp->m_sb.sb_versionnum & XFS_SB_VERSION_NLINKBIT)) {
 		mp->m_sb.sb_versionnum |= XFS_SB_VERSION_NLINKBIT;
+		mp->m_features |= XFS_FEAT_NLINK;
 		mp->m_update_sb = true;
 	}
 
@@ -702,7 +690,7 @@ xfs_mountfs(
 	 * cluster size. Full inode chunk alignment must match the chunk size,
 	 * but that is checked on sb read verification...
 	 */
-	if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
+	if (xfs_has_sparseinodes(mp) &&
 	    mp->m_sb.sb_spino_align !=
 			XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)) {
 		xfs_warn(mp,
@@ -764,6 +752,10 @@ xfs_mountfs(
 		goto out_free_perag;
 	}
 
+	error = xfs_inodegc_register_shrinker(mp);
+	if (error)
+		goto out_fail_wait;
+
 	/*
 	 * Log's mount-time initialization. The first part of recovery can place
 	 * some items on the AIL, to be handled when recovery is finished or
@@ -774,7 +766,7 @@ xfs_mountfs(
 			      XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
 	if (error) {
 		xfs_warn(mp, "log mount failed");
-		goto out_fail_wait;
+		goto out_inodegc_shrinker;
 	}
 
 	/* Make sure the summary counts are ok. */
@@ -782,6 +774,23 @@ xfs_mountfs(
 	if (error)
 		goto out_log_dealloc;
 
+	/* Enable background inode inactivation workers. */
+	xfs_inodegc_start(mp);
+	xfs_blockgc_start(mp);
+
+	/*
+	 * Now that we've recovered any pending superblock feature bit
+	 * additions, we can finish setting up the attr2 behaviour for the
+	 * mount. The noattr2 option overrides the superblock flag, so only
+	 * check the superblock feature flag if the mount option is not set.
+	 */
+	if (xfs_has_noattr2(mp)) {
+		mp->m_features &= ~XFS_FEAT_ATTR2;
+	} else if (!xfs_has_attr2(mp) &&
+		   (mp->m_sb.sb_features2 & XFS_SB_VERSION2_ATTR2BIT)) {
+		mp->m_features |= XFS_FEAT_ATTR2;
+	}
+
 	/*
 	 * Get and sanity-check the root inode.
 	 * Save the pointer to it in the mount structure.
@@ -825,7 +834,7 @@ xfs_mountfs(
 	 * the next remount into writeable mode.  Otherwise we would never
 	 * perform the update e.g. for the root filesystem.
 	 */
-	if (mp->m_update_sb && !(mp->m_flags & XFS_MOUNT_RDONLY)) {
+	if (mp->m_update_sb && !xfs_is_readonly(mp)) {
 		error = xfs_sync_sb(mp, false);
 		if (error) {
 			xfs_warn(mp, "failed to write sb changes");
@@ -836,13 +845,11 @@ xfs_mountfs(
 	/*
 	 * Initialise the XFS quota management subsystem for this mount
 	 */
-	if (XFS_IS_QUOTA_RUNNING(mp)) {
+	if (XFS_IS_QUOTA_ON(mp)) {
 		error = xfs_qm_newmount(mp, &quotamount, &quotaflags);
 		if (error)
 			goto out_rtunmount;
 	} else {
-		ASSERT(!XFS_IS_QUOTA_ON(mp));
-
 		/*
 		 * If a file system had quotas running earlier, but decided to
 		 * mount without -o uquota/pquota/gquota options, revoke the
@@ -884,10 +891,8 @@ xfs_mountfs(
 	 * We use the same quiesce mechanism as the rw->ro remount, as they are
 	 * semantically identical operations.
 	 */
-	if ((mp->m_flags & (XFS_MOUNT_RDONLY|XFS_MOUNT_NORECOVERY)) ==
-							XFS_MOUNT_RDONLY) {
+	if (xfs_is_readonly(mp) && !xfs_has_norecovery(mp))
 		xfs_log_clean(mp);
-	}
 
 	/*
 	 * Complete the quota initialisation, post-log-replay component.
@@ -910,7 +915,7 @@ xfs_mountfs(
 	 * This may drive us straight to ENOSPC on mount, but that implies
 	 * we were already there on the last unmount. Warn if this occurs.
 	 */
-	if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
+	if (!xfs_is_readonly(mp)) {
 		resblks = xfs_default_resblks(mp);
 		error = xfs_reserve_blocks(mp, &resblks, NULL);
 		if (error)
@@ -944,6 +949,15 @@ xfs_mountfs(
 	xfs_irele(rip);
 	/* Clean out dquots that might be in memory after quotacheck. */
 	xfs_qm_unmount(mp);
+
+	/*
+	 * Inactivate all inodes that might still be in memory after a log
+	 * intent recovery failure so that reclaim can free them.  Metadata
+	 * inodes and the root directory shouldn't need inactivation, but the
+	 * mount failed for some reason, so pull down all the state and flee.
+	 */
+	xfs_inodegc_flush(mp);
+
 	/*
 	 * Flush all inode reclamation work and flush the log.
 	 * We have to do this /after/ rtunmount and qm_unmount because those
@@ -958,6 +972,8 @@ xfs_mountfs(
 	xfs_unmount_flush_inodes(mp);
  out_log_dealloc:
 	xfs_log_mount_cancel(mp);
+ out_inodegc_shrinker:
+	unregister_shrinker(&mp->m_inodegc_shrinker);
  out_fail_wait:
 	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
 		xfs_buftarg_drain(mp->m_logdev_targp);
@@ -991,6 +1007,16 @@ xfs_unmountfs(
 	uint64_t		resblks;
 	int			error;
 
+	/*
+	 * Perform all on-disk metadata updates required to inactivate inodes
+	 * that the VFS evicted earlier in the unmount process.  Freeing inodes
+	 * and discarding CoW fork preallocations can cause shape changes to
+	 * the free inode and refcount btrees, respectively, so we must finish
+	 * this before we discard the metadata space reservations.  Metadata
+	 * inodes and the root directory do not require inactivation.
+	 */
+	xfs_inodegc_flush(mp);
+
 	xfs_blockgc_stop(mp);
 	xfs_fs_unreserve_ag_blocks(mp);
 	xfs_qm_unmount_quotas(mp);
@@ -1028,6 +1054,7 @@ xfs_unmountfs(
 #if defined(DEBUG)
 	xfs_errortag_clearall(mp);
 #endif
+	unregister_shrinker(&mp->m_inodegc_shrinker);
 	xfs_free_perag(mp);
 
 	xfs_errortag_del(mp);
@@ -1049,20 +1076,12 @@ xfs_fs_writable(
 {
 	ASSERT(level > SB_UNFROZEN);
 	if ((mp->m_super->s_writers.frozen >= level) ||
-	    XFS_FORCED_SHUTDOWN(mp) || (mp->m_flags & XFS_MOUNT_RDONLY))
+	    xfs_is_shutdown(mp) || xfs_is_readonly(mp))
 		return false;
 
 	return true;
 }
 
-/*
- * Deltas for the block count can vary from 1 to very large, but lock contention
- * only occurs on frequent small block count updates such as in the delayed
- * allocation path for buffered writes (page a time updates). Hence we set
- * a large batch count (1024) to minimise global counter updates except when
- * we get near to ENOSPC and we have to be very accurate with our updates.
- */
-#define XFS_FDBLOCKS_BATCH	1024
 int
 xfs_mod_fdblocks(
 	struct xfs_mount	*mp,
@@ -1210,13 +1229,123 @@ void
 xfs_force_summary_recalc(
 	struct xfs_mount	*mp)
 {
-	if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
+	if (!xfs_has_lazysbcount(mp))
 		return;
 
 	xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);
 }
 
 /*
+ * Enable a log incompat feature flag in the primary superblock.  The caller
+ * cannot have any other transactions in progress.
+ */
+int
+xfs_add_incompat_log_feature(
+	struct xfs_mount	*mp,
+	uint32_t		feature)
+{
+	struct xfs_dsb		*dsb;
+	int			error;
+
+	ASSERT(hweight32(feature) == 1);
+	ASSERT(!(feature & XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN));
+
+	/*
+	 * Force the log to disk and kick the background AIL thread to reduce
+	 * the chances that the bwrite will stall waiting for the AIL to unpin
+	 * the primary superblock buffer.  This isn't a data integrity
+	 * operation, so we don't need a synchronous push.
+	 */
+	error = xfs_log_force(mp, XFS_LOG_SYNC);
+	if (error)
+		return error;
+	xfs_ail_push_all(mp->m_ail);
+
+	/*
+	 * Lock the primary superblock buffer to serialize all callers that
+	 * are trying to set feature bits.
+	 */
+	xfs_buf_lock(mp->m_sb_bp);
+	xfs_buf_hold(mp->m_sb_bp);
+
+	if (xfs_is_shutdown(mp)) {
+		error = -EIO;
+		goto rele;
+	}
+
+	if (xfs_sb_has_incompat_log_feature(&mp->m_sb, feature))
+		goto rele;
+
+	/*
+	 * Write the primary superblock to disk immediately, because we need
+	 * the log_incompat bit to be set in the primary super now to protect
+	 * the log items that we're going to commit later.
+	 */
+	dsb = mp->m_sb_bp->b_addr;
+	xfs_sb_to_disk(dsb, &mp->m_sb);
+	dsb->sb_features_log_incompat |= cpu_to_be32(feature);
+	error = xfs_bwrite(mp->m_sb_bp);
+	if (error)
+		goto shutdown;
+
+	/*
+	 * Add the feature bits to the incore superblock before we unlock the
+	 * buffer.
+	 */
+	xfs_sb_add_incompat_log_features(&mp->m_sb, feature);
+	xfs_buf_relse(mp->m_sb_bp);
+
+	/* Log the superblock to disk. */
+	return xfs_sync_sb(mp, false);
+shutdown:
+	xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+rele:
+	xfs_buf_relse(mp->m_sb_bp);
+	return error;
+}
+
+/*
+ * Clear all the log incompat flags from the superblock.
+ *
+ * The caller cannot be in a transaction, must ensure that the log does not
+ * contain any log items protected by any log incompat bit, and must ensure
+ * that there are no other threads that depend on the state of the log incompat
+ * feature flags in the primary super.
+ *
+ * Returns true if the superblock is dirty.
+ */
+bool
+xfs_clear_incompat_log_features(
+	struct xfs_mount	*mp)
+{
+	bool			ret = false;
+
+	if (!xfs_has_crc(mp) ||
+	    !xfs_sb_has_incompat_log_feature(&mp->m_sb,
+				XFS_SB_FEAT_INCOMPAT_LOG_ALL) ||
+	    xfs_is_shutdown(mp))
+		return false;
+
+	/*
+	 * Update the incore superblock.  We synchronize on the primary super
+	 * buffer lock to be consistent with the add function, though at least
+	 * in theory this shouldn't be necessary.
+	 */
+	xfs_buf_lock(mp->m_sb_bp);
+	xfs_buf_hold(mp->m_sb_bp);
+
+	if (xfs_sb_has_incompat_log_feature(&mp->m_sb,
+				XFS_SB_FEAT_INCOMPAT_LOG_ALL)) {
+		xfs_info(mp, "Clearing log incompat feature flags.");
+		xfs_sb_remove_incompat_log_features(&mp->m_sb);
+		ret = true;
+	}
+
+	xfs_buf_relse(mp->m_sb_bp);
+	return ret;
+}
+
+/*
  * Update the in-core delayed block counter.
  *
  * We prefer to update the counter without having to take a spinlock for every
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index c78b63fe779a..e091f3b3fa15 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -57,6 +57,18 @@ struct xfs_error_cfg {
 };
 
 /*
+ * Per-cpu deferred inode inactivation GC lists.
+ */
+struct xfs_inodegc {
+	struct llist_head	list;
+	struct work_struct	work;
+
+	/* approximate count of inodes in the list */
+	unsigned int		items;
+	unsigned int		shrinker_hits;
+};
+
+/*
  * The struct xfsmount layout is optimised to separate read-mostly variables
  * from variables that are frequently modified. We put the read-mostly variables
  * first, then place all the other variables at the end.
@@ -82,6 +94,9 @@ typedef struct xfs_mount {
 	xfs_buftarg_t		*m_ddev_targp;	/* saves taking the address */
 	xfs_buftarg_t		*m_logdev_targp;/* ptr to log device */
 	xfs_buftarg_t		*m_rtdev_targp;	/* ptr to rt device */
+	struct list_head	m_mount_list;	/* global mount list */
+	void __percpu		*m_inodegc;	/* percpu inodegc structures */
+
 	/*
 	 * Optional cache of rt summary level per bitmap block with the
 	 * invariant that m_rsum_cache[bbno] <= the minimum i for which
@@ -92,10 +107,10 @@ typedef struct xfs_mount {
 	struct xfs_mru_cache	*m_filestream;  /* per-mount filestream data */
 	struct workqueue_struct *m_buf_workqueue;
 	struct workqueue_struct	*m_unwritten_workqueue;
-	struct workqueue_struct	*m_cil_workqueue;
 	struct workqueue_struct	*m_reclaim_workqueue;
-	struct workqueue_struct *m_gc_workqueue;
 	struct workqueue_struct	*m_sync_workqueue;
+	struct workqueue_struct *m_blockgc_wq;
+	struct workqueue_struct *m_inodegc_wq;
 
 	int			m_bsize;	/* fs logical block size */
 	uint8_t			m_blkbit_log;	/* blocklog + NBBY */
@@ -131,11 +146,13 @@ typedef struct xfs_mount {
 	uint			m_rsumsize;	/* size of rt summary, bytes */
 	int			m_fixedfsid[2];	/* unchanged for life of FS */
 	uint			m_qflags;	/* quota status flags */
-	uint64_t		m_flags;	/* global mount flags */
-	int64_t			m_low_space[XFS_LOWSP_MAX];
+	uint64_t		m_features;	/* active filesystem features */
+	uint64_t		m_low_space[XFS_LOWSP_MAX];
+	uint64_t		m_low_rtexts[XFS_LOWSP_MAX];
 	struct xfs_ino_geometry	m_ino_geo;	/* inode geometry */
 	struct xfs_trans_resv	m_resv;		/* precomputed res values */
 						/* low free space thresholds */
+	unsigned long		m_opstate;	/* dynamic state flags */
 	bool			m_always_cow;
 	bool			m_fail_unmount;
 	bool			m_finobt_nores; /* no per-AG finobt resv. */
@@ -193,6 +210,8 @@ typedef struct xfs_mount {
 	xfs_agnumber_t		m_agirotor;	/* last ag dir inode alloced */
 	spinlock_t		m_agirotor_lock;/* .. and lock protecting it */
 
+	/* Memory shrinker to throttle and reprioritize inodegc */
+	struct shrinker		m_inodegc_shrinker;
 	/*
 	 * Workqueue item so that we can coalesce multiple inode flush attempts
 	 * into a single flush.
@@ -225,38 +244,178 @@ typedef struct xfs_mount {
 #define M_IGEO(mp)		(&(mp)->m_ino_geo)
 
 /*
- * Flags for m_flags.
+ * Flags for m_features.
+ *
+ * These are all the active features in the filesystem, regardless of how
+ * they are configured.
  */
-#define XFS_MOUNT_WSYNC		(1ULL << 0)	/* for nfs - all metadata ops
-						   must be synchronous except
-						   for space allocations */
-#define XFS_MOUNT_UNMOUNTING	(1ULL << 1)	/* filesystem is unmounting */
-#define XFS_MOUNT_WAS_CLEAN	(1ULL << 3)
-#define XFS_MOUNT_FS_SHUTDOWN	(1ULL << 4)	/* atomic stop of all filesystem
-						   operations, typically for
-						   disk errors in metadata */
-#define XFS_MOUNT_DISCARD	(1ULL << 5)	/* discard unused blocks */
-#define XFS_MOUNT_NOALIGN	(1ULL << 7)	/* turn off stripe alignment
-						   allocations */
-#define XFS_MOUNT_ATTR2		(1ULL << 8)	/* allow use of attr2 format */
-#define XFS_MOUNT_GRPID		(1ULL << 9)	/* group-ID assigned from directory */
-#define XFS_MOUNT_NORECOVERY	(1ULL << 10)	/* no recovery - dirty fs */
-#define XFS_MOUNT_ALLOCSIZE	(1ULL << 12)	/* specified allocation size */
-#define XFS_MOUNT_SMALL_INUMS	(1ULL << 14)	/* user wants 32bit inodes */
-#define XFS_MOUNT_32BITINODES	(1ULL << 15)	/* inode32 allocator active */
-#define XFS_MOUNT_NOUUID	(1ULL << 16)	/* ignore uuid during mount */
-#define XFS_MOUNT_IKEEP		(1ULL << 18)	/* keep empty inode clusters*/
-#define XFS_MOUNT_SWALLOC	(1ULL << 19)	/* turn on stripe width
-						 * allocation */
-#define XFS_MOUNT_RDONLY	(1ULL << 20)	/* read-only fs */
-#define XFS_MOUNT_DIRSYNC	(1ULL << 21)	/* synchronous directory ops */
-#define XFS_MOUNT_LARGEIO	(1ULL << 22)	/* report large preferred
+#define XFS_FEAT_ATTR		(1ULL << 0)	/* xattrs present in fs */
+#define XFS_FEAT_NLINK		(1ULL << 1)	/* 32 bit link counts */
+#define XFS_FEAT_QUOTA		(1ULL << 2)	/* quota active */
+#define XFS_FEAT_ALIGN		(1ULL << 3)	/* inode alignment */
+#define XFS_FEAT_DALIGN		(1ULL << 4)	/* data alignment */
+#define XFS_FEAT_LOGV2		(1ULL << 5)	/* version 2 logs */
+#define XFS_FEAT_SECTOR		(1ULL << 6)	/* sector size > 512 bytes */
+#define XFS_FEAT_EXTFLG		(1ULL << 7)	/* unwritten extents */
+#define XFS_FEAT_ASCIICI	(1ULL << 8)	/* ASCII only case-insens. */
+#define XFS_FEAT_LAZYSBCOUNT	(1ULL << 9)	/* Superblk counters */
+#define XFS_FEAT_ATTR2		(1ULL << 10)	/* dynamic attr fork */
+#define XFS_FEAT_PARENT		(1ULL << 11)	/* parent pointers */
+#define XFS_FEAT_PROJID32	(1ULL << 12)	/* 32 bit project id */
+#define XFS_FEAT_CRC		(1ULL << 13)	/* metadata CRCs */
+#define XFS_FEAT_V3INODES	(1ULL << 14)	/* Version 3 inodes */
+#define XFS_FEAT_PQUOTINO	(1ULL << 15)	/* non-shared proj/grp quotas */
+#define XFS_FEAT_FTYPE		(1ULL << 16)	/* inode type in dir */
+#define XFS_FEAT_FINOBT		(1ULL << 17)	/* free inode btree */
+#define XFS_FEAT_RMAPBT		(1ULL << 18)	/* reverse map btree */
+#define XFS_FEAT_REFLINK	(1ULL << 19)	/* reflinked files */
+#define XFS_FEAT_SPINODES	(1ULL << 20)	/* sparse inode chunks */
+#define XFS_FEAT_META_UUID	(1ULL << 21)	/* metadata UUID */
+#define XFS_FEAT_REALTIME	(1ULL << 22)	/* realtime device present */
+#define XFS_FEAT_INOBTCNT	(1ULL << 23)	/* inobt block counts */
+#define XFS_FEAT_BIGTIME	(1ULL << 24)	/* large timestamps */
+#define XFS_FEAT_NEEDSREPAIR	(1ULL << 25)	/* needs xfs_repair */
+
+/* Mount features */
+#define XFS_FEAT_NOATTR2	(1ULL << 48)	/* disable attr2 creation */
+#define XFS_FEAT_NOALIGN	(1ULL << 49)	/* ignore alignment */
+#define XFS_FEAT_ALLOCSIZE	(1ULL << 50)	/* user specified allocation size */
+#define XFS_FEAT_LARGE_IOSIZE	(1ULL << 51)	/* report large preferred
 						 * I/O size in stat() */
-#define XFS_MOUNT_FILESTREAMS	(1ULL << 24)	/* enable the filestreams
-						   allocator */
-#define XFS_MOUNT_NOATTR2	(1ULL << 25)	/* disable use of attr2 format */
-#define XFS_MOUNT_DAX_ALWAYS	(1ULL << 26)
-#define XFS_MOUNT_DAX_NEVER	(1ULL << 27)
+#define XFS_FEAT_WSYNC		(1ULL << 52)	/* synchronous metadata ops */
+#define XFS_FEAT_DIRSYNC	(1ULL << 53)	/* synchronous directory ops */
+#define XFS_FEAT_DISCARD	(1ULL << 54)	/* discard unused blocks */
+#define XFS_FEAT_GRPID		(1ULL << 55)	/* group-ID assigned from directory */
+#define XFS_FEAT_SMALL_INUMS	(1ULL << 56)	/* user wants 32bit inodes */
+#define XFS_FEAT_IKEEP		(1ULL << 57)	/* keep empty inode clusters*/
+#define XFS_FEAT_SWALLOC	(1ULL << 58)	/* stripe width allocation */
+#define XFS_FEAT_FILESTREAMS	(1ULL << 59)	/* use filestreams allocator */
+#define XFS_FEAT_DAX_ALWAYS	(1ULL << 60)	/* DAX always enabled */
+#define XFS_FEAT_DAX_NEVER	(1ULL << 61)	/* DAX never enabled */
+#define XFS_FEAT_NORECOVERY	(1ULL << 62)	/* no recovery - dirty fs */
+#define XFS_FEAT_NOUUID		(1ULL << 63)	/* ignore uuid during mount */
+
+#define __XFS_HAS_FEAT(name, NAME) \
+static inline bool xfs_has_ ## name (struct xfs_mount *mp) \
+{ \
+	return mp->m_features & XFS_FEAT_ ## NAME; \
+}
+
+/* Some features can be added dynamically so they need a set wrapper, too. */
+#define __XFS_ADD_FEAT(name, NAME) \
+	__XFS_HAS_FEAT(name, NAME); \
+static inline void xfs_add_ ## name (struct xfs_mount *mp) \
+{ \
+	mp->m_features |= XFS_FEAT_ ## NAME; \
+	xfs_sb_version_add ## name(&mp->m_sb); \
+}
+
+/* Superblock features */
+__XFS_ADD_FEAT(attr, ATTR)
+__XFS_HAS_FEAT(nlink, NLINK)
+__XFS_ADD_FEAT(quota, QUOTA)
+__XFS_HAS_FEAT(align, ALIGN)
+__XFS_HAS_FEAT(dalign, DALIGN)
+__XFS_HAS_FEAT(logv2, LOGV2)
+__XFS_HAS_FEAT(sector, SECTOR)
+__XFS_HAS_FEAT(extflg, EXTFLG)
+__XFS_HAS_FEAT(asciici, ASCIICI)
+__XFS_HAS_FEAT(lazysbcount, LAZYSBCOUNT)
+__XFS_ADD_FEAT(attr2, ATTR2)
+__XFS_HAS_FEAT(parent, PARENT)
+__XFS_ADD_FEAT(projid32, PROJID32)
+__XFS_HAS_FEAT(crc, CRC)
+__XFS_HAS_FEAT(v3inodes, V3INODES)
+__XFS_HAS_FEAT(pquotino, PQUOTINO)
+__XFS_HAS_FEAT(ftype, FTYPE)
+__XFS_HAS_FEAT(finobt, FINOBT)
+__XFS_HAS_FEAT(rmapbt, RMAPBT)
+__XFS_HAS_FEAT(reflink, REFLINK)
+__XFS_HAS_FEAT(sparseinodes, SPINODES)
+__XFS_HAS_FEAT(metauuid, META_UUID)
+__XFS_HAS_FEAT(realtime, REALTIME)
+__XFS_HAS_FEAT(inobtcounts, INOBTCNT)
+__XFS_HAS_FEAT(bigtime, BIGTIME)
+__XFS_HAS_FEAT(needsrepair, NEEDSREPAIR)
+
+/*
+ * Mount features
+ *
+ * These do not change dynamically - features that can come and go, such as 32
+ * bit inodes and read-only state, are kept as operational state rather than
+ * features.
+ */
+__XFS_HAS_FEAT(noattr2, NOATTR2)
+__XFS_HAS_FEAT(noalign, NOALIGN)
+__XFS_HAS_FEAT(allocsize, ALLOCSIZE)
+__XFS_HAS_FEAT(large_iosize, LARGE_IOSIZE)
+__XFS_HAS_FEAT(wsync, WSYNC)
+__XFS_HAS_FEAT(dirsync, DIRSYNC)
+__XFS_HAS_FEAT(discard, DISCARD)
+__XFS_HAS_FEAT(grpid, GRPID)
+__XFS_HAS_FEAT(small_inums, SMALL_INUMS)
+__XFS_HAS_FEAT(ikeep, IKEEP)
+__XFS_HAS_FEAT(swalloc, SWALLOC)
+__XFS_HAS_FEAT(filestreams, FILESTREAMS)
+__XFS_HAS_FEAT(dax_always, DAX_ALWAYS)
+__XFS_HAS_FEAT(dax_never, DAX_NEVER)
+__XFS_HAS_FEAT(norecovery, NORECOVERY)
+__XFS_HAS_FEAT(nouuid, NOUUID)
+
+/*
+ * Operational mount state flags
+ *
+ * Use these with atomic bit ops only!
+ */
+#define XFS_OPSTATE_UNMOUNTING		0	/* filesystem is unmounting */
+#define XFS_OPSTATE_CLEAN		1	/* mount was clean */
+#define XFS_OPSTATE_SHUTDOWN		2	/* stop all fs operations */
+#define XFS_OPSTATE_INODE32		3	/* inode32 allocator active */
+#define XFS_OPSTATE_READONLY		4	/* read-only fs */
+
+/*
+ * If set, inactivation worker threads will be scheduled to process queued
+ * inodegc work.  If not, queued inodes remain in memory waiting to be
+ * processed.
+ */
+#define XFS_OPSTATE_INODEGC_ENABLED	5
+/*
+ * If set, background speculative prealloc gc worker threads will be scheduled
+ * to process queued blockgc work.  If not, inodes retain their preallocations
+ * until explicitly deleted.
+ */
+#define XFS_OPSTATE_BLOCKGC_ENABLED	6
+
+#define __XFS_IS_OPSTATE(name, NAME) \
+static inline bool xfs_is_ ## name (struct xfs_mount *mp) \
+{ \
+	return test_bit(XFS_OPSTATE_ ## NAME, &mp->m_opstate); \
+} \
+static inline bool xfs_clear_ ## name (struct xfs_mount *mp) \
+{ \
+	return test_and_clear_bit(XFS_OPSTATE_ ## NAME, &mp->m_opstate); \
+} \
+static inline bool xfs_set_ ## name (struct xfs_mount *mp) \
+{ \
+	return test_and_set_bit(XFS_OPSTATE_ ## NAME, &mp->m_opstate); \
+}
+
+__XFS_IS_OPSTATE(unmounting, UNMOUNTING)
+__XFS_IS_OPSTATE(clean, CLEAN)
+__XFS_IS_OPSTATE(shutdown, SHUTDOWN)
+__XFS_IS_OPSTATE(inode32, INODE32)
+__XFS_IS_OPSTATE(readonly, READONLY)
+__XFS_IS_OPSTATE(inodegc_enabled, INODEGC_ENABLED)
+__XFS_IS_OPSTATE(blockgc_enabled, BLOCKGC_ENABLED)
+
+#define XFS_OPSTATE_STRINGS \
+	{ (1UL << XFS_OPSTATE_UNMOUNTING),		"unmounting" }, \
+	{ (1UL << XFS_OPSTATE_CLEAN),			"clean" }, \
+	{ (1UL << XFS_OPSTATE_SHUTDOWN),		"shutdown" }, \
+	{ (1UL << XFS_OPSTATE_INODE32),			"inode32" }, \
+	{ (1UL << XFS_OPSTATE_READONLY),		"read_only" }, \
+	{ (1UL << XFS_OPSTATE_INODEGC_ENABLED),		"inodegc" }, \
+	{ (1UL << XFS_OPSTATE_BLOCKGC_ENABLED),		"blockgc" }
 
 /*
  * Max and min values for mount-option defined I/O
@@ -265,9 +424,7 @@ typedef struct xfs_mount {
 #define XFS_MAX_IO_LOG		30	/* 1G */
 #define XFS_MIN_IO_LOG		PAGE_SHIFT
 
-#define XFS_LAST_UNMOUNT_WAS_CLEAN(mp)	\
-				((mp)->m_flags & XFS_MOUNT_WAS_CLEAN)
-#define XFS_FORCED_SHUTDOWN(mp)	((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN)
+#define xfs_is_shutdown(mp)		xfs_is_shutdown(mp)
 void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
 		int lnnum);
 #define xfs_force_shutdown(m,f)	\
@@ -278,6 +435,12 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
 #define SHUTDOWN_FORCE_UMOUNT	0x0004	/* shutdown from a forced unmount */
 #define SHUTDOWN_CORRUPT_INCORE	0x0008	/* corrupt in-memory data structures */
 
+#define XFS_SHUTDOWN_STRINGS \
+	{ SHUTDOWN_META_IO_ERROR,	"metadata_io" }, \
+	{ SHUTDOWN_LOG_IO_ERROR,	"log_io" }, \
+	{ SHUTDOWN_FORCE_UMOUNT,	"force_umount" }, \
+	{ SHUTDOWN_CORRUPT_INCORE,	"corruption" }
+
 /*
  * Flags for xfs_mountfs
  */
@@ -306,6 +469,15 @@ extern uint64_t xfs_default_resblks(xfs_mount_t *mp);
 extern int	xfs_mountfs(xfs_mount_t *mp);
 extern void	xfs_unmountfs(xfs_mount_t *);
 
+/*
+ * Deltas for the block count can vary from 1 to very large, but lock contention
+ * only occurs on frequent small block count updates such as in the delayed
+ * allocation path for buffered writes (page a time updates). Hence we set
+ * a large batch count (1024) to minimise global counter updates except when
+ * we get near to ENOSPC and we have to be very accurate with our updates.
+ */
+#define XFS_FDBLOCKS_BATCH	1024
+
 extern int	xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
 				 bool reserved);
 extern int	xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
@@ -325,6 +497,8 @@ int	xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb,
 struct xfs_error_cfg * xfs_error_get_cfg(struct xfs_mount *mp,
 		int error_class, int error);
 void xfs_force_summary_recalc(struct xfs_mount *mp);
+int xfs_add_incompat_log_feature(struct xfs_mount *mp, uint32_t feature);
+bool xfs_clear_incompat_log_features(struct xfs_mount *mp);
 void xfs_mod_delalloc(struct xfs_mount *mp, int64_t delta);
 
 #endif	/* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 956cca24e67f..5e1d29d8b2e7 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -92,7 +92,7 @@ xfs_fs_map_blocks(
 	uint			lock_flags;
 	int			error = 0;
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	/*
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index fe341f3fd419..5608066d6e53 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -157,7 +157,7 @@ xfs_qm_dqpurge(
 	}
 
 	ASSERT(atomic_read(&dqp->q_pincount) == 0);
-	ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
+	ASSERT(xfs_is_shutdown(mp) ||
 		!test_bit(XFS_LI_IN_AIL, &dqp->q_logitem.qli_item.li_flags));
 
 	xfs_dqfunlock(dqp);
@@ -185,17 +185,13 @@ out_unlock:
 /*
  * Purge the dquot cache.
  */
-void
+static void
 xfs_qm_dqpurge_all(
-	struct xfs_mount	*mp,
-	uint			flags)
+	struct xfs_mount	*mp)
 {
-	if (flags & XFS_QMOPT_UQUOTA)
-		xfs_qm_dquot_walk(mp, XFS_DQTYPE_USER, xfs_qm_dqpurge, NULL);
-	if (flags & XFS_QMOPT_GQUOTA)
-		xfs_qm_dquot_walk(mp, XFS_DQTYPE_GROUP, xfs_qm_dqpurge, NULL);
-	if (flags & XFS_QMOPT_PQUOTA)
-		xfs_qm_dquot_walk(mp, XFS_DQTYPE_PROJ, xfs_qm_dqpurge, NULL);
+	xfs_qm_dquot_walk(mp, XFS_DQTYPE_USER, xfs_qm_dqpurge, NULL);
+	xfs_qm_dquot_walk(mp, XFS_DQTYPE_GROUP, xfs_qm_dqpurge, NULL);
+	xfs_qm_dquot_walk(mp, XFS_DQTYPE_PROJ, xfs_qm_dqpurge, NULL);
 }
 
 /*
@@ -206,7 +202,7 @@ xfs_qm_unmount(
 	struct xfs_mount	*mp)
 {
 	if (mp->m_quotainfo) {
-		xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
+		xfs_qm_dqpurge_all(mp);
 		xfs_qm_destroy_quotainfo(mp);
 	}
 }
@@ -299,8 +295,6 @@ xfs_qm_need_dqattach(
 {
 	struct xfs_mount	*mp = ip->i_mount;
 
-	if (!XFS_IS_QUOTA_RUNNING(mp))
-		return false;
 	if (!XFS_IS_QUOTA_ON(mp))
 		return false;
 	if (!XFS_NOT_DQATTACHED(mp, ip))
@@ -635,7 +629,7 @@ xfs_qm_init_quotainfo(
 	struct xfs_quotainfo	*qinf;
 	int			error;
 
-	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+	ASSERT(XFS_IS_QUOTA_ON(mp));
 
 	qinf = mp->m_quotainfo = kmem_zalloc(sizeof(struct xfs_quotainfo), 0);
 
@@ -662,7 +656,7 @@ xfs_qm_init_quotainfo(
 	/* Precalc some constants */
 	qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
 	qinf->qi_dqperchunk = xfs_calc_dquots_per_chunk(qinf->qi_dqchunklen);
-	if (xfs_sb_version_hasbigtime(&mp->m_sb)) {
+	if (xfs_has_bigtime(mp)) {
 		qinf->qi_expiry_min =
 			xfs_dq_bigtime_to_unix(XFS_DQ_BIGTIME_EXPIRY_MIN);
 		qinf->qi_expiry_max =
@@ -680,11 +674,11 @@ xfs_qm_init_quotainfo(
 	xfs_qm_init_timelimits(mp, XFS_DQTYPE_GROUP);
 	xfs_qm_init_timelimits(mp, XFS_DQTYPE_PROJ);
 
-	if (XFS_IS_UQUOTA_RUNNING(mp))
+	if (XFS_IS_UQUOTA_ON(mp))
 		xfs_qm_set_defquota(mp, XFS_DQTYPE_USER, qinf);
-	if (XFS_IS_GQUOTA_RUNNING(mp))
+	if (XFS_IS_GQUOTA_ON(mp))
 		xfs_qm_set_defquota(mp, XFS_DQTYPE_GROUP, qinf);
-	if (XFS_IS_PQUOTA_RUNNING(mp))
+	if (XFS_IS_PQUOTA_ON(mp))
 		xfs_qm_set_defquota(mp, XFS_DQTYPE_PROJ, qinf);
 
 	qinf->qi_shrinker.count_objects = xfs_qm_shrink_count;
@@ -755,7 +749,7 @@ xfs_qm_qino_alloc(
 	 * with PQUOTA, just use sb_gquotino for sb_pquotino and
 	 * vice-versa.
 	 */
-	if (!xfs_sb_version_has_pquotino(&mp->m_sb) &&
+	if (!xfs_has_pquotino(mp) &&
 			(flags & (XFS_QMOPT_PQUOTA|XFS_QMOPT_GQUOTA))) {
 		xfs_ino_t ino = NULLFSINO;
 
@@ -808,9 +802,9 @@ xfs_qm_qino_alloc(
 	 */
 	spin_lock(&mp->m_sb_lock);
 	if (flags & XFS_QMOPT_SBVERSION) {
-		ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
+		ASSERT(!xfs_has_quota(mp));
 
-		xfs_sb_version_addquota(&mp->m_sb);
+		xfs_add_quota(mp);
 		mp->m_sb.sb_uquotino = NULLFSINO;
 		mp->m_sb.sb_gquotino = NULLFSINO;
 		mp->m_sb.sb_pquotino = NULLFSINO;
@@ -829,7 +823,7 @@ xfs_qm_qino_alloc(
 
 	error = xfs_trans_commit(tp);
 	if (error) {
-		ASSERT(XFS_FORCED_SHUTDOWN(mp));
+		ASSERT(xfs_is_shutdown(mp));
 		xfs_alert(mp, "%s failed (error %d)!", __func__, error);
 	}
 	if (need_alloc)
@@ -896,11 +890,11 @@ xfs_qm_reset_dqcounts(
 			ddq->d_bwarns = 0;
 			ddq->d_iwarns = 0;
 			ddq->d_rtbwarns = 0;
-			if (xfs_sb_version_hasbigtime(&mp->m_sb))
+			if (xfs_has_bigtime(mp))
 				ddq->d_type |= XFS_DQTYPE_BIGTIME;
 		}
 
-		if (xfs_sb_version_hascrc(&mp->m_sb)) {
+		if (xfs_has_crc(mp)) {
 			xfs_update_cksum((char *)&dqb[j],
 					 sizeof(struct xfs_dqblk),
 					 XFS_DQUOT_CRC_OFF);
@@ -1147,7 +1141,7 @@ xfs_qm_dqusage_adjust(
 	xfs_filblks_t		rtblks = 0;	/* total rt blks */
 	int			error;
 
-	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+	ASSERT(XFS_IS_QUOTA_ON(mp));
 
 	/*
 	 * rootino must have its resources accounted for, not so with the quota
@@ -1288,7 +1282,7 @@ xfs_qm_quotacheck(
 	flags = 0;
 
 	ASSERT(uip || gip || pip);
-	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+	ASSERT(XFS_IS_QUOTA_ON(mp));
 
 	xfs_notice(mp, "Quotacheck needed: Please wait.");
 
@@ -1359,7 +1353,7 @@ xfs_qm_quotacheck(
 	 * at this point (because we intentionally didn't in dqget_noattach).
 	 */
 	if (error) {
-		xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
+		xfs_qm_dqpurge_all(mp);
 		goto error_return;
 	}
 
@@ -1418,7 +1412,7 @@ xfs_qm_mount_quotas(
 		goto write_changes;
 	}
 
-	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+	ASSERT(XFS_IS_QUOTA_ON(mp));
 
 	/*
 	 * Allocate the quotainfo structure inside the mount struct, and
@@ -1473,7 +1467,7 @@ xfs_qm_mount_quotas(
 			 * the incore structures are convinced that quotas are
 			 * off, but the on disk superblock doesn't know that !
 			 */
-			ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
+			ASSERT(!(XFS_IS_QUOTA_ON(mp)));
 			xfs_alert(mp, "%s: Superblock update failed!",
 				__func__);
 		}
@@ -1504,7 +1498,7 @@ xfs_qm_init_quotainos(
 	/*
 	 * Get the uquota and gquota inodes
 	 */
-	if (xfs_sb_version_hasquota(&mp->m_sb)) {
+	if (xfs_has_quota(mp)) {
 		if (XFS_IS_UQUOTA_ON(mp) &&
 		    mp->m_sb.sb_uquotino != NULLFSINO) {
 			ASSERT(mp->m_sb.sb_uquotino > 0);
@@ -1645,7 +1639,7 @@ xfs_qm_vop_dqalloc(
 	int			error;
 	uint			lockflags;
 
-	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+	if (!XFS_IS_QUOTA_ON(mp))
 		return 0;
 
 	lockflags = XFS_ILOCK_EXCL;
@@ -1776,7 +1770,7 @@ xfs_qm_vop_chown(
 
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-	ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
+	ASSERT(XFS_IS_QUOTA_ON(ip->i_mount));
 
 	/* old dquot */
 	prevdq = *IO_olddq;
@@ -1829,7 +1823,7 @@ xfs_qm_vop_rename_dqattach(
 	struct xfs_mount	*mp = i_tab[0]->i_mount;
 	int			i;
 
-	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+	if (!XFS_IS_QUOTA_ON(mp))
 		return 0;
 
 	for (i = 0; (i < 4 && i_tab[i]); i++) {
@@ -1860,7 +1854,7 @@ xfs_qm_vop_create_dqattach(
 {
 	struct xfs_mount	*mp = tp->t_mountp;
 
-	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+	if (!XFS_IS_QUOTA_ON(mp))
 		return;
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
@@ -1888,3 +1882,37 @@ xfs_qm_vop_create_dqattach(
 	}
 }
 
+/* Decide if this inode's dquot is near an enforcement boundary. */
+bool
+xfs_inode_near_dquot_enforcement(
+	struct xfs_inode	*ip,
+	xfs_dqtype_t		type)
+{
+	struct xfs_dquot	*dqp;
+	int64_t			freesp;
+
+	/* We only care for quotas that are enabled and enforced. */
+	dqp = xfs_inode_dquot(ip, type);
+	if (!dqp || !xfs_dquot_is_enforced(dqp))
+		return false;
+
+	if (xfs_dquot_res_over_limits(&dqp->q_ino) ||
+	    xfs_dquot_res_over_limits(&dqp->q_rtb))
+		return true;
+
+	/* For space on the data device, check the various thresholds. */
+	if (!dqp->q_prealloc_hi_wmark)
+		return false;
+
+	if (dqp->q_blk.reserved < dqp->q_prealloc_lo_wmark)
+		return false;
+
+	if (dqp->q_blk.reserved >= dqp->q_prealloc_hi_wmark)
+		return true;
+
+	freesp = dqp->q_prealloc_hi_wmark - dqp->q_blk.reserved;
+	if (freesp < dqp->q_low_space[XFS_QLOWSP_5_PCNT])
+		return true;
+
+	return false;
+}
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index ebbb484c49dc..442a0f97a9d4 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -140,9 +140,6 @@ struct xfs_dquot_acct {
 
 extern void		xfs_qm_destroy_quotainfo(struct xfs_mount *);
 
-/* dquot stuff */
-extern void		xfs_qm_dqpurge_all(struct xfs_mount *, uint);
-
 /* quota ops */
 extern int		xfs_qm_scall_trunc_qfiles(struct xfs_mount *, uint);
 extern int		xfs_qm_scall_getquota(struct xfs_mount *mp,
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index df00dfbf5c9d..b77673dd0558 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -75,7 +75,7 @@ xfs_qm_newmount(
 	uint		quotaondisk;
 	uint		uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0;
 
-	quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) &&
+	quotaondisk = xfs_has_quota(mp) &&
 				(mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT);
 
 	if (quotaondisk) {
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 13a56e1ea15c..47fe60e1a887 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -19,91 +19,11 @@
 #include "xfs_qm.h"
 #include "xfs_icache.h"
 
-STATIC int
-xfs_qm_log_quotaoff(
-	struct xfs_mount	*mp,
-	struct xfs_qoff_logitem	**qoffstartp,
-	uint			flags)
-{
-	struct xfs_trans	*tp;
-	int			error;
-	struct xfs_qoff_logitem	*qoffi;
-
-	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_quotaoff, 0, 0, 0, &tp);
-	if (error)
-		goto out;
-
-	qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
-	xfs_trans_log_quotaoff_item(tp, qoffi);
-
-	spin_lock(&mp->m_sb_lock);
-	mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL;
-	spin_unlock(&mp->m_sb_lock);
-
-	xfs_log_sb(tp);
-
-	/*
-	 * We have to make sure that the transaction is secure on disk before we
-	 * return and actually stop quota accounting. So, make it synchronous.
-	 * We don't care about quotoff's performance.
-	 */
-	xfs_trans_set_sync(tp);
-	error = xfs_trans_commit(tp);
-	if (error)
-		goto out;
-
-	*qoffstartp = qoffi;
-out:
-	return error;
-}
-
-STATIC int
-xfs_qm_log_quotaoff_end(
-	struct xfs_mount	*mp,
-	struct xfs_qoff_logitem	**startqoff,
-	uint			flags)
-{
-	struct xfs_trans	*tp;
-	int			error;
-	struct xfs_qoff_logitem	*qoffi;
-
-	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_equotaoff, 0, 0, 0, &tp);
-	if (error)
-		return error;
-
-	qoffi = xfs_trans_get_qoff_item(tp, *startqoff,
-					flags & XFS_ALL_QUOTA_ACCT);
-	xfs_trans_log_quotaoff_item(tp, qoffi);
-	*startqoff = NULL;
-
-	/*
-	 * We have to make sure that the transaction is secure on disk before we
-	 * return and actually stop quota accounting. So, make it synchronous.
-	 * We don't care about quotoff's performance.
-	 */
-	xfs_trans_set_sync(tp);
-	return xfs_trans_commit(tp);
-}
-
-/*
- * Turn off quota accounting and/or enforcement for all udquots and/or
- * gdquots. Called only at unmount time.
- *
- * This assumes that there are no dquots of this file system cached
- * incore, and modifies the ondisk dquot directly. Therefore, for example,
- * it is an error to call this twice, without purging the cache.
- */
 int
 xfs_qm_scall_quotaoff(
 	xfs_mount_t		*mp,
 	uint			flags)
 {
-	struct xfs_quotainfo	*q = mp->m_quotainfo;
-	uint			dqtype;
-	int			error;
-	uint			inactivate_flags;
-	struct xfs_qoff_logitem	*qoffstart = NULL;
-
 	/*
 	 * No file system can have quotas enabled on disk but not in core.
 	 * Note that quota utilities (like quotaoff) _expect_
@@ -111,160 +31,23 @@ xfs_qm_scall_quotaoff(
 	 */
 	if ((mp->m_qflags & flags) == 0)
 		return -EEXIST;
-	error = 0;
-
-	flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
-
-	/*
-	 * We don't want to deal with two quotaoffs messing up each other,
-	 * so we're going to serialize it. quotaoff isn't exactly a performance
-	 * critical thing.
-	 * If quotaoff, then we must be dealing with the root filesystem.
-	 */
-	ASSERT(q);
-	mutex_lock(&q->qi_quotaofflock);
 
 	/*
-	 * If we're just turning off quota enforcement, change mp and go.
+	 * We do not support actually turning off quota accounting any more.
+	 * Just log a warning and ignore the accounting related flags.
 	 */
-	if ((flags & XFS_ALL_QUOTA_ACCT) == 0) {
-		mp->m_qflags &= ~(flags);
+	if (flags & XFS_ALL_QUOTA_ACCT)
+		xfs_info(mp, "disabling of quota accounting not supported.");
 
-		spin_lock(&mp->m_sb_lock);
-		mp->m_sb.sb_qflags = mp->m_qflags;
-		spin_unlock(&mp->m_sb_lock);
-		mutex_unlock(&q->qi_quotaofflock);
-
-		/* XXX what to do if error ? Revert back to old vals incore ? */
-		return xfs_sync_sb(mp, false);
-	}
-
-	dqtype = 0;
-	inactivate_flags = 0;
-	/*
-	 * If accounting is off, we must turn enforcement off, clear the
-	 * quota 'CHKD' certificate to make it known that we have to
-	 * do a quotacheck the next time this quota is turned on.
-	 */
-	if (flags & XFS_UQUOTA_ACCT) {
-		dqtype |= XFS_QMOPT_UQUOTA;
-		flags |= (XFS_UQUOTA_CHKD | XFS_UQUOTA_ENFD);
-		inactivate_flags |= XFS_UQUOTA_ACTIVE;
-	}
-	if (flags & XFS_GQUOTA_ACCT) {
-		dqtype |= XFS_QMOPT_GQUOTA;
-		flags |= (XFS_GQUOTA_CHKD | XFS_GQUOTA_ENFD);
-		inactivate_flags |= XFS_GQUOTA_ACTIVE;
-	}
-	if (flags & XFS_PQUOTA_ACCT) {
-		dqtype |= XFS_QMOPT_PQUOTA;
-		flags |= (XFS_PQUOTA_CHKD | XFS_PQUOTA_ENFD);
-		inactivate_flags |= XFS_PQUOTA_ACTIVE;
-	}
-
-	/*
-	 * Nothing to do?  Don't complain. This happens when we're just
-	 * turning off quota enforcement.
-	 */
-	if ((mp->m_qflags & flags) == 0)
-		goto out_unlock;
-
-	/*
-	 * Write the LI_QUOTAOFF log record, and do SB changes atomically,
-	 * and synchronously. If we fail to write, we should abort the
-	 * operation as it cannot be recovered safely if we crash.
-	 */
-	error = xfs_qm_log_quotaoff(mp, &qoffstart, flags);
-	if (error)
-		goto out_unlock;
-
-	/*
-	 * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct
-	 * to take care of the race between dqget and quotaoff. We don't take
-	 * any special locks to reset these bits. All processes need to check
-	 * these bits *after* taking inode lock(s) to see if the particular
-	 * quota type is in the process of being turned off. If *ACTIVE, it is
-	 * guaranteed that all dquot structures and all quotainode ptrs will all
-	 * stay valid as long as that inode is kept locked.
-	 *
-	 * There is no turning back after this.
-	 */
-	mp->m_qflags &= ~inactivate_flags;
-
-	/*
-	 * Give back all the dquot reference(s) held by inodes.
-	 * Here we go thru every single incore inode in this file system, and
-	 * do a dqrele on the i_udquot/i_gdquot that it may have.
-	 * Essentially, as long as somebody has an inode locked, this guarantees
-	 * that quotas will not be turned off. This is handy because in a
-	 * transaction once we lock the inode(s) and check for quotaon, we can
-	 * depend on the quota inodes (and other things) being valid as long as
-	 * we keep the lock(s).
-	 */
-	error = xfs_dqrele_all_inodes(mp, flags);
-	ASSERT(!error);
-
-	/*
-	 * Next we make the changes in the quota flag in the mount struct.
-	 * This isn't protected by a particular lock directly, because we
-	 * don't want to take a mrlock every time we depend on quotas being on.
-	 */
-	mp->m_qflags &= ~flags;
-
-	/*
-	 * Go through all the dquots of this file system and purge them,
-	 * according to what was turned off.
-	 */
-	xfs_qm_dqpurge_all(mp, dqtype);
-
-	/*
-	 * Transactions that had started before ACTIVE state bit was cleared
-	 * could have logged many dquots, so they'd have higher LSNs than
-	 * the first QUOTAOFF log record does. If we happen to crash when
-	 * the tail of the log has gone past the QUOTAOFF record, but
-	 * before the last dquot modification, those dquots __will__
-	 * recover, and that's not good.
-	 *
-	 * So, we have QUOTAOFF start and end logitems; the start
-	 * logitem won't get overwritten until the end logitem appears...
-	 */
-	error = xfs_qm_log_quotaoff_end(mp, &qoffstart, flags);
-	if (error) {
-		/* We're screwed now. Shutdown is the only option. */
-		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-		goto out_unlock;
-	}
-
-	/*
-	 * If all quotas are completely turned off, close shop.
-	 */
-	if (mp->m_qflags == 0) {
-		mutex_unlock(&q->qi_quotaofflock);
-		xfs_qm_destroy_quotainfo(mp);
-		return 0;
-	}
-
-	/*
-	 * Release our quotainode references if we don't need them anymore.
-	 */
-	if ((dqtype & XFS_QMOPT_UQUOTA) && q->qi_uquotaip) {
-		xfs_irele(q->qi_uquotaip);
-		q->qi_uquotaip = NULL;
-	}
-	if ((dqtype & XFS_QMOPT_GQUOTA) && q->qi_gquotaip) {
-		xfs_irele(q->qi_gquotaip);
-		q->qi_gquotaip = NULL;
-	}
-	if ((dqtype & XFS_QMOPT_PQUOTA) && q->qi_pquotaip) {
-		xfs_irele(q->qi_pquotaip);
-		q->qi_pquotaip = NULL;
-	}
+	mutex_lock(&mp->m_quotainfo->qi_quotaofflock);
+	mp->m_qflags &= ~(flags & XFS_ALL_QUOTA_ENFD);
+	spin_lock(&mp->m_sb_lock);
+	mp->m_sb.sb_qflags = mp->m_qflags;
+	spin_unlock(&mp->m_sb_lock);
+	mutex_unlock(&mp->m_quotainfo->qi_quotaofflock);
 
-out_unlock:
-	if (error && qoffstart)
-		xfs_qm_qoff_logitem_relse(qoffstart);
-	mutex_unlock(&q->qi_quotaofflock);
-	return error;
+	/* XXX what to do if error ? Revert back to old vals incore ? */
+	return xfs_sync_sb(mp, false);
 }
 
 STATIC int
@@ -322,7 +105,7 @@ xfs_qm_scall_trunc_qfiles(
 {
 	int		error = -EINVAL;
 
-	if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0 ||
+	if (!xfs_has_quota(mp) || flags == 0 ||
 	    (flags & ~XFS_QMOPT_QUOTALL)) {
 		xfs_debug(mp, "%s: flags=%x m_qflags=%x",
 			__func__, flags, mp->m_qflags);
@@ -421,7 +204,7 @@ xfs_qm_scall_quotaon(
 	     (mp->m_qflags & XFS_GQUOTA_ACCT)))
 		return 0;
 
-	if (! XFS_IS_QUOTA_RUNNING(mp))
+	if (!XFS_IS_QUOTA_ON(mp))
 		return -ESRCH;
 
 	/*
@@ -698,6 +481,10 @@ xfs_qm_scall_getquota(
 	struct xfs_dquot	*dqp;
 	int			error;
 
+	/* Flush inodegc work at the start of a quota reporting scan. */
+	if (id == 0)
+		xfs_inodegc_flush(mp);
+
 	/*
 	 * Try to get the dquot. We don't want it allocated on disk, so don't
 	 * set doalloc. If it doesn't exist, we'll get ENOENT back.
@@ -736,6 +523,10 @@ xfs_qm_scall_getquota_next(
 	struct xfs_dquot	*dqp;
 	int			error;
 
+	/* Flush inodegc work at the start of a quota reporting scan. */
+	if (*id == 0)
+		xfs_inodegc_flush(mp);
+
 	error = xfs_qm_dqget_next(mp, *id, type, &dqp);
 	if (error)
 		return error;
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index d00d01302545..dcc785fdd345 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -113,6 +113,7 @@ xfs_quota_reserve_blkres(struct xfs_inode *ip, int64_t blocks)
 {
 	return xfs_trans_reserve_quota_nblks(NULL, ip, blocks, 0, false);
 }
+bool xfs_inode_near_dquot_enforcement(struct xfs_inode *ip, xfs_dqtype_t type);
 #else
 static inline int
 xfs_qm_vop_dqalloc(struct xfs_inode *ip, kuid_t kuid, kgid_t kgid,
@@ -168,6 +169,7 @@ xfs_trans_reserve_quota_icreate(struct xfs_trans *tp, struct xfs_dquot *udqp,
 #define xfs_qm_mount_quotas(mp)
 #define xfs_qm_unmount(mp)
 #define xfs_qm_unmount_quotas(mp)
+#define xfs_inode_near_dquot_enforcement(ip, type)			(false)
 #endif /* CONFIG_XFS_QUOTA */
 
 static inline int
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index 88d70c236a54..07989bd67728 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -60,18 +60,18 @@ xfs_fs_get_quota_state(
 	struct xfs_quotainfo *q = mp->m_quotainfo;
 
 	memset(state, 0, sizeof(*state));
-	if (!XFS_IS_QUOTA_RUNNING(mp))
+	if (!XFS_IS_QUOTA_ON(mp))
 		return 0;
 	state->s_incoredqs = q->qi_dquots;
-	if (XFS_IS_UQUOTA_RUNNING(mp))
+	if (XFS_IS_UQUOTA_ON(mp))
 		state->s_state[USRQUOTA].flags |= QCI_ACCT_ENABLED;
 	if (XFS_IS_UQUOTA_ENFORCED(mp))
 		state->s_state[USRQUOTA].flags |= QCI_LIMITS_ENFORCED;
-	if (XFS_IS_GQUOTA_RUNNING(mp))
+	if (XFS_IS_GQUOTA_ON(mp))
 		state->s_state[GRPQUOTA].flags |= QCI_ACCT_ENABLED;
 	if (XFS_IS_GQUOTA_ENFORCED(mp))
 		state->s_state[GRPQUOTA].flags |= QCI_LIMITS_ENFORCED;
-	if (XFS_IS_PQUOTA_RUNNING(mp))
+	if (XFS_IS_PQUOTA_ON(mp))
 		state->s_state[PRJQUOTA].flags |= QCI_ACCT_ENABLED;
 	if (XFS_IS_PQUOTA_ENFORCED(mp))
 		state->s_state[PRJQUOTA].flags |= QCI_LIMITS_ENFORCED;
@@ -114,10 +114,8 @@ xfs_fs_set_info(
 
 	if (sb_rdonly(sb))
 		return -EROFS;
-	if (!XFS_IS_QUOTA_RUNNING(mp))
-		return -ENOSYS;
 	if (!XFS_IS_QUOTA_ON(mp))
-		return -ESRCH;
+		return -ENOSYS;
 	if (info->i_fieldmask & ~XFS_QC_SETINFO_MASK)
 		return -EINVAL;
 	if ((info->i_fieldmask & XFS_QC_SETINFO_MASK) == 0)
@@ -164,7 +162,7 @@ xfs_quota_enable(
 
 	if (sb_rdonly(sb))
 		return -EROFS;
-	if (!XFS_IS_QUOTA_RUNNING(mp))
+	if (!XFS_IS_QUOTA_ON(mp))
 		return -ENOSYS;
 
 	return xfs_qm_scall_quotaon(mp, xfs_quota_flags(uflags));
@@ -179,10 +177,8 @@ xfs_quota_disable(
 
 	if (sb_rdonly(sb))
 		return -EROFS;
-	if (!XFS_IS_QUOTA_RUNNING(mp))
-		return -ENOSYS;
 	if (!XFS_IS_QUOTA_ON(mp))
-		return -EINVAL;
+		return -ENOSYS;
 
 	return xfs_qm_scall_quotaoff(mp, xfs_quota_flags(uflags));
 }
@@ -223,10 +219,8 @@ xfs_fs_get_dqblk(
 	struct xfs_mount	*mp = XFS_M(sb);
 	xfs_dqid_t		id;
 
-	if (!XFS_IS_QUOTA_RUNNING(mp))
-		return -ENOSYS;
 	if (!XFS_IS_QUOTA_ON(mp))
-		return -ESRCH;
+		return -ENOSYS;
 
 	id = from_kqid(&init_user_ns, qid);
 	return xfs_qm_scall_getquota(mp, id, xfs_quota_type(qid.type), qdq);
@@ -243,10 +237,8 @@ xfs_fs_get_nextdqblk(
 	struct xfs_mount	*mp = XFS_M(sb);
 	xfs_dqid_t		id;
 
-	if (!XFS_IS_QUOTA_RUNNING(mp))
-		return -ENOSYS;
 	if (!XFS_IS_QUOTA_ON(mp))
-		return -ESRCH;
+		return -ENOSYS;
 
 	id = from_kqid(&init_user_ns, *qid);
 	ret = xfs_qm_scall_getquota_next(mp, &id, xfs_quota_type(qid->type),
@@ -269,10 +261,8 @@ xfs_fs_set_dqblk(
 
 	if (sb_rdonly(sb))
 		return -EROFS;
-	if (!XFS_IS_QUOTA_RUNNING(mp))
-		return -ENOSYS;
 	if (!XFS_IS_QUOTA_ON(mp))
-		return -ESRCH;
+		return -ENOSYS;
 
 	return xfs_qm_scall_setqlim(mp, from_kqid(&init_user_ns, qid),
 				     xfs_quota_type(qid.type), qdq);
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 746f4eda724c..46904b793bd4 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -423,7 +423,7 @@ xfs_cui_validate_phys(
 	struct xfs_mount		*mp,
 	struct xfs_phys_extent		*refc)
 {
-	if (!xfs_sb_version_hasreflink(&mp->m_sb))
+	if (!xfs_has_reflink(mp))
 		return false;
 
 	if (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)
@@ -522,6 +522,9 @@ xfs_cui_item_recover(
 			error = xfs_trans_log_finish_refcount_update(tp, cudp,
 				type, refc->pe_startblock, refc->pe_len,
 				&new_fsb, &new_len, &rcur);
+		if (error == -EFSCORRUPTED)
+			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+					refc, sizeof(*refc));
 		if (error)
 			goto abort_error;
 
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index c256104772cb..76355f293488 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -759,7 +759,7 @@ xfs_reflink_recover_cow(
 	xfs_agnumber_t		agno;
 	int			error = 0;
 
-	if (!xfs_sb_version_hasreflink(&mp->m_sb))
+	if (!xfs_has_reflink(mp))
 		return 0;
 
 	for_each_perag(mp, agno, pag) {
@@ -967,7 +967,7 @@ xfs_reflink_ag_has_free_space(
 	struct xfs_perag	*pag;
 	int			error = 0;
 
-	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+	if (!xfs_has_rmapbt(mp))
 		return 0;
 
 	pag = xfs_perag_get(mp, agno);
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 487b00434b96..bea65f2fe657 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -8,8 +8,7 @@
 
 static inline bool xfs_is_always_cow_inode(struct xfs_inode *ip)
 {
-	return ip->i_mount->m_always_cow &&
-		xfs_sb_version_hasreflink(&ip->i_mount->m_sb);
+	return ip->i_mount->m_always_cow && xfs_has_reflink(ip->i_mount);
 }
 
 static inline bool xfs_is_cow_inode(struct xfs_inode *ip)
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index dc4f0c9f0897..5f0695980467 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -466,7 +466,7 @@ xfs_rui_validate_map(
 	struct xfs_mount		*mp,
 	struct xfs_map_extent		*rmap)
 {
-	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+	if (!xfs_has_rmapbt(mp))
 		return false;
 
 	if (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS)
@@ -578,6 +578,9 @@ xfs_rui_item_recover(
 				rmap->me_owner, whichfork,
 				rmap->me_startoff, rmap->me_startblock,
 				rmap->me_len, state, &rcur);
+		if (error == -EFSCORRUPTED)
+			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+					rmap, sizeof(*rmap));
 		if (error)
 			goto abort_error;
 
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 699066fb9052..b8c79ee791af 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -951,8 +951,7 @@ xfs_growfs_rt(
 		return -EINVAL;
 
 	/* Unsupported realtime features. */
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb) ||
-	    xfs_sb_version_hasreflink(&mp->m_sb))
+	if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp))
 		return -EOPNOTSUPP;
 
 	nrblocks = in->newblocks;
@@ -1131,6 +1130,9 @@ error_cancel:
 		error = xfs_trans_commit(tp);
 		if (error)
 			break;
+
+		/* Ensure the mount RT feature flag is now set. */
+		mp->m_features |= XFS_FEAT_REALTIME;
 	}
 	if (error)
 		goto out_free;
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index ed885620589c..91b00289509b 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -22,9 +22,9 @@ struct xfs_rtalloc_rec {
 };
 
 typedef int (*xfs_rtalloc_query_range_fn)(
-	struct xfs_trans	*tp,
-	struct xfs_rtalloc_rec	*rec,
-	void			*priv);
+	struct xfs_trans		*tp,
+	const struct xfs_rtalloc_rec	*rec,
+	void				*priv);
 
 #ifdef CONFIG_XFS_RT
 /*
@@ -124,10 +124,9 @@ int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp,
 		     xfs_rtblock_t start, xfs_extlen_t len,
 		     struct xfs_buf **rbpp, xfs_fsblock_t *rsb);
 int xfs_rtalloc_query_range(struct xfs_trans *tp,
-			    struct xfs_rtalloc_rec *low_rec,
-			    struct xfs_rtalloc_rec *high_rec,
-			    xfs_rtalloc_query_range_fn fn,
-			    void *priv);
+		const struct xfs_rtalloc_rec *low_rec,
+		const struct xfs_rtalloc_rec *high_rec,
+		xfs_rtalloc_query_range_fn fn, void *priv);
 int xfs_rtalloc_query_all(struct xfs_trans *tp,
 			  xfs_rtalloc_query_range_fn fn,
 			  void *priv);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 102cbd606633..c4e0cd1c1c8c 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -49,6 +49,28 @@ static struct kset *xfs_kset;		/* top-level xfs sysfs dir */
 static struct xfs_kobj xfs_dbg_kobj;	/* global debug sysfs attrs */
 #endif
 
+#ifdef CONFIG_HOTPLUG_CPU
+static LIST_HEAD(xfs_mount_list);
+static DEFINE_SPINLOCK(xfs_mount_list_lock);
+
+static inline void xfs_mount_list_add(struct xfs_mount *mp)
+{
+	spin_lock(&xfs_mount_list_lock);
+	list_add(&mp->m_mount_list, &xfs_mount_list);
+	spin_unlock(&xfs_mount_list_lock);
+}
+
+static inline void xfs_mount_list_del(struct xfs_mount *mp)
+{
+	spin_lock(&xfs_mount_list_lock);
+	list_del(&mp->m_mount_list);
+	spin_unlock(&xfs_mount_list_lock);
+}
+#else /* !CONFIG_HOTPLUG_CPU */
+static inline void xfs_mount_list_add(struct xfs_mount *mp) {}
+static inline void xfs_mount_list_del(struct xfs_mount *mp) {}
+#endif
+
 enum xfs_dax_mode {
 	XFS_DAX_INODE = 0,
 	XFS_DAX_ALWAYS = 1,
@@ -62,15 +84,15 @@ xfs_mount_set_dax_mode(
 {
 	switch (mode) {
 	case XFS_DAX_INODE:
-		mp->m_flags &= ~(XFS_MOUNT_DAX_ALWAYS | XFS_MOUNT_DAX_NEVER);
+		mp->m_features &= ~(XFS_FEAT_DAX_ALWAYS | XFS_FEAT_DAX_NEVER);
 		break;
 	case XFS_DAX_ALWAYS:
-		mp->m_flags |= XFS_MOUNT_DAX_ALWAYS;
-		mp->m_flags &= ~XFS_MOUNT_DAX_NEVER;
+		mp->m_features |= XFS_FEAT_DAX_ALWAYS;
+		mp->m_features &= ~XFS_FEAT_DAX_NEVER;
 		break;
 	case XFS_DAX_NEVER:
-		mp->m_flags |= XFS_MOUNT_DAX_NEVER;
-		mp->m_flags &= ~XFS_MOUNT_DAX_ALWAYS;
+		mp->m_features |= XFS_FEAT_DAX_NEVER;
+		mp->m_features &= ~XFS_FEAT_DAX_ALWAYS;
 		break;
 	}
 }
@@ -154,33 +176,32 @@ xfs_fs_show_options(
 {
 	static struct proc_xfs_info xfs_info_set[] = {
 		/* the few simple ones we can get from the mount struct */
-		{ XFS_MOUNT_IKEEP,		",ikeep" },
-		{ XFS_MOUNT_WSYNC,		",wsync" },
-		{ XFS_MOUNT_NOALIGN,		",noalign" },
-		{ XFS_MOUNT_SWALLOC,		",swalloc" },
-		{ XFS_MOUNT_NOUUID,		",nouuid" },
-		{ XFS_MOUNT_NORECOVERY,		",norecovery" },
-		{ XFS_MOUNT_ATTR2,		",attr2" },
-		{ XFS_MOUNT_FILESTREAMS,	",filestreams" },
-		{ XFS_MOUNT_GRPID,		",grpid" },
-		{ XFS_MOUNT_DISCARD,		",discard" },
-		{ XFS_MOUNT_LARGEIO,		",largeio" },
-		{ XFS_MOUNT_DAX_ALWAYS,		",dax=always" },
-		{ XFS_MOUNT_DAX_NEVER,		",dax=never" },
+		{ XFS_FEAT_IKEEP,		",ikeep" },
+		{ XFS_FEAT_WSYNC,		",wsync" },
+		{ XFS_FEAT_NOALIGN,		",noalign" },
+		{ XFS_FEAT_SWALLOC,		",swalloc" },
+		{ XFS_FEAT_NOUUID,		",nouuid" },
+		{ XFS_FEAT_NORECOVERY,		",norecovery" },
+		{ XFS_FEAT_ATTR2,		",attr2" },
+		{ XFS_FEAT_FILESTREAMS,		",filestreams" },
+		{ XFS_FEAT_GRPID,		",grpid" },
+		{ XFS_FEAT_DISCARD,		",discard" },
+		{ XFS_FEAT_LARGE_IOSIZE,	",largeio" },
+		{ XFS_FEAT_DAX_ALWAYS,		",dax=always" },
+		{ XFS_FEAT_DAX_NEVER,		",dax=never" },
 		{ 0, NULL }
 	};
 	struct xfs_mount	*mp = XFS_M(root->d_sb);
 	struct proc_xfs_info	*xfs_infop;
 
 	for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
-		if (mp->m_flags & xfs_infop->flag)
+		if (mp->m_features & xfs_infop->flag)
 			seq_puts(m, xfs_infop->str);
 	}
 
-	seq_printf(m, ",inode%d",
-		(mp->m_flags & XFS_MOUNT_SMALL_INUMS) ? 32 : 64);
+	seq_printf(m, ",inode%d", xfs_has_small_inums(mp) ? 32 : 64);
 
-	if (mp->m_flags & XFS_MOUNT_ALLOCSIZE)
+	if (xfs_has_allocsize(mp))
 		seq_printf(m, ",allocsize=%dk",
 			   (1 << mp->m_allocsize_log) >> 10);
 
@@ -201,25 +222,20 @@ xfs_fs_show_options(
 		seq_printf(m, ",swidth=%d",
 				(int)XFS_FSB_TO_BB(mp, mp->m_swidth));
 
-	if (mp->m_qflags & XFS_UQUOTA_ACCT) {
-		if (mp->m_qflags & XFS_UQUOTA_ENFD)
-			seq_puts(m, ",usrquota");
-		else
-			seq_puts(m, ",uqnoenforce");
-	}
+	if (mp->m_qflags & XFS_UQUOTA_ENFD)
+		seq_puts(m, ",usrquota");
+	else if (mp->m_qflags & XFS_UQUOTA_ACCT)
+		seq_puts(m, ",uqnoenforce");
 
-	if (mp->m_qflags & XFS_PQUOTA_ACCT) {
-		if (mp->m_qflags & XFS_PQUOTA_ENFD)
-			seq_puts(m, ",prjquota");
-		else
-			seq_puts(m, ",pqnoenforce");
-	}
-	if (mp->m_qflags & XFS_GQUOTA_ACCT) {
-		if (mp->m_qflags & XFS_GQUOTA_ENFD)
-			seq_puts(m, ",grpquota");
-		else
-			seq_puts(m, ",gqnoenforce");
-	}
+	if (mp->m_qflags & XFS_PQUOTA_ENFD)
+		seq_puts(m, ",prjquota");
+	else if (mp->m_qflags & XFS_PQUOTA_ACCT)
+		seq_puts(m, ",pqnoenforce");
+
+	if (mp->m_qflags & XFS_GQUOTA_ENFD)
+		seq_puts(m, ",grpquota");
+	else if (mp->m_qflags & XFS_GQUOTA_ACCT)
+		seq_puts(m, ",gqnoenforce");
 
 	if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
 		seq_puts(m, ",noquota");
@@ -230,11 +246,11 @@ xfs_fs_show_options(
 /*
  * Set parameters for inode allocation heuristics, taking into account
  * filesystem size and inode32/inode64 mount options; i.e. specifically
- * whether or not XFS_MOUNT_SMALL_INUMS is set.
+ * whether or not XFS_FEAT_SMALL_INUMS is set.
  *
  * Inode allocation patterns are altered only if inode32 is requested
- * (XFS_MOUNT_SMALL_INUMS), and the filesystem is sufficiently large.
- * If altered, XFS_MOUNT_32BITINODES is set as well.
+ * (XFS_FEAT_SMALL_INUMS), and the filesystem is sufficiently large.
+ * If altered, XFS_OPSTATE_INODE32 is set as well.
  *
  * An agcount independent of that in the mount structure is provided
  * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
@@ -276,13 +292,13 @@ xfs_set_inode_alloc(
 
 	/*
 	 * If user asked for no more than 32-bit inodes, and the fs is
-	 * sufficiently large, set XFS_MOUNT_32BITINODES if we must alter
+	 * sufficiently large, set XFS_OPSTATE_INODE32 if we must alter
 	 * the allocator to accommodate the request.
 	 */
-	if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
-		mp->m_flags |= XFS_MOUNT_32BITINODES;
+	if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32)
+		set_bit(XFS_OPSTATE_INODE32, &mp->m_opstate);
 	else
-		mp->m_flags &= ~XFS_MOUNT_32BITINODES;
+		clear_bit(XFS_OPSTATE_INODE32, &mp->m_opstate);
 
 	for (index = 0; index < agcount; index++) {
 		struct xfs_perag	*pag;
@@ -291,7 +307,7 @@ xfs_set_inode_alloc(
 
 		pag = xfs_perag_get(mp, index);
 
-		if (mp->m_flags & XFS_MOUNT_32BITINODES) {
+		if (xfs_is_inode32(mp)) {
 			if (ino > XFS_MAXINUMBER_32) {
 				pag->pagi_inodeok = 0;
 				pag->pagf_metadata = 0;
@@ -311,7 +327,16 @@ xfs_set_inode_alloc(
 		xfs_perag_put(pag);
 	}
 
-	return (mp->m_flags & XFS_MOUNT_32BITINODES) ? maxagi : agcount;
+	return xfs_is_inode32(mp) ? maxagi : agcount;
+}
+
+static bool
+xfs_buftarg_is_dax(
+	struct super_block	*sb,
+	struct xfs_buftarg	*bt)
+{
+	return dax_supported(bt->bt_daxdev, bt->bt_bdev, sb->s_blocksize, 0,
+			bdev_nr_sectors(bt->bt_bdev));
 }
 
 STATIC int
@@ -468,7 +493,7 @@ xfs_setup_devices(
 	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
 		unsigned int	log_sector_size = BBSIZE;
 
-		if (xfs_sb_version_hassector(&mp->m_sb))
+		if (xfs_has_sector(mp))
 			log_sector_size = mp->m_sb.sb_logsectsize;
 		error = xfs_setsize_buftarg(mp->m_logdev_targp,
 					    log_sector_size);
@@ -501,37 +526,37 @@ xfs_init_mount_workqueues(
 	if (!mp->m_unwritten_workqueue)
 		goto out_destroy_buf;
 
-	mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s",
-			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_UNBOUND),
-			0, mp->m_super->s_id);
-	if (!mp->m_cil_workqueue)
-		goto out_destroy_unwritten;
-
 	mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
 			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
 			0, mp->m_super->s_id);
 	if (!mp->m_reclaim_workqueue)
-		goto out_destroy_cil;
+		goto out_destroy_unwritten;
 
-	mp->m_gc_workqueue = alloc_workqueue("xfs-gc/%s",
-			WQ_SYSFS | WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM,
+	mp->m_blockgc_wq = alloc_workqueue("xfs-blockgc/%s",
+			XFS_WQFLAGS(WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM),
 			0, mp->m_super->s_id);
-	if (!mp->m_gc_workqueue)
+	if (!mp->m_blockgc_wq)
 		goto out_destroy_reclaim;
 
+	mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s",
+			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
+			1, mp->m_super->s_id);
+	if (!mp->m_inodegc_wq)
+		goto out_destroy_blockgc;
+
 	mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s",
 			XFS_WQFLAGS(WQ_FREEZABLE), 0, mp->m_super->s_id);
 	if (!mp->m_sync_workqueue)
-		goto out_destroy_eofb;
+		goto out_destroy_inodegc;
 
 	return 0;
 
-out_destroy_eofb:
-	destroy_workqueue(mp->m_gc_workqueue);
+out_destroy_inodegc:
+	destroy_workqueue(mp->m_inodegc_wq);
+out_destroy_blockgc:
+	destroy_workqueue(mp->m_blockgc_wq);
 out_destroy_reclaim:
 	destroy_workqueue(mp->m_reclaim_workqueue);
-out_destroy_cil:
-	destroy_workqueue(mp->m_cil_workqueue);
 out_destroy_unwritten:
 	destroy_workqueue(mp->m_unwritten_workqueue);
 out_destroy_buf:
@@ -545,9 +570,9 @@ xfs_destroy_mount_workqueues(
 	struct xfs_mount	*mp)
 {
 	destroy_workqueue(mp->m_sync_workqueue);
-	destroy_workqueue(mp->m_gc_workqueue);
+	destroy_workqueue(mp->m_blockgc_wq);
+	destroy_workqueue(mp->m_inodegc_wq);
 	destroy_workqueue(mp->m_reclaim_workqueue);
-	destroy_workqueue(mp->m_cil_workqueue);
 	destroy_workqueue(mp->m_unwritten_workqueue);
 	destroy_workqueue(mp->m_buf_workqueue);
 }
@@ -596,32 +621,6 @@ xfs_fs_alloc_inode(
 	return NULL;
 }
 
-#ifdef DEBUG
-static void
-xfs_check_delalloc(
-	struct xfs_inode	*ip,
-	int			whichfork)
-{
-	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
-	struct xfs_bmbt_irec	got;
-	struct xfs_iext_cursor	icur;
-
-	if (!ifp || !xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got))
-		return;
-	do {
-		if (isnullstartblock(got.br_startblock)) {
-			xfs_warn(ip->i_mount,
-	"ino %llx %s fork has delalloc extent at [0x%llx:0x%llx]",
-				ip->i_ino,
-				whichfork == XFS_DATA_FORK ? "data" : "cow",
-				got.br_startoff, got.br_blockcount);
-		}
-	} while (xfs_iext_next_extent(ifp, &icur, &got));
-}
-#else
-#define xfs_check_delalloc(ip, whichfork)	do { } while (0)
-#endif
-
 /*
  * Now that the generic code is guaranteed not to be accessing
  * the linux inode, we can inactivate and reclaim the inode.
@@ -637,30 +636,6 @@ xfs_fs_destroy_inode(
 	ASSERT(!rwsem_is_locked(&inode->i_rwsem));
 	XFS_STATS_INC(ip->i_mount, vn_rele);
 	XFS_STATS_INC(ip->i_mount, vn_remove);
-
-	xfs_inactive(ip);
-
-	if (!XFS_FORCED_SHUTDOWN(ip->i_mount) && ip->i_delayed_blks) {
-		xfs_check_delalloc(ip, XFS_DATA_FORK);
-		xfs_check_delalloc(ip, XFS_COW_FORK);
-		ASSERT(0);
-	}
-
-	XFS_STATS_INC(ip->i_mount, vn_reclaim);
-
-	/*
-	 * We should never get here with one of the reclaim flags already set.
-	 */
-	ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
-	ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
-
-	/*
-	 * We always use background reclaim here because even if the inode is
-	 * clean, it still may be under IO and hence we have wait for IO
-	 * completion to occur before we can reclaim the inode. The background
-	 * reclaim path handles this more efficiently than we can here, so
-	 * simply let background reclaim tear down all inodes.
-	 */
 	xfs_inode_mark_reclaimable(ip);
 }
 
@@ -732,7 +707,7 @@ xfs_fs_drop_inode(
 	 * that.  See the comment for this inode flag.
 	 */
 	if (ip->i_flags & XFS_IRECOVERY) {
-		ASSERT(ip->i_mount->m_log->l_flags & XLOG_RECOVERY_NEEDED);
+		ASSERT(xlog_recovery_needed(ip->i_mount->m_log));
 		return 0;
 	}
 
@@ -755,6 +730,8 @@ xfs_fs_sync_fs(
 {
 	struct xfs_mount	*mp = XFS_M(sb);
 
+	trace_xfs_fs_sync_fs(mp, __return_address);
+
 	/*
 	 * Doing anything during the async pass would be counterproductive.
 	 */
@@ -771,6 +748,25 @@ xfs_fs_sync_fs(
 		flush_delayed_work(&mp->m_log->l_work);
 	}
 
+	/*
+	 * If we are called with page faults frozen out, it means we are about
+	 * to freeze the transaction subsystem. Take the opportunity to shut
+	 * down inodegc because once SB_FREEZE_FS is set it's too late to
+	 * prevent inactivation races with freeze. The fs doesn't get called
+	 * again by the freezing process until after SB_FREEZE_FS has been set,
+	 * so it's now or never.  Same logic applies to speculative allocation
+	 * garbage collection.
+	 *
+	 * We don't care if this is a normal syncfs call that does this or
+	 * freeze that does this - we can run this multiple times without issue
+	 * and we won't race with a restart because a restart can only occur
+	 * when the state is either SB_FREEZE_FS or SB_FREEZE_COMPLETE.
+	 */
+	if (sb->s_writers.frozen == SB_FREEZE_PAGEFAULT) {
+		xfs_inodegc_stop(mp);
+		xfs_blockgc_stop(mp);
+	}
+
 	return 0;
 }
 
@@ -789,6 +785,9 @@ xfs_fs_statfs(
 	xfs_extlen_t		lsize;
 	int64_t			ffree;
 
+	/* Wait for whatever inactivations are in progress. */
+	xfs_inodegc_flush(mp);
+
 	statp->f_type = XFS_SUPER_MAGIC;
 	statp->f_namelen = MAXNAMELEN - 1;
 
@@ -884,10 +883,22 @@ xfs_fs_freeze(
 	 * set a GFP_NOFS context here to avoid recursion deadlocks.
 	 */
 	flags = memalloc_nofs_save();
-	xfs_blockgc_stop(mp);
 	xfs_save_resvblks(mp);
 	ret = xfs_log_quiesce(mp);
 	memalloc_nofs_restore(flags);
+
+	/*
+	 * For read-write filesystems, we need to restart the inodegc on error
+	 * because we stopped it at SB_FREEZE_PAGEFAULT level and a thaw is not
+	 * going to be run to restart it now.  We are at SB_FREEZE_FS level
+	 * here, so we can restart safely without racing with a stop in
+	 * xfs_fs_sync_fs().
+	 */
+	if (ret && !xfs_is_readonly(mp)) {
+		xfs_blockgc_start(mp);
+		xfs_inodegc_start(mp);
+	}
+
 	return ret;
 }
 
@@ -899,7 +910,18 @@ xfs_fs_unfreeze(
 
 	xfs_restore_resvblks(mp);
 	xfs_log_work_queue(mp);
-	xfs_blockgc_start(mp);
+
+	/*
+	 * Don't reactivate the inodegc worker on a readonly filesystem because
+	 * inodes are sent directly to reclaim.  Don't reactivate the blockgc
+	 * worker because there are no speculative preallocations on a readonly
+	 * filesystem.
+	 */
+	if (!xfs_is_readonly(mp)) {
+		xfs_blockgc_start(mp);
+		xfs_inodegc_start(mp);
+	}
+
 	return 0;
 }
 
@@ -911,10 +933,8 @@ STATIC int
 xfs_finish_flags(
 	struct xfs_mount	*mp)
 {
-	int			ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
-
 	/* Fail a mount where the logbuf is smaller than the log stripe */
-	if (xfs_sb_version_haslogv2(&mp->m_sb)) {
+	if (xfs_has_logv2(mp)) {
 		if (mp->m_logbsize <= 0 &&
 		    mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
 			mp->m_logbsize = mp->m_sb.sb_logsunit;
@@ -936,33 +956,24 @@ xfs_finish_flags(
 	/*
 	 * V5 filesystems always use attr2 format for attributes.
 	 */
-	if (xfs_sb_version_hascrc(&mp->m_sb) &&
-	    (mp->m_flags & XFS_MOUNT_NOATTR2)) {
+	if (xfs_has_crc(mp) && xfs_has_noattr2(mp)) {
 		xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
 			     "attr2 is always enabled for V5 filesystems.");
 		return -EINVAL;
 	}
 
 	/*
-	 * mkfs'ed attr2 will turn on attr2 mount unless explicitly
-	 * told by noattr2 to turn it off
-	 */
-	if (xfs_sb_version_hasattr2(&mp->m_sb) &&
-	    !(mp->m_flags & XFS_MOUNT_NOATTR2))
-		mp->m_flags |= XFS_MOUNT_ATTR2;
-
-	/*
 	 * prohibit r/w mounts of read-only filesystems
 	 */
-	if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
+	if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !xfs_is_readonly(mp)) {
 		xfs_warn(mp,
 			"cannot mount a read-only filesystem as read-write");
 		return -EROFS;
 	}
 
-	if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
-	    (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE)) &&
-	    !xfs_sb_version_has_pquotino(&mp->m_sb)) {
+	if ((mp->m_qflags & XFS_GQUOTA_ACCT) &&
+	    (mp->m_qflags & XFS_PQUOTA_ACCT) &&
+	    !xfs_has_pquotino(mp)) {
 		xfs_warn(mp,
 		  "Super block does not support project and group quota together");
 		return -EINVAL;
@@ -1020,11 +1031,40 @@ xfs_destroy_percpu_counters(
 	percpu_counter_destroy(&mp->m_icount);
 	percpu_counter_destroy(&mp->m_ifree);
 	percpu_counter_destroy(&mp->m_fdblocks);
-	ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
+	ASSERT(xfs_is_shutdown(mp) ||
 	       percpu_counter_sum(&mp->m_delalloc_blks) == 0);
 	percpu_counter_destroy(&mp->m_delalloc_blks);
 }
 
+static int
+xfs_inodegc_init_percpu(
+	struct xfs_mount	*mp)
+{
+	struct xfs_inodegc	*gc;
+	int			cpu;
+
+	mp->m_inodegc = alloc_percpu(struct xfs_inodegc);
+	if (!mp->m_inodegc)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		gc = per_cpu_ptr(mp->m_inodegc, cpu);
+		init_llist_head(&gc->list);
+		gc->items = 0;
+		INIT_WORK(&gc->work, xfs_inodegc_worker);
+	}
+	return 0;
+}
+
+static void
+xfs_inodegc_free_percpu(
+	struct xfs_mount	*mp)
+{
+	if (!mp->m_inodegc)
+		return;
+	free_percpu(mp->m_inodegc);
+}
+
 static void
 xfs_fs_put_super(
 	struct super_block	*sb)
@@ -1041,6 +1081,8 @@ xfs_fs_put_super(
 
 	xfs_freesb(mp);
 	free_percpu(mp->m_stats.xs_stats);
+	xfs_mount_list_del(mp);
+	xfs_inodegc_free_percpu(mp);
 	xfs_destroy_percpu_counters(mp);
 	xfs_destroy_mount_workqueues(mp);
 	xfs_close_devices(mp);
@@ -1129,7 +1171,7 @@ xfs_fs_warn_deprecated(
 	 * already had the flag set
 	 */
 	if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) &&
-			!!(XFS_M(fc->root->d_sb)->m_flags & flag) == value)
+            !!(XFS_M(fc->root->d_sb)->m_features & flag) == value)
 		return;
 	xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key);
 }
@@ -1177,27 +1219,27 @@ xfs_fs_parse_param(
 		if (suffix_kstrtoint(param->string, 10, &size))
 			return -EINVAL;
 		parsing_mp->m_allocsize_log = ffs(size) - 1;
-		parsing_mp->m_flags |= XFS_MOUNT_ALLOCSIZE;
+		parsing_mp->m_features |= XFS_FEAT_ALLOCSIZE;
 		return 0;
 	case Opt_grpid:
 	case Opt_bsdgroups:
-		parsing_mp->m_flags |= XFS_MOUNT_GRPID;
+		parsing_mp->m_features |= XFS_FEAT_GRPID;
 		return 0;
 	case Opt_nogrpid:
 	case Opt_sysvgroups:
-		parsing_mp->m_flags &= ~XFS_MOUNT_GRPID;
+		parsing_mp->m_features &= ~XFS_FEAT_GRPID;
 		return 0;
 	case Opt_wsync:
-		parsing_mp->m_flags |= XFS_MOUNT_WSYNC;
+		parsing_mp->m_features |= XFS_FEAT_WSYNC;
 		return 0;
 	case Opt_norecovery:
-		parsing_mp->m_flags |= XFS_MOUNT_NORECOVERY;
+		parsing_mp->m_features |= XFS_FEAT_NORECOVERY;
 		return 0;
 	case Opt_noalign:
-		parsing_mp->m_flags |= XFS_MOUNT_NOALIGN;
+		parsing_mp->m_features |= XFS_FEAT_NOALIGN;
 		return 0;
 	case Opt_swalloc:
-		parsing_mp->m_flags |= XFS_MOUNT_SWALLOC;
+		parsing_mp->m_features |= XFS_FEAT_SWALLOC;
 		return 0;
 	case Opt_sunit:
 		parsing_mp->m_dalign = result.uint_32;
@@ -1206,62 +1248,58 @@ xfs_fs_parse_param(
 		parsing_mp->m_swidth = result.uint_32;
 		return 0;
 	case Opt_inode32:
-		parsing_mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
+		parsing_mp->m_features |= XFS_FEAT_SMALL_INUMS;
 		return 0;
 	case Opt_inode64:
-		parsing_mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
+		parsing_mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
 		return 0;
 	case Opt_nouuid:
-		parsing_mp->m_flags |= XFS_MOUNT_NOUUID;
+		parsing_mp->m_features |= XFS_FEAT_NOUUID;
 		return 0;
 	case Opt_largeio:
-		parsing_mp->m_flags |= XFS_MOUNT_LARGEIO;
+		parsing_mp->m_features |= XFS_FEAT_LARGE_IOSIZE;
 		return 0;
 	case Opt_nolargeio:
-		parsing_mp->m_flags &= ~XFS_MOUNT_LARGEIO;
+		parsing_mp->m_features &= ~XFS_FEAT_LARGE_IOSIZE;
 		return 0;
 	case Opt_filestreams:
-		parsing_mp->m_flags |= XFS_MOUNT_FILESTREAMS;
+		parsing_mp->m_features |= XFS_FEAT_FILESTREAMS;
 		return 0;
 	case Opt_noquota:
 		parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
 		parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
-		parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
 		return 0;
 	case Opt_quota:
 	case Opt_uquota:
 	case Opt_usrquota:
-		parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
-				 XFS_UQUOTA_ENFD);
+		parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD);
 		return 0;
 	case Opt_qnoenforce:
 	case Opt_uqnoenforce:
-		parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
+		parsing_mp->m_qflags |= XFS_UQUOTA_ACCT;
 		parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD;
 		return 0;
 	case Opt_pquota:
 	case Opt_prjquota:
-		parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
-				 XFS_PQUOTA_ENFD);
+		parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD);
 		return 0;
 	case Opt_pqnoenforce:
-		parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
+		parsing_mp->m_qflags |= XFS_PQUOTA_ACCT;
 		parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD;
 		return 0;
 	case Opt_gquota:
 	case Opt_grpquota:
-		parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
-				 XFS_GQUOTA_ENFD);
+		parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ENFD);
 		return 0;
 	case Opt_gqnoenforce:
-		parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
+		parsing_mp->m_qflags |= XFS_GQUOTA_ACCT;
 		parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD;
 		return 0;
 	case Opt_discard:
-		parsing_mp->m_flags |= XFS_MOUNT_DISCARD;
+		parsing_mp->m_features |= XFS_FEAT_DISCARD;
 		return 0;
 	case Opt_nodiscard:
-		parsing_mp->m_flags &= ~XFS_MOUNT_DISCARD;
+		parsing_mp->m_features &= ~XFS_FEAT_DISCARD;
 		return 0;
 #ifdef CONFIG_FS_DAX
 	case Opt_dax:
@@ -1273,21 +1311,20 @@ xfs_fs_parse_param(
 #endif
 	/* Following mount options will be removed in September 2025 */
 	case Opt_ikeep:
-		xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_IKEEP, true);
-		parsing_mp->m_flags |= XFS_MOUNT_IKEEP;
+		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, true);
+		parsing_mp->m_features |= XFS_FEAT_IKEEP;
 		return 0;
 	case Opt_noikeep:
-		xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_IKEEP, false);
-		parsing_mp->m_flags &= ~XFS_MOUNT_IKEEP;
+		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, false);
+		parsing_mp->m_features &= ~XFS_FEAT_IKEEP;
 		return 0;
 	case Opt_attr2:
-		xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_ATTR2, true);
-		parsing_mp->m_flags |= XFS_MOUNT_ATTR2;
+		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_ATTR2, true);
+		parsing_mp->m_features |= XFS_FEAT_ATTR2;
 		return 0;
 	case Opt_noattr2:
-		xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_NOATTR2, true);
-		parsing_mp->m_flags &= ~XFS_MOUNT_ATTR2;
-		parsing_mp->m_flags |= XFS_MOUNT_NOATTR2;
+		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_NOATTR2, true);
+		parsing_mp->m_features |= XFS_FEAT_NOATTR2;
 		return 0;
 	default:
 		xfs_warn(parsing_mp, "unknown mount option [%s].", param->key);
@@ -1301,17 +1338,23 @@ static int
 xfs_fs_validate_params(
 	struct xfs_mount	*mp)
 {
+	/* No recovery flag requires a read-only mount */
+	if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) {
+		xfs_warn(mp, "no-recovery mounts must be read-only.");
+		return -EINVAL;
+	}
+
 	/*
-	 * no recovery flag requires a read-only mount
+	 * We have not read the superblock at this point, so only the attr2
+	 * mount option can set the attr2 feature by this stage.
 	 */
-	if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
-	    !(mp->m_flags & XFS_MOUNT_RDONLY)) {
-		xfs_warn(mp, "no-recovery mounts must be read-only.");
+	if (xfs_has_attr2(mp) && xfs_has_noattr2(mp)) {
+		xfs_warn(mp, "attr2 and noattr2 cannot both be specified.");
 		return -EINVAL;
 	}
 
-	if ((mp->m_flags & XFS_MOUNT_NOALIGN) &&
-	    (mp->m_dalign || mp->m_swidth)) {
+
+	if (xfs_has_noalign(mp) && (mp->m_dalign || mp->m_swidth)) {
 		xfs_warn(mp,
 	"sunit and swidth options incompatible with the noalign option");
 		return -EINVAL;
@@ -1355,7 +1398,7 @@ xfs_fs_validate_params(
 		return -EINVAL;
 	}
 
-	if ((mp->m_flags & XFS_MOUNT_ALLOCSIZE) &&
+	if (xfs_has_allocsize(mp) &&
 	    (mp->m_allocsize_log > XFS_MAX_IO_LOG ||
 	     mp->m_allocsize_log < XFS_MIN_IO_LOG)) {
 		xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
@@ -1416,11 +1459,22 @@ xfs_fs_fill_super(
 	if (error)
 		goto out_destroy_workqueues;
 
+	error = xfs_inodegc_init_percpu(mp);
+	if (error)
+		goto out_destroy_counters;
+
+	/*
+	 * All percpu data structures requiring cleanup when a cpu goes offline
+	 * must be allocated before adding this @mp to the cpu-dead handler's
+	 * mount list.
+	 */
+	xfs_mount_list_add(mp);
+
 	/* Allocate stats memory before we do operations that might use it */
 	mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
 	if (!mp->m_stats.xs_stats) {
 		error = -ENOMEM;
-		goto out_destroy_counters;
+		goto out_destroy_inodegc;
 	}
 
 	error = xfs_readsb(mp, flags);
@@ -1436,7 +1490,7 @@ xfs_fs_fill_super(
 		goto out_free_sb;
 
 	/* V4 support is undergoing deprecation. */
-	if (!xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (!xfs_has_crc(mp)) {
 #ifdef CONFIG_XFS_SUPPORT_V4
 		xfs_warn_once(mp,
 	"Deprecated V4 format (crc=0) will not be supported after September 2030.");
@@ -1449,7 +1503,7 @@ xfs_fs_fill_super(
 	}
 
 	/* Filesystem claims it needs repair, so refuse the mount. */
-	if (xfs_sb_version_needsrepair(&mp->m_sb)) {
+	if (xfs_has_needsrepair(mp)) {
 		xfs_warn(mp, "Filesystem needs repair.  Please run xfs_repair.");
 		error = -EFSCORRUPTED;
 		goto out_free_sb;
@@ -1521,7 +1575,7 @@ xfs_fs_fill_super(
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
 	sb->s_max_links = XFS_MAXLINK;
 	sb->s_time_gran = 1;
-	if (xfs_sb_version_hasbigtime(&mp->m_sb)) {
+	if (xfs_has_bigtime(mp)) {
 		sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN);
 		sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX);
 	} else {
@@ -1534,30 +1588,25 @@ xfs_fs_fill_super(
 	set_posix_acl_flag(sb);
 
 	/* version 5 superblocks support inode version counters. */
-	if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
+	if (xfs_has_crc(mp))
 		sb->s_flags |= SB_I_VERSION;
 
-	if (xfs_sb_version_hasbigtime(&mp->m_sb))
-		xfs_warn(mp,
- "EXPERIMENTAL big timestamp feature in use. Use at your own risk!");
-
-	if (mp->m_flags & XFS_MOUNT_DAX_ALWAYS) {
+	if (xfs_has_dax_always(mp)) {
 		bool rtdev_is_dax = false, datadev_is_dax;
 
 		xfs_warn(mp,
 		"DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
 
-		datadev_is_dax = bdev_dax_supported(mp->m_ddev_targp->bt_bdev,
-			sb->s_blocksize);
+		datadev_is_dax = xfs_buftarg_is_dax(sb, mp->m_ddev_targp);
 		if (mp->m_rtdev_targp)
-			rtdev_is_dax = bdev_dax_supported(
-				mp->m_rtdev_targp->bt_bdev, sb->s_blocksize);
+			rtdev_is_dax = xfs_buftarg_is_dax(sb,
+						mp->m_rtdev_targp);
 		if (!rtdev_is_dax && !datadev_is_dax) {
 			xfs_alert(mp,
 			"DAX unsupported by block device. Turning off DAX.");
 			xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
 		}
-		if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+		if (xfs_has_reflink(mp)) {
 			xfs_alert(mp,
 		"DAX and reflink cannot be used together!");
 			error = -EINVAL;
@@ -1565,17 +1614,17 @@ xfs_fs_fill_super(
 		}
 	}
 
-	if (mp->m_flags & XFS_MOUNT_DISCARD) {
+	if (xfs_has_discard(mp)) {
 		struct request_queue *q = bdev_get_queue(sb->s_bdev);
 
 		if (!blk_queue_discard(q)) {
 			xfs_warn(mp, "mounting with \"discard\" option, but "
 					"the device does not support discard");
-			mp->m_flags &= ~XFS_MOUNT_DISCARD;
+			mp->m_features &= ~XFS_FEAT_DISCARD;
 		}
 	}
 
-	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+	if (xfs_has_reflink(mp)) {
 		if (mp->m_sb.sb_rblocks) {
 			xfs_alert(mp,
 	"reflink not compatible with realtime device!");
@@ -1589,17 +1638,13 @@ xfs_fs_fill_super(
 		}
 	}
 
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb) && mp->m_sb.sb_rblocks) {
+	if (xfs_has_rmapbt(mp) && mp->m_sb.sb_rblocks) {
 		xfs_alert(mp,
 	"reverse mapping btree not compatible with realtime device!");
 		error = -EINVAL;
 		goto out_filestream_unmount;
 	}
 
-	if (xfs_sb_version_hasinobtcounts(&mp->m_sb))
-		xfs_warn(mp,
- "EXPERIMENTAL inode btree counters feature in use. Use at your own risk!");
-
 	error = xfs_mountfs(mp);
 	if (error)
 		goto out_filestream_unmount;
@@ -1623,6 +1668,9 @@ xfs_fs_fill_super(
 	xfs_freesb(mp);
  out_free_stats:
 	free_percpu(mp->m_stats.xs_stats);
+ out_destroy_inodegc:
+	xfs_mount_list_del(mp);
+	xfs_inodegc_free_percpu(mp);
  out_destroy_counters:
 	xfs_destroy_percpu_counters(mp);
  out_destroy_workqueues:
@@ -1654,13 +1702,13 @@ xfs_remount_rw(
 	struct xfs_sb		*sbp = &mp->m_sb;
 	int error;
 
-	if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
+	if (xfs_has_norecovery(mp)) {
 		xfs_warn(mp,
 			"ro->rw transition prohibited on norecovery mount");
 		return -EINVAL;
 	}
 
-	if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
+	if (xfs_sb_is_v5(sbp) &&
 	    xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
 		xfs_warn(mp,
 	"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
@@ -1669,7 +1717,7 @@ xfs_remount_rw(
 		return -EINVAL;
 	}
 
-	mp->m_flags &= ~XFS_MOUNT_RDONLY;
+	clear_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
 
 	/*
 	 * If this is the first remount to writeable state we might have some
@@ -1706,6 +1754,9 @@ xfs_remount_rw(
 	if (error && error != -ENOSPC)
 		return error;
 
+	/* Re-enable the background inode inactivation worker. */
+	xfs_inodegc_start(mp);
+
 	return 0;
 }
 
@@ -1728,6 +1779,15 @@ xfs_remount_ro(
 		return error;
 	}
 
+	/*
+	 * Stop the inodegc background worker.  xfs_fs_reconfigure already
+	 * flushed all pending inodegc work when it sync'd the filesystem.
+	 * The VFS holds s_umount, so we know that inodes cannot enter
+	 * xfs_fs_destroy_inode during a remount operation.  In readonly mode
+	 * we send inodes straight to reclaim, so no inodes will be queued.
+	 */
+	xfs_inodegc_stop(mp);
+
 	/* Free the per-AG metadata reservation pool. */
 	error = xfs_fs_unreserve_ag_blocks(mp);
 	if (error) {
@@ -1745,7 +1805,7 @@ xfs_remount_ro(
 	xfs_save_resvblks(mp);
 
 	xfs_log_clean(mp);
-	mp->m_flags |= XFS_MOUNT_RDONLY;
+	set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
 
 	return 0;
 }
@@ -1768,12 +1828,11 @@ xfs_fs_reconfigure(
 {
 	struct xfs_mount	*mp = XFS_M(fc->root->d_sb);
 	struct xfs_mount        *new_mp = fc->s_fs_info;
-	xfs_sb_t		*sbp = &mp->m_sb;
 	int			flags = fc->sb_flags;
 	int			error;
 
 	/* version 5 superblocks always support version counters. */
-	if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
+	if (xfs_has_crc(mp))
 		fc->sb_flags |= SB_I_VERSION;
 
 	error = xfs_fs_validate_params(new_mp);
@@ -1783,28 +1842,26 @@ xfs_fs_reconfigure(
 	sync_filesystem(mp->m_super);
 
 	/* inode32 -> inode64 */
-	if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) &&
-	    !(new_mp->m_flags & XFS_MOUNT_SMALL_INUMS)) {
-		mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
-		mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
+	if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) {
+		mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
+		mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
 	}
 
 	/* inode64 -> inode32 */
-	if (!(mp->m_flags & XFS_MOUNT_SMALL_INUMS) &&
-	    (new_mp->m_flags & XFS_MOUNT_SMALL_INUMS)) {
-		mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
-		mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
+	if (!xfs_has_small_inums(mp) && xfs_has_small_inums(new_mp)) {
+		mp->m_features |= XFS_FEAT_SMALL_INUMS;
+		mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
 	}
 
 	/* ro -> rw */
-	if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(flags & SB_RDONLY)) {
+	if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) {
 		error = xfs_remount_rw(mp);
 		if (error)
 			return error;
 	}
 
 	/* rw -> ro */
-	if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (flags & SB_RDONLY)) {
+	if (!xfs_is_readonly(mp) && (flags & SB_RDONLY)) {
 		error = xfs_remount_ro(mp);
 		if (error)
 			return error;
@@ -1871,11 +1928,11 @@ static int xfs_init_fs_context(
 	 * Copy binary VFS mount flags we are interested in.
 	 */
 	if (fc->sb_flags & SB_RDONLY)
-		mp->m_flags |= XFS_MOUNT_RDONLY;
+		set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
 	if (fc->sb_flags & SB_DIRSYNC)
-		mp->m_flags |= XFS_MOUNT_DIRSYNC;
+		mp->m_features |= XFS_FEAT_DIRSYNC;
 	if (fc->sb_flags & SB_SYNCHRONOUS)
-		mp->m_flags |= XFS_MOUNT_WSYNC;
+		mp->m_features |= XFS_FEAT_WSYNC;
 
 	fc->s_fs_info = mp;
 	fc->ops = &xfs_context_ops;
@@ -2118,6 +2175,48 @@ xfs_destroy_workqueues(void)
 	destroy_workqueue(xfs_alloc_wq);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+static int
+xfs_cpu_dead(
+	unsigned int		cpu)
+{
+	struct xfs_mount	*mp, *n;
+
+	spin_lock(&xfs_mount_list_lock);
+	list_for_each_entry_safe(mp, n, &xfs_mount_list, m_mount_list) {
+		spin_unlock(&xfs_mount_list_lock);
+		xfs_inodegc_cpu_dead(mp, cpu);
+		spin_lock(&xfs_mount_list_lock);
+	}
+	spin_unlock(&xfs_mount_list_lock);
+	return 0;
+}
+
+static int __init
+xfs_cpu_hotplug_init(void)
+{
+	int	error;
+
+	error = cpuhp_setup_state_nocalls(CPUHP_XFS_DEAD, "xfs:dead", NULL,
+			xfs_cpu_dead);
+	if (error < 0)
+		xfs_alert(NULL,
+"Failed to initialise CPU hotplug, error %d. XFS is non-functional.",
+			error);
+	return error;
+}
+
+static void
+xfs_cpu_hotplug_destroy(void)
+{
+	cpuhp_remove_state_nocalls(CPUHP_XFS_DEAD);
+}
+
+#else /* !CONFIG_HOTPLUG_CPU */
+static inline int xfs_cpu_hotplug_init(void) { return 0; }
+static inline void xfs_cpu_hotplug_destroy(void) {}
+#endif
+
 STATIC int __init
 init_xfs_fs(void)
 {
@@ -2130,10 +2229,14 @@ init_xfs_fs(void)
 
 	xfs_dir_startup();
 
-	error = xfs_init_zones();
+	error = xfs_cpu_hotplug_init();
 	if (error)
 		goto out;
 
+	error = xfs_init_zones();
+	if (error)
+		goto out_destroy_hp;
+
 	error = xfs_init_workqueues();
 	if (error)
 		goto out_destroy_zones;
@@ -2213,6 +2316,8 @@ init_xfs_fs(void)
 	xfs_destroy_workqueues();
  out_destroy_zones:
 	xfs_destroy_zones();
+ out_destroy_hp:
+	xfs_cpu_hotplug_destroy();
  out:
 	return error;
 }
@@ -2235,6 +2340,7 @@ exit_xfs_fs(void)
 	xfs_destroy_workqueues();
 	xfs_destroy_zones();
 	xfs_uuid_table_free();
+	xfs_cpu_hotplug_destroy();
 }
 
 module_init(init_xfs_fs);
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 1525636f4065..fc2c6a404647 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -63,7 +63,7 @@ xfs_readlink_bmap_ilocked(
 			byte_cnt = pathlen;
 
 		cur_chunk = bp->b_addr;
-		if (xfs_sb_version_hascrc(&mp->m_sb)) {
+		if (xfs_has_crc(mp)) {
 			if (!xfs_symlink_hdr_ok(ip->i_ino, offset,
 							byte_cnt, bp)) {
 				error = -EFSCORRUPTED;
@@ -107,7 +107,7 @@ xfs_readlink(
 
 	ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_LOCAL);
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	xfs_ilock(ip, XFS_ILOCK_SHARED);
@@ -168,7 +168,7 @@ xfs_symlink(
 
 	trace_xfs_symlink(dp, link_name);
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	/*
@@ -321,9 +321,8 @@ xfs_symlink(
 	 * symlink transaction goes to disk before returning to
 	 * the user.
 	 */
-	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
+	if (xfs_has_wsync(mp) || xfs_has_dirsync(mp))
 		xfs_trans_set_sync(tp);
-	}
 
 	error = xfs_trans_commit(tp);
 	if (error)
@@ -445,7 +444,7 @@ xfs_inactive_symlink_rmt(
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 	error = xfs_trans_commit(tp);
 	if (error) {
-		ASSERT(XFS_FORCED_SHUTDOWN(mp));
+		ASSERT(xfs_is_shutdown(mp));
 		goto error_unlock;
 	}
 
@@ -478,7 +477,7 @@ xfs_inactive_symlink(
 
 	trace_xfs_inactive_symlink(ip);
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
 		return -EIO;
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index f1bc88f4367c..18dc5eca6c04 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -10,6 +10,7 @@
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_sysfs.h"
+#include "xfs_log.h"
 #include "xfs_log_priv.h"
 #include "xfs_mount.h"
 
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index 7e01e00550ac..d269ef57ff01 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -20,6 +20,7 @@
 #include "xfs_bmap.h"
 #include "xfs_attr.h"
 #include "xfs_trans.h"
+#include "xfs_log.h"
 #include "xfs_log_priv.h"
 #include "xfs_buf_item.h"
 #include "xfs_quota.h"
@@ -32,6 +33,7 @@
 #include "xfs_icache.h"
 #include "xfs_ag.h"
 #include "xfs_ag_resv.h"
+#include "xfs_error.h"
 
 /*
  * We include this last to have the helpers above available for the trace
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 19260291ff8b..1033a95fbf8e 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -2,6 +2,41 @@
 /*
  * Copyright (c) 2009, Christoph Hellwig
  * All Rights Reserved.
+ *
+ * NOTE: none of these tracepoints shall be considered a stable kernel ABI
+ * as they can change at any time.
+ *
+ * Current conventions for printing numbers measuring specific units:
+ *
+ * agno: allocation group number
+ *
+ * agino: per-AG inode number
+ * ino: filesystem inode number
+ *
+ * agbno: per-AG block number in fs blocks
+ * startblock: physical block number for file mappings.  This is either a
+ *             segmented fsblock for data device mappings, or a rfsblock
+ *             for realtime device mappings
+ * fsbcount: number of blocks in an extent, in fs blocks
+ *
+ * daddr: physical block number in 512b blocks
+ * bbcount: number of blocks in a physical extent, in 512b blocks
+ *
+ * owner: reverse-mapping owner, usually inodes
+ *
+ * fileoff: file offset, in fs blocks
+ * pos: file offset, in bytes
+ * bytecount: number of bytes
+ *
+ * disize: ondisk file size, in bytes
+ * isize: incore file size, in bytes
+ *
+ * forkoff: inode fork offset, in bytes
+ *
+ * ireccount: number of inode records
+ *
+ * Numbers describing space allocations (blocks, extents, inodes) should be
+ * formatted in hexadecimal.
  */
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM xfs
@@ -139,7 +174,7 @@ DECLARE_EVENT_CLASS(xfs_perag_class,
 		__entry->refcount = refcount;
 		__entry->caller_ip = caller_ip;
 	),
-	TP_printk("dev %d:%d agno %u refcount %d caller %pS",
+	TP_printk("dev %d:%d agno 0x%x refcount %d caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
 		  __entry->refcount,
@@ -157,6 +192,84 @@ DEFINE_PERAG_REF_EVENT(xfs_perag_put);
 DEFINE_PERAG_REF_EVENT(xfs_perag_set_inode_tag);
 DEFINE_PERAG_REF_EVENT(xfs_perag_clear_inode_tag);
 
+TRACE_EVENT(xfs_inodegc_worker,
+	TP_PROTO(struct xfs_mount *mp, unsigned int shrinker_hits),
+	TP_ARGS(mp, shrinker_hits),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(unsigned int, shrinker_hits)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->shrinker_hits = shrinker_hits;
+	),
+	TP_printk("dev %d:%d shrinker_hits %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->shrinker_hits)
+);
+
+DECLARE_EVENT_CLASS(xfs_fs_class,
+	TP_PROTO(struct xfs_mount *mp, void *caller_ip),
+	TP_ARGS(mp, caller_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(unsigned long long, mflags)
+		__field(unsigned long, opstate)
+		__field(unsigned long, sbflags)
+		__field(void *, caller_ip)
+	),
+	TP_fast_assign(
+		if (mp) {
+			__entry->dev = mp->m_super->s_dev;
+			__entry->mflags = mp->m_features;
+			__entry->opstate = mp->m_opstate;
+			__entry->sbflags = mp->m_super->s_flags;
+		}
+		__entry->caller_ip = caller_ip;
+	),
+	TP_printk("dev %d:%d m_features 0x%llx opstate (%s) s_flags 0x%lx caller %pS",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->mflags,
+		  __print_flags(__entry->opstate, "|", XFS_OPSTATE_STRINGS),
+		  __entry->sbflags,
+		  __entry->caller_ip)
+);
+
+#define DEFINE_FS_EVENT(name)	\
+DEFINE_EVENT(xfs_fs_class, name,					\
+	TP_PROTO(struct xfs_mount *mp, void *caller_ip), \
+	TP_ARGS(mp, caller_ip))
+DEFINE_FS_EVENT(xfs_inodegc_flush);
+DEFINE_FS_EVENT(xfs_inodegc_start);
+DEFINE_FS_EVENT(xfs_inodegc_stop);
+DEFINE_FS_EVENT(xfs_inodegc_queue);
+DEFINE_FS_EVENT(xfs_inodegc_throttle);
+DEFINE_FS_EVENT(xfs_fs_sync_fs);
+DEFINE_FS_EVENT(xfs_blockgc_start);
+DEFINE_FS_EVENT(xfs_blockgc_stop);
+DEFINE_FS_EVENT(xfs_blockgc_worker);
+DEFINE_FS_EVENT(xfs_blockgc_flush_all);
+
+TRACE_EVENT(xfs_inodegc_shrinker_scan,
+	TP_PROTO(struct xfs_mount *mp, struct shrink_control *sc,
+		 void *caller_ip),
+	TP_ARGS(mp, sc, caller_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(unsigned long, nr_to_scan)
+		__field(void *, caller_ip)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->nr_to_scan = sc->nr_to_scan;
+		__entry->caller_ip = caller_ip;
+	),
+	TP_printk("dev %d:%d nr_to_scan %lu caller %pS",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->nr_to_scan,
+		  __entry->caller_ip)
+);
+
 DECLARE_EVENT_CLASS(xfs_ag_class,
 	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno),
 	TP_ARGS(mp, agno),
@@ -168,7 +281,7 @@ DECLARE_EVENT_CLASS(xfs_ag_class,
 		__entry->dev = mp->m_super->s_dev;
 		__entry->agno = agno;
 	),
-	TP_printk("dev %d:%d agno %u",
+	TP_printk("dev %d:%d agno 0x%x",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno)
 );
@@ -268,7 +381,7 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
 		__entry->caller_ip = caller_ip;
 	),
 	TP_printk("dev %d:%d ino 0x%llx state %s cur %p/%d "
-		  "offset %lld block %lld count %lld flag %d caller %pS",
+		  "fileoff 0x%llx startblock 0x%llx fsbcount 0x%llx flag %d caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
@@ -308,10 +421,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
 	),
 	TP_fast_assign(
 		__entry->dev = bp->b_target->bt_dev;
-		if (bp->b_bn == XFS_BUF_DADDR_NULL)
-			__entry->bno = bp->b_maps[0].bm_bn;
-		else
-			__entry->bno = bp->b_bn;
+		__entry->bno = xfs_buf_daddr(bp);
 		__entry->nblks = bp->b_length;
 		__entry->hold = atomic_read(&bp->b_hold);
 		__entry->pincount = atomic_read(&bp->b_pin_count);
@@ -319,7 +429,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
 		__entry->flags = bp->b_flags;
 		__entry->caller_ip = caller_ip;
 	),
-	TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d "
+	TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
 		  "lock %d flags %s caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long long)__entry->bno,
@@ -370,7 +480,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
 		__field(xfs_daddr_t, bno)
-		__field(size_t, buffer_length)
+		__field(unsigned int, length)
 		__field(int, hold)
 		__field(int, pincount)
 		__field(unsigned, lockval)
@@ -379,19 +489,19 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
 	),
 	TP_fast_assign(
 		__entry->dev = bp->b_target->bt_dev;
-		__entry->bno = bp->b_bn;
-		__entry->buffer_length = BBTOB(bp->b_length);
+		__entry->bno = xfs_buf_daddr(bp);
+		__entry->length = bp->b_length;
 		__entry->flags = flags;
 		__entry->hold = atomic_read(&bp->b_hold);
 		__entry->pincount = atomic_read(&bp->b_pin_count);
 		__entry->lockval = bp->b_sema.count;
 		__entry->caller_ip = caller_ip;
 	),
-	TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
+	TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
 		  "lock %d flags %s caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long long)__entry->bno,
-		  __entry->buffer_length,
+		  __entry->length,
 		  __entry->hold,
 		  __entry->pincount,
 		  __entry->lockval,
@@ -413,7 +523,7 @@ TRACE_EVENT(xfs_buf_ioerror,
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
 		__field(xfs_daddr_t, bno)
-		__field(size_t, buffer_length)
+		__field(unsigned int, length)
 		__field(unsigned, flags)
 		__field(int, hold)
 		__field(int, pincount)
@@ -423,8 +533,8 @@ TRACE_EVENT(xfs_buf_ioerror,
 	),
 	TP_fast_assign(
 		__entry->dev = bp->b_target->bt_dev;
-		__entry->bno = bp->b_bn;
-		__entry->buffer_length = BBTOB(bp->b_length);
+		__entry->bno = xfs_buf_daddr(bp);
+		__entry->length = bp->b_length;
 		__entry->hold = atomic_read(&bp->b_hold);
 		__entry->pincount = atomic_read(&bp->b_pin_count);
 		__entry->lockval = bp->b_sema.count;
@@ -432,11 +542,11 @@ TRACE_EVENT(xfs_buf_ioerror,
 		__entry->flags = bp->b_flags;
 		__entry->caller_ip = caller_ip;
 	),
-	TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
+	TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
 		  "lock %d error %d flags %s caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long long)__entry->bno,
-		  __entry->buffer_length,
+		  __entry->length,
 		  __entry->hold,
 		  __entry->pincount,
 		  __entry->lockval,
@@ -451,7 +561,7 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
 		__field(xfs_daddr_t, buf_bno)
-		__field(size_t, buf_len)
+		__field(unsigned int, buf_len)
 		__field(int, buf_hold)
 		__field(int, buf_pincount)
 		__field(int, buf_lockval)
@@ -466,15 +576,15 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
 		__entry->bli_flags = bip->bli_flags;
 		__entry->bli_recur = bip->bli_recur;
 		__entry->bli_refcount = atomic_read(&bip->bli_refcount);
-		__entry->buf_bno = bip->bli_buf->b_bn;
-		__entry->buf_len = BBTOB(bip->bli_buf->b_length);
+		__entry->buf_bno = xfs_buf_daddr(bip->bli_buf);
+		__entry->buf_len = bip->bli_buf->b_length;
 		__entry->buf_flags = bip->bli_buf->b_flags;
 		__entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
 		__entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
 		__entry->buf_lockval = bip->bli_buf->b_sema.count;
 		__entry->li_flags = bip->bli_item.li_flags;
 	),
-	TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
+	TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
 		  "lock %d flags %s recur %d refcount %d bliflags %s "
 		  "liflags %s",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
@@ -534,7 +644,7 @@ DECLARE_EVENT_CLASS(xfs_filestream_class,
 		__entry->agno = agno;
 		__entry->streams = xfs_filestream_peek_ag(mp, agno);
 	),
-	TP_printk("dev %d:%d ino 0x%llx agno %u streams %d",
+	TP_printk("dev %d:%d ino 0x%llx agno 0x%x streams %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __entry->agno,
@@ -568,7 +678,7 @@ TRACE_EVENT(xfs_filestream_pick,
 		__entry->free = free;
 		__entry->nscan = nscan;
 	),
-	TP_printk("dev %d:%d ino 0x%llx agno %u streams %d free %d nscan %d",
+	TP_printk("dev %d:%d ino 0x%llx agno 0x%x streams %d free %d nscan %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __entry->agno,
@@ -616,14 +726,17 @@ DECLARE_EVENT_CLASS(xfs_inode_class,
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
 		__field(xfs_ino_t, ino)
+		__field(unsigned long, iflags)
 	),
 	TP_fast_assign(
 		__entry->dev = VFS_I(ip)->i_sb->s_dev;
 		__entry->ino = ip->i_ino;
+		__entry->iflags = ip->i_flags;
 	),
-	TP_printk("dev %d:%d ino 0x%llx",
+	TP_printk("dev %d:%d ino 0x%llx iflags 0x%lx",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->ino)
+		  __entry->ino,
+		  __entry->iflags)
 )
 
 #define DEFINE_INODE_EVENT(name) \
@@ -667,6 +780,10 @@ DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid);
 DEFINE_INODE_EVENT(xfs_inode_set_cowblocks_tag);
 DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag);
 DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid);
+DEFINE_INODE_EVENT(xfs_inode_set_reclaimable);
+DEFINE_INODE_EVENT(xfs_inode_reclaiming);
+DEFINE_INODE_EVENT(xfs_inode_set_need_inactive);
+DEFINE_INODE_EVENT(xfs_inode_inactivating);
 
 /*
  * ftrace's __print_symbolic requires that all enum values be wrapped in the
@@ -773,9 +890,12 @@ TRACE_EVENT(xfs_irec_merge_pre,
 		__entry->nagino = nagino;
 		__entry->nholemask = holemask;
 	),
-	TP_printk("dev %d:%d agno %d inobt (%u:0x%x) new (%u:0x%x)",
-		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
-		  __entry->agino, __entry->holemask, __entry->nagino,
+	TP_printk("dev %d:%d agno 0x%x agino 0x%x holemask 0x%x new_agino 0x%x new_holemask 0x%x",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->agino,
+		  __entry->holemask,
+		  __entry->nagino,
 		  __entry->nholemask)
 )
 
@@ -795,8 +915,11 @@ TRACE_EVENT(xfs_irec_merge_post,
 		__entry->agino = agino;
 		__entry->holemask = holemask;
 	),
-	TP_printk("dev %d:%d agno %d inobt (%u:0x%x)", MAJOR(__entry->dev),
-		  MINOR(__entry->dev), __entry->agno, __entry->agino,
+	TP_printk("dev %d:%d agno 0x%x agino 0x%x holemask 0x%x",
+		  MAJOR(__entry->dev),
+		  MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->agino,
 		  __entry->holemask)
 )
 
@@ -1301,7 +1424,7 @@ DECLARE_EVENT_CLASS(xfs_file_class,
 		__entry->offset = iocb->ki_pos;
 		__entry->count = iov_iter_count(iter);
 	),
-	TP_printk("dev %d:%d ino 0x%llx size 0x%llx offset 0x%llx count 0x%zx",
+	TP_printk("dev %d:%d ino 0x%llx disize 0x%llx pos 0x%llx bytecount 0x%zx",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __entry->size,
@@ -1348,14 +1471,14 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
 		__entry->startblock = irec ? irec->br_startblock : 0;
 		__entry->blockcount = irec ? irec->br_blockcount : 0;
 	),
-	TP_printk("dev %d:%d ino 0x%llx size 0x%llx offset 0x%llx count %zd "
-		  "fork %s startoff 0x%llx startblock %lld blockcount 0x%llx",
+	TP_printk("dev %d:%d ino 0x%llx disize 0x%llx pos 0x%llx bytecount 0x%zx "
+		  "fork %s startoff 0x%llx startblock 0x%llx fsbcount 0x%llx",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __entry->size,
 		  __entry->offset,
 		  __entry->count,
-		  __entry->whichfork == XFS_COW_FORK ? "cow" : "data",
+		  __print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS),
 		  __entry->startoff,
 		  (int64_t)__entry->startblock,
 		  __entry->blockcount)
@@ -1391,7 +1514,7 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class,
 		__entry->count = count;
 	),
 	TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx "
-		  "offset 0x%llx count %zd",
+		  "pos 0x%llx bytecount 0x%zx",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __entry->isize,
@@ -1427,7 +1550,7 @@ DECLARE_EVENT_CLASS(xfs_itrunc_class,
 		__entry->size = ip->i_disk_size;
 		__entry->new_size = new_size;
 	),
-	TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx",
+	TP_printk("dev %d:%d ino 0x%llx disize 0x%llx new_size 0x%llx",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __entry->size,
@@ -1458,7 +1581,7 @@ TRACE_EVENT(xfs_pagecache_inval,
 		__entry->start = start;
 		__entry->finish = finish;
 	),
-	TP_printk("dev %d:%d ino 0x%llx size 0x%llx start 0x%llx finish 0x%llx",
+	TP_printk("dev %d:%d ino 0x%llx disize 0x%llx start 0x%llx finish 0x%llx",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __entry->size,
@@ -1467,14 +1590,14 @@ TRACE_EVENT(xfs_pagecache_inval,
 );
 
 TRACE_EVENT(xfs_bunmap,
-	TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len,
+	TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t fileoff, xfs_filblks_t len,
 		 int flags, unsigned long caller_ip),
-	TP_ARGS(ip, bno, len, flags, caller_ip),
+	TP_ARGS(ip, fileoff, len, flags, caller_ip),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
 		__field(xfs_ino_t, ino)
 		__field(xfs_fsize_t, size)
-		__field(xfs_fileoff_t, bno)
+		__field(xfs_fileoff_t, fileoff)
 		__field(xfs_filblks_t, len)
 		__field(unsigned long, caller_ip)
 		__field(int, flags)
@@ -1483,17 +1606,17 @@ TRACE_EVENT(xfs_bunmap,
 		__entry->dev = VFS_I(ip)->i_sb->s_dev;
 		__entry->ino = ip->i_ino;
 		__entry->size = ip->i_disk_size;
-		__entry->bno = bno;
+		__entry->fileoff = fileoff;
 		__entry->len = len;
 		__entry->caller_ip = caller_ip;
 		__entry->flags = flags;
 	),
-	TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx"
+	TP_printk("dev %d:%d ino 0x%llx disize 0x%llx fileoff 0x%llx fsbcount 0x%llx"
 		  "flags %s caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __entry->size,
-		  __entry->bno,
+		  __entry->fileoff,
 		  __entry->len,
 		  __print_flags(__entry->flags, "|", XFS_BMAPI_FLAGS),
 		  (void *)__entry->caller_ip)
@@ -1516,7 +1639,7 @@ DECLARE_EVENT_CLASS(xfs_extent_busy_class,
 		__entry->agbno = agbno;
 		__entry->len = len;
 	),
-	TP_printk("dev %d:%d agno %u agbno %u len %u",
+	TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
 		  __entry->agbno,
@@ -1554,7 +1677,7 @@ TRACE_EVENT(xfs_extent_busy_trim,
 		__entry->tbno = tbno;
 		__entry->tlen = tlen;
 	),
-	TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u",
+	TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x found_agbno 0x%x found_fsbcount 0x%x",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
 		  __entry->agbno,
@@ -1601,7 +1724,7 @@ DECLARE_EVENT_CLASS(xfs_agf_class,
 		__entry->longest = be32_to_cpu(agf->agf_longest);
 		__entry->caller_ip = caller_ip;
 	),
-	TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u "
+	TP_printk("dev %d:%d agno 0x%x flags %s length %u roots b %u c %u "
 		  "levels b %u c %u flfirst %u fllast %u flcount %u "
 		  "freeblks %u longest %u caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
@@ -1650,7 +1773,7 @@ TRACE_EVENT(xfs_free_extent,
 		__entry->haveleft = haveleft;
 		__entry->haveright = haveright;
 	),
-	TP_printk("dev %d:%d agno %u agbno %u len %u resv %d %s",
+	TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x resv %d %s",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
 		  __entry->agbno,
@@ -1707,7 +1830,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
 		__entry->datatype = args->datatype;
 		__entry->firstblock = args->tp->t_firstblock;
 	),
-	TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u "
+	TP_printk("dev %d:%d agno 0x%x agbno 0x%x minlen %u maxlen %u mod %u "
 		  "prod %u minleft %u total %u alignment %u minalignslop %u "
 		  "len %u type %s otype %s wasdel %d wasfromfl %d resv %d "
 		  "datatype 0x%x firstblock 0x%llx",
@@ -1785,7 +1908,7 @@ TRACE_EVENT(xfs_alloc_cur_check,
 		__entry->diff = diff;
 		__entry->new = new;
 	),
-	TP_printk("dev %d:%d btree %s bno 0x%x len 0x%x diff 0x%x new %d",
+	TP_printk("dev %d:%d btree %s agbno 0x%x fsbcount 0x%x diff 0x%x new %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
 		  __entry->bno, __entry->len, __entry->diff, __entry->new)
@@ -2060,7 +2183,7 @@ DECLARE_EVENT_CLASS(xfs_swap_extent_class,
 		__entry->fork_off = XFS_IFORK_BOFF(ip);
 	),
 	TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, "
-		  "broot size %d, fork offset %d",
+		  "broot size %d, forkoff 0x%x",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __print_symbolic(__entry->which, XFS_SWAPEXT_INODES),
@@ -2186,7 +2309,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,
 		__entry->size = buf_f->blf_size;
 		__entry->map_size = buf_f->blf_map_size;
 	),
-	TP_printk("dev %d:%d blkno 0x%llx, len %u, flags 0x%x, size %d, "
+	TP_printk("dev %d:%d daddr 0x%llx, bbcount 0x%x, flags 0x%x, size %d, "
 			"map_size %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->blkno,
@@ -2237,7 +2360,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,
 		__entry->boffset = in_f->ilf_boffset;
 	),
 	TP_printk("dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, "
-			"dsize %d, blkno 0x%llx, len %d, boffset %d",
+			"dsize %d, daddr 0x%llx, bbcount 0x%x, boffset %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __entry->size,
@@ -2278,10 +2401,14 @@ DECLARE_EVENT_CLASS(xfs_log_recover_icreate_item_class,
 		__entry->length = be32_to_cpu(in_f->icl_length);
 		__entry->gen = be32_to_cpu(in_f->icl_gen);
 	),
-	TP_printk("dev %d:%d agno %u agbno %u count %u isize %u length %u "
-		  "gen %u", MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->agno, __entry->agbno, __entry->count, __entry->isize,
-		  __entry->length, __entry->gen)
+	TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x ireccount %u isize %u gen 0x%x",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->agbno,
+		  __entry->length,
+		  __entry->count,
+		  __entry->isize,
+		  __entry->gen)
 )
 #define DEFINE_LOG_RECOVER_ICREATE_ITEM(name) \
 DEFINE_EVENT(xfs_log_recover_icreate_item_class, name, \
@@ -2307,7 +2434,7 @@ DECLARE_EVENT_CLASS(xfs_discard_class,
 		__entry->agbno = agbno;
 		__entry->len = len;
 	),
-	TP_printk("dev %d:%d agno %u agbno %u len %u",
+	TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
 		  __entry->agbno,
@@ -2350,7 +2477,7 @@ DECLARE_EVENT_CLASS(xfs_btree_cur_class,
 		__entry->level = level;
 		__entry->nlevels = cur->bc_nlevels;
 		__entry->ptr = cur->bc_ptrs[level];
-		__entry->daddr = bp ? bp->b_bn : -1;
+		__entry->daddr = bp ? xfs_buf_daddr(bp) : -1;
 	),
 	TP_printk("dev %d:%d btree %s level %d/%d ptr %d daddr 0x%llx",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
@@ -2466,7 +2593,7 @@ DECLARE_EVENT_CLASS(xfs_phys_extent_deferred_class,
 		__entry->agbno = agbno;
 		__entry->len = len;
 	),
-	TP_printk("dev %d:%d op %d agno %u agbno %u len %u",
+	TP_printk("dev %d:%d op %d agno 0x%x agbno 0x%x fsbcount 0x%x",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->type,
 		  __entry->agno,
@@ -2513,13 +2640,13 @@ DECLARE_EVENT_CLASS(xfs_map_extent_deferred_class,
 		__entry->l_state = state;
 		__entry->op = op;
 	),
-	TP_printk("dev %d:%d op %d agno %u agbno %u owner %lld %s offset %llu len %llu state %d",
+	TP_printk("dev %d:%d op %d agno 0x%x agbno 0x%x owner 0x%llx %s fileoff 0x%llx fsbcount 0x%llx state %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->op,
 		  __entry->agno,
 		  __entry->agbno,
 		  __entry->ino,
-		  __entry->whichfork == XFS_ATTR_FORK ? "attr" : "data",
+		  __print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS),
 		  __entry->l_loff,
 		  __entry->l_len,
 		  __entry->l_state)
@@ -2583,7 +2710,7 @@ DECLARE_EVENT_CLASS(xfs_rmap_class,
 		if (unwritten)
 			__entry->flags |= XFS_RMAP_UNWRITTEN;
 	),
-	TP_printk("dev %d:%d agno %u agbno %u len %u owner %lld offset %llu flags 0x%lx",
+	TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%lx",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
 		  __entry->agbno,
@@ -2616,7 +2743,7 @@ DECLARE_EVENT_CLASS(xfs_ag_error_class,
 		__entry->error = error;
 		__entry->caller_ip = caller_ip;
 	),
-	TP_printk("dev %d:%d agno %u error %d caller %pS",
+	TP_printk("dev %d:%d agno 0x%x error %d caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
 		  __entry->error,
@@ -2663,7 +2790,7 @@ DECLARE_EVENT_CLASS(xfs_rmapbt_class,
 		__entry->offset = offset;
 		__entry->flags = flags;
 	),
-	TP_printk("dev %d:%d agno %u agbno %u len %u owner %lld offset %llu flags 0x%x",
+	TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%x",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
 		  __entry->agbno,
@@ -2732,7 +2859,7 @@ DECLARE_EVENT_CLASS(xfs_ag_resv_class,
 		__entry->asked = r ? r->ar_asked : 0;
 		__entry->len = len;
 	),
-	TP_printk("dev %d:%d agno %u resv %d freeblks %u flcount %u "
+	TP_printk("dev %d:%d agno 0x%x resv %d freeblks %u flcount %u "
 		  "resv %u ask %u len %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
@@ -2785,7 +2912,7 @@ DECLARE_EVENT_CLASS(xfs_ag_btree_lookup_class,
 		__entry->agbno = agbno;
 		__entry->dir = dir;
 	),
-	TP_printk("dev %d:%d agno %u agbno %u cmp %s(%d)",
+	TP_printk("dev %d:%d agno 0x%x agbno 0x%x cmp %s(%d)",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
 		  __entry->agbno,
@@ -2818,7 +2945,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_class,
 		__entry->blockcount = irec->rc_blockcount;
 		__entry->refcount = irec->rc_refcount;
 	),
-	TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u",
+	TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
 		  __entry->startblock,
@@ -2853,7 +2980,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class,
 		__entry->refcount = irec->rc_refcount;
 		__entry->agbno = agbno;
 	),
-	TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u @ agbno %u",
+	TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
 		  __entry->startblock,
@@ -2893,8 +3020,8 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class,
 		__entry->i2_blockcount = i2->rc_blockcount;
 		__entry->i2_refcount = i2->rc_refcount;
 	),
-	TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- "
-		  "agbno %u len %u refcount %u",
+	TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- "
+		  "agbno 0x%x fsbcount 0x%x refcount %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
 		  __entry->i1_startblock,
@@ -2939,8 +3066,8 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class,
 		__entry->i2_refcount = i2->rc_refcount;
 		__entry->agbno = agbno;
 	),
-	TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- "
-		  "agbno %u len %u refcount %u @ agbno %u",
+	TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- "
+		  "agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
 		  __entry->i1_startblock,
@@ -2991,9 +3118,9 @@ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class,
 		__entry->i3_blockcount = i3->rc_blockcount;
 		__entry->i3_refcount = i3->rc_refcount;
 	),
-	TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- "
-		  "agbno %u len %u refcount %u -- "
-		  "agbno %u len %u refcount %u",
+	TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- "
+		  "agbno 0x%x fsbcount 0x%x refcount %u -- "
+		  "agbno 0x%x fsbcount 0x%x refcount %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
 		  __entry->i1_startblock,
@@ -3080,7 +3207,7 @@ TRACE_EVENT(xfs_refcount_finish_one_leftover,
 		__entry->new_agbno = new_agbno;
 		__entry->new_len = new_len;
 	),
-	TP_printk("dev %d:%d type %d agno %u agbno %u len %u new_agbno %u new_len %u",
+	TP_printk("dev %d:%d type %d agno 0x%x agbno 0x%x fsbcount 0x%x new_agbno 0x%x new_fsbcount 0x%x",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->type,
 		  __entry->agno,
@@ -3106,7 +3233,7 @@ DECLARE_EVENT_CLASS(xfs_inode_error_class,
 		__entry->error = error;
 		__entry->caller_ip = caller_ip;
 	),
-	TP_printk("dev %d:%d ino %llx error %d caller %pS",
+	TP_printk("dev %d:%d ino 0x%llx error %d caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __entry->error,
@@ -3132,7 +3259,7 @@ DECLARE_EVENT_CLASS(xfs_double_io_class,
 		__field(loff_t, src_isize)
 		__field(loff_t, src_disize)
 		__field(loff_t, src_offset)
-		__field(size_t, len)
+		__field(long long, len)
 		__field(xfs_ino_t, dest_ino)
 		__field(loff_t, dest_isize)
 		__field(loff_t, dest_disize)
@@ -3150,9 +3277,9 @@ DECLARE_EVENT_CLASS(xfs_double_io_class,
 		__entry->dest_disize = dest->i_disk_size;
 		__entry->dest_offset = doffset;
 	),
-	TP_printk("dev %d:%d count %zd "
-		  "ino 0x%llx isize 0x%llx disize 0x%llx offset 0x%llx -> "
-		  "ino 0x%llx isize 0x%llx disize 0x%llx offset 0x%llx",
+	TP_printk("dev %d:%d bytecount 0x%llx "
+		  "ino 0x%llx isize 0x%llx disize 0x%llx pos 0x%llx -> "
+		  "ino 0x%llx isize 0x%llx disize 0x%llx pos 0x%llx",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->len,
 		  __entry->src_ino,
@@ -3191,7 +3318,7 @@ DECLARE_EVENT_CLASS(xfs_inode_irec_class,
 		__entry->pblk = irec->br_startblock;
 		__entry->state = irec->br_state;
 	),
-	TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu st %d",
+	TP_printk("dev %d:%d ino 0x%llx fileoff 0x%llx fsbcount 0x%x startblock 0x%llx st %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __entry->lblk,
@@ -3231,9 +3358,8 @@ TRACE_EVENT(xfs_reflink_remap_blocks,
 		__entry->dest_ino = dest->i_ino;
 		__entry->dest_lblk = doffset;
 	),
-	TP_printk("dev %d:%d len 0x%llx "
-		  "ino 0x%llx offset 0x%llx blocks -> "
-		  "ino 0x%llx offset 0x%llx blocks",
+	TP_printk("dev %d:%d fsbcount 0x%llx "
+		  "ino 0x%llx fileoff 0x%llx -> ino 0x%llx fileoff 0x%llx",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->len,
 		  __entry->src_ino,
@@ -3272,9 +3398,7 @@ TRACE_EVENT(xfs_ioctl_clone,
 		__entry->dest_ino = dest->i_ino;
 		__entry->dest_isize = i_size_read(dest);
 	),
-	TP_printk("dev %d:%d "
-		  "ino 0x%lx isize 0x%llx -> "
-		  "ino 0x%lx isize 0x%llx",
+	TP_printk("dev %d:%d ino 0x%lx isize 0x%llx -> ino 0x%lx isize 0x%llx",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->src_ino,
 		  __entry->src_isize,
@@ -3310,7 +3434,7 @@ DEFINE_INODE_ERROR_EVENT(xfs_swap_extent_rmap_error);
 /* fsmap traces */
 DECLARE_EVENT_CLASS(xfs_fsmap_class,
 	TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno,
-		 struct xfs_rmap_irec *rmap),
+		 const struct xfs_rmap_irec *rmap),
 	TP_ARGS(mp, keydev, agno, rmap),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
@@ -3332,7 +3456,7 @@ DECLARE_EVENT_CLASS(xfs_fsmap_class,
 		__entry->offset = rmap->rm_offset;
 		__entry->flags = rmap->rm_flags;
 	),
-	TP_printk("dev %d:%d keydev %d:%d agno %u bno %llu len %llu owner %lld offset %llu flags 0x%x",
+	TP_printk("dev %d:%d keydev %d:%d agno 0x%x startblock 0x%llx fsbcount 0x%llx owner 0x%llx fileoff 0x%llx flags 0x%x",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  MAJOR(__entry->keydev), MINOR(__entry->keydev),
 		  __entry->agno,
@@ -3345,7 +3469,7 @@ DECLARE_EVENT_CLASS(xfs_fsmap_class,
 #define DEFINE_FSMAP_EVENT(name) \
 DEFINE_EVENT(xfs_fsmap_class, name, \
 	TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno, \
-		 struct xfs_rmap_irec *rmap), \
+		 const struct xfs_rmap_irec *rmap), \
 	TP_ARGS(mp, keydev, agno, rmap))
 DEFINE_FSMAP_EVENT(xfs_fsmap_low_key);
 DEFINE_FSMAP_EVENT(xfs_fsmap_high_key);
@@ -3372,7 +3496,7 @@ DECLARE_EVENT_CLASS(xfs_getfsmap_class,
 		__entry->offset = fsmap->fmr_offset;
 		__entry->flags = fsmap->fmr_flags;
 	),
-	TP_printk("dev %d:%d keydev %d:%d block %llu len %llu owner %lld offset %llu flags 0x%llx",
+	TP_printk("dev %d:%d keydev %d:%d daddr 0x%llx bbcount 0x%llx owner 0x%llx fileoff_daddr 0x%llx flags 0x%llx",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  MAJOR(__entry->keydev), MINOR(__entry->keydev),
 		  __entry->block,
@@ -3471,7 +3595,7 @@ TRACE_EVENT(xfs_iunlink_update_bucket,
 		__entry->old_ptr = old_ptr;
 		__entry->new_ptr = new_ptr;
 	),
-	TP_printk("dev %d:%d agno %u bucket %u old 0x%x new 0x%x",
+	TP_printk("dev %d:%d agno 0x%x bucket %u old 0x%x new 0x%x",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
 		  __entry->bucket,
@@ -3497,7 +3621,7 @@ TRACE_EVENT(xfs_iunlink_update_dinode,
 		__entry->old_ptr = old_ptr;
 		__entry->new_ptr = new_ptr;
 	),
-	TP_printk("dev %d:%d agno %u agino 0x%x old 0x%x new 0x%x",
+	TP_printk("dev %d:%d agno 0x%x agino 0x%x old 0x%x new 0x%x",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno,
 		  __entry->agino,
@@ -3518,7 +3642,7 @@ DECLARE_EVENT_CLASS(xfs_ag_inode_class,
 		__entry->agno = XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino);
 		__entry->agino = XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino);
 	),
-	TP_printk("dev %d:%d agno %u agino %u",
+	TP_printk("dev %d:%d agno 0x%x agino 0x%x",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno, __entry->agino)
 )
@@ -3570,7 +3694,7 @@ DECLARE_EVENT_CLASS(xfs_ag_corrupt_class,
 		__entry->agno = agno;
 		__entry->flags = flags;
 	),
-	TP_printk("dev %d:%d agno %u flags 0x%x",
+	TP_printk("dev %d:%d agno 0x%x flags 0x%x",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->agno, __entry->flags)
 );
@@ -3621,7 +3745,7 @@ TRACE_EVENT(xfs_iwalk_ag,
 		__entry->agno = agno;
 		__entry->startino = startino;
 	),
-	TP_printk("dev %d:%d agno %d startino %u",
+	TP_printk("dev %d:%d agno 0x%x startino 0x%x",
 		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
 		  __entry->startino)
 )
@@ -3642,7 +3766,7 @@ TRACE_EVENT(xfs_iwalk_ag_rec,
 		__entry->startino = irec->ir_startino;
 		__entry->freemask = irec->ir_free;
 	),
-	TP_printk("dev %d:%d agno %d startino %u freemask 0x%llx",
+	TP_printk("dev %d:%d agno 0x%x startino 0x%x freemask 0x%llx",
 		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
 		  __entry->startino, __entry->freemask)
 )
@@ -3689,8 +3813,6 @@ DEFINE_EVENT(xfs_kmem_class, name, \
 	TP_PROTO(ssize_t size, int flags, unsigned long caller_ip), \
 	TP_ARGS(size, flags, caller_ip))
 DEFINE_KMEM_EVENT(kmem_alloc);
-DEFINE_KMEM_EVENT(kmem_alloc_io);
-DEFINE_KMEM_EVENT(kmem_alloc_large);
 
 TRACE_EVENT(xfs_check_new_dalign,
 	TP_PROTO(struct xfs_mount *mp, int new_dalign, xfs_ino_t calc_rootino),
@@ -3707,7 +3829,7 @@ TRACE_EVENT(xfs_check_new_dalign,
 		__entry->sb_rootino = mp->m_sb.sb_rootino;
 		__entry->calc_rootino = calc_rootino;
 	),
-	TP_printk("dev %d:%d new_dalign %d sb_rootino %llu calc_rootino %llu",
+	TP_printk("dev %d:%d new_dalign %d sb_rootino 0x%llx calc_rootino 0x%llx",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->new_dalign, __entry->sb_rootino,
 		  __entry->calc_rootino)
@@ -3732,7 +3854,7 @@ TRACE_EVENT(xfs_btree_commit_afakeroot,
 		__entry->levels = cur->bc_ag.afake->af_levels;
 		__entry->blocks = cur->bc_ag.afake->af_blocks;
 	),
-	TP_printk("dev %d:%d btree %s ag %u levels %u blocks %u root %u",
+	TP_printk("dev %d:%d btree %s agno 0x%x levels %u blocks %u root %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
 		  __entry->agno,
@@ -3764,12 +3886,12 @@ TRACE_EVENT(xfs_btree_commit_ifakeroot,
 		__entry->blocks = cur->bc_ino.ifake->if_blocks;
 		__entry->whichfork = cur->bc_ino.whichfork;
 	),
-	TP_printk("dev %d:%d btree %s ag %u agino %u whichfork %s levels %u blocks %u",
+	TP_printk("dev %d:%d btree %s agno 0x%x agino 0x%x whichfork %s levels %u blocks %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
 		  __entry->agno,
 		  __entry->agino,
-		  __entry->whichfork == XFS_ATTR_FORK ? "attr" : "data",
+		  __print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS),
 		  __entry->levels,
 		  __entry->blocks)
 )
@@ -3847,7 +3969,7 @@ TRACE_EVENT(xfs_btree_bload_block,
 		}
 		__entry->nr_records = nr_records;
 	),
-	TP_printk("dev %d:%d btree %s level %u block %llu/%llu fsb (%u/%u) recs %u",
+	TP_printk("dev %d:%d btree %s level %u block %llu/%llu agno 0x%x agbno 0x%x recs %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
 		  __entry->level,
@@ -3934,7 +4056,6 @@ TRACE_DEFINE_ENUM(XLOG_STATE_SYNCING);
 TRACE_DEFINE_ENUM(XLOG_STATE_DONE_SYNC);
 TRACE_DEFINE_ENUM(XLOG_STATE_CALLBACK);
 TRACE_DEFINE_ENUM(XLOG_STATE_DIRTY);
-TRACE_DEFINE_ENUM(XLOG_STATE_IOERROR);
 
 DECLARE_EVENT_CLASS(xlog_iclog_class,
 	TP_PROTO(struct xlog_in_core *iclog, unsigned long caller_ip),
@@ -3990,6 +4111,57 @@ DEFINE_ICLOG_EVENT(xlog_iclog_want_sync);
 DEFINE_ICLOG_EVENT(xlog_iclog_wait_on);
 DEFINE_ICLOG_EVENT(xlog_iclog_write);
 
+DECLARE_EVENT_CLASS(xfs_das_state_class,
+	TP_PROTO(int das, struct xfs_inode *ip),
+	TP_ARGS(das, ip),
+	TP_STRUCT__entry(
+		__field(int, das)
+		__field(xfs_ino_t, ino)
+	),
+	TP_fast_assign(
+		__entry->das = das;
+		__entry->ino = ip->i_ino;
+	),
+	TP_printk("state change %d ino 0x%llx",
+		  __entry->das, __entry->ino)
+)
+
+#define DEFINE_DAS_STATE_EVENT(name) \
+DEFINE_EVENT(xfs_das_state_class, name, \
+	TP_PROTO(int das, struct xfs_inode *ip), \
+	TP_ARGS(das, ip))
+DEFINE_DAS_STATE_EVENT(xfs_attr_sf_addname_return);
+DEFINE_DAS_STATE_EVENT(xfs_attr_set_iter_return);
+DEFINE_DAS_STATE_EVENT(xfs_attr_node_addname_return);
+DEFINE_DAS_STATE_EVENT(xfs_attr_remove_iter_return);
+DEFINE_DAS_STATE_EVENT(xfs_attr_rmtval_remove_return);
+
+TRACE_EVENT(xfs_force_shutdown,
+	TP_PROTO(struct xfs_mount *mp, int ptag, int flags, const char *fname,
+		 int line_num),
+	TP_ARGS(mp, ptag, flags, fname, line_num),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(int, ptag)
+		__field(int, flags)
+		__string(fname, fname)
+		__field(int, line_num)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->ptag = ptag;
+		__entry->flags = flags;
+		__assign_str(fname, fname);
+		__entry->line_num = line_num;
+	),
+	TP_printk("dev %d:%d tag %s flags %s file %s line_num %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		__print_flags(__entry->ptag, "|", XFS_PTAG_STRINGS),
+		__print_flags(__entry->flags, "|", XFS_SHUTDOWN_STRINGS),
+		__get_str(fname),
+		__entry->line_num)
+);
+
 #endif /* _TRACE_XFS_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 87bffd12c20c..67dec11e34c7 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -9,7 +9,6 @@
 #include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
-#include "xfs_log_priv.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
 #include "xfs_extent_busy.h"
@@ -17,6 +16,7 @@
 #include "xfs_trans.h"
 #include "xfs_trans_priv.h"
 #include "xfs_log.h"
+#include "xfs_log_priv.h"
 #include "xfs_trace.h"
 #include "xfs_error.h"
 #include "xfs_defer.h"
@@ -275,7 +275,7 @@ retry:
 	WARN_ON(resp->tr_logres > 0 &&
 		mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
 	ASSERT(!(flags & XFS_TRANS_RES_FDBLKS) ||
-	       xfs_sb_version_haslazysbcount(&mp->m_sb));
+	       xfs_has_lazysbcount(mp));
 
 	tp->t_magic = XFS_TRANS_HEADER_MAGIC;
 	tp->t_flags = flags;
@@ -295,10 +295,7 @@ retry:
 		 * Do not perform a synchronous scan because callers can hold
 		 * other locks.
 		 */
-		error = xfs_blockgc_free_space(mp, NULL);
-		if (error)
-			return error;
-
+		xfs_blockgc_flush_all(mp);
 		want_retry = false;
 		goto retry;
 	}
@@ -367,12 +364,12 @@ xfs_trans_mod_sb(
 	switch (field) {
 	case XFS_TRANS_SB_ICOUNT:
 		tp->t_icount_delta += delta;
-		if (xfs_sb_version_haslazysbcount(&mp->m_sb))
+		if (xfs_has_lazysbcount(mp))
 			flags &= ~XFS_TRANS_SB_DIRTY;
 		break;
 	case XFS_TRANS_SB_IFREE:
 		tp->t_ifree_delta += delta;
-		if (xfs_sb_version_haslazysbcount(&mp->m_sb))
+		if (xfs_has_lazysbcount(mp))
 			flags &= ~XFS_TRANS_SB_DIRTY;
 		break;
 	case XFS_TRANS_SB_FDBLOCKS:
@@ -401,7 +398,7 @@ xfs_trans_mod_sb(
 			delta -= blkres_delta;
 		}
 		tp->t_fdblocks_delta += delta;
-		if (xfs_sb_version_haslazysbcount(&mp->m_sb))
+		if (xfs_has_lazysbcount(mp))
 			flags &= ~XFS_TRANS_SB_DIRTY;
 		break;
 	case XFS_TRANS_SB_RES_FDBLOCKS:
@@ -411,7 +408,7 @@ xfs_trans_mod_sb(
 		 * be applied to the on-disk superblock.
 		 */
 		tp->t_res_fdblocks_delta += delta;
-		if (xfs_sb_version_haslazysbcount(&mp->m_sb))
+		if (xfs_has_lazysbcount(mp))
 			flags &= ~XFS_TRANS_SB_DIRTY;
 		break;
 	case XFS_TRANS_SB_FREXTENTS:
@@ -490,7 +487,7 @@ xfs_trans_apply_sb_deltas(
 	/*
 	 * Only update the superblock counters if we are logging them
 	 */
-	if (!xfs_sb_version_haslazysbcount(&(tp->t_mountp->m_sb))) {
+	if (!xfs_has_lazysbcount((tp->t_mountp))) {
 		if (tp->t_icount_delta)
 			be64_add_cpu(&sbp->sb_icount, tp->t_icount_delta);
 		if (tp->t_ifree_delta)
@@ -588,7 +585,7 @@ xfs_trans_unreserve_and_mod_sb(
 	if (tp->t_blk_res > 0)
 		blkdelta = tp->t_blk_res;
 	if ((tp->t_fdblocks_delta != 0) &&
-	    (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
+	    (xfs_has_lazysbcount(mp) ||
 	     (tp->t_flags & XFS_TRANS_SB_DIRTY)))
 	        blkdelta += tp->t_fdblocks_delta;
 
@@ -598,7 +595,7 @@ xfs_trans_unreserve_and_mod_sb(
 	    (tp->t_flags & XFS_TRANS_SB_DIRTY))
 		rtxdelta += tp->t_frextents_delta;
 
-	if (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
+	if (xfs_has_lazysbcount(mp) ||
 	     (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
 		idelta = tp->t_icount_delta;
 		ifreedelta = tp->t_ifree_delta;
@@ -778,7 +775,7 @@ xfs_trans_committed_bulk(
 		 * object into the AIL as we are in a shutdown situation.
 		 */
 		if (aborted) {
-			ASSERT(XFS_FORCED_SHUTDOWN(ailp->ail_mount));
+			ASSERT(xfs_is_shutdown(ailp->ail_mount));
 			if (lip->li_ops->iop_unpin)
 				lip->li_ops->iop_unpin(lip, 1);
 			continue;
@@ -867,7 +864,7 @@ __xfs_trans_commit(
 	if (!(tp->t_flags & XFS_TRANS_DIRTY))
 		goto out_unreserve;
 
-	if (XFS_FORCED_SHUTDOWN(mp)) {
+	if (xfs_is_shutdown(mp)) {
 		error = -EIO;
 		goto out_unreserve;
 	}
@@ -908,7 +905,7 @@ out_unreserve:
 	 */
 	xfs_trans_unreserve_and_mod_dquots(tp);
 	if (tp->t_ticket) {
-		if (regrant && !XLOG_FORCED_SHUTDOWN(mp->m_log))
+		if (regrant && !xlog_is_shutdown(mp->m_log))
 			xfs_log_ticket_regrant(mp->m_log, tp->t_ticket);
 		else
 			xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket);
@@ -953,12 +950,12 @@ xfs_trans_cancel(
 	 * filesystem.  This happens in paths where we detect
 	 * corruption and decide to give up.
 	 */
-	if (dirty && !XFS_FORCED_SHUTDOWN(mp)) {
+	if (dirty && !xfs_is_shutdown(mp)) {
 		XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp);
 		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 	}
 #ifdef DEBUG
-	if (!dirty && !XFS_FORCED_SHUTDOWN(mp)) {
+	if (!dirty && !xfs_is_shutdown(mp)) {
 		struct xfs_log_item *lip;
 
 		list_for_each_entry(lip, &tp->t_items, li_trans)
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index dbb69b4bf3ed..2a8c8dc54c95 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -17,6 +17,7 @@
 #include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_log.h"
+#include "xfs_log_priv.h"
 
 #ifdef DEBUG
 /*
@@ -429,8 +430,12 @@ xfsaild_push(
 
 	/*
 	 * If we encountered pinned items or did not finish writing out all
-	 * buffers the last time we ran, force the log first and wait for it
-	 * before pushing again.
+	 * buffers the last time we ran, force a background CIL push to get the
+	 * items unpinned in the near future. We do not wait on the CIL push as
+	 * that could stall us for seconds if there is enough background IO
+	 * load. Stalling for that long when the tail of the log is pinned and
+	 * needs flushing will hard stop the transaction subsystem when log
+	 * space runs out.
 	 */
 	if (ailp->ail_log_flush && ailp->ail_last_pushed_lsn == 0 &&
 	    (!list_empty_careful(&ailp->ail_buf_list) ||
@@ -438,7 +443,7 @@ xfsaild_push(
 		ailp->ail_log_flush = 0;
 
 		XFS_STATS_INC(mp, xs_push_ail_flush);
-		xfs_log_force(mp, XFS_LOG_SYNC);
+		xlog_cil_flush(mp->m_log);
 	}
 
 	spin_lock(&ailp->ail_lock);
@@ -615,7 +620,7 @@ xfsaild(
 			 * opportunity to release such buffers from the queue.
 			 */
 			ASSERT(list_empty(&ailp->ail_buf_list) ||
-			       XFS_FORCED_SHUTDOWN(ailp->ail_mount));
+			       xfs_is_shutdown(ailp->ail_mount));
 			xfs_buf_delwri_cancel(&ailp->ail_buf_list);
 			break;
 		}
@@ -678,7 +683,7 @@ xfs_ail_push(
 	struct xfs_log_item	*lip;
 
 	lip = xfs_ail_min(ailp);
-	if (!lip || XFS_FORCED_SHUTDOWN(ailp->ail_mount) ||
+	if (!lip || xfs_is_shutdown(ailp->ail_mount) ||
 	    XFS_LSN_CMP(threshold_lsn, ailp->ail_target) <= 0)
 		return;
 
@@ -743,7 +748,7 @@ xfs_ail_update_finish(
 		return;
 	}
 
-	if (!XFS_FORCED_SHUTDOWN(mp))
+	if (!xfs_is_shutdown(mp))
 		xlog_assign_tail_lsn_locked(mp);
 
 	if (list_empty(&ailp->ail_head))
@@ -863,7 +868,7 @@ xfs_trans_ail_delete(
 	spin_lock(&ailp->ail_lock);
 	if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) {
 		spin_unlock(&ailp->ail_lock);
-		if (shutdown_type && !XFS_FORCED_SHUTDOWN(mp)) {
+		if (shutdown_type && !xfs_is_shutdown(mp)) {
 			xfs_alert_tag(mp, XFS_PTAG_AILDELETE,
 	"%s: attempting to delete a log item that is not in the AIL",
 					__func__);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index d11d032da0b4..6549e50d852c 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -38,7 +38,7 @@ xfs_trans_buf_item_match(
 		blip = (struct xfs_buf_log_item *)lip;
 		if (blip->bli_item.li_type == XFS_LI_BUF &&
 		    blip->bli_buf->b_target == target &&
-		    XFS_BUF_ADDR(blip->bli_buf) == map[0].bm_bn &&
+		    xfs_buf_daddr(blip->bli_buf) == map[0].bm_bn &&
 		    blip->bli_buf->b_length == len) {
 			ASSERT(blip->bli_buf->b_map_count == nmaps);
 			return blip->bli_buf;
@@ -138,7 +138,7 @@ xfs_trans_get_buf_map(
 	bp = xfs_trans_buf_item_match(tp, target, map, nmaps);
 	if (bp != NULL) {
 		ASSERT(xfs_buf_islocked(bp));
-		if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) {
+		if (xfs_is_shutdown(tp->t_mountp)) {
 			xfs_buf_stale(bp);
 			bp->b_flags |= XBF_DONE;
 		}
@@ -244,7 +244,7 @@ xfs_trans_read_buf_map(
 		 * We never locked this buf ourselves, so we shouldn't
 		 * brelse it either. Just get out.
 		 */
-		if (XFS_FORCED_SHUTDOWN(mp)) {
+		if (xfs_is_shutdown(mp)) {
 			trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
 			return -EIO;
 		}
@@ -300,7 +300,7 @@ xfs_trans_read_buf_map(
 		return error;
 	}
 
-	if (XFS_FORCED_SHUTDOWN(mp)) {
+	if (xfs_is_shutdown(mp)) {
 		xfs_buf_relse(bp);
 		trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
 		return -EIO;
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 48e09ea30ee5..3872ce671411 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -58,7 +58,7 @@ xfs_trans_log_dquot(
 
 	/* Upgrade the dquot to bigtime format if possible. */
 	if (dqp->q_id != 0 &&
-	    xfs_sb_version_hasbigtime(&tp->t_mountp->m_sb) &&
+	    xfs_has_bigtime(tp->t_mountp) &&
 	    !(dqp->q_type & XFS_DQTYPE_BIGTIME))
 		dqp->q_type |= XFS_DQTYPE_BIGTIME;
 
@@ -132,8 +132,7 @@ xfs_trans_mod_dquot_byino(
 {
 	xfs_mount_t	*mp = tp->t_mountp;
 
-	if (!XFS_IS_QUOTA_RUNNING(mp) ||
-	    !XFS_IS_QUOTA_ON(mp) ||
+	if (!XFS_IS_QUOTA_ON(mp) ||
 	    xfs_is_quota_inode(&mp->m_sb, ip->i_ino))
 		return;
 
@@ -192,7 +191,7 @@ xfs_trans_mod_dquot(
 	struct xfs_dqtrx	*qtrx;
 
 	ASSERT(tp);
-	ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
+	ASSERT(XFS_IS_QUOTA_ON(tp->t_mountp));
 	qtrx = NULL;
 
 	if (!delta)
@@ -738,7 +737,7 @@ xfs_trans_reserve_quota_bydquots(
 {
 	int		error;
 
-	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+	if (!XFS_IS_QUOTA_ON(mp))
 		return 0;
 
 	ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
@@ -795,7 +794,7 @@ xfs_trans_reserve_quota_nblks(
 	unsigned int		qflags = 0;
 	int			error;
 
-	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+	if (!XFS_IS_QUOTA_ON(mp))
 		return 0;
 
 	ASSERT(!xfs_is_quota_inode(&mp->m_sb, ip->i_ino));
@@ -836,51 +835,13 @@ xfs_trans_reserve_quota_icreate(
 {
 	struct xfs_mount	*mp = tp->t_mountp;
 
-	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+	if (!XFS_IS_QUOTA_ON(mp))
 		return 0;
 
 	return xfs_trans_reserve_quota_bydquots(tp, mp, udqp, gdqp, pdqp,
 			dblocks, 1, XFS_QMOPT_RES_REGBLKS);
 }
 
-/*
- * This routine is called to allocate a quotaoff log item.
- */
-struct xfs_qoff_logitem *
-xfs_trans_get_qoff_item(
-	struct xfs_trans	*tp,
-	struct xfs_qoff_logitem	*startqoff,
-	uint			flags)
-{
-	struct xfs_qoff_logitem	*q;
-
-	ASSERT(tp != NULL);
-
-	q = xfs_qm_qoff_logitem_init(tp->t_mountp, startqoff, flags);
-	ASSERT(q != NULL);
-
-	/*
-	 * Get a log_item_desc to point at the new item.
-	 */
-	xfs_trans_add_item(tp, &q->qql_item);
-	return q;
-}
-
-
-/*
- * This is called to mark the quotaoff logitem as needing
- * to be logged when the transaction is committed.  The logitem must
- * already be associated with the given transaction.
- */
-void
-xfs_trans_log_quotaoff_item(
-	struct xfs_trans	*tp,
-	struct xfs_qoff_logitem	*qlp)
-{
-	tp->t_flags |= XFS_TRANS_DIRTY;
-	set_bit(XFS_LI_DIRTY, &qlp->qql_item.li_flags);
-}
-
 STATIC void
 xfs_trans_alloc_dqinfo(
 	xfs_trans_t	*tp)
diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
index 9f4985b4d64d..bc159a9b4a73 100644
--- a/include/acpi/cppc_acpi.h
+++ b/include/acpi/cppc_acpi.h
@@ -135,6 +135,7 @@ struct cppc_cpudata {
 
 #ifdef CONFIG_ACPI_CPPC_LIB
 extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf);
+extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf);
 extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs);
 extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls);
 extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps);
@@ -149,6 +150,10 @@ static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
 {
 	return -ENOTSUPP;
 }
+static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
+{
+	return -ENOTSUPP;
+}
 static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs)
 {
 	return -ENOTSUPP;
diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h
index f6656be81760..fb172a03a753 100644
--- a/include/acpi/platform/acgcc.h
+++ b/include/acpi/platform/acgcc.h
@@ -22,7 +22,7 @@ typedef __builtin_va_list va_list;
 #define va_arg(v, l)            __builtin_va_arg(v, l)
 #define va_copy(d, s)           __builtin_va_copy(d, s)
 #else
-#include <stdarg.h>
+#include <linux/stdarg.h>
 #endif
 #endif
 
diff --git a/include/asm-generic/div64.h b/include/asm-generic/div64.h
index cd905b44a630..13f5aa68a455 100644
--- a/include/asm-generic/div64.h
+++ b/include/asm-generic/div64.h
@@ -57,17 +57,11 @@
 /*
  * If the divisor happens to be constant, we determine the appropriate
  * inverse at compile time to turn the division into a few inline
- * multiplications which ought to be much faster. And yet only if compiling
- * with a sufficiently recent gcc version to perform proper 64-bit constant
- * propagation.
+ * multiplications which ought to be much faster.
  *
  * (It is unfortunate that gcc doesn't perform all this internally.)
  */
 
-#ifndef __div64_const32_is_OK
-#define __div64_const32_is_OK (__GNUC__ >= 4)
-#endif
-
 #define __div64_const32(n, ___b)					\
 ({									\
 	/*								\
@@ -230,8 +224,7 @@ extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor);
 	    is_power_of_2(__base)) {			\
 		__rem = (n) & (__base - 1);		\
 		(n) >>= ilog2(__base);			\
-	} else if (__div64_const32_is_OK &&		\
-		   __builtin_constant_p(__base) &&	\
+	} else if (__builtin_constant_p(__base) &&	\
 		   __base != 0) {			\
 		uint32_t __res_lo, __n_lo = (n);	\
 		(n) = __div64_const32(n, __base);	\
@@ -241,8 +234,9 @@ extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor);
 	} else if (likely(((n) >> 32) == 0)) {		\
 		__rem = (uint32_t)(n) % __base;		\
 		(n) = (uint32_t)(n) / __base;		\
-	} else 						\
+	} else {					\
 		__rem = __div64_32(&(n), __base);	\
+	}						\
 	__rem;						\
  })
 
diff --git a/include/asm-generic/early_ioremap.h b/include/asm-generic/early_ioremap.h
index 9def22e6e2b3..9d0479f50f97 100644
--- a/include/asm-generic/early_ioremap.h
+++ b/include/asm-generic/early_ioremap.h
@@ -19,12 +19,6 @@ extern void *early_memremap_prot(resource_size_t phys_addr,
 extern void early_iounmap(void __iomem *addr, unsigned long size);
 extern void early_memunmap(void *addr, unsigned long size);
 
-/*
- * Weak function called by early_ioremap_reset(). It does nothing, but
- * architectures may provide their own version to do any needed cleanups.
- */
-extern void early_ioremap_shutdown(void);
-
 #if defined(CONFIG_GENERIC_EARLY_IOREMAP) && defined(CONFIG_MMU)
 /* Arch-specific initialization */
 extern void early_ioremap_init(void);
diff --git a/include/asm-generic/pci_iomap.h b/include/asm-generic/pci_iomap.h
index d4f16dcc2ed7..df636c6d8e6c 100644
--- a/include/asm-generic/pci_iomap.h
+++ b/include/asm-generic/pci_iomap.h
@@ -52,4 +52,4 @@ static inline void __iomem *pci_iomap_wc_range(struct pci_dev *dev, int bar,
 }
 #endif
 
-#endif /* __ASM_GENERIC_IO_H */
+#endif /* __ASM_GENERIC_PCI_IOMAP_H */
diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
index 818680c6a8ed..b20e89d321b0 100644
--- a/include/drm/ttm/ttm_tt.h
+++ b/include/drm/ttm/ttm_tt.h
@@ -27,11 +27,12 @@
 #ifndef _TTM_TT_H_
 #define _TTM_TT_H_
 
+#include <linux/pagemap.h>
 #include <linux/types.h>
 #include <drm/ttm/ttm_caching.h>
 #include <drm/ttm/ttm_kmap_iter.h>
 
-struct ttm_bo_device;
+struct ttm_device;
 struct ttm_tt;
 struct ttm_resource;
 struct ttm_buffer_object;
diff --git a/include/dt-bindings/clock/imx8mn-clock.h b/include/dt-bindings/clock/imx8mn-clock.h
index d24b627cb2e7..01e8bab1d767 100644
--- a/include/dt-bindings/clock/imx8mn-clock.h
+++ b/include/dt-bindings/clock/imx8mn-clock.h
@@ -241,6 +241,8 @@
 #define IMX8MN_CLK_CLKOUT2_DIV			219
 #define IMX8MN_CLK_CLKOUT2			220
 
-#define IMX8MN_CLK_END				221
+#define IMX8MN_CLK_M7_CORE			221
+
+#define IMX8MN_CLK_END				222
 
 #endif
diff --git a/include/dt-bindings/clock/mt8192-clk.h b/include/dt-bindings/clock/mt8192-clk.h
new file mode 100644
index 000000000000..5ab68f15a256
--- /dev/null
+++ b/include/dt-bindings/clock/mt8192-clk.h
@@ -0,0 +1,585 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021 MediaTek Inc.
+ * Author: Chun-Jie Chen <chun-jie.chen@mediatek.com>
+ */
+
+#ifndef _DT_BINDINGS_CLK_MT8192_H
+#define _DT_BINDINGS_CLK_MT8192_H
+
+/* TOPCKGEN */
+
+#define CLK_TOP_AXI_SEL			0
+#define CLK_TOP_SPM_SEL			1
+#define CLK_TOP_SCP_SEL			2
+#define CLK_TOP_BUS_AXIMEM_SEL		3
+#define CLK_TOP_DISP_SEL		4
+#define CLK_TOP_MDP_SEL			5
+#define CLK_TOP_IMG1_SEL		6
+#define CLK_TOP_IMG2_SEL		7
+#define CLK_TOP_IPE_SEL			8
+#define CLK_TOP_DPE_SEL			9
+#define CLK_TOP_CAM_SEL			10
+#define CLK_TOP_CCU_SEL			11
+#define CLK_TOP_DSP7_SEL		12
+#define CLK_TOP_MFG_REF_SEL		13
+#define CLK_TOP_MFG_PLL_SEL		14
+#define CLK_TOP_CAMTG_SEL		15
+#define CLK_TOP_CAMTG2_SEL		16
+#define CLK_TOP_CAMTG3_SEL		17
+#define CLK_TOP_CAMTG4_SEL		18
+#define CLK_TOP_CAMTG5_SEL		19
+#define CLK_TOP_CAMTG6_SEL		20
+#define CLK_TOP_UART_SEL		21
+#define CLK_TOP_SPI_SEL			22
+#define CLK_TOP_MSDC50_0_H_SEL		23
+#define CLK_TOP_MSDC50_0_SEL		24
+#define CLK_TOP_MSDC30_1_SEL		25
+#define CLK_TOP_MSDC30_2_SEL		26
+#define CLK_TOP_AUDIO_SEL		27
+#define CLK_TOP_AUD_INTBUS_SEL		28
+#define CLK_TOP_PWRAP_ULPOSC_SEL	29
+#define CLK_TOP_ATB_SEL			30
+#define CLK_TOP_DPI_SEL			31
+#define CLK_TOP_SCAM_SEL		32
+#define CLK_TOP_DISP_PWM_SEL		33
+#define CLK_TOP_USB_TOP_SEL		34
+#define CLK_TOP_SSUSB_XHCI_SEL		35
+#define CLK_TOP_I2C_SEL			36
+#define CLK_TOP_SENINF_SEL		37
+#define CLK_TOP_SENINF1_SEL		38
+#define CLK_TOP_SENINF2_SEL		39
+#define CLK_TOP_SENINF3_SEL		40
+#define CLK_TOP_TL_SEL			41
+#define CLK_TOP_DXCC_SEL		42
+#define CLK_TOP_AUD_ENGEN1_SEL		43
+#define CLK_TOP_AUD_ENGEN2_SEL		44
+#define CLK_TOP_AES_UFSFDE_SEL		45
+#define CLK_TOP_UFS_SEL			46
+#define CLK_TOP_AUD_1_SEL		47
+#define CLK_TOP_AUD_2_SEL		48
+#define CLK_TOP_ADSP_SEL		49
+#define CLK_TOP_DPMAIF_MAIN_SEL		50
+#define CLK_TOP_VENC_SEL		51
+#define CLK_TOP_VDEC_SEL		52
+#define CLK_TOP_CAMTM_SEL		53
+#define CLK_TOP_PWM_SEL			54
+#define CLK_TOP_AUDIO_H_SEL		55
+#define CLK_TOP_SPMI_MST_SEL		56
+#define CLK_TOP_AES_MSDCFDE_SEL		57
+#define CLK_TOP_SFLASH_SEL		58
+#define CLK_TOP_APLL_I2S0_M_SEL		59
+#define CLK_TOP_APLL_I2S1_M_SEL		60
+#define CLK_TOP_APLL_I2S2_M_SEL		61
+#define CLK_TOP_APLL_I2S3_M_SEL		62
+#define CLK_TOP_APLL_I2S4_M_SEL		63
+#define CLK_TOP_APLL_I2S5_M_SEL		64
+#define CLK_TOP_APLL_I2S6_M_SEL		65
+#define CLK_TOP_APLL_I2S7_M_SEL		66
+#define CLK_TOP_APLL_I2S8_M_SEL		67
+#define CLK_TOP_APLL_I2S9_M_SEL		68
+#define CLK_TOP_MAINPLL_D3		69
+#define CLK_TOP_MAINPLL_D4		70
+#define CLK_TOP_MAINPLL_D4_D2		71
+#define CLK_TOP_MAINPLL_D4_D4		72
+#define CLK_TOP_MAINPLL_D4_D8		73
+#define CLK_TOP_MAINPLL_D4_D16		74
+#define CLK_TOP_MAINPLL_D5		75
+#define CLK_TOP_MAINPLL_D5_D2		76
+#define CLK_TOP_MAINPLL_D5_D4		77
+#define CLK_TOP_MAINPLL_D5_D8		78
+#define CLK_TOP_MAINPLL_D6		79
+#define CLK_TOP_MAINPLL_D6_D2		80
+#define CLK_TOP_MAINPLL_D6_D4		81
+#define CLK_TOP_MAINPLL_D7		82
+#define CLK_TOP_MAINPLL_D7_D2		83
+#define CLK_TOP_MAINPLL_D7_D4		84
+#define CLK_TOP_MAINPLL_D7_D8		85
+#define CLK_TOP_UNIVPLL_D3		86
+#define CLK_TOP_UNIVPLL_D4		87
+#define CLK_TOP_UNIVPLL_D4_D2		88
+#define CLK_TOP_UNIVPLL_D4_D4		89
+#define CLK_TOP_UNIVPLL_D4_D8		90
+#define CLK_TOP_UNIVPLL_D5		91
+#define CLK_TOP_UNIVPLL_D5_D2		92
+#define CLK_TOP_UNIVPLL_D5_D4		93
+#define CLK_TOP_UNIVPLL_D5_D8		94
+#define CLK_TOP_UNIVPLL_D6		95
+#define CLK_TOP_UNIVPLL_D6_D2		96
+#define CLK_TOP_UNIVPLL_D6_D4		97
+#define CLK_TOP_UNIVPLL_D6_D8		98
+#define CLK_TOP_UNIVPLL_D6_D16		99
+#define CLK_TOP_UNIVPLL_D7		100
+#define CLK_TOP_APLL1			101
+#define CLK_TOP_APLL1_D2		102
+#define CLK_TOP_APLL1_D4		103
+#define CLK_TOP_APLL1_D8		104
+#define CLK_TOP_APLL2			105
+#define CLK_TOP_APLL2_D2		106
+#define CLK_TOP_APLL2_D4		107
+#define CLK_TOP_APLL2_D8		108
+#define CLK_TOP_MMPLL_D4		109
+#define CLK_TOP_MMPLL_D4_D2		110
+#define CLK_TOP_MMPLL_D5		111
+#define CLK_TOP_MMPLL_D5_D2		112
+#define CLK_TOP_MMPLL_D6		113
+#define CLK_TOP_MMPLL_D6_D2		114
+#define CLK_TOP_MMPLL_D7		115
+#define CLK_TOP_MMPLL_D9		116
+#define CLK_TOP_APUPLL			117
+#define CLK_TOP_NPUPLL			118
+#define CLK_TOP_TVDPLL			119
+#define CLK_TOP_TVDPLL_D2		120
+#define CLK_TOP_TVDPLL_D4		121
+#define CLK_TOP_TVDPLL_D8		122
+#define CLK_TOP_TVDPLL_D16		123
+#define CLK_TOP_MSDCPLL			124
+#define CLK_TOP_MSDCPLL_D2		125
+#define CLK_TOP_MSDCPLL_D4		126
+#define CLK_TOP_ULPOSC			127
+#define CLK_TOP_OSC_D2			128
+#define CLK_TOP_OSC_D4			129
+#define CLK_TOP_OSC_D8			130
+#define CLK_TOP_OSC_D10			131
+#define CLK_TOP_OSC_D16			132
+#define CLK_TOP_OSC_D20			133
+#define CLK_TOP_CSW_F26M_D2		134
+#define CLK_TOP_ADSPPLL			135
+#define CLK_TOP_UNIVPLL_192M		136
+#define CLK_TOP_UNIVPLL_192M_D2		137
+#define CLK_TOP_UNIVPLL_192M_D4		138
+#define CLK_TOP_UNIVPLL_192M_D8		139
+#define CLK_TOP_UNIVPLL_192M_D16	140
+#define CLK_TOP_UNIVPLL_192M_D32	141
+#define CLK_TOP_APLL12_DIV0		142
+#define CLK_TOP_APLL12_DIV1		143
+#define CLK_TOP_APLL12_DIV2		144
+#define CLK_TOP_APLL12_DIV3		145
+#define CLK_TOP_APLL12_DIV4		146
+#define CLK_TOP_APLL12_DIVB		147
+#define CLK_TOP_APLL12_DIV5		148
+#define CLK_TOP_APLL12_DIV6		149
+#define CLK_TOP_APLL12_DIV7		150
+#define CLK_TOP_APLL12_DIV8		151
+#define CLK_TOP_APLL12_DIV9		152
+#define CLK_TOP_SSUSB_TOP_REF		153
+#define CLK_TOP_SSUSB_PHY_REF		154
+#define CLK_TOP_NR_CLK			155
+
+/* INFRACFG */
+
+#define CLK_INFRA_PMIC_TMR		0
+#define CLK_INFRA_PMIC_AP		1
+#define CLK_INFRA_PMIC_MD		2
+#define CLK_INFRA_PMIC_CONN		3
+#define CLK_INFRA_SCPSYS		4
+#define CLK_INFRA_SEJ			5
+#define CLK_INFRA_APXGPT		6
+#define CLK_INFRA_GCE			7
+#define CLK_INFRA_GCE2			8
+#define CLK_INFRA_THERM			9
+#define CLK_INFRA_I2C0			10
+#define CLK_INFRA_AP_DMA_PSEUDO		11
+#define CLK_INFRA_I2C2			12
+#define CLK_INFRA_I2C3			13
+#define CLK_INFRA_PWM_H			14
+#define CLK_INFRA_PWM1			15
+#define CLK_INFRA_PWM2			16
+#define CLK_INFRA_PWM3			17
+#define CLK_INFRA_PWM4			18
+#define CLK_INFRA_PWM			19
+#define CLK_INFRA_UART0			20
+#define CLK_INFRA_UART1			21
+#define CLK_INFRA_UART2			22
+#define CLK_INFRA_UART3			23
+#define CLK_INFRA_GCE_26M		24
+#define CLK_INFRA_CQ_DMA_FPC		25
+#define CLK_INFRA_BTIF			26
+#define CLK_INFRA_SPI0			27
+#define CLK_INFRA_MSDC0			28
+#define CLK_INFRA_MSDC1			29
+#define CLK_INFRA_MSDC2			30
+#define CLK_INFRA_MSDC0_SRC		31
+#define CLK_INFRA_GCPU			32
+#define CLK_INFRA_TRNG			33
+#define CLK_INFRA_AUXADC		34
+#define CLK_INFRA_CPUM			35
+#define CLK_INFRA_CCIF1_AP		36
+#define CLK_INFRA_CCIF1_MD		37
+#define CLK_INFRA_AUXADC_MD		38
+#define CLK_INFRA_PCIE_TL_26M		39
+#define CLK_INFRA_MSDC1_SRC		40
+#define CLK_INFRA_MSDC2_SRC		41
+#define CLK_INFRA_PCIE_TL_96M		42
+#define CLK_INFRA_PCIE_PL_P_250M	43
+#define CLK_INFRA_DEVICE_APC		44
+#define CLK_INFRA_CCIF_AP		45
+#define CLK_INFRA_DEBUGSYS		46
+#define CLK_INFRA_AUDIO			47
+#define CLK_INFRA_CCIF_MD		48
+#define CLK_INFRA_DXCC_SEC_CORE		49
+#define CLK_INFRA_DXCC_AO		50
+#define CLK_INFRA_DBG_TRACE		51
+#define CLK_INFRA_DEVMPU_B		52
+#define CLK_INFRA_DRAMC_F26M		53
+#define CLK_INFRA_IRTX			54
+#define CLK_INFRA_SSUSB			55
+#define CLK_INFRA_DISP_PWM		56
+#define CLK_INFRA_CLDMA_B		57
+#define CLK_INFRA_AUDIO_26M_B		58
+#define CLK_INFRA_MODEM_TEMP_SHARE	59
+#define CLK_INFRA_SPI1			60
+#define CLK_INFRA_I2C4			61
+#define CLK_INFRA_SPI2			62
+#define CLK_INFRA_SPI3			63
+#define CLK_INFRA_UNIPRO_SYS		64
+#define CLK_INFRA_UNIPRO_TICK		65
+#define CLK_INFRA_UFS_MP_SAP_B		66
+#define CLK_INFRA_MD32_B		67
+#define CLK_INFRA_UNIPRO_MBIST		68
+#define CLK_INFRA_I2C5			69
+#define CLK_INFRA_I2C5_ARBITER		70
+#define CLK_INFRA_I2C5_IMM		71
+#define CLK_INFRA_I2C1_ARBITER		72
+#define CLK_INFRA_I2C1_IMM		73
+#define CLK_INFRA_I2C2_ARBITER		74
+#define CLK_INFRA_I2C2_IMM		75
+#define CLK_INFRA_SPI4			76
+#define CLK_INFRA_SPI5			77
+#define CLK_INFRA_CQ_DMA		78
+#define CLK_INFRA_UFS			79
+#define CLK_INFRA_AES_UFSFDE		80
+#define CLK_INFRA_UFS_TICK		81
+#define CLK_INFRA_SSUSB_XHCI		82
+#define CLK_INFRA_MSDC0_SELF		83
+#define CLK_INFRA_MSDC1_SELF		84
+#define CLK_INFRA_MSDC2_SELF		85
+#define CLK_INFRA_UFS_AXI		86
+#define CLK_INFRA_I2C6			87
+#define CLK_INFRA_AP_MSDC0		88
+#define CLK_INFRA_MD_MSDC0		89
+#define CLK_INFRA_CCIF5_AP		90
+#define CLK_INFRA_CCIF5_MD		91
+#define CLK_INFRA_PCIE_TOP_H_133M	92
+#define CLK_INFRA_FLASHIF_TOP_H_133M	93
+#define CLK_INFRA_PCIE_PERI_26M		94
+#define CLK_INFRA_CCIF2_AP		95
+#define CLK_INFRA_CCIF2_MD		96
+#define CLK_INFRA_CCIF3_AP		97
+#define CLK_INFRA_CCIF3_MD		98
+#define CLK_INFRA_SEJ_F13M		99
+#define CLK_INFRA_AES			100
+#define CLK_INFRA_I2C7			101
+#define CLK_INFRA_I2C8			102
+#define CLK_INFRA_FBIST2FPC		103
+#define CLK_INFRA_DEVICE_APC_SYNC	104
+#define CLK_INFRA_DPMAIF_MAIN		105
+#define CLK_INFRA_PCIE_TL_32K		106
+#define CLK_INFRA_CCIF4_AP		107
+#define CLK_INFRA_CCIF4_MD		108
+#define CLK_INFRA_SPI6			109
+#define CLK_INFRA_SPI7			110
+#define CLK_INFRA_133M			111
+#define CLK_INFRA_66M			112
+#define CLK_INFRA_66M_PERI_BUS		113
+#define CLK_INFRA_FREE_DCM_133M		114
+#define CLK_INFRA_FREE_DCM_66M		115
+#define CLK_INFRA_PERI_BUS_DCM_133M	116
+#define CLK_INFRA_PERI_BUS_DCM_66M	117
+#define CLK_INFRA_FLASHIF_PERI_26M	118
+#define CLK_INFRA_FLASHIF_SFLASH	119
+#define CLK_INFRA_AP_DMA		120
+#define CLK_INFRA_NR_CLK		121
+
+/* PERICFG */
+
+#define CLK_PERI_PERIAXI		0
+#define CLK_PERI_NR_CLK			1
+
+/* APMIXEDSYS */
+
+#define CLK_APMIXED_MAINPLL		0
+#define CLK_APMIXED_UNIVPLL		1
+#define CLK_APMIXED_USBPLL		2
+#define CLK_APMIXED_MSDCPLL		3
+#define CLK_APMIXED_MMPLL		4
+#define CLK_APMIXED_ADSPPLL		5
+#define CLK_APMIXED_MFGPLL		6
+#define CLK_APMIXED_TVDPLL		7
+#define CLK_APMIXED_APLL1		8
+#define CLK_APMIXED_APLL2		9
+#define CLK_APMIXED_MIPID26M		10
+#define CLK_APMIXED_NR_CLK		11
+
+/* SCP_ADSP */
+
+#define CLK_SCP_ADSP_AUDIODSP		0
+#define CLK_SCP_ADSP_NR_CLK		1
+
+/* IMP_IIC_WRAP_C */
+
+#define CLK_IMP_IIC_WRAP_C_I2C10	0
+#define CLK_IMP_IIC_WRAP_C_I2C11	1
+#define CLK_IMP_IIC_WRAP_C_I2C12	2
+#define CLK_IMP_IIC_WRAP_C_I2C13	3
+#define CLK_IMP_IIC_WRAP_C_NR_CLK	4
+
+/* AUDSYS */
+
+#define CLK_AUD_AFE			0
+#define CLK_AUD_22M			1
+#define CLK_AUD_24M			2
+#define CLK_AUD_APLL2_TUNER		3
+#define CLK_AUD_APLL_TUNER		4
+#define CLK_AUD_TDM			5
+#define CLK_AUD_ADC			6
+#define CLK_AUD_DAC			7
+#define CLK_AUD_DAC_PREDIS		8
+#define CLK_AUD_TML			9
+#define CLK_AUD_NLE			10
+#define CLK_AUD_I2S1_B			11
+#define CLK_AUD_I2S2_B			12
+#define CLK_AUD_I2S3_B			13
+#define CLK_AUD_I2S4_B			14
+#define CLK_AUD_CONNSYS_I2S_ASRC	15
+#define CLK_AUD_GENERAL1_ASRC		16
+#define CLK_AUD_GENERAL2_ASRC		17
+#define CLK_AUD_DAC_HIRES		18
+#define CLK_AUD_ADC_HIRES		19
+#define CLK_AUD_ADC_HIRES_TML		20
+#define CLK_AUD_ADDA6_ADC		21
+#define CLK_AUD_ADDA6_ADC_HIRES		22
+#define CLK_AUD_3RD_DAC			23
+#define CLK_AUD_3RD_DAC_PREDIS		24
+#define CLK_AUD_3RD_DAC_TML		25
+#define CLK_AUD_3RD_DAC_HIRES		26
+#define CLK_AUD_I2S5_B			27
+#define CLK_AUD_I2S6_B			28
+#define CLK_AUD_I2S7_B			29
+#define CLK_AUD_I2S8_B			30
+#define CLK_AUD_I2S9_B			31
+#define CLK_AUD_NR_CLK			32
+
+/* IMP_IIC_WRAP_E */
+
+#define CLK_IMP_IIC_WRAP_E_I2C3		0
+#define CLK_IMP_IIC_WRAP_E_NR_CLK	1
+
+/* IMP_IIC_WRAP_S */
+
+#define CLK_IMP_IIC_WRAP_S_I2C7		0
+#define CLK_IMP_IIC_WRAP_S_I2C8		1
+#define CLK_IMP_IIC_WRAP_S_I2C9		2
+#define CLK_IMP_IIC_WRAP_S_NR_CLK	3
+
+/* IMP_IIC_WRAP_WS */
+
+#define CLK_IMP_IIC_WRAP_WS_I2C1	0
+#define CLK_IMP_IIC_WRAP_WS_I2C2	1
+#define CLK_IMP_IIC_WRAP_WS_I2C4	2
+#define CLK_IMP_IIC_WRAP_WS_NR_CLK	3
+
+/* IMP_IIC_WRAP_W */
+
+#define CLK_IMP_IIC_WRAP_W_I2C5		0
+#define CLK_IMP_IIC_WRAP_W_NR_CLK	1
+
+/* IMP_IIC_WRAP_N */
+
+#define CLK_IMP_IIC_WRAP_N_I2C0		0
+#define CLK_IMP_IIC_WRAP_N_I2C6		1
+#define CLK_IMP_IIC_WRAP_N_NR_CLK	2
+
+/* MSDC_TOP */
+
+#define CLK_MSDC_TOP_AES_0P		0
+#define CLK_MSDC_TOP_SRC_0P		1
+#define CLK_MSDC_TOP_SRC_1P		2
+#define CLK_MSDC_TOP_SRC_2P		3
+#define CLK_MSDC_TOP_P_MSDC0		4
+#define CLK_MSDC_TOP_P_MSDC1		5
+#define CLK_MSDC_TOP_P_MSDC2		6
+#define CLK_MSDC_TOP_P_CFG		7
+#define CLK_MSDC_TOP_AXI		8
+#define CLK_MSDC_TOP_H_MST_0P		9
+#define CLK_MSDC_TOP_H_MST_1P		10
+#define CLK_MSDC_TOP_H_MST_2P		11
+#define CLK_MSDC_TOP_MEM_OFF_DLY_26M	12
+#define CLK_MSDC_TOP_32K		13
+#define CLK_MSDC_TOP_AHB2AXI_BRG_AXI	14
+#define CLK_MSDC_TOP_NR_CLK		15
+
+/* MSDC */
+
+#define CLK_MSDC_AXI_WRAP		0
+#define CLK_MSDC_NR_CLK			1
+
+/* MFGCFG */
+
+#define CLK_MFG_BG3D			0
+#define CLK_MFG_NR_CLK			1
+
+/* MMSYS */
+
+#define CLK_MM_DISP_MUTEX0		0
+#define CLK_MM_DISP_CONFIG		1
+#define CLK_MM_DISP_OVL0		2
+#define CLK_MM_DISP_RDMA0		3
+#define CLK_MM_DISP_OVL0_2L		4
+#define CLK_MM_DISP_WDMA0		5
+#define CLK_MM_DISP_UFBC_WDMA0		6
+#define CLK_MM_DISP_RSZ0		7
+#define CLK_MM_DISP_AAL0		8
+#define CLK_MM_DISP_CCORR0		9
+#define CLK_MM_DISP_DITHER0		10
+#define CLK_MM_SMI_INFRA		11
+#define CLK_MM_DISP_GAMMA0		12
+#define CLK_MM_DISP_POSTMASK0		13
+#define CLK_MM_DISP_DSC_WRAP0		14
+#define CLK_MM_DSI0			15
+#define CLK_MM_DISP_COLOR0		16
+#define CLK_MM_SMI_COMMON		17
+#define CLK_MM_DISP_FAKE_ENG0		18
+#define CLK_MM_DISP_FAKE_ENG1		19
+#define CLK_MM_MDP_TDSHP4		20
+#define CLK_MM_MDP_RSZ4			21
+#define CLK_MM_MDP_AAL4			22
+#define CLK_MM_MDP_HDR4			23
+#define CLK_MM_MDP_RDMA4		24
+#define CLK_MM_MDP_COLOR4		25
+#define CLK_MM_DISP_Y2R0		26
+#define CLK_MM_SMI_GALS			27
+#define CLK_MM_DISP_OVL2_2L		28
+#define CLK_MM_DISP_RDMA4		29
+#define CLK_MM_DISP_DPI0		30
+#define CLK_MM_SMI_IOMMU		31
+#define CLK_MM_DSI_DSI0			32
+#define CLK_MM_DPI_DPI0			33
+#define CLK_MM_26MHZ			34
+#define CLK_MM_32KHZ			35
+#define CLK_MM_NR_CLK			36
+
+/* IMGSYS */
+
+#define CLK_IMG_LARB9			0
+#define CLK_IMG_LARB10			1
+#define CLK_IMG_DIP			2
+#define CLK_IMG_GALS			3
+#define CLK_IMG_NR_CLK			4
+
+/* IMGSYS2 */
+
+#define CLK_IMG2_LARB11			0
+#define CLK_IMG2_LARB12			1
+#define CLK_IMG2_MFB			2
+#define CLK_IMG2_WPE			3
+#define CLK_IMG2_MSS			4
+#define CLK_IMG2_GALS			5
+#define CLK_IMG2_NR_CLK			6
+
+/* VDECSYS_SOC */
+
+#define CLK_VDEC_SOC_LARB1		0
+#define CLK_VDEC_SOC_LAT		1
+#define CLK_VDEC_SOC_LAT_ACTIVE		2
+#define CLK_VDEC_SOC_VDEC		3
+#define CLK_VDEC_SOC_VDEC_ACTIVE	4
+#define CLK_VDEC_SOC_NR_CLK		5
+
+/* VDECSYS */
+
+#define CLK_VDEC_LARB1			0
+#define CLK_VDEC_LAT			1
+#define CLK_VDEC_LAT_ACTIVE		2
+#define CLK_VDEC_VDEC			3
+#define CLK_VDEC_ACTIVE			4
+#define CLK_VDEC_NR_CLK			5
+
+/* VENCSYS */
+
+#define CLK_VENC_SET0_LARB		0
+#define CLK_VENC_SET1_VENC		1
+#define CLK_VENC_SET2_JPGENC		2
+#define CLK_VENC_SET5_GALS		3
+#define CLK_VENC_NR_CLK			4
+
+/* CAMSYS */
+
+#define CLK_CAM_LARB13			0
+#define CLK_CAM_DFP_VAD			1
+#define CLK_CAM_LARB14			2
+#define CLK_CAM_CAM			3
+#define CLK_CAM_CAMTG			4
+#define CLK_CAM_SENINF			5
+#define CLK_CAM_CAMSV0			6
+#define CLK_CAM_CAMSV1			7
+#define CLK_CAM_CAMSV2			8
+#define CLK_CAM_CAMSV3			9
+#define CLK_CAM_CCU0			10
+#define CLK_CAM_CCU1			11
+#define CLK_CAM_MRAW0			12
+#define CLK_CAM_FAKE_ENG		13
+#define CLK_CAM_CCU_GALS		14
+#define CLK_CAM_CAM2MM_GALS		15
+#define CLK_CAM_NR_CLK			16
+
+/* CAMSYS_RAWA */
+
+#define CLK_CAM_RAWA_LARBX		0
+#define CLK_CAM_RAWA_CAM		1
+#define CLK_CAM_RAWA_CAMTG		2
+#define CLK_CAM_RAWA_NR_CLK		3
+
+/* CAMSYS_RAWB */
+
+#define CLK_CAM_RAWB_LARBX		0
+#define CLK_CAM_RAWB_CAM		1
+#define CLK_CAM_RAWB_CAMTG		2
+#define CLK_CAM_RAWB_NR_CLK		3
+
+/* CAMSYS_RAWC */
+
+#define CLK_CAM_RAWC_LARBX		0
+#define CLK_CAM_RAWC_CAM		1
+#define CLK_CAM_RAWC_CAMTG		2
+#define CLK_CAM_RAWC_NR_CLK		3
+
+/* IPESYS */
+
+#define CLK_IPE_LARB19			0
+#define CLK_IPE_LARB20			1
+#define CLK_IPE_SMI_SUBCOM		2
+#define CLK_IPE_FD			3
+#define CLK_IPE_FE			4
+#define CLK_IPE_RSC			5
+#define CLK_IPE_DPE			6
+#define CLK_IPE_GALS			7
+#define CLK_IPE_NR_CLK			8
+
+/* MDPSYS */
+
+#define CLK_MDP_RDMA0			0
+#define CLK_MDP_TDSHP0			1
+#define CLK_MDP_IMG_DL_ASYNC0		2
+#define CLK_MDP_IMG_DL_ASYNC1		3
+#define CLK_MDP_RDMA1			4
+#define CLK_MDP_TDSHP1			5
+#define CLK_MDP_SMI0			6
+#define CLK_MDP_APB_BUS			7
+#define CLK_MDP_WROT0			8
+#define CLK_MDP_RSZ0			9
+#define CLK_MDP_HDR0			10
+#define CLK_MDP_MUTEX0			11
+#define CLK_MDP_WROT1			12
+#define CLK_MDP_RSZ1			13
+#define CLK_MDP_HDR1			14
+#define CLK_MDP_FAKE_ENG0		15
+#define CLK_MDP_AAL0			16
+#define CLK_MDP_AAL1			17
+#define CLK_MDP_COLOR0			18
+#define CLK_MDP_COLOR1			19
+#define CLK_MDP_IMG_DL_RELAY0_ASYNC0	20
+#define CLK_MDP_IMG_DL_RELAY1_ASYNC1	21
+#define CLK_MDP_NR_CLK			22
+
+#endif /* _DT_BINDINGS_CLK_MT8192_H */
diff --git a/include/dt-bindings/clock/qcom,dispcc-sc7280.h b/include/dt-bindings/clock/qcom,dispcc-sc7280.h
new file mode 100644
index 000000000000..a4a692c20acf
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,dispcc-sc7280.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_DISP_CC_SC7280_H
+#define _DT_BINDINGS_CLK_QCOM_DISP_CC_SC7280_H
+
+/* DISP_CC clocks */
+#define DISP_CC_PLL0					0
+#define DISP_CC_MDSS_AHB_CLK				1
+#define DISP_CC_MDSS_AHB_CLK_SRC			2
+#define DISP_CC_MDSS_BYTE0_CLK				3
+#define DISP_CC_MDSS_BYTE0_CLK_SRC			4
+#define DISP_CC_MDSS_BYTE0_DIV_CLK_SRC			5
+#define DISP_CC_MDSS_BYTE0_INTF_CLK			6
+#define DISP_CC_MDSS_DP_AUX_CLK				7
+#define DISP_CC_MDSS_DP_AUX_CLK_SRC			8
+#define DISP_CC_MDSS_DP_CRYPTO_CLK			9
+#define DISP_CC_MDSS_DP_CRYPTO_CLK_SRC			10
+#define DISP_CC_MDSS_DP_LINK_CLK			11
+#define DISP_CC_MDSS_DP_LINK_CLK_SRC			12
+#define DISP_CC_MDSS_DP_LINK_DIV_CLK_SRC		13
+#define DISP_CC_MDSS_DP_LINK_INTF_CLK			14
+#define DISP_CC_MDSS_DP_PIXEL_CLK			15
+#define DISP_CC_MDSS_DP_PIXEL_CLK_SRC			16
+#define DISP_CC_MDSS_EDP_AUX_CLK			17
+#define DISP_CC_MDSS_EDP_AUX_CLK_SRC			18
+#define DISP_CC_MDSS_EDP_LINK_CLK			19
+#define DISP_CC_MDSS_EDP_LINK_CLK_SRC			20
+#define DISP_CC_MDSS_EDP_LINK_DIV_CLK_SRC		21
+#define DISP_CC_MDSS_EDP_LINK_INTF_CLK			22
+#define DISP_CC_MDSS_EDP_PIXEL_CLK			23
+#define DISP_CC_MDSS_EDP_PIXEL_CLK_SRC			24
+#define DISP_CC_MDSS_ESC0_CLK				25
+#define DISP_CC_MDSS_ESC0_CLK_SRC			26
+#define DISP_CC_MDSS_MDP_CLK				27
+#define DISP_CC_MDSS_MDP_CLK_SRC			28
+#define DISP_CC_MDSS_MDP_LUT_CLK			29
+#define DISP_CC_MDSS_NON_GDSC_AHB_CLK			30
+#define DISP_CC_MDSS_PCLK0_CLK				31
+#define DISP_CC_MDSS_PCLK0_CLK_SRC			32
+#define DISP_CC_MDSS_ROT_CLK				33
+#define DISP_CC_MDSS_ROT_CLK_SRC			34
+#define DISP_CC_MDSS_RSCC_AHB_CLK			35
+#define DISP_CC_MDSS_RSCC_VSYNC_CLK			36
+#define DISP_CC_MDSS_VSYNC_CLK				37
+#define DISP_CC_MDSS_VSYNC_CLK_SRC			38
+#define DISP_CC_SLEEP_CLK				39
+#define DISP_CC_XO_CLK					40
+
+/* DISP_CC power domains */
+#define DISP_CC_MDSS_CORE_GDSC				0
+
+#endif
diff --git a/include/dt-bindings/clock/qcom,gcc-msm8953.h b/include/dt-bindings/clock/qcom,gcc-msm8953.h
new file mode 100644
index 000000000000..783162da6148
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,gcc-msm8953.h
@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+
+#ifndef _DT_BINDINGS_CLK_MSM_GCC_8953_H
+#define _DT_BINDINGS_CLK_MSM_GCC_8953_H
+
+/* Clocks */
+#define APC0_DROOP_DETECTOR_CLK_SRC		0
+#define APC1_DROOP_DETECTOR_CLK_SRC		1
+#define APSS_AHB_CLK_SRC			2
+#define BLSP1_QUP1_I2C_APPS_CLK_SRC		3
+#define BLSP1_QUP1_SPI_APPS_CLK_SRC		4
+#define BLSP1_QUP2_I2C_APPS_CLK_SRC		5
+#define BLSP1_QUP2_SPI_APPS_CLK_SRC		6
+#define BLSP1_QUP3_I2C_APPS_CLK_SRC		7
+#define BLSP1_QUP3_SPI_APPS_CLK_SRC		8
+#define BLSP1_QUP4_I2C_APPS_CLK_SRC		9
+#define BLSP1_QUP4_SPI_APPS_CLK_SRC		10
+#define BLSP1_UART1_APPS_CLK_SRC		11
+#define BLSP1_UART2_APPS_CLK_SRC		12
+#define BLSP2_QUP1_I2C_APPS_CLK_SRC		13
+#define BLSP2_QUP1_SPI_APPS_CLK_SRC		14
+#define BLSP2_QUP2_I2C_APPS_CLK_SRC		15
+#define BLSP2_QUP2_SPI_APPS_CLK_SRC		16
+#define BLSP2_QUP3_I2C_APPS_CLK_SRC		17
+#define BLSP2_QUP3_SPI_APPS_CLK_SRC		18
+#define BLSP2_QUP4_I2C_APPS_CLK_SRC		19
+#define BLSP2_QUP4_SPI_APPS_CLK_SRC		20
+#define BLSP2_UART1_APPS_CLK_SRC		21
+#define BLSP2_UART2_APPS_CLK_SRC		22
+#define BYTE0_CLK_SRC				23
+#define BYTE1_CLK_SRC				24
+#define CAMSS_GP0_CLK_SRC			25
+#define CAMSS_GP1_CLK_SRC			26
+#define CAMSS_TOP_AHB_CLK_SRC			27
+#define CCI_CLK_SRC				28
+#define CPP_CLK_SRC				29
+#define CRYPTO_CLK_SRC				30
+#define CSI0PHYTIMER_CLK_SRC			31
+#define CSI0P_CLK_SRC				32
+#define CSI0_CLK_SRC				33
+#define CSI1PHYTIMER_CLK_SRC			34
+#define CSI1P_CLK_SRC				35
+#define CSI1_CLK_SRC				36
+#define CSI2PHYTIMER_CLK_SRC			37
+#define CSI2P_CLK_SRC				38
+#define CSI2_CLK_SRC				39
+#define ESC0_CLK_SRC				40
+#define ESC1_CLK_SRC				41
+#define GCC_APC0_DROOP_DETECTOR_GPLL0_CLK	42
+#define GCC_APC1_DROOP_DETECTOR_GPLL0_CLK	43
+#define GCC_APSS_AHB_CLK			44
+#define GCC_APSS_AXI_CLK			45
+#define GCC_APSS_TCU_ASYNC_CLK			46
+#define GCC_BIMC_GFX_CLK			47
+#define GCC_BIMC_GPU_CLK			48
+#define GCC_BLSP1_AHB_CLK			49
+#define GCC_BLSP1_QUP1_I2C_APPS_CLK		50
+#define GCC_BLSP1_QUP1_SPI_APPS_CLK		51
+#define GCC_BLSP1_QUP2_I2C_APPS_CLK		52
+#define GCC_BLSP1_QUP2_SPI_APPS_CLK		53
+#define GCC_BLSP1_QUP3_I2C_APPS_CLK		54
+#define GCC_BLSP1_QUP3_SPI_APPS_CLK		55
+#define GCC_BLSP1_QUP4_I2C_APPS_CLK		56
+#define GCC_BLSP1_QUP4_SPI_APPS_CLK		57
+#define GCC_BLSP1_UART1_APPS_CLK		58
+#define GCC_BLSP1_UART2_APPS_CLK		59
+#define GCC_BLSP2_AHB_CLK			60
+#define GCC_BLSP2_QUP1_I2C_APPS_CLK		61
+#define GCC_BLSP2_QUP1_SPI_APPS_CLK		62
+#define GCC_BLSP2_QUP2_I2C_APPS_CLK		63
+#define GCC_BLSP2_QUP2_SPI_APPS_CLK		64
+#define GCC_BLSP2_QUP3_I2C_APPS_CLK		65
+#define GCC_BLSP2_QUP3_SPI_APPS_CLK		66
+#define GCC_BLSP2_QUP4_I2C_APPS_CLK		67
+#define GCC_BLSP2_QUP4_SPI_APPS_CLK		68
+#define GCC_BLSP2_UART1_APPS_CLK		69
+#define GCC_BLSP2_UART2_APPS_CLK		70
+#define GCC_BOOT_ROM_AHB_CLK			71
+#define GCC_CAMSS_AHB_CLK			72
+#define GCC_CAMSS_CCI_AHB_CLK			73
+#define GCC_CAMSS_CCI_CLK			74
+#define GCC_CAMSS_CPP_AHB_CLK			75
+#define GCC_CAMSS_CPP_AXI_CLK			76
+#define GCC_CAMSS_CPP_CLK			77
+#define GCC_CAMSS_CSI0PHYTIMER_CLK		78
+#define GCC_CAMSS_CSI0PHY_CLK			79
+#define GCC_CAMSS_CSI0PIX_CLK			80
+#define GCC_CAMSS_CSI0RDI_CLK			81
+#define GCC_CAMSS_CSI0_AHB_CLK			82
+#define GCC_CAMSS_CSI0_CLK			83
+#define GCC_CAMSS_CSI0_CSIPHY_3P_CLK		84
+#define GCC_CAMSS_CSI1PHYTIMER_CLK		85
+#define GCC_CAMSS_CSI1PHY_CLK			86
+#define GCC_CAMSS_CSI1PIX_CLK			87
+#define GCC_CAMSS_CSI1RDI_CLK			88
+#define GCC_CAMSS_CSI1_AHB_CLK			89
+#define GCC_CAMSS_CSI1_CLK			90
+#define GCC_CAMSS_CSI1_CSIPHY_3P_CLK		91
+#define GCC_CAMSS_CSI2PHYTIMER_CLK		92
+#define GCC_CAMSS_CSI2PHY_CLK			93
+#define GCC_CAMSS_CSI2PIX_CLK			94
+#define GCC_CAMSS_CSI2RDI_CLK			95
+#define GCC_CAMSS_CSI2_AHB_CLK			96
+#define GCC_CAMSS_CSI2_CLK			97
+#define GCC_CAMSS_CSI2_CSIPHY_3P_CLK		98
+#define GCC_CAMSS_CSI_VFE0_CLK			99
+#define GCC_CAMSS_CSI_VFE1_CLK			100
+#define GCC_CAMSS_GP0_CLK			101
+#define GCC_CAMSS_GP1_CLK			102
+#define GCC_CAMSS_ISPIF_AHB_CLK			103
+#define GCC_CAMSS_JPEG0_CLK			104
+#define GCC_CAMSS_JPEG_AHB_CLK			105
+#define GCC_CAMSS_JPEG_AXI_CLK			106
+#define GCC_CAMSS_MCLK0_CLK			107
+#define GCC_CAMSS_MCLK1_CLK			108
+#define GCC_CAMSS_MCLK2_CLK			109
+#define GCC_CAMSS_MCLK3_CLK			110
+#define GCC_CAMSS_MICRO_AHB_CLK			111
+#define GCC_CAMSS_TOP_AHB_CLK			112
+#define GCC_CAMSS_VFE0_AHB_CLK			113
+#define GCC_CAMSS_VFE0_AXI_CLK			114
+#define GCC_CAMSS_VFE0_CLK			115
+#define GCC_CAMSS_VFE1_AHB_CLK			116
+#define GCC_CAMSS_VFE1_AXI_CLK			117
+#define GCC_CAMSS_VFE1_CLK			118
+#define GCC_CPP_TBU_CLK				119
+#define GCC_CRYPTO_AHB_CLK			120
+#define GCC_CRYPTO_AXI_CLK			121
+#define GCC_CRYPTO_CLK				122
+#define GCC_DCC_CLK				123
+#define GCC_GP1_CLK				124
+#define GCC_GP2_CLK				125
+#define GCC_GP3_CLK				126
+#define GCC_JPEG_TBU_CLK			127
+#define GCC_MDP_TBU_CLK				128
+#define GCC_MDSS_AHB_CLK			129
+#define GCC_MDSS_AXI_CLK			130
+#define GCC_MDSS_BYTE0_CLK			131
+#define GCC_MDSS_BYTE1_CLK			132
+#define GCC_MDSS_ESC0_CLK			133
+#define GCC_MDSS_ESC1_CLK			134
+#define GCC_MDSS_MDP_CLK			135
+#define GCC_MDSS_PCLK0_CLK			136
+#define GCC_MDSS_PCLK1_CLK			137
+#define GCC_MDSS_VSYNC_CLK			138
+#define GCC_MSS_CFG_AHB_CLK			139
+#define GCC_MSS_Q6_BIMC_AXI_CLK			140
+#define GCC_OXILI_AHB_CLK			141
+#define GCC_OXILI_AON_CLK			142
+#define GCC_OXILI_GFX3D_CLK			143
+#define GCC_OXILI_TIMER_CLK			144
+#define GCC_PCNOC_USB3_AXI_CLK			145
+#define GCC_PDM2_CLK				146
+#define GCC_PDM_AHB_CLK				147
+#define GCC_PRNG_AHB_CLK			148
+#define GCC_QDSS_DAP_CLK			149
+#define GCC_QUSB_REF_CLK			150
+#define GCC_RBCPR_GFX_CLK			151
+#define GCC_SDCC1_AHB_CLK			152
+#define GCC_SDCC1_APPS_CLK			153
+#define GCC_SDCC1_ICE_CORE_CLK			154
+#define GCC_SDCC2_AHB_CLK			155
+#define GCC_SDCC2_APPS_CLK			156
+#define GCC_SMMU_CFG_CLK			157
+#define GCC_USB30_MASTER_CLK			158
+#define GCC_USB30_MOCK_UTMI_CLK			159
+#define GCC_USB30_SLEEP_CLK			160
+#define GCC_USB3_AUX_CLK			161
+#define GCC_USB3_PIPE_CLK			162
+#define GCC_USB_PHY_CFG_AHB_CLK			163
+#define GCC_USB_SS_REF_CLK			164
+#define GCC_VENUS0_AHB_CLK			165
+#define GCC_VENUS0_AXI_CLK			166
+#define GCC_VENUS0_CORE0_VCODEC0_CLK		167
+#define GCC_VENUS0_VCODEC0_CLK			168
+#define GCC_VENUS_TBU_CLK			169
+#define GCC_VFE1_TBU_CLK			170
+#define GCC_VFE_TBU_CLK				171
+#define GFX3D_CLK_SRC				172
+#define GP1_CLK_SRC				173
+#define GP2_CLK_SRC				174
+#define GP3_CLK_SRC				175
+#define GPLL0					176
+#define GPLL0_EARLY				177
+#define GPLL2					178
+#define GPLL2_EARLY				179
+#define GPLL3					180
+#define GPLL3_EARLY				181
+#define GPLL4					182
+#define GPLL4_EARLY				183
+#define GPLL6					184
+#define GPLL6_EARLY				185
+#define JPEG0_CLK_SRC				186
+#define MCLK0_CLK_SRC				187
+#define MCLK1_CLK_SRC				188
+#define MCLK2_CLK_SRC				189
+#define MCLK3_CLK_SRC				190
+#define MDP_CLK_SRC				191
+#define PCLK0_CLK_SRC				192
+#define PCLK1_CLK_SRC				193
+#define PDM2_CLK_SRC				194
+#define RBCPR_GFX_CLK_SRC			195
+#define SDCC1_APPS_CLK_SRC			196
+#define SDCC1_ICE_CORE_CLK_SRC			197
+#define SDCC2_APPS_CLK_SRC			198
+#define USB30_MASTER_CLK_SRC			199
+#define USB30_MOCK_UTMI_CLK_SRC			200
+#define USB3_AUX_CLK_SRC			201
+#define VCODEC0_CLK_SRC				202
+#define VFE0_CLK_SRC				203
+#define VFE1_CLK_SRC				204
+#define VSYNC_CLK_SRC				205
+
+/* GCC block resets */
+#define GCC_CAMSS_MICRO_BCR			0
+#define GCC_MSS_BCR				1
+#define GCC_QUSB2_PHY_BCR			2
+#define GCC_USB3PHY_PHY_BCR			3
+#define GCC_USB3_PHY_BCR			4
+#define GCC_USB_30_BCR				5
+
+/* GDSCs */
+#define CPP_GDSC				0
+#define JPEG_GDSC				1
+#define MDSS_GDSC				2
+#define OXILI_CX_GDSC				3
+#define OXILI_GX_GDSC				4
+#define USB30_GDSC				5
+#define VENUS_CORE0_GDSC			6
+#define VENUS_GDSC				7
+#define VFE0_GDSC				8
+#define VFE1_GDSC				9
+
+#endif
diff --git a/include/dt-bindings/clock/qcom,gcc-sc7280.h b/include/dt-bindings/clock/qcom,gcc-sc7280.h
index 4394f15111c6..3d5724b79bff 100644
--- a/include/dt-bindings/clock/qcom,gcc-sc7280.h
+++ b/include/dt-bindings/clock/qcom,gcc-sc7280.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
 /*
  * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
  */
diff --git a/include/dt-bindings/clock/qcom,gcc-sm6115.h b/include/dt-bindings/clock/qcom,gcc-sm6115.h
new file mode 100644
index 000000000000..b91a7b460433
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,gcc-sm6115.h
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2019-2021, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_GCC_SM6115_H
+#define _DT_BINDINGS_CLK_QCOM_GCC_SM6115_H
+
+/* GCC clocks */
+#define GPLL0							0
+#define GPLL0_OUT_AUX2						1
+#define GPLL0_OUT_MAIN						2
+#define GPLL10							3
+#define GPLL10_OUT_MAIN						4
+#define GPLL11							5
+#define GPLL11_OUT_MAIN						6
+#define GPLL3							7
+#define GPLL4							8
+#define GPLL4_OUT_MAIN						9
+#define GPLL6							10
+#define GPLL6_OUT_MAIN						11
+#define GPLL7							12
+#define GPLL7_OUT_MAIN						13
+#define GPLL8							14
+#define GPLL8_OUT_MAIN						15
+#define GPLL9							16
+#define GPLL9_OUT_MAIN						17
+#define GCC_CAMSS_CSI0PHYTIMER_CLK				18
+#define GCC_CAMSS_CSI0PHYTIMER_CLK_SRC				19
+#define GCC_CAMSS_CSI1PHYTIMER_CLK				20
+#define GCC_CAMSS_CSI1PHYTIMER_CLK_SRC				21
+#define GCC_CAMSS_CSI2PHYTIMER_CLK				22
+#define GCC_CAMSS_CSI2PHYTIMER_CLK_SRC				23
+#define GCC_CAMSS_MCLK0_CLK					24
+#define GCC_CAMSS_MCLK0_CLK_SRC					25
+#define GCC_CAMSS_MCLK1_CLK					26
+#define GCC_CAMSS_MCLK1_CLK_SRC					27
+#define GCC_CAMSS_MCLK2_CLK					28
+#define GCC_CAMSS_MCLK2_CLK_SRC					29
+#define GCC_CAMSS_MCLK3_CLK					30
+#define GCC_CAMSS_MCLK3_CLK_SRC					31
+#define GCC_CAMSS_NRT_AXI_CLK					32
+#define GCC_CAMSS_OPE_AHB_CLK					33
+#define GCC_CAMSS_OPE_AHB_CLK_SRC				34
+#define GCC_CAMSS_OPE_CLK					35
+#define GCC_CAMSS_OPE_CLK_SRC					36
+#define GCC_CAMSS_RT_AXI_CLK					37
+#define GCC_CAMSS_TFE_0_CLK					38
+#define GCC_CAMSS_TFE_0_CLK_SRC					39
+#define GCC_CAMSS_TFE_0_CPHY_RX_CLK				40
+#define GCC_CAMSS_TFE_0_CSID_CLK				41
+#define GCC_CAMSS_TFE_0_CSID_CLK_SRC				42
+#define GCC_CAMSS_TFE_1_CLK					43
+#define GCC_CAMSS_TFE_1_CLK_SRC					44
+#define GCC_CAMSS_TFE_1_CPHY_RX_CLK				45
+#define GCC_CAMSS_TFE_1_CSID_CLK				46
+#define GCC_CAMSS_TFE_1_CSID_CLK_SRC				47
+#define GCC_CAMSS_TFE_2_CLK					48
+#define GCC_CAMSS_TFE_2_CLK_SRC					49
+#define GCC_CAMSS_TFE_2_CPHY_RX_CLK				50
+#define GCC_CAMSS_TFE_2_CSID_CLK				51
+#define GCC_CAMSS_TFE_2_CSID_CLK_SRC				52
+#define GCC_CAMSS_TFE_CPHY_RX_CLK_SRC				53
+#define GCC_CAMSS_TOP_AHB_CLK					54
+#define GCC_CAMSS_TOP_AHB_CLK_SRC				55
+#define GCC_CFG_NOC_USB3_PRIM_AXI_CLK				56
+#define GCC_CPUSS_AHB_CLK					57
+#define GCC_CPUSS_GNOC_CLK					60
+#define GCC_DISP_AHB_CLK					61
+#define GCC_DISP_GPLL0_DIV_CLK_SRC				62
+#define GCC_DISP_HF_AXI_CLK					63
+#define GCC_DISP_THROTTLE_CORE_CLK				64
+#define GCC_DISP_XO_CLK						65
+#define GCC_GP1_CLK						66
+#define GCC_GP1_CLK_SRC						67
+#define GCC_GP2_CLK						68
+#define GCC_GP2_CLK_SRC						69
+#define GCC_GP3_CLK						70
+#define GCC_GP3_CLK_SRC						71
+#define GCC_GPU_CFG_AHB_CLK					72
+#define GCC_GPU_GPLL0_CLK_SRC					73
+#define GCC_GPU_GPLL0_DIV_CLK_SRC				74
+#define GCC_GPU_IREF_CLK					75
+#define GCC_GPU_MEMNOC_GFX_CLK					76
+#define GCC_GPU_SNOC_DVM_GFX_CLK				77
+#define GCC_GPU_THROTTLE_CORE_CLK				78
+#define GCC_GPU_THROTTLE_XO_CLK					79
+#define GCC_PDM2_CLK						80
+#define GCC_PDM2_CLK_SRC					81
+#define GCC_PDM_AHB_CLK						82
+#define GCC_PDM_XO4_CLK						83
+#define GCC_PRNG_AHB_CLK					84
+#define GCC_QMIP_CAMERA_NRT_AHB_CLK				85
+#define GCC_QMIP_CAMERA_RT_AHB_CLK				86
+#define GCC_QMIP_DISP_AHB_CLK					87
+#define GCC_QMIP_GPU_CFG_AHB_CLK				88
+#define GCC_QMIP_VIDEO_VCODEC_AHB_CLK				89
+#define GCC_QUPV3_WRAP0_CORE_2X_CLK				90
+#define GCC_QUPV3_WRAP0_CORE_CLK				91
+#define GCC_QUPV3_WRAP0_S0_CLK					92
+#define GCC_QUPV3_WRAP0_S0_CLK_SRC				93
+#define GCC_QUPV3_WRAP0_S1_CLK					94
+#define GCC_QUPV3_WRAP0_S1_CLK_SRC				95
+#define GCC_QUPV3_WRAP0_S2_CLK					96
+#define GCC_QUPV3_WRAP0_S2_CLK_SRC				97
+#define GCC_QUPV3_WRAP0_S3_CLK					98
+#define GCC_QUPV3_WRAP0_S3_CLK_SRC				99
+#define GCC_QUPV3_WRAP0_S4_CLK					100
+#define GCC_QUPV3_WRAP0_S4_CLK_SRC				101
+#define GCC_QUPV3_WRAP0_S5_CLK					102
+#define GCC_QUPV3_WRAP0_S5_CLK_SRC				103
+#define GCC_QUPV3_WRAP_0_M_AHB_CLK				104
+#define GCC_QUPV3_WRAP_0_S_AHB_CLK				105
+#define GCC_SDCC1_AHB_CLK					106
+#define GCC_SDCC1_APPS_CLK					107
+#define GCC_SDCC1_APPS_CLK_SRC					108
+#define GCC_SDCC1_ICE_CORE_CLK					109
+#define GCC_SDCC1_ICE_CORE_CLK_SRC				110
+#define GCC_SDCC2_AHB_CLK					111
+#define GCC_SDCC2_APPS_CLK					112
+#define GCC_SDCC2_APPS_CLK_SRC					113
+#define GCC_SYS_NOC_CPUSS_AHB_CLK				114
+#define GCC_SYS_NOC_UFS_PHY_AXI_CLK				115
+#define GCC_SYS_NOC_USB3_PRIM_AXI_CLK				116
+#define GCC_UFS_PHY_AHB_CLK					117
+#define GCC_UFS_PHY_AXI_CLK					118
+#define GCC_UFS_PHY_AXI_CLK_SRC					119
+#define GCC_UFS_PHY_ICE_CORE_CLK				120
+#define GCC_UFS_PHY_ICE_CORE_CLK_SRC				121
+#define GCC_UFS_PHY_PHY_AUX_CLK					122
+#define GCC_UFS_PHY_PHY_AUX_CLK_SRC				123
+#define GCC_UFS_PHY_RX_SYMBOL_0_CLK				124
+#define GCC_UFS_PHY_TX_SYMBOL_0_CLK				125
+#define GCC_UFS_PHY_UNIPRO_CORE_CLK				126
+#define GCC_UFS_PHY_UNIPRO_CORE_CLK_SRC				127
+#define GCC_USB30_PRIM_MASTER_CLK				128
+#define GCC_USB30_PRIM_MASTER_CLK_SRC				129
+#define GCC_USB30_PRIM_MOCK_UTMI_CLK				130
+#define GCC_USB30_PRIM_MOCK_UTMI_CLK_SRC			131
+#define GCC_USB30_PRIM_MOCK_UTMI_POSTDIV_CLK_SRC		132
+#define GCC_USB30_PRIM_SLEEP_CLK				133
+#define GCC_USB3_PRIM_CLKREF_CLK				134
+#define GCC_USB3_PRIM_PHY_AUX_CLK_SRC				135
+#define GCC_USB3_PRIM_PHY_COM_AUX_CLK				136
+#define GCC_USB3_PRIM_PHY_PIPE_CLK				137
+#define GCC_VCODEC0_AXI_CLK					138
+#define GCC_VENUS_AHB_CLK					139
+#define GCC_VENUS_CTL_AXI_CLK					140
+#define GCC_VIDEO_AHB_CLK					141
+#define GCC_VIDEO_AXI0_CLK					142
+#define GCC_VIDEO_THROTTLE_CORE_CLK				143
+#define GCC_VIDEO_VCODEC0_SYS_CLK				144
+#define GCC_VIDEO_VENUS_CLK_SRC					145
+#define GCC_VIDEO_VENUS_CTL_CLK					146
+#define GCC_VIDEO_XO_CLK					147
+#define GCC_AHB2PHY_CSI_CLK					148
+#define GCC_AHB2PHY_USB_CLK					149
+#define GCC_BIMC_GPU_AXI_CLK					150
+#define GCC_BOOT_ROM_AHB_CLK					151
+#define GCC_CAM_THROTTLE_NRT_CLK				152
+#define GCC_CAM_THROTTLE_RT_CLK					153
+#define GCC_CAMERA_AHB_CLK					154
+#define GCC_CAMERA_XO_CLK					155
+#define GCC_CAMSS_AXI_CLK					156
+#define GCC_CAMSS_AXI_CLK_SRC					157
+#define GCC_CAMSS_CAMNOC_ATB_CLK				158
+#define GCC_CAMSS_CAMNOC_NTS_XO_CLK				159
+#define GCC_CAMSS_CCI_0_CLK					160
+#define GCC_CAMSS_CCI_CLK_SRC					161
+#define GCC_CAMSS_CPHY_0_CLK					162
+#define GCC_CAMSS_CPHY_1_CLK					163
+#define GCC_CAMSS_CPHY_2_CLK					164
+#define GCC_UFS_CLKREF_CLK					165
+#define GCC_DISP_GPLL0_CLK_SRC					166
+
+/* GCC resets */
+#define GCC_QUSB2PHY_PRIM_BCR					0
+#define GCC_QUSB2PHY_SEC_BCR					1
+#define GCC_SDCC1_BCR						2
+#define GCC_UFS_PHY_BCR						3
+#define GCC_USB30_PRIM_BCR					4
+#define GCC_USB_PHY_CFG_AHB2PHY_BCR				5
+#define GCC_VCODEC0_BCR						6
+#define GCC_VENUS_BCR						7
+#define GCC_VIDEO_INTERFACE_BCR					8
+#define GCC_USB3PHY_PHY_PRIM_SP0_BCR				9
+#define GCC_USB3_PHY_PRIM_SP0_BCR				10
+#define GCC_SDCC2_BCR						11
+
+/* Indexes for GDSCs */
+#define GCC_CAMSS_TOP_GDSC			0
+#define GCC_UFS_PHY_GDSC			1
+#define GCC_USB30_PRIM_GDSC			2
+#define GCC_VCODEC0_GDSC			3
+#define GCC_VENUS_GDSC				4
+#define HLOS1_VOTE_TURING_MMU_TBU1_GDSC		5
+#define HLOS1_VOTE_TURING_MMU_TBU0_GDSC		6
+#define HLOS1_VOTE_MM_SNOC_MMU_TBU_RT_GDSC	7
+#define HLOS1_VOTE_MM_SNOC_MMU_TBU_NRT_GDSC	8
+
+#endif
diff --git a/include/dt-bindings/clock/qcom,gcc-sm6350.h b/include/dt-bindings/clock/qcom,gcc-sm6350.h
new file mode 100644
index 000000000000..ba584ca33c39
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,gcc-sm6350.h
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2021, Konrad Dybcio <konrad.dybcio@somainline.org>
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_GCC_SM6350_H
+#define _DT_BINDINGS_CLK_QCOM_GCC_SM6350_H
+
+/* GCC clocks */
+#define GPLL0					0
+#define GPLL0_OUT_EVEN				1
+#define GPLL0_OUT_ODD				2
+#define GPLL6					3
+#define GPLL6_OUT_EVEN				4
+#define GPLL7					5
+#define GCC_AGGRE_CNOC_PERIPH_CENTER_AHB_CLK	6
+#define GCC_AGGRE_NOC_CENTER_AHB_CLK		7
+#define GCC_AGGRE_NOC_PCIE_SF_AXI_CLK		8
+#define GCC_AGGRE_NOC_PCIE_TBU_CLK		9
+#define GCC_AGGRE_NOC_WLAN_AXI_CLK		10
+#define GCC_AGGRE_UFS_PHY_AXI_CLK		11
+#define GCC_AGGRE_USB3_PRIM_AXI_CLK		12
+#define GCC_BOOT_ROM_AHB_CLK			13
+#define GCC_CAMERA_AHB_CLK			14
+#define GCC_CAMERA_AXI_CLK			15
+#define GCC_CAMERA_THROTTLE_NRT_AXI_CLK		16
+#define GCC_CAMERA_THROTTLE_RT_AXI_CLK		17
+#define GCC_CAMERA_XO_CLK			18
+#define GCC_CE1_AHB_CLK				19
+#define GCC_CE1_AXI_CLK				20
+#define GCC_CE1_CLK				21
+#define GCC_CFG_NOC_USB3_PRIM_AXI_CLK		22
+#define GCC_CPUSS_AHB_CLK			23
+#define GCC_CPUSS_AHB_CLK_SRC			24
+#define GCC_CPUSS_AHB_DIV_CLK_SRC		25
+#define GCC_CPUSS_GNOC_CLK			26
+#define GCC_CPUSS_RBCPR_CLK			27
+#define GCC_DDRSS_GPU_AXI_CLK			28
+#define GCC_DISP_AHB_CLK			29
+#define GCC_DISP_AXI_CLK			30
+#define GCC_DISP_CC_SLEEP_CLK			31
+#define GCC_DISP_CC_XO_CLK			32
+#define GCC_DISP_GPLL0_CLK			33
+#define GCC_DISP_THROTTLE_AXI_CLK		34
+#define GCC_DISP_XO_CLK				35
+#define GCC_GP1_CLK				36
+#define GCC_GP1_CLK_SRC				37
+#define GCC_GP2_CLK				38
+#define GCC_GP2_CLK_SRC				39
+#define GCC_GP3_CLK				40
+#define GCC_GP3_CLK_SRC				41
+#define GCC_GPU_CFG_AHB_CLK			42
+#define GCC_GPU_GPLL0_CLK			43
+#define GCC_GPU_GPLL0_DIV_CLK			44
+#define GCC_GPU_MEMNOC_GFX_CLK			45
+#define GCC_GPU_SNOC_DVM_GFX_CLK		46
+#define GCC_NPU_AXI_CLK				47
+#define GCC_NPU_BWMON_AXI_CLK			48
+#define GCC_NPU_BWMON_DMA_CFG_AHB_CLK		49
+#define GCC_NPU_BWMON_DSP_CFG_AHB_CLK		50
+#define GCC_NPU_CFG_AHB_CLK			51
+#define GCC_NPU_DMA_CLK				52
+#define GCC_NPU_GPLL0_CLK			53
+#define GCC_NPU_GPLL0_DIV_CLK			54
+#define GCC_PCIE_0_AUX_CLK			55
+#define GCC_PCIE_0_AUX_CLK_SRC			56
+#define GCC_PCIE_0_CFG_AHB_CLK			57
+#define GCC_PCIE_0_MSTR_AXI_CLK			58
+#define GCC_PCIE_0_PIPE_CLK			59
+#define GCC_PCIE_0_SLV_AXI_CLK			60
+#define GCC_PCIE_0_SLV_Q2A_AXI_CLK		61
+#define GCC_PCIE_PHY_RCHNG_CLK			62
+#define GCC_PCIE_PHY_RCHNG_CLK_SRC		63
+#define GCC_PDM2_CLK				64
+#define GCC_PDM2_CLK_SRC			65
+#define GCC_PDM_AHB_CLK				66
+#define GCC_PDM_XO4_CLK				67
+#define GCC_PRNG_AHB_CLK			68
+#define GCC_QUPV3_WRAP0_CORE_2X_CLK		69
+#define GCC_QUPV3_WRAP0_CORE_CLK		70
+#define GCC_QUPV3_WRAP0_S0_CLK			71
+#define GCC_QUPV3_WRAP0_S0_CLK_SRC		72
+#define GCC_QUPV3_WRAP0_S1_CLK			73
+#define GCC_QUPV3_WRAP0_S1_CLK_SRC		74
+#define GCC_QUPV3_WRAP0_S2_CLK			75
+#define GCC_QUPV3_WRAP0_S2_CLK_SRC		76
+#define GCC_QUPV3_WRAP0_S3_CLK			77
+#define GCC_QUPV3_WRAP0_S3_CLK_SRC		78
+#define GCC_QUPV3_WRAP0_S4_CLK			79
+#define GCC_QUPV3_WRAP0_S4_CLK_SRC		80
+#define GCC_QUPV3_WRAP0_S5_CLK			81
+#define GCC_QUPV3_WRAP0_S5_CLK_SRC		82
+#define GCC_QUPV3_WRAP1_CORE_2X_CLK		83
+#define GCC_QUPV3_WRAP1_CORE_CLK		84
+#define GCC_QUPV3_WRAP1_S0_CLK			85
+#define GCC_QUPV3_WRAP1_S0_CLK_SRC		86
+#define GCC_QUPV3_WRAP1_S1_CLK			87
+#define GCC_QUPV3_WRAP1_S1_CLK_SRC		88
+#define GCC_QUPV3_WRAP1_S2_CLK			89
+#define GCC_QUPV3_WRAP1_S2_CLK_SRC		90
+#define GCC_QUPV3_WRAP1_S3_CLK			91
+#define GCC_QUPV3_WRAP1_S3_CLK_SRC		92
+#define GCC_QUPV3_WRAP1_S4_CLK			93
+#define GCC_QUPV3_WRAP1_S4_CLK_SRC		94
+#define GCC_QUPV3_WRAP1_S5_CLK			95
+#define GCC_QUPV3_WRAP1_S5_CLK_SRC		96
+#define GCC_QUPV3_WRAP_0_M_AHB_CLK		97
+#define GCC_QUPV3_WRAP_0_S_AHB_CLK		98
+#define GCC_QUPV3_WRAP_1_M_AHB_CLK		99
+#define GCC_QUPV3_WRAP_1_S_AHB_CLK		100
+#define GCC_SDCC1_AHB_CLK			101
+#define GCC_SDCC1_APPS_CLK			102
+#define GCC_SDCC1_APPS_CLK_SRC			103
+#define GCC_SDCC1_ICE_CORE_CLK			104
+#define GCC_SDCC1_ICE_CORE_CLK_SRC		105
+#define GCC_SDCC2_AHB_CLK			106
+#define GCC_SDCC2_APPS_CLK			107
+#define GCC_SDCC2_APPS_CLK_SRC			108
+#define GCC_SYS_NOC_CPUSS_AHB_CLK		109
+#define GCC_UFS_MEM_CLKREF_CLK			110
+#define GCC_UFS_PHY_AHB_CLK			111
+#define GCC_UFS_PHY_AXI_CLK			112
+#define GCC_UFS_PHY_AXI_CLK_SRC			113
+#define GCC_UFS_PHY_ICE_CORE_CLK		114
+#define GCC_UFS_PHY_ICE_CORE_CLK_SRC		115
+#define GCC_UFS_PHY_PHY_AUX_CLK			116
+#define GCC_UFS_PHY_PHY_AUX_CLK_SRC		117
+#define GCC_UFS_PHY_RX_SYMBOL_0_CLK		118
+#define GCC_UFS_PHY_RX_SYMBOL_1_CLK		119
+#define GCC_UFS_PHY_TX_SYMBOL_0_CLK		120
+#define GCC_UFS_PHY_UNIPRO_CORE_CLK		121
+#define GCC_UFS_PHY_UNIPRO_CORE_CLK_SRC		122
+#define GCC_USB30_PRIM_MASTER_CLK		123
+#define GCC_USB30_PRIM_MASTER_CLK_SRC		124
+#define GCC_USB30_PRIM_MOCK_UTMI_CLK		125
+#define GCC_USB30_PRIM_MOCK_UTMI_CLK_SRC	126
+#define GCC_USB30_PRIM_MOCK_UTMI_DIV_CLK_SRC	127
+#define GCC_USB3_PRIM_CLKREF_CLK		128
+#define GCC_USB30_PRIM_SLEEP_CLK		129
+#define GCC_USB3_PRIM_PHY_AUX_CLK		130
+#define GCC_USB3_PRIM_PHY_AUX_CLK_SRC		131
+#define GCC_USB3_PRIM_PHY_COM_AUX_CLK		132
+#define GCC_USB3_PRIM_PHY_PIPE_CLK		133
+#define GCC_VIDEO_AHB_CLK			134
+#define GCC_VIDEO_AXI_CLK			135
+#define GCC_VIDEO_THROTTLE_AXI_CLK		136
+#define GCC_VIDEO_XO_CLK			137
+#define GCC_UFS_PHY_PHY_AUX_HW_CTL_CLK		138
+#define GCC_UFS_PHY_AXI_HW_CTL_CLK		139
+#define GCC_AGGRE_UFS_PHY_AXI_HW_CTL_CLK	140
+#define GCC_UFS_PHY_UNIPRO_CORE_HW_CTL_CLK	141
+#define GCC_UFS_PHY_ICE_CORE_HW_CTL_CLK		142
+#define GCC_RX5_PCIE_CLKREF_CLK			143
+#define GCC_GPU_GPLL0_MAIN_DIV_CLK_SRC		144
+#define GCC_NPU_PLL0_MAIN_DIV_CLK_SRC		145
+
+/* GCC resets */
+#define GCC_QUSB2PHY_PRIM_BCR			0
+#define GCC_QUSB2PHY_SEC_BCR			1
+#define GCC_SDCC1_BCR				2
+#define GCC_SDCC2_BCR				3
+#define GCC_UFS_PHY_BCR				4
+#define GCC_USB30_PRIM_BCR			5
+#define GCC_PCIE_0_BCR				6
+#define GCC_PCIE_0_PHY_BCR			7
+#define GCC_QUPV3_WRAPPER_0_BCR			8
+#define GCC_QUPV3_WRAPPER_1_BCR			9
+#define GCC_USB3_PHY_PRIM_BCR			10
+#define GCC_USB3_DP_PHY_PRIM_BCR		11
+
+/* GCC GDSCs */
+#define USB30_PRIM_GDSC				0
+#define UFS_PHY_GDSC				1
+#define HLOS1_VOTE_MMNOC_MMU_TBU_HF0_GDSC	2
+#define HLOS1_VOTE_MMNOC_MMU_TBU_HF1_GDSC	3
+
+#endif
diff --git a/include/dt-bindings/clock/qcom,gpucc-sc7280.h b/include/dt-bindings/clock/qcom,gpucc-sc7280.h
new file mode 100644
index 000000000000..669b23b606ba
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,gpucc-sc7280.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_GPU_CC_SC7280_H
+#define _DT_BINDINGS_CLK_QCOM_GPU_CC_SC7280_H
+
+/* GPU_CC clocks */
+#define GPU_CC_PLL0				0
+#define GPU_CC_PLL1				1
+#define GPU_CC_AHB_CLK				2
+#define GPU_CC_CB_CLK				3
+#define GPU_CC_CRC_AHB_CLK			4
+#define GPU_CC_CX_GMU_CLK			5
+#define GPU_CC_CX_SNOC_DVM_CLK			6
+#define GPU_CC_CXO_AON_CLK			7
+#define GPU_CC_CXO_CLK				8
+#define GPU_CC_GMU_CLK_SRC			9
+#define GPU_CC_GX_GMU_CLK			10
+#define GPU_CC_HLOS1_VOTE_GPU_SMMU_CLK		11
+#define GPU_CC_HUB_AHB_DIV_CLK_SRC		12
+#define GPU_CC_HUB_AON_CLK			13
+#define GPU_CC_HUB_CLK_SRC			14
+#define GPU_CC_HUB_CX_INT_CLK			15
+#define GPU_CC_HUB_CX_INT_DIV_CLK_SRC		16
+#define GPU_CC_MND1X_0_GFX3D_CLK		17
+#define GPU_CC_MND1X_1_GFX3D_CLK		18
+#define GPU_CC_SLEEP_CLK			19
+
+/* GPU_CC power domains */
+#define GPU_CC_CX_GDSC				0
+#define GPU_CC_GX_GDSC				1
+
+#endif
diff --git a/include/dt-bindings/clock/qcom,mmcc-msm8994.h b/include/dt-bindings/clock/qcom,mmcc-msm8994.h
new file mode 100644
index 000000000000..4b289092f5a2
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,mmcc-msm8994.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020, Konrad Dybcio
+ */
+
+#ifndef _DT_BINDINGS_CLK_MSM_MMCC_8994_H
+#define _DT_BINDINGS_CLK_MSM_MMCC_8994_H
+
+/* Clocks */
+#define MMPLL0_EARLY					0
+#define MMPLL0_PLL						1
+#define MMPLL1_EARLY					2
+#define MMPLL1_PLL						3
+#define MMPLL3_EARLY					4
+#define MMPLL3_PLL						5
+#define MMPLL4_EARLY					6
+#define MMPLL4_PLL						7
+#define MMPLL5_EARLY					8
+#define MMPLL5_PLL						9
+#define AXI_CLK_SRC						10
+#define RBBMTIMER_CLK_SRC				11
+#define PCLK0_CLK_SRC					12
+#define PCLK1_CLK_SRC					13
+#define MDP_CLK_SRC						14
+#define VSYNC_CLK_SRC					15
+#define BYTE0_CLK_SRC					16
+#define BYTE1_CLK_SRC					17
+#define ESC0_CLK_SRC					18
+#define ESC1_CLK_SRC					19
+#define MDSS_AHB_CLK					20
+#define MDSS_PCLK0_CLK					21
+#define MDSS_PCLK1_CLK					22
+#define MDSS_VSYNC_CLK					23
+#define MDSS_BYTE0_CLK					24
+#define MDSS_BYTE1_CLK					25
+#define MDSS_ESC0_CLK					26
+#define MDSS_ESC1_CLK					27
+#define CSI0_CLK_SRC					28
+#define CSI1_CLK_SRC					29
+#define CSI2_CLK_SRC					30
+#define CSI3_CLK_SRC					31
+#define VFE0_CLK_SRC					32
+#define VFE1_CLK_SRC					33
+#define CPP_CLK_SRC						34
+#define JPEG0_CLK_SRC					35
+#define JPEG1_CLK_SRC					36
+#define JPEG2_CLK_SRC					37
+#define CSI2PHYTIMER_CLK_SRC			38
+#define FD_CORE_CLK_SRC					39
+#define OCMEMNOC_CLK_SRC				40
+#define CCI_CLK_SRC						41
+#define MMSS_GP0_CLK_SRC				42
+#define MMSS_GP1_CLK_SRC				43
+#define JPEG_DMA_CLK_SRC				44
+#define MCLK0_CLK_SRC					45
+#define MCLK1_CLK_SRC					46
+#define MCLK2_CLK_SRC					47
+#define MCLK3_CLK_SRC					48
+#define CSI0PHYTIMER_CLK_SRC			49
+#define CSI1PHYTIMER_CLK_SRC			50
+#define EXTPCLK_CLK_SRC					51
+#define HDMI_CLK_SRC					52
+#define CAMSS_AHB_CLK					53
+#define CAMSS_CCI_CCI_AHB_CLK			54
+#define CAMSS_CCI_CCI_CLK				55
+#define CAMSS_VFE_CPP_AHB_CLK			56
+#define CAMSS_VFE_CPP_AXI_CLK			57
+#define CAMSS_VFE_CPP_CLK				58
+#define CAMSS_CSI0_AHB_CLK				59
+#define CAMSS_CSI0_CLK					60
+#define CAMSS_CSI0PHY_CLK				61
+#define CAMSS_CSI0PIX_CLK				62
+#define CAMSS_CSI0RDI_CLK				63
+#define CAMSS_CSI1_AHB_CLK				64
+#define CAMSS_CSI1_CLK					65
+#define CAMSS_CSI1PHY_CLK				66
+#define CAMSS_CSI1PIX_CLK				67
+#define CAMSS_CSI1RDI_CLK				68
+#define CAMSS_CSI2_AHB_CLK				69
+#define CAMSS_CSI2_CLK					70
+#define CAMSS_CSI2PHY_CLK				71
+#define CAMSS_CSI2PIX_CLK				72
+#define CAMSS_CSI2RDI_CLK				73
+#define CAMSS_CSI3_AHB_CLK				74
+#define CAMSS_CSI3_CLK					75
+#define CAMSS_CSI3PHY_CLK				76
+#define CAMSS_CSI3PIX_CLK				77
+#define CAMSS_CSI3RDI_CLK				78
+#define CAMSS_CSI_VFE0_CLK				79
+#define CAMSS_CSI_VFE1_CLK				80
+#define CAMSS_GP0_CLK					81
+#define CAMSS_GP1_CLK					82
+#define CAMSS_ISPIF_AHB_CLK				83
+#define CAMSS_JPEG_DMA_CLK				84
+#define CAMSS_JPEG_JPEG0_CLK			85
+#define CAMSS_JPEG_JPEG1_CLK			86
+#define CAMSS_JPEG_JPEG2_CLK			87
+#define CAMSS_JPEG_JPEG_AHB_CLK			88
+#define CAMSS_JPEG_JPEG_AXI_CLK			89
+#define CAMSS_MCLK0_CLK					90
+#define CAMSS_MCLK1_CLK					91
+#define CAMSS_MCLK2_CLK					92
+#define CAMSS_MCLK3_CLK					93
+#define CAMSS_MICRO_AHB_CLK				94
+#define CAMSS_PHY0_CSI0PHYTIMER_CLK		95
+#define CAMSS_PHY1_CSI1PHYTIMER_CLK		96
+#define CAMSS_PHY2_CSI2PHYTIMER_CLK		97
+#define CAMSS_TOP_AHB_CLK				98
+#define CAMSS_VFE_VFE0_CLK				99
+#define CAMSS_VFE_VFE1_CLK				100
+#define CAMSS_VFE_VFE_AHB_CLK			101
+#define CAMSS_VFE_VFE_AXI_CLK			102
+#define FD_AXI_CLK						103
+#define FD_CORE_CLK						104
+#define FD_CORE_UAR_CLK					105
+#define MDSS_AXI_CLK					106
+#define MDSS_EXTPCLK_CLK				107
+#define MDSS_HDMI_AHB_CLK				108
+#define MDSS_HDMI_CLK					109
+#define MDSS_MDP_CLK					110
+#define MMSS_MISC_AHB_CLK				111
+#define MMSS_MMSSNOC_AXI_CLK			112
+#define MMSS_S0_AXI_CLK					113
+#define OCMEMCX_OCMEMNOC_CLK			114
+#define OXILI_GFX3D_CLK					115
+#define OXILI_RBBMTIMER_CLK				116
+#define OXILICX_AHB_CLK					117
+#define VENUS0_AHB_CLK					118
+#define VENUS0_AXI_CLK					119
+#define VENUS0_OCMEMNOC_CLK				120
+#define VENUS0_VCODEC0_CLK				121
+#define VENUS0_CORE0_VCODEC_CLK			122
+#define VENUS0_CORE1_VCODEC_CLK			123
+#define VENUS0_CORE2_VCODEC_CLK			124
+#define AHB_CLK_SRC						125
+#define FD_AHB_CLK						126
+
+/* GDSCs */
+#define VENUS_GDSC						0
+#define VENUS_CORE0_GDSC				1
+#define VENUS_CORE1_GDSC				2
+#define VENUS_CORE2_GDSC				3
+#define CAMSS_TOP_GDSC					4
+#define MDSS_GDSC						5
+#define JPEG_GDSC						6
+#define VFE_GDSC						7
+#define CPP_GDSC						8
+#define OXILI_GX_GDSC					9
+#define OXILI_CX_GDSC					10
+#define FD_GDSC							11
+
+/* Resets */
+#define CAMSS_MICRO_BCR					0
+
+#endif
diff --git a/include/dt-bindings/clock/qcom,rpmcc.h b/include/dt-bindings/clock/qcom,rpmcc.h
index 8aaba7cd9589..aa834d516234 100644
--- a/include/dt-bindings/clock/qcom,rpmcc.h
+++ b/include/dt-bindings/clock/qcom,rpmcc.h
@@ -149,5 +149,15 @@
 #define RPM_SMD_CE2_A_CLK			103
 #define RPM_SMD_CE3_CLK				104
 #define RPM_SMD_CE3_A_CLK			105
+#define RPM_SMD_QUP_CLK				106
+#define RPM_SMD_QUP_A_CLK			107
+#define RPM_SMD_MMRT_CLK			108
+#define RPM_SMD_MMRT_A_CLK			109
+#define RPM_SMD_MMNRT_CLK			110
+#define RPM_SMD_MMNRT_A_CLK			111
+#define RPM_SMD_SNOC_PERIPH_CLK			112
+#define RPM_SMD_SNOC_PERIPH_A_CLK		113
+#define RPM_SMD_SNOC_LPASS_CLK			114
+#define RPM_SMD_SNOC_LPASS_A_CLK		115
 
 #endif
diff --git a/include/dt-bindings/clock/qcom,rpmh.h b/include/dt-bindings/clock/qcom,rpmh.h
index 583a99161aaa..0a7d1be0d124 100644
--- a/include/dt-bindings/clock/qcom,rpmh.h
+++ b/include/dt-bindings/clock/qcom,rpmh.h
@@ -31,5 +31,7 @@
 #define RPMH_RF_CLK5_A				22
 #define RPMH_PKA_CLK				23
 #define RPMH_HWKM_CLK				24
+#define RPMH_QLINK_CLK				25
+#define RPMH_QLINK_CLK_A			26
 
 #endif
diff --git a/include/dt-bindings/clock/qcom,videocc-sc7280.h b/include/dt-bindings/clock/qcom,videocc-sc7280.h
new file mode 100644
index 000000000000..9e00c3a5f75e
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,videocc-sc7280.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_VIDEO_CC_SC7280_H
+#define _DT_BINDINGS_CLK_QCOM_VIDEO_CC_SC7280_H
+
+/* VIDEO_CC clocks */
+#define VIDEO_PLL0				0
+#define VIDEO_CC_IRIS_AHB_CLK			1
+#define VIDEO_CC_IRIS_CLK_SRC			2
+#define VIDEO_CC_MVS0_AXI_CLK			3
+#define VIDEO_CC_MVS0_CORE_CLK			4
+#define VIDEO_CC_MVSC_CORE_CLK			5
+#define VIDEO_CC_MVSC_CTL_AXI_CLK		6
+#define VIDEO_CC_SLEEP_CLK			7
+#define VIDEO_CC_SLEEP_CLK_SRC			8
+#define VIDEO_CC_VENUS_AHB_CLK			9
+#define VIDEO_CC_XO_CLK				10
+#define VIDEO_CC_XO_CLK_SRC			11
+
+/* VIDEO_CC power domains */
+#define MVS0_GDSC				0
+#define MVSC_GDSC				1
+
+#endif
diff --git a/include/dt-bindings/clock/rk3036-cru.h b/include/dt-bindings/clock/rk3036-cru.h
index 35a5a01f9697..a96a9870ad59 100644
--- a/include/dt-bindings/clock/rk3036-cru.h
+++ b/include/dt-bindings/clock/rk3036-cru.h
@@ -81,6 +81,7 @@
 #define HCLK_OTG0		449
 #define HCLK_OTG1		450
 #define HCLK_NANDC		453
+#define HCLK_SFC		454
 #define HCLK_SDMMC		456
 #define HCLK_SDIO		457
 #define HCLK_EMMC		459
diff --git a/include/dt-bindings/gce/mt8192-gce.h b/include/dt-bindings/gce/mt8192-gce.h
new file mode 100644
index 000000000000..9e5a0eb040a0
--- /dev/null
+++ b/include/dt-bindings/gce/mt8192-gce.h
@@ -0,0 +1,335 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2020 MediaTek Inc.
+ * Author: Yongqiang Niu <yongqiang.niu@mediatek.com>
+ */
+
+#ifndef _DT_BINDINGS_GCE_MT8192_H
+#define _DT_BINDINGS_GCE_MT8192_H
+
+/* assign timeout 0 also means default */
+#define CMDQ_NO_TIMEOUT		0xffffffff
+#define CMDQ_TIMEOUT_DEFAULT	1000
+
+/* GCE thread priority */
+#define CMDQ_THR_PRIO_LOWEST	0
+#define CMDQ_THR_PRIO_1		1
+#define CMDQ_THR_PRIO_2		2
+#define CMDQ_THR_PRIO_3		3
+#define CMDQ_THR_PRIO_4		4
+#define CMDQ_THR_PRIO_5		5
+#define CMDQ_THR_PRIO_6		6
+#define CMDQ_THR_PRIO_HIGHEST	7
+
+/* CPR count in 32bit register */
+#define GCE_CPR_COUNT		1312
+
+/* GCE subsys table */
+#define SUBSYS_1300XXXX		0
+#define SUBSYS_1400XXXX		1
+#define SUBSYS_1401XXXX		2
+#define SUBSYS_1402XXXX		3
+#define SUBSYS_1502XXXX		4
+#define SUBSYS_1880XXXX		5
+#define SUBSYS_1881XXXX		6
+#define SUBSYS_1882XXXX		7
+#define SUBSYS_1883XXXX		8
+#define SUBSYS_1884XXXX		9
+#define SUBSYS_1000XXXX		10
+#define SUBSYS_1001XXXX		11
+#define SUBSYS_1002XXXX		12
+#define SUBSYS_1003XXXX		13
+#define SUBSYS_1004XXXX		14
+#define SUBSYS_1005XXXX		15
+#define SUBSYS_1020XXXX		16
+#define SUBSYS_1028XXXX		17
+#define SUBSYS_1700XXXX		18
+#define SUBSYS_1701XXXX		19
+#define SUBSYS_1702XXXX		20
+#define SUBSYS_1703XXXX		21
+#define SUBSYS_1800XXXX		22
+#define SUBSYS_1801XXXX		23
+#define SUBSYS_1802XXXX		24
+#define SUBSYS_1804XXXX		25
+#define SUBSYS_1805XXXX		26
+#define SUBSYS_1808XXXX		27
+#define SUBSYS_180aXXXX		28
+#define SUBSYS_180bXXXX		29
+
+#define CMDQ_EVENT_VDEC_LAT_SOF_0			0
+#define CMDQ_EVENT_VDEC_LAT_FRAME_DONE_0		1
+#define CMDQ_EVENT_VDEC_LAT_FRAME_DONE_1		2
+#define CMDQ_EVENT_VDEC_LAT_FRAME_DONE_2		3
+#define CMDQ_EVENT_VDEC_LAT_FRAME_DONE_3		4
+#define CMDQ_EVENT_VDEC_LAT_FRAME_DONE_4		5
+#define CMDQ_EVENT_VDEC_LAT_FRAME_DONE_5		6
+#define CMDQ_EVENT_VDEC_LAT_FRAME_DONE_6		7
+#define CMDQ_EVENT_VDEC_LAT_ENG_EVENT_0			8
+#define CMDQ_EVENT_VDEC_LAT_ENG_EVENT_1			9
+#define CMDQ_EVENT_VDEC_LAT_ENG_EVENT_2			10
+#define CMDQ_EVENT_VDEC_LAT_ENG_EVENT_3			11
+#define CMDQ_EVENT_VDEC_LAT_ENG_EVENT_4			12
+#define CMDQ_EVENT_VDEC_LAT_ENG_EVENT_5			13
+#define CMDQ_EVENT_VDEC_LAT_ENG_EVENT_6			14
+#define CMDQ_EVENT_VDEC_LAT_ENG_EVENT_7			15
+
+#define CMDQ_EVENT_ISP_FRAME_DONE_A			65
+#define CMDQ_EVENT_ISP_FRAME_DONE_B			66
+#define CMDQ_EVENT_ISP_FRAME_DONE_C			67
+#define CMDQ_EVENT_CAMSV0_PASS1_DONE			68
+#define CMDQ_EVENT_CAMSV02_PASS1_DONE			69
+#define CMDQ_EVENT_CAMSV1_PASS1_DONE			70
+#define CMDQ_EVENT_CAMSV2_PASS1_DONE			71
+#define CMDQ_EVENT_CAMSV3_PASS1_DONE			72
+#define CMDQ_EVENT_MRAW_0_PASS1_DONE			73
+#define CMDQ_EVENT_MRAW_1_PASS1_DONE			74
+#define CMDQ_EVENT_SENINF_CAM0_FIFO_FULL		75
+#define CMDQ_EVENT_SENINF_CAM1_FIFO_FULL		76
+#define CMDQ_EVENT_SENINF_CAM2_FIFO_FULL		77
+#define CMDQ_EVENT_SENINF_CAM3_FIFO_FULL		78
+#define CMDQ_EVENT_SENINF_CAM4_FIFO_FULL		79
+#define CMDQ_EVENT_SENINF_CAM5_FIFO_FULL		80
+#define CMDQ_EVENT_SENINF_CAM6_FIFO_FULL		81
+#define CMDQ_EVENT_SENINF_CAM7_FIFO_FULL		82
+#define CMDQ_EVENT_SENINF_CAM8_FIFO_FULL		83
+#define CMDQ_EVENT_SENINF_CAM9_FIFO_FULL		84
+#define CMDQ_EVENT_SENINF_CAM10_FIFO_FULL		85
+#define CMDQ_EVENT_SENINF_CAM11_FIFO_FULL		86
+#define CMDQ_EVENT_SENINF_CAM12_FIFO_FULL		87
+#define CMDQ_EVENT_TG_OVRUN_A_INT			88
+#define CMDQ_EVENT_DMA_R1_ERROR_A_INT			89
+#define CMDQ_EVENT_TG_OVRUN_B_INT			90
+#define CMDQ_EVENT_DMA_R1_ERROR_B_INT			91
+#define CMDQ_EVENT_TG_OVRUN_C_INT			92
+#define CMDQ_EVENT_DMA_R1_ERROR_C_INT			93
+#define CMDQ_EVENT_TG_OVRUN_M0_INT			94
+#define CMDQ_EVENT_DMA_R1_ERROR_M0_INT			95
+#define CMDQ_EVENT_TG_GRABERR_M0_INT			96
+#define CMDQ_EVENT_TG_GRABERR_M1_INT			97
+#define CMDQ_EVENT_TG_GRABERR_A_INT			98
+#define CMDQ_EVENT_CQ_VR_SNAP_A_INT			99
+#define CMDQ_EVENT_TG_GRABERR_B_INT			100
+#define CMDQ_EVENT_CQ_VR_SNAP_B_INT			101
+#define CMDQ_EVENT_TG_GRABERR_C_INT			102
+#define CMDQ_EVENT_CQ_VR_SNAP_C_INT			103
+
+#define CMDQ_EVENT_VENC_CMDQ_FRAME_DONE			129
+#define CMDQ_EVENT_VENC_CMDQ_PAUSE_DONE			130
+#define CMDQ_EVENT_JPGENC_CMDQ_DONE			131
+#define CMDQ_EVENT_VENC_CMDQ_MB_DONE			132
+#define CMDQ_EVENT_VENC_CMDQ_128BYTE_CNT_DONE		133
+#define CMDQ_EVENT_VENC_C0_CMDQ_WP_2ND_STAGE_DONE	134
+#define CMDQ_EVENT_VENC_C0_CMDQ_WP_3RD_STAGE_DONE	135
+#define CMDQ_EVENT_VENC_CMDQ_PPS_DONE			136
+#define CMDQ_EVENT_VENC_CMDQ_SPS_DONE			137
+#define CMDQ_EVENT_VENC_CMDQ_VPS_DONE			138
+
+#define CMDQ_EVENT_VDEC_CORE0_SOF_0			160
+#define CMDQ_EVENT_VDEC_CORE0_FRAME_DONE_0		161
+#define CMDQ_EVENT_VDEC_CORE0_FRAME_DONE_1		162
+#define CMDQ_EVENT_VDEC_CORE0_FRAME_DONE_2		163
+#define CMDQ_EVENT_VDEC_CORE0_FRAME_DONE_3		164
+#define CMDQ_EVENT_VDEC_CORE0_FRAME_DONE_4		165
+#define CMDQ_EVENT_VDEC_CORE0_FRAME_DONE_5		166
+#define CMDQ_EVENT_VDEC_CORE0_FRAME_DONE_6		167
+#define CMDQ_EVENT_VDEC_CORE0_ENG_EVENT_0		168
+#define CMDQ_EVENT_VDEC_CORE0_ENG_EVENT_1		169
+#define CMDQ_EVENT_VDEC_CORE0_ENG_EVENT_2		170
+#define CMDQ_EVENT_VDEC_CORE0_ENG_EVENT_3		171
+#define CMDQ_EVENT_VDEC_CORE0_ENG_EVENT_4		172
+#define CMDQ_EVENT_VDEC_CORE0_ENG_EVENT_5		173
+#define CMDQ_EVENT_VDEC_CORE0_ENG_EVENT_6		174
+#define CMDQ_EVENT_VDEC_CORE0_ENG_EVENT_7		175
+#define CMDQ_EVENT_FDVT_DONE				177
+#define CMDQ_EVENT_FE_DONE				178
+#define CMDQ_EVENT_RSC_DONE				179
+#define CMDQ_EVENT_DVS_DONE_ASYNC_SHOT			180
+#define CMDQ_EVENT_DVP_DONE_ASYNC_SHOT			181
+
+#define CMDQ_EVENT_IMG2_DIP_FRAME_DONE_P2_0		193
+#define CMDQ_EVENT_IMG2_DIP_FRAME_DONE_P2_1		194
+#define CMDQ_EVENT_IMG2_DIP_FRAME_DONE_P2_2		195
+#define CMDQ_EVENT_IMG2_DIP_FRAME_DONE_P2_3		196
+#define CMDQ_EVENT_IMG2_DIP_FRAME_DONE_P2_4		197
+#define CMDQ_EVENT_IMG2_DIP_FRAME_DONE_P2_5		198
+#define CMDQ_EVENT_IMG2_DIP_FRAME_DONE_P2_6		199
+#define CMDQ_EVENT_IMG2_DIP_FRAME_DONE_P2_7		200
+#define CMDQ_EVENT_IMG2_DIP_FRAME_DONE_P2_8		201
+#define CMDQ_EVENT_IMG2_DIP_FRAME_DONE_P2_9		202
+#define CMDQ_EVENT_IMG2_DIP_FRAME_DONE_P2_10		203
+#define CMDQ_EVENT_IMG2_DIP_FRAME_DONE_P2_11		204
+#define CMDQ_EVENT_IMG2_DIP_FRAME_DONE_P2_12		205
+#define CMDQ_EVENT_IMG2_DIP_FRAME_DONE_P2_13		206
+#define CMDQ_EVENT_IMG2_DIP_FRAME_DONE_P2_14		207
+#define CMDQ_EVENT_IMG2_DIP_FRAME_DONE_P2_15		208
+#define CMDQ_EVENT_IMG2_DIP_FRAME_DONE_P2_16		209
+#define CMDQ_EVENT_IMG2_DIP_FRAME_DONE_P2_17		210
+#define CMDQ_EVENT_IMG2_DIP_FRAME_DONE_P2_18		211
+#define CMDQ_EVENT_IMG2_DIP_DMA_ERR_EVENT		212
+#define CMDQ_EVENT_IMG2_AMD_FRAME_DONE			213
+#define CMDQ_EVENT_IMG2_MFB_DONE_LINK_MISC		214
+#define CMDQ_EVENT_IMG2_WPE_A_DONE_LINK_MISC		215
+#define CMDQ_EVENT_IMG2_MSS_DONE_LINK_MISC		216
+
+#define CMDQ_EVENT_IMG1_DIP_FRAME_DONE_P2_0		225
+#define CMDQ_EVENT_IMG1_DIP_FRAME_DONE_P2_1		226
+#define CMDQ_EVENT_IMG1_DIP_FRAME_DONE_P2_2		227
+#define CMDQ_EVENT_IMG1_DIP_FRAME_DONE_P2_3		228
+#define CMDQ_EVENT_IMG1_DIP_FRAME_DONE_P2_4		229
+#define CMDQ_EVENT_IMG1_DIP_FRAME_DONE_P2_5		230
+#define CMDQ_EVENT_IMG1_DIP_FRAME_DONE_P2_6		231
+#define CMDQ_EVENT_IMG1_DIP_FRAME_DONE_P2_7		232
+#define CMDQ_EVENT_IMG1_DIP_FRAME_DONE_P2_8		233
+#define CMDQ_EVENT_IMG1_DIP_FRAME_DONE_P2_9		234
+#define CMDQ_EVENT_IMG1_DIP_FRAME_DONE_P2_10		235
+#define CMDQ_EVENT_IMG1_DIP_FRAME_DONE_P2_11		236
+#define CMDQ_EVENT_IMG1_DIP_FRAME_DONE_P2_12		237
+#define CMDQ_EVENT_IMG1_DIP_FRAME_DONE_P2_13		238
+#define CMDQ_EVENT_IMG1_DIP_FRAME_DONE_P2_14		239
+#define CMDQ_EVENT_IMG1_DIP_FRAME_DONE_P2_15		240
+#define CMDQ_EVENT_IMG1_DIP_FRAME_DONE_P2_16		241
+#define CMDQ_EVENT_IMG1_DIP_FRAME_DONE_P2_17		242
+#define CMDQ_EVENT_IMG1_DIP_FRAME_DONE_P2_18		243
+#define CMDQ_EVENT_IMG1_DIP_DMA_ERR_EVENT		244
+#define CMDQ_EVENT_IMG1_AMD_FRAME_DONE			245
+#define CMDQ_EVENT_IMG1_MFB_DONE_LINK_MISC		246
+#define CMDQ_EVENT_IMG1_WPE_A_DONE_LINK_MISC		247
+#define CMDQ_EVENT_IMG1_MSS_DONE_LINK_MISC		248
+
+#define CMDQ_EVENT_MDP_RDMA0_SOF			256
+#define CMDQ_EVENT_MDP_RDMA1_SOF			257
+#define CMDQ_EVENT_MDP_AAL0_SOF				258
+#define CMDQ_EVENT_MDP_AAL1_SOF				259
+#define CMDQ_EVENT_MDP_HDR0_SOF				260
+#define CMDQ_EVENT_MDP_HDR1_SOF				261
+#define CMDQ_EVENT_MDP_RSZ0_SOF				262
+#define CMDQ_EVENT_MDP_RSZ1_SOF				263
+#define CMDQ_EVENT_MDP_WROT0_SOF			264
+#define CMDQ_EVENT_MDP_WROT1_SOF			265
+#define CMDQ_EVENT_MDP_TDSHP0_SOF			266
+#define CMDQ_EVENT_MDP_TDSHP1_SOF			267
+#define CMDQ_EVENT_IMG_DL_RELAY0_SOF			268
+#define CMDQ_EVENT_IMG_DL_RELAY1_SOF			269
+#define CMDQ_EVENT_MDP_COLOR0_SOF			270
+#define CMDQ_EVENT_MDP_COLOR1_SOF			271
+#define CMDQ_EVENT_MDP_WROT1_FRAME_DONE			290
+#define CMDQ_EVENT_MDP_WROT0_FRAME_DONE			291
+#define CMDQ_EVENT_MDP_TDSHP1_FRAME_DONE		294
+#define CMDQ_EVENT_MDP_TDSHP0_FRAME_DONE		295
+#define CMDQ_EVENT_MDP_RSZ1_FRAME_DONE			302
+#define CMDQ_EVENT_MDP_RSZ0_FRAME_DONE			303
+#define CMDQ_EVENT_MDP_RDMA1_FRAME_DONE			306
+#define CMDQ_EVENT_MDP_RDMA0_FRAME_DONE			307
+#define CMDQ_EVENT_MDP_HDR1_FRAME_DONE			308
+#define CMDQ_EVENT_MDP_HDR0_FRAME_DONE			309
+#define CMDQ_EVENT_MDP_COLOR1_FRAME_DONE		312
+#define CMDQ_EVENT_MDP_COLOR0_FRAME_DONE		313
+#define CMDQ_EVENT_MDP_AAL1_FRAME_DONE			316
+#define CMDQ_EVENT_MDP_AAL0_FRAME_DONE			317
+#define CMDQ_EVENT_MDP_STREAM_DONE_ENG_EVENT_0		320
+#define CMDQ_EVENT_MDP_STREAM_DONE_ENG_EVENT_1		321
+#define CMDQ_EVENT_MDP_STREAM_DONE_ENG_EVENT_2		322
+#define CMDQ_EVENT_MDP_STREAM_DONE_ENG_EVENT_3		323
+#define CMDQ_EVENT_MDP_STREAM_DONE_ENG_EVENT_4		324
+#define CMDQ_EVENT_MDP_STREAM_DONE_ENG_EVENT_5		325
+#define CMDQ_EVENT_MDP_STREAM_DONE_ENG_EVENT_6		326
+#define CMDQ_EVENT_MDP_STREAM_DONE_ENG_EVENT_7		327
+#define CMDQ_EVENT_MDP_STREAM_DONE_ENG_EVENT_8		328
+#define CMDQ_EVENT_MDP_STREAM_DONE_ENG_EVENT_9		329
+#define CMDQ_EVENT_MDP_STREAM_DONE_ENG_EVENT_10		330
+#define CMDQ_EVENT_MDP_STREAM_DONE_ENG_EVENT_11		331
+#define CMDQ_EVENT_MDP_STREAM_DONE_ENG_EVENT_12		332
+#define CMDQ_EVENT_MDP_STREAM_DONE_ENG_EVENT_13		333
+#define CMDQ_EVENT_MDP_STREAM_DONE_ENG_EVENT_14		334
+#define CMDQ_EVENT_MDP_STREAM_DONE_ENG_EVENT_15		335
+#define CMDQ_EVENT_MDP_WROT1_SW_RST_DONE_ENG_EVENT	338
+#define CMDQ_EVENT_MDP_WROT0_SW_RST_DONE_ENG_EVENT	339
+#define CMDQ_EVENT_MDP_RDMA1_SW_RST_DONE_ENG_EVENT	342
+#define CMDQ_EVENT_MDP_RDMA0_SW_RST_DONE_ENG_EVENT	343
+
+#define CMDQ_EVENT_DISP_OVL0_SOF			384
+#define CMDQ_EVENT_DISP_OVL0_2L_SOF			385
+#define CMDQ_EVENT_DISP_RDMA0_SOF			386
+#define CMDQ_EVENT_DISP_RSZ0_SOF			387
+#define CMDQ_EVENT_DISP_COLOR0_SOF			388
+#define CMDQ_EVENT_DISP_CCORR0_SOF			389
+#define CMDQ_EVENT_DISP_AAL0_SOF			390
+#define CMDQ_EVENT_DISP_GAMMA0_SOF			391
+#define CMDQ_EVENT_DISP_POSTMASK0_SOF			392
+#define CMDQ_EVENT_DISP_DITHER0_SOF			393
+#define CMDQ_EVENT_DISP_DSC_WRAP0_CORE0_SOF		394
+#define CMDQ_EVENT_DISP_DSC_WRAP0_CORE1_SOF		395
+#define CMDQ_EVENT_DSI0_SOF				396
+#define CMDQ_EVENT_DISP_WDMA0_SOF			397
+#define CMDQ_EVENT_DISP_UFBC_WDMA0_SOF			398
+#define CMDQ_EVENT_DISP_PWM0_SOF			399
+#define CMDQ_EVENT_DISP_OVL2_2L_SOF			400
+#define CMDQ_EVENT_DISP_RDMA4_SOF			401
+#define CMDQ_EVENT_DISP_DPI0_SOF			402
+#define CMDQ_EVENT_MDP_RDMA4_SOF			403
+#define CMDQ_EVENT_MDP_HDR4_SOF				404
+#define CMDQ_EVENT_MDP_RSZ4_SOF				405
+#define CMDQ_EVENT_MDP_AAL4_SOF				406
+#define CMDQ_EVENT_MDP_TDSHP4_SOF			407
+#define CMDQ_EVENT_MDP_COLOR4_SOF			408
+#define CMDQ_EVENT_DISP_Y2R0_SOF			409
+#define CMDQ_EVENT_MDP_TDSHP4_FRAME_DONE		410
+#define CMDQ_EVENT_MDP_RSZ4_FRAME_DONE			411
+#define CMDQ_EVENT_MDP_RDMA4_FRAME_DONE			412
+#define CMDQ_EVENT_MDP_HDR4_FRAME_DONE			413
+#define CMDQ_EVENT_MDP_COLOR4_FRAME_DONE		414
+#define CMDQ_EVENT_MDP_AAL4_FRAME_DONE			415
+#define CMDQ_EVENT_DSI0_FRAME_DONE			416
+#define CMDQ_EVENT_DISP_WDMA0_FRAME_DONE		417
+#define CMDQ_EVENT_DISP_UFBC_WDMA0_FRAME_DONE		418
+#define CMDQ_EVENT_DISP_RSZ0_FRAME_DONE			419
+#define CMDQ_EVENT_DISP_RDMA4_FRAME_DONE		420
+#define CMDQ_EVENT_DISP_RDMA0_FRAME_DONE		421
+#define CMDQ_EVENT_DISP_POSTMASK0_FRAME_DONE		422
+#define CMDQ_EVENT_DISP_OVL2_2L_FRAME_DONE		423
+#define CMDQ_EVENT_DISP_OVL0_FRAME_DONE			424
+#define CMDQ_EVENT_DISP_OVL0_2L_FRAME_DONE		425
+#define CMDQ_EVENT_DISP_GAMMA0_FRAME_DONE		426
+#define CMDQ_EVENT_DISP_DSC_WRAP0_CORE1_FRAME_DONE	427
+#define CMDQ_EVENT_DISP_DSC_WRAP0_CORE0_FRAME_DONE	428
+#define CMDQ_EVENT_DISP_DPI0_FRAME_DONE			429
+#define CMDQ_EVENT_DISP_DITHER0_FRAME_DONE		430
+#define CMDQ_EVENT_DISP_COLOR0_FRAME_DONE		431
+#define CMDQ_EVENT_DISP_CCORR0_FRAME_DONE		432
+#define CMDQ_EVENT_DISP_AAL0_FRAME_DONE			433
+#define CMDQ_EVENT_DISP_STREAM_DONE_ENG_EVENT_0		434
+#define CMDQ_EVENT_DISP_STREAM_DONE_ENG_EVENT_1		435
+#define CMDQ_EVENT_DISP_STREAM_DONE_ENG_EVENT_2		436
+#define CMDQ_EVENT_DISP_STREAM_DONE_ENG_EVENT_3		437
+#define CMDQ_EVENT_DISP_STREAM_DONE_ENG_EVENT_4		438
+#define CMDQ_EVENT_DISP_STREAM_DONE_ENG_EVENT_5		439
+#define CMDQ_EVENT_DISP_STREAM_DONE_ENG_EVENT_6		440
+#define CMDQ_EVENT_DISP_STREAM_DONE_ENG_EVENT_7		441
+#define CMDQ_EVENT_DISP_STREAM_DONE_ENG_EVENT_8		442
+#define CMDQ_EVENT_DISP_STREAM_DONE_ENG_EVENT_9		443
+#define CMDQ_EVENT_DISP_STREAM_DONE_ENG_EVENT_10	444
+#define CMDQ_EVENT_DISP_STREAM_DONE_ENG_EVENT_11	445
+#define CMDQ_EVENT_DISP_STREAM_DONE_ENG_EVENT_12	446
+#define CMDQ_EVENT_DISP_STREAM_DONE_ENG_EVENT_13	447
+#define CMDQ_EVENT_DISP_STREAM_DONE_ENG_EVENT_14	448
+#define CMDQ_EVENT_DISP_STREAM_DONE_ENG_EVENT_15	449
+#define CMDQ_EVENT_DSI0_TE_ENG_EVENT			450
+#define CMDQ_EVENT_DSI0_IRQ_ENG_EVENT			451
+#define CMDQ_EVENT_DSI0_DONE_ENG_EVENT			452
+#define CMDQ_EVENT_DISP_WDMA0_SW_RST_DONE_ENG_EVENT	453
+#define CMDQ_EVENT_DISP_SMIASSERT_ENG_EVENT		454
+#define CMDQ_EVENT_DISP_POSTMASK0_RST_DONE_ENG_EVENT	455
+#define CMDQ_EVENT_DISP_OVL2_2L_RST_DONE_ENG_EVENT	456
+#define CMDQ_EVENT_DISP_OVL0_RST_DONE_ENG_EVENT		457
+#define CMDQ_EVENT_DISP_OVL0_2L_RST_DONE_ENG_EVENT	458
+#define CMDQ_EVENT_BUF_UNDERRUN_ENG_EVENT_0		459
+#define CMDQ_EVENT_BUF_UNDERRUN_ENG_EVENT_1		460
+#define CMDQ_EVENT_BUF_UNDERRUN_ENG_EVENT_2		461
+#define CMDQ_EVENT_BUF_UNDERRUN_ENG_EVENT_3		462
+#define CMDQ_EVENT_BUF_UNDERRUN_ENG_EVENT_4		463
+#define CMDQ_EVENT_BUF_UNDERRUN_ENG_EVENT_5		464
+#define CMDQ_EVENT_BUF_UNDERRUN_ENG_EVENT_6		465
+#define CMDQ_EVENT_BUF_UNDERRUN_ENG_EVENT_7		466
+#define CMDQ_MAX_HW_EVENT				512
+
+#endif
diff --git a/include/dt-bindings/gce/mt8195-gce.h b/include/dt-bindings/gce/mt8195-gce.h
new file mode 100644
index 000000000000..dcfb302b8a5b
--- /dev/null
+++ b/include/dt-bindings/gce/mt8195-gce.h
@@ -0,0 +1,812 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2021 MediaTek Inc.
+ * Author: Jason-JH Lin <jason0jh.lin@mediatek.com>
+ */
+
+#ifndef _DT_BINDINGS_GCE_MT8195_H
+#define _DT_BINDINGS_GCE_MT8195_H
+
+/* assign timeout 0 also means default */
+#define CMDQ_NO_TIMEOUT		0xffffffff
+#define CMDQ_TIMEOUT_DEFAULT	1000
+
+/* GCE thread priority */
+#define CMDQ_THR_PRIO_LOWEST	0
+#define CMDQ_THR_PRIO_1		1
+#define CMDQ_THR_PRIO_2		2
+#define CMDQ_THR_PRIO_3		3
+#define CMDQ_THR_PRIO_4		4
+#define CMDQ_THR_PRIO_5		5
+#define CMDQ_THR_PRIO_6		6
+#define CMDQ_THR_PRIO_HIGHEST	7
+
+/* CPR count in 32bit register */
+#define GCE_CPR_COUNT		1312
+
+/* GCE subsys table */
+#define SUBSYS_1400XXXX		0
+#define SUBSYS_1401XXXX		1
+#define SUBSYS_1402XXXX		2
+#define SUBSYS_1c00XXXX		3
+#define SUBSYS_1c01XXXX		4
+#define SUBSYS_1c02XXXX		5
+#define SUBSYS_1c10XXXX		6
+#define SUBSYS_1c11XXXX		7
+#define SUBSYS_1c12XXXX		8
+#define SUBSYS_14f0XXXX		9
+#define SUBSYS_14f1XXXX		10
+#define SUBSYS_14f2XXXX		11
+#define SUBSYS_1800XXXX		12
+#define SUBSYS_1801XXXX		13
+#define SUBSYS_1802XXXX		14
+#define SUBSYS_1803XXXX		15
+#define SUBSYS_1032XXXX		16
+#define SUBSYS_1033XXXX		17
+#define SUBSYS_1600XXXX		18
+#define SUBSYS_1601XXXX		19
+#define SUBSYS_14e0XXXX		20
+#define SUBSYS_1c20XXXX		21
+#define SUBSYS_1c30XXXX		22
+#define SUBSYS_1c40XXXX		23
+#define SUBSYS_1c50XXXX		24
+#define SUBSYS_1c60XXXX		25
+
+/* GCE General Purpose Register (GPR) support */
+#define GCE_GPR_R00		0x0
+#define GCE_GPR_R01		0x1
+#define GCE_GPR_R02		0x2
+#define GCE_GPR_R03		0x3
+#define GCE_GPR_R04		0x4
+#define GCE_GPR_R05		0x5
+#define GCE_GPR_R06		0x6
+#define GCE_GPR_R07		0x7
+#define GCE_GPR_R08		0x8
+#define GCE_GPR_R09		0x9
+#define GCE_GPR_R10		0xa
+#define GCE_GPR_R11		0xb
+#define GCE_GPR_R12		0xc
+#define GCE_GPR_R13		0xd
+#define GCE_GPR_R14		0xe
+#define GCE_GPR_R15		0xf
+
+/* GCE hw event id */
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW0_0	1
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW0_1	2
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW0_2	3
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW0_3	4
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW0_4	5
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW0_5	6
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW0_6	7
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW0_7	8
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW0_8	9
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW0_9	10
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW0_10	11
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW0_11	12
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW0_12	13
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW0_13	14
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW0_14	15
+#define CMDQ_EVENT_TRAW0_DMA_ERROR_INT	16
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW1_0	17
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW1_1	18
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW1_2	19
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW1_3	20
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW1_4	21
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW1_5	22
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW1_6	23
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW1_7	24
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW1_8	25
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW1_9	26
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW1_10	27
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW1_11	28
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW1_12	29
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW1_13	30
+#define CMDQ_EVENT_CQ_THR_DONE_TRAW1_14	31
+#define CMDQ_EVENT_TRAW1_DMA_ERROR_INT	32
+
+#define CMDQ_EVENT_DIP0_FRAME_DONE_P2_0	65
+#define CMDQ_EVENT_DIP0_FRAME_DONE_P2_1	66
+#define CMDQ_EVENT_DIP0_FRAME_DONE_P2_2	67
+#define CMDQ_EVENT_DIP0_FRAME_DONE_P2_3	68
+#define CMDQ_EVENT_DIP0_FRAME_DONE_P2_4	69
+#define CMDQ_EVENT_DIP0_FRAME_DONE_P2_5	70
+#define CMDQ_EVENT_DIP0_FRAME_DONE_P2_6	71
+#define CMDQ_EVENT_DIP0_FRAME_DONE_P2_7	72
+#define CMDQ_EVENT_DIP0_FRAME_DONE_P2_8	73
+#define CMDQ_EVENT_DIP0_FRAME_DONE_P2_9	74
+#define CMDQ_EVENT_DIP0_FRAME_DONE_P2_10	75
+#define CMDQ_EVENT_DIP0_FRAME_DONE_P2_11	76
+#define CMDQ_EVENT_DIP0_FRAME_DONE_P2_12	77
+#define CMDQ_EVENT_DIP0_FRAME_DONE_P2_13	78
+#define CMDQ_EVENT_DIP0_FRAME_DONE_P2_14	79
+#define CMDQ_EVENT_DIP0_DMA_ERR	80
+#define CMDQ_EVENT_PQA0_FRAME_DONE_P2_0	81
+#define CMDQ_EVENT_PQA0_FRAME_DONE_P2_1	82
+#define CMDQ_EVENT_PQA0_FRAME_DONE_P2_2	83
+#define CMDQ_EVENT_PQA0_FRAME_DONE_P2_3	84
+#define CMDQ_EVENT_PQA0_FRAME_DONE_P2_4	85
+#define CMDQ_EVENT_PQA0_FRAME_DONE_P2_5	86
+#define CMDQ_EVENT_PQA0_FRAME_DONE_P2_6	87
+#define CMDQ_EVENT_PQA0_FRAME_DONE_P2_7	88
+#define CMDQ_EVENT_PQA0_FRAME_DONE_P2_8	89
+#define CMDQ_EVENT_PQA0_FRAME_DONE_P2_9	90
+#define CMDQ_EVENT_PQA0_FRAME_DONE_P2_10	91
+#define CMDQ_EVENT_PQA0_FRAME_DONE_P2_11	92
+#define CMDQ_EVENT_PQA0_FRAME_DONE_P2_12	93
+#define CMDQ_EVENT_PQA0_FRAME_DONE_P2_13	94
+#define CMDQ_EVENT_PQA0_FRAME_DONE_P2_14	95
+#define CMDQ_EVENT_PQA0_DMA_ERR	96
+#define CMDQ_EVENT_PQB0_FRAME_DONE_P2_0	97
+#define CMDQ_EVENT_PQB0_FRAME_DONE_P2_1	98
+#define CMDQ_EVENT_PQB0_FRAME_DONE_P2_2	99
+#define CMDQ_EVENT_PQB0_FRAME_DONE_P2_3	100
+#define CMDQ_EVENT_PQB0_FRAME_DONE_P2_4	101
+#define CMDQ_EVENT_PQB0_FRAME_DONE_P2_5	102
+#define CMDQ_EVENT_PQB0_FRAME_DONE_P2_6	103
+#define CMDQ_EVENT_PQB0_FRAME_DONE_P2_7	104
+#define CMDQ_EVENT_PQB0_FRAME_DONE_P2_8	105
+#define CMDQ_EVENT_PQB0_FRAME_DONE_P2_9	106
+#define CMDQ_EVENT_PQB0_FRAME_DONE_P2_10	107
+#define CMDQ_EVENT_PQB0_FRAME_DONE_P2_11	108
+#define CMDQ_EVENT_PQB0_FRAME_DONE_P2_12	109
+#define CMDQ_EVENT_PQB0_FRAME_DONE_P2_13	110
+#define CMDQ_EVENT_PQB0_FRAME_DONE_P2_14	111
+#define CMDQ_EVENT_PQB0_DMA_ERR	112
+#define CMDQ_EVENT_DIP0_DUMMY_0	113
+#define CMDQ_EVENT_DIP0_DUMMY_1	114
+#define CMDQ_EVENT_DIP0_DUMMY_2	115
+#define CMDQ_EVENT_DIP0_DUMMY_3	116
+#define CMDQ_EVENT_WPE0_EIS_GCE_FRAME_DONE	117
+#define CMDQ_EVENT_WPE0_EIS_DONE_SYNC_OUT	118
+#define CMDQ_EVENT_WPE0_TNR_GCE_FRAME_DONE	119
+#define CMDQ_EVENT_WPE0_TNR_DONE_SYNC_OUT	120
+#define CMDQ_EVENT_WPE0_EIS_FRAME_DONE_P2_0	121
+#define CMDQ_EVENT_WPE0_EIS_FRAME_DONE_P2_1	122
+#define CMDQ_EVENT_WPE0_EIS_FRAME_DONE_P2_2	123
+#define CMDQ_EVENT_WPE0_EIS_FRAME_DONE_P2_3	124
+#define CMDQ_EVENT_WPE0_EIS_FRAME_DONE_P2_4	125
+#define CMDQ_EVENT_WPE0_EIS_FRAME_DONE_P2_5	126
+#define CMDQ_EVENT_WPE0_EIS_FRAME_DONE_P2_6	127
+#define CMDQ_EVENT_WPE0_EIS_FRAME_DONE_P2_7	128
+#define CMDQ_EVENT_WPE0_EIS_FRAME_DONE_P2_8	129
+#define CMDQ_EVENT_WPE0_EIS_FRAME_DONE_P2_9	130
+#define CMDQ_EVENT_WPE0_EIS_FRAME_DONE_P2_10	131
+#define CMDQ_EVENT_WPE0_EIS_FRAME_DONE_P2_11	132
+#define CMDQ_EVENT_WPE0_EIS_FRAME_DONE_P2_12	133
+#define CMDQ_EVENT_WPE0_EIS_FRAME_DONE_P2_13	134
+#define CMDQ_EVENT_WPE0_EIS_FRAME_DONE_P2_14	135
+#define CMDQ_EVENT_WPE0_TNR_FRAME_DONE_P2_0	136
+#define CMDQ_EVENT_WPE0_TNR_FRAME_DONE_P2_1	137
+#define CMDQ_EVENT_WPE0_TNR_FRAME_DONE_P2_2	138
+#define CMDQ_EVENT_WPE0_TNR_FRAME_DONE_P2_3	139
+#define CMDQ_EVENT_WPE0_TNR_FRAME_DONE_P2_4	140
+#define CMDQ_EVENT_WPE0_TNR_FRAME_DONE_P2_5	141
+#define CMDQ_EVENT_WPE0_TNR_FRAME_DONE_P2_6	142
+#define CMDQ_EVENT_WPE0_TNR_FRAME_DONE_P2_7	143
+#define CMDQ_EVENT_WPE0_TNR_FRAME_DONE_P2_8	144
+#define CMDQ_EVENT_WPE0_TNR_FRAME_DONE_P2_9	145
+#define CMDQ_EVENT_WPE0_TNR_FRAME_DONE_P2_10	146
+#define CMDQ_EVENT_WPE0_TNR_FRAME_DONE_P2_11	147
+#define CMDQ_EVENT_WPE0_TNR_FRAME_DONE_P2_12	148
+#define CMDQ_EVENT_WPE0_TNR_FRAME_DONE_P2_13	149
+#define CMDQ_EVENT_WPE0_TNR_FRAME_DONE_P2_14	150
+#define CMDQ_EVENT_WPE0_DUMMY_0	151
+#define CMDQ_EVENT_IMGSYS_IPE_DUMMY	152
+#define CMDQ_EVENT_IMGSYS_IPE_FDVT_DONE	153
+#define CMDQ_EVENT_IMGSYS_IPE_ME_DONE	154
+#define CMDQ_EVENT_IMGSYS_IPE_DVS_DONE	155
+#define CMDQ_EVENT_IMGSYS_IPE_DVP_DONE	156
+
+#define CMDQ_EVENT_TPR_0	194
+#define CMDQ_EVENT_TPR_1	195
+#define CMDQ_EVENT_TPR_2	196
+#define CMDQ_EVENT_TPR_3	197
+#define CMDQ_EVENT_TPR_4	198
+#define CMDQ_EVENT_TPR_5	199
+#define CMDQ_EVENT_TPR_6	200
+#define CMDQ_EVENT_TPR_7	201
+#define CMDQ_EVENT_TPR_8	202
+#define CMDQ_EVENT_TPR_9	203
+#define CMDQ_EVENT_TPR_10	204
+#define CMDQ_EVENT_TPR_11	205
+#define CMDQ_EVENT_TPR_12	206
+#define CMDQ_EVENT_TPR_13	207
+#define CMDQ_EVENT_TPR_14	208
+#define CMDQ_EVENT_TPR_15	209
+#define CMDQ_EVENT_TPR_16	210
+#define CMDQ_EVENT_TPR_17	211
+#define CMDQ_EVENT_TPR_18	212
+#define CMDQ_EVENT_TPR_19	213
+#define CMDQ_EVENT_TPR_20	214
+#define CMDQ_EVENT_TPR_21	215
+#define CMDQ_EVENT_TPR_22	216
+#define CMDQ_EVENT_TPR_23	217
+#define CMDQ_EVENT_TPR_24	218
+#define CMDQ_EVENT_TPR_25	219
+#define CMDQ_EVENT_TPR_26	220
+#define CMDQ_EVENT_TPR_27	221
+#define CMDQ_EVENT_TPR_28	222
+#define CMDQ_EVENT_TPR_29	223
+#define CMDQ_EVENT_TPR_30	224
+#define CMDQ_EVENT_TPR_31	225
+#define CMDQ_EVENT_TPR_TIMEOUT_0	226
+#define CMDQ_EVENT_TPR_TIMEOUT_1	227
+#define CMDQ_EVENT_TPR_TIMEOUT_2	228
+#define CMDQ_EVENT_TPR_TIMEOUT_3	229
+#define CMDQ_EVENT_TPR_TIMEOUT_4	230
+#define CMDQ_EVENT_TPR_TIMEOUT_5	231
+#define CMDQ_EVENT_TPR_TIMEOUT_6	232
+#define CMDQ_EVENT_TPR_TIMEOUT_7	233
+#define CMDQ_EVENT_TPR_TIMEOUT_8	234
+#define CMDQ_EVENT_TPR_TIMEOUT_9	235
+#define CMDQ_EVENT_TPR_TIMEOUT_10	236
+#define CMDQ_EVENT_TPR_TIMEOUT_11	237
+#define CMDQ_EVENT_TPR_TIMEOUT_12	238
+#define CMDQ_EVENT_TPR_TIMEOUT_13	239
+#define CMDQ_EVENT_TPR_TIMEOUT_14	240
+#define CMDQ_EVENT_TPR_TIMEOUT_15	241
+
+#define CMDQ_EVENT_VPP0_MDP_RDMA_SOF	256
+#define CMDQ_EVENT_VPP0_MDP_FG_SOF	257
+#define CMDQ_EVENT_VPP0_STITCH_SOF	258
+#define CMDQ_EVENT_VPP0_MDP_HDR_SOF	259
+#define CMDQ_EVENT_VPP0_MDP_AAL_SOF	260
+#define CMDQ_EVENT_VPP0_MDP_RSZ_IN_RSZ_SOF	261
+#define CMDQ_EVENT_VPP0_MDP_TDSHP_SOF	262
+#define CMDQ_EVENT_VPP0_DISP_COLOR_SOF	263
+#define CMDQ_EVENT_VPP0_DISP_OVL_NOAFBC_SOF	264
+#define CMDQ_EVENT_VPP0_VPP_PADDING_IN_PADDING_SOF	265
+#define CMDQ_EVENT_VPP0_MDP_TCC_IN_SOF	266
+#define CMDQ_EVENT_VPP0_MDP_WROT_SOF	267
+
+#define CMDQ_EVENT_VPP0_WARP0_MMSYS_TOP_RELAY_SOF_PRE	269
+#define CMDQ_EVENT_VPP0_WARP1_MMSYS_TOP_RELAY_SOF_PRE	270
+#define CMDQ_EVENT_VPP0_VPP1_MMSYS_TOP_RELAY_SOF	271
+#define CMDQ_EVENT_VPP0_VPP1_IN_MMSYS_TOP_RELAY_SOF_PRE	272
+
+#define CMDQ_EVENT_VPP0_MDP_RDMA_FRAME_DONE	288
+#define CMDQ_EVENT_VPP0_MDP_FG_TILE_DONE	289
+#define CMDQ_EVENT_VPP0_STITCH_FRAME_DONE	290
+#define CMDQ_EVENT_VPP0_MDP_HDR_FRAME_DONE	291
+#define CMDQ_EVENT_VPP0_MDP_AAL_FRAME_DONE	292
+#define CMDQ_EVENT_VPP0_MDP_RSZ_FRAME_DONE	293
+#define CMDQ_EVENT_VPP0_MDP_TDSHP_FRAME_DONE	294
+#define CMDQ_EVENT_VPP0_DISP_COLOR_FRAME_DONE	295
+#define CMDQ_EVENT_VPP0_DISP_OVL_NOAFBC_FRAME_DONE	296
+#define CMDQ_EVENT_VPP0_VPP_PADDING_IN_PADDING_FRAME_DONE	297
+#define CMDQ_EVENT_VPP0_MDP_TCC_TCC_FRAME_DONE	298
+#define CMDQ_EVENT_VPP0_MDP_WROT_VIDO_WDONE	299
+
+#define CMDQ_EVENT_VPP0_STREAM_DONE_0	320
+#define CMDQ_EVENT_VPP0_STREAM_DONE_1	321
+#define CMDQ_EVENT_VPP0_STREAM_DONE_2	322
+#define CMDQ_EVENT_VPP0_STREAM_DONE_3	323
+#define CMDQ_EVENT_VPP0_STREAM_DONE_4	324
+#define CMDQ_EVENT_VPP0_STREAM_DONE_5	325
+#define CMDQ_EVENT_VPP0_STREAM_DONE_6	326
+#define CMDQ_EVENT_VPP0_STREAM_DONE_7	327
+#define CMDQ_EVENT_VPP0_STREAM_DONE_8	328
+#define CMDQ_EVENT_VPP0_STREAM_DONE_9	329
+#define CMDQ_EVENT_VPP0_STREAM_DONE_10	330
+#define CMDQ_EVENT_VPP0_STREAM_DONE_11	331
+#define CMDQ_EVENT_VPP0_STREAM_DONE_12	332
+#define CMDQ_EVENT_VPP0_STREAM_DONE_13	333
+#define CMDQ_EVENT_VPP0_STREAM_DONE_14	334
+#define CMDQ_EVENT_VPP0_STREAM_DONE_15	335
+#define CMDQ_EVENT_VPP0_BUF_UNDERRUN_0	336
+#define CMDQ_EVENT_VPP0_BUF_UNDERRUN_1	337
+#define CMDQ_EVENT_VPP0_BUF_UNDERRUN_2	338
+#define CMDQ_EVENT_VPP0_BUF_UNDERRUN_3	339
+#define CMDQ_EVENT_VPP0_BUF_UNDERRUN_4	340
+#define CMDQ_EVENT_VPP0_BUF_UNDERRUN_5	341
+#define CMDQ_EVENT_VPP0_BUF_UNDERRUN_6	342
+#define CMDQ_EVENT_VPP0_BUF_UNDERRUN_7	343
+#define CMDQ_EVENT_VPP0_BUF_UNDERRUN_8	344
+#define CMDQ_EVENT_VPP0_BUF_UNDERRUN_9	345
+#define CMDQ_EVENT_VPP0_BUF_UNDERRUN_10	346
+#define CMDQ_EVENT_VPP0_BUF_UNDERRUN_11	347
+#define CMDQ_EVENT_VPP0_BUF_UNDERRUN_12	348
+#define CMDQ_EVENT_VPP0_BUF_UNDERRUN_13	349
+#define CMDQ_EVENT_VPP0_BUF_UNDERRUN_14	350
+#define CMDQ_EVENT_VPP0_BUF_UNDERRUN_15	351
+#define CMDQ_EVENT_VPP0_MDP_RDMA_SW_RST_DONE	352
+#define CMDQ_EVENT_VPP0_MDP_RDMA_PM_VALID	353
+#define CMDQ_EVENT_VPP0_DISP_OVL_NOAFBC_FRAME_RESET_DONE_PULSE	354
+#define CMDQ_EVENT_VPP0_MDP_WROT_SW_RST_DONE	355
+
+#define CMDQ_EVENT_VPP1_HDMI_META_SOF		384
+#define CMDQ_EVENT_VPP1_DGI_SOF			385
+#define CMDQ_EVENT_VPP1_VPP_SPLIT_SOF		386
+#define CMDQ_EVENT_VPP1_SVPP1_MDP_TCC_SOF	387
+#define CMDQ_EVENT_VPP1_SVPP1_MDP_RDMA_SOF	388
+#define CMDQ_EVENT_VPP1_SVPP2_MDP_RDMA_SOF	389
+#define CMDQ_EVENT_VPP1_SVPP3_MDP_RDMA_SOF	390
+#define CMDQ_EVENT_VPP1_SVPP1_MDP_FG_SOF	391
+#define CMDQ_EVENT_VPP1_SVPP2_MDP_FG_SOF	392
+#define CMDQ_EVENT_VPP1_SVPP3_MDP_FG_SOF	393
+#define CMDQ_EVENT_VPP1_SVPP1_MDP_HDR_SOF	394
+#define CMDQ_EVENT_VPP1_SVPP2_MDP_HDR_SOF	395
+#define CMDQ_EVENT_VPP1_SVPP3_MDP_HDR_SOF	396
+#define CMDQ_EVENT_VPP1_SVPP1_MDP_AAL_SOF	397
+#define CMDQ_EVENT_VPP1_SVPP2_MDP_AAL_SOF	398
+#define CMDQ_EVENT_VPP1_SVPP3_MDP_AAL_SOF	399
+#define CMDQ_EVENT_VPP1_SVPP1_MDP_RSZ_SOF	400
+#define CMDQ_EVENT_VPP1_SVPP2_MDP_RSZ_SOF	401
+#define CMDQ_EVENT_VPP1_SVPP3_MDP_RSZ_SOF	402
+#define CMDQ_EVENT_VPP1_SVPP1_TDSHP_SOF		403
+#define CMDQ_EVENT_VPP1_SVPP2_TDSHP_SOF		404
+#define CMDQ_EVENT_VPP1_SVPP3_TDSHP_SOF		405
+#define CMDQ_EVENT_VPP1_SVPP2_VPP_MERGE_SOF	406
+#define CMDQ_EVENT_VPP1_SVPP3_VPP_MERGE_SOF	407
+#define CMDQ_EVENT_VPP1_SVPP1_MDP_COLOR_SOF	408
+#define CMDQ_EVENT_VPP1_SVPP2_MDP_COLOR_SOF	409
+#define CMDQ_EVENT_VPP1_SVPP3_MDP_COLOR_SOF	410
+#define CMDQ_EVENT_VPP1_SVPP1_MDP_OVL_SOF	411
+#define CMDQ_EVENT_VPP1_SVPP1_VPP_PAD_SOF	412
+#define CMDQ_EVENT_VPP1_SVPP2_VPP_PAD_SOF	413
+#define CMDQ_EVENT_VPP1_SVPP3_VPP_PAD_SOF	414
+#define CMDQ_EVENT_VPP1_SVPP1_MDP_WROT_SOF	415
+#define CMDQ_EVENT_VPP1_SVPP2_MDP_WROT_SOF	416
+#define CMDQ_EVENT_VPP1_SVPP3_MDP_WROT_SOF	417
+#define CMDQ_EVENT_VPP1_VPP0_DL_IRLY_SOF	418
+#define CMDQ_EVENT_VPP1_VPP0_DL_ORLY_SOF	419
+#define CMDQ_EVENT_VPP1_VDO0_DL_ORLY_0_SOF	420
+#define CMDQ_EVENT_VPP1_VDO0_DL_ORLY_1_SOF	421
+#define CMDQ_EVENT_VPP1_VDO1_DL_ORLY_0_SOF	422
+#define CMDQ_EVENT_VPP1_VDO1_DL_ORLY_1_SOF	423
+#define CMDQ_EVENT_VPP1_SVPP1_MDP_RDMA_FRAME_DONE	424
+#define CMDQ_EVENT_VPP1_SVPP2_MDP_RDMA_FRAME_DONE	425
+#define CMDQ_EVENT_VPP1_SVPP3_MDP_RDMA_FRAME_DONE	426
+#define CMDQ_EVENT_VPP1_SVPP1_MDP_WROT_FRAME_DONE	427
+#define CMDQ_EVENT_VPP1_SVPP2_MDP_WROT_FRAME_DONE	428
+#define CMDQ_EVENT_VPP1_SVPP3_MDP_WROT_FRAME_DONE	429
+#define CMDQ_EVENT_VPP1_SVPP1_MDP_OVL_FRAME_DONE	430
+#define CMDQ_EVENT_VPP1_SVPP1_MDP_RSZ_FRAME_DONE	431
+#define CMDQ_EVENT_VPP1_SVPP2_MDP_RSZ_FRAME_DONE	432
+#define CMDQ_EVENT_VPP1_SVPP3_MDP_RSZ_FRAME_DONE	433
+#define CMDQ_EVENT_VPP1_FRAME_DONE_10	434
+#define CMDQ_EVENT_VPP1_FRAME_DONE_11	435
+#define CMDQ_EVENT_VPP1_FRAME_DONE_12	436
+#define CMDQ_EVENT_VPP1_FRAME_DONE_13	437
+#define CMDQ_EVENT_VPP1_FRAME_DONE_14	438
+#define CMDQ_EVENT_VPP1_STREAM_DONE_0	439
+#define CMDQ_EVENT_VPP1_STREAM_DONE_1	440
+#define CMDQ_EVENT_VPP1_STREAM_DONE_2	441
+#define CMDQ_EVENT_VPP1_STREAM_DONE_3	442
+#define CMDQ_EVENT_VPP1_STREAM_DONE_4	443
+#define CMDQ_EVENT_VPP1_STREAM_DONE_5	444
+#define CMDQ_EVENT_VPP1_STREAM_DONE_6	445
+#define CMDQ_EVENT_VPP1_STREAM_DONE_7	446
+#define CMDQ_EVENT_VPP1_STREAM_DONE_8	447
+#define CMDQ_EVENT_VPP1_STREAM_DONE_9	448
+#define CMDQ_EVENT_VPP1_STREAM_DONE_10	449
+#define CMDQ_EVENT_VPP1_STREAM_DONE_11	450
+#define CMDQ_EVENT_VPP1_STREAM_DONE_12	451
+#define CMDQ_EVENT_VPP1_STREAM_DONE_13	452
+#define CMDQ_EVENT_VPP1_STREAM_DONE_14	453
+#define CMDQ_EVENT_VPP1_STREAM_DONE_15	454
+#define CMDQ_EVENT_VPP1_MDP_BUF_UNDERRUN_0	455
+#define CMDQ_EVENT_VPP1_MDP_BUF_UNDERRUN_1	456
+#define CMDQ_EVENT_VPP1_MDP_BUF_UNDERRUN_2	457
+#define CMDQ_EVENT_VPP1_MDP_BUF_UNDERRUN_3	458
+#define CMDQ_EVENT_VPP1_MDP_BUF_UNDERRUN_4	459
+#define CMDQ_EVENT_VPP1_MDP_BUF_UNDERRUN_5	460
+#define CMDQ_EVENT_VPP1_MDP_BUF_UNDERRUN_6	461
+#define CMDQ_EVENT_VPP1_MDP_BUF_UNDERRUN_7	462
+#define CMDQ_EVENT_VPP1_MDP_BUF_UNDERRUN_8	463
+#define CMDQ_EVENT_VPP1_MDP_BUF_UNDERRUN_9	464
+#define CMDQ_EVENT_VPP1_MDP_BUF_UNDERRUN_10	465
+#define CMDQ_EVENT_VPP1_MDP_BUF_UNDERRUN_11	466
+#define CMDQ_EVENT_VPP1_MDP_BUF_UNDERRUN_12	467
+#define CMDQ_EVENT_VPP1_MDP_BUF_UNDERRUN_13	468
+#define CMDQ_EVENT_VPP1_MDP_BUF_UNDERRUN_14	469
+#define CMDQ_EVENT_VPP1_MDP_BUF_UNDERRUN_15	470
+#define CMDQ_EVENT_VPP1_DGI_0	471
+#define CMDQ_EVENT_VPP1_DGI_1	472
+#define CMDQ_EVENT_VPP1_DGI_2	473
+#define CMDQ_EVENT_VPP1_DGI_3	474
+#define CMDQ_EVENT_VPP1_DGI_4	475
+#define CMDQ_EVENT_VPP1_DGI_5	476
+#define CMDQ_EVENT_VPP1_DGI_6	477
+#define CMDQ_EVENT_VPP1_DGI_7	478
+#define CMDQ_EVENT_VPP1_DGI_8	479
+#define CMDQ_EVENT_VPP1_DGI_9	480
+#define CMDQ_EVENT_VPP1_DGI_10	481
+#define CMDQ_EVENT_VPP1_DGI_11	482
+#define CMDQ_EVENT_VPP1_DGI_12	483
+#define CMDQ_EVENT_VPP1_DGI_13	484
+#define CMDQ_EVENT_VPP1_SVPP3_VPP_MERGE	485
+#define CMDQ_EVENT_VPP1_SVPP2_VPP_MERGE	486
+#define CMDQ_EVENT_VPP1_MDP_OVL_FRAME_RESET_DONE_PULSE	487
+#define CMDQ_EVENT_VPP1_VPP_SPLIT_DGI	488
+#define CMDQ_EVENT_VPP1_VPP_SPLIT_HDMI	489
+#define CMDQ_EVENT_VPP1_SVPP3_MDP_WROT_SW_RST_DONE	490
+#define CMDQ_EVENT_VPP1_SVPP2_MDP_WROT_SW_RST_DONE	491
+#define CMDQ_EVENT_VPP1_SVPP1_MDP_WROT_SW_RST_DONE	492
+#define CMDQ_EVENT_VPP1_SVPP3_MDP_FG_TILE_DONE	493
+#define CMDQ_EVENT_VPP1_SVPP2_MDP_FG_TILE_DONE	494
+#define CMDQ_EVENT_VPP1_SVPP1_MDP_FG_TILE_DONE	495
+
+#define CMDQ_EVENT_VDO0_DISP_OVL0_SOF	512
+#define CMDQ_EVENT_VDO0_DISP_WDMA0_SOF	513
+#define CMDQ_EVENT_VDO0_DISP_RDMA0_SOF	514
+#define CMDQ_EVENT_VDO0_DISP_COLOR0_SOF	515
+#define CMDQ_EVENT_VDO0_DISP_CCORR0_SOF	516
+#define CMDQ_EVENT_VDO0_DISP_AAL0_SOF	517
+#define CMDQ_EVENT_VDO0_DISP_GAMMA0_SOF	518
+#define CMDQ_EVENT_VDO0_DISP_DITHER0_SOF	519
+#define CMDQ_EVENT_VDO0_DSI0_SOF	520
+#define CMDQ_EVENT_VDO0_DSC_WRAP0C0_SOF	521
+#define CMDQ_EVENT_VDO0_DISP_OVL1_SOF	522
+#define CMDQ_EVENT_VDO0_DISP_WDMA1_SOF	523
+#define CMDQ_EVENT_VDO0_DISP_RDMA1_SOF	524
+#define CMDQ_EVENT_VDO0_DISP_COLOR1_SOF	525
+#define CMDQ_EVENT_VDO0_DISP_CCORR1_SOF	526
+#define CMDQ_EVENT_VDO0_DISP_AAL1_SOF	527
+#define CMDQ_EVENT_VDO0_DISP_GAMMA1_SOF	528
+#define CMDQ_EVENT_VDO0_DISP_DITHER1_SOF	529
+#define CMDQ_EVENT_VDO0_DSI1_SOF	530
+#define CMDQ_EVENT_VDO0_DSC_WRAP0C1_SOF	531
+#define CMDQ_EVENT_VDO0_VPP_MERGE0_SOF	532
+#define CMDQ_EVENT_VDO0_DP_INTF0_SOF	533
+#define CMDQ_EVENT_VDO0_VPP1_DL_RELAY0_SOF	534
+#define CMDQ_EVENT_VDO0_VPP1_DL_RELAY1_SOF	535
+#define CMDQ_EVENT_VDO0_VDO1_DL_RELAY2_SOF	536
+#define CMDQ_EVENT_VDO0_VDO0_DL_RELAY3_SOF	537
+#define CMDQ_EVENT_VDO0_VDO0_DL_RELAY4_SOF	538
+#define CMDQ_EVENT_VDO0_DISP_PWM0_SOF	539
+#define CMDQ_EVENT_VDO0_DISP_PWM1_SOF	540
+
+#define CMDQ_EVENT_VDO0_DISP_OVL0_FRAME_DONE	544
+#define CMDQ_EVENT_VDO0_DISP_WDMA0_FRAME_DONE	545
+#define CMDQ_EVENT_VDO0_DISP_RDMA0_FRAME_DONE	546
+#define CMDQ_EVENT_VDO0_DISP_COLOR0_FRAME_DONE	547
+#define CMDQ_EVENT_VDO0_DISP_CCORR0_FRAME_DONE	548
+#define CMDQ_EVENT_VDO0_DISP_AAL0_FRAME_DONE	549
+#define CMDQ_EVENT_VDO0_DISP_GAMMA0_FRAME_DONE	550
+#define CMDQ_EVENT_VDO0_DISP_DITHER0_FRAME_DONE	551
+#define CMDQ_EVENT_VDO0_DSI0_FRAME_DONE	552
+#define CMDQ_EVENT_VDO0_DSC_WRAP0C0_FRAME_DONE	553
+#define CMDQ_EVENT_VDO0_DISP_OVL1_FRAME_DONE	554
+#define CMDQ_EVENT_VDO0_DISP_WDMA1_FRAME_DONE	555
+#define CMDQ_EVENT_VDO0_DISP_RDMA1_FRAME_DONE	556
+#define CMDQ_EVENT_VDO0_DISP_COLOR1_FRAME_DONE	557
+#define CMDQ_EVENT_VDO0_DISP_CCORR1_FRAME_DONE	558
+#define CMDQ_EVENT_VDO0_DISP_AAL1_FRAME_DONE	559
+#define CMDQ_EVENT_VDO0_DISP_GAMMA1_FRAME_DONE	560
+#define CMDQ_EVENT_VDO0_DISP_DITHER1_FRAME_DONE	561
+#define CMDQ_EVENT_VDO0_DSI1_FRAME_DONE	562
+#define CMDQ_EVENT_VDO0_DSC_WRAP0C1_FRAME_DONE	563
+
+#define CMDQ_EVENT_VDO0_DP_INTF0_FRAME_DONE	565
+
+#define CMDQ_EVENT_VDO0_DISP_SMIASSERT_ENG	576
+#define CMDQ_EVENT_VDO0_DSI0_IRQ_ENG_EVENT_MM	577
+#define CMDQ_EVENT_VDO0_DSI0_TE_ENG_EVENT_MM	578
+#define CMDQ_EVENT_VDO0_DSI0_DONE_ENG_EVENT_MM	579
+#define CMDQ_EVENT_VDO0_DSI0_SOF_ENG_EVENT_MM	580
+#define CMDQ_EVENT_VDO0_DSI0_VACTL_ENG_EVENT_MM	581
+#define CMDQ_EVENT_VDO0_DSI1_IRQ_ENG_EVENT_MM	582
+#define CMDQ_EVENT_VDO0_DSI1_TE_ENG_EVENT_MM	583
+#define CMDQ_EVENT_VDO0_DSI1_DONE_ENG_EVENT_MM	584
+#define CMDQ_EVENT_VDO0_DSI1_SOF_ENG_EVENT_MM	585
+#define CMDQ_EVENT_VDO0_DSI1_VACTL_ENG_EVENT_MM	586
+#define CMDQ_EVENT_VDO0_DISP_WDMA0_SW_RST_DONE_ENG	587
+#define CMDQ_EVENT_VDO0_DISP_WDMA1_SW_RST_DONE_ENG	588
+#define CMDQ_EVENT_VDO0_DISP_OVL0_RST_DONE_ENG	589
+#define CMDQ_EVENT_VDO0_DISP_OVL1_RST_DONE_ENG	590
+#define CMDQ_EVENT_VDO0_DP_INTF0_VSYNC_START_ENG_EVENT_MM	591
+#define CMDQ_EVENT_VDO0_DP_INTF0_VSYNC_END_ENG_EVENT_MM	592
+#define CMDQ_EVENT_VDO0_DP_INTF0_VDE_START_ENG_EVENT_MM	593
+#define CMDQ_EVENT_VDO0_DP_INTF0_VDE_END_ENG_EVENT_MM	594
+#define CMDQ_EVENT_VDO0_DP_INTF0_TARGET_LINE_ENG_EVENT_MM	595
+#define CMDQ_EVENT_VDO0_VPP_MERGE0_ENG	596
+#define CMDQ_EVENT_VDO0_DISP_STREAM_DONE_0	597
+#define CMDQ_EVENT_VDO0_DISP_STREAM_DONE_1	598
+#define CMDQ_EVENT_VDO0_DISP_STREAM_DONE_2	599
+#define CMDQ_EVENT_VDO0_DISP_STREAM_DONE_3	600
+#define CMDQ_EVENT_VDO0_DISP_STREAM_DONE_4	601
+#define CMDQ_EVENT_VDO0_DISP_STREAM_DONE_5	602
+#define CMDQ_EVENT_VDO0_DISP_STREAM_DONE_6	603
+#define CMDQ_EVENT_VDO0_DISP_STREAM_DONE_7	604
+#define CMDQ_EVENT_VDO0_DISP_STREAM_DONE_8	605
+#define CMDQ_EVENT_VDO0_DISP_STREAM_DONE_9	606
+#define CMDQ_EVENT_VDO0_DISP_STREAM_DONE_10	607
+#define CMDQ_EVENT_VDO0_DISP_STREAM_DONE_11	608
+#define CMDQ_EVENT_VDO0_DISP_STREAM_DONE_12	609
+#define CMDQ_EVENT_VDO0_DISP_STREAM_DONE_13	610
+#define CMDQ_EVENT_VDO0_DISP_STREAM_DONE_14	611
+#define CMDQ_EVENT_VDO0_DISP_STREAM_DONE_15	612
+#define CMDQ_EVENT_VDO0_DISP_BUF_UNDERRUN_0	613
+#define CMDQ_EVENT_VDO0_DISP_BUF_UNDERRUN_1	614
+#define CMDQ_EVENT_VDO0_DISP_BUF_UNDERRUN_2	615
+#define CMDQ_EVENT_VDO0_DISP_BUF_UNDERRUN_3	616
+#define CMDQ_EVENT_VDO0_DISP_BUF_UNDERRUN_4	617
+#define CMDQ_EVENT_VDO0_DISP_BUF_UNDERRUN_5	618
+#define CMDQ_EVENT_VDO0_DISP_BUF_UNDERRUN_6	619
+#define CMDQ_EVENT_VDO0_DISP_BUF_UNDERRUN_7	620
+#define CMDQ_EVENT_VDO0_DISP_BUF_UNDERRUN_8	621
+#define CMDQ_EVENT_VDO0_DISP_BUF_UNDERRUN_9	622
+#define CMDQ_EVENT_VDO0_DISP_BUF_UNDERRUN_10	623
+#define CMDQ_EVENT_VDO0_DISP_BUF_UNDERRUN_11	624
+#define CMDQ_EVENT_VDO0_DISP_BUF_UNDERRUN_12	625
+#define CMDQ_EVENT_VDO0_DISP_BUF_UNDERRUN_13	626
+#define CMDQ_EVENT_VDO0_DISP_BUF_UNDERRUN_14	627
+#define CMDQ_EVENT_VDO0_DISP_BUF_UNDERRUN_15	628
+
+#define CMDQ_EVENT_VDO1_MDP_RDMA0_SOF	640
+#define CMDQ_EVENT_VDO1_MDP_RDMA1_SOF	641
+#define CMDQ_EVENT_VDO1_MDP_RDMA2_SOF	642
+#define CMDQ_EVENT_VDO1_MDP_RDMA3_SOF	643
+#define CMDQ_EVENT_VDO1_MDP_RDMA4_SOF	644
+#define CMDQ_EVENT_VDO1_MDP_RDMA5_SOF	645
+#define CMDQ_EVENT_VDO1_MDP_RDMA6_SOF	646
+#define CMDQ_EVENT_VDO1_MDP_RDMA7_SOF	647
+#define CMDQ_EVENT_VDO1_VPP_MERGE0_SOF	648
+#define CMDQ_EVENT_VDO1_VPP_MERGE1_SOF	649
+#define CMDQ_EVENT_VDO1_VPP_MERGE2_SOF	650
+#define CMDQ_EVENT_VDO1_VPP_MERGE3_SOF	651
+#define CMDQ_EVENT_VDO1_VPP_MERGE4_SOF	652
+#define CMDQ_EVENT_VDO1_VPP2_DL_RELAY_SOF	653
+#define CMDQ_EVENT_VDO1_VPP3_DL_RELAY_SOF	654
+#define CMDQ_EVENT_VDO1_VDO0_DSC_DL_ASYNC_SOF	655
+#define CMDQ_EVENT_VDO1_VDO0_MERGE_DL_ASYNC_SOF	656
+#define CMDQ_EVENT_VDO1_OUT_DL_RELAY_SOF	657
+#define CMDQ_EVENT_VDO1_DISP_MIXER_SOF	658
+#define CMDQ_EVENT_VDO1_HDR_VDO_FE0_SOF	659
+#define CMDQ_EVENT_VDO1_HDR_VDO_FE1_SOF	660
+#define CMDQ_EVENT_VDO1_HDR_GFX_FE0_SOF	661
+#define CMDQ_EVENT_VDO1_HDR_GFX_FE1_SOF	662
+#define CMDQ_EVENT_VDO1_HDR_VDO_BE0_SOF	663
+#define CMDQ_EVENT_VDO1_HDR_MLOAD_SOF	664
+
+#define CMDQ_EVENT_VDO1_MDP_RDMA0_FRAME_DONE	672
+#define CMDQ_EVENT_VDO1_MDP_RDMA1_FRAME_DONE	673
+#define CMDQ_EVENT_VDO1_MDP_RDMA2_FRAME_DONE	674
+#define CMDQ_EVENT_VDO1_MDP_RDMA3_FRAME_DONE	675
+#define CMDQ_EVENT_VDO1_MDP_RDMA4_FRAME_DONE	676
+#define CMDQ_EVENT_VDO1_MDP_RDMA5_FRAME_DONE	677
+#define CMDQ_EVENT_VDO1_MDP_RDMA6_FRAME_DONE	678
+#define CMDQ_EVENT_VDO1_MDP_RDMA7_FRAME_DONE	679
+#define CMDQ_EVENT_VDO1_VPP_MERGE0_FRAME_DONE	680
+#define CMDQ_EVENT_VDO1_VPP_MERGE1_FRAME_DONE	681
+#define CMDQ_EVENT_VDO1_VPP_MERGE2_FRAME_DONE	682
+#define CMDQ_EVENT_VDO1_VPP_MERGE3_FRAME_DONE	683
+#define CMDQ_EVENT_VDO1_VPP_MERGE4_FRAME_DONE	684
+#define CMDQ_EVENT_VDO1_DPI0_FRAME_DONE	685
+#define CMDQ_EVENT_VDO1_DPI1_FRAME_DONE	686
+#define CMDQ_EVENT_VDO1_DP_INTF0_FRAME_DONE	687
+#define CMDQ_EVENT_VDO1_DISP_MIXER_FRAME_DONE_MM	688
+
+#define CMDQ_EVENT_VDO1_STREAM_DONE_ENG_0	704
+#define CMDQ_EVENT_VDO1_STREAM_DONE_ENG_1	705
+#define CMDQ_EVENT_VDO1_STREAM_DONE_ENG_2	706
+#define CMDQ_EVENT_VDO1_STREAM_DONE_ENG_3	707
+#define CMDQ_EVENT_VDO1_STREAM_DONE_ENG_4	708
+#define CMDQ_EVENT_VDO1_STREAM_DONE_ENG_5	709
+#define CMDQ_EVENT_VDO1_STREAM_DONE_ENG_6	710
+#define CMDQ_EVENT_VDO1_STREAM_DONE_ENG_7	711
+#define CMDQ_EVENT_VDO1_STREAM_DONE_ENG_8	712
+#define CMDQ_EVENT_VDO1_STREAM_DONE_ENG_9	713
+#define CMDQ_EVENT_VDO1_STREAM_DONE_ENG_10	714
+#define CMDQ_EVENT_VDO1_STREAM_DONE_ENG_11	715
+#define CMDQ_EVENT_VDO1_STREAM_DONE_ENG_12	716
+#define CMDQ_EVENT_VDO1_STREAM_DONE_ENG_13	717
+#define CMDQ_EVENT_VDO1_STREAM_DONE_ENG_14	718
+#define CMDQ_EVENT_VDO1_STREAM_DONE_ENG_15	719
+#define CMDQ_EVENT_VDO1_BUF_UNDERRUN_ENG_0	720
+#define CMDQ_EVENT_VDO1_BUF_UNDERRUN_ENG_1	721
+#define CMDQ_EVENT_VDO1_BUF_UNDERRUN_ENG_2	722
+#define CMDQ_EVENT_VDO1_BUF_UNDERRUN_ENG_3	723
+#define CMDQ_EVENT_VDO1_BUF_UNDERRUN_ENG_4	724
+#define CMDQ_EVENT_VDO1_BUF_UNDERRUN_ENG_5	725
+#define CMDQ_EVENT_VDO1_BUF_UNDERRUN_ENG_6	726
+#define CMDQ_EVENT_VDO1_BUF_UNDERRUN_ENG_7	727
+#define CMDQ_EVENT_VDO1_BUF_UNDERRUN_ENG_8	728
+#define CMDQ_EVENT_VDO1_BUF_UNDERRUN_ENG_9	729
+#define CMDQ_EVENT_VDO1_BUF_UNDERRUN_ENG_10	730
+#define CMDQ_EVENT_VDO1_BUF_UNDERRUN_ENG_11	731
+#define CMDQ_EVENT_VDO1_BUF_UNDERRUN_ENG_12	732
+#define CMDQ_EVENT_VDO1_BUF_UNDERRUN_ENG_13	733
+#define CMDQ_EVENT_VDO1_BUF_UNDERRUN_ENG_14	734
+#define CMDQ_EVENT_VDO1_BUF_UNDERRUN_ENG_15	735
+#define CMDQ_EVENT_VDO1_MDP_RDMA0_SW_RST_DONE	736
+#define CMDQ_EVENT_VDO1_MDP_RDMA1_SW_RST_DONE	737
+#define CMDQ_EVENT_VDO1_MDP_RDMA2_SW_RST_DONE	738
+#define CMDQ_EVENT_VDO1_MDP_RDMA3_SW_RST_DONE	739
+#define CMDQ_EVENT_VDO1_MDP_RDMA4_SW_RST_DONE	740
+#define CMDQ_EVENT_VDO1_MDP_RDMA5_SW_RST_DONE	741
+#define CMDQ_EVENT_VDO1_MDP_RDMA6_SW_RST_DONE	742
+#define CMDQ_EVENT_VDO1_MDP_RDMA7_SW_RST_DONE	743
+
+#define CMDQ_EVENT_VDO1_DP0_VDE_END_ENG_EVENT_MM	745
+#define CMDQ_EVENT_VDO1_DP0_VDE_START_ENG_EVENT_MM	746
+#define CMDQ_EVENT_VDO1_DP0_VSYNC_END_ENG_EVENT_MM	747
+#define CMDQ_EVENT_VDO1_DP0_VSYNC_START_ENG_EVENT_MM	748
+#define CMDQ_EVENT_VDO1_DP0_TARGET_LINE_ENG_EVENT_MM	749
+#define CMDQ_EVENT_VDO1_VPP_MERGE0	750
+#define CMDQ_EVENT_VDO1_VPP_MERGE1	751
+#define CMDQ_EVENT_VDO1_VPP_MERGE2	752
+#define CMDQ_EVENT_VDO1_VPP_MERGE3	753
+#define CMDQ_EVENT_VDO1_VPP_MERGE4	754
+#define CMDQ_EVENT_VDO1_HDMITX	755
+#define CMDQ_EVENT_VDO1_HDR_VDO_BE0_ADL_TRIG_EVENT_MM	756
+#define CMDQ_EVENT_VDO1_HDR_GFX_FE1_THDR_ADL_TRIG_EVENT_MM	757
+#define CMDQ_EVENT_VDO1_HDR_GFX_FE1_DM_ADL_TRIG_EVENT_MM	758
+#define CMDQ_EVENT_VDO1_HDR_GFX_FE0_THDR_ADL_TRIG_EVENT_MM	759
+#define CMDQ_EVENT_VDO1_HDR_GFX_FE0_DM_ADL_TRIG_EVENT_MM	760
+#define CMDQ_EVENT_VDO1_HDR_VDO_FE1_ADL_TRIG_EVENT_MM	761
+#define CMDQ_EVENT_VDO1_HDR_VDO_FE1_AD0_TRIG_EVENT_MM	762
+
+#define CMDQ_EVENT_CAM_A_PASS1_DONE	769
+#define CMDQ_EVENT_CAM_B_PASS1_DONE	770
+#define CMDQ_EVENT_GCAMSV_A_PASS1_DONE	771
+#define CMDQ_EVENT_GCAMSV_B_PASS1_DONE	772
+#define CMDQ_EVENT_MRAW_0_PASS1_DONE	773
+#define CMDQ_EVENT_MRAW_1_PASS1_DONE	774
+#define CMDQ_EVENT_MRAW_2_PASS1_DONE	775
+#define CMDQ_EVENT_MRAW_3_PASS1_DONE	776
+#define CMDQ_EVENT_SENINF_CAM0_FIFO_FULL_X	777
+#define CMDQ_EVENT_SENINF_CAM1_FIFO_FULL_X	778
+#define CMDQ_EVENT_SENINF_CAM2_FIFO_FULL	779
+#define CMDQ_EVENT_SENINF_CAM3_FIFO_FULL	780
+#define CMDQ_EVENT_SENINF_CAM4_FIFO_FULL	781
+#define CMDQ_EVENT_SENINF_CAM5_FIFO_FULL	782
+#define CMDQ_EVENT_SENINF_CAM6_FIFO_FULL	783
+#define CMDQ_EVENT_SENINF_CAM7_FIFO_FULL	784
+#define CMDQ_EVENT_SENINF_CAM8_FIFO_FULL	785
+#define CMDQ_EVENT_SENINF_CAM9_FIFO_FULL	786
+#define CMDQ_EVENT_SENINF_CAM10_FIFO_FULL_X	787
+#define CMDQ_EVENT_SENINF_CAM11_FIFO_FULL_X	788
+#define CMDQ_EVENT_SENINF_CAM12_FIFO_FULL_X	789
+#define CMDQ_EVENT_SENINF_CAM13_FIFO_FULL_X	790
+#define CMDQ_EVENT_TG_OVRUN_MRAW0_INT_X0	791
+#define CMDQ_EVENT_TG_OVRUN_MRAW1_INT_X0	792
+#define CMDQ_EVENT_TG_OVRUN_MRAW2_INT	793
+#define CMDQ_EVENT_TG_OVRUN_MRAW3_INT	794
+#define CMDQ_EVENT_DMA_R1_ERROR_MRAW0_INT	795
+#define CMDQ_EVENT_DMA_R1_ERROR_MRAW1_INT	796
+#define CMDQ_EVENT_DMA_R1_ERROR_MRAW2_INT	797
+#define CMDQ_EVENT_DMA_R1_ERROR_MRAW3_INT	798
+#define CMDQ_EVENT_U_CAMSYS_PDA_IRQO_EVENT_DONE_D1	799
+#define CMDQ_EVENT_SUBB_TG_INT4	800
+#define CMDQ_EVENT_SUBB_TG_INT3	801
+#define CMDQ_EVENT_SUBB_TG_INT2	802
+#define CMDQ_EVENT_SUBB_TG_INT1	803
+#define CMDQ_EVENT_SUBA_TG_INT4	804
+#define CMDQ_EVENT_SUBA_TG_INT3	805
+#define CMDQ_EVENT_SUBA_TG_INT2	806
+#define CMDQ_EVENT_SUBA_TG_INT1	807
+#define CMDQ_EVENT_SUBB_DRZS4NO_R1_LOW_LATENCY_LINE_CNT_INT	808
+#define CMDQ_EVENT_SUBB_YUVO_R3_LOW_LATENCY_LINE_CNT_INT	809
+#define CMDQ_EVENT_SUBB_YUVO_R1_LOW_LATENCY_LINE_CNT_INT	810
+#define CMDQ_EVENT_SUBB_IMGO_R1_LOW_LATENCY_LINE_CNT_INT	811
+#define CMDQ_EVENT_SUBA_DRZS4NO_R1_LOW_LATENCY_LINE_CNT_INT	812
+#define CMDQ_EVENT_SUBA_YUVO_R3_LOW_LATENCY_LINE_CNT_INT	813
+#define CMDQ_EVENT_SUBA_YUVO_R1_LOW_LATENCY_LINE_CNT_INT	814
+#define CMDQ_EVENT_SUBA_IMGO_R1_LOW_LATENCY_LINE_CNT_INT	815
+#define CMDQ_EVENT_GCE1_SOF_0	816
+#define CMDQ_EVENT_GCE1_SOF_1	817
+#define CMDQ_EVENT_GCE1_SOF_2	818
+#define CMDQ_EVENT_GCE1_SOF_3	819
+#define CMDQ_EVENT_GCE1_SOF_4	820
+#define CMDQ_EVENT_GCE1_SOF_5	821
+#define CMDQ_EVENT_GCE1_SOF_6	822
+#define CMDQ_EVENT_GCE1_SOF_7	823
+#define CMDQ_EVENT_GCE1_SOF_8	824
+#define CMDQ_EVENT_GCE1_SOF_9	825
+#define CMDQ_EVENT_GCE1_SOF_10	826
+#define CMDQ_EVENT_GCE1_SOF_11	827
+#define CMDQ_EVENT_GCE1_SOF_12	828
+#define CMDQ_EVENT_GCE1_SOF_13	829
+#define CMDQ_EVENT_GCE1_SOF_14	830
+#define CMDQ_EVENT_GCE1_SOF_15	831
+
+#define CMDQ_EVENT_VDEC_LAT_LINE_COUNT_THRESHOLD_INTERRUPT	832
+#define CMDQ_EVENT_VDEC_LAT_VDEC_INT	833
+#define CMDQ_EVENT_VDEC_LAT_VDEC_PAUSE	834
+#define CMDQ_EVENT_VDEC_LAT_VDEC_DEC_ERROR	835
+#define CMDQ_EVENT_VDEC_LAT_MC_BUSY_OVERFLOW_MDEC_TIMEOUT	836
+#define CMDQ_EVENT_VDEC_LAT_VDEC_FRAME_DONE	837
+#define CMDQ_EVENT_VDEC_LAT_INI_FETCH_RDY	838
+#define CMDQ_EVENT_VDEC_LAT_PROCESS_FLAG	839
+#define CMDQ_EVENT_VDEC_LAT_SEARCH_START_CODE_DONE	840
+#define CMDQ_EVENT_VDEC_LAT_REF_REORDER_DONE	841
+#define CMDQ_EVENT_VDEC_LAT_WP_TBLE_DONE	842
+#define CMDQ_EVENT_VDEC_LAT_COUNT_SRAM_CLR_DONE_AND_CTX_SRAM_CLR_DONE	843
+#define CMDQ_EVENT_VDEC_LAT_GCE_CNT_OP_THRESHOLD	847
+
+#define CMDQ_EVENT_VDEC_LAT1_LINE_COUNT_THRESHOLD_INTERRUPT	848
+#define CMDQ_EVENT_VDEC_LAT1_VDEC_INT	849
+#define CMDQ_EVENT_VDEC_LAT1_VDEC_PAUSE	850
+#define CMDQ_EVENT_VDEC_LAT1_VDEC_DEC_ERROR	851
+#define CMDQ_EVENT_VDEC_LAT1_MC_BUSY_OVERFLOW_MDEC_TIMEOUT	852
+#define CMDQ_EVENT_VDEC_LAT1_VDEC_FRAME_DONE	853
+#define CMDQ_EVENT_VDEC_LAT1_INI_FETCH_RDY	854
+#define CMDQ_EVENT_VDEC_LAT1_PROCESS_FLAG	855
+#define CMDQ_EVENT_VDEC_LAT1_SEARCH_START_CODE_DONE	856
+#define CMDQ_EVENT_VDEC_LAT1_REF_REORDER_DONE	857
+#define CMDQ_EVENT_VDEC_LAT1_WP_TBLE_DONE	858
+#define CMDQ_EVENT_VDEC_LAT1_COUNT_SRAM_CLR_DONE_AND_CTX_SRAM_CLR_DONE	859
+#define CMDQ_EVENT_VDEC_LAT1_GCE_CNT_OP_THRESHOLD	863
+
+#define CMDQ_EVENT_VDEC_SOC_GLOBAL_CON_250_0	864
+#define CMDQ_EVENT_VDEC_SOC_GLOBAL_CON_250_1	865
+
+#define CMDQ_EVENT_VDEC_SOC_GLOBAL_CON_250_8	872
+#define CMDQ_EVENT_VDEC_SOC_GLOBAL_CON_250_9	873
+
+#define CMDQ_EVENT_VDEC_CORE_LINE_COUNT_THRESHOLD_INTERRUPT	896
+#define CMDQ_EVENT_VDEC_CORE_VDEC_INT	897
+#define CMDQ_EVENT_VDEC_CORE_VDEC_PAUSE	898
+#define CMDQ_EVENT_VDEC_CORE_VDEC_DEC_ERROR	899
+#define CMDQ_EVENT_VDEC_CORE_MC_BUSY_OVERFLOW_MDEC_TIMEOUT	900
+#define CMDQ_EVENT_VDEC_CORE_VDEC_FRAME_DONE	901
+#define CMDQ_EVENT_VDEC_CORE_INI_FETCH_RDY	902
+#define CMDQ_EVENT_VDEC_CORE_PROCESS_FLAG	903
+#define CMDQ_EVENT_VDEC_CORE_SEARCH_START_CODE_DONE	904
+#define CMDQ_EVENT_VDEC_CORE_REF_REORDER_DONE	905
+#define CMDQ_EVENT_VDEC_CORE_WP_TBLE_DONE	906
+#define CMDQ_EVENT_VDEC_CORE_COUNT_SRAM_CLR_DONE_AND_CTX_SRAM_CLR_DONE	907
+#define CMDQ_EVENT_VDEC_CORE_GCE_CNT_OP_THRESHOLD	911
+
+#define CMDQ_EVENT_VDEC_CORE1_LINE_COUNT_THRESHOLD_INTERRUPT	912
+#define CMDQ_EVENT_VDEC_CORE1_VDEC_INT	913
+#define CMDQ_EVENT_VDEC_CORE1_VDEC_PAUSE	914
+#define CMDQ_EVENT_VDEC_CORE1_VDEC_DEC_ERROR	915
+#define CMDQ_EVENT_VDEC_CORE1_MC_BUSY_OVERFLOW_MDEC_TIMEOUT	916
+#define CMDQ_EVENT_VDEC_CORE1_VDEC_FRAME_DONE	917
+#define CMDQ_EVENT_VDEC_CORE1_INI_FETCH_RDY	918
+#define CMDQ_EVENT_VDEC_CORE1_PROCESS_FLAG	919
+#define CMDQ_EVENT_VDEC_CORE1_SEARCH_START_CODE_DONE	920
+#define CMDQ_EVENT_VDEC_CORE1_REF_REORDER_DONE	921
+#define CMDQ_EVENT_VDEC_CORE1_WP_TBLE_DONE	922
+#define CMDQ_EVENT_VDEC_CORE1_COUNT_SRAM_CLR_DONE_AND_CTX_SRAM_CLR_DONE	923
+#define CMDQ_EVENT_VDEC_CORE1_CNT_OP_THRESHOLD	927
+
+#define CMDQ_EVENT_VENC_TOP_FRAME_DONE	929
+#define CMDQ_EVENT_VENC_TOP_PAUSE_DONE	930
+#define CMDQ_EVENT_VENC_TOP_JPGENC_DONE	931
+#define CMDQ_EVENT_VENC_TOP_MB_DONE	932
+#define CMDQ_EVENT_VENC_TOP_128BYTE_DONE	933
+#define CMDQ_EVENT_VENC_TOP_JPGDEC_DONE	934
+#define CMDQ_EVENT_VENC_TOP_JPGDEC_C1_DONE	935
+#define CMDQ_EVENT_VENC_TOP_JPGDEC_INSUFF_DONE	936
+#define CMDQ_EVENT_VENC_TOP_JPGDEC_C1_INSUFF_DONE	937
+#define CMDQ_EVENT_VENC_TOP_WP_2ND_STAGE_DONE	938
+#define CMDQ_EVENT_VENC_TOP_WP_3RD_STAGE_DONE	939
+#define CMDQ_EVENT_VENC_TOP_PPS_HEADER_DONE	940
+#define CMDQ_EVENT_VENC_TOP_SPS_HEADER_DONE	941
+#define CMDQ_EVENT_VENC_TOP_VPS_HEADER_DONE	942
+
+#define CMDQ_EVENT_VENC_CORE1_TOP_FRAME_DONE	945
+#define CMDQ_EVENT_VENC_CORE1_TOP_PAUSE_DONE	946
+#define CMDQ_EVENT_VENC_CORE1_TOP_JPGENC_DONE	947
+#define CMDQ_EVENT_VENC_CORE1_TOP_MB_DONE	948
+#define CMDQ_EVENT_VENC_CORE1_TOP_128BYTE_DONE	949
+#define CMDQ_EVENT_VENC_CORE1_TOP_JPGDEC_DONE	950
+#define CMDQ_EVENT_VENC_CORE1_TOP_JPGDEC_C1_DONE	951
+#define CMDQ_EVENT_VENC_CORE1_TOP_JPGDEC_INSUFF_DONE	952
+#define CMDQ_EVENT_VENC_CORE1_TOP_JPGDEC_C1_INSUFF_DONE	953
+#define CMDQ_EVENT_VENC_CORE1_TOP_WP_2ND_STAGE_DONE	954
+#define CMDQ_EVENT_VENC_CORE1_TOP_WP_3RD_STAGE_DONE	955
+#define CMDQ_EVENT_VENC_CORE1_TOP_PPS_HEADER_DONE	956
+#define CMDQ_EVENT_VENC_CORE1_TOP_SPS_HEADER_DONE	957
+#define CMDQ_EVENT_VENC_CORE1_TOP_VPS_HEADER_DONE	958
+
+#define CMDQ_EVENT_WPE_VPP0_WPE_GCE_FRAME_DONE	962
+#define CMDQ_EVENT_WPE_VPP0_WPE_DONE_SYNC_OUT	963
+
+#define CMDQ_EVENT_WPE_VPP1_WPE_GCE_FRAME_DONE	969
+#define CMDQ_EVENT_WPE_VPP1_WPE_DONE_SYNC_OUT	970
+
+#define CMDQ_EVENT_DP_TX_VBLANK_FALLING	994
+#define CMDQ_EVENT_DP_TX_VSC_FINISH	995
+
+#define CMDQ_EVENT_OUTPIN_0	1018
+#define CMDQ_EVENT_OUTPIN_1	1019
+
+/* end of hw event */
+#define CMDQ_MAX_HW_EVENT				1019
+
+#endif
diff --git a/arch/arm/boot/dts/mt8135-pinfunc.h b/include/dt-bindings/pinctrl/mt8135-pinfunc.h
index ce0cb5a440eb..ce0cb5a440eb 100644
--- a/arch/arm/boot/dts/mt8135-pinfunc.h
+++ b/include/dt-bindings/pinctrl/mt8135-pinfunc.h
diff --git a/arch/arm64/boot/dts/mediatek/mt8183-pinfunc.h b/include/dt-bindings/pinctrl/mt8183-pinfunc.h
index 6221cd712718..6221cd712718 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183-pinfunc.h
+++ b/include/dt-bindings/pinctrl/mt8183-pinfunc.h
diff --git a/include/dt-bindings/pinctrl/pinctrl-zynq.h b/include/dt-bindings/pinctrl/pinctrl-zynq.h
new file mode 100644
index 000000000000..bbfc345f017d
--- /dev/null
+++ b/include/dt-bindings/pinctrl/pinctrl-zynq.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * MIO pin configuration defines for Xilinx Zynq
+ *
+ * Copyright (C) 2021 Xilinx, Inc.
+ */
+
+#ifndef _DT_BINDINGS_PINCTRL_ZYNQ_H
+#define _DT_BINDINGS_PINCTRL_ZYNQ_H
+
+/* Configuration options for different power supplies */
+#define IO_STANDARD_LVCMOS18	1
+#define IO_STANDARD_LVCMOS25	2
+#define IO_STANDARD_LVCMOS33	3
+#define IO_STANDARD_HSTL	4
+
+#endif /* _DT_BINDINGS_PINCTRL_ZYNQ_H */
diff --git a/include/dt-bindings/pinctrl/rzg2l-pinctrl.h b/include/dt-bindings/pinctrl/rzg2l-pinctrl.h
new file mode 100644
index 000000000000..b48f8c7a5556
--- /dev/null
+++ b/include/dt-bindings/pinctrl/rzg2l-pinctrl.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * This header provides constants for Renesas RZ/G2L family pinctrl bindings.
+ *
+ * Copyright (C) 2021 Renesas Electronics Corp.
+ *
+ */
+
+#ifndef __DT_BINDINGS_RZG2L_PINCTRL_H
+#define __DT_BINDINGS_RZG2L_PINCTRL_H
+
+#define RZG2L_PINS_PER_PORT	8
+
+/*
+ * Create the pin index from its bank and position numbers and store in
+ * the upper 16 bits the alternate function identifier
+ */
+#define RZG2L_PORT_PINMUX(b, p, f)	((b) * RZG2L_PINS_PER_PORT + (p) | ((f) << 16))
+
+/* Convert a port and pin label to its global pin index */
+ #define RZG2L_GPIO(port, pin)	((port) * RZG2L_PINS_PER_PORT + (pin))
+
+#endif /* __DT_BINDINGS_RZG2L_PINCTRL_H */
diff --git a/include/dt-bindings/reset/mt8195-resets.h b/include/dt-bindings/reset/mt8195-resets.h
new file mode 100644
index 000000000000..a26bccc8b957
--- /dev/null
+++ b/include/dt-bindings/reset/mt8195-resets.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)*/
+/*
+ * Copyright (c) 2021 MediaTek Inc.
+ * Author: Christine Zhu <christine.zhu@mediatek.com>
+ */
+
+#ifndef _DT_BINDINGS_RESET_CONTROLLER_MT8195
+#define _DT_BINDINGS_RESET_CONTROLLER_MT8195
+
+#define MT8195_TOPRGU_CONN_MCU_SW_RST          0
+#define MT8195_TOPRGU_INFRA_GRST_SW_RST        1
+#define MT8195_TOPRGU_APU_SW_RST               2
+#define MT8195_TOPRGU_INFRA_AO_GRST_SW_RST     6
+#define MT8195_TOPRGU_MMSYS_SW_RST             7
+#define MT8195_TOPRGU_MFG_SW_RST               8
+#define MT8195_TOPRGU_VENC_SW_RST              9
+#define MT8195_TOPRGU_VDEC_SW_RST              10
+#define MT8195_TOPRGU_IMG_SW_RST               11
+#define MT8195_TOPRGU_APMIXEDSYS_SW_RST        13
+#define MT8195_TOPRGU_AUDIO_SW_RST             14
+#define MT8195_TOPRGU_CAMSYS_SW_RST            15
+#define MT8195_TOPRGU_EDPTX_SW_RST             16
+#define MT8195_TOPRGU_ADSPSYS_SW_RST           21
+#define MT8195_TOPRGU_DPTX_SW_RST              22
+#define MT8195_TOPRGU_SPMI_MST_SW_RST          23
+
+#define MT8195_TOPRGU_SW_RST_NUM               16
+
+#endif  /* _DT_BINDINGS_RESET_CONTROLLER_MT8195 */
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 1d7edad9914f..33207004cfde 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -116,6 +116,7 @@ struct bdi_writeback {
 	struct list_head b_dirty_time;	/* time stamps are dirty */
 	spinlock_t list_lock;		/* protects the b_* lists */
 
+	atomic_t writeback_inodes;	/* number of inodes under writeback */
 	struct percpu_counter stat[NR_WB_STAT_ITEMS];
 
 	unsigned long congested;	/* WB_[a]sync_congested flags */
@@ -142,6 +143,7 @@ struct bdi_writeback {
 	spinlock_t work_lock;		/* protects work_list & dwork scheduling */
 	struct list_head work_list;
 	struct delayed_work dwork;	/* work item used for writeback */
+	struct delayed_work bw_dwork;	/* work item used for bandwidth estimate */
 
 	unsigned long dirty_sleep;	/* last wait */
 
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 29530859d9ff..ac7f231b8825 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -288,6 +288,17 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
 	return inode->i_wb;
 }
 
+static inline struct bdi_writeback *inode_to_wb_wbc(
+				struct inode *inode,
+				struct writeback_control *wbc)
+{
+	/*
+	 * If wbc does not have inode attached, it means cgroup writeback was
+	 * disabled when wbc started. Just use the default wb in that case.
+	 */
+	return wbc->wb ? wbc->wb : &inode_to_bdi(inode)->wb;
+}
+
 /**
  * unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction
  * @inode: target inode
@@ -366,6 +377,14 @@ static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
 	return &inode_to_bdi(inode)->wb;
 }
 
+static inline struct bdi_writeback *inode_to_wb_wbc(
+				struct inode *inode,
+				struct writeback_control *wbc)
+{
+	return inode_to_wb(inode);
+}
+
+
 static inline struct bdi_writeback *
 unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie)
 {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c9cb12483e12..12b9dbcc980e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -17,7 +17,6 @@
 #include <linux/bio.h>
 #include <linux/stringify.h>
 #include <linux/gfp.h>
-#include <linux/bsg.h>
 #include <linux/smp.h>
 #include <linux/rcupdate.h>
 #include <linux/percpu-refcount.h>
@@ -27,14 +26,11 @@
 #include <linux/sbitmap.h>
 
 struct module;
-struct scsi_ioctl_command;
-
 struct request_queue;
 struct elevator_queue;
 struct blk_trace;
 struct request;
 struct sg_io_hdr;
-struct bsg_job;
 struct blkcg_gq;
 struct blk_flush_queue;
 struct pr_ops;
@@ -274,9 +270,6 @@ enum blk_queue_state {
 #define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */
 #define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */
 
-#define BLK_SCSI_MAX_CMDS	(256)
-#define BLK_SCSI_CMD_PER_LONG	(BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
-
 /*
  * Zoned block device models (zoned limit).
  *
@@ -505,11 +498,6 @@ struct request_queue {
 	unsigned int		max_active_zones;
 #endif /* CONFIG_BLK_DEV_ZONED */
 
-	/*
-	 * sg stuff
-	 */
-	unsigned int		sg_timeout;
-	unsigned int		sg_reserved_size;
 	int			node;
 	struct mutex		debugfs_mutex;
 #ifdef CONFIG_BLK_DEV_IO_TRACE
@@ -536,10 +524,6 @@ struct request_queue {
 
 	int			mq_freeze_depth;
 
-#if defined(CONFIG_BLK_DEV_BSG)
-	struct bsg_class_device bsg_dev;
-#endif
-
 #ifdef CONFIG_BLK_DEV_THROTTLING
 	/* Throttle data */
 	struct throtl_data *td;
@@ -885,16 +869,6 @@ extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
 				     struct request *rq);
 int blk_rq_append_bio(struct request *rq, struct bio *bio);
 extern void blk_queue_split(struct bio **);
-extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int);
-extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t,
-			      unsigned int, void __user *);
-extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t,
-			  unsigned int, void __user *);
-extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
-			 struct scsi_ioctl_command __user *);
-extern int get_sg_io_hdr(struct sg_io_hdr *hdr, const void __user *argp);
-extern int put_sg_io_hdr(const struct sg_io_hdr *hdr, void __user *argp);
-
 extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags);
 extern void blk_queue_exit(struct request_queue *q);
 extern void blk_sync_queue(struct request_queue *q);
@@ -1347,8 +1321,6 @@ static inline int sb_issue_zeroout(struct super_block *sb, sector_t block,
 				    gfp_mask, 0);
 }
 
-extern int blk_verify_command(unsigned char *cmd, fmode_t mode);
-
 static inline bool bdev_is_partition(struct block_device *bdev)
 {
 	return bdev->bd_partno;
@@ -1377,6 +1349,11 @@ static inline unsigned int queue_max_sectors(const struct request_queue *q)
 	return q->limits.max_sectors;
 }
 
+static inline unsigned int queue_max_bytes(struct request_queue *q)
+{
+	return min_t(unsigned int, queue_max_sectors(q), INT_MAX >> 9) << 9;
+}
+
 static inline unsigned int queue_max_hw_sectors(const struct request_queue *q)
 {
 	return q->limits.max_hw_sectors;
diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index abe089c27529..537e1b991f11 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -110,7 +110,7 @@ static inline __init bool xbc_node_is_leaf(struct xbc_node *node)
 }
 
 /* Tree-based key-value access APIs */
-struct xbc_node * __init xbc_node_find_child(struct xbc_node *parent,
+struct xbc_node * __init xbc_node_find_subkey(struct xbc_node *parent,
 					     const char *key);
 
 const char * __init xbc_node_find_value(struct xbc_node *parent,
@@ -148,7 +148,7 @@ xbc_find_value(const char *key, struct xbc_node **vnode)
  */
 static inline struct xbc_node * __init xbc_find_node(const char *key)
 {
-	return xbc_node_find_child(NULL, key);
+	return xbc_node_find_subkey(NULL, key);
 }
 
 /**
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h
index 960988d42f77..6b211323a489 100644
--- a/include/linux/bsg-lib.h
+++ b/include/linux/bsg-lib.h
@@ -12,6 +12,7 @@
 #include <linux/blkdev.h>
 #include <scsi/scsi_request.h>
 
+struct bsg_job;
 struct request;
 struct device;
 struct scatterlist;
diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index dac37b6e00ec..1ac81c809da9 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -4,36 +4,16 @@
 
 #include <uapi/linux/bsg.h>
 
-struct request;
+struct bsg_device;
+struct device;
+struct request_queue;
 
-#ifdef CONFIG_BLK_DEV_BSG
-struct bsg_ops {
-	int	(*check_proto)(struct sg_io_v4 *hdr);
-	int	(*fill_hdr)(struct request *rq, struct sg_io_v4 *hdr,
-				fmode_t mode);
-	int	(*complete_rq)(struct request *rq, struct sg_io_v4 *hdr);
-	void	(*free_rq)(struct request *rq);
-};
+typedef int (bsg_sg_io_fn)(struct request_queue *, struct sg_io_v4 *hdr,
+		fmode_t mode, unsigned int timeout);
 
-struct bsg_class_device {
-	struct device *class_dev;
-	int minor;
-	struct request_queue *queue;
-	const struct bsg_ops *ops;
-};
+struct bsg_device *bsg_register_queue(struct request_queue *q,
+		struct device *parent, const char *name,
+		bsg_sg_io_fn *sg_io_fn);
+void bsg_unregister_queue(struct bsg_device *bcd);
 
-int bsg_register_queue(struct request_queue *q, struct device *parent,
-		const char *name, const struct bsg_ops *ops);
-int bsg_scsi_register_queue(struct request_queue *q, struct device *parent);
-void bsg_unregister_queue(struct request_queue *q);
-#else
-static inline int bsg_scsi_register_queue(struct request_queue *q,
-		struct device *parent)
-{
-	return 0;
-}
-static inline void bsg_unregister_queue(struct request_queue *q)
-{
-}
-#endif /* CONFIG_BLK_DEV_BSG */
 #endif /* _LINUX_BSG_H */
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index e7e99da31349..6486d3c19463 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -409,7 +409,7 @@ static inline void invalidate_inode_buffers(struct inode *inode) {}
 static inline int remove_inode_buffers(struct inode *inode) { return 1; }
 static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; }
 static inline void invalidate_bh_lrus_cpu(int cpu) {}
-static inline bool has_bh_in_lru(int cpu, void *dummy) { return 0; }
+static inline bool has_bh_in_lru(int cpu, void *dummy) { return false; }
 #define buffer_heads_over_limit 0
 
 #endif /* CONFIG_BLOCK */
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 4f72b47973c3..2f909ed084c6 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -79,24 +79,6 @@ struct cpu_cacheinfo {
 	bool cpu_map_populated;
 };
 
-/*
- * Helpers to make sure "func" is executed on the cpu whose cache
- * attributes are being detected
- */
-#define DEFINE_SMP_CALL_CACHE_FUNCTION(func)			\
-static inline void _##func(void *ret)				\
-{								\
-	int cpu = smp_processor_id();				\
-	*(int *)ret = __##func(cpu);				\
-}								\
-								\
-int func(unsigned int cpu)					\
-{								\
-	int ret;						\
-	smp_call_function_single(cpu, _##func, &ret, true);	\
-	return ret;						\
-}
-
 struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu);
 int init_cache_level(unsigned int cpu);
 int populate_cache_leaves(unsigned int cpu);
diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index f48d0a31deae..c4fef00abdf3 100644
--- a/include/linux/cdrom.h
+++ b/include/linux/cdrom.h
@@ -86,11 +86,13 @@ struct cdrom_device_ops {
 	/* play stuff */
 	int (*audio_ioctl) (struct cdrom_device_info *,unsigned int, void *);
 
-/* driver specifications */
-	const int capability;   /* capability flags */
 	/* handle uniform packets for scsi type devices (scsi,atapi) */
 	int (*generic_packet) (struct cdrom_device_info *,
 			       struct packet_command *);
+	int (*read_cdda_bpc)(struct cdrom_device_info *cdi, void __user *ubuf,
+			       u32 lba, u32 nframes, u8 *last_sense);
+/* driver specifications */
+	const int capability;   /* capability flags */
 };
 
 int cdrom_multisession(struct cdrom_device_info *cdi,
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index e41a811026f6..bc2699feddbe 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -299,6 +299,7 @@ enum {
 	CEPH_SESSION_FLUSHMSG_ACK,
 	CEPH_SESSION_FORCE_RO,
 	CEPH_SESSION_REJECT,
+	CEPH_SESSION_REQUEST_FLUSH_MDLOG,
 };
 
 extern const char *ceph_session_op_name(int op);
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index d83b829305c0..f59c875271a0 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -342,7 +342,7 @@ struct clk_fixed_rate {
 	unsigned long	flags;
 };
 
-#define CLK_FIXED_RATE_PARENT_ACCURACY		BIT(0)
+#define CLK_FIXED_RATE_PARENT_ACCURACY	BIT(0)
 
 extern const struct clk_ops clk_fixed_rate_ops;
 struct clk_hw *__clk_hw_register_fixed_rate(struct device *dev,
@@ -1001,6 +1001,12 @@ struct clk_hw *devm_clk_hw_register_fixed_factor(struct device *dev,
  * CLK_FRAC_DIVIDER_BIG_ENDIAN - By default little endian register accesses are
  *	used for the divider register.  Setting this flag makes the register
  *	accesses big endian.
+ * CLK_FRAC_DIVIDER_POWER_OF_TWO_PS - By default the resulting fraction might
+ *	be saturated and the caller will get quite far from the good enough
+ *	approximation. Instead the caller may require, by setting this flag,
+ *	to shift left by a few bits in case, when the asked one is quite small
+ *	to satisfy the desired range of denominator. It assumes that on the
+ *	caller's side the power-of-two capable prescaler exists.
  */
 struct clk_fractional_divider {
 	struct clk_hw	hw;
@@ -1022,8 +1028,8 @@ struct clk_fractional_divider {
 
 #define CLK_FRAC_DIVIDER_ZERO_BASED		BIT(0)
 #define CLK_FRAC_DIVIDER_BIG_ENDIAN		BIT(1)
+#define CLK_FRAC_DIVIDER_POWER_OF_TWO_PS	BIT(2)
 
-extern const struct clk_ops clk_fractional_divider_ops;
 struct clk *clk_register_fractional_divider(struct device *dev,
 		const char *name, const char *parent_name, unsigned long flags,
 		void __iomem *reg, u8 mshift, u8 mwidth, u8 nshift, u8 nwidth,
@@ -1069,9 +1075,9 @@ struct clk_multiplier {
 
 #define to_clk_multiplier(_hw) container_of(_hw, struct clk_multiplier, hw)
 
-#define CLK_MULTIPLIER_ZERO_BYPASS		BIT(0)
+#define CLK_MULTIPLIER_ZERO_BYPASS	BIT(0)
 #define CLK_MULTIPLIER_ROUND_CLOSEST	BIT(1)
-#define CLK_MULTIPLIER_BIG_ENDIAN		BIT(2)
+#define CLK_MULTIPLIER_BIG_ENDIAN	BIT(2)
 
 extern const struct clk_ops clk_multiplier_ops;
 
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index c24098c7acca..34bce35c808d 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -84,6 +84,8 @@ static inline unsigned long compact_gap(unsigned int order)
 extern unsigned int sysctl_compaction_proactiveness;
 extern int sysctl_compaction_handler(struct ctl_table *table, int write,
 			void *buffer, size_t *length, loff_t *ppos);
+extern int compaction_proactiveness_sysctl_handler(struct ctl_table *table,
+		int write, void *buffer, size_t *length, loff_t *ppos);
 extern int sysctl_extfrag_threshold;
 extern int sysctl_compact_unevictable_allowed;
 
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 8e0598c7d1d1..1c758b0e0359 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -395,14 +395,6 @@ struct compat_kexec_segment;
 struct compat_mq_attr;
 struct compat_msgbuf;
 
-#define BITS_PER_COMPAT_LONG    (8*sizeof(compat_long_t))
-
-#define BITS_TO_COMPAT_LONGS(bits) DIV_ROUND_UP(bits, BITS_PER_COMPAT_LONG)
-
-long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
-		       unsigned long bitmap_size);
-long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
-		       unsigned long bitmap_size);
 void copy_siginfo_to_external32(struct compat_siginfo *to,
 		const struct kernel_siginfo *from);
 int copy_siginfo_from_user32(kernel_siginfo_t *to,
@@ -519,8 +511,6 @@ extern long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 
 struct epoll_event;	/* fortunately, this one is fixed-layout */
 
-extern void __user *compat_alloc_user_space(unsigned long len);
-
 int compat_restore_altstack(const compat_stack_t __user *uss);
 int __compat_save_altstack(compat_stack_t __user *, unsigned long);
 #define unsafe_compat_save_altstack(uss, sp, label) do { \
@@ -807,26 +797,6 @@ asmlinkage long compat_sys_execve(const char __user *filename, const compat_uptr
 /* mm/fadvise.c: No generic prototype for fadvise64_64 */
 
 /* mm/, CONFIG_MMU only */
-asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len,
-				 compat_ulong_t mode,
-				 compat_ulong_t __user *nmask,
-				 compat_ulong_t maxnode, compat_ulong_t flags);
-asmlinkage long compat_sys_get_mempolicy(int __user *policy,
-					 compat_ulong_t __user *nmask,
-					 compat_ulong_t maxnode,
-					 compat_ulong_t addr,
-					 compat_ulong_t flags);
-asmlinkage long compat_sys_set_mempolicy(int mode, compat_ulong_t __user *nmask,
-					 compat_ulong_t maxnode);
-asmlinkage long compat_sys_migrate_pages(compat_pid_t pid,
-		compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes,
-		const compat_ulong_t __user *new_nodes);
-asmlinkage long compat_sys_move_pages(pid_t pid, compat_ulong_t nr_pages,
-				      __u32 __user *pages,
-				      const int __user *nodes,
-				      int __user *status,
-				      int flags);
-
 asmlinkage long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid,
 					compat_pid_t pid, int sig,
 					struct compat_siginfo __user *uinfo);
@@ -976,6 +946,15 @@ static inline bool in_compat_syscall(void) { return false; }
 
 #endif /* CONFIG_COMPAT */
 
+#define BITS_PER_COMPAT_LONG    (8*sizeof(compat_long_t))
+
+#define BITS_TO_COMPAT_LONGS(bits) DIV_ROUND_UP(bits, BITS_PER_COMPAT_LONG)
+
+long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
+		       unsigned long bitmap_size);
+long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
+		       unsigned long bitmap_size);
+
 /*
  * Some legacy ABIs like the i386 one use less than natural alignment for 64-bit
  * types, and will need special compat treatment for that.  Most architectures
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index cb9217fc60af..21c36b69eb06 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -43,9 +43,6 @@
 
 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
 
-#define __compiletime_warning(message) __attribute__((__warning__(message)))
-#define __compiletime_error(message) __attribute__((__error__(message)))
-
 #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__)
 #define __latent_entropy __attribute__((latent_entropy))
 #endif
diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index 2487be0e7199..8f2106e9e5c1 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -30,6 +30,7 @@
 # define __GCC4_has_attribute___assume_aligned__      1
 # define __GCC4_has_attribute___copy__                0
 # define __GCC4_has_attribute___designated_init__     0
+# define __GCC4_has_attribute___error__               1
 # define __GCC4_has_attribute___externally_visible__  1
 # define __GCC4_has_attribute___no_caller_saved_registers__ 0
 # define __GCC4_has_attribute___noclone__             1
@@ -37,7 +38,9 @@
 # define __GCC4_has_attribute___nonstring__           0
 # define __GCC4_has_attribute___no_sanitize_address__ 1
 # define __GCC4_has_attribute___no_sanitize_undefined__ 1
+# define __GCC4_has_attribute___no_sanitize_coverage__ 0
 # define __GCC4_has_attribute___fallthrough__         0
+# define __GCC4_has_attribute___warning__             1
 #endif
 
 /*
@@ -138,6 +141,17 @@
 #endif
 
 /*
+ * Optional: only supported since clang >= 14.0
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-error-function-attribute
+ */
+#if __has_attribute(__error__)
+# define __compiletime_error(msg)       __attribute__((__error__(msg)))
+#else
+# define __compiletime_error(msg)
+#endif
+
+/*
  * Optional: not supported by clang
  *
  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-externally_005fvisible-function-attribute
@@ -299,6 +313,17 @@
 #define __must_check                    __attribute__((__warn_unused_result__))
 
 /*
+ * Optional: only supported since clang >= 14.0
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-warning-function-attribute
+ */
+#if __has_attribute(__warning__)
+# define __compiletime_warning(msg)     __attribute__((__warning__(msg)))
+#else
+# define __compiletime_warning(msg)
+#endif
+
+/*
  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-weak-function-attribute
  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-weak-variable-attribute
  */
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index e4ea86fc584d..b6ff83a714ca 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -294,12 +294,6 @@ struct ftrace_likely_data {
 #ifndef __compiletime_object_size
 # define __compiletime_object_size(obj) -1
 #endif
-#ifndef __compiletime_warning
-# define __compiletime_warning(message)
-#endif
-#ifndef __compiletime_error
-# define __compiletime_error(message)
-#endif
 
 #ifdef __OPTIMIZE__
 # define __compiletime_assert(condition, msg, prefix, suffix)		\
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 94a578a96202..9cf51e41e697 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -143,12 +143,6 @@ static inline int remove_cpu(unsigned int cpu) { return -EPERM; }
 static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { }
 #endif	/* !CONFIG_HOTPLUG_CPU */
 
-/* Wrappers which go away once all code is converted */
-static inline void cpu_hotplug_begin(void) { cpus_write_lock(); }
-static inline void cpu_hotplug_done(void) { cpus_write_unlock(); }
-static inline void get_online_cpus(void) { cpus_read_lock(); }
-static inline void put_online_cpus(void) { cpus_read_unlock(); }
-
 #ifdef CONFIG_PM_SLEEP_SMP
 extern int freeze_secondary_cpus(int primary);
 extern void thaw_secondary_cpus(void);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 9fd719475fcd..ff88bb3e44fc 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -9,10 +9,14 @@
 #define _LINUX_CPUFREQ_H
 
 #include <linux/clk.h>
+#include <linux/cpu.h>
 #include <linux/cpumask.h>
 #include <linux/completion.h>
 #include <linux/kobject.h>
 #include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/pm_opp.h>
 #include <linux/pm_qos.h>
 #include <linux/spinlock.h>
 #include <linux/sysfs.h>
@@ -365,14 +369,17 @@ struct cpufreq_driver {
 	int		(*suspend)(struct cpufreq_policy *policy);
 	int		(*resume)(struct cpufreq_policy *policy);
 
-	/* Will be called after the driver is fully initialized */
-	void		(*ready)(struct cpufreq_policy *policy);
-
 	struct freq_attr **attr;
 
 	/* platform specific boost support code */
 	bool		boost_enabled;
 	int		(*set_boost)(struct cpufreq_policy *policy, int state);
+
+	/*
+	 * Set by drivers that want to register with the energy model after the
+	 * policy is properly initialized, but before the governor is started.
+	 */
+	void		(*register_em)(struct cpufreq_policy *policy);
 };
 
 /* flags */
@@ -995,6 +1002,55 @@ static inline int cpufreq_table_count_valid_entries(const struct cpufreq_policy
 
 	return count;
 }
+
+static inline int parse_perf_domain(int cpu, const char *list_name,
+				    const char *cell_name)
+{
+	struct device_node *cpu_np;
+	struct of_phandle_args args;
+	int ret;
+
+	cpu_np = of_cpu_device_node_get(cpu);
+	if (!cpu_np)
+		return -ENODEV;
+
+	ret = of_parse_phandle_with_args(cpu_np, list_name, cell_name, 0,
+					 &args);
+	if (ret < 0)
+		return ret;
+
+	of_node_put(cpu_np);
+
+	return args.args[0];
+}
+
+static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_name,
+						     const char *cell_name, struct cpumask *cpumask)
+{
+	int target_idx;
+	int cpu, ret;
+
+	ret = parse_perf_domain(pcpu, list_name, cell_name);
+	if (ret < 0)
+		return ret;
+
+	target_idx = ret;
+	cpumask_set_cpu(pcpu, cpumask);
+
+	for_each_possible_cpu(cpu) {
+		if (cpu == pcpu)
+			continue;
+
+		ret = parse_perf_domain(pcpu, list_name, cell_name);
+		if (ret < 0)
+			continue;
+
+		if (target_idx == ret)
+			cpumask_set_cpu(cpu, cpumask);
+	}
+
+	return target_idx;
+}
 #else
 static inline int cpufreq_boost_trigger_state(int state)
 {
@@ -1014,6 +1070,12 @@ static inline bool policy_has_boost_freq(struct cpufreq_policy *policy)
 {
 	return false;
 }
+
+static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_name,
+						     const char *cell_name, struct cpumask *cpumask)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
@@ -1035,7 +1097,6 @@ void arch_set_freq_scale(const struct cpumask *cpus,
 {
 }
 #endif
-
 /* the following are really really optional */
 extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs;
 extern struct freq_attr cpufreq_freq_attr_scaling_boost_freqs;
@@ -1046,4 +1107,10 @@ unsigned int cpufreq_generic_get(unsigned int cpu);
 void cpufreq_generic_init(struct cpufreq_policy *policy,
 		struct cpufreq_frequency_table *table,
 		unsigned int transition_latency);
+
+static inline void cpufreq_register_em_with_opp(struct cpufreq_policy *policy)
+{
+	dev_pm_opp_of_register_em(get_cpu_device(policy->cpu),
+				  policy->related_cpus);
+}
 #endif /* _LINUX_CPUFREQ_H */
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 95f88edc8f09..832d8a74fa59 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -22,8 +22,42 @@
  *              AP_ACTIVE			AP_ACTIVE
  */
 
+/*
+ * CPU hotplug states. The state machine invokes the installed state
+ * startup callbacks sequentially from CPUHP_OFFLINE + 1 to CPUHP_ONLINE
+ * during a CPU online operation. During a CPU offline operation the
+ * installed teardown callbacks are invoked in the reverse order from
+ * CPU_ONLINE - 1 down to CPUHP_OFFLINE.
+ *
+ * The state space has three sections: PREPARE, STARTING and ONLINE.
+ *
+ * PREPARE: The callbacks are invoked on a control CPU before the
+ * hotplugged CPU is started up or after the hotplugged CPU has died.
+ *
+ * STARTING: The callbacks are invoked on the hotplugged CPU from the low level
+ * hotplug startup/teardown code with interrupts disabled.
+ *
+ * ONLINE: The callbacks are invoked on the hotplugged CPU from the per CPU
+ * hotplug thread with interrupts and preemption enabled.
+ *
+ * Adding explicit states to this enum is only necessary when:
+ *
+ * 1) The state is within the STARTING section
+ *
+ * 2) The state has ordering constraints vs. other states in the
+ *    same section.
+ *
+ * If neither #1 nor #2 apply, please use the dynamic state space when
+ * setting up a state by using CPUHP_PREPARE_DYN or CPUHP_PREPARE_ONLINE
+ * for the @state argument of the setup function.
+ *
+ * See Documentation/core-api/cpu_hotplug.rst for further information and
+ * examples.
+ */
 enum cpuhp_state {
 	CPUHP_INVALID = -1,
+
+	/* PREPARE section invoked on a control CPU */
 	CPUHP_OFFLINE = 0,
 	CPUHP_CREATE_THREADS,
 	CPUHP_PERF_PREPARE,
@@ -53,6 +87,7 @@ enum cpuhp_state {
 	CPUHP_FS_BUFF_DEAD,
 	CPUHP_PRINTK_DEAD,
 	CPUHP_MM_MEMCQ_DEAD,
+	CPUHP_XFS_DEAD,
 	CPUHP_PERCPU_CNT_DEAD,
 	CPUHP_RADIX_DEAD,
 	CPUHP_PAGE_ALLOC,
@@ -94,6 +129,11 @@ enum cpuhp_state {
 	CPUHP_BP_PREPARE_DYN,
 	CPUHP_BP_PREPARE_DYN_END		= CPUHP_BP_PREPARE_DYN + 20,
 	CPUHP_BRINGUP_CPU,
+
+	/*
+	 * STARTING section invoked on the hotplugged CPU in low level
+	 * bringup and teardown code.
+	 */
 	CPUHP_AP_IDLE_DEAD,
 	CPUHP_AP_OFFLINE,
 	CPUHP_AP_SCHED_STARTING,
@@ -154,6 +194,8 @@ enum cpuhp_state {
 	CPUHP_AP_ARM_CACHE_B15_RAC_DYING,
 	CPUHP_AP_ONLINE,
 	CPUHP_TEARDOWN_CPU,
+
+	/* Online section invoked on the hotplugged CPU from the hotplug thread */
 	CPUHP_AP_ONLINE_IDLE,
 	CPUHP_AP_SCHED_WAIT_EMPTY,
 	CPUHP_AP_SMPBOOT_THREADS,
@@ -215,14 +257,15 @@ int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state, const char *name,
 				   int (*teardown)(unsigned int cpu),
 				   bool multi_instance);
 /**
- * cpuhp_setup_state - Setup hotplug state callbacks with calling the callbacks
+ * cpuhp_setup_state - Setup hotplug state callbacks with calling the @startup
+ *                     callback
  * @state:	The state for which the calls are installed
  * @name:	Name of the callback (will be used in debug output)
- * @startup:	startup callback function
- * @teardown:	teardown callback function
+ * @startup:	startup callback function or NULL if not required
+ * @teardown:	teardown callback function or NULL if not required
  *
- * Installs the callback functions and invokes the startup callback on
- * the present cpus which have already reached the @state.
+ * Installs the callback functions and invokes the @startup callback on
+ * the online cpus which have already reached the @state.
  */
 static inline int cpuhp_setup_state(enum cpuhp_state state,
 				    const char *name,
@@ -232,6 +275,18 @@ static inline int cpuhp_setup_state(enum cpuhp_state state,
 	return __cpuhp_setup_state(state, name, true, startup, teardown, false);
 }
 
+/**
+ * cpuhp_setup_state_cpuslocked - Setup hotplug state callbacks with calling
+ *				  @startup callback from a cpus_read_lock()
+ *				  held region
+ * @state:	The state for which the calls are installed
+ * @name:	Name of the callback (will be used in debug output)
+ * @startup:	startup callback function or NULL if not required
+ * @teardown:	teardown callback function or NULL if not required
+ *
+ * Same as cpuhp_setup_state() except that it must be invoked from within a
+ * cpus_read_lock() held region.
+ */
 static inline int cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
 					       const char *name,
 					       int (*startup)(unsigned int cpu),
@@ -243,14 +298,14 @@ static inline int cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
 
 /**
  * cpuhp_setup_state_nocalls - Setup hotplug state callbacks without calling the
- *			       callbacks
+ *			       @startup callback
  * @state:	The state for which the calls are installed
  * @name:	Name of the callback.
- * @startup:	startup callback function
- * @teardown:	teardown callback function
+ * @startup:	startup callback function or NULL if not required
+ * @teardown:	teardown callback function or NULL if not required
  *
- * Same as @cpuhp_setup_state except that no calls are executed are invoked
- * during installation of this callback. NOP if SMP=n or HOTPLUG_CPU=n.
+ * Same as cpuhp_setup_state() except that the @startup callback is not
+ * invoked during installation. NOP if SMP=n or HOTPLUG_CPU=n.
  */
 static inline int cpuhp_setup_state_nocalls(enum cpuhp_state state,
 					    const char *name,
@@ -261,6 +316,19 @@ static inline int cpuhp_setup_state_nocalls(enum cpuhp_state state,
 				   false);
 }
 
+/**
+ * cpuhp_setup_state_nocalls_cpuslocked - Setup hotplug state callbacks without
+ *					  invoking the @startup callback from
+ *					  a cpus_read_lock() held region
+ *			       callbacks
+ * @state:	The state for which the calls are installed
+ * @name:	Name of the callback.
+ * @startup:	startup callback function or NULL if not required
+ * @teardown:	teardown callback function or NULL if not required
+ *
+ * Same as cpuhp_setup_state_nocalls() except that it must be invoked from
+ * within a cpus_read_lock() held region.
+ */
 static inline int cpuhp_setup_state_nocalls_cpuslocked(enum cpuhp_state state,
 						     const char *name,
 						     int (*startup)(unsigned int cpu),
@@ -274,13 +342,13 @@ static inline int cpuhp_setup_state_nocalls_cpuslocked(enum cpuhp_state state,
  * cpuhp_setup_state_multi - Add callbacks for multi state
  * @state:	The state for which the calls are installed
  * @name:	Name of the callback.
- * @startup:	startup callback function
- * @teardown:	teardown callback function
+ * @startup:	startup callback function or NULL if not required
+ * @teardown:	teardown callback function or NULL if not required
  *
  * Sets the internal multi_instance flag and prepares a state to work as a multi
  * instance callback. No callbacks are invoked at this point. The callbacks are
  * invoked once an instance for this state are registered via
- * @cpuhp_state_add_instance or @cpuhp_state_add_instance_nocalls.
+ * cpuhp_state_add_instance() or cpuhp_state_add_instance_nocalls()
  */
 static inline int cpuhp_setup_state_multi(enum cpuhp_state state,
 					  const char *name,
@@ -305,9 +373,10 @@ int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
  * @state:	The state for which the instance is installed
  * @node:	The node for this individual state.
  *
- * Installs the instance for the @state and invokes the startup callback on
- * the present cpus which have already reached the @state. The @state must have
- * been earlier marked as multi-instance by @cpuhp_setup_state_multi.
+ * Installs the instance for the @state and invokes the registered startup
+ * callback on the online cpus which have already reached the @state. The
+ * @state must have been earlier marked as multi-instance by
+ * cpuhp_setup_state_multi().
  */
 static inline int cpuhp_state_add_instance(enum cpuhp_state state,
 					   struct hlist_node *node)
@@ -321,8 +390,9 @@ static inline int cpuhp_state_add_instance(enum cpuhp_state state,
  * @state:	The state for which the instance is installed
  * @node:	The node for this individual state.
  *
- * Installs the instance for the @state The @state must have been earlier
- * marked as multi-instance by @cpuhp_setup_state_multi.
+ * Installs the instance for the @state. The @state must have been earlier
+ * marked as multi-instance by cpuhp_setup_state_multi. NOP if SMP=n or
+ * HOTPLUG_CPU=n.
  */
 static inline int cpuhp_state_add_instance_nocalls(enum cpuhp_state state,
 						   struct hlist_node *node)
@@ -330,6 +400,17 @@ static inline int cpuhp_state_add_instance_nocalls(enum cpuhp_state state,
 	return __cpuhp_state_add_instance(state, node, false);
 }
 
+/**
+ * cpuhp_state_add_instance_nocalls_cpuslocked - Add an instance for a state
+ *						 without invoking the startup
+ *						 callback from a cpus_read_lock()
+ *						 held region.
+ * @state:	The state for which the instance is installed
+ * @node:	The node for this individual state.
+ *
+ * Same as cpuhp_state_add_instance_nocalls() except that it must be
+ * invoked from within a cpus_read_lock() held region.
+ */
 static inline int
 cpuhp_state_add_instance_nocalls_cpuslocked(enum cpuhp_state state,
 					    struct hlist_node *node)
@@ -345,7 +426,7 @@ void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke);
  * @state:	The state for which the calls are removed
  *
  * Removes the callback functions and invokes the teardown callback on
- * the present cpus which have already reached the @state.
+ * the online cpus which have already reached the @state.
  */
 static inline void cpuhp_remove_state(enum cpuhp_state state)
 {
@@ -354,7 +435,7 @@ static inline void cpuhp_remove_state(enum cpuhp_state state)
 
 /**
  * cpuhp_remove_state_nocalls - Remove hotplug state callbacks without invoking
- *				teardown
+ *				the teardown callback
  * @state:	The state for which the calls are removed
  */
 static inline void cpuhp_remove_state_nocalls(enum cpuhp_state state)
@@ -362,6 +443,14 @@ static inline void cpuhp_remove_state_nocalls(enum cpuhp_state state)
 	__cpuhp_remove_state(state, false);
 }
 
+/**
+ * cpuhp_remove_state_nocalls_cpuslocked - Remove hotplug state callbacks without invoking
+ *					   teardown from a cpus_read_lock() held region.
+ * @state:	The state for which the calls are removed
+ *
+ * Same as cpuhp_remove_state nocalls() except that it must be invoked
+ * from within a cpus_read_lock() held region.
+ */
 static inline void cpuhp_remove_state_nocalls_cpuslocked(enum cpuhp_state state)
 {
 	__cpuhp_remove_state_cpuslocked(state, false);
@@ -389,8 +478,8 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state,
  * @state:	The state from which the instance is removed
  * @node:	The node for this individual state.
  *
- * Removes the instance and invokes the teardown callback on the present cpus
- * which have already reached the @state.
+ * Removes the instance and invokes the teardown callback on the online cpus
+ * which have already reached @state.
  */
 static inline int cpuhp_state_remove_instance(enum cpuhp_state state,
 					      struct hlist_node *node)
diff --git a/include/linux/damon.h b/include/linux/damon.h
new file mode 100644
index 000000000000..d68b67b8d458
--- /dev/null
+++ b/include/linux/damon.h
@@ -0,0 +1,268 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * DAMON api
+ *
+ * Author: SeongJae Park <sjpark@amazon.de>
+ */
+
+#ifndef _DAMON_H_
+#define _DAMON_H_
+
+#include <linux/mutex.h>
+#include <linux/time64.h>
+#include <linux/types.h>
+
+/* Minimal region size.  Every damon_region is aligned by this. */
+#define DAMON_MIN_REGION	PAGE_SIZE
+
+/**
+ * struct damon_addr_range - Represents an address region of [@start, @end).
+ * @start:	Start address of the region (inclusive).
+ * @end:	End address of the region (exclusive).
+ */
+struct damon_addr_range {
+	unsigned long start;
+	unsigned long end;
+};
+
+/**
+ * struct damon_region - Represents a monitoring target region.
+ * @ar:			The address range of the region.
+ * @sampling_addr:	Address of the sample for the next access check.
+ * @nr_accesses:	Access frequency of this region.
+ * @list:		List head for siblings.
+ */
+struct damon_region {
+	struct damon_addr_range ar;
+	unsigned long sampling_addr;
+	unsigned int nr_accesses;
+	struct list_head list;
+};
+
+/**
+ * struct damon_target - Represents a monitoring target.
+ * @id:			Unique identifier for this target.
+ * @nr_regions:		Number of monitoring target regions of this target.
+ * @regions_list:	Head of the monitoring target regions of this target.
+ * @list:		List head for siblings.
+ *
+ * Each monitoring context could have multiple targets.  For example, a context
+ * for virtual memory address spaces could have multiple target processes.  The
+ * @id of each target should be unique among the targets of the context.  For
+ * example, in the virtual address monitoring context, it could be a pidfd or
+ * an address of an mm_struct.
+ */
+struct damon_target {
+	unsigned long id;
+	unsigned int nr_regions;
+	struct list_head regions_list;
+	struct list_head list;
+};
+
+struct damon_ctx;
+
+/**
+ * struct damon_primitive	Monitoring primitives for given use cases.
+ *
+ * @init:			Initialize primitive-internal data structures.
+ * @update:			Update primitive-internal data structures.
+ * @prepare_access_checks:	Prepare next access check of target regions.
+ * @check_accesses:		Check the accesses to target regions.
+ * @reset_aggregated:		Reset aggregated accesses monitoring results.
+ * @target_valid:		Determine if the target is valid.
+ * @cleanup:			Clean up the context.
+ *
+ * DAMON can be extended for various address spaces and usages.  For this,
+ * users should register the low level primitives for their target address
+ * space and usecase via the &damon_ctx.primitive.  Then, the monitoring thread
+ * (&damon_ctx.kdamond) calls @init and @prepare_access_checks before starting
+ * the monitoring, @update after each &damon_ctx.primitive_update_interval, and
+ * @check_accesses, @target_valid and @prepare_access_checks after each
+ * &damon_ctx.sample_interval.  Finally, @reset_aggregated is called after each
+ * &damon_ctx.aggr_interval.
+ *
+ * @init should initialize primitive-internal data structures.  For example,
+ * this could be used to construct proper monitoring target regions and link
+ * those to @damon_ctx.adaptive_targets.
+ * @update should update the primitive-internal data structures.  For example,
+ * this could be used to update monitoring target regions for current status.
+ * @prepare_access_checks should manipulate the monitoring regions to be
+ * prepared for the next access check.
+ * @check_accesses should check the accesses to each region that made after the
+ * last preparation and update the number of observed accesses of each region.
+ * It should also return max number of observed accesses that made as a result
+ * of its update.  The value will be used for regions adjustment threshold.
+ * @reset_aggregated should reset the access monitoring results that aggregated
+ * by @check_accesses.
+ * @target_valid should check whether the target is still valid for the
+ * monitoring.
+ * @cleanup is called from @kdamond just before its termination.
+ */
+struct damon_primitive {
+	void (*init)(struct damon_ctx *context);
+	void (*update)(struct damon_ctx *context);
+	void (*prepare_access_checks)(struct damon_ctx *context);
+	unsigned int (*check_accesses)(struct damon_ctx *context);
+	void (*reset_aggregated)(struct damon_ctx *context);
+	bool (*target_valid)(void *target);
+	void (*cleanup)(struct damon_ctx *context);
+};
+
+/*
+ * struct damon_callback	Monitoring events notification callbacks.
+ *
+ * @before_start:	Called before starting the monitoring.
+ * @after_sampling:	Called after each sampling.
+ * @after_aggregation:	Called after each aggregation.
+ * @before_terminate:	Called before terminating the monitoring.
+ * @private:		User private data.
+ *
+ * The monitoring thread (&damon_ctx.kdamond) calls @before_start and
+ * @before_terminate just before starting and finishing the monitoring,
+ * respectively.  Therefore, those are good places for installing and cleaning
+ * @private.
+ *
+ * The monitoring thread calls @after_sampling and @after_aggregation for each
+ * of the sampling intervals and aggregation intervals, respectively.
+ * Therefore, users can safely access the monitoring results without additional
+ * protection.  For the reason, users are recommended to use these callback for
+ * the accesses to the results.
+ *
+ * If any callback returns non-zero, monitoring stops.
+ */
+struct damon_callback {
+	void *private;
+
+	int (*before_start)(struct damon_ctx *context);
+	int (*after_sampling)(struct damon_ctx *context);
+	int (*after_aggregation)(struct damon_ctx *context);
+	int (*before_terminate)(struct damon_ctx *context);
+};
+
+/**
+ * struct damon_ctx - Represents a context for each monitoring.  This is the
+ * main interface that allows users to set the attributes and get the results
+ * of the monitoring.
+ *
+ * @sample_interval:		The time between access samplings.
+ * @aggr_interval:		The time between monitor results aggregations.
+ * @primitive_update_interval:	The time between monitoring primitive updates.
+ *
+ * For each @sample_interval, DAMON checks whether each region is accessed or
+ * not.  It aggregates and keeps the access information (number of accesses to
+ * each region) for @aggr_interval time.  DAMON also checks whether the target
+ * memory regions need update (e.g., by ``mmap()`` calls from the application,
+ * in case of virtual memory monitoring) and applies the changes for each
+ * @primitive_update_interval.  All time intervals are in micro-seconds.
+ * Please refer to &struct damon_primitive and &struct damon_callback for more
+ * detail.
+ *
+ * @kdamond:		Kernel thread who does the monitoring.
+ * @kdamond_stop:	Notifies whether kdamond should stop.
+ * @kdamond_lock:	Mutex for the synchronizations with @kdamond.
+ *
+ * For each monitoring context, one kernel thread for the monitoring is
+ * created.  The pointer to the thread is stored in @kdamond.
+ *
+ * Once started, the monitoring thread runs until explicitly required to be
+ * terminated or every monitoring target is invalid.  The validity of the
+ * targets is checked via the &damon_primitive.target_valid of @primitive.  The
+ * termination can also be explicitly requested by writing non-zero to
+ * @kdamond_stop.  The thread sets @kdamond to NULL when it terminates.
+ * Therefore, users can know whether the monitoring is ongoing or terminated by
+ * reading @kdamond.  Reads and writes to @kdamond and @kdamond_stop from
+ * outside of the monitoring thread must be protected by @kdamond_lock.
+ *
+ * Note that the monitoring thread protects only @kdamond and @kdamond_stop via
+ * @kdamond_lock.  Accesses to other fields must be protected by themselves.
+ *
+ * @primitive:	Set of monitoring primitives for given use cases.
+ * @callback:	Set of callbacks for monitoring events notifications.
+ *
+ * @min_nr_regions:	The minimum number of adaptive monitoring regions.
+ * @max_nr_regions:	The maximum number of adaptive monitoring regions.
+ * @adaptive_targets:	Head of monitoring targets (&damon_target) list.
+ */
+struct damon_ctx {
+	unsigned long sample_interval;
+	unsigned long aggr_interval;
+	unsigned long primitive_update_interval;
+
+/* private: internal use only */
+	struct timespec64 last_aggregation;
+	struct timespec64 last_primitive_update;
+
+/* public: */
+	struct task_struct *kdamond;
+	bool kdamond_stop;
+	struct mutex kdamond_lock;
+
+	struct damon_primitive primitive;
+	struct damon_callback callback;
+
+	unsigned long min_nr_regions;
+	unsigned long max_nr_regions;
+	struct list_head adaptive_targets;
+};
+
+#define damon_next_region(r) \
+	(container_of(r->list.next, struct damon_region, list))
+
+#define damon_prev_region(r) \
+	(container_of(r->list.prev, struct damon_region, list))
+
+#define damon_for_each_region(r, t) \
+	list_for_each_entry(r, &t->regions_list, list)
+
+#define damon_for_each_region_safe(r, next, t) \
+	list_for_each_entry_safe(r, next, &t->regions_list, list)
+
+#define damon_for_each_target(t, ctx) \
+	list_for_each_entry(t, &(ctx)->adaptive_targets, list)
+
+#define damon_for_each_target_safe(t, next, ctx)	\
+	list_for_each_entry_safe(t, next, &(ctx)->adaptive_targets, list)
+
+#ifdef CONFIG_DAMON
+
+struct damon_region *damon_new_region(unsigned long start, unsigned long end);
+inline void damon_insert_region(struct damon_region *r,
+		struct damon_region *prev, struct damon_region *next,
+		struct damon_target *t);
+void damon_add_region(struct damon_region *r, struct damon_target *t);
+void damon_destroy_region(struct damon_region *r, struct damon_target *t);
+
+struct damon_target *damon_new_target(unsigned long id);
+void damon_add_target(struct damon_ctx *ctx, struct damon_target *t);
+void damon_free_target(struct damon_target *t);
+void damon_destroy_target(struct damon_target *t);
+unsigned int damon_nr_regions(struct damon_target *t);
+
+struct damon_ctx *damon_new_ctx(void);
+void damon_destroy_ctx(struct damon_ctx *ctx);
+int damon_set_targets(struct damon_ctx *ctx,
+		unsigned long *ids, ssize_t nr_ids);
+int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
+		unsigned long aggr_int, unsigned long primitive_upd_int,
+		unsigned long min_nr_reg, unsigned long max_nr_reg);
+int damon_nr_running_ctxs(void);
+
+int damon_start(struct damon_ctx **ctxs, int nr_ctxs);
+int damon_stop(struct damon_ctx **ctxs, int nr_ctxs);
+
+#endif	/* CONFIG_DAMON */
+
+#ifdef CONFIG_DAMON_VADDR
+
+/* Monitoring primitives for virtual memory address spaces */
+void damon_va_init(struct damon_ctx *ctx);
+void damon_va_update(struct damon_ctx *ctx);
+void damon_va_prepare_access_checks(struct damon_ctx *ctx);
+unsigned int damon_va_check_accesses(struct damon_ctx *ctx);
+bool damon_va_target_valid(void *t);
+void damon_va_cleanup(struct damon_ctx *ctx);
+void damon_va_set_primitives(struct damon_ctx *ctx);
+
+#endif	/* CONFIG_DAMON_VADDR */
+
+#endif	/* _DAMON_H */
diff --git a/include/linux/dax.h b/include/linux/dax.h
index b52f084aa643..2619d94c308d 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -41,7 +41,6 @@ struct dax_operations {
 extern struct attribute_group dax_attribute_group;
 
 #if IS_ENABLED(CONFIG_DAX)
-struct dax_device *dax_get_by_host(const char *host);
 struct dax_device *alloc_dax(void *private, const char *host,
 		const struct dax_operations *ops, unsigned long flags);
 void put_dax(struct dax_device *dax_dev);
@@ -58,8 +57,6 @@ static inline void set_dax_synchronous(struct dax_device *dax_dev)
 {
 	__set_dax_synchronous(dax_dev);
 }
-bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
-		int blocksize, sector_t start, sector_t len);
 /*
  * Check if given mapping is supported by the file / underlying device.
  */
@@ -73,10 +70,6 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
 	return dax_synchronous(dax_dev);
 }
 #else
-static inline struct dax_device *dax_get_by_host(const char *host)
-{
-	return NULL;
-}
 static inline struct dax_device *alloc_dax(void *private, const char *host,
 		const struct dax_operations *ops, unsigned long flags)
 {
@@ -106,12 +99,6 @@ static inline bool dax_synchronous(struct dax_device *dax_dev)
 static inline void set_dax_synchronous(struct dax_device *dax_dev)
 {
 }
-static inline bool dax_supported(struct dax_device *dax_dev,
-		struct block_device *bdev, int blocksize, sector_t start,
-		sector_t len)
-{
-	return false;
-}
 static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
 				struct dax_device *dax_dev)
 {
@@ -122,22 +109,12 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
 struct writeback_control;
 int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
 #if IS_ENABLED(CONFIG_FS_DAX)
-bool __bdev_dax_supported(struct block_device *bdev, int blocksize);
-static inline bool bdev_dax_supported(struct block_device *bdev, int blocksize)
-{
-	return __bdev_dax_supported(bdev, blocksize);
-}
-
-bool __generic_fsdax_supported(struct dax_device *dax_dev,
+bool generic_fsdax_supported(struct dax_device *dax_dev,
 		struct block_device *bdev, int blocksize, sector_t start,
 		sector_t sectors);
-static inline bool generic_fsdax_supported(struct dax_device *dax_dev,
-		struct block_device *bdev, int blocksize, sector_t start,
-		sector_t sectors)
-{
-	return __generic_fsdax_supported(dax_dev, bdev, blocksize, start,
-			sectors);
-}
+
+bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
+		int blocksize, sector_t start, sector_t len);
 
 static inline void fs_put_dax(struct dax_device *dax_dev)
 {
@@ -153,15 +130,11 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t st
 dax_entry_t dax_lock_page(struct page *page);
 void dax_unlock_page(struct page *page, dax_entry_t cookie);
 #else
-static inline bool bdev_dax_supported(struct block_device *bdev,
-		int blocksize)
-{
-	return false;
-}
+#define generic_fsdax_supported		NULL
 
-static inline bool generic_fsdax_supported(struct dax_device *dax_dev,
+static inline bool dax_supported(struct dax_device *dax_dev,
 		struct block_device *bdev, int blocksize, sector_t start,
-		sector_t sectors)
+		sector_t len)
 {
 	return false;
 }
diff --git a/include/linux/device.h b/include/linux/device.h
index 65d84b67b024..e270cb740b9e 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -424,6 +424,7 @@ struct dev_links_info {
  * @dma_pools:	Dma pools (if dma'ble device).
  * @dma_mem:	Internal for coherent mem override.
  * @cma_area:	Contiguous memory area for dma allocations
+ * @dma_io_tlb_mem: Pointer to the swiotlb pool used.  Not for driver use.
  * @archdata:	For arch-specific additions.
  * @of_node:	Associated device tree node.
  * @fwnode:	Associated device node supplied by platform firmware.
@@ -534,6 +535,9 @@ struct device {
 	struct cma *cma_area;		/* contiguous memory area for dma
 					   allocations */
 #endif
+#ifdef CONFIG_SWIOTLB
+	struct io_tlb_mem *dma_io_tlb_mem;
+#endif
 	/* arch specific additions */
 	struct dev_archdata	archdata;
 
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 758ca4694257..24607dc3c2ac 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -20,6 +20,7 @@ void iommu_put_dma_cookie(struct iommu_domain *domain);
 
 /* Setup call for arch DMA mapping code */
 void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit);
+int iommu_dma_init_fq(struct iommu_domain *domain);
 
 /* The DMA API isn't _quite_ the whole story, though... */
 /*
@@ -54,6 +55,11 @@ static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base,
 {
 }
 
+static inline int iommu_dma_init_fq(struct iommu_domain *domain)
+{
+	return -EINVAL;
+}
+
 static inline int iommu_get_dma_cookie(struct iommu_domain *domain)
 {
 	return -ENODEV;
diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
index 0d53a96a3d64..0d5b06b3a4a6 100644
--- a/include/linux/dma-map-ops.h
+++ b/include/linux/dma-map-ops.h
@@ -41,8 +41,9 @@ struct dma_map_ops {
 			size_t size, enum dma_data_direction dir,
 			unsigned long attrs);
 	/*
-	 * map_sg returns 0 on error and a value > 0 on success.
-	 * It should never return a value < 0.
+	 * map_sg should return a negative error code on error. See
+	 * dma_map_sgtable() for a list of appropriate error codes
+	 * and their meanings.
 	 */
 	int (*map_sg)(struct device *dev, struct scatterlist *sg, int nents,
 			enum dma_data_direction dir, unsigned long attrs);
@@ -170,13 +171,6 @@ int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size,
 int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr);
 int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma,
 		void *cpu_addr, size_t size, int *ret);
-
-void *dma_alloc_from_global_coherent(struct device *dev, ssize_t size,
-		dma_addr_t *dma_handle);
-int dma_release_from_global_coherent(int order, void *vaddr);
-int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *cpu_addr,
-		size_t size, int *ret);
-
 #else
 static inline int dma_declare_coherent_memory(struct device *dev,
 		phys_addr_t phys_addr, dma_addr_t device_addr, size_t size)
@@ -186,7 +180,16 @@ static inline int dma_declare_coherent_memory(struct device *dev,
 #define dma_alloc_from_dev_coherent(dev, size, handle, ret) (0)
 #define dma_release_from_dev_coherent(dev, order, vaddr) (0)
 #define dma_mmap_from_dev_coherent(dev, vma, vaddr, order, ret) (0)
+#endif /* CONFIG_DMA_DECLARE_COHERENT */
 
+#ifdef CONFIG_DMA_GLOBAL_POOL
+void *dma_alloc_from_global_coherent(struct device *dev, ssize_t size,
+		dma_addr_t *dma_handle);
+int dma_release_from_global_coherent(int order, void *vaddr);
+int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *cpu_addr,
+		size_t size, int *ret);
+int dma_init_global_coherent(phys_addr_t phys_addr, size_t size);
+#else
 static inline void *dma_alloc_from_global_coherent(struct device *dev,
 		ssize_t size, dma_addr_t *dma_handle)
 {
@@ -201,7 +204,7 @@ static inline int dma_mmap_from_global_coherent(struct vm_area_struct *vma,
 {
 	return 0;
 }
-#endif /* CONFIG_DMA_DECLARE_COHERENT */
+#endif /* CONFIG_DMA_GLOBAL_POOL */
 
 /*
  * This is the actual return value from the ->alloc_noncontiguous method.
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 183e7103a66d..dca2b1355bb1 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -105,11 +105,13 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page,
 		unsigned long attrs);
 void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size,
 		enum dma_data_direction dir, unsigned long attrs);
-int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents,
-		enum dma_data_direction dir, unsigned long attrs);
+unsigned int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
+		int nents, enum dma_data_direction dir, unsigned long attrs);
 void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
 				      int nents, enum dma_data_direction dir,
 				      unsigned long attrs);
+int dma_map_sgtable(struct device *dev, struct sg_table *sgt,
+		enum dma_data_direction dir, unsigned long attrs);
 dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr,
 		size_t size, enum dma_data_direction dir, unsigned long attrs);
 void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size,
@@ -164,8 +166,9 @@ static inline void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr,
 		size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
 }
-static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
-		int nents, enum dma_data_direction dir, unsigned long attrs)
+static inline unsigned int dma_map_sg_attrs(struct device *dev,
+		struct scatterlist *sg, int nents, enum dma_data_direction dir,
+		unsigned long attrs)
 {
 	return 0;
 }
@@ -174,6 +177,11 @@ static inline void dma_unmap_sg_attrs(struct device *dev,
 		unsigned long attrs)
 {
 }
+static inline int dma_map_sgtable(struct device *dev, struct sg_table *sgt,
+		enum dma_data_direction dir, unsigned long attrs)
+{
+	return -EOPNOTSUPP;
+}
 static inline dma_addr_t dma_map_resource(struct device *dev,
 		phys_addr_t phys_addr, size_t size, enum dma_data_direction dir,
 		unsigned long attrs)
@@ -344,34 +352,6 @@ static inline void dma_sync_single_range_for_device(struct device *dev,
 }
 
 /**
- * dma_map_sgtable - Map the given buffer for DMA
- * @dev:	The device for which to perform the DMA operation
- * @sgt:	The sg_table object describing the buffer
- * @dir:	DMA direction
- * @attrs:	Optional DMA attributes for the map operation
- *
- * Maps a buffer described by a scatterlist stored in the given sg_table
- * object for the @dir DMA operation by the @dev device. After success the
- * ownership for the buffer is transferred to the DMA domain.  One has to
- * call dma_sync_sgtable_for_cpu() or dma_unmap_sgtable() to move the
- * ownership of the buffer back to the CPU domain before touching the
- * buffer by the CPU.
- *
- * Returns 0 on success or -EINVAL on error during mapping the buffer.
- */
-static inline int dma_map_sgtable(struct device *dev, struct sg_table *sgt,
-		enum dma_data_direction dir, unsigned long attrs)
-{
-	int nents;
-
-	nents = dma_map_sg_attrs(dev, sgt->sgl, sgt->orig_nents, dir, attrs);
-	if (nents <= 0)
-		return -EINVAL;
-	sgt->nents = nents;
-	return 0;
-}
-
-/**
  * dma_unmap_sgtable - Unmap the given buffer for DMA
  * @dev:	The device for which to perform the DMA operation
  * @sgt:	The sg_table object describing the buffer
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 93c3ca5fdafd..e5c2c9e71bf1 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -380,6 +380,7 @@ enum dma_slave_buswidth {
 	DMA_SLAVE_BUSWIDTH_16_BYTES = 16,
 	DMA_SLAVE_BUSWIDTH_32_BYTES = 32,
 	DMA_SLAVE_BUSWIDTH_64_BYTES = 64,
+	DMA_SLAVE_BUSWIDTH_128_BYTES = 128,
 };
 
 /**
@@ -398,7 +399,7 @@ enum dma_slave_buswidth {
  * @src_addr_width: this is the width in bytes of the source (RX)
  * register where DMA data shall be read. If the source
  * is memory this may be ignored depending on architecture.
- * Legal values: 1, 2, 3, 4, 8, 16, 32, 64.
+ * Legal values: 1, 2, 3, 4, 8, 16, 32, 64, 128.
  * @dst_addr_width: same as src_addr_width but for destination
  * target (TX) mutatis mutandis.
  * @src_maxburst: the maximum number of words (note: words, as in
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index 1834752c5617..39dcadd492b5 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -11,7 +11,7 @@
 #include <linux/types.h>
 
 /**
- * em_perf_state - Performance state of a performance domain
+ * struct em_perf_state - Performance state of a performance domain
  * @frequency:	The frequency in KHz, for consistency with CPUFreq
  * @power:	The power consumed at this level (by 1 CPU or by a registered
  *		device). It can be a total power: static and dynamic.
@@ -25,7 +25,7 @@ struct em_perf_state {
 };
 
 /**
- * em_perf_domain - Performance domain
+ * struct em_perf_domain - Performance domain
  * @table:		List of performance states, in ascending order
  * @nr_perf_states:	Number of performance states
  * @milliwatts:		Flag indicating the power values are in milli-Watts
@@ -103,12 +103,12 @@ void em_dev_unregister_perf_domain(struct device *dev);
 
 /**
  * em_cpu_energy() - Estimates the energy consumed by the CPUs of a
-		performance domain
+ *		performance domain
  * @pd		: performance domain for which energy has to be estimated
  * @max_util	: highest utilization among CPUs of the domain
  * @sum_util	: sum of the utilization of all CPUs in the domain
  * @allowed_cpu_cap	: maximum allowed CPU capacity for the @pd, which
-			  might reflect reduced frequency (due to thermal)
+ *			  might reflect reduced frequency (due to thermal)
  *
  * This function must be used only for CPU devices. There is no validation,
  * i.e. if the EM is a CPU type and has cpumask allocated. It is called from
diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h
index 136b8d97d8c0..0d7865a0731c 100644
--- a/include/linux/entry-kvm.h
+++ b/include/linux/entry-kvm.h
@@ -2,7 +2,11 @@
 #ifndef __LINUX_ENTRYKVM_H
 #define __LINUX_ENTRYKVM_H
 
-#include <linux/entry-common.h>
+#include <linux/static_call_types.h>
+#include <linux/tracehook.h>
+#include <linux/syscalls.h>
+#include <linux/seccomp.h>
+#include <linux/sched.h>
 #include <linux/tick.h>
 
 /* Transfer to guest mode work */
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 330345b1be54..928c411bd509 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -300,6 +300,18 @@ static inline void ether_addr_copy(u8 *dst, const u8 *src)
 }
 
 /**
+ * eth_hw_addr_set - Assign Ethernet address to a net_device
+ * @dev: pointer to net_device structure
+ * @addr: address to assign
+ *
+ * Assign given address to the net_device, addr_assign_type is not changed.
+ */
+static inline void eth_hw_addr_set(struct net_device *dev, const u8 *addr)
+{
+	ether_addr_copy(dev->dev_addr, addr);
+}
+
+/**
  * eth_hw_addr_inherit - Copy dev_addr from another net_device
  * @dst: pointer to net_device to copy dev_addr to
  * @src: pointer to net_device to copy dev_addr from
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index 593322c946e6..3337745d81bd 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -68,4 +68,22 @@ static inline void eventpoll_release(struct file *file) {}
 
 #endif
 
+#if defined(CONFIG_ARM) && defined(CONFIG_OABI_COMPAT)
+/* ARM OABI has an incompatible struct layout and needs a special handler */
+extern struct epoll_event __user *
+epoll_put_uevent(__poll_t revents, __u64 data,
+		 struct epoll_event __user *uevent);
+#else
+static inline struct epoll_event __user *
+epoll_put_uevent(__poll_t revents, __u64 data,
+		 struct epoll_event __user *uevent)
+{
+	if (__put_user(revents, &uevent->events) ||
+	    __put_user(data, &uevent->data))
+		return NULL;
+
+	return uevent+1;
+}
+#endif
+
 #endif /* #ifndef _LINUX_EVENTPOLL_H */
diff --git a/include/linux/file.h b/include/linux/file.h
index 2de2e4613d7b..51e830b4fe3a 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -94,6 +94,9 @@ extern void fd_install(unsigned int fd, struct file *file);
 
 extern int __receive_fd(struct file *file, int __user *ufd,
 			unsigned int o_flags);
+
+extern int receive_fd(struct file *file, unsigned int o_flags);
+
 static inline int receive_fd_user(struct file *file, int __user *ufd,
 				  unsigned int o_flags)
 {
@@ -101,10 +104,6 @@ static inline int receive_fd_user(struct file *file, int __user *ufd,
 		return -EFAULT;
 	return __receive_fd(file, ufd, o_flags);
 }
-static inline int receive_fd(struct file *file, unsigned int o_flags)
-{
-	return __receive_fd(file, NULL, o_flags);
-}
 int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags);
 
 extern void flush_delayed_fput(void);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7d248941ecea..4a93c12543ee 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -5,8 +5,6 @@
 #ifndef __LINUX_FILTER_H__
 #define __LINUX_FILTER_H__
 
-#include <stdarg.h>
-
 #include <linux/atomic.h>
 #include <linux/refcount.h>
 #include <linux/compat.h>
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1c01f9f2b574..e7a633353fd2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -588,6 +588,11 @@ static inline void mapping_allow_writable(struct address_space *mapping)
 
 struct posix_acl;
 #define ACL_NOT_CACHED ((void *)(-1))
+/*
+ * ACL_DONT_CACHE is for stacked filesystems, that rely on underlying fs to
+ * cache the ACL.  This also means that ->get_acl() can be called in RCU mode
+ * with the LOOKUP_RCU flag.
+ */
 #define ACL_DONT_CACHE ((void *)(-3))
 
 static inline struct posix_acl *
@@ -2109,7 +2114,7 @@ struct inode_operations {
 	struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
 	const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
 	int (*permission) (struct user_namespace *, struct inode *, int);
-	struct posix_acl * (*get_acl)(struct inode *, int);
+	struct posix_acl * (*get_acl)(struct inode *, int, bool);
 
 	int (*readlink) (struct dentry *, char __user *,int);
 
@@ -3018,15 +3023,20 @@ static inline void file_end_write(struct file *file)
 }
 
 /*
+ * This is used for regular files where some users -- especially the
+ * currently executed binary in a process, previously handled via
+ * VM_DENYWRITE -- cannot handle concurrent write (and maybe mmap
+ * read-write shared) accesses.
+ *
  * get_write_access() gets write permission for a file.
  * put_write_access() releases this write permission.
- * This is used for regular files.
- * We cannot support write (and maybe mmap read-write shared) accesses and
- * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
- * can have the following values:
- * 0: no writers, no VM_DENYWRITE mappings
- * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
- * > 0: (i_writecount) users are writing to the file.
+ * deny_write_access() denies write access to a file.
+ * allow_write_access() re-enables write access to a file.
+ *
+ * The i_writecount field of an inode can have the following values:
+ * 0: no write access, no denied write access
+ * < 0: (-i_writecount) users that denied write access to the file.
+ * > 0: (i_writecount) users that have write access to the file.
  *
  * Normally we operate on that counter with atomic_{inc,dec} and it's safe
  * except for the cases where we don't hold i_writecount yet. Then we need to
@@ -3314,6 +3324,7 @@ extern int page_symlink(struct inode *inode, const char *symname, int len);
 extern const struct inode_operations page_symlink_inode_operations;
 extern void kfree_link(void *);
 void generic_fillattr(struct user_namespace *, struct inode *, struct kstat *);
+void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
 extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
 extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
 void __inode_add_bytes(struct inode *inode, loff_t bytes);
@@ -3428,6 +3439,8 @@ extern int buffer_migrate_page_norefs(struct address_space *,
 #define buffer_migrate_page_norefs NULL
 #endif
 
+int may_setattr(struct user_namespace *mnt_userns, struct inode *inode,
+		unsigned int ia_valid);
 int setattr_prepare(struct user_namespace *, struct dentry *, struct iattr *);
 extern int inode_newsize_ok(const struct inode *, loff_t offset);
 void setattr_copy(struct user_namespace *, struct inode *inode,
@@ -3581,7 +3594,7 @@ int proc_nr_dentry(struct ctl_table *table, int write,
 		  void *buffer, size_t *lenp, loff_t *ppos);
 int proc_nr_inodes(struct ctl_table *table, int write,
 		   void *buffer, size_t *lenp, loff_t *ppos);
-int __init get_filesystem_list(char *buf);
+int __init list_bdev_fs_names(char *buf, size_t size);
 
 #define __FMODE_EXEC		((__force int) FMODE_EXEC)
 #define __FMODE_NONOTIFY	((__force int) FMODE_NONOTIFY)
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 3235ddbdcc09..8d39491c5f9f 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -147,7 +147,6 @@ struct fscache_retrieval {
 	fscache_rw_complete_t	end_io_func;	/* function to call on I/O completion */
 	void			*context;	/* netfs read context (pinned) */
 	struct list_head	to_do;		/* list of things to be done by the backend */
-	unsigned long		start_time;	/* time at which retrieval started */
 	atomic_t		n_pages;	/* number of pages to be retrieved */
 };
 
@@ -385,9 +384,6 @@ struct fscache_object {
 	struct list_head	dependents;	/* FIFO of dependent objects */
 	struct list_head	dep_link;	/* link in parent's dependents list */
 	struct list_head	pending_ops;	/* unstarted operations on this object */
-#ifdef CONFIG_FSCACHE_OBJECT_LIST
-	struct rb_node		objlist_link;	/* link in global object list */
-#endif
 	pgoff_t			store_limit;	/* current storage limit */
 	loff_t			store_limit_l;	/* current storage limit */
 };
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index abc1c4737fb8..a4dab5998613 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -123,15 +123,17 @@ struct fscache_netfs {
  * - indices are created on disk just-in-time
  */
 struct fscache_cookie {
-	atomic_t			usage;		/* number of users of this cookie */
+	refcount_t			ref;		/* number of users of this cookie */
 	atomic_t			n_children;	/* number of children of this cookie */
 	atomic_t			n_active;	/* number of active users of netfs ptrs */
+	unsigned int			debug_id;
 	spinlock_t			lock;
 	spinlock_t			stores_lock;	/* lock on page store tree */
 	struct hlist_head		backing_objects; /* object(s) backing this file/index */
 	const struct fscache_cookie_def	*def;		/* definition */
 	struct fscache_cookie		*parent;	/* parent of this entry */
 	struct hlist_bl_node		hash_link;	/* Link in hash table */
+	struct list_head		proc_link;	/* Link in proc list */
 	void				*netfs_data;	/* back pointer to netfs */
 	struct radix_tree_root		stores;		/* pages to be stored on this cookie */
 #define FSCACHE_COOKIE_PENDING_TAG	0		/* pages tag: pending write to cache */
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 566feb56601f..97a28ad3393b 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -609,7 +609,7 @@ struct gpio_desc *devm_fwnode_get_gpiod_from_child(struct device *dev,
 #if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_OF_GPIO)
 struct device_node;
 
-struct gpio_desc *gpiod_get_from_of_node(struct device_node *node,
+struct gpio_desc *gpiod_get_from_of_node(const struct device_node *node,
 					 const char *propname, int index,
 					 enum gpiod_flags dflags,
 					 const char *label);
@@ -619,7 +619,7 @@ struct gpio_desc *gpiod_get_from_of_node(struct device_node *node,
 struct device_node;
 
 static inline
-struct gpio_desc *gpiod_get_from_of_node(struct device_node *node,
+struct gpio_desc *gpiod_get_from_of_node(const struct device_node *node,
 					 const char *propname, int index,
 					 enum gpiod_flags dflags,
 					 const char *label)
@@ -633,7 +633,7 @@ struct gpio_desc *gpiod_get_from_of_node(struct device_node *node,
 struct device_node;
 
 struct gpio_desc *devm_gpiod_get_from_of_node(struct device *dev,
-					      struct device_node *node,
+					      const struct device_node *node,
 					      const char *propname, int index,
 					      enum gpiod_flags dflags,
 					      const char *label);
@@ -644,7 +644,7 @@ struct device_node;
 
 static inline
 struct gpio_desc *devm_gpiod_get_from_of_node(struct device *dev,
-					      struct device_node *node,
+					      const struct device_node *node,
 					      const char *propname, int index,
 					      enum gpiod_flags dflags,
 					      const char *label)
@@ -680,10 +680,10 @@ struct acpi_gpio_mapping {
 	unsigned int quirks;
 };
 
-#if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_ACPI)
-
 struct acpi_device;
 
+#if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_ACPI)
+
 int acpi_dev_add_driver_gpios(struct acpi_device *adev,
 			      const struct acpi_gpio_mapping *gpios);
 void acpi_dev_remove_driver_gpios(struct acpi_device *adev);
@@ -696,8 +696,6 @@ struct gpio_desc *acpi_get_and_request_gpiod(char *path, int pin, char *label);
 
 #else  /* CONFIG_GPIOLIB && CONFIG_ACPI */
 
-struct acpi_device;
-
 static inline int acpi_dev_add_driver_gpios(struct acpi_device *adev,
 			      const struct acpi_gpio_mapping *gpios)
 {
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 3a268781fcec..a0f9901dcae6 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -312,6 +312,9 @@ struct gpio_irq_chip {
  *	get rid of the static GPIO number space in the long run.
  * @ngpio: the number of GPIOs handled by this controller; the last GPIO
  *	handled is (base + ngpio - 1).
+ * @offset: when multiple gpio chips belong to the same device this
+ *	can be used as offset within the device so friendly names can
+ *	be properly assigned.
  * @names: if set, must be an array of strings to use as alternative
  *      names for the GPIOs in this chip. Any entry in the array
  *      may be NULL if there is no alias for the GPIO, however the
@@ -398,6 +401,7 @@ struct gpio_chip {
 
 	int			base;
 	u16			ngpio;
+	u16			offset;
 	const char		*const *names;
 	bool			can_sleep;
 
diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h
index 7902c7d8b55f..4aa1031d3e4c 100644
--- a/include/linux/highmem-internal.h
+++ b/include/linux/highmem-internal.h
@@ -90,7 +90,11 @@ static inline void __kunmap_local(void *vaddr)
 
 static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot)
 {
-	preempt_disable();
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		migrate_disable();
+	else
+		preempt_disable();
+
 	pagefault_disable();
 	return __kmap_local_page_prot(page, prot);
 }
@@ -102,7 +106,11 @@ static inline void *kmap_atomic(struct page *page)
 
 static inline void *kmap_atomic_pfn(unsigned long pfn)
 {
-	preempt_disable();
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		migrate_disable();
+	else
+		preempt_disable();
+
 	pagefault_disable();
 	return __kmap_local_pfn_prot(pfn, kmap_prot);
 }
@@ -111,7 +119,10 @@ static inline void __kunmap_atomic(void *addr)
 {
 	kunmap_local_indexed(addr);
 	pagefault_enable();
-	preempt_enable();
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		migrate_enable();
+	else
+		preempt_enable();
 }
 
 unsigned int __nr_free_highpages(void);
@@ -179,7 +190,10 @@ static inline void __kunmap_local(void *addr)
 
 static inline void *kmap_atomic(struct page *page)
 {
-	preempt_disable();
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		migrate_disable();
+	else
+		preempt_disable();
 	pagefault_disable();
 	return page_address(page);
 }
@@ -200,7 +214,10 @@ static inline void __kunmap_atomic(void *addr)
 	kunmap_flush_on_unmap(addr);
 #endif
 	pagefault_enable();
-	preempt_enable();
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		migrate_enable();
+	else
+		preempt_enable();
 }
 
 static inline unsigned int nr_free_highpages(void) { return 0; }
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index d9a606a9fc64..b4c49f9cc379 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -130,10 +130,7 @@ static inline void flush_anon_page(struct vm_area_struct *vma, struct page *page
 }
 #endif
 
-#ifndef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
-static inline void flush_kernel_dcache_page(struct page *page)
-{
-}
+#ifndef ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE
 static inline void flush_kernel_vmap_range(void *vaddr, int size)
 {
 }
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index f7ca1a3870ea..1faebe1cd0ed 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -858,6 +858,11 @@ static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
 
 void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm);
 
+static inline void hugetlb_count_init(struct mm_struct *mm)
+{
+	atomic_long_set(&mm->hugetlb_usage, 0);
+}
+
 static inline void hugetlb_count_add(long l, struct mm_struct *mm)
 {
 	atomic_long_add(l, &mm->hugetlb_usage);
@@ -1042,6 +1047,10 @@ static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
 	return &mm->page_table_lock;
 }
 
+static inline void hugetlb_count_init(struct mm_struct *mm)
+{
+}
+
 static inline void hugetlb_report_usage(struct seq_file *f, struct mm_struct *m)
 {
 }
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index 0b8d1fdda3a1..c137396129db 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -121,6 +121,13 @@ static inline void hugetlb_cgroup_put_rsvd_cgroup(struct hugetlb_cgroup *h_cg)
 	css_put(&h_cg->css);
 }
 
+static inline void resv_map_dup_hugetlb_cgroup_uncharge_info(
+						struct resv_map *resv_map)
+{
+	if (resv_map->css)
+		css_get(resv_map->css);
+}
+
 extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
 					struct hugetlb_cgroup **ptr);
 extern int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
@@ -199,6 +206,11 @@ static inline void hugetlb_cgroup_put_rsvd_cgroup(struct hugetlb_cgroup *h_cg)
 {
 }
 
+static inline void resv_map_dup_hugetlb_cgroup_uncharge_info(
+						struct resv_map *resv_map)
+{
+}
+
 static inline int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
 					       struct hugetlb_cgroup **ptr)
 {
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 3eb60a2e9e61..2ce3efbe9198 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -1010,6 +1010,7 @@ struct acpi_resource_i2c_serialbus;
 #if IS_ENABLED(CONFIG_ACPI)
 bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares,
 			       struct acpi_resource_i2c_serialbus **i2c);
+int i2c_acpi_client_count(struct acpi_device *adev);
 u32 i2c_acpi_find_bus_speed(struct device *dev);
 struct i2c_client *i2c_acpi_new_device(struct device *dev, int index,
 				       struct i2c_board_info *info);
@@ -1020,6 +1021,10 @@ static inline bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares,
 {
 	return false;
 }
+static inline int i2c_acpi_client_count(struct acpi_device *adev)
+{
+	return 0;
+}
 static inline u32 i2c_acpi_find_bus_speed(struct device *dev)
 {
 	return 0;
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 61d5723ec303..b6ab66a546ae 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -11,9 +11,11 @@
 #include <linux/fs.h>
 #include <linux/security.h>
 #include <linux/kexec.h>
+#include <crypto/hash_info.h>
 struct linux_binprm;
 
 #ifdef CONFIG_IMA
+extern enum hash_algo ima_get_current_hash_algo(void);
 extern int ima_bprm_check(struct linux_binprm *bprm);
 extern int ima_file_check(struct file *file, int mask);
 extern void ima_post_create_tmpfile(struct user_namespace *mnt_userns,
@@ -33,10 +35,10 @@ extern void ima_post_path_mknod(struct user_namespace *mnt_userns,
 extern int ima_file_hash(struct file *file, char *buf, size_t buf_size);
 extern int ima_inode_hash(struct inode *inode, char *buf, size_t buf_size);
 extern void ima_kexec_cmdline(int kernel_fd, const void *buf, int size);
-extern void ima_measure_critical_data(const char *event_label,
-				      const char *event_name,
-				      const void *buf, size_t buf_len,
-				      bool hash);
+extern int ima_measure_critical_data(const char *event_label,
+				     const char *event_name,
+				     const void *buf, size_t buf_len,
+				     bool hash, u8 *digest, size_t digest_len);
 
 #ifdef CONFIG_IMA_APPRAISE_BOOTPARAM
 extern void ima_appraise_parse_cmdline(void);
@@ -64,6 +66,11 @@ static inline const char * const *arch_get_ima_policy(void)
 #endif
 
 #else
+static inline enum hash_algo ima_get_current_hash_algo(void)
+{
+	return HASH_ALGO__LAST;
+}
+
 static inline int ima_bprm_check(struct linux_binprm *bprm)
 {
 	return 0;
@@ -137,10 +144,14 @@ static inline int ima_inode_hash(struct inode *inode, char *buf, size_t buf_size
 
 static inline void ima_kexec_cmdline(int kernel_fd, const void *buf, int size) {}
 
-static inline void ima_measure_critical_data(const char *event_label,
+static inline int ima_measure_critical_data(const char *event_label,
 					     const char *event_name,
 					     const void *buf, size_t buf_len,
-					     bool hash) {}
+					     bool hash, u8 *digest,
+					     size_t digest_len)
+{
+	return -ENOENT;
+}
 
 #endif /* CONFIG_IMA */
 
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index d0fa0b31994d..05a65eb155f7 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -124,9 +124,9 @@
 #define DMAR_MTRR_PHYSMASK8_REG 0x208
 #define DMAR_MTRR_PHYSBASE9_REG 0x210
 #define DMAR_MTRR_PHYSMASK9_REG 0x218
-#define DMAR_VCCAP_REG		0xe00 /* Virtual command capability register */
-#define DMAR_VCMD_REG		0xe10 /* Virtual command register */
-#define DMAR_VCRSP_REG		0xe20 /* Virtual command response register */
+#define DMAR_VCCAP_REG		0xe30 /* Virtual command capability register */
+#define DMAR_VCMD_REG		0xe00 /* Virtual command register */
+#define DMAR_VCRSP_REG		0xe10 /* Virtual command response register */
 
 #define DMAR_IQER_REG_IQEI(reg)		FIELD_GET(GENMASK_ULL(3, 0), reg)
 #define DMAR_IQER_REG_ITESID(reg)	FIELD_GET(GENMASK_ULL(47, 32), reg)
diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h
index 10fa80eef13a..57cceecbe37f 100644
--- a/include/linux/intel-svm.h
+++ b/include/linux/intel-svm.h
@@ -14,6 +14,11 @@
 #define SVM_REQ_EXEC	(1<<1)
 #define SVM_REQ_PRIV	(1<<0)
 
+/* Page Request Queue depth */
+#define PRQ_ORDER	2
+#define PRQ_RING_MASK	((0x1000 << PRQ_ORDER) - 0x20)
+#define PRQ_DEPTH	((0x1000 << PRQ_ORDER) >> 5)
+
 /*
  * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only
  * for access to kernel addresses. No IOTLB flushes are automatically done
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 4d40dfa75b55..86af6f0a00a2 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -16,6 +16,7 @@ enum io_pgtable_fmt {
 	ARM_V7S,
 	ARM_MALI_LPAE,
 	AMD_IOMMU_V1,
+	APPLE_DART,
 	IO_PGTABLE_NUM_FMTS,
 };
 
@@ -73,10 +74,6 @@ struct io_pgtable_cfg {
 	 *	to support up to 35 bits PA where the bit32, bit33 and bit34 are
 	 *	encoded in the bit9, bit4 and bit5 of the PTE respectively.
 	 *
-	 * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs
-	 *	on unmap, for DMA domains using the flush queue mechanism for
-	 *	delayed invalidation.
-	 *
 	 * IO_PGTABLE_QUIRK_ARM_TTBR1: (ARM LPAE format) Configure the table
 	 *	for use in the upper half of a split address space.
 	 *
@@ -86,7 +83,6 @@ struct io_pgtable_cfg {
 	#define IO_PGTABLE_QUIRK_ARM_NS		BIT(0)
 	#define IO_PGTABLE_QUIRK_NO_PERMS	BIT(1)
 	#define IO_PGTABLE_QUIRK_ARM_MTK_EXT	BIT(3)
-	#define IO_PGTABLE_QUIRK_NON_STRICT	BIT(4)
 	#define IO_PGTABLE_QUIRK_ARM_TTBR1	BIT(5)
 	#define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA	BIT(6)
 	unsigned long			quirks;
@@ -136,6 +132,11 @@ struct io_pgtable_cfg {
 			u64	transtab;
 			u64	memattr;
 		} arm_mali_lpae_cfg;
+
+		struct {
+			u64 ttbr[4];
+			u32 n_ttbrs;
+		} apple_dart_cfg;
 	};
 };
 
@@ -143,7 +144,9 @@ struct io_pgtable_cfg {
  * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers.
  *
  * @map:          Map a physically contiguous memory region.
+ * @map_pages:    Map a physically contiguous range of pages of the same size.
  * @unmap:        Unmap a physically contiguous memory region.
+ * @unmap_pages:  Unmap a range of virtually contiguous pages of the same size.
  * @iova_to_phys: Translate iova to physical address.
  *
  * These functions map directly onto the iommu_ops member functions with
@@ -152,8 +155,14 @@ struct io_pgtable_cfg {
 struct io_pgtable_ops {
 	int (*map)(struct io_pgtable_ops *ops, unsigned long iova,
 		   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+	int (*map_pages)(struct io_pgtable_ops *ops, unsigned long iova,
+			 phys_addr_t paddr, size_t pgsize, size_t pgcount,
+			 int prot, gfp_t gfp, size_t *mapped);
 	size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
 			size_t size, struct iommu_iotlb_gather *gather);
+	size_t (*unmap_pages)(struct io_pgtable_ops *ops, unsigned long iova,
+			      size_t pgsize, size_t pgcount,
+			      struct iommu_iotlb_gather *gather);
 	phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
 				    unsigned long iova);
 };
@@ -246,5 +255,6 @@ extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns;
 extern struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns;
 extern struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns;
 extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns;
+extern struct io_pgtable_init_fns io_pgtable_apple_dart_init_fns;
 
 #endif /* __IO_PGTABLE_H */
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 32d448050bf7..d2f3435e7d17 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -40,6 +40,7 @@ struct iommu_domain;
 struct notifier_block;
 struct iommu_sva;
 struct iommu_fault_event;
+struct iommu_dma_cookie;
 
 /* iommu fault flags */
 #define IOMMU_FAULT_READ	0x0
@@ -60,6 +61,7 @@ struct iommu_domain_geometry {
 #define __IOMMU_DOMAIN_DMA_API	(1U << 1)  /* Domain for use in DMA-API
 					      implementation              */
 #define __IOMMU_DOMAIN_PT	(1U << 2)  /* Domain is identity mapped   */
+#define __IOMMU_DOMAIN_DMA_FQ	(1U << 3)  /* DMA-API uses flush queue    */
 
 /*
  * This are the possible domain-types
@@ -72,12 +74,17 @@ struct iommu_domain_geometry {
  *	IOMMU_DOMAIN_DMA	- Internally used for DMA-API implementations.
  *				  This flag allows IOMMU drivers to implement
  *				  certain optimizations for these domains
+ *	IOMMU_DOMAIN_DMA_FQ	- As above, but definitely using batched TLB
+ *				  invalidation.
  */
 #define IOMMU_DOMAIN_BLOCKED	(0U)
 #define IOMMU_DOMAIN_IDENTITY	(__IOMMU_DOMAIN_PT)
 #define IOMMU_DOMAIN_UNMANAGED	(__IOMMU_DOMAIN_PAGING)
 #define IOMMU_DOMAIN_DMA	(__IOMMU_DOMAIN_PAGING |	\
 				 __IOMMU_DOMAIN_DMA_API)
+#define IOMMU_DOMAIN_DMA_FQ	(__IOMMU_DOMAIN_PAGING |	\
+				 __IOMMU_DOMAIN_DMA_API |	\
+				 __IOMMU_DOMAIN_DMA_FQ)
 
 struct iommu_domain {
 	unsigned type;
@@ -86,9 +93,14 @@ struct iommu_domain {
 	iommu_fault_handler_t handler;
 	void *handler_token;
 	struct iommu_domain_geometry geometry;
-	void *iova_cookie;
+	struct iommu_dma_cookie *iova_cookie;
 };
 
+static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
+{
+	return domain->type & __IOMMU_DOMAIN_DMA_API;
+}
+
 enum iommu_cap {
 	IOMMU_CAP_CACHE_COHERENCY,	/* IOMMU can enforce cache coherent DMA
 					   transactions */
@@ -160,16 +172,22 @@ enum iommu_dev_features {
  * @start: IOVA representing the start of the range to be flushed
  * @end: IOVA representing the end of the range to be flushed (inclusive)
  * @pgsize: The interval at which to perform the flush
+ * @freelist: Removed pages to free after sync
+ * @queued: Indicates that the flush will be queued
  *
  * This structure is intended to be updated by multiple calls to the
  * ->unmap() function in struct iommu_ops before eventually being passed
- * into ->iotlb_sync().
+ * into ->iotlb_sync(). Drivers can add pages to @freelist to be freed after
+ * ->iotlb_sync() or ->iotlb_flush_all() have cleared all cached references to
+ * them. @queued is set to indicate when ->iotlb_flush_all() will be called
+ * later instead of ->iotlb_sync(), so drivers may optimise accordingly.
  */
 struct iommu_iotlb_gather {
 	unsigned long		start;
 	unsigned long		end;
 	size_t			pgsize;
 	struct page		*freelist;
+	bool			queued;
 };
 
 /**
@@ -180,7 +198,10 @@ struct iommu_iotlb_gather {
  * @attach_dev: attach device to an iommu domain
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
+ * @map_pages: map a physically contiguous set of pages of the same size to
+ *             an iommu domain.
  * @unmap: unmap a physically contiguous memory region from an iommu domain
+ * @unmap_pages: unmap a number of pages of the same size from an iommu domain
  * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain
  * @iotlb_sync_map: Sync mappings created recently using @map to the hardware
  * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush
@@ -229,8 +250,14 @@ struct iommu_ops {
 	void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
 	int (*map)(struct iommu_domain *domain, unsigned long iova,
 		   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+	int (*map_pages)(struct iommu_domain *domain, unsigned long iova,
+			 phys_addr_t paddr, size_t pgsize, size_t pgcount,
+			 int prot, gfp_t gfp, size_t *mapped);
 	size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
 		     size_t size, struct iommu_iotlb_gather *iotlb_gather);
+	size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova,
+			      size_t pgsize, size_t pgcount,
+			      struct iommu_iotlb_gather *iotlb_gather);
 	void (*flush_iotlb_all)(struct iommu_domain *domain);
 	void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova,
 			       size_t size);
@@ -414,11 +441,11 @@ extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
 extern size_t iommu_unmap_fast(struct iommu_domain *domain,
 			       unsigned long iova, size_t size,
 			       struct iommu_iotlb_gather *iotlb_gather);
-extern size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
-			   struct scatterlist *sg,unsigned int nents, int prot);
-extern size_t iommu_map_sg_atomic(struct iommu_domain *domain,
-				  unsigned long iova, struct scatterlist *sg,
-				  unsigned int nents, int prot);
+extern ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
+		struct scatterlist *sg, unsigned int nents, int prot);
+extern ssize_t iommu_map_sg_atomic(struct iommu_domain *domain,
+				   unsigned long iova, struct scatterlist *sg,
+				   unsigned int nents, int prot);
 extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova);
 extern void iommu_set_fault_handler(struct iommu_domain *domain,
 			iommu_fault_handler_t handler, void *token);
@@ -476,8 +503,7 @@ int iommu_enable_nesting(struct iommu_domain *domain);
 int iommu_set_pgtable_quirks(struct iommu_domain *domain,
 		unsigned long quirks);
 
-void iommu_set_dma_strict(bool val);
-bool iommu_get_dma_strict(struct iommu_domain *domain);
+void iommu_set_dma_strict(void);
 
 extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev,
 			      unsigned long iova, int flags);
@@ -497,29 +523,80 @@ static inline void iommu_iotlb_sync(struct iommu_domain *domain,
 	iommu_iotlb_gather_init(iotlb_gather);
 }
 
+/**
+ * iommu_iotlb_gather_is_disjoint - Checks whether a new range is disjoint
+ *
+ * @gather: TLB gather data
+ * @iova: start of page to invalidate
+ * @size: size of page to invalidate
+ *
+ * Helper for IOMMU drivers to check whether a new range and the gathered range
+ * are disjoint. For many IOMMUs, flushing the IOMMU in this case is better
+ * than merging the two, which might lead to unnecessary invalidations.
+ */
+static inline
+bool iommu_iotlb_gather_is_disjoint(struct iommu_iotlb_gather *gather,
+				    unsigned long iova, size_t size)
+{
+	unsigned long start = iova, end = start + size - 1;
+
+	return gather->end != 0 &&
+		(end + 1 < gather->start || start > gather->end + 1);
+}
+
+
+/**
+ * iommu_iotlb_gather_add_range - Gather for address-based TLB invalidation
+ * @gather: TLB gather data
+ * @iova: start of page to invalidate
+ * @size: size of page to invalidate
+ *
+ * Helper for IOMMU drivers to build arbitrarily-sized invalidation commands
+ * where only the address range matters, and simply minimising intermediate
+ * syncs is preferred.
+ */
+static inline void iommu_iotlb_gather_add_range(struct iommu_iotlb_gather *gather,
+						unsigned long iova, size_t size)
+{
+	unsigned long end = iova + size - 1;
+
+	if (gather->start > iova)
+		gather->start = iova;
+	if (gather->end < end)
+		gather->end = end;
+}
+
+/**
+ * iommu_iotlb_gather_add_page - Gather for page-based TLB invalidation
+ * @domain: IOMMU domain to be invalidated
+ * @gather: TLB gather data
+ * @iova: start of page to invalidate
+ * @size: size of page to invalidate
+ *
+ * Helper for IOMMU drivers to build invalidation commands based on individual
+ * pages, or with page size/table level hints which cannot be gathered if they
+ * differ.
+ */
 static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain,
 					       struct iommu_iotlb_gather *gather,
 					       unsigned long iova, size_t size)
 {
-	unsigned long start = iova, end = start + size - 1;
-
 	/*
 	 * If the new page is disjoint from the current range or is mapped at
 	 * a different granularity, then sync the TLB so that the gather
 	 * structure can be rewritten.
 	 */
-	if (gather->pgsize != size ||
-	    end + 1 < gather->start || start > gather->end + 1) {
-		if (gather->pgsize)
-			iommu_iotlb_sync(domain, gather);
-		gather->pgsize = size;
-	}
+	if ((gather->pgsize && gather->pgsize != size) ||
+	    iommu_iotlb_gather_is_disjoint(gather, iova, size))
+		iommu_iotlb_sync(domain, gather);
 
-	if (gather->end < end)
-		gather->end = end;
+	gather->pgsize = size;
+	iommu_iotlb_gather_add_range(gather, iova, size);
+}
 
-	if (gather->start > start)
-		gather->start = start;
+static inline bool iommu_iotlb_gather_queued(struct iommu_iotlb_gather *gather)
+{
+	return gather && gather->queued;
 }
 
 /* PCI device grouping function */
@@ -679,18 +756,18 @@ static inline size_t iommu_unmap_fast(struct iommu_domain *domain,
 	return 0;
 }
 
-static inline size_t iommu_map_sg(struct iommu_domain *domain,
-				  unsigned long iova, struct scatterlist *sg,
-				  unsigned int nents, int prot)
+static inline ssize_t iommu_map_sg(struct iommu_domain *domain,
+				   unsigned long iova, struct scatterlist *sg,
+				   unsigned int nents, int prot)
 {
-	return 0;
+	return -ENODEV;
 }
 
-static inline size_t iommu_map_sg_atomic(struct iommu_domain *domain,
+static inline ssize_t iommu_map_sg_atomic(struct iommu_domain *domain,
 				  unsigned long iova, struct scatterlist *sg,
 				  unsigned int nents, int prot)
 {
-	return 0;
+	return -ENODEV;
 }
 
 static inline void iommu_flush_iotlb_all(struct iommu_domain *domain)
@@ -870,6 +947,11 @@ static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain,
 {
 }
 
+static inline bool iommu_iotlb_gather_queued(struct iommu_iotlb_gather *gather)
+{
+	return false;
+}
+
 static inline void iommu_device_unregister(struct iommu_device *iommu)
 {
 }
diff --git a/include/linux/kdb.h b/include/linux/kdb.h
index 0125a677b67f..ea0f5e580fac 100644
--- a/include/linux/kdb.h
+++ b/include/linux/kdb.h
@@ -13,6 +13,8 @@
  * Copyright (C) 2009 Jason Wessel <jason.wessel@windriver.com>
  */
 
+#include <linux/list.h>
+
 /* Shifted versions of the command enable bits are be used if the command
  * has no arguments (see kdb_check_flags). This allows commands, such as
  * go, to have different permissions depending upon whether it is called
@@ -64,6 +66,17 @@ typedef enum {
 
 typedef int (*kdb_func_t)(int, const char **);
 
+/* The KDB shell command table */
+typedef struct _kdbtab {
+	char    *name;			/* Command name */
+	kdb_func_t func;		/* Function to execute command */
+	char    *usage;			/* Usage String for this command */
+	char    *help;			/* Help message for this command */
+	short    minlen;		/* Minimum legal # cmd chars required */
+	kdb_cmdflags_t flags;		/* Command behaviour flags */
+	struct list_head list_node;	/* Command list */
+} kdbtab_t;
+
 #ifdef	CONFIG_KGDB_KDB
 #include <linux/init.h>
 #include <linux/sched.h>
@@ -193,19 +206,13 @@ static inline const char *kdb_walk_kallsyms(loff_t *pos)
 #endif /* ! CONFIG_KALLSYMS */
 
 /* Dynamic kdb shell command registration */
-extern int kdb_register(char *, kdb_func_t, char *, char *, short);
-extern int kdb_register_flags(char *, kdb_func_t, char *, char *,
-			      short, kdb_cmdflags_t);
-extern int kdb_unregister(char *);
+extern int kdb_register(kdbtab_t *cmd);
+extern void kdb_unregister(kdbtab_t *cmd);
 #else /* ! CONFIG_KGDB_KDB */
 static inline __printf(1, 2) int kdb_printf(const char *fmt, ...) { return 0; }
 static inline void kdb_init(int level) {}
-static inline int kdb_register(char *cmd, kdb_func_t func, char *usage,
-			       char *help, short minlen) { return 0; }
-static inline int kdb_register_flags(char *cmd, kdb_func_t func, char *usage,
-				     char *help, short minlen,
-				     kdb_cmdflags_t flags) { return 0; }
-static inline int kdb_unregister(char *cmd) { return 0; }
+static inline int kdb_register(kdbtab_t *cmd) { return 0; }
+static inline void kdb_unregister(kdbtab_t *cmd) {}
 #endif	/* CONFIG_KGDB_KDB */
 enum {
 	KDB_NOT_INITIALIZED,
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 1b2f0a7e00d6..2776423a587e 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -2,7 +2,7 @@
 #ifndef _LINUX_KERNEL_H
 #define _LINUX_KERNEL_H
 
-#include <stdarg.h>
+#include <linux/stdarg.h>
 #include <linux/align.h>
 #include <linux/limits.h>
 #include <linux/linkage.h>
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ae7735b490b4..041ca7f15ea4 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -150,6 +150,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_MMU_RELOAD        (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_UNBLOCK           2
 #define KVM_REQ_UNHALT            3
+#define KVM_REQ_VM_BUGGED         (4 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQUEST_ARCH_BASE     8
 
 #define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
@@ -158,6 +159,15 @@ static inline bool is_error_page(struct page *page)
 })
 #define KVM_ARCH_REQ(nr)           KVM_ARCH_REQ_FLAGS(nr, 0)
 
+bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
+				 struct kvm_vcpu *except,
+				 unsigned long *vcpu_bitmap, cpumask_var_t tmp);
+bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
+bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
+				      struct kvm_vcpu *except);
+bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req,
+				unsigned long *vcpu_bitmap);
+
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
 
@@ -344,6 +354,13 @@ struct kvm_vcpu {
 	struct kvm_vcpu_stat stat;
 	char stats_id[KVM_STATS_NAME_SIZE];
 	struct kvm_dirty_ring dirty_ring;
+
+	/*
+	 * The index of the most recently used memslot by this vCPU. It's ok
+	 * if this becomes stale due to memslot changes since we always check
+	 * it is a valid slot.
+	 */
+	int last_used_slot;
 };
 
 /* must be called with irqs disabled */
@@ -512,7 +529,7 @@ struct kvm_memslots {
 	u64 generation;
 	/* The mapping table from slot id to the index in memslots[]. */
 	short id_to_index[KVM_MEM_SLOTS_NUM];
-	atomic_t lru_slot;
+	atomic_t last_used_slot;
 	int used_slots;
 	struct kvm_memory_slot memslots[];
 };
@@ -538,6 +555,11 @@ struct kvm {
 	struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
 	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
 
+	/* Used to wait for completion of MMU notifiers.  */
+	spinlock_t mn_invalidate_lock;
+	unsigned long mn_active_invalidate_count;
+	struct rcuwait mn_memslots_update_rcuwait;
+
 	/*
 	 * created_vcpus is protected by kvm->lock, and is incremented
 	 * at the beginning of KVM_CREATE_VCPU.  online_vcpus is only
@@ -596,6 +618,7 @@ struct kvm {
 	pid_t userspace_pid;
 	unsigned int max_halt_poll_ns;
 	u32 dirty_ring_size;
+	bool vm_bugged;
 
 #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
 	struct notifier_block pm_notifier;
@@ -629,6 +652,30 @@ struct kvm {
 #define vcpu_err(vcpu, fmt, ...)					\
 	kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
 
+static inline void kvm_vm_bugged(struct kvm *kvm)
+{
+	kvm->vm_bugged = true;
+	kvm_make_all_cpus_request(kvm, KVM_REQ_VM_BUGGED);
+}
+
+#define KVM_BUG(cond, kvm, fmt...)				\
+({								\
+	int __ret = (cond);					\
+								\
+	if (WARN_ONCE(__ret && !(kvm)->vm_bugged, fmt))		\
+		kvm_vm_bugged(kvm);				\
+	unlikely(__ret);					\
+})
+
+#define KVM_BUG_ON(cond, kvm)					\
+({								\
+	int __ret = (cond);					\
+								\
+	if (WARN_ON_ONCE(__ret && !(kvm)->vm_bugged))		\
+		kvm_vm_bugged(kvm);				\
+	unlikely(__ret);					\
+})
+
 static inline bool kvm_dirty_log_manual_protect_and_init_set(struct kvm *kvm)
 {
 	return !!(kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET);
@@ -720,6 +767,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 void kvm_exit(void);
 
 void kvm_get_kvm(struct kvm *kvm);
+bool kvm_get_kvm_safe(struct kvm *kvm);
 void kvm_put_kvm(struct kvm *kvm);
 bool file_is_kvm(struct file *file);
 void kvm_put_kvm_no_destroy(struct kvm *kvm);
@@ -824,7 +872,6 @@ void kvm_release_pfn_clean(kvm_pfn_t pfn);
 void kvm_release_pfn_dirty(kvm_pfn_t pfn);
 void kvm_set_pfn_dirty(kvm_pfn_t pfn);
 void kvm_set_pfn_accessed(kvm_pfn_t pfn);
-void kvm_get_pfn(kvm_pfn_t pfn);
 
 void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache);
 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
@@ -943,14 +990,10 @@ void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc);
 void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
 #endif
 
-bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
-				 struct kvm_vcpu *except,
-				 unsigned long *vcpu_bitmap, cpumask_var_t tmp);
-bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
-bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
-				      struct kvm_vcpu *except);
-bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req,
-				unsigned long *vcpu_bitmap);
+void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start,
+				   unsigned long end);
+void kvm_dec_notifier_count(struct kvm *kvm, unsigned long start,
+				   unsigned long end);
 
 long kvm_arch_dev_ioctl(struct file *filp,
 			unsigned int ioctl, unsigned long arg);
@@ -1034,6 +1077,7 @@ bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu);
 bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu);
 int kvm_arch_post_init_vm(struct kvm *kvm);
 void kvm_arch_pre_destroy_vm(struct kvm *kvm);
+int kvm_arch_create_vm_debugfs(struct kvm *kvm);
 
 #ifndef __KVM_HAVE_ARCH_VM_ALLOC
 /*
@@ -1157,29 +1201,49 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
 
 /*
- * search_memslots() and __gfn_to_memslot() are here because they are
- * used in non-modular code in arch/powerpc/kvm/book3s_hv_rm_mmu.c.
- * gfn_to_memslot() itself isn't here as an inline because that would
- * bloat other code too much.
+ * Returns a pointer to the memslot at slot_index if it contains gfn.
+ * Otherwise returns NULL.
+ */
+static inline struct kvm_memory_slot *
+try_get_memslot(struct kvm_memslots *slots, int slot_index, gfn_t gfn)
+{
+	struct kvm_memory_slot *slot;
+
+	if (slot_index < 0 || slot_index >= slots->used_slots)
+		return NULL;
+
+	/*
+	 * slot_index can come from vcpu->last_used_slot which is not kept
+	 * in sync with userspace-controllable memslot deletion. So use nospec
+	 * to prevent the CPU from speculating past the end of memslots[].
+	 */
+	slot_index = array_index_nospec(slot_index, slots->used_slots);
+	slot = &slots->memslots[slot_index];
+
+	if (gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages)
+		return slot;
+	else
+		return NULL;
+}
+
+/*
+ * Returns a pointer to the memslot that contains gfn and records the index of
+ * the slot in index. Otherwise returns NULL.
  *
  * IMPORTANT: Slots are sorted from highest GFN to lowest GFN!
  */
 static inline struct kvm_memory_slot *
-search_memslots(struct kvm_memslots *slots, gfn_t gfn)
+search_memslots(struct kvm_memslots *slots, gfn_t gfn, int *index)
 {
 	int start = 0, end = slots->used_slots;
-	int slot = atomic_read(&slots->lru_slot);
 	struct kvm_memory_slot *memslots = slots->memslots;
+	struct kvm_memory_slot *slot;
 
 	if (unlikely(!slots->used_slots))
 		return NULL;
 
-	if (gfn >= memslots[slot].base_gfn &&
-	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
-		return &memslots[slot];
-
 	while (start < end) {
-		slot = start + (end - start) / 2;
+		int slot = start + (end - start) / 2;
 
 		if (gfn >= memslots[slot].base_gfn)
 			end = slot;
@@ -1187,19 +1251,37 @@ search_memslots(struct kvm_memslots *slots, gfn_t gfn)
 			start = slot + 1;
 	}
 
-	if (start < slots->used_slots && gfn >= memslots[start].base_gfn &&
-	    gfn < memslots[start].base_gfn + memslots[start].npages) {
-		atomic_set(&slots->lru_slot, start);
-		return &memslots[start];
+	slot = try_get_memslot(slots, start, gfn);
+	if (slot) {
+		*index = start;
+		return slot;
 	}
 
 	return NULL;
 }
 
+/*
+ * __gfn_to_memslot() and its descendants are here because it is called from
+ * non-modular code in arch/powerpc/kvm/book3s_64_vio{,_hv}.c. gfn_to_memslot()
+ * itself isn't here as an inline because that would bloat other code too much.
+ */
 static inline struct kvm_memory_slot *
 __gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn)
 {
-	return search_memslots(slots, gfn);
+	struct kvm_memory_slot *slot;
+	int slot_index = atomic_read(&slots->last_used_slot);
+
+	slot = try_get_memslot(slots, slot_index, gfn);
+	if (slot)
+		return slot;
+
+	slot = search_memslots(slots, gfn, &slot_index);
+	if (slot) {
+		atomic_set(&slots->last_used_slot, slot_index);
+		return slot;
+	}
+
+	return NULL;
 }
 
 static inline unsigned long
@@ -1273,56 +1355,66 @@ struct _kvm_stats_desc {
 	char name[KVM_STATS_NAME_SIZE];
 };
 
-#define STATS_DESC_COMMON(type, unit, base, exp)			       \
+#define STATS_DESC_COMMON(type, unit, base, exp, sz, bsz)		       \
 	.flags = type | unit | base |					       \
 		 BUILD_BUG_ON_ZERO(type & ~KVM_STATS_TYPE_MASK) |	       \
 		 BUILD_BUG_ON_ZERO(unit & ~KVM_STATS_UNIT_MASK) |	       \
 		 BUILD_BUG_ON_ZERO(base & ~KVM_STATS_BASE_MASK),	       \
 	.exponent = exp,						       \
-	.size = 1
+	.size = sz,							       \
+	.bucket_size = bsz
 
-#define VM_GENERIC_STATS_DESC(stat, type, unit, base, exp)		       \
+#define VM_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz)	       \
 	{								       \
 		{							       \
-			STATS_DESC_COMMON(type, unit, base, exp),	       \
+			STATS_DESC_COMMON(type, unit, base, exp, sz, bsz),     \
 			.offset = offsetof(struct kvm_vm_stat, generic.stat)   \
 		},							       \
 		.name = #stat,						       \
 	}
-#define VCPU_GENERIC_STATS_DESC(stat, type, unit, base, exp)		       \
+#define VCPU_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz)	       \
 	{								       \
 		{							       \
-			STATS_DESC_COMMON(type, unit, base, exp),	       \
+			STATS_DESC_COMMON(type, unit, base, exp, sz, bsz),     \
 			.offset = offsetof(struct kvm_vcpu_stat, generic.stat) \
 		},							       \
 		.name = #stat,						       \
 	}
-#define VM_STATS_DESC(stat, type, unit, base, exp)			       \
+#define VM_STATS_DESC(stat, type, unit, base, exp, sz, bsz)		       \
 	{								       \
 		{							       \
-			STATS_DESC_COMMON(type, unit, base, exp),	       \
+			STATS_DESC_COMMON(type, unit, base, exp, sz, bsz),     \
 			.offset = offsetof(struct kvm_vm_stat, stat)	       \
 		},							       \
 		.name = #stat,						       \
 	}
-#define VCPU_STATS_DESC(stat, type, unit, base, exp)			       \
+#define VCPU_STATS_DESC(stat, type, unit, base, exp, sz, bsz)		       \
 	{								       \
 		{							       \
-			STATS_DESC_COMMON(type, unit, base, exp),	       \
+			STATS_DESC_COMMON(type, unit, base, exp, sz, bsz),     \
 			.offset = offsetof(struct kvm_vcpu_stat, stat)	       \
 		},							       \
 		.name = #stat,						       \
 	}
 /* SCOPE: VM, VM_GENERIC, VCPU, VCPU_GENERIC */
-#define STATS_DESC(SCOPE, stat, type, unit, base, exp)			       \
-	SCOPE##_STATS_DESC(stat, type, unit, base, exp)
+#define STATS_DESC(SCOPE, stat, type, unit, base, exp, sz, bsz)		       \
+	SCOPE##_STATS_DESC(stat, type, unit, base, exp, sz, bsz)
 
 #define STATS_DESC_CUMULATIVE(SCOPE, name, unit, base, exponent)	       \
-	STATS_DESC(SCOPE, name, KVM_STATS_TYPE_CUMULATIVE, unit, base, exponent)
+	STATS_DESC(SCOPE, name, KVM_STATS_TYPE_CUMULATIVE,		       \
+		unit, base, exponent, 1, 0)
 #define STATS_DESC_INSTANT(SCOPE, name, unit, base, exponent)		       \
-	STATS_DESC(SCOPE, name, KVM_STATS_TYPE_INSTANT, unit, base, exponent)
+	STATS_DESC(SCOPE, name, KVM_STATS_TYPE_INSTANT,			       \
+		unit, base, exponent, 1, 0)
 #define STATS_DESC_PEAK(SCOPE, name, unit, base, exponent)		       \
-	STATS_DESC(SCOPE, name, KVM_STATS_TYPE_PEAK, unit, base, exponent)
+	STATS_DESC(SCOPE, name, KVM_STATS_TYPE_PEAK,			       \
+		unit, base, exponent, 1, 0)
+#define STATS_DESC_LINEAR_HIST(SCOPE, name, unit, base, exponent, sz, bsz)     \
+	STATS_DESC(SCOPE, name, KVM_STATS_TYPE_LINEAR_HIST,		       \
+		unit, base, exponent, sz, bsz)
+#define STATS_DESC_LOG_HIST(SCOPE, name, unit, base, exponent, sz)	       \
+	STATS_DESC(SCOPE, name, KVM_STATS_TYPE_LOG_HIST,		       \
+		unit, base, exponent, sz, 0)
 
 /* Cumulative counter, read/write */
 #define STATS_DESC_COUNTER(SCOPE, name)					       \
@@ -1341,9 +1433,18 @@ struct _kvm_stats_desc {
 #define STATS_DESC_TIME_NSEC(SCOPE, name)				       \
 	STATS_DESC_CUMULATIVE(SCOPE, name, KVM_STATS_UNIT_SECONDS,	       \
 		KVM_STATS_BASE_POW10, -9)
+/* Linear histogram for time in nanosecond */
+#define STATS_DESC_LINHIST_TIME_NSEC(SCOPE, name, sz, bsz)		       \
+	STATS_DESC_LINEAR_HIST(SCOPE, name, KVM_STATS_UNIT_SECONDS,	       \
+		KVM_STATS_BASE_POW10, -9, sz, bsz)
+/* Logarithmic histogram for time in nanosecond */
+#define STATS_DESC_LOGHIST_TIME_NSEC(SCOPE, name, sz)			       \
+	STATS_DESC_LOG_HIST(SCOPE, name, KVM_STATS_UNIT_SECONDS,	       \
+		KVM_STATS_BASE_POW10, -9, sz)
 
 #define KVM_GENERIC_VM_STATS()						       \
-	STATS_DESC_COUNTER(VM_GENERIC, remote_tlb_flush)
+	STATS_DESC_COUNTER(VM_GENERIC, remote_tlb_flush),		       \
+	STATS_DESC_COUNTER(VM_GENERIC, remote_tlb_flush_requests)
 
 #define KVM_GENERIC_VCPU_STATS()					       \
 	STATS_DESC_COUNTER(VCPU_GENERIC, halt_successful_poll),		       \
@@ -1351,13 +1452,62 @@ struct _kvm_stats_desc {
 	STATS_DESC_COUNTER(VCPU_GENERIC, halt_poll_invalid),		       \
 	STATS_DESC_COUNTER(VCPU_GENERIC, halt_wakeup),			       \
 	STATS_DESC_TIME_NSEC(VCPU_GENERIC, halt_poll_success_ns),	       \
-	STATS_DESC_TIME_NSEC(VCPU_GENERIC, halt_poll_fail_ns)
+	STATS_DESC_TIME_NSEC(VCPU_GENERIC, halt_poll_fail_ns),		       \
+	STATS_DESC_TIME_NSEC(VCPU_GENERIC, halt_wait_ns),		       \
+	STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_poll_success_hist,     \
+			HALT_POLL_HIST_COUNT),				       \
+	STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_poll_fail_hist,	       \
+			HALT_POLL_HIST_COUNT),				       \
+	STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_wait_hist,	       \
+			HALT_POLL_HIST_COUNT)
 
 extern struct dentry *kvm_debugfs_dir;
+
 ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header,
 		       const struct _kvm_stats_desc *desc,
 		       void *stats, size_t size_stats,
 		       char __user *user_buffer, size_t size, loff_t *offset);
+
+/**
+ * kvm_stats_linear_hist_update() - Update bucket value for linear histogram
+ * statistics data.
+ *
+ * @data: start address of the stats data
+ * @size: the number of bucket of the stats data
+ * @value: the new value used to update the linear histogram's bucket
+ * @bucket_size: the size (width) of a bucket
+ */
+static inline void kvm_stats_linear_hist_update(u64 *data, size_t size,
+						u64 value, size_t bucket_size)
+{
+	size_t index = div64_u64(value, bucket_size);
+
+	index = min(index, size - 1);
+	++data[index];
+}
+
+/**
+ * kvm_stats_log_hist_update() - Update bucket value for logarithmic histogram
+ * statistics data.
+ *
+ * @data: start address of the stats data
+ * @size: the number of bucket of the stats data
+ * @value: the new value used to update the logarithmic histogram's bucket
+ */
+static inline void kvm_stats_log_hist_update(u64 *data, size_t size, u64 value)
+{
+	size_t index = fls64(value);
+
+	index = min(index, size - 1);
+	++data[index];
+}
+
+#define KVM_STATS_LINEAR_HIST_UPDATE(array, value, bsize)		       \
+	kvm_stats_linear_hist_update(array, ARRAY_SIZE(array), value, bsize)
+#define KVM_STATS_LOG_HIST_UPDATE(array, value)				       \
+	kvm_stats_log_hist_update(array, ARRAY_SIZE(array), value)
+
+
 extern const struct kvm_stats_header kvm_vm_stats_header;
 extern const struct _kvm_stats_desc kvm_vm_stats_desc[];
 extern const struct kvm_stats_header kvm_vcpu_stats_header;
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index ed6a985c5680..2237abb93ccd 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -76,8 +76,11 @@ struct kvm_mmu_memory_cache {
 };
 #endif
 
+#define HALT_POLL_HIST_COUNT			32
+
 struct kvm_vm_stat_generic {
 	u64 remote_tlb_flush;
+	u64 remote_tlb_flush_requests;
 };
 
 struct kvm_vcpu_stat_generic {
@@ -87,6 +90,10 @@ struct kvm_vcpu_stat_generic {
 	u64 halt_wakeup;
 	u64 halt_poll_success_ns;
 	u64 halt_poll_fail_ns;
+	u64 halt_wait_ns;
+	u64 halt_poll_success_hist[HALT_POLL_HIST_COUNT];
+	u64 halt_poll_fail_hist[HALT_POLL_HIST_COUNT];
+	u64 halt_wait_hist[HALT_POLL_HIST_COUNT];
 };
 
 #define KVM_STATS_NAME_SIZE	48
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 860e63f5667b..c0c64f03e107 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -426,6 +426,7 @@ enum {
 	ATA_HORKAGE_NOTRIM	= (1 << 24),	/* don't use TRIM */
 	ATA_HORKAGE_MAX_SEC_1024 = (1 << 25),	/* Limit max sects to 1024 */
 	ATA_HORKAGE_MAX_TRIM_128M = (1 << 26),	/* Limit max trim size to 128M */
+	ATA_HORKAGE_NO_NCQ_ON_ATI = (1 << 27),	/* Disable NCQ on ATI chipset */
 
 	 /* DMA mask for user DMA control: User visible values; DO NOT
 	    renumber */
diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h
index a7330eb3ec64..7dd1f01ec4f9 100644
--- a/include/linux/mISDNif.h
+++ b/include/linux/mISDNif.h
@@ -18,7 +18,6 @@
 #ifndef mISDNIF_H
 #define mISDNIF_H
 
-#include <stdarg.h>
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/socket.h>
diff --git a/include/linux/mdev.h b/include/linux/mdev.h
index 3a38598c2605..68427e8fadeb 100644
--- a/include/linux/mdev.h
+++ b/include/linux/mdev.h
@@ -72,11 +72,6 @@ struct device *mtype_get_parent_dev(struct mdev_type *mtype);
  *			@mdev: mdev_device device structure which is being
  *			       destroyed
  *			Returns integer: success (0) or error (< 0)
- * @open:		Open mediated device.
- *			@mdev: mediated device.
- *			Returns integer: success (0) or error (< 0)
- * @release:		release mediated device
- *			@mdev: mediated device.
  * @read:		Read emulation callback
  *			@mdev: mediated device structure
  *			@buf: read buffer
@@ -111,8 +106,8 @@ struct mdev_parent_ops {
 
 	int     (*create)(struct mdev_device *mdev);
 	int     (*remove)(struct mdev_device *mdev);
-	int     (*open)(struct mdev_device *mdev);
-	void    (*release)(struct mdev_device *mdev);
+	int     (*open_device)(struct mdev_device *mdev);
+	void    (*close_device)(struct mdev_device *mdev);
 	ssize_t (*read)(struct mdev_device *mdev, char __user *buf,
 			size_t count, loff_t *ppos);
 	ssize_t (*write)(struct mdev_device *mdev, const char __user *buf,
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 4a53c3ca86bd..b066024c62e3 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -99,8 +99,6 @@ void memblock_discard(void);
 static inline void memblock_discard(void) {}
 #endif
 
-phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
-				   phys_addr_t size, phys_addr_t align);
 void memblock_allow_resize(void);
 int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid);
 int memblock_add(phys_addr_t base, phys_addr_t size);
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 20151c4f1e0e..3096c9a0ee01 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -105,14 +105,6 @@ struct mem_cgroup_reclaim_iter {
 	unsigned int generation;
 };
 
-struct lruvec_stat {
-	long count[NR_VM_NODE_STAT_ITEMS];
-};
-
-struct batched_lruvec_stat {
-	s32 count[NR_VM_NODE_STAT_ITEMS];
-};
-
 /*
  * Bitmap and deferred work of shrinker::id corresponding to memcg-aware
  * shrinkers, which have elements charged to this memcg.
@@ -123,24 +115,30 @@ struct shrinker_info {
 	unsigned long *map;
 };
 
+struct lruvec_stats_percpu {
+	/* Local (CPU and cgroup) state */
+	long state[NR_VM_NODE_STAT_ITEMS];
+
+	/* Delta calculation for lockless upward propagation */
+	long state_prev[NR_VM_NODE_STAT_ITEMS];
+};
+
+struct lruvec_stats {
+	/* Aggregated (CPU and subtree) state */
+	long state[NR_VM_NODE_STAT_ITEMS];
+
+	/* Pending child counts during tree propagation */
+	long state_pending[NR_VM_NODE_STAT_ITEMS];
+};
+
 /*
  * per-node information in memory controller.
  */
 struct mem_cgroup_per_node {
 	struct lruvec		lruvec;
 
-	/*
-	 * Legacy local VM stats. This should be struct lruvec_stat and
-	 * cannot be optimized to struct batched_lruvec_stat. Because
-	 * the threshold of the lruvec_stat_cpu can be as big as
-	 * MEMCG_CHARGE_BATCH * PAGE_SIZE. It can fit into s32. But this
-	 * filed has no upper limit.
-	 */
-	struct lruvec_stat __percpu *lruvec_stat_local;
-
-	/* Subtree VM stats (batched updates) */
-	struct batched_lruvec_stat __percpu *lruvec_stat_cpu;
-	atomic_long_t		lruvec_stat[NR_VM_NODE_STAT_ITEMS];
+	struct lruvec_stats_percpu __percpu	*lruvec_stats_percpu;
+	struct lruvec_stats			lruvec_stats;
 
 	unsigned long		lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
 
@@ -595,13 +593,6 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page)
 }
 #endif
 
-static __always_inline bool memcg_stat_item_in_bytes(int idx)
-{
-	if (idx == MEMCG_PERCPU_B)
-		return true;
-	return vmstat_item_in_bytes(idx);
-}
-
 static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 {
 	return (memcg == root_mem_cgroup);
@@ -693,13 +684,35 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
 		page_counter_read(&memcg->memory);
 }
 
-int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask);
+int __mem_cgroup_charge(struct page *page, struct mm_struct *mm,
+			gfp_t gfp_mask);
+static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
+				    gfp_t gfp_mask)
+{
+	if (mem_cgroup_disabled())
+		return 0;
+	return __mem_cgroup_charge(page, mm, gfp_mask);
+}
+
 int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
 				  gfp_t gfp, swp_entry_t entry);
 void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry);
 
-void mem_cgroup_uncharge(struct page *page);
-void mem_cgroup_uncharge_list(struct list_head *page_list);
+void __mem_cgroup_uncharge(struct page *page);
+static inline void mem_cgroup_uncharge(struct page *page)
+{
+	if (mem_cgroup_disabled())
+		return;
+	__mem_cgroup_uncharge(page);
+}
+
+void __mem_cgroup_uncharge_list(struct list_head *page_list);
+static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
+{
+	if (mem_cgroup_disabled())
+		return;
+	__mem_cgroup_uncharge_list(page_list);
+}
 
 void mem_cgroup_migrate(struct page *oldpage, struct page *newpage);
 
@@ -884,11 +897,6 @@ static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
 	return !!(memcg->css.flags & CSS_ONLINE);
 }
 
-/*
- * For memory reclaim.
- */
-int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
-
 void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
 		int zid, int nr_pages);
 
@@ -955,22 +963,21 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg,
 	local_irq_restore(flags);
 }
 
+static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
+{
+	return READ_ONCE(memcg->vmstats.state[idx]);
+}
+
 static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
 					      enum node_stat_item idx)
 {
 	struct mem_cgroup_per_node *pn;
-	long x;
 
 	if (mem_cgroup_disabled())
 		return node_page_state(lruvec_pgdat(lruvec), idx);
 
 	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-	x = atomic_long_read(&pn->lruvec_stat[idx]);
-#ifdef CONFIG_SMP
-	if (x < 0)
-		x = 0;
-#endif
-	return x;
+	return READ_ONCE(pn->lruvec_stats.state[idx]);
 }
 
 static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
@@ -985,7 +992,7 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
 
 	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
 	for_each_possible_cpu(cpu)
-		x += per_cpu(pn->lruvec_stat_local->count[idx], cpu);
+		x += per_cpu(pn->lruvec_stats_percpu->state[idx], cpu);
 #ifdef CONFIG_SMP
 	if (x < 0)
 		x = 0;
@@ -993,6 +1000,8 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
 	return x;
 }
 
+void mem_cgroup_flush_stats(void);
+
 void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 			      int val);
 void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val);
@@ -1391,6 +1400,11 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg,
 {
 }
 
+static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
+{
+	return 0;
+}
+
 static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
 					      enum node_stat_item idx)
 {
@@ -1403,6 +1417,10 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
 	return node_page_state(lruvec_pgdat(lruvec), idx);
 }
 
+static inline void mem_cgroup_flush_stats(void)
+{
+}
+
 static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec,
 					    enum node_stat_item idx, int val)
 {
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 97e92e8b556a..7efc0a7c14c9 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -23,6 +23,48 @@
 
 #define MIN_MEMORY_BLOCK_SIZE     (1UL << SECTION_SIZE_BITS)
 
+/**
+ * struct memory_group - a logical group of memory blocks
+ * @nid: The node id for all memory blocks inside the memory group.
+ * @blocks: List of all memory blocks belonging to this memory group.
+ * @present_kernel_pages: Present (online) memory outside ZONE_MOVABLE of this
+ *			  memory group.
+ * @present_movable_pages: Present (online) memory in ZONE_MOVABLE of this
+ *			   memory group.
+ * @is_dynamic: The memory group type: static vs. dynamic
+ * @s.max_pages: Valid with &memory_group.is_dynamic == false. The maximum
+ *		 number of pages we'll have in this static memory group.
+ * @d.unit_pages: Valid with &memory_group.is_dynamic == true. Unit in pages
+ *		  in which memory is added/removed in this dynamic memory group.
+ *		  This granularity defines the alignment of a unit in physical
+ *		  address space; it has to be at least as big as a single
+ *		  memory block.
+ *
+ * A memory group logically groups memory blocks; each memory block
+ * belongs to at most one memory group. A memory group corresponds to
+ * a memory device, such as a DIMM or a NUMA node, which spans multiple
+ * memory blocks and might even span multiple non-contiguous physical memory
+ * ranges.
+ *
+ * Modification of members after registration is serialized by memory
+ * hot(un)plug code.
+ */
+struct memory_group {
+	int nid;
+	struct list_head memory_blocks;
+	unsigned long present_kernel_pages;
+	unsigned long present_movable_pages;
+	bool is_dynamic;
+	union {
+		struct {
+			unsigned long max_pages;
+		} s;
+		struct {
+			unsigned long unit_pages;
+		} d;
+	};
+};
+
 struct memory_block {
 	unsigned long start_section_nr;
 	unsigned long state;		/* serialized by the dev->lock */
@@ -34,6 +76,8 @@ struct memory_block {
 	 * lay at the beginning of the memory block.
 	 */
 	unsigned long nr_vmemmap_pages;
+	struct memory_group *group;	/* group (if any) for this block */
+	struct list_head group_next;	/* next block inside memory group */
 };
 
 int arch_get_memory_phys_device(unsigned long start_pfn);
@@ -86,16 +130,25 @@ static inline int memory_notify(unsigned long val, void *v)
 extern int register_memory_notifier(struct notifier_block *nb);
 extern void unregister_memory_notifier(struct notifier_block *nb);
 int create_memory_block_devices(unsigned long start, unsigned long size,
-				unsigned long vmemmap_pages);
+				unsigned long vmemmap_pages,
+				struct memory_group *group);
 void remove_memory_block_devices(unsigned long start, unsigned long size);
 extern void memory_dev_init(void);
 extern int memory_notify(unsigned long val, void *v);
-extern struct memory_block *find_memory_block(struct mem_section *);
+extern struct memory_block *find_memory_block(unsigned long section_nr);
 typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *);
 extern int walk_memory_blocks(unsigned long start, unsigned long size,
 			      void *arg, walk_memory_blocks_func_t func);
 extern int for_each_memory_block(void *arg, walk_memory_blocks_func_t func);
 #define CONFIG_MEM_BLOCK_SIZE	(PAGES_PER_SECTION<<PAGE_SHIFT)
+
+extern int memory_group_register_static(int nid, unsigned long max_pages);
+extern int memory_group_register_dynamic(int nid, unsigned long unit_pages);
+extern int memory_group_unregister(int mgid);
+struct memory_group *memory_group_find_by_id(int mgid);
+typedef int (*walk_memory_groups_func_t)(struct memory_group *, void *);
+int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
+			       struct memory_group *excluded, void *arg);
 #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
 
 #ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index a7fd2c3ccb77..e5a867c950b2 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -12,6 +12,7 @@ struct zone;
 struct pglist_data;
 struct mem_section;
 struct memory_block;
+struct memory_group;
 struct resource;
 struct vmem_altmap;
 
@@ -50,6 +51,11 @@ typedef int __bitwise mhp_t;
  * Only selected architectures support it with SPARSE_VMEMMAP.
  */
 #define MHP_MEMMAP_ON_MEMORY   ((__force mhp_t)BIT(1))
+/*
+ * The nid field specifies a memory group id (mgid) instead. The memory group
+ * implies the node id (nid).
+ */
+#define MHP_NID_IS_MGID		((__force mhp_t)BIT(2))
 
 /*
  * Extended parameters for memory hotplug:
@@ -95,13 +101,15 @@ static inline void zone_seqlock_init(struct zone *zone)
 extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages);
 extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);
 extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
-extern void adjust_present_page_count(struct zone *zone, long nr_pages);
+extern void adjust_present_page_count(struct page *page,
+				      struct memory_group *group,
+				      long nr_pages);
 /* VM interface that may be used by firmware interface */
 extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
 				     struct zone *zone);
 extern void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages);
 extern int online_pages(unsigned long pfn, unsigned long nr_pages,
-			struct zone *zone);
+			struct zone *zone, struct memory_group *group);
 extern struct zone *test_pages_in_a_zone(unsigned long start_pfn,
 					 unsigned long end_pfn);
 extern void __offline_isolated_pages(unsigned long start_pfn,
@@ -130,8 +138,7 @@ static inline bool movable_node_is_enabled(void)
 	return movable_node_enabled;
 }
 
-extern void arch_remove_memory(int nid, u64 start, u64 size,
-			       struct vmem_altmap *altmap);
+extern void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap);
 extern void __remove_pages(unsigned long start_pfn, unsigned long nr_pages,
 			   struct vmem_altmap *altmap);
 
@@ -292,25 +299,27 @@ static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
 #ifdef CONFIG_MEMORY_HOTREMOVE
 
 extern void try_offline_node(int nid);
-extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
-extern int remove_memory(int nid, u64 start, u64 size);
-extern void __remove_memory(int nid, u64 start, u64 size);
-extern int offline_and_remove_memory(int nid, u64 start, u64 size);
+extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages,
+			 struct memory_group *group);
+extern int remove_memory(u64 start, u64 size);
+extern void __remove_memory(u64 start, u64 size);
+extern int offline_and_remove_memory(u64 start, u64 size);
 
 #else
 static inline void try_offline_node(int nid) {}
 
-static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
+static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages,
+				struct memory_group *group)
 {
 	return -EINVAL;
 }
 
-static inline int remove_memory(int nid, u64 start, u64 size)
+static inline int remove_memory(u64 start, u64 size)
 {
 	return -EBUSY;
 }
 
-static inline void __remove_memory(int nid, u64 start, u64 size) {}
+static inline void __remove_memory(u64 start, u64 size) {}
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
 extern void set_zone_contiguous(struct zone *zone);
@@ -339,7 +348,8 @@ extern void sparse_remove_section(struct mem_section *ms,
 		unsigned long map_offset, struct vmem_altmap *altmap);
 extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
 					  unsigned long pnum);
-extern struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_pfn,
+extern struct zone *zone_for_pfn_range(int online_type, int nid,
+		struct memory_group *group, unsigned long start_pfn,
 		unsigned long nr_pages);
 extern int arch_create_linear_mapping(int nid, u64 start, u64 size,
 				      struct mhp_params *params);
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 0aaf91b496e2..4091692bed8c 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -184,6 +184,14 @@ extern bool vma_migratable(struct vm_area_struct *vma);
 extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long);
 extern void mpol_put_task_policy(struct task_struct *);
 
+extern bool numa_demotion_enabled;
+
+static inline bool mpol_is_preferred_many(struct mempolicy *pol)
+{
+	return  (pol->mode == MPOL_PREFERRED_MANY);
+}
+
+
 #else
 
 struct mempolicy {};
@@ -292,5 +300,13 @@ static inline nodemask_t *policy_nodemask_current(gfp_t gfp)
 {
 	return NULL;
 }
+
+#define numa_demotion_enabled	false
+
+static inline bool mpol_is_preferred_many(struct mempolicy *pol)
+{
+	return  false;
+}
+
 #endif /* CONFIG_NUMA */
 #endif
diff --git a/include/linux/mfd/dbx500-prcmu.h b/include/linux/mfd/dbx500-prcmu.h
index e6ee2ec35de9..cbf9d7619493 100644
--- a/include/linux/mfd/dbx500-prcmu.h
+++ b/include/linux/mfd/dbx500-prcmu.h
@@ -186,10 +186,11 @@ enum ddr_pwrst {
 #define PRCMU_FW_PROJECT_U8500_C3	8
 #define PRCMU_FW_PROJECT_U8500_C4	9
 #define PRCMU_FW_PROJECT_U9500_MBL	10
-#define PRCMU_FW_PROJECT_U8500_MBL	11 /* Customer specific */
+#define PRCMU_FW_PROJECT_U8500_SSG1	11 /* Samsung specific */
 #define PRCMU_FW_PROJECT_U8500_MBL2	12 /* Customer specific */
 #define PRCMU_FW_PROJECT_U8520		13
 #define PRCMU_FW_PROJECT_U8420		14
+#define PRCMU_FW_PROJECT_U8500_SSG2	15 /* Samsung specific */
 #define PRCMU_FW_PROJECT_U8420_SYSCLK	17
 #define PRCMU_FW_PROJECT_A9420		20
 /* [32..63] 9540 and derivatives */
diff --git a/include/linux/mfd/idt82p33_reg.h b/include/linux/mfd/idt82p33_reg.h
new file mode 100644
index 000000000000..129a6c078221
--- /dev/null
+++ b/include/linux/mfd/idt82p33_reg.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Register Map - Based on AN888_SMUforIEEE_SynchEther_82P33xxx_RevH.pdf
+ *
+ * Copyright (C) 2021 Integrated Device Technology, Inc., a Renesas Company.
+ */
+#ifndef HAVE_IDT82P33_REG
+#define HAVE_IDT82P33_REG
+
+/* Register address */
+#define DPLL1_TOD_CNFG 0x134
+#define DPLL2_TOD_CNFG 0x1B4
+
+#define DPLL1_TOD_STS 0x10B
+#define DPLL2_TOD_STS 0x18B
+
+#define DPLL1_TOD_TRIGGER 0x115
+#define DPLL2_TOD_TRIGGER 0x195
+
+#define DPLL1_OPERATING_MODE_CNFG 0x120
+#define DPLL2_OPERATING_MODE_CNFG 0x1A0
+
+#define DPLL1_HOLDOVER_FREQ_CNFG 0x12C
+#define DPLL2_HOLDOVER_FREQ_CNFG 0x1AC
+
+#define DPLL1_PHASE_OFFSET_CNFG 0x143
+#define DPLL2_PHASE_OFFSET_CNFG 0x1C3
+
+#define DPLL1_SYNC_EDGE_CNFG 0x140
+#define DPLL2_SYNC_EDGE_CNFG 0x1C0
+
+#define DPLL1_INPUT_MODE_CNFG 0x116
+#define DPLL2_INPUT_MODE_CNFG 0x196
+
+#define DPLL1_OPERATING_STS 0x102
+#define DPLL2_OPERATING_STS 0x182
+
+#define DPLL1_CURRENT_FREQ_STS 0x103
+#define DPLL2_CURRENT_FREQ_STS 0x183
+
+#define REG_SOFT_RESET 0X381
+
+#define OUT_MUX_CNFG(outn) REG_ADDR(0x6, (0xC * (outn)))
+
+/* Register bit definitions */
+#define SYNC_TOD BIT(1)
+#define PH_OFFSET_EN BIT(7)
+#define SQUELCH_ENABLE BIT(5)
+
+/* Bit definitions for the DPLL_MODE register */
+#define PLL_MODE_SHIFT		(0)
+#define PLL_MODE_MASK		(0x1F)
+#define COMBO_MODE_EN		BIT(5)
+#define COMBO_MODE_SHIFT	(6)
+#define COMBO_MODE_MASK		(0x3)
+
+/* Bit definitions for DPLL_OPERATING_STS register */
+#define OPERATING_STS_MASK	(0x7)
+#define OPERATING_STS_SHIFT	(0x0)
+
+/* Bit definitions for DPLL_TOD_TRIGGER register */
+#define READ_TRIGGER_MASK	(0xF)
+#define READ_TRIGGER_SHIFT	(0x0)
+#define WRITE_TRIGGER_MASK	(0xF0)
+#define WRITE_TRIGGER_SHIFT	(0x4)
+
+/* Bit definitions for REG_SOFT_RESET register */
+#define SOFT_RESET_EN		BIT(7)
+
+enum pll_mode {
+	PLL_MODE_MIN = 0,
+	PLL_MODE_AUTOMATIC = PLL_MODE_MIN,
+	PLL_MODE_FORCE_FREERUN = 1,
+	PLL_MODE_FORCE_HOLDOVER = 2,
+	PLL_MODE_FORCE_LOCKED = 4,
+	PLL_MODE_FORCE_PRE_LOCKED2 = 5,
+	PLL_MODE_FORCE_PRE_LOCKED = 6,
+	PLL_MODE_FORCE_LOST_PHASE = 7,
+	PLL_MODE_DCO = 10,
+	PLL_MODE_WPH = 18,
+	PLL_MODE_MAX = PLL_MODE_WPH,
+};
+
+enum hw_tod_trig_sel {
+	HW_TOD_TRIG_SEL_MIN = 0,
+	HW_TOD_TRIG_SEL_NO_WRITE = HW_TOD_TRIG_SEL_MIN,
+	HW_TOD_TRIG_SEL_NO_READ = HW_TOD_TRIG_SEL_MIN,
+	HW_TOD_TRIG_SEL_SYNC_SEL = 1,
+	HW_TOD_TRIG_SEL_IN12 = 2,
+	HW_TOD_TRIG_SEL_IN13 = 3,
+	HW_TOD_TRIG_SEL_IN14 = 4,
+	HW_TOD_TRIG_SEL_TOD_PPS = 5,
+	HW_TOD_TRIG_SEL_TIMER_INTERVAL = 6,
+	HW_TOD_TRIG_SEL_MSB_PHASE_OFFSET_CNFG = 7,
+	HW_TOD_TRIG_SEL_MSB_HOLDOVER_FREQ_CNFG = 8,
+	HW_TOD_WR_TRIG_SEL_MSB_TOD_CNFG = 9,
+	HW_TOD_RD_TRIG_SEL_LSB_TOD_STS = HW_TOD_WR_TRIG_SEL_MSB_TOD_CNFG,
+	WR_TRIG_SEL_MAX = HW_TOD_WR_TRIG_SEL_MSB_TOD_CNFG,
+};
+
+/** @brief Enumerated type listing DPLL operational modes */
+enum dpll_state {
+	DPLL_STATE_FREERUN = 1,
+	DPLL_STATE_HOLDOVER = 2,
+	DPLL_STATE_LOCKED = 4,
+	DPLL_STATE_PRELOCKED2 = 5,
+	DPLL_STATE_PRELOCKED = 6,
+	DPLL_STATE_LOSTPHASE = 7,
+	DPLL_STATE_MAX
+};
+
+#endif
diff --git a/include/linux/mfd/idt8a340_reg.h b/include/linux/mfd/idt8a340_reg.h
new file mode 100644
index 000000000000..92d763230bdf
--- /dev/null
+++ b/include/linux/mfd/idt8a340_reg.h
@@ -0,0 +1,729 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Based on 5.2.0, Family Programming Guide (Sept 30, 2020)
+ *
+ * Copyright (C) 2021 Integrated Device Technology, Inc., a Renesas Company.
+ */
+#ifndef HAVE_IDT8A340_REG
+#define HAVE_IDT8A340_REG
+
+#define PAGE_ADDR_BASE                    0x0000
+#define PAGE_ADDR                         0x00fc
+
+#define HW_REVISION                       0x8180
+#define REV_ID                            0x007a
+
+#define HW_DPLL_0                         (0x8a00)
+#define HW_DPLL_1                         (0x8b00)
+#define HW_DPLL_2                         (0x8c00)
+#define HW_DPLL_3                         (0x8d00)
+#define HW_DPLL_4                         (0x8e00)
+#define HW_DPLL_5                         (0x8f00)
+#define HW_DPLL_6                         (0x9000)
+#define HW_DPLL_7                         (0x9100)
+
+#define HW_DPLL_TOD_SW_TRIG_ADDR__0       (0x080)
+#define HW_DPLL_TOD_CTRL_1                (0x089)
+#define HW_DPLL_TOD_CTRL_2                (0x08A)
+#define HW_DPLL_TOD_OVR__0                (0x098)
+#define HW_DPLL_TOD_OUT_0__0              (0x0B0)
+
+#define HW_Q0_Q1_CH_SYNC_CTRL_0           (0xa740)
+#define HW_Q0_Q1_CH_SYNC_CTRL_1           (0xa741)
+#define HW_Q2_Q3_CH_SYNC_CTRL_0           (0xa742)
+#define HW_Q2_Q3_CH_SYNC_CTRL_1           (0xa743)
+#define HW_Q4_Q5_CH_SYNC_CTRL_0           (0xa744)
+#define HW_Q4_Q5_CH_SYNC_CTRL_1           (0xa745)
+#define HW_Q6_Q7_CH_SYNC_CTRL_0           (0xa746)
+#define HW_Q6_Q7_CH_SYNC_CTRL_1           (0xa747)
+#define HW_Q8_CH_SYNC_CTRL_0              (0xa748)
+#define HW_Q8_CH_SYNC_CTRL_1              (0xa749)
+#define HW_Q9_CH_SYNC_CTRL_0              (0xa74a)
+#define HW_Q9_CH_SYNC_CTRL_1              (0xa74b)
+#define HW_Q10_CH_SYNC_CTRL_0             (0xa74c)
+#define HW_Q10_CH_SYNC_CTRL_1             (0xa74d)
+#define HW_Q11_CH_SYNC_CTRL_0             (0xa74e)
+#define HW_Q11_CH_SYNC_CTRL_1             (0xa74f)
+
+#define SYNC_SOURCE_DPLL0_TOD_PPS	0x14
+#define SYNC_SOURCE_DPLL1_TOD_PPS	0x15
+#define SYNC_SOURCE_DPLL2_TOD_PPS	0x16
+#define SYNC_SOURCE_DPLL3_TOD_PPS	0x17
+
+#define SYNCTRL1_MASTER_SYNC_RST	BIT(7)
+#define SYNCTRL1_MASTER_SYNC_TRIG	BIT(5)
+#define SYNCTRL1_TOD_SYNC_TRIG		BIT(4)
+#define SYNCTRL1_FBDIV_FRAME_SYNC_TRIG	BIT(3)
+#define SYNCTRL1_FBDIV_SYNC_TRIG	BIT(2)
+#define SYNCTRL1_Q1_DIV_SYNC_TRIG	BIT(1)
+#define SYNCTRL1_Q0_DIV_SYNC_TRIG	BIT(0)
+
+#define HW_Q8_CTRL_SPARE  (0xa7d4)
+#define HW_Q11_CTRL_SPARE (0xa7ec)
+
+/**
+ * Select FOD5 as sync_trigger for Q8 divider.
+ * Transition from logic zero to one
+ * sets trigger to sync Q8 divider.
+ *
+ * Unused when FOD4 is driving Q8 divider (normal operation).
+ */
+#define Q9_TO_Q8_SYNC_TRIG  BIT(1)
+
+/**
+ * Enable FOD5 as driver for clock and sync for Q8 divider.
+ * Enable fanout buffer for FOD5.
+ *
+ * Unused when FOD4 is driving Q8 divider (normal operation).
+ */
+#define Q9_TO_Q8_FANOUT_AND_CLOCK_SYNC_ENABLE_MASK  (BIT(0) | BIT(2))
+
+/**
+ * Select FOD6 as sync_trigger for Q11 divider.
+ * Transition from logic zero to one
+ * sets trigger to sync Q11 divider.
+ *
+ * Unused when FOD7 is driving Q11 divider (normal operation).
+ */
+#define Q10_TO_Q11_SYNC_TRIG  BIT(1)
+
+/**
+ * Enable FOD6 as driver for clock and sync for Q11 divider.
+ * Enable fanout buffer for FOD6.
+ *
+ * Unused when FOD7 is driving Q11 divider (normal operation).
+ */
+#define Q10_TO_Q11_FANOUT_AND_CLOCK_SYNC_ENABLE_MASK  (BIT(0) | BIT(2))
+
+#define RESET_CTRL                        0xc000
+#define SM_RESET                          0x0012
+#define SM_RESET_V520                     0x0013
+#define SM_RESET_CMD                      0x5A
+
+#define GENERAL_STATUS                    0xc014
+#define BOOT_STATUS                       0x0000
+#define HW_REV_ID                         0x000A
+#define BOND_ID                           0x000B
+#define HW_CSR_ID                         0x000C
+#define HW_IRQ_ID                         0x000E
+#define MAJ_REL                           0x0010
+#define MIN_REL                           0x0011
+#define HOTFIX_REL                        0x0012
+#define PIPELINE_ID                       0x0014
+#define BUILD_ID                          0x0018
+#define JTAG_DEVICE_ID                    0x001c
+#define PRODUCT_ID                        0x001e
+#define OTP_SCSR_CONFIG_SELECT            0x0022
+
+#define STATUS                            0xc03c
+#define DPLL0_STATUS			  0x0018
+#define DPLL1_STATUS			  0x0019
+#define DPLL2_STATUS			  0x001a
+#define DPLL3_STATUS			  0x001b
+#define DPLL4_STATUS			  0x001c
+#define DPLL5_STATUS			  0x001d
+#define DPLL6_STATUS			  0x001e
+#define DPLL7_STATUS			  0x001f
+#define DPLL_SYS_STATUS                   0x0020
+#define DPLL_SYS_APLL_STATUS              0x0021
+#define DPLL0_FILTER_STATUS               0x0044
+#define DPLL1_FILTER_STATUS               0x004c
+#define DPLL2_FILTER_STATUS               0x0054
+#define DPLL3_FILTER_STATUS               0x005c
+#define DPLL4_FILTER_STATUS               0x0064
+#define DPLL5_FILTER_STATUS               0x006c
+#define DPLL6_FILTER_STATUS               0x0074
+#define DPLL7_FILTER_STATUS               0x007c
+#define DPLLSYS_FILTER_STATUS             0x0084
+#define USER_GPIO0_TO_7_STATUS            0x008a
+#define USER_GPIO8_TO_15_STATUS           0x008b
+
+#define GPIO_USER_CONTROL                 0xc160
+#define GPIO0_TO_7_OUT                    0x0000
+#define GPIO8_TO_15_OUT                   0x0001
+#define GPIO0_TO_7_OUT_V520               0x0002
+#define GPIO8_TO_15_OUT_V520              0x0003
+
+#define STICKY_STATUS_CLEAR               0xc164
+
+#define GPIO_TOD_NOTIFICATION_CLEAR       0xc16c
+
+#define ALERT_CFG                         0xc188
+
+#define SYS_DPLL_XO                       0xc194
+
+#define SYS_APLL                          0xc19c
+
+#define INPUT_0                           0xc1b0
+#define INPUT_1                           0xc1c0
+#define INPUT_2                           0xc1d0
+#define INPUT_3                           0xc200
+#define INPUT_4                           0xc210
+#define INPUT_5                           0xc220
+#define INPUT_6                           0xc230
+#define INPUT_7                           0xc240
+#define INPUT_8                           0xc250
+#define INPUT_9                           0xc260
+#define INPUT_10                          0xc280
+#define INPUT_11                          0xc290
+#define INPUT_12                          0xc2a0
+#define INPUT_13                          0xc2b0
+#define INPUT_14                          0xc2c0
+#define INPUT_15                          0xc2d0
+
+#define REF_MON_0                         0xc2e0
+#define REF_MON_1                         0xc2ec
+#define REF_MON_2                         0xc300
+#define REF_MON_3                         0xc30c
+#define REF_MON_4                         0xc318
+#define REF_MON_5                         0xc324
+#define REF_MON_6                         0xc330
+#define REF_MON_7                         0xc33c
+#define REF_MON_8                         0xc348
+#define REF_MON_9                         0xc354
+#define REF_MON_10                        0xc360
+#define REF_MON_11                        0xc36c
+#define REF_MON_12                        0xc380
+#define REF_MON_13                        0xc38c
+#define REF_MON_14                        0xc398
+#define REF_MON_15                        0xc3a4
+
+#define DPLL_0                            0xc3b0
+#define DPLL_CTRL_REG_0                   0x0002
+#define DPLL_CTRL_REG_1                   0x0003
+#define DPLL_CTRL_REG_2                   0x0004
+#define DPLL_TOD_SYNC_CFG                 0x0031
+#define DPLL_COMBO_SLAVE_CFG_0            0x0032
+#define DPLL_COMBO_SLAVE_CFG_1            0x0033
+#define DPLL_SLAVE_REF_CFG                0x0034
+#define DPLL_REF_MODE                     0x0035
+#define DPLL_PHASE_MEASUREMENT_CFG        0x0036
+#define DPLL_MODE                         0x0037
+#define DPLL_MODE_V520                    0x003B
+#define DPLL_1                            0xc400
+#define DPLL_2                            0xc438
+#define DPLL_2_V520                       0xc43c
+#define DPLL_3                            0xc480
+#define DPLL_4                            0xc4b8
+#define DPLL_4_V520                       0xc4bc
+#define DPLL_5                            0xc500
+#define DPLL_6                            0xc538
+#define DPLL_6_V520                       0xc53c
+#define DPLL_7                            0xc580
+#define SYS_DPLL                          0xc5b8
+#define SYS_DPLL_V520                     0xc5bc
+
+#define DPLL_CTRL_0                       0xc600
+#define DPLL_CTRL_DPLL_MANU_REF_CFG       0x0001
+#define DPLL_CTRL_DPLL_FOD_FREQ           0x001c
+#define DPLL_CTRL_COMBO_MASTER_CFG        0x003a
+#define DPLL_CTRL_1                       0xc63c
+#define DPLL_CTRL_2                       0xc680
+#define DPLL_CTRL_3                       0xc6bc
+#define DPLL_CTRL_4                       0xc700
+#define DPLL_CTRL_5                       0xc73c
+#define DPLL_CTRL_6                       0xc780
+#define DPLL_CTRL_7                       0xc7bc
+#define SYS_DPLL_CTRL                     0xc800
+
+#define DPLL_PHASE_0                      0xc818
+/* Signed 42-bit FFO in units of 2^(-53) */
+#define DPLL_WR_PHASE                     0x0000
+#define DPLL_PHASE_1                      0xc81c
+#define DPLL_PHASE_2                      0xc820
+#define DPLL_PHASE_3                      0xc824
+#define DPLL_PHASE_4                      0xc828
+#define DPLL_PHASE_5                      0xc82c
+#define DPLL_PHASE_6                      0xc830
+#define DPLL_PHASE_7                      0xc834
+
+#define DPLL_FREQ_0                       0xc838
+/* Signed 42-bit FFO in units of 2^(-53) */
+#define DPLL_WR_FREQ                      0x0000
+#define DPLL_FREQ_1                       0xc840
+#define DPLL_FREQ_2                       0xc848
+#define DPLL_FREQ_3                       0xc850
+#define DPLL_FREQ_4                       0xc858
+#define DPLL_FREQ_5                       0xc860
+#define DPLL_FREQ_6                       0xc868
+#define DPLL_FREQ_7                       0xc870
+
+#define DPLL_PHASE_PULL_IN_0              0xc880
+#define PULL_IN_OFFSET                    0x0000 /* Signed 32 bit */
+#define PULL_IN_SLOPE_LIMIT               0x0004 /* Unsigned 24 bit */
+#define PULL_IN_CTRL                      0x0007
+#define DPLL_PHASE_PULL_IN_1              0xc888
+#define DPLL_PHASE_PULL_IN_2              0xc890
+#define DPLL_PHASE_PULL_IN_3              0xc898
+#define DPLL_PHASE_PULL_IN_4              0xc8a0
+#define DPLL_PHASE_PULL_IN_5              0xc8a8
+#define DPLL_PHASE_PULL_IN_6              0xc8b0
+#define DPLL_PHASE_PULL_IN_7              0xc8b8
+
+#define GPIO_CFG                          0xc8c0
+#define GPIO_CFG_GBL                      0x0000
+#define GPIO_0                            0xc8c2
+#define GPIO_DCO_INC_DEC                  0x0000
+#define GPIO_OUT_CTRL_0                   0x0001
+#define GPIO_OUT_CTRL_1                   0x0002
+#define GPIO_TOD_TRIG                     0x0003
+#define GPIO_DPLL_INDICATOR               0x0004
+#define GPIO_LOS_INDICATOR                0x0005
+#define GPIO_REF_INPUT_DSQ_0              0x0006
+#define GPIO_REF_INPUT_DSQ_1              0x0007
+#define GPIO_REF_INPUT_DSQ_2              0x0008
+#define GPIO_REF_INPUT_DSQ_3              0x0009
+#define GPIO_MAN_CLK_SEL_0                0x000a
+#define GPIO_MAN_CLK_SEL_1                0x000b
+#define GPIO_MAN_CLK_SEL_2                0x000c
+#define GPIO_SLAVE                        0x000d
+#define GPIO_ALERT_OUT_CFG                0x000e
+#define GPIO_TOD_NOTIFICATION_CFG         0x000f
+#define GPIO_CTRL                         0x0010
+#define GPIO_CTRL_V520                    0x0011
+#define GPIO_1                            0xc8d4
+#define GPIO_2                            0xc8e6
+#define GPIO_3                            0xc900
+#define GPIO_4                            0xc912
+#define GPIO_5                            0xc924
+#define GPIO_6                            0xc936
+#define GPIO_7                            0xc948
+#define GPIO_8                            0xc95a
+#define GPIO_9                            0xc980
+#define GPIO_10                           0xc992
+#define GPIO_11                           0xc9a4
+#define GPIO_12                           0xc9b6
+#define GPIO_13                           0xc9c8
+#define GPIO_14                           0xc9da
+#define GPIO_15                           0xca00
+
+#define OUT_DIV_MUX                       0xca12
+#define OUTPUT_0                          0xca14
+#define OUTPUT_0_V520                     0xca20
+/* FOD frequency output divider value */
+#define OUT_DIV                           0x0000
+#define OUT_DUTY_CYCLE_HIGH               0x0004
+#define OUT_CTRL_0                        0x0008
+#define OUT_CTRL_1                        0x0009
+/* Phase adjustment in FOD cycles */
+#define OUT_PHASE_ADJ                     0x000c
+#define OUTPUT_1                          0xca24
+#define OUTPUT_1_V520                     0xca30
+#define OUTPUT_2                          0xca34
+#define OUTPUT_2_V520                     0xca40
+#define OUTPUT_3                          0xca44
+#define OUTPUT_3_V520                     0xca50
+#define OUTPUT_4                          0xca54
+#define OUTPUT_4_V520                     0xca60
+#define OUTPUT_5                          0xca64
+#define OUTPUT_5_V520                     0xca80
+#define OUTPUT_6                          0xca80
+#define OUTPUT_6_V520                     0xca90
+#define OUTPUT_7                          0xca90
+#define OUTPUT_7_V520                     0xcaa0
+#define OUTPUT_8                          0xcaa0
+#define OUTPUT_8_V520                     0xcab0
+#define OUTPUT_9                          0xcab0
+#define OUTPUT_9_V520                     0xcac0
+#define OUTPUT_10                         0xcac0
+#define OUTPUT_10_V520                     0xcad0
+#define OUTPUT_11                         0xcad0
+#define OUTPUT_11_V520                    0xcae0
+
+#define SERIAL                            0xcae0
+#define SERIAL_V520                       0xcaf0
+
+#define PWM_ENCODER_0                     0xcb00
+#define PWM_ENCODER_1                     0xcb08
+#define PWM_ENCODER_2                     0xcb10
+#define PWM_ENCODER_3                     0xcb18
+#define PWM_ENCODER_4                     0xcb20
+#define PWM_ENCODER_5                     0xcb28
+#define PWM_ENCODER_6                     0xcb30
+#define PWM_ENCODER_7                     0xcb38
+#define PWM_DECODER_0                     0xcb40
+#define PWM_DECODER_1                     0xcb48
+#define PWM_DECODER_1_V520                0xcb4a
+#define PWM_DECODER_2                     0xcb50
+#define PWM_DECODER_2_V520                0xcb54
+#define PWM_DECODER_3                     0xcb58
+#define PWM_DECODER_3_V520                0xcb5e
+#define PWM_DECODER_4                     0xcb60
+#define PWM_DECODER_4_V520                0xcb68
+#define PWM_DECODER_5                     0xcb68
+#define PWM_DECODER_5_V520                0xcb80
+#define PWM_DECODER_6                     0xcb70
+#define PWM_DECODER_6_V520                0xcb8a
+#define PWM_DECODER_7                     0xcb80
+#define PWM_DECODER_7_V520                0xcb94
+#define PWM_DECODER_8                     0xcb88
+#define PWM_DECODER_8_V520                0xcb9e
+#define PWM_DECODER_9                     0xcb90
+#define PWM_DECODER_9_V520                0xcba8
+#define PWM_DECODER_10                    0xcb98
+#define PWM_DECODER_10_V520               0xcbb2
+#define PWM_DECODER_11                    0xcba0
+#define PWM_DECODER_11_V520               0xcbbc
+#define PWM_DECODER_12                    0xcba8
+#define PWM_DECODER_12_V520               0xcbc6
+#define PWM_DECODER_13                    0xcbb0
+#define PWM_DECODER_13_V520               0xcbd0
+#define PWM_DECODER_14                    0xcbb8
+#define PWM_DECODER_14_V520               0xcbda
+#define PWM_DECODER_15                    0xcbc0
+#define PWM_DECODER_15_V520               0xcbe4
+#define PWM_USER_DATA                     0xcbc8
+#define PWM_USER_DATA_V520                0xcbf0
+
+#define TOD_0                             0xcbcc
+#define TOD_0_V520                        0xcc00
+/* Enable TOD counter, output channel sync and even-PPS mode */
+#define TOD_CFG                           0x0000
+#define TOD_CFG_V520                      0x0001
+#define TOD_1                             0xcbce
+#define TOD_1_V520                        0xcc02
+#define TOD_2                             0xcbd0
+#define TOD_2_V520                        0xcc04
+#define TOD_3                             0xcbd2
+#define TOD_3_V520                        0xcc06
+
+#define TOD_WRITE_0                       0xcc00
+#define TOD_WRITE_0_V520                  0xcc10
+/* 8-bit subns, 32-bit ns, 48-bit seconds */
+#define TOD_WRITE                         0x0000
+/* Counter increments after TOD write is completed */
+#define TOD_WRITE_COUNTER                 0x000c
+/* TOD write trigger configuration */
+#define TOD_WRITE_SELECT_CFG_0            0x000d
+/* TOD write trigger selection */
+#define TOD_WRITE_CMD                     0x000f
+#define TOD_WRITE_1                       0xcc10
+#define TOD_WRITE_1_V520                  0xcc20
+#define TOD_WRITE_2                       0xcc20
+#define TOD_WRITE_2_V520                  0xcc30
+#define TOD_WRITE_3                       0xcc30
+#define TOD_WRITE_3_V520                  0xcc40
+
+#define TOD_READ_PRIMARY_0                0xcc40
+#define TOD_READ_PRIMARY_0_V520           0xcc50
+/* 8-bit subns, 32-bit ns, 48-bit seconds */
+#define TOD_READ_PRIMARY                  0x0000
+/* Counter increments after TOD write is completed */
+#define TOD_READ_PRIMARY_COUNTER          0x000b
+/* Read trigger configuration */
+#define TOD_READ_PRIMARY_SEL_CFG_0        0x000c
+/* Read trigger selection */
+#define TOD_READ_PRIMARY_CMD              0x000e
+#define TOD_READ_PRIMARY_CMD_V520         0x000f
+#define TOD_READ_PRIMARY_1                0xcc50
+#define TOD_READ_PRIMARY_1_V520           0xcc60
+#define TOD_READ_PRIMARY_2                0xcc60
+#define TOD_READ_PRIMARY_2_V520           0xcc80
+#define TOD_READ_PRIMARY_3                0xcc80
+#define TOD_READ_PRIMARY_3_V520           0xcc90
+
+#define TOD_READ_SECONDARY_0              0xcc90
+#define TOD_READ_SECONDARY_0_V520         0xcca0
+#define TOD_READ_SECONDARY_1              0xcca0
+#define TOD_READ_SECONDARY_1_V520         0xccb0
+#define TOD_READ_SECONDARY_2              0xccb0
+#define TOD_READ_SECONDARY_2_V520         0xccc0
+#define TOD_READ_SECONDARY_3              0xccc0
+#define TOD_READ_SECONDARY_3_V520         0xccd0
+
+#define OUTPUT_TDC_CFG                    0xccd0
+#define OUTPUT_TDC_CFG_V520               0xcce0
+#define OUTPUT_TDC_0                      0xcd00
+#define OUTPUT_TDC_1                      0xcd08
+#define OUTPUT_TDC_2                      0xcd10
+#define OUTPUT_TDC_3                      0xcd18
+#define INPUT_TDC                         0xcd20
+
+#define SCRATCH                           0xcf50
+#define SCRATCH_V520                      0xcf4c
+
+#define EEPROM                            0xcf68
+#define EEPROM_V520                       0xcf64
+
+#define OTP                               0xcf70
+
+#define BYTE                              0xcf80
+
+/* Bit definitions for the MAJ_REL register */
+#define MAJOR_SHIFT                       (1)
+#define MAJOR_MASK                        (0x7f)
+#define PR_BUILD                          BIT(0)
+
+/* Bit definitions for the USER_GPIO0_TO_7_STATUS register */
+#define GPIO0_LEVEL                       BIT(0)
+#define GPIO1_LEVEL                       BIT(1)
+#define GPIO2_LEVEL                       BIT(2)
+#define GPIO3_LEVEL                       BIT(3)
+#define GPIO4_LEVEL                       BIT(4)
+#define GPIO5_LEVEL                       BIT(5)
+#define GPIO6_LEVEL                       BIT(6)
+#define GPIO7_LEVEL                       BIT(7)
+
+/* Bit definitions for the USER_GPIO8_TO_15_STATUS register */
+#define GPIO8_LEVEL                       BIT(0)
+#define GPIO9_LEVEL                       BIT(1)
+#define GPIO10_LEVEL                      BIT(2)
+#define GPIO11_LEVEL                      BIT(3)
+#define GPIO12_LEVEL                      BIT(4)
+#define GPIO13_LEVEL                      BIT(5)
+#define GPIO14_LEVEL                      BIT(6)
+#define GPIO15_LEVEL                      BIT(7)
+
+/* Bit definitions for the GPIO0_TO_7_OUT register */
+#define GPIO0_DRIVE_LEVEL                 BIT(0)
+#define GPIO1_DRIVE_LEVEL                 BIT(1)
+#define GPIO2_DRIVE_LEVEL                 BIT(2)
+#define GPIO3_DRIVE_LEVEL                 BIT(3)
+#define GPIO4_DRIVE_LEVEL                 BIT(4)
+#define GPIO5_DRIVE_LEVEL                 BIT(5)
+#define GPIO6_DRIVE_LEVEL                 BIT(6)
+#define GPIO7_DRIVE_LEVEL                 BIT(7)
+
+/* Bit definitions for the GPIO8_TO_15_OUT register */
+#define GPIO8_DRIVE_LEVEL                 BIT(0)
+#define GPIO9_DRIVE_LEVEL                 BIT(1)
+#define GPIO10_DRIVE_LEVEL                BIT(2)
+#define GPIO11_DRIVE_LEVEL                BIT(3)
+#define GPIO12_DRIVE_LEVEL                BIT(4)
+#define GPIO13_DRIVE_LEVEL                BIT(5)
+#define GPIO14_DRIVE_LEVEL                BIT(6)
+#define GPIO15_DRIVE_LEVEL                BIT(7)
+
+/* Bit definitions for the DPLL_TOD_SYNC_CFG register */
+#define TOD_SYNC_SOURCE_SHIFT             (1)
+#define TOD_SYNC_SOURCE_MASK              (0x3)
+#define TOD_SYNC_EN                       BIT(0)
+
+/* Bit definitions for the DPLL_MODE register */
+#define WRITE_TIMER_MODE                  BIT(6)
+#define PLL_MODE_SHIFT                    (3)
+#define PLL_MODE_MASK                     (0x7)
+#define STATE_MODE_SHIFT                  (0)
+#define STATE_MODE_MASK                   (0x7)
+
+/* Bit definitions for the GPIO_CFG_GBL register */
+#define SUPPLY_MODE_SHIFT                 (0)
+#define SUPPLY_MODE_MASK                  (0x3)
+
+/* Bit definitions for the GPIO_DCO_INC_DEC register */
+#define INCDEC_DPLL_INDEX_SHIFT           (0)
+#define INCDEC_DPLL_INDEX_MASK            (0x7)
+
+/* Bit definitions for the GPIO_OUT_CTRL_0 register */
+#define CTRL_OUT_0                        BIT(0)
+#define CTRL_OUT_1                        BIT(1)
+#define CTRL_OUT_2                        BIT(2)
+#define CTRL_OUT_3                        BIT(3)
+#define CTRL_OUT_4                        BIT(4)
+#define CTRL_OUT_5                        BIT(5)
+#define CTRL_OUT_6                        BIT(6)
+#define CTRL_OUT_7                        BIT(7)
+
+/* Bit definitions for the GPIO_OUT_CTRL_1 register */
+#define CTRL_OUT_8                        BIT(0)
+#define CTRL_OUT_9                        BIT(1)
+#define CTRL_OUT_10                       BIT(2)
+#define CTRL_OUT_11                       BIT(3)
+#define CTRL_OUT_12                       BIT(4)
+#define CTRL_OUT_13                       BIT(5)
+#define CTRL_OUT_14                       BIT(6)
+#define CTRL_OUT_15                       BIT(7)
+
+/* Bit definitions for the GPIO_TOD_TRIG register */
+#define TOD_TRIG_0                        BIT(0)
+#define TOD_TRIG_1                        BIT(1)
+#define TOD_TRIG_2                        BIT(2)
+#define TOD_TRIG_3                        BIT(3)
+
+/* Bit definitions for the GPIO_DPLL_INDICATOR register */
+#define IND_DPLL_INDEX_SHIFT              (0)
+#define IND_DPLL_INDEX_MASK               (0x7)
+
+/* Bit definitions for the GPIO_LOS_INDICATOR register */
+#define REFMON_INDEX_SHIFT                (0)
+#define REFMON_INDEX_MASK                 (0xf)
+/* Active level of LOS indicator, 0=low 1=high */
+#define ACTIVE_LEVEL                      BIT(4)
+
+/* Bit definitions for the GPIO_REF_INPUT_DSQ_0 register */
+#define DSQ_INP_0                         BIT(0)
+#define DSQ_INP_1                         BIT(1)
+#define DSQ_INP_2                         BIT(2)
+#define DSQ_INP_3                         BIT(3)
+#define DSQ_INP_4                         BIT(4)
+#define DSQ_INP_5                         BIT(5)
+#define DSQ_INP_6                         BIT(6)
+#define DSQ_INP_7                         BIT(7)
+
+/* Bit definitions for the GPIO_REF_INPUT_DSQ_1 register */
+#define DSQ_INP_8                         BIT(0)
+#define DSQ_INP_9                         BIT(1)
+#define DSQ_INP_10                        BIT(2)
+#define DSQ_INP_11                        BIT(3)
+#define DSQ_INP_12                        BIT(4)
+#define DSQ_INP_13                        BIT(5)
+#define DSQ_INP_14                        BIT(6)
+#define DSQ_INP_15                        BIT(7)
+
+/* Bit definitions for the GPIO_REF_INPUT_DSQ_2 register */
+#define DSQ_DPLL_0                        BIT(0)
+#define DSQ_DPLL_1                        BIT(1)
+#define DSQ_DPLL_2                        BIT(2)
+#define DSQ_DPLL_3                        BIT(3)
+#define DSQ_DPLL_4                        BIT(4)
+#define DSQ_DPLL_5                        BIT(5)
+#define DSQ_DPLL_6                        BIT(6)
+#define DSQ_DPLL_7                        BIT(7)
+
+/* Bit definitions for the GPIO_REF_INPUT_DSQ_3 register */
+#define DSQ_DPLL_SYS                      BIT(0)
+#define GPIO_DSQ_LEVEL                    BIT(1)
+
+/* Bit definitions for the GPIO_TOD_NOTIFICATION_CFG register */
+#define DPLL_TOD_SHIFT                    (0)
+#define DPLL_TOD_MASK                     (0x3)
+#define TOD_READ_SECONDARY                BIT(2)
+#define GPIO_ASSERT_LEVEL                 BIT(3)
+
+/* Bit definitions for the GPIO_CTRL register */
+#define GPIO_FUNCTION_EN                  BIT(0)
+#define GPIO_CMOS_OD_MODE                 BIT(1)
+#define GPIO_CONTROL_DIR                  BIT(2)
+#define GPIO_PU_PD_MODE                   BIT(3)
+#define GPIO_FUNCTION_SHIFT               (4)
+#define GPIO_FUNCTION_MASK                (0xf)
+
+/* Bit definitions for the OUT_CTRL_1 register */
+#define OUT_SYNC_DISABLE                  BIT(7)
+#define SQUELCH_VALUE                     BIT(6)
+#define SQUELCH_DISABLE                   BIT(5)
+#define PAD_VDDO_SHIFT                    (2)
+#define PAD_VDDO_MASK                     (0x7)
+#define PAD_CMOSDRV_SHIFT                 (0)
+#define PAD_CMOSDRV_MASK                  (0x3)
+
+/* Bit definitions for the TOD_CFG register */
+#define TOD_EVEN_PPS_MODE                 BIT(2)
+#define TOD_OUT_SYNC_ENABLE               BIT(1)
+#define TOD_ENABLE                        BIT(0)
+
+/* Bit definitions for the TOD_WRITE_SELECT_CFG_0 register */
+#define WR_PWM_DECODER_INDEX_SHIFT        (4)
+#define WR_PWM_DECODER_INDEX_MASK         (0xf)
+#define WR_REF_INDEX_SHIFT                (0)
+#define WR_REF_INDEX_MASK                 (0xf)
+
+/* Bit definitions for the TOD_WRITE_CMD register */
+#define TOD_WRITE_SELECTION_SHIFT         (0)
+#define TOD_WRITE_SELECTION_MASK          (0xf)
+/* 4.8.7 */
+#define TOD_WRITE_TYPE_SHIFT              (4)
+#define TOD_WRITE_TYPE_MASK               (0x3)
+
+/* Bit definitions for the TOD_READ_PRIMARY_SEL_CFG_0 register */
+#define RD_PWM_DECODER_INDEX_SHIFT        (4)
+#define RD_PWM_DECODER_INDEX_MASK         (0xf)
+#define RD_REF_INDEX_SHIFT                (0)
+#define RD_REF_INDEX_MASK                 (0xf)
+
+/* Bit definitions for the TOD_READ_PRIMARY_CMD register */
+#define TOD_READ_TRIGGER_MODE             BIT(4)
+#define TOD_READ_TRIGGER_SHIFT            (0)
+#define TOD_READ_TRIGGER_MASK             (0xf)
+
+/* Bit definitions for the DPLL_CTRL_COMBO_MASTER_CFG register */
+#define COMBO_MASTER_HOLD                 BIT(0)
+
+/* Bit definitions for DPLL_SYS_STATUS register */
+#define DPLL_SYS_STATE_MASK               (0xf)
+
+/* Bit definitions for SYS_APLL_STATUS register */
+#define SYS_APLL_LOSS_LOCK_LIVE_MASK       BIT(0)
+#define SYS_APLL_LOSS_LOCK_LIVE_LOCKED     0
+#define SYS_APLL_LOSS_LOCK_LIVE_UNLOCKED   1
+
+/* Bit definitions for the DPLL0_STATUS register */
+#define DPLL_STATE_MASK                   (0xf)
+#define DPLL_STATE_SHIFT                  (0x0)
+
+/* Values of DPLL_N.DPLL_MODE.PLL_MODE */
+enum pll_mode {
+	PLL_MODE_MIN = 0,
+	PLL_MODE_NORMAL = PLL_MODE_MIN,
+	PLL_MODE_WRITE_PHASE = 1,
+	PLL_MODE_WRITE_FREQUENCY = 2,
+	PLL_MODE_GPIO_INC_DEC = 3,
+	PLL_MODE_SYNTHESIS = 4,
+	PLL_MODE_PHASE_MEASUREMENT = 5,
+	PLL_MODE_DISABLED = 6,
+	PLL_MODE_MAX = PLL_MODE_DISABLED,
+};
+
+enum hw_tod_write_trig_sel {
+	HW_TOD_WR_TRIG_SEL_MIN = 0,
+	HW_TOD_WR_TRIG_SEL_MSB = HW_TOD_WR_TRIG_SEL_MIN,
+	HW_TOD_WR_TRIG_SEL_RESERVED = 1,
+	HW_TOD_WR_TRIG_SEL_TOD_PPS = 2,
+	HW_TOD_WR_TRIG_SEL_IRIGB_PPS = 3,
+	HW_TOD_WR_TRIG_SEL_PWM_PPS = 4,
+	HW_TOD_WR_TRIG_SEL_GPIO = 5,
+	HW_TOD_WR_TRIG_SEL_FOD_SYNC = 6,
+	WR_TRIG_SEL_MAX = HW_TOD_WR_TRIG_SEL_FOD_SYNC,
+};
+
+enum scsr_read_trig_sel {
+	/* CANCEL CURRENT TOD READ; MODULE BECOMES IDLE - NO TRIGGER OCCURS */
+	SCSR_TOD_READ_TRIG_SEL_DISABLE = 0,
+	/* TRIGGER IMMEDIATELY */
+	SCSR_TOD_READ_TRIG_SEL_IMMEDIATE = 1,
+	/* TRIGGER ON RISING EDGE OF INTERNAL TOD PPS SIGNAL */
+	SCSR_TOD_READ_TRIG_SEL_TODPPS = 2,
+	/* TRGGER ON RISING EDGE OF SELECTED REFERENCE INPUT */
+	SCSR_TOD_READ_TRIG_SEL_REFCLK = 3,
+	/* TRIGGER ON RISING EDGE OF SELECTED PWM DECODER 1PPS OUTPUT */
+	SCSR_TOD_READ_TRIG_SEL_PWMPPS = 4,
+	SCSR_TOD_READ_TRIG_SEL_RESERVED = 5,
+	/* TRIGGER WHEN WRITE FREQUENCY EVENT OCCURS  */
+	SCSR_TOD_READ_TRIG_SEL_WRITEFREQUENCYEVENT = 6,
+	/* TRIGGER ON SELECTED GPIO */
+	SCSR_TOD_READ_TRIG_SEL_GPIO = 7,
+	SCSR_TOD_READ_TRIG_SEL_MAX = SCSR_TOD_READ_TRIG_SEL_GPIO,
+};
+
+/* Values STATUS.DPLL_SYS_STATUS.DPLL_SYS_STATE */
+enum dpll_state {
+	DPLL_STATE_MIN = 0,
+	DPLL_STATE_FREERUN = DPLL_STATE_MIN,
+	DPLL_STATE_LOCKACQ = 1,
+	DPLL_STATE_LOCKREC = 2,
+	DPLL_STATE_LOCKED = 3,
+	DPLL_STATE_HOLDOVER = 4,
+	DPLL_STATE_OPEN_LOOP = 5,
+	DPLL_STATE_MAX = DPLL_STATE_OPEN_LOOP,
+};
+
+/* 4.8.7 only */
+enum scsr_tod_write_trig_sel {
+	SCSR_TOD_WR_TRIG_SEL_DISABLE = 0,
+	SCSR_TOD_WR_TRIG_SEL_IMMEDIATE = 1,
+	SCSR_TOD_WR_TRIG_SEL_REFCLK = 2,
+	SCSR_TOD_WR_TRIG_SEL_PWMPPS = 3,
+	SCSR_TOD_WR_TRIG_SEL_TODPPS = 4,
+	SCSR_TOD_WR_TRIG_SEL_SYNCFOD = 5,
+	SCSR_TOD_WR_TRIG_SEL_GPIO = 6,
+	SCSR_TOD_WR_TRIG_SEL_MAX = SCSR_TOD_WR_TRIG_SEL_GPIO,
+};
+
+/* 4.8.7 only */
+enum scsr_tod_write_type_sel {
+	SCSR_TOD_WR_TYPE_SEL_ABSOLUTE = 0,
+	SCSR_TOD_WR_TYPE_SEL_DELTA_PLUS = 1,
+	SCSR_TOD_WR_TYPE_SEL_DELTA_MINUS = 2,
+	SCSR_TOD_WR_TYPE_SEL_MAX = SCSR_TOD_WR_TYPE_SEL_DELTA_MINUS,
+};
+#endif
diff --git a/include/linux/mfd/rsmu.h b/include/linux/mfd/rsmu.h
new file mode 100644
index 000000000000..6870de608233
--- /dev/null
+++ b/include/linux/mfd/rsmu.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Core interface for Renesas Synchronization Management Unit (SMU) devices.
+ *
+ * Copyright (C) 2021 Integrated Device Technology, Inc., a Renesas Company.
+ */
+
+#ifndef __LINUX_MFD_RSMU_H
+#define __LINUX_MFD_RSMU_H
+
+/* The supported devices are ClockMatrix, Sabre and SnowLotus */
+enum rsmu_type {
+	RSMU_CM		= 0x34000,
+	RSMU_SABRE	= 0x33810,
+	RSMU_SL		= 0x19850,
+};
+
+/**
+ *
+ * struct rsmu_ddata - device data structure for sub devices.
+ *
+ * @dev:    i2c/spi device.
+ * @regmap: i2c/spi bus access.
+ * @lock:   mutex used by sub devices to make sure a series of
+ *          bus access requests are not interrupted.
+ * @type:   RSMU device type.
+ * @page:   i2c/spi bus driver internal use only.
+ */
+struct rsmu_ddata {
+	struct device *dev;
+	struct regmap *regmap;
+	struct mutex lock;
+	enum rsmu_type type;
+	u16 page;
+};
+#endif /*  __LINUX_MFD_RSMU_H */
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 23dadf7aeba8..326250996b4e 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -28,6 +28,7 @@ enum migrate_reason {
 	MR_NUMA_MISPLACED,
 	MR_CONTIG_RANGE,
 	MR_LONGTERM_PIN,
+	MR_DEMOTION,
 	MR_TYPES
 };
 
@@ -41,7 +42,8 @@ extern int migrate_page(struct address_space *mapping,
 			struct page *newpage, struct page *page,
 			enum migrate_mode mode);
 extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free,
-		unsigned long private, enum migrate_mode mode, int reason);
+		unsigned long private, enum migrate_mode mode, int reason,
+		unsigned int *ret_succeeded);
 extern struct page *alloc_migration_target(struct page *page, unsigned long private);
 extern int isolate_movable_page(struct page *page, isolate_mode_t mode);
 
@@ -56,7 +58,7 @@ extern int migrate_page_move_mapping(struct address_space *mapping,
 static inline void putback_movable_pages(struct list_head *l) {}
 static inline int migrate_pages(struct list_head *l, new_page_t new,
 		free_page_t free, unsigned long private, enum migrate_mode mode,
-		int reason)
+		int reason, unsigned int *ret_succeeded)
 	{ return -ENOSYS; }
 static inline struct page *alloc_migration_target(struct page *page,
 		unsigned long private)
@@ -166,6 +168,14 @@ struct migrate_vma {
 int migrate_vma_setup(struct migrate_vma *args);
 void migrate_vma_pages(struct migrate_vma *migrate);
 void migrate_vma_finalize(struct migrate_vma *migrate);
+int next_demotion_node(int node);
+
+#else /* CONFIG_MIGRATION disabled: */
+
+static inline int next_demotion_node(int node)
+{
+	return NUMA_NO_NODE;
+}
 
 #endif /* CONFIG_MIGRATION */
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7ca22e6e694a..73a52aba448f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -281,7 +281,6 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_GROWSDOWN	0x00000100	/* general info on the segment */
 #define VM_UFFD_MISSING	0x00000200	/* missing pages tracking */
 #define VM_PFNMAP	0x00000400	/* Page-ranges managed without "struct page", just pure PFN */
-#define VM_DENYWRITE	0x00000800	/* ETXTBSY on write attempts.. */
 #define VM_UFFD_WP	0x00001000	/* wrprotect pages tracking */
 
 #define VM_LOCKED	0x00002000
@@ -829,6 +828,8 @@ static inline void *kvcalloc(size_t n, size_t size, gfp_t flags)
 	return kvmalloc_array(n, size, flags | __GFP_ZERO);
 }
 
+extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize,
+		gfp_t flags);
 extern void kvfree(const void *addr);
 extern void kvfree_sensitive(const void *addr, size_t len);
 
@@ -1214,8 +1215,8 @@ static inline void get_page(struct page *page)
 }
 
 bool __must_check try_grab_page(struct page *page, unsigned int flags);
-__maybe_unused struct page *try_grab_compound_head(struct page *page, int refs,
-						   unsigned int flags);
+struct page *try_grab_compound_head(struct page *page, int refs,
+				    unsigned int flags);
 
 
 static inline __must_check bool try_get_page(struct page *page)
@@ -1847,7 +1848,6 @@ int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
 struct kvec;
 int get_kernel_pages(const struct kvec *iov, int nr_pages, int write,
 			struct page **pages);
-int get_kernel_page(unsigned long start, int write, struct page **pages);
 struct page *get_dump_page(unsigned long addr);
 
 extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
@@ -2580,7 +2580,8 @@ static inline int check_data_rlimit(unsigned long rlim,
 extern int mm_take_all_locks(struct mm_struct *mm);
 extern void mm_drop_all_locks(struct mm_struct *mm);
 
-extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
+extern int set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
+extern int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
 extern struct file *get_mm_exe_file(struct mm_struct *mm);
 extern struct file *get_task_exe_file(struct task_struct *task);
 
@@ -3119,7 +3120,7 @@ extern void memory_failure_queue_kick(int cpu);
 extern int unpoison_memory(unsigned long pfn);
 extern int sysctl_memory_failure_early_kill;
 extern int sysctl_memory_failure_recovery;
-extern void shake_page(struct page *p, int access);
+extern void shake_page(struct page *p);
 extern atomic_long_t num_poisoned_pages __read_mostly;
 extern int soft_offline_page(unsigned long pfn, int flags);
 
diff --git a/include/linux/mman.h b/include/linux/mman.h
index ebb09a964272..b66e91b8176c 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -32,7 +32,8 @@
  * The historical set of flags that all mmap implementations implicitly
  * support when a ->mmap_validate() op is not provided in file_operations.
  *
- * MAP_EXECUTABLE is completely ignored throughout the kernel.
+ * MAP_EXECUTABLE and MAP_DENYWRITE are completely ignored throughout the
+ * kernel.
  */
 #define LEGACY_MAP_MASK (MAP_SHARED \
 		| MAP_PRIVATE \
@@ -153,7 +154,6 @@ static inline unsigned long
 calc_vm_flag_bits(unsigned long flags)
 {
 	return _calc_vm_trans(flags, MAP_GROWSDOWN,  VM_GROWSDOWN ) |
-	       _calc_vm_trans(flags, MAP_DENYWRITE,  VM_DENYWRITE ) |
 	       _calc_vm_trans(flags, MAP_LOCKED,     VM_LOCKED    ) |
 	       _calc_vm_trans(flags, MAP_SYNC,	     VM_SYNC      ) |
 	       arch_calc_vm_flag_bits(flags);
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index 0540f0156f58..b179f1e3541a 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -101,14 +101,14 @@ static inline bool mmap_write_trylock(struct mm_struct *mm)
 
 static inline void mmap_write_unlock(struct mm_struct *mm)
 {
-	up_write(&mm->mmap_lock);
 	__mmap_lock_trace_released(mm, true);
+	up_write(&mm->mmap_lock);
 }
 
 static inline void mmap_write_downgrade(struct mm_struct *mm)
 {
-	downgrade_write(&mm->mmap_lock);
 	__mmap_lock_trace_acquire_returned(mm, false, true);
+	downgrade_write(&mm->mmap_lock);
 }
 
 static inline void mmap_read_lock(struct mm_struct *mm)
@@ -140,8 +140,8 @@ static inline bool mmap_read_trylock(struct mm_struct *mm)
 
 static inline void mmap_read_unlock(struct mm_struct *mm)
 {
-	up_read(&mm->mmap_lock);
 	__mmap_lock_trace_released(mm, false);
+	up_read(&mm->mmap_lock);
 }
 
 static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
@@ -155,8 +155,8 @@ static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
 
 static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
 {
-	up_read_non_owner(&mm->mmap_lock);
 	__mmap_lock_trace_released(mm, false);
+	up_read_non_owner(&mm->mmap_lock);
 }
 
 static inline void mmap_assert_locked(struct mm_struct *mm)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index fcb535560028..6a1d79d84675 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -540,6 +540,10 @@ struct zone {
 	 * is calculated as:
 	 *	present_pages = spanned_pages - absent_pages(pages in holes);
 	 *
+	 * present_early_pages is present pages existing within the zone
+	 * located on memory available since early boot, excluding hotplugged
+	 * memory.
+	 *
 	 * managed_pages is present pages managed by the buddy system, which
 	 * is calculated as (reserved_pages includes pages allocated by the
 	 * bootmem allocator):
@@ -572,6 +576,9 @@ struct zone {
 	atomic_long_t		managed_pages;
 	unsigned long		spanned_pages;
 	unsigned long		present_pages;
+#if defined(CONFIG_MEMORY_HOTPLUG)
+	unsigned long		present_early_pages;
+#endif
 #ifdef CONFIG_CMA
 	unsigned long		cma_pages;
 #endif
@@ -846,6 +853,7 @@ typedef struct pglist_data {
 	enum zone_type kcompactd_highest_zoneidx;
 	wait_queue_head_t kcompactd_wait;
 	struct task_struct *kcompactd;
+	bool proactive_compact_trigger;
 #endif
 	/*
 	 * This is a per-node reserve of pages that are not available
@@ -1342,7 +1350,6 @@ static inline struct mem_section *__nr_to_section(unsigned long nr)
 		return NULL;
 	return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
 }
-extern unsigned long __section_nr(struct mem_section *ms);
 extern size_t mem_section_usage_size(void);
 
 /*
@@ -1365,7 +1372,7 @@ extern size_t mem_section_usage_size(void);
 #define SECTION_TAINT_ZONE_DEVICE	(1UL<<4)
 #define SECTION_MAP_LAST_BIT		(1UL<<5)
 #define SECTION_MAP_MASK		(~(SECTION_MAP_LAST_BIT-1))
-#define SECTION_NID_SHIFT		3
+#define SECTION_NID_SHIFT		6
 
 static inline struct page *__section_mem_map_addr(struct mem_section *section)
 {
@@ -1525,18 +1532,6 @@ void sparse_init(void);
 #define subsection_map_init(_pfn, _nr_pages) do {} while (0)
 #endif /* CONFIG_SPARSEMEM */
 
-/*
- * If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we
- * need to check pfn validity within that MAX_ORDER_NR_PAGES block.
- * pfn_valid_within() should be used in this case; we optimise this away
- * when we have no holes within a MAX_ORDER_NR_PAGES block.
- */
-#ifdef CONFIG_HOLES_IN_ZONE
-#define pfn_valid_within(pfn) pfn_valid(pfn)
-#else
-#define pfn_valid_within(pfn) (1)
-#endif
-
 #endif /* !__GENERATING_BOUNDS.H */
 #endif /* !__ASSEMBLY__ */
 #endif /* _LINUX_MMZONE_H */
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 8e291cfdaf06..ae2e75d15b21 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -16,6 +16,10 @@ typedef unsigned long kernel_ulong_t;
 
 #define PCI_ANY_ID (~0)
 
+enum {
+	PCI_ID_F_VFIO_DRIVER_OVERRIDE = 1,
+};
+
 /**
  * struct pci_device_id - PCI device ID structure
  * @vendor:		Vendor ID to match (or PCI_ANY_ID)
@@ -34,12 +38,14 @@ typedef unsigned long kernel_ulong_t;
  *			Best practice is to use driver_data as an index
  *			into a static list of equivalent device types,
  *			instead of using it as a pointer.
+ * @override_only:	Match only when dev->driver_override is this driver.
  */
 struct pci_device_id {
 	__u32 vendor, device;		/* Vendor and device ID or PCI_ANY_ID*/
 	__u32 subvendor, subdevice;	/* Subsystem ID's or PCI_ANY_ID */
 	__u32 class, class_mask;	/* (class,subclass,prog-if) triplet */
 	kernel_ulong_t driver_data;	/* Data private to the driver */
+	__u32 override_only;
 };
 
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7c41593c1d6a..d79163208dfd 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4641,6 +4641,24 @@ void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list,
 void __hw_addr_init(struct netdev_hw_addr_list *list);
 
 /* Functions used for device addresses handling */
+static inline void
+__dev_addr_set(struct net_device *dev, const u8 *addr, size_t len)
+{
+	memcpy(dev->dev_addr, addr, len);
+}
+
+static inline void dev_addr_set(struct net_device *dev, const u8 *addr)
+{
+	__dev_addr_set(dev, addr, dev->addr_len);
+}
+
+static inline void
+dev_addr_mod(struct net_device *dev, unsigned int offset,
+	     const u8 *addr, size_t len)
+{
+	memcpy(&dev->dev_addr[offset], addr, len);
+}
+
 int dev_addr_add(struct net_device *dev, const unsigned char *addr,
 		 unsigned char addr_type);
 int dev_addr_del(struct net_device *dev, const unsigned char *addr,
diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 0c7d8d1e945d..700ea077ce2d 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -18,6 +18,7 @@ struct ip_conntrack_stat {
 	unsigned int expect_create;
 	unsigned int expect_delete;
 	unsigned int search_restart;
+	unsigned int chaintoolong;
 };
 
 #define NFCT_INFOMASK	7UL
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 9062adfa2fb9..5d6a4158a9a6 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -102,6 +102,7 @@ struct netfs_cache_resources {
 	const struct netfs_cache_ops	*ops;
 	void				*cache_priv;
 	void				*cache_priv2;
+	unsigned int			debug_id;	/* Cookie debug ID */
 };
 
 /*
@@ -137,7 +138,6 @@ struct netfs_read_request {
 	struct list_head	subrequests;	/* Requests to fetch I/O from disk or net */
 	void			*netfs_priv;	/* Private data for the netfs */
 	unsigned int		debug_id;
-	unsigned int		cookie_debug_id;
 	atomic_t		nr_rd_ops;	/* Number of read ops in progress */
 	atomic_t		nr_wr_ops;	/* Number of write ops in progress */
 	size_t			submitted;	/* Amount submitted for I/O so far */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index ce6474594872..b9a8b925db43 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -41,6 +41,11 @@
 #include <linux/mempool.h>
 
 /*
+ * These are the default for number of transports to different server IPs
+ */
+#define NFS_MAX_TRANSPORTS 16
+
+/*
  * These are the default flags for swap requests
  */
 #define NFS_RPC_SWAPFLAGS		(RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS)
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index d71a0e90faeb..2a9acbfe00f0 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -62,6 +62,7 @@ struct nfs_client {
 
 	u32			cl_minorversion;/* NFSv4 minorversion */
 	unsigned int		cl_nconnect;	/* Number of connections */
+	unsigned int		cl_max_connect; /* max number of xprts allowed */
 	const char *		cl_principal;  /* used for machine cred */
 
 #if IS_ENABLED(CONFIG_NFS_V4)
diff --git a/include/linux/of.h b/include/linux/of.h
index 0e786b60bd5d..6f1c41f109bb 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -896,7 +896,7 @@ static inline int of_parse_phandle_with_fixed_args(const struct device_node *np,
 	return -ENOSYS;
 }
 
-static inline int of_count_phandle_with_args(struct device_node *np,
+static inline int of_count_phandle_with_args(const struct device_node *np,
 					     const char *list_name,
 					     const char *cells_name)
 {
diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h
index f821095218b0..8bf2ea859653 100644
--- a/include/linux/of_gpio.h
+++ b/include/linux/of_gpio.h
@@ -49,7 +49,7 @@ static inline struct of_mm_gpio_chip *to_of_mm_gpio_chip(struct gpio_chip *gc)
 	return container_of(gc, struct of_mm_gpio_chip, gc);
 }
 
-extern int of_get_named_gpio_flags(struct device_node *np,
+extern int of_get_named_gpio_flags(const struct device_node *np,
 		const char *list_name, int index, enum of_gpio_flags *flags);
 
 extern int of_mm_gpiochip_add_data(struct device_node *np,
@@ -67,7 +67,7 @@ extern void of_mm_gpiochip_remove(struct of_mm_gpio_chip *mm_gc);
 #include <linux/errno.h>
 
 /* Drivers may not strictly depend on the GPIO support, so let them link. */
-static inline int of_get_named_gpio_flags(struct device_node *np,
+static inline int of_get_named_gpio_flags(const struct device_node *np,
 		const char *list_name, int index, enum of_gpio_flags *flags)
 {
 	if (flags)
@@ -98,7 +98,8 @@ static inline int of_get_named_gpio_flags(struct device_node *np,
  * The above example defines four GPIOs, two of which are not specified.
  * This function will return '4'
  */
-static inline int of_gpio_named_count(struct device_node *np, const char* propname)
+static inline int of_gpio_named_count(const struct device_node *np,
+				      const char *propname)
 {
 	return of_count_phandle_with_args(np, propname, "#gpio-cells");
 }
@@ -109,12 +110,12 @@ static inline int of_gpio_named_count(struct device_node *np, const char* propna
  *
  * Same as of_gpio_named_count, but hard coded to use the 'gpios' property
  */
-static inline int of_gpio_count(struct device_node *np)
+static inline int of_gpio_count(const struct device_node *np)
 {
 	return of_gpio_named_count(np, "gpios");
 }
 
-static inline int of_get_gpio_flags(struct device_node *np, int index,
+static inline int of_get_gpio_flags(const struct device_node *np, int index,
 		      enum of_gpio_flags *flags)
 {
 	return of_get_named_gpio_flags(np, "gpios", index, flags);
@@ -129,7 +130,7 @@ static inline int of_get_gpio_flags(struct device_node *np, int index,
  * Returns GPIO number to use with Linux generic GPIO API, or one of the errno
  * value on the error condition.
  */
-static inline int of_get_named_gpio(struct device_node *np,
+static inline int of_get_named_gpio(const struct device_node *np,
                                    const char *propname, int index)
 {
 	return of_get_named_gpio_flags(np, propname, index, NULL);
@@ -143,7 +144,7 @@ static inline int of_get_named_gpio(struct device_node *np,
  * Returns GPIO number to use with Linux generic GPIO API, or one of the errno
  * value on the error condition.
  */
-static inline int of_get_gpio(struct device_node *np, int index)
+static inline int of_get_gpio(const struct device_node *np, int index)
 {
 	return of_get_gpio_flags(np, index, NULL);
 }
diff --git a/include/linux/once.h b/include/linux/once.h
index ae6f4eb41cbe..d361fb14ac3a 100644
--- a/include/linux/once.h
+++ b/include/linux/once.h
@@ -16,7 +16,7 @@ void __do_once_done(bool *done, struct static_key_true *once_key,
  * out the condition into a nop. DO_ONCE() guarantees type safety of
  * arguments!
  *
- * Not that the following is not equivalent ...
+ * Note that the following is not equivalent ...
  *
  *   DO_ONCE(func, arg);
  *   DO_ONCE(func, arg);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5922031ffab6..a558d67ee86f 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -131,7 +131,7 @@ enum pageflags {
 #ifdef CONFIG_MEMORY_FAILURE
 	PG_hwpoison,		/* hardware poisoned page. Don't touch */
 #endif
-#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
+#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
 	PG_young,
 	PG_idle,
 #endif
@@ -178,6 +178,8 @@ enum pageflags {
 	PG_reported = PG_uptodate,
 };
 
+#define PAGEFLAGS_MASK		((1UL << NR_PAGEFLAGS) - 1)
+
 #ifndef __GENERATING_BOUNDS_H
 
 static inline unsigned long _compound_head(const struct page *page)
@@ -439,7 +441,7 @@ PAGEFLAG_FALSE(HWPoison)
 #define __PG_HWPOISON 0
 #endif
 
-#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
+#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
 TESTPAGEFLAG(Young, young, PF_ANY)
 SETPAGEFLAG(Young, young, PF_ANY)
 TESTCLEARFLAG(Young, young, PF_ANY)
@@ -633,43 +635,6 @@ static inline int PageTransCompound(struct page *page)
 }
 
 /*
- * PageTransCompoundMap is the same as PageTransCompound, but it also
- * guarantees the primary MMU has the entire compound page mapped
- * through pmd_trans_huge, which in turn guarantees the secondary MMUs
- * can also map the entire compound page. This allows the secondary
- * MMUs to call get_user_pages() only once for each compound page and
- * to immediately map the entire compound page with a single secondary
- * MMU fault. If there will be a pmd split later, the secondary MMUs
- * will get an update through the MMU notifier invalidation through
- * split_huge_pmd().
- *
- * Unlike PageTransCompound, this is safe to be called only while
- * split_huge_pmd() cannot run from under us, like if protected by the
- * MMU notifier, otherwise it may result in page->_mapcount check false
- * positives.
- *
- * We have to treat page cache THP differently since every subpage of it
- * would get _mapcount inc'ed once it is PMD mapped.  But, it may be PTE
- * mapped in the current process so comparing subpage's _mapcount to
- * compound_mapcount to filter out PTE mapped case.
- */
-static inline int PageTransCompoundMap(struct page *page)
-{
-	struct page *head;
-
-	if (!PageTransCompound(page))
-		return 0;
-
-	if (PageAnon(page))
-		return atomic_read(&page->_mapcount) < 0;
-
-	head = compound_head(page);
-	/* File THP is PMD mapped and not PTE mapped */
-	return atomic_read(&page->_mapcount) ==
-	       atomic_read(compound_mapcount_ptr(head));
-}
-
-/*
  * PageTransTail returns true for both transparent huge pages
  * and hugetlbfs pages, so it should only be called when it's known
  * that hugetlbfs pages aren't involved.
@@ -815,6 +780,15 @@ static inline int PageSlabPfmemalloc(struct page *page)
 	return PageActive(page);
 }
 
+/*
+ * A version of PageSlabPfmemalloc() for opportunistic checks where the page
+ * might have been freed under us and not be a PageSlab anymore.
+ */
+static inline int __PageSlabPfmemalloc(struct page *page)
+{
+	return PageActive(page);
+}
+
 static inline void SetPageSlabPfmemalloc(struct page *page)
 {
 	VM_BUG_ON_PAGE(!PageSlab(page), page);
@@ -859,7 +833,7 @@ static inline void ClearPageSlabPfmemalloc(struct page *page)
  * alloc-free cycle to prevent from reusing the page.
  */
 #define PAGE_FLAGS_CHECK_AT_PREP	\
-	(((1UL << NR_PAGEFLAGS) - 1) & ~__PG_HWPOISON)
+	(PAGEFLAGS_MASK & ~__PG_HWPOISON)
 
 #define PAGE_FLAGS_PRIVATE				\
 	(1UL << PG_private | 1UL << PG_private_2)
diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index aff81ba31bd8..fabb2e1e087f 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -19,7 +19,7 @@ struct page_ext_operations {
 enum page_ext_flags {
 	PAGE_EXT_OWNER,
 	PAGE_EXT_OWNER_ALLOCATED,
-#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
+#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT)
 	PAGE_EXT_YOUNG,
 	PAGE_EXT_IDLE,
 #endif
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h
index 1e894d34bdce..d8a6aecf99cb 100644
--- a/include/linux/page_idle.h
+++ b/include/linux/page_idle.h
@@ -6,7 +6,7 @@
 #include <linux/page-flags.h>
 #include <linux/page_ext.h>
 
-#ifdef CONFIG_IDLE_PAGE_TRACKING
+#ifdef CONFIG_PAGE_IDLE_FLAG
 
 #ifdef CONFIG_64BIT
 static inline bool page_is_young(struct page *page)
@@ -106,7 +106,7 @@ static inline void clear_page_idle(struct page *page)
 }
 #endif /* CONFIG_64BIT */
 
-#else /* !CONFIG_IDLE_PAGE_TRACKING */
+#else /* !CONFIG_PAGE_IDLE_FLAG */
 
 static inline bool page_is_young(struct page *page)
 {
@@ -135,6 +135,6 @@ static inline void clear_page_idle(struct page *page)
 {
 }
 
-#endif /* CONFIG_IDLE_PAGE_TRACKING */
+#endif /* CONFIG_PAGE_IDLE_FLAG */
 
 #endif /* _LINUX_MM_PAGE_IDLE_H */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index ed02aa522263..62db6b0176b9 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -521,18 +521,17 @@ static inline struct page *read_mapping_page(struct address_space *mapping,
  */
 static inline pgoff_t page_to_index(struct page *page)
 {
-	pgoff_t pgoff;
+	struct page *head;
 
 	if (likely(!PageTransTail(page)))
 		return page->index;
 
+	head = compound_head(page);
 	/*
 	 *  We don't initialize ->index for tail pages: calculate based on
 	 *  head page
 	 */
-	pgoff = compound_head(page)->index;
-	pgoff += page - compound_head(page);
-	return pgoff;
+	return head->index + page - head;
 }
 
 extern pgoff_t hugetlb_basepage_index(struct page *page);
@@ -736,7 +735,7 @@ extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter);
 /*
  * Fault everything in given userspace address range in.
  */
-static inline int fault_in_pages_writeable(char __user *uaddr, int size)
+static inline int fault_in_pages_writeable(char __user *uaddr, size_t size)
 {
 	char __user *end = uaddr + size - 1;
 
@@ -763,7 +762,7 @@ static inline int fault_in_pages_writeable(char __user *uaddr, int size)
 	return 0;
 }
 
-static inline int fault_in_pages_readable(const char __user *uaddr, int size)
+static inline int fault_in_pages_readable(const char __user *uaddr, size_t size)
 {
 	volatile char c;
 	const char __user *end = uaddr + size - 1;
diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index 5ba475ca9078..f16de399d2de 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -122,6 +122,9 @@ static inline void pci_acpi_add_edr_notifier(struct pci_dev *pdev) { }
 static inline void pci_acpi_remove_edr_notifier(struct pci_dev *pdev) { }
 #endif /* CONFIG_PCIE_EDR */
 
+int pci_acpi_set_companion_lookup_hook(struct acpi_device *(*func)(struct pci_dev *));
+void pci_acpi_clear_companion_lookup_hook(void);
+
 #else	/* CONFIG_ACPI */
 static inline void acpi_pci_add_bus(struct pci_bus *bus) { }
 static inline void acpi_pci_remove_bus(struct pci_bus *bus) { }
diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index 50a649d33e68..a48778e1a4ee 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -62,31 +62,32 @@ pci_epc_interface_string(enum pci_epc_interface_type type)
  * @owner: the module owner containing the ops
  */
 struct pci_epc_ops {
-	int	(*write_header)(struct pci_epc *epc, u8 func_no,
+	int	(*write_header)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 				struct pci_epf_header *hdr);
-	int	(*set_bar)(struct pci_epc *epc, u8 func_no,
+	int	(*set_bar)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 			   struct pci_epf_bar *epf_bar);
-	void	(*clear_bar)(struct pci_epc *epc, u8 func_no,
+	void	(*clear_bar)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 			     struct pci_epf_bar *epf_bar);
-	int	(*map_addr)(struct pci_epc *epc, u8 func_no,
+	int	(*map_addr)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 			    phys_addr_t addr, u64 pci_addr, size_t size);
-	void	(*unmap_addr)(struct pci_epc *epc, u8 func_no,
+	void	(*unmap_addr)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 			      phys_addr_t addr);
-	int	(*set_msi)(struct pci_epc *epc, u8 func_no, u8 interrupts);
-	int	(*get_msi)(struct pci_epc *epc, u8 func_no);
-	int	(*set_msix)(struct pci_epc *epc, u8 func_no, u16 interrupts,
-			    enum pci_barno, u32 offset);
-	int	(*get_msix)(struct pci_epc *epc, u8 func_no);
-	int	(*raise_irq)(struct pci_epc *epc, u8 func_no,
+	int	(*set_msi)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+			   u8 interrupts);
+	int	(*get_msi)(struct pci_epc *epc, u8 func_no, u8 vfunc_no);
+	int	(*set_msix)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+			    u16 interrupts, enum pci_barno, u32 offset);
+	int	(*get_msix)(struct pci_epc *epc, u8 func_no, u8 vfunc_no);
+	int	(*raise_irq)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 			     enum pci_epc_irq_type type, u16 interrupt_num);
-	int	(*map_msi_irq)(struct pci_epc *epc, u8 func_no,
+	int	(*map_msi_irq)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 			       phys_addr_t phys_addr, u8 interrupt_num,
 			       u32 entry_size, u32 *msi_data,
 			       u32 *msi_addr_offset);
 	int	(*start)(struct pci_epc *epc);
 	void	(*stop)(struct pci_epc *epc);
 	const struct pci_epc_features* (*get_features)(struct pci_epc *epc,
-						       u8 func_no);
+						       u8 func_no, u8 vfunc_no);
 	struct module *owner;
 };
 
@@ -128,6 +129,8 @@ struct pci_epc_mem {
  *       single window.
  * @num_windows: number of windows supported by device
  * @max_functions: max number of functions that can be configured in this EPC
+ * @max_vfs: Array indicating the maximum number of virtual functions that can
+ *   be associated with each physical function
  * @group: configfs group representing the PCI EPC device
  * @lock: mutex to protect pci_epc ops
  * @function_num_map: bitmap to manage physical function number
@@ -141,6 +144,7 @@ struct pci_epc {
 	struct pci_epc_mem		*mem;
 	unsigned int			num_windows;
 	u8				max_functions;
+	u8				*max_vfs;
 	struct config_group		*group;
 	/* mutex to protect against concurrent access of EP controller */
 	struct mutex			lock;
@@ -208,31 +212,32 @@ void pci_epc_linkup(struct pci_epc *epc);
 void pci_epc_init_notify(struct pci_epc *epc);
 void pci_epc_remove_epf(struct pci_epc *epc, struct pci_epf *epf,
 			enum pci_epc_interface_type type);
-int pci_epc_write_header(struct pci_epc *epc, u8 func_no,
+int pci_epc_write_header(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 			 struct pci_epf_header *hdr);
-int pci_epc_set_bar(struct pci_epc *epc, u8 func_no,
+int pci_epc_set_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 		    struct pci_epf_bar *epf_bar);
-void pci_epc_clear_bar(struct pci_epc *epc, u8 func_no,
+void pci_epc_clear_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 		       struct pci_epf_bar *epf_bar);
-int pci_epc_map_addr(struct pci_epc *epc, u8 func_no,
+int pci_epc_map_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 		     phys_addr_t phys_addr,
 		     u64 pci_addr, size_t size);
-void pci_epc_unmap_addr(struct pci_epc *epc, u8 func_no,
+void pci_epc_unmap_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 			phys_addr_t phys_addr);
-int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 interrupts);
-int pci_epc_get_msi(struct pci_epc *epc, u8 func_no);
-int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u16 interrupts,
-		     enum pci_barno, u32 offset);
-int pci_epc_get_msix(struct pci_epc *epc, u8 func_no);
-int pci_epc_map_msi_irq(struct pci_epc *epc, u8 func_no,
+int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+		    u8 interrupts);
+int pci_epc_get_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no);
+int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
+		     u16 interrupts, enum pci_barno, u32 offset);
+int pci_epc_get_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no);
+int pci_epc_map_msi_irq(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 			phys_addr_t phys_addr, u8 interrupt_num,
 			u32 entry_size, u32 *msi_data, u32 *msi_addr_offset);
-int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no,
+int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 		      enum pci_epc_irq_type type, u16 interrupt_num);
 int pci_epc_start(struct pci_epc *epc);
 void pci_epc_stop(struct pci_epc *epc);
 const struct pci_epc_features *pci_epc_get_features(struct pci_epc *epc,
-						    u8 func_no);
+						    u8 func_no, u8 vfunc_no);
 enum pci_barno
 pci_epc_get_first_free_bar(const struct pci_epc_features *epc_features);
 enum pci_barno pci_epc_get_next_free_bar(const struct pci_epc_features
diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h
index 8292420426f3..009a07147c61 100644
--- a/include/linux/pci-epf.h
+++ b/include/linux/pci-epf.h
@@ -121,8 +121,10 @@ struct pci_epf_bar {
  * @bar: represents the BAR of EPF device
  * @msi_interrupts: number of MSI interrupts required by this function
  * @msix_interrupts: number of MSI-X interrupts required by this function
- * @func_no: unique function number within this endpoint device
+ * @func_no: unique (physical) function number within this endpoint device
+ * @vfunc_no: unique virtual function number within a physical function
  * @epc: the EPC device to which this EPF device is bound
+ * @epf_pf: the physical EPF device to which this virtual EPF device is bound
  * @driver: the EPF driver to which this EPF device is bound
  * @list: to add pci_epf as a list of PCI endpoint functions to pci_epc
  * @nb: notifier block to notify EPF of any EPC events (like linkup)
@@ -133,6 +135,10 @@ struct pci_epf_bar {
  * @sec_epc_bar: represents the BAR of EPF device associated with secondary EPC
  * @sec_epc_func_no: unique (physical) function number within the secondary EPC
  * @group: configfs group associated with the EPF device
+ * @is_bound: indicates if bind notification to function driver has been invoked
+ * @is_vf: true - virtual function, false - physical function
+ * @vfunction_num_map: bitmap to manage virtual function number
+ * @pci_vepf: list of virtual endpoint functions associated with this function
  */
 struct pci_epf {
 	struct device		dev;
@@ -142,8 +148,10 @@ struct pci_epf {
 	u8			msi_interrupts;
 	u16			msix_interrupts;
 	u8			func_no;
+	u8			vfunc_no;
 
 	struct pci_epc		*epc;
+	struct pci_epf		*epf_pf;
 	struct pci_epf_driver	*driver;
 	struct list_head	list;
 	struct notifier_block   nb;
@@ -156,6 +164,10 @@ struct pci_epf {
 	struct pci_epf_bar	sec_epc_bar[6];
 	u8			sec_epc_func_no;
 	struct config_group	*group;
+	unsigned int		is_bound;
+	unsigned int		is_vf;
+	unsigned long		vfunction_num_map;
+	struct list_head	pci_vepf;
 };
 
 /**
@@ -199,4 +211,6 @@ int pci_epf_bind(struct pci_epf *epf);
 void pci_epf_unbind(struct pci_epf *epf);
 struct config_group *pci_epf_type_add_cfs(struct pci_epf *epf,
 					  struct config_group *group);
+int pci_epf_add_vepf(struct pci_epf *epf_pf, struct pci_epf *epf_vf);
+void pci_epf_remove_vepf(struct pci_epf *epf_pf, struct pci_epf *epf_vf);
 #endif /* __LINUX_PCI_EPF_H */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 947430637cac..cd8aa6fce204 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -49,6 +49,12 @@
 			       PCI_STATUS_SIG_TARGET_ABORT | \
 			       PCI_STATUS_PARITY)
 
+/* Number of reset methods used in pci_reset_fn_methods array in pci.c */
+#define PCI_NUM_RESET_METHODS 7
+
+#define PCI_RESET_PROBE		true
+#define PCI_RESET_DO_RESET	false
+
 /*
  * The PCI interface treats multi-function devices as independent
  * devices.  The slot/function address of each device is encoded
@@ -288,21 +294,14 @@ enum pci_bus_speed {
 enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev);
 enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev);
 
-struct pci_cap_saved_data {
-	u16		cap_nr;
-	bool		cap_extended;
-	unsigned int	size;
-	u32		data[];
-};
-
-struct pci_cap_saved_state {
-	struct hlist_node		next;
-	struct pci_cap_saved_data	cap;
+struct pci_vpd {
+	struct mutex	lock;
+	unsigned int	len;
+	u8		cap;
 };
 
 struct irq_affinity;
 struct pcie_link_state;
-struct pci_vpd;
 struct pci_sriov;
 struct pci_p2pdma;
 struct rcec_ea;
@@ -333,6 +332,7 @@ struct pci_dev {
 	struct rcec_ea	*rcec_ea;	/* RCEC cached endpoint association */
 	struct pci_dev  *rcec;          /* Associated RCEC device */
 #endif
+	u32		devcap;		/* PCIe Device Capabilities */
 	u8		pcie_cap;	/* PCIe capability offset */
 	u8		msi_cap;	/* MSI capability offset */
 	u8		msix_cap;	/* MSI-X capability offset */
@@ -388,6 +388,7 @@ struct pci_dev {
 					   supported from root to here */
 	u16		l1ss;		/* L1SS Capability pointer */
 #endif
+	unsigned int	pasid_no_tlp:1;		/* PASID works without TLP Prefix */
 	unsigned int	eetlp_prefix_path:1;	/* End-to-End TLP Prefix */
 
 	pci_channel_state_t error_state;	/* Current connectivity state */
@@ -427,7 +428,6 @@ struct pci_dev {
 	unsigned int	state_saved:1;
 	unsigned int	is_physfn:1;
 	unsigned int	is_virtfn:1;
-	unsigned int	reset_fn:1;
 	unsigned int	is_hotplug_bridge:1;
 	unsigned int	shpc_managed:1;		/* SHPC owned by shpchp */
 	unsigned int	is_thunderbolt:1;	/* Thunderbolt controller */
@@ -473,7 +473,7 @@ struct pci_dev {
 #ifdef CONFIG_PCI_MSI
 	const struct attribute_group **msi_irq_groups;
 #endif
-	struct pci_vpd *vpd;
+	struct pci_vpd	vpd;
 #ifdef CONFIG_PCIE_DPC
 	u16		dpc_cap;
 	unsigned int	dpc_rp_extensions:1;
@@ -505,6 +505,9 @@ struct pci_dev {
 	char		*driver_override; /* Driver name to force a match */
 
 	unsigned long	priv_flags;	/* Private flags for the PCI driver */
+
+	/* These methods index pci_reset_fn_methods[] */
+	u8 reset_methods[PCI_NUM_RESET_METHODS]; /* In priority order */
 };
 
 static inline struct pci_dev *pci_physfn(struct pci_dev *dev)
@@ -526,6 +529,16 @@ static inline int pci_channel_offline(struct pci_dev *pdev)
 	return (pdev->error_state != pci_channel_io_normal);
 }
 
+/*
+ * Currently in ACPI spec, for each PCI host bridge, PCI Segment
+ * Group number is limited to a 16-bit value, therefore (int)-1 is
+ * not a valid PCI domain number, and can be used as a sentinel
+ * value indicating ->domain_nr is not set by the driver (and
+ * CONFIG_PCI_DOMAINS_GENERIC=y archs will set it with
+ * pci_bus_find_domain_nr()).
+ */
+#define PCI_DOMAIN_NR_NOT_SET (-1)
+
 struct pci_host_bridge {
 	struct device	dev;
 	struct pci_bus	*bus;		/* Root bus */
@@ -533,6 +546,7 @@ struct pci_host_bridge {
 	struct pci_ops	*child_ops;
 	void		*sysdata;
 	int		busnr;
+	int		domain_nr;
 	struct list_head windows;	/* resource_entry */
 	struct list_head dma_ranges;	/* dma ranges resource list */
 	u8 (*swizzle_irq)(struct pci_dev *, u8 *); /* Platform IRQ swizzler */
@@ -902,6 +916,35 @@ struct pci_driver {
 	.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID
 
 /**
+ * PCI_DEVICE_DRIVER_OVERRIDE - macro used to describe a PCI device with
+ *                              override_only flags.
+ * @vend: the 16 bit PCI Vendor ID
+ * @dev: the 16 bit PCI Device ID
+ * @driver_override: the 32 bit PCI Device override_only
+ *
+ * This macro is used to create a struct pci_device_id that matches only a
+ * driver_override device. The subvendor and subdevice fields will be set to
+ * PCI_ANY_ID.
+ */
+#define PCI_DEVICE_DRIVER_OVERRIDE(vend, dev, driver_override) \
+	.vendor = (vend), .device = (dev), .subvendor = PCI_ANY_ID, \
+	.subdevice = PCI_ANY_ID, .override_only = (driver_override)
+
+/**
+ * PCI_DRIVER_OVERRIDE_DEVICE_VFIO - macro used to describe a VFIO
+ *                                   "driver_override" PCI device.
+ * @vend: the 16 bit PCI Vendor ID
+ * @dev: the 16 bit PCI Device ID
+ *
+ * This macro is used to create a struct pci_device_id that matches a
+ * specific device. The subvendor and subdevice fields will be set to
+ * PCI_ANY_ID and the driver_override will be set to
+ * PCI_ID_F_VFIO_DRIVER_OVERRIDE.
+ */
+#define PCI_DRIVER_OVERRIDE_DEVICE_VFIO(vend, dev) \
+	PCI_DEVICE_DRIVER_OVERRIDE(vend, dev, PCI_ID_F_VFIO_DRIVER_OVERRIDE)
+
+/**
  * PCI_DEVICE_SUB - macro used to describe a specific PCI device with subsystem
  * @vend: the 16 bit PCI Vendor ID
  * @dev: the 16 bit PCI Device ID
@@ -1228,7 +1271,7 @@ u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev,
 			     enum pci_bus_speed *speed,
 			     enum pcie_link_width *width);
 void pcie_print_link_status(struct pci_dev *dev);
-bool pcie_has_flr(struct pci_dev *dev);
+int pcie_reset_flr(struct pci_dev *dev, bool probe);
 int pcie_flr(struct pci_dev *dev);
 int __pci_reset_function_locked(struct pci_dev *dev);
 int pci_reset_function(struct pci_dev *dev);
@@ -1278,12 +1321,6 @@ int pci_load_saved_state(struct pci_dev *dev,
 			 struct pci_saved_state *state);
 int pci_load_and_free_saved_state(struct pci_dev *dev,
 				  struct pci_saved_state **state);
-struct pci_cap_saved_state *pci_find_saved_cap(struct pci_dev *dev, char cap);
-struct pci_cap_saved_state *pci_find_saved_ext_cap(struct pci_dev *dev,
-						   u16 cap);
-int pci_add_cap_save_buffer(struct pci_dev *dev, char cap, unsigned int size);
-int pci_add_ext_cap_save_buffer(struct pci_dev *dev,
-				u16 cap, unsigned int size);
 int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state);
 int pci_set_power_state(struct pci_dev *dev, pci_power_t state);
 pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state);
@@ -1750,8 +1787,9 @@ static inline void pci_disable_device(struct pci_dev *dev) { }
 static inline int pcim_enable_device(struct pci_dev *pdev) { return -EIO; }
 static inline int pci_assign_resource(struct pci_dev *dev, int i)
 { return -EBUSY; }
-static inline int __pci_register_driver(struct pci_driver *drv,
-					struct module *owner)
+static inline int __must_check __pci_register_driver(struct pci_driver *drv,
+						     struct module *owner,
+						     const char *mod_name)
 { return 0; }
 static inline int pci_register_driver(struct pci_driver *drv)
 { return 0; }
@@ -1891,9 +1929,7 @@ int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma);
 #define pci_resource_end(dev, bar)	((dev)->resource[(bar)].end)
 #define pci_resource_flags(dev, bar)	((dev)->resource[(bar)].flags)
 #define pci_resource_len(dev,bar) \
-	((pci_resource_start((dev), (bar)) == 0 &&	\
-	  pci_resource_end((dev), (bar)) ==		\
-	  pci_resource_start((dev), (bar))) ? 0 :	\
+	((pci_resource_end((dev), (bar)) == 0) ? 0 :	\
 							\
 	 (pci_resource_end((dev), (bar)) -		\
 	  pci_resource_start((dev), (bar)) + 1))
@@ -2260,20 +2296,6 @@ int pci_enable_atomic_ops_to_root(struct pci_dev *dev, u32 cap_mask);
 #define PCI_VPD_LRDT_RO_DATA		PCI_VPD_LRDT_ID(PCI_VPD_LTIN_RO_DATA)
 #define PCI_VPD_LRDT_RW_DATA		PCI_VPD_LRDT_ID(PCI_VPD_LTIN_RW_DATA)
 
-/* Small Resource Data Type Tag Item Names */
-#define PCI_VPD_STIN_END		0x0f	/* End */
-
-#define PCI_VPD_SRDT_END		(PCI_VPD_STIN_END << 3)
-
-#define PCI_VPD_SRDT_TIN_MASK		0x78
-#define PCI_VPD_SRDT_LEN_MASK		0x07
-#define PCI_VPD_LRDT_TIN_MASK		0x7f
-
-#define PCI_VPD_LRDT_TAG_SIZE		3
-#define PCI_VPD_SRDT_TAG_SIZE		1
-
-#define PCI_VPD_INFO_FLD_HDR_SIZE	3
-
 #define PCI_VPD_RO_KEYWORD_PARTNO	"PN"
 #define PCI_VPD_RO_KEYWORD_SERIALNO	"SN"
 #define PCI_VPD_RO_KEYWORD_MFR_ID	"MN"
@@ -2281,83 +2303,45 @@ int pci_enable_atomic_ops_to_root(struct pci_dev *dev, u32 cap_mask);
 #define PCI_VPD_RO_KEYWORD_CHKSUM	"RV"
 
 /**
- * pci_vpd_lrdt_size - Extracts the Large Resource Data Type length
- * @lrdt: Pointer to the beginning of the Large Resource Data Type tag
- *
- * Returns the extracted Large Resource Data Type length.
- */
-static inline u16 pci_vpd_lrdt_size(const u8 *lrdt)
-{
-	return (u16)lrdt[1] + ((u16)lrdt[2] << 8);
-}
-
-/**
- * pci_vpd_lrdt_tag - Extracts the Large Resource Data Type Tag Item
- * @lrdt: Pointer to the beginning of the Large Resource Data Type tag
- *
- * Returns the extracted Large Resource Data Type Tag item.
- */
-static inline u16 pci_vpd_lrdt_tag(const u8 *lrdt)
-{
-	return (u16)(lrdt[0] & PCI_VPD_LRDT_TIN_MASK);
-}
-
-/**
- * pci_vpd_srdt_size - Extracts the Small Resource Data Type length
- * @srdt: Pointer to the beginning of the Small Resource Data Type tag
+ * pci_vpd_alloc - Allocate buffer and read VPD into it
+ * @dev: PCI device
+ * @size: pointer to field where VPD length is returned
  *
- * Returns the extracted Small Resource Data Type length.
+ * Returns pointer to allocated buffer or an ERR_PTR in case of failure
  */
-static inline u8 pci_vpd_srdt_size(const u8 *srdt)
-{
-	return (*srdt) & PCI_VPD_SRDT_LEN_MASK;
-}
+void *pci_vpd_alloc(struct pci_dev *dev, unsigned int *size);
 
 /**
- * pci_vpd_srdt_tag - Extracts the Small Resource Data Type Tag Item
- * @srdt: Pointer to the beginning of the Small Resource Data Type tag
+ * pci_vpd_find_id_string - Locate id string in VPD
+ * @buf: Pointer to buffered VPD data
+ * @len: The length of the buffer area in which to search
+ * @size: Pointer to field where length of id string is returned
  *
- * Returns the extracted Small Resource Data Type Tag Item.
+ * Returns the index of the id string or -ENOENT if not found.
  */
-static inline u8 pci_vpd_srdt_tag(const u8 *srdt)
-{
-	return ((*srdt) & PCI_VPD_SRDT_TIN_MASK) >> 3;
-}
+int pci_vpd_find_id_string(const u8 *buf, unsigned int len, unsigned int *size);
 
 /**
- * pci_vpd_info_field_size - Extracts the information field length
- * @info_field: Pointer to the beginning of an information field header
- *
- * Returns the extracted information field length.
- */
-static inline u8 pci_vpd_info_field_size(const u8 *info_field)
-{
-	return info_field[2];
-}
-
-/**
- * pci_vpd_find_tag - Locates the Resource Data Type tag provided
- * @buf: Pointer to buffered vpd data
- * @len: The length of the vpd buffer
- * @rdt: The Resource Data Type to search for
+ * pci_vpd_find_ro_info_keyword - Locate info field keyword in VPD RO section
+ * @buf: Pointer to buffered VPD data
+ * @len: The length of the buffer area in which to search
+ * @kw: The keyword to search for
+ * @size: Pointer to field where length of found keyword data is returned
  *
- * Returns the index where the Resource Data Type was found or
- * -ENOENT otherwise.
+ * Returns the index of the information field keyword data or -ENOENT if
+ * not found.
  */
-int pci_vpd_find_tag(const u8 *buf, unsigned int len, u8 rdt);
+int pci_vpd_find_ro_info_keyword(const void *buf, unsigned int len,
+				 const char *kw, unsigned int *size);
 
 /**
- * pci_vpd_find_info_keyword - Locates an information field keyword in the VPD
- * @buf: Pointer to buffered vpd data
- * @off: The offset into the buffer at which to begin the search
- * @len: The length of the buffer area, relative to off, in which to search
- * @kw: The keyword to search for
+ * pci_vpd_check_csum - Check VPD checksum
+ * @buf: Pointer to buffered VPD data
+ * @len: VPD size
  *
- * Returns the index where the information field keyword was found or
- * -ENOENT otherwise.
+ * Returns 1 if VPD has no checksum, otherwise 0 or an errno
  */
-int pci_vpd_find_info_keyword(const u8 *buf, unsigned int off,
-			      unsigned int len, const char *kw);
+int pci_vpd_check_csum(const void *buf, unsigned int len);
 
 /* PCI <-> OF binding helpers */
 #ifdef CONFIG_OF
diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h
index 2dac431d94ac..3a10d6ec3ee7 100644
--- a/include/linux/pci_hotplug.h
+++ b/include/linux/pci_hotplug.h
@@ -44,7 +44,7 @@ struct hotplug_slot_ops {
 	int (*get_attention_status)	(struct hotplug_slot *slot, u8 *value);
 	int (*get_latch_status)		(struct hotplug_slot *slot, u8 *value);
 	int (*get_adapter_status)	(struct hotplug_slot *slot, u8 *value);
-	int (*reset_slot)		(struct hotplug_slot *slot, int probe);
+	int (*reset_slot)		(struct hotplug_slot *slot, bool probe);
 };
 
 /**
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 06eccef155ad..011f2f1ea5bb 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2453,7 +2453,8 @@
 #define PCI_VENDOR_ID_TDI               0x192E
 #define PCI_DEVICE_ID_TDI_EHCI          0x0101
 
-#define PCI_VENDOR_ID_FREESCALE		0x1957
+#define PCI_VENDOR_ID_FREESCALE		0x1957	/* duplicate: NXP */
+#define PCI_VENDOR_ID_NXP		0x1957	/* duplicate: FREESCALE */
 #define PCI_DEVICE_ID_MPC8308		0xc006
 #define PCI_DEVICE_ID_MPC8315E		0x00b4
 #define PCI_DEVICE_ID_MPC8315		0x00b5
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index afb3ded0b691..237291196ce2 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -451,6 +451,9 @@ void phylink_mac_change(struct phylink *, bool up);
 void phylink_start(struct phylink *);
 void phylink_stop(struct phylink *);
 
+void phylink_suspend(struct phylink *pl, bool mac_wol);
+void phylink_resume(struct phylink *pl);
+
 void phylink_ethtool_get_wol(struct phylink *, struct ethtool_wolinfo *);
 int phylink_ethtool_set_wol(struct phylink *, struct ethtool_wolinfo *);
 
diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h
index b34a094b2258..860ba4bc5ead 100644
--- a/include/linux/platform_data/dma-dw.h
+++ b/include/linux/platform_data/dma-dw.h
@@ -41,36 +41,39 @@ struct dw_dma_slave {
 
 /**
  * struct dw_dma_platform_data - Controller configuration parameters
+ * @nr_masters: Number of AHB masters supported by the controller
  * @nr_channels: Number of channels supported by hardware (max 8)
  * @chan_allocation_order: Allocate channels starting from 0 or 7
  * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0.
  * @block_size: Maximum block size supported by the controller
- * @nr_masters: Number of AHB masters supported by the controller
  * @data_width: Maximum data width supported by hardware per AHB master
  *		(in bytes, power of 2)
  * @multi_block: Multi block transfers supported by hardware per channel.
  * @max_burst: Maximum value of burst transaction size supported by hardware
  *	       per channel (in units of CTL.SRC_TR_WIDTH/CTL.DST_TR_WIDTH).
  * @protctl: Protection control signals setting per channel.
+ * @quirks: Optional platform quirks.
  */
 struct dw_dma_platform_data {
-	unsigned int	nr_channels;
+	u32		nr_masters;
+	u32		nr_channels;
 #define CHAN_ALLOCATION_ASCENDING	0	/* zero to seven */
 #define CHAN_ALLOCATION_DESCENDING	1	/* seven to zero */
-	unsigned char	chan_allocation_order;
+	u32		chan_allocation_order;
 #define CHAN_PRIORITY_ASCENDING		0	/* chan0 highest */
 #define CHAN_PRIORITY_DESCENDING	1	/* chan7 highest */
-	unsigned char	chan_priority;
-	unsigned int	block_size;
-	unsigned char	nr_masters;
-	unsigned char	data_width[DW_DMA_MAX_NR_MASTERS];
-	unsigned char	multi_block[DW_DMA_MAX_NR_CHANNELS];
+	u32		chan_priority;
+	u32		block_size;
+	u32		data_width[DW_DMA_MAX_NR_MASTERS];
+	u32		multi_block[DW_DMA_MAX_NR_CHANNELS];
 	u32		max_burst[DW_DMA_MAX_NR_CHANNELS];
 #define CHAN_PROTCTL_PRIVILEGED		BIT(0)
 #define CHAN_PROTCTL_BUFFERABLE		BIT(1)
 #define CHAN_PROTCTL_CACHEABLE		BIT(2)
 #define CHAN_PROTCTL_MASK		GENMASK(2, 0)
-	unsigned char	protctl;
+	u32		protctl;
+#define DW_DMA_QUIRK_XBAR_PRESENT	BIT(0)
+	u32		quirks;
 };
 
 #endif /* _PLATFORM_DATA_DMA_DW_H */
diff --git a/include/linux/platform_data/gpio-dwapb.h b/include/linux/platform_data/gpio-dwapb.h
deleted file mode 100644
index 0aa5c6720259..000000000000
--- a/include/linux/platform_data/gpio-dwapb.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright(c) 2014 Intel Corporation.
- */
-
-#ifndef GPIO_DW_APB_H
-#define GPIO_DW_APB_H
-
-#define DWAPB_MAX_GPIOS		32
-
-struct dwapb_port_property {
-	struct fwnode_handle *fwnode;
-	unsigned int	idx;
-	unsigned int	ngpio;
-	unsigned int	gpio_base;
-	int		irq[DWAPB_MAX_GPIOS];
-	bool		irq_shared;
-};
-
-struct dwapb_platform_data {
-	struct dwapb_port_property *properties;
-	unsigned int nports;
-};
-
-#endif
diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h
index 2f274cf52805..17dc5cb6f3f2 100644
--- a/include/linux/platform_data/x86/asus-wmi.h
+++ b/include/linux/platform_data/x86/asus-wmi.h
@@ -61,6 +61,7 @@
 #define ASUS_WMI_DEVID_THROTTLE_THERMAL_POLICY 0x00120075
 
 /* Misc */
+#define ASUS_WMI_DEVID_PANEL_OD		0x00050019
 #define ASUS_WMI_DEVID_CAMERA		0x00060013
 #define ASUS_WMI_DEVID_LID_FLIP		0x00060062
 
@@ -89,6 +90,12 @@
 /* Keyboard dock */
 #define ASUS_WMI_DEVID_KBD_DOCK		0x00120063
 
+/* dgpu on/off */
+#define ASUS_WMI_DEVID_EGPU		0x00090019
+
+/* dgpu on/off */
+#define ASUS_WMI_DEVID_DGPU		0x00090020
+
 /* DSTS masks */
 #define ASUS_WMI_DSTS_STATUS_BIT	0x00000001
 #define ASUS_WMI_DSTS_UNKNOWN_BIT	0x00000002
diff --git a/include/linux/platform_data/x86/clk-lpss.h b/include/linux/platform_data/x86/clk-lpss.h
index 207e1a317800..41df326583f9 100644
--- a/include/linux/platform_data/x86/clk-lpss.h
+++ b/include/linux/platform_data/x86/clk-lpss.h
@@ -15,6 +15,6 @@ struct lpss_clk_data {
 	struct clk *clk;
 };
 
-extern int lpt_clk_init(void);
+extern int lpss_atom_clk_init(void);
 
 #endif /* __CLK_LPSS_H */
diff --git a/include/linux/pm_clock.h b/include/linux/pm_clock.h
index 8ddc7860e131..ada3a0ab10bf 100644
--- a/include/linux/pm_clock.h
+++ b/include/linux/pm_clock.h
@@ -47,6 +47,7 @@ extern void pm_clk_remove(struct device *dev, const char *con_id);
 extern void pm_clk_remove_clk(struct device *dev, struct clk *clk);
 extern int pm_clk_suspend(struct device *dev);
 extern int pm_clk_resume(struct device *dev);
+extern int devm_pm_clk_create(struct device *dev);
 #else
 static inline bool pm_clk_no_clocks(struct device *dev)
 {
@@ -83,6 +84,10 @@ static inline void pm_clk_remove(struct device *dev, const char *con_id)
 static inline void pm_clk_remove_clk(struct device *dev, struct clk *clk)
 {
 }
+static inline int devm_pm_clk_create(struct device *dev)
+{
+	return -EINVAL;
+}
 #endif
 
 #ifdef CONFIG_HAVE_CLK
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index aab8b35e9f8a..222da43b7096 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -59,6 +59,8 @@ extern void pm_runtime_put_suppliers(struct device *dev);
 extern void pm_runtime_new_link(struct device *dev);
 extern void pm_runtime_drop_link(struct device_link *link);
 
+extern int devm_pm_runtime_enable(struct device *dev);
+
 /**
  * pm_runtime_get_if_in_use - Conditionally bump up runtime PM usage counter.
  * @dev: Target device.
@@ -253,6 +255,8 @@ static inline void __pm_runtime_disable(struct device *dev, bool c) {}
 static inline void pm_runtime_allow(struct device *dev) {}
 static inline void pm_runtime_forbid(struct device *dev) {}
 
+static inline int devm_pm_runtime_enable(struct device *dev) { return 0; }
+
 static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {}
 static inline void pm_runtime_get_noresume(struct device *dev) {}
 static inline void pm_runtime_put_noidle(struct device *dev) {}
diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index 307094ebb88c..b65c877d92b8 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -72,6 +72,8 @@ extern struct posix_acl *get_posix_acl(struct inode *, int);
 extern int set_posix_acl(struct user_namespace *, struct inode *, int,
 			 struct posix_acl *);
 
+struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type);
+
 #ifdef CONFIG_FS_POSIX_ACL
 int posix_acl_chmod(struct user_namespace *, struct inode *, umode_t);
 extern int posix_acl_create(struct inode *, umode_t *, struct posix_acl **,
@@ -84,7 +86,6 @@ extern int simple_set_acl(struct user_namespace *, struct inode *,
 extern int simple_acl_create(struct inode *, struct inode *);
 
 struct posix_acl *get_cached_acl(struct inode *inode, int type);
-struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type);
 void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl);
 void forget_cached_acl(struct inode *inode, int type);
 void forget_all_cached_acls(struct inode *inode);
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 259af4f97f50..85b656f82d75 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -2,7 +2,7 @@
 #ifndef __KERNEL_PRINTK__
 #define __KERNEL_PRINTK__
 
-#include <stdarg.h>
+#include <linux/stdarg.h>
 #include <linux/init.h>
 #include <linux/kern_levels.h>
 #include <linux/linkage.h>
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index a0b7e43049d5..725c9b784e60 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -404,7 +404,7 @@ int pwm_set_chip_data(struct pwm_device *pwm, void *data);
 void *pwm_get_chip_data(struct pwm_device *pwm);
 
 int pwmchip_add(struct pwm_chip *chip);
-int pwmchip_remove(struct pwm_chip *chip);
+void pwmchip_remove(struct pwm_chip *chip);
 
 int devm_pwmchip_add(struct device *dev, struct pwm_chip *chip);
 
diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h
index 0165824c5128..c0475d1c9885 100644
--- a/include/linux/qcom_scm.h
+++ b/include/linux/qcom_scm.h
@@ -109,6 +109,12 @@ extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt,
 			     u32 *resp);
 
 extern int qcom_scm_qsmmu500_wait_safe_toggle(bool en);
+
+extern int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val,
+			      u64 limit_node, u32 node_id, u64 version);
+extern int qcom_scm_lmh_profile_change(u32 profile_id);
+extern bool qcom_scm_lmh_dcvsh_available(void);
+
 #else
 
 #include <linux/errno.h>
@@ -170,5 +176,13 @@ static inline int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt,
 
 static inline int qcom_scm_qsmmu500_wait_safe_toggle(bool en)
 		{ return -ENODEV; }
+
+static inline int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val,
+				     u64 limit_node, u32 node_id, u64 version)
+		{ return -ENODEV; }
+
+static inline int qcom_scm_lmh_profile_change(u32 profile_id) { return -ENODEV; }
+
+static inline bool qcom_scm_lmh_dcvsh_available(void) { return -ENODEV; }
 #endif
 #endif
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index a5b37bc10865..83c09ac36b13 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -369,9 +369,8 @@ enum rsc_handling_status {
  * @da_to_va:	optional platform hook to perform address translations
  * @parse_fw:	parse firmware to extract information (e.g. resource table)
  * @handle_rsc:	optional platform hook to handle vendor resources. Should return
- * RSC_HANDLED if resource was handled, RSC_IGNORED if not handled and a
- * negative value on error
- * @load_rsc_table:	load resource table from firmware image
+ *		RSC_HANDLED if resource was handled, RSC_IGNORED if not handled
+ *		and a negative value on error
  * @find_loaded_rsc_table: find the loaded resource table from firmware image
  * @get_loaded_rsc_table: get resource table installed in memory
  *			  by external entity
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 426e98e0b675..352c6127cb90 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -142,22 +142,14 @@ struct rw_semaphore {
 #define DECLARE_RWSEM(lockname) \
 	struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname)
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-extern void  __rwsem_init(struct rw_semaphore *rwsem, const char *name,
+extern void  __init_rwsem(struct rw_semaphore *rwsem, const char *name,
 			  struct lock_class_key *key);
-#else
-static inline void  __rwsem_init(struct rw_semaphore *rwsem, const char *name,
-				 struct lock_class_key *key)
-{
-}
-#endif
 
 #define init_rwsem(sem)						\
 do {								\
 	static struct lock_class_key __key;			\
 								\
-	init_rwbase_rt(&(sem)->rwbase);			\
-	__rwsem_init((sem), #sem, &__key);			\
+	__init_rwsem((sem), #sem, &__key);			\
 } while (0)
 
 static __always_inline int rwsem_is_locked(struct rw_semaphore *sem)
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index ecf87484814f..266754a55327 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -39,6 +39,12 @@ struct sg_table {
 	unsigned int orig_nents;	/* original size of list */
 };
 
+struct sg_append_table {
+	struct sg_table sgt;		/* The scatter list table */
+	struct scatterlist *prv;	/* last populated sge in the table */
+	unsigned int total_nents;	/* Total entries in the table */
+};
+
 /*
  * Notes on SG table design.
  *
@@ -280,19 +286,51 @@ typedef struct scatterlist *(sg_alloc_fn)(unsigned int, gfp_t);
 typedef void (sg_free_fn)(struct scatterlist *, unsigned int);
 
 void __sg_free_table(struct sg_table *, unsigned int, unsigned int,
-		     sg_free_fn *);
+		     sg_free_fn *, unsigned int);
 void sg_free_table(struct sg_table *);
+void sg_free_append_table(struct sg_append_table *sgt);
 int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int,
 		     struct scatterlist *, unsigned int, gfp_t, sg_alloc_fn *);
 int sg_alloc_table(struct sg_table *, unsigned int, gfp_t);
-struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt,
-		struct page **pages, unsigned int n_pages, unsigned int offset,
-		unsigned long size, unsigned int max_segment,
-		struct scatterlist *prv, unsigned int left_pages,
-		gfp_t gfp_mask);
-int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
-			      unsigned int n_pages, unsigned int offset,
-			      unsigned long size, gfp_t gfp_mask);
+int sg_alloc_append_table_from_pages(struct sg_append_table *sgt,
+				     struct page **pages, unsigned int n_pages,
+				     unsigned int offset, unsigned long size,
+				     unsigned int max_segment,
+				     unsigned int left_pages, gfp_t gfp_mask);
+int sg_alloc_table_from_pages_segment(struct sg_table *sgt, struct page **pages,
+				      unsigned int n_pages, unsigned int offset,
+				      unsigned long size,
+				      unsigned int max_segment, gfp_t gfp_mask);
+
+/**
+ * sg_alloc_table_from_pages - Allocate and initialize an sg table from
+ *			       an array of pages
+ * @sgt:	 The sg table header to use
+ * @pages:	 Pointer to an array of page pointers
+ * @n_pages:	 Number of pages in the pages array
+ * @offset:      Offset from start of the first page to the start of a buffer
+ * @size:        Number of valid bytes in the buffer (after offset)
+ * @gfp_mask:	 GFP allocation mask
+ *
+ *  Description:
+ *    Allocate and initialize an sg table from a list of pages. Contiguous
+ *    ranges of the pages are squashed into a single scatterlist node. A user
+ *    may provide an offset at a start and a size of valid data in a buffer
+ *    specified by the page array. The returned sg table is released by
+ *    sg_free_table.
+ *
+ * Returns:
+ *   0 on success, negative error on failure
+ */
+static inline int sg_alloc_table_from_pages(struct sg_table *sgt,
+					    struct page **pages,
+					    unsigned int n_pages,
+					    unsigned int offset,
+					    unsigned long size, gfp_t gfp_mask)
+{
+	return sg_alloc_table_from_pages_segment(sgt, pages, n_pages, offset,
+						 size, UINT_MAX, gfp_mask);
+}
 
 #ifdef CONFIG_SGL_ALLOC
 struct scatterlist *sgl_alloc_order(unsigned long long length,
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index e24b1fe348e3..5561486fddef 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -174,13 +174,13 @@ static inline gfp_t current_gfp_context(gfp_t flags)
 }
 
 #ifdef CONFIG_LOCKDEP
-extern void __fs_reclaim_acquire(void);
-extern void __fs_reclaim_release(void);
+extern void __fs_reclaim_acquire(unsigned long ip);
+extern void __fs_reclaim_release(unsigned long ip);
 extern void fs_reclaim_acquire(gfp_t gfp_mask);
 extern void fs_reclaim_release(gfp_t gfp_mask);
 #else
-static inline void __fs_reclaim_acquire(void) { }
-static inline void __fs_reclaim_release(void) { }
+static inline void __fs_reclaim_acquire(unsigned long ip) { }
+static inline void __fs_reclaim_release(unsigned long ip) { }
 static inline void fs_reclaim_acquire(gfp_t gfp_mask) { }
 static inline void fs_reclaim_release(gfp_t gfp_mask) { }
 #endif
@@ -306,7 +306,7 @@ set_active_memcg(struct mem_cgroup *memcg)
 {
 	struct mem_cgroup *old;
 
-	if (in_interrupt()) {
+	if (!in_task()) {
 		old = this_cpu_read(int_active_memcg);
 		this_cpu_write(int_active_memcg, memcg);
 	} else {
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 2462f7d07695..00ed419dd464 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -4,6 +4,7 @@
 
 #include <linux/uidgid.h>
 #include <linux/atomic.h>
+#include <linux/percpu_counter.h>
 #include <linux/refcount.h>
 #include <linux/ratelimit.h>
 
@@ -13,7 +14,7 @@
 struct user_struct {
 	refcount_t __count;	/* reference count */
 #ifdef CONFIG_EPOLL
-	atomic_long_t epoll_watches; /* The number of file descriptors currently watched */
+	struct percpu_counter epoll_watches; /* The number of file descriptors currently watched */
 #endif
 	unsigned long unix_inflight;	/* How many files in flight in unix sockets */
 	atomic_long_t pipe_bufs;  /* how many pages are allocated in pipe buffers */
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 8e775ce517bb..166158b6e917 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -18,6 +18,7 @@ struct shmem_inode_info {
 	unsigned long		flags;
 	unsigned long		alloced;	/* data pages alloced to file */
 	unsigned long		swapped;	/* subtotal assigned to swap */
+	pgoff_t			fallocend;	/* highest fallocate endindex */
 	struct list_head        shrinklist;     /* shrinkable hpage inodes */
 	struct list_head	swaplist;	/* chain of maybes on swap */
 	struct shared_policy	policy;		/* NUMA memory alloc policy */
@@ -31,7 +32,7 @@ struct shmem_sb_info {
 	struct percpu_counter used_blocks;  /* How many are allocated */
 	unsigned long max_inodes;   /* How many inodes are allowed */
 	unsigned long free_inodes;  /* How many are left for allocation */
-	spinlock_t stat_lock;	    /* Serialize shmem_sb_info changes */
+	raw_spinlock_t stat_lock;   /* Serialize shmem_sb_info changes */
 	umode_t mode;		    /* Mount mode for root directory */
 	unsigned char huge;	    /* Whether to try for hugepages */
 	kuid_t uid;		    /* Mount uid for root directory */
@@ -85,7 +86,12 @@ extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
 extern int shmem_unuse(unsigned int type, bool frontswap,
 		       unsigned long *fs_pages_to_unuse);
 
-extern bool shmem_huge_enabled(struct vm_area_struct *vma);
+extern bool shmem_is_huge(struct vm_area_struct *vma,
+			  struct inode *inode, pgoff_t index);
+static inline bool shmem_huge_enabled(struct vm_area_struct *vma)
+{
+	return shmem_is_huge(vma, file_inode(vma->vm_file), vma->vm_pgoff);
+}
 extern unsigned long shmem_swap_usage(struct vm_area_struct *vma);
 extern unsigned long shmem_partial_swap_usage(struct address_space *mapping,
 						pgoff_t start, pgoff_t end);
@@ -93,9 +99,8 @@ extern unsigned long shmem_partial_swap_usage(struct address_space *mapping,
 /* Flag allocation requirements to shmem_getpage */
 enum sgp_type {
 	SGP_READ,	/* don't exceed i_size, don't allocate page */
+	SGP_NOALLOC,	/* similar, but fail on hole or use fallocated page */
 	SGP_CACHE,	/* don't exceed i_size, may allocate page */
-	SGP_NOHUGE,	/* like SGP_CACHE, but no huge pages */
-	SGP_HUGE,	/* like SGP_CACHE, huge pages preferred */
 	SGP_WRITE,	/* may exceed i_size, may allocate !Uptodate page */
 	SGP_FALLOC,	/* like SGP_WRITE, but make existing page Uptodate */
 };
@@ -119,6 +124,18 @@ static inline bool shmem_file(struct file *file)
 	return shmem_mapping(file->f_mapping);
 }
 
+/*
+ * If fallocate(FALLOC_FL_KEEP_SIZE) has been used, there may be pages
+ * beyond i_size's notion of EOF, which fallocate has committed to reserving:
+ * which split_huge_page() must therefore not delete.  This use of a single
+ * "fallocend" per inode errs on the side of not deleting a reservation when
+ * in doubt: there are plenty of cases when it preserves unreserved pages.
+ */
+static inline pgoff_t shmem_fallocend(struct inode *inode, pgoff_t eof)
+{
+	return max(eof, SHMEM_I(inode)->fallocend);
+}
+
 extern bool shmem_charge(struct inode *inode, long pages);
 extern void shmem_uncharge(struct inode *inode, long pages);
 
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index dcde82a4434c..85499f0586b0 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -10,6 +10,7 @@
 #include <linux/kfence.h>
 #include <linux/kobject.h>
 #include <linux/reciprocal_div.h>
+#include <linux/local_lock.h>
 
 enum stat_item {
 	ALLOC_FASTPATH,		/* Allocation from cpu slab */
@@ -40,6 +41,10 @@ enum stat_item {
 	CPU_PARTIAL_DRAIN,	/* Drain cpu partial to node partial */
 	NR_SLUB_STAT_ITEMS };
 
+/*
+ * When changing the layout, make sure freelist and tid are still compatible
+ * with this_cpu_cmpxchg_double() alignment requirements.
+ */
 struct kmem_cache_cpu {
 	void **freelist;	/* Pointer to next available object */
 	unsigned long tid;	/* Globally unique transaction id */
@@ -47,6 +52,7 @@ struct kmem_cache_cpu {
 #ifdef CONFIG_SLUB_CPU_PARTIAL
 	struct page *partial;	/* Partially allocated frozen slabs */
 #endif
+	local_lock_t lock;	/* Protects the fields above */
 #ifdef CONFIG_SLUB_STATS
 	unsigned stat[NR_SLUB_STAT_ITEMS];
 #endif
diff --git a/include/linux/soc/marvell/octeontx2/asm.h b/include/linux/soc/marvell/octeontx2/asm.h
index 28c04d918f0f..fa1d6af0164e 100644
--- a/include/linux/soc/marvell/octeontx2/asm.h
+++ b/include/linux/soc/marvell/octeontx2/asm.h
@@ -22,12 +22,17 @@
 			 : [rs]"r" (ioaddr));           \
 	(result);                                       \
 })
+/*
+ * STEORL store to memory with release semantics.
+ * This will avoid using DMB barrier after each LMTST
+ * operation.
+ */
 #define cn10k_lmt_flush(val, addr)			\
 ({							\
 	__asm__ volatile(".cpu  generic+lse\n"		\
-			 "steor %x[rf],[%[rs]]"		\
-			 : [rf]"+r"(val)		\
-			 : [rs]"r"(addr));		\
+			 "steorl %x[rf],[%[rs]]"		\
+			 : [rf] "+r"(val)		\
+			 : [rs] "r"(addr));		\
 })
 #else
 #define otx2_lmt_flush(ioaddr)          ({ 0; })
diff --git a/include/linux/soc/qcom/smd-rpm.h b/include/linux/soc/qcom/smd-rpm.h
index f2645ec52520..60e66fc9b6bf 100644
--- a/include/linux/soc/qcom/smd-rpm.h
+++ b/include/linux/soc/qcom/smd-rpm.h
@@ -29,6 +29,7 @@ struct qcom_smd_rpm;
 #define QCOM_SMD_RPM_NCPB	0x6270636E
 #define QCOM_SMD_RPM_OCMEM_PWR	0x706d636f
 #define QCOM_SMD_RPM_QPIC_CLK	0x63697071
+#define QCOM_SMD_RPM_QUP_CLK	0x707571
 #define QCOM_SMD_RPM_SMPA	0x61706d73
 #define QCOM_SMD_RPM_SMPB	0x62706d73
 #define QCOM_SMD_RPM_SPDM	0x63707362
diff --git a/include/linux/spi/max7301.h b/include/linux/spi/max7301.h
index 433c20e2f46e..21449067aedb 100644
--- a/include/linux/spi/max7301.h
+++ b/include/linux/spi/max7301.h
@@ -2,7 +2,7 @@
 #ifndef LINUX_SPI_MAX7301_H
 #define LINUX_SPI_MAX7301_H
 
-#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
 
 /*
  * Some registers must be read back to modify.
diff --git a/include/linux/stat.h b/include/linux/stat.h
index fff27e603814..7df06931f25d 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -34,6 +34,10 @@ struct kstat {
 	 STATX_ATTR_ENCRYPTED |				\
 	 STATX_ATTR_VERITY				\
 	 )/* Attrs corresponding to FS_*_FL flags */
+#define KSTAT_ATTR_VFS_FLAGS				\
+	(STATX_ATTR_IMMUTABLE |				\
+	 STATX_ATTR_APPEND				\
+	 ) /* Attrs corresponding to S_* flags that are enforced by the VFS */
 	u64		ino;
 	dev_t		dev;
 	dev_t		rdev;
diff --git a/include/linux/stdarg.h b/include/linux/stdarg.h
new file mode 100644
index 000000000000..c8dc7f4f390c
--- /dev/null
+++ b/include/linux/stdarg.h
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#ifndef _LINUX_STDARG_H
+#define _LINUX_STDARG_H
+
+typedef __builtin_va_list va_list;
+#define va_start(v, l)	__builtin_va_start(v, l)
+#define va_end(v)	__builtin_va_end(v)
+#define va_arg(v, T)	__builtin_va_arg(v, T)
+#define va_copy(d, s)	__builtin_va_copy(d, s)
+
+#endif
diff --git a/include/linux/string.h b/include/linux/string.h
index b48d2d28e0b1..5e96d656be7a 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -6,7 +6,7 @@
 #include <linux/types.h>	/* for size_t */
 #include <linux/stddef.h>	/* for NULL */
 #include <linux/errno.h>	/* for E2BIG */
-#include <stdarg.h>
+#include <linux/stdarg.h>
 #include <uapi/linux/string.h>
 
 extern char *strndup_user(const char __user *, long);
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 8b5d5c97553e..a4661646adc9 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -14,6 +14,7 @@
 #include <linux/socket.h>
 #include <linux/in.h>
 #include <linux/in6.h>
+#include <linux/refcount.h>
 
 #include <linux/sunrpc/msg_prot.h>
 #include <linux/sunrpc/sched.h>
@@ -35,7 +36,7 @@ struct rpc_sysfs_client;
  * The high-level client handle
  */
 struct rpc_clnt {
-	atomic_t		cl_count;	/* Number of references */
+	refcount_t		cl_count;	/* Number of references */
 	unsigned int		cl_clid;	/* client id */
 	struct list_head	cl_clients;	/* Global list of clients */
 	struct list_head	cl_tasks;	/* List of tasks */
@@ -81,6 +82,7 @@ struct rpc_clnt {
 		struct work_struct	cl_work;
 	};
 	const struct cred	*cl_cred;
+	unsigned int		cl_max_connect; /* max number of transports not to the same IP */
 };
 
 /*
@@ -135,6 +137,7 @@ struct rpc_create_args {
 	char			*client_name;
 	struct svc_xprt		*bc_xprt;	/* NFSv4.1 backchannel */
 	const struct cred	*cred;
+	unsigned int		max_connect;
 };
 
 struct rpc_add_xprt_test {
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index f0f846fa396e..064c96157d1f 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -277,13 +277,13 @@ struct svc_rqst {
 #define	RQ_VICTIM	(5)			/* about to be shut down */
 #define	RQ_BUSY		(6)			/* request is busy */
 #define	RQ_DATA		(7)			/* request has data */
-#define RQ_AUTHERR	(8)			/* Request status is auth error */
 	unsigned long		rq_flags;	/* flags field */
 	ktime_t			rq_qtime;	/* enqueue time */
 
 	void *			rq_argp;	/* decoded arguments */
 	void *			rq_resp;	/* xdr'd results */
 	void *			rq_auth_data;	/* flavor-specific data */
+	__be32			rq_auth_stat;	/* authentication status */
 	int			rq_auth_slack;	/* extra space xdr code
 						 * should leave in head
 						 * for krb5i, krb5p.
@@ -537,7 +537,6 @@ unsigned int	   svc_fill_write_vector(struct svc_rqst *rqstp,
 char		  *svc_fill_symlink_pathname(struct svc_rqst *rqstp,
 					     struct kvec *first, void *p,
 					     size_t total);
-__be32		   svc_return_autherr(struct svc_rqst *rqstp, __be32 auth_err);
 __be32		   svc_generic_init_request(struct svc_rqst *rqstp,
 					    const struct svc_program *progp,
 					    struct svc_process_info *procinfo);
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index b0003866a249..6d9cc9080aca 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -127,7 +127,7 @@ struct auth_ops {
 	char *	name;
 	struct module *owner;
 	int	flavour;
-	int	(*accept)(struct svc_rqst *rq, __be32 *authp);
+	int	(*accept)(struct svc_rqst *rq);
 	int	(*release)(struct svc_rqst *rq);
 	void	(*domain_release)(struct auth_domain *);
 	int	(*set_client)(struct svc_rqst *rq);
@@ -149,7 +149,7 @@ struct auth_ops {
 
 struct svc_xprt;
 
-extern int	svc_authenticate(struct svc_rqst *rqstp, __be32 *authp);
+extern int	svc_authenticate(struct svc_rqst *rqstp);
 extern int	svc_authorise(struct svc_rqst *rqstp);
 extern int	svc_set_client(struct svc_rqst *rqstp);
 extern int	svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops);
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index b15c1f07162d..955ea4d7af0b 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -431,6 +431,7 @@ void			xprt_release_write(struct rpc_xprt *, struct rpc_task *);
 #define XPRT_CONGESTED		(9)
 #define XPRT_CWND_WAIT		(10)
 #define XPRT_WRITE_SPACE	(11)
+#define XPRT_SND_IS_COOKIE	(12)
 
 static inline void xprt_set_connected(struct rpc_xprt *xprt)
 {
diff --git a/include/linux/sunrpc/xprtmultipath.h b/include/linux/sunrpc/xprtmultipath.h
index b19addc8b715..bbb8a5fa0816 100644
--- a/include/linux/sunrpc/xprtmultipath.h
+++ b/include/linux/sunrpc/xprtmultipath.h
@@ -18,6 +18,7 @@ struct rpc_xprt_switch {
 	unsigned int		xps_id;
 	unsigned int		xps_nxprts;
 	unsigned int		xps_nactive;
+	unsigned int		xps_nunique_destaddr_xprts;
 	atomic_long_t		xps_queuelen;
 	struct list_head	xps_xprt_list;
 
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 6f5a43251593..ba52f3a3478e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -408,7 +408,7 @@ static inline bool node_reclaim_enabled(void)
 
 extern void check_move_unevictable_pages(struct pagevec *pvec);
 
-extern int kswapd_run(int nid);
+extern void kswapd_run(int nid);
 extern void kswapd_stop(int nid);
 
 #ifdef CONFIG_SWAP
@@ -721,7 +721,13 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
 #endif
 
 #if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
-extern void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask);
+extern void __cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask);
+static inline  void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask)
+{
+	if (mem_cgroup_disabled())
+		return;
+	__cgroup_throttle_swaprate(page, gfp_mask);
+}
 #else
 static inline void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask)
 {
@@ -730,8 +736,22 @@ static inline void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask)
 
 #ifdef CONFIG_MEMCG_SWAP
 extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry);
-extern int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry);
-extern void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages);
+extern int __mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry);
+static inline int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
+{
+	if (mem_cgroup_disabled())
+		return 0;
+	return __mem_cgroup_try_charge_swap(page, entry);
+}
+
+extern void __mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages);
+static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages)
+{
+	if (mem_cgroup_disabled())
+		return;
+	__mem_cgroup_uncharge_swap(entry, nr_pages);
+}
+
 extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg);
 extern bool mem_cgroup_swap_full(struct page *page);
 #else
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 216854a5e513..b0cb2a9973f4 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -2,6 +2,7 @@
 #ifndef __LINUX_SWIOTLB_H
 #define __LINUX_SWIOTLB_H
 
+#include <linux/device.h>
 #include <linux/dma-direction.h>
 #include <linux/init.h>
 #include <linux/types.h>
@@ -72,7 +73,8 @@ extern enum swiotlb_force swiotlb_force;
  *		range check to see if the memory was in fact allocated by this
  *		API.
  * @nslabs:	The number of IO TLB blocks (in groups of 64) between @start and
- *		@end. This is command line adjustable via setup_io_tlb_npages.
+ *		@end. For default swiotlb, this is command line adjustable via
+ *		setup_io_tlb_npages.
  * @used:	The number of used IO TLB block.
  * @list:	The free list describing the number of free entries available
  *		from each index.
@@ -83,6 +85,8 @@ extern enum swiotlb_force swiotlb_force;
  *		unmap calls.
  * @debugfs:	The dentry to debugfs.
  * @late_alloc:	%true if allocated using the page allocator
+ * @force_bounce: %true if swiotlb bouncing is forced
+ * @for_alloc:  %true if the pool is used for memory allocation
  */
 struct io_tlb_mem {
 	phys_addr_t start;
@@ -93,29 +97,42 @@ struct io_tlb_mem {
 	spinlock_t lock;
 	struct dentry *debugfs;
 	bool late_alloc;
+	bool force_bounce;
+	bool for_alloc;
 	struct io_tlb_slot {
 		phys_addr_t orig_addr;
 		size_t alloc_size;
 		unsigned int list;
-	} slots[];
+	} *slots;
 };
-extern struct io_tlb_mem *io_tlb_default_mem;
+extern struct io_tlb_mem io_tlb_default_mem;
 
-static inline bool is_swiotlb_buffer(phys_addr_t paddr)
+static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
 {
-	struct io_tlb_mem *mem = io_tlb_default_mem;
+	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
 
 	return mem && paddr >= mem->start && paddr < mem->end;
 }
 
+static inline bool is_swiotlb_force_bounce(struct device *dev)
+{
+	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+
+	return mem && mem->force_bounce;
+}
+
 void __init swiotlb_exit(void);
 unsigned int swiotlb_max_segment(void);
 size_t swiotlb_max_mapping_size(struct device *dev);
-bool is_swiotlb_active(void);
+bool is_swiotlb_active(struct device *dev);
 void __init swiotlb_adjust_size(unsigned long size);
 #else
 #define swiotlb_force SWIOTLB_NO_FORCE
-static inline bool is_swiotlb_buffer(phys_addr_t paddr)
+static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
+{
+	return false;
+}
+static inline bool is_swiotlb_force_bounce(struct device *dev)
 {
 	return false;
 }
@@ -131,7 +148,7 @@ static inline size_t swiotlb_max_mapping_size(struct device *dev)
 	return SIZE_MAX;
 }
 
-static inline bool is_swiotlb_active(void)
+static inline bool is_swiotlb_active(struct device *dev)
 {
 	return false;
 }
@@ -144,4 +161,28 @@ static inline void swiotlb_adjust_size(unsigned long size)
 extern void swiotlb_print_info(void);
 extern void swiotlb_set_max_segment(unsigned int);
 
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+struct page *swiotlb_alloc(struct device *dev, size_t size);
+bool swiotlb_free(struct device *dev, struct page *page, size_t size);
+
+static inline bool is_swiotlb_for_alloc(struct device *dev)
+{
+	return dev->dma_io_tlb_mem->for_alloc;
+}
+#else
+static inline struct page *swiotlb_alloc(struct device *dev, size_t size)
+{
+	return NULL;
+}
+static inline bool swiotlb_free(struct device *dev, struct page *page,
+				size_t size)
+{
+	return false;
+}
+static inline bool is_swiotlb_for_alloc(struct device *dev)
+{
+	return false;
+}
+#endif /* CONFIG_DMA_RESTRICTED_POOL */
+
 #endif /* __LINUX_SWIOTLB_H */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 2b47584eb843..252243c7783d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -915,6 +915,7 @@ asmlinkage long sys_mincore(unsigned long start, size_t len,
 asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior);
 asmlinkage long sys_process_madvise(int pidfd, const struct iovec __user *vec,
 			size_t vlen, int behavior, unsigned int flags);
+asmlinkage long sys_process_mrelease(int pidfd, unsigned int flags);
 asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
 			unsigned long prot, unsigned long pgoff,
 			unsigned long flags);
@@ -1372,6 +1373,9 @@ long ksys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf);
 long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems,
 			    unsigned int nsops,
 			    const struct old_timespec32 __user *timeout);
+long __do_semtimedop(int semid, struct sembuf *tsems, unsigned int nsops,
+		     const struct timespec64 *timeout,
+		     struct ipc_namespace *ns);
 
 int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
 		int __user *optlen);
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index d296f3b88fb9..c314893970b3 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -285,7 +285,7 @@ struct thermal_zone_params {
 };
 
 /**
- * struct thermal_zone_of_device_ops - scallbacks for handling DT based zones
+ * struct thermal_zone_of_device_ops - callbacks for handling DT based zones
  *
  * Mandatory:
  * @get_temp: a pointer to a function that reads the sensor temperature.
@@ -404,12 +404,13 @@ static inline void thermal_zone_device_unregister(
 	struct thermal_zone_device *tz)
 { }
 static inline struct thermal_cooling_device *
-thermal_cooling_device_register(char *type, void *devdata,
+thermal_cooling_device_register(const char *type, void *devdata,
 	const struct thermal_cooling_device_ops *ops)
 { return ERR_PTR(-ENODEV); }
 static inline struct thermal_cooling_device *
 thermal_of_cooling_device_register(struct device_node *np,
-	char *type, void *devdata, const struct thermal_cooling_device_ops *ops)
+	const char *type, void *devdata,
+	const struct thermal_cooling_device_ops *ops)
 { return ERR_PTR(-ENODEV); }
 static inline struct thermal_cooling_device *
 devm_thermal_of_cooling_device_register(struct device *dev,
diff --git a/include/linux/threads.h b/include/linux/threads.h
index 18d5a74bcc3d..c34173e6c5f1 100644
--- a/include/linux/threads.h
+++ b/include/linux/threads.h
@@ -38,7 +38,7 @@
  * Define a minimum number of pids per cpu.  Heuristically based
  * on original pid max of 32k for 32 cpus.  Also, increase the
  * minimum settable value for pid_max on the running system based
- * on similar defaults.  See kernel/pid.c:pidmap_init() for details.
+ * on similar defaults.  See kernel/pid.c:pid_idr_init() for details.
  */
 #define PIDS_PER_CPU_DEFAULT	1024
 #define PIDS_PER_CPU_MIN	8
diff --git a/include/linux/time64.h b/include/linux/time64.h
index 5117cb5b5656..81b9686a2079 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -25,7 +25,9 @@ struct itimerspec64 {
 #define TIME64_MIN			(-TIME64_MAX - 1)
 
 #define KTIME_MAX			((s64)~((u64)1 << 63))
+#define KTIME_MIN			(-KTIME_MAX - 1)
 #define KTIME_SEC_MAX			(KTIME_MAX / NSEC_PER_SEC)
+#define KTIME_SEC_MIN			(KTIME_MIN / NSEC_PER_SEC)
 
 /*
  * Limits for settimeofday():
@@ -124,10 +126,13 @@ static inline bool timespec64_valid_settod(const struct timespec64 *ts)
  */
 static inline s64 timespec64_to_ns(const struct timespec64 *ts)
 {
-	/* Prevent multiplication overflow */
-	if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX)
+	/* Prevent multiplication overflow / underflow */
+	if (ts->tv_sec >= KTIME_SEC_MAX)
 		return KTIME_MAX;
 
+	if (ts->tv_sec <= KTIME_SEC_MIN)
+		return KTIME_MIN;
+
 	return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
 }
 
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 8e0631a4b046..3e475eeb5a99 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -310,8 +310,10 @@ enum {
 	TRACE_EVENT_FL_NO_SET_FILTER_BIT,
 	TRACE_EVENT_FL_IGNORE_ENABLE_BIT,
 	TRACE_EVENT_FL_TRACEPOINT_BIT,
+	TRACE_EVENT_FL_DYNAMIC_BIT,
 	TRACE_EVENT_FL_KPROBE_BIT,
 	TRACE_EVENT_FL_UPROBE_BIT,
+	TRACE_EVENT_FL_EPROBE_BIT,
 };
 
 /*
@@ -321,8 +323,10 @@ enum {
  *  NO_SET_FILTER - Set when filter has error and is to be ignored
  *  IGNORE_ENABLE - For trace internal events, do not enable with debugfs file
  *  TRACEPOINT    - Event is a tracepoint
+ *  DYNAMIC       - Event is a dynamic event (created at run time)
  *  KPROBE        - Event is a kprobe
  *  UPROBE        - Event is a uprobe
+ *  EPROBE        - Event is an event probe
  */
 enum {
 	TRACE_EVENT_FL_FILTERED		= (1 << TRACE_EVENT_FL_FILTERED_BIT),
@@ -330,8 +334,10 @@ enum {
 	TRACE_EVENT_FL_NO_SET_FILTER	= (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT),
 	TRACE_EVENT_FL_IGNORE_ENABLE	= (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT),
 	TRACE_EVENT_FL_TRACEPOINT	= (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
+	TRACE_EVENT_FL_DYNAMIC		= (1 << TRACE_EVENT_FL_DYNAMIC_BIT),
 	TRACE_EVENT_FL_KPROBE		= (1 << TRACE_EVENT_FL_KPROBE_BIT),
 	TRACE_EVENT_FL_UPROBE		= (1 << TRACE_EVENT_FL_UPROBE_BIT),
+	TRACE_EVENT_FL_EPROBE		= (1 << TRACE_EVENT_FL_EPROBE_BIT),
 };
 
 #define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE)
@@ -347,7 +353,14 @@ struct trace_event_call {
 	struct trace_event	event;
 	char			*print_fmt;
 	struct event_filter	*filter;
-	void			*mod;
+	/*
+	 * Static events can disappear with modules,
+	 * where as dynamic ones need their own ref count.
+	 */
+	union {
+		void				*module;
+		atomic_t			refcnt;
+	};
 	void			*data;
 
 	/* See the TRACE_EVENT_FL_* flags above */
@@ -363,6 +376,42 @@ struct trace_event_call {
 #endif
 };
 
+#ifdef CONFIG_DYNAMIC_EVENTS
+bool trace_event_dyn_try_get_ref(struct trace_event_call *call);
+void trace_event_dyn_put_ref(struct trace_event_call *call);
+bool trace_event_dyn_busy(struct trace_event_call *call);
+#else
+static inline bool trace_event_dyn_try_get_ref(struct trace_event_call *call)
+{
+	/* Without DYNAMIC_EVENTS configured, nothing should be calling this */
+	return false;
+}
+static inline void trace_event_dyn_put_ref(struct trace_event_call *call)
+{
+}
+static inline bool trace_event_dyn_busy(struct trace_event_call *call)
+{
+	/* Nothing should call this without DYNAIMIC_EVENTS configured. */
+	return true;
+}
+#endif
+
+static inline bool trace_event_try_get_ref(struct trace_event_call *call)
+{
+	if (call->flags & TRACE_EVENT_FL_DYNAMIC)
+		return trace_event_dyn_try_get_ref(call);
+	else
+		return try_module_get(call->module);
+}
+
+static inline void trace_event_put_ref(struct trace_event_call *call)
+{
+	if (call->flags & TRACE_EVENT_FL_DYNAMIC)
+		trace_event_dyn_put_ref(call);
+	else
+		module_put(call->module);
+}
+
 #ifdef CONFIG_PERF_EVENTS
 static inline bool bpf_prog_array_valid(struct trace_event_call *call)
 {
@@ -634,6 +683,7 @@ enum event_trigger_type {
 	ETT_EVENT_ENABLE	= (1 << 3),
 	ETT_EVENT_HIST		= (1 << 4),
 	ETT_HIST_ENABLE		= (1 << 5),
+	ETT_EVENT_EPROBE	= (1 << 6),
 };
 
 extern int filter_match_preds(struct event_filter *filter, void *rec);
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index ab58696d0ddd..28031b15f878 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -475,7 +475,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
  *	*
  *	* The declared 'local variable' is called '__entry'
  *	*
- *	* __field(pid_t, prev_prid) is equivalent to a standard declaration:
+ *	* __field(pid_t, prev_pid) is equivalent to a standard declaration:
  *	*
  *	*	pid_t	prev_pid;
  *	*
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index c05e903cef02..ac0394087f7d 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -200,16 +200,6 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
 		n = _copy_to_user(to, from, n);
 	return n;
 }
-#ifdef CONFIG_COMPAT
-static __always_inline unsigned long __must_check
-copy_in_user(void __user *to, const void __user *from, unsigned long n)
-{
-	might_fault();
-	if (access_ok(to, n) && access_ok(from, n))
-		n = raw_copy_in_user(to, from, n);
-	return n;
-}
-#endif
 
 #ifndef copy_mc_to_kernel
 /*
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 82c3c3e819e0..5265024e8b90 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -47,6 +47,7 @@ struct iov_iter {
 		};
 		loff_t xarray_start;
 	};
+	size_t truncated;
 };
 
 static inline enum iter_type iov_iter_type(const struct iov_iter *i)
@@ -254,8 +255,10 @@ static inline void iov_iter_truncate(struct iov_iter *i, u64 count)
 	 * conversion in assignement is by definition greater than all
 	 * values of size_t, including old i->count.
 	 */
-	if (i->count > count)
+	if (i->count > count) {
+		i->truncated += i->count - count;
 		i->count = count;
+	}
 }
 
 /*
@@ -264,6 +267,7 @@ static inline void iov_iter_truncate(struct iov_iter *i, u64 count)
  */
 static inline void iov_iter_reexpand(struct iov_iter *i, size_t count)
 {
+	i->truncated -= count - i->count;
 	i->count = count;
 }
 
diff --git a/include/linux/units.h b/include/linux/units.h
index 4a25e0cc8fb3..681fc652e3d7 100644
--- a/include/linux/units.h
+++ b/include/linux/units.h
@@ -20,9 +20,13 @@
 #define PICO	1000000000000ULL
 #define FEMTO	1000000000000000ULL
 
-#define MILLIWATT_PER_WATT	1000L
-#define MICROWATT_PER_MILLIWATT	1000L
-#define MICROWATT_PER_WATT	1000000L
+#define HZ_PER_KHZ		1000UL
+#define KHZ_PER_MHZ		1000UL
+#define HZ_PER_MHZ		1000000UL
+
+#define MILLIWATT_PER_WATT	1000UL
+#define MICROWATT_PER_MILLIWATT	1000UL
+#define MICROWATT_PER_WATT	1000000UL
 
 #define ABSOLUTE_ZERO_MILLICELSIUS -273150
 
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 331d2ccf0bcc..33cea484d1ad 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -60,16 +60,16 @@ extern int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
 
 extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
 			    unsigned long src_start, unsigned long len,
-			    bool *mmap_changing, __u64 mode);
+			    atomic_t *mmap_changing, __u64 mode);
 extern ssize_t mfill_zeropage(struct mm_struct *dst_mm,
 			      unsigned long dst_start,
 			      unsigned long len,
-			      bool *mmap_changing);
+			      atomic_t *mmap_changing);
 extern ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long dst_start,
-			      unsigned long len, bool *mmap_changing);
+			      unsigned long len, atomic_t *mmap_changing);
 extern int mwriteprotect_range(struct mm_struct *dst_mm,
 			       unsigned long start, unsigned long len,
-			       bool enable_wp, bool *mmap_changing);
+			       bool enable_wp, atomic_t *mmap_changing);
 
 /* mm helpers */
 static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 8cfe49d201dd..3972ab765de1 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -43,17 +43,17 @@ struct vdpa_vq_state_split {
  * @last_used_idx: used index
  */
 struct vdpa_vq_state_packed {
-        u16	last_avail_counter:1;
-        u16	last_avail_idx:15;
-        u16	last_used_counter:1;
-        u16	last_used_idx:15;
+	u16	last_avail_counter:1;
+	u16	last_avail_idx:15;
+	u16	last_used_counter:1;
+	u16	last_used_idx:15;
 };
 
 struct vdpa_vq_state {
-     union {
-          struct vdpa_vq_state_split split;
-          struct vdpa_vq_state_packed packed;
-     };
+	union {
+		struct vdpa_vq_state_split split;
+		struct vdpa_vq_state_packed packed;
+	};
 };
 
 struct vdpa_mgmt_dev;
@@ -65,6 +65,7 @@ struct vdpa_mgmt_dev;
  * @config: the configuration ops for this device.
  * @index: device index
  * @features_valid: were features initialized? for legacy guests
+ * @use_va: indicate whether virtual address must be used by this device
  * @nvqs: maximum number of supported virtqueues
  * @mdev: management device pointer; caller must setup when registering device as part
  *	  of dev_add() mgmtdev ops callback before invoking _vdpa_register_device().
@@ -75,6 +76,7 @@ struct vdpa_device {
 	const struct vdpa_config_ops *config;
 	unsigned int index;
 	bool features_valid;
+	bool use_va;
 	int nvqs;
 	struct vdpa_mgmt_dev *mdev;
 };
@@ -90,6 +92,16 @@ struct vdpa_iova_range {
 };
 
 /**
+ * Corresponding file area for device memory mapping
+ * @file: vma->vm_file for the mapping
+ * @offset: mapping offset in the vm_file
+ */
+struct vdpa_map_file {
+	struct file *file;
+	u64 offset;
+};
+
+/**
  * struct vdpa_config_ops - operations for configuring a vDPA device.
  * Note: vDPA device drivers are required to implement all of the
  * operations unless it is mentioned to be optional in the following
@@ -131,7 +143,7 @@ struct vdpa_iova_range {
  *				@vdev: vdpa device
  *				@idx: virtqueue index
  *				@state: pointer to returned state (last_avail_idx)
- * @get_vq_notification: 	Get the notification area for a virtqueue
+ * @get_vq_notification:	Get the notification area for a virtqueue
  *				@vdev: vdpa device
  *				@idx: virtqueue index
  *				Returns the notifcation area
@@ -171,6 +183,9 @@ struct vdpa_iova_range {
  * @set_status:			Set the device status
  *				@vdev: vdpa device
  *				@status: virtio device status
+ * @reset:			Reset device
+ *				@vdev: vdpa device
+ *				Returns integer: success (0) or error (< 0)
  * @get_config_size:		Get the size of the configuration space
  *				@vdev: vdpa device
  *				Returns size_t: configuration size
@@ -255,6 +270,7 @@ struct vdpa_config_ops {
 	u32 (*get_vendor_id)(struct vdpa_device *vdev);
 	u8 (*get_status)(struct vdpa_device *vdev);
 	void (*set_status)(struct vdpa_device *vdev, u8 status);
+	int (*reset)(struct vdpa_device *vdev);
 	size_t (*get_config_size)(struct vdpa_device *vdev);
 	void (*get_config)(struct vdpa_device *vdev, unsigned int offset,
 			   void *buf, unsigned int len);
@@ -266,7 +282,7 @@ struct vdpa_config_ops {
 	/* DMA ops */
 	int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb);
 	int (*dma_map)(struct vdpa_device *vdev, u64 iova, u64 size,
-		       u64 pa, u32 perm);
+		       u64 pa, u32 perm, void *opaque);
 	int (*dma_unmap)(struct vdpa_device *vdev, u64 iova, u64 size);
 
 	/* Free device resources */
@@ -275,7 +291,8 @@ struct vdpa_config_ops {
 
 struct vdpa_device *__vdpa_alloc_device(struct device *parent,
 					const struct vdpa_config_ops *config,
-					size_t size, const char *name);
+					size_t size, const char *name,
+					bool use_va);
 
 /**
  * vdpa_alloc_device - allocate and initilaize a vDPA device
@@ -285,15 +302,16 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
  * @parent: the parent device
  * @config: the bus operations that is supported by this device
  * @name: name of the vdpa device
+ * @use_va: indicate whether virtual address must be used by this device
  *
  * Return allocated data structure or ERR_PTR upon error
  */
-#define vdpa_alloc_device(dev_struct, member, parent, config, name)   \
+#define vdpa_alloc_device(dev_struct, member, parent, config, name, use_va)   \
 			  container_of(__vdpa_alloc_device( \
 				       parent, config, \
 				       sizeof(dev_struct) + \
 				       BUILD_BUG_ON_ZERO(offsetof( \
-				       dev_struct, member)), name), \
+				       dev_struct, member)), name, use_va), \
 				       dev_struct, member)
 
 int vdpa_register_device(struct vdpa_device *vdev, int nvqs);
@@ -348,27 +366,27 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev)
 	return vdev->dma_dev;
 }
 
-static inline void vdpa_reset(struct vdpa_device *vdev)
+static inline int vdpa_reset(struct vdpa_device *vdev)
 {
-        const struct vdpa_config_ops *ops = vdev->config;
+	const struct vdpa_config_ops *ops = vdev->config;
 
 	vdev->features_valid = false;
-        ops->set_status(vdev, 0);
+	return ops->reset(vdev);
 }
 
 static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)
 {
-        const struct vdpa_config_ops *ops = vdev->config;
+	const struct vdpa_config_ops *ops = vdev->config;
 
 	vdev->features_valid = true;
-        return ops->set_features(vdev, features);
+	return ops->set_features(vdev, features);
 }
 
-
-static inline void vdpa_get_config(struct vdpa_device *vdev, unsigned offset,
-				   void *buf, unsigned int len)
+static inline void vdpa_get_config(struct vdpa_device *vdev,
+				   unsigned int offset, void *buf,
+				   unsigned int len)
 {
-        const struct vdpa_config_ops *ops = vdev->config;
+	const struct vdpa_config_ops *ops = vdev->config;
 
 	/*
 	 * Config accesses aren't supposed to trigger before features are set.
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index a2c5b30e1763..b53a9557884a 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -15,13 +15,28 @@
 #include <linux/poll.h>
 #include <uapi/linux/vfio.h>
 
+/*
+ * VFIO devices can be placed in a set, this allows all devices to share this
+ * structure and the VFIO core will provide a lock that is held around
+ * open_device()/close_device() for all devices in the set.
+ */
+struct vfio_device_set {
+	void *set_id;
+	struct mutex lock;
+	struct list_head device_list;
+	unsigned int device_count;
+};
+
 struct vfio_device {
 	struct device *dev;
 	const struct vfio_device_ops *ops;
 	struct vfio_group *group;
+	struct vfio_device_set *dev_set;
+	struct list_head dev_set_list;
 
 	/* Members below here are private, not for driver use */
 	refcount_t refcount;
+	unsigned int open_count;
 	struct completion comp;
 	struct list_head group_next;
 };
@@ -29,8 +44,8 @@ struct vfio_device {
 /**
  * struct vfio_device_ops - VFIO bus driver device callbacks
  *
- * @open: Called when userspace creates new file descriptor for device
- * @release: Called when userspace releases file descriptor for device
+ * @open_device: Called when the first file descriptor is opened for this device
+ * @close_device: Opposite of open_device
  * @read: Perform read(2) on device file descriptor
  * @write: Perform write(2) on device file descriptor
  * @ioctl: Perform ioctl(2) on device file descriptor, supporting VFIO_DEVICE_*
@@ -43,8 +58,8 @@ struct vfio_device {
  */
 struct vfio_device_ops {
 	char	*name;
-	int	(*open)(struct vfio_device *vdev);
-	void	(*release)(struct vfio_device *vdev);
+	int	(*open_device)(struct vfio_device *vdev);
+	void	(*close_device)(struct vfio_device *vdev);
 	ssize_t	(*read)(struct vfio_device *vdev, char __user *buf,
 			size_t count, loff_t *ppos);
 	ssize_t	(*write)(struct vfio_device *vdev, const char __user *buf,
@@ -61,11 +76,14 @@ extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev);
 
 void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
 			 const struct vfio_device_ops *ops);
+void vfio_uninit_group_dev(struct vfio_device *device);
 int vfio_register_group_dev(struct vfio_device *device);
 void vfio_unregister_group_dev(struct vfio_device *device);
 extern struct vfio_device *vfio_device_get_from_dev(struct device *dev);
 extern void vfio_device_put(struct vfio_device *device);
 
+int vfio_assign_device_set(struct vfio_device *device, void *set_id);
+
 /* events for the backend driver notify callback */
 enum vfio_iommu_notify_type {
 	VFIO_IOMMU_CONTAINER_CLOSE = 0,
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/include/linux/vfio_pci_core.h
index 5a36272cecbf..ef9a44b6cf5d 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/include/linux/vfio_pci_core.h
@@ -10,13 +10,14 @@
 
 #include <linux/mutex.h>
 #include <linux/pci.h>
+#include <linux/vfio.h>
 #include <linux/irqbypass.h>
 #include <linux/types.h>
 #include <linux/uuid.h>
 #include <linux/notifier.h>
 
-#ifndef VFIO_PCI_PRIVATE_H
-#define VFIO_PCI_PRIVATE_H
+#ifndef VFIO_PCI_CORE_H
+#define VFIO_PCI_CORE_H
 
 #define VFIO_PCI_OFFSET_SHIFT   40
 
@@ -33,7 +34,7 @@
 
 struct vfio_pci_ioeventfd {
 	struct list_head	next;
-	struct vfio_pci_device	*vdev;
+	struct vfio_pci_core_device	*vdev;
 	struct virqfd		*virqfd;
 	void __iomem		*addr;
 	uint64_t		data;
@@ -52,18 +53,18 @@ struct vfio_pci_irq_ctx {
 	struct irq_bypass_producer	producer;
 };
 
-struct vfio_pci_device;
+struct vfio_pci_core_device;
 struct vfio_pci_region;
 
 struct vfio_pci_regops {
-	size_t	(*rw)(struct vfio_pci_device *vdev, char __user *buf,
+	ssize_t (*rw)(struct vfio_pci_core_device *vdev, char __user *buf,
 		      size_t count, loff_t *ppos, bool iswrite);
-	void	(*release)(struct vfio_pci_device *vdev,
+	void	(*release)(struct vfio_pci_core_device *vdev,
 			   struct vfio_pci_region *region);
-	int	(*mmap)(struct vfio_pci_device *vdev,
+	int	(*mmap)(struct vfio_pci_core_device *vdev,
 			struct vfio_pci_region *region,
 			struct vm_area_struct *vma);
-	int	(*add_capability)(struct vfio_pci_device *vdev,
+	int	(*add_capability)(struct vfio_pci_core_device *vdev,
 				  struct vfio_pci_region *region,
 				  struct vfio_info_cap *caps);
 };
@@ -83,11 +84,6 @@ struct vfio_pci_dummy_resource {
 	struct list_head	res_next;
 };
 
-struct vfio_pci_reflck {
-	struct kref		kref;
-	struct mutex		lock;
-};
-
 struct vfio_pci_vf_token {
 	struct mutex		lock;
 	uuid_t			uuid;
@@ -99,7 +95,7 @@ struct vfio_pci_mmap_vma {
 	struct list_head	vma_next;
 };
 
-struct vfio_pci_device {
+struct vfio_pci_core_device {
 	struct vfio_device	vdev;
 	struct pci_dev		*pdev;
 	void __iomem		*barmap[PCI_STD_NUM_BARS];
@@ -130,8 +126,6 @@ struct vfio_pci_device {
 	bool			needs_pm_restore;
 	struct pci_saved_state	*pci_saved_state;
 	struct pci_saved_state	*pm_save;
-	struct vfio_pci_reflck	*reflck;
-	int			refcnt;
 	int			ioeventfds_nr;
 	struct eventfd_ctx	*err_trigger;
 	struct eventfd_ctx	*req_trigger;
@@ -151,65 +145,95 @@ struct vfio_pci_device {
 #define is_irq_none(vdev) (!(is_intx(vdev) || is_msi(vdev) || is_msix(vdev)))
 #define irq_is(vdev, type) (vdev->irq_type == type)
 
-extern void vfio_pci_intx_mask(struct vfio_pci_device *vdev);
-extern void vfio_pci_intx_unmask(struct vfio_pci_device *vdev);
+extern void vfio_pci_intx_mask(struct vfio_pci_core_device *vdev);
+extern void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev);
 
-extern int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev,
+extern int vfio_pci_set_irqs_ioctl(struct vfio_pci_core_device *vdev,
 				   uint32_t flags, unsigned index,
 				   unsigned start, unsigned count, void *data);
 
-extern ssize_t vfio_pci_config_rw(struct vfio_pci_device *vdev,
+extern ssize_t vfio_pci_config_rw(struct vfio_pci_core_device *vdev,
 				  char __user *buf, size_t count,
 				  loff_t *ppos, bool iswrite);
 
-extern ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
+extern ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
 			       size_t count, loff_t *ppos, bool iswrite);
 
-extern ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
+extern ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
 			       size_t count, loff_t *ppos, bool iswrite);
 
-extern long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset,
+extern long vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
 			       uint64_t data, int count, int fd);
 
 extern int vfio_pci_init_perm_bits(void);
 extern void vfio_pci_uninit_perm_bits(void);
 
-extern int vfio_config_init(struct vfio_pci_device *vdev);
-extern void vfio_config_free(struct vfio_pci_device *vdev);
+extern int vfio_config_init(struct vfio_pci_core_device *vdev);
+extern void vfio_config_free(struct vfio_pci_core_device *vdev);
 
-extern int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
+extern int vfio_pci_register_dev_region(struct vfio_pci_core_device *vdev,
 					unsigned int type, unsigned int subtype,
 					const struct vfio_pci_regops *ops,
 					size_t size, u32 flags, void *data);
 
-extern int vfio_pci_set_power_state(struct vfio_pci_device *vdev,
+extern int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev,
 				    pci_power_t state);
 
-extern bool __vfio_pci_memory_enabled(struct vfio_pci_device *vdev);
-extern void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_device
+extern bool __vfio_pci_memory_enabled(struct vfio_pci_core_device *vdev);
+extern void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device
 						    *vdev);
-extern u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_device *vdev);
-extern void vfio_pci_memory_unlock_and_restore(struct vfio_pci_device *vdev,
+extern u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev);
+extern void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev,
 					       u16 cmd);
 
 #ifdef CONFIG_VFIO_PCI_IGD
-extern int vfio_pci_igd_init(struct vfio_pci_device *vdev);
+extern int vfio_pci_igd_init(struct vfio_pci_core_device *vdev);
 #else
-static inline int vfio_pci_igd_init(struct vfio_pci_device *vdev)
+static inline int vfio_pci_igd_init(struct vfio_pci_core_device *vdev)
 {
 	return -ENODEV;
 }
 #endif
 
 #ifdef CONFIG_S390
-extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,
+extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
 				       struct vfio_info_cap *caps);
 #else
-static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,
+static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
 					      struct vfio_info_cap *caps)
 {
 	return -ENODEV;
 }
 #endif
 
-#endif /* VFIO_PCI_PRIVATE_H */
+/* Will be exported for vfio pci drivers usage */
+void vfio_pci_core_set_params(bool nointxmask, bool is_disable_vga,
+			      bool is_disable_idle_d3);
+void vfio_pci_core_close_device(struct vfio_device *core_vdev);
+void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev,
+			       struct pci_dev *pdev,
+			       const struct vfio_device_ops *vfio_pci_ops);
+int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev);
+void vfio_pci_core_uninit_device(struct vfio_pci_core_device *vdev);
+void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev);
+int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn);
+extern const struct pci_error_handlers vfio_pci_core_err_handlers;
+long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
+		unsigned long arg);
+ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf,
+		size_t count, loff_t *ppos);
+ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf,
+		size_t count, loff_t *ppos);
+int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma);
+void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count);
+int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf);
+int vfio_pci_core_enable(struct vfio_pci_core_device *vdev);
+void vfio_pci_core_disable(struct vfio_pci_core_device *vdev);
+void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev);
+
+static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
+{
+	return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA;
+}
+
+#endif /* VFIO_PCI_CORE_H */
diff --git a/include/linux/vhost_iotlb.h b/include/linux/vhost_iotlb.h
index 6b09b786a762..2d0e2f52f938 100644
--- a/include/linux/vhost_iotlb.h
+++ b/include/linux/vhost_iotlb.h
@@ -17,6 +17,7 @@ struct vhost_iotlb_map {
 	u32 perm;
 	u32 flags_padding;
 	u64 __subtree_last;
+	void *opaque;
 };
 
 #define VHOST_IOTLB_FLAG_RETIRE 0x1
@@ -29,6 +30,8 @@ struct vhost_iotlb {
 	unsigned int flags;
 };
 
+int vhost_iotlb_add_range_ctx(struct vhost_iotlb *iotlb, u64 start, u64 last,
+			      u64 addr, unsigned int perm, void *opaque);
 int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, u64 start, u64 last,
 			  u64 addr, unsigned int perm);
 void vhost_iotlb_del_range(struct vhost_iotlb *iotlb, u64 start, u64 last);
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index ae0dd1948c2b..a185cc75ff52 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -33,6 +33,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		PGREUSE,
 		PGSTEAL_KSWAPD,
 		PGSTEAL_DIRECT,
+		PGDEMOTE_KSWAPD,
+		PGDEMOTE_DIRECT,
 		PGSCAN_KSWAPD,
 		PGSCAN_DIRECT,
 		PGSCAN_DIRECT_THROTTLE,
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 2644425b6dce..671d402c3778 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -225,9 +225,6 @@ static inline bool is_vm_area_hugepages(const void *addr)
 }
 
 #ifdef CONFIG_MMU
-int vmap_range(unsigned long addr, unsigned long end,
-			phys_addr_t phys_addr, pgprot_t prot,
-			unsigned int max_page_shift);
 void vunmap_range(unsigned long addr, unsigned long end);
 static inline void set_vm_flush_reset_perms(void *addr)
 {
diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h
index 6d28bc433c1c..6a2f51ebbfd3 100644
--- a/include/linux/vmpressure.h
+++ b/include/linux/vmpressure.h
@@ -37,7 +37,7 @@ extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio);
 extern void vmpressure_init(struct vmpressure *vmpr);
 extern void vmpressure_cleanup(struct vmpressure *vmpr);
 extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg);
-extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr);
+extern struct mem_cgroup *vmpressure_to_memcg(struct vmpressure *vmpr);
 extern int vmpressure_register_event(struct mem_cgroup *memcg,
 				     struct eventfd_ctx *eventfd,
 				     const char *args);
diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index 9b19e6bb68b5..99660197a36c 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -107,6 +107,7 @@ struct watchdog_device {
 	unsigned int max_hw_heartbeat_ms;
 	struct notifier_block reboot_nb;
 	struct notifier_block restart_nb;
+	struct notifier_block pm_nb;
 	void *driver_data;
 	struct watchdog_core_data *wd_data;
 	unsigned long status;
@@ -116,6 +117,7 @@ struct watchdog_device {
 #define WDOG_STOP_ON_REBOOT	2	/* Should be stopped on reboot */
 #define WDOG_HW_RUNNING		3	/* True if HW watchdog running */
 #define WDOG_STOP_ON_UNREGISTER	4	/* Should be stopped on unregister */
+#define WDOG_NO_PING_ON_SUSPEND	5	/* Ping worker should be stopped on suspend */
 	struct list_head deferred;
 };
 
@@ -156,6 +158,12 @@ static inline void watchdog_stop_on_unregister(struct watchdog_device *wdd)
 	set_bit(WDOG_STOP_ON_UNREGISTER, &wdd->status);
 }
 
+/* Use the following function to stop the wdog ping worker when suspending */
+static inline void watchdog_stop_ping_on_suspend(struct watchdog_device *wdd)
+{
+	set_bit(WDOG_NO_PING_ON_SUSPEND, &wdd->status);
+}
+
 /* Use the following function to check if a timeout value is invalid */
 static inline bool watchdog_timeout_invalid(struct watchdog_device *wdd, unsigned int t)
 {
@@ -209,6 +217,8 @@ extern int watchdog_init_timeout(struct watchdog_device *wdd,
 				  unsigned int timeout_parm, struct device *dev);
 extern int watchdog_register_device(struct watchdog_device *);
 extern void watchdog_unregister_device(struct watchdog_device *);
+int watchdog_dev_suspend(struct watchdog_device *wdd);
+int watchdog_dev_resume(struct watchdog_device *wdd);
 
 int watchdog_set_last_hw_keepalive(struct watchdog_device *, unsigned int);
 
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 270677dc4f36..d1f65adf6a26 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -218,7 +218,7 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
 void wbc_detach_inode(struct writeback_control *wbc);
 void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
 			      size_t bytes);
-int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr_pages,
+int cgroup_writeback_by_id(u64 bdi_id, int memcg_id,
 			   enum wb_reason reason, struct wb_completion *done);
 void cgroup_writeback_umount(void);
 bool cleanup_offline_cgwb(struct bdi_writeback *wb);
@@ -374,7 +374,7 @@ int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
 void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
 unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh);
 
-void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time);
+void wb_update_bandwidth(struct bdi_writeback *wb);
 void balance_dirty_pages_ratelimited(struct address_space *mapping);
 bool wb_over_bg_thresh(struct bdi_writeback *wb);
 
diff --git a/include/net/flow.h b/include/net/flow.h
index 6f5e70240071..58beb16a49b8 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -194,7 +194,7 @@ static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4)
 
 static inline struct flowi_common *flowi4_to_flowi_common(struct flowi4 *fl4)
 {
-	return &(flowi4_to_flowi(fl4)->u.__fl_common);
+	return &(fl4->__fl_common);
 }
 
 static inline struct flowi *flowi6_to_flowi(struct flowi6 *fl6)
@@ -204,7 +204,7 @@ static inline struct flowi *flowi6_to_flowi(struct flowi6 *fl6)
 
 static inline struct flowi_common *flowi6_to_flowi_common(struct flowi6 *fl6)
 {
-	return &(flowi6_to_flowi(fl6)->u.__fl_common);
+	return &(fl6->__fl_common);
 }
 
 static inline struct flowi *flowidn_to_flowi(struct flowidn *fldn)
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index ba3c808a3789..3634d4cc7a56 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -366,20 +366,6 @@ struct ib_sa_mcmember_rec {
 
 #define IB_DEFAULT_SERVICE_LEASE 	0xFFFFFFFF
 
-struct ib_sa_service_rec {
-	u64		id;
-	union ib_gid	gid;
-	__be16 		pkey;
-	/* reserved */
-	u32		lease;
-	u8		key[16];
-	u8		name[64];
-	u8		data8[16];
-	u16		data16[8];
-	u32		data32[4];
-	u64		data64[2];
-};
-
 #define IB_SA_GUIDINFO_REC_LID		IB_SA_COMP_MASK(0)
 #define IB_SA_GUIDINFO_REC_BLOCK_NUM	IB_SA_COMP_MASK(1)
 #define IB_SA_GUIDINFO_REC_RES1		IB_SA_COMP_MASK(2)
@@ -430,16 +416,6 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device,
 					void *context),
 		       void *context, struct ib_sa_query **query);
 
-int ib_sa_service_rec_query(struct ib_sa_client *client,
-			    struct ib_device *device, u32 port_num, u8 method,
-			    struct ib_sa_service_rec *rec,
-			    ib_sa_comp_mask comp_mask, unsigned long timeout_ms,
-			    gfp_t gfp_mask,
-			    void (*callback)(int status,
-					     struct ib_sa_service_rec *resp,
-					     void *context),
-			    void *context, struct ib_sa_query **sa_query);
-
 struct ib_sa_multicast {
 	struct ib_sa_mcmember_rec rec;
 	ib_sa_comp_mask		comp_mask;
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 676c57f5ca80..5ae9dff74dac 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -26,9 +26,7 @@ struct ib_umem {
 	u32 is_odp : 1;
 	u32 is_dmabuf : 1;
 	struct work_struct	work;
-	struct sg_table sg_head;
-	int             nmap;
-	unsigned int    sg_nents;
+	struct sg_append_table sgt_append;
 };
 
 struct ib_umem_dmabuf {
@@ -56,7 +54,7 @@ static inline int ib_umem_offset(struct ib_umem *umem)
 static inline unsigned long ib_umem_dma_offset(struct ib_umem *umem,
 					       unsigned long pgsz)
 {
-	return (sg_dma_address(umem->sg_head.sgl) + ib_umem_offset(umem)) &
+	return (sg_dma_address(umem->sgt_append.sgt.sgl) + ib_umem_offset(umem)) &
 	       (pgsz - 1);
 }
 
@@ -77,7 +75,8 @@ static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
 						struct ib_umem *umem,
 						unsigned long pgsz)
 {
-	__rdma_block_iter_start(biter, umem->sg_head.sgl, umem->nmap, pgsz);
+	__rdma_block_iter_start(biter, umem->sgt_append.sgt.sgl,
+				umem->sgt_append.sgt.nents, pgsz);
 }
 
 /**
@@ -128,7 +127,7 @@ static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem,
 						    unsigned long pgsz_bitmap,
 						    u64 pgoff_bitmask)
 {
-	struct scatterlist *sg = umem->sg_head.sgl;
+	struct scatterlist *sg = umem->sgt_append.sgt.sgl;
 	dma_addr_t dma_addr;
 
 	dma_addr = sg_dma_address(sg) + (umem->address & ~PAGE_MASK);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 371df1c80aeb..4b50d9a3018a 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2268,8 +2268,13 @@ struct iw_cm_conn_param;
 			 !__same_type(((struct drv_struct *)NULL)->member,     \
 				      struct ib_struct)))
 
-#define rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, gfp)                         \
-	((struct ib_type *)kzalloc(ib_dev->ops.size_##ib_type, gfp))
+#define rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, gfp)                          \
+	((struct ib_type *)rdma_zalloc_obj(ib_dev, ib_dev->ops.size_##ib_type, \
+					   gfp, false))
+
+#define rdma_zalloc_drv_obj_numa(ib_dev, ib_type)                              \
+	((struct ib_type *)rdma_zalloc_obj(ib_dev, ib_dev->ops.size_##ib_type, \
+					   GFP_KERNEL, true))
 
 #define rdma_zalloc_drv_obj(ib_dev, ib_type)                                   \
 	rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, GFP_KERNEL)
@@ -2435,9 +2440,8 @@ struct ib_device_ops {
 			  struct ib_udata *udata);
 	int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
 	int (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata);
-	struct ib_qp *(*create_qp)(struct ib_pd *pd,
-				   struct ib_qp_init_attr *qp_init_attr,
-				   struct ib_udata *udata);
+	int (*create_qp)(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr,
+			 struct ib_udata *udata);
 	int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
 			 int qp_attr_mask, struct ib_udata *udata);
 	int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
@@ -2635,11 +2639,18 @@ struct ib_device_ops {
 	int (*query_ucontext)(struct ib_ucontext *context,
 			      struct uverbs_attr_bundle *attrs);
 
+	/*
+	 * Provide NUMA node. This API exists for rdmavt/hfi1 only.
+	 * Everyone else relies on Linux memory management model.
+	 */
+	int (*get_numa_node)(struct ib_device *dev);
+
 	DECLARE_RDMA_OBJ_SIZE(ib_ah);
 	DECLARE_RDMA_OBJ_SIZE(ib_counters);
 	DECLARE_RDMA_OBJ_SIZE(ib_cq);
 	DECLARE_RDMA_OBJ_SIZE(ib_mw);
 	DECLARE_RDMA_OBJ_SIZE(ib_pd);
+	DECLARE_RDMA_OBJ_SIZE(ib_qp);
 	DECLARE_RDMA_OBJ_SIZE(ib_rwq_ind_table);
 	DECLARE_RDMA_OBJ_SIZE(ib_srq);
 	DECLARE_RDMA_OBJ_SIZE(ib_ucontext);
@@ -2746,6 +2757,15 @@ struct ib_device {
 	u32 lag_flags;
 };
 
+static inline void *rdma_zalloc_obj(struct ib_device *dev, size_t size,
+				    gfp_t gfp, bool is_numa_aware)
+{
+	if (is_numa_aware && dev->ops.get_numa_node)
+		return kzalloc_node(size, gfp, dev->ops.get_numa_node(dev));
+
+	return kzalloc(size, gfp);
+}
+
 struct ib_client_nl_info;
 struct ib_client {
 	const char *name;
@@ -3668,13 +3688,21 @@ static inline int ib_post_srq_recv(struct ib_srq *srq,
 					      bad_recv_wr ? : &dummy);
 }
 
-struct ib_qp *ib_create_named_qp(struct ib_pd *pd,
-				 struct ib_qp_init_attr *qp_init_attr,
-				 const char *caller);
+struct ib_qp *ib_create_qp_kernel(struct ib_pd *pd,
+				  struct ib_qp_init_attr *qp_init_attr,
+				  const char *caller);
+/**
+ * ib_create_qp - Creates a kernel QP associated with the specific protection
+ * domain.
+ * @pd: The protection domain associated with the QP.
+ * @init_attr: A list of initial attributes required to create the
+ *   QP.  If QP creation succeeds, then the attributes are updated to
+ *   the actual capabilities of the created QP.
+ */
 static inline struct ib_qp *ib_create_qp(struct ib_pd *pd,
 					 struct ib_qp_init_attr *init_attr)
 {
-	return ib_create_named_qp(pd, init_attr, KBUILD_MODNAME);
+	return ib_create_qp_kernel(pd, init_attr, KBUILD_MODNAME);
 }
 
 /**
@@ -4058,6 +4086,34 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
 }
 
 /**
+ * ib_dma_map_sgtable_attrs - Map a scatter/gather table to DMA addresses
+ * @dev: The device for which the DMA addresses are to be created
+ * @sg: The sg_table object describing the buffer
+ * @direction: The direction of the DMA
+ * @attrs: Optional DMA attributes for the map operation
+ */
+static inline int ib_dma_map_sgtable_attrs(struct ib_device *dev,
+					   struct sg_table *sgt,
+					   enum dma_data_direction direction,
+					   unsigned long dma_attrs)
+{
+	if (ib_uses_virt_dma(dev)) {
+		ib_dma_virt_map_sg(dev, sgt->sgl, sgt->orig_nents);
+		return 0;
+	}
+	return dma_map_sgtable(dev->dma_device, sgt, direction, dma_attrs);
+}
+
+static inline void ib_dma_unmap_sgtable_attrs(struct ib_device *dev,
+					      struct sg_table *sgt,
+					      enum dma_data_direction direction,
+					      unsigned long dma_attrs)
+{
+	if (!ib_uses_virt_dma(dev))
+		dma_unmap_sgtable(dev->dma_device, sgt, direction, dma_attrs);
+}
+
+/**
  * ib_dma_map_sg - Map a scatter/gather list to DMA addresses
  * @dev: The device for which the DMA addresses are to be created
  * @sg: The array of scatter/gather entries
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 8275954f5ce6..2e58d5e6ac0e 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -444,7 +444,7 @@ struct rvt_qp {
 	/*
 	 * This sge list MUST be last. Do not add anything below here.
 	 */
-	struct rvt_sge r_sg_list[] /* verified SGEs */
+	struct rvt_sge *r_sg_list /* verified SGEs */
 		____cacheline_aligned_in_smp;
 };
 
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 779a59fe8676..eaf04c9a1dfc 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -111,9 +111,6 @@ struct scsi_cmnd {
 				   reconnects.   Probably == sector
 				   size */
 
-	struct request *request;	/* The command we are
-				   	   working on */
-
 	unsigned char *sense_buffer;
 				/* obtained by REQUEST SENSE when
 				 * CHECK CONDITION is received on original
@@ -142,10 +139,15 @@ struct scsi_cmnd {
 	int flags;		/* Command flags */
 	unsigned long state;	/* Command completion state */
 
-	unsigned char tag;	/* SCSI-II queued command tag */
 	unsigned int extra_len;	/* length of alignment and padding */
 };
 
+/* Variant of blk_mq_rq_from_pdu() that verifies the type of its argument. */
+static inline struct request *scsi_cmd_to_rq(struct scsi_cmnd *scmd)
+{
+	return blk_mq_rq_from_pdu(scmd);
+}
+
 /*
  * Return the driver private allocation behind the command.
  * Only works if cmd_size is set in the host template.
@@ -158,7 +160,9 @@ static inline void *scsi_cmd_priv(struct scsi_cmnd *cmd)
 /* make sure not to use it with passthrough commands */
 static inline struct scsi_driver *scsi_cmd_to_driver(struct scsi_cmnd *cmd)
 {
-	return *(struct scsi_driver **)cmd->request->rq_disk->private_data;
+	struct request *rq = scsi_cmd_to_rq(cmd);
+
+	return *(struct scsi_driver **)rq->rq_disk->private_data;
 }
 
 extern void scsi_finish_command(struct scsi_cmnd *cmd);
@@ -220,6 +224,25 @@ static inline int scsi_sg_copy_to_buffer(struct scsi_cmnd *cmd,
 				 buf, buflen);
 }
 
+static inline sector_t scsi_get_sector(struct scsi_cmnd *scmd)
+{
+	return blk_rq_pos(scsi_cmd_to_rq(scmd));
+}
+
+static inline sector_t scsi_get_lba(struct scsi_cmnd *scmd)
+{
+	unsigned int shift = ilog2(scmd->device->sector_size) - SECTOR_SHIFT;
+
+	return blk_rq_pos(scsi_cmd_to_rq(scmd)) >> shift;
+}
+
+static inline unsigned int scsi_logical_block_count(struct scsi_cmnd *scmd)
+{
+	unsigned int shift = ilog2(scmd->device->sector_size) - SECTOR_SHIFT;
+
+	return blk_rq_bytes(scsi_cmd_to_rq(scmd)) >> shift;
+}
+
 /*
  * The operations below are hints that tell the controller driver how
  * to handle I/Os with DIF or similar types of protection information.
@@ -282,9 +305,11 @@ static inline unsigned char scsi_get_prot_type(struct scsi_cmnd *scmd)
 	return scmd->prot_type;
 }
 
-static inline sector_t scsi_get_lba(struct scsi_cmnd *scmd)
+static inline u32 scsi_prot_ref_tag(struct scsi_cmnd *scmd)
 {
-	return blk_rq_pos(scmd->request);
+	struct request *rq = blk_mq_rq_from_pdu(scmd);
+
+	return t10_pi_ref_tag(rq);
 }
 
 static inline unsigned int scsi_prot_interval(struct scsi_cmnd *scmd)
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index ac6ab16abee7..09a17f6e93a7 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -10,6 +10,7 @@
 #include <linux/atomic.h>
 #include <linux/sbitmap.h>
 
+struct bsg_device;
 struct device;
 struct request_queue;
 struct scsi_cmnd;
@@ -205,6 +206,7 @@ struct scsi_device {
 	unsigned unmap_limit_for_ws:1;	/* Use the UNMAP limit for WRITE SAME */
 	unsigned rpm_autosuspend:1;	/* Enable runtime autosuspend at device
 					 * creation time */
+	unsigned ignore_media_change:1; /* Ignore MEDIA CHANGE on resume */
 
 	bool offline_already;		/* Device offline message logged */
 
@@ -234,6 +236,10 @@ struct scsi_device {
 	size_t			dma_drain_len;
 	void			*dma_drain_buf;
 
+	unsigned int		sg_timeout;
+	unsigned int		sg_reserved_size;
+
+	struct bsg_device	*bsg_dev;
 	unsigned char		access_state;
 	struct mutex		state_mutex;
 	enum scsi_device_state sdev_state;
@@ -265,13 +271,15 @@ sdev_prefix_printk(const char *, const struct scsi_device *, const char *,
 __printf(3, 4) void
 scmd_printk(const char *, const struct scsi_cmnd *, const char *, ...);
 
-#define scmd_dbg(scmd, fmt, a...)					   \
-	do {								   \
-		if ((scmd)->request->rq_disk)				   \
-			sdev_dbg((scmd)->device, "[%s] " fmt,		   \
-				 (scmd)->request->rq_disk->disk_name, ##a);\
-		else							   \
-			sdev_dbg((scmd)->device, fmt, ##a);		   \
+#define scmd_dbg(scmd, fmt, a...)					\
+	do {								\
+		struct request *__rq = scsi_cmd_to_rq((scmd));		\
+									\
+		if (__rq->rq_disk)					\
+			sdev_dbg((scmd)->device, "[%s] " fmt,		\
+				 __rq->rq_disk->disk_name, ##a);	\
+		else							\
+			sdev_dbg((scmd)->device, fmt, ##a);		\
 	} while (0)
 
 enum scsi_target_state {
diff --git a/include/scsi/scsi_devinfo.h b/include/scsi/scsi_devinfo.h
index 3fdb322d4c4b..5d14adae21c7 100644
--- a/include/scsi/scsi_devinfo.h
+++ b/include/scsi/scsi_devinfo.h
@@ -28,7 +28,8 @@
 #define BLIST_LARGELUN		((__force blist_flags_t)(1ULL << 9))
 /* override additional length field */
 #define BLIST_INQUIRY_36	((__force blist_flags_t)(1ULL << 10))
-#define __BLIST_UNUSED_11	((__force blist_flags_t)(1ULL << 11))
+/* ignore MEDIA CHANGE unit attention after resuming from runtime suspend */
+#define BLIST_IGN_MEDIA_CHANGE	((__force blist_flags_t)(1ULL << 11))
 /* do not do automatic start on add */
 #define BLIST_NOSTARTONADD	((__force blist_flags_t)(1ULL << 12))
 #define __BLIST_UNUSED_13	((__force blist_flags_t)(1ULL << 13))
@@ -73,8 +74,7 @@
 #define __BLIST_HIGH_UNUSED (~(__BLIST_LAST_USED | \
 			       (__force blist_flags_t) \
 			       ((__force __u64)__BLIST_LAST_USED - 1ULL)))
-#define __BLIST_UNUSED_MASK (__BLIST_UNUSED_11 | \
-			     __BLIST_UNUSED_13 | \
+#define __BLIST_UNUSED_MASK (__BLIST_UNUSED_13 | \
 			     __BLIST_UNUSED_14 | \
 			     __BLIST_UNUSED_15 | \
 			     __BLIST_UNUSED_16 | \
diff --git a/include/scsi/scsi_ioctl.h b/include/scsi/scsi_ioctl.h
index b465799f4d2d..d2cb9aeaf1f1 100644
--- a/include/scsi/scsi_ioctl.h
+++ b/include/scsi/scsi_ioctl.h
@@ -18,7 +18,9 @@
 
 #ifdef __KERNEL__
 
+struct gendisk;
 struct scsi_device;
+struct sg_io_hdr;
 
 /*
  * Structures used for scsi_ioctl et al.
@@ -43,8 +45,11 @@ typedef struct scsi_fctargaddress {
 
 int scsi_ioctl_block_when_processing_errors(struct scsi_device *sdev,
 		int cmd, bool ndelay);
-extern int scsi_ioctl(struct scsi_device *, int, void __user *);
-extern int scsi_compat_ioctl(struct scsi_device *sdev, int cmd, void __user *arg);
+int scsi_ioctl(struct scsi_device *sdev, struct gendisk *disk, fmode_t mode,
+		int cmd, void __user *arg);
+int get_sg_io_hdr(struct sg_io_hdr *hdr, const void __user *argp);
+int put_sg_io_hdr(const struct sg_io_hdr *hdr, void __user *argp);
+bool scsi_cmd_allowed(unsigned char *cmd, fmode_t mode);
 
 #endif /* __KERNEL__ */
 #endif /* _SCSI_IOCTL_H */
diff --git a/include/scsi/scsi_request.h b/include/scsi/scsi_request.h
index b06f28c74908..9129b23e12bc 100644
--- a/include/scsi/scsi_request.h
+++ b/include/scsi/scsi_request.h
@@ -28,6 +28,4 @@ static inline void scsi_req_free_cmd(struct scsi_request *req)
 		kfree(req->cmd);
 }
 
-void scsi_req_init(struct scsi_request *req);
-
 #endif /* _SCSI_SCSI_REQUEST_H */
diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index 1f78b09bba55..675f3a1fe613 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -75,6 +75,7 @@ void	target_backend_unregister(const struct target_backend_ops *);
 
 void	target_complete_cmd(struct se_cmd *, u8);
 void	target_set_cmd_data_length(struct se_cmd *, int);
+void	target_complete_cmd_with_sense(struct se_cmd *, u8, sense_reason_t);
 void	target_complete_cmd_with_length(struct se_cmd *, u8, int);
 
 void	transport_copy_sense_to_cmd(struct se_cmd *, unsigned char *);
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 85c16c266eac..fb11c7693b25 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -171,7 +171,7 @@ enum tcm_sense_reason_table {
 	TCM_WRITE_PROTECTED			= R(0x0c),
 	TCM_CHECK_CONDITION_ABORT_CMD		= R(0x0d),
 	TCM_CHECK_CONDITION_UNIT_ATTENTION	= R(0x0e),
-	TCM_CHECK_CONDITION_NOT_READY		= R(0x0f),
+
 	TCM_RESERVATION_CONFLICT		= R(0x10),
 	TCM_ADDRESS_OUT_OF_RANGE		= R(0x11),
 	TCM_OUT_OF_RESOURCES			= R(0x12),
@@ -188,6 +188,10 @@ enum tcm_sense_reason_table {
 	TCM_INSUFFICIENT_REGISTRATION_RESOURCES	= R(0x1d),
 	TCM_LUN_BUSY				= R(0x1e),
 	TCM_INVALID_FIELD_IN_COMMAND_IU         = R(0x1f),
+	TCM_ALUA_TG_PT_STANDBY			= R(0x20),
+	TCM_ALUA_TG_PT_UNAVAILABLE		= R(0x21),
+	TCM_ALUA_STATE_TRANSITION		= R(0x22),
+	TCM_ALUA_OFFLINE			= R(0x23),
 #undef R
 };
 
@@ -453,10 +457,10 @@ enum target_core_dif_check {
 #define TCM_ACA_TAG	0x24
 
 struct se_cmd {
+	/* Used for fail with specific sense codes */
+	sense_reason_t		sense_reason;
 	/* SAM response code being sent to initiator */
 	u8			scsi_status;
-	u8			scsi_asc;
-	u8			scsi_ascq;
 	u16			scsi_sense_length;
 	unsigned		unknown_data_length:1;
 	bool			state_active:1;
diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h
index 5d9de24cb9c0..9a448fe9355d 100644
--- a/include/trace/events/cachefiles.h
+++ b/include/trace/events/cachefiles.h
@@ -78,20 +78,20 @@ TRACE_EVENT(cachefiles_ref,
 
 	    /* Note that obj may be NULL */
 	    TP_STRUCT__entry(
-		    __field(struct cachefiles_object *,		obj		)
-		    __field(struct fscache_cookie *,		cookie		)
+		    __field(unsigned int,			obj		)
+		    __field(unsigned int,			cookie		)
 		    __field(enum cachefiles_obj_ref_trace,	why		)
 		    __field(int,				usage		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->obj	= obj;
-		    __entry->cookie	= cookie;
+		    __entry->obj	= obj->fscache.debug_id;
+		    __entry->cookie	= cookie->debug_id;
 		    __entry->usage	= usage;
 		    __entry->why	= why;
 			   ),
 
-	    TP_printk("c=%p o=%p u=%d %s",
+	    TP_printk("c=%08x o=%08x u=%d %s",
 		      __entry->cookie, __entry->obj, __entry->usage,
 		      __print_symbolic(__entry->why, cachefiles_obj_ref_traces))
 	    );
@@ -104,18 +104,18 @@ TRACE_EVENT(cachefiles_lookup,
 	    TP_ARGS(obj, de, inode),
 
 	    TP_STRUCT__entry(
-		    __field(struct cachefiles_object *,	obj	)
+		    __field(unsigned int,		obj	)
 		    __field(struct dentry *,		de	)
 		    __field(struct inode *,		inode	)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->obj	= obj;
+		    __entry->obj	= obj->fscache.debug_id;
 		    __entry->de		= de;
 		    __entry->inode	= inode;
 			   ),
 
-	    TP_printk("o=%p d=%p i=%p",
+	    TP_printk("o=%08x d=%p i=%p",
 		      __entry->obj, __entry->de, __entry->inode)
 	    );
 
@@ -126,18 +126,18 @@ TRACE_EVENT(cachefiles_mkdir,
 	    TP_ARGS(obj, de, ret),
 
 	    TP_STRUCT__entry(
-		    __field(struct cachefiles_object *,	obj	)
+		    __field(unsigned int,		obj	)
 		    __field(struct dentry *,		de	)
 		    __field(int,			ret	)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->obj	= obj;
+		    __entry->obj	= obj->fscache.debug_id;
 		    __entry->de		= de;
 		    __entry->ret	= ret;
 			   ),
 
-	    TP_printk("o=%p d=%p r=%u",
+	    TP_printk("o=%08x d=%p r=%u",
 		      __entry->obj, __entry->de, __entry->ret)
 	    );
 
@@ -148,18 +148,18 @@ TRACE_EVENT(cachefiles_create,
 	    TP_ARGS(obj, de, ret),
 
 	    TP_STRUCT__entry(
-		    __field(struct cachefiles_object *,	obj	)
+		    __field(unsigned int,		obj	)
 		    __field(struct dentry *,		de	)
 		    __field(int,			ret	)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->obj	= obj;
+		    __entry->obj	= obj->fscache.debug_id;
 		    __entry->de		= de;
 		    __entry->ret	= ret;
 			   ),
 
-	    TP_printk("o=%p d=%p r=%u",
+	    TP_printk("o=%08x d=%p r=%u",
 		      __entry->obj, __entry->de, __entry->ret)
 	    );
 
@@ -172,18 +172,18 @@ TRACE_EVENT(cachefiles_unlink,
 
 	    /* Note that obj may be NULL */
 	    TP_STRUCT__entry(
-		    __field(struct cachefiles_object *,	obj		)
+		    __field(unsigned int,		obj		)
 		    __field(struct dentry *,		de		)
 		    __field(enum fscache_why_object_killed, why		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->obj	= obj;
+		    __entry->obj	= obj->fscache.debug_id;
 		    __entry->de		= de;
 		    __entry->why	= why;
 			   ),
 
-	    TP_printk("o=%p d=%p w=%s",
+	    TP_printk("o=%08x d=%p w=%s",
 		      __entry->obj, __entry->de,
 		      __print_symbolic(__entry->why, cachefiles_obj_kill_traces))
 	    );
@@ -198,20 +198,20 @@ TRACE_EVENT(cachefiles_rename,
 
 	    /* Note that obj may be NULL */
 	    TP_STRUCT__entry(
-		    __field(struct cachefiles_object *,	obj		)
+		    __field(unsigned int,		obj		)
 		    __field(struct dentry *,		de		)
 		    __field(struct dentry *,		to		)
 		    __field(enum fscache_why_object_killed, why		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->obj	= obj;
+		    __entry->obj	= obj->fscache.debug_id;
 		    __entry->de		= de;
 		    __entry->to		= to;
 		    __entry->why	= why;
 			   ),
 
-	    TP_printk("o=%p d=%p t=%p w=%s",
+	    TP_printk("o=%08x d=%p t=%p w=%s",
 		      __entry->obj, __entry->de, __entry->to,
 		      __print_symbolic(__entry->why, cachefiles_obj_kill_traces))
 	    );
@@ -224,16 +224,16 @@ TRACE_EVENT(cachefiles_mark_active,
 
 	    /* Note that obj may be NULL */
 	    TP_STRUCT__entry(
-		    __field(struct cachefiles_object *,	obj		)
+		    __field(unsigned int,		obj		)
 		    __field(struct dentry *,		de		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->obj	= obj;
+		    __entry->obj	= obj->fscache.debug_id;
 		    __entry->de		= de;
 			   ),
 
-	    TP_printk("o=%p d=%p",
+	    TP_printk("o=%08x d=%p",
 		      __entry->obj, __entry->de)
 	    );
 
@@ -246,22 +246,22 @@ TRACE_EVENT(cachefiles_wait_active,
 
 	    /* Note that obj may be NULL */
 	    TP_STRUCT__entry(
-		    __field(struct cachefiles_object *,	obj		)
+		    __field(unsigned int,		obj		)
+		    __field(unsigned int,		xobj		)
 		    __field(struct dentry *,		de		)
-		    __field(struct cachefiles_object *,	xobj		)
 		    __field(u16,			flags		)
 		    __field(u16,			fsc_flags	)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->obj	= obj;
+		    __entry->obj	= obj->fscache.debug_id;
 		    __entry->de		= de;
-		    __entry->xobj	= xobj;
+		    __entry->xobj	= xobj->fscache.debug_id;
 		    __entry->flags	= xobj->flags;
 		    __entry->fsc_flags	= xobj->fscache.flags;
 			   ),
 
-	    TP_printk("o=%p d=%p wo=%p wf=%x wff=%x",
+	    TP_printk("o=%08x d=%p wo=%08x wf=%x wff=%x",
 		      __entry->obj, __entry->de, __entry->xobj,
 		      __entry->flags, __entry->fsc_flags)
 	    );
@@ -275,18 +275,18 @@ TRACE_EVENT(cachefiles_mark_inactive,
 
 	    /* Note that obj may be NULL */
 	    TP_STRUCT__entry(
-		    __field(struct cachefiles_object *,	obj		)
+		    __field(unsigned int,		obj		)
 		    __field(struct dentry *,		de		)
 		    __field(struct inode *,		inode		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->obj	= obj;
+		    __entry->obj	= obj->fscache.debug_id;
 		    __entry->de		= de;
 		    __entry->inode	= inode;
 			   ),
 
-	    TP_printk("o=%p d=%p i=%p",
+	    TP_printk("o=%08x d=%p i=%p",
 		      __entry->obj, __entry->de, __entry->inode)
 	    );
 
@@ -299,18 +299,18 @@ TRACE_EVENT(cachefiles_mark_buried,
 
 	    /* Note that obj may be NULL */
 	    TP_STRUCT__entry(
-		    __field(struct cachefiles_object *,	obj		)
+		    __field(unsigned int,		obj		)
 		    __field(struct dentry *,		de		)
 		    __field(enum fscache_why_object_killed, why		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->obj	= obj;
+		    __entry->obj	= obj->fscache.debug_id;
 		    __entry->de		= de;
 		    __entry->why	= why;
 			   ),
 
-	    TP_printk("o=%p d=%p w=%s",
+	    TP_printk("o=%08x d=%p w=%s",
 		      __entry->obj, __entry->de,
 		      __print_symbolic(__entry->why, cachefiles_obj_kill_traces))
 	    );
diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h
new file mode 100644
index 000000000000..2f422f4f1fb9
--- /dev/null
+++ b/include/trace/events/damon.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM damon
+
+#if !defined(_TRACE_DAMON_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_DAMON_H
+
+#include <linux/damon.h>
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(damon_aggregated,
+
+	TP_PROTO(struct damon_target *t, struct damon_region *r,
+		unsigned int nr_regions),
+
+	TP_ARGS(t, r, nr_regions),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, target_id)
+		__field(unsigned int, nr_regions)
+		__field(unsigned long, start)
+		__field(unsigned long, end)
+		__field(unsigned int, nr_accesses)
+	),
+
+	TP_fast_assign(
+		__entry->target_id = t->id;
+		__entry->nr_regions = nr_regions;
+		__entry->start = r->ar.start;
+		__entry->end = r->ar.end;
+		__entry->nr_accesses = r->nr_accesses;
+	),
+
+	TP_printk("target_id=%lu nr_regions=%u %lu-%lu: %u",
+			__entry->target_id, __entry->nr_regions,
+			__entry->start, __entry->end, __entry->nr_accesses)
+);
+
+#endif /* _TRACE_DAMON_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 56b113e3cd6a..4e881d91c874 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -1818,6 +1818,7 @@ DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_pages_end,
 	TP_ARGS(inode, cluster_idx, compressed_size, ret)
 );
 
+#ifdef CONFIG_F2FS_IOSTAT
 TRACE_EVENT(f2fs_iostat,
 
 	TP_PROTO(struct f2fs_sb_info *sbi, unsigned long long *iostat),
@@ -1894,6 +1895,102 @@ TRACE_EVENT(f2fs_iostat,
 		__entry->fs_cdrio, __entry->fs_nrio, __entry->fs_mrio)
 );
 
+#ifndef __F2FS_IOSTAT_LATENCY_TYPE
+#define __F2FS_IOSTAT_LATENCY_TYPE
+struct f2fs_iostat_latency {
+	unsigned int peak_lat;
+	unsigned int avg_lat;
+	unsigned int cnt;
+};
+#endif /* __F2FS_IOSTAT_LATENCY_TYPE */
+
+TRACE_EVENT(f2fs_iostat_latency,
+
+	TP_PROTO(struct f2fs_sb_info *sbi, struct f2fs_iostat_latency (*iostat_lat)[NR_PAGE_TYPE]),
+
+	TP_ARGS(sbi, iostat_lat),
+
+	TP_STRUCT__entry(
+		__field(dev_t,	dev)
+		__field(unsigned int,	d_rd_peak)
+		__field(unsigned int,	d_rd_avg)
+		__field(unsigned int,	d_rd_cnt)
+		__field(unsigned int,	n_rd_peak)
+		__field(unsigned int,	n_rd_avg)
+		__field(unsigned int,	n_rd_cnt)
+		__field(unsigned int,	m_rd_peak)
+		__field(unsigned int,	m_rd_avg)
+		__field(unsigned int,	m_rd_cnt)
+		__field(unsigned int,	d_wr_s_peak)
+		__field(unsigned int,	d_wr_s_avg)
+		__field(unsigned int,	d_wr_s_cnt)
+		__field(unsigned int,	n_wr_s_peak)
+		__field(unsigned int,	n_wr_s_avg)
+		__field(unsigned int,	n_wr_s_cnt)
+		__field(unsigned int,	m_wr_s_peak)
+		__field(unsigned int,	m_wr_s_avg)
+		__field(unsigned int,	m_wr_s_cnt)
+		__field(unsigned int,	d_wr_as_peak)
+		__field(unsigned int,	d_wr_as_avg)
+		__field(unsigned int,	d_wr_as_cnt)
+		__field(unsigned int,	n_wr_as_peak)
+		__field(unsigned int,	n_wr_as_avg)
+		__field(unsigned int,	n_wr_as_cnt)
+		__field(unsigned int,	m_wr_as_peak)
+		__field(unsigned int,	m_wr_as_avg)
+		__field(unsigned int,	m_wr_as_cnt)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= sbi->sb->s_dev;
+		__entry->d_rd_peak	= iostat_lat[0][DATA].peak_lat;
+		__entry->d_rd_avg	= iostat_lat[0][DATA].avg_lat;
+		__entry->d_rd_cnt	= iostat_lat[0][DATA].cnt;
+		__entry->n_rd_peak	= iostat_lat[0][NODE].peak_lat;
+		__entry->n_rd_avg	= iostat_lat[0][NODE].avg_lat;
+		__entry->n_rd_cnt	= iostat_lat[0][NODE].cnt;
+		__entry->m_rd_peak	= iostat_lat[0][META].peak_lat;
+		__entry->m_rd_avg	= iostat_lat[0][META].avg_lat;
+		__entry->m_rd_cnt	= iostat_lat[0][META].cnt;
+		__entry->d_wr_s_peak	= iostat_lat[1][DATA].peak_lat;
+		__entry->d_wr_s_avg	= iostat_lat[1][DATA].avg_lat;
+		__entry->d_wr_s_cnt	= iostat_lat[1][DATA].cnt;
+		__entry->n_wr_s_peak	= iostat_lat[1][NODE].peak_lat;
+		__entry->n_wr_s_avg	= iostat_lat[1][NODE].avg_lat;
+		__entry->n_wr_s_cnt	= iostat_lat[1][NODE].cnt;
+		__entry->m_wr_s_peak	= iostat_lat[1][META].peak_lat;
+		__entry->m_wr_s_avg	= iostat_lat[1][META].avg_lat;
+		__entry->m_wr_s_cnt	= iostat_lat[1][META].cnt;
+		__entry->d_wr_as_peak	= iostat_lat[2][DATA].peak_lat;
+		__entry->d_wr_as_avg	= iostat_lat[2][DATA].avg_lat;
+		__entry->d_wr_as_cnt	= iostat_lat[2][DATA].cnt;
+		__entry->n_wr_as_peak	= iostat_lat[2][NODE].peak_lat;
+		__entry->n_wr_as_avg	= iostat_lat[2][NODE].avg_lat;
+		__entry->n_wr_as_cnt	= iostat_lat[2][NODE].cnt;
+		__entry->m_wr_as_peak	= iostat_lat[2][META].peak_lat;
+		__entry->m_wr_as_avg	= iostat_lat[2][META].avg_lat;
+		__entry->m_wr_as_cnt	= iostat_lat[2][META].cnt;
+	),
+
+	TP_printk("dev = (%d,%d), "
+		"iotype [peak lat.(ms)/avg lat.(ms)/count], "
+		"rd_data [%u/%u/%u], rd_node [%u/%u/%u], rd_meta [%u/%u/%u], "
+		"wr_sync_data [%u/%u/%u], wr_sync_node [%u/%u/%u], "
+		"wr_sync_meta [%u/%u/%u], wr_async_data [%u/%u/%u], "
+		"wr_async_node [%u/%u/%u], wr_async_meta [%u/%u/%u]",
+		show_dev(__entry->dev),
+		__entry->d_rd_peak, __entry->d_rd_avg, __entry->d_rd_cnt,
+		__entry->n_rd_peak, __entry->n_rd_avg, __entry->n_rd_cnt,
+		__entry->m_rd_peak, __entry->m_rd_avg, __entry->m_rd_cnt,
+		__entry->d_wr_s_peak, __entry->d_wr_s_avg, __entry->d_wr_s_cnt,
+		__entry->n_wr_s_peak, __entry->n_wr_s_avg, __entry->n_wr_s_cnt,
+		__entry->m_wr_s_peak, __entry->m_wr_s_avg, __entry->m_wr_s_cnt,
+		__entry->d_wr_as_peak, __entry->d_wr_as_avg, __entry->d_wr_as_cnt,
+		__entry->n_wr_as_peak, __entry->n_wr_as_avg, __entry->n_wr_as_cnt,
+		__entry->m_wr_as_peak, __entry->m_wr_as_avg, __entry->m_wr_as_cnt)
+);
+#endif
+
 TRACE_EVENT(f2fs_bmap,
 
 	TP_PROTO(struct inode *inode, sector_t lblock, sector_t pblock),
diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
index d16fe6ed78a2..446392f5ba83 100644
--- a/include/trace/events/fscache.h
+++ b/include/trace/events/fscache.h
@@ -160,37 +160,27 @@ fscache_cookie_traces;
 
 
 TRACE_EVENT(fscache_cookie,
-	    TP_PROTO(struct fscache_cookie *cookie,
-		     enum fscache_cookie_trace where,
-		     int usage),
+	    TP_PROTO(unsigned int cookie_debug_id,
+		     int ref,
+		     enum fscache_cookie_trace where),
 
-	    TP_ARGS(cookie, where, usage),
+	    TP_ARGS(cookie_debug_id, ref, where),
 
 	    TP_STRUCT__entry(
-		    __field(struct fscache_cookie *,	cookie		)
-		    __field(struct fscache_cookie *,	parent		)
+		    __field(unsigned int,		cookie		)
 		    __field(enum fscache_cookie_trace,	where		)
-		    __field(int,			usage		)
-		    __field(int,			n_children	)
-		    __field(int,			n_active	)
-		    __field(u8,				flags		)
+		    __field(int,			ref		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->cookie	= cookie;
-		    __entry->parent	= cookie->parent;
+		    __entry->cookie	= cookie_debug_id;
 		    __entry->where	= where;
-		    __entry->usage	= usage;
-		    __entry->n_children	= atomic_read(&cookie->n_children);
-		    __entry->n_active	= atomic_read(&cookie->n_active);
-		    __entry->flags	= cookie->flags;
+		    __entry->ref	= ref;
 			   ),
 
-	    TP_printk("%s c=%p u=%d p=%p Nc=%d Na=%d f=%02x",
+	    TP_printk("%s c=%08x r=%d",
 		      __print_symbolic(__entry->where, fscache_cookie_traces),
-		      __entry->cookie, __entry->usage,
-		      __entry->parent, __entry->n_children, __entry->n_active,
-		      __entry->flags)
+		      __entry->cookie, __entry->ref)
 	    );
 
 TRACE_EVENT(fscache_netfs,
@@ -199,17 +189,17 @@ TRACE_EVENT(fscache_netfs,
 	    TP_ARGS(netfs),
 
 	    TP_STRUCT__entry(
-		    __field(struct fscache_cookie *,	cookie		)
+		    __field(unsigned int,		cookie		)
 		    __array(char,			name, 8		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->cookie		= netfs->primary_index;
+		    __entry->cookie		= netfs->primary_index->debug_id;
 		    strncpy(__entry->name, netfs->name, 8);
 		    __entry->name[7]		= 0;
 			   ),
 
-	    TP_printk("c=%p n=%s",
+	    TP_printk("c=%08x n=%s",
 		      __entry->cookie, __entry->name)
 	    );
 
@@ -219,26 +209,26 @@ TRACE_EVENT(fscache_acquire,
 	    TP_ARGS(cookie),
 
 	    TP_STRUCT__entry(
-		    __field(struct fscache_cookie *,	cookie		)
-		    __field(struct fscache_cookie *,	parent		)
+		    __field(unsigned int,		cookie		)
+		    __field(unsigned int,		parent		)
 		    __array(char,			name, 8		)
-		    __field(int,			p_usage		)
+		    __field(int,			p_ref		)
 		    __field(int,			p_n_children	)
 		    __field(u8,				p_flags		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->cookie		= cookie;
-		    __entry->parent		= cookie->parent;
-		    __entry->p_usage		= atomic_read(&cookie->parent->usage);
+		    __entry->cookie		= cookie->debug_id;
+		    __entry->parent		= cookie->parent->debug_id;
+		    __entry->p_ref		= refcount_read(&cookie->parent->ref);
 		    __entry->p_n_children	= atomic_read(&cookie->parent->n_children);
 		    __entry->p_flags		= cookie->parent->flags;
 		    memcpy(__entry->name, cookie->def->name, 8);
 		    __entry->name[7]		= 0;
 			   ),
 
-	    TP_printk("c=%p p=%p pu=%d pc=%d pf=%02x n=%s",
-		      __entry->cookie, __entry->parent, __entry->p_usage,
+	    TP_printk("c=%08x p=%08x pr=%d pc=%d pf=%02x n=%s",
+		      __entry->cookie, __entry->parent, __entry->p_ref,
 		      __entry->p_n_children, __entry->p_flags, __entry->name)
 	    );
 
@@ -248,9 +238,9 @@ TRACE_EVENT(fscache_relinquish,
 	    TP_ARGS(cookie, retire),
 
 	    TP_STRUCT__entry(
-		    __field(struct fscache_cookie *,	cookie		)
-		    __field(struct fscache_cookie *,	parent		)
-		    __field(int,			usage		)
+		    __field(unsigned int,		cookie		)
+		    __field(unsigned int,		parent		)
+		    __field(int,			ref		)
 		    __field(int,			n_children	)
 		    __field(int,			n_active	)
 		    __field(u8,				flags		)
@@ -258,17 +248,17 @@ TRACE_EVENT(fscache_relinquish,
 			     ),
 
 	    TP_fast_assign(
-		    __entry->cookie	= cookie;
-		    __entry->parent	= cookie->parent;
-		    __entry->usage	= atomic_read(&cookie->usage);
+		    __entry->cookie	= cookie->debug_id;
+		    __entry->parent	= cookie->parent->debug_id;
+		    __entry->ref	= refcount_read(&cookie->ref);
 		    __entry->n_children	= atomic_read(&cookie->n_children);
 		    __entry->n_active	= atomic_read(&cookie->n_active);
 		    __entry->flags	= cookie->flags;
 		    __entry->retire	= retire;
 			   ),
 
-	    TP_printk("c=%p u=%d p=%p Nc=%d Na=%d f=%02x r=%u",
-		      __entry->cookie, __entry->usage,
+	    TP_printk("c=%08x r=%d p=%08x Nc=%d Na=%d f=%02x r=%u",
+		      __entry->cookie, __entry->ref,
 		      __entry->parent, __entry->n_children, __entry->n_active,
 		      __entry->flags, __entry->retire)
 	    );
@@ -279,23 +269,23 @@ TRACE_EVENT(fscache_enable,
 	    TP_ARGS(cookie),
 
 	    TP_STRUCT__entry(
-		    __field(struct fscache_cookie *,	cookie		)
-		    __field(int,			usage		)
+		    __field(unsigned int,		cookie		)
+		    __field(int,			ref		)
 		    __field(int,			n_children	)
 		    __field(int,			n_active	)
 		    __field(u8,				flags		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->cookie	= cookie;
-		    __entry->usage	= atomic_read(&cookie->usage);
+		    __entry->cookie	= cookie->debug_id;
+		    __entry->ref	= refcount_read(&cookie->ref);
 		    __entry->n_children	= atomic_read(&cookie->n_children);
 		    __entry->n_active	= atomic_read(&cookie->n_active);
 		    __entry->flags	= cookie->flags;
 			   ),
 
-	    TP_printk("c=%p u=%d Nc=%d Na=%d f=%02x",
-		      __entry->cookie, __entry->usage,
+	    TP_printk("c=%08x r=%d Nc=%d Na=%d f=%02x",
+		      __entry->cookie, __entry->ref,
 		      __entry->n_children, __entry->n_active, __entry->flags)
 	    );
 
@@ -305,23 +295,23 @@ TRACE_EVENT(fscache_disable,
 	    TP_ARGS(cookie),
 
 	    TP_STRUCT__entry(
-		    __field(struct fscache_cookie *,	cookie		)
-		    __field(int,			usage		)
+		    __field(unsigned int,		cookie		)
+		    __field(int,			ref		)
 		    __field(int,			n_children	)
 		    __field(int,			n_active	)
 		    __field(u8,				flags		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->cookie	= cookie;
-		    __entry->usage	= atomic_read(&cookie->usage);
+		    __entry->cookie	= cookie->debug_id;
+		    __entry->ref	= refcount_read(&cookie->ref);
 		    __entry->n_children	= atomic_read(&cookie->n_children);
 		    __entry->n_active	= atomic_read(&cookie->n_active);
 		    __entry->flags	= cookie->flags;
 			   ),
 
-	    TP_printk("c=%p u=%d Nc=%d Na=%d f=%02x",
-		      __entry->cookie, __entry->usage,
+	    TP_printk("c=%08x r=%d Nc=%d Na=%d f=%02x",
+		      __entry->cookie, __entry->ref,
 		      __entry->n_children, __entry->n_active, __entry->flags)
 	    );
 
@@ -333,8 +323,8 @@ TRACE_EVENT(fscache_osm,
 	    TP_ARGS(object, state, wait, oob, event_num),
 
 	    TP_STRUCT__entry(
-		    __field(struct fscache_cookie *,	cookie		)
-		    __field(struct fscache_object *,	object		)
+		    __field(unsigned int,		cookie		)
+		    __field(unsigned int,		object		)
 		    __array(char,			state, 8	)
 		    __field(bool,			wait		)
 		    __field(bool,			oob		)
@@ -342,15 +332,15 @@ TRACE_EVENT(fscache_osm,
 			     ),
 
 	    TP_fast_assign(
-		    __entry->cookie		= object->cookie;
-		    __entry->object		= object;
+		    __entry->cookie		= object->cookie->debug_id;
+		    __entry->object		= object->debug_id;
 		    __entry->wait		= wait;
 		    __entry->oob		= oob;
 		    __entry->event_num		= event_num;
 		    memcpy(__entry->state, state->short_name, 8);
 			   ),
 
-	    TP_printk("c=%p o=%p %s %s%sev=%d",
+	    TP_printk("c=%08x o=%08d %s %s%sev=%d",
 		      __entry->cookie,
 		      __entry->object,
 		      __entry->state,
@@ -370,18 +360,18 @@ TRACE_EVENT(fscache_page,
 	    TP_ARGS(cookie, page, why),
 
 	    TP_STRUCT__entry(
-		    __field(struct fscache_cookie *,	cookie		)
+		    __field(unsigned int,		cookie		)
 		    __field(pgoff_t,			page		)
 		    __field(enum fscache_page_trace,	why		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->cookie		= cookie;
+		    __entry->cookie		= cookie->debug_id;
 		    __entry->page		= page->index;
 		    __entry->why		= why;
 			   ),
 
-	    TP_printk("c=%p %s pg=%lx",
+	    TP_printk("c=%08x %s pg=%lx",
 		      __entry->cookie,
 		      __print_symbolic(__entry->why, fscache_page_traces),
 		      __entry->page)
@@ -394,20 +384,20 @@ TRACE_EVENT(fscache_check_page,
 	    TP_ARGS(cookie, page, val, n),
 
 	    TP_STRUCT__entry(
-		    __field(struct fscache_cookie *,	cookie		)
+		    __field(unsigned int,		cookie		)
 		    __field(void *,			page		)
 		    __field(void *,			val		)
 		    __field(int,			n		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->cookie		= cookie;
+		    __entry->cookie		= cookie->debug_id;
 		    __entry->page		= page;
 		    __entry->val		= val;
 		    __entry->n			= n;
 			   ),
 
-	    TP_printk("c=%p pg=%p val=%p n=%d",
+	    TP_printk("c=%08x pg=%p val=%p n=%d",
 		      __entry->cookie, __entry->page, __entry->val, __entry->n)
 	    );
 
@@ -417,14 +407,14 @@ TRACE_EVENT(fscache_wake_cookie,
 	    TP_ARGS(cookie),
 
 	    TP_STRUCT__entry(
-		    __field(struct fscache_cookie *,	cookie		)
+		    __field(unsigned int,		cookie		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->cookie		= cookie;
+		    __entry->cookie		= cookie->debug_id;
 			   ),
 
-	    TP_printk("c=%p", __entry->cookie)
+	    TP_printk("c=%08x", __entry->cookie)
 	    );
 
 TRACE_EVENT(fscache_op,
@@ -434,18 +424,18 @@ TRACE_EVENT(fscache_op,
 	    TP_ARGS(cookie, op, why),
 
 	    TP_STRUCT__entry(
-		    __field(struct fscache_cookie *,	cookie		)
-		    __field(struct fscache_operation *,	op		)
+		    __field(unsigned int,		cookie		)
+		    __field(unsigned int,		op		)
 		    __field(enum fscache_op_trace,	why		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->cookie		= cookie;
-		    __entry->op			= op;
+		    __entry->cookie		= cookie ? cookie->debug_id : 0;
+		    __entry->op			= op->debug_id;
 		    __entry->why		= why;
 			   ),
 
-	    TP_printk("c=%p op=%p %s",
+	    TP_printk("c=%08x op=%08x %s",
 		      __entry->cookie, __entry->op,
 		      __print_symbolic(__entry->why, fscache_op_traces))
 	    );
@@ -457,20 +447,20 @@ TRACE_EVENT(fscache_page_op,
 	    TP_ARGS(cookie, page, op, what),
 
 	    TP_STRUCT__entry(
-		    __field(struct fscache_cookie *,	cookie		)
+		    __field(unsigned int,		cookie		)
+		    __field(unsigned int,		op		)
 		    __field(pgoff_t,			page		)
-		    __field(struct fscache_operation *,	op		)
 		    __field(enum fscache_page_op_trace,	what		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->cookie		= cookie;
+		    __entry->cookie		= cookie->debug_id;
 		    __entry->page		= page ? page->index : 0;
-		    __entry->op			= op;
+		    __entry->op			= op->debug_id;
 		    __entry->what		= what;
 			   ),
 
-	    TP_printk("c=%p %s pg=%lx op=%p",
+	    TP_printk("c=%08x %s pg=%lx op=%08x",
 		      __entry->cookie,
 		      __print_symbolic(__entry->what, fscache_page_op_traces),
 		      __entry->page, __entry->op)
@@ -483,20 +473,20 @@ TRACE_EVENT(fscache_wrote_page,
 	    TP_ARGS(cookie, page, op, ret),
 
 	    TP_STRUCT__entry(
-		    __field(struct fscache_cookie *,	cookie		)
+		    __field(unsigned int,		cookie		)
+		    __field(unsigned int,		op		)
 		    __field(pgoff_t,			page		)
-		    __field(struct fscache_operation *,	op		)
 		    __field(int,			ret		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->cookie		= cookie;
+		    __entry->cookie		= cookie->debug_id;
 		    __entry->page		= page->index;
-		    __entry->op			= op;
+		    __entry->op			= op->debug_id;
 		    __entry->ret		= ret;
 			   ),
 
-	    TP_printk("c=%p pg=%lx op=%p ret=%d",
+	    TP_printk("c=%08x pg=%lx op=%08x ret=%d",
 		      __entry->cookie, __entry->page, __entry->op, __entry->ret)
 	    );
 
@@ -507,22 +497,22 @@ TRACE_EVENT(fscache_gang_lookup,
 	    TP_ARGS(cookie, op, results, n, store_limit),
 
 	    TP_STRUCT__entry(
-		    __field(struct fscache_cookie *,	cookie		)
-		    __field(struct fscache_operation *,	op		)
+		    __field(unsigned int,		cookie		)
+		    __field(unsigned int,		op		)
 		    __field(pgoff_t,			results0	)
 		    __field(int,			n		)
 		    __field(pgoff_t,			store_limit	)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->cookie		= cookie;
-		    __entry->op			= op;
+		    __entry->cookie		= cookie->debug_id;
+		    __entry->op			= op->debug_id;
 		    __entry->results0		= results[0] ? ((struct page *)results[0])->index : (pgoff_t)-1;
 		    __entry->n			= n;
 		    __entry->store_limit	= store_limit;
 			   ),
 
-	    TP_printk("c=%p op=%p r0=%lx n=%d sl=%lx",
+	    TP_printk("c=%08x op=%08x r0=%lx n=%d sl=%lx",
 		      __entry->cookie, __entry->op, __entry->results0, __entry->n,
 		      __entry->store_limit)
 	    );
diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index e4e44a2b4aa9..0dd30de00e5b 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -295,14 +295,14 @@ TRACE_EVENT(io_uring_fail_link,
  */
 TRACE_EVENT(io_uring_complete,
 
-	TP_PROTO(void *ctx, u64 user_data, long res, unsigned cflags),
+	TP_PROTO(void *ctx, u64 user_data, int res, unsigned cflags),
 
 	TP_ARGS(ctx, user_data, res, cflags),
 
 	TP_STRUCT__entry (
 		__field(  void *,	ctx		)
 		__field(  u64,		user_data	)
-		__field(  long,		res		)
+		__field(  int,		res		)
 		__field(  unsigned,	cflags		)
 	),
 
@@ -313,7 +313,7 @@ TRACE_EVENT(io_uring_complete,
 		__entry->cflags		= cflags;
 	),
 
-	TP_printk("ring %p, user_data 0x%llx, result %ld, cflags %x",
+	TP_printk("ring %p, user_data 0x%llx, result %d, cflags %x",
 			  __entry->ctx, (unsigned long long)__entry->user_data,
 			  __entry->res, __entry->cflags)
 );
diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h
index 9fb2a3bbcdfb..779f3fad9ecd 100644
--- a/include/trace/events/migrate.h
+++ b/include/trace/events/migrate.h
@@ -21,7 +21,8 @@
 	EM( MR_MEMPOLICY_MBIND,	"mempolicy_mbind")		\
 	EM( MR_NUMA_MISPLACED,	"numa_misplaced")		\
 	EM( MR_CONTIG_RANGE,	"contig_range")			\
-	EMe(MR_LONGTERM_PIN,	"longterm_pin")
+	EM( MR_LONGTERM_PIN,	"longterm_pin")			\
+	EMe(MR_DEMOTION,	"demotion")
 
 /*
  * First define the enums in the above macros to be exported to userspace
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index f160484afc5c..116ed4d5d0f8 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -75,7 +75,7 @@
 #define IF_HAVE_PG_HWPOISON(flag,string)
 #endif
 
-#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
+#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
 #define IF_HAVE_PG_IDLE(flag,string) ,{1UL << flag, string}
 #else
 #define IF_HAVE_PG_IDLE(flag,string)
@@ -165,7 +165,6 @@ IF_HAVE_PG_SKIP_KASAN_POISON(PG_skip_kasan_poison, "skip_kasan_poison")
 	{VM_UFFD_MISSING,		"uffd_missing"	},		\
 IF_HAVE_UFFD_MINOR(VM_UFFD_MINOR,	"uffd_minor"	)		\
 	{VM_PFNMAP,			"pfnmap"	},		\
-	{VM_DENYWRITE,			"denywrite"	},		\
 	{VM_UFFD_WP,			"uffd_wp"	},		\
 	{VM_LOCKED,			"locked"	},		\
 	{VM_IO,				"io"		},		\
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index de1c64635e42..4d470bffd9f1 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -139,7 +139,7 @@ TRACE_EVENT(netfs_read,
 
 	    TP_fast_assign(
 		    __entry->rreq	= rreq->debug_id;
-		    __entry->cookie	= rreq->cookie_debug_id;
+		    __entry->cookie	= rreq->cache_resources.debug_id;
 		    __entry->start	= start;
 		    __entry->len	= len;
 		    __entry->what	= what;
diff --git a/include/trace/events/page_ref.h b/include/trace/events/page_ref.h
index 5d2ea93956ce..8a99c1cd417b 100644
--- a/include/trace/events/page_ref.h
+++ b/include/trace/events/page_ref.h
@@ -38,7 +38,7 @@ DECLARE_EVENT_CLASS(page_ref_mod_template,
 
 	TP_printk("pfn=0x%lx flags=%s count=%d mapcount=%d mapping=%p mt=%d val=%d",
 		__entry->pfn,
-		show_page_flags(__entry->flags & ((1UL << NR_PAGEFLAGS) - 1)),
+		show_page_flags(__entry->flags & PAGEFLAGS_MASK),
 		__entry->count,
 		__entry->mapcount, __entry->mapping, __entry->mt,
 		__entry->val)
@@ -88,7 +88,7 @@ DECLARE_EVENT_CLASS(page_ref_mod_and_test_template,
 
 	TP_printk("pfn=0x%lx flags=%s count=%d mapcount=%d mapping=%p mt=%d val=%d ret=%d",
 		__entry->pfn,
-		show_page_flags(__entry->flags & ((1UL << NR_PAGEFLAGS) - 1)),
+		show_page_flags(__entry->flags & PAGEFLAGS_MASK),
 		__entry->count,
 		__entry->mapcount, __entry->mapping, __entry->mt,
 		__entry->val, __entry->ret)
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index bd55908c1bef..de4195499592 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -793,6 +793,39 @@ TRACE_EVENT(xprtrdma_post_send,
 	)
 );
 
+TRACE_EVENT(xprtrdma_post_send_err,
+	TP_PROTO(
+		const struct rpcrdma_xprt *r_xprt,
+		const struct rpcrdma_req *req,
+		int rc
+	),
+
+	TP_ARGS(r_xprt, req, rc),
+
+	TP_STRUCT__entry(
+		__field(u32, cq_id)
+		__field(unsigned int, task_id)
+		__field(unsigned int, client_id)
+		__field(int, rc)
+	),
+
+	TP_fast_assign(
+		const struct rpc_rqst *rqst = &req->rl_slot;
+		const struct rpcrdma_ep *ep = r_xprt->rx_ep;
+
+		__entry->cq_id = ep ? ep->re_attr.recv_cq->res.id : 0;
+		__entry->task_id = rqst->rq_task->tk_pid;
+		__entry->client_id = rqst->rq_task->tk_client ?
+				     rqst->rq_task->tk_client->cl_clid : -1;
+		__entry->rc = rc;
+	),
+
+	TP_printk("task:%u@%u cq.id=%u rc=%d",
+		__entry->task_id, __entry->client_id,
+		__entry->cq_id, __entry->rc
+	)
+);
+
 TRACE_EVENT(xprtrdma_post_recv,
 	TP_PROTO(
 		const struct rpcrdma_rep *rep
@@ -818,16 +851,14 @@ TRACE_EVENT(xprtrdma_post_recv,
 TRACE_EVENT(xprtrdma_post_recvs,
 	TP_PROTO(
 		const struct rpcrdma_xprt *r_xprt,
-		unsigned int count,
-		int status
+		unsigned int count
 	),
 
-	TP_ARGS(r_xprt, count, status),
+	TP_ARGS(r_xprt, count),
 
 	TP_STRUCT__entry(
 		__field(u32, cq_id)
 		__field(unsigned int, count)
-		__field(int, status)
 		__field(int, posted)
 		__string(addr, rpcrdma_addrstr(r_xprt))
 		__string(port, rpcrdma_portstr(r_xprt))
@@ -838,15 +869,44 @@ TRACE_EVENT(xprtrdma_post_recvs,
 
 		__entry->cq_id = ep->re_attr.recv_cq->res.id;
 		__entry->count = count;
-		__entry->status = status;
 		__entry->posted = ep->re_receive_count;
 		__assign_str(addr, rpcrdma_addrstr(r_xprt));
 		__assign_str(port, rpcrdma_portstr(r_xprt));
 	),
 
-	TP_printk("peer=[%s]:%s cq.id=%d %u new recvs, %d active (rc %d)",
+	TP_printk("peer=[%s]:%s cq.id=%d %u new recvs, %d active",
+		__get_str(addr), __get_str(port), __entry->cq_id,
+		__entry->count, __entry->posted
+	)
+);
+
+TRACE_EVENT(xprtrdma_post_recvs_err,
+	TP_PROTO(
+		const struct rpcrdma_xprt *r_xprt,
+		int status
+	),
+
+	TP_ARGS(r_xprt, status),
+
+	TP_STRUCT__entry(
+		__field(u32, cq_id)
+		__field(int, status)
+		__string(addr, rpcrdma_addrstr(r_xprt))
+		__string(port, rpcrdma_portstr(r_xprt))
+	),
+
+	TP_fast_assign(
+		const struct rpcrdma_ep *ep = r_xprt->rx_ep;
+
+		__entry->cq_id = ep->re_attr.recv_cq->res.id;
+		__entry->status = status;
+		__assign_str(addr, rpcrdma_addrstr(r_xprt));
+		__assign_str(port, rpcrdma_portstr(r_xprt));
+	),
+
+	TP_printk("peer=[%s]:%s cq.id=%d rc=%d",
 		__get_str(addr), __get_str(port), __entry->cq_id,
-		__entry->count, __entry->posted, __entry->status
+		__entry->status
 	)
 );
 
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index d323f5a049c8..2d04eb96d418 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -295,25 +295,11 @@ TRACE_EVENT(rpc_request,
 		)
 );
 
-TRACE_DEFINE_ENUM(RPC_TASK_ASYNC);
-TRACE_DEFINE_ENUM(RPC_TASK_SWAPPER);
-TRACE_DEFINE_ENUM(RPC_TASK_NULLCREDS);
-TRACE_DEFINE_ENUM(RPC_CALL_MAJORSEEN);
-TRACE_DEFINE_ENUM(RPC_TASK_ROOTCREDS);
-TRACE_DEFINE_ENUM(RPC_TASK_DYNAMIC);
-TRACE_DEFINE_ENUM(RPC_TASK_NO_ROUND_ROBIN);
-TRACE_DEFINE_ENUM(RPC_TASK_SOFT);
-TRACE_DEFINE_ENUM(RPC_TASK_SOFTCONN);
-TRACE_DEFINE_ENUM(RPC_TASK_SENT);
-TRACE_DEFINE_ENUM(RPC_TASK_TIMEOUT);
-TRACE_DEFINE_ENUM(RPC_TASK_NOCONNECT);
-TRACE_DEFINE_ENUM(RPC_TASK_NO_RETRANS_TIMEOUT);
-TRACE_DEFINE_ENUM(RPC_TASK_CRED_NOREF);
-
 #define rpc_show_task_flags(flags)					\
 	__print_flags(flags, "|",					\
 		{ RPC_TASK_ASYNC, "ASYNC" },				\
 		{ RPC_TASK_SWAPPER, "SWAPPER" },			\
+		{ RPC_TASK_MOVEABLE, "MOVEABLE" },			\
 		{ RPC_TASK_NULLCREDS, "NULLCREDS" },			\
 		{ RPC_CALL_MAJORSEEN, "MAJORSEEN" },			\
 		{ RPC_TASK_ROOTCREDS, "ROOTCREDS" },			\
@@ -327,14 +313,6 @@ TRACE_DEFINE_ENUM(RPC_TASK_CRED_NOREF);
 		{ RPC_TASK_NO_RETRANS_TIMEOUT, "NORTO" },		\
 		{ RPC_TASK_CRED_NOREF, "CRED_NOREF" })
 
-TRACE_DEFINE_ENUM(RPC_TASK_RUNNING);
-TRACE_DEFINE_ENUM(RPC_TASK_QUEUED);
-TRACE_DEFINE_ENUM(RPC_TASK_ACTIVE);
-TRACE_DEFINE_ENUM(RPC_TASK_NEED_XMIT);
-TRACE_DEFINE_ENUM(RPC_TASK_NEED_RECV);
-TRACE_DEFINE_ENUM(RPC_TASK_MSG_PIN_WAIT);
-TRACE_DEFINE_ENUM(RPC_TASK_SIGNALLED);
-
 #define rpc_show_runstate(flags)					\
 	__print_flags(flags, "|",					\
 		{ (1UL << RPC_TASK_RUNNING), "RUNNING" },		\
@@ -945,17 +923,6 @@ TRACE_EVENT(rpc_socket_nospace,
 	)
 );
 
-TRACE_DEFINE_ENUM(XPRT_LOCKED);
-TRACE_DEFINE_ENUM(XPRT_CONNECTED);
-TRACE_DEFINE_ENUM(XPRT_CONNECTING);
-TRACE_DEFINE_ENUM(XPRT_CLOSE_WAIT);
-TRACE_DEFINE_ENUM(XPRT_BOUND);
-TRACE_DEFINE_ENUM(XPRT_BINDING);
-TRACE_DEFINE_ENUM(XPRT_CLOSING);
-TRACE_DEFINE_ENUM(XPRT_CONGESTED);
-TRACE_DEFINE_ENUM(XPRT_CWND_WAIT);
-TRACE_DEFINE_ENUM(XPRT_WRITE_SPACE);
-
 #define rpc_show_xprt_state(x)						\
 	__print_flags(x, "|",						\
 		{ (1UL << XPRT_LOCKED),		"LOCKED"},		\
@@ -965,6 +932,8 @@ TRACE_DEFINE_ENUM(XPRT_WRITE_SPACE);
 		{ (1UL << XPRT_BOUND),		"BOUND"},		\
 		{ (1UL << XPRT_BINDING),	"BINDING"},		\
 		{ (1UL << XPRT_CLOSING),	"CLOSING"},		\
+		{ (1UL << XPRT_OFFLINE),	"OFFLINE"},		\
+		{ (1UL << XPRT_REMOVE),		"REMOVE"},		\
 		{ (1UL << XPRT_CONGESTED),	"CONGESTED"},		\
 		{ (1UL << XPRT_CWND_WAIT),	"CWND_WAIT"},		\
 		{ (1UL << XPRT_WRITE_SPACE),	"WRITE_SPACE"})
@@ -1092,10 +1061,10 @@ TRACE_EVENT(xprt_retransmit,
 		__field(u32, xid)
 		__field(int, ntrans)
 		__field(int, version)
+		__field(unsigned long, timeout)
 		__string(progname,
 			 rqst->rq_task->tk_client->cl_program->name)
-		__string(procedure,
-			 rqst->rq_task->tk_msg.rpc_proc->p_name)
+		__string(procname, rpc_proc_name(rqst->rq_task))
 	),
 
 	TP_fast_assign(
@@ -1106,17 +1075,19 @@ TRACE_EVENT(xprt_retransmit,
 			task->tk_client->cl_clid : -1;
 		__entry->xid = be32_to_cpu(rqst->rq_xid);
 		__entry->ntrans = rqst->rq_ntrans;
+		__entry->timeout = task->tk_timeout;
 		__assign_str(progname,
 			     task->tk_client->cl_program->name);
 		__entry->version = task->tk_client->cl_vers;
-		__assign_str(procedure, task->tk_msg.rpc_proc->p_name);
+		__assign_str(procname, rpc_proc_name(task));
 	),
 
 	TP_printk(
-		"task:%u@%u xid=0x%08x %sv%d %s ntrans=%d",
+		"task:%u@%u xid=0x%08x %sv%d %s ntrans=%d timeout=%lu",
 		__entry->task_id, __entry->client_id, __entry->xid,
-		__get_str(progname), __entry->version, __get_str(procedure),
-		__entry->ntrans)
+		__get_str(progname), __entry->version, __get_str(procname),
+		__entry->ntrans, __entry->timeout
+	)
 );
 
 TRACE_EVENT(xprt_ping,
@@ -1568,8 +1539,7 @@ DEFINE_SVCXDRBUF_EVENT(sendto);
 	svc_rqst_flag(SPLICE_OK)					\
 	svc_rqst_flag(VICTIM)						\
 	svc_rqst_flag(BUSY)						\
-	svc_rqst_flag(DATA)						\
-	svc_rqst_flag_end(AUTHERR)
+	svc_rqst_flag_end(DATA)
 
 #undef svc_rqst_flag
 #undef svc_rqst_flag_end
@@ -1611,9 +1581,9 @@ TRACE_DEFINE_ENUM(SVC_COMPLETE);
 		{ SVC_COMPLETE,	"SVC_COMPLETE" })
 
 TRACE_EVENT(svc_authenticate,
-	TP_PROTO(const struct svc_rqst *rqst, int auth_res, __be32 auth_stat),
+	TP_PROTO(const struct svc_rqst *rqst, int auth_res),
 
-	TP_ARGS(rqst, auth_res, auth_stat),
+	TP_ARGS(rqst, auth_res),
 
 	TP_STRUCT__entry(
 		__field(u32, xid)
@@ -1624,7 +1594,7 @@ TRACE_EVENT(svc_authenticate,
 	TP_fast_assign(
 		__entry->xid = be32_to_cpu(rqst->rq_xid);
 		__entry->svc_status = auth_res;
-		__entry->auth_stat = be32_to_cpu(auth_stat);
+		__entry->auth_stat = be32_to_cpu(rqst->rq_auth_stat);
 	),
 
 	TP_printk("xid=0x%08x auth_res=%s auth_stat=%s",
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index a9d6fcd95f42..1c5fb86d455a 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -673,15 +673,15 @@ __SYSCALL(__NR_madvise, sys_madvise)
 #define __NR_remap_file_pages 234
 __SYSCALL(__NR_remap_file_pages, sys_remap_file_pages)
 #define __NR_mbind 235
-__SC_COMP(__NR_mbind, sys_mbind, compat_sys_mbind)
+__SYSCALL(__NR_mbind, sys_mbind)
 #define __NR_get_mempolicy 236
-__SC_COMP(__NR_get_mempolicy, sys_get_mempolicy, compat_sys_get_mempolicy)
+__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy)
 #define __NR_set_mempolicy 237
-__SC_COMP(__NR_set_mempolicy, sys_set_mempolicy, compat_sys_set_mempolicy)
+__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy)
 #define __NR_migrate_pages 238
-__SC_COMP(__NR_migrate_pages, sys_migrate_pages, compat_sys_migrate_pages)
+__SYSCALL(__NR_migrate_pages, sys_migrate_pages)
 #define __NR_move_pages 239
-__SC_COMP(__NR_move_pages, sys_move_pages, compat_sys_move_pages)
+__SYSCALL(__NR_move_pages, sys_move_pages)
 #endif
 
 #define __NR_rt_tgsigqueueinfo 240
@@ -877,9 +877,11 @@ __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)
 #define __NR_memfd_secret 447
 __SYSCALL(__NR_memfd_secret, sys_memfd_secret)
 #endif
+#define __NR_process_mrelease 448
+__SYSCALL(__NR_process_mrelease, sys_process_mrelease)
 
 #undef __NR_syscalls
-#define __NR_syscalls 448
+#define __NR_syscalls 449
 
 /*
  * 32 bit systems traditionally used different
diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h
index f6e8a005b113..8d206f27bb6d 100644
--- a/include/uapi/linux/cxl_mem.h
+++ b/include/uapi/linux/cxl_mem.h
@@ -50,7 +50,7 @@ enum { CXL_CMDS };
 #define ___C(a, b) { b }
 static const struct {
 	const char *name;
-} cxl_command_names[] = { CXL_CMDS };
+} cxl_command_names[] __attribute__((__unused__)) = { CXL_CMDS };
 
 /*
  * Here's how this actually breaks out:
diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h
index edc346a77c91..c750eac09fc9 100644
--- a/include/uapi/linux/idxd.h
+++ b/include/uapi/linux/idxd.h
@@ -9,6 +9,30 @@
 #include <stdint.h>
 #endif
 
+/* Driver command error status */
+enum idxd_scmd_stat {
+	IDXD_SCMD_DEV_ENABLED = 0x80000010,
+	IDXD_SCMD_DEV_NOT_ENABLED = 0x80000020,
+	IDXD_SCMD_WQ_ENABLED = 0x80000021,
+	IDXD_SCMD_DEV_DMA_ERR = 0x80020000,
+	IDXD_SCMD_WQ_NO_GRP = 0x80030000,
+	IDXD_SCMD_WQ_NO_NAME = 0x80040000,
+	IDXD_SCMD_WQ_NO_SVM = 0x80050000,
+	IDXD_SCMD_WQ_NO_THRESH = 0x80060000,
+	IDXD_SCMD_WQ_PORTAL_ERR = 0x80070000,
+	IDXD_SCMD_WQ_RES_ALLOC_ERR = 0x80080000,
+	IDXD_SCMD_PERCPU_ERR = 0x80090000,
+	IDXD_SCMD_DMA_CHAN_ERR = 0x800a0000,
+	IDXD_SCMD_CDEV_ERR = 0x800b0000,
+	IDXD_SCMD_WQ_NO_SWQ_SUPPORT = 0x800c0000,
+	IDXD_SCMD_WQ_NONE_CONFIGURED = 0x800d0000,
+	IDXD_SCMD_WQ_NO_SIZE = 0x800e0000,
+	IDXD_SCMD_WQ_NO_PRIV = 0x800f0000,
+};
+
+#define IDXD_SCMD_SOFTERR_MASK	0x80000000
+#define IDXD_SCMD_SOFTERR_SHIFT	16
+
 /* Descriptor flags */
 #define IDXD_OP_FLAG_FENCE	0x0001
 #define IDXD_OP_FLAG_BOF	0x0002
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index d9e4aabcb31a..a067410ebea5 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1965,7 +1965,9 @@ struct kvm_stats_header {
 #define KVM_STATS_TYPE_CUMULATIVE	(0x0 << KVM_STATS_TYPE_SHIFT)
 #define KVM_STATS_TYPE_INSTANT		(0x1 << KVM_STATS_TYPE_SHIFT)
 #define KVM_STATS_TYPE_PEAK		(0x2 << KVM_STATS_TYPE_SHIFT)
-#define KVM_STATS_TYPE_MAX		KVM_STATS_TYPE_PEAK
+#define KVM_STATS_TYPE_LINEAR_HIST	(0x3 << KVM_STATS_TYPE_SHIFT)
+#define KVM_STATS_TYPE_LOG_HIST		(0x4 << KVM_STATS_TYPE_SHIFT)
+#define KVM_STATS_TYPE_MAX		KVM_STATS_TYPE_LOG_HIST
 
 #define KVM_STATS_UNIT_SHIFT		4
 #define KVM_STATS_UNIT_MASK		(0xF << KVM_STATS_UNIT_SHIFT)
@@ -1988,8 +1990,9 @@ struct kvm_stats_header {
  * @size: The number of data items for this stats.
  *        Every data item is of type __u64.
  * @offset: The offset of the stats to the start of stat structure in
- *          struture kvm or kvm_vcpu.
- * @unused: Unused field for future usage. Always 0 for now.
+ *          structure kvm or kvm_vcpu.
+ * @bucket_size: A parameter value used for histogram stats. It is only used
+ *		for linear histogram stats, specifying the size of the bucket;
  * @name: The name string for the stats. Its size is indicated by the
  *        &kvm_stats_header->name_size.
  */
@@ -1998,7 +2001,7 @@ struct kvm_stats_desc {
 	__s16 exponent;
 	__u16 size;
 	__u32 offset;
-	__u32 unused;
+	__u32 bucket_size;
 	char name[];
 };
 
diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h
index 19a00bc7fe86..046d0ccba4cd 100644
--- a/include/uapi/linux/mempolicy.h
+++ b/include/uapi/linux/mempolicy.h
@@ -22,6 +22,7 @@ enum {
 	MPOL_BIND,
 	MPOL_INTERLEAVE,
 	MPOL_LOCAL,
+	MPOL_PREFERRED_MANY,
 	MPOL_MAX,	/* always last member of enum */
 };
 
diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
index c6e6d7d7d538..c2ac7269acf7 100644
--- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
@@ -258,6 +258,7 @@ enum ctattr_stats_cpu {
 	CTA_STATS_ERROR,
 	CTA_STATS_SEARCH_RESTART,
 	CTA_STATS_CLASH_RESOLVE,
+	CTA_STATS_CHAIN_TOOLONG,
 	__CTA_STATS_MAX,
 };
 #define CTA_STATS_MAX (__CTA_STATS_MAX - 1)
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 79a699f106b1..ec88590b3198 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -827,6 +827,8 @@ struct tc_codel_xstats {
 
 /* FQ_CODEL */
 
+#define FQ_CODEL_QUANTUM_MAX (1 << 20)
+
 enum {
 	TCA_FQ_CODEL_UNSPEC,
 	TCA_FQ_CODEL_TARGET,
diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h
index 95b1597f16ae..27ace512babd 100644
--- a/include/uapi/linux/target_core_user.h
+++ b/include/uapi/linux/target_core_user.h
@@ -46,6 +46,7 @@
 #define TCMU_MAILBOX_FLAG_CAP_OOOC (1 << 0) /* Out-of-order completions */
 #define TCMU_MAILBOX_FLAG_CAP_READ_LEN (1 << 1) /* Read data length */
 #define TCMU_MAILBOX_FLAG_CAP_TMR (1 << 2) /* TMR notifications */
+#define TCMU_MAILBOX_FLAG_CAP_KEEP_BUF (1<<3) /* Keep buf after cmd completion */
 
 struct tcmu_mailbox {
 	__u16 version;
@@ -75,6 +76,7 @@ struct tcmu_cmd_entry_hdr {
 	__u8 kflags;
 #define TCMU_UFLAG_UNKNOWN_OP 0x1
 #define TCMU_UFLAG_READ_LEN   0x2
+#define TCMU_UFLAG_KEEP_BUF   0x4
 	__u8 uflags;
 
 } __packed;
diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h
new file mode 100644
index 000000000000..7cfe1c1280c0
--- /dev/null
+++ b/include/uapi/linux/vduse.h
@@ -0,0 +1,306 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_VDUSE_H_
+#define _UAPI_VDUSE_H_
+
+#include <linux/types.h>
+
+#define VDUSE_BASE	0x81
+
+/* The ioctls for control device (/dev/vduse/control) */
+
+#define VDUSE_API_VERSION	0
+
+/*
+ * Get the version of VDUSE API that kernel supported (VDUSE_API_VERSION).
+ * This is used for future extension.
+ */
+#define VDUSE_GET_API_VERSION	_IOR(VDUSE_BASE, 0x00, __u64)
+
+/* Set the version of VDUSE API that userspace supported. */
+#define VDUSE_SET_API_VERSION	_IOW(VDUSE_BASE, 0x01, __u64)
+
+/**
+ * struct vduse_dev_config - basic configuration of a VDUSE device
+ * @name: VDUSE device name, needs to be NUL terminated
+ * @vendor_id: virtio vendor id
+ * @device_id: virtio device id
+ * @features: virtio features
+ * @vq_num: the number of virtqueues
+ * @vq_align: the allocation alignment of virtqueue's metadata
+ * @reserved: for future use, needs to be initialized to zero
+ * @config_size: the size of the configuration space
+ * @config: the buffer of the configuration space
+ *
+ * Structure used by VDUSE_CREATE_DEV ioctl to create VDUSE device.
+ */
+struct vduse_dev_config {
+#define VDUSE_NAME_MAX	256
+	char name[VDUSE_NAME_MAX];
+	__u32 vendor_id;
+	__u32 device_id;
+	__u64 features;
+	__u32 vq_num;
+	__u32 vq_align;
+	__u32 reserved[13];
+	__u32 config_size;
+	__u8 config[];
+};
+
+/* Create a VDUSE device which is represented by a char device (/dev/vduse/$NAME) */
+#define VDUSE_CREATE_DEV	_IOW(VDUSE_BASE, 0x02, struct vduse_dev_config)
+
+/*
+ * Destroy a VDUSE device. Make sure there are no more references
+ * to the char device (/dev/vduse/$NAME).
+ */
+#define VDUSE_DESTROY_DEV	_IOW(VDUSE_BASE, 0x03, char[VDUSE_NAME_MAX])
+
+/* The ioctls for VDUSE device (/dev/vduse/$NAME) */
+
+/**
+ * struct vduse_iotlb_entry - entry of IOTLB to describe one IOVA region [start, last]
+ * @offset: the mmap offset on returned file descriptor
+ * @start: start of the IOVA region
+ * @last: last of the IOVA region
+ * @perm: access permission of the IOVA region
+ *
+ * Structure used by VDUSE_IOTLB_GET_FD ioctl to find an overlapped IOVA region.
+ */
+struct vduse_iotlb_entry {
+	__u64 offset;
+	__u64 start;
+	__u64 last;
+#define VDUSE_ACCESS_RO 0x1
+#define VDUSE_ACCESS_WO 0x2
+#define VDUSE_ACCESS_RW 0x3
+	__u8 perm;
+};
+
+/*
+ * Find the first IOVA region that overlaps with the range [start, last]
+ * and return the corresponding file descriptor. Return -EINVAL means the
+ * IOVA region doesn't exist. Caller should set start and last fields.
+ */
+#define VDUSE_IOTLB_GET_FD	_IOWR(VDUSE_BASE, 0x10, struct vduse_iotlb_entry)
+
+/*
+ * Get the negotiated virtio features. It's a subset of the features in
+ * struct vduse_dev_config which can be accepted by virtio driver. It's
+ * only valid after FEATURES_OK status bit is set.
+ */
+#define VDUSE_DEV_GET_FEATURES	_IOR(VDUSE_BASE, 0x11, __u64)
+
+/**
+ * struct vduse_config_data - data used to update configuration space
+ * @offset: the offset from the beginning of configuration space
+ * @length: the length to write to configuration space
+ * @buffer: the buffer used to write from
+ *
+ * Structure used by VDUSE_DEV_SET_CONFIG ioctl to update device
+ * configuration space.
+ */
+struct vduse_config_data {
+	__u32 offset;
+	__u32 length;
+	__u8 buffer[];
+};
+
+/* Set device configuration space */
+#define VDUSE_DEV_SET_CONFIG	_IOW(VDUSE_BASE, 0x12, struct vduse_config_data)
+
+/*
+ * Inject a config interrupt. It's usually used to notify virtio driver
+ * that device configuration space has changed.
+ */
+#define VDUSE_DEV_INJECT_CONFIG_IRQ	_IO(VDUSE_BASE, 0x13)
+
+/**
+ * struct vduse_vq_config - basic configuration of a virtqueue
+ * @index: virtqueue index
+ * @max_size: the max size of virtqueue
+ * @reserved: for future use, needs to be initialized to zero
+ *
+ * Structure used by VDUSE_VQ_SETUP ioctl to setup a virtqueue.
+ */
+struct vduse_vq_config {
+	__u32 index;
+	__u16 max_size;
+	__u16 reserved[13];
+};
+
+/*
+ * Setup the specified virtqueue. Make sure all virtqueues have been
+ * configured before the device is attached to vDPA bus.
+ */
+#define VDUSE_VQ_SETUP		_IOW(VDUSE_BASE, 0x14, struct vduse_vq_config)
+
+/**
+ * struct vduse_vq_state_split - split virtqueue state
+ * @avail_index: available index
+ */
+struct vduse_vq_state_split {
+	__u16 avail_index;
+};
+
+/**
+ * struct vduse_vq_state_packed - packed virtqueue state
+ * @last_avail_counter: last driver ring wrap counter observed by device
+ * @last_avail_idx: device available index
+ * @last_used_counter: device ring wrap counter
+ * @last_used_idx: used index
+ */
+struct vduse_vq_state_packed {
+	__u16 last_avail_counter;
+	__u16 last_avail_idx;
+	__u16 last_used_counter;
+	__u16 last_used_idx;
+};
+
+/**
+ * struct vduse_vq_info - information of a virtqueue
+ * @index: virtqueue index
+ * @num: the size of virtqueue
+ * @desc_addr: address of desc area
+ * @driver_addr: address of driver area
+ * @device_addr: address of device area
+ * @split: split virtqueue state
+ * @packed: packed virtqueue state
+ * @ready: ready status of virtqueue
+ *
+ * Structure used by VDUSE_VQ_GET_INFO ioctl to get virtqueue's information.
+ */
+struct vduse_vq_info {
+	__u32 index;
+	__u32 num;
+	__u64 desc_addr;
+	__u64 driver_addr;
+	__u64 device_addr;
+	union {
+		struct vduse_vq_state_split split;
+		struct vduse_vq_state_packed packed;
+	};
+	__u8 ready;
+};
+
+/* Get the specified virtqueue's information. Caller should set index field. */
+#define VDUSE_VQ_GET_INFO	_IOWR(VDUSE_BASE, 0x15, struct vduse_vq_info)
+
+/**
+ * struct vduse_vq_eventfd - eventfd configuration for a virtqueue
+ * @index: virtqueue index
+ * @fd: eventfd, -1 means de-assigning the eventfd
+ *
+ * Structure used by VDUSE_VQ_SETUP_KICKFD ioctl to setup kick eventfd.
+ */
+struct vduse_vq_eventfd {
+	__u32 index;
+#define VDUSE_EVENTFD_DEASSIGN -1
+	int fd;
+};
+
+/*
+ * Setup kick eventfd for specified virtqueue. The kick eventfd is used
+ * by VDUSE kernel module to notify userspace to consume the avail vring.
+ */
+#define VDUSE_VQ_SETUP_KICKFD	_IOW(VDUSE_BASE, 0x16, struct vduse_vq_eventfd)
+
+/*
+ * Inject an interrupt for specific virtqueue. It's used to notify virtio driver
+ * to consume the used vring.
+ */
+#define VDUSE_VQ_INJECT_IRQ	_IOW(VDUSE_BASE, 0x17, __u32)
+
+/* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */
+
+/**
+ * enum vduse_req_type - request type
+ * @VDUSE_GET_VQ_STATE: get the state for specified virtqueue from userspace
+ * @VDUSE_SET_STATUS: set the device status
+ * @VDUSE_UPDATE_IOTLB: Notify userspace to update the memory mapping for
+ *                      specified IOVA range via VDUSE_IOTLB_GET_FD ioctl
+ */
+enum vduse_req_type {
+	VDUSE_GET_VQ_STATE,
+	VDUSE_SET_STATUS,
+	VDUSE_UPDATE_IOTLB,
+};
+
+/**
+ * struct vduse_vq_state - virtqueue state
+ * @index: virtqueue index
+ * @split: split virtqueue state
+ * @packed: packed virtqueue state
+ */
+struct vduse_vq_state {
+	__u32 index;
+	union {
+		struct vduse_vq_state_split split;
+		struct vduse_vq_state_packed packed;
+	};
+};
+
+/**
+ * struct vduse_dev_status - device status
+ * @status: device status
+ */
+struct vduse_dev_status {
+	__u8 status;
+};
+
+/**
+ * struct vduse_iova_range - IOVA range [start, last]
+ * @start: start of the IOVA range
+ * @last: last of the IOVA range
+ */
+struct vduse_iova_range {
+	__u64 start;
+	__u64 last;
+};
+
+/**
+ * struct vduse_dev_request - control request
+ * @type: request type
+ * @request_id: request id
+ * @reserved: for future use
+ * @vq_state: virtqueue state, only index field is available
+ * @s: device status
+ * @iova: IOVA range for updating
+ * @padding: padding
+ *
+ * Structure used by read(2) on /dev/vduse/$NAME.
+ */
+struct vduse_dev_request {
+	__u32 type;
+	__u32 request_id;
+	__u32 reserved[4];
+	union {
+		struct vduse_vq_state vq_state;
+		struct vduse_dev_status s;
+		struct vduse_iova_range iova;
+		__u32 padding[32];
+	};
+};
+
+/**
+ * struct vduse_dev_response - response to control request
+ * @request_id: corresponding request id
+ * @result: the result of request
+ * @reserved: for future use, needs to be initialized to zero
+ * @vq_state: virtqueue state
+ * @padding: padding
+ *
+ * Structure used by write(2) on /dev/vduse/$NAME.
+ */
+struct vduse_dev_response {
+	__u32 request_id;
+#define VDUSE_REQ_RESULT_OK	0x00
+#define VDUSE_REQ_RESULT_FAILED	0x01
+	__u32 result;
+	__u32 reserved[4];
+	union {
+		struct vduse_vq_state vq_state;
+		__u32 padding[32];
+	};
+};
+
+#endif /* _UAPI_VDUSE_H_ */
diff --git a/include/uapi/linux/virtio_gpio.h b/include/uapi/linux/virtio_gpio.h
new file mode 100644
index 000000000000..0445f905d8cc
--- /dev/null
+++ b/include/uapi/linux/virtio_gpio.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#ifndef _LINUX_VIRTIO_GPIO_H
+#define _LINUX_VIRTIO_GPIO_H
+
+#include <linux/types.h>
+
+/* Virtio GPIO request types */
+#define VIRTIO_GPIO_MSG_GET_NAMES		0x0001
+#define VIRTIO_GPIO_MSG_GET_DIRECTION		0x0002
+#define VIRTIO_GPIO_MSG_SET_DIRECTION		0x0003
+#define VIRTIO_GPIO_MSG_GET_VALUE		0x0004
+#define VIRTIO_GPIO_MSG_SET_VALUE		0x0005
+
+/* Possible values of the status field */
+#define VIRTIO_GPIO_STATUS_OK			0x0
+#define VIRTIO_GPIO_STATUS_ERR			0x1
+
+/* Direction types */
+#define VIRTIO_GPIO_DIRECTION_NONE		0x00
+#define VIRTIO_GPIO_DIRECTION_OUT		0x01
+#define VIRTIO_GPIO_DIRECTION_IN		0x02
+
+struct virtio_gpio_config {
+	__le16 ngpio;
+	__u8 padding[2];
+	__le32 gpio_names_size;
+} __packed;
+
+/* Virtio GPIO Request / Response */
+struct virtio_gpio_request {
+	__le16 type;
+	__le16 gpio;
+	__le32 value;
+};
+
+struct virtio_gpio_response {
+	__u8 status;
+	__u8 value;
+};
+
+struct virtio_gpio_response_get_names {
+	__u8 status;
+	__u8 value[];
+};
+
+#endif /* _LINUX_VIRTIO_GPIO_H */
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index 6da2f80a85e8..80d76b75bccd 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -54,10 +54,20 @@
 #define VIRTIO_ID_SOUND			25 /* virtio sound */
 #define VIRTIO_ID_FS			26 /* virtio filesystem */
 #define VIRTIO_ID_PMEM			27 /* virtio pmem */
+#define VIRTIO_ID_RPMB			28 /* virtio rpmb */
 #define VIRTIO_ID_MAC80211_HWSIM	29 /* virtio mac80211-hwsim */
+#define VIRTIO_ID_VIDEO_ENCODER		30 /* virtio video encoder */
+#define VIRTIO_ID_VIDEO_DECODER		31 /* virtio video decoder */
 #define VIRTIO_ID_SCMI			32 /* virtio SCMI */
+#define VIRTIO_ID_NITRO_SEC_MOD		33 /* virtio nitro secure module*/
 #define VIRTIO_ID_I2C_ADAPTER		34 /* virtio i2c adapter */
+#define VIRTIO_ID_WATCHDOG		35 /* virtio watchdog */
+#define VIRTIO_ID_CAN			36 /* virtio can */
+#define VIRTIO_ID_DMABUF		37 /* virtio dmabuf */
+#define VIRTIO_ID_PARAM_SERV		38 /* virtio parameter server */
+#define VIRTIO_ID_AUDIO_POLICY		39 /* virtio audio policy */
 #define VIRTIO_ID_BT			40 /* virtio bluetooth */
+#define VIRTIO_ID_GPIO			41 /* virtio gpio */
 
 /*
  * Virtio Transitional IDs
diff --git a/include/uapi/linux/virtio_pcidev.h b/include/uapi/linux/virtio_pcidev.h
index 89daa88bcfef..668b07ce515b 100644
--- a/include/uapi/linux/virtio_pcidev.h
+++ b/include/uapi/linux/virtio_pcidev.h
@@ -9,13 +9,14 @@
 
 /**
  * enum virtio_pcidev_ops - virtual PCI device operations
+ * @VIRTIO_PCIDEV_OP_RESERVED: reserved to catch errors
  * @VIRTIO_PCIDEV_OP_CFG_READ: read config space, size is 1, 2, 4 or 8;
  *	the @data field should be filled in by the device (in little endian).
  * @VIRTIO_PCIDEV_OP_CFG_WRITE: write config space, size is 1, 2, 4 or 8;
  *	the @data field contains the data to write (in little endian).
- * @VIRTIO_PCIDEV_OP_BAR_READ: read BAR mem/pio, size can be variable;
+ * @VIRTIO_PCIDEV_OP_MMIO_READ: read BAR mem/pio, size can be variable;
  *	the @data field should be filled in by the device (in little endian).
- * @VIRTIO_PCIDEV_OP_BAR_WRITE: write BAR mem/pio, size can be variable;
+ * @VIRTIO_PCIDEV_OP_MMIO_WRITE: write BAR mem/pio, size can be variable;
  *	the @data field contains the data to write (in little endian).
  * @VIRTIO_PCIDEV_OP_MMIO_MEMSET: memset MMIO, size is variable but
  *	the @data field only has one byte (unlike @VIRTIO_PCIDEV_OP_MMIO_WRITE)
diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
index 3dd3555b2740..64738838bee5 100644
--- a/include/uapi/linux/virtio_vsock.h
+++ b/include/uapi/linux/virtio_vsock.h
@@ -97,7 +97,8 @@ enum virtio_vsock_shutdown {
 
 /* VIRTIO_VSOCK_OP_RW flags values */
 enum virtio_vsock_rw {
-	VIRTIO_VSOCK_SEQ_EOR = 1,
+	VIRTIO_VSOCK_SEQ_EOM = 1,
+	VIRTIO_VSOCK_SEQ_EOR = 2,
 };
 
 #endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index a47a731e4527..7cc2a0f3f2f5 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -276,7 +276,17 @@ enum hl_device_status {
 	HL_DEVICE_STATUS_OPERATIONAL,
 	HL_DEVICE_STATUS_IN_RESET,
 	HL_DEVICE_STATUS_MALFUNCTION,
-	HL_DEVICE_STATUS_NEEDS_RESET
+	HL_DEVICE_STATUS_NEEDS_RESET,
+	HL_DEVICE_STATUS_IN_DEVICE_CREATION,
+	HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_DEVICE_CREATION
+};
+
+enum hl_server_type {
+	HL_SERVER_TYPE_UNKNOWN = 0,
+	HL_SERVER_GAUDI_HLS1 = 1,
+	HL_SERVER_GAUDI_HLS1H = 2,
+	HL_SERVER_GAUDI_TYPE1 = 3,
+	HL_SERVER_GAUDI_TYPE2 = 4
 };
 
 /* Opcode for management ioctl
@@ -337,17 +347,49 @@ enum hl_device_status {
 #define HL_INFO_VERSION_MAX_LEN	128
 #define HL_INFO_CARD_NAME_MAX_LEN	16
 
+/**
+ * struct hl_info_hw_ip_info - hardware information on various IPs in the ASIC
+ * @sram_base_address: The first SRAM physical base address that is free to be
+ *                     used by the user.
+ * @dram_base_address: The first DRAM virtual or physical base address that is
+ *                     free to be used by the user.
+ * @dram_size: The DRAM size that is available to the user.
+ * @sram_size: The SRAM size that is available to the user.
+ * @num_of_events: The number of events that can be received from the f/w. This
+ *                 is needed so the user can what is the size of the h/w events
+ *                 array he needs to pass to the kernel when he wants to fetch
+ *                 the event counters.
+ * @device_id: PCI device ID of the ASIC.
+ * @module_id: Module ID of the ASIC for mezzanine cards in servers
+ *             (From OCP spec).
+ * @first_available_interrupt_id: The first available interrupt ID for the user
+ *                                to be used when it works with user interrupts.
+ * @server_type: Server type that the Gaudi ASIC is currently installed in.
+ *               The value is according to enum hl_server_type
+ * @cpld_version: CPLD version on the board.
+ * @psoc_pci_pll_nr: PCI PLL NR value. Needed by the profiler in some ASICs.
+ * @psoc_pci_pll_nf: PCI PLL NF value. Needed by the profiler in some ASICs.
+ * @psoc_pci_pll_od: PCI PLL OD value. Needed by the profiler in some ASICs.
+ * @psoc_pci_pll_div_factor: PCI PLL DIV factor value. Needed by the profiler
+ *                           in some ASICs.
+ * @tpc_enabled_mask: Bit-mask that represents which TPCs are enabled. Relevant
+ *                    for Goya/Gaudi only.
+ * @dram_enabled: Whether the DRAM is enabled.
+ * @cpucp_version: The CPUCP f/w version.
+ * @card_name: The card name as passed by the f/w.
+ * @dram_page_size: The DRAM physical page size.
+ */
 struct hl_info_hw_ip_info {
 	__u64 sram_base_address;
 	__u64 dram_base_address;
 	__u64 dram_size;
 	__u32 sram_size;
 	__u32 num_of_events;
-	__u32 device_id; /* PCI Device ID */
-	__u32 module_id; /* For mezzanine cards in servers (From OCP spec.) */
+	__u32 device_id;
+	__u32 module_id;
 	__u32 reserved;
 	__u16 first_available_interrupt_id;
-	__u16 reserved2;
+	__u16 server_type;
 	__u32 cpld_version;
 	__u32 psoc_pci_pll_nr;
 	__u32 psoc_pci_pll_nf;
@@ -358,7 +400,7 @@ struct hl_info_hw_ip_info {
 	__u8 pad[2];
 	__u8 cpucp_version[HL_INFO_VERSION_MAX_LEN];
 	__u8 card_name[HL_INFO_CARD_NAME_MAX_LEN];
-	__u64 reserved3;
+	__u64 reserved2;
 	__u64 dram_page_size;
 };
 
@@ -628,12 +670,21 @@ struct hl_cs_chunk {
 		__u64 cb_handle;
 
 		/* Relevant only when HL_CS_FLAGS_WAIT or
-		 * HL_CS_FLAGS_COLLECTIVE_WAIT is set.
+		 * HL_CS_FLAGS_COLLECTIVE_WAIT is set
 		 * This holds address of array of u64 values that contain
-		 * signal CS sequence numbers. The wait described by this job
-		 * will listen on all those signals (wait event per signal)
+		 * signal CS sequence numbers. The wait described by
+		 * this job will listen on all those signals
+		 * (wait event per signal)
 		 */
 		__u64 signal_seq_arr;
+
+		/*
+		 * Relevant only when HL_CS_FLAGS_WAIT or
+		 * HL_CS_FLAGS_COLLECTIVE_WAIT is set
+		 * along with HL_CS_FLAGS_ENCAP_SIGNALS.
+		 * This is the CS sequence which has the encapsulated signals.
+		 */
+		__u64 encaps_signal_seq;
 	};
 
 	/* Index of queue to put the CB on */
@@ -651,6 +702,17 @@ struct hl_cs_chunk {
 		 * Number of entries in signal_seq_arr
 		 */
 		__u32 num_signal_seq_arr;
+
+		/* Relevant only when HL_CS_FLAGS_WAIT or
+		 * HL_CS_FLAGS_COLLECTIVE_WAIT is set along
+		 * with HL_CS_FLAGS_ENCAP_SIGNALS
+		 * This set the signals range that the user want to wait for
+		 * out of the whole reserved signals range.
+		 * e.g if the signals range is 20, and user don't want
+		 * to wait for signal 8, so he set this offset to 7, then
+		 * he call the API again with 9 and so on till 20.
+		 */
+		__u32 encaps_signal_offset;
 	};
 
 	/* HL_CS_CHUNK_FLAGS_* */
@@ -678,6 +740,28 @@ struct hl_cs_chunk {
 #define HL_CS_FLAGS_CUSTOM_TIMEOUT		0x200
 #define HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT	0x400
 
+/*
+ * The encapsulated signals CS is merged into the existing CS ioctls.
+ * In order to use this feature need to follow the below procedure:
+ * 1. Reserve signals, set the CS type to HL_CS_FLAGS_RESERVE_SIGNALS_ONLY
+ *    the output of this API will be the SOB offset from CFG_BASE.
+ *    this address will be used to patch CB cmds to do the signaling for this
+ *    SOB by incrementing it's value.
+ *    for reverting the reservation use HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY
+ *    CS type, note that this might fail if out-of-sync happened to the SOB
+ *    value, in case other signaling request to the same SOB occurred between
+ *    reserve-unreserve calls.
+ * 2. Use the staged CS to do the encapsulated signaling jobs.
+ *    use HL_CS_FLAGS_STAGED_SUBMISSION and HL_CS_FLAGS_STAGED_SUBMISSION_FIRST
+ *    along with HL_CS_FLAGS_ENCAP_SIGNALS flag, and set encaps_signal_offset
+ *    field. This offset allows app to wait on part of the reserved signals.
+ * 3. Use WAIT/COLLECTIVE WAIT CS along with HL_CS_FLAGS_ENCAP_SIGNALS flag
+ *    to wait for the encapsulated signals.
+ */
+#define HL_CS_FLAGS_ENCAP_SIGNALS		0x800
+#define HL_CS_FLAGS_RESERVE_SIGNALS_ONLY	0x1000
+#define HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY	0x2000
+
 #define HL_CS_STATUS_SUCCESS		0
 
 #define HL_MAX_JOBS_PER_CS		512
@@ -690,10 +774,35 @@ struct hl_cs_in {
 	/* holds address of array of hl_cs_chunk for execution phase */
 	__u64 chunks_execute;
 
-	/* Sequence number of a staged submission CS
-	 * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set
-	 */
-	__u64 seq;
+	union {
+		/*
+		 * Sequence number of a staged submission CS
+		 * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set and
+		 * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST is unset.
+		 */
+		__u64 seq;
+
+		/*
+		 * Encapsulated signals handle id
+		 * Valid for two flows:
+		 * 1. CS with encapsulated signals:
+		 *    when HL_CS_FLAGS_STAGED_SUBMISSION and
+		 *    HL_CS_FLAGS_STAGED_SUBMISSION_FIRST
+		 *    and HL_CS_FLAGS_ENCAP_SIGNALS are set.
+		 * 2. unreserve signals:
+		 *    valid when HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY is set.
+		 */
+		__u32 encaps_sig_handle_id;
+
+		/* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */
+		struct {
+			/* Encapsulated signals number */
+			__u32 encaps_signals_count;
+
+			/* Encapsulated signals queue index (stream) */
+			__u32 encaps_signals_q_idx;
+		};
+	};
 
 	/* Number of chunks in restore phase array. Maximum number is
 	 * HL_MAX_JOBS_PER_CS
@@ -718,14 +827,31 @@ struct hl_cs_in {
 };
 
 struct hl_cs_out {
+	union {
+		/*
+		 * seq holds the sequence number of the CS to pass to wait
+		 * ioctl. All values are valid except for 0 and ULLONG_MAX
+		 */
+		__u64 seq;
+
+		/* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */
+		struct {
+			/* This is the resereved signal handle id */
+			__u32 handle_id;
+
+			/* This is the signals count */
+			__u32 count;
+		};
+	};
+
+	/* HL_CS_STATUS */
+	__u32 status;
+
 	/*
-	 * seq holds the sequence number of the CS to pass to wait ioctl. All
-	 * values are valid except for 0 and ULLONG_MAX
+	 * SOB base address offset
+	 * Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set
 	 */
-	__u64 seq;
-	/* HL_CS_STATUS_* */
-	__u32 status;
-	__u32 pad;
+	__u32 sob_base_addr_offset;
 };
 
 union hl_cs_args {
@@ -735,11 +861,18 @@ union hl_cs_args {
 
 #define HL_WAIT_CS_FLAGS_INTERRUPT	0x2
 #define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000
+#define HL_WAIT_CS_FLAGS_MULTI_CS	0x4
+
+#define HL_WAIT_MULTI_CS_LIST_MAX_LEN	32
 
 struct hl_wait_cs_in {
 	union {
 		struct {
-			/* Command submission sequence number */
+			/*
+			 * In case of wait_cs holds the CS sequence number.
+			 * In case of wait for multi CS hold a user pointer to
+			 * an array of CS sequence numbers
+			 */
 			__u64 seq;
 			/* Absolute timeout to wait for command submission
 			 * in microseconds
@@ -767,12 +900,17 @@ struct hl_wait_cs_in {
 
 	/* Context ID - Currently not in use */
 	__u32 ctx_id;
+
 	/* HL_WAIT_CS_FLAGS_*
 	 * If HL_WAIT_CS_FLAGS_INTERRUPT is set, this field should include
 	 * interrupt id according to HL_WAIT_CS_FLAGS_INTERRUPT_MASK, in order
 	 * not to specify an interrupt id ,set mask to all 1s.
 	 */
 	__u32 flags;
+
+	/* Multi CS API info- valid entries in multi-CS array */
+	__u8 seq_arr_len;
+	__u8 pad[7];
 };
 
 #define HL_WAIT_CS_STATUS_COMPLETED	0
@@ -789,8 +927,15 @@ struct hl_wait_cs_out {
 	__u32 status;
 	/* HL_WAIT_CS_STATUS_FLAG* */
 	__u32 flags;
-	/* valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set */
+	/*
+	 * valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set
+	 * for wait_cs: timestamp of CS completion
+	 * for wait_multi_cs: timestamp of FIRST CS completion
+	 */
 	__s64 timestamp_nsec;
+	/* multi CS completion bitmap */
+	__u32 cs_completion_map;
+	__u32 pad;
 };
 
 union hl_wait_cs_args {
@@ -813,6 +958,7 @@ union hl_wait_cs_args {
 #define HL_MEM_CONTIGUOUS	0x1
 #define HL_MEM_SHARED		0x2
 #define HL_MEM_USERPTR		0x4
+#define HL_MEM_FORCE_HINT	0x8
 
 struct hl_mem_in {
 	union {
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index 8597e6f22a1c..86be4a92b67b 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -50,6 +50,7 @@ enum {
 	MLX5_QP_FLAG_ALLOW_SCATTER_CQE	= 1 << 8,
 	MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE	= 1 << 9,
 	MLX5_QP_FLAG_UAR_PAGE_INDEX = 1 << 10,
+	MLX5_QP_FLAG_DCI_STREAM	= 1 << 11,
 };
 
 enum {
@@ -238,6 +239,11 @@ struct mlx5_ib_striding_rq_caps {
 	__u32 reserved;
 };
 
+struct mlx5_ib_dci_streams_caps {
+	__u8 max_log_num_concurent;
+	__u8 max_log_num_errored;
+};
+
 enum mlx5_ib_query_dev_resp_flags {
 	/* Support 128B CQE compression */
 	MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0,
@@ -266,7 +272,8 @@ struct mlx5_ib_query_device_resp {
 	struct mlx5_ib_sw_parsing_caps sw_parsing_caps;
 	struct mlx5_ib_striding_rq_caps striding_rq_caps;
 	__u32	tunnel_offloads_caps; /* enum mlx5_ib_tunnel_offloads */
-	__u32	reserved;
+	struct  mlx5_ib_dci_streams_caps dci_streams_caps;
+	__u16 reserved;
 };
 
 enum mlx5_ib_create_cq_flags {
@@ -313,6 +320,11 @@ struct mlx5_ib_create_srq_resp {
 	__u32	reserved;
 };
 
+struct mlx5_ib_create_qp_dci_streams {
+	__u8 log_num_concurent;
+	__u8 log_num_errored;
+};
+
 struct mlx5_ib_create_qp {
 	__aligned_u64 buf_addr;
 	__aligned_u64 db_addr;
@@ -327,7 +339,8 @@ struct mlx5_ib_create_qp {
 		__aligned_u64 access_key;
 	};
 	__u32  ece_options;
-	__u32  reserved;
+	struct  mlx5_ib_create_qp_dci_streams dci_streams;
+	__u16 reserved;
 };
 
 /* RX Hash function flags */
diff --git a/include/uapi/scsi/fc/fc_els.h b/include/uapi/scsi/fc/fc_els.h
index 91d4be987220..c9812c5c2fc4 100644
--- a/include/uapi/scsi/fc/fc_els.h
+++ b/include/uapi/scsi/fc/fc_els.h
@@ -41,6 +41,7 @@ enum fc_els_cmd {
 	ELS_REC =	0x13,	/* read exchange concise */
 	ELS_SRR =	0x14,	/* sequence retransmission request */
 	ELS_FPIN =	0x16,	/* Fabric Performance Impact Notification */
+	ELS_EDC =	0x17,	/* Exchange Diagnostic Capabilities */
 	ELS_RDP =	0x18,	/* Read Diagnostic Parameters */
 	ELS_RDF =	0x19,	/* Register Diagnostic Functions */
 	ELS_PRLI =	0x20,	/* process login */
@@ -111,6 +112,7 @@ enum fc_els_cmd {
 	[ELS_REC] =	"REC",			\
 	[ELS_SRR] =	"SRR",			\
 	[ELS_FPIN] =	"FPIN",			\
+	[ELS_EDC] =	"EDC",			\
 	[ELS_RDP] =	"RDP",			\
 	[ELS_RDF] =	"RDF",			\
 	[ELS_PRLI] =	"PRLI",			\
@@ -218,6 +220,10 @@ enum fc_els_rjt_explan {
 enum fc_ls_tlv_dtag {
 	ELS_DTAG_LS_REQ_INFO =		0x00000001,
 		/* Link Service Request Information Descriptor */
+	ELS_DTAG_LNK_FAULT_CAP =	0x0001000D,
+		/* Link Fault Capability Descriptor */
+	ELS_DTAG_CG_SIGNAL_CAP =	0x0001000F,
+		/* Congestion Signaling Capability Descriptor */
 	ELS_DTAG_LNK_INTEGRITY =	0x00020001,
 		/* Link Integrity Notification Descriptor */
 	ELS_DTAG_DELIVERY =		0x00020002,
@@ -236,6 +242,8 @@ enum fc_ls_tlv_dtag {
  */
 #define FC_LS_TLV_DTAG_INIT {					      \
 	{ ELS_DTAG_LS_REQ_INFO,		"Link Service Request Information" }, \
+	{ ELS_DTAG_LNK_FAULT_CAP,	"Link Fault Capability" },	      \
+	{ ELS_DTAG_CG_SIGNAL_CAP,	"Congestion Signaling Capability" },  \
 	{ ELS_DTAG_LNK_INTEGRITY,	"Link Integrity Notification" },      \
 	{ ELS_DTAG_DELIVERY,		"Delivery Notification Present" },    \
 	{ ELS_DTAG_PEER_CONGEST,	"Peer Congestion Notification" },     \
@@ -1144,4 +1152,102 @@ struct fc_els_rdf_resp {
 };
 
 
+/*
+ * Diagnostic Capability Descriptors for EDC ELS
+ */
+
+/*
+ * Diagnostic: Link Fault Capability Descriptor
+ */
+struct fc_diag_lnkflt_desc {
+	__be32		desc_tag;	/* Descriptor Tag (0x0001000D) */
+	__be32		desc_len;	/* Length of Descriptor (in bytes).
+					 * Size of descriptor excluding
+					 * desc_tag and desc_len fields.
+					 * 12 bytes
+					 */
+	__be32		degrade_activate_threshold;
+	__be32		degrade_deactivate_threshold;
+	__be32		fec_degrade_interval;
+};
+
+enum fc_edc_cg_signal_cap_types {
+	/* Note: Capability: bits 31:4 Rsvd; bits 3:0 are capabilities */
+	EDC_CG_SIG_NOTSUPPORTED =	0x00, /* neither supported */
+	EDC_CG_SIG_WARN_ONLY =		0x01,
+	EDC_CG_SIG_WARN_ALARM =		0x02, /* both supported */
+};
+
+/*
+ * Initializer useful for decoding table.
+ * Please keep this in sync with the above definitions.
+ */
+#define FC_EDC_CG_SIGNAL_CAP_TYPES_INIT {				\
+	{ EDC_CG_SIG_NOTSUPPORTED,	"Signaling Not Supported" },	\
+	{ EDC_CG_SIG_WARN_ONLY,		"Warning Signal" },		\
+	{ EDC_CG_SIG_WARN_ALARM,	"Warning and Alarm Signals" },	\
+}
+
+enum fc_diag_cg_sig_freq_types {
+	EDC_CG_SIGFREQ_CNT_MIN =	1,	/* Min Frequency Count */
+	EDC_CG_SIGFREQ_CNT_MAX =	999,	/* Max Frequency Count */
+
+	EDC_CG_SIGFREQ_SEC =		0x1,	/* Units: seconds */
+	EDC_CG_SIGFREQ_MSEC =		0x2,	/* Units: milliseconds */
+};
+
+struct fc_diag_cg_sig_freq {
+	__be16		count;		/* Time between signals
+					 * note: upper 6 bits rsvd
+					 */
+	__be16		units;		/* Time unit for count
+					 * note: upper 12 bits rsvd
+					 */
+};
+
+/*
+ * Diagnostic: Congestion Signaling Capability Descriptor
+ */
+struct fc_diag_cg_sig_desc {
+	__be32		desc_tag;	/* Descriptor Tag (0x0001000F) */
+	__be32		desc_len;	/* Length of Descriptor (in bytes).
+					 * Size of descriptor excluding
+					 * desc_tag and desc_len fields.
+					 * 16 bytes
+					 */
+	__be32				xmt_signal_capability;
+	struct fc_diag_cg_sig_freq	xmt_signal_frequency;
+	__be32				rcv_signal_capability;
+	struct fc_diag_cg_sig_freq	rcv_signal_frequency;
+};
+
+/*
+ * ELS_EDC - Exchange Diagnostic Capabilities
+ */
+struct fc_els_edc {
+	__u8		edc_cmd;	/* command (0x17) */
+	__u8		edc_zero[3];	/* specified as zero - part of cmd */
+	__be32		desc_len;	/* Length of Descriptor List (in bytes).
+					 * Size of ELS excluding edc_cmd,
+					 * edc_zero and desc_len fields.
+					 */
+	struct fc_tlv_desc	desc[0];
+					/* Diagnostic Descriptor list */
+};
+
+/*
+ * ELS EDC LS_ACC Response.
+ */
+struct fc_els_edc_resp {
+	struct fc_els_ls_acc	acc_hdr;
+	__be32			desc_list_len;	/* Length of response (in
+						 * bytes). Excludes acc_hdr
+						 * and desc_list_len fields.
+						 */
+	struct fc_els_lsri_desc	lsri;
+	struct fc_tlv_desc	desc[0];
+				    /* Supported Diagnostic Descriptor list */
+};
+
+
 #endif /* _FC_ELS_H_ */
diff --git a/init/Kconfig b/init/Kconfig
index e708180e9a59..11f8a845f259 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -137,6 +137,20 @@ config COMPILE_TEST
 	  here. If you are a user/distributor, say N here to exclude useless
 	  drivers to be distributed.
 
+config WERROR
+	bool "Compile the kernel with warnings as errors"
+	default COMPILE_TEST
+	help
+	  A kernel build should not cause any compiler warnings, and this
+	  enables the '-Werror' flag to enforce that rule by default.
+
+	  However, if you have a new (or very old) compiler with odd and
+	  unusual warnings, or you have some architecture with problems,
+	  you may need to disable this config option in order to
+	  successfully build the kernel.
+
+	  If in doubt, say Y.
+
 config UAPI_HEADER_TEST
 	bool "Compile test UAPI headers"
 	depends on HEADERS_INSTALL && CC_CAN_LINK
diff --git a/init/do_mounts.c b/init/do_mounts.c
index b691d6891e51..2ed30ff6c906 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -338,31 +338,22 @@ __setup("rootflags=", root_data_setup);
 __setup("rootfstype=", fs_names_setup);
 __setup("rootdelay=", root_delay_setup);
 
-static void __init get_fs_names(char *page)
+static int __init split_fs_names(char *page, char *names)
 {
-	char *s = page;
+	int count = 0;
+	char *p = page;
 
-	if (root_fs_names) {
-		strcpy(page, root_fs_names);
-		while (*s++) {
-			if (s[-1] == ',')
-				s[-1] = '\0';
-		}
-	} else {
-		int len = get_filesystem_list(page);
-		char *p, *next;
-
-		page[len] = '\0';
-		for (p = page-1; p; p = next) {
-			next = strchr(++p, '\n');
-			if (*p++ != '\t')
-				continue;
-			while ((*s++ = *p++) != '\n')
-				;
-			s[-1] = '\0';
-		}
+	strcpy(p, root_fs_names);
+	while (*p++) {
+		if (p[-1] == ',')
+			p[-1] = '\0';
 	}
-	*s = '\0';
+	*p = '\0';
+
+	for (p = page; *p; p += strlen(p)+1)
+		count++;
+
+	return count;
 }
 
 static int __init do_mount_root(const char *name, const char *fs,
@@ -408,12 +399,16 @@ void __init mount_block_root(char *name, int flags)
 	char *fs_names = page_address(page);
 	char *p;
 	char b[BDEVNAME_SIZE];
+	int num_fs, i;
 
 	scnprintf(b, BDEVNAME_SIZE, "unknown-block(%u,%u)",
 		  MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
-	get_fs_names(fs_names);
+	if (root_fs_names)
+		num_fs = split_fs_names(fs_names, root_fs_names);
+	else
+		num_fs = list_bdev_fs_names(fs_names, PAGE_SIZE);
 retry:
-	for (p = fs_names; *p; p += strlen(p)+1) {
+	for (i = 0, p = fs_names; i < num_fs; i++, p += strlen(p)+1) {
 		int err = do_mount_root(name, p, flags, root_mount_data);
 		switch (err) {
 			case 0:
@@ -442,7 +437,7 @@ retry:
 	printk("List of all partitions:\n");
 	printk_all_partitions();
 	printk("No filesystem could mount root, tried: ");
-	for (p = fs_names; *p; p += strlen(p)+1)
+	for (i = 0, p = fs_names; i < num_fs; i++, p += strlen(p)+1)
 		printk(" %s", p);
 	printk("\n");
 	panic("VFS: Unable to mount root fs on %s", b);
@@ -526,6 +521,47 @@ static int __init mount_cifs_root(void)
 }
 #endif
 
+static bool __init fs_is_nodev(char *fstype)
+{
+	struct file_system_type *fs = get_fs_type(fstype);
+	bool ret = false;
+
+	if (fs) {
+		ret = !(fs->fs_flags & FS_REQUIRES_DEV);
+		put_filesystem(fs);
+	}
+
+	return ret;
+}
+
+static int __init mount_nodev_root(void)
+{
+	char *fs_names, *fstype;
+	int err = -EINVAL;
+	int num_fs, i;
+
+	fs_names = (void *)__get_free_page(GFP_KERNEL);
+	if (!fs_names)
+		return -EINVAL;
+	num_fs = split_fs_names(fs_names, root_fs_names);
+
+	for (i = 0, fstype = fs_names; i < num_fs;
+	     i++, fstype += strlen(fstype) + 1) {
+		if (!fs_is_nodev(fstype))
+			continue;
+		err = do_mount_root(root_device_name, fstype, root_mountflags,
+				    root_mount_data);
+		if (!err)
+			break;
+		if (err != -EACCES && err != -EINVAL)
+			panic("VFS: Unable to mount root \"%s\" (%s), err=%d\n",
+			      root_device_name, fstype, err);
+	}
+
+	free_page((unsigned long)fs_names);
+	return err;
+}
+
 void __init mount_root(void)
 {
 #ifdef CONFIG_ROOT_NFS
@@ -542,6 +578,10 @@ void __init mount_root(void)
 		return;
 	}
 #endif
+	if (ROOT_DEV == 0 && root_device_name && root_fs_names) {
+		if (mount_nodev_root() == 0)
+			return;
+	}
 #ifdef CONFIG_BLOCK
 	{
 		int err = create_dev("/dev/root", ROOT_DEV);
diff --git a/init/initramfs.c b/init/initramfs.c
index af27abc59643..a842c0544745 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -15,6 +15,7 @@
 #include <linux/mm.h>
 #include <linux/namei.h>
 #include <linux/init_syscalls.h>
+#include <linux/umh.h>
 
 static ssize_t __init xwrite(struct file *file, const char *p, size_t count,
 		loff_t *pos)
@@ -727,6 +728,7 @@ static int __init populate_rootfs(void)
 {
 	initramfs_cookie = async_schedule_domain(do_populate_rootfs, NULL,
 						 &initramfs_domain);
+	usermodehelper_enable();
 	if (!initramfs_async)
 		wait_for_initramfs();
 	return 0;
diff --git a/init/main.c b/init/main.c
index daad6979f782..5c9a48df90e1 100644
--- a/init/main.c
+++ b/init/main.c
@@ -153,10 +153,10 @@ static char *extra_init_args;
 #ifdef CONFIG_BOOT_CONFIG
 /* Is bootconfig on command line? */
 static bool bootconfig_found;
-static bool initargs_found;
+static size_t initargs_offs;
 #else
 # define bootconfig_found false
-# define initargs_found false
+# define initargs_offs 0
 #endif
 
 static char *execute_command;
@@ -422,9 +422,9 @@ static void __init setup_boot_config(void)
 	if (IS_ERR(err) || !bootconfig_found)
 		return;
 
-	/* parse_args() stops at '--' and returns an address */
+	/* parse_args() stops at the next param of '--' and returns an address */
 	if (err)
-		initargs_found = true;
+		initargs_offs = err - tmp_cmdline;
 
 	if (!data) {
 		pr_err("'bootconfig' found on command line, but no bootconfig found\n");
@@ -468,7 +468,12 @@ static void __init setup_boot_config(void)
 	return;
 }
 
-#else
+static void __init exit_boot_config(void)
+{
+	xbc_destroy_all();
+}
+
+#else	/* !CONFIG_BOOT_CONFIG */
 
 static void __init setup_boot_config(void)
 {
@@ -481,7 +486,11 @@ static int __init warn_bootconfig(char *str)
 	pr_warn("WARNING: 'bootconfig' found on the kernel command line but CONFIG_BOOT_CONFIG is not set.\n");
 	return 0;
 }
-#endif
+
+#define exit_boot_config()	do {} while (0)
+
+#endif	/* CONFIG_BOOT_CONFIG */
+
 early_param("bootconfig", warn_bootconfig);
 
 /* Change NUL term back to "=", to make "param" the whole string. */
@@ -646,16 +655,21 @@ static void __init setup_command_line(char *command_line)
 		 * Append supplemental init boot args to saved_command_line
 		 * so that user can check what command line options passed
 		 * to init.
+		 * The order should always be
+		 * " -- "[bootconfig init-param][cmdline init-param]
 		 */
-		len = strlen(saved_command_line);
-		if (initargs_found) {
-			saved_command_line[len++] = ' ';
+		if (initargs_offs) {
+			len = xlen + initargs_offs;
+			strcpy(saved_command_line + len, extra_init_args);
+			len += ilen - 4;	/* strlen(extra_init_args) */
+			strcpy(saved_command_line + len,
+				boot_command_line + initargs_offs - 1);
 		} else {
+			len = strlen(saved_command_line);
 			strcpy(saved_command_line + len, " -- ");
 			len += 4;
+			strcpy(saved_command_line + len, extra_init_args);
 		}
-
-		strcpy(saved_command_line + len, extra_init_args);
 	}
 }
 
@@ -777,6 +791,8 @@ void __init __weak poking_init(void) { }
 
 void __init __weak pgtable_cache_init(void) { }
 
+void __init __weak trap_init(void) { }
+
 bool initcall_debug;
 core_param(initcall_debug, initcall_debug, bool, 0644);
 
@@ -1392,7 +1408,6 @@ static void __init do_basic_setup(void)
 	driver_init();
 	init_irq_proc();
 	do_ctors();
-	usermodehelper_enable();
 	do_initcalls();
 }
 
@@ -1493,6 +1508,7 @@ static int __ref kernel_init(void *unused)
 	kprobe_free_init_mem();
 	ftrace_free_init_mem();
 	kgdb_free_init_mem();
+	exit_boot_config();
 	free_initmem();
 	mark_readonly();
 
diff --git a/init/noinitramfs.c b/init/noinitramfs.c
index 3d62b07f3bb9..d1d26b93d25c 100644
--- a/init/noinitramfs.c
+++ b/init/noinitramfs.c
@@ -10,6 +10,7 @@
 #include <linux/kdev_t.h>
 #include <linux/syscalls.h>
 #include <linux/init_syscalls.h>
+#include <linux/umh.h>
 
 /*
  * Create a simple rootfs that is similar to the default initramfs
@@ -18,6 +19,7 @@ static int __init default_rootfs(void)
 {
 	int err;
 
+	usermodehelper_enable();
 	err = init_mkdir("/dev", 0755);
 	if (err < 0)
 		goto out;
diff --git a/ipc/msg.c b/ipc/msg.c
index 6810276d6bb9..a0d05775af2c 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -147,7 +147,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 	key_t key = params->key;
 	int msgflg = params->flg;
 
-	msq = kmalloc(sizeof(*msq), GFP_KERNEL);
+	msq = kmalloc(sizeof(*msq), GFP_KERNEL_ACCOUNT);
 	if (unlikely(!msq))
 		return -ENOMEM;
 
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 7bd0766ddc3b..ae83f0f2651b 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -42,7 +42,7 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 		goto fail;
 
 	err = -ENOMEM;
-	ns = kzalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
+	ns = kzalloc(sizeof(struct ipc_namespace), GFP_KERNEL_ACCOUNT);
 	if (ns == NULL)
 		goto fail_dec;
 
diff --git a/ipc/sem.c b/ipc/sem.c
index 971e75d28364..f833238df1ce 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -514,7 +514,7 @@ static struct sem_array *sem_alloc(size_t nsems)
 	if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0]))
 		return NULL;
 
-	sma = kvzalloc(struct_size(sma, sems, nsems), GFP_KERNEL);
+	sma = kvzalloc(struct_size(sma, sems, nsems), GFP_KERNEL_ACCOUNT);
 	if (unlikely(!sma))
 		return NULL;
 
@@ -1855,7 +1855,7 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp)
 
 	undo_list = current->sysvsem.undo_list;
 	if (!undo_list) {
-		undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL);
+		undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL_ACCOUNT);
 		if (undo_list == NULL)
 			return -ENOMEM;
 		spin_lock_init(&undo_list->lock);
@@ -1941,7 +1941,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 
 	/* step 2: allocate new undo structure */
 	new = kvzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems,
-		       GFP_KERNEL);
+		       GFP_KERNEL_ACCOUNT);
 	if (!new) {
 		ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 		return ERR_PTR(-ENOMEM);
@@ -1984,46 +1984,34 @@ out:
 	return un;
 }
 
-static long do_semtimedop(int semid, struct sembuf __user *tsops,
-		unsigned nsops, const struct timespec64 *timeout)
+long __do_semtimedop(int semid, struct sembuf *sops,
+		unsigned nsops, const struct timespec64 *timeout,
+		struct ipc_namespace *ns)
 {
 	int error = -EINVAL;
 	struct sem_array *sma;
-	struct sembuf fast_sops[SEMOPM_FAST];
-	struct sembuf *sops = fast_sops, *sop;
+	struct sembuf *sop;
 	struct sem_undo *un;
 	int max, locknum;
 	bool undos = false, alter = false, dupsop = false;
 	struct sem_queue queue;
 	unsigned long dup = 0, jiffies_left = 0;
-	struct ipc_namespace *ns;
-
-	ns = current->nsproxy->ipc_ns;
 
 	if (nsops < 1 || semid < 0)
 		return -EINVAL;
 	if (nsops > ns->sc_semopm)
 		return -E2BIG;
-	if (nsops > SEMOPM_FAST) {
-		sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL);
-		if (sops == NULL)
-			return -ENOMEM;
-	}
-
-	if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) {
-		error =  -EFAULT;
-		goto out_free;
-	}
 
 	if (timeout) {
 		if (timeout->tv_sec < 0 || timeout->tv_nsec < 0 ||
 			timeout->tv_nsec >= 1000000000L) {
 			error = -EINVAL;
-			goto out_free;
+			goto out;
 		}
 		jiffies_left = timespec64_to_jiffies(timeout);
 	}
 
+
 	max = 0;
 	for (sop = sops; sop < sops + nsops; sop++) {
 		unsigned long mask = 1ULL << ((sop->sem_num) % BITS_PER_LONG);
@@ -2052,7 +2040,7 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops,
 		un = find_alloc_undo(ns, semid);
 		if (IS_ERR(un)) {
 			error = PTR_ERR(un);
-			goto out_free;
+			goto out;
 		}
 	} else {
 		un = NULL;
@@ -2063,25 +2051,25 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops,
 	if (IS_ERR(sma)) {
 		rcu_read_unlock();
 		error = PTR_ERR(sma);
-		goto out_free;
+		goto out;
 	}
 
 	error = -EFBIG;
 	if (max >= sma->sem_nsems) {
 		rcu_read_unlock();
-		goto out_free;
+		goto out;
 	}
 
 	error = -EACCES;
 	if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) {
 		rcu_read_unlock();
-		goto out_free;
+		goto out;
 	}
 
 	error = security_sem_semop(&sma->sem_perm, sops, nsops, alter);
 	if (error) {
 		rcu_read_unlock();
-		goto out_free;
+		goto out;
 	}
 
 	error = -EIDRM;
@@ -2095,7 +2083,7 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops,
 	 * entangled here and why it's RMID race safe on comments at sem_lock()
 	 */
 	if (!ipc_valid_object(&sma->sem_perm))
-		goto out_unlock_free;
+		goto out_unlock;
 	/*
 	 * semid identifiers are not unique - find_alloc_undo may have
 	 * allocated an undo structure, it was invalidated by an RMID
@@ -2104,7 +2092,7 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops,
 	 * "un" itself is guaranteed by rcu.
 	 */
 	if (un && un->semid == -1)
-		goto out_unlock_free;
+		goto out_unlock;
 
 	queue.sops = sops;
 	queue.nsops = nsops;
@@ -2130,10 +2118,10 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops,
 		rcu_read_unlock();
 		wake_up_q(&wake_q);
 
-		goto out_free;
+		goto out;
 	}
 	if (error < 0) /* non-blocking error path */
-		goto out_unlock_free;
+		goto out_unlock;
 
 	/*
 	 * We need to sleep on this operation, so we put the current
@@ -2198,14 +2186,14 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops,
 		if (error != -EINTR) {
 			/* see SEM_BARRIER_2 for purpose/pairing */
 			smp_acquire__after_ctrl_dep();
-			goto out_free;
+			goto out;
 		}
 
 		rcu_read_lock();
 		locknum = sem_lock(sma, sops, nsops);
 
 		if (!ipc_valid_object(&sma->sem_perm))
-			goto out_unlock_free;
+			goto out_unlock;
 
 		/*
 		 * No necessity for any barrier: We are protect by sem_lock()
@@ -2217,7 +2205,7 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops,
 		 * Leave without unlink_queue(), but with sem_unlock().
 		 */
 		if (error != -EINTR)
-			goto out_unlock_free;
+			goto out_unlock;
 
 		/*
 		 * If an interrupt occurred we have to clean up the queue.
@@ -2228,13 +2216,45 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops,
 
 	unlink_queue(sma, &queue);
 
-out_unlock_free:
+out_unlock:
 	sem_unlock(sma, locknum);
 	rcu_read_unlock();
+out:
+	return error;
+}
+
+static long do_semtimedop(int semid, struct sembuf __user *tsops,
+		unsigned nsops, const struct timespec64 *timeout)
+{
+	struct sembuf fast_sops[SEMOPM_FAST];
+	struct sembuf *sops = fast_sops;
+	struct ipc_namespace *ns;
+	int ret;
+
+	ns = current->nsproxy->ipc_ns;
+	if (nsops > ns->sc_semopm)
+		return -E2BIG;
+	if (nsops < 1)
+		return -EINVAL;
+
+	if (nsops > SEMOPM_FAST) {
+		sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL_ACCOUNT);
+		if (sops == NULL)
+			return -ENOMEM;
+	}
+
+	if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) {
+		ret =  -EFAULT;
+		goto out_free;
+	}
+
+	ret = __do_semtimedop(semid, sops, nsops, timeout, ns);
+
 out_free:
 	if (sops != fast_sops)
 		kvfree(sops);
-	return error;
+
+	return ret;
 }
 
 long ksys_semtimedop(int semid, struct sembuf __user *tsops,
diff --git a/ipc/shm.c b/ipc/shm.c
index 748933e376ca..ab749be6d8b7 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -619,7 +619,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 			ns->shm_tot + numpages > ns->shm_ctlall)
 		return -ENOSPC;
 
-	shp = kmalloc(sizeof(*shp), GFP_KERNEL);
+	shp = kmalloc(sizeof(*shp), GFP_KERNEL_ACCOUNT);
 	if (unlikely(!shp))
 		return -ENOMEM;
 
diff --git a/ipc/util.c b/ipc/util.c
index 0027e47626b7..d48d8cfa1f3f 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -788,21 +788,13 @@ struct pid_namespace *ipc_seq_pid_ns(struct seq_file *s)
 static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos,
 					      loff_t *new_pos)
 {
-	struct kern_ipc_perm *ipc;
-	int total, id;
-
-	total = 0;
-	for (id = 0; id < pos && total < ids->in_use; id++) {
-		ipc = idr_find(&ids->ipcs_idr, id);
-		if (ipc != NULL)
-			total++;
-	}
+	struct kern_ipc_perm *ipc = NULL;
+	int max_idx = ipc_get_maxidx(ids);
 
-	ipc = NULL;
-	if (total >= ids->in_use)
+	if (max_idx == -1 || pos > max_idx)
 		goto out;
 
-	for (; pos < ipc_mni; pos++) {
+	for (; pos <= max_idx; pos++) {
 		ipc = idr_find(&ids->ipcs_idr, pos);
 		if (ipc != NULL) {
 			rcu_read_lock();
diff --git a/kernel/acct.c b/kernel/acct.c
index a64102be2bb0..23a7ab8e6cbc 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -478,7 +478,7 @@ static void do_acct_process(struct bsd_acct_struct *acct)
 	/*
 	 * Accounting records are not subject to resource limits.
 	 */
-	flim = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+	flim = rlimit(RLIMIT_FSIZE);
 	current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
 	/* Perform file operations on behalf of whoever enabled accounting */
 	orig_cred = override_creds(file->f_cred);
diff --git a/kernel/cgroup/namespace.c b/kernel/cgroup/namespace.c
index f5e8828c109c..0d5c29879a50 100644
--- a/kernel/cgroup/namespace.c
+++ b/kernel/cgroup/namespace.c
@@ -24,7 +24,7 @@ static struct cgroup_namespace *alloc_cgroup_ns(void)
 	struct cgroup_namespace *new_ns;
 	int ret;
 
-	new_ns = kzalloc(sizeof(struct cgroup_namespace), GFP_KERNEL);
+	new_ns = kzalloc(sizeof(struct cgroup_namespace), GFP_KERNEL_ACCOUNT);
 	if (!new_ns)
 		return ERR_PTR(-ENOMEM);
 	ret = ns_alloc_inum(&new_ns->ns);
diff --git a/kernel/compat.c b/kernel/compat.c
index 05adfd6fa8bf..55551989d9da 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -269,24 +269,3 @@ get_compat_sigset(sigset_t *set, const compat_sigset_t __user *compat)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(get_compat_sigset);
-
-/*
- * Allocate user-space memory for the duration of a single system call,
- * in order to marshall parameters inside a compat thunk.
- */
-void __user *compat_alloc_user_space(unsigned long len)
-{
-	void __user *ptr;
-
-	/* If len would occupy more than half of the entire compat space... */
-	if (unlikely(len > (((compat_uptr_t)~0) >> 1)))
-		return NULL;
-
-	ptr = arch_compat_alloc_user_space(len);
-
-	if (unlikely(!access_ok(ptr, len)))
-		return NULL;
-
-	return ptr;
-}
-EXPORT_SYMBOL_GPL(compat_alloc_user_space);
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index b4aa6bb6b2bd..da06a5553835 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Kernel Debug Core
  *
@@ -22,10 +23,6 @@
  *
  * Original KGDB stub: David Grothe <dave@gcom.com>,
  * Tigran Aivazian <tigran@sco.com>
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
  */
 
 #define pr_fmt(fmt) "KGDB: " fmt
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index b6f28fad4307..9d34d2364b5a 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Kernel Debug Core
  *
@@ -22,10 +23,6 @@
  *
  * Original KGDB stub: David Grothe <dave@gcom.com>,
  * Tigran Aivazian <tigran@sco.com>
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
  */
 
 #include <linux/kernel.h>
diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c
index 2168f8dacb99..372025cf1ca3 100644
--- a/kernel/debug/kdb/kdb_bp.c
+++ b/kernel/debug/kdb/kdb_bp.c
@@ -523,51 +523,51 @@ static int kdb_ss(int argc, const char **argv)
 }
 
 static kdbtab_t bptab[] = {
-	{	.cmd_name = "bp",
-		.cmd_func = kdb_bp,
-		.cmd_usage = "[<vaddr>]",
-		.cmd_help = "Set/Display breakpoints",
-		.cmd_flags = KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS,
+	{	.name = "bp",
+		.func = kdb_bp,
+		.usage = "[<vaddr>]",
+		.help = "Set/Display breakpoints",
+		.flags = KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS,
 	},
-	{	.cmd_name = "bl",
-		.cmd_func = kdb_bp,
-		.cmd_usage = "[<vaddr>]",
-		.cmd_help = "Display breakpoints",
-		.cmd_flags = KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS,
+	{	.name = "bl",
+		.func = kdb_bp,
+		.usage = "[<vaddr>]",
+		.help = "Display breakpoints",
+		.flags = KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS,
 	},
-	{	.cmd_name = "bc",
-		.cmd_func = kdb_bc,
-		.cmd_usage = "<bpnum>",
-		.cmd_help = "Clear Breakpoint",
-		.cmd_flags = KDB_ENABLE_FLOW_CTRL,
+	{	.name = "bc",
+		.func = kdb_bc,
+		.usage = "<bpnum>",
+		.help = "Clear Breakpoint",
+		.flags = KDB_ENABLE_FLOW_CTRL,
 	},
-	{	.cmd_name = "be",
-		.cmd_func = kdb_bc,
-		.cmd_usage = "<bpnum>",
-		.cmd_help = "Enable Breakpoint",
-		.cmd_flags = KDB_ENABLE_FLOW_CTRL,
+	{	.name = "be",
+		.func = kdb_bc,
+		.usage = "<bpnum>",
+		.help = "Enable Breakpoint",
+		.flags = KDB_ENABLE_FLOW_CTRL,
 	},
-	{	.cmd_name = "bd",
-		.cmd_func = kdb_bc,
-		.cmd_usage = "<bpnum>",
-		.cmd_help = "Disable Breakpoint",
-		.cmd_flags = KDB_ENABLE_FLOW_CTRL,
+	{	.name = "bd",
+		.func = kdb_bc,
+		.usage = "<bpnum>",
+		.help = "Disable Breakpoint",
+		.flags = KDB_ENABLE_FLOW_CTRL,
 	},
-	{	.cmd_name = "ss",
-		.cmd_func = kdb_ss,
-		.cmd_usage = "",
-		.cmd_help = "Single Step",
-		.cmd_minlen = 1,
-		.cmd_flags = KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS,
+	{	.name = "ss",
+		.func = kdb_ss,
+		.usage = "",
+		.help = "Single Step",
+		.minlen = 1,
+		.flags = KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS,
 	},
 };
 
 static kdbtab_t bphcmd = {
-	.cmd_name = "bph",
-	.cmd_func = kdb_bp,
-	.cmd_usage = "[<vaddr>]",
-	.cmd_help = "[datar [length]|dataw [length]]   Set hw brk",
-	.cmd_flags = KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS,
+	.name = "bph",
+	.func = kdb_bp,
+	.usage = "[<vaddr>]",
+	.help = "[datar [length]|dataw [length]]   Set hw brk",
+	.flags = KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS,
 };
 
 /* Initialize the breakpoint table and register	breakpoint commands. */
diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c
index 0220afda3200..e91fc3e4edd5 100644
--- a/kernel/debug/kdb/kdb_debugger.c
+++ b/kernel/debug/kdb/kdb_debugger.c
@@ -140,7 +140,6 @@ int kdb_stub(struct kgdb_state *ks)
 	 */
 	kdb_common_deinit_state();
 	KDB_STATE_CLEAR(PAGER);
-	kdbnearsym_cleanup();
 	if (error == KDB_CMD_KGDB) {
 		if (KDB_STATE(DOING_KGDB))
 			KDB_STATE_CLEAR(DOING_KGDB);
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index d8ee5647b732..fa6deda894a1 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -33,7 +33,6 @@
 #include <linux/kallsyms.h>
 #include <linux/kgdb.h>
 #include <linux/kdb.h>
-#include <linux/list.h>
 #include <linux/notifier.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
@@ -654,16 +653,17 @@ static void kdb_cmderror(int diag)
  * Returns:
  *	zero for success, a kdb diagnostic if error
  */
-struct defcmd_set {
-	int count;
-	bool usable;
-	char *name;
-	char *usage;
-	char *help;
-	char **command;
+struct kdb_macro {
+	kdbtab_t cmd;			/* Macro command */
+	struct list_head statements;	/* Associated statement list */
 };
-static struct defcmd_set *defcmd_set;
-static int defcmd_set_count;
+
+struct kdb_macro_statement {
+	char *statement;		/* Statement text */
+	struct list_head list_node;	/* Statement list node */
+};
+
+static struct kdb_macro *kdb_macro;
 static bool defcmd_in_progress;
 
 /* Forward references */
@@ -671,53 +671,55 @@ static int kdb_exec_defcmd(int argc, const char **argv);
 
 static int kdb_defcmd2(const char *cmdstr, const char *argv0)
 {
-	struct defcmd_set *s = defcmd_set + defcmd_set_count - 1;
-	char **save_command = s->command;
+	struct kdb_macro_statement *kms;
+
+	if (!kdb_macro)
+		return KDB_NOTIMP;
+
 	if (strcmp(argv0, "endefcmd") == 0) {
 		defcmd_in_progress = false;
-		if (!s->count)
-			s->usable = false;
-		if (s->usable)
-			/* macros are always safe because when executed each
-			 * internal command re-enters kdb_parse() and is
-			 * safety checked individually.
-			 */
-			kdb_register_flags(s->name, kdb_exec_defcmd, s->usage,
-					   s->help, 0,
-					   KDB_ENABLE_ALWAYS_SAFE);
+		if (!list_empty(&kdb_macro->statements))
+			kdb_register(&kdb_macro->cmd);
 		return 0;
 	}
-	if (!s->usable)
-		return KDB_NOTIMP;
-	s->command = kcalloc(s->count + 1, sizeof(*(s->command)), GFP_KDB);
-	if (!s->command) {
-		kdb_printf("Could not allocate new kdb_defcmd table for %s\n",
+
+	kms = kmalloc(sizeof(*kms), GFP_KDB);
+	if (!kms) {
+		kdb_printf("Could not allocate new kdb macro command: %s\n",
 			   cmdstr);
-		s->usable = false;
 		return KDB_NOTIMP;
 	}
-	memcpy(s->command, save_command, s->count * sizeof(*(s->command)));
-	s->command[s->count++] = kdb_strdup(cmdstr, GFP_KDB);
-	kfree(save_command);
+
+	kms->statement = kdb_strdup(cmdstr, GFP_KDB);
+	list_add_tail(&kms->list_node, &kdb_macro->statements);
+
 	return 0;
 }
 
 static int kdb_defcmd(int argc, const char **argv)
 {
-	struct defcmd_set *save_defcmd_set = defcmd_set, *s;
+	kdbtab_t *mp;
+
 	if (defcmd_in_progress) {
 		kdb_printf("kdb: nested defcmd detected, assuming missing "
 			   "endefcmd\n");
 		kdb_defcmd2("endefcmd", "endefcmd");
 	}
 	if (argc == 0) {
-		int i;
-		for (s = defcmd_set; s < defcmd_set + defcmd_set_count; ++s) {
-			kdb_printf("defcmd %s \"%s\" \"%s\"\n", s->name,
-				   s->usage, s->help);
-			for (i = 0; i < s->count; ++i)
-				kdb_printf("%s", s->command[i]);
-			kdb_printf("endefcmd\n");
+		kdbtab_t *kp;
+		struct kdb_macro *kmp;
+		struct kdb_macro_statement *kms;
+
+		list_for_each_entry(kp, &kdb_cmds_head, list_node) {
+			if (kp->func == kdb_exec_defcmd) {
+				kdb_printf("defcmd %s \"%s\" \"%s\"\n",
+					   kp->name, kp->usage, kp->help);
+				kmp = container_of(kp, struct kdb_macro, cmd);
+				list_for_each_entry(kms, &kmp->statements,
+						    list_node)
+					kdb_printf("%s", kms->statement);
+				kdb_printf("endefcmd\n");
+			}
 		}
 		return 0;
 	}
@@ -727,45 +729,43 @@ static int kdb_defcmd(int argc, const char **argv)
 		kdb_printf("Command only available during kdb_init()\n");
 		return KDB_NOTIMP;
 	}
-	defcmd_set = kmalloc_array(defcmd_set_count + 1, sizeof(*defcmd_set),
-				   GFP_KDB);
-	if (!defcmd_set)
+	kdb_macro = kzalloc(sizeof(*kdb_macro), GFP_KDB);
+	if (!kdb_macro)
 		goto fail_defcmd;
-	memcpy(defcmd_set, save_defcmd_set,
-	       defcmd_set_count * sizeof(*defcmd_set));
-	s = defcmd_set + defcmd_set_count;
-	memset(s, 0, sizeof(*s));
-	s->usable = true;
-	s->name = kdb_strdup(argv[1], GFP_KDB);
-	if (!s->name)
+
+	mp = &kdb_macro->cmd;
+	mp->func = kdb_exec_defcmd;
+	mp->minlen = 0;
+	mp->flags = KDB_ENABLE_ALWAYS_SAFE;
+	mp->name = kdb_strdup(argv[1], GFP_KDB);
+	if (!mp->name)
 		goto fail_name;
-	s->usage = kdb_strdup(argv[2], GFP_KDB);
-	if (!s->usage)
+	mp->usage = kdb_strdup(argv[2], GFP_KDB);
+	if (!mp->usage)
 		goto fail_usage;
-	s->help = kdb_strdup(argv[3], GFP_KDB);
-	if (!s->help)
+	mp->help = kdb_strdup(argv[3], GFP_KDB);
+	if (!mp->help)
 		goto fail_help;
-	if (s->usage[0] == '"') {
-		strcpy(s->usage, argv[2]+1);
-		s->usage[strlen(s->usage)-1] = '\0';
+	if (mp->usage[0] == '"') {
+		strcpy(mp->usage, argv[2]+1);
+		mp->usage[strlen(mp->usage)-1] = '\0';
 	}
-	if (s->help[0] == '"') {
-		strcpy(s->help, argv[3]+1);
-		s->help[strlen(s->help)-1] = '\0';
+	if (mp->help[0] == '"') {
+		strcpy(mp->help, argv[3]+1);
+		mp->help[strlen(mp->help)-1] = '\0';
 	}
-	++defcmd_set_count;
+
+	INIT_LIST_HEAD(&kdb_macro->statements);
 	defcmd_in_progress = true;
-	kfree(save_defcmd_set);
 	return 0;
 fail_help:
-	kfree(s->usage);
+	kfree(mp->usage);
 fail_usage:
-	kfree(s->name);
+	kfree(mp->name);
 fail_name:
-	kfree(defcmd_set);
+	kfree(kdb_macro);
 fail_defcmd:
-	kdb_printf("Could not allocate new defcmd_set entry for %s\n", argv[1]);
-	defcmd_set = save_defcmd_set;
+	kdb_printf("Could not allocate new kdb_macro entry for %s\n", argv[1]);
 	return KDB_NOTIMP;
 }
 
@@ -780,25 +780,31 @@ fail_defcmd:
  */
 static int kdb_exec_defcmd(int argc, const char **argv)
 {
-	int i, ret;
-	struct defcmd_set *s;
+	int ret;
+	kdbtab_t *kp;
+	struct kdb_macro *kmp;
+	struct kdb_macro_statement *kms;
+
 	if (argc != 0)
 		return KDB_ARGCOUNT;
-	for (s = defcmd_set, i = 0; i < defcmd_set_count; ++i, ++s) {
-		if (strcmp(s->name, argv[0]) == 0)
+
+	list_for_each_entry(kp, &kdb_cmds_head, list_node) {
+		if (strcmp(kp->name, argv[0]) == 0)
 			break;
 	}
-	if (i == defcmd_set_count) {
+	if (list_entry_is_head(kp, &kdb_cmds_head, list_node)) {
 		kdb_printf("kdb_exec_defcmd: could not find commands for %s\n",
 			   argv[0]);
 		return KDB_NOTIMP;
 	}
-	for (i = 0; i < s->count; ++i) {
-		/* Recursive use of kdb_parse, do not use argv after
-		 * this point */
+	kmp = container_of(kp, struct kdb_macro, cmd);
+	list_for_each_entry(kms, &kmp->statements, list_node) {
+		/*
+		 * Recursive use of kdb_parse, do not use argv after this point.
+		 */
 		argv = NULL;
-		kdb_printf("[%s]kdb> %s\n", s->name, s->command[i]);
-		ret = kdb_parse(s->command[i]);
+		kdb_printf("[%s]kdb> %s\n", kmp->cmd.name, kms->statement);
+		ret = kdb_parse(kms->statement);
 		if (ret)
 			return ret;
 	}
@@ -1009,11 +1015,11 @@ int kdb_parse(const char *cmdstr)
 		 * If this command is allowed to be abbreviated,
 		 * check to see if this is it.
 		 */
-		if (tp->cmd_minlen && (strlen(argv[0]) <= tp->cmd_minlen) &&
-		    (strncmp(argv[0], tp->cmd_name, tp->cmd_minlen) == 0))
+		if (tp->minlen && (strlen(argv[0]) <= tp->minlen) &&
+		    (strncmp(argv[0], tp->name, tp->minlen) == 0))
 			break;
 
-		if (strcmp(argv[0], tp->cmd_name) == 0)
+		if (strcmp(argv[0], tp->name) == 0)
 			break;
 	}
 
@@ -1024,8 +1030,7 @@ int kdb_parse(const char *cmdstr)
 	 */
 	if (list_entry_is_head(tp, &kdb_cmds_head, list_node)) {
 		list_for_each_entry(tp, &kdb_cmds_head, list_node) {
-			if (strncmp(argv[0], tp->cmd_name,
-				    strlen(tp->cmd_name)) == 0)
+			if (strncmp(argv[0], tp->name, strlen(tp->name)) == 0)
 				break;
 		}
 	}
@@ -1033,19 +1038,19 @@ int kdb_parse(const char *cmdstr)
 	if (!list_entry_is_head(tp, &kdb_cmds_head, list_node)) {
 		int result;
 
-		if (!kdb_check_flags(tp->cmd_flags, kdb_cmd_enabled, argc <= 1))
+		if (!kdb_check_flags(tp->flags, kdb_cmd_enabled, argc <= 1))
 			return KDB_NOPERM;
 
 		KDB_STATE_SET(CMD);
-		result = (*tp->cmd_func)(argc-1, (const char **)argv);
+		result = (*tp->func)(argc-1, (const char **)argv);
 		if (result && ignore_errors && result > KDB_CMD_GO)
 			result = 0;
 		KDB_STATE_CLEAR(CMD);
 
-		if (tp->cmd_flags & KDB_REPEAT_WITH_ARGS)
+		if (tp->flags & KDB_REPEAT_WITH_ARGS)
 			return result;
 
-		argc = tp->cmd_flags & KDB_REPEAT_NO_ARGS ? 1 : 0;
+		argc = tp->flags & KDB_REPEAT_NO_ARGS ? 1 : 0;
 		if (argv[argc])
 			*(argv[argc]) = '\0';
 		return result;
@@ -2412,12 +2417,12 @@ static int kdb_help(int argc, const char **argv)
 		char *space = "";
 		if (KDB_FLAG(CMD_INTERRUPT))
 			return 0;
-		if (!kdb_check_flags(kt->cmd_flags, kdb_cmd_enabled, true))
+		if (!kdb_check_flags(kt->flags, kdb_cmd_enabled, true))
 			continue;
-		if (strlen(kt->cmd_usage) > 20)
+		if (strlen(kt->usage) > 20)
 			space = "\n                                    ";
-		kdb_printf("%-15.15s %-20s%s%s\n", kt->cmd_name,
-			   kt->cmd_usage, space, kt->cmd_help);
+		kdb_printf("%-15.15s %-20s%s%s\n", kt->name,
+			   kt->usage, space, kt->help);
 	}
 	return 0;
 }
@@ -2613,56 +2618,32 @@ static int kdb_grep_help(int argc, const char **argv)
 	return 0;
 }
 
-/*
- * kdb_register_flags - This function is used to register a kernel
- * 	debugger command.
- * Inputs:
- *	cmd	Command name
- *	func	Function to execute the command
- *	usage	A simple usage string showing arguments
- *	help	A simple help string describing command
- *	repeat	Does the command auto repeat on enter?
- * Returns:
- *	zero for success, one if a duplicate command.
+/**
+ * kdb_register() - This function is used to register a kernel debugger
+ *                  command.
+ * @cmd: pointer to kdb command
+ *
+ * Note that it's the job of the caller to keep the memory for the cmd
+ * allocated until unregister is called.
  */
-int kdb_register_flags(char *cmd,
-		       kdb_func_t func,
-		       char *usage,
-		       char *help,
-		       short minlen,
-		       kdb_cmdflags_t flags)
+int kdb_register(kdbtab_t *cmd)
 {
 	kdbtab_t *kp;
 
 	list_for_each_entry(kp, &kdb_cmds_head, list_node) {
-		if (strcmp(kp->cmd_name, cmd) == 0) {
-			kdb_printf("Duplicate kdb command registered: "
-				"%s, func %px help %s\n", cmd, func, help);
+		if (strcmp(kp->name, cmd->name) == 0) {
+			kdb_printf("Duplicate kdb cmd: %s, func %p help %s\n",
+				   cmd->name, cmd->func, cmd->help);
 			return 1;
 		}
 	}
 
-	kp = kmalloc(sizeof(*kp), GFP_KDB);
-	if (!kp) {
-		kdb_printf("Could not allocate new kdb_command table\n");
-		return 1;
-	}
-
-	kp->cmd_name   = cmd;
-	kp->cmd_func   = func;
-	kp->cmd_usage  = usage;
-	kp->cmd_help   = help;
-	kp->cmd_minlen = minlen;
-	kp->cmd_flags  = flags;
-	kp->is_dynamic = true;
-
-	list_add_tail(&kp->list_node, &kdb_cmds_head);
-
+	list_add_tail(&cmd->list_node, &kdb_cmds_head);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(kdb_register_flags);
+EXPORT_SYMBOL_GPL(kdb_register);
 
-/*
+/**
  * kdb_register_table() - This function is used to register a kdb command
  *                        table.
  * @kp: pointer to kdb command table
@@ -2676,266 +2657,231 @@ void kdb_register_table(kdbtab_t *kp, size_t len)
 	}
 }
 
-/*
- * kdb_register - Compatibility register function for commands that do
- *	not need to specify a repeat state.  Equivalent to
- *	kdb_register_flags with flags set to 0.
- * Inputs:
- *	cmd	Command name
- *	func	Function to execute the command
- *	usage	A simple usage string showing arguments
- *	help	A simple help string describing command
- * Returns:
- *	zero for success, one if a duplicate command.
+/**
+ * kdb_unregister() - This function is used to unregister a kernel debugger
+ *                    command. It is generally called when a module which
+ *                    implements kdb command is unloaded.
+ * @cmd: pointer to kdb command
  */
-int kdb_register(char *cmd,
-	     kdb_func_t func,
-	     char *usage,
-	     char *help,
-	     short minlen)
+void kdb_unregister(kdbtab_t *cmd)
 {
-	return kdb_register_flags(cmd, func, usage, help, minlen, 0);
-}
-EXPORT_SYMBOL_GPL(kdb_register);
-
-/*
- * kdb_unregister - This function is used to unregister a kernel
- *	debugger command.  It is generally called when a module which
- *	implements kdb commands is unloaded.
- * Inputs:
- *	cmd	Command name
- * Returns:
- *	zero for success, one command not registered.
- */
-int kdb_unregister(char *cmd)
-{
-	kdbtab_t *kp;
-
-	/*
-	 *  find the command.
-	 */
-	list_for_each_entry(kp, &kdb_cmds_head, list_node) {
-		if (strcmp(kp->cmd_name, cmd) == 0) {
-			list_del(&kp->list_node);
-			if (kp->is_dynamic)
-				kfree(kp);
-			return 0;
-		}
-	}
-
-	/* Couldn't find it.  */
-	return 1;
+	list_del(&cmd->list_node);
 }
 EXPORT_SYMBOL_GPL(kdb_unregister);
 
 static kdbtab_t maintab[] = {
-	{	.cmd_name = "md",
-		.cmd_func = kdb_md,
-		.cmd_usage = "<vaddr>",
-		.cmd_help = "Display Memory Contents, also mdWcN, e.g. md8c1",
-		.cmd_minlen = 1,
-		.cmd_flags = KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS,
+	{	.name = "md",
+		.func = kdb_md,
+		.usage = "<vaddr>",
+		.help = "Display Memory Contents, also mdWcN, e.g. md8c1",
+		.minlen = 1,
+		.flags = KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS,
 	},
-	{	.cmd_name = "mdr",
-		.cmd_func = kdb_md,
-		.cmd_usage = "<vaddr> <bytes>",
-		.cmd_help = "Display Raw Memory",
-		.cmd_flags = KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS,
+	{	.name = "mdr",
+		.func = kdb_md,
+		.usage = "<vaddr> <bytes>",
+		.help = "Display Raw Memory",
+		.flags = KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS,
 	},
-	{	.cmd_name = "mdp",
-		.cmd_func = kdb_md,
-		.cmd_usage = "<paddr> <bytes>",
-		.cmd_help = "Display Physical Memory",
-		.cmd_flags = KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS,
+	{	.name = "mdp",
+		.func = kdb_md,
+		.usage = "<paddr> <bytes>",
+		.help = "Display Physical Memory",
+		.flags = KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS,
 	},
-	{	.cmd_name = "mds",
-		.cmd_func = kdb_md,
-		.cmd_usage = "<vaddr>",
-		.cmd_help = "Display Memory Symbolically",
-		.cmd_flags = KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS,
+	{	.name = "mds",
+		.func = kdb_md,
+		.usage = "<vaddr>",
+		.help = "Display Memory Symbolically",
+		.flags = KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS,
 	},
-	{	.cmd_name = "mm",
-		.cmd_func = kdb_mm,
-		.cmd_usage = "<vaddr> <contents>",
-		.cmd_help = "Modify Memory Contents",
-		.cmd_flags = KDB_ENABLE_MEM_WRITE | KDB_REPEAT_NO_ARGS,
+	{	.name = "mm",
+		.func = kdb_mm,
+		.usage = "<vaddr> <contents>",
+		.help = "Modify Memory Contents",
+		.flags = KDB_ENABLE_MEM_WRITE | KDB_REPEAT_NO_ARGS,
 	},
-	{	.cmd_name = "go",
-		.cmd_func = kdb_go,
-		.cmd_usage = "[<vaddr>]",
-		.cmd_help = "Continue Execution",
-		.cmd_minlen = 1,
-		.cmd_flags = KDB_ENABLE_REG_WRITE |
+	{	.name = "go",
+		.func = kdb_go,
+		.usage = "[<vaddr>]",
+		.help = "Continue Execution",
+		.minlen = 1,
+		.flags = KDB_ENABLE_REG_WRITE |
 			     KDB_ENABLE_ALWAYS_SAFE_NO_ARGS,
 	},
-	{	.cmd_name = "rd",
-		.cmd_func = kdb_rd,
-		.cmd_usage = "",
-		.cmd_help = "Display Registers",
-		.cmd_flags = KDB_ENABLE_REG_READ,
+	{	.name = "rd",
+		.func = kdb_rd,
+		.usage = "",
+		.help = "Display Registers",
+		.flags = KDB_ENABLE_REG_READ,
 	},
-	{	.cmd_name = "rm",
-		.cmd_func = kdb_rm,
-		.cmd_usage = "<reg> <contents>",
-		.cmd_help = "Modify Registers",
-		.cmd_flags = KDB_ENABLE_REG_WRITE,
+	{	.name = "rm",
+		.func = kdb_rm,
+		.usage = "<reg> <contents>",
+		.help = "Modify Registers",
+		.flags = KDB_ENABLE_REG_WRITE,
 	},
-	{	.cmd_name = "ef",
-		.cmd_func = kdb_ef,
-		.cmd_usage = "<vaddr>",
-		.cmd_help = "Display exception frame",
-		.cmd_flags = KDB_ENABLE_MEM_READ,
+	{	.name = "ef",
+		.func = kdb_ef,
+		.usage = "<vaddr>",
+		.help = "Display exception frame",
+		.flags = KDB_ENABLE_MEM_READ,
 	},
-	{	.cmd_name = "bt",
-		.cmd_func = kdb_bt,
-		.cmd_usage = "[<vaddr>]",
-		.cmd_help = "Stack traceback",
-		.cmd_minlen = 1,
-		.cmd_flags = KDB_ENABLE_MEM_READ | KDB_ENABLE_INSPECT_NO_ARGS,
+	{	.name = "bt",
+		.func = kdb_bt,
+		.usage = "[<vaddr>]",
+		.help = "Stack traceback",
+		.minlen = 1,
+		.flags = KDB_ENABLE_MEM_READ | KDB_ENABLE_INSPECT_NO_ARGS,
 	},
-	{	.cmd_name = "btp",
-		.cmd_func = kdb_bt,
-		.cmd_usage = "<pid>",
-		.cmd_help = "Display stack for process <pid>",
-		.cmd_flags = KDB_ENABLE_INSPECT,
+	{	.name = "btp",
+		.func = kdb_bt,
+		.usage = "<pid>",
+		.help = "Display stack for process <pid>",
+		.flags = KDB_ENABLE_INSPECT,
 	},
-	{	.cmd_name = "bta",
-		.cmd_func = kdb_bt,
-		.cmd_usage = "[D|R|S|T|C|Z|E|U|I|M|A]",
-		.cmd_help = "Backtrace all processes matching state flag",
-		.cmd_flags = KDB_ENABLE_INSPECT,
+	{	.name = "bta",
+		.func = kdb_bt,
+		.usage = "[D|R|S|T|C|Z|E|U|I|M|A]",
+		.help = "Backtrace all processes matching state flag",
+		.flags = KDB_ENABLE_INSPECT,
 	},
-	{	.cmd_name = "btc",
-		.cmd_func = kdb_bt,
-		.cmd_usage = "",
-		.cmd_help = "Backtrace current process on each cpu",
-		.cmd_flags = KDB_ENABLE_INSPECT,
+	{	.name = "btc",
+		.func = kdb_bt,
+		.usage = "",
+		.help = "Backtrace current process on each cpu",
+		.flags = KDB_ENABLE_INSPECT,
 	},
-	{	.cmd_name = "btt",
-		.cmd_func = kdb_bt,
-		.cmd_usage = "<vaddr>",
-		.cmd_help = "Backtrace process given its struct task address",
-		.cmd_flags = KDB_ENABLE_MEM_READ | KDB_ENABLE_INSPECT_NO_ARGS,
+	{	.name = "btt",
+		.func = kdb_bt,
+		.usage = "<vaddr>",
+		.help = "Backtrace process given its struct task address",
+		.flags = KDB_ENABLE_MEM_READ | KDB_ENABLE_INSPECT_NO_ARGS,
 	},
-	{	.cmd_name = "env",
-		.cmd_func = kdb_env,
-		.cmd_usage = "",
-		.cmd_help = "Show environment variables",
-		.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
+	{	.name = "env",
+		.func = kdb_env,
+		.usage = "",
+		.help = "Show environment variables",
+		.flags = KDB_ENABLE_ALWAYS_SAFE,
 	},
-	{	.cmd_name = "set",
-		.cmd_func = kdb_set,
-		.cmd_usage = "",
-		.cmd_help = "Set environment variables",
-		.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
+	{	.name = "set",
+		.func = kdb_set,
+		.usage = "",
+		.help = "Set environment variables",
+		.flags = KDB_ENABLE_ALWAYS_SAFE,
 	},
-	{	.cmd_name = "help",
-		.cmd_func = kdb_help,
-		.cmd_usage = "",
-		.cmd_help = "Display Help Message",
-		.cmd_minlen = 1,
-		.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
+	{	.name = "help",
+		.func = kdb_help,
+		.usage = "",
+		.help = "Display Help Message",
+		.minlen = 1,
+		.flags = KDB_ENABLE_ALWAYS_SAFE,
 	},
-	{	.cmd_name = "?",
-		.cmd_func = kdb_help,
-		.cmd_usage = "",
-		.cmd_help = "Display Help Message",
-		.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
+	{	.name = "?",
+		.func = kdb_help,
+		.usage = "",
+		.help = "Display Help Message",
+		.flags = KDB_ENABLE_ALWAYS_SAFE,
 	},
-	{	.cmd_name = "cpu",
-		.cmd_func = kdb_cpu,
-		.cmd_usage = "<cpunum>",
-		.cmd_help = "Switch to new cpu",
-		.cmd_flags = KDB_ENABLE_ALWAYS_SAFE_NO_ARGS,
+	{	.name = "cpu",
+		.func = kdb_cpu,
+		.usage = "<cpunum>",
+		.help = "Switch to new cpu",
+		.flags = KDB_ENABLE_ALWAYS_SAFE_NO_ARGS,
 	},
-	{	.cmd_name = "kgdb",
-		.cmd_func = kdb_kgdb,
-		.cmd_usage = "",
-		.cmd_help = "Enter kgdb mode",
-		.cmd_flags = 0,
+	{	.name = "kgdb",
+		.func = kdb_kgdb,
+		.usage = "",
+		.help = "Enter kgdb mode",
+		.flags = 0,
 	},
-	{	.cmd_name = "ps",
-		.cmd_func = kdb_ps,
-		.cmd_usage = "[<flags>|A]",
-		.cmd_help = "Display active task list",
-		.cmd_flags = KDB_ENABLE_INSPECT,
+	{	.name = "ps",
+		.func = kdb_ps,
+		.usage = "[<flags>|A]",
+		.help = "Display active task list",
+		.flags = KDB_ENABLE_INSPECT,
 	},
-	{	.cmd_name = "pid",
-		.cmd_func = kdb_pid,
-		.cmd_usage = "<pidnum>",
-		.cmd_help = "Switch to another task",
-		.cmd_flags = KDB_ENABLE_INSPECT,
+	{	.name = "pid",
+		.func = kdb_pid,
+		.usage = "<pidnum>",
+		.help = "Switch to another task",
+		.flags = KDB_ENABLE_INSPECT,
 	},
-	{	.cmd_name = "reboot",
-		.cmd_func = kdb_reboot,
-		.cmd_usage = "",
-		.cmd_help = "Reboot the machine immediately",
-		.cmd_flags = KDB_ENABLE_REBOOT,
+	{	.name = "reboot",
+		.func = kdb_reboot,
+		.usage = "",
+		.help = "Reboot the machine immediately",
+		.flags = KDB_ENABLE_REBOOT,
 	},
 #if defined(CONFIG_MODULES)
-	{	.cmd_name = "lsmod",
-		.cmd_func = kdb_lsmod,
-		.cmd_usage = "",
-		.cmd_help = "List loaded kernel modules",
-		.cmd_flags = KDB_ENABLE_INSPECT,
+	{	.name = "lsmod",
+		.func = kdb_lsmod,
+		.usage = "",
+		.help = "List loaded kernel modules",
+		.flags = KDB_ENABLE_INSPECT,
 	},
 #endif
 #if defined(CONFIG_MAGIC_SYSRQ)
-	{	.cmd_name = "sr",
-		.cmd_func = kdb_sr,
-		.cmd_usage = "<key>",
-		.cmd_help = "Magic SysRq key",
-		.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
+	{	.name = "sr",
+		.func = kdb_sr,
+		.usage = "<key>",
+		.help = "Magic SysRq key",
+		.flags = KDB_ENABLE_ALWAYS_SAFE,
 	},
 #endif
 #if defined(CONFIG_PRINTK)
-	{	.cmd_name = "dmesg",
-		.cmd_func = kdb_dmesg,
-		.cmd_usage = "[lines]",
-		.cmd_help = "Display syslog buffer",
-		.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
+	{	.name = "dmesg",
+		.func = kdb_dmesg,
+		.usage = "[lines]",
+		.help = "Display syslog buffer",
+		.flags = KDB_ENABLE_ALWAYS_SAFE,
 	},
 #endif
-	{	.cmd_name = "defcmd",
-		.cmd_func = kdb_defcmd,
-		.cmd_usage = "name \"usage\" \"help\"",
-		.cmd_help = "Define a set of commands, down to endefcmd",
-		.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
+	{	.name = "defcmd",
+		.func = kdb_defcmd,
+		.usage = "name \"usage\" \"help\"",
+		.help = "Define a set of commands, down to endefcmd",
+		/*
+		 * Macros are always safe because when executed each
+		 * internal command re-enters kdb_parse() and is safety
+		 * checked individually.
+		 */
+		.flags = KDB_ENABLE_ALWAYS_SAFE,
 	},
-	{	.cmd_name = "kill",
-		.cmd_func = kdb_kill,
-		.cmd_usage = "<-signal> <pid>",
-		.cmd_help = "Send a signal to a process",
-		.cmd_flags = KDB_ENABLE_SIGNAL,
+	{	.name = "kill",
+		.func = kdb_kill,
+		.usage = "<-signal> <pid>",
+		.help = "Send a signal to a process",
+		.flags = KDB_ENABLE_SIGNAL,
 	},
-	{	.cmd_name = "summary",
-		.cmd_func = kdb_summary,
-		.cmd_usage = "",
-		.cmd_help = "Summarize the system",
-		.cmd_minlen = 4,
-		.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
+	{	.name = "summary",
+		.func = kdb_summary,
+		.usage = "",
+		.help = "Summarize the system",
+		.minlen = 4,
+		.flags = KDB_ENABLE_ALWAYS_SAFE,
 	},
-	{	.cmd_name = "per_cpu",
-		.cmd_func = kdb_per_cpu,
-		.cmd_usage = "<sym> [<bytes>] [<cpu>]",
-		.cmd_help = "Display per_cpu variables",
-		.cmd_minlen = 3,
-		.cmd_flags = KDB_ENABLE_MEM_READ,
+	{	.name = "per_cpu",
+		.func = kdb_per_cpu,
+		.usage = "<sym> [<bytes>] [<cpu>]",
+		.help = "Display per_cpu variables",
+		.minlen = 3,
+		.flags = KDB_ENABLE_MEM_READ,
 	},
-	{	.cmd_name = "grephelp",
-		.cmd_func = kdb_grep_help,
-		.cmd_usage = "",
-		.cmd_help = "Display help on | grep",
-		.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
+	{	.name = "grephelp",
+		.func = kdb_grep_help,
+		.usage = "",
+		.help = "Display help on | grep",
+		.flags = KDB_ENABLE_ALWAYS_SAFE,
 	},
 };
 
 static kdbtab_t nmicmd = {
-	.cmd_name = "disable_nmi",
-	.cmd_func = kdb_disable_nmi,
-	.cmd_usage = "",
-	.cmd_help = "Disable NMI entry to KDB",
-	.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
+	.name = "disable_nmi",
+	.func = kdb_disable_nmi,
+	.usage = "",
+	.help = "Disable NMI entry to KDB",
+	.flags = KDB_ENABLE_ALWAYS_SAFE,
 };
 
 /* Initialize the kdb command table. */
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index 170c69aedebb..629590084a0d 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
@@ -109,7 +109,6 @@ extern int kdbgetaddrarg(int, const char **, int*, unsigned long *,
 			 long *, char **);
 extern int kdbgetsymval(const char *, kdb_symtab_t *);
 extern int kdbnearsym(unsigned long, kdb_symtab_t *);
-extern void kdbnearsym_cleanup(void);
 extern char *kdb_strdup(const char *str, gfp_t type);
 extern void kdb_symbol_print(unsigned long, const kdb_symtab_t *, unsigned int);
 
@@ -165,19 +164,6 @@ typedef struct _kdb_bp {
 #ifdef CONFIG_KGDB_KDB
 extern kdb_bp_t kdb_breakpoints[/* KDB_MAXBPT */];
 
-/* The KDB shell command table */
-typedef struct _kdbtab {
-	char    *cmd_name;		/* Command name */
-	kdb_func_t cmd_func;		/* Function to execute command */
-	char    *cmd_usage;		/* Usage String for this command */
-	char    *cmd_help;		/* Help message for this command */
-	short    cmd_minlen;		/* Minimum legal # command
-					 * chars required */
-	kdb_cmdflags_t cmd_flags;	/* Command behaviour flags */
-	struct list_head list_node;	/* Command list */
-	bool    is_dynamic;		/* Command table allocation type */
-} kdbtab_t;
-
 extern void kdb_register_table(kdbtab_t *kp, size_t len);
 extern int kdb_bt(int, const char **);	/* KDB display back trace */
 
@@ -233,10 +219,6 @@ extern struct task_struct *kdb_curr_task(int);
 
 #define GFP_KDB (in_dbg_master() ? GFP_ATOMIC : GFP_KERNEL)
 
-extern void *debug_kmalloc(size_t size, gfp_t flags);
-extern void debug_kfree(void *);
-extern void debug_kusage(void);
-
 extern struct task_struct *kdb_current_task;
 extern struct pt_regs *kdb_current_regs;
 
diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c
index 9f50d22d68e6..7507d9a8dc6a 100644
--- a/kernel/debug/kdb/kdb_support.c
+++ b/kernel/debug/kdb/kdb_support.c
@@ -10,7 +10,6 @@
  * 03/02/13    added new 2.5 kallsyms <xavier.bru@bull.net>
  */
 
-#include <stdarg.h>
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
@@ -52,48 +51,48 @@ int kdbgetsymval(const char *symname, kdb_symtab_t *symtab)
 }
 EXPORT_SYMBOL(kdbgetsymval);
 
-static char *kdb_name_table[100];	/* arbitrary size */
-
-/*
- * kdbnearsym -	Return the name of the symbol with the nearest address
- *	less than 'addr'.
+/**
+ * kdbnearsym() - Return the name of the symbol with the nearest address
+ *                less than @addr.
+ * @addr: Address to check for near symbol
+ * @symtab: Structure to receive results
  *
- * Parameters:
- *	addr	Address to check for symbol near
- *	symtab  Structure to receive results
- * Returns:
- *	0	No sections contain this address, symtab zero filled
- *	1	Address mapped to module/symbol/section, data in symtab
- * Remarks:
- *	2.6 kallsyms has a "feature" where it unpacks the name into a
- *	string.  If that string is reused before the caller expects it
- *	then the caller sees its string change without warning.  To
- *	avoid cluttering up the main kdb code with lots of kdb_strdup,
- *	tests and kfree calls, kdbnearsym maintains an LRU list of the
- *	last few unique strings.  The list is sized large enough to
- *	hold active strings, no kdb caller of kdbnearsym makes more
- *	than ~20 later calls before using a saved value.
+ * WARNING: This function may return a pointer to a single statically
+ * allocated buffer (namebuf). kdb's unusual calling context (single
+ * threaded, all other CPUs halted) provides us sufficient locking for
+ * this to be safe. The only constraint imposed by the static buffer is
+ * that the caller must consume any previous reply prior to another call
+ * to lookup a new symbol.
+ *
+ * Note that, strictly speaking, some architectures may re-enter the kdb
+ * trap if the system turns out to be very badly damaged and this breaks
+ * the single-threaded assumption above. In these circumstances successful
+ * continuation and exit from the inner trap is unlikely to work and any
+ * user attempting this receives a prominent warning before being allowed
+ * to progress. In these circumstances we remain memory safe because
+ * namebuf[KSYM_NAME_LEN-1] will never change from '\0' although we do
+ * tolerate the possibility of garbled symbol display from the outer kdb
+ * trap.
+ *
+ * Return:
+ * * 0 - No sections contain this address, symtab zero filled
+ * * 1 - Address mapped to module/symbol/section, data in symtab
  */
 int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab)
 {
 	int ret = 0;
 	unsigned long symbolsize = 0;
 	unsigned long offset = 0;
-#define knt1_size 128		/* must be >= kallsyms table size */
-	char *knt1 = NULL;
+	static char namebuf[KSYM_NAME_LEN];
 
 	kdb_dbg_printf(AR, "addr=0x%lx, symtab=%px\n", addr, symtab);
 	memset(symtab, 0, sizeof(*symtab));
 
 	if (addr < 4096)
 		goto out;
-	knt1 = debug_kmalloc(knt1_size, GFP_ATOMIC);
-	if (!knt1) {
-		kdb_func_printf("addr=0x%lx cannot kmalloc knt1\n", addr);
-		goto out;
-	}
+
 	symtab->sym_name = kallsyms_lookup(addr, &symbolsize , &offset,
-				(char **)(&symtab->mod_name), knt1);
+				(char **)(&symtab->mod_name), namebuf);
 	if (offset > 8*1024*1024) {
 		symtab->sym_name = NULL;
 		addr = offset = symbolsize = 0;
@@ -102,63 +101,14 @@ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab)
 	symtab->sym_end = symtab->sym_start + symbolsize;
 	ret = symtab->sym_name != NULL && *(symtab->sym_name) != '\0';
 
-	if (ret) {
-		int i;
-		/* Another 2.6 kallsyms "feature".  Sometimes the sym_name is
-		 * set but the buffer passed into kallsyms_lookup is not used,
-		 * so it contains garbage.  The caller has to work out which
-		 * buffer needs to be saved.
-		 *
-		 * What was Rusty smoking when he wrote that code?
-		 */
-		if (symtab->sym_name != knt1) {
-			strncpy(knt1, symtab->sym_name, knt1_size);
-			knt1[knt1_size-1] = '\0';
-		}
-		for (i = 0; i < ARRAY_SIZE(kdb_name_table); ++i) {
-			if (kdb_name_table[i] &&
-			    strcmp(kdb_name_table[i], knt1) == 0)
-				break;
-		}
-		if (i >= ARRAY_SIZE(kdb_name_table)) {
-			debug_kfree(kdb_name_table[0]);
-			memmove(kdb_name_table, kdb_name_table+1,
-			       sizeof(kdb_name_table[0]) *
-			       (ARRAY_SIZE(kdb_name_table)-1));
-		} else {
-			debug_kfree(knt1);
-			knt1 = kdb_name_table[i];
-			memmove(kdb_name_table+i, kdb_name_table+i+1,
-			       sizeof(kdb_name_table[0]) *
-			       (ARRAY_SIZE(kdb_name_table)-i-1));
-		}
-		i = ARRAY_SIZE(kdb_name_table) - 1;
-		kdb_name_table[i] = knt1;
-		symtab->sym_name = kdb_name_table[i];
-		knt1 = NULL;
-	}
-
 	if (symtab->mod_name == NULL)
 		symtab->mod_name = "kernel";
 	kdb_dbg_printf(AR, "returns %d symtab->sym_start=0x%lx, symtab->mod_name=%px, symtab->sym_name=%px (%s)\n",
 		       ret, symtab->sym_start, symtab->mod_name, symtab->sym_name, symtab->sym_name);
-
 out:
-	debug_kfree(knt1);
 	return ret;
 }
 
-void kdbnearsym_cleanup(void)
-{
-	int i;
-	for (i = 0; i < ARRAY_SIZE(kdb_name_table); ++i) {
-		if (kdb_name_table[i]) {
-			debug_kfree(kdb_name_table[i]);
-			kdb_name_table[i] = NULL;
-		}
-	}
-}
-
 static char ks_namebuf[KSYM_NAME_LEN+1], ks_namebuf_prev[KSYM_NAME_LEN+1];
 
 /*
@@ -656,230 +606,6 @@ unsigned long kdb_task_state(const struct task_struct *p, unsigned long mask)
 	return (mask & kdb_task_state_string(state)) != 0;
 }
 
-/* Last ditch allocator for debugging, so we can still debug even when
- * the GFP_ATOMIC pool has been exhausted.  The algorithms are tuned
- * for space usage, not for speed.  One smallish memory pool, the free
- * chain is always in ascending address order to allow coalescing,
- * allocations are done in brute force best fit.
- */
-
-struct debug_alloc_header {
-	u32 next;	/* offset of next header from start of pool */
-	u32 size;
-	void *caller;
-};
-
-/* The memory returned by this allocator must be aligned, which means
- * so must the header size.  Do not assume that sizeof(struct
- * debug_alloc_header) is a multiple of the alignment, explicitly
- * calculate the overhead of this header, including the alignment.
- * The rest of this code must not use sizeof() on any header or
- * pointer to a header.
- */
-#define dah_align 8
-#define dah_overhead ALIGN(sizeof(struct debug_alloc_header), dah_align)
-
-static u64 debug_alloc_pool_aligned[256*1024/dah_align];	/* 256K pool */
-static char *debug_alloc_pool = (char *)debug_alloc_pool_aligned;
-static u32 dah_first, dah_first_call = 1, dah_used, dah_used_max;
-
-/* Locking is awkward.  The debug code is called from all contexts,
- * including non maskable interrupts.  A normal spinlock is not safe
- * in NMI context.  Try to get the debug allocator lock, if it cannot
- * be obtained after a second then give up.  If the lock could not be
- * previously obtained on this cpu then only try once.
- *
- * sparse has no annotation for "this function _sometimes_ acquires a
- * lock", so fudge the acquire/release notation.
- */
-static DEFINE_SPINLOCK(dap_lock);
-static int get_dap_lock(void)
-	__acquires(dap_lock)
-{
-	static int dap_locked = -1;
-	int count;
-	if (dap_locked == smp_processor_id())
-		count = 1;
-	else
-		count = 1000;
-	while (1) {
-		if (spin_trylock(&dap_lock)) {
-			dap_locked = -1;
-			return 1;
-		}
-		if (!count--)
-			break;
-		udelay(1000);
-	}
-	dap_locked = smp_processor_id();
-	__acquire(dap_lock);
-	return 0;
-}
-
-void *debug_kmalloc(size_t size, gfp_t flags)
-{
-	unsigned int rem, h_offset;
-	struct debug_alloc_header *best, *bestprev, *prev, *h;
-	void *p = NULL;
-	if (!get_dap_lock()) {
-		__release(dap_lock);	/* we never actually got it */
-		return NULL;
-	}
-	h = (struct debug_alloc_header *)(debug_alloc_pool + dah_first);
-	if (dah_first_call) {
-		h->size = sizeof(debug_alloc_pool_aligned) - dah_overhead;
-		dah_first_call = 0;
-	}
-	size = ALIGN(size, dah_align);
-	prev = best = bestprev = NULL;
-	while (1) {
-		if (h->size >= size && (!best || h->size < best->size)) {
-			best = h;
-			bestprev = prev;
-			if (h->size == size)
-				break;
-		}
-		if (!h->next)
-			break;
-		prev = h;
-		h = (struct debug_alloc_header *)(debug_alloc_pool + h->next);
-	}
-	if (!best)
-		goto out;
-	rem = best->size - size;
-	/* The pool must always contain at least one header */
-	if (best->next == 0 && bestprev == NULL && rem < dah_overhead)
-		goto out;
-	if (rem >= dah_overhead) {
-		best->size = size;
-		h_offset = ((char *)best - debug_alloc_pool) +
-			   dah_overhead + best->size;
-		h = (struct debug_alloc_header *)(debug_alloc_pool + h_offset);
-		h->size = rem - dah_overhead;
-		h->next = best->next;
-	} else
-		h_offset = best->next;
-	best->caller = __builtin_return_address(0);
-	dah_used += best->size;
-	dah_used_max = max(dah_used, dah_used_max);
-	if (bestprev)
-		bestprev->next = h_offset;
-	else
-		dah_first = h_offset;
-	p = (char *)best + dah_overhead;
-	memset(p, POISON_INUSE, best->size - 1);
-	*((char *)p + best->size - 1) = POISON_END;
-out:
-	spin_unlock(&dap_lock);
-	return p;
-}
-
-void debug_kfree(void *p)
-{
-	struct debug_alloc_header *h;
-	unsigned int h_offset;
-	if (!p)
-		return;
-	if ((char *)p < debug_alloc_pool ||
-	    (char *)p >= debug_alloc_pool + sizeof(debug_alloc_pool_aligned)) {
-		kfree(p);
-		return;
-	}
-	if (!get_dap_lock()) {
-		__release(dap_lock);	/* we never actually got it */
-		return;		/* memory leak, cannot be helped */
-	}
-	h = (struct debug_alloc_header *)((char *)p - dah_overhead);
-	memset(p, POISON_FREE, h->size - 1);
-	*((char *)p + h->size - 1) = POISON_END;
-	h->caller = NULL;
-	dah_used -= h->size;
-	h_offset = (char *)h - debug_alloc_pool;
-	if (h_offset < dah_first) {
-		h->next = dah_first;
-		dah_first = h_offset;
-	} else {
-		struct debug_alloc_header *prev;
-		unsigned int prev_offset;
-		prev = (struct debug_alloc_header *)(debug_alloc_pool +
-						     dah_first);
-		while (1) {
-			if (!prev->next || prev->next > h_offset)
-				break;
-			prev = (struct debug_alloc_header *)
-				(debug_alloc_pool + prev->next);
-		}
-		prev_offset = (char *)prev - debug_alloc_pool;
-		if (prev_offset + dah_overhead + prev->size == h_offset) {
-			prev->size += dah_overhead + h->size;
-			memset(h, POISON_FREE, dah_overhead - 1);
-			*((char *)h + dah_overhead - 1) = POISON_END;
-			h = prev;
-			h_offset = prev_offset;
-		} else {
-			h->next = prev->next;
-			prev->next = h_offset;
-		}
-	}
-	if (h_offset + dah_overhead + h->size == h->next) {
-		struct debug_alloc_header *next;
-		next = (struct debug_alloc_header *)
-			(debug_alloc_pool + h->next);
-		h->size += dah_overhead + next->size;
-		h->next = next->next;
-		memset(next, POISON_FREE, dah_overhead - 1);
-		*((char *)next + dah_overhead - 1) = POISON_END;
-	}
-	spin_unlock(&dap_lock);
-}
-
-void debug_kusage(void)
-{
-	struct debug_alloc_header *h_free, *h_used;
-#ifdef	CONFIG_IA64
-	/* FIXME: using dah for ia64 unwind always results in a memory leak.
-	 * Fix that memory leak first, then set debug_kusage_one_time = 1 for
-	 * all architectures.
-	 */
-	static int debug_kusage_one_time;
-#else
-	static int debug_kusage_one_time = 1;
-#endif
-	if (!get_dap_lock()) {
-		__release(dap_lock);	/* we never actually got it */
-		return;
-	}
-	h_free = (struct debug_alloc_header *)(debug_alloc_pool + dah_first);
-	if (dah_first == 0 &&
-	    (h_free->size == sizeof(debug_alloc_pool_aligned) - dah_overhead ||
-	     dah_first_call))
-		goto out;
-	if (!debug_kusage_one_time)
-		goto out;
-	debug_kusage_one_time = 0;
-	kdb_func_printf("debug_kmalloc memory leak dah_first %d\n", dah_first);
-	if (dah_first) {
-		h_used = (struct debug_alloc_header *)debug_alloc_pool;
-		kdb_func_printf("h_used %px size %d\n", h_used, h_used->size);
-	}
-	do {
-		h_used = (struct debug_alloc_header *)
-			  ((char *)h_free + dah_overhead + h_free->size);
-		kdb_func_printf("h_used %px size %d caller %px\n",
-				h_used, h_used->size, h_used->caller);
-		h_free = (struct debug_alloc_header *)
-			  (debug_alloc_pool + h_free->next);
-	} while (h_free->next);
-	h_used = (struct debug_alloc_header *)
-		  ((char *)h_free + dah_overhead + h_free->size);
-	if ((char *)h_used - debug_alloc_pool !=
-	    sizeof(debug_alloc_pool_aligned))
-		kdb_func_printf("h_used %px size %d caller %px\n",
-				h_used, h_used->size, h_used->caller);
-out:
-	spin_unlock(&dap_lock);
-}
-
 /* Maintain a small stack of kdb_flags to allow recursion without disturbing
  * the global kdb state.
  */
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 77b405508743..1b02179758cb 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -80,6 +80,19 @@ config SWIOTLB
 	bool
 	select NEED_DMA_MAP_STATE
 
+config DMA_RESTRICTED_POOL
+	bool "DMA Restricted Pool"
+	depends on OF && OF_RESERVED_MEM && SWIOTLB
+	help
+	  This enables support for restricted DMA pools which provide a level of
+	  DMA memory protection on systems with limited hardware protection
+	  capabilities, such as those lacking an IOMMU.
+
+	  For more information see
+	  <Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt>
+	  and <kernel/dma/swiotlb.c>.
+	  If unsure, say "n".
+
 #
 # Should be selected if we can mmap non-coherent mappings to userspace.
 # The only thing that is really required is a way to set an uncached bit
@@ -93,6 +106,10 @@ config DMA_COHERENT_POOL
 	select GENERIC_ALLOCATOR
 	bool
 
+config DMA_GLOBAL_POOL
+	select DMA_DECLARE_COHERENT
+	bool
+
 config DMA_REMAP
 	bool
 	depends on MMU
diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c
index 794e76b03b34..25fc85a7aebe 100644
--- a/kernel/dma/coherent.c
+++ b/kernel/dma/coherent.c
@@ -20,8 +20,6 @@ struct dma_coherent_mem {
 	bool		use_dev_dma_pfn_offset;
 };
 
-static struct dma_coherent_mem *dma_coherent_default_memory __ro_after_init;
-
 static inline struct dma_coherent_mem *dev_get_coherent_memory(struct device *dev)
 {
 	if (dev && dev->dma_mem)
@@ -37,51 +35,44 @@ static inline dma_addr_t dma_get_device_base(struct device *dev,
 	return mem->device_base;
 }
 
-static int dma_init_coherent_memory(phys_addr_t phys_addr,
-		dma_addr_t device_addr, size_t size,
-		struct dma_coherent_mem **mem)
+static struct dma_coherent_mem *dma_init_coherent_memory(phys_addr_t phys_addr,
+		dma_addr_t device_addr, size_t size, bool use_dma_pfn_offset)
 {
-	struct dma_coherent_mem *dma_mem = NULL;
-	void *mem_base = NULL;
+	struct dma_coherent_mem *dma_mem;
 	int pages = size >> PAGE_SHIFT;
 	int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
-	int ret;
+	void *mem_base;
 
-	if (!size) {
-		ret = -EINVAL;
-		goto out;
-	}
+	if (!size)
+		return ERR_PTR(-EINVAL);
 
 	mem_base = memremap(phys_addr, size, MEMREMAP_WC);
-	if (!mem_base) {
-		ret = -EINVAL;
-		goto out;
-	}
+	if (!mem_base)
+		return ERR_PTR(-EINVAL);
+
 	dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
-	if (!dma_mem) {
-		ret = -ENOMEM;
-		goto out;
-	}
+	if (!dma_mem)
+		goto out_unmap_membase;
 	dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
-	if (!dma_mem->bitmap) {
-		ret = -ENOMEM;
-		goto out;
-	}
+	if (!dma_mem->bitmap)
+		goto out_free_dma_mem;
 
 	dma_mem->virt_base = mem_base;
 	dma_mem->device_base = device_addr;
 	dma_mem->pfn_base = PFN_DOWN(phys_addr);
 	dma_mem->size = pages;
+	dma_mem->use_dev_dma_pfn_offset = use_dma_pfn_offset;
 	spin_lock_init(&dma_mem->spinlock);
 
-	*mem = dma_mem;
-	return 0;
+	return dma_mem;
 
-out:
+out_free_dma_mem:
 	kfree(dma_mem);
-	if (mem_base)
-		memunmap(mem_base);
-	return ret;
+out_unmap_membase:
+	memunmap(mem_base);
+	pr_err("Reserved memory: failed to init DMA memory pool at %pa, size %zd MiB\n",
+		&phys_addr, size / SZ_1M);
+	return ERR_PTR(-ENOMEM);
 }
 
 static void dma_release_coherent_memory(struct dma_coherent_mem *mem)
@@ -130,9 +121,9 @@ int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
 	struct dma_coherent_mem *mem;
 	int ret;
 
-	ret = dma_init_coherent_memory(phys_addr, device_addr, size, &mem);
-	if (ret)
-		return ret;
+	mem = dma_init_coherent_memory(phys_addr, device_addr, size, false);
+	if (IS_ERR(mem))
+		return PTR_ERR(mem);
 
 	ret = dma_assign_coherent_memory(dev, mem);
 	if (ret)
@@ -198,16 +189,6 @@ int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size,
 	return 1;
 }
 
-void *dma_alloc_from_global_coherent(struct device *dev, ssize_t size,
-				     dma_addr_t *dma_handle)
-{
-	if (!dma_coherent_default_memory)
-		return NULL;
-
-	return __dma_alloc_from_coherent(dev, dma_coherent_default_memory, size,
-					 dma_handle);
-}
-
 static int __dma_release_from_coherent(struct dma_coherent_mem *mem,
 				       int order, void *vaddr)
 {
@@ -243,15 +224,6 @@ int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr)
 	return __dma_release_from_coherent(mem, order, vaddr);
 }
 
-int dma_release_from_global_coherent(int order, void *vaddr)
-{
-	if (!dma_coherent_default_memory)
-		return 0;
-
-	return __dma_release_from_coherent(dma_coherent_default_memory, order,
-			vaddr);
-}
-
 static int __dma_mmap_from_coherent(struct dma_coherent_mem *mem,
 		struct vm_area_struct *vma, void *vaddr, size_t size, int *ret)
 {
@@ -297,6 +269,28 @@ int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma,
 	return __dma_mmap_from_coherent(mem, vma, vaddr, size, ret);
 }
 
+#ifdef CONFIG_DMA_GLOBAL_POOL
+static struct dma_coherent_mem *dma_coherent_default_memory __ro_after_init;
+
+void *dma_alloc_from_global_coherent(struct device *dev, ssize_t size,
+				     dma_addr_t *dma_handle)
+{
+	if (!dma_coherent_default_memory)
+		return NULL;
+
+	return __dma_alloc_from_coherent(dev, dma_coherent_default_memory, size,
+					 dma_handle);
+}
+
+int dma_release_from_global_coherent(int order, void *vaddr)
+{
+	if (!dma_coherent_default_memory)
+		return 0;
+
+	return __dma_release_from_coherent(dma_coherent_default_memory, order,
+			vaddr);
+}
+
 int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr,
 				   size_t size, int *ret)
 {
@@ -307,6 +301,19 @@ int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr,
 					vaddr, size, ret);
 }
 
+int dma_init_global_coherent(phys_addr_t phys_addr, size_t size)
+{
+	struct dma_coherent_mem *mem;
+
+	mem = dma_init_coherent_memory(phys_addr, phys_addr, size, true);
+	if (IS_ERR(mem))
+		return PTR_ERR(mem);
+	dma_coherent_default_memory = mem;
+	pr_info("DMA: default coherent area is set\n");
+	return 0;
+}
+#endif /* CONFIG_DMA_GLOBAL_POOL */
+
 /*
  * Support for reserved memory regions defined in device tree
  */
@@ -315,25 +322,22 @@ int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr,
 #include <linux/of_fdt.h>
 #include <linux/of_reserved_mem.h>
 
+#ifdef CONFIG_DMA_GLOBAL_POOL
 static struct reserved_mem *dma_reserved_default_memory __initdata;
+#endif
 
 static int rmem_dma_device_init(struct reserved_mem *rmem, struct device *dev)
 {
-	struct dma_coherent_mem *mem = rmem->priv;
-	int ret;
-
-	if (!mem) {
-		ret = dma_init_coherent_memory(rmem->base, rmem->base,
-					       rmem->size, &mem);
-		if (ret) {
-			pr_err("Reserved memory: failed to init DMA memory pool at %pa, size %ld MiB\n",
-				&rmem->base, (unsigned long)rmem->size / SZ_1M);
-			return ret;
-		}
+	if (!rmem->priv) {
+		struct dma_coherent_mem *mem;
+
+		mem = dma_init_coherent_memory(rmem->base, rmem->base,
+					       rmem->size, true);
+		if (IS_ERR(mem))
+			return PTR_ERR(mem);
+		rmem->priv = mem;
 	}
-	mem->use_dev_dma_pfn_offset = true;
-	rmem->priv = mem;
-	dma_assign_coherent_memory(dev, mem);
+	dma_assign_coherent_memory(dev, rmem->priv);
 	return 0;
 }
 
@@ -361,7 +365,9 @@ static int __init rmem_dma_setup(struct reserved_mem *rmem)
 		pr_err("Reserved memory: regions without no-map are not yet supported\n");
 		return -EINVAL;
 	}
+#endif
 
+#ifdef CONFIG_DMA_GLOBAL_POOL
 	if (of_get_flat_dt_prop(node, "linux,dma-default", NULL)) {
 		WARN(dma_reserved_default_memory,
 		     "Reserved memory: region for default DMA coherent area is redefined\n");
@@ -375,31 +381,16 @@ static int __init rmem_dma_setup(struct reserved_mem *rmem)
 	return 0;
 }
 
+#ifdef CONFIG_DMA_GLOBAL_POOL
 static int __init dma_init_reserved_memory(void)
 {
-	const struct reserved_mem_ops *ops;
-	int ret;
-
 	if (!dma_reserved_default_memory)
 		return -ENOMEM;
-
-	ops = dma_reserved_default_memory->ops;
-
-	/*
-	 * We rely on rmem_dma_device_init() does not propagate error of
-	 * dma_assign_coherent_memory() for "NULL" device.
-	 */
-	ret = ops->device_init(dma_reserved_default_memory, NULL);
-
-	if (!ret) {
-		dma_coherent_default_memory = dma_reserved_default_memory->priv;
-		pr_info("DMA: default coherent area is set\n");
-	}
-
-	return ret;
+	return dma_init_global_coherent(dma_reserved_default_memory->base,
+					dma_reserved_default_memory->size);
 }
-
 core_initcall(dma_init_reserved_memory);
+#endif /* CONFIG_DMA_GLOBAL_POOL */
 
 RESERVEDMEM_OF_DECLARE(dma, "shared-dma-pool", rmem_dma_setup);
 #endif
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index dadae6255d05..6c90c69e5311 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -792,7 +792,7 @@ static int dump_show(struct seq_file *seq, void *v)
 }
 DEFINE_SHOW_ATTRIBUTE(dump);
 
-static void dma_debug_fs_init(void)
+static int __init dma_debug_fs_init(void)
 {
 	struct dentry *dentry = debugfs_create_dir("dma-api", NULL);
 
@@ -805,7 +805,10 @@ static void dma_debug_fs_init(void)
 	debugfs_create_u32("nr_total_entries", 0444, dentry, &nr_total_entries);
 	debugfs_create_file("driver_filter", 0644, dentry, NULL, &filter_fops);
 	debugfs_create_file("dump", 0444, dentry, NULL, &dump_fops);
+
+	return 0;
 }
+core_initcall_sync(dma_debug_fs_init);
 
 static int device_dma_allocations(struct device *dev, struct dma_debug_entry **out_entry)
 {
@@ -890,8 +893,6 @@ static int dma_debug_init(void)
 		spin_lock_init(&dma_entry_hash[i].lock);
 	}
 
-	dma_debug_fs_init();
-
 	nr_pages = DIV_ROUND_UP(nr_prealloc_entries, DMA_DEBUG_DYNAMIC_ENTRIES);
 	for (i = 0; i < nr_pages; ++i)
 		dma_debug_create_entries(GFP_KERNEL);
@@ -1064,20 +1065,10 @@ static void check_for_stack(struct device *dev,
 	}
 }
 
-static inline bool overlap(void *addr, unsigned long len, void *start, void *end)
-{
-	unsigned long a1 = (unsigned long)addr;
-	unsigned long b1 = a1 + len;
-	unsigned long a2 = (unsigned long)start;
-	unsigned long b2 = (unsigned long)end;
-
-	return !(b1 <= a2 || a1 >= b2);
-}
-
 static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len)
 {
-	if (overlap(addr, len, _stext, _etext) ||
-	    overlap(addr, len, __start_rodata, __end_rodata))
+	if (memory_intersects(_stext, _etext, addr, len) ||
+	    memory_intersects(__start_rodata, __end_rodata, addr, len))
 		err_printk(dev, NULL, "device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len);
 }
 
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index f737e3347059..4c6c5e0635e3 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -75,6 +75,15 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
 		min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit);
 }
 
+static void __dma_direct_free_pages(struct device *dev, struct page *page,
+				    size_t size)
+{
+	if (IS_ENABLED(CONFIG_DMA_RESTRICTED_POOL) &&
+	    swiotlb_free(dev, page, size))
+		return;
+	dma_free_contiguous(dev, page, size);
+}
+
 static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
 		gfp_t gfp)
 {
@@ -86,6 +95,16 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
 
 	gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
 					   &phys_limit);
+	if (IS_ENABLED(CONFIG_DMA_RESTRICTED_POOL) &&
+	    is_swiotlb_for_alloc(dev)) {
+		page = swiotlb_alloc(dev, size);
+		if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
+			__dma_direct_free_pages(dev, page, size);
+			return NULL;
+		}
+		return page;
+	}
+
 	page = dma_alloc_contiguous(dev, size, gfp);
 	if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
 		dma_free_contiguous(dev, page, size);
@@ -142,7 +161,7 @@ void *dma_direct_alloc(struct device *dev, size_t size,
 		gfp |= __GFP_NOWARN;
 
 	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
-	    !force_dma_unencrypted(dev)) {
+	    !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev)) {
 		page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO);
 		if (!page)
 			return NULL;
@@ -156,17 +175,28 @@ void *dma_direct_alloc(struct device *dev, size_t size,
 
 	if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
 	    !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
-	    !dev_is_dma_coherent(dev))
+	    !IS_ENABLED(CONFIG_DMA_GLOBAL_POOL) &&
+	    !dev_is_dma_coherent(dev) &&
+	    !is_swiotlb_for_alloc(dev))
 		return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
 
+	if (IS_ENABLED(CONFIG_DMA_GLOBAL_POOL) &&
+	    !dev_is_dma_coherent(dev))
+		return dma_alloc_from_global_coherent(dev, size, dma_handle);
+
 	/*
 	 * Remapping or decrypting memory may block. If either is required and
 	 * we can't block, allocate the memory from the atomic pools.
+	 * If restricted DMA (i.e., is_swiotlb_for_alloc) is required, one must
+	 * set up another device coherent pool by shared-dma-pool and use
+	 * dma_alloc_from_dev_coherent instead.
 	 */
 	if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
 	    !gfpflags_allow_blocking(gfp) &&
 	    (force_dma_unencrypted(dev) ||
-	     (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !dev_is_dma_coherent(dev))))
+	     (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
+	      !dev_is_dma_coherent(dev))) &&
+	    !is_swiotlb_for_alloc(dev))
 		return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp);
 
 	/* we always manually zero the memory once we are done */
@@ -237,7 +267,7 @@ out_encrypt_pages:
 			return NULL;
 	}
 out_free_pages:
-	dma_free_contiguous(dev, page, size);
+	__dma_direct_free_pages(dev, page, size);
 	return NULL;
 }
 
@@ -247,7 +277,7 @@ void dma_direct_free(struct device *dev, size_t size,
 	unsigned int page_order = get_order(size);
 
 	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
-	    !force_dma_unencrypted(dev)) {
+	    !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev)) {
 		/* cpu_addr is a struct page cookie, not a kernel address */
 		dma_free_contiguous(dev, cpu_addr, size);
 		return;
@@ -255,11 +285,20 @@ void dma_direct_free(struct device *dev, size_t size,
 
 	if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
 	    !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
-	    !dev_is_dma_coherent(dev)) {
+	    !IS_ENABLED(CONFIG_DMA_GLOBAL_POOL) &&
+	    !dev_is_dma_coherent(dev) &&
+	    !is_swiotlb_for_alloc(dev)) {
 		arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
 		return;
 	}
 
+	if (IS_ENABLED(CONFIG_DMA_GLOBAL_POOL) &&
+	    !dev_is_dma_coherent(dev)) {
+		if (!dma_release_from_global_coherent(page_order, cpu_addr))
+			WARN_ON_ONCE(1);
+		return;
+	}
+
 	/* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */
 	if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
 	    dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size)))
@@ -273,7 +312,7 @@ void dma_direct_free(struct device *dev, size_t size,
 	else if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED))
 		arch_dma_clear_uncached(cpu_addr, size);
 
-	dma_free_contiguous(dev, dma_direct_to_page(dev, dma_addr), size);
+	__dma_direct_free_pages(dev, dma_direct_to_page(dev, dma_addr), size);
 }
 
 struct page *dma_direct_alloc_pages(struct device *dev, size_t size,
@@ -283,7 +322,8 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size,
 	void *ret;
 
 	if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
-	    force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp))
+	    force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp) &&
+	    !is_swiotlb_for_alloc(dev))
 		return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp);
 
 	page = __dma_direct_alloc_pages(dev, size, gfp);
@@ -310,7 +350,7 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size,
 	*dma_handle = phys_to_dma_direct(dev, page_to_phys(page));
 	return page;
 out_free_pages:
-	dma_free_contiguous(dev, page, size);
+	__dma_direct_free_pages(dev, page, size);
 	return NULL;
 }
 
@@ -329,7 +369,7 @@ void dma_direct_free_pages(struct device *dev, size_t size,
 	if (force_dma_unencrypted(dev))
 		set_memory_encrypted((unsigned long)vaddr, 1 << page_order);
 
-	dma_free_contiguous(dev, page, size);
+	__dma_direct_free_pages(dev, page, size);
 }
 
 #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
@@ -343,7 +383,7 @@ void dma_direct_sync_sg_for_device(struct device *dev,
 	for_each_sg(sgl, sg, nents, i) {
 		phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
 
-		if (unlikely(is_swiotlb_buffer(paddr)))
+		if (unlikely(is_swiotlb_buffer(dev, paddr)))
 			swiotlb_sync_single_for_device(dev, paddr, sg->length,
 						       dir);
 
@@ -369,7 +409,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
 		if (!dev_is_dma_coherent(dev))
 			arch_sync_dma_for_cpu(paddr, sg->length, dir);
 
-		if (unlikely(is_swiotlb_buffer(paddr)))
+		if (unlikely(is_swiotlb_buffer(dev, paddr)))
 			swiotlb_sync_single_for_cpu(dev, paddr, sg->length,
 						    dir);
 
@@ -411,7 +451,7 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
 
 out_unmap:
 	dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
-	return 0;
+	return -EIO;
 }
 
 dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr,
@@ -462,6 +502,8 @@ int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma,
 
 	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
 		return ret;
+	if (dma_mmap_from_global_coherent(vma, cpu_addr, size, &ret))
+		return ret;
 
 	if (vma->vm_pgoff >= count || user_count > count - vma->vm_pgoff)
 		return -ENXIO;
@@ -495,8 +537,8 @@ int dma_direct_supported(struct device *dev, u64 mask)
 size_t dma_direct_max_mapping_size(struct device *dev)
 {
 	/* If SWIOTLB is active, use its maximum mapping size */
-	if (is_swiotlb_active() &&
-	    (dma_addressing_limited(dev) || swiotlb_force == SWIOTLB_FORCE))
+	if (is_swiotlb_active(dev) &&
+	    (dma_addressing_limited(dev) || is_swiotlb_force_bounce(dev)))
 		return swiotlb_max_mapping_size(dev);
 	return SIZE_MAX;
 }
@@ -504,7 +546,7 @@ size_t dma_direct_max_mapping_size(struct device *dev)
 bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr)
 {
 	return !dev_is_dma_coherent(dev) ||
-		is_swiotlb_buffer(dma_to_phys(dev, dma_addr));
+	       is_swiotlb_buffer(dev, dma_to_phys(dev, dma_addr));
 }
 
 /**
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index 50afc05b6f1d..4632b0f4f72e 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -56,7 +56,7 @@ static inline void dma_direct_sync_single_for_device(struct device *dev,
 {
 	phys_addr_t paddr = dma_to_phys(dev, addr);
 
-	if (unlikely(is_swiotlb_buffer(paddr)))
+	if (unlikely(is_swiotlb_buffer(dev, paddr)))
 		swiotlb_sync_single_for_device(dev, paddr, size, dir);
 
 	if (!dev_is_dma_coherent(dev))
@@ -73,7 +73,7 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev,
 		arch_sync_dma_for_cpu_all();
 	}
 
-	if (unlikely(is_swiotlb_buffer(paddr)))
+	if (unlikely(is_swiotlb_buffer(dev, paddr)))
 		swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
 
 	if (dir == DMA_FROM_DEVICE)
@@ -87,7 +87,7 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev,
 	phys_addr_t phys = page_to_phys(page) + offset;
 	dma_addr_t dma_addr = phys_to_dma(dev, phys);
 
-	if (unlikely(swiotlb_force == SWIOTLB_FORCE))
+	if (is_swiotlb_force_bounce(dev))
 		return swiotlb_map(dev, phys, size, dir, attrs);
 
 	if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
@@ -113,7 +113,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
 		dma_direct_sync_single_for_cpu(dev, addr, size, dir);
 
-	if (unlikely(is_swiotlb_buffer(phys)))
+	if (unlikely(is_swiotlb_buffer(dev, phys)))
 		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
 }
 #endif /* _KERNEL_DMA_DIRECT_H */
diff --git a/kernel/dma/dummy.c b/kernel/dma/dummy.c
index eacd4c5b10bf..b492d59ac77e 100644
--- a/kernel/dma/dummy.c
+++ b/kernel/dma/dummy.c
@@ -22,7 +22,7 @@ static int dma_dummy_map_sg(struct device *dev, struct scatterlist *sgl,
 		int nelems, enum dma_data_direction dir,
 		unsigned long attrs)
 {
-	return 0;
+	return -EINVAL;
 }
 
 static int dma_dummy_supported(struct device *hwdev, u64 mask)
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 2b06a809d0b9..7ee5284bff58 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -177,12 +177,8 @@ void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size,
 }
 EXPORT_SYMBOL(dma_unmap_page_attrs);
 
-/*
- * dma_maps_sg_attrs returns 0 on error and > 0 on success.
- * It should never return a value < 0.
- */
-int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents,
-		enum dma_data_direction dir, unsigned long attrs)
+static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
+	 int nents, enum dma_data_direction dir, unsigned long attrs)
 {
 	const struct dma_map_ops *ops = get_dma_ops(dev);
 	int ents;
@@ -197,13 +193,81 @@ int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents,
 		ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
 	else
 		ents = ops->map_sg(dev, sg, nents, dir, attrs);
-	BUG_ON(ents < 0);
-	debug_dma_map_sg(dev, sg, nents, ents, dir);
+
+	if (ents > 0)
+		debug_dma_map_sg(dev, sg, nents, ents, dir);
+	else if (WARN_ON_ONCE(ents != -EINVAL && ents != -ENOMEM &&
+			      ents != -EIO))
+		return -EIO;
 
 	return ents;
 }
+
+/**
+ * dma_map_sg_attrs - Map the given buffer for DMA
+ * @dev:	The device for which to perform the DMA operation
+ * @sg:	The sg_table object describing the buffer
+ * @dir:	DMA direction
+ * @attrs:	Optional DMA attributes for the map operation
+ *
+ * Maps a buffer described by a scatterlist passed in the sg argument with
+ * nents segments for the @dir DMA operation by the @dev device.
+ *
+ * Returns the number of mapped entries (which can be less than nents)
+ * on success. Zero is returned for any error.
+ *
+ * dma_unmap_sg_attrs() should be used to unmap the buffer with the
+ * original sg and original nents (not the value returned by this funciton).
+ */
+unsigned int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
+		    int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+	int ret;
+
+	ret = __dma_map_sg_attrs(dev, sg, nents, dir, attrs);
+	if (ret < 0)
+		return 0;
+	return ret;
+}
 EXPORT_SYMBOL(dma_map_sg_attrs);
 
+/**
+ * dma_map_sgtable - Map the given buffer for DMA
+ * @dev:	The device for which to perform the DMA operation
+ * @sgt:	The sg_table object describing the buffer
+ * @dir:	DMA direction
+ * @attrs:	Optional DMA attributes for the map operation
+ *
+ * Maps a buffer described by a scatterlist stored in the given sg_table
+ * object for the @dir DMA operation by the @dev device. After success, the
+ * ownership for the buffer is transferred to the DMA domain.  One has to
+ * call dma_sync_sgtable_for_cpu() or dma_unmap_sgtable() to move the
+ * ownership of the buffer back to the CPU domain before touching the
+ * buffer by the CPU.
+ *
+ * Returns 0 on success or a negative error code on error. The following
+ * error codes are supported with the given meaning:
+ *
+ *   -EINVAL - An invalid argument, unaligned access or other error
+ *	       in usage. Will not succeed if retried.
+ *   -ENOMEM - Insufficient resources (like memory or IOVA space) to
+ *	       complete the mapping. Should succeed if retried later.
+ *   -EIO    - Legacy error code with an unknown meaning. eg. this is
+ *	       returned if a lower level call returned DMA_MAPPING_ERROR.
+ */
+int dma_map_sgtable(struct device *dev, struct sg_table *sgt,
+		    enum dma_data_direction dir, unsigned long attrs)
+{
+	int nents;
+
+	nents = __dma_map_sg_attrs(dev, sgt->sgl, sgt->orig_nents, dir, attrs);
+	if (nents < 0)
+		return nents;
+	sgt->nents = nents;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dma_map_sgtable);
+
 void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
 				      int nents, enum dma_data_direction dir,
 				      unsigned long attrs)
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index e50df8d8f87e..87c40517e822 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -39,6 +39,13 @@
 #ifdef CONFIG_DEBUG_FS
 #include <linux/debugfs.h>
 #endif
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/slab.h>
+#endif
 
 #include <asm/io.h>
 #include <asm/dma.h>
@@ -63,7 +70,7 @@
 
 enum swiotlb_force swiotlb_force;
 
-struct io_tlb_mem *io_tlb_default_mem;
+struct io_tlb_mem io_tlb_default_mem;
 
 /*
  * Max segment that we can provide which (if pages are contingous) will
@@ -94,7 +101,7 @@ early_param("swiotlb", setup_io_tlb_npages);
 
 unsigned int swiotlb_max_segment(void)
 {
-	return io_tlb_default_mem ? max_segment : 0;
+	return io_tlb_default_mem.nslabs ? max_segment : 0;
 }
 EXPORT_SYMBOL_GPL(swiotlb_max_segment);
 
@@ -127,9 +134,9 @@ void __init swiotlb_adjust_size(unsigned long size)
 
 void swiotlb_print_info(void)
 {
-	struct io_tlb_mem *mem = io_tlb_default_mem;
+	struct io_tlb_mem *mem = &io_tlb_default_mem;
 
-	if (!mem) {
+	if (!mem->nslabs) {
 		pr_warn("No low mem\n");
 		return;
 	}
@@ -156,11 +163,11 @@ static inline unsigned long nr_slots(u64 val)
  */
 void __init swiotlb_update_mem_attributes(void)
 {
-	struct io_tlb_mem *mem = io_tlb_default_mem;
+	struct io_tlb_mem *mem = &io_tlb_default_mem;
 	void *vaddr;
 	unsigned long bytes;
 
-	if (!mem || mem->late_alloc)
+	if (!mem->nslabs || mem->late_alloc)
 		return;
 	vaddr = phys_to_virt(mem->start);
 	bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
@@ -168,36 +175,50 @@ void __init swiotlb_update_mem_attributes(void)
 	memset(vaddr, 0, bytes);
 }
 
-int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
+static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
+				    unsigned long nslabs, bool late_alloc)
 {
+	void *vaddr = phys_to_virt(start);
 	unsigned long bytes = nslabs << IO_TLB_SHIFT, i;
-	struct io_tlb_mem *mem;
-	size_t alloc_size;
-
-	if (swiotlb_force == SWIOTLB_NO_FORCE)
-		return 0;
-
-	/* protect against double initialization */
-	if (WARN_ON_ONCE(io_tlb_default_mem))
-		return -ENOMEM;
 
-	alloc_size = PAGE_ALIGN(struct_size(mem, slots, nslabs));
-	mem = memblock_alloc(alloc_size, PAGE_SIZE);
-	if (!mem)
-		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
-		      __func__, alloc_size, PAGE_SIZE);
 	mem->nslabs = nslabs;
-	mem->start = __pa(tlb);
+	mem->start = start;
 	mem->end = mem->start + bytes;
 	mem->index = 0;
+	mem->late_alloc = late_alloc;
+
+	if (swiotlb_force == SWIOTLB_FORCE)
+		mem->force_bounce = true;
+
 	spin_lock_init(&mem->lock);
 	for (i = 0; i < mem->nslabs; i++) {
 		mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
 		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
 		mem->slots[i].alloc_size = 0;
 	}
+	memset(vaddr, 0, bytes);
+}
+
+int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
+{
+	struct io_tlb_mem *mem = &io_tlb_default_mem;
+	size_t alloc_size;
+
+	if (swiotlb_force == SWIOTLB_NO_FORCE)
+		return 0;
+
+	/* protect against double initialization */
+	if (WARN_ON_ONCE(mem->nslabs))
+		return -ENOMEM;
+
+	alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), nslabs));
+	mem->slots = memblock_alloc(alloc_size, PAGE_SIZE);
+	if (!mem->slots)
+		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
+		      __func__, alloc_size, PAGE_SIZE);
+
+	swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, false);
 
-	io_tlb_default_mem = mem;
 	if (verbose)
 		swiotlb_print_info();
 	swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT);
@@ -282,37 +303,24 @@ swiotlb_late_init_with_default_size(size_t default_size)
 int
 swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 {
-	unsigned long bytes = nslabs << IO_TLB_SHIFT, i;
-	struct io_tlb_mem *mem;
+	struct io_tlb_mem *mem = &io_tlb_default_mem;
+	unsigned long bytes = nslabs << IO_TLB_SHIFT;
 
 	if (swiotlb_force == SWIOTLB_NO_FORCE)
 		return 0;
 
 	/* protect against double initialization */
-	if (WARN_ON_ONCE(io_tlb_default_mem))
+	if (WARN_ON_ONCE(mem->nslabs))
 		return -ENOMEM;
 
-	mem = (void *)__get_free_pages(GFP_KERNEL,
-		get_order(struct_size(mem, slots, nslabs)));
-	if (!mem)
+	mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+		get_order(array_size(sizeof(*mem->slots), nslabs)));
+	if (!mem->slots)
 		return -ENOMEM;
 
-	mem->nslabs = nslabs;
-	mem->start = virt_to_phys(tlb);
-	mem->end = mem->start + bytes;
-	mem->index = 0;
-	mem->late_alloc = 1;
-	spin_lock_init(&mem->lock);
-	for (i = 0; i < mem->nslabs; i++) {
-		mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
-		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
-		mem->slots[i].alloc_size = 0;
-	}
-
 	set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
-	memset(tlb, 0, bytes);
+	swiotlb_init_io_tlb_mem(mem, virt_to_phys(tlb), nslabs, true);
 
-	io_tlb_default_mem = mem;
 	swiotlb_print_info();
 	swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT);
 	return 0;
@@ -320,18 +328,28 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 
 void __init swiotlb_exit(void)
 {
-	struct io_tlb_mem *mem = io_tlb_default_mem;
-	size_t size;
+	struct io_tlb_mem *mem = &io_tlb_default_mem;
+	unsigned long tbl_vaddr;
+	size_t tbl_size, slots_size;
 
-	if (!mem)
+	if (!mem->nslabs)
 		return;
 
-	size = struct_size(mem, slots, mem->nslabs);
-	if (mem->late_alloc)
-		free_pages((unsigned long)mem, get_order(size));
-	else
-		memblock_free_late(__pa(mem), PAGE_ALIGN(size));
-	io_tlb_default_mem = NULL;
+	pr_info("tearing down default memory pool\n");
+	tbl_vaddr = (unsigned long)phys_to_virt(mem->start);
+	tbl_size = PAGE_ALIGN(mem->end - mem->start);
+	slots_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), mem->nslabs));
+
+	set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT);
+	if (mem->late_alloc) {
+		free_pages(tbl_vaddr, get_order(tbl_size));
+		free_pages((unsigned long)mem->slots, get_order(slots_size));
+	} else {
+		memblock_free_late(mem->start, tbl_size);
+		memblock_free_late(__pa(mem->slots), slots_size);
+	}
+
+	memset(mem, 0, sizeof(*mem));
 }
 
 /*
@@ -348,19 +366,33 @@ static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
 static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size,
 			   enum dma_data_direction dir)
 {
-	struct io_tlb_mem *mem = io_tlb_default_mem;
+	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
 	int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
 	phys_addr_t orig_addr = mem->slots[index].orig_addr;
 	size_t alloc_size = mem->slots[index].alloc_size;
 	unsigned long pfn = PFN_DOWN(orig_addr);
 	unsigned char *vaddr = phys_to_virt(tlb_addr);
-	unsigned int tlb_offset;
+	unsigned int tlb_offset, orig_addr_offset;
 
 	if (orig_addr == INVALID_PHYS_ADDR)
 		return;
 
-	tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) -
-		     swiotlb_align_offset(dev, orig_addr);
+	tlb_offset = tlb_addr & (IO_TLB_SIZE - 1);
+	orig_addr_offset = swiotlb_align_offset(dev, orig_addr);
+	if (tlb_offset < orig_addr_offset) {
+		dev_WARN_ONCE(dev, 1,
+			"Access before mapping start detected. orig offset %u, requested offset %u.\n",
+			orig_addr_offset, tlb_offset);
+		return;
+	}
+
+	tlb_offset -= orig_addr_offset;
+	if (tlb_offset > alloc_size) {
+		dev_WARN_ONCE(dev, 1,
+			"Buffer overflow detected. Allocation size: %zu. Mapping size: %zu+%u.\n",
+			alloc_size, size, tlb_offset);
+		return;
+	}
 
 	orig_addr += tlb_offset;
 	alloc_size -= tlb_offset;
@@ -426,10 +458,10 @@ static unsigned int wrap_index(struct io_tlb_mem *mem, unsigned int index)
  * Find a suitable number of IO TLB entries size that will fit this request and
  * allocate a buffer from that IO TLB pool.
  */
-static int find_slots(struct device *dev, phys_addr_t orig_addr,
-		size_t alloc_size)
+static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
+			      size_t alloc_size)
 {
-	struct io_tlb_mem *mem = io_tlb_default_mem;
+	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
 	unsigned long boundary_mask = dma_get_seg_boundary(dev);
 	dma_addr_t tbl_dma_addr =
 		phys_to_dma_unencrypted(dev, mem->start) & boundary_mask;
@@ -438,6 +470,7 @@ static int find_slots(struct device *dev, phys_addr_t orig_addr,
 		dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
 	unsigned int nslots = nr_slots(alloc_size), stride;
 	unsigned int index, wrap, count = 0, i;
+	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
 	unsigned long flags;
 
 	BUG_ON(!nslots);
@@ -457,8 +490,9 @@ static int find_slots(struct device *dev, phys_addr_t orig_addr,
 
 	index = wrap = wrap_index(mem, ALIGN(mem->index, stride));
 	do {
-		if ((slot_addr(tbl_dma_addr, index) & iotlb_align_mask) !=
-		    (orig_addr & iotlb_align_mask)) {
+		if (orig_addr &&
+		    (slot_addr(tbl_dma_addr, index) & iotlb_align_mask) !=
+			    (orig_addr & iotlb_align_mask)) {
 			index = wrap_index(mem, index + 1);
 			continue;
 		}
@@ -482,8 +516,11 @@ not_found:
 	return -1;
 
 found:
-	for (i = index; i < index + nslots; i++)
+	for (i = index; i < index + nslots; i++) {
 		mem->slots[i].list = 0;
+		mem->slots[i].alloc_size =
+			alloc_size - (offset + ((i - index) << IO_TLB_SHIFT));
+	}
 	for (i = index - 1;
 	     io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
 	     mem->slots[i].list; i--)
@@ -506,7 +543,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 		size_t mapping_size, size_t alloc_size,
 		enum dma_data_direction dir, unsigned long attrs)
 {
-	struct io_tlb_mem *mem = io_tlb_default_mem;
+	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
 	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
 	unsigned int i;
 	int index;
@@ -524,7 +561,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 		return (phys_addr_t)DMA_MAPPING_ERROR;
 	}
 
-	index = find_slots(dev, orig_addr, alloc_size + offset);
+	index = swiotlb_find_slots(dev, orig_addr, alloc_size + offset);
 	if (index == -1) {
 		if (!(attrs & DMA_ATTR_NO_WARN))
 			dev_warn_ratelimited(dev,
@@ -538,11 +575,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 	 * This is needed when we sync the memory.  Then we sync the buffer if
 	 * needed.
 	 */
-	for (i = 0; i < nr_slots(alloc_size + offset); i++) {
+	for (i = 0; i < nr_slots(alloc_size + offset); i++)
 		mem->slots[index + i].orig_addr = slot_addr(orig_addr, i);
-		mem->slots[index + i].alloc_size =
-			alloc_size - (i << IO_TLB_SHIFT);
-	}
 	tlb_addr = slot_addr(mem->start, index) + offset;
 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
 	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
@@ -550,28 +584,16 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 	return tlb_addr;
 }
 
-/*
- * tlb_addr is the physical address of the bounce buffer to unmap.
- */
-void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
-			      size_t mapping_size, enum dma_data_direction dir,
-			      unsigned long attrs)
+static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
 {
-	struct io_tlb_mem *mem = io_tlb_default_mem;
+	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
 	unsigned long flags;
-	unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr);
+	unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
 	int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
 	int nslots = nr_slots(mem->slots[index].alloc_size + offset);
 	int count, i;
 
 	/*
-	 * First, sync the memory before unmapping the entry
-	 */
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-	    (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
-		swiotlb_bounce(hwdev, tlb_addr, mapping_size, DMA_FROM_DEVICE);
-
-	/*
 	 * Return the buffer to the free list by setting the corresponding
 	 * entries to indicate the number of contiguous entries available.
 	 * While returning the entries to the free list, we merge the entries
@@ -605,6 +627,23 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
 	spin_unlock_irqrestore(&mem->lock, flags);
 }
 
+/*
+ * tlb_addr is the physical address of the bounce buffer to unmap.
+ */
+void swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr,
+			      size_t mapping_size, enum dma_data_direction dir,
+			      unsigned long attrs)
+{
+	/*
+	 * First, sync the memory before unmapping the entry
+	 */
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+	    (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
+		swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_FROM_DEVICE);
+
+	swiotlb_release_slots(dev, tlb_addr);
+}
+
 void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
 		size_t size, enum dma_data_direction dir)
 {
@@ -662,26 +701,155 @@ size_t swiotlb_max_mapping_size(struct device *dev)
 	return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE;
 }
 
-bool is_swiotlb_active(void)
+bool is_swiotlb_active(struct device *dev)
 {
-	return io_tlb_default_mem != NULL;
+	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+
+	return mem && mem->nslabs;
 }
 EXPORT_SYMBOL_GPL(is_swiotlb_active);
 
 #ifdef CONFIG_DEBUG_FS
+static struct dentry *debugfs_dir;
 
-static int __init swiotlb_create_debugfs(void)
+static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem)
 {
-	struct io_tlb_mem *mem = io_tlb_default_mem;
-
-	if (!mem)
-		return 0;
-	mem->debugfs = debugfs_create_dir("swiotlb", NULL);
 	debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs);
 	debugfs_create_ulong("io_tlb_used", 0400, mem->debugfs, &mem->used);
+}
+
+static int __init swiotlb_create_default_debugfs(void)
+{
+	struct io_tlb_mem *mem = &io_tlb_default_mem;
+
+	debugfs_dir = debugfs_create_dir("swiotlb", NULL);
+	if (mem->nslabs) {
+		mem->debugfs = debugfs_dir;
+		swiotlb_create_debugfs_files(mem);
+	}
 	return 0;
 }
 
-late_initcall(swiotlb_create_debugfs);
+late_initcall(swiotlb_create_default_debugfs);
+
+#endif
 
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+
+#ifdef CONFIG_DEBUG_FS
+static void rmem_swiotlb_debugfs_init(struct reserved_mem *rmem)
+{
+	struct io_tlb_mem *mem = rmem->priv;
+
+	mem->debugfs = debugfs_create_dir(rmem->name, debugfs_dir);
+	swiotlb_create_debugfs_files(mem);
+}
+#else
+static void rmem_swiotlb_debugfs_init(struct reserved_mem *rmem)
+{
+}
 #endif
+
+struct page *swiotlb_alloc(struct device *dev, size_t size)
+{
+	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+	phys_addr_t tlb_addr;
+	int index;
+
+	if (!mem)
+		return NULL;
+
+	index = swiotlb_find_slots(dev, 0, size);
+	if (index == -1)
+		return NULL;
+
+	tlb_addr = slot_addr(mem->start, index);
+
+	return pfn_to_page(PFN_DOWN(tlb_addr));
+}
+
+bool swiotlb_free(struct device *dev, struct page *page, size_t size)
+{
+	phys_addr_t tlb_addr = page_to_phys(page);
+
+	if (!is_swiotlb_buffer(dev, tlb_addr))
+		return false;
+
+	swiotlb_release_slots(dev, tlb_addr);
+
+	return true;
+}
+
+static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
+				    struct device *dev)
+{
+	struct io_tlb_mem *mem = rmem->priv;
+	unsigned long nslabs = rmem->size >> IO_TLB_SHIFT;
+
+	/*
+	 * Since multiple devices can share the same pool, the private data,
+	 * io_tlb_mem struct, will be initialized by the first device attached
+	 * to it.
+	 */
+	if (!mem) {
+		mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+		if (!mem)
+			return -ENOMEM;
+
+		mem->slots = kzalloc(array_size(sizeof(*mem->slots), nslabs),
+				     GFP_KERNEL);
+		if (!mem->slots) {
+			kfree(mem);
+			return -ENOMEM;
+		}
+
+		set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
+				     rmem->size >> PAGE_SHIFT);
+		swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, false);
+		mem->force_bounce = true;
+		mem->for_alloc = true;
+
+		rmem->priv = mem;
+
+		rmem_swiotlb_debugfs_init(rmem);
+	}
+
+	dev->dma_io_tlb_mem = mem;
+
+	return 0;
+}
+
+static void rmem_swiotlb_device_release(struct reserved_mem *rmem,
+					struct device *dev)
+{
+	dev->dma_io_tlb_mem = &io_tlb_default_mem;
+}
+
+static const struct reserved_mem_ops rmem_swiotlb_ops = {
+	.device_init = rmem_swiotlb_device_init,
+	.device_release = rmem_swiotlb_device_release,
+};
+
+static int __init rmem_swiotlb_setup(struct reserved_mem *rmem)
+{
+	unsigned long node = rmem->fdt_node;
+
+	if (of_get_flat_dt_prop(node, "reusable", NULL) ||
+	    of_get_flat_dt_prop(node, "linux,cma-default", NULL) ||
+	    of_get_flat_dt_prop(node, "linux,dma-default", NULL) ||
+	    of_get_flat_dt_prop(node, "no-map", NULL))
+		return -EINVAL;
+
+	if (PageHighMem(pfn_to_page(PHYS_PFN(rmem->base)))) {
+		pr_err("Restricted DMA pool must be accessible within the linear mapping.");
+		return -EINVAL;
+	}
+
+	rmem->ops = &rmem_swiotlb_ops;
+	pr_info("Reserved memory: created restricted DMA pool at %pa, size %ld MiB\n",
+		&rmem->base, (unsigned long)rmem->size / SZ_1M);
+	return 0;
+}
+
+RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", rmem_swiotlb_setup);
+#endif /* CONFIG_DMA_RESTRICTED_POOL */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 011cc5069b7b..744e8726c5b2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8320,8 +8320,6 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 	else
 		flags = MAP_PRIVATE;
 
-	if (vma->vm_flags & VM_DENYWRITE)
-		flags |= MAP_DENYWRITE;
 	if (vma->vm_flags & VM_LOCKED)
 		flags |= MAP_LOCKED;
 	if (is_vm_hugetlb_page(vma))
diff --git a/kernel/fork.c b/kernel/fork.c
index 695d1343a254..38681ad44c76 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -471,6 +471,20 @@ void free_task(struct task_struct *tsk)
 }
 EXPORT_SYMBOL(free_task);
 
+static void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+	struct file *exe_file;
+
+	exe_file = get_mm_exe_file(oldmm);
+	RCU_INIT_POINTER(mm->exe_file, exe_file);
+	/*
+	 * We depend on the oldmm having properly denied write access to the
+	 * exe_file already.
+	 */
+	if (exe_file && deny_write_access(exe_file))
+		pr_warn_once("deny_write_access() failed in %s\n", __func__);
+}
+
 #ifdef CONFIG_MMU
 static __latent_entropy int dup_mmap(struct mm_struct *mm,
 					struct mm_struct *oldmm)
@@ -494,7 +508,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 	mmap_write_lock_nested(mm, SINGLE_DEPTH_NESTING);
 
 	/* No ordering required: file already has been exposed. */
-	RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
+	dup_mm_exe_file(mm, oldmm);
 
 	mm->total_vm = oldmm->total_vm;
 	mm->data_vm = oldmm->data_vm;
@@ -557,12 +571,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 		tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT);
 		file = tmp->vm_file;
 		if (file) {
-			struct inode *inode = file_inode(file);
 			struct address_space *mapping = file->f_mapping;
 
 			get_file(file);
-			if (tmp->vm_flags & VM_DENYWRITE)
-				put_write_access(inode);
 			i_mmap_lock_write(mapping);
 			if (tmp->vm_flags & VM_SHARED)
 				mapping_allow_writable(mapping);
@@ -640,7 +651,7 @@ static inline void mm_free_pgd(struct mm_struct *mm)
 static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 {
 	mmap_write_lock(oldmm);
-	RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
+	dup_mm_exe_file(mm, oldmm);
 	mmap_write_unlock(oldmm);
 	return 0;
 }
@@ -1052,6 +1063,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	mm->pmd_huge_pte = NULL;
 #endif
 	mm_init_uprobes_state(mm);
+	hugetlb_count_init(mm);
 
 	if (current->mm) {
 		mm->flags = current->mm->flags & MMF_INIT_MASK;
@@ -1150,11 +1162,11 @@ void mmput_async(struct mm_struct *mm)
  *
  * Main users are mmput() and sys_execve(). Callers prevent concurrent
  * invocations: in mmput() nobody alive left, in execve task is single
- * threaded. sys_prctl(PR_SET_MM_MAP/EXE_FILE) also needs to set the
- * mm->exe_file, but does so without using set_mm_exe_file() in order
- * to avoid the need for any locks.
+ * threaded.
+ *
+ * Can only fail if new_exe_file != NULL.
  */
-void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
+int set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
 {
 	struct file *old_exe_file;
 
@@ -1165,11 +1177,73 @@ void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
 	 */
 	old_exe_file = rcu_dereference_raw(mm->exe_file);
 
-	if (new_exe_file)
+	if (new_exe_file) {
+		/*
+		 * We expect the caller (i.e., sys_execve) to already denied
+		 * write access, so this is unlikely to fail.
+		 */
+		if (unlikely(deny_write_access(new_exe_file)))
+			return -EACCES;
 		get_file(new_exe_file);
+	}
 	rcu_assign_pointer(mm->exe_file, new_exe_file);
-	if (old_exe_file)
+	if (old_exe_file) {
+		allow_write_access(old_exe_file);
 		fput(old_exe_file);
+	}
+	return 0;
+}
+
+/**
+ * replace_mm_exe_file - replace a reference to the mm's executable file
+ *
+ * This changes mm's executable file (shown as symlink /proc/[pid]/exe),
+ * dealing with concurrent invocation and without grabbing the mmap lock in
+ * write mode.
+ *
+ * Main user is sys_prctl(PR_SET_MM_MAP/EXE_FILE).
+ */
+int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
+{
+	struct vm_area_struct *vma;
+	struct file *old_exe_file;
+	int ret = 0;
+
+	/* Forbid mm->exe_file change if old file still mapped. */
+	old_exe_file = get_mm_exe_file(mm);
+	if (old_exe_file) {
+		mmap_read_lock(mm);
+		for (vma = mm->mmap; vma && !ret; vma = vma->vm_next) {
+			if (!vma->vm_file)
+				continue;
+			if (path_equal(&vma->vm_file->f_path,
+				       &old_exe_file->f_path))
+				ret = -EBUSY;
+		}
+		mmap_read_unlock(mm);
+		fput(old_exe_file);
+		if (ret)
+			return ret;
+	}
+
+	/* set the new file, lockless */
+	ret = deny_write_access(new_exe_file);
+	if (ret)
+		return -EACCES;
+	get_file(new_exe_file);
+
+	old_exe_file = xchg(&mm->exe_file, new_exe_file);
+	if (old_exe_file) {
+		/*
+		 * Don't race with dup_mmap() getting the file and disallowing
+		 * write access while someone might open the file writable.
+		 */
+		mmap_read_lock(mm);
+		allow_write_access(old_exe_file);
+		fput(old_exe_file);
+		mmap_read_unlock(mm);
+	}
+	return 0;
 }
 
 /**
@@ -1189,7 +1263,6 @@ struct file *get_mm_exe_file(struct mm_struct *mm)
 	rcu_read_unlock();
 	return exe_file;
 }
-EXPORT_SYMBOL(get_mm_exe_file);
 
 /**
  * get_task_exe_file - acquire a reference to the task's executable file
@@ -1212,7 +1285,6 @@ struct file *get_task_exe_file(struct task_struct *task)
 	task_unlock(task);
 	return exe_file;
 }
-EXPORT_SYMBOL(get_task_exe_file);
 
 /**
  * get_task_mm - acquire a reference to the task's mm
diff --git a/kernel/futex.c b/kernel/futex.c
index e7b4c6121da4..c15ad276fd15 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1263,6 +1263,36 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval,
 	return -ESRCH;
 }
 
+static void __attach_to_pi_owner(struct task_struct *p, union futex_key *key,
+				 struct futex_pi_state **ps)
+{
+	/*
+	 * No existing pi state. First waiter. [2]
+	 *
+	 * This creates pi_state, we have hb->lock held, this means nothing can
+	 * observe this state, wait_lock is irrelevant.
+	 */
+	struct futex_pi_state *pi_state = alloc_pi_state();
+
+	/*
+	 * Initialize the pi_mutex in locked state and make @p
+	 * the owner of it:
+	 */
+	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
+
+	/* Store the key for possible exit cleanups: */
+	pi_state->key = *key;
+
+	WARN_ON(!list_empty(&pi_state->list));
+	list_add(&pi_state->list, &p->pi_state_list);
+	/*
+	 * Assignment without holding pi_state->pi_mutex.wait_lock is safe
+	 * because there is no concurrency as the object is not published yet.
+	 */
+	pi_state->owner = p;
+
+	*ps = pi_state;
+}
 /*
  * Lookup the task for the TID provided from user space and attach to
  * it after doing proper sanity checks.
@@ -1272,7 +1302,6 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
 			      struct task_struct **exiting)
 {
 	pid_t pid = uval & FUTEX_TID_MASK;
-	struct futex_pi_state *pi_state;
 	struct task_struct *p;
 
 	/*
@@ -1324,36 +1353,11 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
 		return ret;
 	}
 
-	/*
-	 * No existing pi state. First waiter. [2]
-	 *
-	 * This creates pi_state, we have hb->lock held, this means nothing can
-	 * observe this state, wait_lock is irrelevant.
-	 */
-	pi_state = alloc_pi_state();
-
-	/*
-	 * Initialize the pi_mutex in locked state and make @p
-	 * the owner of it:
-	 */
-	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
-
-	/* Store the key for possible exit cleanups: */
-	pi_state->key = *key;
-
-	WARN_ON(!list_empty(&pi_state->list));
-	list_add(&pi_state->list, &p->pi_state_list);
-	/*
-	 * Assignment without holding pi_state->pi_mutex.wait_lock is safe
-	 * because there is no concurrency as the object is not published yet.
-	 */
-	pi_state->owner = p;
+	__attach_to_pi_owner(p, key, ps);
 	raw_spin_unlock_irq(&p->pi_lock);
 
 	put_task_struct(p);
 
-	*ps = pi_state;
-
 	return 0;
 }
 
@@ -1454,8 +1458,26 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
 			newval |= FUTEX_WAITERS;
 
 		ret = lock_pi_update_atomic(uaddr, uval, newval);
-		/* If the take over worked, return 1 */
-		return ret < 0 ? ret : 1;
+		if (ret)
+			return ret;
+
+		/*
+		 * If the waiter bit was requested the caller also needs PI
+		 * state attached to the new owner of the user space futex.
+		 *
+		 * @task is guaranteed to be alive and it cannot be exiting
+		 * because it is either sleeping or waiting in
+		 * futex_requeue_pi_wakeup_sync().
+		 *
+		 * No need to do the full attach_to_pi_owner() exercise
+		 * because @task is known and valid.
+		 */
+		if (set_waiters) {
+			raw_spin_lock_irq(&task->pi_lock);
+			__attach_to_pi_owner(task, key, ps);
+			raw_spin_unlock_irq(&task->pi_lock);
+		}
+		return 1;
 	}
 
 	/*
@@ -1939,12 +1961,26 @@ static inline int futex_requeue_pi_wakeup_sync(struct futex_q *q)
  * @hb:		the hash_bucket of the requeue target futex
  *
  * During futex_requeue, with requeue_pi=1, it is possible to acquire the
- * target futex if it is uncontended or via a lock steal.  Set the futex_q key
- * to the requeue target futex so the waiter can detect the wakeup on the right
- * futex, but remove it from the hb and NULL the rt_waiter so it can detect
- * atomic lock acquisition.  Set the q->lock_ptr to the requeue target hb->lock
- * to protect access to the pi_state to fixup the owner later.  Must be called
- * with both q->lock_ptr and hb->lock held.
+ * target futex if it is uncontended or via a lock steal.
+ *
+ * 1) Set @q::key to the requeue target futex key so the waiter can detect
+ *    the wakeup on the right futex.
+ *
+ * 2) Dequeue @q from the hash bucket.
+ *
+ * 3) Set @q::rt_waiter to NULL so the woken up task can detect atomic lock
+ *    acquisition.
+ *
+ * 4) Set the q->lock_ptr to the requeue target hb->lock for the case that
+ *    the waiter has to fixup the pi state.
+ *
+ * 5) Complete the requeue state so the waiter can make progress. After
+ *    this point the waiter task can return from the syscall immediately in
+ *    case that the pi state does not have to be fixed up.
+ *
+ * 6) Wake the waiter task.
+ *
+ * Must be called with both q->lock_ptr and hb->lock held.
  */
 static inline
 void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
@@ -1998,7 +2034,7 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
 {
 	struct futex_q *top_waiter = NULL;
 	u32 curval;
-	int ret, vpid;
+	int ret;
 
 	if (get_futex_value_locked(&curval, pifutex))
 		return -EFAULT;
@@ -2025,7 +2061,7 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
 	 * and waiting on the 'waitqueue' futex which is always !PI.
 	 */
 	if (!top_waiter->rt_waiter || top_waiter->pi_state)
-		ret = -EINVAL;
+		return -EINVAL;
 
 	/* Ensure we requeue to the expected futex. */
 	if (!match_futex(top_waiter->requeue_pi_key, key2))
@@ -2036,17 +2072,23 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
 		return -EAGAIN;
 
 	/*
-	 * Try to take the lock for top_waiter.  Set the FUTEX_WAITERS bit in
-	 * the contended case or if set_waiters is 1.  The pi_state is returned
-	 * in ps in contended cases.
+	 * Try to take the lock for top_waiter and set the FUTEX_WAITERS bit
+	 * in the contended case or if @set_waiters is true.
+	 *
+	 * In the contended case PI state is attached to the lock owner. If
+	 * the user space lock can be acquired then PI state is attached to
+	 * the new owner (@top_waiter->task) when @set_waiters is true.
 	 */
-	vpid = task_pid_vnr(top_waiter->task);
 	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
 				   exiting, set_waiters);
 	if (ret == 1) {
-		/* Dequeue, wake up and update top_waiter::requeue_state */
+		/*
+		 * Lock was acquired in user space and PI state was
+		 * attached to @top_waiter->task. That means state is fully
+		 * consistent and the waiter can return to user space
+		 * immediately after the wakeup.
+		 */
 		requeue_pi_wake_futex(top_waiter, key2, hb2);
-		return vpid;
 	} else if (ret < 0) {
 		/* Rewind top_waiter::requeue_state */
 		futex_requeue_pi_complete(top_waiter, ret);
@@ -2208,19 +2250,26 @@ retry_private:
 						 &exiting, nr_requeue);
 
 		/*
-		 * At this point the top_waiter has either taken uaddr2 or is
-		 * waiting on it.  If the former, then the pi_state will not
-		 * exist yet, look it up one more time to ensure we have a
-		 * reference to it. If the lock was taken, @ret contains the
-		 * VPID of the top waiter task.
-		 * If the lock was not taken, we have pi_state and an initial
-		 * refcount on it. In case of an error we have nothing.
+		 * At this point the top_waiter has either taken uaddr2 or
+		 * is waiting on it. In both cases pi_state has been
+		 * established and an initial refcount on it. In case of an
+		 * error there's nothing.
 		 *
 		 * The top waiter's requeue_state is up to date:
 		 *
-		 *  - If the lock was acquired atomically (ret > 0), then
+		 *  - If the lock was acquired atomically (ret == 1), then
 		 *    the state is Q_REQUEUE_PI_LOCKED.
 		 *
+		 *    The top waiter has been dequeued and woken up and can
+		 *    return to user space immediately. The kernel/user
+		 *    space state is consistent. In case that there must be
+		 *    more waiters requeued the WAITERS bit in the user
+		 *    space futex is set so the top waiter task has to go
+		 *    into the syscall slowpath to unlock the futex. This
+		 *    will block until this requeue operation has been
+		 *    completed and the hash bucket locks have been
+		 *    dropped.
+		 *
 		 *  - If the trylock failed with an error (ret < 0) then
 		 *    the state is either Q_REQUEUE_PI_NONE, i.e. "nothing
 		 *    happened", or Q_REQUEUE_PI_IGNORE when there was an
@@ -2234,36 +2283,20 @@ retry_private:
 		 *    the same sanity checks for requeue_pi as the loop
 		 *    below does.
 		 */
-		if (ret > 0) {
-			WARN_ON(pi_state);
-			task_count++;
-			/*
-			 * If futex_proxy_trylock_atomic() acquired the
-			 * user space futex, then the user space value
-			 * @uaddr2 has been set to the @hb1's top waiter
-			 * task VPID. This task is guaranteed to be alive
-			 * and cannot be exiting because it is either
-			 * sleeping or blocked on @hb2 lock.
-			 *
-			 * The @uaddr2 futex cannot have waiters either as
-			 * otherwise futex_proxy_trylock_atomic() would not
-			 * have succeeded.
-			 *
-			 * In order to requeue waiters to @hb2, pi state is
-			 * required. Hand in the VPID value (@ret) and
-			 * allocate PI state with an initial refcount on
-			 * it.
-			 */
-			ret = attach_to_pi_owner(uaddr2, ret, &key2, &pi_state,
-						 &exiting);
-			WARN_ON(ret);
-		}
-
 		switch (ret) {
 		case 0:
 			/* We hold a reference on the pi state. */
 			break;
 
+		case 1:
+			/*
+			 * futex_proxy_trylock_atomic() acquired the user space
+			 * futex. Adjust task_count.
+			 */
+			task_count++;
+			ret = 0;
+			break;
+
 		/*
 		 * If the above failed, then pi_state is NULL and
 		 * waiter::requeue_state is correct.
@@ -2395,9 +2428,8 @@ retry_private:
 	}
 
 	/*
-	 * We took an extra initial reference to the pi_state either in
-	 * futex_proxy_trylock_atomic() or in attach_to_pi_owner(). We need
-	 * to drop it here again.
+	 * We took an extra initial reference to the pi_state in
+	 * futex_proxy_trylock_atomic(). We need to drop it here again.
 	 */
 	put_pi_state(pi_state);
 
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 62be16135e7c..19e83e9b723c 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -491,6 +491,7 @@ struct irq_domain *irq_get_default_host(void)
 {
 	return irq_default_domain;
 }
+EXPORT_SYMBOL_GPL(irq_get_default_host);
 
 static bool irq_domain_is_nomap(struct irq_domain *domain)
 {
diff --git a/kernel/kcsan/atomic.h b/kernel/kcsan/atomic.h
deleted file mode 100644
index 530ae1bda8e7..000000000000
--- a/kernel/kcsan/atomic.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Rules for implicitly atomic memory accesses.
- *
- * Copyright (C) 2019, Google LLC.
- */
-
-#ifndef _KERNEL_KCSAN_ATOMIC_H
-#define _KERNEL_KCSAN_ATOMIC_H
-
-#include <linux/types.h>
-
-/*
- * Special rules for certain memory where concurrent conflicting accesses are
- * common, however, the current convention is to not mark them; returns true if
- * access to @ptr should be considered atomic. Called from slow-path.
- */
-static bool kcsan_is_atomic_special(const volatile void *ptr)
-{
-	return false;
-}
-
-#endif /* _KERNEL_KCSAN_ATOMIC_H */
diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
index 26709ea65c71..76e67d1e02d4 100644
--- a/kernel/kcsan/core.c
+++ b/kernel/kcsan/core.c
@@ -20,9 +20,9 @@
 #include <linux/sched.h>
 #include <linux/uaccess.h>
 
-#include "atomic.h"
 #include "encoding.h"
 #include "kcsan.h"
+#include "permissive.h"
 
 static bool kcsan_early_enable = IS_ENABLED(CONFIG_KCSAN_EARLY_ENABLE);
 unsigned int kcsan_udelay_task = CONFIG_KCSAN_UDELAY_TASK;
@@ -301,9 +301,9 @@ static inline void reset_kcsan_skip(void)
 	this_cpu_write(kcsan_skip, skip_count);
 }
 
-static __always_inline bool kcsan_is_enabled(void)
+static __always_inline bool kcsan_is_enabled(struct kcsan_ctx *ctx)
 {
-	return READ_ONCE(kcsan_enabled) && get_ctx()->disable_count == 0;
+	return READ_ONCE(kcsan_enabled) && !ctx->disable_count;
 }
 
 /* Introduce delay depending on context and configuration. */
@@ -353,10 +353,18 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr,
 					    atomic_long_t *watchpoint,
 					    long encoded_watchpoint)
 {
+	const bool is_assert = (type & KCSAN_ACCESS_ASSERT) != 0;
+	struct kcsan_ctx *ctx = get_ctx();
 	unsigned long flags;
 	bool consumed;
 
-	if (!kcsan_is_enabled())
+	/*
+	 * We know a watchpoint exists. Let's try to keep the race-window
+	 * between here and finally consuming the watchpoint below as small as
+	 * possible -- avoid unneccessarily complex code until consumed.
+	 */
+
+	if (!kcsan_is_enabled(ctx))
 		return;
 
 	/*
@@ -364,14 +372,22 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr,
 	 * reporting a race where e.g. the writer set up the watchpoint, but the
 	 * reader has access_mask!=0, we have to ignore the found watchpoint.
 	 */
-	if (get_ctx()->access_mask != 0)
+	if (ctx->access_mask)
 		return;
 
 	/*
-	 * Consume the watchpoint as soon as possible, to minimize the chances
-	 * of !consumed. Consuming the watchpoint must always be guarded by
-	 * kcsan_is_enabled() check, as otherwise we might erroneously
-	 * triggering reports when disabled.
+	 * If the other thread does not want to ignore the access, and there was
+	 * a value change as a result of this thread's operation, we will still
+	 * generate a report of unknown origin.
+	 *
+	 * Use CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN=n to filter.
+	 */
+	if (!is_assert && kcsan_ignore_address(ptr))
+		return;
+
+	/*
+	 * Consuming the watchpoint must be guarded by kcsan_is_enabled() to
+	 * avoid erroneously triggering reports if the context is disabled.
 	 */
 	consumed = try_consume_watchpoint(watchpoint, encoded_watchpoint);
 
@@ -391,7 +407,7 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr,
 		atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_REPORT_RACES]);
 	}
 
-	if ((type & KCSAN_ACCESS_ASSERT) != 0)
+	if (is_assert)
 		atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_ASSERT_FAILURES]);
 	else
 		atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_DATA_RACES]);
@@ -409,6 +425,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
 	unsigned long access_mask;
 	enum kcsan_value_change value_change = KCSAN_VALUE_CHANGE_MAYBE;
 	unsigned long ua_flags = user_access_save();
+	struct kcsan_ctx *ctx = get_ctx();
 	unsigned long irq_flags = 0;
 
 	/*
@@ -417,16 +434,14 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
 	 */
 	reset_kcsan_skip();
 
-	if (!kcsan_is_enabled())
+	if (!kcsan_is_enabled(ctx))
 		goto out;
 
 	/*
-	 * Special atomic rules: unlikely to be true, so we check them here in
-	 * the slow-path, and not in the fast-path in is_atomic(). Call after
-	 * kcsan_is_enabled(), as we may access memory that is not yet
-	 * initialized during early boot.
+	 * Check to-ignore addresses after kcsan_is_enabled(), as we may access
+	 * memory that is not yet initialized during early boot.
 	 */
-	if (!is_assert && kcsan_is_atomic_special(ptr))
+	if (!is_assert && kcsan_ignore_address(ptr))
 		goto out;
 
 	if (!check_encodable((unsigned long)ptr, size)) {
@@ -479,15 +494,6 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
 		break; /* ignore; we do not diff the values */
 	}
 
-	if (IS_ENABLED(CONFIG_KCSAN_DEBUG)) {
-		kcsan_disable_current();
-		pr_err("watching %s, size: %zu, addr: %px [slot: %d, encoded: %lx]\n",
-		       is_write ? "write" : "read", size, ptr,
-		       watchpoint_slot((unsigned long)ptr),
-		       encode_watchpoint((unsigned long)ptr, size, is_write));
-		kcsan_enable_current();
-	}
-
 	/*
 	 * Delay this thread, to increase probability of observing a racy
 	 * conflicting access.
@@ -498,7 +504,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
 	 * Re-read value, and check if it is as expected; if not, we infer a
 	 * racy access.
 	 */
-	access_mask = get_ctx()->access_mask;
+	access_mask = ctx->access_mask;
 	new = 0;
 	switch (size) {
 	case 1:
@@ -521,8 +527,14 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
 	if (access_mask)
 		diff &= access_mask;
 
-	/* Were we able to observe a value-change? */
-	if (diff != 0)
+	/*
+	 * Check if we observed a value change.
+	 *
+	 * Also check if the data race should be ignored (the rules depend on
+	 * non-zero diff); if it is to be ignored, the below rules for
+	 * KCSAN_VALUE_CHANGE_MAYBE apply.
+	 */
+	if (diff && !kcsan_ignore_data_race(size, type, old, new, diff))
 		value_change = KCSAN_VALUE_CHANGE_TRUE;
 
 	/* Check if this access raced with another. */
@@ -644,6 +656,15 @@ void __init kcsan_init(void)
 		pr_info("enabled early\n");
 		WRITE_ONCE(kcsan_enabled, true);
 	}
+
+	if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) ||
+	    IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC) ||
+	    IS_ENABLED(CONFIG_KCSAN_PERMISSIVE) ||
+	    IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) {
+		pr_warn("non-strict mode configured - use CONFIG_KCSAN_STRICT=y to see all data races\n");
+	} else {
+		pr_info("strict mode configured\n");
+	}
 }
 
 /* === Exported interface =================================================== */
diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c
index 8bcffbdef3d3..dc55fd5a36fc 100644
--- a/kernel/kcsan/kcsan_test.c
+++ b/kernel/kcsan/kcsan_test.c
@@ -414,6 +414,14 @@ static noinline void test_kernel_atomic_builtins(void)
 	__atomic_load_n(&test_var, __ATOMIC_RELAXED);
 }
 
+static noinline void test_kernel_xor_1bit(void)
+{
+	/* Do not report data races between the read-writes. */
+	kcsan_nestable_atomic_begin();
+	test_var ^= 0x10000;
+	kcsan_nestable_atomic_end();
+}
+
 /* ===== Test cases ===== */
 
 /* Simple test with normal data race. */
@@ -952,6 +960,29 @@ static void test_atomic_builtins(struct kunit *test)
 	KUNIT_EXPECT_FALSE(test, match_never);
 }
 
+__no_kcsan
+static void test_1bit_value_change(struct kunit *test)
+{
+	const struct expect_report expect = {
+		.access = {
+			{ test_kernel_read, &test_var, sizeof(test_var), 0 },
+			{ test_kernel_xor_1bit, &test_var, sizeof(test_var), __KCSAN_ACCESS_RW(KCSAN_ACCESS_WRITE) },
+		},
+	};
+	bool match = false;
+
+	begin_test_checks(test_kernel_read, test_kernel_xor_1bit);
+	do {
+		match = IS_ENABLED(CONFIG_KCSAN_PERMISSIVE)
+				? report_available()
+				: report_matches(&expect);
+	} while (!end_test_checks(match));
+	if (IS_ENABLED(CONFIG_KCSAN_PERMISSIVE))
+		KUNIT_EXPECT_FALSE(test, match);
+	else
+		KUNIT_EXPECT_TRUE(test, match);
+}
+
 /*
  * Generate thread counts for all test cases. Values generated are in interval
  * [2, 5] followed by exponentially increasing thread counts from 8 to 32.
@@ -1024,6 +1055,7 @@ static struct kunit_case kcsan_test_cases[] = {
 	KCSAN_KUNIT_CASE(test_jiffies_noreport),
 	KCSAN_KUNIT_CASE(test_seqlock_noreport),
 	KCSAN_KUNIT_CASE(test_atomic_builtins),
+	KCSAN_KUNIT_CASE(test_1bit_value_change),
 	{},
 };
 
diff --git a/kernel/kcsan/permissive.h b/kernel/kcsan/permissive.h
new file mode 100644
index 000000000000..2c01fe4a59ee
--- /dev/null
+++ b/kernel/kcsan/permissive.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Special rules for ignoring entire classes of data-racy memory accesses. None
+ * of the rules here imply that such data races are generally safe!
+ *
+ * All rules in this file can be configured via CONFIG_KCSAN_PERMISSIVE. Keep
+ * them separate from core code to make it easier to audit.
+ *
+ * Copyright (C) 2019, Google LLC.
+ */
+
+#ifndef _KERNEL_KCSAN_PERMISSIVE_H
+#define _KERNEL_KCSAN_PERMISSIVE_H
+
+#include <linux/bitops.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+
+/*
+ * Access ignore rules based on address.
+ */
+static __always_inline bool kcsan_ignore_address(const volatile void *ptr)
+{
+	if (!IS_ENABLED(CONFIG_KCSAN_PERMISSIVE))
+		return false;
+
+	/*
+	 * Data-racy bitops on current->flags are too common, ignore completely
+	 * for now.
+	 */
+	return ptr == &current->flags;
+}
+
+/*
+ * Data race ignore rules based on access type and value change patterns.
+ */
+static bool
+kcsan_ignore_data_race(size_t size, int type, u64 old, u64 new, u64 diff)
+{
+	if (!IS_ENABLED(CONFIG_KCSAN_PERMISSIVE))
+		return false;
+
+	/*
+	 * Rules here are only for plain read accesses, so that we still report
+	 * data races between plain read-write accesses.
+	 */
+	if (type || size > sizeof(long))
+		return false;
+
+	/*
+	 * A common pattern is checking/setting just 1 bit in a variable; for
+	 * example:
+	 *
+	 *	if (flags & SOME_FLAG) { ... }
+	 *
+	 * and elsewhere flags is updated concurrently:
+	 *
+	 *	flags |= SOME_OTHER_FLAG; // just 1 bit
+	 *
+	 * While it is still recommended that such accesses be marked
+	 * appropriately, in many cases these types of data races are so common
+	 * that marking them all is often unrealistic and left to maintainer
+	 * preference.
+	 *
+	 * The assumption in all cases is that with all known compiler
+	 * optimizations (including those that tear accesses), because no more
+	 * than 1 bit changed, the plain accesses are safe despite the presence
+	 * of data races.
+	 *
+	 * The rules here will ignore the data races if we observe no more than
+	 * 1 bit changed.
+	 *
+	 * Of course many operations can effecively change just 1 bit, but the
+	 * general assuption that data races involving 1-bit changes can be
+	 * tolerated still applies.
+	 *
+	 * And in case a true bug is missed, the bug likely manifests as a
+	 * reportable data race elsewhere.
+	 */
+	if (hweight64(diff) == 1) {
+		/*
+		 * Exception: Report data races where the values look like
+		 * ordinary booleans (one of them was 0 and the 0th bit was
+		 * changed) More often than not, they come with interesting
+		 * memory ordering requirements, so let's report them.
+		 */
+		if (!((!old || !new) && diff == 1))
+			return true;
+	}
+
+	return false;
+}
+
+#endif /* _KERNEL_KCSAN_PERMISSIVE_H */
diff --git a/kernel/kexec.c b/kernel/kexec.c
index c82c6c06f051..b5e40f069768 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -19,26 +19,9 @@
 
 #include "kexec_internal.h"
 
-static int copy_user_segment_list(struct kimage *image,
-				  unsigned long nr_segments,
-				  struct kexec_segment __user *segments)
-{
-	int ret;
-	size_t segment_bytes;
-
-	/* Read in the segments */
-	image->nr_segments = nr_segments;
-	segment_bytes = nr_segments * sizeof(*segments);
-	ret = copy_from_user(image->segment, segments, segment_bytes);
-	if (ret)
-		ret = -EFAULT;
-
-	return ret;
-}
-
 static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
 			     unsigned long nr_segments,
-			     struct kexec_segment __user *segments,
+			     struct kexec_segment *segments,
 			     unsigned long flags)
 {
 	int ret;
@@ -58,10 +41,8 @@ static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
 		return -ENOMEM;
 
 	image->start = entry;
-
-	ret = copy_user_segment_list(image, nr_segments, segments);
-	if (ret)
-		goto out_free_image;
+	image->nr_segments = nr_segments;
+	memcpy(image->segment, segments, nr_segments * sizeof(*segments));
 
 	if (kexec_on_panic) {
 		/* Enable special crash kernel control page alloc policy. */
@@ -104,12 +85,23 @@ out_free_image:
 }
 
 static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
-		struct kexec_segment __user *segments, unsigned long flags)
+		struct kexec_segment *segments, unsigned long flags)
 {
 	struct kimage **dest_image, *image;
 	unsigned long i;
 	int ret;
 
+	/*
+	 * Because we write directly to the reserved memory region when loading
+	 * crash kernels we need a mutex here to prevent multiple crash kernels
+	 * from attempting to load simultaneously, and to prevent a crash kernel
+	 * from loading over the top of a in use crash kernel.
+	 *
+	 * KISS: always take the mutex.
+	 */
+	if (!mutex_trylock(&kexec_mutex))
+		return -EBUSY;
+
 	if (flags & KEXEC_ON_CRASH) {
 		dest_image = &kexec_crash_image;
 		if (kexec_crash_image)
@@ -121,7 +113,8 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
 	if (nr_segments == 0) {
 		/* Uninstall image */
 		kimage_free(xchg(dest_image, NULL));
-		return 0;
+		ret = 0;
+		goto out_unlock;
 	}
 	if (flags & KEXEC_ON_CRASH) {
 		/*
@@ -134,7 +127,7 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
 
 	ret = kimage_alloc_init(&image, entry, nr_segments, segments, flags);
 	if (ret)
-		return ret;
+		goto out_unlock;
 
 	if (flags & KEXEC_PRESERVE_CONTEXT)
 		image->preserve_context = 1;
@@ -171,6 +164,8 @@ out:
 		arch_kexec_protect_crashkres();
 
 	kimage_free(image);
+out_unlock:
+	mutex_unlock(&kexec_mutex);
 	return ret;
 }
 
@@ -236,7 +231,8 @@ static inline int kexec_load_check(unsigned long nr_segments,
 SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
 		struct kexec_segment __user *, segments, unsigned long, flags)
 {
-	int result;
+	struct kexec_segment *ksegments;
+	unsigned long result;
 
 	result = kexec_load_check(nr_segments, flags);
 	if (result)
@@ -247,20 +243,12 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
 		((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
 		return -EINVAL;
 
-	/* Because we write directly to the reserved memory
-	 * region when loading crash kernels we need a mutex here to
-	 * prevent multiple crash  kernels from attempting to load
-	 * simultaneously, and to prevent a crash kernel from loading
-	 * over the top of a in use crash kernel.
-	 *
-	 * KISS: always take the mutex.
-	 */
-	if (!mutex_trylock(&kexec_mutex))
-		return -EBUSY;
+	ksegments = memdup_user(segments, nr_segments * sizeof(ksegments[0]));
+	if (IS_ERR(ksegments))
+		return PTR_ERR(ksegments);
 
-	result = do_kexec_load(entry, nr_segments, segments, flags);
-
-	mutex_unlock(&kexec_mutex);
+	result = do_kexec_load(entry, nr_segments, ksegments, flags);
+	kfree(ksegments);
 
 	return result;
 }
@@ -272,7 +260,7 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
 		       compat_ulong_t, flags)
 {
 	struct compat_kexec_segment in;
-	struct kexec_segment out, __user *ksegments;
+	struct kexec_segment *ksegments;
 	unsigned long i, result;
 
 	result = kexec_load_check(nr_segments, flags);
@@ -285,37 +273,26 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
 	if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
 		return -EINVAL;
 
-	ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
+	ksegments = kmalloc_array(nr_segments, sizeof(ksegments[0]),
+			GFP_KERNEL);
+	if (!ksegments)
+		return -ENOMEM;
+
 	for (i = 0; i < nr_segments; i++) {
 		result = copy_from_user(&in, &segments[i], sizeof(in));
 		if (result)
-			return -EFAULT;
+			goto fail;
 
-		out.buf   = compat_ptr(in.buf);
-		out.bufsz = in.bufsz;
-		out.mem   = in.mem;
-		out.memsz = in.memsz;
-
-		result = copy_to_user(&ksegments[i], &out, sizeof(out));
-		if (result)
-			return -EFAULT;
+		ksegments[i].buf   = compat_ptr(in.buf);
+		ksegments[i].bufsz = in.bufsz;
+		ksegments[i].mem   = in.mem;
+		ksegments[i].memsz = in.memsz;
 	}
 
-	/* Because we write directly to the reserved memory
-	 * region when loading crash kernels we need a mutex here to
-	 * prevent multiple crash  kernels from attempting to load
-	 * simultaneously, and to prevent a crash kernel from loading
-	 * over the top of a in use crash kernel.
-	 *
-	 * KISS: always take the mutex.
-	 */
-	if (!mutex_trylock(&kexec_mutex))
-		return -EBUSY;
-
 	result = do_kexec_load(entry, nr_segments, ksegments, flags);
 
-	mutex_unlock(&kexec_mutex);
-
+fail:
+	kfree(ksegments);
 	return result;
 }
 #endif
diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
index 3a4beb9395c4..291b857a6e20 100644
--- a/kernel/livepatch/transition.c
+++ b/kernel/livepatch/transition.c
@@ -411,7 +411,7 @@ void klp_try_complete_transition(void)
 	/*
 	 * Ditto for the idle "swapper" tasks.
 	 */
-	get_online_cpus();
+	cpus_read_lock();
 	for_each_possible_cpu(cpu) {
 		task = idle_task(cpu);
 		if (cpu_online(cpu)) {
@@ -423,7 +423,7 @@ void klp_try_complete_transition(void)
 			task->patch_state = klp_target_state;
 		}
 	}
-	put_online_cpus();
+	cpus_read_unlock();
 
 	if (!complete) {
 		if (klp_signals_cnt && !(klp_signals_cnt % SIGNALS_TIMEOUT))
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 8eabdc79602b..6bb116c559b4 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -753,7 +753,7 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
 		 * other configuration and we fail to report; also, see
 		 * lockdep.
 		 */
-		if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter->ww_ctx)
+		if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx)
 			ret = 0;
 
 		raw_spin_unlock(&lock->wait_lock);
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 9215b4d6a9de..000e8d5a2884 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -1376,15 +1376,17 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
 
 #include "rwbase_rt.c"
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-void __rwsem_init(struct rw_semaphore *sem, const char *name,
+void __init_rwsem(struct rw_semaphore *sem, const char *name,
 		  struct lock_class_key *key)
 {
+	init_rwbase_rt(&(sem)->rwbase);
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
 	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
 	lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP);
-}
-EXPORT_SYMBOL(__rwsem_init);
 #endif
+}
+EXPORT_SYMBOL(__init_rwsem);
 
 static inline void __down_read(struct rw_semaphore *sem)
 {
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index abc01fcad8c7..eec72ca962e2 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -568,6 +568,6 @@ out:
 
 int __init nsproxy_cache_init(void)
 {
-	nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);
+	nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC|SLAB_ACCOUNT);
 	return 0;
 }
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index ca43239a255a..a46a3723bc66 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -51,7 +51,8 @@ static struct kmem_cache *create_pid_cachep(unsigned int level)
 	mutex_lock(&pid_caches_mutex);
 	/* Name collision forces to do allocation under mutex. */
 	if (!*pkc)
-		*pkc = kmem_cache_create(name, len, 0, SLAB_HWCACHE_ALIGN, 0);
+		*pkc = kmem_cache_create(name, len, 0,
+					 SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, 0);
 	mutex_unlock(&pid_caches_mutex);
 	/* current can fail, but someone else can succeed. */
 	return READ_ONCE(*pkc);
@@ -449,7 +450,7 @@ const struct proc_ns_operations pidns_for_children_operations = {
 
 static __init int pid_namespaces_init(void)
 {
-	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
+	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC | SLAB_ACCOUNT);
 
 #ifdef CONFIG_CHECKPOINT_RESTORE
 	register_sysctl_paths(kern_path, pid_ns_ctl_table);
diff --git a/kernel/profile.c b/kernel/profile.c
index c2ebddb5e974..eb9c7f0f5ac5 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -41,7 +41,8 @@ struct profile_hit {
 #define NR_PROFILE_GRP		(NR_PROFILE_HIT/PROFILE_GRPSZ)
 
 static atomic_t *prof_buffer;
-static unsigned long prof_len, prof_shift;
+static unsigned long prof_len;
+static unsigned short int prof_shift;
 
 int prof_on __read_mostly;
 EXPORT_SYMBOL_GPL(prof_on);
@@ -67,8 +68,8 @@ int profile_setup(char *str)
 		if (str[strlen(sleepstr)] == ',')
 			str += strlen(sleepstr) + 1;
 		if (get_option(&str, &par))
-			prof_shift = par;
-		pr_info("kernel sleep profiling enabled (shift: %ld)\n",
+			prof_shift = clamp(par, 0, BITS_PER_LONG - 1);
+		pr_info("kernel sleep profiling enabled (shift: %u)\n",
 			prof_shift);
 #else
 		pr_warn("kernel sleep profiling requires CONFIG_SCHEDSTATS\n");
@@ -78,21 +79,21 @@ int profile_setup(char *str)
 		if (str[strlen(schedstr)] == ',')
 			str += strlen(schedstr) + 1;
 		if (get_option(&str, &par))
-			prof_shift = par;
-		pr_info("kernel schedule profiling enabled (shift: %ld)\n",
+			prof_shift = clamp(par, 0, BITS_PER_LONG - 1);
+		pr_info("kernel schedule profiling enabled (shift: %u)\n",
 			prof_shift);
 	} else if (!strncmp(str, kvmstr, strlen(kvmstr))) {
 		prof_on = KVM_PROFILING;
 		if (str[strlen(kvmstr)] == ',')
 			str += strlen(kvmstr) + 1;
 		if (get_option(&str, &par))
-			prof_shift = par;
-		pr_info("kernel KVM profiling enabled (shift: %ld)\n",
+			prof_shift = clamp(par, 0, BITS_PER_LONG - 1);
+		pr_info("kernel KVM profiling enabled (shift: %u)\n",
 			prof_shift);
 	} else if (get_option(&str, &par)) {
-		prof_shift = par;
+		prof_shift = clamp(par, 0, BITS_PER_LONG - 1);
 		prof_on = CPU_PROFILING;
-		pr_info("kernel profiling enabled (shift: %ld)\n",
+		pr_info("kernel profiling enabled (shift: %u)\n",
 			prof_shift);
 	}
 	return 1;
@@ -468,7 +469,7 @@ read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 	unsigned long p = *ppos;
 	ssize_t read;
 	char *pnt;
-	unsigned int sample_step = 1 << prof_shift;
+	unsigned long sample_step = 1UL << prof_shift;
 
 	profile_flip_buffers();
 	if (p >= (prof_len+1)*sizeof(unsigned int))
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c4462c454ab9..1bba4128a3e6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8836,7 +8836,6 @@ static void balance_push(struct rq *rq)
 	struct task_struct *push_task = rq->curr;
 
 	lockdep_assert_rq_held(rq);
-	SCHED_WARN_ON(rq->cpu != smp_processor_id());
 
 	/*
 	 * Ensure the thing is persistent until balance_push_set(.on = false);
@@ -8844,9 +8843,10 @@ static void balance_push(struct rq *rq)
 	rq->balance_callback = &balance_push_callback;
 
 	/*
-	 * Only active while going offline.
+	 * Only active while going offline and when invoked on the outgoing
+	 * CPU.
 	 */
-	if (!cpu_dying(rq->cpu))
+	if (!cpu_dying(rq->cpu) || rq != this_rq())
 		return;
 
 	/*
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 912b47aa99d8..d17b0a5ce6ac 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -379,10 +379,10 @@ void play_idle_precise(u64 duration_ns, u64 latency_ns)
 	cpuidle_use_deepest_state(latency_ns);
 
 	it.done = 0;
-	hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
 	it.timer.function = idle_inject_timer_fn;
 	hrtimer_start(&it.timer, ns_to_ktime(duration_ns),
-		      HRTIMER_MODE_REL_PINNED);
+		      HRTIMER_MODE_REL_PINNED_HARD);
 
 	while (!READ_ONCE(it.done))
 		do_idle();
diff --git a/kernel/signal.c b/kernel/signal.c
index cf7e2505ae31..952741f6d0f9 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -4726,7 +4726,7 @@ void __init signals_init(void)
 {
 	siginfo_buildtime_checks();
 
-	sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC);
+	sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC | SLAB_ACCOUNT);
 }
 
 #ifdef CONFIG_KGDB_KDB
diff --git a/kernel/sys.c b/kernel/sys.c
index 72c7639e3c98..8fdac0d90504 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1847,7 +1847,6 @@ SYSCALL_DEFINE1(umask, int, mask)
 static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 {
 	struct fd exe;
-	struct file *old_exe, *exe_file;
 	struct inode *inode;
 	int err;
 
@@ -1870,40 +1869,10 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 	if (err)
 		goto exit;
 
-	/*
-	 * Forbid mm->exe_file change if old file still mapped.
-	 */
-	exe_file = get_mm_exe_file(mm);
-	err = -EBUSY;
-	if (exe_file) {
-		struct vm_area_struct *vma;
-
-		mmap_read_lock(mm);
-		for (vma = mm->mmap; vma; vma = vma->vm_next) {
-			if (!vma->vm_file)
-				continue;
-			if (path_equal(&vma->vm_file->f_path,
-				       &exe_file->f_path))
-				goto exit_err;
-		}
-
-		mmap_read_unlock(mm);
-		fput(exe_file);
-	}
-
-	err = 0;
-	/* set the new file, lockless */
-	get_file(exe.file);
-	old_exe = xchg(&mm->exe_file, exe.file);
-	if (old_exe)
-		fput(old_exe);
+	err = replace_mm_exe_file(mm, exe.file);
 exit:
 	fdput(exe);
 	return err;
-exit_err:
-	mmap_read_unlock(mm);
-	fput(exe_file);
-	goto exit;
 }
 
 /*
@@ -1961,13 +1930,6 @@ static int validate_prctl_map_addr(struct prctl_mm_map *prctl_map)
 	error = -EINVAL;
 
 	/*
-	 * @brk should be after @end_data in traditional maps.
-	 */
-	if (prctl_map->start_brk <= prctl_map->end_data ||
-	    prctl_map->brk <= prctl_map->end_data)
-		goto out;
-
-	/*
 	 * Neither we should allow to override limits if they set.
 	 */
 	if (check_data_rlimit(rlimit(RLIMIT_DATA), prctl_map->brk,
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index cb6f98f5c97a..f43d89d92860 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -289,17 +289,13 @@ COND_SYSCALL(munlockall);
 COND_SYSCALL(mincore);
 COND_SYSCALL(madvise);
 COND_SYSCALL(process_madvise);
+COND_SYSCALL(process_mrelease);
 COND_SYSCALL(remap_file_pages);
 COND_SYSCALL(mbind);
-COND_SYSCALL_COMPAT(mbind);
 COND_SYSCALL(get_mempolicy);
-COND_SYSCALL_COMPAT(get_mempolicy);
 COND_SYSCALL(set_mempolicy);
-COND_SYSCALL_COMPAT(set_mempolicy);
 COND_SYSCALL(migrate_pages);
-COND_SYSCALL_COMPAT(migrate_pages);
 COND_SYSCALL(move_pages);
-COND_SYSCALL_COMPAT(move_pages);
 
 COND_SYSCALL(perf_event_open);
 COND_SYSCALL(accept4);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 25e49b4d8049..083be6af29d7 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2912,7 +2912,7 @@ static struct ctl_table vm_table[] = {
 		.data		= &sysctl_compaction_proactiveness,
 		.maxlen		= sizeof(sysctl_compaction_proactiveness),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= compaction_proactiveness_sysctl_handler,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= &one_hundred,
 	},
diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c
index 12eab0d2ae28..aec832801c26 100644
--- a/kernel/time/namespace.c
+++ b/kernel/time/namespace.c
@@ -88,13 +88,13 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns,
 		goto fail;
 
 	err = -ENOMEM;
-	ns = kmalloc(sizeof(*ns), GFP_KERNEL);
+	ns = kmalloc(sizeof(*ns), GFP_KERNEL_ACCOUNT);
 	if (!ns)
 		goto fail_dec;
 
 	refcount_set(&ns->ns.count, 1);
 
-	ns->vvar_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	ns->vvar_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
 	if (!ns->vvar_page)
 		goto fail_free;
 
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 3913222e7bcf..1cd10b102c51 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -273,8 +273,8 @@ static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec64 *tp)
 static __init int init_posix_timers(void)
 {
 	posix_timers_cache = kmem_cache_create("posix_timers_cache",
-					sizeof (struct k_itimer), 0, SLAB_PANIC,
-					NULL);
+					sizeof(struct k_itimer), 0,
+					SLAB_PANIC | SLAB_ACCOUNT, NULL);
 	return 0;
 }
 __initcall(init_posix_timers);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 3ee23f4d437f..420ff4bc67fd 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -135,10 +135,9 @@ config TRACING_SUPPORT
 	depends on STACKTRACE_SUPPORT
 	default y
 
-if TRACING_SUPPORT
-
 menuconfig FTRACE
 	bool "Tracers"
+	depends on TRACING_SUPPORT
 	default y if DEBUG_KERNEL
 	help
 	  Enable the kernel tracing infrastructure.
@@ -1037,6 +1036,3 @@ config HIST_TRIGGERS_DEBUG
           If unsure, say N.
 
 endif # FTRACE
-
-endif # TRACING_SUPPORT
-
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index b1c47ccf4f73..6de5d4d63165 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -77,6 +77,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
 endif
 obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
+obj-$(CONFIG_PROBE_EVENTS) += trace_eprobe.o
 obj-$(CONFIG_TRACE_EVENT_INJECT) += trace_events_inject.o
 obj-$(CONFIG_SYNTH_EVENTS) += trace_events_synth.o
 obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index e592d1df6f88..c5a3fbf19617 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2111,7 +2111,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
 			}
 		}
 
-		get_online_cpus();
+		cpus_read_lock();
 		/*
 		 * Fire off all the required work handlers
 		 * We can't schedule on offline CPUs, but it's not necessary
@@ -2143,7 +2143,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
 			cpu_buffer->nr_pages_to_update = 0;
 		}
 
-		put_online_cpus();
+		cpus_read_unlock();
 	} else {
 		cpu_buffer = buffer->buffers[cpu_id];
 
@@ -2171,7 +2171,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
 			goto out_err;
 		}
 
-		get_online_cpus();
+		cpus_read_lock();
 
 		/* Can't run something on an offline CPU. */
 		if (!cpu_online(cpu_id))
@@ -2183,7 +2183,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
 		}
 
 		cpu_buffer->nr_pages_to_update = 0;
-		put_online_cpus();
+		cpus_read_unlock();
 	}
 
  out:
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2755534b0737..7896d30d90f7 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2603,6 +2603,15 @@ enum print_line_t trace_handle_return(struct trace_seq *s)
 }
 EXPORT_SYMBOL_GPL(trace_handle_return);
 
+static unsigned short migration_disable_value(void)
+{
+#if defined(CONFIG_SMP)
+	return current->migration_disabled;
+#else
+	return 0;
+#endif
+}
+
 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
 {
 	unsigned int trace_flags = irqs_status;
@@ -2621,7 +2630,8 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
 	if (test_preempt_need_resched())
 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
-	return (trace_flags << 16) | (pc & 0xff);
+	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
+		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
 }
 
 struct ring_buffer_event *
@@ -3697,11 +3707,11 @@ static bool trace_safe_str(struct trace_iterator *iter, const char *str)
 		return false;
 
 	event = container_of(trace_event, struct trace_event_call, event);
-	if (!event->mod)
+	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
 		return false;
 
 	/* Would rather have rodata, but this will suffice */
-	if (within_module_core(addr, event->mod))
+	if (within_module_core(addr, event->module))
 		return true;
 
 	return false;
@@ -4189,9 +4199,10 @@ static void print_lat_help_header(struct seq_file *m)
 		    "#                  | / _----=> need-resched    \n"
 		    "#                  || / _---=> hardirq/softirq \n"
 		    "#                  ||| / _--=> preempt-depth   \n"
-		    "#                  |||| /     delay            \n"
-		    "#  cmd     pid     ||||| time  |   caller      \n"
-		    "#     \\   /        |||||  \\    |   /         \n");
+		    "#                  |||| / _-=> migrate-disable \n"
+		    "#                  ||||| /     delay           \n"
+		    "#  cmd     pid     |||||| time  |   caller     \n"
+		    "#     \\   /        ||||||  \\    |    /       \n");
 }
 
 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
@@ -4229,9 +4240,10 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file
 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
-	seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
-	seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
-	seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
+	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
+	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
+	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
+	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
 }
 
 void
@@ -5543,6 +5555,7 @@ static const char readme_msg[] =
 #ifdef CONFIG_HIST_TRIGGERS
 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
 #endif
+	"\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
 	"\t           -:[<group>/]<event>\n"
 #ifdef CONFIG_KPROBE_EVENTS
 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
@@ -5552,7 +5565,7 @@ static const char readme_msg[] =
   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
 #endif
 	"\t     args: <name>=fetcharg[:type]\n"
-	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
+	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
 #else
@@ -5567,6 +5580,8 @@ static const char readme_msg[] =
 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
 	"\t           [unsigned] char/int/long\n"
 #endif
+	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
+	"\t            of the <attached-group>/<attached-event>.\n"
 #endif
 	"  events/\t\t- Directory containing all trace event subsystems:\n"
 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
@@ -5654,6 +5669,7 @@ static const char readme_msg[] =
 	"\t            .execname   display a common_pid as a program name\n"
 	"\t            .syscall    display a syscall id as a syscall name\n"
 	"\t            .log2       display log2 value rather than raw number\n"
+	"\t            .buckets=size  display values in groups of size rather than raw number\n"
 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
 	"\t    trigger or to start a hist trigger but not log any events\n"
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4a0e693000c6..b7c0f8e160fb 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -126,6 +126,11 @@ struct kprobe_trace_entry_head {
 	unsigned long		ip;
 };
 
+struct eprobe_trace_entry_head {
+	struct trace_entry	ent;
+	unsigned int		type;
+};
+
 struct kretprobe_trace_entry_head {
 	struct trace_entry	ent;
 	unsigned long		func;
@@ -1508,9 +1513,14 @@ static inline int register_trigger_hist_enable_disable_cmds(void) { return 0; }
 extern int register_trigger_cmds(void);
 extern void clear_event_triggers(struct trace_array *tr);
 
+enum {
+	EVENT_TRIGGER_FL_PROBE		= BIT(0),
+};
+
 struct event_trigger_data {
 	unsigned long			count;
 	int				ref;
+	int				flags;
 	struct event_trigger_ops	*ops;
 	struct event_command		*cmd_ops;
 	struct event_filter __rcu	*filter;
@@ -1918,6 +1928,14 @@ static inline bool is_good_name(const char *name)
 	return true;
 }
 
+/* Convert certain expected symbols into '_' when generating event names */
+static inline void sanitize_event_name(char *name)
+{
+	while (*name++ != '\0')
+		if (*name == ':' || *name == '.')
+			*name = '_';
+}
+
 /*
  * This is a generic way to read and write a u64 value from a file in tracefs.
  *
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 94ef2d099e32..8d252f63cd78 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -171,6 +171,289 @@ trace_boot_add_synth_event(struct xbc_node *node, const char *event)
 }
 #endif
 
+#ifdef CONFIG_HIST_TRIGGERS
+static int __init __printf(3, 4)
+append_printf(char **bufp, char *end, const char *fmt, ...)
+{
+	va_list args;
+	int ret;
+
+	if (*bufp == end)
+		return -ENOSPC;
+
+	va_start(args, fmt);
+	ret = vsnprintf(*bufp, end - *bufp, fmt, args);
+	if (ret < end - *bufp) {
+		*bufp += ret;
+	} else {
+		*bufp = end;
+		ret = -ERANGE;
+	}
+	va_end(args);
+
+	return ret;
+}
+
+static int __init
+append_str_nospace(char **bufp, char *end, const char *str)
+{
+	char *p = *bufp;
+	int len;
+
+	while (p < end - 1 && *str != '\0') {
+		if (!isspace(*str))
+			*(p++) = *str;
+		str++;
+	}
+	*p = '\0';
+	if (p == end - 1) {
+		*bufp = end;
+		return -ENOSPC;
+	}
+	len = p - *bufp;
+	*bufp = p;
+	return (int)len;
+}
+
+static int __init
+trace_boot_hist_add_array(struct xbc_node *hnode, char **bufp,
+			  char *end, const char *key)
+{
+	struct xbc_node *anode;
+	const char *p;
+	char sep;
+
+	p = xbc_node_find_value(hnode, key, &anode);
+	if (p) {
+		if (!anode) {
+			pr_err("hist.%s requires value(s).\n", key);
+			return -EINVAL;
+		}
+
+		append_printf(bufp, end, ":%s", key);
+		sep = '=';
+		xbc_array_for_each_value(anode, p) {
+			append_printf(bufp, end, "%c%s", sep, p);
+			if (sep == '=')
+				sep = ',';
+		}
+	} else
+		return -ENOENT;
+
+	return 0;
+}
+
+static int __init
+trace_boot_hist_add_one_handler(struct xbc_node *hnode, char **bufp,
+				char *end, const char *handler,
+				const char *param)
+{
+	struct xbc_node *knode, *anode;
+	const char *p;
+	char sep;
+
+	/* Compose 'handler' parameter */
+	p = xbc_node_find_value(hnode, param, NULL);
+	if (!p) {
+		pr_err("hist.%s requires '%s' option.\n",
+		       xbc_node_get_data(hnode), param);
+		return -EINVAL;
+	}
+	append_printf(bufp, end, ":%s(%s)", handler, p);
+
+	/* Compose 'action' parameter */
+	knode = xbc_node_find_subkey(hnode, "trace");
+	if (!knode)
+		knode = xbc_node_find_subkey(hnode, "save");
+
+	if (knode) {
+		anode = xbc_node_get_child(knode);
+		if (!anode || !xbc_node_is_value(anode)) {
+			pr_err("hist.%s.%s requires value(s).\n",
+			       xbc_node_get_data(hnode),
+			       xbc_node_get_data(knode));
+			return -EINVAL;
+		}
+
+		append_printf(bufp, end, ".%s", xbc_node_get_data(knode));
+		sep = '(';
+		xbc_array_for_each_value(anode, p) {
+			append_printf(bufp, end, "%c%s", sep, p);
+			if (sep == '(')
+				sep = ',';
+		}
+		append_printf(bufp, end, ")");
+	} else if (xbc_node_find_subkey(hnode, "snapshot")) {
+		append_printf(bufp, end, ".snapshot()");
+	} else {
+		pr_err("hist.%s requires an action.\n",
+		       xbc_node_get_data(hnode));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int __init
+trace_boot_hist_add_handlers(struct xbc_node *hnode, char **bufp,
+			     char *end, const char *param)
+{
+	struct xbc_node *node;
+	const char *p, *handler;
+	int ret;
+
+	handler = xbc_node_get_data(hnode);
+
+	xbc_node_for_each_subkey(hnode, node) {
+		p = xbc_node_get_data(node);
+		if (!isdigit(p[0]))
+			continue;
+		/* All digit started node should be instances. */
+		ret = trace_boot_hist_add_one_handler(node, bufp, end, handler, param);
+		if (ret < 0)
+			break;
+	}
+
+	if (xbc_node_find_subkey(hnode, param))
+		ret = trace_boot_hist_add_one_handler(hnode, bufp, end, handler, param);
+
+	return ret;
+}
+
+/*
+ * Histogram boottime tracing syntax.
+ *
+ * ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist[.N] {
+ *	keys = <KEY>[,...]
+ *	values = <VAL>[,...]
+ *	sort = <SORT-KEY>[,...]
+ *	size = <ENTRIES>
+ *	name = <HISTNAME>
+ *	var { <VAR> = <EXPR> ... }
+ *	pause|continue|clear
+ *	onmax|onchange[.N] { var = <VAR>; <ACTION> [= <PARAM>] }
+ *	onmatch[.N] { event = <EVENT>; <ACTION> [= <PARAM>] }
+ *	filter = <FILTER>
+ * }
+ *
+ * Where <ACTION> are;
+ *
+ *	trace = <EVENT>, <ARG1>[, ...]
+ *	save = <ARG1>[, ...]
+ *	snapshot
+ */
+static int __init
+trace_boot_compose_hist_cmd(struct xbc_node *hnode, char *buf, size_t size)
+{
+	struct xbc_node *node, *knode;
+	char *end = buf + size;
+	const char *p;
+	int ret = 0;
+
+	append_printf(&buf, end, "hist");
+
+	ret = trace_boot_hist_add_array(hnode, &buf, end, "keys");
+	if (ret < 0) {
+		if (ret == -ENOENT)
+			pr_err("hist requires keys.\n");
+		return -EINVAL;
+	}
+
+	ret = trace_boot_hist_add_array(hnode, &buf, end, "values");
+	if (ret == -EINVAL)
+		return ret;
+	ret = trace_boot_hist_add_array(hnode, &buf, end, "sort");
+	if (ret == -EINVAL)
+		return ret;
+
+	p = xbc_node_find_value(hnode, "size", NULL);
+	if (p)
+		append_printf(&buf, end, ":size=%s", p);
+
+	p = xbc_node_find_value(hnode, "name", NULL);
+	if (p)
+		append_printf(&buf, end, ":name=%s", p);
+
+	node = xbc_node_find_subkey(hnode, "var");
+	if (node) {
+		xbc_node_for_each_key_value(node, knode, p) {
+			/* Expression must not include spaces. */
+			append_printf(&buf, end, ":%s=",
+				      xbc_node_get_data(knode));
+			append_str_nospace(&buf, end, p);
+		}
+	}
+
+	/* Histogram control attributes (mutual exclusive) */
+	if (xbc_node_find_value(hnode, "pause", NULL))
+		append_printf(&buf, end, ":pause");
+	else if (xbc_node_find_value(hnode, "continue", NULL))
+		append_printf(&buf, end, ":continue");
+	else if (xbc_node_find_value(hnode, "clear", NULL))
+		append_printf(&buf, end, ":clear");
+
+	/* Histogram handler and actions */
+	node = xbc_node_find_subkey(hnode, "onmax");
+	if (node && trace_boot_hist_add_handlers(node, &buf, end, "var") < 0)
+		return -EINVAL;
+	node = xbc_node_find_subkey(hnode, "onchange");
+	if (node && trace_boot_hist_add_handlers(node, &buf, end, "var") < 0)
+		return -EINVAL;
+	node = xbc_node_find_subkey(hnode, "onmatch");
+	if (node && trace_boot_hist_add_handlers(node, &buf, end, "event") < 0)
+		return -EINVAL;
+
+	p = xbc_node_find_value(hnode, "filter", NULL);
+	if (p)
+		append_printf(&buf, end, " if %s", p);
+
+	if (buf == end) {
+		pr_err("hist exceeds the max command length.\n");
+		return -E2BIG;
+	}
+
+	return 0;
+}
+
+static void __init
+trace_boot_init_histograms(struct trace_event_file *file,
+			   struct xbc_node *hnode, char *buf, size_t size)
+{
+	struct xbc_node *node;
+	const char *p;
+	char *tmp;
+
+	xbc_node_for_each_subkey(hnode, node) {
+		p = xbc_node_get_data(node);
+		if (!isdigit(p[0]))
+			continue;
+		/* All digit started node should be instances. */
+		if (trace_boot_compose_hist_cmd(node, buf, size) == 0) {
+			tmp = kstrdup(buf, GFP_KERNEL);
+			if (trigger_process_regex(file, buf) < 0)
+				pr_err("Failed to apply hist trigger: %s\n", tmp);
+			kfree(tmp);
+		}
+	}
+
+	if (xbc_node_find_subkey(hnode, "keys")) {
+		if (trace_boot_compose_hist_cmd(hnode, buf, size) == 0) {
+			tmp = kstrdup(buf, GFP_KERNEL);
+			if (trigger_process_regex(file, buf) < 0)
+				pr_err("Failed to apply hist trigger: %s\n", tmp);
+			kfree(tmp);
+		}
+	}
+}
+#else
+static void __init
+trace_boot_init_histograms(struct trace_event_file *file,
+			   struct xbc_node *hnode, char *buf, size_t size)
+{
+	/* do nothing */
+}
+#endif
+
 static void __init
 trace_boot_init_one_event(struct trace_array *tr, struct xbc_node *gnode,
 			  struct xbc_node *enode)
@@ -205,12 +488,18 @@ trace_boot_init_one_event(struct trace_array *tr, struct xbc_node *gnode,
 			pr_err("Failed to apply filter: %s\n", buf);
 	}
 
-	xbc_node_for_each_array_value(enode, "actions", anode, p) {
-		if (strlcpy(buf, p, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf))
-			pr_err("action string is too long: %s\n", p);
-		else if (trigger_process_regex(file, buf) < 0)
-			pr_err("Failed to apply an action: %s\n", buf);
-	}
+	if (IS_ENABLED(CONFIG_HIST_TRIGGERS)) {
+		xbc_node_for_each_array_value(enode, "actions", anode, p) {
+			if (strlcpy(buf, p, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf))
+				pr_err("action string is too long: %s\n", p);
+			else if (trigger_process_regex(file, buf) < 0)
+				pr_err("Failed to apply an action: %s\n", p);
+		}
+		anode = xbc_node_find_subkey(enode, "hist");
+		if (anode)
+			trace_boot_init_histograms(file, anode, buf, ARRAY_SIZE(buf));
+	} else if (xbc_node_find_value(enode, "actions", NULL))
+		pr_err("Failed to apply event actions because CONFIG_HIST_TRIGGERS is not set.\n");
 
 	if (xbc_node_find_value(enode, "enable", NULL)) {
 		if (trace_event_enable_disable(file, 1, 0) < 0)
@@ -228,18 +517,18 @@ trace_boot_init_events(struct trace_array *tr, struct xbc_node *node)
 	bool enable, enable_all = false;
 	const char *data;
 
-	node = xbc_node_find_child(node, "event");
+	node = xbc_node_find_subkey(node, "event");
 	if (!node)
 		return;
 	/* per-event key starts with "event.GROUP.EVENT" */
-	xbc_node_for_each_child(node, gnode) {
+	xbc_node_for_each_subkey(node, gnode) {
 		data = xbc_node_get_data(gnode);
 		if (!strcmp(data, "enable")) {
 			enable_all = true;
 			continue;
 		}
 		enable = false;
-		xbc_node_for_each_child(gnode, enode) {
+		xbc_node_for_each_subkey(gnode, enode) {
 			data = xbc_node_get_data(enode);
 			if (!strcmp(data, "enable")) {
 				enable = true;
@@ -331,11 +620,11 @@ trace_boot_init_instances(struct xbc_node *node)
 	struct trace_array *tr;
 	const char *p;
 
-	node = xbc_node_find_child(node, "instance");
+	node = xbc_node_find_subkey(node, "instance");
 	if (!node)
 		return;
 
-	xbc_node_for_each_child(node, inode) {
+	xbc_node_for_each_subkey(node, inode) {
 		p = xbc_node_get_data(inode);
 		if (!p || *p == '\0')
 			continue;
diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c
index e57cc0870892..1110112e55bd 100644
--- a/kernel/trace/trace_dynevent.c
+++ b/kernel/trace/trace_dynevent.c
@@ -13,11 +13,49 @@
 #include <linux/tracefs.h>
 
 #include "trace.h"
+#include "trace_output.h"	/* for trace_event_sem */
 #include "trace_dynevent.h"
 
 static DEFINE_MUTEX(dyn_event_ops_mutex);
 static LIST_HEAD(dyn_event_ops_list);
 
+bool trace_event_dyn_try_get_ref(struct trace_event_call *dyn_call)
+{
+	struct trace_event_call *call;
+	bool ret = false;
+
+	if (WARN_ON_ONCE(!(dyn_call->flags & TRACE_EVENT_FL_DYNAMIC)))
+		return false;
+
+	down_read(&trace_event_sem);
+	list_for_each_entry(call, &ftrace_events, list) {
+		if (call == dyn_call) {
+			atomic_inc(&dyn_call->refcnt);
+			ret = true;
+		}
+	}
+	up_read(&trace_event_sem);
+	return ret;
+}
+
+void trace_event_dyn_put_ref(struct trace_event_call *call)
+{
+	if (WARN_ON_ONCE(!(call->flags & TRACE_EVENT_FL_DYNAMIC)))
+		return;
+
+	if (WARN_ON_ONCE(atomic_read(&call->refcnt) <= 0)) {
+		atomic_set(&call->refcnt, 0);
+		return;
+	}
+
+	atomic_dec(&call->refcnt);
+}
+
+bool trace_event_dyn_busy(struct trace_event_call *call)
+{
+	return atomic_read(&call->refcnt) != 0;
+}
+
 int dyn_event_register(struct dyn_event_operations *ops)
 {
 	if (!ops || !ops->create || !ops->show || !ops->is_busy ||
diff --git a/kernel/trace/trace_dynevent.h b/kernel/trace/trace_dynevent.h
index 7754936b57ee..936477a111d3 100644
--- a/kernel/trace/trace_dynevent.h
+++ b/kernel/trace/trace_dynevent.h
@@ -76,13 +76,15 @@ int dyn_event_init(struct dyn_event *ev, struct dyn_event_operations *ops)
 	return 0;
 }
 
-static inline int dyn_event_add(struct dyn_event *ev)
+static inline int dyn_event_add(struct dyn_event *ev,
+				struct trace_event_call *call)
 {
 	lockdep_assert_held(&event_mutex);
 
 	if (!ev || !ev->ops)
 		return -EINVAL;
 
+	call->flags |= TRACE_EVENT_FL_DYNAMIC;
 	list_add_tail(&ev->list, &dyn_event_list);
 	return 0;
 }
diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
new file mode 100644
index 000000000000..3044b762cbd7
--- /dev/null
+++ b/kernel/trace/trace_eprobe.c
@@ -0,0 +1,904 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * event probes
+ *
+ * Part of this code was copied from kernel/trace/trace_kprobe.c written by
+ * Masami Hiramatsu <mhiramat@kernel.org>
+ *
+ * Copyright (C) 2021, VMware Inc, Steven Rostedt <rostedt@goodmis.org>
+ * Copyright (C) 2021, VMware Inc, Tzvetomir Stoyanov tz.stoyanov@gmail.com>
+ *
+ */
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/ftrace.h>
+
+#include "trace_dynevent.h"
+#include "trace_probe.h"
+#include "trace_probe_tmpl.h"
+
+#define EPROBE_EVENT_SYSTEM "eprobes"
+
+struct trace_eprobe {
+	/* tracepoint system */
+	const char *event_system;
+
+	/* tracepoint event */
+	const char *event_name;
+
+	struct trace_event_call *event;
+
+	struct dyn_event	devent;
+	struct trace_probe	tp;
+};
+
+struct eprobe_data {
+	struct trace_event_file	*file;
+	struct trace_eprobe	*ep;
+};
+
+static int __trace_eprobe_create(int argc, const char *argv[]);
+
+static void trace_event_probe_cleanup(struct trace_eprobe *ep)
+{
+	if (!ep)
+		return;
+	trace_probe_cleanup(&ep->tp);
+	kfree(ep->event_name);
+	kfree(ep->event_system);
+	if (ep->event)
+		trace_event_put_ref(ep->event);
+	kfree(ep);
+}
+
+static struct trace_eprobe *to_trace_eprobe(struct dyn_event *ev)
+{
+	return container_of(ev, struct trace_eprobe, devent);
+}
+
+static int eprobe_dyn_event_create(const char *raw_command)
+{
+	return trace_probe_create(raw_command, __trace_eprobe_create);
+}
+
+static int eprobe_dyn_event_show(struct seq_file *m, struct dyn_event *ev)
+{
+	struct trace_eprobe *ep = to_trace_eprobe(ev);
+	int i;
+
+	seq_printf(m, "e:%s/%s", trace_probe_group_name(&ep->tp),
+				trace_probe_name(&ep->tp));
+	seq_printf(m, " %s.%s", ep->event_system, ep->event_name);
+
+	for (i = 0; i < ep->tp.nr_args; i++)
+		seq_printf(m, " %s=%s", ep->tp.args[i].name, ep->tp.args[i].comm);
+	seq_putc(m, '\n');
+
+	return 0;
+}
+
+static int unregister_trace_eprobe(struct trace_eprobe *ep)
+{
+	/* If other probes are on the event, just unregister eprobe */
+	if (trace_probe_has_sibling(&ep->tp))
+		goto unreg;
+
+	/* Enabled event can not be unregistered */
+	if (trace_probe_is_enabled(&ep->tp))
+		return -EBUSY;
+
+	/* Will fail if probe is being used by ftrace or perf */
+	if (trace_probe_unregister_event_call(&ep->tp))
+		return -EBUSY;
+
+unreg:
+	dyn_event_remove(&ep->devent);
+	trace_probe_unlink(&ep->tp);
+
+	return 0;
+}
+
+static int eprobe_dyn_event_release(struct dyn_event *ev)
+{
+	struct trace_eprobe *ep = to_trace_eprobe(ev);
+	int ret = unregister_trace_eprobe(ep);
+
+	if (!ret)
+		trace_event_probe_cleanup(ep);
+	return ret;
+}
+
+static bool eprobe_dyn_event_is_busy(struct dyn_event *ev)
+{
+	struct trace_eprobe *ep = to_trace_eprobe(ev);
+
+	return trace_probe_is_enabled(&ep->tp);
+}
+
+static bool eprobe_dyn_event_match(const char *system, const char *event,
+			int argc, const char **argv, struct dyn_event *ev)
+{
+	struct trace_eprobe *ep = to_trace_eprobe(ev);
+
+	return strcmp(trace_probe_name(&ep->tp), event) == 0 &&
+	    (!system || strcmp(trace_probe_group_name(&ep->tp), system) == 0) &&
+	    trace_probe_match_command_args(&ep->tp, argc, argv);
+}
+
+static struct dyn_event_operations eprobe_dyn_event_ops = {
+	.create = eprobe_dyn_event_create,
+	.show = eprobe_dyn_event_show,
+	.is_busy = eprobe_dyn_event_is_busy,
+	.free = eprobe_dyn_event_release,
+	.match = eprobe_dyn_event_match,
+};
+
+static struct trace_eprobe *alloc_event_probe(const char *group,
+					      const char *this_event,
+					      struct trace_event_call *event,
+					      int nargs)
+{
+	struct trace_eprobe *ep;
+	const char *event_name;
+	const char *sys_name;
+	int ret = -ENOMEM;
+
+	if (!event)
+		return ERR_PTR(-ENODEV);
+
+	sys_name = event->class->system;
+	event_name = trace_event_name(event);
+
+	ep = kzalloc(struct_size(ep, tp.args, nargs), GFP_KERNEL);
+	if (!ep) {
+		trace_event_put_ref(event);
+		goto error;
+	}
+	ep->event = event;
+	ep->event_name = kstrdup(event_name, GFP_KERNEL);
+	if (!ep->event_name)
+		goto error;
+	ep->event_system = kstrdup(sys_name, GFP_KERNEL);
+	if (!ep->event_system)
+		goto error;
+
+	ret = trace_probe_init(&ep->tp, this_event, group, false);
+	if (ret < 0)
+		goto error;
+
+	dyn_event_init(&ep->devent, &eprobe_dyn_event_ops);
+	return ep;
+error:
+	trace_event_probe_cleanup(ep);
+	return ERR_PTR(ret);
+}
+
+static int trace_eprobe_tp_arg_update(struct trace_eprobe *ep, int i)
+{
+	struct probe_arg *parg = &ep->tp.args[i];
+	struct ftrace_event_field *field;
+	struct list_head *head;
+
+	head = trace_get_fields(ep->event);
+	list_for_each_entry(field, head, link) {
+		if (!strcmp(parg->code->data, field->name)) {
+			kfree(parg->code->data);
+			parg->code->data = field;
+			return 0;
+		}
+	}
+	kfree(parg->code->data);
+	parg->code->data = NULL;
+	return -ENOENT;
+}
+
+static int eprobe_event_define_fields(struct trace_event_call *event_call)
+{
+	int ret;
+	struct eprobe_trace_entry_head field;
+	struct trace_probe *tp;
+
+	tp = trace_probe_primary_from_call(event_call);
+	if (WARN_ON_ONCE(!tp))
+		return -ENOENT;
+
+	DEFINE_FIELD(unsigned int, type, FIELD_STRING_TYPE, 0);
+
+	return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
+}
+
+static struct trace_event_fields eprobe_fields_array[] = {
+	{ .type = TRACE_FUNCTION_TYPE,
+	  .define_fields = eprobe_event_define_fields },
+	{}
+};
+
+/* Event entry printers */
+static enum print_line_t
+print_eprobe_event(struct trace_iterator *iter, int flags,
+		   struct trace_event *event)
+{
+	struct eprobe_trace_entry_head *field;
+	struct trace_event_call *pevent;
+	struct trace_event *probed_event;
+	struct trace_seq *s = &iter->seq;
+	struct trace_probe *tp;
+
+	field = (struct eprobe_trace_entry_head *)iter->ent;
+	tp = trace_probe_primary_from_call(
+		container_of(event, struct trace_event_call, event));
+	if (WARN_ON_ONCE(!tp))
+		goto out;
+
+	trace_seq_printf(s, "%s: (", trace_probe_name(tp));
+
+	probed_event = ftrace_find_event(field->type);
+	if (probed_event) {
+		pevent = container_of(probed_event, struct trace_event_call, event);
+		trace_seq_printf(s, "%s.%s", pevent->class->system,
+				 trace_event_name(pevent));
+	} else {
+		trace_seq_printf(s, "%u", field->type);
+	}
+
+	trace_seq_putc(s, ')');
+
+	if (print_probe_args(s, tp->args, tp->nr_args,
+			     (u8 *)&field[1], field) < 0)
+		goto out;
+
+	trace_seq_putc(s, '\n');
+ out:
+	return trace_handle_return(s);
+}
+
+static unsigned long get_event_field(struct fetch_insn *code, void *rec)
+{
+	struct ftrace_event_field *field = code->data;
+	unsigned long val;
+	void *addr;
+
+	addr = rec + field->offset;
+
+	switch (field->size) {
+	case 1:
+		if (field->is_signed)
+			val = *(char *)addr;
+		else
+			val = *(unsigned char *)addr;
+		break;
+	case 2:
+		if (field->is_signed)
+			val = *(short *)addr;
+		else
+			val = *(unsigned short *)addr;
+		break;
+	case 4:
+		if (field->is_signed)
+			val = *(int *)addr;
+		else
+			val = *(unsigned int *)addr;
+		break;
+	default:
+		if (field->is_signed)
+			val = *(long *)addr;
+		else
+			val = *(unsigned long *)addr;
+		break;
+	}
+	return val;
+}
+
+static int get_eprobe_size(struct trace_probe *tp, void *rec)
+{
+	struct probe_arg *arg;
+	int i, len, ret = 0;
+
+	for (i = 0; i < tp->nr_args; i++) {
+		arg = tp->args + i;
+		if (unlikely(arg->dynamic)) {
+			unsigned long val;
+
+			val = get_event_field(arg->code, rec);
+			len = process_fetch_insn_bottom(arg->code + 1, val, NULL, NULL);
+			if (len > 0)
+				ret += len;
+		}
+	}
+
+	return ret;
+}
+
+/* Kprobe specific fetch functions */
+
+/* Note that we don't verify it, since the code does not come from user space */
+static int
+process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
+		   void *base)
+{
+	unsigned long val;
+
+	val = get_event_field(code, rec);
+	return process_fetch_insn_bottom(code + 1, val, dest, base);
+}
+NOKPROBE_SYMBOL(process_fetch_insn)
+
+/* Return the length of string -- including null terminal byte */
+static nokprobe_inline int
+fetch_store_strlen_user(unsigned long addr)
+{
+	const void __user *uaddr =  (__force const void __user *)addr;
+
+	return strnlen_user_nofault(uaddr, MAX_STRING_SIZE);
+}
+
+/* Return the length of string -- including null terminal byte */
+static nokprobe_inline int
+fetch_store_strlen(unsigned long addr)
+{
+	int ret, len = 0;
+	u8 c;
+
+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+	if (addr < TASK_SIZE)
+		return fetch_store_strlen_user(addr);
+#endif
+
+	do {
+		ret = copy_from_kernel_nofault(&c, (u8 *)addr + len, 1);
+		len++;
+	} while (c && ret == 0 && len < MAX_STRING_SIZE);
+
+	return (ret < 0) ? ret : len;
+}
+
+/*
+ * Fetch a null-terminated string from user. Caller MUST set *(u32 *)buf
+ * with max length and relative data location.
+ */
+static nokprobe_inline int
+fetch_store_string_user(unsigned long addr, void *dest, void *base)
+{
+	const void __user *uaddr =  (__force const void __user *)addr;
+	int maxlen = get_loc_len(*(u32 *)dest);
+	void *__dest;
+	long ret;
+
+	if (unlikely(!maxlen))
+		return -ENOMEM;
+
+	__dest = get_loc_data(dest, base);
+
+	ret = strncpy_from_user_nofault(__dest, uaddr, maxlen);
+	if (ret >= 0)
+		*(u32 *)dest = make_data_loc(ret, __dest - base);
+
+	return ret;
+}
+
+/*
+ * Fetch a null-terminated string. Caller MUST set *(u32 *)buf with max
+ * length and relative data location.
+ */
+static nokprobe_inline int
+fetch_store_string(unsigned long addr, void *dest, void *base)
+{
+	int maxlen = get_loc_len(*(u32 *)dest);
+	void *__dest;
+	long ret;
+
+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+	if ((unsigned long)addr < TASK_SIZE)
+		return fetch_store_string_user(addr, dest, base);
+#endif
+
+	if (unlikely(!maxlen))
+		return -ENOMEM;
+
+	__dest = get_loc_data(dest, base);
+
+	/*
+	 * Try to get string again, since the string can be changed while
+	 * probing.
+	 */
+	ret = strncpy_from_kernel_nofault(__dest, (void *)addr, maxlen);
+	if (ret >= 0)
+		*(u32 *)dest = make_data_loc(ret, __dest - base);
+
+	return ret;
+}
+
+static nokprobe_inline int
+probe_mem_read_user(void *dest, void *src, size_t size)
+{
+	const void __user *uaddr =  (__force const void __user *)src;
+
+	return copy_from_user_nofault(dest, uaddr, size);
+}
+
+static nokprobe_inline int
+probe_mem_read(void *dest, void *src, size_t size)
+{
+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+	if ((unsigned long)src < TASK_SIZE)
+		return probe_mem_read_user(dest, src, size);
+#endif
+	return copy_from_kernel_nofault(dest, src, size);
+}
+
+/* eprobe handler */
+static inline void
+__eprobe_trace_func(struct eprobe_data *edata, void *rec)
+{
+	struct eprobe_trace_entry_head *entry;
+	struct trace_event_call *call = trace_probe_event_call(&edata->ep->tp);
+	struct trace_event_buffer fbuffer;
+	int dsize;
+
+	if (WARN_ON_ONCE(call != edata->file->event_call))
+		return;
+
+	if (trace_trigger_soft_disabled(edata->file))
+		return;
+
+	fbuffer.trace_ctx = tracing_gen_ctx();
+	fbuffer.trace_file = edata->file;
+
+	dsize = get_eprobe_size(&edata->ep->tp, rec);
+	fbuffer.regs = NULL;
+
+	fbuffer.event =
+		trace_event_buffer_lock_reserve(&fbuffer.buffer, edata->file,
+					call->event.type,
+					sizeof(*entry) + edata->ep->tp.size + dsize,
+					fbuffer.trace_ctx);
+	if (!fbuffer.event)
+		return;
+
+	entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
+	if (edata->ep->event)
+		entry->type = edata->ep->event->event.type;
+	else
+		entry->type = 0;
+	store_trace_args(&entry[1], &edata->ep->tp, rec, sizeof(*entry), dsize);
+
+	trace_event_buffer_commit(&fbuffer);
+}
+
+/*
+ * The event probe implementation uses event triggers to get access to
+ * the event it is attached to, but is not an actual trigger. The below
+ * functions are just stubs to fulfill what is needed to use the trigger
+ * infrastructure.
+ */
+static int eprobe_trigger_init(struct event_trigger_ops *ops,
+			       struct event_trigger_data *data)
+{
+	return 0;
+}
+
+static void eprobe_trigger_free(struct event_trigger_ops *ops,
+				struct event_trigger_data *data)
+{
+
+}
+
+static int eprobe_trigger_print(struct seq_file *m,
+				struct event_trigger_ops *ops,
+				struct event_trigger_data *data)
+{
+	/* Do not print eprobe event triggers */
+	return 0;
+}
+
+static void eprobe_trigger_func(struct event_trigger_data *data,
+				struct trace_buffer *buffer, void *rec,
+				struct ring_buffer_event *rbe)
+{
+	struct eprobe_data *edata = data->private_data;
+
+	__eprobe_trace_func(edata, rec);
+}
+
+static struct event_trigger_ops eprobe_trigger_ops = {
+	.func			= eprobe_trigger_func,
+	.print			= eprobe_trigger_print,
+	.init			= eprobe_trigger_init,
+	.free			= eprobe_trigger_free,
+};
+
+static int eprobe_trigger_cmd_func(struct event_command *cmd_ops,
+				   struct trace_event_file *file,
+				   char *glob, char *cmd, char *param)
+{
+	return -1;
+}
+
+static int eprobe_trigger_reg_func(char *glob, struct event_trigger_ops *ops,
+				 struct event_trigger_data *data,
+				 struct trace_event_file *file)
+{
+	return -1;
+}
+
+static void eprobe_trigger_unreg_func(char *glob, struct event_trigger_ops *ops,
+				    struct event_trigger_data *data,
+				    struct trace_event_file *file)
+{
+
+}
+
+static struct event_trigger_ops *eprobe_trigger_get_ops(char *cmd,
+							char *param)
+{
+	return &eprobe_trigger_ops;
+}
+
+static struct event_command event_trigger_cmd = {
+	.name			= "eprobe",
+	.trigger_type		= ETT_EVENT_EPROBE,
+	.flags			= EVENT_CMD_FL_NEEDS_REC,
+	.func			= eprobe_trigger_cmd_func,
+	.reg			= eprobe_trigger_reg_func,
+	.unreg			= eprobe_trigger_unreg_func,
+	.unreg_all		= NULL,
+	.get_trigger_ops	= eprobe_trigger_get_ops,
+	.set_filter		= NULL,
+};
+
+static struct event_trigger_data *
+new_eprobe_trigger(struct trace_eprobe *ep, struct trace_event_file *file)
+{
+	struct event_trigger_data *trigger;
+	struct eprobe_data *edata;
+
+	edata = kzalloc(sizeof(*edata), GFP_KERNEL);
+	trigger = kzalloc(sizeof(*trigger), GFP_KERNEL);
+	if (!trigger || !edata) {
+		kfree(edata);
+		kfree(trigger);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	trigger->flags = EVENT_TRIGGER_FL_PROBE;
+	trigger->count = -1;
+	trigger->ops = &eprobe_trigger_ops;
+
+	/*
+	 * EVENT PROBE triggers are not registered as commands with
+	 * register_event_command(), as they are not controlled by the user
+	 * from the trigger file
+	 */
+	trigger->cmd_ops = &event_trigger_cmd;
+
+	INIT_LIST_HEAD(&trigger->list);
+	RCU_INIT_POINTER(trigger->filter, NULL);
+
+	edata->file = file;
+	edata->ep = ep;
+	trigger->private_data = edata;
+
+	return trigger;
+}
+
+static int enable_eprobe(struct trace_eprobe *ep,
+			 struct trace_event_file *eprobe_file)
+{
+	struct event_trigger_data *trigger;
+	struct trace_event_file *file;
+	struct trace_array *tr = eprobe_file->tr;
+
+	file = find_event_file(tr, ep->event_system, ep->event_name);
+	if (!file)
+		return -ENOENT;
+	trigger = new_eprobe_trigger(ep, eprobe_file);
+	if (IS_ERR(trigger))
+		return PTR_ERR(trigger);
+
+	list_add_tail_rcu(&trigger->list, &file->triggers);
+
+	trace_event_trigger_enable_disable(file, 1);
+	update_cond_flag(file);
+
+	return 0;
+}
+
+static struct trace_event_functions eprobe_funcs = {
+	.trace		= print_eprobe_event
+};
+
+static int disable_eprobe(struct trace_eprobe *ep,
+			  struct trace_array *tr)
+{
+	struct event_trigger_data *trigger;
+	struct trace_event_file *file;
+	struct eprobe_data *edata;
+
+	file = find_event_file(tr, ep->event_system, ep->event_name);
+	if (!file)
+		return -ENOENT;
+
+	list_for_each_entry(trigger, &file->triggers, list) {
+		if (!(trigger->flags & EVENT_TRIGGER_FL_PROBE))
+			continue;
+		edata = trigger->private_data;
+		if (edata->ep == ep)
+			break;
+	}
+	if (list_entry_is_head(trigger, &file->triggers, list))
+		return -ENODEV;
+
+	list_del_rcu(&trigger->list);
+
+	trace_event_trigger_enable_disable(file, 0);
+	update_cond_flag(file);
+	return 0;
+}
+
+static int enable_trace_eprobe(struct trace_event_call *call,
+			       struct trace_event_file *file)
+{
+	struct trace_probe *pos, *tp;
+	struct trace_eprobe *ep;
+	bool enabled;
+	int ret = 0;
+
+	tp = trace_probe_primary_from_call(call);
+	if (WARN_ON_ONCE(!tp))
+		return -ENODEV;
+	enabled = trace_probe_is_enabled(tp);
+
+	/* This also changes "enabled" state */
+	if (file) {
+		ret = trace_probe_add_file(tp, file);
+		if (ret)
+			return ret;
+	} else
+		trace_probe_set_flag(tp, TP_FLAG_PROFILE);
+
+	if (enabled)
+		return 0;
+
+	list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
+		ep = container_of(pos, struct trace_eprobe, tp);
+		ret = enable_eprobe(ep, file);
+		if (ret)
+			break;
+		enabled = true;
+	}
+
+	if (ret) {
+		/* Failed to enable one of them. Roll back all */
+		if (enabled)
+			disable_eprobe(ep, file->tr);
+		if (file)
+			trace_probe_remove_file(tp, file);
+		else
+			trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
+	}
+
+	return ret;
+}
+
+static int disable_trace_eprobe(struct trace_event_call *call,
+				struct trace_event_file *file)
+{
+	struct trace_probe *pos, *tp;
+	struct trace_eprobe *ep;
+
+	tp = trace_probe_primary_from_call(call);
+	if (WARN_ON_ONCE(!tp))
+		return -ENODEV;
+
+	if (file) {
+		if (!trace_probe_get_file_link(tp, file))
+			return -ENOENT;
+		if (!trace_probe_has_single_file(tp))
+			goto out;
+		trace_probe_clear_flag(tp, TP_FLAG_TRACE);
+	} else
+		trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
+
+	if (!trace_probe_is_enabled(tp)) {
+		list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
+			ep = container_of(pos, struct trace_eprobe, tp);
+			disable_eprobe(ep, file->tr);
+		}
+	}
+
+ out:
+	if (file)
+		/*
+		 * Synchronization is done in below function. For perf event,
+		 * file == NULL and perf_trace_event_unreg() calls
+		 * tracepoint_synchronize_unregister() to ensure synchronize
+		 * event. We don't need to care about it.
+		 */
+		trace_probe_remove_file(tp, file);
+
+	return 0;
+}
+
+static int eprobe_register(struct trace_event_call *event,
+			   enum trace_reg type, void *data)
+{
+	struct trace_event_file *file = data;
+
+	switch (type) {
+	case TRACE_REG_REGISTER:
+		return enable_trace_eprobe(event, file);
+	case TRACE_REG_UNREGISTER:
+		return disable_trace_eprobe(event, file);
+#ifdef CONFIG_PERF_EVENTS
+	case TRACE_REG_PERF_REGISTER:
+	case TRACE_REG_PERF_UNREGISTER:
+	case TRACE_REG_PERF_OPEN:
+	case TRACE_REG_PERF_CLOSE:
+	case TRACE_REG_PERF_ADD:
+	case TRACE_REG_PERF_DEL:
+		return 0;
+#endif
+	}
+	return 0;
+}
+
+static inline void init_trace_eprobe_call(struct trace_eprobe *ep)
+{
+	struct trace_event_call *call = trace_probe_event_call(&ep->tp);
+
+	call->flags = TRACE_EVENT_FL_EPROBE;
+	call->event.funcs = &eprobe_funcs;
+	call->class->fields_array = eprobe_fields_array;
+	call->class->reg = eprobe_register;
+}
+
+static struct trace_event_call *
+find_and_get_event(const char *system, const char *event_name)
+{
+	struct trace_event_call *tp_event;
+	const char *name;
+
+	list_for_each_entry(tp_event, &ftrace_events, list) {
+		/* Skip other probes and ftrace events */
+		if (tp_event->flags &
+		    (TRACE_EVENT_FL_IGNORE_ENABLE |
+		     TRACE_EVENT_FL_KPROBE |
+		     TRACE_EVENT_FL_UPROBE |
+		     TRACE_EVENT_FL_EPROBE))
+			continue;
+		if (!tp_event->class->system ||
+		    strcmp(system, tp_event->class->system))
+			continue;
+		name = trace_event_name(tp_event);
+		if (!name || strcmp(event_name, name))
+			continue;
+		if (!trace_event_try_get_ref(tp_event)) {
+			return NULL;
+			break;
+		}
+		return tp_event;
+		break;
+	}
+	return NULL;
+}
+
+static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[], int i)
+{
+	unsigned int flags = TPARG_FL_KERNEL | TPARG_FL_TPOINT;
+	int ret;
+
+	ret = traceprobe_parse_probe_arg(&ep->tp, i, argv[i], flags);
+	if (ret)
+		return ret;
+
+	if (ep->tp.args[i].code->op == FETCH_OP_TP_ARG)
+		ret = trace_eprobe_tp_arg_update(ep, i);
+
+	return ret;
+}
+
+static int __trace_eprobe_create(int argc, const char *argv[])
+{
+	/*
+	 * Argument syntax:
+	 *      e[:[GRP/]ENAME] SYSTEM.EVENT [FETCHARGS]
+	 * Fetch args:
+	 *  <name>=$<field>[:TYPE]
+	 */
+	const char *event = NULL, *group = EPROBE_EVENT_SYSTEM;
+	const char *sys_event = NULL, *sys_name = NULL;
+	struct trace_event_call *event_call;
+	struct trace_eprobe *ep = NULL;
+	char buf1[MAX_EVENT_NAME_LEN];
+	char buf2[MAX_EVENT_NAME_LEN];
+	int ret = 0;
+	int i;
+
+	if (argc < 2 || argv[0][0] != 'e')
+		return -ECANCELED;
+
+	trace_probe_log_init("event_probe", argc, argv);
+
+	event = strchr(&argv[0][1], ':');
+	if (event) {
+		event++;
+		ret = traceprobe_parse_event_name(&event, &group, buf1,
+						  event - argv[0]);
+		if (ret)
+			goto parse_error;
+	} else {
+		strscpy(buf1, argv[1], MAX_EVENT_NAME_LEN);
+		sanitize_event_name(buf1);
+		event = buf1;
+	}
+	if (!is_good_name(event) || !is_good_name(group))
+		goto parse_error;
+
+	sys_event = argv[1];
+	ret = traceprobe_parse_event_name(&sys_event, &sys_name, buf2,
+					  sys_event - argv[1]);
+	if (ret || !sys_name)
+		goto parse_error;
+	if (!is_good_name(sys_event) || !is_good_name(sys_name))
+		goto parse_error;
+
+	mutex_lock(&event_mutex);
+	event_call = find_and_get_event(sys_name, sys_event);
+	ep = alloc_event_probe(group, event, event_call, argc - 2);
+	mutex_unlock(&event_mutex);
+
+	if (IS_ERR(ep)) {
+		ret = PTR_ERR(ep);
+		/* This must return -ENOMEM, else there is a bug */
+		WARN_ON_ONCE(ret != -ENOMEM);
+		ep = NULL;
+		goto error;
+	}
+
+	argc -= 2; argv += 2;
+	/* parse arguments */
+	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
+		trace_probe_log_set_index(i + 2);
+		ret = trace_eprobe_tp_update_arg(ep, argv, i);
+		if (ret)
+			goto error;
+	}
+	ret = traceprobe_set_print_fmt(&ep->tp, PROBE_PRINT_EVENT);
+	if (ret < 0)
+		goto error;
+	init_trace_eprobe_call(ep);
+	mutex_lock(&event_mutex);
+	ret = trace_probe_register_event_call(&ep->tp);
+	if (ret) {
+		if (ret == -EEXIST) {
+			trace_probe_log_set_index(0);
+			trace_probe_log_err(0, EVENT_EXIST);
+		}
+		mutex_unlock(&event_mutex);
+		goto error;
+	}
+	ret = dyn_event_add(&ep->devent, &ep->tp.event->call);
+	mutex_unlock(&event_mutex);
+	return ret;
+parse_error:
+	ret = -EINVAL;
+error:
+	trace_event_probe_cleanup(ep);
+	return ret;
+}
+
+/*
+ * Register dynevent at core_initcall. This allows kernel to setup eprobe
+ * events in postcore_initcall without tracefs.
+ */
+static __init int trace_events_eprobe_init_early(void)
+{
+	int err = 0;
+
+	err = dyn_event_register(&eprobe_dyn_event_ops);
+	if (err)
+		pr_warn("Could not register eprobe_dyn_event_ops\n");
+
+	return err;
+}
+core_initcall(trace_events_eprobe_init_early);
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 03be4435d103..6aed10e2f7ce 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -177,7 +177,7 @@ static void perf_trace_event_unreg(struct perf_event *p_event)
 		}
 	}
 out:
-	module_put(tp_event->mod);
+	trace_event_put_ref(tp_event);
 }
 
 static int perf_trace_event_open(struct perf_event *p_event)
@@ -224,10 +224,10 @@ int perf_trace_init(struct perf_event *p_event)
 	list_for_each_entry(tp_event, &ftrace_events, list) {
 		if (tp_event->event.type == event_id &&
 		    tp_event->class && tp_event->class->reg &&
-		    try_module_get(tp_event->mod)) {
+		    trace_event_try_get_ref(tp_event)) {
 			ret = perf_trace_event_init(tp_event, p_event);
 			if (ret)
-				module_put(tp_event->mod);
+				trace_event_put_ref(tp_event);
 			break;
 		}
 	}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 80e96989770e..830b3b9940f4 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -181,6 +181,7 @@ static int trace_define_common_fields(void)
 
 	__common_field(unsigned short, type);
 	__common_field(unsigned char, flags);
+	/* Holds both preempt_count and migrate_disable */
 	__common_field(unsigned char, preempt_count);
 	__common_field(int, pid);
 
@@ -2525,7 +2526,10 @@ __register_event(struct trace_event_call *call, struct module *mod)
 		return ret;
 
 	list_add(&call->list, &ftrace_events);
-	call->mod = mod;
+	if (call->flags & TRACE_EVENT_FL_DYNAMIC)
+		atomic_set(&call->refcnt, 0);
+	else
+		call->module = mod;
 
 	return 0;
 }
@@ -2839,7 +2843,9 @@ static void trace_module_remove_events(struct module *mod)
 
 	down_write(&trace_event_sem);
 	list_for_each_entry_safe(call, p, &ftrace_events, list) {
-		if (call->mod == mod)
+		if ((call->flags & TRACE_EVENT_FL_DYNAMIC) || !call->module)
+			continue;
+		if (call->module == mod)
 			__trace_remove_event_call(call);
 	}
 	up_write(&trace_event_sem);
@@ -2982,7 +2988,7 @@ struct trace_event_file *trace_get_event_file(const char *instance,
 	}
 
 	/* Don't let event modules unload while in use */
-	ret = try_module_get(file->event_call->mod);
+	ret = trace_event_try_get_ref(file->event_call);
 	if (!ret) {
 		trace_array_put(tr);
 		ret = -EBUSY;
@@ -3012,7 +3018,7 @@ EXPORT_SYMBOL_GPL(trace_get_event_file);
 void trace_put_event_file(struct trace_event_file *file)
 {
 	mutex_lock(&event_mutex);
-	module_put(file->event_call->mod);
+	trace_event_put_ref(file->event_call);
 	mutex_unlock(&event_mutex);
 
 	trace_array_put(file->tr);
@@ -3147,7 +3153,7 @@ static int free_probe_data(void *data)
 	if (!edata->ref) {
 		/* Remove the SOFT_MODE flag */
 		__ftrace_event_enable_disable(edata->file, 0, 1);
-		module_put(edata->file->event_call->mod);
+		trace_event_put_ref(edata->file->event_call);
 		kfree(edata);
 	}
 	return 0;
@@ -3280,7 +3286,7 @@ event_enable_func(struct trace_array *tr, struct ftrace_hash *hash,
 
  out_reg:
 	/* Don't let event modules unload while probe registered */
-	ret = try_module_get(file->event_call->mod);
+	ret = trace_event_try_get_ref(file->event_call);
 	if (!ret) {
 		ret = -EBUSY;
 		goto out_free;
@@ -3310,7 +3316,7 @@ event_enable_func(struct trace_array *tr, struct ftrace_hash *hash,
  out_disable:
 	__ftrace_event_enable_disable(file, 0, 1);
  out_put:
-	module_put(file->event_call->mod);
+	trace_event_put_ref(file->event_call);
  out_free:
 	kfree(data);
 	goto out;
@@ -3376,7 +3382,8 @@ void __trace_early_add_events(struct trace_array *tr)
 
 	list_for_each_entry(call, &ftrace_events, list) {
 		/* Early boot up should not have any modules loaded */
-		if (WARN_ON_ONCE(call->mod))
+		if (!(call->flags & TRACE_EVENT_FL_DYNAMIC) &&
+		    WARN_ON_ONCE(call->module))
 			continue;
 
 		ret = __trace_early_add_new_event(call, tr);
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index a48aa2a2875b..a6061a69aa84 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -121,6 +121,7 @@ struct hist_field {
 	unsigned int			size;
 	unsigned int			offset;
 	unsigned int                    is_signed;
+	unsigned long			buckets;
 	const char			*type;
 	struct hist_field		*operands[HIST_FIELD_OPERANDS_MAX];
 	struct hist_trigger_data	*hist_data;
@@ -219,6 +220,27 @@ static u64 hist_field_log2(struct hist_field *hist_field,
 	return (u64) ilog2(roundup_pow_of_two(val));
 }
 
+static u64 hist_field_bucket(struct hist_field *hist_field,
+			     struct tracing_map_elt *elt,
+			     struct trace_buffer *buffer,
+			     struct ring_buffer_event *rbe,
+			     void *event)
+{
+	struct hist_field *operand = hist_field->operands[0];
+	unsigned long buckets = hist_field->buckets;
+
+	u64 val = operand->fn(operand, elt, buffer, rbe, event);
+
+	if (WARN_ON_ONCE(!buckets))
+		return val;
+
+	if (val >= LONG_MAX)
+		val = div64_ul(val, buckets);
+	else
+		val = (u64)((unsigned long)val / buckets);
+	return val * buckets;
+}
+
 static u64 hist_field_plus(struct hist_field *hist_field,
 			   struct tracing_map_elt *elt,
 			   struct trace_buffer *buffer,
@@ -318,6 +340,7 @@ enum hist_field_flags {
 	HIST_FIELD_FL_VAR_REF		= 1 << 14,
 	HIST_FIELD_FL_CPU		= 1 << 15,
 	HIST_FIELD_FL_ALIAS		= 1 << 16,
+	HIST_FIELD_FL_BUCKET		= 1 << 17,
 };
 
 struct var_defs {
@@ -485,7 +508,8 @@ struct track_data {
 struct hist_elt_data {
 	char *comm;
 	u64 *var_ref_vals;
-	char *field_var_str[SYNTH_FIELDS_MAX];
+	char **field_var_str;
+	int n_field_var_str;
 };
 
 struct snapshot_context {
@@ -1109,7 +1133,8 @@ static const char *hist_field_name(struct hist_field *field,
 	if (field->field)
 		field_name = field->field->name;
 	else if (field->flags & HIST_FIELD_FL_LOG2 ||
-		 field->flags & HIST_FIELD_FL_ALIAS)
+		 field->flags & HIST_FIELD_FL_ALIAS ||
+		 field->flags & HIST_FIELD_FL_BUCKET)
 		field_name = hist_field_name(field->operands[0], ++level);
 	else if (field->flags & HIST_FIELD_FL_CPU)
 		field_name = "common_cpu";
@@ -1377,9 +1402,11 @@ static void hist_elt_data_free(struct hist_elt_data *elt_data)
 {
 	unsigned int i;
 
-	for (i = 0; i < SYNTH_FIELDS_MAX; i++)
+	for (i = 0; i < elt_data->n_field_var_str; i++)
 		kfree(elt_data->field_var_str[i]);
 
+	kfree(elt_data->field_var_str);
+
 	kfree(elt_data->comm);
 	kfree(elt_data);
 }
@@ -1396,17 +1423,17 @@ static int hist_trigger_elt_data_alloc(struct tracing_map_elt *elt)
 	struct hist_trigger_data *hist_data = elt->map->private_data;
 	unsigned int size = TASK_COMM_LEN;
 	struct hist_elt_data *elt_data;
-	struct hist_field *key_field;
+	struct hist_field *hist_field;
 	unsigned int i, n_str;
 
 	elt_data = kzalloc(sizeof(*elt_data), GFP_KERNEL);
 	if (!elt_data)
 		return -ENOMEM;
 
-	for_each_hist_key_field(i, hist_data) {
-		key_field = hist_data->fields[i];
+	for_each_hist_field(i, hist_data) {
+		hist_field = hist_data->fields[i];
 
-		if (key_field->flags & HIST_FIELD_FL_EXECNAME) {
+		if (hist_field->flags & HIST_FIELD_FL_EXECNAME) {
 			elt_data->comm = kzalloc(size, GFP_KERNEL);
 			if (!elt_data->comm) {
 				kfree(elt_data);
@@ -1427,6 +1454,13 @@ static int hist_trigger_elt_data_alloc(struct tracing_map_elt *elt)
 
 	size = STR_VAR_LEN_MAX;
 
+	elt_data->field_var_str = kcalloc(n_str, sizeof(char *), GFP_KERNEL);
+	if (!elt_data->field_var_str) {
+		hist_elt_data_free(elt_data);
+		return -EINVAL;
+	}
+	elt_data->n_field_var_str = n_str;
+
 	for (i = 0; i < n_str; i++) {
 		elt_data->field_var_str[i] = kzalloc(size, GFP_KERNEL);
 		if (!elt_data->field_var_str[i]) {
@@ -1470,6 +1504,8 @@ static const char *get_hist_field_flags(struct hist_field *hist_field)
 		flags_str = "syscall";
 	else if (hist_field->flags & HIST_FIELD_FL_LOG2)
 		flags_str = "log2";
+	else if (hist_field->flags & HIST_FIELD_FL_BUCKET)
+		flags_str = "buckets";
 	else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP_USECS)
 		flags_str = "usecs";
 
@@ -1590,7 +1626,9 @@ static void __destroy_hist_field(struct hist_field *hist_field)
 
 	kfree(hist_field->var.name);
 	kfree(hist_field->name);
-	kfree(hist_field->type);
+
+	/* Can likely be a const */
+	kfree_const(hist_field->type);
 
 	kfree(hist_field->system);
 	kfree(hist_field->event_name);
@@ -1647,9 +1685,7 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
 	if (flags & HIST_FIELD_FL_HITCOUNT) {
 		hist_field->fn = hist_field_counter;
 		hist_field->size = sizeof(u64);
-		hist_field->type = kstrdup("u64", GFP_KERNEL);
-		if (!hist_field->type)
-			goto free;
+		hist_field->type = "u64";
 		goto out;
 	}
 
@@ -1658,12 +1694,13 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
 		goto out;
 	}
 
-	if (flags & HIST_FIELD_FL_LOG2) {
-		unsigned long fl = flags & ~HIST_FIELD_FL_LOG2;
-		hist_field->fn = hist_field_log2;
+	if (flags & (HIST_FIELD_FL_LOG2 | HIST_FIELD_FL_BUCKET)) {
+		unsigned long fl = flags & ~(HIST_FIELD_FL_LOG2 | HIST_FIELD_FL_BUCKET);
+		hist_field->fn = flags & HIST_FIELD_FL_LOG2 ? hist_field_log2 :
+			hist_field_bucket;
 		hist_field->operands[0] = create_hist_field(hist_data, field, fl, NULL);
 		hist_field->size = hist_field->operands[0]->size;
-		hist_field->type = kstrdup(hist_field->operands[0]->type, GFP_KERNEL);
+		hist_field->type = kstrdup_const(hist_field->operands[0]->type, GFP_KERNEL);
 		if (!hist_field->type)
 			goto free;
 		goto out;
@@ -1672,18 +1709,14 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
 	if (flags & HIST_FIELD_FL_TIMESTAMP) {
 		hist_field->fn = hist_field_timestamp;
 		hist_field->size = sizeof(u64);
-		hist_field->type = kstrdup("u64", GFP_KERNEL);
-		if (!hist_field->type)
-			goto free;
+		hist_field->type = "u64";
 		goto out;
 	}
 
 	if (flags & HIST_FIELD_FL_CPU) {
 		hist_field->fn = hist_field_cpu;
 		hist_field->size = sizeof(int);
-		hist_field->type = kstrdup("unsigned int", GFP_KERNEL);
-		if (!hist_field->type)
-			goto free;
+		hist_field->type = "unsigned int";
 		goto out;
 	}
 
@@ -1696,7 +1729,7 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
 		flags |= HIST_FIELD_FL_STRING;
 
 		hist_field->size = MAX_FILTER_STR_VAL;
-		hist_field->type = kstrdup(field->type, GFP_KERNEL);
+		hist_field->type = kstrdup_const(field->type, GFP_KERNEL);
 		if (!hist_field->type)
 			goto free;
 
@@ -1709,7 +1742,7 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
 	} else {
 		hist_field->size = field->size;
 		hist_field->is_signed = field->is_signed;
-		hist_field->type = kstrdup(field->type, GFP_KERNEL);
+		hist_field->type = kstrdup_const(field->type, GFP_KERNEL);
 		if (!hist_field->type)
 			goto free;
 
@@ -1795,7 +1828,7 @@ static int init_var_ref(struct hist_field *ref_field,
 		}
 	}
 
-	ref_field->type = kstrdup(var_field->type, GFP_KERNEL);
+	ref_field->type = kstrdup_const(var_field->type, GFP_KERNEL);
 	if (!ref_field->type) {
 		err = -ENOMEM;
 		goto free;
@@ -1953,7 +1986,7 @@ static struct hist_field *parse_var_ref(struct hist_trigger_data *hist_data,
 
 static struct ftrace_event_field *
 parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
-	    char *field_str, unsigned long *flags)
+	    char *field_str, unsigned long *flags, unsigned long *buckets)
 {
 	struct ftrace_event_field *field = NULL;
 	char *field_name, *modifier, *str;
@@ -1980,7 +2013,22 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
 			*flags |= HIST_FIELD_FL_LOG2;
 		else if (strcmp(modifier, "usecs") == 0)
 			*flags |= HIST_FIELD_FL_TIMESTAMP_USECS;
-		else {
+		else if (strncmp(modifier, "bucket", 6) == 0) {
+			int ret;
+
+			modifier += 6;
+
+			if (*modifier == 's')
+				modifier++;
+			if (*modifier != '=')
+				goto error;
+			modifier++;
+			ret = kstrtoul(modifier, 0, buckets);
+			if (ret || !(*buckets))
+				goto error;
+			*flags |= HIST_FIELD_FL_BUCKET;
+		} else {
+ error:
 			hist_err(tr, HIST_ERR_BAD_FIELD_MODIFIER, errpos(modifier));
 			field = ERR_PTR(-EINVAL);
 			goto out;
@@ -2049,6 +2097,7 @@ static struct hist_field *parse_atom(struct hist_trigger_data *hist_data,
 	char *s, *ref_system = NULL, *ref_event = NULL, *ref_var = str;
 	struct ftrace_event_field *field = NULL;
 	struct hist_field *hist_field = NULL;
+	unsigned long buckets = 0;
 	int ret = 0;
 
 	s = strchr(str, '.');
@@ -2086,7 +2135,7 @@ static struct hist_field *parse_atom(struct hist_trigger_data *hist_data,
 	} else
 		str = s;
 
-	field = parse_field(hist_data, file, str, flags);
+	field = parse_field(hist_data, file, str, flags, &buckets);
 	if (IS_ERR(field)) {
 		ret = PTR_ERR(field);
 		goto out;
@@ -2097,6 +2146,7 @@ static struct hist_field *parse_atom(struct hist_trigger_data *hist_data,
 		ret = -ENOMEM;
 		goto out;
 	}
+	hist_field->buckets = buckets;
 
 	return hist_field;
  out:
@@ -2171,7 +2221,7 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data,
 	expr->operands[0] = operand1;
 	expr->operator = FIELD_OP_UNARY_MINUS;
 	expr->name = expr_str(expr, 0);
-	expr->type = kstrdup(operand1->type, GFP_KERNEL);
+	expr->type = kstrdup_const(operand1->type, GFP_KERNEL);
 	if (!expr->type) {
 		ret = -ENOMEM;
 		goto free;
@@ -2311,7 +2361,7 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
 
 	expr->operator = field_op;
 	expr->name = expr_str(expr, 0);
-	expr->type = kstrdup(operand1->type, GFP_KERNEL);
+	expr->type = kstrdup_const(operand1->type, GFP_KERNEL);
 	if (!expr->type) {
 		ret = -ENOMEM;
 		goto free;
@@ -2699,10 +2749,10 @@ static struct hist_field *create_var(struct hist_trigger_data *hist_data,
 	var->var.hist_data = var->hist_data = hist_data;
 	var->size = size;
 	var->var.name = kstrdup(name, GFP_KERNEL);
-	var->type = kstrdup(type, GFP_KERNEL);
+	var->type = kstrdup_const(type, GFP_KERNEL);
 	if (!var->var.name || !var->type) {
+		kfree_const(var->type);
 		kfree(var->var.name);
-		kfree(var->type);
 		kfree(var);
 		var = ERR_PTR(-ENOMEM);
 	}
@@ -3731,6 +3781,41 @@ static int create_val_field(struct hist_trigger_data *hist_data,
 	return __create_val_field(hist_data, val_idx, file, NULL, field_str, 0);
 }
 
+static const char *no_comm = "(no comm)";
+
+static u64 hist_field_execname(struct hist_field *hist_field,
+			       struct tracing_map_elt *elt,
+			       struct trace_buffer *buffer,
+			       struct ring_buffer_event *rbe,
+			       void *event)
+{
+	struct hist_elt_data *elt_data;
+
+	if (WARN_ON_ONCE(!elt))
+		return (u64)(unsigned long)no_comm;
+
+	elt_data = elt->private_data;
+
+	if (WARN_ON_ONCE(!elt_data->comm))
+		return (u64)(unsigned long)no_comm;
+
+	return (u64)(unsigned long)(elt_data->comm);
+}
+
+/* Convert a var that points to common_pid.execname to a string */
+static void update_var_execname(struct hist_field *hist_field)
+{
+	hist_field->flags = HIST_FIELD_FL_STRING | HIST_FIELD_FL_VAR |
+		HIST_FIELD_FL_EXECNAME;
+	hist_field->size = MAX_FILTER_STR_VAL;
+	hist_field->is_signed = 0;
+
+	kfree_const(hist_field->type);
+	hist_field->type = "char[]";
+
+	hist_field->fn = hist_field_execname;
+}
+
 static int create_var_field(struct hist_trigger_data *hist_data,
 			    unsigned int val_idx,
 			    struct trace_event_file *file,
@@ -3755,6 +3840,9 @@ static int create_var_field(struct hist_trigger_data *hist_data,
 
 	ret = __create_val_field(hist_data, val_idx, file, var_name, expr_str, flags);
 
+	if (!ret && hist_data->fields[val_idx]->flags & HIST_FIELD_FL_EXECNAME)
+		update_var_execname(hist_data->fields[val_idx]);
+
 	if (!ret && hist_data->fields[val_idx]->flags & HIST_FIELD_FL_STRING)
 		hist_data->fields[val_idx]->var_str_idx = hist_data->n_var_str++;
 
@@ -4698,6 +4786,11 @@ static void hist_trigger_print_key(struct seq_file *m,
 		} else if (key_field->flags & HIST_FIELD_FL_LOG2) {
 			seq_printf(m, "%s: ~ 2^%-2llu", field_name,
 				   *(u64 *)(key + key_field->offset));
+		} else if (key_field->flags & HIST_FIELD_FL_BUCKET) {
+			unsigned long buckets = key_field->buckets;
+			uval = *(u64 *)(key + key_field->offset);
+			seq_printf(m, "%s: ~ %llu-%llu", field_name,
+				   uval, uval + buckets -1);
 		} else if (key_field->flags & HIST_FIELD_FL_STRING) {
 			seq_printf(m, "%s: %-50s", field_name,
 				   (char *)(key + key_field->offset));
@@ -5137,6 +5230,8 @@ static void hist_field_print(struct seq_file *m, struct hist_field *hist_field)
 				seq_printf(m, ".%s", flags);
 		}
 	}
+	if (hist_field->buckets)
+		seq_printf(m, "=%ld", hist_field->buckets);
 }
 
 static int event_hist_trigger_print(struct seq_file *m,
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 9315fc03e303..d54094b7a9d7 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -1298,7 +1298,7 @@ static int __create_synth_event(const char *name, const char *raw_fields)
 	}
 	ret = register_synth_event(event);
 	if (!ret)
-		dyn_event_add(&event->devent);
+		dyn_event_add(&event->devent, &event->call);
 	else
 		free_synth_event(event);
  out:
@@ -1369,13 +1369,15 @@ static int destroy_synth_event(struct synth_event *se)
 	int ret;
 
 	if (se->ref)
-		ret = -EBUSY;
-	else {
-		ret = unregister_synth_event(se);
-		if (!ret) {
-			dyn_event_remove(&se->devent);
-			free_synth_event(se);
-		}
+		return -EBUSY;
+
+	if (trace_event_dyn_busy(&se->call))
+		return -EBUSY;
+
+	ret = unregister_synth_event(se);
+	if (!ret) {
+		dyn_event_remove(&se->devent);
+		free_synth_event(se);
 	}
 
 	return ret;
@@ -2102,6 +2104,9 @@ static int synth_event_release(struct dyn_event *ev)
 	if (event->ref)
 		return -EBUSY;
 
+	if (trace_event_dyn_busy(&event->call))
+		return -EBUSY;
+
 	ret = unregister_synth_event(event);
 	if (ret)
 		return ret;
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index cf84d0f6583a..3d5c07239a2a 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -124,6 +124,18 @@ static void *trigger_next(struct seq_file *m, void *t, loff_t *pos)
 	return seq_list_next(t, &event_file->triggers, pos);
 }
 
+static bool check_user_trigger(struct trace_event_file *file)
+{
+	struct event_trigger_data *data;
+
+	list_for_each_entry_rcu(data, &file->triggers, list) {
+		if (data->flags & EVENT_TRIGGER_FL_PROBE)
+			continue;
+		return true;
+	}
+	return false;
+}
+
 static void *trigger_start(struct seq_file *m, loff_t *pos)
 {
 	struct trace_event_file *event_file;
@@ -134,7 +146,7 @@ static void *trigger_start(struct seq_file *m, loff_t *pos)
 	if (unlikely(!event_file))
 		return ERR_PTR(-ENODEV);
 
-	if (list_empty(&event_file->triggers))
+	if (list_empty(&event_file->triggers) || !check_user_trigger(event_file))
 		return *pos == 0 ? SHOW_AVAILABLE_TRIGGERS : NULL;
 
 	return seq_list_start(&event_file->triggers, *pos);
@@ -1334,7 +1346,7 @@ void event_enable_trigger_free(struct event_trigger_ops *ops,
 	if (!data->ref) {
 		/* Remove the SOFT_MODE flag */
 		trace_event_enable_disable(enable_data->file, 0, 1);
-		module_put(enable_data->file->event_call->mod);
+		trace_event_put_ref(enable_data->file->event_call);
 		trigger_data_free(data);
 		kfree(enable_data);
 	}
@@ -1481,7 +1493,7 @@ int event_enable_trigger_func(struct event_command *cmd_ops,
 
  out_reg:
 	/* Don't let event modules unload while probe registered */
-	ret = try_module_get(event_enable_file->event_call->mod);
+	ret = trace_event_try_get_ref(event_enable_file->event_call);
 	if (!ret) {
 		ret = -EBUSY;
 		goto out_free;
@@ -1510,7 +1522,7 @@ int event_enable_trigger_func(struct event_command *cmd_ops,
  out_disable:
 	trace_event_enable_disable(event_enable_file, 0, 1);
  out_put:
-	module_put(event_enable_file->event_call->mod);
+	trace_event_put_ref(event_enable_file->event_call);
  out_free:
 	if (cmd_ops->set_filter)
 		cmd_ops->set_filter(NULL, trigger_data, NULL);
diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index 14f46aae1981..1b83d75eb103 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -325,10 +325,10 @@ static void move_to_next_cpu(void)
 	if (!cpumask_equal(current_mask, current->cpus_ptr))
 		goto change_mode;
 
-	get_online_cpus();
+	cpus_read_lock();
 	cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);
 	next_cpu = cpumask_next(raw_smp_processor_id(), current_mask);
-	put_online_cpus();
+	cpus_read_unlock();
 
 	if (next_cpu >= nr_cpu_ids)
 		next_cpu = cpumask_first(current_mask);
@@ -398,7 +398,7 @@ static void stop_single_kthread(void)
 	struct hwlat_kthread_data *kdata = get_cpu_data();
 	struct task_struct *kthread;
 
-	get_online_cpus();
+	cpus_read_lock();
 	kthread = kdata->kthread;
 
 	if (!kthread)
@@ -408,7 +408,7 @@ static void stop_single_kthread(void)
 	kdata->kthread = NULL;
 
 out_put_cpus:
-	put_online_cpus();
+	cpus_read_unlock();
 }
 
 
@@ -425,14 +425,14 @@ static int start_single_kthread(struct trace_array *tr)
 	struct task_struct *kthread;
 	int next_cpu;
 
-	get_online_cpus();
+	cpus_read_lock();
 	if (kdata->kthread)
 		goto out_put_cpus;
 
 	kthread = kthread_create(kthread_fn, NULL, "hwlatd");
 	if (IS_ERR(kthread)) {
 		pr_err(BANNER "could not start sampling thread\n");
-		put_online_cpus();
+		cpus_read_unlock();
 		return -ENOMEM;
 	}
 
@@ -452,7 +452,7 @@ static int start_single_kthread(struct trace_array *tr)
 	wake_up_process(kthread);
 
 out_put_cpus:
-	put_online_cpus();
+	cpus_read_unlock();
 	return 0;
 }
 
@@ -479,10 +479,10 @@ static void stop_per_cpu_kthreads(void)
 {
 	unsigned int cpu;
 
-	get_online_cpus();
+	cpus_read_lock();
 	for_each_online_cpu(cpu)
 		stop_cpu_kthread(cpu);
-	put_online_cpus();
+	cpus_read_unlock();
 }
 
 /*
@@ -515,7 +515,7 @@ static void hwlat_hotplug_workfn(struct work_struct *dummy)
 
 	mutex_lock(&trace_types_lock);
 	mutex_lock(&hwlat_data.lock);
-	get_online_cpus();
+	cpus_read_lock();
 
 	if (!hwlat_busy || hwlat_data.thread_mode != MODE_PER_CPU)
 		goto out_unlock;
@@ -526,7 +526,7 @@ static void hwlat_hotplug_workfn(struct work_struct *dummy)
 	start_cpu_kthread(cpu);
 
 out_unlock:
-	put_online_cpus();
+	cpus_read_unlock();
 	mutex_unlock(&hwlat_data.lock);
 	mutex_unlock(&trace_types_lock);
 }
@@ -582,7 +582,7 @@ static int start_per_cpu_kthreads(struct trace_array *tr)
 	unsigned int cpu;
 	int retval;
 
-	get_online_cpus();
+	cpus_read_lock();
 	/*
 	 * Run only on CPUs in which hwlat is allowed to run.
 	 */
@@ -596,12 +596,12 @@ static int start_per_cpu_kthreads(struct trace_array *tr)
 		if (retval)
 			goto out_error;
 	}
-	put_online_cpus();
+	cpus_read_unlock();
 
 	return 0;
 
 out_error:
-	put_online_cpus();
+	cpus_read_unlock();
 	stop_per_cpu_kthreads();
 	return retval;
 }
diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c
index 9da76104f7a2..59857a1ee44c 100644
--- a/kernel/trace/trace_kdb.c
+++ b/kernel/trace/trace_kdb.c
@@ -147,11 +147,17 @@ static int kdb_ftdump(int argc, const char **argv)
 	return 0;
 }
 
+static kdbtab_t ftdump_cmd = {
+	.name = "ftdump",
+	.func = kdb_ftdump,
+	.usage = "[skip_#entries] [cpu]",
+	.help = "Dump ftrace log; -skip dumps last #entries",
+	.flags = KDB_ENABLE_ALWAYS_SAFE,
+};
+
 static __init int kdb_ftrace_register(void)
 {
-	kdb_register_flags("ftdump", kdb_ftdump, "[skip_#entries] [cpu]",
-			    "Dump ftrace log; -skip dumps last #entries", 0,
-			    KDB_ENABLE_ALWAYS_SAFE);
+	kdb_register(&ftdump_cmd);
 	return 0;
 }
 
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index ea6178cb5e33..3a64ba4bbad6 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -80,10 +80,6 @@ static struct trace_kprobe *to_trace_kprobe(struct dyn_event *ev)
 	for_each_dyn_event(dpos)		\
 		if (is_trace_kprobe(dpos) && (pos = to_trace_kprobe(dpos)))
 
-#define SIZEOF_TRACE_KPROBE(n)				\
-	(offsetof(struct trace_kprobe, tp.args) +	\
-	(sizeof(struct probe_arg) * (n)))
-
 static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
 {
 	return tk->rp.handler != NULL;
@@ -265,7 +261,7 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
 	struct trace_kprobe *tk;
 	int ret = -ENOMEM;
 
-	tk = kzalloc(SIZEOF_TRACE_KPROBE(nargs), GFP_KERNEL);
+	tk = kzalloc(struct_size(tk, tp.args, nargs), GFP_KERNEL);
 	if (!tk)
 		return ERR_PTR(ret);
 
@@ -543,6 +539,10 @@ static int unregister_trace_kprobe(struct trace_kprobe *tk)
 	if (trace_probe_is_enabled(&tk->tp))
 		return -EBUSY;
 
+	/* If there's a reference to the dynamic event */
+	if (trace_event_dyn_busy(trace_probe_event_call(&tk->tp)))
+		return -EBUSY;
+
 	/* Will fail if probe is being used by ftrace or perf */
 	if (unregister_kprobe_event(tk))
 		return -EBUSY;
@@ -618,7 +618,7 @@ static int append_trace_kprobe(struct trace_kprobe *tk, struct trace_kprobe *to)
 	if (ret)
 		trace_probe_unlink(&tk->tp);
 	else
-		dyn_event_add(&tk->devent);
+		dyn_event_add(&tk->devent, trace_probe_event_call(&tk->tp));
 
 	return ret;
 }
@@ -647,7 +647,11 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
 	/* Register new event */
 	ret = register_kprobe_event(tk);
 	if (ret) {
-		pr_warn("Failed to register probe event(%d)\n", ret);
+		if (ret == -EEXIST) {
+			trace_probe_log_set_index(0);
+			trace_probe_log_err(0, EVENT_EXIST);
+		} else
+			pr_warn("Failed to register probe event(%d)\n", ret);
 		goto end;
 	}
 
@@ -661,7 +665,7 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
 	if (ret < 0)
 		unregister_kprobe_event(tk);
 	else
-		dyn_event_add(&tk->devent);
+		dyn_event_add(&tk->devent, trace_probe_event_call(&tk->tp));
 
 end:
 	mutex_unlock(&event_mutex);
@@ -703,14 +707,6 @@ static struct notifier_block trace_kprobe_module_nb = {
 	.priority = 1	/* Invoked after kprobe module callback */
 };
 
-/* Convert certain expected symbols into '_' when generating event names */
-static inline void sanitize_event_name(char *name)
-{
-	while (*name++ != '\0')
-		if (*name == ':' || *name == '.')
-			*name = '_';
-}
-
 static int __trace_kprobe_create(int argc, const char *argv[])
 {
 	/*
@@ -742,6 +738,7 @@ static int __trace_kprobe_create(int argc, const char *argv[])
 	bool is_return = false;
 	char *symbol = NULL, *tmp = NULL;
 	const char *event = NULL, *group = KPROBE_EVENT_SYSTEM;
+	enum probe_print_type ptype;
 	int maxactive = 0;
 	long offset = 0;
 	void *addr = NULL;
@@ -869,20 +866,14 @@ static int __trace_kprobe_create(int argc, const char *argv[])
 
 	/* parse arguments */
 	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
-		tmp = kstrdup(argv[i], GFP_KERNEL);
-		if (!tmp) {
-			ret = -ENOMEM;
-			goto error;
-		}
-
 		trace_probe_log_set_index(i + 2);
-		ret = traceprobe_parse_probe_arg(&tk->tp, i, tmp, flags);
-		kfree(tmp);
+		ret = traceprobe_parse_probe_arg(&tk->tp, i, argv[i], flags);
 		if (ret)
 			goto error;	/* This can be -ENOMEM */
 	}
 
-	ret = traceprobe_set_print_fmt(&tk->tp, is_return);
+	ptype = is_return ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL;
+	ret = traceprobe_set_print_fmt(&tk->tp, ptype);
 	if (ret < 0)
 		goto error;
 
@@ -1330,9 +1321,10 @@ probe_mem_read(void *dest, void *src, size_t size)
 
 /* Note that we don't verify it, since the code does not come from user space */
 static int
-process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest,
+process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
 		   void *base)
 {
+	struct pt_regs *regs = rec;
 	unsigned long val;
 
 retry:
@@ -1806,6 +1798,7 @@ struct trace_event_call *
 create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
 			  bool is_return)
 {
+	enum probe_print_type ptype;
 	struct trace_kprobe *tk;
 	int ret;
 	char *event;
@@ -1829,7 +1822,9 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
 
 	init_trace_event_call(tk);
 
-	if (traceprobe_set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) {
+	ptype = trace_kprobe_is_return(tk) ?
+		PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL;
+	if (traceprobe_set_print_fmt(&tk->tp, ptype) < 0) {
 		ret = -ENOMEM;
 		goto error;
 	}
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index b61eefe5ccf5..ce053619f289 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -1498,12 +1498,12 @@ static void stop_per_cpu_kthreads(void)
 {
 	int cpu;
 
-	get_online_cpus();
+	cpus_read_lock();
 
 	for_each_online_cpu(cpu)
 		stop_kthread(cpu);
 
-	put_online_cpus();
+	cpus_read_unlock();
 }
 
 /*
@@ -1548,10 +1548,10 @@ static int start_kthread(unsigned int cpu)
 static int start_per_cpu_kthreads(struct trace_array *tr)
 {
 	struct cpumask *current_mask = &save_cpumask;
-	int retval;
+	int retval = 0;
 	int cpu;
 
-	get_online_cpus();
+	cpus_read_lock();
 	/*
 	 * Run only on CPUs in which trace and osnoise are allowed to run.
 	 */
@@ -1568,13 +1568,13 @@ static int start_per_cpu_kthreads(struct trace_array *tr)
 		retval = start_kthread(cpu);
 		if (retval) {
 			stop_per_cpu_kthreads();
-			return retval;
+			break;
 		}
 	}
 
-	put_online_cpus();
+	cpus_read_unlock();
 
-	return 0;
+	return retval;
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -1590,7 +1590,7 @@ static void osnoise_hotplug_workfn(struct work_struct *dummy)
 		goto out_unlock_trace;
 
 	mutex_lock(&interface_lock);
-	get_online_cpus();
+	cpus_read_lock();
 
 	if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
 		goto out_unlock;
@@ -1601,7 +1601,7 @@ static void osnoise_hotplug_workfn(struct work_struct *dummy)
 	start_kthread(cpu);
 
 out_unlock:
-	put_online_cpus();
+	cpus_read_unlock();
 	mutex_unlock(&interface_lock);
 out_unlock_trace:
 	mutex_unlock(&trace_types_lock);
@@ -1743,11 +1743,11 @@ osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
 	/*
 	 * osnoise_cpumask is read by CPU hotplug operations.
 	 */
-	get_online_cpus();
+	cpus_read_lock();
 
 	cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new);
 
-	put_online_cpus();
+	cpus_read_unlock();
 	mutex_unlock(&interface_lock);
 
 	if (running)
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index a0bf446bb034..c2ca40e8595b 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -492,8 +492,13 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
 	trace_seq_printf(s, "%c%c%c",
 			 irqs_off, need_resched, hardsoft_irq);
 
-	if (entry->preempt_count)
-		trace_seq_printf(s, "%x", entry->preempt_count);
+	if (entry->preempt_count & 0xf)
+		trace_seq_printf(s, "%x", entry->preempt_count & 0xf);
+	else
+		trace_seq_putc(s, '.');
+
+	if (entry->preempt_count & 0xf0)
+		trace_seq_printf(s, "%x", entry->preempt_count >> 4);
 	else
 		trace_seq_putc(s, '.');
 
@@ -656,7 +661,7 @@ int trace_print_lat_context(struct trace_iterator *iter)
 		trace_seq_printf(
 			s, "%16s %7d %3d %d %08x %08lx ",
 			comm, entry->pid, iter->cpu, entry->flags,
-			entry->preempt_count, iter->idx);
+			entry->preempt_count & 0xf, iter->idx);
 	} else {
 		lat_print_generic(s, entry, iter->cpu);
 	}
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 15413ad7cef2..3ed2a3f37297 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -233,6 +233,9 @@ int traceprobe_parse_event_name(const char **pevent, const char **pgroup,
 	int len;
 
 	slash = strchr(event, '/');
+	if (!slash)
+		slash = strchr(event, '.');
+
 	if (slash) {
 		if (slash == event) {
 			trace_probe_log_err(offset, NO_GROUP_NAME);
@@ -316,6 +319,13 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
 		code->op = FETCH_OP_ARG;
 		code->param = (unsigned int)param - 1;
 #endif
+	} else if (flags & TPARG_FL_TPOINT) {
+		if (code->data)
+			return -EFAULT;
+		code->data = kstrdup(arg, GFP_KERNEL);
+		if (!code->data)
+			return -ENOMEM;
+		code->op = FETCH_OP_TP_ARG;
 	} else
 		goto inval_var;
 
@@ -540,26 +550,34 @@ static int __parse_bitfield_probe_arg(const char *bf,
 }
 
 /* String length checking wrapper */
-static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
+static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
 		struct probe_arg *parg, unsigned int flags, int offset)
 {
 	struct fetch_insn *code, *scode, *tmp = NULL;
 	char *t, *t2, *t3;
+	char *arg;
 	int ret, len;
 
+	arg = kstrdup(argv, GFP_KERNEL);
+	if (!arg)
+		return -ENOMEM;
+
+	ret = -EINVAL;
 	len = strlen(arg);
 	if (len > MAX_ARGSTR_LEN) {
 		trace_probe_log_err(offset, ARG_TOO_LONG);
-		return -EINVAL;
+		goto out;
 	} else if (len == 0) {
 		trace_probe_log_err(offset, NO_ARG_BODY);
-		return -EINVAL;
+		goto out;
 	}
 
+	ret = -ENOMEM;
 	parg->comm = kstrdup(arg, GFP_KERNEL);
 	if (!parg->comm)
-		return -ENOMEM;
+		goto out;
 
+	ret = -EINVAL;
 	t = strchr(arg, ':');
 	if (t) {
 		*t = '\0';
@@ -571,22 +589,22 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
 				offset += t2 + strlen(t2) - arg;
 				trace_probe_log_err(offset,
 						    ARRAY_NO_CLOSE);
-				return -EINVAL;
+				goto out;
 			} else if (t3[1] != '\0') {
 				trace_probe_log_err(offset + t3 + 1 - arg,
 						    BAD_ARRAY_SUFFIX);
-				return -EINVAL;
+				goto out;
 			}
 			*t3 = '\0';
 			if (kstrtouint(t2, 0, &parg->count) || !parg->count) {
 				trace_probe_log_err(offset + t2 - arg,
 						    BAD_ARRAY_NUM);
-				return -EINVAL;
+				goto out;
 			}
 			if (parg->count > MAX_ARRAY_LEN) {
 				trace_probe_log_err(offset + t2 - arg,
 						    ARRAY_TOO_BIG);
-				return -EINVAL;
+				goto out;
 			}
 		}
 	}
@@ -598,29 +616,30 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
 	if (strcmp(arg, "$comm") == 0 || strncmp(arg, "\\\"", 2) == 0) {
 		/* The type of $comm must be "string", and not an array. */
 		if (parg->count || (t && strcmp(t, "string")))
-			return -EINVAL;
+			goto out;
 		parg->type = find_fetch_type("string");
 	} else
 		parg->type = find_fetch_type(t);
 	if (!parg->type) {
 		trace_probe_log_err(offset + (t ? (t - arg) : 0), BAD_TYPE);
-		return -EINVAL;
+		goto out;
 	}
 	parg->offset = *size;
 	*size += parg->type->size * (parg->count ?: 1);
 
+	ret = -ENOMEM;
 	if (parg->count) {
 		len = strlen(parg->type->fmttype) + 6;
 		parg->fmt = kmalloc(len, GFP_KERNEL);
 		if (!parg->fmt)
-			return -ENOMEM;
+			goto out;
 		snprintf(parg->fmt, len, "%s[%d]", parg->type->fmttype,
 			 parg->count);
 	}
 
 	code = tmp = kcalloc(FETCH_INSN_MAX, sizeof(*code), GFP_KERNEL);
 	if (!code)
-		return -ENOMEM;
+		goto out;
 	code[FETCH_INSN_MAX - 1].op = FETCH_OP_END;
 
 	ret = parse_probe_arg(arg, parg->type, &code, &code[FETCH_INSN_MAX - 1],
@@ -628,19 +647,20 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
 	if (ret)
 		goto fail;
 
+	ret = -EINVAL;
 	/* Store operation */
 	if (!strcmp(parg->type->name, "string") ||
 	    !strcmp(parg->type->name, "ustring")) {
 		if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_UDEREF &&
 		    code->op != FETCH_OP_IMM && code->op != FETCH_OP_COMM &&
-		    code->op != FETCH_OP_DATA) {
+		    code->op != FETCH_OP_DATA && code->op != FETCH_OP_TP_ARG) {
 			trace_probe_log_err(offset + (t ? (t - arg) : 0),
 					    BAD_STRING);
-			ret = -EINVAL;
 			goto fail;
 		}
 		if ((code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM ||
-		     code->op == FETCH_OP_DATA) || parg->count) {
+		     code->op == FETCH_OP_DATA) || code->op == FETCH_OP_TP_ARG ||
+		     parg->count) {
 			/*
 			 * IMM, DATA and COMM is pointing actual address, those
 			 * must be kept, and if parg->count != 0, this is an
@@ -650,7 +670,6 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
 			code++;
 			if (code->op != FETCH_OP_NOP) {
 				trace_probe_log_err(offset, TOO_MANY_OPS);
-				ret = -EINVAL;
 				goto fail;
 			}
 		}
@@ -672,7 +691,6 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
 		code++;
 		if (code->op != FETCH_OP_NOP) {
 			trace_probe_log_err(offset, TOO_MANY_OPS);
-			ret = -EINVAL;
 			goto fail;
 		}
 		code->op = FETCH_OP_ST_RAW;
@@ -687,6 +705,7 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
 			goto fail;
 		}
 	}
+	ret = -EINVAL;
 	/* Loop(Array) operation */
 	if (parg->count) {
 		if (scode->op != FETCH_OP_ST_MEM &&
@@ -694,13 +713,11 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
 		    scode->op != FETCH_OP_ST_USTRING) {
 			trace_probe_log_err(offset + (t ? (t - arg) : 0),
 					    BAD_STRING);
-			ret = -EINVAL;
 			goto fail;
 		}
 		code++;
 		if (code->op != FETCH_OP_NOP) {
 			trace_probe_log_err(offset, TOO_MANY_OPS);
-			ret = -EINVAL;
 			goto fail;
 		}
 		code->op = FETCH_OP_LP_ARRAY;
@@ -709,6 +726,7 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
 	code++;
 	code->op = FETCH_OP_END;
 
+	ret = 0;
 	/* Shrink down the code buffer */
 	parg->code = kcalloc(code - tmp + 1, sizeof(*code), GFP_KERNEL);
 	if (!parg->code)
@@ -724,6 +742,8 @@ fail:
 				kfree(code->data);
 	}
 	kfree(tmp);
+out:
+	kfree(arg);
 
 	return ret;
 }
@@ -745,11 +765,11 @@ static int traceprobe_conflict_field_name(const char *name,
 	return 0;
 }
 
-int traceprobe_parse_probe_arg(struct trace_probe *tp, int i, char *arg,
+int traceprobe_parse_probe_arg(struct trace_probe *tp, int i, const char *arg,
 				unsigned int flags)
 {
 	struct probe_arg *parg = &tp->args[i];
-	char *body;
+	const char *body;
 
 	/* Increment count for freeing args in error case */
 	tp->nr_args++;
@@ -839,19 +859,29 @@ int traceprobe_update_arg(struct probe_arg *arg)
 /* When len=0, we just calculate the needed length */
 #define LEN_OR_ZERO (len ? len - pos : 0)
 static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
-			   bool is_return)
+			   enum probe_print_type ptype)
 {
 	struct probe_arg *parg;
 	int i, j;
 	int pos = 0;
 	const char *fmt, *arg;
 
-	if (!is_return) {
+	switch (ptype) {
+	case PROBE_PRINT_NORMAL:
 		fmt = "(%lx)";
 		arg = "REC->" FIELD_STRING_IP;
-	} else {
+		break;
+	case PROBE_PRINT_RETURN:
 		fmt = "(%lx <- %lx)";
 		arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
+		break;
+	case PROBE_PRINT_EVENT:
+		fmt = "(%u)";
+		arg = "REC->" FIELD_STRING_TYPE;
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return 0;
 	}
 
 	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
@@ -900,20 +930,20 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
 }
 #undef LEN_OR_ZERO
 
-int traceprobe_set_print_fmt(struct trace_probe *tp, bool is_return)
+int traceprobe_set_print_fmt(struct trace_probe *tp, enum probe_print_type ptype)
 {
 	struct trace_event_call *call = trace_probe_event_call(tp);
 	int len;
 	char *print_fmt;
 
 	/* First: called with 0 length to calculate the needed length */
-	len = __set_print_fmt(tp, NULL, 0, is_return);
+	len = __set_print_fmt(tp, NULL, 0, ptype);
 	print_fmt = kmalloc(len + 1, GFP_KERNEL);
 	if (!print_fmt)
 		return -ENOMEM;
 
 	/* Second: actually write the @print_fmt */
-	__set_print_fmt(tp, print_fmt, len + 1, is_return);
+	__set_print_fmt(tp, print_fmt, len + 1, ptype);
 	call->print_fmt = print_fmt;
 
 	return 0;
@@ -1029,11 +1059,36 @@ error:
 	return ret;
 }
 
+static struct trace_event_call *
+find_trace_event_call(const char *system, const char *event_name)
+{
+	struct trace_event_call *tp_event;
+	const char *name;
+
+	list_for_each_entry(tp_event, &ftrace_events, list) {
+		if (!tp_event->class->system ||
+		    strcmp(system, tp_event->class->system))
+			continue;
+		name = trace_event_name(tp_event);
+		if (!name || strcmp(event_name, name))
+			continue;
+		return tp_event;
+	}
+
+	return NULL;
+}
+
 int trace_probe_register_event_call(struct trace_probe *tp)
 {
 	struct trace_event_call *call = trace_probe_event_call(tp);
 	int ret;
 
+	lockdep_assert_held(&event_mutex);
+
+	if (find_trace_event_call(trace_probe_group_name(tp),
+				  trace_probe_name(tp)))
+		return -EEXIST;
+
 	ret = register_trace_event(&call->event);
 	if (!ret)
 		return -ENODEV;
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 227d518e5ba5..99e7a5df025e 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -38,6 +38,7 @@
 #define FIELD_STRING_IP		"__probe_ip"
 #define FIELD_STRING_RETIP	"__probe_ret_ip"
 #define FIELD_STRING_FUNC	"__probe_func"
+#define FIELD_STRING_TYPE	"__probe_type"
 
 #undef DEFINE_FIELD
 #define DEFINE_FIELD(type, item, name, is_signed)			\
@@ -102,6 +103,7 @@ enum fetch_op {
 	FETCH_OP_MOD_BF,	/* Bitfield: .basesize, .lshift, .rshift */
 	// Stage 5 (loop) op
 	FETCH_OP_LP_ARRAY,	/* Array: .param = loop count */
+	FETCH_OP_TP_ARG,	/* Trace Point argument */
 	FETCH_OP_END,
 	FETCH_NOP_SYMBOL,	/* Unresolved Symbol holder */
 };
@@ -351,10 +353,11 @@ int trace_probe_create(const char *raw_command, int (*createfn)(int, const char
 #define TPARG_FL_RETURN BIT(0)
 #define TPARG_FL_KERNEL BIT(1)
 #define TPARG_FL_FENTRY BIT(2)
-#define TPARG_FL_MASK	GENMASK(2, 0)
+#define TPARG_FL_TPOINT BIT(3)
+#define TPARG_FL_MASK	GENMASK(3, 0)
 
 extern int traceprobe_parse_probe_arg(struct trace_probe *tp, int i,
-				char *arg, unsigned int flags);
+				const char *argv, unsigned int flags);
 
 extern int traceprobe_update_arg(struct probe_arg *arg);
 extern void traceprobe_free_probe_arg(struct probe_arg *arg);
@@ -363,7 +366,13 @@ extern int traceprobe_split_symbol_offset(char *symbol, long *offset);
 int traceprobe_parse_event_name(const char **pevent, const char **pgroup,
 				char *buf, int offset);
 
-extern int traceprobe_set_print_fmt(struct trace_probe *tp, bool is_return);
+enum probe_print_type {
+	PROBE_PRINT_NORMAL,
+	PROBE_PRINT_RETURN,
+	PROBE_PRINT_EVENT,
+};
+
+extern int traceprobe_set_print_fmt(struct trace_probe *tp, enum probe_print_type ptype);
 
 #ifdef CONFIG_PERF_EVENTS
 extern struct trace_event_call *
@@ -399,6 +408,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
 	C(NO_EVENT_NAME,	"Event name is not specified"),		\
 	C(EVENT_TOO_LONG,	"Event name is too long"),		\
 	C(BAD_EVENT_NAME,	"Event name must follow the same rules as C identifiers"), \
+	C(EVENT_EXIST,		"Given group/event name is already used by another event"), \
 	C(RETVAL_ON_PROBE,	"$retval is not available on probe"),	\
 	C(BAD_STACK_NUM,	"Invalid stack number"),		\
 	C(BAD_ARG_NUM,		"Invalid argument number"),		\
diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h
index f003c5d02a3a..b3bdb8ddb862 100644
--- a/kernel/trace/trace_probe_tmpl.h
+++ b/kernel/trace/trace_probe_tmpl.h
@@ -54,7 +54,7 @@ fetch_apply_bitfield(struct fetch_insn *code, void *buf)
  * If dest is NULL, don't store result and return required dynamic data size.
  */
 static int
-process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs,
+process_fetch_insn(struct fetch_insn *code, void *rec,
 		   void *dest, void *base);
 static nokprobe_inline int fetch_store_strlen(unsigned long addr);
 static nokprobe_inline int
@@ -188,7 +188,7 @@ __get_data_size(struct trace_probe *tp, struct pt_regs *regs)
 
 /* Store the value of each argument */
 static nokprobe_inline void
-store_trace_args(void *data, struct trace_probe *tp, struct pt_regs *regs,
+store_trace_args(void *data, struct trace_probe *tp, void *rec,
 		 int header_size, int maxlen)
 {
 	struct probe_arg *arg;
@@ -203,7 +203,7 @@ store_trace_args(void *data, struct trace_probe *tp, struct pt_regs *regs,
 		/* Point the dynamic data area if needed */
 		if (unlikely(arg->dynamic))
 			*dl = make_data_loc(maxlen, dyndata - base);
-		ret = process_fetch_insn(arg->code, regs, dl, base);
+		ret = process_fetch_insn(arg->code, rec, dl, base);
 		if (unlikely(ret < 0 && arg->dynamic)) {
 			*dl = make_data_loc(0, dyndata - base);
 		} else {
diff --git a/kernel/trace/trace_synth.h b/kernel/trace/trace_synth.h
index 4007fe95cf42..b29595fe3ac5 100644
--- a/kernel/trace/trace_synth.h
+++ b/kernel/trace/trace_synth.h
@@ -5,7 +5,7 @@
 #include "trace_dynevent.h"
 
 #define SYNTH_SYSTEM		"synthetic"
-#define SYNTH_FIELDS_MAX	32
+#define SYNTH_FIELDS_MAX	64
 
 #define STR_VAR_LEN_MAX		MAX_FILTER_STR_VAL /* must be multiple of sizeof(u64) */
 
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 9b50869a5ddb..225ce569bf8f 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -83,10 +83,6 @@ static struct trace_uprobe *to_trace_uprobe(struct dyn_event *ev)
 	for_each_dyn_event(dpos)		\
 		if (is_trace_uprobe(dpos) && (pos = to_trace_uprobe(dpos)))
 
-#define SIZEOF_TRACE_UPROBE(n)				\
-	(offsetof(struct trace_uprobe, tp.args) +	\
-	(sizeof(struct probe_arg) * (n)))
-
 static int register_uprobe_event(struct trace_uprobe *tu);
 static int unregister_uprobe_event(struct trace_uprobe *tu);
 
@@ -217,9 +213,10 @@ static unsigned long translate_user_vaddr(unsigned long file_offset)
 
 /* Note that we don't verify it, since the code does not come from user space */
 static int
-process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest,
+process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
 		   void *base)
 {
+	struct pt_regs *regs = rec;
 	unsigned long val;
 
 	/* 1st stage: get value from context */
@@ -340,7 +337,7 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
 	struct trace_uprobe *tu;
 	int ret;
 
-	tu = kzalloc(SIZEOF_TRACE_UPROBE(nargs), GFP_KERNEL);
+	tu = kzalloc(struct_size(tu, tp.args, nargs), GFP_KERNEL);
 	if (!tu)
 		return ERR_PTR(-ENOMEM);
 
@@ -393,6 +390,10 @@ static int unregister_trace_uprobe(struct trace_uprobe *tu)
 	if (trace_probe_has_sibling(&tu->tp))
 		goto unreg;
 
+	/* If there's a reference to the dynamic event */
+	if (trace_event_dyn_busy(trace_probe_event_call(&tu->tp)))
+		return -EBUSY;
+
 	ret = unregister_uprobe_event(tu);
 	if (ret)
 		return ret;
@@ -455,7 +456,7 @@ static int append_trace_uprobe(struct trace_uprobe *tu, struct trace_uprobe *to)
 	/* Append to existing event */
 	ret = trace_probe_append(&tu->tp, &to->tp);
 	if (!ret)
-		dyn_event_add(&tu->devent);
+		dyn_event_add(&tu->devent, trace_probe_event_call(&tu->tp));
 
 	return ret;
 }
@@ -514,11 +515,15 @@ static int register_trace_uprobe(struct trace_uprobe *tu)
 
 	ret = register_uprobe_event(tu);
 	if (ret) {
-		pr_warn("Failed to register probe event(%d)\n", ret);
+		if (ret == -EEXIST) {
+			trace_probe_log_set_index(0);
+			trace_probe_log_err(0, EVENT_EXIST);
+		} else
+			pr_warn("Failed to register probe event(%d)\n", ret);
 		goto end;
 	}
 
-	dyn_event_add(&tu->devent);
+	dyn_event_add(&tu->devent, trace_probe_event_call(&tu->tp));
 
 end:
 	mutex_unlock(&event_mutex);
@@ -536,6 +541,7 @@ static int __trace_uprobe_create(int argc, const char **argv)
 	const char *event = NULL, *group = UPROBE_EVENT_SYSTEM;
 	char *arg, *filename, *rctr, *rctr_end, *tmp;
 	char buf[MAX_EVENT_NAME_LEN];
+	enum probe_print_type ptype;
 	struct path path;
 	unsigned long offset, ref_ctr_offset;
 	bool is_return = false;
@@ -680,21 +686,15 @@ static int __trace_uprobe_create(int argc, const char **argv)
 
 	/* parse arguments */
 	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
-		tmp = kstrdup(argv[i], GFP_KERNEL);
-		if (!tmp) {
-			ret = -ENOMEM;
-			goto error;
-		}
-
 		trace_probe_log_set_index(i + 2);
-		ret = traceprobe_parse_probe_arg(&tu->tp, i, tmp,
+		ret = traceprobe_parse_probe_arg(&tu->tp, i, argv[i],
 					is_return ? TPARG_FL_RETURN : 0);
-		kfree(tmp);
 		if (ret)
 			goto error;
 	}
 
-	ret = traceprobe_set_print_fmt(&tu->tp, is_ret_probe(tu));
+	ptype = is_ret_probe(tu) ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL;
+	ret = traceprobe_set_print_fmt(&tu->tp, ptype);
 	if (ret < 0)
 		goto error;
 
@@ -1585,6 +1585,7 @@ struct trace_event_call *
 create_local_trace_uprobe(char *name, unsigned long offs,
 			  unsigned long ref_ctr_offset, bool is_return)
 {
+	enum probe_print_type ptype;
 	struct trace_uprobe *tu;
 	struct path path;
 	int ret;
@@ -1619,7 +1620,8 @@ create_local_trace_uprobe(char *name, unsigned long offs,
 	tu->filename = kstrdup(name, GFP_KERNEL);
 	init_trace_event_call(tu);
 
-	if (traceprobe_set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0) {
+	ptype = is_ret_probe(tu) ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL;
+	if (traceprobe_set_print_fmt(&tu->tp, ptype) < 0) {
 		ret = -ENOMEM;
 		goto error;
 	}
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index efd14c79fab4..64ea283f2f86 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -577,7 +577,7 @@ bool trace_module_has_bad_taint(struct module *mod)
 static BLOCKING_NOTIFIER_HEAD(tracepoint_notify_list);
 
 /**
- * register_tracepoint_notifier - register tracepoint coming/going notifier
+ * register_tracepoint_module_notifier - register tracepoint coming/going notifier
  * @nb: notifier block
  *
  * Notifiers registered with this function are called on module
@@ -603,7 +603,7 @@ end:
 EXPORT_SYMBOL_GPL(register_tracepoint_module_notifier);
 
 /**
- * unregister_tracepoint_notifier - unregister tracepoint coming/going notifier
+ * unregister_tracepoint_module_notifier - unregister tracepoint coming/going notifier
  * @nb: notifier block
  *
  * The notifier block callback should expect a "struct tp_module" data
diff --git a/kernel/user.c b/kernel/user.c
index c82399c1618a..e2cf8c22b539 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -129,6 +129,22 @@ static struct user_struct *uid_hash_find(kuid_t uid, struct hlist_head *hashent)
 	return NULL;
 }
 
+static int user_epoll_alloc(struct user_struct *up)
+{
+#ifdef CONFIG_EPOLL
+	return percpu_counter_init(&up->epoll_watches, 0, GFP_KERNEL);
+#else
+	return 0;
+#endif
+}
+
+static void user_epoll_free(struct user_struct *up)
+{
+#ifdef CONFIG_EPOLL
+	percpu_counter_destroy(&up->epoll_watches);
+#endif
+}
+
 /* IRQs are disabled and uidhash_lock is held upon function entry.
  * IRQ state (as stored in flags) is restored and uidhash_lock released
  * upon function exit.
@@ -138,6 +154,7 @@ static void free_user(struct user_struct *up, unsigned long flags)
 {
 	uid_hash_remove(up);
 	spin_unlock_irqrestore(&uidhash_lock, flags);
+	user_epoll_free(up);
 	kmem_cache_free(uid_cachep, up);
 }
 
@@ -185,6 +202,10 @@ struct user_struct *alloc_uid(kuid_t uid)
 
 		new->uid = uid;
 		refcount_set(&new->__count, 1);
+		if (user_epoll_alloc(new)) {
+			kmem_cache_free(uid_cachep, new);
+			return NULL;
+		}
 		ratelimit_state_init(&new->ratelimit, HZ, 100);
 		ratelimit_set_flags(&new->ratelimit, RATELIMIT_MSG_ON_RELEASE);
 
@@ -195,6 +216,7 @@ struct user_struct *alloc_uid(kuid_t uid)
 		spin_lock_irq(&uidhash_lock);
 		up = uid_hash_find(uid, hashent);
 		if (up) {
+			user_epoll_free(new);
 			kmem_cache_free(uid_cachep, new);
 		} else {
 			uid_hash_insert(new, hashent);
@@ -216,6 +238,9 @@ static int __init uid_cache_init(void)
 	for(n = 0; n < UIDHASH_SZ; ++n)
 		INIT_HLIST_HEAD(uidhash_table + n);
 
+	if (user_epoll_alloc(&root_user))
+		panic("root_user epoll percpu counter alloc failed");
+
 	/* Insert the root user immediately (init already runs as root) */
 	spin_lock_irq(&uidhash_lock);
 	uid_hash_insert(&root_user, uidhashentry(GLOBAL_ROOT_UID));
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index ef82d401dde8..6b2e3ca7ee99 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -1385,7 +1385,7 @@ const struct proc_ns_operations userns_operations = {
 
 static __init int user_namespaces_init(void)
 {
-	user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
+	user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC | SLAB_ACCOUNT);
 	return 0;
 }
 subsys_initcall(user_namespaces_init);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 12b805dabbc9..ed4a31e34098 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -346,7 +346,7 @@ config FRAME_WARN
 	int "Warn for stack frames larger than"
 	range 0 8192
 	default 2048 if GCC_PLUGIN_LATENT_ENTROPY
-	default 1280 if (!64BIT && PARISC)
+	default 1536 if (!64BIT && PARISC)
 	default 1024 if (!64BIT && !PARISC)
 	default 2048 if 64BIT
 	help
@@ -365,6 +365,7 @@ config STRIP_ASM_SYMS
 config READABLE_ASM
 	bool "Generate readable assembler code"
 	depends on DEBUG_KERNEL
+	depends on CC_IS_GCC
 	help
 	  Disable some compiler optimizations that tend to generate human unreadable
 	  assembler output. This may make the kernel slightly slower, but it helps
@@ -383,6 +384,7 @@ config HEADERS_INSTALL
 
 config DEBUG_SECTION_MISMATCH
 	bool "Enable full Section mismatch analysis"
+	depends on CC_IS_GCC
 	help
 	  The section mismatch analysis checks if there are illegal
 	  references from one section to another section.
@@ -1062,7 +1064,6 @@ config HARDLOCKUP_DETECTOR
 	depends on HAVE_HARDLOCKUP_DETECTOR_PERF || HAVE_HARDLOCKUP_DETECTOR_ARCH
 	select LOCKUP_DETECTOR
 	select HARDLOCKUP_DETECTOR_PERF if HAVE_HARDLOCKUP_DETECTOR_PERF
-	select HARDLOCKUP_DETECTOR_ARCH if HAVE_HARDLOCKUP_DETECTOR_ARCH
 	help
 	  Say Y here to enable the kernel to act as a watchdog to detect
 	  hard lockups.
@@ -2059,8 +2060,9 @@ config TEST_MIN_HEAP
 	  If unsure, say N.
 
 config TEST_SORT
-	tristate "Array-based sort test"
-	depends on DEBUG_KERNEL || m
+	tristate "Array-based sort test" if !KUNIT_ALL_TESTS
+	depends on KUNIT
+	default KUNIT_ALL_TESTS
 	help
 	  This option enables the self-test function of 'sort()' at boot,
 	  or at module load time.
@@ -2441,8 +2443,7 @@ config SLUB_KUNIT_TEST
 
 config RATIONAL_KUNIT_TEST
 	tristate "KUnit test for rational.c" if !KUNIT_ALL_TESTS
-	depends on KUNIT
-	select RATIONAL
+	depends on KUNIT && RATIONAL
 	default KUNIT_ALL_TESTS
 	help
 	  This builds the rational math unit test.
diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan
index 0440f373248e..e0a93ffdef30 100644
--- a/lib/Kconfig.kcsan
+++ b/lib/Kconfig.kcsan
@@ -40,10 +40,14 @@ menuconfig KCSAN
 
 if KCSAN
 
-# Compiler capabilities that should not fail the test if they are unavailable.
 config CC_HAS_TSAN_COMPOUND_READ_BEFORE_WRITE
 	def_bool (CC_IS_CLANG && $(cc-option,-fsanitize=thread -mllvm -tsan-compound-read-before-write=1)) || \
 		 (CC_IS_GCC && $(cc-option,-fsanitize=thread --param tsan-compound-read-before-write=1))
+	help
+	  The compiler instruments plain compound read-write operations
+	  differently (++, --, +=, -=, |=, &=, etc.), which allows KCSAN to
+	  distinguish them from other plain accesses. This is currently
+	  supported by Clang 12 or later.
 
 config KCSAN_VERBOSE
 	bool "Show verbose reports with more information about system state"
@@ -58,9 +62,6 @@ config KCSAN_VERBOSE
 	  generated from any one of them, system stability may suffer due to
 	  deadlocks or recursion.  If in doubt, say N.
 
-config KCSAN_DEBUG
-	bool "Debugging of KCSAN internals"
-
 config KCSAN_SELFTEST
 	bool "Perform short selftests on boot"
 	default y
@@ -149,7 +150,8 @@ config KCSAN_SKIP_WATCH_RANDOMIZE
 	  KCSAN_WATCH_SKIP.
 
 config KCSAN_INTERRUPT_WATCHER
-	bool "Interruptible watchers"
+	bool "Interruptible watchers" if !KCSAN_STRICT
+	default KCSAN_STRICT
 	help
 	  If enabled, a task that set up a watchpoint may be interrupted while
 	  delayed. This option will allow KCSAN to detect races between
@@ -169,13 +171,9 @@ config KCSAN_REPORT_ONCE_IN_MS
 	  reporting to avoid flooding the console with reports.  Setting this
 	  to 0 disables rate limiting.
 
-# The main purpose of the below options is to control reported data races (e.g.
-# in fuzzer configs), and are not expected to be switched frequently by other
-# users. We could turn some of them into boot parameters, but given they should
-# not be switched normally, let's keep them here to simplify configuration.
-#
-# The defaults below are chosen to be very conservative, and may miss certain
-# bugs.
+# The main purpose of the below options is to control reported data races, and
+# are not expected to be switched frequently by non-testers or at runtime.
+# The defaults are chosen to be conservative, and can miss certain bugs.
 
 config KCSAN_REPORT_RACE_UNKNOWN_ORIGIN
 	bool "Report races of unknown origin"
@@ -186,9 +184,17 @@ config KCSAN_REPORT_RACE_UNKNOWN_ORIGIN
 	  reported if it was only possible to infer a race due to a data value
 	  change while an access is being delayed on a watchpoint.
 
+config KCSAN_STRICT
+	bool "Strict data-race checking"
+	help
+	  KCSAN will report data races with the strictest possible rules, which
+	  closely aligns with the rules defined by the Linux-kernel memory
+	  consistency model (LKMM).
+
 config KCSAN_REPORT_VALUE_CHANGE_ONLY
 	bool "Only report races where watcher observed a data value change"
 	default y
+	depends on !KCSAN_STRICT
 	help
 	  If enabled and a conflicting write is observed via a watchpoint, but
 	  the data value of the memory location was observed to remain
@@ -197,6 +203,7 @@ config KCSAN_REPORT_VALUE_CHANGE_ONLY
 config KCSAN_ASSUME_PLAIN_WRITES_ATOMIC
 	bool "Assume that plain aligned writes up to word size are atomic"
 	default y
+	depends on !KCSAN_STRICT
 	help
 	  Assume that plain aligned writes up to word size are atomic by
 	  default, and also not subject to other unsafe compiler optimizations
@@ -209,6 +216,7 @@ config KCSAN_ASSUME_PLAIN_WRITES_ATOMIC
 
 config KCSAN_IGNORE_ATOMICS
 	bool "Do not instrument marked atomic accesses"
+	depends on !KCSAN_STRICT
 	help
 	  Never instrument marked atomic accesses. This option can be used for
 	  additional filtering. Conflicting marked atomic reads and plain
@@ -224,4 +232,14 @@ config KCSAN_IGNORE_ATOMICS
 	  due to two conflicting plain writes will be reported (aligned and
 	  unaligned, if CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n).
 
+config KCSAN_PERMISSIVE
+	bool "Enable all additional permissive rules"
+	depends on KCSAN_REPORT_VALUE_CHANGE_ONLY
+	help
+	  Enable additional permissive rules to ignore certain classes of data
+	  races (also see kernel/kcsan/permissive.h). None of the permissive
+	  rules imply that such data races are generally safe, but can be used
+	  to further reduce reported data races due to data-racy patterns
+	  common across the kernel.
+
 endif # KCSAN
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 927017431fb6..f8419cff1147 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -142,16 +142,16 @@ xbc_node_match_prefix(struct xbc_node *node, const char **prefix)
 }
 
 /**
- * xbc_node_find_child() - Find a child node which matches given key
+ * xbc_node_find_subkey() - Find a subkey node which matches given key
  * @parent: An XBC node.
  * @key: A key string.
  *
- * Search a node under @parent which matches @key. The @key can contain
+ * Search a key node under @parent which matches @key. The @key can contain
  * several words jointed with '.'. If @parent is NULL, this searches the
  * node from whole tree. Return NULL if no node is matched.
  */
 struct xbc_node * __init
-xbc_node_find_child(struct xbc_node *parent, const char *key)
+xbc_node_find_subkey(struct xbc_node *parent, const char *key)
 {
 	struct xbc_node *node;
 
@@ -191,7 +191,7 @@ const char * __init
 xbc_node_find_value(struct xbc_node *parent, const char *key,
 		    struct xbc_node **vnode)
 {
-	struct xbc_node *node = xbc_node_find_child(parent, key);
+	struct xbc_node *node = xbc_node_find_subkey(parent, key);
 
 	if (!node || !xbc_node_is_key(node))
 		return NULL;
diff --git a/lib/debug_info.c b/lib/debug_info.c
index 36daf753293c..cc4723c74af5 100644
--- a/lib/debug_info.c
+++ b/lib/debug_info.c
@@ -5,8 +5,6 @@
  * CONFIG_DEBUG_INFO_REDUCED. Please do not add actual code. However,
  * adding appropriate #includes is fine.
  */
-#include <stdarg.h>
-
 #include <linux/cred.h>
 #include <linux/crypto.h>
 #include <linux/dcache.h>
@@ -22,6 +20,7 @@
 #include <linux/net.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/stdarg.h>
 #include <linux/types.h>
 #include <net/addrconf.h>
 #include <net/sock.h>
diff --git a/lib/dump_stack.c b/lib/dump_stack.c
index cd3387bb34e5..6b7f1bf6715d 100644
--- a/lib/dump_stack.c
+++ b/lib/dump_stack.c
@@ -89,7 +89,8 @@ static void __dump_stack(const char *log_lvl)
 }
 
 /**
- * dump_stack - dump the current task information and its stack trace
+ * dump_stack_lvl - dump the current task information and its stack trace
+ * @log_lvl: log level
  *
  * Architectures can override this implementation by implementing its own.
  */
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index e23123ae3a13..f2d50d69a6c3 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -672,7 +672,7 @@ static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
  * _copy_mc_to_iter - copy to iter with source memory error exception handling
  * @addr: source kernel address
  * @bytes: total transfer length
- * @iter: destination iterator
+ * @i: destination iterator
  *
  * The pmem driver deploys this for the dax operation
  * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
@@ -690,6 +690,8 @@ static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
  * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
  *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
  *   a short copy.
+ *
+ * Return: number of bytes copied (may be %0)
  */
 size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
@@ -744,7 +746,7 @@ EXPORT_SYMBOL(_copy_from_iter_nocache);
  * _copy_from_iter_flushcache - write destination through cpu cache
  * @addr: destination kernel address
  * @bytes: total transfer length
- * @iter: source iterator
+ * @i: source iterator
  *
  * The pmem driver arranges for filesystem-dax to use this facility via
  * dax_copy_from_iter() for ensuring that writes to persistent memory
@@ -753,6 +755,8 @@ EXPORT_SYMBOL(_copy_from_iter_nocache);
  * all iterator types. The _copy_from_iter_nocache() only attempts to
  * bypass the cache for the ITER_IOVEC case, and on some archs may use
  * instructions that strand dirty-data in the cache.
+ *
+ * Return: number of bytes copied (may be %0)
  */
 size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
 {
diff --git a/lib/kasprintf.c b/lib/kasprintf.c
index bacf7b83ccf0..cd2f5974ed98 100644
--- a/lib/kasprintf.c
+++ b/lib/kasprintf.c
@@ -5,7 +5,7 @@
  *  Copyright (C) 1991, 1992  Linus Torvalds
  */
 
-#include <stdarg.h>
+#include <linux/stdarg.h>
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/types.h>
diff --git a/lib/kunit/string-stream.h b/lib/kunit/string-stream.h
index 5e94b623454f..43f9508a55b4 100644
--- a/lib/kunit/string-stream.h
+++ b/lib/kunit/string-stream.h
@@ -11,7 +11,7 @@
 
 #include <linux/spinlock.h>
 #include <linux/types.h>
-#include <stdarg.h>
+#include <linux/stdarg.h>
 
 struct string_stream_fragment {
 	struct kunit *test;
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index d79ecb86ea57..f246b847024e 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -10,6 +10,7 @@
 #include <kunit/test-bug.h>
 #include <linux/kernel.h>
 #include <linux/kref.h>
+#include <linux/moduleparam.h>
 #include <linux/sched/debug.h>
 #include <linux/sched.h>
 
@@ -52,6 +53,51 @@ EXPORT_SYMBOL_GPL(__kunit_fail_current_test);
 #endif
 
 /*
+ * KUnit statistic mode:
+ * 0 - disabled
+ * 1 - only when there is more than one subtest
+ * 2 - enabled
+ */
+static int kunit_stats_enabled = 1;
+module_param_named(stats_enabled, kunit_stats_enabled, int, 0644);
+MODULE_PARM_DESC(stats_enabled,
+		  "Print test stats: never (0), only for multiple subtests (1), or always (2)");
+
+struct kunit_result_stats {
+	unsigned long passed;
+	unsigned long skipped;
+	unsigned long failed;
+	unsigned long total;
+};
+
+static bool kunit_should_print_stats(struct kunit_result_stats stats)
+{
+	if (kunit_stats_enabled == 0)
+		return false;
+
+	if (kunit_stats_enabled == 2)
+		return true;
+
+	return (stats.total > 1);
+}
+
+static void kunit_print_test_stats(struct kunit *test,
+				   struct kunit_result_stats stats)
+{
+	if (!kunit_should_print_stats(stats))
+		return;
+
+	kunit_log(KERN_INFO, test,
+		  KUNIT_SUBTEST_INDENT
+		  "# %s: pass:%lu fail:%lu skip:%lu total:%lu",
+		  test->name,
+		  stats.passed,
+		  stats.failed,
+		  stats.skipped,
+		  stats.total);
+}
+
+/*
  * Append formatted message to log, size of which is limited to
  * KUNIT_LOG_SIZE bytes (including null terminating byte).
  */
@@ -393,15 +439,69 @@ static void kunit_run_case_catch_errors(struct kunit_suite *suite,
 		test_case->status = KUNIT_SUCCESS;
 }
 
+static void kunit_print_suite_stats(struct kunit_suite *suite,
+				    struct kunit_result_stats suite_stats,
+				    struct kunit_result_stats param_stats)
+{
+	if (kunit_should_print_stats(suite_stats)) {
+		kunit_log(KERN_INFO, suite,
+			  "# %s: pass:%lu fail:%lu skip:%lu total:%lu",
+			  suite->name,
+			  suite_stats.passed,
+			  suite_stats.failed,
+			  suite_stats.skipped,
+			  suite_stats.total);
+	}
+
+	if (kunit_should_print_stats(param_stats)) {
+		kunit_log(KERN_INFO, suite,
+			  "# Totals: pass:%lu fail:%lu skip:%lu total:%lu",
+			  param_stats.passed,
+			  param_stats.failed,
+			  param_stats.skipped,
+			  param_stats.total);
+	}
+}
+
+static void kunit_update_stats(struct kunit_result_stats *stats,
+			       enum kunit_status status)
+{
+	switch (status) {
+	case KUNIT_SUCCESS:
+		stats->passed++;
+		break;
+	case KUNIT_SKIPPED:
+		stats->skipped++;
+		break;
+	case KUNIT_FAILURE:
+		stats->failed++;
+		break;
+	}
+
+	stats->total++;
+}
+
+static void kunit_accumulate_stats(struct kunit_result_stats *total,
+				   struct kunit_result_stats add)
+{
+	total->passed += add.passed;
+	total->skipped += add.skipped;
+	total->failed += add.failed;
+	total->total += add.total;
+}
+
 int kunit_run_tests(struct kunit_suite *suite)
 {
 	char param_desc[KUNIT_PARAM_DESC_SIZE];
 	struct kunit_case *test_case;
+	struct kunit_result_stats suite_stats = { 0 };
+	struct kunit_result_stats total_stats = { 0 };
 
 	kunit_print_subtest_start(suite);
 
 	kunit_suite_for_each_test_case(suite, test_case) {
 		struct kunit test = { .param_value = NULL, .param_index = 0 };
+		struct kunit_result_stats param_stats = { 0 };
 		test_case->status = KUNIT_SKIPPED;
 
 		if (test_case->generate_params) {
@@ -431,14 +531,23 @@ int kunit_run_tests(struct kunit_suite *suite)
 				test.param_value = test_case->generate_params(test.param_value, param_desc);
 				test.param_index++;
 			}
+
+			kunit_update_stats(&param_stats, test.status);
+
 		} while (test.param_value);
 
+		kunit_print_test_stats(&test, param_stats);
+
 		kunit_print_ok_not_ok(&test, true, test_case->status,
 				      kunit_test_case_num(suite, test_case),
 				      test_case->name,
 				      test.status_comment);
+
+		kunit_update_stats(&suite_stats, test_case->status);
+		kunit_accumulate_stats(&total_stats, param_stats);
 	}
 
+	kunit_print_suite_stats(suite, suite_stats, total_stats);
 	kunit_print_subtest_end(suite);
 
 	return 0;
diff --git a/lib/logic_iomem.c b/lib/logic_iomem.c
index b76b92dd0f1f..9bdfde0c0f86 100644
--- a/lib/logic_iomem.c
+++ b/lib/logic_iomem.c
@@ -6,6 +6,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/logic_iomem.h>
+#include <asm/io.h>
 
 struct logic_iomem_region {
 	const struct resource *res;
@@ -78,7 +79,7 @@ static void __iomem *real_ioremap(phys_addr_t offset, size_t size)
 static void real_iounmap(void __iomem *addr)
 {
 	WARN(1, "invalid iounmap for addr 0x%llx\n",
-	     (unsigned long long)addr);
+	     (unsigned long long __force)addr);
 }
 #endif /* CONFIG_LOGIC_IOMEM_FALLBACK */
 
@@ -172,14 +173,15 @@ EXPORT_SYMBOL(iounmap);
 static u##sz real_raw_read ## op(const volatile void __iomem *addr)	\
 {									\
 	WARN(1, "Invalid read" #op " at address %llx\n",		\
-	     (unsigned long long)addr);					\
+	     (unsigned long long __force)addr);				\
 	return (u ## sz)~0ULL;						\
 }									\
 									\
-void real_raw_write ## op(u ## sz val, volatile void __iomem *addr)	\
+static void real_raw_write ## op(u ## sz val,				\
+				 volatile void __iomem *addr)		\
 {									\
 	WARN(1, "Invalid writeq" #op " of 0x%llx at address %llx\n",	\
-	     (unsigned long long)val, (unsigned long long)addr);	\
+	     (unsigned long long)val, (unsigned long long __force)addr);\
 }									\
 
 MAKE_FALLBACK(b, 8);
@@ -192,14 +194,14 @@ MAKE_FALLBACK(q, 64);
 static void real_memset_io(volatile void __iomem *addr, int value, size_t size)
 {
 	WARN(1, "Invalid memset_io at address 0x%llx\n",
-	     (unsigned long long)addr);
+	     (unsigned long long __force)addr);
 }
 
 static void real_memcpy_fromio(void *buffer, const volatile void __iomem *addr,
 			       size_t size)
 {
 	WARN(1, "Invalid memcpy_fromio at address 0x%llx\n",
-	     (unsigned long long)addr);
+	     (unsigned long long __force)addr);
 
 	memset(buffer, 0xff, size);
 }
@@ -208,7 +210,7 @@ static void real_memcpy_toio(volatile void __iomem *addr, const void *buffer,
 			     size_t size)
 {
 	WARN(1, "Invalid memcpy_toio at address 0x%llx\n",
-	     (unsigned long long)addr);
+	     (unsigned long long __force)addr);
 }
 #endif /* CONFIG_LOGIC_IOMEM_FALLBACK */
 
diff --git a/lib/math/Kconfig b/lib/math/Kconfig
index f19bc9734fa7..0634b428d0cb 100644
--- a/lib/math/Kconfig
+++ b/lib/math/Kconfig
@@ -14,4 +14,4 @@ config PRIME_NUMBERS
 	  If unsure, say N.
 
 config RATIONAL
-	bool
+	tristate
diff --git a/lib/math/rational.c b/lib/math/rational.c
index c0ab51d8fbb9..ec59d426ea63 100644
--- a/lib/math/rational.c
+++ b/lib/math/rational.c
@@ -13,6 +13,7 @@
 #include <linux/export.h>
 #include <linux/minmax.h>
 #include <linux/limits.h>
+#include <linux/module.h>
 
 /*
  * calculate best rational approximation for a given fraction
@@ -106,3 +107,5 @@ void rational_best_approximation(
 }
 
 EXPORT_SYMBOL(rational_best_approximation);
+
+MODULE_LICENSE("GPL v2");
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 27efa6178153..abb3432ed744 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -182,6 +182,7 @@ static void sg_kfree(struct scatterlist *sg, unsigned int nents)
  * @nents_first_chunk: Number of entries int the (preallocated) first
  * 	scatterlist chunk, 0 means no such preallocated first chunk
  * @free_fn:	Free function
+ * @num_ents:	Number of entries in the table
  *
  *  Description:
  *    Free an sg table previously allocated and setup with
@@ -190,7 +191,8 @@ static void sg_kfree(struct scatterlist *sg, unsigned int nents)
  *
  **/
 void __sg_free_table(struct sg_table *table, unsigned int max_ents,
-		     unsigned int nents_first_chunk, sg_free_fn *free_fn)
+		     unsigned int nents_first_chunk, sg_free_fn *free_fn,
+		     unsigned int num_ents)
 {
 	struct scatterlist *sgl, *next;
 	unsigned curr_max_ents = nents_first_chunk ?: max_ents;
@@ -199,8 +201,8 @@ void __sg_free_table(struct sg_table *table, unsigned int max_ents,
 		return;
 
 	sgl = table->sgl;
-	while (table->orig_nents) {
-		unsigned int alloc_size = table->orig_nents;
+	while (num_ents) {
+		unsigned int alloc_size = num_ents;
 		unsigned int sg_size;
 
 		/*
@@ -218,7 +220,7 @@ void __sg_free_table(struct sg_table *table, unsigned int max_ents,
 			next = NULL;
 		}
 
-		table->orig_nents -= sg_size;
+		num_ents -= sg_size;
 		if (nents_first_chunk)
 			nents_first_chunk = 0;
 		else
@@ -232,13 +234,27 @@ void __sg_free_table(struct sg_table *table, unsigned int max_ents,
 EXPORT_SYMBOL(__sg_free_table);
 
 /**
+ * sg_free_append_table - Free a previously allocated append sg table.
+ * @table:	 The mapped sg append table header
+ *
+ **/
+void sg_free_append_table(struct sg_append_table *table)
+{
+	__sg_free_table(&table->sgt, SG_MAX_SINGLE_ALLOC, false, sg_kfree,
+			table->total_nents);
+}
+EXPORT_SYMBOL(sg_free_append_table);
+
+
+/**
  * sg_free_table - Free a previously allocated sg table
  * @table:	The mapped sg table header
  *
  **/
 void sg_free_table(struct sg_table *table)
 {
-	__sg_free_table(table, SG_MAX_SINGLE_ALLOC, false, sg_kfree);
+	__sg_free_table(table, SG_MAX_SINGLE_ALLOC, false, sg_kfree,
+			table->orig_nents);
 }
 EXPORT_SYMBOL(sg_free_table);
 
@@ -359,13 +375,12 @@ int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
 	ret = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC,
 			       NULL, 0, gfp_mask, sg_kmalloc);
 	if (unlikely(ret))
-		__sg_free_table(table, SG_MAX_SINGLE_ALLOC, 0, sg_kfree);
-
+		sg_free_table(table);
 	return ret;
 }
 EXPORT_SYMBOL(sg_alloc_table);
 
-static struct scatterlist *get_next_sg(struct sg_table *table,
+static struct scatterlist *get_next_sg(struct sg_append_table *table,
 				       struct scatterlist *cur,
 				       unsigned long needed_sges,
 				       gfp_t gfp_mask)
@@ -386,54 +401,52 @@ static struct scatterlist *get_next_sg(struct sg_table *table,
 		return ERR_PTR(-ENOMEM);
 	sg_init_table(new_sg, alloc_size);
 	if (cur) {
+		table->total_nents += alloc_size - 1;
 		__sg_chain(next_sg, new_sg);
-		table->orig_nents += alloc_size - 1;
 	} else {
-		table->sgl = new_sg;
-		table->orig_nents = alloc_size;
-		table->nents = 0;
+		table->sgt.sgl = new_sg;
+		table->total_nents = alloc_size;
 	}
 	return new_sg;
 }
 
 /**
- * __sg_alloc_table_from_pages - Allocate and initialize an sg table from
- *			         an array of pages
- * @sgt:	 The sg table header to use
- * @pages:	 Pointer to an array of page pointers
- * @n_pages:	 Number of pages in the pages array
+ * sg_alloc_append_table_from_pages - Allocate and initialize an append sg
+ *                                    table from an array of pages
+ * @sgt_append:  The sg append table to use
+ * @pages:       Pointer to an array of page pointers
+ * @n_pages:     Number of pages in the pages array
  * @offset:      Offset from start of the first page to the start of a buffer
  * @size:        Number of valid bytes in the buffer (after offset)
  * @max_segment: Maximum size of a scatterlist element in bytes
- * @prv:	 Last populated sge in sgt
  * @left_pages:  Left pages caller have to set after this call
  * @gfp_mask:	 GFP allocation mask
  *
  * Description:
- *    If @prv is NULL, allocate and initialize an sg table from a list of pages,
- *    else reuse the scatterlist passed in at @prv.
- *    Contiguous ranges of the pages are squashed into a single scatterlist
- *    entry up to the maximum size specified in @max_segment.  A user may
- *    provide an offset at a start and a size of valid data in a buffer
- *    specified by the page array.
+ *    In the first call it allocate and initialize an sg table from a list of
+ *    pages, else reuse the scatterlist from sgt_append. Contiguous ranges of
+ *    the pages are squashed into a single scatterlist entry up to the maximum
+ *    size specified in @max_segment.  A user may provide an offset at a start
+ *    and a size of valid data in a buffer specified by the page array. The
+ *    returned sg table is released by sg_free_append_table
  *
  * Returns:
- *   Last SGE in sgt on success, PTR_ERR on otherwise.
- *   The allocation in @sgt must be released by sg_free_table.
+ *   0 on success, negative error on failure
  *
  * Notes:
  *   If this function returns non-0 (eg failure), the caller must call
- *   sg_free_table() to cleanup any leftover allocations.
+ *   sg_free_append_table() to cleanup any leftover allocations.
+ *
+ *   In the fist call, sgt_append must by initialized.
  */
-struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt,
+int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append,
 		struct page **pages, unsigned int n_pages, unsigned int offset,
 		unsigned long size, unsigned int max_segment,
-		struct scatterlist *prv, unsigned int left_pages,
-		gfp_t gfp_mask)
+		unsigned int left_pages, gfp_t gfp_mask)
 {
 	unsigned int chunks, cur_page, seg_len, i, prv_len = 0;
 	unsigned int added_nents = 0;
-	struct scatterlist *s = prv;
+	struct scatterlist *s = sgt_append->prv;
 
 	/*
 	 * The algorithm below requires max_segment to be aligned to PAGE_SIZE
@@ -441,25 +454,26 @@ struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt,
 	 */
 	max_segment = ALIGN_DOWN(max_segment, PAGE_SIZE);
 	if (WARN_ON(max_segment < PAGE_SIZE))
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
-	if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && prv)
-		return ERR_PTR(-EOPNOTSUPP);
+	if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && sgt_append->prv)
+		return -EOPNOTSUPP;
 
-	if (prv) {
-		unsigned long paddr = (page_to_pfn(sg_page(prv)) * PAGE_SIZE +
-				       prv->offset + prv->length) /
-				      PAGE_SIZE;
+	if (sgt_append->prv) {
+		unsigned long paddr =
+			(page_to_pfn(sg_page(sgt_append->prv)) * PAGE_SIZE +
+			 sgt_append->prv->offset + sgt_append->prv->length) /
+			PAGE_SIZE;
 
 		if (WARN_ON(offset))
-			return ERR_PTR(-EINVAL);
+			return -EINVAL;
 
 		/* Merge contiguous pages into the last SG */
-		prv_len = prv->length;
+		prv_len = sgt_append->prv->length;
 		while (n_pages && page_to_pfn(pages[0]) == paddr) {
-			if (prv->length + PAGE_SIZE > max_segment)
+			if (sgt_append->prv->length + PAGE_SIZE > max_segment)
 				break;
-			prv->length += PAGE_SIZE;
+			sgt_append->prv->length += PAGE_SIZE;
 			paddr++;
 			pages++;
 			n_pages--;
@@ -496,15 +510,16 @@ struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt,
 		}
 
 		/* Pass how many chunks might be left */
-		s = get_next_sg(sgt, s, chunks - i + left_pages, gfp_mask);
+		s = get_next_sg(sgt_append, s, chunks - i + left_pages,
+				gfp_mask);
 		if (IS_ERR(s)) {
 			/*
 			 * Adjust entry length to be as before function was
 			 * called.
 			 */
-			if (prv)
-				prv->length = prv_len;
-			return s;
+			if (sgt_append->prv)
+				sgt_append->prv->length = prv_len;
+			return PTR_ERR(s);
 		}
 		chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset;
 		sg_set_page(s, pages[cur_page],
@@ -514,42 +529,58 @@ struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt,
 		offset = 0;
 		cur_page = j;
 	}
-	sgt->nents += added_nents;
+	sgt_append->sgt.nents += added_nents;
+	sgt_append->sgt.orig_nents = sgt_append->sgt.nents;
+	sgt_append->prv = s;
 out:
 	if (!left_pages)
 		sg_mark_end(s);
-	return s;
+	return 0;
 }
-EXPORT_SYMBOL(__sg_alloc_table_from_pages);
+EXPORT_SYMBOL(sg_alloc_append_table_from_pages);
 
 /**
- * sg_alloc_table_from_pages - Allocate and initialize an sg table from
- *			       an array of pages
+ * sg_alloc_table_from_pages_segment - Allocate and initialize an sg table from
+ *                                     an array of pages and given maximum
+ *                                     segment.
  * @sgt:	 The sg table header to use
  * @pages:	 Pointer to an array of page pointers
  * @n_pages:	 Number of pages in the pages array
  * @offset:      Offset from start of the first page to the start of a buffer
  * @size:        Number of valid bytes in the buffer (after offset)
+ * @max_segment: Maximum size of a scatterlist element in bytes
  * @gfp_mask:	 GFP allocation mask
  *
  *  Description:
  *    Allocate and initialize an sg table from a list of pages. Contiguous
- *    ranges of the pages are squashed into a single scatterlist node. A user
- *    may provide an offset at a start and a size of valid data in a buffer
- *    specified by the page array. The returned sg table is released by
- *    sg_free_table.
+ *    ranges of the pages are squashed into a single scatterlist node up to the
+ *    maximum size specified in @max_segment. A user may provide an offset at a
+ *    start and a size of valid data in a buffer specified by the page array.
  *
- * Returns:
+ *    The returned sg table is released by sg_free_table.
+ *
+ *  Returns:
  *   0 on success, negative error on failure
  */
-int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
-			      unsigned int n_pages, unsigned int offset,
-			      unsigned long size, gfp_t gfp_mask)
+int sg_alloc_table_from_pages_segment(struct sg_table *sgt, struct page **pages,
+				unsigned int n_pages, unsigned int offset,
+				unsigned long size, unsigned int max_segment,
+				gfp_t gfp_mask)
 {
-	return PTR_ERR_OR_ZERO(__sg_alloc_table_from_pages(sgt, pages, n_pages,
-			offset, size, UINT_MAX, NULL, 0, gfp_mask));
+	struct sg_append_table append = {};
+	int err;
+
+	err = sg_alloc_append_table_from_pages(&append, pages, n_pages, offset,
+					       size, max_segment, 0, gfp_mask);
+	if (err) {
+		sg_free_append_table(&append);
+		return err;
+	}
+	memcpy(sgt, &append.sgt, sizeof(*sgt));
+	WARN_ON(append.total_nents != sgt->orig_nents);
+	return 0;
 }
-EXPORT_SYMBOL(sg_alloc_table_from_pages);
+EXPORT_SYMBOL(sg_alloc_table_from_pages_segment);
 
 #ifdef CONFIG_SGL_ALLOC
 
@@ -887,9 +918,8 @@ void sg_miter_stop(struct sg_mapping_iter *miter)
 		miter->__offset += miter->consumed;
 		miter->__remaining -= miter->consumed;
 
-		if ((miter->__flags & SG_MITER_TO_SG) &&
-		    !PageSlab(miter->page))
-			flush_kernel_dcache_page(miter->page);
+		if (miter->__flags & SG_MITER_TO_SG)
+			flush_dcache_page(miter->page);
 
 		if (miter->__flags & SG_MITER_ATOMIC) {
 			WARN_ON_ONCE(preemptible());
diff --git a/lib/sg_pool.c b/lib/sg_pool.c
index db29e5c1f790..a0b1a52cd6f7 100644
--- a/lib/sg_pool.c
+++ b/lib/sg_pool.c
@@ -90,7 +90,8 @@ void sg_free_table_chained(struct sg_table *table,
 	if (nents_first_chunk == 1)
 		nents_first_chunk = 0;
 
-	__sg_free_table(table, SG_CHUNK_SIZE, nents_first_chunk, sg_pool_free);
+	__sg_free_table(table, SG_CHUNK_SIZE, nents_first_chunk, sg_pool_free,
+			table->orig_nents);
 }
 EXPORT_SYMBOL_GPL(sg_free_table_chained);
 
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index 8be9d4b3b259..8835e0784578 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -120,12 +120,28 @@ static void kasan_test_exit(struct kunit *test)
 static void kmalloc_oob_right(struct kunit *test)
 {
 	char *ptr;
-	size_t size = 123;
+	size_t size = 128 - KASAN_GRANULE_SIZE - 5;
 
 	ptr = kmalloc(size, GFP_KERNEL);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
-	KUNIT_EXPECT_KASAN_FAIL(test, ptr[size + OOB_TAG_OFF] = 'x');
+	/*
+	 * An unaligned access past the requested kmalloc size.
+	 * Only generic KASAN can precisely detect these.
+	 */
+	if (IS_ENABLED(CONFIG_KASAN_GENERIC))
+		KUNIT_EXPECT_KASAN_FAIL(test, ptr[size] = 'x');
+
+	/*
+	 * An aligned access into the first out-of-bounds granule that falls
+	 * within the aligned kmalloc object.
+	 */
+	KUNIT_EXPECT_KASAN_FAIL(test, ptr[size + 5] = 'y');
+
+	/* Out-of-bounds access past the aligned kmalloc object. */
+	KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] =
+					ptr[size + KASAN_GRANULE_SIZE + 5]);
+
 	kfree(ptr);
 }
 
@@ -149,7 +165,7 @@ static void kmalloc_node_oob_right(struct kunit *test)
 	ptr = kmalloc_node(size, GFP_KERNEL, 0);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
-	KUNIT_EXPECT_KASAN_FAIL(test, ptr[size] = 0);
+	KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] = ptr[size]);
 	kfree(ptr);
 }
 
@@ -185,7 +201,7 @@ static void kmalloc_pagealloc_uaf(struct kunit *test)
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 	kfree(ptr);
 
-	KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] = 0);
+	KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[0]);
 }
 
 static void kmalloc_pagealloc_invalid_free(struct kunit *test)
@@ -219,7 +235,7 @@ static void pagealloc_oob_right(struct kunit *test)
 	ptr = page_address(pages);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
-	KUNIT_EXPECT_KASAN_FAIL(test, ptr[size] = 0);
+	KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] = ptr[size]);
 	free_pages((unsigned long)ptr, order);
 }
 
@@ -234,7 +250,7 @@ static void pagealloc_uaf(struct kunit *test)
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 	free_pages((unsigned long)ptr, order);
 
-	KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] = 0);
+	KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[0]);
 }
 
 static void kmalloc_large_oob_right(struct kunit *test)
@@ -410,64 +426,70 @@ static void kmalloc_uaf_16(struct kunit *test)
 	kfree(ptr1);
 }
 
+/*
+ * Note: in the memset tests below, the written range touches both valid and
+ * invalid memory. This makes sure that the instrumentation does not only check
+ * the starting address but the whole range.
+ */
+
 static void kmalloc_oob_memset_2(struct kunit *test)
 {
 	char *ptr;
-	size_t size = 8;
+	size_t size = 128 - KASAN_GRANULE_SIZE;
 
 	ptr = kmalloc(size, GFP_KERNEL);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
-	KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + 7 + OOB_TAG_OFF, 0, 2));
+	KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 1, 0, 2));
 	kfree(ptr);
 }
 
 static void kmalloc_oob_memset_4(struct kunit *test)
 {
 	char *ptr;
-	size_t size = 8;
+	size_t size = 128 - KASAN_GRANULE_SIZE;
 
 	ptr = kmalloc(size, GFP_KERNEL);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
-	KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + 5 + OOB_TAG_OFF, 0, 4));
+	KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 3, 0, 4));
 	kfree(ptr);
 }
 
-
 static void kmalloc_oob_memset_8(struct kunit *test)
 {
 	char *ptr;
-	size_t size = 8;
+	size_t size = 128 - KASAN_GRANULE_SIZE;
 
 	ptr = kmalloc(size, GFP_KERNEL);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
-	KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + 1 + OOB_TAG_OFF, 0, 8));
+	KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 7, 0, 8));
 	kfree(ptr);
 }
 
 static void kmalloc_oob_memset_16(struct kunit *test)
 {
 	char *ptr;
-	size_t size = 16;
+	size_t size = 128 - KASAN_GRANULE_SIZE;
 
 	ptr = kmalloc(size, GFP_KERNEL);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
-	KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + 1 + OOB_TAG_OFF, 0, 16));
+	KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 15, 0, 16));
 	kfree(ptr);
 }
 
 static void kmalloc_oob_in_memset(struct kunit *test)
 {
 	char *ptr;
-	size_t size = 666;
+	size_t size = 128 - KASAN_GRANULE_SIZE;
 
 	ptr = kmalloc(size, GFP_KERNEL);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
-	KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr, 0, size + 5 + OOB_TAG_OFF));
+	KUNIT_EXPECT_KASAN_FAIL(test,
+				memset(ptr, 0, size + KASAN_GRANULE_SIZE));
 	kfree(ptr);
 }
 
@@ -477,11 +499,17 @@ static void kmalloc_memmove_invalid_size(struct kunit *test)
 	size_t size = 64;
 	volatile size_t invalid_size = -2;
 
+	/*
+	 * Hardware tag-based mode doesn't check memmove for negative size.
+	 * As a result, this test introduces a side-effect memory corruption,
+	 * which can result in a crash.
+	 */
+	KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_HW_TAGS);
+
 	ptr = kmalloc(size, GFP_KERNEL);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
 	memset((char *)ptr, 0, 64);
-
 	KUNIT_EXPECT_KASAN_FAIL(test,
 		memmove((char *)ptr, (char *)ptr + 4, invalid_size));
 	kfree(ptr);
@@ -496,7 +524,7 @@ static void kmalloc_uaf(struct kunit *test)
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
 	kfree(ptr);
-	KUNIT_EXPECT_KASAN_FAIL(test, *(ptr + 8) = 'x');
+	KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[8]);
 }
 
 static void kmalloc_uaf_memset(struct kunit *test)
@@ -504,6 +532,12 @@ static void kmalloc_uaf_memset(struct kunit *test)
 	char *ptr;
 	size_t size = 33;
 
+	/*
+	 * Only generic KASAN uses quarantine, which is required to avoid a
+	 * kernel memory corruption this test causes.
+	 */
+	KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
+
 	ptr = kmalloc(size, GFP_KERNEL);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
@@ -535,7 +569,7 @@ again:
 		goto again;
 	}
 
-	KUNIT_EXPECT_KASAN_FAIL(test, ptr1[40] = 'x');
+	KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr1)[40]);
 	KUNIT_EXPECT_PTR_NE(test, ptr1, ptr2);
 
 	kfree(ptr2);
@@ -682,7 +716,7 @@ static void ksize_unpoisons_memory(struct kunit *test)
 	ptr[size] = 'x';
 
 	/* This one must. */
-	KUNIT_EXPECT_KASAN_FAIL(test, ptr[real_size] = 'y');
+	KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[real_size]);
 
 	kfree(ptr);
 }
@@ -701,8 +735,8 @@ static void ksize_uaf(struct kunit *test)
 	kfree(ptr);
 
 	KUNIT_EXPECT_KASAN_FAIL(test, ksize(ptr));
-	KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = *ptr);
-	KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = *(ptr + size));
+	KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[0]);
+	KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[size]);
 }
 
 static void kasan_stack_oob(struct kunit *test)
diff --git a/lib/test_kasan_module.c b/lib/test_kasan_module.c
index f1017f345d6c..7ebf433edef3 100644
--- a/lib/test_kasan_module.c
+++ b/lib/test_kasan_module.c
@@ -15,13 +15,11 @@
 
 #include "../mm/kasan/kasan.h"
 
-#define OOB_TAG_OFF (IS_ENABLED(CONFIG_KASAN_GENERIC) ? 0 : KASAN_GRANULE_SIZE)
-
 static noinline void __init copy_user_test(void)
 {
 	char *kmem;
 	char __user *usermem;
-	size_t size = 10;
+	size_t size = 128 - KASAN_GRANULE_SIZE;
 	int __maybe_unused unused;
 
 	kmem = kmalloc(size, GFP_KERNEL);
@@ -38,25 +36,25 @@ static noinline void __init copy_user_test(void)
 	}
 
 	pr_info("out-of-bounds in copy_from_user()\n");
-	unused = copy_from_user(kmem, usermem, size + 1 + OOB_TAG_OFF);
+	unused = copy_from_user(kmem, usermem, size + 1);
 
 	pr_info("out-of-bounds in copy_to_user()\n");
-	unused = copy_to_user(usermem, kmem, size + 1 + OOB_TAG_OFF);
+	unused = copy_to_user(usermem, kmem, size + 1);
 
 	pr_info("out-of-bounds in __copy_from_user()\n");
-	unused = __copy_from_user(kmem, usermem, size + 1 + OOB_TAG_OFF);
+	unused = __copy_from_user(kmem, usermem, size + 1);
 
 	pr_info("out-of-bounds in __copy_to_user()\n");
-	unused = __copy_to_user(usermem, kmem, size + 1 + OOB_TAG_OFF);
+	unused = __copy_to_user(usermem, kmem, size + 1);
 
 	pr_info("out-of-bounds in __copy_from_user_inatomic()\n");
-	unused = __copy_from_user_inatomic(kmem, usermem, size + 1 + OOB_TAG_OFF);
+	unused = __copy_from_user_inatomic(kmem, usermem, size + 1);
 
 	pr_info("out-of-bounds in __copy_to_user_inatomic()\n");
-	unused = __copy_to_user_inatomic(usermem, kmem, size + 1 + OOB_TAG_OFF);
+	unused = __copy_to_user_inatomic(usermem, kmem, size + 1);
 
 	pr_info("out-of-bounds in strncpy_from_user()\n");
-	unused = strncpy_from_user(kmem, usermem, size + 1 + OOB_TAG_OFF);
+	unused = strncpy_from_user(kmem, usermem, size + 1);
 
 	vm_munmap((unsigned long)usermem, PAGE_SIZE);
 	kfree(kmem);
@@ -73,7 +71,7 @@ static noinline void __init kasan_rcu_reclaim(struct rcu_head *rp)
 						struct kasan_rcu_info, rcu);
 
 	kfree(fp);
-	fp->i = 1;
+	((volatile struct kasan_rcu_info *)fp)->i;
 }
 
 static noinline void __init kasan_rcu_uaf(void)
diff --git a/lib/test_printf.c b/lib/test_printf.c
index 8ac71aee46af..07309c45f327 100644
--- a/lib/test_printf.c
+++ b/lib/test_printf.c
@@ -586,70 +586,59 @@ struct page_flags_test {
 	int width;
 	int shift;
 	int mask;
-	unsigned long value;
 	const char *fmt;
 	const char *name;
 };
 
-static struct page_flags_test pft[] = {
+static const struct page_flags_test pft[] = {
 	{SECTIONS_WIDTH, SECTIONS_PGSHIFT, SECTIONS_MASK,
-	 0, "%d", "section"},
+	 "%d", "section"},
 	{NODES_WIDTH, NODES_PGSHIFT, NODES_MASK,
-	 0, "%d", "node"},
+	 "%d", "node"},
 	{ZONES_WIDTH, ZONES_PGSHIFT, ZONES_MASK,
-	 0, "%d", "zone"},
+	 "%d", "zone"},
 	{LAST_CPUPID_WIDTH, LAST_CPUPID_PGSHIFT, LAST_CPUPID_MASK,
-	 0, "%#x", "lastcpupid"},
+	 "%#x", "lastcpupid"},
 	{KASAN_TAG_WIDTH, KASAN_TAG_PGSHIFT, KASAN_TAG_MASK,
-	 0, "%#x", "kasantag"},
+	 "%#x", "kasantag"},
 };
 
 static void __init
 page_flags_test(int section, int node, int zone, int last_cpupid,
-		int kasan_tag, int flags, const char *name, char *cmp_buf)
+		int kasan_tag, unsigned long flags, const char *name,
+		char *cmp_buf)
 {
 	unsigned long values[] = {section, node, zone, last_cpupid, kasan_tag};
-	unsigned long page_flags = 0;
-	unsigned long size = 0;
+	unsigned long size;
 	bool append = false;
 	int i;
 
-	flags &= BIT(NR_PAGEFLAGS) - 1;
-	if (flags) {
-		page_flags |= flags;
-		snprintf(cmp_buf + size, BUF_SIZE - size, "%s", name);
-		size = strlen(cmp_buf);
-#if SECTIONS_WIDTH || NODES_WIDTH || ZONES_WIDTH || \
-	LAST_CPUPID_WIDTH || KASAN_TAG_WIDTH
-		/* Other information also included in page flags */
-		snprintf(cmp_buf + size, BUF_SIZE - size, "|");
-		size = strlen(cmp_buf);
-#endif
-	}
+	for (i = 0; i < ARRAY_SIZE(values); i++)
+		flags |= (values[i] & pft[i].mask) << pft[i].shift;
 
-	/* Set the test value */
-	for (i = 0; i < ARRAY_SIZE(pft); i++)
-		pft[i].value = values[i];
+	size = scnprintf(cmp_buf, BUF_SIZE, "%#lx(", flags);
+	if (flags & PAGEFLAGS_MASK) {
+		size += scnprintf(cmp_buf + size, BUF_SIZE - size, "%s", name);
+		append = true;
+	}
 
 	for (i = 0; i < ARRAY_SIZE(pft); i++) {
 		if (!pft[i].width)
 			continue;
 
-		if (append) {
-			snprintf(cmp_buf + size, BUF_SIZE - size, "|");
-			size = strlen(cmp_buf);
-		}
+		if (append)
+			size += scnprintf(cmp_buf + size, BUF_SIZE - size, "|");
 
-		page_flags |= (pft[i].value & pft[i].mask) << pft[i].shift;
-		snprintf(cmp_buf + size, BUF_SIZE - size, "%s=", pft[i].name);
-		size = strlen(cmp_buf);
-		snprintf(cmp_buf + size, BUF_SIZE - size, pft[i].fmt,
-			 pft[i].value & pft[i].mask);
-		size = strlen(cmp_buf);
+		size += scnprintf(cmp_buf + size, BUF_SIZE - size, "%s=",
+				pft[i].name);
+		size += scnprintf(cmp_buf + size, BUF_SIZE - size, pft[i].fmt,
+				values[i] & pft[i].mask);
 		append = true;
 	}
 
-	test(cmp_buf, "%pGp", &page_flags);
+	snprintf(cmp_buf + size, BUF_SIZE - size, ")");
+
+	test(cmp_buf, "%pGp", &flags);
 }
 
 static void __init
@@ -675,9 +664,8 @@ flags(void)
 			"uptodate|dirty|lru|active|swapbacked",
 			cmp_buffer);
 
-	flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC
-			| VM_DENYWRITE;
-	test("read|exec|mayread|maywrite|mayexec|denywrite", "%pGv", &flags);
+	flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
+	test("read|exec|mayread|maywrite|mayexec", "%pGv", &flags);
 
 	gfp = GFP_TRANSHUGE;
 	test("GFP_TRANSHUGE", "%pGg", &gfp);
diff --git a/lib/test_scanf.c b/lib/test_scanf.c
index abae88848972..b620cf7de503 100644
--- a/lib/test_scanf.c
+++ b/lib/test_scanf.c
@@ -398,7 +398,7 @@ do {										\
 	test_array_8(fn, expect, test_buffer, fmt_buffer, result);		\
 } while (0)
 
-static void __init numbers_list(const char *delim)
+static void __init numbers_list_ll(const char *delim)
 {
 	numbers_list_8(unsigned long long, "%llu",   delim, "llu", check_ull);
 	numbers_list_8(long long,	   "%lld",   delim, "lld", check_ll);
@@ -406,28 +406,40 @@ static void __init numbers_list(const char *delim)
 	numbers_list_8(unsigned long long, "%llx",   delim, "llx", check_ull);
 	numbers_list_8(unsigned long long, "0x%llx", delim, "llx", check_ull);
 	numbers_list_8(long long,	   "0x%llx", delim, "lli", check_ll);
+}
 
+static void __init numbers_list_l(const char *delim)
+{
 	numbers_list_8(unsigned long,	   "%lu",    delim, "lu", check_ulong);
 	numbers_list_8(long,		   "%ld",    delim, "ld", check_long);
 	numbers_list_8(long,		   "%ld",    delim, "li", check_long);
 	numbers_list_8(unsigned long,	   "%lx",    delim, "lx", check_ulong);
 	numbers_list_8(unsigned long,	   "0x%lx",  delim, "lx", check_ulong);
 	numbers_list_8(long,		   "0x%lx",  delim, "li", check_long);
+}
 
+static void __init numbers_list_d(const char *delim)
+{
 	numbers_list_8(unsigned int,	   "%u",     delim, "u", check_uint);
 	numbers_list_8(int,		   "%d",     delim, "d", check_int);
 	numbers_list_8(int,		   "%d",     delim, "i", check_int);
 	numbers_list_8(unsigned int,	   "%x",     delim, "x", check_uint);
 	numbers_list_8(unsigned int,	   "0x%x",   delim, "x", check_uint);
 	numbers_list_8(int,		   "0x%x",   delim, "i", check_int);
+}
 
+static void __init numbers_list_h(const char *delim)
+{
 	numbers_list_8(unsigned short,	   "%hu",    delim, "hu", check_ushort);
 	numbers_list_8(short,		   "%hd",    delim, "hd", check_short);
 	numbers_list_8(short,		   "%hd",    delim, "hi", check_short);
 	numbers_list_8(unsigned short,	   "%hx",    delim, "hx", check_ushort);
 	numbers_list_8(unsigned short,	   "0x%hx",  delim, "hx", check_ushort);
 	numbers_list_8(short,		   "0x%hx",  delim, "hi", check_short);
+}
 
+static void __init numbers_list_hh(const char *delim)
+{
 	numbers_list_8(unsigned char,	   "%hhu",   delim, "hhu", check_uchar);
 	numbers_list_8(signed char,	   "%hhd",   delim, "hhd", check_char);
 	numbers_list_8(signed char,	   "%hhd",   delim, "hhi", check_char);
@@ -436,11 +448,16 @@ static void __init numbers_list(const char *delim)
 	numbers_list_8(signed char,	   "0x%hhx", delim, "hhi", check_char);
 }
 
-/*
- * List of numbers separated by delim. Each field width specifier is the
- * maximum possible digits for the given type and base.
- */
-static void __init numbers_list_field_width_typemax(const char *delim)
+static void __init numbers_list(const char *delim)
+{
+	numbers_list_ll(delim);
+	numbers_list_l(delim);
+	numbers_list_d(delim);
+	numbers_list_h(delim);
+	numbers_list_hh(delim);
+}
+
+static void __init numbers_list_field_width_ll(const char *delim)
 {
 	numbers_list_fix_width(unsigned long long, "%llu",   delim, 20, "llu", check_ull);
 	numbers_list_fix_width(long long,	   "%lld",   delim, 20, "lld", check_ll);
@@ -448,7 +465,10 @@ static void __init numbers_list_field_width_typemax(const char *delim)
 	numbers_list_fix_width(unsigned long long, "%llx",   delim, 16, "llx", check_ull);
 	numbers_list_fix_width(unsigned long long, "0x%llx", delim, 18, "llx", check_ull);
 	numbers_list_fix_width(long long,	   "0x%llx", delim, 18, "lli", check_ll);
+}
 
+static void __init numbers_list_field_width_l(const char *delim)
+{
 #if BITS_PER_LONG == 64
 	numbers_list_fix_width(unsigned long,	"%lu",	     delim, 20, "lu", check_ulong);
 	numbers_list_fix_width(long,		"%ld",	     delim, 20, "ld", check_long);
@@ -464,21 +484,30 @@ static void __init numbers_list_field_width_typemax(const char *delim)
 	numbers_list_fix_width(unsigned long,	"0x%lx",     delim, 10, "lx", check_ulong);
 	numbers_list_fix_width(long,		"0x%lx",     delim, 10, "li", check_long);
 #endif
+}
 
+static void __init numbers_list_field_width_d(const char *delim)
+{
 	numbers_list_fix_width(unsigned int,	"%u",	     delim, 10, "u", check_uint);
 	numbers_list_fix_width(int,		"%d",	     delim, 11, "d", check_int);
 	numbers_list_fix_width(int,		"%d",	     delim, 11, "i", check_int);
 	numbers_list_fix_width(unsigned int,	"%x",	     delim, 8,  "x", check_uint);
 	numbers_list_fix_width(unsigned int,	"0x%x",	     delim, 10, "x", check_uint);
 	numbers_list_fix_width(int,		"0x%x",	     delim, 10, "i", check_int);
+}
 
+static void __init numbers_list_field_width_h(const char *delim)
+{
 	numbers_list_fix_width(unsigned short,	"%hu",	     delim, 5, "hu", check_ushort);
 	numbers_list_fix_width(short,		"%hd",	     delim, 6, "hd", check_short);
 	numbers_list_fix_width(short,		"%hd",	     delim, 6, "hi", check_short);
 	numbers_list_fix_width(unsigned short,	"%hx",	     delim, 4, "hx", check_ushort);
 	numbers_list_fix_width(unsigned short,	"0x%hx",     delim, 6, "hx", check_ushort);
 	numbers_list_fix_width(short,		"0x%hx",     delim, 6, "hi", check_short);
+}
 
+static void __init numbers_list_field_width_hh(const char *delim)
+{
 	numbers_list_fix_width(unsigned char,	"%hhu",	     delim, 3, "hhu", check_uchar);
 	numbers_list_fix_width(signed char,	"%hhd",	     delim, 4, "hhd", check_char);
 	numbers_list_fix_width(signed char,	"%hhd",	     delim, 4, "hhi", check_char);
@@ -489,9 +518,18 @@ static void __init numbers_list_field_width_typemax(const char *delim)
 
 /*
  * List of numbers separated by delim. Each field width specifier is the
- * exact length of the corresponding value digits in the string being scanned.
+ * maximum possible digits for the given type and base.
  */
-static void __init numbers_list_field_width_val_width(const char *delim)
+static void __init numbers_list_field_width_typemax(const char *delim)
+{
+	numbers_list_field_width_ll(delim);
+	numbers_list_field_width_l(delim);
+	numbers_list_field_width_d(delim);
+	numbers_list_field_width_h(delim);
+	numbers_list_field_width_hh(delim);
+}
+
+static void __init numbers_list_field_width_val_ll(const char *delim)
 {
 	numbers_list_val_width(unsigned long long, "%llu",   delim, "llu", check_ull);
 	numbers_list_val_width(long long,	   "%lld",   delim, "lld", check_ll);
@@ -499,28 +537,40 @@ static void __init numbers_list_field_width_val_width(const char *delim)
 	numbers_list_val_width(unsigned long long, "%llx",   delim, "llx", check_ull);
 	numbers_list_val_width(unsigned long long, "0x%llx", delim, "llx", check_ull);
 	numbers_list_val_width(long long,	   "0x%llx", delim, "lli", check_ll);
+}
 
+static void __init numbers_list_field_width_val_l(const char *delim)
+{
 	numbers_list_val_width(unsigned long,	"%lu",	     delim, "lu", check_ulong);
 	numbers_list_val_width(long,		"%ld",	     delim, "ld", check_long);
 	numbers_list_val_width(long,		"%ld",	     delim, "li", check_long);
 	numbers_list_val_width(unsigned long,	"%lx",	     delim, "lx", check_ulong);
 	numbers_list_val_width(unsigned long,	"0x%lx",     delim, "lx", check_ulong);
 	numbers_list_val_width(long,		"0x%lx",     delim, "li", check_long);
+}
 
+static void __init numbers_list_field_width_val_d(const char *delim)
+{
 	numbers_list_val_width(unsigned int,	"%u",	     delim, "u", check_uint);
 	numbers_list_val_width(int,		"%d",	     delim, "d", check_int);
 	numbers_list_val_width(int,		"%d",	     delim, "i", check_int);
 	numbers_list_val_width(unsigned int,	"%x",	     delim, "x", check_uint);
 	numbers_list_val_width(unsigned int,	"0x%x",	     delim, "x", check_uint);
 	numbers_list_val_width(int,		"0x%x",	     delim, "i", check_int);
+}
 
+static void __init numbers_list_field_width_val_h(const char *delim)
+{
 	numbers_list_val_width(unsigned short,	"%hu",	     delim, "hu", check_ushort);
 	numbers_list_val_width(short,		"%hd",	     delim, "hd", check_short);
 	numbers_list_val_width(short,		"%hd",	     delim, "hi", check_short);
 	numbers_list_val_width(unsigned short,	"%hx",	     delim, "hx", check_ushort);
 	numbers_list_val_width(unsigned short,	"0x%hx",     delim, "hx", check_ushort);
 	numbers_list_val_width(short,		"0x%hx",     delim, "hi", check_short);
+}
 
+static void __init numbers_list_field_width_val_hh(const char *delim)
+{
 	numbers_list_val_width(unsigned char,	"%hhu",	     delim, "hhu", check_uchar);
 	numbers_list_val_width(signed char,	"%hhd",	     delim, "hhd", check_char);
 	numbers_list_val_width(signed char,	"%hhd",	     delim, "hhi", check_char);
@@ -530,6 +580,19 @@ static void __init numbers_list_field_width_val_width(const char *delim)
 }
 
 /*
+ * List of numbers separated by delim. Each field width specifier is the
+ * exact length of the corresponding value digits in the string being scanned.
+ */
+static void __init numbers_list_field_width_val_width(const char *delim)
+{
+	numbers_list_field_width_val_ll(delim);
+	numbers_list_field_width_val_l(delim);
+	numbers_list_field_width_val_d(delim);
+	numbers_list_field_width_val_h(delim);
+	numbers_list_field_width_val_hh(delim);
+}
+
+/*
  * Slice a continuous string of digits without field delimiters, containing
  * numbers of varying length, using the field width to extract each group
  * of digits. For example the hex values c0,3,bf01,303 would have a
diff --git a/lib/test_sort.c b/lib/test_sort.c
index 52edbe10f2e5..be02e3a098cf 100644
--- a/lib/test_sort.c
+++ b/lib/test_sort.c
@@ -1,4 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
+
+#include <kunit/test.h>
+
 #include <linux/sort.h>
 #include <linux/slab.h>
 #include <linux/module.h>
@@ -7,18 +10,17 @@
 
 #define TEST_LEN 1000
 
-static int __init cmpint(const void *a, const void *b)
+static int cmpint(const void *a, const void *b)
 {
 	return *(int *)a - *(int *)b;
 }
 
-static int __init test_sort_init(void)
+static void test_sort(struct kunit *test)
 {
-	int *a, i, r = 1, err = -ENOMEM;
+	int *a, i, r = 1;
 
-	a = kmalloc_array(TEST_LEN, sizeof(*a), GFP_KERNEL);
-	if (!a)
-		return err;
+	a = kunit_kmalloc_array(test, TEST_LEN, sizeof(*a), GFP_KERNEL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, a);
 
 	for (i = 0; i < TEST_LEN; i++) {
 		r = (r * 725861) % 6599;
@@ -27,24 +29,20 @@ static int __init test_sort_init(void)
 
 	sort(a, TEST_LEN, sizeof(*a), cmpint, NULL);
 
-	err = -EINVAL;
 	for (i = 0; i < TEST_LEN-1; i++)
-		if (a[i] > a[i+1]) {
-			pr_err("test has failed\n");
-			goto exit;
-		}
-	err = 0;
-	pr_info("test passed\n");
-exit:
-	kfree(a);
-	return err;
+		KUNIT_ASSERT_LE(test, a[i], a[i + 1]);
 }
 
-static void __exit test_sort_exit(void)
-{
-}
+static struct kunit_case sort_test_cases[] = {
+	KUNIT_CASE(test_sort),
+	{}
+};
+
+static struct kunit_suite sort_test_suite = {
+	.name = "lib_sort",
+	.test_cases = sort_test_cases,
+};
 
-module_init(test_sort_init);
-module_exit(test_sort_exit);
+kunit_test_suites(&sort_test_suite);
 
 MODULE_LICENSE("GPL");
diff --git a/lib/test_stackinit.c b/lib/test_stackinit.c
index f93b1e145ada..a3c74e6a21ff 100644
--- a/lib/test_stackinit.c
+++ b/lib/test_stackinit.c
@@ -1,8 +1,13 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * Test cases for compiler-based stack variable zeroing via future
- * compiler flags or CONFIG_GCC_PLUGIN_STRUCTLEAK*.
+ * Test cases for compiler-based stack variable zeroing via
+ * -ftrivial-auto-var-init={zero,pattern} or CONFIG_GCC_PLUGIN_STRUCTLEAK*.
+ *
+ * External build example:
+ *	clang -O2 -Wall -ftrivial-auto-var-init=pattern \
+ *		-o test_stackinit test_stackinit.c
  */
+#ifdef __KERNEL__
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/init.h>
@@ -10,6 +15,63 @@
 #include <linux/module.h>
 #include <linux/string.h>
 
+#else
+
+/* Userspace headers. */
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <sys/types.h>
+
+/* Linux kernel-ism stubs for stand-alone userspace build. */
+#define KBUILD_MODNAME		"stackinit"
+#define pr_fmt(fmt)		KBUILD_MODNAME ": " fmt
+#define pr_err(fmt, ...)	fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warn(fmt, ...)	fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_info(fmt, ...)	fprintf(stdout, pr_fmt(fmt), ##__VA_ARGS__)
+#define __init			/**/
+#define __exit			/**/
+#define __user			/**/
+#define noinline		__attribute__((__noinline__))
+#define __aligned(x)		__attribute__((__aligned__(x)))
+#ifdef __clang__
+# define __compiletime_error(message) /**/
+#else
+# define __compiletime_error(message) __attribute__((__error__(message)))
+#endif
+#define __compiletime_assert(condition, msg, prefix, suffix)		\
+	do {								\
+		extern void prefix ## suffix(void) __compiletime_error(msg); \
+		if (!(condition))					\
+			prefix ## suffix();				\
+	} while (0)
+#define _compiletime_assert(condition, msg, prefix, suffix) \
+	__compiletime_assert(condition, msg, prefix, suffix)
+#define compiletime_assert(condition, msg) \
+	_compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
+#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
+#define BUILD_BUG_ON(condition) \
+	BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
+typedef uint8_t			u8;
+typedef uint16_t		u16;
+typedef uint32_t		u32;
+typedef uint64_t		u64;
+
+#define module_init(func)	static int (*do_init)(void) = func
+#define module_exit(func)	static void (*do_exit)(void) = func
+#define MODULE_LICENSE(str)	int main(void) {		\
+					int rc;			\
+					/* License: str */	\
+					rc = do_init();		\
+					if (rc == 0)		\
+						do_exit();	\
+					return rc;		\
+				}
+
+#endif /* __KERNEL__ */
+
 /* Exfiltration buffer. */
 #define MAX_VAR_SIZE	128
 static u8 check_buf[MAX_VAR_SIZE];
@@ -33,6 +95,10 @@ static bool range_contains(char *haystack_start, size_t haystack_size,
 	return false;
 }
 
+/* Whether the test is expected to fail. */
+#define WANT_SUCCESS				0
+#define XFAIL					1
+
 #define DO_NOTHING_TYPE_SCALAR(var_type)	var_type
 #define DO_NOTHING_TYPE_STRING(var_type)	void
 #define DO_NOTHING_TYPE_STRUCT(var_type)	void
@@ -58,34 +124,73 @@ static bool range_contains(char *haystack_start, size_t haystack_size,
 #define INIT_CLONE_STRING		[FILL_SIZE_STRING]
 #define INIT_CLONE_STRUCT		/**/
 
-#define INIT_SCALAR_none		/**/
-#define INIT_SCALAR_zero		= 0
+#define ZERO_CLONE_SCALAR(zero)		memset(&(zero), 0x00, sizeof(zero))
+#define ZERO_CLONE_STRING(zero)		memset(&(zero), 0x00, sizeof(zero))
+/*
+ * For the struct, intentionally poison padding to see if it gets
+ * copied out in direct assignments.
+ * */
+#define ZERO_CLONE_STRUCT(zero)				\
+	do {						\
+		memset(&(zero), 0xFF, sizeof(zero));	\
+		zero.one = 0;				\
+		zero.two = 0;				\
+		zero.three = 0;				\
+		zero.four = 0;				\
+	} while (0)
+
+#define INIT_SCALAR_none(var_type)	/**/
+#define INIT_SCALAR_zero(var_type)	= 0
 
-#define INIT_STRING_none		[FILL_SIZE_STRING] /**/
-#define INIT_STRING_zero		[FILL_SIZE_STRING] = { }
+#define INIT_STRING_none(var_type)	[FILL_SIZE_STRING] /**/
+#define INIT_STRING_zero(var_type)	[FILL_SIZE_STRING] = { }
 
-#define INIT_STRUCT_none		/**/
-#define INIT_STRUCT_zero		= { }
-#define INIT_STRUCT_static_partial	= { .two = 0, }
-#define INIT_STRUCT_static_all		= { .one = arg->one,		\
-					    .two = arg->two,		\
-					    .three = arg->three,	\
-					    .four = arg->four,		\
+#define INIT_STRUCT_none(var_type)	/**/
+#define INIT_STRUCT_zero(var_type)	= { }
+
+
+#define __static_partial		{ .two = 0, }
+#define __static_all			{ .one = 0,			\
+					  .two = 0,			\
+					  .three = 0,			\
+					  .four = 0,			\
 					}
-#define INIT_STRUCT_dynamic_partial	= { .two = arg->two, }
-#define INIT_STRUCT_dynamic_all		= { .one = arg->one,		\
-					    .two = arg->two,		\
-					    .three = arg->three,	\
-					    .four = arg->four,		\
+#define __dynamic_partial		{ .two = arg->two, }
+#define __dynamic_all			{ .one = arg->one,		\
+					  .two = arg->two,		\
+					  .three = arg->three,		\
+					  .four = arg->four,		\
 					}
-#define INIT_STRUCT_runtime_partial	;				\
-					var.two = 0
-#define INIT_STRUCT_runtime_all		;				\
-					var.one = 0;			\
+#define __runtime_partial		var.two = 0
+#define __runtime_all			var.one = 0;			\
 					var.two = 0;			\
 					var.three = 0;			\
-					memset(&var.four, 0,		\
-					       sizeof(var.four))
+					var.four = 0
+
+#define INIT_STRUCT_static_partial(var_type)				\
+					= __static_partial
+#define INIT_STRUCT_static_all(var_type)				\
+					= __static_all
+#define INIT_STRUCT_dynamic_partial(var_type)				\
+					= __dynamic_partial
+#define INIT_STRUCT_dynamic_all(var_type)				\
+					= __dynamic_all
+#define INIT_STRUCT_runtime_partial(var_type)				\
+					; __runtime_partial
+#define INIT_STRUCT_runtime_all(var_type)				\
+					; __runtime_all
+
+#define INIT_STRUCT_assigned_static_partial(var_type)			\
+					; var = (var_type)__static_partial
+#define INIT_STRUCT_assigned_static_all(var_type)			\
+					; var = (var_type)__static_all
+#define INIT_STRUCT_assigned_dynamic_partial(var_type)			\
+					; var = (var_type)__dynamic_partial
+#define INIT_STRUCT_assigned_dynamic_all(var_type)			\
+					; var = (var_type)__dynamic_all
+
+#define INIT_STRUCT_assigned_copy(var_type)				\
+					; var = *(arg)
 
 /*
  * @name: unique string name for the test
@@ -106,7 +211,7 @@ static noinline __init int test_ ## name (void)			\
 	BUILD_BUG_ON(sizeof(zero) > MAX_VAR_SIZE);		\
 								\
 	/* Fill clone type with zero for per-field init. */	\
-	memset(&zero, 0x00, sizeof(zero));			\
+	ZERO_CLONE_ ## which(zero);				\
 	/* Clear entire check buffer for 0xFF overlap test. */	\
 	memset(check_buf, 0x00, sizeof(check_buf));		\
 	/* Fill stack with 0xFF. */				\
@@ -149,7 +254,7 @@ static noinline __init int test_ ## name (void)			\
 		return (xfail) ? 0 : 1;				\
 	}							\
 }
-#define DEFINE_TEST(name, var_type, which, init_level)		\
+#define DEFINE_TEST(name, var_type, which, init_level, xfail)	\
 /* no-op to force compiler into ignoring "uninitialized" vars */\
 static noinline __init DO_NOTHING_TYPE_ ## which(var_type)	\
 do_nothing_ ## name(var_type *ptr)				\
@@ -165,7 +270,8 @@ static noinline __init int leaf_ ## name(unsigned long sp,	\
 					 var_type *arg)		\
 {								\
 	char buf[VAR_BUFFER];					\
-	var_type var INIT_ ## which ## _ ## init_level;		\
+	var_type var						\
+		INIT_ ## which ## _ ## init_level(var_type);	\
 								\
 	target_start = &var;					\
 	target_size = sizeof(var);				\
@@ -191,7 +297,7 @@ static noinline __init int leaf_ ## name(unsigned long sp,	\
 								\
 	return (int)buf[0] | (int)buf[sizeof(buf) - 1];		\
 }								\
-DEFINE_TEST_DRIVER(name, var_type, which, 0)
+DEFINE_TEST_DRIVER(name, var_type, which, xfail)
 
 /* Structure with no padding. */
 struct test_packed {
@@ -210,18 +316,13 @@ struct test_small_hole {
 	unsigned long four;
 };
 
-/* Try to trigger unhandled padding in a structure. */
-struct test_aligned {
-	u32 internal1;
-	u64 internal2;
-} __aligned(64);
-
+/* Trigger unhandled padding in a structure. */
 struct test_big_hole {
 	u8 one;
 	u8 two;
 	u8 three;
 	/* 61 byte padding hole here. */
-	struct test_aligned four;
+	u8 four __aligned(64);
 } __aligned(64);
 
 struct test_trailing_hole {
@@ -240,42 +341,50 @@ struct test_user {
 	unsigned long four;
 };
 
-#define DEFINE_SCALAR_TEST(name, init)				\
-		DEFINE_TEST(name ## _ ## init, name, SCALAR, init)
+#define DEFINE_SCALAR_TEST(name, init, xfail)			\
+		DEFINE_TEST(name ## _ ## init, name, SCALAR,	\
+			    init, xfail)
 
-#define DEFINE_SCALAR_TESTS(init)				\
-		DEFINE_SCALAR_TEST(u8, init);			\
-		DEFINE_SCALAR_TEST(u16, init);			\
-		DEFINE_SCALAR_TEST(u32, init);			\
-		DEFINE_SCALAR_TEST(u64, init);			\
-		DEFINE_TEST(char_array_ ## init, unsigned char, STRING, init)
+#define DEFINE_SCALAR_TESTS(init, xfail)			\
+		DEFINE_SCALAR_TEST(u8, init, xfail);		\
+		DEFINE_SCALAR_TEST(u16, init, xfail);		\
+		DEFINE_SCALAR_TEST(u32, init, xfail);		\
+		DEFINE_SCALAR_TEST(u64, init, xfail);		\
+		DEFINE_TEST(char_array_ ## init, unsigned char,	\
+			    STRING, init, xfail)
 
-#define DEFINE_STRUCT_TEST(name, init)				\
+#define DEFINE_STRUCT_TEST(name, init, xfail)			\
 		DEFINE_TEST(name ## _ ## init,			\
-			    struct test_ ## name, STRUCT, init)
+			    struct test_ ## name, STRUCT, init, \
+			    xfail)
+
+#define DEFINE_STRUCT_TESTS(init, xfail)			\
+		DEFINE_STRUCT_TEST(small_hole, init, xfail);	\
+		DEFINE_STRUCT_TEST(big_hole, init, xfail);	\
+		DEFINE_STRUCT_TEST(trailing_hole, init, xfail);	\
+		DEFINE_STRUCT_TEST(packed, init, xfail)
 
-#define DEFINE_STRUCT_TESTS(init)				\
-		DEFINE_STRUCT_TEST(small_hole, init);		\
-		DEFINE_STRUCT_TEST(big_hole, init);		\
-		DEFINE_STRUCT_TEST(trailing_hole, init);	\
-		DEFINE_STRUCT_TEST(packed, init)
+#define DEFINE_STRUCT_INITIALIZER_TESTS(base)			\
+		DEFINE_STRUCT_TESTS(base ## _ ## partial,	\
+				    WANT_SUCCESS);		\
+		DEFINE_STRUCT_TESTS(base ## _ ## all,		\
+				    WANT_SUCCESS)
 
 /* These should be fully initialized all the time! */
-DEFINE_SCALAR_TESTS(zero);
-DEFINE_STRUCT_TESTS(zero);
-/* Static initialization: padding may be left uninitialized. */
-DEFINE_STRUCT_TESTS(static_partial);
-DEFINE_STRUCT_TESTS(static_all);
-/* Dynamic initialization: padding may be left uninitialized. */
-DEFINE_STRUCT_TESTS(dynamic_partial);
-DEFINE_STRUCT_TESTS(dynamic_all);
-/* Runtime initialization: padding may be left uninitialized. */
-DEFINE_STRUCT_TESTS(runtime_partial);
-DEFINE_STRUCT_TESTS(runtime_all);
+DEFINE_SCALAR_TESTS(zero, WANT_SUCCESS);
+DEFINE_STRUCT_TESTS(zero, WANT_SUCCESS);
+/* Struct initializers: padding may be left uninitialized. */
+DEFINE_STRUCT_INITIALIZER_TESTS(static);
+DEFINE_STRUCT_INITIALIZER_TESTS(dynamic);
+DEFINE_STRUCT_INITIALIZER_TESTS(runtime);
+DEFINE_STRUCT_INITIALIZER_TESTS(assigned_static);
+DEFINE_STRUCT_INITIALIZER_TESTS(assigned_dynamic);
+DEFINE_STRUCT_TESTS(assigned_copy, XFAIL);
 /* No initialization without compiler instrumentation. */
-DEFINE_SCALAR_TESTS(none);
-DEFINE_STRUCT_TESTS(none);
-DEFINE_TEST(user, struct test_user, STRUCT, none);
+DEFINE_SCALAR_TESTS(none, WANT_SUCCESS);
+DEFINE_STRUCT_TESTS(none, WANT_SUCCESS);
+/* Initialization of members with __user attribute. */
+DEFINE_TEST(user, struct test_user, STRUCT, none, WANT_SUCCESS);
 
 /*
  * Check two uses through a variable declaration outside either path,
@@ -285,6 +394,10 @@ DEFINE_TEST(user, struct test_user, STRUCT, none);
 static int noinline __leaf_switch_none(int path, bool fill)
 {
 	switch (path) {
+		/*
+		 * This is intentionally unreachable. To silence the
+		 * warning, build with -Wno-switch-unreachable
+		 */
 		uint64_t var;
 
 	case 1:
@@ -334,8 +447,8 @@ static noinline __init int leaf_switch_2_none(unsigned long sp, bool fill,
  * non-code areas (i.e. in a switch statement before the first "case").
  * https://bugs.llvm.org/show_bug.cgi?id=44916
  */
-DEFINE_TEST_DRIVER(switch_1_none, uint64_t, SCALAR, 1);
-DEFINE_TEST_DRIVER(switch_2_none, uint64_t, SCALAR, 1);
+DEFINE_TEST_DRIVER(switch_1_none, uint64_t, SCALAR, XFAIL);
+DEFINE_TEST_DRIVER(switch_2_none, uint64_t, SCALAR, XFAIL);
 
 static int __init test_stackinit_init(void)
 {
@@ -361,12 +474,18 @@ static int __init test_stackinit_init(void)
 	test_structs(zero);
 	/* Padding here appears to be accidentally always initialized? */
 	test_structs(dynamic_partial);
+	test_structs(assigned_dynamic_partial);
 	/* Padding initialization depends on compiler behaviors. */
 	test_structs(static_partial);
 	test_structs(static_all);
 	test_structs(dynamic_all);
 	test_structs(runtime_partial);
 	test_structs(runtime_all);
+	test_structs(assigned_static_partial);
+	test_structs(assigned_static_all);
+	test_structs(assigned_dynamic_all);
+	/* Everything fails this since it effectively performs a memcpy(). */
+	test_structs(assigned_copy);
 
 	/* STRUCTLEAK_BYREF_ALL should cover everything from here down. */
 	test_scalars(none);
diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c
index 01e9543de566..e14993bc84d2 100644
--- a/lib/test_vmalloc.c
+++ b/lib/test_vmalloc.c
@@ -35,6 +35,9 @@ __param(int, test_repeat_count, 1,
 __param(int, test_loop_count, 1000000,
 	"Set test loop counter");
 
+__param(int, nr_pages, 0,
+	"Set number of pages for fix_size_alloc_test(default: 1)");
+
 __param(int, run_test_mask, INT_MAX,
 	"Set tests specified in the mask.\n\n"
 		"\t\tid: 1,    name: fix_size_alloc_test\n"
@@ -262,7 +265,7 @@ static int fix_size_alloc_test(void)
 	int i;
 
 	for (i = 0; i < test_loop_count; i++) {
-		ptr = vmalloc(3 * PAGE_SIZE);
+		ptr = vmalloc((nr_pages > 0 ? nr_pages:1) * PAGE_SIZE);
 
 		if (!ptr)
 			return -1;
diff --git a/lib/ubsan.c b/lib/ubsan.c
index 26229973049d..bdc380ff5d5c 100644
--- a/lib/ubsan.c
+++ b/lib/ubsan.c
@@ -14,6 +14,7 @@
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <linux/uaccess.h>
+#include <kunit/test-bug.h>
 
 #include "ubsan.h"
 
@@ -141,6 +142,8 @@ static void ubsan_prologue(struct source_location *loc, const char *reason)
 		"========================================\n");
 	pr_err("UBSAN: %s in %s:%d:%d\n", reason, loc->file_name,
 		loc->line & LINE_MASK, loc->column & COLUMN_MASK);
+
+	kunit_fail_current_test("%s in %s", reason, loc->file_name);
 }
 
 static void ubsan_epilogue(void)
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 1173930ed9d3..f90f91d83920 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -17,7 +17,7 @@
  * - scnprintf and vscnprintf
  */
 
-#include <stdarg.h>
+#include <linux/stdarg.h>
 #include <linux/build_bug.h>
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
@@ -2020,10 +2020,15 @@ static const struct page_flags_fields pff[] = {
 static
 char *format_page_flags(char *buf, char *end, unsigned long flags)
 {
-	unsigned long main_flags = flags & (BIT(NR_PAGEFLAGS) - 1);
+	unsigned long main_flags = flags & PAGEFLAGS_MASK;
 	bool append = false;
 	int i;
 
+	buf = number(buf, end, flags, default_flag_spec);
+	if (buf < end)
+		*buf = '(';
+	buf++;
+
 	/* Page flags from the main area. */
 	if (main_flags) {
 		buf = format_flags(buf, end, main_flags, pageflag_names);
@@ -2052,6 +2057,9 @@ char *format_page_flags(char *buf, char *end, unsigned long flags)
 
 		append = true;
 	}
+	if (buf < end)
+		*buf = ')';
+	buf++;
 
 	return buf;
 }
diff --git a/mm/Kconfig b/mm/Kconfig
index 40a9bfcd5062..d16ba9249bc5 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -96,9 +96,6 @@ config HAVE_FAST_GUP
 	depends on MMU
 	bool
 
-config HOLES_IN_ZONE
-	bool
-
 # Don't discard allocated memory used to track "memory" and "reserved" memblocks
 # after early boot, so it can still be used to test for validity of memory.
 # Also, memblocks are updated with memory hot(un)plug.
@@ -742,10 +739,18 @@ config DEFERRED_STRUCT_PAGE_INIT
 	  lifetime of the system until these kthreads finish the
 	  initialisation.
 
+config PAGE_IDLE_FLAG
+	bool
+	select PAGE_EXTENSION if !64BIT
+	help
+	  This adds PG_idle and PG_young flags to 'struct page'.  PTE Accessed
+	  bit writers can set the state of the bit in the flags so that PTE
+	  Accessed bit readers may avoid disturbance.
+
 config IDLE_PAGE_TRACKING
 	bool "Enable idle page tracking"
 	depends on SYSFS && MMU
-	select PAGE_EXTENSION if !64BIT
+	select PAGE_IDLE_FLAG
 	help
 	  This feature allows to estimate the amount of user pages that have
 	  not been touched during a given period of time. This information can
@@ -889,4 +894,6 @@ config IO_MAPPING
 config SECRETMEM
 	def_bool ARCH_HAS_SET_DIRECT_MAP && !EMBEDDED
 
+source "mm/damon/Kconfig"
+
 endmenu
diff --git a/mm/Makefile b/mm/Makefile
index e3436741d539..fc60a40ce954 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -38,7 +38,7 @@ mmu-y			:= nommu.o
 mmu-$(CONFIG_MMU)	:= highmem.o memory.o mincore.o \
 			   mlock.o mmap.o mmu_gather.o mprotect.o mremap.o \
 			   msync.o page_vma_mapped.o pagewalk.o \
-			   pgtable-generic.o rmap.o vmalloc.o ioremap.o
+			   pgtable-generic.o rmap.o vmalloc.o
 
 
 ifdef CONFIG_CROSS_MEMORY_ATTACH
@@ -118,6 +118,7 @@ obj-$(CONFIG_CMA_SYSFS) += cma_sysfs.o
 obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
 obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
 obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
+obj-$(CONFIG_DAMON) += damon/
 obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
 obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
 obj-$(CONFIG_ZONE_DEVICE) += memremap.o
@@ -128,3 +129,4 @@ obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
 obj-$(CONFIG_PAGE_REPORTING) += page_reporting.o
 obj-$(CONFIG_IO_MAPPING) += io-mapping.o
 obj-$(CONFIG_HAVE_BOOTMEM_INFO_NODE) += bootmem_info.o
+obj-$(CONFIG_GENERIC_IOREMAP) += ioremap.o
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index cd06dca232c3..4a9d4e27d0d9 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -271,6 +271,14 @@ void wb_wakeup_delayed(struct bdi_writeback *wb)
 	spin_unlock_bh(&wb->work_lock);
 }
 
+static void wb_update_bandwidth_workfn(struct work_struct *work)
+{
+	struct bdi_writeback *wb = container_of(to_delayed_work(work),
+						struct bdi_writeback, bw_dwork);
+
+	wb_update_bandwidth(wb);
+}
+
 /*
  * Initial write bandwidth: 100 MB/s
  */
@@ -293,6 +301,7 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
 	INIT_LIST_HEAD(&wb->b_dirty_time);
 	spin_lock_init(&wb->list_lock);
 
+	atomic_set(&wb->writeback_inodes, 0);
 	wb->bw_time_stamp = jiffies;
 	wb->balanced_dirty_ratelimit = INIT_BW;
 	wb->dirty_ratelimit = INIT_BW;
@@ -302,6 +311,7 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
 	spin_lock_init(&wb->work_lock);
 	INIT_LIST_HEAD(&wb->work_list);
 	INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
+	INIT_DELAYED_WORK(&wb->bw_dwork, wb_update_bandwidth_workfn);
 	wb->dirty_sleep = jiffies;
 
 	err = fprop_local_init_percpu(&wb->completions, gfp);
@@ -350,6 +360,7 @@ static void wb_shutdown(struct bdi_writeback *wb)
 	mod_delayed_work(bdi_wq, &wb->dwork, 0);
 	flush_delayed_work(&wb->dwork);
 	WARN_ON(!list_empty(&wb->work_list));
+	flush_delayed_work(&wb->bw_dwork);
 }
 
 static void wb_exit(struct bdi_writeback *wb)
diff --git a/mm/bootmem_info.c b/mm/bootmem_info.c
index 5b152dba7344..f03f42f426f6 100644
--- a/mm/bootmem_info.c
+++ b/mm/bootmem_info.c
@@ -39,7 +39,7 @@ void put_page_bootmem(struct page *page)
 }
 
 #ifndef CONFIG_SPARSEMEM_VMEMMAP
-static void register_page_bootmem_info_section(unsigned long start_pfn)
+static void __init register_page_bootmem_info_section(unsigned long start_pfn)
 {
 	unsigned long mapsize, section_nr, i;
 	struct mem_section *ms;
@@ -74,7 +74,7 @@ static void register_page_bootmem_info_section(unsigned long start_pfn)
 
 }
 #else /* CONFIG_SPARSEMEM_VMEMMAP */
-static void register_page_bootmem_info_section(unsigned long start_pfn)
+static void __init register_page_bootmem_info_section(unsigned long start_pfn)
 {
 	unsigned long mapsize, section_nr, i;
 	struct mem_section *ms;
diff --git a/mm/compaction.c b/mm/compaction.c
index 621508e0ecd5..bfc93da1c2c7 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -306,16 +306,14 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
 	 * is necessary for the block to be a migration source/target.
 	 */
 	do {
-		if (pfn_valid_within(pfn)) {
-			if (check_source && PageLRU(page)) {
-				clear_pageblock_skip(page);
-				return true;
-			}
+		if (check_source && PageLRU(page)) {
+			clear_pageblock_skip(page);
+			return true;
+		}
 
-			if (check_target && PageBuddy(page)) {
-				clear_pageblock_skip(page);
-				return true;
-			}
+		if (check_target && PageBuddy(page)) {
+			clear_pageblock_skip(page);
+			return true;
 		}
 
 		page += (1 << PAGE_ALLOC_COSTLY_ORDER);
@@ -585,8 +583,6 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 			break;
 
 		nr_scanned++;
-		if (!pfn_valid_within(blockpfn))
-			goto isolate_fail;
 
 		/*
 		 * For compound pages such as THP and hugetlbfs, we can save
@@ -885,8 +881,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 			cond_resched();
 		}
 
-		if (!pfn_valid_within(low_pfn))
-			goto isolate_fail;
 		nr_scanned++;
 
 		page = pfn_to_page(low_pfn);
@@ -2398,7 +2392,7 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
 
 		err = migrate_pages(&cc->migratepages, compaction_alloc,
 				compaction_free, (unsigned long)cc, cc->mode,
-				MR_COMPACTION);
+				MR_COMPACTION, NULL);
 
 		trace_mm_compaction_migratepages(cc->nr_migratepages, err,
 							&cc->migratepages);
@@ -2706,6 +2700,30 @@ static void compact_nodes(void)
  */
 unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
 
+int compaction_proactiveness_sysctl_handler(struct ctl_table *table, int write,
+		void *buffer, size_t *length, loff_t *ppos)
+{
+	int rc, nid;
+
+	rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
+	if (rc)
+		return rc;
+
+	if (write && sysctl_compaction_proactiveness) {
+		for_each_online_node(nid) {
+			pg_data_t *pgdat = NODE_DATA(nid);
+
+			if (pgdat->proactive_compact_trigger)
+				continue;
+
+			pgdat->proactive_compact_trigger = true;
+			wake_up_interruptible(&pgdat->kcompactd_wait);
+		}
+	}
+
+	return 0;
+}
+
 /*
  * This is the entry point for compacting all nodes via
  * /proc/sys/vm/compact_memory
@@ -2750,7 +2768,8 @@ void compaction_unregister_node(struct node *node)
 
 static inline bool kcompactd_work_requested(pg_data_t *pgdat)
 {
-	return pgdat->kcompactd_max_order > 0 || kthread_should_stop();
+	return pgdat->kcompactd_max_order > 0 || kthread_should_stop() ||
+		pgdat->proactive_compact_trigger;
 }
 
 static bool kcompactd_node_suitable(pg_data_t *pgdat)
@@ -2885,7 +2904,8 @@ static int kcompactd(void *p)
 {
 	pg_data_t *pgdat = (pg_data_t *)p;
 	struct task_struct *tsk = current;
-	unsigned int proactive_defer = 0;
+	long default_timeout = msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC);
+	long timeout = default_timeout;
 
 	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
 
@@ -2900,25 +2920,39 @@ static int kcompactd(void *p)
 	while (!kthread_should_stop()) {
 		unsigned long pflags;
 
+		/*
+		 * Avoid the unnecessary wakeup for proactive compaction
+		 * when it is disabled.
+		 */
+		if (!sysctl_compaction_proactiveness)
+			timeout = MAX_SCHEDULE_TIMEOUT;
 		trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
 		if (wait_event_freezable_timeout(pgdat->kcompactd_wait,
-			kcompactd_work_requested(pgdat),
-			msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC))) {
+			kcompactd_work_requested(pgdat), timeout) &&
+			!pgdat->proactive_compact_trigger) {
 
 			psi_memstall_enter(&pflags);
 			kcompactd_do_work(pgdat);
 			psi_memstall_leave(&pflags);
+			/*
+			 * Reset the timeout value. The defer timeout from
+			 * proactive compaction is lost here but that is fine
+			 * as the condition of the zone changing substantionally
+			 * then carrying on with the previous defer interval is
+			 * not useful.
+			 */
+			timeout = default_timeout;
 			continue;
 		}
 
-		/* kcompactd wait timeout */
+		/*
+		 * Start the proactive work with default timeout. Based
+		 * on the fragmentation score, this timeout is updated.
+		 */
+		timeout = default_timeout;
 		if (should_proactive_compact_node(pgdat)) {
 			unsigned int prev_score, score;
 
-			if (proactive_defer) {
-				proactive_defer--;
-				continue;
-			}
 			prev_score = fragmentation_score_node(pgdat);
 			proactive_compact_node(pgdat);
 			score = fragmentation_score_node(pgdat);
@@ -2926,9 +2960,12 @@ static int kcompactd(void *p)
 			 * Defer proactive compaction if the fragmentation
 			 * score did not go down i.e. no progress made.
 			 */
-			proactive_defer = score < prev_score ?
-					0 : 1 << COMPACT_MAX_DEFER_SHIFT;
+			if (unlikely(score >= prev_score))
+				timeout =
+				   default_timeout << COMPACT_MAX_DEFER_SHIFT;
 		}
+		if (unlikely(pgdat->proactive_compact_trigger))
+			pgdat->proactive_compact_trigger = false;
 	}
 
 	return 0;
diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig
new file mode 100644
index 000000000000..37024798a97c
--- /dev/null
+++ b/mm/damon/Kconfig
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+menu "Data Access Monitoring"
+
+config DAMON
+	bool "DAMON: Data Access Monitoring Framework"
+	help
+	  This builds a framework that allows kernel subsystems to monitor
+	  access frequency of each memory region. The information can be useful
+	  for performance-centric DRAM level memory management.
+
+	  See https://damonitor.github.io/doc/html/latest-damon/index.html for
+	  more information.
+
+config DAMON_KUNIT_TEST
+	bool "Test for damon" if !KUNIT_ALL_TESTS
+	depends on DAMON && KUNIT=y
+	default KUNIT_ALL_TESTS
+	help
+	  This builds the DAMON Kunit test suite.
+
+	  For more information on KUnit and unit tests in general, please refer
+	  to the KUnit documentation.
+
+	  If unsure, say N.
+
+config DAMON_VADDR
+	bool "Data access monitoring primitives for virtual address spaces"
+	depends on DAMON && MMU
+	select PAGE_IDLE_FLAG
+	help
+	  This builds the default data access monitoring primitives for DAMON
+	  that works for virtual address spaces.
+
+config DAMON_VADDR_KUNIT_TEST
+	bool "Test for DAMON primitives" if !KUNIT_ALL_TESTS
+	depends on DAMON_VADDR && KUNIT=y
+	default KUNIT_ALL_TESTS
+	help
+	  This builds the DAMON virtual addresses primitives Kunit test suite.
+
+	  For more information on KUnit and unit tests in general, please refer
+	  to the KUnit documentation.
+
+	  If unsure, say N.
+
+config DAMON_DBGFS
+	bool "DAMON debugfs interface"
+	depends on DAMON_VADDR && DEBUG_FS
+	help
+	  This builds the debugfs interface for DAMON.  The user space admins
+	  can use the interface for arbitrary data access monitoring.
+
+	  If unsure, say N.
+
+config DAMON_DBGFS_KUNIT_TEST
+	bool "Test for damon debugfs interface" if !KUNIT_ALL_TESTS
+	depends on DAMON_DBGFS && KUNIT=y
+	default KUNIT_ALL_TESTS
+	help
+	  This builds the DAMON debugfs interface Kunit test suite.
+
+	  For more information on KUnit and unit tests in general, please refer
+	  to the KUnit documentation.
+
+	  If unsure, say N.
+
+endmenu
diff --git a/mm/damon/Makefile b/mm/damon/Makefile
new file mode 100644
index 000000000000..fed4be3bace3
--- /dev/null
+++ b/mm/damon/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_DAMON)		:= core.o
+obj-$(CONFIG_DAMON_VADDR)	+= vaddr.o
+obj-$(CONFIG_DAMON_DBGFS)	+= dbgfs.o
diff --git a/mm/damon/core-test.h b/mm/damon/core-test.h
new file mode 100644
index 000000000000..c938a9c34e6c
--- /dev/null
+++ b/mm/damon/core-test.h
@@ -0,0 +1,253 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Data Access Monitor Unit Tests
+ *
+ * Copyright 2019 Amazon.com, Inc. or its affiliates.  All rights reserved.
+ *
+ * Author: SeongJae Park <sjpark@amazon.de>
+ */
+
+#ifdef CONFIG_DAMON_KUNIT_TEST
+
+#ifndef _DAMON_CORE_TEST_H
+#define _DAMON_CORE_TEST_H
+
+#include <kunit/test.h>
+
+static void damon_test_regions(struct kunit *test)
+{
+	struct damon_region *r;
+	struct damon_target *t;
+
+	r = damon_new_region(1, 2);
+	KUNIT_EXPECT_EQ(test, 1ul, r->ar.start);
+	KUNIT_EXPECT_EQ(test, 2ul, r->ar.end);
+	KUNIT_EXPECT_EQ(test, 0u, r->nr_accesses);
+
+	t = damon_new_target(42);
+	KUNIT_EXPECT_EQ(test, 0u, damon_nr_regions(t));
+
+	damon_add_region(r, t);
+	KUNIT_EXPECT_EQ(test, 1u, damon_nr_regions(t));
+
+	damon_del_region(r, t);
+	KUNIT_EXPECT_EQ(test, 0u, damon_nr_regions(t));
+
+	damon_free_target(t);
+}
+
+static unsigned int nr_damon_targets(struct damon_ctx *ctx)
+{
+	struct damon_target *t;
+	unsigned int nr_targets = 0;
+
+	damon_for_each_target(t, ctx)
+		nr_targets++;
+
+	return nr_targets;
+}
+
+static void damon_test_target(struct kunit *test)
+{
+	struct damon_ctx *c = damon_new_ctx();
+	struct damon_target *t;
+
+	t = damon_new_target(42);
+	KUNIT_EXPECT_EQ(test, 42ul, t->id);
+	KUNIT_EXPECT_EQ(test, 0u, nr_damon_targets(c));
+
+	damon_add_target(c, t);
+	KUNIT_EXPECT_EQ(test, 1u, nr_damon_targets(c));
+
+	damon_destroy_target(t);
+	KUNIT_EXPECT_EQ(test, 0u, nr_damon_targets(c));
+
+	damon_destroy_ctx(c);
+}
+
+/*
+ * Test kdamond_reset_aggregated()
+ *
+ * DAMON checks access to each region and aggregates this information as the
+ * access frequency of each region.  In detail, it increases '->nr_accesses' of
+ * regions that an access has confirmed.  'kdamond_reset_aggregated()' flushes
+ * the aggregated information ('->nr_accesses' of each regions) to the result
+ * buffer.  As a result of the flushing, the '->nr_accesses' of regions are
+ * initialized to zero.
+ */
+static void damon_test_aggregate(struct kunit *test)
+{
+	struct damon_ctx *ctx = damon_new_ctx();
+	unsigned long target_ids[] = {1, 2, 3};
+	unsigned long saddr[][3] = {{10, 20, 30}, {5, 42, 49}, {13, 33, 55} };
+	unsigned long eaddr[][3] = {{15, 27, 40}, {31, 45, 55}, {23, 44, 66} };
+	unsigned long accesses[][3] = {{42, 95, 84}, {10, 20, 30}, {0, 1, 2} };
+	struct damon_target *t;
+	struct damon_region *r;
+	int it, ir;
+
+	damon_set_targets(ctx, target_ids, 3);
+
+	it = 0;
+	damon_for_each_target(t, ctx) {
+		for (ir = 0; ir < 3; ir++) {
+			r = damon_new_region(saddr[it][ir], eaddr[it][ir]);
+			r->nr_accesses = accesses[it][ir];
+			damon_add_region(r, t);
+		}
+		it++;
+	}
+	kdamond_reset_aggregated(ctx);
+	it = 0;
+	damon_for_each_target(t, ctx) {
+		ir = 0;
+		/* '->nr_accesses' should be zeroed */
+		damon_for_each_region(r, t) {
+			KUNIT_EXPECT_EQ(test, 0u, r->nr_accesses);
+			ir++;
+		}
+		/* regions should be preserved */
+		KUNIT_EXPECT_EQ(test, 3, ir);
+		it++;
+	}
+	/* targets also should be preserved */
+	KUNIT_EXPECT_EQ(test, 3, it);
+
+	damon_destroy_ctx(ctx);
+}
+
+static void damon_test_split_at(struct kunit *test)
+{
+	struct damon_ctx *c = damon_new_ctx();
+	struct damon_target *t;
+	struct damon_region *r;
+
+	t = damon_new_target(42);
+	r = damon_new_region(0, 100);
+	damon_add_region(r, t);
+	damon_split_region_at(c, t, r, 25);
+	KUNIT_EXPECT_EQ(test, r->ar.start, 0ul);
+	KUNIT_EXPECT_EQ(test, r->ar.end, 25ul);
+
+	r = damon_next_region(r);
+	KUNIT_EXPECT_EQ(test, r->ar.start, 25ul);
+	KUNIT_EXPECT_EQ(test, r->ar.end, 100ul);
+
+	damon_free_target(t);
+	damon_destroy_ctx(c);
+}
+
+static void damon_test_merge_two(struct kunit *test)
+{
+	struct damon_target *t;
+	struct damon_region *r, *r2, *r3;
+	int i;
+
+	t = damon_new_target(42);
+	r = damon_new_region(0, 100);
+	r->nr_accesses = 10;
+	damon_add_region(r, t);
+	r2 = damon_new_region(100, 300);
+	r2->nr_accesses = 20;
+	damon_add_region(r2, t);
+
+	damon_merge_two_regions(t, r, r2);
+	KUNIT_EXPECT_EQ(test, r->ar.start, 0ul);
+	KUNIT_EXPECT_EQ(test, r->ar.end, 300ul);
+	KUNIT_EXPECT_EQ(test, r->nr_accesses, 16u);
+
+	i = 0;
+	damon_for_each_region(r3, t) {
+		KUNIT_EXPECT_PTR_EQ(test, r, r3);
+		i++;
+	}
+	KUNIT_EXPECT_EQ(test, i, 1);
+
+	damon_free_target(t);
+}
+
+static struct damon_region *__nth_region_of(struct damon_target *t, int idx)
+{
+	struct damon_region *r;
+	unsigned int i = 0;
+
+	damon_for_each_region(r, t) {
+		if (i++ == idx)
+			return r;
+	}
+
+	return NULL;
+}
+
+static void damon_test_merge_regions_of(struct kunit *test)
+{
+	struct damon_target *t;
+	struct damon_region *r;
+	unsigned long sa[] = {0, 100, 114, 122, 130, 156, 170, 184};
+	unsigned long ea[] = {100, 112, 122, 130, 156, 170, 184, 230};
+	unsigned int nrs[] = {0, 0, 10, 10, 20, 30, 1, 2};
+
+	unsigned long saddrs[] = {0, 114, 130, 156, 170};
+	unsigned long eaddrs[] = {112, 130, 156, 170, 230};
+	int i;
+
+	t = damon_new_target(42);
+	for (i = 0; i < ARRAY_SIZE(sa); i++) {
+		r = damon_new_region(sa[i], ea[i]);
+		r->nr_accesses = nrs[i];
+		damon_add_region(r, t);
+	}
+
+	damon_merge_regions_of(t, 9, 9999);
+	/* 0-112, 114-130, 130-156, 156-170 */
+	KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 5u);
+	for (i = 0; i < 5; i++) {
+		r = __nth_region_of(t, i);
+		KUNIT_EXPECT_EQ(test, r->ar.start, saddrs[i]);
+		KUNIT_EXPECT_EQ(test, r->ar.end, eaddrs[i]);
+	}
+	damon_free_target(t);
+}
+
+static void damon_test_split_regions_of(struct kunit *test)
+{
+	struct damon_ctx *c = damon_new_ctx();
+	struct damon_target *t;
+	struct damon_region *r;
+
+	t = damon_new_target(42);
+	r = damon_new_region(0, 22);
+	damon_add_region(r, t);
+	damon_split_regions_of(c, t, 2);
+	KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 2u);
+	damon_free_target(t);
+
+	t = damon_new_target(42);
+	r = damon_new_region(0, 220);
+	damon_add_region(r, t);
+	damon_split_regions_of(c, t, 4);
+	KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 4u);
+	damon_free_target(t);
+	damon_destroy_ctx(c);
+}
+
+static struct kunit_case damon_test_cases[] = {
+	KUNIT_CASE(damon_test_target),
+	KUNIT_CASE(damon_test_regions),
+	KUNIT_CASE(damon_test_aggregate),
+	KUNIT_CASE(damon_test_split_at),
+	KUNIT_CASE(damon_test_merge_two),
+	KUNIT_CASE(damon_test_merge_regions_of),
+	KUNIT_CASE(damon_test_split_regions_of),
+	{},
+};
+
+static struct kunit_suite damon_test_suite = {
+	.name = "damon",
+	.test_cases = damon_test_cases,
+};
+kunit_test_suite(damon_test_suite);
+
+#endif /* _DAMON_CORE_TEST_H */
+
+#endif	/* CONFIG_DAMON_KUNIT_TEST */
diff --git a/mm/damon/core.c b/mm/damon/core.c
new file mode 100644
index 000000000000..30e9211f494a
--- /dev/null
+++ b/mm/damon/core.c
@@ -0,0 +1,720 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Data Access Monitor
+ *
+ * Author: SeongJae Park <sjpark@amazon.de>
+ */
+
+#define pr_fmt(fmt) "damon: " fmt
+
+#include <linux/damon.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/damon.h>
+
+#ifdef CONFIG_DAMON_KUNIT_TEST
+#undef DAMON_MIN_REGION
+#define DAMON_MIN_REGION 1
+#endif
+
+/* Get a random number in [l, r) */
+#define damon_rand(l, r) (l + prandom_u32_max(r - l))
+
+static DEFINE_MUTEX(damon_lock);
+static int nr_running_ctxs;
+
+/*
+ * Construct a damon_region struct
+ *
+ * Returns the pointer to the new struct if success, or NULL otherwise
+ */
+struct damon_region *damon_new_region(unsigned long start, unsigned long end)
+{
+	struct damon_region *region;
+
+	region = kmalloc(sizeof(*region), GFP_KERNEL);
+	if (!region)
+		return NULL;
+
+	region->ar.start = start;
+	region->ar.end = end;
+	region->nr_accesses = 0;
+	INIT_LIST_HEAD(&region->list);
+
+	return region;
+}
+
+/*
+ * Add a region between two other regions
+ */
+inline void damon_insert_region(struct damon_region *r,
+		struct damon_region *prev, struct damon_region *next,
+		struct damon_target *t)
+{
+	__list_add(&r->list, &prev->list, &next->list);
+	t->nr_regions++;
+}
+
+void damon_add_region(struct damon_region *r, struct damon_target *t)
+{
+	list_add_tail(&r->list, &t->regions_list);
+	t->nr_regions++;
+}
+
+static void damon_del_region(struct damon_region *r, struct damon_target *t)
+{
+	list_del(&r->list);
+	t->nr_regions--;
+}
+
+static void damon_free_region(struct damon_region *r)
+{
+	kfree(r);
+}
+
+void damon_destroy_region(struct damon_region *r, struct damon_target *t)
+{
+	damon_del_region(r, t);
+	damon_free_region(r);
+}
+
+/*
+ * Construct a damon_target struct
+ *
+ * Returns the pointer to the new struct if success, or NULL otherwise
+ */
+struct damon_target *damon_new_target(unsigned long id)
+{
+	struct damon_target *t;
+
+	t = kmalloc(sizeof(*t), GFP_KERNEL);
+	if (!t)
+		return NULL;
+
+	t->id = id;
+	t->nr_regions = 0;
+	INIT_LIST_HEAD(&t->regions_list);
+
+	return t;
+}
+
+void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
+{
+	list_add_tail(&t->list, &ctx->adaptive_targets);
+}
+
+static void damon_del_target(struct damon_target *t)
+{
+	list_del(&t->list);
+}
+
+void damon_free_target(struct damon_target *t)
+{
+	struct damon_region *r, *next;
+
+	damon_for_each_region_safe(r, next, t)
+		damon_free_region(r);
+	kfree(t);
+}
+
+void damon_destroy_target(struct damon_target *t)
+{
+	damon_del_target(t);
+	damon_free_target(t);
+}
+
+unsigned int damon_nr_regions(struct damon_target *t)
+{
+	return t->nr_regions;
+}
+
+struct damon_ctx *damon_new_ctx(void)
+{
+	struct damon_ctx *ctx;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return NULL;
+
+	ctx->sample_interval = 5 * 1000;
+	ctx->aggr_interval = 100 * 1000;
+	ctx->primitive_update_interval = 60 * 1000 * 1000;
+
+	ktime_get_coarse_ts64(&ctx->last_aggregation);
+	ctx->last_primitive_update = ctx->last_aggregation;
+
+	mutex_init(&ctx->kdamond_lock);
+
+	ctx->min_nr_regions = 10;
+	ctx->max_nr_regions = 1000;
+
+	INIT_LIST_HEAD(&ctx->adaptive_targets);
+
+	return ctx;
+}
+
+static void damon_destroy_targets(struct damon_ctx *ctx)
+{
+	struct damon_target *t, *next_t;
+
+	if (ctx->primitive.cleanup) {
+		ctx->primitive.cleanup(ctx);
+		return;
+	}
+
+	damon_for_each_target_safe(t, next_t, ctx)
+		damon_destroy_target(t);
+}
+
+void damon_destroy_ctx(struct damon_ctx *ctx)
+{
+	damon_destroy_targets(ctx);
+	kfree(ctx);
+}
+
+/**
+ * damon_set_targets() - Set monitoring targets.
+ * @ctx:	monitoring context
+ * @ids:	array of target ids
+ * @nr_ids:	number of entries in @ids
+ *
+ * This function should not be called while the kdamond is running.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_set_targets(struct damon_ctx *ctx,
+		      unsigned long *ids, ssize_t nr_ids)
+{
+	ssize_t i;
+	struct damon_target *t, *next;
+
+	damon_destroy_targets(ctx);
+
+	for (i = 0; i < nr_ids; i++) {
+		t = damon_new_target(ids[i]);
+		if (!t) {
+			pr_err("Failed to alloc damon_target\n");
+			/* The caller should do cleanup of the ids itself */
+			damon_for_each_target_safe(t, next, ctx)
+				damon_destroy_target(t);
+			return -ENOMEM;
+		}
+		damon_add_target(ctx, t);
+	}
+
+	return 0;
+}
+
+/**
+ * damon_set_attrs() - Set attributes for the monitoring.
+ * @ctx:		monitoring context
+ * @sample_int:		time interval between samplings
+ * @aggr_int:		time interval between aggregations
+ * @primitive_upd_int:	time interval between monitoring primitive updates
+ * @min_nr_reg:		minimal number of regions
+ * @max_nr_reg:		maximum number of regions
+ *
+ * This function should not be called while the kdamond is running.
+ * Every time interval is in micro-seconds.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
+		    unsigned long aggr_int, unsigned long primitive_upd_int,
+		    unsigned long min_nr_reg, unsigned long max_nr_reg)
+{
+	if (min_nr_reg < 3) {
+		pr_err("min_nr_regions (%lu) must be at least 3\n",
+				min_nr_reg);
+		return -EINVAL;
+	}
+	if (min_nr_reg > max_nr_reg) {
+		pr_err("invalid nr_regions.  min (%lu) > max (%lu)\n",
+				min_nr_reg, max_nr_reg);
+		return -EINVAL;
+	}
+
+	ctx->sample_interval = sample_int;
+	ctx->aggr_interval = aggr_int;
+	ctx->primitive_update_interval = primitive_upd_int;
+	ctx->min_nr_regions = min_nr_reg;
+	ctx->max_nr_regions = max_nr_reg;
+
+	return 0;
+}
+
+/**
+ * damon_nr_running_ctxs() - Return number of currently running contexts.
+ */
+int damon_nr_running_ctxs(void)
+{
+	int nr_ctxs;
+
+	mutex_lock(&damon_lock);
+	nr_ctxs = nr_running_ctxs;
+	mutex_unlock(&damon_lock);
+
+	return nr_ctxs;
+}
+
+/* Returns the size upper limit for each monitoring region */
+static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
+{
+	struct damon_target *t;
+	struct damon_region *r;
+	unsigned long sz = 0;
+
+	damon_for_each_target(t, ctx) {
+		damon_for_each_region(r, t)
+			sz += r->ar.end - r->ar.start;
+	}
+
+	if (ctx->min_nr_regions)
+		sz /= ctx->min_nr_regions;
+	if (sz < DAMON_MIN_REGION)
+		sz = DAMON_MIN_REGION;
+
+	return sz;
+}
+
+static bool damon_kdamond_running(struct damon_ctx *ctx)
+{
+	bool running;
+
+	mutex_lock(&ctx->kdamond_lock);
+	running = ctx->kdamond != NULL;
+	mutex_unlock(&ctx->kdamond_lock);
+
+	return running;
+}
+
+static int kdamond_fn(void *data);
+
+/*
+ * __damon_start() - Starts monitoring with given context.
+ * @ctx:	monitoring context
+ *
+ * This function should be called while damon_lock is hold.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int __damon_start(struct damon_ctx *ctx)
+{
+	int err = -EBUSY;
+
+	mutex_lock(&ctx->kdamond_lock);
+	if (!ctx->kdamond) {
+		err = 0;
+		ctx->kdamond_stop = false;
+		ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
+				nr_running_ctxs);
+		if (IS_ERR(ctx->kdamond)) {
+			err = PTR_ERR(ctx->kdamond);
+			ctx->kdamond = 0;
+		}
+	}
+	mutex_unlock(&ctx->kdamond_lock);
+
+	return err;
+}
+
+/**
+ * damon_start() - Starts the monitorings for a given group of contexts.
+ * @ctxs:	an array of the pointers for contexts to start monitoring
+ * @nr_ctxs:	size of @ctxs
+ *
+ * This function starts a group of monitoring threads for a group of monitoring
+ * contexts.  One thread per each context is created and run in parallel.  The
+ * caller should handle synchronization between the threads by itself.  If a
+ * group of threads that created by other 'damon_start()' call is currently
+ * running, this function does nothing but returns -EBUSY.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_start(struct damon_ctx **ctxs, int nr_ctxs)
+{
+	int i;
+	int err = 0;
+
+	mutex_lock(&damon_lock);
+	if (nr_running_ctxs) {
+		mutex_unlock(&damon_lock);
+		return -EBUSY;
+	}
+
+	for (i = 0; i < nr_ctxs; i++) {
+		err = __damon_start(ctxs[i]);
+		if (err)
+			break;
+		nr_running_ctxs++;
+	}
+	mutex_unlock(&damon_lock);
+
+	return err;
+}
+
+/*
+ * __damon_stop() - Stops monitoring of given context.
+ * @ctx:	monitoring context
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int __damon_stop(struct damon_ctx *ctx)
+{
+	mutex_lock(&ctx->kdamond_lock);
+	if (ctx->kdamond) {
+		ctx->kdamond_stop = true;
+		mutex_unlock(&ctx->kdamond_lock);
+		while (damon_kdamond_running(ctx))
+			usleep_range(ctx->sample_interval,
+					ctx->sample_interval * 2);
+		return 0;
+	}
+	mutex_unlock(&ctx->kdamond_lock);
+
+	return -EPERM;
+}
+
+/**
+ * damon_stop() - Stops the monitorings for a given group of contexts.
+ * @ctxs:	an array of the pointers for contexts to stop monitoring
+ * @nr_ctxs:	size of @ctxs
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
+{
+	int i, err = 0;
+
+	for (i = 0; i < nr_ctxs; i++) {
+		/* nr_running_ctxs is decremented in kdamond_fn */
+		err = __damon_stop(ctxs[i]);
+		if (err)
+			return err;
+	}
+
+	return err;
+}
+
+/*
+ * damon_check_reset_time_interval() - Check if a time interval is elapsed.
+ * @baseline:	the time to check whether the interval has elapsed since
+ * @interval:	the time interval (microseconds)
+ *
+ * See whether the given time interval has passed since the given baseline
+ * time.  If so, it also updates the baseline to current time for next check.
+ *
+ * Return:	true if the time interval has passed, or false otherwise.
+ */
+static bool damon_check_reset_time_interval(struct timespec64 *baseline,
+		unsigned long interval)
+{
+	struct timespec64 now;
+
+	ktime_get_coarse_ts64(&now);
+	if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) <
+			interval * 1000)
+		return false;
+	*baseline = now;
+	return true;
+}
+
+/*
+ * Check whether it is time to flush the aggregated information
+ */
+static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx)
+{
+	return damon_check_reset_time_interval(&ctx->last_aggregation,
+			ctx->aggr_interval);
+}
+
+/*
+ * Reset the aggregated monitoring results ('nr_accesses' of each region).
+ */
+static void kdamond_reset_aggregated(struct damon_ctx *c)
+{
+	struct damon_target *t;
+
+	damon_for_each_target(t, c) {
+		struct damon_region *r;
+
+		damon_for_each_region(r, t) {
+			trace_damon_aggregated(t, r, damon_nr_regions(t));
+			r->nr_accesses = 0;
+		}
+	}
+}
+
+#define sz_damon_region(r) (r->ar.end - r->ar.start)
+
+/*
+ * Merge two adjacent regions into one region
+ */
+static void damon_merge_two_regions(struct damon_target *t,
+		struct damon_region *l, struct damon_region *r)
+{
+	unsigned long sz_l = sz_damon_region(l), sz_r = sz_damon_region(r);
+
+	l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
+			(sz_l + sz_r);
+	l->ar.end = r->ar.end;
+	damon_destroy_region(r, t);
+}
+
+#define diff_of(a, b) (a > b ? a - b : b - a)
+
+/*
+ * Merge adjacent regions having similar access frequencies
+ *
+ * t		target affected by this merge operation
+ * thres	'->nr_accesses' diff threshold for the merge
+ * sz_limit	size upper limit of each region
+ */
+static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
+				   unsigned long sz_limit)
+{
+	struct damon_region *r, *prev = NULL, *next;
+
+	damon_for_each_region_safe(r, next, t) {
+		if (prev && prev->ar.end == r->ar.start &&
+		    diff_of(prev->nr_accesses, r->nr_accesses) <= thres &&
+		    sz_damon_region(prev) + sz_damon_region(r) <= sz_limit)
+			damon_merge_two_regions(t, prev, r);
+		else
+			prev = r;
+	}
+}
+
+/*
+ * Merge adjacent regions having similar access frequencies
+ *
+ * threshold	'->nr_accesses' diff threshold for the merge
+ * sz_limit	size upper limit of each region
+ *
+ * This function merges monitoring target regions which are adjacent and their
+ * access frequencies are similar.  This is for minimizing the monitoring
+ * overhead under the dynamically changeable access pattern.  If a merge was
+ * unnecessarily made, later 'kdamond_split_regions()' will revert it.
+ */
+static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
+				  unsigned long sz_limit)
+{
+	struct damon_target *t;
+
+	damon_for_each_target(t, c)
+		damon_merge_regions_of(t, threshold, sz_limit);
+}
+
+/*
+ * Split a region in two
+ *
+ * r		the region to be split
+ * sz_r		size of the first sub-region that will be made
+ */
+static void damon_split_region_at(struct damon_ctx *ctx,
+		struct damon_target *t, struct damon_region *r,
+		unsigned long sz_r)
+{
+	struct damon_region *new;
+
+	new = damon_new_region(r->ar.start + sz_r, r->ar.end);
+	if (!new)
+		return;
+
+	r->ar.end = new->ar.start;
+
+	damon_insert_region(new, r, damon_next_region(r), t);
+}
+
+/* Split every region in the given target into 'nr_subs' regions */
+static void damon_split_regions_of(struct damon_ctx *ctx,
+				     struct damon_target *t, int nr_subs)
+{
+	struct damon_region *r, *next;
+	unsigned long sz_region, sz_sub = 0;
+	int i;
+
+	damon_for_each_region_safe(r, next, t) {
+		sz_region = r->ar.end - r->ar.start;
+
+		for (i = 0; i < nr_subs - 1 &&
+				sz_region > 2 * DAMON_MIN_REGION; i++) {
+			/*
+			 * Randomly select size of left sub-region to be at
+			 * least 10 percent and at most 90% of original region
+			 */
+			sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
+					sz_region / 10, DAMON_MIN_REGION);
+			/* Do not allow blank region */
+			if (sz_sub == 0 || sz_sub >= sz_region)
+				continue;
+
+			damon_split_region_at(ctx, t, r, sz_sub);
+			sz_region = sz_sub;
+		}
+	}
+}
+
+/*
+ * Split every target region into randomly-sized small regions
+ *
+ * This function splits every target region into random-sized small regions if
+ * current total number of the regions is equal or smaller than half of the
+ * user-specified maximum number of regions.  This is for maximizing the
+ * monitoring accuracy under the dynamically changeable access patterns.  If a
+ * split was unnecessarily made, later 'kdamond_merge_regions()' will revert
+ * it.
+ */
+static void kdamond_split_regions(struct damon_ctx *ctx)
+{
+	struct damon_target *t;
+	unsigned int nr_regions = 0;
+	static unsigned int last_nr_regions;
+	int nr_subregions = 2;
+
+	damon_for_each_target(t, ctx)
+		nr_regions += damon_nr_regions(t);
+
+	if (nr_regions > ctx->max_nr_regions / 2)
+		return;
+
+	/* Maybe the middle of the region has different access frequency */
+	if (last_nr_regions == nr_regions &&
+			nr_regions < ctx->max_nr_regions / 3)
+		nr_subregions = 3;
+
+	damon_for_each_target(t, ctx)
+		damon_split_regions_of(ctx, t, nr_subregions);
+
+	last_nr_regions = nr_regions;
+}
+
+/*
+ * Check whether it is time to check and apply the target monitoring regions
+ *
+ * Returns true if it is.
+ */
+static bool kdamond_need_update_primitive(struct damon_ctx *ctx)
+{
+	return damon_check_reset_time_interval(&ctx->last_primitive_update,
+			ctx->primitive_update_interval);
+}
+
+/*
+ * Check whether current monitoring should be stopped
+ *
+ * The monitoring is stopped when either the user requested to stop, or all
+ * monitoring targets are invalid.
+ *
+ * Returns true if need to stop current monitoring.
+ */
+static bool kdamond_need_stop(struct damon_ctx *ctx)
+{
+	struct damon_target *t;
+	bool stop;
+
+	mutex_lock(&ctx->kdamond_lock);
+	stop = ctx->kdamond_stop;
+	mutex_unlock(&ctx->kdamond_lock);
+	if (stop)
+		return true;
+
+	if (!ctx->primitive.target_valid)
+		return false;
+
+	damon_for_each_target(t, ctx) {
+		if (ctx->primitive.target_valid(t))
+			return false;
+	}
+
+	return true;
+}
+
+static void set_kdamond_stop(struct damon_ctx *ctx)
+{
+	mutex_lock(&ctx->kdamond_lock);
+	ctx->kdamond_stop = true;
+	mutex_unlock(&ctx->kdamond_lock);
+}
+
+/*
+ * The monitoring daemon that runs as a kernel thread
+ */
+static int kdamond_fn(void *data)
+{
+	struct damon_ctx *ctx = (struct damon_ctx *)data;
+	struct damon_target *t;
+	struct damon_region *r, *next;
+	unsigned int max_nr_accesses = 0;
+	unsigned long sz_limit = 0;
+
+	mutex_lock(&ctx->kdamond_lock);
+	pr_info("kdamond (%d) starts\n", ctx->kdamond->pid);
+	mutex_unlock(&ctx->kdamond_lock);
+
+	if (ctx->primitive.init)
+		ctx->primitive.init(ctx);
+	if (ctx->callback.before_start && ctx->callback.before_start(ctx))
+		set_kdamond_stop(ctx);
+
+	sz_limit = damon_region_sz_limit(ctx);
+
+	while (!kdamond_need_stop(ctx)) {
+		if (ctx->primitive.prepare_access_checks)
+			ctx->primitive.prepare_access_checks(ctx);
+		if (ctx->callback.after_sampling &&
+				ctx->callback.after_sampling(ctx))
+			set_kdamond_stop(ctx);
+
+		usleep_range(ctx->sample_interval, ctx->sample_interval + 1);
+
+		if (ctx->primitive.check_accesses)
+			max_nr_accesses = ctx->primitive.check_accesses(ctx);
+
+		if (kdamond_aggregate_interval_passed(ctx)) {
+			kdamond_merge_regions(ctx,
+					max_nr_accesses / 10,
+					sz_limit);
+			if (ctx->callback.after_aggregation &&
+					ctx->callback.after_aggregation(ctx))
+				set_kdamond_stop(ctx);
+			kdamond_reset_aggregated(ctx);
+			kdamond_split_regions(ctx);
+			if (ctx->primitive.reset_aggregated)
+				ctx->primitive.reset_aggregated(ctx);
+		}
+
+		if (kdamond_need_update_primitive(ctx)) {
+			if (ctx->primitive.update)
+				ctx->primitive.update(ctx);
+			sz_limit = damon_region_sz_limit(ctx);
+		}
+	}
+	damon_for_each_target(t, ctx) {
+		damon_for_each_region_safe(r, next, t)
+			damon_destroy_region(r, t);
+	}
+
+	if (ctx->callback.before_terminate &&
+			ctx->callback.before_terminate(ctx))
+		set_kdamond_stop(ctx);
+	if (ctx->primitive.cleanup)
+		ctx->primitive.cleanup(ctx);
+
+	pr_debug("kdamond (%d) finishes\n", ctx->kdamond->pid);
+	mutex_lock(&ctx->kdamond_lock);
+	ctx->kdamond = NULL;
+	mutex_unlock(&ctx->kdamond_lock);
+
+	mutex_lock(&damon_lock);
+	nr_running_ctxs--;
+	mutex_unlock(&damon_lock);
+
+	do_exit(0);
+}
+
+#include "core-test.h"
diff --git a/mm/damon/dbgfs-test.h b/mm/damon/dbgfs-test.h
new file mode 100644
index 000000000000..930e83bceef0
--- /dev/null
+++ b/mm/damon/dbgfs-test.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * DAMON Debugfs Interface Unit Tests
+ *
+ * Author: SeongJae Park <sjpark@amazon.de>
+ */
+
+#ifdef CONFIG_DAMON_DBGFS_KUNIT_TEST
+
+#ifndef _DAMON_DBGFS_TEST_H
+#define _DAMON_DBGFS_TEST_H
+
+#include <kunit/test.h>
+
+static void damon_dbgfs_test_str_to_target_ids(struct kunit *test)
+{
+	char *question;
+	unsigned long *answers;
+	unsigned long expected[] = {12, 35, 46};
+	ssize_t nr_integers = 0, i;
+
+	question = "123";
+	answers = str_to_target_ids(question, strnlen(question, 128),
+			&nr_integers);
+	KUNIT_EXPECT_EQ(test, (ssize_t)1, nr_integers);
+	KUNIT_EXPECT_EQ(test, 123ul, answers[0]);
+	kfree(answers);
+
+	question = "123abc";
+	answers = str_to_target_ids(question, strnlen(question, 128),
+			&nr_integers);
+	KUNIT_EXPECT_EQ(test, (ssize_t)1, nr_integers);
+	KUNIT_EXPECT_EQ(test, 123ul, answers[0]);
+	kfree(answers);
+
+	question = "a123";
+	answers = str_to_target_ids(question, strnlen(question, 128),
+			&nr_integers);
+	KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers);
+	kfree(answers);
+
+	question = "12 35";
+	answers = str_to_target_ids(question, strnlen(question, 128),
+			&nr_integers);
+	KUNIT_EXPECT_EQ(test, (ssize_t)2, nr_integers);
+	for (i = 0; i < nr_integers; i++)
+		KUNIT_EXPECT_EQ(test, expected[i], answers[i]);
+	kfree(answers);
+
+	question = "12 35 46";
+	answers = str_to_target_ids(question, strnlen(question, 128),
+			&nr_integers);
+	KUNIT_EXPECT_EQ(test, (ssize_t)3, nr_integers);
+	for (i = 0; i < nr_integers; i++)
+		KUNIT_EXPECT_EQ(test, expected[i], answers[i]);
+	kfree(answers);
+
+	question = "12 35 abc 46";
+	answers = str_to_target_ids(question, strnlen(question, 128),
+			&nr_integers);
+	KUNIT_EXPECT_EQ(test, (ssize_t)2, nr_integers);
+	for (i = 0; i < 2; i++)
+		KUNIT_EXPECT_EQ(test, expected[i], answers[i]);
+	kfree(answers);
+
+	question = "";
+	answers = str_to_target_ids(question, strnlen(question, 128),
+			&nr_integers);
+	KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers);
+	kfree(answers);
+
+	question = "\n";
+	answers = str_to_target_ids(question, strnlen(question, 128),
+			&nr_integers);
+	KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers);
+	kfree(answers);
+}
+
+static void damon_dbgfs_test_set_targets(struct kunit *test)
+{
+	struct damon_ctx *ctx = dbgfs_new_ctx();
+	unsigned long ids[] = {1, 2, 3};
+	char buf[64];
+
+	/* Make DAMON consider target id as plain number */
+	ctx->primitive.target_valid = NULL;
+	ctx->primitive.cleanup = NULL;
+
+	damon_set_targets(ctx, ids, 3);
+	sprint_target_ids(ctx, buf, 64);
+	KUNIT_EXPECT_STREQ(test, (char *)buf, "1 2 3\n");
+
+	damon_set_targets(ctx, NULL, 0);
+	sprint_target_ids(ctx, buf, 64);
+	KUNIT_EXPECT_STREQ(test, (char *)buf, "\n");
+
+	damon_set_targets(ctx, (unsigned long []){1, 2}, 2);
+	sprint_target_ids(ctx, buf, 64);
+	KUNIT_EXPECT_STREQ(test, (char *)buf, "1 2\n");
+
+	damon_set_targets(ctx, (unsigned long []){2}, 1);
+	sprint_target_ids(ctx, buf, 64);
+	KUNIT_EXPECT_STREQ(test, (char *)buf, "2\n");
+
+	damon_set_targets(ctx, NULL, 0);
+	sprint_target_ids(ctx, buf, 64);
+	KUNIT_EXPECT_STREQ(test, (char *)buf, "\n");
+
+	dbgfs_destroy_ctx(ctx);
+}
+
+static struct kunit_case damon_test_cases[] = {
+	KUNIT_CASE(damon_dbgfs_test_str_to_target_ids),
+	KUNIT_CASE(damon_dbgfs_test_set_targets),
+	{},
+};
+
+static struct kunit_suite damon_test_suite = {
+	.name = "damon-dbgfs",
+	.test_cases = damon_test_cases,
+};
+kunit_test_suite(damon_test_suite);
+
+#endif /* _DAMON_TEST_H */
+
+#endif	/* CONFIG_DAMON_KUNIT_TEST */
diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c
new file mode 100644
index 000000000000..faee070977d8
--- /dev/null
+++ b/mm/damon/dbgfs.c
@@ -0,0 +1,623 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DAMON Debugfs Interface
+ *
+ * Author: SeongJae Park <sjpark@amazon.de>
+ */
+
+#define pr_fmt(fmt) "damon-dbgfs: " fmt
+
+#include <linux/damon.h>
+#include <linux/debugfs.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/page_idle.h>
+#include <linux/slab.h>
+
+static struct damon_ctx **dbgfs_ctxs;
+static int dbgfs_nr_ctxs;
+static struct dentry **dbgfs_dirs;
+static DEFINE_MUTEX(damon_dbgfs_lock);
+
+/*
+ * Returns non-empty string on success, negative error code otherwise.
+ */
+static char *user_input_str(const char __user *buf, size_t count, loff_t *ppos)
+{
+	char *kbuf;
+	ssize_t ret;
+
+	/* We do not accept continuous write */
+	if (*ppos)
+		return ERR_PTR(-EINVAL);
+
+	kbuf = kmalloc(count + 1, GFP_KERNEL);
+	if (!kbuf)
+		return ERR_PTR(-ENOMEM);
+
+	ret = simple_write_to_buffer(kbuf, count + 1, ppos, buf, count);
+	if (ret != count) {
+		kfree(kbuf);
+		return ERR_PTR(-EIO);
+	}
+	kbuf[ret] = '\0';
+
+	return kbuf;
+}
+
+static ssize_t dbgfs_attrs_read(struct file *file,
+		char __user *buf, size_t count, loff_t *ppos)
+{
+	struct damon_ctx *ctx = file->private_data;
+	char kbuf[128];
+	int ret;
+
+	mutex_lock(&ctx->kdamond_lock);
+	ret = scnprintf(kbuf, ARRAY_SIZE(kbuf), "%lu %lu %lu %lu %lu\n",
+			ctx->sample_interval, ctx->aggr_interval,
+			ctx->primitive_update_interval, ctx->min_nr_regions,
+			ctx->max_nr_regions);
+	mutex_unlock(&ctx->kdamond_lock);
+
+	return simple_read_from_buffer(buf, count, ppos, kbuf, ret);
+}
+
+static ssize_t dbgfs_attrs_write(struct file *file,
+		const char __user *buf, size_t count, loff_t *ppos)
+{
+	struct damon_ctx *ctx = file->private_data;
+	unsigned long s, a, r, minr, maxr;
+	char *kbuf;
+	ssize_t ret = count;
+	int err;
+
+	kbuf = user_input_str(buf, count, ppos);
+	if (IS_ERR(kbuf))
+		return PTR_ERR(kbuf);
+
+	if (sscanf(kbuf, "%lu %lu %lu %lu %lu",
+				&s, &a, &r, &minr, &maxr) != 5) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	mutex_lock(&ctx->kdamond_lock);
+	if (ctx->kdamond) {
+		ret = -EBUSY;
+		goto unlock_out;
+	}
+
+	err = damon_set_attrs(ctx, s, a, r, minr, maxr);
+	if (err)
+		ret = err;
+unlock_out:
+	mutex_unlock(&ctx->kdamond_lock);
+out:
+	kfree(kbuf);
+	return ret;
+}
+
+static inline bool targetid_is_pid(const struct damon_ctx *ctx)
+{
+	return ctx->primitive.target_valid == damon_va_target_valid;
+}
+
+static ssize_t sprint_target_ids(struct damon_ctx *ctx, char *buf, ssize_t len)
+{
+	struct damon_target *t;
+	unsigned long id;
+	int written = 0;
+	int rc;
+
+	damon_for_each_target(t, ctx) {
+		id = t->id;
+		if (targetid_is_pid(ctx))
+			/* Show pid numbers to debugfs users */
+			id = (unsigned long)pid_vnr((struct pid *)id);
+
+		rc = scnprintf(&buf[written], len - written, "%lu ", id);
+		if (!rc)
+			return -ENOMEM;
+		written += rc;
+	}
+	if (written)
+		written -= 1;
+	written += scnprintf(&buf[written], len - written, "\n");
+	return written;
+}
+
+static ssize_t dbgfs_target_ids_read(struct file *file,
+		char __user *buf, size_t count, loff_t *ppos)
+{
+	struct damon_ctx *ctx = file->private_data;
+	ssize_t len;
+	char ids_buf[320];
+
+	mutex_lock(&ctx->kdamond_lock);
+	len = sprint_target_ids(ctx, ids_buf, 320);
+	mutex_unlock(&ctx->kdamond_lock);
+	if (len < 0)
+		return len;
+
+	return simple_read_from_buffer(buf, count, ppos, ids_buf, len);
+}
+
+/*
+ * Converts a string into an array of unsigned long integers
+ *
+ * Returns an array of unsigned long integers if the conversion success, or
+ * NULL otherwise.
+ */
+static unsigned long *str_to_target_ids(const char *str, ssize_t len,
+					ssize_t *nr_ids)
+{
+	unsigned long *ids;
+	const int max_nr_ids = 32;
+	unsigned long id;
+	int pos = 0, parsed, ret;
+
+	*nr_ids = 0;
+	ids = kmalloc_array(max_nr_ids, sizeof(id), GFP_KERNEL);
+	if (!ids)
+		return NULL;
+	while (*nr_ids < max_nr_ids && pos < len) {
+		ret = sscanf(&str[pos], "%lu%n", &id, &parsed);
+		pos += parsed;
+		if (ret != 1)
+			break;
+		ids[*nr_ids] = id;
+		*nr_ids += 1;
+	}
+
+	return ids;
+}
+
+static void dbgfs_put_pids(unsigned long *ids, int nr_ids)
+{
+	int i;
+
+	for (i = 0; i < nr_ids; i++)
+		put_pid((struct pid *)ids[i]);
+}
+
+static ssize_t dbgfs_target_ids_write(struct file *file,
+		const char __user *buf, size_t count, loff_t *ppos)
+{
+	struct damon_ctx *ctx = file->private_data;
+	char *kbuf, *nrs;
+	unsigned long *targets;
+	ssize_t nr_targets;
+	ssize_t ret = count;
+	int i;
+	int err;
+
+	kbuf = user_input_str(buf, count, ppos);
+	if (IS_ERR(kbuf))
+		return PTR_ERR(kbuf);
+
+	nrs = kbuf;
+
+	targets = str_to_target_ids(nrs, ret, &nr_targets);
+	if (!targets) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	if (targetid_is_pid(ctx)) {
+		for (i = 0; i < nr_targets; i++) {
+			targets[i] = (unsigned long)find_get_pid(
+					(int)targets[i]);
+			if (!targets[i]) {
+				dbgfs_put_pids(targets, i);
+				ret = -EINVAL;
+				goto free_targets_out;
+			}
+		}
+	}
+
+	mutex_lock(&ctx->kdamond_lock);
+	if (ctx->kdamond) {
+		if (targetid_is_pid(ctx))
+			dbgfs_put_pids(targets, nr_targets);
+		ret = -EBUSY;
+		goto unlock_out;
+	}
+
+	err = damon_set_targets(ctx, targets, nr_targets);
+	if (err) {
+		if (targetid_is_pid(ctx))
+			dbgfs_put_pids(targets, nr_targets);
+		ret = err;
+	}
+
+unlock_out:
+	mutex_unlock(&ctx->kdamond_lock);
+free_targets_out:
+	kfree(targets);
+out:
+	kfree(kbuf);
+	return ret;
+}
+
+static ssize_t dbgfs_kdamond_pid_read(struct file *file,
+		char __user *buf, size_t count, loff_t *ppos)
+{
+	struct damon_ctx *ctx = file->private_data;
+	char *kbuf;
+	ssize_t len;
+
+	kbuf = kmalloc(count, GFP_KERNEL);
+	if (!kbuf)
+		return -ENOMEM;
+
+	mutex_lock(&ctx->kdamond_lock);
+	if (ctx->kdamond)
+		len = scnprintf(kbuf, count, "%d\n", ctx->kdamond->pid);
+	else
+		len = scnprintf(kbuf, count, "none\n");
+	mutex_unlock(&ctx->kdamond_lock);
+	if (!len)
+		goto out;
+	len = simple_read_from_buffer(buf, count, ppos, kbuf, len);
+
+out:
+	kfree(kbuf);
+	return len;
+}
+
+static int damon_dbgfs_open(struct inode *inode, struct file *file)
+{
+	file->private_data = inode->i_private;
+
+	return nonseekable_open(inode, file);
+}
+
+static const struct file_operations attrs_fops = {
+	.open = damon_dbgfs_open,
+	.read = dbgfs_attrs_read,
+	.write = dbgfs_attrs_write,
+};
+
+static const struct file_operations target_ids_fops = {
+	.open = damon_dbgfs_open,
+	.read = dbgfs_target_ids_read,
+	.write = dbgfs_target_ids_write,
+};
+
+static const struct file_operations kdamond_pid_fops = {
+	.open = damon_dbgfs_open,
+	.read = dbgfs_kdamond_pid_read,
+};
+
+static void dbgfs_fill_ctx_dir(struct dentry *dir, struct damon_ctx *ctx)
+{
+	const char * const file_names[] = {"attrs", "target_ids",
+		"kdamond_pid"};
+	const struct file_operations *fops[] = {&attrs_fops, &target_ids_fops,
+		&kdamond_pid_fops};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(file_names); i++)
+		debugfs_create_file(file_names[i], 0600, dir, ctx, fops[i]);
+}
+
+static int dbgfs_before_terminate(struct damon_ctx *ctx)
+{
+	struct damon_target *t, *next;
+
+	if (!targetid_is_pid(ctx))
+		return 0;
+
+	damon_for_each_target_safe(t, next, ctx) {
+		put_pid((struct pid *)t->id);
+		damon_destroy_target(t);
+	}
+	return 0;
+}
+
+static struct damon_ctx *dbgfs_new_ctx(void)
+{
+	struct damon_ctx *ctx;
+
+	ctx = damon_new_ctx();
+	if (!ctx)
+		return NULL;
+
+	damon_va_set_primitives(ctx);
+	ctx->callback.before_terminate = dbgfs_before_terminate;
+	return ctx;
+}
+
+static void dbgfs_destroy_ctx(struct damon_ctx *ctx)
+{
+	damon_destroy_ctx(ctx);
+}
+
+/*
+ * Make a context of @name and create a debugfs directory for it.
+ *
+ * This function should be called while holding damon_dbgfs_lock.
+ *
+ * Returns 0 on success, negative error code otherwise.
+ */
+static int dbgfs_mk_context(char *name)
+{
+	struct dentry *root, **new_dirs, *new_dir;
+	struct damon_ctx **new_ctxs, *new_ctx;
+
+	if (damon_nr_running_ctxs())
+		return -EBUSY;
+
+	new_ctxs = krealloc(dbgfs_ctxs, sizeof(*dbgfs_ctxs) *
+			(dbgfs_nr_ctxs + 1), GFP_KERNEL);
+	if (!new_ctxs)
+		return -ENOMEM;
+	dbgfs_ctxs = new_ctxs;
+
+	new_dirs = krealloc(dbgfs_dirs, sizeof(*dbgfs_dirs) *
+			(dbgfs_nr_ctxs + 1), GFP_KERNEL);
+	if (!new_dirs)
+		return -ENOMEM;
+	dbgfs_dirs = new_dirs;
+
+	root = dbgfs_dirs[0];
+	if (!root)
+		return -ENOENT;
+
+	new_dir = debugfs_create_dir(name, root);
+	dbgfs_dirs[dbgfs_nr_ctxs] = new_dir;
+
+	new_ctx = dbgfs_new_ctx();
+	if (!new_ctx) {
+		debugfs_remove(new_dir);
+		dbgfs_dirs[dbgfs_nr_ctxs] = NULL;
+		return -ENOMEM;
+	}
+
+	dbgfs_ctxs[dbgfs_nr_ctxs] = new_ctx;
+	dbgfs_fill_ctx_dir(dbgfs_dirs[dbgfs_nr_ctxs],
+			dbgfs_ctxs[dbgfs_nr_ctxs]);
+	dbgfs_nr_ctxs++;
+
+	return 0;
+}
+
+static ssize_t dbgfs_mk_context_write(struct file *file,
+		const char __user *buf, size_t count, loff_t *ppos)
+{
+	char *kbuf;
+	char *ctx_name;
+	ssize_t ret = count;
+	int err;
+
+	kbuf = user_input_str(buf, count, ppos);
+	if (IS_ERR(kbuf))
+		return PTR_ERR(kbuf);
+	ctx_name = kmalloc(count + 1, GFP_KERNEL);
+	if (!ctx_name) {
+		kfree(kbuf);
+		return -ENOMEM;
+	}
+
+	/* Trim white space */
+	if (sscanf(kbuf, "%s", ctx_name) != 1) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	mutex_lock(&damon_dbgfs_lock);
+	err = dbgfs_mk_context(ctx_name);
+	if (err)
+		ret = err;
+	mutex_unlock(&damon_dbgfs_lock);
+
+out:
+	kfree(kbuf);
+	kfree(ctx_name);
+	return ret;
+}
+
+/*
+ * Remove a context of @name and its debugfs directory.
+ *
+ * This function should be called while holding damon_dbgfs_lock.
+ *
+ * Return 0 on success, negative error code otherwise.
+ */
+static int dbgfs_rm_context(char *name)
+{
+	struct dentry *root, *dir, **new_dirs;
+	struct damon_ctx **new_ctxs;
+	int i, j;
+
+	if (damon_nr_running_ctxs())
+		return -EBUSY;
+
+	root = dbgfs_dirs[0];
+	if (!root)
+		return -ENOENT;
+
+	dir = debugfs_lookup(name, root);
+	if (!dir)
+		return -ENOENT;
+
+	new_dirs = kmalloc_array(dbgfs_nr_ctxs - 1, sizeof(*dbgfs_dirs),
+			GFP_KERNEL);
+	if (!new_dirs)
+		return -ENOMEM;
+
+	new_ctxs = kmalloc_array(dbgfs_nr_ctxs - 1, sizeof(*dbgfs_ctxs),
+			GFP_KERNEL);
+	if (!new_ctxs) {
+		kfree(new_dirs);
+		return -ENOMEM;
+	}
+
+	for (i = 0, j = 0; i < dbgfs_nr_ctxs; i++) {
+		if (dbgfs_dirs[i] == dir) {
+			debugfs_remove(dbgfs_dirs[i]);
+			dbgfs_destroy_ctx(dbgfs_ctxs[i]);
+			continue;
+		}
+		new_dirs[j] = dbgfs_dirs[i];
+		new_ctxs[j++] = dbgfs_ctxs[i];
+	}
+
+	kfree(dbgfs_dirs);
+	kfree(dbgfs_ctxs);
+
+	dbgfs_dirs = new_dirs;
+	dbgfs_ctxs = new_ctxs;
+	dbgfs_nr_ctxs--;
+
+	return 0;
+}
+
+static ssize_t dbgfs_rm_context_write(struct file *file,
+		const char __user *buf, size_t count, loff_t *ppos)
+{
+	char *kbuf;
+	ssize_t ret = count;
+	int err;
+	char *ctx_name;
+
+	kbuf = user_input_str(buf, count, ppos);
+	if (IS_ERR(kbuf))
+		return PTR_ERR(kbuf);
+	ctx_name = kmalloc(count + 1, GFP_KERNEL);
+	if (!ctx_name) {
+		kfree(kbuf);
+		return -ENOMEM;
+	}
+
+	/* Trim white space */
+	if (sscanf(kbuf, "%s", ctx_name) != 1) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	mutex_lock(&damon_dbgfs_lock);
+	err = dbgfs_rm_context(ctx_name);
+	if (err)
+		ret = err;
+	mutex_unlock(&damon_dbgfs_lock);
+
+out:
+	kfree(kbuf);
+	kfree(ctx_name);
+	return ret;
+}
+
+static ssize_t dbgfs_monitor_on_read(struct file *file,
+		char __user *buf, size_t count, loff_t *ppos)
+{
+	char monitor_on_buf[5];
+	bool monitor_on = damon_nr_running_ctxs() != 0;
+	int len;
+
+	len = scnprintf(monitor_on_buf, 5, monitor_on ? "on\n" : "off\n");
+
+	return simple_read_from_buffer(buf, count, ppos, monitor_on_buf, len);
+}
+
+static ssize_t dbgfs_monitor_on_write(struct file *file,
+		const char __user *buf, size_t count, loff_t *ppos)
+{
+	ssize_t ret = count;
+	char *kbuf;
+	int err;
+
+	kbuf = user_input_str(buf, count, ppos);
+	if (IS_ERR(kbuf))
+		return PTR_ERR(kbuf);
+
+	/* Remove white space */
+	if (sscanf(kbuf, "%s", kbuf) != 1) {
+		kfree(kbuf);
+		return -EINVAL;
+	}
+
+	if (!strncmp(kbuf, "on", count))
+		err = damon_start(dbgfs_ctxs, dbgfs_nr_ctxs);
+	else if (!strncmp(kbuf, "off", count))
+		err = damon_stop(dbgfs_ctxs, dbgfs_nr_ctxs);
+	else
+		err = -EINVAL;
+
+	if (err)
+		ret = err;
+	kfree(kbuf);
+	return ret;
+}
+
+static const struct file_operations mk_contexts_fops = {
+	.write = dbgfs_mk_context_write,
+};
+
+static const struct file_operations rm_contexts_fops = {
+	.write = dbgfs_rm_context_write,
+};
+
+static const struct file_operations monitor_on_fops = {
+	.read = dbgfs_monitor_on_read,
+	.write = dbgfs_monitor_on_write,
+};
+
+static int __init __damon_dbgfs_init(void)
+{
+	struct dentry *dbgfs_root;
+	const char * const file_names[] = {"mk_contexts", "rm_contexts",
+		"monitor_on"};
+	const struct file_operations *fops[] = {&mk_contexts_fops,
+		&rm_contexts_fops, &monitor_on_fops};
+	int i;
+
+	dbgfs_root = debugfs_create_dir("damon", NULL);
+
+	for (i = 0; i < ARRAY_SIZE(file_names); i++)
+		debugfs_create_file(file_names[i], 0600, dbgfs_root, NULL,
+				fops[i]);
+	dbgfs_fill_ctx_dir(dbgfs_root, dbgfs_ctxs[0]);
+
+	dbgfs_dirs = kmalloc_array(1, sizeof(dbgfs_root), GFP_KERNEL);
+	if (!dbgfs_dirs) {
+		debugfs_remove(dbgfs_root);
+		return -ENOMEM;
+	}
+	dbgfs_dirs[0] = dbgfs_root;
+
+	return 0;
+}
+
+/*
+ * Functions for the initialization
+ */
+
+static int __init damon_dbgfs_init(void)
+{
+	int rc;
+
+	dbgfs_ctxs = kmalloc(sizeof(*dbgfs_ctxs), GFP_KERNEL);
+	if (!dbgfs_ctxs)
+		return -ENOMEM;
+	dbgfs_ctxs[0] = dbgfs_new_ctx();
+	if (!dbgfs_ctxs[0]) {
+		kfree(dbgfs_ctxs);
+		return -ENOMEM;
+	}
+	dbgfs_nr_ctxs = 1;
+
+	rc = __damon_dbgfs_init();
+	if (rc) {
+		kfree(dbgfs_ctxs[0]);
+		kfree(dbgfs_ctxs);
+		pr_err("%s: dbgfs init failed\n", __func__);
+	}
+
+	return rc;
+}
+
+module_init(damon_dbgfs_init);
+
+#include "dbgfs-test.h"
diff --git a/mm/damon/vaddr-test.h b/mm/damon/vaddr-test.h
new file mode 100644
index 000000000000..1f5c13257dba
--- /dev/null
+++ b/mm/damon/vaddr-test.h
@@ -0,0 +1,329 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Data Access Monitor Unit Tests
+ *
+ * Copyright 2019 Amazon.com, Inc. or its affiliates.  All rights reserved.
+ *
+ * Author: SeongJae Park <sjpark@amazon.de>
+ */
+
+#ifdef CONFIG_DAMON_VADDR_KUNIT_TEST
+
+#ifndef _DAMON_VADDR_TEST_H
+#define _DAMON_VADDR_TEST_H
+
+#include <kunit/test.h>
+
+static void __link_vmas(struct vm_area_struct *vmas, ssize_t nr_vmas)
+{
+	int i, j;
+	unsigned long largest_gap, gap;
+
+	if (!nr_vmas)
+		return;
+
+	for (i = 0; i < nr_vmas - 1; i++) {
+		vmas[i].vm_next = &vmas[i + 1];
+
+		vmas[i].vm_rb.rb_left = NULL;
+		vmas[i].vm_rb.rb_right = &vmas[i + 1].vm_rb;
+
+		largest_gap = 0;
+		for (j = i; j < nr_vmas; j++) {
+			if (j == 0)
+				continue;
+			gap = vmas[j].vm_start - vmas[j - 1].vm_end;
+			if (gap > largest_gap)
+				largest_gap = gap;
+		}
+		vmas[i].rb_subtree_gap = largest_gap;
+	}
+	vmas[i].vm_next = NULL;
+	vmas[i].vm_rb.rb_right = NULL;
+	vmas[i].rb_subtree_gap = 0;
+}
+
+/*
+ * Test __damon_va_three_regions() function
+ *
+ * In case of virtual memory address spaces monitoring, DAMON converts the
+ * complex and dynamic memory mappings of each target task to three
+ * discontiguous regions which cover every mapped areas.  However, the three
+ * regions should not include the two biggest unmapped areas in the original
+ * mapping, because the two biggest areas are normally the areas between 1)
+ * heap and the mmap()-ed regions, and 2) the mmap()-ed regions and stack.
+ * Because these two unmapped areas are very huge but obviously never accessed,
+ * covering the region is just a waste.
+ *
+ * '__damon_va_three_regions() receives an address space of a process.  It
+ * first identifies the start of mappings, end of mappings, and the two biggest
+ * unmapped areas.  After that, based on the information, it constructs the
+ * three regions and returns.  For more detail, refer to the comment of
+ * 'damon_init_regions_of()' function definition in 'mm/damon.c' file.
+ *
+ * For example, suppose virtual address ranges of 10-20, 20-25, 200-210,
+ * 210-220, 300-305, and 307-330 (Other comments represent this mappings in
+ * more short form: 10-20-25, 200-210-220, 300-305, 307-330) of a process are
+ * mapped.  To cover every mappings, the three regions should start with 10,
+ * and end with 305.  The process also has three unmapped areas, 25-200,
+ * 220-300, and 305-307.  Among those, 25-200 and 220-300 are the biggest two
+ * unmapped areas, and thus it should be converted to three regions of 10-25,
+ * 200-220, and 300-330.
+ */
+static void damon_test_three_regions_in_vmas(struct kunit *test)
+{
+	struct damon_addr_range regions[3] = {0,};
+	/* 10-20-25, 200-210-220, 300-305, 307-330 */
+	struct vm_area_struct vmas[] = {
+		(struct vm_area_struct) {.vm_start = 10, .vm_end = 20},
+		(struct vm_area_struct) {.vm_start = 20, .vm_end = 25},
+		(struct vm_area_struct) {.vm_start = 200, .vm_end = 210},
+		(struct vm_area_struct) {.vm_start = 210, .vm_end = 220},
+		(struct vm_area_struct) {.vm_start = 300, .vm_end = 305},
+		(struct vm_area_struct) {.vm_start = 307, .vm_end = 330},
+	};
+
+	__link_vmas(vmas, 6);
+
+	__damon_va_three_regions(&vmas[0], regions);
+
+	KUNIT_EXPECT_EQ(test, 10ul, regions[0].start);
+	KUNIT_EXPECT_EQ(test, 25ul, regions[0].end);
+	KUNIT_EXPECT_EQ(test, 200ul, regions[1].start);
+	KUNIT_EXPECT_EQ(test, 220ul, regions[1].end);
+	KUNIT_EXPECT_EQ(test, 300ul, regions[2].start);
+	KUNIT_EXPECT_EQ(test, 330ul, regions[2].end);
+}
+
+static struct damon_region *__nth_region_of(struct damon_target *t, int idx)
+{
+	struct damon_region *r;
+	unsigned int i = 0;
+
+	damon_for_each_region(r, t) {
+		if (i++ == idx)
+			return r;
+	}
+
+	return NULL;
+}
+
+/*
+ * Test 'damon_va_apply_three_regions()'
+ *
+ * test			kunit object
+ * regions		an array containing start/end addresses of current
+ *			monitoring target regions
+ * nr_regions		the number of the addresses in 'regions'
+ * three_regions	The three regions that need to be applied now
+ * expected		start/end addresses of monitoring target regions that
+ *			'three_regions' are applied
+ * nr_expected		the number of addresses in 'expected'
+ *
+ * The memory mapping of the target processes changes dynamically.  To follow
+ * the change, DAMON periodically reads the mappings, simplifies it to the
+ * three regions, and updates the monitoring target regions to fit in the three
+ * regions.  The update of current target regions is the role of
+ * 'damon_va_apply_three_regions()'.
+ *
+ * This test passes the given target regions and the new three regions that
+ * need to be applied to the function and check whether it updates the regions
+ * as expected.
+ */
+static void damon_do_test_apply_three_regions(struct kunit *test,
+				unsigned long *regions, int nr_regions,
+				struct damon_addr_range *three_regions,
+				unsigned long *expected, int nr_expected)
+{
+	struct damon_ctx *ctx = damon_new_ctx();
+	struct damon_target *t;
+	struct damon_region *r;
+	int i;
+
+	t = damon_new_target(42);
+	for (i = 0; i < nr_regions / 2; i++) {
+		r = damon_new_region(regions[i * 2], regions[i * 2 + 1]);
+		damon_add_region(r, t);
+	}
+	damon_add_target(ctx, t);
+
+	damon_va_apply_three_regions(t, three_regions);
+
+	for (i = 0; i < nr_expected / 2; i++) {
+		r = __nth_region_of(t, i);
+		KUNIT_EXPECT_EQ(test, r->ar.start, expected[i * 2]);
+		KUNIT_EXPECT_EQ(test, r->ar.end, expected[i * 2 + 1]);
+	}
+
+	damon_destroy_ctx(ctx);
+}
+
+/*
+ * This function test most common case where the three big regions are only
+ * slightly changed.  Target regions should adjust their boundary (10-20-30,
+ * 50-55, 70-80, 90-100) to fit with the new big regions or remove target
+ * regions (57-79) that now out of the three regions.
+ */
+static void damon_test_apply_three_regions1(struct kunit *test)
+{
+	/* 10-20-30, 50-55-57-59, 70-80-90-100 */
+	unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59,
+				70, 80, 80, 90, 90, 100};
+	/* 5-27, 45-55, 73-104 */
+	struct damon_addr_range new_three_regions[3] = {
+		(struct damon_addr_range){.start = 5, .end = 27},
+		(struct damon_addr_range){.start = 45, .end = 55},
+		(struct damon_addr_range){.start = 73, .end = 104} };
+	/* 5-20-27, 45-55, 73-80-90-104 */
+	unsigned long expected[] = {5, 20, 20, 27, 45, 55,
+				73, 80, 80, 90, 90, 104};
+
+	damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions),
+			new_three_regions, expected, ARRAY_SIZE(expected));
+}
+
+/*
+ * Test slightly bigger change.  Similar to above, but the second big region
+ * now require two target regions (50-55, 57-59) to be removed.
+ */
+static void damon_test_apply_three_regions2(struct kunit *test)
+{
+	/* 10-20-30, 50-55-57-59, 70-80-90-100 */
+	unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59,
+				70, 80, 80, 90, 90, 100};
+	/* 5-27, 56-57, 65-104 */
+	struct damon_addr_range new_three_regions[3] = {
+		(struct damon_addr_range){.start = 5, .end = 27},
+		(struct damon_addr_range){.start = 56, .end = 57},
+		(struct damon_addr_range){.start = 65, .end = 104} };
+	/* 5-20-27, 56-57, 65-80-90-104 */
+	unsigned long expected[] = {5, 20, 20, 27, 56, 57,
+				65, 80, 80, 90, 90, 104};
+
+	damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions),
+			new_three_regions, expected, ARRAY_SIZE(expected));
+}
+
+/*
+ * Test a big change.  The second big region has totally freed and mapped to
+ * different area (50-59 -> 61-63).  The target regions which were in the old
+ * second big region (50-55-57-59) should be removed and new target region
+ * covering the second big region (61-63) should be created.
+ */
+static void damon_test_apply_three_regions3(struct kunit *test)
+{
+	/* 10-20-30, 50-55-57-59, 70-80-90-100 */
+	unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59,
+				70, 80, 80, 90, 90, 100};
+	/* 5-27, 61-63, 65-104 */
+	struct damon_addr_range new_three_regions[3] = {
+		(struct damon_addr_range){.start = 5, .end = 27},
+		(struct damon_addr_range){.start = 61, .end = 63},
+		(struct damon_addr_range){.start = 65, .end = 104} };
+	/* 5-20-27, 61-63, 65-80-90-104 */
+	unsigned long expected[] = {5, 20, 20, 27, 61, 63,
+				65, 80, 80, 90, 90, 104};
+
+	damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions),
+			new_three_regions, expected, ARRAY_SIZE(expected));
+}
+
+/*
+ * Test another big change.  Both of the second and third big regions (50-59
+ * and 70-100) has totally freed and mapped to different area (30-32 and
+ * 65-68).  The target regions which were in the old second and third big
+ * regions should now be removed and new target regions covering the new second
+ * and third big regions should be crated.
+ */
+static void damon_test_apply_three_regions4(struct kunit *test)
+{
+	/* 10-20-30, 50-55-57-59, 70-80-90-100 */
+	unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59,
+				70, 80, 80, 90, 90, 100};
+	/* 5-7, 30-32, 65-68 */
+	struct damon_addr_range new_three_regions[3] = {
+		(struct damon_addr_range){.start = 5, .end = 7},
+		(struct damon_addr_range){.start = 30, .end = 32},
+		(struct damon_addr_range){.start = 65, .end = 68} };
+	/* expect 5-7, 30-32, 65-68 */
+	unsigned long expected[] = {5, 7, 30, 32, 65, 68};
+
+	damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions),
+			new_three_regions, expected, ARRAY_SIZE(expected));
+}
+
+static void damon_test_split_evenly(struct kunit *test)
+{
+	struct damon_ctx *c = damon_new_ctx();
+	struct damon_target *t;
+	struct damon_region *r;
+	unsigned long i;
+
+	KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(NULL, NULL, 5),
+			-EINVAL);
+
+	t = damon_new_target(42);
+	r = damon_new_region(0, 100);
+	KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(t, r, 0), -EINVAL);
+
+	damon_add_region(r, t);
+	KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(t, r, 10), 0);
+	KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 10u);
+
+	i = 0;
+	damon_for_each_region(r, t) {
+		KUNIT_EXPECT_EQ(test, r->ar.start, i++ * 10);
+		KUNIT_EXPECT_EQ(test, r->ar.end, i * 10);
+	}
+	damon_free_target(t);
+
+	t = damon_new_target(42);
+	r = damon_new_region(5, 59);
+	damon_add_region(r, t);
+	KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(t, r, 5), 0);
+	KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 5u);
+
+	i = 0;
+	damon_for_each_region(r, t) {
+		if (i == 4)
+			break;
+		KUNIT_EXPECT_EQ(test, r->ar.start, 5 + 10 * i++);
+		KUNIT_EXPECT_EQ(test, r->ar.end, 5 + 10 * i);
+	}
+	KUNIT_EXPECT_EQ(test, r->ar.start, 5 + 10 * i);
+	KUNIT_EXPECT_EQ(test, r->ar.end, 59ul);
+	damon_free_target(t);
+
+	t = damon_new_target(42);
+	r = damon_new_region(5, 6);
+	damon_add_region(r, t);
+	KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(t, r, 2), -EINVAL);
+	KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1u);
+
+	damon_for_each_region(r, t) {
+		KUNIT_EXPECT_EQ(test, r->ar.start, 5ul);
+		KUNIT_EXPECT_EQ(test, r->ar.end, 6ul);
+	}
+	damon_free_target(t);
+	damon_destroy_ctx(c);
+}
+
+static struct kunit_case damon_test_cases[] = {
+	KUNIT_CASE(damon_test_three_regions_in_vmas),
+	KUNIT_CASE(damon_test_apply_three_regions1),
+	KUNIT_CASE(damon_test_apply_three_regions2),
+	KUNIT_CASE(damon_test_apply_three_regions3),
+	KUNIT_CASE(damon_test_apply_three_regions4),
+	KUNIT_CASE(damon_test_split_evenly),
+	{},
+};
+
+static struct kunit_suite damon_test_suite = {
+	.name = "damon-primitives",
+	.test_cases = damon_test_cases,
+};
+kunit_test_suite(damon_test_suite);
+
+#endif /* _DAMON_VADDR_TEST_H */
+
+#endif	/* CONFIG_DAMON_VADDR_KUNIT_TEST */
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
new file mode 100644
index 000000000000..58c1fb2aafa9
--- /dev/null
+++ b/mm/damon/vaddr.c
@@ -0,0 +1,672 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DAMON Primitives for Virtual Address Spaces
+ *
+ * Author: SeongJae Park <sjpark@amazon.de>
+ */
+
+#define pr_fmt(fmt) "damon-va: " fmt
+
+#include <linux/damon.h>
+#include <linux/hugetlb.h>
+#include <linux/mm.h>
+#include <linux/mmu_notifier.h>
+#include <linux/highmem.h>
+#include <linux/page_idle.h>
+#include <linux/pagewalk.h>
+#include <linux/random.h>
+#include <linux/sched/mm.h>
+#include <linux/slab.h>
+
+#ifdef CONFIG_DAMON_VADDR_KUNIT_TEST
+#undef DAMON_MIN_REGION
+#define DAMON_MIN_REGION 1
+#endif
+
+/* Get a random number in [l, r) */
+#define damon_rand(l, r) (l + prandom_u32_max(r - l))
+
+/*
+ * 't->id' should be the pointer to the relevant 'struct pid' having reference
+ * count.  Caller must put the returned task, unless it is NULL.
+ */
+#define damon_get_task_struct(t) \
+	(get_pid_task((struct pid *)t->id, PIDTYPE_PID))
+
+/*
+ * Get the mm_struct of the given target
+ *
+ * Caller _must_ put the mm_struct after use, unless it is NULL.
+ *
+ * Returns the mm_struct of the target on success, NULL on failure
+ */
+static struct mm_struct *damon_get_mm(struct damon_target *t)
+{
+	struct task_struct *task;
+	struct mm_struct *mm;
+
+	task = damon_get_task_struct(t);
+	if (!task)
+		return NULL;
+
+	mm = get_task_mm(task);
+	put_task_struct(task);
+	return mm;
+}
+
+/*
+ * Functions for the initial monitoring target regions construction
+ */
+
+/*
+ * Size-evenly split a region into 'nr_pieces' small regions
+ *
+ * Returns 0 on success, or negative error code otherwise.
+ */
+static int damon_va_evenly_split_region(struct damon_target *t,
+		struct damon_region *r, unsigned int nr_pieces)
+{
+	unsigned long sz_orig, sz_piece, orig_end;
+	struct damon_region *n = NULL, *next;
+	unsigned long start;
+
+	if (!r || !nr_pieces)
+		return -EINVAL;
+
+	orig_end = r->ar.end;
+	sz_orig = r->ar.end - r->ar.start;
+	sz_piece = ALIGN_DOWN(sz_orig / nr_pieces, DAMON_MIN_REGION);
+
+	if (!sz_piece)
+		return -EINVAL;
+
+	r->ar.end = r->ar.start + sz_piece;
+	next = damon_next_region(r);
+	for (start = r->ar.end; start + sz_piece <= orig_end;
+			start += sz_piece) {
+		n = damon_new_region(start, start + sz_piece);
+		if (!n)
+			return -ENOMEM;
+		damon_insert_region(n, r, next, t);
+		r = n;
+	}
+	/* complement last region for possible rounding error */
+	if (n)
+		n->ar.end = orig_end;
+
+	return 0;
+}
+
+static unsigned long sz_range(struct damon_addr_range *r)
+{
+	return r->end - r->start;
+}
+
+static void swap_ranges(struct damon_addr_range *r1,
+			struct damon_addr_range *r2)
+{
+	struct damon_addr_range tmp;
+
+	tmp = *r1;
+	*r1 = *r2;
+	*r2 = tmp;
+}
+
+/*
+ * Find three regions separated by two biggest unmapped regions
+ *
+ * vma		the head vma of the target address space
+ * regions	an array of three address ranges that results will be saved
+ *
+ * This function receives an address space and finds three regions in it which
+ * separated by the two biggest unmapped regions in the space.  Please refer to
+ * below comments of '__damon_va_init_regions()' function to know why this is
+ * necessary.
+ *
+ * Returns 0 if success, or negative error code otherwise.
+ */
+static int __damon_va_three_regions(struct vm_area_struct *vma,
+				       struct damon_addr_range regions[3])
+{
+	struct damon_addr_range gap = {0}, first_gap = {0}, second_gap = {0};
+	struct vm_area_struct *last_vma = NULL;
+	unsigned long start = 0;
+	struct rb_root rbroot;
+
+	/* Find two biggest gaps so that first_gap > second_gap > others */
+	for (; vma; vma = vma->vm_next) {
+		if (!last_vma) {
+			start = vma->vm_start;
+			goto next;
+		}
+
+		if (vma->rb_subtree_gap <= sz_range(&second_gap)) {
+			rbroot.rb_node = &vma->vm_rb;
+			vma = rb_entry(rb_last(&rbroot),
+					struct vm_area_struct, vm_rb);
+			goto next;
+		}
+
+		gap.start = last_vma->vm_end;
+		gap.end = vma->vm_start;
+		if (sz_range(&gap) > sz_range(&second_gap)) {
+			swap_ranges(&gap, &second_gap);
+			if (sz_range(&second_gap) > sz_range(&first_gap))
+				swap_ranges(&second_gap, &first_gap);
+		}
+next:
+		last_vma = vma;
+	}
+
+	if (!sz_range(&second_gap) || !sz_range(&first_gap))
+		return -EINVAL;
+
+	/* Sort the two biggest gaps by address */
+	if (first_gap.start > second_gap.start)
+		swap_ranges(&first_gap, &second_gap);
+
+	/* Store the result */
+	regions[0].start = ALIGN(start, DAMON_MIN_REGION);
+	regions[0].end = ALIGN(first_gap.start, DAMON_MIN_REGION);
+	regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION);
+	regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION);
+	regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION);
+	regions[2].end = ALIGN(last_vma->vm_end, DAMON_MIN_REGION);
+
+	return 0;
+}
+
+/*
+ * Get the three regions in the given target (task)
+ *
+ * Returns 0 on success, negative error code otherwise.
+ */
+static int damon_va_three_regions(struct damon_target *t,
+				struct damon_addr_range regions[3])
+{
+	struct mm_struct *mm;
+	int rc;
+
+	mm = damon_get_mm(t);
+	if (!mm)
+		return -EINVAL;
+
+	mmap_read_lock(mm);
+	rc = __damon_va_three_regions(mm->mmap, regions);
+	mmap_read_unlock(mm);
+
+	mmput(mm);
+	return rc;
+}
+
+/*
+ * Initialize the monitoring target regions for the given target (task)
+ *
+ * t	the given target
+ *
+ * Because only a number of small portions of the entire address space
+ * is actually mapped to the memory and accessed, monitoring the unmapped
+ * regions is wasteful.  That said, because we can deal with small noises,
+ * tracking every mapping is not strictly required but could even incur a high
+ * overhead if the mapping frequently changes or the number of mappings is
+ * high.  The adaptive regions adjustment mechanism will further help to deal
+ * with the noise by simply identifying the unmapped areas as a region that
+ * has no access.  Moreover, applying the real mappings that would have many
+ * unmapped areas inside will make the adaptive mechanism quite complex.  That
+ * said, too huge unmapped areas inside the monitoring target should be removed
+ * to not take the time for the adaptive mechanism.
+ *
+ * For the reason, we convert the complex mappings to three distinct regions
+ * that cover every mapped area of the address space.  Also the two gaps
+ * between the three regions are the two biggest unmapped areas in the given
+ * address space.  In detail, this function first identifies the start and the
+ * end of the mappings and the two biggest unmapped areas of the address space.
+ * Then, it constructs the three regions as below:
+ *
+ *     [mappings[0]->start, big_two_unmapped_areas[0]->start)
+ *     [big_two_unmapped_areas[0]->end, big_two_unmapped_areas[1]->start)
+ *     [big_two_unmapped_areas[1]->end, mappings[nr_mappings - 1]->end)
+ *
+ * As usual memory map of processes is as below, the gap between the heap and
+ * the uppermost mmap()-ed region, and the gap between the lowermost mmap()-ed
+ * region and the stack will be two biggest unmapped regions.  Because these
+ * gaps are exceptionally huge areas in usual address space, excluding these
+ * two biggest unmapped regions will be sufficient to make a trade-off.
+ *
+ *   <heap>
+ *   <BIG UNMAPPED REGION 1>
+ *   <uppermost mmap()-ed region>
+ *   (other mmap()-ed regions and small unmapped regions)
+ *   <lowermost mmap()-ed region>
+ *   <BIG UNMAPPED REGION 2>
+ *   <stack>
+ */
+static void __damon_va_init_regions(struct damon_ctx *ctx,
+				     struct damon_target *t)
+{
+	struct damon_region *r;
+	struct damon_addr_range regions[3];
+	unsigned long sz = 0, nr_pieces;
+	int i;
+
+	if (damon_va_three_regions(t, regions)) {
+		pr_err("Failed to get three regions of target %lu\n", t->id);
+		return;
+	}
+
+	for (i = 0; i < 3; i++)
+		sz += regions[i].end - regions[i].start;
+	if (ctx->min_nr_regions)
+		sz /= ctx->min_nr_regions;
+	if (sz < DAMON_MIN_REGION)
+		sz = DAMON_MIN_REGION;
+
+	/* Set the initial three regions of the target */
+	for (i = 0; i < 3; i++) {
+		r = damon_new_region(regions[i].start, regions[i].end);
+		if (!r) {
+			pr_err("%d'th init region creation failed\n", i);
+			return;
+		}
+		damon_add_region(r, t);
+
+		nr_pieces = (regions[i].end - regions[i].start) / sz;
+		damon_va_evenly_split_region(t, r, nr_pieces);
+	}
+}
+
+/* Initialize '->regions_list' of every target (task) */
+void damon_va_init(struct damon_ctx *ctx)
+{
+	struct damon_target *t;
+
+	damon_for_each_target(t, ctx) {
+		/* the user may set the target regions as they want */
+		if (!damon_nr_regions(t))
+			__damon_va_init_regions(ctx, t);
+	}
+}
+
+/*
+ * Functions for the dynamic monitoring target regions update
+ */
+
+/*
+ * Check whether a region is intersecting an address range
+ *
+ * Returns true if it is.
+ */
+static bool damon_intersect(struct damon_region *r, struct damon_addr_range *re)
+{
+	return !(r->ar.end <= re->start || re->end <= r->ar.start);
+}
+
+/*
+ * Update damon regions for the three big regions of the given target
+ *
+ * t		the given target
+ * bregions	the three big regions of the target
+ */
+static void damon_va_apply_three_regions(struct damon_target *t,
+		struct damon_addr_range bregions[3])
+{
+	struct damon_region *r, *next;
+	unsigned int i = 0;
+
+	/* Remove regions which are not in the three big regions now */
+	damon_for_each_region_safe(r, next, t) {
+		for (i = 0; i < 3; i++) {
+			if (damon_intersect(r, &bregions[i]))
+				break;
+		}
+		if (i == 3)
+			damon_destroy_region(r, t);
+	}
+
+	/* Adjust intersecting regions to fit with the three big regions */
+	for (i = 0; i < 3; i++) {
+		struct damon_region *first = NULL, *last;
+		struct damon_region *newr;
+		struct damon_addr_range *br;
+
+		br = &bregions[i];
+		/* Get the first and last regions which intersects with br */
+		damon_for_each_region(r, t) {
+			if (damon_intersect(r, br)) {
+				if (!first)
+					first = r;
+				last = r;
+			}
+			if (r->ar.start >= br->end)
+				break;
+		}
+		if (!first) {
+			/* no damon_region intersects with this big region */
+			newr = damon_new_region(
+					ALIGN_DOWN(br->start,
+						DAMON_MIN_REGION),
+					ALIGN(br->end, DAMON_MIN_REGION));
+			if (!newr)
+				continue;
+			damon_insert_region(newr, damon_prev_region(r), r, t);
+		} else {
+			first->ar.start = ALIGN_DOWN(br->start,
+					DAMON_MIN_REGION);
+			last->ar.end = ALIGN(br->end, DAMON_MIN_REGION);
+		}
+	}
+}
+
+/*
+ * Update regions for current memory mappings
+ */
+void damon_va_update(struct damon_ctx *ctx)
+{
+	struct damon_addr_range three_regions[3];
+	struct damon_target *t;
+
+	damon_for_each_target(t, ctx) {
+		if (damon_va_three_regions(t, three_regions))
+			continue;
+		damon_va_apply_three_regions(t, three_regions);
+	}
+}
+
+/*
+ * Get an online page for a pfn if it's in the LRU list.  Otherwise, returns
+ * NULL.
+ *
+ * The body of this function is stolen from the 'page_idle_get_page()'.  We
+ * steal rather than reuse it because the code is quite simple.
+ */
+static struct page *damon_get_page(unsigned long pfn)
+{
+	struct page *page = pfn_to_online_page(pfn);
+
+	if (!page || !PageLRU(page) || !get_page_unless_zero(page))
+		return NULL;
+
+	if (unlikely(!PageLRU(page))) {
+		put_page(page);
+		page = NULL;
+	}
+	return page;
+}
+
+static void damon_ptep_mkold(pte_t *pte, struct mm_struct *mm,
+			     unsigned long addr)
+{
+	bool referenced = false;
+	struct page *page = damon_get_page(pte_pfn(*pte));
+
+	if (!page)
+		return;
+
+	if (pte_young(*pte)) {
+		referenced = true;
+		*pte = pte_mkold(*pte);
+	}
+
+#ifdef CONFIG_MMU_NOTIFIER
+	if (mmu_notifier_clear_young(mm, addr, addr + PAGE_SIZE))
+		referenced = true;
+#endif /* CONFIG_MMU_NOTIFIER */
+
+	if (referenced)
+		set_page_young(page);
+
+	set_page_idle(page);
+	put_page(page);
+}
+
+static void damon_pmdp_mkold(pmd_t *pmd, struct mm_struct *mm,
+			     unsigned long addr)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	bool referenced = false;
+	struct page *page = damon_get_page(pmd_pfn(*pmd));
+
+	if (!page)
+		return;
+
+	if (pmd_young(*pmd)) {
+		referenced = true;
+		*pmd = pmd_mkold(*pmd);
+	}
+
+#ifdef CONFIG_MMU_NOTIFIER
+	if (mmu_notifier_clear_young(mm, addr,
+				addr + ((1UL) << HPAGE_PMD_SHIFT)))
+		referenced = true;
+#endif /* CONFIG_MMU_NOTIFIER */
+
+	if (referenced)
+		set_page_young(page);
+
+	set_page_idle(page);
+	put_page(page);
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+}
+
+static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr,
+		unsigned long next, struct mm_walk *walk)
+{
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	if (pmd_huge(*pmd)) {
+		ptl = pmd_lock(walk->mm, pmd);
+		if (pmd_huge(*pmd)) {
+			damon_pmdp_mkold(pmd, walk->mm, addr);
+			spin_unlock(ptl);
+			return 0;
+		}
+		spin_unlock(ptl);
+	}
+
+	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+		return 0;
+	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+	if (!pte_present(*pte))
+		goto out;
+	damon_ptep_mkold(pte, walk->mm, addr);
+out:
+	pte_unmap_unlock(pte, ptl);
+	return 0;
+}
+
+static struct mm_walk_ops damon_mkold_ops = {
+	.pmd_entry = damon_mkold_pmd_entry,
+};
+
+static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
+{
+	mmap_read_lock(mm);
+	walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL);
+	mmap_read_unlock(mm);
+}
+
+/*
+ * Functions for the access checking of the regions
+ */
+
+static void damon_va_prepare_access_check(struct damon_ctx *ctx,
+			struct mm_struct *mm, struct damon_region *r)
+{
+	r->sampling_addr = damon_rand(r->ar.start, r->ar.end);
+
+	damon_va_mkold(mm, r->sampling_addr);
+}
+
+void damon_va_prepare_access_checks(struct damon_ctx *ctx)
+{
+	struct damon_target *t;
+	struct mm_struct *mm;
+	struct damon_region *r;
+
+	damon_for_each_target(t, ctx) {
+		mm = damon_get_mm(t);
+		if (!mm)
+			continue;
+		damon_for_each_region(r, t)
+			damon_va_prepare_access_check(ctx, mm, r);
+		mmput(mm);
+	}
+}
+
+struct damon_young_walk_private {
+	unsigned long *page_sz;
+	bool young;
+};
+
+static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr,
+		unsigned long next, struct mm_walk *walk)
+{
+	pte_t *pte;
+	spinlock_t *ptl;
+	struct page *page;
+	struct damon_young_walk_private *priv = walk->private;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	if (pmd_huge(*pmd)) {
+		ptl = pmd_lock(walk->mm, pmd);
+		if (!pmd_huge(*pmd)) {
+			spin_unlock(ptl);
+			goto regular_page;
+		}
+		page = damon_get_page(pmd_pfn(*pmd));
+		if (!page)
+			goto huge_out;
+		if (pmd_young(*pmd) || !page_is_idle(page) ||
+					mmu_notifier_test_young(walk->mm,
+						addr)) {
+			*priv->page_sz = ((1UL) << HPAGE_PMD_SHIFT);
+			priv->young = true;
+		}
+		put_page(page);
+huge_out:
+		spin_unlock(ptl);
+		return 0;
+	}
+
+regular_page:
+#endif	/* CONFIG_TRANSPARENT_HUGEPAGE */
+
+	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+		return -EINVAL;
+	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+	if (!pte_present(*pte))
+		goto out;
+	page = damon_get_page(pte_pfn(*pte));
+	if (!page)
+		goto out;
+	if (pte_young(*pte) || !page_is_idle(page) ||
+			mmu_notifier_test_young(walk->mm, addr)) {
+		*priv->page_sz = PAGE_SIZE;
+		priv->young = true;
+	}
+	put_page(page);
+out:
+	pte_unmap_unlock(pte, ptl);
+	return 0;
+}
+
+static struct mm_walk_ops damon_young_ops = {
+	.pmd_entry = damon_young_pmd_entry,
+};
+
+static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
+		unsigned long *page_sz)
+{
+	struct damon_young_walk_private arg = {
+		.page_sz = page_sz,
+		.young = false,
+	};
+
+	mmap_read_lock(mm);
+	walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg);
+	mmap_read_unlock(mm);
+	return arg.young;
+}
+
+/*
+ * Check whether the region was accessed after the last preparation
+ *
+ * mm	'mm_struct' for the given virtual address space
+ * r	the region to be checked
+ */
+static void damon_va_check_access(struct damon_ctx *ctx,
+			       struct mm_struct *mm, struct damon_region *r)
+{
+	static struct mm_struct *last_mm;
+	static unsigned long last_addr;
+	static unsigned long last_page_sz = PAGE_SIZE;
+	static bool last_accessed;
+
+	/* If the region is in the last checked page, reuse the result */
+	if (mm == last_mm && (ALIGN_DOWN(last_addr, last_page_sz) ==
+				ALIGN_DOWN(r->sampling_addr, last_page_sz))) {
+		if (last_accessed)
+			r->nr_accesses++;
+		return;
+	}
+
+	last_accessed = damon_va_young(mm, r->sampling_addr, &last_page_sz);
+	if (last_accessed)
+		r->nr_accesses++;
+
+	last_mm = mm;
+	last_addr = r->sampling_addr;
+}
+
+unsigned int damon_va_check_accesses(struct damon_ctx *ctx)
+{
+	struct damon_target *t;
+	struct mm_struct *mm;
+	struct damon_region *r;
+	unsigned int max_nr_accesses = 0;
+
+	damon_for_each_target(t, ctx) {
+		mm = damon_get_mm(t);
+		if (!mm)
+			continue;
+		damon_for_each_region(r, t) {
+			damon_va_check_access(ctx, mm, r);
+			max_nr_accesses = max(r->nr_accesses, max_nr_accesses);
+		}
+		mmput(mm);
+	}
+
+	return max_nr_accesses;
+}
+
+/*
+ * Functions for the target validity check and cleanup
+ */
+
+bool damon_va_target_valid(void *target)
+{
+	struct damon_target *t = target;
+	struct task_struct *task;
+
+	task = damon_get_task_struct(t);
+	if (task) {
+		put_task_struct(task);
+		return true;
+	}
+
+	return false;
+}
+
+void damon_va_set_primitives(struct damon_ctx *ctx)
+{
+	ctx->primitive.init = damon_va_init;
+	ctx->primitive.update = damon_va_update;
+	ctx->primitive.prepare_access_checks = damon_va_prepare_access_checks;
+	ctx->primitive.check_accesses = damon_va_check_accesses;
+	ctx->primitive.reset_aggregated = NULL;
+	ctx->primitive.target_valid = damon_va_target_valid;
+	ctx->primitive.cleanup = NULL;
+}
+
+#include "vaddr-test.h"
diff --git a/mm/debug.c b/mm/debug.c
index e73fe0a8ec3d..ca9611784e4b 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -160,7 +160,7 @@ static void __dump_page(struct page *page)
 out_mapping:
 	BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS + 1);
 
-	pr_warn("%sflags: %#lx(%pGp)%s\n", type, head->flags, &head->flags,
+	pr_warn("%sflags: %pGp%s\n", type, &head->flags,
 		page_cma ? " CMA" : "");
 	print_hex_dump(KERN_WARNING, "raw: ", DUMP_PREFIX_NONE, 32,
 			sizeof(unsigned long), page,
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 1c922691aa61..1403639302e4 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -29,6 +29,8 @@
 #include <linux/start_kernel.h>
 #include <linux/sched/mm.h>
 #include <linux/io.h>
+
+#include <asm/cacheflush.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 
@@ -58,10 +60,41 @@
 #define RANDOM_ORVALUE (GENMASK(BITS_PER_LONG - 1, 0) & ~ARCH_SKIP_MASK)
 #define RANDOM_NZVALUE	GENMASK(7, 0)
 
-static void __init pte_basic_tests(unsigned long pfn, int idx)
+struct pgtable_debug_args {
+	struct mm_struct	*mm;
+	struct vm_area_struct	*vma;
+
+	pgd_t			*pgdp;
+	p4d_t			*p4dp;
+	pud_t			*pudp;
+	pmd_t			*pmdp;
+	pte_t			*ptep;
+
+	p4d_t			*start_p4dp;
+	pud_t			*start_pudp;
+	pmd_t			*start_pmdp;
+	pgtable_t		start_ptep;
+
+	unsigned long		vaddr;
+	pgprot_t		page_prot;
+	pgprot_t		page_prot_none;
+
+	bool			is_contiguous_page;
+	unsigned long		pud_pfn;
+	unsigned long		pmd_pfn;
+	unsigned long		pte_pfn;
+
+	unsigned long		fixed_pgd_pfn;
+	unsigned long		fixed_p4d_pfn;
+	unsigned long		fixed_pud_pfn;
+	unsigned long		fixed_pmd_pfn;
+	unsigned long		fixed_pte_pfn;
+};
+
+static void __init pte_basic_tests(struct pgtable_debug_args *args, int idx)
 {
 	pgprot_t prot = protection_map[idx];
-	pte_t pte = pfn_pte(pfn, prot);
+	pte_t pte = pfn_pte(args->fixed_pte_pfn, prot);
 	unsigned long val = idx, *ptr = &val;
 
 	pr_debug("Validating PTE basic (%pGv)\n", ptr);
@@ -86,53 +119,63 @@ static void __init pte_basic_tests(unsigned long pfn, int idx)
 	WARN_ON(!pte_dirty(pte_wrprotect(pte_mkdirty(pte))));
 }
 
-static void __init pte_advanced_tests(struct mm_struct *mm,
-				      struct vm_area_struct *vma, pte_t *ptep,
-				      unsigned long pfn, unsigned long vaddr,
-				      pgprot_t prot)
+static void __init pte_advanced_tests(struct pgtable_debug_args *args)
 {
+	struct page *page;
 	pte_t pte;
 
 	/*
 	 * Architectures optimize set_pte_at by avoiding TLB flush.
 	 * This requires set_pte_at to be not used to update an
 	 * existing pte entry. Clear pte before we do set_pte_at
+	 *
+	 * flush_dcache_page() is called after set_pte_at() to clear
+	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
+	 * when it's released and page allocation check will fail when
+	 * the page is allocated again. For architectures other than ARM64,
+	 * the unexpected overhead of cache flushing is acceptable.
 	 */
+	page = (args->pte_pfn != ULONG_MAX) ? pfn_to_page(args->pte_pfn) : NULL;
+	if (!page)
+		return;
 
 	pr_debug("Validating PTE advanced\n");
-	pte = pfn_pte(pfn, prot);
-	set_pte_at(mm, vaddr, ptep, pte);
-	ptep_set_wrprotect(mm, vaddr, ptep);
-	pte = ptep_get(ptep);
+	pte = pfn_pte(args->pte_pfn, args->page_prot);
+	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
+	flush_dcache_page(page);
+	ptep_set_wrprotect(args->mm, args->vaddr, args->ptep);
+	pte = ptep_get(args->ptep);
 	WARN_ON(pte_write(pte));
-	ptep_get_and_clear(mm, vaddr, ptep);
-	pte = ptep_get(ptep);
+	ptep_get_and_clear(args->mm, args->vaddr, args->ptep);
+	pte = ptep_get(args->ptep);
 	WARN_ON(!pte_none(pte));
 
-	pte = pfn_pte(pfn, prot);
+	pte = pfn_pte(args->pte_pfn, args->page_prot);
 	pte = pte_wrprotect(pte);
 	pte = pte_mkclean(pte);
-	set_pte_at(mm, vaddr, ptep, pte);
+	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
+	flush_dcache_page(page);
 	pte = pte_mkwrite(pte);
 	pte = pte_mkdirty(pte);
-	ptep_set_access_flags(vma, vaddr, ptep, pte, 1);
-	pte = ptep_get(ptep);
+	ptep_set_access_flags(args->vma, args->vaddr, args->ptep, pte, 1);
+	pte = ptep_get(args->ptep);
 	WARN_ON(!(pte_write(pte) && pte_dirty(pte)));
-	ptep_get_and_clear_full(mm, vaddr, ptep, 1);
-	pte = ptep_get(ptep);
+	ptep_get_and_clear_full(args->mm, args->vaddr, args->ptep, 1);
+	pte = ptep_get(args->ptep);
 	WARN_ON(!pte_none(pte));
 
-	pte = pfn_pte(pfn, prot);
+	pte = pfn_pte(args->pte_pfn, args->page_prot);
 	pte = pte_mkyoung(pte);
-	set_pte_at(mm, vaddr, ptep, pte);
-	ptep_test_and_clear_young(vma, vaddr, ptep);
-	pte = ptep_get(ptep);
+	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
+	flush_dcache_page(page);
+	ptep_test_and_clear_young(args->vma, args->vaddr, args->ptep);
+	pte = ptep_get(args->ptep);
 	WARN_ON(pte_young(pte));
 }
 
-static void __init pte_savedwrite_tests(unsigned long pfn, pgprot_t prot)
+static void __init pte_savedwrite_tests(struct pgtable_debug_args *args)
 {
-	pte_t pte = pfn_pte(pfn, prot);
+	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot_none);
 
 	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
 		return;
@@ -143,7 +186,7 @@ static void __init pte_savedwrite_tests(unsigned long pfn, pgprot_t prot)
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static void __init pmd_basic_tests(unsigned long pfn, int idx)
+static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx)
 {
 	pgprot_t prot = protection_map[idx];
 	unsigned long val = idx, *ptr = &val;
@@ -153,7 +196,7 @@ static void __init pmd_basic_tests(unsigned long pfn, int idx)
 		return;
 
 	pr_debug("Validating PMD basic (%pGv)\n", ptr);
-	pmd = pfn_pmd(pfn, prot);
+	pmd = pfn_pmd(args->fixed_pmd_pfn, prot);
 
 	/*
 	 * This test needs to be executed after the given page table entry
@@ -181,57 +224,70 @@ static void __init pmd_basic_tests(unsigned long pfn, int idx)
 	WARN_ON(!pmd_bad(pmd_mkhuge(pmd)));
 }
 
-static void __init pmd_advanced_tests(struct mm_struct *mm,
-				      struct vm_area_struct *vma, pmd_t *pmdp,
-				      unsigned long pfn, unsigned long vaddr,
-				      pgprot_t prot, pgtable_t pgtable)
+static void __init pmd_advanced_tests(struct pgtable_debug_args *args)
 {
+	struct page *page;
 	pmd_t pmd;
+	unsigned long vaddr = args->vaddr;
 
 	if (!has_transparent_hugepage())
 		return;
 
+	page = (args->pmd_pfn != ULONG_MAX) ? pfn_to_page(args->pmd_pfn) : NULL;
+	if (!page)
+		return;
+
+	/*
+	 * flush_dcache_page() is called after set_pmd_at() to clear
+	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
+	 * when it's released and page allocation check will fail when
+	 * the page is allocated again. For architectures other than ARM64,
+	 * the unexpected overhead of cache flushing is acceptable.
+	 */
 	pr_debug("Validating PMD advanced\n");
 	/* Align the address wrt HPAGE_PMD_SIZE */
 	vaddr &= HPAGE_PMD_MASK;
 
-	pgtable_trans_huge_deposit(mm, pmdp, pgtable);
+	pgtable_trans_huge_deposit(args->mm, args->pmdp, args->start_ptep);
 
-	pmd = pfn_pmd(pfn, prot);
-	set_pmd_at(mm, vaddr, pmdp, pmd);
-	pmdp_set_wrprotect(mm, vaddr, pmdp);
-	pmd = READ_ONCE(*pmdp);
+	pmd = pfn_pmd(args->pmd_pfn, args->page_prot);
+	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
+	flush_dcache_page(page);
+	pmdp_set_wrprotect(args->mm, vaddr, args->pmdp);
+	pmd = READ_ONCE(*args->pmdp);
 	WARN_ON(pmd_write(pmd));
-	pmdp_huge_get_and_clear(mm, vaddr, pmdp);
-	pmd = READ_ONCE(*pmdp);
+	pmdp_huge_get_and_clear(args->mm, vaddr, args->pmdp);
+	pmd = READ_ONCE(*args->pmdp);
 	WARN_ON(!pmd_none(pmd));
 
-	pmd = pfn_pmd(pfn, prot);
+	pmd = pfn_pmd(args->pmd_pfn, args->page_prot);
 	pmd = pmd_wrprotect(pmd);
 	pmd = pmd_mkclean(pmd);
-	set_pmd_at(mm, vaddr, pmdp, pmd);
+	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
+	flush_dcache_page(page);
 	pmd = pmd_mkwrite(pmd);
 	pmd = pmd_mkdirty(pmd);
-	pmdp_set_access_flags(vma, vaddr, pmdp, pmd, 1);
-	pmd = READ_ONCE(*pmdp);
+	pmdp_set_access_flags(args->vma, vaddr, args->pmdp, pmd, 1);
+	pmd = READ_ONCE(*args->pmdp);
 	WARN_ON(!(pmd_write(pmd) && pmd_dirty(pmd)));
-	pmdp_huge_get_and_clear_full(vma, vaddr, pmdp, 1);
-	pmd = READ_ONCE(*pmdp);
+	pmdp_huge_get_and_clear_full(args->vma, vaddr, args->pmdp, 1);
+	pmd = READ_ONCE(*args->pmdp);
 	WARN_ON(!pmd_none(pmd));
 
-	pmd = pmd_mkhuge(pfn_pmd(pfn, prot));
+	pmd = pmd_mkhuge(pfn_pmd(args->pmd_pfn, args->page_prot));
 	pmd = pmd_mkyoung(pmd);
-	set_pmd_at(mm, vaddr, pmdp, pmd);
-	pmdp_test_and_clear_young(vma, vaddr, pmdp);
-	pmd = READ_ONCE(*pmdp);
+	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
+	flush_dcache_page(page);
+	pmdp_test_and_clear_young(args->vma, vaddr, args->pmdp);
+	pmd = READ_ONCE(*args->pmdp);
 	WARN_ON(pmd_young(pmd));
 
 	/*  Clear the pte entries  */
-	pmdp_huge_get_and_clear(mm, vaddr, pmdp);
-	pgtable = pgtable_trans_huge_withdraw(mm, pmdp);
+	pmdp_huge_get_and_clear(args->mm, vaddr, args->pmdp);
+	pgtable_trans_huge_withdraw(args->mm, args->pmdp);
 }
 
-static void __init pmd_leaf_tests(unsigned long pfn, pgprot_t prot)
+static void __init pmd_leaf_tests(struct pgtable_debug_args *args)
 {
 	pmd_t pmd;
 
@@ -239,7 +295,7 @@ static void __init pmd_leaf_tests(unsigned long pfn, pgprot_t prot)
 		return;
 
 	pr_debug("Validating PMD leaf\n");
-	pmd = pfn_pmd(pfn, prot);
+	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
 
 	/*
 	 * PMD based THP is a leaf entry.
@@ -248,7 +304,7 @@ static void __init pmd_leaf_tests(unsigned long pfn, pgprot_t prot)
 	WARN_ON(!pmd_leaf(pmd));
 }
 
-static void __init pmd_savedwrite_tests(unsigned long pfn, pgprot_t prot)
+static void __init pmd_savedwrite_tests(struct pgtable_debug_args *args)
 {
 	pmd_t pmd;
 
@@ -259,13 +315,13 @@ static void __init pmd_savedwrite_tests(unsigned long pfn, pgprot_t prot)
 		return;
 
 	pr_debug("Validating PMD saved write\n");
-	pmd = pfn_pmd(pfn, prot);
+	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot_none);
 	WARN_ON(!pmd_savedwrite(pmd_mk_savedwrite(pmd_clear_savedwrite(pmd))));
 	WARN_ON(pmd_savedwrite(pmd_clear_savedwrite(pmd_mk_savedwrite(pmd))));
 }
 
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
-static void __init pud_basic_tests(struct mm_struct *mm, unsigned long pfn, int idx)
+static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx)
 {
 	pgprot_t prot = protection_map[idx];
 	unsigned long val = idx, *ptr = &val;
@@ -275,7 +331,7 @@ static void __init pud_basic_tests(struct mm_struct *mm, unsigned long pfn, int
 		return;
 
 	pr_debug("Validating PUD basic (%pGv)\n", ptr);
-	pud = pfn_pud(pfn, prot);
+	pud = pfn_pud(args->fixed_pud_pfn, prot);
 
 	/*
 	 * This test needs to be executed after the given page table entry
@@ -296,7 +352,7 @@ static void __init pud_basic_tests(struct mm_struct *mm, unsigned long pfn, int
 	WARN_ON(pud_dirty(pud_wrprotect(pud_mkclean(pud))));
 	WARN_ON(!pud_dirty(pud_wrprotect(pud_mkdirty(pud))));
 
-	if (mm_pmd_folded(mm))
+	if (mm_pmd_folded(args->mm))
 		return;
 
 	/*
@@ -306,58 +362,71 @@ static void __init pud_basic_tests(struct mm_struct *mm, unsigned long pfn, int
 	WARN_ON(!pud_bad(pud_mkhuge(pud)));
 }
 
-static void __init pud_advanced_tests(struct mm_struct *mm,
-				      struct vm_area_struct *vma, pud_t *pudp,
-				      unsigned long pfn, unsigned long vaddr,
-				      pgprot_t prot)
+static void __init pud_advanced_tests(struct pgtable_debug_args *args)
 {
+	struct page *page;
+	unsigned long vaddr = args->vaddr;
 	pud_t pud;
 
 	if (!has_transparent_hugepage())
 		return;
 
+	page = (args->pud_pfn != ULONG_MAX) ? pfn_to_page(args->pud_pfn) : NULL;
+	if (!page)
+		return;
+
+	/*
+	 * flush_dcache_page() is called after set_pud_at() to clear
+	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
+	 * when it's released and page allocation check will fail when
+	 * the page is allocated again. For architectures other than ARM64,
+	 * the unexpected overhead of cache flushing is acceptable.
+	 */
 	pr_debug("Validating PUD advanced\n");
 	/* Align the address wrt HPAGE_PUD_SIZE */
 	vaddr &= HPAGE_PUD_MASK;
 
-	pud = pfn_pud(pfn, prot);
-	set_pud_at(mm, vaddr, pudp, pud);
-	pudp_set_wrprotect(mm, vaddr, pudp);
-	pud = READ_ONCE(*pudp);
+	pud = pfn_pud(args->pud_pfn, args->page_prot);
+	set_pud_at(args->mm, vaddr, args->pudp, pud);
+	flush_dcache_page(page);
+	pudp_set_wrprotect(args->mm, vaddr, args->pudp);
+	pud = READ_ONCE(*args->pudp);
 	WARN_ON(pud_write(pud));
 
 #ifndef __PAGETABLE_PMD_FOLDED
-	pudp_huge_get_and_clear(mm, vaddr, pudp);
-	pud = READ_ONCE(*pudp);
+	pudp_huge_get_and_clear(args->mm, vaddr, args->pudp);
+	pud = READ_ONCE(*args->pudp);
 	WARN_ON(!pud_none(pud));
 #endif /* __PAGETABLE_PMD_FOLDED */
-	pud = pfn_pud(pfn, prot);
+	pud = pfn_pud(args->pud_pfn, args->page_prot);
 	pud = pud_wrprotect(pud);
 	pud = pud_mkclean(pud);
-	set_pud_at(mm, vaddr, pudp, pud);
+	set_pud_at(args->mm, vaddr, args->pudp, pud);
+	flush_dcache_page(page);
 	pud = pud_mkwrite(pud);
 	pud = pud_mkdirty(pud);
-	pudp_set_access_flags(vma, vaddr, pudp, pud, 1);
-	pud = READ_ONCE(*pudp);
+	pudp_set_access_flags(args->vma, vaddr, args->pudp, pud, 1);
+	pud = READ_ONCE(*args->pudp);
 	WARN_ON(!(pud_write(pud) && pud_dirty(pud)));
 
 #ifndef __PAGETABLE_PMD_FOLDED
-	pudp_huge_get_and_clear_full(mm, vaddr, pudp, 1);
-	pud = READ_ONCE(*pudp);
+	pudp_huge_get_and_clear_full(args->mm, vaddr, args->pudp, 1);
+	pud = READ_ONCE(*args->pudp);
 	WARN_ON(!pud_none(pud));
 #endif /* __PAGETABLE_PMD_FOLDED */
 
-	pud = pfn_pud(pfn, prot);
+	pud = pfn_pud(args->pud_pfn, args->page_prot);
 	pud = pud_mkyoung(pud);
-	set_pud_at(mm, vaddr, pudp, pud);
-	pudp_test_and_clear_young(vma, vaddr, pudp);
-	pud = READ_ONCE(*pudp);
+	set_pud_at(args->mm, vaddr, args->pudp, pud);
+	flush_dcache_page(page);
+	pudp_test_and_clear_young(args->vma, vaddr, args->pudp);
+	pud = READ_ONCE(*args->pudp);
 	WARN_ON(pud_young(pud));
 
-	pudp_huge_get_and_clear(mm, vaddr, pudp);
+	pudp_huge_get_and_clear(args->mm, vaddr, args->pudp);
 }
 
-static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot)
+static void __init pud_leaf_tests(struct pgtable_debug_args *args)
 {
 	pud_t pud;
 
@@ -365,7 +434,7 @@ static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot)
 		return;
 
 	pr_debug("Validating PUD leaf\n");
-	pud = pfn_pud(pfn, prot);
+	pud = pfn_pud(args->fixed_pud_pfn, args->page_prot);
 	/*
 	 * PUD based THP is a leaf entry.
 	 */
@@ -373,41 +442,26 @@ static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot)
 	WARN_ON(!pud_leaf(pud));
 }
 #else  /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
-static void __init pud_basic_tests(struct mm_struct *mm, unsigned long pfn, int idx) { }
-static void __init pud_advanced_tests(struct mm_struct *mm,
-				      struct vm_area_struct *vma, pud_t *pudp,
-				      unsigned long pfn, unsigned long vaddr,
-				      pgprot_t prot)
-{
-}
-static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot) { }
+static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx) { }
+static void __init pud_advanced_tests(struct pgtable_debug_args *args) { }
+static void __init pud_leaf_tests(struct pgtable_debug_args *args) { }
 #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
 #else  /* !CONFIG_TRANSPARENT_HUGEPAGE */
-static void __init pmd_basic_tests(unsigned long pfn, int idx) { }
-static void __init pud_basic_tests(struct mm_struct *mm, unsigned long pfn, int idx) { }
-static void __init pmd_advanced_tests(struct mm_struct *mm,
-				      struct vm_area_struct *vma, pmd_t *pmdp,
-				      unsigned long pfn, unsigned long vaddr,
-				      pgprot_t prot, pgtable_t pgtable)
-{
-}
-static void __init pud_advanced_tests(struct mm_struct *mm,
-				      struct vm_area_struct *vma, pud_t *pudp,
-				      unsigned long pfn, unsigned long vaddr,
-				      pgprot_t prot)
-{
-}
-static void __init pmd_leaf_tests(unsigned long pfn, pgprot_t prot) { }
-static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot) { }
-static void __init pmd_savedwrite_tests(unsigned long pfn, pgprot_t prot) { }
+static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx) { }
+static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx) { }
+static void __init pmd_advanced_tests(struct pgtable_debug_args *args) { }
+static void __init pud_advanced_tests(struct pgtable_debug_args *args) { }
+static void __init pmd_leaf_tests(struct pgtable_debug_args *args) { }
+static void __init pud_leaf_tests(struct pgtable_debug_args *args) { }
+static void __init pmd_savedwrite_tests(struct pgtable_debug_args *args) { }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
-static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot)
+static void __init pmd_huge_tests(struct pgtable_debug_args *args)
 {
 	pmd_t pmd;
 
-	if (!arch_vmap_pmd_supported(prot))
+	if (!arch_vmap_pmd_supported(args->page_prot))
 		return;
 
 	pr_debug("Validating PMD huge\n");
@@ -415,18 +469,18 @@ static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot)
 	 * X86 defined pmd_set_huge() verifies that the given
 	 * PMD is not a populated non-leaf entry.
 	 */
-	WRITE_ONCE(*pmdp, __pmd(0));
-	WARN_ON(!pmd_set_huge(pmdp, __pfn_to_phys(pfn), prot));
-	WARN_ON(!pmd_clear_huge(pmdp));
-	pmd = READ_ONCE(*pmdp);
+	WRITE_ONCE(*args->pmdp, __pmd(0));
+	WARN_ON(!pmd_set_huge(args->pmdp, __pfn_to_phys(args->fixed_pmd_pfn), args->page_prot));
+	WARN_ON(!pmd_clear_huge(args->pmdp));
+	pmd = READ_ONCE(*args->pmdp);
 	WARN_ON(!pmd_none(pmd));
 }
 
-static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot)
+static void __init pud_huge_tests(struct pgtable_debug_args *args)
 {
 	pud_t pud;
 
-	if (!arch_vmap_pud_supported(prot))
+	if (!arch_vmap_pud_supported(args->page_prot))
 		return;
 
 	pr_debug("Validating PUD huge\n");
@@ -434,18 +488,18 @@ static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot)
 	 * X86 defined pud_set_huge() verifies that the given
 	 * PUD is not a populated non-leaf entry.
 	 */
-	WRITE_ONCE(*pudp, __pud(0));
-	WARN_ON(!pud_set_huge(pudp, __pfn_to_phys(pfn), prot));
-	WARN_ON(!pud_clear_huge(pudp));
-	pud = READ_ONCE(*pudp);
+	WRITE_ONCE(*args->pudp, __pud(0));
+	WARN_ON(!pud_set_huge(args->pudp, __pfn_to_phys(args->fixed_pud_pfn), args->page_prot));
+	WARN_ON(!pud_clear_huge(args->pudp));
+	pud = READ_ONCE(*args->pudp);
 	WARN_ON(!pud_none(pud));
 }
 #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
-static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot) { }
-static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot) { }
+static void __init pmd_huge_tests(struct pgtable_debug_args *args) { }
+static void __init pud_huge_tests(struct pgtable_debug_args *args) { }
 #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
 
-static void __init p4d_basic_tests(unsigned long pfn, pgprot_t prot)
+static void __init p4d_basic_tests(struct pgtable_debug_args *args)
 {
 	p4d_t p4d;
 
@@ -454,7 +508,7 @@ static void __init p4d_basic_tests(unsigned long pfn, pgprot_t prot)
 	WARN_ON(!p4d_same(p4d, p4d));
 }
 
-static void __init pgd_basic_tests(unsigned long pfn, pgprot_t prot)
+static void __init pgd_basic_tests(struct pgtable_debug_args *args)
 {
 	pgd_t pgd;
 
@@ -464,27 +518,26 @@ static void __init pgd_basic_tests(unsigned long pfn, pgprot_t prot)
 }
 
 #ifndef __PAGETABLE_PUD_FOLDED
-static void __init pud_clear_tests(struct mm_struct *mm, pud_t *pudp)
+static void __init pud_clear_tests(struct pgtable_debug_args *args)
 {
-	pud_t pud = READ_ONCE(*pudp);
+	pud_t pud = READ_ONCE(*args->pudp);
 
-	if (mm_pmd_folded(mm))
+	if (mm_pmd_folded(args->mm))
 		return;
 
 	pr_debug("Validating PUD clear\n");
 	pud = __pud(pud_val(pud) | RANDOM_ORVALUE);
-	WRITE_ONCE(*pudp, pud);
-	pud_clear(pudp);
-	pud = READ_ONCE(*pudp);
+	WRITE_ONCE(*args->pudp, pud);
+	pud_clear(args->pudp);
+	pud = READ_ONCE(*args->pudp);
 	WARN_ON(!pud_none(pud));
 }
 
-static void __init pud_populate_tests(struct mm_struct *mm, pud_t *pudp,
-				      pmd_t *pmdp)
+static void __init pud_populate_tests(struct pgtable_debug_args *args)
 {
 	pud_t pud;
 
-	if (mm_pmd_folded(mm))
+	if (mm_pmd_folded(args->mm))
 		return;
 
 	pr_debug("Validating PUD populate\n");
@@ -492,40 +545,36 @@ static void __init pud_populate_tests(struct mm_struct *mm, pud_t *pudp,
 	 * This entry points to next level page table page.
 	 * Hence this must not qualify as pud_bad().
 	 */
-	pud_populate(mm, pudp, pmdp);
-	pud = READ_ONCE(*pudp);
+	pud_populate(args->mm, args->pudp, args->start_pmdp);
+	pud = READ_ONCE(*args->pudp);
 	WARN_ON(pud_bad(pud));
 }
 #else  /* !__PAGETABLE_PUD_FOLDED */
-static void __init pud_clear_tests(struct mm_struct *mm, pud_t *pudp) { }
-static void __init pud_populate_tests(struct mm_struct *mm, pud_t *pudp,
-				      pmd_t *pmdp)
-{
-}
+static void __init pud_clear_tests(struct pgtable_debug_args *args) { }
+static void __init pud_populate_tests(struct pgtable_debug_args *args) { }
 #endif /* PAGETABLE_PUD_FOLDED */
 
 #ifndef __PAGETABLE_P4D_FOLDED
-static void __init p4d_clear_tests(struct mm_struct *mm, p4d_t *p4dp)
+static void __init p4d_clear_tests(struct pgtable_debug_args *args)
 {
-	p4d_t p4d = READ_ONCE(*p4dp);
+	p4d_t p4d = READ_ONCE(*args->p4dp);
 
-	if (mm_pud_folded(mm))
+	if (mm_pud_folded(args->mm))
 		return;
 
 	pr_debug("Validating P4D clear\n");
 	p4d = __p4d(p4d_val(p4d) | RANDOM_ORVALUE);
-	WRITE_ONCE(*p4dp, p4d);
-	p4d_clear(p4dp);
-	p4d = READ_ONCE(*p4dp);
+	WRITE_ONCE(*args->p4dp, p4d);
+	p4d_clear(args->p4dp);
+	p4d = READ_ONCE(*args->p4dp);
 	WARN_ON(!p4d_none(p4d));
 }
 
-static void __init p4d_populate_tests(struct mm_struct *mm, p4d_t *p4dp,
-				      pud_t *pudp)
+static void __init p4d_populate_tests(struct pgtable_debug_args *args)
 {
 	p4d_t p4d;
 
-	if (mm_pud_folded(mm))
+	if (mm_pud_folded(args->mm))
 		return;
 
 	pr_debug("Validating P4D populate\n");
@@ -533,34 +582,33 @@ static void __init p4d_populate_tests(struct mm_struct *mm, p4d_t *p4dp,
 	 * This entry points to next level page table page.
 	 * Hence this must not qualify as p4d_bad().
 	 */
-	pud_clear(pudp);
-	p4d_clear(p4dp);
-	p4d_populate(mm, p4dp, pudp);
-	p4d = READ_ONCE(*p4dp);
+	pud_clear(args->pudp);
+	p4d_clear(args->p4dp);
+	p4d_populate(args->mm, args->p4dp, args->start_pudp);
+	p4d = READ_ONCE(*args->p4dp);
 	WARN_ON(p4d_bad(p4d));
 }
 
-static void __init pgd_clear_tests(struct mm_struct *mm, pgd_t *pgdp)
+static void __init pgd_clear_tests(struct pgtable_debug_args *args)
 {
-	pgd_t pgd = READ_ONCE(*pgdp);
+	pgd_t pgd = READ_ONCE(*(args->pgdp));
 
-	if (mm_p4d_folded(mm))
+	if (mm_p4d_folded(args->mm))
 		return;
 
 	pr_debug("Validating PGD clear\n");
 	pgd = __pgd(pgd_val(pgd) | RANDOM_ORVALUE);
-	WRITE_ONCE(*pgdp, pgd);
-	pgd_clear(pgdp);
-	pgd = READ_ONCE(*pgdp);
+	WRITE_ONCE(*args->pgdp, pgd);
+	pgd_clear(args->pgdp);
+	pgd = READ_ONCE(*args->pgdp);
 	WARN_ON(!pgd_none(pgd));
 }
 
-static void __init pgd_populate_tests(struct mm_struct *mm, pgd_t *pgdp,
-				      p4d_t *p4dp)
+static void __init pgd_populate_tests(struct pgtable_debug_args *args)
 {
 	pgd_t pgd;
 
-	if (mm_p4d_folded(mm))
+	if (mm_p4d_folded(args->mm))
 		return;
 
 	pr_debug("Validating PGD populate\n");
@@ -568,56 +616,60 @@ static void __init pgd_populate_tests(struct mm_struct *mm, pgd_t *pgdp,
 	 * This entry points to next level page table page.
 	 * Hence this must not qualify as pgd_bad().
 	 */
-	p4d_clear(p4dp);
-	pgd_clear(pgdp);
-	pgd_populate(mm, pgdp, p4dp);
-	pgd = READ_ONCE(*pgdp);
+	p4d_clear(args->p4dp);
+	pgd_clear(args->pgdp);
+	pgd_populate(args->mm, args->pgdp, args->start_p4dp);
+	pgd = READ_ONCE(*args->pgdp);
 	WARN_ON(pgd_bad(pgd));
 }
 #else  /* !__PAGETABLE_P4D_FOLDED */
-static void __init p4d_clear_tests(struct mm_struct *mm, p4d_t *p4dp) { }
-static void __init pgd_clear_tests(struct mm_struct *mm, pgd_t *pgdp) { }
-static void __init p4d_populate_tests(struct mm_struct *mm, p4d_t *p4dp,
-				      pud_t *pudp)
-{
-}
-static void __init pgd_populate_tests(struct mm_struct *mm, pgd_t *pgdp,
-				      p4d_t *p4dp)
-{
-}
+static void __init p4d_clear_tests(struct pgtable_debug_args *args) { }
+static void __init pgd_clear_tests(struct pgtable_debug_args *args) { }
+static void __init p4d_populate_tests(struct pgtable_debug_args *args) { }
+static void __init pgd_populate_tests(struct pgtable_debug_args *args) { }
 #endif /* PAGETABLE_P4D_FOLDED */
 
-static void __init pte_clear_tests(struct mm_struct *mm, pte_t *ptep,
-				   unsigned long pfn, unsigned long vaddr,
-				   pgprot_t prot)
+static void __init pte_clear_tests(struct pgtable_debug_args *args)
 {
-	pte_t pte = pfn_pte(pfn, prot);
+	struct page *page;
+	pte_t pte = pfn_pte(args->pte_pfn, args->page_prot);
+
+	page = (args->pte_pfn != ULONG_MAX) ? pfn_to_page(args->pte_pfn) : NULL;
+	if (!page)
+		return;
 
+	/*
+	 * flush_dcache_page() is called after set_pte_at() to clear
+	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
+	 * when it's released and page allocation check will fail when
+	 * the page is allocated again. For architectures other than ARM64,
+	 * the unexpected overhead of cache flushing is acceptable.
+	 */
 	pr_debug("Validating PTE clear\n");
 #ifndef CONFIG_RISCV
 	pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
 #endif
-	set_pte_at(mm, vaddr, ptep, pte);
+	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
+	flush_dcache_page(page);
 	barrier();
-	pte_clear(mm, vaddr, ptep);
-	pte = ptep_get(ptep);
+	pte_clear(args->mm, args->vaddr, args->ptep);
+	pte = ptep_get(args->ptep);
 	WARN_ON(!pte_none(pte));
 }
 
-static void __init pmd_clear_tests(struct mm_struct *mm, pmd_t *pmdp)
+static void __init pmd_clear_tests(struct pgtable_debug_args *args)
 {
-	pmd_t pmd = READ_ONCE(*pmdp);
+	pmd_t pmd = READ_ONCE(*args->pmdp);
 
 	pr_debug("Validating PMD clear\n");
 	pmd = __pmd(pmd_val(pmd) | RANDOM_ORVALUE);
-	WRITE_ONCE(*pmdp, pmd);
-	pmd_clear(pmdp);
-	pmd = READ_ONCE(*pmdp);
+	WRITE_ONCE(*args->pmdp, pmd);
+	pmd_clear(args->pmdp);
+	pmd = READ_ONCE(*args->pmdp);
 	WARN_ON(!pmd_none(pmd));
 }
 
-static void __init pmd_populate_tests(struct mm_struct *mm, pmd_t *pmdp,
-				      pgtable_t pgtable)
+static void __init pmd_populate_tests(struct pgtable_debug_args *args)
 {
 	pmd_t pmd;
 
@@ -626,14 +678,14 @@ static void __init pmd_populate_tests(struct mm_struct *mm, pmd_t *pmdp,
 	 * This entry points to next level page table page.
 	 * Hence this must not qualify as pmd_bad().
 	 */
-	pmd_populate(mm, pmdp, pgtable);
-	pmd = READ_ONCE(*pmdp);
+	pmd_populate(args->mm, args->pmdp, args->start_ptep);
+	pmd = READ_ONCE(*args->pmdp);
 	WARN_ON(pmd_bad(pmd));
 }
 
-static void __init pte_special_tests(unsigned long pfn, pgprot_t prot)
+static void __init pte_special_tests(struct pgtable_debug_args *args)
 {
-	pte_t pte = pfn_pte(pfn, prot);
+	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
 
 	if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL))
 		return;
@@ -642,9 +694,9 @@ static void __init pte_special_tests(unsigned long pfn, pgprot_t prot)
 	WARN_ON(!pte_special(pte_mkspecial(pte)));
 }
 
-static void __init pte_protnone_tests(unsigned long pfn, pgprot_t prot)
+static void __init pte_protnone_tests(struct pgtable_debug_args *args)
 {
-	pte_t pte = pfn_pte(pfn, prot);
+	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot_none);
 
 	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
 		return;
@@ -655,7 +707,7 @@ static void __init pte_protnone_tests(unsigned long pfn, pgprot_t prot)
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static void __init pmd_protnone_tests(unsigned long pfn, pgprot_t prot)
+static void __init pmd_protnone_tests(struct pgtable_debug_args *args)
 {
 	pmd_t pmd;
 
@@ -666,25 +718,25 @@ static void __init pmd_protnone_tests(unsigned long pfn, pgprot_t prot)
 		return;
 
 	pr_debug("Validating PMD protnone\n");
-	pmd = pmd_mkhuge(pfn_pmd(pfn, prot));
+	pmd = pmd_mkhuge(pfn_pmd(args->fixed_pmd_pfn, args->page_prot_none));
 	WARN_ON(!pmd_protnone(pmd));
 	WARN_ON(!pmd_present(pmd));
 }
 #else  /* !CONFIG_TRANSPARENT_HUGEPAGE */
-static void __init pmd_protnone_tests(unsigned long pfn, pgprot_t prot) { }
+static void __init pmd_protnone_tests(struct pgtable_debug_args *args) { }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #ifdef CONFIG_ARCH_HAS_PTE_DEVMAP
-static void __init pte_devmap_tests(unsigned long pfn, pgprot_t prot)
+static void __init pte_devmap_tests(struct pgtable_debug_args *args)
 {
-	pte_t pte = pfn_pte(pfn, prot);
+	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
 
 	pr_debug("Validating PTE devmap\n");
 	WARN_ON(!pte_devmap(pte_mkdevmap(pte)));
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot)
+static void __init pmd_devmap_tests(struct pgtable_debug_args *args)
 {
 	pmd_t pmd;
 
@@ -692,12 +744,12 @@ static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot)
 		return;
 
 	pr_debug("Validating PMD devmap\n");
-	pmd = pfn_pmd(pfn, prot);
+	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
 	WARN_ON(!pmd_devmap(pmd_mkdevmap(pmd)));
 }
 
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
-static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot)
+static void __init pud_devmap_tests(struct pgtable_debug_args *args)
 {
 	pud_t pud;
 
@@ -705,25 +757,25 @@ static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot)
 		return;
 
 	pr_debug("Validating PUD devmap\n");
-	pud = pfn_pud(pfn, prot);
+	pud = pfn_pud(args->fixed_pud_pfn, args->page_prot);
 	WARN_ON(!pud_devmap(pud_mkdevmap(pud)));
 }
 #else  /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
-static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot) { }
+static void __init pud_devmap_tests(struct pgtable_debug_args *args) { }
 #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
 #else  /* CONFIG_TRANSPARENT_HUGEPAGE */
-static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot) { }
-static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot) { }
+static void __init pmd_devmap_tests(struct pgtable_debug_args *args) { }
+static void __init pud_devmap_tests(struct pgtable_debug_args *args) { }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #else
-static void __init pte_devmap_tests(unsigned long pfn, pgprot_t prot) { }
-static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot) { }
-static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot) { }
+static void __init pte_devmap_tests(struct pgtable_debug_args *args) { }
+static void __init pmd_devmap_tests(struct pgtable_debug_args *args) { }
+static void __init pud_devmap_tests(struct pgtable_debug_args *args) { }
 #endif /* CONFIG_ARCH_HAS_PTE_DEVMAP */
 
-static void __init pte_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
+static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args)
 {
-	pte_t pte = pfn_pte(pfn, prot);
+	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
 
 	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
 		return;
@@ -733,9 +785,9 @@ static void __init pte_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
 	WARN_ON(pte_soft_dirty(pte_clear_soft_dirty(pte)));
 }
 
-static void __init pte_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
+static void __init pte_swap_soft_dirty_tests(struct pgtable_debug_args *args)
 {
-	pte_t pte = pfn_pte(pfn, prot);
+	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
 
 	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
 		return;
@@ -746,7 +798,7 @@ static void __init pte_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static void __init pmd_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
+static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args)
 {
 	pmd_t pmd;
 
@@ -757,12 +809,12 @@ static void __init pmd_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
 		return;
 
 	pr_debug("Validating PMD soft dirty\n");
-	pmd = pfn_pmd(pfn, prot);
+	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
 	WARN_ON(!pmd_soft_dirty(pmd_mksoft_dirty(pmd)));
 	WARN_ON(pmd_soft_dirty(pmd_clear_soft_dirty(pmd)));
 }
 
-static void __init pmd_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
+static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args)
 {
 	pmd_t pmd;
 
@@ -774,31 +826,29 @@ static void __init pmd_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
 		return;
 
 	pr_debug("Validating PMD swap soft dirty\n");
-	pmd = pfn_pmd(pfn, prot);
+	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
 	WARN_ON(!pmd_swp_soft_dirty(pmd_swp_mksoft_dirty(pmd)));
 	WARN_ON(pmd_swp_soft_dirty(pmd_swp_clear_soft_dirty(pmd)));
 }
 #else  /* !CONFIG_TRANSPARENT_HUGEPAGE */
-static void __init pmd_soft_dirty_tests(unsigned long pfn, pgprot_t prot) { }
-static void __init pmd_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
-{
-}
+static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args) { }
+static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args) { }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-static void __init pte_swap_tests(unsigned long pfn, pgprot_t prot)
+static void __init pte_swap_tests(struct pgtable_debug_args *args)
 {
 	swp_entry_t swp;
 	pte_t pte;
 
 	pr_debug("Validating PTE swap\n");
-	pte = pfn_pte(pfn, prot);
+	pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
 	swp = __pte_to_swp_entry(pte);
 	pte = __swp_entry_to_pte(swp);
-	WARN_ON(pfn != pte_pfn(pte));
+	WARN_ON(args->fixed_pte_pfn != pte_pfn(pte));
 }
 
 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
-static void __init pmd_swap_tests(unsigned long pfn, pgprot_t prot)
+static void __init pmd_swap_tests(struct pgtable_debug_args *args)
 {
 	swp_entry_t swp;
 	pmd_t pmd;
@@ -807,16 +857,16 @@ static void __init pmd_swap_tests(unsigned long pfn, pgprot_t prot)
 		return;
 
 	pr_debug("Validating PMD swap\n");
-	pmd = pfn_pmd(pfn, prot);
+	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
 	swp = __pmd_to_swp_entry(pmd);
 	pmd = __swp_entry_to_pmd(swp);
-	WARN_ON(pfn != pmd_pfn(pmd));
+	WARN_ON(args->fixed_pmd_pfn != pmd_pfn(pmd));
 }
 #else  /* !CONFIG_ARCH_ENABLE_THP_MIGRATION */
-static void __init pmd_swap_tests(unsigned long pfn, pgprot_t prot) { }
+static void __init pmd_swap_tests(struct pgtable_debug_args *args) { }
 #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
 
-static void __init swap_migration_tests(void)
+static void __init swap_migration_tests(struct pgtable_debug_args *args)
 {
 	struct page *page;
 	swp_entry_t swp;
@@ -824,19 +874,18 @@ static void __init swap_migration_tests(void)
 	if (!IS_ENABLED(CONFIG_MIGRATION))
 		return;
 
-	pr_debug("Validating swap migration\n");
 	/*
 	 * swap_migration_tests() requires a dedicated page as it needs to
 	 * be locked before creating a migration entry from it. Locking the
 	 * page that actually maps kernel text ('start_kernel') can be real
-	 * problematic. Lets allocate a dedicated page explicitly for this
-	 * purpose that will be freed subsequently.
+	 * problematic. Lets use the allocated page explicitly for this
+	 * purpose.
 	 */
-	page = alloc_page(GFP_KERNEL);
-	if (!page) {
-		pr_err("page allocation failed\n");
+	page = (args->pte_pfn != ULONG_MAX) ? pfn_to_page(args->pte_pfn) : NULL;
+	if (!page)
 		return;
-	}
+
+	pr_debug("Validating swap migration\n");
 
 	/*
 	 * make_migration_entry() expects given page to be
@@ -855,11 +904,10 @@ static void __init swap_migration_tests(void)
 	WARN_ON(!is_migration_entry(swp));
 	WARN_ON(is_writable_migration_entry(swp));
 	__ClearPageLocked(page);
-	__free_page(page);
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
-static void __init hugetlb_basic_tests(unsigned long pfn, pgprot_t prot)
+static void __init hugetlb_basic_tests(struct pgtable_debug_args *args)
 {
 	struct page *page;
 	pte_t pte;
@@ -869,25 +917,25 @@ static void __init hugetlb_basic_tests(unsigned long pfn, pgprot_t prot)
 	 * Accessing the page associated with the pfn is safe here,
 	 * as it was previously derived from a real kernel symbol.
 	 */
-	page = pfn_to_page(pfn);
-	pte = mk_huge_pte(page, prot);
+	page = pfn_to_page(args->fixed_pmd_pfn);
+	pte = mk_huge_pte(page, args->page_prot);
 
 	WARN_ON(!huge_pte_dirty(huge_pte_mkdirty(pte)));
 	WARN_ON(!huge_pte_write(huge_pte_mkwrite(huge_pte_wrprotect(pte))));
 	WARN_ON(huge_pte_write(huge_pte_wrprotect(huge_pte_mkwrite(pte))));
 
 #ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB
-	pte = pfn_pte(pfn, prot);
+	pte = pfn_pte(args->fixed_pmd_pfn, args->page_prot);
 
 	WARN_ON(!pte_huge(pte_mkhuge(pte)));
 #endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
 }
 #else  /* !CONFIG_HUGETLB_PAGE */
-static void __init hugetlb_basic_tests(unsigned long pfn, pgprot_t prot) { }
+static void __init hugetlb_basic_tests(struct pgtable_debug_args *args) { }
 #endif /* CONFIG_HUGETLB_PAGE */
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static void __init pmd_thp_tests(unsigned long pfn, pgprot_t prot)
+static void __init pmd_thp_tests(struct pgtable_debug_args *args)
 {
 	pmd_t pmd;
 
@@ -906,7 +954,7 @@ static void __init pmd_thp_tests(unsigned long pfn, pgprot_t prot)
 	 * needs to return true. pmd_present() should be true whenever
 	 * pmd_trans_huge() returns true.
 	 */
-	pmd = pfn_pmd(pfn, prot);
+	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
 	WARN_ON(!pmd_trans_huge(pmd_mkhuge(pmd)));
 
 #ifndef __HAVE_ARCH_PMDP_INVALIDATE
@@ -916,7 +964,7 @@ static void __init pmd_thp_tests(unsigned long pfn, pgprot_t prot)
 }
 
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
-static void __init pud_thp_tests(unsigned long pfn, pgprot_t prot)
+static void __init pud_thp_tests(struct pgtable_debug_args *args)
 {
 	pud_t pud;
 
@@ -924,7 +972,7 @@ static void __init pud_thp_tests(unsigned long pfn, pgprot_t prot)
 		return;
 
 	pr_debug("Validating PUD based THP\n");
-	pud = pfn_pud(pfn, prot);
+	pud = pfn_pud(args->fixed_pud_pfn, args->page_prot);
 	WARN_ON(!pud_trans_huge(pud_mkhuge(pud)));
 
 	/*
@@ -936,11 +984,11 @@ static void __init pud_thp_tests(unsigned long pfn, pgprot_t prot)
 	 */
 }
 #else  /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
-static void __init pud_thp_tests(unsigned long pfn, pgprot_t prot) { }
+static void __init pud_thp_tests(struct pgtable_debug_args *args) { }
 #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
 #else  /* !CONFIG_TRANSPARENT_HUGEPAGE */
-static void __init pmd_thp_tests(unsigned long pfn, pgprot_t prot) { }
-static void __init pud_thp_tests(unsigned long pfn, pgprot_t prot) { }
+static void __init pmd_thp_tests(struct pgtable_debug_args *args) { }
+static void __init pud_thp_tests(struct pgtable_debug_args *args) { }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 static unsigned long __init get_random_vaddr(void)
@@ -955,43 +1003,179 @@ static unsigned long __init get_random_vaddr(void)
 	return random_vaddr;
 }
 
-static int __init debug_vm_pgtable(void)
+static void __init destroy_args(struct pgtable_debug_args *args)
 {
-	struct vm_area_struct *vma;
-	struct mm_struct *mm;
-	pgd_t *pgdp;
-	p4d_t *p4dp, *saved_p4dp;
-	pud_t *pudp, *saved_pudp;
-	pmd_t *pmdp, *saved_pmdp, pmd;
-	pte_t *ptep;
-	pgtable_t saved_ptep;
-	pgprot_t prot, protnone;
-	phys_addr_t paddr;
-	unsigned long vaddr, pte_aligned, pmd_aligned;
-	unsigned long pud_aligned, p4d_aligned, pgd_aligned;
-	spinlock_t *ptl = NULL;
-	int idx;
+	struct page *page = NULL;
+
+	/* Free (huge) page */
+	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
+	    IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) &&
+	    has_transparent_hugepage() &&
+	    args->pud_pfn != ULONG_MAX) {
+		if (args->is_contiguous_page) {
+			free_contig_range(args->pud_pfn,
+					  (1 << (HPAGE_PUD_SHIFT - PAGE_SHIFT)));
+		} else {
+			page = pfn_to_page(args->pud_pfn);
+			__free_pages(page, HPAGE_PUD_SHIFT - PAGE_SHIFT);
+		}
+
+		args->pud_pfn = ULONG_MAX;
+		args->pmd_pfn = ULONG_MAX;
+		args->pte_pfn = ULONG_MAX;
+	}
 
-	pr_info("Validating architecture page table helpers\n");
-	prot = vm_get_page_prot(VMFLAGS);
-	vaddr = get_random_vaddr();
-	mm = mm_alloc();
-	if (!mm) {
-		pr_err("mm_struct allocation failed\n");
-		return 1;
+	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
+	    has_transparent_hugepage() &&
+	    args->pmd_pfn != ULONG_MAX) {
+		if (args->is_contiguous_page) {
+			free_contig_range(args->pmd_pfn, (1 << HPAGE_PMD_ORDER));
+		} else {
+			page = pfn_to_page(args->pmd_pfn);
+			__free_pages(page, HPAGE_PMD_ORDER);
+		}
+
+		args->pmd_pfn = ULONG_MAX;
+		args->pte_pfn = ULONG_MAX;
 	}
 
+	if (args->pte_pfn != ULONG_MAX) {
+		page = pfn_to_page(args->pte_pfn);
+		__free_pages(page, 0);
+
+		args->pte_pfn = ULONG_MAX;
+	}
+
+	/* Free page table entries */
+	if (args->start_ptep) {
+		pte_free(args->mm, args->start_ptep);
+		mm_dec_nr_ptes(args->mm);
+	}
+
+	if (args->start_pmdp) {
+		pmd_free(args->mm, args->start_pmdp);
+		mm_dec_nr_pmds(args->mm);
+	}
+
+	if (args->start_pudp) {
+		pud_free(args->mm, args->start_pudp);
+		mm_dec_nr_puds(args->mm);
+	}
+
+	if (args->start_p4dp)
+		p4d_free(args->mm, args->start_p4dp);
+
+	/* Free vma and mm struct */
+	if (args->vma)
+		vm_area_free(args->vma);
+
+	if (args->mm)
+		mmdrop(args->mm);
+}
+
+static struct page * __init
+debug_vm_pgtable_alloc_huge_page(struct pgtable_debug_args *args, int order)
+{
+	struct page *page = NULL;
+
+#ifdef CONFIG_CONTIG_ALLOC
+	if (order >= MAX_ORDER) {
+		page = alloc_contig_pages((1 << order), GFP_KERNEL,
+					  first_online_node, NULL);
+		if (page) {
+			args->is_contiguous_page = true;
+			return page;
+		}
+	}
+#endif
+
+	if (order < MAX_ORDER)
+		page = alloc_pages(GFP_KERNEL, order);
+
+	return page;
+}
+
+static int __init init_args(struct pgtable_debug_args *args)
+{
+	struct page *page = NULL;
+	phys_addr_t phys;
+	int ret = 0;
+
 	/*
+	 * Initialize the debugging data.
+	 *
 	 * __P000 (or even __S000) will help create page table entries with
 	 * PROT_NONE permission as required for pxx_protnone_tests().
 	 */
-	protnone = __P000;
+	memset(args, 0, sizeof(*args));
+	args->vaddr              = get_random_vaddr();
+	args->page_prot          = vm_get_page_prot(VMFLAGS);
+	args->page_prot_none     = __P000;
+	args->is_contiguous_page = false;
+	args->pud_pfn            = ULONG_MAX;
+	args->pmd_pfn            = ULONG_MAX;
+	args->pte_pfn            = ULONG_MAX;
+	args->fixed_pgd_pfn      = ULONG_MAX;
+	args->fixed_p4d_pfn      = ULONG_MAX;
+	args->fixed_pud_pfn      = ULONG_MAX;
+	args->fixed_pmd_pfn      = ULONG_MAX;
+	args->fixed_pte_pfn      = ULONG_MAX;
+
+	/* Allocate mm and vma */
+	args->mm = mm_alloc();
+	if (!args->mm) {
+		pr_err("Failed to allocate mm struct\n");
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	args->vma = vm_area_alloc(args->mm);
+	if (!args->vma) {
+		pr_err("Failed to allocate vma\n");
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	/*
+	 * Allocate page table entries. They will be modified in the tests.
+	 * Lets save the page table entries so that they can be released
+	 * when the tests are completed.
+	 */
+	args->pgdp = pgd_offset(args->mm, args->vaddr);
+	args->p4dp = p4d_alloc(args->mm, args->pgdp, args->vaddr);
+	if (!args->p4dp) {
+		pr_err("Failed to allocate p4d entries\n");
+		ret = -ENOMEM;
+		goto error;
+	}
+	args->start_p4dp = p4d_offset(args->pgdp, 0UL);
+	WARN_ON(!args->start_p4dp);
+
+	args->pudp = pud_alloc(args->mm, args->p4dp, args->vaddr);
+	if (!args->pudp) {
+		pr_err("Failed to allocate pud entries\n");
+		ret = -ENOMEM;
+		goto error;
+	}
+	args->start_pudp = pud_offset(args->p4dp, 0UL);
+	WARN_ON(!args->start_pudp);
+
+	args->pmdp = pmd_alloc(args->mm, args->pudp, args->vaddr);
+	if (!args->pmdp) {
+		pr_err("Failed to allocate pmd entries\n");
+		ret = -ENOMEM;
+		goto error;
+	}
+	args->start_pmdp = pmd_offset(args->pudp, 0UL);
+	WARN_ON(!args->start_pmdp);
 
-	vma = vm_area_alloc(mm);
-	if (!vma) {
-		pr_err("vma allocation failed\n");
-		return 1;
+	if (pte_alloc(args->mm, args->pmdp)) {
+		pr_err("Failed to allocate pte entries\n");
+		ret = -ENOMEM;
+		goto error;
 	}
+	args->start_ptep = pmd_pgtable(READ_ONCE(*args->pmdp));
+	WARN_ON(!args->start_ptep);
 
 	/*
 	 * PFN for mapping at PTE level is determined from a standard kernel
@@ -1000,40 +1184,65 @@ static int __init debug_vm_pgtable(void)
 	 * exist on the platform but that does not really matter as pfn_pxx()
 	 * helpers will still create appropriate entries for the test. This
 	 * helps avoid large memory block allocations to be used for mapping
-	 * at higher page table levels.
+	 * at higher page table levels in some of the tests.
 	 */
-	paddr = __pa_symbol(&start_kernel);
-
-	pte_aligned = (paddr & PAGE_MASK) >> PAGE_SHIFT;
-	pmd_aligned = (paddr & PMD_MASK) >> PAGE_SHIFT;
-	pud_aligned = (paddr & PUD_MASK) >> PAGE_SHIFT;
-	p4d_aligned = (paddr & P4D_MASK) >> PAGE_SHIFT;
-	pgd_aligned = (paddr & PGDIR_MASK) >> PAGE_SHIFT;
-	WARN_ON(!pfn_valid(pte_aligned));
-
-	pgdp = pgd_offset(mm, vaddr);
-	p4dp = p4d_alloc(mm, pgdp, vaddr);
-	pudp = pud_alloc(mm, p4dp, vaddr);
-	pmdp = pmd_alloc(mm, pudp, vaddr);
+	phys = __pa_symbol(&start_kernel);
+	args->fixed_pgd_pfn = __phys_to_pfn(phys & PGDIR_MASK);
+	args->fixed_p4d_pfn = __phys_to_pfn(phys & P4D_MASK);
+	args->fixed_pud_pfn = __phys_to_pfn(phys & PUD_MASK);
+	args->fixed_pmd_pfn = __phys_to_pfn(phys & PMD_MASK);
+	args->fixed_pte_pfn = __phys_to_pfn(phys & PAGE_MASK);
+	WARN_ON(!pfn_valid(args->fixed_pte_pfn));
+
 	/*
-	 * Allocate pgtable_t
+	 * Allocate (huge) pages because some of the tests need to access
+	 * the data in the pages. The corresponding tests will be skipped
+	 * if we fail to allocate (huge) pages.
 	 */
-	if (pte_alloc(mm, pmdp)) {
-		pr_err("pgtable allocation failed\n");
-		return 1;
+	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
+	    IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) &&
+	    has_transparent_hugepage()) {
+		page = debug_vm_pgtable_alloc_huge_page(args,
+				HPAGE_PUD_SHIFT - PAGE_SHIFT);
+		if (page) {
+			args->pud_pfn = page_to_pfn(page);
+			args->pmd_pfn = args->pud_pfn;
+			args->pte_pfn = args->pud_pfn;
+			return 0;
+		}
 	}
 
-	/*
-	 * Save all the page table page addresses as the page table
-	 * entries will be used for testing with random or garbage
-	 * values. These saved addresses will be used for freeing
-	 * page table pages.
-	 */
-	pmd = READ_ONCE(*pmdp);
-	saved_p4dp = p4d_offset(pgdp, 0UL);
-	saved_pudp = pud_offset(p4dp, 0UL);
-	saved_pmdp = pmd_offset(pudp, 0UL);
-	saved_ptep = pmd_pgtable(pmd);
+	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
+	    has_transparent_hugepage()) {
+		page = debug_vm_pgtable_alloc_huge_page(args, HPAGE_PMD_ORDER);
+		if (page) {
+			args->pmd_pfn = page_to_pfn(page);
+			args->pte_pfn = args->pmd_pfn;
+			return 0;
+		}
+	}
+
+	page = alloc_pages(GFP_KERNEL, 0);
+	if (page)
+		args->pte_pfn = page_to_pfn(page);
+
+	return 0;
+
+error:
+	destroy_args(args);
+	return ret;
+}
+
+static int __init debug_vm_pgtable(void)
+{
+	struct pgtable_debug_args args;
+	spinlock_t *ptl = NULL;
+	int idx, ret;
+
+	pr_info("Validating architecture page table helpers\n");
+	ret = init_args(&args);
+	if (ret)
+		return ret;
 
 	/*
 	 * Iterate over the protection_map[] to make sure that all
@@ -1042,9 +1251,9 @@ static int __init debug_vm_pgtable(void)
 	 * given page table entry.
 	 */
 	for (idx = 0; idx < ARRAY_SIZE(protection_map); idx++) {
-		pte_basic_tests(pte_aligned, idx);
-		pmd_basic_tests(pmd_aligned, idx);
-		pud_basic_tests(mm, pud_aligned, idx);
+		pte_basic_tests(&args, idx);
+		pmd_basic_tests(&args, idx);
+		pud_basic_tests(&args, idx);
 	}
 
 	/*
@@ -1054,79 +1263,70 @@ static int __init debug_vm_pgtable(void)
 	 * the above iteration for now to save some test execution
 	 * time.
 	 */
-	p4d_basic_tests(p4d_aligned, prot);
-	pgd_basic_tests(pgd_aligned, prot);
+	p4d_basic_tests(&args);
+	pgd_basic_tests(&args);
 
-	pmd_leaf_tests(pmd_aligned, prot);
-	pud_leaf_tests(pud_aligned, prot);
+	pmd_leaf_tests(&args);
+	pud_leaf_tests(&args);
 
-	pte_savedwrite_tests(pte_aligned, protnone);
-	pmd_savedwrite_tests(pmd_aligned, protnone);
+	pte_savedwrite_tests(&args);
+	pmd_savedwrite_tests(&args);
 
-	pte_special_tests(pte_aligned, prot);
-	pte_protnone_tests(pte_aligned, protnone);
-	pmd_protnone_tests(pmd_aligned, protnone);
+	pte_special_tests(&args);
+	pte_protnone_tests(&args);
+	pmd_protnone_tests(&args);
 
-	pte_devmap_tests(pte_aligned, prot);
-	pmd_devmap_tests(pmd_aligned, prot);
-	pud_devmap_tests(pud_aligned, prot);
+	pte_devmap_tests(&args);
+	pmd_devmap_tests(&args);
+	pud_devmap_tests(&args);
 
-	pte_soft_dirty_tests(pte_aligned, prot);
-	pmd_soft_dirty_tests(pmd_aligned, prot);
-	pte_swap_soft_dirty_tests(pte_aligned, prot);
-	pmd_swap_soft_dirty_tests(pmd_aligned, prot);
+	pte_soft_dirty_tests(&args);
+	pmd_soft_dirty_tests(&args);
+	pte_swap_soft_dirty_tests(&args);
+	pmd_swap_soft_dirty_tests(&args);
 
-	pte_swap_tests(pte_aligned, prot);
-	pmd_swap_tests(pmd_aligned, prot);
+	pte_swap_tests(&args);
+	pmd_swap_tests(&args);
 
-	swap_migration_tests();
+	swap_migration_tests(&args);
 
-	pmd_thp_tests(pmd_aligned, prot);
-	pud_thp_tests(pud_aligned, prot);
+	pmd_thp_tests(&args);
+	pud_thp_tests(&args);
 
-	hugetlb_basic_tests(pte_aligned, prot);
+	hugetlb_basic_tests(&args);
 
 	/*
 	 * Page table modifying tests. They need to hold
 	 * proper page table lock.
 	 */
 
-	ptep = pte_offset_map_lock(mm, pmdp, vaddr, &ptl);
-	pte_clear_tests(mm, ptep, pte_aligned, vaddr, prot);
-	pte_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
-	pte_unmap_unlock(ptep, ptl);
+	args.ptep = pte_offset_map_lock(args.mm, args.pmdp, args.vaddr, &ptl);
+	pte_clear_tests(&args);
+	pte_advanced_tests(&args);
+	pte_unmap_unlock(args.ptep, ptl);
 
-	ptl = pmd_lock(mm, pmdp);
-	pmd_clear_tests(mm, pmdp);
-	pmd_advanced_tests(mm, vma, pmdp, pmd_aligned, vaddr, prot, saved_ptep);
-	pmd_huge_tests(pmdp, pmd_aligned, prot);
-	pmd_populate_tests(mm, pmdp, saved_ptep);
+	ptl = pmd_lock(args.mm, args.pmdp);
+	pmd_clear_tests(&args);
+	pmd_advanced_tests(&args);
+	pmd_huge_tests(&args);
+	pmd_populate_tests(&args);
 	spin_unlock(ptl);
 
-	ptl = pud_lock(mm, pudp);
-	pud_clear_tests(mm, pudp);
-	pud_advanced_tests(mm, vma, pudp, pud_aligned, vaddr, prot);
-	pud_huge_tests(pudp, pud_aligned, prot);
-	pud_populate_tests(mm, pudp, saved_pmdp);
+	ptl = pud_lock(args.mm, args.pudp);
+	pud_clear_tests(&args);
+	pud_advanced_tests(&args);
+	pud_huge_tests(&args);
+	pud_populate_tests(&args);
 	spin_unlock(ptl);
 
-	spin_lock(&mm->page_table_lock);
-	p4d_clear_tests(mm, p4dp);
-	pgd_clear_tests(mm, pgdp);
-	p4d_populate_tests(mm, p4dp, saved_pudp);
-	pgd_populate_tests(mm, pgdp, saved_p4dp);
-	spin_unlock(&mm->page_table_lock);
-
-	p4d_free(mm, saved_p4dp);
-	pud_free(mm, saved_pudp);
-	pmd_free(mm, saved_pmdp);
-	pte_free(mm, saved_ptep);
-
-	vm_area_free(vma);
-	mm_dec_nr_puds(mm);
-	mm_dec_nr_pmds(mm);
-	mm_dec_nr_ptes(mm);
-	mmdrop(mm);
+	spin_lock(&(args.mm->page_table_lock));
+	p4d_clear_tests(&args);
+	pgd_clear_tests(&args);
+	p4d_populate_tests(&args);
+	pgd_populate_tests(&args);
+	spin_unlock(&(args.mm->page_table_lock));
+
+	destroy_args(&args);
 	return 0;
 }
 late_initcall(debug_vm_pgtable);
diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c
index 164607c7cdf1..74984c23a87e 100644
--- a/mm/early_ioremap.c
+++ b/mm/early_ioremap.c
@@ -38,13 +38,8 @@ pgprot_t __init __weak early_memremap_pgprot_adjust(resource_size_t phys_addr,
 	return prot;
 }
 
-void __init __weak early_ioremap_shutdown(void)
-{
-}
-
 void __init early_ioremap_reset(void)
 {
-	early_ioremap_shutdown();
 	after_paging_init = 1;
 }
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 920e8dc03251..dae481293b5d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -260,12 +260,11 @@ static void page_cache_free_page(struct address_space *mapping,
 void delete_from_page_cache(struct page *page)
 {
 	struct address_space *mapping = page_mapping(page);
-	unsigned long flags;
 
 	BUG_ON(!PageLocked(page));
-	xa_lock_irqsave(&mapping->i_pages, flags);
+	xa_lock_irq(&mapping->i_pages);
 	__delete_from_page_cache(page, NULL);
-	xa_unlock_irqrestore(&mapping->i_pages, flags);
+	xa_unlock_irq(&mapping->i_pages);
 
 	page_cache_free_page(mapping, page);
 }
@@ -337,19 +336,18 @@ void delete_from_page_cache_batch(struct address_space *mapping,
 				  struct pagevec *pvec)
 {
 	int i;
-	unsigned long flags;
 
 	if (!pagevec_count(pvec))
 		return;
 
-	xa_lock_irqsave(&mapping->i_pages, flags);
+	xa_lock_irq(&mapping->i_pages);
 	for (i = 0; i < pagevec_count(pvec); i++) {
 		trace_mm_filemap_delete_from_page_cache(pvec->pages[i]);
 
 		unaccount_page_cache_page(mapping, pvec->pages[i]);
 	}
 	page_cache_delete_batch(mapping, pvec);
-	xa_unlock_irqrestore(&mapping->i_pages, flags);
+	xa_unlock_irq(&mapping->i_pages);
 
 	for (i = 0; i < pagevec_count(pvec); i++)
 		page_cache_free_page(mapping, pvec->pages[i]);
@@ -841,7 +839,6 @@ void replace_page_cache_page(struct page *old, struct page *new)
 	void (*freepage)(struct page *) = mapping->a_ops->freepage;
 	pgoff_t offset = old->index;
 	XA_STATE(xas, &mapping->i_pages, offset);
-	unsigned long flags;
 
 	VM_BUG_ON_PAGE(!PageLocked(old), old);
 	VM_BUG_ON_PAGE(!PageLocked(new), new);
@@ -853,7 +850,7 @@ void replace_page_cache_page(struct page *old, struct page *new)
 
 	mem_cgroup_migrate(old, new);
 
-	xas_lock_irqsave(&xas, flags);
+	xas_lock_irq(&xas);
 	xas_store(&xas, new);
 
 	old->mapping = NULL;
@@ -866,7 +863,7 @@ void replace_page_cache_page(struct page *old, struct page *new)
 		__dec_lruvec_page_state(old, NR_SHMEM);
 	if (PageSwapBacked(new))
 		__inc_lruvec_page_state(new, NR_SHMEM);
-	xas_unlock_irqrestore(&xas, flags);
+	xas_unlock_irq(&xas);
 	if (freepage)
 		freepage(old);
 	put_page(old);
diff --git a/mm/gup.c b/mm/gup.c
index b94717977d17..886d6148d3d0 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -92,10 +92,17 @@ static inline struct page *try_get_compound_head(struct page *page, int refs)
 	return head;
 }
 
-/*
+/**
  * try_grab_compound_head() - attempt to elevate a page's refcount, by a
  * flags-dependent amount.
  *
+ * Even though the name includes "compound_head", this function is still
+ * appropriate for callers that have a non-compound @page to get.
+ *
+ * @page:  pointer to page to be grabbed
+ * @refs:  the value to (effectively) add to the page's refcount
+ * @flags: gup flags: these are the FOLL_* flag values.
+ *
  * "grab" names in this file mean, "look at flags to decide whether to use
  * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount.
  *
@@ -103,22 +110,26 @@ static inline struct page *try_get_compound_head(struct page *page, int refs)
  * same time. (That's true throughout the get_user_pages*() and
  * pin_user_pages*() APIs.) Cases:
  *
- *    FOLL_GET: page's refcount will be incremented by 1.
- *    FOLL_PIN: page's refcount will be incremented by GUP_PIN_COUNTING_BIAS.
+ *    FOLL_GET: page's refcount will be incremented by @refs.
+ *
+ *    FOLL_PIN on compound pages that are > two pages long: page's refcount will
+ *    be incremented by @refs, and page[2].hpage_pinned_refcount will be
+ *    incremented by @refs * GUP_PIN_COUNTING_BIAS.
+ *
+ *    FOLL_PIN on normal pages, or compound pages that are two pages long:
+ *    page's refcount will be incremented by @refs * GUP_PIN_COUNTING_BIAS.
  *
  * Return: head page (with refcount appropriately incremented) for success, or
  * NULL upon failure. If neither FOLL_GET nor FOLL_PIN was set, that's
  * considered failure, and furthermore, a likely bug in the caller, so a warning
  * is also emitted.
  */
-__maybe_unused struct page *try_grab_compound_head(struct page *page,
-						   int refs, unsigned int flags)
+struct page *try_grab_compound_head(struct page *page,
+				    int refs, unsigned int flags)
 {
 	if (flags & FOLL_GET)
 		return try_get_compound_head(page, refs);
 	else if (flags & FOLL_PIN) {
-		int orig_refs = refs;
-
 		/*
 		 * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a
 		 * right zone, so fail and let the caller fall back to the slow
@@ -143,6 +154,8 @@ __maybe_unused struct page *try_grab_compound_head(struct page *page,
 		 *
 		 * However, be sure to *also* increment the normal page refcount
 		 * field at least once, so that the page really is pinned.
+		 * That's why the refcount from the earlier
+		 * try_get_compound_head() is left intact.
 		 */
 		if (hpage_pincount_available(page))
 			hpage_pincount_add(page, refs);
@@ -150,7 +163,7 @@ __maybe_unused struct page *try_grab_compound_head(struct page *page,
 			page_ref_add(page, refs * (GUP_PIN_COUNTING_BIAS - 1));
 
 		mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_ACQUIRED,
-				    orig_refs);
+				    refs);
 
 		return page;
 	}
@@ -186,10 +199,8 @@ static void put_compound_head(struct page *page, int refs, unsigned int flags)
  * @flags:   gup flags: these are the FOLL_* flag values.
  *
  * Either FOLL_PIN or FOLL_GET (or neither) may be set, but not both at the same
- * time. Cases:
- *
- *    FOLL_GET: page's refcount will be incremented by 1.
- *    FOLL_PIN: page's refcount will be incremented by GUP_PIN_COUNTING_BIAS.
+ * time. Cases: please see the try_grab_compound_head() documentation, with
+ * "refs=1".
  *
  * Return: true for success, or if no action was required (if neither FOLL_PIN
  * nor FOLL_GET was set, nothing is done). False for failure: FOLL_GET or
@@ -197,35 +208,10 @@ static void put_compound_head(struct page *page, int refs, unsigned int flags)
  */
 bool __must_check try_grab_page(struct page *page, unsigned int flags)
 {
-	WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == (FOLL_GET | FOLL_PIN));
-
-	if (flags & FOLL_GET)
-		return try_get_page(page);
-	else if (flags & FOLL_PIN) {
-		int refs = 1;
-
-		page = compound_head(page);
-
-		if (WARN_ON_ONCE(page_ref_count(page) <= 0))
-			return false;
-
-		if (hpage_pincount_available(page))
-			hpage_pincount_add(page, 1);
-		else
-			refs = GUP_PIN_COUNTING_BIAS;
-
-		/*
-		 * Similar to try_grab_compound_head(): even if using the
-		 * hpage_pincount_add/_sub() routines, be sure to
-		 * *also* increment the normal page refcount field at least
-		 * once, so that the page really is pinned.
-		 */
-		page_ref_add(page, refs);
+	if (!(flags & (FOLL_GET | FOLL_PIN)))
+		return true;
 
-		mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_ACQUIRED, 1);
-	}
-
-	return true;
+	return try_grab_compound_head(page, 1, flags);
 }
 
 /**
@@ -1151,7 +1137,6 @@ static long __get_user_pages(struct mm_struct *mm,
 					 * We must stop here.
 					 */
 					BUG_ON(gup_flags & FOLL_NOWAIT);
-					BUG_ON(ret != 0);
 					goto out;
 				}
 				continue;
@@ -1276,7 +1261,7 @@ int fixup_user_fault(struct mm_struct *mm,
 		     bool *unlocked)
 {
 	struct vm_area_struct *vma;
-	vm_fault_t ret, major = 0;
+	vm_fault_t ret;
 
 	address = untagged_addr(address);
 
@@ -1296,7 +1281,6 @@ retry:
 		return -EINTR;
 
 	ret = handle_mm_fault(vma, address, fault_flags, NULL);
-	major |= ret & VM_FAULT_MAJOR;
 	if (ret & VM_FAULT_ERROR) {
 		int err = vm_fault_to_errno(ret, 0);
 
@@ -1475,8 +1459,8 @@ long populate_vma_page_range(struct vm_area_struct *vma,
 	unsigned long nr_pages = (end - start) / PAGE_SIZE;
 	int gup_flags;
 
-	VM_BUG_ON(start & ~PAGE_MASK);
-	VM_BUG_ON(end   & ~PAGE_MASK);
+	VM_BUG_ON(!PAGE_ALIGNED(start));
+	VM_BUG_ON(!PAGE_ALIGNED(end));
 	VM_BUG_ON_VMA(start < vma->vm_start, vma);
 	VM_BUG_ON_VMA(end   > vma->vm_end, vma);
 	mmap_assert_locked(mm);
@@ -1775,7 +1759,7 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
 	if (!list_empty(&movable_page_list)) {
 		ret = migrate_pages(&movable_page_list, alloc_migration_target,
 				    NULL, (unsigned long)&mtc, MIGRATE_SYNC,
-				    MR_LONGTERM_PIN);
+				    MR_LONGTERM_PIN, NULL);
 		if (ret && !list_empty(&movable_page_list))
 			putback_movable_pages(&movable_page_list);
 	}
@@ -2244,6 +2228,7 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr,
 {
 	int nr_start = *nr;
 	struct dev_pagemap *pgmap = NULL;
+	int ret = 1;
 
 	do {
 		struct page *page = pfn_to_page(pfn);
@@ -2251,21 +2236,22 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr,
 		pgmap = get_dev_pagemap(pfn, pgmap);
 		if (unlikely(!pgmap)) {
 			undo_dev_pagemap(nr, nr_start, flags, pages);
-			return 0;
+			ret = 0;
+			break;
 		}
 		SetPageReferenced(page);
 		pages[*nr] = page;
 		if (unlikely(!try_grab_page(page, flags))) {
 			undo_dev_pagemap(nr, nr_start, flags, pages);
-			return 0;
+			ret = 0;
+			break;
 		}
 		(*nr)++;
 		pfn++;
 	} while (addr += PAGE_SIZE, addr != end);
 
-	if (pgmap)
-		put_dev_pagemap(pgmap);
-	return 1;
+	put_dev_pagemap(pgmap);
+	return ret;
 }
 
 static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
diff --git a/mm/highmem.c b/mm/highmem.c
index 4fb51d735aa6..4212ad0e4a19 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -436,7 +436,7 @@ EXPORT_SYMBOL(zero_user_segments);
 
 static inline int kmap_local_idx_push(void)
 {
-	WARN_ON_ONCE(in_irq() && !irqs_disabled());
+	WARN_ON_ONCE(in_hardirq() && !irqs_disabled());
 	current->kmap_ctrl.idx += KM_INCR;
 	BUG_ON(current->kmap_ctrl.idx >= KM_MAX_IDX);
 	return current->kmap_ctrl.idx - 1;
diff --git a/mm/hmm.c b/mm/hmm.c
index fad6be2bf072..842e26599238 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -295,10 +295,13 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
 		goto fault;
 
 	/*
+	 * Bypass devmap pte such as DAX page when all pfn requested
+	 * flags(pfn_req_flags) are fulfilled.
 	 * Since each architecture defines a struct page for the zero page, just
 	 * fall through and treat it like a normal page.
 	 */
-	if (pte_special(pte) && !is_zero_pfn(pte_pfn(pte))) {
+	if (pte_special(pte) && !pte_devmap(pte) &&
+	    !is_zero_pfn(pte_pfn(pte))) {
 		if (hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0)) {
 			pte_unmap(ptep);
 			return -EFAULT;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index afff3ac87067..5e9ef0fc261e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1440,32 +1440,6 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
 		goto out;
 	}
 
-	/*
-	 * Since we took the NUMA fault, we must have observed the !accessible
-	 * bit. Make sure all other CPUs agree with that, to avoid them
-	 * modifying the page we're about to migrate.
-	 *
-	 * Must be done under PTL such that we'll observe the relevant
-	 * inc_tlb_flush_pending().
-	 *
-	 * We are not sure a pending tlb flush here is for a huge page
-	 * mapping or not. Hence use the tlb range variant
-	 */
-	if (mm_tlb_flush_pending(vma->vm_mm)) {
-		flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
-		/*
-		 * change_huge_pmd() released the pmd lock before
-		 * invalidating the secondary MMUs sharing the primary
-		 * MMU pagetables (with ->invalidate_range()). The
-		 * mmu_notifier_invalidate_range_end() (which
-		 * internally calls ->invalidate_range()) in
-		 * change_pmd_range() will run after us, so we can't
-		 * rely on it here and we need an explicit invalidate.
-		 */
-		mmu_notifier_invalidate_range(vma->vm_mm, haddr,
-					      haddr + HPAGE_PMD_SIZE);
-	}
-
 	pmd = pmd_modify(oldpmd, vma->vm_page_prot);
 	page = vm_normal_page_pmd(vma, haddr, pmd);
 	if (!page)
@@ -2454,11 +2428,11 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 
 	for (i = nr - 1; i >= 1; i--) {
 		__split_huge_page_tail(head, i, lruvec, list);
-		/* Some pages can be beyond i_size: drop them from page cache */
+		/* Some pages can be beyond EOF: drop them from page cache */
 		if (head[i].index >= end) {
 			ClearPageDirty(head + i);
 			__delete_from_page_cache(head + i, NULL);
-			if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head))
+			if (shmem_mapping(head->mapping))
 				shmem_uncharge(head->mapping->host, 1);
 			put_page(head + i);
 		} else if (!PageAnon(page)) {
@@ -2686,6 +2660,8 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 		 * head page lock is good enough to serialize the trimming.
 		 */
 		end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
+		if (shmem_mapping(mapping))
+			end = shmem_fallocend(mapping->host, end);
 	}
 
 	/*
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8ea35ba6699f..95dc7b83381f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1072,6 +1072,8 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
 	int nid = page_to_nid(page);
 
 	lockdep_assert_held(&hugetlb_lock);
+	VM_BUG_ON_PAGE(page_count(page), page);
+
 	list_move(&page->lru, &h->hugepage_freelists[nid]);
 	h->free_huge_pages++;
 	h->free_huge_pages_node[nid]++;
@@ -1143,7 +1145,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 				unsigned long address, int avoid_reserve,
 				long chg)
 {
-	struct page *page;
+	struct page *page = NULL;
 	struct mempolicy *mpol;
 	gfp_t gfp_mask;
 	nodemask_t *nodemask;
@@ -1164,7 +1166,17 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 
 	gfp_mask = htlb_alloc_mask(h);
 	nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
-	page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+
+	if (mpol_is_preferred_many(mpol)) {
+		page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+
+		/* Fallback to all nodes if page==NULL */
+		nodemask = NULL;
+	}
+
+	if (!page)
+		page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+
 	if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
 		SetHPageRestoreReserve(page);
 		h->resv_huge_pages--;
@@ -1368,8 +1380,28 @@ static void remove_hugetlb_page(struct hstate *h, struct page *page,
 		h->surplus_huge_pages_node[nid]--;
 	}
 
+	/*
+	 * Very subtle
+	 *
+	 * For non-gigantic pages set the destructor to the normal compound
+	 * page dtor.  This is needed in case someone takes an additional
+	 * temporary ref to the page, and freeing is delayed until they drop
+	 * their reference.
+	 *
+	 * For gigantic pages set the destructor to the null dtor.  This
+	 * destructor will never be called.  Before freeing the gigantic
+	 * page destroy_compound_gigantic_page will turn the compound page
+	 * into a simple group of pages.  After this the destructor does not
+	 * apply.
+	 *
+	 * This handles the case where more than one ref is held when and
+	 * after update_and_free_page is called.
+	 */
 	set_page_refcounted(page);
-	set_compound_page_dtor(page, NULL_COMPOUND_DTOR);
+	if (hstate_is_gigantic(h))
+		set_compound_page_dtor(page, NULL_COMPOUND_DTOR);
+	else
+		set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
 
 	h->nr_huge_pages--;
 	h->nr_huge_pages_node[nid]--;
@@ -1399,11 +1431,20 @@ static void add_hugetlb_page(struct hstate *h, struct page *page,
 	SetHPageVmemmapOptimized(page);
 
 	/*
-	 * This page is now managed by the hugetlb allocator and has
-	 * no users -- drop the last reference.
+	 * This page is about to be managed by the hugetlb allocator and
+	 * should have no users.  Drop our reference, and check for others
+	 * just in case.
 	 */
 	zeroed = put_page_testzero(page);
-	VM_BUG_ON_PAGE(!zeroed, page);
+	if (!zeroed)
+		/*
+		 * It is VERY unlikely soneone else has taken a ref on
+		 * the page.  In this case, we simply return as the
+		 * hugetlb destructor (free_huge_page) will be called
+		 * when this other ref is dropped.
+		 */
+		return;
+
 	arch_clear_hugepage_flags(page);
 	enqueue_huge_page(h, page);
 }
@@ -1657,16 +1698,14 @@ static bool prep_compound_gigantic_page(struct page *page, unsigned int order)
 		 * cache adding could take a ref on a 'to be' tail page.
 		 * We need to respect any increased ref count, and only set
 		 * the ref count to zero if count is currently 1.  If count
-		 * is not 1, we call synchronize_rcu in the hope that a rcu
-		 * grace period will cause ref count to drop and then retry.
-		 * If count is still inflated on retry we return an error and
-		 * must discard the pages.
+		 * is not 1, we return an error.  An error return indicates
+		 * the set of pages can not be converted to a gigantic page.
+		 * The caller who allocated the pages should then discard the
+		 * pages using the appropriate free interface.
 		 */
 		if (!page_ref_freeze(p, 1)) {
-			pr_info("HugeTLB unexpected inflated ref count on freshly allocated page\n");
-			synchronize_rcu();
-			if (!page_ref_freeze(p, 1))
-				goto out_error;
+			pr_warn("HugeTLB page can not be used due to unexpected inflated ref count\n");
+			goto out_error;
 		}
 		set_page_count(p, 0);
 		set_compound_head(p, page);
@@ -1830,7 +1869,6 @@ retry:
 				retry = true;
 				goto retry;
 			}
-			pr_warn("HugeTLB page can not be used due to unexpected inflated ref count\n");
 			return NULL;
 		}
 	}
@@ -2020,9 +2058,10 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
  * Allocates a fresh surplus page from the page allocator.
  */
 static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
-		int nid, nodemask_t *nmask)
+		int nid, nodemask_t *nmask, bool zero_ref)
 {
 	struct page *page = NULL;
+	bool retry = false;
 
 	if (hstate_is_gigantic(h))
 		return NULL;
@@ -2032,6 +2071,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
 		goto out_unlock;
 	spin_unlock_irq(&hugetlb_lock);
 
+retry:
 	page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
 	if (!page)
 		return NULL;
@@ -2049,11 +2089,35 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
 		spin_unlock_irq(&hugetlb_lock);
 		put_page(page);
 		return NULL;
-	} else {
-		h->surplus_huge_pages++;
-		h->surplus_huge_pages_node[page_to_nid(page)]++;
 	}
 
+	if (zero_ref) {
+		/*
+		 * Caller requires a page with zero ref count.
+		 * We will drop ref count here.  If someone else is holding
+		 * a ref, the page will be freed when they drop it.  Abuse
+		 * temporary page flag to accomplish this.
+		 */
+		SetHPageTemporary(page);
+		if (!put_page_testzero(page)) {
+			/*
+			 * Unexpected inflated ref count on freshly allocated
+			 * huge.  Retry once.
+			 */
+			pr_info("HugeTLB unexpected inflated ref count on freshly allocated page\n");
+			spin_unlock_irq(&hugetlb_lock);
+			if (retry)
+				return NULL;
+
+			retry = true;
+			goto retry;
+		}
+		ClearHPageTemporary(page);
+	}
+
+	h->surplus_huge_pages++;
+	h->surplus_huge_pages_node[page_to_nid(page)]++;
+
 out_unlock:
 	spin_unlock_irq(&hugetlb_lock);
 
@@ -2088,16 +2152,26 @@ static
 struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
 		struct vm_area_struct *vma, unsigned long addr)
 {
-	struct page *page;
+	struct page *page = NULL;
 	struct mempolicy *mpol;
 	gfp_t gfp_mask = htlb_alloc_mask(h);
 	int nid;
 	nodemask_t *nodemask;
 
 	nid = huge_node(vma, addr, gfp_mask, &mpol, &nodemask);
-	page = alloc_surplus_huge_page(h, gfp_mask, nid, nodemask);
-	mpol_cond_put(mpol);
+	if (mpol_is_preferred_many(mpol)) {
+		gfp_t gfp = gfp_mask | __GFP_NOWARN;
 
+		gfp &=  ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
+		page = alloc_surplus_huge_page(h, gfp, nid, nodemask, false);
+
+		/* Fallback to all nodes if page==NULL */
+		nodemask = NULL;
+	}
+
+	if (!page)
+		page = alloc_surplus_huge_page(h, gfp_mask, nid, nodemask, false);
+	mpol_cond_put(mpol);
 	return page;
 }
 
@@ -2167,7 +2241,7 @@ retry:
 	spin_unlock_irq(&hugetlb_lock);
 	for (i = 0; i < needed; i++) {
 		page = alloc_surplus_huge_page(h, htlb_alloc_mask(h),
-				NUMA_NO_NODE, NULL);
+				NUMA_NO_NODE, NULL, true);
 		if (!page) {
 			alloc_ok = false;
 			break;
@@ -2208,24 +2282,20 @@ retry:
 
 	/* Free the needed pages to the hugetlb pool */
 	list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
-		int zeroed;
-
 		if ((--needed) < 0)
 			break;
-		/*
-		 * This page is now managed by the hugetlb allocator and has
-		 * no users -- drop the buddy allocator's reference.
-		 */
-		zeroed = put_page_testzero(page);
-		VM_BUG_ON_PAGE(!zeroed, page);
+		/* Add the page to the hugetlb allocator */
 		enqueue_huge_page(h, page);
 	}
 free:
 	spin_unlock_irq(&hugetlb_lock);
 
-	/* Free unnecessary surplus pages to the buddy allocator */
+	/*
+	 * Free unnecessary surplus pages to the buddy allocator.
+	 * Pages have no ref count, call free_huge_page directly.
+	 */
 	list_for_each_entry_safe(page, tmp, &surplus_list, lru)
-		put_page(page);
+		free_huge_page(page);
 	spin_lock_irq(&hugetlb_lock);
 
 	return ret;
@@ -2534,6 +2604,7 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
 {
 	gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
 	int nid = page_to_nid(old_page);
+	bool alloc_retry = false;
 	struct page *new_page;
 	int ret = 0;
 
@@ -2544,9 +2615,30 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
 	 * the pool.  This simplifies and let us do most of the processing
 	 * under the lock.
 	 */
+alloc_retry:
 	new_page = alloc_buddy_huge_page(h, gfp_mask, nid, NULL, NULL);
 	if (!new_page)
 		return -ENOMEM;
+	/*
+	 * If all goes well, this page will be directly added to the free
+	 * list in the pool.  For this the ref count needs to be zero.
+	 * Attempt to drop now, and retry once if needed.  It is VERY
+	 * unlikely there is another ref on the page.
+	 *
+	 * If someone else has a reference to the page, it will be freed
+	 * when they drop their ref.  Abuse temporary page flag to accomplish
+	 * this.  Retry once if there is an inflated ref count.
+	 */
+	SetHPageTemporary(new_page);
+	if (!put_page_testzero(new_page)) {
+		if (alloc_retry)
+			return -EBUSY;
+
+		alloc_retry = true;
+		goto alloc_retry;
+	}
+	ClearHPageTemporary(new_page);
+
 	__prep_new_huge_page(h, new_page);
 
 retry:
@@ -2586,11 +2678,10 @@ retry:
 		remove_hugetlb_page(h, old_page, false);
 
 		/*
-		 * Reference count trick is needed because allocator gives us
-		 * referenced page but the pool requires pages with 0 refcount.
+		 * Ref count on new page is already zero as it was dropped
+		 * earlier.  It can be directly added to the pool free list.
 		 */
 		__prep_account_new_huge_page(h, nid);
-		page_ref_dec(new_page);
 		enqueue_huge_page(h, new_page);
 
 		/*
@@ -2604,6 +2695,8 @@ retry:
 
 free_new:
 	spin_unlock_irq(&hugetlb_lock);
+	/* Page has a zero ref count, but needs a ref to be freed */
+	set_page_refcounted(new_page);
 	update_and_free_page(h, new_page, false);
 
 	return ret;
@@ -2828,8 +2921,8 @@ static void __init gather_bootmem_prealloc(void)
 			prep_new_huge_page(h, page, page_to_nid(page));
 			put_page(page); /* add to the hugepage allocator */
 		} else {
+			/* VERY unlikely inflated ref count on a tail page */
 			free_gigantic_page(page, huge_page_order(h));
-			pr_warn("HugeTLB page can not be used due to unexpected inflated ref count\n");
 		}
 
 		/*
@@ -4033,8 +4126,10 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma)
 	 * after this open call completes.  It is therefore safe to take a
 	 * new reference here without additional locking.
 	 */
-	if (resv && is_vma_resv_set(vma, HPAGE_RESV_OWNER))
+	if (resv && is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
+		resv_map_dup_hugetlb_cgroup_uncharge_info(resv);
 		kref_get(&resv->refs);
+	}
 }
 
 static void hugetlb_vm_op_close(struct vm_area_struct *vma)
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index 1ae1ebc2b9b1..aff4d27ec235 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -30,7 +30,7 @@ static int hwpoison_inject(void *data, u64 val)
 	if (!hwpoison_filter_enable)
 		goto inject;
 
-	shake_page(hpage, 0);
+	shake_page(hpage);
 	/*
 	 * This implies unable to support non-LRU pages.
 	 */
diff --git a/mm/internal.h b/mm/internal.h
index 31ff935b2547..cf3cb933eba3 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -211,6 +211,10 @@ extern void zone_pcp_reset(struct zone *zone);
 extern void zone_pcp_disable(struct zone *zone);
 extern void zone_pcp_enable(struct zone *zone);
 
+extern void *memmap_alloc(phys_addr_t size, phys_addr_t align,
+			  phys_addr_t min_addr,
+			  int nid, bool exact_nid);
+
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
 
 /*
@@ -539,12 +543,17 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
 
 #ifdef CONFIG_NUMA
 extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int);
+extern int find_next_best_node(int node, nodemask_t *used_node_mask);
 #else
 static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask,
 				unsigned int order)
 {
 	return NODE_RECLAIM_NOSCAN;
 }
+static inline int find_next_best_node(int node, nodemask_t *used_node_mask)
+{
+	return NUMA_NO_NODE;
+}
 #endif
 
 extern int hwpoison_filter(struct page *p);
diff --git a/mm/ioremap.c b/mm/ioremap.c
index 8ee0136f8cb0..5fe598ecd9b7 100644
--- a/mm/ioremap.c
+++ b/mm/ioremap.c
@@ -8,33 +8,9 @@
  */
 #include <linux/vmalloc.h>
 #include <linux/mm.h>
-#include <linux/sched.h>
 #include <linux/io.h>
 #include <linux/export.h>
-#include <asm/cacheflush.h>
 
-#include "pgalloc-track.h"
-
-#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
-static unsigned int __ro_after_init iomap_max_page_shift = BITS_PER_LONG - 1;
-
-static int __init set_nohugeiomap(char *str)
-{
-	iomap_max_page_shift = PAGE_SHIFT;
-	return 0;
-}
-early_param("nohugeiomap", set_nohugeiomap);
-#else /* CONFIG_HAVE_ARCH_HUGE_VMAP */
-static const unsigned int iomap_max_page_shift = PAGE_SHIFT;
-#endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */
-
-int ioremap_page_range(unsigned long addr,
-		       unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
-{
-	return vmap_range(addr, end, phys_addr, prot, iomap_max_page_shift);
-}
-
-#ifdef CONFIG_GENERIC_IOREMAP
 void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot)
 {
 	unsigned long offset, vaddr;
@@ -71,4 +47,3 @@ void iounmap(volatile void __iomem *addr)
 	vunmap((void *)((unsigned long)addr & PAGE_MASK));
 }
 EXPORT_SYMBOL(iounmap);
-#endif /* CONFIG_GENERIC_IOREMAP */
diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c
index e4c16f6b6680..05d1e9460e2e 100644
--- a/mm/kasan/hw_tags.c
+++ b/mm/kasan/hw_tags.c
@@ -37,16 +37,9 @@ enum kasan_arg_stacktrace {
 	KASAN_ARG_STACKTRACE_ON,
 };
 
-enum kasan_arg_fault {
-	KASAN_ARG_FAULT_DEFAULT,
-	KASAN_ARG_FAULT_REPORT,
-	KASAN_ARG_FAULT_PANIC,
-};
-
 static enum kasan_arg kasan_arg __ro_after_init;
 static enum kasan_arg_mode kasan_arg_mode __ro_after_init;
 static enum kasan_arg_stacktrace kasan_arg_stacktrace __ro_after_init;
-static enum kasan_arg_fault kasan_arg_fault __ro_after_init;
 
 /* Whether KASAN is enabled at all. */
 DEFINE_STATIC_KEY_FALSE(kasan_flag_enabled);
@@ -59,9 +52,6 @@ EXPORT_SYMBOL_GPL(kasan_flag_async);
 /* Whether to collect alloc/free stack traces. */
 DEFINE_STATIC_KEY_FALSE(kasan_flag_stacktrace);
 
-/* Whether to panic or print a report and disable tag checking on fault. */
-bool kasan_flag_panic __ro_after_init;
-
 /* kasan=off/on */
 static int __init early_kasan_flag(char *arg)
 {
@@ -113,23 +103,6 @@ static int __init early_kasan_flag_stacktrace(char *arg)
 }
 early_param("kasan.stacktrace", early_kasan_flag_stacktrace);
 
-/* kasan.fault=report/panic */
-static int __init early_kasan_fault(char *arg)
-{
-	if (!arg)
-		return -EINVAL;
-
-	if (!strcmp(arg, "report"))
-		kasan_arg_fault = KASAN_ARG_FAULT_REPORT;
-	else if (!strcmp(arg, "panic"))
-		kasan_arg_fault = KASAN_ARG_FAULT_PANIC;
-	else
-		return -EINVAL;
-
-	return 0;
-}
-early_param("kasan.fault", early_kasan_fault);
-
 /* kasan_init_hw_tags_cpu() is called for each CPU. */
 void kasan_init_hw_tags_cpu(void)
 {
@@ -195,22 +168,6 @@ void __init kasan_init_hw_tags(void)
 		break;
 	}
 
-	switch (kasan_arg_fault) {
-	case KASAN_ARG_FAULT_DEFAULT:
-		/*
-		 * Default to no panic on report.
-		 * Do nothing, kasan_flag_panic keeps its default value.
-		 */
-		break;
-	case KASAN_ARG_FAULT_REPORT:
-		/* Do nothing, kasan_flag_panic keeps its default value. */
-		break;
-	case KASAN_ARG_FAULT_PANIC:
-		/* Enable panic on report. */
-		kasan_flag_panic = true;
-		break;
-	}
-
 	pr_info("KernelAddressSanitizer initialized\n");
 }
 
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index fff93b0bcb08..8bf568a80eb8 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -38,7 +38,6 @@ static inline bool kasan_async_mode_enabled(void)
 
 #endif
 
-extern bool kasan_flag_panic __ro_after_init;
 extern bool kasan_flag_async __ro_after_init;
 
 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 8fff1825b22c..884a950c7026 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -39,6 +39,31 @@ static unsigned long kasan_flags;
 #define KASAN_BIT_REPORTED	0
 #define KASAN_BIT_MULTI_SHOT	1
 
+enum kasan_arg_fault {
+	KASAN_ARG_FAULT_DEFAULT,
+	KASAN_ARG_FAULT_REPORT,
+	KASAN_ARG_FAULT_PANIC,
+};
+
+static enum kasan_arg_fault kasan_arg_fault __ro_after_init = KASAN_ARG_FAULT_DEFAULT;
+
+/* kasan.fault=report/panic */
+static int __init early_kasan_fault(char *arg)
+{
+	if (!arg)
+		return -EINVAL;
+
+	if (!strcmp(arg, "report"))
+		kasan_arg_fault = KASAN_ARG_FAULT_REPORT;
+	else if (!strcmp(arg, "panic"))
+		kasan_arg_fault = KASAN_ARG_FAULT_PANIC;
+	else
+		return -EINVAL;
+
+	return 0;
+}
+early_param("kasan.fault", early_kasan_fault);
+
 bool kasan_save_enable_multi_shot(void)
 {
 	return test_and_set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags);
@@ -102,10 +127,8 @@ static void end_report(unsigned long *flags, unsigned long addr)
 		panic_on_warn = 0;
 		panic("panic_on_warn set ...\n");
 	}
-#ifdef CONFIG_KASAN_HW_TAGS
-	if (kasan_flag_panic)
+	if (kasan_arg_fault == KASAN_ARG_FAULT_PANIC)
 		panic("kasan.fault=panic set ...\n");
-#endif
 	kasan_enable_current();
 }
 
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 575c685aa642..7a97db8bc8e7 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -20,6 +20,7 @@
 #include <linux/moduleparam.h>
 #include <linux/random.h>
 #include <linux/rcupdate.h>
+#include <linux/sched/clock.h>
 #include <linux/sched/sysctl.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
@@ -196,6 +197,8 @@ static noinline void metadata_update_state(struct kfence_metadata *meta,
 	 */
 	track->num_stack_entries = stack_trace_save(track->stack_entries, KFENCE_STACK_DEPTH, 1);
 	track->pid = task_pid_nr(current);
+	track->cpu = raw_smp_processor_id();
+	track->ts_nsec = local_clock(); /* Same source as printk timestamps. */
 
 	/*
 	 * Pairs with READ_ONCE() in
diff --git a/mm/kfence/kfence.h b/mm/kfence/kfence.h
index 24065321ff8a..c1f23c61e5f9 100644
--- a/mm/kfence/kfence.h
+++ b/mm/kfence/kfence.h
@@ -36,6 +36,8 @@ enum kfence_object_state {
 /* Alloc/free tracking information. */
 struct kfence_track {
 	pid_t pid;
+	int cpu;
+	u64 ts_nsec;
 	int num_stack_entries;
 	unsigned long stack_entries[KFENCE_STACK_DEPTH];
 };
diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c
index eb6307c199ea..f1690cf54199 100644
--- a/mm/kfence/kfence_test.c
+++ b/mm/kfence/kfence_test.c
@@ -800,6 +800,9 @@ static int test_init(struct kunit *test)
 	unsigned long flags;
 	int i;
 
+	if (!__kfence_pool)
+		return -EINVAL;
+
 	spin_lock_irqsave(&observed.lock, flags);
 	for (i = 0; i < ARRAY_SIZE(observed.lines); i++)
 		observed.lines[i][0] = '\0';
diff --git a/mm/kfence/report.c b/mm/kfence/report.c
index 2a319c21c939..f93a7b2a338b 100644
--- a/mm/kfence/report.c
+++ b/mm/kfence/report.c
@@ -5,10 +5,11 @@
  * Copyright (C) 2020, Google LLC.
  */
 
-#include <stdarg.h>
+#include <linux/stdarg.h>
 
 #include <linux/kernel.h>
 #include <linux/lockdep.h>
+#include <linux/math.h>
 #include <linux/printk.h>
 #include <linux/sched/debug.h>
 #include <linux/seq_file.h>
@@ -100,6 +101,13 @@ static void kfence_print_stack(struct seq_file *seq, const struct kfence_metadat
 			       bool show_alloc)
 {
 	const struct kfence_track *track = show_alloc ? &meta->alloc_track : &meta->free_track;
+	u64 ts_sec = track->ts_nsec;
+	unsigned long rem_nsec = do_div(ts_sec, NSEC_PER_SEC);
+
+	/* Timestamp matches printk timestamp format. */
+	seq_con_printf(seq, "%s by task %d on cpu %d at %lu.%06lus:\n",
+		       show_alloc ? "allocated" : "freed", track->pid,
+		       track->cpu, (unsigned long)ts_sec, rem_nsec / 1000);
 
 	if (track->num_stack_entries) {
 		/* Skip allocation/free internals stack. */
@@ -126,15 +134,14 @@ void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *met
 		return;
 	}
 
-	seq_con_printf(seq,
-		       "kfence-#%td [0x%p-0x%p"
-		       ", size=%d, cache=%s] allocated by task %d:\n",
-		       meta - kfence_metadata, (void *)start, (void *)(start + size - 1), size,
-		       (cache && cache->name) ? cache->name : "<destroyed>", meta->alloc_track.pid);
+	seq_con_printf(seq, "kfence-#%td: 0x%p-0x%p, size=%d, cache=%s\n\n",
+		       meta - kfence_metadata, (void *)start, (void *)(start + size - 1),
+		       size, (cache && cache->name) ? cache->name : "<destroyed>");
+
 	kfence_print_stack(seq, meta, true);
 
 	if (meta->state == KFENCE_OBJECT_FREED) {
-		seq_con_printf(seq, "\nfreed by task %d:\n", meta->free_track.pid);
+		seq_con_printf(seq, "\n");
 		kfence_print_stack(seq, meta, false);
 	}
 }
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index b0412be08fa2..045cc579f724 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1721,7 +1721,7 @@ static void collapse_file(struct mm_struct *mm,
 				xas_unlock_irq(&xas);
 				/* swap in or instantiate fallocated page */
 				if (shmem_getpage(mapping->host, index, &page,
-						  SGP_NOHUGE)) {
+						  SGP_NOALLOC)) {
 					result = SCAN_FAIL;
 					goto xa_unlocked;
 				}
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 73d46d16d575..b57383c17cf6 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -113,7 +113,8 @@
 #define BYTES_PER_POINTER	sizeof(void *)
 
 /* GFP bitmask for kmemleak internal allocations */
-#define gfp_kmemleak_mask(gfp)	(((gfp) & (GFP_KERNEL | GFP_ATOMIC)) | \
+#define gfp_kmemleak_mask(gfp)	(((gfp) & (GFP_KERNEL | GFP_ATOMIC | \
+					   __GFP_NOLOCKDEP)) | \
 				 __GFP_NORETRY | __GFP_NOMEMALLOC | \
 				 __GFP_NOWARN)
 
@@ -598,7 +599,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
 	object->checksum = 0;
 
 	/* task information */
-	if (in_irq()) {
+	if (in_hardirq()) {
 		object->pid = 0;
 		strncpy(object->comm, "hardirq", sizeof(object->comm));
 	} else if (in_serving_softirq()) {
diff --git a/mm/ksm.c b/mm/ksm.c
index 3fa9bc8a67cf..025338128cd9 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -259,7 +259,7 @@ static unsigned long ksm_stable_node_chains;
 static unsigned long ksm_stable_node_dups;
 
 /* Delay in pruning stale stable_node_dups in the stable_node_chains */
-static int ksm_stable_node_chains_prune_millisecs = 2000;
+static unsigned int ksm_stable_node_chains_prune_millisecs = 2000;
 
 /* Maximum number of page slots sharing a stable node */
 static int ksm_max_page_sharing = 256;
@@ -3105,11 +3105,11 @@ stable_node_chains_prune_millisecs_store(struct kobject *kobj,
 					 struct kobj_attribute *attr,
 					 const char *buf, size_t count)
 {
-	unsigned long msecs;
+	unsigned int msecs;
 	int err;
 
-	err = kstrtoul(buf, 10, &msecs);
-	if (err || msecs > UINT_MAX)
+	err = kstrtouint(buf, 10, &msecs);
+	if (err)
 		return -EINVAL;
 
 	ksm_stable_node_chains_prune_millisecs = msecs;
diff --git a/mm/maccess.c b/mm/maccess.c
index 3bd70405f2d8..d3f1a1f0b1c1 100644
--- a/mm/maccess.c
+++ b/mm/maccess.c
@@ -24,13 +24,21 @@ bool __weak copy_from_kernel_nofault_allowed(const void *unsafe_src,
 
 long copy_from_kernel_nofault(void *dst, const void *src, size_t size)
 {
+	unsigned long align = 0;
+
+	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
+		align = (unsigned long)dst | (unsigned long)src;
+
 	if (!copy_from_kernel_nofault_allowed(src, size))
 		return -ERANGE;
 
 	pagefault_disable();
-	copy_from_kernel_nofault_loop(dst, src, size, u64, Efault);
-	copy_from_kernel_nofault_loop(dst, src, size, u32, Efault);
-	copy_from_kernel_nofault_loop(dst, src, size, u16, Efault);
+	if (!(align & 7))
+		copy_from_kernel_nofault_loop(dst, src, size, u64, Efault);
+	if (!(align & 3))
+		copy_from_kernel_nofault_loop(dst, src, size, u32, Efault);
+	if (!(align & 1))
+		copy_from_kernel_nofault_loop(dst, src, size, u16, Efault);
 	copy_from_kernel_nofault_loop(dst, src, size, u8, Efault);
 	pagefault_enable();
 	return 0;
@@ -50,10 +58,18 @@ EXPORT_SYMBOL_GPL(copy_from_kernel_nofault);
 
 long copy_to_kernel_nofault(void *dst, const void *src, size_t size)
 {
+	unsigned long align = 0;
+
+	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
+		align = (unsigned long)dst | (unsigned long)src;
+
 	pagefault_disable();
-	copy_to_kernel_nofault_loop(dst, src, size, u64, Efault);
-	copy_to_kernel_nofault_loop(dst, src, size, u32, Efault);
-	copy_to_kernel_nofault_loop(dst, src, size, u16, Efault);
+	if (!(align & 7))
+		copy_to_kernel_nofault_loop(dst, src, size, u64, Efault);
+	if (!(align & 3))
+		copy_to_kernel_nofault_loop(dst, src, size, u32, Efault);
+	if (!(align & 1))
+		copy_to_kernel_nofault_loop(dst, src, size, u16, Efault);
 	copy_to_kernel_nofault_loop(dst, src, size, u8, Efault);
 	pagefault_enable();
 	return 0;
diff --git a/mm/madvise.c b/mm/madvise.c
index 56324a3dbc4e..0734db8d53a7 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1048,6 +1048,7 @@ process_madvise_behavior_valid(int behavior)
 	switch (behavior) {
 	case MADV_COLD:
 	case MADV_PAGEOUT:
+	case MADV_WILLNEED:
 		return true;
 	default:
 		return false;
diff --git a/mm/memblock.c b/mm/memblock.c
index de7b553baa50..0ab5a749bfa6 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -315,7 +315,7 @@ static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
  * Return:
  * Found address on success, 0 on failure.
  */
-phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
+static phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
 					phys_addr_t end, phys_addr_t size,
 					phys_addr_t align)
 {
@@ -665,6 +665,11 @@ repeat:
 int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
 				       int nid)
 {
+	phys_addr_t end = base + size - 1;
+
+	memblock_dbg("%s: [%pa-%pa] nid=%d %pS\n", __func__,
+		     &base, &end, nid, (void *)_RET_IP_);
+
 	return memblock_add_range(&memblock.memory, base, size, nid, 0);
 }
 
@@ -1491,18 +1496,12 @@ void * __init memblock_alloc_exact_nid_raw(
 			phys_addr_t min_addr, phys_addr_t max_addr,
 			int nid)
 {
-	void *ptr;
-
 	memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
 		     __func__, (u64)size, (u64)align, nid, &min_addr,
 		     &max_addr, (void *)_RET_IP_);
 
-	ptr = memblock_alloc_internal(size, align,
-					   min_addr, max_addr, nid, true);
-	if (ptr && size > 0)
-		page_init_poison(ptr, size);
-
-	return ptr;
+	return memblock_alloc_internal(size, align, min_addr, max_addr, nid,
+				       true);
 }
 
 /**
@@ -1529,18 +1528,12 @@ void * __init memblock_alloc_try_nid_raw(
 			phys_addr_t min_addr, phys_addr_t max_addr,
 			int nid)
 {
-	void *ptr;
-
 	memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
 		     __func__, (u64)size, (u64)align, nid, &min_addr,
 		     &max_addr, (void *)_RET_IP_);
 
-	ptr = memblock_alloc_internal(size, align,
-					   min_addr, max_addr, nid, false);
-	if (ptr && size > 0)
-		page_init_poison(ptr, size);
-
-	return ptr;
+	return memblock_alloc_internal(size, align, min_addr, max_addr, nid,
+				       false);
 }
 
 /**
@@ -1680,6 +1673,11 @@ void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size)
 	if (!size)
 		return;
 
+	if (memblock.memory.cnt <= 1) {
+		pr_warn("%s: No memory registered yet\n", __func__);
+		return;
+	}
+
 	ret = memblock_isolate_range(&memblock.memory, base, size,
 						&start_rgn, &end_rgn);
 	if (ret)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 389b5766e74f..b762215d73eb 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -103,6 +103,14 @@ static bool do_memsw_account(void)
 	return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_noswap;
 }
 
+/* memcg and lruvec stats flushing */
+static void flush_memcg_stats_dwork(struct work_struct *w);
+static DECLARE_DEFERRABLE_WORK(stats_flush_dwork, flush_memcg_stats_dwork);
+static void flush_memcg_stats_work(struct work_struct *w);
+static DECLARE_WORK(stats_flush_work, flush_memcg_stats_work);
+static DEFINE_PER_CPU(unsigned int, stats_flush_threshold);
+static DEFINE_SPINLOCK(stats_flush_lock);
+
 #define THRESHOLDS_EVENTS_TARGET 128
 #define SOFTLIMIT_EVENTS_TARGET 1024
 
@@ -248,9 +256,9 @@ struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg)
 	return &memcg->vmpressure;
 }
 
-struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr)
+struct mem_cgroup *vmpressure_to_memcg(struct vmpressure *vmpr)
 {
-	return &container_of(vmpr, struct mem_cgroup, vmpressure)->css;
+	return container_of(vmpr, struct mem_cgroup, vmpressure);
 }
 
 #ifdef CONFIG_MEMCG_KMEM
@@ -646,17 +654,6 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
 }
 
 /* idx can be of type enum memcg_stat_item or node_stat_item. */
-static unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
-{
-	long x = READ_ONCE(memcg->vmstats.state[idx]);
-#ifdef CONFIG_SMP
-	if (x < 0)
-		x = 0;
-#endif
-	return x;
-}
-
-/* idx can be of type enum memcg_stat_item or node_stat_item. */
 static unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx)
 {
 	long x = 0;
@@ -671,23 +668,11 @@ static unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx)
 	return x;
 }
 
-static struct mem_cgroup_per_node *
-parent_nodeinfo(struct mem_cgroup_per_node *pn, int nid)
-{
-	struct mem_cgroup *parent;
-
-	parent = parent_mem_cgroup(pn->memcg);
-	if (!parent)
-		return NULL;
-	return parent->nodeinfo[nid];
-}
-
 void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 			      int val)
 {
 	struct mem_cgroup_per_node *pn;
 	struct mem_cgroup *memcg;
-	long x, threshold = MEMCG_CHARGE_BATCH;
 
 	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
 	memcg = pn->memcg;
@@ -696,21 +681,9 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 	__mod_memcg_state(memcg, idx, val);
 
 	/* Update lruvec */
-	__this_cpu_add(pn->lruvec_stat_local->count[idx], val);
-
-	if (vmstat_item_in_bytes(idx))
-		threshold <<= PAGE_SHIFT;
-
-	x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]);
-	if (unlikely(abs(x) > threshold)) {
-		pg_data_t *pgdat = lruvec_pgdat(lruvec);
-		struct mem_cgroup_per_node *pi;
-
-		for (pi = pn; pi; pi = parent_nodeinfo(pi, pgdat->node_id))
-			atomic_long_add(x, &pi->lruvec_stat[idx]);
-		x = 0;
-	}
-	__this_cpu_write(pn->lruvec_stat_cpu->count[idx], x);
+	__this_cpu_add(pn->lruvec_stats_percpu->state[idx], val);
+	if (!(__this_cpu_inc_return(stats_flush_threshold) % MEMCG_CHARGE_BATCH))
+		queue_work(system_unbound_wq, &stats_flush_work);
 }
 
 /**
@@ -905,7 +878,7 @@ EXPORT_SYMBOL(mem_cgroup_from_task);
 
 static __always_inline struct mem_cgroup *active_memcg(void)
 {
-	if (in_interrupt())
+	if (!in_task())
 		return this_cpu_read(int_active_memcg);
 	else
 		return current->active_memcg;
@@ -2205,8 +2178,9 @@ static void drain_local_stock(struct work_struct *dummy)
 	unsigned long flags;
 
 	/*
-	 * The only protection from memory hotplug vs. drain_stock races is
-	 * that we always operate on local CPU stock here with IRQ disabled
+	 * The only protection from cpu hotplug (memcg_hotplug_cpu_dead) vs.
+	 * drain_stock races is that we always operate on local CPU stock
+	 * here with IRQ disabled
 	 */
 	local_irq_save(flags);
 
@@ -2273,7 +2247,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
 		if (memcg && stock->nr_pages &&
 		    mem_cgroup_is_descendant(memcg, root_memcg))
 			flush = true;
-		if (obj_stock_flush_required(stock, root_memcg))
+		else if (obj_stock_flush_required(stock, root_memcg))
 			flush = true;
 		rcu_read_unlock();
 
@@ -2289,40 +2263,13 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
 	mutex_unlock(&percpu_charge_mutex);
 }
 
-static void memcg_flush_lruvec_page_state(struct mem_cgroup *memcg, int cpu)
-{
-	int nid;
-
-	for_each_node(nid) {
-		struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
-		unsigned long stat[NR_VM_NODE_STAT_ITEMS];
-		struct batched_lruvec_stat *lstatc;
-		int i;
-
-		lstatc = per_cpu_ptr(pn->lruvec_stat_cpu, cpu);
-		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
-			stat[i] = lstatc->count[i];
-			lstatc->count[i] = 0;
-		}
-
-		do {
-			for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
-				atomic_long_add(stat[i], &pn->lruvec_stat[i]);
-		} while ((pn = parent_nodeinfo(pn, nid)));
-	}
-}
-
 static int memcg_hotplug_cpu_dead(unsigned int cpu)
 {
 	struct memcg_stock_pcp *stock;
-	struct mem_cgroup *memcg;
 
 	stock = &per_cpu(memcg_stock, cpu);
 	drain_stock(stock);
 
-	for_each_mem_cgroup(memcg)
-		memcg_flush_lruvec_page_state(memcg, cpu);
-
 	return 0;
 }
 
@@ -4116,7 +4063,7 @@ static int mem_cgroup_swappiness_write(struct cgroup_subsys_state *css,
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 
-	if (val > 100)
+	if (val > 200)
 		return -EINVAL;
 
 	if (!mem_cgroup_is_root(memcg))
@@ -4668,7 +4615,7 @@ void mem_cgroup_flush_foreign(struct bdi_writeback *wb)
 		    atomic_read(&frn->done.cnt) == 1) {
 			frn->at = 0;
 			trace_flush_foreign(wb, frn->bdi_id, frn->memcg_id);
-			cgroup_writeback_by_id(frn->bdi_id, frn->memcg_id, 0,
+			cgroup_writeback_by_id(frn->bdi_id, frn->memcg_id,
 					       WB_REASON_FOREIGN_FLUSH,
 					       &frn->done);
 		}
@@ -4892,9 +4839,9 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
 
 	vfs_poll(efile.file, &event->pt);
 
-	spin_lock(&memcg->event_list_lock);
+	spin_lock_irq(&memcg->event_list_lock);
 	list_add(&event->list, &memcg->event_list);
-	spin_unlock(&memcg->event_list_lock);
+	spin_unlock_irq(&memcg->event_list_lock);
 
 	fdput(cfile);
 	fdput(efile);
@@ -5129,17 +5076,9 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 	if (!pn)
 		return 1;
 
-	pn->lruvec_stat_local = alloc_percpu_gfp(struct lruvec_stat,
-						 GFP_KERNEL_ACCOUNT);
-	if (!pn->lruvec_stat_local) {
-		kfree(pn);
-		return 1;
-	}
-
-	pn->lruvec_stat_cpu = alloc_percpu_gfp(struct batched_lruvec_stat,
-					       GFP_KERNEL_ACCOUNT);
-	if (!pn->lruvec_stat_cpu) {
-		free_percpu(pn->lruvec_stat_local);
+	pn->lruvec_stats_percpu = alloc_percpu_gfp(struct lruvec_stats_percpu,
+						   GFP_KERNEL_ACCOUNT);
+	if (!pn->lruvec_stats_percpu) {
 		kfree(pn);
 		return 1;
 	}
@@ -5160,8 +5099,7 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 	if (!pn)
 		return;
 
-	free_percpu(pn->lruvec_stat_cpu);
-	free_percpu(pn->lruvec_stat_local);
+	free_percpu(pn->lruvec_stats_percpu);
 	kfree(pn);
 }
 
@@ -5177,15 +5115,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 
 static void mem_cgroup_free(struct mem_cgroup *memcg)
 {
-	int cpu;
-
 	memcg_wb_domain_exit(memcg);
-	/*
-	 * Flush percpu lruvec stats to guarantee the value
-	 * correctness on parent's and all ancestor levels.
-	 */
-	for_each_online_cpu(cpu)
-		memcg_flush_lruvec_page_state(memcg, cpu);
 	__mem_cgroup_free(memcg);
 }
 
@@ -5321,6 +5251,10 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	/* Online state pins memcg ID, memcg ID pins CSS */
 	refcount_set(&memcg->id.ref, 1);
 	css_get(css);
+
+	if (unlikely(mem_cgroup_is_root(memcg)))
+		queue_delayed_work(system_unbound_wq, &stats_flush_dwork,
+				   2UL*HZ);
 	return 0;
 }
 
@@ -5334,12 +5268,12 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
 	 * Notify userspace about cgroup removing only after rmdir of cgroup
 	 * directory to avoid race between userspace and kernelspace.
 	 */
-	spin_lock(&memcg->event_list_lock);
+	spin_lock_irq(&memcg->event_list_lock);
 	list_for_each_entry_safe(event, tmp, &memcg->event_list, list) {
 		list_del_init(&event->list);
 		schedule_work(&event->remove);
 	}
-	spin_unlock(&memcg->event_list_lock);
+	spin_unlock_irq(&memcg->event_list_lock);
 
 	page_counter_set_min(&memcg->memory, 0);
 	page_counter_set_low(&memcg->memory, 0);
@@ -5412,13 +5346,33 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
 	memcg_wb_domain_size_changed(memcg);
 }
 
+void mem_cgroup_flush_stats(void)
+{
+	if (!spin_trylock(&stats_flush_lock))
+		return;
+
+	cgroup_rstat_flush_irqsafe(root_mem_cgroup->css.cgroup);
+	spin_unlock(&stats_flush_lock);
+}
+
+static void flush_memcg_stats_dwork(struct work_struct *w)
+{
+	mem_cgroup_flush_stats();
+	queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ);
+}
+
+static void flush_memcg_stats_work(struct work_struct *w)
+{
+	mem_cgroup_flush_stats();
+}
+
 static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
 	struct memcg_vmstats_percpu *statc;
 	long delta, v;
-	int i;
+	int i, nid;
 
 	statc = per_cpu_ptr(memcg->vmstats_percpu, cpu);
 
@@ -5466,6 +5420,36 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 		if (parent)
 			parent->vmstats.events_pending[i] += delta;
 	}
+
+	for_each_node_state(nid, N_MEMORY) {
+		struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
+		struct mem_cgroup_per_node *ppn = NULL;
+		struct lruvec_stats_percpu *lstatc;
+
+		if (parent)
+			ppn = parent->nodeinfo[nid];
+
+		lstatc = per_cpu_ptr(pn->lruvec_stats_percpu, cpu);
+
+		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
+			delta = pn->lruvec_stats.state_pending[i];
+			if (delta)
+				pn->lruvec_stats.state_pending[i] = 0;
+
+			v = READ_ONCE(lstatc->state[i]);
+			if (v != lstatc->state_prev[i]) {
+				delta += v - lstatc->state_prev[i];
+				lstatc->state_prev[i] = v;
+			}
+
+			if (!delta)
+				continue;
+
+			pn->lruvec_stats.state[i] += delta;
+			if (ppn)
+				ppn->lruvec_stats.state_pending[i] += delta;
+		}
+	}
 }
 
 #ifdef CONFIG_MMU
@@ -6399,6 +6383,8 @@ static int memory_numa_stat_show(struct seq_file *m, void *v)
 	int i;
 	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
 
+	cgroup_rstat_flush(memcg->css.cgroup);
+
 	for (i = 0; i < ARRAY_SIZE(memory_stats); i++) {
 		int nid;
 
@@ -6704,8 +6690,7 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root,
 			atomic_long_read(&parent->memory.children_low_usage)));
 }
 
-static int __mem_cgroup_charge(struct page *page, struct mem_cgroup *memcg,
-			       gfp_t gfp)
+static int charge_memcg(struct page *page, struct mem_cgroup *memcg, gfp_t gfp)
 {
 	unsigned int nr_pages = thp_nr_pages(page);
 	int ret;
@@ -6726,7 +6711,7 @@ out:
 }
 
 /**
- * mem_cgroup_charge - charge a newly allocated page to a cgroup
+ * __mem_cgroup_charge - charge a newly allocated page to a cgroup
  * @page: page to charge
  * @mm: mm context of the victim
  * @gfp_mask: reclaim mode
@@ -6739,16 +6724,14 @@ out:
  *
  * Returns 0 on success. Otherwise, an error code is returned.
  */
-int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
+int __mem_cgroup_charge(struct page *page, struct mm_struct *mm,
+			gfp_t gfp_mask)
 {
 	struct mem_cgroup *memcg;
 	int ret;
 
-	if (mem_cgroup_disabled())
-		return 0;
-
 	memcg = get_mem_cgroup_from_mm(mm);
-	ret = __mem_cgroup_charge(page, memcg, gfp_mask);
+	ret = charge_memcg(page, memcg, gfp_mask);
 	css_put(&memcg->css);
 
 	return ret;
@@ -6783,7 +6766,7 @@ int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
 		memcg = get_mem_cgroup_from_mm(mm);
 	rcu_read_unlock();
 
-	ret = __mem_cgroup_charge(page, memcg, gfp);
+	ret = charge_memcg(page, memcg, gfp);
 
 	css_put(&memcg->css);
 	return ret;
@@ -6919,18 +6902,15 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
 }
 
 /**
- * mem_cgroup_uncharge - uncharge a page
+ * __mem_cgroup_uncharge - uncharge a page
  * @page: page to uncharge
  *
- * Uncharge a page previously charged with mem_cgroup_charge().
+ * Uncharge a page previously charged with __mem_cgroup_charge().
  */
-void mem_cgroup_uncharge(struct page *page)
+void __mem_cgroup_uncharge(struct page *page)
 {
 	struct uncharge_gather ug;
 
-	if (mem_cgroup_disabled())
-		return;
-
 	/* Don't touch page->lru of any random page, pre-check: */
 	if (!page_memcg(page))
 		return;
@@ -6941,20 +6921,17 @@ void mem_cgroup_uncharge(struct page *page)
 }
 
 /**
- * mem_cgroup_uncharge_list - uncharge a list of page
+ * __mem_cgroup_uncharge_list - uncharge a list of page
  * @page_list: list of pages to uncharge
  *
  * Uncharge a list of pages previously charged with
- * mem_cgroup_charge().
+ * __mem_cgroup_charge().
  */
-void mem_cgroup_uncharge_list(struct list_head *page_list)
+void __mem_cgroup_uncharge_list(struct list_head *page_list)
 {
 	struct uncharge_gather ug;
 	struct page *page;
 
-	if (mem_cgroup_disabled())
-		return;
-
 	uncharge_gather_clear(&ug);
 	list_for_each_entry(page, page_list, lru)
 		uncharge_page(page, &ug);
@@ -7244,7 +7221,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
 }
 
 /**
- * mem_cgroup_try_charge_swap - try charging swap space for a page
+ * __mem_cgroup_try_charge_swap - try charging swap space for a page
  * @page: page being added to swap
  * @entry: swap entry to charge
  *
@@ -7252,16 +7229,13 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
  *
  * Returns 0 on success, -ENOMEM on failure.
  */
-int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
+int __mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
 {
 	unsigned int nr_pages = thp_nr_pages(page);
 	struct page_counter *counter;
 	struct mem_cgroup *memcg;
 	unsigned short oldid;
 
-	if (mem_cgroup_disabled())
-		return 0;
-
 	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
 		return 0;
 
@@ -7297,11 +7271,11 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
 }
 
 /**
- * mem_cgroup_uncharge_swap - uncharge swap space
+ * __mem_cgroup_uncharge_swap - uncharge swap space
  * @entry: swap entry to uncharge
  * @nr_pages: the amount of swap space to uncharge
  */
-void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages)
+void __mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages)
 {
 	struct mem_cgroup *memcg;
 	unsigned short id;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index e1f87cf13235..58ab5161a8ce 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -68,7 +68,7 @@ atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);
 
 static bool __page_handle_poison(struct page *page)
 {
-	bool ret;
+	int ret;
 
 	zone_pcp_disable(page_zone(page));
 	ret = dissolve_free_huge_page(page);
@@ -76,7 +76,7 @@ static bool __page_handle_poison(struct page *page)
 		ret = take_page_off_buddy(page);
 	zone_pcp_enable(page_zone(page));
 
-	return ret;
+	return ret > 0;
 }
 
 static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, bool release)
@@ -282,9 +282,9 @@ static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags)
 
 /*
  * Unknown page type encountered. Try to check whether it can turn PageLRU by
- * lru_add_drain_all, or a free page by reclaiming slabs when possible.
+ * lru_add_drain_all.
  */
-void shake_page(struct page *p, int access)
+void shake_page(struct page *p)
 {
 	if (PageHuge(p))
 		return;
@@ -296,11 +296,9 @@ void shake_page(struct page *p, int access)
 	}
 
 	/*
-	 * Only call shrink_node_slabs here (which would also shrink
-	 * other caches) if access is not potentially fatal.
+	 * TODO: Could shrink slab caches here if a lightweight range-based
+	 * shrinker will be available.
 	 */
-	if (access)
-		drop_slab_node(page_to_nid(p));
 }
 EXPORT_SYMBOL_GPL(shake_page);
 
@@ -391,8 +389,8 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
 /*
  * Kill the processes that have been collected earlier.
  *
- * Only do anything when DOIT is set, otherwise just free the list
- * (this is used for clean pages which do not need killing)
+ * Only do anything when FORCEKILL is set, otherwise just free the
+ * list (this is used for clean pages which do not need killing)
  * Also when FAIL is set do a force kill because something went
  * wrong earlier.
  */
@@ -632,7 +630,7 @@ static int hwpoison_pte_range(pmd_t *pmdp, unsigned long addr,
 {
 	struct hwp_walk *hwp = (struct hwp_walk *)walk->private;
 	int ret = 0;
-	pte_t *ptep;
+	pte_t *ptep, *mapped_pte;
 	spinlock_t *ptl;
 
 	ptl = pmd_trans_huge_lock(pmdp, walk->vma);
@@ -645,14 +643,15 @@ static int hwpoison_pte_range(pmd_t *pmdp, unsigned long addr,
 	if (pmd_trans_unstable(pmdp))
 		goto out;
 
-	ptep = pte_offset_map_lock(walk->vma->vm_mm, pmdp, addr, &ptl);
+	mapped_pte = ptep = pte_offset_map_lock(walk->vma->vm_mm, pmdp,
+						addr, &ptl);
 	for (; addr != end; ptep++, addr += PAGE_SIZE) {
 		ret = check_hwpoisoned_entry(*ptep, addr, PAGE_SHIFT,
 					     hwp->pfn, &hwp->tk);
 		if (ret == 1)
 			break;
 	}
-	pte_unmap_unlock(ptep - 1, ptl);
+	pte_unmap_unlock(mapped_pte, ptl);
 out:
 	cond_resched();
 	return ret;
@@ -1204,7 +1203,7 @@ try_again:
 			 * page, retry.
 			 */
 			if (pass++ < 3) {
-				shake_page(p, 1);
+				shake_page(p);
 				goto try_again;
 			}
 			ret = -EIO;
@@ -1221,7 +1220,7 @@ try_again:
 		 */
 		if (pass++ < 3) {
 			put_page(p);
-			shake_page(p, 1);
+			shake_page(p);
 			count_increased = false;
 			goto try_again;
 		}
@@ -1229,6 +1228,9 @@ try_again:
 		ret = -EIO;
 	}
 out:
+	if (ret == -EIO)
+		dump_page(p, "hwpoison: unhandlable page");
+
 	return ret;
 }
 
@@ -1270,14 +1272,13 @@ static int get_hwpoison_page(struct page *p, unsigned long flags)
  * the pages and send SIGBUS to the processes if the data was dirty.
  */
 static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
-				  int flags, struct page **hpagep)
+				  int flags, struct page *hpage)
 {
 	enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC;
 	struct address_space *mapping;
 	LIST_HEAD(tokill);
 	bool unmap_success;
 	int kill = 1, forcekill;
-	struct page *hpage = *hpagep;
 	bool mlocked = PageMlocked(hpage);
 
 	/*
@@ -1369,7 +1370,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	 * shake_page() again to ensure that it's flushed.
 	 */
 	if (mlocked)
-		shake_page(hpage, 0);
+		shake_page(hpage);
 
 	/*
 	 * Now that the dirty bit has been propagated to the
@@ -1502,7 +1503,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
 		goto out;
 	}
 
-	if (!hwpoison_user_mappings(p, pfn, flags, &head)) {
+	if (!hwpoison_user_mappings(p, pfn, flags, head)) {
 		action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
 		res = -EBUSY;
 		goto out;
@@ -1518,7 +1519,6 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 		struct dev_pagemap *pgmap)
 {
 	struct page *page = pfn_to_page(pfn);
-	const bool unmap_success = true;
 	unsigned long size = 0;
 	struct to_kill *tk;
 	LIST_HEAD(tokill);
@@ -1590,7 +1590,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 		start = (page->index << PAGE_SHIFT) & ~(size - 1);
 		unmap_mapping_range(page->mapping, start, size, 0);
 	}
-	kill_procs(&tokill, flags & MF_MUST_KILL, !unmap_success, pfn, flags);
+	kill_procs(&tokill, flags & MF_MUST_KILL, false, pfn, flags);
 	rc = 0;
 unlock:
 	dax_unlock_page(page, cookie);
@@ -1724,7 +1724,7 @@ try_again:
 	 * The check (unnecessarily) ignores LRU pages being isolated and
 	 * walked by the page reclaim code, however that's not a big loss.
 	 */
-	shake_page(p, 0);
+	shake_page(p);
 
 	lock_page(p);
 
@@ -1783,7 +1783,7 @@ try_again:
 	 * Now take care of user space mappings.
 	 * Abort on fail: __delete_from_page_cache() assumes unmapped page.
 	 */
-	if (!hwpoison_user_mappings(p, pfn, flags, &p)) {
+	if (!hwpoison_user_mappings(p, pfn, flags, p)) {
 		action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
 		res = -EBUSY;
 		goto unlock_page;
@@ -2099,7 +2099,7 @@ static int __soft_offline_page(struct page *page)
 
 	if (isolate_page(hpage, &pagelist)) {
 		ret = migrate_pages(&pagelist, alloc_migration_target, NULL,
-			(unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_FAILURE);
+			(unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_FAILURE, NULL);
 		if (!ret) {
 			bool release = !huge;
 
@@ -2109,14 +2109,14 @@ static int __soft_offline_page(struct page *page)
 			if (!list_empty(&pagelist))
 				putback_movable_pages(&pagelist);
 
-			pr_info("soft offline: %#lx: %s migration failed %d, type %lx (%pGp)\n",
-				pfn, msg_page[huge], ret, page->flags, &page->flags);
+			pr_info("soft offline: %#lx: %s migration failed %d, type %pGp\n",
+				pfn, msg_page[huge], ret, &page->flags);
 			if (ret > 0)
 				ret = -EBUSY;
 		}
 	} else {
-		pr_info("soft offline: %#lx: %s isolation failed, page count %d, type %lx (%pGp)\n",
-			pfn, msg_page[huge], page_count(page), page->flags, &page->flags);
+		pr_info("soft offline: %#lx: %s isolation failed, page count %d, type %pGp\n",
+			pfn, msg_page[huge], page_count(page), &page->flags);
 		ret = -EBUSY;
 	}
 	return ret;
@@ -2208,9 +2208,6 @@ retry:
 			try_again = false;
 			goto retry;
 		}
-	} else if (ret == -EIO) {
-		pr_info("%s: %#lx: unknown page type: %lx (%pGp)\n",
-			 __func__, pfn, page->flags, &page->flags);
 	}
 
 	return ret;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 86c3af79e874..9fd0be32a281 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -52,6 +52,73 @@ module_param(memmap_on_memory, bool, 0444);
 MODULE_PARM_DESC(memmap_on_memory, "Enable memmap on memory for memory hotplug");
 #endif
 
+enum {
+	ONLINE_POLICY_CONTIG_ZONES = 0,
+	ONLINE_POLICY_AUTO_MOVABLE,
+};
+
+const char *online_policy_to_str[] = {
+	[ONLINE_POLICY_CONTIG_ZONES] = "contig-zones",
+	[ONLINE_POLICY_AUTO_MOVABLE] = "auto-movable",
+};
+
+static int set_online_policy(const char *val, const struct kernel_param *kp)
+{
+	int ret = sysfs_match_string(online_policy_to_str, val);
+
+	if (ret < 0)
+		return ret;
+	*((int *)kp->arg) = ret;
+	return 0;
+}
+
+static int get_online_policy(char *buffer, const struct kernel_param *kp)
+{
+	return sprintf(buffer, "%s\n", online_policy_to_str[*((int *)kp->arg)]);
+}
+
+/*
+ * memory_hotplug.online_policy: configure online behavior when onlining without
+ * specifying a zone (MMOP_ONLINE)
+ *
+ * "contig-zones": keep zone contiguous
+ * "auto-movable": online memory to ZONE_MOVABLE if the configuration
+ *                 (auto_movable_ratio, auto_movable_numa_aware) allows for it
+ */
+static int online_policy __read_mostly = ONLINE_POLICY_CONTIG_ZONES;
+static const struct kernel_param_ops online_policy_ops = {
+	.set = set_online_policy,
+	.get = get_online_policy,
+};
+module_param_cb(online_policy, &online_policy_ops, &online_policy, 0644);
+MODULE_PARM_DESC(online_policy,
+		"Set the online policy (\"contig-zones\", \"auto-movable\") "
+		"Default: \"contig-zones\"");
+
+/*
+ * memory_hotplug.auto_movable_ratio: specify maximum MOVABLE:KERNEL ratio
+ *
+ * The ratio represent an upper limit and the kernel might decide to not
+ * online some memory to ZONE_MOVABLE -- e.g., because hotplugged KERNEL memory
+ * doesn't allow for more MOVABLE memory.
+ */
+static unsigned int auto_movable_ratio __read_mostly = 301;
+module_param(auto_movable_ratio, uint, 0644);
+MODULE_PARM_DESC(auto_movable_ratio,
+		"Set the maximum ratio of MOVABLE:KERNEL memory in the system "
+		"in percent for \"auto-movable\" online policy. Default: 301");
+
+/*
+ * memory_hotplug.auto_movable_numa_aware: consider numa node stats
+ */
+#ifdef CONFIG_NUMA
+static bool auto_movable_numa_aware __read_mostly = true;
+module_param(auto_movable_numa_aware, bool, 0644);
+MODULE_PARM_DESC(auto_movable_numa_aware,
+		"Consider numa node stats in addition to global stats in "
+		"\"auto-movable\" online policy. Default: true");
+#endif /* CONFIG_NUMA */
+
 /*
  * online_page_callback contains pointer to current page onlining function.
  * Initially it is generic_online_page(). If it is required it could be
@@ -410,15 +477,13 @@ void __ref remove_pfn_range_from_zone(struct zone *zone,
 				 sizeof(struct page) * cur_nr_pages);
 	}
 
-#ifdef CONFIG_ZONE_DEVICE
 	/*
 	 * Zone shrinking code cannot properly deal with ZONE_DEVICE. So
 	 * we will not try to shrink the zones - which is okay as
 	 * set_zone_contiguous() cannot deal with ZONE_DEVICE either way.
 	 */
-	if (zone_idx(zone) == ZONE_DEVICE)
+	if (zone_is_zone_device(zone))
 		return;
-#endif
 
 	clear_zone_contiguous(zone);
 
@@ -663,6 +728,109 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
 	set_zone_contiguous(zone);
 }
 
+struct auto_movable_stats {
+	unsigned long kernel_early_pages;
+	unsigned long movable_pages;
+};
+
+static void auto_movable_stats_account_zone(struct auto_movable_stats *stats,
+					    struct zone *zone)
+{
+	if (zone_idx(zone) == ZONE_MOVABLE) {
+		stats->movable_pages += zone->present_pages;
+	} else {
+		stats->kernel_early_pages += zone->present_early_pages;
+#ifdef CONFIG_CMA
+		/*
+		 * CMA pages (never on hotplugged memory) behave like
+		 * ZONE_MOVABLE.
+		 */
+		stats->movable_pages += zone->cma_pages;
+		stats->kernel_early_pages -= zone->cma_pages;
+#endif /* CONFIG_CMA */
+	}
+}
+struct auto_movable_group_stats {
+	unsigned long movable_pages;
+	unsigned long req_kernel_early_pages;
+};
+
+static int auto_movable_stats_account_group(struct memory_group *group,
+					   void *arg)
+{
+	const int ratio = READ_ONCE(auto_movable_ratio);
+	struct auto_movable_group_stats *stats = arg;
+	long pages;
+
+	/*
+	 * We don't support modifying the config while the auto-movable online
+	 * policy is already enabled. Just avoid the division by zero below.
+	 */
+	if (!ratio)
+		return 0;
+
+	/*
+	 * Calculate how many early kernel pages this group requires to
+	 * satisfy the configured zone ratio.
+	 */
+	pages = group->present_movable_pages * 100 / ratio;
+	pages -= group->present_kernel_pages;
+
+	if (pages > 0)
+		stats->req_kernel_early_pages += pages;
+	stats->movable_pages += group->present_movable_pages;
+	return 0;
+}
+
+static bool auto_movable_can_online_movable(int nid, struct memory_group *group,
+					    unsigned long nr_pages)
+{
+	unsigned long kernel_early_pages, movable_pages;
+	struct auto_movable_group_stats group_stats = {};
+	struct auto_movable_stats stats = {};
+	pg_data_t *pgdat = NODE_DATA(nid);
+	struct zone *zone;
+	int i;
+
+	/* Walk all relevant zones and collect MOVABLE vs. KERNEL stats. */
+	if (nid == NUMA_NO_NODE) {
+		/* TODO: cache values */
+		for_each_populated_zone(zone)
+			auto_movable_stats_account_zone(&stats, zone);
+	} else {
+		for (i = 0; i < MAX_NR_ZONES; i++) {
+			zone = pgdat->node_zones + i;
+			if (populated_zone(zone))
+				auto_movable_stats_account_zone(&stats, zone);
+		}
+	}
+
+	kernel_early_pages = stats.kernel_early_pages;
+	movable_pages = stats.movable_pages;
+
+	/*
+	 * Kernel memory inside dynamic memory group allows for more MOVABLE
+	 * memory within the same group. Remove the effect of all but the
+	 * current group from the stats.
+	 */
+	walk_dynamic_memory_groups(nid, auto_movable_stats_account_group,
+				   group, &group_stats);
+	if (kernel_early_pages <= group_stats.req_kernel_early_pages)
+		return false;
+	kernel_early_pages -= group_stats.req_kernel_early_pages;
+	movable_pages -= group_stats.movable_pages;
+
+	if (group && group->is_dynamic)
+		kernel_early_pages += group->present_kernel_pages;
+
+	/*
+	 * Test if we could online the given number of pages to ZONE_MOVABLE
+	 * and still stay in the configured ratio.
+	 */
+	movable_pages += nr_pages;
+	return movable_pages <= (auto_movable_ratio * kernel_early_pages) / 100;
+}
+
 /*
  * Returns a default kernel memory zone for the given pfn range.
  * If no kernel zone covers this pfn range it will automatically go
@@ -684,6 +852,117 @@ static struct zone *default_kernel_zone_for_pfn(int nid, unsigned long start_pfn
 	return &pgdat->node_zones[ZONE_NORMAL];
 }
 
+/*
+ * Determine to which zone to online memory dynamically based on user
+ * configuration and system stats. We care about the following ratio:
+ *
+ *   MOVABLE : KERNEL
+ *
+ * Whereby MOVABLE is memory in ZONE_MOVABLE and KERNEL is memory in
+ * one of the kernel zones. CMA pages inside one of the kernel zones really
+ * behaves like ZONE_MOVABLE, so we treat them accordingly.
+ *
+ * We don't allow for hotplugged memory in a KERNEL zone to increase the
+ * amount of MOVABLE memory we can have, so we end up with:
+ *
+ *   MOVABLE : KERNEL_EARLY
+ *
+ * Whereby KERNEL_EARLY is memory in one of the kernel zones, available sinze
+ * boot. We base our calculation on KERNEL_EARLY internally, because:
+ *
+ * a) Hotplugged memory in one of the kernel zones can sometimes still get
+ *    hotunplugged, especially when hot(un)plugging individual memory blocks.
+ *    There is no coordination across memory devices, therefore "automatic"
+ *    hotunplugging, as implemented in hypervisors, could result in zone
+ *    imbalances.
+ * b) Early/boot memory in one of the kernel zones can usually not get
+ *    hotunplugged again (e.g., no firmware interface to unplug, fragmented
+ *    with unmovable allocations). While there are corner cases where it might
+ *    still work, it is barely relevant in practice.
+ *
+ * Exceptions are dynamic memory groups, which allow for more MOVABLE
+ * memory within the same memory group -- because in that case, there is
+ * coordination within the single memory device managed by a single driver.
+ *
+ * We rely on "present pages" instead of "managed pages", as the latter is
+ * highly unreliable and dynamic in virtualized environments, and does not
+ * consider boot time allocations. For example, memory ballooning adjusts the
+ * managed pages when inflating/deflating the balloon, and balloon compaction
+ * can even migrate inflated pages between zones.
+ *
+ * Using "present pages" is better but some things to keep in mind are:
+ *
+ * a) Some memblock allocations, such as for the crashkernel area, are
+ *    effectively unused by the kernel, yet they account to "present pages".
+ *    Fortunately, these allocations are comparatively small in relevant setups
+ *    (e.g., fraction of system memory).
+ * b) Some hotplugged memory blocks in virtualized environments, esecially
+ *    hotplugged by virtio-mem, look like they are completely present, however,
+ *    only parts of the memory block are actually currently usable.
+ *    "present pages" is an upper limit that can get reached at runtime. As
+ *    we base our calculations on KERNEL_EARLY, this is not an issue.
+ */
+static struct zone *auto_movable_zone_for_pfn(int nid,
+					      struct memory_group *group,
+					      unsigned long pfn,
+					      unsigned long nr_pages)
+{
+	unsigned long online_pages = 0, max_pages, end_pfn;
+	struct page *page;
+
+	if (!auto_movable_ratio)
+		goto kernel_zone;
+
+	if (group && !group->is_dynamic) {
+		max_pages = group->s.max_pages;
+		online_pages = group->present_movable_pages;
+
+		/* If anything is !MOVABLE online the rest !MOVABLE. */
+		if (group->present_kernel_pages)
+			goto kernel_zone;
+	} else if (!group || group->d.unit_pages == nr_pages) {
+		max_pages = nr_pages;
+	} else {
+		max_pages = group->d.unit_pages;
+		/*
+		 * Take a look at all online sections in the current unit.
+		 * We can safely assume that all pages within a section belong
+		 * to the same zone, because dynamic memory groups only deal
+		 * with hotplugged memory.
+		 */
+		pfn = ALIGN_DOWN(pfn, group->d.unit_pages);
+		end_pfn = pfn + group->d.unit_pages;
+		for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+			page = pfn_to_online_page(pfn);
+			if (!page)
+				continue;
+			/* If anything is !MOVABLE online the rest !MOVABLE. */
+			if (page_zonenum(page) != ZONE_MOVABLE)
+				goto kernel_zone;
+			online_pages += PAGES_PER_SECTION;
+		}
+	}
+
+	/*
+	 * Online MOVABLE if we could *currently* online all remaining parts
+	 * MOVABLE. We expect to (add+) online them immediately next, so if
+	 * nobody interferes, all will be MOVABLE if possible.
+	 */
+	nr_pages = max_pages - online_pages;
+	if (!auto_movable_can_online_movable(NUMA_NO_NODE, group, nr_pages))
+		goto kernel_zone;
+
+#ifdef CONFIG_NUMA
+	if (auto_movable_numa_aware &&
+	    !auto_movable_can_online_movable(nid, group, nr_pages))
+		goto kernel_zone;
+#endif /* CONFIG_NUMA */
+
+	return &NODE_DATA(nid)->node_zones[ZONE_MOVABLE];
+kernel_zone:
+	return default_kernel_zone_for_pfn(nid, pfn, nr_pages);
+}
+
 static inline struct zone *default_zone_for_pfn(int nid, unsigned long start_pfn,
 		unsigned long nr_pages)
 {
@@ -708,7 +987,8 @@ static inline struct zone *default_zone_for_pfn(int nid, unsigned long start_pfn
 	return movable_node_enabled ? movable_zone : kernel_zone;
 }
 
-struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_pfn,
+struct zone *zone_for_pfn_range(int online_type, int nid,
+		struct memory_group *group, unsigned long start_pfn,
 		unsigned long nr_pages)
 {
 	if (online_type == MMOP_ONLINE_KERNEL)
@@ -717,6 +997,9 @@ struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_pfn,
 	if (online_type == MMOP_ONLINE_MOVABLE)
 		return &NODE_DATA(nid)->node_zones[ZONE_MOVABLE];
 
+	if (online_policy == ONLINE_POLICY_AUTO_MOVABLE)
+		return auto_movable_zone_for_pfn(nid, group, start_pfn, nr_pages);
+
 	return default_zone_for_pfn(nid, start_pfn, nr_pages);
 }
 
@@ -724,10 +1007,25 @@ struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_pfn,
  * This function should only be called by memory_block_{online,offline},
  * and {online,offline}_pages.
  */
-void adjust_present_page_count(struct zone *zone, long nr_pages)
+void adjust_present_page_count(struct page *page, struct memory_group *group,
+			       long nr_pages)
 {
+	struct zone *zone = page_zone(page);
+	const bool movable = zone_idx(zone) == ZONE_MOVABLE;
+
+	/*
+	 * We only support onlining/offlining/adding/removing of complete
+	 * memory blocks; therefore, either all is either early or hotplugged.
+	 */
+	if (early_section(__pfn_to_section(page_to_pfn(page))))
+		zone->present_early_pages += nr_pages;
 	zone->present_pages += nr_pages;
 	zone->zone_pgdat->node_present_pages += nr_pages;
+
+	if (group && movable)
+		group->present_movable_pages += nr_pages;
+	else if (group && !movable)
+		group->present_kernel_pages += nr_pages;
 }
 
 int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
@@ -773,7 +1071,8 @@ void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages)
 	kasan_remove_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages));
 }
 
-int __ref online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *zone)
+int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
+		       struct zone *zone, struct memory_group *group)
 {
 	unsigned long flags;
 	int need_zonelists_rebuild = 0;
@@ -826,7 +1125,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *z
 	}
 
 	online_pages_range(pfn, nr_pages);
-	adjust_present_page_count(zone, nr_pages);
+	adjust_present_page_count(pfn_to_page(pfn), group, nr_pages);
 
 	node_states_set_node(nid, &arg);
 	if (need_zonelists_rebuild)
@@ -1059,6 +1358,7 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 {
 	struct mhp_params params = { .pgprot = pgprot_mhp(PAGE_KERNEL) };
 	struct vmem_altmap mhp_altmap = {};
+	struct memory_group *group = NULL;
 	u64 start, size;
 	bool new_node = false;
 	int ret;
@@ -1070,6 +1370,13 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 	if (ret)
 		return ret;
 
+	if (mhp_flags & MHP_NID_IS_MGID) {
+		group = memory_group_find_by_id(nid);
+		if (!group)
+			return -EINVAL;
+		nid = group->nid;
+	}
+
 	if (!node_possible(nid)) {
 		WARN(1, "node %d was absent from the node_possible_map\n", nid);
 		return -EINVAL;
@@ -1104,9 +1411,10 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 		goto error;
 
 	/* create memory block devices after memory was added */
-	ret = create_memory_block_devices(start, size, mhp_altmap.alloc);
+	ret = create_memory_block_devices(start, size, mhp_altmap.alloc,
+					  group);
 	if (ret) {
-		arch_remove_memory(nid, start, size, NULL);
+		arch_remove_memory(start, size, NULL);
 		goto error;
 	}
 
@@ -1298,7 +1606,7 @@ struct zone *test_pages_in_a_zone(unsigned long start_pfn,
 	unsigned long pfn, sec_end_pfn;
 	struct zone *zone = NULL;
 	struct page *page;
-	int i;
+
 	for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn + 1);
 	     pfn < end_pfn;
 	     pfn = sec_end_pfn, sec_end_pfn += PAGES_PER_SECTION) {
@@ -1307,17 +1615,10 @@ struct zone *test_pages_in_a_zone(unsigned long start_pfn,
 			continue;
 		for (; pfn < sec_end_pfn && pfn < end_pfn;
 		     pfn += MAX_ORDER_NR_PAGES) {
-			i = 0;
-			/* This is just a CONFIG_HOLES_IN_ZONE check.*/
-			while ((i < MAX_ORDER_NR_PAGES) &&
-				!pfn_valid_within(pfn + i))
-				i++;
-			if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn)
-				continue;
 			/* Check if we got outside of the zone */
-			if (zone && !zone_spans_pfn(zone, pfn + i))
+			if (zone && !zone_spans_pfn(zone, pfn))
 				return NULL;
-			page = pfn_to_page(pfn + i);
+			page = pfn_to_page(pfn);
 			if (zone && page_zone(page) != zone)
 				return NULL;
 			zone = page_zone(page);
@@ -1469,7 +1770,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		if (nodes_empty(nmask))
 			node_set(mtc.nid, nmask);
 		ret = migrate_pages(&source, alloc_migration_target, NULL,
-			(unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
+			(unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_HOTPLUG, NULL);
 		if (ret) {
 			list_for_each_entry(page, &source, lru) {
 				if (__ratelimit(&migrate_rs)) {
@@ -1568,7 +1869,8 @@ static int count_system_ram_pages_cb(unsigned long start_pfn,
 	return 0;
 }
 
-int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages)
+int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
+			struct memory_group *group)
 {
 	const unsigned long end_pfn = start_pfn + nr_pages;
 	unsigned long pfn, system_ram_pages = 0;
@@ -1704,7 +2006,7 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages)
 
 	/* removal success */
 	adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages);
-	adjust_present_page_count(zone, -nr_pages);
+	adjust_present_page_count(pfn_to_page(start_pfn), group, -nr_pages);
 
 	/* reinitialise watermarks and update pcp limits */
 	init_per_zone_wmark_min();
@@ -1746,7 +2048,9 @@ failed_removal:
 static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
 {
 	int ret = !is_memblock_offlined(mem);
+	int *nid = arg;
 
+	*nid = mem->nid;
 	if (unlikely(ret)) {
 		phys_addr_t beginpa, endpa;
 
@@ -1839,12 +2143,12 @@ void try_offline_node(int nid)
 }
 EXPORT_SYMBOL(try_offline_node);
 
-static int __ref try_remove_memory(int nid, u64 start, u64 size)
+static int __ref try_remove_memory(u64 start, u64 size)
 {
-	int rc = 0;
 	struct vmem_altmap mhp_altmap = {};
 	struct vmem_altmap *altmap = NULL;
 	unsigned long nr_vmemmap_pages;
+	int rc = 0, nid = NUMA_NO_NODE;
 
 	BUG_ON(check_hotplug_memory_range(start, size));
 
@@ -1852,8 +2156,12 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size)
 	 * All memory blocks must be offlined before removing memory.  Check
 	 * whether all memory blocks in question are offline and return error
 	 * if this is not the case.
+	 *
+	 * While at it, determine the nid. Note that if we'd have mixed nodes,
+	 * we'd only try to offline the last determined one -- which is good
+	 * enough for the cases we care about.
 	 */
-	rc = walk_memory_blocks(start, size, NULL, check_memblock_offlined_cb);
+	rc = walk_memory_blocks(start, size, &nid, check_memblock_offlined_cb);
 	if (rc)
 		return rc;
 
@@ -1893,7 +2201,7 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size)
 
 	mem_hotplug_begin();
 
-	arch_remove_memory(nid, start, size, altmap);
+	arch_remove_memory(start, size, altmap);
 
 	if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) {
 		memblock_free(start, size);
@@ -1902,7 +2210,8 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size)
 
 	release_mem_region_adjustable(start, size);
 
-	try_offline_node(nid);
+	if (nid != NUMA_NO_NODE)
+		try_offline_node(nid);
 
 	mem_hotplug_done();
 	return 0;
@@ -1910,7 +2219,6 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size)
 
 /**
  * __remove_memory - Remove memory if every memory block is offline
- * @nid: the node ID
  * @start: physical address of the region to remove
  * @size: size of the region to remove
  *
@@ -1918,14 +2226,14 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size)
  * and online/offline operations before this call, as required by
  * try_offline_node().
  */
-void __remove_memory(int nid, u64 start, u64 size)
+void __remove_memory(u64 start, u64 size)
 {
 
 	/*
 	 * trigger BUG() if some memory is not offlined prior to calling this
 	 * function
 	 */
-	if (try_remove_memory(nid, start, size))
+	if (try_remove_memory(start, size))
 		BUG();
 }
 
@@ -1933,12 +2241,12 @@ void __remove_memory(int nid, u64 start, u64 size)
  * Remove memory if every memory block is offline, otherwise return -EBUSY is
  * some memory is not offline
  */
-int remove_memory(int nid, u64 start, u64 size)
+int remove_memory(u64 start, u64 size)
 {
 	int rc;
 
 	lock_device_hotplug();
-	rc  = try_remove_memory(nid, start, size);
+	rc = try_remove_memory(start, size);
 	unlock_device_hotplug();
 
 	return rc;
@@ -1998,7 +2306,7 @@ static int try_reonline_memory_block(struct memory_block *mem, void *arg)
  * unplugged all memory (so it's no longer in use) and want to offline + remove
  * that memory.
  */
-int offline_and_remove_memory(int nid, u64 start, u64 size)
+int offline_and_remove_memory(u64 start, u64 size)
 {
 	const unsigned long mb_count = size / memory_block_size_bytes();
 	uint8_t *online_types, *tmp;
@@ -2034,7 +2342,7 @@ int offline_and_remove_memory(int nid, u64 start, u64 size)
 	 * This cannot fail as it cannot get onlined in the meantime.
 	 */
 	if (!rc) {
-		rc = try_remove_memory(nid, start, size);
+		rc = try_remove_memory(start, size);
 		if (rc)
 			pr_err("%s: Failed to remove memory: %d", __func__, rc);
 	}
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e32360e90274..1592b081c58e 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -31,6 +31,9 @@
  *                but useful to set in a VMA when you have a non default
  *                process policy.
  *
+ * preferred many Try a set of nodes first before normal fallback. This is
+ *                similar to preferred without the special case.
+ *
  * default        Allocate on the local node first, or when on a VMA
  *                use the process policy. This is what Linux always did
  *		  in a NUMA aware kernel and still does by, ahem, default.
@@ -189,7 +192,7 @@ static void mpol_relative_nodemask(nodemask_t *ret, const nodemask_t *orig,
 	nodes_onto(*ret, tmp, *rel);
 }
 
-static int mpol_new_interleave(struct mempolicy *pol, const nodemask_t *nodes)
+static int mpol_new_nodemask(struct mempolicy *pol, const nodemask_t *nodes)
 {
 	if (nodes_empty(*nodes))
 		return -EINVAL;
@@ -207,14 +210,6 @@ static int mpol_new_preferred(struct mempolicy *pol, const nodemask_t *nodes)
 	return 0;
 }
 
-static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes)
-{
-	if (nodes_empty(*nodes))
-		return -EINVAL;
-	pol->nodes = *nodes;
-	return 0;
-}
-
 /*
  * mpol_set_nodemask is called after mpol_new() to set up the nodemask, if
  * any, for the new policy.  mpol_new() has already validated the nodes
@@ -394,7 +389,7 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
 		.rebind = mpol_rebind_default,
 	},
 	[MPOL_INTERLEAVE] = {
-		.create = mpol_new_interleave,
+		.create = mpol_new_nodemask,
 		.rebind = mpol_rebind_nodemask,
 	},
 	[MPOL_PREFERRED] = {
@@ -402,12 +397,16 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
 		.rebind = mpol_rebind_preferred,
 	},
 	[MPOL_BIND] = {
-		.create = mpol_new_bind,
+		.create = mpol_new_nodemask,
 		.rebind = mpol_rebind_nodemask,
 	},
 	[MPOL_LOCAL] = {
 		.rebind = mpol_rebind_default,
 	},
+	[MPOL_PREFERRED_MANY] = {
+		.create = mpol_new_nodemask,
+		.rebind = mpol_rebind_preferred,
+	},
 };
 
 static int migrate_page_add(struct page *page, struct list_head *pagelist,
@@ -900,6 +899,7 @@ static void get_policy_nodemask(struct mempolicy *p, nodemask_t *nodes)
 	case MPOL_BIND:
 	case MPOL_INTERLEAVE:
 	case MPOL_PREFERRED:
+	case MPOL_PREFERRED_MANY:
 		*nodes = p->nodes;
 		break;
 	case MPOL_LOCAL:
@@ -1084,7 +1084,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
 
 	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, alloc_migration_target, NULL,
-				(unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL);
+				(unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL, NULL);
 		if (err)
 			putback_movable_pages(&pagelist);
 	}
@@ -1338,7 +1338,7 @@ static long do_mbind(unsigned long start, unsigned long len,
 		if (!list_empty(&pagelist)) {
 			WARN_ON_ONCE(flags & MPOL_MF_LAZY);
 			nr_failed = migrate_pages(&pagelist, new_page, NULL,
-				start, MIGRATE_SYNC, MR_MEMPOLICY_MBIND);
+				start, MIGRATE_SYNC, MR_MEMPOLICY_MBIND, NULL);
 			if (nr_failed)
 				putback_movable_pages(&pagelist);
 		}
@@ -1362,16 +1362,33 @@ mpol_out:
 /*
  * User space interface with variable sized bitmaps for nodelists.
  */
+static int get_bitmap(unsigned long *mask, const unsigned long __user *nmask,
+		      unsigned long maxnode)
+{
+	unsigned long nlongs = BITS_TO_LONGS(maxnode);
+	int ret;
+
+	if (in_compat_syscall())
+		ret = compat_get_bitmap(mask,
+					(const compat_ulong_t __user *)nmask,
+					maxnode);
+	else
+		ret = copy_from_user(mask, nmask,
+				     nlongs * sizeof(unsigned long));
+
+	if (ret)
+		return -EFAULT;
+
+	if (maxnode % BITS_PER_LONG)
+		mask[nlongs - 1] &= (1UL << (maxnode % BITS_PER_LONG)) - 1;
+
+	return 0;
+}
 
 /* Copy a node mask from user space. */
 static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask,
 		     unsigned long maxnode)
 {
-	unsigned long k;
-	unsigned long t;
-	unsigned long nlongs;
-	unsigned long endmask;
-
 	--maxnode;
 	nodes_clear(*nodes);
 	if (maxnode == 0 || !nmask)
@@ -1379,49 +1396,29 @@ static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask,
 	if (maxnode > PAGE_SIZE*BITS_PER_BYTE)
 		return -EINVAL;
 
-	nlongs = BITS_TO_LONGS(maxnode);
-	if ((maxnode % BITS_PER_LONG) == 0)
-		endmask = ~0UL;
-	else
-		endmask = (1UL << (maxnode % BITS_PER_LONG)) - 1;
-
 	/*
 	 * When the user specified more nodes than supported just check
-	 * if the non supported part is all zero.
-	 *
-	 * If maxnode have more longs than MAX_NUMNODES, check
-	 * the bits in that area first. And then go through to
-	 * check the rest bits which equal or bigger than MAX_NUMNODES.
-	 * Otherwise, just check bits [MAX_NUMNODES, maxnode).
+	 * if the non supported part is all zero, one word at a time,
+	 * starting at the end.
 	 */
-	if (nlongs > BITS_TO_LONGS(MAX_NUMNODES)) {
-		for (k = BITS_TO_LONGS(MAX_NUMNODES); k < nlongs; k++) {
-			if (get_user(t, nmask + k))
-				return -EFAULT;
-			if (k == nlongs - 1) {
-				if (t & endmask)
-					return -EINVAL;
-			} else if (t)
-				return -EINVAL;
-		}
-		nlongs = BITS_TO_LONGS(MAX_NUMNODES);
-		endmask = ~0UL;
-	}
-
-	if (maxnode > MAX_NUMNODES && MAX_NUMNODES % BITS_PER_LONG != 0) {
-		unsigned long valid_mask = endmask;
+	while (maxnode > MAX_NUMNODES) {
+		unsigned long bits = min_t(unsigned long, maxnode, BITS_PER_LONG);
+		unsigned long t;
 
-		valid_mask &= ~((1UL << (MAX_NUMNODES % BITS_PER_LONG)) - 1);
-		if (get_user(t, nmask + nlongs - 1))
+		if (get_bitmap(&t, &nmask[maxnode / BITS_PER_LONG], bits))
 			return -EFAULT;
-		if (t & valid_mask)
+
+		if (maxnode - bits >= MAX_NUMNODES) {
+			maxnode -= bits;
+		} else {
+			maxnode = MAX_NUMNODES;
+			t &= ~((1UL << (MAX_NUMNODES % BITS_PER_LONG)) - 1);
+		}
+		if (t)
 			return -EINVAL;
 	}
 
-	if (copy_from_user(nodes_addr(*nodes), nmask, nlongs*sizeof(unsigned long)))
-		return -EFAULT;
-	nodes_addr(*nodes)[nlongs-1] &= endmask;
-	return 0;
+	return get_bitmap(nodes_addr(*nodes), nmask, maxnode);
 }
 
 /* Copy a kernel node mask to user space */
@@ -1430,6 +1427,10 @@ static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode,
 {
 	unsigned long copy = ALIGN(maxnode-1, 64) / 8;
 	unsigned int nbytes = BITS_TO_LONGS(nr_node_ids) * sizeof(long);
+	bool compat = in_compat_syscall();
+
+	if (compat)
+		nbytes = BITS_TO_COMPAT_LONGS(nr_node_ids) * sizeof(compat_long_t);
 
 	if (copy > nbytes) {
 		if (copy > PAGE_SIZE)
@@ -1437,7 +1438,13 @@ static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode,
 		if (clear_user((char __user *)mask + nbytes, copy - nbytes))
 			return -EFAULT;
 		copy = nbytes;
+		maxnode = nr_node_ids;
 	}
+
+	if (compat)
+		return compat_put_bitmap((compat_ulong_t __user *)mask,
+					 nodes_addr(*nodes), maxnode);
+
 	return copy_to_user(mask, nodes_addr(*nodes), copy) ? -EFAULT : 0;
 }
 
@@ -1446,7 +1453,8 @@ static inline int sanitize_mpol_flags(int *mode, unsigned short *flags)
 {
 	*flags = *mode & MPOL_MODE_FLAGS;
 	*mode &= ~MPOL_MODE_FLAGS;
-	if ((unsigned int)(*mode) >= MPOL_MAX)
+
+	if ((unsigned int)(*mode) >=  MPOL_MAX)
 		return -EINVAL;
 	if ((*flags & MPOL_F_STATIC_NODES) && (*flags & MPOL_F_RELATIVE_NODES))
 		return -EINVAL;
@@ -1641,116 +1649,6 @@ SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
 	return kernel_get_mempolicy(policy, nmask, maxnode, addr, flags);
 }
 
-#ifdef CONFIG_COMPAT
-
-COMPAT_SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
-		       compat_ulong_t __user *, nmask,
-		       compat_ulong_t, maxnode,
-		       compat_ulong_t, addr, compat_ulong_t, flags)
-{
-	long err;
-	unsigned long __user *nm = NULL;
-	unsigned long nr_bits, alloc_size;
-	DECLARE_BITMAP(bm, MAX_NUMNODES);
-
-	nr_bits = min_t(unsigned long, maxnode-1, nr_node_ids);
-	alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
-
-	if (nmask)
-		nm = compat_alloc_user_space(alloc_size);
-
-	err = kernel_get_mempolicy(policy, nm, nr_bits+1, addr, flags);
-
-	if (!err && nmask) {
-		unsigned long copy_size;
-		copy_size = min_t(unsigned long, sizeof(bm), alloc_size);
-		err = copy_from_user(bm, nm, copy_size);
-		/* ensure entire bitmap is zeroed */
-		err |= clear_user(nmask, ALIGN(maxnode-1, 8) / 8);
-		err |= compat_put_bitmap(nmask, bm, nr_bits);
-	}
-
-	return err;
-}
-
-COMPAT_SYSCALL_DEFINE3(set_mempolicy, int, mode, compat_ulong_t __user *, nmask,
-		       compat_ulong_t, maxnode)
-{
-	unsigned long __user *nm = NULL;
-	unsigned long nr_bits, alloc_size;
-	DECLARE_BITMAP(bm, MAX_NUMNODES);
-
-	nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES);
-	alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
-
-	if (nmask) {
-		if (compat_get_bitmap(bm, nmask, nr_bits))
-			return -EFAULT;
-		nm = compat_alloc_user_space(alloc_size);
-		if (copy_to_user(nm, bm, alloc_size))
-			return -EFAULT;
-	}
-
-	return kernel_set_mempolicy(mode, nm, nr_bits+1);
-}
-
-COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len,
-		       compat_ulong_t, mode, compat_ulong_t __user *, nmask,
-		       compat_ulong_t, maxnode, compat_ulong_t, flags)
-{
-	unsigned long __user *nm = NULL;
-	unsigned long nr_bits, alloc_size;
-	nodemask_t bm;
-
-	nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES);
-	alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
-
-	if (nmask) {
-		if (compat_get_bitmap(nodes_addr(bm), nmask, nr_bits))
-			return -EFAULT;
-		nm = compat_alloc_user_space(alloc_size);
-		if (copy_to_user(nm, nodes_addr(bm), alloc_size))
-			return -EFAULT;
-	}
-
-	return kernel_mbind(start, len, mode, nm, nr_bits+1, flags);
-}
-
-COMPAT_SYSCALL_DEFINE4(migrate_pages, compat_pid_t, pid,
-		       compat_ulong_t, maxnode,
-		       const compat_ulong_t __user *, old_nodes,
-		       const compat_ulong_t __user *, new_nodes)
-{
-	unsigned long __user *old = NULL;
-	unsigned long __user *new = NULL;
-	nodemask_t tmp_mask;
-	unsigned long nr_bits;
-	unsigned long size;
-
-	nr_bits = min_t(unsigned long, maxnode - 1, MAX_NUMNODES);
-	size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
-	if (old_nodes) {
-		if (compat_get_bitmap(nodes_addr(tmp_mask), old_nodes, nr_bits))
-			return -EFAULT;
-		old = compat_alloc_user_space(new_nodes ? size * 2 : size);
-		if (new_nodes)
-			new = old + size / sizeof(unsigned long);
-		if (copy_to_user(old, nodes_addr(tmp_mask), size))
-			return -EFAULT;
-	}
-	if (new_nodes) {
-		if (compat_get_bitmap(nodes_addr(tmp_mask), new_nodes, nr_bits))
-			return -EFAULT;
-		if (new == NULL)
-			new = compat_alloc_user_space(size);
-		if (copy_to_user(new, nodes_addr(tmp_mask), size))
-			return -EFAULT;
-	}
-	return kernel_migrate_pages(pid, nr_bits + 1, old, new);
-}
-
-#endif /* CONFIG_COMPAT */
-
 bool vma_migratable(struct vm_area_struct *vma)
 {
 	if (vma->vm_flags & (VM_IO | VM_PFNMAP))
@@ -1875,16 +1773,27 @@ static int apply_policy_zone(struct mempolicy *policy, enum zone_type zone)
  */
 nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy)
 {
+	int mode = policy->mode;
+
 	/* Lower zones don't get a nodemask applied for MPOL_BIND */
-	if (unlikely(policy->mode == MPOL_BIND) &&
-			apply_policy_zone(policy, gfp_zone(gfp)) &&
-			cpuset_nodemask_valid_mems_allowed(&policy->nodes))
+	if (unlikely(mode == MPOL_BIND) &&
+		apply_policy_zone(policy, gfp_zone(gfp)) &&
+		cpuset_nodemask_valid_mems_allowed(&policy->nodes))
+		return &policy->nodes;
+
+	if (mode == MPOL_PREFERRED_MANY)
 		return &policy->nodes;
 
 	return NULL;
 }
 
-/* Return the node id preferred by the given mempolicy, or the given id */
+/*
+ * Return the  preferred node id for 'prefer' mempolicy, and return
+ * the given id for all other policies.
+ *
+ * policy_node() is always coupled with policy_nodemask(), which
+ * secures the nodemask limit for 'bind' and 'prefer-many' policy.
+ */
 static int policy_node(gfp_t gfp, struct mempolicy *policy, int nd)
 {
 	if (policy->mode == MPOL_PREFERRED) {
@@ -1922,7 +1831,7 @@ unsigned int mempolicy_slab_node(void)
 	struct mempolicy *policy;
 	int node = numa_mem_id();
 
-	if (in_interrupt())
+	if (!in_task())
 		return node;
 
 	policy = current->mempolicy;
@@ -1936,7 +1845,9 @@ unsigned int mempolicy_slab_node(void)
 	case MPOL_INTERLEAVE:
 		return interleave_nodes(policy);
 
-	case MPOL_BIND: {
+	case MPOL_BIND:
+	case MPOL_PREFERRED_MANY:
+	{
 		struct zoneref *z;
 
 		/*
@@ -1965,17 +1876,26 @@ unsigned int mempolicy_slab_node(void)
  */
 static unsigned offset_il_node(struct mempolicy *pol, unsigned long n)
 {
-	unsigned nnodes = nodes_weight(pol->nodes);
-	unsigned target;
+	nodemask_t nodemask = pol->nodes;
+	unsigned int target, nnodes;
 	int i;
 	int nid;
+	/*
+	 * The barrier will stabilize the nodemask in a register or on
+	 * the stack so that it will stop changing under the code.
+	 *
+	 * Between first_node() and next_node(), pol->nodes could be changed
+	 * by other threads. So we put pol->nodes in a local stack.
+	 */
+	barrier();
 
+	nnodes = nodes_weight(nodemask);
 	if (!nnodes)
 		return numa_node_id();
 	target = (unsigned int)n % nnodes;
-	nid = first_node(pol->nodes);
+	nid = first_node(nodemask);
 	for (i = 0; i < target; i++)
-		nid = next_node(nid, pol->nodes);
+		nid = next_node(nid, nodemask);
 	return nid;
 }
 
@@ -2008,12 +1928,12 @@ static inline unsigned interleave_nid(struct mempolicy *pol,
  * @addr: address in @vma for shared policy lookup and interleave policy
  * @gfp_flags: for requested zone
  * @mpol: pointer to mempolicy pointer for reference counted mempolicy
- * @nodemask: pointer to nodemask pointer for MPOL_BIND nodemask
+ * @nodemask: pointer to nodemask pointer for 'bind' and 'prefer-many' policy
  *
  * Returns a nid suitable for a huge page allocation and a pointer
  * to the struct mempolicy for conditional unref after allocation.
- * If the effective policy is 'BIND, returns a pointer to the mempolicy's
- * @nodemask for filtering the zonelist.
+ * If the effective policy is 'bind' or 'prefer-many', returns a pointer
+ * to the mempolicy's @nodemask for filtering the zonelist.
  *
  * Must be protected by read_mems_allowed_begin()
  */
@@ -2021,16 +1941,18 @@ int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags,
 				struct mempolicy **mpol, nodemask_t **nodemask)
 {
 	int nid;
+	int mode;
 
 	*mpol = get_vma_policy(vma, addr);
-	*nodemask = NULL;	/* assume !MPOL_BIND */
+	*nodemask = NULL;
+	mode = (*mpol)->mode;
 
-	if (unlikely((*mpol)->mode == MPOL_INTERLEAVE)) {
+	if (unlikely(mode == MPOL_INTERLEAVE)) {
 		nid = interleave_nid(*mpol, vma, addr,
 					huge_page_shift(hstate_vma(vma)));
 	} else {
 		nid = policy_node(gfp_flags, *mpol, numa_node_id());
-		if ((*mpol)->mode == MPOL_BIND)
+		if (mode == MPOL_BIND || mode == MPOL_PREFERRED_MANY)
 			*nodemask = &(*mpol)->nodes;
 	}
 	return nid;
@@ -2063,6 +1985,7 @@ bool init_nodemask_of_mempolicy(nodemask_t *mask)
 	mempolicy = current->mempolicy;
 	switch (mempolicy->mode) {
 	case MPOL_PREFERRED:
+	case MPOL_PREFERRED_MANY:
 	case MPOL_BIND:
 	case MPOL_INTERLEAVE:
 		*mask = mempolicy->nodes;
@@ -2128,6 +2051,27 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
 	return page;
 }
 
+static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order,
+						int nid, struct mempolicy *pol)
+{
+	struct page *page;
+	gfp_t preferred_gfp;
+
+	/*
+	 * This is a two pass approach. The first pass will only try the
+	 * preferred nodes but skip the direct reclaim and allow the
+	 * allocation to fail, while the second pass will try all the
+	 * nodes in system.
+	 */
+	preferred_gfp = gfp | __GFP_NOWARN;
+	preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
+	page = __alloc_pages(preferred_gfp, order, nid, &pol->nodes);
+	if (!page)
+		page = __alloc_pages(gfp, order, numa_node_id(), NULL);
+
+	return page;
+}
+
 /**
  * alloc_pages_vma - Allocate a page for a VMA.
  * @gfp: GFP flags.
@@ -2163,6 +2107,12 @@ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		goto out;
 	}
 
+	if (pol->mode == MPOL_PREFERRED_MANY) {
+		page = alloc_pages_preferred_many(gfp, order, node, pol);
+		mpol_cond_put(pol);
+		goto out;
+	}
+
 	if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
 		int hpage_node = node;
 
@@ -2173,7 +2123,7 @@ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		 * node and don't fall back to other nodes, as the cost of
 		 * remote accesses would likely offset THP benefits.
 		 *
-		 * If the policy is interleave, or does not allow the current
+		 * If the policy is interleave or does not allow the current
 		 * node in its nodemask, we allocate the standard way.
 		 */
 		if (pol->mode == MPOL_PREFERRED)
@@ -2240,6 +2190,9 @@ struct page *alloc_pages(gfp_t gfp, unsigned order)
 	 */
 	if (pol->mode == MPOL_INTERLEAVE)
 		page = alloc_page_interleave(gfp, order, interleave_nodes(pol));
+	else if (pol->mode == MPOL_PREFERRED_MANY)
+		page = alloc_pages_preferred_many(gfp, order,
+				numa_node_id(), pol);
 	else
 		page = __alloc_pages(gfp, order,
 				policy_node(gfp, pol, numa_node_id()),
@@ -2311,6 +2264,7 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
 	case MPOL_BIND:
 	case MPOL_INTERLEAVE:
 	case MPOL_PREFERRED:
+	case MPOL_PREFERRED_MANY:
 		return !!nodes_equal(a->nodes, b->nodes);
 	case MPOL_LOCAL:
 		return true;
@@ -2425,8 +2379,8 @@ static void sp_free(struct sp_node *n)
  * node id.  Policy determination "mimics" alloc_page_vma().
  * Called from fault path where we know the vma and faulting address.
  *
- * Return: -1 if the page is in a node that is valid for this policy, or a
- * suitable node ID to allocate a replacement page from.
+ * Return: NUMA_NO_NODE if the page is in a node that is valid for this
+ * policy, or a suitable node ID to allocate a replacement page from.
  */
 int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long addr)
 {
@@ -2437,7 +2391,7 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
 	int thiscpu = raw_smp_processor_id();
 	int thisnid = cpu_to_node(thiscpu);
 	int polnid = NUMA_NO_NODE;
-	int ret = -1;
+	int ret = NUMA_NO_NODE;
 
 	pol = get_vma_policy(vma, addr);
 	if (!(pol->flags & MPOL_F_MOF))
@@ -2451,6 +2405,8 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
 		break;
 
 	case MPOL_PREFERRED:
+		if (node_isset(curnid, pol->nodes))
+			goto out;
 		polnid = first_node(pol->nodes);
 		break;
 
@@ -2465,9 +2421,10 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
 				break;
 			goto out;
 		}
+		fallthrough;
 
+	case MPOL_PREFERRED_MANY:
 		/*
-		 * allows binding to multiple nodes.
 		 * use current page if in policy nodemask,
 		 * else select nearest allowed node, if any.
 		 * If no allowed nodes, use current [!misplaced].
@@ -2829,6 +2786,7 @@ static const char * const policy_modes[] =
 	[MPOL_BIND]       = "bind",
 	[MPOL_INTERLEAVE] = "interleave",
 	[MPOL_LOCAL]      = "local",
+	[MPOL_PREFERRED_MANY]  = "prefer (many)",
 };
 
 
@@ -2907,6 +2865,7 @@ int mpol_parse_str(char *str, struct mempolicy **mpol)
 		if (!nodelist)
 			err = 0;
 		goto out;
+	case MPOL_PREFERRED_MANY:
 	case MPOL_BIND:
 		/*
 		 * Insist on a nodelist
@@ -2993,6 +2952,7 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 	case MPOL_LOCAL:
 		break;
 	case MPOL_PREFERRED:
+	case MPOL_PREFERRED_MANY:
 	case MPOL_BIND:
 	case MPOL_INTERLEAVE:
 		nodes = pol->nodes;
@@ -3021,3 +2981,64 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 		p += scnprintf(p, buffer + maxlen - p, ":%*pbl",
 			       nodemask_pr_args(&nodes));
 }
+
+bool numa_demotion_enabled = false;
+
+#ifdef CONFIG_SYSFS
+static ssize_t numa_demotion_enabled_show(struct kobject *kobj,
+					  struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%s\n",
+			  numa_demotion_enabled? "true" : "false");
+}
+
+static ssize_t numa_demotion_enabled_store(struct kobject *kobj,
+					   struct kobj_attribute *attr,
+					   const char *buf, size_t count)
+{
+	if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1))
+		numa_demotion_enabled = true;
+	else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1))
+		numa_demotion_enabled = false;
+	else
+		return -EINVAL;
+
+	return count;
+}
+
+static struct kobj_attribute numa_demotion_enabled_attr =
+	__ATTR(demotion_enabled, 0644, numa_demotion_enabled_show,
+	       numa_demotion_enabled_store);
+
+static struct attribute *numa_attrs[] = {
+	&numa_demotion_enabled_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group numa_attr_group = {
+	.attrs = numa_attrs,
+};
+
+static int __init numa_init_sysfs(void)
+{
+	int err;
+	struct kobject *numa_kobj;
+
+	numa_kobj = kobject_create_and_add("numa", mm_kobj);
+	if (!numa_kobj) {
+		pr_err("failed to create numa kobject\n");
+		return -ENOMEM;
+	}
+	err = sysfs_create_group(numa_kobj, &numa_attr_group);
+	if (err) {
+		pr_err("failed to register numa group\n");
+		goto delete_obj;
+	}
+	return 0;
+
+delete_obj:
+	kobject_put(numa_kobj);
+	return err;
+}
+subsys_initcall(numa_init_sysfs);
+#endif
diff --git a/mm/memremap.c b/mm/memremap.c
index 15a074ffb8d7..ed593bf87109 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -140,14 +140,11 @@ static void pageunmap_range(struct dev_pagemap *pgmap, int range_id)
 {
 	struct range *range = &pgmap->ranges[range_id];
 	struct page *first_page;
-	int nid;
 
 	/* make sure to access a memmap that was actually initialized */
 	first_page = pfn_to_page(pfn_first(pgmap, range_id));
 
 	/* pages are dead and unused, undo the arch mapping */
-	nid = page_to_nid(first_page);
-
 	mem_hotplug_begin();
 	remove_pfn_range_from_zone(page_zone(first_page), PHYS_PFN(range->start),
 				   PHYS_PFN(range_len(range)));
@@ -155,7 +152,7 @@ static void pageunmap_range(struct dev_pagemap *pgmap, int range_id)
 		__remove_pages(PHYS_PFN(range->start),
 			       PHYS_PFN(range_len(range)), NULL);
 	} else {
-		arch_remove_memory(nid, range->start, range_len(range),
+		arch_remove_memory(range->start, range_len(range),
 				pgmap_altmap(pgmap));
 		kasan_remove_zero_shadow(__va(range->start), range_len(range));
 	}
diff --git a/mm/migrate.c b/mm/migrate.c
index 7e240437e7d9..a6a7743ee98f 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -49,6 +49,7 @@
 #include <linux/sched/mm.h>
 #include <linux/ptrace.h>
 #include <linux/oom.h>
+#include <linux/memory.h>
 
 #include <asm/tlbflush.h>
 
@@ -959,7 +960,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 				int force, enum migrate_mode mode)
 {
 	int rc = -EAGAIN;
-	int page_was_mapped = 0;
+	bool page_was_mapped = false;
 	struct anon_vma *anon_vma = NULL;
 	bool is_lru = !__PageMovable(page);
 
@@ -1007,7 +1008,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 	}
 
 	/*
-	 * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
+	 * By try_to_migrate(), page->mapcount goes down to 0 here. In this case,
 	 * we cannot notice that anon_vma is freed while we migrates a page.
 	 * This get_anon_vma() delays freeing anon_vma pointer until the end
 	 * of migration. File cache pages are no problem because of page_lock()
@@ -1062,7 +1063,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 		VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !anon_vma,
 				page);
 		try_to_migrate(page, 0);
-		page_was_mapped = 1;
+		page_was_mapped = true;
 	}
 
 	if (!page_mapped(page))
@@ -1099,6 +1100,80 @@ out:
 	return rc;
 }
 
+
+/*
+ * node_demotion[] example:
+ *
+ * Consider a system with two sockets.  Each socket has
+ * three classes of memory attached: fast, medium and slow.
+ * Each memory class is placed in its own NUMA node.  The
+ * CPUs are placed in the node with the "fast" memory.  The
+ * 6 NUMA nodes (0-5) might be split among the sockets like
+ * this:
+ *
+ *	Socket A: 0, 1, 2
+ *	Socket B: 3, 4, 5
+ *
+ * When Node 0 fills up, its memory should be migrated to
+ * Node 1.  When Node 1 fills up, it should be migrated to
+ * Node 2.  The migration path start on the nodes with the
+ * processors (since allocations default to this node) and
+ * fast memory, progress through medium and end with the
+ * slow memory:
+ *
+ *	0 -> 1 -> 2 -> stop
+ *	3 -> 4 -> 5 -> stop
+ *
+ * This is represented in the node_demotion[] like this:
+ *
+ *	{  1, // Node 0 migrates to 1
+ *	   2, // Node 1 migrates to 2
+ *	  -1, // Node 2 does not migrate
+ *	   4, // Node 3 migrates to 4
+ *	   5, // Node 4 migrates to 5
+ *	  -1} // Node 5 does not migrate
+ */
+
+/*
+ * Writes to this array occur without locking.  Cycles are
+ * not allowed: Node X demotes to Y which demotes to X...
+ *
+ * If multiple reads are performed, a single rcu_read_lock()
+ * must be held over all reads to ensure that no cycles are
+ * observed.
+ */
+static int node_demotion[MAX_NUMNODES] __read_mostly =
+	{[0 ...  MAX_NUMNODES - 1] = NUMA_NO_NODE};
+
+/**
+ * next_demotion_node() - Get the next node in the demotion path
+ * @node: The starting node to lookup the next node
+ *
+ * Return: node id for next memory node in the demotion path hierarchy
+ * from @node; NUMA_NO_NODE if @node is terminal.  This does not keep
+ * @node online or guarantee that it *continues* to be the next demotion
+ * target.
+ */
+int next_demotion_node(int node)
+{
+	int target;
+
+	/*
+	 * node_demotion[] is updated without excluding this
+	 * function from running.  RCU doesn't provide any
+	 * compiler barriers, so the READ_ONCE() is required
+	 * to avoid compiler reordering or read merging.
+	 *
+	 * Make sure to use RCU over entire code blocks if
+	 * node_demotion[] reads need to be consistent.
+	 */
+	rcu_read_lock();
+	target = READ_ONCE(node_demotion[node]);
+	rcu_read_unlock();
+
+	return target;
+}
+
 /*
  * Obtain the lock on page, remove all ptes and migrate the page
  * to the newly allocated page in newpage.
@@ -1354,6 +1429,8 @@ static inline int try_split_thp(struct page *page, struct page **page2,
  * @mode:		The migration mode that specifies the constraints for
  *			page migration, if any.
  * @reason:		The reason for page migration.
+ * @ret_succeeded:	Set to the number of pages migrated successfully if
+ *			the caller passes a non-NULL pointer.
  *
  * The function returns after 10 attempts or if no pages are movable any more
  * because the list has become empty or no retryable pages exist any more.
@@ -1364,7 +1441,7 @@ static inline int try_split_thp(struct page *page, struct page **page2,
  */
 int migrate_pages(struct list_head *from, new_page_t get_new_page,
 		free_page_t put_new_page, unsigned long private,
-		enum migrate_mode mode, int reason)
+		enum migrate_mode mode, int reason, unsigned int *ret_succeeded)
 {
 	int retry = 1;
 	int thp_retry = 1;
@@ -1519,6 +1596,9 @@ out:
 	if (!swapwrite)
 		current->flags &= ~PF_SWAPWRITE;
 
+	if (ret_succeeded)
+		*ret_succeeded = nr_succeeded;
+
 	return rc;
 }
 
@@ -1588,7 +1668,7 @@ static int do_move_pages_to_node(struct mm_struct *mm,
 	};
 
 	err = migrate_pages(pagelist, alloc_migration_target, NULL,
-			(unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL);
+		(unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL, NULL);
 	if (err)
 		putback_movable_pages(pagelist);
 	return err;
@@ -1820,6 +1900,23 @@ set_status:
 	mmap_read_unlock(mm);
 }
 
+static int get_compat_pages_array(const void __user *chunk_pages[],
+				  const void __user * __user *pages,
+				  unsigned long chunk_nr)
+{
+	compat_uptr_t __user *pages32 = (compat_uptr_t __user *)pages;
+	compat_uptr_t p;
+	int i;
+
+	for (i = 0; i < chunk_nr; i++) {
+		if (get_user(p, pages32 + i))
+			return -EFAULT;
+		chunk_pages[i] = compat_ptr(p);
+	}
+
+	return 0;
+}
+
 /*
  * Determine the nodes of a user array of pages and store it in
  * a user array of status.
@@ -1839,8 +1936,15 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
 		if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
 			chunk_nr = DO_PAGES_STAT_CHUNK_NR;
 
-		if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages)))
-			break;
+		if (in_compat_syscall()) {
+			if (get_compat_pages_array(chunk_pages, pages,
+						   chunk_nr))
+				break;
+		} else {
+			if (copy_from_user(chunk_pages, pages,
+				      chunk_nr * sizeof(*chunk_pages)))
+				break;
+		}
 
 		do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
 
@@ -1943,28 +2047,6 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
 	return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags);
 }
 
-#ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE6(move_pages, pid_t, pid, compat_ulong_t, nr_pages,
-		       compat_uptr_t __user *, pages32,
-		       const int __user *, nodes,
-		       int __user *, status,
-		       int, flags)
-{
-	const void __user * __user *pages;
-	int i;
-
-	pages = compat_alloc_user_space(nr_pages * sizeof(void *));
-	for (i = 0; i < nr_pages; i++) {
-		compat_uptr_t p;
-
-		if (get_user(p, pages32 + i) ||
-			put_user(compat_ptr(p), pages + i))
-			return -EFAULT;
-	}
-	return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags);
-}
-#endif /* CONFIG_COMPAT */
-
 #ifdef CONFIG_NUMA_BALANCING
 /*
  * Returns true if this is a safe migration target node for misplaced NUMA
@@ -2027,6 +2109,7 @@ out:
 static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
 {
 	int page_lru;
+	int nr_pages = thp_nr_pages(page);
 
 	VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page);
 
@@ -2035,7 +2118,7 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
 		return 0;
 
 	/* Avoid migrating to a node that is nearly full */
-	if (!migrate_balanced_pgdat(pgdat, compound_nr(page)))
+	if (!migrate_balanced_pgdat(pgdat, nr_pages))
 		return 0;
 
 	if (isolate_lru_page(page))
@@ -2043,7 +2126,7 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
 
 	page_lru = page_is_file_lru(page);
 	mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_lru,
-				thp_nr_pages(page));
+			    nr_pages);
 
 	/*
 	 * Isolating the page has taken another reference, so the
@@ -2103,7 +2186,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
 
 	list_add(&page->lru, &migratepages);
 	nr_remaining = migrate_pages(&migratepages, *new, NULL, node,
-				     MIGRATE_ASYNC, MR_NUMA_MISPLACED);
+				     MIGRATE_ASYNC, MR_NUMA_MISPLACED, NULL);
 	if (nr_remaining) {
 		if (!list_empty(&migratepages)) {
 			list_del(&page->lru);
@@ -2982,3 +3065,232 @@ void migrate_vma_finalize(struct migrate_vma *migrate)
 }
 EXPORT_SYMBOL(migrate_vma_finalize);
 #endif /* CONFIG_DEVICE_PRIVATE */
+
+#if defined(CONFIG_MEMORY_HOTPLUG)
+/* Disable reclaim-based migration. */
+static void __disable_all_migrate_targets(void)
+{
+	int node;
+
+	for_each_online_node(node)
+		node_demotion[node] = NUMA_NO_NODE;
+}
+
+static void disable_all_migrate_targets(void)
+{
+	__disable_all_migrate_targets();
+
+	/*
+	 * Ensure that the "disable" is visible across the system.
+	 * Readers will see either a combination of before+disable
+	 * state or disable+after.  They will never see before and
+	 * after state together.
+	 *
+	 * The before+after state together might have cycles and
+	 * could cause readers to do things like loop until this
+	 * function finishes.  This ensures they can only see a
+	 * single "bad" read and would, for instance, only loop
+	 * once.
+	 */
+	synchronize_rcu();
+}
+
+/*
+ * Find an automatic demotion target for 'node'.
+ * Failing here is OK.  It might just indicate
+ * being at the end of a chain.
+ */
+static int establish_migrate_target(int node, nodemask_t *used)
+{
+	int migration_target;
+
+	/*
+	 * Can not set a migration target on a
+	 * node with it already set.
+	 *
+	 * No need for READ_ONCE() here since this
+	 * in the write path for node_demotion[].
+	 * This should be the only thread writing.
+	 */
+	if (node_demotion[node] != NUMA_NO_NODE)
+		return NUMA_NO_NODE;
+
+	migration_target = find_next_best_node(node, used);
+	if (migration_target == NUMA_NO_NODE)
+		return NUMA_NO_NODE;
+
+	node_demotion[node] = migration_target;
+
+	return migration_target;
+}
+
+/*
+ * When memory fills up on a node, memory contents can be
+ * automatically migrated to another node instead of
+ * discarded at reclaim.
+ *
+ * Establish a "migration path" which will start at nodes
+ * with CPUs and will follow the priorities used to build the
+ * page allocator zonelists.
+ *
+ * The difference here is that cycles must be avoided.  If
+ * node0 migrates to node1, then neither node1, nor anything
+ * node1 migrates to can migrate to node0.
+ *
+ * This function can run simultaneously with readers of
+ * node_demotion[].  However, it can not run simultaneously
+ * with itself.  Exclusion is provided by memory hotplug events
+ * being single-threaded.
+ */
+static void __set_migration_target_nodes(void)
+{
+	nodemask_t next_pass	= NODE_MASK_NONE;
+	nodemask_t this_pass	= NODE_MASK_NONE;
+	nodemask_t used_targets = NODE_MASK_NONE;
+	int node;
+
+	/*
+	 * Avoid any oddities like cycles that could occur
+	 * from changes in the topology.  This will leave
+	 * a momentary gap when migration is disabled.
+	 */
+	disable_all_migrate_targets();
+
+	/*
+	 * Allocations go close to CPUs, first.  Assume that
+	 * the migration path starts at the nodes with CPUs.
+	 */
+	next_pass = node_states[N_CPU];
+again:
+	this_pass = next_pass;
+	next_pass = NODE_MASK_NONE;
+	/*
+	 * To avoid cycles in the migration "graph", ensure
+	 * that migration sources are not future targets by
+	 * setting them in 'used_targets'.  Do this only
+	 * once per pass so that multiple source nodes can
+	 * share a target node.
+	 *
+	 * 'used_targets' will become unavailable in future
+	 * passes.  This limits some opportunities for
+	 * multiple source nodes to share a destination.
+	 */
+	nodes_or(used_targets, used_targets, this_pass);
+	for_each_node_mask(node, this_pass) {
+		int target_node = establish_migrate_target(node, &used_targets);
+
+		if (target_node == NUMA_NO_NODE)
+			continue;
+
+		/*
+		 * Visit targets from this pass in the next pass.
+		 * Eventually, every node will have been part of
+		 * a pass, and will become set in 'used_targets'.
+		 */
+		node_set(target_node, next_pass);
+	}
+	/*
+	 * 'next_pass' contains nodes which became migration
+	 * targets in this pass.  Make additional passes until
+	 * no more migrations targets are available.
+	 */
+	if (!nodes_empty(next_pass))
+		goto again;
+}
+
+/*
+ * For callers that do not hold get_online_mems() already.
+ */
+static void set_migration_target_nodes(void)
+{
+	get_online_mems();
+	__set_migration_target_nodes();
+	put_online_mems();
+}
+
+/*
+ * React to hotplug events that might affect the migration targets
+ * like events that online or offline NUMA nodes.
+ *
+ * The ordering is also currently dependent on which nodes have
+ * CPUs.  That means we need CPU on/offline notification too.
+ */
+static int migration_online_cpu(unsigned int cpu)
+{
+	set_migration_target_nodes();
+	return 0;
+}
+
+static int migration_offline_cpu(unsigned int cpu)
+{
+	set_migration_target_nodes();
+	return 0;
+}
+
+/*
+ * This leaves migrate-on-reclaim transiently disabled between
+ * the MEM_GOING_OFFLINE and MEM_OFFLINE events.  This runs
+ * whether reclaim-based migration is enabled or not, which
+ * ensures that the user can turn reclaim-based migration at
+ * any time without needing to recalculate migration targets.
+ *
+ * These callbacks already hold get_online_mems().  That is why
+ * __set_migration_target_nodes() can be used as opposed to
+ * set_migration_target_nodes().
+ */
+static int __meminit migrate_on_reclaim_callback(struct notifier_block *self,
+						 unsigned long action, void *arg)
+{
+	switch (action) {
+	case MEM_GOING_OFFLINE:
+		/*
+		 * Make sure there are not transient states where
+		 * an offline node is a migration target.  This
+		 * will leave migration disabled until the offline
+		 * completes and the MEM_OFFLINE case below runs.
+		 */
+		disable_all_migrate_targets();
+		break;
+	case MEM_OFFLINE:
+	case MEM_ONLINE:
+		/*
+		 * Recalculate the target nodes once the node
+		 * reaches its final state (online or offline).
+		 */
+		__set_migration_target_nodes();
+		break;
+	case MEM_CANCEL_OFFLINE:
+		/*
+		 * MEM_GOING_OFFLINE disabled all the migration
+		 * targets.  Reenable them.
+		 */
+		__set_migration_target_nodes();
+		break;
+	case MEM_GOING_ONLINE:
+	case MEM_CANCEL_ONLINE:
+		break;
+	}
+
+	return notifier_from_errno(0);
+}
+
+static int __init migrate_on_reclaim_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "migrate on reclaim",
+				migration_online_cpu,
+				migration_offline_cpu);
+	/*
+	 * In the unlikely case that this fails, the automatic
+	 * migration targets may become suboptimal for nodes
+	 * where N_CPU changes.  With such a small impact in a
+	 * rare case, do not bother trying to do anything special.
+	 */
+	WARN_ON(ret < 0);
+
+	hotplug_memory_notifier(migrate_on_reclaim_callback, 100);
+	return 0;
+}
+late_initcall(migrate_on_reclaim_init);
+#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/mm/mmap.c b/mm/mmap.c
index 181a113b545d..88dcc5c25225 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -148,8 +148,6 @@ void vma_set_page_prot(struct vm_area_struct *vma)
 static void __remove_shared_vm_struct(struct vm_area_struct *vma,
 		struct file *file, struct address_space *mapping)
 {
-	if (vma->vm_flags & VM_DENYWRITE)
-		allow_write_access(file);
 	if (vma->vm_flags & VM_SHARED)
 		mapping_unmap_writable(mapping);
 
@@ -534,6 +532,7 @@ static int find_vma_links(struct mm_struct *mm, unsigned long addr,
 {
 	struct rb_node **__rb_link, *__rb_parent, *rb_prev;
 
+	mmap_assert_locked(mm);
 	__rb_link = &mm->mm_rb.rb_node;
 	rb_prev = __rb_parent = NULL;
 
@@ -666,8 +665,6 @@ static void __vma_link_file(struct vm_area_struct *vma)
 	if (file) {
 		struct address_space *mapping = file->f_mapping;
 
-		if (vma->vm_flags & VM_DENYWRITE)
-			put_write_access(file_inode(file));
 		if (vma->vm_flags & VM_SHARED)
 			mapping_allow_writable(mapping);
 
@@ -1624,8 +1621,6 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
 			return PTR_ERR(file);
 	}
 
-	flags &= ~MAP_DENYWRITE;
-
 	retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
 out_fput:
 	if (file)
@@ -1782,22 +1777,12 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	vma->vm_pgoff = pgoff;
 
 	if (file) {
-		if (vm_flags & VM_DENYWRITE) {
-			error = deny_write_access(file);
-			if (error)
-				goto free_vma;
-		}
 		if (vm_flags & VM_SHARED) {
 			error = mapping_map_writable(file->f_mapping);
 			if (error)
-				goto allow_write_and_free_vma;
+				goto free_vma;
 		}
 
-		/* ->mmap() can change vma->vm_file, but must guarantee that
-		 * vma_link() below can deny write-access if VM_DENYWRITE is set
-		 * and map writably if VM_SHARED is set. This usually means the
-		 * new file must not have been exposed to user-space, yet.
-		 */
 		vma->vm_file = get_file(file);
 		error = call_mmap(file, vma);
 		if (error)
@@ -1854,13 +1839,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 
 	vma_link(mm, vma, prev, rb_link, rb_parent);
 	/* Once vma denies write, undo our temporary denial count */
-	if (file) {
 unmap_writable:
-		if (vm_flags & VM_SHARED)
-			mapping_unmap_writable(file->f_mapping);
-		if (vm_flags & VM_DENYWRITE)
-			allow_write_access(file);
-	}
+	if (file && vm_flags & VM_SHARED)
+		mapping_unmap_writable(file->f_mapping);
 	file = vma->vm_file;
 out:
 	perf_event_mmap(vma);
@@ -1900,9 +1881,6 @@ unmap_and_free_vma:
 	charged = 0;
 	if (vm_flags & VM_SHARED)
 		mapping_unmap_writable(file->f_mapping);
-allow_write_and_free_vma:
-	if (vm_flags & VM_DENYWRITE)
-		allow_write_access(file);
 free_vma:
 	vm_area_free(vma);
 unacct_error:
@@ -2297,6 +2275,7 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
 	struct rb_node *rb_node;
 	struct vm_area_struct *vma;
 
+	mmap_assert_locked(mm);
 	/* Check the cache first. */
 	vma = vmacache_find(mm, addr);
 	if (likely(vma))
@@ -2986,14 +2965,11 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
 	if (mmap_write_lock_killable(mm))
 		return -EINTR;
 
-	vma = find_vma(mm, start);
+	vma = vma_lookup(mm, start);
 
 	if (!vma || !(vma->vm_flags & VM_SHARED))
 		goto out;
 
-	if (start < vma->vm_start)
-		goto out;
-
 	if (start + size > vma->vm_end) {
 		struct vm_area_struct *next;
 
diff --git a/mm/mremap.c b/mm/mremap.c
index 5989d3990020..badfe17ade1f 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -686,7 +686,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 	if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) {
 		/* OOM: unable to split vma, just get accounts right */
 		if (vm_flags & VM_ACCOUNT && !(flags & MREMAP_DONTUNMAP))
-			vm_acct_memory(new_len >> PAGE_SHIFT);
+			vm_acct_memory(old_len >> PAGE_SHIFT);
 		excess = 0;
 	}
 
diff --git a/mm/nommu.c b/mm/nommu.c
index 9d0ad98f838c..02d2427b8f9e 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1293,8 +1293,6 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
 			goto out;
 	}
 
-	flags &= ~MAP_DENYWRITE;
-
 	retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
 
 	if (file)
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index c729a4c4a1ac..831340e7ad8b 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -28,6 +28,7 @@
 #include <linux/sched/task.h>
 #include <linux/sched/debug.h>
 #include <linux/swap.h>
+#include <linux/syscalls.h>
 #include <linux/timex.h>
 #include <linux/jiffies.h>
 #include <linux/cpuset.h>
@@ -1141,3 +1142,72 @@ void pagefault_out_of_memory(void)
 	out_of_memory(&oc);
 	mutex_unlock(&oom_lock);
 }
+
+SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
+{
+#ifdef CONFIG_MMU
+	struct mm_struct *mm = NULL;
+	struct task_struct *task;
+	struct task_struct *p;
+	unsigned int f_flags;
+	bool reap = true;
+	struct pid *pid;
+	long ret = 0;
+
+	if (flags)
+		return -EINVAL;
+
+	pid = pidfd_get_pid(pidfd, &f_flags);
+	if (IS_ERR(pid))
+		return PTR_ERR(pid);
+
+	task = get_pid_task(pid, PIDTYPE_TGID);
+	if (!task) {
+		ret = -ESRCH;
+		goto put_pid;
+	}
+
+	/*
+	 * Make sure to choose a thread which still has a reference to mm
+	 * during the group exit
+	 */
+	p = find_lock_task_mm(task);
+	if (!p) {
+		ret = -ESRCH;
+		goto put_task;
+	}
+
+	mm = p->mm;
+	mmgrab(mm);
+
+	/* If the work has been done already, just exit with success */
+	if (test_bit(MMF_OOM_SKIP, &mm->flags))
+		reap = false;
+	else if (!task_will_free_mem(p)) {
+		reap = false;
+		ret = -EINVAL;
+	}
+	task_unlock(p);
+
+	if (!reap)
+		goto drop_mm;
+
+	if (mmap_read_lock_killable(mm)) {
+		ret = -EINTR;
+		goto drop_mm;
+	}
+	if (!__oom_reap_task_mm(mm))
+		ret = -EAGAIN;
+	mmap_read_unlock(mm);
+
+drop_mm:
+	mmdrop(mm);
+put_task:
+	put_task_struct(task);
+put_pid:
+	put_pid(pid);
+	return ret;
+#else
+	return -ENOSYS;
+#endif /* CONFIG_MMU */
+}
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index c12f67cbfa19..4812a17b288c 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -183,7 +183,7 @@ static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
 static void wb_min_max_ratio(struct bdi_writeback *wb,
 			     unsigned long *minp, unsigned long *maxp)
 {
-	unsigned long this_bw = wb->avg_write_bandwidth;
+	unsigned long this_bw = READ_ONCE(wb->avg_write_bandwidth);
 	unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);
 	unsigned long long min = wb->bdi->min_ratio;
 	unsigned long long max = wb->bdi->max_ratio;
@@ -892,7 +892,7 @@ static long long pos_ratio_polynom(unsigned long setpoint,
 static void wb_position_ratio(struct dirty_throttle_control *dtc)
 {
 	struct bdi_writeback *wb = dtc->wb;
-	unsigned long write_bw = wb->avg_write_bandwidth;
+	unsigned long write_bw = READ_ONCE(wb->avg_write_bandwidth);
 	unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
 	unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
 	unsigned long wb_thresh = dtc->wb_thresh;
@@ -1115,7 +1115,7 @@ out:
 					&wb->bdi->tot_write_bandwidth) <= 0);
 	}
 	wb->write_bandwidth = bw;
-	wb->avg_write_bandwidth = avg;
+	WRITE_ONCE(wb->avg_write_bandwidth, avg);
 }
 
 static void update_dirty_limit(struct dirty_throttle_control *dtc)
@@ -1147,8 +1147,8 @@ update:
 	dom->dirty_limit = limit;
 }
 
-static void domain_update_bandwidth(struct dirty_throttle_control *dtc,
-				    unsigned long now)
+static void domain_update_dirty_limit(struct dirty_throttle_control *dtc,
+				      unsigned long now)
 {
 	struct wb_domain *dom = dtc_dom(dtc);
 
@@ -1324,7 +1324,7 @@ static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
 	else
 		dirty_ratelimit -= step;
 
-	wb->dirty_ratelimit = max(dirty_ratelimit, 1UL);
+	WRITE_ONCE(wb->dirty_ratelimit, max(dirty_ratelimit, 1UL));
 	wb->balanced_dirty_ratelimit = balanced_dirty_ratelimit;
 
 	trace_bdi_dirty_ratelimit(wb, dirty_rate, task_ratelimit);
@@ -1332,35 +1332,28 @@ static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
 
 static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc,
 				  struct dirty_throttle_control *mdtc,
-				  unsigned long start_time,
 				  bool update_ratelimit)
 {
 	struct bdi_writeback *wb = gdtc->wb;
 	unsigned long now = jiffies;
-	unsigned long elapsed = now - wb->bw_time_stamp;
+	unsigned long elapsed;
 	unsigned long dirtied;
 	unsigned long written;
 
-	lockdep_assert_held(&wb->list_lock);
+	spin_lock(&wb->list_lock);
 
 	/*
-	 * rate-limit, only update once every 200ms.
+	 * Lockless checks for elapsed time are racy and delayed update after
+	 * IO completion doesn't do it at all (to make sure written pages are
+	 * accounted reasonably quickly). Make sure elapsed >= 1 to avoid
+	 * division errors.
 	 */
-	if (elapsed < BANDWIDTH_INTERVAL)
-		return;
-
+	elapsed = max(now - wb->bw_time_stamp, 1UL);
 	dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]);
 	written = percpu_counter_read(&wb->stat[WB_WRITTEN]);
 
-	/*
-	 * Skip quiet periods when disk bandwidth is under-utilized.
-	 * (at least 1s idle time between two flusher runs)
-	 */
-	if (elapsed > HZ && time_before(wb->bw_time_stamp, start_time))
-		goto snapshot;
-
 	if (update_ratelimit) {
-		domain_update_bandwidth(gdtc, now);
+		domain_update_dirty_limit(gdtc, now);
 		wb_update_dirty_ratelimit(gdtc, dirtied, elapsed);
 
 		/*
@@ -1368,23 +1361,41 @@ static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc,
 		 * compiler has no way to figure that out.  Help it.
 		 */
 		if (IS_ENABLED(CONFIG_CGROUP_WRITEBACK) && mdtc) {
-			domain_update_bandwidth(mdtc, now);
+			domain_update_dirty_limit(mdtc, now);
 			wb_update_dirty_ratelimit(mdtc, dirtied, elapsed);
 		}
 	}
 	wb_update_write_bandwidth(wb, elapsed, written);
 
-snapshot:
 	wb->dirtied_stamp = dirtied;
 	wb->written_stamp = written;
-	wb->bw_time_stamp = now;
+	WRITE_ONCE(wb->bw_time_stamp, now);
+	spin_unlock(&wb->list_lock);
 }
 
-void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time)
+void wb_update_bandwidth(struct bdi_writeback *wb)
 {
 	struct dirty_throttle_control gdtc = { GDTC_INIT(wb) };
 
-	__wb_update_bandwidth(&gdtc, NULL, start_time, false);
+	__wb_update_bandwidth(&gdtc, NULL, false);
+}
+
+/* Interval after which we consider wb idle and don't estimate bandwidth */
+#define WB_BANDWIDTH_IDLE_JIF (HZ)
+
+static void wb_bandwidth_estimate_start(struct bdi_writeback *wb)
+{
+	unsigned long now = jiffies;
+	unsigned long elapsed = now - READ_ONCE(wb->bw_time_stamp);
+
+	if (elapsed > WB_BANDWIDTH_IDLE_JIF &&
+	    !atomic_read(&wb->writeback_inodes)) {
+		spin_lock(&wb->list_lock);
+		wb->dirtied_stamp = wb_stat(wb, WB_DIRTIED);
+		wb->written_stamp = wb_stat(wb, WB_WRITTEN);
+		WRITE_ONCE(wb->bw_time_stamp, now);
+		spin_unlock(&wb->list_lock);
+	}
 }
 
 /*
@@ -1407,7 +1418,7 @@ static unsigned long dirty_poll_interval(unsigned long dirty,
 static unsigned long wb_max_pause(struct bdi_writeback *wb,
 				  unsigned long wb_dirty)
 {
-	unsigned long bw = wb->avg_write_bandwidth;
+	unsigned long bw = READ_ONCE(wb->avg_write_bandwidth);
 	unsigned long t;
 
 	/*
@@ -1429,8 +1440,8 @@ static long wb_min_pause(struct bdi_writeback *wb,
 			 unsigned long dirty_ratelimit,
 			 int *nr_dirtied_pause)
 {
-	long hi = ilog2(wb->avg_write_bandwidth);
-	long lo = ilog2(wb->dirty_ratelimit);
+	long hi = ilog2(READ_ONCE(wb->avg_write_bandwidth));
+	long lo = ilog2(READ_ONCE(wb->dirty_ratelimit));
 	long t;		/* target pause */
 	long pause;	/* estimated next pause */
 	int pages;	/* target nr_dirtied_pause */
@@ -1710,15 +1721,12 @@ free_running:
 		if (dirty_exceeded && !wb->dirty_exceeded)
 			wb->dirty_exceeded = 1;
 
-		if (time_is_before_jiffies(wb->bw_time_stamp +
-					   BANDWIDTH_INTERVAL)) {
-			spin_lock(&wb->list_lock);
-			__wb_update_bandwidth(gdtc, mdtc, start_time, true);
-			spin_unlock(&wb->list_lock);
-		}
+		if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) +
+					   BANDWIDTH_INTERVAL))
+			__wb_update_bandwidth(gdtc, mdtc, true);
 
 		/* throttle according to the chosen dtc */
-		dirty_ratelimit = wb->dirty_ratelimit;
+		dirty_ratelimit = READ_ONCE(wb->dirty_ratelimit);
 		task_ratelimit = ((u64)dirty_ratelimit * sdtc->pos_ratio) >>
 							RATELIMIT_CALC_SHIFT;
 		max_pause = wb_max_pause(wb, sdtc->wb_dirty);
@@ -2345,9 +2353,12 @@ EXPORT_SYMBOL(generic_writepages);
 int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
 	int ret;
+	struct bdi_writeback *wb;
 
 	if (wbc->nr_to_write <= 0)
 		return 0;
+	wb = inode_to_wb_wbc(mapping->host, wbc);
+	wb_bandwidth_estimate_start(wb);
 	while (1) {
 		if (mapping->a_ops->writepages)
 			ret = mapping->a_ops->writepages(mapping, wbc);
@@ -2358,6 +2369,14 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
 		cond_resched();
 		congestion_wait(BLK_RW_ASYNC, HZ/50);
 	}
+	/*
+	 * Usually few pages are written by now from those we've just submitted
+	 * but if there's constant writeback being submitted, this makes sure
+	 * writeback bandwidth is updated once in a while.
+	 */
+	if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) +
+				   BANDWIDTH_INTERVAL))
+		wb_update_bandwidth(wb);
 	return ret;
 }
 
@@ -2729,6 +2748,24 @@ int clear_page_dirty_for_io(struct page *page)
 }
 EXPORT_SYMBOL(clear_page_dirty_for_io);
 
+static void wb_inode_writeback_start(struct bdi_writeback *wb)
+{
+	atomic_inc(&wb->writeback_inodes);
+}
+
+static void wb_inode_writeback_end(struct bdi_writeback *wb)
+{
+	atomic_dec(&wb->writeback_inodes);
+	/*
+	 * Make sure estimate of writeback throughput gets updated after
+	 * writeback completed. We delay the update by BANDWIDTH_INTERVAL
+	 * (which is the interval other bandwidth updates use for batching) so
+	 * that if multiple inodes end writeback at a similar time, they get
+	 * batched into one bandwidth update.
+	 */
+	queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);
+}
+
 int test_clear_page_writeback(struct page *page)
 {
 	struct address_space *mapping = page_mapping(page);
@@ -2750,6 +2787,9 @@ int test_clear_page_writeback(struct page *page)
 
 				dec_wb_stat(wb, WB_WRITEBACK);
 				__wb_writeout_inc(wb);
+				if (!mapping_tagged(mapping,
+						    PAGECACHE_TAG_WRITEBACK))
+					wb_inode_writeback_end(wb);
 			}
 		}
 
@@ -2792,8 +2832,13 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 						   PAGECACHE_TAG_WRITEBACK);
 
 			xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK);
-			if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT)
-				inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
+			if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
+				struct bdi_writeback *wb = inode_to_wb(inode);
+
+				inc_wb_stat(wb, WB_WRITEBACK);
+				if (!on_wblist)
+					wb_inode_writeback_start(wb);
+			}
 
 			/*
 			 * We can come through here when swapping anonymous
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index eeb3a9cb36bb..b37435c274cf 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -594,8 +594,6 @@ static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
 
 static int page_is_consistent(struct zone *zone, struct page *page)
 {
-	if (!pfn_valid_within(page_to_pfn(page)))
-		return 0;
 	if (zone != page_zone(page))
 		return 0;
 
@@ -1025,16 +1023,12 @@ buddy_merge_likely(unsigned long pfn, unsigned long buddy_pfn,
 	if (order >= MAX_ORDER - 2)
 		return false;
 
-	if (!pfn_valid_within(buddy_pfn))
-		return false;
-
 	combined_pfn = buddy_pfn & pfn;
 	higher_page = page + (combined_pfn - pfn);
 	buddy_pfn = __find_buddy_pfn(combined_pfn, order + 1);
 	higher_buddy = higher_page + (buddy_pfn - combined_pfn);
 
-	return pfn_valid_within(buddy_pfn) &&
-	       page_is_buddy(higher_page, higher_buddy, order + 1);
+	return page_is_buddy(higher_page, higher_buddy, order + 1);
 }
 
 /*
@@ -1095,8 +1089,6 @@ continue_merging:
 		buddy_pfn = __find_buddy_pfn(pfn, order);
 		buddy = page + (buddy_pfn - pfn);
 
-		if (!pfn_valid_within(buddy_pfn))
-			goto done_merging;
 		if (!page_is_buddy(page, buddy, order))
 			goto done_merging;
 		/*
@@ -1754,9 +1746,7 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn,
 /*
  * Check that the whole (or subset of) a pageblock given by the interval of
  * [start_pfn, end_pfn) is valid and within the same zone, before scanning it
- * with the migration of free compaction scanner. The scanners then need to
- * use only pfn_valid_within() check for arches that allow holes within
- * pageblocks.
+ * with the migration of free compaction scanner.
  *
  * Return struct page pointer of start_pfn, or NULL if checks were not passed.
  *
@@ -1872,8 +1862,6 @@ static inline void __init pgdat_init_report_one_done(void)
  */
 static inline bool __init deferred_pfn_valid(unsigned long pfn)
 {
-	if (!pfn_valid_within(pfn))
-		return false;
 	if (!(pfn & (pageblock_nr_pages - 1)) && !pfn_valid(pfn))
 		return false;
 	return true;
@@ -2520,11 +2508,6 @@ static int move_freepages(struct zone *zone,
 	int pages_moved = 0;
 
 	for (pfn = start_pfn; pfn <= end_pfn;) {
-		if (!pfn_valid_within(pfn)) {
-			pfn++;
-			continue;
-		}
-
 		page = pfn_to_page(pfn);
 		if (!PageBuddy(page)) {
 			/*
@@ -3445,8 +3428,10 @@ void free_unref_page_list(struct list_head *list)
 	/* Prepare pages for freeing */
 	list_for_each_entry_safe(page, next, list, lru) {
 		pfn = page_to_pfn(page);
-		if (!free_unref_page_prepare(page, pfn, 0))
+		if (!free_unref_page_prepare(page, pfn, 0)) {
 			list_del(&page->lru);
+			continue;
+		}
 
 		/*
 		 * Free isolated pages directly to the allocator, see
@@ -4211,7 +4196,7 @@ static void warn_alloc_show_mem(gfp_t gfp_mask, nodemask_t *nodemask)
 		if (tsk_is_oom_victim(current) ||
 		    (current->flags & (PF_MEMALLOC | PF_EXITING)))
 			filter &= ~SHOW_MEM_FILTER_NODES;
-	if (in_interrupt() || !(gfp_mask & __GFP_DIRECT_RECLAIM))
+	if (!in_task() || !(gfp_mask & __GFP_DIRECT_RECLAIM))
 		filter &= ~SHOW_MEM_FILTER_NODES;
 
 	show_mem(filter, nodemask);
@@ -4549,14 +4534,14 @@ static bool __need_reclaim(gfp_t gfp_mask)
 	return true;
 }
 
-void __fs_reclaim_acquire(void)
+void __fs_reclaim_acquire(unsigned long ip)
 {
-	lock_map_acquire(&__fs_reclaim_map);
+	lock_acquire_exclusive(&__fs_reclaim_map, 0, 0, NULL, ip);
 }
 
-void __fs_reclaim_release(void)
+void __fs_reclaim_release(unsigned long ip)
 {
-	lock_map_release(&__fs_reclaim_map);
+	lock_release(&__fs_reclaim_map, ip);
 }
 
 void fs_reclaim_acquire(gfp_t gfp_mask)
@@ -4565,7 +4550,7 @@ void fs_reclaim_acquire(gfp_t gfp_mask)
 
 	if (__need_reclaim(gfp_mask)) {
 		if (gfp_mask & __GFP_FS)
-			__fs_reclaim_acquire();
+			__fs_reclaim_acquire(_RET_IP_);
 
 #ifdef CONFIG_MMU_NOTIFIER
 		lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
@@ -4582,7 +4567,7 @@ void fs_reclaim_release(gfp_t gfp_mask)
 
 	if (__need_reclaim(gfp_mask)) {
 		if (gfp_mask & __GFP_FS)
-			__fs_reclaim_release();
+			__fs_reclaim_release(_RET_IP_);
 	}
 }
 EXPORT_SYMBOL_GPL(fs_reclaim_release);
@@ -4697,7 +4682,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
 		 * comment for __cpuset_node_allowed().
 		 */
 		alloc_flags &= ~ALLOC_CPUSET;
-	} else if (unlikely(rt_task(current)) && !in_interrupt())
+	} else if (unlikely(rt_task(current)) && in_task())
 		alloc_flags |= ALLOC_HARDER;
 
 	alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, alloc_flags);
@@ -5157,7 +5142,7 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
 		 * When we are in the interrupt context, it is irrelevant
 		 * to the current task context. It means that any node ok.
 		 */
-		if (!in_interrupt() && !ac->nodemask)
+		if (in_task() && !ac->nodemask)
 			ac->nodemask = &cpuset_current_mems_allowed;
 		else
 			*alloc_flags |= ALLOC_CPUSET;
@@ -5903,6 +5888,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 		" unevictable:%lu dirty:%lu writeback:%lu\n"
 		" slab_reclaimable:%lu slab_unreclaimable:%lu\n"
 		" mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n"
+		" kernel_misc_reclaimable:%lu\n"
 		" free:%lu free_pcp:%lu free_cma:%lu\n",
 		global_node_page_state(NR_ACTIVE_ANON),
 		global_node_page_state(NR_INACTIVE_ANON),
@@ -5919,6 +5905,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 		global_node_page_state(NR_SHMEM),
 		global_node_page_state(NR_PAGETABLE),
 		global_zone_page_state(NR_BOUNCE),
+		global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE),
 		global_zone_page_state(NR_FREE_PAGES),
 		free_pcp,
 		global_zone_page_state(NR_FREE_CMA_PAGES));
@@ -6155,7 +6142,7 @@ static int node_load[MAX_NUMNODES];
  *
  * Return: node id of the found node or %NUMA_NO_NODE if no node is found.
  */
-static int find_next_best_node(int node, nodemask_t *used_node_mask)
+int find_next_best_node(int node, nodemask_t *used_node_mask)
 {
 	int n, val;
 	int min_val = INT_MAX;
@@ -6640,7 +6627,6 @@ static void __meminit zone_init_free_lists(struct zone *zone)
 	}
 }
 
-#if !defined(CONFIG_FLATMEM)
 /*
  * Only struct pages that correspond to ranges defined by memblock.memory
  * are zeroed and initialized by going through __init_single_page() during
@@ -6685,13 +6671,6 @@ static void __init init_unavailable_range(unsigned long spfn,
 		pr_info("On node %d, zone %s: %lld pages in unavailable ranges",
 			node, zone_names[zone], pgcnt);
 }
-#else
-static inline void init_unavailable_range(unsigned long spfn,
-					  unsigned long epfn,
-					  int zone, int node)
-{
-}
-#endif
 
 static void __init memmap_init_zone_range(struct zone *zone,
 					  unsigned long start_pfn,
@@ -6721,7 +6700,7 @@ static void __init memmap_init(void)
 {
 	unsigned long start_pfn, end_pfn;
 	unsigned long hole_pfn = 0;
-	int i, j, zone_id, nid;
+	int i, j, zone_id = 0, nid;
 
 	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
 		struct pglist_data *node = NODE_DATA(nid);
@@ -6754,6 +6733,26 @@ static void __init memmap_init(void)
 		init_unavailable_range(hole_pfn, end_pfn, zone_id, nid);
 }
 
+void __init *memmap_alloc(phys_addr_t size, phys_addr_t align,
+			  phys_addr_t min_addr, int nid, bool exact_nid)
+{
+	void *ptr;
+
+	if (exact_nid)
+		ptr = memblock_alloc_exact_nid_raw(size, align, min_addr,
+						   MEMBLOCK_ALLOC_ACCESSIBLE,
+						   nid);
+	else
+		ptr = memblock_alloc_try_nid_raw(size, align, min_addr,
+						 MEMBLOCK_ALLOC_ACCESSIBLE,
+						 nid);
+
+	if (ptr && size > 0)
+		page_init_poison(ptr, size);
+
+	return ptr;
+}
+
 static int zone_batchsize(struct zone *zone)
 {
 #ifdef CONFIG_MMU
@@ -7257,6 +7256,9 @@ static void __init calculate_node_totalpages(struct pglist_data *pgdat,
 			zone->zone_start_pfn = 0;
 		zone->spanned_pages = size;
 		zone->present_pages = real_size;
+#if defined(CONFIG_MEMORY_HOTPLUG)
+		zone->present_early_pages = real_size;
+#endif
 
 		totalpages += size;
 		realtotalpages += real_size;
@@ -7501,7 +7503,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat)
 }
 
 #ifdef CONFIG_FLATMEM
-static void __ref alloc_node_mem_map(struct pglist_data *pgdat)
+static void __init alloc_node_mem_map(struct pglist_data *pgdat)
 {
 	unsigned long __maybe_unused start = 0;
 	unsigned long __maybe_unused offset = 0;
@@ -7525,8 +7527,8 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat)
 		end = pgdat_end_pfn(pgdat);
 		end = ALIGN(end, MAX_ORDER_NR_PAGES);
 		size =  (end - start) * sizeof(struct page);
-		map = memblock_alloc_node(size, SMP_CACHE_BYTES,
-					  pgdat->node_id);
+		map = memmap_alloc(size, SMP_CACHE_BYTES, MEMBLOCK_LOW_LIMIT,
+				   pgdat->node_id, false);
 		if (!map)
 			panic("Failed to allocate %ld bytes for node %d memory map\n",
 			      size, pgdat->node_id);
@@ -7547,7 +7549,7 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat)
 #endif
 }
 #else
-static void __ref alloc_node_mem_map(struct pglist_data *pgdat) { }
+static inline void alloc_node_mem_map(struct pglist_data *pgdat) { }
 #endif /* CONFIG_FLATMEM */
 
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
@@ -8814,9 +8816,6 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page,
 	}
 
 	for (; iter < pageblock_nr_pages - offset; iter++) {
-		if (!pfn_valid_within(pfn + iter))
-			continue;
-
 		page = pfn_to_page(pfn + iter);
 
 		/*
@@ -8976,7 +8975,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
 		cc->nr_migratepages -= nr_reclaimed;
 
 		ret = migrate_pages(&cc->migratepages, alloc_migration_target,
-				NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE);
+			NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE, NULL);
 
 		/*
 		 * On -ENOMEM, migrate_pages() bails out right away. It is pointless
diff --git a/mm/page_ext.c b/mm/page_ext.c
index 293b2685fc48..dfb91653d359 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -58,11 +58,21 @@
  * can utilize this callback to initialize the state of it correctly.
  */
 
+#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT)
+static bool need_page_idle(void)
+{
+	return true;
+}
+struct page_ext_operations page_idle_ops = {
+	.need = need_page_idle,
+};
+#endif
+
 static struct page_ext_operations *page_ext_ops[] = {
 #ifdef CONFIG_PAGE_OWNER
 	&page_owner_ops,
 #endif
-#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
+#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT)
 	&page_idle_ops,
 #endif
 };
diff --git a/mm/page_idle.c b/mm/page_idle.c
index 64e5344a992c..edead6a8a5f9 100644
--- a/mm/page_idle.c
+++ b/mm/page_idle.c
@@ -207,16 +207,6 @@ static const struct attribute_group page_idle_attr_group = {
 	.name = "page_idle",
 };
 
-#ifndef CONFIG_64BIT
-static bool need_page_idle(void)
-{
-	return true;
-}
-struct page_ext_operations page_idle_ops = {
-	.need = need_page_idle,
-};
-#endif
-
 static int __init page_idle_init(void)
 {
 	int err;
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index bddf788f45bf..a95c2c6562d0 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -93,8 +93,7 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
 			buddy_pfn = __find_buddy_pfn(pfn, order);
 			buddy = page + (buddy_pfn - pfn);
 
-			if (pfn_valid_within(buddy_pfn) &&
-			    !is_migrate_isolate_page(buddy)) {
+			if (!is_migrate_isolate_page(buddy)) {
 				__isolate_free_page(page, order);
 				isolated_page = true;
 			}
@@ -250,10 +249,6 @@ __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn,
 	struct page *page;
 
 	while (pfn < end_pfn) {
-		if (!pfn_valid_within(pfn)) {
-			pfn++;
-			continue;
-		}
 		page = pfn_to_page(pfn);
 		if (PageBuddy(page))
 			/*
@@ -287,6 +282,7 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
 	unsigned long pfn, flags;
 	struct page *page;
 	struct zone *zone;
+	int ret;
 
 	/*
 	 * Note: pageblock_nr_pages != MAX_ORDER. Then, chunks of free pages
@@ -299,15 +295,21 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
 			break;
 	}
 	page = __first_valid_page(start_pfn, end_pfn - start_pfn);
-	if ((pfn < end_pfn) || !page)
-		return -EBUSY;
+	if ((pfn < end_pfn) || !page) {
+		ret = -EBUSY;
+		goto out;
+	}
+
 	/* Check all pages are free or marked as ISOLATED */
 	zone = page_zone(page);
 	spin_lock_irqsave(&zone->lock, flags);
 	pfn = __test_page_isolated_in_pageblock(start_pfn, end_pfn, isol_flags);
 	spin_unlock_irqrestore(&zone->lock, flags);
 
+	ret = pfn < end_pfn ? -EBUSY : 0;
+
+out:
 	trace_test_pages_isolated(start_pfn, end_pfn, pfn);
 
-	return pfn < end_pfn ? -EBUSY : 0;
+	return ret;
 }
diff --git a/mm/page_owner.c b/mm/page_owner.c
index f51a57e92aa3..4afc713ca525 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -276,9 +276,6 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
 		pageblock_mt = get_pageblock_migratetype(page);
 
 		for (; pfn < block_end_pfn; pfn++) {
-			if (!pfn_valid_within(pfn))
-				continue;
-
 			/* The pageblock is online, no need to recheck. */
 			page = pfn_to_page(pfn);
 
@@ -354,12 +351,12 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn,
 	pageblock_mt = get_pageblock_migratetype(page);
 	page_mt  = gfp_migratetype(page_owner->gfp_mask);
 	ret += snprintf(kbuf + ret, count - ret,
-			"PFN %lu type %s Block %lu type %s Flags %#lx(%pGp)\n",
+			"PFN %lu type %s Block %lu type %s Flags %pGp\n",
 			pfn,
 			migratetype_names[page_mt],
 			pfn >> pageblock_order,
 			migratetype_names[pageblock_mt],
-			page->flags, &page->flags);
+			&page->flags);
 
 	if (ret >= count)
 		goto err;
@@ -479,10 +476,6 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 			continue;
 		}
 
-		/* Check for holes within a MAX_ORDER area */
-		if (!pfn_valid_within(pfn))
-			continue;
-
 		page = pfn_to_page(pfn);
 		if (PageBuddy(page)) {
 			unsigned long freepage_order = buddy_order_unsafe(page);
@@ -560,14 +553,9 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
 		block_end_pfn = min(block_end_pfn, end_pfn);
 
 		for (; pfn < block_end_pfn; pfn++) {
-			struct page *page;
+			struct page *page = pfn_to_page(pfn);
 			struct page_ext *page_ext;
 
-			if (!pfn_valid_within(pfn))
-				continue;
-
-			page = pfn_to_page(pfn);
-
 			if (page_zone(page) != zone)
 				continue;
 
diff --git a/mm/percpu.c b/mm/percpu.c
index 7f2e0151c4e2..e0a986818903 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -146,7 +146,6 @@ static unsigned int pcpu_high_unit_cpu __ro_after_init;
 
 /* the address of the first chunk which starts with the kernel static area */
 void *pcpu_base_addr __ro_after_init;
-EXPORT_SYMBOL_GPL(pcpu_base_addr);
 
 static const int *pcpu_unit_map __ro_after_init;		/* cpu -> unit */
 const unsigned long *pcpu_unit_offsets __ro_after_init;	/* cpu -> unit offset */
@@ -1520,9 +1519,6 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk)
  * Pages in [@page_start,@page_end) have been populated to @chunk.  Update
  * the bookkeeping information accordingly.  Must be called after each
  * successful population.
- *
- * If this is @for_alloc, do not increment pcpu_nr_empty_pop_pages because it
- * is to serve an allocation in that area.
  */
 static void pcpu_chunk_populated(struct pcpu_chunk *chunk, int page_start,
 				 int page_end)
diff --git a/mm/rmap.c b/mm/rmap.c
index 2d29a57d29e8..6aebd1747251 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1231,11 +1231,13 @@ void page_add_file_rmap(struct page *page, bool compound)
 						nr_pages);
 	} else {
 		if (PageTransCompound(page) && page_mapping(page)) {
+			struct page *head = compound_head(page);
+
 			VM_WARN_ON_ONCE(!PageLocked(page));
 
-			SetPageDoubleMap(compound_head(page));
+			SetPageDoubleMap(head);
 			if (PageMlocked(page))
-				clear_page_mlock(compound_head(page));
+				clear_page_mlock(head);
 		}
 		if (!atomic_inc_and_test(&page->_mapcount))
 			goto out;
diff --git a/mm/secretmem.c b/mm/secretmem.c
index 030f02ddc7c1..1fea68b8d5a6 100644
--- a/mm/secretmem.c
+++ b/mm/secretmem.c
@@ -18,6 +18,7 @@
 #include <linux/secretmem.h>
 #include <linux/set_memory.h>
 #include <linux/sched/signal.h>
+#include <linux/refcount.h>
 
 #include <uapi/linux/magic.h>
 
@@ -40,11 +41,11 @@ module_param_named(enable, secretmem_enable, bool, 0400);
 MODULE_PARM_DESC(secretmem_enable,
 		 "Enable secretmem and memfd_secret(2) system call");
 
-static atomic_t secretmem_users;
+static refcount_t secretmem_users;
 
 bool secretmem_active(void)
 {
-	return !!atomic_read(&secretmem_users);
+	return !!refcount_read(&secretmem_users);
 }
 
 static vm_fault_t secretmem_fault(struct vm_fault *vmf)
@@ -103,7 +104,7 @@ static const struct vm_operations_struct secretmem_vm_ops = {
 
 static int secretmem_release(struct inode *inode, struct file *file)
 {
-	atomic_dec(&secretmem_users);
+	refcount_dec(&secretmem_users);
 	return 0;
 }
 
@@ -217,7 +218,7 @@ SYSCALL_DEFINE1(memfd_secret, unsigned int, flags)
 	file->f_flags |= O_LARGEFILE;
 
 	fd_install(fd, file);
-	atomic_inc(&secretmem_users);
+	refcount_inc(&secretmem_users);
 	return fd;
 
 err_put_fd:
diff --git a/mm/shmem.c b/mm/shmem.c
index 3107acee4f71..88742953532c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -38,8 +38,7 @@
 #include <linux/hugetlb.h>
 #include <linux/frontswap.h>
 #include <linux/fs_parser.h>
-
-#include <asm/tlbflush.h> /* for arch/microblaze update_mmu_cache() */
+#include <linux/swapfile.h>
 
 static struct vfsmount *shm_mnt;
 
@@ -137,9 +136,6 @@ static unsigned long shmem_default_max_inodes(void)
 }
 #endif
 
-static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
-static int shmem_replace_page(struct page **pagep, gfp_t gfp,
-				struct shmem_inode_info *info, pgoff_t index);
 static int shmem_swapin_page(struct inode *inode, pgoff_t index,
 			     struct page **pagep, enum sgp_type sgp,
 			     gfp_t gfp, struct vm_area_struct *vma,
@@ -278,10 +274,10 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
 	ino_t ino;
 
 	if (!(sb->s_flags & SB_KERNMOUNT)) {
-		spin_lock(&sbinfo->stat_lock);
+		raw_spin_lock(&sbinfo->stat_lock);
 		if (sbinfo->max_inodes) {
 			if (!sbinfo->free_inodes) {
-				spin_unlock(&sbinfo->stat_lock);
+				raw_spin_unlock(&sbinfo->stat_lock);
 				return -ENOSPC;
 			}
 			sbinfo->free_inodes--;
@@ -304,7 +300,7 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
 			}
 			*inop = ino;
 		}
-		spin_unlock(&sbinfo->stat_lock);
+		raw_spin_unlock(&sbinfo->stat_lock);
 	} else if (inop) {
 		/*
 		 * __shmem_file_setup, one of our callers, is lock-free: it
@@ -319,13 +315,14 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
 		 * to worry about things like glibc compatibility.
 		 */
 		ino_t *next_ino;
+
 		next_ino = per_cpu_ptr(sbinfo->ino_batch, get_cpu());
 		ino = *next_ino;
 		if (unlikely(ino % SHMEM_INO_BATCH == 0)) {
-			spin_lock(&sbinfo->stat_lock);
+			raw_spin_lock(&sbinfo->stat_lock);
 			ino = sbinfo->next_ino;
 			sbinfo->next_ino += SHMEM_INO_BATCH;
-			spin_unlock(&sbinfo->stat_lock);
+			raw_spin_unlock(&sbinfo->stat_lock);
 			if (unlikely(is_zero_ino(ino)))
 				ino++;
 		}
@@ -341,9 +338,9 @@ static void shmem_free_inode(struct super_block *sb)
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 	if (sbinfo->max_inodes) {
-		spin_lock(&sbinfo->stat_lock);
+		raw_spin_lock(&sbinfo->stat_lock);
 		sbinfo->free_inodes++;
-		spin_unlock(&sbinfo->stat_lock);
+		raw_spin_unlock(&sbinfo->stat_lock);
 	}
 }
 
@@ -474,7 +471,38 @@ static bool shmem_confirm_swap(struct address_space *mapping,
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /* ifdef here to avoid bloating shmem.o when not necessary */
 
-static int shmem_huge __read_mostly;
+static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER;
+
+bool shmem_is_huge(struct vm_area_struct *vma,
+		   struct inode *inode, pgoff_t index)
+{
+	loff_t i_size;
+
+	if (shmem_huge == SHMEM_HUGE_DENY)
+		return false;
+	if (vma && ((vma->vm_flags & VM_NOHUGEPAGE) ||
+	    test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)))
+		return false;
+	if (shmem_huge == SHMEM_HUGE_FORCE)
+		return true;
+
+	switch (SHMEM_SB(inode->i_sb)->huge) {
+	case SHMEM_HUGE_ALWAYS:
+		return true;
+	case SHMEM_HUGE_WITHIN_SIZE:
+		index = round_up(index, HPAGE_PMD_NR);
+		i_size = round_up(i_size_read(inode), PAGE_SIZE);
+		if (i_size >= HPAGE_PMD_SIZE && (i_size >> PAGE_SHIFT) >= index)
+			return true;
+		fallthrough;
+	case SHMEM_HUGE_ADVISE:
+		if (vma && (vma->vm_flags & VM_HUGEPAGE))
+			return true;
+		fallthrough;
+	default:
+		return false;
+	}
+}
 
 #if defined(CONFIG_SYSFS)
 static int shmem_parse_huge(const char *str)
@@ -645,6 +673,12 @@ static long shmem_unused_huge_count(struct super_block *sb,
 
 #define shmem_huge SHMEM_HUGE_DENY
 
+bool shmem_is_huge(struct vm_area_struct *vma,
+		   struct inode *inode, pgoff_t index)
+{
+	return false;
+}
+
 static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
 		struct shrink_control *sc, unsigned long nr_to_split)
 {
@@ -652,15 +686,6 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-static inline bool is_huge_enabled(struct shmem_sb_info *sbinfo)
-{
-	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
-	    (shmem_huge == SHMEM_HUGE_FORCE || sbinfo->huge) &&
-	    shmem_huge != SHMEM_HUGE_DENY)
-		return true;
-	return false;
-}
-
 /*
  * Like add_to_page_cache_locked, but error if expected item has gone.
  */
@@ -905,6 +930,9 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 	if (lend == -1)
 		end = -1;	/* unsigned, so actually very big */
 
+	if (info->fallocend > start && info->fallocend <= end && !unfalloc)
+		info->fallocend = start;
+
 	pagevec_init(&pvec);
 	index = start;
 	while (index < end && find_lock_entries(mapping, index, end - 1,
@@ -1038,7 +1066,6 @@ static int shmem_getattr(struct user_namespace *mnt_userns,
 {
 	struct inode *inode = path->dentry->d_inode;
 	struct shmem_inode_info *info = SHMEM_I(inode);
-	struct shmem_sb_info *sb_info = SHMEM_SB(inode->i_sb);
 
 	if (info->alloced - info->swapped != inode->i_mapping->nrpages) {
 		spin_lock_irq(&info->lock);
@@ -1047,7 +1074,7 @@ static int shmem_getattr(struct user_namespace *mnt_userns,
 	}
 	generic_fillattr(&init_user_ns, inode, stat);
 
-	if (is_huge_enabled(sb_info))
+	if (shmem_is_huge(NULL, inode, 0))
 		stat->blksize = HPAGE_PMD_SIZE;
 
 	return 0;
@@ -1058,7 +1085,6 @@ static int shmem_setattr(struct user_namespace *mnt_userns,
 {
 	struct inode *inode = d_inode(dentry);
 	struct shmem_inode_info *info = SHMEM_I(inode);
-	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 	int error;
 
 	error = setattr_prepare(&init_user_ns, dentry, attr);
@@ -1094,24 +1120,6 @@ static int shmem_setattr(struct user_namespace *mnt_userns,
 			if (oldsize > holebegin)
 				unmap_mapping_range(inode->i_mapping,
 							holebegin, 0, 1);
-
-			/*
-			 * Part of the huge page can be beyond i_size: subject
-			 * to shrink under memory pressure.
-			 */
-			if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
-				spin_lock(&sbinfo->shrinklist_lock);
-				/*
-				 * _careful to defend against unlocked access to
-				 * ->shrink_list in shmem_unused_huge_shrink()
-				 */
-				if (list_empty_careful(&info->shrinklist)) {
-					list_add_tail(&info->shrinklist,
-							&sbinfo->shrinklist);
-					sbinfo->shrinklist_len++;
-				}
-				spin_unlock(&sbinfo->shrinklist_lock);
-			}
 		}
 	}
 
@@ -1156,8 +1164,6 @@ static void shmem_evict_inode(struct inode *inode)
 	clear_inode(inode);
 }
 
-extern struct swap_info_struct *swap_info[];
-
 static int shmem_find_swap_entries(struct address_space *mapping,
 				   pgoff_t start, unsigned int nr_entries,
 				   struct page **entries, pgoff_t *indices,
@@ -1338,7 +1344,19 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	swp_entry_t swap;
 	pgoff_t index;
 
-	VM_BUG_ON_PAGE(PageCompound(page), page);
+	/*
+	 * If /sys/kernel/mm/transparent_hugepage/shmem_enabled is "always" or
+	 * "force", drivers/gpu/drm/i915/gem/i915_gem_shmem.c gets huge pages,
+	 * and its shmem_writeback() needs them to be split when swapping.
+	 */
+	if (PageTransCompound(page)) {
+		/* Ensure the subpages are still dirty */
+		SetPageDirty(page);
+		if (split_huge_page(page) < 0)
+			goto redirty;
+		ClearPageDirty(page);
+	}
+
 	BUG_ON(!PageLocked(page));
 	mapping = page->mapping;
 	index = page->index;
@@ -1453,10 +1471,10 @@ static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
 {
 	struct mempolicy *mpol = NULL;
 	if (sbinfo->mpol) {
-		spin_lock(&sbinfo->stat_lock);	/* prevent replace/use races */
+		raw_spin_lock(&sbinfo->stat_lock);	/* prevent replace/use races */
 		mpol = sbinfo->mpol;
 		mpol_get(mpol);
-		spin_unlock(&sbinfo->stat_lock);
+		raw_spin_unlock(&sbinfo->stat_lock);
 	}
 	return mpol;
 }
@@ -1798,7 +1816,6 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 	struct shmem_sb_info *sbinfo;
 	struct mm_struct *charge_mm;
 	struct page *page;
-	enum sgp_type sgp_huge = sgp;
 	pgoff_t hindex = index;
 	gfp_t huge_gfp;
 	int error;
@@ -1807,8 +1824,6 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 
 	if (index > (MAX_LFS_FILESIZE >> PAGE_SHIFT))
 		return -EFBIG;
-	if (sgp == SGP_NOHUGE || sgp == SGP_HUGE)
-		sgp = SGP_CACHE;
 repeat:
 	if (sgp <= SGP_CACHE &&
 	    ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) {
@@ -1840,26 +1855,31 @@ repeat:
 		return error;
 	}
 
-	if (page)
+	if (page) {
 		hindex = page->index;
-	if (page && sgp == SGP_WRITE)
-		mark_page_accessed(page);
-
-	/* fallocated page? */
-	if (page && !PageUptodate(page)) {
+		if (sgp == SGP_WRITE)
+			mark_page_accessed(page);
+		if (PageUptodate(page))
+			goto out;
+		/* fallocated page */
 		if (sgp != SGP_READ)
 			goto clear;
 		unlock_page(page);
 		put_page(page);
-		page = NULL;
-		hindex = index;
 	}
-	if (page || sgp == SGP_READ)
-		goto out;
 
 	/*
-	 * Fast cache lookup did not find it:
-	 * bring it back from swap or allocate.
+	 * SGP_READ: succeed on hole, with NULL page, letting caller zero.
+	 * SGP_NOALLOC: fail on hole, with NULL page, letting caller fail.
+	 */
+	*pagep = NULL;
+	if (sgp == SGP_READ)
+		return 0;
+	if (sgp == SGP_NOALLOC)
+		return -ENOENT;
+
+	/*
+	 * Fast cache lookup and swap lookup did not find it: allocate.
 	 */
 
 	if (vma && userfaultfd_missing(vma)) {
@@ -1867,36 +1887,12 @@ repeat:
 		return 0;
 	}
 
-	/* shmem_symlink() */
-	if (!shmem_mapping(mapping))
-		goto alloc_nohuge;
-	if (shmem_huge == SHMEM_HUGE_DENY || sgp_huge == SGP_NOHUGE)
-		goto alloc_nohuge;
-	if (shmem_huge == SHMEM_HUGE_FORCE)
-		goto alloc_huge;
-	switch (sbinfo->huge) {
-	case SHMEM_HUGE_NEVER:
+	/* Never use a huge page for shmem_symlink() */
+	if (S_ISLNK(inode->i_mode))
 		goto alloc_nohuge;
-	case SHMEM_HUGE_WITHIN_SIZE: {
-		loff_t i_size;
-		pgoff_t off;
-
-		off = round_up(index, HPAGE_PMD_NR);
-		i_size = round_up(i_size_read(inode), PAGE_SIZE);
-		if (i_size >= HPAGE_PMD_SIZE &&
-		    i_size >> PAGE_SHIFT >= off)
-			goto alloc_huge;
-
-		fallthrough;
-	}
-	case SHMEM_HUGE_ADVISE:
-		if (sgp_huge == SGP_HUGE)
-			goto alloc_huge;
-		/* TODO: implement fadvise() hints */
+	if (!shmem_is_huge(vma, inode, index))
 		goto alloc_nohuge;
-	}
 
-alloc_huge:
 	huge_gfp = vma_thp_gfp_mask(vma);
 	huge_gfp = limit_gfp_mask(huge_gfp, gfp);
 	page = shmem_alloc_and_acct_page(huge_gfp, inode, index, true);
@@ -2052,7 +2048,6 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf)
 	struct vm_area_struct *vma = vmf->vma;
 	struct inode *inode = file_inode(vma->vm_file);
 	gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
-	enum sgp_type sgp;
 	int err;
 	vm_fault_t ret = VM_FAULT_LOCKED;
 
@@ -2115,15 +2110,7 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf)
 		spin_unlock(&inode->i_lock);
 	}
 
-	sgp = SGP_CACHE;
-
-	if ((vma->vm_flags & VM_NOHUGEPAGE) ||
-	    test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
-		sgp = SGP_NOHUGE;
-	else if (vma->vm_flags & VM_HUGEPAGE)
-		sgp = SGP_HUGE;
-
-	err = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, sgp,
+	err = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, SGP_CACHE,
 				  gfp, vma, vmf, &ret);
 	if (err)
 		return vmf_error(err);
@@ -2655,7 +2642,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct shmem_falloc shmem_falloc;
-	pgoff_t start, index, end;
+	pgoff_t start, index, end, undo_fallocend;
 	int error;
 
 	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
@@ -2724,7 +2711,16 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 	inode->i_private = &shmem_falloc;
 	spin_unlock(&inode->i_lock);
 
-	for (index = start; index < end; index++) {
+	/*
+	 * info->fallocend is only relevant when huge pages might be
+	 * involved: to prevent split_huge_page() freeing fallocated
+	 * pages when FALLOC_FL_KEEP_SIZE committed beyond i_size.
+	 */
+	undo_fallocend = info->fallocend;
+	if (info->fallocend < end)
+		info->fallocend = end;
+
+	for (index = start; index < end; ) {
 		struct page *page;
 
 		/*
@@ -2738,6 +2734,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 		else
 			error = shmem_getpage(inode, index, &page, SGP_FALLOC);
 		if (error) {
+			info->fallocend = undo_fallocend;
 			/* Remove the !PageUptodate pages we added */
 			if (index > start) {
 				shmem_undo_range(inode,
@@ -2747,13 +2744,26 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 			goto undone;
 		}
 
+		index++;
+		/*
+		 * Here is a more important optimization than it appears:
+		 * a second SGP_FALLOC on the same huge page will clear it,
+		 * making it PageUptodate and un-undoable if we fail later.
+		 */
+		if (PageTransCompound(page)) {
+			index = round_up(index, HPAGE_PMD_NR);
+			/* Beware 32-bit wraparound */
+			if (!index)
+				index--;
+		}
+
 		/*
 		 * Inform shmem_writepage() how far we have reached.
 		 * No need for lock or barrier: we have the page lock.
 		 */
-		shmem_falloc.next++;
 		if (!PageUptodate(page))
-			shmem_falloc.nr_falloced++;
+			shmem_falloc.nr_falloced += index - shmem_falloc.next;
+		shmem_falloc.next = index;
 
 		/*
 		 * If !PageUptodate, leave it that way so that freeable pages
@@ -3488,9 +3498,10 @@ static int shmem_reconfigure(struct fs_context *fc)
 	struct shmem_options *ctx = fc->fs_private;
 	struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb);
 	unsigned long inodes;
+	struct mempolicy *mpol = NULL;
 	const char *err;
 
-	spin_lock(&sbinfo->stat_lock);
+	raw_spin_lock(&sbinfo->stat_lock);
 	inodes = sbinfo->max_inodes - sbinfo->free_inodes;
 	if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) {
 		if (!sbinfo->max_blocks) {
@@ -3535,14 +3546,15 @@ static int shmem_reconfigure(struct fs_context *fc)
 	 * Preserve previous mempolicy unless mpol remount option was specified.
 	 */
 	if (ctx->mpol) {
-		mpol_put(sbinfo->mpol);
+		mpol = sbinfo->mpol;
 		sbinfo->mpol = ctx->mpol;	/* transfers initial ref */
 		ctx->mpol = NULL;
 	}
-	spin_unlock(&sbinfo->stat_lock);
+	raw_spin_unlock(&sbinfo->stat_lock);
+	mpol_put(mpol);
 	return 0;
 out:
-	spin_unlock(&sbinfo->stat_lock);
+	raw_spin_unlock(&sbinfo->stat_lock);
 	return invalfc(fc, "%s", err);
 }
 
@@ -3613,7 +3625,6 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 	struct shmem_options *ctx = fc->fs_private;
 	struct inode *inode;
 	struct shmem_sb_info *sbinfo;
-	int err = -ENOMEM;
 
 	/* Round up to L1_CACHE_BYTES to resist false sharing */
 	sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info),
@@ -3659,7 +3670,7 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 	sbinfo->mpol = ctx->mpol;
 	ctx->mpol = NULL;
 
-	spin_lock_init(&sbinfo->stat_lock);
+	raw_spin_lock_init(&sbinfo->stat_lock);
 	if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
 		goto failed;
 	spin_lock_init(&sbinfo->shrinklist_lock);
@@ -3691,7 +3702,7 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 
 failed:
 	shmem_put_super(sb);
-	return err;
+	return -ENOMEM;
 }
 
 static int shmem_get_tree(struct fs_context *fc)
@@ -3907,7 +3918,7 @@ int __init shmem_init(void)
 	if (has_transparent_hugepage() && shmem_huge > SHMEM_HUGE_DENY)
 		SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
 	else
-		shmem_huge = 0; /* just in case it was patched */
+		shmem_huge = SHMEM_HUGE_NEVER; /* just in case it was patched */
 #endif
 	return 0;
 
@@ -3976,42 +3987,6 @@ struct kobj_attribute shmem_enabled_attr =
 	__ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SYSFS */
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-bool shmem_huge_enabled(struct vm_area_struct *vma)
-{
-	struct inode *inode = file_inode(vma->vm_file);
-	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
-	loff_t i_size;
-	pgoff_t off;
-
-	if (!transhuge_vma_enabled(vma, vma->vm_flags))
-		return false;
-	if (shmem_huge == SHMEM_HUGE_FORCE)
-		return true;
-	if (shmem_huge == SHMEM_HUGE_DENY)
-		return false;
-	switch (sbinfo->huge) {
-		case SHMEM_HUGE_NEVER:
-			return false;
-		case SHMEM_HUGE_ALWAYS:
-			return true;
-		case SHMEM_HUGE_WITHIN_SIZE:
-			off = round_up(vma->vm_pgoff, HPAGE_PMD_NR);
-			i_size = round_up(i_size_read(inode), PAGE_SIZE);
-			if (i_size >= HPAGE_PMD_SIZE &&
-					i_size >> PAGE_SHIFT >= off)
-				return true;
-			fallthrough;
-		case SHMEM_HUGE_ADVISE:
-			/* TODO: implement fadvise() hints */
-			return (vma->vm_flags & VM_HUGEPAGE);
-		default:
-			VM_BUG_ON(1);
-			return false;
-	}
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-
 #else /* !CONFIG_SHMEM */
 
 /*
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 1c673c323baf..ec2bb0beed75 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -502,6 +502,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
 	if (unlikely(!s))
 		return;
 
+	cpus_read_lock();
 	mutex_lock(&slab_mutex);
 
 	s->refcount--;
@@ -516,6 +517,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
 	}
 out_unlock:
 	mutex_unlock(&slab_mutex);
+	cpus_read_unlock();
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 
diff --git a/mm/slub.c b/mm/slub.c
index f77d8cd79ef7..f7ac28646580 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -46,13 +46,21 @@
 /*
  * Lock order:
  *   1. slab_mutex (Global Mutex)
- *   2. node->list_lock
- *   3. slab_lock(page) (Only on some arches and for debugging)
+ *   2. node->list_lock (Spinlock)
+ *   3. kmem_cache->cpu_slab->lock (Local lock)
+ *   4. slab_lock(page) (Only on some arches or for debugging)
+ *   5. object_map_lock (Only for debugging)
  *
  *   slab_mutex
  *
  *   The role of the slab_mutex is to protect the list of all the slabs
  *   and to synchronize major metadata changes to slab cache structures.
+ *   Also synchronizes memory hotplug callbacks.
+ *
+ *   slab_lock
+ *
+ *   The slab_lock is a wrapper around the page lock, thus it is a bit
+ *   spinlock.
  *
  *   The slab_lock is only used for debugging and on arches that do not
  *   have the ability to do a cmpxchg_double. It only protects:
@@ -61,6 +69,8 @@
  *	C. page->objects	-> Number of objects in page
  *	D. page->frozen		-> frozen state
  *
+ *   Frozen slabs
+ *
  *   If a slab is frozen then it is exempt from list management. It is not
  *   on any list except per cpu partial list. The processor that froze the
  *   slab is the one who can perform list operations on the page. Other
@@ -68,6 +78,8 @@
  *   froze the slab is the only one that can retrieve the objects from the
  *   page's freelist.
  *
+ *   list_lock
+ *
  *   The list_lock protects the partial and full list on each node and
  *   the partial slab counter. If taken then no new slabs may be added or
  *   removed from the lists nor make the number of partial slabs be modified.
@@ -79,10 +91,36 @@
  *   slabs, operations can continue without any centralized lock. F.e.
  *   allocating a long series of objects that fill up slabs does not require
  *   the list lock.
- *   Interrupts are disabled during allocation and deallocation in order to
- *   make the slab allocator safe to use in the context of an irq. In addition
- *   interrupts are disabled to ensure that the processor does not change
- *   while handling per_cpu slabs, due to kernel preemption.
+ *
+ *   cpu_slab->lock local lock
+ *
+ *   This locks protect slowpath manipulation of all kmem_cache_cpu fields
+ *   except the stat counters. This is a percpu structure manipulated only by
+ *   the local cpu, so the lock protects against being preempted or interrupted
+ *   by an irq. Fast path operations rely on lockless operations instead.
+ *   On PREEMPT_RT, the local lock does not actually disable irqs (and thus
+ *   prevent the lockless operations), so fastpath operations also need to take
+ *   the lock and are no longer lockless.
+ *
+ *   lockless fastpaths
+ *
+ *   The fast path allocation (slab_alloc_node()) and freeing (do_slab_free())
+ *   are fully lockless when satisfied from the percpu slab (and when
+ *   cmpxchg_double is possible to use, otherwise slab_lock is taken).
+ *   They also don't disable preemption or migration or irqs. They rely on
+ *   the transaction id (tid) field to detect being preempted or moved to
+ *   another cpu.
+ *
+ *   irq, preemption, migration considerations
+ *
+ *   Interrupts are disabled as part of list_lock or local_lock operations, or
+ *   around the slab_lock operation, in order to make the slab allocator safe
+ *   to use in the context of an irq.
+ *
+ *   In addition, preemption (or migration on PREEMPT_RT) is disabled in the
+ *   allocation slowpath, bulk allocation, and put_cpu_partial(), so that the
+ *   local cpu doesn't change in the process and e.g. the kmem_cache_cpu pointer
+ *   doesn't have to be revalidated in each section protected by the local lock.
  *
  * SLUB assigns one slab for allocation to each processor.
  * Allocations only occur from these slabs called cpu slabs.
@@ -118,6 +156,26 @@
  * 			the fast path and disables lockless freelists.
  */
 
+/*
+ * We could simply use migrate_disable()/enable() but as long as it's a
+ * function call even on !PREEMPT_RT, use inline preempt_disable() there.
+ */
+#ifndef CONFIG_PREEMPT_RT
+#define slub_get_cpu_ptr(var)	get_cpu_ptr(var)
+#define slub_put_cpu_ptr(var)	put_cpu_ptr(var)
+#else
+#define slub_get_cpu_ptr(var)		\
+({					\
+	migrate_disable();		\
+	this_cpu_ptr(var);		\
+})
+#define slub_put_cpu_ptr(var)		\
+do {					\
+	(void)(var);			\
+	migrate_enable();		\
+} while (0)
+#endif
+
 #ifdef CONFIG_SLUB_DEBUG
 #ifdef CONFIG_SLUB_DEBUG_ON
 DEFINE_STATIC_KEY_TRUE(slub_debug_enabled);
@@ -359,25 +417,44 @@ static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
 /*
  * Per slab locking using the pagelock
  */
-static __always_inline void slab_lock(struct page *page)
+static __always_inline void __slab_lock(struct page *page)
 {
 	VM_BUG_ON_PAGE(PageTail(page), page);
 	bit_spin_lock(PG_locked, &page->flags);
 }
 
-static __always_inline void slab_unlock(struct page *page)
+static __always_inline void __slab_unlock(struct page *page)
 {
 	VM_BUG_ON_PAGE(PageTail(page), page);
 	__bit_spin_unlock(PG_locked, &page->flags);
 }
 
-/* Interrupts must be disabled (for the fallback code to work right) */
+static __always_inline void slab_lock(struct page *page, unsigned long *flags)
+{
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		local_irq_save(*flags);
+	__slab_lock(page);
+}
+
+static __always_inline void slab_unlock(struct page *page, unsigned long *flags)
+{
+	__slab_unlock(page);
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		local_irq_restore(*flags);
+}
+
+/*
+ * Interrupts must be disabled (for the fallback code to work right), typically
+ * by an _irqsave() lock variant. Except on PREEMPT_RT where locks are different
+ * so we disable interrupts as part of slab_[un]lock().
+ */
 static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
 		void *freelist_old, unsigned long counters_old,
 		void *freelist_new, unsigned long counters_new,
 		const char *n)
 {
-	VM_BUG_ON(!irqs_disabled());
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		lockdep_assert_irqs_disabled();
 #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
     defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
 	if (s->flags & __CMPXCHG_DOUBLE) {
@@ -388,15 +465,18 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
 	} else
 #endif
 	{
-		slab_lock(page);
+		/* init to 0 to prevent spurious warnings */
+		unsigned long flags = 0;
+
+		slab_lock(page, &flags);
 		if (page->freelist == freelist_old &&
 					page->counters == counters_old) {
 			page->freelist = freelist_new;
 			page->counters = counters_new;
-			slab_unlock(page);
+			slab_unlock(page, &flags);
 			return true;
 		}
-		slab_unlock(page);
+		slab_unlock(page, &flags);
 	}
 
 	cpu_relax();
@@ -427,16 +507,16 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
 		unsigned long flags;
 
 		local_irq_save(flags);
-		slab_lock(page);
+		__slab_lock(page);
 		if (page->freelist == freelist_old &&
 					page->counters == counters_old) {
 			page->freelist = freelist_new;
 			page->counters = counters_new;
-			slab_unlock(page);
+			__slab_unlock(page);
 			local_irq_restore(flags);
 			return true;
 		}
-		slab_unlock(page);
+		__slab_unlock(page);
 		local_irq_restore(flags);
 	}
 
@@ -452,7 +532,19 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
 
 #ifdef CONFIG_SLUB_DEBUG
 static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
-static DEFINE_SPINLOCK(object_map_lock);
+static DEFINE_RAW_SPINLOCK(object_map_lock);
+
+static void __fill_map(unsigned long *obj_map, struct kmem_cache *s,
+		       struct page *page)
+{
+	void *addr = page_address(page);
+	void *p;
+
+	bitmap_zero(obj_map, page->objects);
+
+	for (p = page->freelist; p; p = get_freepointer(s, p))
+		set_bit(__obj_to_index(s, addr, p), obj_map);
+}
 
 #if IS_ENABLED(CONFIG_KUNIT)
 static bool slab_add_kunit_errors(void)
@@ -483,17 +575,11 @@ static inline bool slab_add_kunit_errors(void) { return false; }
 static unsigned long *get_map(struct kmem_cache *s, struct page *page)
 	__acquires(&object_map_lock)
 {
-	void *p;
-	void *addr = page_address(page);
-
 	VM_BUG_ON(!irqs_disabled());
 
-	spin_lock(&object_map_lock);
+	raw_spin_lock(&object_map_lock);
 
-	bitmap_zero(object_map, page->objects);
-
-	for (p = page->freelist; p; p = get_freepointer(s, p))
-		set_bit(__obj_to_index(s, addr, p), object_map);
+	__fill_map(object_map, s, page);
 
 	return object_map;
 }
@@ -501,7 +587,7 @@ static unsigned long *get_map(struct kmem_cache *s, struct page *page)
 static void put_map(unsigned long *map) __releases(&object_map_lock)
 {
 	VM_BUG_ON(map != object_map);
-	spin_unlock(&object_map_lock);
+	raw_spin_unlock(&object_map_lock);
 }
 
 static inline unsigned int size_from_object(struct kmem_cache *s)
@@ -677,9 +763,9 @@ void print_tracking(struct kmem_cache *s, void *object)
 
 static void print_page_info(struct page *page)
 {
-	pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%#lx(%pGp)\n",
+	pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%pGp\n",
 	       page, page->objects, page->inuse, page->freelist,
-	       page->flags, &page->flags);
+	       &page->flags);
 
 }
 
@@ -1003,8 +1089,6 @@ static int check_slab(struct kmem_cache *s, struct page *page)
 {
 	int maxobj;
 
-	VM_BUG_ON(!irqs_disabled());
-
 	if (!PageSlab(page)) {
 		slab_err(s, page, "Not a valid slab page");
 		return 0;
@@ -1265,11 +1349,11 @@ static noinline int free_debug_processing(
 	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
 	void *object = head;
 	int cnt = 0;
-	unsigned long flags;
+	unsigned long flags, flags2;
 	int ret = 0;
 
 	spin_lock_irqsave(&n->list_lock, flags);
-	slab_lock(page);
+	slab_lock(page, &flags2);
 
 	if (s->flags & SLAB_CONSISTENCY_CHECKS) {
 		if (!check_slab(s, page))
@@ -1302,7 +1386,7 @@ out:
 		slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n",
 			 bulk_cnt, cnt);
 
-	slab_unlock(page);
+	slab_unlock(page, &flags2);
 	spin_unlock_irqrestore(&n->list_lock, flags);
 	if (!ret)
 		slab_fix(s, "Object at 0x%p not freed", object);
@@ -1585,20 +1669,8 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s,
 {
 	kmemleak_free_recursive(x, s->flags);
 
-	/*
-	 * Trouble is that we may no longer disable interrupts in the fast path
-	 * So in order to make the debug calls that expect irqs to be
-	 * disabled we need to disable interrupts temporarily.
-	 */
-#ifdef CONFIG_LOCKDEP
-	{
-		unsigned long flags;
+	debug_check_no_locks_freed(x, s->object_size);
 
-		local_irq_save(flags);
-		debug_check_no_locks_freed(x, s->object_size);
-		local_irq_restore(flags);
-	}
-#endif
 	if (!(s->flags & SLAB_DEBUG_OBJECTS))
 		debug_check_no_obj_freed(x, s->object_size);
 
@@ -1815,9 +1887,6 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 
 	flags &= gfp_allowed_mask;
 
-	if (gfpflags_allow_blocking(flags))
-		local_irq_enable();
-
 	flags |= s->allocflags;
 
 	/*
@@ -1876,8 +1945,6 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	page->frozen = 1;
 
 out:
-	if (gfpflags_allow_blocking(flags))
-		local_irq_disable();
 	if (!page)
 		return NULL;
 
@@ -1891,6 +1958,8 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 	if (unlikely(flags & GFP_SLAB_BUG_MASK))
 		flags = kmalloc_fix_flags(flags);
 
+	WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
+
 	return allocate_slab(s,
 		flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
 }
@@ -2014,18 +2083,24 @@ static inline void *acquire_slab(struct kmem_cache *s,
 	return freelist;
 }
 
+#ifdef CONFIG_SLUB_CPU_PARTIAL
 static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
+#else
+static inline void put_cpu_partial(struct kmem_cache *s, struct page *page,
+				   int drain) { }
+#endif
 static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
 
 /*
  * Try to allocate a partial slab from a specific node.
  */
 static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
-				struct kmem_cache_cpu *c, gfp_t flags)
+			      struct page **ret_page, gfp_t gfpflags)
 {
 	struct page *page, *page2;
 	void *object = NULL;
 	unsigned int available = 0;
+	unsigned long flags;
 	int objects;
 
 	/*
@@ -2037,11 +2112,11 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
 	if (!n || !n->nr_partial)
 		return NULL;
 
-	spin_lock(&n->list_lock);
+	spin_lock_irqsave(&n->list_lock, flags);
 	list_for_each_entry_safe(page, page2, &n->partial, slab_list) {
 		void *t;
 
-		if (!pfmemalloc_match(page, flags))
+		if (!pfmemalloc_match(page, gfpflags))
 			continue;
 
 		t = acquire_slab(s, n, page, object == NULL, &objects);
@@ -2050,7 +2125,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
 
 		available += objects;
 		if (!object) {
-			c->page = page;
+			*ret_page = page;
 			stat(s, ALLOC_FROM_PARTIAL);
 			object = t;
 		} else {
@@ -2062,7 +2137,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
 			break;
 
 	}
-	spin_unlock(&n->list_lock);
+	spin_unlock_irqrestore(&n->list_lock, flags);
 	return object;
 }
 
@@ -2070,7 +2145,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
  * Get a page from somewhere. Search in increasing NUMA distances.
  */
 static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
-		struct kmem_cache_cpu *c)
+			     struct page **ret_page)
 {
 #ifdef CONFIG_NUMA
 	struct zonelist *zonelist;
@@ -2112,7 +2187,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
 
 			if (n && cpuset_zone_allowed(zone, flags) &&
 					n->nr_partial > s->min_partial) {
-				object = get_partial_node(s, n, c, flags);
+				object = get_partial_node(s, n, ret_page, flags);
 				if (object) {
 					/*
 					 * Don't check read_mems_allowed_retry()
@@ -2134,7 +2209,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
  * Get a partial page, lock it and return it.
  */
 static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
-		struct kmem_cache_cpu *c)
+			 struct page **ret_page)
 {
 	void *object;
 	int searchnode = node;
@@ -2142,11 +2217,11 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
 	if (node == NUMA_NO_NODE)
 		searchnode = numa_mem_id();
 
-	object = get_partial_node(s, get_node(s, searchnode), c, flags);
+	object = get_partial_node(s, get_node(s, searchnode), ret_page, flags);
 	if (object || node != NUMA_NO_NODE)
 		return object;
 
-	return get_any_partial(s, flags, c);
+	return get_any_partial(s, flags, ret_page);
 }
 
 #ifdef CONFIG_PREEMPTION
@@ -2213,16 +2288,23 @@ static inline void note_cmpxchg_failure(const char *n,
 static void init_kmem_cache_cpus(struct kmem_cache *s)
 {
 	int cpu;
+	struct kmem_cache_cpu *c;
 
-	for_each_possible_cpu(cpu)
-		per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
+	for_each_possible_cpu(cpu) {
+		c = per_cpu_ptr(s->cpu_slab, cpu);
+		local_lock_init(&c->lock);
+		c->tid = init_tid(cpu);
+	}
 }
 
 /*
- * Remove the cpu slab
+ * Finishes removing the cpu slab. Merges cpu's freelist with page's freelist,
+ * unfreezes the slabs and puts it on the proper list.
+ * Assumes the slab has been already safely taken away from kmem_cache_cpu
+ * by the caller.
  */
 static void deactivate_slab(struct kmem_cache *s, struct page *page,
-				void *freelist, struct kmem_cache_cpu *c)
+			    void *freelist)
 {
 	enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
 	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
@@ -2230,6 +2312,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
 	enum slab_modes l = M_NONE, m = M_NONE;
 	void *nextfree, *freelist_iter, *freelist_tail;
 	int tail = DEACTIVATE_TO_HEAD;
+	unsigned long flags = 0;
 	struct page new;
 	struct page old;
 
@@ -2305,7 +2388,7 @@ redo:
 			 * that acquire_slab() will see a slab page that
 			 * is frozen
 			 */
-			spin_lock(&n->list_lock);
+			spin_lock_irqsave(&n->list_lock, flags);
 		}
 	} else {
 		m = M_FULL;
@@ -2316,7 +2399,7 @@ redo:
 			 * slabs from diagnostic functions will not see
 			 * any frozen slabs.
 			 */
-			spin_lock(&n->list_lock);
+			spin_lock_irqsave(&n->list_lock, flags);
 		}
 	}
 
@@ -2333,14 +2416,14 @@ redo:
 	}
 
 	l = m;
-	if (!__cmpxchg_double_slab(s, page,
+	if (!cmpxchg_double_slab(s, page,
 				old.freelist, old.counters,
 				new.freelist, new.counters,
 				"unfreezing slab"))
 		goto redo;
 
 	if (lock)
-		spin_unlock(&n->list_lock);
+		spin_unlock_irqrestore(&n->list_lock, flags);
 
 	if (m == M_PARTIAL)
 		stat(s, tail);
@@ -2351,38 +2434,29 @@ redo:
 		discard_slab(s, page);
 		stat(s, FREE_SLAB);
 	}
-
-	c->page = NULL;
-	c->freelist = NULL;
 }
 
-/*
- * Unfreeze all the cpu partial slabs.
- *
- * This function must be called with interrupts disabled
- * for the cpu using c (or some other guarantee must be there
- * to guarantee no concurrent accesses).
- */
-static void unfreeze_partials(struct kmem_cache *s,
-		struct kmem_cache_cpu *c)
-{
 #ifdef CONFIG_SLUB_CPU_PARTIAL
+static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page)
+{
 	struct kmem_cache_node *n = NULL, *n2 = NULL;
 	struct page *page, *discard_page = NULL;
+	unsigned long flags = 0;
 
-	while ((page = slub_percpu_partial(c))) {
+	while (partial_page) {
 		struct page new;
 		struct page old;
 
-		slub_set_percpu_partial(c, page);
+		page = partial_page;
+		partial_page = page->next;
 
 		n2 = get_node(s, page_to_nid(page));
 		if (n != n2) {
 			if (n)
-				spin_unlock(&n->list_lock);
+				spin_unlock_irqrestore(&n->list_lock, flags);
 
 			n = n2;
-			spin_lock(&n->list_lock);
+			spin_lock_irqsave(&n->list_lock, flags);
 		}
 
 		do {
@@ -2411,7 +2485,7 @@ static void unfreeze_partials(struct kmem_cache *s,
 	}
 
 	if (n)
-		spin_unlock(&n->list_lock);
+		spin_unlock_irqrestore(&n->list_lock, flags);
 
 	while (discard_page) {
 		page = discard_page;
@@ -2421,7 +2495,35 @@ static void unfreeze_partials(struct kmem_cache *s,
 		discard_slab(s, page);
 		stat(s, FREE_SLAB);
 	}
-#endif	/* CONFIG_SLUB_CPU_PARTIAL */
+}
+
+/*
+ * Unfreeze all the cpu partial slabs.
+ */
+static void unfreeze_partials(struct kmem_cache *s)
+{
+	struct page *partial_page;
+	unsigned long flags;
+
+	local_lock_irqsave(&s->cpu_slab->lock, flags);
+	partial_page = this_cpu_read(s->cpu_slab->partial);
+	this_cpu_write(s->cpu_slab->partial, NULL);
+	local_unlock_irqrestore(&s->cpu_slab->lock, flags);
+
+	if (partial_page)
+		__unfreeze_partials(s, partial_page);
+}
+
+static void unfreeze_partials_cpu(struct kmem_cache *s,
+				  struct kmem_cache_cpu *c)
+{
+	struct page *partial_page;
+
+	partial_page = slub_percpu_partial(c);
+	c->partial = NULL;
+
+	if (partial_page)
+		__unfreeze_partials(s, partial_page);
 }
 
 /*
@@ -2433,97 +2535,170 @@ static void unfreeze_partials(struct kmem_cache *s,
  */
 static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 {
-#ifdef CONFIG_SLUB_CPU_PARTIAL
 	struct page *oldpage;
-	int pages;
-	int pobjects;
+	struct page *page_to_unfreeze = NULL;
+	unsigned long flags;
+	int pages = 0;
+	int pobjects = 0;
 
-	preempt_disable();
-	do {
-		pages = 0;
-		pobjects = 0;
-		oldpage = this_cpu_read(s->cpu_slab->partial);
+	local_lock_irqsave(&s->cpu_slab->lock, flags);
+
+	oldpage = this_cpu_read(s->cpu_slab->partial);
 
-		if (oldpage) {
+	if (oldpage) {
+		if (drain && oldpage->pobjects > slub_cpu_partial(s)) {
+			/*
+			 * Partial array is full. Move the existing set to the
+			 * per node partial list. Postpone the actual unfreezing
+			 * outside of the critical section.
+			 */
+			page_to_unfreeze = oldpage;
+			oldpage = NULL;
+		} else {
 			pobjects = oldpage->pobjects;
 			pages = oldpage->pages;
-			if (drain && pobjects > slub_cpu_partial(s)) {
-				unsigned long flags;
-				/*
-				 * partial array is full. Move the existing
-				 * set to the per node partial list.
-				 */
-				local_irq_save(flags);
-				unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
-				local_irq_restore(flags);
-				oldpage = NULL;
-				pobjects = 0;
-				pages = 0;
-				stat(s, CPU_PARTIAL_DRAIN);
-			}
 		}
+	}
 
-		pages++;
-		pobjects += page->objects - page->inuse;
+	pages++;
+	pobjects += page->objects - page->inuse;
 
-		page->pages = pages;
-		page->pobjects = pobjects;
-		page->next = oldpage;
+	page->pages = pages;
+	page->pobjects = pobjects;
+	page->next = oldpage;
 
-	} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
-								!= oldpage);
-	if (unlikely(!slub_cpu_partial(s))) {
-		unsigned long flags;
+	this_cpu_write(s->cpu_slab->partial, page);
 
-		local_irq_save(flags);
-		unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
-		local_irq_restore(flags);
+	local_unlock_irqrestore(&s->cpu_slab->lock, flags);
+
+	if (page_to_unfreeze) {
+		__unfreeze_partials(s, page_to_unfreeze);
+		stat(s, CPU_PARTIAL_DRAIN);
 	}
-	preempt_enable();
-#endif	/* CONFIG_SLUB_CPU_PARTIAL */
 }
 
+#else	/* CONFIG_SLUB_CPU_PARTIAL */
+
+static inline void unfreeze_partials(struct kmem_cache *s) { }
+static inline void unfreeze_partials_cpu(struct kmem_cache *s,
+				  struct kmem_cache_cpu *c) { }
+
+#endif	/* CONFIG_SLUB_CPU_PARTIAL */
+
 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 {
-	stat(s, CPUSLAB_FLUSH);
-	deactivate_slab(s, c->page, c->freelist, c);
+	unsigned long flags;
+	struct page *page;
+	void *freelist;
+
+	local_lock_irqsave(&s->cpu_slab->lock, flags);
+
+	page = c->page;
+	freelist = c->freelist;
+
+	c->page = NULL;
+	c->freelist = NULL;
+	c->tid = next_tid(c->tid);
+
+	local_unlock_irqrestore(&s->cpu_slab->lock, flags);
+
+	if (page) {
+		deactivate_slab(s, page, freelist);
+		stat(s, CPUSLAB_FLUSH);
+	}
+}
+
+static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
+{
+	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
+	void *freelist = c->freelist;
+	struct page *page = c->page;
 
+	c->page = NULL;
+	c->freelist = NULL;
 	c->tid = next_tid(c->tid);
+
+	if (page) {
+		deactivate_slab(s, page, freelist);
+		stat(s, CPUSLAB_FLUSH);
+	}
+
+	unfreeze_partials_cpu(s, c);
 }
 
+struct slub_flush_work {
+	struct work_struct work;
+	struct kmem_cache *s;
+	bool skip;
+};
+
 /*
  * Flush cpu slab.
  *
- * Called from IPI handler with interrupts disabled.
+ * Called from CPU work handler with migration disabled.
  */
-static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
+static void flush_cpu_slab(struct work_struct *w)
 {
-	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
+	struct kmem_cache *s;
+	struct kmem_cache_cpu *c;
+	struct slub_flush_work *sfw;
+
+	sfw = container_of(w, struct slub_flush_work, work);
+
+	s = sfw->s;
+	c = this_cpu_ptr(s->cpu_slab);
 
 	if (c->page)
 		flush_slab(s, c);
 
-	unfreeze_partials(s, c);
+	unfreeze_partials(s);
 }
 
-static void flush_cpu_slab(void *d)
+static bool has_cpu_slab(int cpu, struct kmem_cache *s)
 {
-	struct kmem_cache *s = d;
+	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
 
-	__flush_cpu_slab(s, smp_processor_id());
+	return c->page || slub_percpu_partial(c);
 }
 
-static bool has_cpu_slab(int cpu, void *info)
+static DEFINE_MUTEX(flush_lock);
+static DEFINE_PER_CPU(struct slub_flush_work, slub_flush);
+
+static void flush_all_cpus_locked(struct kmem_cache *s)
 {
-	struct kmem_cache *s = info;
-	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
+	struct slub_flush_work *sfw;
+	unsigned int cpu;
 
-	return c->page || slub_percpu_partial(c);
+	lockdep_assert_cpus_held();
+	mutex_lock(&flush_lock);
+
+	for_each_online_cpu(cpu) {
+		sfw = &per_cpu(slub_flush, cpu);
+		if (!has_cpu_slab(cpu, s)) {
+			sfw->skip = true;
+			continue;
+		}
+		INIT_WORK(&sfw->work, flush_cpu_slab);
+		sfw->skip = false;
+		sfw->s = s;
+		schedule_work_on(cpu, &sfw->work);
+	}
+
+	for_each_online_cpu(cpu) {
+		sfw = &per_cpu(slub_flush, cpu);
+		if (sfw->skip)
+			continue;
+		flush_work(&sfw->work);
+	}
+
+	mutex_unlock(&flush_lock);
 }
 
 static void flush_all(struct kmem_cache *s)
 {
-	on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1);
+	cpus_read_lock();
+	flush_all_cpus_locked(s);
+	cpus_read_unlock();
 }
 
 /*
@@ -2533,14 +2708,10 @@ static void flush_all(struct kmem_cache *s)
 static int slub_cpu_dead(unsigned int cpu)
 {
 	struct kmem_cache *s;
-	unsigned long flags;
 
 	mutex_lock(&slab_mutex);
-	list_for_each_entry(s, &slab_caches, list) {
-		local_irq_save(flags);
+	list_for_each_entry(s, &slab_caches, list)
 		__flush_cpu_slab(s, cpu);
-		local_irq_restore(flags);
-	}
 	mutex_unlock(&slab_mutex);
 	return 0;
 }
@@ -2623,44 +2794,22 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
 #endif
 }
 
-static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
-			int node, struct kmem_cache_cpu **pc)
+static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
 {
-	void *freelist;
-	struct kmem_cache_cpu *c = *pc;
-	struct page *page;
-
-	WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
-
-	freelist = get_partial(s, flags, node, c);
-
-	if (freelist)
-		return freelist;
-
-	page = new_slab(s, flags, node);
-	if (page) {
-		c = raw_cpu_ptr(s->cpu_slab);
-		if (c->page)
-			flush_slab(s, c);
-
-		/*
-		 * No other reference to the page yet so we can
-		 * muck around with it freely without cmpxchg
-		 */
-		freelist = page->freelist;
-		page->freelist = NULL;
-
-		stat(s, ALLOC_SLAB);
-		c->page = page;
-		*pc = c;
-	}
+	if (unlikely(PageSlabPfmemalloc(page)))
+		return gfp_pfmemalloc_allowed(gfpflags);
 
-	return freelist;
+	return true;
 }
 
-static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
+/*
+ * A variant of pfmemalloc_match() that tests page flags without asserting
+ * PageSlab. Intended for opportunistic checks before taking a lock and
+ * rechecking that nobody else freed the page under us.
+ */
+static inline bool pfmemalloc_match_unsafe(struct page *page, gfp_t gfpflags)
 {
-	if (unlikely(PageSlabPfmemalloc(page)))
+	if (unlikely(__PageSlabPfmemalloc(page)))
 		return gfp_pfmemalloc_allowed(gfpflags);
 
 	return true;
@@ -2673,8 +2822,6 @@ static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
  * The page is still frozen if the return value is not NULL.
  *
  * If this function returns NULL then the page has been unfrozen.
- *
- * This function must be called with interrupt disabled.
  */
 static inline void *get_freelist(struct kmem_cache *s, struct page *page)
 {
@@ -2682,6 +2829,8 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
 	unsigned long counters;
 	void *freelist;
 
+	lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock));
+
 	do {
 		freelist = page->freelist;
 		counters = page->counters;
@@ -2716,7 +2865,7 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
  * we need to allocate a new slab. This is the slowest path since it involves
  * a call to the page allocator and the setup of a new slab.
  *
- * Version of __slab_alloc to use when we know that interrupts are
+ * Version of __slab_alloc to use when we know that preemption is
  * already disabled (which is the case for bulk allocation).
  */
 static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
@@ -2724,10 +2873,13 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 {
 	void *freelist;
 	struct page *page;
+	unsigned long flags;
 
 	stat(s, ALLOC_SLOWPATH);
 
-	page = c->page;
+reread_page:
+
+	page = READ_ONCE(c->page);
 	if (!page) {
 		/*
 		 * if the node is not online or has no normal memory, just
@@ -2750,8 +2902,7 @@ redo:
 			goto redo;
 		} else {
 			stat(s, ALLOC_NODE_MISMATCH);
-			deactivate_slab(s, page, c->freelist, c);
-			goto new_slab;
+			goto deactivate_slab;
 		}
 	}
 
@@ -2760,12 +2911,15 @@ redo:
 	 * PFMEMALLOC but right now, we are losing the pfmemalloc
 	 * information when the page leaves the per-cpu allocator
 	 */
-	if (unlikely(!pfmemalloc_match(page, gfpflags))) {
-		deactivate_slab(s, page, c->freelist, c);
-		goto new_slab;
+	if (unlikely(!pfmemalloc_match_unsafe(page, gfpflags)))
+		goto deactivate_slab;
+
+	/* must check again c->page in case we got preempted and it changed */
+	local_lock_irqsave(&s->cpu_slab->lock, flags);
+	if (unlikely(page != c->page)) {
+		local_unlock_irqrestore(&s->cpu_slab->lock, flags);
+		goto reread_page;
 	}
-
-	/* must check again c->freelist in case of cpu migration or IRQ */
 	freelist = c->freelist;
 	if (freelist)
 		goto load_freelist;
@@ -2774,6 +2928,7 @@ redo:
 
 	if (!freelist) {
 		c->page = NULL;
+		local_unlock_irqrestore(&s->cpu_slab->lock, flags);
 		stat(s, DEACTIVATE_BYPASS);
 		goto new_slab;
 	}
@@ -2781,6 +2936,9 @@ redo:
 	stat(s, ALLOC_REFILL);
 
 load_freelist:
+
+	lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock));
+
 	/*
 	 * freelist is pointing to the list of objects to be used.
 	 * page is pointing to the page from which the objects are obtained.
@@ -2789,59 +2947,141 @@ load_freelist:
 	VM_BUG_ON(!c->page->frozen);
 	c->freelist = get_freepointer(s, freelist);
 	c->tid = next_tid(c->tid);
+	local_unlock_irqrestore(&s->cpu_slab->lock, flags);
 	return freelist;
 
+deactivate_slab:
+
+	local_lock_irqsave(&s->cpu_slab->lock, flags);
+	if (page != c->page) {
+		local_unlock_irqrestore(&s->cpu_slab->lock, flags);
+		goto reread_page;
+	}
+	freelist = c->freelist;
+	c->page = NULL;
+	c->freelist = NULL;
+	local_unlock_irqrestore(&s->cpu_slab->lock, flags);
+	deactivate_slab(s, page, freelist);
+
 new_slab:
 
 	if (slub_percpu_partial(c)) {
+		local_lock_irqsave(&s->cpu_slab->lock, flags);
+		if (unlikely(c->page)) {
+			local_unlock_irqrestore(&s->cpu_slab->lock, flags);
+			goto reread_page;
+		}
+		if (unlikely(!slub_percpu_partial(c))) {
+			local_unlock_irqrestore(&s->cpu_slab->lock, flags);
+			/* we were preempted and partial list got empty */
+			goto new_objects;
+		}
+
 		page = c->page = slub_percpu_partial(c);
 		slub_set_percpu_partial(c, page);
+		local_unlock_irqrestore(&s->cpu_slab->lock, flags);
 		stat(s, CPU_PARTIAL_ALLOC);
 		goto redo;
 	}
 
-	freelist = new_slab_objects(s, gfpflags, node, &c);
+new_objects:
+
+	freelist = get_partial(s, gfpflags, node, &page);
+	if (freelist)
+		goto check_new_page;
+
+	slub_put_cpu_ptr(s->cpu_slab);
+	page = new_slab(s, gfpflags, node);
+	c = slub_get_cpu_ptr(s->cpu_slab);
 
-	if (unlikely(!freelist)) {
+	if (unlikely(!page)) {
 		slab_out_of_memory(s, gfpflags, node);
 		return NULL;
 	}
 
-	page = c->page;
-	if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
-		goto load_freelist;
+	/*
+	 * No other reference to the page yet so we can
+	 * muck around with it freely without cmpxchg
+	 */
+	freelist = page->freelist;
+	page->freelist = NULL;
 
-	/* Only entered in the debug case */
-	if (kmem_cache_debug(s) &&
-			!alloc_debug_processing(s, page, freelist, addr))
-		goto new_slab;	/* Slab failed checks. Next slab needed */
+	stat(s, ALLOC_SLAB);
+
+check_new_page:
+
+	if (kmem_cache_debug(s)) {
+		if (!alloc_debug_processing(s, page, freelist, addr)) {
+			/* Slab failed checks. Next slab needed */
+			goto new_slab;
+		} else {
+			/*
+			 * For debug case, we don't load freelist so that all
+			 * allocations go through alloc_debug_processing()
+			 */
+			goto return_single;
+		}
+	}
+
+	if (unlikely(!pfmemalloc_match(page, gfpflags)))
+		/*
+		 * For !pfmemalloc_match() case we don't load freelist so that
+		 * we don't make further mismatched allocations easier.
+		 */
+		goto return_single;
+
+retry_load_page:
+
+	local_lock_irqsave(&s->cpu_slab->lock, flags);
+	if (unlikely(c->page)) {
+		void *flush_freelist = c->freelist;
+		struct page *flush_page = c->page;
+
+		c->page = NULL;
+		c->freelist = NULL;
+		c->tid = next_tid(c->tid);
+
+		local_unlock_irqrestore(&s->cpu_slab->lock, flags);
 
-	deactivate_slab(s, page, get_freepointer(s, freelist), c);
+		deactivate_slab(s, flush_page, flush_freelist);
+
+		stat(s, CPUSLAB_FLUSH);
+
+		goto retry_load_page;
+	}
+	c->page = page;
+
+	goto load_freelist;
+
+return_single:
+
+	deactivate_slab(s, page, get_freepointer(s, freelist));
 	return freelist;
 }
 
 /*
- * Another one that disabled interrupt and compensates for possible
- * cpu changes by refetching the per cpu area pointer.
+ * A wrapper for ___slab_alloc() for contexts where preemption is not yet
+ * disabled. Compensates for possible cpu changes by refetching the per cpu area
+ * pointer.
  */
 static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 			  unsigned long addr, struct kmem_cache_cpu *c)
 {
 	void *p;
-	unsigned long flags;
 
-	local_irq_save(flags);
-#ifdef CONFIG_PREEMPTION
+#ifdef CONFIG_PREEMPT_COUNT
 	/*
 	 * We may have been preempted and rescheduled on a different
-	 * cpu before disabling interrupts. Need to reload cpu area
+	 * cpu before disabling preemption. Need to reload cpu area
 	 * pointer.
 	 */
-	c = this_cpu_ptr(s->cpu_slab);
+	c = slub_get_cpu_ptr(s->cpu_slab);
 #endif
 
 	p = ___slab_alloc(s, gfpflags, node, addr, c);
-	local_irq_restore(flags);
+#ifdef CONFIG_PREEMPT_COUNT
+	slub_put_cpu_ptr(s->cpu_slab);
+#endif
 	return p;
 }
 
@@ -2892,15 +3132,14 @@ redo:
 	 * reading from one cpu area. That does not matter as long
 	 * as we end up on the original cpu again when doing the cmpxchg.
 	 *
-	 * We should guarantee that tid and kmem_cache are retrieved on
-	 * the same cpu. It could be different if CONFIG_PREEMPTION so we need
-	 * to check if it is matched or not.
+	 * We must guarantee that tid and kmem_cache_cpu are retrieved on the
+	 * same cpu. We read first the kmem_cache_cpu pointer and use it to read
+	 * the tid. If we are preempted and switched to another cpu between the
+	 * two reads, it's OK as the two are still associated with the same cpu
+	 * and cmpxchg later will validate the cpu.
 	 */
-	do {
-		tid = this_cpu_read(s->cpu_slab->tid);
-		c = raw_cpu_ptr(s->cpu_slab);
-	} while (IS_ENABLED(CONFIG_PREEMPTION) &&
-		 unlikely(tid != READ_ONCE(c->tid)));
+	c = raw_cpu_ptr(s->cpu_slab);
+	tid = READ_ONCE(c->tid);
 
 	/*
 	 * Irqless object alloc/free algorithm used here depends on sequence
@@ -2921,7 +3160,15 @@ redo:
 
 	object = c->freelist;
 	page = c->page;
-	if (unlikely(!object || !page || !node_match(page, node))) {
+	/*
+	 * We cannot use the lockless fastpath on PREEMPT_RT because if a
+	 * slowpath has taken the local_lock_irqsave(), it is not protected
+	 * against a fast path operation in an irq handler. So we need to take
+	 * the slow path which uses local_lock. It is still relatively fast if
+	 * there is a suitable cpu freelist.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) ||
+	    unlikely(!object || !page || !node_match(page, node))) {
 		object = __slab_alloc(s, gfpflags, node, addr, c);
 	} else {
 		void *next_object = get_freepointer_safe(s, object);
@@ -3174,16 +3421,14 @@ redo:
 	 * data is retrieved via this pointer. If we are on the same cpu
 	 * during the cmpxchg then the free will succeed.
 	 */
-	do {
-		tid = this_cpu_read(s->cpu_slab->tid);
-		c = raw_cpu_ptr(s->cpu_slab);
-	} while (IS_ENABLED(CONFIG_PREEMPTION) &&
-		 unlikely(tid != READ_ONCE(c->tid)));
+	c = raw_cpu_ptr(s->cpu_slab);
+	tid = READ_ONCE(c->tid);
 
 	/* Same with comment on barrier() in slab_alloc_node() */
 	barrier();
 
 	if (likely(page == c->page)) {
+#ifndef CONFIG_PREEMPT_RT
 		void **freelist = READ_ONCE(c->freelist);
 
 		set_freepointer(s, tail_obj, freelist);
@@ -3196,6 +3441,31 @@ redo:
 			note_cmpxchg_failure("slab_free", s, tid);
 			goto redo;
 		}
+#else /* CONFIG_PREEMPT_RT */
+		/*
+		 * We cannot use the lockless fastpath on PREEMPT_RT because if
+		 * a slowpath has taken the local_lock_irqsave(), it is not
+		 * protected against a fast path operation in an irq handler. So
+		 * we need to take the local_lock. We shouldn't simply defer to
+		 * __slab_free() as that wouldn't use the cpu freelist at all.
+		 */
+		void **freelist;
+
+		local_lock(&s->cpu_slab->lock);
+		c = this_cpu_ptr(s->cpu_slab);
+		if (unlikely(page != c->page)) {
+			local_unlock(&s->cpu_slab->lock);
+			goto redo;
+		}
+		tid = c->tid;
+		freelist = c->freelist;
+
+		set_freepointer(s, tail_obj, freelist);
+		c->freelist = head;
+		c->tid = next_tid(tid);
+
+		local_unlock(&s->cpu_slab->lock);
+#endif
 		stat(s, FREE_FASTPATH);
 	} else
 		__slab_free(s, page, head, tail_obj, cnt, addr);
@@ -3373,8 +3643,8 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 	 * IRQs, which protects against PREEMPT and interrupts
 	 * handlers invoking normal fastpath.
 	 */
-	local_irq_disable();
-	c = this_cpu_ptr(s->cpu_slab);
+	c = slub_get_cpu_ptr(s->cpu_slab);
+	local_lock_irq(&s->cpu_slab->lock);
 
 	for (i = 0; i < size; i++) {
 		void *object = kfence_alloc(s, s->object_size, flags);
@@ -3395,6 +3665,8 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 			 */
 			c->tid = next_tid(c->tid);
 
+			local_unlock_irq(&s->cpu_slab->lock);
+
 			/*
 			 * Invoking slow path likely have side-effect
 			 * of re-populating per CPU c->freelist
@@ -3407,6 +3679,8 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 			c = this_cpu_ptr(s->cpu_slab);
 			maybe_wipe_obj_freeptr(s, p[i]);
 
+			local_lock_irq(&s->cpu_slab->lock);
+
 			continue; /* goto for-loop */
 		}
 		c->freelist = get_freepointer(s, object);
@@ -3414,7 +3688,8 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 		maybe_wipe_obj_freeptr(s, p[i]);
 	}
 	c->tid = next_tid(c->tid);
-	local_irq_enable();
+	local_unlock_irq(&s->cpu_slab->lock);
+	slub_put_cpu_ptr(s->cpu_slab);
 
 	/*
 	 * memcg and kmem_cache debug support and memory initialization.
@@ -3424,7 +3699,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 				slab_want_init_on_alloc(flags, s));
 	return i;
 error:
-	local_irq_enable();
+	slub_put_cpu_ptr(s->cpu_slab);
 	slab_post_alloc_hook(s, objcg, flags, i, p, false);
 	__kmem_cache_free_bulk(s, i, p);
 	return 0;
@@ -3938,11 +4213,12 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
 {
 #ifdef CONFIG_SLUB_DEBUG
 	void *addr = page_address(page);
+	unsigned long flags;
 	unsigned long *map;
 	void *p;
 
 	slab_err(s, page, text, s->name);
-	slab_lock(page);
+	slab_lock(page, &flags);
 
 	map = get_map(s, page);
 	for_each_object(p, s, addr, page->objects) {
@@ -3953,7 +4229,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
 		}
 	}
 	put_map(map);
-	slab_unlock(page);
+	slab_unlock(page, &flags);
 #endif
 }
 
@@ -4003,7 +4279,7 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
 	int node;
 	struct kmem_cache_node *n;
 
-	flush_all(s);
+	flush_all_cpus_locked(s);
 	/* Attempt to free all objects */
 	for_each_kmem_cache_node(s, node, n) {
 		free_partial(s, n);
@@ -4279,7 +4555,7 @@ EXPORT_SYMBOL(kfree);
  * being allocated from last increasing the chance that the last objects
  * are freed in them.
  */
-int __kmem_cache_shrink(struct kmem_cache *s)
+static int __kmem_cache_do_shrink(struct kmem_cache *s)
 {
 	int node;
 	int i;
@@ -4291,7 +4567,6 @@ int __kmem_cache_shrink(struct kmem_cache *s)
 	unsigned long flags;
 	int ret = 0;
 
-	flush_all(s);
 	for_each_kmem_cache_node(s, node, n) {
 		INIT_LIST_HEAD(&discard);
 		for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
@@ -4341,13 +4616,21 @@ int __kmem_cache_shrink(struct kmem_cache *s)
 	return ret;
 }
 
+int __kmem_cache_shrink(struct kmem_cache *s)
+{
+	flush_all(s);
+	return __kmem_cache_do_shrink(s);
+}
+
 static int slab_mem_going_offline_callback(void *arg)
 {
 	struct kmem_cache *s;
 
 	mutex_lock(&slab_mutex);
-	list_for_each_entry(s, &slab_caches, list)
-		__kmem_cache_shrink(s);
+	list_for_each_entry(s, &slab_caches, list) {
+		flush_all_cpus_locked(s);
+		__kmem_cache_do_shrink(s);
+	}
 	mutex_unlock(&slab_mutex);
 
 	return 0;
@@ -4673,33 +4956,33 @@ static int count_total(struct page *page)
 #endif
 
 #ifdef CONFIG_SLUB_DEBUG
-static void validate_slab(struct kmem_cache *s, struct page *page)
+static void validate_slab(struct kmem_cache *s, struct page *page,
+			  unsigned long *obj_map)
 {
 	void *p;
 	void *addr = page_address(page);
-	unsigned long *map;
+	unsigned long flags;
 
-	slab_lock(page);
+	slab_lock(page, &flags);
 
 	if (!check_slab(s, page) || !on_freelist(s, page, NULL))
 		goto unlock;
 
 	/* Now we know that a valid freelist exists */
-	map = get_map(s, page);
+	__fill_map(obj_map, s, page);
 	for_each_object(p, s, addr, page->objects) {
-		u8 val = test_bit(__obj_to_index(s, addr, p), map) ?
+		u8 val = test_bit(__obj_to_index(s, addr, p), obj_map) ?
 			 SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;
 
 		if (!check_object(s, page, p, val))
 			break;
 	}
-	put_map(map);
 unlock:
-	slab_unlock(page);
+	slab_unlock(page, &flags);
 }
 
 static int validate_slab_node(struct kmem_cache *s,
-		struct kmem_cache_node *n)
+		struct kmem_cache_node *n, unsigned long *obj_map)
 {
 	unsigned long count = 0;
 	struct page *page;
@@ -4708,7 +4991,7 @@ static int validate_slab_node(struct kmem_cache *s,
 	spin_lock_irqsave(&n->list_lock, flags);
 
 	list_for_each_entry(page, &n->partial, slab_list) {
-		validate_slab(s, page);
+		validate_slab(s, page, obj_map);
 		count++;
 	}
 	if (count != n->nr_partial) {
@@ -4721,7 +5004,7 @@ static int validate_slab_node(struct kmem_cache *s,
 		goto out;
 
 	list_for_each_entry(page, &n->full, slab_list) {
-		validate_slab(s, page);
+		validate_slab(s, page, obj_map);
 		count++;
 	}
 	if (count != atomic_long_read(&n->nr_slabs)) {
@@ -4740,10 +5023,17 @@ long validate_slab_cache(struct kmem_cache *s)
 	int node;
 	unsigned long count = 0;
 	struct kmem_cache_node *n;
+	unsigned long *obj_map;
+
+	obj_map = bitmap_alloc(oo_objects(s->oo), GFP_KERNEL);
+	if (!obj_map)
+		return -ENOMEM;
 
 	flush_all(s);
 	for_each_kmem_cache_node(s, node, n)
-		count += validate_slab_node(s, n);
+		count += validate_slab_node(s, n, obj_map);
+
+	bitmap_free(obj_map);
 
 	return count;
 }
@@ -4879,17 +5169,17 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
 }
 
 static void process_slab(struct loc_track *t, struct kmem_cache *s,
-		struct page *page, enum track_item alloc)
+		struct page *page, enum track_item alloc,
+		unsigned long *obj_map)
 {
 	void *addr = page_address(page);
 	void *p;
-	unsigned long *map;
 
-	map = get_map(s, page);
+	__fill_map(obj_map, s, page);
+
 	for_each_object(p, s, addr, page->objects)
-		if (!test_bit(__obj_to_index(s, addr, p), map))
+		if (!test_bit(__obj_to_index(s, addr, p), obj_map))
 			add_location(t, s, get_track(s, p, alloc));
-	put_map(map);
 }
 #endif  /* CONFIG_DEBUG_FS   */
 #endif	/* CONFIG_SLUB_DEBUG */
@@ -5816,17 +6106,21 @@ static int slab_debug_trace_open(struct inode *inode, struct file *filep)
 	struct loc_track *t = __seq_open_private(filep, &slab_debugfs_sops,
 						sizeof(struct loc_track));
 	struct kmem_cache *s = file_inode(filep)->i_private;
+	unsigned long *obj_map;
+
+	obj_map = bitmap_alloc(oo_objects(s->oo), GFP_KERNEL);
+	if (!obj_map)
+		return -ENOMEM;
 
 	if (strcmp(filep->f_path.dentry->d_name.name, "alloc_traces") == 0)
 		alloc = TRACK_ALLOC;
 	else
 		alloc = TRACK_FREE;
 
-	if (!alloc_loc_track(t, PAGE_SIZE / sizeof(struct location), GFP_KERNEL))
+	if (!alloc_loc_track(t, PAGE_SIZE / sizeof(struct location), GFP_KERNEL)) {
+		bitmap_free(obj_map);
 		return -ENOMEM;
-
-	/* Push back cpu slabs */
-	flush_all(s);
+	}
 
 	for_each_kmem_cache_node(s, node, n) {
 		unsigned long flags;
@@ -5837,12 +6131,13 @@ static int slab_debug_trace_open(struct inode *inode, struct file *filep)
 
 		spin_lock_irqsave(&n->list_lock, flags);
 		list_for_each_entry(page, &n->partial, slab_list)
-			process_slab(t, s, page, alloc);
+			process_slab(t, s, page, alloc, obj_map);
 		list_for_each_entry(page, &n->full, slab_list)
-			process_slab(t, s, page, alloc);
+			process_slab(t, s, page, alloc, obj_map);
 		spin_unlock_irqrestore(&n->list_lock, flags);
 	}
 
+	bitmap_free(obj_map);
 	return 0;
 }
 
diff --git a/mm/sparse.c b/mm/sparse.c
index 6326cdf36c4f..120bc8ea5293 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -109,32 +109,6 @@ static inline int sparse_index_init(unsigned long section_nr, int nid)
 }
 #endif
 
-#ifdef CONFIG_SPARSEMEM_EXTREME
-unsigned long __section_nr(struct mem_section *ms)
-{
-	unsigned long root_nr;
-	struct mem_section *root = NULL;
-
-	for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
-		root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
-		if (!root)
-			continue;
-
-		if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT)))
-		     break;
-	}
-
-	VM_BUG_ON(!root);
-
-	return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
-}
-#else
-unsigned long __section_nr(struct mem_section *ms)
-{
-	return (unsigned long)(ms - mem_section[0]);
-}
-#endif
-
 /*
  * During early boot, before section_mem_map is used for an actual
  * mem_map, we use section_mem_map to store the section's NUMA
@@ -143,7 +117,7 @@ unsigned long __section_nr(struct mem_section *ms)
  */
 static inline unsigned long sparse_encode_early_nid(int nid)
 {
-	return (nid << SECTION_NID_SHIFT);
+	return ((unsigned long)nid << SECTION_NID_SHIFT);
 }
 
 static inline int sparse_early_nid(struct mem_section *section)
@@ -187,10 +161,9 @@ void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
  * those loops early.
  */
 unsigned long __highest_present_section_nr;
-static void section_mark_present(struct mem_section *ms)
+static void __section_mark_present(struct mem_section *ms,
+		unsigned long section_nr)
 {
-	unsigned long section_nr = __section_nr(ms);
-
 	if (section_nr > __highest_present_section_nr)
 		__highest_present_section_nr = section_nr;
 
@@ -280,7 +253,7 @@ static void __init memory_present(int nid, unsigned long start, unsigned long en
 		if (!ms->section_mem_map) {
 			ms->section_mem_map = sparse_encode_early_nid(nid) |
 							SECTION_IS_ONLINE;
-			section_mark_present(ms);
+			__section_mark_present(ms, section);
 		}
 	}
 }
@@ -348,7 +321,8 @@ size_t mem_section_usage_size(void)
 static inline phys_addr_t pgdat_to_phys(struct pglist_data *pgdat)
 {
 #ifndef CONFIG_NUMA
-	return __pa_symbol(pgdat);
+	VM_BUG_ON(pgdat != &contig_page_data);
+	return __pa_symbol(&contig_page_data);
 #else
 	return __pa(pgdat);
 #endif
@@ -462,8 +436,7 @@ struct page __init *__populate_section_memmap(unsigned long pfn,
 	if (map)
 		return map;
 
-	map = memblock_alloc_try_nid_raw(size, size, addr,
-					  MEMBLOCK_ALLOC_ACCESSIBLE, nid);
+	map = memmap_alloc(size, size, addr, nid, false);
 	if (!map)
 		panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%pa\n",
 		      __func__, size, PAGE_SIZE, nid, &addr);
@@ -490,8 +463,7 @@ static void __init sparse_buffer_init(unsigned long size, int nid)
 	 * and we want it to be properly aligned to the section size - this is
 	 * especially the case for VMEMMAP which maps memmap to PMDs
 	 */
-	sparsemap_buf = memblock_alloc_exact_nid_raw(size, section_map_size(),
-					addr, MEMBLOCK_ALLOC_ACCESSIBLE, nid);
+	sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true);
 	sparsemap_buf_end = sparsemap_buf + size;
 }
 
@@ -934,7 +906,7 @@ int __meminit sparse_add_section(int nid, unsigned long start_pfn,
 
 	ms = __nr_to_section(section_nr);
 	set_section_nid(section_nr, nid);
-	section_mark_present(ms);
+	__section_mark_present(ms, section_nr);
 
 	/* Align memmap to section boundary in the subsection case */
 	if (section_nr_to_pfn(section_nr) != start_pfn)
diff --git a/mm/swap.c b/mm/swap.c
index 19600430e536..897200d27dd0 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -179,28 +179,6 @@ int get_kernel_pages(const struct kvec *kiov, int nr_segs, int write,
 }
 EXPORT_SYMBOL_GPL(get_kernel_pages);
 
-/*
- * get_kernel_page() - pin a kernel page in memory
- * @start:	starting kernel address
- * @write:	pinning for read/write, currently ignored
- * @pages:	array that receives pointer to the page pinned.
- *		Must be at least nr_segs long.
- *
- * Returns 1 if page is pinned. If the page was not pinned, returns
- * -errno. The page returned must be released with a put_page() call
- * when it is finished with.
- */
-int get_kernel_page(unsigned long start, int write, struct page **pages)
-{
-	const struct kvec kiov = {
-		.iov_base = (void *)start,
-		.iov_len = PAGE_SIZE
-	};
-
-	return get_kernel_pages(&kiov, 1, write, pages);
-}
-EXPORT_SYMBOL_GPL(get_kernel_page);
-
 static void pagevec_lru_move_fn(struct pagevec *pvec,
 	void (*move_fn)(struct page *page, struct lruvec *lruvec))
 {
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 1e07d1c776f2..22d10f713848 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3130,6 +3130,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	struct filename *name;
 	struct file *swap_file = NULL;
 	struct address_space *mapping;
+	struct dentry *dentry;
 	int prio;
 	int error;
 	union swap_header *swap_header;
@@ -3173,6 +3174,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 
 	p->swap_file = swap_file;
 	mapping = swap_file->f_mapping;
+	dentry = swap_file->f_path.dentry;
 	inode = mapping->host;
 
 	error = claim_swapfile(p, inode);
@@ -3180,6 +3182,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 		goto bad_swap;
 
 	inode_lock(inode);
+	if (d_unlinked(dentry) || cant_mount(dentry)) {
+		error = -ENOENT;
+		goto bad_swap_unlock_inode;
+	}
 	if (IS_SWAPFILE(inode)) {
 		error = -EBUSY;
 		goto bad_swap_unlock_inode;
@@ -3773,7 +3779,7 @@ static void free_swap_count_continuations(struct swap_info_struct *si)
 }
 
 #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
-void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask)
+void __cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask)
 {
 	struct swap_info_struct *si, *next;
 	int nid = page_to_nid(page);
diff --git a/mm/truncate.c b/mm/truncate.c
index 44ad5e515140..714eaf19821d 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -484,8 +484,9 @@ static unsigned long __invalidate_mapping_pages(struct address_space *mapping,
 			index = indices[i];
 
 			if (xa_is_value(page)) {
-				invalidate_exceptional_entry(mapping, index,
-							     page);
+				count += invalidate_exceptional_entry(mapping,
+								      index,
+								      page);
 				continue;
 			}
 			index += thp_nr_pages(page) - 1;
@@ -513,19 +514,18 @@ static unsigned long __invalidate_mapping_pages(struct address_space *mapping,
 }
 
 /**
- * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
- * @mapping: the address_space which holds the pages to invalidate
+ * invalidate_mapping_pages - Invalidate all clean, unlocked cache of one inode
+ * @mapping: the address_space which holds the cache to invalidate
  * @start: the offset 'from' which to invalidate
  * @end: the offset 'to' which to invalidate (inclusive)
  *
- * This function only removes the unlocked pages, if you want to
- * remove all the pages of one inode, you must call truncate_inode_pages.
+ * This function removes pages that are clean, unmapped and unlocked,
+ * as well as shadow entries. It will not block on IO activity.
  *
- * invalidate_mapping_pages() will not block on IO activity. It will not
- * invalidate pages which are dirty, locked, under writeback or mapped into
- * pagetables.
+ * If you want to remove all the pages of one inode, regardless of
+ * their use and writeback state, use truncate_inode_pages().
  *
- * Return: the number of the pages that were invalidated
+ * Return: the number of the cache entries that were invalidated
  */
 unsigned long invalidate_mapping_pages(struct address_space *mapping,
 		pgoff_t start, pgoff_t end)
@@ -561,21 +561,19 @@ void invalidate_mapping_pagevec(struct address_space *mapping,
 static int
 invalidate_complete_page2(struct address_space *mapping, struct page *page)
 {
-	unsigned long flags;
-
 	if (page->mapping != mapping)
 		return 0;
 
 	if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL))
 		return 0;
 
-	xa_lock_irqsave(&mapping->i_pages, flags);
+	xa_lock_irq(&mapping->i_pages);
 	if (PageDirty(page))
 		goto failed;
 
 	BUG_ON(page_has_private(page));
 	__delete_from_page_cache(page, NULL);
-	xa_unlock_irqrestore(&mapping->i_pages, flags);
+	xa_unlock_irq(&mapping->i_pages);
 
 	if (mapping->a_ops->freepage)
 		mapping->a_ops->freepage(page);
@@ -583,7 +581,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
 	put_page(page);	/* pagecache ref */
 	return 1;
 failed:
-	xa_unlock_irqrestore(&mapping->i_pages, flags);
+	xa_unlock_irq(&mapping->i_pages);
 	return 0;
 }
 
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 0e2132834bc7..7a9008415534 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -483,7 +483,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 					      unsigned long src_start,
 					      unsigned long len,
 					      enum mcopy_atomic_mode mcopy_mode,
-					      bool *mmap_changing,
+					      atomic_t *mmap_changing,
 					      __u64 mode)
 {
 	struct vm_area_struct *dst_vma;
@@ -517,7 +517,7 @@ retry:
 	 * request the user to retry later
 	 */
 	err = -EAGAIN;
-	if (mmap_changing && READ_ONCE(*mmap_changing))
+	if (mmap_changing && atomic_read(mmap_changing))
 		goto out_unlock;
 
 	/*
@@ -650,28 +650,29 @@ out:
 
 ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
 		     unsigned long src_start, unsigned long len,
-		     bool *mmap_changing, __u64 mode)
+		     atomic_t *mmap_changing, __u64 mode)
 {
 	return __mcopy_atomic(dst_mm, dst_start, src_start, len,
 			      MCOPY_ATOMIC_NORMAL, mmap_changing, mode);
 }
 
 ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
-		       unsigned long len, bool *mmap_changing)
+		       unsigned long len, atomic_t *mmap_changing)
 {
 	return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_ZEROPAGE,
 			      mmap_changing, 0);
 }
 
 ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start,
-		       unsigned long len, bool *mmap_changing)
+		       unsigned long len, atomic_t *mmap_changing)
 {
 	return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_CONTINUE,
 			      mmap_changing, 0);
 }
 
 int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
-			unsigned long len, bool enable_wp, bool *mmap_changing)
+			unsigned long len, bool enable_wp,
+			atomic_t *mmap_changing)
 {
 	struct vm_area_struct *dst_vma;
 	pgprot_t newprot;
@@ -694,7 +695,7 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
 	 * request the user to retry later
 	 */
 	err = -EAGAIN;
-	if (mmap_changing && READ_ONCE(*mmap_changing))
+	if (mmap_changing && atomic_read(mmap_changing))
 		goto out_unlock;
 
 	err = -ENOENT;
diff --git a/mm/util.c b/mm/util.c
index 9043d03750a7..499b6b5767ed 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -593,6 +593,10 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
 	if (ret || size <= PAGE_SIZE)
 		return ret;
 
+	/* Don't even allow crazy sizes */
+	if (WARN_ON_ONCE(size > INT_MAX))
+		return NULL;
+
 	return __vmalloc_node(size, 1, flags, node,
 			__builtin_return_address(0));
 }
@@ -635,6 +639,21 @@ void kvfree_sensitive(const void *addr, size_t len)
 }
 EXPORT_SYMBOL(kvfree_sensitive);
 
+void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags)
+{
+	void *newp;
+
+	if (oldsize >= newsize)
+		return (void *)p;
+	newp = kvmalloc(newsize, flags);
+	if (!newp)
+		return NULL;
+	memcpy(newp, p, oldsize);
+	kvfree(p);
+	return newp;
+}
+EXPORT_SYMBOL(kvrealloc);
+
 static inline void *__page_rmapping(struct page *page)
 {
 	unsigned long mapping;
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d5cd52805149..d77830ff604c 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -44,6 +44,19 @@
 #include "internal.h"
 #include "pgalloc-track.h"
 
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+static unsigned int __ro_after_init ioremap_max_page_shift = BITS_PER_LONG - 1;
+
+static int __init set_nohugeiomap(char *str)
+{
+	ioremap_max_page_shift = PAGE_SHIFT;
+	return 0;
+}
+early_param("nohugeiomap", set_nohugeiomap);
+#else /* CONFIG_HAVE_ARCH_HUGE_VMAP */
+static const unsigned int ioremap_max_page_shift = PAGE_SHIFT;
+#endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */
+
 #ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
 static bool __ro_after_init vmap_allow_huge = true;
 
@@ -298,15 +311,14 @@ static int vmap_range_noflush(unsigned long addr, unsigned long end,
 	return err;
 }
 
-int vmap_range(unsigned long addr, unsigned long end,
-			phys_addr_t phys_addr, pgprot_t prot,
-			unsigned int max_page_shift)
+int ioremap_page_range(unsigned long addr, unsigned long end,
+		phys_addr_t phys_addr, pgprot_t prot)
 {
 	int err;
 
-	err = vmap_range_noflush(addr, end, phys_addr, prot, max_page_shift);
+	err = vmap_range_noflush(addr, end, phys_addr, pgprot_nx(prot),
+				 ioremap_max_page_shift);
 	flush_cache_vmap(addr, end);
-
 	return err;
 }
 
@@ -787,6 +799,28 @@ unsigned long vmalloc_nr_pages(void)
 	return atomic_long_read(&nr_vmalloc_pages);
 }
 
+static struct vmap_area *find_vmap_area_exceed_addr(unsigned long addr)
+{
+	struct vmap_area *va = NULL;
+	struct rb_node *n = vmap_area_root.rb_node;
+
+	while (n) {
+		struct vmap_area *tmp;
+
+		tmp = rb_entry(n, struct vmap_area, rb_node);
+		if (tmp->va_end > addr) {
+			va = tmp;
+			if (tmp->va_start <= addr)
+				break;
+
+			n = n->rb_left;
+		} else
+			n = n->rb_right;
+	}
+
+	return va;
+}
+
 static struct vmap_area *__find_vmap_area(unsigned long addr)
 {
 	struct rb_node *n = vmap_area_root.rb_node;
@@ -1479,6 +1513,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
 				int node, gfp_t gfp_mask)
 {
 	struct vmap_area *va;
+	unsigned long freed;
 	unsigned long addr;
 	int purged = 0;
 	int ret;
@@ -1542,13 +1577,12 @@ overflow:
 		goto retry;
 	}
 
-	if (gfpflags_allow_blocking(gfp_mask)) {
-		unsigned long freed = 0;
-		blocking_notifier_call_chain(&vmap_notify_list, 0, &freed);
-		if (freed > 0) {
-			purged = 0;
-			goto retry;
-		}
+	freed = 0;
+	blocking_notifier_call_chain(&vmap_notify_list, 0, &freed);
+
+	if (freed > 0) {
+		purged = 0;
+		goto retry;
 	}
 
 	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
@@ -2779,7 +2813,7 @@ EXPORT_SYMBOL_GPL(vmap_pfn);
 
 static inline unsigned int
 vm_area_alloc_pages(gfp_t gfp, int nid,
-		unsigned int order, unsigned long nr_pages, struct page **pages)
+		unsigned int order, unsigned int nr_pages, struct page **pages)
 {
 	unsigned int nr_allocated = 0;
 
@@ -2789,10 +2823,32 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 	 * to fails, fallback to a single page allocator that is
 	 * more permissive.
 	 */
-	if (!order)
-		nr_allocated = alloc_pages_bulk_array_node(
-			gfp, nid, nr_pages, pages);
-	else
+	if (!order) {
+		while (nr_allocated < nr_pages) {
+			unsigned int nr, nr_pages_request;
+
+			/*
+			 * A maximum allowed request is hard-coded and is 100
+			 * pages per call. That is done in order to prevent a
+			 * long preemption off scenario in the bulk-allocator
+			 * so the range is [1:100].
+			 */
+			nr_pages_request = min(100U, nr_pages - nr_allocated);
+
+			nr = alloc_pages_bulk_array_node(gfp, nid,
+				nr_pages_request, pages + nr_allocated);
+
+			nr_allocated += nr;
+			cond_resched();
+
+			/*
+			 * If zero or pages were obtained partly,
+			 * fallback to a single page allocator.
+			 */
+			if (nr != nr_pages_request)
+				break;
+		}
+	} else
 		/*
 		 * Compound pages required for remap_vmalloc_page if
 		 * high-order pages.
@@ -2816,9 +2872,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 		for (i = 0; i < (1U << order); i++)
 			pages[nr_allocated + i] = page + i;
 
-		if (gfpflags_allow_blocking(gfp))
-			cond_resched();
-
+		cond_resched();
 		nr_allocated += 1U << order;
 	}
 
@@ -3267,9 +3321,14 @@ long vread(char *buf, char *addr, unsigned long count)
 		count = -(unsigned long) addr;
 
 	spin_lock(&vmap_area_lock);
-	va = __find_vmap_area((unsigned long)addr);
+	va = find_vmap_area_exceed_addr((unsigned long)addr);
 	if (!va)
 		goto finished;
+
+	/* no intersects with alive vmap_area */
+	if ((unsigned long)addr + count <= va->va_start)
+		goto finished;
+
 	list_for_each_entry_from(va, &vmap_area_list, list) {
 		if (!count)
 			break;
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index d69019fc3789..76518e4166dc 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -74,8 +74,7 @@ static struct vmpressure *work_to_vmpressure(struct work_struct *work)
 
 static struct vmpressure *vmpressure_parent(struct vmpressure *vmpr)
 {
-	struct cgroup_subsys_state *css = vmpressure_to_css(vmpr);
-	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+	struct mem_cgroup *memcg = vmpressure_to_memcg(vmpr);
 
 	memcg = parent_mem_cgroup(memcg);
 	if (!memcg)
@@ -240,7 +239,12 @@ static void vmpressure_work_fn(struct work_struct *work)
 void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
 		unsigned long scanned, unsigned long reclaimed)
 {
-	struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
+	struct vmpressure *vmpr;
+
+	if (mem_cgroup_disabled())
+		return;
+
+	vmpr = memcg_to_vmpressure(memcg);
 
 	/*
 	 * Here we only want to account pressure that userland is able to
diff --git a/mm/vmscan.c b/mm/vmscan.c
index eeae2f6bc532..74296c2d1fed 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -41,6 +41,7 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/memcontrol.h>
+#include <linux/migrate.h>
 #include <linux/delayacct.h>
 #include <linux/sysctl.h>
 #include <linux/oom.h>
@@ -121,6 +122,9 @@ struct scan_control {
 	/* The file pages on the current node are dangerously low */
 	unsigned int file_is_tiny:1;
 
+	/* Always discard instead of demoting to lower tier memory */
+	unsigned int no_demotion:1;
+
 	/* Allocation order */
 	s8 order;
 
@@ -518,6 +522,48 @@ static long add_nr_deferred(long nr, struct shrinker *shrinker,
 	return atomic_long_add_return(nr, &shrinker->nr_deferred[nid]);
 }
 
+static bool can_demote(int nid, struct scan_control *sc)
+{
+	if (!numa_demotion_enabled)
+		return false;
+	if (sc) {
+		if (sc->no_demotion)
+			return false;
+		/* It is pointless to do demotion in memcg reclaim */
+		if (cgroup_reclaim(sc))
+			return false;
+	}
+	if (next_demotion_node(nid) == NUMA_NO_NODE)
+		return false;
+
+	return true;
+}
+
+static inline bool can_reclaim_anon_pages(struct mem_cgroup *memcg,
+					  int nid,
+					  struct scan_control *sc)
+{
+	if (memcg == NULL) {
+		/*
+		 * For non-memcg reclaim, is there
+		 * space in any swap device?
+		 */
+		if (get_nr_swap_pages() > 0)
+			return true;
+	} else {
+		/* Is the memcg below its swap limit? */
+		if (mem_cgroup_get_nr_swap_pages(memcg) > 0)
+			return true;
+	}
+
+	/*
+	 * The page can not be swapped.
+	 *
+	 * Can it be reclaimed from this node via demotion?
+	 */
+	return can_demote(nid, sc);
+}
+
 /*
  * This misses isolated pages which are not accounted for to save counters.
  * As the data only determines if reclaim or compaction continues, it is
@@ -529,7 +575,7 @@ unsigned long zone_reclaimable_pages(struct zone *zone)
 
 	nr = zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_FILE) +
 		zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_FILE);
-	if (get_nr_swap_pages() > 0)
+	if (can_reclaim_anon_pages(NULL, zone_to_nid(zone), NULL))
 		nr += zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_ANON) +
 			zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_ANON);
 
@@ -893,6 +939,7 @@ out:
 void drop_slab_node(int nid)
 {
 	unsigned long freed;
+	int shift = 0;
 
 	do {
 		struct mem_cgroup *memcg = NULL;
@@ -905,7 +952,7 @@ void drop_slab_node(int nid)
 		do {
 			freed += shrink_slab(GFP_KERNEL, nid, memcg, 0);
 		} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
-	} while (freed > 10);
+	} while ((freed >> shift++) > 1);
 }
 
 void drop_slab(void)
@@ -1052,14 +1099,13 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
 static int __remove_mapping(struct address_space *mapping, struct page *page,
 			    bool reclaimed, struct mem_cgroup *target_memcg)
 {
-	unsigned long flags;
 	int refcount;
 	void *shadow = NULL;
 
 	BUG_ON(!PageLocked(page));
 	BUG_ON(mapping != page_mapping(page));
 
-	xa_lock_irqsave(&mapping->i_pages, flags);
+	xa_lock_irq(&mapping->i_pages);
 	/*
 	 * The non racy check for a busy page.
 	 *
@@ -1100,7 +1146,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 		if (reclaimed && !mapping_exiting(mapping))
 			shadow = workingset_eviction(page, target_memcg);
 		__delete_from_swap_cache(page, swap, shadow);
-		xa_unlock_irqrestore(&mapping->i_pages, flags);
+		xa_unlock_irq(&mapping->i_pages);
 		put_swap_page(page, swap);
 	} else {
 		void (*freepage)(struct page *);
@@ -1126,7 +1172,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 		    !mapping_exiting(mapping) && !dax_mapping(mapping))
 			shadow = workingset_eviction(page, target_memcg);
 		__delete_from_page_cache(page, shadow);
-		xa_unlock_irqrestore(&mapping->i_pages, flags);
+		xa_unlock_irq(&mapping->i_pages);
 
 		if (freepage != NULL)
 			freepage(page);
@@ -1135,7 +1181,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 	return 1;
 
 cannot_free:
-	xa_unlock_irqrestore(&mapping->i_pages, flags);
+	xa_unlock_irq(&mapping->i_pages);
 	return 0;
 }
 
@@ -1264,6 +1310,54 @@ static void page_check_dirty_writeback(struct page *page,
 		mapping->a_ops->is_dirty_writeback(page, dirty, writeback);
 }
 
+static struct page *alloc_demote_page(struct page *page, unsigned long node)
+{
+	struct migration_target_control mtc = {
+		/*
+		 * Allocate from 'node', or fail quickly and quietly.
+		 * When this happens, 'page' will likely just be discarded
+		 * instead of migrated.
+		 */
+		.gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
+			    __GFP_THISNODE  | __GFP_NOWARN |
+			    __GFP_NOMEMALLOC | GFP_NOWAIT,
+		.nid = node
+	};
+
+	return alloc_migration_target(page, (unsigned long)&mtc);
+}
+
+/*
+ * Take pages on @demote_list and attempt to demote them to
+ * another node.  Pages which are not demoted are left on
+ * @demote_pages.
+ */
+static unsigned int demote_page_list(struct list_head *demote_pages,
+				     struct pglist_data *pgdat)
+{
+	int target_nid = next_demotion_node(pgdat->node_id);
+	unsigned int nr_succeeded;
+	int err;
+
+	if (list_empty(demote_pages))
+		return 0;
+
+	if (target_nid == NUMA_NO_NODE)
+		return 0;
+
+	/* Demotion ignores all cpuset and mempolicy settings */
+	err = migrate_pages(demote_pages, alloc_demote_page, NULL,
+			    target_nid, MIGRATE_ASYNC, MR_DEMOTION,
+			    &nr_succeeded);
+
+	if (current_is_kswapd())
+		__count_vm_events(PGDEMOTE_KSWAPD, nr_succeeded);
+	else
+		__count_vm_events(PGDEMOTE_DIRECT, nr_succeeded);
+
+	return nr_succeeded;
+}
+
 /*
  * shrink_page_list() returns the number of reclaimed pages
  */
@@ -1275,12 +1369,16 @@ static unsigned int shrink_page_list(struct list_head *page_list,
 {
 	LIST_HEAD(ret_pages);
 	LIST_HEAD(free_pages);
+	LIST_HEAD(demote_pages);
 	unsigned int nr_reclaimed = 0;
 	unsigned int pgactivate = 0;
+	bool do_demote_pass;
 
 	memset(stat, 0, sizeof(*stat));
 	cond_resched();
+	do_demote_pass = can_demote(pgdat->node_id, sc);
 
+retry:
 	while (!list_empty(page_list)) {
 		struct address_space *mapping;
 		struct page *page;
@@ -1430,6 +1528,17 @@ static unsigned int shrink_page_list(struct list_head *page_list,
 		}
 
 		/*
+		 * Before reclaiming the page, try to relocate
+		 * its contents to another node.
+		 */
+		if (do_demote_pass &&
+		    (thp_migration_supported() || !PageTransHuge(page))) {
+			list_add(&page->lru, &demote_pages);
+			unlock_page(page);
+			continue;
+		}
+
+		/*
 		 * Anonymous process memory has backing store?
 		 * Try to allocate it some swap space here.
 		 * Lazyfree page could be freed directly
@@ -1624,11 +1733,14 @@ static unsigned int shrink_page_list(struct list_head *page_list,
 			/* follow __remove_mapping for reference */
 			if (!page_ref_freeze(page, 1))
 				goto keep_locked;
-			if (PageDirty(page)) {
-				page_ref_unfreeze(page, 1);
-				goto keep_locked;
-			}
-
+			/*
+			 * The page has only one reference left, which is
+			 * from the isolation. After the caller puts the
+			 * page back on lru and drops the reference, the
+			 * page will be freed anyway. It doesn't matter
+			 * which lru it goes. So we don't bother checking
+			 * PageDirty here.
+			 */
 			count_vm_event(PGLAZYFREED);
 			count_memcg_page_event(page, PGLAZYFREED);
 		} else if (!mapping || !__remove_mapping(mapping, page, true,
@@ -1680,6 +1792,17 @@ keep:
 		list_add(&page->lru, &ret_pages);
 		VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page);
 	}
+	/* 'page_list' is always empty here */
+
+	/* Migrate pages selected for demotion */
+	nr_reclaimed += demote_page_list(&demote_pages, pgdat);
+	/* Pages that could not be demoted are still in @demote_pages */
+	if (!list_empty(&demote_pages)) {
+		/* Pages which failed to demoted go back on @page_list for retry: */
+		list_splice_init(&demote_pages, page_list);
+		do_demote_pass = false;
+		goto retry;
+	}
 
 	pgactivate = stat->nr_activate[0] + stat->nr_activate[1];
 
@@ -1698,7 +1821,6 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
 {
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
-		.priority = DEF_PRIORITY,
 		.may_unmap = 1,
 	};
 	struct reclaim_stat stat;
@@ -2323,10 +2445,10 @@ unsigned long reclaim_pages(struct list_head *page_list)
 	unsigned int noreclaim_flag;
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
-		.priority = DEF_PRIORITY,
 		.may_writepage = 1,
 		.may_unmap = 1,
 		.may_swap = 1,
+		.no_demotion = 1,
 	};
 
 	noreclaim_flag = memalloc_noreclaim_save();
@@ -2452,6 +2574,7 @@ enum scan_balance {
 static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 			   unsigned long *nr)
 {
+	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 	unsigned long anon_cost, file_cost, total_cost;
 	int swappiness = mem_cgroup_swappiness(memcg);
@@ -2462,7 +2585,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 	enum lru_list lru;
 
 	/* If we have no swap space, do not bother scanning anon pages. */
-	if (!sc->may_swap || mem_cgroup_get_nr_swap_pages(memcg) <= 0) {
+	if (!sc->may_swap || !can_reclaim_anon_pages(memcg, pgdat->node_id, sc)) {
 		scan_balance = SCAN_FILE;
 		goto out;
 	}
@@ -2592,7 +2715,7 @@ out:
 			cgroup_size = max(cgroup_size, protection);
 
 			scan = lruvec_size - lruvec_size * protection /
-				cgroup_size;
+				(cgroup_size + 1);
 
 			/*
 			 * Minimally target SWAP_CLUSTER_MAX pages to keep
@@ -2645,6 +2768,21 @@ out:
 	}
 }
 
+/*
+ * Anonymous LRU management is a waste if there is
+ * ultimately no way to reclaim the memory.
+ */
+static bool can_age_anon_pages(struct pglist_data *pgdat,
+			       struct scan_control *sc)
+{
+	/* Aging the anon LRU is valuable if swap is present: */
+	if (total_swap_pages > 0)
+		return true;
+
+	/* Also valuable if anon pages can be demoted: */
+	return can_demote(pgdat->node_id, sc);
+}
+
 static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 {
 	unsigned long nr[NR_LRU_LISTS];
@@ -2754,7 +2892,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 	 * Even if we did not try to evict anon pages at all, we want to
 	 * rebalance the anon lru active/inactive ratio.
 	 */
-	if (total_swap_pages && inactive_is_low(lruvec, LRU_INACTIVE_ANON))
+	if (can_age_anon_pages(lruvec_pgdat(lruvec), sc) &&
+	    inactive_is_low(lruvec, LRU_INACTIVE_ANON))
 		shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
 				   sc, LRU_ACTIVE_ANON);
 }
@@ -2824,7 +2963,7 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
 	 */
 	pages_for_compaction = compact_gap(sc->order);
 	inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE);
-	if (get_nr_swap_pages() > 0)
+	if (can_reclaim_anon_pages(NULL, pgdat->node_id, sc))
 		inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON);
 
 	return inactive_lru_pages > pages_for_compaction;
@@ -2898,6 +3037,12 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 	target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
 
 again:
+	/*
+	 * Flush the memory cgroup stats, so that we read accurate per-memcg
+	 * lruvec stats for heuristics.
+	 */
+	mem_cgroup_flush_stats();
+
 	memset(&sc->nr, 0, sizeof(sc->nr));
 
 	nr_reclaimed = sc->nr_reclaimed;
@@ -3434,18 +3579,14 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
 	 * blocked waiting on the same lock. Instead, throttle for up to a
 	 * second before continuing.
 	 */
-	if (!(gfp_mask & __GFP_FS)) {
+	if (!(gfp_mask & __GFP_FS))
 		wait_event_interruptible_timeout(pgdat->pfmemalloc_wait,
 			allow_direct_reclaim(pgdat), HZ);
+	else
+		/* Throttle until kswapd wakes the process */
+		wait_event_killable(zone->zone_pgdat->pfmemalloc_wait,
+			allow_direct_reclaim(pgdat));
 
-		goto check_pending;
-	}
-
-	/* Throttle until kswapd wakes the process */
-	wait_event_killable(zone->zone_pgdat->pfmemalloc_wait,
-		allow_direct_reclaim(pgdat));
-
-check_pending:
 	if (fatal_signal_pending(current))
 		return true;
 
@@ -3583,7 +3724,7 @@ static void age_active_anon(struct pglist_data *pgdat,
 	struct mem_cgroup *memcg;
 	struct lruvec *lruvec;
 
-	if (!total_swap_pages)
+	if (!can_age_anon_pages(pgdat, sc))
 		return;
 
 	lruvec = mem_cgroup_lruvec(NULL, pgdat);
@@ -3812,7 +3953,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx)
 
 	set_task_reclaim_state(current, &sc.reclaim_state);
 	psi_memstall_enter(&pflags);
-	__fs_reclaim_acquire();
+	__fs_reclaim_acquire(_THIS_IP_);
 
 	count_vm_event(PAGEOUTRUN);
 
@@ -3938,9 +4079,9 @@ restart:
 			wake_up_all(&pgdat->pfmemalloc_wait);
 
 		/* Check if kswapd should be suspending */
-		__fs_reclaim_release();
+		__fs_reclaim_release(_THIS_IP_);
 		ret = try_to_freeze();
-		__fs_reclaim_acquire();
+		__fs_reclaim_acquire(_THIS_IP_);
 		if (ret || kthread_should_stop())
 			break;
 
@@ -3992,7 +4133,7 @@ out:
 	}
 
 	snapshot_refaults(NULL, pgdat);
-	__fs_reclaim_release();
+	__fs_reclaim_release(_THIS_IP_);
 	psi_memstall_leave(&pflags);
 	set_task_reclaim_state(current, NULL);
 
@@ -4290,23 +4431,20 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
  * This kswapd start function will be called by init and node-hot-add.
  * On node-hot-add, kswapd will moved to proper cpus if cpus are hot-added.
  */
-int kswapd_run(int nid)
+void kswapd_run(int nid)
 {
 	pg_data_t *pgdat = NODE_DATA(nid);
-	int ret = 0;
 
 	if (pgdat->kswapd)
-		return 0;
+		return;
 
 	pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
 	if (IS_ERR(pgdat->kswapd)) {
 		/* failure at boot is fatal */
 		BUG_ON(system_state < SYSTEM_RUNNING);
 		pr_err("Failed to start kswapd on node %d\n", nid);
-		ret = PTR_ERR(pgdat->kswapd);
 		pgdat->kswapd = NULL;
 	}
-	return ret;
 }
 
 /*
diff --git a/mm/vmstat.c b/mm/vmstat.c
index a7ed56ac4c0b..8ce2620344b2 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -204,7 +204,7 @@ int calculate_normal_threshold(struct zone *zone)
 	 *
 	 * Some sample thresholds:
 	 *
-	 * Threshold	Processors	(fls)	Zonesize	fls(mem+1)
+	 * Threshold	Processors	(fls)	Zonesize	fls(mem)+1
 	 * ------------------------------------------------------------------
 	 * 8		1		1	0.9-1 GB	4
 	 * 16		2		2	0.9-1 GB	4
@@ -319,6 +319,16 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
 	long x;
 	long t;
 
+	/*
+	 * Accurate vmstat updates require a RMW. On !PREEMPT_RT kernels,
+	 * atomicity is provided by IRQs being disabled -- either explicitly
+	 * or via local_lock_irq. On PREEMPT_RT, local_lock_irq only disables
+	 * CPU migrations and preemption potentially corrupts a counter so
+	 * disable preemption.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_disable();
+
 	x = delta + __this_cpu_read(*p);
 
 	t = __this_cpu_read(pcp->stat_threshold);
@@ -328,6 +338,9 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
 		x = 0;
 	}
 	__this_cpu_write(*p, x);
+
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_enable();
 }
 EXPORT_SYMBOL(__mod_zone_page_state);
 
@@ -350,6 +363,10 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
 		delta >>= PAGE_SHIFT;
 	}
 
+	/* See __mod_node_page_state */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_disable();
+
 	x = delta + __this_cpu_read(*p);
 
 	t = __this_cpu_read(pcp->stat_threshold);
@@ -359,6 +376,9 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
 		x = 0;
 	}
 	__this_cpu_write(*p, x);
+
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_enable();
 }
 EXPORT_SYMBOL(__mod_node_page_state);
 
@@ -391,6 +411,10 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
 	s8 __percpu *p = pcp->vm_stat_diff + item;
 	s8 v, t;
 
+	/* See __mod_node_page_state */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_disable();
+
 	v = __this_cpu_inc_return(*p);
 	t = __this_cpu_read(pcp->stat_threshold);
 	if (unlikely(v > t)) {
@@ -399,6 +423,9 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
 		zone_page_state_add(v + overstep, zone, item);
 		__this_cpu_write(*p, -overstep);
 	}
+
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_enable();
 }
 
 void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
@@ -409,6 +436,10 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
 
 	VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
 
+	/* See __mod_node_page_state */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_disable();
+
 	v = __this_cpu_inc_return(*p);
 	t = __this_cpu_read(pcp->stat_threshold);
 	if (unlikely(v > t)) {
@@ -417,6 +448,9 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
 		node_page_state_add(v + overstep, pgdat, item);
 		__this_cpu_write(*p, -overstep);
 	}
+
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_enable();
 }
 
 void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
@@ -437,6 +471,10 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
 	s8 __percpu *p = pcp->vm_stat_diff + item;
 	s8 v, t;
 
+	/* See __mod_node_page_state */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_disable();
+
 	v = __this_cpu_dec_return(*p);
 	t = __this_cpu_read(pcp->stat_threshold);
 	if (unlikely(v < - t)) {
@@ -445,6 +483,9 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
 		zone_page_state_add(v - overstep, zone, item);
 		__this_cpu_write(*p, overstep);
 	}
+
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_enable();
 }
 
 void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
@@ -455,6 +496,10 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
 
 	VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
 
+	/* See __mod_node_page_state */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_disable();
+
 	v = __this_cpu_dec_return(*p);
 	t = __this_cpu_read(pcp->stat_threshold);
 	if (unlikely(v < - t)) {
@@ -463,6 +508,9 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
 		node_page_state_add(v - overstep, pgdat, item);
 		__this_cpu_write(*p, overstep);
 	}
+
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_enable();
 }
 
 void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
@@ -1217,6 +1265,8 @@ const char * const vmstat_text[] = {
 	"pgreuse",
 	"pgsteal_kswapd",
 	"pgsteal_direct",
+	"pgdemote_kswapd",
+	"pgdemote_direct",
 	"pgscan_kswapd",
 	"pgscan_direct",
 	"pgscan_direct_throttle",
@@ -1452,7 +1502,7 @@ static void pagetypeinfo_showfree_print(struct seq_file *m,
 }
 
 /* Print out the free pages at each order for each migatetype */
-static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
+static void pagetypeinfo_showfree(struct seq_file *m, void *arg)
 {
 	int order;
 	pg_data_t *pgdat = (pg_data_t *)arg;
@@ -1464,8 +1514,6 @@ static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
 	seq_putc(m, '\n');
 
 	walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print);
-
-	return 0;
 }
 
 static void pagetypeinfo_showblockcount_print(struct seq_file *m,
@@ -1501,7 +1549,7 @@ static void pagetypeinfo_showblockcount_print(struct seq_file *m,
 }
 
 /* Print out the number of pageblocks for each migratetype */
-static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
+static void pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
 {
 	int mtype;
 	pg_data_t *pgdat = (pg_data_t *)arg;
@@ -1512,8 +1560,6 @@ static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
 	seq_putc(m, '\n');
 	walk_zones_in_node(m, pgdat, true, false,
 		pagetypeinfo_showblockcount_print);
-
-	return 0;
 }
 
 /*
@@ -1874,11 +1920,6 @@ static void vmstat_update(struct work_struct *w)
 }
 
 /*
- * Switch off vmstat processing and then fold all the remaining differentials
- * until the diffs stay at zero. The function is used by NOHZ and can only be
- * invoked when tick processing is not active.
- */
-/*
  * Check if the diffs for a certain cpu indicate that
  * an update is needed.
  */
@@ -1894,17 +1935,15 @@ static bool need_update(int cpu)
 		/*
 		 * The fast way of checking if there are any vmstat diffs.
 		 */
-		if (memchr_inv(pzstats->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS *
-			       sizeof(pzstats->vm_stat_diff[0])))
+		if (memchr_inv(pzstats->vm_stat_diff, 0, sizeof(pzstats->vm_stat_diff)))
 			return true;
 
 		if (last_pgdat == zone->zone_pgdat)
 			continue;
 		last_pgdat = zone->zone_pgdat;
 		n = per_cpu_ptr(zone->zone_pgdat->per_cpu_nodestats, cpu);
-		if (memchr_inv(n->vm_node_stat_diff, 0, NR_VM_NODE_STAT_ITEMS *
-			       sizeof(n->vm_node_stat_diff[0])))
-		    return true;
+		if (memchr_inv(n->vm_node_stat_diff, 0, sizeof(n->vm_node_stat_diff)))
+			return true;
 	}
 	return false;
 }
diff --git a/mm/workingset.c b/mm/workingset.c
index 5ba3e42446fa..d4268d8e9a82 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -249,7 +249,7 @@ void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages)
  * @target_memcg: the cgroup that is causing the reclaim
  * @page: the page being evicted
  *
- * Returns a shadow entry to be stored in @page->mapping->i_pages in place
+ * Return: a shadow entry to be stored in @page->mapping->i_pages in place
  * of the evicted @page so that a later refault can be detected.
  */
 void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg)
diff --git a/net/9p/client.c b/net/9p/client.c
index b7b958f61faf..213f12ed76cd 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -30,6 +30,8 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/9p.h>
 
+#define DEFAULT_MSIZE (128 * 1024)
+
 /*
   * Client Option Parsing (code inspired by NFS code)
   *  - a little lazy - parse all client options
@@ -65,7 +67,7 @@ EXPORT_SYMBOL(p9_is_proto_dotu);
 
 int p9_show_client_options(struct seq_file *m, struct p9_client *clnt)
 {
-	if (clnt->msize != 8192)
+	if (clnt->msize != DEFAULT_MSIZE)
 		seq_printf(m, ",msize=%u", clnt->msize);
 	seq_printf(m, ",trans=%s", clnt->trans_mod->name);
 
@@ -139,7 +141,7 @@ static int parse_opts(char *opts, struct p9_client *clnt)
 	int ret = 0;
 
 	clnt->proto_version = p9_proto_2000L;
-	clnt->msize = 8192;
+	clnt->msize = DEFAULT_MSIZE;
 
 	if (!opts)
 		return 0;
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index f4dd0456beaf..007bbcc68010 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -34,7 +34,7 @@
 #include <linux/syscalls.h> /* killme */
 
 #define P9_PORT 564
-#define MAX_SOCK_BUF (64*1024)
+#define MAX_SOCK_BUF (1024*1024)
 #define MAXPOLLWADDR	2
 
 static struct p9_trans_module p9_tcp_trans;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 2bbd7dce0f1d..490a4c900339 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -610,7 +610,7 @@ static int p9_virtio_probe(struct virtio_device *vdev)
 	chan->vc_wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL);
 	if (!chan->vc_wq) {
 		err = -ENOMEM;
-		goto out_free_tag;
+		goto out_remove_file;
 	}
 	init_waitqueue_head(chan->vc_wq);
 	chan->ring_bufs_avail = 1;
@@ -628,6 +628,8 @@ static int p9_virtio_probe(struct virtio_device *vdev)
 
 	return 0;
 
+out_remove_file:
+	sysfs_remove_file(&vdev->dev.kobj, &dev_attr_mount_tag.attr);
 out_free_tag:
 	kfree(tag);
 out_free_vq:
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index f4fea28e05da..3ec1a51a6944 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -138,7 +138,7 @@ static bool p9_xen_write_todo(struct xen_9pfs_dataring *ring, RING_IDX size)
 
 static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
 {
-	struct xen_9pfs_front_priv *priv = NULL;
+	struct xen_9pfs_front_priv *priv;
 	RING_IDX cons, prod, masked_cons, masked_prod;
 	unsigned long flags;
 	u32 size = p9_req->tc.size;
@@ -151,7 +151,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
 			break;
 	}
 	read_unlock(&xen_9pfs_lock);
-	if (!priv || priv->client != client)
+	if (list_entry_is_head(priv, &xen_9pfs_devs, list))
 		return -EINVAL;
 
 	num = p9_req->tc.tag % priv->num_rings;
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index f0e5d1429662..7a93a1e94c40 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -7,7 +7,7 @@
 #include "log.h"
 #include "main.h"
 
-#include <stdarg.h>
+#include <linux/stdarg.h>
 
 #include "trace.h"
 
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 9231617a16e4..3523c8c7068f 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -4255,7 +4255,7 @@ int br_multicast_set_port_router(struct net_bridge_mcast_port *pmctx,
 	bool del = false;
 
 	brmctx = br_multicast_port_ctx_get_global(pmctx);
-	spin_lock(&brmctx->br->multicast_lock);
+	spin_lock_bh(&brmctx->br->multicast_lock);
 	if (pmctx->multicast_router == val) {
 		/* Refresh the temp router port timer */
 		if (pmctx->multicast_router == MDB_RTR_TYPE_TEMP) {
@@ -4305,7 +4305,7 @@ int br_multicast_set_port_router(struct net_bridge_mcast_port *pmctx,
 	}
 	err = 0;
 unlock:
-	spin_unlock(&brmctx->br->multicast_lock);
+	spin_unlock_bh(&brmctx->br->multicast_lock);
 
 	return err;
 }
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 9e5a3249373c..a3d74e2704c4 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3602,7 +3602,6 @@ out:
 
 static int pktgen_thread_worker(void *arg)
 {
-	DEFINE_WAIT(wait);
 	struct pktgen_thread *t = arg;
 	struct pktgen_dev *pkt_dev = NULL;
 	int cpu = t->cpu;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f9311762cc47..2170bea2c7de 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3884,7 +3884,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
 		skb_push(nskb, -skb_network_offset(nskb) + offset);
 
 		skb_release_head_state(nskb);
-		 __copy_skb_header(nskb, skb);
+		__copy_skb_header(nskb, skb);
 
 		skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb));
 		skb_copy_from_linear_data_offset(skb, -tnl_hlen,
diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c
index 40811bab4d09..f920487ae145 100644
--- a/net/dsa/tag_rtl4_a.c
+++ b/net/dsa/tag_rtl4_a.c
@@ -54,9 +54,10 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb,
 	p = (__be16 *)tag;
 	*p = htons(RTL4_A_ETHERTYPE);
 
-	out = (RTL4_A_PROTOCOL_RTL8366RB << 12) | (2 << 8);
-	/* The lower bits is the port number */
-	out |= (u8)dp->index;
+	out = (RTL4_A_PROTOCOL_RTL8366RB << RTL4_A_PROTOCOL_SHIFT) | (2 << 8);
+	/* The lower bits indicate the port number */
+	out |= BIT(dp->index);
+
 	p = (__be16 *)(tag + 2);
 	*p = htons(out);
 
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 7fbd0b532f52..099259fc826a 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -465,16 +465,14 @@ void cipso_v4_doi_free(struct cipso_v4_doi *doi_def)
 	if (!doi_def)
 		return;
 
-	if (doi_def->map.std) {
-		switch (doi_def->type) {
-		case CIPSO_V4_MAP_TRANS:
-			kfree(doi_def->map.std->lvl.cipso);
-			kfree(doi_def->map.std->lvl.local);
-			kfree(doi_def->map.std->cat.cipso);
-			kfree(doi_def->map.std->cat.local);
-			kfree(doi_def->map.std);
-			break;
-		}
+	switch (doi_def->type) {
+	case CIPSO_V4_MAP_TRANS:
+		kfree(doi_def->map.std->lvl.cipso);
+		kfree(doi_def->map.std->lvl.local);
+		kfree(doi_def->map.std->cat.cipso);
+		kfree(doi_def->map.std->cat.local);
+		kfree(doi_def->map.std);
+		break;
 	}
 	kfree(doi_def);
 }
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 177d26d8fb9c..0fe6c936dc54 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -473,8 +473,6 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
 
 static int gre_handle_offloads(struct sk_buff *skb, bool csum)
 {
-	if (csum && skb_checksum_start(skb) < skb->data)
-		return -EINVAL;
 	return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
 }
 
@@ -632,15 +630,20 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
 	}
 
 	if (dev->header_ops) {
+		const int pull_len = tunnel->hlen + sizeof(struct iphdr);
+
 		if (skb_cow_head(skb, 0))
 			goto free_skb;
 
 		tnl_params = (const struct iphdr *)skb->data;
 
+		if (pull_len > skb_transport_offset(skb))
+			goto free_skb;
+
 		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
 		 * to gre header.
 		 */
-		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
+		skb_pull(skb, pull_len);
 		skb_reset_mac_header(skb);
 	} else {
 		if (skb_cow_head(skb, dev->needed_headroom))
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 4075230b14c6..75ca4b6e484f 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -2490,6 +2490,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh,
 		.fc_gw4   = cfg->gw.ipv4,
 		.fc_gw_family = cfg->gw.ipv4 ? AF_INET : 0,
 		.fc_flags = cfg->nh_flags,
+		.fc_nlinfo = cfg->nlinfo,
 		.fc_encap = cfg->nh_encap,
 		.fc_encap_type = cfg->nh_encap_type,
 	};
@@ -2528,6 +2529,7 @@ static int nh_create_ipv6(struct net *net,  struct nexthop *nh,
 		.fc_ifindex = cfg->nh_ifindex,
 		.fc_gateway = cfg->gw.ipv6,
 		.fc_flags = cfg->nh_flags,
+		.fc_nlinfo = cfg->nlinfo,
 		.fc_encap = cfg->nh_encap,
 		.fc_encap_type = cfg->nh_encap_type,
 		.fc_is_fdb = cfg->nh_fdb,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 17756f3ed33b..c6a90b7bbb70 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3092,19 +3092,22 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 	}
 }
 
-#if IS_ENABLED(CONFIG_IPV6_SIT)
-static void sit_add_v4_addrs(struct inet6_dev *idev)
+#if IS_ENABLED(CONFIG_IPV6_SIT) || IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE)
+static void add_v4_addrs(struct inet6_dev *idev)
 {
 	struct in6_addr addr;
 	struct net_device *dev;
 	struct net *net = dev_net(idev->dev);
-	int scope, plen;
+	int scope, plen, offset = 0;
 	u32 pflags = 0;
 
 	ASSERT_RTNL();
 
 	memset(&addr, 0, sizeof(struct in6_addr));
-	memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4);
+	/* in case of IP6GRE the dev_addr is an IPv6 and therefore we use only the last 4 bytes */
+	if (idev->dev->addr_len == sizeof(struct in6_addr))
+		offset = sizeof(struct in6_addr) - 4;
+	memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4);
 
 	if (idev->dev->flags&IFF_POINTOPOINT) {
 		addr.s6_addr32[0] = htonl(0xfe800000);
@@ -3342,8 +3345,6 @@ static void addrconf_dev_config(struct net_device *dev)
 	    (dev->type != ARPHRD_IEEE1394) &&
 	    (dev->type != ARPHRD_TUNNEL6) &&
 	    (dev->type != ARPHRD_6LOWPAN) &&
-	    (dev->type != ARPHRD_IP6GRE) &&
-	    (dev->type != ARPHRD_IPGRE) &&
 	    (dev->type != ARPHRD_TUNNEL) &&
 	    (dev->type != ARPHRD_NONE) &&
 	    (dev->type != ARPHRD_RAWIP)) {
@@ -3391,14 +3392,14 @@ static void addrconf_sit_config(struct net_device *dev)
 		return;
 	}
 
-	sit_add_v4_addrs(idev);
+	add_v4_addrs(idev);
 
 	if (dev->flags&IFF_POINTOPOINT)
 		addrconf_add_mroute(dev);
 }
 #endif
 
-#if IS_ENABLED(CONFIG_NET_IPGRE)
+#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE)
 static void addrconf_gre_config(struct net_device *dev)
 {
 	struct inet6_dev *idev;
@@ -3411,7 +3412,13 @@ static void addrconf_gre_config(struct net_device *dev)
 		return;
 	}
 
-	addrconf_addr_gen(idev, true);
+	if (dev->type == ARPHRD_ETHER) {
+		addrconf_addr_gen(idev, true);
+		return;
+	}
+
+	add_v4_addrs(idev);
+
 	if (dev->flags & IFF_POINTOPOINT)
 		addrconf_add_mroute(dev);
 }
@@ -3587,7 +3594,8 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 			addrconf_sit_config(dev);
 			break;
 #endif
-#if IS_ENABLED(CONFIG_NET_IPGRE)
+#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE)
+		case ARPHRD_IP6GRE:
 		case ARPHRD_IPGRE:
 			addrconf_gre_config(dev);
 			break;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 7baf41d160f5..3ad201d372d8 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -629,8 +629,6 @@ drop:
 
 static int gre_handle_offloads(struct sk_buff *skb, bool csum)
 {
-	if (csum && skb_checksum_start(skb) < skb->data)
-		return -EINVAL;
 	return iptunnel_handle_offloads(skb,
 					csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
 }
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index cd951faa2fac..bed8155508c8 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1356,8 +1356,8 @@ static int mld_process_v1(struct inet6_dev *idev, struct mld_msg *mld,
 	return 0;
 }
 
-static int mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld,
-			  unsigned long *max_delay)
+static void mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld,
+			   unsigned long *max_delay)
 {
 	*max_delay = max(msecs_to_jiffies(mldv2_mrc(mld)), 1UL);
 
@@ -1367,7 +1367,7 @@ static int mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld,
 
 	idev->mc_maxdelay = *max_delay;
 
-	return 0;
+	return;
 }
 
 /* called with rcu_read_lock() */
@@ -1454,9 +1454,7 @@ static void __mld_query_work(struct sk_buff *skb)
 
 		mlh2 = (struct mld2_query *)skb_transport_header(skb);
 
-		err = mld_process_v2(idev, mlh2, &max_delay);
-		if (err < 0)
-			goto out;
+		mld_process_v2(idev, mlh2, &max_delay);
 
 		if (group_type == IPV6_ADDR_ANY) { /* general query */
 			if (mlh2->mld2q_nsrcs)
diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c
index 6fd54744cbc3..aa5bb8789ba0 100644
--- a/net/ipv6/netfilter/nf_socket_ipv6.c
+++ b/net/ipv6/netfilter/nf_socket_ipv6.c
@@ -99,7 +99,7 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
 {
 	__be16 dport, sport;
 	const struct in6_addr *daddr = NULL, *saddr = NULL;
-	struct ipv6hdr *iph = ipv6_hdr(skb);
+	struct ipv6hdr *iph = ipv6_hdr(skb), ipv6_var;
 	struct sk_buff *data_skb = NULL;
 	int doff = 0;
 	int thoff = 0, tproto;
@@ -129,8 +129,6 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
 			thoff + sizeof(*hp);
 
 	} else if (tproto == IPPROTO_ICMPV6) {
-		struct ipv6hdr ipv6_var;
-
 		if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,
 					 &sport, &dport, &ipv6_var))
 			return NULL;
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 1bf5f5ae75ac..3adc5d9211ad 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -385,7 +385,7 @@ static int seg6_output_core(struct net *net, struct sock *sk,
 	struct dst_entry *orig_dst = skb_dst(skb);
 	struct dst_entry *dst = NULL;
 	struct seg6_lwt *slwt;
-	int err = -EINVAL;
+	int err;
 
 	err = seg6_do_srh(skb);
 	if (unlikely(err))
diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c
index 1cf5ac09edcb..323d3d2d986f 100644
--- a/net/mac802154/iface.c
+++ b/net/mac802154/iface.c
@@ -617,7 +617,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name,
 {
 	struct net_device *ndev = NULL;
 	struct ieee802154_sub_if_data *sdata = NULL;
-	int ret = -ENOMEM;
+	int ret;
 
 	ASSERT_RTNL();
 
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 1e4289c507ff..c4f9a5ce3815 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -644,15 +644,12 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
 	subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node);
 	if (subflow) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-		bool slow;
 
 		spin_unlock_bh(&msk->pm.lock);
 		pr_debug("send ack for %s",
 			 mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr");
 
-		slow = lock_sock_fast(ssk);
-		tcp_send_ack(ssk);
-		unlock_sock_fast(ssk, slow);
+		mptcp_subflow_send_ack(ssk);
 		spin_lock_bh(&msk->pm.lock);
 	}
 }
@@ -669,7 +666,6 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 		struct sock *sk = (struct sock *)msk;
 		struct mptcp_addr_info local;
-		bool slow;
 
 		local_address((struct sock_common *)ssk, &local);
 		if (!addresses_equal(&local, addr, addr->port))
@@ -682,9 +678,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
 
 		spin_unlock_bh(&msk->pm.lock);
 		pr_debug("send ack for mp_prio");
-		slow = lock_sock_fast(ssk);
-		tcp_send_ack(ssk);
-		unlock_sock_fast(ssk, slow);
+		mptcp_subflow_send_ack(ssk);
 		spin_lock_bh(&msk->pm.lock);
 
 		return 0;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index ade648c3512b..2602f1386160 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -440,19 +440,22 @@ static bool tcp_can_send_ack(const struct sock *ssk)
 	       (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_TIME_WAIT | TCPF_CLOSE | TCPF_LISTEN));
 }
 
+void mptcp_subflow_send_ack(struct sock *ssk)
+{
+	bool slow;
+
+	slow = lock_sock_fast(ssk);
+	if (tcp_can_send_ack(ssk))
+		tcp_send_ack(ssk);
+	unlock_sock_fast(ssk, slow);
+}
+
 static void mptcp_send_ack(struct mptcp_sock *msk)
 {
 	struct mptcp_subflow_context *subflow;
 
-	mptcp_for_each_subflow(msk, subflow) {
-		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-		bool slow;
-
-		slow = lock_sock_fast(ssk);
-		if (tcp_can_send_ack(ssk))
-			tcp_send_ack(ssk);
-		unlock_sock_fast(ssk, slow);
-	}
+	mptcp_for_each_subflow(msk, subflow)
+		mptcp_subflow_send_ack(mptcp_subflow_tcp_sock(subflow));
 }
 
 static void mptcp_subflow_cleanup_rbuf(struct sock *ssk)
@@ -1003,6 +1006,13 @@ static void mptcp_wmem_uncharge(struct sock *sk, int size)
 	msk->wmem_reserved += size;
 }
 
+static void __mptcp_mem_reclaim_partial(struct sock *sk)
+{
+	lockdep_assert_held_once(&sk->sk_lock.slock);
+	__mptcp_update_wmem(sk);
+	sk_mem_reclaim_partial(sk);
+}
+
 static void mptcp_mem_reclaim_partial(struct sock *sk)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1094,12 +1104,8 @@ static void __mptcp_clean_una(struct sock *sk)
 		msk->recovery = false;
 
 out:
-	if (cleaned) {
-		if (tcp_under_memory_pressure(sk)) {
-			__mptcp_update_wmem(sk);
-			sk_mem_reclaim_partial(sk);
-		}
-	}
+	if (cleaned && tcp_under_memory_pressure(sk))
+		__mptcp_mem_reclaim_partial(sk);
 
 	if (snd_una == READ_ONCE(msk->snd_nxt) && !msk->recovery) {
 		if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
@@ -1179,6 +1185,7 @@ struct mptcp_sendmsg_info {
 	u16 limit;
 	u16 sent;
 	unsigned int flags;
+	bool data_lock_held;
 };
 
 static int mptcp_check_allowed_size(struct mptcp_sock *msk, u64 data_seq,
@@ -1250,17 +1257,17 @@ static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
 	return false;
 }
 
-static bool mptcp_must_reclaim_memory(struct sock *sk, struct sock *ssk)
+static bool mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, bool data_lock_held)
 {
-	return !ssk->sk_tx_skb_cache &&
-	       tcp_under_memory_pressure(sk);
-}
+	gfp_t gfp = data_lock_held ? GFP_ATOMIC : sk->sk_allocation;
 
-static bool mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk)
-{
-	if (unlikely(mptcp_must_reclaim_memory(sk, ssk)))
-		mptcp_mem_reclaim_partial(sk);
-	return __mptcp_alloc_tx_skb(sk, ssk, sk->sk_allocation);
+	if (unlikely(tcp_under_memory_pressure(sk))) {
+		if (data_lock_held)
+			__mptcp_mem_reclaim_partial(sk);
+		else
+			mptcp_mem_reclaim_partial(sk);
+	}
+	return __mptcp_alloc_tx_skb(sk, ssk, gfp);
 }
 
 /* note: this always recompute the csum on the whole skb, even
@@ -1284,7 +1291,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 	bool zero_window_probe = false;
 	struct mptcp_ext *mpext = NULL;
 	struct sk_buff *skb, *tail;
-	bool can_collapse = false;
+	bool must_collapse = false;
 	int size_bias = 0;
 	int avail_size;
 	size_t ret = 0;
@@ -1304,16 +1311,24 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 		 * SSN association set here
 		 */
 		mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
-		can_collapse = (info->size_goal - skb->len > 0) &&
-			 mptcp_skb_can_collapse_to(data_seq, skb, mpext);
-		if (!can_collapse) {
+		if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) {
 			TCP_SKB_CB(skb)->eor = 1;
-		} else {
+			goto alloc_skb;
+		}
+
+		must_collapse = (info->size_goal - skb->len > 0) &&
+				(skb_shinfo(skb)->nr_frags < sysctl_max_skb_frags);
+		if (must_collapse) {
 			size_bias = skb->len;
 			avail_size = info->size_goal - skb->len;
 		}
 	}
 
+alloc_skb:
+	if (!must_collapse && !ssk->sk_tx_skb_cache &&
+	    !mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held))
+		return 0;
+
 	/* Zero window and all data acked? Probe. */
 	avail_size = mptcp_check_allowed_size(msk, data_seq, avail_size);
 	if (avail_size == 0) {
@@ -1343,7 +1358,6 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 	if (skb == tail) {
 		TCP_SKB_CB(tail)->tcp_flags &= ~TCPHDR_PSH;
 		mpext->data_len += ret;
-		WARN_ON_ONCE(!can_collapse);
 		WARN_ON_ONCE(zero_window_probe);
 		goto out;
 	}
@@ -1530,15 +1544,6 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 			if (ssk != prev_ssk)
 				lock_sock(ssk);
 
-			/* keep it simple and always provide a new skb for the
-			 * subflow, even if we will not use it when collapsing
-			 * on the pending one
-			 */
-			if (!mptcp_alloc_tx_skb(sk, ssk)) {
-				mptcp_push_release(sk, ssk, &info);
-				goto out;
-			}
-
 			ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
 			if (ret <= 0) {
 				mptcp_push_release(sk, ssk, &info);
@@ -1571,7 +1576,9 @@ out:
 static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
-	struct mptcp_sendmsg_info info;
+	struct mptcp_sendmsg_info info = {
+		.data_lock_held = true,
+	};
 	struct mptcp_data_frag *dfrag;
 	struct sock *xmit_ssk;
 	int len, copied = 0;
@@ -1597,13 +1604,6 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
 				goto out;
 			}
 
-			if (unlikely(mptcp_must_reclaim_memory(sk, ssk))) {
-				__mptcp_update_wmem(sk);
-				sk_mem_reclaim_partial(sk);
-			}
-			if (!__mptcp_alloc_tx_skb(sk, ssk, GFP_ATOMIC))
-				goto out;
-
 			ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
 			if (ret <= 0)
 				goto out;
@@ -2409,9 +2409,6 @@ static void __mptcp_retrans(struct sock *sk)
 	info.sent = 0;
 	info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : dfrag->already_sent;
 	while (info.sent < info.limit) {
-		if (!mptcp_alloc_tx_skb(sk, ssk))
-			break;
-
 		ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
 		if (ret <= 0)
 			break;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d7aba1c4dc48..d3e6fd1615f1 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -34,7 +34,7 @@
 #define OPTIONS_MPTCP_MPC	(OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | \
 				 OPTION_MPTCP_MPC_ACK)
 #define OPTIONS_MPTCP_MPJ	(OPTION_MPTCP_MPJ_SYN | OPTION_MPTCP_MPJ_SYNACK | \
-				 OPTION_MPTCP_MPJ_SYNACK)
+				 OPTION_MPTCP_MPJ_ACK)
 
 /* MPTCP option subtypes */
 #define MPTCPOPT_MP_CAPABLE	0
@@ -573,6 +573,7 @@ void __init mptcp_subflow_init(void);
 void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
 void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 		     struct mptcp_subflow_context *subflow);
+void mptcp_subflow_send_ack(struct sock *ssk);
 void mptcp_subflow_reset(struct sock *ssk);
 void mptcp_sock_graft(struct sock *sk, struct socket *parent);
 struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 0b6cfd3b31e0..03757e76bb6b 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -80,6 +80,7 @@ enum {
 #define NCSI_OEM_MFR_BCM_ID             0x113d
 #define NCSI_OEM_MFR_INTEL_ID           0x157
 /* Intel specific OEM command */
+#define NCSI_OEM_INTEL_CMD_GMA          0x06   /* CMD ID for Get MAC */
 #define NCSI_OEM_INTEL_CMD_KEEP_PHY     0x20   /* CMD ID for Keep PHY up */
 /* Broadcom specific OEM Command */
 #define NCSI_OEM_BCM_CMD_GMA            0x01   /* CMD ID for Get MAC */
@@ -89,6 +90,7 @@ enum {
 #define NCSI_OEM_MLX_CMD_SMAF           0x01   /* CMD ID for Set MC Affinity */
 #define NCSI_OEM_MLX_CMD_SMAF_PARAM     0x07   /* Parameter for SMAF         */
 /* OEM Command payload lengths*/
+#define NCSI_OEM_INTEL_CMD_GMA_LEN      5
 #define NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN 7
 #define NCSI_OEM_BCM_CMD_GMA_LEN        12
 #define NCSI_OEM_MLX_CMD_GMA_LEN        8
@@ -99,6 +101,7 @@ enum {
 /* Mac address offset in OEM response */
 #define BCM_MAC_ADDR_OFFSET             28
 #define MLX_MAC_ADDR_OFFSET             8
+#define INTEL_MAC_ADDR_OFFSET           1
 
 
 struct ncsi_channel_version {
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 89c7742cd72e..7121ce2a47c0 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -795,13 +795,36 @@ static int ncsi_oem_smaf_mlx(struct ncsi_cmd_arg *nca)
 	return ret;
 }
 
+static int ncsi_oem_gma_handler_intel(struct ncsi_cmd_arg *nca)
+{
+	unsigned char data[NCSI_OEM_INTEL_CMD_GMA_LEN];
+	int ret = 0;
+
+	nca->payload = NCSI_OEM_INTEL_CMD_GMA_LEN;
+
+	memset(data, 0, NCSI_OEM_INTEL_CMD_GMA_LEN);
+	*(unsigned int *)data = ntohl((__force __be32)NCSI_OEM_MFR_INTEL_ID);
+	data[4] = NCSI_OEM_INTEL_CMD_GMA;
+
+	nca->data = data;
+
+	ret = ncsi_xmit_cmd(nca);
+	if (ret)
+		netdev_err(nca->ndp->ndev.dev,
+			   "NCSI: Failed to transmit cmd 0x%x during configure\n",
+			   nca->type);
+
+	return ret;
+}
+
 /* OEM Command handlers initialization */
 static struct ncsi_oem_gma_handler {
 	unsigned int	mfr_id;
 	int		(*handler)(struct ncsi_cmd_arg *nca);
 } ncsi_oem_gma_handlers[] = {
 	{ NCSI_OEM_MFR_BCM_ID, ncsi_oem_gma_handler_bcm },
-	{ NCSI_OEM_MFR_MLX_ID, ncsi_oem_gma_handler_mlx }
+	{ NCSI_OEM_MFR_MLX_ID, ncsi_oem_gma_handler_mlx },
+	{ NCSI_OEM_MFR_INTEL_ID, ncsi_oem_gma_handler_intel }
 };
 
 static int ncsi_gma_handler(struct ncsi_cmd_arg *nca, unsigned int mf_id)
diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h
index 80938b338fee..ba66c7dc3a21 100644
--- a/net/ncsi/ncsi-pkt.h
+++ b/net/ncsi/ncsi-pkt.h
@@ -178,6 +178,12 @@ struct ncsi_rsp_oem_bcm_pkt {
 	unsigned char           data[];      /* Cmd specific Data */
 };
 
+/* Intel Response Data */
+struct ncsi_rsp_oem_intel_pkt {
+	unsigned char           cmd;         /* OEM Command ID    */
+	unsigned char           data[];      /* Cmd specific Data */
+};
+
 /* Get Link Status */
 struct ncsi_rsp_gls_pkt {
 	struct ncsi_rsp_pkt_hdr rsp;        /* Response header   */
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index d48374894817..6447a09932f5 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -699,9 +699,51 @@ static int ncsi_rsp_handler_oem_bcm(struct ncsi_request *nr)
 	return 0;
 }
 
+/* Response handler for Intel command Get Mac Address */
+static int ncsi_rsp_handler_oem_intel_gma(struct ncsi_request *nr)
+{
+	struct ncsi_dev_priv *ndp = nr->ndp;
+	struct net_device *ndev = ndp->ndev.dev;
+	const struct net_device_ops *ops = ndev->netdev_ops;
+	struct ncsi_rsp_oem_pkt *rsp;
+	struct sockaddr saddr;
+	int ret = 0;
+
+	/* Get the response header */
+	rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp);
+
+	saddr.sa_family = ndev->type;
+	ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+	memcpy(saddr.sa_data, &rsp->data[INTEL_MAC_ADDR_OFFSET], ETH_ALEN);
+	/* Increase mac address by 1 for BMC's address */
+	eth_addr_inc((u8 *)saddr.sa_data);
+	if (!is_valid_ether_addr((const u8 *)saddr.sa_data))
+		return -ENXIO;
+
+	/* Set the flag for GMA command which should only be called once */
+	ndp->gma_flag = 1;
+
+	ret = ops->ndo_set_mac_address(ndev, &saddr);
+	if (ret < 0)
+		netdev_warn(ndev,
+			    "NCSI: 'Writing mac address to device failed\n");
+
+	return ret;
+}
+
 /* Response handler for Intel card */
 static int ncsi_rsp_handler_oem_intel(struct ncsi_request *nr)
 {
+	struct ncsi_rsp_oem_intel_pkt *intel;
+	struct ncsi_rsp_oem_pkt *rsp;
+
+	/* Get the response header */
+	rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp);
+	intel = (struct ncsi_rsp_oem_intel_pkt *)(rsp->data);
+
+	if (intel->cmd == NCSI_OEM_INTEL_CMD_GMA)
+		return ncsi_rsp_handler_oem_intel_gma(nr);
+
 	return 0;
 }
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index d31dbccbe7bd..94e18fb9690d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -21,7 +21,6 @@
 #include <linux/stddef.h>
 #include <linux/slab.h>
 #include <linux/random.h>
-#include <linux/jhash.h>
 #include <linux/siphash.h>
 #include <linux/err.h>
 #include <linux/percpu.h>
@@ -78,6 +77,8 @@ static __read_mostly bool nf_conntrack_locks_all;
 #define GC_SCAN_INTERVAL	(120u * HZ)
 #define GC_SCAN_MAX_DURATION	msecs_to_jiffies(10)
 
+#define MAX_CHAINLEN	64u
+
 static struct conntrack_gc_work conntrack_gc_work;
 
 void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
@@ -184,25 +185,31 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
 unsigned int nf_conntrack_max __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_max);
 seqcount_spinlock_t nf_conntrack_generation __read_mostly;
-static unsigned int nf_conntrack_hash_rnd __read_mostly;
+static siphash_key_t nf_conntrack_hash_rnd __read_mostly;
 
 static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
 			      const struct net *net)
 {
-	unsigned int n;
-	u32 seed;
+	struct {
+		struct nf_conntrack_man src;
+		union nf_inet_addr dst_addr;
+		u32 net_mix;
+		u16 dport;
+		u16 proto;
+	} __aligned(SIPHASH_ALIGNMENT) combined;
 
 	get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));
 
-	/* The direction must be ignored, so we hash everything up to the
-	 * destination ports (which is a multiple of 4) and treat the last
-	 * three bytes manually.
-	 */
-	seed = nf_conntrack_hash_rnd ^ net_hash_mix(net);
-	n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
-	return jhash2((u32 *)tuple, n, seed ^
-		      (((__force __u16)tuple->dst.u.all << 16) |
-		      tuple->dst.protonum));
+	memset(&combined, 0, sizeof(combined));
+
+	/* The direction must be ignored, so handle usable members manually. */
+	combined.src = tuple->src;
+	combined.dst_addr = tuple->dst.u3;
+	combined.net_mix = net_hash_mix(net);
+	combined.dport = (__force __u16)tuple->dst.u.all;
+	combined.proto = tuple->dst.protonum;
+
+	return (u32)siphash(&combined, sizeof(combined), &nf_conntrack_hash_rnd);
 }
 
 static u32 scale_hash(u32 hash)
@@ -835,7 +842,9 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
 	unsigned int hash, reply_hash;
 	struct nf_conntrack_tuple_hash *h;
 	struct hlist_nulls_node *n;
+	unsigned int chainlen = 0;
 	unsigned int sequence;
+	int err = -EEXIST;
 
 	zone = nf_ct_zone(ct);
 
@@ -849,15 +858,24 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
 	} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
 
 	/* See if there's one in the list already, including reverse */
-	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
+	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) {
 		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
 				    zone, net))
 			goto out;
 
-	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
+		if (chainlen++ > MAX_CHAINLEN)
+			goto chaintoolong;
+	}
+
+	chainlen = 0;
+
+	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) {
 		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
 				    zone, net))
 			goto out;
+		if (chainlen++ > MAX_CHAINLEN)
+			goto chaintoolong;
+	}
 
 	smp_wmb();
 	/* The caller holds a reference to this object */
@@ -867,11 +885,13 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
 	NF_CT_STAT_INC(net, insert);
 	local_bh_enable();
 	return 0;
-
+chaintoolong:
+	NF_CT_STAT_INC(net, chaintoolong);
+	err = -ENOSPC;
 out:
 	nf_conntrack_double_unlock(hash, reply_hash);
 	local_bh_enable();
-	return -EEXIST;
+	return err;
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
 
@@ -1084,6 +1104,7 @@ int
 __nf_conntrack_confirm(struct sk_buff *skb)
 {
 	const struct nf_conntrack_zone *zone;
+	unsigned int chainlen = 0, sequence;
 	unsigned int hash, reply_hash;
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
@@ -1091,7 +1112,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	struct hlist_nulls_node *n;
 	enum ip_conntrack_info ctinfo;
 	struct net *net;
-	unsigned int sequence;
 	int ret = NF_DROP;
 
 	ct = nf_ct_get(skb, &ctinfo);
@@ -1151,15 +1171,28 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	/* See if there's one in the list already, including reverse:
 	   NAT could have grabbed it without realizing, since we're
 	   not in the hash.  If there is, we lost race. */
-	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
+	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) {
 		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
 				    zone, net))
 			goto out;
+		if (chainlen++ > MAX_CHAINLEN)
+			goto chaintoolong;
+	}
 
-	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
+	chainlen = 0;
+	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) {
 		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
 				    zone, net))
 			goto out;
+		if (chainlen++ > MAX_CHAINLEN) {
+chaintoolong:
+			nf_ct_add_to_dying_list(ct);
+			NF_CT_STAT_INC(net, chaintoolong);
+			NF_CT_STAT_INC(net, insert_failed);
+			ret = NF_DROP;
+			goto dying;
+		}
+	}
 
 	/* Timer relative to confirmation time, not original
 	   setting time, otherwise we'd get timer wrap in
@@ -2594,26 +2627,24 @@ int nf_conntrack_init_start(void)
 		spin_lock_init(&nf_conntrack_locks[i]);
 
 	if (!nf_conntrack_htable_size) {
-		/* Idea from tcp.c: use 1/16384 of memory.
-		 * On i386: 32MB machine has 512 buckets.
-		 * >= 1GB machines have 16384 buckets.
-		 * >= 4GB machines have 65536 buckets.
-		 */
 		nf_conntrack_htable_size
 			= (((nr_pages << PAGE_SHIFT) / 16384)
 			   / sizeof(struct hlist_head));
-		if (nr_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE)))
-			nf_conntrack_htable_size = 65536;
+		if (BITS_PER_LONG >= 64 &&
+		    nr_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE)))
+			nf_conntrack_htable_size = 262144;
 		else if (nr_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
-			nf_conntrack_htable_size = 16384;
-		if (nf_conntrack_htable_size < 32)
-			nf_conntrack_htable_size = 32;
-
-		/* Use a max. factor of four by default to get the same max as
-		 * with the old struct list_heads. When a table size is given
-		 * we use the old value of 8 to avoid reducing the max.
-		 * entries. */
-		max_factor = 4;
+			nf_conntrack_htable_size = 65536;
+
+		if (nf_conntrack_htable_size < 1024)
+			nf_conntrack_htable_size = 1024;
+		/* Use a max. factor of one by default to keep the average
+		 * hash chain length at 2 entries.  Each entry has to be added
+		 * twice (once for original direction, once for reply).
+		 * When a table size is given we use the old value of 8 to
+		 * avoid implicit reduction of the max entries setting.
+		 */
+		max_factor = 1;
 	}
 
 	nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 1e851bc2e61a..f562eeef4234 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -17,7 +17,7 @@
 #include <linux/err.h>
 #include <linux/percpu.h>
 #include <linux/kernel.h>
-#include <linux/jhash.h>
+#include <linux/siphash.h>
 #include <linux/moduleparam.h>
 #include <linux/export.h>
 #include <net/net_namespace.h>
@@ -41,7 +41,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_hash);
 unsigned int nf_ct_expect_max __read_mostly;
 
 static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
-static unsigned int nf_ct_expect_hashrnd __read_mostly;
+static siphash_key_t nf_ct_expect_hashrnd __read_mostly;
 
 /* nf_conntrack_expect helper functions */
 void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
@@ -81,15 +81,26 @@ static void nf_ct_expectation_timed_out(struct timer_list *t)
 
 static unsigned int nf_ct_expect_dst_hash(const struct net *n, const struct nf_conntrack_tuple *tuple)
 {
-	unsigned int hash, seed;
+	struct {
+		union nf_inet_addr dst_addr;
+		u32 net_mix;
+		u16 dport;
+		u8 l3num;
+		u8 protonum;
+	} __aligned(SIPHASH_ALIGNMENT) combined;
+	u32 hash;
 
 	get_random_once(&nf_ct_expect_hashrnd, sizeof(nf_ct_expect_hashrnd));
 
-	seed = nf_ct_expect_hashrnd ^ net_hash_mix(n);
+	memset(&combined, 0, sizeof(combined));
 
-	hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
-		      (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
-		       (__force __u16)tuple->dst.u.all) ^ seed);
+	combined.dst_addr = tuple->dst.u3;
+	combined.net_mix = net_hash_mix(n);
+	combined.dport = (__force __u16)tuple->dst.u.all;
+	combined.l3num = tuple->src.l3num;
+	combined.protonum = tuple->dst.protonum;
+
+	hash = siphash(&combined, sizeof(combined), &nf_ct_expect_hashrnd);
 
 	return reciprocal_scale(hash, nf_ct_expect_hsize);
 }
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 5f9fc6b94855..f1e5443fe7c7 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2528,7 +2528,9 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
 	    nla_put_be32(skb, CTA_STATS_SEARCH_RESTART,
 				htonl(st->search_restart)) ||
 	    nla_put_be32(skb, CTA_STATS_CLASH_RESOLVE,
-				htonl(st->clash_resolve)))
+				htonl(st->clash_resolve)) ||
+	    nla_put_be32(skb, CTA_STATS_CHAIN_TOOLONG,
+			 htonl(st->chaintoolong)))
 		goto nla_put_failure;
 
 	nlmsg_end(skb, nlh);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 7e0d956da51d..80f675d884b2 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -432,7 +432,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 	unsigned int nr_conntracks;
 
 	if (v == SEQ_START_TOKEN) {
-		seq_puts(seq, "entries  clashres found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error  expect_new expect_create expect_delete search_restart\n");
+		seq_puts(seq, "entries  clashres found new invalid ignore delete chainlength insert insert_failed drop early_drop icmp_error  expect_new expect_create expect_delete search_restart\n");
 		return 0;
 	}
 
@@ -447,7 +447,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 		   st->invalid,
 		   0,
 		   0,
-		   0,
+		   st->chaintoolong,
 		   st->insert,
 		   st->insert_failed,
 		   st->drop,
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 7de595ead06a..7008961f5cb0 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -13,7 +13,7 @@
 #include <linux/skbuff.h>
 #include <linux/gfp.h>
 #include <net/xfrm.h>
-#include <linux/jhash.h>
+#include <linux/siphash.h>
 #include <linux/rtnetlink.h>
 
 #include <net/netfilter/nf_conntrack.h>
@@ -34,7 +34,7 @@ static unsigned int nat_net_id __read_mostly;
 
 static struct hlist_head *nf_nat_bysource __read_mostly;
 static unsigned int nf_nat_htable_size __read_mostly;
-static unsigned int nf_nat_hash_rnd __read_mostly;
+static siphash_key_t nf_nat_hash_rnd __read_mostly;
 
 struct nf_nat_lookup_hook_priv {
 	struct nf_hook_entries __rcu *entries;
@@ -153,12 +153,22 @@ static unsigned int
 hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
 {
 	unsigned int hash;
+	struct {
+		struct nf_conntrack_man src;
+		u32 net_mix;
+		u32 protonum;
+	} __aligned(SIPHASH_ALIGNMENT) combined;
 
 	get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
 
+	memset(&combined, 0, sizeof(combined));
+
 	/* Original src, to ensure we map it consistently if poss. */
-	hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
-		      tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n));
+	combined.src = tuple->src;
+	combined.net_mix = net_hash_mix(n);
+	combined.protonum = tuple->dst.protonum;
+
+	hash = siphash(&combined, sizeof(combined), &nf_nat_hash_rnd);
 
 	return reciprocal_scale(hash, nf_nat_htable_size);
 }
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 337e22d8b40b..99b1de14ff7e 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -41,6 +41,7 @@ struct nft_ct_helper_obj  {
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template);
 static unsigned int nft_ct_pcpu_template_refcnt __read_mostly;
+static DEFINE_MUTEX(nft_ct_pcpu_mutex);
 #endif
 
 static u64 nft_ct_get_eval_counter(const struct nf_conn_counter *c,
@@ -525,8 +526,10 @@ static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv)
 #endif
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	case NFT_CT_ZONE:
+		mutex_lock(&nft_ct_pcpu_mutex);
 		if (--nft_ct_pcpu_template_refcnt == 0)
 			nft_ct_tmpl_put_pcpu();
+		mutex_unlock(&nft_ct_pcpu_mutex);
 		break;
 #endif
 	default:
@@ -564,9 +567,13 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
 #endif
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	case NFT_CT_ZONE:
-		if (!nft_ct_tmpl_alloc_pcpu())
+		mutex_lock(&nft_ct_pcpu_mutex);
+		if (!nft_ct_tmpl_alloc_pcpu()) {
+			mutex_unlock(&nft_ct_pcpu_mutex);
 			return -ENOMEM;
+		}
 		nft_ct_pcpu_template_refcnt++;
+		mutex_unlock(&nft_ct_pcpu_mutex);
 		len = sizeof(u16);
 		break;
 #endif
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 525e3ea063b1..ec2322529727 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -493,7 +493,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
 		goto err;
 	}
 
-	if (!size || size & 3 || len != size + hdrlen)
+	if (!size || len != ALIGN(size, 4) + hdrlen)
 		goto err;
 
 	if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA &&
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index c4afdd026f51..bb0cd6d3d2c2 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -369,6 +369,7 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
 {
 	struct fq_codel_sched_data *q = qdisc_priv(sch);
 	struct nlattr *tb[TCA_FQ_CODEL_MAX + 1];
+	u32 quantum = 0;
 	int err;
 
 	if (!opt)
@@ -386,6 +387,13 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
 		    q->flows_cnt > 65536)
 			return -EINVAL;
 	}
+	if (tb[TCA_FQ_CODEL_QUANTUM]) {
+		quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM]));
+		if (quantum > FQ_CODEL_QUANTUM_MAX) {
+			NL_SET_ERR_MSG(extack, "Invalid quantum");
+			return -EINVAL;
+		}
+	}
 	sch_tree_lock(sch);
 
 	if (tb[TCA_FQ_CODEL_TARGET]) {
@@ -412,8 +420,8 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
 	if (tb[TCA_FQ_CODEL_ECN])
 		q->cparams.ecn = !!nla_get_u32(tb[TCA_FQ_CODEL_ECN]);
 
-	if (tb[TCA_FQ_CODEL_QUANTUM])
-		q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM]));
+	if (quantum)
+		q->quantum = quantum;
 
 	if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])
 		q->drop_batch_size = max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]));
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index d1c003a25b0f..61c276bddaf2 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -160,7 +160,7 @@ static struct rpc_clnt *get_gssp_clnt(struct sunrpc_net *sn)
 	mutex_lock(&sn->gssp_lock);
 	clnt = sn->gssp_clnt;
 	if (clnt)
-		atomic_inc(&clnt->cl_count);
+		refcount_inc(&clnt->cl_count);
 	mutex_unlock(&sn->gssp_lock);
 	return clnt;
 }
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 3d685fe328fa..3e776e3dff91 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -194,6 +194,8 @@ static void rsi_request(struct cache_detail *cd,
 	qword_addhex(bpp, blen, rsii->in_handle.data, rsii->in_handle.len);
 	qword_addhex(bpp, blen, rsii->in_token.data, rsii->in_token.len);
 	(*bpp)[-1] = '\n';
+	WARN_ONCE(*blen < 0,
+		  "RPCSEC/GSS credential too large - please use gssproxy\n");
 }
 
 static int rsi_parse(struct cache_detail *cd,
@@ -707,11 +709,11 @@ svc_safe_putnetobj(struct kvec *resv, struct xdr_netobj *o)
 /*
  * Verify the checksum on the header and return SVC_OK on success.
  * Otherwise, return SVC_DROP (in the case of a bad sequence number)
- * or return SVC_DENIED and indicate error in authp.
+ * or return SVC_DENIED and indicate error in rqstp->rq_auth_stat.
  */
 static int
 gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
-		  __be32 *rpcstart, struct rpc_gss_wire_cred *gc, __be32 *authp)
+		  __be32 *rpcstart, struct rpc_gss_wire_cred *gc)
 {
 	struct gss_ctx		*ctx_id = rsci->mechctx;
 	struct xdr_buf		rpchdr;
@@ -725,7 +727,7 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
 	iov.iov_len = (u8 *)argv->iov_base - (u8 *)rpcstart;
 	xdr_buf_from_iov(&iov, &rpchdr);
 
-	*authp = rpc_autherr_badverf;
+	rqstp->rq_auth_stat = rpc_autherr_badverf;
 	if (argv->iov_len < 4)
 		return SVC_DENIED;
 	flavor = svc_getnl(argv);
@@ -737,13 +739,13 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
 	if (rqstp->rq_deferred) /* skip verification of revisited request */
 		return SVC_OK;
 	if (gss_verify_mic(ctx_id, &rpchdr, &checksum) != GSS_S_COMPLETE) {
-		*authp = rpcsec_gsserr_credproblem;
+		rqstp->rq_auth_stat = rpcsec_gsserr_credproblem;
 		return SVC_DENIED;
 	}
 
 	if (gc->gc_seq > MAXSEQ) {
 		trace_rpcgss_svc_seqno_large(rqstp, gc->gc_seq);
-		*authp = rpcsec_gsserr_ctxproblem;
+		rqstp->rq_auth_stat = rpcsec_gsserr_ctxproblem;
 		return SVC_DENIED;
 	}
 	if (!gss_check_seq_num(rqstp, rsci, gc->gc_seq))
@@ -1038,6 +1040,8 @@ svcauth_gss_set_client(struct svc_rqst *rqstp)
 	struct rpc_gss_wire_cred *gc = &svcdata->clcred;
 	int stat;
 
+	rqstp->rq_auth_stat = rpc_autherr_badcred;
+
 	/*
 	 * A gss export can be specified either by:
 	 * 	export	*(sec=krb5,rw)
@@ -1053,6 +1057,8 @@ svcauth_gss_set_client(struct svc_rqst *rqstp)
 	stat = svcauth_unix_set_client(rqstp);
 	if (stat == SVC_DROP || stat == SVC_CLOSE)
 		return stat;
+
+	rqstp->rq_auth_stat = rpc_auth_ok;
 	return SVC_OK;
 }
 
@@ -1142,7 +1148,7 @@ static void gss_free_in_token_pages(struct gssp_in_token *in_token)
 }
 
 static int gss_read_proxy_verf(struct svc_rqst *rqstp,
-			       struct rpc_gss_wire_cred *gc, __be32 *authp,
+			       struct rpc_gss_wire_cred *gc,
 			       struct xdr_netobj *in_handle,
 			       struct gssp_in_token *in_token)
 {
@@ -1151,7 +1157,7 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp,
 	int pages, i, res, pgto, pgfrom;
 	size_t inlen, to_offs, from_offs;
 
-	res = gss_read_common_verf(gc, argv, authp, in_handle);
+	res = gss_read_common_verf(gc, argv, &rqstp->rq_auth_stat, in_handle);
 	if (res)
 		return res;
 
@@ -1227,7 +1233,7 @@ gss_write_resv(struct kvec *resv, size_t size_limit,
  * Otherwise, drop the request pending an answer to the upcall.
  */
 static int svcauth_gss_legacy_init(struct svc_rqst *rqstp,
-			struct rpc_gss_wire_cred *gc, __be32 *authp)
+				   struct rpc_gss_wire_cred *gc)
 {
 	struct kvec *argv = &rqstp->rq_arg.head[0];
 	struct kvec *resv = &rqstp->rq_res.head[0];
@@ -1236,7 +1242,7 @@ static int svcauth_gss_legacy_init(struct svc_rqst *rqstp,
 	struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
 
 	memset(&rsikey, 0, sizeof(rsikey));
-	ret = gss_read_verf(gc, argv, authp,
+	ret = gss_read_verf(gc, argv, &rqstp->rq_auth_stat,
 			    &rsikey.in_handle, &rsikey.in_token);
 	if (ret)
 		return ret;
@@ -1339,7 +1345,7 @@ out:
 }
 
 static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
-			struct rpc_gss_wire_cred *gc, __be32 *authp)
+				  struct rpc_gss_wire_cred *gc)
 {
 	struct kvec *resv = &rqstp->rq_res.head[0];
 	struct xdr_netobj cli_handle;
@@ -1351,8 +1357,7 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
 	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
 
 	memset(&ud, 0, sizeof(ud));
-	ret = gss_read_proxy_verf(rqstp, gc, authp,
-				  &ud.in_handle, &ud.in_token);
+	ret = gss_read_proxy_verf(rqstp, gc, &ud.in_handle, &ud.in_token);
 	if (ret)
 		return ret;
 
@@ -1525,7 +1530,7 @@ static void destroy_use_gss_proxy_proc_entry(struct net *net) {}
  * response here and return SVC_COMPLETE.
  */
 static int
-svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
+svcauth_gss_accept(struct svc_rqst *rqstp)
 {
 	struct kvec	*argv = &rqstp->rq_arg.head[0];
 	struct kvec	*resv = &rqstp->rq_res.head[0];
@@ -1538,7 +1543,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
 	int		ret;
 	struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
 
-	*authp = rpc_autherr_badcred;
+	rqstp->rq_auth_stat = rpc_autherr_badcred;
 	if (!svcdata)
 		svcdata = kmalloc(sizeof(*svcdata), GFP_KERNEL);
 	if (!svcdata)
@@ -1575,22 +1580,22 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
 	if ((gc->gc_proc != RPC_GSS_PROC_DATA) && (rqstp->rq_proc != 0))
 		goto auth_err;
 
-	*authp = rpc_autherr_badverf;
+	rqstp->rq_auth_stat = rpc_autherr_badverf;
 	switch (gc->gc_proc) {
 	case RPC_GSS_PROC_INIT:
 	case RPC_GSS_PROC_CONTINUE_INIT:
 		if (use_gss_proxy(SVC_NET(rqstp)))
-			return svcauth_gss_proxy_init(rqstp, gc, authp);
+			return svcauth_gss_proxy_init(rqstp, gc);
 		else
-			return svcauth_gss_legacy_init(rqstp, gc, authp);
+			return svcauth_gss_legacy_init(rqstp, gc);
 	case RPC_GSS_PROC_DATA:
 	case RPC_GSS_PROC_DESTROY:
 		/* Look up the context, and check the verifier: */
-		*authp = rpcsec_gsserr_credproblem;
+		rqstp->rq_auth_stat = rpcsec_gsserr_credproblem;
 		rsci = gss_svc_searchbyctx(sn->rsc_cache, &gc->gc_ctx);
 		if (!rsci)
 			goto auth_err;
-		switch (gss_verify_header(rqstp, rsci, rpcstart, gc, authp)) {
+		switch (gss_verify_header(rqstp, rsci, rpcstart, gc)) {
 		case SVC_OK:
 			break;
 		case SVC_DENIED:
@@ -1600,7 +1605,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
 		}
 		break;
 	default:
-		*authp = rpc_autherr_rejectedcred;
+		rqstp->rq_auth_stat = rpc_autherr_rejectedcred;
 		goto auth_err;
 	}
 
@@ -1616,13 +1621,13 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
 		svc_putnl(resv, RPC_SUCCESS);
 		goto complete;
 	case RPC_GSS_PROC_DATA:
-		*authp = rpcsec_gsserr_ctxproblem;
+		rqstp->rq_auth_stat = rpcsec_gsserr_ctxproblem;
 		svcdata->verf_start = resv->iov_base + resv->iov_len;
 		if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
 			goto auth_err;
 		rqstp->rq_cred = rsci->cred;
 		get_group_info(rsci->cred.cr_group_info);
-		*authp = rpc_autherr_badcred;
+		rqstp->rq_auth_stat = rpc_autherr_badcred;
 		switch (gc->gc_svc) {
 		case RPC_GSS_SVC_NONE:
 			break;
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 1a2c1c44bb00..59641803472c 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -803,7 +803,7 @@ static int cache_request(struct cache_detail *detail,
 
 	detail->cache_request(detail, crq->item, &bp, &len);
 	if (len < 0)
-		return -EAGAIN;
+		return -E2BIG;
 	return PAGE_SIZE - len;
 }
 
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 8b4de70e8ead..f056ff931444 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -167,7 +167,7 @@ static int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event)
 	case RPC_PIPEFS_MOUNT:
 		if (clnt->cl_pipedir_objects.pdh_dentry != NULL)
 			return 1;
-		if (atomic_read(&clnt->cl_count) == 0)
+		if (refcount_read(&clnt->cl_count) == 0)
 			return 1;
 		break;
 	case RPC_PIPEFS_UMOUNT:
@@ -419,7 +419,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
 	clnt->cl_rtt = &clnt->cl_rtt_default;
 	rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval);
 
-	atomic_set(&clnt->cl_count, 1);
+	refcount_set(&clnt->cl_count, 1);
 
 	if (nodename == NULL)
 		nodename = utsname()->nodename;
@@ -431,7 +431,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
 	if (err)
 		goto out_no_path;
 	if (parent)
-		atomic_inc(&parent->cl_count);
+		refcount_inc(&parent->cl_count);
 
 	trace_rpc_clnt_new(clnt, xprt, program->name, args->servername);
 	return clnt;
@@ -918,18 +918,16 @@ rpc_free_client(struct rpc_clnt *clnt)
 static struct rpc_clnt *
 rpc_free_auth(struct rpc_clnt *clnt)
 {
-	if (clnt->cl_auth == NULL)
-		return rpc_free_client(clnt);
-
 	/*
 	 * Note: RPCSEC_GSS may need to send NULL RPC calls in order to
 	 *       release remaining GSS contexts. This mechanism ensures
 	 *       that it can do so safely.
 	 */
-	atomic_inc(&clnt->cl_count);
-	rpcauth_release(clnt->cl_auth);
-	clnt->cl_auth = NULL;
-	if (atomic_dec_and_test(&clnt->cl_count))
+	if (clnt->cl_auth != NULL) {
+		rpcauth_release(clnt->cl_auth);
+		clnt->cl_auth = NULL;
+	}
+	if (refcount_dec_and_test(&clnt->cl_count))
 		return rpc_free_client(clnt);
 	return NULL;
 }
@@ -943,7 +941,7 @@ rpc_release_client(struct rpc_clnt *clnt)
 	do {
 		if (list_empty(&clnt->cl_tasks))
 			wake_up(&destroy_wait);
-		if (!atomic_dec_and_test(&clnt->cl_count))
+		if (refcount_dec_not_one(&clnt->cl_count))
 			break;
 		clnt = rpc_free_auth(clnt);
 	} while (clnt != NULL);
@@ -1082,7 +1080,7 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
 	if (clnt != NULL) {
 		rpc_task_set_transport(task, clnt);
 		task->tk_client = clnt;
-		atomic_inc(&clnt->cl_count);
+		refcount_inc(&clnt->cl_count);
 		if (clnt->cl_softrtry)
 			task->tk_flags |= RPC_TASK_SOFT;
 		if (clnt->cl_softerr)
@@ -2694,17 +2692,18 @@ static const struct rpc_procinfo rpcproc_null = {
 	.p_decode = rpcproc_decode_null,
 };
 
-static int rpc_ping(struct rpc_clnt *clnt)
+static void
+rpc_null_call_prepare(struct rpc_task *task, void *data)
 {
-	struct rpc_message msg = {
-		.rpc_proc = &rpcproc_null,
-	};
-	int err;
-	err = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN |
-			    RPC_TASK_NULLCREDS);
-	return err;
+	task->tk_flags &= ~RPC_TASK_NO_RETRANS_TIMEOUT;
+	rpc_call_start(task);
 }
 
+static const struct rpc_call_ops rpc_null_ops = {
+	.rpc_call_prepare = rpc_null_call_prepare,
+	.rpc_call_done = rpc_default_callback,
+};
+
 static
 struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt,
 		struct rpc_xprt *xprt, struct rpc_cred *cred, int flags,
@@ -2718,7 +2717,7 @@ struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt,
 		.rpc_xprt = xprt,
 		.rpc_message = &msg,
 		.rpc_op_cred = cred,
-		.callback_ops = (ops != NULL) ? ops : &rpc_default_ops,
+		.callback_ops = ops ?: &rpc_null_ops,
 		.callback_data = data,
 		.flags = flags | RPC_TASK_SOFT | RPC_TASK_SOFTCONN |
 			 RPC_TASK_NULLCREDS,
@@ -2733,6 +2732,19 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int
 }
 EXPORT_SYMBOL_GPL(rpc_call_null);
 
+static int rpc_ping(struct rpc_clnt *clnt)
+{
+	struct rpc_task	*task;
+	int status;
+
+	task = rpc_call_null_helper(clnt, NULL, NULL, 0, NULL, NULL);
+	if (IS_ERR(task))
+		return PTR_ERR(task);
+	status = task->tk_status;
+	rpc_put_task(task);
+	return status;
+}
+
 struct rpc_cb_add_xprt_calldata {
 	struct rpc_xprt_switch *xps;
 	struct rpc_xprt *xprt;
@@ -2756,6 +2768,7 @@ static void rpc_cb_add_xprt_release(void *calldata)
 }
 
 static const struct rpc_call_ops rpc_cb_add_xprt_call_ops = {
+	.rpc_call_prepare = rpc_null_call_prepare,
 	.rpc_call_done = rpc_cb_add_xprt_done,
 	.rpc_release = rpc_cb_add_xprt_release,
 };
@@ -2774,6 +2787,15 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
 	struct rpc_cb_add_xprt_calldata *data;
 	struct rpc_task *task;
 
+	if (xps->xps_nunique_destaddr_xprts + 1 > clnt->cl_max_connect) {
+		rcu_read_lock();
+		pr_warn("SUNRPC: reached max allowed number (%d) did not add "
+			"transport to server: %s\n", clnt->cl_max_connect,
+			rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+
 	data = kmalloc(sizeof(*data), GFP_NOFS);
 	if (!data)
 		return -ENOMEM;
@@ -2786,7 +2808,7 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
 
 	task = rpc_call_null_helper(clnt, xprt, NULL, RPC_TASK_ASYNC,
 			&rpc_cb_add_xprt_call_ops, data);
-
+	data->xps->xps_nunique_destaddr_xprts++;
 	rpc_put_task(task);
 success:
 	return 1;
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index 827bf3a28178..7dc9cc929bfd 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -90,7 +90,7 @@ static int tasks_open(struct inode *inode, struct file *filp)
 		struct seq_file *seq = filp->private_data;
 		struct rpc_clnt *clnt = seq->private = inode->i_private;
 
-		if (!atomic_inc_not_zero(&clnt->cl_count)) {
+		if (!refcount_inc_not_zero(&clnt->cl_count)) {
 			seq_release(inode, filp);
 			ret = -EINVAL;
 		}
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 09c000d490a1..ee5336d73fdd 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -423,7 +423,7 @@ rpc_info_open(struct inode *inode, struct file *file)
 		spin_lock(&file->f_path.dentry->d_lock);
 		if (!d_unhashed(file->f_path.dentry))
 			clnt = RPC_I(inode)->private;
-		if (clnt != NULL && atomic_inc_not_zero(&clnt->cl_count)) {
+		if (clnt != NULL && refcount_inc_not_zero(&clnt->cl_count)) {
 			spin_unlock(&file->f_path.dentry->d_lock);
 			m->private = clnt;
 		} else {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index bfcbaf7b3822..a3bbe5ce4570 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1186,22 +1186,6 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...)
 static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {}
 #endif
 
-__be32
-svc_return_autherr(struct svc_rqst *rqstp, __be32 auth_err)
-{
-	set_bit(RQ_AUTHERR, &rqstp->rq_flags);
-	return auth_err;
-}
-EXPORT_SYMBOL_GPL(svc_return_autherr);
-
-static __be32
-svc_get_autherr(struct svc_rqst *rqstp, __be32 *statp)
-{
-	if (test_and_clear_bit(RQ_AUTHERR, &rqstp->rq_flags))
-		return *statp;
-	return rpc_auth_ok;
-}
-
 static int
 svc_generic_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 {
@@ -1225,7 +1209,7 @@ svc_generic_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 	    test_bit(RQ_DROPME, &rqstp->rq_flags))
 		return 0;
 
-	if (test_bit(RQ_AUTHERR, &rqstp->rq_flags))
+	if (rqstp->rq_auth_stat != rpc_auth_ok)
 		return 1;
 
 	if (*statp != rpc_success)
@@ -1306,7 +1290,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 	struct svc_process_info process;
 	__be32			*statp;
 	u32			prog, vers;
-	__be32			auth_stat, rpc_stat;
+	__be32			rpc_stat;
 	int			auth_res;
 	__be32			*reply_statp;
 
@@ -1349,14 +1333,12 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 	 * We do this before anything else in order to get a decent
 	 * auth verifier.
 	 */
-	auth_res = svc_authenticate(rqstp, &auth_stat);
+	auth_res = svc_authenticate(rqstp);
 	/* Also give the program a chance to reject this call: */
-	if (auth_res == SVC_OK && progp) {
-		auth_stat = rpc_autherr_badcred;
+	if (auth_res == SVC_OK && progp)
 		auth_res = progp->pg_authenticate(rqstp);
-	}
 	if (auth_res != SVC_OK)
-		trace_svc_authenticate(rqstp, auth_res, auth_stat);
+		trace_svc_authenticate(rqstp, auth_res);
 	switch (auth_res) {
 	case SVC_OK:
 		break;
@@ -1415,15 +1397,15 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 			goto release_dropit;
 		if (*statp == rpc_garbage_args)
 			goto err_garbage;
-		auth_stat = svc_get_autherr(rqstp, statp);
-		if (auth_stat != rpc_auth_ok)
-			goto err_release_bad_auth;
 	} else {
 		dprintk("svc: calling dispatcher\n");
 		if (!process.dispatch(rqstp, statp))
 			goto release_dropit; /* Release reply info */
 	}
 
+	if (rqstp->rq_auth_stat != rpc_auth_ok)
+		goto err_release_bad_auth;
+
 	/* Check RPC status result */
 	if (*statp != rpc_success)
 		resv->iov_len = ((void*)statp)  - resv->iov_base + 4;
@@ -1473,13 +1455,14 @@ err_release_bad_auth:
 	if (procp->pc_release)
 		procp->pc_release(rqstp);
 err_bad_auth:
-	dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat));
+	dprintk("svc: authentication failed (%d)\n",
+		be32_to_cpu(rqstp->rq_auth_stat));
 	serv->sv_stats->rpcbadauth++;
 	/* Restore write pointer to location of accept status: */
 	xdr_ressize_check(rqstp, reply_statp);
 	svc_putnl(resv, 1);	/* REJECT */
 	svc_putnl(resv, 1);	/* AUTH_ERROR */
-	svc_putnl(resv, ntohl(auth_stat));	/* status */
+	svc_putu32(resv, rqstp->rq_auth_stat);	/* status */
 	goto sendit;
 
 err_bad_prog:
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index e1153cba9cc6..6316bd2b8f37 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -663,7 +663,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
 {
 	struct svc_serv *serv = rqstp->rq_server;
 	struct xdr_buf *arg = &rqstp->rq_arg;
-	unsigned long pages, filled;
+	unsigned long pages, filled, ret;
 
 	pagevec_init(&rqstp->rq_pvec);
 
@@ -675,11 +675,12 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
 		pages = RPCSVC_MAXPAGES;
 	}
 
-	for (;;) {
-		filled = alloc_pages_bulk_array(GFP_KERNEL, pages,
-						rqstp->rq_pages);
-		if (filled == pages)
-			break;
+	for (filled = 0; filled < pages; filled = ret) {
+		ret = alloc_pages_bulk_array(GFP_KERNEL, pages,
+					     rqstp->rq_pages);
+		if (ret > filled)
+			/* Made progress, don't sleep yet */
+			continue;
 
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (signalled() || kthread_should_stop()) {
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 998b196b6176..5a8b8e03fdd4 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -59,12 +59,12 @@ svc_put_auth_ops(struct auth_ops *aops)
 }
 
 int
-svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
+svc_authenticate(struct svc_rqst *rqstp)
 {
 	rpc_authflavor_t	flavor;
 	struct auth_ops		*aops;
 
-	*authp = rpc_auth_ok;
+	rqstp->rq_auth_stat = rpc_auth_ok;
 
 	flavor = svc_getnl(&rqstp->rq_arg.head[0]);
 
@@ -72,7 +72,7 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
 
 	aops = svc_get_auth_ops(flavor);
 	if (aops == NULL) {
-		*authp = rpc_autherr_badcred;
+		rqstp->rq_auth_stat = rpc_autherr_badcred;
 		return SVC_DENIED;
 	}
 
@@ -80,7 +80,7 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
 	init_svc_cred(&rqstp->rq_cred);
 
 	rqstp->rq_authop = aops;
-	return aops->accept(rqstp, authp);
+	return aops->accept(rqstp);
 }
 EXPORT_SYMBOL_GPL(svc_authenticate);
 
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 35b7966ac3b3..d7ed7d49115a 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -681,8 +681,9 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
 
 	rqstp->rq_client = NULL;
 	if (rqstp->rq_proc == 0)
-		return SVC_OK;
+		goto out;
 
+	rqstp->rq_auth_stat = rpc_autherr_badcred;
 	ipm = ip_map_cached_get(xprt);
 	if (ipm == NULL)
 		ipm = __ip_map_lookup(sn->ip_map_cache, rqstp->rq_server->sv_program->pg_class,
@@ -719,13 +720,16 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
 		put_group_info(cred->cr_group_info);
 		cred->cr_group_info = gi;
 	}
+
+out:
+	rqstp->rq_auth_stat = rpc_auth_ok;
 	return SVC_OK;
 }
 
 EXPORT_SYMBOL_GPL(svcauth_unix_set_client);
 
 static int
-svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
+svcauth_null_accept(struct svc_rqst *rqstp)
 {
 	struct kvec	*argv = &rqstp->rq_arg.head[0];
 	struct kvec	*resv = &rqstp->rq_res.head[0];
@@ -736,12 +740,12 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
 
 	if (svc_getu32(argv) != 0) {
 		dprintk("svc: bad null cred\n");
-		*authp = rpc_autherr_badcred;
+		rqstp->rq_auth_stat = rpc_autherr_badcred;
 		return SVC_DENIED;
 	}
 	if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
 		dprintk("svc: bad null verf\n");
-		*authp = rpc_autherr_badverf;
+		rqstp->rq_auth_stat = rpc_autherr_badverf;
 		return SVC_DENIED;
 	}
 
@@ -785,7 +789,7 @@ struct auth_ops svcauth_null = {
 
 
 static int
-svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
+svcauth_unix_accept(struct svc_rqst *rqstp)
 {
 	struct kvec	*argv = &rqstp->rq_arg.head[0];
 	struct kvec	*resv = &rqstp->rq_res.head[0];
@@ -827,7 +831,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
 	}
 	groups_sort(cred->cr_group_info);
 	if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
-		*authp = rpc_autherr_badverf;
+		rqstp->rq_auth_stat = rpc_autherr_badverf;
 		return SVC_DENIED;
 	}
 
@@ -839,7 +843,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
 	return SVC_OK;
 
 badcred:
-	*authp = rpc_autherr_badcred;
+	rqstp->rq_auth_stat = rpc_autherr_badcred;
 	return SVC_DENIED;
 }
 
diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c
index 64da3bfd28e6..9a6f17e18f73 100644
--- a/net/sunrpc/sysfs.c
+++ b/net/sunrpc/sysfs.c
@@ -100,6 +100,28 @@ static ssize_t rpc_sysfs_xprt_dstaddr_show(struct kobject *kobj,
 	return ret + 1;
 }
 
+static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj,
+					   struct kobj_attribute *attr,
+					   char *buf)
+{
+	struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
+	struct sockaddr_storage saddr;
+	struct sock_xprt *sock;
+	ssize_t ret = -1;
+
+	if (!xprt)
+		return 0;
+
+	sock = container_of(xprt, struct sock_xprt, xprt);
+	if (kernel_getsockname(sock->sock, (struct sockaddr *)&saddr) < 0)
+		goto out;
+
+	ret = sprintf(buf, "%pISc\n", &saddr);
+out:
+	xprt_put(xprt);
+	return ret + 1;
+}
+
 static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj,
 					struct kobj_attribute *attr,
 					char *buf)
@@ -114,14 +136,16 @@ static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj,
 		       "max_num_slots=%u\nmin_num_slots=%u\nnum_reqs=%u\n"
 		       "binding_q_len=%u\nsending_q_len=%u\npending_q_len=%u\n"
 		       "backlog_q_len=%u\nmain_xprt=%d\nsrc_port=%u\n"
-		       "tasks_queuelen=%ld\n",
+		       "tasks_queuelen=%ld\ndst_port=%s\n",
 		       xprt->last_used, xprt->cong, xprt->cwnd, xprt->max_reqs,
 		       xprt->min_reqs, xprt->num_reqs, xprt->binding.qlen,
 		       xprt->sending.qlen, xprt->pending.qlen,
 		       xprt->backlog.qlen, xprt->main,
 		       (xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ?
 		       get_srcport(xprt) : 0,
-		       atomic_long_read(&xprt->queuelen));
+		       atomic_long_read(&xprt->queuelen),
+		       (xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ?
+				xprt->address_strings[RPC_DISPLAY_PORT] : "0");
 	xprt_put(xprt);
 	return ret + 1;
 }
@@ -183,8 +207,10 @@ static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj,
 
 	if (!xprt_switch)
 		return 0;
-	ret = sprintf(buf, "num_xprts=%u\nnum_active=%u\nqueue_len=%ld\n",
+	ret = sprintf(buf, "num_xprts=%u\nnum_active=%u\n"
+		      "num_unique_destaddr=%u\nqueue_len=%ld\n",
 		      xprt_switch->xps_nxprts, xprt_switch->xps_nactive,
+		      xprt_switch->xps_nunique_destaddr_xprts,
 		      atomic_long_read(&xprt_switch->xps_queuelen));
 	xprt_switch_put(xprt_switch);
 	return ret + 1;
@@ -376,6 +402,9 @@ static const void *rpc_sysfs_xprt_namespace(struct kobject *kobj)
 static struct kobj_attribute rpc_sysfs_xprt_dstaddr = __ATTR(dstaddr,
 	0644, rpc_sysfs_xprt_dstaddr_show, rpc_sysfs_xprt_dstaddr_store);
 
+static struct kobj_attribute rpc_sysfs_xprt_srcaddr = __ATTR(srcaddr,
+	0644, rpc_sysfs_xprt_srcaddr_show, NULL);
+
 static struct kobj_attribute rpc_sysfs_xprt_info = __ATTR(xprt_info,
 	0444, rpc_sysfs_xprt_info_show, NULL);
 
@@ -384,6 +413,7 @@ static struct kobj_attribute rpc_sysfs_xprt_change_state = __ATTR(xprt_state,
 
 static struct attribute *rpc_sysfs_xprt_attrs[] = {
 	&rpc_sysfs_xprt_dstaddr.attr,
+	&rpc_sysfs_xprt_srcaddr.attr,
 	&rpc_sysfs_xprt_info.attr,
 	&rpc_sysfs_xprt_change_state.attr,
 	NULL,
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 05abe344a269..cfd681700d1a 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -762,6 +762,20 @@ void xprt_disconnect_done(struct rpc_xprt *xprt)
 EXPORT_SYMBOL_GPL(xprt_disconnect_done);
 
 /**
+ * xprt_schedule_autoclose_locked - Try to schedule an autoclose RPC call
+ * @xprt: transport to disconnect
+ */
+static void xprt_schedule_autoclose_locked(struct rpc_xprt *xprt)
+{
+	set_bit(XPRT_CLOSE_WAIT, &xprt->state);
+	if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
+		queue_work(xprtiod_workqueue, &xprt->task_cleanup);
+	else if (xprt->snd_task && !test_bit(XPRT_SND_IS_COOKIE, &xprt->state))
+		rpc_wake_up_queued_task_set_status(&xprt->pending,
+						   xprt->snd_task, -ENOTCONN);
+}
+
+/**
  * xprt_force_disconnect - force a transport to disconnect
  * @xprt: transport to disconnect
  *
@@ -772,13 +786,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
 
 	/* Don't race with the test_bit() in xprt_clear_locked() */
 	spin_lock(&xprt->transport_lock);
-	set_bit(XPRT_CLOSE_WAIT, &xprt->state);
-	/* Try to schedule an autoclose RPC call */
-	if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
-		queue_work(xprtiod_workqueue, &xprt->task_cleanup);
-	else if (xprt->snd_task)
-		rpc_wake_up_queued_task_set_status(&xprt->pending,
-				xprt->snd_task, -ENOTCONN);
+	xprt_schedule_autoclose_locked(xprt);
 	spin_unlock(&xprt->transport_lock);
 }
 EXPORT_SYMBOL_GPL(xprt_force_disconnect);
@@ -818,11 +826,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie)
 		goto out;
 	if (test_bit(XPRT_CLOSING, &xprt->state))
 		goto out;
-	set_bit(XPRT_CLOSE_WAIT, &xprt->state);
-	/* Try to schedule an autoclose RPC call */
-	if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
-		queue_work(xprtiod_workqueue, &xprt->task_cleanup);
-	xprt_wake_pending_tasks(xprt, -EAGAIN);
+	xprt_schedule_autoclose_locked(xprt);
 out:
 	spin_unlock(&xprt->transport_lock);
 }
@@ -880,12 +884,14 @@ bool xprt_lock_connect(struct rpc_xprt *xprt,
 		goto out;
 	if (xprt->snd_task != task)
 		goto out;
+	set_bit(XPRT_SND_IS_COOKIE, &xprt->state);
 	xprt->snd_task = cookie;
 	ret = true;
 out:
 	spin_unlock(&xprt->transport_lock);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(xprt_lock_connect);
 
 void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie)
 {
@@ -895,12 +901,14 @@ void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie)
 	if (!test_bit(XPRT_LOCKED, &xprt->state))
 		goto out;
 	xprt->snd_task =NULL;
+	clear_bit(XPRT_SND_IS_COOKIE, &xprt->state);
 	xprt->ops->release_xprt(xprt, NULL);
 	xprt_schedule_autodisconnect(xprt);
 out:
 	spin_unlock(&xprt->transport_lock);
 	wake_up_bit(&xprt->state, XPRT_LOCKED);
 }
+EXPORT_SYMBOL_GPL(xprt_unlock_connect);
 
 /**
  * xprt_connect - schedule a transport connect operation
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index c60820e45082..1693f81aae37 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -139,6 +139,7 @@ struct rpc_xprt_switch *xprt_switch_alloc(struct rpc_xprt *xprt,
 		xps->xps_iter_ops = &rpc_xprt_iter_singular;
 		rpc_sysfs_xprt_switch_setup(xps, xprt, gfp_flags);
 		xprt_switch_add_xprt_locked(xps, xprt);
+		xps->xps_nunique_destaddr_xprts = 1;
 		rpc_sysfs_xprt_setup(xps, xprt, gfp_flags);
 	}
 
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 1151efd09b27..17f174d6ea3b 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -115,7 +115,7 @@ int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
 	if (rc < 0)
 		goto failed_marshal;
 
-	if (rpcrdma_post_sends(r_xprt, req))
+	if (frwr_send(r_xprt, req))
 		goto drop_connection;
 	return 0;
 
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 229fcc9a9064..f700b34a5bfd 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -394,6 +394,7 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	struct rpcrdma_ep *ep = r_xprt->rx_ep;
 	struct rpcrdma_mr *mr;
 	unsigned int num_wrs;
+	int ret;
 
 	num_wrs = 1;
 	post_wr = send_wr;
@@ -420,7 +421,10 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	}
 
 	trace_xprtrdma_post_send(req);
-	return ib_post_send(ep->re_id->qp, post_wr, NULL);
+	ret = ib_post_send(ep->re_id->qp, post_wr, NULL);
+	if (ret)
+		trace_xprtrdma_post_send_err(r_xprt, req, ret);
+	return ret;
 }
 
 /**
@@ -557,6 +561,10 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 
 	/* On error, the MRs get destroyed once the QP has drained. */
 	trace_xprtrdma_post_linv_err(req, rc);
+
+	/* Force a connection loss to ensure complete recovery.
+	 */
+	rpcrdma_force_disconnect(ep);
 }
 
 /**
@@ -653,4 +661,8 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	 * retransmission.
 	 */
 	rpcrdma_unpin_rqst(req->rl_reply);
+
+	/* Force a connection loss to ensure complete recovery.
+	 */
+	rpcrdma_force_disconnect(ep);
 }
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 9c2ffc67c0fd..16e5696314a4 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -250,12 +250,9 @@ xprt_rdma_connect_worker(struct work_struct *work)
 					   xprt->stat.connect_start;
 		xprt_set_connected(xprt);
 		rc = -EAGAIN;
-	} else {
-		/* Force a call to xprt_rdma_close to clean up */
-		spin_lock(&xprt->transport_lock);
-		set_bit(XPRT_CLOSE_WAIT, &xprt->state);
-		spin_unlock(&xprt->transport_lock);
-	}
+	} else
+		rpcrdma_xprt_disconnect(r_xprt);
+	xprt_unlock_connect(xprt, r_xprt);
 	xprt_wake_pending_tasks(xprt, rc);
 }
 
@@ -489,6 +486,8 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
 	struct rpcrdma_ep *ep = r_xprt->rx_ep;
 	unsigned long delay;
 
+	WARN_ON_ONCE(!xprt_lock_connect(xprt, task, r_xprt));
+
 	delay = 0;
 	if (ep && ep->re_connect_status != 0) {
 		delay = xprt_reconnect_delay(xprt);
@@ -661,7 +660,7 @@ xprt_rdma_send_request(struct rpc_rqst *rqst)
 		goto drop_connection;
 	rqst->rq_xtime = ktime_get();
 
-	if (rpcrdma_post_sends(r_xprt, req))
+	if (frwr_send(r_xprt, req))
 		goto drop_connection;
 
 	rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 649c23518ec0..aaec3c9be8db 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -124,7 +124,7 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
  * connection is closed or lost. (The important thing is it needs
  * to be invoked "at least" once).
  */
-static void rpcrdma_force_disconnect(struct rpcrdma_ep *ep)
+void rpcrdma_force_disconnect(struct rpcrdma_ep *ep)
 {
 	if (atomic_add_unless(&ep->re_force_disconnect, 1, 1))
 		xprt_force_disconnect(ep->re_xprt);
@@ -1350,21 +1350,6 @@ static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb)
 }
 
 /**
- * rpcrdma_post_sends - Post WRs to a transport's Send Queue
- * @r_xprt: controlling transport instance
- * @req: rpcrdma_req containing the Send WR to post
- *
- * Returns 0 if the post was successful, otherwise -ENOTCONN
- * is returned.
- */
-int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
-{
-	if (frwr_send(r_xprt, req))
-		return -ENOTCONN;
-	return 0;
-}
-
-/**
  * rpcrdma_post_recvs - Refill the Receive Queue
  * @r_xprt: controlling transport instance
  * @needed: current credit grant
@@ -1416,12 +1401,8 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
 
 	rc = ib_post_recv(ep->re_id->qp, wr,
 			  (const struct ib_recv_wr **)&bad_wr);
-	if (atomic_dec_return(&ep->re_receiving) > 0)
-		complete(&ep->re_done);
-
-out:
-	trace_xprtrdma_post_recvs(r_xprt, count, rc);
 	if (rc) {
+		trace_xprtrdma_post_recvs_err(r_xprt, rc);
 		for (wr = bad_wr; wr;) {
 			struct rpcrdma_rep *rep;
 
@@ -1431,6 +1412,11 @@ out:
 			--count;
 		}
 	}
+	if (atomic_dec_return(&ep->re_receiving) > 0)
+		complete(&ep->re_done);
+
+out:
+	trace_xprtrdma_post_recvs(r_xprt, count);
 	ep->re_receive_count += count;
 	return;
 }
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 5d231d94e944..d91f54eae00b 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -454,11 +454,11 @@ extern unsigned int xprt_rdma_memreg_strategy;
 /*
  * Endpoint calls - xprtrdma/verbs.c
  */
+void rpcrdma_force_disconnect(struct rpcrdma_ep *ep);
 void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc);
 int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt);
 void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt);
 
-int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
 void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp);
 
 /*
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index b7dbdcbdeb6c..04f1b78bcbca 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1656,7 +1656,7 @@ static int xs_get_srcport(struct sock_xprt *transport)
 unsigned short get_srcport(struct rpc_xprt *xprt)
 {
 	struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt);
-	return sock->srcport;
+	return xs_sock_getport(sock->sock);
 }
 EXPORT_SYMBOL(get_srcport);
 
@@ -2099,13 +2099,20 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt)
 
 	if (sock == NULL)
 		return;
+	if (!xprt->reuseport) {
+		xs_close(xprt);
+		return;
+	}
 	switch (skst) {
-	default:
+	case TCP_FIN_WAIT1:
+	case TCP_FIN_WAIT2:
+		break;
+	case TCP_ESTABLISHED:
+	case TCP_CLOSE_WAIT:
 		kernel_sock_shutdown(sock, SHUT_RDWR);
 		trace_rpc_socket_shutdown(xprt, sock);
 		break;
-	case TCP_CLOSE:
-	case TCP_TIME_WAIT:
+	default:
 		xs_reset_transport(transport);
 	}
 }
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index e3105ba407c7..a0a27d87f631 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1426,7 +1426,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
 	if (ua) {
 		if (!tipc_uaddr_valid(ua, m->msg_namelen))
 			return -EINVAL;
-		 atype = ua->addrtype;
+		atype = ua->addrtype;
 	}
 
 	/* If socket belongs to a communication group follow other paths */
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 3e02cc3b24f8..e2c0cfb334d2 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -2014,7 +2014,7 @@ static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
 {
 	const struct vsock_transport *transport;
 	struct vsock_sock *vsk;
-	ssize_t record_len;
+	ssize_t msg_len;
 	long timeout;
 	int err = 0;
 	DEFINE_WAIT(wait);
@@ -2028,9 +2028,9 @@ static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
 	if (err <= 0)
 		goto out;
 
-	record_len = transport->seqpacket_dequeue(vsk, msg, flags);
+	msg_len = transport->seqpacket_dequeue(vsk, msg, flags);
 
-	if (record_len < 0) {
+	if (msg_len < 0) {
 		err = -ENOMEM;
 		goto out;
 	}
@@ -2044,14 +2044,14 @@ static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
 		 * packet.
 		 */
 		if (flags & MSG_TRUNC)
-			err = record_len;
+			err = msg_len;
 		else
 			err = len - msg_data_left(msg);
 
 		/* Always set MSG_TRUNC if real length of packet is
 		 * bigger than user's buffer.
 		 */
-		if (record_len > len)
+		if (msg_len > len)
 			msg->msg_flags |= MSG_TRUNC;
 	}
 
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 081e7ae93cb1..59ee1be5a6dd 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -76,8 +76,12 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
 			goto out;
 
 		if (msg_data_left(info->msg) == 0 &&
-		    info->type == VIRTIO_VSOCK_TYPE_SEQPACKET)
-			pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+		    info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) {
+			pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
+
+			if (info->msg->msg_flags & MSG_EOR)
+				pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+		}
 	}
 
 	trace_virtio_transport_alloc_pkt(src_cid, src_port,
@@ -457,9 +461,12 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
 				dequeued_len += pkt_len;
 		}
 
-		if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) {
+		if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM) {
 			msg_ready = true;
 			vvs->msg_count--;
+
+			if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR)
+				msg->msg_flags |= MSG_EOR;
 		}
 
 		virtio_transport_dec_rx_pkt(vvs, pkt);
@@ -1029,7 +1036,7 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
 		goto out;
 	}
 
-	if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR)
+	if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM)
 		vvs->msg_count++;
 
 	/* Try to copy small packets into the buffer of last packet queued,
@@ -1044,12 +1051,12 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
 
 		/* If there is space in the last packet queued, we copy the
 		 * new packet in its buffer. We avoid this if the last packet
-		 * queued has VIRTIO_VSOCK_SEQ_EOR set, because this is
-		 * delimiter of SEQPACKET record, so 'pkt' is the first packet
-		 * of a new record.
+		 * queued has VIRTIO_VSOCK_SEQ_EOM set, because this is
+		 * delimiter of SEQPACKET message, so 'pkt' is the first packet
+		 * of a new message.
 		 */
 		if ((pkt->len <= last_pkt->buf_len - last_pkt->len) &&
-		    !(le32_to_cpu(last_pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR)) {
+		    !(le32_to_cpu(last_pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM)) {
 			memcpy(last_pkt->buf + last_pkt->len, pkt->buf,
 			       pkt->len);
 			last_pkt->len += pkt->len;
diff --git a/samples/kdb/kdb_hello.c b/samples/kdb/kdb_hello.c
index c1c2fa0f62c2..82736e5a5e32 100644
--- a/samples/kdb/kdb_hello.c
+++ b/samples/kdb/kdb_hello.c
@@ -28,28 +28,26 @@ static int kdb_hello_cmd(int argc, const char **argv)
 	return 0;
 }
 
+static kdbtab_t hello_cmd = {
+	.name = "hello",
+	.func = kdb_hello_cmd,
+	.usage = "[string]",
+	.help = "Say Hello World or Hello [string]",
+};
 
 static int __init kdb_hello_cmd_init(void)
 {
 	/*
 	 * Registration of a dynamically added kdb command is done with
-	 * kdb_register() with the arguments being:
-	 *   1: The name of the shell command
-	 *   2: The function that processes the command
-	 *   3: Description of the usage of any arguments
-	 *   4: Descriptive text when you run help
-	 *   5: Number of characters to complete the command
-	 *      0 == type the whole command
-	 *      1 == match both "g" and "go" for example
+	 * kdb_register().
 	 */
-	kdb_register("hello", kdb_hello_cmd, "[string]",
-		     "Say Hello World or Hello [string]", 0);
+	kdb_register(&hello_cmd);
 	return 0;
 }
 
 static void __exit kdb_hello_cmd_exit(void)
 {
-	kdb_unregister("hello");
+	kdb_unregister(&hello_cmd);
 }
 
 module_init(kdb_hello_cmd_init);
diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c
index 6c0f229db36a..c313ab4d1f4e 100644
--- a/samples/vfio-mdev/mbochs.c
+++ b/samples/vfio-mdev/mbochs.c
@@ -129,7 +129,7 @@ static dev_t		mbochs_devt;
 static struct class	*mbochs_class;
 static struct cdev	mbochs_cdev;
 static struct device	mbochs_dev;
-static int		mbochs_used_mbytes;
+static atomic_t mbochs_avail_mbytes;
 static const struct vfio_device_ops mbochs_dev_ops;
 
 struct vfio_region_info_ext {
@@ -507,18 +507,22 @@ static int mbochs_reset(struct mdev_state *mdev_state)
 
 static int mbochs_probe(struct mdev_device *mdev)
 {
+	int avail_mbytes = atomic_read(&mbochs_avail_mbytes);
 	const struct mbochs_type *type =
 		&mbochs_types[mdev_get_type_group_id(mdev)];
 	struct device *dev = mdev_dev(mdev);
 	struct mdev_state *mdev_state;
 	int ret = -ENOMEM;
 
-	if (type->mbytes + mbochs_used_mbytes > max_mbytes)
-		return -ENOMEM;
+	do {
+		if (avail_mbytes < type->mbytes)
+			return -ENOSPC;
+	} while (!atomic_try_cmpxchg(&mbochs_avail_mbytes, &avail_mbytes,
+				     avail_mbytes - type->mbytes));
 
 	mdev_state = kzalloc(sizeof(struct mdev_state), GFP_KERNEL);
 	if (mdev_state == NULL)
-		return -ENOMEM;
+		goto err_avail;
 	vfio_init_group_dev(&mdev_state->vdev, &mdev->dev, &mbochs_dev_ops);
 
 	mdev_state->vconfig = kzalloc(MBOCHS_CONFIG_SPACE_SIZE, GFP_KERNEL);
@@ -549,17 +553,18 @@ static int mbochs_probe(struct mdev_device *mdev)
 	mbochs_create_config_space(mdev_state);
 	mbochs_reset(mdev_state);
 
-	mbochs_used_mbytes += type->mbytes;
-
 	ret = vfio_register_group_dev(&mdev_state->vdev);
 	if (ret)
 		goto err_mem;
 	dev_set_drvdata(&mdev->dev, mdev_state);
 	return 0;
-
 err_mem:
+	vfio_uninit_group_dev(&mdev_state->vdev);
+	kfree(mdev_state->pages);
 	kfree(mdev_state->vconfig);
 	kfree(mdev_state);
+err_avail:
+	atomic_add(type->mbytes, &mbochs_avail_mbytes);
 	return ret;
 }
 
@@ -567,8 +572,9 @@ static void mbochs_remove(struct mdev_device *mdev)
 {
 	struct mdev_state *mdev_state = dev_get_drvdata(&mdev->dev);
 
-	mbochs_used_mbytes -= mdev_state->type->mbytes;
 	vfio_unregister_group_dev(&mdev_state->vdev);
+	vfio_uninit_group_dev(&mdev_state->vdev);
+	atomic_add(mdev_state->type->mbytes, &mbochs_avail_mbytes);
 	kfree(mdev_state->pages);
 	kfree(mdev_state->vconfig);
 	kfree(mdev_state);
@@ -1272,15 +1278,7 @@ static long mbochs_ioctl(struct vfio_device *vdev, unsigned int cmd,
 	return -ENOTTY;
 }
 
-static int mbochs_open(struct vfio_device *vdev)
-{
-	if (!try_module_get(THIS_MODULE))
-		return -ENODEV;
-
-	return 0;
-}
-
-static void mbochs_close(struct vfio_device *vdev)
+static void mbochs_close_device(struct vfio_device *vdev)
 {
 	struct mdev_state *mdev_state =
 		container_of(vdev, struct mdev_state, vdev);
@@ -1300,7 +1298,6 @@ static void mbochs_close(struct vfio_device *vdev)
 	mbochs_put_pages(mdev_state);
 
 	mutex_unlock(&mdev_state->ops_lock);
-	module_put(THIS_MODULE);
 }
 
 static ssize_t
@@ -1355,7 +1352,7 @@ static ssize_t available_instances_show(struct mdev_type *mtype,
 {
 	const struct mbochs_type *type =
 		&mbochs_types[mtype_get_type_group_id(mtype)];
-	int count = (max_mbytes - mbochs_used_mbytes) / type->mbytes;
+	int count = atomic_read(&mbochs_avail_mbytes) / type->mbytes;
 
 	return sprintf(buf, "%d\n", count);
 }
@@ -1399,8 +1396,7 @@ static struct attribute_group *mdev_type_groups[] = {
 };
 
 static const struct vfio_device_ops mbochs_dev_ops = {
-	.open = mbochs_open,
-	.release = mbochs_close,
+	.close_device = mbochs_close_device,
 	.read = mbochs_read,
 	.write = mbochs_write,
 	.ioctl = mbochs_ioctl,
@@ -1437,6 +1433,8 @@ static int __init mbochs_dev_init(void)
 {
 	int ret = 0;
 
+	atomic_set(&mbochs_avail_mbytes, max_mbytes);
+
 	ret = alloc_chrdev_region(&mbochs_devt, 0, MINORMASK + 1, MBOCHS_NAME);
 	if (ret < 0) {
 		pr_err("Error: failed to register mbochs_dev, err: %d\n", ret);
diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c
index 393c9df6f6a0..8d1a80a0722a 100644
--- a/samples/vfio-mdev/mdpy.c
+++ b/samples/vfio-mdev/mdpy.c
@@ -235,17 +235,16 @@ static int mdpy_probe(struct mdev_device *mdev)
 
 	mdev_state->vconfig = kzalloc(MDPY_CONFIG_SPACE_SIZE, GFP_KERNEL);
 	if (mdev_state->vconfig == NULL) {
-		kfree(mdev_state);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto err_state;
 	}
 
 	fbsize = roundup_pow_of_two(type->width * type->height * type->bytepp);
 
 	mdev_state->memblk = vmalloc_user(fbsize);
 	if (!mdev_state->memblk) {
-		kfree(mdev_state->vconfig);
-		kfree(mdev_state);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto err_vconfig;
 	}
 	dev_info(dev, "%s: %s (%dx%d)\n", __func__, type->name, type->width,
 		 type->height);
@@ -260,13 +259,18 @@ static int mdpy_probe(struct mdev_device *mdev)
 	mdpy_count++;
 
 	ret = vfio_register_group_dev(&mdev_state->vdev);
-	if (ret) {
-		kfree(mdev_state->vconfig);
-		kfree(mdev_state);
-		return ret;
-	}
+	if (ret)
+		goto err_mem;
 	dev_set_drvdata(&mdev->dev, mdev_state);
 	return 0;
+err_mem:
+	vfree(mdev_state->memblk);
+err_vconfig:
+	kfree(mdev_state->vconfig);
+err_state:
+	vfio_uninit_group_dev(&mdev_state->vdev);
+	kfree(mdev_state);
+	return ret;
 }
 
 static void mdpy_remove(struct mdev_device *mdev)
@@ -278,6 +282,7 @@ static void mdpy_remove(struct mdev_device *mdev)
 	vfio_unregister_group_dev(&mdev_state->vdev);
 	vfree(mdev_state->memblk);
 	kfree(mdev_state->vconfig);
+	vfio_uninit_group_dev(&mdev_state->vdev);
 	kfree(mdev_state);
 
 	mdpy_count--;
@@ -609,19 +614,6 @@ static long mdpy_ioctl(struct vfio_device *vdev, unsigned int cmd,
 	return -ENOTTY;
 }
 
-static int mdpy_open(struct vfio_device *vdev)
-{
-	if (!try_module_get(THIS_MODULE))
-		return -ENODEV;
-
-	return 0;
-}
-
-static void mdpy_close(struct vfio_device *vdev)
-{
-	module_put(THIS_MODULE);
-}
-
 static ssize_t
 resolution_show(struct device *dev, struct device_attribute *attr,
 		char *buf)
@@ -716,8 +708,6 @@ static struct attribute_group *mdev_type_groups[] = {
 };
 
 static const struct vfio_device_ops mdpy_dev_ops = {
-	.open = mdpy_open,
-	.release = mdpy_close,
 	.read = mdpy_read,
 	.write = mdpy_write,
 	.ioctl = mdpy_ioctl,
diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c
index 8b26fecc4afe..5983cdb16e3d 100644
--- a/samples/vfio-mdev/mtty.c
+++ b/samples/vfio-mdev/mtty.c
@@ -718,8 +718,8 @@ static int mtty_probe(struct mdev_device *mdev)
 
 	mdev_state = kzalloc(sizeof(struct mdev_state), GFP_KERNEL);
 	if (mdev_state == NULL) {
-		atomic_add(nr_ports, &mdev_avail_ports);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto err_nr_ports;
 	}
 
 	vfio_init_group_dev(&mdev_state->vdev, &mdev->dev, &mtty_dev_ops);
@@ -732,9 +732,8 @@ static int mtty_probe(struct mdev_device *mdev)
 	mdev_state->vconfig = kzalloc(MTTY_CONFIG_SPACE_SIZE, GFP_KERNEL);
 
 	if (mdev_state->vconfig == NULL) {
-		kfree(mdev_state);
-		atomic_add(nr_ports, &mdev_avail_ports);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto err_state;
 	}
 
 	mutex_init(&mdev_state->ops_lock);
@@ -743,14 +742,19 @@ static int mtty_probe(struct mdev_device *mdev)
 	mtty_create_config_space(mdev_state);
 
 	ret = vfio_register_group_dev(&mdev_state->vdev);
-	if (ret) {
-		kfree(mdev_state);
-		atomic_add(nr_ports, &mdev_avail_ports);
-		return ret;
-	}
-
+	if (ret)
+		goto err_vconfig;
 	dev_set_drvdata(&mdev->dev, mdev_state);
 	return 0;
+
+err_vconfig:
+	kfree(mdev_state->vconfig);
+err_state:
+	vfio_uninit_group_dev(&mdev_state->vdev);
+	kfree(mdev_state);
+err_nr_ports:
+	atomic_add(nr_ports, &mdev_avail_ports);
+	return ret;
 }
 
 static void mtty_remove(struct mdev_device *mdev)
@@ -761,6 +765,7 @@ static void mtty_remove(struct mdev_device *mdev)
 	vfio_unregister_group_dev(&mdev_state->vdev);
 
 	kfree(mdev_state->vconfig);
+	vfio_uninit_group_dev(&mdev_state->vdev);
 	kfree(mdev_state);
 	atomic_add(nr_ports, &mdev_avail_ports);
 }
@@ -1202,17 +1207,6 @@ static long mtty_ioctl(struct vfio_device *vdev, unsigned int cmd,
 	return -ENOTTY;
 }
 
-static int mtty_open(struct vfio_device *vdev)
-{
-	pr_info("%s\n", __func__);
-	return 0;
-}
-
-static void mtty_close(struct vfio_device *mdev)
-{
-	pr_info("%s\n", __func__);
-}
-
 static ssize_t
 sample_mtty_dev_show(struct device *dev, struct device_attribute *attr,
 		     char *buf)
@@ -1320,8 +1314,6 @@ static struct attribute_group *mdev_type_groups[] = {
 
 static const struct vfio_device_ops mtty_dev_ops = {
 	.name = "vfio-mtty",
-	.open = mtty_open,
-	.release = mtty_close,
 	.read = mtty_read,
 	.write = mtty_write,
 	.ioctl = mtty_ioctl,
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index f247e691562d..cdec22088423 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -57,6 +57,7 @@ kecho := $($(quiet)kecho)
 # - If the content differ the new file is used
 # - If they are equal no change, and no timestamp update
 define filechk
+	$(check-FORCE)
 	$(Q)set -e;						\
 	mkdir -p $(dir $@);					\
 	trap "rm -f $(dot-target).tmp" EXIT;			\
@@ -130,13 +131,19 @@ make-cmd = $(call escsq,$(subst $(pound),$$(pound),$(subst $$,$$$$,$(cmd_$(1))))
 # PHONY targets skipped in both cases.
 newer-prereqs = $(filter-out $(PHONY),$?)
 
+# It is a typical mistake to forget the FORCE prerequisite. Check it here so
+# no more breakage will slip in.
+check-FORCE = $(if $(filter FORCE, $^),,$(warning FORCE prerequisite is missing))
+
+if-changed-cond = $(newer-prereqs)$(cmd-check)$(check-FORCE)
+
 # Execute command if command has changed or prerequisite(s) are updated.
-if_changed = $(if $(newer-prereqs)$(cmd-check),                              \
+if_changed = $(if $(if-changed-cond),                                        \
 	$(cmd);                                                              \
 	printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd, @:)
 
 # Execute the command and also postprocess generated .d dependencies file.
-if_changed_dep = $(if $(newer-prereqs)$(cmd-check),$(cmd_and_fixdep),@:)
+if_changed_dep = $(if $(if-changed-cond),$(cmd_and_fixdep),@:)
 
 cmd_and_fixdep =                                                             \
 	$(cmd);                                                              \
@@ -146,7 +153,7 @@ cmd_and_fixdep =                                                             \
 # Usage: $(call if_changed_rule,foo)
 # Will check if $(cmd_foo) or any of the prerequisites changed,
 # and if so will execute $(rule_foo).
-if_changed_rule = $(if $(newer-prereqs)$(cmd-check),$(rule_$(1)),@:)
+if_changed_rule = $(if $(if-changed-cond),$(rule_$(1)),@:)
 
 ###
 # why - tell why a target got built
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 02197cb8e3a7..3efc984d4c69 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -88,6 +88,10 @@ endif
 
 targets-for-modules := $(patsubst %.o, %.mod, $(filter %.o, $(obj-m)))
 
+ifdef CONFIG_LTO_CLANG
+targets-for-modules += $(patsubst %.o, %.lto.o, $(filter %.o, $(obj-m)))
+endif
+
 ifdef need-modorder
 targets-for-modules += $(obj)/modules.order
 endif
@@ -173,6 +177,8 @@ cmd_modversions_c =								\
 	if $(NM) $@ 2>/dev/null | grep -q __ksymtab; then			\
 		$(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes))	\
 		    > $@.symversions;						\
+	else									\
+		rm -f $@.symversions;						\
 	fi;
 else
 cmd_modversions_c =								\
@@ -271,12 +277,33 @@ $(obj)/%.o: $(src)/%.c $(recordmcount_source) $$(objtool_dep) FORCE
 	$(call if_changed_rule,cc_o_c)
 	$(call cmd,force_checksrc)
 
+ifdef CONFIG_LTO_CLANG
+# Module .o files may contain LLVM bitcode, compile them into native code
+# before ELF processing
+quiet_cmd_cc_lto_link_modules = LTO [M] $@
+cmd_cc_lto_link_modules =						\
+	$(LD) $(ld_flags) -r -o $@					\
+		$(shell [ -s $(@:.lto.o=.o.symversions) ] &&		\
+			echo -T $(@:.lto.o=.o.symversions))		\
+		--whole-archive $(filter-out FORCE,$^)
+
+ifdef CONFIG_STACK_VALIDATION
+# objtool was skipped for LLVM bitcode, run it now that we have compiled
+# modules into native code
+cmd_cc_lto_link_modules += ;						\
+	$(objtree)/tools/objtool/objtool $(objtool_args) --module $@
+endif
+
+$(obj)/%.lto.o: $(obj)/%.o FORCE
+	$(call if_changed,cc_lto_link_modules)
+endif
+
 cmd_mod = { \
 	echo $(if $($*-objs)$($*-y)$($*-m), $(addprefix $(obj)/, $($*-objs) $($*-y) $($*-m)), $(@:.mod=.o)); \
 	$(undefined_syms) echo; \
 	} > $@
 
-$(obj)/%.mod: $(obj)/%.o FORCE
+$(obj)/%.mod: $(obj)/%$(mod-prelink-ext).o FORCE
 	$(call if_changed,mod)
 
 quiet_cmd_cc_lst_c = MKLST   $@
@@ -381,7 +408,6 @@ $(subdir-builtin): $(obj)/%/built-in.a: $(obj)/% ;
 $(subdir-modorder): $(obj)/%/modules.order: $(obj)/% ;
 
 # combine symversions for later processing
-quiet_cmd_update_lto_symversions = SYMVER  $@
 ifeq ($(CONFIG_LTO_CLANG) $(CONFIG_MODVERSIONS),y y)
       cmd_update_lto_symversions =					\
 	rm -f $@.symversions						\
diff --git a/scripts/Makefile.clang b/scripts/Makefile.clang
new file mode 100644
index 000000000000..4cce8fd0779c
--- /dev/null
+++ b/scripts/Makefile.clang
@@ -0,0 +1,35 @@
+# Individual arch/{arch}/Makefiles should use -EL/-EB to set intended
+# endianness and -m32/-m64 to set word size based on Kconfigs instead of
+# relying on the target triple.
+CLANG_TARGET_FLAGS_arm		:= arm-linux-gnueabi
+CLANG_TARGET_FLAGS_arm64	:= aarch64-linux-gnu
+CLANG_TARGET_FLAGS_hexagon	:= hexagon-linux-musl
+CLANG_TARGET_FLAGS_m68k		:= m68k-linux-gnu
+CLANG_TARGET_FLAGS_mips		:= mipsel-linux-gnu
+CLANG_TARGET_FLAGS_powerpc	:= powerpc64le-linux-gnu
+CLANG_TARGET_FLAGS_riscv	:= riscv64-linux-gnu
+CLANG_TARGET_FLAGS_s390		:= s390x-linux-gnu
+CLANG_TARGET_FLAGS_x86		:= x86_64-linux-gnu
+CLANG_TARGET_FLAGS		:= $(CLANG_TARGET_FLAGS_$(SRCARCH))
+
+ifeq ($(CROSS_COMPILE),)
+ifeq ($(CLANG_TARGET_FLAGS),)
+$(error Specify CROSS_COMPILE or add '--target=' option to scripts/Makefile.clang)
+else
+CLANG_FLAGS	+= --target=$(CLANG_TARGET_FLAGS)
+endif # CLANG_TARGET_FLAGS
+else
+CLANG_FLAGS	+= --target=$(notdir $(CROSS_COMPILE:%-=%))
+endif # CROSS_COMPILE
+
+ifeq ($(LLVM_IAS),0)
+CLANG_FLAGS	+= -fno-integrated-as
+GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)elfedit))
+CLANG_FLAGS	+= --prefix=$(GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE))
+else
+CLANG_FLAGS	+= -fintegrated-as
+endif
+CLANG_FLAGS	+= -Werror=unknown-warning-option
+KBUILD_CFLAGS	+= $(CLANG_FLAGS)
+KBUILD_AFLAGS	+= $(CLANG_FLAGS)
+export CLANG_FLAGS
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index c57f14ac6f99..54582673fc1a 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -225,17 +225,23 @@ dtc_cpp_flags  = -Wp,-MMD,$(depfile).pre.tmp -nostdinc                    \
 		 $(addprefix -I,$(DTC_INCLUDE))                          \
 		 -undef -D__DTS__
 
+ifeq ($(CONFIG_LTO_CLANG),y)
+# With CONFIG_LTO_CLANG, .o files in modules might be LLVM bitcode, so we
+# need to run LTO to compile them into native code (.lto.o) before further
+# processing.
+mod-prelink-ext := .lto
+endif
+
 # Objtool arguments are also needed for modfinal with LTO, so we define
 # then here to avoid duplication.
 objtool_args =								\
 	$(if $(CONFIG_UNWINDER_ORC),orc generate,check)			\
-	$(if $(part-of-module), --module,)				\
+	$(if $(part-of-module), --module)				\
 	$(if $(CONFIG_FRAME_POINTER),, --no-fp)				\
-	$(if $(or $(CONFIG_GCOV_KERNEL),$(CONFIG_LTO_CLANG)), 		\
-		--no-unreachable,)					\
-	$(if $(CONFIG_RETPOLINE), --retpoline,)				\
-	$(if $(CONFIG_X86_SMAP), --uaccess,)				\
-	$(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount,)
+	$(if $(CONFIG_GCOV_KERNEL)$(CONFIG_LTO_CLANG), --no-unreachable)\
+	$(if $(CONFIG_RETPOLINE), --retpoline)				\
+	$(if $(CONFIG_X86_SMAP), --uaccess)				\
+	$(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount)
 
 # Useful for describing the dependency of composite objects
 # Usage:
diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal
index 5e9b8057fb24..ff805777431c 100644
--- a/scripts/Makefile.modfinal
+++ b/scripts/Makefile.modfinal
@@ -9,7 +9,7 @@ __modfinal:
 include include/config/auto.conf
 include $(srctree)/scripts/Kbuild.include
 
-# for c_flags and objtool_args
+# for c_flags and mod-prelink-ext
 include $(srctree)/scripts/Makefile.lib
 
 # find all modules listed in modules.order
@@ -30,23 +30,6 @@ quiet_cmd_cc_o_c = CC [M]  $@
 
 ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink)
 
-ifdef CONFIG_LTO_CLANG
-# With CONFIG_LTO_CLANG, reuse the object file we compiled for modpost to
-# avoid a second slow LTO link
-prelink-ext := .lto
-
-# ELF processing was skipped earlier because we didn't have native code,
-# so let's now process the prelinked binary before we link the module.
-
-ifdef CONFIG_STACK_VALIDATION
-cmd_ld_ko_o +=								\
-	$(objtree)/tools/objtool/objtool $(objtool_args)		\
-		$(@:.ko=$(prelink-ext).o);
-
-endif # CONFIG_STACK_VALIDATION
-
-endif # CONFIG_LTO_CLANG
-
 quiet_cmd_ld_ko_o = LD [M]  $@
       cmd_ld_ko_o +=							\
 	$(LD) -r $(KBUILD_LDFLAGS)					\
@@ -72,7 +55,7 @@ if_changed_except = $(if $(call newer_prereqs_except,$(2))$(cmd-check),      \
 
 
 # Re-generate module BTFs if either module's .ko or vmlinux changed
-$(modules): %.ko: %$(prelink-ext).o %.mod.o scripts/module.lds $(if $(KBUILD_BUILTIN),vmlinux) FORCE
+$(modules): %.ko: %$(mod-prelink-ext).o %.mod.o scripts/module.lds $(if $(KBUILD_BUILTIN),vmlinux) FORCE
 	+$(call if_changed_except,ld_ko_o,vmlinux)
 ifdef CONFIG_DEBUG_INFO_BTF_MODULES
 	+$(if $(newer-prereqs),$(call cmd,btf_ko))
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index c383ba33d837..eef56d629799 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -41,7 +41,7 @@ __modpost:
 include include/config/auto.conf
 include $(srctree)/scripts/Kbuild.include
 
-# for ld_flags
+# for mod-prelink-ext
 include $(srctree)/scripts/Makefile.lib
 
 MODPOST = scripts/mod/modpost								\
@@ -118,22 +118,6 @@ $(input-symdump):
 	@echo >&2 '         Modules may not have dependencies or modversions.'
 	@echo >&2 '         You may get many unresolved symbol warnings.'
 
-ifdef CONFIG_LTO_CLANG
-# With CONFIG_LTO_CLANG, .o files might be LLVM bitcode, so we need to run
-# LTO to compile them into native code before running modpost
-prelink-ext := .lto
-
-quiet_cmd_cc_lto_link_modules = LTO [M] $@
-cmd_cc_lto_link_modules =						\
-	$(LD) $(ld_flags) -r -o $@					\
-		$(shell [ -s $(@:.lto.o=.o.symversions) ] &&		\
-			echo -T $(@:.lto.o=.o.symversions))		\
-		--whole-archive $^
-
-%.lto.o: %.o
-	$(call if_changed,cc_lto_link_modules)
-endif
-
 modules := $(sort $(shell cat $(MODORDER)))
 
 # KBUILD_MODPOST_WARN can be set to avoid error out in case of undefined symbols
@@ -144,9 +128,9 @@ endif
 # Read out modules.order to pass in modpost.
 # Otherwise, allmodconfig would fail with "Argument list too long".
 quiet_cmd_modpost = MODPOST $@
-      cmd_modpost = sed 's/\.ko$$/$(prelink-ext)\.o/' $< | $(MODPOST) -T -
+      cmd_modpost = sed 's/\.ko$$/$(mod-prelink-ext)\.o/' $< | $(MODPOST) -T -
 
-$(output-symdump): $(MODORDER) $(input-symdump) $(modules:.ko=$(prelink-ext).o) FORCE
+$(output-symdump): $(MODORDER) $(input-symdump) $(modules:.ko=$(mod-prelink-ext).o) FORCE
 	$(call if_changed,modpost)
 
 targets += $(output-symdump)
diff --git a/scripts/adjust_autoksyms.sh b/scripts/adjust_autoksyms.sh
index d8f6f9c63043..59fdb875e818 100755
--- a/scripts/adjust_autoksyms.sh
+++ b/scripts/adjust_autoksyms.sh
@@ -42,10 +42,10 @@ $CONFIG_SHELL $srctree/scripts/gen_autoksyms.sh "$new_ksyms_file"
 changed=$(
 count=0
 sort "$cur_ksyms_file" "$new_ksyms_file" | uniq -u |
-sed -n 's/^#define __KSYM_\(.*\) 1/\1/p' | tr "A-Z_" "a-z/" |
+sed -n 's/^#define __KSYM_\(.*\) 1/\1/p' |
 while read sympath; do
 	if [ -z "$sympath" ]; then continue; fi
-	depfile="include/ksym/${sympath}.h"
+	depfile="include/ksym/${sympath}"
 	mkdir -p "$(dirname "$depfile")"
 	touch "$depfile"
 	# Filesystems with coarse time precision may create timestamps
diff --git a/scripts/as-version.sh b/scripts/as-version.sh
index 8b9410e329df..1a21495e9ff0 100755
--- a/scripts/as-version.sh
+++ b/scripts/as-version.sh
@@ -21,14 +21,14 @@ get_canonical_version()
 	echo $((10000 * $1 + 100 * ${2:-0} + ${3:-0}))
 }
 
-# Clang fails to handle -Wa,--version unless -no-integrated-as is given.
-# We check -(f)integrated-as, expecting it is explicitly passed in for the
+# Clang fails to handle -Wa,--version unless -fno-integrated-as is given.
+# We check -fintegrated-as, expecting it is explicitly passed in for the
 # integrated assembler case.
 check_integrated_as()
 {
 	while [ $# -gt 0 ]; do
-		if [ "$1" = -integrated-as -o "$1" = -fintegrated-as ]; then
-			# For the intergrated assembler, we do not check the
+		if [ "$1" = -fintegrated-as ]; then
+			# For the integrated assembler, we do not check the
 			# version here. It is the same as the clang version, and
 			# it has been already checked by scripts/cc-version.sh.
 			echo LLVM 0
diff --git a/scripts/check_extable.sh b/scripts/check_extable.sh
index 93af93c7b346..4b380564cf74 100755
--- a/scripts/check_extable.sh
+++ b/scripts/check_extable.sh
@@ -4,7 +4,7 @@
 
 obj=$1
 
-file ${obj} | grep -q ELF || (echo "${obj} is not and ELF file." 1>&2 ; exit 0)
+file ${obj} | grep -q ELF || (echo "${obj} is not an ELF file." 1>&2 ; exit 0)
 
 # Bail out early if there isn't an __ex_table section in this object file.
 objdump -hj __ex_table ${obj} 2> /dev/null > /dev/null
diff --git a/scripts/checkdeclares.pl b/scripts/checkdeclares.pl
index f6d551c84fc6..f6d551c84fc6 100644..100755
--- a/scripts/checkdeclares.pl
+++ b/scripts/checkdeclares.pl
diff --git a/scripts/checkkconfigsymbols.py b/scripts/checkkconfigsymbols.py
index 1548f9ce4682..b9b0f15e5880 100755
--- a/scripts/checkkconfigsymbols.py
+++ b/scripts/checkkconfigsymbols.py
@@ -329,7 +329,7 @@ def check_symbols_helper(pool, ignore):
         if REGEX_FILE_KCONFIG.match(gitfile):
             kconfig_files.append(gitfile)
         else:
-            if ignore and not re.match(ignore, gitfile):
+            if ignore and re.match(ignore, gitfile):
                 continue
             # add source files that do not match the ignore pattern
             source_files.append(gitfile)
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 461d4221e4a4..c27d2312cfc3 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -501,7 +501,7 @@ our $Binary	= qr{(?i)0b[01]+$Int_type?};
 our $Hex	= qr{(?i)0x[0-9a-f]+$Int_type?};
 our $Int	= qr{[0-9]+$Int_type?};
 our $Octal	= qr{0[0-7]+$Int_type?};
-our $String	= qr{"[X\t]*"};
+our $String	= qr{(?:\b[Lu])?"[X\t]*"};
 our $Float_hex	= qr{(?i)0x[0-9a-f]+p-?[0-9]+[fl]?};
 our $Float_dec	= qr{(?i)(?:[0-9]+\.[0-9]*|[0-9]*\.[0-9]+)(?:e-?[0-9]+)?[fl]?};
 our $Float_int	= qr{(?i)[0-9]+e-?[0-9]+[fl]?};
@@ -1181,7 +1181,8 @@ sub git_commit_info {
 #		    git log --format='%H %s' -1 $line |
 #		    echo "commit $(cut -c 1-12,41-)"
 #		done
-	} elsif ($lines[0] =~ /^fatal: ambiguous argument '$commit': unknown revision or path not in the working tree\./) {
+	} elsif ($lines[0] =~ /^fatal: ambiguous argument '$commit': unknown revision or path not in the working tree\./ ||
+		 $lines[0] =~ /^fatal: bad object $commit/) {
 		$id = undef;
 	} else {
 		$id = substr($lines[0], 0, 12);
@@ -2587,6 +2588,8 @@ sub process {
 	my $reported_maintainer_file = 0;
 	my $non_utf8_charset = 0;
 
+	my $last_git_commit_id_linenr = -1;
+
 	my $last_blank_line = 0;
 	my $last_coalesced_string_linenr = -1;
 
@@ -2909,10 +2912,10 @@ sub process {
 					my ($email_name, $email_comment, $email_address, $comment1) = parse_email($ctx);
 					my ($author_name, $author_comment, $author_address, $comment2) = parse_email($author);
 
-					if ($email_address eq $author_address && $email_name eq $author_name) {
+					if (lc $email_address eq lc $author_address && $email_name eq $author_name) {
 						$author_sob = $ctx;
 						$authorsignoff = 2;
-					} elsif ($email_address eq $author_address) {
+					} elsif (lc $email_address eq lc $author_address) {
 						$author_sob = $ctx;
 						$authorsignoff = 3;
 					} elsif ($email_name eq $author_name) {
@@ -3170,10 +3173,20 @@ sub process {
 		}
 
 # Check for git id commit length and improperly formed commit descriptions
-		if ($in_commit_log && !$commit_log_possible_stack_dump &&
+# A correctly formed commit description is:
+#    commit <SHA-1 hash length 12+ chars> ("Complete commit subject")
+# with the commit subject '("' prefix and '")' suffix
+# This is a fairly compilicated block as it tests for what appears to be
+# bare SHA-1 hash with  minimum length of 5.  It also avoids several types of
+# possible SHA-1 matches.
+# A commit match can span multiple lines so this block attempts to find a
+# complete typical commit on a maximum of 3 lines
+		if ($perl_version_ok &&
+		    $in_commit_log && !$commit_log_possible_stack_dump &&
 		    $line !~ /^\s*(?:Link|Patchwork|http|https|BugLink|base-commit):/i &&
 		    $line !~ /^This reverts commit [0-9a-f]{7,40}/ &&
-		    ($line =~ /\bcommit\s+[0-9a-f]{5,}\b/i ||
+		    (($line =~ /\bcommit\s+[0-9a-f]{5,}\b/i ||
+		      ($line =~ /\bcommit\s*$/i && defined($rawlines[$linenr]) && $rawlines[$linenr] =~ /^\s*[0-9a-f]{5,}\b/i)) ||
 		     ($line =~ /(?:\s|^)[0-9a-f]{12,40}(?:[\s"'\(\[]|$)/i &&
 		      $line !~ /[\<\[][0-9a-f]{12,40}[\>\]]/i &&
 		      $line !~ /\bfixes:\s*[0-9a-f]{12,40}/i))) {
@@ -3183,49 +3196,56 @@ sub process {
 			my $long = 0;
 			my $case = 1;
 			my $space = 1;
-			my $hasdesc = 0;
-			my $hasparens = 0;
 			my $id = '0123456789ab';
 			my $orig_desc = "commit description";
 			my $description = "";
+			my $herectx = $herecurr;
+			my $has_parens = 0;
+			my $has_quotes = 0;
+
+			my $input = $line;
+			if ($line =~ /(?:\bcommit\s+[0-9a-f]{5,}|\bcommit\s*$)/i) {
+				for (my $n = 0; $n < 2; $n++) {
+					if ($input =~ /\bcommit\s+[0-9a-f]{5,}\s*($balanced_parens)/i) {
+						$orig_desc = $1;
+						$has_parens = 1;
+						# Always strip leading/trailing parens then double quotes if existing
+						$orig_desc = substr($orig_desc, 1, -1);
+						if ($orig_desc =~ /^".*"$/) {
+							$orig_desc = substr($orig_desc, 1, -1);
+							$has_quotes = 1;
+						}
+						last;
+					}
+					last if ($#lines < $linenr + $n);
+					$input .= " " . trim($rawlines[$linenr + $n]);
+					$herectx .= "$rawlines[$linenr + $n]\n";
+				}
+				$herectx = $herecurr if (!$has_parens);
+			}
 
-			if ($line =~ /\b(c)ommit\s+([0-9a-f]{5,})\b/i) {
+			if ($input =~ /\b(c)ommit\s+([0-9a-f]{5,})\b/i) {
 				$init_char = $1;
 				$orig_commit = lc($2);
-			} elsif ($line =~ /\b([0-9a-f]{12,40})\b/i) {
+				$short = 0 if ($input =~ /\bcommit\s+[0-9a-f]{12,40}/i);
+				$long = 1 if ($input =~ /\bcommit\s+[0-9a-f]{41,}/i);
+				$space = 0 if ($input =~ /\bcommit [0-9a-f]/i);
+				$case = 0 if ($input =~ /\b[Cc]ommit\s+[0-9a-f]{5,40}[^A-F]/);
+			} elsif ($input =~ /\b([0-9a-f]{12,40})\b/i) {
 				$orig_commit = lc($1);
 			}
 
-			$short = 0 if ($line =~ /\bcommit\s+[0-9a-f]{12,40}/i);
-			$long = 1 if ($line =~ /\bcommit\s+[0-9a-f]{41,}/i);
-			$space = 0 if ($line =~ /\bcommit [0-9a-f]/i);
-			$case = 0 if ($line =~ /\b[Cc]ommit\s+[0-9a-f]{5,40}[^A-F]/);
-			if ($line =~ /\bcommit\s+[0-9a-f]{5,}\s+\("([^"]+)"\)/i) {
-				$orig_desc = $1;
-				$hasparens = 1;
-			} elsif ($line =~ /\bcommit\s+[0-9a-f]{5,}\s*$/i &&
-				 defined $rawlines[$linenr] &&
-				 $rawlines[$linenr] =~ /^\s*\("([^"]+)"\)/) {
-				$orig_desc = $1;
-				$hasparens = 1;
-			} elsif ($line =~ /\bcommit\s+[0-9a-f]{5,}\s+\("[^"]+$/i &&
-				 defined $rawlines[$linenr] &&
-				 $rawlines[$linenr] =~ /^\s*[^"]+"\)/) {
-				$line =~ /\bcommit\s+[0-9a-f]{5,}\s+\("([^"]+)$/i;
-				$orig_desc = $1;
-				$rawlines[$linenr] =~ /^\s*([^"]+)"\)/;
-				$orig_desc .= " " . $1;
-				$hasparens = 1;
-			}
-
 			($id, $description) = git_commit_info($orig_commit,
 							      $id, $orig_desc);
 
 			if (defined($id) &&
-			   ($short || $long || $space || $case || ($orig_desc ne $description) || !$hasparens)) {
+			    ($short || $long || $space || $case || ($orig_desc ne $description) || !$has_quotes) &&
+			    $last_git_commit_id_linenr != $linenr - 1) {
 				ERROR("GIT_COMMIT_ID",
-				      "Please use git commit description style 'commit <12+ chars of sha1> (\"<title line>\")' - ie: '${init_char}ommit $id (\"$description\")'\n" . $herecurr);
+				      "Please use git commit description style 'commit <12+ chars of sha1> (\"<title line>\")' - ie: '${init_char}ommit $id (\"$description\")'\n" . $herectx);
 			}
+			#don't report the next line if this line ends in commit and the sha1 hash is the next line
+			$last_git_commit_id_linenr = $linenr if ($line =~ /\bcommit\s*$/i);
 		}
 
 # Check for added, moved or deleted files
@@ -6132,7 +6152,8 @@ sub process {
 		}
 
 # concatenated string without spaces between elements
-		if ($line =~ /$String[A-Za-z0-9_]/ || $line =~ /[A-Za-z0-9_]$String/) {
+		if ($line =~ /$String[A-Z_]/ ||
+		    ($line =~ /([A-Za-z0-9_]+)$String/ && $1 !~ /^[Lu]$/)) {
 			if (CHK("CONCATENATED_STRING",
 				"Concatenated strings should use spaces between elements\n" . $herecurr) &&
 			    $fix) {
@@ -6145,7 +6166,7 @@ sub process {
 		}
 
 # uncoalesced string fragments
-		if ($line =~ /$String\s*"/) {
+		if ($line =~ /$String\s*[Lu]?"/) {
 			if (WARN("STRING_FRAGMENTS",
 				 "Consecutive strings are generally better as a single string\n" . $herecurr) &&
 			    $fix) {
diff --git a/scripts/checksyscalls.sh b/scripts/checksyscalls.sh
index b7609958ee36..fd9777f63f14 100755
--- a/scripts/checksyscalls.sh
+++ b/scripts/checksyscalls.sh
@@ -266,4 +266,4 @@ syscall_list() {
 }
 
 (ignore_list && syscall_list $(dirname $0)/../arch/x86/entry/syscalls/syscall_32.tbl) | \
-$* -E -x c - > /dev/null
+$* -Wno-error -E -x c - > /dev/null
diff --git a/scripts/clang-tools/gen_compile_commands.py b/scripts/clang-tools/gen_compile_commands.py
index b7e9ecf16e56..0033eedce003 100755
--- a/scripts/clang-tools/gen_compile_commands.py
+++ b/scripts/clang-tools/gen_compile_commands.py
@@ -18,7 +18,7 @@ _DEFAULT_OUTPUT = 'compile_commands.json'
 _DEFAULT_LOG_LEVEL = 'WARNING'
 
 _FILENAME_PATTERN = r'^\..*\.cmd$'
-_LINE_PATTERN = r'^cmd_[^ ]*\.o := (.* )([^ ]*\.c)$'
+_LINE_PATTERN = r'^cmd_[^ ]*\.o := (.* )([^ ]*\.c) *(;|$)'
 _VALID_LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
 # The tools/ directory adopts a different build system, and produces .cmd
 # files in a different format. Do not support it.
diff --git a/scripts/coccinelle/api/kvmalloc.cocci b/scripts/coccinelle/api/kvmalloc.cocci
index c30dab718a49..5ddcb76b76b0 100644
--- a/scripts/coccinelle/api/kvmalloc.cocci
+++ b/scripts/coccinelle/api/kvmalloc.cocci
@@ -79,7 +79,7 @@ position p : script:python() { relevant(p) };
   } else {
     ... when != krealloc(E, ...)
         when any
-*   \(kfree\|kzfree\)(E)
+*   \(kfree\|kfree_sensitive\)(E)
     ...
   }
 
diff --git a/scripts/coccinelle/iterators/use_after_iter.cocci b/scripts/coccinelle/iterators/use_after_iter.cocci
index 9be48b520879..676edd562eef 100644
--- a/scripts/coccinelle/iterators/use_after_iter.cocci
+++ b/scripts/coccinelle/iterators/use_after_iter.cocci
@@ -123,6 +123,8 @@ hlist_for_each_entry_safe(c,...) S
 |
 list_remove_head(x,c,...)
 |
+list_entry_is_head(c,...)
+|
 sizeof(<+...c...+>)
 |
  &c->member
diff --git a/scripts/gcc-plugins/gen-random-seed.sh b/scripts/gcc-plugins/gen-random-seed.sh
index 68af5cc20a64..68af5cc20a64 100644..100755
--- a/scripts/gcc-plugins/gen-random-seed.sh
+++ b/scripts/gcc-plugins/gen-random-seed.sh
diff --git a/scripts/gen_autoksyms.sh b/scripts/gen_autoksyms.sh
index da320151e7c3..6ed0d225c8b1 100755
--- a/scripts/gen_autoksyms.sh
+++ b/scripts/gen_autoksyms.sh
@@ -26,18 +26,6 @@ if [ -n "$CONFIG_MODVERSIONS" ]; then
 	needed_symbols="$needed_symbols module_layout"
 fi
 
-# With CONFIG_LTO_CLANG, LLVM bitcode has not yet been compiled into a binary
-# when the .mod files are generated, which means they don't yet contain
-# references to certain symbols that will be present in the final binaries.
-if [ -n "$CONFIG_LTO_CLANG" ]; then
-	# intrinsic functions
-	needed_symbols="$needed_symbols memcpy memmove memset"
-	# ftrace
-	needed_symbols="$needed_symbols _mcount"
-	# stack protector symbols
-	needed_symbols="$needed_symbols __stack_chk_fail __stack_chk_guard"
-fi
-
 ksym_wl=
 if [ -n "$CONFIG_UNUSED_KSYMS_WHITELIST" ]; then
 	# Use 'eval' to expand the whitelist path and check if it is relative
diff --git a/scripts/gen_ksymdeps.sh b/scripts/gen_ksymdeps.sh
index 1324986e1362..8ee533f33659 100755
--- a/scripts/gen_ksymdeps.sh
+++ b/scripts/gen_ksymdeps.sh
@@ -4,7 +4,13 @@
 set -e
 
 # List of exported symbols
-ksyms=$($NM $1 | sed -n 's/.*__ksym_marker_\(.*\)/\1/p' | tr A-Z a-z)
+#
+# If the object has no symbol, $NM warns 'no symbols'.
+# Suppress the stderr.
+# TODO:
+#   Use -q instead of 2>/dev/null when we upgrade the minimum version of
+#   binutils to 2.37, llvm to 13.0.0.
+ksyms=$($NM $1 2>/dev/null | sed -n 's/.*__ksym_marker_\(.*\)/\1/p')
 
 if [ -z "$ksyms" ]; then
 	exit 0
@@ -15,8 +21,7 @@ echo "ksymdeps_$1 := \\"
 
 for s in $ksyms
 do
-	echo $s | sed -e 's:^_*:    $(wildcard include/ksym/:' \
-			-e 's:__*:/:g' -e 's/$/.h) \\/'
+	printf '    $(wildcard include/ksym/%s) \\\n' "$s"
 done
 
 echo
diff --git a/scripts/kconfig/merge_config.sh b/scripts/kconfig/merge_config.sh
index 63c8565206a4..e5b46980c22a 100755
--- a/scripts/kconfig/merge_config.sh
+++ b/scripts/kconfig/merge_config.sh
@@ -28,6 +28,7 @@ usage() {
 	echo "  -r    list redundant entries when merging fragments"
 	echo "  -y    make builtin have precedence over modules"
 	echo "  -O    dir to put generated output files.  Consider setting \$KCONFIG_CONFIG instead."
+	echo "  -s    strict mode. Fail if the fragment redefines any value."
 	echo
 	echo "Used prefix: '$CONFIG_PREFIX'. You can redefine it with \$CONFIG_ environment variable."
 }
@@ -37,6 +38,7 @@ ALLTARGET=alldefconfig
 WARNREDUN=false
 BUILTIN=false
 OUTPUT=.
+STRICT=false
 CONFIG_PREFIX=${CONFIG_-CONFIG_}
 
 while true; do
@@ -75,6 +77,11 @@ while true; do
 		shift 2
 		continue
 		;;
+	"-s")
+		STRICT=true
+		shift
+		continue
+		;;
 	*)
 		break
 		;;
@@ -141,6 +148,9 @@ for ORIG_MERGE_FILE in $MERGE_LIST ; do
 			echo Previous  value: $PREV_VAL
 			echo New value:       $NEW_VAL
 			echo
+			if [ "$STRICT" = "true" ]; then
+				STRICT_MODE_VIOLATED=true
+			fi
 		elif [ "$WARNREDUN" = "true" ]; then
 			echo Value of $CFG is redundant by fragment $ORIG_MERGE_FILE:
 		fi
@@ -153,6 +163,11 @@ for ORIG_MERGE_FILE in $MERGE_LIST ; do
 	cat $MERGE_FILE >> $TMP_FILE
 done
 
+if [ "$STRICT_MODE_VIOLATED" = "true" ]; then
+	echo "The fragment redefined a value and strict mode had been passed."
+	exit 1
+fi
+
 if [ "$RUNMAKE" = "false" ]; then
 	cp -T -- "$TMP_FILE" "$KCONFIG_CONFIG"
 	echo "#"
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 7c4a6a507ac4..cfcb60737957 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -329,10 +329,6 @@ if (defined($ENV{'KBUILD_VERBOSE'})) {
 	$verbose = "$ENV{'KBUILD_VERBOSE'}";
 }
 
-if (defined($ENV{'KDOC_WERROR'})) {
-	$Werror = "$ENV{'KDOC_WERROR'}";
-}
-
 if (defined($ENV{'KCFLAGS'})) {
 	my $kcflags = "$ENV{'KCFLAGS'}";
 
@@ -341,6 +337,10 @@ if (defined($ENV{'KCFLAGS'})) {
 	}
 }
 
+if (defined($ENV{'KDOC_WERROR'})) {
+	$Werror = "$ENV{'KDOC_WERROR'}";
+}
+
 # Generated docbook code is inserted in a template at a point where
 # docbook v3.1 requires a non-zero sequence of RefEntry's; see:
 # https://www.oasis-open.org/docbook/documentation/reference/html/refentry.html
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 36ef7b37fc5d..d74cee5c4326 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -149,67 +149,54 @@ objtool_link()
 # ${2}, ${3}, ... - optional extra .o files
 vmlinux_link()
 {
-	local lds="${objtree}/${KBUILD_LDS}"
 	local output=${1}
-	local objects
-	local strip_debug
-	local map_option
+	local objs
+	local libs
+	local ld
+	local ldflags
+	local ldlibs
 
 	info LD ${output}
 
 	# skip output file argument
 	shift
 
+	if [ -n "${CONFIG_LTO_CLANG}" ]; then
+		# Use vmlinux.o instead of performing the slow LTO link again.
+		objs=vmlinux.o
+		libs=
+	else
+		objs="${KBUILD_VMLINUX_OBJS}"
+		libs="${KBUILD_VMLINUX_LIBS}"
+	fi
+
+	if [ "${SRCARCH}" = "um" ]; then
+		wl=-Wl,
+		ld="${CC}"
+		ldflags="${CFLAGS_vmlinux}"
+		ldlibs="-lutil -lrt -lpthread"
+	else
+		wl=
+		ld="${LD}"
+		ldflags="${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux}"
+		ldlibs=
+	fi
+
+	ldflags="${ldflags} ${wl}--script=${objtree}/${KBUILD_LDS}"
+
 	# The kallsyms linking does not need debug symbols included.
 	if [ "$output" != "${output#.tmp_vmlinux.kallsyms}" ] ; then
-		strip_debug=-Wl,--strip-debug
+		ldflags="${ldflags} ${wl}--strip-debug"
 	fi
 
 	if [ -n "${CONFIG_VMLINUX_MAP}" ]; then
-		map_option="-Map=${output}.map"
+		ldflags="${ldflags} ${wl}-Map=${output}.map"
 	fi
 
-	if [ "${SRCARCH}" != "um" ]; then
-		if [ -n "${CONFIG_LTO_CLANG}" ]; then
-			# Use vmlinux.o instead of performing the slow LTO
-			# link again.
-			objects="--whole-archive		\
-				vmlinux.o 			\
-				--no-whole-archive		\
-				${@}"
-		else
-			objects="--whole-archive		\
-				${KBUILD_VMLINUX_OBJS}		\
-				--no-whole-archive		\
-				--start-group			\
-				${KBUILD_VMLINUX_LIBS}		\
-				--end-group			\
-				${@}"
-		fi
-
-		${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux}	\
-			${strip_debug#-Wl,}			\
-			-o ${output}				\
-			${map_option}				\
-			-T ${lds} ${objects}
-	else
-		objects="-Wl,--whole-archive			\
-			${KBUILD_VMLINUX_OBJS}			\
-			-Wl,--no-whole-archive			\
-			-Wl,--start-group			\
-			${KBUILD_VMLINUX_LIBS}			\
-			-Wl,--end-group				\
-			${@}"
-
-		${CC} ${CFLAGS_vmlinux}				\
-			${strip_debug}				\
-			-o ${output}				\
-			${map_option:+-Wl,${map_option}}	\
-			-Wl,-T,${lds}				\
-			${objects}				\
-			-lutil -lrt -lpthread
-		rm -f linux
-	fi
+	${ld} ${ldflags} -o ${output}					\
+		${wl}--whole-archive ${objs} ${wl}--no-whole-archive	\
+		${wl}--start-group ${libs} ${wl}--end-group		\
+		$@ ${ldlibs}
 }
 
 # generate .BTF typeinfo from DWARF debuginfo
diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c
index 9bb6c7edccc4..cc3625617a0e 100644
--- a/scripts/mod/devicetable-offsets.c
+++ b/scripts/mod/devicetable-offsets.c
@@ -42,6 +42,7 @@ int main(void)
 	DEVID_FIELD(pci_device_id, subdevice);
 	DEVID_FIELD(pci_device_id, class);
 	DEVID_FIELD(pci_device_id, class_mask);
+	DEVID_FIELD(pci_device_id, override_only);
 
 	DEVID(ccw_device_id);
 	DEVID_FIELD(ccw_device_id, match_flags);
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 7c97fa8e36bc..49aba862073e 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -426,7 +426,7 @@ static int do_ieee1394_entry(const char *filename,
 	return 1;
 }
 
-/* Looks like: pci:vNdNsvNsdNbcNscNiN. */
+/* Looks like: pci:vNdNsvNsdNbcNscNiN or <prefix>_pci:vNdNsvNsdNbcNscNiN. */
 static int do_pci_entry(const char *filename,
 			void *symval, char *alias)
 {
@@ -440,8 +440,21 @@ static int do_pci_entry(const char *filename,
 	DEF_FIELD(symval, pci_device_id, subdevice);
 	DEF_FIELD(symval, pci_device_id, class);
 	DEF_FIELD(symval, pci_device_id, class_mask);
+	DEF_FIELD(symval, pci_device_id, override_only);
+
+	switch (override_only) {
+	case 0:
+		strcpy(alias, "pci:");
+		break;
+	case PCI_ID_F_VFIO_DRIVER_OVERRIDE:
+		strcpy(alias, "vfio_pci:");
+		break;
+	default:
+		warn("Unknown PCI driver_override alias %08X\n",
+		     override_only);
+		return 0;
+	}
 
-	strcpy(alias, "pci:");
 	ADD(alias, "v", vendor != PCI_ANY_ID, vendor);
 	ADD(alias, "d", device != PCI_ANY_ID, device);
 	ADD(alias, "sv", subvendor != PCI_ANY_ID, subvendor);
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 270a7df898e2..cb8ab7d91d30 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -17,6 +17,7 @@
 #include <ctype.h>
 #include <string.h>
 #include <limits.h>
+#include <stdbool.h>
 #include <errno.h>
 #include "modpost.h"
 #include "../../include/linux/license.h"
@@ -89,6 +90,14 @@ modpost_log(enum loglevel loglevel, const char *fmt, ...)
 		error_occurred = true;
 }
 
+static inline bool strends(const char *str, const char *postfix)
+{
+	if (strlen(str) < strlen(postfix))
+		return false;
+
+	return strcmp(str + strlen(str) - strlen(postfix), postfix) == 0;
+}
+
 void *do_nofail(void *ptr, const char *expr)
 {
 	if (!ptr)
@@ -931,7 +940,7 @@ static void check_section(const char *modname, struct elf_info *elf,
 		".kprobes.text", ".cpuidle.text", ".noinstr.text"
 #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \
 		".fixup", ".entry.text", ".exception.text", ".text.*", \
-		".coldtext"
+		".coldtext", ".softirqentry.text"
 
 #define INIT_SECTIONS      ".init.*"
 #define MEM_INIT_SECTIONS  ".meminit.*"
@@ -2060,7 +2069,7 @@ static void read_symbols(const char *modname)
 	if (!mod->is_vmlinux) {
 		version = get_modinfo(&info, "version");
 		if (version || all_versions)
-			get_src_version(modname, mod->srcversion,
+			get_src_version(mod->name, mod->srcversion,
 					sizeof(mod->srcversion) - 1);
 	}
 
diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
index c1a895c0d682..0c47ff95c0e2 100644
--- a/scripts/mod/modpost.h
+++ b/scripts/mod/modpost.h
@@ -2,7 +2,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdarg.h>
-#include <stdbool.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -178,14 +177,6 @@ static inline unsigned int get_secindex(const struct elf_info *info,
 	return info->symtab_shndx_start[sym - info->symtab_start];
 }
 
-static inline bool strends(const char *str, const char *postfix)
-{
-	if (strlen(str) < strlen(postfix))
-		return false;
-
-	return strcmp(str + strlen(str) - strlen(postfix), postfix) == 0;
-}
-
 /* file2alias.c */
 extern unsigned int cross_build;
 void handle_moddevtable(struct module *mod, struct elf_info *info,
diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c
index 760e6baa7eda..905c0ec291e1 100644
--- a/scripts/mod/sumversion.c
+++ b/scripts/mod/sumversion.c
@@ -391,14 +391,9 @@ void get_src_version(const char *modname, char sum[], unsigned sumlen)
 	struct md4_ctx md;
 	char *fname;
 	char filelist[PATH_MAX + 1];
-	int postfix_len = 1;
-
-	if (strends(modname, ".lto.o"))
-		postfix_len = 5;
 
 	/* objects for a module are listed in the first line of *.mod file. */
-	snprintf(filelist, sizeof(filelist), "%.*smod",
-		 (int)strlen(modname) - postfix_len, modname);
+	snprintf(filelist, sizeof(filelist), "%s.mod", modname);
 
 	buf = read_text_file(filelist);
 
diff --git a/scripts/module.lds.S b/scripts/module.lds.S
index 04c5685c25cf..1d0e1e4dc3d2 100644
--- a/scripts/module.lds.S
+++ b/scripts/module.lds.S
@@ -24,6 +24,7 @@ SECTIONS {
 	__kcrctab		0 : { *(SORT(___kcrctab+*)) }
 	__kcrctab_gpl		0 : { *(SORT(___kcrctab_gpl+*)) }
 
+	.ctors			0 : ALIGN(8) { *(SORT(.ctors.*)) *(.ctors) }
 	.init_array		0 : ALIGN(8) { *(SORT(.init_array.*)) *(.init_array) }
 
 	__jump_table		0 : ALIGN(8) { KEEP(*(__jump_table)) }
diff --git a/scripts/sorttable.c b/scripts/sorttable.c
index 0ef3abfc4a51..f355869c65cd 100644
--- a/scripts/sorttable.c
+++ b/scripts/sorttable.c
@@ -349,6 +349,7 @@ static int do_file(char const *const fname, void *addr)
 	case EM_ARM:
 	case EM_MICROBLAZE:
 	case EM_MIPS:
+	case EM_RISCV:
 	case EM_XTENSA:
 		break;
 	default:
diff --git a/scripts/subarch.include b/scripts/subarch.include
index 650682821126..776849a3c500 100644
--- a/scripts/subarch.include
+++ b/scripts/subarch.include
@@ -7,7 +7,7 @@
 SUBARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \
 				  -e s/sun4u/sparc64/ \
 				  -e s/arm.*/arm/ -e s/sa110/arm/ \
-				  -e s/s390x/s390/ -e s/parisc64/parisc/ \
+				  -e s/s390x/s390/ \
 				  -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \
 				  -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \
 				  -e s/riscv.*/riscv/)
diff --git a/scripts/syscallnr.sh b/scripts/syscallnr.sh
index 3aa29e0dcc52..3aa29e0dcc52 100644..100755
--- a/scripts/syscallnr.sh
+++ b/scripts/syscallnr.sh
diff --git a/scripts/xen-hypercalls.sh b/scripts/xen-hypercalls.sh
index f18b00843df3..f18b00843df3 100644..100755
--- a/scripts/xen-hypercalls.sh
+++ b/scripts/xen-hypercalls.sh
diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening
index a56c36470cb1..90cbaff86e13 100644
--- a/security/Kconfig.hardening
+++ b/security/Kconfig.hardening
@@ -29,6 +29,7 @@ choice
 	prompt "Initialize kernel stack variables at function entry"
 	default GCC_PLUGIN_STRUCTLEAK_BYREF_ALL if COMPILE_TEST && GCC_PLUGINS
 	default INIT_STACK_ALL_PATTERN if COMPILE_TEST && CC_HAS_AUTO_VAR_INIT_PATTERN
+	default INIT_STACK_ALL_ZERO if CC_HAS_AUTO_VAR_INIT_PATTERN
 	default INIT_STACK_NONE
 	help
 	  This option enables initialization of stack variables at
@@ -39,11 +40,11 @@ choice
 	  syscalls.
 
 	  This chooses the level of coverage over classes of potentially
-	  uninitialized variables. The selected class will be
+	  uninitialized variables. The selected class of variable will be
 	  initialized before use in a function.
 
 	config INIT_STACK_NONE
-		bool "no automatic initialization (weakest)"
+		bool "no automatic stack variable initialization (weakest)"
 		help
 		  Disable automatic stack variable initialization.
 		  This leaves the kernel vulnerable to the standard
@@ -80,7 +81,7 @@ choice
 		  and is disallowed.
 
 	config GCC_PLUGIN_STRUCTLEAK_BYREF_ALL
-		bool "zero-init anything passed by reference (very strong)"
+		bool "zero-init everything passed by reference (very strong)"
 		depends on GCC_PLUGINS
 		depends on !(KASAN && KASAN_STACK)
 		select GCC_PLUGIN_STRUCTLEAK
@@ -91,33 +92,44 @@ choice
 		  of uninitialized stack variable exploits and information
 		  exposures.
 
+		  As a side-effect, this keeps a lot of variables on the
+		  stack that can otherwise be optimized out, so combining
+		  this with CONFIG_KASAN_STACK can lead to a stack overflow
+		  and is disallowed.
+
 	config INIT_STACK_ALL_PATTERN
-		bool "0xAA-init everything on the stack (strongest)"
+		bool "pattern-init everything (strongest)"
 		depends on CC_HAS_AUTO_VAR_INIT_PATTERN
 		help
-		  Initializes everything on the stack with a 0xAA
-		  pattern. This is intended to eliminate all classes
-		  of uninitialized stack variable exploits and information
-		  exposures, even variables that were warned to have been
-		  left uninitialized.
+		  Initializes everything on the stack (including padding)
+		  with a specific debug value. This is intended to eliminate
+		  all classes of uninitialized stack variable exploits and
+		  information exposures, even variables that were warned about
+		  having been left uninitialized.
 
 		  Pattern initialization is known to provoke many existing bugs
 		  related to uninitialized locals, e.g. pointers receive
-		  non-NULL values, buffer sizes and indices are very big.
+		  non-NULL values, buffer sizes and indices are very big. The
+		  pattern is situation-specific; Clang on 64-bit uses 0xAA
+		  repeating for all types and padding except float and double
+		  which use 0xFF repeating (-NaN). Clang on 32-bit uses 0xFF
+		  repeating for all types and padding.
 
 	config INIT_STACK_ALL_ZERO
-		bool "zero-init everything on the stack (strongest and safest)"
+		bool "zero-init everything (strongest and safest)"
 		depends on CC_HAS_AUTO_VAR_INIT_ZERO
 		help
-		  Initializes everything on the stack with a zero
-		  value. This is intended to eliminate all classes
-		  of uninitialized stack variable exploits and information
-		  exposures, even variables that were warned to have been
-		  left uninitialized.
-
-		  Zero initialization provides safe defaults for strings,
-		  pointers, indices and sizes, and is therefore
-		  more suitable as a security mitigation measure.
+		  Initializes everything on the stack (including padding)
+		  with a zero value. This is intended to eliminate all
+		  classes of uninitialized stack variable exploits and
+		  information exposures, even variables that were warned
+		  about having been left uninitialized.
+
+		  Zero initialization provides safe defaults for strings
+		  (immediately NUL-terminated), pointers (NULL), indices
+		  (index 0), and sizes (0 length), so it is therefore more
+		  suitable as a production security mitigation than pattern
+		  initialization.
 
 endchoice
 
@@ -217,6 +229,25 @@ config INIT_ON_FREE_DEFAULT_ON
 	  touching "cold" memory areas. Most cases see 3-5% impact. Some
 	  synthetic workloads have measured as high as 8%.
 
+config CC_HAS_ZERO_CALL_USED_REGS
+	def_bool $(cc-option,-fzero-call-used-regs=used-gpr)
+
+config ZERO_CALL_USED_REGS
+	bool "Enable register zeroing on function exit"
+	depends on CC_HAS_ZERO_CALL_USED_REGS
+	help
+	  At the end of functions, always zero any caller-used register
+	  contents. This helps ensure that temporary values are not
+	  leaked beyond the function boundary. This means that register
+	  contents are less likely to be available for side channels
+	  and information exposures. Additionally, this helps reduce the
+	  number of useful ROP gadgets by about 20% (and removes compiler
+	  generated "write-what-where" gadgets) in the resulting kernel
+	  image. This has a less than 1% performance impact on most
+	  workloads. Image size growth depends on architecture, and should
+	  be evaluated for suitability. For example, x86_64 grows by less
+	  than 1%, and arm64 grows by about 5%.
+
 endmenu
 
 endmenu
diff --git a/security/Makefile b/security/Makefile
index 47e432900e24..18121f8f85cd 100644
--- a/security/Makefile
+++ b/security/Makefile
@@ -4,16 +4,6 @@
 #
 
 obj-$(CONFIG_KEYS)			+= keys/
-subdir-$(CONFIG_SECURITY_SELINUX)	+= selinux
-subdir-$(CONFIG_SECURITY_SMACK)		+= smack
-subdir-$(CONFIG_SECURITY_TOMOYO)        += tomoyo
-subdir-$(CONFIG_SECURITY_APPARMOR)	+= apparmor
-subdir-$(CONFIG_SECURITY_YAMA)		+= yama
-subdir-$(CONFIG_SECURITY_LOADPIN)	+= loadpin
-subdir-$(CONFIG_SECURITY_SAFESETID)    += safesetid
-subdir-$(CONFIG_SECURITY_LOCKDOWN_LSM)	+= lockdown
-subdir-$(CONFIG_BPF_LSM)		+= bpf
-subdir-$(CONFIG_SECURITY_LANDLOCK)	+= landlock
 
 # always enable default capabilities
 obj-y					+= commoncap.o
@@ -36,5 +26,4 @@ obj-$(CONFIG_BPF_LSM)			+= bpf/
 obj-$(CONFIG_SECURITY_LANDLOCK)		+= landlock/
 
 # Object integrity file lists
-subdir-$(CONFIG_INTEGRITY)		+= integrity
 obj-$(CONFIG_INTEGRITY)			+= integrity/
diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
index d0ceada99243..f3a9cc201c8c 100644
--- a/security/integrity/ima/Kconfig
+++ b/security/integrity/ima/Kconfig
@@ -6,7 +6,6 @@ config IMA
 	select SECURITYFS
 	select CRYPTO
 	select CRYPTO_HMAC
-	select CRYPTO_MD5
 	select CRYPTO_SHA1
 	select CRYPTO_HASH_INFO
 	select TCG_TPM if HAS_IOMEM && !UML
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index f0e448ed1f9f..be965a8715e4 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -46,8 +46,11 @@ enum tpm_pcrs { TPM_PCR0 = 0, TPM_PCR8 = 8, TPM_PCR10 = 10 };
 /* current content of the policy */
 extern int ima_policy_flag;
 
+/* bitset of digests algorithms allowed in the setxattr hook */
+extern atomic_t ima_setxattr_allowed_hash_algorithms;
+
 /* set during initialization */
-extern int ima_hash_algo;
+extern int ima_hash_algo __ro_after_init;
 extern int ima_sha1_idx __ro_after_init;
 extern int ima_hash_algo_idx __ro_after_init;
 extern int ima_extra_slots __ro_after_init;
@@ -198,6 +201,7 @@ static inline unsigned int ima_hash_key(u8 *digest)
 	hook(KEXEC_CMDLINE, kexec_cmdline)		\
 	hook(KEY_CHECK, key)				\
 	hook(CRITICAL_DATA, critical_data)		\
+	hook(SETXATTR_CHECK, setxattr_check)		\
 	hook(MAX_CHECK, none)
 
 #define __ima_hook_enumify(ENUM, str)	ENUM,
@@ -254,7 +258,7 @@ int ima_get_action(struct user_namespace *mnt_userns, struct inode *inode,
 		   const struct cred *cred, u32 secid, int mask,
 		   enum ima_hooks func, int *pcr,
 		   struct ima_template_desc **template_desc,
-		   const char *func_data);
+		   const char *func_data, unsigned int *allowed_algos);
 int ima_must_measure(struct inode *inode, int mask, enum ima_hooks func);
 int ima_collect_measurement(struct integrity_iint_cache *iint,
 			    struct file *file, void *buf, loff_t size,
@@ -264,11 +268,11 @@ void ima_store_measurement(struct integrity_iint_cache *iint, struct file *file,
 			   struct evm_ima_xattr_data *xattr_value,
 			   int xattr_len, const struct modsig *modsig, int pcr,
 			   struct ima_template_desc *template_desc);
-void process_buffer_measurement(struct user_namespace *mnt_userns,
-				struct inode *inode, const void *buf, int size,
-				const char *eventname, enum ima_hooks func,
-				int pcr, const char *func_data,
-				bool buf_hash);
+int process_buffer_measurement(struct user_namespace *mnt_userns,
+			       struct inode *inode, const void *buf, int size,
+			       const char *eventname, enum ima_hooks func,
+			       int pcr, const char *func_data,
+			       bool buf_hash, u8 *digest, size_t digest_len);
 void ima_audit_measurement(struct integrity_iint_cache *iint,
 			   const unsigned char *filename);
 int ima_alloc_init_template(struct ima_event_data *event_data,
@@ -285,10 +289,10 @@ int ima_match_policy(struct user_namespace *mnt_userns, struct inode *inode,
 		     const struct cred *cred, u32 secid, enum ima_hooks func,
 		     int mask, int flags, int *pcr,
 		     struct ima_template_desc **template_desc,
-		     const char *func_data);
+		     const char *func_data, unsigned int *allowed_algos);
 void ima_init_policy(void);
 void ima_update_policy(void);
-void ima_update_policy_flag(void);
+void ima_update_policy_flags(void);
 ssize_t ima_parse_add_rule(char *);
 void ima_delete_rules(void);
 int ima_check_policy(void);
@@ -319,7 +323,7 @@ int ima_must_appraise(struct user_namespace *mnt_userns, struct inode *inode,
 void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file);
 enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint,
 					   enum ima_hooks func);
-enum hash_algo ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value,
+enum hash_algo ima_get_hash_algo(const struct evm_ima_xattr_data *xattr_value,
 				 int xattr_len);
 int ima_read_xattr(struct dentry *dentry,
 		   struct evm_ima_xattr_data **xattr_value);
diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c
index d8e321cc6936..2c6c3a5228b5 100644
--- a/security/integrity/ima/ima_api.c
+++ b/security/integrity/ima/ima_api.c
@@ -172,6 +172,7 @@ err_out:
  * @pcr: pointer filled in if matched measure policy sets pcr=
  * @template_desc: pointer filled in if matched measure policy sets template=
  * @func_data: func specific data, may be NULL
+ * @allowed_algos: allowlist of hash algorithms for the IMA xattr
  *
  * The policy is defined in terms of keypairs:
  *		subj=, obj=, type=, func=, mask=, fsmagic=
@@ -188,14 +189,15 @@ int ima_get_action(struct user_namespace *mnt_userns, struct inode *inode,
 		   const struct cred *cred, u32 secid, int mask,
 		   enum ima_hooks func, int *pcr,
 		   struct ima_template_desc **template_desc,
-		   const char *func_data)
+		   const char *func_data, unsigned int *allowed_algos)
 {
 	int flags = IMA_MEASURE | IMA_AUDIT | IMA_APPRAISE | IMA_HASH;
 
 	flags &= ima_policy_flag;
 
 	return ima_match_policy(mnt_userns, inode, cred, secid, func, mask,
-				flags, pcr, template_desc, func_data);
+				flags, pcr, template_desc, func_data,
+				allowed_algos);
 }
 
 /*
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index ef9dcfce45d4..dbba51583e7c 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -77,8 +77,9 @@ int ima_must_appraise(struct user_namespace *mnt_userns, struct inode *inode,
 		return 0;
 
 	security_task_getsecid_subj(current, &secid);
-	return ima_match_policy(mnt_userns, inode, current_cred(), secid, func,
-				mask, IMA_APPRAISE | IMA_HASH, NULL, NULL, NULL);
+	return ima_match_policy(mnt_userns, inode, current_cred(), secid,
+				func, mask, IMA_APPRAISE | IMA_HASH, NULL,
+				NULL, NULL, NULL);
 }
 
 static int ima_fix_xattr(struct dentry *dentry,
@@ -171,7 +172,7 @@ static void ima_cache_flags(struct integrity_iint_cache *iint,
 	}
 }
 
-enum hash_algo ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value,
+enum hash_algo ima_get_hash_algo(const struct evm_ima_xattr_data *xattr_value,
 				 int xattr_len)
 {
 	struct signature_v2_hdr *sig;
@@ -184,7 +185,8 @@ enum hash_algo ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value,
 	switch (xattr_value->type) {
 	case EVM_IMA_XATTR_DIGSIG:
 		sig = (typeof(sig))xattr_value;
-		if (sig->version != 2 || xattr_len <= sizeof(*sig))
+		if (sig->version != 2 || xattr_len <= sizeof(*sig)
+		    || sig->hash_algo >= HASH_ALGO__LAST)
 			return ima_hash_algo;
 		return sig->hash_algo;
 		break;
@@ -357,7 +359,7 @@ int ima_check_blacklist(struct integrity_iint_cache *iint,
 		if ((rc == -EPERM) && (iint->flags & IMA_MEASURE))
 			process_buffer_measurement(&init_user_ns, NULL, digest, digestsize,
 						   "blacklisted-hash", NONE,
-						   pcr, NULL, false);
+						   pcr, NULL, false, NULL, 0);
 	}
 
 	return rc;
@@ -575,6 +577,66 @@ static void ima_reset_appraise_flags(struct inode *inode, int digsig)
 		clear_bit(IMA_DIGSIG, &iint->atomic_flags);
 }
 
+/**
+ * validate_hash_algo() - Block setxattr with unsupported hash algorithms
+ * @dentry: object of the setxattr()
+ * @xattr_value: userland supplied xattr value
+ * @xattr_value_len: length of xattr_value
+ *
+ * The xattr value is mapped to its hash algorithm, and this algorithm
+ * must be built in the kernel for the setxattr to be allowed.
+ *
+ * Emit an audit message when the algorithm is invalid.
+ *
+ * Return: 0 on success, else an error.
+ */
+static int validate_hash_algo(struct dentry *dentry,
+			      const struct evm_ima_xattr_data *xattr_value,
+			      size_t xattr_value_len)
+{
+	char *path = NULL, *pathbuf = NULL;
+	enum hash_algo xattr_hash_algo;
+	const char *errmsg = "unavailable-hash-algorithm";
+	unsigned int allowed_hashes;
+
+	xattr_hash_algo = ima_get_hash_algo(xattr_value, xattr_value_len);
+
+	allowed_hashes = atomic_read(&ima_setxattr_allowed_hash_algorithms);
+
+	if (allowed_hashes) {
+		/* success if the algorithm is allowed in the ima policy */
+		if (allowed_hashes & (1U << xattr_hash_algo))
+			return 0;
+
+		/*
+		 * We use a different audit message when the hash algorithm
+		 * is denied by a policy rule, instead of not being built
+		 * in the kernel image
+		 */
+		errmsg = "denied-hash-algorithm";
+	} else {
+		if (likely(xattr_hash_algo == ima_hash_algo))
+			return 0;
+
+		/* allow any xattr using an algorithm built in the kernel */
+		if (crypto_has_alg(hash_algo_name[xattr_hash_algo], 0, 0))
+			return 0;
+	}
+
+	pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
+	if (!pathbuf)
+		return -EACCES;
+
+	path = dentry_path(dentry, pathbuf, PATH_MAX);
+
+	integrity_audit_msg(AUDIT_INTEGRITY_DATA, d_inode(dentry), path,
+			    "set_data", errmsg, -EACCES, 0);
+
+	kfree(pathbuf);
+
+	return -EACCES;
+}
+
 int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name,
 		       const void *xattr_value, size_t xattr_value_len)
 {
@@ -592,9 +654,11 @@ int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name,
 		digsig = (xvalue->type == EVM_XATTR_PORTABLE_DIGSIG);
 	}
 	if (result == 1 || evm_revalidate_status(xattr_name)) {
+		result = validate_hash_algo(dentry, xvalue, xattr_value_len);
+		if (result)
+			return result;
+
 		ima_reset_appraise_flags(d_backing_inode(dentry), digsig);
-		if (result == 1)
-			result = 0;
 	}
 	return result;
 }
diff --git a/security/integrity/ima/ima_asymmetric_keys.c b/security/integrity/ima/ima_asymmetric_keys.c
index c985418698a4..f6aa0b47a772 100644
--- a/security/integrity/ima/ima_asymmetric_keys.c
+++ b/security/integrity/ima/ima_asymmetric_keys.c
@@ -62,5 +62,5 @@ void ima_post_key_create_or_update(struct key *keyring, struct key *key,
 	 */
 	process_buffer_measurement(&init_user_ns, NULL, payload, payload_len,
 				   keyring->description, KEY_CHECK, 0,
-				   keyring->description, false);
+				   keyring->description, false, NULL, 0);
 }
diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c
index 5076a7d9d23e..b26fa67476b4 100644
--- a/security/integrity/ima/ima_init.c
+++ b/security/integrity/ima/ima_init.c
@@ -154,7 +154,8 @@ int __init ima_init(void)
 	ima_init_key_queue();
 
 	ima_measure_critical_data("kernel_info", "kernel_version",
-				  UTS_RELEASE, strlen(UTS_RELEASE), false);
+				  UTS_RELEASE, strlen(UTS_RELEASE), false,
+				  NULL, 0);
 
 	return rc;
 }
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 673833f94069..465865412100 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -35,7 +35,7 @@ int ima_appraise = IMA_APPRAISE_ENFORCE;
 int ima_appraise;
 #endif
 
-int ima_hash_algo = HASH_ALGO_SHA1;
+int __ro_after_init ima_hash_algo = HASH_ALGO_SHA1;
 static int hash_setup_done;
 
 static struct notifier_block ima_lsm_policy_notifier = {
@@ -76,6 +76,11 @@ out:
 }
 __setup("ima_hash=", hash_setup);
 
+enum hash_algo ima_get_current_hash_algo(void)
+{
+	return ima_hash_algo;
+}
+
 /* Prevent mmap'ing a file execute that is already mmap'ed write */
 static int mmap_violation_check(enum ima_hooks func, struct file *file,
 				char **pathbuf, const char **pathname,
@@ -210,6 +215,7 @@ static int process_measurement(struct file *file, const struct cred *cred,
 	int xattr_len = 0;
 	bool violation_check;
 	enum hash_algo hash_algo;
+	unsigned int allowed_algos = 0;
 
 	if (!ima_policy_flag || !S_ISREG(inode->i_mode))
 		return 0;
@@ -219,7 +225,8 @@ static int process_measurement(struct file *file, const struct cred *cred,
 	 * Included is the appraise submask.
 	 */
 	action = ima_get_action(file_mnt_user_ns(file), inode, cred, secid,
-				mask, func, &pcr, &template_desc, NULL);
+				mask, func, &pcr, &template_desc, NULL,
+				&allowed_algos);
 	violation_check = ((func == FILE_CHECK || func == MMAP_CHECK) &&
 			   (ima_policy_flag & IMA_MEASURE));
 	if (!action && !violation_check)
@@ -356,6 +363,16 @@ static int process_measurement(struct file *file, const struct cred *cred,
 
 	if ((file->f_flags & O_DIRECT) && (iint->flags & IMA_PERMIT_DIRECTIO))
 		rc = 0;
+
+	/* Ensure the digest was generated using an allowed algorithm */
+	if (rc == 0 && must_appraise && allowed_algos != 0 &&
+	    (allowed_algos & (1U << hash_algo)) == 0) {
+		rc = -EACCES;
+
+		integrity_audit_msg(AUDIT_INTEGRITY_DATA, file_inode(file),
+				    pathname, "collect_data",
+				    "denied-hash-algorithm", rc, 0);
+	}
 out_locked:
 	if ((mask & MAY_WRITE) && test_bit(IMA_DIGSIG, &iint->atomic_flags) &&
 	     !(iint->flags & IMA_NEW_FILE))
@@ -433,7 +450,7 @@ int ima_file_mprotect(struct vm_area_struct *vma, unsigned long prot)
 	inode = file_inode(vma->vm_file);
 	action = ima_get_action(file_mnt_user_ns(vma->vm_file), inode,
 				current_cred(), secid, MAY_EXEC, MMAP_CHECK,
-				&pcr, &template, NULL);
+				&pcr, &template, NULL, NULL);
 
 	/* Is the mmap'ed file in policy? */
 	if (!(action & (IMA_MEASURE | IMA_APPRAISE_SUBMASK)))
@@ -822,7 +839,7 @@ int ima_post_load_data(char *buf, loff_t size,
 	return 0;
 }
 
-/*
+/**
  * process_buffer_measurement - Measure the buffer or the buffer data hash
  * @mnt_userns:	user namespace of the mount the inode was found from
  * @inode: inode associated with the object being measured (NULL for KEY_CHECK)
@@ -833,14 +850,20 @@ int ima_post_load_data(char *buf, loff_t size,
  * @pcr: pcr to extend the measurement
  * @func_data: func specific data, may be NULL
  * @buf_hash: measure buffer data hash
+ * @digest: buffer digest will be written to
+ * @digest_len: buffer length
  *
  * Based on policy, either the buffer data or buffer data hash is measured
+ *
+ * Return: 0 if the buffer has been successfully measured, 1 if the digest
+ * has been written to the passed location but not added to a measurement entry,
+ * a negative value otherwise.
  */
-void process_buffer_measurement(struct user_namespace *mnt_userns,
-				struct inode *inode, const void *buf, int size,
-				const char *eventname, enum ima_hooks func,
-				int pcr, const char *func_data,
-				bool buf_hash)
+int process_buffer_measurement(struct user_namespace *mnt_userns,
+			       struct inode *inode, const void *buf, int size,
+			       const char *eventname, enum ima_hooks func,
+			       int pcr, const char *func_data,
+			       bool buf_hash, u8 *digest, size_t digest_len)
 {
 	int ret = 0;
 	const char *audit_cause = "ENOMEM";
@@ -861,8 +884,11 @@ void process_buffer_measurement(struct user_namespace *mnt_userns,
 	int action = 0;
 	u32 secid;
 
-	if (!ima_policy_flag)
-		return;
+	if (digest && digest_len < digest_hash_len)
+		return -EINVAL;
+
+	if (!ima_policy_flag && !digest)
+		return -ENOENT;
 
 	template = ima_template_desc_buf();
 	if (!template) {
@@ -882,9 +908,9 @@ void process_buffer_measurement(struct user_namespace *mnt_userns,
 		security_task_getsecid_subj(current, &secid);
 		action = ima_get_action(mnt_userns, inode, current_cred(),
 					secid, 0, func, &pcr, &template,
-					func_data);
-		if (!(action & IMA_MEASURE))
-			return;
+					func_data, NULL);
+		if (!(action & IMA_MEASURE) && !digest)
+			return -ENOENT;
 	}
 
 	if (!pcr)
@@ -914,6 +940,12 @@ void process_buffer_measurement(struct user_namespace *mnt_userns,
 		event_data.buf_len = digest_hash_len;
 	}
 
+	if (digest)
+		memcpy(digest, iint.ima_hash->digest, digest_hash_len);
+
+	if (!ima_policy_flag || (func && !(action & IMA_MEASURE)))
+		return 1;
+
 	ret = ima_alloc_init_template(&event_data, &entry, template);
 	if (ret < 0) {
 		audit_cause = "alloc_entry";
@@ -932,7 +964,7 @@ out:
 					func_measure_str(func),
 					audit_cause, ret, 0, ret);
 
-	return;
+	return ret;
 }
 
 /**
@@ -956,7 +988,7 @@ void ima_kexec_cmdline(int kernel_fd, const void *buf, int size)
 
 	process_buffer_measurement(file_mnt_user_ns(f.file), file_inode(f.file),
 				   buf, size, "kexec-cmdline", KEXEC_CMDLINE, 0,
-				   NULL, false);
+				   NULL, false, NULL, 0);
 	fdput(f);
 }
 
@@ -967,23 +999,30 @@ void ima_kexec_cmdline(int kernel_fd, const void *buf, int size)
  * @buf: pointer to buffer data
  * @buf_len: length of buffer data (in bytes)
  * @hash: measure buffer data hash
+ * @digest: buffer digest will be written to
+ * @digest_len: buffer length
  *
  * Measure data critical to the integrity of the kernel into the IMA log
  * and extend the pcr.  Examples of critical data could be various data
  * structures, policies, and states stored in kernel memory that can
  * impact the integrity of the system.
+ *
+ * Return: 0 if the buffer has been successfully measured, 1 if the digest
+ * has been written to the passed location but not added to a measurement entry,
+ * a negative value otherwise.
  */
-void ima_measure_critical_data(const char *event_label,
-			       const char *event_name,
-			       const void *buf, size_t buf_len,
-			       bool hash)
+int ima_measure_critical_data(const char *event_label,
+			      const char *event_name,
+			      const void *buf, size_t buf_len,
+			      bool hash, u8 *digest, size_t digest_len)
 {
 	if (!event_name || !event_label || !buf || !buf_len)
-		return;
+		return -ENOPARAM;
 
-	process_buffer_measurement(&init_user_ns, NULL, buf, buf_len, event_name,
-				   CRITICAL_DATA, 0, event_label,
-				   hash);
+	return process_buffer_measurement(&init_user_ns, NULL, buf, buf_len,
+					  event_name, CRITICAL_DATA, 0,
+					  event_label, hash, digest,
+					  digest_len);
 }
 EXPORT_SYMBOL_GPL(ima_measure_critical_data);
 
@@ -1013,7 +1052,7 @@ static int __init init_ima(void)
 		pr_warn("Couldn't register LSM notifier, error %d\n", error);
 
 	if (!error)
-		ima_update_policy_flag();
+		ima_update_policy_flags();
 
 	return error;
 }
diff --git a/security/integrity/ima/ima_mok.c b/security/integrity/ima/ima_mok.c
index 1e5c01916173..95cc31525c57 100644
--- a/security/integrity/ima/ima_mok.c
+++ b/security/integrity/ima/ima_mok.c
@@ -21,7 +21,7 @@ struct key *ima_blacklist_keyring;
 /*
  * Allocate the IMA blacklist keyring
  */
-__init int ima_mok_init(void)
+static __init int ima_mok_init(void)
 {
 	struct key_restriction *restriction;
 
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index fd5d46e511f1..87b9b71cb820 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -35,6 +35,7 @@
 #define IMA_FSNAME	0x0200
 #define IMA_KEYRINGS	0x0400
 #define IMA_LABEL	0x0800
+#define IMA_VALIDATE_ALGOS	0x1000
 
 #define UNKNOWN		0
 #define MEASURE		0x0001	/* same as IMA_MEASURE */
@@ -52,6 +53,8 @@ int ima_policy_flag;
 static int temp_ima_appraise;
 static int build_ima_appraise __ro_after_init;
 
+atomic_t ima_setxattr_allowed_hash_algorithms;
+
 #define MAX_LSM_RULES 6
 enum lsm_rule_types { LSM_OBJ_USER, LSM_OBJ_ROLE, LSM_OBJ_TYPE,
 	LSM_SUBJ_USER, LSM_SUBJ_ROLE, LSM_SUBJ_TYPE
@@ -79,6 +82,7 @@ struct ima_rule_entry {
 	bool (*uid_op)(kuid_t, kuid_t);    /* Handlers for operators       */
 	bool (*fowner_op)(kuid_t, kuid_t); /* uid_eq(), uid_gt(), uid_lt() */
 	int pcr;
+	unsigned int allowed_algos; /* bitfield of allowed hash algorithms */
 	struct {
 		void *rule;	/* LSM file metadata specific */
 		char *args_p;	/* audit value */
@@ -91,6 +95,14 @@ struct ima_rule_entry {
 };
 
 /*
+ * sanity check in case the kernels gains more hash algorithms that can
+ * fit in an unsigned int
+ */
+static_assert(
+	8 * sizeof(unsigned int) >= HASH_ALGO__LAST,
+	"The bitfield allowed_algos in ima_rule_entry is too small to contain all the supported hash algorithms, consider using a bigger type");
+
+/*
  * Without LSM specific knowledge, the default policy can only be
  * written in terms of .action, .func, .mask, .fsmagic, .uid, and .fowner
  */
@@ -646,6 +658,7 @@ static int get_subaction(struct ima_rule_entry *rule, enum ima_hooks func)
  * @pcr: set the pcr to extend
  * @template_desc: the template that should be used for this rule
  * @func_data: func specific data, may be NULL
+ * @allowed_algos: allowlist of hash algorithms for the IMA xattr
  *
  * Measure decision based on func/mask/fsmagic and LSM(subj/obj/type)
  * conditions.
@@ -658,7 +671,7 @@ int ima_match_policy(struct user_namespace *mnt_userns, struct inode *inode,
 		     const struct cred *cred, u32 secid, enum ima_hooks func,
 		     int mask, int flags, int *pcr,
 		     struct ima_template_desc **template_desc,
-		     const char *func_data)
+		     const char *func_data, unsigned int *allowed_algos)
 {
 	struct ima_rule_entry *entry;
 	int action = 0, actmask = flags | (flags << 1);
@@ -684,8 +697,11 @@ int ima_match_policy(struct user_namespace *mnt_userns, struct inode *inode,
 			action &= ~IMA_HASH;
 			if (ima_fail_unverifiable_sigs)
 				action |= IMA_FAIL_UNVERIFIABLE_SIGS;
-		}
 
+			if (allowed_algos &&
+			    entry->flags & IMA_VALIDATE_ALGOS)
+				*allowed_algos = entry->allowed_algos;
+		}
 
 		if (entry->action & IMA_DO_MASK)
 			actmask &= ~(entry->action | entry->action << 1);
@@ -706,24 +722,57 @@ int ima_match_policy(struct user_namespace *mnt_userns, struct inode *inode,
 	return action;
 }
 
-/*
- * Initialize the ima_policy_flag variable based on the currently
- * loaded policy.  Based on this flag, the decision to short circuit
- * out of a function or not call the function in the first place
- * can be made earlier.
+/**
+ * ima_update_policy_flags() - Update global IMA variables
+ *
+ * Update ima_policy_flag and ima_setxattr_allowed_hash_algorithms
+ * based on the currently loaded policy.
+ *
+ * With ima_policy_flag, the decision to short circuit out of a function
+ * or not call the function in the first place can be made earlier.
+ *
+ * With ima_setxattr_allowed_hash_algorithms, the policy can restrict the
+ * set of hash algorithms accepted when updating the security.ima xattr of
+ * a file.
+ *
+ * Context: called after a policy update and at system initialization.
  */
-void ima_update_policy_flag(void)
+void ima_update_policy_flags(void)
 {
 	struct ima_rule_entry *entry;
+	int new_policy_flag = 0;
 
+	rcu_read_lock();
 	list_for_each_entry(entry, ima_rules, list) {
+		/*
+		 * SETXATTR_CHECK rules do not implement a full policy check
+		 * because rule checking would probably have an important
+		 * performance impact on setxattr(). As a consequence, only one
+		 * SETXATTR_CHECK can be active at a given time.
+		 * Because we want to preserve that property, we set out to use
+		 * atomic_cmpxchg. Either:
+		 * - the atomic was non-zero: a setxattr hash policy is
+		 *   already enforced, we do nothing
+		 * - the atomic was zero: no setxattr policy was set, enable
+		 *   the setxattr hash policy
+		 */
+		if (entry->func == SETXATTR_CHECK) {
+			atomic_cmpxchg(&ima_setxattr_allowed_hash_algorithms,
+				       0, entry->allowed_algos);
+			/* SETXATTR_CHECK doesn't impact ima_policy_flag */
+			continue;
+		}
+
 		if (entry->action & IMA_DO_MASK)
-			ima_policy_flag |= entry->action;
+			new_policy_flag |= entry->action;
 	}
+	rcu_read_unlock();
 
 	ima_appraise |= (build_ima_appraise | temp_ima_appraise);
 	if (!ima_appraise)
-		ima_policy_flag &= ~IMA_APPRAISE;
+		new_policy_flag &= ~IMA_APPRAISE;
+
+	ima_policy_flag = new_policy_flag;
 }
 
 static int ima_appraise_flag(enum ima_hooks func)
@@ -889,7 +938,9 @@ void __init ima_init_policy(void)
 			  ARRAY_SIZE(critical_data_rules),
 			  IMA_DEFAULT_POLICY);
 
-	ima_update_policy_flag();
+	atomic_set(&ima_setxattr_allowed_hash_algorithms, 0);
+
+	ima_update_policy_flags();
 }
 
 /* Make sure we have a valid policy, at least containing some rules. */
@@ -929,7 +980,7 @@ void ima_update_policy(void)
 		 */
 		kfree(arch_policy_entry);
 	}
-	ima_update_policy_flag();
+	ima_update_policy_flags();
 
 	/* Custom IMA policy has been loaded */
 	ima_process_queued_keys();
@@ -946,7 +997,7 @@ enum {
 	Opt_fsuuid, Opt_uid_eq, Opt_euid_eq, Opt_fowner_eq,
 	Opt_uid_gt, Opt_euid_gt, Opt_fowner_gt,
 	Opt_uid_lt, Opt_euid_lt, Opt_fowner_lt,
-	Opt_appraise_type, Opt_appraise_flag,
+	Opt_appraise_type, Opt_appraise_flag, Opt_appraise_algos,
 	Opt_permit_directio, Opt_pcr, Opt_template, Opt_keyrings,
 	Opt_label, Opt_err
 };
@@ -981,6 +1032,7 @@ static const match_table_t policy_tokens = {
 	{Opt_fowner_lt, "fowner<%s"},
 	{Opt_appraise_type, "appraise_type=%s"},
 	{Opt_appraise_flag, "appraise_flag=%s"},
+	{Opt_appraise_algos, "appraise_algos=%s"},
 	{Opt_permit_directio, "permit_directio"},
 	{Opt_pcr, "pcr=%s"},
 	{Opt_template, "template=%s"},
@@ -1081,7 +1133,8 @@ static bool ima_validate_rule(struct ima_rule_entry *entry)
 		return false;
 
 	if (entry->action != APPRAISE &&
-	    entry->flags & (IMA_DIGSIG_REQUIRED | IMA_MODSIG_ALLOWED | IMA_CHECK_BLACKLIST))
+	    entry->flags & (IMA_DIGSIG_REQUIRED | IMA_MODSIG_ALLOWED |
+			    IMA_CHECK_BLACKLIST | IMA_VALIDATE_ALGOS))
 		return false;
 
 	/*
@@ -1111,7 +1164,7 @@ static bool ima_validate_rule(struct ima_rule_entry *entry)
 				     IMA_UID | IMA_FOWNER | IMA_FSUUID |
 				     IMA_INMASK | IMA_EUID | IMA_PCR |
 				     IMA_FSNAME | IMA_DIGSIG_REQUIRED |
-				     IMA_PERMIT_DIRECTIO))
+				     IMA_PERMIT_DIRECTIO | IMA_VALIDATE_ALGOS))
 			return false;
 
 		break;
@@ -1123,7 +1176,7 @@ static bool ima_validate_rule(struct ima_rule_entry *entry)
 				     IMA_INMASK | IMA_EUID | IMA_PCR |
 				     IMA_FSNAME | IMA_DIGSIG_REQUIRED |
 				     IMA_PERMIT_DIRECTIO | IMA_MODSIG_ALLOWED |
-				     IMA_CHECK_BLACKLIST))
+				     IMA_CHECK_BLACKLIST | IMA_VALIDATE_ALGOS))
 			return false;
 
 		break;
@@ -1161,6 +1214,23 @@ static bool ima_validate_rule(struct ima_rule_entry *entry)
 			return false;
 
 		break;
+	case SETXATTR_CHECK:
+		/* any action other than APPRAISE is unsupported */
+		if (entry->action != APPRAISE)
+			return false;
+
+		/* SETXATTR_CHECK requires an appraise_algos parameter */
+		if (!(entry->flags & IMA_VALIDATE_ALGOS))
+			return false;
+
+		/*
+		 * full policies are not supported, they would have too
+		 * much of a performance impact
+		 */
+		if (entry->flags & ~(IMA_FUNC | IMA_VALIDATE_ALGOS))
+			return false;
+
+		break;
 	default:
 		return false;
 	}
@@ -1173,6 +1243,34 @@ static bool ima_validate_rule(struct ima_rule_entry *entry)
 	return true;
 }
 
+static unsigned int ima_parse_appraise_algos(char *arg)
+{
+	unsigned int res = 0;
+	int idx;
+	char *token;
+
+	while ((token = strsep(&arg, ",")) != NULL) {
+		idx = match_string(hash_algo_name, HASH_ALGO__LAST, token);
+
+		if (idx < 0) {
+			pr_err("unknown hash algorithm \"%s\"",
+			       token);
+			return 0;
+		}
+
+		if (!crypto_has_alg(hash_algo_name[idx], 0, 0)) {
+			pr_err("unavailable hash algorithm \"%s\", check your kernel configuration",
+			       token);
+			return 0;
+		}
+
+		/* Add the hash algorithm to the 'allowed' bitfield */
+		res |= (1U << idx);
+	}
+
+	return res;
+}
+
 static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
 {
 	struct audit_buffer *ab;
@@ -1294,6 +1392,8 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
 				entry->func = KEY_CHECK;
 			else if (strcmp(args[0].from, "CRITICAL_DATA") == 0)
 				entry->func = CRITICAL_DATA;
+			else if (strcmp(args[0].from, "SETXATTR_CHECK") == 0)
+				entry->func = SETXATTR_CHECK;
 			else
 				result = -EINVAL;
 			if (!result)
@@ -1508,6 +1608,25 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
 			else
 				result = -EINVAL;
 			break;
+		case Opt_appraise_algos:
+			ima_log_string(ab, "appraise_algos", args[0].from);
+
+			if (entry->allowed_algos) {
+				result = -EINVAL;
+				break;
+			}
+
+			entry->allowed_algos =
+				ima_parse_appraise_algos(args[0].from);
+			/* invalid or empty list of algorithms */
+			if (!entry->allowed_algos) {
+				result = -EINVAL;
+				break;
+			}
+
+			entry->flags |= IMA_VALIDATE_ALGOS;
+
+			break;
 		case Opt_permit_directio:
 			entry->flags |= IMA_PERMIT_DIRECTIO;
 			break;
@@ -1700,6 +1819,23 @@ static void ima_show_rule_opt_list(struct seq_file *m,
 		seq_printf(m, "%s%s", i ? "|" : "", opt_list->items[i]);
 }
 
+static void ima_policy_show_appraise_algos(struct seq_file *m,
+					   unsigned int allowed_hashes)
+{
+	int idx, list_size = 0;
+
+	for (idx = 0; idx < HASH_ALGO__LAST; idx++) {
+		if (!(allowed_hashes & (1U << idx)))
+			continue;
+
+		/* only add commas if the list contains multiple entries */
+		if (list_size++)
+			seq_puts(m, ",");
+
+		seq_puts(m, hash_algo_name[idx]);
+	}
+}
+
 int ima_policy_show(struct seq_file *m, void *v)
 {
 	struct ima_rule_entry *entry = v;
@@ -1811,6 +1947,12 @@ int ima_policy_show(struct seq_file *m, void *v)
 		seq_puts(m, " ");
 	}
 
+	if (entry->flags & IMA_VALIDATE_ALGOS) {
+		seq_puts(m, "appraise_algos=");
+		ima_policy_show_appraise_algos(m, entry->allowed_algos);
+		seq_puts(m, " ");
+	}
+
 	for (i = 0; i < MAX_LSM_RULES; i++) {
 		if (entry->lsm[i].rule) {
 			switch (i) {
diff --git a/security/integrity/ima/ima_queue_keys.c b/security/integrity/ima/ima_queue_keys.c
index 979ef6c71f3d..93056c03bf5a 100644
--- a/security/integrity/ima/ima_queue_keys.c
+++ b/security/integrity/ima/ima_queue_keys.c
@@ -165,7 +165,7 @@ void ima_process_queued_keys(void)
 						   entry->keyring_name,
 						   KEY_CHECK, 0,
 						   entry->keyring_name,
-						   false);
+						   false, NULL, 0);
 		list_del(&entry->list);
 		ima_free_key_entry(entry);
 	}
diff --git a/security/selinux/ima.c b/security/selinux/ima.c
index 34d421861bfc..727c4e43219d 100644
--- a/security/selinux/ima.c
+++ b/security/selinux/ima.c
@@ -86,7 +86,8 @@ void selinux_ima_measure_state_locked(struct selinux_state *state)
 	}
 
 	ima_measure_critical_data("selinux", "selinux-state",
-				  state_str, strlen(state_str), false);
+				  state_str, strlen(state_str), false,
+				  NULL, 0);
 
 	kfree(state_str);
 
@@ -103,7 +104,8 @@ void selinux_ima_measure_state_locked(struct selinux_state *state)
 	}
 
 	ima_measure_critical_data("selinux", "selinux-policy-hash",
-				  policy, policy_len, true);
+				  policy, policy_len, true,
+				  NULL, 0);
 
 	vfree(policy);
 }
diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c
index 98d985895ec8..31af29f669d2 100644
--- a/security/tomoyo/domain.c
+++ b/security/tomoyo/domain.c
@@ -897,6 +897,9 @@ bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos,
 		      struct tomoyo_page_dump *dump)
 {
 	struct page *page;
+#ifdef CONFIG_MMU
+	int ret;
+#endif
 
 	/* dump->data is released by tomoyo_find_next_domain(). */
 	if (!dump->data) {
@@ -909,11 +912,13 @@ bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos,
 	/*
 	 * This is called at execve() time in order to dig around
 	 * in the argv/environment of the new proceess
-	 * (represented by bprm).  'current' is the process doing
-	 * the execve().
+	 * (represented by bprm).
 	 */
-	if (get_user_pages_remote(bprm->mm, pos, 1,
-				FOLL_FORCE, &page, NULL, NULL) <= 0)
+	mmap_read_lock(bprm->mm);
+	ret = get_user_pages_remote(bprm->mm, pos, 1,
+				    FOLL_FORCE, &page, NULL, NULL);
+	mmap_read_unlock(bprm->mm);
+	if (ret <= 0)
 		return false;
 #else
 	page = bprm->page[pos / PAGE_SIZE];
diff --git a/sound/aoa/codecs/onyx.h b/sound/aoa/codecs/onyx.h
index 8a32c3c3d716..6c31b7373b78 100644
--- a/sound/aoa/codecs/onyx.h
+++ b/sound/aoa/codecs/onyx.h
@@ -6,7 +6,6 @@
  */
 #ifndef __SND_AOA_CODEC_ONYX_H
 #define __SND_AOA_CODEC_ONYX_H
-#include <stddef.h>
 #include <linux/i2c.h>
 #include <asm/pmac_low_i2c.h>
 #include <asm/prom.h>
diff --git a/sound/aoa/codecs/tas.c b/sound/aoa/codecs/tas.c
index ac246dd3ab49..ab19a37e2a68 100644
--- a/sound/aoa/codecs/tas.c
+++ b/sound/aoa/codecs/tas.c
@@ -58,7 +58,6 @@
  *    and up to the hardware designer to not wire
  *    them up in some weird unusable way.
  */
-#include <stddef.h>
 #include <linux/i2c.h>
 #include <asm/pmac_low_i2c.h>
 #include <asm/prom.h>
diff --git a/sound/core/info.c b/sound/core/info.c
index 9fec3070f8ba..a451b24199c3 100644
--- a/sound/core/info.c
+++ b/sound/core/info.c
@@ -16,7 +16,6 @@
 #include <linux/utsname.h>
 #include <linux/proc_fs.h>
 #include <linux/mutex.h>
-#include <stdarg.h>
 
 int snd_info_check_reserved_words(const char *str)
 {
diff --git a/sound/isa/gus/gus_main.c b/sound/isa/gus/gus_main.c
index ae1e2542ee4a..3b46490271fe 100644
--- a/sound/isa/gus/gus_main.c
+++ b/sound/isa/gus/gus_main.c
@@ -87,10 +87,24 @@ static void snd_gus_init_control(struct snd_gus_card *gus)
 
 static int snd_gus_free(struct snd_gus_card *gus)
 {
-	if (gus->gf1.res_port2) {
-		snd_gf1_stop(gus);
-		snd_gus_init_dma_irq(gus, 0);
+	if (gus->gf1.res_port2 == NULL)
+		goto __hw_end;
+	snd_gf1_stop(gus);
+	snd_gus_init_dma_irq(gus, 0);
+      __hw_end:
+	release_and_free_resource(gus->gf1.res_port1);
+	release_and_free_resource(gus->gf1.res_port2);
+	if (gus->gf1.irq >= 0)
+		free_irq(gus->gf1.irq, (void *) gus);
+	if (gus->gf1.dma1 >= 0) {
+		disable_dma(gus->gf1.dma1);
+		free_dma(gus->gf1.dma1);
 	}
+	if (!gus->equal_dma && gus->gf1.dma2 >= 0) {
+		disable_dma(gus->gf1.dma2);
+		free_dma(gus->gf1.dma2);
+	}
+	kfree(gus);
 	return 0;
 }
 
@@ -116,7 +130,7 @@ int snd_gus_create(struct snd_card *card,
 	};
 
 	*rgus = NULL;
-	gus = devm_kzalloc(card->dev, sizeof(*gus), GFP_KERNEL);
+	gus = kzalloc(sizeof(*gus), GFP_KERNEL);
 	if (gus == NULL)
 		return -ENOMEM;
 	spin_lock_init(&gus->reg_lock);
@@ -142,33 +156,35 @@ int snd_gus_create(struct snd_card *card,
 	gus->gf1.reg_timerctrl = GUSP(gus, TIMERCNTRL);
 	gus->gf1.reg_timerdata = GUSP(gus, TIMERDATA);
 	/* allocate resources */
-	gus->gf1.res_port1 = devm_request_region(card->dev, port, 16,
-						 "GUS GF1 (Adlib/SB)");
+	gus->gf1.res_port1 = request_region(port, 16, "GUS GF1 (Adlib/SB)");
 	if (!gus->gf1.res_port1) {
 		snd_printk(KERN_ERR "gus: can't grab SB port 0x%lx\n", port);
+		snd_gus_free(gus);
 		return -EBUSY;
 	}
-	gus->gf1.res_port2 = devm_request_region(card->dev, port + 0x100, 12,
-						 "GUS GF1 (Synth)");
+	gus->gf1.res_port2 = request_region(port + 0x100, 12, "GUS GF1 (Synth)");
 	if (!gus->gf1.res_port2) {
 		snd_printk(KERN_ERR "gus: can't grab synth port 0x%lx\n", port + 0x100);
+		snd_gus_free(gus);
 		return -EBUSY;
 	}
-	if (irq >= 0 && devm_request_irq(card->dev, irq, snd_gus_interrupt, 0,
-					 "GUS GF1", (void *) gus)) {
+	if (irq >= 0 && request_irq(irq, snd_gus_interrupt, 0, "GUS GF1", (void *) gus)) {
 		snd_printk(KERN_ERR "gus: can't grab irq %d\n", irq);
+		snd_gus_free(gus);
 		return -EBUSY;
 	}
 	gus->gf1.irq = irq;
 	card->sync_irq = irq;
-	if (snd_devm_request_dma(card->dev, dma1, "GUS - 1")) {
+	if (request_dma(dma1, "GUS - 1")) {
 		snd_printk(KERN_ERR "gus: can't grab DMA1 %d\n", dma1);
+		snd_gus_free(gus);
 		return -EBUSY;
 	}
 	gus->gf1.dma1 = dma1;
 	if (dma2 >= 0 && dma1 != dma2) {
-		if (snd_devm_request_dma(card->dev, dma2, "GUS - 2")) {
+		if (request_dma(dma2, "GUS - 2")) {
 			snd_printk(KERN_ERR "gus: can't grab DMA2 %d\n", dma2);
+			snd_gus_free(gus);
 			return -EBUSY;
 		}
 		gus->gf1.dma2 = dma2;
@@ -193,8 +209,10 @@ int snd_gus_create(struct snd_card *card,
 	gus->gf1.volume_ramp = 25;
 	gus->gf1.smooth_pan = 1;
 	err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, gus, &ops);
-	if (err < 0)
+	if (err < 0) {
+		snd_gus_free(gus);
 		return err;
+	}
 	*rgus = gus;
 	return 0;
 }
diff --git a/sound/isa/gus/interwave.c b/sound/isa/gus/interwave.c
index 20f490e9d563..a04a9d3253f8 100644
--- a/sound/isa/gus/interwave.c
+++ b/sound/isa/gus/interwave.c
@@ -618,12 +618,19 @@ static int snd_interwave_card_new(struct device *pdev, int dev,
 	return 0;
 }
 
-static int snd_interwave_probe(struct snd_card *card, int dev)
+static int snd_interwave_probe_gus(struct snd_card *card, int dev,
+				   struct snd_gus_card **gusp)
+{
+	return snd_gus_create(card, port[dev], -irq[dev], dma1[dev], dma2[dev],
+			      0, 32, pcm_channels[dev], effect[dev], gusp);
+}
+
+static int snd_interwave_probe(struct snd_card *card, int dev,
+			       struct snd_gus_card *gus)
 {
 	int xirq, xdma1, xdma2;
 	struct snd_interwave *iwcard = card->private_data;
 	struct snd_wss *wss;
-	struct snd_gus_card *gus;
 #ifdef SNDRV_STB
 	struct snd_i2c_bus *i2c_bus;
 #endif
@@ -634,14 +641,6 @@ static int snd_interwave_probe(struct snd_card *card, int dev)
 	xdma1 = dma1[dev];
 	xdma2 = dma2[dev];
 
-	err = snd_gus_create(card,
-			     port[dev],
-			     -xirq, xdma1, xdma2,
-			     0, 32,
-			     pcm_channels[dev], effect[dev], &gus);
-	if (err < 0)
-		return err;
-
 	err = snd_interwave_detect(iwcard, gus, dev
 #ifdef SNDRV_STB
 				   , &i2c_bus
@@ -757,22 +756,6 @@ static int snd_interwave_probe(struct snd_card *card, int dev)
 	return 0;
 }
 
-static int snd_interwave_isa_probe1(int dev, struct device *devptr)
-{
-	struct snd_card *card;
-	int err;
-
-	err = snd_interwave_card_new(devptr, dev, &card);
-	if (err < 0)
-		return err;
-
-	err = snd_interwave_probe(card, dev);
-	if (err < 0)
-		return err;
-	dev_set_drvdata(devptr, card);
-	return 0;
-}
-
 static int snd_interwave_isa_match(struct device *pdev,
 				   unsigned int dev)
 {
@@ -788,6 +771,8 @@ static int snd_interwave_isa_match(struct device *pdev,
 static int snd_interwave_isa_probe(struct device *pdev,
 				   unsigned int dev)
 {
+	struct snd_card *card;
+	struct snd_gus_card *gus;
 	int err;
 	static const int possible_irqs[] = {5, 11, 12, 9, 7, 15, 3, -1};
 	static const int possible_dmas[] = {0, 1, 3, 5, 6, 7, -1};
@@ -814,19 +799,31 @@ static int snd_interwave_isa_probe(struct device *pdev,
 		}
 	}
 
+	err = snd_interwave_card_new(pdev, dev, &card);
+	if (err < 0)
+		return err;
+
 	if (port[dev] != SNDRV_AUTO_PORT)
-		return snd_interwave_isa_probe1(dev, pdev);
+		err = snd_interwave_probe_gus(card, dev, &gus);
 	else {
 		static const long possible_ports[] = {0x210, 0x220, 0x230, 0x240, 0x250, 0x260};
 		int i;
 		for (i = 0; i < ARRAY_SIZE(possible_ports); i++) {
 			port[dev] = possible_ports[i];
-			err = snd_interwave_isa_probe1(dev, pdev);
+			err = snd_interwave_probe_gus(card, dev, &gus);
 			if (! err)
 				return 0;
 		}
-		return err;
 	}
+	if (err < 0)
+		return err;
+
+	err = snd_interwave_probe(card, dev, gus);
+	if (err < 0)
+		return err;
+
+	dev_set_drvdata(pdev, card);
+	return 0;
 }
 
 static struct isa_driver snd_interwave_driver = {
@@ -844,6 +841,7 @@ static int snd_interwave_pnp_detect(struct pnp_card_link *pcard,
 {
 	static int dev;
 	struct snd_card *card;
+	struct snd_gus_card *gus;
 	int res;
 
 	for ( ; dev < SNDRV_CARDS; dev++) {
@@ -860,7 +858,10 @@ static int snd_interwave_pnp_detect(struct pnp_card_link *pcard,
 	res = snd_interwave_pnp(dev, card->private_data, pcard, pid);
 	if (res < 0)
 		return res;
-	res = snd_interwave_probe(card, dev);
+	res = snd_interwave_probe_gus(card, dev, &gus);
+	if (res < 0)
+		return res;
+	res = snd_interwave_probe(card, dev, gus);
 	if (res < 0)
 		return res;
 	pnp_set_card_drvdata(pcard, card);
diff --git a/sound/parisc/harmony.c b/sound/parisc/harmony.c
index ec4fda324760..db9c296dd688 100644
--- a/sound/parisc/harmony.c
+++ b/sound/parisc/harmony.c
@@ -954,11 +954,10 @@ free_and_ret:
 	return err;
 }
 
-static int __exit
+static void __exit
 snd_harmony_remove(struct parisc_device *padev)
 {
 	snd_card_free(parisc_get_drvdata(padev));
-	return 0;
 }
 
 static struct parisc_driver snd_harmony_driver __refdata = {
diff --git a/sound/pci/vx222/vx222.c b/sound/pci/vx222/vx222.c
index f48cc20b9e8a..468a6a20dc1e 100644
--- a/sound/pci/vx222/vx222.c
+++ b/sound/pci/vx222/vx222.c
@@ -137,6 +137,7 @@ static int snd_vx222_create(struct snd_card *card, struct pci_dev *pci,
 	}
 	chip->irq = pci->irq;
 	card->sync_irq = chip->irq;
+	*rchip = vx;
 
 	return 0;
 }
diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c
index e822fa1b9d4b..4a64cab99c55 100644
--- a/sound/soc/codecs/rt5682.c
+++ b/sound/soc/codecs/rt5682.c
@@ -2942,9 +2942,6 @@ static int rt5682_suspend(struct snd_soc_component *component)
 			break;
 		}
 
-		snd_soc_component_update_bits(component, RT5682_PWR_ANLG_3,
-			RT5682_PWR_CBJ, 0);
-
 		/* enter SAR ADC power saving mode */
 		snd_soc_component_update_bits(component, RT5682_SAR_IL_CMD_1,
 			RT5682_SAR_BUTT_DET_MASK | RT5682_SAR_BUTDET_MODE_MASK |
diff --git a/sound/soc/generic/audio-graph-card.c b/sound/soc/generic/audio-graph-card.c
index 5e71382467e8..546f6fd0609e 100644
--- a/sound/soc/generic/audio-graph-card.c
+++ b/sound/soc/generic/audio-graph-card.c
@@ -285,6 +285,7 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv,
 	if (li->cpu) {
 		struct snd_soc_card *card = simple_priv_to_card(priv);
 		struct snd_soc_dai_link_component *cpus = asoc_link_to_cpu(dai_link, 0);
+		struct snd_soc_dai_link_component *platforms = asoc_link_to_platform(dai_link, 0);
 		int is_single_links = 0;
 
 		/* Codec is dummy */
@@ -313,6 +314,7 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv,
 			dai_link->no_pcm = 1;
 
 		asoc_simple_canonicalize_cpu(cpus, is_single_links);
+		asoc_simple_canonicalize_platform(platforms, cpus);
 	} else {
 		struct snd_soc_codec_conf *cconf = simple_props_to_codec_conf(dai_props, 0);
 		struct snd_soc_dai_link_component *codecs = asoc_link_to_codec(dai_link, 0);
@@ -366,6 +368,7 @@ static int graph_dai_link_of(struct asoc_simple_priv *priv,
 	struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, li->link);
 	struct snd_soc_dai_link_component *cpus = asoc_link_to_cpu(dai_link, 0);
 	struct snd_soc_dai_link_component *codecs = asoc_link_to_codec(dai_link, 0);
+	struct snd_soc_dai_link_component *platforms = asoc_link_to_platform(dai_link, 0);
 	char dai_name[64];
 	int ret, is_single_links = 0;
 
@@ -383,6 +386,7 @@ static int graph_dai_link_of(struct asoc_simple_priv *priv,
 		 "%s-%s", cpus->dai_name, codecs->dai_name);
 
 	asoc_simple_canonicalize_cpu(cpus, is_single_links);
+	asoc_simple_canonicalize_platform(platforms, cpus);
 
 	ret = graph_link_init(priv, cpu_ep, codec_ep, li, dai_name);
 	if (ret < 0)
@@ -608,6 +612,7 @@ static int graph_count_noml(struct asoc_simple_priv *priv,
 
 	li->num[li->link].cpus		= 1;
 	li->num[li->link].codecs	= 1;
+	li->num[li->link].platforms     = 1;
 
 	li->link += 1; /* 1xCPU-Codec */
 
@@ -630,6 +635,7 @@ static int graph_count_dpcm(struct asoc_simple_priv *priv,
 
 	if (li->cpu) {
 		li->num[li->link].cpus		= 1;
+		li->num[li->link].platforms     = 1;
 
 		li->link++; /* 1xCPU-dummy */
 	} else {
diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig
index 046955bf717c..61b71d6c44cf 100644
--- a/sound/soc/intel/boards/Kconfig
+++ b/sound/soc/intel/boards/Kconfig
@@ -602,7 +602,7 @@ config SND_SOC_INTEL_SOUNDWIRE_SOF_MACH
 	select SND_SOC_DMIC
 	select SND_SOC_INTEL_HDA_DSP_COMMON
 	select SND_SOC_INTEL_SOF_MAXIM_COMMON
-	select SND_SOC_SDW_MOCKUP
+	imply SND_SOC_SDW_MOCKUP
 	help
 	  Add support for Intel SoundWire-based platforms connected to
 	  MAX98373, RT700, RT711, RT1308 and RT715
diff --git a/sound/soc/mediatek/Kconfig b/sound/soc/mediatek/Kconfig
index cf567a89f421..5a2f4667d50b 100644
--- a/sound/soc/mediatek/Kconfig
+++ b/sound/soc/mediatek/Kconfig
@@ -187,6 +187,7 @@ config SND_SOC_MT8192_MT6359_RT1015_RT5682
 
 config SND_SOC_MT8195
 	tristate "ASoC support for Mediatek MT8195 chip"
+	depends on ARCH_MEDIATEK || COMPILE_TEST
 	select SND_SOC_MEDIATEK
 	help
 	  This adds ASoC platform driver support for Mediatek MT8195 chip
@@ -197,7 +198,7 @@ config SND_SOC_MT8195
 config SND_SOC_MT8195_MT6359_RT1019_RT5682
 	tristate "ASoC Audio driver for MT8195 with MT6359 RT1019 RT5682 codec"
 	depends on I2C
-	depends on SND_SOC_MT8195
+	depends on SND_SOC_MT8195 && MTK_PMIC_WRAP
 	select SND_SOC_MT6359
 	select SND_SOC_RT1015P
 	select SND_SOC_RT5682_I2C
diff --git a/sound/soc/mediatek/mt8195/mt8195-mt6359-rt1019-rt5682.c b/sound/soc/mediatek/mt8195/mt8195-mt6359-rt1019-rt5682.c
index 5dc217f59bd6..c97ace7387b4 100644
--- a/sound/soc/mediatek/mt8195/mt8195-mt6359-rt1019-rt5682.c
+++ b/sound/soc/mediatek/mt8195/mt8195-mt6359-rt1019-rt5682.c
@@ -1018,13 +1018,12 @@ static int mt8195_mt6359_rt1019_rt5682_dev_probe(struct platform_device *pdev)
 				of_parse_phandle(pdev->dev.of_node,
 						 "mediatek,dptx-codec", 0);
 			if (!dai_link->codecs->of_node) {
-				dev_err(&pdev->dev, "Property 'dptx-codec' missing or invalid\n");
-				return -EINVAL;
+				dev_dbg(&pdev->dev, "No property 'dptx-codec'\n");
+			} else {
+				dai_link->codecs->name = NULL;
+				dai_link->codecs->dai_name = "i2s-hifi";
+				dai_link->init = mt8195_dptx_codec_init;
 			}
-
-			dai_link->codecs->name = NULL;
-			dai_link->codecs->dai_name = "i2s-hifi";
-			dai_link->init = mt8195_dptx_codec_init;
 		}
 
 		if (strcmp(dai_link->name, "ETDM3_OUT_BE") == 0) {
@@ -1032,13 +1031,12 @@ static int mt8195_mt6359_rt1019_rt5682_dev_probe(struct platform_device *pdev)
 				of_parse_phandle(pdev->dev.of_node,
 						 "mediatek,hdmi-codec", 0);
 			if (!dai_link->codecs->of_node) {
-				dev_err(&pdev->dev, "Property 'hdmi-codec' missing or invalid\n");
-				return -EINVAL;
+				dev_dbg(&pdev->dev, "No property 'hdmi-codec'\n");
+			} else {
+				dai_link->codecs->name = NULL;
+				dai_link->codecs->dai_name = "i2s-hifi";
+				dai_link->init = mt8195_hdmi_codec_init;
 			}
-
-			dai_link->codecs->name = NULL;
-			dai_link->codecs->dai_name = "i2s-hifi";
-			dai_link->init = mt8195_hdmi_codec_init;
 		}
 	}
 
diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c
index 53e0b4a1c7d2..7e89f5b0c237 100644
--- a/sound/soc/rockchip/rockchip_i2s.c
+++ b/sound/soc/rockchip/rockchip_i2s.c
@@ -15,6 +15,7 @@
 #include <linux/clk.h>
 #include <linux/pm_runtime.h>
 #include <linux/regmap.h>
+#include <linux/spinlock.h>
 #include <sound/pcm_params.h>
 #include <sound/dmaengine_pcm.h>
 
@@ -53,6 +54,7 @@ struct rk_i2s_dev {
 	bool is_master_mode;
 	const struct rk_i2s_pins *pins;
 	unsigned int bclk_ratio;
+	spinlock_t lock; /* tx/rx lock */
 };
 
 static int i2s_runtime_suspend(struct device *dev)
@@ -96,6 +98,7 @@ static void rockchip_snd_txctrl(struct rk_i2s_dev *i2s, int on)
 	unsigned int val = 0;
 	int retry = 10;
 
+	spin_lock(&i2s->lock);
 	if (on) {
 		regmap_update_bits(i2s->regmap, I2S_DMACR,
 				   I2S_DMACR_TDE_ENABLE, I2S_DMACR_TDE_ENABLE);
@@ -136,6 +139,7 @@ static void rockchip_snd_txctrl(struct rk_i2s_dev *i2s, int on)
 			}
 		}
 	}
+	spin_unlock(&i2s->lock);
 }
 
 static void rockchip_snd_rxctrl(struct rk_i2s_dev *i2s, int on)
@@ -143,6 +147,7 @@ static void rockchip_snd_rxctrl(struct rk_i2s_dev *i2s, int on)
 	unsigned int val = 0;
 	int retry = 10;
 
+	spin_lock(&i2s->lock);
 	if (on) {
 		regmap_update_bits(i2s->regmap, I2S_DMACR,
 				   I2S_DMACR_RDE_ENABLE, I2S_DMACR_RDE_ENABLE);
@@ -183,6 +188,7 @@ static void rockchip_snd_rxctrl(struct rk_i2s_dev *i2s, int on)
 			}
 		}
 	}
+	spin_unlock(&i2s->lock);
 }
 
 static int rockchip_i2s_set_fmt(struct snd_soc_dai *cpu_dai,
@@ -684,6 +690,7 @@ static int rockchip_i2s_probe(struct platform_device *pdev)
 	if (!i2s)
 		return -ENOMEM;
 
+	spin_lock_init(&i2s->lock);
 	i2s->dev = &pdev->dev;
 
 	i2s->grf = syscon_regmap_lookup_by_phandle(node, "rockchip,grf");
diff --git a/sound/soc/samsung/s3c24xx_simtec.c b/sound/soc/samsung/s3c24xx_simtec.c
index 81a29d12c57d..0cc66774b85d 100644
--- a/sound/soc/samsung/s3c24xx_simtec.c
+++ b/sound/soc/samsung/s3c24xx_simtec.c
@@ -327,7 +327,7 @@ int simtec_audio_core_probe(struct platform_device *pdev,
 
 	snd_dev = platform_device_alloc("soc-audio", -1);
 	if (!snd_dev) {
-		dev_err(&pdev->dev, "failed to alloc soc-audio devicec\n");
+		dev_err(&pdev->dev, "failed to alloc soc-audio device\n");
 		ret = -ENOMEM;
 		goto err_gpio;
 	}
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 4479a590194f..6ee6d24c847f 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1743,6 +1743,7 @@ static const struct registration_quirk registration_quirks[] = {
 	REG_QUIRK_ENTRY(0x0951, 0x16ed, 2),	/* Kingston HyperX Cloud Alpha S */
 	REG_QUIRK_ENTRY(0x0951, 0x16ea, 2),	/* Kingston HyperX Cloud Flight S */
 	REG_QUIRK_ENTRY(0x0ecb, 0x1f46, 2),	/* JBL Quantum 600 */
+	REG_QUIRK_ENTRY(0x0ecb, 0x1f47, 2),	/* JBL Quantum 800 */
 	REG_QUIRK_ENTRY(0x0ecb, 0x2039, 2),	/* JBL Quantum 400 */
 	REG_QUIRK_ENTRY(0x0ecb, 0x203c, 2),	/* JBL Quantum 600 */
 	REG_QUIRK_ENTRY(0x0ecb, 0x203e, 2),	/* JBL Quantum 800 */
diff --git a/tools/arch/x86/include/asm/amd-ibs.h b/tools/arch/x86/include/asm/amd-ibs.h
new file mode 100644
index 000000000000..174e7d83fcbd
--- /dev/null
+++ b/tools/arch/x86/include/asm/amd-ibs.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * From PPR Vol 1 for AMD Family 19h Model 01h B1
+ * 55898 Rev 0.35 - Feb 5, 2021
+ */
+
+#include "msr-index.h"
+
+/*
+ * IBS Hardware MSRs
+ */
+
+/* MSR 0xc0011030: IBS Fetch Control */
+union ibs_fetch_ctl {
+	__u64 val;
+	struct {
+		__u64	fetch_maxcnt:16,/* 0-15: instruction fetch max. count */
+			fetch_cnt:16,	/* 16-31: instruction fetch count */
+			fetch_lat:16,	/* 32-47: instruction fetch latency */
+			fetch_en:1,	/* 48: instruction fetch enable */
+			fetch_val:1,	/* 49: instruction fetch valid */
+			fetch_comp:1,	/* 50: instruction fetch complete */
+			ic_miss:1,	/* 51: i-cache miss */
+			phy_addr_valid:1,/* 52: physical address valid */
+			l1tlb_pgsz:2,	/* 53-54: i-cache L1TLB page size
+					 *	  (needs IbsPhyAddrValid) */
+			l1tlb_miss:1,	/* 55: i-cache fetch missed in L1TLB */
+			l2tlb_miss:1,	/* 56: i-cache fetch missed in L2TLB */
+			rand_en:1,	/* 57: random tagging enable */
+			fetch_l2_miss:1,/* 58: L2 miss for sampled fetch
+					 *      (needs IbsFetchComp) */
+			reserved:5;	/* 59-63: reserved */
+	};
+};
+
+/* MSR 0xc0011033: IBS Execution Control */
+union ibs_op_ctl {
+	__u64 val;
+	struct {
+		__u64	opmaxcnt:16,	/* 0-15: periodic op max. count */
+			reserved0:1,	/* 16: reserved */
+			op_en:1,	/* 17: op sampling enable */
+			op_val:1,	/* 18: op sample valid */
+			cnt_ctl:1,	/* 19: periodic op counter control */
+			opmaxcnt_ext:7,	/* 20-26: upper 7 bits of periodic op maximum count */
+			reserved1:5,	/* 27-31: reserved */
+			opcurcnt:27,	/* 32-58: periodic op counter current count */
+			reserved2:5;	/* 59-63: reserved */
+	};
+};
+
+/* MSR 0xc0011035: IBS Op Data 2 */
+union ibs_op_data {
+	__u64 val;
+	struct {
+		__u64	comp_to_ret_ctr:16,	/* 0-15: op completion to retire count */
+			tag_to_ret_ctr:16,	/* 15-31: op tag to retire count */
+			reserved1:2,		/* 32-33: reserved */
+			op_return:1,		/* 34: return op */
+			op_brn_taken:1,		/* 35: taken branch op */
+			op_brn_misp:1,		/* 36: mispredicted branch op */
+			op_brn_ret:1,		/* 37: branch op retired */
+			op_rip_invalid:1,	/* 38: RIP is invalid */
+			op_brn_fuse:1,		/* 39: fused branch op */
+			op_microcode:1,		/* 40: microcode op */
+			reserved2:23;		/* 41-63: reserved */
+	};
+};
+
+/* MSR 0xc0011036: IBS Op Data 2 */
+union ibs_op_data2 {
+	__u64 val;
+	struct {
+		__u64	data_src:3,	/* 0-2: data source */
+			reserved0:1,	/* 3: reserved */
+			rmt_node:1,	/* 4: destination node */
+			cache_hit_st:1,	/* 5: cache hit state */
+			reserved1:57;	/* 5-63: reserved */
+	};
+};
+
+/* MSR 0xc0011037: IBS Op Data 3 */
+union ibs_op_data3 {
+	__u64 val;
+	struct {
+		__u64	ld_op:1,			/* 0: load op */
+			st_op:1,			/* 1: store op */
+			dc_l1tlb_miss:1,		/* 2: data cache L1TLB miss */
+			dc_l2tlb_miss:1,		/* 3: data cache L2TLB hit in 2M page */
+			dc_l1tlb_hit_2m:1,		/* 4: data cache L1TLB hit in 2M page */
+			dc_l1tlb_hit_1g:1,		/* 5: data cache L1TLB hit in 1G page */
+			dc_l2tlb_hit_2m:1,		/* 6: data cache L2TLB hit in 2M page */
+			dc_miss:1,			/* 7: data cache miss */
+			dc_mis_acc:1,			/* 8: misaligned access */
+			reserved:4,			/* 9-12: reserved */
+			dc_wc_mem_acc:1,		/* 13: write combining memory access */
+			dc_uc_mem_acc:1,		/* 14: uncacheable memory access */
+			dc_locked_op:1,			/* 15: locked operation */
+			dc_miss_no_mab_alloc:1,		/* 16: DC miss with no MAB allocated */
+			dc_lin_addr_valid:1,		/* 17: data cache linear address valid */
+			dc_phy_addr_valid:1,		/* 18: data cache physical address valid */
+			dc_l2_tlb_hit_1g:1,		/* 19: data cache L2 hit in 1GB page */
+			l2_miss:1,			/* 20: L2 cache miss */
+			sw_pf:1,			/* 21: software prefetch */
+			op_mem_width:4,			/* 22-25: load/store size in bytes */
+			op_dc_miss_open_mem_reqs:6,	/* 26-31: outstanding mem reqs on DC fill */
+			dc_miss_lat:16,			/* 32-47: data cache miss latency */
+			tlb_refill_lat:16;		/* 48-63: L1 TLB refill latency */
+	};
+};
+
+/* MSR 0xc001103c: IBS Fetch Control Extended */
+union ic_ibs_extd_ctl {
+	__u64 val;
+	struct {
+		__u64	itlb_refill_lat:16,	/* 0-15: ITLB Refill latency for sampled fetch */
+			reserved:48;		/* 16-63: reserved */
+	};
+};
+
+/*
+ * IBS driver related
+ */
+
+struct perf_ibs_data {
+	u32		size;
+	union {
+		u32	data[0];	/* data buffer starts here */
+		u32	caps;
+	};
+	u64		regs[MSR_AMD64_IBS_REG_COUNT_MAX];
+};
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index a6c327f8ad9e..2ef1f6513c68 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -295,6 +295,7 @@ struct kvm_debug_exit_arch {
 #define KVM_GUESTDBG_USE_HW_BP		0x00020000
 #define KVM_GUESTDBG_INJECT_DB		0x00040000
 #define KVM_GUESTDBG_INJECT_BP		0x00080000
+#define KVM_GUESTDBG_BLOCKIRQ		0x00100000
 
 /* for KVM_SET_GUEST_DEBUG */
 struct kvm_guest_debug_arch {
diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index f45fa992e01d..fd67496a947f 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -111,9 +111,11 @@ static void xbc_show_list(void)
 	char key[XBC_KEYLEN_MAX];
 	struct xbc_node *leaf;
 	const char *val;
+	int ret;
 
 	xbc_for_each_key_value(leaf, val) {
-		if (xbc_node_compose_key(leaf, key, XBC_KEYLEN_MAX) < 0) {
+		ret = xbc_node_compose_key(leaf, key, XBC_KEYLEN_MAX);
+		if (ret < 0) {
 			fprintf(stderr, "Failed to compose key %d\n", ret);
 			break;
 		}
diff --git a/tools/bootconfig/scripts/bconf2ftrace.sh b/tools/bootconfig/scripts/bconf2ftrace.sh
index feb30c2c7881..850c2073433e 100755
--- a/tools/bootconfig/scripts/bconf2ftrace.sh
+++ b/tools/bootconfig/scripts/bconf2ftrace.sh
@@ -94,6 +94,92 @@ compose_synth() { # event_name branch
 	xbc_get_val $2 | while read field; do echo -n "$field; "; done
 }
 
+print_hist_array() { # prefix key
+	__sep="="
+	if xbc_has_key ${1}.${2}; then
+		echo -n ":$2"
+		xbc_get_val ${1}.${2} | while read field; do
+			echo -n "$__sep$field"; __sep=","
+		done
+	fi
+}
+
+print_hist_action_array() { # prefix key
+	__sep="("
+	echo -n ".$2"
+	xbc_get_val ${1}.${2} | while read field; do
+		echo -n "$__sep$field"; __sep=","
+	done
+	echo -n ")"
+}
+
+print_hist_one_action() { # prefix handler param
+	echo -n ":${2}("`xbc_get_val ${1}.${3}`")"
+	if xbc_has_key "${1}.trace"; then
+		print_hist_action_array ${1} "trace"
+	elif xbc_has_key "${1}.save"; then
+		print_hist_action_array ${1} "save"
+	elif xbc_has_key "${1}.snapshot"; then
+		echo -n ".snapshot()"
+	fi
+}
+
+print_hist_actions() { # prefix handler param
+	for __hdr in `xbc_subkeys ${1}.${2} 1 ".[0-9]"`; do
+		print_hist_one_action ${1}.${2}.$__hdr ${2} ${3}
+	done
+	if xbc_has_key ${1}.${2}.${3} ; then
+		print_hist_one_action ${1}.${2} ${2} ${3}
+	fi
+}
+
+print_hist_var() { # prefix varname
+	echo -n ":${2}="`xbc_get_val ${1}.var.${2} | tr -d [:space:]`
+}
+
+print_one_histogram() { # prefix
+	echo -n "hist"
+	print_hist_array $1 "keys"
+	print_hist_array $1 "values"
+	print_hist_array $1 "sort"
+	if xbc_has_key "${1}.size"; then
+		echo -n ":size="`xbc_get_val ${1}.size`
+	fi
+	if xbc_has_key "${1}.name"; then
+		echo -n ":name="`xbc_get_val ${1}.name`
+	fi
+	for __var in `xbc_subkeys "${1}.var" 1`; do
+		print_hist_var ${1} ${__var}
+	done
+	if xbc_has_key "${1}.pause"; then
+		echo -n ":pause"
+	elif xbc_has_key "${1}.continue"; then
+		echo -n ":continue"
+	elif xbc_has_key "${1}.clear"; then
+		echo -n ":clear"
+	fi
+	print_hist_actions ${1} "onmax" "var"
+	print_hist_actions ${1} "onchange" "var"
+	print_hist_actions ${1} "onmatch" "event"
+
+	if xbc_has_key "${1}.filter"; then
+		echo -n " if "`xbc_get_val ${1}.filter`
+	fi
+}
+
+setup_one_histogram() { # prefix trigger-file
+	run_cmd "echo '`print_one_histogram ${1}`' >> ${2}"
+}
+
+setup_histograms() { # prefix trigger-file
+	for __hist in `xbc_subkeys ${1} 1 ".[0-9]"`; do
+		setup_one_histogram ${1}.$__hist ${2}
+	done
+	if xbc_has_key ${1}.keys; then
+		setup_one_histogram ${1} ${2}
+	fi
+}
+
 setup_event() { # prefix group event [instance]
 	branch=$1.$2.$3
 	if [ "$4" ]; then
@@ -101,6 +187,12 @@ setup_event() { # prefix group event [instance]
 	else
 		eventdir="$TRACEFS/events/$2/$3"
 	fi
+	# group enable
+	if [ "$3" = "enable" ]; then
+		run_cmd "echo 1 > ${eventdir}"
+		return
+	fi
+
 	case $2 in
 	kprobes)
 		xbc_get_val ${branch}.probes | while read line; do
@@ -115,6 +207,8 @@ setup_event() { # prefix group event [instance]
 	set_value_of ${branch}.filter ${eventdir}/filter
 	set_array_of ${branch}.actions ${eventdir}/trigger
 
+	setup_histograms ${branch}.hist ${eventdir}/trigger
+
 	if xbc_has_key ${branch}.enable; then
 		run_cmd "echo 1 > ${eventdir}/enable"
 	fi
@@ -127,6 +221,13 @@ setup_events() { # prefix("ftrace" or "ftrace.instance.INSTANCE") [instance]
 			setup_event $prefix ${grpev%.*} ${grpev#*.} $2
 		done
 	fi
+	if xbc_has_branch ${1}.event.enable; then
+		if [ "$2" ]; then
+			run_cmd "echo 1 > $TRACEFS/instances/$2/events/enable"
+		else
+			run_cmd "echo 1 > $TRACEFS/events/enable"
+		fi
+	fi
 }
 
 size2kb() { # size[KB|MB]
diff --git a/tools/bootconfig/scripts/ftrace2bconf.sh b/tools/bootconfig/scripts/ftrace2bconf.sh
index a0c3bcc6da4f..6183b36c6846 100755
--- a/tools/bootconfig/scripts/ftrace2bconf.sh
+++ b/tools/bootconfig/scripts/ftrace2bconf.sh
@@ -92,6 +92,10 @@ referred_vars() {
 	grep "^hist" $1/trigger | grep -o '$[a-zA-Z0-9]*'
 }
 
+event_is_enabled() { # enable-file
+	test -f $1 & grep -q "1" $1
+}
+
 per_event_options() { # event-dir
 	evdir=$1
 	# Check the special event which has no filter and no trigger
@@ -113,7 +117,9 @@ per_event_options() { # event-dir
 		emit_kv $PREFIX.event.$group.$event.actions += \'$action\'
 	done
 
-	# enable is not checked; this is done by set_event in the instance.
+	if [ $GROUP_ENABLED -eq 0 ] && event_is_enabled $evdir/enable; then
+		emit_kv $PREFIX.event.$group.$event.enable
+	fi
 	val=`cat $evdir/filter`
 	if [ "$val" != "none" ]; then
 		emit_kv $PREFIX.event.$group.$event.filter = "$val"
@@ -137,8 +143,19 @@ event_options() {
 		kprobe_event_options
 		synth_event_options
 	fi
+	ALL_ENABLED=0
+	if event_is_enabled $INSTANCE/events/enable; then
+		emit_kv $PREFIX.event.enable
+		ALL_ENABLED=1
+	fi
 	for group in `ls $INSTANCE/events/` ; do
 		[ ! -d $INSTANCE/events/$group ] && continue
+		GROUP_ENABLED=$ALL_ENABLED
+		if [ $ALL_ENABLED -eq 0 ] && \
+		   event_is_enabled $INSTANCE/events/$group/enable ;then
+			emit_kv $PREFIX.event.$group.enable
+			GROUP_ENABLED=1
+		fi
 		for event in `ls $INSTANCE/events/$group/` ;do
 			[ ! -d $INSTANCE/events/$group/$event ] && continue
 			per_event_options $INSTANCE/events/$group/$event
@@ -222,15 +239,10 @@ instance_options() { # [instance-name]
 		emit_kv $PREFIX.cpumask = $val
 	fi
 	val=`cat $INSTANCE/tracing_on`
-	if [ `echo $val | sed -e s/f//g`x != x ]; then
-		emit_kv $PREFIX.tracing_on = $val
+	if [ "$val" = "0" ]; then
+		emit_kv $PREFIX.tracing_on = 0
 	fi
 
-	val=
-	for i in `cat $INSTANCE/set_event`; do
-		val="$val, $i"
-	done
-	[ "$val" ] && emit_kv $PREFIX.events = "${val#,}"
 	val=`cat $INSTANCE/current_tracer`
 	[ $val != nop ] && emit_kv $PREFIX.tracer = $val
 	if grep -qv "^#" $INSTANCE/set_ftrace_filter $INSTANCE/set_ftrace_notrace; then
diff --git a/tools/bootconfig/scripts/xbc.sh b/tools/bootconfig/scripts/xbc.sh
index b8c84e654556..1f0ebf50dd2d 100644
--- a/tools/bootconfig/scripts/xbc.sh
+++ b/tools/bootconfig/scripts/xbc.sh
@@ -49,8 +49,8 @@ xbc_has_branch() { # prefix-key
 	grep -q "^$1" $XBC_TMPFILE
 }
 
-xbc_subkeys() { # prefix-key depth
+xbc_subkeys() { # prefix-key depth [subkey-pattern]
 	__keys=`echo $1 | sed "s/\./ /g"`
 	__s=`nr_args $__keys`
-	grep "^$1" $XBC_TMPFILE | cut -d= -f1| cut -d. -f$((__s + 1))-$((__s + $2)) | uniq
+	grep "^$1$3" $XBC_TMPFILE | cut -d= -f1| cut -d. -f$((__s + 1))-$((__s + $2)) | uniq
 }
diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh
index baed891d0ba4..f68e2e9eef8b 100755
--- a/tools/bootconfig/test-bootconfig.sh
+++ b/tools/bootconfig/test-bootconfig.sh
@@ -26,7 +26,7 @@ trap cleanup EXIT TERM
 NO=1
 
 xpass() { # pass test command
-  echo "test case $NO ($3)... "
+  echo "test case $NO ($*)... "
   if ! ($@ && echo "\t\t[OK]"); then
      echo "\t\t[NG]"; NG=$((NG + 1))
   fi
@@ -34,7 +34,7 @@ xpass() { # pass test command
 }
 
 xfail() { # fail test command
-  echo "test case $NO ($3)... "
+  echo "test case $NO ($*)... "
   if ! (! $@ && echo "\t\t[OK]"); then
      echo "\t\t[NG]"; NG=$((NG + 1))
   fi
diff --git a/tools/build/Makefile b/tools/build/Makefile
index 5ed41b96fcde..6f11e6fc9ffe 100644
--- a/tools/build/Makefile
+++ b/tools/build/Makefile
@@ -32,7 +32,7 @@ all: $(OUTPUT)fixdep
 
 # Make sure there's anything to clean,
 # feature contains check for existing OUTPUT
-TMP_O := $(if $(OUTPUT),$(OUTPUT)/feature,./)
+TMP_O := $(if $(OUTPUT),$(OUTPUT)feature/,./)
 
 clean:
 	$(call QUIET_CLEAN, fixdep)
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 04a8e3db8a54..3dd2f68366f9 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -34,7 +34,6 @@ FEATURE_TESTS_BASIC :=                  \
         dwarf_getlocations              \
         eventfd                         \
         fortify-source                  \
-        sync-compare-and-swap           \
         get_current_dir_name            \
         gettid				\
         glibc                           \
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index ec203e28407f..eff55d287db1 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -9,7 +9,6 @@ FILES=                                          \
          test-dwarf_getlocations.bin            \
          test-eventfd.bin                       \
          test-fortify-source.bin                \
-         test-sync-compare-and-swap.bin         \
          test-get_current_dir_name.bin          \
          test-glibc.bin                         \
          test-gtk2.bin                          \
@@ -260,9 +259,6 @@ $(OUTPUT)test-libdw-dwarf-unwind.bin:
 $(OUTPUT)test-libbabeltrace.bin:
 	$(BUILD) # -lbabeltrace provided by $(FEATURE_CHECK_LDFLAGS-libbabeltrace)
 
-$(OUTPUT)test-sync-compare-and-swap.bin:
-	$(BUILD)
-
 $(OUTPUT)test-compile-32.bin:
 	$(CC) -m32 -o $@ test-compile.c
 
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index 464873883396..920439527291 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -106,10 +106,6 @@
 # include "test-libdw-dwarf-unwind.c"
 #undef main
 
-#define main main_test_sync_compare_and_swap
-# include "test-sync-compare-and-swap.c"
-#undef main
-
 #define main main_test_zlib
 # include "test-zlib.c"
 #undef main
diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c
index 52c790b0317b..eb6303ff446e 100644
--- a/tools/build/feature/test-libopencsd.c
+++ b/tools/build/feature/test-libopencsd.c
@@ -4,9 +4,9 @@
 /*
  * Check OpenCSD library version is sufficient to provide required features
  */
-#define OCSD_MIN_VER ((1 << 16) | (0 << 8) | (0))
+#define OCSD_MIN_VER ((1 << 16) | (1 << 8) | (1))
 #if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER)
-#error "OpenCSD >= 1.0.0 is required"
+#error "OpenCSD >= 1.1.1 is required"
 #endif
 
 int main(void)
diff --git a/tools/build/feature/test-sync-compare-and-swap.c b/tools/build/feature/test-sync-compare-and-swap.c
deleted file mode 100644
index 3bc6b0768a53..000000000000
--- a/tools/build/feature/test-sync-compare-and-swap.c
+++ /dev/null
@@ -1,15 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdint.h>
-
-volatile uint64_t x;
-
-int main(int argc, char *argv[])
-{
-	uint64_t old, new = argc;
-
-	(void)argv;
-	do {
-		old = __sync_val_compare_and_swap(&x, 0, 0);
-	} while (!__sync_bool_compare_and_swap(&x, old, new));
-	return old == new;
-}
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index 9d959bc24859..95611df1d26e 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -111,10 +111,10 @@ static inline int test_and_clear_bit(int nr, unsigned long *addr)
 }
 
 /**
- * bitmap_alloc - Allocate bitmap
+ * bitmap_zalloc - Allocate bitmap
  * @nbits: Number of bits
  */
-static inline unsigned long *bitmap_alloc(int nbits)
+static inline unsigned long *bitmap_zalloc(int nbits)
 {
 	return calloc(1, BITS_TO_LONGS(nbits) * sizeof(unsigned long));
 }
diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h
index feea09029f61..24ae3054f304 100644
--- a/tools/include/linux/compiler_types.h
+++ b/tools/include/linux/compiler_types.h
@@ -13,6 +13,24 @@
 #define __has_builtin(x) (0)
 #endif
 
+#ifdef __CHECKER__
+/* context/locking */
+# define __must_hold(x)	__attribute__((context(x,1,1)))
+# define __acquires(x)	__attribute__((context(x,0,1)))
+# define __releases(x)	__attribute__((context(x,1,0)))
+# define __acquire(x)	__context__(x,1)
+# define __release(x)	__context__(x,-1)
+# define __cond_lock(x,c)	((c) ? ({ __acquire(x); 1; }) : 0)
+#else /* __CHECKER__ */
+/* context/locking */
+# define __must_hold(x)
+# define __acquires(x)
+# define __releases(x)
+# define __acquire(x)	(void)0
+# define __release(x)	(void)0
+# define __cond_lock(x,c) (c)
+#endif /* __CHECKER__ */
+
 /* Compiler specific macros. */
 #ifdef __GNUC__
 #include <linux/compiler-gcc.h>
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index a9d6fcd95f42..1c5fb86d455a 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -673,15 +673,15 @@ __SYSCALL(__NR_madvise, sys_madvise)
 #define __NR_remap_file_pages 234
 __SYSCALL(__NR_remap_file_pages, sys_remap_file_pages)
 #define __NR_mbind 235
-__SC_COMP(__NR_mbind, sys_mbind, compat_sys_mbind)
+__SYSCALL(__NR_mbind, sys_mbind)
 #define __NR_get_mempolicy 236
-__SC_COMP(__NR_get_mempolicy, sys_get_mempolicy, compat_sys_get_mempolicy)
+__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy)
 #define __NR_set_mempolicy 237
-__SC_COMP(__NR_set_mempolicy, sys_set_mempolicy, compat_sys_set_mempolicy)
+__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy)
 #define __NR_migrate_pages 238
-__SC_COMP(__NR_migrate_pages, sys_migrate_pages, compat_sys_migrate_pages)
+__SYSCALL(__NR_migrate_pages, sys_migrate_pages)
 #define __NR_move_pages 239
-__SC_COMP(__NR_move_pages, sys_move_pages, compat_sys_move_pages)
+__SYSCALL(__NR_move_pages, sys_move_pages)
 #endif
 
 #define __NR_rt_tgsigqueueinfo 240
@@ -877,9 +877,11 @@ __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)
 #define __NR_memfd_secret 447
 __SYSCALL(__NR_memfd_secret, sys_memfd_secret)
 #endif
+#define __NR_process_mrelease 448
+__SYSCALL(__NR_process_mrelease, sys_process_mrelease)
 
 #undef __NR_syscalls
-#define __NR_syscalls 448
+#define __NR_syscalls 449
 
 /*
  * 32 bit systems traditionally used different
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index d043752a74cf..3b810b53ba8b 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -635,8 +635,8 @@ struct drm_gem_open {
 /**
  * DRM_CAP_VBLANK_HIGH_CRTC
  *
- * If set to 1, the kernel supports specifying a CRTC index in the high bits of
- * &drm_wait_vblank_request.type.
+ * If set to 1, the kernel supports specifying a :ref:`CRTC index<crtc_index>`
+ * in the high bits of &drm_wait_vblank_request.type.
  *
  * Starting kernel version 2.6.39, this capability is always set to 1.
  */
@@ -1050,6 +1050,16 @@ extern "C" {
 #define DRM_IOCTL_MODE_GETPROPBLOB	DRM_IOWR(0xAC, struct drm_mode_get_blob)
 #define DRM_IOCTL_MODE_GETFB		DRM_IOWR(0xAD, struct drm_mode_fb_cmd)
 #define DRM_IOCTL_MODE_ADDFB		DRM_IOWR(0xAE, struct drm_mode_fb_cmd)
+/**
+ * DRM_IOCTL_MODE_RMFB - Remove a framebuffer.
+ *
+ * This removes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL
+ * argument is a framebuffer object ID.
+ *
+ * Warning: removing a framebuffer currently in-use on an enabled plane will
+ * disable that plane. The CRTC the plane is linked to may also be disabled
+ * (depending on driver capabilities).
+ */
 #define DRM_IOCTL_MODE_RMFB		DRM_IOWR(0xAF, unsigned int)
 #define DRM_IOCTL_MODE_PAGE_FLIP	DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip)
 #define DRM_IOCTL_MODE_DIRTYFB		DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd)
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index c2c7759b7d2e..bde5860b3686 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -572,6 +572,15 @@ typedef struct drm_i915_irq_wait {
 #define   I915_SCHEDULER_CAP_PREEMPTION	(1ul << 2)
 #define   I915_SCHEDULER_CAP_SEMAPHORES	(1ul << 3)
 #define   I915_SCHEDULER_CAP_ENGINE_BUSY_STATS	(1ul << 4)
+/*
+ * Indicates the 2k user priority levels are statically mapped into 3 buckets as
+ * follows:
+ *
+ * -1k to -1	Low priority
+ * 0		Normal priority
+ * 1 to 1k	Highest priority
+ */
+#define   I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP	(1ul << 5)
 
 #define I915_PARAM_HUC_STATUS		 42
 
@@ -674,6 +683,9 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
 
+/* Query if the kernel supports the I915_USERPTR_PROBE flag. */
+#define I915_PARAM_HAS_USERPTR_PROBE 56
+
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -849,45 +861,113 @@ struct drm_i915_gem_mmap_gtt {
 	__u64 offset;
 };
 
+/**
+ * struct drm_i915_gem_mmap_offset - Retrieve an offset so we can mmap this buffer object.
+ *
+ * This struct is passed as argument to the `DRM_IOCTL_I915_GEM_MMAP_OFFSET` ioctl,
+ * and is used to retrieve the fake offset to mmap an object specified by &handle.
+ *
+ * The legacy way of using `DRM_IOCTL_I915_GEM_MMAP` is removed on gen12+.
+ * `DRM_IOCTL_I915_GEM_MMAP_GTT` is an older supported alias to this struct, but will behave
+ * as setting the &extensions to 0, and &flags to `I915_MMAP_OFFSET_GTT`.
+ */
 struct drm_i915_gem_mmap_offset {
-	/** Handle for the object being mapped. */
+	/** @handle: Handle for the object being mapped. */
 	__u32 handle;
+	/** @pad: Must be zero */
 	__u32 pad;
 	/**
-	 * Fake offset to use for subsequent mmap call
+	 * @offset: The fake offset to use for subsequent mmap call
 	 *
 	 * This is a fixed-size type for 32/64 compatibility.
 	 */
 	__u64 offset;
 
 	/**
-	 * Flags for extended behaviour.
+	 * @flags: Flags for extended behaviour.
+	 *
+	 * It is mandatory that one of the `MMAP_OFFSET` types
+	 * should be included:
 	 *
-	 * It is mandatory that one of the MMAP_OFFSET types
-	 * (GTT, WC, WB, UC, etc) should be included.
+	 * - `I915_MMAP_OFFSET_GTT`: Use mmap with the object bound to GTT. (Write-Combined)
+	 * - `I915_MMAP_OFFSET_WC`: Use Write-Combined caching.
+	 * - `I915_MMAP_OFFSET_WB`: Use Write-Back caching.
+	 * - `I915_MMAP_OFFSET_FIXED`: Use object placement to determine caching.
+	 *
+	 * On devices with local memory `I915_MMAP_OFFSET_FIXED` is the only valid
+	 * type. On devices without local memory, this caching mode is invalid.
+	 *
+	 * As caching mode when specifying `I915_MMAP_OFFSET_FIXED`, WC or WB will
+	 * be used, depending on the object placement on creation. WB will be used
+	 * when the object can only exist in system memory, WC otherwise.
 	 */
 	__u64 flags;
-#define I915_MMAP_OFFSET_GTT 0
-#define I915_MMAP_OFFSET_WC  1
-#define I915_MMAP_OFFSET_WB  2
-#define I915_MMAP_OFFSET_UC  3
 
-	/*
-	 * Zero-terminated chain of extensions.
+#define I915_MMAP_OFFSET_GTT	0
+#define I915_MMAP_OFFSET_WC	1
+#define I915_MMAP_OFFSET_WB	2
+#define I915_MMAP_OFFSET_UC	3
+#define I915_MMAP_OFFSET_FIXED	4
+
+	/**
+	 * @extensions: Zero-terminated chain of extensions.
 	 *
 	 * No current extensions defined; mbz.
 	 */
 	__u64 extensions;
 };
 
+/**
+ * struct drm_i915_gem_set_domain - Adjust the objects write or read domain, in
+ * preparation for accessing the pages via some CPU domain.
+ *
+ * Specifying a new write or read domain will flush the object out of the
+ * previous domain(if required), before then updating the objects domain
+ * tracking with the new domain.
+ *
+ * Note this might involve waiting for the object first if it is still active on
+ * the GPU.
+ *
+ * Supported values for @read_domains and @write_domain:
+ *
+ *	- I915_GEM_DOMAIN_WC: Uncached write-combined domain
+ *	- I915_GEM_DOMAIN_CPU: CPU cache domain
+ *	- I915_GEM_DOMAIN_GTT: Mappable aperture domain
+ *
+ * All other domains are rejected.
+ *
+ * Note that for discrete, starting from DG1, this is no longer supported, and
+ * is instead rejected. On such platforms the CPU domain is effectively static,
+ * where we also only support a single &drm_i915_gem_mmap_offset cache mode,
+ * which can't be set explicitly and instead depends on the object placements,
+ * as per the below.
+ *
+ * Implicit caching rules, starting from DG1:
+ *
+ *	- If any of the object placements (see &drm_i915_gem_create_ext_memory_regions)
+ *	  contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and
+ *	  mapped as write-combined only.
+ *
+ *	- Everything else is always allocated and mapped as write-back, with the
+ *	  guarantee that everything is also coherent with the GPU.
+ *
+ * Note that this is likely to change in the future again, where we might need
+ * more flexibility on future devices, so making this all explicit as part of a
+ * new &drm_i915_gem_create_ext extension is probable.
+ */
 struct drm_i915_gem_set_domain {
-	/** Handle for the object */
+	/** @handle: Handle for the object. */
 	__u32 handle;
 
-	/** New read domains */
+	/** @read_domains: New read domains. */
 	__u32 read_domains;
 
-	/** New write domain */
+	/**
+	 * @write_domain: New write domain.
+	 *
+	 * Note that having something in the write domain implies it's in the
+	 * read domain, and only that read domain.
+	 */
 	__u32 write_domain;
 };
 
@@ -1348,12 +1428,11 @@ struct drm_i915_gem_busy {
 	 * reading from the object simultaneously.
 	 *
 	 * The value of each engine class is the same as specified in the
-	 * I915_CONTEXT_SET_ENGINES parameter and via perf, i.e.
+	 * I915_CONTEXT_PARAM_ENGINES context parameter and via perf, i.e.
 	 * I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc.
-	 * reported as active itself. Some hardware may have parallel
-	 * execution engines, e.g. multiple media engines, which are
-	 * mapped to the same class identifier and so are not separately
-	 * reported for busyness.
+	 * Some hardware may have parallel execution engines, e.g. multiple
+	 * media engines, which are mapped to the same class identifier and so
+	 * are not separately reported for busyness.
 	 *
 	 * Caveat emptor:
 	 * Only the boolean result of this query is reliable; that is whether
@@ -1364,43 +1443,79 @@ struct drm_i915_gem_busy {
 };
 
 /**
- * I915_CACHING_NONE
- *
- * GPU access is not coherent with cpu caches. Default for machines without an
- * LLC.
- */
-#define I915_CACHING_NONE		0
-/**
- * I915_CACHING_CACHED
- *
- * GPU access is coherent with cpu caches and furthermore the data is cached in
- * last-level caches shared between cpu cores and the gpu GT. Default on
- * machines with HAS_LLC.
+ * struct drm_i915_gem_caching - Set or get the caching for given object
+ * handle.
+ *
+ * Allow userspace to control the GTT caching bits for a given object when the
+ * object is later mapped through the ppGTT(or GGTT on older platforms lacking
+ * ppGTT support, or if the object is used for scanout). Note that this might
+ * require unbinding the object from the GTT first, if its current caching value
+ * doesn't match.
+ *
+ * Note that this all changes on discrete platforms, starting from DG1, the
+ * set/get caching is no longer supported, and is now rejected.  Instead the CPU
+ * caching attributes(WB vs WC) will become an immutable creation time property
+ * for the object, along with the GTT caching level. For now we don't expose any
+ * new uAPI for this, instead on DG1 this is all implicit, although this largely
+ * shouldn't matter since DG1 is coherent by default(without any way of
+ * controlling it).
+ *
+ * Implicit caching rules, starting from DG1:
+ *
+ *     - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions)
+ *       contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and
+ *       mapped as write-combined only.
+ *
+ *     - Everything else is always allocated and mapped as write-back, with the
+ *       guarantee that everything is also coherent with the GPU.
+ *
+ * Note that this is likely to change in the future again, where we might need
+ * more flexibility on future devices, so making this all explicit as part of a
+ * new &drm_i915_gem_create_ext extension is probable.
+ *
+ * Side note: Part of the reason for this is that changing the at-allocation-time CPU
+ * caching attributes for the pages might be required(and is expensive) if we
+ * need to then CPU map the pages later with different caching attributes. This
+ * inconsistent caching behaviour, while supported on x86, is not universally
+ * supported on other architectures. So for simplicity we opt for setting
+ * everything at creation time, whilst also making it immutable, on discrete
+ * platforms.
  */
-#define I915_CACHING_CACHED		1
-/**
- * I915_CACHING_DISPLAY
- *
- * Special GPU caching mode which is coherent with the scanout engines.
- * Transparently falls back to I915_CACHING_NONE on platforms where no special
- * cache mode (like write-through or gfdt flushing) is available. The kernel
- * automatically sets this mode when using a buffer as a scanout target.
- * Userspace can manually set this mode to avoid a costly stall and clflush in
- * the hotpath of drawing the first frame.
- */
-#define I915_CACHING_DISPLAY		2
-
 struct drm_i915_gem_caching {
 	/**
-	 * Handle of the buffer to set/get the caching level of. */
+	 * @handle: Handle of the buffer to set/get the caching level.
+	 */
 	__u32 handle;
 
 	/**
-	 * Cacheing level to apply or return value
+	 * @caching: The GTT caching level to apply or possible return value.
+	 *
+	 * The supported @caching values:
 	 *
-	 * bits0-15 are for generic caching control (i.e. the above defined
-	 * values). bits16-31 are reserved for platform-specific variations
-	 * (e.g. l3$ caching on gen7). */
+	 * I915_CACHING_NONE:
+	 *
+	 * GPU access is not coherent with CPU caches.  Default for machines
+	 * without an LLC. This means manual flushing might be needed, if we
+	 * want GPU access to be coherent.
+	 *
+	 * I915_CACHING_CACHED:
+	 *
+	 * GPU access is coherent with CPU caches and furthermore the data is
+	 * cached in last-level caches shared between CPU cores and the GPU GT.
+	 *
+	 * I915_CACHING_DISPLAY:
+	 *
+	 * Special GPU caching mode which is coherent with the scanout engines.
+	 * Transparently falls back to I915_CACHING_NONE on platforms where no
+	 * special cache mode (like write-through or gfdt flushing) is
+	 * available. The kernel automatically sets this mode when using a
+	 * buffer as a scanout target.  Userspace can manually set this mode to
+	 * avoid a costly stall and clflush in the hotpath of drawing the first
+	 * frame.
+	 */
+#define I915_CACHING_NONE		0
+#define I915_CACHING_CACHED		1
+#define I915_CACHING_DISPLAY		2
 	__u32 caching;
 };
 
@@ -1639,6 +1754,10 @@ struct drm_i915_gem_context_param {
 	__u32 size;
 	__u64 param;
 #define I915_CONTEXT_PARAM_BAN_PERIOD	0x1
+/* I915_CONTEXT_PARAM_NO_ZEROMAP has been removed.  On the off chance
+ * someone somewhere has attempted to use it, never re-use this context
+ * param number.
+ */
 #define I915_CONTEXT_PARAM_NO_ZEROMAP	0x2
 #define I915_CONTEXT_PARAM_GTT_SIZE	0x3
 #define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE	0x4
@@ -1723,24 +1842,8 @@ struct drm_i915_gem_context_param {
  */
 #define I915_CONTEXT_PARAM_PERSISTENCE	0xb
 
-/*
- * I915_CONTEXT_PARAM_RINGSIZE:
- *
- * Sets the size of the CS ringbuffer to use for logical ring contexts. This
- * applies a limit of how many batches can be queued to HW before the caller
- * is blocked due to lack of space for more commands.
- *
- * Only reliably possible to be set prior to first use, i.e. during
- * construction. At any later point, the current execution must be flushed as
- * the ring can only be changed while the context is idle. Note, the ringsize
- * can be specified as a constructor property, see
- * I915_CONTEXT_CREATE_EXT_SETPARAM, but can also be set later if required.
- *
- * Only applies to the current set of engine and lost when those engines
- * are replaced by a new mapping (see I915_CONTEXT_PARAM_ENGINES).
- *
- * Must be between 4 - 512 KiB, in intervals of page size [4 KiB].
- * Default is 16 KiB.
+/* This API has been removed.  On the off chance someone somewhere has
+ * attempted to use it, never re-use this context param number.
  */
 #define I915_CONTEXT_PARAM_RINGSIZE	0xc
 /* Must be kept compact -- no holes and well documented */
@@ -1807,6 +1910,69 @@ struct drm_i915_gem_context_param_sseu {
 	__u32 rsvd;
 };
 
+/**
+ * DOC: Virtual Engine uAPI
+ *
+ * Virtual engine is a concept where userspace is able to configure a set of
+ * physical engines, submit a batch buffer, and let the driver execute it on any
+ * engine from the set as it sees fit.
+ *
+ * This is primarily useful on parts which have multiple instances of a same
+ * class engine, like for example GT3+ Skylake parts with their two VCS engines.
+ *
+ * For instance userspace can enumerate all engines of a certain class using the
+ * previously described `Engine Discovery uAPI`_. After that userspace can
+ * create a GEM context with a placeholder slot for the virtual engine (using
+ * `I915_ENGINE_CLASS_INVALID` and `I915_ENGINE_CLASS_INVALID_NONE` for class
+ * and instance respectively) and finally using the
+ * `I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE` extension place a virtual engine in
+ * the same reserved slot.
+ *
+ * Example of creating a virtual engine and submitting a batch buffer to it:
+ *
+ * .. code-block:: C
+ *
+ * 	I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(virtual, 2) = {
+ * 		.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE,
+ * 		.engine_index = 0, // Place this virtual engine into engine map slot 0
+ * 		.num_siblings = 2,
+ * 		.engines = { { I915_ENGINE_CLASS_VIDEO, 0 },
+ * 			     { I915_ENGINE_CLASS_VIDEO, 1 }, },
+ * 	};
+ * 	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1) = {
+ * 		.engines = { { I915_ENGINE_CLASS_INVALID,
+ * 			       I915_ENGINE_CLASS_INVALID_NONE } },
+ * 		.extensions = to_user_pointer(&virtual), // Chains after load_balance extension
+ * 	};
+ * 	struct drm_i915_gem_context_create_ext_setparam p_engines = {
+ * 		.base = {
+ * 			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
+ * 		},
+ * 		.param = {
+ * 			.param = I915_CONTEXT_PARAM_ENGINES,
+ * 			.value = to_user_pointer(&engines),
+ * 			.size = sizeof(engines),
+ * 		},
+ * 	};
+ * 	struct drm_i915_gem_context_create_ext create = {
+ * 		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+ * 		.extensions = to_user_pointer(&p_engines);
+ * 	};
+ *
+ * 	ctx_id = gem_context_create_ext(drm_fd, &create);
+ *
+ * 	// Now we have created a GEM context with its engine map containing a
+ * 	// single virtual engine. Submissions to this slot can go either to
+ * 	// vcs0 or vcs1, depending on the load balancing algorithm used inside
+ * 	// the driver. The load balancing is dynamic from one batch buffer to
+ * 	// another and transparent to userspace.
+ *
+ * 	...
+ * 	execbuf.rsvd1 = ctx_id;
+ * 	execbuf.flags = 0; // Submits to index 0 which is the virtual engine
+ * 	gem_execbuf(drm_fd, &execbuf);
+ */
+
 /*
  * i915_context_engines_load_balance:
  *
@@ -1883,6 +2049,61 @@ struct i915_context_engines_bond {
 	struct i915_engine_class_instance engines[N__]; \
 } __attribute__((packed)) name__
 
+/**
+ * DOC: Context Engine Map uAPI
+ *
+ * Context engine map is a new way of addressing engines when submitting batch-
+ * buffers, replacing the existing way of using identifiers like `I915_EXEC_BLT`
+ * inside the flags field of `struct drm_i915_gem_execbuffer2`.
+ *
+ * To use it created GEM contexts need to be configured with a list of engines
+ * the user is intending to submit to. This is accomplished using the
+ * `I915_CONTEXT_PARAM_ENGINES` parameter and `struct
+ * i915_context_param_engines`.
+ *
+ * For such contexts the `I915_EXEC_RING_MASK` field becomes an index into the
+ * configured map.
+ *
+ * Example of creating such context and submitting against it:
+ *
+ * .. code-block:: C
+ *
+ * 	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = {
+ * 		.engines = { { I915_ENGINE_CLASS_RENDER, 0 },
+ * 			     { I915_ENGINE_CLASS_COPY, 0 } }
+ * 	};
+ * 	struct drm_i915_gem_context_create_ext_setparam p_engines = {
+ * 		.base = {
+ * 			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
+ * 		},
+ * 		.param = {
+ * 			.param = I915_CONTEXT_PARAM_ENGINES,
+ * 			.value = to_user_pointer(&engines),
+ * 			.size = sizeof(engines),
+ * 		},
+ * 	};
+ * 	struct drm_i915_gem_context_create_ext create = {
+ * 		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+ * 		.extensions = to_user_pointer(&p_engines);
+ * 	};
+ *
+ * 	ctx_id = gem_context_create_ext(drm_fd, &create);
+ *
+ * 	// We have now created a GEM context with two engines in the map:
+ * 	// Index 0 points to rcs0 while index 1 points to bcs0. Other engines
+ * 	// will not be accessible from this context.
+ *
+ * 	...
+ * 	execbuf.rsvd1 = ctx_id;
+ * 	execbuf.flags = 0; // Submits to index 0, which is rcs0 for this context
+ * 	gem_execbuf(drm_fd, &execbuf);
+ *
+ * 	...
+ * 	execbuf.rsvd1 = ctx_id;
+ * 	execbuf.flags = 1; // Submits to index 0, which is bcs0 for this context
+ * 	gem_execbuf(drm_fd, &execbuf);
+ */
+
 struct i915_context_param_engines {
 	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
 #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
@@ -1901,20 +2122,10 @@ struct drm_i915_gem_context_create_ext_setparam {
 	struct drm_i915_gem_context_param param;
 };
 
-struct drm_i915_gem_context_create_ext_clone {
+/* This API has been removed.  On the off chance someone somewhere has
+ * attempted to use it, never re-use this extension number.
+ */
 #define I915_CONTEXT_CREATE_EXT_CLONE 1
-	struct i915_user_extension base;
-	__u32 clone_id;
-	__u32 flags;
-#define I915_CONTEXT_CLONE_ENGINES	(1u << 0)
-#define I915_CONTEXT_CLONE_FLAGS	(1u << 1)
-#define I915_CONTEXT_CLONE_SCHEDATTR	(1u << 2)
-#define I915_CONTEXT_CLONE_SSEU		(1u << 3)
-#define I915_CONTEXT_CLONE_TIMELINE	(1u << 4)
-#define I915_CONTEXT_CLONE_VM		(1u << 5)
-#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1)
-	__u64 rsvd;
-};
 
 struct drm_i915_gem_context_destroy {
 	__u32 ctx_id;
@@ -1986,14 +2197,69 @@ struct drm_i915_reset_stats {
 	__u32 pad;
 };
 
+/**
+ * struct drm_i915_gem_userptr - Create GEM object from user allocated memory.
+ *
+ * Userptr objects have several restrictions on what ioctls can be used with the
+ * object handle.
+ */
 struct drm_i915_gem_userptr {
+	/**
+	 * @user_ptr: The pointer to the allocated memory.
+	 *
+	 * Needs to be aligned to PAGE_SIZE.
+	 */
 	__u64 user_ptr;
+
+	/**
+	 * @user_size:
+	 *
+	 * The size in bytes for the allocated memory. This will also become the
+	 * object size.
+	 *
+	 * Needs to be aligned to PAGE_SIZE, and should be at least PAGE_SIZE,
+	 * or larger.
+	 */
 	__u64 user_size;
+
+	/**
+	 * @flags:
+	 *
+	 * Supported flags:
+	 *
+	 * I915_USERPTR_READ_ONLY:
+	 *
+	 * Mark the object as readonly, this also means GPU access can only be
+	 * readonly. This is only supported on HW which supports readonly access
+	 * through the GTT. If the HW can't support readonly access, an error is
+	 * returned.
+	 *
+	 * I915_USERPTR_PROBE:
+	 *
+	 * Probe the provided @user_ptr range and validate that the @user_ptr is
+	 * indeed pointing to normal memory and that the range is also valid.
+	 * For example if some garbage address is given to the kernel, then this
+	 * should complain.
+	 *
+	 * Returns -EFAULT if the probe failed.
+	 *
+	 * Note that this doesn't populate the backing pages, and also doesn't
+	 * guarantee that the object will remain valid when the object is
+	 * eventually used.
+	 *
+	 * The kernel supports this feature if I915_PARAM_HAS_USERPTR_PROBE
+	 * returns a non-zero value.
+	 *
+	 * I915_USERPTR_UNSYNCHRONIZED:
+	 *
+	 * NOT USED. Setting this flag will result in an error.
+	 */
 	__u32 flags;
 #define I915_USERPTR_READ_ONLY 0x1
+#define I915_USERPTR_PROBE 0x2
 #define I915_USERPTR_UNSYNCHRONIZED 0x80000000
 	/**
-	 * Returned handle for the object.
+	 * @handle: Returned handle for the object.
 	 *
 	 * Object handles are nonzero.
 	 */
@@ -2377,6 +2643,76 @@ struct drm_i915_query_topology_info {
 };
 
 /**
+ * DOC: Engine Discovery uAPI
+ *
+ * Engine discovery uAPI is a way of enumerating physical engines present in a
+ * GPU associated with an open i915 DRM file descriptor. This supersedes the old
+ * way of using `DRM_IOCTL_I915_GETPARAM` and engine identifiers like
+ * `I915_PARAM_HAS_BLT`.
+ *
+ * The need for this interface came starting with Icelake and newer GPUs, which
+ * started to establish a pattern of having multiple engines of a same class,
+ * where not all instances were always completely functionally equivalent.
+ *
+ * Entry point for this uapi is `DRM_IOCTL_I915_QUERY` with the
+ * `DRM_I915_QUERY_ENGINE_INFO` as the queried item id.
+ *
+ * Example for getting the list of engines:
+ *
+ * .. code-block:: C
+ *
+ * 	struct drm_i915_query_engine_info *info;
+ * 	struct drm_i915_query_item item = {
+ * 		.query_id = DRM_I915_QUERY_ENGINE_INFO;
+ * 	};
+ * 	struct drm_i915_query query = {
+ * 		.num_items = 1,
+ * 		.items_ptr = (uintptr_t)&item,
+ * 	};
+ * 	int err, i;
+ *
+ * 	// First query the size of the blob we need, this needs to be large
+ * 	// enough to hold our array of engines. The kernel will fill out the
+ * 	// item.length for us, which is the number of bytes we need.
+ * 	//
+ * 	// Alternatively a large buffer can be allocated straight away enabling
+ * 	// querying in one pass, in which case item.length should contain the
+ * 	// length of the provided buffer.
+ * 	err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
+ * 	if (err) ...
+ *
+ * 	info = calloc(1, item.length);
+ * 	// Now that we allocated the required number of bytes, we call the ioctl
+ * 	// again, this time with the data_ptr pointing to our newly allocated
+ * 	// blob, which the kernel can then populate with info on all engines.
+ * 	item.data_ptr = (uintptr_t)&info,
+ *
+ * 	err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
+ * 	if (err) ...
+ *
+ * 	// We can now access each engine in the array
+ * 	for (i = 0; i < info->num_engines; i++) {
+ * 		struct drm_i915_engine_info einfo = info->engines[i];
+ * 		u16 class = einfo.engine.class;
+ * 		u16 instance = einfo.engine.instance;
+ * 		....
+ * 	}
+ *
+ * 	free(info);
+ *
+ * Each of the enumerated engines, apart from being defined by its class and
+ * instance (see `struct i915_engine_class_instance`), also can have flags and
+ * capabilities defined as documented in i915_drm.h.
+ *
+ * For instance video engines which support HEVC encoding will have the
+ * `I915_VIDEO_CLASS_CAPABILITY_HEVC` capability bit set.
+ *
+ * Engine discovery only fully comes to its own when combined with the new way
+ * of addressing engines when submitting batch buffers using contexts with
+ * engine maps configured.
+ */
+
+/**
  * struct drm_i915_engine_info
  *
  * Describes one engine and it's capabilities as known to the driver.
diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h
index 4c32e97dcdf0..bdf7b404b3e7 100644
--- a/tools/include/uapi/linux/fs.h
+++ b/tools/include/uapi/linux/fs.h
@@ -184,6 +184,7 @@ struct fsxattr {
 #define BLKSECDISCARD _IO(0x12,125)
 #define BLKROTATIONAL _IO(0x12,126)
 #define BLKZEROOUT _IO(0x12,127)
+#define BLKGETDISKSEQ _IOR(0x12,128,__u64)
 /*
  * A jump here: 130-136 are reserved for zoned block devices
  * (see uapi/linux/blkzoned.h)
diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
index d1b327036ae4..14168225cecd 100644
--- a/tools/include/uapi/linux/in.h
+++ b/tools/include/uapi/linux/in.h
@@ -188,11 +188,22 @@ struct ip_mreq_source {
 };
 
 struct ip_msfilter {
-	__be32		imsf_multiaddr;
-	__be32		imsf_interface;
-	__u32		imsf_fmode;
-	__u32		imsf_numsrc;
-	__be32		imsf_slist[1];
+	union {
+		struct {
+			__be32		imsf_multiaddr_aux;
+			__be32		imsf_interface_aux;
+			__u32		imsf_fmode_aux;
+			__u32		imsf_numsrc_aux;
+			__be32		imsf_slist[1];
+		};
+		struct {
+			__be32		imsf_multiaddr;
+			__be32		imsf_interface;
+			__u32		imsf_fmode;
+			__u32		imsf_numsrc;
+			__be32		imsf_slist_flex[];
+		};
+	};
 };
 
 #define IP_MSFILTER_SIZE(numsrc) \
@@ -211,11 +222,22 @@ struct group_source_req {
 };
 
 struct group_filter {
-	__u32				 gf_interface;	/* interface index */
-	struct __kernel_sockaddr_storage gf_group;	/* multicast address */
-	__u32				 gf_fmode;	/* filter mode */
-	__u32				 gf_numsrc;	/* number of sources */
-	struct __kernel_sockaddr_storage gf_slist[1];	/* interface index */
+	union {
+		struct {
+			__u32				 gf_interface_aux; /* interface index */
+			struct __kernel_sockaddr_storage gf_group_aux;	   /* multicast address */
+			__u32				 gf_fmode_aux;	   /* filter mode */
+			__u32				 gf_numsrc_aux;	   /* number of sources */
+			struct __kernel_sockaddr_storage gf_slist[1];	   /* interface index */
+		};
+		struct {
+			__u32				 gf_interface;	  /* interface index */
+			struct __kernel_sockaddr_storage gf_group;	  /* multicast address */
+			__u32				 gf_fmode;	  /* filter mode */
+			__u32				 gf_numsrc;	  /* number of sources */
+			struct __kernel_sockaddr_storage gf_slist_flex[]; /* interface index */
+		};
+	};
 };
 
 #define GROUP_FILTER_SIZE(numsrc) \
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index d9e4aabcb31a..a067410ebea5 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1965,7 +1965,9 @@ struct kvm_stats_header {
 #define KVM_STATS_TYPE_CUMULATIVE	(0x0 << KVM_STATS_TYPE_SHIFT)
 #define KVM_STATS_TYPE_INSTANT		(0x1 << KVM_STATS_TYPE_SHIFT)
 #define KVM_STATS_TYPE_PEAK		(0x2 << KVM_STATS_TYPE_SHIFT)
-#define KVM_STATS_TYPE_MAX		KVM_STATS_TYPE_PEAK
+#define KVM_STATS_TYPE_LINEAR_HIST	(0x3 << KVM_STATS_TYPE_SHIFT)
+#define KVM_STATS_TYPE_LOG_HIST		(0x4 << KVM_STATS_TYPE_SHIFT)
+#define KVM_STATS_TYPE_MAX		KVM_STATS_TYPE_LOG_HIST
 
 #define KVM_STATS_UNIT_SHIFT		4
 #define KVM_STATS_UNIT_MASK		(0xF << KVM_STATS_UNIT_SHIFT)
@@ -1988,8 +1990,9 @@ struct kvm_stats_header {
  * @size: The number of data items for this stats.
  *        Every data item is of type __u64.
  * @offset: The offset of the stats to the start of stat structure in
- *          struture kvm or kvm_vcpu.
- * @unused: Unused field for future usage. Always 0 for now.
+ *          structure kvm or kvm_vcpu.
+ * @bucket_size: A parameter value used for histogram stats. It is only used
+ *		for linear histogram stats, specifying the size of the bucket;
  * @name: The name string for the stats. Its size is indicated by the
  *        &kvm_stats_header->name_size.
  */
@@ -1998,7 +2001,7 @@ struct kvm_stats_desc {
 	__s16 exponent;
 	__u16 size;
 	__u32 offset;
-	__u32 unused;
+	__u32 bucket_size;
 	char name[];
 };
 
diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h
index dd7a166fdf9c..4d93967f8aea 100644
--- a/tools/include/uapi/linux/mount.h
+++ b/tools/include/uapi/linux/mount.h
@@ -73,7 +73,8 @@
 #define MOVE_MOUNT_T_SYMLINKS		0x00000010 /* Follow symlinks on to path */
 #define MOVE_MOUNT_T_AUTOMOUNTS		0x00000020 /* Follow automounts on to path */
 #define MOVE_MOUNT_T_EMPTY_PATH		0x00000040 /* Empty to path permitted */
-#define MOVE_MOUNT__MASK		0x00000077
+#define MOVE_MOUNT_SET_GROUP		0x00000100 /* Set sharing group instead */
+#define MOVE_MOUNT__MASK		0x00000177
 
 /*
  * fsopen() flags.
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index 967d9c55323d..43bd7f713c39 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -213,6 +213,7 @@ struct prctl_mm_map {
 /* Speculation control variants */
 # define PR_SPEC_STORE_BYPASS		0
 # define PR_SPEC_INDIRECT_BRANCH	1
+# define PR_SPEC_L1D_FLUSH		2
 /* Return and control values for PR_SET/GET_SPECULATION_CTRL */
 # define PR_SPEC_NOT_AFFECTED		0
 # define PR_SPEC_PRCTL			(1UL << 0)
@@ -234,14 +235,15 @@ struct prctl_mm_map {
 #define PR_GET_TAGGED_ADDR_CTRL		56
 # define PR_TAGGED_ADDR_ENABLE		(1UL << 0)
 /* MTE tag check fault modes */
-# define PR_MTE_TCF_SHIFT		1
-# define PR_MTE_TCF_NONE		(0UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TCF_SYNC		(1UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TCF_ASYNC		(2UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TCF_MASK		(3UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_NONE		0
+# define PR_MTE_TCF_SYNC		(1UL << 1)
+# define PR_MTE_TCF_ASYNC		(1UL << 2)
+# define PR_MTE_TCF_MASK		(PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC)
 /* MTE tag inclusion mask */
 # define PR_MTE_TAG_SHIFT		3
 # define PR_MTE_TAG_MASK		(0xffffUL << PR_MTE_TAG_SHIFT)
+/* Unused; kept only for source compatibility */
+# define PR_MTE_TCF_SHIFT		1
 
 /* Control reclaim behavior when allocating memory */
 #define PR_SET_IO_FLUSHER		57
diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h
index d17c061950df..1d84ec9db93b 100644
--- a/tools/include/uapi/sound/asound.h
+++ b/tools/include/uapi/sound/asound.h
@@ -299,6 +299,7 @@ typedef int __bitwise snd_pcm_subformat_t;
 #define SNDRV_PCM_INFO_HAS_LINK_ABSOLUTE_ATIME     0x02000000  /* report absolute hardware link audio time, not reset on startup */
 #define SNDRV_PCM_INFO_HAS_LINK_ESTIMATED_ATIME    0x04000000  /* report estimated link audio time */
 #define SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME 0x08000000  /* report synchronized audio/system time */
+#define SNDRV_PCM_INFO_EXPLICIT_SYNC	0x10000000	/* needs explicit sync of pointers and data */
 
 #define SNDRV_PCM_INFO_DRAIN_TRIGGER	0x40000000		/* internal kernel flag - trigger in drain */
 #define SNDRV_PCM_INFO_FIFO_IN_FRAMES	0x80000000	/* internal kernel flag - FIFO size is in frames */
diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index ca0215047c32..6d8e521c59e1 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -68,6 +68,11 @@ static struct perf_cpu_map *cpu_map__default_new(void)
 	return cpus;
 }
 
+struct perf_cpu_map *perf_cpu_map__default_new(void)
+{
+	return cpu_map__default_new();
+}
+
 static int cmp_int(const void *a, const void *b)
 {
 	return *(const int *)a - *(const int*)b;
@@ -277,14 +282,8 @@ int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu)
 
 int perf_cpu_map__max(struct perf_cpu_map *map)
 {
-	int i, max = -1;
-
-	for (i = 0; i < map->nr; i++) {
-		if (map->map[i] > max)
-			max = map->map[i];
-	}
-
-	return max;
+	// cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well.
+	return map->nr > 0 ? map->map[map->nr - 1] : -1;
 }
 
 /*
diff --git a/tools/lib/perf/include/internal/tests.h b/tools/lib/perf/include/internal/tests.h
index 61052099225b..b130a6663ff8 100644
--- a/tools/lib/perf/include/internal/tests.h
+++ b/tools/lib/perf/include/internal/tests.h
@@ -23,6 +23,8 @@ static inline int get_verbose(char **argv, int argc)
 			break;
 		}
 	}
+	optind = 1;
+
 	return verbose;
 }
 
diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index 6a17ad730cbc..7c27766ea0bf 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -9,6 +9,7 @@
 struct perf_cpu_map;
 
 LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void);
 LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
 LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file);
 LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h
index d2414144eb8c..41b9b942504d 100644
--- a/tools/lib/subcmd/parse-options.h
+++ b/tools/lib/subcmd/parse-options.h
@@ -133,6 +133,7 @@ struct option {
 #define OPT_SET_PTR(s, l, v, h, p)  { .type = OPTION_SET_PTR, .short_name = (s), .long_name = (l), .value = (v), .help = (h), .defval = (p) }
 #define OPT_INTEGER(s, l, v, h)     { .type = OPTION_INTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h) }
 #define OPT_UINTEGER(s, l, v, h)    { .type = OPTION_UINTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h) }
+#define OPT_UINTEGER_OPTARG(s, l, v, d, h)    { .type = OPTION_UINTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h), .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d) }
 #define OPT_LONG(s, l, v, h)        { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) }
 #define OPT_ULONG(s, l, v, h)        { .type = OPTION_ULONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned long *), .help = (h) }
 #define OPT_U64(s, l, v, h)         { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) }
diff --git a/tools/memory-model/Documentation/access-marking.txt b/tools/memory-model/Documentation/access-marking.txt
index 1ab189f51f55..65778222183e 100644
--- a/tools/memory-model/Documentation/access-marking.txt
+++ b/tools/memory-model/Documentation/access-marking.txt
@@ -37,7 +37,9 @@ compiler's use of code-motion and common-subexpression optimizations.
 Therefore, if a given access is involved in an intentional data race,
 using READ_ONCE() for loads and WRITE_ONCE() for stores is usually
 preferable to data_race(), which in turn is usually preferable to plain
-C-language accesses.
+C-language accesses.  It is permissible to combine #2 and #3, for example,
+data_race(READ_ONCE(a)), which will both restrict compiler optimizations
+and disable KCSAN diagnostics.
 
 KCSAN will complain about many types of data races involving plain
 C-language accesses, but marking all accesses involved in a given data
@@ -86,6 +88,10 @@ that fail to exclude the updates.  In this case, it is important to use
 data_race() for the diagnostic reads because otherwise KCSAN would give
 false-positive warnings about these diagnostic reads.
 
+If it is necessary to both restrict compiler optimizations and disable
+KCSAN diagnostics, use both data_race() and READ_ONCE(), for example,
+data_race(READ_ONCE(a)).
+
 In theory, plain C-language loads can also be used for this use case.
 However, in practice this will have the disadvantage of causing KCSAN
 to generate false positives because KCSAN will have no way of knowing
@@ -126,6 +132,11 @@ consistent errors, which in turn are quite capable of breaking heuristics.
 Therefore use of data_race() should be limited to cases where some other
 code (such as a barrier() call) will force the occasional reload.
 
+Note that this use case requires that the heuristic be able to handle
+any possible error.  In contrast, if the heuristics might be fatally
+confused by one or more of the possible erroneous values, use READ_ONCE()
+instead of data_race().
+
 In theory, plain C-language loads can also be used for this use case.
 However, in practice this will have the disadvantage of causing KCSAN
 to generate false positives because KCSAN will have no way of knowing
@@ -259,9 +270,9 @@ diagnostic purposes.  The code might look as follows:
 		return ret;
 	}
 
-	int read_foo_diagnostic(void)
+	void read_foo_diagnostic(void)
 	{
-		return data_race(foo);
+		pr_info("Current value of foo: %d\n", data_race(foo));
 	}
 
 The reader-writer lock prevents the compiler from introducing concurrency
@@ -274,19 +285,34 @@ tells KCSAN that data races are expected, and should be silently
 ignored.  This data_race() also tells the human reading the code that
 read_foo_diagnostic() might sometimes return a bogus value.
 
-However, please note that your kernel must be built with
-CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n in order for KCSAN to
-detect a buggy lockless write.  If you need KCSAN to detect such a
-write even if that write did not change the value of foo, you also
-need CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n.  If you need KCSAN to
-detect such a write happening in an interrupt handler running on the
-same CPU doing the legitimate lock-protected write, you also need
-CONFIG_KCSAN_INTERRUPT_WATCHER=y.  With some or all of these Kconfig
-options set properly, KCSAN can be quite helpful, although it is not
-necessarily a full replacement for hardware watchpoints.  On the other
-hand, neither are hardware watchpoints a full replacement for KCSAN
-because it is not always easy to tell hardware watchpoint to conditionally
-trap on accesses.
+If it is necessary to suppress compiler optimization and also detect
+buggy lockless writes, read_foo_diagnostic() can be updated as follows:
+
+	void read_foo_diagnostic(void)
+	{
+		pr_info("Current value of foo: %d\n", data_race(READ_ONCE(foo)));
+	}
+
+Alternatively, given that KCSAN is to ignore all accesses in this function,
+this function can be marked __no_kcsan and the data_race() can be dropped:
+
+	void __no_kcsan read_foo_diagnostic(void)
+	{
+		pr_info("Current value of foo: %d\n", READ_ONCE(foo));
+	}
+
+However, in order for KCSAN to detect buggy lockless writes, your kernel
+must be built with CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n.  If you
+need KCSAN to detect such a write even if that write did not change
+the value of foo, you also need CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n.
+If you need KCSAN to detect such a write happening in an interrupt handler
+running on the same CPU doing the legitimate lock-protected write, you
+also need CONFIG_KCSAN_INTERRUPT_WATCHER=y.  With some or all of these
+Kconfig options set properly, KCSAN can be quite helpful, although
+it is not necessarily a full replacement for hardware watchpoints.
+On the other hand, neither are hardware watchpoints a full replacement
+for KCSAN because it is not always easy to tell hardware watchpoint to
+conditionally trap on accesses.
 
 
 Lock-Protected Writes With Lockless Reads
@@ -319,6 +345,99 @@ of the ASSERT_EXCLUSIVE_WRITER() is to allow KCSAN to check for a buggy
 concurrent lockless write.
 
 
+Lock-Protected Writes With Heuristic Lockless Reads
+---------------------------------------------------
+
+For another example, suppose that the code can normally make use of
+a per-data-structure lock, but there are times when a global lock
+is required.  These times are indicated via a global flag.  The code
+might look as follows, and is based loosely on nf_conntrack_lock(),
+nf_conntrack_all_lock(), and nf_conntrack_all_unlock():
+
+	bool global_flag;
+	DEFINE_SPINLOCK(global_lock);
+	struct foo {
+		spinlock_t f_lock;
+		int f_data;
+	};
+
+	/* All foo structures are in the following array. */
+	int nfoo;
+	struct foo *foo_array;
+
+	void do_something_locked(struct foo *fp)
+	{
+		/* This works even if data_race() returns nonsense. */
+		if (!data_race(global_flag)) {
+			spin_lock(&fp->f_lock);
+			if (!smp_load_acquire(&global_flag)) {
+				do_something(fp);
+				spin_unlock(&fp->f_lock);
+				return;
+			}
+			spin_unlock(&fp->f_lock);
+		}
+		spin_lock(&global_lock);
+		/* global_lock held, thus global flag cannot be set. */
+		spin_lock(&fp->f_lock);
+		spin_unlock(&global_lock);
+		/*
+		 * global_flag might be set here, but begin_global()
+		 * will wait for ->f_lock to be released.
+		 */
+		do_something(fp);
+		spin_unlock(&fp->f_lock);
+	}
+
+	void begin_global(void)
+	{
+		int i;
+
+		spin_lock(&global_lock);
+		WRITE_ONCE(global_flag, true);
+		for (i = 0; i < nfoo; i++) {
+			/*
+			 * Wait for pre-existing local locks.  One at
+			 * a time to avoid lockdep limitations.
+			 */
+			spin_lock(&fp->f_lock);
+			spin_unlock(&fp->f_lock);
+		}
+	}
+
+	void end_global(void)
+	{
+		smp_store_release(&global_flag, false);
+		spin_unlock(&global_lock);
+	}
+
+All code paths leading from the do_something_locked() function's first
+read from global_flag acquire a lock, so endless load fusing cannot
+happen.
+
+If the value read from global_flag is true, then global_flag is
+rechecked while holding ->f_lock, which, if global_flag is now false,
+prevents begin_global() from completing.  It is therefore safe to invoke
+do_something().
+
+Otherwise, if either value read from global_flag is true, then after
+global_lock is acquired global_flag must be false.  The acquisition of
+->f_lock will prevent any call to begin_global() from returning, which
+means that it is safe to release global_lock and invoke do_something().
+
+For this to work, only those foo structures in foo_array[] may be passed
+to do_something_locked().  The reason for this is that the synchronization
+with begin_global() relies on momentarily holding the lock of each and
+every foo structure.
+
+The smp_load_acquire() and smp_store_release() are required because
+changes to a foo structure between calls to begin_global() and
+end_global() are carried out without holding that structure's ->f_lock.
+The smp_load_acquire() and smp_store_release() ensure that the next
+invocation of do_something() from do_something_locked() will see those
+changes.
+
+
 Lockless Reads and Writes
 -------------------------
 
diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c
index 0a1344c45213..441b54234635 100644
--- a/tools/pci/pcitest.c
+++ b/tools/pci/pcitest.c
@@ -40,7 +40,7 @@ struct pci_test {
 
 static int run_test(struct pci_test *test)
 {
-	struct pci_endpoint_test_xfer_param param;
+	struct pci_endpoint_test_xfer_param param = {};
 	int ret = -EINVAL;
 	int fd;
 
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index e555e9729758..8e0163b7ef01 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -39,3 +39,4 @@ pmu-events/jevents
 feature/
 fixdep
 libtraceevent-dynamic-list
+Documentation/doc.dep
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index 6e54979c2124..6e7b88917ca0 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -2,6 +2,10 @@
 include ../../scripts/Makefile.include
 include ../../scripts/utilities.mak
 
+ARTICLES =
+# with their own formatting rules.
+SP_ARTICLES =
+
 MAN1_TXT= \
 	$(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \
 		$(wildcard perf-*.txt)) \
@@ -16,13 +20,6 @@ _MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT))
 MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML))
 MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML))
 
-ARTICLES =
-# with their own formatting rules.
-SP_ARTICLES =
-API_DOCS = $(patsubst %.txt,%,$(filter-out technical/api-index-skel.txt technical/api-index.txt, $(wildcard technical/api-*.txt)))
-SP_ARTICLES += $(API_DOCS)
-SP_ARTICLES += technical/api-index
-
 _DOC_HTML = $(_MAN_HTML)
 _DOC_HTML+=$(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES))
 DOC_HTML=$(addprefix $(OUTPUT),$(_DOC_HTML))
@@ -173,7 +170,7 @@ ifneq ($(V),1)
 endif
 endif
 
-all: html man
+all: html man info
 
 html: $(DOC_HTML)
 
@@ -186,8 +183,6 @@ man7: $(DOC_MAN7)
 
 info: $(OUTPUT)perf.info $(OUTPUT)perfman.info
 
-pdf: $(OUTPUT)user-manual.pdf
-
 install: install-man
 
 check-man-tools:
@@ -225,11 +220,6 @@ install-info: info
 	  echo "No directory found in $(DESTDIR)$(infodir)" >&2 ; \
 	fi
 
-install-pdf: pdf
-	$(call QUIET_INSTALL, Documentation-pdf) \
-		$(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir); \
-		$(INSTALL) -m 644 $(OUTPUT)user-manual.pdf $(DESTDIR)$(pdfdir)
-
 #install-html: html
 #	'$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
 
@@ -244,33 +234,13 @@ $(OUTPUT)doc.dep : $(wildcard *.txt) build-docdep.perl
 
 -include $(OUTPUT)doc.dep
 
-_cmds_txt = cmds-ancillaryinterrogators.txt \
-	cmds-ancillarymanipulators.txt \
-	cmds-mainporcelain.txt \
-	cmds-plumbinginterrogators.txt \
-	cmds-plumbingmanipulators.txt \
-	cmds-synchingrepositories.txt \
-	cmds-synchelpers.txt \
-	cmds-purehelpers.txt \
-	cmds-foreignscminterface.txt
-cmds_txt=$(addprefix $(OUTPUT),$(_cmds_txt))
-
-$(cmds_txt): $(OUTPUT)cmd-list.made
-
-$(OUTPUT)cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT)
-	$(QUIET_GEN)$(RM) $@ && \
-	$(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \
-	date >$@
-
 CLEAN_FILES =									\
 	$(MAN_XML) $(addsuffix +,$(MAN_XML))					\
 	$(MAN_HTML) $(addsuffix +,$(MAN_HTML))					\
 	$(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7)				\
 	$(OUTPUT)*.texi $(OUTPUT)*.texi+ $(OUTPUT)*.texi++			\
-	$(OUTPUT)perf.info $(OUTPUT)perfman.info				\
-	$(OUTPUT)howto-index.txt $(OUTPUT)howto/*.html $(OUTPUT)doc.dep		\
-	$(OUTPUT)technical/api-*.html $(OUTPUT)technical/api-index.txt		\
-	$(cmds_txt) $(OUTPUT)*.made
+	$(OUTPUT)perf.info $(OUTPUT)perfman.info $(OUTPUT)doc.dep		\
+	$(OUTPUT)technical/api-*.html $(OUTPUT)technical/api-index.txt
 clean:
 	$(call QUIET_CLEAN, Documentation) $(RM) $(CLEAN_FILES)
 
@@ -304,24 +274,6 @@ $(OUTPUT)%.xml : %.txt
 XSLT = docbook.xsl
 XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css
 
-$(OUTPUT)user-manual.html: $(OUTPUT)user-manual.xml
-	$(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $<
-
-$(OUTPUT)perf.info: $(OUTPUT)user-manual.texi
-	$(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ $(OUTPUT)user-manual.texi
-
-$(OUTPUT)user-manual.texi: $(OUTPUT)user-manual.xml
-	$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
-	$(DOCBOOK2X_TEXI) $(OUTPUT)user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \
-	$(PERL_PATH) fix-texi.perl <$@++ >$@+ && \
-	rm $@++ && \
-	mv $@+ $@
-
-$(OUTPUT)user-manual.pdf: $(OUTPUT)user-manual.xml
-	$(QUIET_DBLATEX)$(RM) $@+ $@ && \
-	$(DBLATEX) -o $@+ -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \
-	mv $@+ $@
-
 $(OUTPUT)perfman.texi: $(MAN_XML) cat-texi.perl
 	$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
 	($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \
@@ -331,28 +283,18 @@ $(OUTPUT)perfman.texi: $(MAN_XML) cat-texi.perl
 	mv $@+ $@
 
 $(OUTPUT)perfman.info: $(OUTPUT)perfman.texi
-	$(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi
+	$(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate -o $@ $*.texi
 
 $(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml
 	$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
 	$(DOCBOOK2X_TEXI) --to-stdout $*.xml >$@+ && \
 	mv $@+ $@
 
-howto-index.txt: howto-index.sh $(wildcard howto/*.txt)
-	$(QUIET_GEN)$(RM) $@+ $@ && \
-	'$(SHELL_PATH_SQ)' ./howto-index.sh $(wildcard howto/*.txt) >$@+ && \
-	mv $@+ $@
-
 $(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt
 	$(QUIET_ASCIIDOC)$(ASCIIDOC) -b $(ASCIIDOC_HTML) $*.txt
 
 WEBDOC_DEST = /pub/software/tools/perf/docs
 
-$(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt
-	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
-	sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b $(ASCIIDOC_HTML) - >$@+ && \
-	mv $@+ $@
-
 # UNIMPLEMENTED
 #install-webdoc : html
 #	'$(SHELL_PATH_SQ)' ./install-webdoc.sh $(WEBDOC_DEST)
diff --git a/tools/perf/Documentation/build-docdep.perl b/tools/perf/Documentation/build-docdep.perl
new file mode 100755
index 000000000000..ba4205e0302a
--- /dev/null
+++ b/tools/perf/Documentation/build-docdep.perl
@@ -0,0 +1,46 @@
+#!/usr/bin/perl
+
+my %include = ();
+my %included = ();
+
+for my $text (<*.txt>) {
+    open I, '<', $text || die "cannot read: $text";
+    while (<I>) {
+	if (/^include::/) {
+	    chomp;
+	    s/^include::\s*//;
+	    s/\[\]//;
+	    $include{$text}{$_} = 1;
+	    $included{$_} = 1;
+	}
+    }
+    close I;
+}
+
+# Do we care about chained includes???
+my $changed = 1;
+while ($changed) {
+    $changed = 0;
+    while (my ($text, $included) = each %include) {
+	for my $i (keys %$included) {
+	    # $text has include::$i; if $i includes $j
+	    # $text indirectly includes $j.
+	    if (exists $include{$i}) {
+		for my $j (keys %{$include{$i}}) {
+		    if (!exists $include{$text}{$j}) {
+			$include{$text}{$j} = 1;
+			$included{$j} = 1;
+			$changed = 1;
+		    }
+		}
+	    }
+	}
+    }
+}
+
+while (my ($text, $included) = each %include) {
+    if (! exists $included{$text} &&
+	(my $base = $text) =~ s/\.txt$//) {
+	print "$base.html $base.xml : ", join(" ", keys %$included), "\n";
+    }
+}
diff --git a/tools/perf/Documentation/cat-texi.perl b/tools/perf/Documentation/cat-texi.perl
new file mode 100755
index 000000000000..14d2f8341517
--- /dev/null
+++ b/tools/perf/Documentation/cat-texi.perl
@@ -0,0 +1,46 @@
+#!/usr/bin/perl -w
+
+use strict;
+use warnings;
+
+my @menu = ();
+my $output = $ARGV[0];
+
+open my $tmp, '>', "$output.tmp";
+
+while (<STDIN>) {
+	next if (/^\\input texinfo/../\@node Top/);
+	next if (/^\@bye/ || /^\.ft/);
+	if (s/^\@top (.*)/\@node $1,,,Top/) {
+		push @menu, $1;
+	}
+	s/\(\@pxref\{\[(URLS|REMOTES)\]}\)//;
+	s/\@anchor\{[^{}]*\}//g;
+	print $tmp $_;
+}
+close $tmp;
+
+print '\input texinfo
+@setfilename gitman.info
+@documentencoding UTF-8
+@dircategory Development
+@direntry
+* Git Man Pages: (gitman).  Manual pages for Git revision control system
+@end direntry
+@node Top,,, (dir)
+@top Git Manual Pages
+@documentlanguage en
+@menu
+';
+
+for (@menu) {
+	print "* ${_}::\n";
+}
+print "\@end menu\n";
+open $tmp, '<', "$output.tmp";
+while (<$tmp>) {
+	print;
+}
+close $tmp;
+print "\@bye\n";
+unlink "$output.tmp";
diff --git a/tools/perf/Documentation/intel-hybrid.txt b/tools/perf/Documentation/intel-hybrid.txt
index 07f0aa3bf682..c9302096dc46 100644
--- a/tools/perf/Documentation/intel-hybrid.txt
+++ b/tools/perf/Documentation/intel-hybrid.txt
@@ -140,7 +140,7 @@ displayed. The percentage is the event's running time/enabling time.
 One example, 'triad_loop' runs on cpu16 (atom core), while we can see the
 scaled value for core cycles is 160,444,092 and the percentage is 0.47%.
 
-perf stat -e cycles -- taskset -c 16 ./triad_loop
+perf stat -e cycles \-- taskset -c 16 ./triad_loop
 
 As previous, two events are created.
 
diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt
index c81d72e3eecf..de6beedb7283 100644
--- a/tools/perf/Documentation/perf-c2c.txt
+++ b/tools/perf/Documentation/perf-c2c.txt
@@ -9,7 +9,7 @@ SYNOPSIS
 --------
 [verse]
 'perf c2c record' [<options>] <command>
-'perf c2c record' [<options>] -- [<record command options>] <command>
+'perf c2c record' [<options>] \-- [<record command options>] <command>
 'perf c2c report' [<options>]
 
 DESCRIPTION
diff --git a/tools/perf/Documentation/perf-dlfilter.txt b/tools/perf/Documentation/perf-dlfilter.txt
index 02842cb4cf90..594f5a5a0c9e 100644
--- a/tools/perf/Documentation/perf-dlfilter.txt
+++ b/tools/perf/Documentation/perf-dlfilter.txt
@@ -32,7 +32,7 @@ The API for filtering consists of the following:
 ----
 #include <perf/perf_dlfilter.h>
 
-const struct perf_dlfilter_fns perf_dlfilter_fns;
+struct perf_dlfilter_fns perf_dlfilter_fns;
 
 int start(void **data, void *ctx);
 int stop(void *data, void *ctx);
@@ -214,7 +214,7 @@ Filter out everything except branches from "foo" to "bar":
 #include <perf/perf_dlfilter.h>
 #include <string.h>
 
-const struct perf_dlfilter_fns perf_dlfilter_fns;
+struct perf_dlfilter_fns perf_dlfilter_fns;
 
 int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
 {
@@ -246,6 +246,14 @@ To use the filter with perf script:
 
 	perf script --dlfilter dlfilter-example.so
 
+NOTES
+-----
+
+The dlfilter .so file will be dependent on shared libraries. If those change,
+it may be necessary to rebuild the .so. Also there may be unexpected results
+if the .so uses different versions of the shared libraries that perf uses.
+Versions can be checked using the ldd command.
+
 SEE ALSO
 --------
 linkperf:perf-script[1]
diff --git a/tools/perf/Documentation/perf-iostat.txt b/tools/perf/Documentation/perf-iostat.txt
index 165176944031..04d510364384 100644
--- a/tools/perf/Documentation/perf-iostat.txt
+++ b/tools/perf/Documentation/perf-iostat.txt
@@ -9,7 +9,7 @@ SYNOPSIS
 --------
 [verse]
 'perf iostat' list
-'perf iostat' <ports> -- <command> [<options>]
+'perf iostat' <ports> \-- <command> [<options>]
 
 DESCRIPTION
 -----------
@@ -85,4 +85,4 @@ EXAMPLES
 
 SEE ALSO
 --------
-linkperf:perf-stat[1]
-\ No newline at end of file
+linkperf:perf-stat[1]
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index d71bac847936..f1079ee7f2ec 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -9,7 +9,7 @@ SYNOPSIS
 --------
 [verse]
 'perf record' [-e <EVENT> | --event=EVENT] [-a] <command>
-'perf record' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>]
+'perf record' [-e <EVENT> | --event=EVENT] [-a] \-- <command> [<options>]
 
 DESCRIPTION
 -----------
diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt
index 5e43cfa5ea1e..0250dc61cf98 100644
--- a/tools/perf/Documentation/perf-script-python.txt
+++ b/tools/perf/Documentation/perf-script-python.txt
@@ -167,7 +167,7 @@ below).
 
 Following those are the 'event handler' functions generated one for
 every event in the 'perf record' output.  The handler functions take
-the form subsystem__event_name, and contain named parameters, one for
+the form subsystem\__event_name, and contain named parameters, one for
 each field in the event; in this case, there's only one event,
 raw_syscalls__sys_enter().  (see the EVENT HANDLERS section below for
 more info on event handlers).
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index aa3a0b2c29a2..c80515243560 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -106,7 +106,7 @@ OPTIONS
 	Pass 'arg' as an argument to the dlfilter. --dlarg may be repeated
 	to add more arguments.
 
---list-dlfilters=::
+--list-dlfilters::
         Display a list of available dlfilters. Use with option -v (must come
         before option --list-dlfilters) to show long descriptions.
 
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 45c2467e4eb2..4c9310be6acc 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -9,8 +9,8 @@ SYNOPSIS
 --------
 [verse]
 'perf stat' [-e <EVENT> | --event=EVENT] [-a] <command>
-'perf stat' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>]
-'perf stat' [-e <EVENT> | --event=EVENT] [-a] record [-o file] -- <command> [<options>]
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] \-- <command> [<options>]
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] record [-o file] \-- <command> [<options>]
 'perf stat' report [-i file]
 
 DESCRIPTION
@@ -217,8 +217,8 @@ Append to the output file designated with the -o option. Ignored if -o is not sp
 
 Log output to fd, instead of stderr.  Complementary to --output, and mutually exclusive
 with it.  --append may be used here.  Examples:
-     3>results  perf stat --log-fd 3          -- $cmd
-     3>>results perf stat --log-fd 3 --append -- $cmd
+     3>results  perf stat --log-fd 3          \-- $cmd
+     3>>results perf stat --log-fd 3 --append \-- $cmd
 
 --control=fifo:ctl-fifo[,ack-fifo]::
 --control=fd:ctl-fd[,ack-fd]::
@@ -245,7 +245,7 @@ disable events during measurements:
 
  perf stat -D -1 -e cpu-cycles -a -I 1000       \
            --control fd:${ctl_fd},${ctl_fd_ack} \
-           -- sleep 30 &
+           \-- sleep 30 &
  perf_pid=$!
 
  sleep 5  && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})"
@@ -265,7 +265,7 @@ disable events during measurements:
 --post::
 	Pre and post measurement hooks, e.g.:
 
-perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- make -s -j64 O=defconfig-build/ bzImage
+perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' \-- make -s -j64 O=defconfig-build/ bzImage
 
 -I msecs::
 --interval-print msecs::
@@ -496,7 +496,7 @@ $ perf config stat.no-csv-summary=true
 EXAMPLES
 --------
 
-$ perf stat -- make
+$ perf stat \-- make
 
    Performance counter stats for 'make':
 
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index eb8e487ef90b..446180401e26 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -133,10 +133,10 @@ FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
 FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS)
 FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
 
-FEATURE_CHECK_LDFLAGS-libunwind-arm = -lunwind -lunwind-arm
-FEATURE_CHECK_LDFLAGS-libunwind-aarch64 = -lunwind -lunwind-aarch64
-FEATURE_CHECK_LDFLAGS-libunwind-x86 = -lunwind -llzma -lunwind-x86
-FEATURE_CHECK_LDFLAGS-libunwind-x86_64 = -lunwind -llzma -lunwind-x86_64
+FEATURE_CHECK_LDFLAGS-libunwind-arm += -lunwind -lunwind-arm
+FEATURE_CHECK_LDFLAGS-libunwind-aarch64 += -lunwind -lunwind-aarch64
+FEATURE_CHECK_LDFLAGS-libunwind-x86 += -lunwind -llzma -lunwind-x86
+FEATURE_CHECK_LDFLAGS-libunwind-x86_64 += -lunwind -llzma -lunwind-x86_64
 
 FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto
 
@@ -349,10 +349,6 @@ CXXFLAGS += $(INC_FLAGS)
 
 LIBPERF_CFLAGS := $(CORE_CFLAGS) $(EXTRA_CFLAGS)
 
-ifeq ($(feature-sync-compare-and-swap), 1)
-  CFLAGS += -DHAVE_SYNC_COMPARE_AND_SWAP_SUPPORT
-endif
-
 ifeq ($(feature-pthread-attr-setaffinity-np), 1)
   CFLAGS += -DHAVE_PTHREAD_ATTR_SETAFFINITY_NP
 endif
@@ -493,6 +489,8 @@ ifdef CORESIGHT
         CFLAGS += -DCS_RAW_PACKED
       endif
     endif
+  else
+    dummy := $(error Error: No libopencsd library found or the version is not up-to-date. Please install recent libopencsd to build with CORESIGHT=1)
   endif
 endif
 
@@ -829,33 +827,36 @@ else
   endif
 endif
 
-ifeq ($(feature-libbfd), 1)
-  EXTLIBS += -lbfd -lopcodes
-else
-  # we are on a system that requires -liberty and (maybe) -lz
-  # to link against -lbfd; test each case individually here
 
-  # call all detections now so we get correct
-  # status in VF output
-  $(call feature_check,libbfd-liberty)
-  $(call feature_check,libbfd-liberty-z)
-
-  ifeq ($(feature-libbfd-liberty), 1)
-    EXTLIBS += -lbfd -lopcodes -liberty
-    FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -ldl
+ifndef NO_LIBBFD
+  ifeq ($(feature-libbfd), 1)
+    EXTLIBS += -lbfd -lopcodes
   else
-    ifeq ($(feature-libbfd-liberty-z), 1)
-      EXTLIBS += -lbfd -lopcodes -liberty -lz
-      FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -lz -ldl
+    # we are on a system that requires -liberty and (maybe) -lz
+    # to link against -lbfd; test each case individually here
+
+    # call all detections now so we get correct
+    # status in VF output
+    $(call feature_check,libbfd-liberty)
+    $(call feature_check,libbfd-liberty-z)
+
+    ifeq ($(feature-libbfd-liberty), 1)
+      EXTLIBS += -lbfd -lopcodes -liberty
+      FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -ldl
+    else
+      ifeq ($(feature-libbfd-liberty-z), 1)
+        EXTLIBS += -lbfd -lopcodes -liberty -lz
+        FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -lz -ldl
+      endif
     endif
+    $(call feature_check,disassembler-four-args)
   endif
-  $(call feature_check,disassembler-four-args)
-endif
 
-ifeq ($(feature-libbfd-buildid), 1)
-  CFLAGS += -DHAVE_LIBBFD_BUILDID_SUPPORT
-else
-  msg := $(warning Old version of libbfd/binutils things like PE executable profiling will not be available);
+  ifeq ($(feature-libbfd-buildid), 1)
+    CFLAGS += -DHAVE_LIBBFD_BUILDID_SUPPORT
+  else
+    msg := $(warning Old version of libbfd/binutils things like PE executable profiling will not be available);
+  endif
 endif
 
 ifdef NO_DEMANGLE
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 77e7f18c0bd0..e04313c4d840 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -360,8 +360,11 @@ ifndef NO_JVMTI
 PROGRAMS += $(OUTPUT)$(LIBJVMTI)
 endif
 
+DLFILTERS := dlfilter-test-api-v0.so
+DLFILTERS := $(patsubst %,$(OUTPUT)dlfilters/%,$(DLFILTERS))
+
 # what 'all' will build and 'install' will install, in perfexecdir
-ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
+ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) $(DLFILTERS)
 
 # what 'all' will build but not install in perfexecdir
 OTHER_PROGRAMS = $(OUTPUT)perf
@@ -780,6 +783,13 @@ $(OUTPUT)perf-read-vdsox32: perf-read-vdso.c util/find-map.c
 	$(QUIET_CC)$(CC) -mx32 $(filter -static,$(LDFLAGS)) -Wall -Werror -o $@ perf-read-vdso.c
 endif
 
+$(OUTPUT)dlfilters/%.o: dlfilters/%.c include/perf/perf_dlfilter.h
+	$(Q)$(MKDIR) -p $(OUTPUT)dlfilters
+	$(QUIET_CC)$(CC) -c -Iinclude $(EXTRA_CFLAGS) -o $@ -fpic $<
+
+$(OUTPUT)dlfilters/%.so: $(OUTPUT)dlfilters/%.o
+	$(QUIET_LINK)$(CC) $(EXTRA_CFLAGS) -shared -o $@ $<
+
 ifndef NO_JVMTI
 LIBJVMTI_IN := $(OUTPUT)jvmti/jvmti-in.o
 
@@ -925,7 +935,7 @@ install-tools: all install-gtk
 		$(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'; \
 		$(LN) '$(DESTDIR_SQ)$(bindir_SQ)/perf' '$(DESTDIR_SQ)$(bindir_SQ)/trace'; \
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(includedir_SQ)/perf'; \
-		$(INSTALL) util/perf_dlfilter.h -t '$(DESTDIR_SQ)$(includedir_SQ)/perf'
+		$(INSTALL) -m 644 include/perf/perf_dlfilter.h -t '$(DESTDIR_SQ)$(includedir_SQ)/perf'
 ifndef NO_PERF_READ_VDSO32
 	$(call QUIET_INSTALL, perf-read-vdso32) \
 		$(INSTALL) $(OUTPUT)perf-read-vdso32 '$(DESTDIR_SQ)$(bindir_SQ)';
@@ -978,6 +988,9 @@ ifndef NO_LIBPYTHON
 		$(INSTALL) scripts/python/*.py -m 644 -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'; \
 		$(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
 endif
+	$(call QUIET_INSTALL, dlfilters) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/dlfilters'; \
+		$(INSTALL) $(DLFILTERS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/dlfilters';
 	$(call QUIET_INSTALL, perf_completion-script) \
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'; \
 		$(INSTALL) perf-completion.sh '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf'
diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c
index b187bddbd01a..c7c7ec0812d5 100644
--- a/tools/perf/arch/arm/util/auxtrace.c
+++ b/tools/perf/arch/arm/util/auxtrace.c
@@ -107,3 +107,35 @@ struct auxtrace_record
 	*err = 0;
 	return NULL;
 }
+
+#if defined(__arm__)
+u64 compat_auxtrace_mmap__read_head(struct auxtrace_mmap *mm)
+{
+	struct perf_event_mmap_page *pc = mm->userpg;
+	u64 result;
+
+	__asm__ __volatile__(
+"	ldrd    %0, %H0, [%1]"
+	: "=&r" (result)
+	: "r" (&pc->aux_head), "Qo" (pc->aux_head)
+	);
+
+	return result;
+}
+
+int compat_auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail)
+{
+	struct perf_event_mmap_page *pc = mm->userpg;
+
+	/* Ensure all reads are done before we write the tail out */
+	smp_mb();
+
+	__asm__ __volatile__(
+"	strd    %2, %H2, [%1]"
+	: "=Qo" (pc->aux_tail)
+	: "r" (&pc->aux_tail), "r" (tail)
+	);
+
+	return 0;
+}
+#endif
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 85168d87b2d7..515aae470e23 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -47,15 +47,17 @@ static const char *metadata_etmv3_ro[CS_ETM_PRIV_MAX] = {
 	[CS_ETM_ETMIDR]		= "mgmt/etmidr",
 };
 
-static const char *metadata_etmv4_ro[CS_ETMV4_PRIV_MAX] = {
+static const char * const metadata_etmv4_ro[] = {
 	[CS_ETMV4_TRCIDR0]		= "trcidr/trcidr0",
 	[CS_ETMV4_TRCIDR1]		= "trcidr/trcidr1",
 	[CS_ETMV4_TRCIDR2]		= "trcidr/trcidr2",
 	[CS_ETMV4_TRCIDR8]		= "trcidr/trcidr8",
 	[CS_ETMV4_TRCAUTHSTATUS]	= "mgmt/trcauthstatus",
+	[CS_ETE_TRCDEVARCH]		= "mgmt/trcdevarch"
 };
 
 static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu);
+static bool cs_etm_is_ete(struct auxtrace_record *itr, int cpu);
 
 static int cs_etm_set_context_id(struct auxtrace_record *itr,
 				 struct evsel *evsel, int cpu)
@@ -73,7 +75,7 @@ static int cs_etm_set_context_id(struct auxtrace_record *itr,
 	if (!cs_etm_is_etmv4(itr, cpu))
 		goto out;
 
-	/* Get a handle on TRCIRD2 */
+	/* Get a handle on TRCIDR2 */
 	snprintf(path, PATH_MAX, "cpu%d/%s",
 		 cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR2]);
 	err = perf_pmu__scan_file(cs_etm_pmu, path, "%x", &val);
@@ -533,7 +535,7 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
 		      struct evlist *evlist __maybe_unused)
 {
 	int i;
-	int etmv3 = 0, etmv4 = 0;
+	int etmv3 = 0, etmv4 = 0, ete = 0;
 	struct perf_cpu_map *event_cpus = evlist->core.cpus;
 	struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL);
 
@@ -544,7 +546,9 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
 			    !cpu_map__has(online_cpus, i))
 				continue;
 
-			if (cs_etm_is_etmv4(itr, i))
+			if (cs_etm_is_ete(itr, i))
+				ete++;
+			else if (cs_etm_is_etmv4(itr, i))
 				etmv4++;
 			else
 				etmv3++;
@@ -555,7 +559,9 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
 			if (!cpu_map__has(online_cpus, i))
 				continue;
 
-			if (cs_etm_is_etmv4(itr, i))
+			if (cs_etm_is_ete(itr, i))
+				ete++;
+			else if (cs_etm_is_etmv4(itr, i))
 				etmv4++;
 			else
 				etmv3++;
@@ -565,6 +571,7 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
 	perf_cpu_map__put(online_cpus);
 
 	return (CS_ETM_HEADER_SIZE +
+	       (ete   * CS_ETE_PRIV_SIZE) +
 	       (etmv4 * CS_ETMV4_PRIV_SIZE) +
 	       (etmv3 * CS_ETMV3_PRIV_SIZE));
 }
@@ -607,6 +614,49 @@ static int cs_etm_get_ro(struct perf_pmu *pmu, int cpu, const char *path)
 	return val;
 }
 
+#define TRCDEVARCH_ARCHPART_SHIFT 0
+#define TRCDEVARCH_ARCHPART_MASK  GENMASK(11, 0)
+#define TRCDEVARCH_ARCHPART(x)    (((x) & TRCDEVARCH_ARCHPART_MASK) >> TRCDEVARCH_ARCHPART_SHIFT)
+
+#define TRCDEVARCH_ARCHVER_SHIFT 12
+#define TRCDEVARCH_ARCHVER_MASK  GENMASK(15, 12)
+#define TRCDEVARCH_ARCHVER(x)    (((x) & TRCDEVARCH_ARCHVER_MASK) >> TRCDEVARCH_ARCHVER_SHIFT)
+
+static bool cs_etm_is_ete(struct auxtrace_record *itr, int cpu)
+{
+	struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr);
+	struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
+	int trcdevarch = cs_etm_get_ro(cs_etm_pmu, cpu, metadata_etmv4_ro[CS_ETE_TRCDEVARCH]);
+
+	/*
+	 * ETE if ARCHVER is 5 (ARCHVER is 4 for ETM) and ARCHPART is 0xA13.
+	 * See ETM_DEVARCH_ETE_ARCH in coresight-etm4x.h
+	 */
+	return TRCDEVARCH_ARCHVER(trcdevarch) == 5 && TRCDEVARCH_ARCHPART(trcdevarch) == 0xA13;
+}
+
+static void cs_etm_save_etmv4_header(__u64 data[], struct auxtrace_record *itr, int cpu)
+{
+	struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr);
+	struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
+
+	/* Get trace configuration register */
+	data[CS_ETMV4_TRCCONFIGR] = cs_etmv4_get_config(itr);
+	/* Get traceID from the framework */
+	data[CS_ETMV4_TRCTRACEIDR] = coresight_get_trace_id(cpu);
+	/* Get read-only information from sysFS */
+	data[CS_ETMV4_TRCIDR0] = cs_etm_get_ro(cs_etm_pmu, cpu,
+					       metadata_etmv4_ro[CS_ETMV4_TRCIDR0]);
+	data[CS_ETMV4_TRCIDR1] = cs_etm_get_ro(cs_etm_pmu, cpu,
+					       metadata_etmv4_ro[CS_ETMV4_TRCIDR1]);
+	data[CS_ETMV4_TRCIDR2] = cs_etm_get_ro(cs_etm_pmu, cpu,
+					       metadata_etmv4_ro[CS_ETMV4_TRCIDR2]);
+	data[CS_ETMV4_TRCIDR8] = cs_etm_get_ro(cs_etm_pmu, cpu,
+					       metadata_etmv4_ro[CS_ETMV4_TRCIDR8]);
+	data[CS_ETMV4_TRCAUTHSTATUS] = cs_etm_get_ro(cs_etm_pmu, cpu,
+						     metadata_etmv4_ro[CS_ETMV4_TRCAUTHSTATUS]);
+}
+
 static void cs_etm_get_metadata(int cpu, u32 *offset,
 				struct auxtrace_record *itr,
 				struct perf_record_auxtrace_info *info)
@@ -618,31 +668,20 @@ static void cs_etm_get_metadata(int cpu, u32 *offset,
 	struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
 
 	/* first see what kind of tracer this cpu is affined to */
-	if (cs_etm_is_etmv4(itr, cpu)) {
-		magic = __perf_cs_etmv4_magic;
-		/* Get trace configuration register */
-		info->priv[*offset + CS_ETMV4_TRCCONFIGR] =
-						cs_etmv4_get_config(itr);
-		/* Get traceID from the framework */
-		info->priv[*offset + CS_ETMV4_TRCTRACEIDR] =
-						coresight_get_trace_id(cpu);
-		/* Get read-only information from sysFS */
-		info->priv[*offset + CS_ETMV4_TRCIDR0] =
-			cs_etm_get_ro(cs_etm_pmu, cpu,
-				      metadata_etmv4_ro[CS_ETMV4_TRCIDR0]);
-		info->priv[*offset + CS_ETMV4_TRCIDR1] =
+	if (cs_etm_is_ete(itr, cpu)) {
+		magic = __perf_cs_ete_magic;
+		/* ETE uses the same registers as ETMv4 plus TRCDEVARCH */
+		cs_etm_save_etmv4_header(&info->priv[*offset], itr, cpu);
+		info->priv[*offset + CS_ETE_TRCDEVARCH] =
 			cs_etm_get_ro(cs_etm_pmu, cpu,
-				      metadata_etmv4_ro[CS_ETMV4_TRCIDR1]);
-		info->priv[*offset + CS_ETMV4_TRCIDR2] =
-			cs_etm_get_ro(cs_etm_pmu, cpu,
-				      metadata_etmv4_ro[CS_ETMV4_TRCIDR2]);
-		info->priv[*offset + CS_ETMV4_TRCIDR8] =
-			cs_etm_get_ro(cs_etm_pmu, cpu,
-				      metadata_etmv4_ro[CS_ETMV4_TRCIDR8]);
-		info->priv[*offset + CS_ETMV4_TRCAUTHSTATUS] =
-			cs_etm_get_ro(cs_etm_pmu, cpu,
-				      metadata_etmv4_ro
-				      [CS_ETMV4_TRCAUTHSTATUS]);
+				      metadata_etmv4_ro[CS_ETE_TRCDEVARCH]);
+
+		/* How much space was used */
+		increment = CS_ETE_PRIV_MAX;
+		nr_trc_params = CS_ETE_PRIV_MAX - CS_ETM_COMMON_BLK_MAX_V1;
+	} else if (cs_etm_is_etmv4(itr, cpu)) {
+		magic = __perf_cs_etmv4_magic;
+		cs_etm_save_etmv4_header(&info->priv[*offset], itr, cpu);
 
 		/* How much space was used */
 		increment = CS_ETMV4_PRIV_MAX;
diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
index ac653d08b1ea..1ca7bc337932 100644
--- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
+++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
@@ -361,3 +361,5 @@
 444	n64	landlock_create_ruleset		sys_landlock_create_ruleset
 445	n64	landlock_add_rule		sys_landlock_add_rule
 446	n64	landlock_restrict_self		sys_landlock_restrict_self
+# 447 reserved for memfd_secret
+448	n64	process_mrelease		sys_process_mrelease
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 6f3953f2a0d5..7bef917cc84e 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -330,10 +330,10 @@
 256	64	sys_debug_setcontext		sys_ni_syscall
 256	spu	sys_debug_setcontext		sys_ni_syscall
 # 257 reserved for vserver
-258	nospu	migrate_pages			sys_migrate_pages		compat_sys_migrate_pages
-259	nospu	mbind				sys_mbind			compat_sys_mbind
-260	nospu	get_mempolicy			sys_get_mempolicy		compat_sys_get_mempolicy
-261	nospu	set_mempolicy			sys_set_mempolicy		compat_sys_set_mempolicy
+258	nospu	migrate_pages			sys_migrate_pages
+259	nospu	mbind				sys_mbind
+260	nospu	get_mempolicy			sys_get_mempolicy
+261	nospu	set_mempolicy			sys_set_mempolicy
 262	nospu	mq_open				sys_mq_open			compat_sys_mq_open
 263	nospu	mq_unlink			sys_mq_unlink
 264	32	mq_timedsend			sys_mq_timedsend_time32
@@ -381,7 +381,7 @@
 298	common	faccessat			sys_faccessat
 299	common	get_robust_list			sys_get_robust_list		compat_sys_get_robust_list
 300	common	set_robust_list			sys_set_robust_list		compat_sys_set_robust_list
-301	common	move_pages			sys_move_pages			compat_sys_move_pages
+301	common	move_pages			sys_move_pages
 302	common	getcpu				sys_getcpu
 303	nospu	epoll_pwait			sys_epoll_pwait			compat_sys_epoll_pwait
 304	32	utimensat			sys_utimensat_time32
@@ -526,3 +526,5 @@
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
+# 447 reserved for memfd_secret
+448	common	process_mrelease		sys_process_mrelease
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index 8d619ec86dcc..df5261e5cfe1 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -122,7 +122,7 @@
 131  common	quotactl		sys_quotactl			sys_quotactl
 132  common	getpgid			sys_getpgid			sys_getpgid
 133  common	fchdir			sys_fchdir			sys_fchdir
-134  common	bdflush			-				-
+134  common	bdflush			sys_ni_syscall			sys_ni_syscall
 135  common	sysfs			sys_sysfs			sys_sysfs
 136  common	personality		sys_s390_personality		sys_s390_personality
 137  common	afs_syscall		-				-
@@ -274,9 +274,9 @@
 265  common	statfs64		sys_statfs64			compat_sys_statfs64
 266  common	fstatfs64		sys_fstatfs64			compat_sys_fstatfs64
 267  common	remap_file_pages	sys_remap_file_pages		sys_remap_file_pages
-268  common	mbind			sys_mbind			compat_sys_mbind
-269  common	get_mempolicy		sys_get_mempolicy		compat_sys_get_mempolicy
-270  common	set_mempolicy		sys_set_mempolicy		compat_sys_set_mempolicy
+268  common	mbind			sys_mbind			sys_mbind
+269  common	get_mempolicy		sys_get_mempolicy		sys_get_mempolicy
+270  common	set_mempolicy		sys_set_mempolicy		sys_set_mempolicy
 271  common	mq_open			sys_mq_open			compat_sys_mq_open
 272  common	mq_unlink		sys_mq_unlink			sys_mq_unlink
 273  common	mq_timedsend		sys_mq_timedsend		sys_mq_timedsend_time32
@@ -293,7 +293,7 @@
 284  common	inotify_init		sys_inotify_init		sys_inotify_init
 285  common	inotify_add_watch	sys_inotify_add_watch		sys_inotify_add_watch
 286  common	inotify_rm_watch	sys_inotify_rm_watch		sys_inotify_rm_watch
-287  common	migrate_pages		sys_migrate_pages		compat_sys_migrate_pages
+287  common	migrate_pages		sys_migrate_pages		sys_migrate_pages
 288  common	openat			sys_openat			compat_sys_openat
 289  common	mkdirat			sys_mkdirat			sys_mkdirat
 290  common	mknodat			sys_mknodat			sys_mknodat
@@ -317,7 +317,7 @@
 307  common	sync_file_range		sys_sync_file_range		compat_sys_s390_sync_file_range
 308  common	tee			sys_tee				sys_tee
 309  common	vmsplice		sys_vmsplice			sys_vmsplice
-310  common	move_pages		sys_move_pages			compat_sys_move_pages
+310  common	move_pages		sys_move_pages			sys_move_pages
 311  common	getcpu			sys_getcpu			sys_getcpu
 312  common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
 313  common	utimes			sys_utimes			sys_utimes_time32
@@ -449,3 +449,5 @@
 444  common	landlock_create_ruleset	sys_landlock_create_ruleset	sys_landlock_create_ruleset
 445  common	landlock_add_rule	sys_landlock_add_rule		sys_landlock_add_rule
 446  common	landlock_restrict_self	sys_landlock_restrict_self	sys_landlock_restrict_self
+# 447 reserved for memfd_secret
+448  common	process_mrelease	sys_process_mrelease		sys_process_mrelease
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index f6b57799c1ea..18b5500ea8bf 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -369,6 +369,7 @@
 445	common	landlock_add_rule	sys_landlock_add_rule
 446	common	landlock_restrict_self	sys_landlock_restrict_self
 447	common	memfd_secret		sys_memfd_secret
+448	common	process_mrelease	sys_process_mrelease
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
@@ -397,7 +398,7 @@
 530	x32	set_robust_list		compat_sys_set_robust_list
 531	x32	get_robust_list		compat_sys_get_robust_list
 532	x32	vmsplice		sys_vmsplice
-533	x32	move_pages		compat_sys_move_pages
+533	x32	move_pages		sys_move_pages
 534	x32	preadv			compat_sys_preadv64
 535	x32	pwritev			compat_sys_pwritev64
 536	x32	rt_tgsigqueueinfo	compat_sys_rt_tgsigqueueinfo
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index d48d608517fd..74d69db1ea99 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -1,12 +1,30 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <string.h>
-
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <fcntl.h>
 #include <linux/stddef.h>
 #include <linux/perf_event.h>
+#include <linux/zalloc.h>
+#include <api/fs/fs.h>
+#include <errno.h>
 
 #include "../../../util/intel-pt.h"
 #include "../../../util/intel-bts.h"
 #include "../../../util/pmu.h"
+#include "../../../util/fncache.h"
+
+#define TEMPLATE_ALIAS	"%s/bus/event_source/devices/%s/alias"
+
+struct pmu_alias {
+	char *name;
+	char *alias;
+	struct list_head list;
+};
+
+static LIST_HEAD(pmu_alias_name_list);
+static bool cached_list;
 
 struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
 {
@@ -18,3 +36,138 @@ struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __mayb
 #endif
 	return NULL;
 }
+
+static void pmu_alias__delete(struct pmu_alias *pmu_alias)
+{
+	if (!pmu_alias)
+		return;
+
+	zfree(&pmu_alias->name);
+	zfree(&pmu_alias->alias);
+	free(pmu_alias);
+}
+
+static struct pmu_alias *pmu_alias__new(char *name, char *alias)
+{
+	struct pmu_alias *pmu_alias = zalloc(sizeof(*pmu_alias));
+
+	if (pmu_alias) {
+		pmu_alias->name = strdup(name);
+		if (!pmu_alias->name)
+			goto out_delete;
+
+		pmu_alias->alias = strdup(alias);
+		if (!pmu_alias->alias)
+			goto out_delete;
+	}
+	return pmu_alias;
+
+out_delete:
+	pmu_alias__delete(pmu_alias);
+	return NULL;
+}
+
+static int setup_pmu_alias_list(void)
+{
+	char path[PATH_MAX];
+	DIR *dir;
+	struct dirent *dent;
+	const char *sysfs = sysfs__mountpoint();
+	struct pmu_alias *pmu_alias;
+	char buf[MAX_PMU_NAME_LEN];
+	FILE *file;
+	int ret = -ENOMEM;
+
+	if (!sysfs)
+		return -1;
+
+	snprintf(path, PATH_MAX,
+		 "%s" EVENT_SOURCE_DEVICE_PATH, sysfs);
+
+	dir = opendir(path);
+	if (!dir)
+		return -errno;
+
+	while ((dent = readdir(dir))) {
+		if (!strcmp(dent->d_name, ".") ||
+		    !strcmp(dent->d_name, ".."))
+			continue;
+
+		snprintf(path, PATH_MAX,
+			 TEMPLATE_ALIAS, sysfs, dent->d_name);
+
+		if (!file_available(path))
+			continue;
+
+		file = fopen(path, "r");
+		if (!file)
+			continue;
+
+		if (!fgets(buf, sizeof(buf), file)) {
+			fclose(file);
+			continue;
+		}
+
+		fclose(file);
+
+		/* Remove the last '\n' */
+		buf[strlen(buf) - 1] = 0;
+
+		pmu_alias = pmu_alias__new(dent->d_name, buf);
+		if (!pmu_alias)
+			goto close_dir;
+
+		list_add_tail(&pmu_alias->list, &pmu_alias_name_list);
+	}
+
+	ret = 0;
+
+close_dir:
+	closedir(dir);
+	return ret;
+}
+
+static char *__pmu_find_real_name(const char *name)
+{
+	struct pmu_alias *pmu_alias;
+
+	list_for_each_entry(pmu_alias, &pmu_alias_name_list, list) {
+		if (!strcmp(name, pmu_alias->alias))
+			return pmu_alias->name;
+	}
+
+	return (char *)name;
+}
+
+char *pmu_find_real_name(const char *name)
+{
+	if (cached_list)
+		return __pmu_find_real_name(name);
+
+	setup_pmu_alias_list();
+	cached_list = true;
+
+	return __pmu_find_real_name(name);
+}
+
+static char *__pmu_find_alias_name(const char *name)
+{
+	struct pmu_alias *pmu_alias;
+
+	list_for_each_entry(pmu_alias, &pmu_alias_name_list, list) {
+		if (!strcmp(name, pmu_alias->name))
+			return pmu_alias->alias;
+	}
+	return NULL;
+}
+
+char *pmu_find_alias_name(const char *name)
+{
+	if (cached_list)
+		return __pmu_find_alias_name(name);
+
+	setup_pmu_alias_list();
+	cached_list = true;
+
+	return __pmu_find_alias_name(name);
+}
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index e43f46931b41..61d45fcb4057 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -13,6 +13,7 @@ perf-y += synthesize.o
 perf-y += kallsyms-parse.o
 perf-y += find-bit-bench.o
 perf-y += inject-buildid.o
+perf-y += evlist-open-close.o
 
 perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
 perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index eac36afab2b3..b3480bc33fe8 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -48,6 +48,7 @@ int bench_epoll_ctl(int argc, const char **argv);
 int bench_synthesize(int argc, const char **argv);
 int bench_kallsyms_parse(int argc, const char **argv);
 int bench_inject_build_id(int argc, const char **argv);
+int bench_evlist_open_close(int argc, const char **argv);
 
 #define BENCH_FORMAT_DEFAULT_STR	"default"
 #define BENCH_FORMAT_DEFAULT		0
diff --git a/tools/perf/bench/evlist-open-close.c b/tools/perf/bench/evlist-open-close.c
new file mode 100644
index 000000000000..83e9897c64a1
--- /dev/null
+++ b/tools/perf/bench/evlist-open-close.c
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include "bench.h"
+#include "../util/debug.h"
+#include "../util/stat.h"
+#include "../util/evlist.h"
+#include "../util/evsel.h"
+#include "../util/strbuf.h"
+#include "../util/record.h"
+#include "../util/parse-events.h"
+#include "internal/threadmap.h"
+#include "internal/cpumap.h"
+#include <linux/perf_event.h>
+#include <linux/kernel.h>
+#include <linux/time64.h>
+#include <linux/string.h>
+#include <subcmd/parse-options.h>
+
+#define MMAP_FLUSH_DEFAULT 1
+
+static int iterations = 100;
+static int nr_events = 1;
+static const char *event_string = "dummy";
+
+static struct record_opts opts = {
+	.sample_time	     = true,
+	.mmap_pages	     = UINT_MAX,
+	.user_freq	     = UINT_MAX,
+	.user_interval	     = ULLONG_MAX,
+	.freq		     = 4000,
+	.target		     = {
+		.uses_mmap   = true,
+		.default_per_cpu = true,
+	},
+	.mmap_flush          = MMAP_FLUSH_DEFAULT,
+	.nr_threads_synthesize = 1,
+	.ctl_fd              = -1,
+	.ctl_fd_ack          = -1,
+};
+
+static const struct option options[] = {
+	OPT_STRING('e', "event", &event_string, "event", "event selector. use 'perf list' to list available events"),
+	OPT_INTEGER('n', "nr-events", &nr_events,
+		     "number of dummy events to create (default 1). If used with -e, it clones those events n times (1 = no change)"),
+	OPT_INTEGER('i', "iterations", &iterations, "Number of iterations used to compute average (default=100)"),
+	OPT_BOOLEAN('a', "all-cpus", &opts.target.system_wide, "system-wide collection from all CPUs"),
+	OPT_STRING('C', "cpu", &opts.target.cpu_list, "cpu", "list of cpus where to open events"),
+	OPT_STRING('p', "pid", &opts.target.pid, "pid", "record events on existing process id"),
+	OPT_STRING('t', "tid", &opts.target.tid, "tid", "record events on existing thread id"),
+	OPT_STRING('u', "uid", &opts.target.uid_str, "user", "user to profile"),
+	OPT_BOOLEAN(0, "per-thread", &opts.target.per_thread, "use per-thread mmaps"),
+	OPT_END()
+};
+
+static const char *const bench_usage[] = {
+	"perf bench internals evlist-open-close <options>",
+	NULL
+};
+
+static int evlist__count_evsel_fds(struct evlist *evlist)
+{
+	struct evsel *evsel;
+	int cnt = 0;
+
+	evlist__for_each_entry(evlist, evsel)
+		cnt += evsel->core.threads->nr * evsel->core.cpus->nr;
+
+	return cnt;
+}
+
+static struct evlist *bench__create_evlist(char *evstr)
+{
+	struct parse_events_error err = { .idx = 0, };
+	struct evlist *evlist = evlist__new();
+	int ret;
+
+	if (!evlist) {
+		pr_err("Not enough memory to create evlist\n");
+		return NULL;
+	}
+
+	ret = parse_events(evlist, evstr, &err);
+	if (ret) {
+		parse_events_print_error(&err, evstr);
+		pr_err("Run 'perf list' for a list of valid events\n");
+		ret = 1;
+		goto out_delete_evlist;
+	}
+
+	ret = evlist__create_maps(evlist, &opts.target);
+	if (ret < 0) {
+		pr_err("Not enough memory to create thread/cpu maps\n");
+		goto out_delete_evlist;
+	}
+
+	evlist__config(evlist, &opts, NULL);
+
+	return evlist;
+
+out_delete_evlist:
+	evlist__delete(evlist);
+	return NULL;
+}
+
+static int bench__do_evlist_open_close(struct evlist *evlist)
+{
+	char sbuf[STRERR_BUFSIZE];
+	int err = evlist__open(evlist);
+
+	if (err < 0) {
+		pr_err("evlist__open: %s\n", str_error_r(errno, sbuf, sizeof(sbuf)));
+		return err;
+	}
+
+	err = evlist__mmap(evlist, opts.mmap_pages);
+	if (err < 0) {
+		pr_err("evlist__mmap: %s\n", str_error_r(errno, sbuf, sizeof(sbuf)));
+		return err;
+	}
+
+	evlist__enable(evlist);
+	evlist__disable(evlist);
+	evlist__munmap(evlist);
+	evlist__close(evlist);
+
+	return 0;
+}
+
+static int bench_evlist_open_close__run(char *evstr)
+{
+	// used to print statistics only
+	struct evlist *evlist = bench__create_evlist(evstr);
+	double time_average, time_stddev;
+	struct timeval start, end, diff;
+	struct stats time_stats;
+	u64 runtime_us;
+	int i, err;
+
+	if (!evlist)
+		return -ENOMEM;
+
+	init_stats(&time_stats);
+
+	printf("  Number of cpus:\t%d\n", evlist->core.cpus->nr);
+	printf("  Number of threads:\t%d\n", evlist->core.threads->nr);
+	printf("  Number of events:\t%d (%d fds)\n",
+		evlist->core.nr_entries, evlist__count_evsel_fds(evlist));
+	printf("  Number of iterations:\t%d\n", iterations);
+
+	evlist__delete(evlist);
+
+	for (i = 0; i < iterations; i++) {
+		pr_debug("Started iteration %d\n", i);
+		evlist = bench__create_evlist(evstr);
+		if (!evlist)
+			return -ENOMEM;
+
+		gettimeofday(&start, NULL);
+		err = bench__do_evlist_open_close(evlist);
+		if (err) {
+			evlist__delete(evlist);
+			return err;
+		}
+
+		gettimeofday(&end, NULL);
+		timersub(&end, &start, &diff);
+		runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+		update_stats(&time_stats, runtime_us);
+
+		evlist__delete(evlist);
+		pr_debug("Iteration %d took:\t%" PRIu64 "us\n", i, runtime_us);
+	}
+
+	time_average = avg_stats(&time_stats);
+	time_stddev = stddev_stats(&time_stats);
+	printf("  Average open-close took: %.3f usec (+- %.3f usec)\n", time_average, time_stddev);
+
+	return 0;
+}
+
+static char *bench__repeat_event_string(const char *evstr, int n)
+{
+	char sbuf[STRERR_BUFSIZE];
+	struct strbuf buf;
+	int i, str_size = strlen(evstr),
+	    final_size = str_size * n + n,
+	    err = strbuf_init(&buf, final_size);
+
+	if (err) {
+		pr_err("strbuf_init: %s\n", str_error_r(err, sbuf, sizeof(sbuf)));
+		goto out_error;
+	}
+
+	for (i = 0; i < n; i++) {
+		err = strbuf_add(&buf, evstr, str_size);
+		if (err) {
+			pr_err("strbuf_add: %s\n", str_error_r(err, sbuf, sizeof(sbuf)));
+			goto out_error;
+		}
+
+		err = strbuf_addch(&buf, i == n-1 ? '\0' : ',');
+		if (err) {
+			pr_err("strbuf_addch: %s\n", str_error_r(err, sbuf, sizeof(sbuf)));
+			goto out_error;
+		}
+	}
+
+	return strbuf_detach(&buf, NULL);
+
+out_error:
+	strbuf_release(&buf);
+	return NULL;
+}
+
+
+int bench_evlist_open_close(int argc, const char **argv)
+{
+	char *evstr, errbuf[BUFSIZ];
+	int err;
+
+	argc = parse_options(argc, argv, options, bench_usage, 0);
+	if (argc) {
+		usage_with_options(bench_usage, options);
+		exit(EXIT_FAILURE);
+	}
+
+	err = target__validate(&opts.target);
+	if (err) {
+		target__strerror(&opts.target, err, errbuf, sizeof(errbuf));
+		pr_err("%s\n", errbuf);
+		goto out;
+	}
+
+	err = target__parse_uid(&opts.target);
+	if (err) {
+		target__strerror(&opts.target, err, errbuf, sizeof(errbuf));
+		pr_err("%s", errbuf);
+		goto out;
+	}
+
+	/* Enable ignoring missing threads when -u/-p option is defined. */
+	opts.ignore_missing_thread = opts.target.uid != UINT_MAX || opts.target.pid;
+
+	evstr = bench__repeat_event_string(event_string, nr_events);
+	if (!evstr) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	err = bench_evlist_open_close__run(evstr);
+
+	free(evstr);
+out:
+	return err;
+}
diff --git a/tools/perf/bench/find-bit-bench.c b/tools/perf/bench/find-bit-bench.c
index 73b5bcc5946a..22b5cfe97023 100644
--- a/tools/perf/bench/find-bit-bench.c
+++ b/tools/perf/bench/find-bit-bench.c
@@ -54,7 +54,7 @@ static bool asm_test_bit(long nr, const unsigned long *addr)
 
 static int do_for_each_set_bit(unsigned int num_bits)
 {
-	unsigned long *to_test = bitmap_alloc(num_bits);
+	unsigned long *to_test = bitmap_zalloc(num_bits);
 	struct timeval start, end, diff;
 	u64 runtime_us;
 	struct stats fb_time_stats, tb_time_stats;
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index b65373ce5c4f..fcdea3e44937 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -20,6 +20,7 @@
 #include <linux/kernel.h>
 #include <linux/zalloc.h>
 #include <sys/time.h>
+#include <sys/mman.h>
 #include <perf/cpumap.h>
 
 #include "../util/stat.h"
@@ -29,11 +30,7 @@
 
 #include <err.h>
 
-static unsigned int nthreads = 0;
-static unsigned int nsecs    = 10;
-/* amount of futexes per thread */
-static unsigned int nfutexes = 1024;
-static bool fshared = false, done = false, silent = false;
+static bool done = false;
 static int futex_flag = 0;
 
 struct timeval bench__start, bench__end, bench__runtime;
@@ -49,12 +46,18 @@ struct worker {
 	unsigned long ops;
 };
 
+static struct bench_futex_parameters params = {
+	.nfutexes = 1024,
+	.runtime  = 10,
+};
+
 static const struct option options[] = {
-	OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
-	OPT_UINTEGER('r', "runtime", &nsecs,    "Specify runtime (in seconds)"),
-	OPT_UINTEGER('f', "futexes", &nfutexes, "Specify amount of futexes per threads"),
-	OPT_BOOLEAN( 's', "silent",  &silent,   "Silent mode: do not display data/details"),
-	OPT_BOOLEAN( 'S', "shared",  &fshared,  "Use shared futexes instead of private ones"),
+	OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
+	OPT_UINTEGER('r', "runtime", &params.runtime, "Specify runtime (in seconds)"),
+	OPT_UINTEGER('f', "futexes", &params.nfutexes, "Specify amount of futexes per threads"),
+	OPT_BOOLEAN( 's', "silent",  &params.silent, "Silent mode: do not display data/details"),
+	OPT_BOOLEAN( 'S', "shared",  &params.fshared, "Use shared futexes instead of private ones"),
+	OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"),
 	OPT_END()
 };
 
@@ -78,7 +81,7 @@ static void *workerfn(void *arg)
 	pthread_mutex_unlock(&thread_lock);
 
 	do {
-		for (i = 0; i < nfutexes; i++, ops++) {
+		for (i = 0; i < params.nfutexes; i++, ops++) {
 			/*
 			 * We want the futex calls to fail in order to stress
 			 * the hashing of uaddr and not measure other steps,
@@ -86,7 +89,7 @@ static void *workerfn(void *arg)
 			 * the critical region protected by hb->lock.
 			 */
 			ret = futex_wait(&w->futex[i], 1234, NULL, futex_flag);
-			if (!silent &&
+			if (!params.silent &&
 			    (!ret || errno != EAGAIN || errno != EWOULDBLOCK))
 				warn("Non-expected futex return call");
 		}
@@ -112,7 +115,7 @@ static void print_summary(void)
 	double stddev = stddev_stats(&throughput_stats);
 
 	printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
-	       !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
+	       !params.silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
 	       (int)bench__runtime.tv_sec);
 }
 
@@ -141,30 +144,35 @@ int bench_futex_hash(int argc, const char **argv)
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
 
-	if (!nthreads) /* default to the number of CPUs */
-		nthreads = cpu->nr;
+	if (params.mlockall) {
+		if (mlockall(MCL_CURRENT | MCL_FUTURE))
+			err(EXIT_FAILURE, "mlockall");
+	}
+
+	if (!params.nthreads) /* default to the number of CPUs */
+		params.nthreads = cpu->nr;
 
-	worker = calloc(nthreads, sizeof(*worker));
+	worker = calloc(params.nthreads, sizeof(*worker));
 	if (!worker)
 		goto errmem;
 
-	if (!fshared)
+	if (!params.fshared)
 		futex_flag = FUTEX_PRIVATE_FLAG;
 
 	printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n",
-	       getpid(), nthreads, nfutexes, fshared ? "shared":"private", nsecs);
+	       getpid(), params.nthreads, params.nfutexes, params.fshared ? "shared":"private", params.runtime);
 
 	init_stats(&throughput_stats);
 	pthread_mutex_init(&thread_lock, NULL);
 	pthread_cond_init(&thread_parent, NULL);
 	pthread_cond_init(&thread_worker, NULL);
 
-	threads_starting = nthreads;
+	threads_starting = params.nthreads;
 	pthread_attr_init(&thread_attr);
 	gettimeofday(&bench__start, NULL);
-	for (i = 0; i < nthreads; i++) {
+	for (i = 0; i < params.nthreads; i++) {
 		worker[i].tid = i;
-		worker[i].futex = calloc(nfutexes, sizeof(*worker[i].futex));
+		worker[i].futex = calloc(params.nfutexes, sizeof(*worker[i].futex));
 		if (!worker[i].futex)
 			goto errmem;
 
@@ -189,10 +197,10 @@ int bench_futex_hash(int argc, const char **argv)
 	pthread_cond_broadcast(&thread_worker);
 	pthread_mutex_unlock(&thread_lock);
 
-	sleep(nsecs);
+	sleep(params.runtime);
 	toggle_done(0, NULL, NULL);
 
-	for (i = 0; i < nthreads; i++) {
+	for (i = 0; i < params.nthreads; i++) {
 		ret = pthread_join(worker[i].thread, NULL);
 		if (ret)
 			err(EXIT_FAILURE, "pthread_join");
@@ -203,18 +211,18 @@ int bench_futex_hash(int argc, const char **argv)
 	pthread_cond_destroy(&thread_worker);
 	pthread_mutex_destroy(&thread_lock);
 
-	for (i = 0; i < nthreads; i++) {
+	for (i = 0; i < params.nthreads; i++) {
 		unsigned long t = bench__runtime.tv_sec > 0 ?
 			worker[i].ops / bench__runtime.tv_sec : 0;
 		update_stats(&throughput_stats, t);
-		if (!silent) {
-			if (nfutexes == 1)
+		if (!params.silent) {
+			if (params.nfutexes == 1)
 				printf("[thread %2d] futex: %p [ %ld ops/sec ]\n",
 				       worker[i].tid, &worker[i].futex[0], t);
 			else
 				printf("[thread %2d] futexes: %p ... %p [ %ld ops/sec ]\n",
 				       worker[i].tid, &worker[i].futex[0],
-				       &worker[i].futex[nfutexes-1], t);
+				       &worker[i].futex[params.nfutexes-1], t);
 		}
 
 		zfree(&worker[i].futex);
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 89c6d160379c..5d1fe9c35807 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -21,6 +21,7 @@
 #include <err.h>
 #include <stdlib.h>
 #include <sys/time.h>
+#include <sys/mman.h>
 
 struct worker {
 	int tid;
@@ -31,22 +32,24 @@ struct worker {
 
 static u_int32_t global_futex = 0;
 static struct worker *worker;
-static unsigned int nsecs = 10;
-static bool silent = false, multi = false;
-static bool done = false, fshared = false;
-static unsigned int nthreads = 0;
+static bool done = false;
 static int futex_flag = 0;
 static pthread_mutex_t thread_lock;
 static unsigned int threads_starting;
 static struct stats throughput_stats;
 static pthread_cond_t thread_parent, thread_worker;
 
+static struct bench_futex_parameters params = {
+	.runtime  = 10,
+};
+
 static const struct option options[] = {
-	OPT_UINTEGER('t', "threads",  &nthreads, "Specify amount of threads"),
-	OPT_UINTEGER('r', "runtime", &nsecs,     "Specify runtime (in seconds)"),
-	OPT_BOOLEAN( 'M', "multi",   &multi,     "Use multiple futexes"),
-	OPT_BOOLEAN( 's', "silent",  &silent,    "Silent mode: do not display data/details"),
-	OPT_BOOLEAN( 'S', "shared",  &fshared,   "Use shared futexes instead of private ones"),
+	OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
+	OPT_UINTEGER('r', "runtime", &params.runtime, "Specify runtime (in seconds)"),
+	OPT_BOOLEAN( 'M', "multi",   &params.multi, "Use multiple futexes"),
+	OPT_BOOLEAN( 's', "silent",  &params.silent, "Silent mode: do not display data/details"),
+	OPT_BOOLEAN( 'S', "shared",  &params.fshared, "Use shared futexes instead of private ones"),
+	OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"),
 	OPT_END()
 };
 
@@ -61,7 +64,7 @@ static void print_summary(void)
 	double stddev = stddev_stats(&throughput_stats);
 
 	printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
-	       !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
+	       !params.silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
 	       (int)bench__runtime.tv_sec);
 }
 
@@ -93,7 +96,7 @@ static void *workerfn(void *arg)
 		ret = futex_lock_pi(w->futex, NULL, futex_flag);
 
 		if (ret) { /* handle lock acquisition */
-			if (!silent)
+			if (!params.silent)
 				warn("thread %d: Could not lock pi-lock for %p (%d)",
 				     w->tid, w->futex, ret);
 			if (done)
@@ -104,7 +107,7 @@ static void *workerfn(void *arg)
 
 		usleep(1);
 		ret = futex_unlock_pi(w->futex, futex_flag);
-		if (ret && !silent)
+		if (ret && !params.silent)
 			warn("thread %d: Could not unlock pi-lock for %p (%d)",
 			     w->tid, w->futex, ret);
 		ops++; /* account for thread's share of work */
@@ -120,12 +123,12 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr,
 	cpu_set_t cpuset;
 	unsigned int i;
 
-	threads_starting = nthreads;
+	threads_starting = params.nthreads;
 
-	for (i = 0; i < nthreads; i++) {
+	for (i = 0; i < params.nthreads; i++) {
 		worker[i].tid = i;
 
-		if (multi) {
+		if (params.multi) {
 			worker[i].futex = calloc(1, sizeof(u_int32_t));
 			if (!worker[i].futex)
 				err(EXIT_FAILURE, "calloc");
@@ -164,25 +167,30 @@ int bench_futex_lock_pi(int argc, const char **argv)
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
 
-	if (!nthreads)
-		nthreads = cpu->nr;
+	if (params.mlockall) {
+		if (mlockall(MCL_CURRENT | MCL_FUTURE))
+			err(EXIT_FAILURE, "mlockall");
+	}
+
+	if (!params.nthreads)
+		params.nthreads = cpu->nr;
 
-	worker = calloc(nthreads, sizeof(*worker));
+	worker = calloc(params.nthreads, sizeof(*worker));
 	if (!worker)
 		err(EXIT_FAILURE, "calloc");
 
-	if (!fshared)
+	if (!params.fshared)
 		futex_flag = FUTEX_PRIVATE_FLAG;
 
 	printf("Run summary [PID %d]: %d threads doing pi lock/unlock pairing for %d secs.\n\n",
-	       getpid(), nthreads, nsecs);
+	       getpid(), params.nthreads, params.runtime);
 
 	init_stats(&throughput_stats);
 	pthread_mutex_init(&thread_lock, NULL);
 	pthread_cond_init(&thread_parent, NULL);
 	pthread_cond_init(&thread_worker, NULL);
 
-	threads_starting = nthreads;
+	threads_starting = params.nthreads;
 	pthread_attr_init(&thread_attr);
 	gettimeofday(&bench__start, NULL);
 
@@ -195,10 +203,10 @@ int bench_futex_lock_pi(int argc, const char **argv)
 	pthread_cond_broadcast(&thread_worker);
 	pthread_mutex_unlock(&thread_lock);
 
-	sleep(nsecs);
+	sleep(params.runtime);
 	toggle_done(0, NULL, NULL);
 
-	for (i = 0; i < nthreads; i++) {
+	for (i = 0; i < params.nthreads; i++) {
 		ret = pthread_join(worker[i].thread, NULL);
 		if (ret)
 			err(EXIT_FAILURE, "pthread_join");
@@ -209,16 +217,16 @@ int bench_futex_lock_pi(int argc, const char **argv)
 	pthread_cond_destroy(&thread_worker);
 	pthread_mutex_destroy(&thread_lock);
 
-	for (i = 0; i < nthreads; i++) {
+	for (i = 0; i < params.nthreads; i++) {
 		unsigned long t = bench__runtime.tv_sec > 0 ?
 			worker[i].ops / bench__runtime.tv_sec : 0;
 
 		update_stats(&throughput_stats, t);
-		if (!silent)
+		if (!params.silent)
 			printf("[thread %3d] futex: %p [ %ld ops/sec ]\n",
 			       worker[i].tid, worker[i].futex, t);
 
-		if (multi)
+		if (params.multi)
 			zfree(&worker[i].futex);
 	}
 
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index 5fa23295ee5f..97fe31fd3a23 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -6,7 +6,8 @@
  *                on futex2, N at a time.
  *
  * This program is particularly useful to measure the latency of nthread
- * requeues without waking up any tasks -- thus mimicking a regular futex_wait.
+ * requeues without waking up any tasks (in the non-pi case) -- thus
+ * mimicking a regular futex_wait.
  */
 
 /* For the CLR_() macros */
@@ -27,28 +28,35 @@
 #include <err.h>
 #include <stdlib.h>
 #include <sys/time.h>
+#include <sys/mman.h>
 
 static u_int32_t futex1 = 0, futex2 = 0;
 
-/*
- * How many tasks to requeue at a time.
- * Default to 1 in order to make the kernel work more.
- */
-static unsigned int nrequeue = 1;
-
 static pthread_t *worker;
-static bool done = false, silent = false, fshared = false;
+static bool done = false;
 static pthread_mutex_t thread_lock;
 static pthread_cond_t thread_parent, thread_worker;
 static struct stats requeuetime_stats, requeued_stats;
-static unsigned int threads_starting, nthreads = 0;
+static unsigned int threads_starting;
 static int futex_flag = 0;
 
+static struct bench_futex_parameters params = {
+	/*
+	 * How many tasks to requeue at a time.
+	 * Default to 1 in order to make the kernel work more.
+	 */
+	.nrequeue = 1,
+};
+
 static const struct option options[] = {
-	OPT_UINTEGER('t', "threads",  &nthreads, "Specify amount of threads"),
-	OPT_UINTEGER('q', "nrequeue", &nrequeue, "Specify amount of threads to requeue at once"),
-	OPT_BOOLEAN( 's', "silent",   &silent,   "Silent mode: do not display data/details"),
-	OPT_BOOLEAN( 'S', "shared",   &fshared,  "Use shared futexes instead of private ones"),
+	OPT_UINTEGER('t', "threads",  &params.nthreads, "Specify amount of threads"),
+	OPT_UINTEGER('q', "nrequeue", &params.nrequeue, "Specify amount of threads to requeue at once"),
+	OPT_BOOLEAN( 's', "silent",   &params.silent, "Silent mode: do not display data/details"),
+	OPT_BOOLEAN( 'S', "shared",   &params.fshared, "Use shared futexes instead of private ones"),
+	OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"),
+	OPT_BOOLEAN( 'B', "broadcast", &params.broadcast, "Requeue all threads at once"),
+	OPT_BOOLEAN( 'p', "pi", &params.pi, "Use PI-aware variants of FUTEX_CMP_REQUEUE"),
+
 	OPT_END()
 };
 
@@ -65,13 +73,15 @@ static void print_summary(void)
 
 	printf("Requeued %d of %d threads in %.4f ms (+-%.2f%%)\n",
 	       requeued_avg,
-	       nthreads,
+	       params.nthreads,
 	       requeuetime_avg / USEC_PER_MSEC,
 	       rel_stddev_stats(requeuetime_stddev, requeuetime_avg));
 }
 
 static void *workerfn(void *arg __maybe_unused)
 {
+	int ret;
+
 	pthread_mutex_lock(&thread_lock);
 	threads_starting--;
 	if (!threads_starting)
@@ -79,7 +89,34 @@ static void *workerfn(void *arg __maybe_unused)
 	pthread_cond_wait(&thread_worker, &thread_lock);
 	pthread_mutex_unlock(&thread_lock);
 
-	futex_wait(&futex1, 0, NULL, futex_flag);
+	while (1) {
+		if (!params.pi) {
+			ret = futex_wait(&futex1, 0, NULL, futex_flag);
+			if (!ret)
+				break;
+
+			if (ret && errno != EAGAIN) {
+				if (!params.silent)
+					warnx("futex_wait");
+				break;
+			}
+		} else {
+			ret = futex_wait_requeue_pi(&futex1, 0, &futex2,
+						    NULL, futex_flag);
+			if (!ret) {
+				/* got the lock at futex2 */
+				futex_unlock_pi(&futex2, futex_flag);
+				break;
+			}
+
+			if (ret && errno != EAGAIN) {
+				if (!params.silent)
+					warnx("futex_wait_requeue_pi");
+				break;
+			}
+		}
+	}
+
 	return NULL;
 }
 
@@ -89,10 +126,10 @@ static void block_threads(pthread_t *w,
 	cpu_set_t cpuset;
 	unsigned int i;
 
-	threads_starting = nthreads;
+	threads_starting = params.nthreads;
 
 	/* create and block all threads */
-	for (i = 0; i < nthreads; i++) {
+	for (i = 0; i < params.nthreads; i++) {
 		CPU_ZERO(&cpuset);
 		CPU_SET(cpu->map[i % cpu->nr], &cpuset);
 
@@ -132,22 +169,31 @@ int bench_futex_requeue(int argc, const char **argv)
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
 
-	if (!nthreads)
-		nthreads = cpu->nr;
+	if (params.mlockall) {
+		if (mlockall(MCL_CURRENT | MCL_FUTURE))
+			err(EXIT_FAILURE, "mlockall");
+	}
+
+	if (!params.nthreads)
+		params.nthreads = cpu->nr;
 
-	worker = calloc(nthreads, sizeof(*worker));
+	worker = calloc(params.nthreads, sizeof(*worker));
 	if (!worker)
 		err(EXIT_FAILURE, "calloc");
 
-	if (!fshared)
+	if (!params.fshared)
 		futex_flag = FUTEX_PRIVATE_FLAG;
 
-	if (nrequeue > nthreads)
-		nrequeue = nthreads;
+	if (params.nrequeue > params.nthreads)
+		params.nrequeue = params.nthreads;
 
-	printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), "
-	       "%d at a time.\n\n",  getpid(), nthreads,
-	       fshared ? "shared":"private", &futex1, &futex2, nrequeue);
+	if (params.broadcast)
+		params.nrequeue = params.nthreads;
+
+	printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %s%p), "
+	       "%d at a time.\n\n",  getpid(), params.nthreads,
+	       params.fshared ? "shared":"private", &futex1,
+	       params.pi ? "PI ": "", &futex2, params.nrequeue);
 
 	init_stats(&requeued_stats);
 	init_stats(&requeuetime_stats);
@@ -157,7 +203,7 @@ int bench_futex_requeue(int argc, const char **argv)
 	pthread_cond_init(&thread_worker, NULL);
 
 	for (j = 0; j < bench_repeat && !done; j++) {
-		unsigned int nrequeued = 0;
+		unsigned int nrequeued = 0, wakeups = 0;
 		struct timeval start, end, runtime;
 
 		/* create, launch & block all threads */
@@ -174,13 +220,31 @@ int bench_futex_requeue(int argc, const char **argv)
 
 		/* Ok, all threads are patiently blocked, start requeueing */
 		gettimeofday(&start, NULL);
-		while (nrequeued < nthreads) {
+		while (nrequeued < params.nthreads) {
+			int r;
+
 			/*
-			 * Do not wakeup any tasks blocked on futex1, allowing
-			 * us to really measure futex_wait functionality.
+			 * For the regular non-pi case, do not wakeup any tasks
+			 * blocked on futex1, allowing us to really measure
+			 * futex_wait functionality. For the PI case the first
+			 * waiter is always awoken.
 			 */
-			nrequeued += futex_cmp_requeue(&futex1, 0, &futex2, 0,
-						       nrequeue, futex_flag);
+			if (!params.pi) {
+				r = futex_cmp_requeue(&futex1, 0, &futex2, 0,
+						      params.nrequeue,
+						      futex_flag);
+			} else {
+				r = futex_cmp_requeue_pi(&futex1, 0, &futex2,
+							 params.nrequeue,
+							 futex_flag);
+				wakeups++; /* assume no error */
+			}
+
+			if (r < 0)
+				err(EXIT_FAILURE, "couldn't requeue from %p to %p",
+				    &futex1, &futex2);
+
+			nrequeued += r;
 		}
 
 		gettimeofday(&end, NULL);
@@ -189,17 +253,32 @@ int bench_futex_requeue(int argc, const char **argv)
 		update_stats(&requeued_stats, nrequeued);
 		update_stats(&requeuetime_stats, runtime.tv_usec);
 
-		if (!silent) {
-			printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n",
-			       j + 1, nrequeued, nthreads, runtime.tv_usec / (double)USEC_PER_MSEC);
+		if (!params.silent) {
+			if (!params.pi)
+				printf("[Run %d]: Requeued %d of %d threads in "
+				       "%.4f ms\n", j + 1, nrequeued,
+				       params.nthreads,
+				       runtime.tv_usec / (double)USEC_PER_MSEC);
+			else {
+				nrequeued -= wakeups;
+				printf("[Run %d]: Awoke and Requeued (%d+%d) of "
+				       "%d threads in %.4f ms\n",
+				       j + 1, wakeups, nrequeued,
+				       params.nthreads,
+				       runtime.tv_usec / (double)USEC_PER_MSEC);
+			}
+
 		}
 
-		/* everybody should be blocked on futex2, wake'em up */
-		nrequeued = futex_wake(&futex2, nrequeued, futex_flag);
-		if (nthreads != nrequeued)
-			warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads);
+		if (!params.pi) {
+			/* everybody should be blocked on futex2, wake'em up */
+			nrequeued = futex_wake(&futex2, nrequeued, futex_flag);
+			if (params.nthreads != nrequeued)
+				warnx("couldn't wakeup all tasks (%d/%d)",
+				      nrequeued, params.nthreads);
+		}
 
-		for (i = 0; i < nthreads; i++) {
+		for (i = 0; i < params.nthreads; i++) {
 			ret = pthread_join(worker[i], NULL);
 			if (ret)
 				err(EXIT_FAILURE, "pthread_join");
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index 6e6f5247e1fe..e970e6b9ad53 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -34,6 +34,7 @@ int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe
 #include <err.h>
 #include <stdlib.h>
 #include <sys/time.h>
+#include <sys/mman.h>
 
 struct thread_data {
 	pthread_t worker;
@@ -47,8 +48,7 @@ static unsigned int nwakes = 1;
 static u_int32_t futex = 0;
 
 static pthread_t *blocked_worker;
-static bool done = false, silent = false, fshared = false;
-static unsigned int nblocked_threads = 0, nwaking_threads = 0;
+static bool done = false;
 static pthread_mutex_t thread_lock;
 static pthread_cond_t thread_parent, thread_worker;
 static pthread_barrier_t barrier;
@@ -56,11 +56,15 @@ static struct stats waketime_stats, wakeup_stats;
 static unsigned int threads_starting;
 static int futex_flag = 0;
 
+static struct bench_futex_parameters params;
+
 static const struct option options[] = {
-	OPT_UINTEGER('t', "threads", &nblocked_threads, "Specify amount of threads"),
-	OPT_UINTEGER('w', "nwakers", &nwaking_threads, "Specify amount of waking threads"),
-	OPT_BOOLEAN( 's', "silent",  &silent,   "Silent mode: do not display data/details"),
-	OPT_BOOLEAN( 'S', "shared",  &fshared,  "Use shared futexes instead of private ones"),
+	OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
+	OPT_UINTEGER('w', "nwakers", &params.nwakes, "Specify amount of waking threads"),
+	OPT_BOOLEAN( 's', "silent",  &params.silent, "Silent mode: do not display data/details"),
+	OPT_BOOLEAN( 'S', "shared",  &params.fshared, "Use shared futexes instead of private ones"),
+	OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"),
+
 	OPT_END()
 };
 
@@ -96,10 +100,10 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
 
 	pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
 
-	pthread_barrier_init(&barrier, NULL, nwaking_threads + 1);
+	pthread_barrier_init(&barrier, NULL, params.nwakes + 1);
 
 	/* create and block all threads */
-	for (i = 0; i < nwaking_threads; i++) {
+	for (i = 0; i < params.nwakes; i++) {
 		/*
 		 * Thread creation order will impact per-thread latency
 		 * as it will affect the order to acquire the hb spinlock.
@@ -112,7 +116,7 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
 
 	pthread_barrier_wait(&barrier);
 
-	for (i = 0; i < nwaking_threads; i++)
+	for (i = 0; i < params.nwakes; i++)
 		if (pthread_join(td[i].worker, NULL))
 			err(EXIT_FAILURE, "pthread_join");
 
@@ -143,10 +147,10 @@ static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
 	cpu_set_t cpuset;
 	unsigned int i;
 
-	threads_starting = nblocked_threads;
+	threads_starting = params.nthreads;
 
 	/* create and block all threads */
-	for (i = 0; i < nblocked_threads; i++) {
+	for (i = 0; i < params.nthreads; i++) {
 		CPU_ZERO(&cpuset);
 		CPU_SET(cpu->map[i % cpu->nr], &cpuset);
 
@@ -167,7 +171,7 @@ static void print_run(struct thread_data *waking_worker, unsigned int run_num)
 	init_stats(&__wakeup_stats);
 	init_stats(&__waketime_stats);
 
-	for (i = 0; i < nwaking_threads; i++) {
+	for (i = 0; i < params.nwakes; i++) {
 		update_stats(&__waketime_stats, waking_worker[i].runtime.tv_usec);
 		update_stats(&__wakeup_stats, waking_worker[i].nwoken);
 	}
@@ -178,7 +182,7 @@ static void print_run(struct thread_data *waking_worker, unsigned int run_num)
 
 	printf("[Run %d]: Avg per-thread latency (waking %d/%d threads) "
 	       "in %.4f ms (+-%.2f%%)\n", run_num + 1, wakeup_avg,
-	       nblocked_threads, waketime_avg / USEC_PER_MSEC,
+	       params.nthreads, waketime_avg / USEC_PER_MSEC,
 	       rel_stddev_stats(waketime_stddev, waketime_avg));
 }
 
@@ -193,7 +197,7 @@ static void print_summary(void)
 
 	printf("Avg per-thread latency (waking %d/%d threads) in %.4f ms (+-%.2f%%)\n",
 	       wakeup_avg,
-	       nblocked_threads,
+	       params.nthreads,
 	       waketime_avg / USEC_PER_MSEC,
 	       rel_stddev_stats(waketime_stddev, waketime_avg));
 }
@@ -203,7 +207,7 @@ static void do_run_stats(struct thread_data *waking_worker)
 {
 	unsigned int i;
 
-	for (i = 0; i < nwaking_threads; i++) {
+	for (i = 0; i < params.nwakes; i++) {
 		update_stats(&waketime_stats, waking_worker[i].runtime.tv_usec);
 		update_stats(&wakeup_stats, waking_worker[i].nwoken);
 	}
@@ -238,36 +242,42 @@ int bench_futex_wake_parallel(int argc, const char **argv)
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
 
+	if (params.mlockall) {
+		if (mlockall(MCL_CURRENT | MCL_FUTURE))
+			err(EXIT_FAILURE, "mlockall");
+	}
+
 	cpu = perf_cpu_map__new(NULL);
 	if (!cpu)
 		err(EXIT_FAILURE, "calloc");
 
-	if (!nblocked_threads)
-		nblocked_threads = cpu->nr;
+	if (!params.nthreads)
+		params.nthreads = cpu->nr;
 
 	/* some sanity checks */
-	if (nwaking_threads > nblocked_threads || !nwaking_threads)
-		nwaking_threads = nblocked_threads;
+	if (params.nwakes > params.nthreads ||
+	    !params.nwakes)
+		params.nwakes = params.nthreads;
 
-	if (nblocked_threads % nwaking_threads)
+	if (params.nthreads % params.nwakes)
 		errx(EXIT_FAILURE, "Must be perfectly divisible");
 	/*
 	 * Each thread will wakeup nwakes tasks in
 	 * a single futex_wait call.
 	 */
-	nwakes = nblocked_threads/nwaking_threads;
+	nwakes = params.nthreads/params.nwakes;
 
-	blocked_worker = calloc(nblocked_threads, sizeof(*blocked_worker));
+	blocked_worker = calloc(params.nthreads, sizeof(*blocked_worker));
 	if (!blocked_worker)
 		err(EXIT_FAILURE, "calloc");
 
-	if (!fshared)
+	if (!params.fshared)
 		futex_flag = FUTEX_PRIVATE_FLAG;
 
 	printf("Run summary [PID %d]: blocking on %d threads (at [%s] "
 	       "futex %p), %d threads waking up %d at a time.\n\n",
-	       getpid(), nblocked_threads, fshared ? "shared":"private",
-	       &futex, nwaking_threads, nwakes);
+	       getpid(), params.nthreads, params.fshared ? "shared":"private",
+	       &futex, params.nwakes, nwakes);
 
 	init_stats(&wakeup_stats);
 	init_stats(&waketime_stats);
@@ -278,7 +288,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
 	pthread_cond_init(&thread_worker, NULL);
 
 	for (j = 0; j < bench_repeat && !done; j++) {
-		waking_worker = calloc(nwaking_threads, sizeof(*waking_worker));
+		waking_worker = calloc(params.nwakes, sizeof(*waking_worker));
 		if (!waking_worker)
 			err(EXIT_FAILURE, "calloc");
 
@@ -297,14 +307,14 @@ int bench_futex_wake_parallel(int argc, const char **argv)
 		/* Ok, all threads are patiently blocked, start waking folks up */
 		wakeup_threads(waking_worker, thread_attr);
 
-		for (i = 0; i < nblocked_threads; i++) {
+		for (i = 0; i < params.nthreads; i++) {
 			ret = pthread_join(blocked_worker[i], NULL);
 			if (ret)
 				err(EXIT_FAILURE, "pthread_join");
 		}
 
 		do_run_stats(waking_worker);
-		if (!silent)
+		if (!params.silent)
 			print_run(waking_worker, j);
 
 		free(waking_worker);
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 6d217868f53c..77f058a47790 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -27,29 +27,34 @@
 #include <err.h>
 #include <stdlib.h>
 #include <sys/time.h>
+#include <sys/mman.h>
 
 /* all threads will block on the same futex */
 static u_int32_t futex1 = 0;
 
-/*
- * How many wakeups to do at a time.
- * Default to 1 in order to make the kernel work more.
- */
-static unsigned int nwakes = 1;
-
-pthread_t *worker;
-static bool done = false, silent = false, fshared = false;
+static pthread_t *worker;
+static bool done = false;
 static pthread_mutex_t thread_lock;
 static pthread_cond_t thread_parent, thread_worker;
 static struct stats waketime_stats, wakeup_stats;
-static unsigned int threads_starting, nthreads = 0;
+static unsigned int threads_starting;
 static int futex_flag = 0;
 
+static struct bench_futex_parameters params = {
+	/*
+	 * How many wakeups to do at a time.
+	 * Default to 1 in order to make the kernel work more.
+	 */
+	.nwakes  = 1,
+};
+
 static const struct option options[] = {
-	OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
-	OPT_UINTEGER('w', "nwakes",  &nwakes,   "Specify amount of threads to wake at once"),
-	OPT_BOOLEAN( 's', "silent",  &silent,   "Silent mode: do not display data/details"),
-	OPT_BOOLEAN( 'S', "shared",  &fshared,  "Use shared futexes instead of private ones"),
+	OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
+	OPT_UINTEGER('w', "nwakes",  &params.nwakes, "Specify amount of threads to wake at once"),
+	OPT_BOOLEAN( 's', "silent",  &params.silent, "Silent mode: do not display data/details"),
+	OPT_BOOLEAN( 'S', "shared",  &params.fshared, "Use shared futexes instead of private ones"),
+	OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"),
+
 	OPT_END()
 };
 
@@ -84,7 +89,7 @@ static void print_summary(void)
 
 	printf("Wokeup %d of %d threads in %.4f ms (+-%.2f%%)\n",
 	       wakeup_avg,
-	       nthreads,
+	       params.nthreads,
 	       waketime_avg / USEC_PER_MSEC,
 	       rel_stddev_stats(waketime_stddev, waketime_avg));
 }
@@ -95,10 +100,10 @@ static void block_threads(pthread_t *w,
 	cpu_set_t cpuset;
 	unsigned int i;
 
-	threads_starting = nthreads;
+	threads_starting = params.nthreads;
 
 	/* create and block all threads */
-	for (i = 0; i < nthreads; i++) {
+	for (i = 0; i < params.nthreads; i++) {
 		CPU_ZERO(&cpuset);
 		CPU_SET(cpu->map[i % cpu->nr], &cpuset);
 
@@ -140,19 +145,25 @@ int bench_futex_wake(int argc, const char **argv)
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
 
-	if (!nthreads)
-		nthreads = cpu->nr;
+	if (params.mlockall) {
+		if (mlockall(MCL_CURRENT | MCL_FUTURE))
+			err(EXIT_FAILURE, "mlockall");
+	}
+
+	if (!params.nthreads)
+		params.nthreads = cpu->nr;
 
-	worker = calloc(nthreads, sizeof(*worker));
+	worker = calloc(params.nthreads, sizeof(*worker));
 	if (!worker)
 		err(EXIT_FAILURE, "calloc");
 
-	if (!fshared)
+	if (!params.fshared)
 		futex_flag = FUTEX_PRIVATE_FLAG;
 
 	printf("Run summary [PID %d]: blocking on %d threads (at [%s] futex %p), "
 	       "waking up %d at a time.\n\n",
-	       getpid(), nthreads, fshared ? "shared":"private",  &futex1, nwakes);
+	       getpid(), params.nthreads, params.fshared ? "shared":"private",
+	       &futex1, params.nwakes);
 
 	init_stats(&wakeup_stats);
 	init_stats(&waketime_stats);
@@ -179,20 +190,22 @@ int bench_futex_wake(int argc, const char **argv)
 
 		/* Ok, all threads are patiently blocked, start waking folks up */
 		gettimeofday(&start, NULL);
-		while (nwoken != nthreads)
-			nwoken += futex_wake(&futex1, nwakes, futex_flag);
+		while (nwoken != params.nthreads)
+			nwoken += futex_wake(&futex1,
+					     params.nwakes, futex_flag);
 		gettimeofday(&end, NULL);
 		timersub(&end, &start, &runtime);
 
 		update_stats(&wakeup_stats, nwoken);
 		update_stats(&waketime_stats, runtime.tv_usec);
 
-		if (!silent) {
+		if (!params.silent) {
 			printf("[Run %d]: Wokeup %d of %d threads in %.4f ms\n",
-			       j + 1, nwoken, nthreads, runtime.tv_usec / (double)USEC_PER_MSEC);
+			       j + 1, nwoken, params.nthreads,
+			       runtime.tv_usec / (double)USEC_PER_MSEC);
 		}
 
-		for (i = 0; i < nthreads; i++) {
+		for (i = 0; i < params.nthreads; i++) {
 			ret = pthread_join(worker[i], NULL);
 			if (ret)
 				err(EXIT_FAILURE, "pthread_join");
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
index 31b53cc7d5bc..b3853aac3021 100644
--- a/tools/perf/bench/futex.h
+++ b/tools/perf/bench/futex.h
@@ -13,6 +13,20 @@
 #include <sys/types.h>
 #include <linux/futex.h>
 
+struct bench_futex_parameters {
+	bool silent;
+	bool fshared;
+	bool mlockall;
+	bool multi; /* lock-pi */
+	bool pi; /* requeue-pi */
+	bool broadcast; /* requeue */
+	unsigned int runtime; /* seconds*/
+	unsigned int nthreads;
+	unsigned int nfutexes;
+	unsigned int nwakes;
+	unsigned int nrequeue;
+};
+
 /**
  * futex() - SYS_futex syscall wrapper
  * @uaddr:	address of first futex
@@ -20,7 +34,7 @@
  * @val:	typically expected value of uaddr, but varies by op
  * @timeout:	typically an absolute struct timespec (except where noted
  *		otherwise). Overloaded by some ops
- * @uaddr2:	address of second futex for some ops\
+ * @uaddr2:	address of second futex for some ops
  * @val3:	varies by op
  * @opflags:	flags to be bitwise OR'd with op, such as FUTEX_PRIVATE_FLAG
  *
@@ -77,7 +91,7 @@ futex_unlock_pi(u_int32_t *uaddr, int opflags)
 /**
 * futex_cmp_requeue() - requeue tasks from uaddr to uaddr2
 * @nr_wake:        wake up to this many tasks
-* @nr_requeue:        requeue up to this many tasks
+* @nr_requeue:     requeue up to this many tasks
 */
 static inline int
 futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wake,
@@ -86,4 +100,38 @@ futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wak
 	return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2,
 		 val, opflags);
 }
+
+/**
+ * futex_wait_requeue_pi() - block on uaddr and prepare to requeue to uaddr2
+ * @uaddr:	non-PI futex source
+ * @uaddr2:	PI futex target
+ *
+ * This is the first half of the requeue_pi mechanism. It shall always be
+ * paired with futex_cmp_requeue_pi().
+ */
+static inline int
+futex_wait_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2,
+		      struct timespec *timeout, int opflags)
+{
+	return futex(uaddr, FUTEX_WAIT_REQUEUE_PI, val, timeout, uaddr2, 0,
+		     opflags);
+}
+
+/**
+ * futex_cmp_requeue_pi() - requeue tasks from uaddr to uaddr2
+ * @uaddr:	non-PI futex source
+ * @uaddr2:	PI futex target
+ * @nr_requeue:	requeue up to this many tasks
+ *
+ * This is the second half of the requeue_pi mechanism. It shall always be
+ * paired with futex_wait_requeue_pi(). The first waker is always awoken.
+ */
+static inline int
+futex_cmp_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2,
+		     int nr_requeue, int opflags)
+{
+	return futex(uaddr, FUTEX_CMP_REQUEUE_PI, 1, nr_requeue, uaddr2,
+		     val, opflags);
+}
+
 #endif /* _FUTEX_H */
diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c
index 55d373b75791..17672790f123 100644
--- a/tools/perf/bench/inject-buildid.c
+++ b/tools/perf/bench/inject-buildid.c
@@ -133,7 +133,7 @@ static u64 dso_map_addr(struct bench_dso *dso)
 	return 0x400000ULL + dso->ino * 8192ULL;
 }
 
-static u32 synthesize_attr(struct bench_data *data)
+static ssize_t synthesize_attr(struct bench_data *data)
 {
 	union perf_event event;
 
@@ -151,7 +151,7 @@ static u32 synthesize_attr(struct bench_data *data)
 	return writen(data->input_pipe[1], &event, event.header.size);
 }
 
-static u32 synthesize_fork(struct bench_data *data)
+static ssize_t synthesize_fork(struct bench_data *data)
 {
 	union perf_event event;
 
@@ -169,8 +169,7 @@ static u32 synthesize_fork(struct bench_data *data)
 	return writen(data->input_pipe[1], &event, event.header.size);
 }
 
-static u32 synthesize_mmap(struct bench_data *data, struct bench_dso *dso,
-			   u64 timestamp)
+static ssize_t synthesize_mmap(struct bench_data *data, struct bench_dso *dso, u64 timestamp)
 {
 	union perf_event event;
 	size_t len = offsetof(struct perf_record_mmap2, filename);
@@ -198,23 +197,25 @@ static u32 synthesize_mmap(struct bench_data *data, struct bench_dso *dso,
 
 	if (len > sizeof(event.mmap2)) {
 		/* write mmap2 event first */
-		writen(data->input_pipe[1], &event, len - bench_id_hdr_size);
+		if (writen(data->input_pipe[1], &event, len - bench_id_hdr_size) < 0)
+			return -1;
 		/* zero-fill sample id header */
 		memset(id_hdr_ptr, 0, bench_id_hdr_size);
 		/* put timestamp in the right position */
 		ts_idx = (bench_id_hdr_size / sizeof(u64)) - 2;
 		id_hdr_ptr[ts_idx] = timestamp;
-		writen(data->input_pipe[1], id_hdr_ptr, bench_id_hdr_size);
-	} else {
-		ts_idx = (len / sizeof(u64)) - 2;
-		id_hdr_ptr[ts_idx] = timestamp;
-		writen(data->input_pipe[1], &event, len);
+		if (writen(data->input_pipe[1], id_hdr_ptr, bench_id_hdr_size) < 0)
+			return -1;
+
+		return len;
 	}
-	return len;
+
+	ts_idx = (len / sizeof(u64)) - 2;
+	id_hdr_ptr[ts_idx] = timestamp;
+	return writen(data->input_pipe[1], &event, len);
 }
 
-static u32 synthesize_sample(struct bench_data *data, struct bench_dso *dso,
-			     u64 timestamp)
+static ssize_t synthesize_sample(struct bench_data *data, struct bench_dso *dso, u64 timestamp)
 {
 	union perf_event event;
 	struct perf_sample sample = {
@@ -233,7 +234,7 @@ static u32 synthesize_sample(struct bench_data *data, struct bench_dso *dso,
 	return writen(data->input_pipe[1], &event, event.header.size);
 }
 
-static u32 synthesize_flush(struct bench_data *data)
+static ssize_t synthesize_flush(struct bench_data *data)
 {
 	struct perf_event_header header = {
 		.size = sizeof(header),
@@ -348,14 +349,16 @@ static int inject_build_id(struct bench_data *data, u64 *max_rss)
 	int status;
 	unsigned int i, k;
 	struct rusage rusage;
-	u64 len = 0;
 
 	/* this makes the child to run */
 	if (perf_header__write_pipe(data->input_pipe[1]) < 0)
 		return -1;
 
-	len += synthesize_attr(data);
-	len += synthesize_fork(data);
+	if (synthesize_attr(data) < 0)
+		return -1;
+
+	if (synthesize_fork(data) < 0)
+		return -1;
 
 	for (i = 0; i < nr_mmaps; i++) {
 		int idx = rand() % (nr_dsos - 1);
@@ -363,13 +366,18 @@ static int inject_build_id(struct bench_data *data, u64 *max_rss)
 		u64 timestamp = rand() % 1000000;
 
 		pr_debug2("   [%d] injecting: %s\n", i+1, dso->name);
-		len += synthesize_mmap(data, dso, timestamp);
+		if (synthesize_mmap(data, dso, timestamp) < 0)
+			return -1;
 
-		for (k = 0; k < nr_samples; k++)
-			len += synthesize_sample(data, dso, timestamp + k * 1000);
+		for (k = 0; k < nr_samples; k++) {
+			if (synthesize_sample(data, dso, timestamp + k * 1000) < 0)
+				return -1;
+		}
 
-		if ((i + 1) % 10 == 0)
-			len += synthesize_flush(data);
+		if ((i + 1) % 10 == 0) {
+			if (synthesize_flush(data) < 0)
+				return -1;
+		}
 	}
 
 	/* this makes the child to finish */
diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c
index b2924e3181dc..05f7c923c745 100644
--- a/tools/perf/bench/synthesize.c
+++ b/tools/perf/bench/synthesize.c
@@ -117,7 +117,7 @@ static int run_single_threaded(void)
 	int err;
 
 	perf_set_singlethreaded();
-	session = perf_session__new(NULL, false, NULL);
+	session = perf_session__new(NULL, NULL);
 	if (IS_ERR(session)) {
 		pr_err("Session creation failed.\n");
 		return PTR_ERR(session);
@@ -161,7 +161,7 @@ static int do_run_multi_threaded(struct target *target,
 	init_stats(&time_stats);
 	init_stats(&event_stats);
 	for (i = 0; i < multi_iterations; i++) {
-		session = perf_session__new(NULL, false, NULL);
+		session = perf_session__new(NULL, NULL);
 		if (IS_ERR(session))
 			return PTR_ERR(session);
 
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index cebb861be3e3..05eb098cb0e3 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -596,7 +596,7 @@ int cmd_annotate(int argc, const char **argv)
 
 	data.path = input_name;
 
-	annotate.session = perf_session__new(&data, false, &annotate.tool);
+	annotate.session = perf_session__new(&data, &annotate.tool);
 	if (IS_ERR(annotate.session))
 		return PTR_ERR(annotate.session);
 
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index 62a7b7420a44..d0895162c2ba 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -88,6 +88,7 @@ static struct bench internals_benchmarks[] = {
 	{ "synthesize", "Benchmark perf event synthesis",	bench_synthesize	},
 	{ "kallsyms-parse", "Benchmark kallsyms parsing",	bench_kallsyms_parse	},
 	{ "inject-build-id", "Benchmark build-id injection",	bench_inject_build_id	},
+	{ "evlist-open-close", "Benchmark evlist open and close",	bench_evlist_open_close	},
 	{ NULL,		NULL,					NULL			}
 };
 
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index ecd0d3cb6f5c..0db3cfc04c47 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -443,7 +443,7 @@ int cmd_buildid_cache(int argc, const char **argv)
 		data.path  = missing_filename;
 		data.force = force;
 
-		session = perf_session__new(&data, false, NULL);
+		session = perf_session__new(&data, NULL);
 		if (IS_ERR(session))
 			return PTR_ERR(session);
 	}
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index 833405c27dae..cebadd632234 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -65,7 +65,7 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
 	if (filename__fprintf_build_id(input_name, stdout) > 0)
 		goto out;
 
-	session = perf_session__new(&data, false, &build_id__mark_dso_hit_ops);
+	session = perf_session__new(&data, &build_id__mark_dso_hit_ops);
 	if (IS_ERR(session))
 		return PTR_ERR(session);
 
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 6dea37f141b2..a192014fa52b 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -139,11 +139,11 @@ static void *c2c_he_zalloc(size_t size)
 	if (!c2c_he)
 		return NULL;
 
-	c2c_he->cpuset = bitmap_alloc(c2c.cpus_cnt);
+	c2c_he->cpuset = bitmap_zalloc(c2c.cpus_cnt);
 	if (!c2c_he->cpuset)
 		return NULL;
 
-	c2c_he->nodeset = bitmap_alloc(c2c.nodes_cnt);
+	c2c_he->nodeset = bitmap_zalloc(c2c.nodes_cnt);
 	if (!c2c_he->nodeset)
 		return NULL;
 
@@ -2047,7 +2047,7 @@ static int setup_nodes(struct perf_session *session)
 		struct perf_cpu_map *map = n[node].map;
 		unsigned long *set;
 
-		set = bitmap_alloc(c2c.cpus_cnt);
+		set = bitmap_zalloc(c2c.cpus_cnt);
 		if (!set)
 			return -ENOMEM;
 
@@ -2790,7 +2790,7 @@ static int perf_c2c__report(int argc, const char **argv)
 		goto out;
 	}
 
-	session = perf_session__new(&data, 0, &c2c.tool);
+	session = perf_session__new(&data, &c2c.tool);
 	if (IS_ERR(session)) {
 		err = PTR_ERR(session);
 		pr_debug("Error creating perf session\n");
diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c
index 15ca23675ef0..b7f9dc85a407 100644
--- a/tools/perf/builtin-data.c
+++ b/tools/perf/builtin-data.c
@@ -21,46 +21,21 @@ static struct data_cmd data_cmds[];
 #define for_each_cmd(cmd) \
 	for (cmd = data_cmds; cmd && cmd->name; cmd++)
 
-static const struct option data_options[] = {
-	OPT_END()
-};
-
 static const char * const data_subcommands[] = { "convert", NULL };
 
 static const char *data_usage[] = {
-	"perf data [<common options>] <command> [<options>]",
+	"perf data convert [<options>]",
 	NULL
 };
 
-static void print_usage(void)
-{
-	struct data_cmd *cmd;
-
-	printf("Usage:\n");
-	printf("\t%s\n\n", data_usage[0]);
-	printf("\tAvailable commands:\n");
-
-	for_each_cmd(cmd) {
-		printf("\t %s\t- %s\n", cmd->name, cmd->summary);
-	}
-
-	printf("\n");
-}
-
-static const char * const data_convert_usage[] = {
-	"perf data convert [<options>]",
-	NULL
+const char *to_json;
+const char *to_ctf;
+struct perf_data_convert_opts opts = {
+	.force = false,
+	.all = false,
 };
 
-static int cmd_data_convert(int argc, const char **argv)
-{
-	const char *to_json = NULL;
-	const char *to_ctf = NULL;
-	struct perf_data_convert_opts opts = {
-		.force = false,
-		.all = false,
-	};
-	const struct option options[] = {
+const struct option data_options[] = {
 		OPT_INCR('v', "verbose", &verbose, "be more verbose"),
 		OPT_STRING('i', "input", &input_name, "file", "input file name"),
 		OPT_STRING(0, "to-json", &to_json, NULL, "Convert to JSON format"),
@@ -73,10 +48,13 @@ static int cmd_data_convert(int argc, const char **argv)
 		OPT_END()
 	};
 
-	argc = parse_options(argc, argv, options,
-			     data_convert_usage, 0);
+static int cmd_data_convert(int argc, const char **argv)
+{
+
+	argc = parse_options(argc, argv, data_options,
+			     data_usage, 0);
 	if (argc) {
-		usage_with_options(data_convert_usage, options);
+		usage_with_options(data_usage, data_options);
 		return -1;
 	}
 
@@ -116,14 +94,13 @@ int cmd_data(int argc, const char **argv)
 	struct data_cmd *cmd;
 	const char *cmdstr;
 
-	/* No command specified. */
-	if (argc < 2)
-		goto usage;
-
 	argc = parse_options_subcommand(argc, argv, data_options, data_subcommands, data_usage,
 			     PARSE_OPT_STOP_AT_NON_OPTION);
-	if (argc < 1)
-		goto usage;
+
+	if (!argc) {
+		usage_with_options(data_usage, data_options);
+		return -1;
+	}
 
 	cmdstr = argv[0];
 
@@ -135,7 +112,6 @@ int cmd_data(int argc, const char **argv)
 	}
 
 	pr_err("Unknown command: %s\n", cmdstr);
-usage:
-	print_usage();
+	usage_with_options(data_usage, data_options);
 	return -1;
 }
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 80450c0e8f36..d925096dd7f0 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -1156,7 +1156,7 @@ static int check_file_brstack(void)
 	int i;
 
 	data__for_each_file(i, d) {
-		d->session = perf_session__new(&d->data, false, &pdiff.tool);
+		d->session = perf_session__new(&d->data, &pdiff.tool);
 		if (IS_ERR(d->session)) {
 			pr_err("Failed to open %s\n", d->data.path);
 			return PTR_ERR(d->session);
@@ -1188,7 +1188,7 @@ static int __cmd_diff(void)
 	ret = -EINVAL;
 
 	data__for_each_file(i, d) {
-		d->session = perf_session__new(&d->data, false, &pdiff.tool);
+		d->session = perf_session__new(&d->data, &pdiff.tool);
 		if (IS_ERR(d->session)) {
 			ret = PTR_ERR(d->session);
 			pr_err("Failed to open %s\n", d->data.path);
diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
index 4617b32c9c97..b1076177c37f 100644
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -42,7 +42,7 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details
 	};
 	bool has_tracepoint = false;
 
-	session = perf_session__new(&data, 0, &tool);
+	session = perf_session__new(&data, &tool);
 	if (IS_ERR(session))
 		return PTR_ERR(session);
 
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index c88c61e7f8cc..6ad191e731fc 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -46,6 +46,7 @@ struct perf_inject {
 	bool			jit_mode;
 	bool			in_place_update;
 	bool			in_place_update_dry_run;
+	bool			is_pipe;
 	const char		*input_name;
 	struct perf_data	output;
 	u64			bytes_written;
@@ -126,7 +127,7 @@ static int perf_event__repipe_attr(struct perf_tool *tool,
 	if (ret)
 		return ret;
 
-	if (!inject->output.is_pipe)
+	if (!inject->is_pipe)
 		return 0;
 
 	return perf_event__repipe_synth(tool, event);
@@ -826,14 +827,14 @@ static int __cmd_inject(struct perf_inject *inject)
 	if (!inject->itrace_synth_opts.set)
 		auxtrace_index__free(&session->auxtrace_index);
 
-	if (!data_out->is_pipe && !inject->in_place_update)
+	if (!inject->is_pipe && !inject->in_place_update)
 		lseek(fd, output_data_offset, SEEK_SET);
 
 	ret = perf_session__process_events(session);
 	if (ret)
 		return ret;
 
-	if (!data_out->is_pipe && !inject->in_place_update) {
+	if (!inject->is_pipe && !inject->in_place_update) {
 		if (inject->build_ids)
 			perf_header__set_feat(&session->header,
 					      HEADER_BUILD_ID);
@@ -918,6 +919,7 @@ int cmd_inject(int argc, const char **argv)
 		.use_stdio = true,
 	};
 	int ret;
+	bool repipe = true;
 
 	struct option options[] = {
 		OPT_BOOLEAN('b', "build-ids", &inject.build_ids,
@@ -992,7 +994,20 @@ int cmd_inject(int argc, const char **argv)
 	}
 
 	data.path = inject.input_name;
-	inject.session = perf_session__new(&data, inject.output.is_pipe, &inject.tool);
+	if (!strcmp(inject.input_name, "-") || inject.output.is_pipe) {
+		inject.is_pipe = true;
+		/*
+		 * Do not repipe header when input is a regular file
+		 * since either it can rewrite the header at the end
+		 * or write a new pipe header.
+		 */
+		if (strcmp(inject.input_name, "-"))
+			repipe = false;
+	}
+
+	inject.session = __perf_session__new(&data, repipe,
+					     perf_data__fd(&inject.output),
+					     &inject.tool);
 	if (IS_ERR(inject.session)) {
 		ret = PTR_ERR(inject.session);
 		goto out_close_output;
@@ -1001,6 +1016,21 @@ int cmd_inject(int argc, const char **argv)
 	if (zstd_init(&(inject.session->zstd_data), 0) < 0)
 		pr_warning("Decompression initialization failed.\n");
 
+	if (!data.is_pipe && inject.output.is_pipe) {
+		ret = perf_header__write_pipe(perf_data__fd(&inject.output));
+		if (ret < 0) {
+			pr_err("Couldn't write a new pipe header.\n");
+			goto out_delete;
+		}
+
+		ret = perf_event__synthesize_for_pipe(&inject.tool,
+						      inject.session,
+						      &inject.output,
+						      perf_event__repipe);
+		if (ret < 0)
+			goto out_delete;
+	}
+
 	if (inject.build_ids && !inject.build_id_all) {
 		/*
 		 * to make sure the mmap records are ordered correctly
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 0062445e8ead..da03a341c63c 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -1953,7 +1953,7 @@ int cmd_kmem(int argc, const char **argv)
 
 	data.path = input_name;
 
-	kmem_session = session = perf_session__new(&data, false, &perf_kmem);
+	kmem_session = session = perf_session__new(&data, &perf_kmem);
 	if (IS_ERR(session))
 		return PTR_ERR(session);
 
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 1105c9e40a80..aa1b127ffb5b 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1093,7 +1093,7 @@ static int read_events(struct perf_kvm_stat *kvm)
 	};
 
 	kvm->tool = eops;
-	kvm->session = perf_session__new(&file, false, &kvm->tool);
+	kvm->session = perf_session__new(&file, &kvm->tool);
 	if (IS_ERR(kvm->session)) {
 		pr_err("Initializing perf session failed\n");
 		return PTR_ERR(kvm->session);
@@ -1447,7 +1447,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
 	/*
 	 * perf session
 	 */
-	kvm->session = perf_session__new(&data, false, &kvm->tool);
+	kvm->session = perf_session__new(&data, &kvm->tool);
 	if (IS_ERR(kvm->session)) {
 		err = PTR_ERR(kvm->session);
 		goto out;
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 01326e370009..d70131b7b1b1 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -868,7 +868,7 @@ static int __cmd_report(bool display_info)
 		.force = force,
 	};
 
-	session = perf_session__new(&data, false, &eops);
+	session = perf_session__new(&data, &eops);
 	if (IS_ERR(session)) {
 		pr_err("Initializing perf session failed\n");
 		return PTR_ERR(session);
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 0fd2a74dbaca..fcf65a59bea2 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -271,8 +271,7 @@ static int report_raw_events(struct perf_mem *mem)
 		.force = mem->force,
 	};
 	int ret;
-	struct perf_session *session = perf_session__new(&data, false,
-							 &mem->tool);
+	struct perf_session *session = perf_session__new(&data, &mem->tool);
 
 	if (IS_ERR(session))
 		return PTR_ERR(session);
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 671a21c9ee4d..b3509d9d20cc 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -910,7 +910,8 @@ static int record__open(struct record *rec)
 		 * Enable the dummy event when the process is forked for
 		 * initial_delay, immediately for system wide.
 		 */
-		if (opts->initial_delay && !pos->immediate)
+		if (opts->initial_delay && !pos->immediate &&
+		    !target__has_cpu(&opts->target))
 			pos->core.attr.enable_on_exec = 1;
 		else
 			pos->immediate = 1;
@@ -1387,7 +1388,6 @@ static int record__synthesize(struct record *rec, bool tail)
 	struct perf_data *data = &rec->data;
 	struct record_opts *opts = &rec->opts;
 	struct perf_tool *tool = &rec->tool;
-	int fd = perf_data__fd(data);
 	int err = 0;
 	event_op f = process_synthesized_event;
 
@@ -1395,41 +1395,12 @@ static int record__synthesize(struct record *rec, bool tail)
 		return 0;
 
 	if (data->is_pipe) {
-		/*
-		 * We need to synthesize events first, because some
-		 * features works on top of them (on report side).
-		 */
-		err = perf_event__synthesize_attrs(tool, rec->evlist,
-						   process_synthesized_event);
-		if (err < 0) {
-			pr_err("Couldn't synthesize attrs.\n");
-			goto out;
-		}
-
-		err = perf_event__synthesize_features(tool, session, rec->evlist,
+		err = perf_event__synthesize_for_pipe(tool, session, data,
 						      process_synthesized_event);
-		if (err < 0) {
-			pr_err("Couldn't synthesize features.\n");
-			return err;
-		}
+		if (err < 0)
+			goto out;
 
-		if (have_tracepoints(&rec->evlist->core.entries)) {
-			/*
-			 * FIXME err <= 0 here actually means that
-			 * there were no tracepoints so its not really
-			 * an error, just that we don't need to
-			 * synthesize anything.  We really have to
-			 * return this more properly and also
-			 * propagate errors that now are calling die()
-			 */
-			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
-								  process_synthesized_event);
-			if (err <= 0) {
-				pr_err("Couldn't record tracing data.\n");
-				goto out;
-			}
-			rec->bytes_written += err;
-		}
+		rec->bytes_written += err;
 	}
 
 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
@@ -1681,7 +1652,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		signal(SIGUSR2, SIG_IGN);
 	}
 
-	session = perf_session__new(data, false, tool);
+	session = perf_session__new(data, tool);
 	if (IS_ERR(session)) {
 		pr_err("Perf session creation failed.\n");
 		return PTR_ERR(session);
@@ -2786,7 +2757,7 @@ int cmd_record(int argc, const char **argv)
 
 	if (rec->opts.affinity != PERF_AFFINITY_SYS) {
 		rec->affinity_mask.nbits = cpu__max_cpu();
-		rec->affinity_mask.bits = bitmap_alloc(rec->affinity_mask.nbits);
+		rec->affinity_mask.bits = bitmap_zalloc(rec->affinity_mask.nbits);
 		if (!rec->affinity_mask.bits) {
 			pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits);
 			err = -ENOMEM;
@@ -2884,6 +2855,13 @@ int cmd_record(int argc, const char **argv)
 	/* Enable ignoring missing threads when -u/-p option is defined. */
 	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
 
+	if (evlist__fix_hybrid_cpus(rec->evlist, rec->opts.target.cpu_list)) {
+		pr_err("failed to use cpu list %s\n",
+		       rec->opts.target.cpu_list);
+		goto out;
+	}
+
+	rec->opts.target.hybrid = perf_pmu__has_hybrid();
 	err = -ENOMEM;
 	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
 		usage_with_options(record_usage, record_options);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index dc0364f671b9..a0316ce910db 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -1411,7 +1411,7 @@ int cmd_report(int argc, const char **argv)
 	data.force = symbol_conf.force;
 
 repeat:
-	session = perf_session__new(&data, false, &report.tool);
+	session = perf_session__new(&data, &report.tool);
 	if (IS_ERR(session)) {
 		ret = PTR_ERR(session);
 		goto exit;
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 1ff10d4bccf3..635a6b5a9ec9 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1804,7 +1804,7 @@ static int perf_sched__read_events(struct perf_sched *sched)
 	};
 	int rc = -1;
 
-	session = perf_session__new(&data, false, &sched->tool);
+	session = perf_session__new(&data, &sched->tool);
 	if (IS_ERR(session)) {
 		pr_debug("Error creating perf session");
 		return PTR_ERR(session);
@@ -3011,7 +3011,7 @@ static int perf_sched__timehist(struct perf_sched *sched)
 
 	symbol_conf.use_callchain = sched->show_callchain;
 
-	session = perf_session__new(&data, false, &sched->tool);
+	session = perf_session__new(&data, &sched->tool);
 	if (IS_ERR(session))
 		return PTR_ERR(session);
 
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 064da7f3618d..0e824f7d8b19 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -2212,7 +2212,7 @@ static int process_sample_event(struct perf_tool *tool,
 	if (filter_cpu(sample))
 		goto out_put;
 
-	if (machine__resolve(machine, &al, sample) < 0) {
+	if (!al.thread && machine__resolve(machine, &al, sample) < 0) {
 		pr_err("problem processing %d event, skipping it.\n",
 		       event->header.type);
 		ret = -1;
@@ -2493,6 +2493,17 @@ process_lost_event(struct perf_tool *tool,
 }
 
 static int
+process_throttle_event(struct perf_tool *tool __maybe_unused,
+		       union perf_event *event,
+		       struct perf_sample *sample,
+		       struct machine *machine)
+{
+	if (scripting_ops && scripting_ops->process_throttle)
+		scripting_ops->process_throttle(event, sample, machine);
+	return 0;
+}
+
+static int
 process_finished_round_event(struct perf_tool *tool __maybe_unused,
 			     union perf_event *event,
 			     struct ordered_events *oe __maybe_unused)
@@ -3294,7 +3305,7 @@ int find_scripts(char **scripts_array, char **scripts_path_array, int num,
 	char *temp;
 	int i = 0;
 
-	session = perf_session__new(&data, false, NULL);
+	session = perf_session__new(&data, NULL);
 	if (IS_ERR(session))
 		return PTR_ERR(session);
 
@@ -3652,6 +3663,8 @@ int cmd_script(int argc, const char **argv)
 			.stat_config	 = process_stat_config_event,
 			.thread_map	 = process_thread_map_event,
 			.cpu_map	 = process_cpu_map_event,
+			.throttle	 = process_throttle_event,
+			.unthrottle	 = process_throttle_event,
 			.ordered_events	 = true,
 			.ordering_requires_timestamps = true,
 		},
@@ -4007,7 +4020,7 @@ script_found:
 		use_browser = 0;
 	}
 
-	session = perf_session__new(&data, false, &script.tool);
+	session = perf_session__new(&data, &script.tool);
 	if (IS_ERR(session))
 		return PTR_ERR(session);
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 634375937db9..f6e87b7be5fa 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1996,7 +1996,7 @@ static int __cmd_record(int argc, const char **argv)
 		return -1;
 	}
 
-	session = perf_session__new(data, false, NULL);
+	session = perf_session__new(data, NULL);
 	if (IS_ERR(session)) {
 		pr_err("Perf session creation failed\n");
 		return PTR_ERR(session);
@@ -2168,7 +2168,7 @@ static int __cmd_report(int argc, const char **argv)
 	perf_stat.data.path = input_name;
 	perf_stat.data.mode = PERF_DATA_MODE_READ;
 
-	session = perf_session__new(&perf_stat.data, false, &perf_stat.tool);
+	session = perf_session__new(&perf_stat.data, &perf_stat.tool);
 	if (IS_ERR(session))
 		return PTR_ERR(session);
 
@@ -2386,7 +2386,8 @@ int cmd_stat(int argc, const char **argv)
 	 * --per-thread is aggregated per thread, we dont mix it with cpu mode
 	 */
 	if (((stat_config.aggr_mode != AGGR_GLOBAL &&
-	      stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
+	      stat_config.aggr_mode != AGGR_THREAD) ||
+	     (nr_cgroups || stat_config.cgroup_list)) &&
 	    !target__has_cpu(&target)) {
 		fprintf(stderr, "both cgroup and no-aggregation "
 			"modes only available in system-wide mode\n");
@@ -2394,6 +2395,7 @@ int cmd_stat(int argc, const char **argv)
 		parse_options_usage(stat_usage, stat_options, "G", 1);
 		parse_options_usage(NULL, stat_options, "A", 1);
 		parse_options_usage(NULL, stat_options, "a", 1);
+		parse_options_usage(NULL, stat_options, "for-each-cgroup", 0);
 		goto out;
 	}
 
@@ -2430,6 +2432,12 @@ int cmd_stat(int argc, const char **argv)
 	if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide))
 		target.per_thread = true;
 
+	if (evlist__fix_hybrid_cpus(evsel_list, target.cpu_list)) {
+		pr_err("failed to use cpu list %s\n", target.cpu_list);
+		goto out;
+	}
+
+	target.hybrid = perf_pmu__has_hybrid();
 	if (evlist__create_maps(evsel_list, &target) < 0) {
 		if (target__has_task(&target)) {
 			pr_err("Problems finding threads of monitor\n");
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 4e380e7b5230..43bf4d67edb0 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -1598,8 +1598,7 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name)
 		.force = tchart->force,
 	};
 
-	struct perf_session *session = perf_session__new(&data, false,
-							 &tchart->tool);
+	struct perf_session *session = perf_session__new(&data, &tchart->tool);
 	int ret = -EINVAL;
 
 	if (IS_ERR(session))
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 02f8bb5dbc0f..a3ae9176a83e 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1740,7 +1740,7 @@ int cmd_top(int argc, const char **argv)
 		signal(SIGWINCH, winch_sig);
 	}
 
-	top.session = perf_session__new(NULL, false, NULL);
+	top.session = perf_session__new(NULL, NULL);
 	if (IS_ERR(top.session)) {
 		status = PTR_ERR(top.session);
 		goto out_delete_evlist;
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 9c265fa96011..2bf21194c7b3 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -707,7 +707,15 @@ static size_t syscall_arg__scnprintf_char_array(char *bf, size_t size, struct sy
 
 static const char *bpf_cmd[] = {
 	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
-	"MAP_GET_NEXT_KEY", "PROG_LOAD",
+	"MAP_GET_NEXT_KEY", "PROG_LOAD", "OBJ_PIN", "OBJ_GET", "PROG_ATTACH",
+	"PROG_DETACH", "PROG_TEST_RUN", "PROG_GET_NEXT_ID", "MAP_GET_NEXT_ID",
+	"PROG_GET_FD_BY_ID", "MAP_GET_FD_BY_ID", "OBJ_GET_INFO_BY_FD",
+	"PROG_QUERY", "RAW_TRACEPOINT_OPEN", "BTF_LOAD", "BTF_GET_FD_BY_ID",
+	"TASK_FD_QUERY", "MAP_LOOKUP_AND_DELETE_ELEM", "MAP_FREEZE",
+	"BTF_GET_NEXT_ID", "MAP_LOOKUP_BATCH", "MAP_LOOKUP_AND_DELETE_BATCH",
+	"MAP_UPDATE_BATCH", "MAP_DELETE_BATCH", "LINK_CREATE", "LINK_UPDATE",
+	"LINK_GET_FD_BY_ID", "LINK_GET_NEXT_ID", "ENABLE_STATS", "ITER_CREATE",
+	"LINK_DETACH", "PROG_BIND_MAP",
 };
 static DEFINE_STRARRAY(bpf_cmd, "BPF_");
 
@@ -4228,7 +4236,7 @@ static int trace__replay(struct trace *trace)
 	/* add tid to output */
 	trace->multiple_threads = true;
 
-	session = perf_session__new(&data, false, &trace->tool);
+	session = perf_session__new(&data, &trace->tool);
 	if (IS_ERR(session))
 		return PTR_ERR(session);
 
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index c783558332b8..f1e46277e822 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -144,6 +144,7 @@ done
 # diff with extra ignore lines
 check arch/x86/lib/memcpy_64.S        '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))"'
 check arch/x86/lib/memset_64.S        '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"'
+check arch/x86/include/asm/amd-ibs.h  '-I "^#include [<\"]\(asm/\)*msr-index.h"'
 check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"'
 check include/uapi/linux/mman.h       '-I "^#include <\(uapi/\)*asm/mman.h>"'
 check include/linux/build_bug.h       '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"'
diff --git a/tools/perf/dlfilters/dlfilter-test-api-v0.c b/tools/perf/dlfilters/dlfilter-test-api-v0.c
new file mode 100644
index 000000000000..7565a1852c74
--- /dev/null
+++ b/tools/perf/dlfilters/dlfilter-test-api-v0.c
@@ -0,0 +1,336 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * dlfilter-test-api-v0.c: test original (v0) API for perf --dlfilter shared object
+ * Copyright (c) 2021, Intel Corporation.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+
+/*
+ * Copy original (v0) API instead of including current API
+ */
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+/* Definitions for perf_dlfilter_sample flags */
+enum {
+	PERF_DLFILTER_FLAG_BRANCH	= 1ULL << 0,
+	PERF_DLFILTER_FLAG_CALL		= 1ULL << 1,
+	PERF_DLFILTER_FLAG_RETURN	= 1ULL << 2,
+	PERF_DLFILTER_FLAG_CONDITIONAL	= 1ULL << 3,
+	PERF_DLFILTER_FLAG_SYSCALLRET	= 1ULL << 4,
+	PERF_DLFILTER_FLAG_ASYNC	= 1ULL << 5,
+	PERF_DLFILTER_FLAG_INTERRUPT	= 1ULL << 6,
+	PERF_DLFILTER_FLAG_TX_ABORT	= 1ULL << 7,
+	PERF_DLFILTER_FLAG_TRACE_BEGIN	= 1ULL << 8,
+	PERF_DLFILTER_FLAG_TRACE_END	= 1ULL << 9,
+	PERF_DLFILTER_FLAG_IN_TX	= 1ULL << 10,
+	PERF_DLFILTER_FLAG_VMENTRY	= 1ULL << 11,
+	PERF_DLFILTER_FLAG_VMEXIT	= 1ULL << 12,
+};
+
+/*
+ * perf sample event information (as per perf script and <linux/perf_event.h>)
+ */
+struct perf_dlfilter_sample {
+	__u32 size; /* Size of this structure (for compatibility checking) */
+	__u16 ins_lat;		/* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */
+	__u16 p_stage_cyc;	/* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */
+	__u64 ip;
+	__s32 pid;
+	__s32 tid;
+	__u64 time;
+	__u64 addr;
+	__u64 id;
+	__u64 stream_id;
+	__u64 period;
+	__u64 weight;		/* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */
+	__u64 transaction;	/* Refer PERF_SAMPLE_TRANSACTION in <linux/perf_event.h> */
+	__u64 insn_cnt;	/* For instructions-per-cycle (IPC) */
+	__u64 cyc_cnt;		/* For instructions-per-cycle (IPC) */
+	__s32 cpu;
+	__u32 flags;		/* Refer PERF_DLFILTER_FLAG_* above */
+	__u64 data_src;		/* Refer PERF_SAMPLE_DATA_SRC in <linux/perf_event.h> */
+	__u64 phys_addr;	/* Refer PERF_SAMPLE_PHYS_ADDR in <linux/perf_event.h> */
+	__u64 data_page_size;	/* Refer PERF_SAMPLE_DATA_PAGE_SIZE in <linux/perf_event.h> */
+	__u64 code_page_size;	/* Refer PERF_SAMPLE_CODE_PAGE_SIZE in <linux/perf_event.h> */
+	__u64 cgroup;		/* Refer PERF_SAMPLE_CGROUP in <linux/perf_event.h> */
+	__u8  cpumode;		/* Refer CPUMODE_MASK etc in <linux/perf_event.h> */
+	__u8  addr_correlates_sym; /* True => resolve_addr() can be called */
+	__u16 misc;		/* Refer perf_event_header in <linux/perf_event.h> */
+	__u32 raw_size;		/* Refer PERF_SAMPLE_RAW in <linux/perf_event.h> */
+	const void *raw_data;	/* Refer PERF_SAMPLE_RAW in <linux/perf_event.h> */
+	__u64 brstack_nr;	/* Number of brstack entries */
+	const struct perf_branch_entry *brstack; /* Refer <linux/perf_event.h> */
+	__u64 raw_callchain_nr;	/* Number of raw_callchain entries */
+	const __u64 *raw_callchain; /* Refer <linux/perf_event.h> */
+	const char *event;
+};
+
+/*
+ * Address location (as per perf script)
+ */
+struct perf_dlfilter_al {
+	__u32 size; /* Size of this structure (for compatibility checking) */
+	__u32 symoff;
+	const char *sym;
+	__u64 addr; /* Mapped address (from dso) */
+	__u64 sym_start;
+	__u64 sym_end;
+	const char *dso;
+	__u8  sym_binding; /* STB_LOCAL, STB_GLOBAL or STB_WEAK, refer <elf.h> */
+	__u8  is_64_bit; /* Only valid if dso is not NULL */
+	__u8  is_kernel_ip; /* True if in kernel space */
+	__u32 buildid_size;
+	__u8 *buildid;
+	/* Below members are only populated by resolve_ip() */
+	__u8 filtered; /* True if this sample event will be filtered out */
+	const char *comm;
+};
+
+struct perf_dlfilter_fns {
+	/* Return information about ip */
+	const struct perf_dlfilter_al *(*resolve_ip)(void *ctx);
+	/* Return information about addr (if addr_correlates_sym) */
+	const struct perf_dlfilter_al *(*resolve_addr)(void *ctx);
+	/* Return arguments from --dlarg option */
+	char **(*args)(void *ctx, int *dlargc);
+	/*
+	 * Return information about address (al->size must be set before
+	 * calling). Returns 0 on success, -1 otherwise.
+	 */
+	__s32 (*resolve_address)(void *ctx, __u64 address, struct perf_dlfilter_al *al);
+	/* Return instruction bytes and length */
+	const __u8 *(*insn)(void *ctx, __u32 *length);
+	/* Return source file name and line number */
+	const char *(*srcline)(void *ctx, __u32 *line_number);
+	/* Return perf_event_attr, refer <linux/perf_event.h> */
+	struct perf_event_attr *(*attr)(void *ctx);
+	/* Read object code, return numbers of bytes read */
+	__s32 (*object_code)(void *ctx, __u64 ip, void *buf, __u32 len);
+	/* Reserved */
+	void *(*reserved[120])(void *);
+};
+
+struct perf_dlfilter_fns perf_dlfilter_fns;
+
+static int verbose;
+
+#define pr_debug(fmt, ...) do { \
+		if (verbose) \
+			fprintf(stderr, fmt, ##__VA_ARGS__); \
+	} while (0)
+
+static int test_fail(const char *msg)
+{
+	pr_debug("%s\n", msg);
+	return -1;
+}
+
+#define CHECK(x) do { \
+		if (!(x)) \
+			return test_fail("Check '" #x "' failed\n"); \
+	} while (0)
+
+struct filter_data {
+	__u64 ip;
+	__u64 addr;
+	int do_early;
+	int early_filter_cnt;
+	int filter_cnt;
+};
+
+static struct filter_data *filt_dat;
+
+int start(void **data, void *ctx)
+{
+	int dlargc;
+	char **dlargv;
+	struct filter_data *d;
+	static bool called;
+
+	verbose = 1;
+
+	CHECK(!filt_dat && !called);
+	called = true;
+
+	d = calloc(1, sizeof(*d));
+	if (!d)
+		test_fail("Failed to allocate memory");
+	filt_dat = d;
+	*data = d;
+
+	dlargv = perf_dlfilter_fns.args(ctx, &dlargc);
+
+	CHECK(dlargc == 6);
+	CHECK(!strcmp(dlargv[0], "first"));
+	verbose = strtol(dlargv[1], NULL, 0);
+	d->ip = strtoull(dlargv[2], NULL, 0);
+	d->addr = strtoull(dlargv[3], NULL, 0);
+	d->do_early = strtol(dlargv[4], NULL, 0);
+	CHECK(!strcmp(dlargv[5], "last"));
+
+	pr_debug("%s API\n", __func__);
+
+	return 0;
+}
+
+#define CHECK_SAMPLE(x) do { \
+		if (sample->x != expected.x) \
+			return test_fail("'" #x "' not expected value\n"); \
+	} while (0)
+
+static int check_sample(struct filter_data *d, const struct perf_dlfilter_sample *sample)
+{
+	struct perf_dlfilter_sample expected = {
+		.ip		= d->ip,
+		.pid		= 12345,
+		.tid		= 12346,
+		.time		= 1234567890,
+		.addr		= d->addr,
+		.id		= 99,
+		.stream_id	= 101,
+		.period		= 543212345,
+		.cpu		= 31,
+		.cpumode	= PERF_RECORD_MISC_USER,
+		.addr_correlates_sym = 1,
+		.misc		= PERF_RECORD_MISC_USER,
+	};
+
+	CHECK(sample->size >= sizeof(struct perf_dlfilter_sample));
+
+	CHECK_SAMPLE(ip);
+	CHECK_SAMPLE(pid);
+	CHECK_SAMPLE(tid);
+	CHECK_SAMPLE(time);
+	CHECK_SAMPLE(addr);
+	CHECK_SAMPLE(id);
+	CHECK_SAMPLE(stream_id);
+	CHECK_SAMPLE(period);
+	CHECK_SAMPLE(cpu);
+	CHECK_SAMPLE(cpumode);
+	CHECK_SAMPLE(addr_correlates_sym);
+	CHECK_SAMPLE(misc);
+
+	CHECK(!sample->raw_data);
+	CHECK_SAMPLE(brstack_nr);
+	CHECK(!sample->brstack);
+	CHECK_SAMPLE(raw_callchain_nr);
+	CHECK(!sample->raw_callchain);
+
+#define EVENT_NAME "branches:"
+	CHECK(!strncmp(sample->event, EVENT_NAME, strlen(EVENT_NAME)));
+
+	return 0;
+}
+
+static int check_al(void *ctx)
+{
+	const struct perf_dlfilter_al *al;
+
+	al = perf_dlfilter_fns.resolve_ip(ctx);
+	if (!al)
+		return test_fail("resolve_ip() failed");
+
+	CHECK(al->sym && !strcmp("foo", al->sym));
+	CHECK(!al->symoff);
+
+	return 0;
+}
+
+static int check_addr_al(void *ctx)
+{
+	const struct perf_dlfilter_al *addr_al;
+
+	addr_al = perf_dlfilter_fns.resolve_addr(ctx);
+	if (!addr_al)
+		return test_fail("resolve_addr() failed");
+
+	CHECK(addr_al->sym && !strcmp("bar", addr_al->sym));
+	CHECK(!addr_al->symoff);
+
+	return 0;
+}
+
+static int check_attr(void *ctx)
+{
+	struct perf_event_attr *attr = perf_dlfilter_fns.attr(ctx);
+
+	CHECK(attr);
+	CHECK(attr->type == PERF_TYPE_HARDWARE);
+	CHECK(attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
+
+	return 0;
+}
+
+static int do_checks(void *data, const struct perf_dlfilter_sample *sample, void *ctx, bool early)
+{
+	struct filter_data *d = data;
+
+	CHECK(data && filt_dat == data);
+
+	if (early) {
+		CHECK(!d->early_filter_cnt);
+		d->early_filter_cnt += 1;
+	} else {
+		CHECK(!d->filter_cnt);
+		CHECK(d->early_filter_cnt);
+		CHECK(d->do_early != 2);
+		d->filter_cnt += 1;
+	}
+
+	if (check_sample(data, sample))
+		return -1;
+
+	if (check_attr(ctx))
+		return -1;
+
+	if (early && !d->do_early)
+		return 0;
+
+	if (check_al(ctx) || check_addr_al(ctx))
+		return -1;
+
+	if (early)
+		return d->do_early == 2;
+
+	return 1;
+}
+
+int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
+{
+	pr_debug("%s API\n", __func__);
+
+	return do_checks(data, sample, ctx, true);
+}
+
+int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
+{
+	struct filter_data *d = data;
+
+	pr_debug("%s API\n", __func__);
+
+	return do_checks(data, sample, ctx, false);
+}
+
+int stop(void *data, void *ctx)
+{
+	static bool called;
+
+	pr_debug("%s API\n", __func__);
+
+	CHECK(data && filt_dat == data && !called);
+	called = true;
+
+	free(data);
+	filt_dat = NULL;
+	return 0;
+}
+
+const char *filter_description(const char **long_description)
+{
+	*long_description = "Filter used by the 'dlfilter C API' perf test";
+	return "dlfilter to test v0 C API";
+}
diff --git a/tools/perf/util/perf_dlfilter.h b/tools/perf/include/perf/perf_dlfilter.h
index 3eef03d661b4..3eef03d661b4 100644
--- a/tools/perf/util/perf_dlfilter.h
+++ b/tools/perf/include/perf/perf_dlfilter.h
diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build
index 215ba30b8534..a055dee6a46a 100644
--- a/tools/perf/pmu-events/Build
+++ b/tools/perf/pmu-events/Build
@@ -6,10 +6,13 @@ pmu-events-y	+= pmu-events.o
 JDIR		=  pmu-events/arch/$(SRCARCH)
 JSON		=  $(shell [ -d $(JDIR) ] &&				\
 			find $(JDIR) -name '*.json' -o -name 'mapfile.csv')
+JDIR_TEST	=  pmu-events/arch/test
+JSON_TEST	=  $(shell [ -d $(JDIR_TEST) ] &&			\
+			find $(JDIR_TEST) -name '*.json')
 
 #
 # Locate/process JSON files in pmu-events/arch/
 # directory and create tables in pmu-events.c.
 #
-$(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JEVENTS)
+$(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JSON_TEST) $(JEVENTS)
 	$(Q)$(call echo-cmd,gen)$(JEVENTS) $(SRCARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V)
diff --git a/tools/perf/pmu-events/arch/test/test_cpu/branch.json b/tools/perf/pmu-events/arch/test/test_soc/cpu/branch.json
index 93ddfd8053ca..93ddfd8053ca 100644
--- a/tools/perf/pmu-events/arch/test/test_cpu/branch.json
+++ b/tools/perf/pmu-events/arch/test/test_soc/cpu/branch.json
diff --git a/tools/perf/pmu-events/arch/test/test_cpu/cache.json b/tools/perf/pmu-events/arch/test/test_soc/cpu/cache.json
index 036d0efdb2bb..036d0efdb2bb 100644
--- a/tools/perf/pmu-events/arch/test/test_cpu/cache.json
+++ b/tools/perf/pmu-events/arch/test/test_soc/cpu/cache.json
diff --git a/tools/perf/pmu-events/arch/test/test_cpu/other.json b/tools/perf/pmu-events/arch/test/test_soc/cpu/other.json
index 7d53d7ecd723..7d53d7ecd723 100644
--- a/tools/perf/pmu-events/arch/test/test_cpu/other.json
+++ b/tools/perf/pmu-events/arch/test/test_soc/cpu/other.json
diff --git a/tools/perf/pmu-events/arch/test/test_cpu/uncore.json b/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json
index d0a890cc814d..788766f45dbc 100644
--- a/tools/perf/pmu-events/arch/test/test_cpu/uncore.json
+++ b/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json
@@ -17,5 +17,26 @@
 	    "CounterMask": "0",
 	    "Invert": "0",
 	    "EdgeDetect": "0"
-  }
+  },
+  {
+	    "EventCode": "0x7",
+	    "EventName": "uncore_hisi_l3c.rd_hit_cpipe",
+	    "BriefDescription": "Total read hits",
+	    "PublicDescription": "Total read hits",
+	    "Unit": "hisi_sccl,l3c"
+  },
+  {
+	    "EventCode": "0x12",
+	    "EventName": "uncore_imc_free_running.cache_miss",
+	    "BriefDescription": "Total cache misses",
+	    "PublicDescription": "Total cache misses",
+	    "Unit": "imc_free_running"
+  },
+  {
+	    "EventCode": "0x34",
+	    "EventName": "uncore_imc.cache_hits",
+	    "BriefDescription": "Total cache hits",
+	    "PublicDescription": "Total cache hits",
+	    "Unit": "imc"
+  },
 ]
diff --git a/tools/perf/pmu-events/arch/test/test_soc/sys/uncore.json b/tools/perf/pmu-events/arch/test/test_soc/sys/uncore.json
new file mode 100644
index 000000000000..0f681a6e10ea
--- /dev/null
+++ b/tools/perf/pmu-events/arch/test/test_soc/sys/uncore.json
@@ -0,0 +1,9 @@
+[
+  {
+           "BriefDescription": "ddr write-cycles event",
+           "EventCode": "0x2b",
+           "EventName": "sys_ddr_pmu.write_cycles",
+           "Unit": "sys_ddr_pmu",
+           "Compat": "v8"
+   },
+]
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/cache.json b/tools/perf/pmu-events/arch/x86/cascadelakex/cache.json
index 3c0f5837480f..ffafb9f284d2 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/cache.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/cache.json
@@ -1,61 +1,125 @@
 [
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "L1D data line replacements",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400028000",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x51",
+        "EventName": "L1D.REPLACEMENT",
+        "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
+        "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100400002",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x48",
+        "EventName": "L1D_PEND_MISS.FB_FULL",
+        "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "L1D miss outstandings duration in cycles",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x48",
+        "EventName": "L1D_PEND_MISS.PENDING",
+        "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
+        "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "Cycles with L1D load Misses outstanding.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C0004",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x48",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+        "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
+        "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_F.ANY_SNOOP",
+        "AnyThread": "1",
+        "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_F.ANY_SNOOP",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80200491",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x48",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "L2 cache lines filling L2",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF1",
+        "EventName": "L2_LINES_IN.ALL",
+        "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
         "SampleAfterValue": "100003",
+        "UMask": "0x1f"
+    },
+    {
+        "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3.  Clean lines may either be allocated in L3 or dropped",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF2",
+        "EventName": "L2_LINES_OUT.NON_SILENT",
+        "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3.  Clean lines may either be allocated in L3 or dropped.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared state. A non-threaded event.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF2",
+        "EventName": "L2_LINES_OUT.SILENT",
+        "SampleAfterValue": "200003",
         "UMask": "0x1"
     },
     {
+        "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF2",
+        "EventName": "L2_LINES_OUT.USELESS_HWPF",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Deprecated": "1",
+        "EventCode": "0xF2",
+        "EventName": "L2_LINES_OUT.USELESS_PREF",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "L2 code requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.ALL_CODE_RD",
+        "PublicDescription": "Counts the total number of L2 code requests.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xe4"
+    },
+    {
+        "BriefDescription": "Demand Data Read requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+        "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xe1"
+    },
+    {
         "BriefDescription": "Demand requests that miss L2 cache",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
@@ -66,34 +130,427 @@
         "UMask": "0x27"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "Demand requests to L2 cache",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
+        "PublicDescription": "Demand requests to L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xe7"
+    },
+    {
+        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.ALL_PF",
+        "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xf8"
+    },
+    {
+        "BriefDescription": "RFO requests to L2 cache",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.ALL_RFO",
+        "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xe2"
+    },
+    {
+        "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.CODE_RD_HIT",
+        "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xc4"
+    },
+    {
+        "BriefDescription": "L2 cache misses when fetching instructions",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.CODE_RD_MISS",
+        "PublicDescription": "Counts L2 cache misses when fetching instructions.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x24"
+    },
+    {
+        "BriefDescription": "Demand Data Read requests that hit L2 cache",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+        "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache",
+        "SampleAfterValue": "200003",
+        "UMask": "0xc1"
+    },
+    {
+        "BriefDescription": "Demand Data Read miss L2, no rejects",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
+        "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x21"
+    },
+    {
+        "BriefDescription": "All requests that miss L2 cache",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.MISS",
+        "PublicDescription": "All requests that miss L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x3f"
+    },
+    {
+        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.PF_HIT",
+        "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xd8"
+    },
+    {
+        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.PF_MISS",
+        "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x38"
+    },
+    {
+        "BriefDescription": "All L2 requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.REFERENCES",
+        "PublicDescription": "All L2 requests.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xff"
+    },
+    {
+        "BriefDescription": "RFO requests that hit L2 cache",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.RFO_HIT",
+        "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xc2"
+    },
+    {
+        "BriefDescription": "RFO requests that miss L2 cache",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.RFO_MISS",
+        "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x22"
+    },
+    {
+        "BriefDescription": "L2 writebacks that access L2 cache",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF0",
+        "EventName": "L2_TRANS.L2_WB",
+        "PublicDescription": "Counts L2 writebacks that access L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x40"
+    },
+    {
+        "BriefDescription": "Core-originated cacheable demand requests missed L3",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL057",
+        "EventCode": "0x2E",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x41"
+    },
+    {
+        "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL057",
+        "EventCode": "0x2E",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "PublicDescription": "Counts core-originated cacheable requests to the  L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2.  It does not include all accesses to the L3.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4f"
+    },
+    {
+        "BriefDescription": "All retired load instructions.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_M.ANY_SNOOP",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80040491",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.ALL_LOADS",
+        "PEBS": "1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x81"
+    },
+    {
+        "BriefDescription": "All retired store instructions.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.ALL_STORES",
+        "L1_Hit_Indication": "1",
+        "PEBS": "1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x82"
+    },
+    {
+        "BriefDescription": "Retired load instructions with locked access.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.LOCK_LOADS",
+        "PEBS": "1",
+        "SampleAfterValue": "100007",
+        "UMask": "0x21"
+    },
+    {
+        "BriefDescription": "Retired load instructions that split across a cacheline boundary.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.",
         "SampleAfterValue": "100003",
+        "UMask": "0x41"
+    },
+    {
+        "BriefDescription": "Retired store instructions that split across a cacheline boundary.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.SPLIT_STORES",
+        "L1_Hit_Indication": "1",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x42"
+    },
+    {
+        "BriefDescription": "Retired load instructions that miss the STLB.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
+        "PEBS": "1",
+        "SampleAfterValue": "100003",
+        "UMask": "0x11"
+    },
+    {
+        "BriefDescription": "Retired store instructions that miss the STLB.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
+        "L1_Hit_Indication": "1",
+        "PEBS": "1",
+        "SampleAfterValue": "100003",
+        "UMask": "0x12"
+    },
+    {
+        "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.",
+        "SampleAfterValue": "20011",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.",
+        "SampleAfterValue": "20011",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
+        "PEBS": "1",
+        "SampleAfterValue": "20011",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_M.SNOOP_MISS",
+        "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_M.SNOOP_MISS",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200040010",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "Data_LA": "1",
+        "EventCode": "0xD2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.",
         "SampleAfterValue": "100003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD3",
+        "EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions which data sources missed L3 but serviced from local DRAM.",
+        "SampleAfterValue": "100007",
         "UMask": "0x1"
     },
     {
+        "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from remote dram",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD3",
+        "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM",
+        "PEBS": "1",
+        "SampleAfterValue": "100007",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Retired load instructions whose data sources was forwarded from a remote cache",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD3",
+        "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD",
+        "PublicDescription": "Retired load instructions whose data sources was forwarded from a remote cache.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Retired load instructions whose data sources was remote HITM",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD3",
+        "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions whose data sources was remote HITM.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Retired load instructions with remote Intel Optane DC persistent memory as the data source where the data request missed all caches. Precise event.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "ELLC": "1",
+        "EventCode": "0xD3",
+        "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with remote Intel Optane DC persistent memory as the data source and the data request missed L3 (AppDirect or Memory Mode) and DRAM cache(Memory Mode). Precise event",
+        "SampleAfterValue": "100007",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD4",
+        "EventName": "MEM_LOAD_MISC_RETIRED.UC",
+        "PEBS": "1",
+        "SampleAfterValue": "100007",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.FB_HIT",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x40"
+    },
+    {
+        "BriefDescription": "Retired load instructions with L1 cache hits as data sources",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.L1_HIT",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Retired load instructions missed L1 cache as data sources",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.L1_MISS",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Retired load instructions with L2 cache hits as data sources",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions with L2 cache hits as data sources.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Retired load instructions missed L2 cache as data sources",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.L2_MISS",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions missed L2 cache as data sources.",
+        "SampleAfterValue": "50021",
+        "UMask": "0x10"
+    },
+    {
         "BriefDescription": "Retired load instructions with L3 cache hits as data sources",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
@@ -106,2157 +563,2121 @@
         "UMask": "0x4"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "Retired load instructions missed L3 cache as data sources",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80400001",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "Data_LA": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.L3_MISS",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Retired load instructions with local Intel Optane DC persistent memory as the data source where the data request missed all caches. Precise event.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "ELLC": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.LOCAL_PMM",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with local Intel Optane DC persistent memory as the data source and the data request missed L3 (AppDirect or Memory Mode) and DRAM cache(Memory Mode). Precise event",
+        "SampleAfterValue": "100003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Demand and prefetch data reads",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB0",
+        "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+        "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Any memory transaction that reached the SQ.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB0",
+        "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
+        "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..",
+        "SampleAfterValue": "100003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Cacheable and noncachaeble code read requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB0",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+        "PublicDescription": "Counts both cacheable and non-cacheable code read requests.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Demand Data Read requests sent to uncore",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB0",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0491",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB0",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+        "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
         "SampleAfterValue": "100003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB2",
+        "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+        "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.Note: Writeback pending FIFO has six entries.",
+        "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "L2 writebacks that access L2 cache",
+        "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xF0",
-        "EventName": "L2_TRANS.L2_WB",
-        "PublicDescription": "Counts L2 writebacks that access L2 cache.",
-        "SampleAfterValue": "200003",
-        "UMask": "0x40"
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+        "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
     },
     {
-        "BriefDescription": "L2 cache lines filling L2",
+        "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xF1",
-        "EventName": "L2_LINES_IN.ALL",
-        "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1f"
+        "CounterMask": "1",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+        "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_F.SNOOP_NONE",
+        "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
+        "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+        "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+        "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+        "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+        "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "6",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+        "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_F.SNOOP_NONE",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080200002",
-        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.ANY_RESPONSE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C0010",
+        "MSRValue": "0x0000010491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80020004",
+        "MSRValue": "0x3F803C0491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_M.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080040080",
+        "MSRValue": "0x10003C0491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08000807F7",
+        "MSRValue": "0x08003C0491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared state. A non-threaded event.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xF2",
-        "EventName": "L2_LINES_OUT.SILENT",
-        "SampleAfterValue": "200003",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100080080",
+        "MSRValue": "0x04003C0491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_S.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080100020",
+        "MSRValue": "0x01003C0491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_E.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200080004",
+        "MSRValue": "0x08007C0491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0080",
+        "MSRValue": "0x02003C0491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80200001",
+        "MSRValue": "0x00803C0491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80020491",
+        "MSRValue": "0x3F80080491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400100001",
+        "MSRValue": "0x1000080491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100200004",
+        "MSRValue": "0x0800080491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0100",
+        "MSRValue": "0x0400080491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80100080",
+        "MSRValue": "0x0100080491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_E.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000200020",
+        "MSRValue": "0x0200080491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_E.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80200100",
+        "MSRValue": "0x0080080491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800040020",
+        "MSRValue": "0x3F80200491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80020490",
+        "MSRValue": "0x1000200491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100200490",
+        "MSRValue": "0x0800200491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80028000",
+        "MSRValue": "0x0400200491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F804007F7",
+        "MSRValue": "0x0100200491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_S.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_F.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200100400",
+        "MSRValue": "0x0200200491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.SUPPLIER_NONE.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_F.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200020400",
+        "MSRValue": "0x0080200491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800040400",
+        "MSRValue": "0x3F80040491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "All requests that miss L2 cache",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.MISS",
-        "PublicDescription": "All requests that miss L2 cache.",
-        "SampleAfterValue": "200003",
-        "UMask": "0x3f"
-    },
-    {
-        "BriefDescription": "Retired load instructions whose data sources was forwarded from a remote cache",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD3",
-        "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD",
-        "PublicDescription": "Retired load instructions whose data sources was forwarded from a remote cache.",
-        "SampleAfterValue": "100007",
-        "UMask": "0x8"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080400490",
+        "MSRValue": "0x1000040491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0400",
+        "MSRValue": "0x0800040491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100400490",
+        "MSRValue": "0x0400040491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400020491",
+        "MSRValue": "0x0100040491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD2",
-        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT",
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.",
-        "SampleAfterValue": "20011",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_M.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_M.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80040001",
+        "MSRValue": "0x0200040491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_M.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400100122",
+        "MSRValue": "0x0080040491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C0080",
+        "MSRValue": "0x3F80100491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions missed L1 cache as data sources",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD1",
-        "EventName": "MEM_LOAD_RETIRED.L1_MISS",
-        "PEBS": "1",
-        "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.",
+        "Deprecated": "1",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000100491",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
-        "UMask": "0x8"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800108000",
+        "MSRValue": "0x0800100491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100020122",
+        "MSRValue": "0x0400100491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200020080",
+        "MSRValue": "0x0100100491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_S.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80200010",
+        "MSRValue": "0x0200100491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_S.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_S.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080100002",
+        "MSRValue": "0x0080100491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000100020",
+        "MSRValue": "0x3F80400491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080400002",
+        "MSRValue": "0x0080400491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_M.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200048000",
+        "MSRValue": "0x0100400491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400080400",
+        "MSRValue": "0x3F80020491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000200122",
+        "MSRValue": "0x1000020491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100040001",
+        "MSRValue": "0x0800020491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_E.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200080122",
+        "MSRValue": "0x0400020491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400020122",
+        "MSRValue": "0x0100020491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800200122",
+        "MSRValue": "0x0200020491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "AnyThread": "1",
-        "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x48",
-        "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400208000",
+        "MSRValue": "0x0080020491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.ANY_RESPONSE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000100010",
+        "MSRValue": "0x0000010490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000020080",
+        "MSRValue": "0x3F803C0490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_S.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080100491",
+        "MSRValue": "0x10003C0490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80200020",
+        "MSRValue": "0x08003C0490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Any memory transaction that reached the SQ.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xB0",
-        "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
-        "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..",
-        "SampleAfterValue": "100003",
-        "UMask": "0x80"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100200010",
+        "MSRValue": "0x04003C0490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100200120",
+        "MSRValue": "0x01003C0490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_M.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200040120",
+        "MSRValue": "0x08007C0490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100048000",
+        "MSRValue": "0x02003C0490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000028000",
+        "MSRValue": "0x00803C0490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000200400",
+        "MSRValue": "0x3F80080490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0010",
+        "MSRValue": "0x1000080490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD4",
-        "EventName": "MEM_LOAD_MISC_RETIRED.UC",
-        "PEBS": "1",
-        "SampleAfterValue": "100007",
-        "UMask": "0x4"
-    },
-    {
-        "BriefDescription": "Retired load instructions with remote Intel\u00ae Optane\u2122 DC persistent memory as the data source where the data request missed all caches. Precise event.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "ELLC": "1",
-        "EventCode": "0xD3",
-        "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM",
-        "PEBS": "1",
-        "PublicDescription": "Counts retired load instructions with remote Intel\u00ae Optane\u2122 DC persistent memory as the data source and the data request missed L3 (AppDirect or Memory Mode) and DRAM cache(Memory Mode). Precise event",
-        "SampleAfterValue": "100007",
-        "UMask": "0x10"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400080122",
+        "MSRValue": "0x0800080490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80080400",
+        "MSRValue": "0x0400080490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_E.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02000807F7",
+        "MSRValue": "0x0100080490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_E.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C0490",
+        "MSRValue": "0x0200080490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_E.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100080002",
+        "MSRValue": "0x0080080490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800080010",
+        "MSRValue": "0x3F80200490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100400001",
+        "MSRValue": "0x1000200490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.ANY_RESPONSE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010490",
+        "MSRValue": "0x0800200490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD2",
-        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE",
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.",
+        "Deprecated": "1",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400200490",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
-        "UMask": "0x8"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100400122",
+        "MSRValue": "0x0100200490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Demand Data Read requests",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
-        "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.",
-        "SampleAfterValue": "200003",
-        "UMask": "0xe1"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_F.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400200020",
+        "MSRValue": "0x0200200490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_M.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_F.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200040491",
+        "MSRValue": "0x0080200490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_M.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080040010",
+        "MSRValue": "0x3F80040490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0001",
+        "MSRValue": "0x1000040490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.SUPPLIER_NONE.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200028000",
+        "MSRValue": "0x0800040490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x60",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
-        "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x4"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100100001",
+        "MSRValue": "0x0400040490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of cache line split locks sent to uncore.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xF4",
-        "EventName": "SQ_MISC.SPLIT_LOCK",
-        "PublicDescription": "Counts the number of cache line split locks sent to the uncore.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x10"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_F.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200200100",
+        "MSRValue": "0x0100040490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_M.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_M.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800100002",
+        "MSRValue": "0x0200040490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.CODE_RD_HIT",
-        "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
-        "SampleAfterValue": "200003",
-        "UMask": "0xc4"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_M.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000200120",
+        "MSRValue": "0x0080040490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080020010",
+        "MSRValue": "0x3F80100490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10001007F7",
+        "MSRValue": "0x1000100490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400040122",
+        "MSRValue": "0x0800100490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400100010",
+        "MSRValue": "0x0400100490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_S.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02001007F7",
+        "MSRValue": "0x0100100490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_S.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800020120",
+        "MSRValue": "0x0200100490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_S.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800040120",
+        "MSRValue": "0x0080100490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400040020",
+        "MSRValue": "0x3F80400490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08001007F7",
+        "MSRValue": "0x0080400490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C07F7",
+        "MSRValue": "0x0100400490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C0004",
+        "MSRValue": "0x3F80020490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD2",
-        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.",
-        "SampleAfterValue": "20011",
-        "UMask": "0x4"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_F.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080200020",
+        "MSRValue": "0x1000020490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100200080",
+        "MSRValue": "0x0800020490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400048000",
+        "MSRValue": "0x0400020490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired store instructions that split across a cacheline boundary.",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD0",
-        "EventName": "MEM_INST_RETIRED.SPLIT_STORES",
-        "L1_Hit_Indication": "1",
-        "PEBS": "1",
-        "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.",
+        "Deprecated": "1",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100020490",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
-        "UMask": "0x42"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.ANY_RESPONSE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010122",
+        "MSRValue": "0x0200020490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000100120",
+        "MSRValue": "0x0080020490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.ANY_RESPONSE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.ANY_RESPONSE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010002",
+        "MSRValue": "0x0000010120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000020491",
+        "MSRValue": "0x3F803C0120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_S.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200100120",
+        "MSRValue": "0x10003C0120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.ANY_RESPONSE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010001",
+        "MSRValue": "0x08003C0120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_E.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080080400",
+        "MSRValue": "0x04003C0120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080400080",
+        "MSRValue": "0x01003C0120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0122",
+        "MSRValue": "0x08007C0120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100400080",
+        "MSRValue": "0x02003C0120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800080120",
+        "MSRValue": "0x00803C0120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800080001",
+        "MSRValue": "0x3F80080120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800100491",
+        "MSRValue": "0x1000080120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400100002",
+        "MSRValue": "0x0800080120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80400100",
+        "MSRValue": "0x0400080120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080080001",
+        "MSRValue": "0x0100080120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.ANY_RESPONSE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_E.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010004",
+        "MSRValue": "0x0200080120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_S.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_E.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200100490",
+        "MSRValue": "0x0080080120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080040004",
+        "MSRValue": "0x3F80200120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080400020",
+        "MSRValue": "0x1000200120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C8000",
+        "MSRValue": "0x0800200120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80020100",
+        "MSRValue": "0x0400200120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100200020",
+        "MSRValue": "0x0100200120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_F.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80200400",
+        "MSRValue": "0x0200200120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_M.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_F.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080040120",
+        "MSRValue": "0x0080200120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0400",
+        "MSRValue": "0x3F80040120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80040020",
+        "MSRValue": "0x1000040120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80100120",
+        "MSRValue": "0x0800040120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80400002",
+        "MSRValue": "0x0400040120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400040491",
+        "MSRValue": "0x0100040120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x48",
-        "EventName": "L1D_PEND_MISS.FB_FULL",
-        "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_M.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_M.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200040120",
+        "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.SUPPLIER_NONE.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_M.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080020020",
+        "MSRValue": "0x0080040120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C0120",
+        "MSRValue": "0x3F80100120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400020010",
+        "MSRValue": "0x1000100120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80200490",
+        "MSRValue": "0x0800100120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0100",
+        "MSRValue": "0x0400100120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400040001",
+        "MSRValue": "0x0100100120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xB2",
-        "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
-        "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.Note: Writeback pending FIFO has six entries.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_F.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_S.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200200020",
+        "MSRValue": "0x0200100120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_S.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000200490",
+        "MSRValue": "0x0080100120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0100",
+        "MSRValue": "0x3F80400120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080400491",
+        "MSRValue": "0x0080400120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10000207F7",
+        "MSRValue": "0x0100400120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400080120",
+        "MSRValue": "0x3F80020120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions missed L2 cache as data sources",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD1",
-        "EventName": "MEM_LOAD_RETIRED.L2_MISS",
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions missed L2 cache as data sources.",
-        "SampleAfterValue": "50021",
-        "UMask": "0x10"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C0122",
+        "MSRValue": "0x1000020120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_F.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080200400",
+        "MSRValue": "0x0800020120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100400004",
+        "MSRValue": "0x0400020120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
@@ -2277,3204 +2698,3038 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100200100",
+        "MSRValue": "0x0200020120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_M.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200040080",
+        "MSRValue": "0x0080020120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_E.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.ANY_RESPONSE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200088000",
+        "MSRValue": "0x00000107F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C8000",
+        "MSRValue": "0x3F803C07F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100200001",
+        "MSRValue": "0x10003C07F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80080020",
+        "MSRValue": "0x08003C07F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80400490",
+        "MSRValue": "0x04003C07F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80400400",
+        "MSRValue": "0x01003C07F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100400020",
+        "MSRValue": "0x08007C07F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.PF_MISS",
-        "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.",
-        "SampleAfterValue": "200003",
-        "UMask": "0x38"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08000407F7",
+        "MSRValue": "0x02003C07F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800200100",
+        "MSRValue": "0x00803C07F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80080004",
+        "MSRValue": "0x3F800807F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_M.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200040400",
+        "MSRValue": "0x10000807F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400100004",
+        "MSRValue": "0x08000807F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_E.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200080080",
+        "MSRValue": "0x04000807F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10000407F7",
+        "MSRValue": "0x01000807F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.SUPPLIER_NONE.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_E.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00800207F7",
+        "MSRValue": "0x02000807F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_E.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C0491",
+        "MSRValue": "0x00800807F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.ANY_RESPONSE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010080",
+        "MSRValue": "0x3F802007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.SUPPLIER_NONE.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200020122",
+        "MSRValue": "0x10002007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0122",
+        "MSRValue": "0x08002007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80080001",
+        "MSRValue": "0x04002007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080020491",
+        "MSRValue": "0x01002007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_M.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_F.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200040001",
+        "MSRValue": "0x02002007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_F.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80400122",
+        "MSRValue": "0x00802007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.ANY_RESPONSE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010020",
+        "MSRValue": "0x3F800407F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0490",
+        "MSRValue": "0x10000407F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000020020",
+        "MSRValue": "0x08000407F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_S.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200100122",
+        "MSRValue": "0x04000407F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_F.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080200001",
+        "MSRValue": "0x01000407F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_M.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_M.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400020080",
+        "MSRValue": "0x02000407F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_M.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100040010",
+        "MSRValue": "0x00800407F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_E.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200080100",
+        "MSRValue": "0x3F801007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080020490",
+        "MSRValue": "0x10001007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08000207F7",
+        "MSRValue": "0x08001007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions missed L3 cache as data sources",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD1",
-        "EventName": "MEM_LOAD_RETIRED.L3_MISS",
-        "PEBS": "1",
-        "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache.",
-        "SampleAfterValue": "100007",
-        "UMask": "0x20"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000200100",
+        "MSRValue": "0x04001007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100020002",
+        "MSRValue": "0x01001007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.SUPPLIER_NONE.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_S.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080028000",
+        "MSRValue": "0x02001007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_S.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100020491",
+        "MSRValue": "0x00801007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80080120",
+        "MSRValue": "0x3F804007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C0120",
+        "MSRValue": "0x00804007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0010",
+        "MSRValue": "0x01004007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80020020",
+        "MSRValue": "0x3F800207F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800020100",
+        "MSRValue": "0x10000207F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Core-originated cacheable demand requests missed L3",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "Errata": "SKL057",
-        "EventCode": "0x2E",
-        "EventName": "LONGEST_LAT_CACHE.MISS",
-        "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x41"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_F.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080200490",
+        "MSRValue": "0x08000207F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_F.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200200080",
+        "MSRValue": "0x04000207F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_E.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080080120",
+        "MSRValue": "0x01000207F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.SUPPLIER_NONE.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000020010",
+        "MSRValue": "0x02000207F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.SUPPLIER_NONE.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0120",
+        "MSRValue": "0x00800207F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.ANY_RESPONSE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80020400",
+        "MSRValue": "0x0000010122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80100400",
+        "MSRValue": "0x3F803C0122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C8000",
+        "MSRValue": "0x10003C0122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C0400",
+        "MSRValue": "0x08003C0122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000040120",
+        "MSRValue": "0x04003C0122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C0100",
+        "MSRValue": "0x01003C0122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100200002",
+        "MSRValue": "0x08007C0122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C0490",
+        "MSRValue": "0x02003C0122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_F.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080200100",
+        "MSRValue": "0x00803C0122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01004007F7",
+        "MSRValue": "0x3F80080122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100080120",
+        "MSRValue": "0x1000080122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x60",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
-        "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x4"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C0491",
+        "MSRValue": "0x0800080122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000100100",
+        "MSRValue": "0x0400080122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000020100",
+        "MSRValue": "0x0100080122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_E.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_E.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080080010",
+        "MSRValue": "0x0200080122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_E.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080400010",
+        "MSRValue": "0x0080080122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3.  Clean lines may either be allocated in L3 or dropped",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xF2",
-        "EventName": "L2_LINES_OUT.NON_SILENT",
-        "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3.  Clean lines may either be allocated in L3 or dropped.",
-        "SampleAfterValue": "200003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000040010",
+        "MSRValue": "0x3F80200122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00804007F7",
+        "MSRValue": "0x1000200122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C0020",
+        "MSRValue": "0x0800200122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800100100",
+        "MSRValue": "0x0400200122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "RFO requests that hit L2 cache",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.RFO_HIT",
-        "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
-        "SampleAfterValue": "200003",
-        "UMask": "0xc2"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800100080",
+        "MSRValue": "0x0100200122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_F.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400200001",
+        "MSRValue": "0x0200200122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_F.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400020100",
+        "MSRValue": "0x0080200122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x60",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
-        "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x8"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400108000",
+        "MSRValue": "0x3F80040122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.ANY_RESPONSE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.ANY_RESPONSE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00000107F7",
+        "MSRValue": "0x1000040122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F800807F7",
+        "MSRValue": "0x0800040122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80040004",
+        "MSRValue": "0x0400040122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200020001",
+        "MSRValue": "0x0100040122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_M.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_M.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100100122",
+        "MSRValue": "0x0200040122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_S.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_M.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080100122",
+        "MSRValue": "0x0080040122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0001",
+        "MSRValue": "0x3F80100122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80080002",
+        "MSRValue": "0x1000100122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80040010",
+        "MSRValue": "0x0800100122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_F.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200200010",
+        "MSRValue": "0x0400100122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80100100",
+        "MSRValue": "0x0100100122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_S.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800200001",
+        "MSRValue": "0x0200100122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_S.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080408000",
+        "MSRValue": "0x0080100122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_M.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080040400",
+        "MSRValue": "0x3F80400122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100040400",
+        "MSRValue": "0x0080400122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400080491",
+        "MSRValue": "0x0100400122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0491",
+        "MSRValue": "0x3F80020122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80200080",
+        "MSRValue": "0x1000020122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100040122",
+        "MSRValue": "0x0800020122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_E.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080088000",
+        "MSRValue": "0x0400020122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400040490",
+        "MSRValue": "0x0100020122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.SUPPLIER_NONE.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C0080",
+        "MSRValue": "0x0200020122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.SUPPLIER_NONE.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800100122",
+        "MSRValue": "0x0080020122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.ANY_RESPONSE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100020004",
+        "MSRValue": "0x0000010004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_S.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080100080",
+        "MSRValue": "0x3F803C0004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000200080",
+        "MSRValue": "0x10003C0004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0002",
+        "MSRValue": "0x08003C0004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400080100",
+        "MSRValue": "0x04003C0004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F801007F7",
+        "MSRValue": "0x01003C0004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000100400",
+        "MSRValue": "0x08007C0004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000020400",
+        "MSRValue": "0x02003C0004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100040490",
+        "MSRValue": "0x00803C0004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80100002",
+        "MSRValue": "0x3F80080004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C0002",
+        "MSRValue": "0x1000080004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_M.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080048000",
+        "MSRValue": "0x0800080004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "L1D data line replacements",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x51",
-        "EventName": "L1D.REPLACEMENT",
-        "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800020010",
+        "MSRValue": "0x0400080004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C0010",
+        "MSRValue": "0x0100080004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_E.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400200010",
+        "MSRValue": "0x0200080004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800080122",
+        "MSRValue": "0x0080080004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800200010",
+        "MSRValue": "0x3F80200004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80200122",
+        "MSRValue": "0x1000200004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800040490",
+        "MSRValue": "0x0800200004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000100002",
+        "MSRValue": "0x0400200004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000080491",
+        "MSRValue": "0x0100200004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Demand Data Read requests that hit L2 cache",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
-        "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache",
-        "SampleAfterValue": "200003",
-        "UMask": "0xc1"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_F.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C07F7",
+        "MSRValue": "0x0200200004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_F.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100100010",
+        "MSRValue": "0x0080200004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "All retired load instructions.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD0",
-        "EventName": "MEM_INST_RETIRED.ALL_LOADS",
-        "PEBS": "1",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x81"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400200400",
+        "MSRValue": "0x3F80040004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0080",
+        "MSRValue": "0x1000040004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000108000",
+        "MSRValue": "0x0800040004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200020010",
+        "MSRValue": "0x0400040004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000080010",
+        "MSRValue": "0x0100040004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_M.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800040491",
+        "MSRValue": "0x0200040004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800208000",
+        "MSRValue": "0x0080040004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.ANY_RESPONSE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010010",
+        "MSRValue": "0x3F80100004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080020080",
+        "MSRValue": "0x1000100004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_F.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080208000",
+        "MSRValue": "0x0800100004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_S.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200100002",
+        "MSRValue": "0x0400100004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800100020",
+        "MSRValue": "0x0100100004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_S.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_S.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200100020",
+        "MSRValue": "0x0200100004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80080100",
+        "MSRValue": "0x0080100004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400200002",
+        "MSRValue": "0x3F80400004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_S.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080100120",
+        "MSRValue": "0x0080400004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400200490",
+        "MSRValue": "0x0100400004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0100",
+        "MSRValue": "0x3F80020004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_E.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080080491",
+        "MSRValue": "0x1000020004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000100122",
+        "MSRValue": "0x0800020004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_M.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200040004",
+        "MSRValue": "0x0400020004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000040100",
+        "MSRValue": "0x0100020004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400080020",
+        "MSRValue": "0x0200020004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0001",
+        "MSRValue": "0x0080020004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.ANY_RESPONSE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80020080",
+        "MSRValue": "0x0000010001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100080100",
+        "MSRValue": "0x3F803C0001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_F.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080200010",
+        "MSRValue": "0x10003C0001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80108000",
+        "MSRValue": "0x08003C0001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0490",
+        "MSRValue": "0x04003C0001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80080490",
+        "MSRValue": "0x01003C0001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10000807F7",
+        "MSRValue": "0x08007C0001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080020004",
+        "MSRValue": "0x02003C0001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080020002",
+        "MSRValue": "0x00803C0001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000040400",
+        "MSRValue": "0x3F80080001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x60",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
-        "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100080020",
+        "MSRValue": "0x1000080001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0002",
+        "MSRValue": "0x0800080001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C0400",
+        "MSRValue": "0x0400080001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_F.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200200001",
+        "MSRValue": "0x0100080001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_E.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000200010",
+        "MSRValue": "0x0200080001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.ANY_RESPONSE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010400",
+        "MSRValue": "0x0080080001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000088000",
+        "MSRValue": "0x3F80200001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0004",
+        "MSRValue": "0x1000200001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x60",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
-        "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x8"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800200020",
+        "MSRValue": "0x0800200001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_E.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080080490",
+        "MSRValue": "0x0400200001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80080122",
+        "MSRValue": "0x0100200001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_F.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100400010",
+        "MSRValue": "0x0200200001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_F.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80040490",
+        "MSRValue": "0x0080200001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400040120",
+        "MSRValue": "0x3F80040001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions that split across a cacheline boundary.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD0",
-        "EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
-        "PEBS": "1",
-        "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x41"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400040080",
+        "MSRValue": "0x1000040001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "6",
-        "EventCode": "0x60",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400100491",
+        "MSRValue": "0x0800040001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_M.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200040100",
+        "MSRValue": "0x0400040001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0122",
+        "MSRValue": "0x0100040001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired store instructions that miss the STLB.",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_M.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD0",
-        "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
-        "L1_Hit_Indication": "1",
-        "PEBS": "1",
+        "Deprecated": "1",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200040001",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
-        "UMask": "0x12"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080400001",
+        "MSRValue": "0x0080040001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "RFO requests to L2 cache",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.ALL_RFO",
-        "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
-        "SampleAfterValue": "200003",
-        "UMask": "0xe2"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100400120",
+        "MSRValue": "0x3F80100001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C07F7",
+        "MSRValue": "0x1000100001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0004",
+        "MSRValue": "0x0800100001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_S.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080100010",
+        "MSRValue": "0x0400100001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100080122",
+        "MSRValue": "0x0100100001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_S.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100040080",
+        "MSRValue": "0x0200100001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0020",
+        "MSRValue": "0x0080100001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0004",
+        "MSRValue": "0x3F80400001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000040491",
+        "MSRValue": "0x0080400001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0080",
+        "MSRValue": "0x0100400001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800100120",
+        "MSRValue": "0x3F80020001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0020",
+        "MSRValue": "0x1000020001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800100490",
+        "MSRValue": "0x0800020001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C07F7",
+        "MSRValue": "0x0400020001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_M.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200040020",
+        "MSRValue": "0x0100020001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01001007F7",
+        "MSRValue": "0x0200020001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80040122",
+        "MSRValue": "0x0080020001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.ANY_RESPONSE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100028000",
+        "MSRValue": "0x0000010002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_M.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02000407F7",
+        "MSRValue": "0x3F803C0002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80020122",
+        "MSRValue": "0x10003C0002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400080490",
+        "MSRValue": "0x08003C0002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000080400",
+        "MSRValue": "0x04003C0002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000080004",
+        "MSRValue": "0x01003C0002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C0001",
+        "MSRValue": "0x08007C0002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100100004",
+        "MSRValue": "0x02003C0002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100400491",
+        "MSRValue": "0x00803C0002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100080004",
+        "MSRValue": "0x3F80080002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800020001",
+        "MSRValue": "0x1000080002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_F.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080200004",
+        "MSRValue": "0x0800080002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400088000",
+        "MSRValue": "0x0400080002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400040010",
+        "MSRValue": "0x0100080002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_E.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100100400",
+        "MSRValue": "0x0200080002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_E.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100020400",
+        "MSRValue": "0x0080080002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800040100",
+        "MSRValue": "0x3F80200002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100088000",
+        "MSRValue": "0x1000200002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000100491",
+        "MSRValue": "0x0800200002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400200080",
+        "MSRValue": "0x0400200002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.SUPPLIER_NONE.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200020020",
+        "MSRValue": "0x0100200002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.ALL_PF",
-        "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.",
-        "SampleAfterValue": "200003",
-        "UMask": "0xf8"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_F.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100200122",
+        "MSRValue": "0x0200200002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_S.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_F.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200100004",
+        "MSRValue": "0x0080200002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400080001",
+        "MSRValue": "0x3F80040002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100200400",
+        "MSRValue": "0x1000040002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100080491",
+        "MSRValue": "0x0800040002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000080490",
+        "MSRValue": "0x0400040002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
@@ -5495,4482 +5750,4227 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_M.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80080080",
+        "MSRValue": "0x0200040002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_F.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_M.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200200400",
+        "MSRValue": "0x0080040002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "L2 code requests",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.ALL_CODE_RD",
-        "PublicDescription": "Counts the total number of L2 code requests.",
-        "SampleAfterValue": "200003",
-        "UMask": "0xe4"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800100010",
+        "MSRValue": "0x3F80100002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80020001",
+        "MSRValue": "0x1000100002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800080100",
+        "MSRValue": "0x0800100002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0491",
+        "MSRValue": "0x0400100002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400040002",
+        "MSRValue": "0x0100100002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Demand requests to L2 cache",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
-        "PublicDescription": "Demand requests to L2 cache.",
-        "SampleAfterValue": "200003",
-        "UMask": "0xe7"
-    },
-    {
-        "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from remote dram",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD3",
-        "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM",
-        "PEBS": "1",
-        "SampleAfterValue": "100007",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_S.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F800207F7",
+        "MSRValue": "0x0200100002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_S.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01000407F7",
+        "MSRValue": "0x0080100002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800080490",
+        "MSRValue": "0x3F80400002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_S.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200100100",
+        "MSRValue": "0x0080400002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "L2 cache misses when fetching instructions",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.CODE_RD_MISS",
-        "PublicDescription": "Counts L2 cache misses when fetching instructions.",
-        "SampleAfterValue": "200003",
-        "UMask": "0x24"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_S.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200100080",
+        "MSRValue": "0x0100400002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000080120",
+        "MSRValue": "0x3F80020002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_M.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080040100",
+        "MSRValue": "0x1000020002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800080491",
+        "MSRValue": "0x0800020002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x60",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
-        "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0122",
+        "MSRValue": "0x0400020002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400200004",
+        "MSRValue": "0x0100020002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.SUPPLIER_NONE.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04000407F7",
+        "MSRValue": "0x0200020002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400100020",
+        "MSRValue": "0x0080020002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x60",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
-        "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.ANY_RESPONSE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01000207F7",
+        "MSRValue": "0x0000018000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800020122",
+        "MSRValue": "0x3F803C8000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000040002",
+        "MSRValue": "0x10003C8000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C0100",
+        "MSRValue": "0x08003C8000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80040080",
+        "MSRValue": "0x04003C8000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100020490",
+        "MSRValue": "0x01003C8000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80100001",
+        "MSRValue": "0x08007C8000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400040100",
+        "MSRValue": "0x02003C8000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000100490",
+        "MSRValue": "0x00803C8000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800100004",
+        "MSRValue": "0x3F80088000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "Errata": "SKL057",
-        "EventCode": "0x2E",
-        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
-        "PublicDescription": "Counts core-originated cacheable requests to the  L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2.  It does not include all accesses to the L3.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x4f"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_M.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200040002",
+        "MSRValue": "0x1000088000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C0002",
+        "MSRValue": "0x0800088000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000080020",
+        "MSRValue": "0x0400088000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400020001",
+        "MSRValue": "0x0100088000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_E.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000208000",
+        "MSRValue": "0x0200088000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_E.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_E.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200080020",
+        "MSRValue": "0x0080088000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0122",
+        "MSRValue": "0x3F80208000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80200002",
+        "MSRValue": "0x1000208000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100400100",
+        "MSRValue": "0x0800208000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100040491",
+        "MSRValue": "0x0400208000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions that miss the STLB.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD0",
-        "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
-        "PEBS": "1",
-        "SampleAfterValue": "100003",
-        "UMask": "0x11"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100020100",
+        "MSRValue": "0x0100208000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_F.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_F.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200200122",
+        "MSRValue": "0x0200208000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_F.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80400010",
+        "MSRValue": "0x0080208000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80400080",
+        "MSRValue": "0x3F80048000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0010",
+        "MSRValue": "0x1000048000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80200004",
+        "MSRValue": "0x0800048000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions with local Intel\u00ae Optane\u2122 DC persistent memory as the data source where the data request missed all caches. Precise event.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "ELLC": "1",
-        "EventCode": "0xD1",
-        "EventName": "MEM_LOAD_RETIRED.LOCAL_PMM",
-        "PEBS": "1",
-        "PublicDescription": "Counts retired load instructions with local Intel\u00ae Optane\u2122 DC persistent memory as the data source and the data request missed L3 (AppDirect or Memory Mode) and DRAM cache(Memory Mode). Precise event",
-        "SampleAfterValue": "100003",
-        "UMask": "0x80"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0002",
+        "MSRValue": "0x0400048000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800040010",
+        "MSRValue": "0x0100048000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.ANY_RESPONSE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_M.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010491",
+        "MSRValue": "0x0200048000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_M.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0400",
+        "MSRValue": "0x0080048000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400200120",
+        "MSRValue": "0x3F80108000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100100490",
+        "MSRValue": "0x1000108000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "Deprecated": "1",
-        "EventCode": "0xF2",
-        "EventName": "L2_LINES_OUT.USELESS_PREF",
-        "SampleAfterValue": "200003",
-        "UMask": "0x4"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C0122",
+        "MSRValue": "0x0800108000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_S.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200100491",
+        "MSRValue": "0x0400108000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Demand Data Read miss L2, no rejects",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
-        "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.",
-        "SampleAfterValue": "200003",
-        "UMask": "0x21"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000080001",
+        "MSRValue": "0x0100108000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_S.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000020002",
+        "MSRValue": "0x0200108000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD2",
-        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
-        "PEBS": "1",
-        "SampleAfterValue": "20011",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_S.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800028000",
+        "MSRValue": "0x0080108000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.OTHER.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0080",
+        "MSRValue": "0x3F80408000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.OTHER.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C0400",
+        "MSRValue": "0x0080408000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "All L2 requests",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.REFERENCES",
-        "PublicDescription": "All L2 requests.",
-        "SampleAfterValue": "200003",
-        "UMask": "0xff"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_S.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.OTHER.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080100400",
+        "MSRValue": "0x0100408000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.SUPPLIER_NONE.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080020400",
+        "MSRValue": "0x3F80028000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles with L1D load Misses outstanding.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x48",
-        "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
-        "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100100020",
+        "MSRValue": "0x1000028000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_S.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080108000",
+        "MSRValue": "0x0800028000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400080080",
+        "MSRValue": "0x0400028000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400100120",
+        "MSRValue": "0x0100028000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.SUPPLIER_NONE.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C0001",
+        "MSRValue": "0x0200028000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_M.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.SUPPLIER_NONE.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080040490",
+        "MSRValue": "0x0080028000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.ANY_RESPONSE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800020002",
+        "MSRValue": "0x0000010400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0120",
+        "MSRValue": "0x3F803C0400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_E.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080080100",
+        "MSRValue": "0x10003C0400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80020120",
+        "MSRValue": "0x08003C0400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800040122",
+        "MSRValue": "0x04003C0400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0491",
+        "MSRValue": "0x01003C0400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C8000",
+        "MSRValue": "0x08007C0400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80100491",
+        "MSRValue": "0x02003C0400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C0100",
+        "MSRValue": "0x00803C0400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_F.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02002007F7",
+        "MSRValue": "0x3F80080400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_F.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080200080",
+        "MSRValue": "0x1000080400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080400400",
+        "MSRValue": "0x0800080400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800020020",
+        "MSRValue": "0x0400080400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400020004",
+        "MSRValue": "0x0100080400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_S.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_E.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00801007F7",
+        "MSRValue": "0x0200080400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_E.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400100100",
+        "MSRValue": "0x0080080400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0010",
+        "MSRValue": "0x3F80200400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0120",
+        "MSRValue": "0x1000200400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200020120",
+        "MSRValue": "0x0800200400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100100002",
+        "MSRValue": "0x0400200400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_S.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200108000",
+        "MSRValue": "0x0100200400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_F.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C0080",
+        "MSRValue": "0x0200200400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_F.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80040120",
+        "MSRValue": "0x0080200400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800200491",
+        "MSRValue": "0x3F80040400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000080080",
+        "MSRValue": "0x1000040400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.SUPPLIER_NONE.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200020002",
+        "MSRValue": "0x0800040400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400200491",
+        "MSRValue": "0x0400040400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080400122",
+        "MSRValue": "0x0100040400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_M.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_M.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800088000",
+        "MSRValue": "0x0200040400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_M.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000040122",
+        "MSRValue": "0x0080040400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0490",
+        "MSRValue": "0x3F80100400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01002007F7",
+        "MSRValue": "0x1000100400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0400",
+        "MSRValue": "0x0800100400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C0491",
+        "MSRValue": "0x0400100400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800040002",
+        "MSRValue": "0x0100100400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_S.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000080002",
+        "MSRValue": "0x0200100400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions with locked access.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD0",
-        "EventName": "MEM_INST_RETIRED.LOCK_LOADS",
-        "PEBS": "1",
-        "SampleAfterValue": "100007",
-        "UMask": "0x21"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_S.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0002",
+        "MSRValue": "0x0080100400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0001",
+        "MSRValue": "0x3F80400400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000080100",
+        "MSRValue": "0x0080400400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_M.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080040020",
+        "MSRValue": "0x0100400400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800200400",
+        "MSRValue": "0x3F80020400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "All retired store instructions.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD0",
-        "EventName": "MEM_INST_RETIRED.ALL_STORES",
-        "L1_Hit_Indication": "1",
-        "PEBS": "1",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x82"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_E.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200080400",
+        "MSRValue": "0x1000020400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0001",
+        "MSRValue": "0x0800020400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400080004",
+        "MSRValue": "0x0400020400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100100120",
+        "MSRValue": "0x0100020400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.SUPPLIER_NONE.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD3",
-        "EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM",
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions which data sources missed L3 but serviced from local DRAM.",
-        "SampleAfterValue": "100007",
+        "Deprecated": "1",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.SUPPLIER_NONE.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200020400",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.SUPPLIER_NONE.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800020491",
+        "MSRValue": "0x0080020400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.ANY_RESPONSE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400040004",
+        "MSRValue": "0x0000010010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000100080",
+        "MSRValue": "0x3F803C0010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080400120",
+        "MSRValue": "0x10003C0010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800040004",
+        "MSRValue": "0x08003C0010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04000207F7",
+        "MSRValue": "0x04003C0010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000020490",
+        "MSRValue": "0x01003C0010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000040490",
+        "MSRValue": "0x08007C0010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800080002",
+        "MSRValue": "0x02003C0010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100108000",
+        "MSRValue": "0x00803C0010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80040400",
+        "MSRValue": "0x3F80080010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000200002",
+        "MSRValue": "0x1000080010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000040001",
+        "MSRValue": "0x0800080010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800080020",
+        "MSRValue": "0x0400080010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0020",
+        "MSRValue": "0x0100080010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_E.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_E.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080080122",
+        "MSRValue": "0x0200080010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_M.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_E.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080040002",
+        "MSRValue": "0x0080080010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000048000",
+        "MSRValue": "0x3F80200010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01000807F7",
+        "MSRValue": "0x1000200010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800020004",
+        "MSRValue": "0x0800200010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080020001",
+        "MSRValue": "0x0400200010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xB0",
-        "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
-        "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x4"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_M.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080040491",
+        "MSRValue": "0x0100200010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_F.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80020010",
+        "MSRValue": "0x0200200010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_F.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400200100",
+        "MSRValue": "0x0080200010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100100080",
+        "MSRValue": "0x3F80040010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000040004",
+        "MSRValue": "0x1000040010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800040080",
+        "MSRValue": "0x0800040010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080040001",
+        "MSRValue": "0x0400040010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080020120",
+        "MSRValue": "0x0100040010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.SUPPLIER_NONE.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_M.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_M.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02000207F7",
+        "MSRValue": "0x0200040010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_S.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_M.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200100001",
+        "MSRValue": "0x0080040010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800080400",
+        "MSRValue": "0x3F80100010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Demand Data Read requests sent to uncore",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xB0",
-        "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
-        "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
+        "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000100010",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04001007F7",
+        "MSRValue": "0x0800100010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C0010",
+        "MSRValue": "0x0400100010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_F.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200200004",
+        "MSRValue": "0x0100100010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_S.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800040001",
+        "MSRValue": "0x0200100010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_S.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0100",
+        "MSRValue": "0x0080100010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_S.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080100100",
+        "MSRValue": "0x3F80400010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0010",
+        "MSRValue": "0x0080400010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800020490",
+        "MSRValue": "0x0100400010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100040004",
+        "MSRValue": "0x3F80020010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800200080",
+        "MSRValue": "0x1000020010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_F.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200200120",
+        "MSRValue": "0x0800020010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions whose data sources was remote HITM",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD3",
-        "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM",
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions whose data sources was remote HITM.",
-        "SampleAfterValue": "100007",
-        "UMask": "0x4"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80080010",
+        "MSRValue": "0x0400020010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.SUPPLIER_NONE.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200020100",
+        "MSRValue": "0x0100020010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80040100",
+        "MSRValue": "0x0200020010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.ANY_RESPONSE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000018000",
+        "MSRValue": "0x0080020010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.ANY_RESPONSE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100020020",
+        "MSRValue": "0x0000010020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_F.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080200491",
+        "MSRValue": "0x3F803C0020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "RFO requests that miss L2 cache",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.RFO_MISS",
-        "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
-        "SampleAfterValue": "200003",
-        "UMask": "0x22"
-    },
-    {
-        "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xF2",
-        "EventName": "L2_LINES_OUT.USELESS_HWPF",
-        "SampleAfterValue": "200003",
-        "UMask": "0x4"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_S.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200100010",
+        "MSRValue": "0x10003C0020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_E.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200080010",
+        "MSRValue": "0x08003C0020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.PF_HIT",
-        "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.",
-        "SampleAfterValue": "200003",
-        "UMask": "0xd8"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0080",
+        "MSRValue": "0x04003C0020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80100020",
+        "MSRValue": "0x01003C0020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400020002",
+        "MSRValue": "0x08007C0020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0020",
+        "MSRValue": "0x02003C0020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000020004",
+        "MSRValue": "0x00803C0020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400100490",
+        "MSRValue": "0x3F80080020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000020001",
+        "MSRValue": "0x1000080020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cacheable and noncachaeble code read requests",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xB0",
-        "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
-        "PublicDescription": "Counts both cacheable and non-cacheable code read requests.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80400120",
+        "MSRValue": "0x0800080020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80100010",
+        "MSRValue": "0x0400080020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_M.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200040122",
+        "MSRValue": "0x0100080020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_E.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000020122",
+        "MSRValue": "0x0200080020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_E.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100408000",
+        "MSRValue": "0x0080080020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C0122",
+        "MSRValue": "0x3F80200020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000200004",
+        "MSRValue": "0x1000200020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80088000",
+        "MSRValue": "0x0800200020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800080004",
+        "MSRValue": "0x0400200020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000100001",
+        "MSRValue": "0x0100200020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_F.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08002007F7",
+        "MSRValue": "0x0200200020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_F.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100100100",
+        "MSRValue": "0x0080200020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100020001",
+        "MSRValue": "0x3F80040020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_E.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080080080",
+        "MSRValue": "0x1000040020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80020002",
+        "MSRValue": "0x0800040020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0120",
+        "MSRValue": "0x0400040020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200020004",
+        "MSRValue": "0x0100040020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_M.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_M.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080400100",
+        "MSRValue": "0x0200040020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_M.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0020",
+        "MSRValue": "0x0080040020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100040020",
+        "MSRValue": "0x3F80100020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.SUPPLIER_NONE.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080020122",
+        "MSRValue": "0x1000100020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000040080",
+        "MSRValue": "0x0800100020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100080010",
+        "MSRValue": "0x0400100020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80040002",
+        "MSRValue": "0x0100100020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_S.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80408000",
+        "MSRValue": "0x0200100020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_S.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100040120",
+        "MSRValue": "0x0080100020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400200122",
+        "MSRValue": "0x3F80400020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000200001",
+        "MSRValue": "0x0080400020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800100001",
+        "MSRValue": "0x0100400020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C07F7",
+        "MSRValue": "0x3F80020020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C07F7",
+        "MSRValue": "0x1000020020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C0002",
+        "MSRValue": "0x0800020020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_M.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200040490",
+        "MSRValue": "0x0400020020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080100004",
+        "MSRValue": "0x0100020020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_E.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.SUPPLIER_NONE.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200080001",
+        "MSRValue": "0x0200020020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.ANY_RESPONSE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.SUPPLIER_NONE.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010120",
+        "MSRValue": "0x0080020020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_M.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.ANY_RESPONSE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00800407F7",
+        "MSRValue": "0x0000010080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100080001",
+        "MSRValue": "0x3F803C0080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C07F7",
+        "MSRValue": "0x10003C0080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800020400",
+        "MSRValue": "0x08003C0080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_E.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00800807F7",
+        "MSRValue": "0x04003C0080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200020491",
+        "MSRValue": "0x01003C0080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80100004",
+        "MSRValue": "0x08007C0080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800100400",
+        "MSRValue": "0x02003C0080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD1",
-        "EventName": "MEM_LOAD_RETIRED.FB_HIT",
-        "PEBS": "1",
-        "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.",
-        "SampleAfterValue": "100007",
-        "UMask": "0x40"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800200120",
+        "MSRValue": "0x00803C0080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400080010",
+        "MSRValue": "0x3F80080080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_E.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200080002",
+        "MSRValue": "0x1000080080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_E.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200080490",
+        "MSRValue": "0x0800080080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000100004",
+        "MSRValue": "0x0400080080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_E.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200080120",
+        "MSRValue": "0x0100080080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.SUPPLIER_NONE.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_E.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080020100",
+        "MSRValue": "0x0200080080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_E.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F800407F7",
+        "MSRValue": "0x0080080080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400040400",
+        "MSRValue": "0x3F80200080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_E.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080080020",
+        "MSRValue": "0x1000200080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_F.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200208000",
+        "MSRValue": "0x0800200080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400020490",
+        "MSRValue": "0x0400200080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_F.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080200122",
+        "MSRValue": "0x0100200080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_E.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_F.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080080002",
+        "MSRValue": "0x0200200080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_F.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C0001",
+        "MSRValue": "0x0080200080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100200491",
+        "MSRValue": "0x3F80040080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100080490",
+        "MSRValue": "0x1000040080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80048000",
+        "MSRValue": "0x0800040080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800020080",
+        "MSRValue": "0x0400040080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0490",
+        "MSRValue": "0x0100040080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions with L2 cache hits as data sources",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_M.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD1",
-        "EventName": "MEM_LOAD_RETIRED.L2_HIT",
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions with L2 cache hits as data sources.",
+        "Deprecated": "1",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200040080",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
-        "UMask": "0x2"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_M.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C0020",
+        "MSRValue": "0x0080040080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_F.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200200490",
+        "MSRValue": "0x3F80100080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80400020",
+        "MSRValue": "0x1000100080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_F.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00802007F7",
+        "MSRValue": "0x0800100080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "L1D miss outstandings duration in cycles",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x48",
-        "EventName": "L1D_PEND_MISS.PENDING",
-        "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400080002",
+        "MSRValue": "0x0400100080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800200004",
+        "MSRValue": "0x0100100080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_S.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C0004",
+        "MSRValue": "0x0200100080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_S.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0002",
+        "MSRValue": "0x0080100080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0120",
+        "MSRValue": "0x3F80400080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80100490",
+        "MSRValue": "0x0080400080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800200490",
+        "MSRValue": "0x0100400080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80200120",
+        "MSRValue": "0x3F80020080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100080400",
+        "MSRValue": "0x1000020080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C0120",
+        "MSRValue": "0x0800020080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100400400",
+        "MSRValue": "0x0400020080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_HIT_F.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080200120",
+        "MSRValue": "0x0100020080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000020120",
+        "MSRValue": "0x0200020080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_F.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200200002",
+        "MSRValue": "0x0080020080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.ANY_RESPONSE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C0490",
+        "MSRValue": "0x0000010100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0491",
+        "MSRValue": "0x3F803C0100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80080491",
+        "MSRValue": "0x10003C0100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C0020",
+        "MSRValue": "0x08003C0100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0490",
+        "MSRValue": "0x04003C0100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0004",
+        "MSRValue": "0x01003C0100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C07F7",
+        "MSRValue": "0x08007C0100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100020010",
+        "MSRValue": "0x02003C0100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100100491",
+        "MSRValue": "0x00803C0100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80208000",
+        "MSRValue": "0x3F80080100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04000807F7",
+        "MSRValue": "0x1000080100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0004",
+        "MSRValue": "0x0800080100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C8000",
+        "MSRValue": "0x0400080100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80400004",
+        "MSRValue": "0x0100080100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_E.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400100080",
+        "MSRValue": "0x0200080100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_E.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400020020",
+        "MSRValue": "0x0080080100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400020120",
+        "MSRValue": "0x3F80200100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080400004",
+        "MSRValue": "0x1000200100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80400491",
+        "MSRValue": "0x0800200100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10002007F7",
+        "MSRValue": "0x0400200100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C8000",
+        "MSRValue": "0x0100200100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_F.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800080080",
+        "MSRValue": "0x0200200100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_F.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200020490",
+        "MSRValue": "0x0080200100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400100400",
+        "MSRValue": "0x3F80040100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400020400",
+        "MSRValue": "0x1000040100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000040020",
+        "MSRValue": "0x0800040100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000080122",
+        "MSRValue": "0x0400040100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions with L1 cache hits as data sources",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD1",
-        "EventName": "MEM_LOAD_RETIRED.L1_HIT",
-        "PEBS": "1",
-        "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "Demand and prefetch data reads",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xB0",
-        "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
-        "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
+        "Deprecated": "1",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100040100",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
-        "UMask": "0x8"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_M.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800048000",
+        "MSRValue": "0x0200040100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_M.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100208000",
+        "MSRValue": "0x0080040100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100020080",
+        "MSRValue": "0x3F80100100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0400",
+        "MSRValue": "0x1000100100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_F.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200200491",
+        "MSRValue": "0x0800100100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C8000",
+        "MSRValue": "0x0400100100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800200002",
+        "MSRValue": "0x0100100100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_M.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_S.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080040122",
+        "MSRValue": "0x0200100100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_E.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_S.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200080491",
+        "MSRValue": "0x0080100100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x60",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
-        "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000200491",
+        "MSRValue": "0x3F80400100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_S.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080100490",
+        "MSRValue": "0x0080400100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100040100",
+        "MSRValue": "0x0100400100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C8000",
+        "MSRValue": "0x3F80020100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F802007F7",
+        "MSRValue": "0x1000020100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080100001",
+        "MSRValue": "0x0800020100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80100122",
+        "MSRValue": "0x0400020100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04002007F7",
+        "MSRValue": "0x0100020100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.ANY_RESPONSE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.SUPPLIER_NONE.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010100",
+        "MSRValue": "0x0200020100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.SUPPLIER_NONE.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080080004",
+        "MSRValue": "0x0080020100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Number of cache line split locks sent to uncore.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF4",
+        "EventName": "SQ_MISC.SPLIT_LOCK",
+        "PublicDescription": "Counts the number of cache line split locks sent to the uncore.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x10"
     }
 ]
 \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
index 00f4fcffa815..5d6b2e6fcb7b 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
@@ -1,62 +1,5 @@
 [
     {
-        "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
-        "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)",
-        "MetricGroup": "TopdownL1",
-        "MetricName": "Frontend_Bound",
-        "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound."
-    },
-    {
-        "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
-        "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
-        "MetricGroup": "TopdownL1_SMT",
-        "MetricName": "Frontend_Bound_SMT",
-        "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
-    },
-    {
-        "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
-        "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)",
-        "MetricGroup": "TopdownL1",
-        "MetricName": "Bad_Speculation",
-        "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example."
-    },
-    {
-        "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.",
-        "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
-        "MetricGroup": "TopdownL1_SMT",
-        "MetricName": "Bad_Speculation_SMT",
-        "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU."
-    },
-    {
-        "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
-        "MetricConstraint": "NO_NMI_WATCHDOG",
-        "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )",
-        "MetricGroup": "TopdownL1",
-        "MetricName": "Backend_Bound",
-        "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound."
-    },
-    {
-        "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
-        "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) )",
-        "MetricGroup": "TopdownL1_SMT",
-        "MetricName": "Backend_Bound_SMT",
-        "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
-    },
-    {
-        "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
-        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)",
-        "MetricGroup": "TopdownL1",
-        "MetricName": "Retiring",
-        "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category.  Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved.  Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance.  For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. "
-    },
-    {
-        "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.",
-        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
-        "MetricGroup": "TopdownL1_SMT",
-        "MetricName": "Retiring_SMT",
-        "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category.  Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved.  Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance.  For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU."
-    },
-    {
         "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
         "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
         "MetricGroup": "Summary",
@@ -71,49 +14,79 @@
     {
         "BriefDescription": "Instruction per taken branch",
         "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
-        "MetricGroup": "Branches;Fetch_BW;PGO",
+        "MetricGroup": "Branches;FetchBW;PGO",
         "MetricName": "IpTB"
     },
     {
         "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
-        "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
-        "MetricGroup": "Pipeline;Summary",
+        "MetricExpr": "1 / (INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD)",
+        "MetricGroup": "Pipeline",
         "MetricName": "CPI"
     },
     {
         "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
         "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
-        "MetricGroup": "Summary",
+        "MetricGroup": "Pipeline",
         "MetricName": "CLKS"
     },
     {
-        "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
-        "MetricExpr": "4 * cycles",
-        "MetricGroup": "TopDownL1",
-        "MetricName": "SLOTS"
+        "BriefDescription": "Instructions Per Cycle (per physical core)",
+        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "SMT;TmaL1",
+        "MetricName": "CoreIPC"
     },
     {
-        "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
-        "MetricExpr": "4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
-        "MetricGroup": "TopDownL1_SMT",
-        "MetricName": "SLOTS_SMT"
+        "BriefDescription": "Instructions Per Cycle (per physical core)",
+        "MetricExpr": "INST_RETIRED.ANY / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
+        "MetricGroup": "SMT;TmaL1",
+        "MetricName": "CoreIPC_SMT"
+    },
+    {
+        "BriefDescription": "Floating Point Operations Per Cycle",
+        "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "Flops",
+        "MetricName": "FLOPc"
+    },
+    {
+        "BriefDescription": "Floating Point Operations Per Cycle",
+        "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
+        "MetricGroup": "Flops_SMT",
+        "MetricName": "FLOPc_SMT"
+    },
+    {
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
+        "MetricGroup": "Pipeline;PortsUtil",
+        "MetricName": "ILP"
+    },
+    {
+        "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
+        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+        "MetricGroup": "BrMispredicts",
+        "MetricName": "IpMispredict"
+    },
+    {
+        "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
+        "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "SMT",
+        "MetricName": "CORE_CLKS"
     },
     {
         "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
         "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
-        "MetricGroup": "Instruction_Type",
+        "MetricGroup": "InsType",
         "MetricName": "IpLoad"
     },
     {
         "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
         "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
-        "MetricGroup": "Instruction_Type",
+        "MetricGroup": "InsType",
         "MetricName": "IpStore"
     },
     {
         "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
         "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
-        "MetricGroup": "Branches;Instruction_Type",
+        "MetricGroup": "Branches;InsType",
         "MetricName": "IpBranch"
     },
     {
@@ -131,13 +104,13 @@
     {
         "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
         "MetricExpr": "INST_RETIRED.ANY / ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )",
-        "MetricGroup": "FLOPS;FP_Arith;Instruction_Type",
+        "MetricGroup": "Flops;FpArith;InsType",
         "MetricName": "IpFLOP"
     },
     {
-        "BriefDescription": "Total number of retired Instructions",
+        "BriefDescription": "Total number of retired Instructions, Sample with: INST_RETIRED.PREC_DIST",
         "MetricExpr": "INST_RETIRED.ANY",
-        "MetricGroup": "Summary;TopDownL1",
+        "MetricGroup": "Summary;TmaL1",
         "MetricName": "Instructions"
     },
     {
@@ -149,164 +122,110 @@
     {
         "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
         "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
-        "MetricGroup": "DSB;Fetch_BW",
+        "MetricGroup": "DSB;FetchBW",
         "MetricName": "DSB_Coverage"
     },
     {
-        "BriefDescription": "Instructions Per Cycle (per physical core)",
-        "MetricExpr": "INST_RETIRED.ANY / cycles",
-        "MetricGroup": "SMT;TopDownL1",
-        "MetricName": "CoreIPC"
-    },
-    {
-        "BriefDescription": "Instructions Per Cycle (per physical core)",
-        "MetricExpr": "INST_RETIRED.ANY / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
-        "MetricGroup": "SMT;TopDownL1",
-        "MetricName": "CoreIPC_SMT"
-    },
-    {
-        "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / cycles",
-        "MetricGroup": "FLOPS",
-        "MetricName": "FLOPc"
-    },
-    {
-        "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
-        "MetricGroup": "FLOPS_SMT",
-        "MetricName": "FLOPc_SMT"
-    },
-    {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 )",
-        "MetricGroup": "Pipeline;Ports_Utilization",
-        "MetricName": "ILP"
-    },
-    {
-        "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)",
-        "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES",
-        "MetricGroup": "BrMispredicts",
-        "MetricName": "Branch_Misprediction_Cost"
-    },
-    {
-        "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)",
-        "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) * (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / BR_MISP_RETIRED.ALL_BRANCHES",
-        "MetricGroup": "BrMispredicts_SMT",
-        "MetricName": "Branch_Misprediction_Cost_SMT"
-    },
-    {
-        "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
-        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
-        "MetricGroup": "BrMispredicts",
-        "MetricName": "IpMispredict"
-    },
-    {
-        "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
-        "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
-        "MetricGroup": "SMT",
-        "MetricName": "CORE_CLKS"
-    },
-    {
         "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)",
         "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )",
-        "MetricGroup": "Memory_Bound;Memory_Lat",
+        "MetricGroup": "MemoryBound;MemoryLat",
         "MetricName": "Load_Miss_Real_Latency"
     },
     {
         "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
         "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
-        "MetricGroup": "Memory_Bound;Memory_BW",
+        "MetricGroup": "MemoryBound;MemoryBW",
         "MetricName": "MLP"
     },
     {
         "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
         "MetricConstraint": "NO_NMI_WATCHDOG",
-        "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )",
-        "MetricGroup": "TLB",
+        "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * CORE_CLKS )",
+        "MetricGroup": "MemoryTLB",
         "MetricName": "Page_Walks_Utilization"
     },
     {
-        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
-        "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )",
-        "MetricGroup": "TLB_SMT",
-        "MetricName": "Page_Walks_Utilization_SMT"
-    },
-    {
         "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]",
         "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
-        "MetricGroup": "Memory_BW",
+        "MetricGroup": "MemoryBW",
         "MetricName": "L1D_Cache_Fill_BW"
     },
     {
         "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]",
         "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
-        "MetricGroup": "Memory_BW",
+        "MetricGroup": "MemoryBW",
         "MetricName": "L2_Cache_Fill_BW"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
         "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time",
-        "MetricGroup": "Memory_BW",
+        "MetricGroup": "MemoryBW",
         "MetricName": "L3_Cache_Fill_BW"
     },
     {
-        "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+        "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
         "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time",
-        "MetricGroup": "Memory_BW;Offcore",
+        "MetricGroup": "MemoryBW;Offcore",
         "MetricName": "L3_Cache_Access_BW"
     },
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "Cache_Misses",
+        "MetricGroup": "CacheMisses",
         "MetricName": "L1MPKI"
     },
     {
         "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "Cache_Misses",
+        "MetricGroup": "CacheMisses",
         "MetricName": "L2MPKI"
     },
     {
         "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY",
-        "MetricGroup": "Cache_Misses;Offcore",
+        "MetricGroup": "CacheMisses;Offcore",
         "MetricName": "L2MPKI_All"
     },
     {
         "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1000 * ( L2_RQSTS.REFERENCES - L2_RQSTS.MISS ) / INST_RETIRED.ANY",
-        "MetricGroup": "Cache_Misses",
+        "MetricGroup": "CacheMisses",
         "MetricName": "L2HPKI_All"
     },
     {
         "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "Cache_Misses",
+        "MetricGroup": "CacheMisses",
         "MetricName": "L3MPKI"
     },
     {
         "BriefDescription": "Rate of silent evictions from the L2 cache per Kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)",
         "MetricExpr": "1000 * L2_LINES_OUT.SILENT / INST_RETIRED.ANY",
-        "MetricGroup": "",
+        "MetricGroup": "L2Evicts;Server",
         "MetricName": "L2_Evictions_Silent_PKI"
     },
     {
         "BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
         "MetricExpr": "1000 * L2_LINES_OUT.NON_SILENT / INST_RETIRED.ANY",
-        "MetricGroup": "",
+        "MetricGroup": "L2Evicts;Server",
         "MetricName": "L2_Evictions_NonSilent_PKI"
     },
     {
         "BriefDescription": "Average CPU Utilization",
         "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
-        "MetricGroup": "Summary",
+        "MetricGroup": "HPC;Summary",
         "MetricName": "CPU_Utilization"
     },
     {
+        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+        "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time",
+        "MetricGroup": "Summary;Power",
+        "MetricName": "Average_Frequency"
+    },
+    {
         "BriefDescription": "Giga Floating Point Operations Per Second",
         "MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / 1000000000 ) / duration_time",
-        "MetricGroup": "FLOPS;Summary",
+        "MetricGroup": "Flops;HPC",
         "MetricName": "GFLOPs"
     },
     {
@@ -317,62 +236,62 @@
     },
     {
         "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
-        "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 )",
-        "MetricGroup": "SMT;Summary",
+        "MetricExpr": "1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+        "MetricGroup": "SMT",
         "MetricName": "SMT_2T_Utilization"
     },
     {
         "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
-        "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
         "MetricGroup": "OS",
         "MetricName": "Kernel_Utilization"
     },
     {
         "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
-        "MetricExpr": "( ( ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) * 1048576 ) / 1000000000 ) / duration_time",
-        "MetricGroup": "Memory_BW;SoC",
+        "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time",
+        "MetricGroup": "HPC;MemoryBW;SoC",
         "MetricName": "DRAM_BW_Use"
     },
     {
         "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches",
         "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x35\\,umask\\=0x21\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )",
-        "MetricGroup": "Memory_Lat;SoC",
+        "MetricGroup": "MemoryLat;SoC",
         "MetricName": "MEM_Read_Latency"
     },
     {
         "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
         "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433\\,thresh\\=1@",
-        "MetricGroup": "Memory_BW;SoC",
+        "MetricGroup": "MemoryBW;SoC",
         "MetricName": "MEM_Parallel_Reads"
     },
     {
         "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches",
         "MetricExpr": "( 1000000000 * ( imc@event\\=0xe0\\,umask\\=0x1@ / imc@event\\=0xe3@ ) / imc_0@event\\=0x0@ )",
-        "MetricGroup": "Memory_Lat;SoC;Server",
+        "MetricGroup": "MemoryLat;SoC;Server",
         "MetricName": "MEM_PMM_Read_Latency"
     },
     {
         "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
         "MetricExpr": "( ( 64 * imc@event\\=0xe3@ / 1000000000 ) / duration_time )",
-        "MetricGroup": "Memory_BW;SoC;Server",
+        "MetricGroup": "MemoryBW;SoC;Server",
         "MetricName": "PMM_Read_BW"
     },
     {
         "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
         "MetricExpr": "( ( 64 * imc@event\\=0xe7@ / 1000000000 ) / duration_time )",
-        "MetricGroup": "Memory_BW;SoC;Server",
+        "MetricGroup": "MemoryBW;SoC;Server",
         "MetricName": "PMM_Write_BW"
     },
     {
         "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]",
         "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3 ) * 4 / 1000000000 / duration_time",
-        "MetricGroup": "IO_BW;SoC;Server",
+        "MetricGroup": "IoBW;SoC;Server",
         "MetricName": "IO_Write_BW"
     },
     {
         "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]",
         "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3 ) * 4 / 1000000000 / duration_time",
-        "MetricGroup": "IO_BW;SoC;Server",
+        "MetricGroup": "IoBW;SoC;Server",
         "MetricName": "IO_Read_BW"
     },
     {
@@ -383,7 +302,7 @@
     },
     {
         "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
-        "MetricExpr": "INST_RETIRED.ANY / ( BR_INST_RETIRED.FAR_BRANCH / 2 )",
+        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
         "MetricGroup": "Branches;OS",
         "MetricName": "IpFarBranch"
     },
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/floating-point.json b/tools/perf/pmu-events/arch/x86/cascadelakex/floating-point.json
index 3c0b95fd60ad..ade925d7a68c 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/floating-point.json
@@ -1,12 +1,12 @@
 [
     {
-        "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+        "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 2 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT14 RCP14 DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0xC7",
-        "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+        "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
         "SampleAfterValue": "2000003",
-        "UMask": "0x2"
+        "UMask": "0x4"
     },
     {
         "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 4 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
@@ -18,13 +18,13 @@
         "UMask": "0x8"
     },
     {
-        "BriefDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 8 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+        "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 4 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0xC7",
-        "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
+        "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
         "SampleAfterValue": "2000003",
-        "UMask": "0x40"
+        "UMask": "0x10"
     },
     {
         "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 8 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
@@ -36,13 +36,13 @@
         "UMask": "0x20"
     },
     {
-        "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+        "BriefDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 8 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0xC7",
-        "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+        "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
         "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "UMask": "0x40"
     },
     {
         "BriefDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 16 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
@@ -54,32 +54,32 @@
         "UMask": "0x80"
     },
     {
-        "BriefDescription": "Cycles with any input/output SSE or FP assist",
+        "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0xCA",
-        "EventName": "FP_ASSIST.ANY",
-        "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1e"
+        "EventCode": "0xC7",
+        "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 2 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT14 RCP14 DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+        "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0xC7",
-        "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
+        "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
         "SampleAfterValue": "2000003",
-        "UMask": "0x4"
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 4 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+        "BriefDescription": "Cycles with any input/output SSE or FP assist",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC7",
-        "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x10"
+        "CounterMask": "1",
+        "EventCode": "0xCA",
+        "EventName": "FP_ASSIST.ANY",
+        "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1e"
     }
 ]
 \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json b/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json
index 0716b2e3ff75..078706a50091 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json
@@ -1,210 +1,125 @@
 [
     {
-        "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+        "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "4",
-        "EventCode": "0x9C",
-        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
-        "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.",
-        "SampleAfterValue": "2000003",
+        "EventCode": "0xE6",
+        "EventName": "BACLEARS.ANY",
+        "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
+        "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.",
+        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.L2_MISS",
-        "MSRIndex": "0x3F7",
-        "MSRValue": "0x13",
-        "PEBS": "1",
-        "SampleAfterValue": "100007",
-        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xAB",
+        "EventName": "DSB2MITE_SWITCHES.COUNT",
+        "PublicDescription": "This event counts the number of the Decode Stream Buffer (DSB)-to-MITE switches including all misses because of missing Decode Stream Buffer (DSB) cache and u-arch forced misses.\nNote: Invoking MITE requires two or three cycles delay.",
+        "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x79",
-        "EventName": "IDQ.MS_MITE_UOPS",
-        "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
+        "EventCode": "0xAB",
+        "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+        "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x20"
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.",
+        "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2",
+        "EventName": "FRONTEND_RETIRED.DSB_MISS",
         "MSRIndex": "0x3F7",
-        "MSRValue": "0x200206",
+        "MSRValue": "0x11",
         "PEBS": "1",
+        "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
         "SampleAfterValue": "100007",
         "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.",
+        "BriefDescription": "Retired Instructions who experienced iTLB true miss.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3",
+        "EventName": "FRONTEND_RETIRED.ITLB_MISS",
         "MSRIndex": "0x3F7",
-        "MSRValue": "0x300206",
+        "MSRValue": "0x14",
         "PEBS": "1",
+        "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.",
         "SampleAfterValue": "100007",
         "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.",
+        "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
+        "EventName": "FRONTEND_RETIRED.L1I_MISS",
         "MSRIndex": "0x3F7",
-        "MSRValue": "0x100206",
+        "MSRValue": "0x12",
         "PEBS": "1",
-        "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.",
         "SampleAfterValue": "100007",
         "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x9C",
-        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
-        "Invert": "1",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x79",
-        "EventName": "IDQ.MS_UOPS",
-        "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x30"
-    },
-    {
-        "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.",
+        "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.L1I_MISS",
+        "EventName": "FRONTEND_RETIRED.L2_MISS",
         "MSRIndex": "0x3F7",
-        "MSRValue": "0x12",
+        "MSRValue": "0x13",
         "PEBS": "1",
         "SampleAfterValue": "100007",
         "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+        "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x9C",
-        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
-        "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.",
-        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_1",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x400106",
+        "PEBS": "2",
+        "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x79",
-        "EventName": "IDQ.DSB_CYCLES",
-        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x8"
-    },
-    {
-        "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss.",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.DSB_MISS",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
         "MSRIndex": "0x3F7",
-        "MSRValue": "0x11",
+        "MSRValue": "0x408006",
         "PEBS": "1",
-        "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
         "SampleAfterValue": "100007",
         "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x79",
-        "EventName": "IDQ.MITE_UOPS",
-        "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x4"
-    },
-    {
-        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x79",
-        "EventName": "IDQ.MS_CYCLES",
-        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x30"
-    },
-    {
-        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x79",
-        "EventName": "IDQ.MITE_CYCLES",
-        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x4"
-    },
-    {
-        "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x9C",
-        "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
-        "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4  x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions).  c. Instruction Decode Queue (IDQ) delivers four uops.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xAB",
-        "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
-        "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
         "MSRIndex": "0x3F7",
-        "MSRValue": "0x400806",
+        "MSRValue": "0x401006",
         "PEBS": "1",
-        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.",
         "SampleAfterValue": "100007",
         "TakenAlone": "1",
         "UMask": "0x1"
@@ -223,93 +138,96 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
         "MSRIndex": "0x3F7",
-        "MSRValue": "0x400406",
+        "MSRValue": "0x410006",
         "PEBS": "1",
         "SampleAfterValue": "100007",
         "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x79",
-        "EventName": "IDQ.MS_DSB_CYCLES",
-        "PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x10"
-    },
-    {
-        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xAB",
-        "EventName": "DSB2MITE_SWITCHES.COUNT",
-        "PublicDescription": "This event counts the number of the Decode Stream Buffer (DSB)-to-MITE switches including all misses because of missing Decode Stream Buffer (DSB) cache and u-arch forced misses.\nNote: Invoking MITE requires two or three cycles delay.",
-        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x100206",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle",
-        "Counter": "0,1,2,3,4,5,6,7",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xc6",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_1",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2",
         "MSRIndex": "0x3F7",
-        "MSRValue": "0x400106",
-        "PEBS": "2",
-        "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.",
+        "MSRValue": "0x200206",
+        "PEBS": "1",
         "SampleAfterValue": "100007",
         "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x83",
-        "EventName": "ICACHE_64B.IFTAG_MISS",
-        "SampleAfterValue": "200003",
-        "UMask": "0x2"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x300206",
+        "PEBS": "1",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
         "MSRIndex": "0x3F7",
-        "MSRValue": "0x408006",
+        "MSRValue": "0x402006",
         "PEBS": "1",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.",
         "SampleAfterValue": "100007",
         "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "3",
-        "EventCode": "0x9C",
-        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
-        "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.",
-        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x400406",
+        "PEBS": "1",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x83",
-        "EventName": "ICACHE_64B.IFTAG_HIT",
-        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x420006",
+        "PEBS": "1",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
@@ -326,38 +244,29 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.STLB_MISS",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
         "MSRIndex": "0x3F7",
-        "MSRValue": "0x15",
+        "MSRValue": "0x400806",
         "PEBS": "1",
-        "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.",
         "SampleAfterValue": "100007",
         "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x79",
-        "EventName": "IDQ.DSB_UOPS",
-        "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x8"
-    },
-    {
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
+        "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
+        "EventName": "FRONTEND_RETIRED.STLB_MISS",
         "MSRIndex": "0x3F7",
-        "MSRValue": "0x410006",
+        "MSRValue": "0x15",
         "PEBS": "1",
+        "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
         "SampleAfterValue": "100007",
         "TakenAlone": "1",
         "UMask": "0x1"
@@ -373,6 +282,66 @@
         "UMask": "0x4"
     },
     {
+        "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x83",
+        "EventName": "ICACHE_64B.IFTAG_HIT",
+        "SampleAfterValue": "200003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x83",
+        "EventName": "ICACHE_64B.IFTAG_MISS",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x83",
+        "EventName": "ICACHE_64B.IFTAG_STALL",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "4",
+        "EventCode": "0x79",
+        "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+        "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x18"
+    },
+    {
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x79",
+        "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+        "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x18"
+    },
+    {
+        "BriefDescription": "Cycles MITE is delivering 4 Uops",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "4",
+        "EventCode": "0x79",
+        "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+        "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x24"
+    },
+    {
         "BriefDescription": "Cycles MITE is delivering any Uop",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
@@ -384,53 +353,78 @@
         "UMask": "0x24"
     },
     {
-        "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xE6",
-        "EventName": "BACLEARS.ANY",
-        "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "CounterMask": "1",
+        "EventCode": "0x79",
+        "EventName": "IDQ.DSB_CYCLES",
+        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
     },
     {
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
-        "MSRIndex": "0x3F7",
-        "MSRValue": "0x401006",
-        "PEBS": "1",
-        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.",
-        "SampleAfterValue": "100007",
-        "TakenAlone": "1",
-        "UMask": "0x1"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x79",
+        "EventName": "IDQ.DSB_UOPS",
+        "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
     },
     {
-        "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "2",
-        "EventCode": "0x9C",
-        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
-        "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.",
+        "CounterMask": "1",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MITE_CYCLES",
+        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "UMask": "0x4"
     },
     {
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
-        "MSRIndex": "0x3F7",
-        "MSRValue": "0x402006",
-        "PEBS": "1",
-        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.",
-        "SampleAfterValue": "100007",
-        "TakenAlone": "1",
-        "UMask": "0x1"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MITE_UOPS",
+        "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MS_CYCLES",
+        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x30"
+    },
+    {
+        "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MS_DSB_CYCLES",
+        "PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MS_MITE_UOPS",
+        "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20"
     },
     {
         "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer",
@@ -445,72 +439,78 @@
         "UMask": "0x30"
     },
     {
-        "BriefDescription": "Retired Instructions who experienced iTLB true miss.",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.ITLB_MISS",
-        "MSRIndex": "0x3F7",
-        "MSRValue": "0x14",
-        "PEBS": "1",
-        "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.",
-        "SampleAfterValue": "100007",
-        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MS_UOPS",
+        "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x30"
+    },
+    {
+        "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x9C",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+        "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4  x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions).  c. Instruction Decode Queue (IDQ) delivers four uops.",
+        "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+        "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x79",
-        "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
-        "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+        "CounterMask": "4",
+        "EventCode": "0x9C",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+        "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x18"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
+        "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
-        "MSRIndex": "0x3F7",
-        "MSRValue": "0x420006",
-        "PEBS": "1",
-        "SampleAfterValue": "100007",
-        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x9C",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+        "Invert": "1",
+        "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles MITE is delivering 4 Uops",
+        "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "4",
-        "EventCode": "0x79",
-        "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
-        "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "CounterMask": "3",
+        "EventCode": "0x9C",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+        "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x24"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+        "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "4",
-        "EventCode": "0x79",
-        "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
-        "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+        "CounterMask": "2",
+        "EventCode": "0x9C",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+        "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x18"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+        "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x83",
-        "EventName": "ICACHE_64B.IFTAG_STALL",
-        "SampleAfterValue": "200003",
-        "UMask": "0x4"
+        "CounterMask": "1",
+        "EventCode": "0x9C",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+        "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     }
 ]
 \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/memory.json b/tools/perf/pmu-events/arch/x86/cascadelakex/memory.json
index 0c07cb4fbf58..7c2adadca87e 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/memory.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/memory.json
@@ -1,1338 +1,1358 @@
 [
     {
-        "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x06040007F7",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "2",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "6",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x6"
+    },
+    {
+        "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.ABORTED",
+        "PEBS": "1",
+        "PublicDescription": "Number of times HLE abort was triggered.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.ABORTED_EVENTS",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.ABORTED_MEM",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.ABORTED_MEMTYPE",
+        "PublicDescription": "Number of times an HLE execution aborted due to incompatible memory type.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x40"
+    },
+    {
+        "BriefDescription": "Number of times an HLE execution aborted due to hardware timer expiration.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.ABORTED_TIMER",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Number of times an HLE execution successfully committed",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.COMMIT",
+        "PublicDescription": "Number of times HLE commit succeeded.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Number of times an HLE execution started.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.START",
+        "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL089",
+        "EventCode": "0xC3",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:a. memory disambiguation,b. external snoop, orc. cross SMT-HW-thread snoop (stores) hitting load buffer.",
         "SampleAfterValue": "100003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x80",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "1009",
+        "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800491",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x10",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "20011",
+        "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104000100",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x100",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "503",
+        "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C000490",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x20",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210000020",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x4",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.  Reported latency may be longer than just the memory latency.",
         "SampleAfterValue": "100003",
+        "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x200",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "101",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x40",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "2003",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x8",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "50021",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.ANY_SNOOP OCR.ALL_DATA_RD.L3_MISS.ANY_SNOOP OCR.ALL_DATA_RD.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_DATA_RD.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C000100",
+        "MSRValue": "0x3FBC000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OCR.ALL_DATA_RD.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F90000400",
+        "MSRValue": "0x103C000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0404000490",
+        "MSRValue": "0x083C000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000010",
+        "MSRValue": "0x043C000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM",
+        "EventName": "OCR.ALL_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00490",
+        "MSRValue": "0x013C000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.REMOTE_HITM OCR.ALL_DATA_RD.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OCR.ALL_DATA_RD.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0090000120",
+        "MSRValue": "0x103FC00491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OCR.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0084000100",
+        "MSRValue": "0x083FC00491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.SNOOP_MISS OCR.ALL_DATA_RD.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_DATA_RD.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C000100",
+        "MSRValue": "0x023C000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.SNOOP_NONE OCR.ALL_DATA_RD.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OCR.ALL_DATA_RD.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F90000100",
+        "MSRValue": "0x00BC000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP  OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804008000",
+        "MSRValue": "0x3F84000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE  OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x06040007F7",
+        "MSRValue": "0x1004000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD  OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C000491",
+        "MSRValue": "0x0804000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD  OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C000010",
+        "MSRValue": "0x0404000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED  OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0404000080",
+        "MSRValue": "0x0104000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804000010",
+        "MSRValue": "0x0204000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01040007F7",
+        "MSRValue": "0x0604000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104000400",
+        "MSRValue": "0x0084000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C000004",
+        "MSRValue": "0x063B800491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP  OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110000010",
+        "MSRValue": "0x3F90000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE  OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84000491",
+        "MSRValue": "0x1010000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC9",
-        "EventName": "RTM_RETIRED.ABORTED_MEM",
-        "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x8"
-    },
-    {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD  OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84000010",
+        "MSRValue": "0x0810000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD  OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10040007F7",
+        "MSRValue": "0x0410000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED  OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C000002",
+        "MSRValue": "0x0110000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0090000004",
+        "MSRValue": "0x0210000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP OCR.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP",
+        "EventName": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000400",
+        "MSRValue": "0x0090000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_NONE",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC000120",
+        "MSRValue": "0x3FBC000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD  OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08100007F7",
+        "MSRValue": "0x103C000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84000100",
+        "MSRValue": "0x083C000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.ANY_SNOOP",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000120",
+        "MSRValue": "0x043C000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104000020",
+        "MSRValue": "0x013C000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0404000020",
+        "MSRValue": "0x103FC00490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0204000400",
+        "MSRValue": "0x083FC00490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.ANY_SNOOP OCR.ALL_PF_RFO.L3_MISS.ANY_SNOOP OCR.ALL_PF_RFO.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_MISS.ANY_SNOOP",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000120",
+        "MSRValue": "0x023C000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_NONE OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0404000002",
+        "MSRValue": "0x00BC000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP  OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C000020",
+        "MSRValue": "0x3F84000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE  OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410008000",
+        "MSRValue": "0x1004000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD  OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210000002",
+        "MSRValue": "0x0804000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800002",
+        "MSRValue": "0x0404000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED  OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F840007F7",
+        "MSRValue": "0x0104000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210000491",
+        "MSRValue": "0x0204000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210000400",
+        "MSRValue": "0x0604000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C000004",
+        "MSRValue": "0x0084000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104000100",
+        "MSRValue": "0x063B800490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP  OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004000490",
+        "MSRValue": "0x3F90000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE  OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010000004",
+        "MSRValue": "0x1010000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD  OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C000010",
+        "MSRValue": "0x0810000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84000100",
+        "MSRValue": "0x0410000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x54",
-        "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
-        "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x4"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED  OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000004",
+        "MSRValue": "0x0110000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C000020",
+        "MSRValue": "0x0210000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x54",
-        "EventName": "TX_MEM.ABORT_CONFLICT",
-        "PublicDescription": "Number of times a TSX line had a cache conflict.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210000400",
+        "MSRValue": "0x0090000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD  OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.ANY_SNOOP OCR.ALL_PF_RFO.L3_MISS.ANY_SNOOP OCR.ALL_PF_RFO.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_PF_RFO.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804000120",
+        "MSRValue": "0x3FBC000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_MISS.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_PF_RFO.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410000002",
+        "MSRValue": "0x103C000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104000004",
+        "MSRValue": "0x083C000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800100",
+        "MSRValue": "0x043C000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_PF_RFO.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804000120",
+        "MSRValue": "0x013C000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.REMOTE_HITM OCR.ALL_PF_RFO.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_PF_RFO.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C000122",
+        "MSRValue": "0x103FC00120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0810000122",
+        "MSRValue": "0x083FC00120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.SNOOP_MISS OCR.ALL_PF_RFO.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_PF_RFO.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104008000",
+        "MSRValue": "0x023C000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x5d",
-        "EventName": "TX_EXEC.MISC5",
-        "PublicDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x10"
-    },
-    {
-        "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x5d",
-        "EventName": "TX_EXEC.MISC4",
-        "PublicDescription": "RTM region detected inside HLE.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x8"
-    },
-    {
-        "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x5d",
-        "EventName": "TX_EXEC.MISC3",
-        "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x4"
-    },
-    {
-        "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x5d",
-        "EventName": "TX_EXEC.MISC2",
-        "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x5d",
-        "EventName": "TX_EXEC.MISC1",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.SNOOP_NONE OCR.ALL_PF_RFO.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_PF_RFO.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104000122",
+        "MSRValue": "0x00BC000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP  OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010000002",
+        "MSRValue": "0x3F84000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE  OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00490",
+        "MSRValue": "0x1004000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD  OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800010",
+        "MSRValue": "0x0804000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0090000400",
+        "MSRValue": "0x0404000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED  OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C000400",
+        "MSRValue": "0x0104000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0204000001",
+        "MSRValue": "0x0204000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0084000010",
+        "MSRValue": "0x0604000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00490",
+        "MSRValue": "0x0084000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800122",
+        "MSRValue": "0x063B800120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP  OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0404000400",
+        "MSRValue": "0x3F90000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE  OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C000020",
+        "MSRValue": "0x1010000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD  OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C000400",
+        "MSRValue": "0x0810000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B8007F7",
+        "MSRValue": "0x0410000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED  OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C000491",
+        "MSRValue": "0x0110000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0084000001",
+        "MSRValue": "0x0210000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010000002",
+        "MSRValue": "0x0090000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_READS.L3_MISS.ANY_SNOOP OCR.ALL_READS.L3_MISS.ANY_SNOOP OCR.ALL_READS.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OCR.ALL_READS.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00840007F7",
+        "MSRValue": "0x3FBC0007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_READS.L3_MISS.HITM_OTHER_CORE OCR.ALL_READS.L3_MISS.HITM_OTHER_CORE OCR.ALL_READS.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OCR.ALL_READS.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0090000490",
+        "MSRValue": "0x103C0007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110000100",
+        "MSRValue": "0x083C0007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10100007F7",
+        "MSRValue": "0x043C0007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_READS.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_READS.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_READS.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_READS.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C000004",
+        "MSRValue": "0x013C0007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC9",
-        "EventName": "RTM_RETIRED.ABORTED_TIMER",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x10"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "OCR.ALL_READS.L3_MISS.REMOTE_HITM OCR.ALL_READS.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.ALL_READS.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000002",
+        "MSRValue": "0x103FC007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00004",
+        "MSRValue": "0x083FC007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_READS.L3_MISS.SNOOP_MISS OCR.ALL_READS.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_READS.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C000002",
+        "MSRValue": "0x023C0007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP  OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_READS.L3_MISS.SNOOP_NONE OCR.ALL_READS.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OCR.ALL_READS.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F900007F7",
+        "MSRValue": "0x00BC0007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP  OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804000100",
+        "MSRValue": "0x3F840007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE  OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104000010",
+        "MSRValue": "0x10040007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD  OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84000001",
+        "MSRValue": "0x08040007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
@@ -1352,360 +1372,338 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED  OCR.ALL_READS.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410000010",
+        "MSRValue": "0x01040007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0204000004",
+        "MSRValue": "0x02040007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C000010",
+        "MSRValue": "0x06040007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS.SNOOP_MISS",
+        "EventName": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C000002",
+        "MSRValue": "0x00840007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_READS.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_READS.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104000080",
+        "MSRValue": "0x063B8007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP  OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM",
+        "EventName": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00490",
+        "MSRValue": "0x3F900007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE  OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0810000004",
+        "MSRValue": "0x10100007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD  OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS",
+        "EventName": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C000004",
+        "MSRValue": "0x08100007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD  OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS",
+        "EventName": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C000080",
+        "MSRValue": "0x04100007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED  OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000400",
+        "MSRValue": "0x01100007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0404000080",
+        "MSRValue": "0x02100007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
+        "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210000010",
+        "MSRValue": "0x00900007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L1D_AND_SW.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_RFO.L3_MISS.ANY_SNOOP OCR.ALL_RFO.L3_MISS.ANY_SNOOP OCR.ALL_RFO.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_RFO.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C000400",
+        "MSRValue": "0x3FBC000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_RFO.L3_MISS.HITM_OTHER_CORE OCR.ALL_RFO.L3_MISS.HITM_OTHER_CORE OCR.ALL_RFO.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE",
+        "EventName": "OCR.ALL_RFO.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC000004",
+        "MSRValue": "0x103C000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00400",
+        "MSRValue": "0x083C000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0204000100",
+        "MSRValue": "0x043C000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an HLE execution successfully committed",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC8",
-        "EventName": "HLE_RETIRED.COMMIT",
-        "PublicDescription": "Number of times HLE commit succeeded.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_RFO.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0404000122",
+        "MSRValue": "0x013C000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_RFO.L3_MISS.REMOTE_HITM OCR.ALL_RFO.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OCR.ALL_RFO.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84000002",
+        "MSRValue": "0x103FC00122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410000002",
+        "MSRValue": "0x083FC00122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD  OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_RFO.L3_MISS.SNOOP_MISS OCR.ALL_RFO.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_RFO.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0810000491",
+        "MSRValue": "0x023C000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_RFO.L3_MISS.SNOOP_NONE OCR.ALL_RFO.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_RFO.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010000010",
+        "MSRValue": "0x00BC000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP  OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0204000491",
+        "MSRValue": "0x3F84000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE  OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210000001",
+        "MSRValue": "0x1004000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.PF_L2_DATA_RD.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD  OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C000010",
+        "MSRValue": "0x0804000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD  OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HITM",
+        "EventName": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00100",
+        "MSRValue": "0x0404000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED  OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C000490",
+        "MSRValue": "0x0104000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
@@ -1725,389 +1723,375 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.DEMAND_RFO.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C000002",
+        "MSRValue": "0x0604000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS.SNOOP_NONE",
+        "EventName": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC000080",
+        "MSRValue": "0x0084000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_NONE",
+        "EventName": "OCR.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC000100",
+        "MSRValue": "0x063B800122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_MISS.SNOOP_MISS OCR.ALL_READS.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP  OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_MISS.SNOOP_MISS",
+        "EventName": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C0007F7",
+        "MSRValue": "0x3F90000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE  OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0404000004",
+        "MSRValue": "0x1010000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD  OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HITM",
+        "EventName": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00122",
+        "MSRValue": "0x0810000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD  OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C0007F7",
+        "MSRValue": "0x0410000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED  OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0084000120",
+        "MSRValue": "0x0110000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00020",
+        "MSRValue": "0x0210000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84008000",
+        "MSRValue": "0x0090000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP OCR.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0090000002",
+        "MSRValue": "0x3FBC000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.HITM_OTHER_CORE OCR.DEMAND_CODE_RD.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804000004",
+        "MSRValue": "0x103C000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410000120",
+        "MSRValue": "0x083C000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000122",
+        "MSRValue": "0x043C000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.NO_SNOOP_NEEDED OCR.DEMAND_CODE_RD.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004000080",
+        "MSRValue": "0x013C000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010000004",
+        "MSRValue": "0x103FC00004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410000010",
+        "MSRValue": "0x083FC00004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800490",
+        "MSRValue": "0x023C000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED  OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110000490",
+        "MSRValue": "0x00BC000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110008000",
+        "MSRValue": "0x3F84000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800100",
+        "MSRValue": "0x1004000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_MISS.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC008000",
+        "MSRValue": "0x0804000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE  OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004000122",
+        "MSRValue": "0x0404000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00080",
+        "MSRValue": "0x0104000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS.REMOTE_HITM",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00002",
+        "MSRValue": "0x0204000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000020",
+        "MSRValue": "0x0604000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004000120",
+        "MSRValue": "0x0084000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP  OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F840007F7",
+        "MSRValue": "0x063B800004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
         "MSRValue": "0x3F90000004",
         "Offcore": "1",
@@ -2116,1002 +2100,949 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS.SNOOP_MISS",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C0007F7",
+        "MSRValue": "0x1010000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110000122",
+        "MSRValue": "0x0810000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000490",
+        "MSRValue": "0x0410000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000001",
+        "MSRValue": "0x0110000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00491",
+        "MSRValue": "0x0210000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE  OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10040007F7",
+        "MSRValue": "0x0090000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP OCR.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C0007F7",
+        "MSRValue": "0x3FBC000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.DEMAND_DATA_RD.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F90000002",
+        "MSRValue": "0x103C000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004008000",
+        "MSRValue": "0x083C000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
+        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xcd",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
-        "MSRIndex": "0x3F6",
-        "MSRValue": "0x80",
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.  Reported latency may be longer than just the memory latency.",
-        "SampleAfterValue": "1009",
-        "TakenAlone": "1",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x043C000001",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.DEMAND_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C0007F7",
+        "MSRValue": "0x013C000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x54",
-        "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
-        "PublicDescription": "Number of times we could not allocate Lock Buffer.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x40"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0204008000",
+        "MSRValue": "0x103FC00001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C000010",
+        "MSRValue": "0x083FC00001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C000491",
+        "MSRValue": "0x023C000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010000020",
+        "MSRValue": "0x00BC000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C000122",
+        "MSRValue": "0x3F84000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410000400",
+        "MSRValue": "0x1004000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0084000122",
+        "MSRValue": "0x0804000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C000491",
+        "MSRValue": "0x0404000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP  OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84000120",
+        "MSRValue": "0x0104000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C000490",
+        "MSRValue": "0x0204000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000490",
+        "MSRValue": "0x0604000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD  OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804000490",
+        "MSRValue": "0x0084000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C000002",
+        "MSRValue": "0x063B800001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.ANY_SNOOP OCR.ALL_DATA_RD.L3_MISS.ANY_SNOOP OCR.ALL_DATA_RD.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_MISS.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000491",
+        "MSRValue": "0x3F90000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C000080",
+        "MSRValue": "0x1010000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C000491",
+        "MSRValue": "0x0810000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0404000100",
+        "MSRValue": "0x0410000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F90000120",
+        "MSRValue": "0x0110000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C000120",
+        "MSRValue": "0x0210000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC008000",
+        "MSRValue": "0x0090000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.ANY_SNOOP OCR.DEMAND_RFO.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_MISS.SNOOP_MISS",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C000100",
+        "MSRValue": "0x3FBC000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.HITM_OTHER_CORE OCR.DEMAND_RFO.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000002",
+        "MSRValue": "0x103C000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests",
+        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0090008000",
+        "MSRValue": "0x083C000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED  OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104000122",
+        "MSRValue": "0x043C000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.DEMAND_RFO.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C000490",
+        "MSRValue": "0x013C000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP  OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F90000122",
+        "MSRValue": "0x103FC00002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010000080",
+        "MSRValue": "0x083FC00002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C000490",
+        "MSRValue": "0x023C000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD  OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0810000120",
+        "MSRValue": "0x00BC000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000490",
+        "MSRValue": "0x3F84000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0090000001",
+        "MSRValue": "0x1004000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0204000120",
+        "MSRValue": "0x0804000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP OCR.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000001",
+        "MSRValue": "0x0404000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC9",
-        "EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
-        "PublicDescription": "Number of times an RTM execution aborted due to incompatible memory type.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x40"
-    },
-    {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE  OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010000120",
+        "MSRValue": "0x0104000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP  OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84000490",
+        "MSRValue": "0x0204000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00010",
+        "MSRValue": "0x0604000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0084000122",
+        "MSRValue": "0x0084000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00900007F7",
+        "MSRValue": "0x063B800002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C000010",
+        "MSRValue": "0x3F90000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84000400",
+        "MSRValue": "0x1010000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02040007F7",
+        "MSRValue": "0x0810000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD  OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410000122",
+        "MSRValue": "0x0410000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004000001",
+        "MSRValue": "0x0110000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F90000080",
+        "MSRValue": "0x0210000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00122",
+        "MSRValue": "0x0090000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.ANY_SNOOP OCR.OTHER.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.OTHER.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0810000010",
+        "MSRValue": "0x3FBC008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_READS.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.HITM_OTHER_CORE OCR.OTHER.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.OTHER.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B8007F7",
+        "MSRValue": "0x103C008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD  OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.HIT_OTHER_CORE_FWD OCR.OTHER.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.OTHER.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0810000122",
+        "MSRValue": "0x083C008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L2_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.OTHER.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OCR.OTHER.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C000010",
+        "MSRValue": "0x043C008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.NO_SNOOP_NEEDED OCR.OTHER.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.OTHER.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410000020",
+        "MSRValue": "0x013C008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.OTHER.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804000002",
+        "MSRValue": "0x103FC08000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.OTHER.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004000004",
+        "MSRValue": "0x083FC08000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED  OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OCR.OTHER.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104000490",
+        "MSRValue": "0x023C008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.OTHER.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800080",
+        "MSRValue": "0x00BC008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.ANY_SNOOP",
+        "EventName": "OCR.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000122",
+        "MSRValue": "0x3F84008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.OTHER.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000100",
+        "MSRValue": "0x1004008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.OTHER.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0404008000",
+        "MSRValue": "0x0804008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.OTHER.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0810000100",
+        "MSRValue": "0x0404008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OCR.OTHER.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C000490",
+        "MSRValue": "0x0104008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OCR.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0204000001",
+        "MSRValue": "0x0204008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP",
+        "EventName": "OCR.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000400",
+        "MSRValue": "0x0604008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.HITM_OTHER_CORE OCR.PF_L3_RFO.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OCR.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C000100",
+        "MSRValue": "0x0084008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
@@ -3131,1426 +3062,1326 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x54",
-        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
-        "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x8"
-    },
-    {
-        "BriefDescription": "Counts any other requests",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0084008000",
+        "MSRValue": "0x3F90008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004000001",
+        "MSRValue": "0x1010008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004000100",
+        "MSRValue": "0x0810008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C000002",
+        "MSRValue": "0x0410008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000491",
+        "MSRValue": "0x0110008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210000120",
+        "MSRValue": "0x0210008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C000490",
+        "MSRValue": "0x0090008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP OCR.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F90000001",
+        "MSRValue": "0x3FBC000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.HITM_OTHER_CORE OCR.PF_L1D_AND_SW.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C000004",
+        "MSRValue": "0x103C000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004000491",
+        "MSRValue": "0x083C000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0084000490",
+        "MSRValue": "0x043C000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L1D_AND_SW.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F90000010",
+        "MSRValue": "0x013C000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC007F7",
+        "MSRValue": "0x103FC00400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604008000",
+        "MSRValue": "0x083FC00400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0084000010",
+        "MSRValue": "0x023C000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0204000002",
+        "MSRValue": "0x00BC000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS.REMOTE_HITM",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC007F7",
+        "MSRValue": "0x3F84000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC000001",
+        "MSRValue": "0x1004000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C000122",
+        "MSRValue": "0x0804000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210000080",
+        "MSRValue": "0x0404000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410000100",
+        "MSRValue": "0x0104000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0404000004",
+        "MSRValue": "0x0204000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0084000491",
+        "MSRValue": "0x0604000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NONE",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC000002",
+        "MSRValue": "0x0084000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B808000",
+        "MSRValue": "0x063B800400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C000002",
+        "MSRValue": "0x3F90000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804000002",
+        "MSRValue": "0x1010000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84000010",
+        "MSRValue": "0x0810000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE  OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004000490",
+        "MSRValue": "0x0410000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC08000",
+        "MSRValue": "0x0110000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800002",
+        "MSRValue": "0x0210000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004000020",
+        "MSRValue": "0x0090000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP OCR.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0084000001",
+        "MSRValue": "0x3FBC000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.PF_L2_DATA_RD.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0404000002",
+        "MSRValue": "0x103C000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP  OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F90000120",
+        "MSRValue": "0x083C000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE  OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10100007F7",
+        "MSRValue": "0x043C000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L2_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0084000400",
+        "MSRValue": "0x013C000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F90000100",
+        "MSRValue": "0x103FC00010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0204000002",
+        "MSRValue": "0x083FC00010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000002",
+        "MSRValue": "0x023C000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00080",
+        "MSRValue": "0x00BC000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08040007F7",
+        "MSRValue": "0x3F84000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0090000100",
+        "MSRValue": "0x1004000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0090000400",
+        "MSRValue": "0x0804000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0090000020",
+        "MSRValue": "0x0404000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F90008000",
+        "MSRValue": "0x0104000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HITM",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00491",
+        "MSRValue": "0x0204000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84000004",
+        "MSRValue": "0x0604000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410008000",
+        "MSRValue": "0x0084000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004000004",
+        "MSRValue": "0x063B800010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C000120",
+        "MSRValue": "0x3F90000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C000001",
+        "MSRValue": "0x1010000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01100007F7",
+        "MSRValue": "0x0810000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84000001",
+        "MSRValue": "0x0410000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004000010",
+        "MSRValue": "0x0110000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F90000080",
+        "MSRValue": "0x0210000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804008000",
+        "MSRValue": "0x0090000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.ANY_SNOOP OCR.PF_L2_RFO.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OCR.PF_L2_RFO.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210000001",
+        "MSRValue": "0x3FBC000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L2_RFO.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.HITM_OTHER_CORE OCR.PF_L2_RFO.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L2_RFO.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C000020",
+        "MSRValue": "0x103C000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110000080",
+        "MSRValue": "0x083C000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804000490",
+        "MSRValue": "0x043C000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L2_RFO.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L2_RFO.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C000020",
+        "MSRValue": "0x013C000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L3_RFO.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L2_RFO.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C000100",
+        "MSRValue": "0x103FC00020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE",
+        "EventName": "OCR.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC000001",
+        "MSRValue": "0x083FC00020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L2_RFO.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010000100",
+        "MSRValue": "0x023C000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_RFO.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000100",
+        "MSRValue": "0x00BC000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC9",
-        "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY",
-        "PublicDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x20"
-    },
-    {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.SNOOP_MISS OCR.ALL_PF_RFO.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_MISS.SNOOP_MISS",
+        "EventName": "OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C000120",
+        "MSRValue": "0x3F84000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F90000020",
+        "MSRValue": "0x1004000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.PF_L3_DATA_RD.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C000080",
+        "MSRValue": "0x0804000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0090000080",
+        "MSRValue": "0x0404000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210000002",
+        "MSRValue": "0x0104000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84000080",
+        "MSRValue": "0x0204000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210000490",
+        "MSRValue": "0x0604000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED  OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104000120",
+        "MSRValue": "0x0084000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP OCR.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000004",
+        "MSRValue": "0x063B800020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_MISS.ANY_SNOOP OCR.ALL_READS.L3_MISS.ANY_SNOOP OCR.ALL_READS.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_MISS.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC0007F7",
+        "MSRValue": "0x3F90000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C000490",
+        "MSRValue": "0x1010000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00400",
+        "MSRValue": "0x0810000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_MISS.SNOOP_NONE",
+        "EventName": "OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC000020",
+        "MSRValue": "0x0410000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F90000002",
+        "MSRValue": "0x0110000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC9",
-        "EventName": "RTM_RETIRED.ABORTED",
-        "PEBS": "1",
-        "PublicDescription": "Number of times RTM abort was triggered.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x4"
-    },
-    {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000020",
+        "MSRValue": "0x0210000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104000490",
+        "MSRValue": "0x0090000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP OCR.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C000020",
+        "MSRValue": "0x3FBC000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.PF_L3_DATA_RD.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0404000020",
+        "MSRValue": "0x103C000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000100",
+        "MSRValue": "0x083C000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010008000",
+        "MSRValue": "0x043C000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L3_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0084000491",
+        "MSRValue": "0x013C000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800490",
+        "MSRValue": "0x103FC00080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804000100",
+        "MSRValue": "0x083FC00080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP OCR.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000080",
+        "MSRValue": "0x023C000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410000020",
+        "MSRValue": "0x00BC000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000001",
+        "MSRValue": "0x3F84000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410000490",
+        "MSRValue": "0x1004000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x60",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x10"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C000010",
+        "MSRValue": "0x0804000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "Errata": "SKL089",
-        "EventCode": "0xC3",
-        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
-        "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:a. memory disambiguation,b. external snoop, orc. cross SMT-HW-thread snoop (stores) hitting load buffer.",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0404000080",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x54",
-        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
-        "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x10"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110000010",
+        "MSRValue": "0x0104000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0204000020",
+        "MSRValue": "0x0204000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0090000002",
+        "MSRValue": "0x0604000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004000080",
+        "MSRValue": "0x0084000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0090000010",
+        "MSRValue": "0x063B800080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE  OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010000122",
+        "MSRValue": "0x3F90000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C000122",
+        "MSRValue": "0x1010000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
@@ -4570,52 +4401,50 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C000001",
+        "MSRValue": "0x0410000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0810008000",
+        "MSRValue": "0x0110000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00122",
+        "MSRValue": "0x0210000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
         "MSRValue": "0x0090000080",
         "Offcore": "1",
@@ -4624,526 +4453,544 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.HITM_OTHER_CORE OCR.PF_L2_RFO.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.ANY_SNOOP OCR.PF_L3_RFO.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L3_RFO.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C000020",
+        "MSRValue": "0x3FBC000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.HITM_OTHER_CORE OCR.PF_L3_RFO.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L3_RFO.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104000002",
+        "MSRValue": "0x103C000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_READS.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_READS.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C0007F7",
+        "MSRValue": "0x083C000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410000001",
+        "MSRValue": "0x043C000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC9",
-        "EventName": "RTM_RETIRED.ABORTED_EVENTS",
-        "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x80"
-    },
-    {
-        "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD  OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L3_RFO.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L3_RFO.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04100007F7",
+        "MSRValue": "0x013C000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.SNOOP_NONE OCR.ALL_PF_RFO.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_MISS.SNOOP_NONE",
+        "EventName": "OCR.PF_L3_RFO.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC000120",
+        "MSRValue": "0x103FC00100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C000001",
+        "MSRValue": "0x083FC00100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_MISS.SNOOP_MISS OCR.ALL_RFO.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_MISS.SNOOP_MISS",
+        "EventName": "OCR.PF_L3_RFO.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C000122",
+        "MSRValue": "0x023C000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L3_RFO.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110008000",
+        "MSRValue": "0x00BC000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010008000",
+        "MSRValue": "0x3F84000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110000400",
+        "MSRValue": "0x1004000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "6",
-        "EventCode": "0xA3",
-        "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x6"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0204000122",
+        "MSRValue": "0x0804000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS.SNOOP_NONE",
+        "EventName": "OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC000400",
+        "MSRValue": "0x0404000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0090000100",
+        "MSRValue": "0x0104000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.DEMAND_DATA_RD.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C000001",
+        "MSRValue": "0x0204000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM",
+        "EventName": "OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00400",
+        "MSRValue": "0x0604000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0204000400",
+        "MSRValue": "0x0084000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OCR.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0204008000",
+        "MSRValue": "0x063B800100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0090008000",
+        "MSRValue": "0x3F90000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08100007F7",
+        "MSRValue": "0x1010000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000004",
+        "MSRValue": "0x0810000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000491",
+        "MSRValue": "0x0410000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00002",
+        "MSRValue": "0x0110000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE",
+        "EventName": "OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC000004",
+        "MSRValue": "0x0210000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800122",
+        "MSRValue": "0x0090000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "Demand Data Read requests who miss L3 cache",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB0",
+        "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+        "PublicDescription": "Demand Data Read requests who miss L3 cache.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "6",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC007F7",
+        "MSRValue": "0x3FBC000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C000122",
+        "MSRValue": "0x103C000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C000100",
+        "MSRValue": "0x083C000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F90000004",
+        "MSRValue": "0x043C000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C000120",
+        "MSRValue": "0x013C000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02100007F7",
+        "MSRValue": "0x103FC00491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_MISS.REMOTE_HITM OCR.ALL_READS.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC007F7",
+        "MSRValue": "0x083FC00491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110000490",
+        "MSRValue": "0x023C000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.ANY_SNOOP OCR.PF_L3_RFO.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_MISS.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000100",
+        "MSRValue": "0x00BC000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0810000004",
+        "MSRValue": "0x3F84000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0204000490",
+        "MSRValue": "0x1004000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F90000491",
+        "MSRValue": "0x0804000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD  OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
         "MSRValue": "0x0404000491",
         "Offcore": "1",
@@ -5152,2707 +4999,2702 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00002",
+        "MSRValue": "0x0104000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an HLE execution aborted due to hardware timer expiration.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC8",
-        "EventName": "HLE_RETIRED.ABORTED_TIMER",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x10"
-    },
-    {
-        "BriefDescription": "OCR.ALL_RFO.L3_MISS.REMOTE_HITM OCR.ALL_RFO.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00122",
+        "MSRValue": "0x0204000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0090000491",
+        "MSRValue": "0x0604000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00080",
+        "MSRValue": "0x0084000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800004",
+        "MSRValue": "0x063B800491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0404000490",
+        "MSRValue": "0x3F90000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F90008000",
+        "MSRValue": "0x1010000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410000490",
+        "MSRValue": "0x0810000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000010",
+        "MSRValue": "0x0410000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84000020",
+        "MSRValue": "0x0110000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000001",
+        "MSRValue": "0x0210000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0084000100",
+        "MSRValue": "0x0090000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_MISS.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C008000",
+        "MSRValue": "0x3FBC000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0404000400",
+        "MSRValue": "0x103C000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800491",
+        "MSRValue": "0x083C000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0084000020",
+        "MSRValue": "0x043C000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604008000",
+        "MSRValue": "0x013C000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110000100",
+        "MSRValue": "0x103FC00490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xcd",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
-        "MSRIndex": "0x3F6",
-        "MSRValue": "0x20",
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.  Reported latency may be longer than just the memory latency.",
-        "SampleAfterValue": "100007",
-        "TakenAlone": "1",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C000080",
+        "MSRValue": "0x083FC00490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED  OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110000122",
+        "MSRValue": "0x023C000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0810000020",
+        "MSRValue": "0x00BC000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00080",
+        "MSRValue": "0x3F84000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210000004",
+        "MSRValue": "0x1004000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104000080",
+        "MSRValue": "0x0804000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210000122",
+        "MSRValue": "0x0404000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_MISS.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC000100",
+        "MSRValue": "0x0104000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0810000002",
+        "MSRValue": "0x0204000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C000001",
+        "MSRValue": "0x0604000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC8",
-        "EventName": "HLE_RETIRED.ABORTED_MEM",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x8"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410000080",
+        "MSRValue": "0x0084000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_MISS.SNOOP_NONE OCR.ALL_RFO.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_MISS.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC000122",
+        "MSRValue": "0x063B800490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800120",
+        "MSRValue": "0x3F90000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC08000",
+        "MSRValue": "0x1010000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F90000490",
+        "MSRValue": "0x0810000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410000004",
+        "MSRValue": "0x0410000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C000002",
+        "MSRValue": "0x0110000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804000010",
+        "MSRValue": "0x0210000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104000491",
+        "MSRValue": "0x0090000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0090000010",
+        "MSRValue": "0x3FBC000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.OTHER.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C008000",
+        "MSRValue": "0x103C000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210008000",
+        "MSRValue": "0x083C000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0084000080",
+        "MSRValue": "0x043C000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00001",
+        "MSRValue": "0x013C000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_MISS.SNOOP_NONE OCR.ALL_READS.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_MISS.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC0007F7",
+        "MSRValue": "0x103FC00120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110000491",
+        "MSRValue": "0x083FC00120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC0007F7",
+        "MSRValue": "0x023C000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410000491",
+        "MSRValue": "0x00BC000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00001",
+        "MSRValue": "0x3F84000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110000020",
+        "MSRValue": "0x1004000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410000001",
+        "MSRValue": "0x0804000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0404000491",
+        "MSRValue": "0x0404000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004000010",
+        "MSRValue": "0x0104000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804000491",
+        "MSRValue": "0x0204000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0084008000",
+        "MSRValue": "0x0604000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104000020",
+        "MSRValue": "0x0084000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84000020",
+        "MSRValue": "0x063B800120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000120",
+        "MSRValue": "0x3F90000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0090000120",
+        "MSRValue": "0x1010000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00100",
+        "MSRValue": "0x0810000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.SNOOP_MISS OCR.ALL_DATA_RD.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_MISS.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C000491",
+        "MSRValue": "0x0410000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800020",
+        "MSRValue": "0x0110000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C000490",
+        "MSRValue": "0x0210000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104000004",
+        "MSRValue": "0x0090000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0404000001",
+        "MSRValue": "0x3FBC0007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000080",
+        "MSRValue": "0x103C0007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004000122",
+        "MSRValue": "0x083C0007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804000020",
+        "MSRValue": "0x043C0007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010000001",
+        "MSRValue": "0x013C0007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD  OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0404000122",
+        "MSRValue": "0x103FC007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC008000",
+        "MSRValue": "0x083FC007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC000491",
+        "MSRValue": "0x023C0007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C000120",
+        "MSRValue": "0x00BC0007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210000020",
+        "MSRValue": "0x3F840007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110000080",
+        "MSRValue": "0x10040007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_MISS.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C000120",
+        "MSRValue": "0x08040007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C000120",
+        "MSRValue": "0x04040007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC000400",
+        "MSRValue": "0x01040007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110000001",
+        "MSRValue": "0x02040007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.ANY_SNOOP OCR.PF_L2_RFO.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_MISS.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000020",
+        "MSRValue": "0x06040007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC0007F7",
+        "MSRValue": "0x00840007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_MISS.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C000020",
+        "MSRValue": "0x063B8007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104000001",
+        "MSRValue": "0x3F900007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C000100",
+        "MSRValue": "0x10100007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C000001",
+        "MSRValue": "0x08100007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C000080",
+        "MSRValue": "0x04100007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C000004",
+        "MSRValue": "0x01100007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0404000001",
+        "MSRValue": "0x02100007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP  OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F90000491",
+        "MSRValue": "0x00900007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C000122",
+        "MSRValue": "0x3FBC000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110000020",
+        "MSRValue": "0x103C000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004000400",
+        "MSRValue": "0x083C000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC000002",
+        "MSRValue": "0x043C000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010000020",
+        "MSRValue": "0x013C000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED  OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110000120",
+        "MSRValue": "0x103FC00122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.HITM_OTHER_CORE OCR.OTHER.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C008000",
+        "MSRValue": "0x083FC00122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804000004",
+        "MSRValue": "0x023C000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104008000",
+        "MSRValue": "0x00BC000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xcd",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
-        "MSRIndex": "0x3F6",
-        "MSRValue": "0x100",
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.  Reported latency may be longer than just the memory latency.",
-        "SampleAfterValue": "503",
-        "TakenAlone": "1",
+        "Deprecated": "1",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3F84000122",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0204000004",
+        "MSRValue": "0x1004000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0810000100",
+        "MSRValue": "0x0804000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210000100",
+        "MSRValue": "0x0404000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0084000002",
+        "MSRValue": "0x0104000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800004",
+        "MSRValue": "0x0204000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C000080",
+        "MSRValue": "0x0604000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xcd",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
-        "MSRIndex": "0x3F6",
-        "MSRValue": "0x10",
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.  Reported latency may be longer than just the memory latency.",
-        "SampleAfterValue": "20011",
-        "TakenAlone": "1",
+        "Deprecated": "1",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0084000122",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C000400",
+        "MSRValue": "0x063B800122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0084000020",
+        "MSRValue": "0x3F90000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x54",
-        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
-        "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x20"
-    },
-    {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C000080",
+        "MSRValue": "0x1010000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0090000491",
+        "MSRValue": "0x0810000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84000400",
+        "MSRValue": "0x0410000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0090000001",
+        "MSRValue": "0x0110000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC000010",
+        "MSRValue": "0x0210000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004000002",
+        "MSRValue": "0x0090000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800400",
+        "MSRValue": "0x3FBC000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x54",
-        "EventName": "TX_MEM.ABORT_CAPACITY",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC8",
-        "EventName": "HLE_RETIRED.ABORTED_MEMTYPE",
-        "PublicDescription": "Number of times an HLE execution aborted due to incompatible memory type.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x40"
-    },
-    {
-        "BriefDescription": "Number of times an RTM execution started.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC9",
-        "EventName": "RTM_RETIRED.START",
-        "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD  OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804000122",
+        "MSRValue": "0x103C000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210000490",
+        "MSRValue": "0x083C000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0404008000",
+        "MSRValue": "0x043C000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210000100",
+        "MSRValue": "0x013C000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010000001",
+        "MSRValue": "0x103FC00004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC000490",
+        "MSRValue": "0x083FC00004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE  OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010000490",
+        "MSRValue": "0x023C000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0084000080",
+        "MSRValue": "0x00BC000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xcd",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
-        "MSRIndex": "0x3F6",
-        "MSRValue": "0x200",
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.  Reported latency may be longer than just the memory latency.",
-        "SampleAfterValue": "101",
-        "TakenAlone": "1",
+        "Deprecated": "1",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3F84000004",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC8",
-        "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x20"
-    },
-    {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0404000010",
+        "MSRValue": "0x1004000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C000020",
+        "MSRValue": "0x0804000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0810000080",
+        "MSRValue": "0x0404000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010000120",
+        "MSRValue": "0x0104000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84000490",
+        "MSRValue": "0x0204000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000010",
+        "MSRValue": "0x0604000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02100007F7",
+        "MSRValue": "0x0084000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C0007F7",
+        "MSRValue": "0x063B800004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C000020",
+        "MSRValue": "0x3F90000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.NO_SNOOP_NEEDED OCR.DEMAND_CODE_RD.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C000004",
+        "MSRValue": "0x1010000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.HITM_OTHER_CORE OCR.PF_L1D_AND_SW.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C000400",
+        "MSRValue": "0x0810000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110000002",
+        "MSRValue": "0x0410000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010000122",
+        "MSRValue": "0x0110000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00001",
+        "MSRValue": "0x0210000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.DEMAND_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C000001",
+        "MSRValue": "0x0090000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800020",
+        "MSRValue": "0x3FBC000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04100007F7",
+        "MSRValue": "0x103C000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000080",
+        "MSRValue": "0x083C000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_MISS.ANY_SNOOP OCR.ALL_RFO.L3_MISS.ANY_SNOOP OCR.ALL_RFO.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_MISS.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000122",
+        "MSRValue": "0x043C000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP  OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F90000490",
+        "MSRValue": "0x013C000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010000490",
+        "MSRValue": "0x103FC00001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000004",
+        "MSRValue": "0x083FC00001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00120",
+        "MSRValue": "0x023C000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C000004",
+        "MSRValue": "0x00BC000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00010",
+        "MSRValue": "0x3F84000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "6",
-        "EventCode": "0x60",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x10"
-    },
-    {
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xcd",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
-        "MSRIndex": "0x3F6",
-        "MSRValue": "0x40",
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.  Reported latency may be longer than just the memory latency.",
-        "SampleAfterValue": "2003",
-        "TakenAlone": "1",
+        "Deprecated": "1",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1004000001",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_MISS.HITM_OTHER_CORE OCR.ALL_RFO.L3_MISS.HITM_OTHER_CORE OCR.ALL_RFO.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C000122",
+        "MSRValue": "0x0804000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C000002",
+        "MSRValue": "0x0404000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0404000010",
+        "MSRValue": "0x0104000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_NONE OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC000490",
+        "MSRValue": "0x0204000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00120",
+        "MSRValue": "0x0604000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0090000122",
+        "MSRValue": "0x0084000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.NO_SNOOP_NEEDED OCR.OTHER.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C008000",
+        "MSRValue": "0x063B800001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0204000120",
+        "MSRValue": "0x3F90000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800010",
+        "MSRValue": "0x1010000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0204000491",
+        "MSRValue": "0x0810000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00020",
+        "MSRValue": "0x0410000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800400",
+        "MSRValue": "0x0110000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.ANY_SNOOP OCR.OTHER.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_MISS.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC008000",
+        "MSRValue": "0x0210000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110000004",
+        "MSRValue": "0x0090000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C000122",
+        "MSRValue": "0x3FBC000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000122",
+        "MSRValue": "0x103C000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C000080",
+        "MSRValue": "0x083C000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000490",
+        "MSRValue": "0x043C000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210000080",
+        "MSRValue": "0x013C000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00020",
+        "MSRValue": "0x103FC00002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00120",
+        "MSRValue": "0x083FC00002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804000080",
+        "MSRValue": "0x023C000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE  OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004000120",
+        "MSRValue": "0x00BC000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84008000",
+        "MSRValue": "0x3F84000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C000100",
+        "MSRValue": "0x1004000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C008000",
+        "MSRValue": "0x0804000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84000002",
+        "MSRValue": "0x0404000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.HITM_OTHER_CORE OCR.DEMAND_RFO.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C000002",
+        "MSRValue": "0x0104000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00400",
+        "MSRValue": "0x0204000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410000120",
+        "MSRValue": "0x0604000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210000491",
+        "MSRValue": "0x0084000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "2",
-        "EventCode": "0xA3",
-        "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC08000",
+        "MSRValue": "0x063B800002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C000122",
+        "MSRValue": "0x3F90000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800001",
+        "MSRValue": "0x1010000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C008000",
+        "MSRValue": "0x0810000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C000080",
+        "MSRValue": "0x0410000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84000122",
+        "MSRValue": "0x0110000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210000120",
+        "MSRValue": "0x0210000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C0007F7",
+        "MSRValue": "0x0090000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110000002",
+        "MSRValue": "0x3FBC008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.SNOOP_NONE OCR.ALL_DATA_RD.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_MISS.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC000491",
+        "MSRValue": "0x103C008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C000491",
+        "MSRValue": "0x083C008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP  OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84000122",
+        "MSRValue": "0x043C008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0084000490",
+        "MSRValue": "0x013C008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804000001",
+        "MSRValue": "0x103FC08000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
@@ -7873,490 +7715,476 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0810000120",
+        "MSRValue": "0x023C008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C000491",
+        "MSRValue": "0x00BC008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000020",
+        "MSRValue": "0x3F84008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010000080",
+        "MSRValue": "0x1004008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C000001",
+        "MSRValue": "0x0804008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED  OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104000491",
+        "MSRValue": "0x0404008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004008000",
+        "MSRValue": "0x0104008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210000010",
+        "MSRValue": "0x0204008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104000010",
+        "MSRValue": "0x0604008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an RTM execution successfully committed",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC9",
-        "EventName": "RTM_RETIRED.COMMIT",
-        "PublicDescription": "Number of times RTM commit succeeded.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000400",
+        "MSRValue": "0x0084008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE  OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004000491",
+        "MSRValue": "0x063B808000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC8",
-        "EventName": "HLE_RETIRED.ABORTED_EVENTS",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x80"
-    },
-    {
-        "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x60",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x10"
-    },
-    {
-        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.ANY_SNOOP OCR.DEMAND_RFO.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000002",
+        "MSRValue": "0x3F90008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0204000490",
+        "MSRValue": "0x1010008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04040007F7",
+        "MSRValue": "0x0810008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC000080",
+        "MSRValue": "0x0410008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804000001",
+        "MSRValue": "0x0110008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F900007F7",
+        "MSRValue": "0x0210008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84000004",
+        "MSRValue": "0x0090008000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C000120",
+        "MSRValue": "0x3FBC000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC000010",
+        "MSRValue": "0x103C000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C008000",
+        "MSRValue": "0x083C000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC000020",
+        "MSRValue": "0x043C000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00004",
+        "MSRValue": "0x013C000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED  OCR.ALL_READS.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01040007F7",
+        "MSRValue": "0x103FC00400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C000004",
+        "MSRValue": "0x083FC00400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800080",
+        "MSRValue": "0x023C000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C000400",
+        "MSRValue": "0x00BC000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0084000120",
+        "MSRValue": "0x3F84000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0810000020",
+        "MSRValue": "0x1004000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84000080",
+        "MSRValue": "0x0804000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010000010",
+        "MSRValue": "0x0404000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.REMOTE_HITM OCR.ALL_DATA_RD.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00491",
+        "MSRValue": "0x0104000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0090000122",
+        "MSRValue": "0x0204000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C000001",
+        "MSRValue": "0x0604000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
@@ -8377,1541 +8205,1713 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00001",
+        "MSRValue": "0x063B800400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0404000120",
+        "MSRValue": "0x3F90000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00004",
+        "MSRValue": "0x1010000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD  OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804000491",
+        "MSRValue": "0x0810000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00002",
+        "MSRValue": "0x0410000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C000120",
+        "MSRValue": "0x0110000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C000490",
+        "MSRValue": "0x0210000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00100",
+        "MSRValue": "0x0090000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0084000002",
+        "MSRValue": "0x3FBC000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00100",
+        "MSRValue": "0x103C000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800001",
+        "MSRValue": "0x083C000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104000002",
+        "MSRValue": "0x043C000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0204000080",
+        "MSRValue": "0x013C000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804000122",
+        "MSRValue": "0x103FC00010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0810000002",
+        "MSRValue": "0x083FC00010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0204000080",
+        "MSRValue": "0x023C000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104000400",
+        "MSRValue": "0x00BC000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L3_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C000080",
+        "MSRValue": "0x3F84000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F90000010",
+        "MSRValue": "0x1004000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004000400",
+        "MSRValue": "0x0804000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C008000",
+        "MSRValue": "0x0404000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110000004",
+        "MSRValue": "0x0104000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C000001",
+        "MSRValue": "0x0204000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0810000400",
+        "MSRValue": "0x0604000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804000080",
+        "MSRValue": "0x0084000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210008000",
+        "MSRValue": "0x063B800010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0204000100",
+        "MSRValue": "0x3F90000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00491",
+        "MSRValue": "0x1010000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000491",
+        "MSRValue": "0x0810000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Demand Data Read requests who miss L3 cache",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xB0",
-        "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
-        "PublicDescription": "Demand Data Read requests who miss L3 cache.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x10"
-    },
-    {
-        "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED  OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01100007F7",
+        "MSRValue": "0x0410000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0810000010",
+        "MSRValue": "0x0110000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C000400",
+        "MSRValue": "0x0210000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02040007F7",
+        "MSRValue": "0x0090000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0404000120",
+        "MSRValue": "0x3FBC000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110000400",
+        "MSRValue": "0x103C000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00900007F7",
+        "MSRValue": "0x083C000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84000120",
+        "MSRValue": "0x043C000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410000100",
+        "MSRValue": "0x013C000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00BC000122",
+        "MSRValue": "0x103FC00020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0810000490",
+        "MSRValue": "0x083FC00020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000080",
+        "MSRValue": "0x023C000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C000400",
+        "MSRValue": "0x00BC000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.HITM_OTHER_CORE OCR.DEMAND_CODE_RD.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C000004",
+        "MSRValue": "0x3F84000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE  OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010000491",
+        "MSRValue": "0x1004000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C000400",
+        "MSRValue": "0x0804000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804000020",
+        "MSRValue": "0x0404000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0204000020",
+        "MSRValue": "0x0104000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED  OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110000491",
+        "MSRValue": "0x0204000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD  OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08040007F7",
+        "MSRValue": "0x0604000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004000100",
+        "MSRValue": "0x0084000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0084000004",
+        "MSRValue": "0x063B800020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F90000122",
+        "MSRValue": "0x3F90000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00010",
+        "MSRValue": "0x1010000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004000002",
+        "MSRValue": "0x0810000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0084000004",
+        "MSRValue": "0x0410000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104000120",
+        "MSRValue": "0x0110000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0810000001",
+        "MSRValue": "0x0210000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC8",
-        "EventName": "HLE_RETIRED.ABORTED",
-        "PEBS": "1",
-        "PublicDescription": "Number of times HLE abort was triggered.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x4"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C008000",
+        "MSRValue": "0x0090000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x013C000010",
+        "MSRValue": "0x3FBC000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000120",
+        "MSRValue": "0x103C000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0090000020",
+        "MSRValue": "0x083C000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0204000010",
+        "MSRValue": "0x043C000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0090000004",
+        "MSRValue": "0x013C000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C000491",
+        "MSRValue": "0x103FC00080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010000491",
+        "MSRValue": "0x083FC00080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804000400",
+        "MSRValue": "0x023C000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800120",
+        "MSRValue": "0x00BC000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD  OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0810000490",
+        "MSRValue": "0x3F84000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP  OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84000491",
+        "MSRValue": "0x1004000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0810000400",
+        "MSRValue": "0x0804000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP OCR.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000010",
+        "MSRValue": "0x0404000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410000122",
+        "MSRValue": "0x0104000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410000004",
+        "MSRValue": "0x0204000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210000004",
+        "MSRValue": "0x0604000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010000400",
+        "MSRValue": "0x0084000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410000080",
+        "MSRValue": "0x063B800080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0810000491",
+        "MSRValue": "0x3F90000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F90000001",
+        "MSRValue": "0x1010000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0104000001",
+        "MSRValue": "0x0810000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00010",
+        "MSRValue": "0x0410000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_DATA_RD.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C000491",
+        "MSRValue": "0x0110000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x043C000010",
+        "MSRValue": "0x0210000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C000100",
+        "MSRValue": "0x0090000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0804000400",
+        "MSRValue": "0x3FBC000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F90000020",
+        "MSRValue": "0x103C000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0210000122",
+        "MSRValue": "0x083C000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0810000001",
+        "MSRValue": "0x043C000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F90000400",
+        "MSRValue": "0x013C000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS.REMOTE_HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0090000490",
+        "MSRValue": "0x103FC00100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C000400",
+        "MSRValue": "0x083FC00100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110000120",
+        "MSRValue": "0x023C000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00020",
+        "MSRValue": "0x00BC000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an HLE execution started.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC8",
-        "EventName": "HLE_RETIRED.START",
-        "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1004000020",
+        "MSRValue": "0x3F84000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010000400",
+        "MSRValue": "0x1004000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00004",
+        "MSRValue": "0x0804000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD  OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410000491",
+        "MSRValue": "0x0404000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C000120",
+        "MSRValue": "0x0104000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0410000400",
+        "MSRValue": "0x0204000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_MISS.HITM_OTHER_CORE OCR.ALL_READS.L3_MISS.HITM_OTHER_CORE OCR.ALL_READS.L3_MISS.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_MISS.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103C0007F7",
+        "MSRValue": "0x0604000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0404000100",
+        "MSRValue": "0x0084000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1010000100",
+        "MSRValue": "0x063B800100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.HIT_OTHER_CORE_FWD OCR.OTHER.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_MISS.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C008000",
+        "MSRValue": "0x3F90000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0810008000",
+        "MSRValue": "0x1010000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.REMOTE_HITM OCR.ALL_PF_RFO.L3_MISS.REMOTE_HITM",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00120",
+        "MSRValue": "0x0810000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0204000010",
+        "MSRValue": "0x0410000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00840007F7",
+        "MSRValue": "0x0110000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xcd",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
-        "MSRIndex": "0x3F6",
-        "MSRValue": "0x4",
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.  Reported latency may be longer than just the memory latency.",
+        "Deprecated": "1",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0210000100",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
-        "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS.SNOOP_MISS",
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Deprecated": "1",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x023C000100",
+        "MSRValue": "0x0090000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.",
+        "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xcd",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
-        "MSRIndex": "0x3F6",
-        "MSRValue": "0x8",
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.  Reported latency may be longer than just the memory latency.",
-        "SampleAfterValue": "50021",
-        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.ABORTED",
+        "PEBS": "1",
+        "PublicDescription": "Number of times RTM abort was triggered.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.ABORTED_EVENTS",
+        "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.ABORTED_MEM",
+        "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
+        "PublicDescription": "Number of times an RTM execution aborted due to incompatible memory type.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x40"
+    },
+    {
+        "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.ABORTED_TIMER",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY",
+        "PublicDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Number of times an RTM execution successfully committed",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.COMMIT",
+        "PublicDescription": "Number of times RTM commit succeeded.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Number of times an RTM execution started.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.START",
+        "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.",
+        "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_FWD",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083C0007F7",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x5d",
+        "EventName": "TX_EXEC.MISC1",
+        "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Deprecated": "1",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0110000001",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x5d",
+        "EventName": "TX_EXEC.MISC2",
+        "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x5d",
+        "EventName": "TX_EXEC.MISC3",
+        "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x5d",
+        "EventName": "TX_EXEC.MISC4",
+        "PublicDescription": "RTM region detected inside HLE.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x5d",
+        "EventName": "TX_EXEC.MISC5",
+        "PublicDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_CAPACITY",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_CONFLICT",
+        "PublicDescription": "Number of times a TSX line had a cache conflict.",
+        "SampleAfterValue": "2000003",
         "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
+        "PublicDescription": "Number of times we could not allocate Lock Buffer.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x40"
     }
 ]
 \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/other.json b/tools/perf/pmu-events/arch/x86/cascadelakex/other.json
index f77d78e90954..2f111a22d81f 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/other.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/other.json
@@ -1,1335 +1,1382 @@
 [
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the Non-AVX turbo schedule.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400100010",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x28",
+        "EventName": "CORE_POWER.LVL0_TURBO_LICENSE",
+        "PublicDescription": "Core cycles where the core was running with power-delivery for baseline license level 0.  This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x7"
+    },
+    {
+        "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX2 turbo schedule.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x28",
+        "EventName": "CORE_POWER.LVL1_TURBO_LICENSE",
+        "PublicDescription": "Core cycles where the core was running with power-delivery for license level 1.  This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x18"
+    },
+    {
+        "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x28",
+        "EventName": "CORE_POWER.LVL2_TURBO_LICENSE",
+        "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server michroarchtecture).  This includes high current AVX 512-bit instructions.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Core cycles the core was throttled due to a pending power level request.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x28",
+        "EventName": "CORE_POWER.THROTTLE",
+        "PublicDescription": "Core cycles the out-of-order engine was throttled due to a pending power level request.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x40"
+    },
+    {
+        "BriefDescription": "Number of hardware interrupts received by the processor.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xCB",
+        "EventName": "HW_INTERRUPTS.RECEIVED",
+        "PublicDescription": "Counts the number of hardware interruptions received by the processor.",
+        "SampleAfterValue": "203",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.HITM_OTHER_CORE  OCR.ALL_PF_RFO.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xFE",
+        "EventName": "IDI_MISC.WB_DOWNGRADE",
+        "PublicDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xFE",
+        "EventName": "IDI_MISC.WB_UPGRADE",
+        "PublicDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "OCR.ALL_DATA_RD.ANY_RESPONSE have any response type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_DATA_RD.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000100120",
+        "MSRValue": "0x0000010491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_F.NO_SNOOP_NEEDED  OCR.ALL_READS.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01002007F7",
+        "MSRValue": "0x3F803C0491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C0491",
+        "MSRValue": "0x10003C0491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200020122",
+        "MSRValue": "0x08003C0491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_FWD  OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08000407F7",
+        "MSRValue": "0x04003C0491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000100004",
+        "MSRValue": "0x01003C0491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080088000",
+        "MSRValue": "0x08007C0491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.SNOOP_MISS OCR.ALL_DATA_RD.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100100001",
+        "MSRValue": "0x02003C0491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.NO_SNOOP_NEEDED  OCR.ALL_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.SNOOP_NONE OCR.ALL_DATA_RD.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100100122",
+        "MSRValue": "0x00803C0491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.ANY_SNOOP  OCR.ALL_DATA_RD.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000028000",
+        "MSRValue": "0x3F80080491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.HITM_OTHER_CORE  OCR.ALL_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100040002",
+        "MSRValue": "0x1000080491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD  OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400200020",
+        "MSRValue": "0x0800080491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD  OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD  OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800100120",
+        "MSRValue": "0x0400080491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED  OCR.ALL_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80080004",
+        "MSRValue": "0x0100080491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT.ANY_SNOOP",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0120",
+        "MSRValue": "0x0200080491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.HITM_OTHER_CORE  OCR.ALL_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000100491",
+        "MSRValue": "0x0080080491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.ANY_SNOOP  OCR.ALL_PF_RFO.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.ANY_SNOOP  OCR.ALL_DATA_RD.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80100120",
+        "MSRValue": "0x3F80200491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.HITM_OTHER_CORE  OCR.ALL_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400100001",
+        "MSRValue": "0x1000200491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD  OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C0001",
+        "MSRValue": "0x0800200491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD  OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000048000",
+        "MSRValue": "0x0400200491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED  OCR.ALL_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400020020",
+        "MSRValue": "0x0100200491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080100002",
+        "MSRValue": "0x0200200491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400080080",
+        "MSRValue": "0x0080200491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.ANY_SNOOP  OCR.ALL_DATA_RD.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400200080",
+        "MSRValue": "0x3F80040491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_M.NO_SNOOP_NEEDED  OCR.ALL_READS.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.HITM_OTHER_CORE  OCR.ALL_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01000407F7",
+        "MSRValue": "0x1000040491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD  OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80088000",
+        "MSRValue": "0x0800040491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD  OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0020",
+        "MSRValue": "0x0400040491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.HITM_OTHER_CORE  OCR.ALL_PF_RFO.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED  OCR.ALL_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000080120",
+        "MSRValue": "0x0100040491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_M.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0490",
+        "MSRValue": "0x0200040491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED  OCR.ALL_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100020122",
+        "MSRValue": "0x0080040491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.HITM_OTHER_CORE  OCR.ALL_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.ANY_SNOOP  OCR.ALL_DATA_RD.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000200491",
+        "MSRValue": "0x3F80100491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests OCR.OTHER.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.HITM_OTHER_CORE  OCR.ALL_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80408000",
+        "MSRValue": "0x1000100491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE  OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD  OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000020490",
+        "MSRValue": "0x0800100491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_READS.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD  OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F804007F7",
+        "MSRValue": "0x0400100491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xFE",
-        "EventName": "IDI_MISC.WB_DOWNGRADE",
-        "PublicDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x4"
-    },
-    {
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED  OCR.ALL_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080080002",
+        "MSRValue": "0x0100100491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE OCR.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0004",
+        "MSRValue": "0x0200100491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_DATA_RD.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400080120",
+        "MSRValue": "0x0080100491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads",
+        "BriefDescription": "OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200200001",
+        "MSRValue": "0x3F80400491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80040004",
+        "MSRValue": "0x0080400491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80108000",
+        "MSRValue": "0x0100400491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.OTHER.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.ANY_SNOOP  OCR.ALL_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C8000",
+        "MSRValue": "0x3F80020491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE  OCR.ALL_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OCR.ALL_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80400491",
+        "MSRValue": "0x1000020491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
+        "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD  OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C0010",
+        "MSRValue": "0x0800020491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD  OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100108000",
+        "MSRValue": "0x0400020491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_F.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED  OCR.ALL_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OCR.ALL_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02002007F7",
+        "MSRValue": "0x0100020491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
+        "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OCR.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C0400",
+        "MSRValue": "0x0200020491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400040490",
+        "MSRValue": "0x0080020491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.ANY_RESPONSE have any response type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_PF_DATA_RD.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000080020",
+        "MSRValue": "0x0000010490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080020004",
+        "MSRValue": "0x3F803C0490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD  OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800080120",
+        "MSRValue": "0x10003C0490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD  OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400100122",
+        "MSRValue": "0x08003C0490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C0100",
+        "MSRValue": "0x04003C0490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD  OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800100490",
+        "MSRValue": "0x01003C0490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C8000",
+        "MSRValue": "0x08007C0490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_MISS OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200080400",
+        "MSRValue": "0x02003C0490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_NONE OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800100001",
+        "MSRValue": "0x00803C0490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT.SNOOP_MISS OCR.ALL_RFO.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.ANY_SNOOP  OCR.ALL_PF_DATA_RD.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C0122",
+        "MSRValue": "0x3F80080490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.HITM_OTHER_CORE  OCR.ALL_PF_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000040002",
+        "MSRValue": "0x1000080490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD  OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80100080",
+        "MSRValue": "0x0800080490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.HITM_OTHER_CORE  OCR.ALL_RFO.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000200122",
+        "MSRValue": "0x0400080490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED  OCR.ALL_PF_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800088000",
+        "MSRValue": "0x0100080490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200020491",
+        "MSRValue": "0x0200080490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400080010",
+        "MSRValue": "0x0080080490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.ANY_SNOOP  OCR.ALL_PF_DATA_RD.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT.SNOOP_NONE",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C0400",
+        "MSRValue": "0x3F80200490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.HITM_OTHER_CORE  OCR.ALL_PF_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80080002",
+        "MSRValue": "0x1000200490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD  OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200040122",
+        "MSRValue": "0x0800200490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0122",
+        "MSRValue": "0x0400200490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED  OCR.ALL_PF_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT.SNOOP_NONE",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C0080",
+        "MSRValue": "0x0100200490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080080001",
+        "MSRValue": "0x0200200490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200040400",
+        "MSRValue": "0x0080200490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs have any response type.",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.ANY_SNOOP  OCR.ALL_PF_DATA_RD.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.ANY_RESPONSE",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010020",
+        "MSRValue": "0x3F80040490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.HITM_OTHER_CORE  OCR.ALL_PF_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800020002",
+        "MSRValue": "0x1000040490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD  OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200200004",
+        "MSRValue": "0x0800040490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200028000",
+        "MSRValue": "0x0400040490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED  OCR.ALL_PF_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100080100",
+        "MSRValue": "0x0100040490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_M.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100400080",
+        "MSRValue": "0x0200040490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_FWD  OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08000807F7",
+        "MSRValue": "0x0080040490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD  OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.ANY_SNOOP  OCR.ALL_PF_DATA_RD.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400200122",
+        "MSRValue": "0x3F80100490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.HITM_OTHER_CORE  OCR.ALL_PF_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080040020",
+        "MSRValue": "0x1000100490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD  OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80200001",
+        "MSRValue": "0x0800100490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800040001",
+        "MSRValue": "0x0400100490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED  OCR.ALL_PF_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080400400",
+        "MSRValue": "0x0100100490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE OCR.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0100",
+        "MSRValue": "0x0200100490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100040010",
+        "MSRValue": "0x0080100490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800048000",
+        "MSRValue": "0x3F80400490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000080001",
+        "MSRValue": "0x0080400490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED  OCR.ALL_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100080491",
+        "MSRValue": "0x0100400490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.ANY_SNOOP  OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080100020",
+        "MSRValue": "0x3F80020490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE  OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100080020",
+        "MSRValue": "0x1000020490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.ANY_SNOOP OCR.DEMAND_RFO.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD  OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT.ANY_SNOOP",
+        "EventName": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0002",
+        "MSRValue": "0x0800020490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD  OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400020491",
+        "MSRValue": "0x0400020490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED  OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100020010",
+        "MSRValue": "0x0100020490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD  OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800020490",
+        "MSRValue": "0x0200020490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0122",
+        "MSRValue": "0x0080020490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_PF_RFO.ANY_RESPONSE have any response type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_PF_RFO.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100020001",
+        "MSRValue": "0x0000010120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C0001",
+        "MSRValue": "0x3F803C0120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200020100",
+        "MSRValue": "0x10003C0120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200200010",
+        "MSRValue": "0x08003C0120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400100490",
+        "MSRValue": "0x04003C0120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.ANY_RESPONSE have any response type.",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.ANY_RESPONSE",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010490",
+        "MSRValue": "0x01003C0120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800108000",
+        "MSRValue": "0x08007C0120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
@@ -1349,2756 +1396,2717 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.SNOOP_NONE OCR.ALL_PF_RFO.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400040400",
+        "MSRValue": "0x00803C0120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.ANY_SNOOP  OCR.ALL_PF_RFO.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400020010",
+        "MSRValue": "0x3F80080120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.HITM_OTHER_CORE  OCR.ALL_PF_RFO.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200048000",
+        "MSRValue": "0x1000080120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD  OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200100491",
+        "MSRValue": "0x0800080120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.ANY_SNOOP  OCR.ALL_RFO.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80040122",
+        "MSRValue": "0x0400080120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.NO_SNOOP_NEEDED  OCR.ALL_PF_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800200002",
+        "MSRValue": "0x0100080120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000020100",
+        "MSRValue": "0x0200080120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_E.ANY_SNOOP  OCR.ALL_READS.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F800807F7",
+        "MSRValue": "0x0080080120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.ANY_SNOOP  OCR.ALL_PF_RFO.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100080400",
+        "MSRValue": "0x3F80200120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.HITM_OTHER_CORE  OCR.ALL_PF_RFO.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80020100",
+        "MSRValue": "0x1000200120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.HITM_OTHER_CORE  OCR.ALL_PF_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD  OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000200490",
+        "MSRValue": "0x0800200120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_FWD  OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08000207F7",
+        "MSRValue": "0x0400200120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD  OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.NO_SNOOP_NEEDED  OCR.ALL_PF_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04000207F7",
+        "MSRValue": "0x0100200120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200080490",
+        "MSRValue": "0x0200200120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD  OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400200491",
+        "MSRValue": "0x0080200120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.NO_SNOOP_NEEDED  OCR.ALL_PF_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.ANY_SNOOP  OCR.ALL_PF_RFO.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100080120",
+        "MSRValue": "0x3F80040120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD  OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.HITM_OTHER_CORE  OCR.ALL_PF_RFO.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800080122",
+        "MSRValue": "0x1000040120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.ANY_SNOOP  OCR.ALL_DATA_RD.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD  OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80200491",
+        "MSRValue": "0x0800040120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT.SNOOP_NONE",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C0100",
+        "MSRValue": "0x0400040120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.NO_SNOOP_NEEDED  OCR.ALL_PF_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800080001",
+        "MSRValue": "0x0100040120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_M.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100080001",
+        "MSRValue": "0x0200040120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400088000",
+        "MSRValue": "0x0080040120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.HITM_OTHER_CORE  OCR.ALL_RFO.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.ANY_SNOOP  OCR.ALL_PF_RFO.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000040122",
+        "MSRValue": "0x3F80100120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.HITM_OTHER_CORE  OCR.ALL_PF_RFO.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200040001",
+        "MSRValue": "0x1000100120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.ANY_RESPONSE have any response type.",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD  OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.ANY_RESPONSE",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010122",
+        "MSRValue": "0x0800100120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads have any response type.",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.ANY_RESPONSE",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010004",
+        "MSRValue": "0x0400100120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.NO_SNOOP_NEEDED  OCR.ALL_PF_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200080004",
+        "MSRValue": "0x0100100120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200040100",
+        "MSRValue": "0x0200100120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OCR.ALL_PF_RFO.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C0002",
+        "MSRValue": "0x0080100120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80400400",
+        "MSRValue": "0x3F80400120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0001",
+        "MSRValue": "0x0080400120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.ANY_SNOOP OCR.PF_L2_RFO.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT.ANY_SNOOP",
+        "EventName": "OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0020",
+        "MSRValue": "0x0100400120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.ANY_SNOOP  OCR.ALL_PF_RFO.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_PF_RFO.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400080020",
+        "MSRValue": "0x3F80020120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.HITM_OTHER_CORE  OCR.ALL_PF_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OCR.ALL_PF_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100400020",
+        "MSRValue": "0x1000020120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD  OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80200020",
+        "MSRValue": "0x0800020120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000020080",
+        "MSRValue": "0x0400020120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT.SNOOP_MISS OCR.ALL_READS.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED  OCR.ALL_PF_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C07F7",
+        "MSRValue": "0x0100020120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OCR.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200020490",
+        "MSRValue": "0x0200020120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400200001",
+        "MSRValue": "0x0080020120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_READS.ANY_RESPONSE have any response type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OCR.ALL_READS.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080400010",
+        "MSRValue": "0x00000107F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT.ANY_SNOOP OCR.ALL_READS.L3_HIT.ANY_SNOOP OCR.ALL_READS.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OCR.ALL_READS.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200080100",
+        "MSRValue": "0x3F803C07F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OCR.ALL_READS.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080200002",
+        "MSRValue": "0x10003C07F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080020020",
+        "MSRValue": "0x08003C07F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100088000",
+        "MSRValue": "0x04003C07F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_READS.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400200100",
+        "MSRValue": "0x01003C07F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.ANY_SNOOP  OCR.ALL_PF_DATA_RD.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OCR.ALL_READS.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80200490",
+        "MSRValue": "0x08007C07F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD  OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT.SNOOP_MISS OCR.ALL_READS.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_READS.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800040120",
+        "MSRValue": "0x02003C07F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.ANY_SNOOP  OCR.ALL_READS.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT.SNOOP_NONE OCR.ALL_READS.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OCR.ALL_READS.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F800207F7",
+        "MSRValue": "0x00803C07F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_E.ANY_SNOOP  OCR.ALL_READS.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OCR.ALL_READS.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80200080",
+        "MSRValue": "0x3F800807F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_E.HITM_OTHER_CORE  OCR.ALL_READS.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT.SNOOP_NONE",
+        "EventName": "OCR.ALL_READS.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C0010",
+        "MSRValue": "0x10000807F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED  OCR.ALL_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_FWD  OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100100491",
+        "MSRValue": "0x08000807F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_NO_FWD  OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200020120",
+        "MSRValue": "0x04000807F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD  OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_E.NO_SNOOP_NEEDED  OCR.ALL_READS.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_READS.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400040122",
+        "MSRValue": "0x01000807F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_E.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OCR.ALL_READS.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00800207F7",
+        "MSRValue": "0x02000807F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD  OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_E.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_READS.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800100491",
+        "MSRValue": "0x00800807F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_F.ANY_SNOOP  OCR.ALL_READS.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OCR.ALL_READS.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200020400",
+        "MSRValue": "0x3F802007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_F.HITM_OTHER_CORE  OCR.ALL_READS.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OCR.ALL_READS.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080400122",
+        "MSRValue": "0x10002007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_FWD  OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080020490",
+        "MSRValue": "0x08002007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_E.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_NO_FWD  OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02000807F7",
+        "MSRValue": "0x04002007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_F.NO_SNOOP_NEEDED  OCR.ALL_READS.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OCR.ALL_READS.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100400100",
+        "MSRValue": "0x01002007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_F.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OCR.ALL_READS.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080040491",
+        "MSRValue": "0x02002007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_F.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OCR.ALL_READS.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80400122",
+        "MSRValue": "0x00802007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD  OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_M.ANY_SNOOP  OCR.ALL_READS.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_READS.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800040122",
+        "MSRValue": "0x3F800407F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_M.HITM_OTHER_CORE  OCR.ALL_READS.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_READS.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400040004",
+        "MSRValue": "0x10000407F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_FWD  OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080200010",
+        "MSRValue": "0x08000407F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_NO_FWD  OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400040010",
+        "MSRValue": "0x04000407F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_M.NO_SNOOP_NEEDED  OCR.ALL_READS.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_READS.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0001",
+        "MSRValue": "0x01000407F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_M.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_READS.L3_HIT_M.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0120",
+        "MSRValue": "0x02000407F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_M.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_READS.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800200100",
+        "MSRValue": "0x00800407F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_S.ANY_SNOOP  OCR.ALL_READS.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_READS.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400100080",
+        "MSRValue": "0x3F801007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_S.HITM_OTHER_CORE  OCR.ALL_READS.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OCR.ALL_READS.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80080001",
+        "MSRValue": "0x10001007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_FWD  OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100200400",
+        "MSRValue": "0x08001007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_NO_FWD  OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80100010",
+        "MSRValue": "0x04001007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_S.NO_SNOOP_NEEDED  OCR.ALL_READS.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OCR.ALL_READS.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080200001",
+        "MSRValue": "0x01001007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_S.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OCR.ALL_READS.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080080004",
+        "MSRValue": "0x02001007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
+        "BriefDescription": "OCR.ALL_READS.L3_HIT_S.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OCR.ALL_READS.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200020010",
+        "MSRValue": "0x00801007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_READS.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_READS.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OCR.ALL_READS.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200200490",
+        "MSRValue": "0x3F804007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.ANY_SNOOP  OCR.ALL_PF_DATA_RD.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80040490",
+        "MSRValue": "0x00804007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000020004",
+        "MSRValue": "0x01004007F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.ANY_SNOOP  OCR.ALL_READS.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_READS.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0004",
+        "MSRValue": "0x3F800207F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.HITM_OTHER_CORE  OCR.ALL_READS.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_READS.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400020004",
+        "MSRValue": "0x10000207F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_FWD  OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000100020",
+        "MSRValue": "0x08000207F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD  OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200040002",
+        "MSRValue": "0x04000207F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD  OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.NO_SNOOP_NEEDED  OCR.ALL_READS.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_READS.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800200122",
+        "MSRValue": "0x01000207F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD  OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_READS.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400080122",
+        "MSRValue": "0x02000207F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_READS.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000020002",
+        "MSRValue": "0x00800207F7",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the Non-AVX turbo schedule.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x28",
-        "EventName": "CORE_POWER.LVL0_TURBO_LICENSE",
-        "PublicDescription": "Core cycles where the core was running with power-delivery for baseline license level 0.  This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes.",
-        "SampleAfterValue": "200003",
-        "UMask": "0x7"
-    },
-    {
-        "BriefDescription": "Counts any other requests",
+        "BriefDescription": "OCR.ALL_RFO.ANY_RESPONSE have any response type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OCR.ALL_RFO.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080028000",
+        "MSRValue": "0x0000010122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT.ANY_SNOOP OCR.ALL_RFO.L3_HIT.ANY_SNOOP OCR.ALL_RFO.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OCR.ALL_RFO.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080400490",
+        "MSRValue": "0x3F803C0122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_RFO.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800040002",
+        "MSRValue": "0x10003C0122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100200002",
+        "MSRValue": "0x08003C0122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080020122",
+        "MSRValue": "0x04003C0122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OCR.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080400020",
+        "MSRValue": "0x01003C0122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000080004",
+        "MSRValue": "0x08007C0122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.ANY_SNOOP  OCR.ALL_DATA_RD.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT.SNOOP_MISS OCR.ALL_RFO.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OCR.ALL_RFO.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80100491",
+        "MSRValue": "0x02003C0122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_FWD  OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT.SNOOP_NONE OCR.ALL_RFO.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_RFO.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08002007F7",
+        "MSRValue": "0x00803C0122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.ANY_SNOOP  OCR.ALL_RFO.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OCR.ALL_RFO.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200200491",
+        "MSRValue": "0x3F80080122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.HITM_OTHER_CORE  OCR.ALL_RFO.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OCR.ALL_RFO.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080020120",
+        "MSRValue": "0x1000080122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.ANY_RESPONSE have any response type.",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD  OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.ANY_RESPONSE",
+        "EventName": "OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010120",
+        "MSRValue": "0x0800080122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD  OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0122",
+        "MSRValue": "0x0400080122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.NO_SNOOP_NEEDED  OCR.ALL_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000040100",
+        "MSRValue": "0x0100080122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OCR.ALL_RFO.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80200004",
+        "MSRValue": "0x0200080122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OCR.ALL_RFO.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80080010",
+        "MSRValue": "0x0080080122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_M.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.ANY_SNOOP  OCR.ALL_RFO.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OCR.ALL_RFO.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02000407F7",
+        "MSRValue": "0x3F80200122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.HITM_OTHER_CORE  OCR.ALL_RFO.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT.SNOOP_MISS",
+        "EventName": "OCR.ALL_RFO.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C0020",
+        "MSRValue": "0x1000200122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD  OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100040080",
+        "MSRValue": "0x0800200122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD  OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_MISS",
+        "EventName": "OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C0002",
+        "MSRValue": "0x0400200122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Core cycles the core was throttled due to a pending power level request.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x28",
-        "EventName": "CORE_POWER.THROTTLE",
-        "PublicDescription": "Core cycles the out-of-order engine was throttled due to a pending power level request.",
-        "SampleAfterValue": "200003",
-        "UMask": "0x40"
-    },
-    {
-        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE OCR.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.NO_SNOOP_NEEDED  OCR.ALL_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0002",
+        "MSRValue": "0x0100200122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OCR.ALL_RFO.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200200020",
+        "MSRValue": "0x0200200122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_RFO.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800080004",
+        "MSRValue": "0x0080200122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.ANY_SNOOP  OCR.ALL_RFO.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OCR.ALL_RFO.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200020004",
+        "MSRValue": "0x3F80040122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.HITM_OTHER_CORE  OCR.ALL_RFO.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OCR.ALL_RFO.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80040400",
+        "MSRValue": "0x1000040122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD  OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080040120",
+        "MSRValue": "0x0800040122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD  OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000100001",
+        "MSRValue": "0x0400040122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.NO_SNOOP_NEEDED  OCR.ALL_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OCR.ALL_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80040002",
+        "MSRValue": "0x0100040122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_RFO.L3_HIT_M.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0001",
+        "MSRValue": "0x0200040122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OCR.ALL_RFO.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080040400",
+        "MSRValue": "0x0080040122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.ANY_SNOOP  OCR.ALL_RFO.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_RFO.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000100010",
+        "MSRValue": "0x3F80100122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD  OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.HITM_OTHER_CORE  OCR.ALL_RFO.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_RFO.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800020122",
+        "MSRValue": "0x1000100122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests have any response type.",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD  OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.ANY_RESPONSE",
+        "EventName": "OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000018000",
+        "MSRValue": "0x0800100122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD  OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400100004",
+        "MSRValue": "0x0400100122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.NO_SNOOP_NEEDED  OCR.ALL_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.ALL_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800020004",
+        "MSRValue": "0x0100100122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OCR.ALL_RFO.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80080020",
+        "MSRValue": "0x0200100122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OCR.ALL_RFO.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80020010",
+        "MSRValue": "0x0080100122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100020002",
+        "MSRValue": "0x3F80400122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
+        "BriefDescription": "OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080080010",
+        "MSRValue": "0x0080400122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0120",
+        "MSRValue": "0x0100400122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads have any response type.",
+        "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.ANY_SNOOP  OCR.ALL_RFO.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
+        "EventName": "OCR.ALL_RFO.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010001",
+        "MSRValue": "0x3F80020122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.HITM_OTHER_CORE  OCR.ALL_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS",
+        "EventName": "OCR.ALL_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C0004",
+        "MSRValue": "0x1000020122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests",
+        "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD  OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080208000",
+        "MSRValue": "0x0800020122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD  OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80020020",
+        "MSRValue": "0x0400020122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED  OCR.ALL_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OCR.ALL_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000100080",
+        "MSRValue": "0x0100020122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OCR.ALL_RFO.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080400002",
+        "MSRValue": "0x0200020122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OCR.ALL_RFO.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100100002",
+        "MSRValue": "0x0080020122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.NO_SNOOP_NEEDED  OCR.ALL_PF_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all demand code reads have any response type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_CODE_RD.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100040120",
+        "MSRValue": "0x0000010004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.SNOOP_MISS",
+        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP OCR.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200080120",
+        "MSRValue": "0x3F803C0004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE OCR.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80020001",
+        "MSRValue": "0x10003C0004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.ANY_RESPONSE have any response type.",
+        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.ANY_RESPONSE",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010491",
+        "MSRValue": "0x08003C0004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000080100",
+        "MSRValue": "0x04003C0004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.SNOOP_NONE",
+        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED OCR.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080100122",
+        "MSRValue": "0x01003C0004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080080020",
+        "MSRValue": "0x08007C0004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800200004",
+        "MSRValue": "0x02003C0004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080400120",
+        "MSRValue": "0x00803C0004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400020080",
+        "MSRValue": "0x3F80080004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080040002",
+        "MSRValue": "0x1000080004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.SNOOP_NONE",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080200120",
+        "MSRValue": "0x0800080004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100200100",
+        "MSRValue": "0x0400080004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.ANY_SNOOP  OCR.ALL_PF_RFO.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80080120",
+        "MSRValue": "0x0100080004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.HIT_OTHER_CORE_FWD OCR.OTHER.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C8000",
+        "MSRValue": "0x0200080004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080040100",
+        "MSRValue": "0x0080080004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD  OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800040490",
+        "MSRValue": "0x3F80200004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800020100",
+        "MSRValue": "0x1000200004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.HITM_OTHER_CORE  OCR.ALL_PF_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000080490",
+        "MSRValue": "0x0800200004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000200020",
+        "MSRValue": "0x0400200004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100020400",
+        "MSRValue": "0x0100200004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400040100",
+        "MSRValue": "0x0200200004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000200100",
+        "MSRValue": "0x0080200004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100020100",
+        "MSRValue": "0x3F80040004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080100100",
+        "MSRValue": "0x1000040004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0491",
+        "MSRValue": "0x0800040004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C07F7",
+        "MSRValue": "0x0400040004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C0004",
+        "MSRValue": "0x0100040004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_M.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080400491",
+        "MSRValue": "0x0200040004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080020491",
+        "MSRValue": "0x0080040004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100400002",
+        "MSRValue": "0x3F80100004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0490",
+        "MSRValue": "0x1000100004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0491",
+        "MSRValue": "0x0800100004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800040400",
+        "MSRValue": "0x0400100004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80040001",
+        "MSRValue": "0x0100100004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800200400",
+        "MSRValue": "0x0200100004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0491",
+        "MSRValue": "0x0080100004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT.SNOOP_MISS",
+        "EventName": "OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C0100",
+        "MSRValue": "0x3F80400004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100100010",
+        "MSRValue": "0x0080400004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000100100",
+        "MSRValue": "0x0100400004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.ANY_SNOOP  OCR.ALL_DATA_RD.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80080491",
+        "MSRValue": "0x3F80020004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80400100",
+        "MSRValue": "0x1000020004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_F.HITM_OTHER_CORE  OCR.ALL_READS.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10002007F7",
+        "MSRValue": "0x0800020004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0400",
+        "MSRValue": "0x0400020004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_CODE_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0120",
+        "MSRValue": "0x0100020004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OCR.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200108000",
+        "MSRValue": "0x0200020004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0100",
+        "MSRValue": "0x0080020004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "Counts demand data reads have any response type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80020400",
+        "MSRValue": "0x0000010001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads have any response type.",
+        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP OCR.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.ANY_RESPONSE",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010010",
+        "MSRValue": "0x3F803C0001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400100002",
+        "MSRValue": "0x10003C0001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.HITM_OTHER_CORE  OCR.ALL_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000040491",
+        "MSRValue": "0x08003C0001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xFE",
-        "EventName": "IDI_MISC.WB_UPGRADE",
-        "PublicDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800200010",
+        "MSRValue": "0x04003C0001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads",
+        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C0004",
+        "MSRValue": "0x01003C0001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100048000",
+        "MSRValue": "0x08007C0001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C0490",
+        "MSRValue": "0x02003C0001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED  OCR.ALL_PF_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100040490",
+        "MSRValue": "0x00803C0001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80200400",
+        "MSRValue": "0x3F80080001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.SNOOP_NONE",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080200491",
+        "MSRValue": "0x1000080001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200100010",
+        "MSRValue": "0x0800080001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.NO_SNOOP_NEEDED  OCR.ALL_PF_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100200120",
+        "MSRValue": "0x0400080001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100100004",
+        "MSRValue": "0x0100080001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080040010",
+        "MSRValue": "0x0200080001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0020",
+        "MSRValue": "0x0080080001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT.SNOOP_MISS",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C0010",
+        "MSRValue": "0x3F80200001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100400400",
+        "MSRValue": "0x1000200001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80400020",
+        "MSRValue": "0x0800200001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C07F7",
+        "MSRValue": "0x0400200001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100100400",
+        "MSRValue": "0x0100200001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100040004",
+        "MSRValue": "0x0200200001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C07F7",
+        "MSRValue": "0x0080200001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED  OCR.ALL_PF_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100200490",
+        "MSRValue": "0x3F80040001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100028000",
+        "MSRValue": "0x1000040001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80080080",
+        "MSRValue": "0x0800040001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests OCR.OTHER.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080408000",
+        "MSRValue": "0x0400040001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_FWD  OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08001007F7",
+        "MSRValue": "0x0100040001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x32",
-        "EventName": "SW_PREFETCH_ACCESS.T1_T2",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x4"
-    },
-    {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_M.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100400010",
+        "MSRValue": "0x0200040001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
@@ -4109,945 +4117,945 @@
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080020001",
+        "MSRValue": "0x0080040001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80040020",
+        "MSRValue": "0x3F80100001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000020020",
+        "MSRValue": "0x1000100001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD  OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800040491",
+        "MSRValue": "0x0800100001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100200020",
+        "MSRValue": "0x0400100001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.SNOOP_MISS",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200200122",
+        "MSRValue": "0x0100100001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080200020",
+        "MSRValue": "0x0200100001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000020400",
+        "MSRValue": "0x0080100001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0120",
+        "MSRValue": "0x3F80400001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads have any response type.",
+        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.ANY_RESPONSE",
+        "EventName": "OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010080",
+        "MSRValue": "0x0080400001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.ANY_SNOOP  OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80020490",
+        "MSRValue": "0x0100400001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80040100",
+        "MSRValue": "0x3F80020001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.HITM_OTHER_CORE  OCR.ALL_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000020122",
+        "MSRValue": "0x1000020001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0010",
+        "MSRValue": "0x0800020001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.ANY_SNOOP  OCR.ALL_RFO.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80200122",
+        "MSRValue": "0x0400020001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.HITM_OTHER_CORE  OCR.ALL_PF_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000100490",
+        "MSRValue": "0x0100020001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.ANY_SNOOP  OCR.ALL_RFO.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80100122",
+        "MSRValue": "0x0200020001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests OCR.OTHER.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OCR.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100408000",
+        "MSRValue": "0x0080020001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "Counts all demand data writes (RFOs) have any response type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C0020",
+        "MSRValue": "0x0000010002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.ANY_SNOOP OCR.DEMAND_RFO.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0002",
+        "MSRValue": "0x3F803C0002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_NO_FWD  OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE OCR.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04001007F7",
+        "MSRValue": "0x10003C0002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80048000",
+        "MSRValue": "0x08003C0002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.ANY_SNOOP  OCR.ALL_PF_DATA_RD.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80100490",
+        "MSRValue": "0x04003C0002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000200002",
+        "MSRValue": "0x01003C0002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.HITM_OTHER_CORE  OCR.ALL_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000080491",
+        "MSRValue": "0x08007C0002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_E.NO_SNOOP_NEEDED  OCR.ALL_READS.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01000807F7",
+        "MSRValue": "0x02003C0002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400100400",
+        "MSRValue": "0x00803C0002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200200002",
+        "MSRValue": "0x3F80080002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80100004",
+        "MSRValue": "0x1000080002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests have any response type.",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.ANY_RESPONSE",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010400",
+        "MSRValue": "0x0800080002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100200001",
+        "MSRValue": "0x0400080002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C8000",
+        "MSRValue": "0x0100080002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.NO_SNOOP_NEEDED  OCR.ALL_PF_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100100120",
+        "MSRValue": "0x0200080002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800040010",
+        "MSRValue": "0x0080080002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080100080",
+        "MSRValue": "0x3F80200002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD  OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400020122",
+        "MSRValue": "0x1000200002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000088000",
+        "MSRValue": "0x0800200002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT.ANY_SNOOP OCR.ALL_READS.L3_HIT.ANY_SNOOP OCR.ALL_READS.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C07F7",
+        "MSRValue": "0x0400200002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00804007F7",
+        "MSRValue": "0x0100200002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400028000",
+        "MSRValue": "0x0200200002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0490",
+        "MSRValue": "0x0080200002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100400004",
+        "MSRValue": "0x3F80040002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0001",
+        "MSRValue": "0x1000040002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200020002",
+        "MSRValue": "0x0800040002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C07F7",
+        "MSRValue": "0x0400040002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080400004",
+        "MSRValue": "0x0100040002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_M.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200100001",
+        "MSRValue": "0x0200040002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT.SNOOP_NONE OCR.ALL_READS.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT.SNOOP_NONE",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C07F7",
+        "MSRValue": "0x0080040002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000080080",
+        "MSRValue": "0x3F80100002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED  OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100020490",
+        "MSRValue": "0x1000100002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80028000",
+        "MSRValue": "0x0800100002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0490",
+        "MSRValue": "0x0400100002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800200020",
+        "MSRValue": "0x0100100002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_S.NO_SNOOP_NEEDED  OCR.ALL_READS.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01001007F7",
+        "MSRValue": "0x0200100002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.HITM_OTHER_CORE  OCR.ALL_RFO.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000100122",
+        "MSRValue": "0x0080100002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C0122",
+        "MSRValue": "0x3F80400002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_M.ANY_SNOOP  OCR.ALL_READS.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F800407F7",
+        "MSRValue": "0x0080400002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C07F7",
+        "MSRValue": "0x0100400002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.ANY_RESPONSE have any response type.",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.ANY_RESPONSE",
+        "EventName": "OCR.DEMAND_RFO.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00000107F7",
+        "MSRValue": "0x3F80020002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.DEMAND_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0491",
+        "MSRValue": "0x1000020002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED  OCR.ALL_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OCR.DEMAND_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100040491",
+        "MSRValue": "0x0800020002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80020080",
+        "MSRValue": "0x0400020002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000208000",
+        "MSRValue": "0x0100020002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT.ANY_SNOOP",
+        "EventName": "OCR.DEMAND_RFO.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0491",
+        "MSRValue": "0x0200020002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OCR.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000040400",
+        "MSRValue": "0x0080020002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts any other requests have any response type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.OTHER.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800080010",
+        "MSRValue": "0x0000018000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.ANY_SNOOP OCR.OTHER.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OCR.OTHER.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080400100",
+        "MSRValue": "0x3F803C8000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.HITM_OTHER_CORE OCR.OTHER.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.OTHER.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800100080",
+        "MSRValue": "0x10003C8000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
+        "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.HIT_OTHER_CORE_FWD OCR.OTHER.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OCR.OTHER.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080080100",
+        "MSRValue": "0x08003C8000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
+        "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.OTHER.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OCR.OTHER.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200080010",
+        "MSRValue": "0x04003C8000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.NO_SNOOP_NEEDED OCR.OTHER.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OCR.OTHER.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80400120",
+        "MSRValue": "0x01003C8000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OCR.OTHER.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080080400",
+        "MSRValue": "0x08007C8000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_MISS OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_MISS",
+        "EventName": "OCR.OTHER.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C0490",
+        "MSRValue": "0x02003C8000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
@@ -5067,1111 +5075,1092 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.OTHER.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800028000",
+        "MSRValue": "0x3F80088000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.SNOOP_NONE",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OCR.OTHER.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080080491",
+        "MSRValue": "0x1000088000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OCR.OTHER.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000100002",
+        "MSRValue": "0x0800088000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_S.HITM_OTHER_CORE  OCR.ALL_READS.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OCR.OTHER.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10001007F7",
+        "MSRValue": "0x0400088000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_F.ANY_SNOOP  OCR.ALL_READS.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OCR.OTHER.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F802007F7",
+        "MSRValue": "0x0100088000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_NO_FWD  OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.OTHER.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04000407F7",
+        "MSRValue": "0x0200088000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_S.SNOOP_NONE",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OCR.OTHER.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00801007F7",
+        "MSRValue": "0x0080088000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT.SNOOP_MISS",
+        "EventName": "OCR.OTHER.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C0080",
+        "MSRValue": "0x3F80208000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.SNOOP_NONE",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OCR.OTHER.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080080490",
+        "MSRValue": "0x1000208000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.OTHER.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400020400",
+        "MSRValue": "0x0800208000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.OTHER.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800100400",
+        "MSRValue": "0x0400208000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_NO_FWD  OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.OTHER.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04000807F7",
+        "MSRValue": "0x0100208000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OCR.OTHER.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200100004",
+        "MSRValue": "0x0200208000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.SNOOP_NONE",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OCR.OTHER.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080100491",
+        "MSRValue": "0x0080208000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OCR.OTHER.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80020002",
+        "MSRValue": "0x3F80048000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP OCR.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP",
+        "EventName": "OCR.OTHER.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0080",
+        "MSRValue": "0x1000048000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT.SNOOP_NONE OCR.ALL_RFO.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT.SNOOP_NONE",
+        "EventName": "OCR.OTHER.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C0122",
+        "MSRValue": "0x0800048000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.SNOOP_NONE",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OCR.OTHER.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080040490",
+        "MSRValue": "0x0400048000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.OTHER.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0002",
+        "MSRValue": "0x0100048000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.OTHER.L3_HIT_M.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400020100",
+        "MSRValue": "0x0200048000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.OTHER.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400040120",
+        "MSRValue": "0x0080048000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_E.SNOOP_NONE",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OCR.OTHER.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00800807F7",
+        "MSRValue": "0x3F80108000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OCR.OTHER.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000020001",
+        "MSRValue": "0x1000108000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.SNOOP_MISS",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OCR.OTHER.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02000207F7",
+        "MSRValue": "0x0800108000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OCR.OTHER.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000200080",
+        "MSRValue": "0x0400108000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of PREFETCHT0 instructions executed.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x32",
-        "EventName": "SW_PREFETCH_ACCESS.T0",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.OTHER.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0122",
+        "MSRValue": "0x0100108000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.OTHER.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800200001",
+        "MSRValue": "0x0200108000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD  OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.OTHER.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400100491",
+        "MSRValue": "0x0080108000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_M.SNOOP_NONE",
+        "BriefDescription": "Counts any other requests OCR.OTHER.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OCR.OTHER.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00800407F7",
+        "MSRValue": "0x3F80408000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
+        "BriefDescription": "Counts any other requests OCR.OTHER.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OCR.OTHER.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200100020",
+        "MSRValue": "0x0080408000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x28",
-        "EventName": "CORE_POWER.LVL2_TURBO_LICENSE",
-        "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server michroarchtecture).  This includes high current AVX 512-bit instructions.",
-        "SampleAfterValue": "200003",
-        "UMask": "0x20"
-    },
-    {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE OCR.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "Counts any other requests OCR.OTHER.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OCR.OTHER.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0400",
+        "MSRValue": "0x0100408000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OCR.OTHER.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0020",
+        "MSRValue": "0x3F80028000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OCR.OTHER.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000080400",
+        "MSRValue": "0x1000028000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.OTHER.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400200120",
+        "MSRValue": "0x0800028000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OCR.OTHER.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80400080",
+        "MSRValue": "0x0400028000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "Counts any other requests  OCR.OTHER.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OCR.OTHER.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80020004",
+        "MSRValue": "0x0100028000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OCR.OTHER.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80100400",
+        "MSRValue": "0x0200028000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.OTHER.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800080002",
+        "MSRValue": "0x0080028000",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.SNOOP_MISS",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests have any response type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OCR.PF_L1D_AND_SW.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200200120",
+        "MSRValue": "0x0000010400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP OCR.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0004",
+        "MSRValue": "0x3F803C0400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE OCR.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400200490",
+        "MSRValue": "0x10003C0400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80208000",
+        "MSRValue": "0x08003C0400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080100400",
+        "MSRValue": "0x04003C0400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.NO_SNOOP_NEEDED  OCR.ALL_READS.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01000207F7",
+        "MSRValue": "0x01003C0400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.SNOOP_NONE",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080200490",
+        "MSRValue": "0x08007C0400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800100002",
+        "MSRValue": "0x02003C0400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_F.SNOOP_NONE",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00802007F7",
+        "MSRValue": "0x00803C0400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP OCR.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0400",
+        "MSRValue": "0x3F80080400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0080",
+        "MSRValue": "0x1000080400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400100100",
+        "MSRValue": "0x0800080400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100200010",
+        "MSRValue": "0x0400080400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.HITM_OTHER_CORE  OCR.ALL_PF_RFO.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000040120",
+        "MSRValue": "0x0100080400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000200004",
+        "MSRValue": "0x0200080400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0002",
+        "MSRValue": "0x0080080400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800200080",
+        "MSRValue": "0x3F80200400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800040080",
+        "MSRValue": "0x1000200400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80400490",
+        "MSRValue": "0x0800200400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800020400",
+        "MSRValue": "0x0400200400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.SNOOP_NONE OCR.ALL_DATA_RD.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT.SNOOP_NONE",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C0491",
+        "MSRValue": "0x0100200400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80100002",
+        "MSRValue": "0x0200200400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.SNOOP_MISS",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200100122",
+        "MSRValue": "0x0080200400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.ANY_SNOOP  OCR.ALL_PF_DATA_RD.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80080490",
+        "MSRValue": "0x3F80040400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200080080",
+        "MSRValue": "0x1000040400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400040020",
+        "MSRValue": "0x0800040400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.ANY_SNOOP OCR.PF_L3_RFO.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT.ANY_SNOOP",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0100",
+        "MSRValue": "0x0400040400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.SNOOP_MISS",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200040491",
+        "MSRValue": "0x0100040400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_M.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080040080",
+        "MSRValue": "0x0200040400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0100",
+        "MSRValue": "0x0080040400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080400080",
+        "MSRValue": "0x3F80100400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD  OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800200120",
+        "MSRValue": "0x1000100400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.SNOOP_NONE",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080080120",
+        "MSRValue": "0x0800100400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000080002",
+        "MSRValue": "0x0400100400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.SNOOP_MISS",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200040490",
+        "MSRValue": "0x0100100400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100080002",
+        "MSRValue": "0x0200100400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800020020",
+        "MSRValue": "0x0080100400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080048000",
+        "MSRValue": "0x3F80400400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400080002",
+        "MSRValue": "0x0080400400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200080001",
+        "MSRValue": "0x0100400400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.NO_SNOOP_NEEDED  OCR.ALL_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L1D_AND_SW.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100200122",
+        "MSRValue": "0x3F80020400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED  OCR.ALL_PF_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100080490",
+        "MSRValue": "0x1000020400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200040004",
+        "MSRValue": "0x0800020400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400040002",
+        "MSRValue": "0x0400020400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L1D_AND_SW.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000040001",
+        "MSRValue": "0x0100020400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L1D_AND_SW.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000200400",
+        "MSRValue": "0x0200020400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
@@ -6182,1325 +6171,1309 @@
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OCR.PF_L1D_AND_SW.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200100400",
+        "MSRValue": "0x0080020400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads have any response type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OCR.PF_L2_DATA_RD.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100400001",
+        "MSRValue": "0x0000010010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.NO_SNOOP_NEEDED  OCR.ALL_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP OCR.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100040122",
+        "MSRValue": "0x3F803C0010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80400001",
+        "MSRValue": "0x10003C0010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED  OCR.ALL_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100200491",
+        "MSRValue": "0x08003C0010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80400002",
+        "MSRValue": "0x04003C0010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80400010",
+        "MSRValue": "0x01003C0010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.ANY_SNOOP OCR.OTHER.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C8000",
+        "MSRValue": "0x08007C0010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100200080",
+        "MSRValue": "0x02003C0010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.SNOOP_MISS",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200100120",
+        "MSRValue": "0x00803C0010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0010",
+        "MSRValue": "0x3F80080010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_E.HITM_OTHER_CORE  OCR.ALL_READS.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10000807F7",
+        "MSRValue": "0x1000080010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080100010",
+        "MSRValue": "0x0800080010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80400004",
+        "MSRValue": "0x0400080010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD  OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400080491",
+        "MSRValue": "0x0100080010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800020001",
+        "MSRValue": "0x0200080010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800208000",
+        "MSRValue": "0x0080080010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080020100",
+        "MSRValue": "0x3F80200010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP OCR.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0001",
+        "MSRValue": "0x1000200010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200040080",
+        "MSRValue": "0x0800200010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT.SNOOP_MISS",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C0400",
+        "MSRValue": "0x0400200010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C0120",
+        "MSRValue": "0x0100200010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.HITM_OTHER_CORE  OCR.ALL_PF_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000020120",
+        "MSRValue": "0x0200200010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400108000",
+        "MSRValue": "0x0080200010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0490",
+        "MSRValue": "0x3F80040010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080108000",
+        "MSRValue": "0x1000040010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200020080",
+        "MSRValue": "0x0800040010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01004007F7",
+        "MSRValue": "0x0400040010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80040010",
+        "MSRValue": "0x0100040010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_M.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80200010",
+        "MSRValue": "0x0200040010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED OCR.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0004",
+        "MSRValue": "0x0080040010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200200100",
+        "MSRValue": "0x3F80100010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080200080",
+        "MSRValue": "0x1000100010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200040020",
+        "MSRValue": "0x0800100010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100020080",
+        "MSRValue": "0x0400100010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_M.HITM_OTHER_CORE  OCR.ALL_READS.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10000407F7",
+        "MSRValue": "0x0100100010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200100080",
+        "MSRValue": "0x0200100010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.HITM_OTHER_CORE  OCR.ALL_READS.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10000207F7",
+        "MSRValue": "0x0080100010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.ANY_SNOOP  OCR.ALL_DATA_RD.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80040491",
+        "MSRValue": "0x3F80400010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.NO_SNOOP_NEEDED  OCR.ALL_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100080122",
+        "MSRValue": "0x0080400010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400200400",
+        "MSRValue": "0x0100400010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80200100",
+        "MSRValue": "0x3F80020010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200040010",
+        "MSRValue": "0x1000020010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080080080",
+        "MSRValue": "0x0800020010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080200004",
+        "MSRValue": "0x0400020010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.NO_SNOOP_NEEDED OCR.OTHER.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L2_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C8000",
+        "MSRValue": "0x0100020010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.HITM_OTHER_CORE  OCR.ALL_PF_RFO.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000200120",
+        "MSRValue": "0x0200020010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100100100",
+        "MSRValue": "0x0080020010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs have any response type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OCR.PF_L2_RFO.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C0001",
+        "MSRValue": "0x0000010020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT.ANY_SNOOP OCR.ALL_RFO.L3_HIT.ANY_SNOOP OCR.ALL_RFO.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.ANY_SNOOP OCR.PF_L2_RFO.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0122",
+        "MSRValue": "0x3F803C0020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE OCR.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0100",
+        "MSRValue": "0x10003C0020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100080010",
+        "MSRValue": "0x08003C0020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200088000",
+        "MSRValue": "0x04003C0020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80080400",
+        "MSRValue": "0x01003C0020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.ANY_SNOOP  OCR.ALL_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80020491",
+        "MSRValue": "0x08007C0020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.SNOOP_NONE",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080040122",
+        "MSRValue": "0x02003C0020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100020020",
+        "MSRValue": "0x00803C0020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100400490",
+        "MSRValue": "0x3F80080020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080020002",
+        "MSRValue": "0x1000080020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200100002",
+        "MSRValue": "0x0800080020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.ANY_SNOOP  OCR.ALL_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80020122",
+        "MSRValue": "0x0400080020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800020080",
+        "MSRValue": "0x0100080020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400020001",
+        "MSRValue": "0x0200080020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.HITM_OTHER_CORE  OCR.ALL_PF_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000040490",
+        "MSRValue": "0x0080080020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of PREFETCHW instructions executed.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x32",
-        "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x8"
-    },
-    {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.SNOOP_MISS",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200100490",
+        "MSRValue": "0x3F80200020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800100020",
+        "MSRValue": "0x1000200020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD  OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800080491",
+        "MSRValue": "0x0800200020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.ANY_SNOOP  OCR.ALL_PF_RFO.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80040120",
+        "MSRValue": "0x0400200020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.SNOOP_MISS",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_M.SNOOP_MISS",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200040120",
+        "MSRValue": "0x0100200020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000020010",
+        "MSRValue": "0x0200200020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400020490",
+        "MSRValue": "0x0080200020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP OCR.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0004",
+        "MSRValue": "0x3F80040020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD  OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800200490",
+        "MSRValue": "0x1000040020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.SNOOP_MISS",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200080122",
+        "MSRValue": "0x0800040020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400040001",
+        "MSRValue": "0x0400040020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0010",
+        "MSRValue": "0x0100040020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_M.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800100100",
+        "MSRValue": "0x0200040020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0080",
+        "MSRValue": "0x0080040020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000040080",
+        "MSRValue": "0x3F80100020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000080010",
+        "MSRValue": "0x1000100020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED  OCR.ALL_PF_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100020120",
+        "MSRValue": "0x0800100020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD  OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800100122",
+        "MSRValue": "0x0400100020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400020120",
+        "MSRValue": "0x0100100020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX2 turbo schedule.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x28",
-        "EventName": "CORE_POWER.LVL1_TURBO_LICENSE",
-        "PublicDescription": "Core cycles where the core was running with power-delivery for license level 1.  This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions.",
-        "SampleAfterValue": "200003",
-        "UMask": "0x18"
-    },
-    {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800040020",
+        "MSRValue": "0x0200100020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L2_RFO.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400200004",
+        "MSRValue": "0x0080100020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080020080",
+        "MSRValue": "0x3F80400020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000040020",
+        "MSRValue": "0x0080400020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
+        "EventName": "OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080400001",
+        "MSRValue": "0x0100400020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L2_RFO.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100080004",
+        "MSRValue": "0x3F80020020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80080100",
+        "MSRValue": "0x1000020020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.SNOOP_NONE",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OCR.PF_L2_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080100490",
+        "MSRValue": "0x0800020020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L2_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400100020",
+        "MSRValue": "0x0400020020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.ANY_SNOOP  OCR.ALL_RFO.L3_HIT_E.ANY_SNOOP",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_E.ANY_SNOOP",
+        "EventName": "OCR.PF_L2_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80080122",
+        "MSRValue": "0x0100020020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L2_RFO.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400100120",
+        "MSRValue": "0x0200020020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OCR.PF_L2_RFO.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100400491",
+        "MSRValue": "0x0080020020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads have any response type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L3_DATA_RD.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100080080",
+        "MSRValue": "0x0000010080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP OCR.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80200002",
+        "MSRValue": "0x3F803C0080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.SNOOP_NONE OCR.ALL_PF_RFO.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT.SNOOP_NONE",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C0120",
+        "MSRValue": "0x10003C0080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of hardware interrupts received by the processor.",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xCB",
-        "EventName": "HW_INTERRUPTS.RECEIVED",
-        "PublicDescription": "Counts the number of hardware interruptions received by the processor.",
-        "SampleAfterValue": "203",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x08003C0080",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200200080",
+        "MSRValue": "0x04003C0080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200020020",
+        "MSRValue": "0x01003C0080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
@@ -7520,1143 +7493,1170 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE OCR.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0020",
+        "MSRValue": "0x02003C0080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000108000",
+        "MSRValue": "0x00803C0080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100100080",
+        "MSRValue": "0x3F80080080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080040001",
+        "MSRValue": "0x1000080080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_NONE",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C0002",
+        "MSRValue": "0x0800080080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD  OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400040491",
+        "MSRValue": "0x0400080080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD  OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800020120",
+        "MSRValue": "0x0100080080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED  OCR.ALL_PF_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100100490",
+        "MSRValue": "0x0200080080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) have any response type.",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010002",
+        "MSRValue": "0x0080080080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000040004",
+        "MSRValue": "0x3F80200080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100020004",
+        "MSRValue": "0x1000200080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080100001",
+        "MSRValue": "0x0800200080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400020002",
+        "MSRValue": "0x0400200080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800100004",
+        "MSRValue": "0x0100200080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400208000",
+        "MSRValue": "0x0200200080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs have any response type.",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.ANY_RESPONSE",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010100",
+        "MSRValue": "0x0080200080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.SNOOP_MISS",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200080491",
+        "MSRValue": "0x3F80040080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD  OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400080490",
+        "MSRValue": "0x1000040080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100040100",
+        "MSRValue": "0x0800040080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80100001",
+        "MSRValue": "0x0400040080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.SNOOP_MISS OCR.ALL_DATA_RD.L3_HIT.SNOOP_MISS",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT.SNOOP_MISS",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02003C0491",
+        "MSRValue": "0x0100040080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_M.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400048000",
+        "MSRValue": "0x0200040080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP OCR.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0010",
+        "MSRValue": "0x0080040080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200208000",
+        "MSRValue": "0x3F80100080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_NONE OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_NONE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_NONE",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x00803C0490",
+        "MSRValue": "0x1000100080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.ANY_SNOOP  OCR.ALL_PF_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80020120",
+        "MSRValue": "0x0800100080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0010",
+        "MSRValue": "0x0400100080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080200400",
+        "MSRValue": "0x0100100080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800080020",
+        "MSRValue": "0x0200100080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD  OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800200491",
+        "MSRValue": "0x0080100080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_S.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_S.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000100400",
+        "MSRValue": "0x3F80400080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080020010",
+        "MSRValue": "0x0080400080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NONE",
+        "EventName": "OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080040004",
+        "MSRValue": "0x0100400080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L3_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400080004",
+        "MSRValue": "0x3F80020080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400080100",
+        "MSRValue": "0x1000020080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000040010",
+        "MSRValue": "0x0800020080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "EventName": "OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200020001",
+        "MSRValue": "0x0400020080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L3_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400080001",
+        "MSRValue": "0x0100020080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_NO_FWD  OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04002007F7",
+        "MSRValue": "0x0200020080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.HITM_OTHER_CORE  OCR.ALL_RFO.L3_HIT_E.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_E.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000080122",
+        "MSRValue": "0x0080020080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs have any response type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OCR.PF_L3_RFO.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08007C0020",
+        "MSRValue": "0x0000010100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)  OCR.DEMAND_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.ANY_SNOOP OCR.PF_L3_RFO.L3_HIT.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400200002",
+        "MSRValue": "0x3F803C0100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE OCR.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200080020",
+        "MSRValue": "0x10003C0100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.SUPPLIER_NONE.SNOOP_NONE",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080020400",
+        "MSRValue": "0x08003C0100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED  OCR.ALL_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100020491",
+        "MSRValue": "0x04003C0100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80100100",
+        "MSRValue": "0x01003C0100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.ANY_SNOOP  OCR.ALL_PF_RFO.L3_HIT_F.ANY_SNOOP",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_F.ANY_SNOOP",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80200120",
+        "MSRValue": "0x08007C0100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.SNOOP_MISS",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0400",
+        "MSRValue": "0x02003C0100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400040080",
+        "MSRValue": "0x00803C0100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_E.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_E.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000200010",
+        "MSRValue": "0x3F80080100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_E.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_E.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100040020",
+        "MSRValue": "0x1000080100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.HITM_OTHER_CORE OCR.OTHER.L3_HIT.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C8000",
+        "MSRValue": "0x0800080100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800040100",
+        "MSRValue": "0x0400080100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200100100",
+        "MSRValue": "0x0100080100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_E.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400080400",
+        "MSRValue": "0x0200080100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_E.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000200001",
+        "MSRValue": "0x0080080100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of PREFETCHNTA instructions executed.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x32",
-        "EventName": "SW_PREFETCH_ACCESS.NTA",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_F.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_F.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100200004",
+        "MSRValue": "0x3F80200100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads  OCR.DEMAND_CODE_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_F.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_F.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800040004",
+        "MSRValue": "0x1000200100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.SNOOP_NONE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080100120",
+        "MSRValue": "0x0800200100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NONE",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080100004",
+        "MSRValue": "0x0400200100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100040400",
+        "MSRValue": "0x0100200100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_F.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0400",
+        "MSRValue": "0x0200200100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_F.SNOOP_MISS",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_F.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200200400",
+        "MSRValue": "0x0080200100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.SNOOP_NONE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_M.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_E.SNOOP_NONE",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_M.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080080122",
+        "MSRValue": "0x3F80040100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_M.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_M.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80100020",
+        "MSRValue": "0x1000040100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs  OCR.PF_L2_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100100020",
+        "MSRValue": "0x0800040100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.SNOOP_NONE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080200122",
+        "MSRValue": "0x0400040100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0080",
+        "MSRValue": "0x0100040100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads  OCR.DEMAND_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_M.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100040001",
+        "MSRValue": "0x0200040100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_M.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800100010",
+        "MSRValue": "0x0080040100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE  OCR.ALL_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_S.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_S.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x1000020491",
+        "MSRValue": "0x3F80100100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD  OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_S.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_S.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800080490",
+        "MSRValue": "0x1000100100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts any other requests  OCR.OTHER.L3_HIT_F.NO_SNOOP_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.OTHER.L3_HIT_F.NO_SNOOP_NEEDED",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100208000",
+        "MSRValue": "0x0800100100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_S.ANY_SNOOP  OCR.ALL_READS.L3_HIT_S.ANY_SNOOP",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_S.ANY_SNOOP",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F801007F7",
+        "MSRValue": "0x0400100100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT_E.SNOOP_MISS",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0200080002",
+        "MSRValue": "0x0100100100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_S.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800020010",
+        "MSRValue": "0x0200100100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L3_RFO.L3_HIT_S.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0080",
+        "MSRValue": "0x0080100100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_F.SNOOP_NONE",
+        "EventName": "OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0080200100",
+        "MSRValue": "0x3F80400100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800080080",
+        "MSRValue": "0x0080400100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100400122",
+        "MSRValue": "0x0100400100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.SUPPLIER_NONE.ANY_SNOOP",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L3_RFO.SUPPLIER_NONE.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800080100",
+        "MSRValue": "0x3F80020100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads  OCR.PF_L3_DATA_RD.L3_HIT_M.ANY_SNOOP",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L3_DATA_RD.L3_HIT_M.ANY_SNOOP",
+        "EventName": "OCR.PF_L3_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F80040080",
+        "MSRValue": "0x1000020100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
+        "EventName": "OCR.PF_L3_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0100400120",
+        "MSRValue": "0x0800020100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD  OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L3_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800020491",
+        "MSRValue": "0x0400020100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests  OCR.PF_L1D_AND_SW.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  OCR.PF_L3_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L1D_AND_SW.L3_HIT_E.HIT_OTHER_CORE_FWD",
+        "EventName": "OCR.PF_L3_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0800080400",
+        "MSRValue": "0x0100020100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads  OCR.PF_L2_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.PF_L2_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OCR.PF_L3_RFO.SUPPLIER_NONE.SNOOP_MISS",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0400200010",
+        "MSRValue": "0x0200020100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OCR.ALL_READS.L3_HIT_S.SNOOP_MISS",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OCR.ALL_READS.L3_HIT_S.SNOOP_MISS",
+        "EventName": "OCR.PF_L3_RFO.SUPPLIER_NONE.SNOOP_NONE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x02001007F7",
+        "MSRValue": "0x0080020100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Number of PREFETCHNTA instructions executed.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x32",
+        "EventName": "SW_PREFETCH_ACCESS.NTA",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Number of PREFETCHW instructions executed.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x32",
+        "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Number of PREFETCHT0 instructions executed.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x32",
+        "EventName": "SW_PREFETCH_ACCESS.T0",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x32",
+        "EventName": "SW_PREFETCH_ACCESS.T1_T2",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
     }
 ]
 \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json b/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json
index 023f31c72a42..ca5748120666 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json
@@ -1,148 +1,158 @@
 [
     {
-        "BriefDescription": "Far branch instructions retired.",
+        "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x14",
+        "EventName": "ARITH.DIVIDER_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "All (macro) branch instructions retired.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "Errata": "SKL091",
         "EventCode": "0xC4",
-        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
-        "PEBS": "1",
-        "PublicDescription": "This event counts far branch instructions retired.",
-        "SampleAfterValue": "100007",
-        "UMask": "0x40"
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "PublicDescription": "Counts all (macro) branch instructions retired.",
+        "SampleAfterValue": "400009"
     },
     {
-        "BriefDescription": "Total execution stalls.",
+        "BriefDescription": "All (macro) branch instructions retired.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "4",
-        "EventCode": "0xA3",
-        "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
-        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3",
+        "Errata": "SKL091",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+        "PEBS": "2",
+        "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.",
+        "SampleAfterValue": "400009",
         "UMask": "0x4"
     },
     {
-        "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
+        "BriefDescription": "Conditional branch instructions retired.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x0E",
-        "EventName": "UOPS_ISSUED.SLOW_LEA",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x20"
+        "Errata": "SKL091",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.CONDITIONAL",
+        "PEBS": "1",
+        "PublicDescription": "This event counts conditional branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles with less than 10 actually retired uops.",
+        "BriefDescription": "Not taken branch instructions retired.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "10",
-        "EventCode": "0xC2",
-        "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
-        "Invert": "1",
-        "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2"
+        "Errata": "SKL091",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.COND_NTAKEN",
+        "PublicDescription": "This event counts not taken branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "UMask": "0x10"
     },
     {
-        "BriefDescription": "Thread cycles when thread is not in halt state",
+        "BriefDescription": "Far branch instructions retired.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x3C",
-        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
-        "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
-        "SampleAfterValue": "2000003"
+        "Errata": "SKL091",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+        "PEBS": "1",
+        "PublicDescription": "This event counts far branch instructions retired.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x40"
     },
     {
-        "BriefDescription": "Cycles without actually retired uops.",
+        "BriefDescription": "Direct and indirect near call instructions retired.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0xC2",
-        "EventName": "UOPS_RETIRED.STALL_CYCLES",
-        "Invert": "1",
-        "PublicDescription": "This event counts cycles without actually retired uops.",
-        "SampleAfterValue": "2000003",
+        "Errata": "SKL091",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL",
+        "PEBS": "1",
+        "PublicDescription": "This event counts both direct and indirect near call instructions retired.",
+        "SampleAfterValue": "100007",
         "UMask": "0x2"
     },
     {
-        "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+        "BriefDescription": "Return instructions retired.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0xB1",
-        "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
-        "Invert": "1",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2"
+        "Errata": "SKL091",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+        "PEBS": "1",
+        "PublicDescription": "This event counts return instructions retired.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x8"
     },
     {
-        "AnyThread": "1",
-        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
+        "BriefDescription": "Taken branch instructions retired.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x0D",
-        "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "Errata": "SKL091",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+        "PEBS": "1",
+        "PublicDescription": "This event counts taken branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "UMask": "0x20"
     },
     {
-        "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+        "BriefDescription": "Not taken branch instructions retired.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "4",
-        "EventCode": "0xA8",
-        "EventName": "LSD.CYCLES_4_UOPS",
-        "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "Core cycles when the thread is not in halt state",
-        "Counter": "Fixed counter 1",
-        "CounterHTOff": "Fixed counter 1",
-        "EventName": "CPU_CLK_UNHALTED.THREAD",
-        "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2"
+        "Errata": "SKL091",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+        "PublicDescription": "This event counts not taken branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "UMask": "0x10"
     },
     {
-        "AnyThread": "1",
-        "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
+        "BriefDescription": "All mispredicted macro branch instructions retired.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x3C",
-        "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
-        "SampleAfterValue": "25003",
-        "UMask": "0x1"
+        "EventCode": "0xC5",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch.  When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+        "SampleAfterValue": "400009"
     },
     {
-        "BriefDescription": "Direct and indirect near call instructions retired.",
+        "BriefDescription": "Mispredicted macro branch instructions retired.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "Errata": "SKL091",
-        "EventCode": "0xC4",
-        "EventName": "BR_INST_RETIRED.NEAR_CALL",
-        "PEBS": "1",
-        "PublicDescription": "This event counts both direct and indirect near call instructions retired.",
-        "SampleAfterValue": "100007",
-        "UMask": "0x2"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC5",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+        "PEBS": "2",
+        "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "UMask": "0x4"
     },
     {
-        "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
+        "BriefDescription": "Mispredicted conditional branch instructions retired.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x14",
-        "EventName": "ARITH.DIVIDER_ACTIVE",
-        "SampleAfterValue": "2000003",
+        "EventCode": "0xC5",
+        "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+        "PEBS": "1",
+        "PublicDescription": "This event counts mispredicted conditional branch instructions retired.",
+        "SampleAfterValue": "400009",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
+        "BriefDescription": "Mispredicted direct and indirect near call instructions retired.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x3C",
-        "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
-        "SampleAfterValue": "25003",
+        "EventCode": "0xC5",
+        "EventName": "BR_MISP_RETIRED.NEAR_CALL",
+        "PEBS": "1",
+        "PublicDescription": "Counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.",
+        "SampleAfterValue": "400009",
         "UMask": "0x2"
     },
     {
@@ -156,217 +166,206 @@
         "UMask": "0x20"
     },
     {
-        "BriefDescription": "Increments whenever there is an update to the LBR array.",
+        "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xCC",
-        "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
-        "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x20"
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "25003",
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Instructions retired from execution.",
-        "Counter": "Fixed counter 0",
-        "CounterHTOff": "Fixed counter 0",
-        "EventName": "INST_RETIRED.ANY",
-        "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
-        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
+        "SampleAfterValue": "25003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Conditional branch instructions retired.",
+        "AnyThread": "1",
+        "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "Errata": "SKL091",
-        "EventCode": "0xC4",
-        "EventName": "BR_INST_RETIRED.CONDITIONAL",
-        "PEBS": "1",
-        "PublicDescription": "This event counts conditional branch instructions retired.",
-        "SampleAfterValue": "400009",
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+        "SampleAfterValue": "25003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.",
+        "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x0D",
-        "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x80"
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "25003",
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0xB1",
-        "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
-        "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
+        "BriefDescription": "Reference cycles when the core is not in halt state.",
+        "Counter": "Fixed counter 2",
+        "CounterHTOff": "Fixed counter 2",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+        "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'.  The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'.  After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "UMask": "0x3"
     },
     {
-        "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+        "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0xB1",
-        "EventName": "UOPS_EXECUTED.STALL_CYCLES",
-        "Invert": "1",
-        "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
-        "SampleAfterValue": "2000003",
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+        "SampleAfterValue": "25003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.",
+        "AnyThread": "1",
+        "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA6",
-        "EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
-        "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x10"
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+        "SampleAfterValue": "25003",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+        "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "5",
-        "EventCode": "0xA3",
-        "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x5"
+        "CounterMask": "1",
+        "EdgeDetect": "1",
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_UNHALTED.RING0_TRANS",
+        "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).",
+        "SampleAfterValue": "100007"
     },
     {
-        "BriefDescription": "Mispredicted macro branch instructions retired.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xC5",
-        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
-        "PEBS": "2",
-        "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.",
-        "SampleAfterValue": "400009",
-        "UMask": "0x4"
+        "BriefDescription": "Core cycles when the thread is not in halt state",
+        "Counter": "Fixed counter 1",
+        "CounterHTOff": "Fixed counter 1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD",
+        "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "2",
-        "EventCode": "0xB1",
-        "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
-        "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
+        "AnyThread": "1",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "Counter": "Fixed counter 1",
+        "CounterHTOff": "Fixed counter 1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
         "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
+        "BriefDescription": "Thread cycles when thread is not in halt state",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x3C",
-        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
-        "SampleAfterValue": "25003",
-        "UMask": "0x1"
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+        "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
+        "SampleAfterValue": "2000003"
     },
     {
-        "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
+        "AnyThread": "1",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x3C",
-        "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
-        "SampleAfterValue": "25003",
-        "UMask": "0x2"
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+        "SampleAfterValue": "2000003"
     },
     {
-        "BriefDescription": "All (macro) branch instructions retired.",
+        "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "Errata": "SKL091",
-        "EventCode": "0xC4",
-        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
-        "PublicDescription": "Counts all (macro) branch instructions retired.",
-        "SampleAfterValue": "400009"
+        "CounterMask": "8",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
     },
     {
-        "BriefDescription": "Cycles per thread when uops are executed in port 0",
+        "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA1",
-        "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.",
+        "CounterMask": "1",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
         "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles per thread when uops are executed in port 1",
+        "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA1",
-        "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.",
+        "CounterMask": "16",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
         "SampleAfterValue": "2000003",
-        "UMask": "0x2"
+        "UMask": "0x10"
     },
     {
-        "BriefDescription": "Cycles per thread when uops are executed in port 2",
+        "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA1",
-        "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 2.",
+        "CounterMask": "12",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
         "SampleAfterValue": "2000003",
-        "UMask": "0x4"
+        "UMask": "0xc"
     },
     {
-        "BriefDescription": "Cycles per thread when uops are executed in port 3",
+        "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA1",
-        "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 3.",
+        "CounterMask": "5",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
         "SampleAfterValue": "2000003",
-        "UMask": "0x8"
+        "UMask": "0x5"
     },
     {
-        "BriefDescription": "Cycles per thread when uops are executed in port 4",
+        "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA1",
-        "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 4.",
+        "CounterHTOff": "0,1,2,3",
+        "CounterMask": "20",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
         "SampleAfterValue": "2000003",
-        "UMask": "0x10"
+        "UMask": "0x14"
     },
     {
-        "BriefDescription": "Cycles per thread when uops are executed in port 5",
+        "BriefDescription": "Total execution stalls.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA1",
-        "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.",
+        "CounterMask": "4",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
         "SampleAfterValue": "2000003",
-        "UMask": "0x20"
+        "UMask": "0x4"
     },
     {
-        "BriefDescription": "Cycles per thread when uops are executed in port 6",
+        "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA1",
-        "EventName": "UOPS_DISPATCHED_PORT.PORT_6",
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.",
+        "EventCode": "0xA6",
+        "EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
+        "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x40"
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Cycles per thread when uops are executed in port 7",
+        "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA1",
-        "EventName": "UOPS_DISPATCHED_PORT.PORT_7",
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 7.",
+        "EventCode": "0xA6",
+        "EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
+        "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x80"
+        "UMask": "0x4"
     },
     {
         "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.",
@@ -379,37 +378,31 @@
         "UMask": "0x8"
     },
     {
-        "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+        "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EdgeDetect": "1",
-        "EventCode": "0x5E",
-        "EventName": "RS_EVENTS.EMPTY_END",
-        "Invert": "1",
-        "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.",
+        "EventCode": "0xA6",
+        "EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
+        "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "UMask": "0x10"
     },
     {
-        "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
+        "BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x0E",
-        "EventName": "UOPS_ISSUED.STALL_CYCLES",
-        "Invert": "1",
-        "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
+        "EventCode": "0xA6",
+        "EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
         "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "UMask": "0x40"
     },
     {
-        "BriefDescription": "Number of Uops delivered by the LSD.",
+        "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA8",
-        "EventName": "LSD.UOPS",
-        "PublicDescription": "Number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
+        "EventCode": "0xA6",
+        "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
+        "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.",
         "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
@@ -424,117 +417,108 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Taken branch instructions retired.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "Errata": "SKL091",
-        "EventCode": "0xC4",
-        "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
-        "PEBS": "1",
-        "PublicDescription": "This event counts taken branch instructions retired.",
-        "SampleAfterValue": "400009",
-        "UMask": "0x20"
-    },
-    {
-        "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x0E",
-        "EventName": "UOPS_ISSUED.ANY",
-        "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
+        "BriefDescription": "Instructions retired from execution.",
+        "Counter": "Fixed counter 0",
+        "CounterHTOff": "Fixed counter 0",
+        "EventName": "INST_RETIRED.ANY",
+        "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
         "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Resource-related stall cycles",
+        "BriefDescription": "Number of instructions retired. General Counter - architectural event",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xa2",
-        "EventName": "RESOURCE_STALLS.ANY",
-        "PublicDescription": "Counts resource-related stall cycles.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "Errata": "SKL091, SKL044",
+        "EventCode": "0xC0",
+        "EventName": "INST_RETIRED.ANY_P",
+        "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).",
+        "SampleAfterValue": "2000003"
     },
     {
-        "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "CounterMask": "20",
-        "EventCode": "0xA3",
-        "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
+        "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
+        "Counter": "1",
+        "CounterHTOff": "1",
+        "Errata": "SKL091, SKL044",
+        "EventCode": "0xC0",
+        "EventName": "INST_RETIRED.PREC_DIST",
+        "PEBS": "2",
+        "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x14"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA6",
-        "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
-        "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.",
+        "BriefDescription": "Number of cycles using always true condition applied to  PEBS instructions retired event.",
+        "Counter": "0,2,3",
+        "CounterHTOff": "0,2,3",
+        "CounterMask": "10",
+        "Errata": "SKL091, SKL044",
+        "EventCode": "0xC0",
+        "EventName": "INST_RETIRED.TOTAL_CYCLES_PS",
+        "Invert": "1",
+        "PEBS": "2",
+        "PublicDescription": "Number of cycles using an always true condition applied to  PEBS instructions retired event. (inst_ret< 16)",
         "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
+        "BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA2",
-        "EventName": "RESOURCE_STALLS.SB",
-        "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
+        "EventCode": "0x0D",
+        "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
         "SampleAfterValue": "2000003",
-        "UMask": "0x8"
+        "UMask": "0x80"
     },
     {
-        "BriefDescription": "Not taken branch instructions retired.",
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "Errata": "SKL091",
-        "EventCode": "0xC4",
-        "EventName": "BR_INST_RETIRED.NOT_TAKEN",
-        "PublicDescription": "This event counts not taken branch instructions retired.",
-        "SampleAfterValue": "400009",
-        "UMask": "0x10"
+        "EventCode": "0x0D",
+        "EventName": "INT_MISC.RECOVERY_CYCLES",
+        "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+        "AnyThread": "1",
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "12",
-        "EventCode": "0xA3",
-        "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+        "EventCode": "0x0D",
+        "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
         "SampleAfterValue": "2000003",
-        "UMask": "0xc"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.",
+        "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA6",
-        "EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
-        "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x4"
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.NO_SR",
+        "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x8"
     },
     {
-        "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+        "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "Errata": "SKL091, SKL044",
-        "EventCode": "0xC0",
-        "EventName": "INST_RETIRED.ANY_P",
-        "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).",
-        "SampleAfterValue": "2000003"
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.STORE_FORWARD",
+        "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Counts the number of x87 uops dispatched.",
+        "BriefDescription": "False dependencies in MOB due to partial compare on address.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xB1",
-        "EventName": "UOPS_EXECUTED.X87",
-        "PublicDescription": "Counts the number of x87 uops executed.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x10"
+        "EventCode": "0x07",
+        "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+        "PublicDescription": "Counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
         "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.",
@@ -547,219 +531,238 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Mispredicted direct and indirect near call instructions retired.",
+        "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC5",
-        "EventName": "BR_MISP_RETIRED.NEAR_CALL",
-        "PEBS": "1",
-        "PublicDescription": "Counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.",
-        "SampleAfterValue": "400009",
-        "UMask": "0x2"
+        "CounterMask": "4",
+        "EventCode": "0xA8",
+        "EventName": "LSD.CYCLES_4_UOPS",
+        "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
+        "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xB1",
-        "EventName": "UOPS_EXECUTED.THREAD",
-        "PublicDescription": "Number of uops to be executed per-thread each cycle.",
+        "CounterMask": "1",
+        "EventCode": "0xA8",
+        "EventName": "LSD.CYCLES_ACTIVE",
+        "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
         "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
+        "BriefDescription": "Number of Uops delivered by the LSD.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "3",
-        "EventCode": "0xB1",
-        "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
-        "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
+        "EventCode": "0xA8",
+        "EventName": "LSD.UOPS",
+        "PublicDescription": "Number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
         "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+        "BriefDescription": "Number of machine clears (nukes) of any type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
-        "EventCode": "0xA8",
-        "EventName": "LSD.CYCLES_ACTIVE",
-        "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
-        "SampleAfterValue": "2000003",
+        "EdgeDetect": "1",
+        "EventCode": "0xC3",
+        "EventName": "MACHINE_CLEARS.COUNT",
+        "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
+        "BriefDescription": "Self-modifying code (SMC) detected.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x0D",
-        "EventName": "INT_MISC.RECOVERY_CYCLES",
-        "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
+        "EventCode": "0xC3",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC1",
+        "EventName": "OTHER_ASSISTS.ANY",
+        "SampleAfterValue": "100003",
+        "UMask": "0x3f"
+    },
+    {
+        "BriefDescription": "Cycles where the pipeline is stalled due to serializing operations.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x59",
+        "EventName": "PARTIAL_RAT_STALLS.SCOREBOARD",
+        "PublicDescription": "This event counts cycles during which the microcode scoreboard stalls happen.",
         "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
+        "BriefDescription": "Resource-related stall cycles",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x3C",
-        "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
-        "SampleAfterValue": "25003",
+        "EventCode": "0xa2",
+        "EventName": "RESOURCE_STALLS.ANY",
+        "PublicDescription": "Counts resource-related stall cycles.",
+        "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+        "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "8",
-        "EventCode": "0xA3",
-        "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
+        "EventCode": "0xA2",
+        "EventName": "RESOURCE_STALLS.SB",
+        "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
         "SampleAfterValue": "2000003",
         "UMask": "0x8"
     },
     {
-        "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
-        "Counter": "1",
-        "CounterHTOff": "1",
-        "Errata": "SKL091, SKL044",
-        "EventCode": "0xC0",
-        "EventName": "INST_RETIRED.PREC_DIST",
-        "PEBS": "2",
-        "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.",
+        "BriefDescription": "Increments whenever there is an update to the LBR array.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xCC",
+        "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+        "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "UMask": "0x20"
     },
     {
-        "BriefDescription": "Not taken branch instructions retired.",
+        "BriefDescription": "Number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted). This event is not supported on first SKL and KBL products.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "Errata": "SKL091",
-        "EventCode": "0xc4",
-        "EventName": "BR_INST_RETIRED.COND_NTAKEN",
-        "PublicDescription": "This event counts not taken branch instructions retired.",
-        "SampleAfterValue": "400009",
-        "UMask": "0x10"
+        "EventCode": "0xCC",
+        "EventName": "ROB_MISC_EVENTS.PAUSE_INST",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x40"
     },
     {
-        "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
+        "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "3",
-        "EventCode": "0xB1",
-        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+        "EventCode": "0x5E",
+        "EventName": "RS_EVENTS.EMPTY_CYCLES",
+        "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x2"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+        "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
-        "EventCode": "0xB1",
-        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+        "EdgeDetect": "1",
+        "EventCode": "0x5E",
+        "EventName": "RS_EVENTS.EMPTY_END",
+        "Invert": "1",
+        "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x2"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
+        "BriefDescription": "Cycles per thread when uops are executed in port 0",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "4",
-        "EventCode": "0xB1",
-        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+        "EventCode": "0xA1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x2"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.",
+        "BriefDescription": "Cycles per thread when uops are executed in port 1",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC1",
-        "EventName": "OTHER_ASSISTS.ANY",
-        "SampleAfterValue": "100003",
-        "UMask": "0x3f"
+        "EventCode": "0xA1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.",
+        "BriefDescription": "Cycles per thread when uops are executed in port 2",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA6",
-        "EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
+        "EventCode": "0xA1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 2.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x40"
+        "UMask": "0x4"
     },
     {
-        "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+        "BriefDescription": "Cycles per thread when uops are executed in port 3",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "16",
-        "EventCode": "0xA3",
-        "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
+        "EventCode": "0xA1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 3.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x10"
+        "UMask": "0x8"
     },
     {
-        "BriefDescription": "Number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted). This event is not supported on first SKL and KBL products.",
+        "BriefDescription": "Cycles per thread when uops are executed in port 4",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xCC",
-        "EventName": "ROB_MISC_EVENTS.PAUSE_INST",
+        "EventCode": "0xA1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 4.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x40"
+        "UMask": "0x10"
     },
     {
-        "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
+        "BriefDescription": "Cycles per thread when uops are executed in port 5",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "4",
-        "EventCode": "0xB1",
-        "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
-        "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
+        "EventCode": "0xA1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "UMask": "0x20"
     },
     {
-        "BriefDescription": "Cycles where the pipeline is stalled due to serializing operations.",
+        "BriefDescription": "Cycles per thread when uops are executed in port 6",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x59",
-        "EventName": "PARTIAL_RAT_STALLS.SCOREBOARD",
-        "PublicDescription": "This event counts cycles during which the microcode scoreboard stalls happen.",
+        "EventCode": "0xA1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_6",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "UMask": "0x40"
     },
     {
-        "BriefDescription": "Retirement slots used.",
+        "BriefDescription": "Cycles per thread when uops are executed in port 7",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC2",
-        "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
-        "PublicDescription": "Counts the retirement slots used.",
+        "EventCode": "0xA1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_7",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 7.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x2"
+        "UMask": "0x80"
     },
     {
-        "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.",
+        "BriefDescription": "Number of uops executed on the core.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x0E",
-        "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH",
-        "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CORE",
+        "PublicDescription": "Number of uops executed from any thread.",
         "SampleAfterValue": "2000003",
         "UMask": "0x2"
     },
     {
-        "BriefDescription": "Return instructions retired.",
+        "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "Errata": "SKL091",
-        "EventCode": "0xC4",
-        "EventName": "BR_INST_RETIRED.NEAR_RETURN",
-        "PEBS": "1",
-        "PublicDescription": "This event counts return instructions retired.",
-        "SampleAfterValue": "100007",
-        "UMask": "0x8"
+        "CounterMask": "1",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
         "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
@@ -772,188 +775,195 @@
         "UMask": "0x2"
     },
     {
-        "BriefDescription": "Mispredicted conditional branch instructions retired.",
+        "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC5",
-        "EventName": "BR_MISP_RETIRED.CONDITIONAL",
-        "PEBS": "1",
-        "PublicDescription": "This event counts mispredicted conditional branch instructions retired.",
-        "SampleAfterValue": "400009",
-        "UMask": "0x1"
+        "CounterMask": "3",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Number of cycles using always true condition applied to  PEBS instructions retired event.",
-        "Counter": "0,2,3",
-        "CounterHTOff": "0,2,3",
-        "CounterMask": "10",
-        "Errata": "SKL091, SKL044",
-        "EventCode": "0xC0",
-        "EventName": "INST_RETIRED.TOTAL_CYCLES_PS",
-        "Invert": "1",
-        "PEBS": "2",
-        "PublicDescription": "Number of cycles using an always true condition applied to  PEBS instructions retired event. (inst_ret< 16)",
+        "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "4",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
         "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "False dependencies in MOB due to partial compare on address.",
+        "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x07",
-        "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
-        "PublicDescription": "Counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "CounterMask": "1",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+        "Invert": "1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Number of machine clears (nukes) of any type.",
+        "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
-        "EdgeDetect": "1",
-        "EventCode": "0xC3",
-        "EventName": "MACHINE_CLEARS.COUNT",
-        "SampleAfterValue": "100003",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+        "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
+        "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "AnyThread": "1",
-        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
-        "Counter": "Fixed counter 1",
-        "CounterHTOff": "Fixed counter 1",
-        "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+        "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "2",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
+        "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x2"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.",
+        "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x03",
-        "EventName": "LD_BLOCKS.STORE_FORWARD",
-        "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x2"
+        "CounterMask": "3",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
+        "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "AnyThread": "1",
-        "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
+        "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x3C",
-        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
-        "SampleAfterValue": "25003",
+        "CounterMask": "4",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+        "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
+        "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Reference cycles when the core is not in halt state.",
-        "Counter": "Fixed counter 2",
-        "CounterHTOff": "Fixed counter 2",
-        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
-        "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'.  The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'.  After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
+        "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+        "Invert": "1",
+        "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x3"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "All mispredicted macro branch instructions retired.",
+        "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC5",
-        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
-        "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch.  When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
-        "SampleAfterValue": "400009"
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.THREAD",
+        "PublicDescription": "Number of uops to be executed per-thread each cycle.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use",
+        "BriefDescription": "Counts the number of x87 uops dispatched.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x03",
-        "EventName": "LD_BLOCKS.NO_SR",
-        "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x8"
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.X87",
+        "PublicDescription": "Counts the number of x87 uops executed.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
     },
     {
-        "BriefDescription": "Self-modifying code (SMC) detected.",
+        "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC3",
-        "EventName": "MACHINE_CLEARS.SMC",
-        "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x4"
+        "EventCode": "0x0E",
+        "EventName": "UOPS_ISSUED.ANY",
+        "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "AnyThread": "1",
-        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x3C",
-        "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
-        "SampleAfterValue": "2000003"
+        "EventCode": "0x0E",
+        "EventName": "UOPS_ISSUED.SLOW_LEA",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20"
     },
     {
-        "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x5E",
-        "EventName": "RS_EVENTS.EMPTY_CYCLES",
-        "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.",
+        "CounterMask": "1",
+        "EventCode": "0x0E",
+        "EventName": "UOPS_ISSUED.STALL_CYCLES",
+        "Invert": "1",
+        "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
         "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.",
+        "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EdgeDetect": "1",
-        "EventCode": "0x3C",
-        "EventName": "CPU_CLK_UNHALTED.RING0_TRANS",
-        "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).",
-        "SampleAfterValue": "100007"
+        "EventCode": "0x0E",
+        "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH",
+        "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "All (macro) branch instructions retired.",
+        "BriefDescription": "Number of macro-fused uops retired. (non precise)",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Errata": "SKL091",
-        "EventCode": "0xC4",
-        "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
-        "PEBS": "2",
-        "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.",
-        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.MACRO_FUSED",
+        "PublicDescription": "Counts the number of macro-fused uops retired. (non precise)",
+        "SampleAfterValue": "2000003",
         "UMask": "0x4"
     },
     {
-        "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.",
+        "BriefDescription": "Retirement slots used.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA6",
-        "EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
-        "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
+        "EventCode": "0xC2",
+        "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+        "PublicDescription": "Counts the retirement slots used.",
         "SampleAfterValue": "2000003",
         "UMask": "0x2"
     },
     {
-        "BriefDescription": "Number of uops executed on the core.",
+        "BriefDescription": "Cycles without actually retired uops.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xB1",
-        "EventName": "UOPS_EXECUTED.CORE",
-        "PublicDescription": "Number of uops executed from any thread.",
+        "CounterMask": "1",
+        "EventCode": "0xC2",
+        "EventName": "UOPS_RETIRED.STALL_CYCLES",
+        "Invert": "1",
+        "PublicDescription": "This event counts cycles without actually retired uops.",
         "SampleAfterValue": "2000003",
         "UMask": "0x2"
     },
     {
-        "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
+        "BriefDescription": "Cycles with less than 10 actually retired uops.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0xA3",
-        "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+        "CounterMask": "10",
+        "EventCode": "0xC2",
+        "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+        "Invert": "1",
+        "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "UMask": "0x2"
     }
 ]
 \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json
index 4ba9e6d9f25e..2600fd8d7a54 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json
@@ -65,15 +65,6 @@
         "Unit": "iMC"
     },
     {
-        "BriefDescription": "Pre-charge for writes",
-        "Counter": "0,1,2,3",
-        "EventCode": "0x2",
-        "EventName": "UNC_M_PRE_COUNT.WR",
-        "PerPkg": "1",
-        "UMask": "0x8",
-        "Unit": "iMC"
-    },
-    {
         "BriefDescription": "Write requests allocated in the PMM Write Pending Queue for Intel Optane DC persistent memory",
         "Counter": "0,1,2,3",
         "EventCode": "0xE3",
@@ -90,32 +81,32 @@
         "Unit": "iMC"
     },
     {
-        "BriefDescription": "Intel Optane DC persistent memory bandwidth read (MB). Derived from unc_m_pmm_rpq_inserts",
+        "BriefDescription": "Intel Optane DC persistent memory bandwidth read (MB/sec). Derived from unc_m_pmm_rpq_inserts",
         "Counter": "0,1,2,3",
         "EventCode": "0xE3",
         "EventName": "UNC_M_PMM_BANDWIDTH.READ",
         "PerPkg": "1",
-        "ScaleUnit": "6.103515625E-5MB",
+        "ScaleUnit": "6.103515625E-5MB/sec",
         "Unit": "iMC"
     },
     {
-        "BriefDescription": "Intel Optane DC persistent memory bandwidth write (MB). Derived from unc_m_pmm_wpq_inserts",
+        "BriefDescription": "Intel Optane DC persistent memory bandwidth write (MB/sec). Derived from unc_m_pmm_wpq_inserts",
         "Counter": "0,1,2,3",
         "EventCode": "0xE7",
         "EventName": "UNC_M_PMM_BANDWIDTH.WRITE",
         "PerPkg": "1",
-        "ScaleUnit": "6.103515625E-5MB",
+        "ScaleUnit": "6.103515625E-5MB/sec",
         "Unit": "iMC"
     },
     {
-        "BriefDescription": "Intel Optane DC persistent memory bandwidth total (MB). Derived from unc_m_pmm_rpq_inserts",
+        "BriefDescription": "Intel Optane DC persistent memory bandwidth total (MB/sec). Derived from unc_m_pmm_rpq_inserts",
         "Counter": "0,1,2,3",
         "EventCode": "0xE3",
         "EventName": "UNC_M_PMM_BANDWIDTH.TOTAL",
         "MetricExpr": "UNC_M_PMM_RPQ_INSERTS + UNC_M_PMM_WPQ_INSERTS",
         "MetricName": "UNC_M_PMM_BANDWIDTH.TOTAL",
         "PerPkg": "1",
-        "ScaleUnit": "6.103515625E-5MB",
+        "ScaleUnit": "6.103515625E-5MB/sec",
         "Unit": "iMC"
     },
     {
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-other.json b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-other.json
index 0cd083839e75..3be09986ce8b 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-other.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-other.json
@@ -104,15 +104,6 @@
         "Unit": "CHA"
     },
     {
-        "BriefDescription": "write requests from remote home agent",
-        "Counter": "0,1,2,3",
-        "EventCode": "0x50",
-        "EventName": "UNC_CHA_REQUESTS.WRITES_REMOTE",
-        "PerPkg": "1",
-        "UMask": "0x08",
-        "Unit": "CHA"
-    },
-    {
         "BriefDescription": "UPI interconnect send bandwidth for payload. Derived from unc_upi_txl_flits.all_data",
         "Counter": "0,1,2,3",
         "EventCode": "0x2",
@@ -544,7 +535,7 @@
         "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD",
         "Filter": "config1=0x40433",
         "PerPkg": "1",
-        "PublicDescription": "TOR Inserts : DRds issued by iA Cores that Missed the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent.   Does not include addressless requests such as locks and interrupts.",
+        "PublicDescription": "TOR Inserts : DRds issued by iA Cores that Missed the LLC : Counts the number of entries successfuly inserted into the TOR that match qualifications specified by the subevent.   Does not include addressless requests such as locks and interrupts.",
         "UMask": "0x21",
         "Unit": "CHA"
     },
@@ -568,6 +559,98 @@
         "Unit": "IIO"
     },
     {
+        "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 0",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART0",
+        "FCMask": "0x4",
+        "PerPkg": "1",
+        "PortMask": "0x01",
+        "PublicDescription": "PCIe Completion Buffer Inserts of completions with data: Part 0",
+        "UMask": "0x03",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 1",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART1",
+        "FCMask": "0x4",
+        "PerPkg": "1",
+        "PortMask": "0x02",
+        "PublicDescription": "PCIe Completion Buffer Inserts of completions with data: Part 1",
+        "UMask": "0x03",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART2",
+        "FCMask": "0x4",
+        "PerPkg": "1",
+        "PortMask": "0x04",
+        "PublicDescription": "PCIe Completion Buffer Inserts of completions with data: Part 2",
+        "UMask": "0x03",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 3",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART3",
+        "FCMask": "0x4",
+        "PerPkg": "1",
+        "PortMask": "0x08",
+        "PublicDescription": "PCIe Completion Buffer Inserts of completions with data: Part 3",
+        "UMask": "0x03",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIe Completion Buffer occupancy of completions with data: Part 0",
+        "Counter": "2,3",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART0",
+        "FCMask": "0x04",
+        "PerPkg": "1",
+        "PublicDescription": "PCIe Completion Buffer occupancy of completions with data: Part 0",
+        "UMask": "0x01",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIe Completion Buffer occupancy of completions with data: Part 1",
+        "Counter": "2,3",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART1",
+        "FCMask": "0x04",
+        "PerPkg": "1",
+        "PublicDescription": "PCIe Completion Buffer occupancy of completions with data: Part 1",
+        "UMask": "0x02",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIe Completion Buffer occupancy of completions with data: Part 2",
+        "Counter": "2,3",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART2",
+        "FCMask": "0x04",
+        "PerPkg": "1",
+        "PublicDescription": "PCIe Completion Buffer occupancy of completions with data: Part 2",
+        "UMask": "0x04",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIe Completion Buffer occupancy of completions with data: Part 3",
+        "Counter": "2,3",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART3",
+        "FCMask": "0x04",
+        "PerPkg": "1",
+        "PublicDescription": "PCIe Completion Buffer occupancy of completions with data: Part 3",
+        "UMask": "0x08",
+        "Unit": "IIO"
+    },
+    {
         "BriefDescription": "Read request for 4 bytes made by the CPU to IIO Part0",
         "Counter": "2,3",
         "EventCode": "0xC0",
@@ -1240,6 +1323,64 @@
         "Unit": "IIO"
     },
     {
+        "BriefDescription": "Total IRP occupancy of inbound read and write requests.",
+        "Counter": "0,1",
+        "EventCode": "0xF",
+        "EventName": "UNC_I_CACHE_TOTAL_OCCUPANCY.MEM",
+        "PerPkg": "1",
+        "PublicDescription": "Total IRP occupancy of inbound read and write requests.  This is effectively the sum of read occupancy and write occupancy.",
+        "UMask": "0x4",
+        "Unit": "IRP"
+    },
+    {
+        "BriefDescription": "PCIITOM request issued by the IRP unit to the mesh with the intention of writing a full cacheline.",
+        "Counter": "0,1",
+        "EventCode": "0x10",
+        "EventName": "UNC_I_COHERENT_OPS.PCITOM",
+        "PerPkg": "1",
+        "PublicDescription": "PCIITOM request issued by the IRP unit to the mesh with the intention of writing a full cacheline to coherent memory, without a RFO.  PCIITOM is a speculative Invalidate to Modified command that requests ownership of the cacheline and does not move data from the mesh to IRP cache.",
+        "UMask": "0x10",
+        "Unit": "IRP"
+    },
+    {
+        "BriefDescription": "RFO request issued by the IRP unit to the mesh with the intention of writing a partial cacheline.",
+        "Counter": "0,1",
+        "EventCode": "0x10",
+        "EventName": "UNC_I_COHERENT_OPS.RFO",
+        "PerPkg": "1",
+        "PublicDescription": "RFO request issued by the IRP unit to the mesh with the intention of writing a partial cacheline to coherent memory.  RFO is a Read For Ownership command that requests ownership of the cacheline and moves data from the mesh to IRP cache.",
+        "UMask": "0x8",
+        "Unit": "IRP"
+    },
+    {
+        "BriefDescription": "Inbound read requests received by the IRP and inserted into the FAF queue.",
+        "Counter": "0,1",
+        "EventCode": "0x18",
+        "EventName": "UNC_I_FAF_INSERTS",
+        "PerPkg": "1",
+        "PublicDescription": "Inbound read requests to coherent memory, received by the IRP and inserted into the Fire and Forget queue (FAF), a queue used for processing inbound reads in the IRP.",
+        "Unit": "IRP"
+    },
+    {
+        "BriefDescription": "Occupancy of the IRP FAF queue.",
+        "Counter": "0,1",
+        "EventCode": "0x19",
+        "EventName": "UNC_I_FAF_OCCUPANCY",
+        "PerPkg": "1",
+        "PublicDescription": "Occupancy of the IRP Fire and Forget (FAF) queue, a queue used for processing inbound reads in the IRP.",
+        "Unit": "IRP"
+    },
+    {
+        "BriefDescription": "Inbound write (fast path) requests received by the IRP.",
+        "Counter": "0,1",
+        "EventCode": "0x11",
+        "EventName": "UNC_I_TRANSACTIONS.WR_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "Inbound write (fast path) requests to coherent memory, received by the IRP resulting in write ownership requests issued by IRP to the mesh.",
+        "UMask": "0x8",
+        "Unit": "IRP"
+    },
+    {
         "BriefDescription": "Traffic in which the M2M to iMC Bypass was not taken",
         "Counter": "0,1,2,3",
         "EventCode": "0x22",
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/virtual-memory.json b/tools/perf/pmu-events/arch/x86/cascadelakex/virtual-memory.json
index d13b4111eb52..792ca39f013a 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/virtual-memory.json
@@ -1,35 +1,5 @@
 [
     {
-        "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x49",
-        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
-        "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M/4M pages.  The page walks can end with or without a page fault.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x4"
-    },
-    {
-        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x85",
-        "EventName": "ITLB_MISSES.WALK_PENDING",
-        "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x10"
-    },
-    {
-        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x85",
-        "EventName": "ITLB_MISSES.WALK_COMPLETED",
-        "PublicDescription": "Counts completed page walks (2M and 4M page sizes) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.",
-        "SampleAfterValue": "100003",
-        "UMask": "0xe"
-    },
-    {
         "BriefDescription": "Load misses in all DTLB levels that cause page walks",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
@@ -40,13 +10,13 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "STLB flush attempts",
+        "BriefDescription": "Loads that miss the DTLB and hit the STLB.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xBD",
-        "EventName": "TLB_FLUSH.STLB_ANY",
-        "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).",
-        "SampleAfterValue": "100007",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+        "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
+        "SampleAfterValue": "2000003",
         "UMask": "0x20"
     },
     {
@@ -61,44 +31,34 @@
         "UMask": "0x10"
     },
     {
-        "BriefDescription": "Page walk completed due to a demand data store to a 1G page",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x49",
-        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
-        "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1G pages.  The page walks can end with or without a page fault.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x8"
-    },
-    {
-        "BriefDescription": "Store misses in all DTLB levels that cause page walks",
+        "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x49",
-        "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
-        "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "PublicDescription": "Counts completed page walks  (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "UMask": "0xe"
     },
     {
-        "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
+        "BriefDescription": "Page walk completed due to a demand data load to a 1G page",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xAE",
-        "EventName": "ITLB.ITLB_FLUSH",
-        "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).",
-        "SampleAfterValue": "100007",
-        "UMask": "0x1"
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
+        "PublicDescription": "Counts completed page walks  (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
     },
     {
-        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
+        "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x49",
-        "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
-        "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+        "PublicDescription": "Counts completed page walks  (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x10"
+        "UMask": "0x4"
     },
     {
         "BriefDescription": "Page walk completed due to a demand data load to a 4K page",
@@ -106,21 +66,11 @@
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x08",
         "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
-        "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages.  The page walks can end with or without a page fault.",
+        "PublicDescription": "Counts completed page walks  (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "2000003",
         "UMask": "0x2"
     },
     {
-        "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x08",
-        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
-        "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 2M/4M pages.  The page walks can end with or without a page fault.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x4"
-    },
-    {
         "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
@@ -131,14 +81,24 @@
         "UMask": "0x10"
     },
     {
-        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.",
+        "BriefDescription": "Store misses in all DTLB levels that cause page walks",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x4F",
-        "EventName": "EPT.WALK_PENDING",
-        "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x10"
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+        "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Stores that miss the DTLB and hit the STLB.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+        "PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
+        "SampleAfterValue": "100003",
+        "UMask": "0x20"
     },
     {
         "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
@@ -152,34 +112,34 @@
         "UMask": "0x10"
     },
     {
-        "BriefDescription": "Misses at all ITLB levels that cause page walks",
+        "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x85",
-        "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
-        "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+        "PublicDescription": "Counts completed page walks  (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "UMask": "0xe"
     },
     {
-        "BriefDescription": "Stores that miss the DTLB and hit the STLB.",
+        "BriefDescription": "Page walk completed due to a demand data store to a 1G page",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x49",
-        "EventName": "DTLB_STORE_MISSES.STLB_HIT",
-        "PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
+        "PublicDescription": "Counts completed page walks  (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "UMask": "0x20"
+        "UMask": "0x8"
     },
     {
-        "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
+        "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x08",
-        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
-        "PublicDescription": "Counts demand data loads that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+        "PublicDescription": "Counts completed page walks  (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "UMask": "0xe"
+        "UMask": "0x4"
     },
     {
         "BriefDescription": "Page walk completed due to a demand data store to a 4K page",
@@ -187,29 +147,49 @@
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x49",
         "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
-        "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages.  The page walks can end with or without a page fault.",
+        "PublicDescription": "Counts completed page walks  (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
         "UMask": "0x2"
     },
     {
-        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
+        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x85",
-        "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
-        "PublicDescription": "Counts completed page walks (4K page size) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x2"
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
+        "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
     },
     {
-        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)",
+        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x4f",
+        "EventName": "EPT.WALK_PENDING",
+        "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xAE",
+        "EventName": "ITLB.ITLB_FLUSH",
+        "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).",
+        "SampleAfterValue": "100007",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Misses at all ITLB levels that cause page walks",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x85",
-        "EventName": "ITLB_MISSES.WALK_COMPLETED_1G",
-        "PublicDescription": "Counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.",
+        "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+        "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.",
         "SampleAfterValue": "100003",
-        "UMask": "0x8"
+        "UMask": "0x1"
     },
     {
         "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.",
@@ -221,13 +201,34 @@
         "UMask": "0x20"
     },
     {
-        "BriefDescription": "Page walk completed due to a demand data load to a 1G page",
+        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x08",
-        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
-        "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages.  The page walks can end with or without a page fault.",
-        "SampleAfterValue": "2000003",
+        "CounterMask": "1",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_ACTIVE",
+        "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED",
+        "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "UMask": "0xe"
+    },
+    {
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_1G",
+        "PublicDescription": "Counts completed page walks (1G page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
         "UMask": "0x8"
     },
     {
@@ -236,42 +237,31 @@
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x85",
         "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
-        "PublicDescription": "Counts code misses in all ITLB levels that caused a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+        "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
         "UMask": "0x4"
     },
     {
-        "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x49",
-        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
-        "PublicDescription": "Counts demand data stores that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+        "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "UMask": "0xe"
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.",
+        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
         "EventCode": "0x85",
-        "EventName": "ITLB_MISSES.WALK_ACTIVE",
-        "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.",
+        "EventName": "ITLB_MISSES.WALK_PENDING",
+        "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.",
         "SampleAfterValue": "100003",
         "UMask": "0x10"
     },
     {
-        "BriefDescription": "Loads that miss the DTLB and hit the STLB.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x08",
-        "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
-        "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x20"
-    },
-    {
         "BriefDescription": "DTLB flush attempts of the thread-specific entries",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
@@ -280,5 +270,15 @@
         "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.",
         "SampleAfterValue": "100007",
         "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "STLB flush attempts",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xBD",
+        "EventName": "TLB_FLUSH.STLB_ANY",
+        "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).",
+        "SampleAfterValue": "100007",
+        "UMask": "0x20"
     }
 ]
 \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/elkhartlake/cache.json b/tools/perf/pmu-events/arch/x86/elkhartlake/cache.json
new file mode 100644
index 000000000000..734be4ea095f
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/elkhartlake/cache.json
@@ -0,0 +1,226 @@
+[
+    {
+        "BriefDescription": "Counts the number of first level data cacheline (dirty) evictions caused by misses, stores, and prefetches.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x51",
+        "EventName": "DL1.DIRTY_EVICTION",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of first level data cacheline (dirty) evictions caused by misses, stores, and prefetches.  Does not count evictions or dirty writebacks caused by snoops.  Does not count a replacement unless a (dirty) line was written back.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2e",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of cacheable memory requests that miss in the Last Level Cache (LLC). If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x41"
+    },
+    {
+        "BriefDescription": "Counts the number of cacheable memory requests that access the LLC. Counts on a per core basis.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2e",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of cacheable memory requests that access the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4f"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in DRAM or MMIO (Non-DRAM).",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS.IFETCH_DRAM_HIT",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of cycles a core is stalled due to an instruction cache or translation lookaside buffer (TLB) access which hit in DRAM or MMIO (non-DRAM).",
+        "SampleAfterValue": "200003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2 cache.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS.IFETCH_L2_HIT",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of cycles a core is stalled due to an instruction cache or Translation Lookaside Buffer (TLB) access which hit in the L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the LLC or other core with HITE/F/M.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS.IFETCH_LLC_HIT",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of cycles a core is stalled due to an instruction cache or Translation Lookaside Buffer (TLB) access which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS.LOAD_DRAM_HIT",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the L2 cache.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS.LOAD_L2_HIT",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the LLC or other core with HITE/F/M.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS.LOAD_LLC_HIT",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles a core is stalled due to a store buffer being full.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS.STORE_BUFFER_FULL",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x40"
+    },
+    {
+        "BriefDescription": "Counts the number of load ops retired that hit in DRAM.",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Counts the number of load uops retired that hit in the L1 data cache.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of load uops retired that miss in the L1 data cache.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Counts the number of load uops retired that hit in the L2 cache.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of load uops retired that miss in the L2 cache.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts the number of load uops retired that hit in the L3 cache.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of load uops retired.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the total number of load uops retired.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x81"
+    },
+    {
+        "BriefDescription": "Counts the number of store uops retired.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the total number of store uops retired.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x82"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to instruction cache misses.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.ICACHE",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x20"
+    }
+]
+\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/elkhartlake/ehl-metrics.json b/tools/perf/pmu-events/arch/x86/elkhartlake/ehl-metrics.json
new file mode 100644
index 000000000000..b6f7126be1fd
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/elkhartlake/ehl-metrics.json
@@ -0,0 +1,57 @@
+[
+    {
+        "MetricExpr": "INST_RETIRED.ANY / cycles",
+        "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+        "MetricName": "IPC"
+    },
+    {
+        "MetricExpr": "1 / IPC",
+        "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+        "MetricName": "CPI"
+    },
+    {
+        "MetricExpr": "cycles",
+        "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
+        "MetricName": "CLKS"
+    },
+    {
+        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+        "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
+        "MetricName": "IpMispredict"
+    },
+    {
+        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
+        "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
+        "MetricName": "IpBranch"
+    },
+    {
+        "MetricExpr": "INST_RETIRED.ANY",
+        "BriefDescription": "Total number of retired Instructions",
+        "MetricName": "Instructions"
+    },
+    {
+        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 ",
+        "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+        "MetricName": "L3_Cache_Fill_BW"
+    },
+    {
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+        "BriefDescription": "Average CPU Utilization",
+        "MetricName": "CPU_Utilization"
+    },
+    {
+        "MetricExpr": "(cycles / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 ",
+        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+        "MetricName": "Average_Frequency"
+    },
+    {
+        "MetricExpr": "cycles / CPU_CLK_UNHALTED.REF_TSC",
+        "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+        "MetricName": "Turbo_Utilization"
+    },
+    {
+        "MetricExpr": "cycles:k / cycles",
+        "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
+        "MetricName": "Kernel_Utilization"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/x86/elkhartlake/floating-point.json b/tools/perf/pmu-events/arch/x86/elkhartlake/floating-point.json
new file mode 100644
index 000000000000..2515b9aa6e66
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/elkhartlake/floating-point.json
@@ -0,0 +1,24 @@
+[
+    {
+        "BriefDescription": "Counts the number of cycles the floating point divider is busy.  Does not imply a stall waiting for the divider.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xcd",
+        "EventName": "CYCLES_DIV_BUSY.FPDIV",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of floating point divide uops retired (x87 and SSE, including x87 sqrt).",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.FPDIV",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
+    }
+]
+\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/elkhartlake/frontend.json b/tools/perf/pmu-events/arch/x86/elkhartlake/frontend.json
new file mode 100644
index 000000000000..b7b8cb7bd868
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/elkhartlake/frontend.json
@@ -0,0 +1,93 @@
+[
+    {
+        "BriefDescription": "Counts the total number of BACLEARS.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xe6",
+        "EventName": "BACLEARS.ANY",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the total number of BACLEARS, which occur when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend.  Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of BACLEARS due to a conditional jump.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xe6",
+        "EventName": "BACLEARS.COND",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts the number of BACLEARS due to an indirect branch.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xe6",
+        "EventName": "BACLEARS.INDIRECT",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of BACLEARS due to a return branch.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xe6",
+        "EventName": "BACLEARS.RETURN",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Counts the number of BACLEARS due to a non-indirect, non-conditional jump.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xe6",
+        "EventName": "BACLEARS.UNCOND",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of times a decode restriction reduces the decode throughput due to wrong instruction length prediction.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xe9",
+        "EventName": "DECODE_RESTRICTION.PREDECODE_WRONG",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of requests to the instruction cache for one or more bytes of a cache line.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x80",
+        "EventName": "ICACHE.ACCESSES",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the total number of requests to the instruction cache.  The event only counts new cache line accesses, so that multiple back to back fetches to the exact same cache line or byte chunk count as one.  Specifically, the event counts when accesses from sequential code crosses the cache line boundary, or when a branch target is moved to a new line or to a non-sequential byte chunk of the same line.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x3"
+    },
+    {
+        "BriefDescription": "Counts the number of instruction cache misses.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x80",
+        "EventName": "ICACHE.MISSES",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of missed requests to the instruction cache.  The event only counts new cache line accesses, so that multiple back to back fetches to the exact same cache line and byte chunk count as one.  Specifically, the event counts when accesses from sequential code crosses the cache line boundary, or when a branch target is moved to a new line or to a non-sequential byte chunk of the same line.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    }
+]
+\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/elkhartlake/memory.json b/tools/perf/pmu-events/arch/x86/elkhartlake/memory.json
new file mode 100644
index 000000000000..4e4eab23a300
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/elkhartlake/memory.json
@@ -0,0 +1,86 @@
+[
+    {
+        "BriefDescription": "Counts the number of memory ordering machine clears triggered by a snoop from an external agent.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of memory ordering machine clears triggered by a snoop from an external agent. Does not count internally generated machine clears such as those due to disambiguations.",
+        "SampleAfterValue": "20003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts cacheable demand data reads, L1 data cache hardware prefetches and software prefetches (except PREFETCHW) that were not supplied by the L3 cache.",
+        "Counter": "0,1,2,3",
+        "EventCode": "0XB7",
+        "EventName": "OCR.DEMAND_DATA_AND_L1PF_RD.L3_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2104000001",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts cacheable demand data reads, L1 data cache hardware prefetches and software prefetches (except PREFETCHW) that were not supplied by the L3 cache.",
+        "Counter": "0,1,2,3",
+        "EventCode": "0XB7",
+        "EventName": "OCR.DEMAND_DATA_AND_L1PF_RD.L3_MISS_LOCAL",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2104000001",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.L3_MISS",
+        "Counter": "0,1,2,3",
+        "EventCode": "0XB7",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2104000001",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.L3_MISS_LOCAL",
+        "Counter": "0,1,2,3",
+        "EventCode": "0XB7",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2104000001",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
+        "Counter": "0,1,2,3",
+        "EventCode": "0XB7",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2104000002",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
+        "Counter": "0,1,2,3",
+        "EventCode": "0XB7",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2104000002",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    }
+]
+\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/elkhartlake/other.json b/tools/perf/pmu-events/arch/x86/elkhartlake/other.json
new file mode 100644
index 000000000000..627691404155
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/elkhartlake/other.json
@@ -0,0 +1,424 @@
+[
+    {
+        "BriefDescription": "Counts the total number of BTCLEARS.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xe8",
+        "EventName": "BTCLEAR.ANY",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the total number of BTCLEARS which occurs when the Branch Target Buffer (BTB) predicts a taken branch.",
+        "SampleAfterValue": "200003"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BUS_LOCK.SELF_LOCKS",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EdgeDetect": "1",
+        "EventCode": "0x63",
+        "EventName": "BUS_LOCK.ALL",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003"
+    },
+    {
+        "BriefDescription": "Counts the number of unhalted cycles a core is blocked due to an accepted lock issued by other cores.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x63",
+        "EventName": "BUS_LOCK.BLOCK_CYCLES",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of unhalted cycles a core is blocked due to an accepted lock issued by other cores. Counts on a per core basis.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BUS_LOCK.BLOCK_CYCLES",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x63",
+        "EventName": "BUS_LOCK.CYCLES_OTHER_BLOCK",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BUS_LOCK.LOCK_CYCLES",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x63",
+        "EventName": "BUS_LOCK.CYCLES_SELF_BLOCK",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of unhalted cycles a core is blocked due to an accepted lock it issued.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x63",
+        "EventName": "BUS_LOCK.LOCK_CYCLES",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of unhalted cycles a core is blocked due to an accepted lock it issued. Counts on a per core basis.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of bus locks a core issued its self (e.g. lock to UC or Split Lock) and does not include cache locks.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EdgeDetect": "1",
+        "EventCode": "0x63",
+        "EventName": "BUS_LOCK.SELF_LOCKS",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of bus locks a core issued its self (e.g. lock to UC or Split Lock) and does not include cache locks. Counts on a per core basis.",
+        "SampleAfterValue": "200003"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event MEM_BOUND_STALLS.LOAD_DRAM_HIT",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x34",
+        "EventName": "C0_STALLS.LOAD_DRAM_HIT",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event MEM_BOUND_STALLS.LOAD_L2_HIT",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x34",
+        "EventName": "C0_STALLS.LOAD_L2_HIT",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event MEM_BOUND_STALLS.LOAD_LLC_HIT",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x34",
+        "EventName": "C0_STALLS.LOAD_LLC_HIT",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of core cycles during which interrupts are masked (disabled).",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xcb",
+        "EventName": "HW_INTERRUPTS.MASKED",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of core cycles during which interrupts are masked (disabled). Increments by 1 each core cycle that EFLAGS.IF is 0, regardless of whether interrupts are pending or not.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of core cycles during which there are pending interrupts while interrupts are masked (disabled).",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xcb",
+        "EventName": "HW_INTERRUPTS.PENDING_AND_MASKED",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of core cycles during which there are pending interrupts while interrupts are masked (disabled). Increments by 1 each core cycle that both EFLAGS.IF is 0 and an INTR is pending (which means the APIC is telling the ROB to cause an INTR). This event does not increment if EFLAGS.IF is 0 but all interrupt in the APICs Interrupt Request Register (IRR) are inhibited by the PPR (thus either by ISRV or TPR)  because in these cases the interrupts would be held up in the APIC and would not be pended to the ROB. This event does count when an interrupt is only inhibited by MOV/POP SS state machines or the STI state machine. These extra inhibits only last for a single instructions and would not be important.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of hardware interrupts received by the processor.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xcb",
+        "EventName": "HW_INTERRUPTS.RECEIVED",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "203",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts cacheable demand data reads, L1 data cache hardware prefetches and software prefetches (except PREFETCHW) that have any type of response.",
+        "Counter": "0,1,2,3",
+        "EventCode": "0XB7",
+        "EventName": "OCR.DEMAND_DATA_AND_L1PF_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10001",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.ANY_RESPONSE",
+        "Counter": "0,1,2,3",
+        "EventCode": "0XB7",
+        "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10001",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.",
+        "Counter": "0,1,2,3",
+        "EventCode": "0XB7",
+        "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10002",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x73",
+        "EventName": "TOPDOWN_BAD_SPECULATION.ALL",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the instruction queue (IQ). Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x6"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to fast nukes such as memory ordering and memory disambiguation machine clears.",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x73",
+        "EventName": "TOPDOWN_BAD_SPECULATION.FASTNUKE",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x73",
+        "EventName": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to branch mispredicts.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x73",
+        "EventName": "TOPDOWN_BAD_SPECULATION.MISPREDICT",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event TOPDOWN_BAD_SPECULATION.FASTNUKE",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x73",
+        "EventName": "TOPDOWN_BAD_SPECULATION.MONUKE",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to backend stalls.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.ALL",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "1000003"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to certain allocation restrictions.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.MEM_SCHEDULER",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.REGISTER",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the reorder buffer being full (ROB stalls).",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.REORDER_BUFFER",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x40"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.SERIALIZATION",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "This event is deprecated.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.STORE_BUFFER",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to frontend stalls.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.ALL",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "1000003"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BACLEARS.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.BRANCH_DETECT",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTCLEARS.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.BRANCH_RESTEER",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x40"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to the microcode sequencer (MS).",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.CISC",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to decode stalls.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.DECODE",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to ITLB misses.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.ITLB",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to other common frontend stalls not categorized.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.OTHER",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to wrong predecodes.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.PREDECODE",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the total number of consumed retirement slots.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc2",
+        "EventName": "TOPDOWN_RETIRING.ALL",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "1000003"
+    }
+]
+\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/elkhartlake/pipeline.json b/tools/perf/pmu-events/arch/x86/elkhartlake/pipeline.json
new file mode 100644
index 000000000000..41e5dfad8f51
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/elkhartlake/pipeline.json
@@ -0,0 +1,278 @@
+[
+    {
+        "BriefDescription": "Counts the total number of branch instructions retired for all branch types.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires.  All branch type instructions are accounted for.",
+        "SampleAfterValue": "200003"
+    },
+    {
+        "BriefDescription": "Counts the number of near CALL branch instructions retired.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.CALL",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0xf9"
+    },
+    {
+        "BriefDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0xbf"
+    },
+    {
+        "BriefDescription": "Counts the number of near indirect CALL branch instructions retired.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.IND_CALL",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfb"
+    },
+    {
+        "BriefDescription": "Counts the number of retired JCC (Jump on Conditional Code) branch instructions retired, includes both taken and not taken branches.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.JCC",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x7e"
+    },
+    {
+        "BriefDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.NON_RETURN_IND",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0xeb"
+    },
+    {
+        "BriefDescription": "Counts the number of near relative CALL branch instructions retired.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.REL_CALL",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfd"
+    },
+    {
+        "BriefDescription": "Counts the number of near RET branch instructions retired.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.RETURN",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0xf7"
+    },
+    {
+        "BriefDescription": "Counts the number of taken JCC (Jump on Conditional Code) branch instructions retired.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.TAKEN_JCC",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfe"
+    },
+    {
+        "BriefDescription": "Counts the total number of mispredicted branch instructions retired for all branch types.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the total number of mispredicted branch instructions retired.  All branch type instructions are accounted for.  Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP.    A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path.",
+        "SampleAfterValue": "200003"
+    },
+    {
+        "BriefDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.IND_CALL",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfb"
+    },
+    {
+        "BriefDescription": "Counts the number of mispredicted JCC (Jump on Conditional Code) branch instructions retired.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.JCC",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x7e"
+    },
+    {
+        "BriefDescription": "Counts the number of mispredicted near RET branch instructions retired.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.RETURN",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0xf7"
+    },
+    {
+        "BriefDescription": "Counts the number of mispredicted taken JCC (Jump on Conditional Code) branch instructions retired.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.TAKEN_JCC",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfe"
+    },
+    {
+        "BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)",
+        "CollectPEBSRecord": "2",
+        "Counter": "Fixed counter 1",
+        "EventName": "CPU_CLK_UNHALTED.CORE",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "33",
+        "PublicDescription": "Counts the number of core cycles while the core is not in a halt state.  The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of unhalted core clock cycles.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x3c",
+        "EventName": "CPU_CLK_UNHALTED.CORE_P",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of core cycles while the core is not in a halt state.  The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses a programmable general purpose performance counter.",
+        "SampleAfterValue": "2000003"
+    },
+    {
+        "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x3c",
+        "EventName": "CPU_CLK_UNHALTED.REF",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses fixed counter 2.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency. (Fixed event)",
+        "CollectPEBSRecord": "2",
+        "Counter": "Fixed counter 2",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "34",
+        "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses fixed counter 2.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x3"
+    },
+    {
+        "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x3c",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC_P",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction.  This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses a programmable general purpose performance counter.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "This event is deprecated.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xcd",
+        "EventName": "CYCLES_DIV_BUSY.ANY",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "2000003"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles the integer divider is busy.  Does not imply a stall waiting for the divider.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xcd",
+        "EventName": "CYCLES_DIV_BUSY.IDIV",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the total number of instructions retired. (Fixed event)",
+        "CollectPEBSRecord": "2",
+        "Counter": "Fixed counter 0",
+        "EventName": "INST_RETIRED.ANY",
+        "PEBS": "1",
+        "PEBScounters": "32",
+        "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses fixed counter 0.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the total number of instructions retired.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc0",
+        "EventName": "INST_RETIRED.ANY_P",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses a programmable general purpose performance counter.",
+        "SampleAfterValue": "2000003"
+    },
+    {
+        "BriefDescription": "Counts the total number of machine clears including memory ordering, memory disambiguation, self-modifying code, page faults and floating point assist.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.ANY",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "20003"
+    },
+    {
+        "BriefDescription": "Counts the number of uops that are from complex flows issued by the micro-sequencer (MS).",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.MS",
+        "PDIR_COUNTER": "na",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of uops that are from complex flows issued by the Microcode Sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    }
+]
+\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/elkhartlake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/elkhartlake/virtual-memory.json
new file mode 100644
index 000000000000..c58b589ff80f
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/elkhartlake/virtual-memory.json
@@ -0,0 +1,273 @@
+[
+    {
+        "BriefDescription": "Counts the number of page walks due to loads that miss the PDE (Page Directory Entry) cache.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.PDE_CACHE_MISS",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Counts the number of first level TLB misses but second level hits due to loads that did not start a page walk. Account for all pages sizes. Will result in a DTLB write from STLB.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to any page size.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of page walks completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xe"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to a 2M or 4M page.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of page walks completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 2M or 4M pages. Includes page walks that page fault.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to a 4K page.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of page walks completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 4K pages. Includes page walks that page fault.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks outstanding in the page miss handler (PMH) for loads every cycle.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of page walks outstanding in the page miss handler (PMH) for loads every cycle.  A page walk is outstanding from start till PMH becomes idle again (ready to serve next walk). Includes EPT-walk intervals.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks due to stores that miss the PDE (Page Directory Entry) cache.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.PDE_CACHE_MISS",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of page walks due to storse that miss the PDE (Page Directory Entry) cache.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to a 2M or 4M page.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 2M or 4M pages.  Includes page walks that page fault.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to a 4K page.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 4K pages.  Includes page walks that page fault.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks outstanding in the page miss handler (PMH) for stores every cycle.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of page walks outstanding in the page miss handler (PMH) for stores every cycle.  A page walk is outstanding from start till PMH becomes idle again (ready to serve next walk). Includes EPT-walk intervals.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts the number of Extended Page Directory Entry hits.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4f",
+        "EventName": "EPT.EPDE_HIT",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of Extended Page Directory Entry hits.  The Extended Page Directory cache is used by Virtual Machine operating systems while the guest operating systems use the standard TLB caches.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of Extended Page Directory Entry misses.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4f",
+        "EventName": "EPT.EPDE_MISS",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number Extended Page Directory Entry misses.  The Extended Page Directory cache is used by Virtual Machine operating systems while the guest operating systems use the standard TLB caches.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of Extended Page Directory Pointer Entry hits.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4f",
+        "EventName": "EPT.EPDPE_HIT",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number Extended Page Directory Pointer Entry hits.  The Extended Page Directory cache is used by Virtual Machine operating systems while the guest operating systems use the standard TLB caches.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of Extended Page Directory Pointer Entry misses.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4f",
+        "EventName": "EPT.EPDPE_MISS",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number Extended Page Directory Pointer Entry misses.  The Extended Page Directory cache is used by Virtual Machine operating systems while the guest operating systems use the standard TLB caches.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Counts the number of times there was an ITLB miss and a new translation was filled into the ITLB.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x81",
+        "EventName": "ITLB.FILLS",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of times the machine was unable to find a translation in the Instruction Translation Lookaside Buffer (ITLB) and a new translation was filled into the ITLB. The event is speculative in nature, but will not count translations (page walks) that are begun and not finished, or translations that are finished but not filled into the ITLB.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks due to an instruction fetch that miss the PDE (Page Directory Entry) cache.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.PDE_CACHE_MISS",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Counts the number of first level TLB misses but second level hits due to an instruction fetch that did not start a page walk. Account for all pages sizes. Will results in a DTLB write from STLB.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.STLB_HIT",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to a 2M or 4M page.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of page walks completed due to instruction fetches whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 2M or 4M pages.  Includes page walks that page fault.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to a 4K page.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of page walks completed due to instruction fetches whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 4K pages.  Includes page walks that page fault.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks outstanding in the page miss handler (PMH) for instruction fetches every cycle.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_PENDING",
+        "PDIR_COUNTER": "na",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of page walks outstanding in the page miss handler (PMH) for instruction fetches every cycle.  A page walk is outstanding from start till PMH becomes idle again (ready to serve next walk).",
+        "SampleAfterValue": "200003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts the number of memory retired ops that missed in the second level TLB.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.DTLB_MISS",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x13"
+    },
+    {
+        "BriefDescription": "Counts the number of load ops retired that miss in the second Level TLB.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_LOADS",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x11"
+    },
+    {
+        "BriefDescription": "Counts the number of store ops retired that miss in the second level TLB.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_STORES",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x12"
+    }
+]
+\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
new file mode 100644
index 000000000000..57ddbb9f9b31
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
@@ -0,0 +1,315 @@
+[
+    {
+        "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "Summary",
+        "MetricName": "IPC"
+    },
+    {
+        "BriefDescription": "Uops Per Instruction",
+        "MetricExpr": "UOPS_RETIRED.SLOTS / INST_RETIRED.ANY",
+        "MetricGroup": "Pipeline;Retire",
+        "MetricName": "UPI"
+    },
+    {
+        "BriefDescription": "Instruction per taken branch",
+        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
+        "MetricGroup": "Branches;FetchBW;PGO",
+        "MetricName": "IpTB"
+    },
+    {
+        "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+        "MetricExpr": "1 / (INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD)",
+        "MetricGroup": "Pipeline",
+        "MetricName": "CPI"
+    },
+    {
+        "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "Pipeline",
+        "MetricName": "CLKS"
+    },
+    {
+        "BriefDescription": "Instructions Per Cycle (per physical core)",
+        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.DISTRIBUTED",
+        "MetricGroup": "SMT;TmaL1",
+        "MetricName": "CoreIPC"
+    },
+    {
+        "BriefDescription": "Floating Point Operations Per Cycle",
+        "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / CPU_CLK_UNHALTED.DISTRIBUTED",
+        "MetricGroup": "Flops",
+        "MetricName": "FLOPc"
+    },
+    {
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
+        "MetricGroup": "Pipeline;PortsUtil",
+        "MetricName": "ILP"
+    },
+    {
+        "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
+        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+        "MetricGroup": "BrMispredicts",
+        "MetricName": "IpMispredict"
+    },
+    {
+        "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
+        "MetricExpr": "CPU_CLK_UNHALTED.DISTRIBUTED",
+        "MetricGroup": "SMT",
+        "MetricName": "CORE_CLKS"
+    },
+    {
+        "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
+        "MetricGroup": "InsType",
+        "MetricName": "IpLoad"
+    },
+    {
+        "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
+        "MetricGroup": "InsType",
+        "MetricName": "IpStore"
+    },
+    {
+        "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
+        "MetricGroup": "Branches;InsType",
+        "MetricName": "IpBranch"
+    },
+    {
+        "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
+        "MetricGroup": "Branches",
+        "MetricName": "IpCall"
+    },
+    {
+        "BriefDescription": "Branch instructions per taken branch. ",
+        "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+        "MetricGroup": "Branches;PGO",
+        "MetricName": "BpTkBranch"
+    },
+    {
+        "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )",
+        "MetricGroup": "Flops;FpArith;InsType",
+        "MetricName": "IpFLOP"
+    },
+    {
+        "BriefDescription": "Total number of retired Instructions, Sample with: INST_RETIRED.PREC_DIST",
+        "MetricExpr": "INST_RETIRED.ANY",
+        "MetricGroup": "Summary;TmaL1",
+        "MetricName": "Instructions"
+    },
+    {
+        "BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)",
+        "MetricExpr": "LSD.UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
+        "MetricGroup": "LSD",
+        "MetricName": "LSD_Coverage"
+    },
+    {
+        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
+        "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
+        "MetricGroup": "DSB;FetchBW",
+        "MetricName": "DSB_Coverage"
+    },
+    {
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )",
+        "MetricGroup": "MemoryBound;MemoryLat",
+        "MetricName": "Load_Miss_Real_Latency"
+    },
+    {
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+        "MetricGroup": "MemoryBound;MemoryBW",
+        "MetricName": "MLP"
+    },
+    {
+        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+        "MetricConstraint": "NO_NMI_WATCHDOG",
+        "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING ) / ( 2 * CPU_CLK_UNHALTED.DISTRIBUTED )",
+        "MetricGroup": "MemoryTLB",
+        "MetricName": "Page_Walks_Utilization"
+    },
+    {
+        "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]",
+        "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
+        "MetricGroup": "MemoryBW",
+        "MetricName": "L1D_Cache_Fill_BW"
+    },
+    {
+        "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]",
+        "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
+        "MetricGroup": "MemoryBW",
+        "MetricName": "L2_Cache_Fill_BW"
+    },
+    {
+        "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time",
+        "MetricGroup": "MemoryBW",
+        "MetricName": "L3_Cache_Fill_BW"
+    },
+    {
+        "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
+        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time",
+        "MetricGroup": "MemoryBW;Offcore",
+        "MetricName": "L3_Cache_Access_BW"
+    },
+    {
+        "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
+        "MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
+        "MetricGroup": "CacheMisses",
+        "MetricName": "L1MPKI"
+    },
+    {
+        "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
+        "MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
+        "MetricGroup": "CacheMisses",
+        "MetricName": "L2MPKI"
+    },
+    {
+        "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)",
+        "MetricExpr": "1000 * ( ( OFFCORE_REQUESTS.ALL_DATA_RD - OFFCORE_REQUESTS.DEMAND_DATA_RD ) + L2_RQSTS.ALL_DEMAND_MISS + L2_RQSTS.SWPF_MISS ) / INST_RETIRED.ANY",
+        "MetricGroup": "CacheMisses;Offcore",
+        "MetricName": "L2MPKI_All"
+    },
+    {
+        "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+        "MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
+        "MetricGroup": "CacheMisses",
+        "MetricName": "L3MPKI"
+    },
+    {
+        "BriefDescription": "Rate of silent evictions from the L2 cache per Kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)",
+        "MetricExpr": "1000 * L2_LINES_OUT.SILENT / INST_RETIRED.ANY",
+        "MetricGroup": "L2Evicts;Server",
+        "MetricName": "L2_Evictions_Silent_PKI"
+    },
+    {
+        "BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
+        "MetricExpr": "1000 * L2_LINES_OUT.NON_SILENT / INST_RETIRED.ANY",
+        "MetricGroup": "L2Evicts;Server",
+        "MetricName": "L2_Evictions_NonSilent_PKI"
+    },
+    {
+        "BriefDescription": "Average CPU Utilization",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+        "MetricGroup": "HPC;Summary",
+        "MetricName": "CPU_Utilization"
+    },
+    {
+        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+        "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time",
+        "MetricGroup": "Summary;Power",
+        "MetricName": "Average_Frequency"
+    },
+    {
+        "BriefDescription": "Giga Floating Point Operations Per Second",
+        "MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / 1000000000 ) / duration_time",
+        "MetricGroup": "Flops;HPC",
+        "MetricName": "GFLOPs"
+    },
+    {
+        "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricGroup": "Power",
+        "MetricName": "Turbo_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
+        "MetricExpr": "1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_DISTRIBUTED if #SMT_on else 0",
+        "MetricGroup": "SMT",
+        "MetricName": "SMT_2T_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "OS",
+        "MetricName": "Kernel_Utilization"
+    },
+    {
+        "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+        "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time",
+        "MetricGroup": "HPC;MemoryBW;SoC",
+        "MetricName": "DRAM_BW_Use"
+    },
+    {
+        "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches",
+        "MetricExpr": "1000000000 * ( UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD ) / ( cha_0@event\\=0x0@ / duration_time )",
+        "MetricGroup": "MemoryLat;SoC",
+        "MetricName": "MEM_Read_Latency"
+    },
+    {
+        "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
+        "MetricExpr": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / cha@event\\=0x36\\,umask\\=0xC817FE01\\,thresh\\=1@",
+        "MetricGroup": "MemoryBW;SoC",
+        "MetricName": "MEM_Parallel_Reads"
+    },
+    {
+        "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches",
+        "MetricExpr": "( 1000000000 * ( UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PMM ) / cha_0@event\\=0x0@ )",
+        "MetricGroup": "MemoryLat;SoC;Server",
+        "MetricName": "MEM_PMM_Read_Latency"
+    },
+    {
+        "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
+        "MetricExpr": "( ( 64 * imc@event\\=0xe3@ / 1000000000 ) / duration_time )",
+        "MetricGroup": "MemoryBW;SoC;Server",
+        "MetricName": "PMM_Read_BW"
+    },
+    {
+        "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
+        "MetricExpr": "( ( 64 * imc@event\\=0xe7@ / 1000000000 ) / duration_time )",
+        "MetricGroup": "MemoryBW;SoC;Server",
+        "MetricName": "PMM_Write_BW"
+    },
+    {
+        "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]",
+        "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR * 64 / 1000000000 / duration_time",
+        "MetricGroup": "IoBW;SoC;Server",
+        "MetricName": "IO_Write_BW"
+    },
+    {
+        "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]",
+        "MetricExpr": "( UNC_CHA_TOR_INSERTS.IO_HIT_ITOM + UNC_CHA_TOR_INSERTS.IO_MISS_ITOM + UNC_CHA_TOR_INSERTS.IO_HIT_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR ) * 64 / 1000000000 / duration_time",
+        "MetricGroup": "IoBW;SoC;Server",
+        "MetricName": "IO_Read_BW"
+    },
+    {
+        "BriefDescription": "Socket actual clocks when any core is active on that socket",
+        "MetricExpr": "cha_0@event\\=0x0@",
+        "MetricGroup": "SoC",
+        "MetricName": "Socket_CLKS"
+    },
+    {
+        "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
+        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
+        "MetricGroup": "Branches;OS",
+        "MetricName": "IpFarBranch"
+    },
+    {
+        "BriefDescription": "C1 residency percent per core",
+        "MetricExpr": "(cstate_core@c1\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C1_Core_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per core",
+        "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Core_Residency"
+    },
+    {
+        "BriefDescription": "C2 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C2_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Pkg_Residency"
+    },
+]
diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv
index 5f5df6560202..0cf2d1fa6b76 100644
--- a/tools/perf/pmu-events/arch/x86/mapfile.csv
+++ b/tools/perf/pmu-events/arch/x86/mapfile.csv
@@ -36,11 +36,12 @@ GenuineIntel-6-55-[01234],v1,skylakex,core
 GenuineIntel-6-55-[56789ABCDEF],v1,cascadelakex,core
 GenuineIntel-6-7D,v1,icelake,core
 GenuineIntel-6-7E,v1,icelake,core
-GenuineIntel-6-8[CD],v1,icelake,core
+GenuineIntel-6-8[CD],v1,tigerlake,core
 GenuineIntel-6-A7,v1,icelake,core
 GenuineIntel-6-6A,v1,icelakex,core
 GenuineIntel-6-6C,v1,icelakex,core
 GenuineIntel-6-86,v1,tremontx,core
+GenuineIntel-6-96,v1,elkhartlake,core
 AuthenticAMD-23-([12][0-9A-F]|[0-9A-F]),v2,amdzen1,core
 AuthenticAMD-23-[[:xdigit:]]+,v1,amdzen2,core
 AuthenticAMD-25-[[:xdigit:]]+,v1,amdzen3,core
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/cache.json b/tools/perf/pmu-events/arch/x86/skylakex/cache.json
index e750a21976f1..9ff67206ade4 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/cache.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/cache.json
@@ -1,49 +1,123 @@
 [
     {
-        "BriefDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "BriefDescription": "L1D data line replacements",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0004",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x51",
+        "EventName": "L1D.REPLACEMENT",
+        "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
+        "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
+        "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xB0",
-        "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
-        "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x4"
+        "EventCode": "0x48",
+        "EventName": "L1D_PEND_MISS.FB_FULL",
+        "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Counts all demand code reads that have any response type.",
+        "BriefDescription": "L1D miss outstandings duration in cycles",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010004",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x48",
+        "EventName": "L1D_PEND_MISS.PENDING",
+        "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Cycles with L1D load Misses outstanding.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x48",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+        "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "AnyThread": "1",
+        "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x48",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "L2 cache lines filling L2",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF1",
+        "EventName": "L2_LINES_IN.ALL",
+        "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
         "SampleAfterValue": "100003",
+        "UMask": "0x1f"
+    },
+    {
+        "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3.  Clean lines may either be allocated in L3 or dropped",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF2",
+        "EventName": "L2_LINES_OUT.NON_SILENT",
+        "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3.  Clean lines may either be allocated in L3 or dropped.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared state. A non-threaded event.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF2",
+        "EventName": "L2_LINES_OUT.SILENT",
+        "SampleAfterValue": "200003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache",
+        "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF2",
+        "EventName": "L2_LINES_OUT.USELESS_HWPF",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Deprecated": "1",
+        "EventCode": "0xF2",
+        "EventName": "L2_LINES_OUT.USELESS_PREF",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "L2 code requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x24",
-        "EventName": "L2_RQSTS.PF_MISS",
-        "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.",
+        "EventName": "L2_RQSTS.ALL_CODE_RD",
+        "PublicDescription": "Counts the total number of L2 code requests.",
         "SampleAfterValue": "200003",
-        "UMask": "0x38"
+        "UMask": "0xe4"
+    },
+    {
+        "BriefDescription": "Demand Data Read requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+        "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xe1"
     },
     {
         "BriefDescription": "Demand requests that miss L2 cache",
@@ -56,157 +130,144 @@
         "UMask": "0x27"
     },
     {
-        "BriefDescription": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "Demand requests to L2 cache",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0002",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
+        "PublicDescription": "Demand requests to L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xe7"
     },
     {
-        "BriefDescription": "Retired load instructions with L3 cache hits as data sources",
+        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD1",
-        "EventName": "MEM_LOAD_RETIRED.L3_HIT",
-        "PEBS": "1",
-        "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache.",
-        "SampleAfterValue": "50021",
-        "UMask": "0x4"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.ALL_PF",
+        "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xf8"
     },
     {
-        "BriefDescription": "L2 writebacks that access L2 cache",
+        "BriefDescription": "RFO requests to L2 cache",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xF0",
-        "EventName": "L2_TRANS.L2_WB",
-        "PublicDescription": "Counts L2 writebacks that access L2 cache.",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.ALL_RFO",
+        "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
         "SampleAfterValue": "200003",
-        "UMask": "0x40"
+        "UMask": "0xe2"
     },
     {
-        "BriefDescription": "L2 cache lines filling L2",
+        "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xF1",
-        "EventName": "L2_LINES_IN.ALL",
-        "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1f"
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.CODE_RD_HIT",
+        "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xc4"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "BriefDescription": "L2 cache misses when fetching instructions",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0400",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.CODE_RD_MISS",
+        "PublicDescription": "Counts L2 cache misses when fetching instructions.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x24"
     },
     {
-        "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "BriefDescription": "Demand Data Read requests that hit L2 cache",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0001",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+        "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache",
+        "SampleAfterValue": "200003",
+        "UMask": "0xc1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3.",
+        "BriefDescription": "Demand Data Read miss L2, no rejects",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0100",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
+        "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x21"
     },
     {
-        "BriefDescription": "Demand Data Read requests sent to uncore",
+        "BriefDescription": "All requests that miss L2 cache",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xB0",
-        "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
-        "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.MISS",
+        "PublicDescription": "All requests that miss L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x3f"
     },
     {
-        "BriefDescription": "Retired load instructions missed L3 cache as data sources",
+        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD1",
-        "EventName": "MEM_LOAD_RETIRED.L3_MISS",
-        "PEBS": "1",
-        "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache.",
-        "SampleAfterValue": "100007",
-        "UMask": "0x20"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.PF_HIT",
+        "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xd8"
     },
     {
-        "BriefDescription": "All retired store instructions.",
+        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD0",
-        "EventName": "MEM_INST_RETIRED.ALL_STORES",
-        "L1_Hit_Indication": "1",
-        "PEBS": "1",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x82"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.PF_MISS",
+        "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x38"
     },
     {
-        "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared state. A non-threaded event.",
+        "BriefDescription": "All L2 requests",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xF2",
-        "EventName": "L2_LINES_OUT.SILENT",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.REFERENCES",
+        "PublicDescription": "All L2 requests.",
         "SampleAfterValue": "200003",
-        "UMask": "0x1"
+        "UMask": "0xff"
     },
     {
-        "BriefDescription": "Counts all prefetch data reads that hit in the L3.",
+        "BriefDescription": "RFO requests that hit L2 cache",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0490",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.RFO_HIT",
+        "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xc2"
     },
     {
-        "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "BriefDescription": "RFO requests that miss L2 cache",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0490",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.RFO_MISS",
+        "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x22"
+    },
+    {
+        "BriefDescription": "L2 writebacks that access L2 cache",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF0",
+        "EventName": "L2_TRANS.L2_WB",
+        "PublicDescription": "Counts L2 writebacks that access L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x40"
     },
     {
         "BriefDescription": "Core-originated cacheable demand requests missed L3",
@@ -220,223 +281,225 @@
         "UMask": "0x41"
     },
     {
-        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches",
+        "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.ALL_PF",
-        "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.",
-        "SampleAfterValue": "200003",
-        "UMask": "0xf8"
+        "Errata": "SKL057",
+        "EventCode": "0x2E",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "PublicDescription": "Counts core-originated cacheable requests to the  L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2.  It does not include all accesses to the L3.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4f"
     },
     {
-        "BriefDescription": "Retired load instructions whose data sources was remote HITM",
+        "BriefDescription": "All retired load instructions.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Data_LA": "1",
-        "EventCode": "0xD3",
-        "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.ALL_LOADS",
         "PEBS": "1",
-        "PublicDescription": "Retired load instructions whose data sources was remote HITM.",
-        "SampleAfterValue": "100007",
-        "UMask": "0x4"
+        "SampleAfterValue": "2000003",
+        "UMask": "0x81"
     },
     {
-        "BriefDescription": "Counts all prefetch data reads that have any response type.",
+        "BriefDescription": "All retired store instructions.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010490",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.ALL_STORES",
+        "L1_Hit_Indication": "1",
+        "PEBS": "1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x82"
     },
     {
-        "BriefDescription": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "Retired load instructions with locked access.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0122",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.LOCK_LOADS",
+        "PEBS": "1",
+        "SampleAfterValue": "100007",
+        "UMask": "0x21"
     },
     {
-        "BriefDescription": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "Retired load instructions that split across a cacheline boundary.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0100",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.",
         "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "UMask": "0x41"
     },
     {
-        "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+        "BriefDescription": "Retired store instructions that split across a cacheline boundary.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x60",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
-        "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x4"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.SPLIT_STORES",
+        "L1_Hit_Indication": "1",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x42"
     },
     {
-        "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "BriefDescription": "Retired load instructions that miss the STLB.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0491",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
+        "PEBS": "1",
         "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "UMask": "0x11"
     },
     {
-        "BriefDescription": "RFO requests that miss L2 cache",
+        "BriefDescription": "Retired store instructions that miss the STLB.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.RFO_MISS",
-        "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
-        "SampleAfterValue": "200003",
-        "UMask": "0x22"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
+        "L1_Hit_Indication": "1",
+        "PEBS": "1",
+        "SampleAfterValue": "100003",
+        "UMask": "0x12"
     },
     {
-        "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3.  Clean lines may either be allocated in L3 or dropped",
+        "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xF2",
-        "EventName": "L2_LINES_OUT.NON_SILENT",
-        "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3.  Clean lines may either be allocated in L3 or dropped.",
-        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.",
+        "SampleAfterValue": "20011",
         "UMask": "0x2"
     },
     {
-        "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache",
+        "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xF2",
-        "EventName": "L2_LINES_OUT.USELESS_HWPF",
-        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.",
+        "SampleAfterValue": "20011",
         "UMask": "0x4"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) that have any response type.",
+        "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010002",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
+        "Data_LA": "1",
+        "EventCode": "0xD2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
+        "PEBS": "1",
+        "SampleAfterValue": "20011",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "All requests that miss L2 cache",
+        "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.MISS",
-        "PublicDescription": "All requests that miss L2 cache.",
-        "SampleAfterValue": "200003",
-        "UMask": "0x3f"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x8"
     },
     {
-        "BriefDescription": "L2 code requests",
+        "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.ALL_CODE_RD",
-        "PublicDescription": "Counts the total number of L2 code requests.",
-        "SampleAfterValue": "200003",
-        "UMask": "0xe4"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD3",
+        "EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions which data sources missed L3 but serviced from local DRAM.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions whose data sources was forwarded from a remote cache",
+        "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from remote dram",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Data_LA": "1",
         "EventCode": "0xD3",
-        "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD",
+        "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM",
         "PEBS": "1",
-        "PublicDescription": "Retired load instructions whose data sources was forwarded from a remote cache.",
         "SampleAfterValue": "100007",
-        "UMask": "0x8"
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "BriefDescription": "Retired load instructions whose data sources was forwarded from a remote cache",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0020",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "Data_LA": "1",
+        "EventCode": "0xD3",
+        "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions whose data sources was forwarded from a remote cache.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x8"
     },
     {
-        "BriefDescription": "Counts all demand & prefetch data reads that have any response type.",
+        "BriefDescription": "Retired load instructions whose data sources was remote HITM",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010491",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "Data_LA": "1",
+        "EventCode": "0xD3",
+        "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions whose data sources was remote HITM.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x4"
     },
     {
-        "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache",
+        "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Data_LA": "1",
-        "EventCode": "0xD2",
-        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT",
+        "EventCode": "0xD4",
+        "EventName": "MEM_LOAD_MISC_RETIRED.UC",
         "PEBS": "1",
-        "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.",
-        "SampleAfterValue": "20011",
-        "UMask": "0x2"
+        "SampleAfterValue": "100007",
+        "UMask": "0x4"
     },
     {
-        "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from remote dram",
+        "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "Data_LA": "1",
-        "EventCode": "0xD3",
-        "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.FB_HIT",
         "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.",
         "SampleAfterValue": "100007",
-        "UMask": "0x2"
+        "UMask": "0x40"
     },
     {
-        "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "BriefDescription": "Retired load instructions with L1 cache hits as data sources",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0122",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
+        "Data_LA": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.L1_HIT",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.",
+        "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
@@ -452,104 +515,135 @@
         "UMask": "0x8"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "BriefDescription": "Retired load instructions with L2 cache hits as data sources",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0020",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "Data_LA": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions with L2 cache hits as data sources.",
         "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "BriefDescription": "Retired load instructions missed L2 cache as data sources",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0400",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "Data_LA": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.L2_MISS",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions missed L2 cache as data sources.",
+        "SampleAfterValue": "50021",
+        "UMask": "0x10"
     },
     {
-        "BriefDescription": "L2 cache misses when fetching instructions",
+        "BriefDescription": "Retired load instructions with L3 cache hits as data sources",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.CODE_RD_MISS",
-        "PublicDescription": "Counts L2 cache misses when fetching instructions.",
-        "SampleAfterValue": "200003",
-        "UMask": "0x24"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.L3_HIT",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache.",
+        "SampleAfterValue": "50021",
+        "UMask": "0x4"
     },
     {
-        "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "BriefDescription": "Retired load instructions missed L3 cache as data sources",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0490",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "Data_LA": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.L3_MISS",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Demand and prefetch data reads",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB0",
+        "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+        "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
         "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "UMask": "0x8"
     },
     {
-        "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+        "BriefDescription": "Any memory transaction that reached the SQ.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x60",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
-        "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "EventCode": "0xB0",
+        "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
+        "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..",
+        "SampleAfterValue": "100003",
+        "UMask": "0x80"
     },
     {
-        "BriefDescription": "Counts demand data reads that hit in the L3.",
+        "BriefDescription": "Cacheable and noncachaeble code read requests",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0001",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB0",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+        "PublicDescription": "Counts both cacheable and non-cacheable code read requests.",
         "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "BriefDescription": "Demand Data Read requests sent to uncore",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0120",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB0",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0120",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB0",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+        "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
         "SampleAfterValue": "100003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB2",
+        "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+        "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.Note: Writeback pending FIFO has six entries.",
+        "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
+        "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+        "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+        "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
+    },
+    {
         "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
@@ -561,212 +655,257 @@
         "UMask": "0x2"
     },
     {
-        "BriefDescription": "Demand requests to L2 cache",
+        "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
-        "PublicDescription": "Demand requests to L2 cache.",
-        "SampleAfterValue": "200003",
-        "UMask": "0xe7"
+        "CounterMask": "1",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+        "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3.",
+        "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+        "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+        "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+        "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "6",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+        "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0080",
-        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "BriefDescription": "Counts all demand & prefetch data reads that have any response type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0491",
+        "MSRValue": "0x0000010491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type.",
+        "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010020",
+        "MSRValue": "0x3F803C0491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3.",
+        "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0122",
+        "MSRValue": "0x10003C0491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0001",
+        "MSRValue": "0x04003C0491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0080",
+        "MSRValue": "0x01003C0491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "AnyThread": "1",
-        "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x48",
-        "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "BriefDescription": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0490",
+        "MSRValue": "0x08003C0491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
+        "BriefDescription": "Counts all prefetch data reads that have any response type.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "Errata": "SKL057",
-        "EventCode": "0x2E",
-        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
-        "PublicDescription": "Counts core-originated cacheable requests to the  L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2.  It does not include all accesses to the L3.",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0000010490",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
-        "UMask": "0x4f"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type.",
+        "BriefDescription": "Counts all prefetch data reads that hit in the L3.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010080",
+        "MSRValue": "0x3F803C0490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0120",
+        "MSRValue": "0x10003C0490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3.",
+        "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0491",
+        "MSRValue": "0x04003C0490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions that miss the STLB.",
+        "BriefDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD0",
-        "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
-        "PEBS": "1",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x01003C0490",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
-        "UMask": "0x11"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads that have any response type.",
+        "BriefDescription": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010001",
+        "MSRValue": "0x08003C0490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3.",
+        "BriefDescription": "Counts prefetch RFOs that have any response type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0020",
+        "MSRValue": "0x0000010120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "L1D data line replacements",
+        "BriefDescription": "Counts prefetch RFOs that hit in the L3.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x51",
-        "EventName": "L1D.REPLACEMENT",
-        "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
-        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3F803C0120",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
@@ -783,47 +922,43 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.",
+        "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD4",
-        "EventName": "MEM_LOAD_MISC_RETIRED.UC",
-        "PEBS": "1",
-        "SampleAfterValue": "100007",
-        "UMask": "0x4"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x04003C0120",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready",
+        "BriefDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD1",
-        "EventName": "MEM_LOAD_RETIRED.FB_HIT",
-        "PEBS": "1",
-        "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.",
-        "SampleAfterValue": "100007",
-        "UMask": "0x40"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "Deprecated": "1",
-        "EventCode": "0xF2",
-        "EventName": "L2_LINES_OUT.USELESS_PREF",
-        "SampleAfterValue": "200003",
-        "UMask": "0x4"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x01003C0120",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache",
+        "BriefDescription": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.PF_HIT",
-        "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.",
-        "SampleAfterValue": "200003",
-        "UMask": "0xd8"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x08003C0120",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
         "BriefDescription": "Counts all demand & prefetch RFOs that have any response type.",
@@ -839,14 +974,17 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Demand Data Read miss L2, no rejects",
+        "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
-        "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.",
-        "SampleAfterValue": "200003",
-        "UMask": "0x21"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3F803C0122",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
         "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
@@ -862,293 +1000,247 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type.",
+        "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010100",
+        "MSRValue": "0x04003C0122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required",
+        "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD2",
-        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE",
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x01003C0122",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
-        "UMask": "0x8"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "BriefDescription": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0080",
+        "MSRValue": "0x08003C0122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "BriefDescription": "Counts all demand code reads that have any response type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0491",
+        "MSRValue": "0x0000010004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "All retired load instructions.",
+        "BriefDescription": "Counts all demand code reads that hit in the L3.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD0",
-        "EventName": "MEM_INST_RETIRED.ALL_LOADS",
-        "PEBS": "1",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x81"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3F803C0004",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+        "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD2",
-        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
-        "PEBS": "1",
-        "SampleAfterValue": "20011",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10003C0004",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Demand Data Read requests",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
-        "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.",
-        "SampleAfterValue": "200003",
-        "UMask": "0xe1"
-    },
-    {
-        "BriefDescription": "All L2 requests",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.REFERENCES",
-        "PublicDescription": "All L2 requests.",
-        "SampleAfterValue": "200003",
-        "UMask": "0xff"
-    },
-    {
-        "BriefDescription": "Cycles with L1D load Misses outstanding.",
+        "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x48",
-        "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
-        "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
-        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x04003C0004",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x60",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
-        "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x4"
-    },
-    {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "BriefDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0100",
+        "MSRValue": "0x01003C0004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of cache line split locks sent to uncore.",
+        "BriefDescription": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xF4",
-        "EventName": "SQ_MISC.SPLIT_LOCK",
-        "PublicDescription": "Counts the number of cache line split locks sent to the uncore.",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x08003C0004",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
-        "UMask": "0x10"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3.",
+        "BriefDescription": "Counts demand data reads that have any response type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0002",
+        "MSRValue": "0x0000010001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "BriefDescription": "Counts demand data reads that hit in the L3.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0100",
+        "MSRValue": "0x3F803C0001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.CODE_RD_HIT",
-        "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
-        "SampleAfterValue": "200003",
-        "UMask": "0xc4"
-    },
-    {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3.",
+        "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0400",
+        "MSRValue": "0x10003C0001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that have any response type.",
+        "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010010",
+        "MSRValue": "0x04003C0001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions with L2 cache hits as data sources",
+        "BriefDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD1",
-        "EventName": "MEM_LOAD_RETIRED.L2_HIT",
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions with L2 cache hits as data sources.",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x01003C0001",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "RFO requests that hit L2 cache",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.RFO_HIT",
-        "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
-        "SampleAfterValue": "200003",
-        "UMask": "0xc2"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "BriefDescription": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0080",
+        "MSRValue": "0x08003C0001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "L1D miss outstandings duration in cycles",
+        "BriefDescription": "Counts all demand data writes (RFOs) that have any response type.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x48",
-        "EventName": "L1D_PEND_MISS.PENDING",
-        "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
-        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0000010002",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0491",
+        "MSRValue": "0x3F803C0002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Demand Data Read requests that hit L2 cache",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
-        "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache",
-        "SampleAfterValue": "200003",
-        "UMask": "0xc1"
-    },
-    {
-        "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3",
+        "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD2",
-        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.",
-        "SampleAfterValue": "20011",
-        "UMask": "0x4"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10003C0002",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0001",
+        "MSRValue": "0x04003C0002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
@@ -1168,508 +1260,416 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0080",
+        "MSRValue": "0x08003C0002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that have any response type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0010",
+        "MSRValue": "0x0000010400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired store instructions that split across a cacheline boundary.",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD0",
-        "EventName": "MEM_INST_RETIRED.SPLIT_STORES",
-        "L1_Hit_Indication": "1",
-        "PEBS": "1",
-        "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3F803C0400",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
-        "UMask": "0x42"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0010",
+        "MSRValue": "0x10003C0400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Any memory transaction that reached the SQ.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xB0",
-        "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
-        "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..",
-        "SampleAfterValue": "100003",
-        "UMask": "0x80"
-    },
-    {
-        "BriefDescription": "Cacheable and noncachaeble code read requests",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xB0",
-        "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
-        "PublicDescription": "Counts both cacheable and non-cacheable code read requests.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0100",
+        "MSRValue": "0x04003C0400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0004",
+        "MSRValue": "0x01003C0400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x60",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
-        "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that have any response type.",
+        "BriefDescription": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010400",
+        "MSRValue": "0x08003C0400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that have any response type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0004",
+        "MSRValue": "0x0000010010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0004",
+        "MSRValue": "0x3F803C0010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x48",
-        "EventName": "L1D_PEND_MISS.FB_FULL",
-        "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0020",
+        "MSRValue": "0x10003C0010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads that hit in the L3.",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0004",
+        "MSRValue": "0x04003C0010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch RFOs that hit in the L3.",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0120",
+        "MSRValue": "0x01003C0010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "BriefDescription": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0020",
+        "MSRValue": "0x08003C0010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0400",
+        "MSRValue": "0x0000010020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions with L1 cache hits as data sources",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD1",
-        "EventName": "MEM_LOAD_RETIRED.L1_HIT",
-        "PEBS": "1",
-        "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.",
-        "SampleAfterValue": "2000003",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3F803C0020",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x60",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
-        "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x8"
-    },
-    {
-        "BriefDescription": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0490",
+        "MSRValue": "0x10003C0020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions with locked access.",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD0",
-        "EventName": "MEM_INST_RETIRED.LOCK_LOADS",
-        "PEBS": "1",
-        "SampleAfterValue": "100007",
-        "UMask": "0x21"
-    },
-    {
-        "BriefDescription": "Demand and prefetch data reads",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xB0",
-        "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
-        "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x04003C0020",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
-        "UMask": "0x8"
-    },
-    {
-        "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD3",
-        "EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM",
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions which data sources missed L3 but serviced from local DRAM.",
-        "SampleAfterValue": "100007",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x01003C0020",
+        "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "BriefDescription": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0400",
+        "MSRValue": "0x08003C0020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions that split across a cacheline boundary.",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD0",
-        "EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
-        "PEBS": "1",
-        "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0000010080",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
-        "UMask": "0x41"
-    },
-    {
-        "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xB2",
-        "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
-        "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.Note: Writeback pending FIFO has six entries.",
-        "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "6",
-        "EventCode": "0x60",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
-        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3F803C0080",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x60",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
-        "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0002",
+        "MSRValue": "0x10003C0080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3.",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0010",
+        "MSRValue": "0x04003C0080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch RFOs that have any response type.",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0000010120",
+        "MSRValue": "0x01003C0080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "BriefDescription": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0002",
+        "MSRValue": "0x08003C0080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired store instructions that miss the STLB.",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD0",
-        "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
-        "L1_Hit_Indication": "1",
-        "PEBS": "1",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0000010100",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
-        "UMask": "0x12"
-    },
-    {
-        "BriefDescription": "RFO requests to L2 cache",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.ALL_RFO",
-        "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
-        "SampleAfterValue": "200003",
-        "UMask": "0xe2"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x08003C0010",
+        "MSRValue": "0x3F803C0100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0010",
+        "MSRValue": "0x10003C0100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired load instructions missed L2 cache as data sources",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xD1",
-        "EventName": "MEM_LOAD_RETIRED.L2_MISS",
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions missed L2 cache as data sources.",
-        "SampleAfterValue": "50021",
-        "UMask": "0x10"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x04003C0100",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x01003C0001",
+        "MSRValue": "0x01003C0100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x60",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
-        "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x8"
-    },
-    {
-        "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "BriefDescription": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x04003C0122",
+        "MSRValue": "0x08003C0100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Number of cache line split locks sent to uncore.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF4",
+        "EventName": "SQ_MISC.SPLIT_LOCK",
+        "PublicDescription": "Counts the number of cache line split locks sent to the uncore.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x10"
     }
 ]
 \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json
index e197cde15047..503737ed3a83 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json
@@ -9,22 +9,13 @@
         "UMask": "0x4"
     },
     {
-        "BriefDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 8 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 8 calculations per element.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC7",
-        "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x40"
-    },
-    {
-        "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+        "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 4 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0xC7",
-        "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+        "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
         "SampleAfterValue": "2000003",
-        "UMask": "0x2"
+        "UMask": "0x8"
     },
     {
         "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 4 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
@@ -36,33 +27,31 @@
         "UMask": "0x10"
     },
     {
-        "BriefDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 16 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 16 calculations per element.",
+        "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 8 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0xC7",
-        "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE",
+        "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
         "SampleAfterValue": "2000003",
-        "UMask": "0x80"
+        "UMask": "0x20"
     },
     {
-        "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 8 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+        "BriefDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 8 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 8 calculations per element.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0xC7",
-        "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
+        "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
         "SampleAfterValue": "2000003",
-        "UMask": "0x20"
+        "UMask": "0x40"
     },
     {
-        "BriefDescription": "Cycles with any input/output SSE or FP assist",
+        "BriefDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 16 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 16 calculations per element.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0xCA",
-        "EventName": "FP_ASSIST.ANY",
-        "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1e"
+        "EventCode": "0xC7",
+        "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x80"
     },
     {
         "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
@@ -74,12 +63,23 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 4 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+        "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0xC7",
-        "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
+        "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
         "SampleAfterValue": "2000003",
-        "UMask": "0x8"
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Cycles with any input/output SSE or FP assist",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0xCA",
+        "EventName": "FP_ASSIST.ANY",
+        "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1e"
     }
 ]
 \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
index cdf95bd2a73d..078706a50091 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
@@ -1,13 +1,47 @@
 [
     {
-        "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.",
+        "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x80",
-        "EventName": "ICACHE_16B.IFDATA_STALL",
-        "PublicDescription": "Cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.",
+        "EventCode": "0xE6",
+        "EventName": "BACLEARS.ANY",
+        "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xAB",
+        "EventName": "DSB2MITE_SWITCHES.COUNT",
+        "PublicDescription": "This event counts the number of the Decode Stream Buffer (DSB)-to-MITE switches including all misses because of missing Decode Stream Buffer (DSB) cache and u-arch forced misses.\nNote: Invoking MITE requires two or three cycles delay.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x4"
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xAB",
+        "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+        "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.DSB_MISS",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x11",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
         "BriefDescription": "Retired Instructions who experienced iTLB true miss.",
@@ -24,220 +58,246 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
+        "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
+        "EventName": "FRONTEND_RETIRED.L1I_MISS",
         "MSRIndex": "0x3F7",
-        "MSRValue": "0x408006",
+        "MSRValue": "0x12",
         "PEBS": "1",
         "SampleAfterValue": "100007",
         "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+        "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x9C",
-        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
-        "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.",
-        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.L2_MISS",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x13",
+        "PEBS": "1",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x79",
-        "EventName": "IDQ.DSB_CYCLES",
-        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x8"
-    },
-    {
-        "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+        "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "3",
-        "EventCode": "0x9C",
-        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
-        "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.",
-        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_1",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x400106",
+        "PEBS": "2",
+        "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xE6",
-        "EventName": "BACLEARS.ANY",
-        "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
-        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x408006",
+        "PEBS": "1",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss.",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.DSB_MISS",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
         "MSRIndex": "0x3F7",
-        "MSRValue": "0x11",
+        "MSRValue": "0x401006",
         "PEBS": "1",
-        "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.",
         "SampleAfterValue": "100007",
         "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "4",
-        "EventCode": "0x9C",
-        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
-        "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.",
-        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x400206",
+        "PEBS": "1",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
         "MSRIndex": "0x3F7",
-        "MSRValue": "0x401006",
+        "MSRValue": "0x410006",
         "PEBS": "1",
-        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.",
         "SampleAfterValue": "100007",
         "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x79",
-        "EventName": "IDQ.MITE_UOPS",
-        "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x4"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x100206",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "2",
-        "EventCode": "0x9C",
-        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
-        "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.",
-        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x200206",
+        "PEBS": "1",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x79",
-        "EventName": "IDQ.MS_CYCLES",
-        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x30"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x300206",
+        "PEBS": "1",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles MITE is delivering any Uop",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x79",
-        "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
-        "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x24"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x402006",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x83",
-        "EventName": "ICACHE_64B.IFTAG_HIT",
-        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x400406",
+        "PEBS": "1",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EdgeDetect": "1",
-        "EventCode": "0x79",
-        "EventName": "IDQ.MS_SWITCHES",
-        "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x30"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x420006",
+        "PEBS": "1",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.L2_MISS",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
         "MSRIndex": "0x3F7",
-        "MSRValue": "0x13",
+        "MSRValue": "0x404006",
         "PEBS": "1",
         "SampleAfterValue": "100007",
         "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x79",
-        "EventName": "IDQ.MITE_CYCLES",
-        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x4"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x400806",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.",
+        "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
+        "EventName": "FRONTEND_RETIRED.STLB_MISS",
         "MSRIndex": "0x3F7",
-        "MSRValue": "0x404006",
+        "MSRValue": "0x15",
         "PEBS": "1",
+        "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
         "SampleAfterValue": "100007",
         "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
+        "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x9C",
-        "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
-        "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4  x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions).  c. Instruction Decode Queue (IDQ) delivers four uops.",
+        "EventCode": "0x80",
+        "EventName": "ICACHE_16B.IFDATA_STALL",
+        "PublicDescription": "Cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.",
         "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x83",
+        "EventName": "ICACHE_64B.IFTAG_HIT",
+        "SampleAfterValue": "200003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+        "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x79",
-        "EventName": "IDQ.MS_MITE_UOPS",
-        "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x20"
+        "EventCode": "0x83",
+        "EventName": "ICACHE_64B.IFTAG_MISS",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
     },
     {
         "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
@@ -249,14 +309,15 @@
         "UMask": "0x4"
     },
     {
-        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xAB",
-        "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
-        "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.",
+        "CounterMask": "4",
+        "EventCode": "0x79",
+        "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+        "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x2"
+        "UMask": "0x18"
     },
     {
         "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
@@ -270,106 +331,79 @@
         "UMask": "0x18"
     },
     {
-        "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.",
+        "BriefDescription": "Cycles MITE is delivering 4 Uops",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.STLB_MISS",
-        "MSRIndex": "0x3F7",
-        "MSRValue": "0x15",
-        "PEBS": "1",
-        "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
-        "SampleAfterValue": "100007",
-        "TakenAlone": "1",
-        "UMask": "0x1"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "4",
+        "EventCode": "0x79",
+        "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+        "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x24"
     },
     {
-        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+        "BriefDescription": "Cycles MITE is delivering any Uop",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
         "EventCode": "0x79",
-        "EventName": "IDQ.DSB_UOPS",
-        "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
+        "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+        "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
         "SampleAfterValue": "2000003",
-        "UMask": "0x8"
+        "UMask": "0x24"
     },
     {
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
-        "MSRIndex": "0x3F7",
-        "MSRValue": "0x420006",
-        "PEBS": "1",
-        "SampleAfterValue": "100007",
-        "TakenAlone": "1",
-        "UMask": "0x1"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x79",
+        "EventName": "IDQ.DSB_CYCLES",
+        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
     },
     {
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
-        "MSRIndex": "0x3F7",
-        "MSRValue": "0x400806",
-        "PEBS": "1",
-        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.",
-        "SampleAfterValue": "100007",
-        "TakenAlone": "1",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xc6",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_1",
-        "MSRIndex": "0x3F7",
-        "MSRValue": "0x400106",
-        "PEBS": "2",
-        "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.",
-        "SampleAfterValue": "100007",
-        "TakenAlone": "1",
-        "UMask": "0x1"
+        "EventCode": "0x79",
+        "EventName": "IDQ.DSB_UOPS",
+        "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
     },
     {
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall.",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2",
-        "MSRIndex": "0x3F7",
-        "MSRValue": "0x400206",
-        "PEBS": "1",
-        "SampleAfterValue": "100007",
-        "TakenAlone": "1",
-        "UMask": "0x1"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MITE_CYCLES",
+        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
     },
     {
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
-        "MSRIndex": "0x3F7",
-        "MSRValue": "0x400406",
-        "PEBS": "1",
-        "SampleAfterValue": "100007",
-        "TakenAlone": "1",
-        "UMask": "0x1"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MITE_UOPS",
+        "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
     },
     {
-        "BriefDescription": "Cycles MITE is delivering 4 Uops",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "4",
+        "CounterMask": "1",
         "EventCode": "0x79",
-        "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
-        "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "EventName": "IDQ.MS_CYCLES",
+        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x24"
+        "UMask": "0x30"
     },
     {
         "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
@@ -383,101 +417,56 @@
         "UMask": "0x10"
     },
     {
-        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+        "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x79",
-        "EventName": "IDQ.MS_UOPS",
-        "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.",
+        "EventName": "IDQ.MS_MITE_UOPS",
+        "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x30"
-    },
-    {
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
-        "MSRIndex": "0x3F7",
-        "MSRValue": "0x410006",
-        "PEBS": "1",
-        "SampleAfterValue": "100007",
-        "TakenAlone": "1",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2",
-        "MSRIndex": "0x3F7",
-        "MSRValue": "0x200206",
-        "PEBS": "1",
-        "SampleAfterValue": "100007",
-        "TakenAlone": "1",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3",
-        "MSRIndex": "0x3F7",
-        "MSRValue": "0x300206",
-        "PEBS": "1",
-        "SampleAfterValue": "100007",
-        "TakenAlone": "1",
-        "UMask": "0x1"
+        "UMask": "0x20"
     },
     {
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.",
+        "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
-        "MSRIndex": "0x3F7",
-        "MSRValue": "0x100206",
-        "PEBS": "1",
-        "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.",
-        "SampleAfterValue": "100007",
-        "TakenAlone": "1",
-        "UMask": "0x1"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EdgeDetect": "1",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MS_SWITCHES",
+        "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x30"
     },
     {
-        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "4",
         "EventCode": "0x79",
-        "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
-        "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+        "EventName": "IDQ.MS_UOPS",
+        "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x18"
+        "UMask": "0x30"
     },
     {
-        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches",
+        "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xAB",
-        "EventName": "DSB2MITE_SWITCHES.COUNT",
-        "PublicDescription": "This event counts the number of the Decode Stream Buffer (DSB)-to-MITE switches including all misses because of missing Decode Stream Buffer (DSB) cache and u-arch forced misses.\nNote: Invoking MITE requires two or three cycles delay.",
+        "EventCode": "0x9C",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+        "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4  x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions).  c. Instruction Decode Queue (IDQ) delivers four uops.",
         "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.",
+        "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
-        "MSRIndex": "0x3F7",
-        "MSRValue": "0x402006",
-        "PEBS": "1",
-        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.",
-        "SampleAfterValue": "100007",
-        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "4",
+        "EventCode": "0x9C",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+        "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.",
+        "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
@@ -492,25 +481,36 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+        "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x83",
-        "EventName": "ICACHE_64B.IFTAG_MISS",
-        "SampleAfterValue": "200003",
-        "UMask": "0x2"
+        "CounterMask": "3",
+        "EventCode": "0x9C",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+        "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.",
+        "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xC6",
-        "EventName": "FRONTEND_RETIRED.L1I_MISS",
-        "MSRIndex": "0x3F7",
-        "MSRValue": "0x12",
-        "PEBS": "1",
-        "SampleAfterValue": "100007",
-        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "2",
+        "EventCode": "0x9C",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+        "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x9C",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+        "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.",
+        "SampleAfterValue": "2000003",
         "UMask": "0x1"
     }
 ]
 \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/memory.json b/tools/perf/pmu-events/arch/x86/skylakex/memory.json
index 6c3fd89d204d..6f29b02fa320 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/memory.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/memory.json
@@ -1,31 +1,262 @@
 [
     {
-        "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from remote dram.",
+        "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800122",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "2",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "6",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x6"
+    },
+    {
+        "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.ABORTED",
+        "PEBS": "1",
+        "PublicDescription": "Number of times HLE abort was triggered.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.ABORTED_EVENTS",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.ABORTED_MEM",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.ABORTED_MEMTYPE",
+        "PublicDescription": "Number of times an HLE execution aborted due to incompatible memory type.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x40"
+    },
+    {
+        "BriefDescription": "Number of times an HLE execution aborted due to hardware timer expiration.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.ABORTED_TIMER",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Number of times an HLE execution successfully committed",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.COMMIT",
+        "PublicDescription": "Number of times HLE commit succeeded.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Number of times an HLE execution started.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.START",
+        "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL089",
+        "EventCode": "0xC3",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:a. memory disambiguation,b. external snoop, orc. cross SMT-HW-thread snoop (stores) hitting load buffer.",
         "SampleAfterValue": "100003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x80",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "1009",
+        "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand & prefetch RFOs that miss in the L3.",
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.ANY_SNOOP",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000122",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x10",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "20011",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x100",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "503",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x20",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x4",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.  Reported latency may be longer than just the memory latency.",
         "SampleAfterValue": "100003",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x200",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "101",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x40",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "2003",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x8",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "50021",
+        "TakenAlone": "1",
         "UMask": "0x1"
     },
     {
+        "BriefDescription": "Demand Data Read requests who miss L3 cache",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB0",
+        "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+        "PublicDescription": "Demand Data Read requests who miss L3 cache.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
+    },
+    {
         "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
@@ -36,146 +267,130 @@
         "UMask": "0x10"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from remote dram.",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss in the L3.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800020",
+        "MSRValue": "0x3FBC000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3.",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000100",
+        "MSRValue": "0x103FC00491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache.",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00002",
+        "MSRValue": "0x083FC00491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an HLE execution aborted due to hardware timer expiration.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC8",
-        "EventName": "HLE_RETIRED.ABORTED_TIMER",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x10"
-    },
-    {
-        "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from remote dram.",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local or remote dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800490",
+        "MSRValue": "0x063FC00491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC9",
-        "EventName": "RTM_RETIRED.ABORTED_MEM",
-        "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x8"
-    },
-    {
-        "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from remote dram.",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800004",
+        "MSRValue": "0x0604000491",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC9",
-        "EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
-        "PublicDescription": "Number of times an RTM execution aborted due to incompatible memory type.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x40"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x063B800491",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) that miss in the L3.",
+        "BriefDescription": "Counts all prefetch data reads that miss in the L3.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000002",
+        "MSRValue": "0x3FBC000490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local or remote dram.",
+        "BriefDescription": "Counts all prefetch data reads that miss the L3 and the modified data is transferred from remote cache.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063FC00120",
+        "MSRValue": "0x103FC00490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch data reads that miss in the L3.",
+        "BriefDescription": "Counts all prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000490",
+        "MSRValue": "0x083FC00490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local dram.",
+        "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local or remote dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000120",
+        "MSRValue": "0x063FC00490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
@@ -195,464 +410,312 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "2",
-        "EventCode": "0xA3",
-        "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram.",
+        "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from remote dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000491",
+        "MSRValue": "0x063B800490",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads that miss the L3 and clean or shared data is transferred from remote cache.",
+        "BriefDescription": "Counts prefetch RFOs that miss in the L3.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00004",
+        "MSRValue": "0x3FBC000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local or remote dram.",
+        "BriefDescription": "Counts prefetch RFOs that miss the L3 and the modified data is transferred from remote cache.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063FC00004",
+        "MSRValue": "0x103FC00120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xcd",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
-        "MSRIndex": "0x3F6",
-        "MSRValue": "0x20",
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.  Reported latency may be longer than just the memory latency.",
-        "SampleAfterValue": "100007",
-        "TakenAlone": "1",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the modified data is transferred from remote cache.",
+        "BriefDescription": "Counts prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00020",
+        "MSRValue": "0x083FC00120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch data reads that miss the L3 and the modified data is transferred from remote cache.",
+        "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local or remote dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00490",
+        "MSRValue": "0x063FC00120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3.",
+        "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000020",
+        "MSRValue": "0x0604000120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x54",
-        "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
-        "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x4"
-    },
-    {
-        "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x54",
-        "EventName": "TX_MEM.ABORT_CONFLICT",
-        "PublicDescription": "Number of times a TSX line had a cache conflict.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss in the L3.",
+        "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from remote dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000400",
+        "MSRValue": "0x063B800120",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.",
+        "BriefDescription": "Counts all demand & prefetch RFOs that miss in the L3.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xcd",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
-        "MSRIndex": "0x3F6",
-        "MSRValue": "0x40",
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.  Reported latency may be longer than just the memory latency.",
-        "SampleAfterValue": "2003",
-        "TakenAlone": "1",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FBC000122",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and clean or shared data is transferred from remote cache.",
+        "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the modified data is transferred from remote cache.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00002",
+        "MSRValue": "0x103FC00122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC8",
-        "EventName": "HLE_RETIRED.ABORTED_MEM",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x8"
-    },
-    {
-        "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x54",
-        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
-        "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x8"
-    },
-    {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the modified data is transferred from remote cache.",
+        "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00080",
+        "MSRValue": "0x083FC00122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x5d",
-        "EventName": "TX_EXEC.MISC5",
-        "PublicDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x10"
-    },
-    {
-        "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x5d",
-        "EventName": "TX_EXEC.MISC4",
-        "PublicDescription": "RTM region detected inside HLE.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x8"
-    },
-    {
-        "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x5d",
-        "EventName": "TX_EXEC.MISC3",
-        "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x4"
-    },
-    {
-        "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x5d",
-        "EventName": "TX_EXEC.MISC2",
-        "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x5d",
-        "EventName": "TX_EXEC.MISC1",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "Number of times an RTM execution successfully committed",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC9",
-        "EventName": "RTM_RETIRED.COMMIT",
-        "PublicDescription": "Number of times RTM commit succeeded.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "Counts prefetch RFOs that miss in the L3.",
+        "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local or remote dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000120",
+        "MSRValue": "0x063FC00122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x60",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x10"
-    },
-    {
-        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram.",
+        "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800491",
+        "MSRValue": "0x0604000122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from remote dram.",
+        "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from remote dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800080",
+        "MSRValue": "0x063B800122",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache.",
+        "BriefDescription": "Counts all demand code reads that miss in the L3.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00490",
+        "MSRValue": "0x3FBC000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local dram.",
+        "BriefDescription": "Counts all demand code reads that miss the L3 and the modified data is transferred from remote cache.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000010",
+        "MSRValue": "0x103FC00004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC9",
-        "EventName": "RTM_RETIRED.ABORTED_TIMER",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x10"
-    },
-    {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local or remote dram.",
+        "BriefDescription": "Counts all demand code reads that miss the L3 and clean or shared data is transferred from remote cache.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063FC00020",
+        "MSRValue": "0x083FC00004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local or remote dram.",
+        "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local or remote dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063FC00002",
+        "MSRValue": "0x063FC00004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local or remote dram.",
+        "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063FC00490",
+        "MSRValue": "0x0604000004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from remote dram.",
+        "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from remote dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800100",
+        "MSRValue": "0x063B800004",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the modified data is transferred from remote cache.",
+        "BriefDescription": "Counts demand data reads that miss in the L3.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00010",
+        "MSRValue": "0x3FBC000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local or remote dram.",
+        "BriefDescription": "Counts demand data reads that miss the L3 and the modified data is transferred from remote cache.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063FC00010",
+        "MSRValue": "0x103FC00001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC8",
-        "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x20"
-    },
-    {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from remote dram.",
+        "BriefDescription": "Counts demand data reads that miss the L3 and clean or shared data is transferred from remote cache.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800400",
+        "MSRValue": "0x083FC00001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache.",
+        "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local or remote dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00122",
+        "MSRValue": "0x063FC00001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the modified data is transferred from remote cache.",
+        "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00122",
+        "MSRValue": "0x0604000001",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
@@ -672,541 +735,442 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram.",
+        "BriefDescription": "Counts all demand data writes (RFOs) that miss in the L3.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000001",
+        "MSRValue": "0x3FBC000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an HLE execution successfully committed",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC8",
-        "EventName": "HLE_RETIRED.COMMIT",
-        "PublicDescription": "Number of times HLE commit succeeded.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x54",
-        "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
-        "PublicDescription": "Number of times we could not allocate Lock Buffer.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x40"
-    },
-    {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and clean or shared data is transferred from remote cache.",
+        "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00100",
+        "MSRValue": "0x103FC00002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads that miss the L3 and the modified data is transferred from remote cache.",
+        "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and clean or shared data is transferred from remote cache.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00001",
+        "MSRValue": "0x083FC00002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local dram.",
+        "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local or remote dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000020",
+        "MSRValue": "0x063FC00002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and clean or shared data is transferred from remote cache.",
+        "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00080",
+        "MSRValue": "0x0604000002",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Demand Data Read requests who miss L3 cache",
+        "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from remote dram.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xB0",
-        "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
-        "PublicDescription": "Demand Data Read requests who miss L3 cache.",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x063B800002",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
-        "UMask": "0x10"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local or remote dram.",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss in the L3.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063FC00080",
+        "MSRValue": "0x3FBC000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads that miss in the L3.",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the modified data is transferred from remote cache.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000001",
+        "MSRValue": "0x103FC00400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC9",
-        "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY",
-        "PublicDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x20"
-    },
-    {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and clean or shared data is transferred from remote cache.",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and clean or shared data is transferred from remote cache.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00010",
+        "MSRValue": "0x083FC00400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache.",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local or remote dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00120",
+        "MSRValue": "0x063FC00400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads that miss the L3 and the modified data is transferred from remote cache.",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00004",
+        "MSRValue": "0x0604000400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the modified data is transferred from remote cache.",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from remote dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00100",
+        "MSRValue": "0x063B800400",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xcd",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
-        "MSRIndex": "0x3F6",
-        "MSRValue": "0x100",
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.  Reported latency may be longer than just the memory latency.",
-        "SampleAfterValue": "503",
-        "TakenAlone": "1",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FBC000010",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local or remote dram.",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the modified data is transferred from remote cache.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063FC00122",
+        "MSRValue": "0x103FC00010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local or remote dram.",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and clean or shared data is transferred from remote cache.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063FC00001",
+        "MSRValue": "0x083FC00010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and clean or shared data is transferred from remote cache.",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local or remote dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00020",
+        "MSRValue": "0x063FC00010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from remote dram.",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800002",
+        "MSRValue": "0x0604000010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache.",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from remote dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00491",
+        "MSRValue": "0x063B800010",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC9",
-        "EventName": "RTM_RETIRED.ABORTED",
-        "PEBS": "1",
-        "PublicDescription": "Number of times RTM abort was triggered.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x4"
-    },
-    {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local or remote dram.",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063FC00400",
+        "MSRValue": "0x3FBC000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC8",
-        "EventName": "HLE_RETIRED.ABORTED",
-        "PEBS": "1",
-        "PublicDescription": "Number of times HLE abort was triggered.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x4"
-    },
-    {
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the modified data is transferred from remote cache.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xcd",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
-        "MSRIndex": "0x3F6",
-        "MSRValue": "0x10",
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.  Reported latency may be longer than just the memory latency.",
-        "SampleAfterValue": "20011",
-        "TakenAlone": "1",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x103FC00020",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x54",
-        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
-        "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x20"
-    },
-    {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and clean or shared data is transferred from remote cache.",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and clean or shared data is transferred from remote cache.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00400",
+        "MSRValue": "0x083FC00020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from remote dram.",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local or remote dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800010",
+        "MSRValue": "0x063FC00020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x60",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x10"
-    },
-    {
-        "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram.",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000002",
+        "MSRValue": "0x0604000020",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from remote dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xcd",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
-        "MSRIndex": "0x3F6",
-        "MSRValue": "0x200",
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.  Reported latency may be longer than just the memory latency.",
-        "SampleAfterValue": "101",
-        "TakenAlone": "1",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x063B800020",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the modified data is transferred from remote cache.",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00400",
+        "MSRValue": "0x3FBC000080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3.",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the modified data is transferred from remote cache.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000010",
+        "MSRValue": "0x103FC00080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram.",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and clean or shared data is transferred from remote cache.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000004",
+        "MSRValue": "0x083FC00080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x54",
-        "EventName": "TX_MEM.ABORT_CAPACITY",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from remote dram.",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local or remote dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063B800120",
+        "MSRValue": "0x063FC00080",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC8",
-        "EventName": "HLE_RETIRED.ABORTED_MEMTYPE",
-        "PublicDescription": "Number of times an HLE execution aborted due to incompatible memory type.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x40"
-    },
-    {
-        "BriefDescription": "Number of times an RTM execution started.",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local dram.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC9",
-        "EventName": "RTM_RETIRED.START",
-        "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.",
-        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0604000080",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from remote dram.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "Errata": "SKL089",
-        "EventCode": "0xC3",
-        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
-        "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:a. memory disambiguation,b. external snoop, orc. cross SMT-HW-thread snoop (stores) hitting load buffer.",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x063B800080",
+        "Offcore": "1",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x54",
-        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
-        "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x10"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local or remote dram.",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.ANY_SNOOP",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063FC00491",
+        "MSRValue": "0x3FBC000100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all demand & prefetch data reads that miss in the L3.",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the modified data is transferred from remote cache.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HITM",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000491",
+        "MSRValue": "0x103FC00100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts demand data reads that miss the L3 and clean or shared data is transferred from remote cache.",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and clean or shared data is transferred from remote cache.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00001",
+        "MSRValue": "0x083FC00100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3.",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local or remote dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000080",
+        "MSRValue": "0x063FC00100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
@@ -1226,19 +1190,30 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local dram.",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from remote dram.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3",
         "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
         "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000080",
+        "MSRValue": "0x063B800100",
         "Offcore": "1",
         "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
+        "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.ABORTED",
+        "PEBS": "1",
+        "PublicDescription": "Number of times RTM abort was triggered.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
         "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
@@ -1249,155 +1224,180 @@
         "UMask": "0x80"
     },
     {
-        "BriefDescription": "Number of times an HLE execution started.",
+        "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC8",
-        "EventName": "HLE_RETIRED.START",
-        "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.ABORTED_MEM",
+        "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
         "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "UMask": "0x8"
     },
     {
-        "BriefDescription": "Counts all demand code reads that miss in the L3.",
+        "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3FBC000004",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
+        "PublicDescription": "Number of times an RTM execution aborted due to incompatible memory type.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x40"
     },
     {
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
+        "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xcd",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
-        "MSRIndex": "0x3F6",
-        "MSRValue": "0x80",
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.  Reported latency may be longer than just the memory latency.",
-        "SampleAfterValue": "1009",
-        "TakenAlone": "1",
-        "UMask": "0x1"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.ABORTED_TIMER",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
     },
     {
-        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache.",
+        "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x083FC00491",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY",
+        "PublicDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20"
     },
     {
-        "BriefDescription": "Counts prefetch RFOs that miss the L3 and the modified data is transferred from remote cache.",
+        "BriefDescription": "Number of times an RTM execution successfully committed",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HITM",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x103FC00120",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.COMMIT",
+        "PublicDescription": "Number of times RTM commit succeeded.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Number of times an RTM execution started.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.START",
+        "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.",
+        "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local or remote dram.",
+        "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x063FC00100",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x5d",
+        "EventName": "TX_EXEC.MISC1",
+        "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
+        "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "6",
-        "EventCode": "0xA3",
-        "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
+        "EventCode": "0x5d",
+        "EventName": "TX_EXEC.MISC2",
+        "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x6"
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).",
+        "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC8",
-        "EventName": "HLE_RETIRED.ABORTED_EVENTS",
+        "EventCode": "0x5d",
+        "EventName": "TX_EXEC.MISC3",
+        "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x80"
+        "UMask": "0x4"
     },
     {
-        "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram.",
+        "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000122",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x5d",
+        "EventName": "TX_EXEC.MISC4",
+        "PublicDescription": "RTM region detected inside HLE.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
     },
     {
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.",
+        "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xcd",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
-        "MSRIndex": "0x3F6",
-        "MSRValue": "0x4",
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.  Reported latency may be longer than just the memory latency.",
-        "SampleAfterValue": "100003",
-        "TakenAlone": "1",
-        "UMask": "0x1"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x5d",
+        "EventName": "TX_EXEC.MISC5",
+        "PublicDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
     },
     {
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.",
+        "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xcd",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
-        "MSRIndex": "0x3F6",
-        "MSRValue": "0x8",
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.  Reported latency may be longer than just the memory latency.",
-        "SampleAfterValue": "50021",
-        "TakenAlone": "1",
-        "UMask": "0x1"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_CAPACITY",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local dram.",
+        "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xB7, 0xBB",
-        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x0604000400",
-        "Offcore": "1",
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_CONFLICT",
+        "PublicDescription": "Number of times a TSX line had a cache conflict.",
+        "SampleAfterValue": "2000003",
         "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
+        "PublicDescription": "Number of times we could not allocate Lock Buffer.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x40"
     }
 ]
 \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/other.json b/tools/perf/pmu-events/arch/x86/skylakex/other.json
index f6b147ba8ef6..8b344259176f 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/other.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/other.json
@@ -1,34 +1,5 @@
 [
     {
-        "BriefDescription": "Core cycles the core was throttled due to a pending power level request.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x28",
-        "EventName": "CORE_POWER.THROTTLE",
-        "PublicDescription": "Core cycles the out-of-order engine was throttled due to a pending power level request.",
-        "SampleAfterValue": "200003",
-        "UMask": "0x40"
-    },
-    {
-        "BriefDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xFE",
-        "EventName": "IDI_MISC.WB_DOWNGRADE",
-        "PublicDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x4"
-    },
-    {
-        "BriefDescription": "Number of PREFETCHW instructions executed.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x32",
-        "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x8"
-    },
-    {
         "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the Non-AVX turbo schedule.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
@@ -49,13 +20,24 @@
         "UMask": "0x18"
     },
     {
-        "BriefDescription": "Number of PREFETCHT0 instructions executed.",
+        "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x32",
-        "EventName": "SW_PREFETCH_ACCESS.T0",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2"
+        "EventCode": "0x28",
+        "EventName": "CORE_POWER.LVL2_TURBO_LICENSE",
+        "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server michroarchtecture).  This includes high current AVX 512-bit instructions.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Core cycles the core was throttled due to a pending power level request.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x28",
+        "EventName": "CORE_POWER.THROTTLE",
+        "PublicDescription": "Core cycles the out-of-order engine was throttled due to a pending power level request.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x40"
     },
     {
         "BriefDescription": "Number of hardware interrupts received by the processor.",
@@ -68,14 +50,32 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.",
+        "BriefDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x28",
-        "EventName": "CORE_POWER.LVL2_TURBO_LICENSE",
-        "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server michroarchtecture).  This includes high current AVX 512-bit instructions.",
-        "SampleAfterValue": "200003",
-        "UMask": "0x20"
+        "EventCode": "0xFE",
+        "EventName": "IDI_MISC.WB_DOWNGRADE",
+        "PublicDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xFE",
+        "EventName": "IDI_MISC.WB_UPGRADE",
+        "PublicDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2"
+    },
+    {
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x09",
+        "EventName": "MEMORY_DISAMBIGUATION.HISTORY_RESET",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
         "BriefDescription": "Number of PREFETCHNTA instructions executed.",
@@ -87,30 +87,30 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
+        "BriefDescription": "Number of PREFETCHW instructions executed.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x32",
-        "EventName": "SW_PREFETCH_ACCESS.T1_T2",
+        "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
         "SampleAfterValue": "2000003",
-        "UMask": "0x4"
+        "UMask": "0x8"
     },
     {
+        "BriefDescription": "Number of PREFETCHT0 instructions executed.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x09",
-        "EventName": "MEMORY_DISAMBIGUATION.HISTORY_RESET",
+        "EventCode": "0x32",
+        "EventName": "SW_PREFETCH_ACCESS.T0",
         "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly",
+        "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xFE",
-        "EventName": "IDI_MISC.WB_UPGRADE",
-        "PublicDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x2"
+        "EventCode": "0x32",
+        "EventName": "SW_PREFETCH_ACCESS.T1_T2",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
     }
 ]
 \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
index 3bfc6943ddf9..ca5748120666 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
@@ -1,55 +1,58 @@
 [
     {
-        "BriefDescription": "Number of instructions retired. General Counter - architectural event",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "Errata": "SKL091, SKL044",
-        "EventCode": "0xC0",
-        "EventName": "INST_RETIRED.ANY_P",
-        "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).",
-        "SampleAfterValue": "2000003"
-    },
-    {
-        "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+        "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
-        "EventCode": "0xB1",
-        "EventName": "UOPS_EXECUTED.STALL_CYCLES",
-        "Invert": "1",
-        "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
+        "EventCode": "0x14",
+        "EventName": "ARITH.DIVIDER_ACTIVE",
         "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.",
+        "BriefDescription": "All (macro) branch instructions retired.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA6",
-        "EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
-        "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x10"
+        "Errata": "SKL091",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "PublicDescription": "Counts all (macro) branch instructions retired.",
+        "SampleAfterValue": "400009"
     },
     {
-        "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
+        "BriefDescription": "All (macro) branch instructions retired.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "Errata": "SKL091",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+        "PEBS": "2",
+        "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Conditional branch instructions retired.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x14",
-        "EventName": "ARITH.DIVIDER_ACTIVE",
-        "SampleAfterValue": "2000003",
+        "Errata": "SKL091",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.CONDITIONAL",
+        "PEBS": "1",
+        "PublicDescription": "This event counts conditional branch instructions retired.",
+        "SampleAfterValue": "400009",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "False dependencies in MOB due to partial compare on address.",
+        "BriefDescription": "Not taken branch instructions retired.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x07",
-        "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
-        "PublicDescription": "Counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "Errata": "SKL091",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.COND_NTAKEN",
+        "PublicDescription": "This event counts not taken branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "UMask": "0x10"
     },
     {
         "BriefDescription": "Far branch instructions retired.",
@@ -64,23 +67,81 @@
         "UMask": "0x40"
     },
     {
-        "BriefDescription": "Counts the number of x87 uops dispatched.",
+        "BriefDescription": "Direct and indirect near call instructions retired.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xB1",
-        "EventName": "UOPS_EXECUTED.X87",
-        "PublicDescription": "Counts the number of x87 uops executed.",
-        "SampleAfterValue": "2000003",
+        "Errata": "SKL091",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL",
+        "PEBS": "1",
+        "PublicDescription": "This event counts both direct and indirect near call instructions retired.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Return instructions retired.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL091",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+        "PEBS": "1",
+        "PublicDescription": "This event counts return instructions retired.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Taken branch instructions retired.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL091",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+        "PEBS": "1",
+        "PublicDescription": "This event counts taken branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Not taken branch instructions retired.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL091",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+        "PublicDescription": "This event counts not taken branch instructions retired.",
+        "SampleAfterValue": "400009",
         "UMask": "0x10"
     },
     {
-        "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.",
+        "BriefDescription": "All mispredicted macro branch instructions retired.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x4C",
-        "EventName": "LOAD_HIT_PRE.SW_PF",
-        "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
-        "SampleAfterValue": "100003",
+        "EventCode": "0xC5",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch.  When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+        "SampleAfterValue": "400009"
+    },
+    {
+        "BriefDescription": "Mispredicted macro branch instructions retired.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC5",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+        "PEBS": "2",
+        "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Mispredicted conditional branch instructions retired.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC5",
+        "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+        "PEBS": "1",
+        "PublicDescription": "This event counts mispredicted conditional branch instructions retired.",
+        "SampleAfterValue": "400009",
         "UMask": "0x1"
     },
     {
@@ -95,85 +156,99 @@
         "UMask": "0x2"
     },
     {
-        "BriefDescription": "Total execution stalls.",
+        "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "4",
-        "EventCode": "0xA3",
-        "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x4"
+        "EventCode": "0xC5",
+        "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+        "PEBS": "1",
+        "SampleAfterValue": "400009",
+        "UMask": "0x20"
     },
     {
-        "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+        "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "5",
-        "EventCode": "0xA3",
-        "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x5"
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "25003",
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
+        "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x0E",
-        "EventName": "UOPS_ISSUED.SLOW_LEA",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x20"
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
+        "SampleAfterValue": "25003",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles with less than 10 actually retired uops.",
+        "AnyThread": "1",
+        "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "10",
-        "EventCode": "0xC2",
-        "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
-        "Invert": "1",
-        "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2"
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+        "SampleAfterValue": "25003",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Thread cycles when thread is not in halt state",
+        "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x3C",
-        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
-        "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
-        "SampleAfterValue": "2000003"
+        "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "25003",
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
+        "BriefDescription": "Reference cycles when the core is not in halt state.",
+        "Counter": "Fixed counter 2",
+        "CounterHTOff": "Fixed counter 2",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+        "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'.  The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'.  After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x3"
+    },
+    {
+        "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "2",
-        "EventCode": "0xB1",
-        "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
-        "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
-        "SampleAfterValue": "2000003",
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+        "SampleAfterValue": "25003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
+        "AnyThread": "1",
+        "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x3C",
-        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
         "SampleAfterValue": "25003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Number of machine clears (nukes) of any type.",
+        "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
         "EdgeDetect": "1",
-        "EventCode": "0xC3",
-        "EventName": "MACHINE_CLEARS.COUNT",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1"
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_UNHALTED.RING0_TRANS",
+        "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).",
+        "SampleAfterValue": "100007"
+    },
+    {
+        "BriefDescription": "Core cycles when the thread is not in halt state",
+        "Counter": "Fixed counter 1",
+        "CounterHTOff": "Fixed counter 1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD",
+        "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
         "AnyThread": "1",
@@ -185,177 +260,182 @@
         "UMask": "0x2"
     },
     {
-        "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
+        "BriefDescription": "Thread cycles when thread is not in halt state",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xB1",
-        "EventName": "UOPS_EXECUTED.THREAD",
-        "PublicDescription": "Number of uops to be executed per-thread each cycle.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+        "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
+        "SampleAfterValue": "2000003"
     },
     {
-        "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
+        "AnyThread": "1",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "3",
-        "EventCode": "0xB1",
-        "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
-        "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+        "SampleAfterValue": "2000003"
     },
     {
-        "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+        "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0xB1",
-        "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
-        "Invert": "1",
+        "CounterMask": "8",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
         "SampleAfterValue": "2000003",
-        "UMask": "0x2"
+        "UMask": "0x8"
     },
     {
-        "BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.",
+        "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA6",
-        "EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
+        "CounterMask": "1",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
         "SampleAfterValue": "2000003",
-        "UMask": "0x40"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+        "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "8",
+        "CounterMask": "16",
         "EventCode": "0xA3",
-        "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
         "SampleAfterValue": "2000003",
-        "UMask": "0x8"
+        "UMask": "0x10"
     },
     {
-        "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+        "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0xA8",
-        "EventName": "LSD.CYCLES_ACTIVE",
-        "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
+        "CounterMask": "12",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
         "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "UMask": "0xc"
     },
     {
-        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
+        "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x0D",
-        "EventName": "INT_MISC.RECOVERY_CYCLES",
-        "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
+        "CounterMask": "5",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
         "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "UMask": "0x5"
     },
     {
-        "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
+        "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
         "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x3C",
-        "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
-        "SampleAfterValue": "25003",
-        "UMask": "0x1"
+        "CounterHTOff": "0,1,2,3",
+        "CounterMask": "20",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x14"
     },
     {
-        "BriefDescription": "Cycles per thread when uops are executed in port 0",
+        "BriefDescription": "Total execution stalls.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA1",
-        "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.",
+        "CounterMask": "4",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
         "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "UMask": "0x4"
     },
     {
-        "BriefDescription": "Cycles per thread when uops are executed in port 1",
+        "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA1",
-        "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.",
+        "EventCode": "0xA6",
+        "EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
+        "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
         "SampleAfterValue": "2000003",
         "UMask": "0x2"
     },
     {
-        "BriefDescription": "Cycles per thread when uops are executed in port 2",
+        "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA1",
-        "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 2.",
+        "EventCode": "0xA6",
+        "EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
+        "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
         "SampleAfterValue": "2000003",
         "UMask": "0x4"
     },
     {
-        "BriefDescription": "Cycles per thread when uops are executed in port 3",
+        "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA1",
-        "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 3.",
+        "EventCode": "0xA6",
+        "EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
+        "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
         "SampleAfterValue": "2000003",
         "UMask": "0x8"
     },
     {
-        "BriefDescription": "Cycles per thread when uops are executed in port 4",
+        "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA1",
-        "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 4.",
+        "EventCode": "0xA6",
+        "EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
+        "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
         "SampleAfterValue": "2000003",
         "UMask": "0x10"
     },
     {
-        "BriefDescription": "Cycles per thread when uops are executed in port 5",
+        "BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA1",
-        "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.",
+        "EventCode": "0xA6",
+        "EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
         "SampleAfterValue": "2000003",
-        "UMask": "0x20"
+        "UMask": "0x40"
     },
     {
-        "BriefDescription": "Cycles per thread when uops are executed in port 6",
+        "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA1",
-        "EventName": "UOPS_DISPATCHED_PORT.PORT_6",
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.",
+        "EventCode": "0xA6",
+        "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
+        "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x40"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles per thread when uops are executed in port 7",
+        "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA1",
-        "EventName": "UOPS_DISPATCHED_PORT.PORT_7",
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 7.",
+        "EventCode": "0x87",
+        "EventName": "ILD_STALL.LCP",
+        "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x80"
+        "UMask": "0x1"
     },
     {
-        "AnyThread": "1",
-        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x0D",
-        "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
+        "BriefDescription": "Instructions retired from execution.",
+        "Counter": "Fixed counter 0",
+        "CounterHTOff": "Fixed counter 0",
+        "EventName": "INST_RETIRED.ANY",
+        "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
         "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
+        "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL091, SKL044",
+        "EventCode": "0xC0",
+        "EventName": "INST_RETIRED.ANY_P",
+        "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).",
+        "SampleAfterValue": "2000003"
+    },
+    {
         "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
         "Counter": "1",
         "CounterHTOff": "1",
@@ -368,24 +448,56 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+        "BriefDescription": "Number of cycles using always true condition applied to  PEBS instructions retired event.",
+        "Counter": "0,2,3",
+        "CounterHTOff": "0,2,3",
+        "CounterMask": "10",
+        "Errata": "SKL091, SKL044",
+        "EventCode": "0xC0",
+        "EventName": "INST_RETIRED.TOTAL_CYCLES_PS",
+        "Invert": "1",
+        "PEBS": "2",
+        "PublicDescription": "Number of cycles using an always true condition applied to  PEBS instructions retired event. (inst_ret< 16)",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "4",
-        "EventCode": "0xA8",
-        "EventName": "LSD.CYCLES_4_UOPS",
-        "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).",
+        "EventCode": "0x0D",
+        "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x0D",
+        "EventName": "INT_MISC.RECOVERY_CYCLES",
+        "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
         "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.",
+        "AnyThread": "1",
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA6",
-        "EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
-        "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
+        "EventCode": "0x0D",
+        "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
         "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.NO_SR",
+        "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+        "SampleAfterValue": "100003",
         "UMask": "0x8"
     },
     {
@@ -399,108 +511,77 @@
         "UMask": "0x2"
     },
     {
-        "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+        "BriefDescription": "False dependencies in MOB due to partial compare on address.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EdgeDetect": "1",
-        "EventCode": "0x5E",
-        "EventName": "RS_EVENTS.EMPTY_END",
-        "Invert": "1",
-        "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.",
-        "SampleAfterValue": "2000003",
+        "EventCode": "0x07",
+        "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+        "PublicDescription": "Counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.",
+        "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "AnyThread": "1",
-        "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
+        "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x3C",
-        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
-        "SampleAfterValue": "25003",
+        "EventCode": "0x4C",
+        "EventName": "LOAD_HIT_PRE.SW_PF",
+        "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
+        "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles where the pipeline is stalled due to serializing operations.",
+        "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x59",
-        "EventName": "PARTIAL_RAT_STALLS.SCOREBOARD",
-        "PublicDescription": "This event counts cycles during which the microcode scoreboard stalls happen.",
+        "CounterMask": "4",
+        "EventCode": "0xA8",
+        "EventName": "LSD.CYCLES_4_UOPS",
+        "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).",
         "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
+        "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
-        "EventCode": "0x0E",
-        "EventName": "UOPS_ISSUED.STALL_CYCLES",
-        "Invert": "1",
-        "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
+        "EventCode": "0xA8",
+        "EventName": "LSD.CYCLES_ACTIVE",
+        "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
         "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Not taken branch instructions retired.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "Errata": "SKL091",
-        "EventCode": "0xc4",
-        "EventName": "BR_INST_RETIRED.COND_NTAKEN",
-        "PublicDescription": "This event counts not taken branch instructions retired.",
-        "SampleAfterValue": "400009",
-        "UMask": "0x10"
-    },
-    {
-        "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
+        "BriefDescription": "Number of Uops delivered by the LSD.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "3",
-        "EventCode": "0xB1",
-        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+        "EventCode": "0xA8",
+        "EventName": "LSD.UOPS",
+        "PublicDescription": "Number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
         "SampleAfterValue": "2000003",
-        "UMask": "0x2"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+        "BriefDescription": "Number of machine clears (nukes) of any type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
-        "EventCode": "0xB1",
-        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "4",
-        "EventCode": "0xB1",
-        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "Reference cycles when the core is not in halt state.",
-        "Counter": "Fixed counter 2",
-        "CounterHTOff": "Fixed counter 2",
-        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
-        "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'.  The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'.  After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x3"
+        "EdgeDetect": "1",
+        "EventCode": "0xC3",
+        "EventName": "MACHINE_CLEARS.COUNT",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "All mispredicted macro branch instructions retired.",
+        "BriefDescription": "Self-modifying code (SMC) detected.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC5",
-        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
-        "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch.  When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
-        "SampleAfterValue": "400009"
+        "EventCode": "0xC3",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4"
     },
     {
         "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.",
@@ -512,448 +593,377 @@
         "UMask": "0x3f"
     },
     {
-        "BriefDescription": "Cycles without actually retired uops.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0xC2",
-        "EventName": "UOPS_RETIRED.STALL_CYCLES",
-        "Invert": "1",
-        "PublicDescription": "This event counts cycles without actually retired uops.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "Number of Uops delivered by the LSD.",
+        "BriefDescription": "Cycles where the pipeline is stalled due to serializing operations.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA8",
-        "EventName": "LSD.UOPS",
-        "PublicDescription": "Number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
+        "EventCode": "0x59",
+        "EventName": "PARTIAL_RAT_STALLS.SCOREBOARD",
+        "PublicDescription": "This event counts cycles during which the microcode scoreboard stalls happen.",
         "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x3C",
-        "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
-        "SampleAfterValue": "25003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+        "BriefDescription": "Resource-related stall cycles",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x87",
-        "EventName": "ILD_STALL.LCP",
-        "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
+        "EventCode": "0xa2",
+        "EventName": "RESOURCE_STALLS.ANY",
+        "PublicDescription": "Counts resource-related stall cycles.",
         "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+        "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "16",
-        "EventCode": "0xA3",
-        "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
+        "EventCode": "0xA2",
+        "EventName": "RESOURCE_STALLS.SB",
+        "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x10"
+        "UMask": "0x8"
     },
     {
-        "BriefDescription": "Taken branch instructions retired.",
+        "BriefDescription": "Increments whenever there is an update to the LBR array.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "Errata": "SKL091",
-        "EventCode": "0xC4",
-        "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
-        "PEBS": "1",
-        "PublicDescription": "This event counts taken branch instructions retired.",
-        "SampleAfterValue": "400009",
+        "EventCode": "0xCC",
+        "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+        "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
+        "SampleAfterValue": "2000003",
         "UMask": "0x20"
     },
     {
-        "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use",
+        "BriefDescription": "Number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted). This event is not supported on first SKL and KBL products.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x03",
-        "EventName": "LD_BLOCKS.NO_SR",
-        "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x8"
+        "EventCode": "0xCC",
+        "EventName": "ROB_MISC_EVENTS.PAUSE_INST",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x40"
     },
     {
-        "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
+        "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x0E",
-        "EventName": "UOPS_ISSUED.ANY",
-        "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
+        "EventCode": "0x5E",
+        "EventName": "RS_EVENTS.EMPTY_CYCLES",
+        "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.",
         "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Core cycles when the thread is not in halt state",
-        "Counter": "Fixed counter 1",
-        "CounterHTOff": "Fixed counter 1",
-        "EventName": "CPU_CLK_UNHALTED.THREAD",
-        "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
+        "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EdgeDetect": "1",
+        "EventCode": "0x5E",
+        "EventName": "RS_EVENTS.EMPTY_END",
+        "Invert": "1",
+        "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x2"
+        "UMask": "0x1"
     },
     {
-        "AnyThread": "1",
-        "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
+        "BriefDescription": "Cycles per thread when uops are executed in port 0",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x3C",
-        "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
-        "SampleAfterValue": "25003",
+        "EventCode": "0xA1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.",
+        "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Direct and indirect near call instructions retired.",
+        "BriefDescription": "Cycles per thread when uops are executed in port 1",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "Errata": "SKL091",
-        "EventCode": "0xC4",
-        "EventName": "BR_INST_RETIRED.NEAR_CALL",
-        "PEBS": "1",
-        "PublicDescription": "This event counts both direct and indirect near call instructions retired.",
-        "SampleAfterValue": "100007",
+        "EventCode": "0xA1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.",
+        "SampleAfterValue": "2000003",
         "UMask": "0x2"
     },
     {
-        "BriefDescription": "Number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted). This event is not supported on first SKL and KBL products.",
+        "BriefDescription": "Cycles per thread when uops are executed in port 2",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xCC",
-        "EventName": "ROB_MISC_EVENTS.PAUSE_INST",
+        "EventCode": "0xA1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 2.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x40"
+        "UMask": "0x4"
     },
     {
-        "BriefDescription": "Resource-related stall cycles",
+        "BriefDescription": "Cycles per thread when uops are executed in port 3",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xa2",
-        "EventName": "RESOURCE_STALLS.ANY",
-        "PublicDescription": "Counts resource-related stall cycles.",
+        "EventCode": "0xA1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 3.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "UMask": "0x8"
     },
     {
-        "BriefDescription": "Self-modifying code (SMC) detected.",
+        "BriefDescription": "Cycles per thread when uops are executed in port 4",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC3",
-        "EventName": "MACHINE_CLEARS.SMC",
-        "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x4"
+        "EventCode": "0xA1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 4.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
     },
     {
-        "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
+        "BriefDescription": "Cycles per thread when uops are executed in port 5",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x3C",
-        "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
-        "SampleAfterValue": "25003",
-        "UMask": "0x2"
+        "EventCode": "0xA1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20"
     },
     {
-        "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
+        "BriefDescription": "Cycles per thread when uops are executed in port 6",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "4",
-        "EventCode": "0xB1",
-        "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
-        "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
+        "EventCode": "0xA1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_6",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "UMask": "0x40"
     },
     {
-        "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
+        "BriefDescription": "Cycles per thread when uops are executed in port 7",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC5",
-        "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
-        "PEBS": "1",
-        "SampleAfterValue": "400009",
-        "UMask": "0x20"
-    },
-    {
-        "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "CounterMask": "20",
-        "EventCode": "0xA3",
-        "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
+        "EventCode": "0xA1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_7",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 7.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x14"
+        "UMask": "0x80"
     },
     {
-        "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.",
+        "BriefDescription": "Number of uops executed on the core.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA6",
-        "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
-        "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CORE",
+        "PublicDescription": "Number of uops executed from any thread.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Number of cycles using always true condition applied to  PEBS instructions retired event.",
-        "Counter": "0,2,3",
-        "CounterHTOff": "0,2,3",
-        "CounterMask": "10",
-        "Errata": "SKL091, SKL044",
-        "EventCode": "0xC0",
-        "EventName": "INST_RETIRED.TOTAL_CYCLES_PS",
-        "Invert": "1",
-        "PEBS": "2",
-        "PublicDescription": "Number of cycles using an always true condition applied to  PEBS instructions retired event. (inst_ret< 16)",
+        "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
         "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Retirement slots used.",
+        "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC2",
-        "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
-        "PublicDescription": "Counts the retirement slots used.",
+        "CounterMask": "2",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
         "SampleAfterValue": "2000003",
         "UMask": "0x2"
     },
     {
-        "AnyThread": "1",
-        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x3C",
-        "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
-        "SampleAfterValue": "2000003"
+        "CounterMask": "3",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.",
+        "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x0E",
-        "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH",
-        "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.",
+        "CounterMask": "4",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
         "SampleAfterValue": "2000003",
         "UMask": "0x2"
     },
     {
-        "BriefDescription": "Increments whenever there is an update to the LBR array.",
+        "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xCC",
-        "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
-        "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
+        "CounterMask": "1",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+        "Invert": "1",
         "SampleAfterValue": "2000003",
-        "UMask": "0x20"
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
+        "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x5E",
-        "EventName": "RS_EVENTS.EMPTY_CYCLES",
-        "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.",
+        "CounterMask": "1",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+        "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
         "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Return instructions retired.",
+        "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "Errata": "SKL091",
-        "EventCode": "0xC4",
-        "EventName": "BR_INST_RETIRED.NEAR_RETURN",
-        "PEBS": "1",
-        "PublicDescription": "This event counts return instructions retired.",
-        "SampleAfterValue": "100007",
-        "UMask": "0x8"
-    },
-    {
-        "BriefDescription": "Instructions retired from execution.",
-        "Counter": "Fixed counter 0",
-        "CounterHTOff": "Fixed counter 0",
-        "EventName": "INST_RETIRED.ANY",
-        "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
+        "CounterMask": "2",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
+        "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
         "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
+        "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "2",
+        "CounterMask": "3",
         "EventCode": "0xB1",
-        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
+        "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x2"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
+        "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA2",
-        "EventName": "RESOURCE_STALLS.SB",
-        "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
+        "CounterMask": "4",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+        "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x8"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.",
+        "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
-        "EdgeDetect": "1",
-        "EventCode": "0x3C",
-        "EventName": "CPU_CLK_UNHALTED.RING0_TRANS",
-        "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).",
-        "SampleAfterValue": "100007"
-    },
-    {
-        "BriefDescription": "All (macro) branch instructions retired.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "Errata": "SKL091",
-        "EventCode": "0xC4",
-        "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
-        "PEBS": "2",
-        "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.",
-        "SampleAfterValue": "400009",
-        "UMask": "0x4"
-    },
-    {
-        "BriefDescription": "Mispredicted macro branch instructions retired.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3",
-        "EventCode": "0xC5",
-        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
-        "PEBS": "2",
-        "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.",
-        "SampleAfterValue": "400009",
-        "UMask": "0x4"
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+        "Invert": "1",
+        "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.",
+        "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA6",
-        "EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
-        "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.THREAD",
+        "PublicDescription": "Number of uops to be executed per-thread each cycle.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x2"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Not taken branch instructions retired.",
+        "BriefDescription": "Counts the number of x87 uops dispatched.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "Errata": "SKL091",
-        "EventCode": "0xC4",
-        "EventName": "BR_INST_RETIRED.NOT_TAKEN",
-        "PublicDescription": "This event counts not taken branch instructions retired.",
-        "SampleAfterValue": "400009",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.X87",
+        "PublicDescription": "Counts the number of x87 uops executed.",
+        "SampleAfterValue": "2000003",
         "UMask": "0x10"
     },
     {
-        "BriefDescription": "Conditional branch instructions retired.",
+        "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "Errata": "SKL091",
-        "EventCode": "0xC4",
-        "EventName": "BR_INST_RETIRED.CONDITIONAL",
-        "PEBS": "1",
-        "PublicDescription": "This event counts conditional branch instructions retired.",
-        "SampleAfterValue": "400009",
+        "EventCode": "0x0E",
+        "EventName": "UOPS_ISSUED.ANY",
+        "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
+        "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Mispredicted conditional branch instructions retired.",
+        "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xC5",
-        "EventName": "BR_MISP_RETIRED.CONDITIONAL",
-        "PEBS": "1",
-        "PublicDescription": "This event counts mispredicted conditional branch instructions retired.",
-        "SampleAfterValue": "400009",
-        "UMask": "0x1"
+        "EventCode": "0x0E",
+        "EventName": "UOPS_ISSUED.SLOW_LEA",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20"
     },
     {
-        "BriefDescription": "Number of uops executed on the core.",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xB1",
-        "EventName": "UOPS_EXECUTED.CORE",
-        "PublicDescription": "Number of uops executed from any thread.",
+        "CounterMask": "1",
+        "EventCode": "0x0E",
+        "EventName": "UOPS_ISSUED.STALL_CYCLES",
+        "Invert": "1",
+        "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x2"
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+        "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "12",
-        "EventCode": "0xA3",
-        "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+        "EventCode": "0x0E",
+        "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH",
+        "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.",
         "SampleAfterValue": "2000003",
-        "UMask": "0xc"
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.",
+        "BriefDescription": "Number of macro-fused uops retired. (non precise)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xA6",
-        "EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
-        "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.MACRO_FUSED",
+        "PublicDescription": "Counts the number of macro-fused uops retired. (non precise)",
         "SampleAfterValue": "2000003",
         "UMask": "0x4"
     },
     {
-        "BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.",
+        "BriefDescription": "Retirement slots used.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x0D",
-        "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
+        "EventCode": "0xC2",
+        "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+        "PublicDescription": "Counts the retirement slots used.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x80"
-    },
-    {
-        "BriefDescription": "All (macro) branch instructions retired.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "Errata": "SKL091",
-        "EventCode": "0xC4",
-        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
-        "PublicDescription": "Counts all (macro) branch instructions retired.",
-        "SampleAfterValue": "400009"
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
+        "BriefDescription": "Cycles without actually retired uops.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
-        "EventCode": "0xB1",
-        "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
-        "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
+        "EventCode": "0xC2",
+        "EventName": "UOPS_RETIRED.STALL_CYCLES",
+        "Invert": "1",
+        "PublicDescription": "This event counts cycles without actually retired uops.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
+        "BriefDescription": "Cycles with less than 10 actually retired uops.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0xA3",
-        "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+        "CounterMask": "10",
+        "EventCode": "0xC2",
+        "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+        "Invert": "1",
+        "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x1"
+        "UMask": "0x2"
     }
 ]
 \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
index 0dd8b13b5cfb..863c9e103969 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
@@ -1,62 +1,5 @@
 [
     {
-        "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
-        "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)",
-        "MetricGroup": "TopdownL1",
-        "MetricName": "Frontend_Bound",
-        "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound."
-    },
-    {
-        "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
-        "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
-        "MetricGroup": "TopdownL1_SMT",
-        "MetricName": "Frontend_Bound_SMT",
-        "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
-    },
-    {
-        "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
-        "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)",
-        "MetricGroup": "TopdownL1",
-        "MetricName": "Bad_Speculation",
-        "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example."
-    },
-    {
-        "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.",
-        "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
-        "MetricGroup": "TopdownL1_SMT",
-        "MetricName": "Bad_Speculation_SMT",
-        "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU."
-    },
-    {
-        "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
-        "MetricConstraint": "NO_NMI_WATCHDOG",
-        "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )",
-        "MetricGroup": "TopdownL1",
-        "MetricName": "Backend_Bound",
-        "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound."
-    },
-    {
-        "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
-        "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) )",
-        "MetricGroup": "TopdownL1_SMT",
-        "MetricName": "Backend_Bound_SMT",
-        "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
-    },
-    {
-        "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
-        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)",
-        "MetricGroup": "TopdownL1",
-        "MetricName": "Retiring",
-        "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category.  Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved.  Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance.  For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. "
-    },
-    {
-        "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.",
-        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
-        "MetricGroup": "TopdownL1_SMT",
-        "MetricName": "Retiring_SMT",
-        "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category.  Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved.  Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance.  For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU."
-    },
-    {
         "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
         "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
         "MetricGroup": "Summary",
@@ -71,49 +14,79 @@
     {
         "BriefDescription": "Instruction per taken branch",
         "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
-        "MetricGroup": "Branches;Fetch_BW;PGO",
+        "MetricGroup": "Branches;FetchBW;PGO",
         "MetricName": "IpTB"
     },
     {
         "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
-        "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
-        "MetricGroup": "Pipeline;Summary",
+        "MetricExpr": "1 / (INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD)",
+        "MetricGroup": "Pipeline",
         "MetricName": "CPI"
     },
     {
         "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
         "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
-        "MetricGroup": "Summary",
+        "MetricGroup": "Pipeline",
         "MetricName": "CLKS"
     },
     {
-        "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
-        "MetricExpr": "4 * cycles",
-        "MetricGroup": "TopDownL1",
-        "MetricName": "SLOTS"
+        "BriefDescription": "Instructions Per Cycle (per physical core)",
+        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "SMT;TmaL1",
+        "MetricName": "CoreIPC"
     },
     {
-        "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
-        "MetricExpr": "4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
-        "MetricGroup": "TopDownL1_SMT",
-        "MetricName": "SLOTS_SMT"
+        "BriefDescription": "Instructions Per Cycle (per physical core)",
+        "MetricExpr": "INST_RETIRED.ANY / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
+        "MetricGroup": "SMT;TmaL1",
+        "MetricName": "CoreIPC_SMT"
+    },
+    {
+        "BriefDescription": "Floating Point Operations Per Cycle",
+        "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "Flops",
+        "MetricName": "FLOPc"
+    },
+    {
+        "BriefDescription": "Floating Point Operations Per Cycle",
+        "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
+        "MetricGroup": "Flops_SMT",
+        "MetricName": "FLOPc_SMT"
+    },
+    {
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
+        "MetricGroup": "Pipeline;PortsUtil",
+        "MetricName": "ILP"
+    },
+    {
+        "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
+        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+        "MetricGroup": "BrMispredicts",
+        "MetricName": "IpMispredict"
+    },
+    {
+        "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
+        "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "SMT",
+        "MetricName": "CORE_CLKS"
     },
     {
         "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
         "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
-        "MetricGroup": "Instruction_Type",
+        "MetricGroup": "InsType",
         "MetricName": "IpLoad"
     },
     {
         "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
         "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
-        "MetricGroup": "Instruction_Type",
+        "MetricGroup": "InsType",
         "MetricName": "IpStore"
     },
     {
         "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
         "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
-        "MetricGroup": "Branches;Instruction_Type",
+        "MetricGroup": "Branches;InsType",
         "MetricName": "IpBranch"
     },
     {
@@ -131,176 +104,122 @@
     {
         "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
         "MetricExpr": "INST_RETIRED.ANY / ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )",
-        "MetricGroup": "FLOPS;FP_Arith;Instruction_Type",
+        "MetricGroup": "Flops;FpArith;InsType",
         "MetricName": "IpFLOP"
     },
     {
-        "BriefDescription": "Total number of retired Instructions",
+        "BriefDescription": "Total number of retired Instructions, Sample with: INST_RETIRED.PREC_DIST",
         "MetricExpr": "INST_RETIRED.ANY",
-        "MetricGroup": "Summary;TopDownL1",
+        "MetricGroup": "Summary;TmaL1",
         "MetricName": "Instructions"
     },
     {
         "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
         "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
-        "MetricGroup": "DSB;Fetch_BW",
+        "MetricGroup": "DSB;FetchBW",
         "MetricName": "DSB_Coverage"
     },
     {
-        "BriefDescription": "Instructions Per Cycle (per physical core)",
-        "MetricExpr": "INST_RETIRED.ANY / cycles",
-        "MetricGroup": "SMT;TopDownL1",
-        "MetricName": "CoreIPC"
-    },
-    {
-        "BriefDescription": "Instructions Per Cycle (per physical core)",
-        "MetricExpr": "INST_RETIRED.ANY / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
-        "MetricGroup": "SMT;TopDownL1",
-        "MetricName": "CoreIPC_SMT"
-    },
-    {
-        "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / cycles",
-        "MetricGroup": "FLOPS",
-        "MetricName": "FLOPc"
-    },
-    {
-        "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
-        "MetricGroup": "FLOPS_SMT",
-        "MetricName": "FLOPc_SMT"
-    },
-    {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 )",
-        "MetricGroup": "Pipeline;Ports_Utilization",
-        "MetricName": "ILP"
-    },
-    {
-        "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)",
-        "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES",
-        "MetricGroup": "BrMispredicts",
-        "MetricName": "Branch_Misprediction_Cost"
-    },
-    {
-        "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)",
-        "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) * (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / BR_MISP_RETIRED.ALL_BRANCHES",
-        "MetricGroup": "BrMispredicts_SMT",
-        "MetricName": "Branch_Misprediction_Cost_SMT"
-    },
-    {
-        "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
-        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
-        "MetricGroup": "BrMispredicts",
-        "MetricName": "IpMispredict"
-    },
-    {
-        "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
-        "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
-        "MetricGroup": "SMT",
-        "MetricName": "CORE_CLKS"
-    },
-    {
         "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)",
         "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )",
-        "MetricGroup": "Memory_Bound;Memory_Lat",
+        "MetricGroup": "MemoryBound;MemoryLat",
         "MetricName": "Load_Miss_Real_Latency"
     },
     {
         "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
         "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
-        "MetricGroup": "Memory_Bound;Memory_BW",
+        "MetricGroup": "MemoryBound;MemoryBW",
         "MetricName": "MLP"
     },
     {
         "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
         "MetricConstraint": "NO_NMI_WATCHDOG",
-        "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )",
-        "MetricGroup": "TLB",
+        "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * CORE_CLKS )",
+        "MetricGroup": "MemoryTLB",
         "MetricName": "Page_Walks_Utilization"
     },
     {
-        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
-        "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )",
-        "MetricGroup": "TLB_SMT",
-        "MetricName": "Page_Walks_Utilization_SMT"
-    },
-    {
         "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]",
         "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
-        "MetricGroup": "Memory_BW",
+        "MetricGroup": "MemoryBW",
         "MetricName": "L1D_Cache_Fill_BW"
     },
     {
         "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]",
         "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
-        "MetricGroup": "Memory_BW",
+        "MetricGroup": "MemoryBW",
         "MetricName": "L2_Cache_Fill_BW"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
         "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time",
-        "MetricGroup": "Memory_BW",
+        "MetricGroup": "MemoryBW",
         "MetricName": "L3_Cache_Fill_BW"
     },
     {
-        "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+        "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
         "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time",
-        "MetricGroup": "Memory_BW;Offcore",
+        "MetricGroup": "MemoryBW;Offcore",
         "MetricName": "L3_Cache_Access_BW"
     },
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "Cache_Misses",
+        "MetricGroup": "CacheMisses",
         "MetricName": "L1MPKI"
     },
     {
         "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "Cache_Misses",
+        "MetricGroup": "CacheMisses",
         "MetricName": "L2MPKI"
     },
     {
         "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY",
-        "MetricGroup": "Cache_Misses;Offcore",
+        "MetricGroup": "CacheMisses;Offcore",
         "MetricName": "L2MPKI_All"
     },
     {
         "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1000 * ( L2_RQSTS.REFERENCES - L2_RQSTS.MISS ) / INST_RETIRED.ANY",
-        "MetricGroup": "Cache_Misses",
+        "MetricGroup": "CacheMisses",
         "MetricName": "L2HPKI_All"
     },
     {
         "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "Cache_Misses",
+        "MetricGroup": "CacheMisses",
         "MetricName": "L3MPKI"
     },
     {
         "BriefDescription": "Rate of silent evictions from the L2 cache per Kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)",
         "MetricExpr": "1000 * L2_LINES_OUT.SILENT / INST_RETIRED.ANY",
-        "MetricGroup": "",
+        "MetricGroup": "L2Evicts;Server",
         "MetricName": "L2_Evictions_Silent_PKI"
     },
     {
         "BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
         "MetricExpr": "1000 * L2_LINES_OUT.NON_SILENT / INST_RETIRED.ANY",
-        "MetricGroup": "",
+        "MetricGroup": "L2Evicts;Server",
         "MetricName": "L2_Evictions_NonSilent_PKI"
     },
     {
         "BriefDescription": "Average CPU Utilization",
         "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
-        "MetricGroup": "Summary",
+        "MetricGroup": "HPC;Summary",
         "MetricName": "CPU_Utilization"
     },
     {
+        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+        "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time",
+        "MetricGroup": "Summary;Power",
+        "MetricName": "Average_Frequency"
+    },
+    {
         "BriefDescription": "Giga Floating Point Operations Per Second",
         "MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / 1000000000 ) / duration_time",
-        "MetricGroup": "FLOPS;Summary",
+        "MetricGroup": "Flops;HPC",
         "MetricName": "GFLOPs"
     },
     {
@@ -311,44 +230,44 @@
     },
     {
         "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
-        "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 )",
-        "MetricGroup": "SMT;Summary",
+        "MetricExpr": "1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+        "MetricGroup": "SMT",
         "MetricName": "SMT_2T_Utilization"
     },
     {
         "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
-        "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
         "MetricGroup": "OS",
         "MetricName": "Kernel_Utilization"
     },
     {
         "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
-        "MetricExpr": "( ( ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) * 1048576 ) / 1000000000 ) / duration_time",
-        "MetricGroup": "Memory_BW;SoC",
+        "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time",
+        "MetricGroup": "HPC;MemoryBW;SoC",
         "MetricName": "DRAM_BW_Use"
     },
     {
         "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches",
         "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x35\\,umask\\=0x21\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )",
-        "MetricGroup": "Memory_Lat;SoC",
+        "MetricGroup": "MemoryLat;SoC",
         "MetricName": "MEM_Read_Latency"
     },
     {
         "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
         "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433\\,thresh\\=1@",
-        "MetricGroup": "Memory_BW;SoC",
+        "MetricGroup": "MemoryBW;SoC",
         "MetricName": "MEM_Parallel_Reads"
     },
     {
         "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]",
         "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3 ) * 4 / 1000000000 / duration_time",
-        "MetricGroup": "IO_BW;SoC;Server",
+        "MetricGroup": "IoBW;SoC;Server",
         "MetricName": "IO_Write_BW"
     },
     {
         "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]",
         "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3 ) * 4 / 1000000000 / duration_time",
-        "MetricGroup": "IO_BW;SoC;Server",
+        "MetricGroup": "IoBW;SoC;Server",
         "MetricName": "IO_Read_BW"
     },
     {
@@ -359,7 +278,7 @@
     },
     {
         "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
-        "MetricExpr": "INST_RETIRED.ANY / ( BR_INST_RETIRED.FAR_BRANCH / 2 )",
+        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
         "MetricGroup": "Branches;OS",
         "MetricName": "IpFarBranch"
     },
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json
index b80b5d66385d..0b66e6af8177 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json
@@ -65,15 +65,6 @@
         "Unit": "iMC"
     },
     {
-        "BriefDescription": "Pre-charge for writes",
-        "Counter": "0,1,2,3",
-        "EventCode": "0x2",
-        "EventName": "UNC_M_PRE_COUNT.WR",
-        "PerPkg": "1",
-        "UMask": "0x8",
-        "Unit": "iMC"
-    },
-    {
         "BriefDescription": "DRAM Page Activate commands sent due to a write request",
         "Counter": "0,1,2,3",
         "EventCode": "0x1",
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json
index d7a0270de983..6ed92bc5c129 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json
@@ -104,15 +104,6 @@
         "Unit": "CHA"
     },
     {
-        "BriefDescription": "write requests from remote home agent",
-        "Counter": "0,1,2,3",
-        "EventCode": "0x50",
-        "EventName": "UNC_CHA_REQUESTS.WRITES_REMOTE",
-        "PerPkg": "1",
-        "UMask": "0x08",
-        "Unit": "CHA"
-    },
-    {
         "BriefDescription": "UPI interconnect send bandwidth for payload. Derived from unc_upi_txl_flits.all_data",
         "Counter": "0,1,2,3",
         "EventCode": "0x2",
@@ -533,7 +524,7 @@
         "EventCode": "0x5C",
         "EventName": "UNC_CHA_SNOOP_RESP.RSP_WBWB",
         "PerPkg": "1",
-        "PublicDescription": "Counts when a transaction with the opcode type Rsp*WB Snoop Response was received which indicates which indicates the data was written back to it's home.  This is returned when a non-RFO request hits a cacheline in the Modified state. The Cache can either downgrade the cacheline to a S (Shared) or I (Invalid) state depending on how the system has been configured.  This response will also be sent when a cache requests E (Exclusive) ownership of a cache line without receiving data, because the cache must acquire ownership.",
+        "PublicDescription": "Counts when a transaction with the opcode type Rsp*WB Snoop Response was received which indicates which indicates the data was written back to it's home.  This is returned when a non-RFO request hits a cacheline in the Modified state. The Cache can either downgrade the cacheline to a S (Shared) or I (Invalid) state depending on how the system has been configured.  This reponse will also be sent when a cache requests E (Exclusive) ownership of a cache line without receiving data, because the cache must acquire ownership.",
         "UMask": "0x10",
         "Unit": "CHA"
     },
@@ -547,6 +538,98 @@
         "Unit": "IIO"
     },
     {
+        "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 0",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART0",
+        "FCMask": "0x4",
+        "PerPkg": "1",
+        "PortMask": "0x01",
+        "PublicDescription": "PCIe Completion Buffer Inserts of completions with data: Part 0",
+        "UMask": "0x03",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 1",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART1",
+        "FCMask": "0x4",
+        "PerPkg": "1",
+        "PortMask": "0x02",
+        "PublicDescription": "PCIe Completion Buffer Inserts of completions with data: Part 1",
+        "UMask": "0x03",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART2",
+        "FCMask": "0x4",
+        "PerPkg": "1",
+        "PortMask": "0x04",
+        "PublicDescription": "PCIe Completion Buffer Inserts of completions with data: Part 2",
+        "UMask": "0x03",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 3",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART3",
+        "FCMask": "0x4",
+        "PerPkg": "1",
+        "PortMask": "0x08",
+        "PublicDescription": "PCIe Completion Buffer Inserts of completions with data: Part 3",
+        "UMask": "0x03",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIe Completion Buffer occupancy of completions with data: Part 0",
+        "Counter": "2,3",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART0",
+        "FCMask": "0x04",
+        "PerPkg": "1",
+        "PublicDescription": "PCIe Completion Buffer occupancy of completions with data: Part 0",
+        "UMask": "0x01",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIe Completion Buffer occupancy of completions with data: Part 1",
+        "Counter": "2,3",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART1",
+        "FCMask": "0x04",
+        "PerPkg": "1",
+        "PublicDescription": "PCIe Completion Buffer occupancy of completions with data: Part 1",
+        "UMask": "0x02",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIe Completion Buffer occupancy of completions with data: Part 2",
+        "Counter": "2,3",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART2",
+        "FCMask": "0x04",
+        "PerPkg": "1",
+        "PublicDescription": "PCIe Completion Buffer occupancy of completions with data: Part 2",
+        "UMask": "0x04",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIe Completion Buffer occupancy of completions with data: Part 3",
+        "Counter": "2,3",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART3",
+        "FCMask": "0x04",
+        "PerPkg": "1",
+        "PublicDescription": "PCIe Completion Buffer occupancy of completions with data: Part 3",
+        "UMask": "0x08",
+        "Unit": "IIO"
+    },
+    {
         "BriefDescription": "Read request for 4 bytes made by the CPU to IIO Part0",
         "Counter": "2,3",
         "EventCode": "0xC0",
@@ -1219,6 +1302,64 @@
         "Unit": "IIO"
     },
     {
+        "BriefDescription": "Total IRP occupancy of inbound read and write requests.",
+        "Counter": "0,1",
+        "EventCode": "0xF",
+        "EventName": "UNC_I_CACHE_TOTAL_OCCUPANCY.MEM",
+        "PerPkg": "1",
+        "PublicDescription": "Total IRP occupancy of inbound read and write requests.  This is effectively the sum of read occupancy and write occupancy.",
+        "UMask": "0x4",
+        "Unit": "IRP"
+    },
+    {
+        "BriefDescription": "PCIITOM request issued by the IRP unit to the mesh with the intention of writing a full cacheline.",
+        "Counter": "0,1",
+        "EventCode": "0x10",
+        "EventName": "UNC_I_COHERENT_OPS.PCITOM",
+        "PerPkg": "1",
+        "PublicDescription": "PCIITOM request issued by the IRP unit to the mesh with the intention of writing a full cacheline to coherent memory, without a RFO.  PCIITOM is a speculative Invalidate to Modified command that requests ownership of the cacheline and does not move data from the mesh to IRP cache.",
+        "UMask": "0x10",
+        "Unit": "IRP"
+    },
+    {
+        "BriefDescription": "RFO request issued by the IRP unit to the mesh with the intention of writing a partial cacheline.",
+        "Counter": "0,1",
+        "EventCode": "0x10",
+        "EventName": "UNC_I_COHERENT_OPS.RFO",
+        "PerPkg": "1",
+        "PublicDescription": "RFO request issued by the IRP unit to the mesh with the intention of writing a partial cacheline to coherent memory.  RFO is a Read For Ownership command that requests ownership of the cacheline and moves data from the mesh to IRP cache.",
+        "UMask": "0x8",
+        "Unit": "IRP"
+    },
+    {
+        "BriefDescription": "Inbound read requests received by the IRP and inserted into the FAF queue.",
+        "Counter": "0,1",
+        "EventCode": "0x18",
+        "EventName": "UNC_I_FAF_INSERTS",
+        "PerPkg": "1",
+        "PublicDescription": "Inbound read requests to coherent memory, received by the IRP and inserted into the Fire and Forget queue (FAF), a queue used for processing inbound reads in the IRP.",
+        "Unit": "IRP"
+    },
+    {
+        "BriefDescription": "Occupancy of the IRP FAF queue.",
+        "Counter": "0,1",
+        "EventCode": "0x19",
+        "EventName": "UNC_I_FAF_OCCUPANCY",
+        "PerPkg": "1",
+        "PublicDescription": "Occupancy of the IRP Fire and Forget (FAF) queue, a queue used for processing inbound reads in the IRP.",
+        "Unit": "IRP"
+    },
+    {
+        "BriefDescription": "Inbound write (fast path) requests received by the IRP.",
+        "Counter": "0,1",
+        "EventCode": "0x11",
+        "EventName": "UNC_I_TRANSACTIONS.WR_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "Inbound write (fast path) requests to coherent memory, received by the IRP resulting in write ownership requests issued by IRP to the mesh.",
+        "UMask": "0x8",
+        "Unit": "IRP"
+    },
+    {
         "BriefDescription": "Traffic in which the M2M to iMC Bypass was not taken",
         "Counter": "0,1,2,3",
         "EventCode": "0x22",
@@ -1466,7 +1607,7 @@
         "EventCode": "0x57",
         "EventName": "UNC_M2M_PREFCAM_INSERTS",
         "PerPkg": "1",
-        "PublicDescription": "Counts when the M2M (Mesh to Memory) receives a prefetch request and inserts it into its outstanding prefetch queue.  Explanatory Side Note: the prefect queue is made from CAM: Content Addressable Memory",
+        "PublicDescription": "Counts when the M2M (Mesh to Memory) recieves a prefetch request and inserts it into its outstanding prefetch queue.  Explanatory Side Note: the prefect queue is made from CAM: Content Addressable Memory",
         "Unit": "M2M"
     },
     {
@@ -1605,7 +1746,7 @@
         "EventCode": "0x31",
         "EventName": "UNC_UPI_RxL_BYPASSED.SLOT0",
         "PerPkg": "1",
-        "PublicDescription": "Counts incoming FLITs (FLow control unITs) which bypassed the slot0 RxQ buffer (Receive Queue) and passed directly to the Egress.  This is a latency optimization, and should generally be the common case.  If this value is less than the number of FLITs transferred, it implies that there was queueing getting onto the ring, and thus the transactions saw higher latency.",
+        "PublicDescription": "Counts incoming FLITs (FLow control unITs) which bypassed the slot0 RxQ buffer (Receive Queue) and passed directly to the Egress.  This is a latency optimization, and should generally be the common case.  If this value is less than the number of FLITs transfered, it implies that there was queueing getting onto the ring, and thus the transactions saw higher latency.",
         "UMask": "0x1",
         "Unit": "UPI LL"
     },
@@ -1615,17 +1756,17 @@
         "EventCode": "0x31",
         "EventName": "UNC_UPI_RxL_BYPASSED.SLOT1",
         "PerPkg": "1",
-        "PublicDescription": "Counts incoming FLITs (FLow control unITs) which bypassed the slot1 RxQ buffer  (Receive Queue) and passed directly across the BGF and into the Egress.  This is a latency optimization, and should generally be the common case.  If this value is less than the number of FLITs transferred, it implies that there was queueing getting onto the ring, and thus the transactions saw higher latency.",
+        "PublicDescription": "Counts incoming FLITs (FLow control unITs) which bypassed the slot1 RxQ buffer  (Receive Queue) and passed directly across the BGF and into the Egress.  This is a latency optimization, and should generally be the common case.  If this value is less than the number of FLITs transfered, it implies that there was queueing getting onto the ring, and thus the transactions saw higher latency.",
         "UMask": "0x2",
         "Unit": "UPI LL"
     },
     {
-        "BriefDescription": "FLITs received which bypassed the Slot0 Receive Buffer",
+        "BriefDescription": "FLITs received which bypassed the Slot0 Recieve Buffer",
         "Counter": "0,1,2,3",
         "EventCode": "0x31",
         "EventName": "UNC_UPI_RxL_BYPASSED.SLOT2",
         "PerPkg": "1",
-        "PublicDescription": "Counts incoming FLITs (FLow control unITs) which bypassed the slot2 RxQ buffer (Receive Queue)  and passed directly to the Egress.  This is a latency optimization, and should generally be the common case.  If this value is less than the number of FLITs transferred, it implies that there was queueing getting onto the ring, and thus the transactions saw higher latency.",
+        "PublicDescription": "Counts incoming FLITs (FLow control unITs) whcih bypassed the slot2 RxQ buffer (Receive Queue)  and passed directly to the Egress.  This is a latency optimization, and should generally be the common case.  If this value is less than the number of FLITs transfered, it implies that there was queueing getting onto the ring, and thus the transactions saw higher latency.",
         "UMask": "0x4",
         "Unit": "UPI LL"
     },
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json b/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json
index bbeee1058096..792ca39f013a 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json
@@ -1,103 +1,74 @@
 [
     {
-        "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x85",
-        "EventName": "ITLB_MISSES.STLB_HIT",
-        "SampleAfterValue": "100003",
-        "UMask": "0x20"
-    },
-    {
-        "BriefDescription": "Store misses in all DTLB levels that cause page walks",
+        "BriefDescription": "Load misses in all DTLB levels that cause page walks",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x49",
-        "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
-        "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+        "PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page",
+        "BriefDescription": "Loads that miss the DTLB and hit the STLB.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x49",
-        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
-        "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M/4M pages.  The page walks can end with or without a page fault.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x4"
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+        "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20"
     },
     {
-        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.",
+        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x85",
-        "EventName": "ITLB_MISSES.WALK_PENDING",
-        "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.",
+        "CounterMask": "1",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
+        "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.",
         "SampleAfterValue": "100003",
         "UMask": "0x10"
     },
     {
-        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x85",
-        "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
-        "PublicDescription": "Counts completed page walks (4K page size) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xAE",
-        "EventName": "ITLB.ITLB_FLUSH",
-        "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).",
-        "SampleAfterValue": "100007",
-        "UMask": "0x1"
-    },
-    {
-        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.",
+        "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x85",
-        "EventName": "ITLB_MISSES.WALK_ACTIVE",
-        "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "PublicDescription": "Counts completed page walks  (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "UMask": "0x10"
+        "UMask": "0xe"
     },
     {
-        "BriefDescription": "Loads that miss the DTLB and hit the STLB.",
+        "BriefDescription": "Page walk completed due to a demand data load to a 1G page",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x08",
-        "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
-        "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
+        "PublicDescription": "Counts completed page walks  (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x20"
+        "UMask": "0x8"
     },
     {
-        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
+        "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x49",
-        "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
-        "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+        "PublicDescription": "Counts completed page walks  (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x10"
+        "UMask": "0x4"
     },
     {
-        "BriefDescription": "DTLB flush attempts of the thread-specific entries",
+        "BriefDescription": "Page walk completed due to a demand data load to a 4K page",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xBD",
-        "EventName": "TLB_FLUSH.DTLB_THREAD",
-        "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.",
-        "SampleAfterValue": "100007",
-        "UMask": "0x1"
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+        "PublicDescription": "Counts completed page walks  (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
         "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
@@ -110,23 +81,12 @@
         "UMask": "0x10"
     },
     {
-        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
+        "BriefDescription": "Store misses in all DTLB levels that cause page walks",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
         "EventCode": "0x49",
-        "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
-        "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
-        "SampleAfterValue": "100003",
-        "UMask": "0x10"
-    },
-    {
-        "BriefDescription": "Misses at all ITLB levels that cause page walks",
-        "Counter": "0,1,2,3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x85",
-        "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
-        "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.",
+        "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+        "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
     },
@@ -141,125 +101,135 @@
         "UMask": "0x20"
     },
     {
-        "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
+        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
         "EventCode": "0x49",
-        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
-        "PublicDescription": "Counts demand data stores that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
+        "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
+        "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
         "SampleAfterValue": "100003",
-        "UMask": "0xe"
+        "UMask": "0x10"
     },
     {
-        "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
+        "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x08",
-        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
-        "PublicDescription": "Counts demand data loads that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+        "PublicDescription": "Counts completed page walks  (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
         "UMask": "0xe"
     },
     {
-        "BriefDescription": "Page walk completed due to a demand data store to a 4K page",
+        "BriefDescription": "Page walk completed due to a demand data store to a 1G page",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x49",
-        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
-        "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages.  The page walks can end with or without a page fault.",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
+        "PublicDescription": "Counts completed page walks  (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "UMask": "0x2"
+        "UMask": "0x8"
     },
     {
-        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)",
+        "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x85",
-        "EventName": "ITLB_MISSES.WALK_COMPLETED_1G",
-        "PublicDescription": "Counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+        "PublicDescription": "Counts completed page walks  (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "UMask": "0x8"
+        "UMask": "0x4"
     },
     {
-        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)",
+        "BriefDescription": "Page walk completed due to a demand data store to a 4K page",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x85",
-        "EventName": "ITLB_MISSES.WALK_COMPLETED",
-        "PublicDescription": "Counts completed page walks (2M and 4M page sizes) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+        "PublicDescription": "Counts completed page walks  (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "UMask": "0xe"
+        "UMask": "0x2"
     },
     {
-        "BriefDescription": "Page walk completed due to a demand data load to a 4K page",
+        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x08",
-        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
-        "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages.  The page walks can end with or without a page fault.",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
+        "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x2"
+        "UMask": "0x10"
     },
     {
-        "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page",
+        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x08",
-        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
-        "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 2M/4M pages.  The page walks can end with or without a page fault.",
+        "EventCode": "0x4f",
+        "EventName": "EPT.WALK_PENDING",
+        "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.",
         "SampleAfterValue": "2000003",
-        "UMask": "0x4"
+        "UMask": "0x10"
     },
     {
-        "BriefDescription": "Load misses in all DTLB levels that cause page walks",
+        "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x08",
-        "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
-        "PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
-        "SampleAfterValue": "100003",
+        "EventCode": "0xAE",
+        "EventName": "ITLB.ITLB_FLUSH",
+        "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).",
+        "SampleAfterValue": "100007",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.",
+        "BriefDescription": "Misses at all ITLB levels that cause page walks",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x4F",
-        "EventName": "EPT.WALK_PENDING",
-        "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x10"
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+        "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "BriefDescription": "STLB flush attempts",
+        "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xBD",
-        "EventName": "TLB_FLUSH.STLB_ANY",
-        "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).",
-        "SampleAfterValue": "100007",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.STLB_HIT",
+        "SampleAfterValue": "100003",
         "UMask": "0x20"
     },
     {
-        "BriefDescription": "Page walk completed due to a demand data load to a 1G page",
+        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x08",
-        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
-        "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages.  The page walks can end with or without a page fault.",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x8"
+        "CounterMask": "1",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_ACTIVE",
+        "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x10"
     },
     {
-        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EventCode": "0x08",
-        "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
-        "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED",
+        "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "UMask": "0x10"
+        "UMask": "0xe"
+    },
+    {
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_1G",
+        "PublicDescription": "Counts completed page walks (1G page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x8"
     },
     {
         "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
@@ -267,18 +237,48 @@
         "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x85",
         "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
-        "PublicDescription": "Counts code misses in all ITLB levels that caused a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+        "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
         "UMask": "0x4"
     },
     {
-        "BriefDescription": "Page walk completed due to a demand data store to a 1G page",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
         "Counter": "0,1,2,3",
         "CounterHTOff": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x49",
-        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
-        "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1G pages.  The page walks can end with or without a page fault.",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+        "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "UMask": "0x8"
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_PENDING",
+        "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "DTLB flush attempts of the thread-specific entries",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xBD",
+        "EventName": "TLB_FLUSH.DTLB_THREAD",
+        "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "STLB flush attempts",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xBD",
+        "EventName": "TLB_FLUSH.STLB_ANY",
+        "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).",
+        "SampleAfterValue": "100007",
+        "UMask": "0x20"
     }
 ]
 \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/tigerlake/cache.json b/tools/perf/pmu-events/arch/x86/tigerlake/cache.json
new file mode 100644
index 000000000000..8d767b8932b0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/tigerlake/cache.json
@@ -0,0 +1,595 @@
+[
+    {
+        "BriefDescription": "Counts the number of cache lines replaced in L1 data cache.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x51",
+        "EventName": "L1D.REPLACEMENT",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x48",
+        "EventName": "L1D_PEND_MISS.FB_FULL",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailablability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailablability.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "CounterMask": "1",
+        "EdgeDetect": "1",
+        "EventCode": "0x48",
+        "EventName": "L1D_PEND_MISS.FB_FULL_PERIODS",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailablability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Number of cycles a demand request has waited due to L1D due to lack of L2 resources.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x48",
+        "EventName": "L1D_PEND_MISS.L2_STALL",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Number of L1D misses that are outstanding",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x48",
+        "EventName": "L1D_PEND_MISS.PENDING",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Cycles with L1D load Misses outstanding.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "CounterMask": "1",
+        "EventCode": "0x48",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "L2 cache lines filling L2",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xf1",
+        "EventName": "L2_LINES_IN.ALL",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1f"
+    },
+    {
+        "BriefDescription": "Modified cache lines that are evicted by L2 cache when triggered by an L2 cache fill.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xf2",
+        "EventName": "L2_LINES_OUT.NON_SILENT",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Non-modified cache lines that are silently dropped by L2 cache when triggered by an L2 cache fill.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xf2",
+        "EventName": "L2_LINES_OUT.SILENT",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "L2 code requests",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.ALL_CODE_RD",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the total number of L2 code requests.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xe4"
+    },
+    {
+        "BriefDescription": "RFO requests to L2 cache",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.ALL_RFO",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xe2"
+    },
+    {
+        "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.CODE_RD_HIT",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xc4"
+    },
+    {
+        "BriefDescription": "L2 cache misses when fetching instructions",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.CODE_RD_MISS",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts L2 cache misses when fetching instructions.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x24"
+    },
+    {
+        "BriefDescription": "All requests that miss L2 cache",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.MISS",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts all requests that miss L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x3f"
+    },
+    {
+        "BriefDescription": "RFO requests that hit L2 cache",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.RFO_HIT",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xc2"
+    },
+    {
+        "BriefDescription": "RFO requests that miss L2 cache",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.RFO_MISS",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x22"
+    },
+    {
+        "BriefDescription": "SW prefetch requests that hit L2 cache.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.SWPF_HIT",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. This event accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xc8"
+    },
+    {
+        "BriefDescription": "SW prefetch requests that miss L2 cache.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.SWPF_MISS",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. This event accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x28"
+    },
+    {
+        "BriefDescription": "L2 writebacks that access L2 cache",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xf0",
+        "EventName": "L2_TRANS.L2_WB",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts L2 writebacks that access L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x40"
+    },
+    {
+        "BriefDescription": "Cycles when L1D is locked",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x63",
+        "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "This event counts the number of cycles when the L1D is locked. It is a superset of the 0x1 mask (BUS_LOCK_CLOCKS.BUS_LOCK_DURATION).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "All retired load instructions.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_INST_RETIRED.ALL_LOADS",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts all retired load instructions. This event accounts for SW prefetch instructions for loads.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x81"
+    },
+    {
+        "BriefDescription": "All retired store instructions.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_INST_RETIRED.ALL_STORES",
+        "L1_Hit_Indication": "1",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts all retired store instructions. This event account for SW prefetch instructions and PREFETCHW instruction for stores.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x82"
+    },
+    {
+        "BriefDescription": "Retired load instructions with locked access.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_INST_RETIRED.LOCK_LOADS",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts retired load instructions with locked access.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x21"
+    },
+    {
+        "BriefDescription": "Retired load instructions that split across a cacheline boundary.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x41"
+    },
+    {
+        "BriefDescription": "Retired store instructions that split across a cacheline boundary.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_INST_RETIRED.SPLIT_STORES",
+        "L1_Hit_Indication": "1",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x42"
+    },
+    {
+        "BriefDescription": "Retired load instructions that miss the STLB.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts retired load instructions that true miss the STLB.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x11"
+    },
+    {
+        "BriefDescription": "Retired store instructions that miss the STLB.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
+        "L1_Hit_Indication": "1",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts retired store instructions that true miss the STLB.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x12"
+    },
+    {
+        "BriefDescription": "TBD",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "20011",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+        "SampleAfterValue": "20011",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Retired load instructions whose data sources were hits in L3 without snoops required",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts retired load instructions whose data sources were hits in L3 without snoops required.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "TBD",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "20011",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Number of completed demand load requests that missed the L1, but hit the FB(fill buffer), because a preceding miss to the same cacheline initiated the line to be brought into L1, but data is not yet ready in L1.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_RETIRED.FB_HIT",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x40"
+    },
+    {
+        "BriefDescription": "Retired load instructions with L1 cache hits as data sources",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_RETIRED.L1_HIT",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Retired load instructions missed L1 cache as data sources",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_RETIRED.L1_MISS",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Retired load instructions with L2 cache hits as data sources",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts retired load instructions with L2 cache hits as data sources.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Retired load instructions missed L2 cache as data sources",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_RETIRED.L2_MISS",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts retired load instructions missed L2 cache as data sources.",
+        "SampleAfterValue": "100021",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Retired load instructions with L3 cache hits as data sources",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_RETIRED.L3_HIT",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache.",
+        "SampleAfterValue": "100021",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Retired load instructions missed L3 cache as data sources",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_RETIRED.L3_MISS",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache.",
+        "SampleAfterValue": "50021",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Demand and prefetch data reads",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xb0",
+        "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Any memory transaction that reached the SQ.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xb0",
+        "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..",
+        "SampleAfterValue": "100003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Demand Data Read requests sent to uncore",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xb0",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xb0",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "CounterMask": "1",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "CounterMask": "1",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "CounterMask": "1",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Demand Data Read transactions pending for off-core. Highly correlated.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of off-core outstanding Demand Data Read transactions every cycle. A transaction is considered to be in the Off-core outstanding state between L2 cache miss and data-return to the core.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "CounterMask": "6",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Store Read transactions pending for off-core. Highly correlated.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of off-core outstanding read-for-ownership (RFO) store transactions every cycle. An RFO transaction is considered to be in the Off-core outstanding state between L2 cache miss and transaction completion.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Cycles the superQ cannot take any more entries.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xf4",
+        "EventName": "SQ_MISC.SQ_FULL",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the cycles for which the thread is active and the superQ cannot take any more entries.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4"
+    }
+]
+\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/tigerlake/floating-point.json b/tools/perf/pmu-events/arch/x86/tigerlake/floating-point.json
new file mode 100644
index 000000000000..402f01851313
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/tigerlake/floating-point.json
@@ -0,0 +1,94 @@
+[
+    {
+        "BriefDescription": "Counts all microcode FP assists.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc1",
+        "EventName": "ASSISTS.FP",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts all microcode Floating Point assists.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 2 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc7",
+        "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 4 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc7",
+        "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 4 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Counts number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 4 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc7",
+        "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "SampleAfterValue": "100003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 8 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc7",
+        "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "SampleAfterValue": "100003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Counts number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 8 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc7",
+        "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "SampleAfterValue": "100003",
+        "UMask": "0x40"
+    },
+    {
+        "BriefDescription": "Counts number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 16 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc7",
+        "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "SampleAfterValue": "100003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Counts number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc7",
+        "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc7",
+        "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2"
+    }
+]
+\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/tigerlake/frontend.json b/tools/perf/pmu-events/arch/x86/tigerlake/frontend.json
new file mode 100644
index 000000000000..24c736ac8f8e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/tigerlake/frontend.json
@@ -0,0 +1,463 @@
+[
+    {
+        "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xe6",
+        "EventName": "BACLEARS.ANY",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE transitions count.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "CounterMask": "1",
+        "EdgeDetect": "1",
+        "EventCode": "0xab",
+        "EventName": "DSB2MITE_SWITCHES.COUNT",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of Decode Stream Buffer (DSB a.k.a. Uop Cache)-to-MITE speculative transitions.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "DSB-to-MITE switch true penalty cycles.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xab",
+        "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Retired Instructions who experienced DSB miss.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.DSB_MISS",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x11",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Retired Instructions who experienced iTLB true miss.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.ITLB_MISS",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x14",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.L1I_MISS",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x12",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired Instructions who experienced Instruction L1 Cache true miss.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.L2_MISS",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x13",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired Instructions who experienced Instruction L2 Cache true miss.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_1",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x500106",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x508006",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x501006",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Retired instructions after front-end starvation of at least 2 cycles",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x500206",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 2 cycles which was not interrupted by a back-end stall.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x510006",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x100206",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x502006",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x500406",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x520006",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x504006",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x500806",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.STLB_MISS",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x15",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x80",
+        "EventName": "ICACHE_16B.IFDATA_STALL",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.",
+        "SampleAfterValue": "500009",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x83",
+        "EventName": "ICACHE_64B.IFTAG_HIT",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity. Accounts for both cacheable and uncacheable accesses.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x83",
+        "EventName": "ICACHE_64B.IFTAG_MISS",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity. Accounts for both cacheable and uncacheable accesses.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x83",
+        "EventName": "ICACHE_64B.IFTAG_STALL",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "CounterMask": "1",
+        "EventCode": "0x79",
+        "EventName": "IDQ.DSB_CYCLES_ANY",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Cycles DSB is delivering optimal number of Uops",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "CounterMask": "5",
+        "EventCode": "0x79",
+        "EventName": "IDQ.DSB_CYCLES_OK",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x79",
+        "EventName": "IDQ.DSB_UOPS",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Cycles MITE is delivering any Uop",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "CounterMask": "1",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MITE_CYCLES_ANY",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Cycles MITE is delivering optimal number of Uops",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "CounterMask": "5",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MITE_CYCLES_OK",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MITE_UOPS",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Cycles when uops are being delivered to IDQ while MS is busy",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "CounterMask": "1",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MS_CYCLES_ANY",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x30"
+    },
+    {
+        "BriefDescription": "Number of switches from DSB or MITE to the MS",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "CounterMask": "1",
+        "EdgeDetect": "1",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MS_SWITCHES",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x30"
+    },
+    {
+        "BriefDescription": "Uops delivered to IDQ while MS is busy",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MS_UOPS",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x30"
+    },
+    {
+        "BriefDescription": "Uops not delivered by IDQ when backend of the machine is not stalled",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x9c",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Cycles when no uops are not delivered by the IDQ when backend of the machine is not stalled",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "CounterMask": "5",
+        "EventCode": "0x9c",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Cycles when optimal number of uops was delivered to the back-end when the back-end is not stalled",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x9c",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+        "Invert": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    }
+]
+\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/tigerlake/memory.json b/tools/perf/pmu-events/arch/x86/tigerlake/memory.json
new file mode 100644
index 000000000000..0948de0b160c
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/tigerlake/memory.json
@@ -0,0 +1,295 @@
+[
+    {
+        "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "CounterMask": "6",
+        "EventCode": "0xa3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x6"
+    },
+    {
+        "BriefDescription": "Number of machine clears due to memory ordering conflicts.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x80",
+        "PEBS": "2",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "1009",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x10",
+        "PEBS": "2",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "20011",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x100",
+        "PEBS": "2",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "503",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x20",
+        "PEBS": "2",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x4",
+        "PEBS": "2",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "100003",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x200",
+        "PEBS": "2",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "101",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x40",
+        "PEBS": "2",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "2003",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x8",
+        "PEBS": "2",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "50021",
+        "TakenAlone": "1",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Demand Data Read requests who miss L3 cache",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xb0",
+        "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Demand Data Read requests who miss L3 cache.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Number of times an RTM execution aborted.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc9",
+        "EventName": "RTM_RETIRED.ABORTED",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of times RTM abort was triggered.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc9",
+        "EventName": "RTM_RETIRED.ABORTED_EVENTS",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).",
+        "SampleAfterValue": "100003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc9",
+        "EventName": "RTM_RETIRED.ABORTED_MEM",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
+        "SampleAfterValue": "100003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc9",
+        "EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of times an RTM execution aborted due to incompatible memory type.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x40"
+    },
+    {
+        "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc9",
+        "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of times an RTM execution aborted due to HLE-unfriendly instructions.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Number of times an RTM execution successfully committed",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc9",
+        "EventName": "RTM_RETIRED.COMMIT",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of times RTM commit succeeded.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Number of times an RTM execution started.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc9",
+        "EventName": "RTM_RETIRED.START",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of times we entered an RTM region. Does not count nested transactions.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed inside a transactional region",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x5d",
+        "EventName": "TX_EXEC.MISC2",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts Unfriendly TSX abort triggered by a vzeroupper instruction.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Number of times an instruction execution caused the transactional nest count supported to be exceeded",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x5d",
+        "EventName": "TX_EXEC.MISC3",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts Unfriendly TSX abort triggered by a nest count that is too deep.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Speculatively counts the number of TSX aborts due to a data capacity limitation for transactional reads",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_CAPACITY_READ",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Speculatively counts the number of TSX aborts due to a data capacity limitation for transactional writes.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_CAPACITY_WRITE",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional writes.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_CONFLICT",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of times a TSX line had a cache conflict.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    }
+]
+\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/tigerlake/other.json b/tools/perf/pmu-events/arch/x86/tigerlake/other.json
new file mode 100644
index 000000000000..b1143fe74246
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/tigerlake/other.json
@@ -0,0 +1,189 @@
+[
+    {
+        "BriefDescription": "Number of occurrences where a microcode assist is invoked by hardware.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc1",
+        "EventName": "ASSISTS.ANY",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware Examples include AD (page Access Dirty), FP and AVX related assists.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x7"
+    },
+    {
+        "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the Non-AVX turbo schedule.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x28",
+        "EventName": "CORE_POWER.LVL0_TURBO_LICENSE",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts Core cycles where the core was running with power-delivery for baseline license level 0.  This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x7"
+    },
+    {
+        "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX2 turbo schedule.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x28",
+        "EventName": "CORE_POWER.LVL1_TURBO_LICENSE",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts Core cycles where the core was running with power-delivery for license level 1.  This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x18"
+    },
+    {
+        "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x28",
+        "EventName": "CORE_POWER.LVL2_TURBO_LICENSE",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchtecture).  This includes high current AVX 512-bit instructions.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Counts demand data reads that hit a cacheline in the L3 where a snoop hit in another cores caches, data forwarding is required as the data is modified.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10003C0001",
+        "Offcore": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x8003C0001",
+        "Offcore": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that hit a cacheline in the L3 where a snoop hit in another cores caches, data forwarding is required as the data is modified.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10003C0002",
+        "Offcore": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts streaming stores that have any type of response.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OCR.STREAMING_WR.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10800",
+        "Offcore": "1",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Number of PREFETCHNTA instructions executed.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x32",
+        "EventName": "SW_PREFETCH_ACCESS.NTA",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of PREFETCHNTA instructions executed.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Number of PREFETCHW instructions executed.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x32",
+        "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of PREFETCHW instructions executed.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Number of PREFETCHT0 instructions executed.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x32",
+        "EventName": "SW_PREFETCH_ACCESS.T0",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of PREFETCHT0 instructions executed.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x32",
+        "EventName": "SW_PREFETCH_ACCESS.T1_T2",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "TMA slots where no uops were being issued due to lack of back-end resources.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xa4",
+        "EventName": "TOPDOWN.BACKEND_BOUND_SLOTS",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of Top-down Microarchitecture Analysis (TMA) method's  slots where no micro-operations were being issued from front-end to back-end of the machine due to lack of back-end resources.",
+        "SampleAfterValue": "10000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "TMA slots wasted due to incorrect speculation by branch mispredictions",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xa4",
+        "EventName": "TOPDOWN.BR_MISPREDICT_SLOTS",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by branch mispredictions. This event estimates number of operations that were issued but not retired from the specualtive path as well as the out-of-order engine recovery past a branch misprediction.",
+        "SampleAfterValue": "10000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "TMA slots available for an unhalted logical processor. Fixed counter - architectural event",
+        "CollectPEBSRecord": "2",
+        "Counter": "Fixed counter 3",
+        "EventName": "TOPDOWN.SLOTS",
+        "PEBScounters": "35",
+        "PublicDescription": "Number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method (TMA). The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Software can use this event as the denominator for the top-level metrics of the TMA method. This architectural event is counted on a designated fixed counter (Fixed Counter 3).",
+        "SampleAfterValue": "10000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "TMA slots available for an unhalted logical processor. General counter - architectural event",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xa4",
+        "EventName": "TOPDOWN.SLOTS_P",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core.",
+        "SampleAfterValue": "10000003",
+        "UMask": "0x1"
+    }
+]
+\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/tigerlake/pipeline.json b/tools/perf/pmu-events/arch/x86/tigerlake/pipeline.json
new file mode 100644
index 000000000000..d0d8a09bc470
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/tigerlake/pipeline.json
@@ -0,0 +1,982 @@
+[
+    {
+        "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x14",
+        "EventName": "ARITH.DIVIDER_ACTIVE",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x9"
+    },
+    {
+        "BriefDescription": "All branch instructions retired.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts all branch instructions retired.",
+        "SampleAfterValue": "400009"
+    },
+    {
+        "BriefDescription": "Conditional branch instructions retired.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.COND",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts conditional branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "UMask": "0x11"
+    },
+    {
+        "BriefDescription": "Not taken branch instructions retired.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.COND_NTAKEN",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts not taken branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Taken conditional branch instructions retired.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.COND_TAKEN",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts taken conditional branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Far branch instructions retired.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts far branch instructions retired.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x40"
+    },
+    {
+        "BriefDescription": "All indirect branch instructions retired (excluding RETs. TSX aborts are considered indirect branch).",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.INDIRECT",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts all indirect branch instructions retired (excluding RETs. TSX aborts is considered indirect branch).",
+        "SampleAfterValue": "100003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Direct and indirect near call instructions retired.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts both direct and indirect near call instructions retired.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Return instructions retired.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts return instructions retired.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Taken branch instructions retired.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts taken branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "All mispredicted branch instructions retired.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch.  When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+        "SampleAfterValue": "50021"
+    },
+    {
+        "BriefDescription": "Mispredicted conditional branch instructions retired.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.COND",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts mispredicted conditional branch instructions retired.",
+        "SampleAfterValue": "50021",
+        "UMask": "0x11"
+    },
+    {
+        "BriefDescription": "Mispredicted non-taken conditional branch instructions retired.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.COND_NTAKEN",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of conditional branch instructions retired that were mispredicted and the branch direction was not taken.",
+        "SampleAfterValue": "50021",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "number of branch instructions retired that were mispredicted and taken. Non PEBS",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.COND_TAKEN",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts taken conditional mispredicted branch instructions retired.",
+        "SampleAfterValue": "50021",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "All miss-predicted indirect branch instructions retired (excluding RETs. TSX aborts is considered indirect branch).",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.INDIRECT",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts all miss-predicted indirect branch instructions retired (excluding RETs. TSX aborts is considered indirect branch).",
+        "SampleAfterValue": "50021",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Mispredicted indirect CALL instructions retired.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.INDIRECT_CALL",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired mispredicted indirect (near taken) CALL instructions, including both register and memory indirect.",
+        "SampleAfterValue": "50021",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts number of near branch instructions retired that were mispredicted and taken.",
+        "SampleAfterValue": "50021",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Cycle counts are evenly distributed between active threads in the Core.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xec",
+        "EventName": "CPU_CLK_UNHALTED.DISTRIBUTED",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0.  A hyperthread becomes inactive when it executes the HLT or MWAIT instructions.  If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x3c",
+        "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted.",
+        "SampleAfterValue": "25003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Core crystal clock cycles. Cycle counts are evenly distributed between active threads in the Core.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x3c",
+        "EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Reference cycles when the core is not in halt state.",
+        "CollectPEBSRecord": "2",
+        "Counter": "Fixed counter 2",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+        "PEBScounters": "34",
+        "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'.  The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'.  After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x3"
+    },
+    {
+        "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x3c",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts core crystal clock cycles when the thread is unhalted.",
+        "SampleAfterValue": "25003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Core cycles when the thread is not in halt state",
+        "CollectPEBSRecord": "2",
+        "Counter": "Fixed counter 1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD",
+        "PEBScounters": "33",
+        "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Thread cycles when thread is not in halt state",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x3c",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
+        "SampleAfterValue": "2000003"
+    },
+    {
+        "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "CounterMask": "8",
+        "EventCode": "0xa3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "CounterMask": "1",
+        "EventCode": "0xa3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "CounterMask": "16",
+        "EventCode": "0xa3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "CounterMask": "12",
+        "EventCode": "0xa3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "1000003",
+        "UMask": "0xc"
+    },
+    {
+        "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "CounterMask": "5",
+        "EventCode": "0xa3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
+        "PEBScounters": "0,1,2,3",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x5"
+    },
+    {
+        "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "CounterMask": "20",
+        "EventCode": "0xa3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x14"
+    },
+    {
+        "BriefDescription": "Total execution stalls.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "CounterMask": "4",
+        "EventCode": "0xa3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xa6",
+        "EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xa6",
+        "EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xa6",
+        "EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xa6",
+        "EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Cycles when the memory subsystem has an outstanding load. Increments by 4 for every such cycle.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "CounterMask": "5",
+        "EventCode": "0xa6",
+        "EventName": "EXE_ACTIVITY.BOUND_ON_LOADS",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts cycles when the memory subsystem has an outstanding load. Increments by 4 for every such cycle.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x21"
+    },
+    {
+        "BriefDescription": "Cycles where the Store Buffer was full and no loads caused an execution stall.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "CounterMask": "2",
+        "EventCode": "0xa6",
+        "EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x40"
+    },
+    {
+        "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xa6",
+        "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x87",
+        "EventName": "ILD_STALL.LCP",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
+        "SampleAfterValue": "500009",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Number of instructions retired. Fixed Counter - architectural event",
+        "CollectPEBSRecord": "2",
+        "Counter": "Fixed counter 0",
+        "EventName": "INST_RETIRED.ANY",
+        "PEBS": "1",
+        "PEBScounters": "32",
+        "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc0",
+        "EventName": "INST_RETIRED.ANY_P",
+        "PEBS": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter.",
+        "SampleAfterValue": "2000003"
+    },
+    {
+        "BriefDescription": "Precise instruction retired event with a reduced effect of PEBS shadow in IP distribution",
+        "CollectPEBSRecord": "2",
+        "Counter": "Fixed counter 0",
+        "EventName": "INST_RETIRED.PREC_DIST",
+        "PEBS": "1",
+        "PEBScounters": "32",
+        "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled. Use on Fixed Counter 0.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Cycles the Backend cluster is recovering after a miss-speculation or a Store Buffer or Load Buffer drain stall.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x0d",
+        "EventName": "INT_MISC.ALL_RECOVERY_CYCLES",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts cycles the Backend cluster is recovering after a miss-speculation or a Store Buffer or Load Buffer drain stall.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x3"
+    },
+    {
+        "BriefDescription": "Counts cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x0d",
+        "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
+        "SampleAfterValue": "500009",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x0d",
+        "EventName": "INT_MISC.RECOVERY_CYCLES",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
+        "SampleAfterValue": "500009",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "TMA slots where uops got dropped",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x0d",
+        "EventName": "INT_MISC.UOP_DROPPING",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.NO_SR",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.STORE_FORWARD",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "False dependencies in MOB due to partial compare on address.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x07",
+        "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4c",
+        "EventName": "LOAD_HIT_PREFETCH.SWPF",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "CounterMask": "1",
+        "EventCode": "0xa8",
+        "EventName": "LSD.CYCLES_ACTIVE",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Cycles optimal number of Uops delivered by the LSD, but did not come from the decoder.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "CounterMask": "5",
+        "EventCode": "0xa8",
+        "EventName": "LSD.CYCLES_OK",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Number of Uops delivered by the LSD.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xa8",
+        "EventName": "LSD.UOPS",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Number of machine clears (nukes) of any type.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EdgeDetect": "1",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.COUNT",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of machine clears (nukes) of any type.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Self-modifying code (SMC) detected.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Increments whenever there is an update to the LBR array.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xcc",
+        "EventName": "MISC_RETIRED.LBR_INSERTS",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Number of retired PAUSE instructions. This event is not supported on first SKL and KBL products.",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xcc",
+        "EventName": "MISC_RETIRED.PAUSE_INST",
+        "PublicDescription": "Counts number of retired PAUSE instructions. This event is not supported on first SKL and KBL products.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x40"
+    },
+    {
+        "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xa2",
+        "EventName": "RESOURCE_STALLS.SB",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Counts cycles where the pipeline is stalled due to serializing operations.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xa2",
+        "EventName": "RESOURCE_STALLS.SCOREBOARD",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x5e",
+        "EventName": "RS_EVENTS.EMPTY_CYCLES",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into stravation periods (e.g. branch mispredictions or i-cache misses)",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EdgeDetect": "1",
+        "EventCode": "0x5e",
+        "EventName": "RS_EVENTS.EMPTY_END",
+        "Invert": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Number of uops executed on port 0",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xa1",
+        "EventName": "UOPS_DISPATCHED.PORT_0",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Number of uops executed on port 1",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xa1",
+        "EventName": "UOPS_DISPATCHED.PORT_1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Number of uops executed on port 2 and 3",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xa1",
+        "EventName": "UOPS_DISPATCHED.PORT_2_3",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to ports 2 and 3.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Number of uops executed on port 4 and 9",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xa1",
+        "EventName": "UOPS_DISPATCHED.PORT_4_9",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to ports 5 and 9.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Number of uops executed on port 5",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xa1",
+        "EventName": "UOPS_DISPATCHED.PORT_5",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Number of uops executed on port 6",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xa1",
+        "EventName": "UOPS_DISPATCHED.PORT_6",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x40"
+    },
+    {
+        "BriefDescription": "Number of uops executed on port 7 and 8",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xa1",
+        "EventName": "UOPS_DISPATCHED.PORT_7_8",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to ports 7 and 8.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Number of uops executed on the core.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xb1",
+        "EventName": "UOPS_EXECUTED.CORE",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of uops executed from any thread.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0xb1",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "CounterMask": "2",
+        "EventCode": "0xb1",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "CounterMask": "3",
+        "EventCode": "0xb1",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "CounterMask": "4",
+        "EventCode": "0xb1",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0xb1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "CounterMask": "2",
+        "EventCode": "0xb1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_2",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "CounterMask": "3",
+        "EventCode": "0xb1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_3",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "CounterMask": "4",
+        "EventCode": "0xb1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_4",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0xb1",
+        "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+        "Invert": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xb1",
+        "EventName": "UOPS_EXECUTED.THREAD",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of x87 uops dispatched.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xb1",
+        "EventName": "UOPS_EXECUTED.X87",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of x87 uops executed.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Uops that RAT issues to RS",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x0e",
+        "EventName": "UOPS_ISSUED.ANY",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Cycles when RAT does not issue Uops to RS for the thread",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x0e",
+        "EventName": "UOPS_ISSUED.STALL_CYCLES",
+        "Invert": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x0e",
+        "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Retirement slots used.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.SLOTS",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the retirement slots used each cycle.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Cycles without actually retired uops.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.STALL_CYCLES",
+        "Invert": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "This event counts cycles without actually retired uops.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Cycles with less than 10 actually retired uops.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5,6,7",
+        "CounterMask": "10",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+        "Invert": "1",
+        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of cycles using always true condition (uops_ret &amp;lt; 16) applied to non PEBS uops retired event.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    }
+]
+\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json b/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json
new file mode 100644
index 000000000000..00a16f1a0f44
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json
@@ -0,0 +1,231 @@
+[
+    {
+        "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "Summary",
+        "MetricName": "IPC"
+    },
+    {
+        "BriefDescription": "Instruction per taken branch",
+        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
+        "MetricGroup": "Branches;FetchBW;PGO",
+        "MetricName": "IpTB"
+    },
+    {
+        "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+        "MetricExpr": "1 / IPC",
+        "MetricGroup": "Pipeline",
+        "MetricName": "CPI"
+    },
+    {
+        "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "Pipeline",
+        "MetricName": "CLKS"
+    },
+    {
+        "BriefDescription": "Instructions Per Cycle (per physical core)",
+        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.DISTRIBUTED",
+        "MetricGroup": "SMT;TmaL1",
+        "MetricName": "CoreIPC"
+    },
+    {
+        "BriefDescription": "Floating Point Operations Per Cycle",
+        "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / CPU_CLK_UNHALTED.DISTRIBUTED",
+        "MetricGroup": "Flops",
+        "MetricName": "FLOPc"
+    },
+    {
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 )",
+        "MetricGroup": "Pipeline;PortsUtil",
+        "MetricName": "ILP"
+    },
+    {
+        "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
+        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+        "MetricGroup": "BrMispredicts",
+        "MetricName": "IpMispredict"
+    },
+    {
+        "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
+        "MetricExpr": "CPU_CLK_UNHALTED.DISTRIBUTED",
+        "MetricGroup": "SMT",
+        "MetricName": "CORE_CLKS"
+    },
+    {
+        "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
+        "MetricGroup": "InsType",
+        "MetricName": "IpLoad"
+    },
+    {
+        "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
+        "MetricGroup": "InsType",
+        "MetricName": "IpStore"
+    },
+    {
+        "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
+        "MetricGroup": "Branches;InsType",
+        "MetricName": "IpBranch"
+    },
+    {
+        "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
+        "MetricGroup": "Branches",
+        "MetricName": "IpCall"
+    },
+    {
+        "BriefDescription": "Branch instructions per taken branch. ",
+        "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+        "MetricGroup": "Branches;PGO",
+        "MetricName": "BpTkBranch"
+    },
+    {
+        "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )",
+        "MetricGroup": "Flops;FpArith;InsType",
+        "MetricName": "IpFLOP"
+    },
+    {
+        "BriefDescription": "Total number of retired Instructions, Sample with: INST_RETIRED.PREC_DIST",
+        "MetricExpr": "INST_RETIRED.ANY",
+        "MetricGroup": "Summary;TmaL1",
+        "MetricName": "Instructions"
+    },
+    {
+        "BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)",
+        "MetricExpr": "LSD.UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
+        "MetricGroup": "LSD",
+        "MetricName": "LSD_Coverage"
+    },
+    {
+        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
+        "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
+        "MetricGroup": "DSB;FetchBW",
+        "MetricName": "DSB_Coverage"
+    },
+    {
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )",
+        "MetricGroup": "MemoryBound;MemoryLat",
+        "MetricName": "Load_Miss_Real_Latency"
+    },
+    {
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+        "MetricGroup": "MemoryBound;MemoryBW",
+        "MetricName": "MLP"
+    },
+    {
+        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+        "MetricConstraint": "NO_NMI_WATCHDOG",
+        "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING ) / ( 2 * CORE_CLKS )",
+        "MetricGroup": "MemoryTLB",
+        "MetricName": "Page_Walks_Utilization"
+    },
+    {
+        "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]",
+        "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
+        "MetricGroup": "MemoryBW",
+        "MetricName": "L1D_Cache_Fill_BW"
+    },
+    {
+        "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]",
+        "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
+        "MetricGroup": "MemoryBW",
+        "MetricName": "L2_Cache_Fill_BW"
+    },
+    {
+        "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
+        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time",
+        "MetricGroup": "MemoryBW;Offcore",
+        "MetricName": "L3_Cache_Access_BW"
+    },
+    {
+        "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
+        "MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
+        "MetricGroup": "CacheMisses",
+        "MetricName": "L1MPKI"
+    },
+    {
+        "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
+        "MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
+        "MetricGroup": "CacheMisses",
+        "MetricName": "L2MPKI"
+    },
+    {
+        "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+        "MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
+        "MetricGroup": "CacheMisses",
+        "MetricName": "L3MPKI"
+    },
+    {
+        "BriefDescription": "Average CPU Utilization",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+        "MetricGroup": "HPC;Summary",
+        "MetricName": "CPU_Utilization"
+    },
+    {
+        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+        "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time",
+        "MetricGroup": "Summary;Power",
+        "MetricName": "Average_Frequency"
+    },
+    {
+        "BriefDescription": "Giga Floating Point Operations Per Second",
+        "MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / 1000000000 ) / duration_time",
+        "MetricGroup": "Flops;HPC",
+        "MetricName": "GFLOPs"
+    },
+    {
+        "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricGroup": "Power",
+        "MetricName": "Turbo_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
+        "MetricExpr": "1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_DISTRIBUTED",
+        "MetricGroup": "SMT",
+        "MetricName": "SMT_2T_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "OS",
+        "MetricName": "Kernel_Utilization"
+    },
+    {
+        "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
+        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
+        "MetricGroup": "Branches;OS",
+        "MetricName": "IpFarBranch"
+    },
+    {
+        "BriefDescription": "C6 residency percent per core",
+        "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Core_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per core",
+        "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Core_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Pkg_Residency"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/x86/tigerlake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/tigerlake/virtual-memory.json
new file mode 100644
index 000000000000..3ebec78969b0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/tigerlake/virtual-memory.json
@@ -0,0 +1,225 @@
+[
+    {
+        "BriefDescription": "Loads that miss the DTLB and hit the STLB.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
+        "SampleAfterValue": "100003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a demand load.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "CounterMask": "1",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts completed page walks  (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "UMask": "0xe"
+    },
+    {
+        "BriefDescription": "Page walks completed due to a demand data load to a 2M/4M page.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts completed page walks  (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Page walks completed due to a demand data load to a 4K page.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts completed page walks  (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Number of page walks outstanding for a demand load in the PMH each cycle.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Stores that miss the DTLB and hit the STLB.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
+        "SampleAfterValue": "100003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "CounterMask": "1",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts completed page walks  (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "UMask": "0xe"
+    },
+    {
+        "BriefDescription": "Page walks completed due to a demand data store to a 2M/4M page.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M/4M pages.  The page walks can end with or without a page fault.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Page walks completed due to a demand data store to a 4K page.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages.  The page walks can end with or without a page fault.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Number of page walks outstanding for a store in the PMH each cycle.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.STLB_HIT",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB).",
+        "SampleAfterValue": "100003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "CounterMask": "1",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_ACTIVE",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "UMask": "0xe"
+    },
+    {
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Number of page walks outstanding for an outstanding code request in the PMH each cycle.",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_PENDING",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "DTLB flush attempts of the thread-specific entries",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xbd",
+        "EventName": "TLB_FLUSH.DTLB_THREAD",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "STLB flush attempts",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xbd",
+        "EventName": "TLB_FLUSH.STLB_ANY",
+        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).",
+        "SampleAfterValue": "100007",
+        "UMask": "0x20"
+    }
+]
+\ No newline at end of file
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index 9604446f8360..6731b3cf0c2f 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -814,7 +814,7 @@ static void print_mapping_test_table(FILE *outfp)
 	fprintf(outfp, "\t.cpuid = \"testcpu\",\n");
 	fprintf(outfp, "\t.version = \"v1\",\n");
 	fprintf(outfp, "\t.type = \"core\",\n");
-	fprintf(outfp, "\t.table = pme_test_cpu,\n");
+	fprintf(outfp, "\t.table = pme_test_soc_cpu,\n");
 	fprintf(outfp, "},\n");
 }
 
@@ -836,7 +836,8 @@ static int process_system_event_tables(FILE *outfp)
 	print_system_event_mapping_table_prefix(outfp);
 
 	list_for_each_entry(sys_event_table, &sys_event_tables, list) {
-		fprintf(outfp, "\n\t{\n\t\t.table = %s,\n\t},",
+		fprintf(outfp, "\n\t{\n\t\t.table = %s,\n\t\t.name = \"%s\",\n\t},",
+			sys_event_table->soc_id,
 			sys_event_table->soc_id);
 	}
 
diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h
index d1172f6aebf1..5c2bf7275c1c 100644
--- a/tools/perf/pmu-events/pmu-events.h
+++ b/tools/perf/pmu-events/pmu-events.h
@@ -45,6 +45,7 @@ struct pmu_events_map {
 };
 
 struct pmu_sys_events {
+	const char *name;
 	struct pmu_event *table;
 };
 
diff --git a/tools/perf/scripts/python/bin/stackcollapse-report b/tools/perf/scripts/python/bin/stackcollapse-report
index 356b9656393d..21a356bd27f6 100755
--- a/tools/perf/scripts/python/bin/stackcollapse-report
+++ b/tools/perf/scripts/python/bin/stackcollapse-report
@@ -1,3 +1,3 @@
 #!/bin/sh
 # description: produce callgraphs in short form for scripting use
-perf script -s "$PERF_EXEC_PATH"/scripts/python/stackcollapse.py -- "$@"
+perf script -s "$PERF_EXEC_PATH"/scripts/python/stackcollapse.py "$@"
diff --git a/tools/perf/scripts/python/flamegraph.py b/tools/perf/scripts/python/flamegraph.py
index 65780013f745..b6af1dd5f816 100755
--- a/tools/perf/scripts/python/flamegraph.py
+++ b/tools/perf/scripts/python/flamegraph.py
@@ -13,6 +13,10 @@
 # Written by Andreas Gerstmayr <agerstmayr@redhat.com>
 # Flame Graphs invented by Brendan Gregg <bgregg@netflix.com>
 # Works in tandem with d3-flame-graph by Martin Spier <mspier@netflix.com>
+#
+# pylint: disable=missing-module-docstring
+# pylint: disable=missing-class-docstring
+# pylint: disable=missing-function-docstring
 
 from __future__ import print_function
 import sys
@@ -20,16 +24,19 @@ import os
 import io
 import argparse
 import json
+import subprocess
 
-
+# pylint: disable=too-few-public-methods
 class Node:
-    def __init__(self, name, libtype=""):
+    def __init__(self, name, libtype):
         self.name = name
+        # "root" | "kernel" | ""
+        # "" indicates user space
         self.libtype = libtype
         self.value = 0
         self.children = []
 
-    def toJSON(self):
+    def to_json(self):
         return {
             "n": self.name,
             "l": self.libtype,
@@ -41,7 +48,7 @@ class Node:
 class FlameGraphCLI:
     def __init__(self, args):
         self.args = args
-        self.stack = Node("root")
+        self.stack = Node("all", "root")
 
         if self.args.format == "html" and \
                 not os.path.isfile(self.args.template):
@@ -53,13 +60,21 @@ class FlameGraphCLI:
                   file=sys.stderr)
             sys.exit(1)
 
-    def find_or_create_node(self, node, name, dso):
-        libtype = "kernel" if dso == "[kernel.kallsyms]" else ""
-        if name is None:
-            name = "[unknown]"
+    @staticmethod
+    def get_libtype_from_dso(dso):
+        """
+        when kernel-debuginfo is installed,
+        dso points to /usr/lib/debug/lib/modules/*/vmlinux
+        """
+        if dso and (dso == "[kernel.kallsyms]" or dso.endswith("/vmlinux")):
+            return "kernel"
 
+        return ""
+
+    @staticmethod
+    def find_or_create_node(node, name, libtype):
         for child in node.children:
-            if child.name == name and child.libtype == libtype:
+            if child.name == name:
                 return child
 
         child = Node(name, libtype)
@@ -67,30 +82,65 @@ class FlameGraphCLI:
         return child
 
     def process_event(self, event):
-        node = self.find_or_create_node(self.stack, event["comm"], None)
+        pid = event.get("sample", {}).get("pid", 0)
+        # event["dso"] sometimes contains /usr/lib/debug/lib/modules/*/vmlinux
+        # for user-space processes; let's use pid for kernel or user-space distinction
+        if pid == 0:
+            comm = event["comm"]
+            libtype = "kernel"
+        else:
+            comm = "{} ({})".format(event["comm"], pid)
+            libtype = ""
+        node = self.find_or_create_node(self.stack, comm, libtype)
+
         if "callchain" in event:
-            for entry in reversed(event['callchain']):
-                node = self.find_or_create_node(
-                    node, entry.get("sym", {}).get("name"), event.get("dso"))
+            for entry in reversed(event["callchain"]):
+                name = entry.get("sym", {}).get("name", "[unknown]")
+                libtype = self.get_libtype_from_dso(entry.get("dso"))
+                node = self.find_or_create_node(node, name, libtype)
         else:
-            node = self.find_or_create_node(
-                node, entry.get("symbol"), event.get("dso"))
+            name = event.get("symbol", "[unknown]")
+            libtype = self.get_libtype_from_dso(event.get("dso"))
+            node = self.find_or_create_node(node, name, libtype)
         node.value += 1
 
+    def get_report_header(self):
+        if self.args.input == "-":
+            # when this script is invoked with "perf script flamegraph",
+            # no perf.data is created and we cannot read the header of it
+            return ""
+
+        try:
+            output = subprocess.check_output(["perf", "report", "--header-only"])
+            return output.decode("utf-8")
+        except Exception as err:  # pylint: disable=broad-except
+            print("Error reading report header: {}".format(err), file=sys.stderr)
+            return ""
+
     def trace_end(self):
-        json_str = json.dumps(self.stack, default=lambda x: x.toJSON())
+        stacks_json = json.dumps(self.stack, default=lambda x: x.to_json())
 
         if self.args.format == "html":
+            report_header = self.get_report_header()
+            options = {
+                "colorscheme": self.args.colorscheme,
+                "context": report_header
+            }
+            options_json = json.dumps(options)
+
             try:
-                with io.open(self.args.template, encoding="utf-8") as f:
-                    output_str = f.read().replace("/** @flamegraph_json **/",
-                                                  json_str)
-            except IOError as e:
-                print("Error reading template file: {}".format(e), file=sys.stderr)
+                with io.open(self.args.template, encoding="utf-8") as template:
+                    output_str = (
+                        template.read()
+                        .replace("/** @options_json **/", options_json)
+                        .replace("/** @flamegraph_json **/", stacks_json)
+                    )
+            except IOError as err:
+                print("Error reading template file: {}".format(err), file=sys.stderr)
                 sys.exit(1)
             output_fn = self.args.output or "flamegraph.html"
         else:
-            output_str = json_str
+            output_str = stacks_json
             output_fn = self.args.output or "stacks.json"
 
         if output_fn == "-":
@@ -101,8 +151,8 @@ class FlameGraphCLI:
             try:
                 with io.open(output_fn, "w", encoding="utf-8") as out:
                     out.write(output_str)
-            except IOError as e:
-                print("Error writing output file: {}".format(e), file=sys.stderr)
+            except IOError as err:
+                print("Error writing output file: {}".format(err), file=sys.stderr)
                 sys.exit(1)
 
 
@@ -115,12 +165,16 @@ if __name__ == "__main__":
                         help="output file name")
     parser.add_argument("--template",
                         default="/usr/share/d3-flame-graph/d3-flamegraph-base.html",
-                        help="path to flamegraph HTML template")
+                        help="path to flame graph HTML template")
+    parser.add_argument("--colorscheme",
+                        default="blue-green",
+                        help="flame graph color scheme",
+                        choices=["blue-green", "orange"])
     parser.add_argument("-i", "--input",
                         help=argparse.SUPPRESS)
 
-    args = parser.parse_args()
-    cli = FlameGraphCLI(args)
+    cli_args = parser.parse_args()
+    cli = FlameGraphCLI(cli_args)
 
     process_event = cli.process_event
     trace_end = cli.trace_end
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 650aec19d490..803ca426f8e6 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -64,6 +64,7 @@ perf-y += parse-metric.o
 perf-y += pe-file-parsing.o
 perf-y += expand-cgroup.o
 perf-y += perf-time-to-tsc.o
+perf-y += dlfilter-test.o
 
 $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
 	$(call rule_mkdir)
diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c
index 96c137360918..12b805efdca0 100644
--- a/tools/perf/tests/bitmap.c
+++ b/tools/perf/tests/bitmap.c
@@ -14,7 +14,7 @@ static unsigned long *get_bitmap(const char *str, int nbits)
 	unsigned long *bm = NULL;
 	int i;
 
-	bm = bitmap_alloc(nbits);
+	bm = bitmap_zalloc(nbits);
 
 	if (map && bm) {
 		for (i = 0; i < map->nr; i++)
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index dbf5f5215abe..fa03ff0dc083 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -192,7 +192,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
 	}
 
 	if (count != expect * evlist->core.nr_entries) {
-		pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect, count);
+		pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect * evlist->core.nr_entries, count);
 		goto out_delete_evlist;
 	}
 
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 5e6242576236..da7dc5e45d0c 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -361,6 +361,10 @@ static struct test generic_tests[] = {
 		.is_supported = test__tsc_is_supported,
 	},
 	{
+		.desc = "dlfilter C API",
+		.func = test__dlfilter,
+	},
+	{
 		.func = NULL,
 	},
 };
@@ -594,7 +598,8 @@ static int shell_test__run(struct test *test, int subdir __maybe_unused)
 	return WEXITSTATUS(err) == 2 ? TEST_SKIP : TEST_FAIL;
 }
 
-static int run_shell_tests(int argc, const char *argv[], int i, int width)
+static int run_shell_tests(int argc, const char *argv[], int i, int width,
+				struct intlist *skiplist)
 {
 	struct dirent **entlist;
 	struct dirent *ent;
@@ -628,6 +633,12 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width)
 
 		st.file = ent->d_name;
 		pr_info("%2d: %-*s:", i, width, test.desc);
+
+		if (intlist__find(skiplist, i)) {
+			color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n");
+			continue;
+		}
+
 		test_and_print(&test, false, -1);
 	}
 
@@ -727,7 +738,7 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
 		}
 	}
 
-	return run_shell_tests(argc, argv, i, width);
+	return run_shell_tests(argc, argv, i, width, skiplist);
 }
 
 static int perf_test__list_shell(int argc, const char **argv, int i)
diff --git a/tools/perf/tests/dlfilter-test.c b/tools/perf/tests/dlfilter-test.c
new file mode 100644
index 000000000000..bc03b5df6828
--- /dev/null
+++ b/tools/perf/tests/dlfilter-test.c
@@ -0,0 +1,416 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test dlfilter C API. A perf.data file is synthesized and then processed
+ * by perf script with a dlfilter named dlfilter-test-api-v0.so. Also a C file
+ * is compiled to provide a dso to match the synthesized perf.data file.
+ */
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/perf_event.h>
+#include <internal/lib.h>
+#include <subcmd/exec-cmd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <libgen.h>
+#include <string.h>
+#include <errno.h>
+#include "debug.h"
+#include "tool.h"
+#include "event.h"
+#include "header.h"
+#include "machine.h"
+#include "dso.h"
+#include "map.h"
+#include "symbol.h"
+#include "synthetic-events.h"
+#include "util.h"
+#include "archinsn.h"
+#include "dlfilter.h"
+#include "tests.h"
+
+#define MAP_START 0x400000
+
+struct test_data {
+	struct perf_tool tool;
+	struct machine *machine;
+	int fd;
+	u64 foo;
+	u64 bar;
+	u64 ip;
+	u64 addr;
+	char perf[PATH_MAX];
+	char perf_data_file_name[PATH_MAX];
+	char c_file_name[PATH_MAX];
+	char prog_file_name[PATH_MAX];
+	char dlfilters[PATH_MAX];
+};
+
+static int test_result(const char *msg, int ret)
+{
+	pr_debug("%s\n", msg);
+	return ret;
+}
+
+static int process(struct perf_tool *tool, union perf_event *event,
+		   struct perf_sample *sample __maybe_unused,
+		   struct machine *machine __maybe_unused)
+{
+	struct test_data *td = container_of(tool, struct test_data, tool);
+	int fd = td->fd;
+
+	if (writen(fd, event, event->header.size) != event->header.size)
+		return -1;
+
+	return 0;
+}
+
+#define MAXCMD 4096
+#define REDIRECT_TO_DEV_NULL " >/dev/null 2>&1"
+
+static __printf(1, 2) int system_cmd(const char *fmt, ...)
+{
+	char cmd[MAXCMD + sizeof(REDIRECT_TO_DEV_NULL)];
+	int ret;
+
+	va_list args;
+
+	va_start(args, fmt);
+	ret = vsnprintf(cmd, MAXCMD, fmt, args);
+	va_end(args);
+
+	if (ret <= 0 || ret >= MAXCMD)
+		return -1;
+
+	if (!verbose)
+		strcat(cmd, REDIRECT_TO_DEV_NULL);
+
+	pr_debug("Command: %s\n", cmd);
+	ret = system(cmd);
+	if (ret)
+		pr_debug("Failed with return value %d\n", ret);
+
+	return ret;
+}
+
+static bool have_gcc(void)
+{
+	pr_debug("Checking for gcc\n");
+	return !system_cmd("gcc --version");
+}
+
+static int write_attr(struct test_data *td, u64 sample_type, u64 *id)
+{
+	struct perf_event_attr attr = {
+		.size = sizeof(attr),
+		.type = PERF_TYPE_HARDWARE,
+		.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
+		.sample_type = sample_type,
+		.sample_period = 1,
+	};
+
+	return perf_event__synthesize_attr(&td->tool, &attr, 1, id, process);
+}
+
+static int write_comm(int fd, pid_t pid, pid_t tid, const char *comm_str)
+{
+	struct perf_record_comm comm;
+	ssize_t sz = sizeof(comm);
+
+	comm.header.type = PERF_RECORD_COMM;
+	comm.header.misc = PERF_RECORD_MISC_USER;
+	comm.header.size = sz;
+
+	comm.pid = pid;
+	comm.tid = tid;
+	strncpy(comm.comm, comm_str, 16);
+
+	if (writen(fd, &comm, sz) != sz) {
+		pr_debug("%s failed\n", __func__);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int write_mmap(int fd, pid_t pid, pid_t tid, u64 start, u64 len, u64 pgoff,
+		      const char *filename)
+{
+	char buf[PERF_SAMPLE_MAX_SIZE];
+	struct perf_record_mmap *mmap = (struct perf_record_mmap *)buf;
+	size_t fsz = roundup(strlen(filename) + 1, 8);
+	ssize_t sz = sizeof(*mmap) - sizeof(mmap->filename) + fsz;
+
+	mmap->header.type = PERF_RECORD_MMAP;
+	mmap->header.misc = PERF_RECORD_MISC_USER;
+	mmap->header.size = sz;
+
+	mmap->pid   = pid;
+	mmap->tid   = tid;
+	mmap->start = start;
+	mmap->len   = len;
+	mmap->pgoff = pgoff;
+	strncpy(mmap->filename, filename, sizeof(mmap->filename));
+
+	if (writen(fd, mmap, sz) != sz) {
+		pr_debug("%s failed\n", __func__);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int write_sample(struct test_data *td, u64 sample_type, u64 id, pid_t pid, pid_t tid)
+{
+	char buf[PERF_SAMPLE_MAX_SIZE];
+	union perf_event *event = (union perf_event *)buf;
+	struct perf_sample sample = {
+		.ip		= td->ip,
+		.addr		= td->addr,
+		.id		= id,
+		.time		= 1234567890,
+		.cpu		= 31,
+		.pid		= pid,
+		.tid		= tid,
+		.period		= 543212345,
+		.stream_id	= 101,
+	};
+	int err;
+
+	event->header.type = PERF_RECORD_SAMPLE;
+	event->header.misc = PERF_RECORD_MISC_USER;
+	event->header.size = perf_event__sample_event_size(&sample, sample_type, 0);
+	err = perf_event__synthesize_sample(event, sample_type, 0, &sample);
+	if (err)
+		return test_result("perf_event__synthesize_sample() failed", TEST_FAIL);
+
+	err = process(&td->tool, event, &sample, td->machine);
+	if (err)
+		return test_result("Failed to write sample", TEST_FAIL);
+
+	return TEST_OK;
+}
+
+static void close_fd(int fd)
+{
+	if (fd >= 0)
+		close(fd);
+}
+
+static const char *prog = "int bar(){};int foo(){bar();};int main(){foo();return 0;}";
+
+static int write_prog(char *file_name)
+{
+	int fd = creat(file_name, 0644);
+	ssize_t n = strlen(prog);
+	bool err = fd < 0 || writen(fd, prog, n) != n;
+
+	close_fd(fd);
+	return err ? -1 : 0;
+}
+
+static int get_dlfilters_path(char *buf, size_t sz)
+{
+	char perf[PATH_MAX];
+	char path[PATH_MAX];
+	char *perf_path;
+	char *exec_path;
+
+	perf_exe(perf, sizeof(perf));
+	perf_path = dirname(perf);
+	snprintf(path, sizeof(path), "%s/dlfilters/dlfilter-test-api-v0.so", perf_path);
+	if (access(path, R_OK)) {
+		exec_path = get_argv_exec_path();
+		if (!exec_path)
+			return -1;
+		snprintf(path, sizeof(path), "%s/dlfilters/dlfilter-test-api-v0.so", exec_path);
+		free(exec_path);
+		if (access(path, R_OK))
+			return -1;
+	}
+	strlcpy(buf, dirname(path), sz);
+	return 0;
+}
+
+static int check_filter_desc(struct test_data *td)
+{
+	char *long_desc = NULL;
+	char *desc = NULL;
+	int ret;
+
+	if (get_filter_desc(td->dlfilters, "dlfilter-test-api-v0.so", &desc, &long_desc) &&
+	    long_desc && !strcmp(long_desc, "Filter used by the 'dlfilter C API' perf test") &&
+	    desc && !strcmp(desc, "dlfilter to test v0 C API"))
+		ret = 0;
+	else
+		ret = -1;
+
+	free(desc);
+	free(long_desc);
+	return ret;
+}
+
+static int get_ip_addr(struct test_data *td)
+{
+	struct map *map;
+	struct symbol *sym;
+
+	map = dso__new_map(td->prog_file_name);
+	if (!map)
+		return -1;
+
+	sym = map__find_symbol_by_name(map, "foo");
+	if (sym)
+		td->foo = sym->start;
+
+	sym = map__find_symbol_by_name(map, "bar");
+	if (sym)
+		td->bar = sym->start;
+
+	map__put(map);
+
+	td->ip = MAP_START + td->foo;
+	td->addr = MAP_START + td->bar;
+
+	return td->foo && td->bar ? 0 : -1;
+}
+
+static int do_run_perf_script(struct test_data *td, int do_early)
+{
+	return system_cmd("%s script -i %s "
+			  "--dlfilter %s/dlfilter-test-api-v0.so "
+			  "--dlarg first "
+			  "--dlarg %d "
+			  "--dlarg %" PRIu64 " "
+			  "--dlarg %" PRIu64 " "
+			  "--dlarg %d "
+			  "--dlarg last",
+			  td->perf, td->perf_data_file_name, td->dlfilters,
+			  verbose, td->ip, td->addr, do_early);
+}
+
+static int run_perf_script(struct test_data *td)
+{
+	int do_early;
+	int err;
+
+	for (do_early = 0; do_early < 3; do_early++) {
+		err = do_run_perf_script(td, do_early);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+#define TEST_SAMPLE_TYPE (PERF_SAMPLE_IP | PERF_SAMPLE_TID | \
+			  PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_TIME | \
+			  PERF_SAMPLE_ADDR | PERF_SAMPLE_CPU | \
+			  PERF_SAMPLE_PERIOD | PERF_SAMPLE_STREAM_ID)
+
+static int test__dlfilter_test(struct test_data *td)
+{
+	u64 sample_type = TEST_SAMPLE_TYPE;
+	pid_t pid = 12345;
+	pid_t tid = 12346;
+	u64 id = 99;
+	int err;
+
+	if (get_dlfilters_path(td->dlfilters, PATH_MAX))
+		return test_result("dlfilters not found", TEST_SKIP);
+
+	if (check_filter_desc(td))
+		return test_result("Failed to get expected filter description", TEST_FAIL);
+
+	if (!have_gcc())
+		return test_result("gcc not found", TEST_SKIP);
+
+	pr_debug("dlfilters path: %s\n", td->dlfilters);
+
+	if (write_prog(td->c_file_name))
+		return test_result("Failed to write test C file", TEST_FAIL);
+
+	if (verbose > 1)
+		system_cmd("cat %s ; echo", td->c_file_name);
+
+	if (system_cmd("gcc -g -o %s %s", td->prog_file_name, td->c_file_name))
+		return TEST_FAIL;
+
+	if (verbose > 2)
+		system_cmd("objdump -x -dS %s", td->prog_file_name);
+
+	if (get_ip_addr(td))
+		return test_result("Failed to find program symbols", TEST_FAIL);
+
+	pr_debug("Creating new host machine structure\n");
+	td->machine = machine__new_host();
+	td->machine->env = &perf_env;
+
+	td->fd = creat(td->perf_data_file_name, 0644);
+	if (td->fd < 0)
+		return test_result("Failed to create test perf.data file", TEST_FAIL);
+
+	err = perf_header__write_pipe(td->fd);
+	if (err < 0)
+		return test_result("perf_header__write_pipe() failed", TEST_FAIL);
+
+	err = write_attr(td, sample_type, &id);
+	if (err)
+		return test_result("perf_event__synthesize_attr() failed", TEST_FAIL);
+
+	if (write_comm(td->fd, pid, tid, "test-prog"))
+		return TEST_FAIL;
+
+	if (write_mmap(td->fd, pid, tid, MAP_START, 0x10000, 0, td->prog_file_name))
+		return TEST_FAIL;
+
+	if (write_sample(td, sample_type, id, pid, tid) != TEST_OK)
+		return TEST_FAIL;
+
+	if (verbose > 1)
+		system_cmd("%s script -i %s -D", td->perf, td->perf_data_file_name);
+
+	err = run_perf_script(td);
+	if (err)
+		return TEST_FAIL;
+
+	return TEST_OK;
+}
+
+static void unlink_path(const char *path)
+{
+	if (*path)
+		unlink(path);
+}
+
+static void test_data__free(struct test_data *td)
+{
+	machine__delete(td->machine);
+	close_fd(td->fd);
+	if (verbose <= 2) {
+		unlink_path(td->c_file_name);
+		unlink_path(td->prog_file_name);
+		unlink_path(td->perf_data_file_name);
+	}
+}
+
+int test__dlfilter(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	struct test_data td = {.fd = -1};
+	int pid = getpid();
+	int err;
+
+	perf_exe(td.perf, sizeof(td.perf));
+
+	snprintf(td.perf_data_file_name, PATH_MAX, "/tmp/dlfilter-test-%u-perf-data", pid);
+	snprintf(td.c_file_name, PATH_MAX, "/tmp/dlfilter-test-%u-prog.c", pid);
+	snprintf(td.prog_file_name, PATH_MAX, "/tmp/dlfilter-test-%u-prog", pid);
+
+	err = test__dlfilter_test(&td);
+	test_data__free(&td);
+	return err;
+}
diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c
index 627c1aaf1c9e..43e1b01e5afc 100644
--- a/tools/perf/tests/dso-data.c
+++ b/tools/perf/tests/dso-data.c
@@ -308,10 +308,20 @@ int test__dso_data_cache(struct test *test __maybe_unused, int subtest __maybe_u
 	return 0;
 }
 
+static long new_limit(int count)
+{
+	int fd = open("/dev/null", O_RDONLY);
+	long ret = fd;
+	if (count > 0)
+		ret = new_limit(--count);
+	close(fd);
+	return ret;
+}
+
 int test__dso_data_reopen(struct test *test __maybe_unused, int subtest __maybe_unused)
 {
 	struct machine machine;
-	long nr_end, nr = open_files_cnt();
+	long nr_end, nr = open_files_cnt(), lim = new_limit(3);
 	int fd, fd_extra;
 
 #define dso_0 (dsos[0])
@@ -334,7 +344,7 @@ int test__dso_data_reopen(struct test *test __maybe_unused, int subtest __maybe_
 
 	/* Make sure we are able to open 3 fds anyway */
 	TEST_ASSERT_VAL("failed to set file limit",
-			!set_fd_limit((nr + 3)));
+			!set_fd_limit((lim)));
 
 	TEST_ASSERT_VAL("failed to create dsos\n", !dsos__create(3, TEST_FILE_SIZE));
 
diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c
index 98da8a8757ab..33e43cce9064 100644
--- a/tools/perf/tests/llvm.c
+++ b/tools/perf/tests/llvm.c
@@ -67,12 +67,11 @@ test_llvm__fetch_bpf_obj(void **p_obj_buf,
 
 	/*
 	 * Skip this test if user's .perfconfig doesn't set [llvm] section
-	 * and clang is not found in $PATH, and this is not perf test -v
+	 * and clang is not found in $PATH
 	 */
-	if (!force && (verbose <= 0 &&
-		       !llvm_param.user_set_param &&
+	if (!force && (!llvm_param.user_set_param &&
 		       llvm__search_clang())) {
-		pr_debug("No clang and no verbosive, skip this test\n");
+		pr_debug("No clang, skip this test\n");
 		return TEST_SKIP;
 	}
 
diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c
index a258bd51f1a4..e4d0d58b97f8 100644
--- a/tools/perf/tests/mem2node.c
+++ b/tools/perf/tests/mem2node.c
@@ -27,7 +27,7 @@ static unsigned long *get_bitmap(const char *str, int nbits)
 	unsigned long *bm = NULL;
 	int i;
 
-	bm = bitmap_alloc(nbits);
+	bm = bitmap_zalloc(nbits);
 
 	if (map && bm) {
 		for (i = 0; i < map->nr; i++) {
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 8d4866739255..fd3556cc9ad4 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -9,6 +9,7 @@
 #include "pmu-hybrid.h"
 #include <dirent.h>
 #include <errno.h>
+#include "fncache.h"
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
@@ -2194,9 +2195,91 @@ static int test_pmu_events(void)
 	return ret;
 }
 
+static bool test_alias(char **event, char **alias)
+{
+	char path[PATH_MAX];
+	DIR *dir;
+	struct dirent *dent;
+	const char *sysfs = sysfs__mountpoint();
+	char buf[128];
+	FILE *file;
+
+	if (!sysfs)
+		return false;
+
+	snprintf(path, PATH_MAX, "%s/bus/event_source/devices/", sysfs);
+	dir = opendir(path);
+	if (!dir)
+		return false;
+
+	while ((dent = readdir(dir))) {
+		if (!strcmp(dent->d_name, ".") ||
+		    !strcmp(dent->d_name, ".."))
+			continue;
+
+		snprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/alias",
+			 sysfs, dent->d_name);
+
+		if (!file_available(path))
+			continue;
+
+		file = fopen(path, "r");
+		if (!file)
+			continue;
+
+		if (!fgets(buf, sizeof(buf), file)) {
+			fclose(file);
+			continue;
+		}
+
+		/* Remove the last '\n' */
+		buf[strlen(buf) - 1] = 0;
+
+		fclose(file);
+		*event = strdup(dent->d_name);
+		*alias = strdup(buf);
+		closedir(dir);
+
+		if (*event == NULL || *alias == NULL) {
+			free(*event);
+			free(*alias);
+			return false;
+		}
+
+		return true;
+	}
+
+	closedir(dir);
+	return false;
+}
+
+static int test__checkevent_pmu_events_alias(struct evlist *evlist)
+{
+	struct evsel *evsel1 = evlist__first(evlist);
+	struct evsel *evsel2 = evlist__last(evlist);
+
+	TEST_ASSERT_VAL("wrong type", evsel1->core.attr.type == evsel2->core.attr.type);
+	TEST_ASSERT_VAL("wrong config", evsel1->core.attr.config == evsel2->core.attr.config);
+	return 0;
+}
+
+static int test_pmu_events_alias(char *event, char *alias)
+{
+	struct evlist_test e = { .id = 0, };
+	char name[2 * NAME_MAX + 20];
+
+	snprintf(name, sizeof(name), "%s/event=1/,%s/event=1/",
+		 event, alias);
+
+	e.name  = name;
+	e.check = test__checkevent_pmu_events_alias;
+	return test_event(&e);
+}
+
 int test__parse_events(struct test *test __maybe_unused, int subtest __maybe_unused)
 {
 	int ret1, ret2 = 0;
+	char *event, *alias;
 
 #define TEST_EVENTS(tests)				\
 do {							\
@@ -2221,6 +2304,15 @@ do {							\
 			return ret;
 	}
 
+	if (test_alias(&event, &alias)) {
+		int ret = test_pmu_events_alias(event, alias);
+
+		free(event);
+		free(alias);
+		if (ret)
+			return ret;
+	}
+
 	ret1 = test_terms(test__terms, ARRAY_SIZE(test__terms));
 	if (!ret2)
 		ret2 = ret1;
diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c
index b8aff8fb50d8..43743cf719ef 100644
--- a/tools/perf/tests/pmu-events.c
+++ b/tools/perf/tests/pmu-events.c
@@ -28,110 +28,190 @@ struct perf_pmu_test_event {
 	 * be set in the alias.
 	 */
 	const char *alias_long_desc;
+
+	/* PMU which we should match against */
+	const char *matching_pmu;
 };
 
-static struct perf_pmu_test_event test_cpu_events[] = {
-	{
-		.event = {
-			.name = "bp_l1_btb_correct",
-			.event = "event=0x8a",
-			.desc = "L1 BTB Correction",
-			.topic = "branch",
-		},
-		.alias_str = "event=0x8a",
-		.alias_long_desc = "L1 BTB Correction",
+struct perf_pmu_test_pmu {
+	struct perf_pmu pmu;
+	struct perf_pmu_test_event const *aliases[10];
+};
+
+static const struct perf_pmu_test_event bp_l1_btb_correct = {
+	.event = {
+		.name = "bp_l1_btb_correct",
+		.event = "event=0x8a",
+		.desc = "L1 BTB Correction",
+		.topic = "branch",
 	},
-	{
-		.event = {
-			.name = "bp_l2_btb_correct",
-			.event = "event=0x8b",
-			.desc = "L2 BTB Correction",
-			.topic = "branch",
-		},
-		.alias_str = "event=0x8b",
-		.alias_long_desc = "L2 BTB Correction",
+	.alias_str = "event=0x8a",
+	.alias_long_desc = "L1 BTB Correction",
+};
+
+static const struct perf_pmu_test_event bp_l2_btb_correct = {
+	.event = {
+		.name = "bp_l2_btb_correct",
+		.event = "event=0x8b",
+		.desc = "L2 BTB Correction",
+		.topic = "branch",
 	},
-	{
-		.event = {
-			.name = "segment_reg_loads.any",
-			.event = "umask=0x80,period=200000,event=0x6",
-			.desc = "Number of segment register loads",
-			.topic = "other",
-		},
-		.alias_str = "umask=0x80,(null)=0x30d40,event=0x6",
-		.alias_long_desc = "Number of segment register loads",
+	.alias_str = "event=0x8b",
+	.alias_long_desc = "L2 BTB Correction",
+};
+
+static const struct perf_pmu_test_event segment_reg_loads_any = {
+	.event = {
+		.name = "segment_reg_loads.any",
+		.event = "umask=0x80,period=200000,event=0x6",
+		.desc = "Number of segment register loads",
+		.topic = "other",
 	},
-	{
-		.event = {
-			.name = "dispatch_blocked.any",
-			.event = "umask=0x20,period=200000,event=0x9",
-			.desc = "Memory cluster signals to block micro-op dispatch for any reason",
-			.topic = "other",
-		},
-		.alias_str = "umask=0x20,(null)=0x30d40,event=0x9",
-		.alias_long_desc = "Memory cluster signals to block micro-op dispatch for any reason",
+	.alias_str = "umask=0x80,(null)=0x30d40,event=0x6",
+	.alias_long_desc = "Number of segment register loads",
+};
+
+static const struct perf_pmu_test_event dispatch_blocked_any = {
+	.event = {
+		.name = "dispatch_blocked.any",
+		.event = "umask=0x20,period=200000,event=0x9",
+		.desc = "Memory cluster signals to block micro-op dispatch for any reason",
+		.topic = "other",
 	},
-	{
-		.event = {
-			.name = "eist_trans",
-			.event = "umask=0x0,period=200000,event=0x3a",
-			.desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions",
-			.topic = "other",
-		},
-		.alias_str = "umask=0,(null)=0x30d40,event=0x3a",
-		.alias_long_desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions",
+	.alias_str = "umask=0x20,(null)=0x30d40,event=0x9",
+	.alias_long_desc = "Memory cluster signals to block micro-op dispatch for any reason",
+};
+
+static const struct perf_pmu_test_event eist_trans = {
+	.event = {
+		.name = "eist_trans",
+		.event = "umask=0x0,period=200000,event=0x3a",
+		.desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions",
+		.topic = "other",
 	},
-	{
-		.event = {
-			.name = "l3_cache_rd",
-			.event = "event=0x40",
-			.desc = "L3 cache access, read",
-			.long_desc = "Attributable Level 3 cache access, read",
-			.topic = "cache",
-		},
-		.alias_str = "event=0x40",
-		.alias_long_desc = "Attributable Level 3 cache access, read",
+	.alias_str = "umask=0,(null)=0x30d40,event=0x3a",
+	.alias_long_desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions",
+};
+
+static const struct perf_pmu_test_event l3_cache_rd = {
+	.event = {
+		.name = "l3_cache_rd",
+		.event = "event=0x40",
+		.desc = "L3 cache access, read",
+		.long_desc = "Attributable Level 3 cache access, read",
+		.topic = "cache",
 	},
-	{ /* sentinel */
-		.event = {
-			.name = NULL,
-		},
+	.alias_str = "event=0x40",
+	.alias_long_desc = "Attributable Level 3 cache access, read",
+};
+
+static const struct perf_pmu_test_event *core_events[] = {
+	&bp_l1_btb_correct,
+	&bp_l2_btb_correct,
+	&segment_reg_loads_any,
+	&dispatch_blocked_any,
+	&eist_trans,
+	&l3_cache_rd,
+	NULL
+};
+
+static const struct perf_pmu_test_event uncore_hisi_ddrc_flux_wcmd = {
+	.event = {
+		.name = "uncore_hisi_ddrc.flux_wcmd",
+		.event = "event=0x2",
+		.desc = "DDRC write commands. Unit: hisi_sccl,ddrc ",
+		.topic = "uncore",
+		.long_desc = "DDRC write commands",
+		.pmu = "hisi_sccl,ddrc",
 	},
+	.alias_str = "event=0x2",
+	.alias_long_desc = "DDRC write commands",
+	.matching_pmu = "hisi_sccl1_ddrc2",
 };
 
-static struct perf_pmu_test_event test_uncore_events[] = {
-	{
-		.event = {
-			.name = "uncore_hisi_ddrc.flux_wcmd",
-			.event = "event=0x2",
-			.desc = "DDRC write commands. Unit: hisi_sccl,ddrc ",
-			.topic = "uncore",
-			.long_desc = "DDRC write commands",
-			.pmu = "hisi_sccl,ddrc",
-		},
-		.alias_str = "event=0x2",
-		.alias_long_desc = "DDRC write commands",
+static const struct perf_pmu_test_event unc_cbo_xsnp_response_miss_eviction = {
+	.event = {
+		.name = "unc_cbo_xsnp_response.miss_eviction",
+		.event = "umask=0x81,event=0x22",
+		.desc = "Unit: uncore_cbox A cross-core snoop resulted from L3 Eviction which misses in some processor core",
+		.topic = "uncore",
+		.long_desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core",
+		.pmu = "uncore_cbox",
 	},
-	{
-		.event = {
-			.name = "unc_cbo_xsnp_response.miss_eviction",
-			.event = "umask=0x81,event=0x22",
-			.desc = "Unit: uncore_cbox A cross-core snoop resulted from L3 Eviction which misses in some processor core",
-			.topic = "uncore",
-			.long_desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core",
-			.pmu = "uncore_cbox",
-		},
-		.alias_str = "umask=0x81,event=0x22",
-		.alias_long_desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core",
+	.alias_str = "umask=0x81,event=0x22",
+	.alias_long_desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core",
+	.matching_pmu = "uncore_cbox_0",
+};
+
+static const struct perf_pmu_test_event uncore_hisi_l3c_rd_hit_cpipe = {
+	.event = {
+		.name = "uncore_hisi_l3c.rd_hit_cpipe",
+		.event = "event=0x2",
+		.desc = "Total read hits. Unit: hisi_sccl,l3c ",
+		.topic = "uncore",
+		.long_desc = "Total read hits",
+		.pmu = "hisi_sccl,l3c",
 	},
-	{ /* sentinel */
-		.event = {
-			.name = NULL,
-		},
-	}
+	.alias_str = "event=0x7",
+	.alias_long_desc = "Total read hits",
+	.matching_pmu = "hisi_sccl3_l3c7",
+};
+
+static const struct perf_pmu_test_event uncore_imc_free_running_cache_miss = {
+	.event = {
+		.name = "uncore_imc_free_running.cache_miss",
+		.event = "event=0x12",
+		.desc = "Total cache misses. Unit: uncore_imc_free_running ",
+		.topic = "uncore",
+		.long_desc = "Total cache misses",
+		.pmu = "uncore_imc_free_running",
+	},
+	.alias_str = "event=0x12",
+	.alias_long_desc = "Total cache misses",
+	.matching_pmu = "uncore_imc_free_running_0",
+};
+
+static const struct perf_pmu_test_event uncore_imc_cache_hits = {
+	.event = {
+		.name = "uncore_imc.cache_hits",
+		.event = "event=0x34",
+		.desc = "Total cache hits. Unit: uncore_imc ",
+		.topic = "uncore",
+		.long_desc = "Total cache hits",
+		.pmu = "uncore_imc",
+	},
+	.alias_str = "event=0x34",
+	.alias_long_desc = "Total cache hits",
+	.matching_pmu = "uncore_imc_0",
+};
+
+static const struct perf_pmu_test_event *uncore_events[] = {
+	&uncore_hisi_ddrc_flux_wcmd,
+	&unc_cbo_xsnp_response_miss_eviction,
+	&uncore_hisi_l3c_rd_hit_cpipe,
+	&uncore_imc_free_running_cache_miss,
+	&uncore_imc_cache_hits,
+	NULL
 };
 
-const int total_test_events_size = ARRAY_SIZE(test_uncore_events);
+static const struct perf_pmu_test_event sys_ddr_pmu_write_cycles = {
+	.event = {
+		.name = "sys_ddr_pmu.write_cycles",
+		.event = "event=0x2b",
+		.desc = "ddr write-cycles event. Unit: uncore_sys_ddr_pmu ",
+		.topic = "uncore",
+		.pmu = "uncore_sys_ddr_pmu",
+		.compat = "v8",
+	},
+	.alias_str = "event=0x2b",
+	.alias_long_desc = "ddr write-cycles event. Unit: uncore_sys_ddr_pmu ",
+	.matching_pmu = "uncore_sys_ddr_pmu",
+};
+
+static const struct perf_pmu_test_event *sys_events[] = {
+	&sys_ddr_pmu_write_cycles,
+	NULL
+};
 
 static bool is_same(const char *reference, const char *test)
 {
@@ -161,99 +241,207 @@ static struct pmu_events_map *__test_pmu_get_events_map(void)
 	return NULL;
 }
 
-/* Verify generated events from pmu-events.c is as expected */
+static struct pmu_event *__test_pmu_get_sys_events_table(void)
+{
+	struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
+
+	for ( ; tables->name; tables++) {
+		if (!strcmp("pme_test_soc_sys", tables->name))
+			return tables->table;
+	}
+
+	return NULL;
+}
+
+static int compare_pmu_events(struct pmu_event *e1, const struct pmu_event *e2)
+{
+	if (!is_same(e1->desc, e2->desc)) {
+		pr_debug2("testing event e1 %s: mismatched desc, %s vs %s\n",
+			  e1->name, e1->desc, e2->desc);
+		return -1;
+	}
+
+	if (!is_same(e1->topic, e2->topic)) {
+		pr_debug2("testing event e1 %s: mismatched topic, %s vs %s\n",
+			  e1->name, e1->topic, e2->topic);
+		return -1;
+	}
+
+	if (!is_same(e1->long_desc, e2->long_desc)) {
+		pr_debug2("testing event e1 %s: mismatched long_desc, %s vs %s\n",
+			  e1->name, e1->long_desc, e2->long_desc);
+		return -1;
+	}
+
+	if (!is_same(e1->unit, e2->unit)) {
+		pr_debug2("testing event e1 %s: mismatched unit, %s vs %s\n",
+			  e1->name, e1->unit, e2->unit);
+		return -1;
+	}
+
+	if (!is_same(e1->perpkg, e2->perpkg)) {
+		pr_debug2("testing event e1 %s: mismatched perpkg, %s vs %s\n",
+			  e1->name, e1->perpkg, e2->perpkg);
+		return -1;
+	}
+
+	if (!is_same(e1->metric_expr, e2->metric_expr)) {
+		pr_debug2("testing event e1 %s: mismatched metric_expr, %s vs %s\n",
+			  e1->name, e1->metric_expr, e2->metric_expr);
+		return -1;
+	}
+
+	if (!is_same(e1->metric_name, e2->metric_name)) {
+		pr_debug2("testing event e1 %s: mismatched metric_name, %s vs %s\n",
+			  e1->name,	e1->metric_name, e2->metric_name);
+		return -1;
+	}
+
+	if (!is_same(e1->deprecated, e2->deprecated)) {
+		pr_debug2("testing event e1 %s: mismatched deprecated, %s vs %s\n",
+			  e1->name, e1->deprecated, e2->deprecated);
+		return -1;
+	}
+
+	if (!is_same(e1->pmu, e2->pmu)) {
+		pr_debug2("testing event e1 %s: mismatched pmu string, %s vs %s\n",
+			  e1->name, e1->pmu, e2->pmu);
+		return -1;
+	}
+
+	if (!is_same(e1->compat, e2->compat)) {
+		pr_debug2("testing event e1 %s: mismatched compat string, %s vs %s\n",
+			  e1->name, e1->compat, e2->compat);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int compare_alias_to_test_event(struct perf_pmu_alias *alias,
+				struct perf_pmu_test_event const *test_event,
+				char const *pmu_name)
+{
+	struct pmu_event const *event = &test_event->event;
+
+	/* An alias was found, ensure everything is in order */
+	if (!is_same(alias->name, event->name)) {
+		pr_debug("testing aliases PMU %s: mismatched name, %s vs %s\n",
+			  pmu_name, alias->name, event->name);
+		return -1;
+	}
+
+	if (!is_same(alias->desc, event->desc)) {
+		pr_debug("testing aliases PMU %s: mismatched desc, %s vs %s\n",
+			  pmu_name, alias->desc, event->desc);
+		return -1;
+	}
+
+	if (!is_same(alias->long_desc, test_event->alias_long_desc)) {
+		pr_debug("testing aliases PMU %s: mismatched long_desc, %s vs %s\n",
+			  pmu_name, alias->long_desc,
+			  test_event->alias_long_desc);
+		return -1;
+	}
+
+	if (!is_same(alias->topic, event->topic)) {
+		pr_debug("testing aliases PMU %s: mismatched topic, %s vs %s\n",
+			  pmu_name, alias->topic, event->topic);
+		return -1;
+	}
+
+	if (!is_same(alias->str, test_event->alias_str)) {
+		pr_debug("testing aliases PMU %s: mismatched str, %s vs %s\n",
+			  pmu_name, alias->str, test_event->alias_str);
+		return -1;
+	}
+
+	if (!is_same(alias->long_desc, test_event->alias_long_desc)) {
+		pr_debug("testing aliases PMU %s: mismatched long desc, %s vs %s\n",
+			  pmu_name, alias->str, test_event->alias_long_desc);
+		return -1;
+	}
+
+
+	if (!is_same(alias->pmu_name, test_event->event.pmu)) {
+		pr_debug("testing aliases PMU %s: mismatched pmu_name, %s vs %s\n",
+			  pmu_name, alias->pmu_name, test_event->event.pmu);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Verify generated events from pmu-events.c are as expected */
 static int test_pmu_event_table(void)
 {
+	struct pmu_event *sys_event_tables = __test_pmu_get_sys_events_table();
 	struct pmu_events_map *map = __test_pmu_get_events_map();
 	struct pmu_event *table;
 	int map_events = 0, expected_events;
 
-	/* ignore 2x sentinels */
-	expected_events = ARRAY_SIZE(test_cpu_events) +
-			  ARRAY_SIZE(test_uncore_events) - 2;
+	/* ignore 3x sentinels */
+	expected_events = ARRAY_SIZE(core_events) +
+			  ARRAY_SIZE(uncore_events) +
+			  ARRAY_SIZE(sys_events) - 3;
 
-	if (!map)
+	if (!map || !sys_event_tables)
 		return -1;
 
 	for (table = map->table; table->name; table++) {
-		struct perf_pmu_test_event *test;
-		struct pmu_event *te;
+		struct perf_pmu_test_event const **test_event_table;
 		bool found = false;
 
 		if (table->pmu)
-			test = &test_uncore_events[0];
+			test_event_table = &uncore_events[0];
 		else
-			test = &test_cpu_events[0];
+			test_event_table = &core_events[0];
 
-		te = &test->event;
+		for (; *test_event_table; test_event_table++) {
+			struct perf_pmu_test_event const *test_event = *test_event_table;
+			struct pmu_event const *event = &test_event->event;
 
-		for (; te->name; test++, te = &test->event) {
-			if (strcmp(table->name, te->name))
+			if (strcmp(table->name, event->name))
 				continue;
 			found = true;
 			map_events++;
 
-			if (!is_same(table->desc, te->desc)) {
-				pr_debug2("testing event table %s: mismatched desc, %s vs %s\n",
-					  table->name, table->desc, te->desc);
+			if (compare_pmu_events(table, event))
 				return -1;
-			}
 
-			if (!is_same(table->topic, te->topic)) {
-				pr_debug2("testing event table %s: mismatched topic, %s vs %s\n",
-					  table->name, table->topic,
-					  te->topic);
-				return -1;
-			}
+			pr_debug("testing event table %s: pass\n", table->name);
+		}
 
-			if (!is_same(table->long_desc, te->long_desc)) {
-				pr_debug2("testing event table %s: mismatched long_desc, %s vs %s\n",
-					  table->name, table->long_desc,
-					  te->long_desc);
-				return -1;
-			}
+		if (!found) {
+			pr_err("testing event table: could not find event %s\n",
+			       table->name);
+			return -1;
+		}
+	}
 
-			if (!is_same(table->unit, te->unit)) {
-				pr_debug2("testing event table %s: mismatched unit, %s vs %s\n",
-					  table->name, table->unit,
-					  te->unit);
-				return -1;
-			}
+	for (table = sys_event_tables; table->name; table++) {
+		struct perf_pmu_test_event const **test_event_table;
+		bool found = false;
 
-			if (!is_same(table->perpkg, te->perpkg)) {
-				pr_debug2("testing event table %s: mismatched perpkg, %s vs %s\n",
-					  table->name, table->perpkg,
-					  te->perpkg);
-				return -1;
-			}
+		test_event_table = &sys_events[0];
 
-			if (!is_same(table->metric_expr, te->metric_expr)) {
-				pr_debug2("testing event table %s: mismatched metric_expr, %s vs %s\n",
-					  table->name, table->metric_expr,
-					  te->metric_expr);
-				return -1;
-			}
+		for (; *test_event_table; test_event_table++) {
+			struct perf_pmu_test_event const *test_event = *test_event_table;
+			struct pmu_event const *event = &test_event->event;
 
-			if (!is_same(table->metric_name, te->metric_name)) {
-				pr_debug2("testing event table %s: mismatched metric_name, %s vs %s\n",
-					  table->name,  table->metric_name,
-					  te->metric_name);
-				return -1;
-			}
+			if (strcmp(table->name, event->name))
+				continue;
+			found = true;
+			map_events++;
 
-			if (!is_same(table->deprecated, te->deprecated)) {
-				pr_debug2("testing event table %s: mismatched deprecated, %s vs %s\n",
-					  table->name, table->deprecated,
-					  te->deprecated);
+			if (compare_pmu_events(table, event))
 				return -1;
-			}
 
-			pr_debug("testing event table %s: pass\n", table->name);
+			pr_debug("testing sys event table %s: pass\n", table->name);
 		}
-
 		if (!found) {
-			pr_err("testing event table: could not find event %s\n",
-			       table->name);
+			pr_debug("testing event table: could not find event %s\n",
+				   table->name);
 			return -1;
 		}
 	}
@@ -279,27 +467,19 @@ static struct perf_pmu_alias *find_alias(const char *test_event, struct list_hea
 }
 
 /* Verify aliases are as expected */
-static int __test__pmu_event_aliases(char *pmu_name, int *count)
+static int __test_core_pmu_event_aliases(char *pmu_name, int *count)
 {
-	struct perf_pmu_test_event *test;
-	struct pmu_event *te;
+	struct perf_pmu_test_event const **test_event_table;
 	struct perf_pmu *pmu;
 	LIST_HEAD(aliases);
 	int res = 0;
-	bool use_uncore_table;
 	struct pmu_events_map *map = __test_pmu_get_events_map();
 	struct perf_pmu_alias *a, *tmp;
 
 	if (!map)
 		return -1;
 
-	if (is_pmu_core(pmu_name)) {
-		test = &test_cpu_events[0];
-		use_uncore_table = false;
-	} else {
-		test = &test_uncore_events[0];
-		use_uncore_table = true;
-	}
+	test_event_table = &core_events[0];
 
 	pmu = zalloc(sizeof(*pmu));
 	if (!pmu)
@@ -309,91 +489,202 @@ static int __test__pmu_event_aliases(char *pmu_name, int *count)
 
 	pmu_add_cpu_aliases_map(&aliases, pmu, map);
 
-	for (te = &test->event; te->name; test++, te = &test->event) {
-		struct perf_pmu_alias *alias = find_alias(te->name, &aliases);
+	for (; *test_event_table; test_event_table++) {
+		struct perf_pmu_test_event const *test_event = *test_event_table;
+		struct pmu_event const *event = &test_event->event;
+		struct perf_pmu_alias *alias = find_alias(event->name, &aliases);
 
 		if (!alias) {
-			bool uncore_match = pmu_uncore_alias_match(pmu_name,
-								   te->pmu);
-
-			if (use_uncore_table && !uncore_match) {
-				pr_debug3("testing aliases PMU %s: skip matching alias %s\n",
-					  pmu_name, te->name);
-				continue;
-			}
-
-			pr_debug2("testing aliases PMU %s: no alias, alias_table->name=%s\n",
-				  pmu_name, te->name);
+			pr_debug("testing aliases core PMU %s: no alias, alias_table->name=%s\n",
+				  pmu_name, event->name);
 			res = -1;
 			break;
 		}
 
-		if (!is_same(alias->desc, te->desc)) {
-			pr_debug2("testing aliases PMU %s: mismatched desc, %s vs %s\n",
-				  pmu_name, alias->desc, te->desc);
+		if (compare_alias_to_test_event(alias, test_event, pmu_name)) {
 			res = -1;
 			break;
 		}
 
-		if (!is_same(alias->long_desc, test->alias_long_desc)) {
-			pr_debug2("testing aliases PMU %s: mismatched long_desc, %s vs %s\n",
-				  pmu_name, alias->long_desc,
-				  test->alias_long_desc);
-			res = -1;
-			break;
-		}
+		(*count)++;
+		pr_debug2("testing aliases core PMU %s: matched event %s\n",
+			  pmu_name, alias->name);
+	}
 
-		if (!is_same(alias->str, test->alias_str)) {
-			pr_debug2("testing aliases PMU %s: mismatched str, %s vs %s\n",
-				  pmu_name, alias->str, test->alias_str);
-			res = -1;
-			break;
+	list_for_each_entry_safe(a, tmp, &aliases, list) {
+		list_del(&a->list);
+		perf_pmu_free_alias(a);
+	}
+	free(pmu);
+	return res;
+}
+
+static int __test_uncore_pmu_event_aliases(struct perf_pmu_test_pmu *test_pmu)
+{
+	int alias_count = 0, to_match_count = 0, matched_count = 0;
+	struct perf_pmu_test_event const **table;
+	struct perf_pmu *pmu = &test_pmu->pmu;
+	const char *pmu_name = pmu->name;
+	struct perf_pmu_alias *a, *tmp, *alias;
+	struct pmu_events_map *map;
+	LIST_HEAD(aliases);
+	int res = 0;
+
+	map = __test_pmu_get_events_map();
+	if (!map)
+		return -1;
+	pmu_add_cpu_aliases_map(&aliases, pmu, map);
+	pmu_add_sys_aliases(&aliases, pmu);
+
+	/* Count how many aliases we generated */
+	list_for_each_entry(alias, &aliases, list)
+		alias_count++;
+
+	/* Count how many aliases we expect from the known table */
+	for (table = &test_pmu->aliases[0]; *table; table++)
+		to_match_count++;
+
+	if (alias_count != to_match_count) {
+		pr_debug("testing aliases uncore PMU %s: mismatch expected aliases (%d) vs found (%d)\n",
+			 pmu_name, to_match_count, alias_count);
+		res = -1;
+		goto out;
+	}
+
+	list_for_each_entry(alias, &aliases, list) {
+		bool matched = false;
+
+		for (table = &test_pmu->aliases[0]; *table; table++) {
+			struct perf_pmu_test_event const *test_event = *table;
+			struct pmu_event const *event = &test_event->event;
+
+			if (!strcmp(event->name, alias->name)) {
+				if (compare_alias_to_test_event(alias,
+							test_event,
+							pmu_name)) {
+					continue;
+				}
+				matched = true;
+				matched_count++;
+			}
 		}
 
-		if (!is_same(alias->topic, te->topic)) {
-			pr_debug2("testing aliases PMU %s: mismatched topic, %s vs %s\n",
-				  pmu_name, alias->topic, te->topic);
+		if (matched == false) {
+			pr_debug("testing aliases uncore PMU %s: could not match alias %s\n",
+				 pmu_name, alias->name);
 			res = -1;
-			break;
+			goto out;
 		}
+	}
 
-		(*count)++;
-		pr_debug2("testing aliases PMU %s: matched event %s\n",
-			  pmu_name, alias->name);
+	if (alias_count != matched_count) {
+		pr_debug("testing aliases uncore PMU %s: mismatch found aliases (%d) vs matched (%d)\n",
+			 pmu_name, matched_count, alias_count);
+		res = -1;
 	}
 
+out:
 	list_for_each_entry_safe(a, tmp, &aliases, list) {
 		list_del(&a->list);
 		perf_pmu_free_alias(a);
 	}
-	free(pmu);
 	return res;
 }
 
+static struct perf_pmu_test_pmu test_pmus[] = {
+	{
+		.pmu = {
+			.name = (char *)"hisi_sccl1_ddrc2",
+			.is_uncore = 1,
+		},
+		.aliases = {
+			&uncore_hisi_ddrc_flux_wcmd,
+		},
+	},
+	{
+		.pmu = {
+			.name = (char *)"uncore_cbox_0",
+			.is_uncore = 1,
+		},
+		.aliases = {
+			&unc_cbo_xsnp_response_miss_eviction,
+		},
+	},
+	{
+		.pmu = {
+			.name = (char *)"hisi_sccl3_l3c7",
+			.is_uncore = 1,
+		},
+		.aliases = {
+			&uncore_hisi_l3c_rd_hit_cpipe,
+		},
+	},
+	{
+		.pmu = {
+			.name = (char *)"uncore_imc_free_running_0",
+			.is_uncore = 1,
+		},
+		.aliases = {
+			&uncore_imc_free_running_cache_miss,
+		},
+	},
+	{
+		.pmu = {
+			.name = (char *)"uncore_imc_0",
+			.is_uncore = 1,
+		},
+		.aliases = {
+			&uncore_imc_cache_hits,
+		},
+	},
+	{
+		.pmu = {
+			.name = (char *)"uncore_sys_ddr_pmu0",
+			.is_uncore = 1,
+			.id = (char *)"v8",
+		},
+		.aliases = {
+			&sys_ddr_pmu_write_cycles,
+		},
+	},
+};
 
 /* Test that aliases generated are as expected */
 static int test_aliases(void)
 {
 	struct perf_pmu *pmu = NULL;
+	unsigned long i;
 
 	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
 		int count = 0;
 
+		if (!is_pmu_core(pmu->name))
+			continue;
+
 		if (list_empty(&pmu->format)) {
-			pr_debug2("skipping testing PMU %s\n", pmu->name);
+			pr_debug2("skipping testing core PMU %s\n", pmu->name);
 			continue;
 		}
 
-		if (__test__pmu_event_aliases(pmu->name, &count)) {
-			pr_debug("testing PMU %s aliases: failed\n", pmu->name);
+		if (__test_core_pmu_event_aliases(pmu->name, &count)) {
+			pr_debug("testing core PMU %s aliases: failed\n", pmu->name);
 			return -1;
 		}
 
-		if (count == 0)
-			pr_debug3("testing PMU %s aliases: no events to match\n",
+		if (count == 0) {
+			pr_debug("testing core PMU %s aliases: no events to match\n",
 				  pmu->name);
-		else
-			pr_debug("testing PMU %s aliases: pass\n", pmu->name);
+			return -1;
+		}
+
+		pr_debug("testing core PMU %s aliases: pass\n", pmu->name);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(test_pmus); i++) {
+		int res = __test_uncore_pmu_event_aliases(&test_pmus[i]);
+
+		if (res)
+			return res;
 	}
 
 	return 0;
diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
index c2cc42daf924..5b17d916c555 100644
--- a/tools/perf/tests/shell/lib/probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
@@ -19,6 +19,6 @@ add_probe_vfs_getname() {
 }
 
 skip_if_no_debuginfo() {
-	add_probe_vfs_getname -v 2>&1 | egrep -q "^(Failed to find the path for kernel|Debuginfo-analysis is not supported)" && return 2
+	add_probe_vfs_getname -v 2>&1 | egrep -q "^(Failed to find the path for the kernel|Debuginfo-analysis is not supported)" && return 2
 	return 1
 }
diff --git a/tools/perf/tests/shell/pipe_test.sh b/tools/perf/tests/shell/pipe_test.sh
new file mode 100755
index 000000000000..1b32b4f28391
--- /dev/null
+++ b/tools/perf/tests/shell/pipe_test.sh
@@ -0,0 +1,69 @@
+#!/bin/sh
+# perf pipe recording and injection test
+# SPDX-License-Identifier: GPL-2.0
+
+# skip if there's no compiler
+if ! [ -x "$(command -v cc)" ]; then
+	echo "failed: no compiler, install gcc"
+	exit 2
+fi
+
+file=$(mktemp /tmp/test.file.XXXXXX)
+data=$(mktemp /tmp/perf.data.XXXXXX)
+
+cat <<EOF | cc -o ${file} -x c -
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+volatile int done;
+
+void sigalrm(int sig) {
+	done = 1;
+}
+
+__attribute__((noinline)) void noploop(void) {
+	while (!done)
+		continue;
+}
+
+int main(int argc, char *argv[]) {
+	int sec = 1;
+
+	if (argc > 1)
+		sec = atoi(argv[1]);
+
+	signal(SIGALRM, sigalrm);
+	alarm(sec);
+
+	noploop();
+	return 0;
+}
+EOF
+
+
+if ! perf record -e task-clock:u -o - ${file} | perf report -i - --task | grep test.file; then
+	echo "cannot find the test file in the perf report"
+	exit 1
+fi
+
+if ! perf record -e task-clock:u -o - ${file} | perf inject -b | perf report -i - | grep noploop; then
+	echo "cannot find noploop function in pipe #1"
+	exit 1
+fi
+
+perf record -e task-clock:u -o - ${file} | perf inject -b -o ${data}
+if ! perf report -i ${data} | grep noploop; then
+	echo "cannot find noploop function in pipe #2"
+	exit 1
+fi
+
+perf record -e task-clock:u -o ${data} ${file}
+if ! perf inject -b -i ${data} | perf report -i - | grep noploop; then
+	echo "cannot find noploop function in pipe #3"
+	exit 1
+fi
+
+
+rm -f ${file} ${data} ${data}.old
+exit 0
diff --git a/tools/perf/tests/shell/record+zstd_comp_decomp.sh b/tools/perf/tests/shell/record+zstd_comp_decomp.sh
index 045723b3d992..8a168cf8bacc 100755
--- a/tools/perf/tests/shell/record+zstd_comp_decomp.sh
+++ b/tools/perf/tests/shell/record+zstd_comp_decomp.sh
@@ -25,8 +25,8 @@ check_compressed_stats() {
 
 check_compressed_output() {
 	$perf_tool inject -i $trace_file -o $trace_file.decomp &&
-	$perf_tool report -i $trace_file --stdio | head -n -3 > $trace_file.comp.output &&
-	$perf_tool report -i $trace_file.decomp --stdio | head -n -3 > $trace_file.decomp.output &&
+	$perf_tool report -i $trace_file --stdio -F comm,dso,sym | head -n -3 > $trace_file.comp.output &&
+	$perf_tool report -i $trace_file.decomp --stdio -F comm,dso,sym | head -n -3 > $trace_file.decomp.output &&
 	diff $trace_file.comp.output $trace_file.decomp.output
 }
 
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 1100dd55b657..fe1306f58495 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -127,6 +127,7 @@ int test__parse_metric(struct test *test, int subtest);
 int test__pe_file_parsing(struct test *test, int subtest);
 int test__expand_cgroup_events(struct test *test, int subtest);
 int test__perf_time_to_tsc(struct test *test, int subtest);
+int test__dlfilter(struct test *test, int subtest);
 
 bool test__bp_signal_is_supported(void);
 bool test__bp_account_is_supported(void);
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index b5efe675b321..b9028e304ddd 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -38,7 +38,7 @@ static int session_write_header(char *path)
 		.mode = PERF_DATA_MODE_WRITE,
 	};
 
-	session = perf_session__new(&data, false, NULL);
+	session = perf_session__new(&data, NULL);
 	TEST_ASSERT_VAL("can't get session", !IS_ERR(session));
 
 	if (!perf_pmu__has_hybrid()) {
@@ -77,7 +77,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 	int i;
 	struct aggr_cpu_id id;
 
-	session = perf_session__new(&data, false, NULL);
+	session = perf_session__new(&data, NULL);
 	TEST_ASSERT_VAL("can't get session", !IS_ERR(session));
 	cpu__setup_cpunode_map();
 
diff --git a/tools/perf/trace/beauty/arch_errno_names.sh b/tools/perf/trace/beauty/arch_errno_names.sh
index 9f9ea45cddc4..2c5f72fa8108 100755
--- a/tools/perf/trace/beauty/arch_errno_names.sh
+++ b/tools/perf/trace/beauty/arch_errno_names.sh
@@ -87,14 +87,13 @@ cat <<EoHEADER
 
 EoHEADER
 
-# Create list of architectures and ignore those that do not appear
-# in tools/perf/arch
+# Create list of architectures that have a specific errno.h.
 archlist=""
-for arch in $(find $toolsdir/arch -maxdepth 1 -mindepth 1 -type d -printf "%f\n" | grep -v x86 | sort); do
-	test -d $toolsdir/perf/arch/$arch && archlist="$archlist $arch"
+for arch in $(find $toolsdir/arch -maxdepth 1 -mindepth 1 -type d -printf "%f\n" | sort -r); do
+	test -f $toolsdir/arch/$arch/include/uapi/asm/errno.h && archlist="$archlist $arch"
 done
 
-for arch in x86 $archlist generic; do
+for arch in generic $archlist; do
 	process_arch "$arch"
 done
-create_arch_errno_table_func "x86 $archlist" "generic"
+create_arch_errno_table_func "$archlist" "generic"
diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h
index 0d8e3dcb7f88..041d6032a348 100644
--- a/tools/perf/trace/beauty/include/linux/socket.h
+++ b/tools/perf/trace/beauty/include/linux/socket.h
@@ -223,8 +223,11 @@ struct ucred {
 				 * reuses AF_INET address family
 				 */
 #define AF_XDP		44	/* XDP sockets			*/
+#define AF_MCTP		45	/* Management component
+				 * transport protocol
+				 */
 
-#define AF_MAX		45	/* For now.. */
+#define AF_MAX		46	/* For now.. */
 
 /* Protocol families, same as address families. */
 #define PF_UNSPEC	AF_UNSPEC
@@ -274,6 +277,7 @@ struct ucred {
 #define PF_QIPCRTR	AF_QIPCRTR
 #define PF_SMC		AF_SMC
 #define PF_XDP		AF_XDP
+#define PF_MCTP		AF_MCTP
 #define PF_MAX		AF_MAX
 
 /* Maximum queue length specifiable by listen.  */
@@ -421,6 +425,9 @@ extern int __sys_accept4_file(struct file *file, unsigned file_flags,
 			struct sockaddr __user *upeer_sockaddr,
 			 int __user *upeer_addrlen, int flags,
 			 unsigned long nofile);
+extern struct file *do_accept(struct file *file, unsigned file_flags,
+			      struct sockaddr __user *upeer_sockaddr,
+			      int __user *upeer_addrlen, int flags);
 extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
 			 int __user *upeer_addrlen, int flags);
 extern int __sys_socket(int family, int type, int protocol);
diff --git a/tools/perf/trace/beauty/move_mount_flags.sh b/tools/perf/trace/beauty/move_mount_flags.sh
index 55e59241daa4..4b1d9acc0bd0 100755
--- a/tools/perf/trace/beauty/move_mount_flags.sh
+++ b/tools/perf/trace/beauty/move_mount_flags.sh
@@ -10,7 +10,7 @@ fi
 linux_mount=${linux_header_dir}/mount.h
 
 printf "static const char *move_mount_flags[] = {\n"
-regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOVE_MOUNT_([FT]_[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOVE_MOUNT_([^_]+_[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
 egrep $regex ${linux_mount} | \
 	sed -r "s/$regex/\2 \1/g"	| \
 	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 701130ad43a2..ef4da4295bf7 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -966,6 +966,7 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
 	err = symbol__annotate2(ms, evsel, opts, &browser.arch);
 	if (err) {
 		char msg[BUFSIZ];
+		ms->map->dso->annotate_warned = true;
 		symbol__strerror_disassemble(ms, err, msg, sizeof(msg));
 		ui__error("Couldn't annotate %s:\n%s", sym->name, msg);
 		goto out_free_offsets;
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index 94167bfed722..0a50e962f9a3 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -177,6 +177,7 @@ static int symbol__gtk_annotate(struct map_symbol *ms, struct evsel *evsel,
 	err = symbol__annotate(ms, evsel, &annotation__default_options, NULL);
 	if (err) {
 		char msg[BUFSIZ];
+		ms->map->dso->annotate_warned = true;
 		symbol__strerror_disassemble(ms, err, msg, sizeof(msg));
 		ui__error("Couldn't annotate %s: %s\n", sym->name, msg);
 		return -1;
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 2d4fa1304178..f2914d5bed6e 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -59,6 +59,7 @@ perf-y += pstack.o
 perf-y += session.o
 perf-y += sample-raw.o
 perf-y += s390-sample-raw.o
+perf-y += amd-sample-raw.o
 perf-$(CONFIG_TRACE) += syscalltbl.o
 perf-y += ordered-events.o
 perf-y += namespaces.o
diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c
index a5e31f826828..7b12bd7a3080 100644
--- a/tools/perf/util/affinity.c
+++ b/tools/perf/util/affinity.c
@@ -25,11 +25,11 @@ int affinity__setup(struct affinity *a)
 {
 	int cpu_set_size = get_cpu_set_size();
 
-	a->orig_cpus = bitmap_alloc(cpu_set_size * 8);
+	a->orig_cpus = bitmap_zalloc(cpu_set_size * 8);
 	if (!a->orig_cpus)
 		return -1;
 	sched_getaffinity(0, cpu_set_size, (cpu_set_t *)a->orig_cpus);
-	a->sched_cpus = bitmap_alloc(cpu_set_size * 8);
+	a->sched_cpus = bitmap_zalloc(cpu_set_size * 8);
 	if (!a->sched_cpus) {
 		zfree(&a->orig_cpus);
 		return -1;
diff --git a/tools/perf/util/amd-sample-raw.c b/tools/perf/util/amd-sample-raw.c
new file mode 100644
index 000000000000..d19d765195c5
--- /dev/null
+++ b/tools/perf/util/amd-sample-raw.c
@@ -0,0 +1,289 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD specific. Provide textual annotation for IBS raw sample data.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include <linux/string.h>
+#include "../../arch/x86/include/asm/amd-ibs.h"
+
+#include "debug.h"
+#include "session.h"
+#include "evlist.h"
+#include "sample-raw.h"
+#include "pmu-events/pmu-events.h"
+
+static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type;
+
+static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg)
+{
+	const char * const ic_miss_strs[] = {
+		" IcMiss 0",
+		" IcMiss 1",
+	};
+	const char * const l1tlb_pgsz_strs[] = {
+		" L1TlbPgSz 4KB",
+		" L1TlbPgSz 2MB",
+		" L1TlbPgSz 1GB",
+		" L1TlbPgSz RESERVED"
+	};
+	const char * const l1tlb_pgsz_strs_erratum1347[] = {
+		" L1TlbPgSz 4KB",
+		" L1TlbPgSz 16KB",
+		" L1TlbPgSz 2MB",
+		" L1TlbPgSz 1GB"
+	};
+	const char *ic_miss_str = NULL;
+	const char *l1tlb_pgsz_str = NULL;
+
+	if (cpu_family == 0x19 && cpu_model < 0x10) {
+		/*
+		 * Erratum #1238 workaround is to ignore MSRC001_1030[IbsIcMiss]
+		 * Erratum #1347 workaround is to use table provided in erratum
+		 */
+		if (reg.phy_addr_valid)
+			l1tlb_pgsz_str = l1tlb_pgsz_strs_erratum1347[reg.l1tlb_pgsz];
+	} else {
+		if (reg.phy_addr_valid)
+			l1tlb_pgsz_str = l1tlb_pgsz_strs[reg.l1tlb_pgsz];
+		ic_miss_str = ic_miss_strs[reg.ic_miss];
+	}
+
+	printf("ibs_fetch_ctl:\t%016llx MaxCnt %7d Cnt %7d Lat %5d En %d Val %d Comp %d%s "
+	       "PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s\n",
+		reg.val, reg.fetch_maxcnt << 4, reg.fetch_cnt << 4, reg.fetch_lat,
+		reg.fetch_en, reg.fetch_val, reg.fetch_comp, ic_miss_str ? : "",
+		reg.phy_addr_valid, l1tlb_pgsz_str ? : "", reg.l1tlb_miss, reg.l2tlb_miss,
+		reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? " L2Miss 1" : " L2Miss 0") : "");
+}
+
+static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg)
+{
+	printf("ic_ibs_ext_ctl:\t%016llx IbsItlbRefillLat %3d\n", reg.val, reg.itlb_refill_lat);
+}
+
+static void pr_ibs_op_ctl(union ibs_op_ctl reg)
+{
+	printf("ibs_op_ctl:\t%016llx MaxCnt %9d En %d Val %d CntCtl %d=%s CurCnt %9d\n",
+	       reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, reg.op_en, reg.op_val,
+	       reg.cnt_ctl, reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt);
+}
+
+static void pr_ibs_op_data(union ibs_op_data reg)
+{
+	printf("ibs_op_data:\t%016llx CompToRetCtr %5d TagToRetCtr %5d%s%s%s BrnRet %d "
+		" RipInvalid %d BrnFuse %d Microcode %d\n",
+		reg.val, reg.comp_to_ret_ctr, reg.tag_to_ret_ctr,
+		reg.op_brn_ret ? (reg.op_return ? " OpReturn 1" : " OpReturn 0") : "",
+		reg.op_brn_ret ? (reg.op_brn_taken ? " OpBrnTaken 1" : " OpBrnTaken 0") : "",
+		reg.op_brn_ret ? (reg.op_brn_misp ? " OpBrnMisp 1" : " OpBrnMisp 0") : "",
+		reg.op_brn_ret, reg.op_rip_invalid, reg.op_brn_fuse, reg.op_microcode);
+}
+
+static void pr_ibs_op_data2(union ibs_op_data2 reg)
+{
+	static const char * const data_src_str[] = {
+		"",
+		" DataSrc 1=(reserved)",
+		" DataSrc 2=Local node cache",
+		" DataSrc 3=DRAM",
+		" DataSrc 4=Remote node cache",
+		" DataSrc 5=(reserved)",
+		" DataSrc 6=(reserved)",
+		" DataSrc 7=Other"
+	};
+
+	printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val,
+	       reg.data_src == 2 ? (reg.cache_hit_st ? "CacheHitSt 1=O-State "
+						     : "CacheHitSt 0=M-state ") : "",
+	       reg.rmt_node, data_src_str[reg.data_src]);
+}
+
+static void pr_ibs_op_data3(union ibs_op_data3 reg)
+{
+	char l2_miss_str[sizeof(" L2Miss _")] = "";
+	char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = "";
+	char op_dc_miss_open_mem_reqs_str[sizeof(" OpDcMissOpenMemReqs __")] = "";
+
+	/*
+	 * Erratum #1293
+	 * Ignore L2Miss and OpDcMissOpenMemReqs (and opdata2) if DcMissNoMabAlloc or SwPf set
+	 */
+	if (!(cpu_family == 0x19 && cpu_model < 0x10 && (reg.dc_miss_no_mab_alloc || reg.sw_pf))) {
+		snprintf(l2_miss_str, sizeof(l2_miss_str), " L2Miss %d", reg.l2_miss);
+		snprintf(op_dc_miss_open_mem_reqs_str, sizeof(op_dc_miss_open_mem_reqs_str),
+			 " OpDcMissOpenMemReqs %2d", reg.op_dc_miss_open_mem_reqs);
+	}
+
+	if (reg.op_mem_width)
+		snprintf(op_mem_width_str, sizeof(op_mem_width_str),
+			 " OpMemWidth %2d bytes", 1 << (reg.op_mem_width - 1));
+
+	printf("ibs_op_data3:\t%016llx LdOp %d StOp %d DcL1TlbMiss %d DcL2TlbMiss %d "
+		"DcL1TlbHit2M %d DcL1TlbHit1G %d DcL2TlbHit2M %d DcMiss %d DcMisAcc %d "
+		"DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d DcLinAddrValid %d "
+		"DcPhyAddrValid %d DcL2TlbHit1G %d%s SwPf %d%s%s DcMissLat %5d TlbRefillLat %5d\n",
+		reg.val, reg.ld_op, reg.st_op, reg.dc_l1tlb_miss, reg.dc_l2tlb_miss,
+		reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g, reg.dc_l2tlb_hit_2m, reg.dc_miss,
+		reg.dc_mis_acc, reg.dc_wc_mem_acc, reg.dc_uc_mem_acc, reg.dc_locked_op,
+		reg.dc_miss_no_mab_alloc, reg.dc_lin_addr_valid, reg.dc_phy_addr_valid,
+		reg.dc_l2_tlb_hit_1g, l2_miss_str, reg.sw_pf, op_mem_width_str,
+		op_dc_miss_open_mem_reqs_str, reg.dc_miss_lat, reg.tlb_refill_lat);
+}
+
+/*
+ * IBS Op/Execution MSRs always saved, in order, are:
+ * IBS_OP_CTL, IBS_OP_RIP, IBS_OP_DATA, IBS_OP_DATA2,
+ * IBS_OP_DATA3, IBS_DC_LINADDR, IBS_DC_PHYSADDR, BP_IBSTGT_RIP
+ */
+static void amd_dump_ibs_op(struct perf_sample *sample)
+{
+	struct perf_ibs_data *data = sample->raw_data;
+	union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data;
+	__u64 *rip = (__u64 *)op_ctl + 1;
+	union ibs_op_data *op_data = (union ibs_op_data *)(rip + 1);
+	union ibs_op_data3 *op_data3 = (union ibs_op_data3 *)(rip + 3);
+
+	pr_ibs_op_ctl(*op_ctl);
+	if (!op_data->op_rip_invalid)
+		printf("IbsOpRip:\t%016llx\n", *rip);
+	pr_ibs_op_data(*op_data);
+	/*
+	 * Erratum #1293: ignore op_data2 if DcMissNoMabAlloc or SwPf are set
+	 */
+	if (!(cpu_family == 0x19 && cpu_model < 0x10 &&
+	      (op_data3->dc_miss_no_mab_alloc || op_data3->sw_pf)))
+		pr_ibs_op_data2(*(union ibs_op_data2 *)(rip + 2));
+	pr_ibs_op_data3(*op_data3);
+	if (op_data3->dc_lin_addr_valid)
+		printf("IbsDCLinAd:\t%016llx\n", *(rip + 4));
+	if (op_data3->dc_phy_addr_valid)
+		printf("IbsDCPhysAd:\t%016llx\n", *(rip + 5));
+	if (op_data->op_brn_ret && *(rip + 6))
+		printf("IbsBrTarget:\t%016llx\n", *(rip + 6));
+}
+
+/*
+ * IBS Fetch MSRs always saved, in order, are:
+ * IBS_FETCH_CTL, IBS_FETCH_LINADDR, IBS_FETCH_PHYSADDR, IC_IBS_EXTD_CTL
+ */
+static void amd_dump_ibs_fetch(struct perf_sample *sample)
+{
+	struct perf_ibs_data *data = sample->raw_data;
+	union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data;
+	__u64 *addr = (__u64 *)fetch_ctl + 1;
+	union ic_ibs_extd_ctl *extd_ctl = (union ic_ibs_extd_ctl *)addr + 2;
+
+	pr_ibs_fetch_ctl(*fetch_ctl);
+	printf("IbsFetchLinAd:\t%016llx\n", *addr++);
+	if (fetch_ctl->phy_addr_valid)
+		printf("IbsFetchPhysAd:\t%016llx\n", *addr);
+	pr_ic_ibs_extd_ctl(*extd_ctl);
+}
+
+/*
+ * Test for enable and valid bits in captured control MSRs.
+ */
+static bool is_valid_ibs_fetch_sample(struct perf_sample *sample)
+{
+	struct perf_ibs_data *data = sample->raw_data;
+	union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data;
+
+	if (fetch_ctl->fetch_en && fetch_ctl->fetch_val)
+		return true;
+
+	return false;
+}
+
+static bool is_valid_ibs_op_sample(struct perf_sample *sample)
+{
+	struct perf_ibs_data *data = sample->raw_data;
+	union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data;
+
+	if (op_ctl->op_en && op_ctl->op_val)
+		return true;
+
+	return false;
+}
+
+/* AMD vendor specific raw sample function. Check for PERF_RECORD_SAMPLE events
+ * and if the event was triggered by IBS, display its raw data with decoded text.
+ * The function is only invoked when the dump flag -D is set.
+ */
+void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event,
+			    struct perf_sample *sample)
+{
+	struct evsel *evsel;
+
+	if (event->header.type != PERF_RECORD_SAMPLE || !sample->raw_size)
+		return;
+
+	evsel = evlist__event2evsel(evlist, event);
+	if (!evsel)
+		return;
+
+	if (evsel->core.attr.type == ibs_fetch_type) {
+		if (!is_valid_ibs_fetch_sample(sample)) {
+			pr_debug("Invalid raw IBS Fetch MSR data encountered\n");
+			return;
+		}
+		amd_dump_ibs_fetch(sample);
+	} else if (evsel->core.attr.type == ibs_op_type) {
+		if (!is_valid_ibs_op_sample(sample)) {
+			pr_debug("Invalid raw IBS Op MSR data encountered\n");
+			return;
+		}
+		amd_dump_ibs_op(sample);
+	}
+}
+
+static void parse_cpuid(struct perf_env *env)
+{
+	const char *cpuid;
+	int ret;
+
+	cpuid = perf_env__cpuid(env);
+	/*
+	 * cpuid = "AuthenticAMD,family,model,stepping"
+	 */
+	ret = sscanf(cpuid, "%*[^,],%u,%u", &cpu_family, &cpu_model);
+	if (ret != 2)
+		pr_debug("problem parsing cpuid\n");
+}
+
+/*
+ * Find and assign the type number used for ibs_op or ibs_fetch samples.
+ * Device names can be large - we are only interested in the first 9 characters,
+ * to match "ibs_fetch".
+ */
+bool evlist__has_amd_ibs(struct evlist *evlist)
+{
+	struct perf_env *env = evlist->env;
+	int ret, nr_pmu_mappings = perf_env__nr_pmu_mappings(env);
+	const char *pmu_mapping = perf_env__pmu_mappings(env);
+	char name[sizeof("ibs_fetch")];
+	u32 type;
+
+	while (nr_pmu_mappings--) {
+		ret = sscanf(pmu_mapping, "%u:%9s", &type, name);
+		if (ret == 2) {
+			if (strstarts(name, "ibs_op"))
+				ibs_op_type = type;
+			else if (strstarts(name, "ibs_fetch"))
+				ibs_fetch_type = type;
+		}
+		pmu_mapping += strlen(pmu_mapping) + 1 /* '\0' */;
+	}
+
+	if (ibs_fetch_type || ibs_op_type) {
+		if (!cpu_family)
+			parse_cpuid(env);
+		return true;
+	}
+
+	return false;
+}
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index aa04a3655236..0bae061b2d6d 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1833,7 +1833,7 @@ static int symbol__disassemble_bpf(struct symbol *sym,
 	ret = 0;
 out:
 	free(prog_linfo);
-	free(btf);
+	btf__free(btf);
 	fclose(s);
 	bfd_close(bfdf);
 	return ret;
@@ -2192,8 +2192,10 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel,
 		return errno;
 
 	args.arch = arch = arch__find(arch_name);
-	if (arch == NULL)
+	if (arch == NULL) {
+		pr_err("%s: unsupported arch %s\n", __func__, arch_name);
 		return ENOTSUP;
+	}
 
 	if (parch)
 		*parch = arch;
@@ -2787,9 +2789,17 @@ int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel,
 	struct rb_root source_line = RB_ROOT;
 	struct hists *hists = evsel__hists(evsel);
 	char buf[1024];
+	int err;
 
-	if (symbol__annotate2(ms, evsel, opts, NULL) < 0)
+	err = symbol__annotate2(ms, evsel, opts, NULL);
+	if (err) {
+		char msg[BUFSIZ];
+
+		dso->annotate_warned = true;
+		symbol__strerror_disassemble(ms, err, msg, sizeof(msg));
+		ui__error("Couldn't annotate %s:\n%s", sym->name, msg);
 		return -1;
+	}
 
 	if (opts->print_lines) {
 		srcline_full_filename = opts->full_path;
@@ -2813,9 +2823,17 @@ int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel,
 	struct dso *dso = ms->map->dso;
 	struct symbol *sym = ms->sym;
 	struct rb_root source_line = RB_ROOT;
+	int err;
+
+	err = symbol__annotate(ms, evsel, opts, NULL);
+	if (err) {
+		char msg[BUFSIZ];
 
-	if (symbol__annotate(ms, evsel, opts, NULL) < 0)
+		dso->annotate_warned = true;
+		symbol__strerror_disassemble(ms, err, msg, sizeof(msg));
+		ui__error("Couldn't annotate %s:\n%s", sym->name, msg);
 		return -1;
+	}
 
 	symbol__calc_percent(sym, evsel);
 
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index cb19669d2a5b..8d2865b9ade2 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -130,11 +130,6 @@ int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
 		return 0;
 	}
 
-#if BITS_PER_LONG != 64 && !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
-	pr_err("Cannot use AUX area tracing mmaps\n");
-	return -1;
-#endif
-
 	pc->aux_offset = mp->offset;
 	pc->aux_size = mp->len;
 
@@ -1674,6 +1669,82 @@ int perf_event__process_auxtrace_error(struct perf_session *session,
 	return 0;
 }
 
+/*
+ * In the compat mode kernel runs in 64-bit and perf tool runs in 32-bit mode,
+ * 32-bit perf tool cannot access 64-bit value atomically, which might lead to
+ * the issues caused by the below sequence on multiple CPUs: when perf tool
+ * accesses either the load operation or the store operation for 64-bit value,
+ * on some architectures the operation is divided into two instructions, one
+ * is for accessing the low 32-bit value and another is for the high 32-bit;
+ * thus these two user operations can give the kernel chances to access the
+ * 64-bit value, and thus leads to the unexpected load values.
+ *
+ *   kernel (64-bit)                        user (32-bit)
+ *
+ *   if (LOAD ->aux_tail) { --,             LOAD ->aux_head_lo
+ *       STORE $aux_data      |       ,--->
+ *       FLUSH $aux_data      |       |     LOAD ->aux_head_hi
+ *       STORE ->aux_head   --|-------`     smp_rmb()
+ *   }                        |             LOAD $data
+ *                            |             smp_mb()
+ *                            |             STORE ->aux_tail_lo
+ *                            `----------->
+ *                                          STORE ->aux_tail_hi
+ *
+ * For this reason, it's impossible for the perf tool to work correctly when
+ * the AUX head or tail is bigger than 4GB (more than 32 bits length); and we
+ * can not simply limit the AUX ring buffer to less than 4GB, the reason is
+ * the pointers can be increased monotonically, whatever the buffer size it is,
+ * at the end the head and tail can be bigger than 4GB and carry out to the
+ * high 32-bit.
+ *
+ * To mitigate the issues and improve the user experience, we can allow the
+ * perf tool working in certain conditions and bail out with error if detect
+ * any overflow cannot be handled.
+ *
+ * For reading the AUX head, it reads out the values for three times, and
+ * compares the high 4 bytes of the values between the first time and the last
+ * time, if there has no change for high 4 bytes injected by the kernel during
+ * the user reading sequence, it's safe for use the second value.
+ *
+ * When compat_auxtrace_mmap__write_tail() detects any carrying in the high
+ * 32 bits, it means there have two store operations in user space and it cannot
+ * promise the atomicity for 64-bit write, so return '-1' in this case to tell
+ * the caller an overflow error has happened.
+ */
+u64 __weak compat_auxtrace_mmap__read_head(struct auxtrace_mmap *mm)
+{
+	struct perf_event_mmap_page *pc = mm->userpg;
+	u64 first, second, last;
+	u64 mask = (u64)(UINT32_MAX) << 32;
+
+	do {
+		first = READ_ONCE(pc->aux_head);
+		/* Ensure all reads are done after we read the head */
+		smp_rmb();
+		second = READ_ONCE(pc->aux_head);
+		/* Ensure all reads are done after we read the head */
+		smp_rmb();
+		last = READ_ONCE(pc->aux_head);
+	} while ((first & mask) != (last & mask));
+
+	return second;
+}
+
+int __weak compat_auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail)
+{
+	struct perf_event_mmap_page *pc = mm->userpg;
+	u64 mask = (u64)(UINT32_MAX) << 32;
+
+	if (tail & mask)
+		return -1;
+
+	/* Ensure all reads are done before we write the tail out */
+	smp_mb();
+	WRITE_ONCE(pc->aux_tail, tail);
+	return 0;
+}
+
 static int __auxtrace_mmap__read(struct mmap *map,
 				 struct auxtrace_record *itr,
 				 struct perf_tool *tool, process_auxtrace_t fn,
@@ -1685,15 +1756,13 @@ static int __auxtrace_mmap__read(struct mmap *map,
 	size_t size, head_off, old_off, len1, len2, padding;
 	union perf_event ev;
 	void *data1, *data2;
+	int kernel_is_64_bit = perf_env__kernel_is_64_bit(evsel__env(NULL));
 
-	if (snapshot) {
-		head = auxtrace_mmap__read_snapshot_head(mm);
-		if (auxtrace_record__find_snapshot(itr, mm->idx, mm, data,
-						   &head, &old))
-			return -1;
-	} else {
-		head = auxtrace_mmap__read_head(mm);
-	}
+	head = auxtrace_mmap__read_head(mm, kernel_is_64_bit);
+
+	if (snapshot &&
+	    auxtrace_record__find_snapshot(itr, mm->idx, mm, data, &head, &old))
+		return -1;
 
 	if (old == head)
 		return 0;
@@ -1772,10 +1841,13 @@ static int __auxtrace_mmap__read(struct mmap *map,
 	mm->prev = head;
 
 	if (!snapshot) {
-		auxtrace_mmap__write_tail(mm, head);
-		if (itr->read_finish) {
-			int err;
+		int err;
 
+		err = auxtrace_mmap__write_tail(mm, head, kernel_is_64_bit);
+		if (err < 0)
+			return err;
+
+		if (itr->read_finish) {
 			err = itr->read_finish(itr, mm->idx);
 			if (err < 0)
 				return err;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index cc1c1b9cec9c..5f383908ca6e 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -440,52 +440,39 @@ struct auxtrace_cache;
 
 #ifdef HAVE_AUXTRACE_SUPPORT
 
-/*
- * In snapshot mode the mmapped page is read-only which makes using
- * __sync_val_compare_and_swap() problematic.  However, snapshot mode expects
- * the buffer is not updated while the snapshot is made (e.g. Intel PT disables
- * the event) so there is not a race anyway.
- */
-static inline u64 auxtrace_mmap__read_snapshot_head(struct auxtrace_mmap *mm)
-{
-	struct perf_event_mmap_page *pc = mm->userpg;
-	u64 head = READ_ONCE(pc->aux_head);
-
-	/* Ensure all reads are done after we read the head */
-	smp_rmb();
-	return head;
-}
+u64 compat_auxtrace_mmap__read_head(struct auxtrace_mmap *mm);
+int compat_auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail);
 
-static inline u64 auxtrace_mmap__read_head(struct auxtrace_mmap *mm)
+static inline u64 auxtrace_mmap__read_head(struct auxtrace_mmap *mm,
+					   int kernel_is_64_bit __maybe_unused)
 {
 	struct perf_event_mmap_page *pc = mm->userpg;
-#if BITS_PER_LONG == 64 || !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
-	u64 head = READ_ONCE(pc->aux_head);
-#else
-	u64 head = __sync_val_compare_and_swap(&pc->aux_head, 0, 0);
+	u64 head;
+
+#if BITS_PER_LONG == 32
+	if (kernel_is_64_bit)
+		return compat_auxtrace_mmap__read_head(mm);
 #endif
+	head = READ_ONCE(pc->aux_head);
 
 	/* Ensure all reads are done after we read the head */
 	smp_rmb();
 	return head;
 }
 
-static inline void auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail)
+static inline int auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail,
+					    int kernel_is_64_bit __maybe_unused)
 {
 	struct perf_event_mmap_page *pc = mm->userpg;
-#if BITS_PER_LONG != 64 && defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
-	u64 old_tail;
-#endif
 
+#if BITS_PER_LONG == 32
+	if (kernel_is_64_bit)
+		return compat_auxtrace_mmap__write_tail(mm, tail);
+#endif
 	/* Ensure all reads are done before we write the tail out */
 	smp_mb();
-#if BITS_PER_LONG == 64 || !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
-	pc->aux_tail = tail;
-#else
-	do {
-		old_tail = __sync_val_compare_and_swap(&pc->aux_tail, 0, 0);
-	} while (!__sync_bool_compare_and_swap(&pc->aux_tail, old_tail, tail));
-#endif
+	WRITE_ONCE(pc->aux_tail, tail);
+	return 0;
 }
 
 int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index 996d025b8ed8..683f6d63560e 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -21,6 +21,14 @@
 #include "record.h"
 #include "util/synthetic-events.h"
 
+struct btf * __weak btf__load_from_kernel_by_id(__u32 id)
+{
+       struct btf *btf;
+       int err = btf__get_from_id(id, &btf);
+
+       return err ? ERR_PTR(err) : btf;
+}
+
 #define ptr_to_u64(ptr)    ((__u64)(unsigned long)(ptr))
 
 static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len)
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 63d472b336de..4fb5e90d7a57 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -581,7 +581,10 @@ const char *perf_home_perfconfig(void)
 	static const char *config;
 	static bool failed;
 
-	config = failed ? NULL : home_perfconfig();
+	if (failed || config)
+		return config;
+
+	config = home_perfconfig();
 	if (!config)
 		failed = true;
 
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 3e1a05bc82cc..31fa3b45134a 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -13,8 +13,6 @@
 #include <linux/zalloc.h>
 #include <stdlib.h>
 #include <opencsd/c_api/opencsd_c_api.h>
-#include <opencsd/etmv4/trc_pkt_types_etmv4.h>
-#include <opencsd/ocsd_if_types.h>
 
 #include "cs-etm.h"
 #include "cs-etm-decoder.h"
@@ -35,9 +33,11 @@
 struct cs_etm_decoder {
 	void *data;
 	void (*packet_printer)(const char *msg);
+	bool suppress_printing;
 	dcd_tree_handle_t dcd_tree;
 	cs_etm_mem_cb_type mem_access;
 	ocsd_datapath_resp_t prev_return;
+	const char *decoder_name;
 };
 
 static u32
@@ -74,9 +74,10 @@ int cs_etm_decoder__reset(struct cs_etm_decoder *decoder)
 	ocsd_datapath_resp_t dp_ret;
 
 	decoder->prev_return = OCSD_RESP_CONT;
-
+	decoder->suppress_printing = true;
 	dp_ret = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET,
 				      0, 0, NULL, NULL);
+	decoder->suppress_printing = false;
 	if (OCSD_DATA_RESP_IS_FATAL(dp_ret))
 		return -1;
 
@@ -124,6 +125,21 @@ static int cs_etm_decoder__gen_etmv3_config(struct cs_etm_trace_params *params,
 	return 0;
 }
 
+#define TRCIDR1_TRCARCHMIN_SHIFT 4
+#define TRCIDR1_TRCARCHMIN_MASK  GENMASK(7, 4)
+#define TRCIDR1_TRCARCHMIN(x)    (((x) & TRCIDR1_TRCARCHMIN_MASK) >> TRCIDR1_TRCARCHMIN_SHIFT)
+
+static enum _ocsd_arch_version cs_etm_decoder__get_etmv4_arch_ver(u32 reg_idr1)
+{
+	/*
+	 * For ETMv4 if the trace minor version is 4 or more then we can assume
+	 * the architecture is ARCH_AA64 rather than just V8.
+	 * ARCH_V8 = V8 architecture
+	 * ARCH_AA64 = Min v8r3 plus additional AA64 PE features
+	 */
+	return TRCIDR1_TRCARCHMIN(reg_idr1) >= 4 ? ARCH_AA64 : ARCH_V8;
+}
+
 static void cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params,
 					     ocsd_etmv4_cfg *config)
 {
@@ -138,7 +154,21 @@ static void cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params,
 	config->reg_idr11 = 0;
 	config->reg_idr12 = 0;
 	config->reg_idr13 = 0;
-	config->arch_ver = ARCH_V8;
+	config->arch_ver = cs_etm_decoder__get_etmv4_arch_ver(params->etmv4.reg_idr1);
+	config->core_prof = profile_CortexA;
+}
+
+static void cs_etm_decoder__gen_ete_config(struct cs_etm_trace_params *params,
+					   ocsd_ete_cfg *config)
+{
+	config->reg_configr = params->ete.reg_configr;
+	config->reg_traceidr = params->ete.reg_traceidr;
+	config->reg_idr0 = params->ete.reg_idr0;
+	config->reg_idr1 = params->ete.reg_idr1;
+	config->reg_idr2 = params->ete.reg_idr2;
+	config->reg_idr8 = params->ete.reg_idr8;
+	config->reg_devarch = params->ete.reg_devarch;
+	config->arch_ver = ARCH_AA64;
 	config->core_prof = profile_CortexA;
 }
 
@@ -146,8 +176,10 @@ static void cs_etm_decoder__print_str_cb(const void *p_context,
 					 const char *msg,
 					 const int str_len)
 {
-	if (p_context && str_len)
-		((struct cs_etm_decoder *)p_context)->packet_printer(msg);
+	const struct cs_etm_decoder *decoder = p_context;
+
+	if (p_context && str_len && !decoder->suppress_printing)
+		decoder->packet_printer(msg);
 }
 
 static int
@@ -223,55 +255,6 @@ cs_etm_decoder__init_raw_frame_logging(
 }
 #endif
 
-static int cs_etm_decoder__create_packet_printer(struct cs_etm_decoder *decoder,
-						 const char *decoder_name,
-						 void *trace_config)
-{
-	u8 csid;
-
-	if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder_name,
-				   OCSD_CREATE_FLG_PACKET_PROC,
-				   trace_config, &csid))
-		return -1;
-
-	if (ocsd_dt_set_pkt_protocol_printer(decoder->dcd_tree, csid, 0))
-		return -1;
-
-	return 0;
-}
-
-static int
-cs_etm_decoder__create_etm_packet_printer(struct cs_etm_trace_params *t_params,
-					  struct cs_etm_decoder *decoder)
-{
-	const char *decoder_name;
-	ocsd_etmv3_cfg config_etmv3;
-	ocsd_etmv4_cfg trace_config_etmv4;
-	void *trace_config;
-
-	switch (t_params->protocol) {
-	case CS_ETM_PROTO_ETMV3:
-	case CS_ETM_PROTO_PTM:
-		cs_etm_decoder__gen_etmv3_config(t_params, &config_etmv3);
-		decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ?
-							OCSD_BUILTIN_DCD_ETMV3 :
-							OCSD_BUILTIN_DCD_PTM;
-		trace_config = &config_etmv3;
-		break;
-	case CS_ETM_PROTO_ETMV4i:
-		cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4);
-		decoder_name = OCSD_BUILTIN_DCD_ETMV4I;
-		trace_config = &trace_config_etmv4;
-		break;
-	default:
-		return -1;
-	}
-
-	return cs_etm_decoder__create_packet_printer(decoder,
-						     decoder_name,
-						     trace_config);
-}
-
 static ocsd_datapath_resp_t
 cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq,
 				  struct cs_etm_packet_queue *packet_queue,
@@ -324,8 +307,11 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
 		 * underflow.
 		 */
 		packet_queue->cs_timestamp = 0;
-		WARN_ONCE(true, "Zero Coresight timestamp found at Idx:%" OCSD_TRC_IDX_STR
-				". Decoding may be improved with --itrace=Z...\n", indx);
+		if (!cs_etm__etmq_is_timeless(etmq))
+			pr_warning_once("Zero Coresight timestamp found at Idx:%" OCSD_TRC_IDX_STR
+					". Decoding may be improved by prepending 'Z' to your current --itrace arguments.\n",
+					indx);
+
 	} else if (packet_queue->instr_count > elem->timestamp) {
 		/*
 		 * Sanity check that the elem->timestamp - packet_queue->instr_count would not
@@ -625,13 +611,14 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
 	return resp;
 }
 
-static int cs_etm_decoder__create_etm_packet_decoder(
-					struct cs_etm_trace_params *t_params,
-					struct cs_etm_decoder *decoder)
+static int
+cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params,
+				   struct cs_etm_trace_params *t_params,
+				   struct cs_etm_decoder *decoder)
 {
-	const char *decoder_name;
 	ocsd_etmv3_cfg config_etmv3;
 	ocsd_etmv4_cfg trace_config_etmv4;
+	ocsd_ete_cfg trace_config_ete;
 	void *trace_config;
 	u8 csid;
 
@@ -639,51 +626,55 @@ static int cs_etm_decoder__create_etm_packet_decoder(
 	case CS_ETM_PROTO_ETMV3:
 	case CS_ETM_PROTO_PTM:
 		cs_etm_decoder__gen_etmv3_config(t_params, &config_etmv3);
-		decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ?
+		decoder->decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ?
 							OCSD_BUILTIN_DCD_ETMV3 :
 							OCSD_BUILTIN_DCD_PTM;
 		trace_config = &config_etmv3;
 		break;
 	case CS_ETM_PROTO_ETMV4i:
 		cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4);
-		decoder_name = OCSD_BUILTIN_DCD_ETMV4I;
+		decoder->decoder_name = OCSD_BUILTIN_DCD_ETMV4I;
 		trace_config = &trace_config_etmv4;
 		break;
+	case CS_ETM_PROTO_ETE:
+		cs_etm_decoder__gen_ete_config(t_params, &trace_config_ete);
+		decoder->decoder_name = OCSD_BUILTIN_DCD_ETE;
+		trace_config = &trace_config_ete;
+		break;
 	default:
 		return -1;
 	}
 
-	if (ocsd_dt_create_decoder(decoder->dcd_tree,
-				     decoder_name,
-				     OCSD_CREATE_FLG_FULL_DECODER,
-				     trace_config, &csid))
-		return -1;
+	if (d_params->operation == CS_ETM_OPERATION_DECODE) {
+		if (ocsd_dt_create_decoder(decoder->dcd_tree,
+					   decoder->decoder_name,
+					   OCSD_CREATE_FLG_FULL_DECODER,
+					   trace_config, &csid))
+			return -1;
 
-	if (ocsd_dt_set_gen_elem_outfn(decoder->dcd_tree,
-				       cs_etm_decoder__gen_trace_elem_printer,
-				       decoder))
-		return -1;
+		if (ocsd_dt_set_gen_elem_outfn(decoder->dcd_tree,
+					       cs_etm_decoder__gen_trace_elem_printer,
+					       decoder))
+			return -1;
 
-	return 0;
-}
+		return 0;
+	} else if (d_params->operation == CS_ETM_OPERATION_PRINT) {
+		if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder->decoder_name,
+					   OCSD_CREATE_FLG_PACKET_PROC,
+					   trace_config, &csid))
+			return -1;
 
-static int
-cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params,
-				   struct cs_etm_trace_params *t_params,
-				   struct cs_etm_decoder *decoder)
-{
-	if (d_params->operation == CS_ETM_OPERATION_PRINT)
-		return cs_etm_decoder__create_etm_packet_printer(t_params,
-								 decoder);
-	else if (d_params->operation == CS_ETM_OPERATION_DECODE)
-		return cs_etm_decoder__create_etm_packet_decoder(t_params,
-								 decoder);
+		if (ocsd_dt_set_pkt_protocol_printer(decoder->dcd_tree, csid, 0))
+			return -1;
+
+		return 0;
+	}
 
 	return -1;
 }
 
 struct cs_etm_decoder *
-cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params,
+cs_etm_decoder__new(int decoders, struct cs_etm_decoder_params *d_params,
 		    struct cs_etm_trace_params t_params[])
 {
 	struct cs_etm_decoder *decoder;
@@ -728,7 +719,7 @@ cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params,
 	/* init raw frame logging if required */
 	cs_etm_decoder__init_raw_frame_logging(d_params, decoder);
 
-	for (i = 0; i < num_cpu; i++) {
+	for (i = 0; i < decoders; i++) {
 		ret = cs_etm_decoder__create_etm_decoder(d_params,
 							 &t_params[i],
 							 decoder);
@@ -800,3 +791,8 @@ void cs_etm_decoder__free(struct cs_etm_decoder *decoder)
 	decoder->dcd_tree = NULL;
 	free(decoder);
 }
+
+const char *cs_etm_decoder__get_name(struct cs_etm_decoder *decoder)
+{
+	return decoder->decoder_name;
+}
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
index 11f3391d06f2..92a855fbe5b8 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -37,11 +37,22 @@ struct cs_etmv4_trace_params {
 	u32 reg_traceidr;
 };
 
+struct cs_ete_trace_params {
+	u32 reg_idr0;
+	u32 reg_idr1;
+	u32 reg_idr2;
+	u32 reg_idr8;
+	u32 reg_configr;
+	u32 reg_traceidr;
+	u32 reg_devarch;
+};
+
 struct cs_etm_trace_params {
 	int protocol;
 	union {
 		struct cs_etmv3_trace_params etmv3;
 		struct cs_etmv4_trace_params etmv4;
+		struct cs_ete_trace_params ete;
 	};
 };
 
@@ -65,6 +76,7 @@ enum {
 	CS_ETM_PROTO_ETMV4i,
 	CS_ETM_PROTO_ETMV4d,
 	CS_ETM_PROTO_PTM,
+	CS_ETM_PROTO_ETE
 };
 
 enum cs_etm_decoder_operation {
@@ -92,5 +104,6 @@ int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue,
 			       struct cs_etm_packet *packet);
 
 int cs_etm_decoder__reset(struct cs_etm_decoder *decoder);
+const char *cs_etm_decoder__get_name(struct cs_etm_decoder *decoder);
 
 #endif /* INCLUDE__CS_ETM_DECODER_H__ */
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index bc1f64873c8f..f323adb1af85 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -62,7 +62,6 @@ struct cs_etm_auxtrace {
 	u64 instructions_sample_period;
 	u64 instructions_id;
 	u64 **metadata;
-	u64 kernel_start;
 	unsigned int pmu_type;
 };
 
@@ -97,7 +96,6 @@ struct cs_etm_queue {
 /* RB tree for quick conversion between traceID and metadata pointers */
 static struct intlist *traceid_list;
 
-static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
 static int cs_etm__process_queues(struct cs_etm_auxtrace *etm);
 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
 					   pid_t tid);
@@ -462,14 +460,30 @@ static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
 	t_params[idx].etmv4.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR];
 }
 
+static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
+					  struct cs_etm_auxtrace *etm, int idx)
+{
+	u64 **metadata = etm->metadata;
+
+	t_params[idx].protocol = CS_ETM_PROTO_ETE;
+	t_params[idx].ete.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0];
+	t_params[idx].ete.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1];
+	t_params[idx].ete.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2];
+	t_params[idx].ete.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8];
+	t_params[idx].ete.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR];
+	t_params[idx].ete.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR];
+	t_params[idx].ete.reg_devarch = metadata[idx][CS_ETE_TRCDEVARCH];
+}
+
 static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
-				     struct cs_etm_auxtrace *etm)
+				     struct cs_etm_auxtrace *etm,
+				     int decoders)
 {
 	int i;
 	u32 etmidr;
 	u64 architecture;
 
-	for (i = 0; i < etm->num_cpu; i++) {
+	for (i = 0; i < decoders; i++) {
 		architecture = etm->metadata[i][CS_ETM_MAGIC];
 
 		switch (architecture) {
@@ -480,6 +494,9 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
 		case __perf_cs_etmv4_magic:
 			cs_etm__set_trace_param_etmv4(t_params, etm, i);
 			break;
+		case __perf_cs_ete_magic:
+			cs_etm__set_trace_param_ete(t_params, etm, i);
+			break;
 		default:
 			return -EINVAL;
 		}
@@ -490,7 +507,8 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
 
 static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
 				       struct cs_etm_queue *etmq,
-				       enum cs_etm_decoder_operation mode)
+				       enum cs_etm_decoder_operation mode,
+				       bool formatted)
 {
 	int ret = -EINVAL;
 
@@ -500,7 +518,7 @@ static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
 	d_params->packet_printer = cs_etm__packet_dump;
 	d_params->operation = mode;
 	d_params->data = etmq;
-	d_params->formatted = true;
+	d_params->formatted = formatted;
 	d_params->fsyncs = false;
 	d_params->hsyncs = false;
 	d_params->frame_aligned = true;
@@ -510,44 +528,23 @@ out:
 	return ret;
 }
 
-static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
+static void cs_etm__dump_event(struct cs_etm_queue *etmq,
 			       struct auxtrace_buffer *buffer)
 {
 	int ret;
 	const char *color = PERF_COLOR_BLUE;
-	struct cs_etm_decoder_params d_params;
-	struct cs_etm_trace_params *t_params;
-	struct cs_etm_decoder *decoder;
 	size_t buffer_used = 0;
 
 	fprintf(stdout, "\n");
 	color_fprintf(stdout, color,
-		     ". ... CoreSight ETM Trace data: size %zu bytes\n",
-		     buffer->size);
-
-	/* Use metadata to fill in trace parameters for trace decoder */
-	t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
-
-	if (!t_params)
-		return;
-
-	if (cs_etm__init_trace_params(t_params, etm))
-		goto out_free;
+		     ". ... CoreSight %s Trace data: size %zu bytes\n",
+		     cs_etm_decoder__get_name(etmq->decoder), buffer->size);
 
-	/* Set decoder parameters to simply print the trace packets */
-	if (cs_etm__init_decoder_params(&d_params, NULL,
-					CS_ETM_OPERATION_PRINT))
-		goto out_free;
-
-	decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);
-
-	if (!decoder)
-		goto out_free;
 	do {
 		size_t consumed;
 
 		ret = cs_etm_decoder__process_data_block(
-				decoder, buffer->offset,
+				etmq->decoder, buffer->offset,
 				&((u8 *)buffer->data)[buffer_used],
 				buffer->size - buffer_used, &consumed);
 		if (ret)
@@ -556,16 +553,12 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
 		buffer_used += consumed;
 	} while (buffer_used < buffer->size);
 
-	cs_etm_decoder__free(decoder);
-
-out_free:
-	zfree(&t_params);
+	cs_etm_decoder__reset(etmq->decoder);
 }
 
 static int cs_etm__flush_events(struct perf_session *session,
 				struct perf_tool *tool)
 {
-	int ret;
 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
 						   struct cs_etm_auxtrace,
 						   auxtrace);
@@ -575,11 +568,6 @@ static int cs_etm__flush_events(struct perf_session *session,
 	if (!tool->ordered_events)
 		return -EINVAL;
 
-	ret = cs_etm__update_queues(etm);
-
-	if (ret < 0)
-		return ret;
-
 	if (etm->timeless_decoding)
 		return cs_etm__process_timeless_queues(etm, -1);
 
@@ -691,7 +679,7 @@ static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
 
 	machine = etmq->etm->machine;
 
-	if (address >= etmq->etm->kernel_start) {
+	if (address >= machine__kernel_start(machine)) {
 		if (machine__is_host(machine))
 			return PERF_RECORD_MISC_KERNEL;
 		else
@@ -746,17 +734,32 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
 
 	len = dso__data_read_offset(al.map->dso, machine, offset, buffer, size);
 
-	if (len <= 0)
+	if (len <= 0) {
+		ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
+				 "              Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
+		if (!al.map->dso->auxtrace_warned) {
+			pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
+				    address,
+				    al.map->dso->long_name ? al.map->dso->long_name : "Unknown");
+			al.map->dso->auxtrace_warned = true;
+		}
 		return 0;
+	}
 
 	return len;
 }
 
-static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm)
+static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
+						bool formatted)
 {
 	struct cs_etm_decoder_params d_params;
 	struct cs_etm_trace_params  *t_params = NULL;
 	struct cs_etm_queue *etmq;
+	/*
+	 * Each queue can only contain data from one CPU when unformatted, so only one decoder is
+	 * needed.
+	 */
+	int decoders = formatted ? etm->num_cpu : 1;
 
 	etmq = zalloc(sizeof(*etmq));
 	if (!etmq)
@@ -767,20 +770,23 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm)
 		goto out_free;
 
 	/* Use metadata to fill in trace parameters for trace decoder */
-	t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
+	t_params = zalloc(sizeof(*t_params) * decoders);
 
 	if (!t_params)
 		goto out_free;
 
-	if (cs_etm__init_trace_params(t_params, etm))
+	if (cs_etm__init_trace_params(t_params, etm, decoders))
 		goto out_free;
 
 	/* Set decoder parameters to decode trace packets */
 	if (cs_etm__init_decoder_params(&d_params, etmq,
-					CS_ETM_OPERATION_DECODE))
+					dump_trace ? CS_ETM_OPERATION_PRINT :
+						     CS_ETM_OPERATION_DECODE,
+					formatted))
 		goto out_free;
 
-	etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);
+	etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
+					    t_params);
 
 	if (!etmq->decoder)
 		goto out_free;
@@ -808,31 +814,35 @@ out_free:
 
 static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
 			       struct auxtrace_queue *queue,
-			       unsigned int queue_nr)
+			       unsigned int queue_nr,
+			       bool formatted)
 {
-	int ret = 0;
-	unsigned int cs_queue_nr;
-	u8 trace_chan_id;
-	u64 cs_timestamp;
 	struct cs_etm_queue *etmq = queue->priv;
 
 	if (list_empty(&queue->head) || etmq)
-		goto out;
+		return 0;
 
-	etmq = cs_etm__alloc_queue(etm);
+	etmq = cs_etm__alloc_queue(etm, formatted);
 
-	if (!etmq) {
-		ret = -ENOMEM;
-		goto out;
-	}
+	if (!etmq)
+		return -ENOMEM;
 
 	queue->priv = etmq;
 	etmq->etm = etm;
 	etmq->queue_nr = queue_nr;
 	etmq->offset = 0;
 
-	if (etm->timeless_decoding)
-		goto out;
+	return 0;
+}
+
+static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
+					    struct cs_etm_queue *etmq,
+					    unsigned int queue_nr)
+{
+	int ret = 0;
+	unsigned int cs_queue_nr;
+	u8 trace_chan_id;
+	u64 cs_timestamp;
 
 	/*
 	 * We are under a CPU-wide trace scenario.  As such we need to know
@@ -896,33 +906,6 @@ out:
 	return ret;
 }
 
-static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm)
-{
-	unsigned int i;
-	int ret;
-
-	if (!etm->kernel_start)
-		etm->kernel_start = machine__kernel_start(etm->machine);
-
-	for (i = 0; i < etm->queues.nr_queues; i++) {
-		ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
-static int cs_etm__update_queues(struct cs_etm_auxtrace *etm)
-{
-	if (etm->queues.new_data) {
-		etm->queues.new_data = false;
-		return cs_etm__setup_queues(etm);
-	}
-
-	return 0;
-}
-
 static inline
 void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
 				 struct cs_etm_traceid_queue *tidq)
@@ -2222,13 +2205,27 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
 static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
 {
 	int ret = 0;
-	unsigned int cs_queue_nr, queue_nr;
+	unsigned int cs_queue_nr, queue_nr, i;
 	u8 trace_chan_id;
 	u64 cs_timestamp;
 	struct auxtrace_queue *queue;
 	struct cs_etm_queue *etmq;
 	struct cs_etm_traceid_queue *tidq;
 
+	/*
+	 * Pre-populate the heap with one entry from each queue so that we can
+	 * start processing in time order across all queues.
+	 */
+	for (i = 0; i < etm->queues.nr_queues; i++) {
+		etmq = etm->queues.queue_array[i].priv;
+		if (!etmq)
+			continue;
+
+		ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
+		if (ret)
+			return ret;
+	}
+
 	while (1) {
 		if (!etm->heap.heap_cnt)
 			goto out;
@@ -2382,7 +2379,6 @@ static int cs_etm__process_event(struct perf_session *session,
 				 struct perf_sample *sample,
 				 struct perf_tool *tool)
 {
-	int err = 0;
 	u64 sample_kernel_timestamp;
 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
 						   struct cs_etm_auxtrace,
@@ -2401,12 +2397,6 @@ static int cs_etm__process_event(struct perf_session *session,
 	else
 		sample_kernel_timestamp = 0;
 
-	if (sample_kernel_timestamp || etm->timeless_decoding) {
-		err = cs_etm__update_queues(etm);
-		if (err)
-			return err;
-	}
-
 	/*
 	 * Don't wait for cs_etm__flush_events() in per-thread/timeless mode to start the decode. We
 	 * need the tid of the PERF_RECORD_EXIT event to assign to the synthesised samples because
@@ -2447,7 +2437,7 @@ static void dump_queued_data(struct cs_etm_auxtrace *etm,
 	for (i = 0; i < etm->queues.nr_queues; ++i)
 		list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
 			if (buf->reference == event->reference)
-				cs_etm__dump_event(etm, buf);
+				cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
 }
 
 static int cs_etm__process_auxtrace_event(struct perf_session *session,
@@ -2463,6 +2453,7 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session,
 		int fd = perf_data__fd(session->data);
 		bool is_pipe = perf_data__is_pipe(session->data);
 		int err;
+		int idx = event->auxtrace.idx;
 
 		if (is_pipe)
 			data_offset = 0;
@@ -2477,9 +2468,20 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session,
 		if (err)
 			return err;
 
+		/*
+		 * Knowing if the trace is formatted or not requires a lookup of
+		 * the aux record so only works in non-piped mode where data is
+		 * queued in cs_etm__queue_aux_records(). Always assume
+		 * formatted in piped mode (true).
+		 */
+		err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
+					  idx, true);
+		if (err)
+			return err;
+
 		if (dump_trace)
 			if (auxtrace_buffer__get_data(buffer, fd)) {
-				cs_etm__dump_event(etm, buffer);
+				cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
 				auxtrace_buffer__put_data(buffer);
 			}
 	} else if (dump_trace)
@@ -2537,6 +2539,7 @@ static const char * const cs_etmv4_priv_fmts[] = {
 	[CS_ETMV4_TRCIDR2]	= "	TRCIDR2			       %llx\n",
 	[CS_ETMV4_TRCIDR8]	= "	TRCIDR8			       %llx\n",
 	[CS_ETMV4_TRCAUTHSTATUS] = "	TRCAUTHSTATUS		       %llx\n",
+	[CS_ETE_TRCDEVARCH]	= "	TRCDEVARCH                     %llx\n"
 };
 
 static const char * const param_unk_fmt =
@@ -2596,10 +2599,15 @@ static int cs_etm__print_cpu_metadata_v1(__u64 *val, int *offset)
 			else
 				fprintf(stdout, cs_etm_priv_fmts[j], val[i]);
 		}
-	} else if (magic == __perf_cs_etmv4_magic) {
+	} else if (magic == __perf_cs_etmv4_magic || magic == __perf_cs_ete_magic) {
+		/*
+		 * ETE and ETMv4 can be printed in the same block because the number of parameters
+		 * is saved and they share the list of parameter names. ETE is also only supported
+		 * in V1 files.
+		 */
 		for (j = 0; j < total_params; j++, i++) {
 			/* if newer record - could be excess params */
-			if (j >= CS_ETMV4_PRIV_MAX)
+			if (j >= CS_ETE_PRIV_MAX)
 				fprintf(stdout, param_unk_fmt, j, val[i]);
 			else
 				fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]);
@@ -2719,6 +2727,8 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
 	struct perf_record_auxtrace *auxtrace_event;
 	union perf_event auxtrace_fragment;
 	__u64 aux_offset, aux_size;
+	__u32 idx;
+	bool formatted;
 
 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
 						   struct cs_etm_auxtrace,
@@ -2780,8 +2790,15 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
 
 		pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
 			  " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
-		return auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
-						  file_offset, NULL);
+		err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
+						 file_offset, NULL);
+		if (err)
+			return err;
+
+		idx = auxtrace_event->idx;
+		formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
+		return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
+					   idx, formatted);
 	}
 
 	/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
@@ -2959,6 +2976,16 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 
 			/* The traceID is our handle */
 			trcidr_idx = CS_ETMV4_TRCTRACEIDR;
+		} else if (ptr[i] == __perf_cs_ete_magic) {
+			metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1);
+
+			/* ETE shares first part of metadata with ETMv4 */
+			trcidr_idx = CS_ETMV4_TRCTRACEIDR;
+		} else {
+			ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
+				  ptr[i]);
+			err = -EINVAL;
+			goto err_free_metadata;
 		}
 
 		if (!metadata[j]) {
@@ -3070,6 +3097,13 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 		goto err_delete_thread;
 
 	etm->data_queued = etm->queues.populated;
+	/*
+	 * Print warning in pipe mode, see cs_etm__process_auxtrace_event() and
+	 * cs_etm__queue_aux_fragment() for details relating to limitations.
+	 */
+	if (!etm->data_queued)
+		pr_warning("CS ETM warning: Coresight decode and TRBE support requires random file access.\n"
+			   "Continuing with best effort decoding in piped mode.\n\n");
 
 	return 0;
 
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index d65c7b19407d..90c83f932d9a 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -77,6 +77,15 @@ enum {
 #define CS_ETMV4_NR_TRC_PARAMS_V0 (CS_ETMV4_TRCAUTHSTATUS - CS_ETMV4_TRCCONFIGR + 1)
 
 /*
+ * ETE metadata is ETMv4 plus TRCDEVARCH register and doesn't support header V0 since it was
+ * added in header V1
+ */
+enum {
+	CS_ETE_TRCDEVARCH = CS_ETMV4_PRIV_MAX,
+	CS_ETE_PRIV_MAX
+};
+
+/*
  * ETMv3 exception encoding number:
  * See Embedded Trace Macrocell specification (ARM IHI 0014Q)
  * table 7-12 Encoding of Exception[3:0] for non-ARMv7-M processors.
@@ -187,8 +196,10 @@ struct cs_etm_packet_queue {
 
 #define __perf_cs_etmv3_magic 0x3030303030303030ULL
 #define __perf_cs_etmv4_magic 0x4040404040404040ULL
+#define __perf_cs_ete_magic   0x5050505050505050ULL
 #define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64))
 #define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64))
+#define CS_ETE_PRIV_SIZE (CS_ETE_PRIV_MAX * sizeof(u64))
 
 #ifdef HAVE_CSTRACE_SUPPORT
 int cs_etm__process_auxtrace_info(union perf_event *event,
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index cace349fb700..aa862a26d95c 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -1634,7 +1634,7 @@ int bt_convert__perf2ctf(const char *input, const char *path,
 
 	err = -1;
 	/* perf.data session */
-	session = perf_session__new(&data, 0, &c.tool);
+	session = perf_session__new(&data, &c.tool);
 	if (IS_ERR(session))
 		return PTR_ERR(session);
 
diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c
index 355cd1948bdf..f1ab6edba446 100644
--- a/tools/perf/util/data-convert-json.c
+++ b/tools/perf/util/data-convert-json.c
@@ -334,7 +334,7 @@ int bt_convert__perf2json(const char *input_name, const char *output_name,
 		goto err;
 	}
 
-	session = perf_session__new(&data, false, &c.tool);
+	session = perf_session__new(&data, &c.tool);
 	if (IS_ERR(session)) {
 		fprintf(stderr, "Error creating perf session!\n");
 		goto err_fclose;
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index 48f631966067..f99468a7f681 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -22,6 +22,13 @@ extern int debug_data_convert;
 	eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
 #define pr_warning(fmt, ...) \
 	eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warning_once(fmt, ...) ({		\
+	static int __warned;			\
+	if (unlikely(!__warned)) {		\
+		pr_warning(fmt, ##__VA_ARGS__); \
+		__warned = 1;			\
+	}					\
+})
 #define pr_info(fmt, ...) \
 	eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
 #define pr_debug(fmt, ...) \
@@ -55,6 +62,13 @@ void trace_event(union perf_event *event);
 
 int ui__error(const char *format, ...) __printf(1, 2);
 int ui__warning(const char *format, ...) __printf(1, 2);
+#define ui__warning_once(format, ...) ({		\
+	static int __warned;				\
+	if (unlikely(!__warned)) {			\
+		ui__warning(format, ##__VA_ARGS__);	\
+		__warned = 1;				\
+	}						\
+})
 
 void pr_stat(const char *fmt, ...);
 
diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c
index ca33fbc5efde..db964d5a52af 100644
--- a/tools/perf/util/dlfilter.c
+++ b/tools/perf/util/dlfilter.c
@@ -21,7 +21,7 @@
 #include "symbol.h"
 #include "srcline.h"
 #include "dlfilter.h"
-#include "perf_dlfilter.h"
+#include "../include/perf/perf_dlfilter.h"
 
 static void al_to_d_al(struct addr_location *al, struct perf_dlfilter_al *d_al)
 {
@@ -530,8 +530,8 @@ int dlfilter__do_filter_event(struct dlfilter *d,
 	return ret;
 }
 
-static bool get_filter_desc(const char *dirname, const char *name,
-			    char **desc, char **long_desc)
+bool get_filter_desc(const char *dirname, const char *name, char **desc,
+		     char **long_desc)
 {
 	char path[PATH_MAX];
 	void *handle;
diff --git a/tools/perf/util/dlfilter.h b/tools/perf/util/dlfilter.h
index 505980442360..cc4bb9657d05 100644
--- a/tools/perf/util/dlfilter.h
+++ b/tools/perf/util/dlfilter.h
@@ -93,5 +93,7 @@ static inline int dlfilter__filter_event_early(struct dlfilter *d,
 }
 
 int list_available_dlfilters(const struct option *opt, const char *s, int unset);
+bool get_filter_desc(const char *dirname, const char *name, char **desc,
+		     char **long_desc);
 
 #endif
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index ee15db2be2f4..9ed9a5676d35 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -1349,6 +1349,16 @@ void dso__set_build_id(struct dso *dso, struct build_id *bid)
 
 bool dso__build_id_equal(const struct dso *dso, struct build_id *bid)
 {
+	if (dso->bid.size > bid->size && dso->bid.size == BUILD_ID_SIZE) {
+		/*
+		 * For the backward compatibility, it allows a build-id has
+		 * trailing zeros.
+		 */
+		return !memcmp(dso->bid.data, bid->data, bid->size) &&
+			!memchr_inv(&dso->bid.data[bid->size], 0,
+				    dso->bid.size - bid->size);
+	}
+
 	return dso->bid.size == bid->size &&
 	       memcmp(dso->bid.data, bid->data, dso->bid.size) == 0;
 }
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 52e7101c5609..83723ba11dc8 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -170,6 +170,7 @@ struct dso {
 	u8		 has_srcline:1;
 	u8		 hit:1;
 	u8		 annotate_warned:1;
+	u8		 auxtrace_warned:1;
 	u8		 short_name_allocated:1;
 	u8		 long_name_allocated:1;
 	u8		 is_64_bit:1;
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index cec2e6cad8aa..cf773f0dec38 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -10,6 +10,7 @@
 #include <sys/utsname.h>
 #include <stdlib.h>
 #include <string.h>
+#include "strbuf.h"
 
 struct perf_env perf_env;
 
@@ -219,13 +220,35 @@ void perf_env__exit(struct perf_env *env)
 	zfree(&env->hybrid_cpc_nodes);
 }
 
-void perf_env__init(struct perf_env *env __maybe_unused)
+void perf_env__init(struct perf_env *env)
 {
 #ifdef HAVE_LIBBPF_SUPPORT
 	env->bpf_progs.infos = RB_ROOT;
 	env->bpf_progs.btfs = RB_ROOT;
 	init_rwsem(&env->bpf_progs.lock);
 #endif
+	env->kernel_is_64_bit = -1;
+}
+
+static void perf_env__init_kernel_mode(struct perf_env *env)
+{
+	const char *arch = perf_env__raw_arch(env);
+
+	if (!strncmp(arch, "x86_64", 6) || !strncmp(arch, "aarch64", 7) ||
+	    !strncmp(arch, "arm64", 5) || !strncmp(arch, "mips64", 6) ||
+	    !strncmp(arch, "parisc64", 8) || !strncmp(arch, "riscv64", 7) ||
+	    !strncmp(arch, "s390x", 5) || !strncmp(arch, "sparc64", 7))
+		env->kernel_is_64_bit = 1;
+	else
+		env->kernel_is_64_bit = 0;
+}
+
+int perf_env__kernel_is_64_bit(struct perf_env *env)
+{
+	if (env->kernel_is_64_bit == -1)
+		perf_env__init_kernel_mode(env);
+
+	return env->kernel_is_64_bit;
 }
 
 int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[])
@@ -284,6 +307,45 @@ int perf_env__read_cpu_topology_map(struct perf_env *env)
 	return 0;
 }
 
+int perf_env__read_pmu_mappings(struct perf_env *env)
+{
+	struct perf_pmu *pmu = NULL;
+	u32 pmu_num = 0;
+	struct strbuf sb;
+
+	while ((pmu = perf_pmu__scan(pmu))) {
+		if (!pmu->name)
+			continue;
+		pmu_num++;
+	}
+	if (!pmu_num) {
+		pr_debug("pmu mappings not available\n");
+		return -ENOENT;
+	}
+	env->nr_pmu_mappings = pmu_num;
+
+	if (strbuf_init(&sb, 128 * pmu_num) < 0)
+		return -ENOMEM;
+
+	while ((pmu = perf_pmu__scan(pmu))) {
+		if (!pmu->name)
+			continue;
+		if (strbuf_addf(&sb, "%u:%s", pmu->type, pmu->name) < 0)
+			goto error;
+		/* include a NULL character at the end */
+		if (strbuf_add(&sb, "", 1) < 0)
+			goto error;
+	}
+
+	env->pmu_mappings = strbuf_detach(&sb, NULL);
+
+	return 0;
+
+error:
+	strbuf_release(&sb);
+	return -1;
+}
+
 int perf_env__read_cpuid(struct perf_env *env)
 {
 	char cpuid[128];
@@ -349,7 +411,7 @@ static const char *normalize_arch(char *arch)
 		return "x86";
 	if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5))
 		return "sparc";
-	if (!strcmp(arch, "aarch64") || !strcmp(arch, "arm64"))
+	if (!strncmp(arch, "aarch64", 7) || !strncmp(arch, "arm64", 5))
 		return "arm64";
 	if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110"))
 		return "arm";
@@ -382,6 +444,44 @@ const char *perf_env__arch(struct perf_env *env)
 	return normalize_arch(arch_name);
 }
 
+const char *perf_env__cpuid(struct perf_env *env)
+{
+	int status;
+
+	if (!env || !env->cpuid) { /* Assume local operation */
+		status = perf_env__read_cpuid(env);
+		if (status)
+			return NULL;
+	}
+
+	return env->cpuid;
+}
+
+int perf_env__nr_pmu_mappings(struct perf_env *env)
+{
+	int status;
+
+	if (!env || !env->nr_pmu_mappings) { /* Assume local operation */
+		status = perf_env__read_pmu_mappings(env);
+		if (status)
+			return 0;
+	}
+
+	return env->nr_pmu_mappings;
+}
+
+const char *perf_env__pmu_mappings(struct perf_env *env)
+{
+	int status;
+
+	if (!env || !env->pmu_mappings) { /* Assume local operation */
+		status = perf_env__read_pmu_mappings(env);
+		if (status)
+			return NULL;
+	}
+
+	return env->pmu_mappings;
+}
 
 int perf_env__numa_node(struct perf_env *env, int cpu)
 {
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 6824a7423a2d..1383876f72b3 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -61,6 +61,7 @@ struct perf_env {
 	unsigned long long	total_mem;
 	unsigned int		msr_pmu_type;
 	unsigned int		max_branches;
+	int			kernel_is_64_bit;
 
 	int			nr_cmdline;
 	int			nr_sibling_cores;
@@ -143,14 +144,21 @@ extern struct perf_env perf_env;
 
 void perf_env__exit(struct perf_env *env);
 
+int perf_env__kernel_is_64_bit(struct perf_env *env);
+
 int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]);
 
 int perf_env__read_cpuid(struct perf_env *env);
+int perf_env__read_pmu_mappings(struct perf_env *env);
+int perf_env__nr_pmu_mappings(struct perf_env *env);
+const char *perf_env__pmu_mappings(struct perf_env *env);
+
 int perf_env__read_cpu_topology_map(struct perf_env *env);
 
 void cpu_cache_level__free(struct cpu_cache_level *cache);
 
 const char *perf_env__arch(struct perf_env *env);
+const char *perf_env__cpuid(struct perf_env *env);
 const char *perf_env__raw_arch(struct perf_env *env);
 int perf_env__nr_cpus_avail(struct perf_env *env);
 
diff --git a/tools/perf/util/events_stats.h b/tools/perf/util/events_stats.h
index 3480bafd414b..1b0006092265 100644
--- a/tools/perf/util/events_stats.h
+++ b/tools/perf/util/events_stats.h
@@ -30,6 +30,7 @@ struct events_stats {
 	u64 total_lost_samples;
 	u64 total_aux_lost;
 	u64 total_aux_partial;
+	u64 total_aux_collision;
 	u64 total_invalid_chains;
 	u32 nr_events[PERF_RECORD_HEADER_MAX];
 	u32 nr_lost_warned;
diff --git a/tools/perf/util/evlist-hybrid.c b/tools/perf/util/evlist-hybrid.c
index db3f5fbdebe1..7c554234b43d 100644
--- a/tools/perf/util/evlist-hybrid.c
+++ b/tools/perf/util/evlist-hybrid.c
@@ -86,3 +86,76 @@ bool evlist__has_hybrid(struct evlist *evlist)
 
 	return false;
 }
+
+int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list)
+{
+	struct perf_cpu_map *cpus;
+	struct evsel *evsel, *tmp;
+	struct perf_pmu *pmu;
+	int ret, unmatched_count = 0, events_nr = 0;
+
+	if (!perf_pmu__has_hybrid() || !cpu_list)
+		return 0;
+
+	cpus = perf_cpu_map__new(cpu_list);
+	if (!cpus)
+		return -1;
+
+	/*
+	 * The evsels are created with hybrid pmu's cpus. But now we
+	 * need to check and adjust the cpus of evsel by cpu_list because
+	 * cpu_list may cause conflicts with cpus of evsel. For example,
+	 * cpus of evsel is cpu0-7, but the cpu_list is cpu6-8, we need
+	 * to adjust the cpus of evsel to cpu6-7. And then propatate maps
+	 * in evlist__create_maps().
+	 */
+	evlist__for_each_entry_safe(evlist, tmp, evsel) {
+		struct perf_cpu_map *matched_cpus, *unmatched_cpus;
+		char buf1[128], buf2[128];
+
+		pmu = perf_pmu__find_hybrid_pmu(evsel->pmu_name);
+		if (!pmu)
+			continue;
+
+		ret = perf_pmu__cpus_match(pmu, cpus, &matched_cpus,
+					   &unmatched_cpus);
+		if (ret)
+			goto out;
+
+		events_nr++;
+
+		if (matched_cpus->nr > 0 && (unmatched_cpus->nr > 0 ||
+		    matched_cpus->nr < cpus->nr ||
+		    matched_cpus->nr < pmu->cpus->nr)) {
+			perf_cpu_map__put(evsel->core.cpus);
+			perf_cpu_map__put(evsel->core.own_cpus);
+			evsel->core.cpus = perf_cpu_map__get(matched_cpus);
+			evsel->core.own_cpus = perf_cpu_map__get(matched_cpus);
+
+			if (unmatched_cpus->nr > 0) {
+				cpu_map__snprint(matched_cpus, buf1, sizeof(buf1));
+				pr_warning("WARNING: use %s in '%s' for '%s', skip other cpus in list.\n",
+					   buf1, pmu->name, evsel->name);
+			}
+		}
+
+		if (matched_cpus->nr == 0) {
+			evlist__remove(evlist, evsel);
+			evsel__delete(evsel);
+
+			cpu_map__snprint(cpus, buf1, sizeof(buf1));
+			cpu_map__snprint(pmu->cpus, buf2, sizeof(buf2));
+			pr_warning("WARNING: %s isn't a '%s', please use a CPU list in the '%s' range (%s)\n",
+				   buf1, pmu->name, pmu->name, buf2);
+			unmatched_count++;
+		}
+
+		perf_cpu_map__put(matched_cpus);
+		perf_cpu_map__put(unmatched_cpus);
+	}
+
+	ret = (unmatched_count == events_nr) ? -1 : 0;
+out:
+	perf_cpu_map__put(cpus);
+	return ret;
+}
diff --git a/tools/perf/util/evlist-hybrid.h b/tools/perf/util/evlist-hybrid.h
index 19f74b4c340a..aacdb1b0f948 100644
--- a/tools/perf/util/evlist-hybrid.h
+++ b/tools/perf/util/evlist-hybrid.h
@@ -10,5 +10,6 @@
 int evlist__add_default_hybrid(struct evlist *evlist, bool precise);
 void evlist__warn_hybrid_group(struct evlist *evlist);
 bool evlist__has_hybrid(struct evlist *evlist);
+int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list);
 
 #endif /* __PERF_EVLIST_HYBRID_H */
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 47581a237c7a..5f92319ce258 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -27,6 +27,7 @@
 #include "util/perf_api_probe.h"
 #include "util/evsel_fprintf.h"
 #include "util/evlist-hybrid.h"
+#include "util/pmu.h"
 #include <signal.h>
 #include <unistd.h>
 #include <sched.h>
@@ -1002,7 +1003,7 @@ int evlist__create_maps(struct evlist *evlist, struct target *target)
 	if (!cpus)
 		goto out_delete_threads;
 
-	evlist->core.has_user_cpus = !!target->cpu_list;
+	evlist->core.has_user_cpus = !!target->cpu_list && !target->hybrid;
 
 	perf_evlist__set_maps(&evlist->core, cpus, threads);
 
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 5c22383489ae..97bfb8d0be4f 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -276,6 +276,22 @@ void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel);
 	__evlist__for_each_entry_continue(&(evlist)->core.entries, evsel)
 
 /**
+ * __evlist__for_each_entry_from - continue iteration from @evsel (included)
+ * @list: list_head instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define __evlist__for_each_entry_from(list, evsel) \
+	list_for_each_entry_from(evsel, list, core.node)
+
+/**
+ * evlist__for_each_entry_from - continue iteration from @evsel (included)
+ * @evlist: evlist instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define evlist__for_each_entry_from(evlist, evsel) \
+	__evlist__for_each_entry_from(&(evlist)->core.entries, evsel)
+
+/**
  * __evlist__for_each_entry_reverse - iterate thru all the evsels in reverse order
  * @list: list_head instance to iterate
  * @evsel: struct evsel iterator
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index f61e5dd53f5d..dbfeceb2546c 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -333,11 +333,11 @@ error_free:
 	goto out;
 }
 
-static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src)
+int copy_config_terms(struct list_head *dst, struct list_head *src)
 {
 	struct evsel_config_term *pos, *tmp;
 
-	list_for_each_entry(pos, &src->config_terms, list) {
+	list_for_each_entry(pos, src, list) {
 		tmp = malloc(sizeof(*tmp));
 		if (tmp == NULL)
 			return -ENOMEM;
@@ -350,11 +350,16 @@ static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src)
 				return -ENOMEM;
 			}
 		}
-		list_add_tail(&tmp->list, &dst->config_terms);
+		list_add_tail(&tmp->list, dst);
 	}
 	return 0;
 }
 
+static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src)
+{
+	return copy_config_terms(&dst->config_terms, &src->config_terms);
+}
+
 /**
  * evsel__clone - create a new evsel copied from @orig
  * @orig: original evsel
@@ -1385,11 +1390,11 @@ int evsel__disable(struct evsel *evsel)
 	return err;
 }
 
-static void evsel__free_config_terms(struct evsel *evsel)
+void free_config_terms(struct list_head *config_terms)
 {
 	struct evsel_config_term *term, *h;
 
-	list_for_each_entry_safe(term, h, &evsel->config_terms, list) {
+	list_for_each_entry_safe(term, h, config_terms, list) {
 		list_del_init(&term->list);
 		if (term->free_str)
 			zfree(&term->val.str);
@@ -1397,6 +1402,11 @@ static void evsel__free_config_terms(struct evsel *evsel)
 	}
 }
 
+static void evsel__free_config_terms(struct evsel *evsel)
+{
+	free_config_terms(&evsel->config_terms);
+}
+
 void evsel__exit(struct evsel *evsel)
 {
 	assert(list_empty(&evsel->core.node));
@@ -1656,7 +1666,7 @@ static int update_fds(struct evsel *evsel,
 	return 0;
 }
 
-static bool ignore_missing_thread(struct evsel *evsel,
+bool evsel__ignore_missing_thread(struct evsel *evsel,
 				  int nr_cpus, int cpu,
 				  struct perf_thread_map *threads,
 				  int thread, int err)
@@ -1709,59 +1719,43 @@ static void display_attr(struct perf_event_attr *attr)
 	}
 }
 
-static int perf_event_open(struct evsel *evsel,
-			   pid_t pid, int cpu, int group_fd,
-			   unsigned long flags)
+bool evsel__precise_ip_fallback(struct evsel *evsel)
 {
-	int precise_ip = evsel->core.attr.precise_ip;
-	int fd;
-
-	while (1) {
-		pr_debug2_peo("sys_perf_event_open: pid %d  cpu %d  group_fd %d  flags %#lx",
-			  pid, cpu, group_fd, flags);
-
-		fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, group_fd, flags);
-		if (fd >= 0)
-			break;
-
-		/* Do not try less precise if not requested. */
-		if (!evsel->precise_max)
-			break;
-
-		/*
-		 * We tried all the precise_ip values, and it's
-		 * still failing, so leave it to standard fallback.
-		 */
-		if (!evsel->core.attr.precise_ip) {
-			evsel->core.attr.precise_ip = precise_ip;
-			break;
-		}
+	/* Do not try less precise if not requested. */
+	if (!evsel->precise_max)
+		return false;
 
-		pr_debug2_peo("\nsys_perf_event_open failed, error %d\n", -ENOTSUP);
-		evsel->core.attr.precise_ip--;
-		pr_debug2_peo("decreasing precise_ip by one (%d)\n", evsel->core.attr.precise_ip);
-		display_attr(&evsel->core.attr);
+	/*
+	 * We tried all the precise_ip values, and it's
+	 * still failing, so leave it to standard fallback.
+	 */
+	if (!evsel->core.attr.precise_ip) {
+		evsel->core.attr.precise_ip = evsel->precise_ip_original;
+		return false;
 	}
 
-	return fd;
+	if (!evsel->precise_ip_original)
+		evsel->precise_ip_original = evsel->core.attr.precise_ip;
+
+	evsel->core.attr.precise_ip--;
+	pr_debug2_peo("decreasing precise_ip by one (%d)\n", evsel->core.attr.precise_ip);
+	display_attr(&evsel->core.attr);
+	return true;
 }
 
-static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
-		struct perf_thread_map *threads,
-		int start_cpu, int end_cpu)
+static struct perf_cpu_map *empty_cpu_map;
+static struct perf_thread_map *empty_thread_map;
+
+static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
+		struct perf_thread_map *threads)
 {
-	int cpu, thread, nthreads;
-	unsigned long flags = PERF_FLAG_FD_CLOEXEC;
-	int pid = -1, err, old_errno;
-	enum { NO_CHANGE, SET_TO_MAX, INCREASED_MAX } set_rlimit = NO_CHANGE;
+	int nthreads;
 
 	if ((perf_missing_features.write_backward && evsel->core.attr.write_backward) ||
 	    (perf_missing_features.aux_output     && evsel->core.attr.aux_output))
 		return -EINVAL;
 
 	if (cpus == NULL) {
-		static struct perf_cpu_map *empty_cpu_map;
-
 		if (empty_cpu_map == NULL) {
 			empty_cpu_map = perf_cpu_map__dummy_new();
 			if (empty_cpu_map == NULL)
@@ -1772,8 +1766,6 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
 	}
 
 	if (threads == NULL) {
-		static struct perf_thread_map *empty_thread_map;
-
 		if (empty_thread_map == NULL) {
 			empty_thread_map = thread_map__new_by_tid(-1);
 			if (empty_thread_map == NULL)
@@ -1792,12 +1784,15 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
 	    perf_evsel__alloc_fd(&evsel->core, cpus->nr, nthreads) < 0)
 		return -ENOMEM;
 
-	if (evsel->cgrp) {
-		flags |= PERF_FLAG_PID_CGROUP;
-		pid = evsel->cgrp->fd;
-	}
+	evsel->open_flags = PERF_FLAG_FD_CLOEXEC;
+	if (evsel->cgrp)
+		evsel->open_flags |= PERF_FLAG_PID_CGROUP;
 
-fallback_missing_features:
+	return 0;
+}
+
+static void evsel__disable_missing_features(struct evsel *evsel)
+{
 	if (perf_missing_features.weight_struct) {
 		evsel__set_sample_bit(evsel, WEIGHT);
 		evsel__reset_sample_bit(evsel, WEIGHT_STRUCT);
@@ -1809,7 +1804,7 @@ fallback_missing_features:
 		evsel->core.attr.clockid = 0;
 	}
 	if (perf_missing_features.cloexec)
-		flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
+		evsel->open_flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
 	if (perf_missing_features.mmap2)
 		evsel->core.attr.mmap2 = 0;
 	if (perf_missing_features.exclude_guest)
@@ -1825,119 +1820,26 @@ fallback_missing_features:
 		evsel->core.attr.bpf_event = 0;
 	if (perf_missing_features.branch_hw_idx)
 		evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_HW_INDEX;
-retry_sample_id:
 	if (perf_missing_features.sample_id_all)
 		evsel->core.attr.sample_id_all = 0;
+}
 
-	display_attr(&evsel->core.attr);
-
-	for (cpu = start_cpu; cpu < end_cpu; cpu++) {
-
-		for (thread = 0; thread < nthreads; thread++) {
-			int fd, group_fd;
-
-			if (!evsel->cgrp && !evsel->core.system_wide)
-				pid = perf_thread_map__pid(threads, thread);
-
-			group_fd = get_group_fd(evsel, cpu, thread);
-retry_open:
-			test_attr__ready();
-
-			fd = perf_event_open(evsel, pid, cpus->map[cpu],
-					     group_fd, flags);
-
-			FD(evsel, cpu, thread) = fd;
-
-			bpf_counter__install_pe(evsel, cpu, fd);
-
-			if (unlikely(test_attr__enabled)) {
-				test_attr__open(&evsel->core.attr, pid, cpus->map[cpu],
-						fd, group_fd, flags);
-			}
-
-			if (fd < 0) {
-				err = -errno;
-
-				if (ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) {
-					/*
-					 * We just removed 1 thread, so take a step
-					 * back on thread index and lower the upper
-					 * nthreads limit.
-					 */
-					nthreads--;
-					thread--;
-
-					/* ... and pretend like nothing have happened. */
-					err = 0;
-					continue;
-				}
-
-				pr_debug2_peo("\nsys_perf_event_open failed, error %d\n",
-					  err);
-				goto try_fallback;
-			}
-
-			pr_debug2_peo(" = %d\n", fd);
-
-			if (evsel->bpf_fd >= 0) {
-				int evt_fd = fd;
-				int bpf_fd = evsel->bpf_fd;
-
-				err = ioctl(evt_fd,
-					    PERF_EVENT_IOC_SET_BPF,
-					    bpf_fd);
-				if (err && errno != EEXIST) {
-					pr_err("failed to attach bpf fd %d: %s\n",
-					       bpf_fd, strerror(errno));
-					err = -EINVAL;
-					goto out_close;
-				}
-			}
-
-			set_rlimit = NO_CHANGE;
-
-			/*
-			 * If we succeeded but had to kill clockid, fail and
-			 * have evsel__open_strerror() print us a nice error.
-			 */
-			if (perf_missing_features.clockid ||
-			    perf_missing_features.clockid_wrong) {
-				err = -EINVAL;
-				goto out_close;
-			}
-		}
-	}
-
-	return 0;
+int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
+			struct perf_thread_map *threads)
+{
+	int err;
 
-try_fallback:
-	/*
-	 * perf stat needs between 5 and 22 fds per CPU. When we run out
-	 * of them try to increase the limits.
-	 */
-	if (err == -EMFILE && set_rlimit < INCREASED_MAX) {
-		struct rlimit l;
+	err = __evsel__prepare_open(evsel, cpus, threads);
+	if (err)
+		return err;
 
-		old_errno = errno;
-		if (getrlimit(RLIMIT_NOFILE, &l) == 0) {
-			if (set_rlimit == NO_CHANGE)
-				l.rlim_cur = l.rlim_max;
-			else {
-				l.rlim_cur = l.rlim_max + 1000;
-				l.rlim_max = l.rlim_cur;
-			}
-			if (setrlimit(RLIMIT_NOFILE, &l) == 0) {
-				set_rlimit++;
-				errno = old_errno;
-				goto retry_open;
-			}
-		}
-		errno = old_errno;
-	}
+	evsel__disable_missing_features(evsel);
 
-	if (err != -EINVAL || cpu > 0 || thread > 0)
-		goto out_close;
+	return err;
+}
 
+bool evsel__detect_missing_features(struct evsel *evsel)
+{
 	/*
 	 * Must probe features in the order they were added to the
 	 * perf_event_attr interface.
@@ -1946,82 +1848,239 @@ try_fallback:
 	    (evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) {
 		perf_missing_features.weight_struct = true;
 		pr_debug2("switching off weight struct support\n");
-		goto fallback_missing_features;
+		return true;
 	} else if (!perf_missing_features.code_page_size &&
 	    (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)) {
 		perf_missing_features.code_page_size = true;
 		pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support, bailing out\n");
-		goto out_close;
+		return false;
 	} else if (!perf_missing_features.data_page_size &&
 	    (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)) {
 		perf_missing_features.data_page_size = true;
 		pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support, bailing out\n");
-		goto out_close;
+		return false;
 	} else if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) {
 		perf_missing_features.cgroup = true;
 		pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n");
-		goto out_close;
-        } else if (!perf_missing_features.branch_hw_idx &&
+		return false;
+	} else if (!perf_missing_features.branch_hw_idx &&
 	    (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) {
 		perf_missing_features.branch_hw_idx = true;
 		pr_debug2("switching off branch HW index support\n");
-		goto fallback_missing_features;
+		return true;
 	} else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) {
 		perf_missing_features.aux_output = true;
 		pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n");
-		goto out_close;
+		return false;
 	} else if (!perf_missing_features.bpf && evsel->core.attr.bpf_event) {
 		perf_missing_features.bpf = true;
 		pr_debug2_peo("switching off bpf_event\n");
-		goto fallback_missing_features;
+		return true;
 	} else if (!perf_missing_features.ksymbol && evsel->core.attr.ksymbol) {
 		perf_missing_features.ksymbol = true;
 		pr_debug2_peo("switching off ksymbol\n");
-		goto fallback_missing_features;
+		return true;
 	} else if (!perf_missing_features.write_backward && evsel->core.attr.write_backward) {
 		perf_missing_features.write_backward = true;
 		pr_debug2_peo("switching off write_backward\n");
-		goto out_close;
+		return false;
 	} else if (!perf_missing_features.clockid_wrong && evsel->core.attr.use_clockid) {
 		perf_missing_features.clockid_wrong = true;
 		pr_debug2_peo("switching off clockid\n");
-		goto fallback_missing_features;
+		return true;
 	} else if (!perf_missing_features.clockid && evsel->core.attr.use_clockid) {
 		perf_missing_features.clockid = true;
 		pr_debug2_peo("switching off use_clockid\n");
-		goto fallback_missing_features;
-	} else if (!perf_missing_features.cloexec && (flags & PERF_FLAG_FD_CLOEXEC)) {
+		return true;
+	} else if (!perf_missing_features.cloexec && (evsel->open_flags & PERF_FLAG_FD_CLOEXEC)) {
 		perf_missing_features.cloexec = true;
 		pr_debug2_peo("switching off cloexec flag\n");
-		goto fallback_missing_features;
+		return true;
 	} else if (!perf_missing_features.mmap2 && evsel->core.attr.mmap2) {
 		perf_missing_features.mmap2 = true;
 		pr_debug2_peo("switching off mmap2\n");
-		goto fallback_missing_features;
+		return true;
 	} else if (!perf_missing_features.exclude_guest &&
 		   (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host)) {
 		perf_missing_features.exclude_guest = true;
 		pr_debug2_peo("switching off exclude_guest, exclude_host\n");
-		goto fallback_missing_features;
+		return true;
 	} else if (!perf_missing_features.sample_id_all) {
 		perf_missing_features.sample_id_all = true;
 		pr_debug2_peo("switching off sample_id_all\n");
-		goto retry_sample_id;
+		return true;
 	} else if (!perf_missing_features.lbr_flags &&
 			(evsel->core.attr.branch_sample_type &
 			 (PERF_SAMPLE_BRANCH_NO_CYCLES |
 			  PERF_SAMPLE_BRANCH_NO_FLAGS))) {
 		perf_missing_features.lbr_flags = true;
 		pr_debug2_peo("switching off branch sample type no (cycles/flags)\n");
-		goto fallback_missing_features;
+		return true;
 	} else if (!perf_missing_features.group_read &&
 		    evsel->core.attr.inherit &&
 		   (evsel->core.attr.read_format & PERF_FORMAT_GROUP) &&
 		   evsel__is_group_leader(evsel)) {
 		perf_missing_features.group_read = true;
 		pr_debug2_peo("switching off group read\n");
-		goto fallback_missing_features;
+		return true;
+	} else {
+		return false;
+	}
+}
+
+bool evsel__increase_rlimit(enum rlimit_action *set_rlimit)
+{
+	int old_errno;
+	struct rlimit l;
+
+	if (*set_rlimit < INCREASED_MAX) {
+		old_errno = errno;
+
+		if (getrlimit(RLIMIT_NOFILE, &l) == 0) {
+			if (*set_rlimit == NO_CHANGE) {
+				l.rlim_cur = l.rlim_max;
+			} else {
+				l.rlim_cur = l.rlim_max + 1000;
+				l.rlim_max = l.rlim_cur;
+			}
+			if (setrlimit(RLIMIT_NOFILE, &l) == 0) {
+				(*set_rlimit) += 1;
+				errno = old_errno;
+				return true;
+			}
+		}
+		errno = old_errno;
+	}
+
+	return false;
+}
+
+static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
+		struct perf_thread_map *threads,
+		int start_cpu, int end_cpu)
+{
+	int cpu, thread, nthreads;
+	int pid = -1, err, old_errno;
+	enum rlimit_action set_rlimit = NO_CHANGE;
+
+	err = __evsel__prepare_open(evsel, cpus, threads);
+	if (err)
+		return err;
+
+	if (cpus == NULL)
+		cpus = empty_cpu_map;
+
+	if (threads == NULL)
+		threads = empty_thread_map;
+
+	if (evsel->core.system_wide)
+		nthreads = 1;
+	else
+		nthreads = threads->nr;
+
+	if (evsel->cgrp)
+		pid = evsel->cgrp->fd;
+
+fallback_missing_features:
+	evsel__disable_missing_features(evsel);
+
+	display_attr(&evsel->core.attr);
+
+	for (cpu = start_cpu; cpu < end_cpu; cpu++) {
+
+		for (thread = 0; thread < nthreads; thread++) {
+			int fd, group_fd;
+retry_open:
+			if (thread >= nthreads)
+				break;
+
+			if (!evsel->cgrp && !evsel->core.system_wide)
+				pid = perf_thread_map__pid(threads, thread);
+
+			group_fd = get_group_fd(evsel, cpu, thread);
+
+			test_attr__ready();
+
+			pr_debug2_peo("sys_perf_event_open: pid %d  cpu %d  group_fd %d  flags %#lx",
+				pid, cpus->map[cpu], group_fd, evsel->open_flags);
+
+			fd = sys_perf_event_open(&evsel->core.attr, pid, cpus->map[cpu],
+						group_fd, evsel->open_flags);
+
+			FD(evsel, cpu, thread) = fd;
+
+			if (fd < 0) {
+				err = -errno;
+
+				pr_debug2_peo("\nsys_perf_event_open failed, error %d\n",
+					  err);
+				goto try_fallback;
+			}
+
+			bpf_counter__install_pe(evsel, cpu, fd);
+
+			if (unlikely(test_attr__enabled)) {
+				test_attr__open(&evsel->core.attr, pid, cpus->map[cpu],
+						fd, group_fd, evsel->open_flags);
+			}
+
+			pr_debug2_peo(" = %d\n", fd);
+
+			if (evsel->bpf_fd >= 0) {
+				int evt_fd = fd;
+				int bpf_fd = evsel->bpf_fd;
+
+				err = ioctl(evt_fd,
+					    PERF_EVENT_IOC_SET_BPF,
+					    bpf_fd);
+				if (err && errno != EEXIST) {
+					pr_err("failed to attach bpf fd %d: %s\n",
+					       bpf_fd, strerror(errno));
+					err = -EINVAL;
+					goto out_close;
+				}
+			}
+
+			set_rlimit = NO_CHANGE;
+
+			/*
+			 * If we succeeded but had to kill clockid, fail and
+			 * have evsel__open_strerror() print us a nice error.
+			 */
+			if (perf_missing_features.clockid ||
+			    perf_missing_features.clockid_wrong) {
+				err = -EINVAL;
+				goto out_close;
+			}
+		}
+	}
+
+	return 0;
+
+try_fallback:
+	if (evsel__precise_ip_fallback(evsel))
+		goto retry_open;
+
+	if (evsel__ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) {
+		/* We just removed 1 thread, so lower the upper nthreads limit. */
+		nthreads--;
+
+		/* ... and pretend like nothing have happened. */
+		err = 0;
+		goto retry_open;
 	}
+	/*
+	 * perf stat needs between 5 and 22 fds per CPU. When we run out
+	 * of them try to increase the limits.
+	 */
+	if (err == -EMFILE && evsel__increase_rlimit(&set_rlimit))
+		goto retry_open;
+
+	if (err != -EINVAL || cpu > 0 || thread > 0)
+		goto out_close;
+
+	if (evsel__detect_missing_features(evsel))
+		goto fallback_missing_features;
 out_close:
 	if (err)
 		threads->err_thread = thread;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 80383096d51c..1f7edfa8568a 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -150,6 +150,8 @@ struct evsel {
 		struct bperf_leader_bpf *leader_skel;
 		struct bperf_follower_bpf *follower_skel;
 	};
+	unsigned long		open_flags;
+	int			precise_ip_original;
 };
 
 struct perf_missing_features {
@@ -211,6 +213,9 @@ static inline struct evsel *evsel__new(struct perf_event_attr *attr)
 struct evsel *evsel__clone(struct evsel *orig);
 struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx);
 
+int copy_config_terms(struct list_head *dst, struct list_head *src);
+void free_config_terms(struct list_head *config_terms);
+
 /*
  * Returns pointer with encoded error via <linux/err.h> interface.
  */
@@ -286,6 +291,18 @@ int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads)
 int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
 		struct perf_thread_map *threads);
 void evsel__close(struct evsel *evsel);
+int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
+		struct perf_thread_map *threads);
+bool evsel__detect_missing_features(struct evsel *evsel);
+
+enum rlimit_action { NO_CHANGE, SET_TO_MAX, INCREASED_MAX };
+bool evsel__increase_rlimit(enum rlimit_action *set_rlimit);
+
+bool evsel__ignore_missing_thread(struct evsel *evsel,
+				  int nr_cpus, int cpu,
+				  struct perf_thread_map *threads,
+				  int thread, int err);
+bool evsel__precise_ip_fallback(struct evsel *evsel);
 
 struct perf_sample;
 
diff --git a/tools/perf/util/get_current_dir_name.c b/tools/perf/util/get_current_dir_name.c
index b205d929245f..e68935e9ac8c 100644
--- a/tools/perf/util/get_current_dir_name.c
+++ b/tools/perf/util/get_current_dir_name.c
@@ -3,8 +3,9 @@
 //
 #ifndef HAVE_GET_CURRENT_DIR_NAME
 #include "get_current_dir_name.h"
+#include <limits.h>
+#include <string.h>
 #include <unistd.h>
-#include <stdlib.h>
 
 /* Android's 'bionic' library, for one, doesn't have this */
 
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 44249027507a..1c7414f66655 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -278,7 +278,7 @@ static int do_read_bitmap(struct feat_fd *ff, unsigned long **pset, u64 *psize)
 	if (ret)
 		return ret;
 
-	set = bitmap_alloc(size);
+	set = bitmap_zalloc(size);
 	if (!set)
 		return -ENOMEM;
 
@@ -1284,7 +1284,7 @@ static int memory_node__read(struct memory_node *n, unsigned long idx)
 
 	dir = opendir(path);
 	if (!dir) {
-		pr_warning("failed: cant' open memory sysfs data\n");
+		pr_warning("failed: can't open memory sysfs data\n");
 		return -1;
 	}
 
@@ -1294,7 +1294,7 @@ static int memory_node__read(struct memory_node *n, unsigned long idx)
 
 	size++;
 
-	n->set = bitmap_alloc(size);
+	n->set = bitmap_zalloc(size);
 	if (!n->set) {
 		closedir(dir);
 		return -ENOMEM;
@@ -3865,10 +3865,10 @@ static int perf_file_section__process(struct perf_file_section *section,
 static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
 				       struct perf_header *ph,
 				       struct perf_data* data,
-				       bool repipe)
+				       bool repipe, int repipe_fd)
 {
 	struct feat_fd ff = {
-		.fd = STDOUT_FILENO,
+		.fd = repipe_fd,
 		.ph = ph,
 	};
 	ssize_t ret;
@@ -3891,13 +3891,13 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
 	return 0;
 }
 
-static int perf_header__read_pipe(struct perf_session *session)
+static int perf_header__read_pipe(struct perf_session *session, int repipe_fd)
 {
 	struct perf_header *header = &session->header;
 	struct perf_pipe_file_header f_header;
 
 	if (perf_file_header__read_pipe(&f_header, header, session->data,
-					session->repipe) < 0) {
+					session->repipe, repipe_fd) < 0) {
 		pr_debug("incompatible file format\n");
 		return -EINVAL;
 	}
@@ -3995,7 +3995,7 @@ static int evlist__prepare_tracepoint_events(struct evlist *evlist, struct tep_h
 	return 0;
 }
 
-int perf_session__read_header(struct perf_session *session)
+int perf_session__read_header(struct perf_session *session, int repipe_fd)
 {
 	struct perf_data *data = session->data;
 	struct perf_header *header = &session->header;
@@ -4016,7 +4016,7 @@ int perf_session__read_header(struct perf_session *session)
 	 * We can read 'pipe' data event from regular file,
 	 * check for the pipe header regardless of source.
 	 */
-	err = perf_header__read_pipe(session);
+	err = perf_header__read_pipe(session, repipe_fd);
 	if (!err || perf_data__is_pipe(data)) {
 		data->is_pipe = true;
 		return err;
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index ae6b1cf19a7d..c9e3265832d9 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -115,7 +115,7 @@ struct perf_session;
 struct perf_tool;
 union perf_event;
 
-int perf_session__read_header(struct perf_session *session);
+int perf_session__read_header(struct perf_session *session, int repipe_fd);
 int perf_session__write_header(struct perf_session *session,
 			       struct evlist *evlist,
 			       int fd, bool at_exit);
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index cbd9b268f168..96c8ef60f4f8 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -38,6 +38,8 @@ struct llvm_param llvm_param = {
 	.user_set_param = false,
 };
 
+static void version_notice(void);
+
 int perf_llvm_config(const char *var, const char *value)
 {
 	if (!strstarts(var, "llvm."))
@@ -108,6 +110,21 @@ search_program(const char *def, const char *name,
 	return ret;
 }
 
+static int search_program_and_warn(const char *def, const char *name,
+				   char *output)
+{
+	int ret = search_program(def, name, output);
+
+	if (ret) {
+		pr_err("ERROR:\tunable to find %s.\n"
+		       "Hint:\tTry to install latest clang/llvm to support BPF. Check your $PATH\n"
+		       "     \tand '%s-path' option in [llvm] section of ~/.perfconfig.\n",
+		       name, name);
+		version_notice();
+	}
+	return ret;
+}
+
 #define READ_SIZE	4096
 static int
 read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz)
@@ -217,7 +234,7 @@ version_notice(void)
 "     \t\tgit clone http://llvm.org/git/clang.git\n\n"
 "     \tOr fetch the latest clang/llvm 3.7 from pre-built llvm packages for\n"
 "     \tdebian/ubuntu:\n"
-"     \t\thttp://llvm.org/apt\n\n"
+"     \t\thttps://apt.llvm.org/\n\n"
 "     \tIf you are using old version of clang, change 'clang-bpf-cmd-template'\n"
 "     \toption in [llvm] section of ~/.perfconfig to:\n\n"
 "     \t  \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS $PERF_BPF_INC_OPTIONS \\\n"
@@ -458,16 +475,10 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
 	if (!template)
 		template = CLANG_BPF_CMD_DEFAULT_TEMPLATE;
 
-	err = search_program(llvm_param.clang_path,
+	err = search_program_and_warn(llvm_param.clang_path,
 			     "clang", clang_path);
-	if (err) {
-		pr_err(
-"ERROR:\tunable to find clang.\n"
-"Hint:\tTry to install latest clang/llvm to support BPF. Check your $PATH\n"
-"     \tand 'clang-path' option in [llvm] section of ~/.perfconfig.\n");
-		version_notice();
+	if (err)
 		return -ENOENT;
-	}
 
 	/*
 	 * This is an optional work. Even it fail we can continue our
@@ -495,14 +506,9 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
 	force_set_env("WORKING_DIR", kbuild_dir ? : ".");
 
 	if (opts) {
-		err = search_program(llvm_param.llc_path, "llc", llc_path);
-		if (err) {
-			pr_err("ERROR:\tunable to find llc.\n"
-			       "Hint:\tTry to install latest clang/llvm to support BPF. Check your $PATH\n"
-			       "     \tand 'llc-path' option in [llvm] section of ~/.perfconfig.\n");
-			version_notice();
+		err = search_program_and_warn(llvm_param.llc_path, "llc", llc_path);
+		if (err)
 			goto errout;
-		}
 
 		err = -ENOMEM;
 		if (asprintf(&pipe_template, "%s -emit-llvm | %s -march=bpf %s -filetype=obj -o -",
@@ -579,5 +585,5 @@ int llvm__search_clang(void)
 {
 	char clang_path[PATH_MAX];
 
-	return search_program(llvm_param.clang_path, "clang", clang_path);
+	return search_program_and_warn(llvm_param.clang_path, "clang", clang_path);
 }
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 99d047c5ead0..29b747ac31c1 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -313,7 +313,7 @@ static int metricgroup__setup_events(struct list_head *groups,
 	struct evsel *evsel, *tmp;
 	unsigned long *evlist_used;
 
-	evlist_used = bitmap_alloc(perf_evlist->core.nr_entries);
+	evlist_used = bitmap_zalloc(perf_evlist->core.nr_entries);
 	if (!evlist_used)
 		return -ENOMEM;
 
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index ab7108d22428..512dc8b9c168 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -106,7 +106,7 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity)
 		data = map->aio.data[idx];
 		mmap_len = mmap__mmap_len(map);
 		node_index = cpu__get_node(cpu);
-		node_mask = bitmap_alloc(node_index + 1);
+		node_mask = bitmap_zalloc(node_index + 1);
 		if (!node_mask) {
 			pr_err("Failed to allocate node mask for mbind: error %m\n");
 			return -1;
@@ -258,7 +258,7 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask)
 static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp)
 {
 	map->affinity_mask.nbits = cpu__max_cpu();
-	map->affinity_mask.bits = bitmap_alloc(map->affinity_mask.nbits);
+	map->affinity_mask.bits = bitmap_zalloc(map->affinity_mask.nbits);
 	if (!map->affinity_mask.bits)
 		return -1;
 
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index 9d5f589f02ae..af33118354dd 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -6,6 +6,7 @@
 #include <linux/refcount.h>
 #include <linux/types.h>
 #include <linux/ring_buffer.h>
+#include <linux/bitops.h>
 #include <stdbool.h>
 #include <pthread.h> // for cpu_set_t
 #ifdef HAVE_AIO_SUPPORT
diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c
index 10160ab126f9..b234d95fb10a 100644
--- a/tools/perf/util/parse-events-hybrid.c
+++ b/tools/perf/util/parse-events-hybrid.c
@@ -76,12 +76,16 @@ static int add_hw_hybrid(struct parse_events_state *parse_state,
 	int ret;
 
 	perf_pmu__for_each_hybrid_pmu(pmu) {
+		LIST_HEAD(terms);
+
 		if (pmu_cmp(parse_state, pmu))
 			continue;
 
+		copy_config_terms(&terms, config_terms);
 		ret = create_event_hybrid(PERF_TYPE_HARDWARE,
 					  &parse_state->idx, list, attr, name,
-					  config_terms, pmu);
+					  &terms, pmu);
+		free_config_terms(&terms);
 		if (ret)
 			return ret;
 	}
@@ -115,11 +119,15 @@ static int add_raw_hybrid(struct parse_events_state *parse_state,
 	int ret;
 
 	perf_pmu__for_each_hybrid_pmu(pmu) {
+		LIST_HEAD(terms);
+
 		if (pmu_cmp(parse_state, pmu))
 			continue;
 
+		copy_config_terms(&terms, config_terms);
 		ret = create_raw_event_hybrid(&parse_state->idx, list, attr,
-					      name, config_terms, pmu);
+					      name, &terms, pmu);
+		free_config_terms(&terms);
 		if (ret)
 			return ret;
 	}
@@ -165,11 +173,15 @@ int parse_events__add_cache_hybrid(struct list_head *list, int *idx,
 
 	*hybrid = true;
 	perf_pmu__for_each_hybrid_pmu(pmu) {
+		LIST_HEAD(terms);
+
 		if (pmu_cmp(parse_state, pmu))
 			continue;
 
+		copy_config_terms(&terms, config_terms);
 		ret = create_event_hybrid(PERF_TYPE_HW_CACHE, idx, list,
-					  attr, name, config_terms, pmu);
+					  attr, name, &terms, pmu);
+		free_config_terms(&terms);
 		if (ret)
 			return ret;
 	}
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index e5eae23cfceb..51a2219df601 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -387,7 +387,7 @@ __add_event(struct list_head *list, int *idx,
 		evsel->name = strdup(name);
 
 	if (config_terms)
-		list_splice(config_terms, &evsel->config_terms);
+		list_splice_init(config_terms, &evsel->config_terms);
 
 	if (list)
 		list_add_tail(&evsel->core.node, list);
@@ -535,9 +535,12 @@ int parse_events_add_cache(struct list_head *list, int *idx,
 					     config_name ? : name, &config_terms,
 					     &hybrid, parse_state);
 	if (hybrid)
-		return ret;
+		goto out_free_terms;
 
-	return add_event(list, idx, &attr, config_name ? : name, &config_terms);
+	ret = add_event(list, idx, &attr, config_name ? : name, &config_terms);
+out_free_terms:
+	free_config_terms(&config_terms);
+	return ret;
 }
 
 static void tracepoint_error(struct parse_events_error *e, int err,
@@ -1457,10 +1460,13 @@ int parse_events_add_numeric(struct parse_events_state *parse_state,
 					       get_config_name(head_config),
 					       &config_terms, &hybrid);
 	if (hybrid)
-		return ret;
+		goto out_free_terms;
 
-	return add_event(list, &parse_state->idx, &attr,
-			 get_config_name(head_config), &config_terms);
+	ret = add_event(list, &parse_state->idx, &attr,
+			get_config_name(head_config), &config_terms);
+out_free_terms:
+	free_config_terms(&config_terms);
+	return ret;
 }
 
 int parse_events_add_tool(struct parse_events_state *parse_state,
@@ -1608,14 +1614,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
 	}
 
 	if (!parse_state->fake_pmu && perf_pmu__config(pmu, &attr, head_config, parse_state->error)) {
-		struct evsel_config_term *pos, *tmp;
-
-		list_for_each_entry_safe(pos, tmp, &config_terms, list) {
-			list_del_init(&pos->list);
-			if (pos->free_str)
-				zfree(&pos->val.str);
-			free(pos);
-		}
+		free_config_terms(&config_terms);
 		return -EINVAL;
 	}
 
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 9321bd0e2f76..d94e48e1ff9b 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -316,7 +316,8 @@ event_pmu_name opt_pmu_config
 			if (!strncmp(name, "uncore_", 7) &&
 			    strncmp($1, "uncore_", 7))
 				name += 7;
-			if (!perf_pmu__match(pattern, name, $1)) {
+			if (!perf_pmu__match(pattern, name, $1) ||
+			    !perf_pmu__match(pattern, pmu->alias_name, $1)) {
 				if (parse_events_copy_term_list(orig_terms, &terms))
 					CLEANUP_YYABORT;
 				if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true, false))
diff --git a/tools/perf/util/parse-sublevel-options.h b/tools/perf/util/parse-sublevel-options.h
index 9b9efcc2aaad..578b18ef03bb 100644
--- a/tools/perf/util/parse-sublevel-options.h
+++ b/tools/perf/util/parse-sublevel-options.h
@@ -8,4 +8,4 @@ struct sublevel_option {
 
 int perf_parse_sublevel_options(const char *str, struct sublevel_option *opts);
 
-#endif
-\ No newline at end of file
+#endif
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index 30481825515b..47b7531f51da 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -137,6 +137,9 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
 	PRINT_ATTRf(cgroup, p_unsigned);
 	PRINT_ATTRf(text_poke, p_unsigned);
 	PRINT_ATTRf(build_id, p_unsigned);
+	PRINT_ATTRf(inherit_thread, p_unsigned);
+	PRINT_ATTRf(remove_on_exec, p_unsigned);
+	PRINT_ATTRf(sigtrap, p_unsigned);
 
 	PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
 	PRINT_ATTRf(bp_type, p_unsigned);
@@ -150,7 +153,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
 	PRINT_ATTRf(aux_watermark, p_unsigned);
 	PRINT_ATTRf(sample_max_stack, p_unsigned);
 	PRINT_ATTRf(aux_sample_size, p_unsigned);
-	PRINT_ATTRf(text_poke, p_unsigned);
+	PRINT_ATTRf(sig_data, p_unsigned);
 
 	return ret;
 }
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index fc683bc41715..bdabd62170d2 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -843,8 +843,7 @@ void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu,
 			break;
 		}
 
-		if (pmu_is_uncore(name) &&
-		    pmu_uncore_alias_match(pname, name))
+		if (pmu->is_uncore && pmu_uncore_alias_match(pname, name))
 			goto new_alias;
 
 		if (strcmp(pname, name))
@@ -927,7 +926,7 @@ static int pmu_add_sys_aliases_iter_fn(struct pmu_event *pe, void *data)
 	return 0;
 }
 
-static void pmu_add_sys_aliases(struct list_head *head, struct perf_pmu *pmu)
+void pmu_add_sys_aliases(struct list_head *head, struct perf_pmu *pmu)
 {
 	struct pmu_sys_event_iter_data idata = {
 		.head = head,
@@ -946,6 +945,18 @@ perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
 	return NULL;
 }
 
+char * __weak
+pmu_find_real_name(const char *name)
+{
+	return (char *)name;
+}
+
+char * __weak
+pmu_find_alias_name(const char *name __maybe_unused)
+{
+	return NULL;
+}
+
 static int pmu_max_precise(const char *name)
 {
 	char path[PATH_MAX];
@@ -959,13 +970,15 @@ static int pmu_max_precise(const char *name)
 	return max_precise;
 }
 
-static struct perf_pmu *pmu_lookup(const char *name)
+static struct perf_pmu *pmu_lookup(const char *lookup_name)
 {
 	struct perf_pmu *pmu;
 	LIST_HEAD(format);
 	LIST_HEAD(aliases);
 	__u32 type;
+	char *name = pmu_find_real_name(lookup_name);
 	bool is_hybrid = perf_pmu__hybrid_mounted(name);
+	char *alias_name;
 
 	/*
 	 * Check pmu name for hybrid and the pmu may be invalid in sysfs
@@ -996,6 +1009,16 @@ static struct perf_pmu *pmu_lookup(const char *name)
 
 	pmu->cpus = pmu_cpumask(name);
 	pmu->name = strdup(name);
+	if (!pmu->name)
+		goto err;
+
+	alias_name = pmu_find_alias_name(name);
+	if (alias_name) {
+		pmu->alias_name = strdup(alias_name);
+		if (!pmu->alias_name)
+			goto err;
+	}
+
 	pmu->type = type;
 	pmu->is_uncore = pmu_is_uncore(name);
 	if (pmu->is_uncore)
@@ -1018,15 +1041,22 @@ static struct perf_pmu *pmu_lookup(const char *name)
 	pmu->default_config = perf_pmu__get_default_config(pmu);
 
 	return pmu;
+err:
+	if (pmu->name)
+		free(pmu->name);
+	free(pmu);
+	return NULL;
 }
 
 static struct perf_pmu *pmu_find(const char *name)
 {
 	struct perf_pmu *pmu;
 
-	list_for_each_entry(pmu, &pmus, list)
-		if (!strcmp(pmu->name, name))
+	list_for_each_entry(pmu, &pmus, list) {
+		if (!strcmp(pmu->name, name) ||
+		    (pmu->alias_name && !strcmp(pmu->alias_name, name)))
 			return pmu;
+	}
 
 	return NULL;
 }
@@ -1920,6 +1950,9 @@ bool perf_pmu__has_hybrid(void)
 
 int perf_pmu__match(char *pattern, char *name, char *tok)
 {
+	if (!name)
+		return -1;
+
 	if (fnmatch(pattern, name, 0))
 		return -1;
 
@@ -1928,3 +1961,38 @@ int perf_pmu__match(char *pattern, char *name, char *tok)
 
 	return 0;
 }
+
+int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus,
+			 struct perf_cpu_map **mcpus_ptr,
+			 struct perf_cpu_map **ucpus_ptr)
+{
+	struct perf_cpu_map *pmu_cpus = pmu->cpus;
+	struct perf_cpu_map *matched_cpus, *unmatched_cpus;
+	int matched_nr = 0, unmatched_nr = 0;
+
+	matched_cpus = perf_cpu_map__default_new();
+	if (!matched_cpus)
+		return -1;
+
+	unmatched_cpus = perf_cpu_map__default_new();
+	if (!unmatched_cpus) {
+		perf_cpu_map__put(matched_cpus);
+		return -1;
+	}
+
+	for (int i = 0; i < cpus->nr; i++) {
+		int cpu;
+
+		cpu = perf_cpu_map__idx(pmu_cpus, cpus->map[i]);
+		if (cpu == -1)
+			unmatched_cpus->map[unmatched_nr++] = cpus->map[i];
+		else
+			matched_cpus->map[matched_nr++] = cpus->map[i];
+	}
+
+	unmatched_cpus->nr = unmatched_nr;
+	matched_cpus->nr = matched_nr;
+	*mcpus_ptr = matched_cpus;
+	*ucpus_ptr = unmatched_cpus;
+	return 0;
+}
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 926da483a141..394898b07fd9 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -11,6 +11,7 @@
 #include "pmu-events/pmu-events.h"
 
 struct evsel_config_term;
+struct perf_cpu_map;
 
 enum {
 	PERF_PMU_FORMAT_VALUE_CONFIG,
@@ -21,6 +22,7 @@ enum {
 #define PERF_PMU_FORMAT_BITS 64
 #define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/"
 #define CPUS_TEMPLATE_CPU	"%s/bus/event_source/devices/%s/cpus"
+#define MAX_PMU_NAME_LEN 128
 
 struct perf_event_attr;
 
@@ -32,6 +34,7 @@ struct perf_pmu_caps {
 
 struct perf_pmu {
 	char *name;
+	char *alias_name;
 	char *id;
 	__u32 type;
 	bool selectable;
@@ -81,6 +84,7 @@ struct perf_pmu_alias {
 
 struct perf_pmu *perf_pmu__find(const char *name);
 struct perf_pmu *perf_pmu__find_by_type(unsigned int type);
+void pmu_add_sys_aliases(struct list_head *head, struct perf_pmu *pmu);
 int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
 		     struct list_head *head_terms,
 		     struct parse_events_error *error);
@@ -135,4 +139,10 @@ void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
 bool perf_pmu__has_hybrid(void);
 int perf_pmu__match(char *pattern, char *name, char *tok);
 
+int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus,
+			 struct perf_cpu_map **mcpus_ptr,
+			 struct perf_cpu_map **ucpus_ptr);
+
+char *pmu_find_real_name(const char *name);
+char *pmu_find_alias_name(const char *name);
 #endif /* __PMU_H */
diff --git a/tools/perf/util/sample-raw.c b/tools/perf/util/sample-raw.c
index cde5cd3ce49b..f3f6bd9d290e 100644
--- a/tools/perf/util/sample-raw.c
+++ b/tools/perf/util/sample-raw.c
@@ -1,8 +1,10 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 
 #include <string.h>
+#include <linux/string.h>
 #include "evlist.h"
 #include "env.h"
+#include "header.h"
 #include "sample-raw.h"
 
 /*
@@ -12,7 +14,13 @@
 void evlist__init_trace_event_sample_raw(struct evlist *evlist)
 {
 	const char *arch_pf = perf_env__arch(evlist->env);
+	const char *cpuid = perf_env__cpuid(evlist->env);
 
 	if (arch_pf && !strcmp("s390", arch_pf))
 		evlist->trace_event_sample_raw = evlist__s390_sample_raw;
+	else if (arch_pf && !strcmp("x86", arch_pf) &&
+		 cpuid && strstarts(cpuid, "AuthenticAMD") &&
+		 evlist__has_amd_ibs(evlist)) {
+		evlist->trace_event_sample_raw = evlist__amd_sample_raw;
+	}
 }
diff --git a/tools/perf/util/sample-raw.h b/tools/perf/util/sample-raw.h
index 4be84a5510cf..ea01c5811503 100644
--- a/tools/perf/util/sample-raw.h
+++ b/tools/perf/util/sample-raw.h
@@ -6,6 +6,10 @@ struct evlist;
 union perf_event;
 struct perf_sample;
 
-void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample);
+void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event,
+			     struct perf_sample *sample);
+bool evlist__has_amd_ibs(struct evlist *evlist);
+void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event,
+			    struct perf_sample *sample);
 void evlist__init_trace_event_sample_raw(struct evlist *evlist);
 #endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 69129e2aa7a1..c0c010350bc2 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -1422,6 +1422,37 @@ static void python_process_event(union perf_event *event,
 	}
 }
 
+static void python_process_throttle(union perf_event *event,
+				    struct perf_sample *sample,
+				    struct machine *machine)
+{
+	const char *handler_name;
+	PyObject *handler, *t;
+
+	if (event->header.type == PERF_RECORD_THROTTLE)
+		handler_name = "throttle";
+	else
+		handler_name = "unthrottle";
+	handler = get_handler(handler_name);
+	if (!handler)
+		return;
+
+	t = tuple_new(6);
+	if (!t)
+		return;
+
+	tuple_set_u64(t, 0, event->throttle.time);
+	tuple_set_u64(t, 1, event->throttle.id);
+	tuple_set_u64(t, 2, event->throttle.stream_id);
+	tuple_set_s32(t, 3, sample->cpu);
+	tuple_set_s32(t, 4, sample->pid);
+	tuple_set_s32(t, 5, sample->tid);
+
+	call_object(handler, t, handler_name);
+
+	Py_DECREF(t);
+}
+
 static void python_do_process_switch(union perf_event *event,
 				     struct perf_sample *sample,
 				     struct machine *machine)
@@ -2079,5 +2110,6 @@ struct scripting_ops python_scripting_ops = {
 	.process_auxtrace_error	= python_process_auxtrace_error,
 	.process_stat		= python_process_stat,
 	.process_stat_interval	= python_process_stat_interval,
+	.process_throttle	= python_process_throttle,
 	.generate_script	= python_generate_script,
 };
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 51f727402912..069c2cfdd3be 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -102,11 +102,11 @@ static int perf_session__deliver_event(struct perf_session *session,
 				       struct perf_tool *tool,
 				       u64 file_offset);
 
-static int perf_session__open(struct perf_session *session)
+static int perf_session__open(struct perf_session *session, int repipe_fd)
 {
 	struct perf_data *data = session->data;
 
-	if (perf_session__read_header(session) < 0) {
+	if (perf_session__read_header(session, repipe_fd) < 0) {
 		pr_err("incompatible file format (rerun with -v to learn more)\n");
 		return -1;
 	}
@@ -185,8 +185,9 @@ static int ordered_events__deliver_event(struct ordered_events *oe,
 					   session->tool, event->file_offset);
 }
 
-struct perf_session *perf_session__new(struct perf_data *data,
-				       bool repipe, struct perf_tool *tool)
+struct perf_session *__perf_session__new(struct perf_data *data,
+					 bool repipe, int repipe_fd,
+					 struct perf_tool *tool)
 {
 	int ret = -ENOMEM;
 	struct perf_session *session = zalloc(sizeof(*session));
@@ -210,7 +211,7 @@ struct perf_session *perf_session__new(struct perf_data *data,
 		session->data = data;
 
 		if (perf_data__is_read(data)) {
-			ret = perf_session__open(session);
+			ret = perf_session__open(session, repipe_fd);
 			if (ret < 0)
 				goto out_delete;
 
@@ -1540,6 +1541,8 @@ static int machines__deliver_event(struct machines *machines,
 				evlist->stats.total_aux_lost += 1;
 			if (event->aux.flags & PERF_AUX_FLAG_PARTIAL)
 				evlist->stats.total_aux_partial += 1;
+			if (event->aux.flags & PERF_AUX_FLAG_COLLISION)
+				evlist->stats.total_aux_collision += 1;
 		}
 		return tool->aux(tool, event, sample, machine);
 	case PERF_RECORD_ITRACE_START:
@@ -1895,6 +1898,13 @@ static void perf_session__warn_about_errors(const struct perf_session *session)
 			    "");
 	}
 
+	if (session->tool->aux == perf_event__process_aux &&
+	    stats->total_aux_collision != 0) {
+		ui__warning("AUX data detected collision  %" PRIu64 " times out of %u!\n\n",
+			    stats->total_aux_collision,
+			    stats->nr_events[PERF_RECORD_AUX]);
+	}
+
 	if (stats->nr_unknown_events != 0) {
 		ui__warning("Found %u unknown events!\n\n"
 			    "Is this an older tool processing a perf.data "
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index e31ba4c92a6c..5d8bd14a0a39 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -54,8 +54,16 @@ struct decomp {
 
 struct perf_tool;
 
-struct perf_session *perf_session__new(struct perf_data *data,
-				       bool repipe, struct perf_tool *tool);
+struct perf_session *__perf_session__new(struct perf_data *data,
+					 bool repipe, int repipe_fd,
+					 struct perf_tool *tool);
+
+static inline struct perf_session *perf_session__new(struct perf_data *data,
+						     struct perf_tool *tool)
+{
+	return __perf_session__new(data, false, -1, tool);
+}
+
 void perf_session__delete(struct perf_session *session);
 
 void perf_event_header__bswap(struct perf_event_header *hdr);
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 77fc46ca07c0..0fc9a5410739 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1581,10 +1581,6 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile)
 	if (bfd_get_flavour(abfd) == bfd_target_elf_flavour)
 		goto out_close;
 
-	section = bfd_get_section_by_name(abfd, ".text");
-	if (section)
-		dso->text_offset = section->vma - section->filepos;
-
 	symbols_size = bfd_get_symtab_upper_bound(abfd);
 	if (symbols_size == 0) {
 		bfd_close(abfd);
@@ -1602,6 +1598,22 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile)
 	if (symbols_count < 0)
 		goto out_free;
 
+	section = bfd_get_section_by_name(abfd, ".text");
+	if (section) {
+		for (i = 0; i < symbols_count; ++i) {
+			if (!strcmp(bfd_asymbol_name(symbols[i]), "__ImageBase") ||
+			    !strcmp(bfd_asymbol_name(symbols[i]), "__image_base__"))
+				break;
+		}
+		if (i < symbols_count) {
+			/* PE symbols can only have 4 bytes, so use .text high bits */
+			dso->text_offset = section->vma - (u32)section->vma;
+			dso->text_offset += (u32)bfd_asymbol_value(symbols[i]);
+		} else {
+			dso->text_offset = section->vma - section->filepos;
+		}
+	}
+
 	qsort(symbols, symbols_count, sizeof(asymbol *), bfd_symbols__cmpvalue);
 
 #ifdef bfd_get_section
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 35aa0c0f7cd9..a7e981b2d7de 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -1,5 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only 
 
+#include "util/cgroup.h"
+#include "util/data.h"
 #include "util/debug.h"
 #include "util/dso.h"
 #include "util/event.h"
@@ -16,7 +18,6 @@
 #include "util/synthetic-events.h"
 #include "util/target.h"
 #include "util/time-utils.h"
-#include "util/cgroup.h"
 #include <linux/bitops.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
@@ -2179,3 +2180,53 @@ int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session
 	free(ff.buf);
 	return ret;
 }
+
+int perf_event__synthesize_for_pipe(struct perf_tool *tool,
+				    struct perf_session *session,
+				    struct perf_data *data,
+				    perf_event__handler_t process)
+{
+	int err;
+	int ret = 0;
+	struct evlist *evlist = session->evlist;
+
+	/*
+	 * We need to synthesize events first, because some
+	 * features works on top of them (on report side).
+	 */
+	err = perf_event__synthesize_attrs(tool, evlist, process);
+	if (err < 0) {
+		pr_err("Couldn't synthesize attrs.\n");
+		return err;
+	}
+	ret += err;
+
+	err = perf_event__synthesize_features(tool, session, evlist, process);
+	if (err < 0) {
+		pr_err("Couldn't synthesize features.\n");
+		return err;
+	}
+	ret += err;
+
+	if (have_tracepoints(&evlist->core.entries)) {
+		int fd = perf_data__fd(data);
+
+		/*
+		 * FIXME err <= 0 here actually means that
+		 * there were no tracepoints so its not really
+		 * an error, just that we don't need to
+		 * synthesize anything.  We really have to
+		 * return this more properly and also
+		 * propagate errors that now are calling die()
+		 */
+		err = perf_event__synthesize_tracing_data(tool,	fd, evlist,
+							  process);
+		if (err <= 0) {
+			pr_err("Couldn't record tracing data.\n");
+			return err;
+		}
+		ret += err;
+	}
+
+	return ret;
+}
diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h
index e7a3e9589738..c845e2b9b444 100644
--- a/tools/perf/util/synthetic-events.h
+++ b/tools/perf/util/synthetic-events.h
@@ -14,6 +14,7 @@ struct evsel;
 struct machine;
 struct perf_counts_values;
 struct perf_cpu_map;
+struct perf_data;
 struct perf_event_attr;
 struct perf_event_mmap_page;
 struct perf_sample;
@@ -101,4 +102,9 @@ static inline int perf_event__synthesize_bpf_events(struct perf_session *session
 }
 #endif // HAVE_LIBBPF_SUPPORT
 
+int perf_event__synthesize_for_pipe(struct perf_tool *tool,
+				    struct perf_session *session,
+				    struct perf_data *data,
+				    perf_event__handler_t process);
+
 #endif // __PERF_SYNTHETIC_EVENTS_H
diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h
index 4ff56217f2a6..daec6cba500d 100644
--- a/tools/perf/util/target.h
+++ b/tools/perf/util/target.h
@@ -17,6 +17,7 @@ struct target {
 	bool	     default_per_cpu;
 	bool	     per_thread;
 	bool	     use_bpf;
+	bool	     hybrid;
 	const char   *attr_map;
 };
 
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index 54aadeedf28c..640981105788 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -90,6 +90,9 @@ struct scripting_ops {
 	void (*process_stat)(struct perf_stat_config *config,
 			     struct evsel *evsel, u64 tstamp);
 	void (*process_stat_interval)(u64 tstamp);
+	void (*process_throttle)(union perf_event *event,
+				 struct perf_sample *sample,
+				 struct machine *machine);
 	int (*generate_script) (struct tep_handle *pevent, const char *outfile);
 };
 
diff --git a/tools/testing/ktest/examples/bootconfigs/boottrace.bconf b/tools/testing/ktest/examples/bootconfigs/boottrace.bconf
index 9db64ec589d5..7aa706cccb3b 100644
--- a/tools/testing/ktest/examples/bootconfigs/boottrace.bconf
+++ b/tools/testing/ktest/examples/bootconfigs/boottrace.bconf
@@ -10,13 +10,23 @@ ftrace.event {
 	}
 	synthetic.initcall_latency {
 		fields = "unsigned long func", "u64 lat"
-		actions = "hist:keys=func.sym,lat:vals=lat:sort=lat"
+		hist {
+			keys = func.sym,lat
+			values = lat
+			sort = lat
+		}
 	}
-	initcall.initcall_start {
-		actions = "hist:keys=func:ts0=common_timestamp.usecs"
+	initcall.initcall_start.hist {
+		keys = func;
+		var.ts0 = common_timestamp.usecs
 	}
-	initcall.initcall_finish {
-		actions = "hist:keys=func:lat=common_timestamp.usecs-$ts0:onmatch(initcall.initcall_start).initcall_latency(func,$lat)"
+	initcall.initcall_finish.hist {
+		keys = func
+		var.lat = common_timestamp.usecs - $ts0
+		onmatch {
+			event = initcall.initcall_start
+			trace = initcall_latency, func, $lat
+		}
 	}
 }
 
diff --git a/tools/testing/ktest/examples/bootconfigs/verify-boottrace.sh b/tools/testing/ktest/examples/bootconfigs/verify-boottrace.sh
index f271940ce7fb..233e95cfcf20 100755
--- a/tools/testing/ktest/examples/bootconfigs/verify-boottrace.sh
+++ b/tools/testing/ktest/examples/bootconfigs/verify-boottrace.sh
@@ -58,7 +58,7 @@ compare_file_partial "events/synthetic/initcall_latency/enable" "0"
 compare_file_partial "events/initcall/initcall_start/trigger" "hist:keys=func:vals=hitcount:ts0=common_timestamp.usecs"
 compare_file_partial "events/initcall/initcall_start/enable" "1"
 
-compare_file_partial "events/initcall/initcall_finish/trigger" 'hist:keys=func:vals=hitcount:lat=common_timestamp.usecs-\$ts0:sort=hitcount:size=2048:clock=global:onmatch(initcall.initcall_start).initcall_latency(func,\$lat)'
+compare_file_partial "events/initcall/initcall_finish/trigger" 'hist:keys=func:vals=hitcount:lat=common_timestamp.usecs-\$ts0:sort=hitcount:size=2048:clock=global:onmatch(initcall.initcall_start).trace(initcall_latency,func,\$lat)'
 compare_file_partial "events/initcall/initcall_finish/enable" "1"
 
 compare_file "instances/foo/current_tracer" "function"
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index 6276ce0c0196..5a931456e718 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -16,6 +16,7 @@ assert sys.version_info >= (3, 7), "Python version is too old"
 
 from collections import namedtuple
 from enum import Enum, auto
+from typing import Iterable
 
 import kunit_config
 import kunit_json
@@ -30,12 +31,13 @@ KunitBuildRequest = namedtuple('KunitBuildRequest',
 			       ['jobs', 'build_dir', 'alltests',
 				'make_options'])
 KunitExecRequest = namedtuple('KunitExecRequest',
-			      ['timeout', 'build_dir', 'alltests', 'filter_glob'])
+                              ['timeout', 'build_dir', 'alltests',
+                               'filter_glob', 'kernel_args'])
 KunitParseRequest = namedtuple('KunitParseRequest',
 			       ['raw_output', 'input_data', 'build_dir', 'json'])
 KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs',
 					   'build_dir', 'alltests', 'filter_glob',
-					   'json', 'make_options'])
+					   'kernel_args', 'json', 'make_options'])
 
 KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0]
 
@@ -94,6 +96,7 @@ def exec_tests(linux: kunit_kernel.LinuxSourceTree,
 	kunit_parser.print_with_timestamp('Starting KUnit Kernel ...')
 	test_start = time.time()
 	result = linux.run_kernel(
+		args=request.kernel_args,
 		timeout=None if request.alltests else request.timeout,
                 filter_glob=request.filter_glob,
 		build_dir=request.build_dir)
@@ -112,7 +115,16 @@ def parse_tests(request: KunitParseRequest) -> KunitResult:
 					      'Tests not Parsed.')
 
 	if request.raw_output:
-		kunit_parser.raw_output(request.input_data)
+		output: Iterable[str] = request.input_data
+		if request.raw_output == 'all':
+			pass
+		elif request.raw_output == 'kunit':
+			output = kunit_parser.extract_tap_lines(output)
+		else:
+			print(f'Unknown --raw_output option "{request.raw_output}"', file=sys.stderr)
+		for line in output:
+			print(line.rstrip())
+
 	else:
 		test_result = kunit_parser.parse_run_tests(request.input_data)
 	parse_end = time.time()
@@ -133,7 +145,6 @@ def parse_tests(request: KunitParseRequest) -> KunitResult:
 	return KunitResult(KunitStatus.SUCCESS, test_result,
 				parse_end - parse_start)
 
-
 def run_tests(linux: kunit_kernel.LinuxSourceTree,
 	      request: KunitRequest) -> KunitResult:
 	run_start = time.time()
@@ -152,7 +163,8 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree,
 		return build_result
 
 	exec_request = KunitExecRequest(request.timeout, request.build_dir,
-					request.alltests, request.filter_glob)
+				 request.alltests, request.filter_glob,
+				 request.kernel_args)
 	exec_result = exec_tests(linux, exec_request)
 	if exec_result.status != KunitStatus.SUCCESS:
 		return exec_result
@@ -178,7 +190,7 @@ def add_common_opts(parser) -> None:
 	parser.add_argument('--build_dir',
 			    help='As in the make command, it specifies the build '
 			    'directory.',
-                            type=str, default='.kunit', metavar='build_dir')
+			    type=str, default='.kunit', metavar='build_dir')
 	parser.add_argument('--make_options',
 			    help='X=Y make option, can be repeated.',
 			    action='append')
@@ -238,10 +250,14 @@ def add_exec_opts(parser) -> None:
 			    nargs='?',
 			    default='',
 			    metavar='filter_glob')
+	parser.add_argument('--kernel_args',
+			    help='Kernel command-line parameters. Maybe be repeated',
+			     action='append')
 
 def add_parse_opts(parser) -> None:
-	parser.add_argument('--raw_output', help='don\'t format output from kernel',
-			    action='store_true')
+	parser.add_argument('--raw_output', help='If set don\'t format output from kernel. '
+			    'If set to --raw_output=kunit, filters to just KUnit output.',
+			    type=str, nargs='?', const='all', default=None)
 	parser.add_argument('--json',
 			    nargs='?',
 			    help='Stores test results in a JSON, and either '
@@ -309,6 +325,7 @@ def main(argv, linux=None):
 				       cli_args.build_dir,
 				       cli_args.alltests,
 				       cli_args.filter_glob,
+				       cli_args.kernel_args,
 				       cli_args.json,
 				       cli_args.make_options)
 		result = run_tests(linux, request)
@@ -363,7 +380,8 @@ def main(argv, linux=None):
 		exec_request = KunitExecRequest(cli_args.timeout,
 						cli_args.build_dir,
 						cli_args.alltests,
-						cli_args.filter_glob)
+						cli_args.filter_glob,
+						cli_args.kernel_args)
 		exec_result = exec_tests(linux, exec_request)
 		parse_request = KunitParseRequest(cli_args.raw_output,
 						  exec_result.result,
diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index b88db3f51dc5..6310a641b151 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -106,10 +106,6 @@ def extract_tap_lines(kernel_output: Iterable[str]) -> LineStream:
 				yield line_num, line[prefix_len:]
 	return LineStream(lines=isolate_kunit_output(kernel_output))
 
-def raw_output(kernel_output) -> None:
-	for line in kernel_output:
-		print(line.rstrip())
-
 DIVIDER = '=' * 60
 
 RESET = '\033[0;0m'
@@ -137,7 +133,7 @@ def print_log(log) -> None:
 	for m in log:
 		print_with_timestamp(m)
 
-TAP_ENTRIES = re.compile(r'^(TAP|[\s]*ok|[\s]*not ok|[\s]*[0-9]+\.\.[0-9]+|[\s]*#).*$')
+TAP_ENTRIES = re.compile(r'^(TAP|[\s]*ok|[\s]*not ok|[\s]*[0-9]+\.\.[0-9]+|[\s]*# (Subtest:|.*: kunit test case crashed!)).*$')
 
 def consume_non_diagnostic(lines: LineStream) -> None:
 	while lines and not TAP_ENTRIES.match(lines.peek()):
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 75045aa0f8a1..619c4554cbff 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -356,7 +356,7 @@ class KUnitMainTest(unittest.TestCase):
 		self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 0)
 		self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
 		self.linux_source_mock.run_kernel.assert_called_once_with(
-			build_dir='.kunit', filter_glob='', timeout=300)
+			args=None, build_dir='.kunit', filter_glob='', timeout=300)
 		self.print_mock.assert_any_call(StrContains('Testing complete.'))
 
 	def test_run_passes_args_pass(self):
@@ -364,7 +364,7 @@ class KUnitMainTest(unittest.TestCase):
 		self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
 		self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
 		self.linux_source_mock.run_kernel.assert_called_once_with(
-			build_dir='.kunit', filter_glob='', timeout=300)
+			args=None, build_dir='.kunit', filter_glob='', timeout=300)
 		self.print_mock.assert_any_call(StrContains('Testing complete.'))
 
 	def test_exec_passes_args_fail(self):
@@ -399,11 +399,20 @@ class KUnitMainTest(unittest.TestCase):
 			self.assertNotEqual(call, mock.call(StrContains('Testing complete.')))
 			self.assertNotEqual(call, mock.call(StrContains(' 0 tests run')))
 
+	def test_run_raw_output_kunit(self):
+		self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
+		kunit.main(['run', '--raw_output=kunit'], self.linux_source_mock)
+		self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
+		self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
+		for call in self.print_mock.call_args_list:
+			self.assertNotEqual(call, mock.call(StrContains('Testing complete.')))
+			self.assertNotEqual(call, mock.call(StrContains(' 0 tests run')))
+
 	def test_exec_timeout(self):
 		timeout = 3453
 		kunit.main(['exec', '--timeout', str(timeout)], self.linux_source_mock)
 		self.linux_source_mock.run_kernel.assert_called_once_with(
-			build_dir='.kunit', filter_glob='', timeout=timeout)
+			args=None, build_dir='.kunit', filter_glob='', timeout=timeout)
 		self.print_mock.assert_any_call(StrContains('Testing complete.'))
 
 	def test_run_timeout(self):
@@ -411,7 +420,7 @@ class KUnitMainTest(unittest.TestCase):
 		kunit.main(['run', '--timeout', str(timeout)], self.linux_source_mock)
 		self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
 		self.linux_source_mock.run_kernel.assert_called_once_with(
-			build_dir='.kunit', filter_glob='', timeout=timeout)
+			args=None, build_dir='.kunit', filter_glob='', timeout=timeout)
 		self.print_mock.assert_any_call(StrContains('Testing complete.'))
 
 	def test_run_builddir(self):
@@ -419,7 +428,7 @@ class KUnitMainTest(unittest.TestCase):
 		kunit.main(['run', '--build_dir=.kunit'], self.linux_source_mock)
 		self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
 		self.linux_source_mock.run_kernel.assert_called_once_with(
-			build_dir=build_dir, filter_glob='', timeout=300)
+			args=None, build_dir=build_dir, filter_glob='', timeout=300)
 		self.print_mock.assert_any_call(StrContains('Testing complete.'))
 
 	def test_config_builddir(self):
@@ -436,7 +445,7 @@ class KUnitMainTest(unittest.TestCase):
 		build_dir = '.kunit'
 		kunit.main(['exec', '--build_dir', build_dir], self.linux_source_mock)
 		self.linux_source_mock.run_kernel.assert_called_once_with(
-			build_dir=build_dir, filter_glob='', timeout=300)
+			args=None, build_dir=build_dir, filter_glob='', timeout=300)
 		self.print_mock.assert_any_call(StrContains('Testing complete.'))
 
 	@mock.patch.object(kunit_kernel, 'LinuxSourceTree')
@@ -461,5 +470,13 @@ class KUnitMainTest(unittest.TestCase):
 							cross_compile=None,
 							qemu_config_path=None)
 
+	def test_run_kernel_args(self):
+		kunit.main(['run', '--kernel_args=a=1', '--kernel_args=b=2'], self.linux_source_mock)
+		self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
+		self.linux_source_mock.run_kernel.assert_called_once_with(
+		      args=['a=1','b=2'], build_dir='.kunit', filter_glob='', timeout=300)
+		self.print_mock.assert_any_call(StrContains('Testing complete.'))
+
+
 if __name__ == '__main__':
 	unittest.main()
diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h
index f9a12005fcea..16ec895bbe5f 100644
--- a/tools/testing/scatterlist/linux/mm.h
+++ b/tools/testing/scatterlist/linux/mm.h
@@ -127,7 +127,6 @@ kmalloc_array(unsigned int n, unsigned int size, unsigned int flags)
 #define kmemleak_free(a)
 
 #define PageSlab(p) (0)
-#define flush_kernel_dcache_page(p)
 
 #define MAX_ERRNO	4095
 
diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c
index 652254754b4c..08465a701cd5 100644
--- a/tools/testing/scatterlist/main.c
+++ b/tools/testing/scatterlist/main.c
@@ -85,32 +85,46 @@ int main(void)
 
 	for (i = 0, test = tests; test->expected_segments; test++, i++) {
 		int left_pages = test->pfn_app ? test->num_pages : 0;
+		struct sg_append_table append = {};
 		struct page *pages[MAX_PAGES];
-		struct sg_table st;
-		struct scatterlist *sg;
+		int ret;
 
 		set_pages(pages, test->pfn, test->num_pages);
 
-		sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0,
-				test->size, test->max_seg, NULL, left_pages, GFP_KERNEL);
-		assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret);
+		if (test->pfn_app)
+			ret = sg_alloc_append_table_from_pages(
+				&append, pages, test->num_pages, 0, test->size,
+				test->max_seg, left_pages, GFP_KERNEL);
+		else
+			ret = sg_alloc_table_from_pages_segment(
+				&append.sgt, pages, test->num_pages, 0,
+				test->size, test->max_seg, GFP_KERNEL);
+
+		assert(ret == test->alloc_ret);
 
 		if (test->alloc_ret)
 			continue;
 
 		if (test->pfn_app) {
 			set_pages(pages, test->pfn_app, test->num_pages);
-			sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0,
-					test->size, test->max_seg, sg, 0, GFP_KERNEL);
+			ret = sg_alloc_append_table_from_pages(
+				&append, pages, test->num_pages, 0, test->size,
+				test->max_seg, 0, GFP_KERNEL);
 
-			assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret);
+			assert(ret == test->alloc_ret);
 		}
 
-		VALIDATE(st.nents == test->expected_segments, &st, test);
+		VALIDATE(append.sgt.nents == test->expected_segments,
+			 &append.sgt, test);
 		if (!test->pfn_app)
-			VALIDATE(st.orig_nents == test->expected_segments, &st, test);
-
-		sg_free_table(&st);
+			VALIDATE(append.sgt.orig_nents ==
+					 test->expected_segments,
+				 &append.sgt, test);
+
+		if (test->pfn_app)
+			sg_free_append_table(&append);
+		else
+			sg_free_table(&append.sgt);
 	}
 
 	assert(i == (sizeof(tests) / sizeof(tests[0])) - 1);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
index 370d220288a6..ad3ba81b4048 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
@@ -384,8 +384,7 @@ static void test_xdp_bonding_attach(struct skeletons *skeletons)
 {
 	struct bpf_link *link = NULL;
 	struct bpf_link *link2 = NULL;
-	int veth, bond;
-	int err;
+	int veth, bond, err;
 
 	if (!ASSERT_OK(system("ip link add veth type veth"), "add veth"))
 		goto out;
@@ -399,22 +398,18 @@ static void test_xdp_bonding_attach(struct skeletons *skeletons)
 	if (!ASSERT_GE(bond, 0, "if_nametoindex bond"))
 		goto out;
 
-	/* enslaving with a XDP program loaded fails */
+	/* enslaving with a XDP program loaded is allowed */
 	link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
 	if (!ASSERT_OK_PTR(link, "attach program to veth"))
 		goto out;
 
 	err = system("ip link set veth master bond");
-	if (!ASSERT_NEQ(err, 0, "attaching slave with xdp program expected to fail"))
+	if (!ASSERT_OK(err, "set veth master"))
 		goto out;
 
 	bpf_link__destroy(link);
 	link = NULL;
 
-	err = system("ip link set veth master bond");
-	if (!ASSERT_OK(err, "set veth master"))
-		goto out;
-
 	/* attaching to slave when master has no program is allowed */
 	link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
 	if (!ASSERT_OK_PTR(link, "attach program to slave when enslaved"))
@@ -434,8 +429,26 @@ static void test_xdp_bonding_attach(struct skeletons *skeletons)
 		goto out;
 
 	/* attaching to slave not allowed when master has program loaded */
-	link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
-	ASSERT_ERR_PTR(link2, "attach program to slave when master has program");
+	link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
+	if (!ASSERT_ERR_PTR(link2, "attach program to slave when master has program"))
+		goto out;
+
+	bpf_link__destroy(link);
+	link = NULL;
+
+	/* test program unwinding with a non-XDP slave */
+	if (!ASSERT_OK(system("ip link add vxlan type vxlan id 1 remote 1.2.3.4 dstport 0 dev lo"),
+		       "add vxlan"))
+		goto out;
+
+	err = system("ip link set vxlan master bond");
+	if (!ASSERT_OK(err, "set vxlan master"))
+		goto out;
+
+	/* attaching not allowed when one slave does not support XDP */
+	link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+	if (!ASSERT_ERR_PTR(link, "attach program to master when slave does not support XDP"))
+		goto out;
 
 out:
 	bpf_link__destroy(link);
@@ -443,6 +456,44 @@ out:
 
 	system("ip link del veth");
 	system("ip link del bond");
+	system("ip link del vxlan");
+}
+
+/* Test with nested bonding devices to catch issue with negative jump label count */
+static void test_xdp_bonding_nested(struct skeletons *skeletons)
+{
+	struct bpf_link *link = NULL;
+	int bond, err;
+
+	if (!ASSERT_OK(system("ip link add bond type bond"), "add bond"))
+		goto out;
+
+	bond = if_nametoindex("bond");
+	if (!ASSERT_GE(bond, 0, "if_nametoindex bond"))
+		goto out;
+
+	if (!ASSERT_OK(system("ip link add bond_nest1 type bond"), "add bond_nest1"))
+		goto out;
+
+	err = system("ip link set bond_nest1 master bond");
+	if (!ASSERT_OK(err, "set bond_nest1 master"))
+		goto out;
+
+	if (!ASSERT_OK(system("ip link add bond_nest2 type bond"), "add bond_nest1"))
+		goto out;
+
+	err = system("ip link set bond_nest2 master bond_nest1");
+	if (!ASSERT_OK(err, "set bond_nest2 master"))
+		goto out;
+
+	link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+	ASSERT_OK_PTR(link, "attach program to master");
+
+out:
+	bpf_link__destroy(link);
+	system("ip link del bond");
+	system("ip link del bond_nest1");
+	system("ip link del bond_nest2");
 }
 
 static int libbpf_debug_print(enum libbpf_print_level level,
@@ -496,6 +547,9 @@ void test_xdp_bonding(void)
 	if (test__start_subtest("xdp_bonding_attach"))
 		test_xdp_bonding_attach(&skeletons);
 
+	if (test__start_subtest("xdp_bonding_nested"))
+		test_xdp_bonding_nested(&skeletons);
+
 	for (i = 0; i < ARRAY_SIZE(bond_test_cases); i++) {
 		struct bond_test_case *test_case = &bond_test_cases[i];
 
diff --git a/tools/testing/selftests/cpufreq/config b/tools/testing/selftests/cpufreq/config
index 27ff72ebd0f5..75e900793e8a 100644
--- a/tools/testing/selftests/cpufreq/config
+++ b/tools/testing/selftests/cpufreq/config
@@ -6,7 +6,7 @@ CONFIG_CPU_FREQ_GOV_ONDEMAND=y
 CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
 CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
 CONFIG_DEBUG_RT_MUTEXES=y
-CONFIG_DEBUG_PI_LIST=y
+CONFIG_DEBUG_PLIST=y
 CONFIG_DEBUG_SPINLOCK=y
 CONFIG_DEBUG_MUTEXES=y
 CONFIG_DEBUG_LOCK_ALLOC=y
diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile
new file mode 100644
index 000000000000..8a3f2cd9fec0
--- /dev/null
+++ b/tools/testing/selftests/damon/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for damon selftests
+
+TEST_FILES = _chk_dependency.sh
+TEST_PROGS = debugfs_attrs.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/damon/_chk_dependency.sh b/tools/testing/selftests/damon/_chk_dependency.sh
new file mode 100644
index 000000000000..0189db81550b
--- /dev/null
+++ b/tools/testing/selftests/damon/_chk_dependency.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+DBGFS=/sys/kernel/debug/damon
+
+if [ $EUID -ne 0 ];
+then
+	echo "Run as root"
+	exit $ksft_skip
+fi
+
+if [ ! -d "$DBGFS" ]
+then
+	echo "$DBGFS not found"
+	exit $ksft_skip
+fi
+
+for f in attrs target_ids monitor_on
+do
+	if [ ! -f "$DBGFS/$f" ]
+	then
+		echo "$f not found"
+		exit 1
+	fi
+done
diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh
new file mode 100644
index 000000000000..bfabb19dc0d3
--- /dev/null
+++ b/tools/testing/selftests/damon/debugfs_attrs.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+test_write_result() {
+	file=$1
+	content=$2
+	orig_content=$3
+	expect_reason=$4
+	expected=$5
+
+	echo "$content" > "$file"
+	if [ $? -ne "$expected" ]
+	then
+		echo "writing $content to $file doesn't return $expected"
+		echo "expected because: $expect_reason"
+		echo "$orig_content" > "$file"
+		exit 1
+	fi
+}
+
+test_write_succ() {
+	test_write_result "$1" "$2" "$3" "$4" 0
+}
+
+test_write_fail() {
+	test_write_result "$1" "$2" "$3" "$4" 1
+}
+
+test_content() {
+	file=$1
+	orig_content=$2
+	expected=$3
+	expect_reason=$4
+
+	content=$(cat "$file")
+	if [ "$content" != "$expected" ]
+	then
+		echo "reading $file expected $expected but $content"
+		echo "expected because: $expect_reason"
+		echo "$orig_content" > "$file"
+		exit 1
+	fi
+}
+
+source ./_chk_dependency.sh
+
+# Test attrs file
+# ===============
+
+file="$DBGFS/attrs"
+orig_content=$(cat "$file")
+
+test_write_succ "$file" "1 2 3 4 5" "$orig_content" "valid input"
+test_write_fail "$file" "1 2 3 4" "$orig_content" "no enough fields"
+test_write_fail "$file" "1 2 3 5 4" "$orig_content" \
+	"min_nr_regions > max_nr_regions"
+test_content "$file" "$orig_content" "1 2 3 4 5" "successfully written"
+echo "$orig_content" > "$file"
+
+# Test target_ids file
+# ====================
+
+file="$DBGFS/target_ids"
+orig_content=$(cat "$file")
+
+test_write_succ "$file" "1 2 3 4" "$orig_content" "valid input"
+test_write_succ "$file" "1 2 abc 4" "$orig_content" "still valid input"
+test_content "$file" "$orig_content" "1 2" "non-integer was there"
+test_write_succ "$file" "abc 2 3" "$orig_content" "the file allows wrong input"
+test_content "$file" "$orig_content" "" "wrong input written"
+test_write_succ "$file" "" "$orig_content" "empty input"
+test_content "$file" "$orig_content" "" "empty input written"
+echo "$orig_content" > "$file"
+
+echo "PASS"
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc
new file mode 100644
index 000000000000..5f5b2ba3e557
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc
@@ -0,0 +1,40 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - add/remove eprobe events
+# requires: dynamic_events events/syscalls/sys_enter_openat "e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]":README
+
+echo 0 > events/enable
+
+clear_dynamic_events
+
+SYSTEM="syscalls"
+EVENT="sys_enter_openat"
+FIELD="filename"
+EPROBE="eprobe_open"
+
+echo "e:$EPROBE $SYSTEM/$EVENT file=+0(\$filename):ustring" >> dynamic_events
+
+grep -q "$EPROBE" dynamic_events
+test -d events/eprobes/$EPROBE
+
+echo 1 > events/eprobes/$EPROBE/enable
+ls
+echo 0 > events/eprobes/$EPROBE/enable
+
+content=`grep '^ *ls-' trace | grep 'file='`
+nocontent=`grep '^ *ls-' trace | grep 'file=' | grep -v -e '"/' -e '"."' -e '(fault)' ` || true
+
+if [ -z "$content" ]; then
+	exit_fail
+fi
+
+if [ ! -z "$nocontent" ]; then
+	exit_fail
+fi
+
+echo "-:$EPROBE" >> dynamic_events
+
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc b/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc
new file mode 100644
index 000000000000..db522577ff78
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc
@@ -0,0 +1,38 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - check if duplicate events are caught
+# requires: dynamic_events "e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]":README
+
+echo 0 > events/enable
+
+HAVE_KPROBES=0
+
+if [ -f kprobe_events ]; then
+	HAVE_KPROBES=1
+fi
+
+clear_dynamic_events
+
+# first create dynamic events for eprobes and kprobes.
+
+echo 'e:egroup/eevent syscalls/sys_enter_openat file=+0($filename):ustring' >> dynamic_events
+
+# Test eprobe for same eprobe, existing kprobe and existing event
+! echo 'e:egroup/eevent syscalls/sys_enter_openat file=+0($filename):ustring' >> dynamic_events
+! echo 'e:syscalls/sys_enter_open syscalls/sys_enter_openat file=+0($filename):ustring' >> dynamic_events
+
+if [ $HAVE_KPROBES -eq 1 ]; then
+    echo 'p:kgroup/kevent vfs_open file=+0($arg2)' >> dynamic_events
+    ! echo 'e:kgroup/kevent syscalls/sys_enter_openat file=+0($filename):ustring' >> dynamic_events
+
+# Test kprobe for same kprobe, existing eprobe and existing event
+    ! echo 'p:kgroup/kevent vfs_open file=+0($arg2)' >> dynamic_events
+    ! echo 'p:egroup/eevent vfs_open file=+0($arg2)' >> dynamic_events
+    ! echo 'p:syscalls/sys_enter_open vfs_open file=+0($arg2)' >> dynamic_events
+
+    echo '-:kgroup/kevent' >> dynamic_events
+fi
+
+echo '-:egroup/eevent' >> dynamic_events
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index a6fac927ee82..000fd05e84b1 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -83,6 +83,27 @@ clear_synthetic_events() { # reset all current synthetic events
     done
 }
 
+clear_dynamic_events() { # reset all current dynamic events
+    again=1
+    stop=1
+    # loop mulitple times as some events require other to be removed first
+    while [ $again -eq 1 ]; do
+	stop=$((stop+1))
+	# Prevent infinite loops
+	if [ $stop -gt 10 ]; then
+	    break;
+	fi
+	again=2
+	grep -v '^#' dynamic_events|
+	while read line; do
+	    del=`echo $line | sed -e 's/^.\([^ ]*\).*/-\1/'`
+	    if ! echo "$del" >> dynamic_events; then
+		again=1
+	    fi
+	done
+    done
+}
+
 initialize_ftrace() { # Reset ftrace to initial-state
 # As the initial state, ftrace will be set to nop tracer,
 # no events, no triggers, no filters, no function filters,
@@ -93,6 +114,7 @@ initialize_ftrace() { # Reset ftrace to initial-state
     reset_events_filter
     reset_ftrace_filter
     disable_events
+    clear_dynamic_events
     [ -f set_event_pid ] && echo > set_event_pid
     [ -f set_ftrace_pid ] && echo > set_ftrace_pid
     [ -f set_ftrace_notrace ] && echo > set_ftrace_notrace
@@ -115,7 +137,7 @@ check_requires() { # Check required files and tracers
                 echo "Required tracer $t is not configured."
                 exit_unsupported
             fi
-        elif [ $r != $i ]; then
+        elif [ "$r" != "$i" ]; then
             if ! grep -Fq "$r" README ; then
                 echo "Required feature pattern \"$r\" is not in README."
                 exit_unsupported
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc
new file mode 100644
index 000000000000..914fe2e5d030
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc
@@ -0,0 +1,53 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test inter-event histogram trigger eprobe on synthetic event
+# requires: dynamic_events synthetic_events events/syscalls/sys_enter_openat/hist "e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]":README
+
+echo 0 > events/enable
+
+clear_dynamic_events
+
+SYSTEM="syscalls"
+START="sys_enter_openat"
+END="sys_exit_openat"
+FIELD="filename"
+SYNTH="synth_open"
+EPROBE="eprobe_open"
+
+echo "$SYNTH u64 filename; s64 ret;" > synthetic_events
+echo "hist:keys=common_pid:__arg__1=$FIELD" > events/$SYSTEM/$START/trigger
+echo "hist:keys=common_pid:filename=\$__arg__1,ret=ret:onmatch($SYSTEM.$START).trace($SYNTH,\$filename,\$ret)" > events/$SYSTEM/$END/trigger
+
+echo "e:$EPROBE synthetic/$SYNTH file=+0(\$filename):ustring ret=\$ret:s64" >> dynamic_events
+
+grep -q "$SYNTH" dynamic_events
+grep -q "$EPROBE" dynamic_events
+test -d events/synthetic/$SYNTH
+test -d events/eprobes/$EPROBE
+
+echo 1 > events/eprobes/$EPROBE/enable
+ls
+echo 0 > events/eprobes/$EPROBE/enable
+
+content=`grep '^ *ls-' trace | grep 'file='`
+nocontent=`grep '^ *ls-' trace | grep 'file=' | grep -v -e '"/' -e '"."'` || true
+
+if [ -z "$content" ]; then
+	exit_fail
+fi
+
+if [ ! -z "$nocontent" ]; then
+	exit_fail
+fi
+
+echo "-:$EPROBE" >> dynamic_events
+echo '!'"hist:keys=common_pid:filename=\$__arg__1,ret=ret:onmatch($SYSTEM.$START).trace($SYNTH,\$filename,\$ret)" > events/$SYSTEM/$END/trigger
+echo '!'"hist:keys=common_pid:__arg__1=$FIELD" > events/$SYSTEM/$START/trigger
+echo '!'"$SYNTH u64 filename; s64 ret;" >> synthetic_events
+
+! grep -q "$SYNTH" dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/synthetic/$SYNTH
+! test -d events/eprobes/$EPROBE
+
+clear_trace
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 0709af0144c8..98053d3afbda 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 /aarch64/debug-exceptions
 /aarch64/get-reg-list
+/aarch64/psci_cpu_on_test
 /aarch64/vgic_init
 /s390x/memop
 /s390x/resets
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 5832f510a16c..5d05801ab816 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -86,6 +86,7 @@ TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test
 
 TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
+TEST_GEN_PROGS_aarch64 += aarch64/psci_cpu_on_test
 TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
 TEST_GEN_PROGS_aarch64 += demand_paging_test
 TEST_GEN_PROGS_aarch64 += dirty_log_test
diff --git a/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c b/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c
new file mode 100644
index 000000000000..018c269990e1
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * psci_cpu_on_test - Test that the observable state of a vCPU targeted by the
+ * CPU_ON PSCI call matches what the caller requested.
+ *
+ * Copyright (c) 2021 Google LLC.
+ *
+ * This is a regression test for a race between KVM servicing the PSCI call and
+ * userspace reading the vCPUs registers.
+ */
+
+#define _GNU_SOURCE
+
+#include <linux/psci.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+#define VCPU_ID_SOURCE 0
+#define VCPU_ID_TARGET 1
+
+#define CPU_ON_ENTRY_ADDR 0xfeedf00dul
+#define CPU_ON_CONTEXT_ID 0xdeadc0deul
+
+static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr,
+			    uint64_t context_id)
+{
+	register uint64_t x0 asm("x0") = PSCI_0_2_FN64_CPU_ON;
+	register uint64_t x1 asm("x1") = target_cpu;
+	register uint64_t x2 asm("x2") = entry_addr;
+	register uint64_t x3 asm("x3") = context_id;
+
+	asm("hvc #0"
+	    : "=r"(x0)
+	    : "r"(x0), "r"(x1), "r"(x2), "r"(x3)
+	    : "memory");
+
+	return x0;
+}
+
+static uint64_t psci_affinity_info(uint64_t target_affinity,
+				   uint64_t lowest_affinity_level)
+{
+	register uint64_t x0 asm("x0") = PSCI_0_2_FN64_AFFINITY_INFO;
+	register uint64_t x1 asm("x1") = target_affinity;
+	register uint64_t x2 asm("x2") = lowest_affinity_level;
+
+	asm("hvc #0"
+	    : "=r"(x0)
+	    : "r"(x0), "r"(x1), "r"(x2)
+	    : "memory");
+
+	return x0;
+}
+
+static void guest_main(uint64_t target_cpu)
+{
+	GUEST_ASSERT(!psci_cpu_on(target_cpu, CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID));
+	uint64_t target_state;
+
+	do {
+		target_state = psci_affinity_info(target_cpu, 0);
+
+		GUEST_ASSERT((target_state == PSCI_0_2_AFFINITY_LEVEL_ON) ||
+			     (target_state == PSCI_0_2_AFFINITY_LEVEL_OFF));
+	} while (target_state != PSCI_0_2_AFFINITY_LEVEL_ON);
+
+	GUEST_DONE();
+}
+
+int main(void)
+{
+	uint64_t target_mpidr, obs_pc, obs_x0;
+	struct kvm_vcpu_init init;
+	struct kvm_vm *vm;
+	struct ucall uc;
+
+	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+	kvm_vm_elf_load(vm, program_invocation_name);
+	ucall_init(vm, NULL);
+
+	vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
+	init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
+
+	aarch64_vcpu_add_default(vm, VCPU_ID_SOURCE, &init, guest_main);
+
+	/*
+	 * make sure the target is already off when executing the test.
+	 */
+	init.features[0] |= (1 << KVM_ARM_VCPU_POWER_OFF);
+	aarch64_vcpu_add_default(vm, VCPU_ID_TARGET, &init, guest_main);
+
+	get_reg(vm, VCPU_ID_TARGET, ARM64_SYS_REG(MPIDR_EL1), &target_mpidr);
+	vcpu_args_set(vm, VCPU_ID_SOURCE, 1, target_mpidr & MPIDR_HWID_BITMASK);
+	vcpu_run(vm, VCPU_ID_SOURCE);
+
+	switch (get_ucall(vm, VCPU_ID_SOURCE, &uc)) {
+	case UCALL_DONE:
+		break;
+	case UCALL_ABORT:
+		TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], __FILE__,
+			  uc.args[1]);
+		break;
+	default:
+		TEST_FAIL("Unhandled ucall: %lu", uc.cmd);
+	}
+
+	get_reg(vm, VCPU_ID_TARGET, ARM64_CORE_REG(regs.pc), &obs_pc);
+	get_reg(vm, VCPU_ID_TARGET, ARM64_CORE_REG(regs.regs[0]), &obs_x0);
+
+	TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR,
+		    "unexpected target cpu pc: %lx (expected: %lx)",
+		    obs_pc, CPU_ON_ENTRY_ADDR);
+	TEST_ASSERT(obs_x0 == CPU_ON_CONTEXT_ID,
+		    "unexpected target context id: %lx (expected: %lx)",
+		    obs_x0, CPU_ON_CONTEXT_ID);
+
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index e2baa187a21e..71e277c7c3f3 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -222,8 +222,6 @@ static void *vcpu_thread_main(void *arg)
 	int vcpu_id = vcpu_args->vcpu_id;
 	int current_iteration = -1;
 
-	vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
-
 	while (spin_wait_for_next_iteration(&current_iteration)) {
 		switch (READ_ONCE(iteration_work)) {
 		case ITERATION_ACCESS_MEMORY:
@@ -333,7 +331,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	pthread_t *vcpu_threads;
 	int vcpus = params->vcpus;
 
-	vm = perf_test_create_vm(mode, vcpus, params->vcpu_memory_bytes,
+	vm = perf_test_create_vm(mode, vcpus, params->vcpu_memory_bytes, 1,
 				 params->backing_src);
 
 	perf_test_setup_vcpus(vm, vcpus, params->vcpu_memory_bytes,
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index b74704305835..e79c1b64977f 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -52,7 +52,6 @@ static void *vcpu_worker(void *data)
 	struct timespec start;
 	struct timespec ts_diff;
 
-	vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
 	run = vcpu_state(vm, vcpu_id);
 
 	clock_gettime(CLOCK_MONOTONIC, &start);
@@ -293,7 +292,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	int vcpu_id;
 	int r;
 
-	vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
+	vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
 				 p->src_type);
 
 	perf_test_args.wr_fract = 1;
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 80cbd3a748c0..479868570d59 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -44,7 +44,6 @@ static void *vcpu_worker(void *data)
 	struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data;
 	int vcpu_id = vcpu_args->vcpu_id;
 
-	vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
 	run = vcpu_state(vm, vcpu_id);
 
 	while (!READ_ONCE(host_quit)) {
@@ -94,8 +93,59 @@ struct test_params {
 	int wr_fract;
 	bool partition_vcpu_memory_access;
 	enum vm_mem_backing_src_type backing_src;
+	int slots;
 };
 
+static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable)
+{
+	int i;
+
+	for (i = 0; i < slots; i++) {
+		int slot = PERF_TEST_MEM_SLOT_INDEX + i;
+		int flags = enable ? KVM_MEM_LOG_DIRTY_PAGES : 0;
+
+		vm_mem_region_set_flags(vm, slot, flags);
+	}
+}
+
+static inline void enable_dirty_logging(struct kvm_vm *vm, int slots)
+{
+	toggle_dirty_logging(vm, slots, true);
+}
+
+static inline void disable_dirty_logging(struct kvm_vm *vm, int slots)
+{
+	toggle_dirty_logging(vm, slots, false);
+}
+
+static void get_dirty_log(struct kvm_vm *vm, int slots, unsigned long *bitmap,
+			  uint64_t nr_pages)
+{
+	uint64_t slot_pages = nr_pages / slots;
+	int i;
+
+	for (i = 0; i < slots; i++) {
+		int slot = PERF_TEST_MEM_SLOT_INDEX + i;
+		unsigned long *slot_bitmap = bitmap + i * slot_pages;
+
+		kvm_vm_get_dirty_log(vm, slot, slot_bitmap);
+	}
+}
+
+static void clear_dirty_log(struct kvm_vm *vm, int slots, unsigned long *bitmap,
+			    uint64_t nr_pages)
+{
+	uint64_t slot_pages = nr_pages / slots;
+	int i;
+
+	for (i = 0; i < slots; i++) {
+		int slot = PERF_TEST_MEM_SLOT_INDEX + i;
+		unsigned long *slot_bitmap = bitmap + i * slot_pages;
+
+		kvm_vm_clear_dirty_log(vm, slot, slot_bitmap, 0, slot_pages);
+	}
+}
+
 static void run_test(enum vm_guest_mode mode, void *arg)
 {
 	struct test_params *p = arg;
@@ -114,14 +164,14 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	struct timespec clear_dirty_log_total = (struct timespec){0};
 
 	vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
-				 p->backing_src);
+				 p->slots, p->backing_src);
 
 	perf_test_args.wr_fract = p->wr_fract;
 
 	guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm);
 	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
 	host_num_pages = vm_num_host_pages(mode, guest_num_pages);
-	bmap = bitmap_alloc(host_num_pages);
+	bmap = bitmap_zalloc(host_num_pages);
 
 	if (dirty_log_manual_caps) {
 		cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
@@ -163,8 +213,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
 	/* Enable dirty logging */
 	clock_gettime(CLOCK_MONOTONIC, &start);
-	vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX,
-				KVM_MEM_LOG_DIRTY_PAGES);
+	enable_dirty_logging(vm, p->slots);
 	ts_diff = timespec_elapsed(start);
 	pr_info("Enabling dirty logging time: %ld.%.9lds\n\n",
 		ts_diff.tv_sec, ts_diff.tv_nsec);
@@ -190,8 +239,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 			iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
 
 		clock_gettime(CLOCK_MONOTONIC, &start);
-		kvm_vm_get_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap);
-
+		get_dirty_log(vm, p->slots, bmap, host_num_pages);
 		ts_diff = timespec_elapsed(start);
 		get_dirty_log_total = timespec_add(get_dirty_log_total,
 						   ts_diff);
@@ -200,9 +248,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
 		if (dirty_log_manual_caps) {
 			clock_gettime(CLOCK_MONOTONIC, &start);
-			kvm_vm_clear_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap, 0,
-					       host_num_pages);
-
+			clear_dirty_log(vm, p->slots, bmap, host_num_pages);
 			ts_diff = timespec_elapsed(start);
 			clear_dirty_log_total = timespec_add(clear_dirty_log_total,
 							     ts_diff);
@@ -213,7 +259,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
 	/* Disable dirty logging */
 	clock_gettime(CLOCK_MONOTONIC, &start);
-	vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX, 0);
+	disable_dirty_logging(vm, p->slots);
 	ts_diff = timespec_elapsed(start);
 	pr_info("Disabling dirty logging time: %ld.%.9lds\n",
 		ts_diff.tv_sec, ts_diff.tv_nsec);
@@ -244,7 +290,8 @@ static void help(char *name)
 {
 	puts("");
 	printf("usage: %s [-h] [-i iterations] [-p offset] "
-	       "[-m mode] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]\n", name);
+	       "[-m mode] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]"
+	       "[-x memslots]\n", name);
 	puts("");
 	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
 	       TEST_HOST_LOOP_N);
@@ -263,6 +310,8 @@ static void help(char *name)
 	       "     them into a separate region of memory for each vCPU.\n");
 	printf(" -s: specify the type of memory that should be used to\n"
 	       "     back the guest data region.\n\n");
+	printf(" -x: Split the memory region into this number of memslots.\n"
+	       "     (default: 1)");
 	backing_src_help();
 	puts("");
 	exit(0);
@@ -276,6 +325,7 @@ int main(int argc, char *argv[])
 		.wr_fract = 1,
 		.partition_vcpu_memory_access = true,
 		.backing_src = VM_MEM_SRC_ANONYMOUS,
+		.slots = 1,
 	};
 	int opt;
 
@@ -286,7 +336,7 @@ int main(int argc, char *argv[])
 
 	guest_modes_append_default();
 
-	while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:os:")) != -1) {
+	while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:os:x:")) != -1) {
 		switch (opt) {
 		case 'i':
 			p.iterations = atoi(optarg);
@@ -316,6 +366,9 @@ int main(int argc, char *argv[])
 		case 's':
 			p.backing_src = parse_backing_src_type(optarg);
 			break;
+		case 'x':
+			p.slots = atoi(optarg);
+			break;
 		case 'h':
 		default:
 			help(argv[0]);
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 5fe0140e407e..792c60e1b17d 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -749,8 +749,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
 	pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
 
-	bmap = bitmap_alloc(host_num_pages);
-	host_bmap_track = bitmap_alloc(host_num_pages);
+	bmap = bitmap_zalloc(host_num_pages);
+	host_bmap_track = bitmap_zalloc(host_num_pages);
 
 	/* Add an extra memory slot for testing dirty logging */
 	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index 27dc5c2e56b9..c0273aefa63d 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -17,6 +17,7 @@
 #define CPACR_EL1               3, 0,  1, 0, 2
 #define TCR_EL1                 3, 0,  2, 0, 2
 #define MAIR_EL1                3, 0, 10, 2, 0
+#define MPIDR_EL1               3, 0,  0, 0, 5
 #define TTBR0_EL1               3, 0,  2, 0, 0
 #define SCTLR_EL1               3, 0,  1, 0, 0
 #define VBAR_EL1                3, 0, 12, 0, 0
@@ -40,6 +41,8 @@
 			  (0xfful << (4 * 8)) | \
 			  (0xbbul << (5 * 8)))
 
+#define MPIDR_HWID_BITMASK (0xff00fffffful)
+
 static inline void get_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint64_t *addr)
 {
 	struct kvm_one_reg reg;
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index 005f2143adeb..df9f1a3a3ffb 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -44,7 +44,7 @@ extern struct perf_test_args perf_test_args;
 extern uint64_t guest_test_phys_mem;
 
 struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
-				   uint64_t vcpu_memory_bytes,
+				   uint64_t vcpu_memory_bytes, int slots,
 				   enum vm_mem_backing_src_type backing_src);
 void perf_test_destroy_vm(struct kvm_vm *vm);
 void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus,
diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
index 5906bbc08483..17f65d514915 100644
--- a/tools/testing/selftests/kvm/kvm_binary_stats_test.c
+++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
@@ -109,6 +109,18 @@ static void stats_test(int stats_fd)
 		/* Check size field, which should not be zero */
 		TEST_ASSERT(pdesc->size, "KVM descriptor(%s) with size of 0",
 				pdesc->name);
+		/* Check bucket_size field */
+		switch (pdesc->flags & KVM_STATS_TYPE_MASK) {
+		case KVM_STATS_TYPE_LINEAR_HIST:
+			TEST_ASSERT(pdesc->bucket_size,
+			    "Bucket size of Linear Histogram stats (%s) is zero",
+			    pdesc->name);
+			break;
+		default:
+			TEST_ASSERT(!pdesc->bucket_size,
+			    "Bucket size of stats (%s) is not zero",
+			    pdesc->name);
+		}
 		size_data += pdesc->size * sizeof(*stats_data);
 	}
 	/* Check overlap */
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index b488f4aefea8..0ef80dbdc116 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -50,11 +50,12 @@ static void guest_code(uint32_t vcpu_id)
 }
 
 struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
-				   uint64_t vcpu_memory_bytes,
+				   uint64_t vcpu_memory_bytes, int slots,
 				   enum vm_mem_backing_src_type backing_src)
 {
 	struct kvm_vm *vm;
 	uint64_t guest_num_pages;
+	int i;
 
 	pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
 
@@ -68,6 +69,9 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
 		    "Guest memory size is not host page size aligned.");
 	TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0,
 		    "Guest memory size is not guest page size aligned.");
+	TEST_ASSERT(guest_num_pages % slots == 0,
+		    "Guest memory cannot be evenly divided into %d slots.",
+		    slots);
 
 	vm = vm_create_with_vcpus(mode, vcpus, DEFAULT_GUEST_PHY_PAGES,
 				  (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size,
@@ -95,10 +99,16 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
 #endif
 	pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
 
-	/* Add an extra memory slot for testing */
-	vm_userspace_mem_region_add(vm, backing_src, guest_test_phys_mem,
-				    PERF_TEST_MEM_SLOT_INDEX,
-				    guest_num_pages, 0);
+	/* Add extra memory slots for testing */
+	for (i = 0; i < slots; i++) {
+		uint64_t region_pages = guest_num_pages / slots;
+		vm_paddr_t region_start = guest_test_phys_mem +
+			region_pages * perf_test_args.guest_page_size * i;
+
+		vm_userspace_mem_region_add(vm, backing_src, region_start,
+					    PERF_TEST_MEM_SLOT_INDEX + i,
+					    region_pages, 0);
+	}
 
 	/* Do mapping for the demand paging memory slot */
 	virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
@@ -140,6 +150,8 @@ void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus,
 			vcpu_gpa = guest_test_phys_mem;
 		}
 
+		vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
+
 		pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
 			 vcpu_id, vcpu_gpa, vcpu_gpa +
 			 (vcpu_args->pages * perf_test_args.guest_page_size));
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
index 98351ba0933c..4cfcafea9f5a 100644
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -45,7 +45,6 @@ static void *vcpu_worker(void *data)
 	struct kvm_vm *vm = perf_test_args.vm;
 	struct kvm_run *run;
 
-	vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
 	run = vcpu_state(vm, vcpu_id);
 
 	/* Let the guest access its memory until a stop signal is received */
@@ -105,7 +104,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	struct kvm_vm *vm;
 	int vcpu_id;
 
-	vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
+	vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
 				 VM_MEM_SRC_ANONYMOUS);
 
 	perf_test_args.wr_fract = 1;
diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c
index 6097a8283377..5f078db1bcba 100644
--- a/tools/testing/selftests/kvm/x86_64/debug_regs.c
+++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c
@@ -8,12 +8,15 @@
 #include <string.h>
 #include "kvm_util.h"
 #include "processor.h"
+#include "apic.h"
 
 #define VCPU_ID 0
 
 #define DR6_BD		(1 << 13)
 #define DR7_GD		(1 << 13)
 
+#define IRQ_VECTOR 0xAA
+
 /* For testing data access debug BP */
 uint32_t guest_value;
 
@@ -21,6 +24,11 @@ extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start;
 
 static void guest_code(void)
 {
+	/* Create a pending interrupt on current vCPU */
+	x2apic_enable();
+	x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT |
+			 APIC_DM_FIXED | IRQ_VECTOR);
+
 	/*
 	 * Software BP tests.
 	 *
@@ -38,12 +46,19 @@ static void guest_code(void)
 		     "mov %%rax,%0;\n\t write_data:"
 		     : "=m" (guest_value) : : "rax");
 
-	/* Single step test, covers 2 basic instructions and 2 emulated */
+	/*
+	 * Single step test, covers 2 basic instructions and 2 emulated
+	 *
+	 * Enable interrupts during the single stepping to see that
+	 * pending interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ
+	 */
 	asm volatile("ss_start: "
+		     "sti\n\t"
 		     "xor %%eax,%%eax\n\t"
 		     "cpuid\n\t"
 		     "movl $0x1a0,%%ecx\n\t"
 		     "rdmsr\n\t"
+		     "cli\n\t"
 		     : : : "eax", "ebx", "ecx", "edx");
 
 	/* DR6.BD test */
@@ -72,11 +87,13 @@ int main(void)
 	uint64_t cmd;
 	int i;
 	/* Instruction lengths starting at ss_start */
-	int ss_size[4] = {
+	int ss_size[6] = {
+		1,		/* sti*/
 		2,		/* xor */
 		2,		/* cpuid */
 		5,		/* mov */
 		2,		/* rdmsr */
+		1,		/* cli */
 	};
 
 	if (!kvm_check_cap(KVM_CAP_SET_GUEST_DEBUG)) {
@@ -154,7 +171,8 @@ int main(void)
 	for (i = 0; i < (sizeof(ss_size) / sizeof(ss_size[0])); i++) {
 		target_rip += ss_size[i];
 		CLEAR_DEBUG();
-		debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
+		debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP |
+				KVM_GUESTDBG_BLOCKIRQ;
 		debug.arch.debugreg[7] = 0x00000400;
 		APPLY_DEBUG();
 		vcpu_run(vm, VCPU_ID);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
index 06a64980a5d2..68f26a8b4f42 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
@@ -111,7 +111,7 @@ int main(int argc, char *argv[])
 	nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
 	nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
 
-	bmap = bitmap_alloc(TEST_MEM_PAGES);
+	bmap = bitmap_zalloc(TEST_MEM_PAGES);
 	host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
 
 	while (!done) {
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 74baab83fec3..192a2899bae8 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -56,7 +56,7 @@ static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
 
 static int mfd_assert_reopen_fd(int fd_in)
 {
-	int r, fd;
+	int fd;
 	char path[100];
 
 	sprintf(path, "/proc/self/fd/%d", fd_in);
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 378c0aac5a1a..492b273743b4 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -27,6 +27,7 @@ TEST_PROGS += udpgro_fwd.sh
 TEST_PROGS += veth.sh
 TEST_PROGS += ioam6.sh
 TEST_PROGS += gro.sh
+TEST_PROGS += gre_gso.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket nettest
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
diff --git a/tools/testing/selftests/net/gre_gso.sh b/tools/testing/selftests/net/gre_gso.sh
new file mode 100755
index 000000000000..facbb0c80443
--- /dev/null
+++ b/tools/testing/selftests/net/gre_gso.sh
@@ -0,0 +1,236 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking GRE GSO.
+
+ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# all tests in this script. Can be overridden with -t option
+TESTS="gre_gso"
+
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+IP="ip -netns ns1"
+NS_EXEC="ip netns exec ns1"
+TMPFILE=`mktemp`
+PID=
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "    TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "    TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+		echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+
+	if [ "${PAUSE}" = "yes" ]; then
+		echo
+		echo "hit enter to continue, 'q' to quit"
+		read a
+		[ "$a" = "q" ] && exit 1
+	fi
+}
+
+setup()
+{
+	set -e
+	ip netns add ns1
+	ip netns set ns1 auto
+	$IP link set dev lo up
+
+	ip link add veth0 type veth peer name veth1
+	ip link set veth0 up
+	ip link set veth1 netns ns1
+	$IP link set veth1 name veth0
+	$IP link set veth0 up
+
+	dd if=/dev/urandom of=$TMPFILE bs=1024 count=2048 &>/dev/null
+	set +e
+}
+
+cleanup()
+{
+	rm -rf $TMPFILE
+	[ -n "$PID" ] && kill $PID
+	ip link del dev gre1 &> /dev/null
+	ip link del dev veth0 &> /dev/null
+	ip netns del ns1
+}
+
+get_linklocal()
+{
+	local dev=$1
+	local ns=$2
+	local addr
+
+	[ -n "$ns" ] && ns="-netns $ns"
+
+	addr=$(ip -6 -br $ns addr show dev ${dev} | \
+	awk '{
+		for (i = 3; i <= NF; ++i) {
+			if ($i ~ /^fe80/)
+				print $i
+		}
+	}'
+	)
+	addr=${addr/\/*}
+
+	[ -z "$addr" ] && return 1
+
+	echo $addr
+
+	return 0
+}
+
+gre_create_tun()
+{
+	local a1=$1
+	local a2=$2
+	local mode
+
+	[[ $a1 =~ ^[0-9.]*$ ]] && mode=gre || mode=ip6gre
+
+	ip tunnel add gre1 mode $mode local $a1 remote $a2 dev veth0
+	ip link set gre1 up
+	$IP tunnel add gre1 mode $mode local $a2 remote $a1 dev veth0
+	$IP link set gre1 up
+}
+
+gre_gst_test_checks()
+{
+	local name=$1
+	local addr=$2
+
+	$NS_EXEC nc -kl $port >/dev/null &
+	PID=$!
+	while ! $NS_EXEC ss -ltn | grep -q $port; do ((i++)); sleep 0.01; done
+
+	cat $TMPFILE | timeout 1 nc $addr $port
+	log_test $? 0 "$name - copy file w/ TSO"
+
+	ethtool -K veth0 tso off
+
+	cat $TMPFILE | timeout 1 nc $addr $port
+	log_test $? 0 "$name - copy file w/ GSO"
+
+	ethtool -K veth0 tso on
+
+	kill $PID
+	PID=
+}
+
+gre6_gso_test()
+{
+	local port=7777
+
+	setup
+
+	a1=$(get_linklocal veth0)
+	a2=$(get_linklocal veth0 ns1)
+
+	gre_create_tun $a1 $a2
+
+	ip  addr add 172.16.2.1/24 dev gre1
+	$IP addr add 172.16.2.2/24 dev gre1
+
+	ip  -6 addr add 2001:db8:1::1/64 dev gre1 nodad
+	$IP -6 addr add 2001:db8:1::2/64 dev gre1 nodad
+
+	sleep 2
+
+	gre_gst_test_checks GREv6/v4 172.16.2.2
+	gre_gst_test_checks GREv6/v6 2001:db8:1::2
+
+	cleanup
+}
+
+gre_gso_test()
+{
+	gre6_gso_test
+}
+
+################################################################################
+# usage
+
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
+
+        -t <test>   Test(s) to run (default: all)
+                    (options: $TESTS)
+        -p          Pause on fail
+        -P          Pause after each test before cleanup
+        -v          verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# main
+
+while getopts :t:pPhv o
+do
+	case $o in
+		t) TESTS=$OPTARG;;
+		p) PAUSE_ON_FAIL=yes;;
+		P) PAUSE=yes;;
+		v) VERBOSE=$(($VERBOSE + 1));;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+PEER_CMD="ip netns exec ${PEER_NS}"
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v nc)" ]; then
+	echo "SKIP: Could not run test without nc tool"
+	exit $ksft_skip
+fi
+
+# start clean
+cleanup &> /dev/null
+
+for t in $TESTS
+do
+	case $t in
+	gre_gso)		gre_gso_test;;
+
+	help) echo "Test names: $TESTS"; exit 0;;
+	esac
+done
+
+if [ "$TESTS" != "none" ]; then
+	printf "\nTests passed: %3d\n" ${nsuccess}
+	printf "Tests failed: %3d\n"   ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index fd63ebfe9a2b..910d8126af8f 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -22,8 +22,8 @@ usage() {
 
 cleanup()
 {
-	rm -f "$cin" "$cout"
-	rm -f "$sin" "$sout"
+	rm -f "$cout" "$sout"
+	rm -f "$large" "$small"
 	rm -f "$capout"
 
 	local netns
diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c
index d7ec1e7da0d0..1bddbe934204 100644
--- a/tools/testing/selftests/openat2/openat2_test.c
+++ b/tools/testing/selftests/openat2/openat2_test.c
@@ -22,7 +22,11 @@
  * XXX: This is wrong on {mips, parisc, powerpc, sparc}.
  */
 #undef	O_LARGEFILE
+#ifdef __aarch64__
+#define	O_LARGEFILE 0x20000
+#else
 #define	O_LARGEFILE 0x8000
+#endif
 
 struct open_how_ext {
 	struct open_how inner;
diff --git a/tools/testing/selftests/powerpc/primitives/asm/extable.h b/tools/testing/selftests/powerpc/primitives/asm/extable.h
new file mode 120000
index 000000000000..6385f059a951
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/extable.h
@@ -0,0 +1 @@
+../../../../../../arch/powerpc/include/asm/extable.h
+\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
index 82f7bdc2e5e6..67ca297c5cca 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
@@ -57,7 +57,7 @@ trans:
 		: [gpr_1]"i"(GPR_1), [gpr_2]"i"(GPR_2),
 		[sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "b" (&a),
 		[flt_2] "b" (&b), [cptr1] "b" (&cptr[1])
-		: "memory", "r7", "r8", "r9", "r10",
+		: "memory", "r0", "r7", "r8", "r9", "r10",
 		"r11", "r12", "r13", "r14", "r15", "r16",
 		"r17", "r18", "r19", "r20", "r21", "r22",
 		"r23", "r24", "r25", "r26", "r27", "r28",
@@ -113,6 +113,7 @@ int ptrace_tm_gpr(void)
 	int ret, status;
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
 	pid = fork();
 	if (pid < 0) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
index ad65be6e8e85..6f2bce1b6c5d 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
@@ -65,7 +65,7 @@ trans:
 		: [gpr_1]"i"(GPR_1), [gpr_2]"i"(GPR_2), [gpr_4]"i"(GPR_4),
 		[sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "b" (&a),
 		[flt_4] "b" (&d)
-		: "memory", "r5", "r6", "r7",
+		: "memory", "r0", "r5", "r6", "r7",
 		"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
 		"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
 		"r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
@@ -119,6 +119,7 @@ int ptrace_tm_spd_gpr(void)
 	int ret, status;
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
 	pid = fork();
 	if (pid < 0) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
index 2ecfa1158e2b..e112a34fbe59 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
@@ -129,6 +129,7 @@ int ptrace_tm_spd_tar(void)
 	int ret, status;
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
 	pid = fork();
 	if (pid == 0)
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
index 6f7fb51f0809..40133d49fe39 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
@@ -129,6 +129,7 @@ int ptrace_tm_spd_vsx(void)
 	int ret, status, i;
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
 
 	for (i = 0; i < 128; i++) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
index 068bfed2e606..880ba6a29a48 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
@@ -114,6 +114,7 @@ int ptrace_tm_spr(void)
 	int ret, status;
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 	shm_id = shmget(IPC_PRIVATE, sizeof(struct shared), 0777|IPC_CREAT);
 	shm_id1 = shmget(IPC_PRIVATE, sizeof(int), 0777|IPC_CREAT);
 	pid = fork();
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
index 46ef378a15ec..d0db6df0f0ea 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
@@ -117,6 +117,7 @@ int ptrace_tm_tar(void)
 	int ret, status;
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
 	pid = fork();
 	if (pid == 0)
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
index 70ca01234f79..4f05ce4fd282 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
@@ -113,6 +113,7 @@ int ptrace_tm_vsx(void)
 	int ret, status, i;
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
 
 	for (i = 0; i < 128; i++) {
diff --git a/tools/testing/selftests/powerpc/signal/signal_tm.c b/tools/testing/selftests/powerpc/signal/signal_tm.c
index 5bf2224ef7f2..c9cf66a3daa2 100644
--- a/tools/testing/selftests/powerpc/signal/signal_tm.c
+++ b/tools/testing/selftests/powerpc/signal/signal_tm.c
@@ -56,6 +56,7 @@ static int test_signal_tm()
 	}
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 
 	for (i = 0; i < MAX_ATTEMPT; i++) {
 		/*
diff --git a/tools/testing/selftests/powerpc/tm/tm-exec.c b/tools/testing/selftests/powerpc/tm/tm-exec.c
index 260cfdb97d23..c59919d6710d 100644
--- a/tools/testing/selftests/powerpc/tm/tm-exec.c
+++ b/tools/testing/selftests/powerpc/tm/tm-exec.c
@@ -27,6 +27,7 @@ static char *path;
 static int test_exec(void)
 {
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 
 	asm __volatile__(
 		"tbegin.;"
diff --git a/tools/testing/selftests/powerpc/tm/tm-fork.c b/tools/testing/selftests/powerpc/tm/tm-fork.c
index 6efa5a685a77..c27b935f0e9f 100644
--- a/tools/testing/selftests/powerpc/tm/tm-fork.c
+++ b/tools/testing/selftests/powerpc/tm/tm-fork.c
@@ -21,6 +21,7 @@
 int test_fork(void)
 {
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 
 	asm __volatile__(
 		"tbegin.;"
diff --git a/tools/testing/selftests/powerpc/tm/tm-poison.c b/tools/testing/selftests/powerpc/tm/tm-poison.c
index 29e5f26af7b9..a7bbf034b5bb 100644
--- a/tools/testing/selftests/powerpc/tm/tm-poison.c
+++ b/tools/testing/selftests/powerpc/tm/tm-poison.c
@@ -20,7 +20,6 @@
 #include <sched.h>
 #include <sys/types.h>
 #include <signal.h>
-#include <inttypes.h>
 
 #include "tm.h"
 
@@ -34,6 +33,7 @@ int tm_poison_test(void)
 	bool fail_vr = false;
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 
 	cpu = pick_online_cpu();
 	FAIL_IF(cpu < 0);
diff --git a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
index 4cdb83964bb3..85c940ae6ff8 100644
--- a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
@@ -40,6 +40,7 @@ int test_body(void)
 	uint64_t rv, dscr1 = 1, dscr2, texasr;
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 
 	printf("Check DSCR TM context switch: ");
 	fflush(stdout);
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c
index 254f912ad611..657d755b2905 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c
@@ -79,6 +79,7 @@ static int tm_signal_context_chk_fpu()
 	pid_t pid = getpid();
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 
 	act.sa_sigaction = signal_usr1;
 	sigemptyset(&act.sa_mask);
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c
index 0cc680f61828..400fa70ca71e 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c
@@ -81,6 +81,7 @@ static int tm_signal_context_chk_gpr()
 	pid_t pid = getpid();
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 
 	act.sa_sigaction = signal_usr1;
 	sigemptyset(&act.sa_mask);
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c
index b6d52730a0d8..d628fd302b28 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c
@@ -104,6 +104,7 @@ static int tm_signal_context_chk()
 	pid_t pid = getpid();
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 
 	act.sa_sigaction = signal_usr1;
 	sigemptyset(&act.sa_mask);
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c
index 8e25e2072ecd..9bd869245bad 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c
@@ -153,6 +153,7 @@ static int tm_signal_context_chk()
 	pid_t pid = getpid();
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 
 	act.sa_sigaction = signal_usr1;
 	sigemptyset(&act.sa_mask);
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c b/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c
index 5908bc6abe60..0b84c9208d62 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c
@@ -226,6 +226,7 @@ int tm_signal_pagefault(void)
 	stack_t ss;
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 	SKIP_IF(!have_userfaultfd());
 
 	setup_uf_mem();
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c
index 07c388147b75..06b801906f27 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c
@@ -32,6 +32,7 @@ int tm_signal_sigreturn_nt(void)
 	struct sigaction trap_sa;
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 
 	trap_sa.sa_flags = SA_SIGINFO;
 	trap_sa.sa_sigaction = trap_signal_handler;
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-stack.c b/tools/testing/selftests/powerpc/tm/tm-signal-stack.c
index cdcf8c5bbbc7..68807aac8dd3 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-stack.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-stack.c
@@ -35,6 +35,7 @@ int tm_signal_stack()
 	int pid;
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 
 	pid = fork();
 	if (pid < 0)
diff --git a/tools/testing/selftests/powerpc/tm/tm-sigreturn.c b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
index 9a6017a1d769..ffe4e5515f33 100644
--- a/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
+++ b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
@@ -55,6 +55,7 @@ int tm_sigreturn(void)
 	uint64_t ret = 0;
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 	SKIP_IF(!is_ppc64le());
 
 	memset(&sa, 0, sizeof(sa));
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall.c b/tools/testing/selftests/powerpc/tm/tm-syscall.c
index becb8207b432..467a6b3134b2 100644
--- a/tools/testing/selftests/powerpc/tm/tm-syscall.c
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall.c
@@ -25,7 +25,6 @@ extern int getppid_tm_suspended(void);
 unsigned retries = 0;
 
 #define TEST_DURATION 10 /* seconds */
-#define TM_RETRIES 100
 
 pid_t getppid_tm(bool suspend)
 {
@@ -67,6 +66,7 @@ int tm_syscall(void)
 	struct timeval end, now;
 
 	SKIP_IF(!have_htm_nosc());
+	SKIP_IF(htm_is_synthetic());
 
 	setbuf(stdout, NULL);
 
diff --git a/tools/testing/selftests/powerpc/tm/tm-tar.c b/tools/testing/selftests/powerpc/tm/tm-tar.c
index 03be8c47292b..f2a9137f3c1e 100644
--- a/tools/testing/selftests/powerpc/tm/tm-tar.c
+++ b/tools/testing/selftests/powerpc/tm/tm-tar.c
@@ -26,6 +26,7 @@ int test_tar(void)
 	int i;
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 	SKIP_IF(!is_ppc64le());
 
 	for (i = 0; i < num_loops; i++)
diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
index 794d574db784..dd5ddffa28b7 100644
--- a/tools/testing/selftests/powerpc/tm/tm-tmspr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
@@ -96,6 +96,7 @@ int test_tmspr()
 	unsigned long	i;
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 
 	/* To cause some context switching */
 	thread_num = 10 * sysconf(_SC_NPROCESSORS_ONLN);
diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c
index 11521077f915..97cb74768e30 100644
--- a/tools/testing/selftests/powerpc/tm/tm-trap.c
+++ b/tools/testing/selftests/powerpc/tm/tm-trap.c
@@ -255,6 +255,7 @@ int tm_trap_test(void)
 	struct sigaction trap_sa;
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 
 	trap_sa.sa_flags = SA_SIGINFO;
 	trap_sa.sa_sigaction = trap_signal_handler;
diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
index a1348a5f721a..6bf1b65b020d 100644
--- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c
+++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
@@ -344,6 +344,7 @@ int tm_unavailable_test(void)
 	cpu_set_t cpuset;
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 
 	cpu = pick_online_cpu();
 	FAIL_IF(cpu < 0);
diff --git a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c
index 9ef37a9836ac..34364ed2b6b7 100644
--- a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c
+++ b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c
@@ -91,6 +91,7 @@ int tm_vmx_unavail_test()
 	pthread_t *thread;
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 
 	passed = 1;
 
diff --git a/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c b/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c
index c1e788a6df47..1640e7ead69b 100644
--- a/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c
+++ b/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c
@@ -46,6 +46,7 @@ int test_vmxcopy()
 	uint64_t aborted = 0;
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(htm_is_synthetic());
 	SKIP_IF(!is_ppc64le());
 
 	fd = mkstemp(tmpfile);
diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h
index c5a1e5c163fc..c03c6e778876 100644
--- a/tools/testing/selftests/powerpc/tm/tm.h
+++ b/tools/testing/selftests/powerpc/tm/tm.h
@@ -10,6 +10,9 @@
 #include <asm/tm.h>
 
 #include "utils.h"
+#include "reg.h"
+
+#define TM_RETRIES 100
 
 static inline bool have_htm(void)
 {
@@ -31,6 +34,39 @@ static inline bool have_htm_nosc(void)
 #endif
 }
 
+/*
+ * Transactional Memory was removed in ISA 3.1. A synthetic TM implementation
+ * is provided on P10 for threads running in P8/P9 compatibility  mode. The
+ * synthetic implementation immediately fails after tbegin. This failure sets
+ * Bit 7 (Failure Persistent) and Bit 15 (Implementation-specific).
+ */
+static inline bool htm_is_synthetic(void)
+{
+	int i;
+
+	/*
+	 * Per the ISA, the Failure Persistent bit may be incorrect. Try a few
+	 * times in case we got an Implementation-specific failure on a non ISA
+	 * v3.1 system. On these systems the Implementation-specific failure
+	 * should not be persistent.
+	 */
+	for (i = 0; i < TM_RETRIES; i++) {
+		asm volatile(
+		"tbegin.;"
+		"beq 1f;"
+		"tend.;"
+		"1:"
+		:
+		:
+		: "memory");
+
+		if ((__builtin_get_texasr() & (TEXASR_FP | TEXASR_IC)) !=
+		    (TEXASR_FP | TEXASR_IC))
+			break;
+	}
+	return i == TM_RETRIES;
+}
+
 static inline long failure_code(void)
 {
 	return __builtin_get_texasru() >> 24;
diff --git a/tools/testing/selftests/safesetid/safesetid-test.c b/tools/testing/selftests/safesetid/safesetid-test.c
index 0c4d50644c13..4b809c93ba36 100644
--- a/tools/testing/selftests/safesetid/safesetid-test.c
+++ b/tools/testing/selftests/safesetid/safesetid-test.c
@@ -152,7 +152,7 @@ static void write_policies(void)
 
 	fd = open(add_whitelist_policy_file, O_WRONLY);
 	if (fd < 0)
-		die("cant open add_whitelist_policy file\n");
+		die("can't open add_whitelist_policy file\n");
 	written = write(fd, policy_str, strlen(policy_str));
 	if (written != strlen(policy_str)) {
 		if (written >= 0) {
diff --git a/tools/testing/selftests/sched/cs_prctl_test.c b/tools/testing/selftests/sched/cs_prctl_test.c
index 63fe6521c56d..7db9cf822dc7 100644
--- a/tools/testing/selftests/sched/cs_prctl_test.c
+++ b/tools/testing/selftests/sched/cs_prctl_test.c
@@ -25,8 +25,6 @@
 #include <sys/types.h>
 #include <sched.h>
 #include <sys/prctl.h>
-#include <sys/types.h>
-#include <sys/wait.h>
 #include <unistd.h>
 #include <time.h>
 #include <stdio.h>
diff --git a/tools/testing/selftests/sync/config b/tools/testing/selftests/sync/config
index 1ab7e8130db2..47ff5afc3727 100644
--- a/tools/testing/selftests/sync/config
+++ b/tools/testing/selftests/sync/config
@@ -1,4 +1,3 @@
 CONFIG_STAGING=y
 CONFIG_ANDROID=y
-CONFIG_SYNC=y
 CONFIG_SW_SYNC=y
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index f0fd80ef17df..b02eac613fdd 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -27,3 +27,4 @@ hmm-tests
 memfd_secret
 local_config.*
 split_huge_page_test
+ksm_tests
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 521243770f26..d9605bd10f2d 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -45,6 +45,7 @@ TEST_GEN_FILES += thuge-gen
 TEST_GEN_FILES += transhuge-stress
 TEST_GEN_FILES += userfaultfd
 TEST_GEN_FILES += split_huge_page_test
+TEST_GEN_FILES += ksm_tests
 
 ifeq ($(MACHINE),x86_64)
 CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32)
@@ -145,6 +146,8 @@ $(OUTPUT)/hmm-tests: local_config.h
 # HMM_EXTRA_LIBS may get set in local_config.mk, or it may be left empty.
 $(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS)
 
+$(OUTPUT)/ksm_tests: LDLIBS += -lnuma
+
 local_config.mk local_config.h: check_config.sh
 	/bin/sh ./check_config.sh $(CC)
 
diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
index 18d33684faad..fe8fcfb334e0 100644
--- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
+++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
@@ -1,11 +1,14 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 set -e
 
 if [[ $(id -u) -ne 0 ]]; then
   echo "This test must be run as root. Skipping..."
-  exit 0
+  exit $ksft_skip
 fi
 
 fault_limit_file=limit_in_bytes
diff --git a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
index d11d1febccc3..4a9a3afe9fd4 100644
--- a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
+++ b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
@@ -1,11 +1,14 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 set -e
 
 if [[ $(id -u) -ne 0 ]]; then
   echo "This test must be run as root. Skipping..."
-  exit 0
+  exit $ksft_skip
 fi
 
 usage_file=usage_in_bytes
diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c
new file mode 100644
index 000000000000..b61dcdb44c5b
--- /dev/null
+++ b/tools/testing/selftests/vm/ksm_tests.c
@@ -0,0 +1,662 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/mman.h>
+#include <stdbool.h>
+#include <time.h>
+#include <string.h>
+#include <numa.h>
+
+#include "../kselftest.h"
+#include "../../../../include/vdso/time64.h"
+
+#define KSM_SYSFS_PATH "/sys/kernel/mm/ksm/"
+#define KSM_FP(s) (KSM_SYSFS_PATH s)
+#define KSM_SCAN_LIMIT_SEC_DEFAULT 120
+#define KSM_PAGE_COUNT_DEFAULT 10l
+#define KSM_PROT_STR_DEFAULT "rw"
+#define KSM_USE_ZERO_PAGES_DEFAULT false
+#define KSM_MERGE_ACROSS_NODES_DEFAULT true
+#define MB (1ul << 20)
+
+struct ksm_sysfs {
+	unsigned long max_page_sharing;
+	unsigned long merge_across_nodes;
+	unsigned long pages_to_scan;
+	unsigned long run;
+	unsigned long sleep_millisecs;
+	unsigned long stable_node_chains_prune_millisecs;
+	unsigned long use_zero_pages;
+};
+
+enum ksm_test_name {
+	CHECK_KSM_MERGE,
+	CHECK_KSM_UNMERGE,
+	CHECK_KSM_ZERO_PAGE_MERGE,
+	CHECK_KSM_NUMA_MERGE,
+	KSM_MERGE_TIME,
+	KSM_COW_TIME
+};
+
+static int ksm_write_sysfs(const char *file_path, unsigned long val)
+{
+	FILE *f = fopen(file_path, "w");
+
+	if (!f) {
+		fprintf(stderr, "f %s\n", file_path);
+		perror("fopen");
+		return 1;
+	}
+	if (fprintf(f, "%lu", val) < 0) {
+		perror("fprintf");
+		return 1;
+	}
+	fclose(f);
+
+	return 0;
+}
+
+static int ksm_read_sysfs(const char *file_path, unsigned long *val)
+{
+	FILE *f = fopen(file_path, "r");
+
+	if (!f) {
+		fprintf(stderr, "f %s\n", file_path);
+		perror("fopen");
+		return 1;
+	}
+	if (fscanf(f, "%lu", val) != 1) {
+		perror("fscanf");
+		return 1;
+	}
+	fclose(f);
+
+	return 0;
+}
+
+static int str_to_prot(char *prot_str)
+{
+	int prot = 0;
+
+	if ((strchr(prot_str, 'r')) != NULL)
+		prot |= PROT_READ;
+	if ((strchr(prot_str, 'w')) != NULL)
+		prot |= PROT_WRITE;
+	if ((strchr(prot_str, 'x')) != NULL)
+		prot |= PROT_EXEC;
+
+	return prot;
+}
+
+static void print_help(void)
+{
+	printf("usage: ksm_tests [-h] <test type> [-a prot] [-p page_count] [-l timeout]\n"
+	       "[-z use_zero_pages] [-m merge_across_nodes] [-s size]\n");
+
+	printf("Supported <test type>:\n"
+	       " -M (page merging)\n"
+	       " -Z (zero pages merging)\n"
+	       " -N (merging of pages in different NUMA nodes)\n"
+	       " -U (page unmerging)\n"
+	       " -P evaluate merging time and speed.\n"
+	       "    For this test, the size of duplicated memory area (in MiB)\n"
+	       "    must be provided using -s option\n"
+	       " -C evaluate the time required to break COW of merged pages.\n\n");
+
+	printf(" -a: specify the access protections of pages.\n"
+	       "     <prot> must be of the form [rwx].\n"
+	       "     Default: %s\n", KSM_PROT_STR_DEFAULT);
+	printf(" -p: specify the number of pages to test.\n"
+	       "     Default: %ld\n", KSM_PAGE_COUNT_DEFAULT);
+	printf(" -l: limit the maximum running time (in seconds) for a test.\n"
+	       "     Default: %d seconds\n", KSM_SCAN_LIMIT_SEC_DEFAULT);
+	printf(" -z: change use_zero_pages tunable\n"
+	       "     Default: %d\n", KSM_USE_ZERO_PAGES_DEFAULT);
+	printf(" -m: change merge_across_nodes tunable\n"
+	       "     Default: %d\n", KSM_MERGE_ACROSS_NODES_DEFAULT);
+	printf(" -s: the size of duplicated memory area (in MiB)\n");
+
+	exit(0);
+}
+
+static void  *allocate_memory(void *ptr, int prot, int mapping, char data, size_t map_size)
+{
+	void *map_ptr = mmap(ptr, map_size, PROT_WRITE, mapping, -1, 0);
+
+	if (!map_ptr) {
+		perror("mmap");
+		return NULL;
+	}
+	memset(map_ptr, data, map_size);
+	if (mprotect(map_ptr, map_size, prot)) {
+		perror("mprotect");
+		munmap(map_ptr, map_size);
+		return NULL;
+	}
+
+	return map_ptr;
+}
+
+static int ksm_do_scan(int scan_count, struct timespec start_time, int timeout)
+{
+	struct timespec cur_time;
+	unsigned long cur_scan, init_scan;
+
+	if (ksm_read_sysfs(KSM_FP("full_scans"), &init_scan))
+		return 1;
+	cur_scan = init_scan;
+
+	while (cur_scan < init_scan + scan_count) {
+		if (ksm_read_sysfs(KSM_FP("full_scans"), &cur_scan))
+			return 1;
+		if (clock_gettime(CLOCK_MONOTONIC_RAW, &cur_time)) {
+			perror("clock_gettime");
+			return 1;
+		}
+		if ((cur_time.tv_sec - start_time.tv_sec) > timeout) {
+			printf("Scan time limit exceeded\n");
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static int ksm_merge_pages(void *addr, size_t size, struct timespec start_time, int timeout)
+{
+	if (madvise(addr, size, MADV_MERGEABLE)) {
+		perror("madvise");
+		return 1;
+	}
+	if (ksm_write_sysfs(KSM_FP("run"), 1))
+		return 1;
+
+	/* Since merging occurs only after 2 scans, make sure to get at least 2 full scans */
+	if (ksm_do_scan(2, start_time, timeout))
+		return 1;
+
+	return 0;
+}
+
+static bool assert_ksm_pages_count(long dupl_page_count)
+{
+	unsigned long max_page_sharing, pages_sharing, pages_shared;
+
+	if (ksm_read_sysfs(KSM_FP("pages_shared"), &pages_shared) ||
+	    ksm_read_sysfs(KSM_FP("pages_sharing"), &pages_sharing) ||
+	    ksm_read_sysfs(KSM_FP("max_page_sharing"), &max_page_sharing))
+		return false;
+
+	/*
+	 * Since there must be at least 2 pages for merging and 1 page can be
+	 * shared with the limited number of pages (max_page_sharing), sometimes
+	 * there are 'leftover' pages that cannot be merged. For example, if there
+	 * are 11 pages and max_page_sharing = 10, then only 10 pages will be
+	 * merged and the 11th page won't be affected. As a result, when the number
+	 * of duplicate pages is divided by max_page_sharing and the remainder is 1,
+	 * pages_shared and pages_sharing values will be equal between dupl_page_count
+	 * and dupl_page_count - 1.
+	 */
+	if (dupl_page_count % max_page_sharing == 1 || dupl_page_count % max_page_sharing == 0) {
+		if (pages_shared == dupl_page_count / max_page_sharing &&
+		    pages_sharing == pages_shared * (max_page_sharing - 1))
+			return true;
+	} else {
+		if (pages_shared == (dupl_page_count / max_page_sharing + 1) &&
+		    pages_sharing == dupl_page_count - pages_shared)
+			return true;
+	}
+
+	return false;
+}
+
+static int ksm_save_def(struct ksm_sysfs *ksm_sysfs)
+{
+	if (ksm_read_sysfs(KSM_FP("max_page_sharing"), &ksm_sysfs->max_page_sharing) ||
+	    ksm_read_sysfs(KSM_FP("merge_across_nodes"), &ksm_sysfs->merge_across_nodes) ||
+	    ksm_read_sysfs(KSM_FP("sleep_millisecs"), &ksm_sysfs->sleep_millisecs) ||
+	    ksm_read_sysfs(KSM_FP("pages_to_scan"), &ksm_sysfs->pages_to_scan) ||
+	    ksm_read_sysfs(KSM_FP("run"), &ksm_sysfs->run) ||
+	    ksm_read_sysfs(KSM_FP("stable_node_chains_prune_millisecs"),
+			   &ksm_sysfs->stable_node_chains_prune_millisecs) ||
+	    ksm_read_sysfs(KSM_FP("use_zero_pages"), &ksm_sysfs->use_zero_pages))
+		return 1;
+
+	return 0;
+}
+
+static int ksm_restore(struct ksm_sysfs *ksm_sysfs)
+{
+	if (ksm_write_sysfs(KSM_FP("max_page_sharing"), ksm_sysfs->max_page_sharing) ||
+	    ksm_write_sysfs(KSM_FP("merge_across_nodes"), ksm_sysfs->merge_across_nodes) ||
+	    ksm_write_sysfs(KSM_FP("pages_to_scan"), ksm_sysfs->pages_to_scan) ||
+	    ksm_write_sysfs(KSM_FP("run"), ksm_sysfs->run) ||
+	    ksm_write_sysfs(KSM_FP("sleep_millisecs"), ksm_sysfs->sleep_millisecs) ||
+	    ksm_write_sysfs(KSM_FP("stable_node_chains_prune_millisecs"),
+			    ksm_sysfs->stable_node_chains_prune_millisecs) ||
+	    ksm_write_sysfs(KSM_FP("use_zero_pages"), ksm_sysfs->use_zero_pages))
+		return 1;
+
+	return 0;
+}
+
+static int check_ksm_merge(int mapping, int prot, long page_count, int timeout, size_t page_size)
+{
+	void *map_ptr;
+	struct timespec start_time;
+
+	if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+		perror("clock_gettime");
+		return KSFT_FAIL;
+	}
+
+	/* fill pages with the same data and merge them */
+	map_ptr = allocate_memory(NULL, prot, mapping, '*', page_size * page_count);
+	if (!map_ptr)
+		return KSFT_FAIL;
+
+	if (ksm_merge_pages(map_ptr, page_size * page_count, start_time, timeout))
+		goto err_out;
+
+	/* verify that the right number of pages are merged */
+	if (assert_ksm_pages_count(page_count)) {
+		printf("OK\n");
+		munmap(map_ptr, page_size * page_count);
+		return KSFT_PASS;
+	}
+
+err_out:
+	printf("Not OK\n");
+	munmap(map_ptr, page_size * page_count);
+	return KSFT_FAIL;
+}
+
+static int check_ksm_unmerge(int mapping, int prot, int timeout, size_t page_size)
+{
+	void *map_ptr;
+	struct timespec start_time;
+	int page_count = 2;
+
+	if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+		perror("clock_gettime");
+		return KSFT_FAIL;
+	}
+
+	/* fill pages with the same data and merge them */
+	map_ptr = allocate_memory(NULL, prot, mapping, '*', page_size * page_count);
+	if (!map_ptr)
+		return KSFT_FAIL;
+
+	if (ksm_merge_pages(map_ptr, page_size * page_count, start_time, timeout))
+		goto err_out;
+
+	/* change 1 byte in each of the 2 pages -- KSM must automatically unmerge them */
+	memset(map_ptr, '-', 1);
+	memset(map_ptr + page_size, '+', 1);
+
+	/* get at least 1 scan, so KSM can detect that the pages were modified */
+	if (ksm_do_scan(1, start_time, timeout))
+		goto err_out;
+
+	/* check that unmerging was successful and 0 pages are currently merged */
+	if (assert_ksm_pages_count(0)) {
+		printf("OK\n");
+		munmap(map_ptr, page_size * page_count);
+		return KSFT_PASS;
+	}
+
+err_out:
+	printf("Not OK\n");
+	munmap(map_ptr, page_size * page_count);
+	return KSFT_FAIL;
+}
+
+static int check_ksm_zero_page_merge(int mapping, int prot, long page_count, int timeout,
+				     bool use_zero_pages, size_t page_size)
+{
+	void *map_ptr;
+	struct timespec start_time;
+
+	if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+		perror("clock_gettime");
+		return KSFT_FAIL;
+	}
+
+	if (ksm_write_sysfs(KSM_FP("use_zero_pages"), use_zero_pages))
+		return KSFT_FAIL;
+
+	/* fill pages with zero and try to merge them */
+	map_ptr = allocate_memory(NULL, prot, mapping, 0, page_size * page_count);
+	if (!map_ptr)
+		return KSFT_FAIL;
+
+	if (ksm_merge_pages(map_ptr, page_size * page_count, start_time, timeout))
+		goto err_out;
+
+       /*
+	* verify that the right number of pages are merged:
+	* 1) if use_zero_pages is set to 1, empty pages are merged
+	*    with the kernel zero page instead of with each other;
+	* 2) if use_zero_pages is set to 0, empty pages are not treated specially
+	*    and merged as usual.
+	*/
+	if (use_zero_pages && !assert_ksm_pages_count(0))
+		goto err_out;
+	else if (!use_zero_pages && !assert_ksm_pages_count(page_count))
+		goto err_out;
+
+	printf("OK\n");
+	munmap(map_ptr, page_size * page_count);
+	return KSFT_PASS;
+
+err_out:
+	printf("Not OK\n");
+	munmap(map_ptr, page_size * page_count);
+	return KSFT_FAIL;
+}
+
+static int check_ksm_numa_merge(int mapping, int prot, int timeout, bool merge_across_nodes,
+				size_t page_size)
+{
+	void *numa1_map_ptr, *numa2_map_ptr;
+	struct timespec start_time;
+	int page_count = 2;
+
+	if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+		perror("clock_gettime");
+		return KSFT_FAIL;
+	}
+
+	if (numa_available() < 0) {
+		perror("NUMA support not enabled");
+		return KSFT_SKIP;
+	}
+	if (numa_max_node() < 1) {
+		printf("At least 2 NUMA nodes must be available\n");
+		return KSFT_SKIP;
+	}
+	if (ksm_write_sysfs(KSM_FP("merge_across_nodes"), merge_across_nodes))
+		return KSFT_FAIL;
+
+	/* allocate 2 pages in 2 different NUMA nodes and fill them with the same data */
+	numa1_map_ptr = numa_alloc_onnode(page_size, 0);
+	numa2_map_ptr = numa_alloc_onnode(page_size, 1);
+	if (!numa1_map_ptr || !numa2_map_ptr) {
+		perror("numa_alloc_onnode");
+		return KSFT_FAIL;
+	}
+
+	memset(numa1_map_ptr, '*', page_size);
+	memset(numa2_map_ptr, '*', page_size);
+
+	/* try to merge the pages */
+	if (ksm_merge_pages(numa1_map_ptr, page_size, start_time, timeout) ||
+	    ksm_merge_pages(numa2_map_ptr, page_size, start_time, timeout))
+		goto err_out;
+
+       /*
+	* verify that the right number of pages are merged:
+	* 1) if merge_across_nodes was enabled, 2 duplicate pages will be merged;
+	* 2) if merge_across_nodes = 0, there must be 0 merged pages, since there is
+	*    only 1 unique page in each node and they can't be shared.
+	*/
+	if (merge_across_nodes && !assert_ksm_pages_count(page_count))
+		goto err_out;
+	else if (!merge_across_nodes && !assert_ksm_pages_count(0))
+		goto err_out;
+
+	numa_free(numa1_map_ptr, page_size);
+	numa_free(numa2_map_ptr, page_size);
+	printf("OK\n");
+	return KSFT_PASS;
+
+err_out:
+	numa_free(numa1_map_ptr, page_size);
+	numa_free(numa2_map_ptr, page_size);
+	printf("Not OK\n");
+	return KSFT_FAIL;
+}
+
+static int ksm_merge_time(int mapping, int prot, int timeout, size_t map_size)
+{
+	void *map_ptr;
+	struct timespec start_time, end_time;
+	unsigned long scan_time_ns;
+
+	map_size *= MB;
+
+	map_ptr = allocate_memory(NULL, prot, mapping, '*', map_size);
+	if (!map_ptr)
+		return KSFT_FAIL;
+
+	if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+		perror("clock_gettime");
+		goto err_out;
+	}
+	if (ksm_merge_pages(map_ptr, map_size, start_time, timeout))
+		goto err_out;
+	if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
+		perror("clock_gettime");
+		goto err_out;
+	}
+
+	scan_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
+		       (end_time.tv_nsec - start_time.tv_nsec);
+
+	printf("Total size:    %lu MiB\n", map_size / MB);
+	printf("Total time:    %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC,
+	       scan_time_ns % NSEC_PER_SEC);
+	printf("Average speed:  %.3f MiB/s\n", (map_size / MB) /
+					       ((double)scan_time_ns / NSEC_PER_SEC));
+
+	munmap(map_ptr, map_size);
+	return KSFT_PASS;
+
+err_out:
+	printf("Not OK\n");
+	munmap(map_ptr, map_size);
+	return KSFT_FAIL;
+}
+
+static int ksm_cow_time(int mapping, int prot, int timeout, size_t page_size)
+{
+	void *map_ptr;
+	struct timespec start_time, end_time;
+	unsigned long cow_time_ns;
+
+	/* page_count must be less than 2*page_size */
+	size_t page_count = 4000;
+
+	map_ptr = allocate_memory(NULL, prot, mapping, '*', page_size * page_count);
+	if (!map_ptr)
+		return KSFT_FAIL;
+
+	if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+		perror("clock_gettime");
+		return KSFT_FAIL;
+	}
+	for (size_t i = 0; i < page_count - 1; i = i + 2)
+		memset(map_ptr + page_size * i, '-', 1);
+	if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
+		perror("clock_gettime");
+		return KSFT_FAIL;
+	}
+
+	cow_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
+		       (end_time.tv_nsec - start_time.tv_nsec);
+
+	printf("Total size:    %lu MiB\n\n", (page_size * page_count) / MB);
+	printf("Not merged pages:\n");
+	printf("Total time:     %ld.%09ld s\n", cow_time_ns / NSEC_PER_SEC,
+	       cow_time_ns % NSEC_PER_SEC);
+	printf("Average speed:  %.3f MiB/s\n\n", ((page_size * (page_count / 2)) / MB) /
+					       ((double)cow_time_ns / NSEC_PER_SEC));
+
+	/* Create 2000 pairs of duplicate pages */
+	for (size_t i = 0; i < page_count - 1; i = i + 2) {
+		memset(map_ptr + page_size * i, '+', i / 2 + 1);
+		memset(map_ptr + page_size * (i + 1), '+', i / 2 + 1);
+	}
+	if (ksm_merge_pages(map_ptr, page_size * page_count, start_time, timeout))
+		goto err_out;
+
+	if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+		perror("clock_gettime");
+		goto err_out;
+	}
+	for (size_t i = 0; i < page_count - 1; i = i + 2)
+		memset(map_ptr + page_size * i, '-', 1);
+	if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
+		perror("clock_gettime");
+		goto err_out;
+	}
+
+	cow_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
+		       (end_time.tv_nsec - start_time.tv_nsec);
+
+	printf("Merged pages:\n");
+	printf("Total time:     %ld.%09ld s\n", cow_time_ns / NSEC_PER_SEC,
+	       cow_time_ns % NSEC_PER_SEC);
+	printf("Average speed:  %.3f MiB/s\n", ((page_size * (page_count / 2)) / MB) /
+					       ((double)cow_time_ns / NSEC_PER_SEC));
+
+	munmap(map_ptr, page_size * page_count);
+	return KSFT_PASS;
+
+err_out:
+	printf("Not OK\n");
+	munmap(map_ptr, page_size * page_count);
+	return KSFT_FAIL;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret, opt;
+	int prot = 0;
+	int ksm_scan_limit_sec = KSM_SCAN_LIMIT_SEC_DEFAULT;
+	long page_count = KSM_PAGE_COUNT_DEFAULT;
+	size_t page_size = sysconf(_SC_PAGESIZE);
+	struct ksm_sysfs ksm_sysfs_old;
+	int test_name = CHECK_KSM_MERGE;
+	bool use_zero_pages = KSM_USE_ZERO_PAGES_DEFAULT;
+	bool merge_across_nodes = KSM_MERGE_ACROSS_NODES_DEFAULT;
+	long size_MB = 0;
+
+	while ((opt = getopt(argc, argv, "ha:p:l:z:m:s:MUZNPC")) != -1) {
+		switch (opt) {
+		case 'a':
+			prot = str_to_prot(optarg);
+			break;
+		case 'p':
+			page_count = atol(optarg);
+			if (page_count <= 0) {
+				printf("The number of pages must be greater than 0\n");
+				return KSFT_FAIL;
+			}
+			break;
+		case 'l':
+			ksm_scan_limit_sec = atoi(optarg);
+			if (ksm_scan_limit_sec <= 0) {
+				printf("Timeout value must be greater than 0\n");
+				return KSFT_FAIL;
+			}
+			break;
+		case 'h':
+			print_help();
+			break;
+		case 'z':
+			if (strcmp(optarg, "0") == 0)
+				use_zero_pages = 0;
+			else
+				use_zero_pages = 1;
+			break;
+		case 'm':
+			if (strcmp(optarg, "0") == 0)
+				merge_across_nodes = 0;
+			else
+				merge_across_nodes = 1;
+			break;
+		case 's':
+			size_MB = atoi(optarg);
+			if (size_MB <= 0) {
+				printf("Size must be greater than 0\n");
+				return KSFT_FAIL;
+			}
+		case 'M':
+			break;
+		case 'U':
+			test_name = CHECK_KSM_UNMERGE;
+			break;
+		case 'Z':
+			test_name = CHECK_KSM_ZERO_PAGE_MERGE;
+			break;
+		case 'N':
+			test_name = CHECK_KSM_NUMA_MERGE;
+			break;
+		case 'P':
+			test_name = KSM_MERGE_TIME;
+			break;
+		case 'C':
+			test_name = KSM_COW_TIME;
+			break;
+		default:
+			return KSFT_FAIL;
+		}
+	}
+
+	if (prot == 0)
+		prot = str_to_prot(KSM_PROT_STR_DEFAULT);
+
+	if (access(KSM_SYSFS_PATH, F_OK)) {
+		printf("Config KSM not enabled\n");
+		return KSFT_SKIP;
+	}
+
+	if (ksm_save_def(&ksm_sysfs_old)) {
+		printf("Cannot save default tunables\n");
+		return KSFT_FAIL;
+	}
+
+	if (ksm_write_sysfs(KSM_FP("run"), 2) ||
+	    ksm_write_sysfs(KSM_FP("sleep_millisecs"), 0) ||
+	    ksm_write_sysfs(KSM_FP("merge_across_nodes"), 1) ||
+	    ksm_write_sysfs(KSM_FP("pages_to_scan"), page_count))
+		return KSFT_FAIL;
+
+	switch (test_name) {
+	case CHECK_KSM_MERGE:
+		ret = check_ksm_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, page_count,
+				      ksm_scan_limit_sec, page_size);
+		break;
+	case CHECK_KSM_UNMERGE:
+		ret = check_ksm_unmerge(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec,
+					page_size);
+		break;
+	case CHECK_KSM_ZERO_PAGE_MERGE:
+		ret = check_ksm_zero_page_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, page_count,
+						ksm_scan_limit_sec, use_zero_pages, page_size);
+		break;
+	case CHECK_KSM_NUMA_MERGE:
+		ret = check_ksm_numa_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec,
+					   merge_across_nodes, page_size);
+		break;
+	case KSM_MERGE_TIME:
+		if (size_MB == 0) {
+			printf("Option '-s' is required.\n");
+			return KSFT_FAIL;
+		}
+		ret = ksm_merge_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec,
+				     size_MB);
+		break;
+	case KSM_COW_TIME:
+		ret = ksm_cow_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec,
+				   page_size);
+		break;
+	}
+
+	if (ksm_restore(&ksm_sysfs_old)) {
+		printf("Cannot restore default tunables\n");
+		return KSFT_FAIL;
+	}
+
+	return ret;
+}
diff --git a/tools/testing/selftests/vm/mlock-random-test.c b/tools/testing/selftests/vm/mlock-random-test.c
index ff4d72eb74b9..782ea94dee2f 100644
--- a/tools/testing/selftests/vm/mlock-random-test.c
+++ b/tools/testing/selftests/vm/mlock-random-test.c
@@ -70,7 +70,7 @@ int get_proc_locked_vm_size(void)
 		}
 	}
 
-	perror("cann't parse VmLck in /proc/self/status\n");
+	perror("cannot parse VmLck in /proc/self/status\n");
 	fclose(f);
 	return -1;
 }
diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index d09a6b71f1e9..45e803af7c77 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -377,6 +377,102 @@ else
 	exitcode=1
 fi
 
+echo "-------------------------------------------------------"
+echo "running KSM MADV_MERGEABLE test with 10 identical pages"
+echo "-------------------------------------------------------"
+./ksm_tests -M -p 10
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+	echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+	 echo "[SKIP]"
+	 exitcode=$ksft_skip
+else
+	echo "[FAIL]"
+	exitcode=1
+fi
+
+echo "------------------------"
+echo "running KSM unmerge test"
+echo "------------------------"
+./ksm_tests -U
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+	echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+	 echo "[SKIP]"
+	 exitcode=$ksft_skip
+else
+	echo "[FAIL]"
+	exitcode=1
+fi
+
+echo "----------------------------------------------------------"
+echo "running KSM test with 10 zero pages and use_zero_pages = 0"
+echo "----------------------------------------------------------"
+./ksm_tests -Z -p 10 -z 0
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+	echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+	 echo "[SKIP]"
+	 exitcode=$ksft_skip
+else
+	echo "[FAIL]"
+	exitcode=1
+fi
+
+echo "----------------------------------------------------------"
+echo "running KSM test with 10 zero pages and use_zero_pages = 1"
+echo "----------------------------------------------------------"
+./ksm_tests -Z -p 10 -z 1
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+	echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+	 echo "[SKIP]"
+	 exitcode=$ksft_skip
+else
+	echo "[FAIL]"
+	exitcode=1
+fi
+
+echo "-------------------------------------------------------------"
+echo "running KSM test with 2 NUMA nodes and merge_across_nodes = 1"
+echo "-------------------------------------------------------------"
+./ksm_tests -N -m 1
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+	echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+	 echo "[SKIP]"
+	 exitcode=$ksft_skip
+else
+	echo "[FAIL]"
+	exitcode=1
+fi
+
+echo "-------------------------------------------------------------"
+echo "running KSM test with 2 NUMA nodes and merge_across_nodes = 0"
+echo "-------------------------------------------------------------"
+./ksm_tests -N -m 0
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+	echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+	 echo "[SKIP]"
+	 exitcode=$ksft_skip
+else
+	echo "[FAIL]"
+	exitcode=1
+fi
+
 exit $exitcode
 
 exit $exitcode
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 2ea438e6b8b1..10ab56c2484a 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -566,6 +566,18 @@ static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy,
 	}
 }
 
+static void wake_range(int ufd, unsigned long addr, unsigned long len)
+{
+	struct uffdio_range uffdio_wake;
+
+	uffdio_wake.start = addr;
+	uffdio_wake.len = len;
+
+	if (ioctl(ufd, UFFDIO_WAKE, &uffdio_wake))
+		fprintf(stderr, "error waking %lu\n",
+			addr), exit(1);
+}
+
 static int __copy_page(int ufd, unsigned long offset, bool retry)
 {
 	struct uffdio_copy uffdio_copy;
@@ -585,6 +597,7 @@ static int __copy_page(int ufd, unsigned long offset, bool retry)
 		if (uffdio_copy.copy != -EEXIST)
 			err("UFFDIO_COPY error: %"PRId64,
 			    (int64_t)uffdio_copy.copy);
+		wake_range(ufd, uffdio_copy.dst, page_size);
 	} else if (uffdio_copy.copy != page_size) {
 		err("UFFDIO_COPY error: %"PRId64, (int64_t)uffdio_copy.copy);
 	} else {
diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c
index 6da0ac3f0135..cc3de6ff9fba 100644
--- a/tools/testing/selftests/x86/mov_ss_trap.c
+++ b/tools/testing/selftests/x86/mov_ss_trap.c
@@ -47,7 +47,6 @@
 unsigned short ss;
 extern unsigned char breakpoint_insn[];
 sigjmp_buf jmpbuf;
-static unsigned char altstack_data[SIGSTKSZ];
 
 static void enable_watchpoint(void)
 {
@@ -250,13 +249,14 @@ int main()
 	if (sigsetjmp(jmpbuf, 1) == 0) {
 		printf("[RUN]\tMOV SS; SYSENTER\n");
 		stack_t stack = {
-			.ss_sp = altstack_data,
+			.ss_sp = malloc(sizeof(char) * SIGSTKSZ),
 			.ss_size = SIGSTKSZ,
 		};
 		if (sigaltstack(&stack, NULL) != 0)
 			err(1, "sigaltstack");
 		sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK);
 		nr = SYS_getpid;
+		free(stack.ss_sp);
 		/* Clear EBP first to make sure we segfault cleanly. */
 		asm volatile ("xorl %%ebp, %%ebp; mov %[ss], %%ss; SYSENTER" : "+a" (nr)
 			      : [ss] "m" (ss) : "flags", "rcx"
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
index 57c4f67f16ef..5d7961a5f7f6 100644
--- a/tools/testing/selftests/x86/sigreturn.c
+++ b/tools/testing/selftests/x86/sigreturn.c
@@ -138,9 +138,6 @@ static unsigned short LDT3(int idx)
 	return (idx << 3) | 7;
 }
 
-/* Our sigaltstack scratch space. */
-static char altstack_data[SIGSTKSZ];
-
 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
 		       int flags)
 {
@@ -771,7 +768,8 @@ int main()
 	setup_ldt();
 
 	stack_t stack = {
-		.ss_sp = altstack_data,
+		/* Our sigaltstack scratch space. */
+		.ss_sp = malloc(sizeof(char) * SIGSTKSZ),
 		.ss_size = SIGSTKSZ,
 	};
 	if (sigaltstack(&stack, NULL) != 0)
@@ -872,5 +870,6 @@ int main()
 	total_nerrs += test_nonstrict_ss();
 #endif
 
+	free(stack.ss_sp);
 	return total_nerrs ? 1 : 0;
 }
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
index 120ac741fe44..9a30f443e928 100644
--- a/tools/testing/selftests/x86/single_step_syscall.c
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -57,7 +57,6 @@ static void clearhandler(int sig)
 
 static volatile sig_atomic_t sig_traps, sig_eflags;
 sigjmp_buf jmpbuf;
-static unsigned char altstack_data[SIGSTKSZ];
 
 #ifdef __x86_64__
 # define REG_IP REG_RIP
@@ -210,7 +209,7 @@ int main()
 		unsigned long nr = SYS_getpid;
 		printf("[RUN]\tSet TF and check SYSENTER\n");
 		stack_t stack = {
-			.ss_sp = altstack_data,
+			.ss_sp = malloc(sizeof(char) * SIGSTKSZ),
 			.ss_size = SIGSTKSZ,
 		};
 		if (sigaltstack(&stack, NULL) != 0)
@@ -219,6 +218,7 @@ int main()
 			   SA_RESETHAND | SA_ONSTACK);
 		sethandler(SIGILL, print_and_longjmp, SA_RESETHAND);
 		set_eflags(get_eflags() | X86_EFLAGS_TF);
+		free(stack.ss_sp);
 		/* Clear EBP first to make sure we segfault cleanly. */
 		asm volatile ("xorl %%ebp, %%ebp; SYSENTER" : "+a" (nr) :: "flags", "rcx"
 #ifdef __x86_64__
diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c
index bff474b5efc6..461fa41a4d02 100644
--- a/tools/testing/selftests/x86/syscall_arg_fault.c
+++ b/tools/testing/selftests/x86/syscall_arg_fault.c
@@ -17,9 +17,6 @@
 
 #include "helpers.h"
 
-/* Our sigaltstack scratch space. */
-static unsigned char altstack_data[SIGSTKSZ];
-
 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
 		       int flags)
 {
@@ -104,7 +101,8 @@ static void sigill(int sig, siginfo_t *info, void *ctx_void)
 int main()
 {
 	stack_t stack = {
-		.ss_sp = altstack_data,
+		/* Our sigaltstack scratch space. */
+		.ss_sp = malloc(sizeof(char) * SIGSTKSZ),
 		.ss_size = SIGSTKSZ,
 	};
 	if (sigaltstack(&stack, NULL) != 0)
@@ -233,5 +231,6 @@ int main()
 	set_eflags(get_eflags() & ~X86_EFLAGS_TF);
 #endif
 
+	free(stack.ss_sp);
 	return 0;
 }
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 67766bfe176f..2a3638c0a008 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -282,6 +282,7 @@ static void test_stream_msg_peek_server(const struct test_opts *opts)
 }
 
 #define MESSAGES_CNT 7
+#define MSG_EOR_IDX (MESSAGES_CNT / 2)
 static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
 {
 	int fd;
@@ -294,7 +295,7 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
 
 	/* Send several messages, one with MSG_EOR flag */
 	for (int i = 0; i < MESSAGES_CNT; i++)
-		send_byte(fd, 1, 0);
+		send_byte(fd, 1, (i == MSG_EOR_IDX) ? MSG_EOR : 0);
 
 	control_writeln("SENDDONE");
 	close(fd);
@@ -324,6 +325,11 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts)
 			perror("message bound violated");
 			exit(EXIT_FAILURE);
 		}
+
+		if ((i == MSG_EOR_IDX) ^ !!(msg.msg_flags & MSG_EOR)) {
+			perror("MSG_EOR");
+			exit(EXIT_FAILURE);
+		}
 	}
 
 	close(fd);
diff --git a/tools/thermal/tmon/Makefile b/tools/thermal/tmon/Makefile
index 9db867df7679..f9c52b7fab7b 100644
--- a/tools/thermal/tmon/Makefile
+++ b/tools/thermal/tmon/Makefile
@@ -10,10 +10,9 @@ override CFLAGS+= $(call cc-option,-O3,-O1) ${WARNFLAGS}
 # Add "-fstack-protector" only if toolchain supports it.
 override CFLAGS+= $(call cc-option,-fstack-protector-strong)
 CC?= $(CROSS_COMPILE)gcc
-PKG_CONFIG?= pkg-config
+PKG_CONFIG?= $(CROSS_COMPILE)pkg-config
 
 override CFLAGS+=-D VERSION=\"$(VERSION)\"
-LDFLAGS+=
 TARGET=tmon
 
 INSTALL_PROGRAM=install -m 755 -p
@@ -33,7 +32,6 @@ override CFLAGS += $(shell $(PKG_CONFIG) --cflags $(STATIC) panelw ncursesw 2> /
 		     $(PKG_CONFIG) --cflags $(STATIC) panel ncurses 2> /dev/null)
 
 OBJS = tmon.o tui.o sysfs.o pid.o
-OBJS +=
 
 tmon: $(OBJS) Makefile tmon.h
 	$(CC) $(CFLAGS) $(LDFLAGS) $(OBJS)  -o $(TARGET) $(TMON_LIBS)
@@ -42,15 +40,13 @@ valgrind: tmon
 	 sudo valgrind -v --track-origins=yes --tool=memcheck --leak-check=yes --show-reachable=yes --num-callers=20 --track-fds=yes ./$(TARGET)  1> /dev/null
 
 install:
-	- mkdir -p $(INSTALL_ROOT)/$(BINDIR)
-	- $(INSTALL_PROGRAM) "$(TARGET)" "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)"
+	- $(INSTALL_PROGRAM) -D "$(TARGET)" "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)"
 
 uninstall:
 	$(DEL_FILE) "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)"
 
 clean:
-	find . -name "*.o" | xargs $(DEL_FILE)
-	rm -f $(TARGET)
+	rm -f $(TARGET) $(OBJS)
 
 dist:
 	git tag v$(VERSION)
diff --git a/virt/kvm/binary_stats.c b/virt/kvm/binary_stats.c
index e609d428811a..eefca6c69f51 100644
--- a/virt/kvm/binary_stats.c
+++ b/virt/kvm/binary_stats.c
@@ -136,9 +136,7 @@ ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header,
 		src = stats + pos - header->data_offset;
 		if (copy_to_user(dest, src, copylen))
 			return -EFAULT;
-		remain -= copylen;
 		pos += copylen;
-		dest += copylen;
 	}
 
 	*offset = pos;
diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c
index 7aafefc50aa7..88f4683198ea 100644
--- a/virt/kvm/dirty_ring.c
+++ b/virt/kvm/dirty_ring.c
@@ -91,11 +91,6 @@ static inline void kvm_dirty_gfn_set_dirtied(struct kvm_dirty_gfn *gfn)
 	gfn->flags = KVM_DIRTY_GFN_F_DIRTY;
 }
 
-static inline bool kvm_dirty_gfn_invalid(struct kvm_dirty_gfn *gfn)
-{
-	return gfn->flags == 0;
-}
-
 static inline bool kvm_dirty_gfn_harvested(struct kvm_dirty_gfn *gfn)
 {
 	return gfn->flags & KVM_DIRTY_GFN_F_RESET;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b50dbe269f4b..439d3b4cd1a9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -189,16 +189,6 @@ bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
 	return true;
 }
 
-bool kvm_is_transparent_hugepage(kvm_pfn_t pfn)
-{
-	struct page *page = pfn_to_page(pfn);
-
-	if (!PageTransCompoundMap(page))
-		return false;
-
-	return is_transparent_hugepage(compound_head(page));
-}
-
 /*
  * Switches to specified vcpu, until a matching vcpu_put()
  */
@@ -318,6 +308,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 	 */
 	long dirty_count = smp_load_acquire(&kvm->tlbs_dirty);
 
+	++kvm->stat.generic.remote_tlb_flush_requests;
 	/*
 	 * We want to publish modifications to the page tables before reading
 	 * mode. Pairs with a memory barrier in arch-specific code.
@@ -415,6 +406,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	vcpu->preempted = false;
 	vcpu->ready = false;
 	preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
+	vcpu->last_used_slot = 0;
 }
 
 void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -496,17 +488,6 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
 
 	idx = srcu_read_lock(&kvm->srcu);
 
-	/* The on_lock() path does not yet support lock elision. */
-	if (!IS_KVM_NULL_FN(range->on_lock)) {
-		locked = true;
-		KVM_MMU_LOCK(kvm);
-
-		range->on_lock(kvm, range->start, range->end);
-
-		if (IS_KVM_NULL_FN(range->handler))
-			goto out_unlock;
-	}
-
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
 		slots = __kvm_memslots(kvm, i);
 		kvm_for_each_memslot(slot, slots) {
@@ -538,6 +519,10 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
 			if (!locked) {
 				locked = true;
 				KVM_MMU_LOCK(kvm);
+				if (!IS_KVM_NULL_FN(range->on_lock))
+					range->on_lock(kvm, range->start, range->end);
+				if (IS_KVM_NULL_FN(range->handler))
+					break;
 			}
 			ret |= range->handler(kvm, &gfn_range);
 		}
@@ -546,7 +531,6 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
 	if (range->flush_on_ret && (ret || kvm->tlbs_dirty))
 		kvm_flush_remote_tlbs(kvm);
 
-out_unlock:
 	if (locked)
 		KVM_MMU_UNLOCK(kvm);
 
@@ -604,16 +588,20 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
 	trace_kvm_set_spte_hva(address);
 
 	/*
-	 * .change_pte() must be surrounded by .invalidate_range_{start,end}(),
-	 * and so always runs with an elevated notifier count.  This obviates
-	 * the need to bump the sequence count.
+	 * .change_pte() must be surrounded by .invalidate_range_{start,end}().
+	 * If mmu_notifier_count is zero, then no in-progress invalidations,
+	 * including this one, found a relevant memslot at start(); rechecking
+	 * memslots here is unnecessary.  Note, a false positive (count elevated
+	 * by a different invalidation) is sub-optimal but functionally ok.
 	 */
-	WARN_ON_ONCE(!kvm->mmu_notifier_count);
+	WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count));
+	if (!READ_ONCE(kvm->mmu_notifier_count))
+		return;
 
 	kvm_handle_hva_range(mn, address, address + 1, pte, kvm_set_spte_gfn);
 }
 
-static void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start,
+void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start,
 				   unsigned long end)
 {
 	/*
@@ -658,12 +646,24 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 
 	trace_kvm_unmap_hva_range(range->start, range->end);
 
+	/*
+	 * Prevent memslot modification between range_start() and range_end()
+	 * so that conditionally locking provides the same result in both
+	 * functions.  Without that guarantee, the mmu_notifier_count
+	 * adjustments will be imbalanced.
+	 *
+	 * Pairs with the decrement in range_end().
+	 */
+	spin_lock(&kvm->mn_invalidate_lock);
+	kvm->mn_active_invalidate_count++;
+	spin_unlock(&kvm->mn_invalidate_lock);
+
 	__kvm_handle_hva_range(kvm, &hva_range);
 
 	return 0;
 }
 
-static void kvm_dec_notifier_count(struct kvm *kvm, unsigned long start,
+void kvm_dec_notifier_count(struct kvm *kvm, unsigned long start,
 				   unsigned long end)
 {
 	/*
@@ -694,9 +694,22 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
 		.flush_on_ret	= false,
 		.may_block	= mmu_notifier_range_blockable(range),
 	};
+	bool wake;
 
 	__kvm_handle_hva_range(kvm, &hva_range);
 
+	/* Pairs with the increment in range_start(). */
+	spin_lock(&kvm->mn_invalidate_lock);
+	wake = (--kvm->mn_active_invalidate_count == 0);
+	spin_unlock(&kvm->mn_invalidate_lock);
+
+	/*
+	 * There can only be one waiter, since the wait happens under
+	 * slots_lock.
+	 */
+	if (wake)
+		rcuwait_wake_up(&kvm->mn_memslots_update_rcuwait);
+
 	BUG_ON(kvm->mmu_notifier_count < 0);
 }
 
@@ -897,7 +910,7 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
 	char dir_name[ITOA_MAX_LEN * 2];
 	struct kvm_stat_data *stat_data;
 	const struct _kvm_stats_desc *pdesc;
-	int i;
+	int i, ret;
 	int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc +
 				      kvm_vcpu_stats_header.num_desc;
 
@@ -954,6 +967,13 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
 				    kvm->debugfs_dentry, stat_data,
 				    &stat_fops_per_vm);
 	}
+
+	ret = kvm_arch_create_vm_debugfs(kvm);
+	if (ret) {
+		kvm_destroy_vm_debugfs(kvm);
+		return i;
+	}
+
 	return 0;
 }
 
@@ -974,6 +994,17 @@ void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm)
 {
 }
 
+/*
+ * Called after per-vm debugfs created.  When called kvm->debugfs_dentry should
+ * be setup already, so we can create arch-specific debugfs entries under it.
+ * Cleanup should be automatic done in kvm_destroy_vm_debugfs() recursively, so
+ * a per-arch destroy interface is not needed.
+ */
+int __weak kvm_arch_create_vm_debugfs(struct kvm *kvm)
+{
+	return 0;
+}
+
 static struct kvm *kvm_create_vm(unsigned long type)
 {
 	struct kvm *kvm = kvm_arch_alloc_vm();
@@ -991,6 +1022,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	mutex_init(&kvm->irq_lock);
 	mutex_init(&kvm->slots_lock);
 	mutex_init(&kvm->slots_arch_lock);
+	spin_lock_init(&kvm->mn_invalidate_lock);
+	rcuwait_init(&kvm->mn_memslots_update_rcuwait);
+
 	INIT_LIST_HEAD(&kvm->devices);
 
 	BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
@@ -1113,6 +1147,16 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	kvm_coalesced_mmio_free(kvm);
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
+	/*
+	 * At this point, pending calls to invalidate_range_start()
+	 * have completed but no more MMU notifiers will run, so
+	 * mn_active_invalidate_count may remain unbalanced.
+	 * No threads can be waiting in install_new_memslots as the
+	 * last reference on KVM has been dropped, but freeing
+	 * memslots would deadlock without this manual intervention.
+	 */
+	WARN_ON(rcuwait_active(&kvm->mn_memslots_update_rcuwait));
+	kvm->mn_active_invalidate_count = 0;
 #else
 	kvm_arch_flush_shadow_all(kvm);
 #endif
@@ -1134,6 +1178,16 @@ void kvm_get_kvm(struct kvm *kvm)
 }
 EXPORT_SYMBOL_GPL(kvm_get_kvm);
 
+/*
+ * Make sure the vm is not during destruction, which is a safe version of
+ * kvm_get_kvm().  Return true if kvm referenced successfully, false otherwise.
+ */
+bool kvm_get_kvm_safe(struct kvm *kvm)
+{
+	return refcount_inc_not_zero(&kvm->users_count);
+}
+EXPORT_SYMBOL_GPL(kvm_get_kvm_safe);
+
 void kvm_put_kvm(struct kvm *kvm)
 {
 	if (refcount_dec_and_test(&kvm->users_count))
@@ -1194,8 +1248,8 @@ static inline void kvm_memslot_delete(struct kvm_memslots *slots,
 
 	slots->used_slots--;
 
-	if (atomic_read(&slots->lru_slot) >= slots->used_slots)
-		atomic_set(&slots->lru_slot, 0);
+	if (atomic_read(&slots->last_used_slot) >= slots->used_slots)
+		atomic_set(&slots->last_used_slot, 0);
 
 	for (i = slots->id_to_index[memslot->id]; i < slots->used_slots; i++) {
 		mslots[i] = mslots[i + 1];
@@ -1364,7 +1418,22 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
 	WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
 	slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;
 
+	/*
+	 * Do not store the new memslots while there are invalidations in
+	 * progress, otherwise the locking in invalidate_range_start and
+	 * invalidate_range_end will be unbalanced.
+	 */
+	spin_lock(&kvm->mn_invalidate_lock);
+	prepare_to_rcuwait(&kvm->mn_memslots_update_rcuwait);
+	while (kvm->mn_active_invalidate_count) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		spin_unlock(&kvm->mn_invalidate_lock);
+		schedule();
+		spin_lock(&kvm->mn_invalidate_lock);
+	}
+	finish_rcuwait(&kvm->mn_memslots_update_rcuwait);
 	rcu_assign_pointer(kvm->memslots[as_id], slots);
+	spin_unlock(&kvm->mn_invalidate_lock);
 
 	/*
 	 * Acquired in kvm_set_memslot. Must be released before synchronize
@@ -1980,7 +2049,26 @@ EXPORT_SYMBOL_GPL(gfn_to_memslot);
 
 struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn)
 {
-	return __gfn_to_memslot(kvm_vcpu_memslots(vcpu), gfn);
+	struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu);
+	struct kvm_memory_slot *slot;
+	int slot_index;
+
+	slot = try_get_memslot(slots, vcpu->last_used_slot, gfn);
+	if (slot)
+		return slot;
+
+	/*
+	 * Fall back to searching all memslots. We purposely use
+	 * search_memslots() instead of __gfn_to_memslot() to avoid
+	 * thrashing the VM-wide last_used_index in kvm_memslots.
+	 */
+	slot = search_memslots(slots, gfn, &slot_index);
+	if (slot) {
+		vcpu->last_used_slot = slot_index;
+		return slot;
+	}
+
+	return NULL;
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_memslot);
 
@@ -2239,7 +2327,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
 	 * Get a reference here because callers of *hva_to_pfn* and
 	 * *gfn_to_pfn* ultimately call kvm_release_pfn_clean on the
 	 * returned pfn.  This is only needed if the VMA has VM_MIXEDMAP
-	 * set, but the kvm_get_pfn/kvm_release_pfn_clean pair will
+	 * set, but the kvm_try_get_pfn/kvm_release_pfn_clean pair will
 	 * simply do nothing for reserved pfns.
 	 *
 	 * Whoever called remap_pfn_range is also going to call e.g.
@@ -2636,13 +2724,6 @@ void kvm_set_pfn_accessed(kvm_pfn_t pfn)
 }
 EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
 
-void kvm_get_pfn(kvm_pfn_t pfn)
-{
-	if (!kvm_is_reserved_pfn(pfn))
-		get_page(pfn_to_page(pfn));
-}
-EXPORT_SYMBOL_GPL(kvm_get_pfn);
-
 static int next_segment(unsigned long len, int offset)
 {
 	if (len > PAGE_SIZE - offset)
@@ -3122,13 +3203,23 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 				++vcpu->stat.generic.halt_successful_poll;
 				if (!vcpu_valid_wakeup(vcpu))
 					++vcpu->stat.generic.halt_poll_invalid;
+
+				KVM_STATS_LOG_HIST_UPDATE(
+				      vcpu->stat.generic.halt_poll_success_hist,
+				      ktime_to_ns(ktime_get()) -
+				      ktime_to_ns(start));
 				goto out;
 			}
 			cpu_relax();
 			poll_end = cur = ktime_get();
 		} while (kvm_vcpu_can_poll(cur, stop));
+
+		KVM_STATS_LOG_HIST_UPDATE(
+				vcpu->stat.generic.halt_poll_fail_hist,
+				ktime_to_ns(ktime_get()) - ktime_to_ns(start));
 	}
 
+
 	prepare_to_rcuwait(&vcpu->wait);
 	for (;;) {
 		set_current_state(TASK_INTERRUPTIBLE);
@@ -3141,6 +3232,12 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	}
 	finish_rcuwait(&vcpu->wait);
 	cur = ktime_get();
+	if (waited) {
+		vcpu->stat.generic.halt_wait_ns +=
+			ktime_to_ns(cur) - ktime_to_ns(poll_end);
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.generic.halt_wait_hist,
+				ktime_to_ns(cur) - ktime_to_ns(poll_end));
+	}
 out:
 	kvm_arch_vcpu_unblocking(vcpu);
 	block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
@@ -3612,7 +3709,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
 	struct kvm_fpu *fpu = NULL;
 	struct kvm_sregs *kvm_sregs = NULL;
 
-	if (vcpu->kvm->mm != current->mm)
+	if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_bugged)
 		return -EIO;
 
 	if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
@@ -3822,7 +3919,7 @@ static long kvm_vcpu_compat_ioctl(struct file *filp,
 	void __user *argp = compat_ptr(arg);
 	int r;
 
-	if (vcpu->kvm->mm != current->mm)
+	if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_bugged)
 		return -EIO;
 
 	switch (ioctl) {
@@ -3888,7 +3985,7 @@ static long kvm_device_ioctl(struct file *filp, unsigned int ioctl,
 {
 	struct kvm_device *dev = filp->private_data;
 
-	if (dev->kvm->mm != current->mm)
+	if (dev->kvm->mm != current->mm || dev->kvm->vm_bugged)
 		return -EIO;
 
 	switch (ioctl) {
@@ -4210,7 +4307,7 @@ static long kvm_vm_ioctl(struct file *filp,
 	void __user *argp = (void __user *)arg;
 	int r;
 
-	if (kvm->mm != current->mm)
+	if (kvm->mm != current->mm || kvm->vm_bugged)
 		return -EIO;
 	switch (ioctl) {
 	case KVM_CREATE_VCPU:
@@ -4421,7 +4518,7 @@ static long kvm_vm_compat_ioctl(struct file *filp,
 	struct kvm *kvm = filp->private_data;
 	int r;
 
-	if (kvm->mm != current->mm)
+	if (kvm->mm != current->mm || kvm->vm_bugged)
 		return -EIO;
 	switch (ioctl) {
 #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
@@ -4983,12 +5080,12 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file,
 	struct kvm_stat_data *stat_data = (struct kvm_stat_data *)
 					  inode->i_private;
 
-	/* The debugfs files are a reference to the kvm struct which
-	 * is still valid when kvm_destroy_vm is called.
-	 * To avoid the race between open and the removal of the debugfs
-	 * directory we test against the users count.
+	/*
+	 * The debugfs files are a reference to the kvm struct which
+        * is still valid when kvm_destroy_vm is called.  kvm_get_kvm_safe
+        * avoids the race between open and the removal of the debugfs directory.
 	 */
-	if (!refcount_inc_not_zero(&stat_data->kvm->users_count))
+	if (!kvm_get_kvm_safe(stat_data->kvm))
 		return -ENOENT;
 
 	if (simple_attr_open(inode, file, get,